From bb09bcf7d1c2407700f24e7e1d909f5b093df7ae Mon Sep 17 00:00:00 2001 From: Yury Hayeu Date: Sat, 4 May 2024 08:06:39 +0200 Subject: [PATCH] Fixes --- diploma_thesis/b.txt | 32221 ----------- diploma_thesis/d.txt | 117246 ---------------------------------------- 2 files changed, 149467 deletions(-) delete mode 100644 diploma_thesis/b.txt delete mode 100644 diploma_thesis/d.txt diff --git a/diploma_thesis/b.txt b/diploma_thesis/b.txt deleted file mode 100644 index 3f9ea00..0000000 --- a/diploma_thesis/b.txt +++ /dev/null @@ -1,32221 +0,0 @@ -Running 3 simulations -Running 2 simulations -Task started model_1 -Total rewards: 3 -Total Receives: 1 -Total Receives: 2 -Total Receives: 3 -Total rewards: 5 -Total Receives: 4 -Total Receives: 5 -Total rewards: 10 -Total Receives: 6 -Total Receives: 7 -Total Receives: 8 -Total Receives: 9 -Total Receives: 10 -Total rewards: 11 -Total Receives: 11 -Total rewards: 12 -Total Receives: 12 -Total rewards: 13 -Total Receives: 13 -Total rewards: 14 -Total Receives: 14 -Total rewards: 17 -Total Receives: 15 -Total Receives: 16 -Total Receives: 17 -Total rewards: 22 -Total Receives: 18 -Total Receives: 19 -Total Receives: 20 -Total Receives: 21 -Total Receives: 22 -Total rewards: 27 -Total Receives: 23 -Stores 1 1 -Total Receives: 24 -Total Receives: 25 -Total Receives: 26 -Total Receives: 27 -Total rewards: 32 -Total Receives: 28 -Total Receives: 29 -Total Receives: 30 -Total Receives: 31 -Total Receives: 32 -Total rewards: 33 -Total Receives: 33 -Total rewards: 40 -Total Receives: 34 -Stores 2 1 -Total Receives: 35 -Stores 3 1 -Total Receives: 36 -Total Receives: 37 -Total Receives: 38 -Total Receives: 39 -Total Receives: 40 -Total rewards: 42 -Total Receives: 41 -Total Receives: 42 -Total rewards: 45 -Total Receives: 43 -Total Receives: 44 -Total Receives: 45 -Total rewards: 47 -Total Receives: 46 -Total Receives: 47 -Total rewards: 52 -Total Receives: 48 -Stores 4 1 -Total Receives: 49 -Total Receives: 50 -Total Receives: 51 -Total Receives: 52 -Total rewards: 57 -Total Receives: 53 -Stores 5 1 -Total Receives: 54 -Total Receives: 55 -Total Receives: 56 -Total Receives: 57 -Total rewards: 61 -Total Receives: 58 -Total Receives: 59 -Total Receives: 60 -Total Receives: 61 -Total rewards: 66 -Total Receives: 62 -Total Receives: 63 -Total Receives: 64 -Total Receives: 65 -Total Receives: 66 -Total rewards: 69 -Total Receives: 67 -Total Receives: 68 -Total Receives: 69 -Total rewards: 73 -Total Receives: 70 -Total Receives: 71 -Total Receives: 72 -Total Receives: 73 -Total rewards: 78 -Total Receives: 74 -Stores 6 1 -Total Receives: 75 -Stores 7 1 -Total Receives: 76 -Stores 8 1 -Total Receives: 77 -Stores 9 1 -Total Receives: 78 -Stores 10 1 -Total rewards: 83 -Total Receives: 79 -Total Receives: 80 -Total Receives: 81 -Total Receives: 82 -Total Receives: 83 -Total rewards: 85 -Total Receives: 84 -Total Receives: 85 -Total rewards: 88 -Total Receives: 86 -Stores 11 1 -Total Receives: 87 -Stores 12 1 -Total Receives: 88 -Stores 13 1 -Total rewards: 92 -Total Receives: 89 -Stores 14 1 -Total Receives: 90 -Stores 15 1 -Total Receives: 91 -Stores 16 1 -Total Receives: 92 -Stores 17 1 -Total rewards: 95 -Total Receives: 93 -Stores 18 1 -Total Receives: 94 -Stores 19 1 -Total Receives: 95 -Stores 20 1 -Total rewards: 98 -Total Receives: 96 -Total Receives: 97 -Total Receives: 98 -Total rewards: 102 -Total Receives: 99 -Total Receives: 100 -Total Receives: 101 -Total Receives: 102 -Total rewards: 105 -Total Receives: 103 -Total Receives: 104 -Total Receives: 105 -Total rewards: 108 -Total Receives: 106 -Total Receives: 107 -Total Receives: 108 -Total rewards: 112 -Total Receives: 109 -Total Receives: 110 -Total Receives: 111 -Total Receives: 112 -Total rewards: 115 -Total Receives: 113 -Stores 21 1 -Total Receives: 114 -Total Receives: 115 -Total rewards: 118 -Total Receives: 116 -Total Receives: 117 -Total Receives: 118 -Total rewards: 124 -Total Receives: 119 -Stores 22 1 -Total Receives: 120 -Stores 23 1 -Total Receives: 121 -Stores 24 1 -Total Receives: 122 -Stores 25 1 -Total Receives: 123 -Stores 26 1 -Total Receives: 124 -Stores 27 1 -Total rewards: 128 -Total Receives: 125 -Total Receives: 126 -Total Receives: 127 -Total Receives: 128 -Total rewards: 133 -Total Receives: 129 -Total Receives: 130 -Total Receives: 131 -Total Receives: 132 -Total Receives: 133 -Total rewards: 140 -Total Receives: 134 -Stores 28 1 -Total Receives: 135 -Total Receives: 136 -Total Receives: 137 -Total Receives: 138 -Total Receives: 139 -Total Receives: 140 -Total rewards: 146 -Total Receives: 141 -Total Receives: 142 -Total Receives: 143 -Total Receives: 144 -Total Receives: 145 -Total Receives: 146 -Total rewards: 153 -Total Receives: 147 -Stores 29 1 -Total Receives: 148 -Total Receives: 149 -Total Receives: 150 -Total Receives: 151 -Total Receives: 152 -Total Receives: 153 -Total rewards: 157 -Total Receives: 154 -Total Receives: 155 -Total Receives: 156 -Total Receives: 157 -Total rewards: 162 -Total Receives: 158 -Total Receives: 159 -Total Receives: 160 -Total Receives: 161 -Total Receives: 162 -Total rewards: 168 -Total Receives: 163 -Stores 30 1 -Total Receives: 164 -Stores 31 1 -Total Receives: 165 -Stores 32 1 -Train step: 1.2874603271484375e-05 Optimizer Step: 0 -Total Receives: 166 -Total Receives: 167 -Total Receives: 168 -Total rewards: 174 -Total Receives: 169 -Stores 33 1 -Total Receives: 170 -Stores 34 1 -Total Receives: 171 -Stores 35 1 -Total Receives: 172 -Stores 36 1 -Total Receives: 173 -Stores 37 1 -Total Receives: 174 -Stores 38 1 -Total rewards: 177 -Total Receives: 175 -Total Receives: 176 -Total Receives: 177 -Total rewards: 180 -Total Receives: 178 -Stores 39 1 -Total Receives: 179 -Stores 40 1 -Total Receives: 180 -Stores 41 1 -Total rewards: 182 -Total Receives: 181 -Total Receives: 182 -Total rewards: 187 -Total Receives: 183 -Total Receives: 184 -Total Receives: 185 -Total Receives: 186 -Total Receives: 187 -Total rewards: 190 -Total Receives: 188 -Total Receives: 189 -Total Receives: 190 -Total rewards: 195 -Total Receives: 191 -Total Receives: 192 -Total Receives: 193 -Total Receives: 194 -Total Receives: 195 -Total rewards: 196 -Total Receives: 196 -Stores 42 1 -Total rewards: 198 -Total Receives: 197 -Stores 43 1 -Total Receives: 198 -Stores 44 1 -Total rewards: 201 -Total Receives: 199 -Total Receives: 200 -Total Receives: 201 -Total rewards: 208 -Total Receives: 202 -Stores 45 1 -Total Receives: 203 -Stores 46 1 -Total Receives: 204 -Stores 47 1 -Total Receives: 205 -Stores 48 1 -Total Receives: 206 -Stores 49 1 -Total Receives: 207 -Stores 50 1 -Total Receives: 208 -Stores 51 1 -Total rewards: 211 -Total Receives: 209 -Stores 52 1 -Total Receives: 210 -Stores 53 1 -Total Receives: 211 -Stores 54 1 -Total rewards: 215 -Total Receives: 212 -Total Receives: 213 -Total Receives: 214 -Total Receives: 215 -Total rewards: 220 -Total Receives: 216 -Stores 55 1 -Total Receives: 217 -Stores 56 1 -Total Receives: 218 -Stores 57 1 -Total Receives: 219 -Stores 58 1 -Total Receives: 220 -Stores 59 1 -Total rewards: 224 -Total Receives: 221 -Total Receives: 222 -Total Receives: 223 -Total Receives: 224 -Total rewards: 228 -Total Receives: 225 -Total Receives: 226 -Total Receives: 227 -Total Receives: 228 -Total rewards: 231 -Total Receives: 229 -Stores 60 1 -Total Receives: 230 -Stores 61 1 -Total Receives: 231 -Stores 62 1 -Total rewards: 236 -Total Receives: 232 -Total Receives: 233 -Total Receives: 234 -Total Receives: 235 -Total Receives: 236 -Total rewards: 239 -Total Receives: 237 -Stores 63 1 -Total Receives: 238 -Stores 64 1 -Train step: 1.2159347534179688e-05 Optimizer Step: 0 -Total Receives: 239 -Stores 65 1 -Total rewards: 244 -Total Receives: 240 -Total Receives: 241 -Total Receives: 242 -Total Receives: 243 -Total Receives: 244 -Total rewards: 249 -Total Receives: 245 -Total Receives: 246 -Total Receives: 247 -Total Receives: 248 -Total Receives: 249 -Total rewards: 253 -Total Receives: 250 -Total Receives: 251 -Total Receives: 252 -Total Receives: 253 -Total rewards: 257 -Total Receives: 254 -Stores 66 1 -Total Receives: 255 -Stores 67 1 -Total Receives: 256 -Stores 68 1 -Total Receives: 257 -Stores 69 1 -Total rewards: 262 -Total Receives: 258 -Stores 70 1 -Total Receives: 259 -Stores 71 1 -Total Receives: 260 -Stores 72 1 -Total Receives: 261 -Stores 73 1 -Total Receives: 262 -Stores 74 1 -Total rewards: 265 -Total Receives: 263 -Stores 75 1 -Total Receives: 264 -Stores 76 1 -Total Receives: 265 -Total rewards: 272 -Total Receives: 266 -Stores 77 1 -Total Receives: 267 -Stores 78 1 -Total Receives: 268 -Stores 79 1 -Total Receives: 269 -Stores 80 1 -Total Receives: 270 -Stores 81 1 -Total Receives: 271 -Stores 82 1 -Total Receives: 272 -Stores 83 1 -Total rewards: 275 -Total Receives: 273 -Total Receives: 274 -Total Receives: 275 -Total rewards: 277 -Total Receives: 276 -Total Receives: 277 -Total rewards: 282 -Total Receives: 278 -Stores 84 1 -Total Receives: 279 -Stores 85 1 -Total Receives: 280 -Stores 86 1 -Total Receives: 281 -Stores 87 1 -Total Receives: 282 -Stores 88 1 -Total rewards: 284 -Total Receives: 283 -Stores 89 1 -Total Receives: 284 -Stores 90 1 -Total rewards: 290 -Total Receives: 285 -Task started model_0 -Total rewards: 3 -Total Receives: 1 -Total Receives: 2 -Total Receives: 3 -Total rewards: 5 -Total Receives: 4 -Total Receives: 5 -Total rewards: 10 -Total Receives: 6 -Total Receives: 7 -Total Receives: 8 -Total Receives: 9 -Total Receives: 10 -Total rewards: 11 -Total Receives: 11 -Total rewards: 12 -Total Receives: 12 -Total rewards: 13 -Total Receives: 13 -Total rewards: 14 -Total Receives: 14 -Total rewards: 17 -Total Receives: 15 -Total Receives: 16 -Total Receives: 17 -Total rewards: 22 -Total Receives: 18 -Total Receives: 19 -Total Receives: 20 -Total Receives: 21 -Total Receives: 22 -Total rewards: 27 -Total Receives: 23 -Stores 1 1 -Total Receives: 24 -Total Receives: 25 -Total Receives: 26 -Total Receives: 27 -Total rewards: 32 -Total Receives: 28 -Total Receives: 29 -Total Receives: 30 -Total Receives: 31 -Total Receives: 32 -Total rewards: 33 -Total Receives: 33 -Total rewards: 40 -Total Receives: 34 -Stores 2 1 -Total Receives: 35 -Stores 3 1 -Total Receives: 36 -Total Receives: 37 -Total Receives: 38 -Total Receives: 39 -Total Receives: 40 -Total rewards: 42 -Total Receives: 41 -Total Receives: 42 -Total rewards: 45 -Total Receives: 43 -Total Receives: 44 -Total Receives: 45 -Total rewards: 47 -Total Receives: 46 -Total Receives: 47 -Total rewards: 52 -Total Receives: 48 -Stores 4 1 -Total Receives: 49 -Total Receives: 50 -Total Receives: 51 -Total Receives: 52 -Total rewards: 57 -Total Receives: 53 -Stores 5 1 -Total Receives: 54 -Total Receives: 55 -Total Receives: 56 -Total Receives: 57 -Total rewards: 61 -Total Receives: 58 -Total Receives: 59 -Total Receives: 60 -Total Receives: 61 -Total rewards: 66 -Total Receives: 62 -Total Receives: 63 -Total Receives: 64 -Total Receives: 65 -Total Receives: 66 -Total rewards: 69 -Total Receives: 67 -Total Receives: 68 -Total Receives: 69 -Total rewards: 73 -Total Receives: 70 -Total Receives: 71 -Total Receives: 72 -Total Receives: 73 -Total rewards: 78 -Total Receives: 74 -Stores 6 1 -Total Receives: 75 -Stores 7 1 -Total Receives: 76 -Stores 8 1 -Total Receives: 77 -Stores 9 1 -Total Receives: 78 -Stores 10 1 -Total rewards: 83 -Total Receives: 79 -Total Receives: 80 -Total Receives: 81 -Total Receives: 82 -Total Receives: 83 -Total rewards: 85 -Total Receives: 84 -Total Receives: 85 -Total rewards: 88 -Total Receives: 86 -Stores 11 1 -Total Receives: 87 -Stores 12 1 -Total Receives: 88 -Stores 13 1 -Total rewards: 92 -Total Receives: 89 -Stores 14 1 -Total Receives: 90 -Stores 15 1 -Total Receives: 91 -Stores 16 1 -Total Receives: 92 -Stores 17 1 -Total rewards: 95 -Total Receives: 93 -Stores 18 1 -Total Receives: 94 -Stores 19 1 -Total Receives: 95 -Stores 20 1 -Total rewards: 98 -Total Receives: 96 -Total Receives: 97 -Total Receives: 98 -Total rewards: 102 -Total Receives: 99 -Total Receives: 100 -Total Receives: 101 -Total Receives: 102 -Total rewards: 105 -Total Receives: 103 -Total Receives: 104 -Total Receives: 105 -Total rewards: 108 -Total Receives: 106 -Total Receives: 107 -Total Receives: 108 -Total rewards: 112 -Total Receives: 109 -Total Receives: 110 -Total Receives: 111 -Total Receives: 112 -Total rewards: 115 -Total Receives: 113 -Stores 21 1 -Total Receives: 114 -Total Receives: 115 -Total rewards: 118 -Total Receives: 116 -Total Receives: 117 -Total Receives: 118 -Total rewards: 124 -Total Receives: 119 -Stores 22 1 -Total Receives: 120 -Stores 23 1 -Total Receives: 121 -Stores 24 1 -Total Receives: 122 -Stores 25 1 -Total Receives: 123 -Stores 26 1 -Total Receives: 124 -Stores 27 1 -Total rewards: 128 -Total Receives: 125 -Total Receives: 126 -Total Receives: 127 -Total Receives: 128 -Total rewards: 133 -Total Receives: 129 -Total Receives: 130 -Total Receives: 131 -Total Receives: 132 -Total Receives: 133 -Total rewards: 140 -Total Receives: 134 -Stores 28 1 -Total Receives: 135 -Total Receives: 136 -Total Receives: 137 -Total Receives: 138 -Total Receives: 139 -Total Receives: 140 -Total rewards: 146 -Total Receives: 141 -Total Receives: 142 -Total Receives: 143 -Total Receives: 144 -Total Receives: 145 -Total Receives: 146 -Total rewards: 153 -Total Receives: 147 -Stores 29 1 -Total Receives: 148 -Total Receives: 149 -Total Receives: 150 -Total Receives: 151 -Total Receives: 152 -Total Receives: 153 -Total rewards: 157 -Total Receives: 154 -Total Receives: 155 -Total Receives: 156 -Total Receives: 157 -Total rewards: 162 -Total Receives: 158 -Total Receives: 159 -Total Receives: 160 -Total Receives: 161 -Total Receives: 162 -Total rewards: 168 -Total Receives: 163 -Stores 30 1 -Total Receives: 164 -Stores 31 1 -Total Receives: 165 -Stores 32 1 -Train step: 1.3113021850585938e-05 Optimizer Step: 0 -Total Receives: 166 -Total Receives: 167 -Total Receives: 168 -Total rewards: 174 -Total Receives: 169 -Stores 33 1 -Total Receives: 170 -Stores 34 1 -Total Receives: 171 -Stores 35 1 -Total Receives: 172 -Stores 36 1 -Total Receives: 173 -Stores 37 1 -Total Receives: 174 -Stores 38 1 -Total rewards: 177 -Total Receives: 175 -Total Receives: 176 -Total Receives: 177 -Total rewards: 180 -Total Receives: 178 -Stores 39 1 -Total Receives: 179 -Stores 40 1 -Total Receives: 180 -Stores 41 1 -Total rewards: 182 -Total Receives: 181 -Total Receives: 182 -Total rewards: 187 -Total Receives: 183 -Total Receives: 184 -Total Receives: 185 -Total Receives: 186 -Total Receives: 187 -Total rewards: 190 -Total Receives: 188 -Total Receives: 189 -Total Receives: 190 -Total rewards: 195 -Total Receives: 191 -Total Receives: 192 -Total Receives: 193 -Total Receives: 194 -Total Receives: 195 -Total rewards: 196 -Total Receives: 196 -Stores 42 1 -Total rewards: 198 -Total Receives: 197 -Stores 43 1 -Total Receives: 198 -Stores 44 1 -Total rewards: 201 -Total Receives: 199 -Total Receives: 200 -Total Receives: 201 -Total rewards: 208 -Total Receives: 202 -Stores 45 1 -Total Receives: 203 -Stores 46 1 -Total Receives: 204 -Stores 47 1 -Total Receives: 205 -Stores 48 1 -Total Receives: 206 -Stores 49 1 -Total Receives: 207 -Stores 50 1 -Total Receives: 208 -Stores 51 1 -Total rewards: 211 -Total Receives: 209 -Stores 52 1 -Total Receives: 210 -Stores 53 1 -Total Receives: 211 -Stores 54 1 -Total rewards: 215 -Total Receives: 212 -Total Receives: 213 -Total Receives: 214 -Total Receives: 215 -Total rewards: 220 -Total Receives: 216 -Stores 55 1 -Total Receives: 217 -Stores 56 1 -Total Receives: 218 -Stores 57 1 -Total Receives: 219 -Stores 58 1 -Total Receives: 220 -Stores 59 1 -Total rewards: 224 -Total Receives: 221 -Total Receives: 222 -Total Receives: 223 -Total Receives: 224 -Total rewards: 228 -Total Receives: 225 -Total Receives: 226 -Total Receives: 227 -Total Receives: 228 -Total rewards: 231 -Total Receives: 229 -Stores 60 1 -Total Receives: 230 -Stores 61 1 -Total Receives: 231 -Stores 62 1 -Total rewards: 236 -Total Receives: 232 -Total Receives: 233 -Total Receives: 234 -Total Receives: 235 -Total Receives: 236 -Total rewards: 239 -Total Receives: 237 -Stores 63 1 -Total Receives: 238 -Stores 64 1 -Train step: 1.1920928955078125e-05 Optimizer Step: 0 -Total Receives: 239 -Stores 65 1 -Total rewards: 244 -Total Receives: 240 -Total Receives: 241 -Total Receives: 242 -Total Receives: 243 -Total Receives: 244 -Total rewards: 249 -Total Receives: 245 -Total Receives: 246 -Total Receives: 247 -Total Receives: 248 -Total Receives: 249 -Total rewards: 253 -Total Receives: 250 -Total Receives: 251 -Total Receives: 252 -Total Receives: 253 -Total rewards: 257 -Total Receives: 254 -Stores 66 1 -Total Receives: 255 -Stores 67 1 -Total Receives: 256 -Stores 68 1 -Total Receives: 257 -Stores 69 1 -Total rewards: 262 -Total Receives: 258 -Stores 70 1 -Total Receives: 259 -Stores 71 1 -Total Receives: 260 -Stores 72 1 -Total Receives: 261 -Stores 73 1 -Total Receives: 262 -Stores 74 1 -Total rewards: 265 -Total Receives: 263 -Stores 75 1 -Total Receives: 264 -Stores 76 1 -Total Receives: 265 -Total rewards: 272 -Total Receives: 266 -Stores 77 1 -Total Receives: 267 -Stores 78 1 -Total Receives: 268 -Stores 79 1 -Total Receives: 269 -Stores 80 1 -Total Receives: 270 -Stores 81 1 -Total Receives: 271 -Stores 82 1 -Total Receives: 272 -Stores 83 1 -Total rewards: 275 -Total Receives: 273 -Total Receives: 274 -Total Receives: 275 -Total rewards: 277 -Total Receives: 276 -Total Receives: 277 -Total rewards: 282 -Total Receives: 278 -Stores 84 1 -Total Receives: 279 -Stores 85 1 -Total Receives: 280 -Stores 86 1 -Total Receives: 281 -Stores 87 1 -Total Receives: 282 -Stores 88 1 -Total rewards: 284 -Total Receives: 283 -Stores 89 1 -Total Receives: 284 -Stores 90 1 -Total rewards: 290 -Total Receives: 285 -Task started model_2 -Total rewards: 3 -Total Receives: 1 -Total Receives: 2 -Total Receives: 3 -Total rewards: 5 -Total Receives: 4 -Total Receives: 5 -Total rewards: 10 -Total Receives: 6 -Total Receives: 7 -Total Receives: 8 -Total Receives: 9 -Total Receives: 10 -Total rewards: 11 -Total Receives: 11 -Total rewards: 12 -Total Receives: 12 -Total rewards: 13 -Total Receives: 13 -Total rewards: 14 -Total Receives: 14 -Total rewards: 17 -Total Receives: 15 -Total Receives: 16 -Total Receives: 17 -Total rewards: 22 -Total Receives: 18 -Total Receives: 19 -Total Receives: 20 -Total Receives: 21 -Total Receives: 22 -Total rewards: 27 -Total Receives: 23 -Stores 1 1 -Total Receives: 24 -Total Receives: 25 -Total Receives: 26 -Total Receives: 27 -Total rewards: 32 -Total Receives: 28 -Total Receives: 29 -Total Receives: 30 -Total Receives: 31 -Total Receives: 32 -Total rewards: 33 -Total Receives: 33 -Total rewards: 40 -Total Receives: 34 -Stores 2 1 -Total Receives: 35 -Stores 3 1 -Total Receives: 36 -Total Receives: 37 -Total Receives: 38 -Total Receives: 39 -Total Receives: 40 -Total rewards: 42 -Total Receives: 41 -Total Receives: 42 -Total rewards: 45 -Total Receives: 43 -Total Receives: 44 -Total Receives: 45 -Total rewards: 47 -Total Receives: 46 -Total Receives: 47 -Total rewards: 52 -Total Receives: 48 -Stores 4 1 -Total Receives: 49 -Total Receives: 50 -Total Receives: 51 -Total Receives: 52 -Total rewards: 57 -Total Receives: 53 -Stores 5 1 -Total Receives: 54 -Total Receives: 55 -Total Receives: 56 -Total Receives: 57 -Total rewards: 61 -Total Receives: 58 -Total Receives: 59 -Total Receives: 60 -Total Receives: 61 -Total rewards: 66 -Total Receives: 62 -Total Receives: 63 -Total Receives: 64 -Total Receives: 65 -Total Receives: 66 -Total rewards: 69 -Total Receives: 67 -Total Receives: 68 -Total Receives: 69 -Total rewards: 73 -Total Receives: 70 -Total Receives: 71 -Total Receives: 72 -Total Receives: 73 -Total rewards: 78 -Total Receives: 74 -Stores 6 1 -Total Receives: 75 -Stores 7 1 -Total Receives: 76 -Stores 8 1 -Total Receives: 77 -Stores 9 1 -Total Receives: 78 -Stores 10 1 -Total rewards: 83 -Total Receives: 79 -Total Receives: 80 -Total Receives: 81 -Total Receives: 82 -Total Receives: 83 -Total rewards: 85 -Total Receives: 84 -Total Receives: 85 -Total rewards: 88 -Total Receives: 86 -Stores 11 1 -Total Receives: 87 -Stores 12 1 -Total Receives: 88 -Stores 13 1 -Total rewards: 92 -Total Receives: 89 -Stores 14 1 -Total Receives: 90 -Stores 15 1 -Total Receives: 91 -Stores 16 1 -Total Receives: 92 -Stores 17 1 -Total rewards: 95 -Total Receives: 93 -Stores 18 1 -Total Receives: 94 -Stores 19 1 -Total Receives: 95 -Stores 20 1 -Total rewards: 98 -Total Receives: 96 -Total Receives: 97 -Total Receives: 98 -Total rewards: 102 -Total Receives: 99 -Total Receives: 100 -Total Receives: 101 -Total Receives: 102 -Total rewards: 105 -Total Receives: 103 -Total Receives: 104 -Total Receives: 105 -Total rewards: 108 -Total Receives: 106 -Total Receives: 107 -Total Receives: 108 -Total rewards: 112 -Total Receives: 109 -Total Receives: 110 -Total Receives: 111 -Total Receives: 112 -Total rewards: 115 -Total Receives: 113 -Stores 21 1 -Total Receives: 114 -Total Receives: 115 -Total rewards: 118 -Total Receives: 116 -Total Receives: 117 -Total Receives: 118 -Total rewards: 124 -Total Receives: 119 -Stores 22 1 -Total Receives: 120 -Stores 23 1 -Total Receives: 121 -Stores 24 1 -Total Receives: 122 -Stores 25 1 -Total Receives: 123 -Stores 26 1 -Total Receives: 124 -Stores 27 1 -Total rewards: 128 -Total Receives: 125 -Total Receives: 126 -Total Receives: 127 -Total Receives: 128 -Total rewards: 133 -Total Receives: 129 -Total Receives: 130 -Total Receives: 131 -Total Receives: 132 -Total Receives: 133 -Total rewards: 140 -Total Receives: 134 -Stores 28 1 -Total Receives: 135 -Total Receives: 136 -Total Receives: 137 -Total Receives: 138 -Total Receives: 139 -Total Receives: 140 -Total rewards: 146 -Total Receives: 141 -Total Receives: 142 -Total Receives: 143 -Total Receives: 144 -Total Receives: 145 -Total Receives: 146 -Total rewards: 153 -Total Receives: 147 -Stores 29 1 -Total Receives: 148 -Total Receives: 149 -Total Receives: 150 -Total Receives: 151 -Total Receives: 152 -Total Receives: 153 -Total rewards: 157 -Total Receives: 154 -Total Receives: 155 -Total Receives: 156 -Total Receives: 157 -Total rewards: 162 -Total Receives: 158 -Total Receives: 159 -Total Receives: 160 -Total Receives: 161 -Total Receives: 162 -Total rewards: 168 -Total Receives: 163 -Stores 30 1 -Total Receives: 164 -Stores 31 1 -Total Receives: 165 -Stores 32 1 -Train step: 1.1920928955078125e-05 Optimizer Step: 0 -Total Receives: 166 -Total Receives: 167 -Total Receives: 168 -Total rewards: 174 -Total Receives: 169 -Stores 33 1 -Total Receives: 170 -Stores 34 1 -Total Receives: 171 -Stores 35 1 -Total Receives: 172 -Stores 36 1 -Total Receives: 173 -Stores 37 1 -Total Receives: 174 -Stores 38 1 -Total rewards: 177 -Total Receives: 175 -Total Receives: 176 -Total Receives: 177 -Total rewards: 180 -Total Receives: 178 -Stores 39 1 -Total Receives: 179 -Stores 40 1 -Total Receives: 180 -Stores 41 1 -Total rewards: 182 -Total Receives: 181 -Total Receives: 182 -Total rewards: 187 -Total Receives: 183 -Total Receives: 184 -Total Receives: 185 -Total Receives: 186 -Total Receives: 187 -Total rewards: 190 -Total Receives: 188 -Total Receives: 189 -Total Receives: 190 -Total rewards: 195 -Total Receives: 191 -Total Receives: 192 -Total Receives: 193 -Total Receives: 194 -Total Receives: 195 -Total rewards: 196 -Total Receives: 196 -Stores 42 1 -Total rewards: 198 -Total Receives: 197 -Stores 43 1 -Total Receives: 198 -Stores 44 1 -Total rewards: 201 -Total Receives: 199 -Total Receives: 200 -Total Receives: 201 -Total rewards: 208 -Total Receives: 202 -Stores 45 1 -Total Receives: 203 -Stores 46 1 -Total Receives: 204 -Stores 47 1 -Total Receives: 205 -Stores 48 1 -Total Receives: 206 -Stores 49 1 -Total Receives: 207 -Stores 50 1 -Total Receives: 208 -Stores 51 1 -Total rewards: 211 -Total Receives: 209 -Stores 52 1 -Total Receives: 210 -Stores 53 1 -Total Receives: 211 -Stores 54 1 -Total rewards: 215 -Total Receives: 212 -Total Receives: 213 -Total Receives: 214 -Total Receives: 215 -Total rewards: 220 -Total Receives: 216 -Stores 55 1 -Total Receives: 217 -Stores 56 1 -Total Receives: 218 -Stores 57 1 -Total Receives: 219 -Stores 58 1 -Total Receives: 220 -Stores 59 1 -Total rewards: 224 -Total Receives: 221 -Total Receives: 222 -Total Receives: 223 -Total Receives: 224 -Total rewards: 228 -Total Receives: 225 -Total Receives: 226 -Total Receives: 227 -Total Receives: 228 -Total rewards: 231 -Total Receives: 229 -Stores 60 1 -Total Receives: 230 -Stores 61 1 -Total Receives: 231 -Stores 62 1 -Total rewards: 236 -Total Receives: 232 -Total Receives: 233 -Total Receives: 234 -Total Receives: 235 -Total Receives: 236 -Total rewards: 239 -Total Receives: 237 -Stores 63 1 -Total Receives: 238 -Stores 64 1 -Train step: 1.2874603271484375e-05 Optimizer Step: 0 -Total Receives: 239 -Stores 65 1 -Total rewards: 244 -Total Receives: 240 -Total Receives: 241 -Total Receives: 242 -Total Receives: 243 -Total Receives: 244 -Total rewards: 249 -Total Receives: 245 -Total Receives: 246 -Total Receives: 247 -Total Receives: 248 -Total Receives: 249 -Total rewards: 253 -Total Receives: 250 -Total Receives: 251 -Total Receives: 252 -Total Receives: 253 -Total rewards: 257 -Total Receives: 254 -Stores 66 1 -Total Receives: 255 -Stores 67 1 -Total Receives: 256 -Stores 68 1 -Total Receives: 257 -Stores 69 1 -Total rewards: 262 -Total Receives: 258 -Stores 70 1 -Total Receives: 259 -Stores 71 1 -Total Receives: 260 -Stores 72 1 -Total Receives: 261 -Stores 73 1 -Total Receives: 262 -Stores 74 1 -Total rewards: 265 -Total Receives: 263 -Stores 75 1 -Total Receives: 264 -Stores 76 1 -Total Receives: 265 -Total rewards: 272 -Total Receives: 266 -Stores 77 1 -Total Receives: 267 -Stores 78 1 -Total Receives: 268 -Stores 79 1 -Total Receives: 269 -Stores 80 1 -Total Receives: 270 -Stores 81 1 -Total Receives: 271 -Stores 82 1 -Total Receives: 272 -Stores 83 1 -Total rewards: 275 -Total Receives: 273 -Total Receives: 274 -Total Receives: 275 -Total rewards: 277 -Total Receives: 276 -Total Receives: 277 -Total rewards: 282 -Total Receives: 278 -Stores 84 1 -Total Receives: 279 -Stores 85 1 -Total Receives: 280 -Stores 86 1 -Total Receives: 281 -Stores 87 1 -Total Receives: 282 -Stores 88 1 -Total rewards: 284 -Total Receives: 283 -Stores 89 1 -Total Receives: 284 -Stores 90 1 -Total rewards: 290 -Total Receives: 285 -Task started model_1 -Total rewards: 3 -Total Receives: 1 -Total Receives: 2 -Total Receives: 3 -Total rewards: 5 -Total Receives: 4 -Total Receives: 5 -Total rewards: 10 -Total Receives: 6 -Total Receives: 7 -Total Receives: 8 -Total Receives: 9 -Total Receives: 10 -Total rewards: 11 -Total Receives: 11 -Total rewards: 12 -Total Receives: 12 -Total rewards: 13 -Total Receives: 13 -Total rewards: 14 -Total Receives: 14 -Total rewards: 17 -Total Receives: 15 -Total Receives: 16 -Total Receives: 17 -Total rewards: 22 -Total Receives: 18 -Total Receives: 19 -Total Receives: 20 -Total Receives: 21 -Total Receives: 22 -Total rewards: 27 -Total Receives: 23 -Stores 1 1 -Total Receives: 24 -Total Receives: 25 -Total Receives: 26 -Total Receives: 27 -Total rewards: 32 -Total Receives: 28 -Total Receives: 29 -Total Receives: 30 -Total Receives: 31 -Total Receives: 32 -Total rewards: 33 -Total Receives: 33 -Total rewards: 40 -Total Receives: 34 -Stores 2 1 -Total Receives: 35 -Stores 3 1 -Total Receives: 36 -Total Receives: 37 -Total Receives: 38 -Total Receives: 39 -Total Receives: 40 -Total rewards: 42 -Total Receives: 41 -Total Receives: 42 -Total rewards: 45 -Total Receives: 43 -Total Receives: 44 -Total Receives: 45 -Total rewards: 47 -Total Receives: 46 -Total Receives: 47 -Total rewards: 52 -Total Receives: 48 -Stores 4 1 -Total Receives: 49 -Total Receives: 50 -Total Receives: 51 -Total Receives: 52 -Total rewards: 57 -Total Receives: 53 -Stores 5 1 -Total Receives: 54 -Total Receives: 55 -Total Receives: 56 -Total Receives: 57 -Total rewards: 61 -Total Receives: 58 -Total Receives: 59 -Total Receives: 60 -Total Receives: 61 -Total rewards: 66 -Total Receives: 62 -Total Receives: 63 -Total Receives: 64 -Total Receives: 65 -Total Receives: 66 -Total rewards: 69 -Total Receives: 67 -Total Receives: 68 -Total Receives: 69 -Total rewards: 73 -Total Receives: 70 -Total Receives: 71 -Total Receives: 72 -Total Receives: 73 -Total rewards: 78 -Total Receives: 74 -Stores 6 1 -Total Receives: 75 -Stores 7 1 -Total Receives: 76 -Stores 8 1 -Total Receives: 77 -Stores 9 1 -Total Receives: 78 -Stores 10 1 -Total rewards: 83 -Total Receives: 79 -Total Receives: 80 -Total Receives: 81 -Total Receives: 82 -Total Receives: 83 -Total rewards: 85 -Total Receives: 84 -Total Receives: 85 -Total rewards: 88 -Total Receives: 86 -Stores 11 1 -Total Receives: 87 -Stores 12 1 -Total Receives: 88 -Stores 13 1 -Total rewards: 92 -Total Receives: 89 -Stores 14 1 -Total Receives: 90 -Stores 15 1 -Total Receives: 91 -Stores 16 1 -Total Receives: 92 -Stores 17 1 -Total rewards: 95 -Total Receives: 93 -Stores 18 1 -Total Receives: 94 -Stores 19 1 -Total Receives: 95 -Stores 20 1 -Total rewards: 98 -Total Receives: 96 -Total Receives: 97 -Total Receives: 98 -Total rewards: 102 -Total Receives: 99 -Total Receives: 100 -Total Receives: 101 -Total Receives: 102 -Total rewards: 105 -Total Receives: 103 -Total Receives: 104 -Total Receives: 105 -Total rewards: 108 -Total Receives: 106 -Total Receives: 107 -Total Receives: 108 -Total rewards: 112 -Total Receives: 109 -Total Receives: 110 -Total Receives: 111 -Total Receives: 112 -Total rewards: 115 -Total Receives: 113 -Stores 21 1 -Total Receives: 114 -Total Receives: 115 -Total rewards: 118 -Total Receives: 116 -Total Receives: 117 -Total Receives: 118 -Total rewards: 124 -Total Receives: 119 -Stores 22 1 -Total Receives: 120 -Stores 23 1 -Total Receives: 121 -Stores 24 1 -Total Receives: 122 -Stores 25 1 -Total Receives: 123 -Stores 26 1 -Total Receives: 124 -Stores 27 1 -Total rewards: 128 -Total Receives: 125 -Total Receives: 126 -Total Receives: 127 -Total Receives: 128 -Total rewards: 133 -Total Receives: 129 -Total Receives: 130 -Total Receives: 131 -Total Receives: 132 -Total Receives: 133 -Total rewards: 140 -Total Receives: 134 -Stores 28 1 -Total Receives: 135 -Total Receives: 136 -Total Receives: 137 -Total Receives: 138 -Total Receives: 139 -Total Receives: 140 -Total rewards: 146 -Total Receives: 141 -Total Receives: 142 -Total Receives: 143 -Total Receives: 144 -Total Receives: 145 -Total Receives: 146 -Total rewards: 153 -Total Receives: 147 -Stores 29 1 -Total Receives: 148 -Total Receives: 149 -Total Receives: 150 -Total Receives: 151 -Total Receives: 152 -Total Receives: 153 -Total rewards: 157 -Total Receives: 154 -Total Receives: 155 -Total Receives: 156 -Total Receives: 157 -Total rewards: 162 -Total Receives: 158 -Total Receives: 159 -Total Receives: 160 -Total Receives: 161 -Total Receives: 162 -Total rewards: 168 -Total Receives: 163 -Stores 30 1 -Total Receives: 164 -Stores 31 1 -Total Receives: 165 -Stores 32 1 -Total Receives: 166 -Total Receives: 167 -Total Receives: 168 -Total rewards: 174 -Total Receives: 169 -Stores 33 1 -Total Receives: 170 -Stores 34 1 -Total Receives: 171 -Stores 35 1 -Total Receives: 172 -Stores 36 1 -Total Receives: 173 -Stores 37 1 -Total Receives: 174 -Stores 38 1 -Total rewards: 177 -Total Receives: 175 -Total Receives: 176 -Total Receives: 177 -Total rewards: 180 -Total Receives: 178 -Stores 39 1 -Total Receives: 179 -Stores 40 1 -Total Receives: 180 -Stores 41 1 -Total rewards: 182 -Total Receives: 181 -Total Receives: 182 -Total rewards: 187 -Total Receives: 183 -Total Receives: 184 -Total Receives: 185 -Total Receives: 186 -Total Receives: 187 -Total rewards: 190 -Total Receives: 188 -Total Receives: 189 -Total Receives: 190 -Total rewards: 195 -Total Receives: 191 -Total Receives: 192 -Total Receives: 193 -Total Receives: 194 -Total Receives: 195 -Total rewards: 196 -Total Receives: 196 -Stores 42 1 -Total rewards: 198 -Total Receives: 197 -Stores 43 1 -Total Receives: 198 -Stores 44 1 -Total rewards: 201 -Total Receives: 199 -Total Receives: 200 -Total Receives: 201 -Total rewards: 208 -Total Receives: 202 -Stores 45 1 -Total Receives: 203 -Stores 46 1 -Total Receives: 204 -Stores 47 1 -Total Receives: 205 -Stores 48 1 -Total Receives: 206 -Stores 49 1 -Total Receives: 207 -Stores 50 1 -Total Receives: 208 -Stores 51 1 -Total rewards: 211 -Total Receives: 209 -Stores 52 1 -Total Receives: 210 -Stores 53 1 -Total Receives: 211 -Stores 54 1 -Total rewards: 215 -Total Receives: 212 -Total Receives: 213 -Total Receives: 214 -Total Receives: 215 -Total rewards: 220 -Total Receives: 216 -Stores 55 1 -Total Receives: 217 -Stores 56 1 -Total Receives: 218 -Stores 57 1 -Total Receives: 219 -Stores 58 1 -Total Receives: 220 -Stores 59 1 -Total rewards: 224 -Total Receives: 221 -Total Receives: 222 -Total Receives: 223 -Total Receives: 224 -Total rewards: 228 -Total Receives: 225 -Total Receives: 226 -Total Receives: 227 -Total Receives: 228 -Total rewards: 231 -Total Receives: 229 -Stores 60 1 -Total Receives: 230 -Stores 61 1 -Total Receives: 231 -Stores 62 1 -Total rewards: 236 -Total Receives: 232 -Total Receives: 233 -Total Receives: 234 -Total Receives: 235 -Total Receives: 236 -Total rewards: 239 -Total Receives: 237 -Stores 63 1 -Total Receives: 238 -Stores 64 1 -Total Receives: 239 -Stores 65 1 -Total rewards: 244 -Total Receives: 240 -Total Receives: 241 -Total Receives: 242 -Total Receives: 243 -Total Receives: 244 -Total rewards: 249 -Total Receives: 245 -Total Receives: 246 -Total Receives: 247 -Total Receives: 248 -Total Receives: 249 -Total rewards: 253 -Total Receives: 250 -Total Receives: 251 -Total Receives: 252 -Total Receives: 253 -Total rewards: 257 -Total Receives: 254 -Stores 66 1 -Total Receives: 255 -Stores 67 1 -Total Receives: 256 -Stores 68 1 -Total Receives: 257 -Stores 69 1 -Total rewards: 262 -Total Receives: 258 -Stores 70 1 -Total Receives: 259 -Stores 71 1 -Total Receives: 260 -Stores 72 1 -Total Receives: 261 -Stores 73 1 -Total Receives: 262 -Stores 74 1 -Total rewards: 265 -Total Receives: 263 -Stores 75 1 -Total Receives: 264 -Stores 76 1 -Total Receives: 265 -Total rewards: 272 -Total Receives: 266 -Stores 77 1 -Total Receives: 267 -Stores 78 1 -Total Receives: 268 -Stores 79 1 -Total Receives: 269 -Stores 80 1 -Total Receives: 270 -Stores 81 1 -Total Receives: 271 -Stores 82 1 -Total Receives: 272 -Stores 83 1 -Total rewards: 275 -Total Receives: 273 -Total Receives: 274 -Total Receives: 275 -Total rewards: 277 -Total Receives: 276 -Total Receives: 277 -Total rewards: 282 -Total Receives: 278 -Stores 84 1 -Total Receives: 279 -Stores 85 1 -Total Receives: 280 -Stores 86 1 -Total Receives: 281 -Stores 87 1 -Total Receives: 282 -Stores 88 1 -Total rewards: 284 -Total Receives: 283 -Stores 89 1 -Total Receives: 284 -Stores 90 1 -Total rewards: 290 -Total Receives: 285 -Stores 91 1 -Total Receives: 286 -Stores 92 1 -Total Receives: 287 -Stores 93 1 -Total Receives: 288 -Stores 94 1 -Task started model_0 -Total rewards: 3 -Total Receives: 1 -Total Receives: 2 -Total Receives: 3 -Total rewards: 5 -Total Receives: 4 -Total Receives: 5 -Total rewards: 10 -Total Receives: 6 -Total Receives: 7 -Total Receives: 8 -Total Receives: 9 -Total Receives: 10 -Total rewards: 11 -Total Receives: 11 -Total rewards: 12 -Total Receives: 12 -Total rewards: 13 -Total Receives: 13 -Total rewards: 14 -Total Receives: 14 -Total rewards: 17 -Total Receives: 15 -Total Receives: 16 -Total Receives: 17 -Total rewards: 22 -Total Receives: 18 -Total Receives: 19 -Total Receives: 20 -Total Receives: 21 -Total Receives: 22 -Total rewards: 27 -Total Receives: 23 -Stores 1 1 -Total Receives: 24 -Total Receives: 25 -Total Receives: 26 -Total Receives: 27 -Total rewards: 32 -Total Receives: 28 -Total Receives: 29 -Total Receives: 30 -Total Receives: 31 -Total Receives: 32 -Total rewards: 33 -Total Receives: 33 -Total rewards: 40 -Total Receives: 34 -Stores 2 1 -Total Receives: 35 -Stores 3 1 -Total Receives: 36 -Total Receives: 37 -Total Receives: 38 -Total Receives: 39 -Total Receives: 40 -Total rewards: 42 -Total Receives: 41 -Total Receives: 42 -Total rewards: 45 -Total Receives: 43 -Total Receives: 44 -Total Receives: 45 -Total rewards: 47 -Total Receives: 46 -Total Receives: 47 -Total rewards: 52 -Total Receives: 48 -Stores 4 1 -Total Receives: 49 -Total Receives: 50 -Total Receives: 51 -Total Receives: 52 -Total rewards: 57 -Total Receives: 53 -Stores 5 1 -Total Receives: 54 -Total Receives: 55 -Total Receives: 56 -Total Receives: 57 -Total rewards: 61 -Total Receives: 58 -Total Receives: 59 -Total Receives: 60 -Total Receives: 61 -Total rewards: 66 -Total Receives: 62 -Total Receives: 63 -Total Receives: 64 -Total Receives: 65 -Total Receives: 66 -Total rewards: 69 -Total Receives: 67 -Total Receives: 68 -Total Receives: 69 -Total rewards: 73 -Total Receives: 70 -Total Receives: 71 -Total Receives: 72 -Total Receives: 73 -Total rewards: 78 -Total Receives: 74 -Stores 6 1 -Total Receives: 75 -Stores 7 1 -Total Receives: 76 -Stores 8 1 -Total Receives: 77 -Stores 9 1 -Total Receives: 78 -Stores 10 1 -Total rewards: 83 -Total Receives: 79 -Total Receives: 80 -Total Receives: 81 -Total Receives: 82 -Total Receives: 83 -Total rewards: 85 -Total Receives: 84 -Total Receives: 85 -Total rewards: 88 -Total Receives: 86 -Stores 11 1 -Total Receives: 87 -Stores 12 1 -Total Receives: 88 -Stores 13 1 -Total rewards: 92 -Total Receives: 89 -Stores 14 1 -Total Receives: 90 -Stores 15 1 -Total Receives: 91 -Stores 16 1 -Total Receives: 92 -Stores 17 1 -Total rewards: 95 -Total Receives: 93 -Stores 18 1 -Total Receives: 94 -Stores 19 1 -Total Receives: 95 -Stores 20 1 -Total rewards: 98 -Total Receives: 96 -Total Receives: 97 -Total Receives: 98 -Total rewards: 102 -Total Receives: 99 -Total Receives: 100 -Total Receives: 101 -Total Receives: 102 -Total rewards: 105 -Total Receives: 103 -Total Receives: 104 -Total Receives: 105 -Total rewards: 108 -Total Receives: 106 -Total Receives: 107 -Total Receives: 108 -Total rewards: 112 -Total Receives: 109 -Total Receives: 110 -Total Receives: 111 -Total Receives: 112 -Total rewards: 115 -Total Receives: 113 -Stores 21 1 -Total Receives: 114 -Total Receives: 115 -Total rewards: 118 -Total Receives: 116 -Total Receives: 117 -Total Receives: 118 -Total rewards: 124 -Total Receives: 119 -Stores 22 1 -Total Receives: 120 -Stores 23 1 -Total Receives: 121 -Stores 24 1 -Total Receives: 122 -Stores 25 1 -Total Receives: 123 -Stores 26 1 -Total Receives: 124 -Stores 27 1 -Total rewards: 128 -Total Receives: 125 -Total Receives: 126 -Total Receives: 127 -Total Receives: 128 -Total rewards: 133 -Total Receives: 129 -Total Receives: 130 -Total Receives: 131 -Total Receives: 132 -Total Receives: 133 -Total rewards: 140 -Total Receives: 134 -Stores 28 1 -Total Receives: 135 -Total Receives: 136 -Total Receives: 137 -Total Receives: 138 -Total Receives: 139 -Total Receives: 140 -Total rewards: 146 -Total Receives: 141 -Total Receives: 142 -Total Receives: 143 -Total Receives: 144 -Total Receives: 145 -Total Receives: 146 -Total rewards: 153 -Total Receives: 147 -Stores 29 1 -Total Receives: 148 -Total Receives: 149 -Total Receives: 150 -Total Receives: 151 -Total Receives: 152 -Total Receives: 153 -Total rewards: 157 -Total Receives: 154 -Total Receives: 155 -Total Receives: 156 -Total Receives: 157 -Total rewards: 162 -Total Receives: 158 -Total Receives: 159 -Total Receives: 160 -Total Receives: 161 -Total Receives: 162 -Total rewards: 168 -Total Receives: 163 -Stores 30 1 -Total Receives: 164 -Stores 31 1 -Total Receives: 165 -Stores 32 1 -Total Receives: 166 -Total Receives: 167 -Total Receives: 168 -Total rewards: 174 -Total Receives: 169 -Stores 33 1 -Total Receives: 170 -Stores 34 1 -Total Receives: 171 -Stores 35 1 -Total Receives: 172 -Stores 36 1 -Total Receives: 173 -Stores 37 1 -Total Receives: 174 -Stores 38 1 -Total rewards: 177 -Total Receives: 175 -Total Receives: 176 -Total Receives: 177 -Total rewards: 180 -Total Receives: 178 -Stores 39 1 -Total Receives: 179 -Stores 40 1 -Total Receives: 180 -Stores 41 1 -Total rewards: 182 -Total Receives: 181 -Total Receives: 182 -Total rewards: 187 -Total Receives: 183 -Total Receives: 184 -Total Receives: 185 -Total Receives: 186 -Total Receives: 187 -Total rewards: 190 -Total Receives: 188 -Total Receives: 189 -Total Receives: 190 -Total rewards: 195 -Total Receives: 191 -Total Receives: 192 -Total Receives: 193 -Total Receives: 194 -Total Receives: 195 -Total rewards: 196 -Total Receives: 196 -Stores 42 1 -Total rewards: 198 -Total Receives: 197 -Stores 43 1 -Total Receives: 198 -Stores 44 1 -Total rewards: 201 -Total Receives: 199 -Total Receives: 200 -Total Receives: 201 -Total rewards: 208 -Total Receives: 202 -Stores 45 1 -Total Receives: 203 -Stores 46 1 -Total Receives: 204 -Stores 47 1 -Total Receives: 205 -Stores 48 1 -Total Receives: 206 -Stores 49 1 -Total Receives: 207 -Stores 50 1 -Total Receives: 208 -Stores 51 1 -Total rewards: 211 -Total Receives: 209 -Stores 52 1 -Total Receives: 210 -Stores 53 1 -Total Receives: 211 -Stores 54 1 -Total rewards: 215 -Total Receives: 212 -Total Receives: 213 -Total Receives: 214 -Total Receives: 215 -Total rewards: 220 -Total Receives: 216 -Stores 55 1 -Total Receives: 217 -Stores 56 1 -Total Receives: 218 -Stores 57 1 -Total Receives: 219 -Stores 58 1 -Total Receives: 220 -Stores 59 1 -Total rewards: 224 -Total Receives: 221 -Total Receives: 222 -Total Receives: 223 -Total Receives: 224 -Total rewards: 228 -Total Receives: 225 -Total Receives: 226 -Total Receives: 227 -Total Receives: 228 -Total rewards: 231 -Total Receives: 229 -Stores 60 1 -Total Receives: 230 -Stores 61 1 -Total Receives: 231 -Stores 62 1 -Total rewards: 236 -Total Receives: 232 -Total Receives: 233 -Total Receives: 234 -Total Receives: 235 -Total Receives: 236 -Total rewards: 239 -Total Receives: 237 -Stores 63 1 -Total Receives: 238 -Stores 64 1 -Total Receives: 239 -Stores 65 1 -Total rewards: 244 -Total Receives: 240 -Total Receives: 241 -Total Receives: 242 -Total Receives: 243 -Total Receives: 244 -Total rewards: 249 -Total Receives: 245 -Total Receives: 246 -Total Receives: 247 -Total Receives: 248 -Total Receives: 249 -Total rewards: 253 -Total Receives: 250 -Total Receives: 251 -Total Receives: 252 -Total Receives: 253 -Total rewards: 257 -Total Receives: 254 -Stores 66 1 -Total Receives: 255 -Stores 67 1 -Total Receives: 256 -Stores 68 1 -Total Receives: 257 -Stores 69 1 -Total rewards: 262 -Total Receives: 258 -Stores 70 1 -Total Receives: 259 -Stores 71 1 -Total Receives: 260 -Stores 72 1 -Total Receives: 261 -Stores 73 1 -Total Receives: 262 -Stores 74 1 -Total rewards: 265 -Total Receives: 263 -Stores 75 1 -Total Receives: 264 -Stores 76 1 -Total Receives: 265 -Total rewards: 272 -Total Receives: 266 -Stores 77 1 -Total Receives: 267 -Stores 78 1 -Total Receives: 268 -Stores 79 1 -Total Receives: 269 -Stores 80 1 -Total Receives: 270 -Stores 81 1 -Total Receives: 271 -Stores 82 1 -Total Receives: 272 -Stores 83 1 -Total rewards: 275 -Total Receives: 273 -Total Receives: 274 -Total Receives: 275 -Total rewards: 277 -Total Receives: 276 -Total Receives: 277 -Total rewards: 282 -Total Receives: 278 -Stores 84 1 -Total Receives: 279 -Stores 85 1 -Total Receives: 280 -Stores 86 1 -Total Receives: 281 -Stores 87 1 -Total Receives: 282 -Stores 88 1 -Total rewards: 284 -Total Receives: 283 -Stores 89 1 -Total Receives: 284 -Stores 90 1 -Total rewards: 290 -Total Receives: 285 -Stores 91 1 -Total Receives: 286 -Stores 92 1 -Total Receives: 287 -Stores 93 1 -Total Receives: 288 -Stores 94 1 -Stores 91 1 -Total Receives: 286 -Stores 92 1 -Total Receives: 287 -Stores 93 1 -Total Receives: 288 -Stores 94 1 -Total Receives: 289 -Stores 95 1 -Total Receives: 290 -Stores 96 1 -Train step: 1.2874603271484375e-05 Optimizer Step: 0 -Total rewards: 293 -Total Receives: 291 -Stores 97 1 -Total Receives: 292 -Stores 98 1 -Total Receives: 293 -Stores 99 1 -Total rewards: 299 -Total Receives: 294 -Stores 100 1 -Total Receives: 295 -Stores 101 1 -Total Receives: 296 -Stores 102 1 -Total Receives: 297 -Stores 103 1 -Total Receives: 298 -Stores 104 1 -Total Receives: 299 -Stores 105 1 -Total rewards: 303 -Total Receives: 300 -Stores 106 1 -Total Receives: 301 -Stores 107 1 -Total Receives: 302 -Stores 108 1 -Total Receives: 303 -Stores 109 1 -Total rewards: 306 -Total Receives: 304 -Stores 110 1 -Total Receives: 305 -Stores 111 1 -Total Receives: 306 -Stores 112 1 -Total rewards: 310 -Total Receives: 307 -Stores 113 1 -Total Receives: 308 -Stores 114 1 -Total Receives: 309 -Stores 115 1 -Total Receives: 310 -Stores 116 1 -Total rewards: 311 -Total Receives: 311 -Stores 117 1 -Total rewards: 317 -Total Receives: 312 -Stores 118 1 -Total Receives: 313 -Stores 119 1 -Total Receives: 314 -Stores 120 1 -Total Receives: 315 -Stores 121 1 -Total Receives: 316 -Stores 122 1 -Total Receives: 317 -Stores 123 1 -Total rewards: 321 -Total Receives: 318 -Stores 124 1 -Total Receives: 319 -Stores 125 1 -Total Receives: 320 -Stores 126 1 -Total Receives: 321 -Stores 127 1 -Total rewards: 325 -Total Receives: 322 -Stores 128 1 -loss: 0.08340933918952942, td_error: 0.08340933918952942, entropy: 4.852001190185547 -Train step: 0.411268949508667 Optimizer Step: 1 -Total Receives: 323 -Stores 129 1 -Total Receives: 324 -Stores 130 1 -Total Receives: 325 -Stores 131 1 -Total rewards: 329 -Total Receives: 326 -Stores 132 1 -Total Receives: 327 -Stores 133 1 -Total Receives: 328 -Stores 134 1 -Total Receives: 329 -Stores 135 1 -Total rewards: 334 -Total Receives: 330 -Stores 136 1 -Total Receives: 331 -Stores 137 1 -Total Receives: 332 -Stores 138 1 -Total Receives: 333 -Stores 139 1 -Total Receives: 334 -Stores 140 1 -Total rewards: 342 -Total Receives: 335 -Stores 141 1 -Total Receives: 336 -Stores 142 1 -Total Receives: 337 -Stores 143 1 -Total Receives: 338 -Stores 144 1 -Total Receives: 339 -Stores 145 1 -Total Receives: 340 -Stores 146 1 -Total Receives: 341 -Stores 147 1 -Total Receives: 342 -Stores 148 1 -Total rewards: 347 -Total Receives: 343 -Stores 149 1 -Total Receives: 344 -Stores 150 1 -Total Receives: 345 -Stores 151 1 -Total Receives: 346 -Stores 152 1 -Total Receives: 347 -Stores 153 1 -Total rewards: 352 -Total Receives: 348 -Stores 154 1 -Total Receives: 349 -Stores 155 1 -Total Receives: 350 -Stores 156 1 -Total Receives: 351 -Stores 157 1 -Total Receives: 352 -Stores 158 1 -Total rewards: 356 -Total Receives: 353 -Stores 159 1 -Total Receives: 354 -Stores 160 1 -loss: 1907.040283203125, td_error: 810966.5625, entropy: 0.00013814149133395404 -Train step: 0.4413599967956543 Optimizer Step: 2 -Total Receives: 355 -Stores 161 1 -Total Receives: 356 -Stores 162 1 -Total rewards: 362 -Total Receives: 357 -Stores 163 1 -Total Receives: 358 -Stores 164 1 -Total Receives: 359 -Stores 165 1 -Total Receives: 360 -Stores 166 1 -Total Receives: 361 -Stores 167 1 -Total Receives: 362 -Stores 168 1 -Total rewards: 367 -Total Receives: 363 -Stores 169 1 -Total Receives: 364 -Stores 170 1 -Total Receives: 365 -Stores 171 1 -Total Receives: 366 -Stores 172 1 -Total Receives: 367 -Stores 173 1 -Total rewards: 371 -Total Receives: 368 -Stores 174 1 -Total Receives: 369 -Stores 175 1 -Total Receives: 370 -Stores 176 1 -Total Receives: 371 -Stores 177 1 -Total rewards: 373 -Total Receives: 372 -Stores 178 1 -Total Receives: 373 -Stores 179 1 -Total rewards: 379 -Total Receives: 374 -Stores 180 1 -Total Receives: 375 -Stores 181 1 -Total Receives: 376 -Stores 182 1 -Total Receives: 377 -Stores 183 1 -Total Receives: 378 -Stores 184 1 -Total Receives: 379 -Stores 185 1 -Total rewards: 382 -Total Receives: 380 -Stores 186 1 -Total Receives: 381 -Stores 187 1 -Total Receives: 382 -Stores 188 1 -Total rewards: 388 -Total Receives: 383 -Stores 189 1 -Total Receives: 384 -Stores 190 1 -Total Receives: 385 -Stores 191 1 -Total Receives: 386 -Stores 192 1 -loss: 0.0073743294924497604, td_error: 697.4981079101562, entropy: 1.7356938123703003 -Train step: 0.5191249847412109 Optimizer Step: 3 -Total Receives: 387 -Stores 193 1 -Total Receives: 388 -Stores 194 1 -Total rewards: 394 -Total Receives: 389 -Stores 195 1 -Total Receives: 390 -Stores 196 1 -Total Receives: 391 -Stores 197 1 -Total Receives: 392 -Stores 198 1 -Total Receives: 393 -Total Receives: 394 -Total rewards: 396 -Total Receives: 395 -Stores 199 1 -Total Receives: 396 -Stores 200 1 -Total rewards: 402 -Total Receives: 397 -Stores 201 1 -Total Receives: 398 -Stores 202 1 -Total Receives: 399 -Stores 203 1 -Total Receives: 400 -Stores 204 1 -Total Receives: 401 -Stores 205 1 -Total Receives: 402 -Stores 206 1 -Total rewards: 406 -Total Receives: 403 -Stores 207 1 -Total Receives: 404 -Stores 208 1 -Total Receives: 405 -Stores 209 1 -Total Receives: 406 -Stores 210 1 -Total rewards: 407 -Total Receives: 407 -Total rewards: 411 -Total Receives: 408 -Stores 211 1 -Total Receives: 409 -Stores 212 1 -Total Receives: 410 -Stores 213 1 -Total Receives: 411 -Stores 214 1 -Total rewards: 416 -Total Receives: 412 -Stores 215 1 -Total Receives: 413 -Stores 216 1 -Total Receives: 414 -Stores 217 1 -Total Receives: 415 -Stores 218 1 -Total Receives: 416 -Stores 219 1 -Total rewards: 422 -Total Receives: 417 -Stores 220 1 -Total Receives: 418 -Stores 221 1 -Total Receives: 419 -Stores 222 1 -Total Receives: 420 -Stores 223 1 -Total Receives: 421 -Stores 224 1 -loss: 0.00037273133057169616, td_error: 40.21186065673828, entropy: 4.048640727996826 -Train step: 0.4018220901489258 Optimizer Step: 4 -Total Receives: 422 -Stores 225 1 -Total rewards: 427 -Total Receives: 423 -Stores 226 1 -Total Receives: 424 -Stores 227 1 -Total Receives: 425 -Stores 228 1 -Total Receives: 426 -Stores 229 1 -Total Receives: 427 -Stores 230 1 -Total rewards: 430 -Total Receives: 428 -Stores 231 1 -Total Receives: 429 -Stores 232 1 -Total Receives: 430 -Stores 233 1 -Total rewards: 434 -Total Receives: 431 -Stores 234 1 -Total Receives: 432 -Stores 235 1 -Total Receives: 433 -Stores 236 1 -Total Receives: 434 -Stores 237 1 -Total rewards: 435 -Total Receives: 435 -Stores 238 1 -Total rewards: 437 -Total Receives: 436 -Stores 239 1 -Total Receives: 437 -Stores 240 1 -Total rewards: 440 -Total Receives: 438 -Stores 241 1 -Total Receives: 439 -Stores 242 1 -Total Receives: 440 -Stores 243 1 -Total rewards: 444 -Total Receives: 441 -Stores 244 1 -Total Receives: 442 -Stores 245 1 -Total Receives: 443 -Stores 246 1 -Total Receives: 444 -Stores 247 1 -Total rewards: 450 -Total Receives: 445 -Stores 248 1 -Total Receives: 446 -Stores 249 1 -Total Receives: 447 -Stores 250 1 -Total Receives: 448 -Stores 251 1 -Total Receives: 449 -Stores 252 1 -Total Receives: 450 -Stores 253 1 -Total rewards: 452 -Total Receives: 451 -Stores 254 1 -Total Receives: 452 -Stores 255 1 -Total rewards: 455 -Total Receives: 453 -Stores 256 1 -loss: 0.0019009055104106665, td_error: 184.94607543945312, entropy: 1.371143102645874 -Train step: 0.48240113258361816 Optimizer Step: 5 -Total Receives: 454 -Stores 257 1 -Total Receives: 455 -Stores 258 1 -Total rewards: 461 -Total Receives: 456 -Stores 259 1 -Total Receives: 457 -Stores 260 1 -Total Receives: 458 -Stores 261 1 -Total Receives: 459 -Stores 262 1 -Total Receives: 460 -Stores 263 1 -Total Receives: 461 -Stores 264 1 -Total rewards: 465 -Total Receives: 462 -Stores 265 1 -Total Receives: 463 -Stores 266 1 -Total Receives: 464 -Stores 267 1 -Total Receives: 465 -Stores 268 1 -Total rewards: 469 -Total Receives: 466 -Stores 269 1 -Total Receives: 467 -Stores 270 1 -Total Receives: 468 -Stores 271 1 -Total Receives: 469 -Stores 272 1 -Total rewards: 472 -Total Receives: 470 -Stores 273 1 -Total Receives: 471 -Stores 274 1 -Total Receives: 472 -Stores 275 1 -Total rewards: 478 -Total Receives: 473 -Stores 276 1 -Total Receives: 474 -Stores 277 1 -Total Receives: 475 -Stores 278 1 -Total Receives: 476 -Stores 279 1 -Total Receives: 477 -Stores 280 1 -Total Receives: 478 -Stores 281 1 -Total rewards: 482 -Total Receives: 479 -Stores 282 1 -Total Receives: 480 -Stores 283 1 -Total Receives: 481 -Stores 284 1 -Total Receives: 482 -Stores 285 1 -Total rewards: 485 -Total Receives: 483 -Stores 286 1 -Total Receives: 484 -Stores 287 1 -Total Receives: 485 -Stores 288 1 -Stores 91 1 -Total Receives: 286 -Stores 92 1 -Total Receives: 287 -Stores 93 1 -Total Receives: 288 -Stores 94 1 -Total Receives: 289 -Stores 95 1 -Total Receives: 290 -Stores 96 1 -Train step: 1.1920928955078125e-05 Optimizer Step: 0 -Total rewards: 293 -Total Receives: 291 -Stores 97 1 -Total Receives: 292 -Stores 98 1 -Total Receives: 293 -Stores 99 1 -Total rewards: 299 -Total Receives: 294 -Stores 100 1 -Total Receives: 295 -Stores 101 1 -Total Receives: 296 -Stores 102 1 -Total Receives: 297 -Stores 103 1 -Total Receives: 298 -Stores 104 1 -Total Receives: 299 -Stores 105 1 -Total rewards: 303 -Total Receives: 300 -Stores 106 1 -Total Receives: 301 -Stores 107 1 -Total Receives: 302 -Stores 108 1 -Total Receives: 303 -Stores 109 1 -Total rewards: 306 -Total Receives: 304 -Stores 110 1 -Total Receives: 305 -Stores 111 1 -Total Receives: 306 -Stores 112 1 -Total rewards: 310 -Total Receives: 307 -Stores 113 1 -Total Receives: 308 -Stores 114 1 -Total Receives: 309 -Stores 115 1 -Total Receives: 310 -Stores 116 1 -Total rewards: 311 -Total Receives: 311 -Stores 117 1 -Total rewards: 317 -Total Receives: 312 -Stores 118 1 -Total Receives: 313 -Stores 119 1 -Total Receives: 314 -Stores 120 1 -Total Receives: 315 -Stores 121 1 -Total Receives: 316 -Stores 122 1 -Total Receives: 317 -Stores 123 1 -Total rewards: 321 -Total Receives: 318 -Stores 124 1 -Total Receives: 319 -Stores 125 1 -Total Receives: 320 -Stores 126 1 -Total Receives: 321 -Stores 127 1 -Total rewards: 325 -Total Receives: 322 -Stores 128 1 -loss: 0.07794616371393204, td_error: 0.07794616371393204, entropy: 4.851358890533447 -Train step: 0.34377598762512207 Optimizer Step: 1 -Total Receives: 323 -Stores 129 1 -Total Receives: 324 -Stores 130 1 -Total Receives: 325 -Stores 131 1 -Total rewards: 329 -Total Receives: 326 -Stores 132 1 -Total Receives: 327 -Stores 133 1 -Total Receives: 328 -Stores 134 1 -Total Receives: 329 -Stores 135 1 -Total rewards: 334 -Total Receives: 330 -Stores 136 1 -Total Receives: 331 -Stores 137 1 -Total Receives: 332 -Stores 138 1 -Total Receives: 333 -Stores 139 1 -Total Receives: 334 -Stores 140 1 -Total rewards: 342 -Total Receives: 335 -Stores 141 1 -Total Receives: 336 -Stores 142 1 -Total Receives: 337 -Stores 143 1 -Total Receives: 338 -Stores 144 1 -Total Receives: 339 -Stores 145 1 -Total Receives: 340 -Stores 146 1 -Total Receives: 341 -Stores 147 1 -Total Receives: 342 -Stores 148 1 -Total rewards: 347 -Total Receives: 343 -Stores 149 1 -Total Receives: 344 -Stores 150 1 -Total Receives: 345 -Stores 151 1 -Total Receives: 346 -Stores 152 1 -Total Receives: 347 -Stores 153 1 -Total rewards: 352 -Total Receives: 348 -Stores 154 1 -Total Receives: 349 -Stores 155 1 -Total Receives: 350 -Stores 156 1 -Total Receives: 351 -Stores 157 1 -Total Receives: 352 -Stores 158 1 -Total rewards: 356 -Total Receives: 353 -Stores 159 1 -Total Receives: 354 -Stores 160 1 -loss: 13875.1416015625, td_error: 8074478.5, entropy: 1.9311757630685056e-20 -Train step: 0.46463513374328613 Optimizer Step: 2 -Total Receives: 355 -Stores 161 1 -Total Receives: 356 -Stores 162 1 -Total rewards: 362 -Total Receives: 357 -Stores 163 1 -Total Receives: 358 -Stores 164 1 -Total Receives: 359 -Stores 165 1 -Total Receives: 360 -Stores 166 1 -Total Receives: 361 -Stores 167 1 -Total Receives: 362 -Stores 168 1 -Total rewards: 367 -Total Receives: 363 -Stores 169 1 -Total Receives: 364 -Stores 170 1 -Total Receives: 365 -Stores 171 1 -Total Receives: 366 -Stores 172 1 -Total Receives: 367 -Stores 173 1 -Total rewards: 371 -Total Receives: 368 -Stores 174 1 -Total Receives: 369 -Stores 175 1 -Total Receives: 370 -Stores 176 1 -Total Receives: 371 -Stores 177 1 -Total rewards: 373 -Total Receives: 372 -Stores 178 1 -Total Receives: 373 -Stores 179 1 -Total rewards: 379 -Total Receives: 374 -Stores 180 1 -Total Receives: 375 -Stores 181 1 -Total Receives: 376 -Stores 182 1 -Total Receives: 377 -Stores 183 1 -Total Receives: 378 -Stores 184 1 -Total Receives: 379 -Stores 185 1 -Total rewards: 382 -Total Receives: 380 -Stores 186 1 -Total Receives: 381 -Stores 187 1 -Total Receives: 382 -Stores 188 1 -Total rewards: 388 -Total Receives: 383 -Stores 189 1 -Total Receives: 384 -Stores 190 1 -Total Receives: 385 -Stores 191 1 -Total Receives: 386 -Stores 192 1 -loss: 0.09413406997919083, td_error: 27814.00390625, entropy: 9.392795618623495e-06 -Train step: 0.4566471576690674 Optimizer Step: 3 -Total Receives: 387 -Stores 193 1 -Total Receives: 388 -Stores 194 1 -Total rewards: 394 -Total Receives: 389 -Stores 195 1 -Total Receives: 390 -Stores 196 1 -Total Receives: 391 -Stores 197 1 -Total Receives: 392 -Stores 198 1 -Total Receives: 393 -Total Receives: 394 -Total rewards: 396 -Total Receives: 395 -Stores 199 1 -Total Receives: 396 -Stores 200 1 -Total rewards: 402 -Total Receives: 397 -Stores 201 1 -Total Receives: 398 -Stores 202 1 -Total Receives: 399 -Stores 203 1 -Total Receives: 400 -Stores 204 1 -Total Receives: 401 -Stores 205 1 -Total Receives: 402 -Stores 206 1 -Total rewards: 406 -Total Receives: 403 -Stores 207 1 -Total Receives: 404 -Stores 208 1 -Total Receives: 405 -Stores 209 1 -Total Receives: 406 -Stores 210 1 -Total rewards: 407 -Total Receives: 407 -Total rewards: 411 -Total Receives: 408 -Stores 211 1 -Total Receives: 409 -Stores 212 1 -Total Receives: 410 -Stores 213 1 -Total Receives: 411 -Stores 214 1 -Total rewards: 416 -Total Receives: 412 -Stores 215 1 -Total Receives: 413 -Stores 216 1 -Total Receives: 414 -Stores 217 1 -Total Receives: 415 -Stores 218 1 -Total Receives: 416 -Stores 219 1 -Total rewards: 422 -Total Receives: 417 -Stores 220 1 -Total Receives: 418 -Stores 221 1 -Total Receives: 419 -Stores 222 1 -Total Receives: 420 -Stores 223 1 -Total Receives: 421 -Stores 224 1 -loss: 0.03133144602179527, td_error: 8952.923828125, entropy: 1.4172817468643188 -Train step: 0.43485307693481445 Optimizer Step: 4 -Total Receives: 422 -Stores 225 1 -Total rewards: 427 -Total Receives: 423 -Stores 226 1 -Total Receives: 424 -Stores 227 1 -Total Receives: 425 -Stores 228 1 -Total Receives: 426 -Stores 229 1 -Total Receives: 427 -Stores 230 1 -Total rewards: 430 -Total Receives: 428 -Stores 231 1 -Total Receives: 429 -Stores 232 1 -Total Receives: 430 -Stores 233 1 -Total rewards: 434 -Total Receives: 431 -Stores 234 1 -Total Receives: 432 -Stores 235 1 -Total Receives: 433 -Stores 236 1 -Total Receives: 434 -Stores 237 1 -Total rewards: 435 -Total Receives: 435 -Stores 238 1 -Total rewards: 437 -Total Receives: 436 -Stores 239 1 -Total Receives: 437 -Stores 240 1 -Total rewards: 440 -Total Receives: 438 -Stores 241 1 -Total Receives: 439 -Stores 242 1 -Total Receives: 440 -Stores 243 1 -Total rewards: 444 -Total Receives: 441 -Stores 244 1 -Total Receives: 442 -Stores 245 1 -Total Receives: 443 -Stores 246 1 -Total Receives: 444 -Stores 247 1 -Total rewards: 450 -Total Receives: 445 -Stores 248 1 -Total Receives: 446 -Stores 249 1 -Total Receives: 447 -Stores 250 1 -Total Receives: 448 -Stores 251 1 -Total Receives: 449 -Stores 252 1 -Total Receives: 450 -Stores 253 1 -Total rewards: 452 -Total Receives: 451 -Stores 254 1 -Total Receives: 452 -Stores 255 1 -Total rewards: 455 -Total Receives: 453 -Stores 256 1 -loss: 0.04698452353477478, td_error: 15415.271484375, entropy: 1.38629150390625 -Train step: 0.47814488410949707 Optimizer Step: 5 -Total Receives: 454 -Stores 257 1 -Total Receives: 455 -Stores 258 1 -Total rewards: 461 -Total Receives: 456 -Stores 259 1 -Total Receives: 457 -Stores 260 1 -Total Receives: 458 -Stores 261 1 -Total Receives: 459 -Stores 262 1 -Total Receives: 460 -Stores 263 1 -Total Receives: 461 -Stores 264 1 -Total rewards: 465 -Total Receives: 462 -Stores 265 1 -Total Receives: 463 -Stores 266 1 -Total Receives: 464 -Stores 267 1 -Total Receives: 465 -Stores 268 1 -Total rewards: 469 -Total Receives: 466 -Stores 269 1 -Total Receives: 467 -Stores 270 1 -Total Receives: 468 -Stores 271 1 -Total Receives: 469 -Stores 272 1 -Total rewards: 472 -Total Receives: 470 -Stores 273 1 -Total Receives: 471 -Stores 274 1 -Total Receives: 472 -Stores 275 1 -Total rewards: 478 -Total Receives: 473 -Stores 276 1 -Total Receives: 474 -Stores 277 1 -Total Receives: 475 -Stores 278 1 -Total Receives: 476 -Stores 279 1 -Total Receives: 477 -Stores 280 1 -Total Receives: 478 -Stores 281 1 -Total rewards: 482 -Total Receives: 479 -Stores 282 1 -Total Receives: 480 -Stores 283 1 -Total Receives: 481 -Stores 284 1 -Total Receives: 482 -Stores 285 1 -Total rewards: 485 -Total Receives: 483 -Stores 286 1 -Total Receives: 484 -Stores 287 1 -Total Receives: 485 -Stores 288 1 -Total Receives: 289 -Stores 95 1 -Total Receives: 290 -Stores 96 1 -Total rewards: 293 -Total Receives: 291 -Stores 97 1 -Total Receives: 292 -Stores 98 1 -Total Receives: 293 -Stores 99 1 -Total rewards: 299 -Total Receives: 294 -Stores 100 1 -Total Receives: 295 -Stores 101 1 -Total Receives: 296 -Stores 102 1 -Total Receives: 297 -Stores 103 1 -Total Receives: 298 -Stores 104 1 -Total Receives: 299 -Stores 105 1 -Total rewards: 303 -Total Receives: 300 -Stores 106 1 -Total Receives: 301 -Stores 107 1 -Total Receives: 302 -Stores 108 1 -Total Receives: 303 -Stores 109 1 -Total rewards: 306 -Total Receives: 304 -Stores 110 1 -Total Receives: 305 -Stores 111 1 -Total Receives: 306 -Stores 112 1 -Total rewards: 310 -Total Receives: 307 -Stores 113 1 -Total Receives: 308 -Stores 114 1 -Total Receives: 309 -Stores 115 1 -Total Receives: 310 -Stores 116 1 -Total rewards: 311 -Total Receives: 311 -Stores 117 1 -Total rewards: 317 -Total Receives: 312 -Stores 118 1 -Total Receives: 313 -Stores 119 1 -Total Receives: 314 -Stores 120 1 -Total Receives: 315 -Stores 121 1 -Total Receives: 316 -Stores 122 1 -Total Receives: 317 -Stores 123 1 -Total rewards: 321 -Total Receives: 318 -Stores 124 1 -Total Receives: 319 -Stores 125 1 -Total Receives: 320 -Stores 126 1 -Total Receives: 321 -Stores 127 1 -Total rewards: 325 -Total Receives: 322 -Stores 128 1 -Total Receives: 323 -Stores 129 1 -Total Receives: 324 -Stores 130 1 -Total Receives: 325 -Stores 131 1 -Total rewards: 329 -Total Receives: 326 -Stores 132 1 -Total Receives: 327 -Stores 133 1 -Total Receives: 328 -Stores 134 1 -Total Receives: 329 -Stores 135 1 -Total rewards: 334 -Total Receives: 330 -Stores 136 1 -Total Receives: 331 -Stores 137 1 -Total Receives: 332 -Stores 138 1 -Total Receives: 333 -Stores 139 1 -Total Receives: 334 -Stores 140 1 -Total rewards: 342 -Total Receives: 335 -Stores 141 1 -Total Receives: 336 -Stores 142 1 -Total Receives: 337 -Stores 143 1 -Total Receives: 338 -Stores 144 1 -Total Receives: 339 -Stores 145 1 -Total Receives: 340 -Stores 146 1 -Total Receives: 341 -Stores 147 1 -Total Receives: 342 -Stores 148 1 -Total rewards: 347 -Total Receives: 343 -Stores 149 1 -Total Receives: 344 -Stores 150 1 -Total Receives: 345 -Stores 151 1 -Total Receives: 346 -Stores 152 1 -Total Receives: 347 -Stores 153 1 -Total rewards: 352 -Total Receives: 348 -Stores 154 1 -Total Receives: 349 -Stores 155 1 -Total Receives: 350 -Stores 156 1 -Total Receives: 351 -Stores 157 1 -Total Receives: 352 -Stores 158 1 -Total rewards: 356 -Total Receives: 353 -Stores 159 1 -Total Receives: 354 -Stores 160 1 -Total Receives: 355 -Stores 161 1 -Total Receives: 356 -Stores 162 1 -Total rewards: 362 -Total Receives: 357 -Stores 163 1 -Total Receives: 358 -Stores 164 1 -Total Receives: 359 -Stores 165 1 -Total Receives: 360 -Stores 166 1 -Total Receives: 361 -Stores 167 1 -Total Receives: 362 -Stores 168 1 -Total rewards: 367 -Total Receives: 363 -Stores 169 1 -Total Receives: 364 -Stores 170 1 -Total Receives: 365 -Stores 171 1 -Total Receives: 366 -Stores 172 1 -Total Receives: 367 -Stores 173 1 -Total rewards: 371 -Total Receives: 368 -Stores 174 1 -Total Receives: 369 -Stores 175 1 -Total Receives: 370 -Stores 176 1 -Total Receives: 371 -Stores 177 1 -Total rewards: 373 -Total Receives: 372 -Stores 178 1 -Total Receives: 373 -Stores 179 1 -Total rewards: 379 -Total Receives: 374 -Stores 180 1 -Total Receives: 375 -Stores 181 1 -Total Receives: 376 -Stores 182 1 -Total Receives: 377 -Stores 183 1 -Total Receives: 378 -Stores 184 1 -Total Receives: 379 -Stores 185 1 -Total rewards: 382 -Total Receives: 380 -Stores 186 1 -Total Receives: 381 -Stores 187 1 -Total Receives: 382 -Stores 188 1 -Total rewards: 388 -Total Receives: 383 -Stores 189 1 -Total Receives: 384 -Stores 190 1 -Total Receives: 385 -Stores 191 1 -Total Receives: 386 -Stores 192 1 -Total Receives: 387 -Stores 193 1 -Total Receives: 388 -Stores 194 1 -Total rewards: 394 -Total Receives: 389 -Stores 195 1 -Total Receives: 390 -Stores 196 1 -Total Receives: 391 -Stores 197 1 -Total Receives: 392 -Stores 198 1 -Total Receives: 393 -Total Receives: 394 -Total rewards: 396 -Total Receives: 395 -Stores 199 1 -Total Receives: 396 -Stores 200 1 -Total rewards: 402 -Total Receives: 397 -Stores 201 1 -Total Receives: 398 -Stores 202 1 -Total Receives: 399 -Stores 203 1 -Total Receives: 400 -Stores 204 1 -Total Receives: 401 -Stores 205 1 -Total Receives: 402 -Stores 206 1 -Total rewards: 406 -Total Receives: 403 -Stores 207 1 -Total Receives: 404 -Stores 208 1 -Total Receives: 405 -Stores 209 1 -Total Receives: 406 -Stores 210 1 -Total rewards: 407 -Total Receives: 407 -Total rewards: 411 -Total Receives: 408 -Stores 211 1 -Total Receives: 409 -Stores 212 1 -Total Receives: 410 -Stores 213 1 -Total Receives: 411 -Stores 214 1 -Total rewards: 416 -Total Receives: 412 -Stores 215 1 -Total Receives: 413 -Stores 216 1 -Total Receives: 414 -Stores 217 1 -Total Receives: 415 -Stores 218 1 -Total Receives: 416 -Stores 219 1 -Total rewards: 422 -Total Receives: 417 -Stores 220 1 -Total Receives: 418 -Stores 221 1 -Total Receives: 419 -Stores 222 1 -Total Receives: 420 -Stores 223 1 -Total Receives: 421 -Stores 224 1 -Total Receives: 422 -Stores 225 1 -Total rewards: 427 -Total Receives: 423 -Stores 226 1 -Total Receives: 424 -Stores 227 1 -Total Receives: 425 -Stores 228 1 -Total Receives: 426 -Stores 229 1 -Total Receives: 427 -Stores 230 1 -Total rewards: 430 -Total Receives: 428 -Stores 231 1 -Total Receives: 429 -Stores 232 1 -Total Receives: 430 -Stores 233 1 -Total rewards: 434 -Total Receives: 431 -Stores 234 1 -Total Receives: 432 -Stores 235 1 -Total Receives: 433 -Stores 236 1 -Total Receives: 434 -Stores 237 1 -Total rewards: 435 -Total Receives: 435 -Stores 238 1 -Total rewards: 437 -Total Receives: 436 -Stores 239 1 -Total Receives: 437 -Stores 240 1 -Total rewards: 440 -Total Receives: 438 -Stores 241 1 -Total Receives: 439 -Stores 242 1 -Total Receives: 440 -Stores 243 1 -Total rewards: 444 -Total Receives: 441 -Stores 244 1 -Total Receives: 442 -Stores 245 1 -Total Receives: 443 -Stores 246 1 -Total Receives: 444 -Stores 247 1 -Total rewards: 450 -Total Receives: 445 -Stores 248 1 -Total Receives: 446 -Stores 249 1 -Total Receives: 447 -Stores 250 1 -Total Receives: 448 -Stores 251 1 -Total Receives: 449 -Stores 252 1 -Total Receives: 450 -Stores 253 1 -Total rewards: 452 -Total Receives: 451 -Stores 254 1 -Total Receives: 452 -Stores 255 1 -Total rewards: 455 -Total Receives: 453 -Stores 256 1 -Total Receives: 454 -Stores 257 1 -Total Receives: 455 -Stores 258 1 -Total rewards: 461 -Total Receives: 456 -Stores 259 1 -Total Receives: 457 -Stores 260 1 -Total Receives: 458 -Stores 261 1 -Total Receives: 459 -Stores 262 1 -Total Receives: 460 -Stores 263 1 -Total Receives: 461 -Stores 264 1 -Total rewards: 465 -Total Receives: 462 -Stores 265 1 -Total Receives: 463 -Stores 266 1 -Total Receives: 464 -Stores 267 1 -Total Receives: 465 -Stores 268 1 -Total rewards: 469 -Total Receives: 466 -Stores 269 1 -Total Receives: 467 -Stores 270 1 -Total Receives: 468 -Stores 271 1 -Total Receives: 469 -Stores 272 1 -Total rewards: 472 -Total Receives: 470 -Stores 273 1 -Total Receives: 471 -Stores 274 1 -Total Receives: 472 -Stores 275 1 -Total rewards: 478 -Total Receives: 473 -Stores 276 1 -Total Receives: 474 -Stores 277 1 -Total Receives: 475 -Stores 278 1 -Total Receives: 476 -Stores 279 1 -Total Receives: 477 -Stores 280 1 -Total Receives: 478 -Stores 281 1 -Total rewards: 482 -Total Receives: 479 -Stores 282 1 -Total Receives: 480 -Stores 283 1 -Total Receives: 481 -Stores 284 1 -Total Receives: 482 -Stores 285 1 -Total rewards: 485 -Total Receives: 483 -Stores 286 1 -Total Receives: 484 -Stores 287 1 -Total Receives: 485 -Stores 288 1 -Total rewards: 490 -Total Receives: 486 -Stores 289 1 -Total Receives: 487 -Stores 290 1 -Total Receives: 488 -Stores 291 1 -Total Receives: 489 -Stores 292 1 -Total Receives: 490 -Stores 293 1 -Total rewards: 492 -Total Receives: 491 -Stores 294 1 -Total Receives: 492 -Stores 295 1 -Total rewards: 496 -Total Receives: 493 -Stores 296 1 -Total Receives: 494 -Stores 297 1 -Total Receives: 495 -Stores 298 1 -Total Receives: 496 -Stores 299 1 -Total rewards: 500 -Total Receives: 497 -Stores 300 1 -Total Receives: 498 -Stores 301 1 -Total Receives: 499 -Stores 302 1 -Total Receives: 500 -Stores 303 1 -Total rewards: 502 -Total Receives: 501 -Stores 304 1 -Total Receives: 502 -Stores 305 1 -Total rewards: 507 -Total Receives: 503 -Stores 306 1 -Total Receives: 504 -Stores 307 1 -Total Receives: 505 -Stores 308 1 -Total Receives: 506 -Stores 309 1 -Total Receives: 507 -Stores 310 1 -Stores 91 1 -Total Receives: 286 -Stores 92 1 -Total Receives: 287 -Stores 93 1 -Total Receives: 288 -Stores 94 1 -Total Receives: 289 -Stores 95 1 -Total Receives: 290 -Stores 96 1 -Train step: 1.52587890625e-05 Optimizer Step: 0 -Total rewards: 293 -Total Receives: 291 -Stores 97 1 -Total Receives: 292 -Stores 98 1 -Total Receives: 293 -Stores 99 1 -Total rewards: 299 -Total Receives: 294 -Stores 100 1 -Total Receives: 295 -Stores 101 1 -Total Receives: 296 -Stores 102 1 -Total Receives: 297 -Stores 103 1 -Total Receives: 298 -Stores 104 1 -Total Receives: 299 -Stores 105 1 -Total rewards: 303 -Total Receives: 300 -Stores 106 1 -Total Receives: 301 -Stores 107 1 -Total Receives: 302 -Stores 108 1 -Total Receives: 303 -Stores 109 1 -Total rewards: 306 -Total Receives: 304 -Stores 110 1 -Total Receives: 305 -Stores 111 1 -Total Receives: 306 -Stores 112 1 -Total rewards: 310 -Total Receives: 307 -Stores 113 1 -Total Receives: 308 -Stores 114 1 -Total Receives: 309 -Stores 115 1 -Total Receives: 310 -Stores 116 1 -Total rewards: 311 -Total Receives: 311 -Stores 117 1 -Total rewards: 317 -Total Receives: 312 -Stores 118 1 -Total Receives: 313 -Stores 119 1 -Total Receives: 314 -Stores 120 1 -Total Receives: 315 -Stores 121 1 -Total Receives: 316 -Stores 122 1 -Total Receives: 317 -Stores 123 1 -Total rewards: 321 -Total Receives: 318 -Stores 124 1 -Total Receives: 319 -Stores 125 1 -Total Receives: 320 -Stores 126 1 -Total Receives: 321 -Stores 127 1 -Total rewards: 325 -Total Receives: 322 -Stores 128 1 -loss: 0.08180533349514008, td_error: 0.08180533349514008, entropy: 4.851987838745117 -Train step: 0.4115631580352783 Optimizer Step: 1 -Total Receives: 323 -Stores 129 1 -Total Receives: 324 -Stores 130 1 -Total Receives: 325 -Stores 131 1 -Total rewards: 329 -Total Receives: 326 -Stores 132 1 -Total Receives: 327 -Stores 133 1 -Total Receives: 328 -Stores 134 1 -Total Receives: 329 -Stores 135 1 -Total rewards: 334 -Total Receives: 330 -Stores 136 1 -Total Receives: 331 -Stores 137 1 -Total Receives: 332 -Stores 138 1 -Total Receives: 333 -Stores 139 1 -Total Receives: 334 -Stores 140 1 -Total rewards: 342 -Total Receives: 335 -Stores 141 1 -Total Receives: 336 -Stores 142 1 -Total Receives: 337 -Stores 143 1 -Total Receives: 338 -Stores 144 1 -Total Receives: 339 -Stores 145 1 -Total Receives: 340 -Stores 146 1 -Total Receives: 341 -Stores 147 1 -Total Receives: 342 -Stores 148 1 -Total rewards: 347 -Total Receives: 343 -Stores 149 1 -Total Receives: 344 -Stores 150 1 -Total Receives: 345 -Stores 151 1 -Total Receives: 346 -Stores 152 1 -Total Receives: 347 -Stores 153 1 -Total rewards: 352 -Total Receives: 348 -Stores 154 1 -Total Receives: 349 -Stores 155 1 -Total Receives: 350 -Stores 156 1 -Total Receives: 351 -Stores 157 1 -Total Receives: 352 -Stores 158 1 -Total rewards: 356 -Total Receives: 353 -Stores 159 1 -Total Receives: 354 -Stores 160 1 -loss: 136.770751953125, td_error: 79416.828125, entropy: 0.2319006770849228 -Train step: 0.35945701599121094 Optimizer Step: 2 -Total Receives: 355 -Stores 161 1 -Total Receives: 356 -Stores 162 1 -Total rewards: 362 -Total Receives: 357 -Stores 163 1 -Total Receives: 358 -Stores 164 1 -Total Receives: 359 -Stores 165 1 -Total Receives: 360 -Stores 166 1 -Total Receives: 361 -Stores 167 1 -Total Receives: 362 -Stores 168 1 -Total rewards: 367 -Total Receives: 363 -Stores 169 1 -Total Receives: 364 -Stores 170 1 -Total Receives: 365 -Stores 171 1 -Total Receives: 366 -Stores 172 1 -Total Receives: 367 -Stores 173 1 -Total rewards: 371 -Total Receives: 368 -Stores 174 1 -Total Receives: 369 -Stores 175 1 -Total Receives: 370 -Stores 176 1 -Total Receives: 371 -Stores 177 1 -Total rewards: 373 -Total Receives: 372 -Stores 178 1 -Total Receives: 373 -Stores 179 1 -Total rewards: 379 -Total Receives: 374 -Stores 180 1 -Total Receives: 375 -Stores 181 1 -Total Receives: 376 -Stores 182 1 -Total Receives: 377 -Stores 183 1 -Total Receives: 378 -Stores 184 1 -Total Receives: 379 -Stores 185 1 -Total rewards: 382 -Total Receives: 380 -Stores 186 1 -Total Receives: 381 -Stores 187 1 -Total Receives: 382 -Stores 188 1 -Total rewards: 388 -Total Receives: 383 -Stores 189 1 -Total Receives: 384 -Stores 190 1 -Total Receives: 385 -Stores 191 1 -Total Receives: 386 -Stores 192 1 -loss: 0.001643691211938858, td_error: 119.52545166015625, entropy: 3.2033674716949463 -Train step: 0.4214940071105957 Optimizer Step: 3 -Total Receives: 387 -Stores 193 1 -Total Receives: 388 -Stores 194 1 -Total rewards: 394 -Total Receives: 389 -Stores 195 1 -Total Receives: 390 -Stores 196 1 -Total Receives: 391 -Stores 197 1 -Total Receives: 392 -Stores 198 1 -Total Receives: 393 -Total Receives: 394 -Total rewards: 396 -Total Receives: 395 -Stores 199 1 -Total Receives: 396 -Stores 200 1 -Total rewards: 402 -Total Receives: 397 -Stores 201 1 -Total Receives: 398 -Stores 202 1 -Total Receives: 399 -Stores 203 1 -Total Receives: 400 -Stores 204 1 -Total Receives: 401 -Stores 205 1 -Total Receives: 402 -Stores 206 1 -Total rewards: 406 -Total Receives: 403 -Stores 207 1 -Total Receives: 404 -Stores 208 1 -Total Receives: 405 -Stores 209 1 -Total Receives: 406 -Stores 210 1 -Total rewards: 407 -Total Receives: 407 -Total rewards: 411 -Total Receives: 408 -Stores 211 1 -Total Receives: 409 -Stores 212 1 -Total Receives: 410 -Stores 213 1 -Total Receives: 411 -Stores 214 1 -Total rewards: 416 -Total Receives: 412 -Stores 215 1 -Total Receives: 413 -Stores 216 1 -Total Receives: 414 -Stores 217 1 -Total Receives: 415 -Stores 218 1 -Total Receives: 416 -Stores 219 1 -Total rewards: 422 -Total Receives: 417 -Stores 220 1 -Total Receives: 418 -Stores 221 1 -Total Receives: 419 -Stores 222 1 -Total Receives: 420 -Stores 223 1 -Total Receives: 421 -Stores 224 1 -loss: 3.324386852909811e-05, td_error: 2.670900583267212, entropy: 4.076871871948242 -Train step: 0.3803138732910156 Optimizer Step: 4 -Total Receives: 422 -Stores 225 1 -Total rewards: 427 -Total Receives: 423 -Stores 226 1 -Total Receives: 424 -Stores 227 1 -Total Receives: 425 -Stores 228 1 -Total Receives: 426 -Stores 229 1 -Total Receives: 427 -Stores 230 1 -Total rewards: 430 -Total Receives: 428 -Stores 231 1 -Total Receives: 429 -Stores 232 1 -Total Receives: 430 -Stores 233 1 -Total rewards: 434 -Total Receives: 431 -Stores 234 1 -Total Receives: 432 -Stores 235 1 -Total Receives: 433 -Stores 236 1 -Total Receives: 434 -Stores 237 1 -Total rewards: 435 -Total Receives: 435 -Stores 238 1 -Total rewards: 437 -Total Receives: 436 -Stores 239 1 -Total Receives: 437 -Stores 240 1 -Total rewards: 440 -Total Receives: 438 -Stores 241 1 -Total Receives: 439 -Stores 242 1 -Total Receives: 440 -Stores 243 1 -Total rewards: 444 -Total Receives: 441 -Stores 244 1 -Total Receives: 442 -Stores 245 1 -Total Receives: 443 -Stores 246 1 -Total Receives: 444 -Stores 247 1 -Total rewards: 450 -Total Receives: 445 -Stores 248 1 -Total Receives: 446 -Stores 249 1 -Total Receives: 447 -Stores 250 1 -Total Receives: 448 -Stores 251 1 -Total Receives: 449 -Stores 252 1 -Total Receives: 450 -Stores 253 1 -Total rewards: 452 -Total Receives: 451 -Stores 254 1 -Total Receives: 452 -Stores 255 1 -Total rewards: 455 -Total Receives: 453 -Stores 256 1 -loss: 4.9176258471561596e-05, td_error: 3.7355363368988037, entropy: 4.445135593414307 -Train step: 0.4452688694000244 Optimizer Step: 5 -Total Receives: 454 -Stores 257 1 -Total Receives: 455 -Stores 258 1 -Total rewards: 461 -Total Receives: 456 -Stores 259 1 -Total Receives: 457 -Stores 260 1 -Total Receives: 458 -Stores 261 1 -Total Receives: 459 -Stores 262 1 -Total Receives: 460 -Stores 263 1 -Total Receives: 461 -Stores 264 1 -Total rewards: 465 -Total Receives: 462 -Stores 265 1 -Total Receives: 463 -Stores 266 1 -Total Receives: 464 -Stores 267 1 -Total Receives: 465 -Stores 268 1 -Total rewards: 469 -Total Receives: 466 -Stores 269 1 -Total Receives: 467 -Stores 270 1 -Total Receives: 468 -Stores 271 1 -Total Receives: 469 -Stores 272 1 -Total rewards: 472 -Total Receives: 470 -Stores 273 1 -Total Receives: 471 -Stores 274 1 -Total Receives: 472 -Stores 275 1 -Total rewards: 478 -Total Receives: 473 -Stores 276 1 -Total Receives: 474 -Stores 277 1 -Total Receives: 475 -Stores 278 1 -Total Receives: 476 -Stores 279 1 -Total Receives: 477 -Stores 280 1 -Total Receives: 478 -Stores 281 1 -Total rewards: 482 -Total Receives: 479 -Stores 282 1 -Total Receives: 480 -Stores 283 1 -Total Receives: 481 -Stores 284 1 -Total Receives: 482 -Stores 285 1 -Total rewards: 485 -Total Receives: 483 -Stores 286 1 -Total Receives: 484 -Stores 287 1 -Total Receives: 485 -Stores 288 1 -Total rewards: 513 -Total Receives: 508 -Stores 311 1 -Total Receives: 509 -Stores 312 1 -Total Receives: 510 -Stores 313 1 -Total Receives: 511 -Stores 314 1 -Total Receives: 512 -Stores 315 1 -Total Receives: 513 -Stores 316 1 -Total rewards: 516 -Total Receives: 514 -Stores 317 1 -Total Receives: 515 -Stores 318 1 -Total Receives: 516 -Stores 319 1 -Total rewards: 519 -Total Receives: 517 -Stores 320 1 -Total Receives: 518 -Stores 321 1 -Total Receives: 519 -Stores 322 1 -Total rewards: 523 -Total Receives: 520 -Stores 323 1 -Total Receives: 521 -Stores 324 1 -Total Receives: 522 -Stores 325 1 -Total Receives: 523 -Stores 326 1 -Total rewards: 526 -Total Receives: 524 -Stores 327 1 -Total Receives: 525 -Stores 328 1 -Total Receives: 526 -Stores 329 1 -Total rewards: 530 -Total Receives: 527 -Stores 330 1 -Total Receives: 528 -Stores 331 1 -Total Receives: 529 -Stores 332 1 -Total Receives: 530 -Stores 333 1 -Total rewards: 533 -Total Receives: 531 -Stores 334 1 -Total Receives: 532 -Stores 335 1 -Total Receives: 533 -Stores 336 1 -Total rewards: 538 -Total Receives: 534 -Stores 337 1 -Total Receives: 535 -Stores 338 1 -Total Receives: 536 -Stores 339 1 -Total Receives: 537 -Stores 340 1 -Total Receives: 538 -Stores 341 1 -Total rewards: 541 -Total Receives: 539 -Stores 342 1 -Total Receives: 540 -Stores 343 1 -Total Receives: 541 -Stores 344 1 -Total rewards: 545 -Total Receives: 542 -Stores 345 1 -Total Receives: 543 -Stores 346 1 -Total Receives: 544 -Stores 347 1 -Total Receives: 545 -Stores 348 1 -Total rewards: 549 -Total Receives: 546 -Stores 349 1 -Total Receives: 547 -Stores 350 1 -Total Receives: 548 -Stores 351 1 -Total Receives: 549 -Stores 352 1 -Total rewards: 552 -Total Receives: 550 -Stores 353 1 -Total Receives: 551 -Stores 354 1 -Total Receives: 552 -Stores 355 1 -Total rewards: 557 -Total Receives: 553 -Stores 356 1 -Total Receives: 554 -Stores 357 1 -Total Receives: 555 -Stores 358 1 -Total Receives: 556 -Stores 359 1 -Total Receives: 557 -Stores 360 1 -Total rewards: 564 -Total Receives: 558 -Stores 361 1 -Total Receives: 559 -Stores 362 1 -Total Receives: 560 -Stores 363 1 -Total Receives: 561 -Stores 364 1 -Total Receives: 562 -Stores 365 1 -Total Receives: 563 -Stores 366 1 -Total Receives: 564 -Stores 367 1 -Total rewards: 568 -Total Receives: 565 -Stores 368 1 -Total Receives: 566 -Stores 369 1 -Total Receives: 567 -Stores 370 1 -Total Receives: 568 -Stores 371 1 -Total rewards: 572 -Total Receives: 569 -Stores 372 1 -Total Receives: 570 -Stores 373 1 -Total Receives: 571 -Stores 374 1 -Total Receives: 572 -Stores 375 1 -Total rewards: 577 -Total Receives: 573 -Stores 376 1 -Total Receives: 574 -Stores 377 1 -Total Receives: 575 -Stores 378 1 -Total Receives: 576 -Stores 379 1 -Total Receives: 577 -Stores 380 1 -Total rewards: 582 -Total Receives: 578 -Stores 381 1 -Total Receives: 579 -Stores 382 1 -Total Receives: 580 -Stores 383 1 -Total Receives: 581 -Stores 384 1 -Total Receives: 582 -Stores 385 1 -Total rewards: 583 -Total Receives: 583 -Stores 386 1 -Total rewards: 590 -Total Receives: 584 -Stores 387 1 -Total Receives: 585 -Stores 388 1 -Total Receives: 586 -Stores 389 1 -Total Receives: 587 -Stores 390 1 -Total Receives: 588 -Stores 391 1 -Total Receives: 589 -Stores 392 1 -Total Receives: 590 -Stores 393 1 -Total rewards: 597 -Total Receives: 591 -Stores 394 1 -Total Receives: 592 -Stores 395 1 -Total Receives: 593 -Stores 396 1 -Total Receives: 594 -Stores 397 1 -Total Receives: 595 -Stores 398 1 -Total Receives: 596 -Stores 399 1 -Total Receives: 597 -Stores 400 1 -Total rewards: 602 -Total Receives: 598 -Stores 401 1 -Total Receives: 599 -Stores 402 1 -Total Receives: 600 -Stores 403 1 -Total Receives: 601 -Stores 404 1 -Total Receives: 602 -Stores 405 1 -Total rewards: 604 -Total Receives: 603 -Stores 406 1 -Total Receives: 604 -Stores 407 1 -Total rewards: 612 -Total Receives: 605 -Stores 408 1 -Total Receives: 606 -Stores 409 1 -Total Receives: 607 -Stores 410 1 -Total Receives: 608 -Stores 411 1 -Total Receives: 609 -Stores 412 1 -Total Receives: 610 -Stores 413 1 -Total Receives: 611 -Stores 414 1 -Total Receives: 612 -Stores 415 1 -Total rewards: 620 -Total Receives: 613 -Stores 416 1 -Total Receives: 614 -Stores 417 1 -Total Receives: 615 -Stores 418 1 -Total Receives: 616 -Stores 419 1 -Total Receives: 617 -Stores 420 1 -Total Receives: 618 -Stores 421 1 -Total Receives: 619 -Stores 422 1 -Total Receives: 620 -Stores 423 1 -Total rewards: 626 -Total Receives: 621 -Stores 424 1 -Total Receives: 622 -Stores 425 1 -Total Receives: 623 -Stores 426 1 -Total Receives: 624 -Stores 427 1 -Total Receives: 625 -Total Receives: 626 -Total rewards: 630 -Total Receives: 627 -Total Receives: 628 -Total Receives: 629 -Total Receives: 630 -Total rewards: 636 -Total Receives: 631 -Stores 428 1 -Total Receives: 632 -Stores 429 1 -Total Receives: 633 -Stores 430 1 -Total Receives: 634 -Stores 431 1 -Total Receives: 635 -Stores 432 1 -Total Receives: 636 -Stores 433 1 -Total rewards: 640 -Total Receives: 637 -Stores 434 1 -Total Receives: 638 -Stores 435 1 -Total Receives: 639 -Stores 436 1 -Total Receives: 640 -Stores 437 1 -Total rewards: 645 -Total Receives: 641 -Stores 438 1 -Total Receives: 642 -Stores 439 1 -Total Receives: 643 -Stores 440 1 -Total Receives: 644 -Stores 441 1 -Total Receives: 645 -Stores 442 1 -Total rewards: 651 -Total Receives: 646 -Stores 443 1 -Total Receives: 647 -Stores 444 1 -Total Receives: 648 -Stores 445 1 -Total Receives: 649 -Stores 446 1 -Total Receives: 650 -Stores 447 1 -Total Receives: 651 -Stores 448 1 -Total rewards: 656 -Total Receives: 652 -Stores 449 1 -Total Receives: 653 -Stores 450 1 -Total Receives: 654 -Stores 451 1 -Total Receives: 655 -Stores 452 1 -Total Receives: 656 -Stores 453 1 -Total rewards: 659 -Total Receives: 657 -Stores 454 1 -Total Receives: 658 -Stores 455 1 -Total Receives: 659 -Stores 456 1 -Total rewards: 662 -Total Receives: 660 -Stores 457 1 -Total Receives: 661 -Stores 458 1 -Total Receives: 662 -Stores 459 1 -Total rewards: 667 -Total Receives: 663 -Stores 460 1 -Total Receives: 664 -Stores 461 1 -Total Receives: 665 -Stores 462 1 -Total Receives: 666 -Stores 463 1 -Total Receives: 667 -Stores 464 1 -Total rewards: 672 -Total Receives: 668 -Stores 465 1 -Total Receives: 669 -Stores 466 1 -Total Receives: 670 -Stores 467 1 -Total Receives: 671 -Stores 468 1 -Total Receives: 672 -Stores 469 1 -Total rewards: 678 -Total Receives: 673 -Stores 470 1 -Total Receives: 674 -Stores 471 1 -Total Receives: 675 -Stores 472 1 -Total Receives: 676 -Stores 473 1 -Total Receives: 677 -Stores 474 1 -Total Receives: 678 -Stores 475 1 -Total rewards: 685 -Total Receives: 679 -Stores 476 1 -Total Receives: 680 -Total Receives: 681 -Total Receives: 682 -Total Receives: 683 -Total Receives: 684 -Total Receives: 685 -Total rewards: 690 -Total Receives: 686 -Stores 477 1 -Total Receives: 687 -Stores 478 1 -Total Receives: 688 -Stores 479 1 -Total Receives: 689 -Stores 480 1 -Total Receives: 690 -Stores 481 1 -Total rewards: 695 -Total Receives: 691 -Stores 482 1 -Total Receives: 692 -Stores 483 1 -Total Receives: 693 -Stores 484 1 -Total Receives: 694 -Stores 485 1 -Total Receives: 695 -Stores 486 1 -Total rewards: 701 -Total Receives: 696 -Stores 487 1 -Total Receives: 697 -Stores 488 1 -Total Receives: 698 -Stores 489 1 -Total Receives: 699 -Stores 490 1 -Total Receives: 700 -Stores 491 1 -Total Receives: 701 -Stores 492 1 -Total rewards: 704 -Total Receives: 702 -Stores 493 1 -Total Receives: 703 -Stores 494 1 -Total Receives: 704 -Stores 495 1 -Total rewards: 708 -Total Receives: 705 -Stores 496 1 -Total Receives: 706 -Stores 497 1 -Total Receives: 707 -Stores 498 1 -Total Receives: 708 -Stores 499 1 -Total rewards: 712 -Total Receives: 709 -Stores 500 1 -Total Receives: 710 -Stores 501 1 -Total Receives: 711 -Stores 502 1 -Total Receives: 712 -Stores 503 1 -Total rewards: 718 -Total Receives: 713 -Stores 504 1 -Total Receives: 714 -Stores 505 1 -Total Receives: 715 -Stores 506 1 -Total Receives: 716 -Stores 507 1 -Total Receives: 717 -Stores 508 1 -Total Receives: 718 -Stores 509 1 -Total rewards: 722 -Total Receives: 719 -Stores 510 1 -Total Receives: 720 -Stores 511 1 -Total Receives: 721 -Stores 512 1 -Total Receives: 722 -Stores 513 1 -Total rewards: 726 -Total Receives: 723 -Stores 514 1 -Total Receives: 724 -Stores 515 1 -Total Receives: 725 -Stores 516 1 -Total Receives: 726 -Stores 517 1 -Total rewards: 729 -Total Receives: 727 -Stores 518 1 -Total Receives: 728 -Stores 519 1 -Total Receives: 729 -Stores 520 1 -Total rewards: 734 -Total Receives: 730 -Stores 521 1 -Total Receives: 731 -Stores 522 1 -Total Receives: 732 -loss: 0.0010263718431815505, td_error: 114.29786682128906, entropy: 3.855865240097046 -Train step: 0.4455597400665283 Optimizer Step: 6 -Total rewards: 490 -Total Receives: 486 -Stores 289 1 -Total Receives: 487 -Stores 290 1 -Total Receives: 488 -Stores 291 1 -Total Receives: 489 -Stores 292 1 -Total Receives: 490 -Stores 293 1 -Total rewards: 492 -Total Receives: 491 -Stores 294 1 -Total Receives: 492 -Stores 295 1 -Total rewards: 496 -Total Receives: 493 -Stores 296 1 -Total Receives: 494 -Stores 297 1 -Total Receives: 495 -Stores 298 1 -Total Receives: 496 -Stores 299 1 -Total rewards: 500 -Total Receives: 497 -Stores 300 1 -Total Receives: 498 -Stores 301 1 -Total Receives: 499 -Stores 302 1 -Total Receives: 500 -Stores 303 1 -Total rewards: 502 -Total Receives: 501 -Stores 304 1 -Total Receives: 502 -Stores 305 1 -Total rewards: 507 -Total Receives: 503 -Stores 306 1 -Total Receives: 504 -Stores 307 1 -Total Receives: 505 -Stores 308 1 -Total Receives: 506 -Stores 309 1 -Total Receives: 507 -Stores 310 1 -Total rewards: 513 -Total Receives: 508 -Stores 311 1 -Total Receives: 509 -Stores 312 1 -Total Receives: 510 -Stores 313 1 -Total Receives: 511 -Stores 314 1 -Total Receives: 512 -Stores 315 1 -Total Receives: 513 -Stores 316 1 -Total rewards: 516 -Total Receives: 514 -Stores 317 1 -Total Receives: 515 -Stores 318 1 -Total Receives: 516 -Stores 319 1 -Total rewards: 519 -Total Receives: 517 -Stores 320 1 -loss: 4.641587565856753e-06, td_error: 0.5135261416435242, entropy: 4.64009428024292 -Train step: 0.3771052360534668 Optimizer Step: 7 -Total Receives: 518 -Stores 321 1 -Total Receives: 519 -Stores 322 1 -Total rewards: 523 -Total Receives: 520 -Stores 323 1 -Total Receives: 521 -Stores 324 1 -Total Receives: 522 -Stores 325 1 -Total Receives: 523 -Stores 326 1 -Total rewards: 526 -Total Receives: 524 -Stores 327 1 -Total Receives: 525 -Stores 328 1 -Total Receives: 526 -Stores 329 1 -Total rewards: 530 -Total Receives: 527 -Stores 330 1 -Total Receives: 528 -Stores 331 1 -Total Receives: 529 -Stores 332 1 -Total Receives: 530 -Stores 333 1 -Total rewards: 533 -Total Receives: 531 -Stores 334 1 -Total Receives: 532 -Stores 335 1 -Total Receives: 533 -Stores 336 1 -Total rewards: 538 -Total Receives: 534 -Stores 337 1 -Total Receives: 535 -Stores 338 1 -Total Receives: 536 -Stores 339 1 -Total Receives: 537 -Stores 340 1 -Total Receives: 538 -Stores 341 1 -Total rewards: 541 -Total Receives: 539 -Stores 342 1 -Total Receives: 540 -Stores 343 1 -Total Receives: 541 -Stores 344 1 -Total rewards: 545 -Total Receives: 542 -Stores 345 1 -Total Receives: 543 -Stores 346 1 -Total Receives: 544 -Stores 347 1 -Total Receives: 545 -Stores 348 1 -Total rewards: 549 -Total Receives: 546 -Stores 349 1 -Total Receives: 547 -Stores 350 1 -Total Receives: 548 -Stores 351 1 -Total Receives: 549 -Stores 352 1 -loss: 2.1955518604954705e-05, td_error: 0.816707968711853, entropy: 4.7405571937561035 -Train step: 0.4521017074584961 Optimizer Step: 8 -Total rewards: 552 -Total Receives: 550 -Stores 353 1 -Total Receives: 551 -Stores 354 1 -Total Receives: 552 -Stores 355 1 -Total rewards: 557 -Total Receives: 553 -Stores 356 1 -Total Receives: 554 -Stores 357 1 -Total Receives: 555 -Stores 358 1 -Total Receives: 556 -Stores 359 1 -Total Receives: 557 -Stores 360 1 -Total rewards: 564 -Total Receives: 558 -Stores 361 1 -Total Receives: 559 -Stores 362 1 -Total Receives: 560 -Stores 363 1 -Total Receives: 561 -Stores 364 1 -Total Receives: 562 -Stores 365 1 -Total Receives: 563 -Stores 366 1 -Total Receives: 564 -Stores 367 1 -Total rewards: 568 -Total Receives: 565 -Stores 368 1 -Total Receives: 566 -Stores 369 1 -Total Receives: 567 -Stores 370 1 -Total Receives: 568 -Stores 371 1 -Total rewards: 572 -Total Receives: 569 -Stores 372 1 -Total Receives: 570 -Stores 373 1 -Total Receives: 571 -Stores 374 1 -Total Receives: 572 -Stores 375 1 -Total rewards: 577 -Total Receives: 573 -Stores 376 1 -Total Receives: 574 -Stores 377 1 -Total Receives: 575 -Stores 378 1 -Total Receives: 576 -Stores 379 1 -Total Receives: 577 -Stores 380 1 -Total rewards: 582 -Total Receives: 578 -Stores 381 1 -Total Receives: 579 -Stores 382 1 -Total Receives: 580 -Stores 383 1 -Total Receives: 581 -Stores 384 1 -loss: 1.2654808415391017e-05, td_error: 1.402619481086731, entropy: 4.675647258758545 -Train step: 0.507270097732544 Optimizer Step: 9 -Total Receives: 582 -Stores 385 1 -Total rewards: 583 -Total Receives: 583 -Stores 386 1 -Total rewards: 590 -Total Receives: 584 -Stores 387 1 -Total Receives: 585 -Stores 388 1 -Total Receives: 586 -Stores 389 1 -Total Receives: 587 -Stores 390 1 -Total Receives: 588 -Stores 391 1 -Total Receives: 589 -Stores 392 1 -Total Receives: 590 -Stores 393 1 -Total rewards: 597 -Total Receives: 591 -Stores 394 1 -Total Receives: 592 -Stores 395 1 -Total Receives: 593 -Stores 396 1 -Total Receives: 594 -Stores 397 1 -Total Receives: 595 -Stores 398 1 -Total Receives: 596 -Stores 399 1 -Total Receives: 597 -Stores 400 1 -Total rewards: 602 -Total Receives: 598 -Stores 401 1 -Total Receives: 599 -Stores 402 1 -Total Receives: 600 -Stores 403 1 -Total Receives: 601 -Stores 404 1 -Total Receives: 602 -Stores 405 1 -Total rewards: 604 -Total Receives: 603 -Stores 406 1 -Total Receives: 604 -Stores 407 1 -Total rewards: 612 -Total Receives: 605 -Stores 408 1 -Total Receives: 606 -Stores 409 1 -Total Receives: 607 -Stores 410 1 -Total Receives: 608 -Stores 411 1 -Total Receives: 609 -Stores 412 1 -Total Receives: 610 -Stores 413 1 -Total Receives: 611 -Stores 414 1 -Total Receives: 612 -Stores 415 1 -Total rewards: 620 -Total Receives: 613 -Stores 416 1 -loss: 8.288554454338737e-06, td_error: 0.9267004728317261, entropy: 4.712723731994629 -Train step: 0.40714073181152344 Optimizer Step: 10 -Total Receives: 614 -Stores 417 1 -Total Receives: 615 -Stores 418 1 -Total Receives: 616 -Stores 419 1 -Total Receives: 617 -Stores 420 1 -Total Receives: 618 -Stores 421 1 -Total Receives: 619 -Stores 422 1 -Total Receives: 620 -Stores 423 1 -Total rewards: 626 -Total Receives: 621 -Stores 424 1 -Total Receives: 622 -Stores 425 1 -Total Receives: 623 -Stores 426 1 -Total Receives: 624 -Stores 427 1 -Total Receives: 625 -Total Receives: 626 -Total rewards: 630 -Total Receives: 627 -Total Receives: 628 -Total Receives: 629 -Total Receives: 630 -Total rewards: 636 -Total Receives: 631 -Stores 428 1 -Total Receives: 632 -Stores 429 1 -Total Receives: 633 -Stores 430 1 -Total Receives: 634 -Stores 431 1 -Total Receives: 635 -Stores 432 1 -Total Receives: 636 -Stores 433 1 -Total rewards: 640 -Total Receives: 637 -Stores 434 1 -Total Receives: 638 -Stores 435 1 -Total Receives: 639 -Stores 436 1 -Total Receives: 640 -Stores 437 1 -Total rewards: 645 -Total Receives: 641 -Stores 438 1 -Total Receives: 642 -Stores 439 1 -Total Receives: 643 -Stores 440 1 -Total Receives: 644 -Stores 441 1 -Total Receives: 645 -Stores 442 1 -Total rewards: 651 -Total Receives: 646 -Stores 443 1 -Total Receives: 647 -Stores 444 1 -Total Receives: 648 -Stores 445 1 -Total Receives: 649 -Stores 446 1 -Total Receives: 650 -Stores 447 1 -Total Receives: 651 -Stores 448 1 -loss: 2.513945673854323e-06, td_error: 0.28107160329818726, entropy: 4.75938081741333 -Train step: 0.3399770259857178 Optimizer Step: 11 -Total rewards: 656 -Total Receives: 652 -Stores 449 1 -Total Receives: 653 -Stores 450 1 -Total Receives: 654 -Stores 451 1 -Total Receives: 655 -Stores 452 1 -Total Receives: 656 -Stores 453 1 -Total rewards: 659 -Total Receives: 657 -Stores 454 1 -Total Receives: 658 -Stores 455 1 -Total Receives: 659 -Stores 456 1 -Total rewards: 662 -Total Receives: 660 -Stores 457 1 -Total Receives: 661 -Stores 458 1 -Total Receives: 662 -Stores 459 1 -Total rewards: 667 -Total Receives: 663 -Stores 460 1 -Total Receives: 664 -Stores 461 1 -Total Receives: 665 -Stores 462 1 -Total Receives: 666 -Stores 463 1 -Total Receives: 667 -Stores 464 1 -Total rewards: 672 -Total Receives: 668 -Stores 465 1 -Total Receives: 669 -Stores 466 1 -Total Receives: 670 -Stores 467 1 -Total Receives: 671 -Stores 468 1 -Total Receives: 672 -Stores 469 1 -Total rewards: 678 -Total Receives: 673 -Stores 470 1 -Total Receives: 674 -Stores 471 1 -Total Receives: 675 -Stores 472 1 -Total Receives: 676 -Stores 473 1 -Total Receives: 677 -Stores 474 1 -Total Receives: 678 -Stores 475 1 -Total rewards: 685 -Total Receives: 679 -Stores 476 1 -Total Receives: 680 -Total Receives: 681 -Total Receives: 682 -Total Receives: 683 -Total Receives: 684 -Total Receives: 685 -Total rewards: 690 -Total Receives: 686 -Stores 477 1 -Total Receives: 687 -Stores 478 1 -loss: 0.032700784504413605, td_error: 8066.96044921875, entropy: 0.767361044883728 -Train step: 0.41517186164855957 Optimizer Step: 6 -Total rewards: 490 -Total Receives: 486 -Stores 289 1 -Total Receives: 487 -Stores 290 1 -Total Receives: 488 -Stores 291 1 -Total Receives: 489 -Stores 292 1 -Total Receives: 490 -Stores 293 1 -Total rewards: 492 -Total Receives: 491 -Stores 294 1 -Total Receives: 492 -Stores 295 1 -Total rewards: 496 -Total Receives: 493 -Stores 296 1 -Total Receives: 494 -Stores 297 1 -Total Receives: 495 -Stores 298 1 -Total Receives: 496 -Stores 299 1 -Total rewards: 500 -Total Receives: 497 -Stores 300 1 -Total Receives: 498 -Stores 301 1 -Total Receives: 499 -Stores 302 1 -Total Receives: 500 -Stores 303 1 -Total rewards: 502 -Total Receives: 501 -Stores 304 1 -Total Receives: 502 -Stores 305 1 -Total rewards: 507 -Total Receives: 503 -Stores 306 1 -Total Receives: 504 -Stores 307 1 -Total Receives: 505 -Stores 308 1 -Total Receives: 506 -Stores 309 1 -Total Receives: 507 -Stores 310 1 -Total rewards: 513 -Total Receives: 508 -Stores 311 1 -Total Receives: 509 -Stores 312 1 -Total Receives: 510 -Stores 313 1 -Total Receives: 511 -Stores 314 1 -Total Receives: 512 -Stores 315 1 -Total Receives: 513 -Stores 316 1 -Total rewards: 516 -Total Receives: 514 -Stores 317 1 -Total Receives: 515 -Stores 318 1 -Total Receives: 516 -Stores 319 1 -Total rewards: 519 -Total Receives: 517 -Stores 320 1 -loss: 0.00011496063962113112, td_error: 37.860103607177734, entropy: 3.927485466003418 -Train step: 0.31688809394836426 Optimizer Step: 7 -Total Receives: 518 -Stores 321 1 -Total Receives: 519 -Stores 322 1 -Total rewards: 523 -Total Receives: 520 -Stores 323 1 -Total Receives: 521 -Stores 324 1 -Total Receives: 522 -Stores 325 1 -Total Receives: 523 -Stores 326 1 -Total rewards: 526 -Total Receives: 524 -Stores 327 1 -Total Receives: 525 -Stores 328 1 -Total Receives: 526 -Stores 329 1 -Total rewards: 530 -Total Receives: 527 -Stores 330 1 -Total Receives: 528 -Stores 331 1 -Total Receives: 529 -Stores 332 1 -Total Receives: 530 -Stores 333 1 -Total rewards: 533 -Total Receives: 531 -Stores 334 1 -Total Receives: 532 -Stores 335 1 -Total Receives: 533 -Stores 336 1 -Total rewards: 538 -Total Receives: 534 -Stores 337 1 -Total Receives: 535 -Stores 338 1 -Total Receives: 536 -Stores 339 1 -Total Receives: 537 -Stores 340 1 -Total Receives: 538 -Stores 341 1 -Total rewards: 541 -Total Receives: 539 -Stores 342 1 -Total Receives: 540 -Stores 343 1 -Total Receives: 541 -Stores 344 1 -Total rewards: 545 -Total Receives: 542 -Stores 345 1 -Total Receives: 543 -Stores 346 1 -Total Receives: 544 -Stores 347 1 -Total Receives: 545 -Stores 348 1 -Total rewards: 549 -Total Receives: 546 -Stores 349 1 -Total Receives: 547 -Stores 350 1 -Total Receives: 548 -Stores 351 1 -Total Receives: 549 -Stores 352 1 -loss: 0.00024404622672591358, td_error: 71.21437072753906, entropy: 2.0011048316955566 -Train step: 0.3852527141571045 Optimizer Step: 8 -Total rewards: 552 -Total Receives: 550 -Stores 353 1 -Total Receives: 551 -Stores 354 1 -Total Receives: 552 -Stores 355 1 -Total rewards: 557 -Total Receives: 553 -Stores 356 1 -Total Receives: 554 -Stores 357 1 -Total Receives: 555 -Stores 358 1 -Total Receives: 556 -Stores 359 1 -Total Receives: 557 -Stores 360 1 -Total rewards: 564 -Total Receives: 558 -Stores 361 1 -Total Receives: 559 -Stores 362 1 -Total Receives: 560 -Stores 363 1 -Total Receives: 561 -Stores 364 1 -Total Receives: 562 -Stores 365 1 -Total Receives: 563 -Stores 366 1 -Total Receives: 564 -Stores 367 1 -Total rewards: 568 -Total Receives: 565 -Stores 368 1 -Total Receives: 566 -Stores 369 1 -Total Receives: 567 -Stores 370 1 -Total Receives: 568 -Stores 371 1 -Total rewards: 572 -Total Receives: 569 -Stores 372 1 -Total Receives: 570 -Stores 373 1 -Total Receives: 571 -Stores 374 1 -Total Receives: 572 -Stores 375 1 -Total rewards: 577 -Total Receives: 573 -Stores 376 1 -Total Receives: 574 -Stores 377 1 -Total Receives: 575 -Stores 378 1 -Total Receives: 576 -Stores 379 1 -Total Receives: 577 -Stores 380 1 -Total rewards: 582 -Total Receives: 578 -Stores 381 1 -Total Receives: 579 -Stores 382 1 -Total Receives: 580 -Stores 383 1 -Total Receives: 581 -Stores 384 1 -loss: 5.529516056412831e-05, td_error: 9.249808311462402, entropy: 4.170965194702148 -Train step: 0.5455420017242432 Optimizer Step: 9 -Total Receives: 582 -Stores 385 1 -Total rewards: 583 -Total Receives: 583 -Stores 386 1 -Total rewards: 590 -Total Receives: 584 -Stores 387 1 -Total Receives: 585 -Stores 388 1 -Total Receives: 586 -Stores 389 1 -Total Receives: 587 -Stores 390 1 -Total Receives: 588 -Stores 391 1 -Total Receives: 589 -Stores 392 1 -Total Receives: 590 -Stores 393 1 -Total rewards: 597 -Total Receives: 591 -Stores 394 1 -Total Receives: 592 -Stores 395 1 -Total Receives: 593 -Stores 396 1 -Total Receives: 594 -Stores 397 1 -Total Receives: 595 -Stores 398 1 -Total Receives: 596 -Stores 399 1 -Total Receives: 597 -Stores 400 1 -Total rewards: 602 -Total Receives: 598 -Stores 401 1 -Total Receives: 599 -Stores 402 1 -Total Receives: 600 -Stores 403 1 -Total Receives: 601 -Stores 404 1 -Total Receives: 602 -Stores 405 1 -Total rewards: 604 -Total Receives: 603 -Stores 406 1 -Total Receives: 604 -Stores 407 1 -Total rewards: 612 -Total Receives: 605 -Stores 408 1 -Total Receives: 606 -Stores 409 1 -Total Receives: 607 -Stores 410 1 -Total Receives: 608 -Stores 411 1 -Total Receives: 609 -Stores 412 1 -Total Receives: 610 -Stores 413 1 -Total Receives: 611 -Stores 414 1 -Total Receives: 612 -Stores 415 1 -Total rewards: 620 -Total Receives: 613 -Stores 416 1 -loss: 3.88391490560025e-05, td_error: 7.587400436401367, entropy: 1.2369554042816162 -Train step: 0.3622410297393799 Optimizer Step: 10 -Total Receives: 614 -Stores 417 1 -Total Receives: 615 -Stores 418 1 -Total Receives: 616 -Stores 419 1 -Total Receives: 617 -Stores 420 1 -Total Receives: 618 -Stores 421 1 -Total Receives: 619 -Stores 422 1 -Total Receives: 620 -Stores 423 1 -Total rewards: 626 -Total Receives: 621 -Stores 424 1 -Total Receives: 622 -Stores 425 1 -Total Receives: 623 -Stores 426 1 -Total Receives: 624 -Stores 427 1 -Total Receives: 625 -Total Receives: 626 -Total rewards: 630 -Total Receives: 627 -Total Receives: 628 -Total Receives: 629 -Total Receives: 630 -Total rewards: 636 -Total Receives: 631 -Stores 428 1 -Total Receives: 632 -Stores 429 1 -Total Receives: 633 -Stores 430 1 -Total Receives: 634 -Stores 431 1 -Total Receives: 635 -Stores 432 1 -Total Receives: 636 -Stores 433 1 -Total rewards: 640 -Total Receives: 637 -Stores 434 1 -Total Receives: 638 -Stores 435 1 -Total Receives: 639 -Stores 436 1 -Total Receives: 640 -Stores 437 1 -Total rewards: 645 -Total Receives: 641 -Stores 438 1 -Total Receives: 642 -Stores 439 1 -Total Receives: 643 -Stores 440 1 -Total Receives: 644 -Stores 441 1 -Total Receives: 645 -Stores 442 1 -Total rewards: 651 -Total Receives: 646 -Stores 443 1 -Total Receives: 647 -Stores 444 1 -Total Receives: 648 -Stores 445 1 -Total Receives: 649 -Stores 446 1 -Total Receives: 650 -Stores 447 1 -Total Receives: 651 -Stores 448 1 -loss: 1.266639810637571e-05, td_error: 3.8569483757019043, entropy: 2.744009017944336 -Train step: 0.3705110549926758 Optimizer Step: 11 -Total rewards: 656 -Total Receives: 652 -Stores 449 1 -Total Receives: 653 -Stores 450 1 -Total Receives: 654 -Stores 451 1 -Total Receives: 655 -Stores 452 1 -Total Receives: 656 -Stores 453 1 -Total rewards: 659 -Total Receives: 657 -Stores 454 1 -Total Receives: 658 -Stores 455 1 -Total Receives: 659 -Stores 456 1 -Total rewards: 662 -Total Receives: 660 -Stores 457 1 -Total Receives: 661 -Stores 458 1 -Total Receives: 662 -Stores 459 1 -Total rewards: 667 -Total Receives: 663 -Stores 460 1 -Total Receives: 664 -Stores 461 1 -Total Receives: 665 -Stores 462 1 -Total Receives: 666 -Stores 463 1 -Total Receives: 667 -Stores 464 1 -Total rewards: 672 -Total Receives: 668 -Stores 465 1 -Total Receives: 669 -Stores 466 1 -Total Receives: 670 -Stores 467 1 -Total Receives: 671 -Stores 468 1 -Total Receives: 672 -Stores 469 1 -Total rewards: 678 -Total Receives: 673 -Stores 470 1 -Total Receives: 674 -Stores 471 1 -Total Receives: 675 -Stores 472 1 -Total Receives: 676 -Stores 473 1 -Total Receives: 677 -Stores 474 1 -Total Receives: 678 -Stores 475 1 -Total rewards: 685 -Total Receives: 679 -Stores 476 1 -Total Receives: 680 -Total Receives: 681 -Total Receives: 682 -Total Receives: 683 -Total Receives: 684 -Total Receives: 685 -Total rewards: 690 -Total Receives: 686 -Stores 477 1 -Total Receives: 687 -Stores 478 1 -loss: 1.5670266293454915e-05, td_error: 1.3160675764083862, entropy: 4.506892204284668 -Train step: 0.3734447956085205 Optimizer Step: 6 -Total rewards: 490 -Total Receives: 486 -Stores 289 1 -Total Receives: 487 -Stores 290 1 -Total Receives: 488 -Stores 291 1 -Total Receives: 489 -Stores 292 1 -Total Receives: 490 -Stores 293 1 -Total rewards: 492 -Total Receives: 491 -Stores 294 1 -Total Receives: 492 -Stores 295 1 -Total rewards: 496 -Total Receives: 493 -Stores 296 1 -Total Receives: 494 -Stores 297 1 -Total Receives: 495 -Stores 298 1 -Total Receives: 496 -Stores 299 1 -Total rewards: 500 -Total Receives: 497 -Stores 300 1 -Total Receives: 498 -Stores 301 1 -Total Receives: 499 -Stores 302 1 -Total Receives: 500 -Stores 303 1 -Total rewards: 502 -Total Receives: 501 -Stores 304 1 -Total Receives: 502 -Stores 305 1 -Total rewards: 507 -Total Receives: 503 -Stores 306 1 -Total Receives: 504 -Stores 307 1 -Total Receives: 505 -Stores 308 1 -Total Receives: 506 -Stores 309 1 -Total Receives: 507 -Stores 310 1 -Total rewards: 513 -Total Receives: 508 -Stores 311 1 -Total Receives: 509 -Stores 312 1 -Total Receives: 510 -Stores 313 1 -Total Receives: 511 -Stores 314 1 -Total Receives: 512 -Stores 315 1 -Total Receives: 513 -Stores 316 1 -Total rewards: 516 -Total Receives: 514 -Stores 317 1 -Total Receives: 515 -Stores 318 1 -Total Receives: 516 -Stores 319 1 -Total rewards: 519 -Total Receives: 517 -Stores 320 1 -loss: 5.150700417289045e-06, td_error: 0.4334803521633148, entropy: 4.808489799499512 -Train step: 0.3811321258544922 Optimizer Step: 7 -Total Receives: 518 -Stores 321 1 -Total Receives: 519 -Stores 322 1 -Total rewards: 523 -Total Receives: 520 -Stores 323 1 -Total Receives: 521 -Stores 324 1 -Total Receives: 522 -Stores 325 1 -Total Receives: 523 -Stores 326 1 -Total rewards: 526 -Total Receives: 524 -Stores 327 1 -Total Receives: 525 -Stores 328 1 -Total Receives: 526 -Stores 329 1 -Total rewards: 530 -Total Receives: 527 -Stores 330 1 -Total Receives: 528 -Stores 331 1 -Total Receives: 529 -Stores 332 1 -Total Receives: 530 -Stores 333 1 -Total rewards: 533 -Total Receives: 531 -Stores 334 1 -Total Receives: 532 -Stores 335 1 -Total Receives: 533 -Stores 336 1 -Total rewards: 538 -Total Receives: 534 -Stores 337 1 -Total Receives: 535 -Stores 338 1 -Total Receives: 536 -Stores 339 1 -Total Receives: 537 -Stores 340 1 -Total Receives: 538 -Stores 341 1 -Total rewards: 541 -Total Receives: 539 -Stores 342 1 -Total Receives: 540 -Stores 343 1 -Total Receives: 541 -Stores 344 1 -Total rewards: 545 -Total Receives: 542 -Stores 345 1 -Total Receives: 543 -Stores 346 1 -Total Receives: 544 -Stores 347 1 -Total Receives: 545 -Stores 348 1 -Total rewards: 549 -Total Receives: 546 -Stores 349 1 -Total Receives: 547 -Stores 350 1 -Total Receives: 548 -Stores 351 1 -Total Receives: 549 -Stores 352 1 -loss: 3.754782028408954e-06, td_error: 0.3097396790981293, entropy: 4.804421901702881 -Train step: 0.4921128749847412 Optimizer Step: 8 -Total rewards: 552 -Total Receives: 550 -Stores 353 1 -Total Receives: 551 -Stores 354 1 -Total Receives: 552 -Stores 355 1 -Total rewards: 557 -Total Receives: 553 -Stores 356 1 -Total Receives: 554 -Stores 357 1 -Total Receives: 555 -Stores 358 1 -Total Receives: 556 -Stores 359 1 -Total Receives: 557 -Stores 360 1 -Total rewards: 564 -Total Receives: 558 -Stores 361 1 -Total Receives: 559 -Stores 362 1 -Total Receives: 560 -Stores 363 1 -Total Receives: 561 -Stores 364 1 -Total Receives: 562 -Stores 365 1 -Total Receives: 563 -Stores 366 1 -Total Receives: 564 -Stores 367 1 -Total rewards: 568 -Total Receives: 565 -Stores 368 1 -Total Receives: 566 -Stores 369 1 -Total Receives: 567 -Stores 370 1 -Total Receives: 568 -Stores 371 1 -Total rewards: 572 -Total Receives: 569 -Stores 372 1 -Total Receives: 570 -Stores 373 1 -Total Receives: 571 -Stores 374 1 -Total Receives: 572 -Stores 375 1 -Total rewards: 577 -Total Receives: 573 -Stores 376 1 -Total Receives: 574 -Stores 377 1 -Total Receives: 575 -Stores 378 1 -Total Receives: 576 -Stores 379 1 -Total Receives: 577 -Stores 380 1 -Total rewards: 582 -Total Receives: 578 -Stores 381 1 -Total Receives: 579 -Stores 382 1 -Total Receives: 580 -Stores 383 1 -Total Receives: 581 -Stores 384 1 -loss: 2.165265186704346e-06, td_error: 0.1553744375705719, entropy: 4.84356689453125 -Train step: 0.37946105003356934 Optimizer Step: 9 -Total Receives: 582 -Stores 385 1 -Total rewards: 583 -Total Receives: 583 -Stores 386 1 -Total rewards: 590 -Total Receives: 584 -Stores 387 1 -Total Receives: 585 -Stores 388 1 -Total Receives: 586 -Stores 389 1 -Total Receives: 587 -Stores 390 1 -Total Receives: 588 -Stores 391 1 -Total Receives: 589 -Stores 392 1 -Total Receives: 590 -Stores 393 1 -Total rewards: 597 -Total Receives: 591 -Stores 394 1 -Total Receives: 592 -Stores 395 1 -Total Receives: 593 -Stores 396 1 -Total Receives: 594 -Stores 397 1 -Total Receives: 595 -Stores 398 1 -Total Receives: 596 -Stores 399 1 -Total Receives: 597 -Stores 400 1 -Total rewards: 602 -Total Receives: 598 -Stores 401 1 -Total Receives: 599 -Stores 402 1 -Total Receives: 600 -Stores 403 1 -Total Receives: 601 -Stores 404 1 -Total Receives: 602 -Stores 405 1 -Total rewards: 604 -Total Receives: 603 -Stores 406 1 -Total Receives: 604 -Stores 407 1 -Total rewards: 612 -Total Receives: 605 -Stores 408 1 -Total Receives: 606 -Stores 409 1 -Total Receives: 607 -Stores 410 1 -Total Receives: 608 -Stores 411 1 -Total Receives: 609 -Stores 412 1 -Total Receives: 610 -Stores 413 1 -Total Receives: 611 -Stores 414 1 -Total Receives: 612 -Stores 415 1 -Total rewards: 620 -Total Receives: 613 -Stores 416 1 -loss: 1.2170594345661812e-06, td_error: 0.0718606561422348, entropy: 4.838161945343018 -Train step: 0.3473689556121826 Optimizer Step: 10 -Total Receives: 614 -Stores 417 1 -Total Receives: 615 -Stores 418 1 -Total Receives: 616 -Stores 419 1 -Total Receives: 617 -Stores 420 1 -Total Receives: 618 -Stores 421 1 -Total Receives: 619 -Stores 422 1 -Total Receives: 620 -Stores 423 1 -Total rewards: 626 -Total Receives: 621 -Stores 424 1 -Total Receives: 622 -Stores 425 1 -Total Receives: 623 -Stores 426 1 -Total Receives: 624 -Stores 427 1 -Total Receives: 625 -Total Receives: 626 -Total rewards: 630 -Total Receives: 627 -Total Receives: 628 -Total Receives: 629 -Total Receives: 630 -Total rewards: 636 -Total Receives: 631 -Stores 428 1 -Total Receives: 632 -Stores 429 1 -Total Receives: 633 -Stores 430 1 -Total Receives: 634 -Stores 431 1 -Total Receives: 635 -Stores 432 1 -Total Receives: 636 -Stores 433 1 -Total rewards: 640 -Total Receives: 637 -Stores 434 1 -Total Receives: 638 -Stores 435 1 -Total Receives: 639 -Stores 436 1 -Total Receives: 640 -Stores 437 1 -Total rewards: 645 -Total Receives: 641 -Stores 438 1 -Total Receives: 642 -Stores 439 1 -Total Receives: 643 -Stores 440 1 -Total Receives: 644 -Stores 441 1 -Total Receives: 645 -Stores 442 1 -Total rewards: 651 -Total Receives: 646 -Stores 443 1 -Total Receives: 647 -Stores 444 1 -Total Receives: 648 -Stores 445 1 -Total Receives: 649 -Stores 446 1 -Total Receives: 650 -Stores 447 1 -Total Receives: 651 -Stores 448 1 -loss: 1.3300690397954895e-06, td_error: 0.08150734007358551, entropy: 4.837431907653809 -Train step: 0.45541810989379883 Optimizer Step: 11 -Total rewards: 656 -Total Receives: 652 -Stores 449 1 -Total Receives: 653 -Stores 450 1 -Total Receives: 654 -Stores 451 1 -Total Receives: 655 -Stores 452 1 -Total Receives: 656 -Stores 453 1 -Total rewards: 659 -Total Receives: 657 -Stores 454 1 -Total Receives: 658 -Stores 455 1 -Total Receives: 659 -Stores 456 1 -Total rewards: 662 -Total Receives: 660 -Stores 457 1 -Total Receives: 661 -Stores 458 1 -Total Receives: 662 -Stores 459 1 -Total rewards: 667 -Total Receives: 663 -Stores 460 1 -Total Receives: 664 -Stores 461 1 -Total Receives: 665 -Stores 462 1 -Total Receives: 666 -Stores 463 1 -Total Receives: 667 -Stores 464 1 -Total rewards: 672 -Total Receives: 668 -Stores 465 1 -Total Receives: 669 -Stores 466 1 -Total Receives: 670 -Stores 467 1 -Total Receives: 671 -Stores 468 1 -Total Receives: 672 -Stores 469 1 -Total rewards: 678 -Total Receives: 673 -Stores 470 1 -Total Receives: 674 -Stores 471 1 -Total Receives: 675 -Stores 472 1 -Total Receives: 676 -Stores 473 1 -Total Receives: 677 -Stores 474 1 -Total Receives: 678 -Stores 475 1 -Total rewards: 685 -Total Receives: 679 -Stores 476 1 -Total Receives: 680 -Total Receives: 681 -Total Receives: 682 -Total Receives: 683 -Total Receives: 684 -Total Receives: 685 -Total rewards: 690 -Total Receives: 686 -Stores 477 1 -Total Receives: 687 -Stores 478 1 -Total Receives: 289 -Stores 95 1 -Total Receives: 290 -Stores 96 1 -Total rewards: 293 -Total Receives: 291 -Stores 97 1 -Total Receives: 292 -Stores 98 1 -Total Receives: 293 -Stores 99 1 -Total rewards: 299 -Total Receives: 294 -Stores 100 1 -Total Receives: 295 -Stores 101 1 -Total Receives: 296 -Stores 102 1 -Total Receives: 297 -Stores 103 1 -Total Receives: 298 -Stores 104 1 -Total Receives: 299 -Stores 105 1 -Total rewards: 303 -Total Receives: 300 -Stores 106 1 -Total Receives: 301 -Stores 107 1 -Total Receives: 302 -Stores 108 1 -Total Receives: 303 -Stores 109 1 -Total rewards: 306 -Total Receives: 304 -Stores 110 1 -Total Receives: 305 -Stores 111 1 -Total Receives: 306 -Stores 112 1 -Total rewards: 310 -Total Receives: 307 -Stores 113 1 -Total Receives: 308 -Stores 114 1 -Total Receives: 309 -Stores 115 1 -Total Receives: 310 -Stores 116 1 -Total rewards: 311 -Total Receives: 311 -Stores 117 1 -Total rewards: 317 -Total Receives: 312 -Stores 118 1 -Total Receives: 313 -Stores 119 1 -Total Receives: 314 -Stores 120 1 -Total Receives: 315 -Stores 121 1 -Total Receives: 316 -Stores 122 1 -Total Receives: 317 -Stores 123 1 -Total rewards: 321 -Total Receives: 318 -Stores 124 1 -Total Receives: 319 -Stores 125 1 -Total Receives: 320 -Stores 126 1 -Total Receives: 321 -Stores 127 1 -Total rewards: 325 -Total Receives: 322 -Stores 128 1 -Total Receives: 323 -Stores 129 1 -Total Receives: 324 -Stores 130 1 -Total Receives: 325 -Stores 131 1 -Total rewards: 329 -Total Receives: 326 -Stores 132 1 -Total Receives: 327 -Stores 133 1 -Total Receives: 328 -Stores 134 1 -Total Receives: 329 -Stores 135 1 -Total rewards: 334 -Total Receives: 330 -Stores 136 1 -Total Receives: 331 -Stores 137 1 -Total Receives: 332 -Stores 138 1 -Total Receives: 333 -Stores 139 1 -Total Receives: 334 -Stores 140 1 -Total rewards: 342 -Total Receives: 335 -Stores 141 1 -Total Receives: 336 -Stores 142 1 -Total Receives: 337 -Stores 143 1 -Total Receives: 338 -Stores 144 1 -Total Receives: 339 -Stores 145 1 -Total Receives: 340 -Stores 146 1 -Total Receives: 341 -Stores 147 1 -Total Receives: 342 -Stores 148 1 -Total rewards: 347 -Total Receives: 343 -Stores 149 1 -Total Receives: 344 -Stores 150 1 -Total Receives: 345 -Stores 151 1 -Total Receives: 346 -Stores 152 1 -Total Receives: 347 -Stores 153 1 -Total rewards: 352 -Total Receives: 348 -Stores 154 1 -Total Receives: 349 -Stores 155 1 -Total Receives: 350 -Stores 156 1 -Total Receives: 351 -Stores 157 1 -Total Receives: 352 -Stores 158 1 -Total rewards: 356 -Total Receives: 353 -Stores 159 1 -Total Receives: 354 -Stores 160 1 -Total Receives: 355 -Stores 161 1 -Total Receives: 356 -Stores 162 1 -Total rewards: 362 -Total Receives: 357 -Stores 163 1 -Total Receives: 358 -Stores 164 1 -Total Receives: 359 -Stores 165 1 -Total Receives: 360 -Stores 166 1 -Total Receives: 361 -Stores 167 1 -Total Receives: 362 -Stores 168 1 -Total rewards: 367 -Total Receives: 363 -Stores 169 1 -Total Receives: 364 -Stores 170 1 -Total Receives: 365 -Stores 171 1 -Total Receives: 366 -Stores 172 1 -Total Receives: 367 -Stores 173 1 -Total rewards: 371 -Total Receives: 368 -Stores 174 1 -Total Receives: 369 -Stores 175 1 -Total Receives: 370 -Stores 176 1 -Total Receives: 371 -Stores 177 1 -Total rewards: 373 -Total Receives: 372 -Stores 178 1 -Total Receives: 373 -Stores 179 1 -Total rewards: 379 -Total Receives: 374 -Stores 180 1 -Total Receives: 375 -Stores 181 1 -Total Receives: 376 -Stores 182 1 -Total Receives: 377 -Stores 183 1 -Total Receives: 378 -Stores 184 1 -Total Receives: 379 -Stores 185 1 -Total rewards: 382 -Total Receives: 380 -Stores 186 1 -Total Receives: 381 -Stores 187 1 -Total Receives: 382 -Stores 188 1 -Total rewards: 388 -Total Receives: 383 -Stores 189 1 -Total Receives: 384 -Stores 190 1 -Total Receives: 385 -Stores 191 1 -Total Receives: 386 -Stores 192 1 -Total Receives: 387 -Stores 193 1 -Total Receives: 388 -Stores 194 1 -Total rewards: 394 -Total Receives: 389 -Stores 195 1 -Total Receives: 390 -Stores 196 1 -Total Receives: 391 -Stores 197 1 -Total Receives: 392 -Stores 198 1 -Total Receives: 393 -Total Receives: 394 -Total rewards: 396 -Total Receives: 395 -Stores 199 1 -Total Receives: 396 -Stores 200 1 -Total rewards: 402 -Total Receives: 397 -Stores 201 1 -Total Receives: 398 -Stores 202 1 -Total Receives: 399 -Stores 203 1 -Total Receives: 400 -Stores 204 1 -Total Receives: 401 -Stores 205 1 -Total Receives: 402 -Stores 206 1 -Total rewards: 406 -Total Receives: 403 -Stores 207 1 -Total Receives: 404 -Stores 208 1 -Total Receives: 405 -Stores 209 1 -Total Receives: 406 -Stores 210 1 -Total rewards: 407 -Total Receives: 407 -Total rewards: 411 -Total Receives: 408 -Stores 211 1 -Total Receives: 409 -Stores 212 1 -Total Receives: 410 -Stores 213 1 -Total Receives: 411 -Stores 214 1 -Total rewards: 416 -Total Receives: 412 -Stores 215 1 -Total Receives: 413 -Stores 216 1 -Total Receives: 414 -Stores 217 1 -Total Receives: 415 -Stores 218 1 -Total Receives: 416 -Stores 219 1 -Total rewards: 422 -Total Receives: 417 -Stores 220 1 -Total Receives: 418 -Stores 221 1 -Total Receives: 419 -Stores 222 1 -Total Receives: 420 -Stores 223 1 -Total Receives: 421 -Stores 224 1 -Total Receives: 422 -Stores 225 1 -Total rewards: 427 -Total Receives: 423 -Stores 226 1 -Total Receives: 424 -Stores 227 1 -Total Receives: 425 -Stores 228 1 -Total Receives: 426 -Stores 229 1 -Total Receives: 427 -Stores 230 1 -Total rewards: 430 -Total Receives: 428 -Stores 231 1 -Total Receives: 429 -Stores 232 1 -Total Receives: 430 -Stores 233 1 -Total rewards: 434 -Total Receives: 431 -Stores 234 1 -Total Receives: 432 -Stores 235 1 -Total Receives: 433 -Stores 236 1 -Total Receives: 434 -Stores 237 1 -Total rewards: 435 -Total Receives: 435 -Stores 238 1 -Total rewards: 437 -Total Receives: 436 -Stores 239 1 -Total Receives: 437 -Stores 240 1 -Total rewards: 440 -Total Receives: 438 -Stores 241 1 -Total Receives: 439 -Stores 242 1 -Total Receives: 440 -Stores 243 1 -Total rewards: 444 -Total Receives: 441 -Stores 244 1 -Total Receives: 442 -Stores 245 1 -Total Receives: 443 -Stores 246 1 -Total Receives: 444 -Stores 247 1 -Total rewards: 450 -Total Receives: 445 -Stores 248 1 -Total Receives: 446 -Stores 249 1 -Total Receives: 447 -Stores 250 1 -Total Receives: 448 -Stores 251 1 -Total Receives: 449 -Stores 252 1 -Total Receives: 450 -Stores 253 1 -Total rewards: 452 -Total Receives: 451 -Stores 254 1 -Total Receives: 452 -Stores 255 1 -Total rewards: 455 -Total Receives: 453 -Stores 256 1 -Total Receives: 454 -Stores 257 1 -Total Receives: 455 -Stores 258 1 -Total rewards: 461 -Total Receives: 456 -Stores 259 1 -Total Receives: 457 -Stores 260 1 -Total Receives: 458 -Stores 261 1 -Total Receives: 459 -Stores 262 1 -Total Receives: 460 -Stores 263 1 -Total Receives: 461 -Stores 264 1 -Total rewards: 465 -Total Receives: 462 -Stores 265 1 -Total Receives: 463 -Stores 266 1 -Total Receives: 464 -Stores 267 1 -Total Receives: 465 -Stores 268 1 -Total rewards: 469 -Total Receives: 466 -Stores 269 1 -Total Receives: 467 -Stores 270 1 -Total Receives: 468 -Stores 271 1 -Total Receives: 469 -Stores 272 1 -Total rewards: 472 -Total Receives: 470 -Stores 273 1 -Total Receives: 471 -Stores 274 1 -Total Receives: 472 -Stores 275 1 -Total rewards: 478 -Total Receives: 473 -Stores 276 1 -Total Receives: 474 -Stores 277 1 -Total Receives: 475 -Stores 278 1 -Total Receives: 476 -Stores 279 1 -Total Receives: 477 -Stores 280 1 -Total Receives: 478 -Stores 281 1 -Total rewards: 482 -Total Receives: 479 -Stores 282 1 -Total Receives: 480 -Stores 283 1 -Total Receives: 481 -Stores 284 1 -Total Receives: 482 -Stores 285 1 -Total rewards: 485 -Total Receives: 483 -Stores 286 1 -Total Receives: 484 -Stores 287 1 -Total Receives: 485 -Stores 288 1 -Total rewards: 490 -Total Receives: 486 -Stores 289 1 -Total Receives: 487 -Stores 290 1 -Total Receives: 488 -Stores 291 1 -Total Receives: 489 -Stores 292 1 -Total Receives: 490 -Stores 293 1 -Total rewards: 492 -Total Receives: 491 -Stores 294 1 -Total Receives: 492 -Stores 295 1 -Total rewards: 496 -Total Receives: 493 -Stores 296 1 -Total Receives: 494 -Stores 297 1 -Total Receives: 495 -Stores 298 1 -Total Receives: 496 -Stores 299 1 -Total rewards: 500 -Total Receives: 497 -Stores 300 1 -Total Receives: 498 -Stores 301 1 -Total Receives: 499 -Stores 302 1 -Total Receives: 500 -Stores 303 1 -Total rewards: 502 -Total Receives: 501 -Stores 304 1 -Total Receives: 502 -Stores 305 1 -Total rewards: 507 -Total Receives: 503 -Stores 306 1 -Total Receives: 504 -Stores 307 1 -Total Receives: 505 -Stores 308 1 -Total Receives: 506 -Stores 309 1 -Total Receives: 507 -Stores 310 1 -Total rewards: 513 -Total Receives: 508 -Stores 311 1 -Total Receives: 509 -Stores 312 1 -Total Receives: 510 -Stores 313 1 -Total Receives: 511 -Stores 314 1 -Total Receives: 512 -Stores 315 1 -Total Receives: 513 -Stores 316 1 -Total rewards: 516 -Total Receives: 514 -Stores 317 1 -Total Receives: 515 -Stores 318 1 -Total Receives: 516 -Stores 319 1 -Total rewards: 519 -Total Receives: 517 -Stores 320 1 -Total Receives: 518 -Stores 321 1 -Total Receives: 519 -Stores 322 1 -Total rewards: 523 -Total Receives: 520 -Stores 323 1 -Total Receives: 521 -Stores 324 1 -Total Receives: 522 -Stores 325 1 -Total Receives: 523 -Stores 326 1 -Total rewards: 526 -Total Receives: 524 -Stores 327 1 -Total Receives: 525 -Stores 328 1 -Total Receives: 526 -Stores 329 1 -Total rewards: 530 -Total Receives: 527 -Stores 330 1 -Total Receives: 528 -Stores 331 1 -Total Receives: 529 -Stores 332 1 -Total Receives: 530 -Stores 333 1 -Total rewards: 533 -Total Receives: 531 -Stores 334 1 -Total Receives: 532 -Stores 335 1 -Total Receives: 533 -Stores 336 1 -Total rewards: 538 -Total Receives: 534 -Stores 337 1 -Total Receives: 535 -Stores 338 1 -Total Receives: 536 -Stores 339 1 -Total Receives: 537 -Stores 340 1 -Total Receives: 538 -Stores 341 1 -Total rewards: 541 -Total Receives: 539 -Stores 342 1 -Total Receives: 540 -Stores 343 1 -Total Receives: 541 -Stores 344 1 -Total rewards: 545 -Total Receives: 542 -Stores 345 1 -Total Receives: 543 -Stores 346 1 -Total Receives: 544 -Stores 347 1 -Total Receives: 545 -Stores 348 1 -Total rewards: 549 -Total Receives: 546 -Stores 349 1 -Total Receives: 547 -Stores 350 1 -Total Receives: 548 -Stores 351 1 -Total Receives: 549 -Stores 352 1 -Total rewards: 552 -Total Receives: 550 -Stores 353 1 -Total Receives: 551 -Stores 354 1 -Total Receives: 552 -Stores 355 1 -Total rewards: 557 -Total Receives: 553 -Stores 356 1 -Total Receives: 554 -Stores 357 1 -Total Receives: 555 -Stores 358 1 -Total Receives: 556 -Stores 359 1 -Total Receives: 557 -Stores 360 1 -Total rewards: 564 -Total Receives: 558 -Stores 361 1 -Total Receives: 559 -Stores 362 1 -Total Receives: 560 -Stores 363 1 -Total Receives: 561 -Stores 364 1 -Total Receives: 562 -Stores 365 1 -Total Receives: 563 -Stores 366 1 -Total Receives: 564 -Stores 367 1 -Total rewards: 568 -Total Receives: 565 -Stores 368 1 -Total Receives: 566 -Stores 369 1 -Total Receives: 567 -Stores 370 1 -Total Receives: 568 -Stores 371 1 -Total rewards: 572 -Total Receives: 569 -Stores 372 1 -Total Receives: 570 -Stores 373 1 -Total Receives: 571 -Stores 374 1 -Total Receives: 572 -Stores 375 1 -Total rewards: 577 -Total Receives: 573 -Stores 376 1 -Total Receives: 574 -Stores 377 1 -Total Receives: 575 -Stores 378 1 -Total Receives: 576 -Stores 379 1 -Total Receives: 577 -Stores 380 1 -Total rewards: 582 -Total Receives: 578 -Stores 381 1 -Total Receives: 579 -Stores 382 1 -Total Receives: 580 -Stores 383 1 -Total Receives: 581 -Stores 384 1 -Total Receives: 582 -Stores 385 1 -Total rewards: 583 -Total Receives: 583 -Stores 386 1 -Total rewards: 590 -Total Receives: 584 -Stores 387 1 -Total Receives: 585 -Stores 388 1 -Total Receives: 586 -Stores 389 1 -Total Receives: 587 -Stores 390 1 -Total Receives: 588 -Stores 391 1 -Total Receives: 589 -Stores 392 1 -Total Receives: 590 -Stores 393 1 -Total rewards: 597 -Total Receives: 591 -Stores 394 1 -Total Receives: 592 -Stores 395 1 -Total Receives: 593 -Stores 396 1 -Total Receives: 594 -Stores 397 1 -Total Receives: 595 -Stores 398 1 -Total Receives: 596 -Stores 399 1 -Total Receives: 597 -Stores 400 1 -Total rewards: 602 -Total Receives: 598 -Stores 401 1 -Total Receives: 599 -Stores 402 1 -Total Receives: 600 -Stores 403 1 -Total Receives: 601 -Stores 404 1 -Total Receives: 602 -Stores 405 1 -Total rewards: 604 -Total Receives: 603 -Stores 406 1 -Total Receives: 604 -Stores 407 1 -Total rewards: 612 -Total Receives: 605 -Stores 408 1 -Total Receives: 606 -Stores 409 1 -Total Receives: 607 -Stores 410 1 -Total Receives: 608 -Stores 411 1 -Total Receives: 609 -Stores 412 1 -Total Receives: 610 -Stores 413 1 -Total Receives: 611 -Stores 414 1 -Total Receives: 612 -Stores 415 1 -Total rewards: 620 -Total Receives: 613 -Stores 416 1 -Total Receives: 614 -Stores 417 1 -Total Receives: 615 -Stores 418 1 -Total Receives: 616 -Stores 419 1 -Total Receives: 617 -Stores 420 1 -Total Receives: 618 -Stores 421 1 -Total Receives: 619 -Stores 422 1 -Total Receives: 620 -Stores 423 1 -Total rewards: 626 -Total Receives: 621 -Stores 424 1 -Total Receives: 622 -Stores 425 1 -Total Receives: 623 -Stores 426 1 -Total Receives: 624 -Stores 427 1 -Total Receives: 625 -Total Receives: 626 -Total rewards: 630 -Total Receives: 627 -Total Receives: 628 -Total Receives: 629 -Total Receives: 630 -Total rewards: 636 -Total Receives: 631 -Stores 428 1 -Total Receives: 632 -Stores 429 1 -Total Receives: 633 -Stores 430 1 -Total Receives: 634 -Stores 431 1 -Total Receives: 635 -Stores 432 1 -Total Receives: 636 -Stores 433 1 -Total rewards: 640 -Total Receives: 637 -Stores 434 1 -Total Receives: 638 -Stores 435 1 -Total Receives: 639 -Stores 436 1 -Total Receives: 640 -Stores 437 1 -Total rewards: 645 -Total Receives: 641 -Stores 438 1 -Total Receives: 642 -Stores 439 1 -Total Receives: 643 -Stores 440 1 -Total Receives: 644 -Stores 441 1 -Total Receives: 645 -Stores 442 1 -Total rewards: 651 -Total Receives: 646 -Stores 443 1 -Total Receives: 647 -Stores 444 1 -Total Receives: 648 -Stores 445 1 -Total Receives: 649 -Stores 446 1 -Total Receives: 650 -Stores 447 1 -Total Receives: 651 -Stores 448 1 -Total rewards: 656 -Total Receives: 652 -Stores 449 1 -Total Receives: 653 -Stores 450 1 -Total Receives: 654 -Stores 451 1 -Total Receives: 655 -Stores 452 1 -Total Receives: 656 -Stores 453 1 -Total rewards: 659 -Total Receives: 657 -Stores 454 1 -Total Receives: 658 -Stores 455 1 -Total Receives: 659 -Stores 456 1 -Total rewards: 662 -Total Receives: 660 -Stores 457 1 -Total Receives: 661 -Stores 458 1 -Total Receives: 662 -Stores 459 1 -Total rewards: 667 -Total Receives: 663 -Stores 460 1 -Total Receives: 664 -Stores 461 1 -Total Receives: 665 -Stores 462 1 -Total Receives: 666 -Stores 463 1 -Total Receives: 667 -Stores 464 1 -Total rewards: 672 -Total Receives: 668 -Stores 465 1 -Total Receives: 669 -Stores 466 1 -Total Receives: 670 -Stores 467 1 -Total Receives: 671 -Stores 468 1 -Total Receives: 672 -Stores 469 1 -Total rewards: 678 -Total Receives: 673 -Stores 470 1 -Total Receives: 674 -Stores 471 1 -Total Receives: 675 -Stores 472 1 -Total Receives: 676 -Stores 473 1 -Total Receives: 677 -Stores 474 1 -Total Receives: 678 -Stores 475 1 -Total rewards: 685 -Total Receives: 679 -Stores 476 1 -Total Receives: 680 -Total Receives: 681 -Total Receives: 682 -Total Receives: 683 -Total Receives: 684 -Total Receives: 685 -Total rewards: 690 -Total Receives: 686 -Stores 477 1 -Total Receives: 687 -Stores 478 1 -Total Receives: 688 -Stores 479 1 -Total Receives: 689 -Stores 480 1 -Total Receives: 690 -Stores 481 1 -Total rewards: 695 -Total Receives: 691 -Stores 482 1 -Total Receives: 692 -Stores 483 1 -Total Receives: 693 -Stores 484 1 -Total Receives: 694 -Stores 485 1 -Total Receives: 695 -Stores 486 1 -Total rewards: 701 -Total Receives: 696 -Stores 487 1 -Total Receives: 697 -Stores 488 1 -Total Receives: 698 -Stores 489 1 -Total Receives: 699 -Stores 490 1 -Total Receives: 700 -Stores 491 1 -Total Receives: 701 -Stores 492 1 -Total rewards: 704 -Total Receives: 702 -Stores 493 1 -Total Receives: 703 -Stores 494 1 -Total Receives: 704 -Stores 495 1 -Total rewards: 708 -Total Receives: 705 -Stores 496 1 -Total Receives: 706 -Stores 497 1 -Total Receives: 707 -Stores 498 1 -Total Receives: 708 -Stores 499 1 -Total rewards: 712 -Total Receives: 709 -Stores 500 1 -Total Receives: 710 -Stores 501 1 -Total Receives: 711 -Stores 502 1 -Total Receives: 712 -Stores 503 1 -Total rewards: 718 -Total Receives: 713 -Stores 504 1 -Total Receives: 714 -Stores 505 1 -Total Receives: 715 -Stores 506 1 -Total Receives: 716 -Stores 507 1 -Total Receives: 717 -Stores 508 1 -Total Receives: 718 -Stores 509 1 -Total rewards: 722 -Total Receives: 719 -Stores 510 1 -Total Receives: 720 -Stores 511 1 -Total Receives: 721 -Stores 512 1 -Total Receives: 722 -Stores 513 1 -Total rewards: 726 -Total Receives: 723 -Stores 514 1 -Total Receives: 724 -Stores 515 1 -Total Receives: 725 -Stores 516 1 -Total Receives: 726 -Stores 517 1 -Total rewards: 729 -Total Receives: 727 -Stores 518 1 -Total Receives: 728 -Stores 519 1 -Total Receives: 729 -Stores 520 1 -Total rewards: 734 -Total Receives: 730 -Stores 521 1 -Total Receives: 731 -Stores 522 1 -Total Receives: 732 -Stores 523 1 -Total Receives: 733 -Stores 524 1 -Total Receives: 734 -Stores 525 1 -Total rewards: 740 -Total Receives: 735 -Stores 526 1 -Total Receives: 736 -Stores 527 1 -Total Receives: 737 -Stores 528 1 -Total Receives: 738 -Stores 529 1 -Total Receives: 739 -Stores 530 1 -Total Receives: 740 -Stores 531 1 -Total rewards: 744 -Total Receives: 741 -Stores 532 1 -Total Receives: 742 -Stores 533 1 -Total Receives: 743 -Stores 534 1 -Total Receives: 744 -Stores 535 1 -Total rewards: 751 -Total Receives: 745 -Stores 536 1 -Total Receives: 746 -Stores 537 1 -Total Receives: 747 -Stores 538 1 -Total Receives: 748 -Stores 539 1 -Total Receives: 749 -Stores 540 1 -Total Receives: 750 -Stores 541 1 -Total Receives: 751 -Stores 542 1 -Total rewards: 754 -Total Receives: 752 -Stores 543 1 -Total Receives: 753 -Stores 544 1 -Total Receives: 754 -Stores 545 1 -Total rewards: 757 -Total Receives: 755 -Stores 546 1 -Total Receives: 756 -Stores 547 1 -Total Receives: 757 -Stores 548 1 -Total rewards: 761 -Total Receives: 758 -Stores 549 1 -Total Receives: 759 -Stores 550 1 -Total Receives: 760 -Stores 551 1 -Total Receives: 761 -Stores 552 1 -Total rewards: 766 -Total Receives: 762 -Total Receives: 763 -Total Receives: 764 -Total Receives: 765 -Total Receives: 766 -Total rewards: 770 -Total Receives: 767 -Total Receives: 768 -Total Receives: 769 -Total Receives: 770 -Total rewards: 775 -Total Receives: 771 -Stores 553 1 -Total Receives: 772 -Stores 554 1 -Total Receives: 773 -Stores 555 1 -Total Receives: 774 -Stores 556 1 -Total Receives: 775 -Stores 557 1 -Total rewards: 783 -Total Receives: 776 -Total Receives: 777 -Total Receives: 778 -Total Receives: 779 -Total Receives: 780 -Total Receives: 781 -Total Receives: 782 -Total Receives: 783 -Total rewards: 788 -Total Receives: 784 -Total Receives: 785 -Total Receives: 786 -Total Receives: 787 -Total Receives: 788 -Total rewards: 794 -Total Receives: 789 -Stores 558 1 -Total Receives: 790 -Stores 559 1 -Total Receives: 791 -Stores 560 1 -Total Receives: 792 -Stores 561 1 -Total Receives: 793 -Stores 562 1 -Total Receives: 794 -Stores 563 1 -Total rewards: 799 -Total Receives: 795 -Total Receives: 796 -Total Receives: 797 -Total Receives: 798 -Total Receives: 799 -Total rewards: 802 -Total Receives: 800 -Stores 564 1 -Total Receives: 801 -Stores 565 1 -Total Receives: 802 -Stores 566 1 -Total rewards: 806 -Total Receives: 803 -Total Receives: 804 -Total Receives: 805 -Total Receives: 806 -Total rewards: 812 -Total Receives: 807 -Stores 567 1 -Total Receives: 808 -Stores 568 1 -Total Receives: 809 -Stores 569 1 -Total Receives: 810 -Stores 570 1 -Total Receives: 811 -Stores 571 1 -Total Receives: 812 -Stores 572 1 -Total rewards: 816 -Total Receives: 813 -Stores 573 1 -Total Receives: 814 -Stores 574 1 -Total Receives: 815 -Stores 575 1 -Total Receives: 816 -Stores 576 1 -Total rewards: 820 -Total Receives: 817 -Stores 577 1 -Total Receives: 818 -Stores 578 1 -Total Receives: 819 -Stores 579 1 -Total Receives: 820 -Stores 580 1 -Total rewards: 824 -Total Receives: 821 -Stores 581 1 -Total Receives: 822 -Stores 582 1 -Total Receives: 823 -Stores 583 1 -Total Receives: 824 -Stores 584 1 -Total rewards: 828 -Total Receives: 825 -Stores 585 1 -Total Receives: 826 -Stores 586 1 -Total Receives: 827 -Stores 587 1 -Total Receives: 828 -Stores 588 1 -Total rewards: 832 -Total Receives: 829 -Stores 589 1 -Total Receives: 830 -Stores 590 1 -Total Receives: 831 -Stores 591 1 -Total Receives: 832 -Stores 592 1 -Total rewards: 839 -Total Receives: 833 -Total Receives: 834 -Total Receives: 835 -Total Receives: 836 -Total Receives: 837 -Total Receives: 838 -Total Receives: 839 -Total rewards: 843 -Total Receives: 840 -Stores 593 1 -Total Receives: 841 -Stores 594 1 -Total Receives: 842 -Stores 595 1 -Total Receives: 843 -Stores 596 1 -Total rewards: 848 -Total Receives: 844 -Stores 597 1 -Total Receives: 845 -Stores 598 1 -Total Receives: 846 -Stores 599 1 -Total Receives: 847 -Stores 600 1 -Total Receives: 848 -Stores 601 1 -Total rewards: 852 -Total Receives: 849 -Stores 602 1 -Total Receives: 850 -Stores 603 1 -Total Receives: 851 -Stores 604 1 -Total Receives: 852 -Stores 605 1 -Total rewards: 857 -Total Receives: 853 -Total Receives: 854 -Total Receives: 855 -Total Receives: 856 -Total Receives: 857 -Total rewards: 863 -Total Receives: 858 -Stores 606 1 -Total Receives: 859 -Stores 607 1 -Total Receives: 860 -Stores 608 1 -Total Receives: 861 -Stores 609 1 -Total Receives: 862 -Stores 610 1 -Total Receives: 863 -Stores 611 1 -Total rewards: 870 -Total Receives: 864 -Total Receives: 865 -Total Receives: 866 -Total Receives: 867 -Total Receives: 868 -Total Receives: 869 -Total Receives: 870 -Total rewards: 875 -Total Receives: 871 -Stores 612 1 -Total Receives: 872 -Stores 613 1 -Total Receives: 873 -Stores 614 1 -Total Receives: 874 -Stores 615 1 -Total Receives: 875 -Stores 616 1 -Total rewards: 880 -Total Receives: 876 -Total Receives: 877 -Total Receives: 878 -Total Receives: 879 -Total Receives: 880 -Total rewards: 884 -Total Receives: 881 -Total Receives: 882 -Total Receives: 883 -Total Receives: 884 -Total rewards: 889 -Total Receives: 885 -Total Receives: 886 -Total Receives: 887 -Total Receives: 888 -Total Receives: 889 -Total rewards: 893 -Total Receives: 890 -Total Receives: 891 -Total Receives: 892 -Total Receives: 893 -Total rewards: 899 -Total Receives: 894 -Stores 617 1 -Total Receives: 895 -Stores 618 1 -Total Receives: 896 -Stores 619 1 -Total Receives: 897 -Stores 620 1 -Total Receives: 898 -Stores 621 1 -Total Receives: 899 -Stores 622 1 -Total rewards: 905 -Total Receives: 900 -Total Receives: 901 -Total Receives: 902 -Total Receives: 903 -Total Receives: 904 -Total Receives: 905 -Total rewards: 910 -Total Receives: 906 -Total Receives: 907 -Total Receives: 908 -Total Receives: 909 -Total Receives: 910 -Total rewards: 915 -Total Receives: 911 -Stores 623 1 -Total Receives: 912 -Stores 624 1 -Total Receives: 913 -Stores 625 1 -Total Receives: 914 -Stores 626 1 -Total Receives: 915 -Stores 627 1 -Total rewards: 920 -Total Receives: 916 -Stores 628 1 -Total Receives: 917 -Stores 629 1 -Total Receives: 918 -Stores 630 1 -Total Receives: 919 -Stores 631 1 -Total Receives: 920 -Stores 632 1 -Total rewards: 928 -Total Receives: 921 -Stores 633 1 -Total Receives: 922 -Stores 634 1 -Total Receives: 923 -Stores 635 1 -Total Receives: 924 -Stores 636 1 -Total Receives: 925 -Stores 637 1 -Total Receives: 926 -Stores 638 1 -Total Receives: 927 -Stores 639 1 -Total Receives: 928 -Stores 640 1 -Total rewards: 934 -Total Receives: 929 -Total Receives: 930 -Total Receives: 931 -Total Receives: 932 -Total Receives: 933 -Total Receives: 934 -Total rewards: 941 -Total Receives: 935 -Stores 641 1 -Total Receives: 936 -Stores 642 1 -Total Receives: 937 -Stores 643 1 -Total Receives: 938 -Stores 644 1 -Total Receives: 939 -Stores 645 1 -Total Receives: 940 -Stores 646 1 -Total Receives: 941 -Stores 647 1 -Total rewards: 944 -Total Receives: 942 -Stores 648 1 -Total Receives: 943 -Stores 649 1 -Total Receives: 944 -Stores 650 1 -Total rewards: 949 -Total Receives: 945 -Stores 651 1 -Total Receives: 946 -Stores 652 1 -Total Receives: 947 -Stores 653 1 -Total Receives: 948 -Stores 654 1 -Total Receives: 949 -Stores 655 1 -Total rewards: 956 -Total Receives: 950 -Total Receives: 951 -Total Receives: 952 -Total Receives: 953 -Total Receives: 954 -Total Receives: 955 -Total Receives: 956 -Total rewards: 960 -Total Receives: 957 -Stores 656 1 -Total Receives: 958 -Total Receives: 959 -Total Receives: 960 -Total rewards: 966 -Total Receives: 961 -Total Receives: 962 -Total Receives: 963 -Total Receives: 964 -Total Receives: 965 -Total Receives: 966 -Total rewards: 971 -Total Receives: 967 -Total Receives: 968 -Total Receives: 969 -Total Receives: 970 -Total Receives: 971 -Total rewards: 977 -Total Receives: 972 -Total Receives: 973 -Total Receives: 974 -Total Receives: 975 -Total Receives: 976 -Total Receives: 977 -Total rewards: 985 -Total Receives: 978 -Total Receives: 979 -Total Receives: 980 -Total Receives: 981 -Total Receives: 982 -Total Receives: 983 -Total Receives: 984 -Total Receives: 985 -Total rewards: 987 -Total Receives: 986 -Total Receives: 987 -Total rewards: 993 -Total Receives: 988 -Total Receives: 989 -Total Receives: 990 -Total Receives: 991 -Total Receives: 992 -Total Receives: 993 -Total rewards: 1000 -Total Receives: 994 -Stores 657 1 -Total Receives: 995 -Stores 658 1 -Total Receives: 996 -Stores 659 1 -Total Receives: 997 -Stores 660 1 -Total Receives: 998 -Stores 661 1 -Total Receives: 999 -Stores 662 1 -Total Receives: 1000 -Stores 663 1 -Total rewards: 1008 -Total Receives: 1001 -Total Receives: 1002 -Total Receives: 1003 -Total Receives: 1004 -Total Receives: 1005 -Total Receives: 1006 -Total Receives: 1007 -Total Receives: 1008 -Total rewards: 1010 -Total Receives: 1009 -Total Receives: 1010 -Total rewards: 1018 -Total Receives: 1011 -Total Receives: 1012 -Total Receives: 1013 -Total Receives: 1014 -Total Receives: 1015 -Total Receives: 1016 -Total Receives: 1017 -Total Receives: 1018 -Total rewards: 1021 -Total Receives: 1019 -Stores 664 1 -Total Receives: 1020 -Stores 665 1 -Total Receives: 1021 -Stores 666 1 -Total rewards: 1027 -Total Receives: 1022 -Total Receives: 1023 -Total Receives: 1024 -Total Receives: 1025 -Total Receives: 1026 -Total Receives: 1027 -Total rewards: 1035 -Total Receives: 1028 -Total Receives: 1029 -Total Receives: 1030 -Total Receives: 1031 -Total Receives: 1032 -Total Receives: 1033 -Total Receives: 1034 -Total Receives: 1035 -Total rewards: 1039 -Total Receives: 1036 -Stores 667 1 -Total Receives: 1037 -Stores 668 1 -Total Receives: 1038 -Stores 669 1 -Total Receives: 1039 -Stores 670 1 -Total rewards: 1046 -Total Receives: 1040 -Stores 671 1 -Total Receives: 1041 -Stores 672 1 -Total Receives: 1042 -Stores 673 1 -Total Receives: 1043 -Stores 674 1 -Total Receives: 1044 -Stores 675 1 -Total Receives: 1045 -Stores 676 1 -Total Receives: 1046 -Stores 677 1 -Total rewards: 1051 -Total Receives: 1047 -Stores 678 1 -Total Receives: 1048 -Stores 679 1 -Total Receives: 1049 -Stores 680 1 -Total Receives: 1050 -Stores 681 1 -Total Receives: 1051 -Stores 682 1 -Total rewards: 1057 -Total Receives: 1052 -Stores 683 1 -Total Receives: 1053 -Stores 684 1 -Total Receives: 1054 -Stores 685 1 -Total Receives: 1055 -Stores 686 1 -Total Receives: 1056 -Stores 687 1 -Total Receives: 1057 -Stores 688 1 -Total rewards: 1062 -Total Receives: 1058 -Stores 689 1 -Total Receives: 1059 -Stores 690 1 -Total Receives: 1060 -Stores 691 1 -Total Receives: 1061 -Stores 692 1 -Total Receives: 1062 -Stores 693 1 -Total rewards: 1068 -Total Receives: 1063 -Stores 694 1 -Total Receives: 1064 -Stores 695 1 -Total Receives: 1065 -Stores 696 1 -Total Receives: 1066 -Stores 697 1 -Total Receives: 1067 -Stores 698 1 -Total Receives: 1068 -Stores 699 1 -Total rewards: 1076 -Total Receives: 1069 -Stores 700 1 -Total Receives: 1070 -Stores 701 1 -Total Receives: 1071 -Stores 702 1 -Total Receives: 1072 -Stores 703 1 -Total Receives: 1073 -Stores 704 1 -Total Receives: 1074 -Stores 705 1 -Total Receives: 1075 -Stores 706 1 -Total Receives: 1076 -Stores 707 1 -Total rewards: 1080 -Total Receives: 1077 -Stores 708 1 -Total Receives: 1078 -Stores 709 1 -Total Receives: 1079 -Stores 710 1 -Total Receives: 1080 -Stores 711 1 -Total rewards: 1084 -Total Receives: 1081 -Stores 712 1 -Total Receives: 1082 -Stores 713 1 -Total Receives: 1083 -Stores 714 1 -Total Receives: 1084 -Stores 715 1 -Total rewards: 1091 -Total Receives: 1085 -Stores 716 1 -Total Receives: 1086 -Stores 717 1 -Total Receives: 1087 -Stores 718 1 -Total Receives: 1088 -Stores 719 1 -Total Receives: 1089 -Stores 720 1 -Total Receives: 1090 -Stores 721 1 -Total Receives: 1091 -Stores 722 1 -Total rewards: 1094 -Total Receives: 1092 -Stores 723 1 -Total Receives: 1093 -Stores 724 1 -Total Receives: 1094 -Stores 725 1 -Total rewards: 1102 -Total Receives: 1095 -Stores 726 1 -Total Receives: 1096 -Stores 727 1 -Total Receives: 1097 -Stores 728 1 -Total Receives: 1098 -Stores 729 1 -Total Receives: 1099 -Stores 730 1 -Total Receives: 1100 -Stores 731 1 -Total Receives: 1101 -Stores 732 1 -Total Receives: 1102 -Stores 733 1 -Total rewards: 1110 -Total Receives: 1103 -Stores 734 1 -Total Receives: 1104 -Stores 735 1 -Total Receives: 1105 -Stores 736 1 -Total Receives: 1106 -Stores 737 1 -Total Receives: 1107 -Stores 738 1 -Total Receives: 1108 -Stores 739 1 -Total Receives: 1109 -Stores 740 1 -Total Receives: 1110 -Stores 741 1 -Total rewards: 1115 -Total Receives: 1111 -Total Receives: 1112 -Total Receives: 1113 -Total Receives: 1114 -Total Receives: 1115 -Total rewards: 1120 -Total Receives: 1116 -Stores 742 1 -Total Receives: 1117 -Stores 743 1 -Total Receives: 1118 -Stores 744 1 -Total Receives: 1119 -Stores 745 1 -Total Receives: 1120 -Stores 746 1 -Total rewards: 1128 -Total Receives: 1121 -Total Receives: 1122 -Total Receives: 1123 -Total Receives: 1124 -Total Receives: 1125 -Total Receives: 1126 -Total Receives: 1127 -Total Receives: 1128 -Total rewards: 1133 -Total Receives: 1129 -Total Receives: 1130 -Total Receives: 1131 -Total Receives: 1132 -Total Receives: 1133 -Total rewards: 1140 -Total Receives: 1134 -Stores 747 1 -Total Receives: 1135 -Stores 748 1 -Total Receives: 1136 -Stores 749 1 -Total Receives: 1137 -Stores 750 1 -Total Receives: 1138 -Stores 751 1 -Total Receives: 1139 -Stores 752 1 -Total Receives: 1140 -Stores 753 1 -Total rewards: 1147 -Total Receives: 1141 -Stores 754 1 -Total Receives: 1142 -Stores 755 1 -Total Receives: 1143 -Stores 756 1 -Total Receives: 1144 -Stores 757 1 -Total Receives: 1145 -Stores 758 1 -Total Receives: 1146 -Stores 759 1 -Total Receives: 1147 -Stores 760 1 -Total rewards: 1155 -Total Receives: 1148 -Stores 761 1 -Total Receives: 1149 -Stores 762 1 -Total Receives: 1150 -Stores 763 1 -Total Receives: 1151 -Stores 764 1 -Total Receives: 1152 -Stores 765 1 -Total Receives: 1153 -Stores 766 1 -Total Receives: 1154 -Stores 767 1 -Total Receives: 1155 -Stores 768 1 -Total rewards: 1159 -Total Receives: 1156 -Stores 769 1 -Total Receives: 1157 -Stores 770 1 -Total Receives: 1158 -Stores 771 1 -Total Receives: 1159 -Stores 772 1 -Total rewards: 1165 -Total Receives: 1160 -Stores 773 1 -Total Receives: 1161 -Stores 774 1 -Total Receives: 1162 -Stores 775 1 -Total Receives: 1163 -Stores 776 1 -Total Receives: 1164 -Stores 777 1 -Total Receives: 1165 -Stores 778 1 -Total rewards: 1169 -Total Receives: 1166 -Stores 779 1 -Total Receives: 1167 -Stores 780 1 -Total Receives: 1168 -Stores 781 1 -Total Receives: 1169 -Stores 782 1 -Total rewards: 1175 -Total Receives: 1170 -Total Receives: 1171 -Total Receives: 1172 -Total Receives: 1173 -Total Receives: 1174 -Total Receives: 1175 -Total rewards: 1181 -Total Receives: 1176 -Stores 783 1 -Total Receives: 1177 -Stores 784 1 -Total Receives: 1178 -Stores 785 1 -Total Receives: 1179 -Stores 786 1 -Total Receives: 1180 -Stores 787 1 -Total Receives: 1181 -Stores 788 1 -Total rewards: 1189 -Total Receives: 1182 -Stores 789 1 -Total Receives: 1183 -Stores 790 1 -Total Receives: 1184 -Stores 791 1 -Total Receives: 1185 -Stores 792 1 -Total Receives: 1186 -Stores 793 1 -Total Receives: 1187 -Stores 794 1 -Total Receives: 1188 -Stores 795 1 -Total Receives: 1189 -Stores 796 1 -Total rewards: 1195 -Total Receives: 1190 -Stores 797 1 -Total Receives: 1191 -Stores 798 1 -Total Receives: 1192 -Stores 799 1 -Total Receives: 1193 -Stores 800 1 -Total Receives: 1194 -Stores 801 1 -Total Receives: 1195 -Total rewards: 1199 -Total Receives: 1196 -Total Receives: 1197 -Total Receives: 1198 -Total Receives: 1199 -Total rewards: 1205 -Total Receives: 1200 -Total Receives: 1201 -Total Receives: 1202 -Total Receives: 1203 -Total Receives: 1204 -Total Receives: 1205 -Total rewards: 1209 -Total Receives: 1206 -Stores 802 1 -Total Receives: 1207 -Stores 803 1 -Total Receives: 1208 -Stores 804 1 -Total Receives: 1209 -Stores 805 1 -Total rewards: 1218 -Total Receives: 1210 -Stores 806 1 -Total Receives: 1211 -Total Receives: 1212 -Total Receives: 1213 -Total Receives: 1214 -Total Receives: 1215 -Total Receives: 1216 -Total Receives: 1217 -Total Receives: 1218 -Total rewards: 1221 -Total Receives: 1219 -Stores 807 1 -Total Receives: 1220 -Stores 808 1 -Total Receives: 1221 -Stores 809 1 -Total rewards: 1229 -Total Receives: 1222 -Stores 810 1 -Total Receives: 1223 -Stores 811 1 -Total Receives: 1224 -Stores 812 1 -Total Receives: 1225 -Stores 813 1 -Total Receives: 1226 -Stores 814 1 -Total Receives: 1227 -Stores 815 1 -Total Receives: 1228 -Stores 816 1 -Total Receives: 1229 -Stores 817 1 -Total rewards: 1237 -Total Receives: 1230 -Stores 818 1 -Total Receives: 1231 -Stores 819 1 -Total Receives: 1232 -Stores 820 1 -Total Receives: 1233 -Stores 821 1 -Total Receives: 1234 -Stores 822 1 -Total Receives: 1235 -Stores 823 1 -Total Receives: 1236 -Stores 824 1 -Total Receives: 1237 -Stores 825 1 -Total rewards: 1245 -Total Receives: 1238 -Stores 826 1 -Total Receives: 1239 -Stores 827 1 -Total Receives: 1240 -Stores 828 1 -Total Receives: 1241 -Stores 829 1 -Total Receives: 1242 -Stores 830 1 -Total Receives: 1243 -Stores 831 1 -Total Receives: 1244 -Stores 832 1 -Total Receives: 1245 -Stores 833 1 -Stores 523 1 -Total Receives: 733 -Stores 524 1 -Total Receives: 734 -Stores 525 1 -Total rewards: 740 -Total Receives: 735 -Stores 526 1 -Total Receives: 736 -Stores 527 1 -Total Receives: 737 -Stores 528 1 -Total Receives: 738 -Stores 529 1 -Total Receives: 739 -Stores 530 1 -Total Receives: 740 -Stores 531 1 -Total rewards: 744 -Total Receives: 741 -Stores 532 1 -Total Receives: 742 -Stores 533 1 -Total Receives: 743 -Stores 534 1 -Total Receives: 744 -Stores 535 1 -Total rewards: 751 -Total Receives: 745 -Stores 536 1 -Total Receives: 746 -Stores 537 1 -Total Receives: 747 -Stores 538 1 -Total Receives: 748 -Stores 539 1 -Total Receives: 749 -Stores 540 1 -Total Receives: 750 -Stores 541 1 -Total Receives: 751 -Stores 542 1 -Total rewards: 754 -Total Receives: 752 -Stores 543 1 -Total Receives: 753 -Stores 544 1 -Total Receives: 754 -Stores 545 1 -Total rewards: 757 -Total Receives: 755 -Stores 546 1 -Total Receives: 756 -Stores 547 1 -Total Receives: 757 -Stores 548 1 -Total rewards: 761 -Total Receives: 758 -Stores 549 1 -Total Receives: 759 -Stores 550 1 -Total Receives: 760 -Stores 551 1 -Total Receives: 761 -Stores 552 1 -Total rewards: 766 -Total Receives: 762 -Total Receives: 763 -Total Receives: 764 -Total Receives: 765 -Total Receives: 766 -Total rewards: 770 -Total Receives: 767 -Total Receives: 768 -Total Receives: 769 -Total Receives: 770 -Total rewards: 775 -Total Receives: 771 -Stores 553 1 -Total Receives: 772 -Stores 554 1 -Total Receives: 773 -Stores 555 1 -Total Receives: 774 -Stores 556 1 -Total Receives: 775 -Stores 557 1 -Total rewards: 783 -Total Receives: 776 -Total Receives: 777 -Total Receives: 778 -Total Receives: 779 -Total Receives: 780 -Total Receives: 781 -Total Receives: 782 -Total Receives: 783 -Total rewards: 788 -Total Receives: 784 -Total Receives: 785 -Total Receives: 786 -Total Receives: 787 -Total Receives: 788 -Total rewards: 794 -Total Receives: 789 -Stores 558 1 -Total Receives: 790 -Stores 559 1 -Total Receives: 791 -Stores 560 1 -Total Receives: 792 -Stores 561 1 -Total Receives: 793 -Stores 562 1 -Total Receives: 794 -Stores 563 1 -Total rewards: 799 -Total Receives: 795 -Total Receives: 796 -Total Receives: 797 -Total Receives: 798 -Total Receives: 799 -Total rewards: 802 -Total Receives: 800 -Stores 564 1 -Total Receives: 801 -Stores 565 1 -Total Receives: 802 -Stores 566 1 -Total rewards: 806 -Total Receives: 803 -Total Receives: 804 -Total Receives: 805 -Total Receives: 806 -Total rewards: 812 -Total Receives: 807 -Stores 567 1 -Total Receives: 808 -Stores 568 1 -Total Receives: 809 -Stores 569 1 -Total Receives: 810 -Stores 570 1 -Total Receives: 811 -Stores 571 1 -Total Receives: 812 -Stores 572 1 -Total rewards: 816 -Total Receives: 813 -Stores 573 1 -Total Receives: 814 -Stores 574 1 -Total Receives: 815 -Stores 575 1 -Total Receives: 816 -Stores 576 1 -Total rewards: 820 -Total Receives: 817 -Stores 577 1 -Total Receives: 818 -Stores 578 1 -Total Receives: 819 -Stores 579 1 -Total Receives: 820 -Stores 580 1 -Total rewards: 824 -Total Receives: 821 -Stores 581 1 -Total Receives: 822 -Stores 582 1 -Total Receives: 823 -Stores 583 1 -Total Receives: 824 -Stores 584 1 -Total rewards: 828 -Total Receives: 825 -Stores 585 1 -Total Receives: 826 -Stores 586 1 -Total Receives: 827 -Stores 587 1 -Total Receives: 828 -Stores 588 1 -Total rewards: 832 -Total Receives: 829 -Stores 589 1 -Total Receives: 830 -Stores 590 1 -Total Receives: 831 -Stores 591 1 -Total Receives: 832 -Stores 592 1 -Total rewards: 839 -Total Receives: 833 -Total Receives: 834 -Total Receives: 835 -Total Receives: 836 -Total Receives: 837 -Total Receives: 838 -Total Receives: 839 -Total rewards: 843 -Total Receives: 840 -Stores 593 1 -Total Receives: 841 -Stores 594 1 -Total Receives: 842 -Stores 595 1 -Total Receives: 843 -Stores 596 1 -Total rewards: 848 -Total Receives: 844 -Stores 597 1 -Total Receives: 845 -Stores 598 1 -Total Receives: 846 -Stores 599 1 -Total Receives: 847 -Stores 600 1 -Total Receives: 848 -Stores 601 1 -Total rewards: 852 -Total Receives: 849 -Stores 602 1 -Total Receives: 850 -Stores 603 1 -Total Receives: 851 -Stores 604 1 -Total Receives: 852 -Stores 605 1 -Total rewards: 857 -Total Receives: 853 -Total Receives: 854 -Total Receives: 855 -Total Receives: 856 -Total Receives: 857 -Total rewards: 863 -Total Receives: 858 -Stores 606 1 -Total Receives: 859 -Stores 607 1 -Total Receives: 860 -Stores 608 1 -Total Receives: 861 -Stores 609 1 -Total Receives: 862 -Stores 610 1 -Total Receives: 863 -Stores 611 1 -Total rewards: 870 -Total Receives: 864 -Total Receives: 865 -Total Receives: 866 -Total Receives: 867 -Total Receives: 868 -Total Receives: 869 -Total Receives: 870 -Total rewards: 875 -Total Receives: 871 -Stores 612 1 -Total Receives: 872 -Stores 613 1 -Total Receives: 873 -Stores 614 1 -Total Receives: 874 -Stores 615 1 -Total Receives: 875 -Stores 616 1 -Total rewards: 880 -Total Receives: 876 -Total Receives: 877 -Total Receives: 878 -Total Receives: 879 -Total Receives: 880 -Total rewards: 884 -Total Receives: 881 -Total Receives: 882 -Total Receives: 883 -Total Receives: 884 -Total rewards: 889 -Total Receives: 885 -Total Receives: 886 -Total Receives: 887 -Total Receives: 888 -Total Receives: 889 -Total rewards: 893 -Total Receives: 890 -Total Receives: 891 -Total Receives: 892 -Total Receives: 893 -Total rewards: 899 -Total Receives: 894 -Stores 617 1 -Total Receives: 895 -Stores 618 1 -Total Receives: 896 -Stores 619 1 -Total Receives: 897 -Stores 620 1 -Total Receives: 898 -Stores 621 1 -Total Receives: 899 -Stores 622 1 -Total rewards: 905 -Total Receives: 900 -Total Receives: 901 -Total Receives: 902 -Total Receives: 903 -Total Receives: 904 -Total Receives: 905 -Total rewards: 910 -Total Receives: 906 -Total Receives: 907 -Total Receives: 908 -Total Receives: 909 -Total Receives: 910 -Total rewards: 915 -Total Receives: 911 -Stores 623 1 -Total Receives: 912 -Stores 624 1 -Total Receives: 913 -Stores 625 1 -Total Receives: 914 -Stores 626 1 -Total Receives: 915 -Stores 627 1 -Total rewards: 920 -Total Receives: 916 -Stores 628 1 -Total Receives: 917 -Stores 629 1 -Total Receives: 918 -Stores 630 1 -Total Receives: 919 -Stores 631 1 -Total Receives: 920 -Stores 632 1 -Total rewards: 928 -Total Receives: 921 -Stores 633 1 -Total Receives: 922 -Stores 634 1 -Total Receives: 923 -Stores 635 1 -Total Receives: 924 -Stores 636 1 -Total Receives: 925 -Stores 637 1 -Total Receives: 926 -Stores 638 1 -Total Receives: 927 -Stores 639 1 -Total Receives: 928 -Stores 640 1 -Total rewards: 934 -Total Receives: 929 -Total Receives: 930 -Total Receives: 931 -Total Receives: 932 -Total Receives: 933 -Total Receives: 934 -Total rewards: 941 -Total Receives: 935 -Stores 641 1 -Total Receives: 936 -Stores 642 1 -Total Receives: 937 -Stores 643 1 -Total Receives: 938 -Stores 644 1 -Total Receives: 939 -Stores 645 1 -Total Receives: 940 -Stores 646 1 -Total Receives: 941 -Stores 647 1 -Total rewards: 944 -Total Receives: 942 -Stores 648 1 -Total Receives: 943 -Stores 649 1 -Total Receives: 944 -Stores 650 1 -Total rewards: 949 -Total Receives: 945 -Stores 651 1 -Total Receives: 946 -Stores 652 1 -Total Receives: 947 -Stores 653 1 -Total Receives: 948 -Stores 654 1 -Total Receives: 949 -Stores 655 1 -Total rewards: 956 -Total Receives: 950 -Total Receives: 951 -Total Receives: 952 -Total Receives: 953 -Total Receives: 954 -Total Receives: 955 -Total Receives: 956 -Total rewards: 960 -Total Receives: 957 -Stores 656 1 -Total Receives: 958 -Total Receives: 959 -Total Receives: 960 -Total rewards: 966 -Total Receives: 961 -Total Receives: 962 -Total Receives: 963 -Total Receives: 964 -Total Receives: 965 -Total Receives: 966 -Total rewards: 971 -Total Receives: 967 -Total Receives: 968 -Total Receives: 969 -Total Receives: 970 -Total Receives: 971 -Total rewards: 977 -Total Receives: 972 -Total Receives: 973 -Total Receives: 974 -Total Receives: 975 -Total Receives: 976 -Total Receives: 977 -Total rewards: 985 -Total Receives: 978 -Total Receives: 979 -Total Receives: 980 -Total Receives: 981 -Total Receives: 982 -Total Receives: 983 -Total Receives: 984 -Total Receives: 985 -Total rewards: 987 -Total Receives: 986 -Total Receives: 987 -Total rewards: 993 -Total Receives: 988 -Total Receives: 989 -Total Receives: 990 -Total Receives: 991 -Total Receives: 992 -Total Receives: 993 -Total rewards: 1000 -Total Receives: 994 -Stores 657 1 -Total Receives: 995 -Stores 658 1 -Total Receives: 996 -Stores 659 1 -Total Receives: 997 -Stores 660 1 -Total Receives: 998 -Stores 661 1 -Total Receives: 999 -Stores 662 1 -Total Receives: 1000 -Stores 663 1 -Total rewards: 1252 -Total Receives: 1246 -Stores 834 1 -Total Receives: 1247 -Stores 835 1 -Total Receives: 1248 -Stores 836 1 -Total Receives: 1249 -Stores 837 1 -Total Receives: 1250 -Stores 838 1 -Total Receives: 1251 -Stores 839 1 -Total Receives: 1252 -Stores 840 1 -Total rewards: 1260 -Total Receives: 1253 -Stores 841 1 -Total Receives: 1254 -Stores 842 1 -Total Receives: 1255 -Stores 843 1 -Total Receives: 1256 -Stores 844 1 -Total Receives: 1257 -Stores 845 1 -Total Receives: 1258 -Stores 846 1 -Total Receives: 1259 -Stores 847 1 -Total Receives: 1260 -Stores 848 1 -Total rewards: 1266 -Total Receives: 1261 -Stores 849 1 -Total Receives: 1262 -Stores 850 1 -Total Receives: 1263 -Stores 851 1 -Total Receives: 1264 -Stores 852 1 -Total Receives: 1265 -Stores 853 1 -Total Receives: 1266 -Total rewards: 1272 -Total Receives: 1267 -Stores 854 1 -Total Receives: 1268 -Stores 855 1 -Total Receives: 1269 -Stores 856 1 -Total Receives: 1270 -Stores 857 1 -Total Receives: 1271 -Stores 858 1 -Total Receives: 1272 -Stores 859 1 -Total rewards: 1276 -Total Receives: 1273 -Stores 860 1 -Total Receives: 1274 -Stores 861 1 -Total Receives: 1275 -Stores 862 1 -Total Receives: 1276 -Stores 863 1 -Total rewards: 1279 -Total Receives: 1277 -Stores 864 1 -Total Receives: 1278 -Stores 865 1 -Total Receives: 1279 -Stores 866 1 -Total rewards: 1283 -Total Receives: 1280 -Total Receives: 1281 -Total Receives: 1282 -Total Receives: 1283 -Total rewards: 1285 -Total Receives: 1284 -Stores 867 1 -Total Receives: 1285 -Stores 868 1 -Total rewards: 1292 -Total Receives: 1286 -Stores 869 1 -Total Receives: 1287 -Stores 870 1 -Total Receives: 1288 -Stores 871 1 -Total Receives: 1289 -Stores 872 1 -Total Receives: 1290 -Stores 873 1 -Total Receives: 1291 -Stores 874 1 -Total Receives: 1292 -Stores 875 1 -Total rewards: 1301 -Total Receives: 1293 -Total Receives: 1294 -Total Receives: 1295 -Total Receives: 1296 -Total Receives: 1297 -Total Receives: 1298 -Total Receives: 1299 -Total Receives: 1300 -Total Receives: 1301 -Total rewards: 1307 -Total Receives: 1302 -Stores 876 1 -Total Receives: 1303 -Stores 877 1 -Total Receives: 1304 -Stores 878 1 -Total Receives: 1305 -Stores 879 1 -Total Receives: 1306 -Stores 880 1 -Total Receives: 1307 -Stores 881 1 -Total rewards: 1311 -Total Receives: 1308 -Total Receives: 1309 -Total Receives: 1310 -Total Receives: 1311 -Total rewards: 1321 -Total Receives: 1312 -Stores 882 1 -Total Receives: 1313 -Stores 883 1 -Total Receives: 1314 -Stores 884 1 -Total Receives: 1315 -Stores 885 1 -Total Receives: 1316 -Stores 886 1 -Total Receives: 1317 -Stores 887 1 -Total Receives: 1318 -Stores 888 1 -Total Receives: 1319 -Stores 889 1 -Total Receives: 1320 -Stores 890 1 -Total Receives: 1321 -Stores 891 1 -Total rewards: 1329 -Total Receives: 1322 -Stores 892 1 -Total Receives: 1323 -Stores 893 1 -Total Receives: 1324 -Stores 894 1 -Total Receives: 1325 -Stores 895 1 -Total Receives: 1326 -Stores 896 1 -Total Receives: 1327 -Stores 897 1 -Total Receives: 1328 -Stores 898 1 -Total Receives: 1329 -Stores 899 1 -Total rewards: 1337 -Total Receives: 1330 -Stores 900 1 -Total Receives: 1331 -Stores 901 1 -Total Receives: 1332 -Stores 902 1 -Total Receives: 1333 -Stores 903 1 -Total Receives: 1334 -Stores 904 1 -Total Receives: 1335 -Stores 905 1 -Total Receives: 1336 -Stores 906 1 -Total Receives: 1337 -Stores 907 1 -Total rewards: 1345 -Total Receives: 1338 -Stores 908 1 -Total Receives: 1339 -Stores 909 1 -Total Receives: 1340 -Stores 910 1 -Total Receives: 1341 -Stores 911 1 -Total Receives: 1342 -Stores 912 1 -Total Receives: 1343 -Stores 913 1 -Total Receives: 1344 -Stores 914 1 -Total Receives: 1345 -Stores 915 1 -Total rewards: 1350 -Total Receives: 1346 -Stores 916 1 -Total Receives: 1347 -Stores 917 1 -Total Receives: 1348 -Stores 918 1 -Total Receives: 1349 -Stores 919 1 -Total Receives: 1350 -Stores 920 1 -Total rewards: 1359 -Total Receives: 1351 -Stores 921 1 -Total Receives: 1352 -Stores 922 1 -Total Receives: 1353 -Stores 923 1 -Total Receives: 1354 -Stores 924 1 -Total Receives: 1355 -Stores 925 1 -Total Receives: 1356 -Stores 926 1 -Total Receives: 1357 -Stores 927 1 -Total Receives: 1358 -Stores 928 1 -Total Receives: 1359 -Stores 929 1 -Total rewards: 1368 -Total Receives: 1360 -Stores 930 1 -Total Receives: 1361 -Stores 931 1 -Total Receives: 1362 -Stores 932 1 -Total Receives: 1363 -Stores 933 1 -Total Receives: 1364 -Stores 934 1 -Total Receives: 1365 -Stores 935 1 -Total Receives: 1366 -Stores 936 1 -Total Receives: 1367 -Stores 937 1 -Total Receives: 1368 -Stores 938 1 -Total rewards: 1369 -Total Receives: 1369 -Stores 939 1 -Total rewards: 1376 -Total Receives: 1370 -Stores 940 1 -Total Receives: 1371 -Stores 941 1 -Total Receives: 1372 -Stores 942 1 -Total Receives: 1373 -Stores 943 1 -Total Receives: 1374 -Stores 944 1 -Total Receives: 1375 -Stores 945 1 -Total Receives: 1376 -Stores 946 1 -Total rewards: 1384 -Total Receives: 1377 -Stores 947 1 -Total Receives: 1378 -Stores 948 1 -Total Receives: 1379 -Stores 949 1 -Total Receives: 1380 -Stores 950 1 -Total Receives: 1381 -Stores 951 1 -Total Receives: 1382 -Stores 952 1 -Total Receives: 1383 -Stores 953 1 -Total Receives: 1384 -Stores 954 1 -Total rewards: 1386 -Total Receives: 1385 -Stores 955 1 -Total Receives: 1386 -Stores 956 1 -Total rewards: 1396 -Total Receives: 1387 -Stores 957 1 -Total Receives: 1388 -Stores 958 1 -Total Receives: 1389 -Stores 959 1 -Total Receives: 1390 -Stores 960 1 -Total Receives: 1391 -Stores 961 1 -Total Receives: 1392 -Stores 962 1 -Total Receives: 1393 -Stores 963 1 -Total Receives: 1394 -Stores 964 1 -Total Receives: 1395 -Stores 965 1 -Total Receives: 1396 -Stores 966 1 -Total rewards: 1402 -Total Receives: 1397 -Stores 967 1 -Total Receives: 1398 -Stores 968 1 -Total Receives: 1399 -Stores 969 1 -Total Receives: 1400 -Stores 970 1 -Total Receives: 1401 -Stores 971 1 -Total Receives: 1402 -Stores 972 1 -Total rewards: 1410 -Total Receives: 1403 -Stores 973 1 -Total Receives: 1404 -Stores 974 1 -Total Receives: 1405 -Stores 975 1 -Total Receives: 1406 -Stores 976 1 -Total Receives: 1407 -Stores 977 1 -Total Receives: 1408 -Stores 978 1 -Total Receives: 1409 -Stores 979 1 -Total Receives: 1410 -Stores 980 1 -Total rewards: 1413 -Total Receives: 1411 -Stores 981 1 -Total Receives: 1412 -Stores 982 1 -Total Receives: 1413 -Stores 983 1 -Total rewards: 1421 -Total Receives: 1414 -Stores 984 1 -Total Receives: 1415 -Stores 985 1 -Total Receives: 1416 -Stores 986 1 -Total Receives: 1417 -Stores 987 1 -Total Receives: 1418 -Stores 988 1 -Total Receives: 1419 -Stores 989 1 -Total Receives: 1420 -Stores 990 1 -Total Receives: 1421 -Stores 991 1 -Total rewards: 1428 -Total Receives: 1422 -Stores 992 1 -Total Receives: 1423 -Stores 993 1 -Total Receives: 1424 -Stores 994 1 -Total Receives: 1425 -Stores 995 1 -Total Receives: 1426 -Stores 996 1 -Total Receives: 1427 -Stores 997 1 -Total Receives: 1428 -Stores 998 1 -Total rewards: 1436 -Total Receives: 1429 -Stores 999 1 -Total Receives: 1430 -Stores 1000 1 -Total Receives: 1431 -Stores 1001 1 -Total Receives: 1432 -Stores 1002 1 -Total Receives: 1433 -Stores 1003 1 -Total Receives: 1434 -Stores 1004 1 -Total Receives: 1435 -Stores 1005 1 -Total Receives: 1436 -Stores 1006 1 -Total rewards: 1441 -Total Receives: 1437 -Stores 1007 1 -Total Receives: 1438 -Stores 1008 1 -Total Receives: 1439 -Stores 1009 1 -Total Receives: 1440 -Stores 1010 1 -Total Receives: 1441 -Stores 1011 1 -Total rewards: 1448 -Total Receives: 1442 -Stores 1012 1 -Total Receives: 1443 -Stores 1013 1 -Total Receives: 1444 -Stores 1014 1 -Total Receives: 1445 -Stores 1015 1 -Total Receives: 1446 -Stores 1016 1 -Total Receives: 1447 -Stores 1017 1 -Total Receives: 1448 -Stores 1018 1 -Total rewards: 1454 -Total Receives: 1449 -Stores 1019 1 -Total Receives: 1450 -Stores 1020 1 -Total Receives: 1451 -Stores 1021 1 -Total Receives: 1452 -Stores 1022 1 -Total Receives: 1453 -Stores 1023 1 -Total Receives: 1454 -Stores 1024 1 -Total rewards: 1463 -Total Receives: 1455 -Stores 1025 1 -Total Receives: 1456 -Stores 1026 1 -Total Receives: 1457 -Stores 1027 1 -Total Receives: 1458 -Stores 1028 1 -Total Receives: 1459 -Stores 1029 1 -Total Receives: 1460 -Stores 1030 1 -Total Receives: 1461 -Stores 1031 1 -Total Receives: 1462 -Stores 1032 1 -Total Receives: 1463 -Stores 1033 1 -Total rewards: 1470 -Total Receives: 1464 -Stores 1034 1 -Total Receives: 1465 -Stores 1035 1 -Total Receives: 1466 -Stores 1036 1 -Total Receives: 1467 -Stores 1037 1 -Total Receives: 1468 -Stores 1038 1 -Total Receives: 1469 -Stores 1039 1 -Total Receives: 1470 -Stores 1040 1 -Total rewards: 1477 -Total Receives: 1471 -Stores 1041 1 -Total Receives: 1472 -Stores 1042 1 -Total Receives: 1473 -Stores 1043 1 -Total Receives: 1474 -Stores 1044 1 -Total Receives: 1475 -Stores 1045 1 -Total Receives: 1476 -Stores 1046 1 -Total Receives: 1477 -Stores 1047 1 -Total rewards: 1483 -Total Receives: 1478 -Stores 1048 1 -Total Receives: 1479 -Stores 1049 1 -Total Receives: 1480 -Stores 1050 1 -Total Receives: 1481 -Stores 1051 1 -Total Receives: 1482 -Stores 1052 1 -Total Receives: 1483 -Stores 1053 1 -Total rewards: 1485 -Total Receives: 1484 -Stores 1054 1 -Total Receives: 1485 -Stores 1055 1 -Total rewards: 1489 -Total Receives: 1486 -Stores 1056 1 -Total Receives: 1487 -Stores 1057 1 -Total Receives: 1488 -Stores 1058 1 -Total Receives: 1489 -Stores 1059 1 -Total rewards: 1498 -Total Receives: 1490 -Stores 1060 1 -Total Receives: 1491 -Stores 1061 1 -Total Receives: 1492 -Stores 1062 1 -Total Receives: 1493 -Stores 1063 1 -Total Receives: 1494 -Stores 1064 1 -Total Receives: 1495 -Stores 1065 1 -Total Receives: 1496 -Stores 1066 1 -Total Receives: 1497 -Stores 1067 1 -Total Receives: 1498 -Stores 1068 1 -Total rewards: 1504 -Total Receives: 1499 -Stores 1069 1 -Total Receives: 1500 -Stores 1070 1 -Total Receives: 1501 -Stores 1071 1 -Total Receives: 1502 -Stores 1072 1 -Total Receives: 1503 -Stores 1073 1 -Total Receives: 1504 -Stores 1074 1 -Total rewards: 1507 -Total Receives: 1505 -Stores 1075 1 -Total Receives: 1506 -Stores 1076 1 -Total Receives: 1507 -Stores 1077 1 -Total rewards: 1516 -Total Receives: 1508 -Stores 1078 1 -Total Receives: 1509 -Stores 1079 1 -Total Receives: 1510 -Stores 1080 1 -Total Receives: 1511 -Stores 1081 1 -Total Receives: 1512 -Stores 1082 1 -Total Receives: 1513 -Stores 1083 1 -Total Receives: 1514 -Total Receives: 1515 -Total Receives: 1516 -Total rewards: 1525 -Total Receives: 1517 -Total Receives: 1518 -Total Receives: 1519 -Total Receives: 1520 -Total Receives: 1521 -Total Receives: 1522 -Total Receives: 1523 -Total Receives: 1524 -Total Receives: 1525 -Total rewards: 1527 -Total Receives: 1526 -Stores 1084 1 -Total Receives: 1527 -Stores 1085 1 -Total rewards: 1533 -Total Receives: 1528 -Total Receives: 1529 -Total Receives: 1530 -Total Receives: 1531 -Total Receives: 1532 -Total Receives: 1533 -Total rewards: 1536 -Total Receives: 1534 -Stores 1086 1 -Total Receives: 1535 -Stores 1087 1 -Total Receives: 1536 -Stores 1088 1 -Total rewards: 1543 -Total Receives: 1537 -Total Receives: 1538 -Total Receives: 1539 -Total Receives: 1540 -Total Receives: 1541 -Total Receives: 1542 -Total Receives: 1543 -Total rewards: 1552 -Total Receives: 1544 -Stores 1089 1 -Total Receives: 1545 -Stores 1090 1 -Total Receives: 1546 -Stores 1091 1 -Total Receives: 1547 -Stores 1092 1 -Total Receives: 1548 -Stores 1093 1 -Total Receives: 1549 -Stores 1094 1 -Total Receives: 1550 -Stores 1095 1 -Total Receives: 1551 -Stores 1096 1 -Total Receives: 1552 -Stores 1097 1 -Total rewards: 1556 -Total Receives: 1553 -Total Receives: 1554 -Total Receives: 1555 -Total Receives: 1556 -Total rewards: 1560 -Total Receives: 1557 -Stores 1098 1 -Total Receives: 1558 -Stores 1099 1 -Total Receives: 1559 -Stores 1100 1 -Total Receives: 1560 -Stores 1101 1 -Total rewards: 1563 -Total Receives: 1561 -Stores 1102 1 -Total Receives: 1562 -Stores 1103 1 -Total Receives: 1563 -Stores 1104 1 -Total rewards: 1569 -Total Receives: 1564 -Total Receives: 1565 -Total Receives: 1566 -Total Receives: 1567 -Total Receives: 1568 -Total Receives: 1569 -Total rewards: 1577 -Total Receives: 1570 -Stores 1105 1 -Total Receives: 1571 -Stores 1106 1 -Total Receives: 1572 -Stores 1107 1 -Total Receives: 1573 -Stores 1108 1 -Total Receives: 1574 -Stores 1109 1 -Total Receives: 1575 -Stores 1110 1 -Total Receives: 1576 -Stores 1111 1 -Total Receives: 1577 -Total rewards: 1586 -Total Receives: 1578 -Total Receives: 1579 -Total Receives: 1580 -Total Receives: 1581 -Total Receives: 1582 -Total Receives: 1583 -Total Receives: 1584 -Total Receives: 1585 -Total Receives: 1586 -Total rewards: 1594 -Total Receives: 1587 -Total Receives: 1588 -Total Receives: 1589 -Total Receives: 1590 -Total Receives: 1591 -Total Receives: 1592 -Total Receives: 1593 -Total Receives: 1594 -Total rewards: 1602 -Total Receives: 1595 -Total Receives: 1596 -Total Receives: 1597 -Total Receives: 1598 -Total Receives: 1599 -Total Receives: 1600 -Total Receives: 1601 -Total Receives: 1602 -Total rewards: 1606 -Total Receives: 1603 -Stores 1112 1 -Total Receives: 1604 -Stores 1113 1 -Total Receives: 1605 -Stores 1114 1 -Total Receives: 1606 -Stores 1115 1 -Total rewards: 1609 -Total Receives: 1607 -Stores 1116 1 -Total Receives: 1608 -Stores 1117 1 -Total Receives: 1609 -Stores 1118 1 -Total rewards: 1615 -Total Receives: 1610 -Total Receives: 1611 -Total Receives: 1612 -Total Receives: 1613 -Total Receives: 1614 -Total Receives: 1615 -Total rewards: 1619 -Total Receives: 1616 -Stores 1119 1 -Total Receives: 1617 -Stores 1120 1 -Total Receives: 1618 -Stores 1121 1 -Total Receives: 1619 -Stores 1122 1 -Total rewards: 1627 -Total Receives: 1620 -Total Receives: 1621 -Total Receives: 1622 -Total Receives: 1623 -Total Receives: 1624 -Total Receives: 1625 -Total Receives: 1626 -Total Receives: 1627 -Total rewards: 1637 -Total Receives: 1628 -Total Receives: 1629 -Total Receives: 1630 -Total Receives: 1631 -Total Receives: 1632 -Total Receives: 1633 -Total Receives: 1634 -Total Receives: 1635 -Total Receives: 1636 -Total Receives: 1637 -Total rewards: 1646 -Total Receives: 1638 -Total Receives: 1639 -Total Receives: 1640 -Total Receives: 1641 -Total Receives: 1642 -Total Receives: 1643 -Total Receives: 1644 -Total Receives: 1645 -Total Receives: 1646 -Total rewards: 1654 -Total Receives: 1647 -Total Receives: 1648 -Total Receives: 1649 -Total Receives: 1650 -Total Receives: 1651 -Total Receives: 1652 -Total Receives: 1653 -Total Receives: 1654 -Total rewards: 1661 -Total Receives: 1655 -Total Receives: 1656 -Total Receives: 1657 -Total Receives: 1658 -Total Receives: 1659 -Total Receives: 1660 -Total Receives: 1661 -Total rewards: 1671 -Total Receives: 1662 -Stores 1123 1 -Total Receives: 1663 -Stores 1124 1 -Total Receives: 1664 -Stores 1125 1 -Total Receives: 1665 -Stores 1126 1 -Total Receives: 1666 -Stores 1127 1 -Total Receives: 1667 -Stores 1128 1 -Total Receives: 1668 -Stores 1129 1 -Total Receives: 1669 -Stores 1130 1 -Total Receives: 1670 -Stores 1131 1 -Total Receives: 1671 -Stores 1132 1 -Total rewards: 1677 -Total Receives: 1672 -Stores 1133 1 -Total Receives: 1673 -Stores 1134 1 -Total Receives: 1674 -Stores 1135 1 -Total Receives: 1675 -Stores 1136 1 -Total Receives: 1676 -Stores 1137 1 -Total Receives: 1677 -Stores 1138 1 -Total rewards: 1684 -Total Receives: 1678 -Stores 1139 1 -Total Receives: 1679 -Stores 1140 1 -Total Receives: 1680 -Stores 1141 1 -Total Receives: 1681 -Stores 1142 1 -Total Receives: 1682 -Stores 1143 1 -Total Receives: 1683 -Stores 1144 1 -Total Receives: 1684 -Stores 1145 1 -Total rewards: 1687 -Total Receives: 1685 -Stores 1146 1 -Total Receives: 1686 -Stores 1147 1 -Total Receives: 1687 -Stores 1148 1 -Total rewards: 1694 -Total Receives: 1688 -Stores 1149 1 -Total Receives: 1689 -Stores 1150 1 -Total Receives: 1690 -Stores 1151 1 -Total Receives: 1691 -Stores 1152 1 -Total Receives: 1692 -Stores 1153 1 -Total Receives: 1693 -Stores 1154 1 -Total Receives: 1694 -Stores 1155 1 -Total rewards: 1703 -Total Receives: 1695 -Stores 1156 1 -Total Receives: 1696 -Stores 1157 1 -Total Receives: 1697 -Stores 1158 1 -Total Receives: 1698 -Stores 1159 1 -Total Receives: 1699 -Stores 1160 1 -Total Receives: 1700 -Stores 1161 1 -Total Receives: 1701 -Stores 1162 1 -Total Receives: 1702 -Stores 1163 1 -Total Receives: 1703 -Stores 1164 1 -Total rewards: 1706 -Total Receives: 1704 -Stores 1165 1 -Total Receives: 1705 -Stores 1166 1 -Total Receives: 1706 -Stores 1167 1 -Total rewards: 1713 -Total Receives: 1707 -Stores 1168 1 -Total Receives: 1708 -Stores 1169 1 -Total Receives: 1709 -Stores 1170 1 -Total Receives: 1710 -Stores 1171 1 -Total Receives: 1711 -Stores 1172 1 -Total Receives: 1712 -Stores 1173 1 -Total Receives: 1713 -Stores 1174 1 -Total rewards: 1719 -Total Receives: 1714 -Stores 1175 1 -Total Receives: 1715 -Stores 1176 1 -Total Receives: 1716 -Stores 1177 1 -Total Receives: 1717 -Stores 1178 1 -Total Receives: 1718 -Stores 1179 1 -Total Receives: 1719 -Stores 1180 1 -Total rewards: 1728 -Total Receives: 1720 -Stores 1181 1 -Total Receives: 1721 -Stores 1182 1 -Total Receives: 1722 -Stores 1183 1 -Total Receives: 1723 -Stores 1184 1 -Total Receives: 1724 -Stores 1185 1 -Total Receives: 1725 -Total rewards: 1008 -Total Receives: 1001 -Total Receives: 1002 -Total Receives: 1003 -Total Receives: 1004 -Total Receives: 1005 -Total Receives: 1006 -Total Receives: 1007 -Total Receives: 1008 -Total rewards: 1010 -Total Receives: 1009 -Total Receives: 1010 -Total rewards: 1018 -Total Receives: 1011 -Total Receives: 1012 -Total Receives: 1013 -Total Receives: 1014 -Total Receives: 1015 -Total Receives: 1016 -Total Receives: 1017 -Total Receives: 1018 -Total rewards: 1021 -Total Receives: 1019 -Stores 664 1 -Total Receives: 1020 -Stores 665 1 -Total Receives: 1021 -Stores 666 1 -Total rewards: 1027 -Total Receives: 1022 -Total Receives: 1023 -Total Receives: 1024 -Total Receives: 1025 -Total Receives: 1026 -Total Receives: 1027 -Total rewards: 1035 -Total Receives: 1028 -Total Receives: 1029 -Total Receives: 1030 -Total Receives: 1031 -Total Receives: 1032 -Total Receives: 1033 -Total Receives: 1034 -Total Receives: 1035 -Total rewards: 1039 -Total Receives: 1036 -Stores 667 1 -Total Receives: 1037 -Stores 668 1 -Total Receives: 1038 -Stores 669 1 -Total Receives: 1039 -Stores 670 1 -Total rewards: 1046 -Total Receives: 1040 -Stores 671 1 -Total Receives: 1041 -Stores 672 1 -Total Receives: 1042 -Stores 673 1 -Total Receives: 1043 -Stores 674 1 -Total Receives: 1044 -Stores 675 1 -Total Receives: 1045 -Stores 676 1 -Total Receives: 1046 -Stores 677 1 -Total rewards: 1051 -Total Receives: 1047 -Stores 678 1 -Total Receives: 1048 -Stores 679 1 -Total Receives: 1049 -Stores 680 1 -Total Receives: 1050 -Stores 681 1 -Total Receives: 1051 -Stores 682 1 -Total rewards: 1057 -Total Receives: 1052 -Stores 683 1 -Total Receives: 1053 -Stores 684 1 -Total Receives: 1054 -Stores 685 1 -Total Receives: 1055 -Stores 686 1 -Total Receives: 1056 -Stores 687 1 -Total Receives: 1057 -Stores 688 1 -Total rewards: 1062 -Total Receives: 1058 -Stores 689 1 -Total Receives: 1059 -Stores 690 1 -Total Receives: 1060 -Stores 691 1 -Total Receives: 1061 -Stores 692 1 -Total Receives: 1062 -Stores 693 1 -Total rewards: 1068 -Total Receives: 1063 -Stores 694 1 -Total Receives: 1064 -Stores 695 1 -Total Receives: 1065 -Stores 696 1 -Total Receives: 1066 -Stores 697 1 -Total Receives: 1067 -Stores 698 1 -Total Receives: 1068 -Stores 699 1 -Total rewards: 1076 -Total Receives: 1069 -Stores 700 1 -Total Receives: 1070 -Stores 701 1 -Total Receives: 1071 -Stores 702 1 -Total Receives: 1072 -Stores 703 1 -Total Receives: 1073 -Stores 704 1 -Total Receives: 1074 -Stores 705 1 -Total Receives: 1075 -Stores 706 1 -Total Receives: 1076 -Stores 707 1 -Total rewards: 1080 -Total Receives: 1077 -Stores 708 1 -Total Receives: 1078 -Stores 709 1 -Total Receives: 1079 -Stores 710 1 -Total Receives: 1080 -Stores 711 1 -Total rewards: 1084 -Total Receives: 1081 -Stores 712 1 -Total Receives: 1082 -Stores 713 1 -Total Receives: 1083 -Stores 714 1 -Total Receives: 1084 -Stores 715 1 -Total rewards: 1091 -Total Receives: 1085 -Stores 716 1 -Total Receives: 1086 -Stores 717 1 -Total Receives: 1087 -Stores 718 1 -Total Receives: 1088 -Stores 719 1 -Total Receives: 1089 -Stores 720 1 -Total Receives: 1090 -Stores 721 1 -Total Receives: 1091 -Stores 722 1 -Total rewards: 1094 -Total Receives: 1092 -Stores 723 1 -Total Receives: 1093 -Stores 724 1 -Total Receives: 1094 -Stores 725 1 -Total rewards: 1102 -Total Receives: 1095 -Stores 726 1 -Total Receives: 1096 -Stores 727 1 -Total Receives: 1097 -Stores 728 1 -Total Receives: 1098 -Stores 729 1 -Total Receives: 1099 -Stores 730 1 -Total Receives: 1100 -Stores 731 1 -Total Receives: 1101 -Stores 732 1 -Total Receives: 1102 -Stores 733 1 -Total rewards: 1110 -Total Receives: 1103 -Stores 734 1 -Total Receives: 1104 -Stores 735 1 -Total Receives: 1105 -Stores 736 1 -Total Receives: 1106 -Stores 737 1 -Total Receives: 1107 -Stores 738 1 -Total Receives: 1108 -Stores 739 1 -Total Receives: 1109 -Stores 740 1 -Total Receives: 1110 -Stores 741 1 -Total rewards: 1115 -Total Receives: 1111 -Total Receives: 1112 -Total Receives: 1113 -Total Receives: 1114 -Total Receives: 1115 -Total rewards: 1120 -Total Receives: 1116 -Stores 742 1 -Total Receives: 1117 -Stores 743 1 -Total Receives: 1118 -Stores 744 1 -Total Receives: 1119 -Stores 745 1 -Total Receives: 1120 -Stores 746 1 -Total rewards: 1128 -Total Receives: 1121 -Total Receives: 1122 -Total Receives: 1123 -Total Receives: 1124 -Total Receives: 1125 -Total Receives: 1126 -Total Receives: 1127 -Total Receives: 1128 -Total rewards: 1133 -Total Receives: 1129 -Total Receives: 1130 -Total Receives: 1131 -Total Receives: 1132 -Total Receives: 1133 -Total rewards: 1140 -Total Receives: 1134 -Stores 747 1 -Total Receives: 1135 -Stores 748 1 -Total Receives: 1136 -Stores 749 1 -Total Receives: 1137 -Stores 750 1 -Total Receives: 1138 -Stores 751 1 -Total Receives: 1139 -Stores 752 1 -Total Receives: 1140 -Stores 753 1 -Total rewards: 1147 -Total Receives: 1141 -Stores 754 1 -Total Receives: 1142 -Stores 755 1 -Total Receives: 1143 -Stores 756 1 -Total Receives: 1144 -Stores 757 1 -Total Receives: 1145 -Stores 758 1 -Total Receives: 1146 -Stores 759 1 -Total Receives: 1147 -Stores 760 1 -Total rewards: 1155 -Total Receives: 1148 -Stores 761 1 -Total Receives: 1149 -Stores 762 1 -Total Receives: 1150 -Stores 763 1 -Total Receives: 1151 -Stores 764 1 -Total Receives: 1152 -Stores 765 1 -Total Receives: 1153 -Stores 766 1 -Total Receives: 1154 -Stores 767 1 -Total Receives: 1155 -Stores 768 1 -Total rewards: 1159 -Total Receives: 1156 -Stores 769 1 -Total Receives: 1157 -Stores 770 1 -Total Receives: 1158 -Stores 771 1 -Total Receives: 1159 -Stores 772 1 -Total rewards: 1165 -Total Receives: 1160 -Stores 773 1 -Total Receives: 1161 -Stores 774 1 -Total Receives: 1162 -Stores 775 1 -Total Receives: 1163 -Stores 776 1 -Total Receives: 1164 -Stores 777 1 -Total Receives: 1165 -Stores 778 1 -Total rewards: 1169 -Total Receives: 1166 -Stores 779 1 -Total Receives: 1167 -Stores 780 1 -Total Receives: 1168 -Stores 781 1 -Total Receives: 1169 -Stores 782 1 -Total rewards: 1175 -Total Receives: 1170 -Total Receives: 1171 -Total Receives: 1172 -Total Receives: 1173 -Total Receives: 1174 -Total Receives: 1175 -Total rewards: 1181 -Total Receives: 1176 -Stores 783 1 -Total Receives: 1177 -Stores 784 1 -Total Receives: 1178 -Stores 785 1 -Total Receives: 1179 -Stores 786 1 -Total Receives: 1180 -Stores 787 1 -Total Receives: 1181 -Stores 788 1 -Total rewards: 1189 -Total Receives: 1182 -Stores 789 1 -Total Receives: 1183 -Stores 790 1 -Total Receives: 1184 -Stores 791 1 -Total Receives: 1185 -Stores 792 1 -Total Receives: 1186 -Stores 793 1 -Total Receives: 1187 -Stores 794 1 -Total Receives: 1188 -Stores 795 1 -Total Receives: 1189 -Stores 796 1 -Total rewards: 1195 -Total Receives: 1190 -Stores 797 1 -Total Receives: 1191 -Stores 798 1 -Total Receives: 1192 -Stores 799 1 -Total Receives: 1193 -Stores 800 1 -Total Receives: 1194 -Stores 801 1 -Total Receives: 1195 -Total rewards: 1199 -Total Receives: 1196 -Total Receives: 1197 -Total Receives: 1198 -Total Receives: 1199 -Total rewards: 1205 -Total Receives: 1200 -Total Receives: 1201 -Total Receives: 1202 -Total Receives: 1203 -Total Receives: 1204 -Total Receives: 1205 -Total rewards: 1209 -Total Receives: 1206 -Stores 802 1 -Total Receives: 1207 -Stores 803 1 -Total Receives: 1208 -Stores 804 1 -Total Receives: 1209 -Stores 805 1 -Total rewards: 1218 -Total Receives: 1210 -Stores 806 1 -Total Receives: 1211 -Total Receives: 1212 -Total Receives: 1213 -Total Receives: 1214 -Total Receives: 1215 -Total Receives: 1216 -Total Receives: 1217 -Total Receives: 1218 -Total rewards: 1221 -Total Receives: 1219 -Stores 807 1 -Total Receives: 1220 -Stores 808 1 -Total Receives: 1221 -Stores 809 1 -Total rewards: 1229 -Total Receives: 1222 -Stores 810 1 -Total Receives: 1223 -Stores 811 1 -Total Receives: 1224 -Stores 812 1 -Total Receives: 1225 -Stores 813 1 -Total Receives: 1226 -Stores 814 1 -Total Receives: 1227 -Stores 815 1 -Total Receives: 1228 -Stores 816 1 -Total Receives: 1229 -Stores 817 1 -Total rewards: 1237 -Total Receives: 1230 -Stores 818 1 -Total Receives: 1231 -Stores 819 1 -Total Receives: 1232 -Stores 820 1 -Total Receives: 1233 -Stores 821 1 -Total Receives: 1234 -Stores 822 1 -Total Receives: 1235 -Stores 823 1 -Total Receives: 1236 -Stores 824 1 -Total Receives: 1237 -Stores 825 1 -Total rewards: 1245 -Total Receives: 1238 -Stores 826 1 -Total Receives: 1239 -Stores 827 1 -Total Receives: 1240 -Stores 828 1 -Total Receives: 1241 -Stores 829 1 -Total Receives: 1242 -Stores 830 1 -Total Receives: 1243 -Stores 831 1 -Total Receives: 1244 -Stores 832 1 -Total Receives: 1245 -Stores 833 1 -Stores 1186 1 -Total Receives: 1726 -Stores 1187 1 -Total Receives: 1727 -Stores 1188 1 -Total Receives: 1728 -Stores 1189 1 -Total rewards: 1735 -Total Receives: 1729 -Stores 1190 1 -Total Receives: 1730 -Stores 1191 1 -Total Receives: 1731 -Stores 1192 1 -Total Receives: 1732 -Stores 1193 1 -Total Receives: 1733 -Stores 1194 1 -Total Receives: 1734 -Stores 1195 1 -Total Receives: 1735 -Stores 1196 1 -Total rewards: 1741 -Total Receives: 1736 -Stores 1197 1 -Total Receives: 1737 -Stores 1198 1 -Total Receives: 1738 -Stores 1199 1 -Total Receives: 1739 -Stores 1200 1 -Total Receives: 1740 -Stores 1201 1 -Total Receives: 1741 -Stores 1202 1 -Total rewards: 1747 -Total Receives: 1742 -Stores 1203 1 -Total Receives: 1743 -Stores 1204 1 -Total Receives: 1744 -Stores 1205 1 -Total Receives: 1745 -Stores 1206 1 -Total Receives: 1746 -Stores 1207 1 -Total Receives: 1747 -Stores 1208 1 -Total rewards: 1755 -Total Receives: 1748 -Total Receives: 1749 -Total Receives: 1750 -Total Receives: 1751 -Total Receives: 1752 -Total Receives: 1753 -Total Receives: 1754 -Total Receives: 1755 -Total rewards: 1762 -Total Receives: 1756 -Stores 1209 1 -Total Receives: 1757 -Stores 1210 1 -Total Receives: 1758 -Stores 1211 1 -Total Receives: 1759 -Stores 1212 1 -Total Receives: 1760 -Stores 1213 1 -Total Receives: 1761 -Stores 1214 1 -Total Receives: 1762 -Stores 1215 1 -Total rewards: 1769 -Total Receives: 1763 -Total Receives: 1764 -Total Receives: 1765 -Total Receives: 1766 -Total Receives: 1767 -Total Receives: 1768 -Total Receives: 1769 -Total rewards: 1773 -Total Receives: 1770 -Stores 1216 1 -Total Receives: 1771 -Stores 1217 1 -Total Receives: 1772 -Stores 1218 1 -Total Receives: 1773 -Stores 1219 1 -Total rewards: 1781 -Total Receives: 1774 -Stores 1220 1 -Total Receives: 1775 -Stores 1221 1 -Total Receives: 1776 -Stores 1222 1 -Total Receives: 1777 -Stores 1223 1 -Total Receives: 1778 -Stores 1224 1 -Total Receives: 1779 -Stores 1225 1 -Total Receives: 1780 -Stores 1226 1 -Total Receives: 1781 -Stores 1227 1 -Total rewards: 1786 -Total Receives: 1782 -Stores 1228 1 -Total Receives: 1783 -Stores 1229 1 -Total Receives: 1784 -Stores 1230 1 -Total Receives: 1785 -Stores 1231 1 -Total Receives: 1786 -Stores 1232 1 -Total rewards: 1793 -Total Receives: 1787 -Stores 1233 1 -Total Receives: 1788 -Stores 1234 1 -Total Receives: 1789 -Stores 1235 1 -Total Receives: 1790 -Stores 1236 1 -Total Receives: 1791 -Stores 1237 1 -Total Receives: 1792 -Stores 1238 1 -Total Receives: 1793 -Stores 1239 1 -Total rewards: 1803 -Total Receives: 1794 -Total Receives: 1795 -Total Receives: 1796 -Total Receives: 1797 -Total Receives: 1798 -Total Receives: 1799 -Total Receives: 1800 -Total Receives: 1801 -Total Receives: 1802 -Total Receives: 1803 -Total rewards: 1811 -Total Receives: 1804 -Stores 1240 1 -Total Receives: 1805 -Stores 1241 1 -Total Receives: 1806 -Stores 1242 1 -Total Receives: 1807 -Stores 1243 1 -Total Receives: 1808 -Stores 1244 1 -Total Receives: 1809 -Stores 1245 1 -Total Receives: 1810 -Stores 1246 1 -Total Receives: 1811 -Stores 1247 1 -Total rewards: 1819 -Total Receives: 1812 -Total Receives: 1813 -Total Receives: 1814 -Total Receives: 1815 -Total Receives: 1816 -Total Receives: 1817 -Total Receives: 1818 -Total Receives: 1819 -Total rewards: 1823 -Total Receives: 1820 -Stores 1248 1 -Total Receives: 1821 -Stores 1249 1 -Total Receives: 1822 -Stores 1250 1 -Total Receives: 1823 -Stores 1251 1 -Total rewards: 1833 -Total Receives: 1824 -Total Receives: 1825 -Total Receives: 1826 -Total Receives: 1827 -Total Receives: 1828 -Total Receives: 1829 -Total Receives: 1830 -Total Receives: 1831 -Total Receives: 1832 -Total Receives: 1833 -Total rewards: 1837 -Total Receives: 1834 -Stores 1252 1 -Total Receives: 1835 -Stores 1253 1 -Total Receives: 1836 -Stores 1254 1 -Total Receives: 1837 -Stores 1255 1 -Total rewards: 1845 -Total Receives: 1838 -Total Receives: 1839 -Total Receives: 1840 -Total Receives: 1841 -Total Receives: 1842 -Total Receives: 1843 -Total Receives: 1844 -Total Receives: 1845 -Total rewards: 1854 -Total Receives: 1846 -Total Receives: 1847 -Total Receives: 1848 -Total Receives: 1849 -Total Receives: 1850 -Total Receives: 1851 -Total Receives: 1852 -Total Receives: 1853 -Total Receives: 1854 -Total rewards: 1859 -Total Receives: 1855 -Stores 1256 1 -Total Receives: 1856 -Stores 1257 1 -Total Receives: 1857 -Stores 1258 1 -Total Receives: 1858 -Stores 1259 1 -Total Receives: 1859 -Stores 1260 1 -Total rewards: 1862 -Total Receives: 1860 -Stores 1261 1 -Total Receives: 1861 -Stores 1262 1 -Total Receives: 1862 -Stores 1263 1 -Total rewards: 1868 -Total Receives: 1863 -Total Receives: 1864 -Total Receives: 1865 -Total Receives: 1866 -Total Receives: 1867 -Total Receives: 1868 -Total rewards: 1873 -Total Receives: 1869 -Stores 1264 1 -Total Receives: 1870 -Stores 1265 1 -Total Receives: 1871 -Stores 1266 1 -Total Receives: 1872 -Stores 1267 1 -Total Receives: 1873 -Stores 1268 1 -Total rewards: 1875 -Total Receives: 1874 -Stores 1269 1 -Total Receives: 1875 -Stores 1270 1 -Total rewards: 1882 -Total Receives: 1876 -Total Receives: 1877 -Total Receives: 1878 -Total Receives: 1879 -Total Receives: 1880 -Total Receives: 1881 -Total Receives: 1882 -Total rewards: 1886 -Total Receives: 1883 -Stores 1271 1 -Total Receives: 1884 -Stores 1272 1 -Total Receives: 1885 -Stores 1273 1 -Total Receives: 1886 -Stores 1274 1 -Total rewards: 1894 -Total Receives: 1887 -Total Receives: 1888 -Total Receives: 1889 -Total Receives: 1890 -Total Receives: 1891 -Total Receives: 1892 -Total Receives: 1893 -Total Receives: 1894 -Total rewards: 1897 -Total Receives: 1895 -Stores 1275 1 -Total Receives: 1896 -Stores 1276 1 -Total Receives: 1897 -Stores 1277 1 -Total rewards: 1903 -Total Receives: 1898 -Stores 1278 1 -Total Receives: 1899 -Stores 1279 1 -Total Receives: 1900 -Stores 1280 1 -Total Receives: 1901 -Stores 1281 1 -Total Receives: 1902 -Stores 1282 1 -Total Receives: 1903 -Stores 1283 1 -Total rewards: 1909 -Total Receives: 1904 -Stores 1284 1 -Total Receives: 1905 -Stores 1285 1 -Total Receives: 1906 -Stores 1286 1 -Total Receives: 1907 -Stores 1287 1 -Total Receives: 1908 -Stores 1288 1 -Total Receives: 1909 -Stores 1289 1 -Total rewards: 1915 -Total Receives: 1910 -Stores 1290 1 -Total Receives: 1911 -Stores 1291 1 -Total Receives: 1912 -Stores 1292 1 -Total Receives: 1913 -Stores 1293 1 -Total Receives: 1914 -Stores 1294 1 -Total Receives: 1915 -Stores 1295 1 -Total rewards: 1921 -Total Receives: 1916 -Total Receives: 1917 -Total Receives: 1918 -Total Receives: 1919 -Total Receives: 1920 -Total Receives: 1921 -Total rewards: 1926 -Total Receives: 1922 -Stores 1296 1 -Total Receives: 1923 -Stores 1297 1 -Total Receives: 1924 -Stores 1298 1 -Total Receives: 1925 -Stores 1299 1 -Total Receives: 1926 -Stores 1300 1 -Total rewards: 1929 -Total Receives: 1927 -Stores 1301 1 -Total Receives: 1928 -Stores 1302 1 -Total Receives: 1929 -Stores 1303 1 -Total rewards: 1933 -Total Receives: 1930 -Stores 1304 1 -Total Receives: 1931 -Stores 1305 1 -Total Receives: 1932 -Stores 1306 1 -Total Receives: 1933 -Stores 1307 1 -Total rewards: 1939 -Total Receives: 1934 -Stores 1308 1 -Total Receives: 1935 -Stores 1309 1 -Total Receives: 1936 -Stores 1310 1 -Total Receives: 1937 -Stores 1311 1 -Total Receives: 1938 -Stores 1312 1 -Total Receives: 1939 -Stores 1313 1 -Total rewards: 1946 -Total Receives: 1940 -Stores 1314 1 -Total Receives: 1941 -Stores 1315 1 -Total Receives: 1942 -Stores 1316 1 -Total Receives: 1943 -Stores 1317 1 -Total Receives: 1944 -Stores 1318 1 -Total Receives: 1945 -Stores 1319 1 -Total Receives: 1946 -Stores 1320 1 -Total rewards: 1949 -Total Receives: 1947 -Stores 1321 1 -Total Receives: 1948 -Stores 1322 1 -Total Receives: 1949 -Stores 1323 1 -Total rewards: 1957 -Total Receives: 1950 -Total Receives: 1951 -Total Receives: 1952 -Total Receives: 1953 -Total Receives: 1954 -Total Receives: 1955 -Total Receives: 1956 -Total Receives: 1957 -Total rewards: 1967 -Total Receives: 1958 -Stores 1324 1 -Total Receives: 1959 -Stores 1325 1 -Total Receives: 1960 -Stores 1326 1 -Total Receives: 1961 -Stores 1327 1 -Total Receives: 1962 -Stores 1328 1 -Total Receives: 1963 -Stores 1329 1 -Total Receives: 1964 -Stores 1330 1 -Total Receives: 1965 -Stores 1331 1 -Total Receives: 1966 -Stores 1332 1 -Total Receives: 1967 -Stores 1333 1 -Total rewards: 1976 -Total Receives: 1968 -Stores 1334 1 -Total Receives: 1969 -Stores 1335 1 -Total Receives: 1970 -Stores 1336 1 -Total Receives: 1971 -Stores 1337 1 -Total Receives: 1972 -Stores 1338 1 -Total Receives: 1973 -Stores 1339 1 -Total Receives: 1974 -Stores 1340 1 -Total Receives: 1975 -Stores 1341 1 -Total Receives: 1976 -Stores 1342 1 -Total rewards: 1981 -Total Receives: 1977 -Stores 1343 1 -Total Receives: 1978 -Stores 1344 1 -Total Receives: 1979 -Stores 1345 1 -Total Receives: 1980 -Stores 1346 1 -Total Receives: 1981 -Stores 1347 1 -Total rewards: 1989 -Total Receives: 1982 -Stores 1348 1 -Total Receives: 1983 -Stores 1349 1 -Total Receives: 1984 -Stores 1350 1 -Total Receives: 1985 -Stores 1351 1 -Total Receives: 1986 -Stores 1352 1 -Total Receives: 1987 -Stores 1353 1 -Total Receives: 1988 -Stores 1354 1 -Total Receives: 1989 -Stores 1355 1 -Total rewards: 1996 -Total Receives: 1990 -Stores 1356 1 -Total Receives: 1991 -Stores 1357 1 -Total Receives: 1992 -Stores 1358 1 -Total Receives: 1993 -Stores 1359 1 -Total Receives: 1994 -Stores 1360 1 -Total Receives: 1995 -Stores 1361 1 -Total Receives: 1996 -Stores 1362 1 -Total rewards: 1998 -Total Receives: 1997 -Stores 1363 1 -Total Receives: 1998 -Stores 1364 1 -Total rewards: 2007 -Total Receives: 1999 -Stores 1365 1 -Total Receives: 2000 -Stores 1366 1 -Total Receives: 2001 -Stores 1367 1 -Total Receives: 2002 -Stores 1368 1 -Total Receives: 2003 -Stores 1369 1 -Total Receives: 2004 -Stores 1370 1 -Total Receives: 2005 -Stores 1371 1 -Total Receives: 2006 -Stores 1372 1 -Total Receives: 2007 -Stores 1373 1 -Total rewards: 2015 -Total Receives: 2008 -Stores 1374 1 -Total Receives: 2009 -Stores 1375 1 -Total Receives: 2010 -Stores 1376 1 -Total Receives: 2011 -Stores 1377 1 -Total Receives: 2012 -Stores 1378 1 -Total Receives: 2013 -Stores 1379 1 -Total Receives: 2014 -Stores 1380 1 -Total Receives: 2015 -Total rewards: 2024 -Total Receives: 2016 -Total Receives: 2017 -Total Receives: 2018 -Total Receives: 2019 -Total Receives: 2020 -Total Receives: 2021 -Total Receives: 2022 -Total Receives: 2023 -Total Receives: 2024 -Total rewards: 2029 -Total Receives: 2025 -Stores 1381 1 -Total Receives: 2026 -Stores 1382 1 -Total Receives: 2027 -Stores 1383 1 -Total Receives: 2028 -Stores 1384 1 -Total Receives: 2029 -Stores 1385 1 -Total rewards: 2036 -Total Receives: 2030 -Stores 1386 1 -Total Receives: 2031 -Stores 1387 1 -Total Receives: 2032 -Stores 1388 1 -Total Receives: 2033 -Stores 1389 1 -Total Receives: 2034 -Stores 1390 1 -Total Receives: 2035 -Stores 1391 1 -Total Receives: 2036 -Stores 1392 1 -Total rewards: 2042 -Total Receives: 2037 -Total Receives: 2038 -Total Receives: 2039 -Total Receives: 2040 -Total Receives: 2041 -Total Receives: 2042 -Total rewards: 2044 -Total Receives: 2043 -Stores 1393 1 -Total Receives: 2044 -Stores 1394 1 -Total rewards: 2051 -Total Receives: 2045 -Total Receives: 2046 -Total Receives: 2047 -Total Receives: 2048 -Total Receives: 2049 -Total Receives: 2050 -Total Receives: 2051 -Total rewards: 2058 -Total Receives: 2052 -Total Receives: 2053 -Total Receives: 2054 -Total Receives: 2055 -Total Receives: 2056 -Total Receives: 2057 -Total Receives: 2058 -Total rewards: 2064 -Total Receives: 2059 -Stores 1395 1 -Total Receives: 2060 -Stores 1396 1 -Total Receives: 2061 -Stores 1397 1 -Total Receives: 2062 -Stores 1398 1 -Total Receives: 2063 -Stores 1399 1 -Total Receives: 2064 -Stores 1400 1 -Total rewards: 2068 -Total Receives: 2065 -Stores 1401 1 -Total Receives: 2066 -Stores 1402 1 -Total Receives: 2067 -Stores 1403 1 -Total Receives: 2068 -Stores 1404 1 -Total rewards: 2071 -Total Receives: 2069 -Stores 1405 1 -Total Receives: 2070 -Stores 1406 1 -Total Receives: 2071 -Stores 1407 1 -Total rewards: 2078 -Total Receives: 2072 -Stores 1408 1 -Total Receives: 2073 -Stores 1409 1 -Total Receives: 2074 -Stores 1410 1 -Total Receives: 2075 -Stores 1411 1 -Total Receives: 2076 -Stores 1412 1 -Total Receives: 2077 -Stores 1413 1 -Total Receives: 2078 -Stores 1414 1 -Total rewards: 2081 -Total Receives: 2079 -Stores 1415 1 -Total Receives: 2080 -Stores 1416 1 -Total Receives: 2081 -Stores 1417 1 -Total rewards: 2089 -Total Receives: 2082 -Stores 1418 1 -Total Receives: 2083 -Stores 1419 1 -Total Receives: 2084 -Stores 1420 1 -Total Receives: 2085 -Stores 1421 1 -Total Receives: 2086 -Stores 1422 1 -Total Receives: 2087 -Stores 1423 1 -Total Receives: 2088 -Stores 1424 1 -Total Receives: 2089 -Stores 1425 1 -Total rewards: 2097 -Total Receives: 2090 -Stores 1426 1 -Total Receives: 2091 -Stores 1427 1 -Total Receives: 2092 -Stores 1428 1 -Total Receives: 2093 -Stores 1429 1 -Total Receives: 2094 -Stores 1430 1 -Total Receives: 2095 -Stores 1431 1 -Total Receives: 2096 -Stores 1432 1 -Total Receives: 2097 -Stores 1433 1 -Total rewards: 2106 -Total Receives: 2098 -Stores 1434 1 -Total Receives: 2099 -Stores 1435 1 -Total Receives: 2100 -Stores 1436 1 -Total Receives: 2101 -Stores 1437 1 -Total Receives: 2102 -Stores 1438 1 -Total Receives: 2103 -Stores 1439 1 -Total Receives: 2104 -Stores 1440 1 -Total Receives: 2105 -Stores 1441 1 -Total Receives: 2106 -Stores 1442 1 -Total rewards: 2113 -Total Receives: 2107 -Stores 1443 1 -Total Receives: 2108 -Stores 1444 1 -Total Receives: 2109 -Stores 1445 1 -Total Receives: 2110 -Stores 1446 1 -Total Receives: 2111 -Total Receives: 2112 -Total Receives: 2113 -Total rewards: 2121 -Total Receives: 2114 -Total Receives: 2115 -Total Receives: 2116 -Total Receives: 2117 -Total Receives: 2118 -Total Receives: 2119 -Total Receives: 2120 -Total Receives: 2121 -Total rewards: 2122 -Total Receives: 2122 -Stores 1447 1 -Total rewards: 2130 -Total Receives: 2123 -Total Receives: 2124 -Total Receives: 2125 -Total Receives: 2126 -Total Receives: 2127 -Total Receives: 2128 -Total Receives: 2129 -Total Receives: 2130 -Total rewards: 2135 -Total Receives: 2131 -Stores 1448 1 -Total Receives: 2132 -Stores 1449 1 -Total Receives: 2133 -Stores 1450 1 -Total Receives: 2134 -Stores 1451 1 -Total Receives: 2135 -Stores 1452 1 -Total rewards: 2142 -Total Receives: 2136 -Total Receives: 2137 -Total Receives: 2138 -Total Receives: 2139 -Total Receives: 2140 -Total Receives: 2141 -Total Receives: 2142 -Total rewards: 2151 -Total Receives: 2143 -Total Receives: 2144 -Total Receives: 2145 -Total Receives: 2146 -Total Receives: 2147 -Total Receives: 2148 -Total Receives: 2149 -Total Receives: 2150 -Total Receives: 2151 -Total rewards: 2154 -Total Receives: 2152 -Stores 1453 1 -Total Receives: 2153 -Stores 1454 1 -Total Receives: 2154 -Stores 1455 1 -Total rewards: 2161 -Total Receives: 2155 -Total Receives: 2156 -Total Receives: 2157 -Total Receives: 2158 -Total Receives: 2159 -Total Receives: 2160 -Total Receives: 2161 -Total rewards: 2166 -Total Receives: 2162 -Stores 1456 1 -Total Receives: 2163 -Stores 1457 1 -Total Receives: 2164 -Stores 1458 1 -Total Receives: 2165 -Stores 1459 1 -Total Receives: 2166 -Stores 1460 1 -Total rewards: 2169 -Total Receives: 2167 -Stores 1461 1 -Total Receives: 2168 -Stores 1462 1 -Total Receives: 2169 -Stores 1463 1 -Total rewards: 2176 -Total Receives: 2170 -Total Receives: 2171 -Total Receives: 2172 -Total Receives: 2173 -Total Receives: 2174 -Total Receives: 2175 -Total Receives: 2176 -tensor([0.0279, 0.0267, 0.0268]) -tensor([-1.9869, -2.0260, -2.0545, -1.8894, -2.0349, -2.0282, -2.0462, -2.0350, - -2.0240, -2.0462, -2.0528]) -tensor([-0.2032, -0.1102, -0.1274]) -tensor([-0.1360, -0.1673, -0.1396, -0.1450]) -tensor([0.0335, 0.0348]) -tensor([0.0302, 0.0294]) -Total rewards: 2182 -Total Receives: 2177 -Stores 1464 1 -Total Receives: 2178 -Stores 1465 1 -Total Receives: 2179 -Stores 1466 1 -Total Receives: 2180 -Stores 1467 1 -Total Receives: 2181 -Stores 1468 1 -Total Receives: 2182 -Stores 1469 1 -tensor([0.0036, 0.0056]) -tensor([-0.0795, -0.0848]) -Total rewards: 2183 -Total Receives: 2183 -Stores 1470 1 -tensor([0.0132, 0.0119]) -tensor([-0.6984, -0.7009, -0.6917, -0.6973, -0.7034, -0.7224, -0.7143, -0.7004]) -tensor([-0.3758, -0.3817, -0.3790, -0.3796, -0.3807]) -tensor([-0.0788, -0.0660, -0.0851, -0.0234, -0.0465, -0.0491]) -tensor([-0.8468, -0.8466, -0.8517, -0.8611, -0.8842, -0.8734, -0.8821, -0.8772, - -0.8756]) -Total rewards: 2188 -Total Receives: 2184 -Stores 1471 1 -Total Receives: 2185 -Stores 1472 1 -Total Receives: 2186 -Stores 1473 1 -Total Receives: 2187 -Stores 1474 1 -Total Receives: 2188 -Stores 1475 1 -tensor([-0.2825, -0.2588, -0.2710, -0.2555, -0.2625, -0.2611]) -tensor([-1.1635, -1.1995, -1.2290, -1.2078, -1.1998, -1.2203, -1.2079, -1.1955, - -1.2210, -1.2287]) -tensor([0.0346, 0.0323, 0.0316]) -Total rewards: 2194 -Total Receives: 2189 -Stores 1476 1 -Total Receives: 2190 -Stores 1477 1 -Total Receives: 2191 -Stores 1478 1 -Total Receives: 2192 -Stores 1479 1 -Total Receives: 2193 -Stores 1480 1 -Total Receives: 2194 -Stores 1481 1 -tensor([0.0049, 0.0044, 0.0049]) -Total rewards: 2200 -Total Receives: 2195 -Total Receives: 2196 -Total Receives: 2197 -Total Receives: 2198 -Total Receives: 2199 -Total Receives: 2200 -tensor([-0.3770, -0.3025, -0.3229]) -tensor([-0.0453, -0.0414]) -tensor([-0.2349, -0.2023, -0.1969]) -tensor([-0.0943, -0.0806, -0.1012, -0.0628, -0.0655, -0.0451]) -tensor([-0.3711, -0.3719, -0.3745, -0.3778, -0.3789, -0.3775, -0.4064, -0.3721, - -0.3856]) -tensor([0.0057, 0.0098]) -tensor([0.0270, 0.0269]) -tensor([-0.6319, -0.6350, -0.6339, -0.6405, -0.6588, -0.6518, -0.6373]) -tensor([-0.3206, -0.4188, -0.3107, -0.3613, -0.3251, -0.3075]) -tensor([-0.3797, -0.3636, -0.3770, -0.3777, -0.3784, -0.3791, -0.3718, -0.3805]) -Total rewards: 2206 -Total Receives: 2201 -Stores 1482 1 -Total Receives: 2202 -Stores 1483 1 -Total Receives: 2203 -Stores 1484 1 -Total Receives: 2204 -Stores 1485 1 -Total Receives: 2205 -Stores 1486 1 -Total Receives: 2206 -Stores 1487 1 -tensor([-0.0081, -0.0090, -0.0037]) -tensor([0.0019, 0.0004]) -tensor([-0.1607, -0.1354]) -tensor([-1.2509, -1.2831, -1.3281, -1.3163, -1.2977, -1.2899, -1.3090, -1.2988, - -1.2828, -1.3147]) -Total rewards: 2214 -Total Receives: 2207 -Total Receives: 2208 -Total Receives: 2209 -Total Receives: 2210 -Total Receives: 2211 -Total Receives: 2212 -Total Receives: 2213 -Total Receives: 2214 -tensor([-0.0608, -0.0659, -0.1021, -0.0714, -0.0770, -0.0561]) -tensor([0.0411, 0.0332]) -tensor([-0.0088, -0.0115]) -tensor([-0.4597, -0.3957]) -tensor([-6.4518, -6.4856, -6.5165, -6.5139, -6.5010, -6.4695, -6.5027, -6.5013]) -tensor([-0.0420, -0.0501, -0.0916, -0.0566, -0.0646]) -tensor([0.0281, 0.0270]) -tensor([0.0208, 0.0198]) -tensor([-0.0670, -0.1100, -0.0737, -0.0814]) -tensor([0.0306, 0.0326]) -tensor([-1.1312, -1.1475, -1.1523, -1.1588, -1.1380, -1.1386, -1.1480, -1.1450, - -1.1577, -1.1566, -1.1572, -1.1572]) -tensor([-1.0159, -1.0399, -1.0382, -1.0282, -1.0041, -1.0267, -1.0305]) -Total rewards: 2220 -Total Receives: 2215 -Stores 1488 1 -Total Receives: 2216 -Stores 1489 1 -Total Receives: 2217 -Stores 1490 1 -Total Receives: 2218 -Stores 1491 1 -Total Receives: 2219 -Stores 1492 1 -Total Receives: 2220 -Stores 1493 1 -tensor([0.0354, 0.0360]) -tensor([0.0275, 0.0293, 0.0299]) -tensor([-0.0854, -0.0753, -0.0751, -0.0789, -0.0792]) -tensor([0.0334, 0.0345]) -Total rewards: 2224 -Total Receives: 2221 -Stores 1494 1 -Total Receives: 2222 -Stores 1495 1 -Total Receives: 2223 -Stores 1496 1 -Total Receives: 2224 -Stores 1497 1 -tensor([-0.1337, -0.1057, -0.1116]) -tensor([-0.1441, -0.1162, -0.1163]) -tensor([0.0321, 0.0313]) -tensor([-0.8388, -0.8401, -0.8404, -0.8297, -0.8283, -0.8369, -0.8329, -0.8147, - -0.8514, -0.8582]) -tensor([-0.4061, -0.4125, -0.4133, -0.4120]) -tensor([0.0340, 0.0356]) -Total rewards: 2227 -Total Receives: 2225 -Stores 1498 1 -Total Receives: 2226 -Stores 1499 1 -Total Receives: 2227 -Stores 1500 1 -tensor([-0.5529, -0.5703, -0.5595, -0.5570, -0.5805, -0.5718, -0.5549, -0.5549]) -tensor([-0.0078, -0.0422, -0.0168, -0.0108]) -tensor([-0.5984, -0.6069, -0.6085, -0.5964, -0.6144, -0.6071, -0.6062, -0.6161, - -0.5979]) -tensor([-0.5972, -0.6158, -0.6148, -0.6348, -0.6060, -0.6010, -0.6162]) -Total rewards: 2230 -Total Receives: 2228 -Stores 1501 1 -Total Receives: 2229 -Stores 1502 1 -Total Receives: 2230 -Stores 1503 1 -tensor([-0.4118, -0.4076, -0.4064, -0.4071, -0.4121, -0.4130, -0.4146, -0.3930]) -Total rewards: 2239 -Total Receives: 2231 -Stores 1504 1 -Total Receives: 2232 -Stores 1505 1 -Total Receives: 2233 -Stores 1506 1 -Total Receives: 2234 -Stores 1507 1 -Total Receives: 2235 -Stores 1508 1 -Total Receives: 2236 -Stores 1509 1 -Total Receives: 2237 -Stores 1510 1 -Total Receives: 2238 -Stores 1511 1 -Total Receives: 2239 -Stores 1512 1 -tensor([-0.0331, -0.0316]) -tensor([-0.3837, -0.3847, -0.3887, -0.3830, -0.3738, -0.3988]) -Total rewards: 2244 -Total Receives: 2240 -Stores 1513 1 -Total Receives: 2241 -Stores 1514 1 -Total Receives: 2242 -Stores 1515 1 -Total Receives: 2243 -Stores 1516 1 -Total Receives: 2244 -Stores 1517 1 -tensor([-0.0027, -0.0032, -0.0031]) -tensor([-0.6846, -0.6826, -0.6860, -0.6820, -0.6825, -0.6844, -0.6859, -0.7041, - -0.7056, -0.7042]) -tensor([-1.8704, -1.8821, -1.8866, -1.8651, -1.8804, -1.8893, -1.8795, -1.8741, - -1.8874, -1.8912, -1.8955, -1.8955]) -Total rewards: 2253 -Total Receives: 2245 -Stores 1518 1 -Total Receives: 2246 -Stores 1519 1 -Total Receives: 2247 -Stores 1520 1 -Total Receives: 2248 -Stores 1521 1 -Total Receives: 2249 -Stores 1522 1 -Total Receives: 2250 -Stores 1523 1 -Total Receives: 2251 -Stores 1524 1 -Total Receives: 2252 -Stores 1525 1 -Total Receives: 2253 -Stores 1526 1 -tensor([-0.3944, -0.3827, -0.3955, -0.3939, -0.3962, -0.3975, -0.3984, -0.4160, - -0.4034]) -tensor([0.0152, 0.0152]) -tensor([-0.1491, -0.1214]) -tensor([-0.2314, -0.2334, -0.2321, -0.2364, -0.2658]) -tensor([-0.8426, -0.8460, -0.8424, -0.8510, -0.8529]) -tensor([-0.1463, -0.1300, -0.1200, -0.1218]) -tensor([-0.0308, -0.0266, -0.0180, -0.0217]) -tensor([-0.3213, -0.2622, -0.2679, -0.3003, -0.2594, -0.2911, -0.2984, -0.2763]) -tensor([-0.0610, -0.0580]) -tensor([-0.1663, -0.1458, -0.1787, -0.2092]) -Total rewards: 2263 -Total Receives: 2254 -Stores 1527 1 -Total Receives: 2255 -Stores 1528 1 -Total Receives: 2256 -Stores 1529 1 -Total Receives: 2257 -Stores 1530 1 -Total Receives: 2258 -Stores 1531 1 -Total Receives: 2259 -Stores 1532 1 -Total Receives: 2260 -Stores 1533 1 -Total Receives: 2261 -Stores 1534 1 -Total Receives: 2262 -Stores 1535 1 -Total Receives: 2263 -Stores 1536 1 -tensor([-0.1018, -0.0688, -0.0623, -0.0481]) -Total rewards: 2269 -Total Receives: 2264 -Stores 1537 1 -Total Receives: 2265 -Stores 1538 1 -Total Receives: 2266 -Stores 1539 1 -Total Receives: 2267 -Stores 1540 1 -Total Receives: 2268 -Stores 1541 1 -Total Receives: 2269 -Stores 1542 1 -tensor([0.0234, 0.0213, 0.0206]) -tensor([-0.3098, -0.3134, -0.3106, -0.3107, -0.3122, -0.3142, -0.3152, -0.3349, - -0.3216]) -tensor([-0.0657, -0.1309]) -tensor([0.0288, 0.0276]) -tensor([-0.1357, -0.1077, -0.1024]) -tensor([-0.0235, -0.0235]) -tensor([-0.0362, -0.0395]) -tensor([-0.0787, -0.0663, -0.0753, -0.0638]) -tensor([-1.0378, -1.0755, -1.0326, -1.0561, -1.0736, -1.0693, -1.0858, -1.0858]) -tensor([-0.1884, -0.2001, -0.2318]) -tensor([-4.2669, -4.2810, -4.2868, -4.2708, -4.3085, -4.2916, -4.2735, -4.2702, - -4.3134, -4.2849, -4.2925, -4.3137, -4.3137, -4.3137]) -Total rewards: 2276 -Total Receives: 2270 -Stores 1543 1 -Total Receives: 2271 -Stores 1544 1 -Total Receives: 2272 -Stores 1545 1 -Total Receives: 2273 -Stores 1546 1 -Total Receives: 2274 -Stores 1547 1 -Total Receives: 2275 -Stores 1548 1 -Total Receives: 2276 -Stores 1549 1 -tensor([ 0.0096, -0.0012]) -tensor([-0.5369, -0.5354, -0.5377, -0.5712, -0.5587, -0.5366, -0.5366]) -Total rewards: 2282 -Total Receives: 2277 -Stores 1550 1 -Total Receives: 2278 -Stores 1551 1 -Total Receives: 2279 -Stores 1552 1 -Total Receives: 2280 -Stores 1553 1 -Total Receives: 2281 -Stores 1554 1 -Total Receives: 2282 -Stores 1555 1 -tensor([0.0316, 0.0287]) -tensor([-0.0841, -0.0448, -0.0655]) -tensor([-0.0118, -0.0103, -0.0133, -0.0137]) -Total rewards: 2291 -Total Receives: 2283 -Stores 1556 1 -Total Receives: 2284 -Stores 1557 1 -Total Receives: 2285 -Stores 1558 1 -Total Receives: 2286 -Stores 1559 1 -Total Receives: 2287 -Stores 1560 1 -Total Receives: 2288 -Stores 1561 1 -Total Receives: 2289 -Stores 1562 1 -Total Receives: 2290 -Stores 1563 1 -Total Receives: 2291 -Stores 1564 1 -tensor([-4.1158, -4.1207, -4.1076, -4.1341, -4.1237, -4.1071, -4.1049, -4.1378, - -4.1190, -4.1270, -4.1397, -4.1397, -4.1397]) -tensor([-0.0756, -0.0929, -0.0866]) -tensor([-0.0089, -0.0072, -0.0108, -0.0075]) -Total rewards: 2297 -Total Receives: 2292 -Stores 1565 1 -Total Receives: 2293 -Stores 1566 1 -Total Receives: 2294 -Stores 1567 1 -Total Receives: 2295 -Stores 1568 1 -Total Receives: 2296 -Stores 1569 1 -Total Receives: 2297 -Stores 1570 1 -tensor([-0.0123, -0.0090, -0.0011]) -Total rewards: 2302 -Total Receives: 2298 -Stores 1571 1 -Total Receives: 2299 -Stores 1572 1 -Total Receives: 2300 -Stores 1573 1 -Total Receives: 2301 -Stores 1574 1 -Total Receives: 2302 -Stores 1575 1 -tensor([-0.1634, -0.0714, -0.1500, -0.1064, -0.1345, -0.0769, -0.0884, -0.1427, - -0.1241]) -tensor([0.0393, 0.0398, 0.0397]) -tensor([-0.0739, -0.0762]) -Total rewards: 1252 -Total Receives: 1246 -Stores 834 1 -Total Receives: 1247 -Stores 835 1 -Total Receives: 1248 -Stores 836 1 -Total Receives: 1249 -Stores 837 1 -Total Receives: 1250 -Stores 838 1 -Total Receives: 1251 -Stores 839 1 -Total Receives: 1252 -Stores 840 1 -Total rewards: 1260 -Total Receives: 1253 -Stores 841 1 -Total Receives: 1254 -Stores 842 1 -Total Receives: 1255 -Stores 843 1 -Total Receives: 1256 -Stores 844 1 -Total Receives: 1257 -Stores 845 1 -Total Receives: 1258 -Stores 846 1 -Total Receives: 1259 -Stores 847 1 -Total Receives: 1260 -Stores 848 1 -Total rewards: 1266 -Total Receives: 1261 -Stores 849 1 -Total Receives: 1262 -Stores 850 1 -Total Receives: 1263 -Stores 851 1 -Total Receives: 1264 -Stores 852 1 -Total Receives: 1265 -Stores 853 1 -Total Receives: 1266 -Total rewards: 1272 -Total Receives: 1267 -Stores 854 1 -Total Receives: 1268 -Stores 855 1 -Total Receives: 1269 -Stores 856 1 -Total Receives: 1270 -Stores 857 1 -Total Receives: 1271 -Stores 858 1 -Total Receives: 1272 -Stores 859 1 -Total rewards: 1276 -Total Receives: 1273 -Stores 860 1 -Total Receives: 1274 -Stores 861 1 -Total Receives: 1275 -Stores 862 1 -Total Receives: 1276 -Stores 863 1 -Total rewards: 1279 -Total Receives: 1277 -Stores 864 1 -Total Receives: 1278 -Stores 865 1 -Total Receives: 1279 -Stores 866 1 -Total rewards: 1283 -Total Receives: 1280 -Total Receives: 1281 -Total Receives: 1282 -Total Receives: 1283 -Total rewards: 1285 -Total Receives: 1284 -Stores 867 1 -Total Receives: 1285 -Stores 868 1 -Total rewards: 1292 -Total Receives: 1286 -Stores 869 1 -Total Receives: 1287 -Stores 870 1 -Total Receives: 1288 -Stores 871 1 -Total Receives: 1289 -Stores 872 1 -Total Receives: 1290 -Stores 873 1 -Total Receives: 1291 -Stores 874 1 -Total Receives: 1292 -Stores 875 1 -Total rewards: 1301 -Total Receives: 1293 -Total Receives: 1294 -Total Receives: 1295 -Total Receives: 1296 -Total Receives: 1297 -Total Receives: 1298 -Total Receives: 1299 -Total Receives: 1300 -Total Receives: 1301 -Total rewards: 1307 -Total Receives: 1302 -Stores 876 1 -Total Receives: 1303 -Stores 877 1 -Total Receives: 1304 -Stores 878 1 -Total Receives: 1305 -Stores 879 1 -Total Receives: 1306 -Stores 880 1 -Total Receives: 1307 -Stores 881 1 -Total rewards: 1311 -Total Receives: 1308 -Total Receives: 1309 -Total Receives: 1310 -Total Receives: 1311 -Total rewards: 1321 -Total Receives: 1312 -Stores 882 1 -Total Receives: 1313 -Stores 883 1 -Total Receives: 1314 -Stores 884 1 -Total Receives: 1315 -Stores 885 1 -Total Receives: 1316 -Stores 886 1 -Total Receives: 1317 -Stores 887 1 -Total Receives: 1318 -Stores 888 1 -Total Receives: 1319 -Stores 889 1 -Total Receives: 1320 -Stores 890 1 -Total Receives: 1321 -Stores 891 1 -Total rewards: 1329 -Total Receives: 1322 -Stores 892 1 -Total Receives: 1323 -Stores 893 1 -Total Receives: 1324 -Stores 894 1 -Total Receives: 1325 -Stores 895 1 -Total Receives: 1326 -Stores 896 1 -Total Receives: 1327 -Stores 897 1 -Total Receives: 1328 -Stores 898 1 -Total Receives: 1329 -Stores 899 1 -Total rewards: 1337 -Total Receives: 1330 -Stores 900 1 -Total Receives: 1331 -Stores 901 1 -Total Receives: 1332 -Stores 902 1 -Total Receives: 1333 -Stores 903 1 -Total Receives: 1334 -Stores 904 1 -Total Receives: 1335 -Stores 905 1 -Total Receives: 1336 -Stores 906 1 -Total Receives: 1337 -Stores 907 1 -Total rewards: 1345 -Total Receives: 1338 -Stores 908 1 -Total Receives: 1339 -Stores 909 1 -Total Receives: 1340 -Stores 910 1 -Total Receives: 1341 -Stores 911 1 -Total Receives: 1342 -Stores 912 1 -Total Receives: 1343 -Stores 913 1 -Total Receives: 1344 -Stores 914 1 -Total Receives: 1345 -Stores 915 1 -Total rewards: 1350 -Total Receives: 1346 -Stores 916 1 -Total Receives: 1347 -Stores 917 1 -Total Receives: 1348 -Stores 918 1 -Total Receives: 1349 -Stores 919 1 -Total Receives: 1350 -Stores 920 1 -Total rewards: 1359 -Total Receives: 1351 -Stores 921 1 -Total Receives: 1352 -Stores 922 1 -Total Receives: 1353 -Stores 923 1 -Total Receives: 1354 -Stores 924 1 -Total Receives: 1355 -Stores 925 1 -Total Receives: 1356 -Stores 926 1 -Total Receives: 1357 -Stores 927 1 -Total Receives: 1358 -Stores 928 1 -Total Receives: 1359 -Stores 929 1 -Total rewards: 1368 -Total Receives: 1360 -Stores 930 1 -Total Receives: 1361 -Stores 931 1 -Total Receives: 1362 -Stores 932 1 -Total Receives: 1363 -Stores 933 1 -Total Receives: 1364 -Stores 934 1 -Total Receives: 1365 -Stores 935 1 -Total Receives: 1366 -Stores 936 1 -Total Receives: 1367 -Stores 937 1 -Total Receives: 1368 -Stores 938 1 -Total rewards: 1369 -Total Receives: 1369 -Stores 939 1 -Total rewards: 1376 -Total Receives: 1370 -Stores 940 1 -Total Receives: 1371 -Stores 941 1 -Total Receives: 1372 -Stores 942 1 -Total Receives: 1373 -Stores 943 1 -Total Receives: 1374 -Stores 944 1 -Total Receives: 1375 -Stores 945 1 -Total Receives: 1376 -Stores 946 1 -Total rewards: 1384 -Total Receives: 1377 -Stores 947 1 -Total Receives: 1378 -Stores 948 1 -Total Receives: 1379 -Stores 949 1 -Total Receives: 1380 -Stores 950 1 -Total Receives: 1381 -Stores 951 1 -Total Receives: 1382 -Stores 952 1 -Total Receives: 1383 -Stores 953 1 -Total Receives: 1384 -Stores 954 1 -Total rewards: 1386 -Total Receives: 1385 -Stores 955 1 -Total Receives: 1386 -Stores 956 1 -Total rewards: 1396 -Total Receives: 1387 -Stores 957 1 -Total Receives: 1388 -Stores 958 1 -Total Receives: 1389 -Stores 959 1 -Total Receives: 1390 -Stores 960 1 -Total Receives: 1391 -Stores 961 1 -Total Receives: 1392 -Stores 962 1 -Total Receives: 1393 -Stores 963 1 -Total Receives: 1394 -Stores 964 1 -Total Receives: 1395 -Stores 965 1 -Total Receives: 1396 -Stores 966 1 -Total rewards: 1402 -Total Receives: 1397 -Stores 967 1 -Total Receives: 1398 -Stores 968 1 -Total Receives: 1399 -Stores 969 1 -Total Receives: 1400 -Stores 970 1 -Total Receives: 1401 -Stores 971 1 -Total Receives: 1402 -Stores 972 1 -Total rewards: 1410 -Total Receives: 1403 -Stores 973 1 -Total Receives: 1404 -Stores 974 1 -Total Receives: 1405 -Stores 975 1 -Total Receives: 1406 -Stores 976 1 -Total Receives: 1407 -Stores 977 1 -Total Receives: 1408 -Stores 978 1 -Total Receives: 1409 -Stores 979 1 -Total Receives: 1410 -Stores 980 1 -Total rewards: 1413 -Total Receives: 1411 -Stores 981 1 -Total Receives: 1412 -Stores 982 1 -Total Receives: 1413 -Stores 983 1 -Total rewards: 1421 -Total Receives: 1414 -Stores 984 1 -Total Receives: 1415 -Stores 985 1 -Total Receives: 1416 -Stores 986 1 -Total Receives: 1417 -Stores 987 1 -Total Receives: 1418 -Stores 988 1 -Total Receives: 1419 -Stores 989 1 -Total Receives: 1420 -Stores 990 1 -Total Receives: 1421 -Stores 991 1 -Total rewards: 1428 -Total Receives: 1422 -Stores 992 1 -Total Receives: 1423 -Stores 993 1 -Total Receives: 1424 -Stores 994 1 -Total Receives: 1425 -Stores 995 1 -Total Receives: 1426 -Stores 996 1 -Total Receives: 1427 -Stores 997 1 -Total Receives: 1428 -Stores 998 1 -Total rewards: 1436 -Total Receives: 1429 -Stores 999 1 -Total Receives: 1430 -Stores 1000 1 -Total Receives: 1431 -Stores 1001 1 -Total Receives: 1432 -Stores 1002 1 -Total Receives: 1433 -Stores 1003 1 -Total Receives: 1434 -Stores 1004 1 -Total Receives: 1435 -Stores 1005 1 -Total Receives: 1436 -Stores 1006 1 -Total rewards: 1441 -Total Receives: 1437 -Stores 1007 1 -Total Receives: 1438 -Stores 1008 1 -Total Receives: 1439 -Stores 1009 1 -Total Receives: 1440 -Stores 1010 1 -Total Receives: 1441 -Stores 1011 1 -Total rewards: 1448 -Total Receives: 1442 -Stores 1012 1 -Total Receives: 1443 -Stores 1013 1 -Total Receives: 1444 -Stores 1014 1 -Total Receives: 1445 -Stores 1015 1 -Total Receives: 1446 -Stores 1016 1 -Total Receives: 1447 -Stores 1017 1 -Total Receives: 1448 -Stores 1018 1 -Total rewards: 1454 -Total Receives: 1449 -Stores 1019 1 -Total Receives: 1450 -Stores 1020 1 -Total Receives: 1451 -Stores 1021 1 -Total Receives: 1452 -Stores 1022 1 -Total Receives: 1453 -Stores 1023 1 -Total Receives: 1454 -Stores 1024 1 -Total rewards: 1463 -Total Receives: 1455 -Stores 1025 1 -Total Receives: 1456 -Stores 1026 1 -Total Receives: 1457 -Stores 1027 1 -Total Receives: 1458 -Stores 1028 1 -Total Receives: 1459 -Stores 1029 1 -Total Receives: 1460 -Stores 1030 1 -Total Receives: 1461 -Stores 1031 1 -Total Receives: 1462 -Stores 1032 1 -Total Receives: 1463 -Stores 1033 1 -Total rewards: 1470 -Total Receives: 1464 -Stores 1034 1 -Total Receives: 1465 -Stores 1035 1 -Total Receives: 1466 -Stores 1036 1 -Total Receives: 1467 -Stores 1037 1 -Total Receives: 1468 -Stores 1038 1 -Total Receives: 1469 -Stores 1039 1 -Total Receives: 1470 -Stores 1040 1 -Total rewards: 1477 -tensor([-3.6073, -3.6125, -3.5956, -3.6333, -3.6173, -3.5946, -3.6399, -3.6114, - -3.6209, -3.6462, -3.6462, -3.6462]) -Total rewards: 2312 -Total Receives: 2303 -Stores 1576 1 -Total Receives: 2304 -Stores 1577 1 -Total Receives: 2305 -Stores 1578 1 -Total Receives: 2306 -Stores 1579 1 -Total Receives: 2307 -Stores 1580 1 -Total Receives: 2308 -Stores 1581 1 -Total Receives: 2309 -Stores 1582 1 -Total Receives: 2310 -Stores 1583 1 -Total Receives: 2311 -Stores 1584 1 -Total Receives: 2312 -Stores 1585 1 -tensor([-0.9716, -0.9749, -0.9792, -0.9871, -0.9811, -0.9857, -0.9890]) -Total rewards: 2321 -Total Receives: 2313 -Stores 1586 1 -Total Receives: 2314 -Stores 1587 1 -Total Receives: 2315 -Stores 1588 1 -Total Receives: 2316 -Stores 1589 1 -Total Receives: 2317 -Stores 1590 1 -Total Receives: 2318 -Stores 1591 1 -Total Receives: 2319 -Stores 1592 1 -Total Receives: 2320 -Stores 1593 1 -Total Receives: 2321 -Stores 1594 1 -tensor([-0.0148, -0.0142]) -tensor([-0.0182, -0.0157]) -tensor([-0.4101, -0.4070, -0.4061, -0.4071, -0.4094, -0.4115, -0.4325, -0.4138, - -0.4218]) -tensor([-0.3557, -0.3079, -0.3207, -0.4100, -0.3642, -0.3061]) -tensor([0.0200, 0.0174, 0.0176]) -Total rewards: 2326 -Total Receives: 2322 -Stores 1595 1 -Total Receives: 2323 -Stores 1596 1 -Total Receives: 2324 -Stores 1597 1 -Total Receives: 2325 -Stores 1598 1 -Total Receives: 2326 -Stores 1599 1 -tensor([-0.1783, -0.1627, -0.1001, -0.1414, -0.0663, -0.0767, -0.1529, -0.1216]) -tensor([-0.2658, -0.2905, -0.3269]) -tensor([-0.0680, -0.0706, -0.0701]) -tensor([-0.0596, -0.0594]) -tensor([-0.0110, -0.0102, -0.0135, -0.0114]) -Total rewards: 2334 -Total Receives: 2327 -Stores 1600 1 -Total Receives: 2328 -Stores 1601 1 -Total Receives: 2329 -Stores 1602 1 -Total Receives: 2330 -Stores 1603 1 -Total Receives: 2331 -Stores 1604 1 -Total Receives: 2332 -Stores 1605 1 -Total Receives: 2333 -Stores 1606 1 -Total Receives: 2334 -Stores 1607 1 -tensor([-0.0660, -0.0481, -0.0518]) -tensor([-0.4003, -0.3976, -0.3982, -0.4004, -0.4018, -0.4250, -0.4050, -0.4136]) -tensor([0.0168, 0.0180]) -tensor([-0.0931, -0.0771]) -Total rewards: 2341 -Total Receives: 2335 -Stores 1608 1 -Total Receives: 2336 -Stores 1609 1 -Total Receives: 2337 -Stores 1610 1 -Total Receives: 2338 -Stores 1611 1 -Total Receives: 2339 -Stores 1612 1 -Total Receives: 2340 -Stores 1613 1 -Total Receives: 2341 -Stores 1614 1 -Total rewards: 2345 -Total Receives: 2342 -Stores 1615 1 -Total Receives: 2343 -Stores 1616 1 -Total Receives: 2344 -Stores 1617 1 -Total Receives: 2345 -Stores 1618 1 -tensor([-0.1840, -0.2104, -0.2242, -0.1875, -0.2674, -0.1867, -0.2078]) -tensor([0.0149, 0.0148, 0.0157]) -tensor([-0.1950, -0.1828, -0.1260, -0.1637, -0.1078, -0.1744, -0.1452]) -tensor([-0.2750, -0.2112, -0.2118, -0.2112, -0.2330, -0.3284, -0.2842]) -tensor([-0.7745, -0.7719, -0.7733, -0.7730, -0.7823, -0.7788, -0.7842]) -tensor([-0.0579, -0.0626]) -tensor([-0.0183, -0.0166, -0.0298, -0.0125, -0.0105]) -tensor([-0.6666, -0.6188, -0.6400, -0.6685]) -tensor([0.0210, 0.0275, 0.0204]) -tensor([0.0324, 0.0318]) -tensor([0.0303, 0.0321]) -tensor([0.0411, 0.0417]) -tensor([-3.4538, -3.4435, -3.4394, -3.4826, -3.4684, -3.4891, -3.4598, -3.4793, - -3.5055, -3.5084, -3.5084, -3.5011, -3.5084]) -tensor([-0.3983, -0.3977, -0.3882, -0.4027, -0.4004, -0.3996, -0.4017]) -tensor([-0.0031, -0.0059]) -Total rewards: 2351 -Total Receives: 2346 -Stores 1619 1 -Total Receives: 2347 -Stores 1620 1 -Total Receives: 2348 -Stores 1621 1 -Total Receives: 2349 -Stores 1622 1 -Total Receives: 2350 -Stores 1623 1 -Total Receives: 2351 -Stores 1624 1 -tensor([-0.3793, -0.2995, -0.3650, -0.3226, -0.3054, -0.3586, -0.3376]) -tensor([-0.0801, -0.0482, -0.0581, -0.0399]) -tensor([0.0312, 0.0269]) -tensor([-0.5165, -0.5337, -0.5201]) -Total rewards: 2354 -Total Receives: 2352 -Stores 1625 1 -Total Receives: 2353 -Stores 1626 1 -Total Receives: 2354 -Stores 1627 1 -tensor([-0.1058, -0.0639]) -tensor([-3.6530, -3.6410, -3.6441, -3.6762, -3.6642, -3.6831, -3.6758, -3.6997, - -3.7075, -3.7075, -3.6956, -3.7075]) -tensor([-0.0458, -0.0490]) -tensor([-0.2356, -0.2218, -0.2082, -0.2192, -0.2224, -0.2214]) -tensor([-0.0184, -0.0192, -0.0217, -0.0179]) -tensor([-0.5402, -0.5426, -0.5412, -0.5738, -0.5440, -0.5591]) -tensor([-1.2277, -1.1632, -1.2238, -1.2161, -1.1847, -1.2035, -1.2134, -1.2301]) -tensor([-0.0224, -0.0263, -0.0262]) -tensor([-2.6421, -2.6357, -2.6635, -2.6531, -2.6940, -2.6698, -2.6673, -2.6868, - -2.6973, -2.6973, -2.6829, -2.6973]) -tensor([0.0079, 0.0078]) -Total rewards: 2361 -Total Receives: 2355 -Stores 1628 1 -Total Receives: 2356 -Stores 1629 1 -Total Receives: 2357 -Stores 1630 1 -Total Receives: 2358 -Stores 1631 1 -Total Receives: 2359 -Stores 1632 1 -Total Receives: 2360 -Stores 1633 1 -Total Receives: 2361 -Stores 1634 1 -Total rewards: 2364 -Total Receives: 2362 -Stores 1635 1 -Total Receives: 2363 -Stores 1636 1 -Total Receives: 2364 -Stores 1637 1 -tensor([-0.0001, -0.0012]) -tensor([-0.5031, -0.5034, -0.5043]) -Total rewards: 2370 -Total Receives: 2365 -Stores 1638 1 -Total Receives: 2366 -Total Receives: 2367 -Total Receives: 2368 -Total Receives: 2369 -Total Receives: 2370 -tensor([0.0137, 0.0186, 0.0165, 0.0142]) -tensor([0.0262, 0.0232]) -tensor([-0.0710, -0.0681, -0.0833, -0.0621]) -Total rewards: 2376 -Total Receives: 2371 -Total Receives: 2372 -Total Receives: 2373 -Total Receives: 2374 -Total Receives: 2375 -Total Receives: 2376 -tensor([-0.3726, -0.3212, -0.3103, -0.2836, -0.3447, -0.2701, -0.3243, -0.2706, - -0.2681]) -tensor([-0.3448, -0.3499]) -tensor([-2.7753, -2.7712, -2.7980, -2.7873, -2.8272, -2.8044, -2.8203, -2.8342, - -2.8342, -2.8171, -2.8342]) -tensor([-0.0862, -0.0833, -0.0986]) -tensor([0.0350, 0.0351]) -tensor([-0.3913, -0.3918, -0.3916]) -Total rewards: 2381 -Total Receives: 2377 -Stores 1639 1 -Total Receives: 2378 -Stores 1640 1 -Total Receives: 2379 -Stores 1641 1 -Total Receives: 2380 -Stores 1642 1 -Total Receives: 2381 -Stores 1643 1 -tensor([0.0389, 0.0313, 0.0316]) -Total rewards: 2385 -Total Receives: 2382 -Total Receives: 2383 -Total Receives: 2384 -Total Receives: 2385 -tensor([-0.4279, -0.4274, -0.4338, -0.4301, -0.4315, -0.4308]) -tensor([-0.2241, -0.2209]) -tensor([0.0217, 0.0200, 0.0194]) -tensor([-1.5314, -1.4309, -1.4944, -1.5228, -1.5083, -1.5357, -1.4647, -1.4857, - -1.5435]) -Total rewards: 2389 -Total Receives: 2386 -Total Receives: 2387 -Total Receives: 2388 -Total Receives: 2389 -tensor([-0.0540, -0.0217, -0.0193, -0.0098, -0.0150]) -tensor([-0.2770, -0.2858, -0.2594]) -tensor([-3.6913, -3.7011, -3.7052, -3.7484, -3.7589, -3.7262, -3.7419, -3.7621, - -3.7621, -3.7382, -3.7621]) -tensor([0.0412, 0.0449]) -tensor([-0.1577, -0.1429]) -tensor([-0.2682, -0.2068, -0.2296, -0.3257, -0.2202, -0.2642, -0.2068, -0.2104]) -tensor([-0.0236, -0.0218, -0.0152, -0.0197]) -tensor([0.0285, 0.0291, 0.0286]) -tensor([0.0073, 0.0066]) -tensor([-0.7726, -0.7907, -0.7824, -0.7884, -0.7824, -0.8020]) -tensor([-0.4552, -0.4531, -0.4545, -0.4543, -0.4588]) -tensor([-0.2480, -0.2037, -0.1973, -0.1562, -0.2063, -0.1255, -0.2155, -0.1436, - -0.1547]) -tensor([0.0336, 0.0343]) -tensor([-0.1988, -0.2110]) -tensor([-0.2528, -0.2553, -0.2534, -0.2589]) -tensor([-4.7398, -4.6650, -4.6643, -4.7176, -4.7315, -4.6838, -4.7081, -4.7459, - -4.7459, -4.7016, -4.7459]) -tensor([ 0.0028, -0.0013, 0.0034, 0.0003]) -tensor([0.0418, 0.0388]) -tensor([-0.2771, -0.2832, -0.2867, -0.2793]) -tensor([-3.7343, -3.6867, -3.7183, -3.7289, -3.6941, -3.7122, -3.7406, -3.7406, - -3.7081, -3.7406]) -Total rewards: 2392 -Total Receives: 2390 -Total Receives: 2391 -Total Receives: 2392 -tensor([-0.2648, -0.2212, -0.2079, -0.1438, -0.2193, -0.2062, -0.1222, -0.1345]) -Total rewards: 2395 -Total Receives: 2393 -Stores 1644 1 -Total Receives: 2394 -Stores 1645 1 -Total Receives: 2395 -Stores 1646 1 -tensor([0.0293, 0.0295]) -tensor([-0.0654, -0.0630, -0.0595]) -tensor([-0.2231, -0.2233, -0.2252]) -tensor([0.0306, 0.0336]) -tensor([-3.0239, -2.9079, -2.9798, -3.0160, -2.9952, -3.0226, -2.9536, -2.9689]) -tensor([-0.1717, -0.1727, -0.1725, -0.1721]) -tensor([0.0329, 0.0377]) -tensor([-0.1097, -0.1219, -0.1054]) -tensor([-0.2594, -0.1858, -0.1868, -0.2229, -0.3250, -0.2074, -0.2586, -0.1936, - -0.1855]) -tensor([-2.9762, -2.9328, -2.9597, -2.9701, -2.9805, -2.9418, -2.9546, -2.9874, - -2.9874, -2.9874]) -tensor([0.0258, 0.0246, 0.0249]) -tensor([-0.1075, -0.1360, -0.1597, -0.1682, -0.1114]) -Total rewards: 2399 -Total Receives: 2396 -Total Receives: 2397 -Total Receives: 2398 -Total Receives: 2399 -Total rewards: 2407 -Total Receives: 2400 -Stores 1647 1 -Total Receives: 2401 -Stores 1648 1 -Total Receives: 2402 -Stores 1649 1 -Total Receives: 2403 -Stores 1650 1 -Total Receives: 2404 -Stores 1651 1 -Total Receives: 2405 -Stores 1652 1 -Total Receives: 2406 -Stores 1653 1 -Total Receives: 2407 -Stores 1654 1 -tensor([-0.6196, -0.6406, -0.6396, -0.6427, -0.6371]) -tensor([-0.1015, -0.1053, -0.1067, -0.0894]) -tensor([-0.2192, -0.2074]) -tensor([-0.0984, -0.0964]) -tensor([-0.4325, -0.4435, -0.4437, -0.4462]) -tensor([0.0296, 0.0299, 0.0291]) -tensor([-0.3101, -0.2695, -0.2552, -0.1969, -0.2653, -0.2533, -0.1936]) -Total rewards: 2414 -Total Receives: 2408 -Stores 1655 1 -Total Receives: 2409 -Stores 1656 1 -Total Receives: 2410 -Stores 1657 1 -Total Receives: 2411 -Stores 1658 1 -Total Receives: 2412 -Stores 1659 1 -Total Receives: 2413 -Stores 1660 1 -Total Receives: 2414 -Stores 1661 1 -tensor([-2.2742, -2.2806, -2.2673, -2.3061, -2.2612, -2.2695, -2.2113, -2.2349]) -tensor([-0.2476, -0.2412, -0.2416]) -tensor([-2.3124, -2.3209, -2.2997, -2.3085, -2.3115, -2.2782, -2.2946, -2.3304, - -2.3304, -2.3304]) -Total rewards: 2417 -Total Receives: 2415 -Stores 1662 1 -Total Receives: 2416 -Stores 1663 1 -Total Receives: 2417 -Stores 1664 1 -tensor([-1.3465, -1.3327, -1.3541, -1.3603, -1.3604]) -tensor([-0.9089, -0.9156, -0.9205, -0.9192]) -tensor([-0.5932, -0.5990, -0.6004, -0.5881, -0.6386, -0.5959, -0.6241, -0.6123]) -tensor([0.0368, 0.0341, 0.0342]) -tensor([0.0335, 0.0316]) -tensor([-1.5482, -1.5540, -1.5456, -1.5757, -1.5405, -1.4952, -1.5282, -1.5727]) -tensor([-0.2057, -0.2074, -0.2091, -0.2216, -0.2085]) -tensor([-0.0640, -0.0711, -0.0736, -0.0402, -0.0402]) -Total rewards: 2422 -Total Receives: 2418 -Stores 1665 1 -Total Receives: 2419 -Stores 1666 1 -Total Receives: 2420 -Stores 1667 1 -Total Receives: 2421 -Stores 1668 1 -Total Receives: 2422 -Stores 1669 1 -tensor([0.0337, 0.0338, 0.0336]) -tensor([0.0298, 0.0298]) -tensor([-1.1845, -1.1911, -1.1963]) -tensor([-12.3929, -12.3554, -12.4172, -12.3701, -12.3806, -12.3158, -12.3449, - -12.4209, -12.4209, -12.4208, -11.8321]) -tensor([-0.0754, -0.0811, -0.0816, -0.0545]) -tensor([-0.9064, -0.9089, -0.9073, -0.9193, -0.9061, -0.9408, -0.9151]) -tensor([-2.9863, -2.9616, -3.0076, -2.9730, -2.9773, -2.9350, -2.9561, -3.0103, - -3.0103, -3.0103]) -tensor([0.0101, 0.0099, 0.0117]) -tensor([-0.6987, -0.7018, -0.6990, -0.7463, -0.7012, -0.7313, -0.7174]) -tensor([-0.1410, -0.1237, -0.1252]) -tensor([-0.3768, -0.3269, -0.3140, -0.2537, -0.3256, -0.3192]) -Total rewards: 2424 -Total Receives: 2423 -Stores 1670 1 -Total Receives: 2424 -Stores 1671 1 -tensor([0.0346, 0.0386]) -tensor([-1.1205, -1.1202, -1.1258]) -Total rewards: 2434 -Total Receives: 2425 -Stores 1672 1 -Total Receives: 2426 -Stores 1673 1 -Total Receives: 2427 -Stores 1674 1 -Total Receives: 2428 -Stores 1675 1 -Total Receives: 2429 -Stores 1676 1 -Total Receives: 2430 -Stores 1677 1 -Total Receives: 2431 -Stores 1678 1 -Total Receives: 2432 -Stores 1679 1 -Total Receives: 2433 -Stores 1680 1 -Total Receives: 2434 -Stores 1681 1 -tensor([-0.8720, -0.8484, -0.8422, -0.8654, -0.8763]) -Total rewards: 2437 -Total Receives: 2435 -Total Receives: 2436 -Total Receives: 2437 -tensor([-0.1339, -0.0947, -0.1019, -0.0978]) -tensor([0.0358, 0.0372]) -tensor([0.0319, 0.0319]) -tensor([-0.3948, -0.4324, -0.5380, -0.4179, -0.4751, -0.4136]) -tensor([-0.4728, -0.4726, -0.4675, -0.4681, -0.4733]) -tensor([0.0344, 0.0320]) -Total rewards: 2443 -Total Receives: 2438 -Stores 1682 1 -Total Receives: 2439 -Stores 1683 1 -Total Receives: 2440 -Stores 1684 1 -Total Receives: 2441 -Stores 1685 1 -Total Receives: 2442 -Stores 1686 1 -Total Receives: 2443 -Stores 1687 1 -tensor([-0.1852, -0.1422, -0.1170, -0.1271]) -tensor([-0.8521, -0.8544]) -tensor([-0.1921, -0.1670]) -tensor([-0.4778, -0.4783, -0.4712, -0.4802]) -tensor([0.0365, 0.0341, 0.0365]) -tensor([-0.1990, -0.1761, -0.1865]) -tensor([0.0280, 0.0266, 0.0272]) -tensor([-0.5608, -0.5620, -0.5641, -0.5638, -0.5635, -0.5996, -0.5515]) -tensor([-0.0865, -0.0907, -0.0790, -0.0936, -0.0813, -0.0828]) -tensor([-2.8850, -2.8564, -2.8881, -2.8407, -2.8809, -2.8517, -2.8494, -2.8367, - -2.8942, -2.8942, -2.8933, -2.8942]) -tensor([-0.2683, -0.2612, -0.3171, -0.4225, -0.2999, -0.2923, -0.2384]) -tensor([-2.6628, -2.6334, -2.6661, -2.6183, -2.6589, -2.6293, -2.6268, -2.6729, - -2.6729, -2.6680, -2.6729]) -Total rewards: 2452 -Total Receives: 2444 -Stores 1688 1 -Total Receives: 2445 -Stores 1689 1 -Total Receives: 2446 -Stores 1690 1 -Total Receives: 2447 -Stores 1691 1 -Total Receives: 2448 -Stores 1692 1 -Total Receives: 2449 -Stores 1693 1 -Total Receives: 2450 -Stores 1694 1 -Total Receives: 2451 -Stores 1695 1 -Total Receives: 2452 -Stores 1696 1 -tensor([-0.3103, -0.3363, -0.3117, -0.3131]) -tensor([0.0331, 0.0340]) -tensor([0.0332, 0.0319]) -Total rewards: 2455 -Total Receives: 2453 -Stores 1697 1 -Total Receives: 2454 -Stores 1698 1 -Total Receives: 2455 -Stores 1699 1 -tensor([-0.3186, -0.2680, -0.2561, -0.2669, -0.2761, -0.1041, -0.1228, -0.1160]) -tensor([-0.1783, -0.1791, -0.1782, -0.1803, -0.1807]) -Total rewards: 2457 -Total Receives: 2456 -Total Receives: 2457 -tensor([-0.3349, -0.2958, -0.2830, -0.2914, -0.3007, -0.1527, -0.1624]) -tensor([-0.2081, -0.2667, -0.2205]) -tensor([-0.1015, -0.1395, -0.0992]) -tensor([-0.1082, -0.1116, -0.1029, -0.1058, -0.1094]) -tensor([-0.1625, -0.2241, -0.1351]) -tensor([0.0343, 0.0345]) -tensor([0.0347, 0.0339]) -tensor([-0.2034, -0.1889, -0.1714, -0.1370]) -tensor([-0.3417, -0.3424, -0.3458, -0.3446, -0.3437, -0.3454, -0.3455, -0.3451, - -0.3448]) -tensor([-2.8310, -2.8160, -2.8343, -2.8244, -2.7968, -2.7922, -2.8489, -2.8489, - -2.8365, -2.8489, -2.8429]) -tensor([-0.1242, -0.1186, -0.0892, -0.1372, -0.0995, -0.0879, -0.0910, -0.0882, - -0.0945]) -tensor([-0.1087, -0.1139, -0.0731, -0.0822]) -tensor([-0.1913, -0.1612]) -tensor([-0.3538, -0.3224, -0.3165, -0.3205, -0.2155, -0.1968]) -Total rewards: 2465 -Total Receives: 2458 -Stores 1700 1 -Total Receives: 2459 -Stores 1701 1 -Total Receives: 2460 -Stores 1702 1 -Total Receives: 2461 -Stores 1703 1 -Total Receives: 2462 -Stores 1704 1 -Total Receives: 2463 -Stores 1705 1 -Total Receives: 2464 -Stores 1706 1 -Total Receives: 2465 -Stores 1707 1 -tensor([-0.1005, -0.0976, -0.1088, -0.0801, -0.0450, -0.0668, -0.0492, -0.0742]) -tensor([-0.0485, -0.0951, -0.0340, -0.0299]) -tensor([0.0188, 0.0205, 0.0201, 0.0196]) -tensor([-0.8869, -0.8642, -0.8821, -0.8612, -0.9270, -0.8631, -0.8917, -0.8885]) -tensor([-0.1257, -0.1308, -0.1271, -0.1277, -0.1291, -0.1269]) -tensor([-0.4534, -0.4598, -0.4558, -0.5658, -0.4607, -0.4577, -0.4651]) -Total rewards: 2474 -Total Receives: 2466 -Stores 1708 1 -Total Receives: 2467 -Stores 1709 1 -Total Receives: 2468 -Total Receives: 2469 -Total Receives: 2470 -Total Receives: 2471 -Total Receives: 2472 -Total Receives: 2473 -Total Receives: 2474 -tensor([-0.1274, -0.1218, -0.1412, -0.0954, -0.0800, -0.0628, -0.0871]) -tensor([-0.1861, -0.1408, -0.1721, -0.1394, -0.1533]) -Total rewards: 2476 -Total Receives: 2475 -Total Receives: 2476 -Total rewards: 2482 -Total Receives: 2477 -Stores 1710 1 -Total Receives: 2478 -Stores 1711 1 -Total Receives: 2479 -Stores 1712 1 -Total Receives: 2480 -Stores 1713 1 -Total Receives: 2481 -Stores 1714 1 -Total Receives: 2482 -Stores 1715 1 -tensor([0.0348, 0.0369, 0.0359]) -tensor([-3.0656, -3.0508, -3.0692, -3.0609, -3.0314, -3.0231, -3.0951, -3.0951, - -3.0899, -3.0714, -3.0694]) -tensor([-0.1022, -0.0708, -0.0811, -0.0636, -0.0605, -0.0912]) -Total rewards: 2484 -Total Receives: 2483 -Stores 1716 1 -Total Receives: 2484 -Stores 1717 1 -tensor([-0.1199, -0.1157, -0.1293, -0.0925, -0.0766, -0.0848, -0.0447]) -tensor([-0.4901, -0.4893, -0.4912, -0.5939, -0.4877, -0.4917, -0.5007]) -tensor([-0.3007, -0.2693, -0.2788, -0.2743, -0.2503]) -tensor([0.0403, 0.0399]) -Total rewards: 2488 -Total Receives: 2485 -Stores 1718 1 -Total Receives: 2486 -Stores 1719 1 -Total Receives: 2487 -Stores 1720 1 -Total Receives: 2488 -Stores 1721 1 -tensor([0.0345, 0.0339, 0.0314, 0.0273]) -tensor([-0.5337, -0.5180, -0.5512]) -tensor([-0.3040, -0.2713, -0.2822, -0.2761]) -tensor([-0.0959, -0.0941, -0.0720]) -tensor([-2.1834, -2.1706, -2.1873, -2.1799, -2.1538, -2.2141, -2.2169, -2.2169, - -2.2072, -2.1891, -2.1845]) -Total rewards: 2492 -Total Receives: 2489 -Stores 1722 1 -Total Receives: 2490 -Stores 1723 1 -Total Receives: 2491 -Stores 1724 1 -Total Receives: 2492 -Stores 1725 1 -tensor([-0.5930, -0.5748]) -tensor([0.0383, 0.0308]) -tensor([-0.1840, -0.1664, -0.1302, -0.1447, -0.1181]) -tensor([-1.2645, -1.2517, -1.2686, -1.2614, -1.2360, -1.2970, -1.3019, -1.2897, - -1.2711, -1.2659]) -tensor([-0.0561, -0.0597]) -tensor([-0.0650, -0.0989, -0.0154, -0.0528, -0.0311, -0.0239]) -tensor([0.0247, 0.0247, 0.0221]) -Total rewards: 2498 -Total Receives: 2493 -Stores 1726 1 -Total Receives: 2494 -Stores 1727 1 -Total Receives: 2495 -Stores 1728 1 -Total Receives: 2496 -Stores 1729 1 -Total Receives: 2497 -Stores 1730 1 -Total Receives: 2498 -Stores 1731 1 -tensor([-0.1428, -0.1123, -0.1224, -0.1052, -0.1326]) -tensor([0.0209, 0.0200]) -Total rewards: 2506 -Total Receives: 2499 -Total Receives: 2500 -Total Receives: 2501 -Total Receives: 2502 -Total Receives: 2503 -Total Receives: 2504 -Total Receives: 2505 -Total Receives: 2506 -tensor([-0.4053, -0.4180, -0.4213, -0.4237]) -tensor([0.0168, 0.0173]) -tensor([-0.1052, -0.1296, -0.0934, -0.0770, -0.0648]) -tensor([-1.3734, -1.3228, -1.3543, -1.3688, -1.3547, -1.3643, -1.3679]) -Total rewards: 2514 -Total Receives: 2507 -Total Receives: 2508 -Total Receives: 2509 -Total Receives: 2510 -Total Receives: 2511 -Total Receives: 2512 -Total Receives: 2513 -Total Receives: 2514 -tensor([-0.3514, -0.3501]) -tensor([0.0355, 0.0320]) -tensor([-0.0853, -0.0780, -0.0665, -0.0529]) -Total rewards: 2515 -Total Receives: 2515 -tensor([-0.2353, -0.2492, -0.1813, -0.1950, -0.1817, -0.2030, -0.1880]) -tensor([-0.1585, -0.1438, -0.0709, -0.1224, -0.0908]) -Total rewards: 2525 -Total Receives: 2516 -Total Receives: 2517 -Total Receives: 2518 -Total Receives: 2519 -Total Receives: 2520 -Total Receives: 2521 -Total Receives: 2522 -Total Receives: 2523 -Total Receives: 2524 -Total Receives: 2525 -tensor([-0.4862, -0.4904, -0.4924]) -tensor([-0.0133, -0.0230]) -tensor([-0.1768, -0.1597, -0.1419, -0.1178]) -tensor([0.0290, 0.0384, 0.0271]) -tensor([-1.2349, -1.2762, -1.2235, -1.2393, -1.2333, -1.2657, -1.2835, -1.2588, - -1.2417, -1.2301]) -tensor([-0.3222, -0.3222, -0.3234, -0.3276, -0.3239, -0.3223, -0.3232]) -tensor([0.0072, 0.0114, 0.0072, 0.0085]) -tensor([-0.1736, -0.1826, -0.1440, -0.0959, -0.1524, -0.1159]) -tensor([-0.1531, -0.1489]) -tensor([-0.0344, 0.0171, 0.0170, -0.0228, -0.0079]) -Total rewards: 2528 -Total Receives: 2526 -Total Receives: 2527 -Total Receives: 2528 -tensor([0.0286, 0.0231]) -tensor([0.0328, 0.0318]) -tensor([-0.0040, -0.0028, -0.0020]) -tensor([-0.1653, -0.1770, -0.1181, -0.1281, -0.0581, -0.0885]) -tensor([-0.0321, -0.0303, -0.0281, -0.0387]) -Total rewards: 2533 -Total Receives: 2529 -Stores 1732 1 -Total Receives: 2530 -Stores 1733 1 -Total Receives: 2531 -Stores 1734 1 -Total Receives: 2532 -Stores 1735 1 -Total Receives: 2533 -Stores 1736 1 -tensor([-0.6524, -0.6571]) -tensor([-0.8227, -0.8413, -0.8253, -0.8265, -0.8370, -0.8509, -0.8346, -0.8277, - -0.8064]) -tensor([-0.0108, -0.0119, -0.0144]) -tensor([-1.2097, -1.2325, -1.1866, -1.2045, -1.1594, -1.2165, -1.2308, -1.1978, - -1.2042, -1.2347]) -Total rewards: 2542 -Total Receives: 2534 -Stores 1737 1 -Total Receives: 2535 -Stores 1738 1 -Total Receives: 2536 -Stores 1739 1 -Total Receives: 2537 -Stores 1740 1 -Total Receives: 2538 -Stores 1741 1 -Total Receives: 2539 -Stores 1742 1 -Total Receives: 2540 -Stores 1743 1 -Total Receives: 2541 -Stores 1744 1 -Total Receives: 2542 -Stores 1745 1 -tensor([-0.5095, -0.5208, -0.5060, -0.5084, -0.5099, -0.5209, -0.5078, -0.5239, - -0.5237]) -tensor([-0.0174, -0.0286]) -tensor([0.0394, 0.0392]) -Total rewards: 2547 -Total Receives: 2543 -Stores 1746 1 -Total Receives: 2544 -Stores 1747 1 -Total Receives: 2545 -Stores 1748 1 -Total Receives: 2546 -Stores 1749 1 -Total Receives: 2547 -Stores 1750 1 -Total rewards: 2556 -Total Receives: 2548 -Total Receives: 2549 -Total Receives: 2550 -Total Receives: 2551 -Total Receives: 2552 -Total Receives: 2553 -Total Receives: 2554 -Total Receives: 2555 -Total Receives: 2556 -tensor([-0.0367, -0.0262, -0.0226, -0.0254]) -tensor([-0.1921, -0.1244, -0.1836, -0.1484]) -Total rewards: 2564 -Total Receives: 2557 -Total Receives: 2558 -Total Receives: 2559 -Total Receives: 2560 -Total Receives: 2561 -Total Receives: 2562 -Total Receives: 2563 -Total Receives: 2564 -tensor([-1.4359, -1.4566, -1.4967, -1.5072, -1.4698, -1.4467, -1.5067, -1.5055]) -tensor([-0.1022, -0.0454, -0.0889, -0.0713]) -tensor([0.0387, 0.0371]) -tensor([0.0341, 0.0320, 0.0324, 0.0327]) -tensor([0.0192, 0.0182, 0.0135, 0.0186]) -tensor([-0.0971, -0.0825, -0.0840]) -tensor([-1.0181, -1.0517, -1.0567, -1.0308, -1.0081, -1.0597, -1.0575]) -tensor([0.0320, 0.0329]) -tensor([-0.1097, -0.1027, -0.0983]) -Total rewards: 2573 -Total Receives: 2565 -Total Receives: 2566 -Total Receives: 2567 -Total Receives: 2568 -Total Receives: 2569 -Total Receives: 2570 -Total Receives: 2571 -Total Receives: 2572 -Total Receives: 2573 -tensor([-0.1550, -0.1547, -0.1441]) -tensor([-0.1975, -0.2058, -0.1587, -0.1673, -0.1315, -0.0974]) -Total rewards: 2575 -Total Receives: 2574 -Stores 1751 1 -Total Receives: 2575 -Stores 1752 1 -tensor([-1.5195, -1.5510, -1.5236, -1.5194, -1.5445, -1.5722, -1.5398, -1.5257, - -1.5725]) -tensor([-0.1438, -0.1320]) -tensor([-0.6598, -0.6675, -0.6612, -0.6740, -0.6538, -0.6686, -0.6638, -0.6852, - -0.6757]) -tensor([-0.1232, -0.1149]) -Total rewards: 2584 -Total Receives: 2576 -Total Receives: 2577 -Total Receives: 2578 -Total Receives: 2579 -Total Receives: 2580 -Total Receives: 2581 -Total Receives: 2582 -Total Receives: 2583 -Total Receives: 2584 -tensor([-1.0083, -1.0148, -1.0109, -1.0159, -1.0242, -1.0155, -1.0132, -1.0241]) -tensor([0.0286, 0.0300]) -tensor([-0.0002, 0.0105, -0.0383, 0.0105]) -Total rewards: 2588 -Total Receives: 2585 -Stores 1753 1 -Total Receives: 2586 -Stores 1754 1 -Total Receives: 2587 -Stores 1755 1 -Total Receives: 2588 -Stores 1756 1 -tensor([0.0144, 0.0081, 0.0159, 0.0171]) -tensor([-0.0383, -0.0296, -0.0285, -0.0148]) -tensor([-1.9113, -1.9420, -1.9477, -1.9217, -1.9017, -1.9632]) -tensor([-0.1662, -0.1664]) -tensor([-0.0090, -0.0109, -0.0103]) -tensor([-0.0400, -0.0284, -0.0712]) -tensor([0.0086, 0.0110]) -tensor([0.0406, 0.0402, 0.0400]) -tensor([-0.2136, -0.2217, -0.1747, -0.1826, -0.1493]) -tensor([-0.1002, -0.0367, -0.0893, -0.0435, -0.0334]) -tensor([-0.9152, -0.9181, -0.9175, -0.9192, -0.9343, -0.9199, -0.9405, -0.9295]) -tensor([0.0355, 0.0358, 0.0351]) -tensor([-0.1036, -0.1351]) -tensor([-0.8441, -0.8263, -0.8448]) -tensor([0.0531, 0.0526]) -tensor([-0.2337, -0.2431, -0.2091, -0.2143]) -Total rewards: 2597 -Total Receives: 2589 -Total Receives: 2590 -Total Receives: 2591 -Total Receives: 2592 -Total Receives: 2593 -Total Receives: 2594 -Total Receives: 2595 -Total Receives: 2596 -Total Receives: 2597 -tensor([-0.0404, -0.0790]) -Total rewards: 2606 -Total Receives: 2598 -Total Receives: 2599 -Total Receives: 2600 -Total Receives: 2601 -Total Receives: 2602 -Total Receives: 2603 -Total Receives: 2604 -Total Receives: 2605 -Total Receives: 2606 -tensor([-0.4685, -0.4689, -0.4698, -0.4705, -0.4703, -0.4717, -0.4971, -0.4818, - -0.4726]) -tensor([-0.1016, -0.0898, -0.0931]) -tensor([0.0255, 0.0207]) -tensor([0.0139, 0.0097]) -tensor([0.0386, 0.0380]) -tensor([-0.2353, -0.2562, -0.2347, -0.2388, -0.2693, -0.3119, -0.2505, -0.2348]) -tensor([-0.1117, -0.1516]) -tensor([0.0295, 0.0291]) -tensor([-0.4134, -0.4144, -0.4015, -0.4144, -0.4191, -0.4189, -0.4153, -0.4221, - -0.4165, -0.4190]) -tensor([0.0416, 0.0412]) -tensor([-2.9885, -2.9805, -2.9249, -2.9589, -2.9368, -2.9102, -2.9906]) -Total rewards: 2610 -Total Receives: 2607 -Stores 1757 1 -Total Receives: 2608 -Stores 1758 1 -Total Receives: 2609 -Stores 1759 1 -Total Receives: 2610 -Stores 1760 1 -tensor([-0.0089, -0.0185]) -tensor([0.0483, 0.0342]) -tensor([-0.0899, -0.0269, -0.0782, -0.0075, -0.0305]) -tensor([0.0096, 0.0074, 0.0083, 0.0074, 0.0086]) -Total rewards: 2615 -Total Receives: 2611 -Stores 1761 1 -Total Receives: 2612 -Stores 1762 1 -Total Receives: 2613 -Stores 1763 1 -Total Receives: 2614 -Stores 1764 1 -Total Receives: 2615 -Stores 1765 1 -tensor([0.0182, 0.0196]) -Total rewards: 2624 -Total Receives: 2616 -Total Receives: 2617 -Total Receives: 2618 -Total Receives: 2619 -Total Receives: 2620 -Total Receives: 2621 -Total Receives: 2622 -Total Receives: 2623 -Total Receives: 2624 -tensor([-0.1762, -0.2018, -0.1808, -0.2101, -0.2416, -0.1911, -0.1568]) -tensor([-0.2062, -0.2168, -0.1775, -0.0678]) -tensor([-0.9201, -0.9134, -0.9140, -0.9259]) -tensor([0.0411, 0.0399]) -tensor([-0.0218, -0.0229, -0.0181]) -Total rewards: 2632 -Total Receives: 2625 -Total Receives: 2626 -Total Receives: 2627 -Total Receives: 2628 -Total Receives: 2629 -Total Receives: 2630 -Total Receives: 2631 -Total Receives: 2632 -tensor([-0.2383, -0.2856, -0.2562, -0.2068, -0.2036, -0.2865, -0.2140, -0.2202, - -0.2055]) -tensor([-0.1109, -0.0662, -0.1037, -0.0707]) -Total rewards: 2636 -Total Receives: 2633 -Stores 1766 1 -Total Receives: 2634 -Stores 1767 1 -Total Receives: 2635 -Stores 1768 1 -Total Receives: 2636 -Stores 1769 1 -tensor([-0.0993, -0.1301, -0.0561, -0.1431, -0.1740, -0.1119, -0.0811]) -tensor([-2.1418, -2.1313, -2.1476, -2.0702, -2.1476, -2.1528, -2.1202, -2.0810, - -2.1517]) -Total rewards: 2643 -Total Receives: 2637 -Stores 1770 1 -Total Receives: 2638 -Stores 1771 1 -Total Receives: 2639 -Stores 1772 1 -Total Receives: 2640 -Stores 1773 1 -Total Receives: 2641 -Stores 1774 1 -Total Receives: 2642 -Stores 1775 1 -Total Receives: 2643 -Stores 1776 1 -tensor([0.0178, 0.0190, 0.0172, 0.0184]) -tensor([-0.0673, -0.0641, -0.0639]) -tensor([-0.1403, -0.1316]) -tensor([0.0246, 0.0263]) -tensor([-0.0437, -0.0350, -0.0407]) -tensor([-0.4325, -0.4343, -0.4374, -0.4390, -0.4362, -0.4430, -0.4374, -0.4389]) -tensor([0.0329, 0.0318, 0.0313]) -tensor([-0.0013, 0.0002]) -tensor([0.0243, 0.0235]) -tensor([-0.1767, -0.0981, -0.1882, -0.2187, -0.1565, -0.1209]) -tensor([-0.0097, -0.0084]) -tensor([0.0377, 0.0363]) -tensor([0.0330, 0.0309]) -tensor([0.0400, 0.0383]) -tensor([-0.1953, -0.2049, -0.2316, -0.1771, -0.1467]) -tensor([-0.2355, -0.2449, -0.2045, -0.1116, -0.1005]) -Total rewards: 2652 -Total Receives: 2644 -Total Receives: 2645 -Total Receives: 2646 -Total Receives: 2647 -Total Receives: 2648 -Total Receives: 2649 -Total Receives: 2650 -Total Receives: 2651 -Total Receives: 2652 -tensor([0.0163, 0.0144]) -tensor([-0.3279, -0.3003, -0.2622, -0.3302, -0.2668, -0.2675, -0.2606]) -Total rewards: 2658 -Total Receives: 2653 -Total Receives: 2654 -Total Receives: 2655 -Total Receives: 2656 -Total Receives: 2657 -Total Receives: 2658 -tensor([-1.7257, -1.7112, -1.7416, -1.7319, -1.6522, -1.7302, -1.7436, -1.7394, - -1.7036, -1.6660, -1.7427, -1.7468]) -tensor([-1.7451, -1.7532, -1.7617, -1.7337, -1.7583, -1.7576, -1.7529, -1.7624]) -Total rewards: 2662 -Total Receives: 2659 -Stores 1777 1 -Total Receives: 2660 -Stores 1778 1 -Total Receives: 2661 -Stores 1779 1 -Total Receives: 2662 -Stores 1780 1 -tensor([-0.0454, -0.0322, -0.0255]) -tensor([-0.0613, -0.0592, -0.0568, -0.0491, -0.0521]) -tensor([-1.3526, -1.3599, -1.3668, -1.3636, -1.3647, -1.3605, -1.3710]) -tensor([-0.9339, -0.9390, -0.9373, -0.9393, -0.9369, -0.9428]) -tensor([-0.0513, -0.0480, -0.0387]) -Total rewards: 2665 -Total Receives: 2663 -Stores 1781 1 -Total Receives: 2664 -Stores 1782 1 -Total Receives: 2665 -Stores 1783 1 -tensor([0.0355, 0.0404]) -tensor([-0.0042, -0.0024]) -tensor([-1.6469, -1.6294, -1.6615, -1.6529, -1.6516, -1.6627, -1.6590, -1.6239, - -1.5828, -1.6671, -1.6610, -1.6773, -1.6773]) -tensor([-0.2352, -0.2444, -0.2787, -0.2327]) -tensor([-0.0074, -0.0088, -0.0077, -0.0062]) -tensor([0.0109, 0.0087, 0.0132]) -tensor([-0.2437, -0.1050, -0.2531, -0.2120, -0.1175]) -tensor([0.0349, 0.0333]) -Total rewards: 2673 -Total Receives: 2666 -Total Receives: 2667 -Total Receives: 2668 -Total Receives: 2669 -Total Receives: 2670 -Total Receives: 2671 -Total Receives: 2672 -Total Receives: 2673 -tensor([-0.2291, -0.2295, -0.2294]) -tensor([-0.2330, -0.2151, -0.1668, -0.2346, -0.1848, -0.1582, -0.1337]) -tensor([0.0187, 0.0187]) -Total rewards: 2680 -Total Receives: 2674 -Stores 1784 1 -Total Receives: 2675 -Stores 1785 1 -Total Receives: 2676 -Stores 1786 1 -Total Receives: 2677 -Stores 1787 1 -Total Receives: 2678 -Stores 1788 1 -Total Receives: 2679 -Stores 1789 1 -Total Receives: 2680 -Stores 1790 1 -tensor([-0.0115, -0.0046, -0.0063, -0.0077, -0.0072]) -tensor([-0.1330, -0.1290, -0.1282, -0.1232, -0.0933]) -tensor([-2.7767, -2.7618, -2.7851, -2.7810, -2.7813, -2.7875, -2.7866, -2.7642, - -2.7894, -2.7865, -2.7987, -2.7987, -2.7987, -2.7987]) -tensor([-0.3027, -0.3145, -0.2626, -0.1490]) -tensor([0.0187, 0.0182, 0.0204]) -tensor([-0.3559, -0.3584, -0.3616, -0.3506, -0.3634, -0.3598, -0.3638, -0.3584]) -tensor([0.0284, 0.0266]) -Total rewards: 2682 -Total Receives: 2681 -Stores 1791 1 -Total Receives: 2682 -Stores 1792 1 -tensor([0.0093, 0.0153, 0.0121, 0.0103]) -tensor([-0.3099, -0.3162, -0.2831]) -tensor([-0.1291, -0.2182, -0.1284, -0.2249, -0.2615, -0.1409]) -tensor([-2.7436, -2.7558, -2.7494, -2.7492, -2.7585, -2.7569, -2.7256, -2.7616, - -2.7568, -2.7815, -2.7815, -2.7815, -2.7815]) -tensor([-0.0858, -0.1042, -0.0608, -0.1015]) -tensor([-1.9137, -1.9280, -1.9202, -1.9191, -1.9305, -1.9279, -1.9341, -1.9284, - -1.9583, -1.9583, -1.9583, -1.9583]) -tensor([-0.5688, -0.5563, -0.5655, -0.5719, -0.5699]) -tensor([-0.2823, -0.2929]) -tensor([-0.3696, -0.3419, -0.2929, -0.3716, -0.3035, -0.2818, -0.2821]) -tensor([0.0262, 0.0258]) -Total rewards: 2690 -Total Receives: 2683 -Stores 1793 1 -Total Receives: 2684 -Stores 1794 1 -Total Receives: 2685 -Stores 1795 1 -Total Receives: 2686 -Stores 1796 1 -Total Receives: 2687 -Stores 1797 1 -Total Receives: 2688 -Stores 1798 1 -Total Receives: 2689 -Stores 1799 1 -Total Receives: 2690 -Stores 1800 1 -tensor([-0.1501, -0.2416, -0.2495, -0.2869, -0.1628]) -tensor([-0.1824, -0.1768, -0.1771, -0.1727]) -tensor([-0.0014, -0.0041, -0.0020, 0.0005]) -tensor([-0.0824, -0.1026, -0.0976]) -tensor([0.0337, 0.0303, 0.0281]) -tensor([-0.2729, -0.2557, -0.2078, -0.2743, -0.2280, -0.1529]) -tensor([-0.4952, -0.4942, -0.4917, -0.4990, -0.5005, -0.4954]) -tensor([-0.2695, -0.2413, -0.1810, -0.2711, -0.1183]) -tensor([0.0148, 0.0148]) -tensor([-0.1470, -0.1411, -0.0982, -0.1420, -0.0931]) -tensor([-0.4432, -0.4459, -0.4493, -0.4520, -0.4439, -0.4523, -0.4464]) -tensor([-0.5846, -0.5682, -0.5871, -0.6314, -0.5997]) -Total rewards: 2692 -Total Receives: 2691 -Stores 1801 1 -Total Receives: 2692 -Stores 1802 1 -tensor([-0.2686, -0.2695, -0.2702, -0.2681, -0.2725, -0.2754]) -tensor([-0.1065, -0.0859]) -tensor([-0.4202, -0.4200, -0.4184, -0.4121]) -tensor([-0.1811, -0.1747, -0.1972, -0.1827, -0.2070]) -tensor([-0.3189, -0.3215, -0.3223]) -Total rewards: 2702 -Total Receives: 2693 -Stores 1803 1 -Total Receives: 2694 -Stores 1804 1 -Total Receives: 2695 -Stores 1805 1 -Total Receives: 2696 -Stores 1806 1 -Total Receives: 2697 -Stores 1807 1 -Total Receives: 2698 -Stores 1808 1 -Total Receives: 2699 -Stores 1809 1 -Total Receives: 2700 -Stores 1810 1 -Total Receives: 2701 -Stores 1811 1 -Total Receives: 2702 -Stores 1812 1 -tensor([-5.6262, -5.6207, -5.6692, -5.6204, -5.6295, -5.6588, -5.6271, -5.6214, - -5.6271, -5.6674, -5.6674, -5.6674, -5.6674, -5.6674]) -tensor([-0.0628, -0.0689]) -tensor([0.0297, 0.0264]) -tensor([0.0188, 0.0166, 0.0164]) -tensor([-0.2868, -0.2578, -0.1970, -0.2885]) -tensor([-0.1696, -0.1721, -0.1214, -0.1660, -0.1033]) -tensor([-0.3434, -0.3401, -0.3477, -0.3444, -0.3466, -0.3422, -0.3456]) -tensor([0.0213, 0.0202, 0.0181]) -tensor([-0.0758, -0.0808, -0.0950, -0.0789, -0.0605]) -tensor([-0.3226, -0.3208, -0.3237, -0.3226, -0.3271]) -tensor([0.0287, 0.0283]) -tensor([-0.4006, -0.4044, -0.4032, -0.4052, -0.4019, -0.4037]) -tensor([-0.0981, -0.0935, -0.0401, -0.0490, -0.0478]) -tensor([-1.1841, -1.1875, -1.2353, -1.2305]) -tensor([-0.3009, -0.2809, -0.3014]) -tensor([-0.1509, -0.1519, -0.1124, -0.1528, -0.0931]) -tensor([-0.0742, -0.0731, -0.0633, -0.0628]) -tensor([0.0189, 0.0176, 0.0140]) -tensor([-1.1395, -1.1399, -1.1092, -1.1532]) -tensor([0.0296, 0.0304]) -tensor([-0.1586, -0.1162, -0.1810, -0.1591, -0.1876]) -tensor([-0.1832, -0.1851, -0.1861, -0.1850, -0.1912, -0.1864, -0.1828]) -tensor([-7.6028, -7.6110, -7.5533, -7.5391, -7.6025, -7.5448, -7.5570, -7.5901, - -7.5543, -7.5609, -7.6177, -7.6177, -7.6177, -7.6177, -7.6177]) -tensor([-0.5141, -0.5143, -0.5200, -0.5198, -0.5161]) -tensor([0.0158, 0.0136]) -Total Receives: 1471 -Stores 1041 1 -Total Receives: 1472 -Stores 1042 1 -Total Receives: 1473 -Stores 1043 1 -Total Receives: 1474 -Stores 1044 1 -Total Receives: 1475 -Stores 1045 1 -Total Receives: 1476 -Stores 1046 1 -Total Receives: 1477 -Stores 1047 1 -Total rewards: 1483 -Total Receives: 1478 -Stores 1048 1 -Total Receives: 1479 -Stores 1049 1 -Total Receives: 1480 -Stores 1050 1 -Total Receives: 1481 -Stores 1051 1 -Total Receives: 1482 -Stores 1052 1 -Total Receives: 1483 -Stores 1053 1 -Total rewards: 1485 -Total Receives: 1484 -Stores 1054 1 -Total Receives: 1485 -Stores 1055 1 -Total rewards: 1489 -Total Receives: 1486 -Stores 1056 1 -Total Receives: 1487 -Stores 1057 1 -Total Receives: 1488 -Stores 1058 1 -Total Receives: 1489 -Stores 1059 1 -Total rewards: 1498 -Total Receives: 1490 -Stores 1060 1 -Total Receives: 1491 -Stores 1061 1 -Total Receives: 1492 -Stores 1062 1 -Total Receives: 1493 -Stores 1063 1 -Total Receives: 1494 -Stores 1064 1 -Total Receives: 1495 -Stores 1065 1 -Total Receives: 1496 -Stores 1066 1 -Total Receives: 1497 -Stores 1067 1 -Total Receives: 1498 -Stores 1068 1 -Total rewards: 1504 -Total Receives: 1499 -Stores 1069 1 -Total Receives: 1500 -Stores 1070 1 -Total Receives: 1501 -Stores 1071 1 -Total Receives: 1502 -Stores 1072 1 -Total Receives: 1503 -Stores 1073 1 -Total Receives: 1504 -Stores 1074 1 -Total rewards: 1507 -Total Receives: 1505 -Stores 1075 1 -Total Receives: 1506 -Stores 1076 1 -Total Receives: 1507 -Stores 1077 1 -Total rewards: 1516 -Total Receives: 1508 -Stores 1078 1 -Total Receives: 1509 -Stores 1079 1 -Total Receives: 1510 -Stores 1080 1 -Total Receives: 1511 -Stores 1081 1 -Total Receives: 1512 -Stores 1082 1 -Total Receives: 1513 -Stores 1083 1 -Total Receives: 1514 -Total Receives: 1515 -Total Receives: 1516 -Total rewards: 1525 -Total Receives: 1517 -Total Receives: 1518 -Total Receives: 1519 -Total Receives: 1520 -Total Receives: 1521 -Total Receives: 1522 -Total Receives: 1523 -Total Receives: 1524 -Total Receives: 1525 -Total rewards: 1527 -Total Receives: 1526 -Stores 1084 1 -Total Receives: 1527 -Stores 1085 1 -Total rewards: 1533 -Total Receives: 1528 -Total Receives: 1529 -Total Receives: 1530 -Total Receives: 1531 -Total Receives: 1532 -Total Receives: 1533 -Total rewards: 1536 -Total Receives: 1534 -Stores 1086 1 -Total Receives: 1535 -Stores 1087 1 -Total Receives: 1536 -Stores 1088 1 -Total rewards: 1543 -Total Receives: 1537 -Total Receives: 1538 -Total Receives: 1539 -Total Receives: 1540 -Total Receives: 1541 -Total Receives: 1542 -Total Receives: 1543 -Total rewards: 1552 -Total Receives: 1544 -Stores 1089 1 -Total Receives: 1545 -Stores 1090 1 -Total Receives: 1546 -Stores 1091 1 -Total Receives: 1547 -Stores 1092 1 -Total Receives: 1548 -Stores 1093 1 -Total Receives: 1549 -Stores 1094 1 -Total Receives: 1550 -Stores 1095 1 -Total Receives: 1551 -Stores 1096 1 -Total Receives: 1552 -Stores 1097 1 -Total rewards: 1556 -Total Receives: 1553 -Total Receives: 1554 -Total Receives: 1555 -Total Receives: 1556 -Total rewards: 1560 -Total Receives: 1557 -Stores 1098 1 -Total Receives: 1558 -Stores 1099 1 -Total Receives: 1559 -Stores 1100 1 -Total Receives: 1560 -Stores 1101 1 -Total rewards: 1563 -Total Receives: 1561 -Stores 1102 1 -Total Receives: 1562 -Stores 1103 1 -Total Receives: 1563 -Stores 1104 1 -Total rewards: 1569 -Total Receives: 1564 -Total Receives: 1565 -Total Receives: 1566 -Total Receives: 1567 -Total Receives: 1568 -Total Receives: 1569 -Total rewards: 1577 -Total Receives: 1570 -Stores 1105 1 -Total Receives: 1571 -Stores 1106 1 -Total Receives: 1572 -Stores 1107 1 -Total Receives: 1573 -Stores 1108 1 -Total Receives: 1574 -Stores 1109 1 -Total Receives: 1575 -Stores 1110 1 -Total Receives: 1576 -Stores 1111 1 -Total Receives: 1577 -Total rewards: 1586 -Total Receives: 1578 -Total Receives: 1579 -Total Receives: 1580 -Total Receives: 1581 -Total Receives: 1582 -Total Receives: 1583 -Total Receives: 1584 -Total Receives: 1585 -Total Receives: 1586 -Total rewards: 1594 -Total Receives: 1587 -Total Receives: 1588 -Total Receives: 1589 -Total Receives: 1590 -Total Receives: 1591 -Total Receives: 1592 -Total Receives: 1593 -Total Receives: 1594 -Total rewards: 1602 -Total Receives: 1595 -Total Receives: 1596 -Total Receives: 1597 -Total Receives: 1598 -Total Receives: 1599 -Total Receives: 1600 -Total Receives: 1601 -Total Receives: 1602 -Total rewards: 1606 -Total Receives: 1603 -Stores 1112 1 -Total Receives: 1604 -Stores 1113 1 -Total Receives: 1605 -Stores 1114 1 -Total Receives: 1606 -Stores 1115 1 -Total rewards: 1609 -Total Receives: 1607 -Stores 1116 1 -Total Receives: 1608 -Stores 1117 1 -Total Receives: 1609 -Stores 1118 1 -Total rewards: 1615 -Total Receives: 1610 -Total Receives: 1611 -Total Receives: 1612 -Total Receives: 1613 -Total Receives: 1614 -Total Receives: 1615 -Total rewards: 1619 -Total Receives: 1616 -Stores 1119 1 -Total Receives: 1617 -Stores 1120 1 -Total Receives: 1618 -Stores 1121 1 -Total Receives: 1619 -Stores 1122 1 -Total rewards: 1627 -Total Receives: 1620 -Total Receives: 1621 -Total Receives: 1622 -Total Receives: 1623 -Total Receives: 1624 -Total Receives: 1625 -Total Receives: 1626 -Total Receives: 1627 -Total rewards: 1637 -Total Receives: 1628 -Total Receives: 1629 -Total Receives: 1630 -Total Receives: 1631 -Total Receives: 1632 -Total Receives: 1633 -Total Receives: 1634 -Total Receives: 1635 -Total Receives: 1636 -Total Receives: 1637 -Total rewards: 1646 -Total Receives: 1638 -Total Receives: 1639 -Total Receives: 1640 -Total Receives: 1641 -Total Receives: 1642 -Total Receives: 1643 -Total Receives: 1644 -Total Receives: 1645 -Total Receives: 1646 -Total rewards: 1654 -Total Receives: 1647 -Total Receives: 1648 -Total Receives: 1649 -Total Receives: 1650 -Total Receives: 1651 -Total Receives: 1652 -Total Receives: 1653 -Total Receives: 1654 -Total rewards: 1661 -Total Receives: 1655 -Total Receives: 1656 -Total Receives: 1657 -Total Receives: 1658 -Total Receives: 1659 -Total Receives: 1660 -Total Receives: 1661 -Total rewards: 1671 -Total Receives: 1662 -Stores 1123 1 -Total Receives: 1663 -Stores 1124 1 -Total Receives: 1664 -Stores 1125 1 -Total Receives: 1665 -Stores 1126 1 -Total Receives: 1666 -Stores 1127 1 -Total Receives: 1667 -Stores 1128 1 -Total Receives: 1668 -Stores 1129 1 -Total Receives: 1669 -Stores 1130 1 -Total Receives: 1670 -Stores 1131 1 -Total Receives: 1671 -Stores 1132 1 -Total rewards: 1677 -Total Receives: 1672 -Stores 1133 1 -Total Receives: 1673 -Stores 1134 1 -Total Receives: 1674 -Stores 1135 1 -Total Receives: 1675 -Stores 1136 1 -Total Receives: 1676 -Stores 1137 1 -Total Receives: 1677 -Stores 1138 1 -Total rewards: 1684 -Total Receives: 1678 -Stores 1139 1 -Total Receives: 1679 -Stores 1140 1 -Total Receives: 1680 -Stores 1141 1 -Total Receives: 1681 -Stores 1142 1 -Total Receives: 1682 -Stores 1143 1 -Total Receives: 1683 -Stores 1144 1 -Total Receives: 1684 -Stores 1145 1 -Total rewards: 1687 -Total Receives: 1685 -Stores 1146 1 -Total Receives: 1686 -Stores 1147 1 -Total Receives: 1687 -Stores 1148 1 -Total rewards: 1694 -Total Receives: 1688 -Stores 1149 1 -Total Receives: 1689 -Stores 1150 1 -Total Receives: 1690 -Stores 1151 1 -Total Receives: 1691 -Stores 1152 1 -Total Receives: 1692 -Stores 1153 1 -Total Receives: 1693 -Stores 1154 1 -Total Receives: 1694 -Stores 1155 1 -Total rewards: 1703 -Total Receives: 1695 -Stores 1156 1 -Total Receives: 1696 -Stores 1157 1 -Total Receives: 1697 -Stores 1158 1 -Total Receives: 1698 -Stores 1159 1 -Total Receives: 1699 -Stores 1160 1 -Total Receives: 1700 -Stores 1161 1 -Total Receives: 1701 -Stores 1162 1 -Total Receives: 1702 -Stores 1163 1 -Total Receives: 1703 -Stores 1164 1 -Total rewards: 1706 -Total Receives: 1704 -Stores 1165 1 -Total Receives: 1705 -Stores 1166 1 -Total Receives: 1706 -Stores 1167 1 -Total rewards: 1713 -Total Receives: 1707 -Stores 1168 1 -Total Receives: 1708 -Stores 1169 1 -Total Receives: 1709 -Stores 1170 1 -Total Receives: 1710 -Stores 1171 1 -Total Receives: 1711 -Stores 1172 1 -Total Receives: 1712 -Stores 1173 1 -Total Receives: 1713 -Stores 1174 1 -Total rewards: 1719 -Total Receives: 1714 -Stores 1175 1 -Total Receives: 1715 -Stores 1176 1 -Total Receives: 1716 -Stores 1177 1 -Total Receives: 1717 -Stores 1178 1 -Total Receives: 1718 -Stores 1179 1 -Total Receives: 1719 -Stores 1180 1 -Total rewards: 1728 -Total Receives: 1720 -Stores 1181 1 -Total Receives: 1721 -Stores 1182 1 -Total Receives: 1722 -Stores 1183 1 -Total Receives: 1723 -Stores 1184 1 -Total Receives: 1724 -Stores 1185 1 -Total Receives: 1725 -tensor([-0.2888, -0.2896]) -tensor([0.0013, 0.0031, 0.0054]) -Total rewards: 2708 -Total Receives: 2703 -Stores 1813 1 -Total Receives: 2704 -Stores 1814 1 -Total Receives: 2705 -Stores 1815 1 -Total Receives: 2706 -Stores 1816 1 -Total Receives: 2707 -Stores 1817 1 -Total Receives: 2708 -Stores 1818 1 -tensor([0.0313, 0.0306]) -Total rewards: 2714 -Total Receives: 2709 -Stores 1819 1 -Total Receives: 2710 -Stores 1820 1 -Total Receives: 2711 -Stores 1821 1 -Total Receives: 2712 -Stores 1822 1 -Total Receives: 2713 -Stores 1823 1 -Total Receives: 2714 -Stores 1824 1 -tensor([-0.1881, -0.1867, -0.1859, -0.1871, -0.1890, -0.1890, -0.1870]) -tensor([-0.1501, -0.1564, -0.1286, -0.1539, -0.1168]) -tensor([0.0344, 0.0357]) -tensor([0.0350, 0.0294, 0.0296]) -tensor([-0.0233, -0.0229, -0.0128]) -tensor([-0.8506, -0.8764, -0.8885, -0.8520, -0.8299, -0.8845, -0.8903]) -tensor([-0.3193, -0.3194, -0.3208, -0.3178]) -tensor([-0.1908, -0.1807, -0.1530, -0.1941]) -tensor([-0.2162, -0.1808, -0.1623, -0.2300, -0.2171]) -tensor([-0.0870, -0.0883, -0.0731, -0.0896, -0.0628, -0.0585]) -tensor([0.0283, 0.0283, 0.0251]) -tensor([0.0296, 0.0302]) -tensor([-0.0542, -0.0422, -0.0258, -0.0353, -0.0673, -0.0214, -0.0556]) -tensor([-8.0756, -8.0771, -8.0321, -8.0223, -8.0772, -8.0287, -8.0390, -8.0670, - -8.0396, -8.1004, -8.0793, -8.1004, -8.1004, -8.1004, -8.1004]) -tensor([-0.0215, -0.0223, -0.0175]) -tensor([-0.3270, -0.1388]) -tensor([0.0591, 0.0368]) -tensor([-0.2144, -0.2030, -0.2194]) -tensor([-0.4200, -0.4198, -0.4169]) -tensor([-0.0435, -0.0435]) -Total rewards: 2723 -Total Receives: 2715 -Stores 1825 1 -Total Receives: 2716 -Stores 1826 1 -Total Receives: 2717 -Stores 1827 1 -Total Receives: 2718 -Stores 1828 1 -Total Receives: 2719 -Stores 1829 1 -Total Receives: 2720 -Stores 1830 1 -Total Receives: 2721 -Stores 1831 1 -Total Receives: 2722 -Stores 1832 1 -Total Receives: 2723 -Stores 1833 1 -tensor([-0.0647, -0.1199, -0.0882, -0.0671, -0.1322, -0.1198, -0.0619]) -tensor([-0.0158, -0.0174, -0.0140]) -tensor([-0.0389, -0.0442, -0.0215, -0.0406]) -tensor([-0.0723, -0.0593, -0.0419, -0.0524, -0.0840, -0.0714]) -tensor([0.0361, 0.0357, 0.0360, 0.0351]) -Total rewards: 2732 -Total Receives: 2724 -Stores 1834 1 -Total Receives: 2725 -Stores 1835 1 -Total Receives: 2726 -Stores 1836 1 -Total Receives: 2727 -Stores 1837 1 -Total Receives: 2728 -Stores 1838 1 -Total Receives: 2729 -Stores 1839 1 -Total Receives: 2730 -Stores 1840 1 -Total Receives: 2731 -Stores 1841 1 -Total Receives: 2732 -Stores 1842 1 -tensor([-0.5559, -0.5506, -0.5482, -0.5480, -0.5572, -0.5694, -0.5552]) -tensor([-1.0246, -1.0635, -1.0234, -0.9996, -1.0642, -1.0763, -1.0737]) -tensor([-8.4357, -8.4377, -8.3864, -8.4318, -8.3843, -8.3945, -8.4229, -8.3935, - -8.4702, -8.4384, -8.4702, -8.4702, -8.4702, -8.4702]) -tensor([0.0303, 0.0278]) -tensor([-0.0295, -0.0310, -0.0308]) -tensor([-0.0550, -0.1173, -0.0825, -0.0591, -0.1384, -0.1179]) -tensor([-0.0904, -0.0762, -0.0581, -0.1068, -0.0898]) -tensor([-0.1255, -0.1242]) -tensor([-0.3690, -0.3657, -0.3687]) -tensor([-0.0973, -0.0985, -0.0828, -0.1006, -0.0709, -0.0675]) -tensor([-0.0164, 0.0074]) -tensor([-0.2608, -0.2364]) -tensor([-0.9687, -0.9297, -0.9815, -0.9019, -0.9758, -0.9813, -0.9834]) -tensor([-0.0738, -0.0877]) -tensor([0.0371, 0.0380, 0.0366]) -tensor([-0.2108, -0.2195]) -tensor([-0.1236, -0.3064, -0.1336]) -tensor([0.0279, 0.0279]) -tensor([0.0137, 0.0099, 0.0104]) -tensor([-0.0688, -0.0750]) -tensor([-0.0047, -0.0042, -0.0051, -0.0013]) -tensor([-0.1100, -0.1039, -0.0952]) -tensor([-0.2990, -0.2996, -0.3005, -0.2905, -0.3020, -0.3046, -0.3025]) -Total rewards: 2740 -Total Receives: 2733 -Stores 1843 1 -Total Receives: 2734 -Stores 1844 1 -Total Receives: 2735 -Stores 1845 1 -Total Receives: 2736 -Stores 1846 1 -Total Receives: 2737 -Stores 1847 1 -Total Receives: 2738 -Stores 1848 1 -Total Receives: 2739 -Stores 1849 1 -Total Receives: 2740 -Stores 1850 1 -tensor([-0.1849, -0.1442, -0.1197, -0.2040, -0.1854]) -tensor([-0.0716, -0.0666]) -tensor([-0.1931, -0.1600, -0.2109, -0.1930]) -tensor([0.0325, 0.0307]) -Total rewards: 2746 -Total Receives: 2741 -Stores 1851 1 -Total Receives: 2742 -Stores 1852 1 -Total Receives: 2743 -Stores 1853 1 -Total Receives: 2744 -Stores 1854 1 -Total Receives: 2745 -Stores 1855 1 -Total Receives: 2746 -Stores 1856 1 -tensor([0.0310, 0.0275]) -tensor([-0.0416, -0.0422, -0.0383]) -tensor([-0.2518, -0.2029]) -tensor([-0.1166, -0.1054, -0.1354, -0.1183, -0.0559]) -Total rewards: 2748 -Total Receives: 2747 -Stores 1857 1 -Total Receives: 2748 -Stores 1858 1 -tensor([-0.0282, -0.0241]) -tensor([-0.1180, -0.1256, -0.1048, -0.1213, -0.0952]) -Total rewards: 2753 -Total Receives: 2749 -Stores 1859 1 -Total Receives: 2750 -Stores 1860 1 -Total Receives: 2751 -Stores 1861 1 -Total Receives: 2752 -Stores 1862 1 -Total Receives: 2753 -Stores 1863 1 -tensor([-10.3668, -10.3634, -10.3145, -10.3502, -10.3260, -10.3555, -10.3197, - -10.4204, -10.3636, -10.4204, -10.4205, -10.4204, -10.4204, -10.4021]) -tensor([-0.0849, -0.2654, -0.0855, -0.1025]) -tensor([-0.0071, -0.0156, -0.0144, -0.0107]) -tensor([0.0015, 0.0009]) -tensor([0.0264, 0.0368]) -tensor([0.0421, 0.0389]) -tensor([-0.0897, -0.1250, -0.0886]) -tensor([0.0118, 0.0128]) -tensor([-0.9062, -0.9175, -0.9061, -0.9248, -0.9179, -0.9278, -0.9253]) -Total rewards: 2758 -Total Receives: 2754 -Stores 1864 1 -Total Receives: 2755 -Stores 1865 1 -Total Receives: 2756 -Stores 1866 1 -Total Receives: 2757 -Stores 1867 1 -Total Receives: 2758 -Stores 1868 1 -tensor([-0.0481, -0.0474]) -tensor([-0.0293, -0.0838, -0.0335, -0.0296, -0.0287]) -tensor([-0.0959, -0.1565, -0.0981]) -tensor([-0.6994, -0.6957, -0.7108, -0.6853, -0.7018, -0.7148, -0.7003]) -tensor([-0.5154, -0.5171, -0.5154, -0.5366, -0.5183, -0.5201, -0.5239]) -tensor([-0.1985, -0.1889, -0.2149, -0.2008]) -tensor([0.0316, 0.0262]) -Total rewards: 2760 -Total Receives: 2759 -Stores 1869 1 -Total Receives: 2760 -Stores 1870 1 -tensor([-0.0966, -0.1103, -0.0885]) -tensor([0.0177, 0.0183]) -tensor([-0.1852, -0.1302]) -tensor([-10.9822, -10.9746, -10.9606, -10.9429, -10.9745, -10.9469, -11.0390, - -10.9753, -11.0298, -11.0390, -11.0390, -11.0390, -11.0390, -11.0131]) -tensor([-0.2402, -0.2627, -0.2408, -0.1463]) -tensor([-0.6501, -0.5764, -0.5845, -0.5769]) -tensor([0.0302, 0.0381]) -tensor([-0.0560, -0.0407, -0.0308, -0.0555, -0.0610]) -tensor([-0.0653, -0.0762, -0.0658, -0.0726]) -tensor([-0.0295, -0.0438, -0.0319, -0.0323, -0.0313]) -tensor([-0.2324, -0.2391, -0.2354, -0.1820, -0.2094, -0.1798]) -tensor([-0.1140, -0.2051, -0.0984]) -tensor([-12.9191, -12.9136, -12.8957, -12.9013, -12.8737, -12.9864, -12.9139, - -12.9723, -12.9864, -12.9864, -12.9864, -12.9765, -12.9864, -12.9528]) -tensor([0.0193, 0.0184, 0.0187]) -tensor([-0.3094, -0.3046, -0.3719, -0.3019, -0.3044, -0.3121]) -Total rewards: 2764 -Total Receives: 2761 -Stores 1871 1 -Total Receives: 2762 -Stores 1872 1 -Total Receives: 2763 -Stores 1873 1 -Total Receives: 2764 -Stores 1874 1 -tensor([-0.0432, -0.0428, -0.0426, -0.0371]) -tensor([-0.0990, -0.1112]) -Total rewards: 2773 -Total Receives: 2765 -Stores 1875 1 -Total Receives: 2766 -Stores 1876 1 -Total Receives: 2767 -Stores 1877 1 -Total Receives: 2768 -Stores 1878 1 -Total Receives: 2769 -Stores 1879 1 -Total Receives: 2770 -Stores 1880 1 -Total Receives: 2771 -Stores 1881 1 -Total Receives: 2772 -Stores 1882 1 -Total Receives: 2773 -Stores 1883 1 -tensor([-1.4671, -1.4610, -1.4871, -1.4700, -1.4958, -1.4676, -1.5099]) -tensor([-0.1374, -0.1950, -0.2195, -0.2057]) -tensor([-0.0080, -0.0124]) -tensor([-0.2682, -0.2897, -0.2688]) -tensor([-0.0836, -0.0597]) -Total rewards: 2777 -Total Receives: 2774 -Stores 1884 1 -Total Receives: 2775 -Stores 1885 1 -Total Receives: 2776 -Stores 1886 1 -Total Receives: 2777 -Stores 1887 1 -tensor([-0.2305, -0.1802, -0.2326, -0.1840, -0.2126, -0.1813, -0.1803, -0.1786]) -tensor([-0.0361, -0.0275]) -tensor([-0.1351, -0.1750]) -tensor([-11.4398, -11.4288, -11.4131, -11.5113, -11.4250, -11.5186, -11.4289, - -11.4911, -11.5186, -11.5186, -11.5186, -11.4928, -11.5186, -11.4711]) -tensor([-0.0931, -0.0870, -0.0931, -0.0682]) -tensor([0.0181, 0.0156, 0.0166]) -tensor([-0.1083, -0.1058, -0.1581]) -tensor([-0.7741, -0.8092, -0.8043, -0.8125, -0.8007, -0.7970, -0.7988]) -tensor([-0.2431, -0.2232, -0.1858, -0.2071, -0.2297]) -tensor([0.0343, 0.0321]) -tensor([-0.1959, -0.2528, -0.1966, -0.2537, -0.2012, -0.2288, -0.1974, -0.1992]) -tensor([-0.0883, -0.0794, -0.0870]) -tensor([-0.0904, -0.1623, -0.0731, -0.0940, -0.0901]) -Total rewards: 2786 -Total Receives: 2778 -Stores 1888 1 -Total Receives: 2779 -Stores 1889 1 -Total Receives: 2780 -Stores 1890 1 -Total Receives: 2781 -Stores 1891 1 -Total Receives: 2782 -Stores 1892 1 -Total Receives: 2783 -Stores 1893 1 -Total Receives: 2784 -Stores 1894 1 -Total Receives: 2785 -Stores 1895 1 -Total Receives: 2786 -Stores 1896 1 -tensor([-0.1515, -0.2216, -0.2423, -0.2291]) -tensor([-0.2053, -0.1822, -0.1657, -0.1863]) -tensor([-0.2256, -0.2583, -0.2401]) -Total rewards: 2788 -Total Receives: 2787 -Stores 1897 1 -Total Receives: 2788 -Stores 1898 1 -tensor([-1.2245, -1.2198, -1.2268, -1.2523, -1.2763, -1.2722, -1.2243, -1.2717]) -tensor([-0.1131, -0.1615, -0.0916]) -Total rewards: 2793 -Total Receives: 2789 -Stores 1899 1 -Total Receives: 2790 -Stores 1900 1 -Total Receives: 2791 -Stores 1901 1 -Total Receives: 2792 -Stores 1902 1 -Total Receives: 2793 -Stores 1903 1 -tensor([-0.1671, -0.1088, -0.1685, -0.1146, -0.1342, -0.1018, -0.1102, -0.1122]) -tensor([-0.0498, -0.0451, -0.0439, -0.0760, -0.0414]) -tensor([-0.1172, -0.1100, -0.0864, -0.1164]) -tensor([-0.1403, -0.1368, -0.1008, -0.0914]) -tensor([-0.1555, -0.2793, -0.2623, -0.1530]) -tensor([-16.7108, -16.6998, -16.7679, -16.7104, -16.7732, -16.6942, -16.7901, - -16.6991, -16.7576, -16.7901, -16.7901, -16.7568, -16.7901, -16.7406]) -tensor([-0.5958, -0.5883, -0.5851, -0.5932, -0.5900, -0.5801, -0.5787]) -tensor([-0.0244, -0.0213, -0.0280, -0.0486, -0.0201]) -tensor([-0.1164, -0.1150]) -tensor([-0.1680, -0.2681, -0.2567]) -tensor([-0.0666, -0.0782, -0.0619, -0.0690, -0.0734]) -tensor([-0.0344, -0.0428, -0.0248]) -tensor([-0.7395, -0.7369, -0.7345, -0.7399, -0.7476, -0.7313, -0.7436]) -tensor([-0.0483, -0.0478, -0.0500]) -tensor([-0.0767, -0.0757]) -tensor([-0.2172, -0.2320, -0.1267]) -tensor([-0.1176, -0.0589, -0.0628, -0.1145, -0.0734]) -tensor([-0.2152, -0.1621, -0.2151, -0.1676, -0.1858, -0.1632, -0.1673]) -tensor([-0.1997, -0.1208]) -Total rewards: 2801 -Total Receives: 2794 -Stores 1904 1 -Total Receives: 2795 -Stores 1905 1 -Total Receives: 2796 -Stores 1906 1 -Total Receives: 2797 -Stores 1907 1 -Total Receives: 2798 -Stores 1908 1 -Total Receives: 2799 -Stores 1909 1 -Total Receives: 2800 -Stores 1910 1 -Total Receives: 2801 -Stores 1911 1 -tensor([-0.0901, -0.1564, -0.1380, -0.0835, -0.1217]) -Total rewards: 2807 -Total Receives: 2802 -Stores 1912 1 -Total Receives: 2803 -Stores 1913 1 -Total Receives: 2804 -Stores 1914 1 -Total Receives: 2805 -Stores 1915 1 -Total Receives: 2806 -Stores 1916 1 -Total Receives: 2807 -Stores 1917 1 -tensor([-0.1199, -0.0842, -0.1231, -0.0943]) -tensor([0.0042, 0.0190]) -tensor([-0.0731, -0.0839, -0.0754, -0.0811, -0.0576]) -tensor([-0.1413, -0.1133, -0.1557, -0.0970, -0.1028]) -tensor([-0.0331, -0.0343, -0.0329, -0.0419]) -tensor([-0.0956, -0.1677, -0.1460, -0.0909]) -tensor([-15.4201, -15.4062, -15.4741, -15.4162, -15.4816, -15.5065, -15.5045, - -15.5121, -15.4453, -15.5073, -15.4659, -15.5121, -15.5121, -15.4617, - -15.5121, -15.4478, -15.4930]) -tensor([-3.1207, -3.1229, -3.1574, -3.1821, -3.1744, -3.1174, -3.1825]) -tensor([-0.1331, -0.2461, -0.2353, -0.1276]) -tensor([-0.9098, -0.9071, -0.9154, -0.9147, -0.9168, -0.8925, -0.9163]) -tensor([-0.1038, -0.0455, -0.0692, -0.1075]) -tensor([-0.2325, -0.2358, -0.1637, -0.1979, -0.1585, -0.1617]) -tensor([-0.1167, -0.0335, -0.0331, -0.0348]) -tensor([-0.0171, -0.0212, -0.0203, -0.0253]) -tensor([-0.6466, -0.6482, -0.6571, -0.6428, -0.6580, -0.6463, -0.6620, -0.6403, - -0.6585]) -tensor([5.4433e-03, 3.7094e-03, 5.8115e-06]) -tensor([-0.0226, -0.0226]) -tensor([-0.1104, -0.0745, -0.1148]) -tensor([0.0159, 0.0152, 0.0163, 0.0155]) -tensor([-1.3118, -1.3101, -1.3173, -1.3173, -1.3188, -1.3223, -1.3198]) -tensor([0.0139, 0.0147]) -tensor([-0.0970, -0.0742, -0.1066, -0.0537, -0.0698, -0.0555]) -tensor([-0.1704, -0.1043, -0.1043, -0.0972]) -tensor([0.0306, 0.0322]) -tensor([-0.1924, -0.2972, -0.1743, -0.2877]) -tensor([-0.0708, -0.0818]) -tensor([-0.0972, -0.0739, -0.1516, -0.1377]) -Total rewards: 2817 -Total Receives: 2808 -Total Receives: 2809 -Total Receives: 2810 -Total Receives: 2811 -Total Receives: 2812 -Total Receives: 2813 -Total Receives: 2814 -Total Receives: 2815 -Total Receives: 2816 -Total Receives: 2817 -tensor([-23.1753, -23.2407, -23.1684, -23.2520, -23.2797, -23.2778, -23.3011, - -23.2025, -23.2816, -23.2292, -23.3011, -23.3011, -23.2295, -23.3011, - -23.2150, -23.2643, -23.2832]) -tensor([-0.1328, -0.1456]) -tensor([-0.1278, -0.1180, -0.1276]) -Total rewards: 2823 -Total Receives: 2818 -Stores 1918 1 -Total Receives: 2819 -Stores 1919 1 -Total Receives: 2820 -Stores 1920 1 -Total Receives: 2821 -Stores 1921 1 -Total Receives: 2822 -Stores 1922 1 -Total Receives: 2823 -Stores 1923 1 -tensor([-0.9541, -0.9672, -0.9667, -0.9646, -0.9677, -0.9728, -0.9728, -0.9731]) -tensor([-0.0431, -0.0467]) -tensor([-2.6301, -2.6330, -2.6595, -2.6825, -2.6683, -2.7089, -2.6988, -2.6894]) -Total rewards: 2829 -Total Receives: 2824 -Stores 1924 1 -Total Receives: 2825 -Stores 1925 1 -Total Receives: 2826 -Stores 1926 1 -Total Receives: 2827 -Stores 1927 1 -Total Receives: 2828 -Stores 1928 1 -Total Receives: 2829 -Stores 1929 1 -tensor([-0.0654, -0.0559]) -tensor([-18.7086, -18.7616, -18.7780, -18.8003, -18.7980, -18.8234, -18.8225, - -18.7254, -18.8011, -18.7557, -18.8225, -18.8225, -18.7472, -18.8225, - -18.7391, -18.7831, -18.7983]) -tensor([-0.1220, -0.0927, -0.1314, -0.0524, -0.0533, -0.0915, -0.0755]) -tensor([-0.2187, -0.1226, -0.2213, -0.1698, -0.1834, -0.1755, -0.1346]) -tensor([-1.1691, -1.1678, -1.1821, -1.1577, -1.1823, -1.1618, -1.1667, -1.1624, - -1.1961]) -tensor([-0.0094, -0.0133]) -tensor([-0.0345, -0.0446]) -tensor([0.0128, 0.0091]) -tensor([-17.0697, -17.1139, -17.1313, -17.1536, -17.1507, -17.1761, -17.1807, - -17.0831, -17.1156, -17.1807, -17.1807, -17.1003, -17.1806, -17.0963, - -17.1352, -17.1487]) -Total rewards: 2838 -Total Receives: 2830 -Total Receives: 2831 -Total Receives: 2832 -Total Receives: 2833 -Total Receives: 2834 -Total Receives: 2835 -Total Receives: 2836 -Total Receives: 2837 -Total Receives: 2838 -tensor([-0.1387, -0.1854, -0.1743]) -tensor([0.0146, 0.0146, 0.0119]) -tensor([0.0117, 0.0035]) -tensor([-0.1384, -0.1155, -0.1456, -0.0863, -0.1167, -0.1012]) -tensor([0.0339, 0.0332]) -tensor([-0.3547, -0.2303, -0.3434]) -tensor([-0.1188, -0.0418, -0.0195, -0.0390, -0.0252, -0.0272]) -tensor([-0.0047, -0.0115]) -tensor([0.0350, 0.0316]) -tensor([-0.1804, -0.1680]) -Total rewards: 2846 -Total Receives: 2839 -Stores 1930 1 -Total Receives: 2840 -Stores 1931 1 -Total Receives: 2841 -Stores 1932 1 -Total Receives: 2842 -Stores 1933 1 -Total Receives: 2843 -Stores 1934 1 -Total Receives: 2844 -Stores 1935 1 -Total Receives: 2845 -Stores 1936 1 -Total Receives: 2846 -Stores 1937 1 -tensor([-1.6319, -1.5549, -1.5806, -1.6034, -1.5859, -1.6297, -1.6410, -1.6154, - -1.6143]) -tensor([-0.2530, -0.2566, -0.1855, -0.2137, -0.1863, -0.1406]) -Total rewards: 2849 -Total Receives: 2847 -Stores 1938 1 -Total Receives: 2848 -Stores 1939 1 -Total Receives: 2849 -Stores 1940 1 -tensor([-0.1197, -0.0604, -0.1339, -0.0678, -0.0990, -0.0828]) -tensor([0.0287, 0.0282]) -tensor([-0.3506, -0.3344, -0.1785]) -tensor([-1.6597, -1.6767, -1.6763, -1.6731, -1.6870, -1.6793, -1.6845, -1.6813, - -1.6923]) -tensor([0.0278, 0.0249]) -tensor([-0.1483, -0.0586, -0.0294, -0.0575, -0.0389]) -tensor([-1.4997, -1.4986, -1.5167, -1.5164, -1.4921, -1.5097, -1.4925, -1.5380]) -tensor([0.0321, 0.0314, 0.0328]) -Total rewards: 2855 -Total Receives: 2850 -Stores 1941 1 -Total Receives: 2851 -Stores 1942 1 -Total Receives: 2852 -Stores 1943 1 -Total Receives: 2853 -Stores 1944 1 -Total Receives: 2854 -Stores 1945 1 -Total Receives: 2855 -Stores 1946 1 -tensor([-0.0673, -0.0552]) -tensor([0.0166, 0.0162]) -Total rewards: 2865 -Total Receives: 2856 -Stores 1947 1 -Total Receives: 2857 -Stores 1948 1 -Total Receives: 2858 -Stores 1949 1 -Total Receives: 2859 -Stores 1950 1 -Total Receives: 2860 -Stores 1951 1 -Total Receives: 2861 -Stores 1952 1 -Total Receives: 2862 -Stores 1953 1 -Total Receives: 2863 -Stores 1954 1 -Total Receives: 2864 -Stores 1955 1 -Total Receives: 2865 -Stores 1956 1 -tensor([-1.5874, -1.5421, -1.5635, -1.5444, -1.5864, -1.5970, -1.5727, -1.5775, - -1.6024]) -tensor([-0.1008, -0.0993, -0.1299]) -tensor([-15.9362, -15.9232, -15.9504, -15.9765, -15.9495, -15.9762, -15.9956, - -15.8751, -15.9032, -15.9956, -15.9956, -15.8787, -15.9956, -15.8800, - -15.9214, -15.9380]) -tensor([-0.1139, -0.1982, -0.1859]) -tensor([-0.3053, -0.1573, -0.2941]) -tensor([-0.8698, -0.8655, -0.8838, -0.8832, -0.8991, -0.8548, -0.9050, -0.9057]) -tensor([0.0390, 0.0393, 0.0371]) -tensor([0.0424, 0.0421, 0.0415]) -tensor([0.0331, 0.0329]) -tensor([-0.3398, -0.3287]) -tensor([-0.9475, -0.9278, -0.9326, -0.9469, -0.9556, -0.9376, -0.9504, -0.9544, - -0.9654]) -tensor([-0.2251, -0.1261, -0.1277, -0.1128]) -tensor([-0.1369, -0.0821, -0.0897, -0.1197, -0.1034]) -tensor([-0.2858, -0.2880, -0.2291, -0.2526, -0.2346]) -tensor([-13.5740, -13.5638, -13.5882, -13.6063, -13.5851, -13.6097, -13.6381, - -13.5268, -13.5497, -13.6381, -13.5247, -13.6381, -13.5305, -13.5599, - -13.5740]) -tensor([ 0.0025, -0.0006, 0.0025]) -Total rewards: 2872 -Total Receives: 2866 -Stores 1957 1 -Total Receives: 2867 -Stores 1958 1 -Total Receives: 2868 -Stores 1959 1 -Total Receives: 2869 -Stores 1960 1 -Total Receives: 2870 -Stores 1961 1 -Total Receives: 2871 -Stores 1962 1 -Total Receives: 2872 -Stores 1963 1 -tensor([-10.8669, -10.8547, -10.8815, -10.9045, -10.8782, -10.9063, -10.9365, - -10.8086, -10.8362, -10.9366, -10.8135, -10.9366, -10.8534, -10.8696]) -tensor([0.0134, 0.0142]) -tensor([-1.1273, -1.1521, -1.1443, -1.1437, -1.1401, -1.1457, -1.1505, -1.1423, - -1.1575, -1.1710, -1.1599]) -tensor([-0.1153, -0.1112, -0.1413, -0.1047]) -tensor([-0.2543, -0.1542, -0.1558]) -tensor([-0.1496, -0.2640, -0.2666, -0.2449, -0.2247]) -tensor([-0.2441, -0.2272, -0.1449, -0.1493, -0.1384]) -tensor([-0.7710, -0.7812, -0.7818, -0.7814, -0.7785, -0.7817, -0.7841, -0.7749, - -0.8012, -0.7918]) -tensor([0.0389, 0.0392]) -tensor([0.0323, 0.0326]) -tensor([-0.1651, -0.3078]) -tensor([-0.0906, -0.1032, -0.1297, -0.0965]) -tensor([-1.0975, -1.0782, -1.0837, -1.0982, -1.1082, -1.0850, -1.1098, -1.1225]) -tensor([-0.0356, -0.0293]) -tensor([-0.2076, -0.0478, -0.1194, -0.0726, -0.0696]) -tensor([-1.0048, -1.0007, -1.0241, -1.0461, -0.9880, -1.0550, -1.0533, -1.0461]) -tensor([0.0093, 0.0079]) -tensor([-0.4549, -0.4523, -0.4545, -0.4525, -0.4533, -0.4539, -0.4449, -0.4555, - -0.4551]) -tensor([0.0008, 0.0008]) -tensor([0.0300, 0.0313, 0.0287]) -Total rewards: 2876 -Total Receives: 2873 -Stores 1964 1 -Total Receives: 2874 -Stores 1965 1 -Total Receives: 2875 -Stores 1966 1 -Total Receives: 2876 -Stores 1967 1 -tensor([0.0394, 0.0376]) -tensor([-0.0270, -0.0312, -0.0369, -0.0772]) -tensor([-0.2203, -0.2443, -0.2463, -0.2474, -0.2407, -0.2353, -0.2206, -0.2270]) -tensor([0.0398, 0.0410]) -tensor([-0.0600, -0.0585]) -tensor([-0.2453, -0.2471, -0.2485, -0.2414, -0.2360, -0.2189, -0.2279, -0.2165]) -Total rewards: 2884 -Total Receives: 2877 -Stores 1968 1 -Total Receives: 2878 -Stores 1969 1 -Total Receives: 2879 -Stores 1970 1 -Total Receives: 2880 -Stores 1971 1 -Total Receives: 2881 -Stores 1972 1 -Total Receives: 2882 -Stores 1973 1 -Total Receives: 2883 -Stores 1974 1 -Total Receives: 2884 -Stores 1975 1 -tensor([-9.5108, -9.4994, -9.5249, -9.5409, -9.5180, -9.5479, -9.5890, -9.4837, - -9.5890, -9.4955, -9.5890, -9.4864, -9.5406]) -tensor([-0.3198, -0.1717]) -tensor([-0.0719, -0.0939, -0.1201, -0.0877]) -tensor([-0.1520, -0.1085, -0.1398, -0.1194, -0.0867, -0.0740]) -tensor([-0.0448, -0.0536, -0.0961]) -Total rewards: 2893 -Total Receives: 2885 -Stores 1976 1 -Total Receives: 2886 -Stores 1977 1 -Total Receives: 2887 -Stores 1978 1 -Total Receives: 2888 -Stores 1979 1 -Total Receives: 2889 -Stores 1980 1 -Total Receives: 2890 -Stores 1981 1 -Total Receives: 2891 -Stores 1982 1 -Total Receives: 2892 -Stores 1983 1 -Total Receives: 2893 -Stores 1984 1 -tensor([-0.2489, -0.1612, -0.1122, -0.1039, -0.1039]) -Total rewards: 2899 -Total Receives: 2894 -Stores 1985 1 -Total Receives: 2895 -Stores 1986 1 -Total Receives: 2896 -Stores 1987 1 -Total Receives: 2897 -Stores 1988 1 -Total Receives: 2898 -Stores 1989 1 -Total Receives: 2899 -Stores 1990 1 -tensor([-0.2553, -0.1051, -0.2583, -0.2361, -0.2189]) -Total rewards: 2903 -Total Receives: 2900 -Stores 1991 1 -Total Receives: 2901 -Stores 1992 1 -Total Receives: 2902 -Stores 1993 1 -Total Receives: 2903 -Stores 1994 1 -tensor([-0.0097, -0.0112, -0.0088]) -tensor([-0.3470, -0.3262, -0.2795, -0.2815]) -tensor([-0.2288, -0.1632, -0.1227, -0.1113]) -tensor([-0.0281, -0.0307]) -tensor([0.0332, 0.0327]) -tensor([-0.2266, -0.1377, -0.1424]) -tensor([-0.2345, -0.1697, -0.1269]) -tensor([-0.0202, -0.0188]) -tensor([-0.7469, -0.7595, -0.7620, -0.7830, -0.7650, -0.7822, -0.7765, -0.7732, - -0.7822]) -tensor([-0.1950, -0.1966, -0.1971, -0.1919, -0.1873, -0.1684, -0.1447]) -tensor([-0.1089, -0.0912, -0.0740]) -tensor([-12.8994, -12.8903, -12.9604, -12.9132, -12.9489, -12.9041, -12.9348, - -12.9815, -12.9816, -12.8875, -12.9815, -12.9132, -12.9285]) -tensor([-0.1281, -0.1100, -0.1068]) -tensor([-0.1131, -0.1088]) -tensor([-1.6325, -1.6073, -1.6327, -1.6450, -1.6644, -1.6170, -1.6371, -1.6740]) -tensor([-0.3187, -0.3210]) -tensor([-0.0225, -0.0254]) -tensor([-0.3540, -0.3000]) -tensor([-0.1248, -0.1546, -0.1778, -0.1469]) -tensor([-0.0532, -0.0407, -0.0492]) -tensor([-0.1899, -0.1906, -0.1934, -0.1847, -0.1769, -0.1453]) -tensor([-0.0902, -0.1347]) -tensor([-0.1066, -0.0915, -0.0797]) -tensor([0.0209, 0.0195, 0.0196]) -tensor([-0.1160, -0.2243, -0.2014, -0.1012, -0.1031, -0.1868]) -tensor([0.0195, 0.0189, 0.0240, 0.0240]) -tensor([-0.3957, -0.4095, -0.4143, -0.4150, -0.4132, -0.4135, -0.4147, -0.4150, - -0.4135]) -Total rewards: 2910 -Total Receives: 2904 -Stores 1995 1 -Total Receives: 2905 -Stores 1996 1 -Total Receives: 2906 -Stores 1997 1 -Total Receives: 2907 -Stores 1998 1 -Total Receives: 2908 -Stores 1999 1 -Total Receives: 2909 -Stores 2000 1 -Total Receives: 2910 -Stores 2001 1 -tensor([-0.0442, -0.0366, -0.0428]) -tensor([0.0289, 0.0282]) -tensor([-0.1051, -0.0990, -0.1175, -0.0971, -0.0961]) -tensor([-0.0741, -0.0658, -0.0749, -0.0604]) -Total rewards: 2912 -Total Receives: 2911 -Stores 2002 1 -Total Receives: 2912 -Stores 2003 1 -Total rewards: 2915 -Total Receives: 2913 -Stores 2004 1 -Total Receives: 2914 -Stores 2005 1 -Total Receives: 2915 -Stores 2006 1 -tensor([0.0209, 0.0193, 0.0151]) -tensor([0.0304, 0.0322]) -tensor([-0.0930, -0.0801, -0.0673]) -tensor([0.0286, 0.0279]) -tensor([-0.0861, -0.0919]) -tensor([-0.0830, -0.0821]) -tensor([-0.1352, -0.1427, -0.1355]) -tensor([-0.1695, -0.1725, -0.1717, -0.1775, -0.1705]) -Total rewards: 2922 -Total Receives: 2916 -Stores 2007 1 -Total Receives: 2917 -Stores 2008 1 -Total Receives: 2918 -Stores 2009 1 -Total Receives: 2919 -Stores 2010 1 -Total Receives: 2920 -Stores 2011 1 -Total Receives: 2921 -Stores 2012 1 -Total Receives: 2922 -Stores 2013 1 -tensor([-0.1448, -0.1808, -0.2132, -0.1728]) -tensor([-0.0825, -0.0773, -0.0832]) -tensor([-0.0369, -0.0364, -0.0470]) -tensor([-0.2340, -0.1645]) -tensor([-14.7506, -14.8027, -14.7647, -14.7989, -14.7508, -14.7847, -14.8527, - -14.8527, -14.7372, -14.8527, -14.7647, -14.7795, -14.8274]) -tensor([-1.1600, -1.1636, -1.1700, -1.1729, -1.1944, -1.1310, -1.1926, -1.1961]) -Total rewards: 2928 -Total Receives: 2923 -Stores 2014 1 -Total Receives: 2924 -Stores 2015 1 -Total Receives: 2925 -Stores 2016 1 -Total Receives: 2926 -Stores 2017 1 -Total Receives: 2927 -Stores 2018 1 -Total Receives: 2928 -Stores 2019 1 -tensor([-0.5539, -0.5694, -0.5700, -0.5706, -0.5667, -0.5706, -0.5724, -0.5667]) -tensor([-0.0785, -0.0795]) -Total rewards: 2931 -Total Receives: 2929 -Stores 2020 1 -Total Receives: 2930 -Stores 2021 1 -Total Receives: 2931 -Stores 2022 1 -tensor([-0.0511, -0.0544, -0.0537]) -tensor([-0.0311, -0.0406, -0.0288, -0.0434, -0.0549]) -tensor([0.0198, 0.0179]) -tensor([ 0.0029, -0.0009]) -tensor([-0.2502, -0.2511, -0.2395, -0.2454, -0.2377]) -tensor([-0.0348, -0.0301, -0.0429, -0.0272]) -tensor([-0.0021, -0.0013]) -tensor([-0.1515, -0.1869, -0.1020, -0.1454, -0.1054]) -tensor([-0.0686, -0.0745]) -Total rewards: 2934 -Total Receives: 2932 -Stores 2023 1 -Total Receives: 2933 -Stores 2024 1 -Total Receives: 2934 -Stores 2025 1 -tensor([-0.0341, -0.0535, -0.0309, -0.0363, -0.0309, -0.0346]) -Stores 1186 1 -Total Receives: 1726 -Stores 1187 1 -Total Receives: 1727 -Stores 1188 1 -Total Receives: 1728 -Stores 1189 1 -Total rewards: 1735 -Total Receives: 1729 -Stores 1190 1 -Total Receives: 1730 -Stores 1191 1 -Total Receives: 1731 -Stores 1192 1 -Total Receives: 1732 -Stores 1193 1 -Total Receives: 1733 -Stores 1194 1 -Total Receives: 1734 -Stores 1195 1 -Total Receives: 1735 -Stores 1196 1 -Total rewards: 1741 -Total Receives: 1736 -Stores 1197 1 -Total Receives: 1737 -Stores 1198 1 -Total Receives: 1738 -Stores 1199 1 -Total Receives: 1739 -Stores 1200 1 -Total Receives: 1740 -Stores 1201 1 -Total Receives: 1741 -Stores 1202 1 -Total rewards: 1747 -Total Receives: 1742 -Stores 1203 1 -Total Receives: 1743 -Stores 1204 1 -Total Receives: 1744 -Stores 1205 1 -Total Receives: 1745 -Stores 1206 1 -Total Receives: 1746 -Stores 1207 1 -Total Receives: 1747 -Stores 1208 1 -Total rewards: 1755 -Total Receives: 1748 -Total Receives: 1749 -Total Receives: 1750 -Total Receives: 1751 -Total Receives: 1752 -Total Receives: 1753 -Total Receives: 1754 -Total Receives: 1755 -Total rewards: 1762 -Total Receives: 1756 -Stores 1209 1 -Total Receives: 1757 -Stores 1210 1 -Total Receives: 1758 -Stores 1211 1 -Total Receives: 1759 -Stores 1212 1 -Total Receives: 1760 -Stores 1213 1 -Total Receives: 1761 -Stores 1214 1 -Total Receives: 1762 -Stores 1215 1 -Total rewards: 1769 -Total Receives: 1763 -Total Receives: 1764 -Total Receives: 1765 -Total Receives: 1766 -Total Receives: 1767 -Total Receives: 1768 -Total Receives: 1769 -Total rewards: 1773 -Total Receives: 1770 -Stores 1216 1 -Total Receives: 1771 -Stores 1217 1 -Total Receives: 1772 -Stores 1218 1 -Total Receives: 1773 -Stores 1219 1 -Total rewards: 1781 -Total Receives: 1774 -Stores 1220 1 -Total Receives: 1775 -Stores 1221 1 -Total Receives: 1776 -Stores 1222 1 -Total Receives: 1777 -Stores 1223 1 -Total Receives: 1778 -Stores 1224 1 -Total Receives: 1779 -Stores 1225 1 -Total Receives: 1780 -Stores 1226 1 -Total Receives: 1781 -Stores 1227 1 -Total rewards: 1786 -Total Receives: 1782 -Stores 1228 1 -Total Receives: 1783 -Stores 1229 1 -Total Receives: 1784 -Stores 1230 1 -Total Receives: 1785 -Stores 1231 1 -Total Receives: 1786 -Stores 1232 1 -Total rewards: 1793 -Total Receives: 1787 -Stores 1233 1 -Total Receives: 1788 -Stores 1234 1 -Total Receives: 1789 -Stores 1235 1 -Total Receives: 1790 -Stores 1236 1 -Total Receives: 1791 -Stores 1237 1 -Total Receives: 1792 -Stores 1238 1 -Total Receives: 1793 -Stores 1239 1 -Total rewards: 1803 -Total Receives: 1794 -Total Receives: 1795 -Total Receives: 1796 -Total Receives: 1797 -Total Receives: 1798 -Total Receives: 1799 -Total Receives: 1800 -Total Receives: 1801 -Total Receives: 1802 -Total Receives: 1803 -Total rewards: 1811 -Total Receives: 1804 -Stores 1240 1 -Total Receives: 1805 -Stores 1241 1 -Total Receives: 1806 -Stores 1242 1 -Total Receives: 1807 -Stores 1243 1 -Total Receives: 1808 -Stores 1244 1 -Total Receives: 1809 -Stores 1245 1 -Total Receives: 1810 -Stores 1246 1 -Total Receives: 1811 -Stores 1247 1 -Total rewards: 1819 -Total Receives: 1812 -Total Receives: 1813 -Total Receives: 1814 -Total Receives: 1815 -Total Receives: 1816 -Total Receives: 1817 -Total Receives: 1818 -Total Receives: 1819 -Total rewards: 1823 -Total Receives: 1820 -Stores 1248 1 -Total Receives: 1821 -Stores 1249 1 -Total Receives: 1822 -Stores 1250 1 -Total Receives: 1823 -Stores 1251 1 -Total rewards: 1833 -Total Receives: 1824 -Total Receives: 1825 -Total Receives: 1826 -Total Receives: 1827 -Total Receives: 1828 -Total Receives: 1829 -Total Receives: 1830 -Total Receives: 1831 -Total Receives: 1832 -Total Receives: 1833 -Total rewards: 1837 -Total Receives: 1834 -Stores 1252 1 -Total Receives: 1835 -Stores 1253 1 -Total Receives: 1836 -Stores 1254 1 -Total Receives: 1837 -Stores 1255 1 -Total rewards: 1845 -Total Receives: 1838 -Total Receives: 1839 -Total Receives: 1840 -Total Receives: 1841 -Total Receives: 1842 -Total Receives: 1843 -Total Receives: 1844 -Total Receives: 1845 -Total rewards: 1854 -Total Receives: 1846 -Total Receives: 1847 -Total Receives: 1848 -Total Receives: 1849 -Total Receives: 1850 -Total Receives: 1851 -Total Receives: 1852 -Total Receives: 1853 -Total Receives: 1854 -Total rewards: 1859 -Total Receives: 1855 -Stores 1256 1 -Total Receives: 1856 -Stores 1257 1 -Total Receives: 1857 -Stores 1258 1 -Total Receives: 1858 -Stores 1259 1 -Total Receives: 1859 -Stores 1260 1 -Total rewards: 1862 -Total Receives: 1860 -Stores 1261 1 -Total Receives: 1861 -Stores 1262 1 -Total Receives: 1862 -Stores 1263 1 -Total rewards: 1868 -Total Receives: 1863 -Total Receives: 1864 -Total Receives: 1865 -Total Receives: 1866 -Total Receives: 1867 -Total Receives: 1868 -Total rewards: 1873 -Total Receives: 1869 -Stores 1264 1 -Total Receives: 1870 -Stores 1265 1 -Total Receives: 1871 -Stores 1266 1 -Total Receives: 1872 -Stores 1267 1 -Total Receives: 1873 -Stores 1268 1 -Total rewards: 1875 -Total Receives: 1874 -Stores 1269 1 -Total Receives: 1875 -Stores 1270 1 -Total rewards: 1882 -Total Receives: 1876 -Total Receives: 1877 -Total Receives: 1878 -Total Receives: 1879 -Total Receives: 1880 -Total Receives: 1881 -Total Receives: 1882 -Total rewards: 1886 -Total Receives: 1883 -Stores 1271 1 -Total Receives: 1884 -Stores 1272 1 -Total Receives: 1885 -Stores 1273 1 -Total Receives: 1886 -Stores 1274 1 -Total rewards: 1894 -Total Receives: 1887 -Total Receives: 1888 -Total Receives: 1889 -Total Receives: 1890 -Total Receives: 1891 -Total Receives: 1892 -Total Receives: 1893 -Total Receives: 1894 -Total rewards: 1897 -Total Receives: 1895 -Stores 1275 1 -Total Receives: 1896 -Stores 1276 1 -Total Receives: 1897 -Stores 1277 1 -Total rewards: 1903 -Total Receives: 1898 -Stores 1278 1 -Total Receives: 1899 -Stores 1279 1 -Total Receives: 1900 -Stores 1280 1 -Total Receives: 1901 -Stores 1281 1 -Total Receives: 1902 -Stores 1282 1 -Total Receives: 1903 -Stores 1283 1 -Total rewards: 1909 -Total Receives: 1904 -Stores 1284 1 -Total Receives: 1905 -Stores 1285 1 -Total Receives: 1906 -Stores 1286 1 -Total Receives: 1907 -Stores 1287 1 -Total Receives: 1908 -Stores 1288 1 -Total Receives: 1909 -Stores 1289 1 -Total rewards: 1915 -Total Receives: 1910 -Stores 1290 1 -Total Receives: 1911 -Stores 1291 1 -Total Receives: 1912 -Stores 1292 1 -Total Receives: 1913 -Stores 1293 1 -Total Receives: 1914 -Stores 1294 1 -Total Receives: 1915 -Stores 1295 1 -Total rewards: 1921 -Total Receives: 1916 -Total Receives: 1917 -Total Receives: 1918 -Total Receives: 1919 -Total Receives: 1920 -Total Receives: 1921 -Total rewards: 1926 -Total Receives: 1922 -Stores 1296 1 -Total Receives: 1923 -Stores 1297 1 -Total Receives: 1924 -Stores 1298 1 -Total Receives: 1925 -Stores 1299 1 -Total Receives: 1926 -Stores 1300 1 -Total rewards: 1929 -Total Receives: 1927 -Stores 1301 1 -Total Receives: 1928 -Stores 1302 1 -Total Receives: 1929 -Stores 1303 1 -Total rewards: 1933 -Total Receives: 1930 -Stores 1304 1 -Total Receives: 1931 -Stores 1305 1 -Total Receives: 1932 -Stores 1306 1 -Total Receives: 1933 -Stores 1307 1 -Total rewards: 1939 -Total Receives: 1934 -Stores 1308 1 -Total Receives: 1935 -Stores 1309 1 -Total Receives: 1936 -Stores 1310 1 -Total Receives: 1937 -Stores 1311 1 -Total Receives: 1938 -Stores 1312 1 -Total Receives: 1939 -Stores 1313 1 -Total rewards: 1946 -Total Receives: 1940 -Stores 1314 1 -Total Receives: 1941 -Stores 1315 1 -Total Receives: 1942 -Stores 1316 1 -Total Receives: 1943 -Stores 1317 1 -Total Receives: 1944 -Stores 1318 1 -Total Receives: 1945 -Stores 1319 1 -Total Receives: 1946 -Stores 1320 1 -Total rewards: 1949 -Total Receives: 1947 -Stores 1321 1 -Total Receives: 1948 -Stores 1322 1 -Total Receives: 1949 -Stores 1323 1 -Total rewards: 1957 -Total Receives: 1950 -Total Receives: 1951 -Total Receives: 1952 -Total Receives: 1953 -Total Receives: 1954 -Total Receives: 1955 -Total Receives: 1956 -Total Receives: 1957 -Total rewards: 1967 -Total Receives: 1958 -Stores 1324 1 -Total Receives: 1959 -Stores 1325 1 -Total Receives: 1960 -Stores 1326 1 -Total Receives: 1961 -Stores 1327 1 -Total Receives: 1962 -Stores 1328 1 -Total Receives: 1963 -Stores 1329 1 -Total Receives: 1964 -Stores 1330 1 -Total Receives: 1965 -Stores 1331 1 -Total Receives: 1966 -Stores 1332 1 -Total Receives: 1967 -Stores 1333 1 -Total rewards: 1976 -Total Receives: 1968 -Stores 1334 1 -Total Receives: 1969 -Stores 1335 1 -Total Receives: 1970 -Stores 1336 1 -Total Receives: 1971 -Stores 1337 1 -Total Receives: 1972 -Stores 1338 1 -Total Receives: 1973 -Stores 1339 1 -Total Receives: 1974 -Total rewards: 2941 -Total Receives: 2935 -Stores 2026 1 -Total Receives: 2936 -Stores 2027 1 -Total Receives: 2937 -Stores 2028 1 -Total Receives: 2938 -Stores 2029 1 -Total Receives: 2939 -Stores 2030 1 -Total Receives: 2940 -Stores 2031 1 -Total Receives: 2941 -Stores 2032 1 -tensor([-0.1298, -0.2569, -0.2294, -0.1181, -0.2136, -0.0999]) -tensor([0.0273, 0.0272]) -Total rewards: 2946 -Total Receives: 2942 -Stores 2033 1 -Total Receives: 2943 -Stores 2034 1 -Total Receives: 2944 -Stores 2035 1 -Total Receives: 2945 -Stores 2036 1 -Total Receives: 2946 -Stores 2037 1 -tensor([-0.0256, -0.0185, -0.0217, -0.0185, -0.0213]) -tensor([0.0343, 0.0319, 0.0325, 0.0337]) -tensor([0.0466, 0.0328, 0.0444]) -tensor([-0.0964, -0.1181, -0.0996, -0.1079]) -tensor([-0.0622, -0.0741, -0.0788, -0.0913]) -tensor([0.0289, 0.0377, 0.0383, 0.0353, 0.0134]) -tensor([-0.1195, -0.0880]) -tensor([-0.6281, -0.6287, -0.6273, -0.6254, -0.6276, -0.6292, -0.6254, -0.6304]) -tensor([-0.0274, -0.0485]) -tensor([-0.2786, -0.2656, -0.2736, -0.2508]) -tensor([-17.1625, -17.2201, -17.1735, -17.2164, -17.1586, -17.1991, -17.2799, - -17.1441, -17.2798, -17.1742, -17.2831, -17.2702, -17.1912, -17.2393]) -tensor([-0.0338, -0.0343, -0.0423]) -tensor([-0.1328, -0.2137, -0.1216, -0.2089, -0.0965]) -tensor([-0.0293, -0.0267, -0.0339, -0.0308, -0.0369]) -tensor([-0.0793, -0.1385, -0.1644, -0.1310, -0.0955]) -Total rewards: 2952 -Total Receives: 2947 -Stores 2038 1 -Total Receives: 2948 -Stores 2039 1 -Total Receives: 2949 -Stores 2040 1 -Total Receives: 2950 -Stores 2041 1 -Total Receives: 2951 -Stores 2042 1 -Total Receives: 2952 -Stores 2043 1 -Total rewards: 2955 -Total Receives: 2953 -Stores 2044 1 -Total Receives: 2954 -Stores 2045 1 -Total Receives: 2955 -Stores 2046 1 -tensor([0.0088, 0.0119]) -tensor([-0.2945, -0.2910, -0.2934]) -tensor([-2.3393, -2.3406, -2.3523, -2.3558, -2.3937, -2.3835, -2.3548, -2.3943]) -tensor([0.0224, 0.0216, 0.0240]) -tensor([-0.2737, -0.2600, -0.2688, -0.1792]) -Total rewards: 2964 -Total Receives: 2956 -Stores 2047 1 -Total Receives: 2957 -Stores 2048 1 -Total Receives: 2958 -Stores 2049 1 -Total Receives: 2959 -Stores 2050 1 -Total Receives: 2960 -Stores 2051 1 -Total Receives: 2961 -Stores 2052 1 -Total Receives: 2962 -Stores 2053 1 -Total Receives: 2963 -Stores 2054 1 -Total Receives: 2964 -Stores 2055 1 -tensor([-0.0036, 0.0029, 0.0011, 0.0163, -0.0169]) -tensor([-0.1956, -0.2303, -0.1893, -0.1558]) -tensor([-0.0414, -0.0410, -0.0347, -0.0405]) -tensor([-0.1020, -0.1117, -0.1165]) -tensor([-14.2741, -14.3299, -14.2854, -14.3307, -14.2687, -14.3131, -14.4000, - -14.4000, -14.2818, -14.3978, -14.3850, -14.3065, -14.4026, -14.3486]) -tensor([0.0757, 0.0926]) -tensor([-0.0682, -0.0754, -0.0810]) -tensor([-0.0907, -0.1986, -0.1825, -0.1941, -0.0917]) -tensor([-0.0328, -0.0264, -0.0110, -0.0462]) -tensor([0.0034, 0.0065, 0.0062]) -tensor([-0.7545, -0.7462, -0.7472, -0.7386, -0.7611, -0.7425, -0.7433, -0.7581, - -0.7616, -0.7611]) -tensor([-0.1619, -0.2397, -0.1511, -0.2358]) -tensor([-0.4136, -0.4137, -0.4139, -0.4132, -0.4114, -0.4144, -0.4210, -0.4174, - -0.4132]) -tensor([-0.1505, -0.1772, -0.0853, -0.1080, -0.0830]) -tensor([-0.0753, -0.0708, -0.0809, -0.0769, -0.0619]) -tensor([-0.5001, -0.5051, -0.5105]) -tensor([0.0275, 0.0275, 0.0270]) -tensor([-1.0979, -1.1204, -1.1553, -1.0947, -1.1108, -1.1079, -1.1548, -1.1408, - -1.1458, -1.1596, -1.1542]) -Total rewards: 2970 -Total Receives: 2965 -Stores 2056 1 -Total Receives: 2966 -Stores 2057 1 -Total Receives: 2967 -Stores 2058 1 -Total Receives: 2968 -Stores 2059 1 -Total Receives: 2969 -Stores 2060 1 -Total Receives: 2970 -Stores 2061 1 -tensor([-0.1323, -0.2147, -0.2120, -0.1334]) -Total rewards: 2979 -Total Receives: 2971 -Stores 2062 1 -Total Receives: 2972 -Stores 2063 1 -Total Receives: 2973 -Stores 2064 1 -Total Receives: 2974 -Stores 2065 1 -Total Receives: 2975 -Stores 2066 1 -Total Receives: 2976 -Stores 2067 1 -Total Receives: 2977 -Stores 2068 1 -Total Receives: 2978 -Stores 2069 1 -Total Receives: 2979 -Stores 2070 1 -tensor([-0.1020, -0.1116, -0.0708]) -tensor([0.0025, 0.0107, 0.0042]) -tensor([-13.8583, -13.9098, -13.9850, -13.8698, -13.9142, -13.8960, -13.9985, - -13.9985, -13.8638, -13.9823, -13.9683, -13.9928, -13.8894, -13.9861, - -13.9267, -13.9985]) -Total rewards: 2984 -Total Receives: 2980 -Stores 2071 1 -Total Receives: 2981 -Stores 2072 1 -Total Receives: 2982 -Stores 2073 1 -Total Receives: 2983 -Stores 2074 1 -Total Receives: 2984 -Stores 2075 1 -tensor([-0.0327, -0.0348, -0.0379]) -tensor([-0.0591, -0.0497, -0.0397, -0.0642, -0.0516]) -tensor([0.0345, 0.0342]) -Total rewards: 2994 -Total Receives: 2985 -Stores 2076 1 -Total Receives: 2986 -Stores 2077 1 -Total Receives: 2987 -Stores 2078 1 -Total Receives: 2988 -Stores 2079 1 -Total Receives: 2989 -Stores 2080 1 -Total Receives: 2990 -Stores 2081 1 -Total Receives: 2991 -Stores 2082 1 -Total Receives: 2992 -Stores 2083 1 -Total Receives: 2993 -Stores 2084 1 -Total Receives: 2994 -Stores 2085 1 -tensor([-11.1747, -11.2477, -11.1398, -11.1807, -11.1641, -11.2640, -11.2640, - -11.1346, -11.2437, -11.2302, -11.2539, -11.1582, -11.2467, -11.1891, - -11.2640]) -tensor([-0.0448, -0.0433, -0.0394, -0.0460]) -Total rewards: 3000 -Total Receives: 2995 -Stores 2086 1 -Total Receives: 2996 -Stores 2087 1 -Total Receives: 2997 -Stores 2088 1 -Total Receives: 2998 -Stores 2089 1 -Total Receives: 2999 -Stores 2090 1 -Total Receives: 3000 -Stores 2091 1 -tensor([-0.0801, -0.0756, -0.0817, -0.0700]) -tensor([-0.3694, -0.3680]) -tensor([-0.4142, -0.4129, -0.4130, -0.4148, -0.4104, -0.4240, -0.4215, -0.4101, - -0.4148]) -tensor([0.0193, 0.0175]) -Total rewards: 3008 -Total Receives: 3001 -Stores 2092 1 -Total Receives: 3002 -Stores 2093 1 -Total Receives: 3003 -Stores 2094 1 -Total Receives: 3004 -Stores 2095 1 -Total Receives: 3005 -Stores 2096 1 -Total Receives: 3006 -Stores 2097 1 -Total Receives: 3007 -Stores 2098 1 -Total Receives: 3008 -Stores 2099 1 -tensor([-0.0584, -0.0534, -0.0705, -0.0316]) -tensor([0.0206, 0.0002, 0.0002]) -tensor([-0.1254, -0.1367, -0.1620, -0.1114]) -tensor([-0.1766, -0.0907, -0.2020, -0.1080, -0.1339]) -tensor([-0.0959, -0.1124]) -Total rewards: 3016 -Total Receives: 3009 -Stores 2100 1 -Total Receives: 3010 -Stores 2101 1 -Total Receives: 3011 -Stores 2102 1 -Total Receives: 3012 -Stores 2103 1 -Total Receives: 3013 -Stores 2104 1 -Total Receives: 3014 -Stores 2105 1 -Total Receives: 3015 -Stores 2106 1 -Total Receives: 3016 -Stores 2107 1 -tensor([-0.1254, -0.1256]) -tensor([-0.1013, -0.1048]) -tensor([0.0332, 0.0332, 0.0322, 0.0258]) -tensor([0.0365, 0.0317]) -tensor([-0.0947, -0.1004, -0.0804]) -tensor([-0.2214, -0.1030, -0.2184, -0.1418, -0.1197]) -Total rewards: 3018 -Total Receives: 3017 -Stores 2108 1 -Total Receives: 3018 -Stores 2109 1 -tensor([-0.0781, -0.0641]) -tensor([0.0332, 0.0349, 0.0336]) -tensor([-0.0881, -0.1608, -0.1795, -0.1091, -0.1289]) -tensor([-0.9424, -0.9650, -1.0079, -1.0209, -0.9474, -0.9473, -1.0109, -0.9888, - -0.9913, -1.0204, -1.0259, -1.0039]) -tensor([-25.2839, -25.2197, -25.2651, -25.1451, -25.1940, -25.1754, -25.2934, - -25.2610, -25.2934, -24.5540, -25.2482, -25.2684, -25.1700, -25.2653, - -25.2405, -25.2934]) -tensor([-7.7642, -7.7077, -7.7476, -7.6445, -7.6851, -7.6675, -7.7729, -7.7389, - -7.7729, -7.7327, -7.7601, -7.6639, -7.7477, -7.7253, -7.7729]) -tensor([0.0396, 0.0406]) -tensor([-0.2342, -0.1312, -0.2313, -0.1402]) -Total rewards: 3023 -Total Receives: 3019 -Stores 2110 1 -Total Receives: 3020 -Stores 2111 1 -Total Receives: 3021 -Stores 2112 1 -Total Receives: 3022 -Stores 2113 1 -Total Receives: 3023 -Stores 2114 1 -tensor([-0.1075, -0.1192, -0.0552]) -tensor([-0.1070, -0.1210, -0.1061]) -tensor([-0.1377, -0.1369]) -tensor([-0.9825, -0.9853, -0.9956, -0.9413, -0.9373, -0.9923, -0.9725, -0.9714, - -0.9958, -1.0020, -0.9840]) -tensor([-0.1385, -0.1503]) -tensor([-5.9563, -5.8921, -5.9387, -5.8609, -5.8479, -5.9738, -5.9279, -5.9738, - -5.9688, -5.9219, -5.9503, -5.8412, -5.9369, -5.9129, -5.9738]) -tensor([-0.1553, -0.2305, -0.1018]) -tensor([-0.1242, -0.1167, -0.1254, -0.0927]) -tensor([-0.1744, -0.1971, -0.1281, -0.1487, -0.1063]) -tensor([-1.0787, -1.0813, -1.0907, -1.0384, -1.0306, -1.0907, -1.0684, -1.0666, - -1.0920, -1.0994]) -tensor([-0.7380, -0.7148, -0.7084, -0.7091, -0.7405, -0.7428, -0.7042, -0.7306, - -0.7325, -0.7108]) -Total rewards: 3029 -Total Receives: 3024 -Stores 2115 1 -Total Receives: 3025 -Stores 2116 1 -Total Receives: 3026 -Stores 2117 1 -Total Receives: 3027 -Stores 2118 1 -Total Receives: 3028 -Stores 2119 1 -Total Receives: 3029 -tensor([-0.0688, -0.0693, -0.0595]) -tensor([-0.1967, -0.1992]) -tensor([0.0374, 0.0335]) -tensor([-0.0851, -0.0781, -0.0913]) -tensor([-0.2479, -0.2439, -0.1507, -0.1469]) -tensor([-0.0972, -0.0906, -0.0492, -0.0987]) -tensor([-0.1546, -0.1512]) -tensor([-6.7693, -6.7080, -6.7530, -6.6812, -6.6699, -6.7909, -6.7383, -6.7807, - -6.7381, -6.7658, -6.6654, -6.7529, -6.7303, -6.7909]) -tensor([-5.3242, -5.2575, -5.3062, -5.2284, -5.2161, -5.3468, -5.2915, -5.3361, - -5.2893, -5.3200, -5.2113, -5.2806, -5.3468]) -tensor([0.0107, 0.0120, 0.0125, 0.0121, 0.0112]) -tensor([-0.1137, -0.1096, -0.1279]) -tensor([-0.1090, -0.1210]) -Total rewards: 3036 -Total Receives: 3030 -Total Receives: 3031 -Total Receives: 3032 -Total Receives: 3033 -Total Receives: 3034 -Total Receives: 3035 -Total Receives: 3036 -tensor([0.0288, 0.0279]) -Total rewards: 3042 -Total Receives: 3037 -Total Receives: 3038 -Total Receives: 3039 -Total Receives: 3040 -Total Receives: 3041 -Total Receives: 3042 -tensor([-0.1293, -0.1224, -0.1309]) -tensor([-0.0091, -0.0094, 0.0006]) -tensor([-5.5612, -5.4930, -5.5473, -5.4687, -5.4579, -5.5880, -5.5265, -5.5724, - -5.5542, -5.4508, -5.5181, -5.5880]) -tensor([-0.1399, -0.1503, -0.1339]) -tensor([-0.1116, -0.1068, -0.1085, -0.1071]) -Total rewards: 3046 -Total Receives: 3043 -Total Receives: 3044 -Total Receives: 3045 -Total Receives: 3046 -tensor([-0.1118, -0.1689, -0.2444, -0.1060]) -Total rewards: 3056 -Total Receives: 3047 -Stores 2120 1 -Total Receives: 3048 -Stores 2121 1 -Total Receives: 3049 -Stores 2122 1 -Total Receives: 3050 -Stores 2123 1 -Total Receives: 3051 -Stores 2124 1 -Total Receives: 3052 -Stores 2125 1 -Total Receives: 3053 -Stores 2126 1 -Total Receives: 3054 -Stores 2127 1 -Total Receives: 3055 -Stores 2128 1 -Total Receives: 3056 -Stores 2129 1 -tensor([-0.0653, -0.0263, -0.0723, -0.0275]) -tensor([-2.1920, -2.1685, -2.1502, -2.1522, -2.1963, -2.2092, -2.1298, -2.1859, - -2.1918]) -Total rewards: 3060 -Total Receives: 3057 -Stores 2130 1 -Total Receives: 3058 -Stores 2131 1 -Total Receives: 3059 -Stores 2132 1 -Total Receives: 3060 -Stores 2133 1 -tensor([-0.2082, -0.2121, -0.1329, -0.1728, -0.1194]) -Total rewards: 3066 -Total Receives: 3061 -Total Receives: 3062 -Total Receives: 3063 -Total Receives: 3064 -Total Receives: 3065 -Total Receives: 3066 -tensor([-0.1400, -0.1415]) -tensor([-1.0739, -1.0776, -1.0899, -1.0944, -1.0196, -1.0894, -1.0542, -1.0544, - -1.1111, -1.0906, -1.1144, -1.1123]) -tensor([-5.4209, -5.3603, -5.4096, -5.3362, -5.3239, -5.4501, -5.3887, -5.4314, - -5.4158, -5.3859, -5.4501]) -tensor([0.0257, 0.0228]) -tensor([-0.0786, -0.0622, -0.0757, -0.0751]) -tensor([-0.2997, -0.2969, -0.1668, -0.1454, -0.1509]) -tensor([-1.5656, -1.5410, -1.5223, -1.5246, -1.5705, -1.5871, -1.5042, -1.5671]) -tensor([-0.1506, -0.1683]) -tensor([-0.2315, -0.2362, -0.1667, -0.2033]) -Total rewards: 3073 -Total Receives: 3067 -Stores 2134 1 -Total Receives: 3068 -Stores 2135 1 -Total Receives: 3069 -Stores 2136 1 -Total Receives: 3070 -Stores 2137 1 -Total Receives: 3071 -Stores 2138 1 -Total Receives: 3072 -Stores 2139 1 -Total Receives: 3073 -Stores 2140 1 -tensor([-0.0903, -0.1002, -0.0526]) -tensor([0.0395, 0.0381, 0.0380]) -tensor([0.0401, 0.0402]) -tensor([-0.0179, -0.0185, -0.0179, -0.0180, -0.0165]) -tensor([-0.9283, -0.9173, -0.9182, -0.9194, -0.9333, -0.9475, -0.9404]) -tensor([-0.2440, -0.1571, -0.1413, -0.1461]) -tensor([-0.1549, -0.1666, -0.1511]) -Total rewards: 3077 -Total Receives: 3074 -Stores 2141 1 -Total Receives: 3075 -Stores 2142 1 -Total Receives: 3076 -Stores 2143 1 -Total Receives: 3077 -Stores 2144 1 -tensor([-0.1534, -0.1543, -0.1554]) -Total rewards: 3082 -Total Receives: 3078 -Total Receives: 3079 -Total Receives: 3080 -Total Receives: 3081 -Total Receives: 3082 -tensor([-0.2723, -0.1568, -0.1767]) -tensor([0.0372, 0.0504]) -tensor([-0.1298, -0.1878, -0.3023, -0.1136, -0.1181]) -tensor([-0.0402, -0.0396, -0.0401, -0.0447]) -tensor([0.0399, 0.0365]) -tensor([-0.1802, -0.1893]) -tensor([0.0368, 0.0353]) -tensor([-0.2906, -0.3014, -0.1996, -0.1531]) -tensor([-1.0362, -1.0400, -1.0525, -1.0537, -1.0562, -1.0106, -1.0119, -1.0767, - -1.0811, -1.0530, -1.0952, -1.0782, -1.0952]) -tensor([-0.0495, -0.0508, -0.0639, -0.0637]) -tensor([-5.5556, -5.4815, -5.5368, -5.4595, -5.6042, -5.5637, -5.5675, -5.5472, - -5.5109, -5.6030, -5.5902, -5.6042]) -Total rewards: 3089 -Total Receives: 3083 -Total Receives: 3084 -Total Receives: 3085 -Total Receives: 3086 -Total Receives: 3087 -Total Receives: 3088 -Total Receives: 3089 -tensor([-0.1793, -0.1287, -0.1221, -0.1273, -0.1212, -0.1145, -0.1145]) -tensor([-0.5303, -0.5310, -0.5345, -0.5267, -0.5540, -0.5344, -0.5445, -0.5439, - -0.5363, -0.5550, -0.5455, -0.5550]) -Total rewards: 3092 -Total Receives: 3090 -Stores 2145 1 -Total Receives: 3091 -Stores 2146 1 -Total Receives: 3092 -Stores 2147 1 -tensor([-0.0341, -0.0277]) -tensor([0.0417, 0.0309]) -tensor([-1.4677, -1.4461, -1.4261, -1.4291, -1.4724, -1.4758]) -tensor([-0.2257, -0.1809, -0.1743, -0.1803, -0.1634, -0.1634]) -tensor([-0.3036, -0.3036, -0.3058, -0.3267, -0.3061, -0.3094, -0.3023, -0.3068, - -0.3094, -0.3094, -0.3094]) -tensor([0.0318, 0.0309, 0.0303]) -tensor([-0.2224, -0.1520, -0.1508, -0.1276, -0.1276]) -Total rewards: 3099 -Total Receives: 3093 -Stores 2148 1 -Total Receives: 3094 -Stores 2149 1 -Total Receives: 3095 -Stores 2150 1 -Total Receives: 3096 -Stores 2151 1 -Total Receives: 3097 -Stores 2152 1 -Total Receives: 3098 -Stores 2153 1 -Total Receives: 3099 -Stores 2154 1 -tensor([-3.9616, -3.9028, -3.9452, -4.0020, -3.9681, -3.9710, -3.9553, -3.9995, - -3.9281, -4.0003, -3.9855, -4.0020]) -tensor([-0.1205, -0.0299, -0.0600]) -tensor([-0.0997, -0.0892, -0.0885, -0.0855]) -Total rewards: 3105 -Total Receives: 3100 -Total Receives: 3101 -Total Receives: 3102 -Total Receives: 3103 -Total Receives: 3104 -Total Receives: 3105 -tensor([0.0560, 0.0360]) -Total rewards: 3111 -Total Receives: 3106 -Total Receives: 3107 -Total Receives: 3108 -Total Receives: 3109 -Total Receives: 3110 -Total Receives: 3111 -tensor([-0.1764, -0.2511, -0.3368, -0.1641, -0.1386]) -tensor([-0.3621, -0.1961, -0.2157]) -tensor([-0.1108, -0.1240, -0.1256]) -tensor([0.0057, 0.0062]) -tensor([-0.3166, -0.2279, -0.1577, -0.1502]) -tensor([-0.1564, -0.1206]) -tensor([-0.0136, -0.0141]) -tensor([-0.0558, -0.0558, -0.0538, -0.0658]) -tensor([-0.1380, -0.3075, -0.1000, -0.1265, -0.0990]) -tensor([-0.0813, -0.0718]) -tensor([-0.0389, -0.0309, -0.0239, -0.0714, -0.0371]) -tensor([-2.7388, -2.6807, -2.7576, -2.7333, -2.7878, -2.7448, -2.7476, -2.7851, - -2.7322, -2.7762, -2.7074, -2.7714, -2.7580]) -tensor([0.0173, 0.0181]) -tensor([-0.0433, -0.0349, -0.0500, -0.0449]) -Total rewards: 3117 -Total Receives: 3112 -Stores 2155 1 -Total Receives: 3113 -Stores 2156 1 -Total Receives: 3114 -Stores 2157 1 -Total Receives: 3115 -Stores 2158 1 -Total Receives: 3116 -Stores 2159 1 -Total Receives: 3117 -Stores 2160 1 -Total rewards: 3122 -Total Receives: 3118 -Stores 2161 1 -Total Receives: 3119 -Stores 2162 1 -Total Receives: 3120 -Stores 2163 1 -Total Receives: 3121 -Stores 2164 1 -Total Receives: 3122 -Stores 2165 1 -tensor([-0.6374, -0.6134, -0.6222]) -tensor([-1.1736, -1.1509, -1.1922, -1.2142, -1.1331, -1.1791, -1.2176, -1.1876, - -1.2180]) -tensor([0.0379, 0.0359]) -Total rewards: 3128 -Total Receives: 3123 -Stores 2166 1 -Total Receives: 3124 -Stores 2167 1 -Total Receives: 3125 -Stores 2168 1 -Total Receives: 3126 -Stores 2169 1 -Total Receives: 3127 -Stores 2170 1 -Total Receives: 3128 -Stores 2171 1 -tensor([-0.4526, -0.4525, -0.4538, -0.4789, -0.4551, -0.4561, -0.4549, -0.4573, - -0.4567, -0.4573]) -tensor([-0.0701, -0.1477, -0.0922, -0.0599, -0.0940, -0.0591]) -tensor([0.0339, 0.0275]) -tensor([-0.0527, -0.0443]) -tensor([-0.0814, -0.0812, -0.0934]) -Total rewards: 3135 -Total Receives: 3129 -Stores 2172 1 -Total Receives: 3130 -Stores 2173 1 -Total Receives: 3131 -Stores 2174 1 -Total Receives: 3132 -Stores 2175 1 -Total Receives: 3133 -Stores 2176 1 -Total Receives: 3134 -Stores 2177 1 -Total Receives: 3135 -Stores 2178 1 -tensor([-0.3151, -0.1376, -0.1879, -0.1351, -0.1327]) -tensor([0.0338, 0.0331, 0.0316, 0.0320]) -Total rewards: 3140 -Total Receives: 3136 -Stores 2179 1 -Total Receives: 3137 -Stores 2180 1 -Total Receives: 3138 -Stores 2181 1 -Total Receives: 3139 -Stores 2182 1 -Total Receives: 3140 -Stores 2183 1 -tensor([-0.1046, -0.0986, -0.1289, -0.1046]) -tensor([-0.0853, -0.1025, -0.1178]) -tensor([-0.1311, -0.2603, -0.1920, -0.1380]) -tensor([-0.1584, -0.2768, -0.1137, -0.1212, -0.1468]) -tensor([-0.1485, -0.1404, -0.0927]) -tensor([-3.6267, -3.6778, -3.6100, -3.6895, -3.6329, -3.6359, -3.6732, -3.6184, - -3.6651, -3.5908, -3.6568, -3.6421, -3.6871, -3.6805]) -tensor([0.0299, 0.0295]) -Total rewards: 3146 -Total Receives: 3141 -Total Receives: 3142 -Total Receives: 3143 -Total Receives: 3144 -Total Receives: 3145 -Total Receives: 3146 -tensor([-1.2515, -1.2329, -1.2610, -1.2848, -1.2564, -1.2895, -1.2715, -1.2944]) -tensor([-0.0295, -0.0301]) -tensor([-0.5352, -0.5353, -0.5369, -0.5392, -0.5387, -0.5391, -0.5381, -0.5404, - -0.5381]) -tensor([-0.1578, -0.2870, -0.1660]) -tensor([-0.1045, -0.0580, -0.0546]) -tensor([-0.0177, -0.0462, -0.0653, -0.0307]) -tensor([0.0322, 0.0310, 0.0322]) -tensor([-0.0269, -0.0206]) -tensor([-0.1075, -0.1792, -0.1266, -0.1015, -0.0894, -0.1304, -0.0957]) -tensor([-0.0887, -0.0456, -0.0947, -0.0391]) -Total rewards: 3155 -Total Receives: 3147 -Stores 2184 1 -Total Receives: 3148 -Stores 2185 1 -Total Receives: 3149 -Stores 2186 1 -Total Receives: 3150 -Stores 2187 1 -Total Receives: 3151 -Stores 2188 1 -Total Receives: 3152 -Stores 2189 1 -Total Receives: 3153 -Stores 2190 1 -Total Receives: 3154 -Stores 2191 1 -Total Receives: 3155 -Stores 2192 1 -tensor([-0.7214, -0.7163, -0.7378, -0.7499, -0.7262, -0.7425, -0.7556, -0.7480]) -tensor([-1.5531, -1.5512]) -tensor([-3.6093, -3.6560, -3.5903, -3.6759, -3.6158, -3.6187, -3.6560, -3.6013, - -3.6482, -3.6327, -3.6230, -3.6672, -3.6597]) -Total rewards: 3163 -Total Receives: 3156 -Stores 2193 1 -Total Receives: 3157 -Stores 2194 1 -Total Receives: 3158 -Stores 2195 1 -Total Receives: 3159 -Stores 2196 1 -Total Receives: 3160 -Stores 2197 1 -Total Receives: 3161 -Stores 2198 1 -Total Receives: 3162 -Stores 2199 1 -Total Receives: 3163 -Stores 2200 1 -tensor([-0.4286, -0.4505, -0.4554, -0.4486, -0.4544, -0.4780, -0.4599]) -tensor([-0.3190, -0.1898, -0.2293, -0.1890]) -tensor([-0.9725, -0.9465, -0.9752, -0.9777, -0.9985, -0.9782, -1.0002, -0.9820, - -1.0002]) -tensor([0.0355, 0.0338]) -Total rewards: 3171 -Total Receives: 3164 -Stores 2201 1 -Total Receives: 3165 -Stores 2202 1 -Total Receives: 3166 -Stores 2203 1 -Total Receives: 3167 -Total Receives: 3168 -Total Receives: 3169 -Total Receives: 3170 -Total Receives: 3171 -tensor([-0.2342, -0.1021, -0.1256, -0.0985]) -tensor([0.0359, 0.0351]) -tensor([-0.0743, -0.0754]) -tensor([-0.5729, -0.5741, -0.5769, -0.5765, -0.5763, -0.5747, -0.5781, -0.5747]) -tensor([-0.1017, -0.0582, -0.1092]) -tensor([-0.3320, -0.2054, -0.2460]) -Total rewards: 3179 -Total Receives: 3172 -Stores 2204 1 -Total Receives: 3173 -Stores 2205 1 -Total Receives: 3174 -Stores 2206 1 -Total Receives: 3175 -Stores 2207 1 -Total Receives: 3176 -Stores 2208 1 -Total Receives: 3177 -Stores 2209 1 -Total Receives: 3178 -Stores 2210 1 -Total Receives: 3179 -Stores 2211 1 -tensor([-0.0214, -0.0374, -0.0505, -0.0222]) -tensor([-0.0875, -0.0661, -0.0835, -0.0913]) -tensor([0.0186, 0.0185]) -tensor([-1.3181, -1.3165, -1.3163]) -tensor([-0.2074, -0.3615, -0.1641, -0.1714]) -tensor([-0.8829, -0.8928, -0.8658, -0.8860, -0.8972, -0.8946]) -tensor([0.0395, 0.0371]) -Total rewards: 3184 -Total Receives: 3180 -Stores 2212 1 -Total Receives: 3181 -Stores 2213 1 -Total Receives: 3182 -Stores 2214 1 -Total Receives: 3183 -Stores 2215 1 -Total Receives: 3184 -Stores 2216 1 -tensor([-0.0229, -0.0197, -0.0212, -0.0217, -0.0188]) -tensor([-0.0011, -0.0028, -0.0068, -0.0053]) -tensor([-0.8203, -0.8210, -0.8273]) -tensor([-0.0709, -0.0819]) -Total rewards: 3191 -Total Receives: 3185 -Stores 2217 1 -Total Receives: 3186 -Stores 2218 1 -Total Receives: 3187 -Stores 2219 1 -Total Receives: 3188 -Stores 2220 1 -Total Receives: 3189 -Stores 2221 1 -Total Receives: 3190 -Stores 2222 1 -Total Receives: 3191 -Stores 2223 1 -tensor([-0.4545, -0.4521, -0.4581]) -tensor([-0.0042, -0.0110, -0.0089]) -tensor([0.0385, 0.0365]) -tensor([ 0.0008, -0.0001]) -tensor([-0.1298, -0.1934, -0.1447, -0.1238, -0.1127, -0.1519, -0.0981]) -Total rewards: 3198 -Total Receives: 3192 -Stores 2224 1 -Total Receives: 3193 -Stores 2225 1 -Total Receives: 3194 -Stores 2226 1 -Total Receives: 3195 -Stores 2227 1 -Total Receives: 3196 -Stores 2228 1 -Total Receives: 3197 -Stores 2229 1 -Total Receives: 3198 -Stores 2230 1 -tensor([-8.4816, -8.5179, -8.5807, -8.4894, -8.4932, -8.5420, -8.4676, -8.5302, - -8.5097, -8.4982, -8.5567, -8.5432, -8.5691]) -tensor([-0.3379, -0.1849, -0.2492]) -tensor([-0.2194, -0.3364, -0.1842]) -tensor([-0.0207, -0.0213]) -tensor([-0.2918, -0.2945, -0.2983]) -Total rewards: 3208 -Total Receives: 3199 -Stores 2231 1 -Total Receives: 3200 -Stores 2232 1 -Total Receives: 3201 -Stores 2233 1 -Total Receives: 3202 -Stores 2234 1 -Total Receives: 3203 -Stores 2235 1 -Total Receives: 3204 -Stores 2236 1 -Total Receives: 3205 -Stores 2237 1 -Total Receives: 3206 -Stores 2238 1 -Total Receives: 3207 -Stores 2239 1 -Total Receives: 3208 -Stores 2240 1 -tensor([-0.5704, -0.5717, -0.5758, -0.5724, -0.5753, -0.5744, -0.5763, -0.5753, - -0.5753]) -tensor([-0.1468, -0.1120, -0.1221, -0.1068]) -tensor([-0.1640, -0.1565, -0.1726, -0.1155]) -Total rewards: 3211 -Total Receives: 3209 -Stores 2241 1 -Total Receives: 3210 -Stores 2242 1 -Total Receives: 3211 -Stores 2243 1 -tensor([-0.0520, -0.0308, -0.0529, -0.0468]) -tensor([-0.0441, -0.0624]) -tensor([0.0361, 0.0356]) -tensor([-1.0861, -1.0921, -1.0884, -1.1153, -1.1001]) -tensor([0.0342, 0.0319]) -tensor([-0.1808, -0.1509, -0.2370, -0.1971, -0.1755, -0.1671, -0.2028]) -tensor([-0.3320, -0.3574]) -tensor([0.0184, 0.0202]) -tensor([-0.2625, -0.2464, -0.1984]) -tensor([-0.1299, -0.1204, -0.0766, -0.1365]) -tensor([0.0397, 0.0410]) -tensor([-0.0584, -0.0610, -0.0546]) -tensor([-0.2621, -0.4178, -0.1731, -0.1735]) -tensor([-0.3242, -0.3261, -0.3357]) -tensor([-0.1294, -0.1197]) -tensor([-9.3756, -9.4150, -9.5039, -9.3840, -9.3881, -9.4423, -9.4688, -9.4876, - -9.4279, -9.4027, -9.3899, -9.4582, -9.4374, -9.4656, -9.5039]) -tensor([0.0232, 0.0264, 0.0224]) -tensor([-0.0492, -0.0474, -0.0420, -0.0400, -0.0407]) -tensor([-0.1017, -0.1183, -0.1002]) -Total rewards: 3218 -Total Receives: 3212 -Total Receives: 3213 -Total Receives: 3214 -Total Receives: 3215 -Total Receives: 3216 -Total Receives: 3217 -Total Receives: 3218 -tensor([-0.0032, -0.0051, 0.0018, 0.0021]) -tensor([-0.3785, -0.3140, -0.3814, -0.2988, -0.3545, -0.3098, -0.2988, -0.2988]) -tensor([-0.0421, -0.0572, -0.0407]) -tensor([-0.1813, -0.1645, -0.1664, -0.1986]) -tensor([-0.3712, -0.2000, -0.2312, -0.1605]) -tensor([-0.1124, -0.1916, -0.1351, -0.0622, -0.0635, -0.1037, -0.0841, -0.1445]) -tensor([0.0365, 0.0333]) -tensor([0.0424, 0.0421]) -Total rewards: 3225 -Total Receives: 3219 -Stores 2244 1 -Total Receives: 3220 -Stores 2245 1 -Total Receives: 3221 -Stores 2246 1 -Total Receives: 3222 -Stores 2247 1 -Total Receives: 3223 -Stores 2248 1 -Total Receives: 3224 -Stores 2249 1 -Total Receives: 3225 -Stores 2250 1 -tensor([-0.3859, -0.1366, -0.1468, -0.1471]) -tensor([-0.0728, -0.0573]) -tensor([-0.1929, -0.1845, -0.1845]) -Total rewards: 3234 -Total Receives: 3226 -Stores 2251 1 -Total Receives: 3227 -Stores 2252 1 -Total Receives: 3228 -Stores 2253 1 -Total Receives: 3229 -Stores 2254 1 -Total Receives: 3230 -Stores 2255 1 -Total Receives: 3231 -Stores 2256 1 -Total Receives: 3232 -Stores 2257 1 -Total Receives: 3233 -Stores 2258 1 -Total Receives: 3234 -Stores 2259 1 -tensor([-0.9083, -0.9255, -0.9042, -0.9342, -0.9211, -0.9360, -0.9329]) -tensor([0.0405, 0.0410]) -Total rewards: 3240 -Total Receives: 3235 -Stores 2260 1 -Total Receives: 3236 -Stores 2261 1 -Total Receives: 3237 -Stores 2262 1 -Total Receives: 3238 -Stores 2263 1 -Total Receives: 3239 -Stores 2264 1 -Total Receives: 3240 -Stores 2265 1 -tensor([-0.1360, -0.2096, -0.1575, -0.0901, -0.1261, -0.1078, -0.1668]) -tensor([0.0481, 0.0267, 0.0222]) -tensor([-0.1037, -0.0969]) -Total rewards: 3250 -Total Receives: 3241 -Stores 2266 1 -Total Receives: 3242 -Stores 2267 1 -Total Receives: 3243 -Stores 2268 1 -Total Receives: 3244 -Stores 2269 1 -Total Receives: 3245 -Stores 2270 1 -Total Receives: 3246 -Stores 2271 1 -Total Receives: 3247 -Stores 2272 1 -Total Receives: 3248 -Stores 2273 1 -Total Receives: 3249 -Stores 2274 1 -Total Receives: 3250 -Stores 2275 1 -Total rewards: 3257 -Total Receives: 3251 -Stores 2276 1 -Total Receives: 3252 -Stores 2277 1 -Total Receives: 3253 -Stores 2278 1 -Total Receives: 3254 -Stores 2279 1 -Total Receives: 3255 -Stores 2280 1 -Total Receives: 3256 -Stores 2281 1 -Total Receives: 3257 -Stores 2282 1 -tensor([-0.0419, -0.0433, -0.0466, -0.0502]) -tensor([-0.1902, -0.1413]) -tensor([-0.2375, -0.2311, -0.2788]) -tensor([-0.2498, -0.2425]) -tensor([-0.0893, -0.0976, -0.1091, -0.0951, -0.0849]) -Total rewards: 3262 -Total Receives: 3258 -Stores 2283 1 -Total Receives: 3259 -Stores 2284 1 -Total Receives: 3260 -Stores 2285 1 -Total Receives: 3261 -Stores 2286 1 -Total Receives: 3262 -Stores 2287 1 -Total rewards: 3271 -Total Receives: 3263 -Stores 2288 1 -Total Receives: 3264 -Stores 2289 1 -Total Receives: 3265 -Stores 2290 1 -Total Receives: 3266 -Stores 2291 1 -Total Receives: 3267 -Stores 2292 1 -Total Receives: 3268 -Stores 2293 1 -Total Receives: 3269 -Stores 2294 1 -Total Receives: 3270 -Stores 2295 1 -Total Receives: 3271 -Stores 2296 1 -tensor([-0.3847, -0.1742, -0.1747]) -Total rewards: 3280 -Total Receives: 3272 -Stores 2297 1 -Total Receives: 3273 -Stores 2298 1 -Total Receives: 3274 -Stores 2299 1 -Total Receives: 3275 -Stores 2300 1 -Total Receives: 3276 -Stores 2301 1 -Total Receives: 3277 -Stores 2302 1 -Total Receives: 3278 -Stores 2303 1 -Total Receives: 3279 -Stores 2304 1 -Total Receives: 3280 -Stores 2305 1 -tensor([-0.1043, -0.1008]) -tensor([0.0313, 0.0325]) -tensor([-0.1903, -0.2449, -0.1985, -0.1845, -0.1649, -0.2129]) -tensor([-0.2989, -0.2489, -0.3017, -0.2039, -0.2834, -0.2443, -0.2039]) -tensor([-8.7700, -8.7945, -8.8960, -8.7778, -8.7815, -8.8281, -8.8476, -8.8661, - -8.8107, -8.8116, -8.7732, -8.8416, -8.8797, -8.8380, -8.8960]) -tensor([-0.3118, -0.1563, -0.1046, -0.1175, -0.1103]) -tensor([0.0287, 0.0301]) -tensor([0.0413, 0.0406]) -tensor([-0.3276, -0.1717, -0.1337, -0.1249]) -tensor([-0.1069, -0.1274]) -tensor([-0.5678, -0.5708, -0.5684, -0.5875, -0.5743, -0.5737]) -tensor([-0.1085, -0.0825, -0.0789]) -tensor([0.0525, 0.0435]) -tensor([-0.1072, -0.0824]) -Total rewards: 3285 -Total Receives: 3281 -Stores 2306 1 -Total Receives: 3282 -Stores 2307 1 -Total Receives: 3283 -Stores 2308 1 -Total Receives: 3284 -Stores 2309 1 -Total Receives: 3285 -Stores 2310 1 -tensor([0.0331, 0.0329]) -tensor([-0.3260, -0.3223, -0.3567, -0.3238, -0.3239]) -tensor([-0.0836, -0.0867, -0.0919, -0.0616, -0.0574]) -tensor([-0.1620, -0.2320, -0.1706, -0.1535, -0.1925, -0.0904, -0.0904]) -Total rewards: 3288 -Total Receives: 3286 -Stores 2311 1 -Total Receives: 3287 -Stores 2312 1 -Total Receives: 3288 -Stores 2313 1 -tensor([-0.0378, -0.0480, -0.0573, -0.0656, -0.0548, -0.0425]) -tensor([-0.1762, -0.1600, -0.1616]) -tensor([-0.0232, -0.0435, -0.0428, -0.0476, -0.0262]) -tensor([-0.1336, -0.1487]) -tensor([-0.1692, -0.2259, -0.1652, -0.1986, -0.1082, -0.1082]) -tensor([-0.0547, -0.0696]) -tensor([0.0270, 0.0313, 0.0283]) -Total rewards: 3296 -Total Receives: 3289 -Stores 2314 1 -Total Receives: 3290 -Stores 2315 1 -Total Receives: 3291 -Stores 2316 1 -Total Receives: 3292 -Stores 2317 1 -Total Receives: 3293 -Stores 2318 1 -Total Receives: 3294 -Stores 2319 1 -Total Receives: 3295 -Stores 2320 1 -Total Receives: 3296 -Stores 2321 1 -tensor([-0.7625, -0.4389, -0.4054]) -Total rewards: 3305 -Total Receives: 3297 -Stores 2322 1 -Total Receives: 3298 -Stores 2323 1 -Total Receives: 3299 -Stores 2324 1 -Total Receives: 3300 -Stores 2325 1 -Total Receives: 3301 -Stores 2326 1 -Total Receives: 3302 -Stores 2327 1 -Total Receives: 3303 -Stores 2328 1 -Total Receives: 3304 -Stores 2329 1 -Total Receives: 3305 -Stores 2330 1 -tensor([-13.5604, -13.6422, -13.5126, -13.5161, -13.5596, -13.5778, -13.6000, - -13.5398, -13.5440, -13.5015, -13.5739, -13.6300, -13.5679, -13.6422, - -13.6286]) -tensor([-0.2908, -0.2485, -0.2005, -0.2082, -0.2762, -0.2431, -0.2005]) -tensor([-0.0967, -0.0574]) -tensor([-0.0901, -0.0896, -0.0942, -0.0710]) -tensor([-0.1092, -0.0864, -0.0691]) -tensor([-0.1957, -0.1680, -0.1759]) -tensor([-0.1312, -0.1121]) -tensor([0.0263, 0.0263]) -tensor([-0.3569, -0.2046, -0.1481, -0.1563]) -tensor([0.0248, 0.0216]) -Total rewards: 3314 -Total Receives: 3306 -Stores 2331 1 -Total Receives: 3307 -Stores 2332 1 -Total Receives: 3308 -Stores 2333 1 -Total Receives: 3309 -Stores 2334 1 -Total Receives: 3310 -Stores 2335 1 -Total Receives: 3311 -Stores 2336 1 -Total Receives: 3312 -Total Receives: 3313 -Total Receives: 3314 -tensor([0.0398, 0.0419]) -tensor([-0.1697, -0.1486]) -tensor([-0.2967, -0.3076, -0.2669, -0.3431, -0.3000, -0.3005]) -tensor([-0.1678, -0.1745, -0.1817]) -tensor([-12.0309, -12.1308, -11.9728, -11.9769, -12.0294, -12.0428, -12.0733, - -12.0050, -12.0103, -12.0450, -12.1061, -12.0376, -12.1308, -12.1211]) -tensor([-0.0549, -0.0606, -0.0638]) -tensor([-0.1558, -0.2347, -0.1471, -0.0778, -0.0842, -0.0768, -0.0768]) -tensor([-0.1063, -0.1132, -0.1230, -0.1209, -0.1110]) -tensor([-0.3929, -0.2906, -0.3223, -0.2902, -0.2925, -0.3697, -0.2924, -0.3165]) -tensor([0.0306, 0.0322]) -tensor([-0.4187, -0.1309, -0.1305, -0.1380, -0.1889, -0.1286]) -Total rewards: 3322 -Total Receives: 3315 -Total Receives: 3316 -Total Receives: 3317 -Total Receives: 3318 -Total Receives: 3319 -Total Receives: 3320 -Total Receives: 3321 -Total Receives: 3322 -tensor([0.0348, 0.0344, 0.0348]) -tensor([-0.0604, -0.0582, -0.0800, -0.0884, -0.0787, -0.0563]) -tensor([-0.7101, -0.6799, -0.7373, -0.7179, -0.7163]) -tensor([-0.4120, -0.4512, -0.4060, -0.4053]) -tensor([-9.3814, -9.4775, -9.3335, -9.3805, -9.3852, -9.4188, -9.3550, -9.3643, - -9.3941, -9.4474, -9.3813, -9.4747, -9.4775, -9.4633]) -Total rewards: 3326 -Total Receives: 3323 -Stores 2337 1 -Total Receives: 3324 -Stores 2338 1 -Total Receives: 3325 -Stores 2339 1 -Total Receives: 3326 -Stores 2340 1 -tensor([0.0401, 0.0394]) -tensor([-0.2976, -0.1852, -0.1251, -0.1322, -0.1408]) -tensor([-0.1009, -0.1038, -0.0748]) -tensor([-0.0778, -0.0791]) -Total rewards: 3332 -Total Receives: 3327 -Stores 2341 1 -Total Receives: 3328 -Stores 2342 1 -Total Receives: 3329 -Stores 2343 1 -Total Receives: 3330 -Stores 2344 1 -Total Receives: 3331 -Stores 2345 1 -Total Receives: 3332 -Stores 2346 1 -tensor([-0.0541, -0.0335, -0.0622, -0.0238, -0.0689, -0.0579, -0.0326]) -tensor([-0.3110, -0.3464, -0.2744]) -tensor([-0.1982, -0.2548, -0.1936, -0.1351, -0.1403, -0.1209]) -tensor([-0.3332, -0.3422]) -tensor([-0.0225, -0.0221, -0.0206]) -tensor([-0.4842, -0.1981, -0.1984, -0.2071, -0.2389, -0.1993]) -tensor([0.0425, 0.0430]) -tensor([-0.2894, -0.2464, -0.1764, -0.2085, -0.2779, -0.1819, -0.1980, -0.2371]) -tensor([-10.6112, -10.7243, -10.5640, -10.6104, -10.6140, -10.6508, -10.5811, - -10.5939, -10.6245, -10.6845, -10.6097, -10.7242, -10.7018]) -tensor([-0.0301, -0.0402, -0.0466, -0.0432, -0.0808, -0.0924, -0.0713]) -Total rewards: 3337 -Total Receives: 3333 -Stores 2347 1 -Total Receives: 3334 -Stores 2348 1 -Total Receives: 3335 -Stores 2349 1 -Total Receives: 3336 -Stores 2350 1 -Total Receives: 3337 -Stores 2351 1 -tensor([-0.3858, -0.2355, -0.1820, -0.1907]) -tensor([0.0247, 0.0253]) -tensor([-0.1144, -0.1171, -0.0882, -0.0841]) -tensor([-0.1962, -0.2791, -0.1317, -0.1342, -0.1064]) -Total rewards: 3345 -Total Receives: 3338 -Stores 2352 1 -Total Receives: 3339 -Stores 2353 1 -Total Receives: 3340 -Stores 2354 1 -Total Receives: 3341 -Stores 2355 1 -Total Receives: 3342 -Stores 2356 1 -Total Receives: 3343 -Stores 2357 1 -Total Receives: 3344 -Stores 2358 1 -Total Receives: 3345 -Stores 2359 1 -tensor([-8.5598, -8.6776, -8.5083, -8.5670, -8.5753, -8.5482, -8.5405, -8.5738, - -8.6332, -8.5548, -8.6776, -8.6542]) -tensor([-0.0591, -0.0632]) -tensor([-0.1470, -0.1546]) -tensor([-0.0630, -0.0652, -0.0374, -0.0435]) -tensor([-0.0678, -0.0747, -0.0709, -0.1054, -0.1181, -0.0981, -0.0352]) -tensor([-0.1732, -0.1861, -0.2685, -0.3024]) -tensor([0.0326, 0.0223]) -tensor([0.0441, 0.0376]) -tensor([-0.5518, -0.5532, -0.5502]) -tensor([-0.1156, -0.1242]) -tensor([-0.4550, -0.3870, -0.3550, -0.4356, -0.3456, -0.3531, -0.3746]) -tensor([-0.0267, -0.0242]) -Stores 1340 1 -Total Receives: 1975 -Stores 1341 1 -Total Receives: 1976 -Stores 1342 1 -Total rewards: 1981 -Total Receives: 1977 -Stores 1343 1 -Total Receives: 1978 -Stores 1344 1 -Total Receives: 1979 -Stores 1345 1 -Total Receives: 1980 -Stores 1346 1 -Total Receives: 1981 -Stores 1347 1 -Total rewards: 1989 -Total Receives: 1982 -Stores 1348 1 -Total Receives: 1983 -Stores 1349 1 -Total Receives: 1984 -Stores 1350 1 -Total Receives: 1985 -Stores 1351 1 -Total Receives: 1986 -Stores 1352 1 -Total Receives: 1987 -Stores 1353 1 -Total Receives: 1988 -Stores 1354 1 -Total Receives: 1989 -Stores 1355 1 -Total rewards: 1996 -Total Receives: 1990 -Stores 1356 1 -Total Receives: 1991 -Stores 1357 1 -Total Receives: 1992 -Stores 1358 1 -Total Receives: 1993 -Stores 1359 1 -Total Receives: 1994 -Stores 1360 1 -Total Receives: 1995 -Stores 1361 1 -Total Receives: 1996 -Stores 1362 1 -Total rewards: 1998 -Total Receives: 1997 -Stores 1363 1 -Total Receives: 1998 -Stores 1364 1 -Total rewards: 2007 -Total Receives: 1999 -Stores 1365 1 -Total Receives: 2000 -Stores 1366 1 -Total Receives: 2001 -Stores 1367 1 -Total Receives: 2002 -Stores 1368 1 -Total Receives: 2003 -Stores 1369 1 -Total Receives: 2004 -Stores 1370 1 -Total Receives: 2005 -Stores 1371 1 -Total Receives: 2006 -Stores 1372 1 -Total Receives: 2007 -Stores 1373 1 -Total rewards: 2015 -Total Receives: 2008 -Stores 1374 1 -Total Receives: 2009 -Stores 1375 1 -Total Receives: 2010 -Stores 1376 1 -Total Receives: 2011 -Stores 1377 1 -Total Receives: 2012 -Stores 1378 1 -Total Receives: 2013 -Stores 1379 1 -Total Receives: 2014 -Stores 1380 1 -Total Receives: 2015 -Total rewards: 2024 -Total Receives: 2016 -Total Receives: 2017 -Total Receives: 2018 -Total Receives: 2019 -Total Receives: 2020 -Total Receives: 2021 -Total Receives: 2022 -Total Receives: 2023 -Total Receives: 2024 -Total rewards: 2029 -Total Receives: 2025 -Stores 1381 1 -Total Receives: 2026 -Stores 1382 1 -Total Receives: 2027 -Stores 1383 1 -Total Receives: 2028 -Stores 1384 1 -Total Receives: 2029 -Stores 1385 1 -Total rewards: 2036 -Total Receives: 2030 -Stores 1386 1 -Total Receives: 2031 -Stores 1387 1 -Total Receives: 2032 -Stores 1388 1 -Total Receives: 2033 -Stores 1389 1 -Total Receives: 2034 -Stores 1390 1 -Total Receives: 2035 -Stores 1391 1 -Total Receives: 2036 -Stores 1392 1 -Total rewards: 2042 -Total Receives: 2037 -Total Receives: 2038 -Total Receives: 2039 -Total Receives: 2040 -Total Receives: 2041 -Total Receives: 2042 -Total rewards: 2044 -Total Receives: 2043 -Stores 1393 1 -Total Receives: 2044 -Stores 1394 1 -Total rewards: 2051 -Total Receives: 2045 -Total Receives: 2046 -Total Receives: 2047 -Total Receives: 2048 -Total Receives: 2049 -Total Receives: 2050 -Total Receives: 2051 -Total rewards: 2058 -Total Receives: 2052 -Total Receives: 2053 -Total Receives: 2054 -Total Receives: 2055 -Total Receives: 2056 -Total Receives: 2057 -Total Receives: 2058 -Total rewards: 2064 -Total Receives: 2059 -Stores 1395 1 -Total Receives: 2060 -Stores 1396 1 -Total Receives: 2061 -Stores 1397 1 -Total Receives: 2062 -Stores 1398 1 -Total Receives: 2063 -Stores 1399 1 -Total Receives: 2064 -Stores 1400 1 -Total rewards: 2068 -Total Receives: 2065 -Stores 1401 1 -Total Receives: 2066 -Stores 1402 1 -Total Receives: 2067 -Stores 1403 1 -Total Receives: 2068 -Stores 1404 1 -Total rewards: 2071 -Total Receives: 2069 -Stores 1405 1 -Total Receives: 2070 -Stores 1406 1 -Total Receives: 2071 -Stores 1407 1 -Total rewards: 2078 -Total Receives: 2072 -Stores 1408 1 -Total Receives: 2073 -Stores 1409 1 -Total Receives: 2074 -Stores 1410 1 -Total Receives: 2075 -Stores 1411 1 -Total Receives: 2076 -Stores 1412 1 -Total Receives: 2077 -Stores 1413 1 -Total Receives: 2078 -Stores 1414 1 -Total rewards: 2081 -Total Receives: 2079 -Stores 1415 1 -Total Receives: 2080 -Stores 1416 1 -Total Receives: 2081 -Stores 1417 1 -Total rewards: 2089 -Total Receives: 2082 -Stores 1418 1 -Total Receives: 2083 -Stores 1419 1 -Total Receives: 2084 -Stores 1420 1 -Total Receives: 2085 -Stores 1421 1 -Total Receives: 2086 -Stores 1422 1 -Total Receives: 2087 -Stores 1423 1 -Total Receives: 2088 -Stores 1424 1 -Total Receives: 2089 -Stores 1425 1 -Total rewards: 2097 -Total Receives: 2090 -Stores 1426 1 -Total Receives: 2091 -Stores 1427 1 -Total Receives: 2092 -Stores 1428 1 -Total Receives: 2093 -Stores 1429 1 -Total Receives: 2094 -Stores 1430 1 -Total Receives: 2095 -Stores 1431 1 -Total Receives: 2096 -Stores 1432 1 -Total Receives: 2097 -Stores 1433 1 -Total rewards: 2106 -Total Receives: 2098 -Stores 1434 1 -Total Receives: 2099 -Stores 1435 1 -Total Receives: 2100 -Stores 1436 1 -Total Receives: 2101 -Stores 1437 1 -Total Receives: 2102 -Stores 1438 1 -Total Receives: 2103 -Stores 1439 1 -Total Receives: 2104 -Stores 1440 1 -Total Receives: 2105 -Stores 1441 1 -Total Receives: 2106 -Stores 1442 1 -Total rewards: 2113 -Total Receives: 2107 -Stores 1443 1 -Total Receives: 2108 -Stores 1444 1 -Total Receives: 2109 -Stores 1445 1 -Total Receives: 2110 -Stores 1446 1 -Total Receives: 2111 -Total Receives: 2112 -Total Receives: 2113 -Total rewards: 2121 -Total Receives: 2114 -Total Receives: 2115 -Total Receives: 2116 -Total Receives: 2117 -Total Receives: 2118 -Total Receives: 2119 -Total Receives: 2120 -Total Receives: 2121 -Total rewards: 2122 -Total Receives: 2122 -Stores 1447 1 -Total rewards: 2130 -Total Receives: 2123 -Total Receives: 2124 -Total Receives: 2125 -Total Receives: 2126 -Total Receives: 2127 -Total Receives: 2128 -Total Receives: 2129 -Total Receives: 2130 -Total rewards: 2135 -Total Receives: 2131 -Stores 1448 1 -Total Receives: 2132 -Stores 1449 1 -Total Receives: 2133 -Stores 1450 1 -Total Receives: 2134 -Stores 1451 1 -Total Receives: 2135 -Stores 1452 1 -Total rewards: 2142 -Total Receives: 2136 -Total Receives: 2137 -Total Receives: 2138 -Total Receives: 2139 -Total Receives: 2140 -Total Receives: 2141 -Total Receives: 2142 -Total rewards: 2151 -Total Receives: 2143 -Total Receives: 2144 -Total Receives: 2145 -Total Receives: 2146 -Total Receives: 2147 -Total Receives: 2148 -Total Receives: 2149 -Total Receives: 2150 -Total Receives: 2151 -Total rewards: 2154 -Total Receives: 2152 -Stores 1453 1 -Total Receives: 2153 -Stores 1454 1 -Total Receives: 2154 -Stores 1455 1 -Total rewards: 2161 -Total Receives: 2155 -Total Receives: 2156 -Total Receives: 2157 -Total Receives: 2158 -Total Receives: 2159 -Total Receives: 2160 -Total Receives: 2161 -Total rewards: 2166 -Total Receives: 2162 -Stores 1456 1 -Total Receives: 2163 -Stores 1457 1 -Total Receives: 2164 -Stores 1458 1 -Total Receives: 2165 -Stores 1459 1 -Total Receives: 2166 -Stores 1460 1 -Total rewards: 2169 -Total Receives: 2167 -Stores 1461 1 -Total Receives: 2168 -Stores 1462 1 -Total Receives: 2169 -Stores 1463 1 -Total rewards: 2176 -Total Receives: 2170 -Total Receives: 2171 -Total Receives: 2172 -Total Receives: 2173 -Total Receives: 2174 -Total Receives: 2175 -Total Receives: 2176 -tensor([-0.0316, -0.0427, -0.0359]) -tensor([-43.8963, -44.0040, -44.0204, -43.7569, -44.0018, -43.9854, -44.0117, - -43.9935, -43.9849, -44.0010, -44.0165]) -tensor([-109.9537, -110.0404, -110.0716]) -tensor([-31.0010, -30.9442, -31.0119, -30.9996]) -tensor([-0.0404, -0.0456]) -tensor([-0.0367, -0.0290]) -tensor([-28.3553, -28.3658, -28.3514]) -Total rewards: 2182 -Total Receives: 2177 -Stores 1464 1 -Total Receives: 2178 -Stores 1465 1 -Total Receives: 2179 -Stores 1466 1 -Total Receives: 2180 -Stores 1467 1 -Total Receives: 2181 -Stores 1468 1 -Total Receives: 2182 -Stores 1469 1 -tensor([-0.0842, -0.0698]) -tensor([-48.4116, -48.3605, -48.4237]) -Total rewards: 2183 -Total Receives: 2183 -Stores 1470 1 -tensor([-0.0755, -0.0733]) -tensor([-54.9936, -55.0753, -54.8406, -54.9126, -55.1099, -55.0628, -55.0972, - -55.1343]) -tensor([-43.3400, -43.3499]) -tensor([-69.4279, -69.4592, -69.4364, -69.4474, -69.4532]) -tensor([-0.0838, -0.0685, -0.0885, -0.0264, -0.0552, -0.0527]) -tensor([-39.7308, -39.6112, -39.5354, -39.6481, -39.7019, -39.7323, -39.7280, - -39.6292, -39.7397, -39.7141]) -Total rewards: 2188 -Total Receives: 2184 -Stores 1471 1 -Total Receives: 2185 -Stores 1472 1 -Total Receives: 2186 -Stores 1473 1 -Total Receives: 2187 -Stores 1474 1 -Total Receives: 2188 -Stores 1475 1 -tensor([-1.7750, -1.7690, -1.7943, -1.7808, -1.8100, -1.7966]) -tensor([-27.6973, -27.7767, -27.7950, -27.7774, -27.7622, -27.7868, -27.7699, - -27.7613, -27.7784, -27.7934]) -tensor([-0.2098, -0.2138, -0.2156]) -Total rewards: 2194 -Total Receives: 2189 -tensor([-0.5178, -0.2818, -0.2912, -0.3231, -0.2853, -0.2940, -0.2833, -0.2833]) -tensor([-0.0774, -0.0792, -0.0591]) -tensor([-0.0668, -0.0479, -0.0729, -0.0194, -0.0821, -0.0707, -0.0230, -0.0504]) -tensor([-0.3436, -0.2288, -0.1496, -0.1907]) -tensor([-0.2915, -0.2499, -0.1695, -0.2122, -0.2787, -0.2007, -0.2387]) -tensor([0.0409, 0.0417]) -tensor([-0.4985, -0.2107, -0.2188, -0.2453, -0.2181, -0.1910, -0.1910]) -tensor([-6.8216, -6.9320, -6.7838, -6.8292, -6.8283, -6.8134, -6.8077, -6.8326, - -6.8962, -6.8842, -6.8094, -6.9320]) -tensor([-0.5401, -0.5816, -0.5803]) -Total rewards: 3351 -Total Receives: 3346 -Stores 2360 1 -Total Receives: 3347 -Stores 2361 1 -Total Receives: 3348 -Stores 2362 1 -Total Receives: 3349 -Stores 2363 1 -Total Receives: 3350 -Stores 2364 1 -Total Receives: 3351 -Stores 2365 1 -tensor([-4.6934, -4.7836, -4.6933, -4.6791, -4.6871, -4.6826, -4.7028, -4.7500, - -4.7454, -4.6780, -4.7836]) -tensor([0.0374, 0.0369]) -tensor([-0.2132, -0.1804, -0.1749, -0.1473]) -tensor([-0.1415, -0.1462, -0.1722, -0.1881, -0.1664, -0.0994, -0.1094]) -tensor([-0.1180, -0.1308, -0.2214, -0.1131]) -tensor([-0.2923, -0.3563, -0.2003, -0.2059]) -tensor([-0.1743, -0.1857, -0.2595]) -tensor([-0.1783, -0.1911]) -tensor([0.0437, 0.0400]) -tensor([-3.2089, -3.3161, -3.2088, -3.1955, -3.2014, -3.1960, -3.2203, -3.2732, - -3.2669, -3.3160]) -Total rewards: 3358 -Total Receives: 3352 -Stores 2366 1 -Total Receives: 3353 -Stores 2367 1 -Total Receives: 3354 -Stores 2368 1 -Total Receives: 3355 -Stores 2369 1 -Total Receives: 3356 -Stores 2370 1 -Total Receives: 3357 -Stores 2371 1 -Total Receives: 3358 -Stores 2372 1 -tensor([-0.0626, -0.0641, -0.0293]) -tensor([-0.0222, -0.0367, -0.0510, -0.0287]) -tensor([0.0084, 0.0095, 0.0074, 0.0094]) -tensor([-0.1388, -0.1499]) -Total rewards: 3368 -Total Receives: 3359 -Stores 2373 1 -Total Receives: 3360 -Stores 2374 1 -Total Receives: 3361 -Stores 2375 1 -Total Receives: 3362 -Stores 2376 1 -Total Receives: 3363 -Stores 2377 1 -Total Receives: 3364 -Stores 2378 1 -Total Receives: 3365 -Stores 2379 1 -Total Receives: 3366 -Stores 2380 1 -Total Receives: 3367 -Stores 2381 1 -Total Receives: 3368 -Stores 2382 1 -tensor([-0.2169, -0.2957]) -tensor([-0.3030, -0.1584, -0.1639, -0.1242]) -tensor([-0.0128, -0.0154, -0.0153]) -tensor([-0.4653, -0.3434, -0.3520, -0.4440, -0.3507, -0.3769, -0.3426]) -tensor([-0.0214, -0.0326, -0.0327]) -tensor([-0.0833, -0.0815, -0.0804]) -Total rewards: 3372 -Total Receives: 3369 -Stores 2383 1 -Total Receives: 3370 -Stores 2384 1 -Total Receives: 3371 -Stores 2385 1 -Total Receives: 3372 -Stores 2386 1 -tensor([-0.1108, -0.0917, -0.1133, -0.1230, -0.1138, -0.0696, -0.0934]) -Total rewards: 3375 -Total Receives: 3373 -Stores 2387 1 -Total Receives: 3374 -Stores 2388 1 -Total Receives: 3375 -Stores 2389 1 -tensor([0.0407, 0.0287]) -tensor([-0.7813, -0.6385, -0.6387, -0.6296, -0.6285, -0.6367, -0.6367, -0.6367]) -tensor([0.0326, 0.0347]) -tensor([-0.1587, -0.1558, -0.1576]) -tensor([-0.0095, -0.0088, -0.0118]) -tensor([-1.4235, -1.4557, -1.4597, -1.4565]) -tensor([-0.5691, -0.3424, -0.3578, -0.3506, -0.3395, -0.3395, -0.3395]) -tensor([0.0771, 0.0454]) -tensor([-0.0659, -0.0389, -0.0741, -0.0553]) -tensor([-0.0219, -0.0185, -0.0163]) -tensor([-0.1386, -0.1439, -0.1113, -0.1716, -0.1678, -0.0947, -0.1293]) -tensor([-0.2862, -0.1649, -0.2031, -0.2765, -0.1972, -0.1488, -0.1552]) -tensor([0.0281, 0.0291]) -tensor([-0.0277, -0.0270, -0.0191]) -tensor([-0.4442, -0.1381, -0.1654, -0.1590, -0.1282, -0.1286, -0.1243, -0.1243]) -tensor([-0.2336, -0.1853]) -Total rewards: 3382 -Total Receives: 3376 -Stores 2390 1 -Total Receives: 3377 -Stores 2391 1 -Total Receives: 3378 -Stores 2392 1 -Total Receives: 3379 -Stores 2393 1 -Total Receives: 3380 -Stores 2394 1 -Total Receives: 3381 -Stores 2395 1 -Total Receives: 3382 -Stores 2396 1 -tensor([-3.1637, -3.2690, -3.1644, -3.1588, -3.1548, -3.1737, -3.2689, -3.2502, - -3.2043, -3.2690]) -tensor([-0.1482, -0.1615]) -tensor([-1.9110, -1.9688, -1.9129, -1.9124, -1.9114, -1.9158, -1.9685, -1.9542, - -1.9253]) -tensor([-0.0214, -0.0209, -0.0517, -0.0558, -0.0423]) -Total rewards: 3385 -Total Receives: 3383 -Stores 2397 1 -Total Receives: 3384 -Stores 2398 1 -Total Receives: 3385 -Stores 2399 1 -tensor([-0.1524, -0.1387, -0.1535, -0.1544, -0.1549, -0.1330]) -tensor([-0.8581, -0.8575, -0.8599, -0.8690]) -tensor([-0.3280, -0.1435, -0.1900, -0.1524]) -Total rewards: 3393 -Total Receives: 3386 -Stores 2400 1 -Total Receives: 3387 -Stores 2401 1 -Total Receives: 3388 -Stores 2402 1 -Total Receives: 3389 -Stores 2403 1 -Total Receives: 3390 -Stores 2404 1 -Total Receives: 3391 -Stores 2405 1 -Total Receives: 3392 -Stores 2406 1 -Total Receives: 3393 -Stores 2407 1 -tensor([-0.9751, -0.9642, -0.9641, -0.9642, -0.9769, -0.9659, -0.9765, -0.9750, - -0.9665]) -tensor([0.0091, 0.0036]) -tensor([0.0323, 0.0328, 0.0297]) -tensor([-0.0165, -0.0173]) -tensor([-0.4458, -0.5634, -0.5737, -0.4502, -0.5512, -0.4521, -0.4651, -0.4960]) -tensor([-0.2753, -0.2826, -0.1691, -0.2677, -0.1724, -0.1945, -0.2288]) -tensor([-0.1552, -0.2102, -0.2183, -0.2068, -0.2206]) -tensor([-0.0989, -0.1305, -0.1062, -0.0922]) -Total rewards: 3400 -Total Receives: 3394 -Stores 2408 1 -Total Receives: 3395 -Stores 2409 1 -Total Receives: 3396 -Stores 2410 1 -Total Receives: 3397 -Stores 2411 1 -Total Receives: 3398 -Stores 2412 1 -Total Receives: 3399 -Stores 2413 1 -Total Receives: 3400 -Stores 2414 1 -tensor([-0.1200, -0.1440, -0.1236, -0.1259]) -tensor([-0.0339, -0.0333, -0.0569, -0.0594, -0.0171]) -tensor([-0.4824, -0.3493, -0.3557, -0.4607, -0.3578, -0.3544]) -tensor([-0.2303, -0.1929, -0.2078, -0.2716, -0.2740, -0.1824, -0.2291]) -tensor([-0.0401, -0.0444, -0.0410]) -tensor([-0.5376, -0.5340, -0.5379, -0.5340, -0.5387]) -tensor([-0.4544, -0.1469, -0.1603, -0.1879, -0.1789, -0.1524, -0.1524, -0.1361]) -tensor([-0.2475, -0.2559, -0.2359, -0.1097, -0.1339, -0.1825, -0.0863]) -tensor([-0.0609, -0.0547, -0.0363, -0.0402]) -tensor([-0.2650, -0.2707, -0.2562, -0.1448, -0.1629, -0.2060]) -tensor([-0.1218, -0.1214]) -tensor([0.0322, 0.0273]) -tensor([-0.2344, -0.2006, -0.2198, -0.2748, -0.2775, -0.2368]) -tensor([-0.1292, -0.1253, -0.1241, -0.1251, -0.1318]) -tensor([-0.2036, -0.1842, -0.1880, -0.1820]) -Total rewards: 3405 -Total Receives: 3401 -Stores 2415 1 -Total Receives: 3402 -Stores 2416 1 -Total Receives: 3403 -Stores 2417 1 -Total Receives: 3404 -Stores 2418 1 -Total Receives: 3405 -Stores 2419 1 -tensor([0.0210, 0.0211, 0.0223]) -tensor([-0.0680, -0.0772, -0.0629, -0.0462]) -tensor([-0.3899, -0.2382, -0.2743, -0.3761, -0.2637, -0.2405, -0.2493]) -tensor([-1.0179e-05, -2.1529e-03, -3.2957e-03]) -tensor([-0.0972, -0.0773, -0.0790]) -tensor([-0.3108, -0.2200, -0.2910, -0.2125, -0.1498, -0.1818]) -tensor([-0.2838, -0.2895, -0.2746, -0.1846, -0.2227]) -Total rewards: 3411 -Total Receives: 3406 -Stores 2420 1 -Total Receives: 3407 -Stores 2421 1 -Total Receives: 3408 -Stores 2422 1 -Total Receives: 3409 -Stores 2423 1 -Total Receives: 3410 -Stores 2424 1 -Total Receives: 3411 -Stores 2425 1 -tensor([-0.0384, -0.0435, -0.0359, -0.0404]) -tensor([0.0317, 0.0311]) -tensor([-0.2692, -0.2732, -0.2703, -0.2758, -0.2729, -0.2752]) -Total rewards: 3420 -Total Receives: 3412 -Stores 2426 1 -Total Receives: 3413 -Stores 2427 1 -Total Receives: 3414 -Stores 2428 1 -Total Receives: 3415 -Stores 2429 1 -Total Receives: 3416 -Total Receives: 3417 -Total Receives: 3418 -Total Receives: 3419 -Total Receives: 3420 -tensor([-0.1961, -0.1893, -0.2027, -0.1934, -0.2030]) -tensor([-0.1173, -0.1170, -0.1229]) -tensor([-0.0355, -0.0198, -0.0351, -0.0705, -0.0752]) -tensor([0.0366, 0.0358, 0.0306, 0.0347]) -tensor([-0.1328, -0.1413]) -tensor([-0.0277, -0.0320, -0.0289]) -tensor([0.0370, 0.0342]) -tensor([-0.0545, -0.0518, -0.0531]) -tensor([-0.4923, -0.1869, -0.1985, -0.2312, -0.2188, -0.1926, -0.1925]) -tensor([0.0340, 0.0342, 0.0341]) -tensor([-0.3281, -0.2429, -0.3149, -0.2355, -0.2035]) -tensor([-0.1307, -0.1140]) -tensor([-0.6979, -0.7273, -0.7118, -0.7350, -0.6934, -0.7251, -0.7234, -0.7339]) -tensor([-0.0245, -0.0273, -0.0297, -0.0223]) -tensor([-0.1042, -0.0764, -0.1141, -0.0999, -0.1130, -0.0741]) -tensor([-0.9390, -0.9485, -0.9291, -0.9361, -0.9237, -0.9285, -0.9413]) -tensor([-0.5913, -0.5987, -0.5846, -0.5891, -0.5893, -0.5930]) -Total rewards: 3427 -Total Receives: 3421 -Total Receives: 3422 -Total Receives: 3423 -Stores 1476 1 -Total Receives: 2190 -Stores 1477 1 -Total Receives: 2191 -Stores 1478 1 -Total Receives: 2192 -Stores 1479 1 -Total Receives: 2193 -Stores 1480 1 -Total Receives: 2194 -Stores 1481 1 -tensor([-31.4148, -31.4249, -31.4268]) -Total rewards: 2200 -Total Receives: 2195 -Total Receives: 2196 -Total Receives: 2197 -Total Receives: 2198 -Total Receives: 2199 -Total Receives: 2200 -tensor([-59.3814, -59.4163, -59.5327]) -tensor([-0.1383, -0.1309]) -tensor([-0.0935, -0.0784, -0.0997, -0.0669, -0.0637, -0.0510]) -tensor([-26.8577, -26.8919]) -tensor([-0.0929, -0.0656]) -tensor([-0.1411, -0.1450]) -tensor([-42.3903, -42.3742, -42.4307]) -tensor([-32.2170, -32.2310, -32.2260, -32.2475]) -tensor([-47.0114, -46.9121, -46.8367, -46.9465, -46.9958, -47.0009, -47.0280, - -47.0225, -46.9254, -47.0270]) -tensor([-59.2360, -59.2792, -59.0492, -59.1237, -59.3672, -59.3310, -59.3229, - -59.3701, -59.3701, -59.3702]) -tensor([-34.3008, -34.3236, -34.2940, -34.2854, -34.2590, -34.2896, -34.2723, - -34.2585, -34.2859, -34.2942]) -Total rewards: 2206 -Total Receives: 2201 -Stores 1482 1 -Total Receives: 2202 -Stores 1483 1 -Total Receives: 2203 -Stores 1484 1 -Total Receives: 2204 -Stores 1485 1 -Total Receives: 2205 -Stores 1486 1 -Total Receives: 2206 -Stores 1487 1 -tensor([-0.0688, -0.0636, -0.0533]) -tensor([-0.3056, -0.3093]) -tensor([-38.2218, -38.1833, -38.2199]) -tensor([-2.8222, -2.8200]) -tensor([-0.0877, -0.0794]) -tensor([-1.2910, -1.2923]) -tensor([-82.8521, -82.8319, -82.8445, -82.8768, -82.8519, -82.8889, -82.8889]) -tensor([-2.4556, -2.5025, -2.5117, -2.4700, -2.4556, -2.4826, -2.4680, -2.5120]) -tensor([-0.0537, -0.0858]) -tensor([-36.3983, -36.4216, -36.4191, -36.4021, -36.3828, -36.3564, -36.3861, - -36.3691, -36.3839, -36.3920]) -tensor([-0.1016, -0.0980, -0.0954]) -Total rewards: 2209 -Total Receives: 2207 -Stores 1488 1 -Total Receives: 2208 -Stores 1489 1 -Total Receives: 2209 -Stores 1490 1 -tensor([-45.5839, -45.6555, -45.6470]) -tensor([-43.5742, -43.5681]) -tensor([-0.1319, -0.1126, -0.1340, -0.1036, -0.0978]) -tensor([-48.7795, -48.6527, -48.5707, -48.6954, -48.7576, -48.7676, -48.8178, - -48.7869, -48.7985]) -tensor([-48.7946, -48.8369, -48.6763, -48.9305, -48.9109, -48.8873, -48.9480, - -48.9480, -48.9480]) -Total rewards: 2218 -Total Receives: 2210 -Stores 1491 1 -Total Receives: 2211 -Stores 1492 1 -Total Receives: 2212 -Stores 1493 1 -Total Receives: 2213 -Stores 1494 1 -Total Receives: 2214 -Stores 1495 1 -Total Receives: 2215 -Stores 1496 1 -Total Receives: 2216 -Stores 1497 1 -Total Receives: 2217 -Stores 1498 1 -Total Receives: 2218 -Stores 1499 1 -tensor([-47.3120, -47.2991, -47.2879, -47.3213]) -Total rewards: 2222 -Total Receives: 2219 -Stores 1500 1 -Total Receives: 2220 -Stores 1501 1 -Total Receives: 2221 -Stores 1502 1 -Total Receives: 2222 -Stores 1503 1 -tensor([-40.0409, -40.0776, -40.1691, -40.1500, -40.1277, -40.1883, -40.1883, - -40.1883]) -tensor([-0.1742, -0.1752]) -tensor([-30.8650, -30.8862, -30.8838, -30.8695, -30.8495, -30.8550, -30.8374, - -30.8982, -30.8555, -30.8616]) -tensor([-46.6786, -46.6720, -46.7078]) -Total rewards: 2225 -Total Receives: 2223 -Stores 1504 1 -Total Receives: 2224 -Stores 1505 1 -Total Receives: 2225 -Stores 1506 1 -tensor([-0.1592, -0.1399, -0.1612, -0.1340]) -tensor([-0.0771, -0.0592]) -tensor([-15.3817, -15.4134]) -tensor([-0.3080, -0.3179]) -tensor([-32.1121, -31.9714, -32.0177, -32.0796, -32.1540, -32.1507, -32.1180, - -32.1313]) -Total rewards: 2235 -Total Receives: 2226 -Stores 1507 1 -Total Receives: 2227 -Stores 1508 1 -Total Receives: 2228 -Stores 1509 1 -Total Receives: 2229 -Stores 1510 1 -Total Receives: 2230 -Stores 1511 1 -Total Receives: 2231 -Stores 1512 1 -Total Receives: 2232 -Stores 1513 1 -Total Receives: 2233 -Stores 1514 1 -Total Receives: 2234 -Stores 1515 1 -Total Receives: 2235 -Stores 1516 1 -tensor([-32.9080, -32.9399, -32.9252, -32.9053, -32.9124]) -tensor([-2.4546, -2.4662, -2.4653, -2.4103, -2.3931, -2.4598, -2.4327, -2.4078, - -2.4718, -2.4781]) -Total rewards: 2243 -Total Receives: 2236 -Stores 1517 1 -Total Receives: 2237 -Stores 1518 1 -Total Receives: 2238 -Stores 1519 1 -Total Receives: 2239 -Stores 1520 1 -Total Receives: 2240 -Stores 1521 1 -Total Receives: 2241 -Stores 1522 1 -Total Receives: 2242 -Stores 1523 1 -Total Receives: 2243 -Stores 1524 1 -tensor([-28.6794, -28.6408, -28.6633, -28.6301, -28.6786]) -tensor([-55.8578, -55.9042, -55.8345, -55.8818, -55.8728, -55.9155, -55.9155]) -tensor([-0.0571, -0.0611, -0.0671]) -Total rewards: 2248 -Total Receives: 2244 -Stores 1525 1 -Total Receives: 2245 -Stores 1526 1 -Total Receives: 2246 -Stores 1527 1 -Total Receives: 2247 -Stores 1528 1 -Total Receives: 2248 -Stores 1529 1 -tensor([-33.8339, -33.8576, -33.8349, -33.8077, -33.8184, -33.8008, -33.8596, - -33.8248, -33.8272]) -tensor([-35.6950, -35.7294, -35.7130, -35.6993]) -tensor([-0.1492, -0.1299, -0.1588]) -tensor([-41.5377, -41.5784, -41.6739, -41.6548, -41.7091, -41.7092, -41.7091]) -tensor([-35.3851, -35.4811, -35.4619, -35.5178, -35.5178, -35.5178]) -tensor([-0.1382, -0.1492]) -tensor([-28.3988, -28.3938, -28.3531, -28.3743, -28.4014]) -tensor([-0.0371, -0.0290, -0.0405]) -tensor([-0.0721, -0.0610, -0.0621]) -tensor([-34.3444, -34.3676, -34.3445, -34.3173, -34.3277, -34.3794, -34.3355, - -34.3491]) -tensor([-48.7895, -48.7776, -48.7781]) -tensor([-48.0510, -48.0893, -48.0836, -48.1001]) -tensor([-32.8760, -32.7277, -32.7684, -32.8368, -32.8851, -32.8616, -32.8809, - -32.9143]) -Total rewards: 2258 -Total Receives: 2249 -Stores 1530 1 -Total Receives: 2250 -Stores 1531 1 -Total Receives: 2251 -Stores 1532 1 -Total Receives: 2252 -Stores 1533 1 -Total Receives: 2253 -Stores 1534 1 -Total Receives: 2254 -Stores 1535 1 -Total Receives: 2255 -Stores 1536 1 -Total Receives: 2256 -Stores 1537 1 -Total Receives: 2257 -Stores 1538 1 -Total Receives: 2258 -Stores 1539 1 -tensor([-29.9148, -29.9117, -29.8920, -29.9324, -29.9271]) -tensor([-10.3596, -10.3693, -9.8075, -10.3036, -10.3893, -10.3243, -10.3965, - -10.3197, -10.3741, -10.3883]) -tensor([-0.1440, -0.1545, -0.0693]) -tensor([-0.0504, -0.0553]) -tensor([-45.3758, -45.4083, -45.4275]) -tensor([-27.1630, -27.1575, -27.1398, -27.1714]) -tensor([-0.0898, -0.0897, -0.0998]) -tensor([-52.5650, -52.5092, -52.5605, -52.5911, -52.5475, -52.5215, -52.5878, - -52.5878, -52.5878]) -tensor([-29.4047, -29.4288, -29.3748, -29.3846, -29.4413, -29.3951, -29.4095]) -tensor([-0.1145, -0.1385, -0.0462]) -tensor([-26.0421, -26.0360, -26.0539]) -Total rewards: 2264 -Total Receives: 2259 -Stores 1540 1 -Total Receives: 2260 -Stores 1541 1 -Total Receives: 2261 -Stores 1542 1 -Total Receives: 2262 -Stores 1543 1 -Total Receives: 2263 -Stores 1544 1 -Total Receives: 2264 -Stores 1545 1 -tensor([-0.1050, -0.1054]) -tensor([-29.9803, -29.9768, -29.9977, -29.9929]) -Total rewards: 2273 -Total Receives: 2265 -Stores 1546 1 -Total Receives: 2266 -Stores 1547 1 -Total Receives: 2267 -Stores 1548 1 -Total Receives: 2268 -Stores 1549 1 -Total Receives: 2269 -Stores 1550 1 -Total Receives: 2270 -Stores 1551 1 -Total Receives: 2271 -Stores 1552 1 -Total Receives: 2272 -Stores 1553 1 -Total Receives: 2273 -Stores 1554 1 -tensor([-52.8935, -52.8763, -52.8921]) -tensor([-0.1817, -0.1707]) -tensor([-0.1243, -0.1480]) -tensor([-25.6204, -25.6230, -25.6342]) -Total rewards: 2282 -Total Receives: 2274 -Stores 1555 1 -Total Receives: 2275 -Stores 1556 1 -Total Receives: 2276 -Stores 1557 1 -Total Receives: 2277 -Stores 1558 1 -Total Receives: 2278 -Stores 1559 1 -Total Receives: 2279 -Stores 1560 1 -Total Receives: 2280 -Stores 1561 1 -Total Receives: 2281 -Stores 1562 1 -Total Receives: 2282 -Stores 1563 1 -tensor([-29.3545, -29.3777, -29.3347, -29.3909, -29.3468, -29.3593]) -tensor([-32.4329, -32.3388, -32.4910, -32.3882, -32.4367, -32.4156, -32.4285, - -32.4622]) -tensor([-50.8244, -50.8220, -50.7461, -50.8023, -50.8320, -50.8324, -50.7649, - -50.8358, -50.8358, -50.8358]) -tensor([-0.0520, -0.0506]) -tensor([-0.1227, -0.1197]) -tensor([-0.3028, -0.3007]) -tensor([-19.4063, -19.4084]) -Total rewards: 2287 -Total Receives: 2283 -Stores 1564 1 -Total Receives: 2284 -Stores 1565 1 -Total Receives: 2285 -Stores 1566 1 -Total Receives: 2286 -Stores 1567 1 -Total Receives: 2287 -Stores 1568 1 -tensor([-43.5159, -43.5112]) -Total Receives: 3424 -Total Receives: 3425 -Total Receives: 3426 -Total Receives: 3427 -tensor([-0.0325, -0.0235, -0.0329]) -tensor([-0.2993, -0.1329, -0.3082, -0.2877, -0.2403]) -Total rewards: 3431 -Total Receives: 3428 -Stores 2430 1 -Total Receives: 3429 -Stores 2431 1 -Total Receives: 3430 -Stores 2432 1 -Total Receives: 3431 -Stores 2433 1 -tensor([0.0377, 0.0377]) -tensor([0.0350, 0.0333]) -Total rewards: 3436 -Total Receives: 3432 -Total Receives: 3433 -Total Receives: 3434 -Total Receives: 3435 -Total Receives: 3436 -tensor([-0.5495, -0.2390, -0.2805, -0.2674, -0.2271, -0.2445]) -Total rewards: 3438 -Total Receives: 3437 -Stores 2434 1 -Total Receives: 3438 -Stores 2435 1 -tensor([-0.0166, -0.0182, -0.0180]) -tensor([-0.0685, -0.0683, -0.1022, -0.0388, -0.1051]) -Total rewards: 3442 -Total Receives: 3439 -Stores 2436 1 -Total Receives: 3440 -Stores 2437 1 -Total Receives: 3441 -Stores 2438 1 -Total Receives: 3442 -Stores 2439 1 -tensor([-0.0049, 0.0015, -0.0068]) -tensor([0.0351, 0.0368, 0.0322]) -Total rewards: 3449 -Total Receives: 3443 -Stores 2440 1 -Total Receives: 3444 -Stores 2441 1 -Total Receives: 3445 -Stores 2442 1 -Total Receives: 3446 -Stores 2443 1 -Total Receives: 3447 -Stores 2444 1 -Total Receives: 3448 -Stores 2445 1 -Total Receives: 3449 -Stores 2446 1 -tensor([-0.0806, -0.0817, -0.0834]) -tensor([-0.3200, -0.3258, -0.3115, -0.2679]) -tensor([-0.0271, -0.0283]) -Total rewards: 3458 -Total Receives: 3450 -Stores 2447 1 -Total Receives: 3451 -Stores 2448 1 -Total Receives: 3452 -Stores 2449 1 -Total Receives: 3453 -Stores 2450 1 -Total Receives: 3454 -Stores 2451 1 -Total Receives: 3455 -Stores 2452 1 -Total Receives: 3456 -Stores 2453 1 -Total Receives: 3457 -Stores 2454 1 -Total Receives: 3458 -Stores 2455 1 -tensor([-0.2662, -0.2582, -0.2643, -0.2346, -0.2754, -0.2711]) -tensor([0.0383, 0.0272]) -tensor([0.0444, 0.0285, 0.0291]) -tensor([-0.3752, -0.3775, -0.3797, -0.3780, -0.3802, -0.4084, -0.3931, -0.3802]) -tensor([0.0269, 0.0237, 0.0235]) -tensor([0.0199, 0.0189]) -tensor([-0.4694, -0.4563, -0.3478, -0.3331]) -tensor([-0.2947, -0.2863, -0.2595]) -tensor([-0.0296, -0.0525, -0.0335, -0.0402, -0.0301, -0.0587, -0.0488]) -tensor([-0.0499, -0.0517, -0.0562]) -tensor([-0.6462, -0.6524, -0.6391, -0.6442, -0.6553, -0.6391]) -tensor([-0.4036, -0.4054, -0.4098, -0.4064, -0.4486, -0.4263, -0.4062]) -Total rewards: 3466 -Total Receives: 3459 -Stores 2456 1 -Total Receives: 3460 -Stores 2457 1 -Total Receives: 3461 -Stores 2458 1 -Total Receives: 3462 -Stores 2459 1 -Total Receives: 3463 -Stores 2460 1 -Total Receives: 3464 -Stores 2461 1 -Total Receives: 3465 -Stores 2462 1 -Total Receives: 3466 -Stores 2463 1 -tensor([-0.4337, -0.4367, -0.4343, -0.4384, -0.4438]) -tensor([-0.5037, -0.1454, -0.1776, -0.2324, -0.2376, -0.1843]) -tensor([-0.1314, -0.1556, -0.1243]) -tensor([0.0348, 0.0325, 0.0327]) -tensor([-0.1188, -0.0911, -0.1284, -0.1149, -0.1251, -0.0908]) -tensor([0.0200, 0.0125, 0.0130]) -tensor([0.0296, 0.0287]) -tensor([-0.4991, -0.1385, -0.1710, -0.2181, -0.2227, -0.1758]) -tensor([-0.0937, -0.0889]) -tensor([-0.4255, -0.3393, -0.3188, -0.2909]) -tensor([0.0732, 0.0451]) -tensor([-0.0867, -0.1199, -0.1200, -0.1456, -0.1426, -0.0866]) -tensor([-0.0780, -0.0813, -0.1081, -0.0740]) -tensor([0.0288, 0.0274]) -tensor([-0.0888, -0.0629, -0.0696, -0.0550, -0.0965, -0.0815]) -tensor([-0.2144, -0.2119, -0.2385, -0.2158, -0.3191, -0.2698, -0.2135]) -tensor([-0.0664, -0.0686, -0.0940]) -tensor([-0.1271, -0.1021, -0.1355, -0.1233, -0.1317]) -tensor([-0.0638, -0.0611, -0.0687]) -tensor([-0.2821, -0.2738, -0.1586]) -tensor([0.0250, 0.0230]) -tensor([0.0352, 0.0340]) -tensor([0.0488, 0.0413]) -tensor([-0.1356, -0.1101]) -tensor([0.0119, 0.0137]) -Total rewards: 3472 -Total Receives: 3467 -Stores 2464 1 -Total Receives: 3468 -Stores 2465 1 -Total Receives: 3469 -Stores 2466 1 -Total Receives: 3470 -Stores 2467 1 -Total Receives: 3471 -Stores 2468 1 -Total Receives: 3472 -Stores 2469 1 -Total rewards: 3474 -Total Receives: 3473 -Stores 2470 1 -Total Receives: 3474 -Stores 2471 1 -tensor([-0.0699, -0.0679, -0.0633]) -Total rewards: 3482 -Total Receives: 3475 -Stores 2472 1 -Total Receives: 3476 -Stores 2473 1 -Total Receives: 3477 -Stores 2474 1 -Total Receives: 3478 -Stores 2475 1 -Total Receives: 3479 -Stores 2476 1 -Total Receives: 3480 -Stores 2477 1 -Total Receives: 3481 -Stores 2478 1 -Total Receives: 3482 -Stores 2479 1 -tensor([-0.2626, -0.2548, -0.2662, -0.2637, -0.3003]) -tensor([-0.2237, -0.2201, -0.2489, -0.2264, -0.2821, -0.2233]) -Total rewards: 3485 -Total Receives: 3483 -Stores 2480 1 -Total Receives: 3484 -Stores 2481 1 -Total Receives: 3485 -Stores 2482 1 -tensor([-0.0614, -0.0643]) -tensor([3.2877e-05, 2.3081e-03]) -Total rewards: 3491 -Total Receives: 3486 -Total Receives: 3487 -Total Receives: 3488 -Total Receives: 3489 -Total Receives: 3490 -Total Receives: 3491 -tensor([-0.0667, -0.0702, -0.0594]) -tensor([-0.6442, -0.6939, -0.6511, -0.6658]) -tensor([-0.0973, -0.0972, -0.0849]) -tensor([0.0240, 0.0274, 0.0210]) -tensor([-0.5441, -0.2205, -0.2680, -0.2725, -0.2257, -0.1841]) -tensor([0.0337, 0.0384, 0.0335, 0.0311, 0.0317]) -tensor([-0.0664, -0.1003, -0.1006, -0.1245, -0.0323, -0.0641, -0.0502]) -tensor([-0.1970, -0.1668, -0.1930, -0.2082]) -tensor([0.0391, 0.0377]) -tensor([-0.2775, -0.2721, -0.2993, -0.2798, -0.3340]) -Total rewards: 3499 -Total Receives: 3492 -Total Receives: 3493 -Total Receives: 3494 -Total Receives: 3495 -Total Receives: 3496 -Total Receives: 3497 -Total Receives: 3498 -Total Receives: 3499 -tensor([0.0263, 0.0229]) -tensor([-0.1198, -0.1199, -0.1535, -0.0534, -0.0831, -0.0690]) -tensor([-0.1367, -0.1073, -0.1162, -0.1463, -0.1284]) -tensor([-0.0323, -0.0248]) -tensor([-0.1002, -0.1071]) -tensor([-0.0782, -0.0830]) -Total rewards: 3508 -Total Receives: 3500 -Stores 2483 1 -Total Receives: 3501 -Stores 2484 1 -Total Receives: 3502 -Stores 2485 1 -Total Receives: 3503 -Stores 2486 1 -Total Receives: 3504 -Stores 2487 1 -Total Receives: 3505 -Stores 2488 1 -Total Receives: 3506 -Stores 2489 1 -Total Receives: 3507 -Stores 2490 1 -Total Receives: 3508 -Stores 2491 1 -tensor([-0.5585, -0.5149, -0.5186, -0.5195, -0.5362, -0.5210]) -tensor([-0.0056, -0.0010, -0.0027]) -tensor([-0.0426, -0.0464, -0.0394, -0.0565]) -Total rewards: 3514 -Total Receives: 3509 -Stores 2492 1 -Total Receives: 3510 -Stores 2493 1 -Total Receives: 3511 -Stores 2494 1 -Total Receives: 3512 -Stores 2495 1 -Total Receives: 3513 -Stores 2496 1 -Total Receives: 3514 -Stores 2497 1 -tensor([0.0301, 0.0310, 0.0293, 0.0314]) -Total rewards: 3522 -Total Receives: 3515 -Stores 2498 1 -Total Receives: 3516 -Stores 2499 1 -Total Receives: 3517 -Stores 2500 1 -Total Receives: 3518 -Stores 2501 1 -Total Receives: 3519 -Stores 2502 1 -Total Receives: 3520 -Stores 2503 1 -Total Receives: 3521 -Stores 2504 1 -Total Receives: 3522 -Stores 2505 1 -Total rewards: 3532 -Total Receives: 3523 -Stores 2506 1 -Total Receives: 3524 -Stores 2507 1 -Total Receives: 3525 -Stores 2508 1 -Total Receives: 3526 -Stores 2509 1 -Total Receives: 3527 -Stores 2510 1 -Total Receives: 3528 -Stores 2511 1 -Total Receives: 3529 -Stores 2512 1 -Total Receives: 3530 -Stores 2513 1 -Total Receives: 3531 -Stores 2514 1 -Total Receives: 3532 -Stores 2515 1 -tensor([-0.8351, -0.8354, -0.8184, -0.8307, -0.8312, -0.8465]) -tensor([-0.0589, -0.0642]) -tensor([-0.9580, -0.6019, -0.6487, -0.6532, -0.6068, -0.5443]) -tensor([-0.0521, -0.0552, -0.0632]) -tensor([-0.1533, -0.1531, -0.1750, -0.1231, -0.1125]) -tensor([0.0408, 0.0384]) -tensor([-0.1963, -0.2061, -0.2148]) -tensor([0.0604, 0.0217]) -tensor([0.0321, 0.0316, 0.0334]) -tensor([-0.1282, -0.1284, -0.1112]) -tensor([-0.1988, -0.2025, -0.1992, -0.2333, -0.2057, -0.2002, -0.2744]) -tensor([-0.0510, -0.0590]) -tensor([0.0295, 0.0271, 0.0312]) -tensor([-0.0690, -0.0712]) -tensor([-0.1602, -0.1403, -0.1708, -0.1520]) -tensor([-0.0079, -0.0186]) -tensor([0.0321, 0.0337]) -Total rewards: 3538 -Total Receives: 3533 -Total Receives: 3534 -Total Receives: 3535 -Total Receives: 3536 -Total Receives: 3537 -Total Receives: 3538 -tensor([0.0309, 0.0282, 0.0317]) -tensor([-0.0723, -0.0762]) -tensor([-2.0477, -2.0477, -2.0661, -2.0406, -2.0100, -2.0756]) -tensor([-0.4731, -0.5516, -0.4076, -0.4773, -0.4632, -0.4758, -0.5066, -0.4825]) -tensor([-0.1709, -0.2063, -0.2098, -0.1746, -0.1149, -0.1068]) -tensor([-0.2428, -0.3789, -0.1795, -0.2463, -0.2326, -0.2449, -0.3036]) -tensor([-0.1683, -0.1797, -0.1615, -0.0989]) -tensor([-30.7050, -30.6190, -30.6654, -30.7073, -30.6904, -30.7049, -30.7339]) -Total rewards: 2294 -Total Receives: 2288 -Stores 1569 1 -Total Receives: 2289 -Stores 1570 1 -Total Receives: 2290 -Stores 1571 1 -Total Receives: 2291 -Stores 1572 1 -Total Receives: 2292 -Stores 1573 1 -Total Receives: 2293 -Stores 1574 1 -Total Receives: 2294 -Stores 1575 1 -tensor([-29.3009, -29.2686, -29.3202, -29.2616, -29.3299, -29.3299, -29.3299]) -tensor([-0.6659, -0.6686, -0.6707, -0.6691]) -tensor([-0.3462, -0.3483]) -tensor([-26.6274, -26.5873, -26.6298, -26.6128, -26.6274, -26.6516]) -Total rewards: 2301 -Total Receives: 2295 -Stores 1576 1 -Total Receives: 2296 -Stores 1577 1 -Total Receives: 2297 -Stores 1578 1 -Total Receives: 2298 -Stores 1579 1 -Total Receives: 2299 -Stores 1580 1 -Total Receives: 2300 -Stores 1581 1 -Total Receives: 2301 -Stores 1582 1 -Total rewards: 2306 -Total Receives: 2302 -Stores 1583 1 -Total Receives: 2303 -Stores 1584 1 -Total Receives: 2304 -Stores 1585 1 -Total Receives: 2305 -Stores 1586 1 -Total Receives: 2306 -Stores 1587 1 -tensor([-2.6429, -2.6463, -2.6864, -2.5448, -2.6184, -2.6082, -2.6632, -2.6773, - -2.5997, -2.6623, -2.6698]) -tensor([-1.6711, -1.6717, -1.7080, -1.6398, -1.6415, -1.6849, -1.6989, -1.6448, - -1.6891, -1.6942]) -tensor([-39.5915, -39.6225]) -Total rewards: 2312 -Total Receives: 2307 -Stores 1588 1 -Total Receives: 2308 -Stores 1589 1 -Total Receives: 2309 -Stores 1590 1 -Total Receives: 2310 -Stores 1591 1 -Total Receives: 2311 -Stores 1592 1 -Total Receives: 2312 -Stores 1593 1 -Total rewards: 2317 -Total Receives: 2313 -Stores 1594 1 -Total Receives: 2314 -Stores 1595 1 -Total Receives: 2315 -Stores 1596 1 -Total Receives: 2316 -Stores 1597 1 -Total Receives: 2317 -Stores 1598 1 -tensor([-0.0650, -0.0463, -0.0470]) -Total rewards: 2326 -Total Receives: 2318 -Stores 1599 1 -Total Receives: 2319 -Stores 1600 1 -Total Receives: 2320 -Stores 1601 1 -Total Receives: 2321 -Stores 1602 1 -Total Receives: 2322 -Stores 1603 1 -Total Receives: 2323 -Stores 1604 1 -Total Receives: 2324 -Stores 1605 1 -Total Receives: 2325 -Stores 1606 1 -Total Receives: 2326 -Stores 1607 1 -tensor([-47.9565, -47.9427, -47.9404, -47.9448, -47.9597]) -tensor([-0.1555, -0.1444]) -tensor([-0.0874, -0.0717]) -tensor([-17.9110, -17.9031, -17.9160, -17.9208]) -tensor([-33.0977, -33.0682, -33.1203, -33.0599, -33.1472, -33.1472]) -tensor([-46.2799, -46.2723, -46.2717, -46.2813]) -Total rewards: 2332 -Total Receives: 2327 -Stores 1608 1 -Total Receives: 2328 -Stores 1609 1 -Total Receives: 2329 -Stores 1610 1 -Total Receives: 2330 -Stores 1611 1 -Total Receives: 2331 -Stores 1612 1 -Total Receives: 2332 -Stores 1613 1 -tensor([-1.2736, -1.2805, -1.3046, -1.2676, -1.2945, -1.3069, -1.2580, -1.3189, - -1.3050, -1.3094]) -tensor([-30.4341, -30.4284, -30.4544]) -tensor([-32.1411, -32.1457, -32.1746, -32.1301, -32.1474, -32.2135]) -tensor([-0.0773, -0.0909, -0.0883]) -tensor([-26.8316, -26.9186, -26.8847, -26.8569, -26.8718, -26.8975]) -Total rewards: 2336 -Total Receives: 2333 -Stores 1614 1 -Total Receives: 2334 -Stores 1615 1 -Total Receives: 2335 -Stores 1616 1 -Total Receives: 2336 -Stores 1617 1 -tensor([-133.7305, -133.7169, -133.7026, -133.7233, -133.7324, -133.7214, - -133.6664, -133.7542, -133.7543, -133.7458, -133.7542]) -tensor([-37.6175, -37.6341, -37.5994, -37.6073, -37.6141]) -tensor([-31.4043, -31.3565, -31.4009, -31.4301, -31.4301]) -tensor([-0.6919, -0.6984, -0.7225, -0.6868, -0.7114, -0.7246, -0.7355, -0.7234, - -0.7274, -0.7459]) -tensor([-29.1109, -29.1134, -29.1417, -29.1172, -29.1815]) -tensor([-29.4902, -29.4877]) -tensor([-0.3371, -0.3500]) -tensor([-0.1010, -0.0809, -0.0802]) -tensor([-0.2262, -0.2366, -0.2238]) -tensor([-17.2339, -17.2215, -17.2344, -17.2496]) -tensor([-15.9978, -16.0148]) -tensor([-35.9078, -35.9001, -35.9149]) -tensor([-27.4550, -27.5446, -27.5098, -27.4808, -27.5417]) -tensor([-0.0718, -0.0452]) -tensor([-33.7941, -33.8078, -33.8176, -33.8011, -33.8142, -33.7898]) -Total rewards: 2341 -Total Receives: 2337 -Stores 1618 1 -Total Receives: 2338 -Stores 1619 1 -Total Receives: 2339 -Stores 1620 1 -Total Receives: 2340 -Stores 1621 1 -Total Receives: 2341 -Stores 1622 1 -tensor([-0.1344, -0.1127, -0.1402]) -tensor([-0.4400, -0.4399, -0.4543, -0.4490, -0.4377, -0.4593, -0.4737, -0.4686, - -0.4732]) -Total rewards: 2351 -Total Receives: 2342 -Stores 1623 1 -Total Receives: 2343 -Stores 1624 1 -Total Receives: 2344 -Stores 1625 1 -Total Receives: 2345 -Stores 1626 1 -Total Receives: 2346 -Stores 1627 1 -Total Receives: 2347 -Stores 1628 1 -Total Receives: 2348 -Stores 1629 1 -Total Receives: 2349 -Stores 1630 1 -Total Receives: 2350 -Stores 1631 1 -Total Receives: 2351 -Stores 1632 1 -tensor([-0.0814, -0.0833]) -tensor([-0.0582, -0.0765, -0.0626, -0.0710, -0.0656]) -tensor([-26.1857, -26.2603, -26.2279, -26.1991]) -tensor([-39.8630, -39.8565, -39.8640, -39.8605, -39.8705, -39.8684]) -tensor([-0.0587, -0.0432, -0.0490, -0.0437]) -Total rewards: 2354 -Total Receives: 2352 -Stores 1633 1 -Total Receives: 2353 -Stores 1634 1 -Total Receives: 2354 -Stores 1635 1 -tensor([-0.1168, -0.0871, -0.1198]) -tensor([-30.3427, -30.3565, -30.3252, -30.4051, -30.3887]) -tensor([-0.1319, -0.1259]) -tensor([-26.7236, -26.7714, -26.8468, -26.7419]) -tensor([-63.4802, -63.4647, -63.4523, -63.4879, -63.4886, -63.4715, -63.5315, - -63.5315, -63.5108, -63.5315]) -Total rewards: 2361 -Total Receives: 2355 -Stores 1636 1 -Total Receives: 2356 -Stores 1637 1 -Total Receives: 2357 -Stores 1638 1 -Total Receives: 2358 -Stores 1639 1 -Total Receives: 2359 -Stores 1640 1 -Total Receives: 2360 -Stores 1641 1 -Total Receives: 2361 -Stores 1642 1 -tensor([-35.3881, -35.3796, -35.4344, -35.4435, -35.4435, -35.4435]) -tensor([-20.4928, -20.5694, -20.4643]) -Total rewards: 2369 -Total Receives: 2362 -Stores 1643 1 -Total Receives: 2363 -Stores 1644 1 -Total Receives: 2364 -Stores 1645 1 -Total Receives: 2365 -Stores 1646 1 -Total Receives: 2366 -Stores 1647 1 -Total Receives: 2367 -Stores 1648 1 -Total Receives: 2368 -Stores 1649 1 -Total Receives: 2369 -Stores 1650 1 -tensor([-19.8631, -19.8264, -19.8336, -19.8596, -19.8705]) -tensor([-0.0407, -0.0362, -0.0622]) -tensor([-43.5252, -43.5301, -43.5557]) -Total rewards: 2379 -Total Receives: 2370 -Stores 1651 1 -Total Receives: 2371 -Stores 1652 1 -Total Receives: 2372 -Stores 1653 1 -Total Receives: 2373 -Stores 1654 1 -Total Receives: 2374 -Stores 1655 1 -Total Receives: 2375 -Stores 1656 1 -Total Receives: 2376 -Stores 1657 1 -Total Receives: 2377 -Stores 1658 1 -Total Receives: 2378 -Stores 1659 1 -Total Receives: 2379 -Stores 1660 1 -tensor([-35.5477, -35.5788, -35.5885, -35.5886, -35.5627, -35.5569, -35.5641, - -35.5360]) -tensor([-0.0904, -0.0682, -0.0632]) -tensor([-0.0299, -0.0480, -0.0348]) -tensor([-0.0466, -0.0418]) -tensor([-0.0569, -0.0566, -0.0563]) -tensor([-30.0943, -30.0841, -30.0819, -30.1587]) -tensor([-0.0487, -0.0462, -0.0403]) -tensor([-0.1037, -0.1281]) -tensor([-0.6445, -0.6267, -0.6422, -0.6398, -0.6263, -0.6563, -0.6672, -0.6629, - -0.6846]) -tensor([-34.0930, -34.1205, -34.1285, -34.1282, -34.1036, -34.1016, -34.1064]) -tensor([-19.4160, -19.4818, -19.4937]) -tensor([-60.2747, -60.2788, -60.3013, -60.3038, -60.2868, -60.3664, -60.3664, - -60.3329, -60.3664]) -tensor([-47.8783, -47.8814, -47.8766, -47.8929, -47.8849]) -tensor([-0.0503, -0.0442]) -tensor([-0.0583, -0.0492]) -tensor([-0.0624, -0.0411, -0.0288]) -tensor([-25.6436, -25.6448]) -tensor([-19.2186, -19.1875, -19.2136, -19.2364]) -tensor([-75.6566, -75.6657, -75.6644, -75.6735, -75.6310, -75.6419]) -tensor([-17.3547, -17.2771, -17.3478, -17.3554]) -tensor([-0.0682, -0.0778]) -tensor([-15.6983, -15.7804, -15.7889]) -tensor([-46.7585, -46.7611, -46.7726, -46.7732]) -tensor([-0.3491, -0.3290, -0.3385, -0.3554, -0.3505, -0.3528, -0.3624, -0.3569, - -0.3584]) -tensor([-37.4340, -37.4007, -37.3837, -37.4378, -37.4543, -37.4621, -37.4621]) -tensor([-34.7247, -34.6920, -34.6753, -34.7286, -34.7537, -34.7536]) -tensor([-0.4143, -0.4266]) -tensor([-0.2929, -0.2995, -0.2943]) -Total rewards: 2383 -Total Receives: 2380 -Stores 1661 1 -Total Receives: 2381 -Stores 1662 1 -Total Receives: 2382 -Stores 1663 1 -Total Receives: 2383 -Stores 1664 1 -tensor([-30.7885, -30.7572, -30.8447, -30.8768]) -tensor([-50.2376, -50.2627, -50.2728]) -Total rewards: 2387 -Total Receives: 2384 -Stores 1665 1 -Total Receives: 2385 -Stores 1666 1 -tensor([-0.6095, -0.6091, -0.6120, -0.6084, -0.6114, -0.6432]) -tensor([0.0253, 0.0239, 0.0242]) -tensor([-0.0448, -0.0430, -0.0506]) -tensor([-0.1345, -0.0758, -0.1625, -0.0784, -0.1057, -0.0960]) -tensor([0.0232, 0.0302, 0.0281, 0.0308]) -tensor([-0.0589, -0.0896]) -tensor([-0.1420, -0.1533, -0.0827, -0.0962, -0.0919]) -tensor([-1.6595, -1.6768, -1.6565, -1.6341, -1.6883]) -tensor([-0.0465, -0.0497]) -tensor([-0.1094, -0.1112, -0.0945, -0.0933]) -tensor([-0.6072, -0.6078, -0.6091, -0.6107, -0.6442]) -Total rewards: 3543 -Total Receives: 3539 -Total Receives: 3540 -Total Receives: 3541 -Total Receives: 3542 -Total Receives: 3543 -tensor([-0.0449, -0.0699]) -tensor([-0.1587, -0.1591, -0.1267]) -tensor([-0.0054, -0.0162]) -tensor([-1.0594, -1.0748, -1.0579, -1.0864]) -Total rewards: 3550 -Total Receives: 3544 -Stores 2516 1 -Total Receives: 3545 -Stores 2517 1 -Total Receives: 3546 -Stores 2518 1 -Total Receives: 3547 -Stores 2519 1 -Total Receives: 3548 -Stores 2520 1 -Total Receives: 3549 -Stores 2521 1 -Total Receives: 3550 -Stores 2522 1 -tensor([-0.3597, -0.3896, -0.3617, -0.4394]) -Total rewards: 3557 -Total Receives: 3551 -Stores 2523 1 -Total Receives: 3552 -Stores 2524 1 -Total Receives: 3553 -Stores 2525 1 -Total Receives: 3554 -Stores 2526 1 -Total Receives: 3555 -Stores 2527 1 -Total Receives: 3556 -Stores 2528 1 -Total Receives: 3557 -Stores 2529 1 -Total rewards: 3561 -Total Receives: 3558 -Stores 2530 1 -Total Receives: 3559 -Stores 2531 1 -Total Receives: 3560 -Stores 2532 1 -Total Receives: 3561 -Stores 2533 1 -tensor([0.0259, 0.0255, 0.0239]) -tensor([-0.1558, -0.1660, -0.1078]) -tensor([-0.1606, -0.0991, -0.1754, -0.1558]) -tensor([0.0229, 0.0219]) -tensor([-0.0158, -0.0137, -0.0154]) -tensor([0.0133, 0.0171, 0.0164, 0.0118]) -Total rewards: 3569 -Total Receives: 3562 -Total Receives: 3563 -Total Receives: 3564 -Total Receives: 3565 -Total Receives: 3566 -Total Receives: 3567 -Total Receives: 3568 -Total Receives: 3569 -tensor([-0.1200, -0.1194, -0.0746, -0.0791]) -tensor([-0.1809, -0.1781, -0.3361, -0.1850, -0.1839, -0.1821, -0.1833, -0.2657]) -tensor([-0.1630, -0.1792, -0.1589]) -tensor([-0.0486, -0.0518]) -tensor([-0.1694, -0.0641, -0.2113, -0.2150, -0.1735, -0.1059, -0.0783, -0.0732]) -tensor([-0.1232, -0.1563, -0.0780, -0.0542, -0.1014, -0.1007, -0.0611, -0.0535]) -tensor([0.0360, 0.0330]) -tensor([-0.6598, -0.6582, -0.6603, -0.6598, -0.6612]) -tensor([-0.0188, -0.0160, -0.0208]) -tensor([-0.1934, -0.1311]) -tensor([-0.1160, -0.0561, -0.0322, -0.0847, -0.0776, -0.0388, -0.0271]) -tensor([-0.0210, -0.0157, -0.0227, -0.0202]) -tensor([0.0281, 0.0278]) -tensor([-0.1421, -0.1418, -0.1023, -0.1048]) -tensor([0.0230, 0.0252]) -tensor([-0.1476, -0.1511]) -tensor([-0.3294, -0.3304, -0.3689, -0.3315, -0.3331]) -tensor([-0.1879, -0.2308, -0.2343, -0.1921, -0.1267, -0.0997, -0.0950, -0.0849]) -tensor([-0.0256, -0.0264]) -Total rewards: 3574 -Total Receives: 3570 -Stores 2534 1 -Total Receives: 3571 -Stores 2535 1 -Total Receives: 3572 -Stores 2536 1 -Total Receives: 3573 -Stores 2537 1 -Total Receives: 3574 -Stores 2538 1 -tensor([0.0057, 0.0080, 0.0102]) -tensor([-0.1365, -0.0816, -0.0595, -0.1058, -0.1033, -0.0657]) -tensor([-0.2307, -0.1281, -0.1222]) -tensor([-0.0723, -0.0779]) -tensor([-0.0531, -0.0611, -0.0438, -0.0349]) -Total rewards: 3580 -Total Receives: 3575 -Total Receives: 3576 -Total Receives: 3577 -Total Receives: 3578 -Total Receives: 3579 -Total Receives: 3580 -tensor([-0.1358, -0.1355, -0.0816, -0.0975]) -Total rewards: 3586 -Total Receives: 3581 -Stores 2539 1 -Total Receives: 3582 -Stores 2540 1 -Total Receives: 3583 -Stores 2541 1 -Total Receives: 3584 -Stores 2542 1 -Total Receives: 3585 -Stores 2543 1 -Total Receives: 3586 -Stores 2544 1 -tensor([-0.2311, -0.4031, -0.2287, -0.2385, -0.2346, -0.2298, -0.2335, -0.3252]) -tensor([-0.0450, -0.0512, -0.0379]) -tensor([-0.1794, -0.1115, -0.1982]) -tensor([-0.0333, -0.0272, -0.0272]) -tensor([0.0209, 0.0232]) -Total rewards: 3593 -Total Receives: 3587 -Stores 2545 1 -Total Receives: 3588 -Stores 2546 1 -Total Receives: 3589 -Stores 2547 1 -Total Receives: 3590 -Stores 2548 1 -Total Receives: 3591 -Stores 2549 1 -Total Receives: 3592 -Stores 2550 1 -Total Receives: 3593 -Stores 2551 1 -tensor([-0.1503, -0.1747, -0.1515, -0.1504, -0.1401, -0.1515, -0.1481]) -tensor([-0.1510, -0.1072, -0.1264, -0.1255, -0.0942]) -tensor([0.0224, 0.0201, 0.0189]) -tensor([-0.0551, -0.0875, -0.0589, -0.0488, -0.0439, -0.0339, -0.0354]) -tensor([-0.0587, -0.0513, -0.0556]) -tensor([-0.1570, -0.1665, -0.1490, -0.2391, -0.1457, -0.1922]) -tensor([-0.1057, -0.0915]) -tensor([-0.2763, -0.3046, -0.3074, -0.2371, -0.2211, -0.2173, -0.1967]) -tensor([0.0377, 0.0314, 0.0279]) -tensor([-0.0183, -0.0395]) -tensor([-0.1690, -0.1697, -0.1227, -0.0900]) -tensor([0.0421, 0.0362]) -Total rewards: 3597 -Total Receives: 3594 -Stores 2552 1 -Total Receives: 3595 -Stores 2553 1 -Total Receives: 3596 -Stores 2554 1 -Total Receives: 3597 -Stores 2555 1 -tensor([-0.2026, -0.3689, -0.1940, -0.2030, -0.2138, -0.2038, -0.2017, -0.2047]) -tensor([0.0303, 0.0292, 0.0290]) -tensor([-0.0537, -0.0324, -0.0612]) -tensor([-0.1260, -0.1998, -0.1181, -0.1373]) -tensor([-0.0080, -0.0030]) -tensor([-0.0143, -0.0223]) -tensor([-0.0608, -0.0680]) -tensor([-0.1526, -0.0771, -0.0647]) -tensor([-0.1290, -0.0695, -0.1010, -0.0685]) -tensor([-0.1862, -0.2361, -0.1042, -0.1431, -0.0887, -0.1288, -0.1239, -0.1003]) -tensor([-0.1255, -0.0984]) -tensor([0.0146, 0.0160]) -tensor([-0.1103, -0.1045, -0.1038, -0.1218]) -tensor([-0.3638, -0.3513, -0.3640, -0.3673, -0.3688, -0.3622]) -tensor([-0.1355, -0.0764, -0.0745, -0.1068]) -tensor([-0.1049, -0.0847, -0.1760, -0.0780, -0.1277, -0.0752, -0.0808]) -Total rewards: 3607 -Total Receives: 3598 -Total Receives: 3599 -Total Receives: 3600 -Total Receives: 3601 -Total Receives: 3602 -Total Receives: 3603 -Total Receives: 3604 -Total Receives: 3605 -Total Receives: 3606 -Total Receives: 3607 -tensor([-0.1391, -0.1080]) -tensor([0.0349, 0.0337]) -tensor([-0.0075, -0.0091]) -Total rewards: 3614 -Total Receives: 3608 -Total Receives: 3609 -Total Receives: 3610 -Total Receives: 3611 -Total Receives: 3612 -Total Receives: 3613 -Total Receives: 3614 -tensor([-0.0220, -0.0201, -0.0324]) -tensor([0.0368, 0.0373]) -tensor([-0.2388, -0.2419, -0.2455]) -Total rewards: 3621 -Total Receives: 3615 -Stores 2556 1 -Total Receives: 3616 -Stores 2557 1 -Total Receives: 3617 -Stores 2558 1 -Total Receives: 3618 -Stores 2559 1 -Total Receives: 3619 -Stores 2560 1 -Total Receives: 3620 -Stores 2561 1 -Total Receives: 3621 -Stores 2562 1 -tensor([-0.2841, -0.2865, -0.2847, -0.2855, -0.2754]) -tensor([-0.1224, -0.1346, -0.2362, -0.1515]) -tensor([-0.0183, -0.0187, -0.0173, -0.0396]) -Total rewards: 3626 -Total Receives: 3622 -Stores 2563 1 -Total Receives: 3623 -Stores 2564 1 -Total Receives: 3624 -Stores 2565 1 -Total Receives: 3625 -Stores 2566 1 -Total Receives: 3626 -Stores 2567 1 -tensor([-0.9419, -0.9294, -0.9725, -0.8514, -0.9398, -0.9512, -0.9237, -0.9457, - -0.9687]) -tensor([-0.0518, -0.0518, -0.0511, -0.0435]) -tensor([0.0405, 0.0410]) -tensor([0.0090, 0.0096, 0.0083, 0.0078]) -tensor([-0.1055, -0.0918, -0.0966, -0.0865]) -tensor([0.0313, 0.0285]) -Total rewards: 3634 -Total Receives: 3627 -Stores 2568 1 -Total Receives: 3628 -Stores 2569 1 -Total Receives: 3629 -Stores 2570 1 -Total Receives: 3630 -Stores 2571 1 -Total Receives: 3631 -Stores 2572 1 -Total Receives: 3632 -Stores 2573 1 -Total Receives: 3633 -Stores 2574 1 -Total Receives: 3634 -Stores 2575 1 -tensor([-0.2131, -0.1273, -0.0887]) -Total rewards: 3638 -Total Receives: 3635 -Total Receives: 3636 -Total Receives: 3637 -Total Receives: 3638 -tensor([0.0357, 0.0372]) -tensor([0.0416, 0.0416]) -tensor([-0.2520, -0.2933, -0.1420, -0.1970, -0.1740, -0.1691, -0.1203]) -tensor([0.0400, 0.0383, 0.0383, 0.0391]) -tensor([-0.1540, -0.2624, -0.1342, -0.1784]) -tensor([-0.0551, -0.1506, -0.0754, -0.0668, -0.1114]) -tensor([0.0298, 0.0281]) -tensor([-0.1316, -0.1179, -0.1224]) -tensor([-0.1083, -0.0877, -0.1129, -0.1285, -0.0786, -0.0660, -0.0626]) -tensor([-0.1029, -0.1078, -0.1269, -0.0725, -0.0675, -0.0545]) -tensor([0.0226, 0.0208, 0.0146]) -tensor([-0.1242, -0.1280, -0.1454, -0.0890, -0.0823]) -tensor([-0.1961, -0.1770]) -tensor([-0.0767, -0.0917, -0.0946, -0.0908]) -tensor([0.0276, 0.0274]) -Total rewards: 3648 -Total Receives: 3639 -Stores 2576 1 -Total Receives: 3640 -Stores 2577 1 -Total Receives: 3641 -Stores 2578 1 -Total Receives: 3642 -Stores 2579 1 -Total Receives: 3643 -Stores 2580 1 -Total Receives: 3644 -Stores 2581 1 -Total Receives: 3645 -Stores 2582 1 -Total Receives: 3646 -Stores 2583 1 -Total Receives: 3647 -Stores 2584 1 -Total Receives: 3648 -Stores 2585 1 -tensor([-0.3697, -0.3716, -0.3707, -0.3713, -0.3576, -0.3712, -0.3727, -0.3696]) -tensor([-2.6188, -2.4633, -2.6607, -2.6112, -2.6307, -2.5914, -2.6227, -2.6517]) -tensor([-0.0884, -0.0988, -0.1022]) -Total rewards: 3653 -Total Receives: 3649 -Stores 2586 1 -Total Receives: 3650 -Stores 2587 1 -Total Receives: 3651 -Stores 2588 1 -Total Receives: 3652 -Stores 2589 1 -Total Receives: 3653 -Stores 2590 1 -tensor([0.0099, 0.0069, 0.0087, 0.0050]) -tensor([0.0309, 0.0293, 0.0309]) -tensor([-0.1410, -0.2159, -0.1620, -0.1141]) -tensor([-0.0255, -0.0242, -0.0191, -0.0189, -0.0127]) -tensor([-0.0184, -0.0182, -0.0154, -0.0154]) -tensor([-0.0927, -0.1881, -0.0901, -0.1408]) -tensor([-1.8265, -1.7153, -1.8640, -1.8340, -1.8236, -1.8309, -1.8541]) -tensor([0.0383, 0.0372]) -Total rewards: 3660 -Total Receives: 3654 -Stores 2591 1 -Total Receives: 3655 -Stores 2592 1 -Total Receives: 3656 -Stores 2593 1 -Total Receives: 3657 -Stores 2594 1 -Total Receives: 3658 -Stores 2595 1 -Total Receives: 3659 -Stores 2596 1 -Total Receives: 3660 -Stores 2597 1 -tensor([-0.1572, -0.1627, -0.1558]) -tensor([-0.2929, -0.2897, -0.2632, -0.2867, -0.2576, -0.2676, -0.2607]) -tensor([-0.2337, -0.2747, -0.1201, -0.1380, -0.1877, -0.1653, -0.1609]) -Total rewards: 3668 -Total Receives: 3661 -Stores 2598 1 -Total Receives: 3662 -Stores 2599 1 -Total Receives: 3663 -Stores 2600 1 -Total Receives: 3664 -Stores 2601 1 -Total Receives: 3665 -Stores 2602 1 -Total Receives: 3666 -Stores 2603 1 -Total Receives: 3667 -Stores 2604 1 -Total Receives: 3668 -Stores 2605 1 -tensor([-0.0434, -0.1701, -0.0314, -0.0354]) -tensor([-0.1784, -0.1060, -0.1055, -0.1255, -0.1199, -0.1082, -0.1477]) -tensor([-0.1923, -0.0694]) -tensor([-0.0555, -0.0654, -0.0545, -0.0498, -0.0498]) -tensor([-0.1404, -0.1423]) -tensor([0.0077, 0.0064]) -tensor([-0.1243, -0.2038, -0.1420, -0.0966]) -tensor([-0.1660, -0.1774, -0.1315, -0.1235]) -tensor([0.0407, 0.0398]) -tensor([-0.1270, -0.1095, -0.1306]) -Total rewards: 3674 -Total Receives: 3669 -Stores 2606 1 -Total Receives: 3670 -Stores 2607 1 -Total Receives: 3671 -Stores 2608 1 -Total Receives: 3672 -Stores 2609 1 -Total Receives: 3673 -Stores 2610 1 -Total Receives: 3674 -Stores 2611 1 -tensor([-0.0518, -0.0510, -0.0489]) -tensor([0.0266, 0.0296, 0.0289]) -Total rewards: 3680 -Total Receives: 3675 -Total Receives: 3676 -Total Receives: 3677 -Total Receives: 3678 -Total Receives: 3679 -Total Receives: 3680 -tensor([-0.1294, -0.0365, -0.0595, -0.0257, -0.0515, -0.0396, -0.0909, -0.0263]) -Total rewards: 3688 -Total Receives: 3681 -Stores 2612 1 -Total Receives: 3682 -Stores 2613 1 -Total Receives: 3683 -Stores 2614 1 -Total Receives: 3684 -Stores 2615 1 -Total Receives: 3685 -Stores 2616 1 -Total Receives: 3686 -Stores 2617 1 -Total Receives: 3687 -Stores 2618 1 -Total Receives: 3688 -Stores 2619 1 -tensor([-0.0343, -0.0379]) -Total rewards: 3697 -Total Receives: 3689 -Stores 2620 1 -Total Receives: 3690 -Stores 2621 1 -Total Receives: 3691 -Stores 2622 1 -Total Receives: 3692 -Stores 2623 1 -Total Receives: 3693 -Stores 2624 1 -Total Receives: 3694 -Stores 2625 1 -Total Receives: 3695 -Stores 2626 1 -Total Receives: 3696 -Stores 2627 1 -Total Receives: 3697 -Stores 2628 1 -tensor([-0.0578, -0.0423, -0.1752, -0.0504]) -Total rewards: 3705 -Total Receives: 3698 -Total Receives: 3699 -Total Receives: 3700 -Total Receives: 3701 -Total Receives: 3702 -Total Receives: 3703 -Total Receives: 3704 -Total Receives: 3705 -tensor([0.0392, 0.0419]) -tensor([-0.1846, -0.1784, -0.1399, -0.1733, -0.1373, -0.1371, -0.1480, -0.1352]) -tensor([0.0317, 0.0310, 0.0311]) -tensor([-0.0850, -0.0980, -0.1166]) -tensor([-0.1358, -0.1303, -0.0892, -0.1261, -0.0787, -0.0773, -0.0985]) -Total rewards: 3708 -Total Receives: 3706 -Stores 2629 1 -Total Receives: 3707 -Stores 2630 1 -Total Receives: 3708 -Stores 2631 1 -tensor([-0.1275, -0.0755, -0.1171, -0.0656, -0.0639, -0.0835]) -tensor([-0.0623, -0.1592, -0.0681, -0.0591]) -tensor([-2.0824, -2.0187, -2.0551, -2.0254, -1.9933, -2.0231, -2.0681, -2.0577]) -Total rewards: 3716 -Total Receives: 3709 -Stores 2632 1 -Total Receives: 3710 -Stores 2633 1 -Total Receives: 3711 -Stores 2634 1 -Total Receives: 3712 -Stores 2635 1 -Total Receives: 3713 -Stores 2636 1 -Total Receives: 3714 -Stores 2637 1 -Total Receives: 3715 -Stores 2638 1 -Total Receives: 3716 -Stores 2639 1 -tensor([0.0380, 0.0477]) -tensor([0.0324, 0.0304]) -tensor([-0.0997, -0.1305]) -tensor([-0.1590, -0.1124, -0.1039]) -Total rewards: 3723 -Total Receives: 3717 -Total Receives: 3718 -Total Receives: 3719 -Total Receives: 3720 -Total Receives: 3721 -Total Receives: 3722 -Total Receives: 3723 -tensor([-0.2784, -0.3138, -0.1650, -0.2329, -0.2104, -0.2060]) -tensor([-0.1366, -0.0946, -0.1295, -0.0846, -0.1026]) -tensor([-0.1362, -0.1432, -0.1198, -0.1179, -0.1179]) -tensor([-0.0395, -0.0408]) -tensor([0.0290, 0.0289]) -tensor([-0.1708, -0.2429, -0.1352, -0.1867]) -Total rewards: 3729 -Total Receives: 3724 -Stores 2640 1 -Total Receives: 3725 -Stores 2641 1 -Total Receives: 3726 -Stores 2642 1 -Total Receives: 3727 -Stores 2643 1 -Total Receives: 3728 -Stores 2644 1 -Total Receives: 3729 -Stores 2645 1 -tensor([-0.1504, -0.1194, -0.1294]) -tensor([-0.0792, -0.0779, -0.0625]) -tensor([-0.0488, -0.0365]) -tensor([-0.0472, -0.0465]) -tensor([0.0444, 0.0401]) -tensor([-0.1868, -0.0926, -0.1184, -0.1101, -0.0963, -0.1495, -0.0814]) -tensor([-0.0894, -0.1846, -0.0924, -0.0721]) -tensor([-0.1853, -0.1040, -0.1265, -0.1175, -0.1049, -0.1519]) -Total rewards: 3736 -Total Receives: 3730 -Total Receives: 3731 -Total Receives: 3732 -Total Receives: 3733 -Total Receives: 3734 -Total Receives: 3735 -Total Receives: 3736 -tensor([0.0342, 0.0324]) -tensor([0.0335, 0.0405, 0.0437]) -Total rewards: 3740 -Total Receives: 3737 -Stores 2646 1 -Total Receives: 3738 -Stores 2647 1 -Total Receives: 3739 -Stores 2648 1 -Total Receives: 3740 -Stores 2649 1 -tensor([0.0329, 0.0395]) -tensor([-3.2692, -3.1965, -3.2367, -3.2046, -3.1999, -3.2490, -3.2821, -3.2442, - -3.2730, -3.2812, -3.2306]) -tensor([0.0304, 0.0344]) -tensor([-0.2983, -0.3322, -0.2014, -0.2623, -0.2471, -0.1538]) -Total rewards: 3746 -Total Receives: 3741 -Stores 2650 1 -Total Receives: 3742 -Stores 2651 1 -Total Receives: 3743 -Stores 2652 1 -Total Receives: 3744 -Stores 2653 1 -Total Receives: 3745 -Stores 2654 1 -Total Receives: 3746 -Stores 2655 1 -tensor([-0.2180, -0.1200, -0.0904]) -Total rewards: 3751 -Total Receives: 3747 -Total Receives: 3748 -Total Receives: 3749 -Total Receives: 3750 -Total Receives: 3751 -tensor([0.0248, 0.0278, 0.0268, 0.0268]) -Total rewards: 3759 -Total Receives: 3752 -Total Receives: 3753 -Total Receives: 3754 -Total Receives: 3755 -Total Receives: 3756 -Total Receives: 3757 -Total Receives: 3758 -Total Receives: 3759 -tensor([-0.2143, -0.1416, -0.1551, -0.1425, -0.1850]) -tensor([-0.1351, -0.1911, -0.1247]) -tensor([-0.1029, -0.1014, -0.1010]) -tensor([-0.1384, -0.1394, -0.1390, -0.1419, -0.1390, -0.1379]) -tensor([-0.1514, -0.1145, -0.1435, -0.0725, -0.1252]) -tensor([-0.1656, -0.1205, -0.2481, -0.1796, -0.1127]) -tensor([-4.2158, -4.1363, -4.1816, -4.1392, -4.1953, -4.2303, -4.1884, -4.2201, - -4.2290, -4.1741]) -tensor([-0.0880, -0.0603]) -tensor([0.0325, 0.0321, 0.0311]) -tensor([-0.1261, -0.1763, -0.1120]) -tensor([0.0262, 0.0267]) -tensor([-0.2537, -0.2608, -0.2608, -0.2567, -0.2618, -0.2580]) -Total rewards: 3765 -Total Receives: 3760 -Stores 2656 1 -Total Receives: 3761 -Stores 2657 1 -Total Receives: 3762 -Stores 2658 1 -Total Receives: 3763 -Stores 2659 1 -Total Receives: 3764 -Stores 2660 1 -Total Receives: 3765 -Stores 2661 1 -tensor([0.0362, 0.0366]) -tensor([0.0355, 0.0345]) -tensor([-0.1723, -0.1166]) -tensor([-0.0270, -0.0271]) -tensor([-0.1500, -0.1120, -0.2137, -0.1638, -0.1009, -0.0909]) -tensor([-0.2362, -0.2426, -0.2077, -0.2248, -0.2230]) -Total rewards: 3771 -Total Receives: 3766 -Total Receives: 3767 -Total Receives: 3768 -Total Receives: 3769 -Total Receives: 3770 -Total Receives: 3771 -tensor([-0.0598, -0.0423, -0.0580, -0.0536]) -Total rewards: 3777 -Total Receives: 3772 -Total Receives: 3773 -Total Receives: 3774 -Total Receives: 3775 -Total Receives: 3776 -Total Receives: 3777 -tensor([-0.1095, -0.1106]) -Total Receives: 2386 -Stores 1667 1 -Total Receives: 2387 -Stores 1668 1 -Total rewards: 2393 -Total Receives: 2388 -Stores 1669 1 -Total Receives: 2389 -Stores 1670 1 -Total Receives: 2390 -Stores 1671 1 -Total Receives: 2391 -Stores 1672 1 -Total Receives: 2392 -Stores 1673 1 -Total Receives: 2393 -Stores 1674 1 -tensor([-0.1980, -0.1388, -0.2236, -0.1744, -0.1545, -0.1924, -0.1296, -0.1312]) -tensor([-0.0761, -0.0598]) -tensor([-70.5160, -70.5237, -70.5264, -70.5808, -70.5098, -70.5962, -70.6034, - -70.6034, -70.5623, -70.6033]) -Total rewards: 2396 -Total Receives: 2394 -Stores 1675 1 -Total Receives: 2395 -Stores 1676 1 -Total Receives: 2396 -Stores 1677 1 -tensor([-70.7629, -70.7705, -70.7732, -70.8280, -70.8431, -70.8529, -70.8529, - -70.8097, -70.8529]) -tensor([-0.0518, -0.0437, -0.0444, -0.0444, -0.0428]) -Total rewards: 2405 -Total Receives: 2397 -Stores 1678 1 -Total Receives: 2398 -Stores 1679 1 -Total Receives: 2399 -Stores 1680 1 -Total Receives: 2400 -Stores 1681 1 -Total Receives: 2401 -Stores 1682 1 -Total Receives: 2402 -Stores 1683 1 -Total Receives: 2403 -Stores 1684 1 -Total Receives: 2404 -Stores 1685 1 -Total Receives: 2405 -Stores 1686 1 -tensor([-30.8552, -30.8464, -30.8602]) -Total rewards: 2414 -Total Receives: 2406 -Stores 1687 1 -Total Receives: 2407 -Stores 1688 1 -Total Receives: 2408 -Stores 1689 1 -Total Receives: 2409 -Stores 1690 1 -Total Receives: 2410 -Stores 1691 1 -Total Receives: 2411 -Stores 1692 1 -Total Receives: 2412 -Stores 1693 1 -Total Receives: 2413 -Stores 1694 1 -Total Receives: 2414 -Stores 1695 1 -tensor([-68.8027, -68.8072, -68.8648, -68.8822, -68.8998, -68.8998, -68.8485, - -68.8999]) -tensor([-38.3684, -38.3233, -38.3406, -38.3395, -38.3522, -38.3499, -38.3354]) -tensor([-0.0054, -0.0106]) -tensor([-0.0470, -0.0364, -0.0394, -0.0370]) -tensor([-47.5260, -47.5940, -47.6564]) -tensor([-59.1012, -59.1094, -59.1139, -59.1722]) -Total rewards: 2419 -Total Receives: 2415 -Stores 1696 1 -Total Receives: 2416 -Stores 1697 1 -Total Receives: 2417 -Stores 1698 1 -Total Receives: 2418 -Stores 1699 1 -Total Receives: 2419 -Stores 1700 1 -tensor([-0.2498, -0.2489, -0.2486, -0.2513]) -tensor([-25.5928, -25.6102, -25.6172, -25.5574, -25.6245, -25.6321]) -tensor([-30.4957, -30.4021, -30.5013, -30.4537, -30.5066]) -tensor([-55.8332, -55.8986, -55.9108, -55.9335, -55.9444, -55.9444, -55.8794, - -55.9443]) -Total rewards: 2422 -Total Receives: 2420 -Stores 1701 1 -Total Receives: 2421 -Stores 1702 1 -Total Receives: 2422 -Stores 1703 1 -tensor([-0.0512, -0.0396, -0.0438]) -tensor([-33.6236, -33.5978, -33.5629, -33.6036, -33.6509, -33.6509]) -tensor([-53.5163, -53.5210, -53.5860]) -tensor([-30.7489, -30.6495, -30.7416, -30.6973, -30.7522]) -tensor([-0.1863, -0.1382, -0.1922, -0.1405, -0.1223, -0.1833, -0.1022]) -tensor([-0.1288, -0.1371, -0.1331]) -tensor([-23.1700, -23.1769, -23.1164, -23.1937, -23.1845, -23.1976]) -tensor([-0.0404, -0.0407]) -Total rewards: 2430 -Total Receives: 2423 -Stores 1704 1 -Total Receives: 2424 -Stores 1705 1 -Total Receives: 2425 -Stores 1706 1 -Total Receives: 2426 -Stores 1707 1 -Total Receives: 2427 -Stores 1708 1 -Total Receives: 2428 -Stores 1709 1 -Total Receives: 2429 -Stores 1710 1 -Total Receives: 2430 -Total rewards: 2439 -Total Receives: 2431 -Total Receives: 2432 -Total Receives: 2433 -Total Receives: 2434 -Total Receives: 2435 -Total Receives: 2436 -Total Receives: 2437 -Total Receives: 2438 -Total Receives: 2439 -tensor([-182.9043, -182.9148, -182.2568, -182.9111, -182.9262]) -tensor([-33.0288, -32.4656, -33.0179, -33.0558]) -tensor([-36.1985, -36.1715, -36.1709, -36.2031, -36.1772, -36.1602, -36.2169]) -tensor([-55.5535, -55.5171, -55.5251, -55.5350, -55.5530, -55.5530, -55.4792, - -55.5530]) -tensor([-30.7703, -30.7598, -30.7981]) -tensor([-0.0444, -0.0978]) -tensor([-45.8771, -45.8879, -45.8841, -45.9039]) -tensor([-60.9149, -60.8844, -60.8911, -60.8971, -60.9251, -60.9251, -60.9251]) -tensor([-0.0166, -0.0280, -0.0293]) -tensor([-0.0055, -0.0004]) -tensor([-57.2297, -57.2995, -57.2812, -57.2977]) -Total rewards: 2444 -Total Receives: 2440 -Stores 1711 1 -Total Receives: 2441 -Stores 1712 1 -Total Receives: 2442 -Stores 1713 1 -Total Receives: 2443 -Stores 1714 1 -Total Receives: 2444 -Stores 1715 1 -tensor([-0.0415, -0.0518, -0.0458]) -tensor([-53.3003, -53.2805, -53.3032]) -tensor([-41.0935, -41.0226, -41.0619, -41.1318, -41.1318]) -tensor([-23.6100, -23.6129, -23.4851, -23.5820]) -tensor([-0.0961, -0.1000]) -tensor([-26.2967, -26.3037, -26.2428, -26.3192, -26.3405]) -tensor([-0.0652, -0.0661, -0.0516]) -tensor([-32.6482, -32.5617, -32.6702, -32.6844, -32.6187, -32.6856]) -tensor([-0.1213, -0.1301, -0.1260]) -tensor([-0.0701, -0.0888]) -tensor([-0.1787, -0.1195, -0.1918, -0.1271, -0.1295, -0.1686, -0.0652]) -tensor([-59.8064, -59.7791, -59.8197, -59.7856, -59.8314, -59.8314, -59.8314]) -tensor([-23.6247, -23.6416, -23.6478, -23.6714]) -tensor([-0.0238, -0.0278]) -tensor([-40.2222, -40.2533, -39.7188, -40.2116, -40.2156, -40.2271, -40.1986, - -40.2404]) -tensor([-19.6723, -19.6740, -19.6713, -19.6328]) -Total rewards: 2449 -Total Receives: 2445 -Stores 1716 1 -Total Receives: 2446 -Stores 1717 1 -Total Receives: 2447 -Stores 1718 1 -Total Receives: 2448 -Stores 1719 1 -Total Receives: 2449 -Stores 1720 1 -tensor([-0.1713, -0.1231, -0.1525, -0.1295, -0.1644, -0.0749]) -Total rewards: 2452 -Total Receives: 2450 -Stores 1721 1 -Total Receives: 2451 -Stores 1722 1 -Total Receives: 2452 -Stores 1723 1 -tensor([-0.1388, -0.1492, -0.1360, -0.1611, -0.0882]) -tensor([-0.1452, -0.1434, -0.1407]) -Total rewards: 2462 -Total Receives: 2453 -Total Receives: 2454 -Total Receives: 2455 -Total Receives: 2456 -Total Receives: 2457 -Total Receives: 2458 -Total Receives: 2459 -Total Receives: 2460 -Total Receives: 2461 -Total Receives: 2462 -tensor([-35.8537, -35.8922, -35.8970, -35.9036, -35.8982, -35.9074]) -tensor([-53.2703, -53.2654, -53.3075]) -tensor([-0.0950, -0.1110, -0.1051, -0.1134]) -tensor([-51.2442, -51.2228, -51.2560]) -tensor([-42.4788, -42.5139, -42.4654, -42.4697, -42.4834, -42.4668, -42.4879, - -42.5318]) -tensor([-45.2278, -45.1976, -45.2845, -45.2845]) -tensor([-40.1349, -40.1168, -40.1211, -40.1349, -40.1253, -40.1439, -40.1905]) -tensor([-22.1557, -22.1556, -22.1714]) -tensor([-0.0410, -0.0485, -0.0471, -0.0333]) -tensor([-0.0560, -0.0509, -0.0351]) -tensor([-77.7184, -77.7765, -77.7582, -77.7755, -77.7637, -77.7844]) -tensor([-46.6989, -46.7102, -46.7136]) -Total rewards: 2465 -Total Receives: 2463 -Stores 1724 1 -Total Receives: 2464 -Stores 1725 1 -Total Receives: 2465 -Stores 1726 1 -tensor([-49.0893, -49.0832]) -tensor([-30.4330, -30.0224, -30.5025, -30.4338, -30.4435, -30.4600, -30.4632]) -tensor([-35.4655, -35.5183, -35.5227, -35.5227]) -tensor([-25.3651, -25.3894, -25.3658, -25.3756, -25.3924, -25.3965]) -tensor([-19.6088, -19.6239, -19.6663, -19.6727, -19.6718]) -tensor([-29.5842, -29.5678, -29.5718, -29.5842, -29.6405, -29.6037, -29.6409]) -Total rewards: 2469 -Total Receives: 2466 -Stores 1727 1 -Total Receives: 2467 -Stores 1728 1 -Total Receives: 2468 -Stores 1729 1 -Total Receives: 2469 -Stores 1730 1 -tensor([-0.3444, -0.3105]) -tensor([-17.0818, -17.1379, -17.1447, -17.1441]) -tensor([-42.9719, -42.9990, -43.0233, -42.9805, -42.9725, -43.0205, -43.0245, - -43.0245, -43.0170, -43.0246]) -tensor([-0.0757, -0.0862, -0.0895, -0.0614]) -tensor([-29.6414, -29.6514, -29.6514]) -tensor([-0.0518, -0.0535, -0.0365]) -tensor([-27.2306, -27.2047, -27.2087, -27.2213, -27.2757, -27.2406]) -tensor([-0.0294, -0.0307]) -Total rewards: 2476 -Total Receives: 2470 -Stores 1731 1 -Total Receives: 2471 -Stores 1732 1 -Total Receives: 2472 -Stores 1733 1 -Total Receives: 2473 -Stores 1734 1 -Total Receives: 2474 -Stores 1735 1 -Total Receives: 2475 -Stores 1736 1 -Total Receives: 2476 -Stores 1737 1 -tensor([ 0.0079, -0.0183, -0.0058]) -Total rewards: 2480 -Total Receives: 2477 -Stores 1738 1 -Total Receives: 2478 -Stores 1739 1 -Total Receives: 2479 -Stores 1740 1 -Total Receives: 2480 -Stores 1741 1 -tensor([-0.0028, -0.0105, 0.0068]) -tensor([-0.0418, -0.0248]) -tensor([-0.0289, -0.0198]) -tensor([-58.1438, -58.1808, -57.7024, -58.2411, -58.2589, -58.1877, -58.1830]) -tensor([-29.7546, -29.7524, -29.7523]) -tensor([-0.0287, -0.0331, -0.0232]) -Total rewards: 2488 -Total Receives: 2481 -Total Receives: 2482 -Total Receives: 2483 -Total Receives: 2484 -Total Receives: 2485 -tensor([-0.1655, -0.1339, -0.1584, -0.1418, -0.0979]) -Total rewards: 3785 -Total Receives: 3778 -Total Receives: 3779 -Total Receives: 3780 -Total Receives: 3781 -Total Receives: 3782 -Total Receives: 3783 -Total Receives: 3784 -Total Receives: 3785 -tensor([-0.2334, -0.1768, -0.1634, -0.2040, -0.1245]) -tensor([0.0061, 0.0072]) -tensor([0.0242, 0.0226, 0.0228, 0.0263]) -tensor([-0.1750, -0.1330, -0.2445, -0.1882, -0.1216]) -tensor([0.0540, 0.0357, 0.0287]) -tensor([-5.1464, -5.1118, -5.1583, -5.0646, -5.1204, -5.1757, -5.1585, -5.1132, - -5.1487, -5.1519, -5.0973, -5.1757, -5.1604]) -Total rewards: 3787 -Total Receives: 3786 -Stores 2662 1 -Total Receives: 3787 -Stores 2663 1 -tensor([0.0410, 0.0400]) -tensor([-0.0521, -0.0748, -0.0525]) -Total rewards: 3793 -Total Receives: 3788 -Stores 2664 1 -Total Receives: 3789 -Stores 2665 1 -Total Receives: 3790 -Stores 2666 1 -Total Receives: 3791 -Stores 2667 1 -Total Receives: 3792 -Stores 2668 1 -Total Receives: 3793 -Stores 2669 1 -tensor([-0.2070, -0.2132, -0.2126, -0.2127, -0.2163, -0.2074, -0.2097, -0.2154]) -tensor([-0.0135, -0.0360]) -tensor([-0.1541, -0.1378, -0.1628, -0.1612]) -tensor([-0.1007, -0.0966, -0.0978, -0.0857, -0.1103]) -tensor([-3.9335, -3.8968, -3.9458, -3.9426, -3.9572, -3.9480, -3.9769, -3.9236, - -3.9256, -3.9374, -3.8774, -3.9663, -3.9496]) -Total rewards: 3802 -Total Receives: 3794 -Stores 2670 1 -Total Receives: 3795 -Stores 2671 1 -Total Receives: 3796 -Stores 2672 1 -Total Receives: 3797 -Stores 2673 1 -Total Receives: 3798 -Stores 2674 1 -Total Receives: 3799 -Stores 2675 1 -Total Receives: 3800 -Stores 2676 1 -Total Receives: 3801 -Stores 2677 1 -Total Receives: 3802 -Stores 2678 1 -tensor([-0.0965, -0.1243, -0.0916]) -tensor([0.0125, 0.0104, 0.0117]) -Total rewards: 3806 -Total Receives: 3803 -Total Receives: 3804 -Total Receives: 3805 -Total Receives: 3806 -tensor([ 6.4969e-06, -3.1349e-04, 1.6629e-03]) -tensor([-0.3099, -0.2348, -0.2209, -0.2748, -0.1575]) -tensor([-0.0171, -0.0365]) -tensor([0.0335, 0.0314]) -tensor([-0.0343, -0.0372]) -tensor([0.0271, 0.0243, 0.0248]) -tensor([0.0295, 0.0286]) -Total rewards: 3811 -Total Receives: 3807 -Stores 2679 1 -Total Receives: 3808 -Stores 2680 1 -Total Receives: 3809 -Stores 2681 1 -Total Receives: 3810 -Stores 2682 1 -Total Receives: 3811 -Stores 2683 1 -tensor([-0.0657, -0.0690, -0.0692]) -tensor([-0.1369, -0.1270]) -tensor([-0.5192, -0.5277, -0.5254, -0.5260, -0.5349, -0.5205, -0.5325, -0.5430]) -tensor([-0.0045, -0.0085, -0.0049]) -tensor([-0.1158, -0.1107, -0.1131, -0.1290]) -tensor([0.0297, 0.0292]) -tensor([0.0310, 0.0302]) -tensor([-4.5324, -4.4944, -4.5397, -4.5419, -4.5519, -4.5485, -4.5743, -4.5185, - -4.5210, -4.5317, -4.5662, -4.5822]) -tensor([0.0240, 0.0222]) -tensor([-0.0862, -0.0641, -0.0637]) -tensor([-0.1140, -0.0960, -0.1379]) -tensor([-0.0515, -0.0587]) -tensor([-0.0282, -0.0274, -0.0282]) -Total rewards: 3818 -Total Receives: 3812 -Total Receives: 3813 -Total Receives: 3814 -Total Receives: 3815 -Total Receives: 3816 -Total Receives: 3817 -Total Receives: 3818 -tensor([-0.2261, -0.1546, -0.0712, -0.1882, -0.0911, -0.0699, -0.0732]) -tensor([-0.0087, -0.0088]) -tensor([0.0373, 0.0402]) -tensor([-1.1216, -1.1167, -1.1179, -1.1293, -1.1069, -1.1274, -1.1443]) -tensor([0.0326, 0.0325]) -tensor([0.0061, 0.0118, 0.0069]) -tensor([0.0354, 0.0366]) -tensor([-3.9298, -3.9614, -3.9389, -3.9458, -3.9447, -3.9672, -3.9174, -3.9199, - -3.9282, -3.9593, -3.9773]) -tensor([0.0268, 0.0256]) -Total rewards: 3825 -Total Receives: 3819 -Total Receives: 3820 -Total Receives: 3821 -Total Receives: 3822 -Total Receives: 3823 -Total Receives: 3824 -Total Receives: 3825 -tensor([0.0377, 0.0369]) -tensor([0.0360, 0.0352]) -tensor([-0.0315, -0.0332]) -tensor([-0.1182, -0.0949, -0.1165, -0.1379]) -tensor([0.0328, 0.0336, 0.0329]) -tensor([0.0237, 0.0453]) -tensor([0.0290, 0.0267]) -tensor([0.0421, 0.0417]) -tensor([0.0269, 0.0256, 0.0221]) -Total rewards: 3830 -Total Receives: 3826 -Stores 2684 1 -Total Receives: 3827 -Stores 2685 1 -Total Receives: 3828 -Stores 2686 1 -Total Receives: 3829 -Stores 2687 1 -Total Receives: 3830 -Stores 2688 1 -tensor([-0.0798, -0.0825]) -tensor([-0.0553, -0.0299, -0.0836, -0.0386, -0.0373, -0.0251]) -tensor([-1.5332, -1.5231, -1.5246, -1.5418, -1.5772, -1.5390, -1.5640, -1.5714]) -tensor([0.0388, 0.0279]) -tensor([-0.0109, 0.0035, -0.0198]) -Total rewards: 3839 -Total Receives: 3831 -Stores 2689 1 -Total Receives: 3832 -Stores 2690 1 -Total Receives: 3833 -Stores 2691 1 -Total Receives: 3834 -Stores 2692 1 -Total Receives: 3835 -Stores 2693 1 -Total Receives: 3836 -Stores 2694 1 -Total Receives: 3837 -Stores 2695 1 -Total Receives: 3838 -Stores 2696 1 -Total Receives: 3839 -Stores 2697 1 -tensor([0.0524, 0.0438]) -tensor([-0.2643, -0.2135, -0.1415, -0.2377, -0.1599, -0.1434]) -Total rewards: 3844 -Total Receives: 3840 -Stores 2698 1 -Total Receives: 3841 -Stores 2699 1 -Total Receives: 3842 -Stores 2700 1 -Total Receives: 3843 -Stores 2701 1 -Total Receives: 3844 -Stores 2702 1 -tensor([0.0334, 0.0318]) -tensor([-5.2962, -5.3351, -5.3073, -5.3143, -5.3150, -5.2920, -5.2790, -5.3584, - -5.2918, -5.3285, -5.3567]) -Total rewards: 3848 -Total Receives: 3845 -Total Receives: 3846 -Total Receives: 3847 -Total Receives: 3848 -tensor([-0.0456, -0.0447]) -Total rewards: 3855 -Total Receives: 3849 -Stores 2703 1 -Total Receives: 3850 -Stores 2704 1 -Total Receives: 3851 -Stores 2705 1 -Total Receives: 3852 -Stores 2706 1 -Total Receives: 3853 -Stores 2707 1 -Total Receives: 3854 -Stores 2708 1 -Total Receives: 3855 -Stores 2709 1 -tensor([0.0380, 0.0339]) -tensor([-0.1080, -0.1016]) -Total rewards: 3859 -Total Receives: 3856 -Total Receives: 3857 -Total Receives: 3858 -Total Receives: 3859 -tensor([0.0372, 0.0336]) -Total rewards: 3865 -Total Receives: 3860 -Total Receives: 3861 -Total Receives: 3862 -Total Receives: 3863 -Total Receives: 3864 -Total Receives: 3865 -Total rewards: 3870 -Total Receives: 3866 -Stores 2710 1 -Total Receives: 3867 -Stores 2711 1 -Total Receives: 3868 -Stores 2712 1 -Total Receives: 3869 -Stores 2713 1 -Total Receives: 3870 -Stores 2714 1 -tensor([0.0365, 0.0373]) -Total rewards: 3874 -Total Receives: 3871 -Stores 2715 1 -Total Receives: 3872 -Stores 2716 1 -Total Receives: 3873 -Stores 2717 1 -Total Receives: 3874 -Stores 2718 1 -tensor([0.0417, 0.0410]) -tensor([-1.6563, -1.6416, -1.6652, -1.6980, -1.6989, -1.6626, -1.6837, -1.7019]) -tensor([0.0251, 0.0288]) -tensor([0.0388, 0.0375, 0.0354]) -Total rewards: 3881 -Total Receives: 3875 -Stores 2719 1 -Total Receives: 3876 -Stores 2720 1 -Total Receives: 3877 -Stores 2721 1 -Total Receives: 3878 -Stores 2722 1 -Total Receives: 3879 -Stores 2723 1 -Total Receives: 3880 -Stores 2724 1 -Total Receives: 3881 -Stores 2725 1 -tensor([-0.0652, -0.0171, -0.0338, -0.0898, -0.0457, -0.0463]) -tensor([0.0320, 0.0328]) -tensor([-6.1629, -6.2027, -6.2368, -6.1736, -6.1797, -6.1818, -6.1562, -6.2215, - -6.1821, -6.1961, -6.2280]) -Total rewards: 3887 -Total Receives: 3882 -Stores 2726 1 -Total Receives: 3883 -Stores 2727 1 -Total Receives: 3884 -Stores 2728 1 -Total Receives: 3885 -Stores 2729 1 -Total Receives: 3886 -Stores 2730 1 -Total Receives: 3887 -Stores 2731 1 -tensor([-0.0481, -0.0556]) -Total rewards: 3893 -Total Receives: 3888 -Total Receives: 3889 -Total Receives: 3890 -Total Receives: 3891 -Total Receives: 3892 -Total Receives: 3893 -tensor([-0.3106, -0.2585, -0.2799, -0.1994, -0.1828]) -Total rewards: 3899 -Total Receives: 3894 -Stores 2732 1 -Total Receives: 3895 -Stores 2733 1 -Total Receives: 3896 -Stores 2734 1 -Total Receives: 3897 -Stores 2735 1 -Total Receives: 3898 -Stores 2736 1 -Total Receives: 3899 -Stores 2737 1 -tensor([0.0336, 0.0333]) -Total rewards: 3900 -Total Receives: 3900 -Stores 2738 1 -tensor([0.0356, 0.0392]) -Total rewards: 3904 -Total Receives: 3901 -Stores 2739 1 -Total Receives: 3902 -Stores 2740 1 -Total Receives: 3903 -Stores 2741 1 -Total Receives: 3904 -Stores 2742 1 -tensor([0.0352, 0.0351]) -tensor([-0.2068, -0.2003, -0.1999, -0.1798]) -tensor([0.0326, 0.0309]) -tensor([-1.1605, -1.2101, -1.1706, -1.2047, -1.2021, -1.1688, -1.1826, -1.2176, - -1.2113]) -Total rewards: 3908 -Total Receives: 3905 -Total Receives: 3906 -Total Receives: 3907 -Total Receives: 3908 -tensor([0.0336, 0.0328]) -Total rewards: 3910 -Total Receives: 3909 -Stores 2743 1 -Total Receives: 3910 -Stores 2744 1 -tensor([-0.7742, -0.7597, -0.7738, -0.7721, -0.7605, -0.7543, -0.7831, -0.7763]) -Total Receives: 2486 -Total Receives: 2487 -Total Receives: 2488 -tensor([ 0.0063, -0.0006]) -tensor([-29.9118, -29.9194, -29.8954, -29.9143, -29.9035, -29.9362]) -tensor([-0.0449, -0.0951, -0.0439]) -Total rewards: 2494 -Total Receives: 2489 -Total Receives: 2490 -Total Receives: 2491 -Total Receives: 2492 -Total Receives: 2493 -Total Receives: 2494 -tensor([-40.1803, -40.1700, -40.2074]) -Total rewards: 2499 -Total Receives: 2495 -Stores 1742 1 -Total Receives: 2496 -Stores 1743 1 -Total Receives: 2497 -Stores 1744 1 -Total Receives: 2498 -Stores 1745 1 -Total Receives: 2499 -Stores 1746 1 -tensor([-0.1090, -0.1172, -0.1146]) -Total rewards: 2507 -Total Receives: 2500 -Stores 1747 1 -Total Receives: 2501 -Stores 1748 1 -Total Receives: 2502 -Stores 1749 1 -Total Receives: 2503 -Stores 1750 1 -Total Receives: 2504 -Stores 1751 1 -Total Receives: 2505 -Stores 1752 1 -Total Receives: 2506 -Stores 1753 1 -Total Receives: 2507 -Stores 1754 1 -tensor([-19.7505, -19.7627, -19.7864]) -Total rewards: 2512 -Total Receives: 2508 -Stores 1755 1 -Total Receives: 2509 -Stores 1756 1 -Total Receives: 2510 -Stores 1757 1 -Total Receives: 2511 -Stores 1758 1 -Total Receives: 2512 -Stores 1759 1 -tensor([-52.8146, -52.7642]) -tensor([-29.1307, -29.1076, -29.1237, -29.1817, -29.1421]) -tensor([0.0368, 0.0354]) -tensor([-53.8211, -53.8310, -53.8537, -53.8748, -53.8067, -53.8552, -53.8818, - -53.8817, -53.8493, -53.8818]) -Total rewards: 2516 -Total Receives: 2513 -Stores 1760 1 -Total Receives: 2514 -Stores 1761 1 -Total Receives: 2515 -Stores 1762 1 -Total Receives: 2516 -Stores 1763 1 -tensor([-24.7690, -24.7773, -24.7835, -24.7809, -24.7930]) -tensor([-16.2698, -16.3157, -16.3241]) -tensor([-35.5484, -35.5308]) -tensor([-20.9416, -20.9770, -20.9636, -20.9952, -20.9692, -20.9986, -20.9813]) -tensor([-0.1262, -0.1179]) -tensor([-45.3127, -45.3211, -45.3415, -45.3607, -45.3415, -45.3732, -45.3733, - -45.3371, -45.3732]) -Total rewards: 2518 -Total Receives: 2517 -Stores 1764 1 -Total Receives: 2518 -Stores 1765 1 -tensor([-0.0405, -0.0360]) -tensor([-32.1401, -32.1670]) -Total rewards: 2522 -Total Receives: 2519 -Stores 1766 1 -Total Receives: 2520 -Stores 1767 1 -Total Receives: 2521 -Stores 1768 1 -Total Receives: 2522 -Stores 1769 1 -tensor([-0.0146, -0.0292, -0.0244]) -tensor([-16.7944, -16.8127, -16.8354]) -tensor([-45.7205, -45.7426]) -tensor([-21.2207, -21.2077, -21.2406, -21.2134, -21.2500, -21.2262]) -tensor([-27.0909, -27.0805, -27.1181, -27.0734, -27.0719, -27.1227]) -tensor([-0.1154, -0.1543]) -tensor([-46.1416, -46.1651, -46.1886, -46.1690, -46.1550, -46.1964, -46.1963, - -46.1489, -46.1963]) -tensor([-14.9397, -14.9678]) -tensor([-25.4546, -25.4568, -25.5062, -25.4661]) -tensor([-27.7072, -27.6981, -27.6970]) -tensor([0.0207, 0.0269]) -Total rewards: 2531 -Total Receives: 2523 -Stores 1770 1 -Total Receives: 2524 -Stores 1771 1 -Total Receives: 2525 -Stores 1772 1 -Total Receives: 2526 -Stores 1773 1 -Total Receives: 2527 -Stores 1774 1 -Total Receives: 2528 -Stores 1775 1 -Total Receives: 2529 -Stores 1776 1 -Total Receives: 2530 -Stores 1777 1 -Total Receives: 2531 -Stores 1778 1 -tensor([-23.0705, -23.0208, -23.0542, -23.0253, -23.0736, -23.0401]) -tensor([-0.0251, -0.0262, -0.0547, -0.0140, -0.0295]) -Total rewards: 2537 -Total Receives: 2532 -Stores 1779 1 -Total Receives: 2533 -Stores 1780 1 -Total Receives: 2534 -Stores 1781 1 -Total Receives: 2535 -Stores 1782 1 -Total Receives: 2536 -Stores 1783 1 -Total Receives: 2537 -Stores 1784 1 -tensor([0.0273, 0.0266]) -Total rewards: 2540 -Total Receives: 2538 -Stores 1785 1 -Total Receives: 2539 -Stores 1786 1 -Total Receives: 2540 -Stores 1787 1 -tensor([-29.2992, -29.3029, -29.3028, -29.3378]) -tensor([-21.6643, -21.7610, -21.7109]) -tensor([-46.2831, -46.3204, -46.3174, -46.2890, -46.2785, -46.3280, -46.3280, - -46.2709, -46.3280]) -Total rewards: 2544 -Total Receives: 2541 -Stores 1788 1 -Total Receives: 2542 -Stores 1789 1 -Total Receives: 2543 -Stores 1790 1 -Total Receives: 2544 -Stores 1791 1 -tensor([-0.2800, 0.0922, -0.2592]) -tensor([-31.5416, -31.5504, -31.5735, -31.5288, -31.5921]) -tensor([-49.1263, -49.1210, -49.1096, -49.1107, -49.1259]) -tensor([-28.9547, -28.9585, -28.9965]) -tensor([-29.4891, -29.4798, -29.5038]) -Total rewards: 2547 -Total Receives: 2545 -Stores 1792 1 -Total Receives: 2546 -Stores 1793 1 -Total Receives: 2547 -Stores 1794 1 -tensor([-0.0498, -0.1121]) -Total rewards: 2555 -Total Receives: 2548 -Stores 1795 1 -Total Receives: 2549 -Stores 1796 1 -Total Receives: 2550 -Stores 1797 1 -Total Receives: 2551 -Stores 1798 1 -Total Receives: 2552 -Stores 1799 1 -Total Receives: 2553 -Stores 1800 1 -Total Receives: 2554 -Stores 1801 1 -Total Receives: 2555 -Stores 1802 1 -tensor([-28.0295, -28.1170, -28.0845, -28.0377]) -tensor([-15.5359, -15.5524, -15.5369, -15.5509, -15.5536]) -Total rewards: 2562 -Total Receives: 2556 -Stores 1803 1 -Total Receives: 2557 -Stores 1804 1 -Total Receives: 2558 -Stores 1805 1 -Total Receives: 2559 -Stores 1806 1 -Total Receives: 2560 -Stores 1807 1 -Total Receives: 2561 -Stores 1808 1 -Total Receives: 2562 -Stores 1809 1 -tensor([-26.3643, -26.3798, -26.3825]) -tensor([-13.7773, -13.7963, -13.7947, -13.7991]) -tensor([-23.6260, -23.6300]) -tensor([-0.0867, -0.0521]) -tensor([-28.4007, -28.4070, -28.4262, -28.4516]) -tensor([0.0151, 0.0124]) -Total rewards: 2570 -Total Receives: 2563 -Stores 1810 1 -Total Receives: 2564 -Stores 1811 1 -Total Receives: 2565 -Stores 1812 1 -Total Receives: 2566 -Stores 1813 1 -Total Receives: 2567 -Stores 1814 1 -Total Receives: 2568 -Stores 1815 1 -Total Receives: 2569 -Stores 1816 1 -Total Receives: 2570 -Stores 1817 1 -tensor([-26.6320, -26.6641]) -tensor([-45.4083, -45.9838, -45.9818, -45.9583, -45.9513, -46.0025, -46.0025, - -46.0025, -45.9917]) -tensor([-42.8365, -42.8760, -42.8564, -42.8424, -42.8556, -42.8682]) -Total rewards: 2573 -Total Receives: 2571 -Stores 1818 1 -Total Receives: 2572 -Stores 1819 1 -Total Receives: 2573 -Stores 1820 1 -tensor([-24.2130, -24.1812, -24.2323, -24.1398]) -Total rewards: 2582 -Total Receives: 2574 -Stores 1821 1 -Total Receives: 2575 -Stores 1822 1 -Total Receives: 2576 -Stores 1823 1 -Total Receives: 2577 -Stores 1824 1 -Total Receives: 2578 -Stores 1825 1 -Total Receives: 2579 -Stores 1826 1 -Total Receives: 2580 -Stores 1827 1 -Total Receives: 2581 -Stores 1828 1 -Total Receives: 2582 -Stores 1829 1 -tensor([-45.1109, -45.0959, -45.0940, -45.0693, -45.0633, -45.1211, -45.1211, - -45.1211, -45.1034]) -tensor([-25.9931, -25.9794, -25.9960]) -tensor([-0.0717, -0.0258]) -Total rewards: 2588 -Total Receives: 2583 -Stores 1830 1 -Total Receives: 2584 -Stores 1831 1 -Total Receives: 2585 -Stores 1832 1 -Total Receives: 2586 -Stores 1833 1 -Total Receives: 2587 -Stores 1834 1 -Total Receives: 2588 -Stores 1835 1 -tensor([-47.3515, -47.3479, -47.3242, -47.3434, -47.3102, -47.3221, -47.3331, - -47.3570]) -tensor([-28.8927, -28.8997, -28.9134, -28.8944, -28.8593, -28.9266, -28.8667]) -tensor([-0.0903, -0.0996, -0.0769, -0.1282, -0.0951]) -Total rewards: 2594 -Total Receives: 2589 -Stores 1836 1 -Total Receives: 2590 -Stores 1837 1 -Total Receives: 2591 -Stores 1838 1 -Total Receives: 2592 -Stores 1839 1 -Total Receives: 2593 -Stores 1840 1 -Total Receives: 2594 -Stores 1841 1 -tensor([-26.1539, -26.1257]) -tensor([-45.7447, -45.7307, -45.7292, -45.7039, -45.7580, -45.7601, -45.7601, - -45.7601, -45.7342]) -tensor([-30.6178, -30.5665, -30.5351]) -Total rewards: 2597 -Total Receives: 2595 -Stores 1842 1 -Total Receives: 2596 -Stores 1843 1 -Total Receives: 2597 -Stores 1844 1 -tensor([-36.3370, -36.3326, -36.3182, -36.2914, -36.3516, -36.3587, -36.3587, - -36.3397]) -tensor([-16.0750, -16.0810, -16.1023]) -tensor([-30.3266, -30.3537, -30.3980]) -tensor([-29.0531, -29.1029]) -Total rewards: 2604 -Total Receives: 2598 -Stores 1845 1 -Total Receives: 2599 -Stores 1846 1 -Total Receives: 2600 -Stores 1847 1 -Total Receives: 2601 -Stores 1848 1 -Total Receives: 2602 -Stores 1849 1 -Total Receives: 2603 -Stores 1850 1 -Total Receives: 2604 -Stores 1851 1 -tensor([-45.1313, -45.1226, -45.1022, -45.1359, -45.1433, -45.1207, -45.1126, - -45.1511]) -tensor([-14.6239, -14.6537]) -tensor([0.0090, 0.0081]) -tensor([-34.1753, -34.1746, -34.1829, -34.1819]) -tensor([-24.3298, -24.3347, -24.3508, -24.3508]) -tensor([0.0031, 0.0031]) -tensor([-30.4682, -30.4508, -30.4403, -30.4266, -30.4563, -30.4753, -30.4753, - -30.4467]) -Total rewards: 3919 -Total Receives: 3911 -Stores 2745 1 -Total Receives: 3912 -Stores 2746 1 -Total Receives: 3913 -Stores 2747 1 -Total Receives: 3914 -Stores 2748 1 -Total Receives: 3915 -Stores 2749 1 -Total Receives: 3916 -Stores 2750 1 -Total Receives: 3917 -Stores 2751 1 -Total Receives: 3918 -Stores 2752 1 -Total Receives: 3919 -Stores 2753 1 -tensor([0.0106, 0.0134, 0.0112]) -Total rewards: 3921 -Total Receives: 3920 -Stores 2754 1 -Total Receives: 3921 -Stores 2755 1 -Total rewards: 3924 -Total Receives: 3922 -Total Receives: 3923 -Total Receives: 3924 -tensor([-0.4829, -0.4804, -0.4866, -0.4849, -0.4816, -0.4840, -0.4855]) -tensor([-0.1570, -0.1355, -0.1126, -0.1796, -0.1455, -0.1497]) -Total rewards: 3932 -Total Receives: 3925 -Stores 2756 1 -Total Receives: 3926 -Stores 2757 1 -Total Receives: 3927 -Stores 2758 1 -Total Receives: 3928 -Stores 2759 1 -Total Receives: 3929 -Stores 2760 1 -Total Receives: 3930 -Stores 2761 1 -Total Receives: 3931 -Stores 2762 1 -Total Receives: 3932 -Stores 2763 1 -tensor([-6.3393, -6.4538, -6.3785, -6.4502, -6.3492, -6.3503, -6.3588, -6.3326, - -6.3911, -6.3582, -6.3724, -6.4362, -6.4105]) -tensor([0.0293, 0.0277]) -tensor([-4.7909, -4.8891, -4.8264, -4.8866, -4.8006, -4.7951, -4.8099, -4.8699, - -4.8087, -4.8208, -4.8714, -4.8613]) -Total rewards: 3939 -Total Receives: 3933 -Stores 2764 1 -Total Receives: 3934 -Stores 2765 1 -Total Receives: 3935 -Stores 2766 1 -Total Receives: 3936 -Stores 2767 1 -Total Receives: 3937 -Stores 2768 1 -Total Receives: 3938 -Stores 2769 1 -Total Receives: 3939 -Stores 2770 1 -tensor([-3.5164, -3.4609, -3.5142, -3.4409, -3.4322, -3.4482, -3.4983, -3.4477, - -3.4567, -3.4956, -3.4931]) -tensor([0.0266, 0.0244, 0.0258]) -tensor([-0.3074, -0.2481, -0.1331, -0.2727, -0.1307]) -Total rewards: 3942 -Total Receives: 3940 -Total Receives: 3941 -Total Receives: 3942 -tensor([-0.4908, -0.4910, -0.4925, -0.4935, -0.4909, -0.4922, -0.4928]) -tensor([0.0399, 0.0386]) -tensor([0.0117, 0.0109]) -Total rewards: 3948 -Total Receives: 3943 -Stores 2771 1 -Total Receives: 3944 -Stores 2772 1 -Total Receives: 3945 -Stores 2773 1 -Total Receives: 3946 -Stores 2774 1 -Total Receives: 3947 -Stores 2775 1 -Total Receives: 3948 -Stores 2776 1 -tensor([-0.1194, -0.0692, -0.1013, -0.0595, -0.1416, -0.1087, -0.1180, -0.0580]) -tensor([-0.0214, -0.0192]) -tensor([0.0178, 0.0170, 0.0182]) -Total rewards: 3953 -Total Receives: 3949 -Stores 2777 1 -Total Receives: 3950 -Stores 2778 1 -Total Receives: 3951 -Stores 2779 1 -Total Receives: 3952 -Stores 2780 1 -Total Receives: 3953 -Stores 2781 1 -tensor([-3.7924, -3.7330, -3.7902, -3.7157, -3.7196, -3.7745, -3.7231, -3.7320, - -3.7673, -3.7721, -3.8096]) -Total rewards: 3960 -Total Receives: 3954 -Stores 2782 1 -Total Receives: 3955 -Stores 2783 1 -Total Receives: 3956 -Stores 2784 1 -Total Receives: 3957 -Stores 2785 1 -Total Receives: 3958 -Stores 2786 1 -Total Receives: 3959 -Stores 2787 1 -Total Receives: 3960 -Stores 2788 1 -tensor([-0.3104, -0.3078, -0.3093, -0.3085, -0.3024, -0.3056]) -Total rewards: 3963 -Total Receives: 3961 -Total Receives: 3962 -Total Receives: 3963 -tensor([0.0118, 0.0126]) -tensor([0.0410, 0.0274]) -tensor([-0.1327, -0.0843, -0.1164, -0.0724, -0.1536, -0.1231, -0.1342, -0.0649]) -Total rewards: 3967 -Total Receives: 3964 -Total Receives: 3965 -Total Receives: 3966 -Total Receives: 3967 -tensor([-0.3081, -0.2093, -0.1170, -0.1239, -0.2708, -0.1169, -0.1068]) -tensor([0.0120, 0.0113]) -tensor([-2.8551, -2.8015, -2.8583, -2.7899, -2.7934, -2.8409, -2.7960, -2.8032, - -2.8270, -2.8705, -2.8755]) -tensor([-0.3873, -0.3837, -0.3878, -0.3884, -0.3762]) -Total rewards: 3970 -Total Receives: 3968 -Total Receives: 3969 -Total Receives: 3970 -tensor([0.0306, 0.0392, 0.0283]) -tensor([-0.2371, -0.1938, -0.1794, -0.2740, -0.2194, -0.2361, -0.1926]) -tensor([-0.0359, -0.0353, -0.0306, -0.0278]) -tensor([-0.1755, -0.1237, -0.2000, -0.1656, -0.1782, -0.0955]) -tensor([0.0037, 0.0046]) -tensor([0.0410, 0.0389]) -tensor([0.0292, 0.0258, 0.0254, 0.0283]) -tensor([-0.1196, -0.1160, -0.1094]) -Total rewards: 3973 -Total Receives: 3971 -Total Receives: 3972 -Total Receives: 3973 -tensor([-0.1237, -0.1179]) -Total rewards: 3979 -Total Receives: 3974 -Stores 2789 1 -Total Receives: 3975 -Stores 2790 1 -Total Receives: 3976 -Stores 2791 1 -Total Receives: 3977 -Stores 2792 1 -Total Receives: 3978 -Stores 2793 1 -Total Receives: 3979 -Stores 2794 1 -tensor([0.0312, 0.0310, 0.0308]) -tensor([-0.1483, -0.2064, -0.1771, -0.2077, -0.2357, -0.1471]) -tensor([-0.1277, -0.1908, -0.1594, -0.1943, -0.2343]) -tensor([-3.4406, -3.3795, -3.4098, -3.3732, -3.4245, -3.3762, -3.3842, -3.4092, - -3.4801, -3.4574, -3.4703, -3.4611]) -tensor([0.0226, 0.0215, 0.0217]) -Total rewards: 3984 -Total Receives: 3980 -Total Receives: 3981 -Total Receives: 3982 -Total Receives: 3983 -Total Receives: 3984 -tensor([-0.4203, -0.3072, -0.1841, -0.3802, -0.1999, -0.1724]) -tensor([0.0281, 0.0259]) -tensor([-0.0344, -0.0406, -0.0390]) -tensor([-0.2286, -0.1664, -0.1546, -0.2529, -0.2222, -0.2368]) -tensor([0.0326, 0.0292]) -tensor([-0.1161, -0.1110, -0.1395, -0.1682, -0.2005]) -Total rewards: 3988 -Total Receives: 3985 -Total Receives: 3986 -Total Receives: 3987 -Total Receives: 3988 -tensor([0.0314, 0.0310]) -Total rewards: 3993 -Total Receives: 3989 -Stores 2795 1 -Total Receives: 3990 -Stores 2796 1 -Total Receives: 3991 -Stores 2797 1 -Total Receives: 3992 -Stores 2798 1 -Total Receives: 3993 -Stores 2799 1 -tensor([0.0210, 0.0215, 0.0239, 0.0234]) -tensor([-0.0662, -0.0722, -0.0746]) -tensor([-0.1790, -0.2055, -0.2378, -0.2814]) -tensor([-0.1347, -0.1359]) -tensor([0.0301, 0.0304, 0.0322]) -tensor([-7.3047, -7.2234, -7.2706, -7.2830, -7.2182, -7.2305, -7.2702, -7.3518, - -7.3257, -7.3353, -7.3298, -7.3573]) -tensor([-0.3423, -0.1602, -0.2876, -0.1423, -0.1151, -0.1224, -0.1123, -0.1184, - -0.1144]) -tensor([-0.0815, -0.1338, -0.1612, -0.1994, -0.0842, -0.0842]) -tensor([-0.0028, -0.0046, 0.0012, -0.0017, -0.0094]) -tensor([-0.1577, -0.0918, -0.0795, -0.0831, -0.1515, -0.0539, -0.1691]) -Total rewards: 4002 -Total Receives: 3994 -Stores 2800 1 -Total Receives: 3995 -Stores 2801 1 -Total Receives: 3996 -Stores 2802 1 -Total Receives: 3997 -Stores 2803 1 -Total Receives: 3998 -Stores 2804 1 -Total Receives: 3999 -Stores 2805 1 -Total Receives: 4000 -Stores 2806 1 -Total Receives: 4001 -Stores 2807 1 -Total Receives: 4002 -Stores 2808 1 -tensor([-0.5612, -0.5671]) -tensor([-4.7835, -4.7200, -4.7506, -4.8475, -4.8411, -4.7685, -4.8489, -4.7837, - -4.7412, -4.8310, -4.7997, -4.8080, -4.8061, -4.8323]) -tensor([-0.1827, -0.1208, -0.0791, -0.0834, -0.1643, -0.1781]) -tensor([0.0218, 0.0203, 0.0215, 0.0167]) -tensor([-0.1668, -0.1905, -0.2170, -0.1227, -0.1226]) -tensor([-0.1107, -0.1159]) -tensor([-0.2059, -0.1524, -0.1202, -0.1919, -0.2054]) -tensor([0.0275, 0.0280, 0.0281]) -tensor([-0.4089, -0.3619, -0.2217, -0.1938, -0.1974, -0.1788, -0.1959, -0.1933]) -tensor([0.0404, 0.0387, 0.0387]) -tensor([0.0315, 0.0314, 0.0342]) -tensor([-0.1798, -0.1991, -0.2296, -0.1405]) -tensor([-0.1369, -0.1454, -0.1490]) -Total rewards: 4009 -Total Receives: 4003 -Stores 2809 1 -Total Receives: 4004 -Stores 2810 1 -Total Receives: 4005 -Stores 2811 1 -Total Receives: 4006 -Stores 2812 1 -Total Receives: 4007 -Stores 2813 1 -Total Receives: 4008 -Stores 2814 1 -Total Receives: 4009 -Stores 2815 1 -tensor([-3.4015, -3.3757, -3.4740, -3.4699, -3.3751, -3.4782, -3.3727, -3.3482, - -3.4862, -3.4594, -3.4204, -3.4258, -3.4259, -3.4733, -3.4856, -3.4640]) -tensor([-0.2297, -0.1735, -0.2214, -0.2321]) -tensor([-0.0049, -0.0073, -0.0046]) -tensor([-0.4458, -0.3988, -0.2663, -0.2378, -0.2409, -0.2399, -0.2371]) -tensor([-0.9363, -0.9292, -0.9358, -0.9426]) -Total rewards: 4011 -Total Receives: 4010 -Total Receives: 4011 -tensor([-0.1554, -0.1206, -0.1553]) -tensor([-2.6790, -2.6546, -2.7347, -2.7346, -2.6608, -2.7429, -2.6737, -2.7510, - -2.7247, -2.6921, -2.6949, -2.6967, -2.7370, -2.7500, -2.7289]) -tensor([0.0151, 0.0138]) -tensor([-0.1666, -0.1989, -0.0845, -0.0875]) -tensor([0.0154, 0.0155]) -tensor([0.0254, 0.0269, 0.0266]) -tensor([0.0328, 0.0310]) -tensor([-0.5466, -0.5488, -0.5478, -0.5191, -0.5507]) -tensor([-0.3279, -0.3260, -0.3010, -0.3301]) -tensor([0.0290, 0.0320]) -tensor([0.0355, 0.0354]) -tensor([-4.0896, -4.1529, -4.1528, -4.0635, -4.1547, -4.0581, -4.1666, -4.1457, - -4.1040, -4.1087, -4.1129, -4.1479, -4.1604, -4.1494]) -tensor([-0.4325, -0.3873, -0.1807, -0.2364, -0.2328, -0.2026, -0.2369, -0.2114]) -tensor([0.0400, 0.0332]) -tensor([0.0285, 0.0285]) -Total rewards: 4016 -Total Receives: 4012 -Stores 2816 1 -Total Receives: 4013 -Stores 2817 1 -Total Receives: 4014 -Stores 2818 1 -Total Receives: 4015 -Stores 2819 1 -Total Receives: 4016 -Stores 2820 1 -tensor([-0.2197, -0.2066, -0.1077]) -Total rewards: 4023 -Total Receives: 4017 -Stores 2821 1 -Total Receives: 4018 -Stores 2822 1 -Total Receives: 4019 -Stores 2823 1 -Total Receives: 4020 -Stores 2824 1 -Total Receives: 4021 -Stores 2825 1 -Total Receives: 4022 -Stores 2826 1 -Total Receives: 4023 -Stores 2827 1 -Total rewards: 4024 -Total Receives: 4024 -tensor([-2.8232, -2.8669, -2.8671, -2.8079, -2.9043, -2.8686, -2.8794, -2.8638, - -2.8296, -2.8300, -2.8364, -2.8606, -2.8719, -2.8665]) -tensor([-0.0570, -0.0535, -0.0542]) -tensor([-1.8443, -1.8910, -1.8912, -1.9219, -1.8926, -1.9024, -1.8864, -1.8507, - -1.8529, -1.8586, -1.8848, -1.8950, -1.8898]) -tensor([-0.1793, -0.1063, -0.1095]) -tensor([0.0383, 0.0395]) -Total rewards: 4031 -Total Receives: 4025 -Stores 2828 1 -Total Receives: 4026 -Stores 2829 1 -Total Receives: 4027 -Stores 2830 1 -Total Receives: 4028 -Stores 2831 1 -Total Receives: 4029 -Stores 2832 1 -Total Receives: 4030 -Stores 2833 1 -Total Receives: 4031 -Stores 2834 1 -tensor([-0.4255, -0.3830, -0.2178, -0.1826, -0.1229, -0.1774, -0.1398, -0.1818]) -tensor([-0.1741, -0.1655, -0.1231, -0.1702]) -tensor([-0.0941, -0.0604, -0.0639, -0.0907]) -Total rewards: 4039 -Total Receives: 4032 -Stores 2835 1 -Total Receives: 4033 -Stores 2836 1 -Total Receives: 4034 -Stores 2837 1 -Total Receives: 4035 -Stores 2838 1 -Total Receives: 4036 -Stores 2839 1 -Total Receives: 4037 -Stores 2840 1 -Total Receives: 4038 -Stores 2841 1 -Total Receives: 4039 -Stores 2842 1 -tensor([-1.9482, -1.9486, -1.9768, -1.9496, -1.9587, -1.9466, -1.9096, -1.9093, - -1.9184, -1.9387, -1.9483, -1.9499]) -Total rewards: 4047 -Total Receives: 4040 -Stores 2843 1 -Total Receives: 4041 -Stores 2844 1 -Total Receives: 4042 -Stores 2845 1 -Total Receives: 4043 -Stores 2846 1 -Total Receives: 4044 -Stores 2847 1 -Total Receives: 4045 -Stores 2848 1 -Total Receives: 4046 -Stores 2849 1 -Total Receives: 4047 -Stores 2850 1 -tensor([-0.1102, -0.0848, -0.1094]) -Total rewards: 4053 -Total Receives: 4048 -Stores 2851 1 -Total Receives: 4049 -Stores 2852 1 -Total Receives: 4050 -Stores 2853 1 -Total Receives: 4051 -Stores 2854 1 -Total Receives: 4052 -Stores 2855 1 -Total Receives: 4053 -Stores 2856 1 -tensor([0.0149, 0.0158]) -tensor([0.0353, 0.0351]) -tensor([0.0322, 0.0314, 0.0330]) -tensor([-0.0017, 0.0043, 0.0009, 0.0038, 0.0017]) -Total rewards: 4062 -Total Receives: 4054 -Stores 2857 1 -Total Receives: 4055 -Stores 2858 1 -Total Receives: 4056 -Stores 2859 1 -Total Receives: 4057 -Stores 2860 1 -Total Receives: 4058 -Stores 2861 1 -Total Receives: 4059 -Stores 2862 1 -Total Receives: 4060 -Stores 2863 1 -Total Receives: 4061 -Stores 2864 1 -Total Receives: 4062 -Stores 2865 1 -Total rewards: 4067 -Total Receives: 4063 -Stores 2866 1 -Total Receives: 4064 -Stores 2867 1 -Total Receives: 4065 -Stores 2868 1 -Total Receives: 4066 -Stores 2869 1 -Total Receives: 4067 -Stores 2870 1 -tensor([-0.1650, -0.1343, -0.1725]) -Total rewards: 4071 -Total Receives: 4068 -Total Receives: 4069 -Total Receives: 4070 -Total Receives: 4071 -tensor([-0.0740, -0.0776, -0.0533]) -Total rewards: 4074 -Total Receives: 4072 -Stores 2871 1 -Total Receives: 4073 -Stores 2872 1 -Total Receives: 4074 -Stores 2873 1 -tensor([0.0079, 0.0088]) -tensor([-2.1575, -2.1577, -2.1880, -2.1582, -2.1679, -2.1558, -2.1164, -2.1274, - -2.1940, -2.1685, -2.1525, -2.1586]) -tensor([-0.4548, -0.3707, -0.2137, -0.2436, -0.2401, -0.2110, -0.2468]) -tensor([0.0391, 0.0388]) -tensor([-0.4870, -0.4008, -0.2773, -0.2926, -0.2889, -0.2931]) -tensor([-0.0302, -0.0306, -0.0268, -0.0280]) -tensor([0.0331, 0.0316]) -tensor([0.0255, 0.0244, 0.0261]) -Total rewards: 4080 -Total Receives: 4075 -Stores 2874 1 -Total Receives: 4076 -Stores 2875 1 -Total Receives: 4077 -Stores 2876 1 -Total Receives: 4078 -Stores 2877 1 -Total Receives: 4079 -Stores 2878 1 -Total Receives: 4080 -Stores 2879 1 -tensor([-0.0044, -0.0020, -0.0167]) -tensor([-0.3027, -0.2358, -0.2627, -0.2610, -0.2667]) -tensor([-0.1761, -0.1540, -0.1807, -0.1491]) -tensor([0.0331, 0.0326]) -tensor([-0.0727, -0.0769]) -tensor([0.0144, 0.0148]) -tensor([-0.1467, -0.1172, -0.1525, -0.1051]) -tensor([0.0162, 0.0184]) -tensor([0.0352, 0.0356, 0.0355, 0.0355]) -tensor([0.0265, 0.0243]) -tensor([0.0308, 0.0308, 0.0316]) -tensor([-3.0270, -3.0270, -3.0555, -3.0278, -3.0372, -3.0451, -3.0407, -3.0652, - -3.0386, -3.0171, -3.0287, -3.0596, -3.0772]) -tensor([0.0279, 0.0291]) -tensor([-0.1211, -0.1177, -0.1120]) -Total rewards: 4084 -Total Receives: 4081 -Stores 2880 1 -Total Receives: 4082 -Stores 2881 1 -Total Receives: 4083 -Stores 2882 1 -Total Receives: 4084 -Stores 2883 1 -tensor([0.0302, 0.0405]) -tensor([0.0405, 0.0420]) -tensor([-0.1385, -0.0870, -0.1078, -0.1183]) -Total rewards: 4087 -Total Receives: 4085 -Total Receives: 4086 -Total Receives: 4087 -tensor([-0.0662, -0.1099]) -Total rewards: 4093 -Total Receives: 4088 -Stores 2884 1 -Total Receives: 4089 -Stores 2885 1 -Total Receives: 4090 -Stores 2886 1 -Total Receives: 4091 -Stores 2887 1 -Total Receives: 4092 -Stores 2888 1 -Total Receives: 4093 -Stores 2889 1 -tensor([0.0337, 0.0342, 0.0340]) -tensor([0.0399, 0.0401]) -tensor([-0.0181, -0.0223]) -tensor([-0.1928, -0.1687, -0.1978]) -tensor([0.0426, 0.0395, 0.0393]) -Total rewards: 4098 -Total Receives: 4094 -Stores 2890 1 -Total Receives: 4095 -Stores 2891 1 -Total Receives: 4096 -Stores 2892 1 -Total Receives: 4097 -Stores 2893 1 -Total Receives: 4098 -Stores 2894 1 -tensor([0.0320, 0.0252]) -Total rewards: 4104 -Total Receives: 4099 -Stores 2895 1 -Total Receives: 4100 -Stores 2896 1 -Total Receives: 4101 -Stores 2897 1 -Total Receives: 4102 -Stores 2898 1 -Total Receives: 4103 -Stores 2899 1 -Total Receives: 4104 -Stores 2900 1 -tensor([-4.8236, -4.8237, -4.8673, -4.8227, -4.8371, -4.8430, -4.8349, -4.8769, - -4.8409, -4.8133, -4.8237, -4.8672]) -tensor([-0.0439, -0.0535]) -tensor([0.0390, 0.0365]) -tensor([-0.2096, -0.2143, -0.1560]) -tensor([0.0287, 0.0291, 0.0298, 0.0284]) -tensor([-0.0784, -0.0815, -0.0938]) -tensor([0.0370, 0.0375]) -tensor([-0.0238, -0.0259]) -tensor([-0.0188, -0.0216]) -tensor([-0.0318, -0.0285, -0.0319]) -tensor([-0.1448, -0.1392]) -Total rewards: 4106 -Total Receives: 4105 -Stores 2901 1 -Total Receives: 4106 -Stores 2902 1 -tensor([0.0065, 0.0048]) -Total rewards: 4107 -Total Receives: 4107 -tensor([0.0306, 0.0300, 0.0294]) -tensor([-0.0101, -0.0053]) -tensor([-0.0730, -0.0317, -0.0766]) -Total rewards: 4110 -Total Receives: 4108 -Stores 2903 1 -Total Receives: 4109 -Stores 2904 1 -Total Receives: 4110 -Stores 2905 1 -tensor([-0.0326, -0.0383, -0.0355]) -tensor([-0.0188, -0.0253, -0.0255]) -tensor([-0.2248, -0.2308]) -tensor([-3.6625, -3.6624, -3.6932, -3.7328, -3.6634, -3.7434, -3.6710, -3.6862, - -3.6807, -3.7043, -3.6717, -3.6468, -3.6934, -3.7458]) -tensor([-0.1797, -0.1118]) -tensor([0.0104, 0.0047, 0.0097]) -tensor([-0.0138, -0.0148, -0.0078, -0.0061]) -tensor([0.0306, 0.0301]) -tensor([0.0193, 0.0216]) -tensor([0.0393, 0.0280]) -tensor([-0.0434, -0.0318, -0.0445, -0.0288]) -tensor([0.0378, 0.0357, 0.0371]) -Total rewards: 4115 -Total Receives: 4111 -Stores 2906 1 -Total Receives: 4112 -Stores 2907 1 -Total Receives: 4113 -Stores 2908 1 -Total Receives: 4114 -Stores 2909 1 -Total Receives: 4115 -Stores 2910 1 -tensor([0.0597, 0.0367]) -tensor([-0.0659, -0.0551, -0.0477]) -tensor([-0.0203, -0.0214]) -tensor([-0.0271, -0.0304, -0.0191, -0.0100]) -tensor([0.0989, 0.0121]) -tensor([-0.0497, -0.0475]) -tensor([-0.1322, -0.2004]) -tensor([0.0278, 0.0251, 0.0267, 0.0245]) -tensor([-0.0281, -0.0288, -0.0225]) -tensor([-0.0016, -0.0027]) -tensor([-5.1240, -5.1234, -5.1644, -5.2012, -5.1208, -5.2140, -5.1316, -5.1480, - -5.1375, -5.1771, -5.2131, -5.1391, -5.1601, -5.2171]) -tensor([-0.0257, -0.0288]) -tensor([-0.0705, -0.0588, -0.0592]) -tensor([0.0288, 0.0320]) -tensor([-0.0129, -0.0239, -0.0244, -0.0141]) -tensor([-0.0562, -0.0489]) -tensor([0.0514, 0.0399]) -tensor([-0.0022, 0.0074]) -tensor([-20.7319, -20.7119, -20.7011, -20.7444, -20.7145, -20.7011, -20.7449, - -20.6903]) -tensor([0.0309, 0.0285]) -tensor([-22.4849, -22.4573, -22.3723, -22.4770]) -tensor([-28.7938, -28.7758, -28.7653, -28.7810, -28.8066, -28.8066, -28.7714]) -tensor([-0.0780, -0.0658, -0.0799, -0.1173, -0.0835, -0.0611]) -Total rewards: 2610 -Total Receives: 2605 -Stores 1852 1 -Total Receives: 2606 -Stores 1853 1 -Total Receives: 2607 -Stores 1854 1 -Total Receives: 2608 -Stores 1855 1 -Total Receives: 2609 -Stores 1856 1 -Total Receives: 2610 -Stores 1857 1 -tensor([-19.3320, -19.3052, -19.3246]) -tensor([-15.7410, -15.7503]) -tensor([-34.8629, -34.8530, -34.8670, -34.8812, -34.8701, -34.8870, -34.8535, - -34.8560, -34.8918]) -tensor([-11.0628, -11.0736]) -tensor([-0.0742, -0.0540, -0.0736, -0.1066, -0.0479]) -tensor([-22.1517, -22.1499, -22.1626]) -Total rewards: 2617 -Total Receives: 2611 -Stores 1858 1 -Total Receives: 2612 -Stores 1859 1 -Total Receives: 2613 -Stores 1860 1 -Total Receives: 2614 -Stores 1861 1 -Total Receives: 2615 -Stores 1862 1 -Total Receives: 2616 -Stores 1863 1 -Total Receives: 2617 -Stores 1864 1 -tensor([-24.9716, -24.9558, -24.9695, -24.9952, -24.9952, -24.9470]) -tensor([-29.1061, -29.1190, -29.1186, -29.1029]) -tensor([-0.0244, -0.0179]) -tensor([-21.1478, -21.1283, -21.1661, -21.1661]) -tensor([-0.1490, -0.1307, -0.1457, -0.1922, -0.1135]) -Total rewards: 2620 -Total Receives: 2618 -Stores 1865 1 -Total Receives: 2619 -Stores 1866 1 -Total Receives: 2620 -Stores 1867 1 -tensor([-25.3013, -25.2778, -25.3111, -25.2756, -25.3119, -25.2878, -25.3345, - -25.2648]) -tensor([-0.0346, -0.0317, -0.0355, -0.0247]) -tensor([-19.0533, -19.0533, -19.0495]) -tensor([-41.8213, -41.8227, -41.8553, -41.8291, -41.8467, -41.8141, -41.8695, - -41.8133, -41.8686]) -tensor([-0.0197, -0.0156, -0.0210]) -tensor([-20.5578, -20.5713, -20.5696]) -tensor([-0.3207, -0.3211]) -tensor([-22.4640, -22.4596, -22.4960, -22.4960]) -tensor([-0.0157, -0.0161]) -tensor([-30.4204, -30.4312, -30.4328]) -tensor([-0.2079, -0.1890, -0.2018, -0.2537, -0.1710]) -Total rewards: 2629 -Total Receives: 2621 -Stores 1868 1 -Total Receives: 2622 -Stores 1869 1 -Total Receives: 2623 -Stores 1870 1 -Total Receives: 2624 -Stores 1871 1 -Total Receives: 2625 -Stores 1872 1 -Total Receives: 2626 -Stores 1873 1 -Total Receives: 2627 -Stores 1874 1 -Total Receives: 2628 -Stores 1875 1 -Total Receives: 2629 -Stores 1876 1 -tensor([-17.8683, -17.8660]) -tensor([-26.7456, -26.7539, -26.7969, -26.7970, -26.7309, -26.7845]) -tensor([-0.0182, -0.0197]) -tensor([-0.0652, -0.0589]) -tensor([-23.7994, -23.8078, -23.8534, -23.8534, -23.8364]) -tensor([-0.0444, -0.0415]) -tensor([-0.0036, 0.0041, 0.0041]) -tensor([-24.3444, -24.3519, -24.3252, -24.3393, -24.3545]) -tensor([-19.9797, -20.0144, -20.0144]) -tensor([-3.1998, -3.2165, -3.2165]) -tensor([-0.0078, 0.0021]) -tensor([-37.2097, -37.2087, -37.2398, -37.2376, -37.2124, -37.2283, -37.1998, - -37.2504, -37.2618, -37.2567]) -tensor([-0.2036, -0.1846, -0.1965, -0.2531]) -tensor([3.8236e-05, 8.5687e-04]) -Total rewards: 2638 -Total Receives: 2630 -Stores 1877 1 -Total Receives: 2631 -Stores 1878 1 -Total Receives: 2632 -Stores 1879 1 -Total Receives: 2633 -Stores 1880 1 -Total Receives: 2634 -Stores 1881 1 -Total Receives: 2635 -Stores 1882 1 -Total Receives: 2636 -Stores 1883 1 -Total Receives: 2637 -Stores 1884 1 -Total Receives: 2638 -Stores 1885 1 -Total rewards: 2639 -Total Receives: 2639 -Stores 1886 1 -tensor([-33.7814, -33.7802, -33.8112, -33.8092, -33.7838, -33.7998, -33.8193, - -33.8354, -33.8288]) -tensor([-0.2637, -0.2636]) -tensor([-15.9622, -15.9645]) -Total rewards: 2644 -Total Receives: 2640 -Stores 1887 1 -Total Receives: 2641 -Stores 1888 1 -Total Receives: 2642 -Stores 1889 1 -Total Receives: 2643 -Stores 1890 1 -Total Receives: 2644 -Stores 1891 1 -tensor([-0.5163, -0.5171, -0.5171]) -Total rewards: 2648 -Total Receives: 2645 -Stores 1892 1 -Total Receives: 2646 -Stores 1893 1 -Total Receives: 2647 -Stores 1894 1 -Total Receives: 2648 -Stores 1895 1 -tensor([-20.6056, -20.5751, -20.6143, -20.5722, -20.6556, -20.6201, -20.5885, - -20.6623, -20.6514]) -Total rewards: 2658 -Total Receives: 2649 -Stores 1896 1 -Total Receives: 2650 -Stores 1897 1 -Total Receives: 2651 -Stores 1898 1 -Total Receives: 2652 -Stores 1899 1 -Total Receives: 2653 -Stores 1900 1 -Total Receives: 2654 -Stores 1901 1 -Total Receives: 2655 -Stores 1902 1 -Total Receives: 2656 -Stores 1903 1 -Total Receives: 2657 -Stores 1904 1 -Total Receives: 2658 -Stores 1905 1 -tensor([-27.7033, -27.7091, -27.7461]) -tensor([-21.4191, -21.4713, -21.4713, -21.4606]) -Total rewards: 2666 -Total Receives: 2659 -Stores 1906 1 -Total Receives: 2660 -Stores 1907 1 -Total Receives: 2661 -Stores 1908 1 -Total Receives: 2662 -Stores 1909 1 -Total Receives: 2663 -Stores 1910 1 -Total Receives: 2664 -Stores 1911 1 -Total Receives: 2665 -Stores 1912 1 -Total Receives: 2666 -Stores 1913 1 -tensor([-0.0414, -0.0416]) -tensor([-4.8162, -4.1063, -4.8207]) -tensor([-0.5049, -0.5087, -0.5087, -0.5087]) -Total rewards: 2675 -Total Receives: 2667 -Stores 1914 1 -Total Receives: 2668 -Stores 1915 1 -Total Receives: 2669 -Stores 1916 1 -Total Receives: 2670 -Stores 1917 1 -Total Receives: 2671 -Stores 1918 1 -Total Receives: 2672 -Stores 1919 1 -Total Receives: 2673 -Stores 1920 1 -Total Receives: 2674 -Stores 1921 1 -Total Receives: 2675 -Stores 1922 1 -tensor([-19.4688, -19.4436, -19.4824, -19.5191, -19.4883, -19.4581, -19.5398, - -19.5218]) -Total rewards: 2684 -Total Receives: 2676 -Stores 1923 1 -Total Receives: 2677 -Stores 1924 1 -Total Receives: 2678 -Stores 1925 1 -Total Receives: 2679 -Stores 1926 1 -Total Receives: 2680 -Stores 1927 1 -Total Receives: 2681 -Stores 1928 1 -Total Receives: 2682 -Stores 1929 1 -Total Receives: 2683 -Stores 1930 1 -Total Receives: 2684 -Stores 1931 1 -tensor([-18.2396, -18.2534, -18.2534]) -Total rewards: 2688 -Total Receives: 2685 -Stores 1932 1 -Total Receives: 2686 -Stores 1933 1 -Total Receives: 2687 -Stores 1934 1 -Total Receives: 2688 -Stores 1935 1 -tensor([-0.5701, -0.5499, -0.5674, -0.5798]) -tensor([-35.1338, -35.1286, -35.1786, -35.1599, -34.6686, -35.1804, -35.1852, - -35.2048, -35.1825]) -tensor([-20.8519, -20.8519, -20.8346]) -Total rewards: 2693 -Total Receives: 2689 -Stores 1936 1 -Total Receives: 2690 -Stores 1937 1 -Total Receives: 2691 -Stores 1938 1 -Total Receives: 2692 -Stores 1939 1 -Total Receives: 2693 -Stores 1940 1 -tensor([-17.6500, -17.6861]) -tensor([-17.1015, -17.0964]) -tensor([-16.8704, -16.8808]) -tensor([-0.0087, -0.0218]) -tensor([-20.9650, -20.9751, -20.9912, -20.9700, -21.0001, -20.9961, -20.9992, - -20.9798]) -tensor([-18.9571, -18.9661, -18.9492, -18.9753, -18.9713, -18.9748, -18.9574]) -tensor([-0.3890, -0.4059, -0.4008]) -tensor([-0.9637, -0.9613]) -tensor([-17.8385, -17.8468, -17.8468]) -tensor([-21.3516, -21.3642, -21.3728, -21.3686, -21.3757, -21.3524]) -tensor([-0.0782, -0.0899]) -tensor([-33.9046, -33.9121, -33.8770, -33.8339]) -tensor([-1.3988, -1.4069]) -tensor([-1.0737, -1.0737, -1.0737, -1.0731]) -Total rewards: 2694 -Total Receives: 2694 -Stores 1941 1 -tensor([-1.3173, -1.3156]) -tensor([-32.8459, -32.8539, -32.8202]) -tensor([-0.6285, -0.6025, -0.6109, -0.6100]) -tensor([-24.3003, -24.3310, -24.3266]) -tensor([-28.4145, -28.4464, -28.4692, -28.4340, -28.5008, -28.4045, -28.5021, - -28.4732]) -tensor([-22.2762, -22.2778]) -tensor([-45.1257, -45.1168, -45.1692, -45.1591, -45.1491, -45.1741, -45.2226, - -45.1877]) -Total rewards: 2701 -Total Receives: 2695 -Stores 1942 1 -Total Receives: 2696 -Stores 1943 1 -Total Receives: 2697 -Stores 1944 1 -Total Receives: 2698 -Stores 1945 1 -Total Receives: 2699 -Stores 1946 1 -Total Receives: 2700 -Stores 1947 1 -Total Receives: 2701 -Stores 1948 1 -tensor([-21.0018, -21.0053]) -tensor([-19.3250, -19.3229, -19.3263, -19.3501]) -Total rewards: 2710 -Total Receives: 2702 -Stores 1949 1 -Total Receives: 2703 -Stores 1950 1 -Total Receives: 2704 -Stores 1951 1 -Total Receives: 2705 -Stores 1952 1 -Total Receives: 2706 -Stores 1953 1 -Total Receives: 2707 -Stores 1954 1 -Total Receives: 2708 -Stores 1955 1 -Total Receives: 2709 -Stores 1956 1 -Total Receives: 2710 -Stores 1957 1 -tensor([-19.6881, -19.6953, -19.6924, -19.7185, -19.6847, -19.7066, -19.7185]) -tensor([-1.0800, -1.0779, -1.0866, -1.0881, -1.0881, -1.0881]) -Total rewards: 2715 -Total Receives: 2711 -Stores 1958 1 -tensor([0.0379, 0.0364]) -Total rewards: 4117 -Total Receives: 4116 -Total Receives: 4117 -tensor([0.0354, 0.0355]) -tensor([-0.0428, -0.0460, -0.0423]) -Total rewards: 4118 -Total Receives: 4118 -tensor([0.0298, 0.0320, 0.0303]) -tensor([-5.0950, -5.0944, -5.1359, -5.1720, -5.1880, -5.1002, -5.1933, -5.1260, - -5.1114, -5.1478, -5.1840, -5.1076, -5.1643, -5.1868, -5.2198]) -tensor([0.0385, 0.0393]) -Total rewards: 4120 -Total Receives: 4119 -Stores 2911 1 -Total Receives: 4120 -Stores 2912 1 -tensor([-0.0014, -0.0028, -0.0011, -0.0033]) -tensor([0.0197, 0.0185]) -tensor([-0.0145, -0.0109, -0.0153]) -tensor([-0.0802, -0.0883]) -tensor([0.0416, 0.0497]) -tensor([0.0146, 0.0124, 0.0129]) -tensor([0.0386, 0.0370, 0.0364]) -tensor([-4.0165, -4.0470, -4.0703, -4.0880, -4.0162, -4.0919, -4.0509, -4.0433, - -4.0569, -4.0819, -4.0233, -4.0691, -4.0846, -4.1191]) -tensor([0.0326, 0.0306, 0.0252, 0.0322]) -tensor([0.0405, 0.0399]) -tensor([0.0361, 0.0354]) -Total rewards: 4125 -Total Receives: 4121 -Stores 2913 1 -Total Receives: 4122 -Stores 2914 1 -Total Receives: 4123 -Stores 2915 1 -Total Receives: 4124 -Stores 2916 1 -Total Receives: 4125 -Stores 2917 1 -tensor([0.0134, 0.0153, 0.0125]) -tensor([0.0058, 0.0071]) -tensor([0.0300, 0.0298]) -tensor([0.0372, 0.0365]) -Total rewards: 4134 -Total Receives: 4126 -Stores 2918 1 -Total Receives: 4127 -Stores 2919 1 -Total Receives: 4128 -Stores 2920 1 -Total Receives: 4129 -Stores 2921 1 -Total Receives: 4130 -Stores 2922 1 -Total Receives: 4131 -Stores 2923 1 -Total Receives: 4132 -Stores 2924 1 -Total Receives: 4133 -Stores 2925 1 -Total Receives: 4134 -Stores 2926 1 -Total rewards: 4136 -Total Receives: 4135 -Stores 2927 1 -Total Receives: 4136 -Stores 2928 1 -tensor([0.0414, 0.0409]) -tensor([0.0283, 0.0298, 0.0308]) -tensor([-4.3768, -4.4076, -4.4248, -4.4479, -4.3749, -4.4421, -4.4120, -4.4065, - -4.4178, -4.4335, -4.3816, -4.4299, -4.4731]) -tensor([0.0317, 0.0338]) -tensor([-0.0100, -0.0071, -0.0050]) -tensor([0.0353, 0.0322, 0.0303]) -tensor([0.0310, 0.0310]) -tensor([0.0291, 0.0289, 0.0292]) -tensor([0.0191, 0.0182]) -tensor([0.0402, 0.0407]) -tensor([-0.0235, -0.0283]) -Total rewards: 4142 -Total Receives: 4137 -Stores 2929 1 -Total Receives: 4138 -Stores 2930 1 -Total Receives: 4139 -Stores 2931 1 -Total Receives: 4140 -Stores 2932 1 -Total Receives: 4141 -Stores 2933 1 -Total Receives: 4142 -Stores 2934 1 -tensor([0.0390, 0.0393]) -tensor([-0.0471, -0.0528, -0.0489, -0.0489]) -Total rewards: 4148 -Total Receives: 4143 -Stores 2935 1 -Total Receives: 4144 -Stores 2936 1 -Total Receives: 4145 -Stores 2937 1 -Total Receives: 4146 -Stores 2938 1 -Total Receives: 4147 -Stores 2939 1 -Total Receives: 4148 -Stores 2940 1 -tensor([0.0188, 0.0196]) -Total rewards: 4152 -Total Receives: 4149 -Stores 2941 1 -Total Receives: 4150 -Stores 2942 1 -Total Receives: 4151 -Stores 2943 1 -Total Receives: 4152 -Stores 2944 1 -tensor([0.0391, 0.0408]) -tensor([0.0345, 0.0330]) -tensor([0.0322, 0.0317]) -tensor([-4.7443, -4.7748, -4.7940, -4.8177, -4.7381, -4.8136, -4.7798, -4.7781, - -4.7849, -4.8027, -4.7451, -4.8612, -4.8437, -4.8880]) -tensor([0.0358, 0.0351, 0.0343, 0.0349]) -tensor([-3.1884, -3.2190, -3.2376, -3.2596, -3.2861, -3.2199, -3.2259, -3.2291, - -3.2463, -3.1893, -3.2975, -3.2841, -3.3210]) -tensor([0.0397, 0.0406]) -Total rewards: 4158 -Total Receives: 4153 -Stores 2945 1 -Total Receives: 4154 -Stores 2946 1 -Total Receives: 4155 -Stores 2947 1 -Total Receives: 4156 -Stores 2948 1 -Total Receives: 4157 -Stores 2949 1 -Total Receives: 4158 -Stores 2950 1 -tensor([-0.0067, -0.0084, -0.0052]) -tensor([0.0410, 0.0373]) -tensor([0.0292, 0.0287, 0.0303]) -tensor([0.0372, 0.0321]) -tensor([0.0411, 0.0389]) -tensor([-2.0538, -2.0647, -2.0932, -2.1169, -2.0581, -2.0671, -2.0674, -2.0731, - -2.0248, -2.1260, -2.1152, -2.1573]) -tensor([0.0304, 0.0298, 0.0294]) -tensor([0.0226, 0.0218, 0.0204]) -tensor([-1.8365, -1.8443, -1.8727, -1.8964, -1.8439, -1.8533, -1.8495, -1.8525, - -1.9003, -1.8945, -1.9320]) -tensor([-0.0081, -0.0037]) -tensor([0.0130, 0.0226]) -tensor([-0.0501, -0.0407, -0.0444, -0.0496]) -Total rewards: 4163 -Total Receives: 4159 -Stores 2951 1 -Total Receives: 4160 -Stores 2952 1 -Total Receives: 4161 -Stores 2953 1 -Total Receives: 4162 -Stores 2954 1 -Total Receives: 4163 -Stores 2955 1 -tensor([-0.0435, -0.0447, -0.0344, -0.0505]) -tensor([0.0034, 0.0026, 0.0050]) -Total rewards: 4168 -Total Receives: 4164 -Stores 2956 1 -Total Receives: 4165 -Stores 2957 1 -Total Receives: 4166 -Stores 2958 1 -Total Receives: 4167 -Stores 2959 1 -Total Receives: 4168 -Stores 2960 1 -tensor([0.0083, 0.0095, 0.0061, 0.0069]) -tensor([-1.1796, -1.1775, -1.1874, -1.2068, -1.2257, -1.1725, -1.1474, -1.2059, - -1.1866, -1.2171]) -tensor([ 0.0009, -0.0027, -0.0036, -0.0091, 0.0011]) -Total rewards: 4170 -Total Receives: 4169 -Stores 2961 1 -Total Receives: 4170 -Stores 2962 1 -tensor([0.0247, 0.0259, 0.0245, 0.0227, 0.0247]) -tensor([0.0111, 0.0052, 0.0099]) -tensor([-0.0627, -0.0638, -0.0685]) -tensor([0.0226, 0.0201]) -tensor([0.0349, 0.0306]) -tensor([-0.0548, -0.0355, -0.0343]) -Total rewards: 4174 -Total Receives: 4171 -Stores 2963 1 -Total Receives: 4172 -Stores 2964 1 -Total Receives: 4173 -Stores 2965 1 -Total Receives: 4174 -Stores 2966 1 -tensor([0.0171, 0.0204]) -tensor([-0.0076, -0.0072, -0.0082, -0.0075]) -Total rewards: 4182 -Total Receives: 4175 -Stores 2967 1 -Total Receives: 4176 -Stores 2968 1 -Total Receives: 4177 -Stores 2969 1 -Total Receives: 4178 -Stores 2970 1 -Total Receives: 4179 -Stores 2971 1 -Total Receives: 4180 -Stores 2972 1 -Total Receives: 4181 -Stores 2973 1 -Total Receives: 4182 -Stores 2974 1 -tensor([-0.5763, -0.5577, -0.0162]) -tensor([0.0346, 0.0338]) -tensor([0.0229, 0.0266, 0.0238, 0.0259]) -tensor([-0.0215, -0.0212]) -tensor([-0.6486, -0.6477, -0.6513, -0.6956, -0.7256, -0.6446, -0.6547, -0.6549, - -0.6502, -0.6527]) -Total rewards: 4187 -Total Receives: 4183 -Stores 2975 1 -Total Receives: 4184 -Stores 2976 1 -Total Receives: 4185 -Stores 2977 1 -Total Receives: 4186 -Stores 2978 1 -Total Receives: 4187 -Stores 2979 1 -tensor([ 0.0038, 0.0042, 0.0032, -0.0045]) -tensor([0.0169, 0.0135, 0.0171]) -tensor([-0.0528, -0.0424, -0.0561, -0.0367]) -Total rewards: 4192 -Total Receives: 4188 -Stores 2980 1 -Total Receives: 4189 -Stores 2981 1 -Total Receives: 4190 -Stores 2982 1 -Total Receives: 4191 -Stores 2983 1 -Total Receives: 4192 -Stores 2984 1 -Total rewards: 4198 -Total Receives: 4193 -Stores 2985 1 -Total Receives: 4194 -Stores 2986 1 -Total Receives: 4195 -Stores 2987 1 -Total Receives: 4196 -Stores 2988 1 -Total Receives: 4197 -Stores 2989 1 -Total Receives: 4198 -Stores 2990 1 -tensor([0.0134, 0.0132, 0.0164]) -Total rewards: 4205 -Total Receives: 4199 -Stores 2991 1 -Total Receives: 4200 -Stores 2992 1 -Total Receives: 4201 -Stores 2993 1 -Total Receives: 4202 -Stores 2994 1 -Total Receives: 4203 -Stores 2995 1 -Total Receives: 4204 -Stores 2996 1 -Total Receives: 4205 -Stores 2997 1 -Total rewards: 4211 -Total Receives: 4206 -Stores 2998 1 -Total Receives: 4207 -Stores 2999 1 -Total Receives: 4208 -Stores 3000 1 -Total Receives: 4209 -Stores 3001 1 -Total Receives: 4210 -Stores 3002 1 -Total Receives: 4211 -Stores 3003 1 -tensor([0.0295, 0.0286, 0.0214]) -tensor([-0.2571, -0.2622, -0.2174, -0.3998, -0.4145, -0.2083, -0.2218, -0.1977, - -0.2394, -0.2052]) -tensor([0.0191, 0.0182, 0.0150, 0.0201]) -Total rewards: 4219 -Total Receives: 4212 -Stores 3004 1 -Total Receives: 4213 -Stores 3005 1 -Total Receives: 4214 -Stores 3006 1 -Total Receives: 4215 -Stores 3007 1 -Total Receives: 4216 -Stores 3008 1 -Total Receives: 4217 -Stores 3009 1 -Total Receives: 4218 -Stores 3010 1 -Total Receives: 4219 -Stores 3011 1 -tensor([0.0327, 0.0317]) -tensor([0.0292, 0.0349]) -tensor([0.0167, 0.0159, 0.0169]) -tensor([-0.1886, -0.1921, -0.1549, -0.2881, -0.2995, -0.1352, -0.1541, -0.1739, - -0.1189]) -tensor([0.0383, 0.0368, 0.0349]) -Total rewards: 4223 -Total Receives: 4220 -Stores 3012 1 -Total Receives: 4221 -Stores 3013 1 -Total Receives: 4222 -Stores 3014 1 -Total Receives: 4223 -Stores 3015 1 -tensor([-0.0933, -0.0650, -0.0792, -0.0533]) -tensor([0.0349, 0.0329]) -tensor([0.0312, 0.0252]) -tensor([0.0128, 0.0097]) -Total rewards: 4228 -Total Receives: 4224 -Stores 3016 1 -Total Receives: 4225 -Stores 3017 1 -Total Receives: 4226 -Stores 3018 1 -Total Receives: 4227 -Stores 3019 1 -Total Receives: 4228 -Stores 3020 1 -Total Receives: 2712 -Stores 1959 1 -Total Receives: 2713 -Stores 1960 1 -Total Receives: 2714 -Stores 1961 1 -Total Receives: 2715 -Stores 1962 1 -tensor([-25.4002, -25.3879, -25.3930, -25.4207]) -tensor([-1.3432, -1.3484]) -tensor([-19.5834, -19.5899, -19.5877, -19.6190, -19.6006, -19.6190]) -tensor([-0.0721, -0.0938, -0.0633]) -tensor([-41.4936, -41.5449, -41.5327, -41.5212, -41.5443, -41.5982, -41.5605, - -41.5982]) -tensor([-20.6553, -20.6463, -20.6165]) -tensor([0.0066, 0.0100]) -tensor([-67.2157, -67.2289, -67.2509, -67.2152, -67.2785, -67.2825, -67.3011, - -67.2574, -67.2966]) -tensor([-29.4845, -29.4508, -29.4569]) -tensor([-0.5685, -0.5535, -0.5641, -0.5625, -0.5705]) -tensor([-25.1706, -25.1726, -25.2014, -25.2131, -25.2053]) -tensor([-23.0240, -23.0249, -23.0250, -23.0374]) -tensor([-20.2226, -20.2156, -20.2140]) -tensor([-22.7225, -22.7290, -22.7684, -22.7269, -22.7725, -22.7725]) -tensor([-31.6063, -31.6115, -31.6607]) -tensor([-1.8468, -1.8495, -1.8646, -1.8646, -1.8646, -1.8646]) -tensor([-1.9250, -1.9183]) -tensor([-0.0712, -0.0515, -0.0809]) -tensor([-22.4623, -22.4543, -22.4868, -22.4905, -22.5083]) -tensor([-20.0201, -20.0235, -20.0449]) -tensor([-19.0347, -19.0755, -19.0342, -19.0860, -19.0860]) -tensor([-0.9594, -1.0126, -1.0088, -1.0255]) -tensor([-0.4916, -0.4898, -0.5008]) -tensor([-41.7504, -41.7475, -41.8294, -41.7555, -41.7529, -41.8167, -41.8314, - -41.7749, -41.8251, -41.8314, -41.8314, -41.8314]) -tensor([-1.5368, -1.5222, -1.5399]) -Total rewards: 2725 -Total Receives: 2716 -Stores 1963 1 -Total Receives: 2717 -Stores 1964 1 -Total Receives: 2718 -Stores 1965 1 -Total Receives: 2719 -Stores 1966 1 -Total Receives: 2720 -Stores 1967 1 -Total Receives: 2721 -Stores 1968 1 -Total Receives: 2722 -Stores 1969 1 -Total Receives: 2723 -Stores 1970 1 -Total Receives: 2724 -Stores 1971 1 -Total Receives: 2725 -Stores 1972 1 -tensor([-23.8806, -23.8408, -23.8658, -23.8815, -23.8723]) -Total rewards: 2734 -Total Receives: 2726 -Stores 1973 1 -Total Receives: 2727 -Stores 1974 1 -Total Receives: 2728 -Stores 1975 1 -Total Receives: 2729 -Stores 1976 1 -Total Receives: 2730 -Stores 1977 1 -Total Receives: 2731 -Stores 1978 1 -Total Receives: 2732 -Stores 1979 1 -Total Receives: 2733 -Stores 1980 1 -Total Receives: 2734 -Stores 1981 1 -tensor([-26.6610, -26.6799, -26.6950, -26.7344, -26.7300, -26.7674, -26.7447, - -26.7146, -26.7467]) -tensor([-1.2460, -1.2572]) -tensor([-18.8498, -18.8410, -18.8395]) -tensor([-0.0596, -0.0256, -0.0709]) -tensor([0.0306, 0.0324]) -tensor([-17.4380, -17.4272]) -Total rewards: 2744 -Total Receives: 2735 -Stores 1982 1 -Total Receives: 2736 -Stores 1983 1 -Total Receives: 2737 -Stores 1984 1 -Total Receives: 2738 -Stores 1985 1 -Total Receives: 2739 -Stores 1986 1 -Total Receives: 2740 -Stores 1987 1 -Total Receives: 2741 -Stores 1988 1 -Total Receives: 2742 -Stores 1989 1 -Total Receives: 2743 -Stores 1990 1 -Total Receives: 2744 -Stores 1991 1 -tensor([-22.6582, -22.6830, -22.6948, -22.7144]) -Total rewards: 2747 -Total Receives: 2745 -Stores 1992 1 -Total Receives: 2746 -Stores 1993 1 -Total Receives: 2747 -Stores 1994 1 -tensor([-1.0832, -1.0862]) -tensor([-1.6521, -1.6784, -1.6784, -1.6784, -1.6784]) -tensor([-22.8154, -22.8563, -22.8796, -22.8796]) -tensor([-19.9900, -20.0161, -20.0161]) -tensor([-1.7677, -1.7790]) -tensor([-29.0644, -29.0942, -29.1215, -29.1285, -29.1663, -29.1648, -29.1467, - -29.1030, -29.1427]) -tensor([-1.2935, -1.2757]) -tensor([-28.8784, -28.8407, -28.8636, -28.8694, -28.8815, -28.8849]) -Total rewards: 2749 -Total Receives: 2748 -Stores 1995 1 -Total Receives: 2749 -Stores 1996 1 -tensor([-0.0653, -0.0750, -0.0410]) -tensor([-1.4465, -1.4465, -1.4465, -1.4465]) -Total rewards: 2758 -Total Receives: 2750 -Stores 1997 1 -Total Receives: 2751 -Stores 1998 1 -Total Receives: 2752 -Stores 1999 1 -Total Receives: 2753 -Stores 2000 1 -Total Receives: 2754 -Stores 2001 1 -Total Receives: 2755 -Stores 2002 1 -Total Receives: 2756 -Stores 2003 1 -Total Receives: 2757 -Stores 2004 1 -Total Receives: 2758 -Stores 2005 1 -tensor([-18.5873, -18.6070]) -tensor([-28.6320, -28.6621, -28.6737]) -tensor([-1.7495, -1.7469, -1.7498]) -tensor([-1.0143, -1.0305]) -tensor([-55.3754, -55.3758, -55.4486, -55.3782, -55.3770, -55.4757, -55.4127, - -55.4470, -55.4756, -55.4756, -55.4757, -55.4756]) -tensor([-30.3390, -30.3672, -30.3746, -30.4234, -30.4118, -30.3934, -30.3495, - -30.3905]) -tensor([-28.4973, -28.5230, -28.4949, -28.5316]) -tensor([-1.6187, -1.6021, -1.6029, -1.6204]) -tensor([-0.0988, -0.1004, -0.0879]) -tensor([-26.3660, -26.3854]) -tensor([-34.2567, -34.2553]) -Total rewards: 2759 -Total Receives: 2759 -Stores 2006 1 -tensor([-3.7699, -3.7699, -3.7699, -3.8471]) -tensor([-2.6029, -2.5993, -2.6084, -2.6084]) -Total rewards: 2768 -Total Receives: 2760 -Stores 2007 1 -Total Receives: 2761 -Stores 2008 1 -Total Receives: 2762 -Stores 2009 1 -Total Receives: 2763 -Stores 2010 1 -Total Receives: 2764 -Stores 2011 1 -Total Receives: 2765 -Stores 2012 1 -Total Receives: 2766 -Stores 2013 1 -Total Receives: 2767 -Stores 2014 1 -Total Receives: 2768 -Stores 2015 1 -tensor([-38.4543, -38.4082, -38.4415, -38.4530, -38.4589, -38.4746, -38.4825]) -tensor([-54.8047, -54.8041, -54.9136, -54.8839, -54.8375, -54.9233, -54.8482, - -54.8834, -54.9233, -54.9233, -54.9233, -54.9233]) -tensor([-24.9516, -24.9889, -25.0002]) -tensor([-0.0655, -0.0257, -0.0371]) -tensor([-3.2771, -3.2543, -3.2908]) -tensor([-33.7427, -33.7500, -33.8158, -33.7877, -33.7671, -33.7267, -33.7683]) -tensor([-3.1068, -3.1035, -3.1201, -3.1201]) -tensor([-0.0970, -0.0994, -0.0778]) -tensor([-2.4843, -2.4864, -2.4719, -2.4838]) -tensor([-7.0588, -7.0702]) -tensor([-35.2635, -35.2379, -35.2649, -35.2885]) -tensor([-3.4065, -3.3981, -3.4109]) -tensor([-23.3338, -23.3400]) -tensor([-30.0247, -30.0267, -30.0429, -30.0515]) -tensor([-2.9678, -2.9724]) -tensor([-3.1741, -3.1761, -3.1704, -3.1761, -3.1548, -3.1744]) -tensor([-39.8617, -39.8647, -39.8531, -39.8892, -39.8760, -39.8781, -39.9067]) -tensor([-2.7817, -2.8137, -2.8132]) -tensor([-62.5892, -62.5886, -62.4764, -62.5689, -62.5609, -62.5469, -62.4872, - -62.5994, -62.5072, -62.5466, -62.5994, -62.5993, -62.5993, -62.5993]) -tensor([-33.8821, -33.9527]) -tensor([-3.3272, -3.3158, -3.3425, -3.3425]) -tensor([-0.1072, -0.0723, -0.0442]) -Total rewards: 2769 -Total Receives: 2769 -Stores 2016 1 -tensor([-33.8927, -33.8927]) -tensor([-44.4274, -44.4348, -44.5260, -44.4739, -44.4596, -44.4576, -44.5260]) -Total rewards: 2771 -Total Receives: 2770 -Stores 2017 1 -Total Receives: 2771 -Stores 2018 1 -tensor([-2.8455, -2.8489, -2.8509]) -tensor([-2.7498, -2.7657, -2.7513]) -tensor([-25.5910, -25.5920, -25.5615]) -Total rewards: 2778 -Total Receives: 2772 -Stores 2019 1 -Total Receives: 2773 -Stores 2020 1 -Total Receives: 2774 -Stores 2021 1 -Total Receives: 2775 -Stores 2022 1 -Total Receives: 2776 -Stores 2023 1 -Total Receives: 2777 -Stores 2024 1 -Total Receives: 2778 -Stores 2025 1 -tensor([-34.3087, -34.3417, -34.3229, -34.3475, -34.3475]) -tensor([-58.1929, -58.1918, -58.1731, -58.1663, -58.1541, -58.1025, -58.2075, - -58.1200, -58.1582, -58.2075, -58.2075, -58.2075, -58.2075]) -tensor([-0.1513, -0.1582, -0.1379]) -tensor([-40.0282, -40.0085, -40.0184]) -tensor([-40.5318, -40.6240, -40.5699, -40.5558, -40.5591, -40.6240]) -tensor([-3.0123, -3.0190, -3.0310, -3.0120, -3.0310, -3.0124, -3.0289]) -tensor([-0.1510, -0.1420, -0.1403]) -tensor([-0.2268, -0.2273]) -tensor([-41.8661, -41.8578]) -tensor([-54.1520, -54.1937, -54.1560, -54.1929, -54.1584, -54.1994, -54.1818, - -54.2179]) -tensor([-37.8345, -37.8176, -37.8534, -37.8534]) -tensor([-2.8958, -2.8955, -2.9075, -2.9075, -2.8900, -2.9031]) -tensor([-0.1238, -0.0948]) -tensor([-0.1642, -0.1329]) -tensor([-30.4952, -30.4854, -30.4864, -30.4846]) -tensor([-2.6140, -2.6020, -2.6216]) -tensor([-3.2193, -3.2445, -3.2487, -3.2487]) -tensor([-43.6429, -43.5837, -43.5576, -43.5657, -43.6429]) -tensor([-62.9439, -62.9519, -62.9324, -62.9182, -62.9276, -62.8931, -62.8349, - -62.9764, -62.8661, -62.8962, -62.9765, -62.9764, -62.9765]) -tensor([-5.9592, -5.9714, -5.9689]) -tensor([-0.2253, -0.2520]) -tensor([-4.0985, -4.0995, -4.1067, -4.0662]) -tensor([-6.2319, -6.2431]) -Total rewards: 2787 -Total Receives: 2779 -Stores 2026 1 -Total Receives: 2780 -Stores 2027 1 -Total Receives: 2781 -Stores 2028 1 -Total rewards: 4237 -Total Receives: 4229 -Stores 3021 1 -Total Receives: 4230 -Stores 3022 1 -Total Receives: 4231 -Stores 3023 1 -Total Receives: 4232 -Stores 3024 1 -Total Receives: 4233 -Stores 3025 1 -Total Receives: 4234 -Stores 3026 1 -Total Receives: 4235 -Stores 3027 1 -Total Receives: 4236 -Stores 3028 1 -Total Receives: 4237 -Stores 3029 1 -tensor([0.0299, 0.0301]) -tensor([-0.0752, -0.0662, -0.0576, -0.0768, -0.0536]) -tensor([0.0332, 0.0339, 0.0307]) -tensor([-0.2012, -0.1839, -0.1680, -0.3030, -0.3142, -0.1495, -0.1863, -0.1275, - -0.1275]) -tensor([0.0078, 0.0062]) -Total rewards: 4241 -Total Receives: 4238 -Stores 3030 1 -Total Receives: 4239 -Stores 3031 1 -Total Receives: 4240 -Stores 3032 1 -Total Receives: 4241 -Stores 3033 1 -tensor([-0.1370, -0.1084, -0.1233]) -tensor([0.0137, 0.0145]) -Total rewards: 4246 -Total Receives: 4242 -Stores 3034 1 -Total Receives: 4243 -Stores 3035 1 -Total Receives: 4244 -Stores 3036 1 -Total Receives: 4245 -Stores 3037 1 -Total Receives: 4246 -Stores 3038 1 -tensor([-0.2801, -0.2840]) -tensor([-0.0712, -0.0802, -0.0781, -0.0716]) -tensor([-0.2782, -0.2773]) -tensor([0.0339, 0.0351]) -tensor([0.0543, 0.0442]) -Total rewards: 4247 -Total Receives: 4247 -Stores 3039 1 -tensor([0.0290, 0.0302]) -tensor([0.0294, 0.0296]) -tensor([-0.0664, -0.0629]) -tensor([-0.0244, -0.0293, -0.0320, -0.0188, -0.0188]) -tensor([-0.0423, -0.0531, -0.0571, -0.0533]) -tensor([-0.1290, -0.1132, -0.1035, -0.1337, -0.0896]) -tensor([-0.7864, -0.7858, -0.7609, -0.8351, -0.8478, -0.7909, -0.7861, -0.7868]) -tensor([-0.0453, -0.0498, -0.0529, -0.0365]) -tensor([0.0313, 0.0307]) -tensor([-0.0865, -0.0658, -0.0259, -0.0242, -0.0332, -0.0314]) -tensor([-0.0211, -0.0216, -0.0161]) -tensor([-0.1098, -0.0918, -0.0538, -0.0620, -0.0607]) -tensor([-0.0643, -0.0638, -0.0660]) -tensor([0.0281, 0.0283]) -tensor([0.0341, 0.0335]) -tensor([0.0286, 0.0286, 0.0265]) -tensor([-0.4030, -0.4082, -0.5696, -0.5849, -0.3548, -0.3809, -0.3409, -0.3421]) -Total rewards: 4252 -Total Receives: 4248 -Stores 3040 1 -Total Receives: 4249 -Stores 3041 1 -Total Receives: 4250 -Stores 3042 1 -Total Receives: 4251 -Stores 3043 1 -Total Receives: 4252 -Stores 3044 1 -tensor([0.0334, 0.0333]) -tensor([0.0251, 0.0275]) -tensor([0.0427, 0.0404]) -tensor([-0.0935, -0.0802, -0.0869]) -tensor([0.0284, 0.0404]) -tensor([-0.1398, -0.1135, -0.1465, -0.1000, -0.0886]) -tensor([-0.0256, -0.0350, -0.0302]) -tensor([-0.0378, -0.0552, -0.0581]) -tensor([-0.1001, -0.1043, -0.1060, -0.0790]) -tensor([0.0369, 0.0399]) -tensor([0.0309, 0.0279]) -tensor([-0.1668, -0.1472, -0.1087, -0.1053, -0.0814, -0.0778]) -Total rewards: 4258 -Total Receives: 4253 -Stores 3045 1 -Total Receives: 4254 -Stores 3046 1 -Total Receives: 4255 -Stores 3047 1 -Total Receives: 4256 -Stores 3048 1 -Total Receives: 4257 -Stores 3049 1 -Total Receives: 4258 -Stores 3050 1 -tensor([-0.3053, -0.4295, -0.4411, -0.2561, -0.2821, -0.2042, -0.2156]) -tensor([-0.0653, -0.0432, -0.0688, -0.0715]) -tensor([-0.1504, -0.1366, -0.1086, -0.1056, -0.0891]) -tensor([-0.4921, -0.6510, -0.6653, -0.4421, -0.4648, -0.4335]) -tensor([-0.3598, -0.4809, -0.4931, -0.3103, -0.3367]) -tensor([-0.1205, -0.1078, -0.0901]) -tensor([0.0382, 0.0386, 0.0368]) -tensor([-0.3216, -0.4516, -0.2433, -0.2903]) -tensor([0.0083, 0.0069, 0.0067]) -tensor([-0.0456, -0.0695, -0.0407, -0.0744]) -tensor([-0.1374, -0.1196, -0.1427, -0.1080, -0.0852]) -tensor([0.0249, 0.0273]) -Total rewards: 4261 -Total Receives: 4259 -Stores 3051 1 -Total Receives: 4260 -Stores 3052 1 -Total Receives: 4261 -Stores 3053 1 -Total rewards: 4266 -Total Receives: 4262 -Stores 3054 1 -Total Receives: 4263 -Stores 3055 1 -Total Receives: 4264 -Stores 3056 1 -Total Receives: 4265 -Stores 3057 1 -Total Receives: 4266 -Stores 3058 1 -Total rewards: 4269 -Total Receives: 4267 -Stores 3059 1 -Total Receives: 4268 -Stores 3060 1 -Total Receives: 4269 -Stores 3061 1 -tensor([0.0333, 0.0366]) -tensor([0.0284, 0.0298]) -tensor([0.0391, 0.0407]) -tensor([-0.1534, -0.1361, -0.0856, -0.1072, -0.0912]) -Total rewards: 4272 -Total Receives: 4270 -Stores 3062 1 -Total Receives: 4271 -Stores 3063 1 -Total Receives: 4272 -Stores 3064 1 -tensor([-0.1358, -0.1398, -0.1418]) -Total rewards: 4275 -Total Receives: 4273 -Stores 3065 1 -Total Receives: 4274 -Stores 3066 1 -Total Receives: 4275 -Stores 3067 1 -tensor([-0.0064, -0.0047, -0.0031]) -tensor([-0.0813, -0.0719, -0.0507, -0.0568]) -tensor([0.0301, 0.0291]) -tensor([-0.1603, -0.1453, -0.1663, -0.1338, -0.0957]) -tensor([-0.0466, -0.0502, -0.0452]) -tensor([-0.0038, -0.0061, -0.0062]) -tensor([-0.1492, -0.1567, -0.1287, -0.0902]) -tensor([-0.2555, -0.4106, -0.1410, -0.2397, -0.1386]) -tensor([0.0054, 0.0032]) -Total rewards: 4278 -Total Receives: 4276 -Stores 3068 1 -Total Receives: 4277 -Stores 3069 1 -Total Receives: 4278 -Stores 3070 1 -tensor([0.0401, 0.0329]) -tensor([ 0.0015, -0.0058]) -tensor([0.0362, 0.0357]) -tensor([-0.2797, -0.4122, -0.1735, -0.2652]) -tensor([0.0360, 0.0346]) -tensor([-0.1431, -0.1462]) -tensor([0.0404, 0.0274]) -tensor([0.0296, 0.0240]) -tensor([-0.1816, -0.1640, -0.1174, -0.1361]) -tensor([-0.0922, -0.0827, -0.0694, -0.0581]) -tensor([-0.0610, -0.0612, -0.0594]) -Total rewards: 4285 -Total Receives: 4279 -Stores 3071 1 -Total Receives: 4280 -Stores 3072 1 -Total Receives: 4281 -Stores 3073 1 -Total Receives: 4282 -Stores 3074 1 -Total Receives: 4283 -Stores 3075 1 -Total Receives: 4284 -Stores 3076 1 -Total Receives: 4285 -Stores 3077 1 -tensor([-0.0966, -0.0775, -0.0999]) -tensor([-0.0012, -0.0004]) -tensor([-0.0786, -0.0956, -0.0967, -0.0959]) -tensor([-0.0569, -0.0583, -0.0324]) -tensor([-5.4547e-03, -4.6816e-03, -9.8051e-05, 3.9808e-03]) -tensor([ 0.0007, -0.0092, -0.0102]) -Total rewards: 4290 -Total Receives: 4286 -Stores 3078 1 -Total Receives: 4287 -Stores 3079 1 -Total Receives: 4288 -Stores 3080 1 -Total Receives: 4289 -Stores 3081 1 -Total Receives: 4290 -Stores 3082 1 -tensor([-0.0465, -0.0235, -0.0477]) -Total rewards: 4293 -Total Receives: 4291 -Stores 3083 1 -Total Receives: 4292 -Stores 3084 1 -Total Receives: 4293 -Stores 3085 1 -tensor([0.0380, 0.0374]) -tensor([0.0329, 0.0309, 0.0320]) -Total rewards: 4298 -Total Receives: 4294 -Stores 3086 1 -Total Receives: 4295 -Stores 3087 1 -Total Receives: 4296 -Stores 3088 1 -Total Receives: 4297 -Stores 3089 1 -Total Receives: 4298 -Stores 3090 1 -Total rewards: 4304 -Total Receives: 4299 -Stores 3091 1 -Total Receives: 4300 -Stores 3092 1 -Total Receives: 4301 -Stores 3093 1 -Total Receives: 4302 -Stores 3094 1 -Total Receives: 4303 -Stores 3095 1 -Total Receives: 4304 -Stores 3096 1 -tensor([-0.3110, -0.4432, -0.2972]) -tensor([-0.0976, -0.1462, -0.0999]) -tensor([-0.0033, -0.0073, -0.0038, -0.0099]) -tensor([0.0386, 0.0266]) -tensor([-0.1155, -0.1151, -0.1086, -0.0860]) -Total rewards: 4309 -Total Receives: 4305 -Stores 3097 1 -Total Receives: 4306 -Stores 3098 1 -Total Receives: 4307 -Stores 3099 1 -Total Receives: 4308 -Stores 3100 1 -Total Receives: 4309 -Stores 3101 1 -tensor([0.0439, 0.0298, 0.0358]) -tensor([-0.1777, -0.2304, -0.1444]) -tensor([0.0395, 0.0383]) -tensor([-0.0251, -0.0243, -0.0261]) -Total rewards: 4315 -Total Receives: 4310 -Stores 3102 1 -Total Receives: 4311 -Stores 3103 1 -Total Receives: 4312 -Stores 3104 1 -Total Receives: 4313 -Stores 3105 1 -Total Receives: 4314 -Stores 3106 1 -Total Receives: 4315 -Stores 3107 1 -tensor([-0.1358, -0.0897]) -tensor([-0.0346, -0.0345, -0.0557]) -tensor([-0.0471, -0.0479, -0.0532]) -tensor([-0.0158, -0.0167, -0.0178, -0.0170]) -Total rewards: 4320 -Total Receives: 4316 -Stores 3108 1 -Total Receives: 4317 -Stores 3109 1 -Total Receives: 4318 -Stores 3110 1 -Total Receives: 4319 -Stores 3111 1 -Total Receives: 4320 -Stores 3112 1 -Total rewards: 4322 -Total Receives: 4321 -Stores 3113 1 -Total Receives: 4322 -Stores 3114 1 -tensor([0.0293, 0.0358]) -tensor([0.0154, 0.0118, 0.0169]) -tensor([0.0411, 0.0388]) -tensor([0.0104, 0.0120, 0.0113]) -tensor([-0.1052, -0.0890]) -Total rewards: 4327 -Total Receives: 4323 -Stores 3115 1 -Total Receives: 4324 -Stores 3116 1 -Total Receives: 4325 -Stores 3117 1 -Total Receives: 4326 -Stores 3118 1 -Total Receives: 4327 -Stores 3119 1 -tensor([-0.0488, -0.0427]) -Total rewards: 4334 -Total Receives: 4328 -Stores 3120 1 -Total Receives: 4329 -Stores 3121 1 -Total Receives: 4330 -Stores 3122 1 -Total Receives: 4331 -Stores 3123 1 -Total Receives: 4332 -Stores 3124 1 -Total Receives: 4333 -Stores 3125 1 -Total Receives: 4334 -Stores 3126 1 -tensor([0.0345, 0.0337, 0.0351]) -tensor([-0.1122, -0.1101, -0.0978, -0.1060]) -tensor([0.0182, 0.0169, 0.0170, 0.0162]) -Total rewards: 4337 -Total Receives: 4335 -Stores 3127 1 -Total Receives: 4336 -Stores 3128 1 -Total Receives: 4337 -Stores 3129 1 -tensor([0.0383, 0.0346]) -tensor([-0.0544, -0.0383, -0.0499, -0.0268]) -tensor([-0.0599, -0.0907, -0.0413]) -tensor([-0.0062, -0.0042]) -Total rewards: 4342 -Total Receives: 4338 -Stores 3130 1 -Total Receives: 4339 -Stores 3131 1 -Total Receives: 4340 -Stores 3132 1 -Total Receives: 4341 -Stores 3133 1 -Total Receives: 4342 -Stores 3134 1 -tensor([ 0.0027, -0.0229, -0.0188]) -tensor([0.0394, 0.0390]) -tensor([0.0284, 0.0300]) -tensor([-0.0828, -0.0641]) -tensor([-0.0786, -0.0632, -0.0751]) -Total rewards: 4346 -Total Receives: 4343 -Stores 3135 1 -Total Receives: 4344 -Stores 3136 1 -Total Receives: 4345 -Stores 3137 1 -Total Receives: 4346 -Stores 3138 1 -tensor([0.0293, 0.0303, 0.0306]) -tensor([0.0359, 0.0378]) -tensor([0.0070, 0.0083, 0.0070, 0.0085, 0.0123]) -tensor([-0.0335, -0.0335, -0.0363]) -tensor([0.0362, 0.0368]) -tensor([0.0312, 0.0284]) -tensor([-0.0219, -0.0233]) -tensor([-0.1844, -0.1698, -0.1054, -0.0905]) -tensor([0.0092, 0.0070]) -tensor([0.0296, 0.0472, 0.0300]) -tensor([-0.0771, -0.0491, -0.0465]) -tensor([0.0236, 0.0334]) -tensor([0.0124, 0.0052, 0.0051]) -tensor([-0.0874, -0.0766, -0.0658]) -tensor([-0.0650, -0.0567]) -tensor([0.0314, 0.0283]) -tensor([0.0335, 0.0343, 0.0326]) -tensor([ 0.0006, 0.0006, -0.0006, 0.0017]) -tensor([0.0378, 0.0288]) -tensor([-0.0589, -0.0379, -0.0357, -0.0326]) -Total rewards: 4352 -Total Receives: 4347 -Stores 3139 1 -Total Receives: 4348 -Stores 3140 1 -Total Receives: 4349 -Stores 3141 1 -Total Receives: 4350 -Stores 3142 1 -Total Receives: 4351 -Stores 3143 1 -Total Receives: 4352 -Stores 3144 1 -tensor([0.0299, 0.0290, 0.0281]) -tensor([0.0184, 0.0197, 0.0202, 0.0200, 0.0182]) -tensor([0.0213, 0.0185, 0.0177]) -tensor([-0.0694, -0.0668, -0.0590, -0.0598, -0.0543, -0.0553]) -tensor([-0.0528, -0.0921, -0.0523, -0.0874]) -tensor([0.0079, 0.0088, 0.0071, 0.0041]) -tensor([-0.0412, -0.0419, -0.0458]) -Total rewards: 4357 -Total Receives: 4353 -Stores 3145 1 -Total Receives: 4354 -Stores 3146 1 -Total Receives: 4355 -Stores 3147 1 -Total Receives: 4356 -Stores 3148 1 -Total Receives: 4357 -Stores 3149 1 -tensor([0.0240, 0.0213]) -tensor([0.0082, 0.0082, 0.0098, 0.0092]) -tensor([-0.0636, -0.0400, -0.0319, -0.0398]) -tensor([-0.0841, -0.0816, -0.0722, -0.0729, -0.0632]) -tensor([0.0264, 0.0265, 0.0266]) -tensor([-0.0034, -0.0044, -0.0041, -0.0040]) -tensor([0.0339, 0.0326]) -tensor([-0.1876, -0.1136, -0.0806, -0.0983]) -Total rewards: 4364 -Total Receives: 4358 -Stores 3150 1 -Total Receives: 4359 -Stores 3151 1 -Total Receives: 4360 -Stores 3152 1 -Total Receives: 4361 -Stores 3153 1 -Total Receives: 4362 -Stores 3154 1 -Total Receives: 4363 -Stores 3155 1 -Total Receives: 4364 -Stores 3156 1 -tensor([-0.0559, -0.0617]) -tensor([0.0331, 0.0320, 0.0303, 0.0325]) -tensor([0.0199, 0.0172]) -Total rewards: 4369 -Total Receives: 4365 -Stores 3157 1 -Total Receives: 4366 -Stores 3158 1 -Total Receives: 4367 -Stores 3159 1 -Total Receives: 4368 -Stores 3160 1 -Total Receives: 4369 -Stores 3161 1 -tensor([0.0305, 0.0294]) -tensor([0.0249, 0.0278, 0.0264, 0.0252]) -tensor([0.0377, 0.0335, 0.0338]) -Total rewards: 4374 -Total Receives: 4370 -Stores 3162 1 -Total Receives: 4371 -Stores 3163 1 -Total Receives: 4372 -Stores 3164 1 -Total Receives: 4373 -Stores 3165 1 -Total Receives: 4374 -Stores 3166 1 -tensor([-0.0165, -0.0125]) -tensor([0.0404, 0.0362]) -tensor([0.0250, 0.0262, 0.0215]) -tensor([0.0306, 0.0291, 0.0340, 0.0306]) -tensor([0.0021, 0.0037]) -tensor([-0.1068, -0.1049, -0.0957, -0.0976]) -tensor([0.0306, 0.0299]) -Total rewards: 4376 -Total Receives: 4375 -Stores 3167 1 -Total Receives: 4376 -Stores 3168 1 -tensor([0.0269, 0.0272]) -tensor([-0.1261, -0.1020, -0.1168]) -tensor([-0.0233, -0.0406, -0.0279]) -tensor([-0.0332, -0.0307, -0.0302]) -tensor([-0.0860, -0.0770, -0.0434]) -Total rewards: 4382 -Total Receives: 4377 -Stores 3169 1 -Total Receives: 4378 -Stores 3170 1 -Total Receives: 4379 -Stores 3171 1 -Total Receives: 4380 -Stores 3172 1 -Total Receives: 4381 -Stores 3173 1 -Total Receives: 4382 -Stores 3174 1 -tensor([-0.0089, -0.0107]) -tensor([-0.1036, -0.0772, -0.0766, -0.0569, -0.0489, -0.0489]) -tensor([-0.0331, -0.0302, -0.0331]) -tensor([0.0025, 0.0037, 0.0043, 0.0101, 0.0100]) -tensor([0.0260, 0.0252, 0.0232, 0.0232]) -tensor([-0.0285, -0.0218, -0.0218]) -Total rewards: 4389 -Total Receives: 4383 -Stores 3175 1 -Total Receives: 4384 -Stores 3176 1 -Total Receives: 4385 -Stores 3177 1 -Total Receives: 4386 -Stores 3178 1 -Total Receives: 4387 -Stores 3179 1 -Total Receives: 4388 -Stores 3180 1 -Total Receives: 4389 -Stores 3181 1 -Total rewards: 4394 -Total Receives: 4390 -Stores 3182 1 -Total Receives: 4391 -Stores 3183 1 -Total Receives: 4392 -Stores 3184 1 -Total Receives: 4393 -Stores 3185 1 -Total Receives: 4394 -Stores 3186 1 -tensor([-0.0533, -0.0310, -0.0521]) -tensor([0.0380, 0.0383]) -tensor([-0.1086, -0.0861, -0.0868, -0.0697, -0.0578]) -tensor([0.0285, 0.0317]) -tensor([-0.0363, -0.0461, -0.0426, -0.0278]) -tensor([-0.0157, -0.0159, -0.0100]) -tensor([0.0261, 0.0244, 0.0200]) -tensor([-0.0655, -0.0597, -0.0396, -0.0415, -0.0600]) -tensor([0.0362, 0.0378, 0.0383]) -tensor([-0.0660, -0.0873, -0.0616, -0.0758]) -tensor([0.0329, 0.0327]) -tensor([-0.0476, -0.0524, -0.0508]) -tensor([0.0394, 0.0417]) -tensor([-0.0439, -0.0391, -0.0316]) -Total rewards: 4399 -Total Receives: 4395 -Stores 3187 1 -Total Receives: 4396 -Stores 3188 1 -Total Receives: 4397 -Stores 3189 1 -Total Receives: 4398 -Stores 3190 1 -Total Receives: 4399 -Stores 3191 1 -tensor([-0.0580, -0.0614]) -tensor([-0.0843, -0.0723, -0.0587]) -tensor([0.0336, 0.0380]) -Total rewards: 4400 -Total Receives: 4400 -Stores 3192 1 -tensor([-0.0005, -0.0010]) -tensor([-0.0283, -0.0148, -0.0106, -0.0234]) -tensor([-0.0079, -0.0042, -0.0082]) -tensor([-0.0079, -0.0091, -0.0095, -0.0078]) -Total rewards: 4403 -Total Receives: 4401 -Stores 3193 1 -Total Receives: 4402 -Stores 3194 1 -Total Receives: 4403 -Stores 3195 1 -tensor([0.0304, 0.0296]) -tensor([0.0433, 0.0384]) -Total rewards: 4411 -Total Receives: 4404 -Stores 3196 1 -Total Receives: 4405 -Stores 3197 1 -Total Receives: 4406 -Stores 3198 1 -Total Receives: 4407 -Stores 3199 1 -Total Receives: 4408 -Stores 3200 1 -Total Receives: 4409 -Stores 3201 1 -Total Receives: 4410 -Stores 3202 1 -Total Receives: 4411 -Stores 3203 1 -tensor([-0.0685, -0.0666, -0.0490]) -tensor([-0.0740, -0.0645]) -tensor([0.0036, 0.0061]) -tensor([-0.1770, -0.1069, -0.1389, -0.1392, -0.1209]) -tensor([-0.1407, -0.0993, -0.1256, -0.1143]) -tensor([0.0354, 0.0383, 0.0383]) -tensor([0.0337, 0.0326, 0.0318]) -tensor([-0.1056, -0.1000, -0.0853, -0.0997]) -tensor([-0.0362, -0.0449, -0.0626, -0.0557, -0.0262]) -tensor([-0.0705, -0.0572]) -tensor([-0.0005, 0.0029]) -tensor([0.0073, 0.0059, 0.0069, 0.0049, 0.0066]) -tensor([-0.1185, -0.0630, -0.0484, -0.0769, -0.0489]) -tensor([0.0325, 0.0255, 0.0292, 0.0291]) -tensor([0.0334, 0.0327]) -tensor([0.0392, 0.0387]) -tensor([0.0238, 0.0271, 0.0271]) -tensor([-0.0770, -0.0924, -0.0688, -0.0639]) -Total rewards: 4417 -Total Receives: 4412 -Stores 3204 1 -Total Receives: 4413 -Stores 3205 1 -Total Receives: 4414 -Stores 3206 1 -Total Receives: 4415 -Stores 3207 1 -Total Receives: 4416 -Stores 3208 1 -Total Receives: 4417 -Stores 3209 1 -tensor([0.0322, 0.0321]) -tensor([0.0119, 0.0106, 0.0087]) -tensor([-0.0277, -0.0335, -0.0308]) -tensor([-0.0869, -0.0757, -0.0566, -0.0742]) -tensor([-0.0650, -0.0541, -0.0695, -0.0469]) -tensor([0.0385, 0.0385]) -tensor([0.0325, 0.0282]) -tensor([-0.0021, -0.0032]) -tensor([-0.0479, -0.0496, -0.0471, -0.0482]) -tensor([-0.0003, -0.0013, -0.0002]) -tensor([0.0278, 0.0281]) -tensor([-0.0891, -0.0640, -0.0398]) -tensor([0.0069, 0.0064, 0.0072, 0.0060]) -tensor([0.0120, 0.0121]) -tensor([-0.0661, -0.0506, -0.0967, -0.0904, -0.0628]) -tensor([0.0019, 0.0011, 0.0007]) -Total rewards: 4425 -Total Receives: 4418 -Stores 3210 1 -Total Receives: 4419 -Stores 3211 1 -Total Receives: 4420 -Stores 3212 1 -Total Receives: 4421 -Stores 3213 1 -Total Receives: 4422 -Stores 3214 1 -Total Receives: 4423 -Stores 3215 1 -Total Receives: 4424 -Stores 3216 1 -Total Receives: 4425 -Stores 3217 1 -tensor([0.0277, 0.0268]) -tensor([0.0128, 0.0132]) -tensor([-0.0560, -0.0544]) -tensor([0.0057, 0.0030]) -Total Receives: 2782 -Stores 2029 1 -Total Receives: 2783 -Stores 2030 1 -Total Receives: 2784 -Stores 2031 1 -Total Receives: 2785 -Stores 2032 1 -Total Receives: 2786 -Stores 2033 1 -Total Receives: 2787 -Stores 2034 1 -tensor([-47.7446, -47.6939, -47.7280, -47.6982, -47.7296, -47.7096, -47.7626]) -tensor([-41.4135, -41.4290]) -tensor([-35.8418, -35.8441]) -tensor([-0.2367, -0.2398, -0.2362, -0.2372]) -tensor([-32.7329, -32.7484, -32.7603, -32.7603, -32.7584]) -Total rewards: 2791 -Total Receives: 2788 -Stores 2035 1 -Total Receives: 2789 -Stores 2036 1 -Total Receives: 2790 -Stores 2037 1 -Total Receives: 2791 -Stores 2038 1 -tensor([-5.8904, -5.9250, -5.9344, -5.9344, -5.8936, -5.9134]) -tensor([-4.9301, -4.9390, -4.9390, -4.9079, -4.9229]) -tensor([-28.2455, -28.2501, -28.2732]) -tensor([-24.1487, -24.1455, -24.1311, -24.1361, -24.1278, -24.1409]) -tensor([-9.7271, -9.6894]) -tensor([-4.9228, -4.9398, -4.9421]) -tensor([-22.6250, -22.6096, -22.6144, -22.6063, -22.6191]) -tensor([-67.2890, -67.2940, -67.2892, -67.2613, -67.2695, -67.1768, -67.3359, - -67.2170, -67.2378, -67.3359, -67.3359, -67.3359]) -tensor([-4.5602, -4.5762, -4.5404, -4.5551]) -tensor([-58.8663, -58.8722, -58.8668, -58.8401, -58.8473, -58.7598, -58.7934, - -58.8164, -58.9187, -58.9186, -58.9186]) -tensor([-6.9745, -6.9995, -6.9495]) -Total rewards: 2794 -Total Receives: 2792 -Stores 2039 1 -Total Receives: 2793 -Stores 2040 1 -Total Receives: 2794 -Stores 2041 1 -Total rewards: 2800 -Total Receives: 2795 -Stores 2042 1 -Total Receives: 2796 -Stores 2043 1 -Total Receives: 2797 -Stores 2044 1 -Total Receives: 2798 -Stores 2045 1 -Total Receives: 2799 -Stores 2046 1 -Total Receives: 2800 -Stores 2047 1 -tensor([-7.9773, -7.9635, -7.9924, -7.9924, -7.9924]) -tensor([-37.0386, -37.0123, -37.0333, -37.1211, -37.1073]) -tensor([-9.5367, -9.5284]) -tensor([-0.1362, -0.1363, -0.1341]) -tensor([-21.8278, -21.8622, -21.8622]) -tensor([-29.3831, -29.3385, -29.3516, -29.3755, -29.3843, -29.3713]) -tensor([-6.9086, -6.9313]) -tensor([-27.4411, -27.4540, -27.4772, -27.4862, -27.4734]) -tensor([-9.4987, -9.4715, -9.4979, -9.4991, -9.5019]) -Total rewards: 2810 -Total Receives: 2801 -Stores 2048 1 -Total Receives: 2802 -Stores 2049 1 -Total Receives: 2803 -Stores 2050 1 -Total Receives: 2804 -Stores 2051 1 -Total Receives: 2805 -Stores 2052 1 -Total Receives: 2806 -Stores 2053 1 -Total Receives: 2807 -Stores 2054 1 -Total Receives: 2808 -Stores 2055 1 -Total Receives: 2809 -Stores 2056 1 -Total Receives: 2810 -Stores 2057 1 -tensor([-41.1119, -41.0883, -41.0572, -41.0926, -41.0707, -41.1388, -41.1474]) -tensor([-27.9897, -27.9740]) -Total rewards: 2819 -Total Receives: 2811 -Stores 2058 1 -Total Receives: 2812 -Stores 2059 1 -Total Receives: 2813 -Stores 2060 1 -Total Receives: 2814 -Stores 2061 1 -Total Receives: 2815 -Stores 2062 1 -Total Receives: 2816 -Stores 2063 1 -Total Receives: 2817 -Stores 2064 1 -Total Receives: 2818 -Stores 2065 1 -Total Receives: 2819 -Stores 2066 1 -tensor([-0.0797, -0.0900]) -Total rewards: 2825 -Total Receives: 2820 -Stores 2067 1 -Total Receives: 2821 -Stores 2068 1 -Total Receives: 2822 -Stores 2069 1 -Total Receives: 2823 -Stores 2070 1 -Total Receives: 2824 -Stores 2071 1 -Total Receives: 2825 -Stores 2072 1 -tensor([-7.1499, -7.1595, -7.1492, -7.1595]) -tensor([-5.8978, -5.9315, -5.8995]) -tensor([-7.6058, -7.6269, -7.6269, -7.6225, -7.6269]) -tensor([-0.1822, -0.1704, -0.1567, -0.1566]) -tensor([-0.0550, -0.0557]) -tensor([-10.9505, -10.9490]) -tensor([-59.2122, -59.2147, -59.2096, -59.1969, -59.1920, -59.1373, -59.1640, - -59.2823, -59.2823, -59.2824]) -tensor([-19.1124, -19.0992, -19.1228, -19.1035, -19.1404, -19.1074, -19.1418]) -tensor([-36.0394, -36.0373, -36.1329, -36.1156]) -tensor([-7.6654, -7.6654, -7.6516, -7.6654]) -tensor([-17.9730, -17.9603, -17.9827, -17.9643, -17.9997, -18.0031]) -Total rewards: 2829 -Total Receives: 2826 -Stores 2073 1 -Total Receives: 2827 -Stores 2074 1 -Total Receives: 2828 -Stores 2075 1 -Total Receives: 2829 -Stores 2076 1 -tensor([-5.8252, -5.7868, -5.8099]) -tensor([-10.3183, -10.2855, -10.3289, -10.3240, -10.3351, -10.3136]) -tensor([-68.8399, -68.8125, -68.2608, -69.0402, -68.8631, -68.1606]) -tensor([-6.6105, -6.6105, -6.6105]) -tensor([-26.5864, -26.5908]) -tensor([-7.6680, -7.7100, -7.6978, -7.7074]) -tensor([-24.2684, -24.2695, -24.2695]) -tensor([-0.1677, -0.1622, -0.1694, -0.1648]) -tensor([-28.6911, -28.7182, -28.7213]) -Total rewards: 2835 -Total Receives: 2830 -Stores 2077 1 -Total Receives: 2831 -Stores 2078 1 -Total Receives: 2832 -Stores 2079 1 -Total Receives: 2833 -Stores 2080 1 -Total Receives: 2834 -Stores 2081 1 -Total Receives: 2835 -Stores 2082 1 -tensor([-17.0478, -17.0817, -17.0494, -17.0874, -17.0891]) -tensor([-40.7577, -40.7424, -40.7588, -40.7075, -40.7746, -40.8175]) -tensor([-52.8597, -52.8594, -52.8551, -52.8437, -52.8409, -52.9289, -52.8178, - -52.9303, -52.9335, -52.9335, -52.9335]) -tensor([-31.1005, -31.2020, -31.1611, -31.1997, -31.1725]) -Total rewards: 2839 -Total Receives: 2836 -Stores 2083 1 -Total Receives: 2837 -Stores 2084 1 -Total Receives: 2838 -Stores 2085 1 -Total Receives: 2839 -Stores 2086 1 -tensor([-18.0021, -18.0088]) -tensor([-7.5185, -7.5516, -7.5352, -7.5440, -7.5516]) -tensor([-11.0431, -11.0139, -11.0574, -11.0647, -11.0445]) -tensor([-7.5384, -7.5384]) -tensor([-0.1254, -0.1254]) -Total rewards: 2844 -Total Receives: 2840 -Stores 2087 1 -Total Receives: 2841 -Stores 2088 1 -Total Receives: 2842 -Stores 2089 1 -Total Receives: 2843 -Stores 2090 1 -Total Receives: 2844 -Stores 2091 1 -tensor([-12.1763, -12.1871, -12.1835]) -tensor([-20.3505, -20.3301, -20.3203, -20.3301, -20.3340, -20.3616]) -tensor([-0.3107, -0.3095, -0.3134, -0.3052]) -tensor([-0.0742, -0.0712, -0.0703, -0.0697, -0.0694]) -tensor([-12.3782, -12.3745]) -tensor([-10.3443, -10.3585, -10.3328, -10.3415, -10.3570]) -tensor([-7.7383, -7.7330, -7.7243]) -tensor([-11.6060, -11.6165]) -tensor([-31.7219, -31.7056, -31.7211, -31.7365, -31.7923]) -Total rewards: 2850 -Total Receives: 2845 -Stores 2092 1 -Total Receives: 2846 -Stores 2093 1 -Total Receives: 2847 -Stores 2094 1 -Total Receives: 2848 -Stores 2095 1 -Total Receives: 2849 -Stores 2096 1 -Total Receives: 2850 -Stores 2097 1 -tensor([-8.2287, -8.2278, -8.2003, -8.2068, -8.2155, -8.2287]) -tensor([-65.6246, -65.1100, -65.6382]) -tensor([-16.3069, -16.3242]) -tensor([-43.3966, -43.3944, -43.3901, -43.3789, -43.3859, -43.4644, -43.3518, - -43.4595, -43.4792, -43.4792]) -tensor([-17.5175, -17.5035, -17.5239]) -tensor([-23.9781, -23.9678, -23.9280, -23.9804, -23.9408]) -Total rewards: 2858 -Total Receives: 2851 -Stores 2098 1 -Total Receives: 2852 -Stores 2099 1 -Total Receives: 2853 -Stores 2100 1 -Total Receives: 2854 -Stores 2101 1 -Total Receives: 2855 -Stores 2102 1 -Total Receives: 2856 -Stores 2103 1 -Total Receives: 2857 -Stores 2104 1 -Total Receives: 2858 -Stores 2105 1 -tensor([-12.6880, -12.6610, -12.6644, -12.6713, -12.6539, -12.6915]) -tensor([-27.0844, -27.0805, -27.1015, -27.1678]) -tensor([-14.0791, -14.0765]) -tensor([-8.2179, -8.1952, -8.1999, -8.2198]) -tensor([-14.4430, -14.4308, -14.4636, -14.4469, -14.4566, -14.4660]) -tensor([-0.1497, -0.1458, -0.1431]) -tensor([-0.0691, -0.0601, -0.0628, -0.0618]) -tensor([-6.2958, -6.2715, -6.2860]) -tensor([-0.2146, -0.2098, -0.1932, -0.1817, -0.1829]) -tensor([-28.9953, -29.0110, -29.0790]) -tensor([-10.7223, -10.6893, -10.6934, -10.7009, -10.7349]) -tensor([-18.5655, -18.5614, -18.5643]) -tensor([-9.2524, -9.2323, -9.2361, -9.2636]) -tensor([-7.4515, -7.4558]) -tensor([-8.4199, -8.4310, -8.4129]) -tensor([-6.4881, -6.4684, -6.4701]) -tensor([-18.6561, -18.6640, -18.6353, -18.6683, -18.6538, -18.6308, -18.6327, - -18.6790, -18.6686, -18.6790]) -tensor([-0.0736, -0.0616]) -Total rewards: 2865 -Total Receives: 2859 -Stores 2106 1 -Total Receives: 2860 -Stores 2107 1 -Total Receives: 2861 -Stores 2108 1 -Total Receives: 2862 -Stores 2109 1 -Total Receives: 2863 -Stores 2110 1 -Total Receives: 2864 -Stores 2111 1 -Total Receives: 2865 -Stores 2112 1 -tensor([-6.4773, -6.4852, -6.4702, -6.4584, -6.4700, -6.4852]) -tensor([-24.8492, -24.8816, -24.8678]) -tensor([-43.8368, -43.8317, -43.8269, -43.8155, -43.8212, -43.9159, -43.9122, - -43.9474, -43.9474]) -tensor([-0.0819, -0.0803, -0.0843]) -tensor([-13.5978, -13.6319, -13.5989, -13.6393, -13.6047, -13.6222]) -tensor([-4.5567, -4.5544, -4.5493, -4.5371]) -tensor([-0.1516, -0.1548, -0.1574, -0.1745, -0.1556, -0.1518]) -Total rewards: 4429 -Total Receives: 4426 -Stores 3218 1 -Total Receives: 4427 -Stores 3219 1 -Total Receives: 4428 -Stores 3220 1 -Total Receives: 4429 -Stores 3221 1 -tensor([0.0107, 0.0105]) -tensor([-0.0935, -0.0926, -0.1112, -0.1051, -0.0897]) -tensor([-0.1777, -0.1080, -0.1356, -0.0875, -0.0875]) -tensor([-0.1442, -0.0986, -0.1136, -0.0882]) -tensor([0.0364, 0.0368]) -tensor([0.0219, 0.0186]) -tensor([0.0406, 0.0382]) -tensor([-0.0740, -0.0681, -0.0802]) -tensor([-0.1623, -0.0955, -0.1269, -0.0732]) -tensor([0.0231, 0.0201]) -Total rewards: 4434 -Total Receives: 4430 -Stores 3222 1 -Total Receives: 4431 -Stores 3223 1 -Total Receives: 4432 -Stores 3224 1 -Total Receives: 4433 -Stores 3225 1 -Total Receives: 4434 -Stores 3226 1 -tensor([-0.0818, -0.0649, -0.0663, -0.0425, -0.0441]) -tensor([-0.1083, -0.1006, -0.0945]) -tensor([-0.1427, -0.1416, -0.1602, -0.1375, -0.1534]) -tensor([0.0390, 0.0390]) -tensor([-0.0214, -0.0202, -0.0122]) -tensor([0.0284, 0.0306]) -tensor([-0.0533, -0.0533, -0.0474]) -tensor([-0.0694, -0.0634, -0.0580]) -tensor([0.0327, 0.0325]) -tensor([-0.0655, -0.0617]) -tensor([-0.0088, -0.0073]) -tensor([-0.1012, -0.1270, -0.1206, -0.0981, -0.0643]) -tensor([-0.0566, -0.0575]) -Total rewards: 4441 -Total Receives: 4435 -Stores 3227 1 -Total Receives: 4436 -Stores 3228 1 -Total Receives: 4437 -Stores 3229 1 -Total Receives: 4438 -Stores 3230 1 -Total Receives: 4439 -Stores 3231 1 -Total Receives: 4440 -Stores 3232 1 -Total Receives: 4441 -Stores 3233 1 -tensor([-0.1125, -0.1079, -0.1008]) -tensor([-0.0617, -0.0555, -0.0428]) -tensor([-0.1106, -0.1098, -0.1039, -0.1322, -0.1241, -0.1034]) -tensor([0.0311, 0.0288, 0.0309]) -tensor([0.0328, 0.0350]) -tensor([0.0358, 0.0308]) -tensor([-0.0725, -0.1581, -0.0956, -0.0720, -0.1291]) -tensor([-0.0935, -0.0429, -0.0751, -0.0516, -0.0534, -0.0389]) -tensor([0.0315, 0.0307]) -tensor([0.0205, 0.0215]) -tensor([0.0299, 0.0306, 0.0315, 0.0310]) -tensor([-0.1630, -0.1170, -0.0725, -0.0777]) -tensor([0.0416, 0.0364]) -tensor([-0.0977, -0.0506, -0.0807, -0.0605, -0.0622]) -tensor([0.0426, 0.0411, 0.0419]) -tensor([-0.0068, -0.0014, -0.0072, -0.0110]) -tensor([0.0413, 0.0426]) -tensor([-0.0348, -0.0235, -0.0348, -0.0271, -0.0260]) -tensor([-0.1558, -0.1569, -0.1568, -0.1570, -0.1565, -0.1542]) -Total rewards: 4448 -Total Receives: 4442 -Stores 3234 1 -Total Receives: 4443 -Stores 3235 1 -Total Receives: 4444 -Stores 3236 1 -Total Receives: 4445 -Stores 3237 1 -Total Receives: 4446 -Stores 3238 1 -Total Receives: 4447 -Stores 3239 1 -Total Receives: 4448 -Stores 3240 1 -tensor([-0.0424, -0.0391, -0.0399, -0.0417]) -tensor([0.0203, 0.0157]) -tensor([0.0277, 0.0283]) -tensor([-0.0573, -0.0464, -0.0492]) -tensor([-0.1026, -0.1014, -0.0925, -0.1228, -0.0815, -0.1168]) -tensor([-0.0001, 0.0012]) -tensor([0.0276, 0.0239]) -tensor([-0.0093, -0.0088, -0.0055, -0.0135]) -Total rewards: 4452 -Total Receives: 4449 -Stores 3241 1 -Total Receives: 4450 -Stores 3242 1 -Total Receives: 4451 -Stores 3243 1 -Total Receives: 4452 -Stores 3244 1 -tensor([-0.2670, -0.3112, -0.3044, -0.2548, -0.2548]) -Total rewards: 4457 -Total Receives: 4453 -Stores 3245 1 -Total Receives: 4454 -Stores 3246 1 -Total Receives: 4455 -Stores 3247 1 -Total Receives: 4456 -Stores 3248 1 -Total Receives: 4457 -Stores 3249 1 -tensor([-0.0148, -0.0141, -0.0164]) -tensor([-0.0330, -0.0342, -0.0366]) -tensor([-0.0254, -0.0271]) -tensor([0.0252, 0.0229, 0.0228]) -tensor([-0.0952, -0.1132, -0.1020, -0.1206]) -tensor([0.0337, 0.0322]) -tensor([-0.1073, -0.1075, -0.1008, -0.1013]) -Total rewards: 4464 -Total Receives: 4458 -Stores 3250 1 -Total Receives: 4459 -Stores 3251 1 -Total Receives: 4460 -Stores 3252 1 -Total Receives: 4461 -Stores 3253 1 -Total Receives: 4462 -Stores 3254 1 -Total Receives: 4463 -Stores 3255 1 -Total Receives: 4464 -Stores 3256 1 -tensor([-0.0039, -0.0038, -0.0057]) -tensor([-0.2038, -0.0591, -0.1441, -0.0980, -0.0707]) -tensor([0.0223, 0.0228]) -tensor([-0.2115, -0.2012, -0.2005, -0.2002, -0.1871]) -tensor([0.0319, 0.0326, 0.0300]) -tensor([0.0285, 0.0285]) -tensor([0.0269, 0.0249]) -tensor([-0.2390, -0.2201, -0.2885, -0.1944, -0.2836, -0.2230]) -tensor([-0.1061, -0.1050, -0.0750, -0.0958, -0.0838, -0.1192]) -tensor([0.0032, 0.0034, 0.0054]) -tensor([-0.0156, -0.0252, -0.0307]) -tensor([0.0269, 0.0269]) -tensor([-0.0807, -0.0629, -0.0499, -0.0817]) -tensor([-0.1230, -0.1261]) -tensor([0.0349, 0.0355]) -tensor([-0.8060, -0.7973]) -tensor([-0.0932, -0.0931, -0.0595, -0.0909, -0.0583]) -tensor([0.0330, 0.0541, 0.0303]) -Total rewards: 4468 -Total Receives: 4465 -Stores 3257 1 -Total Receives: 4466 -Stores 3258 1 -Total Receives: 4467 -Stores 3259 1 -Total Receives: 4468 -Stores 3260 1 -tensor([0.0155, 0.0141, 0.0148, 0.0179]) -tensor([0.0218, 0.0215]) -Total rewards: 4471 -Total Receives: 4469 -Stores 3261 1 -Total Receives: 4470 -Stores 3262 1 -Total Receives: 4471 -Stores 3263 1 -tensor([0.0299, 0.0291]) -tensor([0.0864, 0.0498]) -tensor([-0.0988, -0.1279, -0.1168, -0.1163, -0.0853, -0.1152]) -tensor([-0.1867, -0.1369, -0.1346, -0.2265, -0.2222, -0.1493]) -tensor([-0.1450, -0.0721, -0.1080]) -Total rewards: 4475 -Total Receives: 4472 -Stores 3264 1 -Total Receives: 4473 -Stores 3265 1 -Total Receives: 4474 -Stores 3266 1 -Total Receives: 4475 -Stores 3267 1 -tensor([-0.2341, -0.1773, -0.0965, -0.1334, -0.0859, -0.0859]) -tensor([-0.0541, -0.0787, -0.0709, -0.0703, -0.0428, -0.0689]) -Total rewards: 4480 -Total Receives: 4476 -Stores 3268 1 -Total Receives: 4477 -Stores 3269 1 -Total Receives: 4478 -Stores 3270 1 -Total Receives: 4479 -Stores 3271 1 -Total Receives: 4480 -Stores 3272 1 -tensor([0.0113, 0.0110]) -tensor([-0.1099, -0.1103, -0.0756, -0.1058]) -tensor([0.0333, 0.0296]) -Total rewards: 4486 -Total Receives: 4481 -Stores 3273 1 -Total Receives: 4482 -Stores 3274 1 -Total Receives: 4483 -Stores 3275 1 -Total Receives: 4484 -Stores 3276 1 -Total Receives: 4485 -Stores 3277 1 -Total Receives: 4486 -Stores 3278 1 -tensor([0.0291, 0.0309]) -Total rewards: 4494 -Total Receives: 4487 -Stores 3279 1 -Total Receives: 4488 -Stores 3280 1 -Total Receives: 4489 -Stores 3281 1 -Total Receives: 4490 -Stores 3282 1 -Total Receives: 4491 -Stores 3283 1 -Total Receives: 4492 -Stores 3284 1 -Total Receives: 4493 -Stores 3285 1 -Total Receives: 4494 -Stores 3286 1 -tensor([-0.0125, -0.0118]) -tensor([0.0293, 0.0296]) -tensor([-0.0088, -0.0037]) -Total rewards: 4501 -Total Receives: 4495 -Stores 3287 1 -Total Receives: 4496 -Stores 3288 1 -Total Receives: 4497 -Stores 3289 1 -Total Receives: 4498 -Stores 3290 1 -Total Receives: 4499 -Stores 3291 1 -Total Receives: 4500 -Stores 3292 1 -Total Receives: 4501 -Stores 3293 1 -tensor([-0.1550, -0.0948, -0.1244, -0.0824, -0.0824]) -Total rewards: 4509 -Total Receives: 4502 -Stores 3294 1 -Total Receives: 4503 -Stores 3295 1 -Total Receives: 4504 -Stores 3296 1 -Total Receives: 4505 -Stores 3297 1 -Total Receives: 4506 -Stores 3298 1 -Total Receives: 4507 -Stores 3299 1 -Total Receives: 4508 -Stores 3300 1 -Total Receives: 4509 -Stores 3301 1 -tensor([-0.1448, -0.1434, -0.1346, -0.1226, -0.1588]) -Total rewards: 4514 -Total Receives: 4510 -Stores 3302 1 -Total Receives: 4511 -Stores 3303 1 -Total Receives: 4512 -Stores 3304 1 -Total Receives: 4513 -Stores 3305 1 -Total Receives: 4514 -Stores 3306 1 -tensor([-0.0812, -0.0772, -0.0687]) -tensor([-0.0628, -0.0828]) -tensor([-0.0182, -0.0099, -0.0059, -0.0059]) -tensor([0.0283, 0.0291]) -tensor([-0.1515, -0.1502, -0.1421, -0.1619]) -tensor([0.0039, 0.0012, 0.0041]) -tensor([-0.1221, -0.0825, -0.1126, -0.1258, -0.0790]) -Total rewards: 4520 -Total Receives: 4515 -Stores 3307 1 -Total Receives: 4516 -Stores 3308 1 -Total Receives: 4517 -Stores 3309 1 -Total Receives: 4518 -Stores 3310 1 -Total Receives: 4519 -Stores 3311 1 -Total Receives: 4520 -Stores 3312 1 -tensor([-0.0760, -0.1619, -0.0940, -0.2005, -0.1927, -0.1078]) -tensor([-0.0997, -0.0524, -0.0858, -0.0493]) -Total rewards: 4528 -Total Receives: 4521 -Stores 3313 1 -Total Receives: 4522 -Stores 3314 1 -Total Receives: 4523 -Stores 3315 1 -Total Receives: 4524 -Stores 3316 1 -Total Receives: 4525 -Stores 3317 1 -Total Receives: 4526 -Stores 3318 1 -Total Receives: 4527 -Stores 3319 1 -Total Receives: 4528 -Stores 3320 1 -tensor([-0.1677, -0.1144, -0.1403, -0.0985]) -Total rewards: 4532 -Total Receives: 4529 -Stores 3321 1 -Total Receives: 4530 -Stores 3322 1 -Total Receives: 4531 -Stores 3323 1 -Total Receives: 4532 -Stores 3324 1 -tensor([-25.0834, -25.0705, -25.1002, -25.0524]) -tensor([-8.9045, -8.3434]) -tensor([-14.5997, -14.6337, -14.6005, -14.6061, -14.6235]) -tensor([-0.1080, -0.1036, -0.1422, -0.1210, -0.1077, -0.1062]) -tensor([-7.0121, -7.0259, -7.0039, -7.0090, -7.0258]) -tensor([-9.6246, -9.6415]) -Total rewards: 2872 -Total Receives: 2866 -Stores 2113 1 -Total Receives: 2867 -Stores 2114 1 -Total Receives: 2868 -Stores 2115 1 -Total Receives: 2869 -Stores 2116 1 -Total Receives: 2870 -Stores 2117 1 -Total Receives: 2871 -Stores 2118 1 -Total Receives: 2872 -Stores 2119 1 -tensor([-8.6430, -8.6673]) -tensor([-29.4069, -29.3936, -29.4227]) -tensor([-8.3772, -8.3566, -8.3574, -8.4006, -8.4006, -8.3946]) -tensor([-22.8159, -22.7907, -22.7883]) -tensor([-20.3407, -20.3176]) -tensor([-35.7306, -35.8222, -35.8333, -35.8210]) -tensor([-22.5519, -22.5790, -22.5482, -22.5642, -22.5475, -22.5799, -22.5431, - -22.5946, -22.5790, -22.5659, -22.5946]) -tensor([-7.3879, -7.3891]) -tensor([-19.0589, -19.0417, -19.0567, -19.0381, -19.0352, -19.0541]) -tensor([-6.9579, -6.9601]) -tensor([-5.8958, -5.9007]) -Total rewards: 2877 -Total Receives: 2873 -Stores 2120 1 -Total Receives: 2874 -Stores 2121 1 -Total Receives: 2875 -Stores 2122 1 -Total Receives: 2876 -Stores 2123 1 -Total Receives: 2877 -Stores 2124 1 -tensor([-0.1260, -0.1833, -0.1555, -0.1302, -0.1291]) -tensor([-0.0594, -0.0605, -0.0387]) -tensor([-26.0955, -26.1077]) -tensor([-32.1023, -32.1133, -32.0916, -32.1337]) -tensor([-30.2179, -30.2329, -30.2186]) -tensor([-14.4216, -14.4564, -14.4229, -14.4930, -14.4453, -14.4966]) -tensor([-32.3568, -32.3567]) -tensor([-18.0391, -18.0065, -18.0384, -18.0078, -18.0231, -18.0126, -18.0406, - -18.0627, -18.0564, -18.0365, -18.0627]) -tensor([-46.2205, -46.2106, -46.2068, -46.2077, -46.2831, -46.3206, -46.3223, - -46.3242, -46.3236, -46.2846, -46.3326, -46.3327]) -Total rewards: 2886 -Total Receives: 2878 -Stores 2125 1 -Total Receives: 2879 -Stores 2126 1 -Total Receives: 2880 -Stores 2127 1 -Total Receives: 2881 -Stores 2128 1 -Total Receives: 2882 -Stores 2129 1 -Total Receives: 2883 -Stores 2130 1 -Total Receives: 2884 -Stores 2131 1 -Total Receives: 2885 -Stores 2132 1 -Total Receives: 2886 -Stores 2133 1 -tensor([-43.1896, -43.1795, -43.1734, -43.2598, -43.2993, -43.3005, -43.3024, - -43.3017, -43.2616, -43.3147, -43.3147]) -tensor([-6.9537, -6.9873, -6.9422, -6.9873, -6.9802]) -tensor([-26.8214, -26.8310, -26.8109, -26.8522]) -tensor([-11.7462, -11.7143, -11.7852, -11.7498, -11.7921, -11.7975]) -tensor([-8.9351, -8.9310, -8.9432]) -tensor([-0.0735, -0.0761]) -tensor([-5.9159, -5.9499, -5.9499, -5.9412]) -tensor([-39.9455, -39.9345, -39.9294, -40.0161, -40.0588, -40.0597, -40.0619, - -40.0206, -40.0814, -40.0814]) -tensor([-8.8656, -8.8636]) -Total rewards: 2889 -Total Receives: 2887 -Stores 2134 1 -Total Receives: 2888 -Stores 2135 1 -Total Receives: 2889 -Stores 2136 1 -tensor([-8.5448, -8.5047, -8.4939, -8.4877, -8.5164, -8.5105]) -tensor([-10.5102, -10.5447, -10.5149, -10.5529, -10.5626]) -Total rewards: 2898 -Total Receives: 2890 -Stores 2137 1 -Total Receives: 2891 -Stores 2138 1 -Total Receives: 2892 -Stores 2139 1 -Total Receives: 2893 -Stores 2140 1 -Total Receives: 2894 -Stores 2141 1 -Total Receives: 2895 -Stores 2142 1 -Total Receives: 2896 -Stores 2143 1 -Total Receives: 2897 -Stores 2144 1 -Total Receives: 2898 -Stores 2145 1 -tensor([-12.9220, -12.9344]) -tensor([-17.4041, -17.3927, -17.4095]) -tensor([-6.9326, -6.9849, -6.9884]) -tensor([-36.3742, -36.3621, -36.4350, -36.4678, -36.4727, -36.4744, -36.4397, - -36.4983, -36.4983]) -tensor([-3.2838, -3.3011, -3.2929, -3.2873, -3.2968, -3.3156]) -tensor([-0.1521, -0.1325, -0.1183, -0.0931, -0.1047, -0.1317]) -tensor([-5.6236, -5.5746, -5.5690, -5.5948, -5.5878]) -tensor([-4.7515, -4.8028]) -tensor([-3.6035, -3.5993, -3.6047, -3.6047, -3.5834]) -tensor([-26.5903, -26.5989, -26.5773, -26.5965, -26.5614]) -tensor([-14.4225, -14.4279]) -tensor([-23.8747, -23.8922, -23.8700]) -tensor([-0.1757, -0.1557, -0.1301, -0.1277, -0.1422]) -tensor([-25.0526, -25.0745, -25.0945, -25.0849]) -tensor([-4.2289, -4.2460]) -tensor([-8.5879, -8.5564, -8.5974, -8.6162]) -tensor([-18.4084, -18.3980, -18.4059, -18.3765, -18.4024, -18.4051, -18.4509, - -18.4328, -18.4244, -18.4089, -18.4509]) -Total rewards: 2905 -Total Receives: 2899 -Stores 2146 1 -Total Receives: 2900 -Stores 2147 1 -Total Receives: 2901 -Stores 2148 1 -Total Receives: 2902 -Stores 2149 1 -Total Receives: 2903 -Stores 2150 1 -Total Receives: 2904 -Stores 2151 1 -Total Receives: 2905 -Stores 2152 1 -tensor([-20.1340, -20.1562, -20.1465]) -tensor([-27.1153, -27.1241, -27.1274]) -Total rewards: 2907 -Total Receives: 2906 -Stores 2153 1 -Total Receives: 2907 -Stores 2154 1 -tensor([-0.2655, -0.2696, -0.2744]) -tensor([-21.0221, -20.9943, -21.0156]) -Total rewards: 2912 -Total Receives: 2908 -Stores 2155 1 -Total Receives: 2909 -Stores 2156 1 -Total Receives: 2910 -Stores 2157 1 -Total Receives: 2911 -Stores 2158 1 -Total Receives: 2912 -Stores 2159 1 -Total rewards: 2920 -Total Receives: 2913 -Stores 2160 1 -Total Receives: 2914 -Stores 2161 1 -Total Receives: 2915 -Stores 2162 1 -Total Receives: 2916 -Stores 2163 1 -Total Receives: 2917 -Stores 2164 1 -Total Receives: 2918 -Stores 2165 1 -Total Receives: 2919 -Stores 2166 1 -Total Receives: 2920 -Stores 2167 1 -tensor([-0.2423, -0.2450]) -Total rewards: 2925 -Total Receives: 2921 -Stores 2168 1 -Total Receives: 2922 -Stores 2169 1 -Total Receives: 2923 -Stores 2170 1 -Total Receives: 2924 -Stores 2171 1 -Total Receives: 2925 -Stores 2172 1 -tensor([-2.7358, -2.7044, -2.6924, -2.6923, -2.6946, -2.7368, -2.7173]) -tensor([-2.6819, -2.6994]) -tensor([-5.0390, -4.9910, -5.0482, -5.0198, -5.0047]) -tensor([-30.7067, -30.7073]) -Total rewards: 2933 -Total Receives: 2926 -Stores 2173 1 -Total Receives: 2927 -Stores 2174 1 -Total Receives: 2928 -Stores 2175 1 -Total Receives: 2929 -Stores 2176 1 -Total Receives: 2930 -Stores 2177 1 -Total Receives: 2931 -Stores 2178 1 -Total Receives: 2932 -Stores 2179 1 -Total Receives: 2933 -Stores 2180 1 -tensor([-42.3326, -42.3929, -42.4297, -42.4704, -42.4374, -42.4370, -42.4648, - -42.3962, -42.4866, -42.4795, -42.4866]) -tensor([-23.6316, -23.6258, -23.6542]) -tensor([-34.0322, -34.0414, -34.0282, -34.0557, -34.0557]) -tensor([-0.2207, -0.2020, -0.1917, -0.1979]) -Total rewards: 2938 -Total Receives: 2934 -Stores 2181 1 -Total Receives: 2935 -Stores 2182 1 -Total Receives: 2936 -Stores 2183 1 -Total Receives: 2937 -Stores 2184 1 -Total Receives: 2938 -Stores 2185 1 -tensor([-3.0324, -3.0284, -3.0446, -3.0324, -3.0446]) -tensor([-1.9928, -1.9849]) -Total rewards: 2941 -Total Receives: 2939 -Stores 2186 1 -Total Receives: 2940 -Stores 2187 1 -Total Receives: 2941 -Stores 2188 1 -tensor([-22.3767, -22.4104]) -Total rewards: 2950 -Total Receives: 2942 -Stores 2189 1 -Total Receives: 2943 -Stores 2190 1 -Total Receives: 2944 -Stores 2191 1 -Total Receives: 2945 -Stores 2192 1 -Total Receives: 2946 -Stores 2193 1 -Total Receives: 2947 -Stores 2194 1 -Total Receives: 2948 -Stores 2195 1 -Total Receives: 2949 -Stores 2196 1 -Total Receives: 2950 -Stores 2197 1 -tensor([-40.3998, -40.5480, -40.4619, -40.4994, -40.5050, -40.5165, -40.5304, - -40.4642, -40.5593, -40.5452, -40.5593, -40.5505]) -tensor([-19.1537, -19.1306, -19.1264, -19.1275]) -tensor([-25.7532, -25.7481, -25.7555, -25.7464, -25.7531, -25.8243, -25.7818, - -25.7721, -25.7597, -25.8243]) -tensor([-11.5971, -11.5977, -11.6300]) -Total rewards: 2953 -Total Receives: 2951 -Stores 2198 1 -Total Receives: 2952 -Stores 2199 1 -Total Receives: 2953 -Stores 2200 1 -tensor([-26.4889, -26.4855, -26.4926, -26.4839, -26.4896, -26.5634, -26.5150, - -26.5012, -26.5634]) -Total rewards: 2959 -Total Receives: 2954 -Stores 2201 1 -Total Receives: 2955 -Stores 2202 1 -Total Receives: 2956 -Stores 2203 1 -Total Receives: 2957 -Stores 2204 1 -Total Receives: 2958 -Stores 2205 1 -Total Receives: 2959 -Stores 2206 1 -tensor([-2.5828, -2.5384, -2.5419, -2.5441, -2.5909, -2.5766]) -tensor([-0.3570, -0.3362]) -tensor([-4.5511, -4.5086, -4.5694, -4.5338, -4.5764, -4.5182]) -tensor([-2.5872, -2.6064, -2.5896, -2.5970, -2.6064]) -tensor([-0.1833, -0.2625, -0.2431, -0.2318, -0.1842]) -tensor([-2.0673, -2.0697]) -tensor([-0.3308, -0.3351]) -tensor([-3.0640, -3.0625]) -tensor([-0.3091, -0.3165]) -tensor([-31.8452, -31.8260, -31.8190]) -tensor([-0.1197, -0.0679, -0.0718]) -tensor([0.0375, 0.0352]) -tensor([-0.0672, -0.0924, -0.0751, -0.0417, -0.0791, -0.0818]) -tensor([0.0268, 0.0267]) -tensor([-0.0570, -0.0560, -0.0500, -0.0666]) -tensor([-0.1762, -0.1748, -0.1861]) -tensor([0.0060, 0.0065]) -tensor([-0.0106, -0.0070]) -tensor([-0.0425, -0.0411]) -tensor([0.0114, 0.0085]) -tensor([0.0314, 0.0283, 0.0386]) -tensor([-0.1420, -0.0950, -0.0862, -0.0917]) -tensor([-0.0594, -0.0522, -0.0348, -0.0337]) -tensor([-0.1300, -0.1161, -0.1212]) -tensor([0.0360, 0.0361, 0.0366]) -Total rewards: 4539 -Total Receives: 4533 -Stores 3325 1 -Total Receives: 4534 -Stores 3326 1 -Total Receives: 4535 -Stores 3327 1 -Total Receives: 4536 -Stores 3328 1 -Total Receives: 4537 -Stores 3329 1 -Total Receives: 4538 -Stores 3330 1 -Total Receives: 4539 -Stores 3331 1 -tensor([-0.2613, -0.2207, -0.3048, -0.2995, -0.2281, -0.1920]) -tensor([-0.0666, -0.0575, -0.0653, -0.0778]) -tensor([0.0146, 0.0159]) -Total rewards: 4541 -Total Receives: 4540 -Stores 3332 1 -Total Receives: 4541 -Stores 3333 1 -Total rewards: 4544 -Total Receives: 4542 -Stores 3334 1 -Total Receives: 4543 -Stores 3335 1 -Total Receives: 4544 -Stores 3336 1 -tensor([-0.0498, -0.0565, -0.1040, -0.0604, -0.0663, -0.0471, -0.0589]) -tensor([0.0384, 0.0386]) -tensor([-0.1363, -0.0899, -0.0821, -0.0738, -0.0745]) -tensor([-0.1101, -0.1317, -0.1145, -0.1229, -0.1219]) -tensor([0.0159, 0.0136, 0.0101]) -tensor([-0.0263, -0.0316]) -tensor([-0.1415, -0.0786, -0.1409, -0.0865]) -Total rewards: 4551 -Total Receives: 4545 -Stores 3337 1 -Total Receives: 4546 -Stores 3338 1 -Total Receives: 4547 -Stores 3339 1 -Total Receives: 4548 -Stores 3340 1 -Total Receives: 4549 -Stores 3341 1 -Total Receives: 4550 -Stores 3342 1 -Total Receives: 4551 -Stores 3343 1 -tensor([-0.5859, -0.6003, -0.6136, -0.6275, -0.6263, -0.5986, -0.6264, -0.6238, - -0.6258]) -tensor([-0.0567, -0.0614, -0.0993, -0.0652, -0.0707, -0.0652]) -tensor([0.0397, 0.0396]) -tensor([-0.0974, -0.0788, -0.0813, -0.0718, -0.0692]) -tensor([0.0142, 0.0137, 0.0112]) -tensor([-0.0249, -0.0261, -0.0225]) -tensor([-0.0649, -0.1025, -0.0677, -0.0496, -0.0725, -0.0686]) -tensor([0.0230, 0.0345]) -tensor([-0.4655, -0.4525, -0.5200, -0.4528, -0.4552, -0.4528, -0.4558, -0.4502]) -Total rewards: 4555 -Total Receives: 4552 -Stores 3344 1 -Total Receives: 4553 -Stores 3345 1 -Total Receives: 4554 -Stores 3346 1 -Total Receives: 4555 -Stores 3347 1 -tensor([-0.1590, -0.1312, -0.1629]) -tensor([-0.1080, -0.0878, -0.0904, -0.0795]) -tensor([-0.1720, -0.1535, -0.1637, -0.1627]) -Total rewards: 4561 -Total Receives: 4556 -Stores 3348 1 -Total Receives: 4557 -Stores 3349 1 -Total Receives: 4558 -Stores 3350 1 -Total Receives: 4559 -Stores 3351 1 -Total Receives: 4560 -Stores 3352 1 -Total Receives: 4561 -Stores 3353 1 -tensor([-0.0733, -0.1082, -0.0764, -0.0805, -0.0773]) -tensor([-0.0083, -0.0040]) -tensor([0.0306, 0.0301]) -tensor([-0.0919, -0.0784, -0.1053]) -Total rewards: 4567 -Total Receives: 4562 -Stores 3354 1 -Total Receives: 4563 -Stores 3355 1 -Total Receives: 4564 -Stores 3356 1 -Total Receives: 4565 -Stores 3357 1 -Total Receives: 4566 -Stores 3358 1 -Total Receives: 4567 -Stores 3359 1 -tensor([0.0024, 0.0003]) -tensor([-0.1670, -0.1292, -0.1247, -0.0902, -0.1140]) -tensor([-0.0853, -0.0598, -0.0504, -0.0431]) -tensor([-0.5584, -0.5552, -0.6152, -0.5497, -0.5515, -0.5507, -0.5543, -0.5466]) -tensor([-0.1650, -0.0909, -0.1489, -0.0829, -0.1014, -0.0988]) -tensor([0.0278, 0.0315]) -tensor([-0.0593, -0.0640, -0.0616, -0.0620]) -tensor([-0.0480, -0.0503, -0.0458]) -Total rewards: 4575 -Total Receives: 4568 -Stores 3360 1 -Total Receives: 4569 -Stores 3361 1 -Total Receives: 4570 -Stores 3362 1 -Total Receives: 4571 -Stores 3363 1 -Total Receives: 4572 -Stores 3364 1 -Total Receives: 4573 -Stores 3365 1 -Total Receives: 4574 -Stores 3366 1 -Total Receives: 4575 -Stores 3367 1 -tensor([-0.0102, -0.0166]) -tensor([-0.1876, -0.1706, -0.1808]) -tensor([0.0092, 0.0104, 0.0079]) -tensor([0.0222, 0.0286]) -tensor([0.0402, 0.0394, 0.0391]) -Total rewards: 4581 -Total Receives: 4576 -Stores 3368 1 -Total Receives: 4577 -Stores 3369 1 -Total Receives: 4578 -Stores 3370 1 -Total Receives: 4579 -Stores 3371 1 -Total Receives: 4580 -Stores 3372 1 -Total Receives: 4581 -Stores 3373 1 -tensor([-0.0470, -0.0468, -0.0454]) -tensor([0.0404, 0.0359]) -tensor([-0.0763, -0.0799]) -tensor([0.0078, 0.0091]) -tensor([0.0087, 0.0065]) -tensor([-0.0473, -0.0460]) -tensor([-0.0422, -0.0339]) -tensor([-0.5606, -0.5533, -0.6204, -0.5529, -0.5539, -0.5549, -0.5534, -0.5570]) -tensor([-0.1959, -0.1665, -0.1626, -0.1268]) -tensor([0.0059, 0.0046]) -tensor([-0.0052, -0.0041, -0.0037, 0.0010]) -Total rewards: 4588 -Total Receives: 4582 -Stores 3374 1 -Total Receives: 4583 -Stores 3375 1 -Total Receives: 4584 -Stores 3376 1 -Total Receives: 4585 -Stores 3377 1 -Total Receives: 4586 -Stores 3378 1 -Total Receives: 4587 -Stores 3379 1 -Total Receives: 4588 -Stores 3380 1 -tensor([-0.2141, -0.1406, -0.1968, -0.1536, -0.1414, -0.1511]) -tensor([-0.1595, -0.1376, -0.0902]) -tensor([0.0380, 0.0380]) -tensor([0.0053, 0.0058]) -Total rewards: 4596 -Total Receives: 4589 -Stores 3381 1 -Total Receives: 4590 -Stores 3382 1 -Total Receives: 4591 -Stores 3383 1 -Total Receives: 4592 -Stores 3384 1 -Total Receives: 4593 -Stores 3385 1 -Total Receives: 4594 -Stores 3386 1 -Total Receives: 4595 -Stores 3387 1 -Total Receives: 4596 -Stores 3388 1 -tensor([0.0030, 0.0027]) -tensor([-0.1286, -0.0956, -0.0909, -0.0482, -0.0545, -0.0532]) -tensor([0.0153, 0.0144]) -tensor([-0.0247, -0.0311, -0.0248, -0.0351]) -tensor([-0.0256, -0.0228]) -tensor([-0.0942, -0.0785, -0.0770]) -tensor([-0.0078, -0.0050, -0.0052]) -tensor([-0.1016, -0.0910, -0.0946]) -Total rewards: 4604 -Total Receives: 4597 -Stores 3389 1 -Total Receives: 4598 -Stores 3390 1 -Total Receives: 4599 -Stores 3391 1 -Total Receives: 4600 -Stores 3392 1 -Total Receives: 4601 -Stores 3393 1 -Total Receives: 4602 -Stores 3394 1 -Total Receives: 4603 -Stores 3395 1 -Total Receives: 4604 -Stores 3396 1 -tensor([-0.3010, -0.3544, -0.1948, -0.1939, -0.2028, -0.2483, -0.1990, -0.2070, - -0.2156]) -tensor([-0.0757, -0.0767, -0.0767, -0.0618, -0.0625]) -tensor([0.0363, 0.0278]) -Total rewards: 4609 -Total Receives: 4605 -Stores 3397 1 -Total Receives: 4606 -Stores 3398 1 -Total Receives: 4607 -Stores 3399 1 -Total Receives: 4608 -Stores 3400 1 -Total Receives: 4609 -Stores 3401 1 -tensor([0.0237, 0.0200]) -Total rewards: 4611 -Total Receives: 4610 -Stores 3402 1 -Total Receives: 4611 -Stores 3403 1 -Total rewards: 4618 -Total Receives: 4612 -Stores 3404 1 -Total Receives: 4613 -Stores 3405 1 -Total Receives: 4614 -Stores 3406 1 -Total Receives: 4615 -Stores 3407 1 -Total Receives: 4616 -Stores 3408 1 -Total Receives: 4617 -Stores 3409 1 -Total Receives: 4618 -Stores 3410 1 -tensor([0.0125, 0.0199]) -tensor([-0.0149, -0.0202, -0.0204, -0.0063]) -tensor([-0.0177, -0.0187, -0.0132]) -Total rewards: 4625 -Total Receives: 4619 -Stores 3411 1 -Total Receives: 4620 -Stores 3412 1 -Total Receives: 4621 -Stores 3413 1 -Total Receives: 4622 -Stores 3414 1 -Total Receives: 4623 -Stores 3415 1 -Total Receives: 4624 -Stores 3416 1 -Total Receives: 4625 -Stores 3417 1 -tensor([-0.0193, -0.0180, -0.0202, -0.0168]) -tensor([0.0338, 0.0322]) -tensor([-0.2003, -0.1292, -0.1809, -0.1175, -0.1325, -0.1422, -0.1245]) -Total rewards: 4629 -Total Receives: 4626 -Stores 3418 1 -Total Receives: 4627 -Stores 3419 1 -Total Receives: 4628 -Stores 3420 1 -Total Receives: 4629 -Stores 3421 1 -tensor([-0.0143, -0.0152, -0.0155]) -tensor([-0.2190, -0.1275, -0.2006, -0.1222, -0.1326, -0.1146]) -tensor([-0.1125, -0.1026, -0.0871]) -tensor([-0.0845, -0.1111, -0.0915]) -tensor([-0.0919, -0.0837]) -tensor([-0.0223, -0.0244, -0.0207]) -tensor([ 0.0010, -0.0067]) -tensor([-0.1347, -0.1313, -0.1313]) -tensor([-0.0171, -0.0147, -0.0138, -0.0203]) -tensor([-0.0234, -0.0193]) -Total rewards: 4636 -Total Receives: 4630 -Stores 3422 1 -Total Receives: 4631 -Stores 3423 1 -Total Receives: 4632 -Stores 3424 1 -Total Receives: 4633 -Stores 3425 1 -Total Receives: 4634 -Stores 3426 1 -Total Receives: 4635 -Stores 3427 1 -Total Receives: 4636 -Stores 3428 1 -tensor([0.0467, 0.0291]) -tensor([-0.0238, -0.0314, -0.0255]) -tensor([-0.3773, -0.4394, -0.2725, -0.2720, -0.3239, -0.2771, -0.2838, -0.2928]) -tensor([0.0318, 0.0320, 0.0315]) -tensor([-0.0196, -0.0180, -0.0190, -0.0198]) -tensor([-0.0019, -0.0019]) -tensor([0.0312, 0.0248]) -tensor([-0.0189, -0.0188, -0.0177]) -tensor([0.0305, 0.0290]) -tensor([-0.1753, -0.1373]) -tensor([0.0332, 0.0319]) -tensor([ 0.0015, -0.0021, -0.0012]) -tensor([-0.0463, -0.0459]) -tensor([-0.6560, -0.5724, -0.5723, -0.5704, -0.5769, -0.5731, -0.5759]) -tensor([-0.1930, -0.1006, -0.1725, -0.0479, -0.1012, -0.1104, -0.0761]) -tensor([0.0253, 0.0239]) -tensor([-0.0916, -0.0923]) -tensor([-0.1379, -0.1379, -0.1472, -0.1303, -0.1358]) -tensor([0.0304, 0.0295, 0.0303]) -tensor([-0.2078, -0.1270, -0.1869, -0.1280, -0.1353, -0.1079]) -tensor([0.0025, 0.0021, 0.0096, 0.0102]) -tensor([0.0476, 0.0259, 0.0264]) -Total rewards: 4641 -Total Receives: 4637 -Stores 3429 1 -Total Receives: 4638 -Stores 3430 1 -Total Receives: 4639 -Stores 3431 1 -Total Receives: 4640 -Stores 3432 1 -Total Receives: 4641 -Stores 3433 1 -tensor([-0.0306, -0.0291, -0.0284, -0.0351]) -tensor([-0.0352, -0.0327]) -tensor([0.0292, 0.0307, 0.0284]) -tensor([-0.1438, -0.1407, -0.1571, -0.1399]) -Total rewards: 4644 -Total Receives: 4642 -Stores 3434 1 -Total Receives: 4643 -Stores 3435 1 -Total Receives: 4644 -Stores 3436 1 -tensor([0.0301, 0.0305, 0.0323]) -tensor([-0.0454, -0.0881, -0.0430, -0.0368]) -tensor([0.0334, 0.0340]) -tensor([0.0346, 0.0335]) -tensor([-0.1452, -0.1559, -0.1446]) -tensor([0.0397, 0.0409]) -Total rewards: 4649 -Total Receives: 4645 -Stores 3437 1 -Total Receives: 4646 -Stores 3438 1 -Total Receives: 4647 -Stores 3439 1 -Total Receives: 4648 -Stores 3440 1 -Total Receives: 4649 -Stores 3441 1 -tensor([-0.1562, -0.1065, -0.1164, -0.1166, -0.1100]) -tensor([-0.2369, -0.1730, -0.2185, -0.1713, -0.1811]) -tensor([0.0281, 0.0222]) -tensor([0.0177, 0.0167]) -Total rewards: 4653 -Total Receives: 4650 -Stores 3442 1 -Total Receives: 4651 -Stores 3443 1 -Total Receives: 4652 -Stores 3444 1 -Total Receives: 4653 -Stores 3445 1 -tensor([0.0072, 0.0049, 0.0046]) -tensor([-0.0207, -0.0100, -0.0197, -0.0195]) -tensor([0.0296, 0.0282, 0.0291]) -tensor([-2.1582, -2.1117, -2.1370, -2.1409, -2.1319, -2.1204, -2.1178, -2.1600, - -2.1650]) -tensor([-0.0130, -0.0138, -0.0101]) -tensor([-0.1132, -0.1712, -0.1110]) -tensor([0.0391, 0.0391, 0.0415]) -tensor([0.0224, 0.0188]) -tensor([0.0163, 0.0163]) -Total rewards: 4660 -Total Receives: 4654 -Stores 3446 1 -Total Receives: 4655 -Stores 3447 1 -Total Receives: 4656 -Stores 3448 1 -Total Receives: 4657 -Stores 3449 1 -Total Receives: 4658 -Stores 3450 1 -Total Receives: 4659 -Stores 3451 1 -Total Receives: 4660 -Stores 3452 1 -tensor([-0.0286, -0.0694, -0.0300, -0.0295, -0.0435, -0.0353]) -Total rewards: 4662 -Total Receives: 4661 -Stores 3453 1 -Total Receives: 4662 -Stores 3454 1 -tensor([0.0247, 0.0198]) -tensor([-0.1023, -0.1206, -0.1303]) -tensor([0.0341, 0.0316]) -tensor([-0.0438, -0.0441, -0.0448]) -Total rewards: 4668 -Total Receives: 4663 -Stores 3455 1 -Total Receives: 4664 -Stores 3456 1 -Total Receives: 4665 -Stores 3457 1 -Total Receives: 4666 -Stores 3458 1 -Total Receives: 4667 -Stores 3459 1 -Total Receives: 4668 -Stores 3460 1 -tensor([-0.1178, -0.1307, -0.1172, -0.1190]) -Total rewards: 4671 -Total Receives: 4669 -Stores 3461 1 -Total Receives: 4670 -Stores 3462 1 -Total Receives: 4671 -Stores 3463 1 -tensor([-0.0492, -0.0846, -0.0517, -0.0638, -0.0563]) -tensor([0.0320, 0.0304]) -tensor([0.0370, 0.0300, 0.0147]) -tensor([-0.0640, -0.0751, -0.0670]) -tensor([-0.1850, -0.1167, -0.0917, -0.0973, -0.0853]) -tensor([0.0404, 0.0349]) -tensor([-2.5430, -2.5167, -2.5209, -2.5129, -2.4947, -2.5297, -2.4907, -2.5600, - -2.5449, -2.5598, -2.5633]) -tensor([-0.1212, -0.1210]) -tensor([-0.0741, -0.1642, -0.1034, -0.0798, -0.0865]) -tensor([0.0290, 0.0282]) -Total rewards: 4675 -Total Receives: 4672 -Stores 3464 1 -Total Receives: 4673 -Stores 3465 1 -Total Receives: 4674 -Stores 3466 1 -Total Receives: 4675 -Stores 3467 1 -tensor([-0.0699, -0.0749, -0.0721]) -Total rewards: 4679 -Total Receives: 4676 -Stores 3468 1 -Total Receives: 4677 -Stores 3469 1 -Total Receives: 4678 -Stores 3470 1 -Total Receives: 4679 -Stores 3471 1 -tensor([0.0320, 0.0283]) -tensor([-0.0787, -0.0775]) -tensor([-0.0076, -0.0003, 0.0025]) -tensor([-0.0606, -0.0648]) -tensor([-0.0896, -0.0857, -0.0475]) -tensor([-0.1335, -0.0345, -0.0818, -0.0597, -0.0653]) -tensor([-0.2183, -0.2834, -0.1950, -0.1185]) -tensor([-0.0306, -0.0274]) -Total rewards: 4682 -Total Receives: 4680 -Stores 3472 1 -Total Receives: 4681 -Stores 3473 1 -Total Receives: 4682 -Stores 3474 1 -tensor([0.0311, 0.0325]) -tensor([0.0413, 0.0407]) -tensor([-0.1174, -0.0821, -0.0645, -0.0971, -0.0885]) -Total rewards: 4687 -Total Receives: 4683 -Stores 3475 1 -Total Receives: 4684 -Stores 3476 1 -Total Receives: 4685 -Stores 3477 1 -Total Receives: 4686 -Stores 3478 1 -Total Receives: 4687 -Stores 3479 1 -tensor([-2.9285, -2.8939, -2.8988, -2.9489, -2.9543, -2.8789, -2.9429, -2.8959, - -2.9453, -2.9259, -2.9447, -2.9506, -2.9570]) -tensor([0.0389, 0.0382]) -tensor([-0.0317, -0.0348]) -tensor([0.0443, 0.0265, 0.0282]) -tensor([0.0350, 0.0365]) -tensor([0.0046, 0.0063]) -tensor([-0.0573, -0.0211, -0.0469, -0.0477, -0.0354]) -Total rewards: 4692 -Total Receives: 4688 -Stores 3480 1 -Total Receives: 4689 -Stores 3481 1 -Total Receives: 4690 -Stores 3482 1 -Total Receives: 4691 -Stores 3483 1 -Total Receives: 4692 -Stores 3484 1 -tensor([0.0224, 0.0229]) -tensor([-0.2092, -0.2738, -0.1849, -0.1071, -0.1048]) -Total rewards: 4697 -Total Receives: 4693 -Stores 3485 1 -Total Receives: 4694 -Stores 3486 1 -Total Receives: 4695 -Stores 3487 1 -Total Receives: 4696 -Stores 3488 1 -Total Receives: 4697 -Stores 3489 1 -tensor([0.0348, 0.0327]) -tensor([0.0289, 0.0285]) -tensor([-0.1400, -0.1329]) -Total rewards: 4701 -Total Receives: 4698 -Stores 3490 1 -Total Receives: 4699 -Stores 3491 1 -Total Receives: 4700 -Stores 3492 1 -Total Receives: 4701 -Stores 3493 1 -tensor([-0.1040, -0.0993]) -tensor([0.0334, 0.0354]) -tensor([-0.1176, -0.1239, -0.1239]) -tensor([-0.1044, -0.0939, -0.1117, -0.0995]) -tensor([-0.1631, -0.0964, -0.0976, -0.0703]) -tensor([0.0396, 0.0179]) -tensor([-0.2114, -0.2770, -0.1007, -0.1876, -0.1069]) -Total rewards: 4710 -Total Receives: 4702 -Stores 3494 1 -Total Receives: 4703 -Stores 3495 1 -Total Receives: 4704 -Stores 3496 1 -Total Receives: 4705 -Stores 3497 1 -Total Receives: 4706 -Stores 3498 1 -Total Receives: 4707 -Stores 3499 1 -Total Receives: 4708 -Stores 3500 1 -Total Receives: 4709 -Stores 3501 1 -Total Receives: 4710 -Stores 3502 1 -tensor([-0.0277, -0.0347, -0.0240]) -tensor([-2.9908, -2.9541, -2.9585, -3.0176, -3.0261, -3.0341, -3.0076, -2.9551, - -3.0432, -3.0115, -2.9812, -3.0110, -3.0355, -3.0176, -3.0479, -3.0459]) -tensor([0.0311, 0.0299, 0.0316]) -tensor([0.0308, 0.0366]) -Total rewards: 4714 -Total Receives: 4711 -Stores 3503 1 -Total Receives: 4712 -Stores 3504 1 -Total Receives: 4713 -Stores 3505 1 -Total Receives: 4714 -Stores 3506 1 -Total rewards: 4719 -Total Receives: 4715 -Stores 3507 1 -Total Receives: 4716 -Stores 3508 1 -Total Receives: 4717 -Stores 3509 1 -Total Receives: 4718 -Stores 3510 1 -Total Receives: 4719 -Stores 3511 1 -tensor([-0.0948, -0.1087, -0.1091, -0.0935]) -tensor([-0.0833, -0.0631, -0.0715, -0.0732]) -tensor([0.0244, 0.0263]) -tensor([-2.5244, -2.5016, -2.5037, -2.5521, -2.5625, -2.5376, -2.5145, -2.5727, - -2.5407, -2.5141, -2.5383, -2.5650, -2.5434, -2.5844, -2.5784]) -tensor([0.0349, 0.0326, 0.0373]) -tensor([0.0793, 0.0446]) -tensor([-0.0679, -0.0652]) -tensor([-0.0196, -0.0170, -0.0131]) -tensor([0.0404, 0.0326]) -tensor([-0.0558, -0.0675, -0.0850, -0.0847]) -tensor([-0.1383, -0.1275, -0.1218, -0.1493]) -tensor([-0.0598, -0.0647, -0.0643]) -tensor([-0.2542, -0.3147, -0.1564, -0.2377, -0.1754]) -tensor([-4.2344, -4.2142, -4.2642, -4.2780, -4.2500, -4.2271, -4.2797, -4.2565, - -4.2230, -4.2473, -4.2698, -4.2547, -4.2939, -4.2860, -4.2939]) -Total rewards: 4724 -Total Receives: 4720 -Stores 3512 1 -Total Receives: 4721 -Stores 3513 1 -Total Receives: 4722 -Stores 3514 1 -Total Receives: 4723 -Stores 3515 1 -Total Receives: 4724 -Stores 3516 1 -tensor([0.0376, 0.0372]) -tensor([0.0093, 0.0096]) -Total rewards: 4731 -Total Receives: 4725 -Stores 3517 1 -Total Receives: 4726 -Stores 3518 1 -Total Receives: 4727 -Stores 3519 1 -Total Receives: 4728 -Stores 3520 1 -Total Receives: 4729 -Stores 3521 1 -Total Receives: 4730 -Stores 3522 1 -Total Receives: 4731 -Stores 3523 1 -Total rewards: 4733 -Total Receives: 4732 -Stores 3524 1 -Total Receives: 4733 -Stores 3525 1 -tensor([0.0269, 0.0253, 0.0267]) -tensor([-36.8585, -36.8547, -36.8627, -36.8538, -36.9463, -36.8845, -36.8737, - -36.9463, -36.9463]) -tensor([-5.6404, -4.8894, -5.6521, -5.6215, -5.6597, -5.6136, -5.6764]) -tensor([-187.6032, -186.9096, -186.9207, -187.6292, -187.6291]) -tensor([-34.4735, -34.4942, -34.4886, -34.4944]) -tensor([-27.5888, -27.5713]) -tensor([-2.7275, -2.7261]) -Total rewards: 2963 -Total Receives: 2960 -Stores 2207 1 -Total Receives: 2961 -Stores 2208 1 -Total Receives: 2962 -Stores 2209 1 -Total Receives: 2963 -Stores 2210 1 -Total rewards: 2971 -Total Receives: 2964 -Stores 2211 1 -Total Receives: 2965 -Stores 2212 1 -Total Receives: 2966 -Stores 2213 1 -Total Receives: 2967 -Stores 2214 1 -Total Receives: 2968 -Stores 2215 1 -Total Receives: 2969 -Stores 2216 1 -Total Receives: 2970 -Stores 2217 1 -Total Receives: 2971 -Stores 2218 1 -tensor([-56.5239, -56.4396, -56.4762, -56.4889, -56.4874, -56.5092, -56.4461, - -56.5584, -56.5212, -56.5584, -56.5275, -56.5404]) -tensor([-3.6067, -3.5620, -3.5878, -3.6150, -3.6086]) -tensor([-43.3408, -43.3507]) -tensor([-20.0808, -20.1099, -20.1418, -20.1419]) -tensor([-3.2756, -3.2747, -3.2743]) -tensor([-17.8850, -17.9157, -17.9588]) -Total rewards: 2980 -Total Receives: 2972 -Stores 2219 1 -Total Receives: 2973 -Stores 2220 1 -Total Receives: 2974 -Stores 2221 1 -Total Receives: 2975 -Stores 2222 1 -Total Receives: 2976 -Stores 2223 1 -Total Receives: 2977 -Stores 2224 1 -Total Receives: 2978 -Stores 2225 1 -Total Receives: 2979 -Stores 2226 1 -Total Receives: 2980 -Stores 2227 1 -tensor([-31.5987, -31.5721, -31.5633, -31.6570, -31.5887, -31.5847, -31.6570, - -31.6570]) -tensor([-4.5582, -4.6011, -4.5737, -4.5359, -4.5816, -4.5295]) -tensor([-2.8085, -2.7953, -2.8140, -2.8197]) -tensor([-0.4034, -0.4065, -0.4165, -0.4085]) -tensor([-3.0658, -3.0871, -3.0881, -3.0981, -3.0619, -3.0981]) -tensor([-48.3248, -48.2365, -48.3422, -48.2735, -48.2936, -48.2842, -48.3066, - -48.2423, -48.3641, -48.3187, -48.3264, -48.3372]) -tensor([-13.9039, -13.9463]) -tensor([-0.5785, -0.5462, -0.5747]) -tensor([-3.5183, -3.5495, -3.5381, -3.5038, -3.5333, -3.5524]) -tensor([-0.3173, -0.3127]) -tensor([-29.0484, -29.0650]) -Total rewards: 2985 -Total Receives: 2981 -Stores 2228 1 -Total Receives: 2982 -Stores 2229 1 -Total Receives: 2983 -Stores 2230 1 -Total Receives: 2984 -Stores 2231 1 -Total Receives: 2985 -Stores 2232 1 -tensor([-0.4841, 0.1635, -0.4695, -0.4664]) -tensor([-1.6540, -1.6668, -1.6528]) -tensor([-27.6753, -27.6946, -27.7002, -27.6889]) -tensor([-2.4472, -2.4801, -2.4925, -2.4666, -2.4575, -2.4808]) -tensor([-8.1641, -8.1665, -8.1748]) -tensor([-0.5410, -0.5416, -0.5405, -0.5434]) -tensor([-70.9186, -70.9271]) -tensor([-24.1312, -24.1438, -24.1434]) -tensor([-0.9682, -0.9822, -0.9763]) -tensor([-0.4538, -0.4540, -0.4561]) -tensor([-46.2938, -46.2162, -46.3100, -46.2477, -46.2656, -46.2588, -46.2784, - -46.2217, -46.3380, -46.2919, -46.3058, -46.3206]) -tensor([-39.9150, -39.9340, -39.8918, -39.9421, -39.9507, -39.9507]) -Total rewards: 2993 -Total Receives: 2986 -Stores 2233 1 -Total Receives: 2987 -Stores 2234 1 -Total Receives: 2988 -Stores 2235 1 -Total Receives: 2989 -Stores 2236 1 -Total Receives: 2990 -Stores 2237 1 -Total Receives: 2991 -Stores 2238 1 -Total Receives: 2992 -Stores 2239 1 -Total Receives: 2993 -Stores 2240 1 -tensor([-43.3792, -43.3950, -43.3366, -43.3512, -43.3443, -43.3639, -43.3189, - -43.4260, -43.3769, -43.3886, -43.4246]) -tensor([-3.3049, -3.3098, -3.3175]) -tensor([-0.8020, -0.8310]) -tensor([-0.3450, -0.3485]) -tensor([-2.2898, -2.3181, -2.3354, -2.3098, -2.2983, -2.3186]) -tensor([-29.5043, -29.4714, -29.5580, -29.5847, -29.4921, -29.4897, -29.5847, - -29.5780, -29.5847]) -tensor([-1.2751, -1.2877, -1.3024]) -tensor([-0.1736, -0.1718]) -tensor([-2.8087, -2.8125]) -tensor([-3.0830, -3.1217, -3.1213, -3.1580, -3.1543, -3.1580, -3.1507, -3.1452]) -tensor([-0.2283, -0.2346, -0.1534]) -tensor([-43.0412, -43.0725, -43.0582, -43.0153, -43.0630, -43.0885]) -tensor([-45.8800, -45.9158, -45.8630, -45.8532, -45.8460, -45.8651, -45.9362, - -45.9123, -45.8409, -45.9231]) -tensor([-32.4329, -32.4161, -32.4533]) -tensor([-41.7992, -41.8196]) -tensor([-31.2207, -31.1817, -31.2079, -31.1944, -31.2234]) -tensor([-3.2204, -3.2231, -3.2656, -3.2562, -3.2657, -3.2523, -3.2470, -3.2657]) -Total rewards: 3000 -Total Receives: 2994 -Stores 2241 1 -Total Receives: 2995 -Stores 2242 1 -Total Receives: 2996 -Stores 2243 1 -Total Receives: 2997 -Stores 2244 1 -Total Receives: 2998 -Stores 2245 1 -Total Receives: 2999 -Stores 2246 1 -Total Receives: 3000 -Stores 2247 1 -tensor([-2.7628, -2.7065, -2.7268, -2.7611]) -tensor([-32.2235, -32.2362, -32.2492]) -tensor([-2.0470, -2.0519]) -tensor([-2.5156, -2.5194]) -tensor([-29.9758, -30.0002]) -tensor([-32.5927, -32.6381, -32.6754, -32.5865, -32.5924, -32.6754, -32.6603, - -32.6754]) -tensor([-2.1545, -2.1764, -2.1968]) -Total rewards: 3008 -Total Receives: 3001 -Stores 2248 1 -Total Receives: 3002 -Stores 2249 1 -Total Receives: 3003 -Stores 2250 1 -Total Receives: 3004 -Stores 2251 1 -Total Receives: 3005 -Stores 2252 1 -Total Receives: 3006 -Stores 2253 1 -Total Receives: 3007 -Stores 2254 1 -Total Receives: 3008 -Stores 2255 1 -tensor([-2.6869, -2.7115, -2.7271, -2.6845, -2.6700, -2.6911]) -tensor([-1.4810, -1.5053]) -tensor([-0.1852, -0.1715, -0.1718]) -tensor([-0.1105, -0.0914]) -tensor([-0.0862, -0.0855]) -tensor([-0.0447, -0.0681]) -tensor([-1.2957, -1.2999, -1.3133, -1.3152]) -tensor([-82.7586, -82.7497, -82.7339, -82.7627]) -tensor([-28.3288, -28.3254]) -tensor([-2.9296, -2.9142]) -tensor([-0.8750, -0.8583]) -tensor([-0.1767, -0.1784]) -tensor([-0.8562, -0.8714, -0.8731]) -tensor([-42.6664, -42.6848, -42.7031, -42.6595]) -tensor([-0.1429, -0.1441]) -tensor([-0.5775, -0.5926]) -tensor([-49.4720, -49.4363, -49.4605, -49.4639, -49.4552, -49.4813, -49.4010, - -49.4492]) -tensor([-30.9405, -30.9744, -30.9828]) -tensor([-49.9893, -50.0319, -49.9659, -49.9607, -49.9463, -49.9718, -50.0871, - -50.0303, -50.0423]) -tensor([-0.0737, -0.0803]) -tensor([-2.5589, -2.5056, -2.5681, -2.5358, -2.5681, -2.5320, -2.5266, -2.5681]) -tensor([-0.8882, -0.8909, -0.8930]) -tensor([-0.3705, -0.3706, -0.3690]) -tensor([-0.2775, -0.2762]) -tensor([-2.0927, -2.1011, -2.1108, -2.0845, -2.1042, -2.1121]) -tensor([-35.1303, -35.1782, -35.2442, -35.1249, -35.2442, -35.2003, -35.1912, - -35.2442, -35.2234, -35.2442]) -tensor([-1.9691, -1.9777, -1.9606, -1.9777, -1.9450, -1.9432, -1.9777]) -tensor([-51.4667, -51.4750, -51.4599, -51.4485, -51.4750, -51.4000, -51.4444, - -51.5010]) -tensor([-74.7023, -74.7037]) -tensor([-44.9755, -44.9838, -44.9680, -44.9526, -44.9842, -44.9769, -45.0142]) -tensor([-0.0082, -0.0109]) -tensor([-30.9897, -31.0115, -30.9812, -31.0029]) -tensor([-37.5594, -37.5886, -37.6043, -37.5909]) -tensor([-4.2269, -4.2223]) -tensor([-1.3921, -1.3452, -1.3495, -1.3629, -1.3499, -1.3664]) -tensor([-27.7497, -27.7416, -27.7623]) -tensor([-30.1469, -30.1918, -30.2589, -30.2589, -30.2106, -30.2439, -30.2589, - -30.2354, -30.2589]) -Total rewards: 3010 -Total Receives: 3009 -Stores 2256 1 -Total Receives: 3010 -Stores 2257 1 -tensor([-0.0126, -0.0101, -0.0068, -0.0090]) -Total rewards: 3020 -Total Receives: 3011 -Stores 2258 1 -Total Receives: 3012 -Stores 2259 1 -Total Receives: 3013 -Stores 2260 1 -Total Receives: 3014 -Stores 2261 1 -Total Receives: 3015 -Stores 2262 1 -Total Receives: 3016 -Stores 2263 1 -Total Receives: 3017 -Stores 2264 1 -Total Receives: 3018 -Stores 2265 1 -Total Receives: 3019 -Stores 2266 1 -Total Receives: 3020 -Stores 2267 1 -tensor([-26.0365, -26.1043, -26.1043, -26.0551, -26.0883, -26.1043, -26.0800, - -26.1043]) -Total rewards: 3027 -Total Receives: 3021 -Stores 2268 1 -Total Receives: 3022 -Stores 2269 1 -Total Receives: 3023 -Stores 2270 1 -Total Receives: 3024 -Stores 2271 1 -Total Receives: 3025 -Stores 2272 1 -Total Receives: 3026 -Stores 2273 1 -Total Receives: 3027 -Stores 2274 1 -tensor([-1.3810, -1.3875]) -tensor([-38.5916, -38.6025, -38.5898, -38.6017, -38.5979, -38.6302]) -tensor([-0.0264, -0.0389]) -tensor([-14.3760, -14.3744, -14.3842]) -Total rewards: 3033 -Total Receives: 3028 -Stores 2275 1 -Total Receives: 3029 -Stores 2276 1 -Total Receives: 3030 -Stores 2277 1 -Total Receives: 3031 -Stores 2278 1 -Total Receives: 3032 -Stores 2279 1 -Total Receives: 3033 -Stores 2280 1 -tensor([-0.5614, -0.5659])tensor([0.0289, 0.0297, 0.0287]) -Total rewards: 4739 -Total Receives: 4734 -Stores 3526 1 -Total Receives: 4735 -Stores 3527 1 -Total Receives: 4736 -Stores 3528 1 -Total Receives: 4737 -Stores 3529 1 -Total Receives: 4738 -Stores 3530 1 -Total Receives: 4739 -Stores 3531 1 -Total rewards: 4744 -Total Receives: 4740 -Stores 3532 1 -Total Receives: 4741 -Stores 3533 1 -Total Receives: 4742 -Stores 3534 1 -Total Receives: 4743 -Stores 3535 1 -Total Receives: 4744 -Stores 3536 1 -tensor([-0.2818, -0.3461, -0.2607, -0.1765]) -tensor([-0.1033, -0.0930, -0.0940, -0.0747]) -tensor([-0.0948, -0.1027, -0.1153]) -tensor([-0.2161, -0.2785, -0.1393, -0.1094]) -tensor([-3.8825, -3.8530, -3.9156, -3.9309, -3.8609, -3.9331, -3.9011, -3.8670, - -3.8941, -3.9184, -3.9017, -3.9563, -3.9424, -3.9563]) -tensor([-0.0869, -0.0968]) -tensor([0.0114, 0.0110, 0.0102]) -tensor([-0.1979, -0.1248, -0.0935]) -tensor([-0.0730, -0.0552, -0.0498, -0.0697, -0.0852, -0.0482]) -tensor([0.0054, 0.0045]) -tensor([-0.0157, -0.0353, -0.0120, -0.0382]) -Total rewards: 4751 -Total Receives: 4745 -Stores 3537 1 -Total Receives: 4746 -Stores 3538 1 -Total Receives: 4747 -Stores 3539 1 -Total Receives: 4748 -Stores 3540 1 -Total Receives: 4749 -Stores 3541 1 -Total Receives: 4750 -Stores 3542 1 -Total Receives: 4751 -Stores 3543 1 -tensor([0.0339, 0.0344, 0.0316]) -tensor([-0.2332, -0.1691]) -Total rewards: 4756 -Total Receives: 4752 -Stores 3544 1 -Total Receives: 4753 -Stores 3545 1 -Total Receives: 4754 -Stores 3546 1 -Total Receives: 4755 -Stores 3547 1 -Total Receives: 4756 -Stores 3548 1 -tensor([-0.0481, -0.0570]) -Total rewards: 4762 -Total Receives: 4757 -Stores 3549 1 -Total Receives: 4758 -Stores 3550 1 -Total Receives: 4759 -Stores 3551 1 -Total Receives: 4760 -Stores 3552 1 -Total Receives: 4761 -Stores 3553 1 -Total Receives: 4762 -Stores 3554 1 -tensor([0.0321, 0.0327, 0.0317]) -tensor([-2.9599, -2.9396, -2.9846, -2.9968, -2.9574, -3.0290, -2.9982, -2.9763, - -2.9639, -2.9837, -2.9705, -3.0295, -3.0097, -3.0295]) -Total rewards: 4765 -Total Receives: 4763 -Stores 3555 1 -Total Receives: 4764 -Stores 3556 1 -Total Receives: 4765 -Stores 3557 1 -tensor([-0.0644, -0.0473, -0.0429, -0.0588, -0.0371, -0.0743]) -Total rewards: 4772 -Total Receives: 4766 -Stores 3558 1 -Total Receives: 4767 -Stores 3559 1 -Total Receives: 4768 -Stores 3560 1 -Total Receives: 4769 -Stores 3561 1 -Total Receives: 4770 -Stores 3562 1 -Total Receives: 4771 -Stores 3563 1 -Total Receives: 4772 -Stores 3564 1 -Total rewards: 4777 -Total Receives: 4773 -Stores 3565 1 -Total Receives: 4774 -Stores 3566 1 -Total Receives: 4775 -Stores 3567 1 -Total Receives: 4776 -Stores 3568 1 -Total Receives: 4777 -Stores 3569 1 -tensor([0.0388, 0.0393]) -tensor([-0.0245, -0.0260]) -tensor([0.0401, 0.0385]) -tensor([-0.0551, -0.0391, -0.0728, -0.0232, -0.0329, -0.0331]) -tensor([-0.1057, -0.0936, -0.0946, -0.0785, -0.0801]) -tensor([-0.0711, -0.0692]) -tensor([0.0151, 0.0168]) -tensor([-0.0221, -0.0245, -0.0287, -0.0202]) -tensor([-0.0881, -0.0767, -0.0729, -0.0837, -0.1015]) -tensor([-0.0459, -0.0482, -0.0473, -0.0580]) -tensor([0.0355, 0.0361]) -tensor([-0.0632, -0.0581]) -tensor([-0.0622, -0.0532]) -tensor([-0.1201, -0.1084, -0.1094, -0.0953, -0.0880]) -tensor([-3.2834, -3.3071, -3.3179, -3.2835, -3.3444, -3.3199, -3.2957, -3.2838, - -3.3232, -3.2893, -3.3587, -3.3344, -3.3434, -3.3587]) -tensor([0.0099, 0.0074, 0.0043]) -tensor([0.0399, 0.0387, 0.0405]) -tensor([-0.0370, -0.0345, -0.0354, -0.0210]) -tensor([-0.0585, -0.0481]) -tensor([-0.0257, 0.0005, -0.0144, 0.0003, -0.0447, -0.0078, -0.0057]) -Total rewards: 4783 -Total Receives: 4778 -Stores 3570 1 -Total Receives: 4779 -Stores 3571 1 -Total Receives: 4780 -Stores 3572 1 -Total Receives: 4781 -Stores 3573 1 -Total Receives: 4782 -Stores 3574 1 -Total Receives: 4783 -Stores 3575 1 -tensor([-0.0005, 0.0011, -0.0067]) -tensor([-0.0105, -0.0032, -0.0063, -0.0029, 0.0004]) -tensor([-0.0429, -0.0340]) -Total rewards: 4790 -Total Receives: 4784 -Stores 3576 1 -Total Receives: 4785 -Stores 3577 1 -Total Receives: 4786 -Stores 3578 1 -Total Receives: 4787 -Stores 3579 1 -Total Receives: 4788 -Stores 3580 1 -Total Receives: 4789 -Stores 3581 1 -Total Receives: 4790 -Stores 3582 1 -tensor([0.0543, 0.0285, 0.0287]) -tensor([0.0280, 0.0296]) -Total rewards: 4795 -Total Receives: 4791 -Stores 3583 1 -Total Receives: 4792 -Stores 3584 1 -Total Receives: 4793 -Stores 3585 1 -Total Receives: 4794 -Stores 3586 1 -Total Receives: 4795 -Stores 3587 1 -tensor([0.0404, 0.0415]) -tensor([0.0340, 0.0375]) -tensor([-3.5133, -3.5243, -3.4963, -3.5504, -3.5269, -3.4973, -3.4860, -3.5486, - -3.5327, -3.4909, -3.5804, -3.5457, -3.5432, -3.5804]) -tensor([0.0312, 0.0380]) -tensor([0.0381, 0.0383]) -tensor([0.0715, 0.0713]) -tensor([-0.0547, -0.0469, -0.0512, -0.0478]) -tensor([-0.4235, -0.4243]) -tensor([0.0530, 0.0374]) -tensor([-0.0503, -0.0244, -0.0412, -0.0260, -0.0343, -0.0300]) -tensor([0.0261, 0.0254, 0.0204]) -Total rewards: 4801 -Total Receives: 4796 -Stores 3588 1 -Total Receives: 4797 -Stores 3589 1 -Total Receives: 4798 -Stores 3590 1 -Total Receives: 4799 -Stores 3591 1 -Total Receives: 4800 -Stores 3592 1 -Total Receives: 4801 -Stores 3593 1 -tensor([-0.6337, -0.6379, -0.6306]) -tensor([0.0333, 0.0343]) -tensor([-0.1731, -0.1737, -0.1815]) -tensor([-0.0588, -0.0518, -0.0518, -0.0518]) -tensor([-0.0535, -0.0481, -0.0481]) -tensor([-0.0824, -0.0575, -0.0628, -0.0699, -0.0638]) -tensor([-5.1028, -5.1189, -5.0622, -5.1517, -5.1227, -5.0780, -5.1461, -5.1937, - -5.1254, -5.0683, -5.1946, -5.1474, -5.1398, -5.1946]) -tensor([-0.1135, -0.1166, -0.1253]) -Total rewards: 4804 -Total Receives: 4802 -Stores 3594 1 -Total Receives: 4803 -Stores 3595 1 -Total Receives: 4804 -Stores 3596 1 -tensor([-0.0545, -0.0614, -0.0644, -0.0541]) -tensor([-0.0125, -0.0116, -0.0148, -0.0071]) -tensor([0.0314, 0.0337]) -tensor([0.0394, 0.0384]) -tensor([0.0327, 0.0342]) -tensor([-0.0700, -0.0748]) -tensor([-0.0826, -0.0781, -0.0815, -0.0641, -0.0566]) -Total rewards: 4806 -Total Receives: 4805 -Stores 3597 1 -Total Receives: 4806 -Stores 3598 1 -tensor([0.0412, 0.0406]) -tensor([-0.0819, -0.0370, -0.0653, -0.0724, -0.0659]) -tensor([0.0256, 0.0309]) -tensor([-0.0390, -0.0404, -0.0269, -0.0261, -0.0261]) -tensor([-0.0255, -0.0242, -0.0324, -0.0212]) -tensor([-3.6862, -3.6971, -3.7202, -3.7007, -3.6631, -3.7406, -3.7446, -3.7001, - -3.6525, -3.7630, -3.7255, -3.7025, -3.7630, -3.7564]) -tensor([-2.4821, -2.4931, -2.5183, -2.4969, -2.4581, -2.5398, -2.5438, -2.4960, - -2.5593, -2.5226, -2.5027, -2.5593, -2.5544]) -tensor([-0.1543, -0.1629]) -tensor([-0.0082, -0.0062, -0.0071]) -tensor([0.0047, 0.0079, 0.0088, 0.0021]) -Total rewards: 4810 -Total Receives: 4807 -Stores 3599 1 -Total Receives: 4808 -Stores 3600 1 -Total Receives: 4809 -Stores 3601 1 -Total Receives: 4810 -Stores 3602 1 -Total rewards: 4814 -Total Receives: 4811 -Stores 3603 1 -Total Receives: 4812 -Stores 3604 1 -Total Receives: 4813 -Stores 3605 1 -Total Receives: 4814 -Stores 3606 1 -tensor([0.0366, 0.0372]) -tensor([0.0323, 0.0343]) -tensor([0.0209, 0.0216]) -tensor([-0.0675, -0.0817, -0.0832, -0.0728, -0.0597]) -tensor([0.0377, 0.0397]) -tensor([-0.0455, -0.0442, -0.0454, -0.0487]) -tensor([-0.0828, -0.0755, -0.0348, -0.0455, -0.0837, -0.0641]) -tensor([0.0397, 0.0394]) -tensor([-1.8257, -1.8369, -1.8613, -1.8415, -1.7990, -1.8769, -1.8828, -1.8361, - -1.9054, -1.9207, -1.8710, -1.9207, -1.9086]) -Total rewards: 4821 -Total Receives: 4815 -Stores 3607 1 -Total Receives: 4816 -Stores 3608 1 -Total Receives: 4817 -Stores 3609 1 -Total Receives: 4818 -Stores 3610 1 -Total Receives: 4819 -Stores 3611 1 -Total Receives: 4820 -Stores 3612 1 -Total Receives: 4821 -Stores 3613 1 -Total rewards: 4827 -Total Receives: 4822 -Stores 3614 1 -Total Receives: 4823 -Stores 3615 1 -Total Receives: 4824 -Stores 3616 1 -Total Receives: 4825 -Stores 3617 1 -Total Receives: 4826 -Stores 3618 1 -Total Receives: 4827 -Stores 3619 1 -tensor([-0.0125, -0.0120, -0.0115, -0.0149]) -tensor([0.0062, 0.0073, 0.0033, 0.0016]) -tensor([-0.0491, -0.0619, -0.0389, -0.0629, -0.0523]) -tensor([0.0395, 0.0388]) -tensor([0.0394, 0.0256]) -Total rewards: 4832 -Total Receives: 4828 -Stores 3620 1 -Total Receives: 4829 -Stores 3621 1 -Total Receives: 4830 -Stores 3622 1 -Total Receives: 4831 -Stores 3623 1 -Total Receives: 4832 -Stores 3624 1 -tensor([0.0211, 0.0195, 0.0219]) -tensor([-0.0157, -0.0161]) - -tensor([-33.6590, -33.6746]) -tensor([-48.7328, -48.7978, -48.7996, -48.7450, -48.6938, -48.6909, -48.7167, - -48.8056, -48.8064, -48.7420, -48.7490, -48.7957, -48.8056]) -tensor([-33.0762, -33.0444, -33.0581, -33.0391]) -tensor([-0.6028, -0.6136]) -tensor([-30.2903, -30.2657, -30.2769]) -Total rewards: 3036 -Total Receives: 3034 -Stores 2281 1 -Total Receives: 3035 -Stores 2282 1 -Total Receives: 3036 -Stores 2283 1 -tensor([-0.9535, -0.9236, -0.9323, -0.9257, -0.9341]) -tensor([-39.6236, -39.6404, -39.6407]) -tensor([-0.2775, -0.2797]) -Total rewards: 3043 -Total Receives: 3037 -Stores 2284 1 -Total Receives: 3038 -Stores 2285 1 -Total Receives: 3039 -Stores 2286 1 -Total Receives: 3040 -Stores 2287 1 -Total Receives: 3041 -Stores 2288 1 -Total Receives: 3042 -Stores 2289 1 -Total Receives: 3043 -Stores 2290 1 -tensor([-25.4905, -25.4881]) -tensor([-2.0407, -2.0493, -2.0483]) -tensor([-0.0360, -0.0289, -0.0277]) -tensor([-1.8113, -1.8346, -1.8018, -1.8346, -1.8310, -1.7845, -1.8346]) -tensor([-0.8938, -0.8894, -0.8701, -0.8737, -0.9076]) -tensor([-0.1160, -0.1234, -0.1314, -0.1147]) -tensor([-52.2333, -52.3030, -52.3046, -52.2442, -52.1936, -52.1908, -52.2180, - -52.3206, -52.3123, -52.2418, -52.2484, -52.3206]) -Total rewards: 3047 -Total Receives: 3044 -Stores 2291 1 -Total Receives: 3045 -Stores 2292 1 -Total Receives: 3046 -Stores 2293 1 -Total Receives: 3047 -Stores 2294 1 -tensor([-3.5400, -3.5376]) -tensor([-0.0126, -0.0132, -0.0038]) -tensor([-7.9216, -7.9284]) -tensor([-2.7004, -2.6920]) -tensor([-30.8807, -30.8828]) -tensor([-22.5403, -22.5900, -22.5937, -22.5366, -22.5602, -22.5937, -22.5850, - -22.5563, -22.5937]) -Total rewards: 3054 -Total Receives: 3048 -Stores 2295 1 -Total Receives: 3049 -Stores 2296 1 -Total Receives: 3050 -Stores 2297 1 -Total Receives: 3051 -Stores 2298 1 -Total Receives: 3052 -Stores 2299 1 -Total Receives: 3053 -Stores 2300 1 -Total Receives: 3054 -Stores 2301 1 -tensor([-24.9043, -24.8803, -24.8728, -24.9081, -24.9122]) -tensor([-11.7737, -11.7743]) -tensor([-22.1007, -22.0962]) -tensor([-1.8735, -1.8963, -1.8619, -1.8963, -1.8925, -1.8963]) -tensor([-0.0459, -0.0860]) -tensor([-0.0664, -0.0758, -0.0860, -0.0558]) -tensor([-39.6956, -39.6947, -39.6806, -39.6905, -39.7192]) -tensor([-0.6333, -0.6380]) -tensor([0.0080, 0.0096]) -tensor([-27.8032, -27.7789, -27.8069, -27.8206, -27.8206]) -Total rewards: 3059 -Total Receives: 3055 -Stores 2302 1 -Total Receives: 3056 -Stores 2303 1 -Total Receives: 3057 -Stores 2304 1 -Total Receives: 3058 -Stores 2305 1 -Total Receives: 3059 -Stores 2306 1 -tensor([0.0341, 0.0326]) -tensor([-0.5781, -0.5435]) -tensor([-0.0320, -0.0507, -0.0523, -0.0591]) -tensor([-3.9908, -3.9968, -4.0005]) -tensor([-34.2260, -34.2627, -34.2597, -34.2406, -34.2681]) -tensor([-24.0602, -24.1214, -24.1352, -24.0606, -24.0882, -24.1118, -24.0827, - -24.1153, -24.1352]) -Total rewards: 3064 -Total Receives: 3060 -Stores 2307 1 -Total Receives: 3061 -Stores 2308 1 -Total Receives: 3062 -Stores 2309 1 -Total Receives: 3063 -Stores 2310 1 -Total Receives: 3064 -Stores 2311 1 -tensor([-0.0483, -0.0501, -0.0570]) -tensor([-1.6498, -1.6697, -1.6292, -1.6046, -1.6376]) -tensor([-25.4152, -25.4196, -25.4394, -25.4393]) -Total rewards: 3072 -Total Receives: 3065 -Stores 2312 1 -Total Receives: 3066 -Stores 2313 1 -Total Receives: 3067 -Stores 2314 1 -Total Receives: 3068 -Stores 2315 1 -Total Receives: 3069 -Stores 2316 1 -Total Receives: 3070 -Stores 2317 1 -Total Receives: 3071 -Stores 2318 1 -Total Receives: 3072 -Stores 2319 1 -tensor([-46.4693, -46.5431, -46.5451, -46.4795, -46.4071, -46.4462, -46.5821, - -46.5585, -46.4849, -46.5418, -46.4826, -46.5820]) -tensor([-2.6757, -2.7151, -2.7081, -2.6711, -2.7151, -2.6973]) -tensor([-28.4819, -28.4120, -28.4829]) -Total rewards: 3077 -Total Receives: 3073 -Stores 2320 1 -Total Receives: 3074 -Stores 2321 1 -Total Receives: 3075 -Stores 2322 1 -Total Receives: 3076 -Stores 2323 1 -Total Receives: 3077 -Stores 2324 1 -tensor([-1.8616, -1.8986, -1.8901, -1.8986, -1.8812, -1.8954]) -tensor([-42.5232, -42.5083, -42.5567, -42.5014, -42.5891, -42.5422]) -tensor([-22.7544, -22.7777, -22.6885, -22.7174, -22.7457, -22.7132, -22.7486, - -22.7777]) -tensor([-0.9379, -0.9519, -0.9304]) -tensor([-30.7116, -30.7497, -30.7474, -30.7640, -30.7573, -30.7469]) -tensor([-1.9859, -1.9842]) -tensor([-25.1443, -25.1617, -25.1617]) -tensor([-31.8304, -31.8699]) -tensor([-10.5000, -10.5408]) -Total rewards: 3085 -Total Receives: 3078 -Stores 2325 1 -Total Receives: 3079 -Stores 2326 1 -Total Receives: 3080 -Stores 2327 1 -Total Receives: 3081 -Stores 2328 1 -Total Receives: 3082 -Stores 2329 1 -Total Receives: 3083 -Stores 2330 1 -Total Receives: 3084 -Stores 2331 1 -Total Receives: 3085 -Stores 2332 1 -tensor([-0.1586, -0.1699]) -tensor([-0.1029, -0.1023, -0.0737]) -tensor([-2.2968, -2.3055, -2.2765, -2.2826]) -Total rewards: 3089 -Total Receives: 3086 -Stores 2333 1 -Total Receives: 3087 -Stores 2334 1 -Total Receives: 3088 -Stores 2335 1 -Total Receives: 3089 -Stores 2336 1 -tensor([-0.0814, -0.0815]) -tensor([-26.3777, -26.3040, -26.3819, -26.3852, -26.3699]) -tensor([-31.3022, -31.3387, -31.3389, -31.3551, -31.3523]) -tensor([-47.4990, -47.5726, -47.5791, -47.5069, -47.4735, -47.6393, -47.5938, - -47.5140, -47.5762, -47.5092, -47.6392]) -tensor([-4.9633, -4.9837, -4.9870, -4.9699]) -Total rewards: 3097 -Total Receives: 3090 -Stores 2337 1 -Total Receives: 3091 -Stores 2338 1 -Total Receives: 3092 -Stores 2339 1 -Total Receives: 3093 -Stores 2340 1 -Total Receives: 3094 -Stores 2341 1 -Total Receives: 3095 -Stores 2342 1 -Total Receives: 3096 -Stores 2343 1 -Total Receives: 3097 -Stores 2344 1 -Total rewards: 3099 -Total Receives: 3098 -Stores 2345 1 -Total Receives: 3099 -Stores 2346 1 -tensor([-0.4221, -0.4311]) -tensor([-1.1794, -1.1795]) -tensor([-1.8124, -1.8226, -1.7900]) -tensor([-30.6802, -30.6840, -30.6844, -30.7006]) -tensor([-23.6418, -23.6854, -23.6164, -23.6422, -23.6101, -23.6386, -23.6854]) -tensor([-2.0232, -2.0010, -2.0065, -2.0232, -1.9964, -2.0144, -2.0093]) -tensor([-42.6056, -42.6252, -42.6912, -42.6901, -42.7185, -42.6461]) -Total rewards: 3103 -Total Receives: 3100 -Stores 2347 1 -Total Receives: 3101 -Stores 2348 1 -Total Receives: 3102 -Stores 2349 1 -Total Receives: 3103 -Stores 2350 1 -tensor([-0.0513, -0.0285]) -tensor([-20.9330, -20.9477, -20.9206, -20.9624]) -tensor([-3.6887, -3.7102]) -Total rewards: 3110 -Total Receives: 3104 -Stores 2351 1 -Total Receives: 3105 -Stores 2352 1 -Total Receives: 3106 -Stores 2353 1 -Total Receives: 3107 -Stores 2354 1 -Total Receives: 3108 -Stores 2355 1 -Total Receives: 3109 -Stores 2356 1 -Total Receives: 3110 -Stores 2357 1 -tensor([-2.4351, -2.4565, -2.4622, -2.4712]) -tensor([-43.2113, -43.2751, -43.2770, -43.2172, -43.3396, -43.2856, -43.2219, - -43.2719, -43.3326, -43.2421, -43.3396]) -Total rewards: 3118 -Total Receives: 3111 -Stores 2358 1 -Total Receives: 3112 -Stores 2359 1 -Total Receives: 3113 -Stores 2360 1 -Total Receives: 3114 -Stores 2361 1 -Total Receives: 3115 -Stores 2362 1 -Total Receives: 3116 -Stores 2363 1 -Total Receives: 3117 -Stores 2364 1 -Total Receives: 3118 -Stores 2365 1 -tensor([-24.9416, -25.5647, -25.5461]) -tensor([-3.0428, -3.0193, -3.0798]) -tensor([-3.3952, -3.3956]) -tensor([-25.5269, -25.5810, -25.5856, -25.5683, -25.5999]) -tensor([-20.9916, -21.0398, -20.9940, -20.9644, -20.9899, -21.0397]) -tensor([-0.1273, -0.0860]) -tensor([-0.0279, -0.0150, -0.0080]) -tensor([-97.1218, -97.1192, -96.4436]) -tensor([-23.4943, -23.4845, -23.4981]) -tensor([-17.2165, -17.2276, -17.2463]) -tensor([-4.5242, -4.4764, -4.4952, -4.5242, -4.5182, -4.5024, -4.4968, -4.5242]) -tensor([-22.4039, -22.3975, -22.3948, -22.4079]) -tensor([-47.2822, -47.2912, -47.3718, -47.3673, -47.3224, -47.4121]) -tensor([-0.0637, -0.0791, -0.0640]) -tensor([-0.1428, -0.1508]) -tensor([-25.3766, -25.3794, -25.4182]) -tensor([-42.9135, -42.9883, -42.9823, -42.9424, -43.0279]) -Total rewards: 3125 -Total Receives: 3119 -Stores 2366 1 -Total Receives: 3120 -Stores 2367 1 -Total Receives: 3121 -Stores 2368 1 -Total Receives: 3122 -Stores 2369 1 -Total Receives: 3123 -Stores 2370 1 -Total Receives: 3124 -Stores 2371 1 -Total Receives: 3125 -Stores 2372 1 -tensor([-44.5682, -44.5528, -44.4943, -44.6035, -44.6328, -44.5648, -44.4983, - -44.5510, -44.6122, -44.5227, -44.6328]) -tensor([-2.8969, -2.9125, -2.9125]) -tensor([-2.9230, -2.9354, -2.9529]) -Total rewards: 3133 -Total Receives: 3126 -Stores 2373 1 -Total Receives: 3127 -Stores 2374 1 -Total Receives: 3128 -Stores 2375 1 -Total Receives: 3129 -Stores 2376 1 -Total Receives: 3130 -Stores 2377 1 -Total Receives: 3131 -Stores 2378 1 -Total Receives: 3132 -Stores 2379 1 -Total Receives: 3133 -Stores 2380 1 -tensor([-17.4961, -17.4781, -17.5079]) -tensor([-0.0589, -0.0450]) -tensor([-3.3020, -3.3376]) -tensor([-5.1139, -5.1187, -5.1005, -5.1071]) -tensor([-26.3002, -26.3497, -26.3569, -26.3843, -26.3719]) -tensor([-8.0368, -8.0163, -8.0114, -7.9810]) -tensor([-21.9699, -22.0183, -22.0365, -21.9754, -21.9874, -22.0365]) -tensor([-0.1032, -0.0597, -0.0674]) -tensor([-31.6610, -31.6537]) -tensor([-3.2502, -3.2847, -3.3035, -3.2886, -3.2754, -3.2728, -3.2984, -3.3035]) -tensor([-38.5485, -38.5397, -38.5072, -38.5734, -38.5943]) -tensor([-0.0399, -0.0375]) -tensor([-2.1156, -2.1387]) -Total rewards: 3137 -Total Receives: 3134 -Stores 2381 1 -Total Receives: 3135 -Stores 2382 1 -Total Receives: 3136 -Stores 2383 1 -Total Receives: 3137 -Stores 2384 1 -tensor([-26.8261, -26.8502]) -tensor([-45.3957, -45.3810, -45.4539, -45.4727, -45.3926, -45.3234, -45.3787, - -45.4406, -45.4610, -45.3486, -45.4727]) -tensor([-0.1930, -0.1836]) -tensor([-18.4795, -18.4850, -18.4918]) -tensor([-0.0899, -0.0589]) -tensor([-2.6276, -2.6415]) -tensor([-34.5320, -34.4816, -34.4739, -34.5320, -34.5342]) -Total rewards: 3141 -Total Receives: 3138 -Stores 2385 1 -Total Receives: 3139 -Stores 2386 1 -Total Receives: 3140 -Stores 2387 1 -Total Receives: 3141 -Stores 2388 1 -tensor([-3.0164, -3.0042]) -Total rewards: 3144 -Total Receives: 3142 -Stores 2389 1 -Total Receives: 3143 -Stores 2390 1 -Total Receives: 3144 -Stores 2391 1 -tensor([-25.8470, -25.8623, -25.8388, -25.8692, -25.8549, -25.8778]) -tensor([-3.2700, -3.2662, -3.2486, -3.2549]) -tensor([-27.6945, -27.6189, -27.6665, -27.6626, -27.6803]) -tensor([-32.5530, -32.5718]) -tensor([-0.1510, -0.1531]) -Total rewards: 3149 -Total Receives: 3145 -Stores 2392 1 -Total Receives: 3146 -Stores 2393 1 -Total Receives: 3147 -Stores 2394 1 -Total Receives: 3148 -Stores 2395 1 -Total Receives: 3149 -Stores 2396 1 -tensor([-16.6695, -16.6890]) -tensor([-0.1371, -0.1081]) -tensor([-1.8299, -1.8066, -1.8341]) -tensor([-23.9210, -23.9751, -24.0086, -24.0124, -23.9528, -24.0124]) -tensor([-32.2850, -32.2366, -32.2846, -32.2981]) -Total rewards: 3155 -Total Receives: 3150 -Stores 2397 1 -Total Receives: 3151 -Stores 2398 1 -Total Receives: 3152 -Stores 2399 1 -Total Receives: 3153 -Stores 2400 1 -Total Receives: 3154 -Stores 2401 1 -Total Receives: 3155 -Stores 2402 1 -tensor([-26.4603, -26.4324, -26.4431, -26.4656]) -tensor([-40.5121, -40.4980, -40.5685, -40.5991, -40.5868, -40.5175, -40.5190, - -40.5919, -40.5517, -40.5830, -40.4654, -40.5930, -40.5992]) -tensor([-34.7946, -34.7810, -34.8492, -34.8796, -34.8669, -34.7998, -34.8013, - -34.8719, -34.8330, -34.8633, -34.8729, -34.8796]) -tensor([-5.8822, -5.8524, -5.8439, -5.8910]) -tensor([-1.1805, -1.1840, -1.1921]) -tensor([-21.8420, -21.8248, -21.8476]) -tensor([-0.6397, -0.6443, -0.6428]) -tensor([-2.6634, -2.6755, -2.6586]) -tensor([-2.6726, -2.6890, -2.7340, -2.6992, -2.7010, -2.7119, -2.7340]) -Total rewards: 3160 -Total Receives: 3156 -Stores 2403 1 -Total Receives: 3157 -Stores 2404 1 -Total Receives: 3158 -Stores 2405 1 -Total Receives: 3159 -Stores 2406 1 -Total Receives: 3160 -Stores 2407 1 -tensor([-0.8615, -0.8602]) -tensor([-18.4838, -18.4913]) -tensor([-32.5644, -32.5896, -32.5910, -32.5991]) -tensor([-25.2603, -25.2925, -25.3122, -25.3058, -25.2486, -25.3122]) -tensor([-31.7422, -31.7561, -31.7879, -31.7512, -31.7686, -31.7782, -31.7478]) -tensor([-21.5596, -21.5734, -21.5727, -21.5734, -21.5734]) -tensor([-29.0212, -29.0353, -29.0665, -29.0302, -29.0576, -29.0267]) -tensor([-0.0566, -0.0610, -0.0547, -0.0556]) -tensor([-4.7025, -4.6780, -4.7074, -4.7301]) -tensor([-25.9760, -25.9960, -25.9599, -25.9874, -25.9565]) -Total rewards: 3166 -Total Receives: 3161 -Stores 2408 1 -Total Receives: 3162 -Stores 2409 1 -Total Receives: 3163 -Stores 2410 1 -Total Receives: 3164 -Stores 2411 1 -Total Receives: 3165 -Stores 2412 1 -Total Receives: 3166 -Stores 2413 1 -tensor([-0.0437, -0.0355, -0.0340]) -tensor([-8.4338, -8.4269, -8.4390, -8.4239]) -tensor([-4.5754, -4.5503, -4.5808]) -tensor([-0.4719, -0.4693]) -tensor([-35.5923, -35.5912, -35.6091]) -tensor([-0.2626, -0.2575, -0.2650]) -tensor([-1.7665, -1.7602, -1.7584]) -tensor([-0.2078, -0.2052, -0.2112]) -tensor([-40.0794, -40.0305, -40.1018, -40.0736]) -tensor([-5.8546, -5.8573]) -tensor([-32.6105, -32.6261]) -tensor([-45.1048, -45.1653, -45.2241, -45.1942, -45.1021, -45.1204, -45.2023, - -45.1961, -45.1447, -45.1819, -45.2147, -45.1931, -45.2241]) -tensor([-0.0343, -0.0255]) -tensor([-3.5168, -3.5286, -3.5645, -3.5668]) -tensor([-40.0845, -40.1383, -40.1917, -40.1632, -40.0991, -40.1660, -40.1654, - -40.1199, -40.1531, -40.1813, -40.1622, -40.1918]) -tensor([-5.1312, -5.1752]) -Total rewards: 3169 -Total Receives: 3167 -Stores 2414 1 -Total Receives: 3168 -Stores 2415 1 -Total Receives: 3169 -Stores 2416 1 -tensor([-18.5492, -18.5373, -18.5297, -18.5575, -18.5533, -18.5512, -18.5575]) -tensor([-1.2536, -1.2467, -1.2431]) -tensor([-77.0642, -77.0860, -77.0703, -77.0563, -77.0972]) -tensor([-15.5787, -15.5675, -15.5605, -15.5872, -15.5828, -15.5872]) -tensor([-0.0785, -0.0743]) -tensor([-11.9704, -11.9748]) -tensor([-1.3628, -1.3687, -1.3668]) -Total rewards: 3174 -Total Receives: 3170 -Stores 2417 1 -Total Receives: 3171 -Stores 2418 1 -Total Receives: 3172 -Stores 2419 1 -Total Receives: 3173 -Stores 2420 1 -Total Receives: 3174 -Stores 2421 1 -tensor([-4.0038, -4.0667, -4.0115, -4.0170, -4.0263, -4.0667]) -tensor([-29.3722, -29.3784]) -tensor([-37.2852, -37.3138, -37.3205, -37.2825]) -Total rewards: 3183 -Total Receives: 3175 -Stores 2422 1 -Total Receives: 3176 -Stores 2423 1 -Total Receives: 3177 -Stores 2424 1 -Total Receives: 3178 -Stores 2425 1 -Total Receives: 3179 -Stores 2426 1 -Total Receives: 3180 -Stores 2427 1 -Total Receives: 3181 -Stores 2428 1 -Total Receives: 3182 -Stores 2429 1 -Total Receives: 3183 -Stores 2430 1 -tensor([-25.8359, -25.7836, -25.8285, -25.8382, -25.8098, -25.7511, -25.8382]) -tensor([-0.0290, -0.0289]) -tensor([-0.1268, -0.1329, -0.1283]) -tensor([-5.1286, -5.1194, -5.1283]) -tensor([-25.6780, -25.6921, -25.6601, -25.7144, -25.6929]) -tensor([-0.1492, -0.1458, -0.1464, -0.1440]) -tensor([-22.7005, -22.7178, -22.7426, -22.7171]) -tensor([-1.9644, -1.9710, -1.9635]) -tensor([-44.6345, -44.6869, -44.7522, -44.7149, -44.6529, -44.7125, -44.7141, - -44.6865, -44.6975, -44.7273, -44.7283, -44.7284]) -Total rewards: 3189 -Total Receives: 3184 -Stores 2431 1 -Total Receives: 3185 -Stores 2432 1 -Total Receives: 3186 -Stores 2433 1 -Total Receives: 3187 -Stores 2434 1 -Total Receives: 3188 -Stores 2435 1 -Total Receives: 3189 -Total rewards: 3194 -Total Receives: 3190 -Total Receives: 3191 -Total Receives: 3192 -Total Receives: 3193 -Total Receives: 3194 -tensor([-2.9395, -2.9687, -2.9658, -2.9853]) -tensor([-23.3043, -23.2521, -23.2975, -23.3134, -23.2778, -23.3134, -23.3129]) -tensor([-1.0247, -1.0315, -1.0239]) -tensor([-22.4922, -22.5055, -22.4807]) -Total rewards: 3198 -Total Receives: 3195 -Total Receives: 3196 -Total Receives: 3197 -Total Receives: 3198 -tensor([-3.4990, -3.4428, -3.4532, -3.4526, -3.4990]) -Total rewards: 3207 -Total Receives: 3199 -Stores 2436 1 -Total Receives: 3200 -Stores 2437 1 -Total Receives: 3201 -Stores 2438 1 -Total Receives: 3202 -Stores 2439 1 -Total Receives: 3203 -Stores 2440 1 -Total Receives: 3204 -Stores 2441 1 -Total Receives: 3205 -Stores 2442 1 -Total Receives: 3206 -Stores 2443 1 -Total Receives: 3207 -Stores 2444 1 -tensor([-32.2978, -32.2976, -32.3059]) -tensor([-1.0138, -1.0113]) -tensor([-18.4695, -18.4554, -18.4783, -18.4933, -18.4669, -18.4933]) -tensor([-0.0534, -0.0603]) -tensor([-0.5831, -0.5927, -0.5854]) -tensor([-1.3469, -1.3410, -1.3430]) -tensor([-33.6246, -33.6907, -33.6699, -33.6541, -33.6805, -33.6996]) -tensor([-0.0112, 0.0011]) -tensor([-40.0999, -40.1160]) -Total rewards: 3215 -Total Receives: 3208 -Stores 2445 1 -Total Receives: 3209 -Stores 2446 1 -Total Receives: 3210 -Stores 2447 1 -Total Receives: 3211 -Stores 2448 1 -Total Receives: 3212 -Stores 2449 1 -Total Receives: 3213 -Stores 2450 1 -Total Receives: 3214 -Stores 2451 1 -Total Receives: 3215 -Stores 2452 1 -tensor([-31.1760, -31.1554, -31.1385, -31.1655, -31.1870]) -tensor([-43.2896, -43.3280, -43.3444, -43.3764]) -Total rewards: 3222 -Total Receives: 3216 -Stores 2453 1 -Total Receives: 3217 -Stores 2454 1 -Total Receives: 3218 -Stores 2455 1 -Total Receives: 3219 -Stores 2456 1 -Total Receives: 3220 -Stores 2457 1 -Total Receives: 3221 -Stores 2458 1 -Total Receives: 3222 -Stores 2459 1 -tensor([-47.4725, -47.5567, -47.5035, -47.4379, -47.5015, -47.5026, -47.4782, - -47.4840, -47.5405, -47.5201, -47.5401, -47.5095, -47.5361, -47.5567]) -Total rewards: 3225 -Total Receives: 3223 -Stores 2460 1 -Total Receives: 3224 -Stores 2461 1 -Total Receives: 3225 -Stores 2462 1 -tensor([-0.1929, -0.1940, -0.1928]) -tensor([-0.2820, -0.3022, -0.2861]) -tensor([-0.0990, -0.0701, -0.0742, -0.0844, -0.0857, -0.0771]) -tensor([-2.4362, -2.4255, -2.4088, -2.4425]) -tensor([-22.6102, -22.5633, -22.6084, -22.6203, -22.5849, -22.6326, -22.6357, - -22.6223, -22.6298]) -tensor([-4.8587, -4.8717, -4.8599]) -tensor([-19.9863, -19.9977]) -tensor([-26.9234, -26.9027, -26.9127, -26.9447]) -tensor([-0.1523, -0.1558]) -Total rewards: 3233 -Total Receives: 3226 -Total Receives: 3227 -Total Receives: 3228 -Total Receives: 3229 -Total Receives: 3230 -Total Receives: 3231 -Total Receives: 3232 -Total Receives: 3233 -tensor([-6.2048, -6.1906]) -Total rewards: 3242 -Total Receives: 3234 -Stores 2463 1 -Total Receives: 3235 -Stores 2464 1 -Total Receives: 3236 -Stores 2465 1 -Total Receives: 3237 -Stores 2466 1 -Total Receives: 3238 -Stores 2467 1 -Total Receives: 3239 -Stores 2468 1 -Total Receives: 3240 -Stores 2469 1 -Total Receives: 3241 -Stores 2470 1 -Total Receives: 3242 -Stores 2471 1 -tensor([-24.6262, -24.6153, -24.6500]) -tensor([-30.5368, -30.5675, -30.5692]) -tensor([-19.3211, -19.2528, -19.2518, -19.3211]) -tensor([-38.8231, -38.8396, -38.8990]) -Total rewards: 3247 -Total Receives: 3243 -Stores 2472 1 -Total Receives: 3244 -Stores 2473 1 -Total Receives: 3245 -Stores 2474 1 -Total Receives: 3246 -Stores 2475 1 -Total Receives: 3247 -Stores 2476 1 -Total rewards: 3250 -Total Receives: 3248 -Stores 2477 1 -Total Receives: 3249 -Stores 2478 1 -Total Receives: 3250 -Stores 2479 1 -tensor([-0.0810, -0.0565]) -tensor([-57.4468, -57.4556, -57.4899, -57.4931, -57.4599, -57.4930]) -tensor([-31.6515, -31.6569]) -tensor([-45.5634, -45.6605, -45.5935, -45.5347, -45.5884, -45.5921, -45.5676, - -45.6529, -45.5849, -45.6275, -45.6145, -45.6037, -45.6245, -45.6604]) -tensor([-0.6830, -0.6675]) -tensor([-2.2358, -2.2594, -2.2271]) -tensor([-51.6496, -51.6583, -51.6942, -51.6622, -51.6942]) -Total rewards: 3259 -Total Receives: 3251 -Stores 2480 1 -Total Receives: 3252 -Stores 2481 1 -Total Receives: 3253 -Stores 2482 1 -Total Receives: 3254 -Stores 2483 1 -Total Receives: 3255 -Stores 2484 1 -Total Receives: 3256 -Stores 2485 1 -Total Receives: 3257 -Stores 2486 1 -Total Receives: 3258 -Stores 2487 1 -Total Receives: 3259 -Stores 2488 1 -tensor([-14.6895, -14.7201, -14.6940, -14.7201]) -tensor([-25.6868, -25.6755, -25.6871, -25.6504, -25.6995, -25.7150, -25.6964, - -25.6916]) -tensor([-40.2472, -40.3538, -40.2793, -40.3071, -40.2733, -40.2524, -40.3499, - -40.3436, -40.2702, -40.3160, -40.3019, -40.2903, -40.3119, -40.3539]) -Total rewards: 3266 -Total Receives: 3260 -Total Receives: 3261 -Total Receives: 3262 -Total Receives: 3263 -Total Receives: 3264 -Total Receives: 3265 -Total Receives: 3266 -tensor([-3.8140, -3.7817, -3.8280]) -tensor([-42.6928, -42.7073, -42.6891, -42.7112, -42.6924, -42.7006]) -Total rewards: 3273 -Total Receives: 3267 -Stores 2489 1 -Total Receives: 3268 -Stores 2490 1 -Total Receives: 3269 -Stores 2491 1 -Total Receives: 3270 -Stores 2492 1 -Total Receives: 3271 -Stores 2493 1 -Total Receives: 3272 -Stores 2494 1 -Total Receives: 3273 -Stores 2495 1 -tensor([-36.0551, -36.1615, -36.1134, -36.0802, -36.0601, -36.1565, -36.1502, - -36.0772, -36.1222, -36.1080, -36.0966, -36.1180, -36.1615]) -tensor([-3.2911, -3.3066]) -Total rewards: 3280 -Total Receives: 3274 -Stores 2496 1 -Total Receives: 3275 -Stores 2497 1 -Total Receives: 3276 -Stores 2498 1 -Total Receives: 3277 -Stores 2499 1 -Total Receives: 3278 -Stores 2500 1 -Total Receives: 3279 -Stores 2501 1 -Total Receives: 3280 -Stores 2502 1 -tensor([-21.3993, -21.4343, -21.4286, -21.4399]) -tensor([-23.1975, -23.1885, -23.1984, -23.2113, -23.2316, -23.2081, -23.2030]) -tensor([-27.2776, -27.3061, -27.3061]) -Total rewards: 3287 -Total Receives: 3281 -Stores 2503 1 -Total Receives: 3282 -Stores 2504 1 -Total Receives: 3283 -Stores 2505 1 -Total Receives: 3284 -Stores 2506 1 -Total Receives: 3285 -Stores 2507 1 -Total Receives: 3286 -Stores 2508 1 -Total Receives: 3287 -Stores 2509 1 -tensor([-4.1847, -4.1806, -4.1824, -4.1395, -4.1847]) -Total rewards: 3292 -Total Receives: 3288 -Stores 2510 1 -Total Receives: 3289 -Stores 2511 1 -Total Receives: 3290 -Stores 2512 1 -Total Receives: 3291 -Stores 2513 1 -Total Receives: 3292 -Stores 2514 1 -tensor([-0.0820, -0.0977]) -tensor([-0.1236, -0.1006, -0.1074, -0.1109, -0.0881]) -tensor([-0.0333, -0.0405]) -Total rewards: 3297 -Total Receives: 3293 -Total Receives: 3294 -Total Receives: 3295 -Total Receives: 3296 -Total Receives: 3297 -tensor([-33.5961, -33.5419, -33.5090, -33.4897, -33.5824, -33.5764, -33.5065, - -33.5495, -33.5368, -33.5261, -33.5425, -33.5961]) -tensor([-40.4099, -40.3989, -40.3799, -40.4011, -40.4153, -40.4214, -40.4048]) -tensor([-22.8036, -22.8155, -22.8195, -22.8195]) -tensor([-19.9467, -19.9804]) -tensor([-41.6683, -41.6134, -41.6638, -41.6736]) -tensor([-3.1341, -3.1219, -3.0898, -3.1346, -3.1341]) -Total rewards: 3305 -Total Receives: 3298 -Stores 2515 1 -Total Receives: 3299 -Stores 2516 1 -Total Receives: 3300 -Stores 2517 1 -Total Receives: 3301 -Stores 2518 1 -Total Receives: 3302 -Stores 2519 1 -Total Receives: 3303 -Stores 2520 1 -Total Receives: 3304 -Stores 2521 1 -Total Receives: 3305 -Stores 2522 1 -tensor([-39.0933, -39.1423, -39.1531]) -tensor([-1.7643, -1.7834]) -Total rewards: 3311 -Total Receives: 3306 -Stores 2523 1 -Total Receives: 3307 -Stores 2524 1 -Total Receives: 3308 -Stores 2525 1 -Total Receives: 3309 -Stores 2526 1 -Total Receives: 3310 -Stores 2527 1 -Total Receives: 3311 -Stores 2528 1 -Total rewards: 3319 -Total Receives: 3312 -Stores 2529 1 -Total Receives: 3313 -Stores 2530 1 -Total Receives: 3314 -Stores 2531 1 -Total Receives: 3315 -Stores 2532 1 -Total Receives: 3316 -Stores 2533 1 -Total Receives: 3317 -Stores 2534 1 -Total Receives: 3318 -Stores 2535 1 -Total Receives: 3319 -Stores 2536 1 -tensor([-14.6149, -14.6331, -14.5930, -14.6331]) -tensor([-3.9527, -3.9676, -3.9688]) -tensor([-24.7339, -24.7387, -24.7515, -24.7876, -24.7526, -24.7507]) -Total rewards: 3327 -Total Receives: 3320 -Stores 2537 1 -Total Receives: 3321 -Stores 2538 1 -Total Receives: 3322 -Stores 2539 1 -Total Receives: 3323 -Stores 2540 1 -Total Receives: 3324 -Stores 2541 1 -Total Receives: 3325 -Stores 2542 1 -Total Receives: 3326 -Stores 2543 1 -Total Receives: 3327 -Stores 2544 1 -tensor([-1.1942, -1.1759]) -tensor([-3.7837, -3.8244]) -tensor([-22.0829, -22.0960, -22.0901]) -tensor([-0.1429, -0.0921, -0.1169, -0.1318, -0.1274]) -Total rewards: 3331 -Total Receives: 3328 -Stores 2545 1 -Total Receives: 3329 -Stores 2546 1 -Total Receives: 3330 -Stores 2547 1 -Total Receives: 3331 -Stores 2548 1 -tensor([-27.0344, -27.0531, -27.0531]) -tensor([-0.0197, -0.0406]) -tensor([-44.9500, -44.9504, -44.9273, -44.9330, -44.9217, -44.9330, -44.9404, - -44.9288, -44.9554]) -tensor([-32.1323, -32.0988, -32.0790, -32.1813, -32.1642, -32.0943, -32.1399, - -32.1272, -32.1161, -32.1318, -32.2047]) -tensor([-0.7502, -0.7502]) -tensor([-5.2785, -5.2996, -5.2960]) -Total rewards: 3336 -Total Receives: 3332 -Stores 2549 1 -Total Receives: 3333 -Stores 2550 1 -Total Receives: 3334 -Stores 2551 1 -Total Receives: 3335 -Stores 2552 1 -Total Receives: 3336 -Stores 2553 1 -Total rewards: 3342 -Total Receives: 3337 -Stores 2554 1 -Total Receives: 3338 -Stores 2555 1 -Total Receives: 3339 -Stores 2556 1 -Total Receives: 3340 -Stores 2557 1 -Total Receives: 3341 -Stores 2558 1 -Total Receives: 3342 -Stores 2559 1 -tensor([-30.5078, -30.4755, -30.4568, -30.5568, -30.5387, -30.4712, -30.5007, - -30.4924, -30.5069, -30.5837]) -tensor([-14.8660, -14.8207, -14.8459, -14.8660, -14.9808]) -tensor([-3.4807, -3.4787, -3.4752, -3.4815]) -tensor([-0.1099, -0.0707, -0.0938]) -tensor([0.0059, 0.0031, 0.0036]) -tensor([-0.1053, -0.0653, -0.0666, -0.1046, -0.0854, -0.0488]) -tensor([-0.1821, -0.1869]) -tensor([0.0398, 0.0333]) -tensor([0.0252, 0.0259]) -tensor([ 0.0051, -0.0040, 0.0015]) -tensor([-0.0886, -0.0889, -0.1269, -0.1015, -0.0677]) -tensor([0.0265, 0.0262]) -Total rewards: 4837 -Total Receives: 4833 -Stores 3625 1 -Total Receives: 4834 -Stores 3626 1 -Total Receives: 4835 -Stores 3627 1 -Total Receives: 4836 -Stores 3628 1 -Total Receives: 4837 -Stores 3629 1 -tensor([0.0177, 0.0198]) -tensor([-2.9616, -2.9732, -3.0032, -2.9783, -3.0144, -3.0091, -2.9623, -3.0335, - -3.0629, -3.0137, -3.0629, -3.0425]) -tensor([-0.0199, -0.0235, -0.0215, -0.0220, -0.0210]) -Total rewards: 4841 -Total Receives: 4838 -Stores 3630 1 -Total Receives: 4839 -Stores 3631 1 -Total Receives: 4840 -Stores 3632 1 -Total Receives: 4841 -Stores 3633 1 -tensor([-0.0211, -0.0207, -0.0248, -0.0217]) -loss: 12.78576374053955, td_error: 12.78576374053955, entropy: 4.718231678009033 -Train step: 0.5288698673248291 Optimizer Step: 1 -loss: 27776142.0, td_error: 70602432512.0, entropy: 1.8125001192092896 -Train step: 0.6425478458404541 Optimizer Step: 2 -loss: 74.55108642578125, td_error: 519224448.0, entropy: 3.296875 -Train step: 1.3912060260772705 Optimizer Step: 3 -loss: 1.1767648458480835, td_error: 964680.25, entropy: 0.0 -Train step: 0.6564888954162598 Optimizer Step: 4 -loss: 653.296875, td_error: 400498432.0, entropy: 0.00032143527641892433 -Train step: 0.9877843856811523 Optimizer Step: 5 -loss: 19.488731384277344, td_error: 16211236.0, entropy: 2.99560546875 -Train step: 1.2516019344329834 Optimizer Step: 6 -loss: 0.04999544098973274, td_error: 8433.6181640625, entropy: 0.9839757084846497 -Train step: 0.9095120429992676 Optimizer Step: 7 -loss: 1.5784860849380493, td_error: 50040.5, entropy: 0.0 -Train step: 0.5644018650054932 Optimizer Step: 8 -loss: 8.345884323120117, td_error: 569084.625, entropy: 1.231515645980835 -Train step: 0.8739490509033203 Optimizer Step: 9 -loss: 217.82139587402344, td_error: 16442532.0, entropy: 3.610840082168579 -Train step: 1.186126947402954 Optimizer Step: 10 -tensor([13.0849, 13.0866]) -tensor([7.1688, 6.6379, 5.4956]) -tensor([16.2715, 16.0937]) -tensor([3.2309, 3.2126, 2.9526]) -Total rewards: 4847 -Total Receives: 4842 -Stores 3634 1 -Total Receives: 4843 -Stores 3635 1 -Total Receives: 4844 -Stores 3636 1 -Total Receives: 4845 -Stores 3637 1 -Total Receives: 4846 -Stores 3638 1 -Total Receives: 4847 -Stores 3639 1 -tensor([6.6049, 4.8692, 6.0524, 4.3313, 5.0020]) -tensor([10.4972, 8.7548, 9.5828, 8.0897]) -tensor([6.4525, 6.6612, 8.4271, 7.6726, 5.2655]) -tensor([28.9056, 28.1570, 28.8747, 27.6094, 27.2388, 28.8668, 26.7675, 26.1620, - 28.7826, 26.1620, 27.0561]) -tensor([2.3736, 1.5901, 1.7619]) -tensor([12.9261, 11.2897, 7.7155]) -Total rewards: 4849 -Total Receives: 4848 -Stores 3640 1 -Total Receives: 4849 -Stores 3641 1 -tensor([2.4972, 2.1256]) -tensor([2.3716, 2.0061]) -tensor([12.4932, 13.7007]) -Total rewards: 4858 -Total Receives: 4850 -Stores 3642 1 -Total Receives: 4851 -Stores 3643 1 -Total Receives: 4852 -Stores 3644 1 -Total Receives: 4853 -Stores 3645 1 -Total Receives: 4854 -Stores 3646 1 -Total Receives: 4855 -Stores 3647 1 -Total Receives: 4856 -Stores 3648 1 -Total Receives: 4857 -Stores 3649 1 -Total Receives: 4858 -Stores 3650 1 -tensor([5.0758, 6.3886, 4.4786, 5.2114, 4.0831]) -tensor([2.8224, 3.1402, 2.5257]) -tensor([8.4517, 7.8322, 7.9440]) -tensor([6.7882, 5.5328, 4.3994]) -loss: 2.0661516828113236e-05, td_error: 175.5941925048828, entropy: 2.5383799076080322 -Train step: 0.6119098663330078 Optimizer Step: 11 -tensor([2.8984, 2.3072]) -tensor([ 0.5834, -0.1997]) -tensor([10.4411, 9.5811, 8.1441]) -tensor([1.8869, 1.6210]) -tensor([5.2927, 3.7715]) -Total rewards: 4861 -Total Receives: 4859 -Stores 3651 1 -Total Receives: 4860 -Stores 3652 1 -Total Receives: 4861 -Stores 3653 1 -Total rewards: 4863 -Total Receives: 4862 -Stores 3654 1 -Total Receives: 4863 -Stores 3655 1 -tensor([3.5762, 2.8327, 2.8327]) -Total rewards: 4871 -Total Receives: 4864 -Stores 3656 1 -Total Receives: 4865 -Stores 3657 1 -Total Receives: 4866 -Stores 3658 1 -Total Receives: 4867 -Stores 3659 1 -Total Receives: 4868 -Stores 3660 1 -Total Receives: 4869 -Stores 3661 1 -Total Receives: 4870 -Stores 3662 1 -Total Receives: 4871 -Stores 3663 1 -tensor([1.8302, 5.7449, 1.9749, 1.6315]) -tensor([8.6619, 7.2341]) -Total rewards: 4875 -Total Receives: 4872 -Stores 3664 1 -Total Receives: 4873 -Stores 3665 1 -Total Receives: 4874 -Stores 3666 1 -Total Receives: 4875 -Stores 3667 1 -tensor([4.0478, 3.3849]) -tensor([0.8057, 1.1103, 0.8537, 1.1604, 1.4097]) -loss: 0.00013427493104245514, td_error: 1141.150390625, entropy: 2.5199718475341797 -Train step: 0.5023276805877686 Optimizer Step: 12 -tensor([4.2691, 4.1642, 4.2928, 4.0872]) -tensor([8.5988, 8.9025, 9.0615]) -Total rewards: 4880 -Total Receives: 4876 -Stores 3668 1 -Total Receives: 4877 -Stores 3669 1 -Total Receives: 4878 -Stores 3670 1 -Total Receives: 4879 -Stores 3671 1 -Total Receives: 4880 -Stores 3672 1 -tensor([9.8842, 7.0485, 7.7000, 8.5444, 7.2154]) -tensor([ 8.0521, 9.6813, 11.5550, 10.8280, 7.4343]) -Total rewards: 4887 -Total Receives: 4881 -Stores 3673 1 -Total Receives: 4882 -Stores 3674 1 -Total Receives: 4883 -Stores 3675 1 -Total Receives: 4884 -Stores 3676 1 -Total Receives: 4885 -Stores 3677 1 -Total Receives: 4886 -Stores 3678 1 -Total Receives: 4887 -Stores 3679 1 -tensor([17.6590, 16.0249]) -tensor([5.6285, 4.6630, 3.6167]) -Total rewards: 4893 -Total Receives: 4888 -Stores 3680 1 -Total Receives: 4889 -Stores 3681 1 -Total Receives: 4890 -Stores 3682 1 -Total Receives: 4891 -Stores 3683 1 -Total Receives: 4892 -Stores 3684 1 -Total Receives: 4893 -Stores 3685 1 -tensor([28.5249, 27.9059, 28.4984, 27.5501, 27.2357, 28.4732, 26.1153, 28.4635, - 26.1152, 27.1360]) -tensor([4.3903, 3.8692, 4.0185, 4.6051, 3.8142]) -tensor([4.0836, 3.4489]) -tensor([28.9222, 28.0748, 28.8812, 27.4627, 28.8797, 24.8661, 28.8216, 24.8661, - 26.7432]) -tensor([2.1800, 1.5754, 1.5188, 1.7581, 1.4726]) -Total rewards: 4897 -Total Receives: 4894 -Stores 3686 1 -Total Receives: 4895 -Stores 3687 1 -Total Receives: 4896 -Stores 3688 1 -Total Receives: 4897 -Stores 3689 1 -tensor([6.1550, 6.8127, 7.6457, 6.3076]) -tensor([8.1061, 8.1607, 8.5451, 8.8329]) -tensor([1.7506, 2.6775, 2.4060, 2.0261, 1.9364]) -loss: 1.6082354704849422e-05, td_error: 136.6776885986328, entropy: 1.624068260192871 -Train step: 0.5112438201904297 Optimizer Step: 13 -tensor([4.8911, 4.8911]) -tensor([21.7182, 22.4157, 21.2562, 22.4132, 19.4461, 22.3635, 19.4461, 20.7758]) -tensor([18.5086, 16.0094]) -tensor([7.2745, 5.3098]) -tensor([19.7616, 19.2659, 20.4235, 17.3601, 20.3859, 17.3601, 18.7611, 17.3601]) -tensor([4.4343, 4.8803, 4.7462, 4.0520, 3.9674, 4.2537]) -tensor([2.4690, 3.1504, 2.7848, 2.6832]) -tensor([4.3805, 4.7126, 4.4389, 4.7717, 3.4308]) -tensor([20.1875, 19.3989, 21.2474, 16.1625, 16.1625, 18.5015, 16.1625]) -tensor([5.2117, 6.4968, 7.4101, 4.3957, 4.5533, 4.3201, 4.7288, 4.3201]) -tensor([3.0089, 4.7093, 3.3586]) -Total rewards: 4902 -Total Receives: 4898 -Stores 3690 1 -Total Receives: 4899 -Stores 3691 1 -Total Receives: 4900 -Stores 3692 1 -Total Receives: 4901 -Stores 3693 1 -Total Receives: 4902 -Stores 3694 1 -Total rewards: 4909 -Total Receives: 4903 -Stores 3695 1 -Total Receives: 4904 -Stores 3696 1 -Total Receives: 4905 -Stores 3697 1 -Total Receives: 4906 -Stores 3698 1 -Total Receives: 4907 -Stores 3699 1 -Total Receives: 4908 -Stores 3700 1 -Total Receives: 4909 -Stores 3701 1 -loss: 1.5120891475817189e-05, td_error: 128.50657653808594, entropy: 2.2307307720184326 -Train step: 0.40218186378479004 Optimizer Step: 14 -tensor([4.8463, 5.3608, 4.9545]) -Total rewards: 4914 -Total Receives: 4910 -Stores 3702 1 -Total Receives: 4911 -Stores 3703 1 -Total Receives: 4912 -Stores 3704 1 -Total Receives: 4913 -Stores 3705 1 -Total Receives: 4914 -Stores 3706 1 -tensor([5.3725, 5.7336, 5.7950, 4.0651]) -tensor([7.4076, 4.1121, 3.9820]) -tensor([4.9993, 4.8788, 5.1057, 5.3752, 5.5310]) -tensor([7.0150, 8.6194, 9.7348, 5.9110, 5.6082, 6.3503, 5.6082]) -tensor([3.2293, 3.6014]) -tensor([5.4959, 4.9662, 4.9662]) -tensor([4.4060, 4.8414, 4.7174, 3.5863, 4.0212, 3.5514]) -Total rewards: 4922 -Total Receives: 4915 -Stores 3707 1 -Total Receives: 4916 -Stores 3708 1 -tensor([-12.0269, -12.0569, -12.0648, -12.0067]) -tensor([-29.8503, -29.8496, -29.8274, -29.8587, -29.8352, -29.8305, -29.8509, - -29.8405, -29.8290, -29.8615]) -tensor([-31.7254, -31.6990, -31.7292]) -tensor([-0.0564, -0.0564]) -tensor([-17.3346, -17.3367]) -tensor([-2.1676, -2.1981]) -Total rewards: 3349 -Total Receives: 3343 -Stores 2560 1 -Total Receives: 3344 -Stores 2561 1 -Total Receives: 3345 -Stores 2562 1 -Total Receives: 3346 -Stores 2563 1 -Total Receives: 3347 -Stores 2564 1 -Total Receives: 3348 -Stores 2565 1 -Total Receives: 3349 -Stores 2566 1 -tensor([-19.5761, -19.6341, -19.5996, -19.6299, -19.6469, -19.5918, -19.5876]) -Total rewards: 3351 -Total Receives: 3350 -Stores 2567 1 -Total Receives: 3351 -Stores 2568 1 -tensor([-0.1583, -0.1174, -0.1339, -0.1493, -0.1470]) -Total rewards: 3360 -Total Receives: 3352 -Stores 2569 1 -Total Receives: 3353 -Stores 2570 1 -Total Receives: 3354 -Stores 2571 1 -Total Receives: 3355 -Stores 2572 1 -Total Receives: 3356 -Stores 2573 1 -Total Receives: 3357 -Stores 2574 1 -Total Receives: 3358 -Stores 2575 1 -Total Receives: 3359 -Stores 2576 1 -Total Receives: 3360 -Stores 2577 1 -tensor([-10.0046, -10.0353, -10.0251, -10.0234, -10.0431]) -tensor([-19.4088, -19.4335, -19.4606, -19.4817, -19.4348, -19.4200]) -tensor([-21.3408, -21.3521, -21.3521]) -tensor([-16.5243, -16.5495, -16.5697, -16.5250, -16.5113]) -tensor([-0.1590, -0.1247, -0.1482, -0.1444]) -tensor([-31.4898, -31.4469, -31.4396, -31.5270, -31.5101, -31.4443, -31.4809, - -31.4731, -31.5751]) -tensor([-3.8594, -3.8554, -3.8529, -3.8683, -3.8619]) -tensor([-33.7043, -33.6856, -33.7203, -33.7183]) -tensor([-4.9081, -4.8923, -4.8908]) -tensor([-32.0419, -32.0374, -32.0151, -32.0501, -32.0203, -32.0503, -32.0183, - -32.0565, -32.0399, -32.0667, -32.0667]) -tensor([-0.0655, -0.0561]) -Total rewards: 3364 -Total Receives: 3361 -Stores 2578 1 -Total Receives: 3362 -Stores 2579 1 -Total Receives: 3363 -Stores 2580 1 -Total Receives: 3364 -Stores 2581 1 -tensor([-28.3220, -28.2809, -28.2736, -28.3593, -28.3427, -28.2781, -28.3066, - -28.4113]) -tensor([-3.9059, -3.9190, -3.8922]) -Total rewards: 3367 -Total Receives: 3365 -Stores 2582 1 -Total Receives: 3366 -Stores 2583 1 -Total Receives: 3367 -Stores 2584 1 -tensor([-4.7425, -3.8576, -4.7161, -4.7425, -4.7343, -4.8506, -4.7248, -4.7425]) -tensor([-21.4650, -21.4650]) -tensor([-3.8531, -3.8573]) -tensor([-32.5876, -32.5816, -32.5948, -32.5649, -32.6001, -32.5590, -32.6031, - -32.5871, -32.6185, -32.6185]) -tensor([-28.1743, -28.1330, -28.2109, -28.1950, -28.1304, -28.1592, -28.2691]) -tensor([-3.1174, -3.1366, -3.1052]) -Total rewards: 3376 -Total Receives: 3368 -Stores 2585 1 -Total Receives: 3369 -Stores 2586 1 -Total Receives: 3370 -Stores 2587 1 -Total Receives: 3371 -Stores 2588 1 -Total Receives: 3372 -Stores 2589 1 -Total Receives: 3373 -Stores 2590 1 -Total Receives: 3374 -Stores 2591 1 -Total Receives: 3375 -Stores 2592 1 -Total Receives: 3376 -Stores 2593 1 -tensor([-11.7833, -11.7808, -11.7885, -11.7984, -11.7925, -11.8059]) -tensor([-16.5486, -16.4766, -16.5087, -16.5497, -16.4783, -16.5431]) -tensor([-26.7141, -26.7076, -26.7204, -26.6908, -26.7254, -26.7303, -26.7146, - -26.7475, -26.7475]) -tensor([-0.1190, -0.0898, -0.1047, -0.0425, -0.0499]) -tensor([-14.9721, -14.9742]) -tensor([-3.3999, -3.4015, -3.3937, -3.4150]) -tensor([-34.4663, -34.5128, -34.5023, -34.5207]) -tensor([-0.0296, -0.0259]) -tensor([-26.5814, -26.5349, -26.6139, -26.5967, -26.5619, -26.6765, -26.6851]) -tensor([-3.6962, -3.6898, -3.6605, -3.6414, -3.6962, -3.6709, -3.6717, -3.6819, - -3.6962]) -tensor([-0.0503, -0.0497]) -tensor([-2.9315, -2.8962, -2.8816, -2.9315, -2.9046, -2.9062, -2.9152, -2.9315]) -tensor([-15.6938, -15.6962, -15.6058, -15.6514, -15.7081, -15.6006, -15.7032]) -tensor([-12.3630, -12.3562, -12.3739, -12.3684, -12.3952, -12.3911]) -Total rewards: 3383 -Total Receives: 3377 -Stores 2594 1 -Total Receives: 3378 -Stores 2595 1 -Total Receives: 3379 -Stores 2596 1 -Total Receives: 3380 -Stores 2597 1 -Total Receives: 3381 -Stores 2598 1 -Total Receives: 3382 -Stores 2599 1 -Total Receives: 3383 -Stores 2600 1 -tensor([-10.8846, -10.8953, -10.8899, -10.9177, -10.9128]) -tensor([-22.8845, -22.8425, -22.9317, -22.8963, -22.9726, -22.9854]) -tensor([-0.1348, -0.0930, -0.1176, -0.0674]) -Total rewards: 3389 -Total Receives: 3384 -Stores 2601 1 -Total Receives: 3385 -Stores 2602 1 -Total Receives: 3386 -Stores 2603 1 -Total Receives: 3387 -Stores 2604 1 -Total Receives: 3388 -Stores 2605 1 -Total Receives: 3389 -Stores 2606 1 -tensor([-5.2365, -5.2556]) -tensor([-23.0754, -23.0611, -23.0933]) -tensor([-30.9718, -30.9633, -30.9778, -30.9465, -30.9795, -31.0262, -30.9914, - -31.0261, -31.0261, -31.0222]) -tensor([-4.1527, -4.1606]) -tensor([-21.3050, -21.2625, -21.3331, -21.3886, -21.4075, -21.4070]) -tensor([-34.8839, -34.8700, -34.9064]) -tensor([-17.7771, -17.7905, -17.7863, -17.7948]) -tensor([-5.0687, -5.0950, -5.0722, -5.1046, -5.1006, -5.0779, -5.0989]) -tensor([-16.5930, -16.6077, -16.5355, -16.5602, -16.6369, -16.6136]) -Total rewards: 3392 -Total Receives: 3390 -Stores 2607 1 -Total Receives: 3391 -Stores 2608 1 -Total Receives: 3392 -Stores 2609 1 -tensor([-0.0528, -0.0459]) -Total rewards: 3400 -Total Receives: 3393 -Stores 2610 1 -Total Receives: 3394 -Stores 2611 1 -Total Receives: 3395 -Stores 2612 1 -Total Receives: 3396 -Stores 2613 1 -Total Receives: 3397 -Stores 2614 1 -Total Receives: 3398 -Stores 2615 1 -Total Receives: 3399 -Stores 2616 1 -Total Receives: 3400 -Stores 2617 1 -tensor([-26.1952, -26.1956, -26.1668, -26.2432, -26.1919, -26.2334, -26.2095, - -26.2446, -26.2496, -26.2496, -26.2358]) -tensor([-23.3931, -23.3942, -23.3622, -23.4455, -23.3913, -23.4354, -23.4086, - -23.4470, -23.4531, -23.4375]) -tensor([-4.1034, -4.0624, -4.0939, -4.0426, -4.1034, -4.0570, -4.0580, -4.0693, - -4.1034, -4.1034]) -tensor([-9.1650, -9.1625, -9.1991, -9.2075, -9.1887]) -tensor([-3.1533, -3.1048, -3.1375, -3.1533, -3.1190, -3.0998, -3.1101, -3.1533, - -3.1533]) -tensor([-16.2560, -16.2682, -16.2821]) -tensor([-0.0619, -0.0606]) -Total rewards: 3406 -Total Receives: 3401 -Stores 2618 1 -Total Receives: 3402 -Stores 2619 1 -Total Receives: 3403 -Stores 2620 1 -Total Receives: 3404 -Stores 2621 1 -Total Receives: 3405 -Stores 2622 1 -Total Receives: 3406 -Stores 2623 1 -tensor([-20.4803, -20.5316, -20.5865, -20.6032, -20.6190, -20.6181]) -tensor([-0.1634, -0.1249, -0.1511]) -tensor([-21.2001, -21.2188]) -tensor([-0.1026, -0.0938, -0.1170]) -tensor([-0.0662, -0.0821]) -tensor([-40.5530, -40.6078]) -tensor([-2.2198, -2.2478, -2.2751, -2.2347, -2.2153, -2.2237, -2.2751, -2.2751, - -2.2751]) -tensor([-2.5792, -2.5743]) -tensor([-3.4840, -3.5001, -3.4745, -3.5135, -3.4979, -3.5068]) -tensor([-20.0676, -20.1259, -20.1419, -20.1636, -20.1560]) -tensor([-0.0345, -0.0398, -0.0340]) -tensor([-16.7533, -16.8179, -16.8376, -16.8560]) -tensor([-20.5389, -20.5248, -20.5328]) -tensor([-0.0389, -0.0430]) -Total rewards: 3413 -Total Receives: 3407 -Stores 2624 1 -Total Receives: 3408 -Stores 2625 1 -Total Receives: 3409 -Stores 2626 1 -Total Receives: 3410 -Stores 2627 1 -Total Receives: 3411 -Stores 2628 1 -Total Receives: 3412 -Stores 2629 1 -Total Receives: 3413 -Stores 2630 1 -tensor([-16.2219, -16.2182, -16.2468, -16.2022, -16.2710, -16.2200]) -tensor([-20.1416, -20.1754]) -tensor([-76.7558, -76.7671, -76.8370, -76.8147, -76.7513, -76.8021, -76.0712, - -76.8167, -76.8471, -76.8129]) -tensor([-13.2333, -13.2292, -13.2561, -13.2796, -13.2446]) -tensor([-1.5456, -1.5647, -1.5880, -1.5527, -1.5557, -1.5880, -1.5880, -1.5880]) -tensor([-10.5029, -10.4530, -10.5481, -10.5138]) -Total rewards: 3416 -Total Receives: 3414 -Stores 2631 1 -Total Receives: 3415 -Stores 2632 1 -Total Receives: 3416 -Stores 2633 1 -tensor([-2.8243, -2.8133]) -tensor([-0.0762, -0.0655]) -tensor([-16.1887, -16.1997, -16.2261, -16.2238, -16.2194, -16.2297]) -tensor([-23.5196, -23.5289, -23.5918, -23.5725, -23.5136, -23.5779, -23.5756, - -23.6089, -23.5718]) -tensor([-0.1896, -0.1777]) -tensor([-13.1258, -13.0213, -13.1254, -13.0880, -13.0939, -13.1219]) -tensor([-16.9682, -16.9861]) -tensor([-53.6489, -53.6092]) -tensor([-11.3893, -11.2766, -11.3480, -11.3538, -11.3838]) -tensor([-4.0946, -4.1242, -4.1241, -4.1494, -4.1208, -4.1168]) -tensor([-0.0495, -0.0525, -0.0483, -0.0399]) -Total Receives: 4917 -Stores 3709 1 -Total Receives: 4918 -Stores 3710 1 -Total Receives: 4919 -Stores 3711 1 -Total Receives: 4920 -Stores 3712 1 -Total Receives: 4921 -Stores 3713 1 -Total Receives: 4922 -Stores 3714 1 -tensor([5.6204, 5.5238, 4.5103, 5.8424, 4.3479]) -tensor([4.5123, 4.9476, 4.8261, 3.6540, 3.5658]) -tensor([5.2596, 5.1134, 5.3766, 5.6858]) -tensor([4.8766, 3.0475, 3.2679, 4.9848, 3.3101]) -loss: 2.1256855688989162e-05, td_error: 180.65374755859375, entropy: 2.450289726257324 -Train step: 0.5829308032989502 Optimizer Step: 15 -Total rewards: 4929 -Total Receives: 4923 -Stores 3715 1 -Total Receives: 4924 -Stores 3716 1 -Total Receives: 4925 -Stores 3717 1 -Total Receives: 4926 -Stores 3718 1 -Total Receives: 4927 -Stores 3719 1 -Total Receives: 4928 -Stores 3720 1 -Total Receives: 4929 -Stores 3721 1 -tensor([200.4652, 200.5154]) -tensor([6.1201, 6.4468, 4.2918]) -tensor([6.8551, 6.3472, 6.3472]) -tensor([6.3393, 6.9569, 5.8589]) -tensor([6.1969, 6.5158, 6.4094]) -tensor([2.7044, 3.7362, 3.6313]) -tensor([4.8426, 3.8187]) -tensor([8.1197, 6.6986, 8.6251, 8.4909, 6.4864]) -tensor([16.7463, 16.2954, 17.5721, 13.6692, 13.8048, 13.9481, 15.8053, 13.6692, - 13.6692]) -tensor([7.2059, 6.9970, 7.3775]) -tensor([8.3487, 7.3538]) -Total rewards: 4934 -Total Receives: 4930 -Stores 3722 1 -Total Receives: 4931 -Stores 3723 1 -Total Receives: 4932 -Stores 3724 1 -Total Receives: 4933 -Stores 3725 1 -Total Receives: 4934 -Stores 3726 1 -tensor([5.3565, 5.9704]) -loss: 4.695842562796315e-06, td_error: 39.90814208984375, entropy: 3.07738995552063 -Train step: 0.33255696296691895 Optimizer Step: 16 -tensor([6.6246, 4.1603, 4.4048, 4.0660]) -tensor([4.0028, 4.1418, 4.4293, 6.1608, 4.4548]) -tensor([4.6045, 4.7181, 6.7058, 6.5602]) -tensor([16.6788, 15.8228, 11.9786, 12.3015, 12.5500, 14.8954, 11.9786, 11.9786]) -tensor([10.9450, 8.3312, 12.7830, 9.5151, 8.2661, 10.0916, 8.2661]) -Total rewards: 4941 -Total Receives: 4935 -Stores 3727 1 -Total Receives: 4936 -Stores 3728 1 -Total Receives: 4937 -Stores 3729 1 -Total Receives: 4938 -Stores 3730 1 -Total Receives: 4939 -Stores 3731 1 -Total Receives: 4940 -Stores 3732 1 -Total Receives: 4941 -Stores 3733 1 -tensor([139.7004, 139.6216, 138.7759, 137.7431]) -tensor([3.7670, 3.9207, 4.2383, 4.2732]) -tensor([4.3407, 5.3613]) -tensor([3.6529, 3.9254, 3.2369]) -Total rewards: 4946 -Total Receives: 4942 -Stores 3734 1 -Total Receives: 4943 -Stores 3735 1 -Total Receives: 4944 -Stores 3736 1 -Total Receives: 4945 -Stores 3737 1 -Total Receives: 4946 -Stores 3738 1 -tensor([7.0498, 9.3048, 9.1405]) -Total rewards: 4952 -Total Receives: 4947 -Stores 3739 1 -Total Receives: 4948 -Stores 3740 1 -Total Receives: 4949 -Stores 3741 1 -Total Receives: 4950 -Stores 3742 1 -Total Receives: 4951 -Stores 3743 1 -Total Receives: 4952 -Stores 3744 1 -tensor([17.9281, 16.8856, 11.7436, 12.0959, 15.5897, 11.0460, 11.0460]) -tensor([6.7082, 6.5730, 6.0874]) -loss: 1.8399357941234484e-05, td_error: 59.45797348022461, entropy: 2.831089496612549 -Train step: 0.4841952323913574 Optimizer Step: 17 -tensor([8.1736, 8.1221, 8.1221]) -tensor([2.0036, 2.4039]) -tensor([2.2590, 2.4841, 2.7427, 2.7528, 1.8731]) -tensor([8.3951, 7.0890, 8.7079, 6.1446, 6.7865, 6.1446]) -tensor([6.1663, 8.2829]) -tensor([3.8678, 2.9009, 4.0048, 4.3188]) -tensor([9.3623, 6.6982, 8.0071, 6.2853, 6.0998, 8.5652, 6.0998]) -tensor([7.7183, 6.7242, 5.2979, 5.1444, 5.1444]) -tensor([3.2317, 3.4743, 3.7816, 2.5601]) -tensor([7.2059, 5.4303, 5.1397, 5.1397]) -tensor([2.0606, 1.8484, 1.9136, 1.7060]) -Total rewards: 4957 -Total Receives: 4953 -Stores 3745 1 -Total Receives: 4954 -Stores 3746 1 -Total Receives: 4955 -Stores 3747 1 -Total Receives: 4956 -Stores 3748 1 -Total Receives: 4957 -Stores 3749 1 -tensor([3.4663, 3.7853]) -tensor([2.1750, 2.3478, 3.4869, 3.8159]) -Total rewards: 4961 -Total Receives: 4958 -Stores 3750 1 -Total Receives: 4959 -Stores 3751 1 -Total Receives: 4960 -Stores 3752 1 -Total Receives: 4961 -Stores 3753 1 -tensor([194.3303, 194.3303]) -Total rewards: 4967 -Total Receives: 4962 -Stores 3754 1 -Total Receives: 4963 -Stores 3755 1 -Total Receives: 4964 -Stores 3756 1 -Total Receives: 4965 -Stores 3757 1 -Total Receives: 4966 -Stores 3758 1 -Total Receives: 4967 -Stores 3759 1 -loss: 0.00011228544462937862, td_error: 954.2705078125, entropy: 2.5806851387023926 -Train step: 0.5620839595794678 Optimizer Step: 18 -tensor([3.8711, 4.1321, 2.8562]) -tensor([7.2402, 5.8117, 6.6439]) -tensor([4.5973, 6.0519, 6.4444]) -tensor([6.9492, 5.7314]) -Total rewards: 4974 -Total Receives: 4968 -Stores 3760 1 -Total Receives: 4969 -Stores 3761 1 -Total Receives: 4970 -Stores 3762 1 -Total Receives: 4971 -Stores 3763 1 -Total Receives: 4972 -Stores 3764 1 -Total Receives: 4973 -Stores 3765 1 -Total Receives: 4974 -Stores 3766 1 -tensor([5.0684, 4.8778]) -tensor([5.9216, 5.3406]) -tensor([4.0221, 2.6164]) -Total rewards: 4979 -Total Receives: 4975 -Stores 3767 1 -Total Receives: 4976 -Stores 3768 1 -Total Receives: 4977 -Stores 3769 1 -Total Receives: 4978 -Stores 3770 1 -Total Receives: 4979 -Stores 3771 1 -tensor([12.3919, 11.1306, 11.4283, 11.6136]) -tensor([3.9566, 3.9306, 3.6606, 4.3144]) -tensor([14.5085, 12.8023, 10.7929, 12.5156, 10.7929]) -Total rewards: 4985 -Total Receives: 4980 -Stores 3772 1 -Total Receives: 4981 -Stores 3773 1 -Total Receives: 4982 -Stores 3774 1 -Total Receives: 4983 -Stores 3775 1 -Total Receives: 4984 -Stores 3776 1 -Total Receives: 4985 -Stores 3777 1 -tensor([21.3567, 15.8712, 16.2538, 19.9444, 14.3093, 14.3093]) -loss: 0.00010264123557135463, td_error: 872.3079833984375, entropy: 1.9472132921218872 -Train step: 0.8755142688751221 Optimizer Step: 19 -Total rewards: 4992 -Total Receives: 4986 -Stores 3778 1 -Total Receives: 4987 -Stores 3779 1 -Total Receives: 4988 -Stores 3780 1 -Total Receives: 4989 -Stores 3781 1 -Total Receives: 4990 -Stores 3782 1 -Total Receives: 4991 -Stores 3783 1 -Total Receives: 4992 -Stores 3784 1 -tensor([3.0311, 3.5833]) -tensor([16.7133, 11.2250, 11.6096, 9.3563, 9.3563]) -tensor([5.0884, 6.6034]) -tensor([4.9438, 4.5391]) -tensor([8.3348, 6.2977, 8.0235, 6.2977]) -tensor([ 9.5559, 10.7556, 9.0843, 8.3956, 8.0406, 11.2696, 8.0406]) -tensor([3.7503, 2.8592, 3.6792, 3.4667]) -tensor([6.6333, 8.3542, 7.1953]) -Total rewards: 4998 -Total Receives: 4993 -Stores 3785 1 -Total Receives: 4994 -Stores 3786 1 -Total Receives: 4995 -Stores 3787 1 -Total Receives: 4996 -Stores 3788 1 -Total Receives: 4997 -Stores 3789 1 -Total Receives: 4998 -Stores 3790 1 -tensor([10.1611, 8.6444, 8.6444]) -tensor([2.2995, 1.6236, 1.7699, 1.9016, 2.6376, 2.7087, 2.5161]) -Total rewards: 5005 -Total Receives: 4999 -Stores 3791 1 -Total Receives: 5000 -Stores 3792 1 -Total Receives: 5001 -Stores 3793 1 -Total Receives: 5002 -Stores 3794 1 -Total Receives: 5003 -Stores 3795 1 -Total Receives: 5004 -Stores 3796 1 -Total Receives: 5005 -Stores 3797 1 -tensor([10.5896, 8.9320, 8.2525, 7.7133, 11.1039, 7.7133]) -tensor([2.7678, 2.7870, 2.4856]) -loss: 9.921124728862196e-06, td_error: 84.31578826904297, entropy: 1.6116958856582642 -Train step: 0.5943589210510254 Optimizer Step: 20 -tensor([-1.3722, -1.1757, -1.6515, -1.0803]) -tensor([2.8687, 1.6338, 1.7582]) -tensor([4.8447, 4.7392]) -tensor([3.4875, 2.9664, 2.7802]) -Total rewards: 5011 -Total Receives: 5006 -Stores 3798 1 -Total Receives: 5007 -Stores 3799 1 -Total Receives: 5008 -Stores 3800 1 -Total Receives: 5009 -Stores 3801 1 -Total Receives: 5010 -Stores 3802 1 -Total Receives: 5011 -Stores 3803 1 -tensor([5.9189, 6.3360, 4.1282, 3.5294, 3.8558, 3.5294, 3.5294]) -Total rewards: 5013 -Total Receives: 5012 -Stores 3804 1 -Total Receives: 5013 -Stores 3805 1 -tensor([7.7575, 5.8728, 5.1013, 5.5733, 5.1013, 5.1013]) -tensor([ 0.3465, -0.2368, 0.5523, -0.2130, 0.0656]) -Total rewards: 5018 -Total Receives: 5014 -Stores 3806 1 -Total Receives: 5015 -Stores 3807 1 -Total Receives: 5016 -Stores 3808 1 -Total Receives: 5017 -Stores 3809 1 -Total Receives: 5018 -Stores 3810 1 -tensor([2.6775, 2.3558]) -loss: 4.466313839657232e-06, td_error: 37.957462310791016, entropy: 2.080873489379883 -Train step: 0.4810490608215332 Optimizer Step: 21 -tensor([4.3668, 6.2283, 5.0926]) -tensor([9.8692, 6.8875, 7.5159, 6.8875, 6.8875]) -Total rewards: 5022 -Total Receives: 5019 -Stores 3811 1 -Total Receives: 5020 -Stores 3812 1 -Total Receives: 5021 -Stores 3813 1 -Total Receives: 5022 -Stores 3814 1 -tensor([4.2489, 4.9330, 4.7119, 3.1795, 3.1795]) -tensor([-13.8112, -13.8418, -13.8443, -13.8394, -13.8341, -13.8462]) -tensor([-3.1230, -3.1497]) -tensor([-2.9652, -2.9854]) -tensor([-9.7975, -9.7550, -9.7615, -9.7927]) -tensor([-27.5972, -27.6109, -27.6004]) -tensor([-8.1910, -8.3117, -8.2640, -8.3075]) -tensor([-0.0739, -0.1236, -0.1139, -0.1107]) -tensor([-1.8983, -1.9307, -1.8935, -1.8926, -1.9307, -1.9307, -1.9307]) -tensor([-7.1870, -7.1718, -7.1994, -7.2024]) -tensor([-46.1165, -46.1479]) -tensor([-10.7825, -10.8133, -10.8378, -10.8443, -10.8153, -10.8307]) -tensor([-8.8434, -8.8834, -8.8314, -8.8565, -8.8477]) -tensor([-15.3944, -15.3990, -15.4516, -15.4351, -15.4423, -15.4385, -15.4794, - -15.4372]) -tensor([-3.1628, -3.1479, -3.2823]) -tensor([-1.6444, -1.6473]) -tensor([-3.4420, -3.4743, -3.4731, -3.5000, -3.4779, -3.5246]) -tensor([-2.5094, -2.5112, -2.5347, -2.5159, -2.5577]) -tensor([-7.6948, -7.7622, -7.7347, -7.7295]) -tensor([-0.0480, -0.0795, -0.0764]) -tensor([-0.1348, -0.1357]) -tensor([-0.0420, -0.0736, -0.0625]) -Total rewards: 3419 -Total Receives: 3417 -Stores 2634 1 -Total Receives: 3418 -Stores 2635 1 -Total Receives: 3419 -Stores 2636 1 -tensor([-6.9498, -6.9567, -6.8986, -6.9412]) -tensor([-0.1047, -0.1084, -0.1112]) -tensor([-6.1569, -6.1692, -6.1834]) -tensor([-10.0430, -10.0382, -10.0474, -10.0505, -10.0379]) -tensor([-23.0022, -23.0024, -23.0145, -23.0148]) -tensor([-2.5659, -2.5808, -2.6073]) -tensor([-21.5420, -21.5539, -21.5543]) -tensor([-1.0892, -1.0441, -1.0907, -1.0398, -1.0754, -1.0907, -1.0907, -1.0907]) -Total rewards: 3426 -Total Receives: 3420 -Stores 2637 1 -Total Receives: 3421 -Stores 2638 1 -Total Receives: 3422 -Stores 2639 1 -Total Receives: 3423 -Stores 2640 1 -Total Receives: 3424 -Stores 2641 1 -Total Receives: 3425 -Stores 2642 1 -Total Receives: 3426 -Stores 2643 1 -tensor([-1.9778, -1.9964, -1.9846, -2.0187]) -tensor([-9.3876, -9.4155, -9.4218, -9.3937, -9.4085, -9.4218]) -Total rewards: 3432 -Total Receives: 3427 -Stores 2644 1 -Total Receives: 3428 -Stores 2645 1 -Total Receives: 3429 -Stores 2646 1 -Total Receives: 3430 -Stores 2647 1 -Total Receives: 3431 -Stores 2648 1 -Total Receives: 3432 -Stores 2649 1 -tensor([-2.6670, -2.6734, -2.6804]) -Total rewards: 3436 -Total Receives: 3433 -Stores 2650 1 -Total Receives: 3434 -Stores 2651 1 -Total Receives: 3435 -Stores 2652 1 -Total Receives: 3436 -Stores 2653 1 -tensor([-0.0734, -0.0606]) -tensor([-10.9387, -10.9460]) -tensor([-8.2293, -8.2339, -8.2125, -8.2260, -8.2329]) -tensor([-19.0747, -19.0768, -19.0712]) -tensor([-9.2255, -9.2998, -9.3348, -9.2594]) -tensor([-5.9832, -5.9789, -5.9960, -5.9722]) -Total rewards: 3440 -Total Receives: 3437 -Stores 2654 1 -Total Receives: 3438 -Stores 2655 1 -Total Receives: 3439 -Stores 2656 1 -Total Receives: 3440 -Stores 2657 1 -Total rewards: 3448 -Total Receives: 3441 -Stores 2658 1 -Total Receives: 3442 -Stores 2659 1 -Total Receives: 3443 -Stores 2660 1 -Total Receives: 3444 -Stores 2661 1 -Total Receives: 3445 -Stores 2662 1 -Total Receives: 3446 -Stores 2663 1 -Total Receives: 3447 -Stores 2664 1 -Total Receives: 3448 -Stores 2665 1 -tensor([-16.8216, -16.7271, -16.7874, -16.7576, -16.7665, -16.1094, -16.8293, - -16.8310, -15.9871, -16.8215, -16.8293]) -tensor([-5.8465, -5.8227, -5.8289, -5.8208, -5.8421]) -tensor([-0.0528, -0.0749, -0.0782]) -tensor([-9.3851, -9.3806]) -tensor([-0.0303, -0.0273]) -tensor([-4.0026, -4.0174]) -tensor([-3.3239, -3.3033, -3.3205, -3.3114]) -tensor([-0.0491, -0.0343, -0.0298, -0.0437, -0.0297]) -Total rewards: 3457 -Total Receives: 3449 -Stores 2666 1 -Total Receives: 3450 -Stores 2667 1 -Total Receives: 3451 -Stores 2668 1 -Total Receives: 3452 -Stores 2669 1 -Total Receives: 3453 -Stores 2670 1 -Total Receives: 3454 -Stores 2671 1 -Total Receives: 3455 -Stores 2672 1 -Total Receives: 3456 -Stores 2673 1 -Total Receives: 3457 -Stores 2674 1 -tensor([-7.1431, -7.1684, -7.1710, -7.0831]) -tensor([-0.0381, -0.0630, -0.0703]) -tensor([-5.1884, -5.1859, -5.1997]) -tensor([-4.1548, -4.1695]) -tensor([-9.9436, -9.9387, -9.9461, -9.9507]) -tensor([-0.0314, -0.0284, -0.0428, -0.0281]) -tensor([-3.1693, -3.1753, -3.1091, -3.1952, -3.1535, -3.1952, -3.1952, -3.1952]) -tensor([-5.6136, -5.6142, -5.6337]) -tensor([-5.3116, -5.3323, -5.3357, -5.3351]) -Total rewards: 3459 -Total Receives: 3458 -Stores 2675 1 -Total Receives: 3459 -Stores 2676 1 -Total rewards: 3464 -Total Receives: 3460 -Stores 2677 1 -Total Receives: 3461 -Stores 2678 1 -Total Receives: 3462 -Stores 2679 1 -Total Receives: 3463 -Total Receives: 3464 -tensor([-4.7688, -4.7447, -4.7959]) -tensor([-5.9362, -5.9730, -5.9957]) -tensor([-3.4392, -3.4397, -3.4434, -3.4465, -3.4160, -3.4512]) -tensor([-17.5841, -17.6039, -17.5956]) -tensor([-0.0367, -0.0728]) -tensor([-5.4766, -5.4558, -5.4648, -5.4730]) -tensor([-7.8370, -7.8772, -7.8717]) -Total rewards: 3470 -Total Receives: 3465 -Stores 2680 1 -Total Receives: 3466 -Stores 2681 1 -Total Receives: 3467 -Stores 2682 1 -Total Receives: 3468 -Stores 2683 1 -Total Receives: 3469 -Stores 2684 1 -Total Receives: 3470 -Stores 2685 1 -tensor([-10.4504, -10.4609, -10.4556]) -tensor([-7.2981, -7.3335]) -tensor([-18.8479, -18.7736, -18.8255, -18.7994, -18.8145, -18.8307, -18.8800, - -18.8633, -18.8586, -18.8800]) -tensor([-9.0985, -9.1099, -9.0949, -9.1132]) -Total rewards: 3477 -Total Receives: 3471 -Stores 2686 1 -Total Receives: 3472 -Stores 2687 1 -Total Receives: 3473 -Stores 2688 1 -Total Receives: 3474 -Stores 2689 1 -Total Receives: 3475 -Stores 2690 1 -Total Receives: 3476 -Stores 2691 1 -Total Receives: 3477 -Stores 2692 1 -tensor([-5.4260, -5.4143, -5.4216]) -tensor([-3.8708, -3.8628, -3.8873, -3.8643, -3.8873, -3.8873, -3.8873]) -tensor([-16.7781, -16.7871, -16.7594, -16.7972, -16.8048]) -tensor([-8.2643, -8.2881, -8.2800]) -tensor([-5.7637, -5.7593, -5.7715]) -tensor([-5.3203, -5.3215, -5.3206, -5.3287, -5.3481, -5.3481]) -tensor([-16.2995, -16.2714, -16.1739, -16.2336, -16.2036, -16.2908, -16.2193, - -16.3013, -16.2765, -16.2689, -16.3013]) -tensor([-5.6960, -5.6990, -5.7472]) -tensor([-0.0726, -0.0674, -0.0782, -0.0445]) -Total rewards: 3484 -Total Receives: 3478 -Stores 2693 1 -Total Receives: 3479 -Stores 2694 1 -Total Receives: 3480 -Stores 2695 1 -Total Receives: 3481 -Stores 2696 1 -Total Receives: 3482 -Stores 2697 1 -Total Receives: 3483 -Stores 2698 1 -Total Receives: 3484 -Stores 2699 1 -tensor([-4.7525, -4.7753, -4.7710, -4.7610]) -tensor([-2.9198, -2.9379]) -tensor([-6.8702, -6.8947]) -tensor([-11.7009, -11.6965]) -tensor([-5.2101, -5.2179]) -tensor([-4.9702, -4.9772]) -tensor([-3.2156, -3.2432, -3.2066, -3.2432, -3.2432, -3.2432]) -tensor([-4.4086, -4.4490, -4.4657, -4.4612]) -Total rewards: 3491 -Total Receives: 3485 -Stores 2700 1 -Total Receives: 3486 -Stores 2701 1 -Total Receives: 3487 -Stores 2702 1 -Total Receives: 3488 -Stores 2703 1 -Total Receives: 3489 -Stores 2704 1 -Total Receives: 3490 -Stores 2705 1 -Total Receives: 3491 -Stores 2706 1 -tensor([-8.8681, -8.8511, -8.8521, -8.8895]) -tensor([-9.7858, -9.7733]) -tensor([-7.0915, -7.0897, -7.0870, -7.1122]) -tensor([-4.8423, -4.8506]) -tensor([-6.4440, -6.4424, -6.5857]) -tensor([-3.6267, -3.6252, -3.6234]) -tensor([-4.5790, -4.5885, -4.5965, -4.6114, -4.5893, -4.6177]) -tensor([-4.7946, -4.7963]) -tensor([-7.3009, -7.3145, -7.2971, -7.2927]) -tensor([-2.4879, -2.5123, -2.5123, -2.5123, -2.5123]) -tensor([-0.0998, -0.1119, -0.1179, -0.1260]) -tensor([-3.6234, -3.6334, -3.6401, -3.6333, -3.6646, -3.6640]) -tensor([-8.5437, -8.5491, -8.5490, -8.5527, -8.5514, -8.5511]) -tensor([-0.0685, -0.0903, -0.0989]) -tensor([-2.8334, -2.8442, -2.8393]) -tensor([-5.4544, -5.4162, -5.4144]) -tensor([-7.1695, -7.1843, -7.1913]) -Total rewards: 3500 -Total Receives: 3492 -Stores 2707 1 -Total Receives: 3493 -Stores 2708 1 -Total Receives: 3494 -Stores 2709 1 -Total Receives: 3495 -Stores 2710 1 -Total Receives: 3496 -Stores 2711 1 -Total Receives: 3497 -Stores 2712 1 -Total Receives: 3498 -Stores 2713 1 -Total Receives: 3499 -Stores 2714 1 -Total Receives: 3500 -Stores 2715 1 -tensor([-19.4583, -19.4619, -19.4333, -19.5016]) -tensor([-16.1421, -16.1054, -16.0553, -16.0299, -16.1308, -16.0481, -16.1269, - -16.1588, -16.1625, -16.1100, -16.1020, -16.1625]) -tensor([-0.0584, -0.0594, -0.0443]) -tensor([-6.2558, -6.2201]) -tensor([-2.3654, -2.3588]) -tensor([-9.8197, -9.7972]) -tensor([4.3731, 4.5268, 4.6703, 5.4387, 5.5449, 5.3621]) -Total rewards: 5028 -Total Receives: 5023 -Stores 3815 1 -Total Receives: 5024 -Stores 3816 1 -Total Receives: 5025 -Stores 3817 1 -Total Receives: 5026 -Stores 3818 1 -Total Receives: 5027 -Stores 3819 1 -Total Receives: 5028 -Stores 3820 1 -tensor([ 8.8342, 12.0036, 9.7287, 8.8342, 9.3710]) -tensor([4.7828, 4.7759]) -tensor([8.0724, 7.3676]) -tensor([10.3126, 7.9401, 7.6889]) -tensor([4.5820, 5.2637, 5.0408, 3.3902]) -tensor([5.7067, 5.0050]) -tensor([3.3987, 3.6277]) -tensor([7.2840, 5.7801, 5.1361, 3.7939, 3.7939, 3.9625]) -tensor([5.4119, 5.9856, 3.7505]) -tensor([5.9395, 7.8503]) -tensor([2.7657, 1.7168]) -tensor([3.6185, 3.7870]) -loss: 1.0898074833676219e-05, td_error: 92.6185073852539, entropy: 2.20635724067688 -Train step: 0.37872910499572754 Optimizer Step: 22 -tensor([7.9213, 9.2271, 7.9213, 8.8957]) -tensor([5.3679, 3.4153]) -tensor([3.4096, 5.9727, 3.6997]) -tensor([2.5053, 1.5943, 1.8093, 1.7948]) -tensor([1.9188, 2.2545]) -tensor([-0.0305, -0.7076, -0.6635, -0.8390, -0.3111, -0.8037, -0.7502]) -Total rewards: 5035 -Total Receives: 5029 -Stores 3821 1 -Total Receives: 5030 -Stores 3822 1 -Total Receives: 5031 -Stores 3823 1 -Total Receives: 5032 -Stores 3824 1 -Total Receives: 5033 -Stores 3825 1 -Total Receives: 5034 -Stores 3826 1 -Total Receives: 5035 -Stores 3827 1 -Total rewards: 5042 -Total Receives: 5036 -Stores 3828 1 -Total Receives: 5037 -Stores 3829 1 -Total Receives: 5038 -Stores 3830 1 -Total Receives: 5039 -Stores 3831 1 -Total Receives: 5040 -Stores 3832 1 -Total Receives: 5041 -Stores 3833 1 -Total Receives: 5042 -Stores 3834 1 -tensor([2.2856, 2.2565, 3.7428]) -tensor([8.9377, 8.2474, 6.5220, 6.3866, 6.3866, 6.9373]) -tensor([ 9.4141, 11.1133, 9.4141, 9.4141]) -tensor([10.1209, 8.4583, 8.2492, 8.2491, 8.8236]) -tensor([2.7250, 2.8561, 2.9838, 1.5414, 3.7252, 3.6493, 1.3866, 1.3012]) -Total rewards: 5046 -Total Receives: 5043 -Stores 3835 1 -Total Receives: 5044 -Stores 3836 1 -Total Receives: 5045 -Stores 3837 1 -Total Receives: 5046 -Stores 3838 1 -loss: 2.6782013264892157e-06, td_error: 22.7609920501709, entropy: 3.5150246620178223 -Train step: 0.37822389602661133 Optimizer Step: 23 -tensor([3.5364, 3.8218, 4.7114]) -tensor([10.6620, 10.2287, 10.2287, 11.2077]) -tensor([2.3971, 2.2726]) -Total rewards: 5051 -Total Receives: 5047 -Stores 3839 1 -Total Receives: 5048 -Stores 3840 1 -Total Receives: 5049 -Stores 3841 1 -Total Receives: 5050 -Stores 3842 1 -Total Receives: 5051 -Stores 3843 1 -tensor([3.8720, 3.3330]) -Total rewards: 5059 -Total Receives: 5052 -Stores 3844 1 -Total Receives: 5053 -Stores 3845 1 -Total Receives: 5054 -Stores 3846 1 -Total Receives: 5055 -Stores 3847 1 -Total Receives: 5056 -Stores 3848 1 -Total Receives: 5057 -Stores 3849 1 -Total Receives: 5058 -Stores 3850 1 -Total Receives: 5059 -Stores 3851 1 -tensor([4.3299, 3.4588]) -Total rewards: 5066 -Total Receives: 5060 -Stores 3852 1 -Total Receives: 5061 -Stores 3853 1 -Total Receives: 5062 -Stores 3854 1 -Total Receives: 5063 -Stores 3855 1 -Total Receives: 5064 -Stores 3856 1 -Total Receives: 5065 -Stores 3857 1 -Total Receives: 5066 -Stores 3858 1 -tensor([ 0.3634, 0.3107, 0.0752, 0.7447, 0.1463, 0.2394, -0.0797]) -tensor([ 9.1393, 9.1393, 11.0063, 9.8197]) -tensor([1.9540, 1.9610]) -tensor([4.5070, 4.7541, 4.7289]) -tensor([5.4545, 3.8839]) -tensor([6.7401, 7.7080]) -Total rewards: 5071 -Total Receives: 5067 -Stores 3859 1 -Total Receives: 5068 -Stores 3860 1 -Total Receives: 5069 -Stores 3861 1 -Total Receives: 5070 -Stores 3862 1 -Total Receives: 5071 -Stores 3863 1 -loss: 7.066181296977447e-06, td_error: 60.05272674560547, entropy: 2.0690536499023438 -Train step: 0.5605840682983398 Optimizer Step: 24 -tensor([4.2507, 3.1770]) -tensor([10.1667, 9.3889, 9.2096, 9.2096]) -Total rewards: 5073 -Total Receives: 5072 -Stores 3864 1 -Total Receives: 5073 -Stores 3865 1 -tensor([2.8312, 3.8772, 3.8505]) -tensor([3.0804, 4.2245]) -tensor([3.2157, 3.3415, 3.4229, 2.1112, 4.1783, 4.1736, 1.9493]) -tensor([11.5660, 11.1815, 11.1815, 11.1815]) -tensor([2.5222, 1.8031]) -tensor([ 9.8435, 9.8435, 11.2311]) -loss: 2.516081167414086e-06, td_error: 21.383193969726562, entropy: 4.166140556335449 -Train step: 0.44002294540405273 Optimizer Step: 25 -tensor([5.7644, 4.2612]) -tensor([2.2847, 2.0149]) -tensor([4.6599, 2.2583]) -tensor([3.2754, 3.4853, 1.7616]) -tensor([ 0.3984, 0.3417, 0.0661, 0.2678, 0.2934, -0.0847]) -tensor([0.9263, 0.6297]) -tensor([3.3529, 3.2617, 2.8344, 3.1317, 1.5522, 3.1460]) -tensor([9.6694, 9.6694]) -tensor([6.6128, 6.1155, 6.4378, 4.6504, 6.3951]) -tensor([3.6284, 3.7430, 3.8170, 2.4786, 4.5696, 2.3930]) -tensor([3.2575, 2.9252, 2.3206]) -tensor([1.7149, 1.6343]) -tensor([6.1893, 6.3158, 6.3901, 4.8484, 3.4043, 4.6249]) -loss: 0.35718685388565063, td_error: 71308.0546875, entropy: 1.3328696489334106 -Train step: 1.3316919803619385 Optimizer Step: 26 -tensor([3.1403, 2.2849, 2.4897]) -tensor([23.0681, 21.4663, 21.4663, 22.2453]) -tensor([18.3891, 18.5806, 18.9027, 14.5709, 16.2618]) -tensor([1.5806, 1.7934]) -tensor([3.2655, 2.4301]) -tensor([7.1171, 5.8396, 5.4873]) -tensor([6.8094, 9.4529, 7.3621]) -tensor([16.8520, 17.0328, 12.5816, 14.4606]) -tensor([7.6362, 5.8992]) -tensor([2.1172, 1.7125]) -tensor([2.2640, 2.5276]) -tensor([21.8313, 20.8376, 20.8376, 20.8376, 20.8376]) -Total rewards: 5079 -Total Receives: 5074 -Stores 3866 1 -Total Receives: 5075 -Stores 3867 1 -Total Receives: 5076 -Stores 3868 1 -Total Receives: 5077 -Stores 3869 1 -Total Receives: 5078 -Stores 3870 1 -Total Receives: 5079 -Stores 3871 1 -tensor([9.5246, 9.6479, 9.8855, 9.2028]) -tensor([3.7056, 2.9871]) -tensor([8.1151, 7.5574]) -tensor([15.7581, 16.4009]) -loss: 6.743703852407634e-05, td_error: 573.1212768554688, entropy: 2.8332157135009766 -Train step: 0.46921706199645996 Optimizer Step: 27 -tensor([23.5108, 23.5108, 25.1180]) -tensor([20.2831, 19.1464, 18.7695, 18.7695, 18.7695]) -tensor([0.8433, 0.5677]) -tensor([21.7773, 22.0966, 19.6022, 21.8884, 18.6450, 18.5357]) -tensor([7.1045, 7.2755, 7.9251]) -tensor([8.0298, 6.9906]) -tensor([4.1505, 3.4851]) -tensor([22.2508, 22.2508]) -tensor([4.1577, 3.6631, 3.6627, 4.3391]) -tensor([12.8764, 13.0711, 12.8275, 12.3609]) -Total rewards: 5085 -Total Receives: 5080 -Stores 3872 1 -Total Receives: 5081 -Stores 3873 1 -Total Receives: 5082 -Stores 3874 1 -Total Receives: 5083 -Stores 3875 1 -Total Receives: 5084 -Stores 3876 1 -Total Receives: 5085 -Stores 3877 1 -tensor([1.9373, 1.8709]) -Total rewards: 5089 -Total Receives: 5086 -Stores 3878 1 -Total Receives: 5087 -Stores 3879 1 -Total Receives: 5088 -Stores 3880 1 -Total Receives: 5089 -Stores 3881 1 -tensor([3.7196, 3.5128, 4.7610]) -tensor([22.9118, 23.2297, 23.0096, 19.6602, 19.3179]) -tensor([14.3471, 14.2921, 13.6696]) -loss: 9.091981155506801e-06, td_error: 77.26921844482422, entropy: 2.310525417327881 -Train step: 0.34337306022644043 Optimizer Step: 28 -tensor([3.5789, 2.9609, 2.9839, 2.7926]) -tensor([12.8888, 13.0241, 12.3302, 12.5600, 11.6965]) -tensor([25.0692, 22.7777, 22.7777, 22.7777]) -tensor([22.1842, 22.5141, 18.7745, 18.1678]) -Total rewards: 5093 -Total Receives: 5090 -Stores 3882 1 -Total Receives: 5091 -Stores 3883 1 -Total Receives: 5092 -Stores 3884 1 -Total Receives: 5093 -Stores 3885 1 -tensor([3.5965, 5.6506]) -tensor([4.4035, 4.0172]) -tensor([3.0931, 2.5898, 2.5898]) -tensor([2.7595, 1.4879]) -tensor([22.2312, 22.2312, 22.2312]) -tensor([20.7457, 20.8782, 16.3111, 15.2494]) -tensor([3.2124, 3.1114]) -tensor([21.5629, 21.5629]) -Total rewards: 5097 -Total Receives: 5094 -Stores 3886 1 -Total Receives: 5095 -Stores 3887 1 -Total Receives: 5096 -Stores 3888 1 -Total Receives: 5097 -Stores 3889 1 -tensor([5.2686, 5.7780, 5.8777, 5.7655]) -Total rewards: 5102 -Total Receives: 5098 -Stores 3890 1 -Total Receives: 5099 -Stores 3891 1 -Total Receives: 5100 -Stores 3892 1 -Total Receives: 5101 -Stores 3893 1 -Total Receives: 5102 -Stores 3894 1 -tensor([4.2544, 4.7704]) -tensor([3.6816, 3.7321, 3.5049]) -tensor([14.4714, 13.7353, 13.9826, 12.7481]) -tensor([1.7141, 2.5561]) -Total rewards: 5109 -Total Receives: 5103 -Stores 3895 1 -Total Receives: 5104 -Stores 3896 1 -Total Receives: 5105 -Stores 3897 1 -Total Receives: 5106 -Stores 3898 1 -Total Receives: 5107 -Stores 3899 1 -Total Receives: 5108 -Stores 3900 1 -Total Receives: 5109 -Stores 3901 1 -tensor([4.5273, 4.3378]) -tensor([11.9921, 7.5617, 8.3898, 7.4152, 7.4152]) -tensor([-13.2663, -13.2307, -13.1825, -13.2545, -13.1758, -13.2490, -13.2849, - -13.2885, -13.2346, -13.2389, -13.2885]) -tensor([-4.6197, -4.6222, -4.5936, -4.6142, -4.5811, -4.6222]) -Total rewards: 3505 -Total Receives: 3501 -Stores 2716 1 -Total Receives: 3502 -Stores 2717 1 -Total Receives: 3503 -Stores 2718 1 -Total Receives: 3504 -Stores 2719 1 -Total Receives: 3505 -Stores 2720 1 -Total rewards: 3513 -Total Receives: 3506 -Stores 2721 1 -Total Receives: 3507 -Stores 2722 1 -Total Receives: 3508 -Stores 2723 1 -Total Receives: 3509 -Stores 2724 1 -Total Receives: 3510 -Stores 2725 1 -Total Receives: 3511 -Stores 2726 1 -Total Receives: 3512 -Stores 2727 1 -Total Receives: 3513 -Stores 2728 1 -tensor([-5.9821, -5.9657, -5.9876, -6.0032]) -tensor([-0.0278, -0.0299, -0.0092]) -Total rewards: 3522 -Total Receives: 3514 -Stores 2729 1 -Total Receives: 3515 -Stores 2730 1 -Total Receives: 3516 -Stores 2731 1 -Total Receives: 3517 -Stores 2732 1 -Total Receives: 3518 -Stores 2733 1 -Total Receives: 3519 -Stores 2734 1 -Total Receives: 3520 -Stores 2735 1 -Total Receives: 3521 -Stores 2736 1 -Total Receives: 3522 -Stores 2737 1 -tensor([-2.0030, -2.0030, -2.0030, -2.0030, -2.0016]) -tensor([-5.0481, -5.0575, -5.0234, -5.0429, -5.0575]) -tensor([-3.4567, -3.4616, -3.4633, -3.4960, -3.5048]) -tensor([-0.0265, -0.0355]) -Total rewards: 3527 -Total Receives: 3523 -Stores 2738 1 -Total Receives: 3524 -Stores 2739 1 -Total Receives: 3525 -Stores 2740 1 -Total Receives: 3526 -Stores 2741 1 -Total Receives: 3527 -Stores 2742 1 -tensor([-3.1476, -3.1431, -3.1347]) -tensor([-0.0980, -0.0823]) -tensor([-7.7665, -7.7659, -7.7611]) -tensor([-8.1337, -8.1024, -8.1551, -8.1731]) -tensor([-34.1123, -34.1069, -34.1052, -34.0943, -34.1024, -34.0999, -33.2823]) -tensor([-2.4661, -2.4609]) -Total rewards: 3535 -Total Receives: 3528 -Stores 2743 1 -Total Receives: 3529 -Stores 2744 1 -Total Receives: 3530 -Stores 2745 1 -Total Receives: 3531 -Stores 2746 1 -Total Receives: 3532 -Stores 2747 1 -Total Receives: 3533 -Stores 2748 1 -Total Receives: 3534 -Stores 2749 1 -Total Receives: 3535 -Stores 2750 1 -tensor([-19.9013, -19.9086, -19.9428, -19.9676]) -tensor([-0.0520, -0.0541]) -tensor([-1.7706, -1.7597]) -tensor([-2.4752, -2.4425, -2.4399, -2.4426, -2.4655, -2.4753]) -tensor([-0.0631, -0.0526]) -tensor([-15.9668, -15.9271, -15.8787, -15.9925, -15.9613, -15.9783, -15.9827, - -16.0111, -15.9293, -15.9303, -16.0111]) -tensor([-1.6504, -1.6504, -1.6504, -1.6504, -1.6491]) -tensor([-6.4658, -6.4410, -6.4522, -6.4687]) -tensor([-1.7452, -1.7046, -1.7051, -1.7322, -1.7481, -1.7454]) -tensor([-0.1323, -0.1317]) -tensor([-0.1259, -0.1141]) -tensor([-0.3550, -0.3603]) -tensor([-0.0516, -0.0427]) -tensor([-1.2136, -1.1817, -1.1822, -1.2046, -1.2145]) -tensor([-5.7099, -5.6833, -5.7179]) -tensor([-1.8791, -1.8961]) -tensor([-4.6661, -4.6408]) -tensor([-7.1619, -7.1436, -7.2094, -7.1825]) -tensor([-10.8423, -10.8521, -10.8404, -10.8433, -10.8293, -10.8355]) -tensor([-19.4037, -19.4096, -19.4775]) -tensor([-7.0425, -7.0368, -7.0570]) -tensor([-5.9233, -5.9517, -5.9114, -5.9322, -5.9351]) -tensor([-13.2251, -13.1834, -13.2474, -13.2178, -13.2335, -13.2438, -13.2758, - -13.2008, -13.1927, -13.2752, -13.2758]) -tensor([-2.1277, -2.1301, -2.1277, -2.1277, -2.1277, -2.1269, -2.1277]) -tensor([-1.7298, -1.6846, -1.7180, -1.7309, -1.7446]) -tensor([-0.1584, -0.1564]) -Total rewards: 3538 -Total Receives: 3536 -Stores 2751 1 -Total Receives: 3537 -Stores 2752 1 -Total Receives: 3538 -Stores 2753 1 -tensor([-5.2844, -5.2727]) -tensor([-15.0986, -15.0933]) -tensor([-0.0614, -0.0495]) -Total rewards: 3547 -Total Receives: 3539 -Stores 2754 1 -Total Receives: 3540 -Stores 2755 1 -Total Receives: 3541 -Stores 2756 1 -Total Receives: 3542 -Stores 2757 1 -Total Receives: 3543 -Stores 2758 1 -Total Receives: 3544 -Stores 2759 1 -Total Receives: 3545 -Stores 2760 1 -Total Receives: 3546 -Stores 2761 1 -Total Receives: 3547 -Stores 2762 1 -tensor([-6.5549, -6.6166, -6.5891]) -tensor([-0.0375, -0.0237]) -Total rewards: 3552 -Total Receives: 3548 -Stores 2763 1 -Total Receives: 3549 -Stores 2764 1 -Total Receives: 3550 -Stores 2765 1 -Total Receives: 3551 -Stores 2766 1 -Total Receives: 3552 -Stores 2767 1 -Total rewards: 3557 -Total Receives: 3553 -Stores 2768 1 -Total Receives: 3554 -Stores 2769 1 -Total Receives: 3555 -Stores 2770 1 -Total Receives: 3556 -Stores 2771 1 -Total Receives: 3557 -Stores 2772 1 -tensor([-11.6871, -11.7107]) -tensor([-14.6225, -14.6430, -14.6006, -14.6294, -14.6397, -14.6830, -14.5959, - -14.5883, -14.6712, -14.6830]) -tensor([-18.9142, -18.9828, -18.9519, -19.0091]) -tensor([-11.6290, -11.6452, -11.6050, -11.6346, -11.6444, -11.6910, -11.6031, - -11.6745, -11.6910]) -Total rewards: 3559 -Total Receives: 3558 -Stores 2773 1 -Total Receives: 3559 -Stores 2774 1 -Total rewards: 3566 -Total Receives: 3560 -Stores 2775 1 -Total Receives: 3561 -Stores 2776 1 -Total Receives: 3562 -Stores 2777 1 -Total Receives: 3563 -Stores 2778 1 -Total Receives: 3564 -Stores 2779 1 -Total Receives: 3565 -Stores 2780 1 -Total Receives: 3566 -Stores 2781 1 -tensor([-1.9501, -1.9341, -1.9431, -1.9617]) -tensor([-2.7439, -2.7402]) -tensor([-10.2409, -10.2518, -10.2138, -10.2445, -10.2543, -10.3008, -10.2826, - -10.3008]) -tensor([-0.1253, -0.1233]) -Total rewards: 3573 -Total Receives: 3567 -Stores 2782 1 -Total Receives: 3568 -Stores 2783 1 -Total Receives: 3569 -Stores 2784 1 -Total Receives: 3570 -Stores 2785 1 -Total Receives: 3571 -Stores 2786 1 -Total Receives: 3572 -Stores 2787 1 -Total Receives: 3573 -Stores 2788 1 -tensor([-12.9940, -12.9739, -12.9602, -12.9642, -12.9519, -12.9580]) -tensor([-2.7366, -2.7399]) -tensor([-4.1320, -4.0780, -4.1251, -4.0800, -4.1000, -4.0996, -4.1297]) -tensor([-4.4954, -4.5389, -4.5318, -4.5398]) -tensor([-0.0540, -0.0361]) -tensor([-8.4121, -8.4269, -8.4156, -8.4239, -8.4828, -8.4565, -8.4828]) -tensor([-13.4037, -13.4012]) -tensor([-2.5437, -2.5211, -2.5437, -2.5437, -2.5437, -2.5288, -2.5410, -2.5437]) -Total rewards: 3574 -Total Receives: 3574 -Stores 2789 1 -tensor([-0.0007, 0.0032]) -Total rewards: 3584 -Total Receives: 3575 -Stores 2790 1 -Total Receives: 3576 -Stores 2791 1 -Total Receives: 3577 -Stores 2792 1 -Total Receives: 3578 -Stores 2793 1 -Total Receives: 3579 -Stores 2794 1 -Total Receives: 3580 -Stores 2795 1 -Total Receives: 3581 -Stores 2796 1 -Total Receives: 3582 -Stores 2797 1 -Total Receives: 3583 -Stores 2798 1 -Total Receives: 3584 -Stores 2799 1 -tensor([-5.4634, -5.4546]) -tensor([-7.5842, -7.5624, -7.5774, -7.6283, -7.6073, -7.6283]) -tensor([-12.1602, -12.1776, -12.1230, -12.1278, -12.1158, -12.1215]) -tensor([-2.1573, -2.1355]) -tensor([-8.9295, -8.9217]) -tensor([-5.5647, -5.5658, -5.5513, -5.5432, -5.5677]) -tensor([-5.4228, -5.4266, -5.4335]) -Total rewards: 3594 -Total Receives: 3585 -Stores 2800 1 -Total Receives: 3586 -Stores 2801 1 -Total Receives: 3587 -Stores 2802 1 -Total Receives: 3588 -Stores 2803 1 -Total Receives: 3589 -Stores 2804 1 -Total Receives: 3590 -Stores 2805 1 -Total Receives: 3591 -Stores 2806 1 -Total Receives: 3592 -Stores 2807 1 -Total Receives: 3593 -Stores 2808 1 -Total Receives: 3594 -Stores 2809 1 -tensor([-14.8669, -14.8160, -14.8694, -14.8741, -14.8741]) -tensor([-1.4431, -1.4328, -1.4503]) -Total rewards: 3600 -Total Receives: 3595 -Stores 2810 1 -Total Receives: 3596 -Stores 2811 1 -Total Receives: 3597 -Stores 2812 1 -Total Receives: 3598 -Stores 2813 1 -Total Receives: 3599 -Stores 2814 1 -Total Receives: 3600 -Stores 2815 1 -tensor([-11.1417, -11.1101, -11.1070, -11.1555, -11.1721, -11.1102, -11.1077, - -11.1688]) -tensor([-0.0756, -0.0590]) -tensor([-2.5240, -2.5240, -2.5240, -2.5240, -2.5140, -2.5112, -2.5046, -2.5240]) -tensor([-0.0101, -0.0225, -0.0180]) -Total rewards: 3607 -Total Receives: 3601 -Stores 2816 1 -Total Receives: 3602 -Stores 2817 1 -Total Receives: 3603 -Stores 2818 1 -Total Receives: 3604 -Stores 2819 1 -Total Receives: 3605 -Stores 2820 1 -Total Receives: 3606 -Stores 2821 1 -Total Receives: 3607 -Stores 2822 1 -tensor([-7.0011, -6.1724, -7.0659, -6.9675, -6.9978, -7.0159, -6.9833, -7.0103]) -tensor([-7.9480, -8.0126, -7.9523, -8.0119, -7.9808, -8.0120]) -Total rewards: 3611 -Total Receives: 3608 -Stores 2823 1 -Total Receives: 3609 -Stores 2824 1 -Total Receives: 3610 -Stores 2825 1 -Total Receives: 3611 -Stores 2826 1 -tensor([-5.8238, -5.8418]) -Total rewards: 3615 -Total Receives: 3612 -Stores 2827 1 -Total Receives: 3613 -Stores 2828 1 -Total Receives: 3614 -Stores 2829 1 -Total Receives: 3615 -Stores 2830 1 -tensor([-0.0069, -0.0013]) -tensor([-14.8284, -14.8250, -14.7997, -14.8424]) -tensor([-0.0041, -0.0048]) -tensor([-0.0087, -0.0139]) -tensor([-10.6532, -10.6160, -10.6802, -10.6759, -10.6139, -10.6737, -10.6139, - -10.6932]) -tensor([-12.2135, -12.0039]) -tensor([-17.8419, -17.8417, -17.8479, -17.8479]) -tensor([-1.7379, -1.7064, -1.7379, -1.7379, -1.7173, -1.7090, -1.7033, -1.7379]) -tensor([-6.2827, -6.2485, -6.2670, -6.2776, -6.2984, -6.2722, -6.2951, -6.3223, - -6.3142]) -Total rewards: 3618 -Total Receives: 3616 -Stores 2831 1 -Total Receives: 3617 -Stores 2832 1 -Total Receives: 3618 -Stores 2833 1 -tensor([-13.2765, -13.2718, -13.2901, -13.2972]) -Total rewards: 3620 -Total Receives: 3619 -Stores 2834 1 -Total Receives: 3620 -Stores 2835 1 -tensor([-6.3083, -6.3018, -6.3008, -6.3026, -6.2949, -6.3286]) -tensor([-1.4181, -1.4239]) -tensor([-0.0067, -0.0110, -0.0059, -0.0177]) -Total rewards: 3624 -Total Receives: 3621 -Stores 2836 1 -Total Receives: 3622 -Stores 2837 1 -Total Receives: 3623 -Stores 2838 1 -Total Receives: 3624 -Stores 2839 1 -tensor([-7.1918, -7.1220, -7.2140, -7.1560, -7.2140]) -tensor([-5.8434, -5.8645, -5.8147, -5.8645]) -tensor([-7.1393, -7.1382]) -tensor([-4.7454, -4.7685, -4.7685]) -Total rewards: 3632 -Total Receives: 3625 -Stores 2840 1 -Total Receives: 3626 -Stores 2841 1 -Total Receives: 3627 -Stores 2842 1 -Total Receives: 3628 -Stores 2843 1 -Total Receives: 3629 -Stores 2844 1 -Total Receives: 3630 -Stores 2845 1 -Total Receives: 3631 -Stores 2846 1 -Total Receives: 3632 -Stores 2847 1 -tensor([-34.8091, -34.8250, -34.8349]) -tensor([-5.1682, -5.1282, -5.1490, -5.1651, -5.1815, -5.1514, -5.1801, -5.2070, - -5.2083]) -tensor([-0.0013, 0.0004]) -tensor([-4.0798, -4.0366, -4.0600, -4.0736, -4.0916, -4.0922, -4.1091, -4.1090]) -tensor([-58.8311, -58.8379, -58.8757, -58.1305, -58.9951, -58.8813]) -tensor([-9.4988, -9.5149, -9.4648, -9.5150, -9.5092, -9.5205, -9.4608, -9.5383]) -tensor([-0.0175, -0.0201, -0.0289]) -tensor([-8.0579, -8.0706, -8.0719]) -tensor([-6.5481, -6.5505, -6.5408, -6.5645, -6.5855]) -tensor([-5.4609, -5.4497]) -Total rewards: 3640 -Total Receives: 3633 -Stores 2848 1 -Total Receives: 3634 -Stores 2849 1 -Total Receives: 3635 -Stores 2850 1 -Total Receives: 3636 -Stores 2851 1 -Total Receives: 3637 -Stores 2852 1 -Total Receives: 3638 -Stores 2853 1 -Total Receives: 3639 -Stores 2854 1 -Total Receives: 3640 -Stores 2855 1 -tensor([-14.5920, -14.6033, -14.6309]) -tensor([-2.0956, -2.0728, -2.0956, -2.0956, -2.0526, -2.0377, -2.0956]) -Total rewards: 3645 -Total Receives: 3641 -Stores 2856 1 -Total Receives: 3642 -Stores 2857 1 -Total Receives: 3643 -Stores 2858 1 -Total Receives: 3644 -Stores 2859 1 -Total Receives: 3645 -Stores 2860 1 -tensor([-0.0350, -0.0395]) -tensor([-4.3772, -4.3835, -4.3791, -4.3757]) -tensor([-9.0854, -9.1043, -9.1107]) -tensor([-18.4577, -18.4023, -18.4643, -18.4467, -18.4223, -18.4503, -18.4546]) -tensor([-9.0531, -9.0445, -9.0556, -9.0903]) -tensor([-0.0817, -0.0838, -0.0807]) -tensor([-0.1462, -0.1484, -0.1440]) -Total rewards: 3651 -Total Receives: 3646 -Stores 2861 1 -Total Receives: 3647 -Stores 2862 1 -Total Receives: 3648 -Stores 2863 1 -Total Receives: 3649 -Stores 2864 1 -Total Receives: 3650 -Stores 2865 1 -Total Receives: 3651 -Stores 2866 1 -tensor([-7.1051, -7.1082, -7.1145, -7.1145]) -tensor([-0.0970, -0.1029]) -Total rewards: 3656 -Total Receives: 3652 -Stores 2867 1 -Total Receives: 3653 -Stores 2868 1 -Total Receives: 3654 -Stores 2869 1 -Total Receives: 3655 -Stores 2870 1 -Total Receives: 3656 -Stores 2871 1 -tensor([-5.5466, -5.5373, -5.5334, -5.5492]) -tensor([-0.0797, -0.0853, -0.0782]) -tensor([-17.6414, -17.6862]) -Total rewards: 3660 -Total Receives: 3657 -Stores 2872 1 -Total Receives: 3658 -Stores 2873 1 -Total Receives: 3659 -Stores 2874 1 -Total Receives: 3660 -Stores 2875 1 -tensor([-28.4505, -28.4753, -28.4067, -28.4826, -28.4877, -28.4052, -28.5264]) -tensor([-3.4410, -3.4769, -3.4677]) -tensor([-7.4656, -7.4174, -7.4297, -7.4489, -7.4670, -7.5183, -7.4926, -7.5079, - -7.5347]) -tensor([-5.7249, -5.7135, -5.7118]) -tensor([-5.6745, -5.6452, -5.6936, -5.6763, -5.7187, -5.6969, -5.7102, -5.7343]) -tensor([-4.2199, -4.2365, -4.2215, -4.2685, -4.2448, -4.2594, -4.2827]) -tensor([-2.2186, -2.1824, -2.2186, -2.2186, -2.1622, -2.2186]) -Total rewards: 3663 -Total Receives: 3661 -Stores 2876 1 -Total Receives: 3662 -Stores 2877 1 -Total Receives: 3663 -Stores 2878 1 -tensor([-4.6974, -4.6858]) -tensor([-18.7121, -18.7166, -18.7241, -18.7010, -18.7289, -18.6844, -18.7057, - -18.7366, -18.7080]) -tensor([-8.0767, -8.0810, -8.1334]) -tensor([-9.7852, -9.8094, -9.8094]) -Total rewards: 3671 -Total Receives: 3664 -Stores 2879 1 -Total Receives: 3665 -Stores 2880 1 -Total Receives: 3666 -Stores 2881 1 -Total Receives: 3667 -Stores 2882 1 -Total Receives: 3668 -Stores 2883 1 -Total Receives: 3669 -Stores 2884 1 -Total Receives: 3670 -Stores 2885 1 -Total Receives: 3671 -Stores 2886 1 -tensor([-3.3762, -3.3631, -3.4012, -3.3881, -3.3925, -3.4260]) -tensor([-1.7167, -1.7173]) -tensor([-0.0997, -0.1158, -0.1081]) -tensor([-0.0136, -0.0199, -0.0152, -0.0098]) -tensor([-3.5615, -3.5470, -3.5626]) -tensor([-1.0200, -1.0164]) -tensor([-9.2432, -9.2587, -9.2038, -9.2646, -9.2648, -9.3184]) -Total rewards: 3679 -Total Receives: 3672 -Stores 2887 1 -Total Receives: 3673 -Stores 2888 1 -Total Receives: 3674 -Stores 2889 1 -Total Receives: 3675 -Stores 2890 1 -Total Receives: 3676 -Stores 2891 1 -Total Receives: 3677 -Stores 2892 1 -Total Receives: 3678 -Stores 2893 1 -Total Receives: 3679 -Stores 2894 1 -tensor([-7.6288, -7.6576, -7.6576]) -tensor([-4.0396, -4.0220, -4.0367]) -tensor([-0.0054, -0.0145, -0.0071]) -tensor([-19.2289, -19.2226, -19.2406, -19.2206, -19.2345, -19.2117, -19.2416, - -19.2132]) -tensor([-7.8150, -7.8179]) -tensor([-2.4065, -2.4009]) -tensor([-19.1822, -19.2246, -19.2134]) -tensor([-0.0008, 0.0022]) -tensor([-10.0600, -10.0664, -10.0664]) -tensor([-0.0640, -0.0752, -0.0540, -0.0750]) -Total rewards: 3687 -Total Receives: 3680 -Stores 2895 1 -Total Receives: 3681 -Stores 2896 1 -Total Receives: 3682 -Stores 2897 1 -Total Receives: 3683 -Stores 2898 1 -Total Receives: 3684 -Stores 2899 1 -Total Receives: 3685 -Stores 2900 1 -Total Receives: 3686 -Stores 2901 1 -Total Receives: 3687 -Stores 2902 1 -tensor([-9.0722, -9.0678, -9.0625, -9.0874]) -tensor([-4.5293, -4.5259]) -tensor([-2.2695, -2.2142, -2.2695, -2.2695, -2.2695]) -tensor([-0.0544, -0.0832, -0.0723]) -Total rewards: 3694 -Total Receives: 3688 -Stores 2903 1 -Total Receives: 3689 -Stores 2904 1 -Total Receives: 3690 -Stores 2905 1 -Total Receives: 3691 -Stores 2906 1 -Total Receives: 3692 -Stores 2907 1 -Total Receives: 3693 -Stores 2908 1 -Total Receives: 3694 -Stores 2909 1 -tensor([-9.2909, -9.3056, -9.3099, -9.3192, -9.3739, -9.3813]) -tensor([-4.4590, -4.5046, -4.5019, -4.4963]) -tensor([-4.1081, -4.0461, -4.0713, -4.0678, -4.0739, -4.1107, -4.1156]) -Total rewards: 3701 -Total Receives: 3695 -Stores 2910 1 -Total Receives: 3696 -Stores 2911 1 -Total Receives: 3697 -Stores 2912 1 -Total Receives: 3698 -Stores 2913 1 -Total Receives: 3699 -Stores 2914 1 -Total Receives: 3700 -Stores 2915 1 -Total Receives: 3701 -Stores 2916 1 -tensor([-3.0770, -3.0375, -3.0362, -3.0411, -3.0764, -3.0859]) -tensor([-3.6732, -3.6762]) -tensor([-7.9066, -7.9066]) -Total rewards: 3706 -Total Receives: 3702 -Stores 2917 1 -Total Receives: 3703 -Stores 2918 1 -Total Receives: 3704 -Stores 2919 1 -Total Receives: 3705 -Stores 2920 1 -Total Receives: 3706 -Stores 2921 1 -tensor([-0.0913, -0.0975, -0.0996]) -Total rewards: 3711 -Total Receives: 3707 -Stores 2922 1 -Total Receives: 3708 -Stores 2923 1 -Total Receives: 3709 -Stores 2924 1 -Total Receives: 3710 -Stores 2925 1 -Total Receives: 3711 -Stores 2926 1 -tensor([-7.8158, -7.8321, -7.8428, -7.8885, -7.9002]) -tensor([-0.0478, -0.0434]) -tensor([-10.5224, -10.5086, -10.5255, -10.5255]) -tensor([-21.1319, -21.1253, -21.1674, -21.1398, -21.1212, -21.1697, -21.1372, - -21.1394, -21.1133]) -tensor([-18.7740, -18.7684, -18.7891, -18.7837]) -tensor([-1.5683, -1.5683, -1.5683, -1.5683]) -tensor([-8.9243, -8.9117, -8.9560]) -tensor([-0.0825, -0.0802, -0.0441]) -Total rewards: 3716 -Total Receives: 3712 -Stores 2927 1 -Total Receives: 3713 -Stores 2928 1 -Total Receives: 3714 -Stores 2929 1 -Total Receives: 3715 -tensor([2.2573, 2.4684, 2.2600]) -loss: 2.0387784388731234e-05, td_error: 173.26783752441406, entropy: 1.7917556762695312 -Train step: 0.37093687057495117 Optimizer Step: 29 -tensor([4.6052, 3.9784]) -tensor([5.9230, 6.5077, 6.6232, 5.3839]) -tensor([3.1281, 2.9656, 3.0840]) -tensor([5.4927, 5.3963, 6.2066]) -Total rewards: 5113 -Total Receives: 5110 -Stores 3902 1 -Total Receives: 5111 -Stores 3903 1 -Total Receives: 5112 -Stores 3904 1 -Total Receives: 5113 -Stores 3905 1 -tensor([5.4158, 5.4158]) -tensor([7.7363, 8.8188, 8.0349, 7.4045]) -tensor([7.7769, 8.1282, 6.5159, 6.6357, 5.9184]) -tensor([5.0805, 3.7844]) -Total rewards: 5116 -Total Receives: 5114 -Stores 3906 1 -Total Receives: 5115 -Stores 3907 1 -Total Receives: 5116 -Stores 3908 1 -tensor([5.8950, 4.8908, 5.2138]) -loss: 3.1373463571071625e-05, td_error: 266.630859375, entropy: 2.751234531402588 -Train step: 0.4851808547973633 Optimizer Step: 30 -tensor([2.8022, 4.6907]) -Total rewards: 5122 -Total Receives: 5117 -Stores 3909 1 -Total Receives: 5118 -Stores 3910 1 -Total Receives: 5119 -Stores 3911 1 -Total Receives: 5120 -Stores 3912 1 -Total Receives: 5121 -Stores 3913 1 -Total Receives: 5122 -Stores 3914 1 -tensor([4.6043, 3.5744, 4.4645]) -tensor([4.4910, 4.6871]) -tensor([6.6785, 6.6740]) -tensor([3.7116, 3.7100]) -tensor([7.5828, 7.3944, 5.7575]) -tensor([20.3787, 15.9367, 14.8761]) -tensor([8.3843, 8.9037, 9.8624, 7.6641, 7.6641]) -Total rewards: 5128 -Total Receives: 5123 -Stores 3915 1 -Total Receives: 5124 -Stores 3916 1 -Total Receives: 5125 -Stores 3917 1 -Total Receives: 5126 -Stores 3918 1 -Total Receives: 5127 -Stores 3919 1 -Total Receives: 5128 -Stores 3920 1 -tensor([7.1448, 5.4217]) -tensor([4.2090, 5.1467]) -tensor([7.2310, 7.3848]) -tensor([ 9.9364, 10.5917, 9.3658]) -tensor([10.1367, 10.4497, 9.7896]) -loss: 2.3875358238001354e-05, td_error: 202.9073944091797, entropy: 1.6909222602844238 -Train step: 0.43897485733032227 Optimizer Step: 31 -tensor([10.0007, 11.5266, 8.7944, 8.7944]) -tensor([18.8128, 13.4594]) -tensor([10.6617, 9.4657, 9.6072, 8.0033]) -tensor([5.2664, 5.3601, 5.1004, 5.7862, 6.2916]) -tensor([9.6167, 7.5836, 9.2637]) -Total rewards: 5134 -Total Receives: 5129 -Stores 3921 1 -Total Receives: 5130 -Stores 3922 1 -Total Receives: 5131 -Stores 3923 1 -Total Receives: 5132 -Stores 3924 1 -Total Receives: 5133 -Stores 3925 1 -Total Receives: 5134 -Stores 3926 1 -tensor([8.2702, 8.4351, 6.5313]) -tensor([7.2840, 5.2091, 8.0299, 5.3286]) -tensor([5.9832, 7.0317, 5.3535]) -tensor([10.2971, 11.8937, 8.6702]) -tensor([3.1559, 2.8958, 2.9810]) -tensor([3.4292, 3.2014]) -tensor([5.7904, 2.9147, 2.9393]) -Total rewards: 5142 -Total Receives: 5135 -Stores 3927 1 -Total Receives: 5136 -Stores 3928 1 -Total Receives: 5137 -Stores 3929 1 -Total Receives: 5138 -Stores 3930 1 -Total Receives: 5139 -Stores 3931 1 -Total Receives: 5140 -Stores 3932 1 -Total Receives: 5141 -Stores 3933 1 -Total Receives: 5142 -Stores 3934 1 -tensor([2.8632, 2.9596]) -loss: 3.961460606660694e-05, td_error: 336.6691589355469, entropy: 2.4916064739227295 -Train step: 0.39706897735595703 Optimizer Step: 32 -tensor([6.7846, 6.6419, 8.4494]) -tensor([6.3550, 8.6299, 6.2711]) -tensor([8.9036, 7.3745]) -tensor([12.1948, 8.4451]) -tensor([6.6844, 6.7989, 6.5030, 7.7985, 6.1362]) -tensor([4.1825, 3.1514]) -tensor([7.8274, 7.6898]) -tensor([0.7255, 1.1857]) -tensor([5.8247, 5.9433, 5.6363, 5.2505]) -tensor([5.2313, 5.3567, 4.6495]) -tensor([2.2073, 2.2611]) -tensor([3.0527, 3.2422]) -tensor([10.0950, 11.7563]) -tensor([0.8475, 0.5378]) -tensor([6.5983, 5.6701]) -loss: 2.498065710067749, td_error: 72955.8515625, entropy: 2.564941644668579 -Train step: 0.9213771820068359 Optimizer Step: 33 -tensor([6.8085, 4.9920, 5.0082]) -tensor([3.8475, 3.3802]) -tensor([1.2111, 0.6258, 0.8442, 1.5362, 0.8684]) -Total rewards: 5146 -Total Receives: 5143 -Stores 3935 1 -Total Receives: 5144 -Stores 3936 1 -Total Receives: 5145 -Stores 3937 1 -Total Receives: 5146 -Stores 3938 1 -tensor([5.0204, 3.8890]) -tensor([3.6491, 3.6558]) -tensor([1.3537, 2.4697, 1.5642, 3.3458]) -tensor([0.2037, 0.0276]) -Total rewards: 5152 -Total Receives: 5147 -Stores 3939 1 -Total Receives: 5148 -Stores 3940 1 -Total Receives: 5149 -Stores 3941 1 -Total Receives: 5150 -Stores 3942 1 -Total Receives: 5151 -Stores 3943 1 -Total Receives: 5152 -Stores 3944 1 -tensor([2.1429, 3.3358, 1.7819, 1.6178]) -tensor([2.8641, 2.4967]) -tensor([0.7159, 0.0848, 0.6370]) -tensor([2.4707, 3.8618, 3.8793]) -loss: 5.095886444905773e-05, td_error: 433.07958984375, entropy: 2.6469950675964355 -Train step: 0.38896775245666504 Optimizer Step: 34 -Total rewards: 5158 -Total Receives: 5153 -Stores 3945 1 -Total Receives: 5154 -Stores 3946 1 -Total Receives: 5155 -Stores 3947 1 -Total Receives: 5156 -Stores 3948 1 -Total Receives: 5157 -Stores 3949 1 -Total Receives: 5158 -Stores 3950 1 -tensor([3.1456, 3.3902]) -tensor([4.4536, 6.4756, 3.8876, 3.6449]) -tensor([2.1093, 2.0938]) -tensor([1.9345, 1.7067, 1.3909]) -Total rewards: 5164 -Total Receives: 5159 -Stores 3951 1 -Total Receives: 5160 -Stores 3952 1 -Total Receives: 5161 -Stores 3953 1 -Total Receives: 5162 -Stores 3954 1 -Total Receives: 5163 -Stores 3955 1 -Total Receives: 5164 -Stores 3956 1 -Total rewards: 5169 -Total Receives: 5165 -Stores 3957 1 -Total Receives: 5166 -Stores 3958 1 -Total Receives: 5167 -Stores 3959 1 -Total Receives: 5168 -Stores 3960 1 -Total Receives: 5169 -Stores 3961 1 -tensor([3.9724, 2.1701]) -tensor([1.1099, 1.5647]) -tensor([0.8720, 0.6712, 0.9035]) -tensor([3.5257, 2.9641, 2.7479]) -Total rewards: 5174 -Total Receives: 5170 -Stores 3962 1 -Total Receives: 5171 -Stores 3963 1 -Total Receives: 5172 -Stores 3964 1 -Total Receives: 5173 -Stores 3965 1 -Total Receives: 5174 -Stores 3966 1 -tensor([4.1074, 2.2163]) -tensor([2.6539, 2.4444]) -tensor([4.7536, 3.3110]) -tensor([2.7602, 2.1211]) -tensor([3.2462, 2.2683]) -tensor([1.5308, 0.9034, 0.1369, 0.0592, 0.2811]) -loss: 2.6134792278753594e-05, td_error: 222.10946655273438, entropy: 1.6430737972259521 -Train step: 0.6219210624694824 Optimizer Step: 35 -tensor([2.7053, 1.1963, 2.0375, 2.2943, 2.3573]) -Total rewards: 5179 -Total Receives: 5175 -Stores 3967 1 -Total Receives: 5176 -Stores 3968 1 -Total Receives: 5177 -Stores 3969 1 -Total Receives: 5178 -Stores 3970 1 -Total Receives: 5179 -Stores 3971 1 -tensor([ 0.0417, 0.3814, 0.3651, -0.0414, -0.0414]) -tensor([1.7916, 1.3128, 1.1526]) -tensor([1.8131, 1.6700]) -tensor([0.8461, 1.6694, 0.9844, 0.2084]) -tensor([3.9951, 3.4521, 2.8777]) -tensor([4.2130, 5.5481]) -tensor([1.5977, 2.0861, 1.1994, 1.1994]) -tensor([1.5326, 2.5427]) -loss: 0.0002686262596398592, td_error: 2282.9501953125, entropy: 3.314141035079956 -Train step: 0.6048970222473145 Optimizer Step: 36 -tensor([3.9541, 3.3302, 3.6201]) -Total rewards: 5184 -Total Receives: 5180 -Stores 3972 1 -Total Receives: 5181 -Stores 3973 1 -Total Receives: 5182 -Stores 3974 1 -Total Receives: 5183 -Stores 3975 1 -Total Receives: 5184 -Stores 3976 1 -tensor([3.2975, 2.7174]) -tensor([3.1355, 3.4508, 2.2631, 1.6896]) -tensor([3.1321, 3.3829]) -Total rewards: 5186 -Total Receives: 5185 -Stores 3977 1 -Total Receives: 5186 -Stores 3978 1 -Total rewards: 5193 -Total Receives: 5187 -Stores 3979 1 -Total Receives: 5188 -Stores 3980 1 -Total Receives: 5189 -Stores 3981 1 -Total Receives: 5190 -Stores 3982 1 -Total Receives: 5191 -Stores 3983 1 -Total Receives: 5192 -Stores 3984 1 -Total Receives: 5193 -Stores 3985 1 -tensor([3.4744, 2.4247, 2.2574, 2.5818, 2.1551]) -tensor([3.8634, 4.1934, 3.1147]) -Total rewards: 5199 -Total Receives: 5194 -Stores 3986 1 -Total Receives: 5195 -Stores 3987 1 -Total Receives: 5196 -Stores 3988 1 -Total Receives: 5197 -Stores 3989 1 -Total Receives: 5198 -Stores 3990 1 -Total Receives: 5199 -Stores 3991 1 -tensor([3.8911, 3.5706]) -tensor([6.1721, 4.2748, 5.3068, 4.0786, 5.6598]) -tensor([3.2770, 3.6665]) -tensor([2.0687, 2.5770]) -Total rewards: 5205 -Total Receives: 5200 -Stores 3992 1 -Total Receives: 5201 -Stores 3993 1 -Total Receives: 5202 -Stores 3994 1 -Total Receives: 5203 -Stores 3995 1 -Total Receives: 5204 -Stores 3996 1 -Total Receives: 5205 -Stores 3997 1 -tensor([5.7625, 6.8064, 5.4725, 7.0754]) -loss: 4.395323048811406e-05, td_error: 373.5414733886719, entropy: 3.284674882888794 -Train step: 0.4036290645599365 Optimizer Step: 37 -tensor([3.2712, 1.1721, 2.2503, 2.0581, 1.9440]) -tensor([5.3873, 4.8308]) -tensor([3.8793, 3.5407]) -tensor([3.6676, 3.0125]) -Total rewards: 5211 -Stores 2930 1 -Total Receives: 3716 -Stores 2931 1 -tensor([-6.3968, -6.4019, -6.4408, -6.4585]) -tensor([-2.1380, -2.1205, -2.1132, -2.1409, -2.1501]) -tensor([-0.8217, -0.8217, -0.8217]) -Total rewards: 3720 -Total Receives: 3717 -Stores 2932 1 -Total Receives: 3718 -Stores 2933 1 -Total Receives: 3719 -Stores 2934 1 -Total Receives: 3720 -Stores 2935 1 -tensor([-0.4302, -0.4344]) -tensor([-1.3955, -1.3956, -1.3873]) -tensor([-0.0173, -0.0223, -0.0525, -0.0072]) -tensor([-6.1988, -6.1861]) -tensor([-2.7775, -2.7060, -2.7860, -2.7786, -2.7746, -2.7848]) -tensor([-7.5161, -7.5200, -7.5345, -7.5345]) -tensor([-59.0011, -59.0046, -59.0620, -59.0417, -59.0098, -58.9807, -59.0449, - -59.0153, -59.0648, -59.0598, -59.0742, -58.2591]) -tensor([-1.2614, -1.2376, -1.2631, -1.2833, -1.2718]) -tensor([-12.8400, -12.8375, -12.8581]) -tensor([-4.3278, -4.3688]) -Total rewards: 3727 -Total Receives: 3721 -Stores 2936 1 -Total Receives: 3722 -Stores 2937 1 -Total Receives: 3723 -Stores 2938 1 -Total Receives: 3724 -Stores 2939 1 -Total Receives: 3725 -Stores 2940 1 -Total Receives: 3726 -Stores 2941 1 -Total Receives: 3727 -Stores 2942 1 -tensor([-4.3508, -4.3736, -4.3666]) -tensor([-1.4980, -1.5100]) -tensor([-0.0051, -0.0067, -0.0092]) -tensor([-0.0432, -0.0399, -0.0330]) -tensor([-0.9259, -0.9271]) -tensor([-6.1727, -6.1753, -6.1946]) -Total rewards: 3734 -Total Receives: 3728 -Stores 2943 1 -Total Receives: 3729 -Stores 2944 1 -Total Receives: 3730 -Stores 2945 1 -Total Receives: 3731 -Stores 2946 1 -Total Receives: 3732 -Stores 2947 1 -Total Receives: 3733 -Stores 2948 1 -Total Receives: 3734 -Stores 2949 1 -tensor([-15.3352, -15.3867, -15.3707, -15.3438, -15.3182, -15.3727, -15.3441, - -15.3932, -15.3934, -15.3986, -15.3986, -15.3568, -15.4073]) -tensor([-0.4629, -0.4629]) -tensor([-0.0337, -0.0472, -0.0631]) -Total rewards: 3740 -Total Receives: 3735 -Stores 2950 1 -Total Receives: 3736 -Stores 2951 1 -Total Receives: 3737 -Stores 2952 1 -Total Receives: 3738 -Stores 2953 1 -Total Receives: 3739 -Stores 2954 1 -Total Receives: 3740 -Stores 2955 1 -tensor([-0.3463, -0.3459]) -tensor([-0.2520, -0.2511]) -tensor([-0.0281, -0.0337]) -tensor([-4.4895, -4.5360, -4.5618, -4.5650, -4.5657]) -tensor([-9.9649, -9.9806]) -Total rewards: 3747 -Total Receives: 3741 -Stores 2956 1 -Total Receives: 3742 -Stores 2957 1 -Total Receives: 3743 -Stores 2958 1 -Total Receives: 3744 -Stores 2959 1 -Total Receives: 3745 -Stores 2960 1 -Total Receives: 3746 -Stores 2961 1 -Total Receives: 3747 -Stores 2962 1 -tensor([-0.7166, -0.6727, -0.7102, -0.7330, -0.7103]) -tensor([-8.6049, -8.6471, -8.6295, -8.6140, -8.6482, -8.6173, -8.6546, -8.6533, - -8.6591, -8.6590, -8.6162, -8.6696]) -tensor([-5.7043, -5.7140]) -tensor([0.0388, 0.0356]) -tensor([-3.6856, -3.6757, -3.6751, -3.6919]) -tensor([-0.7541, -0.7918, -0.8182, -0.8063]) -tensor([-2.9971, -2.9858, -3.0038]) -tensor([0.0323, 0.0334]) -tensor([-2.3751, -2.3959, -2.3739, -2.3787, -2.3669, -2.3795, -2.3886]) -tensor([-6.1445, -6.1364, -6.1237, -6.1503]) -tensor([-0.0187, -0.0206]) -Total rewards: 3751 -Total Receives: 3748 -Stores 2963 1 -Total Receives: 3749 -Stores 2964 1 -Total Receives: 3750 -Stores 2965 1 -Total Receives: 3751 -Stores 2966 1 -tensor([-9.5359, -9.5503]) -tensor([-0.0305, -0.0335]) -tensor([-3.4760, -3.4783, -3.4316, -3.4671, -3.4715, -3.4891]) -Total rewards: 3758 -Total Receives: 3752 -Stores 2967 1 -Total Receives: 3753 -Stores 2968 1 -Total Receives: 3754 -Stores 2969 1 -Total Receives: 3755 -Stores 2970 1 -Total Receives: 3756 -Stores 2971 1 -Total Receives: 3757 -Stores 2972 1 -Total Receives: 3758 -Stores 2973 1 -tensor([-6.4059, -6.3969, -6.4189]) -Total rewards: 3767 -Total Receives: 3759 -Stores 2974 1 -Total Receives: 3760 -Stores 2975 1 -Total Receives: 3761 -Stores 2976 1 -Total Receives: 3762 -Stores 2977 1 -Total Receives: 3763 -Stores 2978 1 -Total Receives: 3764 -Stores 2979 1 -Total Receives: 3765 -Stores 2980 1 -Total Receives: 3766 -Stores 2981 1 -Total Receives: 3767 -Stores 2982 1 -tensor([-12.3396, -12.3241, -12.2958, -12.3265, -12.2981, -12.3418, -12.3329, - -12.3362, -12.3345, -12.2964, -12.3478]) -tensor([-0.0013, -0.0059]) -tensor([-0.3672, -0.3547]) -tensor([-5.6195, -5.6396]) -Total rewards: 3771 -Total Receives: 3768 -Stores 2983 1 -Total Receives: 3769 -Stores 2984 1 -Total Receives: 3770 -Stores 2985 1 -Total Receives: 3771 -Stores 2986 1 -tensor([-0.4124, -0.4131]) -tensor([0.0214, 0.0231]) -tensor([0.0284, 0.0368]) -tensor([-4.7176, -4.7306]) -tensor([-4.3671, -4.3964]) -Total rewards: 3776 -Total Receives: 3772 -Stores 2987 1 -Total Receives: 3773 -Stores 2988 1 -Total Receives: 3774 -Stores 2989 1 -Total Receives: 3775 -Stores 2990 1 -Total Receives: 3776 -Stores 2991 1 -tensor([-1.8819, -1.9172, -1.9019]) -tensor([-9.5288, -9.5532, -9.5506, -9.5436]) -tensor([-1.8450, -1.8633, -1.8369, -1.8409, -1.8318, -1.8519]) -tensor([-11.8175, -11.8046, -11.8186]) -Total rewards: 3779 -Total Receives: 3777 -Stores 2992 1 -Total Receives: 3778 -Stores 2993 1 -Total Receives: 3779 -Stores 2994 1 -tensor([-10.5573, -10.5588]) -tensor([0.0186, 0.0141, 0.0157]) -tensor([-2.3936, -2.3929, -2.3770, -2.3819, -2.4047]) -Total rewards: 3786 -Total Receives: 3780 -Stores 2995 1 -Total Receives: 3781 -Stores 2996 1 -Total Receives: 3782 -Stores 2997 1 -Total Receives: 3783 -Stores 2998 1 -Total Receives: 3784 -Stores 2999 1 -Total Receives: 3785 -Stores 3000 1 -Total Receives: 3786 -Stores 3001 1 -Total rewards: 3790 -Total Receives: 3787 -Stores 3002 1 -Total Receives: 3788 -Stores 3003 1 -Total Receives: 3789 -Stores 3004 1 -Total Receives: 3790 -Stores 3005 1 -tensor([-4.9833, -4.9606]) -tensor([-1.2001, -1.2227, -1.1888, -1.1925, -1.2193]) -Total rewards: 3794 -Total Receives: 3791 -Stores 3006 1 -Total Receives: 3792 -Stores 3007 1 -Total Receives: 3793 -Stores 3008 1 -Total Receives: 3794 -Stores 3009 1 -tensor([0.0111, 0.0129]) -Total rewards: 3802 -Total Receives: 3795 -Stores 3010 1 -Total Receives: 3796 -Stores 3011 1 -Total Receives: 3797 -Stores 3012 1 -Total Receives: 3798 -Stores 3013 1 -Total Receives: 3799 -Stores 3014 1 -Total Receives: 3800 -Stores 3015 1 -Total Receives: 3801 -Stores 3016 1 -Total Receives: 3802 -Stores 3017 1 -tensor([-9.6289, -9.6430, -9.6302]) -tensor([-8.3854, -8.3777]) -tensor([-15.8310, -15.8628, -15.7843, -15.8178, -15.7837, -15.8329, -15.8169, - -15.8247, -15.8240, -15.7833, -15.8226]) -tensor([-0.2673, -0.2665]) -tensor([-5.7343, -5.7563, -5.7321, -5.7619]) -tensor([-3.7336, -3.7344, -3.7390]) -tensor([-0.9398, -0.9172, -0.9247]) -tensor([-0.0058, -0.0011]) -tensor([-3.3691, -3.3696]) -tensor([-0.2063, -0.2050, -0.2064]) -Total rewards: 3807 -Total Receives: 3803 -Stores 3018 1 -Total Receives: 3804 -Stores 3019 1 -Total Receives: 3805 -Stores 3020 1 -Total Receives: 3806 -Stores 3021 1 -Total Receives: 3807 -Stores 3022 1 -tensor([-0.5369, -0.5416]) -tensor([-3.0332, -3.0386, -3.0147]) -Total rewards: 3816 -Total Receives: 3808 -Stores 3023 1 -Total Receives: 3809 -Stores 3024 1 -Total Receives: 3810 -Stores 3025 1 -Total Receives: 3811 -Stores 3026 1 -Total Receives: 3812 -Stores 3027 1 -Total Receives: 3813 -Stores 3028 1 -Total Receives: 3814 -Stores 3029 1 -Total Receives: 3815 -Stores 3030 1 -Total Receives: 3816 -Stores 3031 1 -Total rewards: 3818 -Total Receives: 3817 -Stores 3032 1 -Total Receives: 3818 -Stores 3033 1 -tensor([-2.5681, -2.5746]) -Total rewards: 3825 -Total Receives: 3819 -Stores 3034 1 -Total Receives: 3820 -Stores 3035 1 -Total Receives: 3821 -Stores 3036 1 -Total Receives: 3822 -Stores 3037 1 -Total Receives: 3823 -Stores 3038 1 -Total Receives: 3824 -Stores 3039 1 -Total Receives: 3825 -Stores 3040 1 -tensor([-0.1835, -0.1787]) -tensor([-11.6756, -11.6948, -11.6248, -11.6605, -11.6232, -11.7217, -11.6825, - -11.6700, -11.6649, -11.6640, -11.6942]) -tensor([-3.0783, -3.0779]) -Total rewards: 3830 -Total Receives: 3826 -Stores 3041 1 -Total Receives: 3827 -Stores 3042 1 -Total Receives: 3828 -Stores 3043 1 -Total Receives: 3829 -Stores 3044 1 -Total Receives: 3830 -Stores 3045 1 -tensor([-2.0288, -2.0128, -2.0272, -2.0490]) -Total rewards: 3834 -Total Receives: 3831 -Stores 3046 1 -Total Receives: 3832 -Stores 3047 1 -Total Receives: 3833 -Stores 3048 1 -Total Receives: 3834 -Stores 3049 1 -tensor([-0.1680, -0.1767]) -tensor([-0.7017, -0.7111, -0.7037, -0.6986, -0.7053]) -Total rewards: 3835 -Total Receives: 3835 -Stores 3050 1 -tensor([-0.4510, -0.3936]) -tensor([ 0.0086, -0.0148]) -tensor([-4.7720, -4.7894, -4.7966, -4.8128]) -tensor([-4.7893, -4.8010, -4.8118, -4.8118]) -tensor([-3.4681, -3.4731, -3.4731]) -tensor([-13.2800, -13.2965, -13.2312, -13.3346, -13.2273, -13.3288, -13.2872, - -13.2746, -13.2643, -13.2635, -13.3399, -13.3024, -13.3404]) -tensor([-0.7552, -0.7652, -0.7560, -0.7575]) -tensor([0.0318, 0.0271]) -Total rewards: 3842 -Total Receives: 3836 -Stores 3051 1 -Total Receives: 3837 -Stores 3052 1 -Total Receives: 3838 -Stores 3053 1 -Total Receives: 3839 -Stores 3054 1 -Total Receives: 3840 -Stores 3055 1 -Total Receives: 3841 -Stores 3056 1 -Total Receives: 3842 -Stores 3057 1 -tensor([-6.0072, -6.0068]) -tensor([-5.1800, -5.2091, -5.1923, -5.2123]) -tensor([-5.4381, -5.4511, -5.4288]) -Total rewards: 3847 -Total Receives: 3843 -Stores 3058 1 -Total Receives: 3844 -Stores 3059 1 -Total Receives: 3845 -Stores 3060 1 -Total Receives: 3846 -Stores 3061 1 -Total Receives: 3847 -Stores 3062 1 -tensor([-0.1581, -0.1589]) -tensor([-5.0282, -5.0398]) -Total rewards: 3854 -Total Receives: 3848 -Stores 3063 1 -Total Receives: 3849 -Stores 3064 1 -Total Receives: 3850 -Stores 3065 1 -Total Receives: 3851 -Stores 3066 1 -Total Receives: 3852 -Stores 3067 1 -Total Receives: 3853 -Stores 3068 1 -Total Receives: 3854 -Stores 3069 1 -tensor([-1.9688, -2.0270, -1.9856, -2.0340, -2.0062, -2.0074]) -Total rewards: 3858 -Total Receives: 3855 -Stores 3070 1 -Total Receives: 3856 -Stores 3071 1 -Total Receives: 3857 -Stores 3072 1 -Total Receives: 3858 -Stores 3073 1 -tensor([-0.4766, -0.4686, -0.4662]) -tensor([-1.3125, -1.2859, -1.3194, -1.2928, -1.2940]) -tensor([-13.3874, -13.4014, -13.3307, -13.4394, -13.4537, -13.4371, -13.4016, - -13.3822, -13.3693, -13.3686, -13.4436, -13.4089, -13.4478]) -tensor([-0.7682, -0.7679, -0.7488, -0.7505]) -tensor([-3.5882, -3.6020, -3.6020]) -Total rewards: 3862 -Total Receives: 3859 -Stores 3074 1 -Total Receives: 3860 -Stores 3075 1 -Total Receives: 3861 -Stores 3076 1 -Total Receives: 3862 -Stores 3077 1 -tensor([-4.1719, -4.1602, -4.1657, -4.1527, -4.1333, -4.1747, -4.1599]) -tensor([-12.0301, -12.0413, -12.0874, -12.0955, -12.0800, -12.0487, -12.0225, - -12.0057, -12.0050, -12.0948, -12.0521, -12.0925]) -Total rewards: 3866 -Total Receives: 3863 -Stores 3078 1 -Total Receives: 3864 -Stores 3079 1 -Total Receives: 3865 -Stores 3080 1 -Total Receives: 3866 -Stores 3081 1 -Total rewards: 3873 -Total Receives: 3867 -Stores 3082 1 -Total Receives: 3868 -Stores 3083 1 -Total Receives: 3869 -Stores 3084 1 -Total Receives: 3870 -Stores 3085 1 -Total Receives: 3871 -Stores 3086 1 -Total Receives: 3872 -Stores 3087 1 -Total Receives: 3873 -Stores 3088 1 -tensor([-2.8846, -2.8962, -2.8962]) -tensor([-0.0453, -0.0545]) -tensor([0.0175, 0.0093]) -tensor([-4.0003, -4.0114, -4.0235]) -tensor([-10.0975, -10.1035, -10.1527, -10.1535, -10.1398, -10.1174, -10.0915, - -10.0743, -10.1521, -10.1184, -10.1512]) -tensor([0.0284, 0.0290]) -tensor([-0.0418, -0.1045, -0.0330, -0.0466, -0.0889]) -tensor([-0.0461, -0.0744, -0.0712, -0.0803, -0.1241, -0.0651]) -Total rewards: 3880 -Total Receives: 3874 -Stores 3089 1 -Total Receives: 3875 -Stores 3090 1 -Total Receives: 3876 -Stores 3091 1 -Total Receives: 3877 -Stores 3092 1 -Total Receives: 3878 -Stores 3093 1 -Total Receives: 3879 -Stores 3094 1 -Total Receives: 3880 -Stores 3095 1 -tensor([-3.4532, -3.4644]) -tensor([-3.2100, -3.2112, -3.1997, -3.2254, -3.2013, -3.1917, -3.2326, -3.1962]) -tensor([-0.0265, -0.0243]) -tensor([-3.3804, -3.3799, -3.3687, -3.3951, -3.3682, -3.4057, -3.3649]) -Total rewards: 3885 -Total Receives: 3881 -Stores 3096 1 -Total Receives: 3882 -Stores 3097 1 -Total Receives: 3883 -Stores 3098 1 -Total Receives: 3884 -Stores 3099 1 -Total Receives: 3885 -Stores 3100 1 -tensor([-2.9773, -2.9688, -3.0092, -3.0092]) -tensor([-10.1248, -10.1269, -10.1798, -10.1777, -10.1644, -10.1461, -10.1185, - -10.1802, -10.1463, -10.1887, -10.1786]) -tensor([-0.0888, -0.0914, -0.0848, -0.0876]) -tensor([-10.8099, -10.8111, -10.8630, -10.8483, -10.8315, -10.8027, -10.8621, - -10.8310, -10.8705, -10.8624]) -tensor([-0.1452, -0.0548, -0.0693, -0.1258]) -tensor([-5.9849, -5.9863, -5.9842, -5.9751, -5.9751]) -tensor([-10.1235, -10.1241, -10.1785, -10.1631, -10.1451, -10.1778, -10.1445, - -10.1866, -10.1785]) -tensor([-3.6199, -3.6268, -3.6136, -3.6327, -3.6131, -3.6566]) -tensor([-0.1725, -0.1654, -0.1793]) -tensor([-2.8636, -2.8715, -2.8614, -2.8756, -2.8943]) -Total rewards: 3889 -Total Receives: 3886 -Stores 3101 1 -Total Receives: 3887 -Stores 3102 1 -Total Receives: 3888 -Stores 3103 1 -Total Receives: 3889 -Stores 3104 1 -tensor([-0.0526, -0.0761, -0.0963, -0.1283, -0.0833]) -tensor([-0.0629, -0.0757]) -tensor([-0.0691, -0.0744]) -Total rewards: 3893 -Total Receives: 3890 -Stores 3105 1 -Total Receives: 3891 -Stores 3106 1 -Total Receives: 3892 -Stores 3107 1 -Total Receives: 3893 -Stores 3108 1 -tensor([-0.1866, -0.1052, -0.1674]) -tensor([-3.7840, -3.8265, -3.8265, -3.8265]) -tensor([-0.0809, -0.0818]) -Total rewards: 3898 -Total Receives: 3894 -Stores 3109 1 -Total Receives: 3895 -Stores 3110 1 -Total Receives: 3896 -Stores 3111 1 -Total Receives: 3897 -Stores 3112 1 -Total Receives: 3898 -Stores 3113 1 -tensor([-3.8680, -3.8680, -3.8680]) -tensor([-0.3105, -0.3138]) -tensor([-3.9758, -3.9820, -3.9993, -4.0248]) -tensor([-0.3381, -0.3424]) -tensor([-9.1816, -9.1979, -9.1861]) -tensor([-0.2160, -0.1634]) -tensor([-7.6597, -7.6616, -7.6568, -7.6511, -7.6493, -7.6584]) -tensor([-2.8258, -2.8419, -2.8680]) -tensor([-8.7694, -8.7586]) -tensor([-11.5877, -11.6249, -11.6108, -11.5812, -11.6222, -11.5726, -11.6341, - -11.6871, -11.6365, -11.6827]) -tensor([-5.8692, -5.8792, -5.8840, -5.8750, -5.8691, -5.8781, -5.8934]) -tensor([-0.0312, -0.0263, -0.0209]) -tensor([-4.3604, -4.3604]) -Total rewards: 3903 -Total Receives: 3899 -Stores 3114 1 -Total Receives: 3900 -Stores 3115 1 -Total Receives: 3901 -Stores 3116 1 -Total Receives: 3902 -Stores 3117 1 -Total Receives: 3903 -Stores 3118 1 -tensor([-0.2598, -0.2908]) -tensor([-0.1445, -0.1478]) -tensor([-0.8500, -0.8420, -0.8198, -0.8345, -0.8715]) -tensor([-0.6222, -0.6296]) -tensor([-2.5005, -2.4981, -2.4612, -2.4954, -2.5041]) -tensor([-0.2040, -0.2073, -0.2085, -0.2071]) -Total rewards: 3911 -Total Receives: 3904 -Stores 3119 1 -Total Receives: 3905 -Stores 3120 1 -Total Receives: 3906 -Stores 3121 1 -Total Receives: 3907 -Stores 3122 1 -Total Receives: 3908 -Stores 3123 1 -Total Receives: 3909 -Stores 3124 1 -Total Receives: 3910 -Stores 3125 1 -Total Receives: 3911 -Stores 3126 1 -tensor([-5.5242, -5.5260]) -Total rewards: 3915 -Total Receives: 3912 -Stores 3127 1 -Total Receives: 3913 -Stores 3128 1 -Total Receives: 3914 -Stores 3129 1 -Total Receives: 3915 -Stores 3130 1 -tensor([-0.6774, -0.6954]) -tensor([-0.1439, -0.1445, -0.1538]) -tensor([-0.0309, -0.0108]) -Total rewards: 3922 -Total Receives: 3916 -Stores 3131 1 -Total Receives: 3917 -Stores 3132 1 -Total Receives: 3918 -Stores 3133 1 -Total Receives: 3919 -Stores 3134 1 -Total Receives: 3920 -Stores 3135 1 -Total Receives: 3921 -Stores 3136 1 -Total Receives: 3922 -Stores 3137 1 -Total rewards: 3929 -Total Receives: 3923 -Stores 3138 1 -Total Receives: 3924 -Stores 3139 1 -Total Receives: 3925 -Stores 3140 1 -Total Receives: 3926 -Stores 3141 1 -Total Receives: 3927 -Stores 3142 1 -Total Receives: 3928 -Stores 3143 1 -Total Receives: 3929 -Stores 3144 1 -tensor([-0.0738, -0.0606]) -tensor([-9.1246, -9.1380, -9.1395, -9.1312, -9.1369, -9.1495]) -tensor([-2.9033, -2.9030, -2.9053, -2.9157]) -tensor([-0.0643, -0.0570, -0.0631]) -tensor([-5.3612, -5.3655]) -tensor([-7.9967, -7.9927, -8.0128, -7.9814, -7.9914, -8.0069]) -tensor([-0.1237, -0.1298]) -Total rewards: 3933 -Total Receives: 3930 -Stores 3145 1 -Total Receives: 3931 -Stores 3146 1 -Total Receives: 3932 -Stores 3147 1 -Total Receives: 3933 -Stores 3148 1 -tensor([-0.4441, -0.4412, -0.4459]) -tensor([-17.4702, -17.5110, -17.4943, -17.4483, -17.5015, -17.4609, -17.5123, - -17.5249, -17.5834, -17.5605, -17.5770]) -Total rewards: 3939 -Total Receives: 3934 -Stores 3149 1 -Total Receives: 3935 -Stores 3150 1 -Total Receives: 3936 -Stores 3151 1 -Total Receives: 3937 -Stores 3152 1 -Total Receives: 3938 -Stores 3153 1 -Total Receives: 3939 -Stores 3154 1 -tensor([-1.0831, -1.0642, -1.0390, -1.0797, -1.0492, -1.0916, -1.0861]) -tensor([-9.3665, -9.3823]) -tensor([-0.3078, -0.3168]) -tensor([-5.2056, -5.2311, -5.2379]) -tensor([-0.4587, -0.4641]) -Total Receives: 5206 -Stores 3998 1 -Total Receives: 5207 -Stores 3999 1 -Total Receives: 5208 -Stores 4000 1 -Total Receives: 5209 -Stores 4001 1 -Total Receives: 5210 -Stores 4002 1 -Total Receives: 5211 -Stores 4003 1 -tensor([3.5770, 4.4777, 5.0956, 2.9827]) -tensor([5.5666, 4.5417, 3.2957, 3.8643]) -tensor([0.7774, 0.4708]) -tensor([0.9998, 1.1661, 1.8872, 0.9868]) -Total rewards: 5219 -Total Receives: 5212 -Stores 4004 1 -Total Receives: 5213 -Stores 4005 1 -Total Receives: 5214 -Stores 4006 1 -Total Receives: 5215 -Stores 4007 1 -Total Receives: 5216 -Stores 4008 1 -Total Receives: 5217 -Stores 4009 1 -Total Receives: 5218 -Stores 4010 1 -Total Receives: 5219 -Stores 4011 1 -tensor([4.8264, 3.0791]) -tensor([6.5230, 7.5566, 6.2476]) -Total rewards: 5226 -Total Receives: 5220 -Stores 4012 1 -Total Receives: 5221 -Stores 4013 1 -Total Receives: 5222 -Stores 4014 1 -Total Receives: 5223 -Stores 4015 1 -Total Receives: 5224 -Stores 4016 1 -Total Receives: 5225 -Stores 4017 1 -Total Receives: 5226 -Stores 4018 1 -Total rewards: 5230 -Total Receives: 5227 -Stores 4019 1 -Total Receives: 5228 -Stores 4020 1 -Total Receives: 5229 -Stores 4021 1 -Total Receives: 5230 -Stores 4022 1 -tensor([1.1203, 1.2660, 1.0395]) -Total rewards: 5237 -Total Receives: 5231 -Stores 4023 1 -Total Receives: 5232 -Stores 4024 1 -Total Receives: 5233 -Stores 4025 1 -Total Receives: 5234 -Stores 4026 1 -Total Receives: 5235 -Stores 4027 1 -Total Receives: 5236 -Stores 4028 1 -Total Receives: 5237 -Stores 4029 1 -tensor([4.1638, 2.8721, 3.4531]) -Total rewards: 5242 -Total Receives: 5238 -Stores 4030 1 -Total Receives: 5239 -Stores 4031 1 -Total Receives: 5240 -Stores 4032 1 -Total Receives: 5241 -Stores 4033 1 -Total Receives: 5242 -Stores 4034 1 -tensor([3.3222, 5.4699, 4.0421]) -loss: 4.4841286580776796e-05, td_error: 381.08868408203125, entropy: 1.1323356628417969 -Train step: 0.4972951412200928 Optimizer Step: 38 -tensor([6.0679, 4.4120]) -tensor([1.7152, 1.4956]) -tensor([2.0102, 1.8498]) -tensor([4.4738, 4.4178]) -Total rewards: 5245 -Total Receives: 5243 -Stores 4035 1 -Total Receives: 5244 -Stores 4036 1 -Total Receives: 5245 -Stores 4037 1 -tensor([0.3107, 0.6478, 0.7641, 0.6651]) -tensor([1.1079, 0.9949]) -tensor([6.6302, 6.3633]) -tensor([3.3047, 5.6185]) -tensor([7.2453, 8.1703, 5.7146]) -loss: 2.282045443280367e-06, td_error: 19.39422035217285, entropy: 2.8904950618743896 -Train step: 0.3497350215911865 Optimizer Step: 39 -tensor([ 0.6882, 1.5119, 1.3657, 1.2807, -0.1419]) -tensor([3.9129, 3.7019, 3.5839, 1.7223]) -tensor([2.6229, 1.9842]) -Total rewards: 5250 -Total Receives: 5246 -Stores 4038 1 -Total Receives: 5247 -Stores 4039 1 -Total Receives: 5248 -Stores 4040 1 -Total Receives: 5249 -Stores 4041 1 -Total Receives: 5250 -Stores 4042 1 -Total rewards: 5253 -Total Receives: 5251 -Stores 4043 1 -Total Receives: 5252 -Stores 4044 1 -Total Receives: 5253 -Stores 4045 1 -tensor([2.0634, 1.1151, 1.1871]) -tensor([1.3356, 0.9173, 0.7546]) -tensor([2.8233, 1.9919, 3.3225, 1.5476, 1.5476]) -tensor([0.5994, 0.6871]) -tensor([5.8874, 7.9947]) -Total rewards: 5256 -Total Receives: 5254 -Stores 4046 1 -Total Receives: 5255 -Stores 4047 1 -Total Receives: 5256 -Stores 4048 1 -tensor([2.2433, 2.3476]) -Total rewards: 5259 -Total Receives: 5257 -Stores 4049 1 -Total Receives: 5258 -Stores 4050 1 -Total Receives: 5259 -Stores 4051 1 -Total rewards: 5263 -Total Receives: 5260 -Stores 4052 1 -Total Receives: 5261 -Stores 4053 1 -Total Receives: 5262 -Stores 4054 1 -Total Receives: 5263 -Stores 4055 1 -loss: 3.275636481703259e-05, td_error: 278.3836364746094, entropy: 1.6094398498535156 -Train step: 0.4178287982940674 Optimizer Step: 40 -tensor([5.6837, 6.5752]) -tensor([1.6822, 1.6131, 1.8719, 1.2995]) -tensor([6.2191, 6.0779, 3.6000, 3.8086, 3.4330]) -Total rewards: 5268 -Total Receives: 5264 -Stores 4056 1 -Total Receives: 5265 -Stores 4057 1 -Total Receives: 5266 -Stores 4058 1 -Total Receives: 5267 -Stores 4059 1 -Total Receives: 5268 -Stores 4060 1 -tensor([1.1006, 1.4222, 0.1475, 0.3455, 1.4413]) -Total rewards: 5274 -Total Receives: 5269 -Stores 4061 1 -Total Receives: 5270 -Stores 4062 1 -Total Receives: 5271 -Stores 4063 1 -Total Receives: 5272 -Stores 4064 1 -Total Receives: 5273 -Stores 4065 1 -Total Receives: 5274 -Stores 4066 1 -tensor([6.9014, 4.2603, 4.5241, 4.1176]) -Total rewards: 5277 -Total Receives: 5275 -Stores 4067 1 -Total Receives: 5276 -Stores 4068 1 -Total Receives: 5277 -Stores 4069 1 -tensor([1.8360, 2.5898, 2.6038]) -loss: 1.569870437378995e-05, td_error: 133.41717529296875, entropy: 2.624746799468994 -Train step: 0.41835498809814453 Optimizer Step: 41 -tensor([4.2136, 4.7151, 2.4910, 2.0522, 2.0522]) -Total rewards: 5285 -Total Receives: 5278 -Stores 4070 1 -Total Receives: 5279 -Stores 4071 1 -Total Receives: 5280 -Stores 4072 1 -Total Receives: 5281 -Stores 4073 1 -Total Receives: 5282 -Stores 4074 1 -Total Receives: 5283 -Stores 4075 1 -Total Receives: 5284 -Stores 4076 1 -Total Receives: 5285 -Stores 4077 1 -tensor([3.7152, 2.3537, 2.3537]) -tensor([5.4351, 4.0828]) -tensor([6.5191, 4.5014, 3.6459, 3.6459]) -tensor([2.9204, 2.8165, 1.7594]) -tensor([5.1736, 2.0730, 2.6240, 2.7262]) -tensor([2.2262, 2.2262]) -tensor([2.4202, 3.1667, 2.1621]) -tensor([1.1323, 1.6305, 1.0188]) -tensor([1.3550, 1.2919, 1.1024]) -loss: 3.996387022198178e-06, td_error: 33.9637451171875, entropy: 3.29586124420166 -Train step: 0.3569662570953369 Optimizer Step: 42 -tensor([1.1986, 1.0897]) -tensor([5.2976, 5.4038, 5.0883]) -tensor([4.4907, 2.4187, 2.7862, 3.0039, 4.5658, 2.3308]) -Total rewards: 5291 -Total Receives: 5286 -Stores 4078 1 -Total Receives: 5287 -Stores 4079 1 -Total Receives: 5288 -Stores 4080 1 -Total Receives: 5289 -Stores 4081 1 -Total Receives: 5290 -Stores 4082 1 -Total Receives: 5291 -Stores 4083 1 -tensor([3.4218, 3.9190, 4.0363]) -tensor([6.1218, 3.6530, 4.1199, 4.3668, 3.5635]) -tensor([1.1985, 1.0986]) -Total rewards: 5297 -Total Receives: 5292 -Stores 4084 1 -Total Receives: 5293 -Stores 4085 1 -Total Receives: 5294 -Stores 4086 1 -Total Receives: 5295 -Stores 4087 1 -Total Receives: 5296 -Stores 4088 1 -Total Receives: 5297 -Stores 4089 1 -tensor([3.9779, 3.8826]) -tensor([4.3823, 4.2724]) -tensor([4.7724, 2.7784]) -Total rewards: 5303 -Total Receives: 5298 -Stores 4090 1 -Total Receives: 5299 -Stores 4091 1 -Total Receives: 5300 -Stores 4092 1 -Total Receives: 5301 -Stores 4093 1 -Total Receives: 5302 -Stores 4094 1 -Total Receives: 5303 -Stores 4095 1 -Total rewards: 5309 -Total Receives: 5304 -Stores 4096 1 -Total Receives: 5305 -Stores 4097 1 -Total Receives: 5306 -Stores 4098 1 -Total Receives: 5307 -Stores 4099 1 -Total Receives: 5308 -Stores 4100 1 -Total Receives: 5309 -Stores 4101 1 -tensor([3.9509, 2.4797, 2.4797]) -loss: 0.00011831869051093236, td_error: 1005.5444946289062, entropy: 2.340620279312134 -Train step: 0.49236202239990234 Optimizer Step: 43 -tensor([5.3344, 5.7476, 6.0092, 5.1714]) -Total rewards: 5313 -Total Receives: 5310 -Stores 4102 1 -Total Receives: 5311 -Stores 4103 1 -Total Receives: 5312 -Stores 4104 1 -Total Receives: 5313 -Stores 4105 1 -tensor([5.2471, 5.0496, 3.5111]) -tensor([3.8754, 5.8501, 3.7293]) -tensor([4.5306, 3.4961]) -tensor([6.3498, 6.8409]) -Total rewards: 5316 -Total Receives: 5314 -Stores 4106 1 -Total Receives: 5315 -Stores 4107 1 -Total Receives: 5316 -Stores 4108 1 -loss: 1.7543569583722274e-06, td_error: 14.909597396850586, entropy: 2.3088982105255127 -Train step: 0.32691311836242676 Optimizer Step: 44 -tensor([6.0652, 8.3908, 8.5563]) -tensor([7.9216, 8.1608, 7.3899]) -tensor([1.2186, 1.6059]) -tensor([0.9439, 1.0832, 1.0383]) -tensor([1.7278, 2.5672]) -tensor([1.4358, 1.5362]) -tensor([3.5086, 3.6559, 4.4233]) -tensor([2.7670, 2.3838]) -tensor([2.3604, 2.4252]) -tensor([2.2412, 2.6485]) -tensor([6.2558, 6.0787]) -Total rewards: 5323 -Total Receives: 5317 -Stores 4109 1 -Total Receives: 5318 -Stores 4110 1 -Total Receives: 5319 -Stores 4111 1 -Total Receives: 5320 -Stores 4112 1 -Total Receives: 5321 -Stores 4113 1 -Total Receives: 5322 -Stores 4114 1 -Total Receives: 5323 -Stores 4115 1 -loss: 4.9293244956061244e-05, td_error: 418.92413330078125, entropy: 3.6996541023254395 -Train step: 0.44605302810668945 Optimizer Step: 45 -tensor([2.3013, 1.6153]) -tensor([3.0683, 2.7068, 2.9718]) -tensor([1.1841, 0.9894]) -tensor([1.1893, 0.8255, 0.9175, 0.8963, 0.4199, 0.4199]) -tensor([3.8290, 2.7781]) -tensor([6.3704, 6.5343]) -Total rewards: 5328 -Total Receives: 5324 -Stores 4116 1 -Total Receives: 5325 -Stores 4117 1 -tensor([-2.8573, -2.8579, -2.8836]) -Total rewards: 3945 -Total Receives: 3940 -Stores 3155 1 -Total Receives: 3941 -Stores 3156 1 -Total Receives: 3942 -Stores 3157 1 -Total Receives: 3943 -Stores 3158 1 -Total Receives: 3944 -Stores 3159 1 -Total Receives: 3945 -Stores 3160 1 -tensor([-10.9834, -10.9884]) -tensor([-4.4496, -4.4662, -4.4800]) -Total rewards: 3948 -Total Receives: 3946 -Stores 3161 1 -Total Receives: 3947 -Stores 3162 1 -Total Receives: 3948 -Stores 3163 1 -tensor([-0.1317, -0.1162, -0.1318]) -tensor([-16.8832, -16.9248, -17.0323, -16.9157, -16.8624, -16.9174, -16.8762, - -16.9302, -16.9466, -16.9880, -16.9783, -17.0184, -17.0257]) -tensor([-13.2872, -13.3220, -13.4098, -13.3144, -13.3139, -13.2868, -13.3186, - -13.3435, -13.3693, -13.3605, -13.3960, -13.4033]) -Total rewards: 3950 -Total Receives: 3949 -Stores 3164 1 -Total Receives: 3950 -Stores 3165 1 -tensor([-0.1041, -0.1100]) -tensor([-0.5951, -0.5808, -0.5786, -0.6010, -0.5999, -0.5693, -0.6059, -0.6061, - -0.6072]) -Total rewards: 3953 -Total Receives: 3951 -Stores 3166 1 -Total Receives: 3952 -Stores 3167 1 -Total Receives: 3953 -Stores 3168 1 -tensor([-8.6082, -8.6317, -8.6072, -8.6199, -8.6122, -8.6178]) -Total rewards: 3958 -Total Receives: 3954 -Stores 3169 1 -Total Receives: 3955 -Stores 3170 1 -Total Receives: 3956 -Stores 3171 1 -Total Receives: 3957 -Stores 3172 1 -Total Receives: 3958 -Stores 3173 1 -tensor([-2.1600, -2.1819]) -tensor([-3.4066, -3.3748]) -tensor([-10.9445, -10.9726, -11.0591, -10.9649, -11.0588, -10.9601, -10.9345, - -10.9603, -10.9736, -11.0499, -11.0432, -11.0597]) -Total rewards: 3965 -Total Receives: 3959 -Stores 3174 1 -Total Receives: 3960 -Stores 3175 1 -Total Receives: 3961 -Stores 3176 1 -Total Receives: 3962 -Stores 3177 1 -Total Receives: 3963 -Stores 3178 1 -Total Receives: 3964 -Stores 3179 1 -Total Receives: 3965 -Stores 3180 1 -tensor([-2.8845, -2.9042]) -tensor([-1.0095, -1.0107]) -tensor([-0.2038, -0.1938, -0.1876]) -tensor([-0.3770, -0.3773, -0.3713, -0.3791]) -tensor([-0.1020, -0.1144]) -tensor([-8.9085, -8.8960, -8.9663, -8.9256, -9.0057, -8.9204, -8.9741, -8.9293, - -9.0003, -9.0066, -9.0105]) -tensor([-26.0294, -26.0573, -26.0285, -26.0451, -26.0949, -26.0210]) -tensor([-0.1203, -0.0886, -0.1099]) -Total rewards: 3968 -Total Receives: 3966 -Stores 3181 1 -Total Receives: 3967 -Stores 3182 1 -Total Receives: 3968 -Stores 3183 1 -tensor([-0.5913, -0.5762, -0.5483, -0.5983, -0.5855, -0.6000, -0.6122, -0.6138, - -0.6065, -0.6066]) -tensor([-5.2475, -5.2544]) -tensor([-0.0889, -0.0838]) -tensor([-3.3937, -3.4157, -3.4241, -3.4478, -3.3882]) -tensor([-2.2822, -2.3428]) -Total rewards: 3973 -Total Receives: 3969 -Stores 3184 1 -Total Receives: 3970 -Stores 3185 1 -Total Receives: 3971 -Stores 3186 1 -Total Receives: 3972 -Stores 3187 1 -Total Receives: 3973 -Stores 3188 1 -tensor([-24.4782, -24.4970]) -tensor([-3.5126, -3.5008, -3.5236, -3.4650]) -tensor([-0.2543, -0.2508, -0.2500, -0.2514]) -Total rewards: 3978 -Total Receives: 3974 -Stores 3189 1 -Total Receives: 3975 -Stores 3190 1 -Total Receives: 3976 -Stores 3191 1 -Total Receives: 3977 -Stores 3192 1 -Total Receives: 3978 -Stores 3193 1 -tensor([-7.9542, -7.9988, -7.9659, -8.0400, -7.9661, -8.0161, -8.0438, -8.0638, - -8.0202, -8.0523, -8.0398, -8.0543]) -tensor([-0.8741, -0.8677, -0.8846, -0.8877]) -tensor([-5.4694, -5.4519]) -tensor([-3.0100, -2.9714, -2.9584, -2.9274, -2.9781]) -tensor([-4.0021, -4.0058]) -tensor([-0.3964, -0.3844, 0.2447]) -tensor([-0.4355, -0.4322, -0.4324, -0.4362, -0.4283, -0.4344, -0.4391, -0.4415, - -0.4467]) -tensor([-1.0547, -1.0576]) -tensor([-0.1203, -0.1034, -0.1018, -0.1192]) -tensor([-0.0134, -0.0122, -0.0124]) -tensor([-0.4392, -0.4368, -0.4368, -0.4391, -0.4358, -0.4443, -0.4453, -0.4546]) -Total rewards: 3984 -Total Receives: 3979 -Stores 3194 1 -Total Receives: 3980 -Stores 3195 1 -Total Receives: 3981 -Stores 3196 1 -Total Receives: 3982 -Stores 3197 1 -Total Receives: 3983 -Stores 3198 1 -Total Receives: 3984 -Stores 3199 1 -tensor([-9.1208, -9.1624]) -tensor([-0.4601, -0.4767]) -tensor([-0.1606, -0.1492, -0.1665]) -tensor([-2.4446, -2.3968, -2.4232, -2.3741, -2.4440]) -Total rewards: 3993 -Total Receives: 3985 -Stores 3200 1 -Total Receives: 3986 -Stores 3201 1 -Total Receives: 3987 -Stores 3202 1 -Total Receives: 3988 -Stores 3203 1 -Total Receives: 3989 -Stores 3204 1 -Total Receives: 3990 -Stores 3205 1 -Total Receives: 3991 -Stores 3206 1 -Total Receives: 3992 -Stores 3207 1 -Total Receives: 3993 -Stores 3208 1 -tensor([-6.4791, -6.3892, -6.4769, -6.4059, -6.4464, -6.5193, -6.5061, -6.4429, - -6.4875, -6.4825, -6.5008]) -tensor([-0.2581, -0.2493, -0.2417]) -tensor([-0.1334, -0.1392]) -tensor([-1.9381, -1.9034, -1.9140, -1.9355]) -tensor([-0.0437, -0.0592]) -tensor([-0.1085, -0.1075, -0.0614]) -tensor([-4.7021, -4.7006, -4.6370, -4.6730, -4.7425, -4.7266, -4.6676, -4.7068, - -4.7308, -4.7042, -4.7234]) -tensor([-0.0394, -0.0411]) -tensor([-0.2682, -0.3113, -0.3518, -0.2654, -0.2382, -0.2362, -0.2546, -0.2745]) -Total rewards: 4000 -Total Receives: 3994 -Stores 3209 1 -Total Receives: 3995 -Stores 3210 1 -Total Receives: 3996 -Stores 3211 1 -Total Receives: 3997 -Stores 3212 1 -Total Receives: 3998 -Stores 3213 1 -Total Receives: 3999 -Stores 3214 1 -Total Receives: 4000 -Stores 3215 1 -tensor([-0.2686, -0.2414, -0.2454, -0.2431]) -tensor([-1.8268, -1.8084, -1.8034, -1.8276]) -Total rewards: 4002 -Total Receives: 4001 -Stores 3216 1 -Total Receives: 4002 -Stores 3217 1 -tensor([-0.1261, -0.1248]) -Total rewards: 4006 -Total Receives: 4003 -Stores 3218 1 -Total Receives: 4004 -Stores 3219 1 -Total Receives: 4005 -Stores 3220 1 -Total Receives: 4006 -Stores 3221 1 -tensor([-0.3084, -0.2813, -0.2817]) -tensor([-0.2719, -0.3125, -0.3527, -0.2704, -0.2472, -0.2608, -0.2826]) -tensor([-5.9170, -5.9080, -5.8788, -5.9531, -5.9336, -5.8790, -5.9207, -5.9390, - -5.9130, -5.9364, -5.9628]) -tensor([-0.1046, -0.1363]) -tensor([-0.1363, -0.1355, -0.1324]) -Total rewards: 4010 -Total Receives: 4007 -Stores 3222 1 -Total Receives: 4008 -Stores 3223 1 -Total Receives: 4009 -Stores 3224 1 -Total Receives: 4010 -Stores 3225 1 -Total rewards: 4019 -Total Receives: 4011 -Stores 3226 1 -Total Receives: 4012 -Stores 3227 1 -Total Receives: 4013 -Stores 3228 1 -Total Receives: 4014 -Stores 3229 1 -Total Receives: 4015 -Stores 3230 1 -Total Receives: 4016 -Stores 3231 1 -Total Receives: 4017 -Stores 3232 1 -Total Receives: 4018 -Stores 3233 1 -Total Receives: 4019 -Stores 3234 1 -tensor([-2.5731, -2.5293, -2.5849, -2.5442]) -tensor([-1.7036, -1.7116]) -tensor([-1.6393, -1.6368, -1.6616]) -Total rewards: 4024 -Total Receives: 4020 -Stores 3235 1 -Total Receives: 4021 -Stores 3236 1 -Total Receives: 4022 -Stores 3237 1 -Total Receives: 4023 -Stores 3238 1 -Total Receives: 4024 -Stores 3239 1 -tensor([-1.2652, -1.2838, -1.2752]) -tensor([-0.4473, -0.4927, -0.5206]) -tensor([-0.0764, -0.0793]) -tensor([-0.2564, -0.3179]) -tensor([-4.8627, -4.8519, -4.8246, -4.8963, -4.8922, -4.8173, -4.8589, -4.8769, - -4.9316, -4.9313, -4.8838, -4.9281]) -tensor([-0.2897, -0.2357]) -tensor([-1.8876, -1.9133, -1.8543]) -tensor([-0.2912, -0.3341, -0.3705, -0.2824, -0.2760, -0.3034, -0.2558]) -Total rewards: 4029 -Total Receives: 4025 -Stores 3240 1 -Total Receives: 4026 -Stores 3241 1 -Total Receives: 4027 -Stores 3242 1 -Total Receives: 4028 -Stores 3243 1 -Total Receives: 4029 -Stores 3244 1 -tensor([-0.1565, -0.1667]) -tensor([-1.1938, -1.1917]) -tensor([-0.2938, -0.2846, -0.2879]) -tensor([-4.7698, -4.7788, -4.8251, -4.7297, -4.8010, -4.7953, -4.7218, -4.7694, - -4.8419, -4.8311, -4.7909, -4.8336]) -tensor([-0.3528, -0.3527]) -tensor([-2.5375, -2.5441, -2.5453, -2.5453]) -tensor([-0.2484, -0.1905, -0.1878]) -tensor([-0.3660, -0.3588, -0.3634, -0.3760, -0.3764]) -tensor([-1.0810, -1.0786, -1.0604]) -Total rewards: 4034 -Total Receives: 4030 -Stores 3245 1 -Total Receives: 4031 -Stores 3246 1 -Total Receives: 4032 -Stores 3247 1 -Total Receives: 4033 -Stores 3248 1 -Total Receives: 4034 -Stores 3249 1 -Total rewards: 4036 -Total Receives: 4035 -Stores 3250 1 -Total Receives: 4036 -Stores 3251 1 -tensor([-0.2238, -0.2557, -0.2754, -0.2229, -0.2101, -0.2359]) -tensor([-0.1592, -0.1909]) -tensor([-0.2122, -0.2101, -0.2045]) -tensor([-2.8432, -2.8568, -2.8637, -2.8637]) -tensor([-6.9119, -6.9160, -6.9663, -6.8736, -6.9444, -6.9394, -6.8607, -6.9095, - -6.9846, -6.9731, -6.9337])Total Receives: 5326 -Stores 4118 1 -Total Receives: 5327 -Stores 4119 1 -Total Receives: 5328 -Stores 4120 1 -tensor([5.5648, 7.8523]) -tensor([2.7243, 2.8150, 2.7907, 2.0275, 2.0275]) -Total rewards: 5330 -Total Receives: 5329 -Stores 4121 1 -Total Receives: 5330 -Stores 4122 1 -tensor([2.1997, 1.4776, 1.2444, 1.1517]) -tensor([2.4552, 1.8406]) -tensor([2.8675, 3.5517, 3.0927]) -tensor([1.4080, 1.7314]) -tensor([3.5230, 3.8228, 3.5955]) -tensor([9.1186, 8.5164]) -Total rewards: 5336 -Total Receives: 5331 -Stores 4123 1 -Total Receives: 5332 -Stores 4124 1 -Total Receives: 5333 -Stores 4125 1 -Total Receives: 5334 -Stores 4126 1 -Total Receives: 5335 -Stores 4127 1 -Total Receives: 5336 -Stores 4128 1 -tensor([2.1452, 1.9987]) -loss: 0.00011384968820493668, td_error: 967.5641479492188, entropy: 3.334995746612549 -Train step: 0.4958310127258301 Optimizer Step: 46 -tensor([0.6020, 0.4104]) -tensor([4.1365, 2.3427]) -tensor([1.8140, 0.9920]) -tensor([4.4491, 3.6958, 2.9171]) -tensor([5.0882, 4.8906]) -tensor([3.4225, 2.6381, 2.2140]) -tensor([1.4073, 1.1029, 1.3665]) -tensor([3.9834, 3.2424, 4.0491, 2.6250, 2.6250]) -Total rewards: 5338 -Total Receives: 5337 -Stores 4129 1 -Total Receives: 5338 -Stores 4130 1 -tensor([6.9622, 3.8591, 3.5864, 3.3879]) -Total rewards: 5344 -Total Receives: 5339 -Stores 4131 1 -Total Receives: 5340 -Stores 4132 1 -Total Receives: 5341 -Stores 4133 1 -Total Receives: 5342 -Stores 4134 1 -Total Receives: 5343 -Stores 4135 1 -Total Receives: 5344 -Stores 4136 1 -tensor([1.3744, 0.8671]) -loss: 5.153676738700597e-06, td_error: 43.799095153808594, entropy: 2.732339859008789 -Train step: 0.3947792053222656 Optimizer Step: 47 -tensor([2.3231, 2.7255]) -tensor([2.2353, 2.8258, 2.5330]) -tensor([0.8371, 1.0760, 1.4862]) -tensor([2.6648, 2.1334, 2.8021]) -Total rewards: 5348 -Total Receives: 5345 -Stores 4137 1 -Total Receives: 5346 -Stores 4138 1 -Total Receives: 5347 -Stores 4139 1 -Total Receives: 5348 -Stores 4140 1 -tensor([1.4957, 0.2847, 0.0886]) -tensor([1.8377, 1.3727, 2.6685, 1.3414, 1.2434]) -tensor([3.7090, 3.1972]) -loss: 3.597767135943286e-05, td_error: 305.7602233886719, entropy: 4.226323127746582 -Train step: 0.47177600860595703 Optimizer Step: 48 -tensor([1.1572, 1.3609]) -Total rewards: 5354 -Total Receives: 5349 -Stores 4141 1 -Total Receives: 5350 -Stores 4142 1 -Total Receives: 5351 -Stores 4143 1 -Total Receives: 5352 -Stores 4144 1 -Total Receives: 5353 -Stores 4145 1 -Total Receives: 5354 -Stores 4146 1 -tensor([5.5596, 4.2917, 4.4174]) -tensor([6.0698, 5.7442]) -tensor([4.0749, 3.5266]) -tensor([8.4581, 7.4766, 5.8349, 5.9202, 5.9202]) -tensor([4.9517, 4.6745, 3.5070, 4.5178]) -tensor([3.7697, 4.0730]) -loss: 3.625747012847569e-06, td_error: 30.813812255859375, entropy: 3.8697268962860107 -Train step: 0.3624691963195801 Optimizer Step: 49 -tensor([2.7071, 3.1123]) -tensor([3.3792, 1.8474]) -tensor([0.3600, 1.0578, 0.1878, 0.3932, 0.5259]) -tensor([2.1028, 1.5578]) -tensor([2.5839, 1.6632, 1.2118]) -tensor([6.1977, 4.8649, 3.0302]) -tensor([9.0474, 7.2520, 6.4609, 6.4609]) -tensor([1.4270, 1.4010, 0.5971]) -tensor([3.0122, 4.3201, 3.0063, 2.9028, 1.9119, 2.4650]) -tensor([6.3934, 4.7096]) -Total rewards: 5360 -Total Receives: 5355 -Stores 4147 1 -Total Receives: 5356 -Stores 4148 1 -Total Receives: 5357 -Stores 4149 1 -Total Receives: 5358 -Stores 4150 1 -Total Receives: 5359 -Stores 4151 1 -Total Receives: 5360 -Stores 4152 1 -tensor([5.4227, 3.2830]) -Total rewards: 5366 -Total Receives: 5361 -Stores 4153 1 -Total Receives: 5362 -Stores 4154 1 -Total Receives: 5363 -Stores 4155 1 -Total Receives: 5364 -Stores 4156 1 -Total Receives: 5365 -Stores 4157 1 -Total Receives: 5366 -Stores 4158 1 -loss: 4.683842053054832e-05, td_error: 398.06158447265625, entropy: 2.08385968208313 -Train step: 0.39970993995666504 Optimizer Step: 50 -tensor([0.0782, 1.3353]) -tensor([6.7367, 6.5386, 5.3203, 4.1660]) -Total rewards: 5372 -Total Receives: 5367 -Stores 4159 1 -Total Receives: 5368 -Stores 4160 1 -Total Receives: 5369 -Stores 4161 1 -Total Receives: 5370 -Stores 4162 1 -Total Receives: 5371 -Stores 4163 1 -Total Receives: 5372 -Stores 4164 1 -Total rewards: 5379 -Total Receives: 5373 -Stores 4165 1 -Total Receives: 5374 -Stores 4166 1 -Total Receives: 5375 -Stores 4167 1 -Total Receives: 5376 -Stores 4168 1 -Total Receives: 5377 -Stores 4169 1 -Total Receives: 5378 -Stores 4170 1 -Total Receives: 5379 -Stores 4171 1 -tensor([4.9202, 2.4910]) -Total rewards: 5382 -Total Receives: 5380 -Stores 4172 1 -Total Receives: 5381 -Stores 4173 1 -Total Receives: 5382 -Stores 4174 1 -tensor([1.4443, 1.5000, 2.8243, 1.3896]) -tensor([3.0835, 2.6776, 2.0520, 3.1587, 3.3445]) -tensor([3.5736, 3.5798, 3.4808, 2.4263, 3.0099]) -tensor([1.8627, 1.9138]) -loss: 3.3082407753681764e-05, td_error: 281.1545104980469, entropy: 2.182295560836792 -Train step: 0.3791539669036865 Optimizer Step: 51 -tensor([2.1030, 1.9155, 1.9730]) -tensor([9.5000, 7.6601, 6.2162, 6.2162]) -Total rewards: 5387 -Total Receives: 5383 -Stores 4175 1 -Total Receives: 5384 -Stores 4176 1 -Total Receives: 5385 -Stores 4177 1 -Total Receives: 5386 -Stores 4178 1 -Total Receives: 5387 -Stores 4179 1 -tensor([2.3198, 2.2537]) -tensor([5.6196, 5.1572, 4.1614, 5.7253]) -tensor([5.9653, 5.8690, 4.6422, 5.3167]) -tensor([10.7786, 10.3997, 9.3591]) -tensor([1.7512, 1.2732, 1.8284, 1.1992]) -Total rewards: 5392 -Total Receives: 5388 -Stores 4180 1 -Total Receives: 5389 -Stores 4181 1 -Total Receives: 5390 -Stores 4182 1 -Total Receives: 5391 -Stores 4183 1 -Total Receives: 5392 -Stores 4184 1 -tensor([8.7303, 7.5017]) -tensor([3.9061, 4.7342, 3.8032, 2.9752]) -tensor([2.5818, 2.2702]) -loss: 1.908273588924203e-05, td_error: 162.1767578125, entropy: 3.044593572616577 -Train step: 0.32688212394714355 Optimizer Step: 52 -tensor([3.7691, 3.3612, 2.1617, 2.1160]) -Total rewards: 5399 -Total Receives: 5393 -Stores 4185 1 -Total Receives: 5394 -Stores 4186 1 -Total Receives: 5395 -Stores 4187 1 -Total Receives: 5396 -Stores 4188 1 -Total Receives: 5397 -Stores 4189 1 -Total Receives: 5398 -Stores 4190 1 -Total Receives: 5399 -Stores 4191 1 -tensor([9.8587, 8.0384, 5.7486]) -tensor([1.0299, 0.7304]) -tensor([6.6883, 5.3988, 6.1039, 3.6460]) -tensor([2.2640, 2.1076]) -Total rewards: 5404 -Total Receives: 5400 -Stores 4192 1 -Total Receives: 5401 -Stores 4193 1 -Total Receives: 5402 -Stores 4194 1 -Total Receives: 5403 -Stores 4195 1 -Total Receives: 5404 -Stores 4196 1 -tensor([5.1391, 4.6629, 3.2319]) -tensor([5.1662, 5.0603, 3.5410]) -tensor([4.1263, 4.2096, 3.5471]) -loss: 0.00014345883391797543, td_error: 1219.2008056640625, entropy: 2.5651960372924805 -Train step: 0.539276123046875 Optimizer Step: 53 -tensor([6.4118, 4.8873]) -tensor([5.3871, 6.1230, 3.1616]) -Total rewards: 5408 -Total Receives: 5405 -Stores 4197 1 -Total Receives: 5406 -Stores 4198 1 -Total Receives: 5407 -Stores 4199 1 -Total Receives: 5408 -Stores 4200 1 -tensor([2.7745, 2.3815]) -tensor([5.4489, 4.9442, 4.6110, 3.8690]) -tensor([2.2328, 2.5047, 2.4850]) -tensor([0.5400, 0.3885]) -tensor([3.3456, 3.7995, 2.8970]) -Total rewards: 5412 -Total Receives: 5409 -Stores 4201 1 -Total Receives: 5410 -Stores 4202 1 -Total Receives: 5411 -Stores 4203 1 -Total Receives: 5412 -Stores 4204 1 -tensor([8.0161, 4.7456, 4.5210]) -tensor([3.3142, 3.6343]) -loss: 0.0003456287377048284, td_error: 2937.364501953125, entropy: 3.138453483581543 -Train step: 0.5407710075378418 Optimizer Step: 54 -Total rewards: 5416 -Total Receives: 5413 -Stores 4205 1 -Total Receives: 5414 -Stores 4206 1 -Total Receives: 5415 -Stores 4207 1 -Total Receives: 5416 -Stores 4208 1 -tensor([1.0041, 1.0914, 1.4699, 1.4075]) -tensor([5.0464, 7.3877, 4.7722, 4.7722]) -tensor([6.9251, 3.7287]) -Total rewards: 5421 -Total Receives: 5417 -Stores 4209 1 -Total Receives: 5418 -Stores 4210 1 -Total Receives: 5419 -Stores 4211 1 -Total Receives: 5420 -Stores 4212 1 -Total Receives: 5421 -Stores 4213 1 -tensor([5.3056, 4.9340, 3.3198, 3.3198]) -tensor([1.8448, 3.7474, 3.8384]) -tensor([1.1054, 2.1920, 0.9314]) -tensor([4.3809, 3.5572, 3.9610]) -tensor([2.5172, 2.6401, 3.0553, 1.8775]) -loss: 0.00025284787989221513, td_error: 2148.85546875, entropy: 2.571770668029785 -Train step: 0.5743081569671631 Optimizer Step: 55 -Total rewards: 5427 -Total Receives: 5422 -Stores 4214 1 -Total Receives: 5423 -Stores 4215 1 -Total Receives: 5424 -Stores 4216 1 -Total Receives: 5425 -Stores 4217 1 -Total Receives: 5426 -Stores 4218 1 -Total Receives: 5427 -Stores 4219 1 - -tensor([-3.7133, -3.6738, -3.7920]) -tensor([-0.3751, -0.3618, -0.3769, -0.3510]) -tensor([-5.1185, -5.1337, -5.1337]) -tensor([-0.3600, -0.4000, -0.4324, -0.3601, -0.3449]) -tensor([-7.0919, -7.0905, -7.1193, -6.2082]) -tensor([-0.2285, -0.2594, -0.2737, -0.2277]) -tensor([-0.0404, -0.0399]) -tensor([-0.8199, -0.8198, -0.8261, -0.8373]) -tensor([-0.2294, -0.2344]) -Total rewards: 4041 -Total Receives: 4037 -Stores 3252 1 -Total Receives: 4038 -Stores 3253 1 -Total Receives: 4039 -Stores 3254 1 -Total Receives: 4040 -Stores 3255 1 -Total Receives: 4041 -Stores 3256 1 -tensor([-0.2050, -0.1977, -0.2079]) -tensor([-0.2161, -0.2509, -0.2896, -0.1150]) -tensor([-2.4509, -2.4641, -2.4641]) -tensor([-4.2796, -4.2819, -4.3048]) -tensor([-0.1619, -0.2057, -0.0826]) -Total rewards: 4045 -Total Receives: 4042 -Stores 3257 1 -Total Receives: 4043 -Stores 3258 1 -Total Receives: 4044 -Stores 3259 1 -Total Receives: 4045 -Stores 3260 1 -tensor([-0.0888, -0.1040]) -tensor([-0.1426, -0.1302, -0.1398]) -tensor([-0.2983, -0.2878, -0.3035]) -tensor([-7.9959, -7.9987, -8.0486, -7.9478, -7.1533, -8.0254, -7.9923, -8.0607, - -8.0489, -8.0191]) -tensor([-3.4602, -3.5364, -3.5517]) -tensor([-2.6362, -2.6478]) -tensor([-22.8252, -22.8341, -22.8985, -22.7743, -22.8636, -22.8198, -22.9060, - -22.8948, -22.8482]) -tensor([-0.0560, -0.0714, -0.0619]) -tensor([-0.2110, -0.2258]) -tensor([-22.1490, -22.1854, -22.1832]) -tensor([-1.0264, -1.0281]) -tensor([-0.0873, -0.0770]) -tensor([-3.8556, -3.8636, -3.9174, -3.8938, -3.8536, -3.9326, -3.9156, -3.8829, - -3.9777, -3.9810]) -tensor([-1.0373, -1.0493, -1.0618, -1.0610]) -tensor([-0.2238, -0.2780]) -tensor([-0.0697, -0.0853]) -tensor([-0.6591, -0.6332]) -tensor([-0.9586, -0.9641, -0.9706]) -tensor([-1.7144, -1.7316, -1.7433]) -Total rewards: 4050 -Total Receives: 4046 -Stores 3261 1 -Total Receives: 4047 -Stores 3262 1 -Total Receives: 4048 -Stores 3263 1 -Total Receives: 4049 -Stores 3264 1 -Total Receives: 4050 -Stores 3265 1 -tensor([-0.0352, -0.0443]) -tensor([-0.4336, -0.4210, -0.4323]) -tensor([-0.0373, -0.0295]) -tensor([-0.3658, -0.3567, -0.3743]) -Total rewards: 4057 -Total Receives: 4051 -Stores 3266 1 -Total Receives: 4052 -Stores 3267 1 -Total Receives: 4053 -Stores 3268 1 -Total Receives: 4054 -Stores 3269 1 -Total Receives: 4055 -Stores 3270 1 -Total Receives: 4056 -Stores 3271 1 -Total Receives: 4057 -Stores 3272 1 -tensor([-17.2819, -17.2780, -17.2997]) -tensor([-0.6422, -0.6525]) -tensor([-0.3204, -0.3666, -0.3695]) -Total rewards: 4063 -Total Receives: 4058 -Stores 3273 1 -Total Receives: 4059 -Stores 3274 1 -Total Receives: 4060 -Stores 3275 1 -Total Receives: 4061 -Stores 3276 1 -Total Receives: 4062 -Stores 3277 1 -Total Receives: 4063 -Stores 3278 1 -tensor([-2.6886, -2.7031]) -tensor([-0.9401, -0.9294, -0.9386, -0.9431]) -tensor([-2.1599, -2.1637, -2.1235, -2.1673]) -tensor([-0.6540, -0.6554]) -Total rewards: 4065 -Total Receives: 4064 -Stores 3279 1 -Total Receives: 4065 -Stores 3280 1 -tensor([-0.2168, -0.2096, -0.2198]) -Total rewards: 4067 -Total Receives: 4066 -Stores 3281 1 -Total Receives: 4067 -Stores 3282 1 -tensor([-0.1046, -0.0635]) -Total rewards: 4069 -Total Receives: 4068 -Stores 3283 1 -Total Receives: 4069 -Stores 3284 1 -tensor([-0.0748, -0.0794]) -tensor([-3.4772, -3.4893, -3.5197, -3.5957, -3.5024, -3.4787, -3.5345, -3.5192, - -3.5015, -3.5858]) -Total rewards: 4075 -Total Receives: 4070 -Stores 3285 1 -Total Receives: 4071 -Stores 3286 1 -Total Receives: 4072 -Stores 3287 1 -Total Receives: 4073 -Stores 3288 1 -Total Receives: 4074 -Stores 3289 1 -Total Receives: 4075 -Stores 3290 1 -tensor([-0.0072, -0.0095]) -tensor([-1.1320, -1.1770, -1.1818]) -tensor([-2.0595, -2.0772]) -tensor([-0.8166, -0.8201]) -tensor([-0.2418, -0.2564]) -tensor([-0.3894, -0.3993]) -tensor([-0.1413, -0.1292]) -tensor([-6.4572, -6.4493, -6.4445, -6.4681]) -tensor([-0.5797, -0.5831]) -tensor([-1.6832, -1.6918, -1.7096, -1.6558, -1.7008]) -Total rewards: 4081 -Total Receives: 4076 -Stores 3291 1 -Total Receives: 4077 -Stores 3292 1 -Total Receives: 4078 -Stores 3293 1 -Total Receives: 4079 -Stores 3294 1 -Total Receives: 4080 -Stores 3295 1 -Total Receives: 4081 -Stores 3296 1 -tensor([-2.4778, -2.5092, -2.5745, -2.4930, -2.4694, -2.5266, -2.5079, -2.5151, - -2.5612]) -tensor([-1.4248, -1.4474, -1.5210, -1.4388, -1.4702, -1.4486, -1.4643, -1.5056]) -tensor([0.0283, 0.0282]) -tensor([-1.2309, -1.2202]) -Total rewards: 4085 -Total Receives: 4082 -Stores 3297 1 -Total Receives: 4083 -Stores 3298 1 -Total Receives: 4084 -Stores 3299 1 -Total Receives: 4085 -Stores 3300 1 -tensor([-0.1866, -0.1890]) -tensor([-0.6147, -0.6056, -0.6275, -0.6264]) -tensor([-0.3461, -0.3456, -0.3325]) -tensor([-1.2658, -1.2973, -1.2275, -1.2540, -1.2332, -1.2544, -1.2818, -1.3208]) -tensor([-1.2464, -1.2153, -1.2379, -1.2342]) -Total rewards: 4088 -Total Receives: 4086 -Stores 3301 1 -Total Receives: 4087 -Stores 3302 1 -Total Receives: 4088 -Stores 3303 1 -tensor([-0.1006, -0.1432]) -tensor([-1.1254, -1.1366, -1.1416]) -tensor([-0.0744, -0.0630]) -tensor([-0.0152, -0.0103]) -tensor([-1.6003, -1.5754]) -tensor([-0.2066, -0.2130, -0.2105, -0.2118]) -Total rewards: 4093 -Total Receives: 4089 -Stores 3304 1 -Total Receives: 4090 -Stores 3305 1 -Total Receives: 4091 -Stores 3306 1 -Total Receives: 4092 -Stores 3307 1 -Total Receives: 4093 -Stores 3308 1 -tensor([-0.0587, -0.0641, -0.0510]) -tensor([-1.2886, -1.3195, -1.2504, -1.2855, -1.2551, -1.2831, -1.3490]) -Total rewards: 4098 -Total Receives: 4094 -Stores 3309 1 -Total Receives: 4095 -Stores 3310 1 -Total Receives: 4096 -Stores 3311 1 -Total Receives: 4097 -Stores 3312 1 -Total Receives: 4098 -Stores 3313 1 -tensor([-0.1224, -0.1197]) -Total rewards: 4105 -Total Receives: 4099 -Stores 3314 1 -Total Receives: 4100 -Stores 3315 1 -Total Receives: 4101 -Stores 3316 1 -Total Receives: 4102 -Stores 3317 1 -Total Receives: 4103 -Stores 3318 1 -Total Receives: 4104 -Stores 3319 1 -Total Receives: 4105 -Stores 3320 1 -tensor([-0.1984, -0.2004]) -tensor([-0.0565, -0.0679]) -tensor([-0.6206, -0.6413]) -tensor([-0.5990, -0.5861, -0.6036, -0.5950]) -tensor([-0.2885, -0.2894, -0.2874, -0.2924]) -tensor([-0.1603, -0.1506]) -tensor([-2.5730, -2.5763]) -tensor([-0.2269, -0.2318]) -tensor([-2.9280, -2.8731, -2.9285]) -tensor([-0.0510, -0.0615]) -tensor([-2.4085, -2.4298, -2.4383]) -tensor([-0.0222, -0.0161]) -tensor([-2.5316, -2.5267, -2.5213]) -tensor([-4.2058, -4.2097, -4.2179]) -tensor([-0.2221, -0.2307, -0.2264]) -Total rewards: 4107 -Total Receives: 4106 -Stores 3321 1 -Total Receives: 4107 -Stores 3322 1 -tensor([-0.0661, -0.0569]) -Total rewards: 4112 -Total Receives: 4108 -Stores 3323 1 -Total Receives: 4109 -Stores 3324 1 -Total Receives: 4110 -Stores 3325 1 -Total Receives: 4111 -Stores 3326 1 -Total Receives: 4112 -Stores 3327 1 -tensor([-2.1752, -2.1915, -2.1994]) -Total rewards: 4118 -Total Receives: 4113 -Stores 3328 1 -Total Receives: 4114 -Stores 3329 1 -Total Receives: 4115 -Stores 3330 1 -Total Receives: 4116 -Stores 3331 1 -Total Receives: 4117 -Stores 3332 1 -Total Receives: 4118 -Stores 3333 1 -tensor([-1.7565, -1.7952, -1.7157, -1.7607, -1.7252, -1.8216, -1.8291]) -tensor([-1.4897, -1.4810, -1.4839]) -tensor([-3.9271, -3.9295, -3.8983, -3.9225, -3.9178, -3.9335]) -tensor([-0.0152, -0.0171, -0.0257]) -tensor([-6.9297, -6.9365]) -tensor([-0.0544, -0.0343]) -tensor([-1.1237, -1.1237]) -tensor([-0.2797, -0.2543, -0.2770, -0.2757, -0.2797]) -tensor([-0.0174, -0.0190]) -tensor([-1.5964, -1.6187, -1.6193]) -tensor([-1.9726, -1.9728, -2.0141]) -tensor([-0.1488, -0.1327, -0.1327, -0.1314]) -tensor([-0.2246, -0.2230, -0.2187, -0.2125, -0.2178]) -Total rewards: 4122 -Total Receives: 4119 -Stores 3334 1 -Total Receives: 4120 -Stores 3335 1 -Total Receives: 4121 -Stores 3336 1 -Total Receives: 4122 -Stores 3337 1 -tensor([-0.3754, -0.3863, -0.3916]) -tensor([-2.5329, -2.5434, -2.5645]) -tensor([-0.1904, -0.1982]) -tensor([-1.3527, -1.3712, -1.3924]) -tensor([-0.1670, -0.1678, -0.1797, -0.1655]) -tensor([-0.4620, -0.4522]) -tensor([-0.0391, -0.0481, -0.0351]) -tensor([-2.0997, -2.1425, -2.1051, -2.0688, -2.1807, -2.1640, -2.1842]) -Total rewards: 4128 -Total Receives: 4123 -Stores 3338 1 -Total Receives: 4124 -Stores 3339 1 -Total Receives: 4125 -Stores 3340 1 -Total Receives: 4126 -Stores 3341 1 -Total Receives: 4127 -Stores 3342 1 -Total Receives: 4128 -Stores 3343 1 -tensor([-1.9341, -1.9721]) -tensor([-0.0602, -0.0342]) -tensor([-17.5936, -17.5922]) -tensor([-0.1098, -0.1044, -0.1367]) -tensor([-1.2304, -1.2620, -1.2632, -1.2663, -1.2718]) -tensor([-0.3689, -0.3799]) -tensor([-0.9175, -0.9319]) -tensor([-1.7531, -1.7770, -1.7840]) -tensor([-0.7468, -0.7605]) -Total rewards: 4130 -Total Receives: 4129 -Stores 3344 1 -Total Receives: 4130 -Stores 3345 1 -tensor([-0.3239, -0.3282, -0.3300]) -tensor([-0.4351, -0.4319, -0.4305, -0.4414, -0.4349, -0.4328, -0.4458]) -tensor([-1.3072, -1.3000, -1.3043, -1.3136]) -tensor([-0.5088, -0.5319]) -Total rewards: 4133 -Total Receives: 4131 -Stores 3346 1 -Total Receives: 4132 -Stores 3347 1 -Total Receives: 4133 -Stores 3348 1 -tensor([-2.4028, -2.4335, -2.3922, -2.4887, -2.4703, -2.4961, -2.5098, -2.5098]) -tensor([-0.3297, -0.3408]) -tensor([-0.0958, -0.0657, -0.0787, -0.0787, -0.0659, -0.0797]) -tensor([-1.4844, -1.5090, -1.5498, -1.5364, -1.5517, -1.5644, -1.5644]) -Total rewards: 4139 -Total Receives: 4134 -Stores 3349 1 -Total Receives: 4135 -Stores 3350 1 -Total Receives: 4136 -Stores 3351 1 -Total Receives: 4137 -Stores 3352 1 -Total Receives: 4138 -Stores 3353 1 -Total Receives: 4139 -Stores 3354 1 -Total rewards: 4143 -Total Receives: 4140 -Stores 3355 1 -Total Receives: 4141 -Stores 3356 1 -Total Receives: 4142 -Stores 3357 1 -Total Receives: 4143 -Stores 3358 1 -Total rewards: 4149 -Total Receives: 4144 -Stores 3359 1 -Total Receives: 4145 -Stores 3360 1 -Total Receives: 4146 -Stores 3361 1 -Total Receives: 4147 -Stores 3362 1 -Total Receives: 4148 -Stores 3363 1 -Total Receives: 4149 -Stores 3364 1 -tensor([-0.0903, -0.1152]) -tensor([-0.2651, -0.2740, -0.2733, -0.2741, -0.2766, -0.2801]) -Total rewards: 4153 -Total Receives: 4150 -Stores 3365 1 -Total Receives: 4151 -Stores 3366 1 -Total Receives: 4152 -Stores 3367 1 -Total Receives: 4153 -Stores 3368 1 -tensor([-0.9802, -0.9867, -0.9915]) -tensor([-0.0420, -0.0314, -0.0218]) -tensor([-0.8815, -0.8919]) -Total rewards: 4158 -Total Receives: 4154 -Stores 3369 1 -Total Receives: 4155 -Stores 3370 1 -Total Receives: 4156 -Stores 3371 1 -Total Receives: 4157 -Stores 3372 1 -Total Receives: 4158 -Stores 3373 1 -tensor([-1.3213, -1.3239, -1.3355]) -tensor([-2.4965, -2.5136, -2.5037, -2.5001, -2.5236]) -Total rewards: 4166 -Total Receives: 4159 -Stores 3374 1 -Total Receives: 4160 -Stores 3375 1 -Total Receives: 4161 -Stores 3376 1 -Total Receives: 4162 -Stores 3377 1 -Total Receives: 4163 -Stores 3378 1 -Total Receives: 4164 -Stores 3379 1 -Total Receives: 4165 -Stores 3380 1 -Total Receives: 4166 -Stores 3381 1 -tensor([-1.6545, -1.6612, -1.6572]) -tensor([-0.8549, -1.0218, -0.8920, -0.8785, -0.8776, -0.8929]) -tensor([-1.0940, -1.0807]) -tensor([-0.3024, -0.3024, -0.3067, -0.3086]) -tensor([-0.0569, -0.0449]) -tensor([-0.2500, -0.2443, -0.2313, -0.2357, -0.2329, -0.2539]) -tensor([-0.1512, -0.1565, -0.1580]) -tensor([-0.7859, -0.7911, -0.7911]) -Total rewards: 4169 -Total Receives: 4167 -Stores 3382 1 -Total Receives: 4168 -Stores 3383 1 -Total Receives: 4169 -Stores 3384 1 -tensor([-1.6321, -1.6589, -1.6984, -1.6828, -1.6947, -1.7194]) -tensor([-0.1143, -0.1406, -0.1129, -0.1471, -0.1217]) -tensor([-0.0189, -0.0296, -0.0252]) -tensor([-0.2540, -0.2500, -0.2458]) -tensor([-0.1394, -0.1349]) -tensor([-0.0330, -0.0285]) -tensor([-1.0512, -1.0765, -1.0650]) -tensor([-1.1392, -1.1577, -1.1933, -1.1796, -1.1922]) -Total rewards: 4173 -Total Receives: 4170 -Stores 3385 1 -Total Receives: 4171 -Stores 3386 1 -Total Receives: 4172 -Stores 3387 1 -Total Receives: 4173 -Stores 3388 1 -Total rewards: 4179 -Total Receives: 4174 -Stores 3389 1 -Total Receives: 4175 -Stores 3390 1 -Total Receives: 4176 -Stores 3391 1 -Total Receives: 4177 -Stores 3392 1 -Total Receives: 4178 -Stores 3393 1 -Total Receives: 4179 -Stores 3394 1 -tensor([-0.7380, -0.7398]) -tensor([-2.3803, -2.3791, -2.4028, -2.3862, -2.4058, -2.4163]) -tensor([-0.1795, -0.1962, -0.2211, -0.1914]) -tensor([-1.0395, -1.0354, -1.0409]) -tensor([-0.9952, -0.9952, -0.9970]) -tensor([-0.3003, -0.2972, -0.2963, -0.2997, -0.3132, -0.3046]) -tensor([-0.0309, -0.0169]) -Total rewards: 4183 -Total Receives: 4180 -Stores 3395 1 -Total Receives: 4181 -Stores 3396 1 -Total Receives: 4182 -Stores 3397 1 -Total Receives: 4183 -Stores 3398 1 -tensor([-0.5522, -0.5718, -0.5881, -0.5577, -0.5642]) -tensor([-0.2884, -0.2857, -0.2861, -0.3028, -0.2930]) -tensor([-0.2913, -0.2917, -0.2870, -0.2792]) -tensor([-0.1715, -0.1996, -0.2019]) -tensor([-0.0658, -0.1084, -0.1209, -0.0955]) -Total rewards: 4184 -Total Receives: 4184 -Stores 3399 1 -tensor([-0.7511, -0.7551, -0.7558]) -tensor([-0.1691, -0.1428, -0.1750]) -tensor([-0.7078, -0.6885]) -tensor([-0.5525, -0.5568]) -Total rewards: 4190 -Total Receives: 4185 -Stores 3400 1 -Total Receives: 4186 -Stores 3401 1 -Total Receives: 4187 -Stores 3402 1 -Total Receives: 4188 -Stores 3403 1 -Total Receives: 4189 -Stores 3404 1 -Total Receives: 4190 -Stores 3405 1 -tensor([-0.8049, -0.8286, -0.8056]) -Total rewards: 4197 -Total Receives: 4191 -Stores 3406 1 -Total Receives: 4192 -Stores 3407 1 -Total Receives: 4193 -Stores 3408 1 -Total Receives: 4194 -Stores 3409 1 -Total Receives: 4195 -Stores 3410 1 -Total Receives: 4196 -Stores 3411 1 -Total Receives: 4197 -Stores 3412 1 -Total rewards: 4202 -Total Receives: 4198 -Stores 3413 1 -Total Receives: 4199 -Stores 3414 1 -Total Receives: 4200 -Stores 3415 1 -Total Receives: 4201 -Stores 3416 1 -Total Receives: 4202 -Stores 3417 1 -tensor([-0.0500, -0.1115, -0.0851]) -tensor([-2.7028, -2.7061, -2.7151, -2.6915, -2.6724, -2.7033, -2.7079, -2.7155, - -2.7179]) -tensor([-0.6392, -0.6382]) -tensor([-0.2231, -0.2340, -0.2526, -0.2165]) -tensor([-0.4615, 0.1950]) -tensor([-1.8599, -1.8638, -1.8743, -1.8462, -1.8591, -1.8659, -1.8743, -1.8761]) -tensor([-0.5971, -0.5876, -0.5999, -0.5952]) -Total rewards: 4207 -Total Receives: 4203 -Stores 3418 1 -Total Receives: 4204 -Stores 3419 1 -Total Receives: 4205 -Stores 3420 1 -Total Receives: 4206 -Stores 3421 1 -Total Receives: 4207 -Stores 3422 1 -tensor([-1.3950, -1.3956, -1.4053, -1.3954, -1.3992, -1.4056, -1.4063]) -tensor([-0.0503, -0.0990]) -tensor([-0.0957, -0.1162, -0.1208]) -Total rewards: 4212 -Total Receives: 4208 -Stores 3423 1 -Total Receives: 4209 -Stores 3424 1 -Total Receives: 4210 -Stores 3425 1 -Total Receives: 4211 -Stores 3426 1 -Total Receives: 4212 -Stores 3427 1 -tensor([-1.0869, -1.0869]) -tensor([-1.5859, -1.5965, -1.5954]) -tensor([-0.4898, -0.4946, -0.4908]) -tensor([-0.4173, -0.4172]) -Total rewards: 4221 -Total Receives: 4213 -Stores 3428 1 -Total Receives: 4214 -Stores 3429 1 -Total Receives: 4215 -Stores 3430 1 -Total Receives: 4216 -Stores 3431 1 -Total Receives: 4217 -Stores 3432 1 -Total Receives: 4218 -Stores 3433 1 -Total Receives: 4219 -Stores 3434 1 -Total Receives: 4220 -Stores 3435 1 -Total Receives: 4221 -Stores 3436 1 -tensor([-1.1113, -1.0875, -1.0827, -1.1128]) -tensor([-1.0566, -1.0552, -1.0490, -1.0533, -1.0577, -1.0574]) -Total rewards: 4225 -Total Receives: 4222 -Stores 3437 1 -Total Receives: 4223 -Stores 3438 1 -Total Receives: 4224 -Stores 3439 1 -Total Receives: 4225 -Stores 3440 1 -tensor([-0.0775, -0.0788, -0.0894]) -tensor([-0.1911, -0.1641, -0.2083, -0.2214, -0.1663]) -Total rewards: 4228 -Total Receives: 4226 -Stores 3441 1 -Total Receives: 4227 -Stores 3442 1 -Total Receives: 4228 -Stores 3443 1 -tensor([-0.0247, -0.0330, -0.0377]) -Total rewards: 4235 -Total Receives: 4229 -Stores 3444 1 -Total Receives: 4230 -Stores 3445 1 -Total Receives: 4231 -Stores 3446 1 -Total Receives: 4232 -Stores 3447 1 -Total Receives: 4233 -Stores 3448 1 -Total Receives: 4234 -Stores 3449 1 -Total Receives: 4235 -Stores 3450 1 -tensor([-1.1791, -1.1997, -1.2089]) -tensor([-0.7110, -0.7150, -0.7178]) -tensor([-7.1450, -7.1567, -7.1394, -7.1462]) -tensor([-0.5428, -0.5735]) -Total rewards: 4237 -Total Receives: 4236 -Stores 3451 1 -Total Receives: 4237 -Stores 3452 1 -tensor([-0.3108, -0.2871]) -tensor([-0.0723, -0.0713, -0.1069, -0.1220]) -tensor([-0.3898, -0.3863]) -Total rewards: 4242 -Total Receives: 4238 -Stores 3453 1 -Total Receives: 4239 -Stores 3454 1 -Total Receives: 4240 -Stores 3455 1 -Total Receives: 4241 -Stores 3456 1 -Total Receives: 4242 -Stores 3457 1 -tensor([-0.7953, -0.7969, -0.7807]) -tensor([-0.1104, -0.1176]) -tensor([-0.5235, -0.5266]) -Total rewards: 4246 -Total Receives: 4243 -Stores 3458 1 -Total Receives: 4244 -Stores 3459 1 -Total Receives: 4245 -Stores 3460 1 -Total Receives: 4246 -Stores 3461 1 -tensor([-0.0845, -0.0831, -0.1308]) -tensor([0.9473, 0.5957]) -tensor([2.1707, 2.5669]) -tensor([2.4393, 4.8105, 1.7120, 1.8816]) -tensor([7.0884, 5.0597, 5.0597]) -tensor([1.4786, 2.1235]) -tensor([2.5470, 2.6812, 1.4065, 1.8953]) -tensor([2.6849, 1.4529, 2.0251]) -tensor([5.0794, 5.0794]) -tensor([1.3427, 2.9382, 1.9246, 3.2946, 1.3420, 1.1915]) -Total rewards: 5429 -Total Receives: 5428 -Stores 4220 1 -Total Receives: 5429 -Stores 4221 1 -loss: 0.0002587885537650436, td_error: 2199.34326171875, entropy: 3.0446290969848633 -Train step: 0.49926280975341797 Optimizer Step: 56 -tensor([3.0495, 2.7557, 2.0201, 2.0201]) -tensor([6.4847, 2.3597, 2.6866]) -tensor([3.8639, 3.4474, 2.2376]) -tensor([2.2437, 1.5757, 1.8814, 1.1898]) -Total rewards: 5434 -Total Receives: 5430 -Stores 4222 1 -Total Receives: 5431 -Stores 4223 1 -Total Receives: 5432 -Stores 4224 1 -Total Receives: 5433 -Stores 4225 1 -Total Receives: 5434 -Stores 4226 1 -tensor([5.2248, 4.1254, 4.1254]) -tensor([3.5644, 2.7884, 2.2923]) -tensor([2.3374, 2.2532, 1.7583]) -tensor([1.9163, 2.1753, 1.9168]) -tensor([2.5384, 2.0342]) -tensor([0.9722, 0.5261, 0.6270]) -Total rewards: 5440 -Total Receives: 5435 -Stores 4227 1 -Total Receives: 5436 -Stores 4228 1 -Total Receives: 5437 -Stores 4229 1 -Total Receives: 5438 -Stores 4230 1 -Total Receives: 5439 -Stores 4231 1 -Total Receives: 5440 -Stores 4232 1 -tensor([4.0652, 2.3738]) -loss: 0.00017812174337450415, td_error: 1513.78759765625, entropy: 2.079591989517212 -Train step: 0.4836690425872803 Optimizer Step: 57 -tensor([3.2115, 7.9024, 3.6707]) -tensor([3.5150, 3.1245]) -tensor([2.3877, 2.3905]) -Total rewards: 5443 -Total Receives: 5441 -Stores 4233 1 -Total Receives: 5442 -Stores 4234 1 -Total Receives: 5443 -Stores 4235 1 -tensor([5.2969, 7.2429, 5.9763, 5.3000, 4.0324]) -tensor([5.7180, 5.0704, 3.7779, 3.7039]) -tensor([1.9269, 3.2327, 1.5455]) -tensor([1.8117, 1.6327]) -Total rewards: 5445 -Total Receives: 5444 -Stores 4236 1 -Total Receives: 5445 -Stores 4237 1 -tensor([5.0759, 3.8472, 3.7815]) -loss: 9.87367002380779e-07, td_error: 8.391247749328613, entropy: 4.504161834716797 -Train step: 0.3826639652252197 Optimizer Step: 58 -tensor([1.0688, 0.1542]) -tensor([0.9024, 0.9994]) -Total rewards: 5453 -Total Receives: 5446 -Stores 4238 1 -Total Receives: 5447 -Stores 4239 1 -Total Receives: 5448 -Stores 4240 1 -Total Receives: 5449 -Stores 4241 1 -Total Receives: 5450 -Stores 4242 1 -Total Receives: 5451 -Stores 4243 1 -Total Receives: 5452 -Stores 4244 1 -Total Receives: 5453 -Stores 4245 1 -tensor([2.9801, 2.1202, 2.3693, 1.6554, 2.2186]) -tensor([3.1872, 2.6341]) -tensor([2.0066, 1.7946, 1.7188]) -tensor([2.8407, 2.7082]) -tensor([0.9920, 2.5271]) -tensor([4.8708, 3.5497]) -tensor([3.4450, 3.0801]) -Total rewards: 5457 -Total Receives: 5454 -Stores 4246 1 -Total Receives: 5455 -Stores 4247 1 -Total Receives: 5456 -Stores 4248 1 -Total Receives: 5457 -Stores 4249 1 -Total rewards: 5463 -Total Receives: 5458 -Stores 4250 1 -Total Receives: 5459 -Stores 4251 1 -Total Receives: 5460 -Stores 4252 1 -Total Receives: 5461 -Stores 4253 1 -Total Receives: 5462 -Stores 4254 1 -Total Receives: 5463 -Stores 4255 1 -Total rewards: 5467 -Total Receives: 5464 -Stores 4256 1 -Total Receives: 5465 -Stores 4257 1 -Total Receives: 5466 -Stores 4258 1 -Total Receives: 5467 -Stores 4259 1 -tensor([3.0892, 2.8504, 2.9760]) -tensor([3.1810, 2.2906, 2.2650, 1.7702]) -tensor([3.3816, 2.9228]) -tensor([4.1215, 4.7797, 3.5441]) -tensor([2.3608, 3.0391, 2.6003]) -loss: 0.00032401381758973, td_error: 2753.667724609375, entropy: 1.6094545125961304 -Train step: 0.536250114440918 Optimizer Step: 59 -tensor([4.1136, 4.7755, 4.1127, 2.2501]) -tensor([1.5442, 1.9972, 1.0791]) -tensor([2.3998, 1.9168, 2.2046]) -tensor([5.3896, 5.0182]) -tensor([6.4035, 6.3971, 3.9576]) -tensor([2.7668, 1.7624]) -tensor([4.1161, 4.2806, 3.5597, 4.2079]) -tensor([1.1764, 1.2574, 1.1383, 0.9183]) -tensor([3.0104, 2.2705, 1.3125]) -Total rewards: 5468 -Total Receives: 5468 -Stores 4260 1 -tensor([2.9564, 3.8340, 2.9564]) -loss: 2.1795894156184659e-07, td_error: 1.8523484468460083, entropy: 4.852030277252197 -Train step: 0.3569188117980957 Optimizer Step: 60 -tensor([4.3089, 4.1046]) -tensor([0.8463, 0.0545]) -tensor([7.3293, 4.5262]) -tensor([4.6413, 6.1053, 5.3736]) -tensor([0.3555, 0.2825]) -tensor([2.6340, 1.1416, 1.5305, 0.9702, 0.8910]) -tensor([2.5037, 2.5398, 3.5362]) -tensor([4.0442, 4.6836, 2.9937]) -Total rewards: 5474 -Total Receives: 5469 -Stores 4261 1 -Total Receives: 5470 -Stores 4262 1 -Total Receives: 5471 -Stores 4263 1 -Total Receives: 5472 -Stores 4264 1 -Total Receives: 5473 -Stores 4265 1 -Total Receives: 5474 -Stores 4266 1 -tensor([0.9011, 0.9987, 0.8304]) -tensor([0.4330, 0.2094]) -tensor([ 1.3738, 1.0636, 1.4198, -0.0502]) -tensor([0.1264, 0.0609]) -tensor([0.4523, 0.6537]) -tensor([6.8447, 9.7299, 6.2859]) -tensor([6.0836, 6.0836]) -loss: 1.9288488601887366e-06, td_error: 16.39253807067871, entropy: 3.512697219848633 -Train step: 0.35387492179870605 Optimizer Step: 61 -tensor([2.9864, 2.9413, 2.4768, 1.9011]) -tensor([1.9643, 2.7877, 2.4133, 2.3910]) -tensor([1.7491, 0.6465, 0.0994, 0.9291, 0.1290, 0.1290]) -tensor([1.5398, 1.8299, 1.8374]) -tensor([5.5613, 6.2950]) -Total rewards: 5478 -Total Receives: 5475 -Stores 4267 1 -Total Receives: 5476 -Stores 4268 1 -Total Receives: 5477 -Stores 4269 1 -Total Receives: 5478 -Stores 4270 1 -tensor([1.9341, 0.2090, 1.0448, 0.2015, 0.1158, 0.1158, 0.1158]) -tensor([5.2749, 5.9005, 3.7862]) -tensor([1.9770, 1.5858]) -tensor([0.1700, 0.2380, 0.3415, 0.1447]) -loss: 0.0013489773264154792, td_error: 11464.4345703125, entropy: 3.4339871406555176 -Train step: 0.7458691596984863 Optimizer Step: 62 -tensor([5.7630, 5.3155, 3.9741]) -tensor([0.5244, 0.5303, 0.7289, 0.4089]) -Total rewards: 5483 -Total Receives: 5479 -Stores 4271 1 -Total Receives: 5480 -Stores 4272 1 -Total Receives: 5481 -Stores 4273 1 -Total Receives: 5482 -Stores 4274 1 -Total Receives: 5483 -Stores 4275 1 -tensor([3.5580, 3.4108]) -tensor([0.9162, 0.9013, 0.7475]) -tensor([1.9089, 1.5933, 0.4043]) -tensor([1.7551, 2.0310]) -tensor([3.1523, 3.5606, 3.8226]) -tensor([2.2172, 3.4454, 3.4796]) -tensor([1.9103, 3.1536]) -tensor([8.6454, 7.0620]) -Total rewards: 5487 -Total Receives: 5484 -Stores 4276 1 -Total Receives: 5485 -Stores 4277 1 -Total Receives: 5486 -Stores 4278 1 -Total Receives: 5487 -Stores 4279 1 -Total rewards: 5492 -Total Receives: 5488 -Stores 4280 1 -Total Receives: 5489 -Stores 4281 1 -Total Receives: 5490 -Stores 4282 1 -Total Receives: 5491 -Stores 4283 1 -Total Receives: 5492 -Stores 4284 1 -tensor([6.1557, 4.4618]) -tensor([3.1150, 4.2878]) -tensor([6.6498, 7.6185, 7.1893]) -tensor([5.9765, 5.6124, 5.0008]) -tensor([0.4236, 0.2380, 0.5737, 0.1003]) -tensor([6.8932, 5.4062]) -loss: 5.807804427604424e-06, td_error: 49.3582763671875, entropy: 2.1313588619232178 -Train step: 0.38596105575561523 Optimizer Step: 63 -tensor([0.6671, 0.0909]) -tensor([3.9495, 2.1876]) -Total rewards: 5498 -Total Receives: 5493 -Stores 4285 1 -Total Receives: 5494 -Stores 4286 1 -Total Receives: 5495 -Stores 4287 1 -Total Receives: 5496 -Stores 4288 1 -Total Receives: 5497 -Stores 4289 1 -Total Receives: 5498 -Stores 4290 1 -tensor([1.5380, 1.6271]) -tensor([3.7365, 4.8042, 3.6451, 2.5604, 2.5604, 2.5604]) -tensor([2.6081, 1.9271, 2.0937, 1.7421]) -tensor([2.9885, 2.0736]) -tensor([6.9679, 7.4999]) -tensor([1.5733, 1.5477, 0.5680, 0.5680, 0.5680, 0.5680]) -tensor([0.8043, 1.0631, 0.7632]) -tensor([ 0.1405, -0.1494, -1.7900, -2.2560, -1.7963, -2.2199]) -tensor([3.7863, 4.1689]) -loss: 0.05974293127655983, td_error: 2580.5263671875, entropy: 3.723997116088867 -Train step: 0.4204568862915039 Optimizer Step: 64 -tensor([1.0010, 1.1563, 1.0088, 0.9463]) -tensor([5.0267, 3.2066]) -tensor([1.8127, 2.2023]) -tensor([2.5681, 2.1389, 2.8724, 2.2302]) -tensor([4.8420, 3.2835]) -tensor([3.5175, 2.6190, 3.1367]) -Total rewards: 5506 -Total Receives: 5499 -Stores 4291 1 -Total Receives: 5500 -Stores 4292 1 -Total Receives: 5501 -Stores 4293 1 -Total Receives: 5502 -Stores 4294 1 -Total Receives: 5503 -Stores 4295 1 -Total Receives: 5504 -Stores 4296 1 -Total Receives: 5505 -Stores 4297 1 -Total Receives: 5506 -Stores 4298 1 -tensor([3.8719, 4.3749]) -tensor([2.6265, 3.1503, 3.2532, 1.8692, 1.7511, 2.3916]) -Total rewards: 5510 -Total Receives: 5507 -Stores 4299 1 -Total Receives: 5508 -Stores 4300 1 -Total Receives: 5509 -Stores 4301 1 -Total Receives: 5510 -Stores 4302 1 -tensor([-0.5539, -0.5483, -0.5652, -0.5545, -0.5652, -0.5746, -0.5888, -0.5860]) -tensor([-0.3059, -0.3026]) -tensor([-0.1451, -0.1121, -0.1743, -0.1078, -0.1048]) -tensor([-0.3915, -0.4023, -0.3974]) -tensor([-0.3636, -0.3720, -0.3801]) -tensor([-0.1770, -0.1811]) -tensor([-0.1717, -0.1712]) -Total rewards: 4249 -Total Receives: 4247 -Stores 3462 1 -Total Receives: 4248 -Stores 3463 1 -Total Receives: 4249 -Stores 3464 1 -tensor([-0.0466, -0.0400]) -Total rewards: 4256 -Total Receives: 4250 -Stores 3465 1 -Total Receives: 4251 -Stores 3466 1 -Total Receives: 4252 -Stores 3467 1 -Total Receives: 4253 -Stores 3468 1 -Total Receives: 4254 -Stores 3469 1 -Total Receives: 4255 -Stores 3470 1 -Total Receives: 4256 -Stores 3471 1 -tensor([-0.0701, -0.0741, -0.0637]) -Total rewards: 4258 -Total Receives: 4257 -Stores 3472 1 -Total Receives: 4258 -Stores 3473 1 -Total rewards: 4264 -Total Receives: 4259 -Stores 3474 1 -Total Receives: 4260 -Stores 3475 1 -Total Receives: 4261 -Stores 3476 1 -Total Receives: 4262 -Stores 3477 1 -Total Receives: 4263 -Stores 3478 1 -Total Receives: 4264 -Stores 3479 1 -tensor([-0.3363, -0.3538, -0.3340, -0.3415, -0.3630, -0.3572, -0.3574]) -tensor([-0.2482, -0.2605]) -Total rewards: 4267 -Total Receives: 4265 -Stores 3480 1 -Total Receives: 4266 -Stores 3481 1 -Total Receives: 4267 -Stores 3482 1 -tensor([-0.1804, -0.2019, -0.1808, -0.1883, -0.1855, -0.1878]) -Total rewards: 4275 -Total Receives: 4268 -Stores 3483 1 -Total Receives: 4269 -Stores 3484 1 -Total Receives: 4270 -Stores 3485 1 -Total Receives: 4271 -Stores 3486 1 -Total Receives: 4272 -Stores 3487 1 -Total Receives: 4273 -Stores 3488 1 -Total Receives: 4274 -Stores 3489 1 -Total Receives: 4275 -Stores 3490 1 -tensor([-0.1744, -0.1466, -0.1302, -0.1297, -0.1335]) -tensor([-0.1894, -0.2350, -0.1807]) -tensor([-4.7867, -4.7959]) -tensor([-0.1447, -0.1173, -0.1710, -0.1048]) -tensor([-0.0990, -0.1142]) -tensor([-0.1437, -0.1882]) -tensor([-0.2146, -0.2078, -0.2169]) -tensor([-0.2292, -0.2388, -0.2355]) -Total rewards: 4282 -Total Receives: 4276 -Stores 3491 1 -Total Receives: 4277 -Stores 3492 1 -Total Receives: 4278 -Stores 3493 1 -Total Receives: 4279 -Stores 3494 1 -Total Receives: 4280 -Stores 3495 1 -Total Receives: 4281 -Stores 3496 1 -Total Receives: 4282 -Stores 3497 1 -tensor([-0.2917, -0.2844, -0.3015]) -tensor([-0.1437, -0.1214, -0.1001, -0.1125]) -tensor([-0.2693, -0.2815, -0.2792]) -tensor([-0.1387, -0.1157, -0.1083]) -tensor([-0.0700, -0.0721]) -tensor([-0.2576, -0.2505]) -tensor([-0.0634, -0.0676, -0.0541]) -Total rewards: 4286 -Total Receives: 4283 -Stores 3498 1 -Total Receives: 4284 -Stores 3499 1 -Total Receives: 4285 -Stores 3500 1 -Total Receives: 4286 -Stores 3501 1 -tensor([-0.0547, -0.0645]) -tensor([-0.2166, -0.2123, -0.2195, -0.2195]) -tensor([-0.0149, -0.0068]) -tensor([-0.1653, -0.2281, -0.1428, -0.1506]) -tensor([-0.0738, -0.2007]) -tensor([-0.2442, -0.2540]) -tensor([-0.4267, -0.4435, -0.4318]) -tensor([-0.1293, -0.1906, -0.1187]) -Total rewards: 4289 -Total Receives: 4287 -Stores 3502 1 -Total Receives: 4288 -Stores 3503 1 -Total Receives: 4289 -Stores 3504 1 -tensor([-0.7206, -0.7251, -0.7224]) -tensor([-0.8049, -0.7989]) -tensor([-0.6075, -0.6055, -0.5800, -0.6175]) -tensor([-0.6711, -0.6874, -0.7027]) -tensor([-0.2241, -0.2150]) -tensor([-0.0911, -0.1591, -0.0920]) -Total rewards: 4294 -Total Receives: 4290 -Stores 3505 1 -Total Receives: 4291 -Stores 3506 1 -Total Receives: 4292 -Stores 3507 1 -Total Receives: 4293 -Stores 3508 1 -Total Receives: 4294 -Stores 3509 1 -tensor([-1.1813, -1.1668, -1.1795]) -tensor([-0.0872, -0.0687, -0.0911]) -Total rewards: 4300 -Total Receives: 4295 -Stores 3510 1 -Total Receives: 4296 -Stores 3511 1 -Total Receives: 4297 -Stores 3512 1 -Total Receives: 4298 -Stores 3513 1 -Total Receives: 4299 -Stores 3514 1 -Total Receives: 4300 -Stores 3515 1 -tensor([-0.5000, -0.5023, -0.5040]) -tensor([-0.6480, -0.6707, -0.6682, -0.6682]) -Total rewards: 4303 -Total Receives: 4301 -Stores 3516 1 -Total Receives: 4302 -Stores 3517 1 -Total Receives: 4303 -Stores 3518 1 -tensor([-0.3624, -0.3375]) -tensor([-0.1345, -0.2023, -0.1006]) -tensor([-0.0793, -0.0815]) -tensor([-0.5090, -0.5039]) -tensor([-0.2089, -0.1954]) -tensor([-0.5807, -0.5606, -0.5718, -0.5790]) -tensor([-1.0301, -1.0451]) -Total rewards: 4307 -Total Receives: 4304 -Stores 3519 1 -Total Receives: 4305 -Stores 3520 1 -Total Receives: 4306 -Stores 3521 1 -Total Receives: 4307 -Stores 3522 1 -tensor([-0.3321, -0.3445]) -tensor([-0.8261, -0.8090, -0.8282]) -tensor([-1.1571, -1.1598]) -tensor([-0.5831, -0.5823]) -tensor([-0.1428, -0.2134]) -Total rewards: 4312 -Total Receives: 4308 -Stores 3523 1 -Total Receives: 4309 -Stores 3524 1 -Total Receives: 4310 -Stores 3525 1 -Total Receives: 4311 -Stores 3526 1 -Total Receives: 4312 -Stores 3527 1 -tensor([-0.5435, -0.5469]) -tensor([-0.4831, -0.4893, -0.4893]) -tensor([-0.1014, -0.0953]) -tensor([-0.6073, -0.5957, -0.6031, -0.6119]) -tensor([-0.1458, -0.1471, -0.1439]) -tensor([-0.0921, -0.0938]) -tensor([-0.0693, -0.1024]) -tensor([-0.3486, -0.3484]) -Total rewards: 4316 -Total Receives: 4313 -Stores 3528 1 -Total Receives: 4314 -Stores 3529 1 -Total Receives: 4315 -Stores 3530 1 -Total Receives: 4316 -Stores 3531 1 -tensor([-0.4664, -0.4537, -0.4601]) -tensor([-0.1922, -0.1876]) -tensor([-0.9784, -1.4878]) -tensor([-0.9459, -0.9411, -0.9521]) -Total rewards: 4317 -Total Receives: 4317 -Stores 3532 1 -tensor([-1.1038, -1.2005]) -tensor([-0.5393, -0.5477, -0.5427, -0.5392, -0.5529]) -tensor([-0.0257, -0.0144]) -tensor([-0.9242, -0.9131, -0.9263, -0.9247, -0.9263]) -Total rewards: 4321 -Total Receives: 4318 -Stores 3533 1 -Total Receives: 4319 -Stores 3534 1 -Total Receives: 4320 -Stores 3535 1 -Total Receives: 4321 -Stores 3536 1 -tensor([-0.3674, -0.3712, -0.3671, -0.3718, -0.3787, -0.3696]) -tensor([-0.1349, -0.1286, -0.1393]) -tensor([-0.2329, -0.2359, -0.2353, -0.2447, -0.2426]) -tensor([-1.2051, -1.2051]) -tensor([-0.3163, -0.3292, -0.3193]) -tensor([-0.0577, -0.0565]) -tensor([-0.2073, -0.2083, -0.2122, -0.2122]) -tensor([-1.0998, -1.1018]) -Total rewards: 4327 -Total Receives: 4322 -Stores 3537 1 -Total Receives: 4323 -Stores 3538 1 -Total Receives: 4324 -Stores 3539 1 -Total Receives: 4325 -Stores 3540 1 -Total Receives: 4326 -Stores 3541 1 -Total Receives: 4327 -Stores 3542 1 -Total rewards: 4331 -Total Receives: 4328 -Stores 3543 1 -Total Receives: 4329 -Stores 3544 1 -Total Receives: 4330 -Stores 3545 1 -Total Receives: 4331 -Stores 3546 1 -tensor([-0.0702, -0.0683, -0.0654, -0.0647, -0.0662]) -tensor([-1.3702, -1.3727, -1.3655, -1.3609, -1.3694]) -Total rewards: 4333 -Total Receives: 4332 -Stores 3547 1 -Total Receives: 4333 -Stores 3548 1 -tensor([-0.0877, -0.0797, -0.0791, -0.0910, -0.0885]) -tensor([-0.2714, -0.2732, -0.2793]) -tensor([-0.1501, -0.1517]) -tensor([-1.5919, -1.5887]) -Total rewards: 4338 -Total Receives: 4334 -Stores 3549 1 -Total Receives: 4335 -Stores 3550 1 -Total Receives: 4336 -Stores 3551 1 -Total Receives: 4337 -Stores 3552 1 -Total Receives: 4338 -Stores 3553 1 -tensor([-0.2853, -0.2821, -0.2860, -0.2685]) -tensor([-0.1233, -0.1221, -0.1192, -0.1229]) -Total rewards: 4341 -Total Receives: 4339 -Stores 3554 1 -Total Receives: 4340 -Stores 3555 1 -Total Receives: 4341 -Stores 3556 1 -tensor([-0.0889, -0.0867, -0.0907, -0.0930]) -tensor([-2.5324, -2.5364]) -tensor([-1.3691, -1.3714, -1.3640, -1.3699]) -tensor([-0.0777, -0.0705]) -tensor([-0.1459, -0.1413, -0.1372, -0.1493]) -tensor([-0.1377, -0.1364, -0.1364]) -tensor([-16.5937, -16.6126, -16.6203, -16.6301, -16.6034, -16.6301]) -tensor([-0.8722, -0.8948, -0.8984, -0.8980]) -tensor([-0.0726, -0.0754]) -Total rewards: 4348 -Total Receives: 4342 -Stores 3557 1 -Total Receives: 4343 -Stores 3558 1 -Total Receives: 4344 -Stores 3559 1 -Total Receives: 4345 -Stores 3560 1 -Total Receives: 4346 -Stores 3561 1 -Total Receives: 4347 -Stores 3562 1 -Total Receives: 4348 -Stores 3563 1 -tensor([-0.4975, -0.4813, -0.4977]) -tensor([-1.0897, -1.0876]) -tensor([-0.1044, -0.0973, -0.1117]) -tensor([-0.1258, -0.1161]) -tensor([-1.0464, -1.0736, -1.0742]) -tensor([-0.1061, -0.1039, -0.1096]) -tensor([-0.0895, -0.0883, -0.0869]) -Total rewards: 4355 -Total Receives: 4349 -Stores 3564 1 -Total Receives: 4350 -Stores 3565 1 -Total Receives: 4351 -Stores 3566 1 -Total Receives: 4352 -Stores 3567 1 -Total Receives: 4353 -Stores 3568 1 -Total Receives: 4354 -Stores 3569 1 -Total Receives: 4355 -Stores 3570 1 -tensor([-1.8354, -1.8344, -1.8469, -1.8505, -1.8251, -1.8466, -1.8505]) -tensor([-0.1044, -0.1166]) -tensor([-0.4603, -0.4602]) -tensor([-0.2651, -0.2549, -0.2519, -0.2637]) -tensor([-1.6643, -1.6676]) -tensor([-0.1924, -0.1896]) -tensor([-2.8819, -2.8794, -2.8825]) -tensor([-0.0937, -0.0993, -0.0952]) -tensor([-0.0919, -0.0801, -0.1002]) -Total rewards: 4361 -Total Receives: 4356 -Stores 3571 1 -Total Receives: 4357 -Stores 3572 1 -Total Receives: 4358 -Stores 3573 1 -Total Receives: 4359 -Stores 3574 1 -Total Receives: 4360 -Stores 3575 1 -Total Receives: 4361 -Stores 3576 1 -tensor([-0.1641, -0.1615, -0.1524]) -tensor([-0.2240, -0.2153, -0.2225]) -tensor([-3.5869, -3.5983]) -tensor([-0.0964, -0.0977]) -Total rewards: 4364 -Total Receives: 4362 -Stores 3577 1 -Total Receives: 4363 -Stores 3578 1 -Total Receives: 4364 -Stores 3579 1 -Total rewards: 4367 -Total Receives: 4365 -Stores 3580 1 -Total Receives: 4366 -Stores 3581 1 -Total Receives: 4367 -Stores 3582 1 -tensor([-3.8415, -3.8514]) -tensor([-0.6742, -0.6647, -0.6905, -0.6902]) -tensor([-1.8356, -1.8507, -1.8454, -1.8442, -1.8529, -1.8673, -1.8513, -1.8673]) -tensor([-1.2050, -1.2210, -1.2144, -1.2129, -1.2236, -1.2211, -1.2395]) -tensor([-0.1486, -0.1467]) -Total rewards: 4371 -Total Receives: 4368 -Stores 3583 1 -Total Receives: 4369 -Stores 3584 1 -Total Receives: 4370 -Stores 3585 1 -Total Receives: 4371 -Stores 3586 1 -tensor([-1.6655, -1.6719, -1.6987, -1.7043]) -tensor([-0.1265, -0.1321]) -tensor([-0.9271, -0.9243, -0.9190, -0.9291, -0.9288, -0.9421]) -tensor([-0.0808, -0.0620, -0.0791]) -tensor([-0.0803, -0.0774]) -tensor([-0.6351, -0.6415, -0.6496]) -tensor([-0.4840, -0.4846]) -tensor([-0.9285, -0.0145]) -tensor([-0.3681, -1.1555]) -tensor([-0.0538, -0.0508]) -tensor([-0.1360, -0.1354]) -tensor([-2.8796, -2.8784]) -Total rewards: 4374 -Total Receives: 4372 -Stores 3587 1 -Total Receives: 4373 -Stores 3588 1 -Total Receives: 4374 -Stores 3589 1 -tensor([-0.1496, -0.1479, -0.1694]) -tensor([-0.5696, -0.5805]) -Total rewards: 4381 -Total Receives: 4375 -Stores 3590 1 -Total Receives: 4376 -Stores 3591 1 -Total Receives: 4377 -Stores 3592 1 -Total Receives: 4378 -Stores 3593 1 -Total Receives: 4379 -Stores 3594 1 -Total Receives: 4380 -Stores 3595 1 -Total Receives: 4381 -Stores 3596 1 -tensor([-1.9185, -1.9302]) -Total rewards: 4389 -Total Receives: 4382 -Stores 3597 1 -Total Receives: 4383 -Stores 3598 1 -Total Receives: 4384 -Stores 3599 1 -Total Receives: 4385 -Stores 3600 1 -Total Receives: 4386 -Stores 3601 1 -Total Receives: 4387 -Stores 3602 1 -Total Receives: 4388 -Stores 3603 1 -Total Receives: 4389 -Stores 3604 1 -tensor([-0.3679, -0.3719]) -tensor([-0.9790, -0.9809, -0.9768, -0.9773, -0.9978]) -tensor([-0.4193, -0.4230]) -tensor([-0.1824, -0.1610]) -tensor([-0.6594, -0.6607]) -tensor([-0.6249, -0.6344]) -tensor([-1.0354, -1.0484, -1.0486]) -tensor([-0.1749, -0.1824]) -tensor([-3.0379, -3.0603, -3.0730, -3.0697]) -tensor([-3.5218, -3.5149]) -tensor([-0.5846, -0.5546, -0.5859]) -tensor([-0.1980, -0.2091]) -tensor([-0.1249, -0.1284]) -tensor([-0.3950, -0.4026]) -tensor([-0.3925, -0.3788, -0.4008]) -tensor([-0.5136, -0.5169]) -tensor([-0.4674, -0.4658]) -tensor([-0.9276, -0.9294]) -tensor([-4.1931, -4.1927, -4.1977]) -tensor([-2.5750, -2.5790, -2.5702]) -tensor([-2.6398, -2.6414, -2.6374]) -Total rewards: 4394 -Total Receives: 4390 -Stores 3605 1 -Total Receives: 4391 -Stores 3606 1 -Total Receives: 4392 -Stores 3607 1 -Total Receives: 4393 -Stores 3608 1 -Total Receives: 4394 -Stores 3609 1 -tensor([-9.9765, -9.2592, -9.9795]) -tensor([-0.8790, -0.8667]) -tensor([-3.3147, -3.3229]) -tensor([-0.5304, -0.5510, -0.5248, -0.5505, -0.5505]) -tensor([-1.5721, -1.5776, -1.5678, -1.6148, -1.6151]) -tensor([-0.2905, -0.3037, -0.3051, -0.3051]) -tensor([-0.1709, -0.1719, -0.1662]) -tensor([-0.9338, -0.9380]) -Total rewards: 4397 -Total Receives: 4395 -Stores 3610 1 -Total Receives: 4396 -Stores 3611 1 -Total Receives: 4397 -Stores 3612 1 -tensor([-0.5605, -0.5644, -0.5663, -0.5707]) -tensor([-1.9483, -1.9363, -1.9402]) -tensor([-0.3047, -0.3177]) -tensor([-3.6737, -3.7311, -3.7148, -3.7312, -3.7408, -3.7426]) -tensor([-1.4353, -1.4400, -1.4812, -1.4417, -1.4713, -1.4735]) -tensor([-0.9134, -0.9219, -0.9237, -0.9217, -0.9250]) -tensor([-1.0373, -1.0464]) -tensor([-7.4815, -6.2107, -7.3543]) -tensor([-0.3198, -0.3127]) -tensor([-0.2971, -0.3099]) -tensor([-4.1473, -4.1447]) -tensor([-1.0324, -1.0404, -1.0363]) -tensor([-3.3492, -3.3491, -3.3531]) -tensor([-0.1418, -0.1431]) -tensor([-3.2204, -3.2220, -3.2476, -3.2476]) -tensor([-0.7106, -0.7088, -0.7089, -0.7129]) -tensor([-3.4271, -3.4298]) -tensor([-0.8176, -0.7991, -0.8136]) -tensor([-2.8116, -2.8138]) -tensor([-0.7664, -0.7711, -0.7696]) -tensor([-0.6946, -0.6917, -0.6971]) -tensor([-0.4870, -0.5155, -0.5155]) -Total rewards: 4401 -Total Receives: 4398 -Stores 3613 1 -Total Receives: 4399 -Stores 3614 1 -Total Receives: 4400 -Stores 3615 1 -Total Receives: 4401 -Stores 3616 1 -tensor([-1.4016, -1.3889, -1.4058]) -tensor([-2.3206, -2.3181, -2.2816, -2.2856]) -tensor([-3.1109, -3.1328, -3.1338, -3.1400]) -tensor([-2.0079, -2.0121, -2.0138, -2.0375, -2.0445, -2.0386]) -tensor([-1.2858, -1.2871, -1.2902, -1.2899, -1.2861, -1.2898, -1.2898]) -tensor([-1.0029, -1.0104, -1.0062, -1.0064, -1.0131, -1.0118]) -Total rewards: 4404 -Total Receives: 4402 -Stores 3617 1 -Total Receives: 4403 -Stores 3618 1 -Total Receives: 4404 -Stores 3619 1 -tensor([-5.1490, -5.0717, -5.1265, -5.1117, -5.1239, -5.1342]) -tensor([-1.0302, -1.0373, -1.0371, -1.0381]) -tensor([-0.7217, -0.7243]) -Total rewards: 4409 -Total Receives: 4405 -Stores 3620 1 -Total Receives: 4406 -Stores 3621 1 -Total Receives: 4407 -Stores 3622 1 -Total Receives: 4408 -Stores 3623 1 -Total Receives: 4409 -Stores 3624 1 -tensor([-0.5810, -0.5798, -0.5800, -0.5876]) -tensor([-0.4092, -0.4155]) -Total rewards: 4415 -Total Receives: 4410 -Stores 3625 1 -Total Receives: 4411 -Stores 3626 1 -Total Receives: 4412 -Stores 3627 1 -Total Receives: 4413 -Stores 3628 1 -Total Receives: 4414 -Stores 3629 1 -Total Receives: 4415 -Stores 3630 1 -tensor([-0.4556, -0.4570, -0.4623, -0.4570]) -tensor([-2.4474, -2.4491, -2.4799, -2.4919, -2.5105, -2.4807]) -tensor([-0.3418, -0.3463, -0.3411]) -tensor([-0.6021, -0.6031]) -tensor([-0.7156, -0.6950, -0.7188, -0.7153]) -tensor([-0.6559, -0.6353, -0.6559]) -Total rewards: 4421 -Total Receives: 4416 -Stores 3631 1 -Total Receives: 4417 -Stores 3632 1 -Total Receives: 4418 -Stores 3633 1 -Total Receives: 4419 -Stores 3634 1 -Total Receives: 4420 -Stores 3635 1 -Total Receives: 4421 -Stores 3636 1 -tensor([-3.6724, -3.6807, -3.6599, -3.6810]) -tensor([-1.5636, -1.5520, -1.5659]) -tensor([-0.4996, -0.5032, -0.5032]) -tensor([-0.6552, -0.6455, -0.6497, -0.6413, -0.6602, -0.6536, -0.6556]) -tensor([-0.6228, -0.6155, -0.6249, -0.6336]) -tensor([-5.2035, -5.1201, -5.1743, -5.2204, -5.1825, -5.1664]) -Total rewards: 4429 -Total Receives: 4422 -Stores 3637 1 -Total Receives: 4423 -Stores 3638 1 -Total Receives: 4424 -Stores 3639 1 -Total Receives: 4425 -Stores 3640 1 -Total Receives: 4426 -Stores 3641 1 -Total Receives: 4427 -Stores 3642 1 -Total Receives: 4428 -Stores 3643 1 -Total Receives: 4429 -Stores 3644 1 -tensor([-3.2655, -3.2465, -3.2747]) -tensor([-0.3771, -0.3799, -0.3828]) -tensor([-0.4380, -0.4247]) -Total rewards: 4431 -Total Receives: 4430 -Stores 3645 1 -Total Receives: 4431 -Stores 3646 1 -tensor([-2.2167, -2.2468, -2.2594, -2.2933, -2.2625, -2.2925]) -tensor([-0.4528, -0.4536]) -tensor([-3.5032, -3.5249]) -Total rewards: 4437 -Total Receives: 4432 -Stores 3647 1 -Total Receives: 4433 -Stores 3648 1 -Total Receives: 4434 -Stores 3649 1 -Total Receives: 4435 -Stores 3650 1 -Total Receives: 4436 -Stores 3651 1 -Total Receives: 4437 -Stores 3652 1 -tensor([-3.5212, -3.5030, -3.4901, -3.4848, -3.5012]) -tensor([-4.7185, -4.6884, -4.7313, -4.6887, -4.6772]) -tensor([-0.9132, -0.8971]) -tensor([-0.3185, -0.3135, -0.3186]) -tensor([-1.6996, -1.7321, -1.7340, -1.7154, -1.7320]) -tensor([-0.6992, -0.7090, -0.7090]) -tensor([-26.7648, -26.8046, -26.7659, -26.7564]) -Total rewards: 4440 -Total Receives: 4438 -Stores 3653 1 -Total Receives: 4439 -Stores 3654 1 -Total Receives: 4440 -Stores 3655 1 -tensor([-0.4091, -0.4090]) -tensor([-1.0183, -1.0180, -1.0277]) -tensor([-1.3411, -1.3599, -1.3584, -1.3545, -1.3480, -1.3608, -1.3598]) -tensor([-0.4952, -0.4887, -0.5002, -0.4977, -0.5002, -0.4990, -0.4997]) -tensor([-0.6030, -0.6034]) -Total rewards: 4444 -tensor([5.0949, 3.2343, 3.2343, 3.2343, 3.2343]) -Total rewards: 5514 -Total Receives: 5511 -Stores 4303 1 -Total Receives: 5512 -Stores 4304 1 -Total Receives: 5513 -Stores 4305 1 -Total Receives: 5514 -Stores 4306 1 -Total rewards: 5519 -Total Receives: 5515 -Stores 4307 1 -Total Receives: 5516 -Stores 4308 1 -Total Receives: 5517 -Stores 4309 1 -Total Receives: 5518 -Stores 4310 1 -Total Receives: 5519 -Stores 4311 1 -tensor([-1.4081, -2.8368, -3.3117, -3.2909, -2.8258, -3.2508, -3.6153]) -loss: 3.4701949971349677e-06, td_error: 29.49184226989746, entropy: 2.086686372756958 -Train step: 0.6268279552459717 Optimizer Step: 65 -tensor([6.2601, 6.5272, 5.0649]) -tensor([7.0254, 6.3244, 6.7715]) -tensor([4.5212, 3.4707]) -tensor([3.1462, 3.1174, 2.3492]) -tensor([1.6681, 1.7055, 1.0765, 2.2648, 1.9460]) -tensor([1.2171, 1.1680, 0.9541]) -Total rewards: 5525 -Total Receives: 5520 -Stores 4312 1 -Total Receives: 5521 -Stores 4313 1 -Total Receives: 5522 -Stores 4314 1 -Total Receives: 5523 -Stores 4315 1 -Total Receives: 5524 -Stores 4316 1 -Total Receives: 5525 -Stores 4317 1 -loss: 0.0004903732915408909, td_error: 4167.49169921875, entropy: 3.5704541206359863 -Train step: 0.6048369407653809 Optimizer Step: 66 -tensor([3.9274, 3.7082, 3.4147]) -tensor([3.4018, 3.2101, 2.9581]) -tensor([-7.9531, -5.3890, -6.8079, -7.3522, -7.3486, -7.2661, -8.2769, -8.2769]) -tensor([-0.2565, -0.4776]) -tensor([5.7992, 7.0063, 5.0079]) -tensor([1.1779, 1.2581, 0.9812]) -tensor([4.5948, 5.0707, 3.8367, 2.8003, 4.3701, 3.4246]) -tensor([2.4313, 2.8346]) -Total rewards: 5530 -Total Receives: 5526 -Stores 4318 1 -Total Receives: 5527 -Stores 4319 1 -Total Receives: 5528 -Stores 4320 1 -Total Receives: 5529 -Stores 4321 1 -Total Receives: 5530 -Stores 4322 1 -tensor([4.8745, 4.3248, 4.1740]) -tensor([7.9479, 7.9479, 7.9479, 7.9479]) -tensor([4.0385, 2.2965]) -loss: 2.2442047338699922e-05, td_error: 190.72625732421875, entropy: 2.9977595806121826 -Train step: 0.37033700942993164 Optimizer Step: 67 -tensor([6.7379, 6.3538]) -tensor([-0.3956, -0.3911, -0.8798, -0.2055]) -tensor([2.1982, 1.9927]) -tensor([2.0230, 2.3838, 2.3047]) -tensor([5.6500, 4.3604, 3.6525, 3.6525]) -tensor([4.6765, 3.4710]) -tensor([5.4155, 4.5523, 4.5523]) -tensor([9.0047, 8.0793, 6.0811, 8.7738, 7.6226]) -tensor([-7.5250, -6.4521, -6.9875, -7.0161, -8.3771, -6.8981, -8.2529, -8.4285, - -8.4285]) -tensor([0.3126, 0.2531, 0.3808, 0.4403, 0.3606, 0.3026]) -tensor([ 9.7043, 7.5528, 10.4063, 9.2567]) -tensor([6.0790, 5.7919]) -tensor([8.7513, 8.5500, 8.5500, 8.5500]) -loss: 1.4322110414505005, td_error: 53779.21875, entropy: 0.054827723652124405 -Train step: 1.043262004852295 Optimizer Step: 68 -tensor([6.2225, 4.4985, 4.4985]) -tensor([3.9381, 3.8454]) -Total rewards: 5537 -Total Receives: 5531 -Stores 4323 1 -Total Receives: 5532 -Stores 4324 1 -Total Receives: 5533 -Stores 4325 1 -Total Receives: 5534 -Stores 4326 1 -Total Receives: 5535 -Stores 4327 1 -Total Receives: 5536 -Stores 4328 1 -Total Receives: 5537 -Stores 4329 1 -tensor([170.9114, 170.4993, 169.9880, 169.7658]) -tensor([4.4507, 4.3570, 4.5695, 4.6643, 4.5343]) -Total rewards: 5539 -Total Receives: 5538 -Stores 4330 1 -Total Receives: 5539 -Stores 4331 1 -tensor([6.1863, 6.1059]) -tensor([4.5022, 4.4046, 4.6234, 4.5845]) -tensor([6.7697, 5.5303, 7.4269, 7.5535, 5.8774, 7.5415, 6.0883, 5.5082, 5.5082]) -tensor([7.2997, 7.2997]) -Total rewards: 5546 -Total Receives: 5540 -Stores 4332 1 -Total Receives: 5541 -Stores 4333 1 -Total Receives: 5542 -Stores 4334 1 -Total Receives: 5543 -Stores 4335 1 -Total Receives: 5544 -Stores 4336 1 -Total Receives: 5545 -Stores 4337 1 -Total Receives: 5546 -Stores 4338 1 -tensor([6.3789, 5.8800, 4.4206, 4.5043, 6.4709, 4.7917]) -tensor([6.9799, 7.1878]) -tensor([10.0112, 9.7507]) -tensor([0.8100, 1.2714]) -loss: 4.344761327956803e-05, td_error: 369.244384765625, entropy: 2.564953565597534 -Train step: 0.3296198844909668 Optimizer Step: 69 -tensor([6.4456, 5.8275, 4.2239, 4.3239, 4.6302]) -tensor([6.3010, 5.9945, 5.9945]) -tensor([5.0624, 3.7016, 4.0683, 3.3347]) -tensor([13.8136, 10.5258, 13.3404]) -tensor([7.5000, 9.6091]) -tensor([13.1130, 11.9609, 11.9609]) -tensor([2.6804, 1.7569]) -tensor([10.3152, 8.9086, 10.9753, 11.1000, 9.3603, 9.5318, 8.3860, 8.3860]) -loss: 1.598832368850708, td_error: 48207.95703125, entropy: 2.302581787109375 -Train step: 1.0366840362548828 Optimizer Step: 70 -tensor([5.3485, 5.2367, 5.4271, 4.0095]) -tensor([1.9989, 1.9256]) -tensor([5.5264, 7.4642]) -tensor([3.4032, 3.0390, 2.6715]) -tensor([3.7826, 3.3994, 3.4440]) -tensor([5.6072, 6.5387]) -tensor([10.8635, 10.5410, 10.5410]) -tensor([6.1244, 3.7856, 3.8979, 3.5335, 3.3134, 4.2257]) -tensor([7.8792, 7.2882, 7.2882]) -tensor([2.2844, 2.7804, 2.0787]) -tensor([2.2960, 2.1560]) -tensor([3.2272, 2.1913]) -loss: 0.39386776089668274, td_error: 9377.9345703125, entropy: 3.365849806868937e-06 -Train step: 0.979377031326294 Optimizer Step: 71 -tensor([4.3891, 4.5188, 4.0899, 3.8421, 4.8813]) -Total rewards: 5550 -Total Receives: 5547 -Stores 4339 1 -Total Receives: 5548 -Stores 4340 1 -Total Receives: 5549 -Stores 4341 1 -Total Receives: 5550 -Stores 4342 1 -tensor([6.9264, 7.2426, 5.6361]) -tensor([12.9012, 12.7158, 12.7158]) -tensor([2.8206, 3.5543, 2.3505]) -tensor([202.7078, 202.5993, 200.9558, 201.3043]) -tensor([4.2948, 5.7168, 4.2428]) -tensor([12.7290, 11.0288, 13.5161, 11.6138, 11.7757, 9.7899, 9.7899]) -tensor([7.4422, 7.7103]) -tensor([15.1663, 10.6136, 10.6136]) -tensor([5.8610, 4.3347]) -Total rewards: 5557 -Total Receives: 5551 -Stores 4343 1 -Total Receives: 5552 -Stores 4344 1 -Total Receives: 5553 -Stores 4345 1 -Total Receives: 5554 -Stores 4346 1 -Total Receives: 5555 -Stores 4347 1 -Total Receives: 5556 -Stores 4348 1 -Total Receives: 5557 -Stores 4349 1 -tensor([2.4546, 2.4066, 2.0027]) -tensor([3.3073, 3.8501]) -tensor([4.6030, 4.0311]) -tensor([2.9930, 2.0047]) -tensor([7.4504, 7.4504]) -Total rewards: 5559 -Total Receives: 5558 -Stores 4350 1 -Total Receives: 5559 -Stores 4351 1 -tensor([4.1740, 4.0930]) -tensor([5.3415, 5.4663, 5.0614, 4.7812]) -tensor([14.9821, 13.1810, 13.7927, 13.9625, 11.5192, 11.5192]) -tensor([5.6967, 5.3109, 4.3556]) -loss: 1.2743062143272255e-05, td_error: 108.29832458496094, entropy: 2.578538417816162 -Train step: 0.3488149642944336 Optimizer Step: 72 -tensor([5.9465, 5.5955, 5.3650]) -tensor([9.2943, 9.2943]) -tensor([0.8359, 0.5043]) -Total rewards: 5566 -Total Receives: 5560 -Stores 4352 1 -Total Receives: 5561 -Stores 4353 1 -Total Receives: 5562 -Stores 4354 1 -Total Receives: 5563 -Stores 4355 1 -Total Receives: 5564 -Stores 4356 1 -Total Receives: 5565 -Stores 4357 1 -Total Receives: 5566 -Stores 4358 1 -tensor([11.7479, 11.7479]) -tensor([3.0447, 3.6630, 2.8167]) -Total rewards: 5573 -Total Receives: 5567 -Stores 4359 1 -Total Receives: 5568 -Stores 4360 1 -Total Receives: 5569 -Stores 4361 1 -Total Receives: 5570 -Stores 4362 1 -Total Receives: 5571 -Stores 4363 1 -Total Receives: 5572 -Stores 4364 1 -Total Receives: 5573 -Stores 4365 1 -tensor([7.2597, 4.8140, 5.5644, 5.9234]) -tensor([3.8915, 3.8915]) -tensor([3.7924, 3.7429, 3.9205]) -tensor([3.7077, 3.7870]) -tensor([1.9893, 1.9594]) -tensor([4.0698, 2.8926, 2.9769]) -tensor([6.0811, 5.6406, 5.0720]) -tensor([10.2619, 9.1349, 9.6376]) -tensor([2.7674, 2.4783, 2.3448]) -tensor([1.1816, 1.1816]) -tensor([6.7092, 6.2281]) -tensor([2.5544, 1.8246, 2.4437]) -tensor([9.0836, 9.6053]) -tensor([3.0714, 2.7493]) -loss: 2.554394450271502e-05, td_error: 217.08807373046875, entropy: 2.308717966079712 -Train step: 0.42723798751831055 Optimizer Step: 73 -tensor([8.8202, 9.2684, 9.4068, 6.8578, 6.8578, 6.8578]) -tensor([6.8151, 6.3785, 4.8659, 4.4338]) -Total rewards: 5580 -Total Receives: 5574 -Stores 4366 1 -Total Receives: 5575 -Stores 4367 1 -Total Receives: 5576 -Stores 4368 1 -Total Receives: 5577 -Stores 4369 1 -Total Receives: 5578 -Stores 4370 1 -Total Receives: 5579 -Stores 4371 1 -Total Receives: 5580 -Stores 4372 1 -tensor([2.5483, 1.9015, 2.1229, 1.9015]) -tensor([5.4075, 5.6477]) -tensor([3.2744, 3.4309]) -tensor([2.0771, 2.0046]) -Total rewards: 5588 -Total Receives: 5581 -Stores 4373 1 -Total Receives: 5582 -Stores 4374 1 -Total Receives: 5583 -Stores 4375 1 -Total Receives: 5584 -Stores 4376 1 -Total Receives: 5585 -Stores 4377 1 -Total Receives: 5586 -Stores 4378 1 -Total Receives: 5587 -Stores 4379 1 -Total Receives: 5588 -Stores 4380 1 -tensor([4.5184, 3.8181]) -Total Receives: 4441 -Stores 3656 1 -Total Receives: 4442 -Stores 3657 1 -Total Receives: 4443 -Stores 3658 1 -Total Receives: 4444 -Stores 3659 1 -tensor([-0.1695, -0.1615]) -tensor([-4.2329, -4.2210]) -tensor([-0.5603, -0.5634, -0.5631]) -Total rewards: 4450 -Total Receives: 4445 -Stores 3660 1 -Total Receives: 4446 -Stores 3661 1 -Total Receives: 4447 -Stores 3662 1 -Total Receives: 4448 -Stores 3663 1 -Total Receives: 4449 -Stores 3664 1 -Total Receives: 4450 -Stores 3665 1 -tensor([-0.6968, -0.7024, -0.7048]) -tensor([-2.8988, -2.9023, -2.8774, -2.8669, -2.8723]) -tensor([-0.6577, -0.6577]) -tensor([-1.3875, -1.3937, -1.3793, -1.3864, -1.4022]) -tensor([-3.0160, -3.0401, -3.0435]) -tensor([-2.4475, -2.4508]) -tensor([-0.7072, -0.7093]) -tensor([-3.7156, -3.7318]) -tensor([-1.7201, -1.7402]) -tensor([-0.6028, -0.5987, -0.6075, -0.6019, -0.6052, -0.6012, -0.6024]) -Total rewards: 4456 -Total Receives: 4451 -Stores 3666 1 -Total Receives: 4452 -Stores 3667 1 -Total Receives: 4453 -Stores 3668 1 -Total Receives: 4454 -Stores 3669 1 -Total Receives: 4455 -Stores 3670 1 -Total Receives: 4456 -Stores 3671 1 -tensor([-0.3253, -0.3272, -0.3314]) -tensor([-0.3490, -0.3513, -0.3520]) -tensor([-1.2941, -1.3016, -1.3184]) -tensor([-1.9092, -1.9111]) -tensor([-2.7850, -2.7287, -2.7849, -2.7658, -2.7358, -2.7852, -2.7869]) -tensor([-0.5557, -0.5653]) -tensor([-0.8181, -0.8194, -0.8383, -0.8351, -0.8340]) -tensor([-0.4469, -0.4484]) -tensor([-0.5757, -0.5758, -0.5872, -0.5864]) -tensor([-1.8047, -1.8054, -1.8177, -1.8003, -1.8077, -1.8310, -1.8177]) -Total rewards: 4463 -Total Receives: 4457 -Stores 3672 1 -Total Receives: 4458 -Stores 3673 1 -Total Receives: 4459 -Stores 3674 1 -Total Receives: 4460 -Stores 3675 1 -Total Receives: 4461 -Stores 3676 1 -Total Receives: 4462 -Stores 3677 1 -Total Receives: 4463 -Stores 3678 1 -tensor([-2.2452, -2.2330, -2.2612, -2.2199, -2.2199]) -tensor([-1.2988, -1.3063]) -tensor([-0.5111, -0.5125]) -Total rewards: 4469 -Total Receives: 4464 -Stores 3679 1 -Total Receives: 4465 -Stores 3680 1 -Total Receives: 4466 -Stores 3681 1 -Total Receives: 4467 -Stores 3682 1 -Total Receives: 4468 -Stores 3683 1 -Total Receives: 4469 -Stores 3684 1 -tensor([-1.7997, -1.7857, -1.8168, -1.7727]) -tensor([-0.9477, -0.9450, -0.9583]) -tensor([-1.3378, -1.3246, -1.3519]) -tensor([-0.2421, -0.2447]) -tensor([-1.1071, -1.1319, -1.1075]) -tensor([-2.0272, -2.0410, -2.0329]) -tensor([-0.3415, -0.3383, -0.3418, -0.3726, -0.3455, -0.3424, -0.3500, -0.3561]) -tensor([-1.7156, -1.7290]) -tensor([-0.2771, -0.2773]) -tensor([-2.9287, -2.9316, -2.9115, -2.8844, -2.9314, -2.9466, -2.9385, -2.9586]) -tensor([-2.4181, -2.4211, -2.4069, -2.4138, -2.4455, -2.4283, -2.4455]) -tensor([-1.3688, -1.3947, -1.3808]) -tensor([-0.6879, -0.6749, -0.6858, -0.6794]) -Total rewards: 4474 -Total Receives: 4470 -Stores 3685 1 -Total Receives: 4471 -Stores 3686 1 -Total Receives: 4472 -Stores 3687 1 -Total Receives: 4473 -Stores 3688 1 -Total Receives: 4474 -Stores 3689 1 -tensor([-1.4274, -1.4271]) -tensor([-1.2538, -1.2278]) -Total rewards: 4478 -Total Receives: 4475 -Stores 3690 1 -Total Receives: 4476 -Stores 3691 1 -Total Receives: 4477 -Stores 3692 1 -Total Receives: 4478 -Stores 3693 1 -tensor([-1.1913, -1.1867, -1.1913]) -tensor([-0.1980, -0.1904, -0.1944, -0.2695, -0.2130, -0.1861, -0.2342, -0.2348]) -tensor([-1.6779, -1.6750]) -tensor([-0.2144, -0.2100]) -tensor([-2.2861, -2.2871, -2.2774, -2.3299, -2.2978, -2.3299]) -tensor([-1.8074, -1.8255]) -Total rewards: 4481 -Total Receives: 4479 -Stores 3694 1 -Total Receives: 4480 -Stores 3695 1 -Total Receives: 4481 -Stores 3696 1 -tensor([-0.7586, -0.7553, -0.7467]) -tensor([-1.1090, -1.1192, -1.1192]) -tensor([-0.1469, -0.1340, -0.2020, -0.1544, -0.1266, -0.1720, -0.1731]) -tensor([-27.1029, -27.1069, -27.0965]) -tensor([-0.1073, -0.0812, -0.1743, -0.1212, -0.1442, -0.1413]) -tensor([-1.9064, -1.9240, -1.9438]) -Total rewards: 4486 -Total Receives: 4482 -Stores 3697 1 -Total Receives: 4483 -Stores 3698 1 -Total Receives: 4484 -Stores 3699 1 -Total Receives: 4485 -Stores 3700 1 -Total Receives: 4486 -Stores 3701 1 -tensor([-4.1741, -4.1658, -4.1581]) -tensor([-3.4311, -3.4334, -3.4225, -3.4485, -3.4504, -3.4411, -3.4726]) -tensor([-0.9999, -1.0161]) -tensor([-1.1625, -1.1765, -1.1737, -1.1765]) -Total rewards: 4490 -Total Receives: 4487 -Stores 3702 1 -Total Receives: 4488 -Stores 3703 1 -Total Receives: 4489 -Stores 3704 1 -Total Receives: 4490 -Stores 3705 1 -Total rewards: 4494 -Total Receives: 4491 -Stores 3706 1 -Total Receives: 4492 -Stores 3707 1 -Total Receives: 4493 -Stores 3708 1 -Total Receives: 4494 -Stores 3709 1 -tensor([-2.4979, -2.4843, -2.5303, -2.5416, -2.4972, -2.5416]) -tensor([-3.3442, -4.5102, -4.3589]) -tensor([-0.6465, -0.6281, -0.6412, -0.6522, -0.6403]) -tensor([-1.4004, -1.3891, -1.3943, -1.3807, -1.3971, -1.4024]) -tensor([-0.2994, -0.3057]) -tensor([-0.4802, -0.4812]) -tensor([-10.3501, -10.3563]) -tensor([-1.0347, -1.0251, -1.0294, -1.0324, -1.0386]) -tensor([-3.2643, -3.2569]) -tensor([-3.7108, -3.7088, -3.6943, -3.7186, -3.7541, -3.7215, -3.7530]) -tensor([-2.4029, -2.4029]) -tensor([-1.1761, -1.1864, -1.1991]) -Total rewards: 4497 -Total Receives: 4495 -Stores 3710 1 -Total Receives: 4496 -Stores 3711 1 -Total Receives: 4497 -Stores 3712 1 -tensor([-2.7147, -2.7337]) -tensor([-2.4733, -2.5030, -2.5178, -2.4833, -2.5177, -2.5177]) -tensor([-0.1180, -0.1762, -0.1305, -0.1518, -0.1527]) -tensor([-0.9937, -1.0036, -0.9846, -1.0008, -1.0036]) -tensor([-8.0160, -7.9990, -7.9932, -8.0305]) -tensor([-0.5234, -0.5163, -0.5282, -0.5326]) -tensor([-0.7864, -0.7860, -0.7722, -0.7814]) -tensor([-0.6901, -0.6797, -0.7008]) -tensor([-1.4304, -1.4459]) -Total rewards: 4502 -Total Receives: 4498 -Stores 3713 1 -Total Receives: 4499 -Stores 3714 1 -Total Receives: 4500 -Stores 3715 1 -Total Receives: 4501 -Stores 3716 1 -Total Receives: 4502 -Stores 3717 1 -tensor([-0.2193, -0.2203, -0.2224]) -tensor([-0.3181, -0.3194]) -tensor([-0.8977, -0.8935, -0.8926]) -tensor([-1.0851, -1.0907]) -tensor([-1.8981, -1.9203, -1.8838, -1.9203, -1.9203]) -tensor([-0.2653, -0.2733, -0.2645]) -tensor([-4.1522, -4.2015, -4.1519, -4.2069, -4.1647, -4.2012, -4.1619, -4.2209]) -tensor([-0.3731, -0.3732, -0.3805]) -Total rewards: 4506 -Total Receives: 4503 -Stores 3718 1 -Total Receives: 4504 -Stores 3719 1 -Total Receives: 4505 -Stores 3720 1 -Total Receives: 4506 -Stores 3721 1 -tensor([-0.2216, -0.2207, -0.2204, -0.2209]) -tensor([-3.7098, -3.7209, -3.7234, -3.7173, -3.7194]) -Total rewards: 4510 -Total Receives: 4507 -Stores 3722 1 -Total Receives: 4508 -Stores 3723 1 -Total Receives: 4509 -Stores 3724 1 -Total Receives: 4510 -Stores 3725 1 -tensor([-0.8499, -0.8611]) -tensor([-10.2885, -10.2936]) -tensor([-0.3867, -0.3964]) -tensor([-1.5632, -1.5389, -1.5631, -1.5631, -1.5631]) -tensor([-1.4470, -1.4470]) -tensor([-0.1595, -0.1573, -0.1560]) -tensor([-3.3015, -3.3044, -3.2966, -3.2937, -3.3010, -3.3084]) -tensor([-4.6176, -4.6070, -4.6298]) -tensor([-1.2302, -1.2352, -1.2287]) -tensor([-9.8794, -9.8795]) -tensor([-0.1569, -0.2268, -0.1716, -0.1958]) -tensor([-0.6420, -0.6546]) -Total rewards: 4517 -Total Receives: 4511 -Stores 3726 1 -Total Receives: 4512 -Stores 3727 1 -Total Receives: 4513 -Stores 3728 1 -Total Receives: 4514 -Stores 3729 1 -Total Receives: 4515 -Stores 3730 1 -Total Receives: 4516 -Stores 3731 1 -Total Receives: 4517 -Stores 3732 1 -tensor([-0.5551, -0.5625]) -tensor([-1.3496, -1.3537, -1.3537, -1.3537]) -tensor([-10.1461, -10.1509, -10.1565, -10.1545, -10.1573]) -tensor([-6.6623, -6.6585, -6.6528, -6.6593, -6.6758]) -Total rewards: 4523 -Total Receives: 4518 -Stores 3733 1 -Total Receives: 4519 -Stores 3734 1 -Total Receives: 4520 -Stores 3735 1 -Total Receives: 4521 -Stores 3736 1 -Total Receives: 4522 -Stores 3737 1 -Total Receives: 4523 -Stores 3738 1 -Total rewards: 4526 -Total Receives: 4524 -Stores 3739 1 -Total Receives: 4525 -Stores 3740 1 -Total Receives: 4526 -Stores 3741 1 -tensor([-0.2869, -0.2284, -0.2460]) -Total rewards: 4530 -Total Receives: 4527 -Stores 3742 1 -Total Receives: 4528 -Stores 3743 1 -Total Receives: 4529 -Stores 3744 1 -Total Receives: 4530 -Stores 3745 1 -tensor([-0.2446, -0.2461, -0.2471]) -tensor([-6.0816, -6.0755, -6.0709, -6.0730, -6.0629, -6.0900]) -tensor([-0.4800, -0.4570, -0.4853]) -Total rewards: 4534 -Total Receives: 4531 -Stores 3746 1 -Total Receives: 4532 -Stores 3747 1 -Total Receives: 4533 -Stores 3748 1 -Total Receives: 4534 -Stores 3749 1 -tensor([7.5827, 7.9418]) -tensor([11.0156, 11.7338, 8.5477, 8.5477, 8.5477]) -Total rewards: 5594 -Total Receives: 5589 -Stores 4381 1 -Total Receives: 5590 -Stores 4382 1 -Total Receives: 5591 -Stores 4383 1 -Total Receives: 5592 -Stores 4384 1 -Total Receives: 5593 -Stores 4385 1 -Total Receives: 5594 -Stores 4386 1 -tensor([5.4931, 6.2322, 6.5923, 4.9883, 4.3652]) -tensor([4.0528, 3.3702]) -tensor([4.4448, 5.2810]) -Total rewards: 5599 -Total Receives: 5595 -Stores 4387 1 -Total Receives: 5596 -Stores 4388 1 -Total Receives: 5597 -Stores 4389 1 -Total Receives: 5598 -Stores 4390 1 -Total Receives: 5599 -Stores 4391 1 -tensor([6.3081, 4.6687, 4.1942]) -tensor([4.1822, 5.0302]) -tensor([2.8770, 3.2691]) -tensor([13.2629, 9.7829, 9.7829, 9.7829]) -tensor([2.5713, 2.1712]) -tensor([3.5699, 3.5962, 2.9064]) -tensor([6.1664, 4.5242]) -tensor([6.4825, 8.5557, 7.9329]) -tensor([4.1837, 3.4601]) -Total rewards: 5604 -Total Receives: 5600 -Stores 4392 1 -Total Receives: 5601 -Stores 4393 1 -Total Receives: 5602 -Stores 4394 1 -Total Receives: 5603 -Stores 4395 1 -Total Receives: 5604 -Stores 4396 1 -tensor([6.9006, 5.4035, 6.6613, 4.7537]) -loss: 2.6472442186786793e-05, td_error: 224.97900390625, entropy: 0.6931629180908203 -Train step: 0.4075791835784912 Optimizer Step: 74 -Total rewards: 5611 -Total Receives: 5605 -Stores 4397 1 -Total Receives: 5606 -Stores 4398 1 -Total Receives: 5607 -Stores 4399 1 -Total Receives: 5608 -Stores 4400 1 -Total Receives: 5609 -Stores 4401 1 -Total Receives: 5610 -Stores 4402 1 -Total Receives: 5611 -Stores 4403 1 -tensor([5.9805, 6.3607]) -Total rewards: 5614 -Total Receives: 5612 -Stores 4404 1 -Total Receives: 5613 -Stores 4405 1 -Total Receives: 5614 -Stores 4406 1 -tensor([2.1259, 1.9760, 1.9328]) -tensor([1.1970, 1.3435, 1.0256]) -tensor([3.0872, 2.3935]) -tensor([6.1415, 7.4309, 5.3540]) -Total rewards: 5620 -Total Receives: 5615 -Stores 4407 1 -Total Receives: 5616 -Stores 4408 1 -Total Receives: 5617 -Stores 4409 1 -Total Receives: 5618 -Stores 4410 1 -Total Receives: 5619 -Stores 4411 1 -Total Receives: 5620 -Stores 4412 1 -tensor([6.4231, 7.9458]) -tensor([1.4865, 1.5607]) -tensor([4.2366, 3.3585, 3.1754]) -tensor([3.3875, 3.1990]) -tensor([9.9389, 9.4324]) -tensor([1.6895, 0.9621]) -tensor([4.6498, 3.9076, 3.5066]) -tensor([1.0201, 1.1270, 0.7578, 1.8534, 0.7263, 0.7787, 1.4419]) -tensor([1.8742, 2.4167]) -loss: 1.1231261851207819e-05, td_error: 95.45012664794922, entropy: 2.3978943824768066 -Train step: 0.32637810707092285 Optimizer Step: 75 -tensor([8.0912, 7.7481, 7.7481, 7.7481]) -tensor([2.5712, 2.7144, 2.1703, 2.0447, 2.2148, 3.1027]) -tensor([3.1822, 2.3595]) -tensor([9.9510, 9.9510, 9.9510]) -tensor([7.9613, 8.7103, 7.4510, 5.8378]) -tensor([2.8530, 2.6259, 3.1377, 2.5393]) -tensor([3.8083, 2.9665, 3.1546, 3.3563]) -tensor([8.6581, 8.5120, 8.3507, 8.5808]) -Total rewards: 5626 -Total Receives: 5621 -Stores 4413 1 -Total Receives: 5622 -Stores 4414 1 -Total Receives: 5623 -Stores 4415 1 -Total Receives: 5624 -Stores 4416 1 -Total Receives: 5625 -Stores 4417 1 -Total Receives: 5626 -Stores 4418 1 -tensor([3.6440, 3.5846]) -tensor([3.1510, 3.3509, 3.5423]) -tensor([2.1946, 3.1614]) -tensor([0.8741, 0.9811]) -Total rewards: 5632 -Total Receives: 5627 -Stores 4419 1 -Total Receives: 5628 -Stores 4420 1 -Total Receives: 5629 -Stores 4421 1 -Total Receives: 5630 -Stores 4422 1 -Total Receives: 5631 -Stores 4423 1 -Total Receives: 5632 -Stores 4424 1 -tensor([5.6018, 6.0154, 5.9660, 4.7877]) -tensor([5.2899, 5.4346, 4.8509, 4.3461, 4.9037]) -loss: 2.738122748269234e-05, td_error: 232.70242309570312, entropy: 2.9739837646484375 -Train step: 0.4324767589569092 Optimizer Step: 76 -tensor([3.8422, 3.4383]) -tensor([3.2015, 2.7362]) -Total rewards: 5637 -Total Receives: 5633 -Stores 4425 1 -Total Receives: 5634 -Stores 4426 1 -Total Receives: 5635 -Stores 4427 1 -Total Receives: 5636 -Stores 4428 1 -Total Receives: 5637 -Stores 4429 1 -tensor([3.8528, 3.9938]) -Total rewards: 5641 -Total Receives: 5638 -Stores 4430 1 -Total Receives: 5639 -Stores 4431 1 -Total Receives: 5640 -Stores 4432 1 -Total Receives: 5641 -Stores 4433 1 -tensor([201.4447, 201.6365]) -tensor([8.1565, 6.1157, 6.8181]) -tensor([2.0886, 1.4920]) -tensor([3.2888, 1.7808]) -tensor([1.7641, 1.8375]) -Total rewards: 5645 -Total Receives: 5642 -Stores 4434 1 -Total Receives: 5643 -Stores 4435 1 -Total Receives: 5644 -Stores 4436 1 -Total Receives: 5645 -Stores 4437 1 -tensor([2.5513, 2.2210, 2.1778, 2.0812]) -tensor([8.6326, 8.1669, 5.7904]) -tensor([9.5995, 8.9233, 8.9233]) -loss: 7.46221473946207e-07, td_error: 6.341846466064453, entropy: 2.1972618103027344 -Train step: 0.2995297908782959 Optimizer Step: 77 -Total rewards: 5648 -Total Receives: 5646 -Stores 4438 1 -Total Receives: 5647 -Stores 4439 1 -Total Receives: 5648 -Stores 4440 1 -tensor([5.7487, 5.8746, 5.3628, 5.3664]) -tensor([4.7691, 5.8457, 6.1830, 4.9227, 4.2756]) -Total rewards: 5656 -Total Receives: 5649 -Stores 4441 1 -Total Receives: 5650 -Stores 4442 1 -Total Receives: 5651 -Stores 4443 1 -Total Receives: 5652 -Stores 4444 1 -Total Receives: 5653 -Stores 4445 1 -Total Receives: 5654 -Stores 4446 1 -Total Receives: 5655 -Stores 4447 1 -Total Receives: 5656 -Stores 4448 1 -tensor([2.9381, 3.5549, 3.6133, 3.1106, 4.1112, 2.7569, 2.7904, 2.7569]) -tensor([8.4579, 8.2936, 7.3801, 8.0902, 7.1226]) -Total rewards: 5662 -Total Receives: 5657 -Stores 4449 1 -Total Receives: 5658 -Stores 4450 1 -Total Receives: 5659 -Stores 4451 1 -Total Receives: 5660 -Stores 4452 1 -Total Receives: 5661 -Stores 4453 1 -Total Receives: 5662 -Stores 4454 1 -tensor([7.2201, 7.3704, 6.7706]) -tensor([5.2623, 5.7746, 6.5677]) -tensor([197.8939, 197.1126]) -tensor([4.1252, 4.8482, 4.9160, 4.3222, 3.7085, 3.9272, 3.7085]) -Total rewards: 5668 -Total Receives: 5663 -Stores 4455 1 -Total Receives: 5664 -Stores 4456 1 -Total Receives: 5665 -Stores 4457 1 -Total Receives: 5666 -Stores 4458 1 -Total Receives: 5667 -Stores 4459 1 -Total Receives: 5668 -Stores 4460 1 -tensor([10.0673, 8.8755, 9.6765, 8.5930]) -Total rewards: 5673 -Total Receives: 5669 -Stores 4461 1 -Total Receives: 5670 -Stores 4462 1 -Total Receives: 5671 -Stores 4463 1 -Total Receives: 5672 -Stores 4464 1 -Total Receives: 5673 -Stores 4465 1 -Total rewards: 5679 -Total Receives: 5674 -Stores 4466 1 -Total Receives: 5675 -Stores 4467 1 -Total Receives: 5676 -Stores 4468 1 -Total Receives: 5677 -Stores 4469 1 -Total Receives: 5678 -Stores 4470 1 -Total Receives: 5679 -Stores 4471 1 -tensor([3.0360, 2.9943, 2.2941, 2.8976]) -tensor([3.2895, 4.5818, 2.7862]) -tensor([7.2334, 8.0856]) -tensor([2.2374, 1.6753]) -Total rewards: 5683 -Total Receives: 5680 -Stores 4472 1 -Total Receives: 5681 -Stores 4473 1 -Total Receives: 5682 -Stores 4474 1 -Total Receives: 5683 -Stores 4475 1 -Total rewards: 5690 -Total Receives: 5684 -Stores 4476 1 -Total Receives: 5685 -Stores 4477 1 -Total Receives: 5686 -Stores 4478 1 -Total Receives: 5687 -Stores 4479 1 -Total Receives: 5688 -Stores 4480 1 -Total Receives: 5689 -Stores 4481 1 -Total Receives: 5690 -Stores 4482 1 -tensor([4.1302, 3.5902]) -tensor([4.2213, 4.2616, 3.3704]) -tensor([1.6247, 1.7420]) -loss: 4.8065710871014744e-05, td_error: 408.4918212890625, entropy: 0.0 -Train step: 0.41648006439208984 Optimizer Step: 78 -tensor([5.5353, 6.2888, 5.6893, 4.8931, 5.3230, 4.8931]) -Total rewards: 5694 -Total Receives: 5691 -Stores 4483 1 -Total Receives: 5692 -Stores 4484 1 -Total Receives: 5693 -Stores 4485 1 -Total Receives: 5694 -Stores 4486 1 -tensor([6.4493, 7.5867, 6.6094, 5.3444]) -tensor([4.9041, 5.0004, 4.7674, 4.7674, 4.7674]) -tensor([1.4666, 0.8600]) -tensor([5.0209, 4.8592]) -tensor([5.0092, 5.0565]) -tensor([4.3222, 6.6881, 4.4130, 3.9600]) -tensor([6.4607, 4.5746, 4.0309, 4.3669, 4.4389, 6.0067]) -tensor([11.7255, 10.5461, 10.2459]) -tensor([0.9638, 1.3847]) -loss: 2.5715737592690857e-06, td_error: 21.854806900024414, entropy: 3.0921239852905273 -Train step: 0.2661151885986328 Optimizer Step: 79 -tensor([3.4468, 4.9949, 4.3304, 4.9085]) -tensor([9.9018, 9.3036, 9.3036, 9.3036]) -tensor([11.5935, 11.2691]) -tensor([6.5331, 5.0581, 6.6780, 4.7941]) -tensor([9.0928, 7.0623, 6.4554, 6.8365, 8.6241]) -tensor([3.5192, 4.0645, 3.4000]) -tensor([10.1475, 10.9537, 8.5609, 9.9037, 8.5609]) -tensor([3.9334, 3.7398]) -Total rewards: 5700 -Total Receives: 5695 -Stores 4487 1 -Total Receives: 5696 -Stores 4488 1 -Total Receives: 5697 -Stores 4489 1 -Total Receives: 5698 -Stores 4490 1 -Total Receives: 5699 -Stores 4491 1 -tensor([-6.1245, -6.1172, -6.1255, -6.1417]) -tensor([-0.9182, -0.9258, -0.9212, -0.9238]) -tensor([-8.0595, -8.0578]) -tensor([-1.1056, -1.1081]) -tensor([-7.7991, -7.7994, -7.7205, -7.7928, -7.8062, -7.7376, -7.7903, -7.7433, - -7.8238, -7.8186]) -tensor([-0.2842, -0.2782]) -Total rewards: 4540 -Total Receives: 4535 -Stores 3750 1 -Total Receives: 4536 -Stores 3751 1 -Total Receives: 4537 -Stores 3752 1 -Total Receives: 4538 -Stores 3753 1 -Total Receives: 4539 -Stores 3754 1 -Total Receives: 4540 -Stores 3755 1 -tensor([-3.1611, -3.1730]) -Total rewards: 4548 -Total Receives: 4541 -Stores 3756 1 -Total Receives: 4542 -Stores 3757 1 -Total Receives: 4543 -Stores 3758 1 -Total Receives: 4544 -Stores 3759 1 -Total Receives: 4545 -Stores 3760 1 -Total Receives: 4546 -Stores 3761 1 -Total Receives: 4547 -Stores 3762 1 -Total Receives: 4548 -Stores 3763 1 -tensor([-0.7779, -0.7866, -0.7865]) -tensor([-6.1028, -6.0938, -6.0878, -6.0913, -6.1193]) -Total rewards: 4552 -Total Receives: 4549 -Stores 3764 1 -Total Receives: 4550 -Stores 3765 1 -Total Receives: 4551 -Stores 3766 1 -Total Receives: 4552 -Stores 3767 1 -tensor([-1.3683, -1.3785]) -tensor([-0.5954, -0.5948]) -tensor([-0.1250, -0.1078, -0.1005]) -tensor([-1.4501, -1.4661]) -Total rewards: 4559 -Total Receives: 4553 -Stores 3768 1 -Total Receives: 4554 -Stores 3769 1 -Total Receives: 4555 -Stores 3770 1 -Total Receives: 4556 -Stores 3771 1 -Total Receives: 4557 -Stores 3772 1 -Total Receives: 4558 -Stores 3773 1 -Total Receives: 4559 -Stores 3774 1 -tensor([-1.0077, -0.9975, -1.0077, -1.0077]) -tensor([-84.6222, -84.6645]) -tensor([-0.6016, -0.6075, -0.6076, -0.6146]) -tensor([-5.9022, -5.8823, -5.8864, -5.9084, -5.9146]) -tensor([-0.1665, -0.1687]) -tensor([-3.0096, -3.0055]) -Total rewards: 4562 -Total Receives: 4560 -Stores 3775 1 -Total Receives: 4561 -Stores 3776 1 -Total Receives: 4562 -Stores 3777 1 -tensor([-0.2040, -0.0557, -0.1798, -0.0516, -0.0673]) -tensor([-0.6009, -0.6009, -0.5986, -0.6009]) -tensor([-8.2107, -8.2079, -8.2072, -8.2143, -8.1616, -8.2071, -8.1697, -8.2435, - -8.2289]) -tensor([-0.3354, -0.3387, -0.3378]) -tensor([-2.0724, -2.0629]) -tensor([-0.2217, -0.1932, -0.0758, -0.0896]) -tensor([-6.4379, -6.4319, -6.4231, -6.4261]) -tensor([-0.2252, -0.2043, -0.0881]) -tensor([-0.4029, -0.4022]) -tensor([-0.1059, -0.1151, -0.1058]) -Total rewards: 4566 -Total Receives: 4563 -Stores 3778 1 -Total Receives: 4564 -Stores 3779 1 -Total Receives: 4565 -Stores 3780 1 -Total Receives: 4566 -Stores 3781 1 -tensor([-0.3611, -0.3561]) -Total rewards: 4574 -Total Receives: 4567 -Stores 3782 1 -Total Receives: 4568 -Stores 3783 1 -Total Receives: 4569 -Stores 3784 1 -Total Receives: 4570 -Stores 3785 1 -Total Receives: 4571 -Stores 3786 1 -Total Receives: 4572 -Stores 3787 1 -Total Receives: 4573 -Stores 3788 1 -Total Receives: 4574 -Stores 3789 1 -tensor([-5.5542, -5.5314, -5.5405, -5.5643]) -tensor([-7.6677, -7.6792]) -tensor([-0.4851, -0.4851, -0.4851]) -tensor([-0.2283, -0.2191]) -tensor([-0.2730, -0.2612]) -tensor([-8.1328, -8.1314, -8.1294, -8.1475, -8.1237, -8.0884, -8.1729, -8.1475, - -8.1729]) -tensor([-0.2498, -0.2498]) -tensor([-0.0513, -0.0298, -0.0342, -0.0288, -0.0356]) -tensor([-0.9291, -0.9329]) -tensor([-1.8181, -1.8084, -1.8201]) -tensor([-0.0386, -0.0434, -0.0380, -0.0446]) -tensor([-0.2443, -0.2507]) -tensor([-0.1310, -0.1310]) -tensor([-5.3681, -5.3489, -5.3485, -5.4115]) -Total rewards: 4577 -Total Receives: 4575 -Stores 3790 1 -Total Receives: 4576 -Stores 3791 1 -Total Receives: 4577 -Stores 3792 1 -tensor([-0.3720, -0.5156, -0.3892]) -tensor([-4.6267, -4.6119, -4.6115]) -tensor([-0.3524, -0.2586]) -tensor([-0.2492, -0.2476]) -tensor([-0.2482, -0.2243]) -Total rewards: 4583 -Total Receives: 4578 -Stores 3793 1 -Total Receives: 4579 -Stores 3794 1 -Total Receives: 4580 -Stores 3795 1 -Total Receives: 4581 -Stores 3796 1 -Total Receives: 4582 -Stores 3797 1 -Total Receives: 4583 -Stores 3798 1 -tensor([-0.1312, -0.1331]) -Total rewards: 4592 -Total Receives: 4584 -Stores 3799 1 -Total Receives: 4585 -Stores 3800 1 -Total Receives: 4586 -Stores 3801 1 -Total Receives: 4587 -Stores 3802 1 -Total Receives: 4588 -Stores 3803 1 -Total Receives: 4589 -Stores 3804 1 -Total Receives: 4590 -Stores 3805 1 -Total Receives: 4591 -Stores 3806 1 -Total Receives: 4592 -Stores 3807 1 -tensor([-4.7474, -4.7470, -4.8087]) -tensor([-8.5119, -8.5122, -8.5101, -8.5304, -8.5042, -8.5744, -8.5292, -8.5590, - -8.5744]) -Total rewards: 4599 -Total Receives: 4593 -Stores 3808 1 -Total Receives: 4594 -Stores 3809 1 -Total Receives: 4595 -Stores 3810 1 -Total Receives: 4596 -Stores 3811 1 -Total Receives: 4597 -Stores 3812 1 -Total Receives: 4598 -Stores 3813 1 -Total Receives: 4599 -Stores 3814 1 -tensor([-6.2929, -6.3469, -6.3543, -6.3601, -6.3086, -6.3367]) -Total rewards: 4604 -Total Receives: 4600 -Stores 3815 1 -Total Receives: 4601 -Stores 3816 1 -Total Receives: 4602 -Stores 3817 1 -Total Receives: 4603 -Stores 3818 1 -Total Receives: 4604 -Stores 3819 1 -tensor([-1.9324, -1.9311]) -Total rewards: 4609 -Total Receives: 4605 -Stores 3820 1 -Total Receives: 4606 -Stores 3821 1 -Total Receives: 4607 -Stores 3822 1 -Total Receives: 4608 -Stores 3823 1 -Total Receives: 4609 -Stores 3824 1 -tensor([-7.3117, -7.3018, -7.3160, -7.3346, -7.3236]) -tensor([-3.1085, -3.1323]) -tensor([-4.7985, -4.7791, -4.7689, -4.7737, -4.7375, -4.7627]) -tensor([-0.2251, -0.2002]) -tensor([-0.0743, -0.0602]) -tensor([-4.9499, -4.9666]) -tensor([-0.0742, -0.0806, -0.0808]) -tensor([-0.0569, -0.0522]) -tensor([-0.0912, -0.1059]) -tensor([-0.3146, -0.3204]) -tensor([-3.8154, -3.8380, -3.8147, -3.8022, -3.8086, -3.8043]) -tensor([-0.1933, -0.1858, -0.1962]) -tensor([-0.0884, -0.0926]) -tensor([-0.1146, -0.1079]) -tensor([-4.5473, -4.5184, -4.5539]) -tensor([-0.1895, -0.1924]) -Total rewards: 4613 -Total Receives: 4610 -Stores 3825 1 -Total Receives: 4611 -Stores 3826 1 -Total Receives: 4612 -Stores 3827 1 -Total Receives: 4613 -Stores 3828 1 -tensor([-0.1393, -0.1397, -0.1418, -0.1379]) -tensor([-11.6278, -11.6345, -11.6255, -11.6363, -10.7365, -11.7032, -11.6405, - -11.6734, -11.7032]) -tensor([-9.1852, -9.1539, -9.1854, -9.1963, -9.1813]) -tensor([-0.1811, -0.1817]) -tensor([-7.2516, -7.2543, -7.2528, -7.2535]) -Total rewards: 4618 -Total Receives: 4614 -Stores 3829 1 -Total Receives: 4615 -Stores 3830 1 -Total Receives: 4616 -Stores 3831 1 -Total Receives: 4617 -Stores 3832 1 -Total Receives: 4618 -Stores 3833 1 -tensor([-0.0562, -0.0504, -0.0491, -0.0524]) -tensor([-5.3349, -5.3386, -5.3005]) -Total rewards: 4620 -Total Receives: 4619 -Stores 3834 1 -Total Receives: 4620 -Stores 3835 1 -tensor([-6.1135, -6.1374, -6.1122, -6.1124, -6.1040]) -tensor([-0.3035, -0.3011, -0.3007, -0.3035, -0.2984]) -Total rewards: 4625 -Total Receives: 4621 -Stores 3836 1 -Total Receives: 4622 -Stores 3837 1 -Total Receives: 4623 -Stores 3838 1 -Total Receives: 4624 -Stores 3839 1 -Total Receives: 4625 -Stores 3840 1 -tensor([-0.0757, -0.0852, -0.0710]) -tensor([-4.9546, -4.9775, -4.9574, -4.9573]) -tensor([-0.0399, -0.0497]) -tensor([-3.7692, -3.7636, -3.7699]) -tensor([-9.3515, -9.3292, -9.3552, -9.3502]) -tensor([-9.8847, -9.8892, -9.8783, -9.8906, -9.9617, -9.8921, -9.9585, -9.9224, - -9.9617, -9.9606]) -tensor([-5.0250, -5.0126, -5.0364]) -tensor([-4.6690, -4.7204, -4.7192, -4.6708]) -tensor([-0.2324, -0.2365, -0.2335, -0.2267, -0.2322]) -tensor([-0.0712, -0.0805]) -tensor([-0.0244, -0.0562, -0.0214, -0.0180]) -Total rewards: 4630 -Total Receives: 4626 -Stores 3841 1 -Total Receives: 4627 -Stores 3842 1 -Total Receives: 4628 -Stores 3843 1 -Total Receives: 4629 -Stores 3844 1 -Total Receives: 4630 -Stores 3845 1 -tensor([-0.0286, -0.0222, -0.0051, -0.0240]) -tensor([-9.8692, -9.8727, -9.8707, -9.9535, -9.8758, -9.9454, -9.9073, -9.9535, - -9.9483]) -tensor([-4.9675, -4.9869]) -tensor([-0.1126, -0.1150]) -tensor([-0.0300, -0.0641, -0.0278]) -tensor([-5.2822, -5.2513, -5.2553]) -tensor([-4.2790, -4.2809, -4.2932]) -tensor([-3.5110, -3.5290]) -tensor([-0.2534, -0.2491, -0.2502, -0.2536]) -tensor([-7.9747, -7.9847, -7.9578, -7.9962, -7.9523, -7.9778, -7.9645]) -tensor([-4.2330, -4.2239, -4.1915]) -tensor([-0.0519, -0.0452, -0.0428]) -Total rewards: 4634 -Total Receives: 4631 -Stores 3846 1 -Total Receives: 4632 -Stores 3847 1 -Total Receives: 4633 -Stores 3848 1 -Total Receives: 4634 -Stores 3849 1 -tensor([-5.4755, -5.4491]) -tensor([-0.0605, -0.0523, -0.0588]) -tensor([-0.0463, -0.0772]) -tensor([-1.4409, -1.5441]) -tensor([-0.0620, -0.0542]) -Total rewards: 4642 -Total Receives: 4635 -Stores 3850 1 -Total Receives: 4636 -Stores 3851 1 -Total Receives: 4637 -Stores 3852 1 -Total Receives: 4638 -Stores 3853 1 -Total Receives: 4639 -Stores 3854 1 -Total Receives: 4640 -Stores 3855 1 -Total Receives: 4641 -Stores 3856 1 -Total Receives: 4642 -Stores 3857 1 -tensor([-9.3796, -9.3718, -9.4650, -9.4709, -9.4810, -9.3891, -9.4553, -9.4127, - -9.4810, -9.4656, -9.4810]) -tensor([-0.0618, -0.0646, -0.0668, -0.0638, -0.0628]) -tensor([-0.2833, -0.2809, -0.2821, -0.2846]) -tensor([-8.2266, -8.2364, -8.2129, -8.2520, -8.2528, -8.2273, -8.2177]) -tensor([-5.6458, -5.6498]) -tensor([-0.0471, -0.0343]) -tensor([-2.4723, -2.4745, -2.4699]) -tensor([-0.2504, -0.2502]) -tensor([-8.7080, -8.7014, -8.7903, -8.8041, -8.8161, -8.7808, -8.7507, -8.8161, - -8.7924, -8.8161]) -tensor([-0.1829, -0.1689]) -tensor([-0.1979, -0.1930, -0.2139, -0.1968]) -tensor([-4.3194, -4.3657, -4.3611]) -tensor([-4.9247, -4.9321, -4.9067, -4.8509, -4.9282, -4.9195]) -tensor([-0.0808, -0.0890, -0.0988]) -tensor([-0.1362, -0.1352]) -tensor([-7.1451, -7.1593, -7.1732, -7.1744, -7.1922, -7.1485, -7.1498]) -tensor([-6.7148, -6.8211, -6.8161, -6.8423, -6.8362, -6.8008, -6.7623, -6.8423, - -6.8072, -6.8423]) -tensor([-0.0314, -0.0334, -0.0280, -0.0222, -0.0201]) -tensor([-0.0581, -0.0554]) -Total rewards: 4647 -Total Receives: 4643 -Stores 3858 1 -Total Receives: 4644 -Stores 3859 1 -Total Receives: 4645 -Stores 3860 1 -Total Receives: 4646 -Stores 3861 1 -Total Receives: 4647 -Stores 3862 1 -Total rewards: 4651 -Total Receives: 4648 -Stores 3863 1 -Total Receives: 4649 -Stores 3864 1 -Total Receives: 4650 -Stores 3865 1 -Total Receives: 4651 -Stores 3866 1 -tensor([-0.5231, -0.5373, -0.5360]) -tensor([-0.0727, -0.0892]) -Total rewards: 4656 -Total Receives: 4652 -Stores 3867 1 -Total Receives: 4653 -Stores 3868 1 -Total Receives: 4654 -Stores 3869 1 -Total Receives: 4655 -Stores 3870 1 -Total Receives: 4656 -Stores 3871 1 -tensor([-0.0477, -0.0624]) -tensor([-0.1176, -0.1129, -0.1196]) -Total rewards: 4664 -Total Receives: 4657 -Stores 3872 1 -Total Receives: 4658 -Stores 3873 1 -Total Receives: 4659 -Stores 3874 1 -Total Receives: 4660 -Stores 3875 1 -Total Receives: 4661 -Stores 3876 1 -Total Receives: 4662 -Stores 3877 1 -Total Receives: 4663 -Stores 3878 1 -Total Receives: 4664 -Stores 3879 1 -Total rewards: 4670 -Total Receives: 4665 -Stores 3880 1 -Total Receives: 4666 -Stores 3881 1 -Total Receives: 4667 -Stores 3882 1 -Total Receives: 4668 -Stores 3883 1 -Total Receives: 4669 -Stores 3884 1 -Total Receives: 4670 -Stores 3885 1 -tensor([-0.0303, -0.0539]) -Total rewards: 4674 -Total Receives: 4671 -Stores 3886 1 -Total Receives: 4672 -Stores 3887 1 -Total Receives: 4673 -Stores 3888 1 -Total Receives: 4674 -Stores 3889 1 -tensor([-0.1357, -0.1620, -0.1131]) -tensor([-0.1076, -0.1096]) -tensor([-5.9809, -5.9894, -5.9573, -5.9815, -5.9755]) -Total rewards: 4679 -Total Receives: 4675 -Stores 3890 1 -Total Receives: 4676 -Stores 3891 1 -Total Receives: 4677 -Stores 3892 1 -Total Receives: 4678 -Stores 3893 1 -Total Receives: 4679 -Stores 3894 1 -tensor([-6.8443, -6.8376, -6.8804, -6.8563, -6.8217, -6.7807, -6.8804, -6.8321, - -6.8804]) -tensor([-24.6421, -24.6842, -24.6933]) -tensor([-8.7852, -8.7586, -8.7785, -8.7801, -8.7502, -8.7567]) -tensor([-0.2505, -0.2438, -0.2559, -0.2598]) -tensor([-0.3987, -0.4117]) -tensor([-0.1166, -0.1209]) -tensor([-0.1339, -0.1262, -0.1226]) -Total rewards: 4684 -Total Receives: 4680 -Stores 3895 1 -Total Receives: 4681 -Stores 3896 1 -Total Receives: 4682 -Stores 3897 1 -Total Receives: 4683 -Stores 3898 1 -Total Receives: 4684 -Stores 3899 1 -tensor([-2.4363, -2.4344]) -tensor([-0.1008, -0.0933]) -Total rewards: 4686 -Total Receives: 4685 -Stores 3900 1 -Total Receives: 4686 -Stores 3901 1 -tensor([-8.3290, -8.3016, -8.3230, -8.2931, -8.2943]) -tensor([-0.2971, -0.2828, -0.2926, -0.2993, -0.2968]) -tensor([-0.2622, -0.2691]) -tensor([-0.0833, -0.0543]) -tensor([-5.5400, -5.5536, -5.5694, -5.5536, -5.5382]) -Total rewards: 4692 -Total Receives: 4687 -Stores 3902 1 -Total Receives: 4688 -Stores 3903 1 -Total Receives: 4689 -Stores 3904 1 -Total Receives: 4690 -Stores 3905 1 -Total Receives: 4691 -Stores 3906 1 -Total Receives: 4692 -Stores 3907 1 -tensor([-4.0922, -4.0956]) -tensor([-4.4446, -4.4557, -4.4680, -4.4544]) -tensor([-0.0375, -0.0345]) -tensor([-0.1631, -0.1228]) -tensor([-3.4905, -3.4962, -3.4775]) -tensor([-6.4214, -6.4149, -6.4488, -6.4590, -6.4341, -6.3770, -6.4590, -6.4269, - -6.4590]) -tensor([-6.1441, -6.1945, -6.1219, -6.1802, -6.1152, -6.1209]) -tensor([-0.3527, -0.3620]) -tensor([-4.3882, -4.4391, -4.4704]) -tensor([-0.0449, -0.0394]) -Total rewards: 4698 -Total Receives: 4693 -Stores 3908 1 -Total Receives: 4694 -Stores 3909 1 -Total Receives: 4695 -Stores 3910 1 -Total Receives: 4696 -Stores 3911 1 -Total Receives: 4697 -Stores 3912 1 -Total Receives: 4698 -Stores 3913 1 -tensor([-0.1232, -0.1150]) -tensor([-0.0382, -0.0382]) -tensor([-0.6852, -0.6844, -0.6825]) -tensor([-0.0478, -0.0409]) -tensor([-3.5286, -3.5615]) -Total rewards: 4705 -Total Receives: 4699 -Stores 3914 1 -Total Receives: 4700 -Stores 3915 1 -Total Receives: 4701 -Stores 3916 1 -Total Receives: 4702 -Stores 3917 1 -Total Receives: 4703 -Stores 3918 1 -Total Receives: 4704 -Stores 3919 1 -Total Receives: 4705 -Stores 3920 1 -tensor([-2.8484, -2.8469, -2.8583]) -tensor([-5.3199, -5.3641, -5.3153, -5.3474, -5.2960]) -tensor([-0.0212, -0.0270]) -tensor([-0.6834, -0.6796, -0.6846]) -tensor([-0.1715, -0.1866, -0.1614, -0.1744, -0.1791]) -tensor([-3.2355, -3.2472, -3.2709]) -Total rewards: 4712 -Total Receives: 4706 -Stores 3921 1 -Total Receives: 4707 -Stores 3922 1 -Total Receives: 4708 -Stores 3923 1 -Total Receives: 4709 -Stores 3924 1 -Total Receives: 4710 -Stores 3925 1 -Total Receives: 4711 -Stores 3926 1 -Total Receives: 4712 -Stores 3927 1 -tensor([-2.5636, -2.5832]) -Total rewards: 4716 -Total Receives: 4713 -Stores 3928 1 -Total Receives: 4714 -Stores 3929 1 -Total Receives: 4715 -Stores 3930 1 -Total Receives: 4716 -Stores 3931 1 -Total rewards: 4721 -Total Receives: 4717 -Stores 3932 1 -Total Receives: 4718 -Stores 3933 1 -Total Receives: 4719 -Stores 3934 1 -Total Receives: 4720 -Stores 3935 1 -Total Receives: 4721 -Stores 3936 1 -tensor([-0.1354, -0.1354]) -tensor([-0.9538, -0.9555]) -tensor([-6.7775, -6.7567, -6.7994, -6.8278, -6.8141, -6.7290, -6.8234, -6.8278, - -6.8186, -6.7838, -6.7967, -6.8278]) -Total rewards: 4724 -Total Receives: 4722 -Stores 3937 1 -Total Receives: 4723 -Stores 3938 1 -Total Receives: 4724 -Stores 3939 1 -tensor([-0.1466, -0.1354, -0.1612, -0.1527, -0.1444]) -tensor([-3.3878, -3.3914]) -tensor([-1.1918, -1.1966, -1.1971, -1.1974]) -tensor([-0.6251, -0.6298, -0.6294]) -tensor([-5.5505, -5.5984, -5.5433, -5.5944]) -tensor([-1.0871, -1.0883, -1.0858]) -Total rewards: 4729 -Total Receives: 4725 -Stores 3940 1 -Total Receives: 4726 -Stores 3941 1 -Total Receives: 4727 -Stores 3942 1 -Total Receives: 4728 -Stores 3943 1 -Total Receives: 4729 -Stores 3944 1 -tensor([-0.0181, -0.0181]) -Total rewards: 4735 -Total Receives: 4730 -Stores 3945 1 -Total Receives: 4731 -Stores 3946 1 -Total Receives: 4732 -Stores 3947 1 -Total Receives: 4733 -Stores 3948 1 -Total Receives: 4734 -Stores 3949 1 -Total Receives: 4735 -Stores 3950 1 -tensor([-1.0207, -1.0284]) -tensor([-4.6774, -4.7255, -4.7199]) -tensor([-0.6336, -0.6328, -0.6333]) -tensor([-0.0497, -0.0407]) -tensor([-5.1048, -5.0878, -5.1360, -5.1709, -5.1427, -5.3156, -5.1504, -5.1634, - -5.1709, -5.1474, -5.1194, -5.1253, -5.1709, -5.1585]) -tensor([-1.3111, -1.3081, -1.3257]) -tensor([-0.6384, -0.6118, -0.6204, -0.5991, -0.6500, -0.6412, -0.6480]) -tensor([-1.2062, -1.2112, -1.2282]) -tensor([-0.9119, -0.9200]) -tensor([-0.0796, -0.0756]) -tensor([-3.8985, -3.9372, -3.9888, -3.9475, -3.9762, -3.9626, -3.9708, -3.9888, - -3.9523, -3.9135, -3.9259, -3.9888, -3.9673]) -tensor([-1.2850, -1.2648, -1.2729, -1.2794, -1.2887, -1.2768, -1.2800]) -tensor([-0.9293, -0.9466]) -tensor([-0.0562, -0.0494]) -tensor([-3.1468, -3.1379, -3.1680]) -tensor([-0.5398, -0.5531]) -tensor([-1.4598, -1.4412]) -tensor([-12.0467, -12.0384]) -tensor([-1.3711, -1.3595, -1.3763, -1.3748]) -tensor([-3.8914, -3.9479, -3.8956, -3.9265, -3.9127, -3.9208, -3.9479, -3.9025, - -3.8653, -3.8759, -3.9479, -3.9197]) -tensor([-0.5522, -0.5499, -0.5608, -0.5410]) -Total Receives: 5700 -Stores 4492 1 -tensor([3.4376, 3.5221]) -tensor([3.8805, 4.2844, 4.8956]) -tensor([3.7265, 4.0776, 3.2190]) -tensor([11.2177, 12.0738, 9.2434, 10.9649]) -loss: 5.401217322287266e-07, td_error: 4.590285301208496, entropy: 3.6829278469085693 -Train step: 0.22478795051574707 Optimizer Step: 80 -tensor([3.7282, 5.6999, 3.6532]) -tensor([4.8362, 5.2608]) -tensor([2.0264, 1.8343]) -tensor([8.4086, 7.6726, 6.3428, 8.3190]) -tensor([2.3989, 2.3741, 2.3519]) -Total rewards: 5704 -Total Receives: 5701 -Stores 4493 1 -Total Receives: 5702 -Stores 4494 1 -Total Receives: 5703 -Stores 4495 1 -Total Receives: 5704 -Stores 4496 1 -tensor([1.5644, 2.2446, 2.3336, 1.3202, 1.5139]) -tensor([10.6459, 10.8069, 10.1719, 10.1719, 10.1719]) -tensor([3.4484, 4.3157, 3.0914, 3.3131]) -tensor([4.2505, 4.1364]) -tensor([3.3752, 2.3474]) -tensor([4.4075, 5.2472, 3.9959, 3.9860, 3.9136]) -tensor([12.2206, 10.0477, 9.4119, 9.8063]) -tensor([3.4738, 3.3913, 2.7559]) -tensor([6.2935, 5.3239]) -loss: 1.8937238564831205e-05, td_error: 160.94021606445312, entropy: 4.201624393463135 -Train step: 0.6872227191925049 Optimizer Step: 81 -tensor([7.9833, 5.5094, 6.5740, 5.3811]) -Total rewards: 5708 -Total Receives: 5705 -Stores 4497 1 -Total Receives: 5706 -Stores 4498 1 -Total Receives: 5707 -Stores 4499 1 -Total Receives: 5708 -Stores 4500 1 -tensor([13.1783, 10.2598, 12.9401]) -tensor([4.9553, 3.3473, 4.5001]) -tensor([4.3880, 4.8073]) -Total rewards: 5713 -Total Receives: 5709 -Stores 4501 1 -Total Receives: 5710 -Stores 4502 1 -Total Receives: 5711 -Stores 4503 1 -Total Receives: 5712 -Stores 4504 1 -Total Receives: 5713 -Stores 4505 1 -tensor([4.4758, 3.7620]) -tensor([2.2927, 2.1381, 1.7588]) -tensor([3.1972, 4.4380]) -tensor([9.6251, 9.3970]) -tensor([130.4717, 129.7908, 129.7908]) -tensor([5.3884, 4.9399]) -tensor([6.6653, 5.5118, 7.1339]) -loss: 3.99758355342783e-05, td_error: 339.73907470703125, entropy: 2.641547679901123 -Train step: 0.3808162212371826 Optimizer Step: 82 -tensor([3.8013, 3.0020, 3.5865]) -tensor([6.2961, 7.4926, 6.0621, 5.5612, 5.6461]) -tensor([0.4431, 0.4287]) -tensor([5.4867, 3.5484]) -tensor([3.8240, 3.0993, 4.3170, 4.2737]) -tensor([10.6761, 10.0053, 10.4228]) -tensor([2.1801, 2.9288]) -tensor([1.5524, 1.5524]) -Total rewards: 5719 -Total Receives: 5714 -Stores 4506 1 -Total Receives: 5715 -Stores 4507 1 -Total Receives: 5716 -Stores 4508 1 -Total Receives: 5717 -Stores 4509 1 -Total Receives: 5718 -Stores 4510 1 -Total Receives: 5719 -Stores 4511 1 -tensor([11.8010, 10.4588, 10.4588, 10.6777, 10.5158, 10.4588]) -Total rewards: 5725 -Total Receives: 5720 -Stores 4512 1 -Total Receives: 5721 -Stores 4513 1 -Total Receives: 5722 -Stores 4514 1 -Total Receives: 5723 -Stores 4515 1 -Total Receives: 5724 -Stores 4516 1 -Total Receives: 5725 -Stores 4517 1 -tensor([2.0023, 1.3316, 1.2468, 1.6245, 1.6436, 1.0168, 1.4462]) -Total rewards: 5731 -Total Receives: 5726 -Stores 4518 1 -Total Receives: 5727 -Stores 4519 1 -Total Receives: 5728 -Stores 4520 1 -Total Receives: 5729 -Stores 4521 1 -Total Receives: 5730 -Stores 4522 1 -Total Receives: 5731 -Stores 4523 1 -tensor([1.6535, 1.8840]) -Total rewards: 5736 -Total Receives: 5732 -Stores 4524 1 -Total Receives: 5733 -Stores 4525 1 -Total Receives: 5734 -Stores 4526 1 -Total Receives: 5735 -Stores 4527 1 -Total Receives: 5736 -Stores 4528 1 -Total rewards: 5743 -Total Receives: 5737 -Stores 4529 1 -Total Receives: 5738 -Stores 4530 1 -Total Receives: 5739 -Stores 4531 1 -Total Receives: 5740 -Stores 4532 1 -Total Receives: 5741 -Stores 4533 1 -Total Receives: 5742 -Stores 4534 1 -Total Receives: 5743 -Stores 4535 1 -tensor([14.1284, 14.1284, 14.6486, 14.4000, 14.1284]) -tensor([9.1367, 9.5603]) -loss: 6.795019544370007e-06, td_error: 57.74822998046875, entropy: 1.4782103300094604 -Train step: 0.30811023712158203 Optimizer Step: 83 -tensor([5.5027, 4.4075, 4.5523, 5.9796]) -tensor([3.0667, 3.8171, 2.7178]) -tensor([6.0540, 8.6139, 7.4103, 6.0205]) -tensor([3.4009, 2.9546]) -tensor([16.8416, 16.9245, 16.8416, 17.5933, 17.2708]) -tensor([1.6941, 1.0960, 0.9155]) -tensor([4.7244, 3.4268, 3.9393, 3.8334, 3.2501, 4.3028, 3.1252, 4.0683]) -tensor([3.2191, 3.7533, 2.9695]) -tensor([3.3105, 2.2425]) -tensor([6.4251, 5.6846, 5.6846]) -tensor([1.3405, 0.9282]) -tensor([7.2827, 6.4172]) -tensor([5.8368, 5.7725, 5.7725]) -tensor([6.8036, 6.8036]) -tensor([9.9141, 9.6807, 8.0743, 9.2854, 8.0743]) -tensor([5.1034, 5.1640]) -loss: 0.19298061728477478, td_error: 3061.075927734375, entropy: 1.6094361543655396 -Train step: 0.7939000129699707 Optimizer Step: 84 -tensor([4.1425, 4.6805]) -Total rewards: 5748 -Total Receives: 5744 -Stores 4536 1 -Total Receives: 5745 -Stores 4537 1 -Total Receives: 5746 -Stores 4538 1 -Total Receives: 5747 -Stores 4539 1 -Total Receives: 5748 -Stores 4540 1 -Total rewards: 5751 -Total Receives: 5749 -Stores 4541 1 -Total Receives: 5750 -Stores 4542 1 -Total Receives: 5751 -Stores 4543 1 -tensor([7.4793, 6.0188, 6.5602, 7.9343]) -tensor([8.3016, 9.7852, 8.2855]) -Total rewards: 5755 -Total Receives: 5752 -Stores 4544 1 -Total Receives: 5753 -Stores 4545 1 -Total Receives: 5754 -Stores 4546 1 -Total Receives: 5755 -Stores 4547 1 -tensor([9.0184, 9.3754]) -tensor([7.4606, 7.4520]) -tensor([1.6920, 1.5899]) -tensor([8.8751, 7.0467, 8.4977, 7.0467]) -tensor([1.7969, 2.1397, 1.9592]) -Total rewards: 5762 -Total Receives: 5756 -Stores 4548 1 -Total Receives: 5757 -Stores 4549 1 -Total Receives: 5758 -Stores 4550 1 -Total Receives: 5759 -Stores 4551 1 -Total Receives: 5760 -Stores 4552 1 -Total Receives: 5761 -Stores 4553 1 -Total Receives: 5762 -Stores 4554 1 -tensor([3.6243, 5.0902]) -loss: 1.595101275597699e-05, td_error: 135.56146240234375, entropy: 2.3046815395355225 -Train step: 0.38950014114379883 Optimizer Step: 85 -tensor([6.7731, 8.4947, 6.7731]) -tensor([8.1489, 7.3086]) -tensor([3.9914, 3.9183]) -tensor([8.3127, 7.0540, 7.5789, 7.4487, 6.0629, 6.8212, 7.9401, 7.6740]) -tensor([2.8878, 3.2409, 2.7808]) -tensor([15.6289, 16.6393, 15.6289, 17.2886, 16.3436]) -tensor([3.2835, 3.4821]) -tensor([238.7227, 238.4326]) -tensor([8.7467, 6.2306, 7.1910, 7.7479]) -Total rewards: 5767 -Total Receives: 5763 -Stores 4555 1 -Total Receives: 5764 -Stores 4556 1 -Total Receives: 5765 -Stores 4557 1 -Total Receives: 5766 -Stores 4558 1 -Total Receives: 5767 -Stores 4559 1 -tensor([8.0173, 8.6356, 8.4930, 6.9733, 7.7835, 9.0672, 8.7445]) -tensor([4.5671, 6.4615]) -tensor([15.5155, 15.5155, 17.3435, 16.4590]) -tensor([3.8419, 3.9787, 3.5619]) -tensor([5.5884, 3.9880, 3.7528]) -loss: 8.044772812354495e-07, td_error: 6.836940288543701, entropy: 3.917191743850708 -Train step: 0.2679150104522705 Optimizer Step: 86 -tensor([11.0322, 11.8825, 11.3289, 11.2864]) -tensor([17.8103, 17.8103, 19.0260, 17.8660]) -tensor([10.7576, 9.1081, 9.6865]) -tensor([9.3454, 9.9413, 9.3454]) -tensor([2.1759, 2.7530, 1.6906, 1.6783]) -Total rewards: 5773 -Total Receives: 5768 -Stores 4560 1 -Total Receives: 5769 -Stores 4561 1 -Total Receives: 5770 -Stores 4562 1 -Total Receives: 5771 -Stores 4563 1 -Total Receives: 5772 -Stores 4564 1 -Total Receives: 5773 -Stores 4565 1 -tensor([7.0988, 7.6712]) -tensor([4.8899, 4.1946, 4.3105]) -tensor([3.5832, 3.6959]) -tensor([12.9878, 14.6720, 13.3430]) -tensor([3.7540, 3.1247, 2.9849]) -tensor([3.2370, 2.6519, 4.1223, 3.5070]) -tensor([3.9833, 3.9250, 3.6775, 4.0098]) -tensor([1.8465, 2.6115]) -tensor([2.7339, 2.3990, 2.4282, 3.0695, 2.3246]) -loss: 1.3930419299867935e-05, td_error: 118.38922119140625, entropy: 3.9375667572021484 -Train step: 0.4513380527496338 Optimizer Step: 87 -tensor([4.7907, 4.5116, 4.8885]) -tensor([7.3450, 6.2178, 7.0434]) -tensor([6.1841, 7.1092]) -tensor([4.0586, 3.6778, 3.4187, 3.6919, 3.3795]) -tensor([13.3888, 13.9181, 12.1978, 13.1493, 14.5393, 14.1815, 11.7317]) -tensor([6.9683, 6.8142]) -tensor([12.8271, 12.9986, 12.0712, 12.0712]) -tensor([3.2849, 3.4881]) -tensor([5.3935, 4.8449, 4.8196]) -tensor([2.3696, 3.8258]) -loss: 0.16931426525115967, td_error: 1882.501708984375, entropy: 1.38629150390625 -Train step: 0.7270190715789795 Optimizer Step: 88 -tensor([13.1909, 14.1964]) -Total rewards: 5778 -Total Receives: 5774 -Stores 4566 1 -Total Receives: 5775 -Stores 4567 1 -Total Receives: 5776 -Stores 4568 1 -Total Receives: 5777 -Stores 4569 1 -Total Receives: 5778 -Stores 4570 1 -tensor([2.7473, 2.4727, 1.9554]) -tensor([8.2538, 8.5859, 7.4805, 8.1115, 8.7797, 7.2162]) -Total rewards: 5783 -Total Receives: 5779 -Stores 4571 1 -Total Receives: 5780 -Stores 4572 1 -Total Receives: 5781 -Stores 4573 1 -Total Receives: 5782 -Stores 4574 1 -Total Receives: 5783 -Stores 4575 1 -tensor([1.8231, 1.3230, 1.1985, 1.1985]) -tensor([2.1464, 1.8100]) -tensor([3.8269, 3.3997, 4.0303, 3.0983]) -tensor([1.1857, 1.0239, 1.0239]) -tensor([2.4966, 2.4952]) -tensor([3.7900, 3.2948, 2.9886]) -tensor([1.1502, 1.1502]) -tensor([5.0768, 4.8347, 3.7394, 3.9176]) -tensor([2.4759, 1.8128]) -tensor([11.2900, 11.3995]) -Total rewards: 5786 -Total Receives: 5784 -Stores 4576 1 -Total Receives: 5785 -Stores 4577 1 -Total Receives: 5786 -Stores 4578 1 -tensor([2.9601, 2.1358, 2.6637]) -tensor([5.6637, 5.0757, 3.6336]) -tensor([4.9565, 3.7645, 3.9681]) -tensor([6.7225, 7.0626, 5.9212, 6.5455, 5.5564, 4.6850]) -loss: 1.3925688335802988e-06, td_error: 11.83489990234375, entropy: 3.624870538711548 -Train step: 0.32685232162475586 Optimizer Step: 89 -tensor([10.6198, 11.0357, 9.6468, 10.4240, 8.1528]) -tensor([4.5718, 5.4098, 5.1025, 5.4448, 4.2254]) -tensor([4.3779, 4.6013]) -Total rewards: 5792 -Total Receives: 5787 -Stores 4579 1 -Total Receives: 5788 -Stores 4580 1 -Total Receives: 5789 -Stores 4581 1 -Total Receives: 5790 -Stores 4582 1 -Total Receives: 5791 -Stores 4583 1 -Total Receives: 5792 -Stores 4584 1 -tensor([2.0253, 1.6987]) -tensor([5.1572, 3.9861]) -tensor([4.3980, 4.3086]) -Total rewards: 5797 -Total Receives: 5793 -Stores 4585 1 -Total Receives: 5794 -Stores 4586 1 -Total Receives: 5795 -Stores 4587 1 -Total Receives: 5796 -Stores 4588 1 -Total Receives: 5797 -Stores 4589 1 -tensor([4.2568, 5.9483, 4.2139]) -Total rewards: 5802 -Total Receives: 5798 -Stores 4590 1 -Total Receives: 5799 -Stores 4591 1 -Total Receives: 5800 -Stores 4592 1 -Total Receives: 5801 -Stores 4593 1 -Total Receives: 5802 -Stores 4594 1 -tensor([5.1023, 3.0605]) -tensor([2.4277, 3.7153, 2.7484]) -tensor([3.1050, 3.0791]) -tensor([12.2113, 12.6193, 11.2384, 12.0188]) -tensor([7.9624, 7.9645, 6.1512]) -tensor([3.1513, 2.9391]) -tensor([9.5107, 9.6084, 8.1337]) -Total rewards: 5810 -Total Receives: 5803 -Stores 4595 1 -Total Receives: 5804 -Stores 4596 1 -Total Receives: 5805 -Stores 4597 1 -Total Receives: 5806 -Stores 4598 1 -Total Receives: 5807 -Stores 4599 1 -Total Receives: 5808 -Stores 4600 1 -Total Receives: 5809 -Stores 4601 1 -Total Receives: 5810 -Stores 4602 1 -loss: 5.5670949222985655e-05, td_error: 473.125732421875, entropy: 3.1549527645111084 -Train step: 0.41942477226257324 Optimizer Step: 90 -tensor([1.7556, 1.0167]) -tensor([2.0722, 1.5046, 1.3038]) -tensor([0.6156, 0.7873, 0.9124]) -Total rewards: 5814 -Total Receives: 5811 -Stores 4603 1 -Total Receives: 5812 -Stores 4604 1 -Total Receives: 5813 -Stores 4605 1 -Total Receives: 5814 -Stores 4606 1 -tensor([9.6700, 8.2183, 8.9279]) -Total rewards: 5821 -Total Receives: 5815 -Stores 4607 1 -Total Receives: 5816 -Stores 4608 1 -Total Receives: 5817 -Stores 4609 1 -Total Receives: 5818 -Stores 4610 1 -Total Receives: 5819 -Stores 4611 1 -Total Receives: 5820 -Stores 4612 1 -Total Receives: 5821 -Stores 4613 1 -tensor([4.0875, 3.6657]) -tensor([2.2005, 2.5125]) -tensor([7.7021, 6.1334]) -tensor([4.4541, 3.4933, 3.6077, 3.7266]) -tensor([3.3512, 5.2903, 3.2706]) -tensor([7.1780, 8.0130, 7.6934, 5.9047]) -tensor([7.4119, 8.1899]) -loss: 7.20726529834792e-06, td_error: 61.251739501953125, entropy: 2.7602245807647705 -Train step: 0.399155855178833 Optimizer Step: 91 -tensor([4.4483, 4.4081, 3.8120]) -tensor([3.4355, 2.0442, 2.5056, 2.6278]) -tensor([8.2207, 8.6088, 8.0941]) -tensor([1.6537, 1.4903]) -tensor([3.0898, 3.7021]) -Total rewards: 5828 -Total Receives: 5822 -Stores 4614 1 -Total Receives: 5823 -Stores 4615 1 -Total Receives: 5824 -Stores 4616 1 -Total Receives: 5825 -Stores 4617 1 -Total Receives: 5826 -Stores 4618 1 -Total Receives: 5827 -Stores 4619 1 -Total Receives: 5828 -Stores 4620 1 -tensor([2.8122, 2.7858, 2.5063]) -tensor([1.3780, 1.7974]) -tensor([1.7204, 1.5939, 1.0868]) -tensor([2.6752, 2.4437]) -tensor([6.7797, 4.2290, 4.0378]) -tensor([1.5083, 1.9375]) -tensor([3.4971, 3.8091]) -tensor([4.7948, 4.1811]) -tensor([2.5690, 2.4321]) -loss: 1.7783610019250773e-05, td_error: 151.13600158691406, entropy: 3.834989070892334 -Train step: 0.3899538516998291 Optimizer Step: 92 -tensor([3.4393, 3.0295]) -tensor([4.4387, 3.8425, 4.7675, 4.1312]) -Total rewards: 5834 -Total Receives: 5829 -Stores 4621 1 -Total Receives: 5830 -Stores 4622 1 -Total Receives: 5831 -Stores 4623 1 -Total Receives: 5832 -Stores 4624 1 -Total Receives: 5833 -Stores 4625 1 -Total Receives: 5834 -Stores 4626 1 -Total rewards: 5839 -Total Receives: 5835 -Stores 4627 1 -Total Receives: 5836 -Stores 4628 1 -Total Receives: 5837 -Stores 4629 1 -Total Receives: 5838 -Stores 4630 1 -Total Receives: 5839 -Stores 4631 1 -tensor([3.2531, 2.4114]) -Total rewards: 5844 -Total Receives: 5840 -Stores 4632 1 -Total Receives: 5841 -Stores 4633 1 -Total Receives: 5842 -Stores 4634 1 -Total Receives: 5843 -Stores 4635 1 -Total Receives: 5844 -Stores 4636 1 -tensor([3.6963, 3.1116, 3.2211]) -tensor([3.2459, 3.6828, 3.8025, 2.4954]) -tensor([7.6336, 8.1073, 5.6061, 5.6061, 5.6061]) -Total rewards: 5849 -Total Receives: 5845 -Stores 4637 1 -Total Receives: 5846 -Stores 4638 1 -Total Receives: 5847 -Stores 4639 1 -Total Receives: 5848 -Stores 4640 1 -Total Receives: 5849 -Stores 4641 1 -tensor([2.1672, 2.0685, 1.9101, 1.7448, 2.2235]) -tensor([5.5588, 4.8810, 5.2384]) -loss: 2.700222648854833e-05, td_error: 229.48141479492188, entropy: 1.7917633056640625 -Train step: 0.391650915145874 Optimizer Step: 93 -tensor([0.4672, 0.4475, 0.4438]) -tensor([2.3327, 2.2326, 2.0620, 1.6575, 1.8862]) -tensor([7.9540, 5.6643, 5.6643, 5.6643]) -tensor([2.1662, 2.7300, 2.6930]) -tensor([1.9258, 1.6851]) -Total rewards: 5852 -Total Receives: 5850 -Stores 4642 1 -Total Receives: 5851 -Stores 4643 1 -Total Receives: 5852 -Stores 4644 1 -tensor([2.7377, 3.3936, 3.5146]) -tensor([2.0023, 1.5538]) -tensor([3.3402, 3.1837]) -tensor([1.1625, 1.1328]) -tensor([3.7632, 2.9141, 4.1747, 2.6415]) -tensor([5.6298, 5.4284, 5.4284, 5.4284]) -tensor([1.6275, 1.3476, 2.5068]) -tensor([0.4647, 0.8395]) -tensor([2.3875, 3.0040, 1.7138]) -tensor([1.0828, 1.3265]) -loss: 2.118548763974104e-06, td_error: 18.004722595214844, entropy: 4.751944541931152 -Train step: 0.33842992782592773 Optimizer Step: 94 -Total rewards: 5857 -Total Receives: 5853 -Stores 4645 1 -Total Receives: 5854 -Stores 4646 1 -Total Receives: 5855 -Stores 4647 1 -Total Receives: 5856 -Stores 4648 1 -Total Receives: 5857 -Stores 4649 1 -Total rewards: 5863 -Total Receives: 5858 -Stores 4650 1 -Total Receives: 5859 -Stores 4651 1 -Total Receives: 5860 -Stores 4652 1 -Total Receives: 5861 -Stores 4653 1 -Total Receives: 5862 -Stores 4654 1 -Total Receives: 5863 -Stores 4655 1 -tensor([1.4992, 2.6330]) -tensor([6.9853, 6.9853, 6.9853]) -tensor([0.5646, 0.3043]) -tensor([2.5727, 1.4986, 1.7885, 1.5735]) -tensor([3.6737, 2.8848, 3.9976, 2.3619]) -Total rewards: 5868 -Total Receives: 5864 -Stores 4656 1 -Total Receives: 5865 -Stores 4657 1 -Total Receives: 5866 -Stores 4658 1 -Total Receives: 5867 -Stores 4659 1 -Total Receives: 5868 -Stores 4660 1 -tensor([4.0241, 3.8418, 3.4416, 3.6474, 2.6583]) -Total rewards: 5875 -Total Receives: 5869 -Stores 4661 1 -Total Receives: 5870 -Stores 4662 1 -Total Receives: 5871 -Stores 4663 1 -Total Receives: 5872 -Stores 4664 1 -Total Receives: 5873 -Stores 4665 1 -Total Receives: 5874 -Stores 4666 1 -Total Receives: 5875 -Stores 4667 1 -tensor([3.2280, 3.2514]) -tensor([5.8205, 4.4788, 6.2787, 4.0761]) -tensor([0.8864, 0.5887]) -tensor([1.7724, 1.3592]) -tensor([1.2221, 1.1793]) -tensor([4.4717, 6.3918, 3.9767]) -tensor([2.1353, 1.4503]) -tensor([2.1704, 2.4739, 2.2407]) -loss: 2.9591367365355836e-06, td_error: 25.1485538482666, entropy: 3.078235626220703 -Train step: 0.39688920974731445 Optimizer Step: 95 -Total rewards: 5882 -Total Receives: 5876 -Stores 4668 1 -Total Receives: 5877 -Stores 4669 1 -Total Receives: 5878 -Stores 4670 1 -Total Receives: 5879 -Stores 4671 1 -Total Receives: 5880 -Stores 4672 1 -Total Receives: 5881 -Stores 4673 1 -Total Receives: 5882 -Stores 4674 1 -tensor([0.7169, 0.5766]) -tensor([2.3190, 3.2261]) -tensor([4.1084, 3.7452]) -Total rewards: 5887 -Total Receives: 5883 -Stores 4675 1 -Total Receives: 5884 -Stores 4676 1 -Total Receives: 5885 -Stores 4677 1 -Total Receives: 5886 -Stores 4678 1 -Total Receives: 5887 -Stores 4679 1 -Total rewards: 5893 -Total Receives: 5888 -Stores 4680 1 -Total Receives: 5889 -Stores 4681 1 -Total Receives: 5890 -Stores 4682 1 -Total Receives: 5891 -Stores 4683 1 -Total Receives: 5892 -Stores 4684 1 -Total Receives: 5893 -Stores 4685 1 -tensor([2.1173, 2.9860]) -tensor([2.6195, 2.6971]) -tensor([4.4309, 3.6538, 3.1050]) -tensor([5.2800, 4.8198, 5.0040, 3.5822]) -Total rewards: 5901 -Total Receives: 5894 -Stores 4686 1 -Total Receives: 5895 -Stores 4687 1 -Total Receives: 5896 -Stores 4688 1 -Total Receives: 5897 -Stores 4689 1 -Total Receives: 5898 -Stores 4690 1 -Total Receives: 5899 -Stores 4691 1 -Total Receives: 5900 -Stores 4692 1 -Total Receives: 5901 -Stores 4693 1 -Total rewards: 5906 -Total Receives: 5902 -Stores 4694 1 -Total Receives: 5903 -Stores 4695 1 -Total Receives: 5904 -Stores 4696 1 -Total Receives: 5905 -Stores 4697 1 -Total Receives: 5906 -Stores 4698 1 -tensor([4.4462, 4.6210, 2.9959]) -tensor([1.3961, 2.1505]) -tensor([1.2842, 1.2912, 2.0635]) -tensor([147.1471, 146.6272]) -tensor([2.8420, 2.4029]) -tensor([4.2132, 4.3509]) -Total rewards: 5909 -Total Receives: 5907 -Stores 4699 1 -Total Receives: 5908 -Stores 4700 1 -Total Receives: 5909 -Stores 4701 1 -tensor([0.3534, 0.3533]) -tensor([5.8688, 5.9876, 5.8688]) -Total rewards: 5914 -Total Receives: 5910 -Stores 4702 1 -Total Receives: 5911 -Stores 4703 1 -Total Receives: 5912 -Stores 4704 1 -Total Receives: 5913 -Stores 4705 1 -Total Receives: 5914 -Stores 4706 1 -loss: 4.278789674572181e-06, td_error: 36.36376953125, entropy: 0.009547235444188118 -Train step: 0.4358799457550049 Optimizer Step: 96 -tensor([3.6027, 3.2605]) -tensor([1.4082, 2.2562]) -tensor([4.4128, 2.5486]) -Total rewards: 5917 -Total Receives: 5915 -Stores 4707 1 -Total Receives: 5916 -Stores 4708 1 -Total Receives: 5917 -Stores 4709 1 -tensor([2.4357, 1.8022, 1.7433]) -tensor([1.6133, 1.5358]) -tensor([2.6754, 2.8123, 3.4162, 2.8240]) -tensor([2.8649, 2.5179]) -tensor([2.7424, 2.1969]) -tensor([4.5717, 5.0794, 3.7195, 3.7195]) -tensor([1.6688, 1.5716]) -tensor([1.0400, 0.8552]) -tensor([3.6966, 3.1557]) -tensor([3.4270, 2.5485]) -loss: 2.695057048640592e-07, td_error: 2.290424108505249, entropy: 3.742276430130005 -Train step: 0.2796318531036377 Optimizer Step: 97 -tensor([2.4010, 2.3366]) -Total rewards: 5922 -Total Receives: 5918 -Stores 4710 1 -Total Receives: 5919 -Stores 4711 1 -Total Receives: 5920 -Stores 4712 1 -Total Receives: 5921 -Stores 4713 1 -Total Receives: 5922 -Stores 4714 1 -tensor([4.0372, 2.9793]) -tensor([8.6275, 8.6275]) -tensor([4.8590, 5.4538, 4.8980]) -tensor([3.8082, 3.2701]) -tensor([2.2340, 1.5755]) -tensor([3.3538, 3.2355, 3.1211]) -Total rewards: 5928 -Total Receives: 5923 -Stores 4715 1 -Total Receives: 5924 -Stores 4716 1 -Total Receives: 5925 -Stores 4717 1 -Total Receives: 5926 -Stores 4718 1 -Total Receives: 5927 -Stores 4719 1 -Total Receives: 5928 -Stores 4720 1 -tensor([4.9128, 3.7057, 3.6212, 3.6212, 3.6212]) -tensor([1.0105, 1.2729, 0.9448, 1.5967]) -tensor([4.2360, 4.1188]) -loss: 3.2099032978294417e-06, td_error: 27.27971839904785, entropy: 2.566481351852417 -Train step: 0.3432450294494629 Optimizer Step: 98 -tensor([2.5145, 1.9369]) -tensor([1.4806, 1.9663, 2.5664, 1.2291, 1.2214]) -tensor([2.8075, 2.5965, 2.2197]) -tensor([4.7306, 4.3282, 4.3282, 4.3282]) -tensor([3.5227, 2.4548, 1.8867]) -Total rewards: 5934 -Total Receives: 5929 -Stores 4721 1 -Total Receives: 5930 -Stores 4722 1 -Total Receives: 5931 -Stores 4723 1 -Total Receives: 5932 -Stores 4724 1 -Total Receives: 5933 -Stores 4725 1 -Total Receives: 5934 -Stores 4726 1 -tensor([1.3224, 2.2326, 1.6365]) -tensor([2.2779, 2.0859]) -tensor([2.1399, 2.7035, 1.7397, 1.7827, 1.6164]) -tensor([1.7055, 2.6780]) -Total rewards: 5940 -Total Receives: 5935 -Stores 4727 1 -Total Receives: 5936 -Stores 4728 1 -Total Receives: 5937 -Stores 4729 1 -Total Receives: 5938 -Stores 4730 1 -Total Receives: 5939 -Stores 4731 1 -Total Receives: 5940 -Stores 4732 1 -tensor([2.9273, 2.5542]) -tensor([2.2055, 1.9833]) -Total rewards: 5945 -Total Receives: 5941 -Stores 4733 1 -Total Receives: 5942 -Stores 4734 1 -Total Receives: 5943 -Stores 4735 1 -Total Receives: 5944 -Stores 4736 1 -Total Receives: 5945 -Stores 4737 1 -tensor([2.8566, 2.4395, 1.9985]) -tensor([4.8892, 4.8892, 4.8892, 4.8892]) -tensor([2.7602, 3.9864]) -tensor([2.0872, 1.6124]) -Total rewards: 5949 -Total Receives: 5946 -Stores 4738 1 -Total Receives: 5947 -Stores 4739 1 -Total Receives: 5948 -Stores 4740 1 -Total Receives: 5949 -Stores 4741 1 -tensor([4.3289, 3.7703, 3.1964]) -loss: 7.406789518427104e-05, td_error: 629.4743041992188, entropy: 1.6094361543655396 -Train step: 0.32572507858276367 Optimizer Step: 99 -tensor([2.4179, 2.4179, 2.4179]) -tensor([1.0714, 0.8632, 1.2104, 1.1144, 0.9375]) -tensor([4.6738, 3.4766, 4.7113]) -tensor([2.9383, 1.7313]) -tensor([1.6108, 1.2937, 1.3437]) -tensor([1.8374, 1.3647, 1.3299, 1.8335]) -tensor([2.0458, 2.0326]) -Total rewards: 5952 -Total Receives: 5950 -Stores 4742 1 -Total Receives: 5951 -Stores 4743 1 -Total Receives: 5952 -Stores 4744 1 -tensor([4.0780, 2.8543, 2.1090]) -Total rewards: 5958 -Total Receives: 5953 -Stores 4745 1 -Total Receives: 5954 -Stores 4746 1 -Total Receives: 5955 -Stores 4747 1 -Total Receives: 5956 -Stores 4748 1 -Total Receives: 5957 -Stores 4749 1 -Total Receives: 5958 -Stores 4750 1 -tensor([2.3519, 2.3519]) -tensor([3.2996, 3.9716, 4.6182, 3.5645, 3.4085]) -Total rewards: 5963 -Total Receives: 5959 -Stores 4751 1 -Total Receives: 5960 -Stores 4752 1 -Total Receives: 5961 -Stores 4753 1 -Total Receives: 5962 -Stores 4754 1 -Total Receives: 5963 -Stores 4755 1 -tensor([3.3531, 4.1776, 3.6790, 3.4923]) -Total rewards: 5970 -Total Receives: 5964 -Stores 4756 1 -Total Receives: 5965 -Stores 4757 1 -Total Receives: 5966 -Stores 4758 1 -Total Receives: 5967 -Stores 4759 1 -Total Receives: 5968 -Stores 4760 1 -Total Receives: 5969 -Stores 4761 1 -Total Receives: 5970 -Stores 4762 1 -tensor([2.2213, 2.0871, 2.1831]) -Total rewards: 5976 -Total Receives: 5971 -Stores 4763 1 -Total Receives: 5972 -Stores 4764 1 -Total Receives: 5973 -Stores 4765 1 -Total Receives: 5974 -Stores 4766 1 -Total Receives: 5975 -Stores 4767 1 -Total Receives: 5976 -Stores 4768 1 -tensor([2.0885, 2.0484, 2.5905]) -tensor([1.6814, 1.2120]) -loss: 2.0014763322251383e-06, td_error: 17.009769439697266, entropy: 2.3476130962371826 -Train step: 0.39261603355407715 Optimizer Step: 100 -tensor([3.3587, 1.8725]) -tensor([1.4657, 1.8451, 1.7680]) -tensor([2.4222, 2.1622, 2.3786]) -tensor([2.9309, 2.5133]) -tensor([3.4720, 2.3750]) -tensor([3.1500, 4.4433, 3.8421]) -tensor([2.2512, 2.9699, 2.7057, 3.0069, 2.7968, 2.3402]) -tensor([2.4278, 1.9490, 1.7353]) -loss: 3.2266487437482283e-07, td_error: 2.7422032356262207, entropy: 4.852029800415039 -Train step: 0.4463629722595215 Optimizer Step: 101 -tensor([3.8542, 2.3178, 3.3845, 3.2241]) -tensor([2.6326, 2.5247, 2.6204, 2.1193]) -tensor([3.0720, 3.0452]) -tensor([2.4753, 2.5260, 2.6482, 3.0752]) -tensor([1.5550, 1.3893]) -tensor([1.8738, 1.7789, 2.0001]) -tensor([0.7662, 1.1005, 0.8381, 0.6918]) -Total rewards: 5979 -Total Receives: 5977 -Stores 4769 1 -Total Receives: 5978 -Stores 4770 1 -Total Receives: 5979 -Stores 4771 1 -tensor([4.0796, 2.6224, 2.4258, 2.3231]) -tensor([4.8329, 5.1664]) -tensor([1.4309, 1.5878, 1.5553]) -tensor([5.9538, 5.3478]) -loss: 1.318530262750528e-08, td_error: 0.11205676198005676, entropy: 4.825449466705322 -Train step: 0.3704080581665039 Optimizer Step: 102 -Total rewards: 5984 -Total Receives: 5980 -Stores 4772 1 -Total Receives: 5981 -Stores 4773 1 -Total Receives: 5982 -Stores 4774 1 -Total Receives: 5983 -Stores 4775 1 -Total Receives: 5984 -Stores 4776 1 -tensor([3.7804, 2.9141]) -tensor([3.6399, 3.1629, 4.9148, 3.4179, 4.7626]) -tensor([2.7637, 2.5610, 2.1084]) -tensor([3.7794, 4.6566]) -tensor([1.9319, 1.9524, 1.5013]) -tensor([3.2491, 3.9034, 3.7009, 3.7592, 3.3395]) -tensor([3.0401, 2.6245, 2.4746, 2.3070, 3.1795]) -tensor([4.6151, 4.7279, 4.1946]) -tensor([4.1793, 4.9053, 4.6821, 4.7447]) -tensor([4.0210, 4.0253, 4.1965, 3.3481]) -tensor([4.0918, 2.1625]) -tensor([3.1885, 2.3193]) -loss: 2.8474412374634994e-06, td_error: 24.19929313659668, entropy: 3.8834643363952637 -Train step: 0.48041510581970215 Optimizer Step: 103 -tensor([3.8528, 4.3740, 4.4366]) -tensor([3.5228, 3.5228]) -tensor([3.4372, 3.5793, 2.8519]) -tensor([3.6237, 2.6568, 2.5801]) -tensor([1.4412, 1.3456]) -tensor([2.2891, 2.2476, 2.4508, 2.2050]) -tensor([0.6524, 0.6260]) -tensor([1.3076, 1.4006, 1.3286, 1.4299]) -tensor([6.1157, 5.6046, 7.3791, 7.2802]) diff --git a/diploma_thesis/d.txt b/diploma_thesis/d.txt deleted file mode 100644 index 1898124..0000000 --- a/diploma_thesis/d.txt +++ /dev/null @@ -1,117246 +0,0 @@ -Evaluating 13 candidates -Task started |Optimizer|model|AdamW_model_2 -Greedy action tensor([ 1.3941, -0.5181, -0.4233, -0.1306]) tensor([0.6545, 0.0967, 0.1063, 0.1425]) -Greedy action tensor([ 1.0746, -0.7312, -0.2550, 0.3027]) tensor([0.5288, 0.0869, 0.1399, 0.2444]) -Greedy action tensor([ 1.0763, -0.4468, -0.1480, -0.1982]) tensor([0.5582, 0.1217, 0.1641, 0.1560]) -Greedy action tensor([ 0.8020, -0.5302, -0.2302, -0.1197]) tensor([0.4955, 0.1308, 0.1765, 0.1971]) -Greedy action tensor([ 1.1618, -0.5959, -0.2931, 0.2940]) tensor([0.5477, 0.0945, 0.1278, 0.2300]) -Greedy action tensor([ 0.8787, -0.4619, -0.0081, -0.2520]) tensor([0.5009, 0.1311, 0.2064, 0.1617]) -Greedy action tensor([ 0.4771, -0.3907, 0.0087, -0.0982]) tensor([0.3834, 0.1610, 0.2400, 0.2157]) -Greedy action tensor([ 1.3909, -0.0537, -0.1733, -0.1628]) tensor([0.6036, 0.1424, 0.1263, 0.1277]) -Greedy action tensor([ 0.6848, -0.5622, -0.1214, 0.2222]) tensor([0.4231, 0.1216, 0.1889, 0.2664]) -Greedy action tensor([ 0.6612, -0.2455, 0.1232, -0.0920]) tensor([0.4067, 0.1643, 0.2375, 0.1915]) -Greedy action tensor([ 1.2104, -0.5212, -0.5049, -0.0590]) tensor([0.6105, 0.1081, 0.1098, 0.1716]) -Greedy action tensor([ 1.2076, -0.4305, -0.2627, -0.2104]) tensor([0.6001, 0.1166, 0.1379, 0.1453]) -Greedy action tensor([ 1.2096, -0.4400, -0.4561, -0.1643]) tensor([0.6119, 0.1176, 0.1157, 0.1549]) -Greedy action tensor([ 1.7129, -0.4793, -0.1988, 0.3529]) tensor([0.6596, 0.0737, 0.0975, 0.1693]) -Greedy action tensor([ 1.5609, -0.2300, -0.4035, -0.1421]) tensor([0.6715, 0.1120, 0.0942, 0.1223]) -Greedy action tensor([ 0.7321, -0.5879, -0.2661, -0.0131]) tensor([0.4739, 0.1266, 0.1746, 0.2249]) -Greedy action tensor([ 1.2741, -0.3220, -0.0033, -0.2708]) tensor([0.5901, 0.1196, 0.1645, 0.1259]) -Greedy action tensor([ 0.9800, -0.8245, -0.4487, 0.3136]) tensor([0.5214, 0.0858, 0.1250, 0.2678]) -Greedy action tensor([ 0.8128, -0.1881, 0.0513, -0.1894]) tensor([0.4542, 0.1670, 0.2121, 0.1667]) -Greedy action tensor([ 1.0827, -0.5062, -0.0883, 0.4012]) tensor([0.4950, 0.1011, 0.1535, 0.2504]) -Greedy action tensor([ 0.9316, -0.5868, 0.1784, 0.0438]) tensor([0.4759, 0.1042, 0.2241, 0.1958]) -Greedy action tensor([ 1.6456, -0.4795, -0.1799, 0.1515]) tensor([0.6645, 0.0793, 0.1071, 0.1491]) -Greedy action tensor([ 0.7240, -0.5897, -0.2076, -0.0930]) tensor([0.4752, 0.1277, 0.1872, 0.2099]) -Greedy action tensor([ 0.7023, -0.4842, -0.0636, 0.1034]) tensor([0.4311, 0.1316, 0.2004, 0.2369]) -Greedy action tensor([ 0.5718, -0.5077, -0.0411, -0.1552]) tensor([0.4229, 0.1437, 0.2291, 0.2044]) -Greedy action tensor([ 1.1937, -0.5461, -0.1461, -0.2063]) tensor([0.5938, 0.1042, 0.1555, 0.1464]) -Greedy action tensor([ 0.9986, -0.2594, 0.0771, -0.0287]) tensor([0.4902, 0.1393, 0.1951, 0.1755]) -Greedy action tensor([ 1.0512, -0.4438, -0.2149, -0.1453]) tensor([0.5530, 0.1240, 0.1559, 0.1671]) -Greedy action tensor([ 1.7162, -0.6477, -0.3591, 0.2493]) tensor([0.6896, 0.0649, 0.0865, 0.1590]) -Greedy action tensor([ 0.9748, -0.4967, -0.2224, -0.2075]) tensor([0.5440, 0.1249, 0.1643, 0.1668]) -Greedy action tensor([ 0.9070, -0.5388, -0.2664, 0.3092]) tensor([0.4773, 0.1124, 0.1477, 0.2626]) -Greedy action tensor([ 0.9091, -0.3155, 0.1422, -0.0121]) tensor([0.4637, 0.1363, 0.2154, 0.1846]) -Greedy action tensor([ 1.3863, -0.6774, -0.3443, 0.2148]) tensor([0.6196, 0.0787, 0.1098, 0.1920]) -Greedy action tensor([ 0.5707, -0.2995, 0.0358, -0.2021]) tensor([0.4055, 0.1698, 0.2375, 0.1872]) -Greedy action tensor([ 1.0184, -0.0266, -0.2569, -0.1585]) tensor([0.5157, 0.1814, 0.1440, 0.1589]) -Greedy action tensor([ 1.0556, -0.1206, -0.0543, -0.1329]) tensor([0.5147, 0.1588, 0.1697, 0.1568]) -Greedy action tensor([ 1.4516, -0.6848, -0.4530, -0.0628]) tensor([0.6725, 0.0794, 0.1001, 0.1479]) -Greedy action tensor([ 0.7118, -0.6047, -0.3019, 0.1983]) tensor([0.4486, 0.1202, 0.1628, 0.2684]) -Greedy action tensor([ 0.5998, -0.4070, 0.0663, -0.1230]) tensor([0.4103, 0.1499, 0.2407, 0.1991]) -Greedy action tensor([ 1.1992, -0.5363, 0.0291, 0.4490]) tensor([0.5105, 0.0900, 0.1584, 0.2411]) -Greedy action tensor([ 1.2636, -0.0899, -0.1293, -0.1480]) tensor([0.5713, 0.1476, 0.1419, 0.1393]) -Greedy action tensor([ 1.5656, -0.5123, -0.1168, 0.0318]) tensor([0.6549, 0.0820, 0.1218, 0.1413]) -Greedy action tensor([ 1.1180, -0.4498, -0.1337, -0.2006]) tensor([0.5675, 0.1183, 0.1623, 0.1518]) -Greedy action tensor([ 1.1367, -0.5605, -0.1578, 0.2783]) tensor([0.5316, 0.0974, 0.1457, 0.2253]) -Greedy action tensor([ 0.8512, -0.4170, -0.2459, 0.1454]) tensor([0.4742, 0.1334, 0.1583, 0.2341]) -Greedy action tensor([ 1.0779, -0.1987, -0.0235, -0.0128]) tensor([0.5135, 0.1433, 0.1707, 0.1725]) -Greedy action tensor([ 1.1344, -0.0484, -0.1770, -0.1370]) tensor([0.5387, 0.1651, 0.1452, 0.1511]) -Greedy action tensor([ 0.4438, -0.3240, 0.1245, -0.0954]) tensor([0.3605, 0.1673, 0.2620, 0.2103]) -Greedy action tensor([ 1.5533, -0.5260, -0.4731, 0.0598]) tensor([0.6750, 0.0844, 0.0890, 0.1516]) -Greedy action tensor([ 0.8734, -0.4998, -0.0985, -0.1366]) tensor([0.5010, 0.1269, 0.1896, 0.1825]) -Greedy action tensor([ 0.3891, -0.3369, 0.1708, -0.0590]) tensor([0.3417, 0.1653, 0.2747, 0.2183]) -Greedy action tensor([ 0.7476, -0.4283, -0.4535, -0.1547]) tensor([0.4963, 0.1531, 0.1493, 0.2013]) -Greedy action tensor([ 1.3800, -0.4842, -0.4041, -0.2746]) tensor([0.6604, 0.1024, 0.1109, 0.1263]) -Greedy action tensor([ 0.9296, -0.4944, -0.4032, 0.0477]) tensor([0.5212, 0.1255, 0.1375, 0.2158]) -Greedy action tensor([ 1.2162, -0.4891, -0.0533, 0.3643]) tensor([0.5293, 0.0962, 0.1487, 0.2258]) -Greedy action tensor([ 1.9805, -0.5471, -0.3911, -0.0295]) tensor([0.7650, 0.0611, 0.0714, 0.1025]) -Greedy action tensor([ 1.4343, -0.6390, -0.2108, 0.2048]) tensor([0.6207, 0.0781, 0.1198, 0.1815]) -Greedy action tensor([ 1.2855, -0.4972, -0.2718, -0.0790]) tensor([0.6119, 0.1029, 0.1289, 0.1563]) -Greedy action tensor([ 0.7220, -0.4026, -0.2182, 0.0945]) tensor([0.4446, 0.1444, 0.1736, 0.2374]) -Greedy action tensor([ 1.3947, -0.3899, -0.0023, -0.0091]) tensor([0.6021, 0.1011, 0.1489, 0.1479]) -Greedy action tensor([ 1.2216, -0.5670, -0.3728, 0.1167]) tensor([0.5877, 0.0983, 0.1193, 0.1947]) -Greedy action tensor([ 0.7907, -0.5423, -0.2006, 0.1488]) tensor([0.4627, 0.1220, 0.1717, 0.2435]) -Greedy action tensor([ 1.3061, -0.5085, -0.0245, 0.0022]) tensor([0.5887, 0.0959, 0.1556, 0.1598]) -Greedy action tensor([ 0.9159, -0.3661, -0.2431, -0.0942]) tensor([0.5114, 0.1419, 0.1605, 0.1862]) -Greedy action tensor([ 0.7823, -0.4956, -0.3556, -0.0898]) tensor([0.4957, 0.1381, 0.1589, 0.2073]) -Greedy action tensor([ 1.2632, -0.2354, -0.1928, -0.1895]) tensor([0.5915, 0.1322, 0.1379, 0.1384]) -Greedy action tensor([ 1.3361, -0.3546, -0.3874, -0.0860]) tensor([0.6234, 0.1149, 0.1112, 0.1504]) -Greedy action tensor([ 1.7840, -0.4069, -0.2517, -0.0732]) tensor([0.7150, 0.0800, 0.0934, 0.1116]) -Greedy action tensor([ 1.3254, -0.4084, -0.2051, 0.3096]) tensor([0.5697, 0.1006, 0.1233, 0.2063]) -Greedy action tensor([ 2.0954, -0.8208, -0.6414, 0.4408]) tensor([0.7633, 0.0413, 0.0494, 0.1459]) -Greedy action tensor([ 1.5626, -0.5425, -0.4895, -0.0595]) tensor([0.6907, 0.0842, 0.0887, 0.1364]) -Greedy action tensor([ 0.9113, -0.4320, -0.2673, 0.1026]) tensor([0.4965, 0.1296, 0.1528, 0.2212]) -Greedy action tensor([ 1.4044, -0.5821, -0.1102, 0.1982]) tensor([0.6037, 0.0828, 0.1328, 0.1807]) -Greedy action tensor([ 1.2522, -0.6511, -0.4548, 0.2720]) tensor([0.5863, 0.0874, 0.1064, 0.2200]) -Greedy action tensor([ 1.0613, -0.4082, -0.2680, -0.0884]) tensor([0.5520, 0.1270, 0.1461, 0.1748]) -Greedy action tensor([ 0.7839, -0.6337, -0.3302, 0.2720]) tensor([0.4609, 0.1117, 0.1512, 0.2762]) -Greedy action tensor([ 0.8671, -0.3861, 0.0653, -0.1554]) tensor([0.4776, 0.1364, 0.2142, 0.1718]) -Greedy action tensor([ 0.8660, -0.3320, 0.0433, -0.3034]) tensor([0.4874, 0.1471, 0.2141, 0.1514]) -Greedy action tensor([ 1.3409, -0.3970, -0.0597, -0.1090]) tensor([0.6035, 0.1062, 0.1487, 0.1416]) -Greedy action tensor([ 0.8271, -0.4723, -0.1621, 0.0701]) tensor([0.4731, 0.1290, 0.1759, 0.2219]) -Greedy action tensor([ 1.5033, -0.7476, -0.2746, 0.2604]) tensor([0.6399, 0.0674, 0.1081, 0.1846]) -Task started |NN|model|Baseline_model_0 -Greedy action tensor([-1.4667, -0.5330, 0.4426, 0.2889]) tensor([0.0622, 0.1582, 0.4197, 0.3599]) -Greedy action tensor([-1.8186, -0.9147, 0.1974, -0.4607]) tensor([0.0673, 0.1661, 0.5051, 0.2616]) -Greedy action tensor([-1.0251, -0.5848, 0.2659, 0.2196]) tensor([0.1035, 0.1608, 0.3764, 0.3594]) -Greedy action tensor([-0.9842, -0.5998, 0.3317, 0.2235]) tensor([0.1048, 0.1539, 0.3907, 0.3506]) -Greedy action tensor([-1.6203, -0.7356, -0.0131, -0.4096]) tensor([0.0850, 0.2059, 0.4240, 0.2852]) -Greedy action tensor([-1.8603, -0.6563, 0.8266, 0.2134]) tensor([0.0371, 0.1236, 0.5445, 0.2949]) -Greedy action tensor([-1.8926, -0.6885, 0.2878, -0.2047]) tensor([0.0538, 0.1793, 0.4760, 0.2909]) -Greedy action tensor([-1.5246, 0.0642, 0.6627, -0.5539]) tensor([0.0573, 0.2807, 0.5107, 0.1513]) -Greedy action tensor([-1.9947, -0.9166, 0.3176, -0.3057]) tensor([0.0514, 0.1511, 0.5191, 0.2783]) -Greedy action tensor([-1.9023, -0.7375, 0.5782, 0.0388]) tensor([0.0433, 0.1386, 0.5168, 0.3013]) -Greedy action tensor([-1.8783, -0.8523, 0.4109, -0.1860]) tensor([0.0524, 0.1462, 0.5169, 0.2846]) -Greedy action tensor([-0.9344, -0.5764, 0.2879, 0.1939]) tensor([0.1122, 0.1604, 0.3808, 0.3466]) -Greedy action tensor([-1.2549, -0.5575, 0.4029, 0.1658]) tensor([0.0807, 0.1620, 0.4233, 0.3340]) -Greedy action tensor([-1.0720, -0.2934, 0.3865, -0.2634]) tensor([0.1029, 0.2240, 0.4422, 0.2309]) -Greedy action tensor([-1.3362, -0.5513, 0.4114, -0.0082]) tensor([0.0787, 0.1725, 0.4518, 0.2970]) -Greedy action tensor([-1.8843, -0.7396, 0.3183, -0.2234]) tensor([0.0542, 0.1702, 0.4903, 0.2852]) -Greedy action tensor([-1.2512, -0.4743, 0.7316, -0.2833]) tensor([0.0765, 0.1664, 0.5557, 0.2014]) -Greedy action tensor([-1.7926, -0.9297, 0.2107, -0.4342]) tensor([0.0682, 0.1615, 0.5052, 0.2651]) -Greedy action tensor([-1.6743, -0.5539, 0.2695, -0.2524]) tensor([0.0658, 0.2018, 0.4597, 0.2728]) -Greedy action tensor([-1.4250, -0.1429, 0.4870, -0.2070]) tensor([0.0678, 0.2443, 0.4587, 0.2292]) -Greedy action tensor([-1.8786, -0.5532, 0.7574, 0.0128]) tensor([0.0394, 0.1485, 0.5506, 0.2615]) -Greedy action tensor([-1.3504, -0.0760, 0.4989, -0.2656]) tensor([0.0720, 0.2575, 0.4575, 0.2130]) -Greedy action tensor([-1.9494, -0.9670, 0.2940, -0.3323]) tensor([0.0551, 0.1473, 0.5197, 0.2778]) -Greedy action tensor([-1.5713, -0.6172, 0.1450, -0.3172]) tensor([0.0790, 0.2050, 0.4393, 0.2767]) -Greedy action tensor([-2.0512, -0.8905, 0.6887, -0.0045]) tensor([0.0365, 0.1164, 0.5648, 0.2824]) -Greedy action tensor([-1.6741, -0.6704, 0.7129, 0.3125]) tensor([0.0457, 0.1246, 0.4969, 0.3329]) -Greedy action tensor([-1.8771, -0.5768, 0.5631, -0.1093]) tensor([0.0454, 0.1668, 0.5215, 0.2662]) -Greedy action tensor([-1.1137, -0.6096, 0.4579, 0.6577]) tensor([0.0749, 0.1240, 0.3607, 0.4404]) -Greedy action tensor([-1.6578, 0.1544, 0.4869, -0.1708]) tensor([0.0498, 0.3049, 0.4251, 0.2202]) -Greedy action tensor([-2.0207, -0.7599, 0.8429, 0.0814]) tensor([0.0331, 0.1167, 0.5796, 0.2707]) -Greedy action tensor([-1.8452, -0.7900, 0.8342, 0.1296]) tensor([0.0390, 0.1120, 0.5682, 0.2808]) -Greedy action tensor([-1.6453, -0.6205, 0.3230, -0.0805]) tensor([0.0636, 0.1772, 0.4552, 0.3040]) -Greedy action tensor([-1.5365, -0.7130, -0.0461, -0.4290]) tensor([0.0931, 0.2121, 0.4131, 0.2817]) -Greedy action tensor([-1.9381, -0.6910, 0.3804, -0.1104]) tensor([0.0479, 0.1668, 0.4871, 0.2981]) -Greedy action tensor([-1.0760, 0.0312, -0.1267, -0.2826]) tensor([0.1134, 0.3430, 0.2929, 0.2507]) -Greedy action tensor([-0.8967, 0.5014, 0.2478, -0.3441]) tensor([0.1007, 0.4078, 0.3164, 0.1751]) -Greedy action tensor([-1.6150, -0.7227, 0.6228, -0.1266]) tensor([0.0580, 0.1415, 0.5436, 0.2569]) -Greedy action tensor([-1.8576, -0.9221, 0.3358, -0.4585]) tensor([0.0604, 0.1538, 0.5412, 0.2446]) -Greedy action tensor([-2.0414, -0.7496, 0.9250, 0.1379]) tensor([0.0304, 0.1106, 0.5903, 0.2687]) -Greedy action tensor([-1.0677, -0.0112, 0.4009, -0.3071]) tensor([0.0965, 0.2777, 0.4193, 0.2065]) -Greedy action tensor([-1.4864, 0.3017, 0.6038, -0.5207]) tensor([0.0565, 0.3379, 0.4571, 0.1485]) -Greedy action tensor([-1.1268, -0.0311, 0.5192, -0.4773]) tensor([0.0902, 0.2697, 0.4676, 0.1726]) -Greedy action tensor([-1.7689, -0.6209, 0.7985, 0.1854]) tensor([0.0413, 0.1300, 0.5375, 0.2912]) -Greedy action tensor([-1.6960, -0.9684, 0.0757, -0.8181]) tensor([0.0881, 0.1823, 0.5178, 0.2118]) -Greedy action tensor([-1.5922, -0.6559, 0.6625, -0.1835]) tensor([0.0582, 0.1485, 0.5551, 0.2382]) -Greedy action tensor([-1.5238, -0.7825, -0.1804, -0.4045]) tensor([0.1001, 0.2100, 0.3835, 0.3065]) -Greedy action tensor([-1.2686, 0.2582, 0.2948, -0.5866]) tensor([0.0809, 0.3726, 0.3864, 0.1601]) -Greedy action tensor([-1.9758, -0.8797, 0.3688, -0.1212]) tensor([0.0481, 0.1438, 0.5012, 0.3070]) -Greedy action tensor([-1.6848, 0.1049, 0.4124, -0.0400]) tensor([0.0492, 0.2948, 0.4009, 0.2550]) -Greedy action tensor([-2.0345, -0.7951, 0.4680, -0.1024]) tensor([0.0424, 0.1465, 0.5181, 0.2929]) -Greedy action tensor([-2.0532, -0.7103, 0.6721, 0.1445]) tensor([0.0344, 0.1316, 0.5245, 0.3095]) -Greedy action tensor([-1.9923, -0.9602, 0.4537, -0.2008]) tensor([0.0468, 0.1315, 0.5407, 0.2810]) -Greedy action tensor([-1.3556, -0.5750, 0.4158, 0.2190]) tensor([0.0720, 0.1571, 0.4232, 0.3476]) -Greedy action tensor([-1.8585, -0.7320, 0.2704, -0.2591]) tensor([0.0573, 0.1769, 0.4820, 0.2838]) -Greedy action tensor([-1.1254, -0.3644, -0.1042, -0.0149]) tensor([0.1117, 0.2391, 0.3101, 0.3391]) -Greedy action tensor([-1.3951, -0.4502, 0.5376, -0.2841]) tensor([0.0740, 0.1903, 0.5110, 0.2247]) -Greedy action tensor([-1.9168, -0.5500, 0.5734, -0.1912]) tensor([0.0442, 0.1736, 0.5337, 0.2485]) -Greedy action tensor([-1.1413, -0.5771, 0.2392, 0.2985]) tensor([0.0913, 0.1605, 0.3630, 0.3852]) -Greedy action tensor([-1.2022, -0.3702, -0.0717, -0.3194]) tensor([0.1135, 0.2608, 0.3514, 0.2743]) -Greedy action tensor([-1.6684, -0.6215, 0.5552, 0.0458]) tensor([0.0536, 0.1528, 0.4957, 0.2978]) -Greedy action tensor([-1.3022, -0.5174, 0.5819, -0.4659]) tensor([0.0828, 0.1815, 0.5447, 0.1910]) -Greedy action tensor([-1.7359, -0.9460, 0.4013, -0.3229]) tensor([0.0633, 0.1396, 0.5369, 0.2602]) -Greedy action tensor([-1.3991, -0.6247, 0.4185, 0.1489]) tensor([0.0713, 0.1546, 0.4389, 0.3352]) -Greedy action tensor([-2.0467, -0.9021, 0.6729, -0.0076]) tensor([0.0370, 0.1163, 0.5620, 0.2846]) -Greedy action tensor([-1.5619, -0.6278, 0.5737, 0.0384]) tensor([0.0590, 0.1500, 0.4989, 0.2921]) -Greedy action tensor([-1.4997, -0.5800, 0.7644, -0.0297]) tensor([0.0572, 0.1435, 0.5505, 0.2488]) -Greedy action tensor([-0.8937, -0.5155, 0.4678, -0.3299]) tensor([0.1232, 0.1798, 0.4806, 0.2164]) -Greedy action tensor([-1.6120, -0.2935, 0.5542, -0.2026]) tensor([0.0570, 0.2129, 0.4970, 0.2332]) -Greedy action tensor([-1.3773, -0.6096, 0.3761, 0.1398]) tensor([0.0741, 0.1598, 0.4281, 0.3380]) -Greedy action tensor([-1.2801, -0.5673, 0.3240, 0.2004]) tensor([0.0806, 0.1644, 0.4008, 0.3542]) -Greedy action tensor([-1.7016, -0.4977, 0.5489, 0.0326]) tensor([0.0513, 0.1710, 0.4870, 0.2906]) -Greedy action tensor([-2.0455, -0.9388, 0.5801, -0.1581]) tensor([0.0409, 0.1238, 0.5652, 0.2701]) -Greedy action tensor([-1.0268, -0.4605, 0.4941, -0.3500]) tensor([0.1075, 0.1893, 0.4918, 0.2114]) -Greedy action tensor([-1.2365, -0.5856, 0.3436, 0.0867]) tensor([0.0867, 0.1663, 0.4212, 0.3258]) -Greedy action tensor([-2.0533, -0.8805, 0.7173, 0.0415]) tensor([0.0353, 0.1141, 0.5638, 0.2868]) -Greedy action tensor([-1.5462, -0.8462, 0.4010, -0.4442]) tensor([0.0767, 0.1545, 0.5378, 0.2310]) -Greedy action tensor([-1.8013, -0.6718, 0.7081, 0.1252]) tensor([0.0430, 0.1330, 0.5288, 0.2952]) -Greedy action tensor([-1.4784, -0.5824, 0.8215, 0.0105]) tensor([0.0560, 0.1372, 0.5586, 0.2482]) -Greedy action tensor([-1.6905, -0.6361, 0.0656, -0.3203]) tensor([0.0736, 0.2111, 0.4258, 0.2895]) -Greedy action tensor([-1.5948, -0.8602, 0.7522, -0.1097]) tensor([0.0557, 0.1161, 0.5823, 0.2459]) -Greedy action tensor([-1.4811, -0.6042, 0.4937, -0.0685]) tensor([0.0680, 0.1633, 0.4896, 0.2791]) -Task started |NN|model|Tanh_model_2 -Greedy action tensor([ 1.7005, 0.2508, -0.4698, 0.8906]) tensor([0.5575, 0.1308, 0.0636, 0.2480]) -Greedy action tensor([ 1.2838, -0.7745, 1.2436, 0.4575]) tensor([0.3959, 0.0505, 0.3803, 0.1733]) -Greedy action tensor([ 1.6697, -0.1964, -0.1741, 1.5811]) tensor([0.4488, 0.0694, 0.0710, 0.4107]) -Greedy action tensor([ 1.4701, -0.6168, 0.1719, 0.7304]) tensor([0.5335, 0.0662, 0.1457, 0.2546]) -Greedy action tensor([ 0.7372, -1.2204, 0.3732, 0.6271]) tensor([0.3661, 0.0517, 0.2544, 0.3279]) -Greedy action tensor([ 1.7815, -0.8205, -0.1083, 1.0903]) tensor([0.5793, 0.0429, 0.0875, 0.2902]) -Greedy action tensor([1.4651, 0.2898, 1.2829, 0.7550]) tensor([0.3797, 0.1172, 0.3164, 0.1867]) -Greedy action tensor([ 1.9359, -0.8256, 0.4398, 1.6716]) tensor([0.4866, 0.0308, 0.1090, 0.3736]) -Greedy action tensor([1.4642, 0.0434, 1.4493, 0.3066]) tensor([0.3935, 0.0951, 0.3877, 0.1237]) -Greedy action tensor([ 1.0831, -0.1867, 1.5107, 1.0481]) tensor([0.2646, 0.0743, 0.4057, 0.2554]) -Greedy action tensor([ 1.8656, -0.2281, 0.1885, 0.2274]) tensor([0.6647, 0.0819, 0.1242, 0.1292]) -Greedy action tensor([ 1.3853, -0.0966, -0.1870, 1.5115]) tensor([0.3892, 0.0884, 0.0808, 0.4416]) -Greedy action tensor([ 1.6405, 1.1969, -0.5382, 0.2295]) tensor([0.5003, 0.3210, 0.0566, 0.1220]) -Greedy action tensor([ 1.7404, -0.1112, 1.2296, 0.8221]) tensor([0.4638, 0.0728, 0.2783, 0.1851]) -Greedy action tensor([ 1.5079, -0.7539, 0.5595, 1.0442]) tensor([0.4716, 0.0491, 0.1827, 0.2966]) -Greedy action tensor([ 1.9757, -1.0771, 0.1389, 0.7631]) tensor([0.6649, 0.0314, 0.1059, 0.1978]) -Greedy action tensor([ 1.9492, -0.3008, -0.4525, 1.2097]) tensor([0.5976, 0.0630, 0.0541, 0.2853]) -Greedy action tensor([ 1.2130, -0.4375, 0.4049, 0.4034]) tensor([0.4802, 0.0922, 0.2140, 0.2137]) -Greedy action tensor([2.2348, 0.0165, 0.6477, 1.6614]) tensor([0.5328, 0.0580, 0.1090, 0.3003]) -Greedy action tensor([ 1.4241, -0.7186, 1.8253, 0.5223]) tensor([0.3315, 0.0389, 0.4951, 0.1345]) -Greedy action tensor([ 1.6503, -0.4744, 0.6532, 1.9881]) tensor([0.3460, 0.0413, 0.1276, 0.4850]) -Greedy action tensor([ 1.4883, -0.1492, 1.1372, 1.3486]) tensor([0.3613, 0.0703, 0.2543, 0.3142]) -Greedy action tensor([ 2.0374, -0.2718, -0.0858, 0.8279]) tensor([0.6590, 0.0655, 0.0789, 0.1966]) -Greedy action tensor([1.2438, 0.0051, 0.6415, 0.9730]) tensor([0.3846, 0.1114, 0.2106, 0.2934]) -Greedy action tensor([ 1.9818, -0.1957, 0.2926, 1.3079]) tensor([0.5532, 0.0627, 0.1022, 0.2820]) -Greedy action tensor([ 1.2588, -0.2214, 0.5159, 0.9990]) tensor([0.4041, 0.0920, 0.1923, 0.3117]) -Greedy action tensor([ 2.3093, -0.2860, 1.0040, 1.6451]) tensor([0.5375, 0.0401, 0.1457, 0.2767]) -Greedy action tensor([0.9778, 0.1383, 0.0534, 1.0064]) tensor([0.3499, 0.1511, 0.1388, 0.3601]) -Greedy action tensor([1.9973, 1.0636, 0.8783, 1.0369]) tensor([0.4756, 0.1870, 0.1553, 0.1820]) -Greedy action tensor([ 1.7576, -0.1076, -0.2140, 0.8306]) tensor([0.5918, 0.0916, 0.0824, 0.2342]) -Greedy action tensor([ 0.8107, -0.7060, 0.8636, 0.4190]) tensor([0.3390, 0.0744, 0.3574, 0.2292]) -Greedy action tensor([ 1.5125, -0.5862, 1.0781, 1.6269]) tensor([0.3458, 0.0424, 0.2240, 0.3878]) -Greedy action tensor([ 1.2016, -0.3483, 0.2805, 0.9486]) tensor([0.4190, 0.0889, 0.1668, 0.3253]) -Greedy action tensor([ 2.0303, -0.3278, 0.7278, 1.9919]) tensor([0.4294, 0.0406, 0.1167, 0.4132]) -Greedy action tensor([ 1.5349, -0.1410, 1.3479, 1.1843]) tensor([0.3675, 0.0688, 0.3048, 0.2588]) -Greedy action tensor([ 2.2596, -0.4594, 0.6857, 1.8959]) tensor([0.5080, 0.0335, 0.1053, 0.3532]) -Greedy action tensor([ 1.2179, -0.4458, 0.2004, 0.8163]) tensor([0.4504, 0.0853, 0.1628, 0.3014]) -Greedy action tensor([ 0.7008, -0.2277, 0.8816, 0.1906]) tensor([0.3131, 0.1237, 0.3752, 0.1880]) -Greedy action tensor([1.8859, 0.1811, 0.8597, 1.0190]) tensor([0.5101, 0.0927, 0.1828, 0.2144]) -Greedy action tensor([ 1.2262, -1.2257, 1.6916, 1.0789]) tensor([0.2823, 0.0243, 0.4497, 0.2437]) -Greedy action tensor([ 2.2179, -0.5210, 0.4405, 1.7579]) tensor([0.5362, 0.0347, 0.0907, 0.3385]) -Greedy action tensor([1.9601, 0.1903, 0.5759, 1.0271]) tensor([0.5512, 0.0939, 0.1381, 0.2168]) -Greedy action tensor([ 2.1336, -0.2392, 0.2886, 0.7108]) tensor([0.6701, 0.0625, 0.1059, 0.1615]) -Greedy action tensor([ 1.8785, -0.6526, 1.5226, 0.9995]) tensor([0.4555, 0.0362, 0.3191, 0.1891]) -Greedy action tensor([ 1.0201, -1.0709, 0.5488, 0.8385]) tensor([0.3873, 0.0479, 0.2418, 0.3230]) -Greedy action tensor([ 1.6647, -1.0266, 1.0748, 1.4904]) tensor([0.4061, 0.0275, 0.2252, 0.3412]) -Greedy action tensor([ 2.3802, -0.8082, 0.7673, 2.0446]) tensor([0.5114, 0.0211, 0.1019, 0.3656]) -Greedy action tensor([0.9639, 0.4208, 0.5869, 1.1124]) tensor([0.2918, 0.1695, 0.2001, 0.3385]) -Greedy action tensor([ 1.6817, -0.5786, 0.0746, 0.3866]) tensor([0.6335, 0.0661, 0.1270, 0.1735]) -Greedy action tensor([1.8996, 0.4726, 1.5488, 0.6350]) tensor([0.4491, 0.1078, 0.3162, 0.1268]) -Greedy action tensor([1.8009, 0.4370, 1.5265, 1.2399]) tensor([0.3866, 0.0989, 0.2939, 0.2206]) -Greedy action tensor([ 2.0351, -0.6312, -0.2101, 0.8656]) tensor([0.6730, 0.0468, 0.0713, 0.2090]) -Greedy action tensor([ 1.5450, 0.2730, -0.6672, 0.8966]) tensor([0.5228, 0.1465, 0.0572, 0.2734]) -Greedy action tensor([ 1.6011, -0.7967, 0.4310, 0.4616]) tensor([0.5810, 0.0528, 0.1803, 0.1859]) -Greedy action tensor([ 1.1927, -0.6487, 1.7955, 0.5025]) tensor([0.2867, 0.0455, 0.5240, 0.1438]) -Greedy action tensor([0.8131, 0.5999, 0.7017, 1.1308]) tensor([0.2453, 0.1982, 0.2194, 0.3371]) -Greedy action tensor([ 1.3665, -0.9554, 1.6151, 0.4438]) tensor([0.3600, 0.0353, 0.4616, 0.1431]) -Greedy action tensor([ 0.7086, 0.1696, -0.6287, 1.6700]) tensor([0.2241, 0.1307, 0.0589, 0.5863]) -Greedy action tensor([1.1687, 0.1108, 0.7556, 1.3510]) tensor([0.3116, 0.1082, 0.2062, 0.3740]) -Greedy action tensor([ 1.1303, -1.0816, 0.7594, 1.1862]) tensor([0.3500, 0.0383, 0.2415, 0.3701]) -Greedy action tensor([ 1.5685, -0.9882, -0.0279, 0.8654]) tensor([0.5633, 0.0437, 0.1141, 0.2789]) -Greedy action tensor([ 1.2407, -0.1582, 0.6029, 0.8563]) tensor([0.4071, 0.1005, 0.2152, 0.2772]) -Greedy action tensor([ 1.2187, -1.2356, 0.3589, 1.7716]) tensor([0.3079, 0.0265, 0.1303, 0.5353]) -Greedy action tensor([ 1.8033, -0.3962, 0.9692, 1.0359]) tensor([0.4977, 0.0552, 0.2161, 0.2310]) -Greedy action tensor([ 1.1645, -0.5965, -0.5427, 1.1673]) tensor([0.4244, 0.0730, 0.0770, 0.4256]) -Greedy action tensor([ 1.7117, -0.1540, -0.1339, 1.0204]) tensor([0.5514, 0.0853, 0.0871, 0.2762]) -Greedy action tensor([ 1.1654, 0.1792, -0.9090, 0.3609]) tensor([0.5139, 0.1917, 0.0646, 0.2299]) -Greedy action tensor([ 0.9246, -0.4625, 1.7655, 0.9826]) tensor([0.2161, 0.0540, 0.5010, 0.2290]) -Greedy action tensor([ 0.7349, -0.1690, 0.9105, 1.1626]) tensor([0.2421, 0.0980, 0.2886, 0.3713]) -Greedy action tensor([0.7710, 0.4911, 0.5907, 0.7887]) tensor([0.2771, 0.2094, 0.2314, 0.2820]) -Greedy action tensor([ 1.1999, 0.1913, -0.9111, 0.5674]) tensor([0.4958, 0.1808, 0.0600, 0.2634]) -Greedy action tensor([ 1.0079, -0.5245, 1.0410, 0.7419]) tensor([0.3316, 0.0716, 0.3427, 0.2541]) -Greedy action tensor([ 0.6510, -0.6300, 0.8869, 0.8229]) tensor([0.2680, 0.0744, 0.3393, 0.3183]) -Greedy action tensor([ 1.8350, -0.5892, -0.3674, 1.2735]) tensor([0.5651, 0.0500, 0.0625, 0.3223]) -Greedy action tensor([1.6306, 0.3322, 0.6016, 0.5123]) tensor([0.5109, 0.1395, 0.1826, 0.1670]) -Greedy action tensor([ 1.3269, -0.6842, 0.9625, 0.2726]) tensor([0.4594, 0.0615, 0.3191, 0.1601]) -Greedy action tensor([2.2662, 0.1843, 0.6263, 1.3623]) tensor([0.5801, 0.0723, 0.1125, 0.2350]) -Greedy action tensor([ 1.9664, -0.2705, 0.3956, 1.3825]) tensor([0.5341, 0.0570, 0.1110, 0.2979]) -Greedy action tensor([ 1.5997, -0.9127, 0.9740, 1.3283]) tensor([0.4205, 0.0341, 0.2249, 0.3205]) -Greedy action tensor([ 0.7922, -0.0567, 0.3102, 0.5087]) tensor([0.3573, 0.1529, 0.2207, 0.2691]) -Greedy action tensor([ 1.5557, -0.5501, 0.2220, 0.8226]) tensor([0.5360, 0.0653, 0.1412, 0.2575]) -Greedy action tensor([ 2.1044, -0.3116, 0.6512, 1.8548]) tensor([0.4757, 0.0425, 0.1112, 0.3706]) -Task started |NN|model|Relu_model_1 -Greedy action tensor([ 0.4599, -0.0402, -0.0223, -0.2500]) tensor([0.3683, 0.2233, 0.2274, 0.1811]) -Greedy action tensor([ 0.5547, -0.2145, -0.0222, -0.4202]) tensor([0.4163, 0.1929, 0.2338, 0.1570]) -Greedy action tensor([ 0.3184, 0.1479, -0.0023, -0.3334]) tensor([0.3236, 0.2729, 0.2348, 0.1686]) -Greedy action tensor([ 0.2994, 0.0473, 0.1314, -0.2154]) tensor([0.3105, 0.2413, 0.2625, 0.1856]) -Greedy action tensor([ 0.4731, -0.1568, 0.0563, -0.2758]) tensor([0.3753, 0.1999, 0.2474, 0.1775]) -Greedy action tensor([ 0.4009, 0.1721, 0.1616, -0.1310]) tensor([0.3154, 0.2509, 0.2483, 0.1853]) -Greedy action tensor([ 0.2273, 0.0022, 0.2433, -0.0831]) tensor([0.2819, 0.2250, 0.2864, 0.2067]) -Greedy action tensor([ 0.5901, -0.0949, 0.0474, -0.3083]) tensor([0.4012, 0.2022, 0.2332, 0.1634]) -Greedy action tensor([ 0.2278, 0.0930, 0.1282, -0.1931]) tensor([0.2911, 0.2544, 0.2635, 0.1911]) -Greedy action tensor([ 0.2218, 0.0498, 0.1383, -0.1207]) tensor([0.2880, 0.2425, 0.2650, 0.2045]) -Greedy action tensor([ 0.3753, -0.0177, 0.0973, -0.1061]) tensor([0.3278, 0.2213, 0.2483, 0.2026]) -Greedy action tensor([ 0.3672, 0.0799, 0.0330, -0.3138]) tensor([0.3364, 0.2524, 0.2409, 0.1703]) -Greedy action tensor([ 0.3601, 0.0374, 0.0590, -0.1291]) tensor([0.3250, 0.2353, 0.2405, 0.1992]) -Greedy action tensor([ 0.4413, -0.1248, 0.1003, -0.4231]) tensor([0.3703, 0.2103, 0.2634, 0.1560]) -Greedy action tensor([ 0.6407, -0.1185, 0.1090, -0.2212]) tensor([0.4036, 0.1889, 0.2371, 0.1704]) -Greedy action tensor([ 0.3975, 0.1062, 0.0775, -0.1900]) tensor([0.3301, 0.2467, 0.2397, 0.1835]) -Greedy action tensor([ 0.4382, 0.1616, 0.1876, -0.2568]) tensor([0.3294, 0.2498, 0.2564, 0.1644]) -Greedy action tensor([ 0.3352, -0.1642, -0.0340, -0.3667]) tensor([0.3579, 0.2172, 0.2474, 0.1774]) -Greedy action tensor([ 0.2187, 0.0989, 0.1387, -0.1885]) tensor([0.2877, 0.2552, 0.2656, 0.1915]) -Greedy action tensor([ 0.4148, -0.1168, 0.2245, -0.3057]) tensor([0.3447, 0.2026, 0.2850, 0.1677]) -Greedy action tensor([ 0.3372, 0.2381, 0.1793, -0.0957]) tensor([0.2934, 0.2657, 0.2505, 0.1903]) -Greedy action tensor([ 0.4747, -0.0555, 0.1577, -0.4051]) tensor([0.3661, 0.2154, 0.2666, 0.1519]) -Greedy action tensor([ 0.3249, 0.0994, 0.2067, -0.3029]) tensor([0.3105, 0.2478, 0.2759, 0.1657]) -Greedy action tensor([ 0.3013, -0.0351, 0.2405, -0.2664]) tensor([0.3104, 0.2217, 0.2921, 0.1759]) -Greedy action tensor([ 0.2308, 0.0441, 0.2108, -0.1561]) tensor([0.2866, 0.2378, 0.2809, 0.1947]) -Greedy action tensor([ 0.4226, 0.0404, 0.1334, -0.1858]) tensor([0.3361, 0.2293, 0.2517, 0.1829]) -Greedy action tensor([ 0.2713, 0.1925, 0.1860, -0.1644]) tensor([0.2866, 0.2649, 0.2632, 0.1854]) -Greedy action tensor([ 0.2040, 0.0993, 0.0989, -0.1158]) tensor([0.2835, 0.2553, 0.2552, 0.2059]) -Greedy action tensor([ 0.5180, -0.1420, 0.1244, -0.3376]) tensor([0.3822, 0.1975, 0.2578, 0.1624]) -Greedy action tensor([ 0.3417, 0.0704, 0.2385, -0.0925]) tensor([0.3019, 0.2302, 0.2723, 0.1956]) -Greedy action tensor([ 0.2273, 0.0418, 0.1059, -0.1912]) tensor([0.2963, 0.2462, 0.2625, 0.1950]) -Greedy action tensor([ 0.2527, 0.1853, 0.0842, -0.1741]) tensor([0.2913, 0.2724, 0.2462, 0.1901]) -Greedy action tensor([ 0.6312, -0.2108, -0.0590, -0.4674]) tensor([0.4414, 0.1902, 0.2213, 0.1471]) -Greedy action tensor([ 0.2178, 0.0656, 0.1746, -0.2491]) tensor([0.2904, 0.2494, 0.2781, 0.1821]) -Greedy action tensor([ 0.4549, -0.2298, 0.0982, -0.2052]) tensor([0.3675, 0.1853, 0.2573, 0.1899]) -Greedy action tensor([ 0.3872, 0.1818, 0.0965, -0.2660]) tensor([0.3244, 0.2642, 0.2426, 0.1688]) -Greedy action tensor([ 0.5990, -0.2200, -0.0102, -0.3666]) tensor([0.4228, 0.1864, 0.2299, 0.1610]) -Greedy action tensor([ 0.2733, 0.0335, 0.0010, -0.0759]) tensor([0.3074, 0.2418, 0.2341, 0.2167]) -Greedy action tensor([ 0.2726, 0.1018, 0.2229, -0.1135]) tensor([0.2878, 0.2426, 0.2739, 0.1956]) -Greedy action tensor([ 0.4952, -0.1643, 0.1867, -0.4561]) tensor([0.3791, 0.1960, 0.2785, 0.1464]) -Greedy action tensor([ 0.4961, -0.0199, 0.0744, -0.2003]) tensor([0.3635, 0.2170, 0.2384, 0.1811]) -Greedy action tensor([ 0.7020, -0.1561, 0.0592, -0.5181]) tensor([0.4454, 0.1889, 0.2342, 0.1315]) -Greedy action tensor([ 0.3717, 0.1297, 0.1787, -0.2413]) tensor([0.3173, 0.2491, 0.2616, 0.1719]) -Greedy action tensor([ 0.4238, -0.2265, 0.2012, -0.5291]) tensor([0.3693, 0.1927, 0.2956, 0.1424]) -Greedy action tensor([ 0.3742, -0.3027, -0.0900, -0.4624]) tensor([0.3891, 0.1977, 0.2446, 0.1685]) -Greedy action tensor([ 0.5096, 0.0243, 0.0900, -0.3351]) tensor([0.3700, 0.2278, 0.2432, 0.1590]) -Greedy action tensor([ 0.5140, -0.0909, 0.0755, -0.2315]) tensor([0.3751, 0.2049, 0.2420, 0.1780]) -Greedy action tensor([ 0.1845, 0.1338, 0.2005, -0.2414]) tensor([0.2762, 0.2626, 0.2807, 0.1804]) -Greedy action tensor([ 0.8148, -0.1227, -0.0816, -0.6249]) tensor([0.4910, 0.1923, 0.2004, 0.1164]) -Greedy action tensor([ 0.3059, 0.0947, 0.1857, -0.1471]) tensor([0.3001, 0.2430, 0.2661, 0.1908]) -Greedy action tensor([ 0.1927, 0.0904, 0.1906, -0.1917]) tensor([0.2792, 0.2520, 0.2786, 0.1901]) -Greedy action tensor([ 0.3896, 0.0140, 0.0983, -0.2098]) tensor([0.3352, 0.2302, 0.2505, 0.1841]) -Greedy action tensor([ 0.3189, 0.0310, 0.0876, -0.1257]) tensor([0.3140, 0.2355, 0.2492, 0.2013]) -Greedy action tensor([ 0.3129, -0.0911, -0.0366, -0.3766]) tensor([0.3479, 0.2323, 0.2453, 0.1746]) -Greedy action tensor([ 0.4181, -0.0550, 0.1617, -0.3654]) tensor([0.3504, 0.2183, 0.2712, 0.1601]) -Greedy action tensor([ 0.7592, -0.2381, 0.1354, -0.4351]) tensor([0.4530, 0.1671, 0.2427, 0.1372]) -Greedy action tensor([ 0.5958, -0.3150, 0.0575, -0.5764]) tensor([0.4356, 0.1752, 0.2543, 0.1349]) -Greedy action tensor([ 0.2413, 0.0645, 0.0339, -0.3059]) tensor([0.3097, 0.2595, 0.2517, 0.1792]) -Greedy action tensor([ 0.2731, -0.0886, 0.1408, -0.2327]) tensor([0.3149, 0.2193, 0.2759, 0.1899]) -Greedy action tensor([ 0.3500, 0.1477, 0.1683, -0.2187]) tensor([0.3108, 0.2539, 0.2592, 0.1760]) -Greedy action tensor([ 0.7233, -0.2666, -0.1531, -0.4462]) tensor([0.4766, 0.1771, 0.1984, 0.1480]) -Greedy action tensor([ 0.4757, -0.1394, -0.0766, -0.4395]) tensor([0.3974, 0.2148, 0.2287, 0.1591]) -Greedy action tensor([ 0.5101, 0.0938, 0.0939, -0.3268]) tensor([0.3634, 0.2396, 0.2397, 0.1574]) -Greedy action tensor([ 0.3593, 0.0392, 0.0834, -0.1764]) tensor([0.3257, 0.2365, 0.2472, 0.1906]) -Greedy action tensor([ 0.2355, -0.0095, -0.0062, -0.2000]) tensor([0.3111, 0.2435, 0.2443, 0.2012]) -Greedy action tensor([ 0.4559, 0.0863, 0.1537, -0.2699]) tensor([0.3432, 0.2371, 0.2537, 0.1661]) -Greedy action tensor([ 0.4018, 0.1021, -0.0052, 0.0295]) tensor([0.3230, 0.2394, 0.2150, 0.2226]) -Greedy action tensor([ 0.6203, -0.1044, 0.0513, -0.4487]) tensor([0.4177, 0.2024, 0.2365, 0.1434]) -Greedy action tensor([ 0.8830, -0.7684, 0.0709, -0.6248]) tensor([0.5385, 0.1033, 0.2391, 0.1192]) -Greedy action tensor([ 1.1892, -0.7536, 0.1218, -0.8327]) tensor([0.6174, 0.0885, 0.2123, 0.0818]) -Greedy action tensor([ 0.4966, -0.0012, 0.0319, -0.2250]) tensor([0.3673, 0.2233, 0.2308, 0.1785]) -Greedy action tensor([ 0.1911, -0.0875, 0.1133, -0.1319]) tensor([0.2936, 0.2222, 0.2716, 0.2126]) -Greedy action tensor([ 0.8346, -0.2910, -0.1113, -0.5337]) tensor([0.5083, 0.1649, 0.1974, 0.1294]) -Greedy action tensor([ 0.7379, -0.4545, -0.1007, -0.6434]) tensor([0.5033, 0.1527, 0.2176, 0.1264]) -Greedy action tensor([ 0.3969, 0.0222, 0.0697, -0.1633]) tensor([0.3356, 0.2307, 0.2420, 0.1917]) -Greedy action tensor([ 0.8158, -0.1574, -0.0712, -0.5102]) tensor([0.4866, 0.1838, 0.2004, 0.1292]) -Greedy action tensor([ 0.3851, 0.1776, 0.1468, -0.1533]) tensor([0.3140, 0.2552, 0.2475, 0.1833]) -Greedy action tensor([ 0.3362, 0.1311, 0.0307, -0.2627]) tensor([0.3225, 0.2627, 0.2376, 0.1772]) -Greedy action tensor([ 0.4751, 0.0719, 0.1083, -0.3649]) tensor([0.3581, 0.2392, 0.2481, 0.1546]) -Greedy action tensor([ 0.2057, 0.0022, -0.0698, -0.0770]) tensor([0.3004, 0.2451, 0.2281, 0.2264]) -Greedy action tensor([ 0.6945, -0.4233, 0.0447, -0.5665]) tensor([0.4689, 0.1533, 0.2449, 0.1329]) -Greedy action tensor([ 0.6884, -0.6131, 0.0187, -0.1437]) tensor([0.4506, 0.1226, 0.2307, 0.1961]) -Greedy action tensor([ 1.7890, -0.4757, -0.1329, 0.3974]) tensor([0.6672, 0.0693, 0.0976, 0.1659]) -Greedy action tensor([ 0.6917, -0.5075, -0.0414, -0.1799]) tensor([0.4545, 0.1370, 0.2184, 0.1901]) -Greedy action tensor([ 1.3802, -0.6800, -0.0161, 0.1109]) tensor([0.6039, 0.0769, 0.1495, 0.1697]) -Greedy action tensor([ 1.2321, -0.5921, -0.4809, -0.0435]) tensor([0.6169, 0.0995, 0.1112, 0.1723]) -Greedy action tensor([ 0.8330, -0.6023, -0.2753, 0.1107]) tensor([0.4869, 0.1159, 0.1607, 0.2365]) -Greedy action tensor([ 1.7510, -0.6893, -0.3024, 0.2486]) tensor([0.6954, 0.0606, 0.0892, 0.1548]) -Greedy action tensor([ 0.8866, -0.5596, -0.0233, 0.0245]) tensor([0.4853, 0.1143, 0.1954, 0.2050]) -Greedy action tensor([ 0.6215, -0.4577, -0.1293, -0.0990]) tensor([0.4351, 0.1479, 0.2054, 0.2117]) -Greedy action tensor([ 0.7472, -0.4601, -0.0872, -0.0695]) tensor([0.4598, 0.1375, 0.1996, 0.2032]) -Greedy action tensor([ 0.4580, -0.1243, -0.0535, -0.1366]) tensor([0.3690, 0.2061, 0.2212, 0.2036]) -Greedy action tensor([ 0.6329, -0.0280, 0.0323, -0.0884]) tensor([0.3920, 0.2024, 0.2150, 0.1906]) -Greedy action tensor([ 0.8501, -0.3498, -0.1373, -0.1022]) tensor([0.4855, 0.1462, 0.1809, 0.1874]) -Greedy action tensor([ 1.1603, -0.4679, -0.5040, -0.1210]) tensor([0.6012, 0.1180, 0.1138, 0.1669]) -Greedy action tensor([ 0.7453, -0.6723, -0.2476, 0.1159]) tensor([0.4661, 0.1129, 0.1727, 0.2484]) -Greedy action tensor([ 1.3481, -0.6450, -0.2903, 0.2588]) tensor([0.5999, 0.0817, 0.1166, 0.2018]) -Greedy action tensor([ 0.7857, -0.4460, -0.0815, -0.2049]) tensor([0.4800, 0.1401, 0.2017, 0.1782]) -Greedy action tensor([ 0.5838, -0.5749, -0.1688, 0.0257]) tensor([0.4242, 0.1332, 0.1999, 0.2428]) -Greedy action tensor([ 1.7884, -0.3612, -0.4601, -0.1828]) tensor([0.7345, 0.0856, 0.0775, 0.1023]) -Greedy action tensor([ 0.7361, -0.5235, -0.1451, 0.1041]) tensor([0.4485, 0.1273, 0.1858, 0.2384]) -Greedy action tensor([ 1.1029, 0.0387, -0.1405, -0.0635]) tensor([0.5142, 0.1774, 0.1483, 0.1602]) -Greedy action tensor([ 1.3981, -0.4677, -0.1973, 0.1313]) tensor([0.6100, 0.0944, 0.1237, 0.1719]) -Greedy action tensor([ 1.6776, -0.4141, -0.2123, 0.1488]) tensor([0.6705, 0.0828, 0.1013, 0.1454]) -Greedy action tensor([ 0.5743, -0.2313, 0.1077, -0.1835]) tensor([0.3933, 0.1757, 0.2466, 0.1843]) -Greedy action tensor([ 0.7183, -0.3866, 0.0696, 0.0778]) tensor([0.4200, 0.1391, 0.2195, 0.2214]) -Greedy action tensor([ 1.4952, -0.6441, -0.5348, -0.1323]) tensor([0.6918, 0.0815, 0.0909, 0.1359]) -Greedy action tensor([ 0.9356, -0.2774, -0.1743, 0.1926]) tensor([0.4756, 0.1414, 0.1567, 0.2262]) -Greedy action tensor([ 1.1503, -0.4374, -0.1084, 0.0848]) tensor([0.5456, 0.1115, 0.1550, 0.1880]) -Greedy action tensor([ 0.4794, -0.4639, 0.0521, -0.0785]) tensor([0.3825, 0.1489, 0.2495, 0.2190]) -Greedy action tensor([ 1.0005, -0.4406, -0.1576, 0.0261]) tensor([0.5186, 0.1227, 0.1629, 0.1957]) -Greedy action tensor([ 0.5826, -0.4869, -0.1484, 0.0356]) tensor([0.4161, 0.1428, 0.2003, 0.2408]) -Greedy action tensor([ 0.9332, -0.5941, -0.3019, 0.1362]) tensor([0.5106, 0.1109, 0.1485, 0.2301]) -Greedy action tensor([ 1.2451, -0.5968, -0.0474, 0.3405]) tensor([0.5441, 0.0863, 0.1494, 0.2202]) -Greedy action tensor([ 0.9391, -0.3909, -0.2317, -0.1125]) tensor([0.5198, 0.1375, 0.1612, 0.1816]) -Greedy action tensor([ 0.6904, -0.6027, -0.2945, 0.1432]) tensor([0.4491, 0.1233, 0.1677, 0.2599]) -Greedy action tensor([ 1.1094, -0.3535, -0.1937, -0.1201]) tensor([0.5569, 0.1290, 0.1513, 0.1629]) -Greedy action tensor([ 1.2813, -0.6213, -0.1833, -0.0108]) tensor([0.6042, 0.0901, 0.1397, 0.1660]) -Greedy action tensor([ 0.8913, -0.4807, -0.0468, -0.2143]) tensor([0.5061, 0.1283, 0.1981, 0.1675]) -Greedy action tensor([ 1.3160, -0.6556, -0.3617, -0.0687]) tensor([0.6344, 0.0883, 0.1185, 0.1588]) -Greedy action tensor([ 1.1660, -0.4435, -0.1940, -0.0768]) tensor([0.5730, 0.1146, 0.1471, 0.1654]) -Greedy action tensor([ 0.5188, -0.4233, 0.0889, -0.0750]) tensor([0.3857, 0.1503, 0.2509, 0.2130]) -Greedy action tensor([ 1.1176, -0.5661, -0.0387, 0.0614]) tensor([0.5411, 0.1005, 0.1702, 0.1882]) -Greedy action tensor([ 1.2494, -0.4612, -0.4367, -0.1710]) tensor([0.6220, 0.1124, 0.1152, 0.1503]) -Greedy action tensor([ 0.8969, -0.6540, 0.1016, 0.0928]) tensor([0.4737, 0.1005, 0.2139, 0.2120]) -Greedy action tensor([ 0.9981, -0.4283, -0.4132, 0.4000]) tensor([0.4917, 0.1181, 0.1199, 0.2704]) -Greedy action tensor([ 0.7897, -0.4433, -0.0979, 0.0359]) tensor([0.4601, 0.1341, 0.1894, 0.2165]) -Greedy action tensor([ 0.5457, -0.3173, 0.0244, -0.0989]) tensor([0.3936, 0.1661, 0.2337, 0.2066]) -Greedy action tensor([ 0.9102, -0.5821, -0.2579, 0.1318]) tensor([0.5013, 0.1127, 0.1559, 0.2302]) -Greedy action tensor([ 1.2726, -0.6386, -0.2127, 0.1892]) tensor([0.5838, 0.0864, 0.1322, 0.1976]) -Greedy action tensor([ 1.4570, -0.2646, -0.1320, -0.0220]) tensor([0.6208, 0.1110, 0.1267, 0.1415]) -Greedy action tensor([ 1.4327, -0.5278, -0.4837, -0.0790]) tensor([0.6629, 0.0933, 0.0975, 0.1462]) -Greedy action tensor([ 1.3203, -0.3986, -0.3080, -0.2161]) tensor([0.6287, 0.1127, 0.1234, 0.1353]) -Greedy action tensor([ 1.1070, -0.4567, -0.1546, 0.1457]) tensor([0.5333, 0.1117, 0.1510, 0.2040]) -Greedy action tensor([ 1.1625, -0.6322, -0.4861, -0.0369]) tensor([0.6024, 0.1001, 0.1159, 0.1816]) -Greedy action tensor([ 0.8370, -0.2701, -0.2124, -0.0752]) tensor([0.4802, 0.1587, 0.1682, 0.1929]) -Greedy action tensor([ 1.1201, -0.4877, -0.0797, -0.0544]) tensor([0.5523, 0.1106, 0.1664, 0.1707]) -Greedy action tensor([ 0.5064, -0.4236, -0.0447, 0.0193]) tensor([0.3868, 0.1526, 0.2229, 0.2377]) -Greedy action tensor([ 0.7678, -0.5744, -0.0325, 0.1035]) tensor([0.4494, 0.1174, 0.2019, 0.2313]) -Greedy action tensor([ 0.6078, -0.5831, -0.3340, 0.0161]) tensor([0.4450, 0.1353, 0.1735, 0.2463]) -Greedy action tensor([ 1.0789, -0.4458, -0.1365, -0.1803]) tensor([0.5561, 0.1211, 0.1650, 0.1579]) -Greedy action tensor([ 0.8948, -0.1960, -0.0057, -0.1185]) tensor([0.4750, 0.1596, 0.1930, 0.1724]) -Greedy action tensor([ 0.4775, -0.2569, -0.1286, 0.0141]) tensor([0.3767, 0.1808, 0.2055, 0.2370]) -Greedy action tensor([ 1.9580, -0.7108, -0.4213, 0.1258]) tensor([0.7564, 0.0524, 0.0701, 0.1211]) -Greedy action tensor([ 0.5221, -0.3663, 0.0811, -0.1401]) tensor([0.3890, 0.1600, 0.2503, 0.2006]) -Greedy action tensor([ 0.5315, -0.1320, 0.1692, -0.0314]) tensor([0.3596, 0.1852, 0.2503, 0.2048]) -Greedy action tensor([ 1.4553, -0.5786, -0.4501, -0.1589]) tensor([0.6763, 0.0885, 0.1006, 0.1346]) -Greedy action tensor([ 0.8082, -0.4213, -0.2218, -0.1166]) tensor([0.4887, 0.1429, 0.1745, 0.1938]) -Greedy action tensor([ 0.8085, -0.5505, -0.1422, 0.0065]) tensor([0.4780, 0.1228, 0.1848, 0.2144]) -Greedy action tensor([ 1.0724, -0.5345, -0.0275, 0.0248]) tensor([0.5307, 0.1064, 0.1767, 0.1862]) -Greedy action tensor([ 1.4726, -0.7345, -0.5158, 0.1562]) tensor([0.6601, 0.0726, 0.0904, 0.1769]) -Greedy action tensor([ 0.9331, -0.5592, -0.3322, 0.2075]) tensor([0.5023, 0.1129, 0.1417, 0.2431]) -Greedy action tensor([ 0.6491, -0.2492, -0.0023, 0.0705]) tensor([0.4017, 0.1636, 0.2094, 0.2252]) -Greedy action tensor([ 1.1522, -0.5319, -0.0889, -0.0414]) tensor([0.5625, 0.1044, 0.1626, 0.1705]) -Greedy action tensor([ 1.0151, -0.6233, -0.3516, 0.0541]) tensor([0.5459, 0.1061, 0.1392, 0.2088]) -Greedy action tensor([ 1.8550, -0.6439, -0.2488, 0.2928]) tensor([0.7073, 0.0581, 0.0863, 0.1483]) -Greedy action tensor([ 0.8520, -0.4450, -0.1236, -0.0096]) tensor([0.4824, 0.1319, 0.1819, 0.2038]) -Greedy action tensor([ 1.2054, -0.4611, -0.2530, -0.1923]) tensor([0.5993, 0.1132, 0.1394, 0.1481]) -Greedy action tensor([ 0.8811, -0.6293, -0.1287, 0.1293]) tensor([0.4862, 0.1074, 0.1771, 0.2293]) -Greedy action tensor([ 0.9432, -0.1743, -0.0704, -0.0985]) tensor([0.4895, 0.1601, 0.1777, 0.1727]) -Greedy action tensor([ 1.0006, -0.5375, -0.2051, 0.1482]) tensor([0.5153, 0.1107, 0.1543, 0.2197]) -Greedy action tensor([ 0.9455, -0.4094, -0.2057, -0.0050]) tensor([0.5100, 0.1316, 0.1613, 0.1971]) -Greedy action tensor([ 1.7340, -0.6084, 0.7749, 1.2669]) tensor([0.4748, 0.0456, 0.1819, 0.2976]) -Greedy action tensor([ 1.8365, 0.1654, -0.3342, 0.6509]) tensor([0.6220, 0.1170, 0.0710, 0.1901]) -Greedy action tensor([ 0.9776, -0.6044, 2.3046, 0.7752]) tensor([0.1727, 0.0355, 0.6508, 0.1410]) -Greedy action tensor([ 2.1042, -0.9612, 0.6645, 0.8621]) tensor([0.6359, 0.0297, 0.1507, 0.1837]) -Greedy action tensor([ 1.4614, -0.8530, 0.6303, 0.7074]) tensor([0.4988, 0.0493, 0.2173, 0.2347]) -Greedy action tensor([ 0.7921, -0.1557, 0.5265, 0.7335]) tensor([0.3229, 0.1251, 0.2475, 0.3045]) -Greedy action tensor([ 2.2127, -0.5526, 1.4941, 1.1600]) tensor([0.5265, 0.0331, 0.2566, 0.1837]) -Greedy action tensor([ 1.5434, -0.0837, 0.9043, 1.7769]) tensor([0.3347, 0.0658, 0.1767, 0.4228]) -Greedy action tensor([ 1.6135, -0.2364, 0.7857, 1.2361]) tensor([0.4386, 0.0690, 0.1917, 0.3007]) -Greedy action tensor([ 1.5831, -0.1121, 0.7812, 1.9994]) tensor([0.3176, 0.0583, 0.1424, 0.4816]) -Greedy action tensor([ 2.3084, -0.4507, 0.6813, 2.2713]) tensor([0.4497, 0.0285, 0.0884, 0.4334]) -Greedy action tensor([ 1.6702, -0.2414, 0.6058, 0.9989]) tensor([0.4990, 0.0738, 0.1721, 0.2550]) -Greedy action tensor([ 0.9527, 0.1987, -0.4134, -0.3625]) tensor([0.5015, 0.2359, 0.1279, 0.1346]) -Greedy action tensor([ 1.2510, -0.8368, 1.0853, 0.4540]) tensor([0.4129, 0.0512, 0.3498, 0.1861]) -Greedy action tensor([1.5536, 0.3613, 1.1839, 1.2416]) tensor([0.3668, 0.1113, 0.2534, 0.2685]) -Greedy action tensor([1.3846, 0.0522, 0.7014, 0.8643]) tensor([0.4232, 0.1116, 0.2137, 0.2515]) -Greedy action tensor([ 1.6620, -0.4796, -0.4617, 1.2770]) tensor([0.5215, 0.0613, 0.0624, 0.3549]) -Greedy action tensor([ 1.1985, -0.1301, 0.3667, 1.0301]) tensor([0.3929, 0.1041, 0.1710, 0.3320]) -Greedy action tensor([ 1.5241, -0.2825, 0.7637, 0.9574]) tensor([0.4547, 0.0747, 0.2126, 0.2580]) -Greedy action tensor([ 1.2432, -0.2336, 1.3869, 1.5439]) tensor([0.2678, 0.0612, 0.3092, 0.3618]) -Greedy action tensor([ 1.0111, -0.9508, 0.6871, 1.3911]) tensor([0.3007, 0.0423, 0.2174, 0.4396]) -Greedy action tensor([ 1.2516, -0.5841, 2.0383, 1.2837]) tensor([0.2279, 0.0363, 0.5005, 0.2353]) -Greedy action tensor([1.5135, 0.2306, 1.3711, 1.3992]) tensor([0.3293, 0.0913, 0.2856, 0.2938]) -Greedy action tensor([0.9750, 0.1834, 1.2204, 0.8246]) tensor([0.2784, 0.1262, 0.3559, 0.2396]) -Greedy action tensor([ 1.9134, -0.2443, -0.1572, 0.8806]) tensor([0.6259, 0.0723, 0.0789, 0.2228]) -Greedy action tensor([ 1.6999, -0.3998, 1.1449, 1.4601]) tensor([0.4027, 0.0493, 0.2312, 0.3168]) -Greedy action tensor([ 0.8821, -0.6787, 0.9819, 0.6491]) tensor([0.3218, 0.0676, 0.3556, 0.2550]) -Greedy action tensor([1.4562, 0.1087, 0.9363, 1.4345]) tensor([0.3530, 0.0917, 0.2099, 0.3454]) -Greedy action tensor([ 1.5630, -0.5000, 0.9799, 1.1742]) tensor([0.4232, 0.0538, 0.2362, 0.2869]) -Greedy action tensor([ 1.0423, -0.2172, 0.7765, 1.3706]) tensor([0.2908, 0.0825, 0.2229, 0.4038]) -Greedy action tensor([ 1.1195, -0.0795, 0.5688, 0.7049]) tensor([0.3939, 0.1188, 0.2271, 0.2602]) -Greedy action tensor([ 1.2265, -0.6838, 1.5894, 0.7687]) tensor([0.3107, 0.0460, 0.4467, 0.1966]) -Greedy action tensor([1.6649, 0.0866, 0.6304, 0.8707]) tensor([0.4966, 0.1025, 0.1765, 0.2244]) -Greedy action tensor([ 2.0926, -0.9244, 0.5277, 0.7835]) tensor([0.6544, 0.0320, 0.1368, 0.1767]) -Greedy action tensor([ 1.3790, -0.3668, 0.8124, 1.4070]) tensor([0.3610, 0.0630, 0.2048, 0.3712]) -Greedy action tensor([ 1.6528, -0.5309, 0.4408, 1.1861]) tensor([0.4908, 0.0553, 0.1461, 0.3078]) -Greedy action tensor([ 1.7196, -0.5749, 0.4380, 1.3216]) tensor([0.4878, 0.0492, 0.1354, 0.3276]) -Greedy action tensor([ 1.4897, -0.0445, -0.3216, 0.9171]) tensor([0.5146, 0.1110, 0.0841, 0.2903]) -Greedy action tensor([ 1.3991, -0.2612, 1.1941, 1.0220]) tensor([0.3717, 0.0706, 0.3028, 0.2549]) -Greedy action tensor([1.0927, 0.4489, 1.5064, 0.8237]) tensor([0.2630, 0.1382, 0.3978, 0.2010]) -Greedy action tensor([ 1.3821, -0.5241, 0.6744, 0.8644]) tensor([0.4470, 0.0664, 0.2203, 0.2663]) -Greedy action tensor([ 1.8831, -0.1588, 0.4573, 1.3812]) tensor([0.5062, 0.0657, 0.1216, 0.3064]) -Greedy action tensor([ 1.0854, -0.3770, 1.2959, 1.0800]) tensor([0.2890, 0.0669, 0.3567, 0.2874]) -Greedy action tensor([ 1.5564, -1.4218, 1.0214, 0.5659]) tensor([0.4980, 0.0253, 0.2917, 0.1850]) -Greedy action tensor([ 0.4205, -0.4118, 0.7006, 1.2608]) tensor([0.1970, 0.0857, 0.2607, 0.4565]) -Greedy action tensor([1.3747, 0.3592, 1.0403, 0.9549]) tensor([0.3656, 0.1324, 0.2617, 0.2403]) -Greedy action tensor([ 1.2222, -1.2036, 0.9235, 0.3113]) tensor([0.4480, 0.0396, 0.3323, 0.1802]) -Greedy action tensor([ 1.6163, -0.5337, 1.3425, 1.2645]) tensor([0.3875, 0.0451, 0.2947, 0.2726]) -Greedy action tensor([ 1.2678, -0.5763, 1.3708, 1.2962]) tensor([0.3035, 0.0480, 0.3364, 0.3122]) -Greedy action tensor([ 1.1773, -0.3294, 0.7680, 0.5588]) tensor([0.4125, 0.0914, 0.2739, 0.2222]) -Greedy action tensor([ 1.5719, -0.2805, 0.7878, 0.8754]) tensor([0.4735, 0.0743, 0.2162, 0.2360]) -Greedy action tensor([ 1.2580, -0.3370, 0.8909, 1.1860]) tensor([0.3538, 0.0718, 0.2451, 0.3292]) -Greedy action tensor([ 1.6363, -0.6716, 0.9442, 0.4808]) tensor([0.5222, 0.0519, 0.2614, 0.1645]) -Greedy action tensor([ 1.8913, -0.4190, -0.7011, 0.7921]) tensor([0.6635, 0.0658, 0.0497, 0.2210]) -Greedy action tensor([ 1.3678, -1.0894, 0.2766, 1.7444]) tensor([0.3474, 0.0298, 0.1166, 0.5062]) -Greedy action tensor([ 1.5570, -0.1462, 0.9906, 0.8539]) tensor([0.4455, 0.0811, 0.2528, 0.2205]) -Greedy action tensor([1.6082, 0.2148, 0.8631, 0.9507]) tensor([0.4462, 0.1108, 0.2118, 0.2312]) -Greedy action tensor([ 1.3966, -0.5199, -0.4272, 1.0197]) tensor([0.5014, 0.0738, 0.0809, 0.3439]) -Greedy action tensor([ 1.8030, -0.7035, 0.4651, 1.2483]) tensor([0.5213, 0.0425, 0.1368, 0.2994]) -Greedy action tensor([ 2.1741, -0.4020, 0.7051, 1.6340]) tensor([0.5294, 0.0403, 0.1218, 0.3085]) -Greedy action tensor([1.4292, 0.1048, 1.5776, 1.6916]) tensor([0.2684, 0.0714, 0.3113, 0.3489]) -Greedy action tensor([0.7542, 0.4022, 0.4758, 1.1042]) tensor([0.2578, 0.1813, 0.1951, 0.3658]) -Greedy action tensor([1.7014, 0.3704, 0.5492, 1.6040]) tensor([0.4020, 0.1062, 0.1270, 0.3647]) -Greedy action tensor([1.8235, 0.2148, 0.9862, 1.8995]) tensor([0.3687, 0.0738, 0.1596, 0.3979]) -Greedy action tensor([ 1.6673, -0.1455, -0.1752, 0.7596]) tensor([0.5797, 0.0946, 0.0918, 0.2339]) -Greedy action tensor([ 1.2646e+00, -2.3344e-04, 6.6448e-01, 1.0959e+00]) tensor([0.3737, 0.1055, 0.2051, 0.3157]) -Greedy action tensor([ 1.5112, -0.2364, 1.4233, 1.1530]) tensor([0.3586, 0.0625, 0.3284, 0.2506]) -Greedy action tensor([ 1.8565, -0.5444, 0.5994, 1.5661]) tensor([0.4710, 0.0427, 0.1340, 0.3523]) -Greedy action tensor([ 1.3663, -0.4401, -0.1491, 0.8442]) tensor([0.5058, 0.0831, 0.1111, 0.3000]) -Greedy action tensor([ 1.5460, -0.3924, 1.0288, 0.7555]) tensor([0.4558, 0.0656, 0.2718, 0.2068]) -Greedy action tensor([1.2977, 0.4313, 1.2625, 1.1029]) tensor([0.3116, 0.1310, 0.3008, 0.2565]) -Greedy action tensor([ 1.5369, -1.0424, 0.4036, 1.5852]) tensor([0.4086, 0.0310, 0.1316, 0.4288]) -Greedy action tensor([ 0.6291, -0.6903, 0.6696, 0.7628]) tensor([0.2897, 0.0774, 0.3017, 0.3311]) -Greedy action tensor([ 0.1813, -0.9186, 1.1630, 1.0953]) tensor([0.1539, 0.0512, 0.4108, 0.3840]) -Greedy action tensor([ 1.3326, -0.4851, 1.1430, 1.6721]) tensor([0.2947, 0.0478, 0.2438, 0.4137]) -Greedy action tensor([ 1.2090, -0.4460, -0.7545, 0.4788]) tensor([0.5515, 0.1054, 0.0774, 0.2657]) -Greedy action tensor([ 1.5008, -0.5349, 1.5046, 0.5016]) tensor([0.3996, 0.0522, 0.4011, 0.1471]) -Greedy action tensor([ 1.8492, -0.8146, 0.6241, 2.1214]) tensor([0.3737, 0.0260, 0.1098, 0.4906]) -Greedy action tensor([ 1.5013, -0.7873, 0.5299, 0.9222]) tensor([0.4901, 0.0497, 0.1855, 0.2747]) -Greedy action tensor([ 1.7269, -0.3991, 0.7173, 1.2788]) tensor([0.4711, 0.0562, 0.1717, 0.3010]) -Greedy action tensor([ 0.9736, 0.6334, -0.6566, 0.3829]) tensor([0.4063, 0.2891, 0.0796, 0.2251]) -Greedy action tensor([ 1.4044, -0.0192, 1.3398, 0.9712]) tensor([0.3538, 0.0852, 0.3316, 0.2294]) -Greedy action tensor([-2.0570, -0.8413, 0.6792, 0.0419]) tensor([0.0358, 0.1206, 0.5518, 0.2918]) -Greedy action tensor([-0.5497, 0.2284, -0.8598, -0.2860]) tensor([0.1919, 0.4177, 0.1407, 0.2497]) -Greedy action tensor([-1.9829, -0.6545, 0.8720, 0.1186]) tensor([0.0330, 0.1245, 0.5729, 0.2697]) -Greedy action tensor([-1.6551, -0.7184, 0.3337, 0.2257]) tensor([0.0574, 0.1465, 0.4195, 0.3766]) -Greedy action tensor([-1.1961, 0.0214, 0.4092, -0.5663]) tensor([0.0890, 0.3007, 0.4432, 0.1671]) -Greedy action tensor([-1.7987, -0.8417, 0.1328, -0.4376]) tensor([0.0694, 0.1808, 0.4790, 0.2708]) -Greedy action tensor([-0.4514, -0.5244, 0.1645, 0.2207]) tensor([0.1742, 0.1620, 0.3226, 0.3412]) -Greedy action tensor([-1.3584, -0.6224, -0.0669, -0.0515]) tensor([0.0960, 0.2003, 0.3491, 0.3546]) -Greedy action tensor([-1.8211, -0.4685, 0.2708, -0.2801]) tensor([0.0567, 0.2193, 0.4593, 0.2647]) -Greedy action tensor([-2.0154, -0.9353, 0.6341, -0.1596]) tensor([0.0408, 0.1203, 0.5777, 0.2612]) -Greedy action tensor([-1.7945, -0.4337, 0.4512, -0.1341]) tensor([0.0510, 0.1989, 0.4818, 0.2683]) -Greedy action tensor([-1.2700, -0.3642, -0.0364, -0.1766]) tensor([0.1011, 0.2501, 0.3471, 0.3017]) -Greedy action tensor([-2.0089, -0.7130, 0.8927, 0.1519]) tensor([0.0317, 0.1159, 0.5772, 0.2752]) -Greedy action tensor([-1.4588, -0.5600, 0.5062, 0.1877]) tensor([0.0634, 0.1557, 0.4521, 0.3288]) -Greedy action tensor([-1.4608, -0.9247, 0.4327, -0.5214]) tensor([0.0840, 0.1435, 0.5577, 0.2148]) -Greedy action tensor([-1.9177, -0.7259, 0.1700, -0.2745]) tensor([0.0570, 0.1878, 0.4601, 0.2950]) -Greedy action tensor([-2.0488, -0.7756, 0.8556, 0.0993]) tensor([0.0319, 0.1138, 0.5814, 0.2729]) -Greedy action tensor([-2.0050, -0.8186, 0.5342, -0.0560]) tensor([0.0417, 0.1367, 0.5287, 0.2930]) -Greedy action tensor([-1.5040, -0.0068, 0.3882, -0.0266]) tensor([0.0607, 0.2711, 0.4024, 0.2658]) -Greedy action tensor([-1.9619, -0.9829, 0.2909, -0.3534]) tensor([0.0550, 0.1465, 0.5236, 0.2749]) -Greedy action tensor([-1.0032, -0.0873, -0.3897, -0.1725]) tensor([0.1309, 0.3271, 0.2417, 0.3004]) -Greedy action tensor([-1.2509, -0.5512, 0.3244, 0.1657]) tensor([0.0836, 0.1682, 0.4038, 0.3445]) -Greedy action tensor([-1.6272, -0.4392, 0.5178, -0.0888]) tensor([0.0572, 0.1877, 0.4887, 0.2664]) -Greedy action tensor([-1.8694, -0.5036, 0.5516, -0.1353]) tensor([0.0458, 0.1794, 0.5154, 0.2593]) -Greedy action tensor([-1.6240, -0.5590, 0.2365, -0.1022]) tensor([0.0671, 0.1946, 0.4311, 0.3072]) -Greedy action tensor([-1.6173, -0.5254, 0.5466, 0.1544]) tensor([0.0539, 0.1605, 0.4689, 0.3168]) -Greedy action tensor([-2.0000, -0.8919, 0.6364, -0.1905]) tensor([0.0415, 0.1257, 0.5794, 0.2534]) -Greedy action tensor([-1.1458, -0.5443, -0.0240, -0.2445]) tensor([0.1196, 0.2183, 0.3674, 0.2947]) -Greedy action tensor([-1.6116, -0.5190, 0.3325, 0.1622]) tensor([0.0593, 0.1768, 0.4144, 0.3495]) -Greedy action tensor([-1.7547, -0.5736, 0.4020, -0.0669]) tensor([0.0546, 0.1779, 0.4721, 0.2954]) -Greedy action tensor([-1.5653, -0.6178, 0.0479, -0.4800]) tensor([0.0865, 0.2232, 0.4342, 0.2561]) -Greedy action tensor([-0.8701, -0.6039, 0.3794, 0.1663]) tensor([0.1161, 0.1515, 0.4051, 0.3273]) -Greedy action tensor([-1.5902, -0.6701, 0.2110, -0.2612]) tensor([0.0749, 0.1881, 0.4539, 0.2831]) -Greedy action tensor([-2.0422, -0.8965, 0.6189, -0.0376]) tensor([0.0386, 0.1215, 0.5530, 0.2868]) -Greedy action tensor([-1.9030, -0.9783, 0.2895, -0.4011]) tensor([0.0589, 0.1486, 0.5279, 0.2646]) -Greedy action tensor([-1.4803, -0.4695, 0.1995, -0.3348]) tensor([0.0816, 0.2242, 0.4377, 0.2565]) -Greedy action tensor([-1.5666, -0.5177, 0.4780, 0.1041]) tensor([0.0592, 0.1689, 0.4573, 0.3146]) -Greedy action tensor([-2.0109e+00, -7.1748e-01, 8.8126e-01, 6.4206e-04]) tensor([0.0332, 0.1209, 0.5980, 0.2479]) -Greedy action tensor([-1.0730, -0.5980, 0.3233, 0.0579]) tensor([0.1026, 0.1650, 0.4145, 0.3179]) -Greedy action tensor([-2.0103, -0.7103, 1.2704, 0.3986]) tensor([0.0236, 0.0866, 0.6274, 0.2624]) -Greedy action tensor([-0.9552, -0.2984, -0.3939, 0.1440]) tensor([0.1302, 0.2510, 0.2282, 0.3907]) -Greedy action tensor([-1.1761, -0.5869, 0.3961, -0.3994]) tensor([0.1021, 0.1840, 0.4919, 0.2220]) -Greedy action tensor([-1.4305, -0.2986, -0.0066, -0.4487]) tensor([0.0915, 0.2839, 0.3802, 0.2443]) -Greedy action tensor([-1.5495, -0.6700, 0.1371, -0.3024]) tensor([0.0814, 0.1960, 0.4394, 0.2831]) -Greedy action tensor([-1.0211, -0.0806, 0.0531, -0.2133]) tensor([0.1145, 0.2933, 0.3353, 0.2569]) -Greedy action tensor([-1.4142, -0.4889, 0.0528, -0.3675]) tensor([0.0934, 0.2356, 0.4050, 0.2660]) -Greedy action tensor([-2.0500, -0.8803, 0.5627, -0.0743]) tensor([0.0399, 0.1285, 0.5439, 0.2877]) -Greedy action tensor([-1.5737, -0.7932, 0.3396, -0.5346]) tensor([0.0782, 0.1707, 0.5300, 0.2211]) -Greedy action tensor([-1.8627, -0.8863, 0.1002, -0.3788]) tensor([0.0659, 0.1748, 0.4689, 0.2904]) -Greedy action tensor([-0.8398, -0.1085, -0.2640, -0.1896]) tensor([0.1477, 0.3068, 0.2626, 0.2829]) -Greedy action tensor([-1.8179, -0.5029, 0.7236, 0.0336]) tensor([0.0420, 0.1565, 0.5337, 0.2677]) -Greedy action tensor([-2.0205, -0.7360, 0.8618, 0.0894]) tensor([0.0326, 0.1176, 0.5813, 0.2685]) -Greedy action tensor([-1.7074, -0.7132, 0.0855, -0.2977]) tensor([0.0724, 0.1958, 0.4352, 0.2966]) -Greedy action tensor([-1.9160, -0.7359, 0.1720, -0.2365]) tensor([0.0565, 0.1840, 0.4562, 0.3032]) -Greedy action tensor([-0.9189, -0.3310, 0.4167, 1.1421]) tensor([0.0692, 0.1245, 0.2630, 0.5433]) -Greedy action tensor([-0.8707, -0.3431, 0.3155, -0.0933]) tensor([0.1228, 0.2081, 0.4020, 0.2671]) -Greedy action tensor([-2.0267, -0.9511, 0.5593, -0.2044]) tensor([0.0427, 0.1253, 0.5675, 0.2644]) -Greedy action tensor([-1.9130, -0.9679, 0.2304, -0.4473]) tensor([0.0609, 0.1566, 0.5190, 0.2635]) -Greedy action tensor([-1.9842, -0.8482, 0.3533, -0.1759]) tensor([0.0486, 0.1514, 0.5034, 0.2965]) -Greedy action tensor([-1.9913, -0.6719, 0.8321, 0.0227]) tensor([0.0344, 0.1287, 0.5791, 0.2578]) -Greedy action tensor([-1.9456, -0.6497, 0.8120, 0.1171]) tensor([0.0354, 0.1292, 0.5573, 0.2782]) -Greedy action tensor([-1.4369, -0.5372, -0.0061, -0.1596]) tensor([0.0891, 0.2190, 0.3725, 0.3195]) -Greedy action tensor([-0.5050, -0.4845, 0.1315, 0.3221]) tensor([0.1614, 0.1647, 0.3049, 0.3690]) -Greedy action tensor([-2.0543, -0.8396, 0.7589, 0.0692]) tensor([0.0340, 0.1146, 0.5669, 0.2844]) -Greedy action tensor([-1.8890, -0.6149, 0.4801, -0.1122]) tensor([0.0472, 0.1689, 0.5047, 0.2792]) -Greedy action tensor([-1.0425, -0.5703, 0.3058, 0.3244]) tensor([0.0964, 0.1545, 0.3711, 0.3781]) -Greedy action tensor([-2.0457, -0.8144, 0.6334, -0.0186]) tensor([0.0376, 0.1288, 0.5480, 0.2855]) -Greedy action tensor([-1.8699, -0.9325, 0.1841, -0.3346]) tensor([0.0625, 0.1596, 0.4876, 0.2903]) -Greedy action tensor([-1.9878, -0.6029, 0.7754, -0.0512]) tensor([0.0360, 0.1438, 0.5706, 0.2497]) -Greedy action tensor([-0.4703, -0.5156, 0.2597, 0.3922]) tensor([0.1563, 0.1493, 0.3242, 0.3702]) -Greedy action tensor([-2.0360, -0.9229, 0.4752, -0.1842]) tensor([0.0440, 0.1339, 0.5419, 0.2802]) -Greedy action tensor([-1.3581, -0.4207, -0.0238, -0.1082]) tensor([0.0922, 0.2355, 0.3503, 0.3219]) -Greedy action tensor([-1.9268, -0.5268, 0.5585, -0.1313]) tensor([0.0433, 0.1757, 0.5201, 0.2609]) -Greedy action tensor([-1.8285, -0.6047, 0.8311, 0.0499]) tensor([0.0396, 0.1347, 0.5663, 0.2593]) -Greedy action tensor([-0.7646, -0.5860, 0.2876, -0.0224]) tensor([0.1397, 0.1670, 0.4000, 0.2934]) -Greedy action tensor([-1.7842, -0.6722, 0.2393, -0.1315]) tensor([0.0594, 0.1807, 0.4496, 0.3103]) -Greedy action tensor([-1.4425e+00, -5.6224e-01, 7.0812e-01, -4.3565e-04]) tensor([0.0616, 0.1486, 0.5292, 0.2606]) -Greedy action tensor([-1.9108, -0.9724, 0.1366, -0.4811]) tensor([0.0646, 0.1651, 0.5005, 0.2699]) -Greedy action tensor([-1.9218, -0.7047, 0.2405, -0.2271]) tensor([0.0540, 0.1824, 0.4694, 0.2941]) -Greedy action tensor([-0.8962, -0.6228, -0.3376, -0.4230]) tensor([0.1764, 0.2319, 0.3084, 0.2832]) -Greedy action tensor([-1.9564, -0.9555, 0.3827, -0.2461]) tensor([0.0510, 0.1386, 0.5285, 0.2818]) -Greedy action tensor([ 0.3437, 0.1532, 0.1889, -0.0551]) tensor([0.2981, 0.2464, 0.2554, 0.2001]) -Greedy action tensor([ 0.7063, -0.3923, 0.1189, -0.6281]) tensor([0.4646, 0.1549, 0.2582, 0.1223]) -Greedy action tensor([0.4000, 0.1872, 0.0974, 0.0606]) tensor([0.3068, 0.2480, 0.2267, 0.2185]) -Greedy action tensor([ 0.3962, -0.1186, 0.0986, -0.3881]) tensor([0.3576, 0.2137, 0.2655, 0.1632]) -Greedy action tensor([ 0.6712, -0.1892, -0.0532, -0.4215]) tensor([0.4458, 0.1886, 0.2161, 0.1495]) -Greedy action tensor([ 0.4335, -0.1262, 0.0739, -0.3747]) tensor([0.3683, 0.2105, 0.2571, 0.1641]) -Greedy action tensor([ 1.0428, -0.6205, 0.0206, -0.7363]) tensor([0.5820, 0.1103, 0.2094, 0.0982]) -Greedy action tensor([ 0.3535, 0.0925, 0.2433, -0.2547]) tensor([0.3115, 0.2400, 0.2790, 0.1696]) -Greedy action tensor([ 0.2817, 0.0349, 0.2252, -0.1161]) tensor([0.2943, 0.2299, 0.2781, 0.1977]) -Greedy action tensor([ 0.2964, 0.1796, 0.1410, -0.2209]) tensor([0.2992, 0.2662, 0.2562, 0.1784]) -Greedy action tensor([ 0.3377, -0.0612, 0.1978, -0.1377]) tensor([0.3163, 0.2122, 0.2750, 0.1966]) -Greedy action tensor([ 0.3879, 0.2030, 0.1674, -0.2867]) tensor([0.3182, 0.2645, 0.2552, 0.1621]) -Greedy action tensor([ 0.2345, 0.0328, 0.2141, -0.0236]) tensor([0.2801, 0.2290, 0.2745, 0.2164]) -Greedy action tensor([0.1930, 0.0439, 0.0366, 0.0053]) tensor([0.2820, 0.2430, 0.2412, 0.2338]) -Greedy action tensor([ 0.8678, -0.1164, -0.0660, -0.5574]) tensor([0.4982, 0.1862, 0.1958, 0.1198]) -Greedy action tensor([ 0.6861, -0.2320, 0.1185, -0.6237]) tensor([0.4472, 0.1786, 0.2535, 0.1207]) -Greedy action tensor([ 0.2990, 0.0683, 0.1916, -0.0962]) tensor([0.2971, 0.2359, 0.2669, 0.2001]) -Greedy action tensor([ 0.3887, 0.1097, -0.0318, -0.0261]) tensor([0.3253, 0.2461, 0.2137, 0.2149]) -Greedy action tensor([ 0.7749, -0.3493, 0.0463, -0.4496]) tensor([0.4759, 0.1546, 0.2296, 0.1399]) -Greedy action tensor([ 0.2002, -0.0206, 0.1672, -0.2073]) tensor([0.2911, 0.2335, 0.2817, 0.1937]) -Greedy action tensor([ 0.4726, -0.1458, 0.1062, -0.2474]) tensor([0.3678, 0.1982, 0.2550, 0.1790]) -Greedy action tensor([ 0.5603, -0.0167, 0.0142, -0.3502]) tensor([0.3932, 0.2208, 0.2278, 0.1582]) -Greedy action tensor([ 0.3575, -0.1243, 0.1290, -0.2198]) tensor([0.3362, 0.2076, 0.2675, 0.1887]) -Greedy action tensor([ 0.2934, 0.0218, 0.2052, -0.1787]) tensor([0.3029, 0.2309, 0.2773, 0.1889]) -Greedy action tensor([ 0.1994, 0.0356, 0.1377, -0.3237]) tensor([0.2957, 0.2510, 0.2780, 0.1753]) -Greedy action tensor([ 0.4919, -0.0097, 0.0242, -0.2392]) tensor([0.3685, 0.2232, 0.2309, 0.1774]) -Greedy action tensor([ 0.4232, -0.0088, 0.0531, -0.2081]) tensor([0.3482, 0.2261, 0.2405, 0.1852]) -Greedy action tensor([ 0.3025, -0.1089, 0.1357, -0.2722]) tensor([0.3255, 0.2157, 0.2755, 0.1832]) -Greedy action tensor([ 0.2043, 0.1158, 0.1984, -0.2603]) tensor([0.2827, 0.2587, 0.2810, 0.1776]) -Greedy action tensor([ 0.3386, 0.0940, 0.1911, -0.1702]) tensor([0.3080, 0.2411, 0.2657, 0.1852]) -Greedy action tensor([ 0.1960, 0.0276, 0.2258, -0.2124]) tensor([0.2825, 0.2387, 0.2910, 0.1878]) -Greedy action tensor([ 0.6190, -0.0951, -0.0984, -0.4277]) tensor([0.4294, 0.2102, 0.2096, 0.1508]) -Greedy action tensor([ 0.4760, -0.1628, 0.2040, -0.5686]) tensor([0.3786, 0.1998, 0.2884, 0.1332]) -Greedy action tensor([ 0.3623, 0.0711, 0.1750, -0.2388]) tensor([0.3200, 0.2392, 0.2654, 0.1754]) -Greedy action tensor([ 0.3161, 0.0101, 0.0486, -0.1823]) tensor([0.3216, 0.2368, 0.2461, 0.1954]) -Greedy action tensor([ 0.3866, 0.0354, 0.1533, -0.1841]) tensor([0.3267, 0.2299, 0.2587, 0.1846]) -Greedy action tensor([ 0.9334, -0.2602, -0.2093, -0.7194]) tensor([0.5514, 0.1671, 0.1759, 0.1056]) -Greedy action tensor([ 0.3763, 0.1955, 0.1744, -0.1779]) tensor([0.3100, 0.2587, 0.2533, 0.1781]) -Greedy action tensor([ 0.7202, -0.1675, -0.1019, -0.4632]) tensor([0.4635, 0.1908, 0.2037, 0.1420]) -Greedy action tensor([ 0.3134, 0.1553, 0.1737, -0.1385]) tensor([0.2976, 0.2541, 0.2588, 0.1894]) -Greedy action tensor([ 0.3695, 0.0474, 0.2019, -0.1425]) tensor([0.3155, 0.2286, 0.2668, 0.1891]) -Greedy action tensor([ 0.9307, -0.3775, -0.1070, -0.5392]) tensor([0.5392, 0.1458, 0.1910, 0.1240]) -Greedy action tensor([ 0.6789, -0.1404, 0.0220, -0.4608]) tensor([0.4388, 0.1934, 0.2275, 0.1404]) -Greedy action tensor([ 0.4010, -0.0082, 0.2466, -0.4600]) tensor([0.3397, 0.2256, 0.2911, 0.1436]) -Greedy action tensor([ 0.4336, -0.1207, 0.1119, -0.2879]) tensor([0.3590, 0.2062, 0.2603, 0.1745]) -Greedy action tensor([ 0.2540, -0.0189, 0.1664, -0.3003]) tensor([0.3075, 0.2341, 0.2817, 0.1767]) -Greedy action tensor([ 0.1737, 0.0909, 0.2135, -0.1829]) tensor([0.2731, 0.2514, 0.2842, 0.1912]) -Greedy action tensor([ 0.7961, -0.2101, -0.0751, -0.6557]) tensor([0.4955, 0.1811, 0.2073, 0.1160]) -Greedy action tensor([ 0.5482, -0.2446, 0.1372, -0.6004]) tensor([0.4111, 0.1860, 0.2725, 0.1303]) -Greedy action tensor([ 0.6328, -0.2460, 0.0208, -0.3807]) tensor([0.4309, 0.1790, 0.2337, 0.1564]) -Greedy action tensor([ 0.4946, -0.0622, 0.0179, -0.2105]) tensor([0.3720, 0.2132, 0.2310, 0.1838]) -Greedy action tensor([ 0.3650, 0.0842, 0.1422, -0.1117]) tensor([0.3148, 0.2378, 0.2520, 0.1954]) -Greedy action tensor([ 0.3523, -0.0794, 0.1732, -0.3307]) tensor([0.3344, 0.2171, 0.2796, 0.1689]) -Greedy action tensor([ 0.6132, -0.0343, 0.0188, -0.3702]) tensor([0.4083, 0.2137, 0.2253, 0.1527]) -Greedy action tensor([ 0.6443, -0.2503, -0.1130, -0.2621]) tensor([0.4383, 0.1792, 0.2055, 0.1770]) -Greedy action tensor([ 0.3980, 0.1865, 0.1765, -0.1585]) tensor([0.3141, 0.2542, 0.2517, 0.1800]) -Greedy action tensor([ 0.2567, 0.0474, 0.2401, -0.1937]) tensor([0.2914, 0.2363, 0.2866, 0.1857]) -Greedy action tensor([ 0.1372, 0.0913, 0.1426, -0.2353]) tensor([0.2740, 0.2617, 0.2755, 0.1888]) -Greedy action tensor([ 0.1655, 0.0169, 0.0890, -0.1841]) tensor([0.2863, 0.2467, 0.2652, 0.2018]) -Greedy action tensor([ 0.4206, 0.2136, 0.1539, -0.1652]) tensor([0.3189, 0.2593, 0.2443, 0.1775]) -Greedy action tensor([ 0.2807, 0.1751, 0.1391, -0.2355]) tensor([0.2972, 0.2674, 0.2580, 0.1774]) -Greedy action tensor([ 0.2955, -0.0628, 0.1929, -0.1867]) tensor([0.3107, 0.2171, 0.2804, 0.1918]) -Greedy action tensor([ 1.3185, -0.7753, 0.0846, -0.9293]) tensor([0.6579, 0.0811, 0.1915, 0.0695]) -Greedy action tensor([ 0.1891, 0.1814, 0.1855, -0.2442]) tensor([0.2749, 0.2728, 0.2740, 0.1783]) -Greedy action tensor([ 0.3058, 0.1819, 0.1722, -0.2856]) tensor([0.3019, 0.2667, 0.2642, 0.1671]) -Greedy action tensor([ 0.4444, 0.0773, 0.0411, -0.2215]) tensor([0.3479, 0.2410, 0.2324, 0.1787]) -Greedy action tensor([ 0.2665, 0.0988, 0.1782, -0.1881]) tensor([0.2945, 0.2490, 0.2696, 0.1869]) -Greedy action tensor([ 0.4052, -0.0883, 0.1946, -0.2945]) tensor([0.3428, 0.2093, 0.2777, 0.1703]) -Greedy action tensor([ 0.3405, 0.1491, 0.1988, -0.0493]) tensor([0.2967, 0.2450, 0.2575, 0.2009]) -Greedy action tensor([ 0.4834, -0.2069, 0.0675, -0.3127]) tensor([0.3828, 0.1920, 0.2526, 0.1727]) -Greedy action tensor([ 0.3050, 0.0808, -0.0216, -0.0828]) tensor([0.3126, 0.2498, 0.2255, 0.2121]) -Greedy action tensor([ 0.2289, -0.0605, 0.1726, -0.1882]) tensor([0.2982, 0.2233, 0.2819, 0.1965]) -Greedy action tensor([ 0.3900, 0.1043, 0.2134, -0.2342]) tensor([0.3200, 0.2404, 0.2682, 0.1714]) -Greedy action tensor([ 0.3760, -0.1973, -0.0128, -0.4323]) tensor([0.3721, 0.2098, 0.2523, 0.1658]) -Greedy action tensor([ 0.8547, -0.3622, 0.0449, -0.5910]) tensor([0.5059, 0.1498, 0.2251, 0.1192]) -Greedy action tensor([ 0.3420, -0.0127, 0.1432, -0.2141]) tensor([0.3232, 0.2267, 0.2649, 0.1853]) -Greedy action tensor([ 0.4253, 0.1157, 0.1424, -0.0135]) tensor([0.3193, 0.2343, 0.2406, 0.2059]) -Greedy action tensor([ 0.3534, -0.0878, 0.0984, -0.2286]) tensor([0.3359, 0.2161, 0.2603, 0.1877]) -Greedy action tensor([ 0.4019, 0.0899, 0.1593, -0.1967]) tensor([0.3261, 0.2387, 0.2559, 0.1792]) -Greedy action tensor([ 0.6733, -0.1634, -0.0612, -0.4119]) tensor([0.4443, 0.1924, 0.2132, 0.1501]) -Greedy action tensor([ 0.2303, 0.0750, 0.1651, -0.2521]) tensor([0.2932, 0.2510, 0.2747, 0.1810]) -Greedy action tensor([ 0.2791, 0.1452, 0.1636, -0.1088]) tensor([0.2904, 0.2540, 0.2587, 0.1970]) -Greedy action tensor([ 1.1222, -0.4957, -0.2506, -0.1454]) tensor([0.5770, 0.1144, 0.1462, 0.1624]) -Greedy action tensor([ 0.5289, -0.4423, 0.0325, 0.0063]) tensor([0.3875, 0.1467, 0.2359, 0.2298]) -Greedy action tensor([ 0.6241, -0.5091, -0.0675, 0.0611]) tensor([0.4180, 0.1346, 0.2093, 0.2381]) -Greedy action tensor([ 1.1530, -0.6069, -0.0371, 0.1730]) tensor([0.5401, 0.0929, 0.1643, 0.2027]) -Greedy action tensor([ 0.8354, -0.5668, -0.1948, 0.0620]) tensor([0.4844, 0.1192, 0.1729, 0.2235]) -Greedy action tensor([ 0.9931, -0.6990, -0.1919, 0.2472]) tensor([0.5091, 0.0937, 0.1557, 0.2415]) -Greedy action tensor([ 1.1012, -0.0579, -0.0265, -0.1473]) tensor([0.5196, 0.1630, 0.1682, 0.1491]) -Greedy action tensor([ 0.7288, -0.4834, -0.3168, 0.0803]) tensor([0.4604, 0.1370, 0.1618, 0.2407]) -Greedy action tensor([ 1.3795, -0.6505, -0.1123, 0.3613]) tensor([0.5822, 0.0765, 0.1310, 0.2103]) -Greedy action tensor([ 1.9910, -0.5055, -0.2841, 0.2580]) tensor([0.7343, 0.0605, 0.0755, 0.1298]) -Greedy action tensor([ 0.8387, -0.5349, -0.1398, -0.0051]) tensor([0.4856, 0.1230, 0.1825, 0.2088]) -Greedy action tensor([ 9.2890e-01, -4.8000e-01, -7.9317e-02, -4.5631e-04]) tensor([0.4990, 0.1220, 0.1821, 0.1970]) -Greedy action tensor([ 1.0352, -0.4300, -0.2253, 0.0102]) tensor([0.5338, 0.1233, 0.1513, 0.1915]) -Greedy action tensor([ 0.7063, 0.1142, -0.0789, -0.0215]) tensor([0.4012, 0.2220, 0.1830, 0.1938]) -Greedy action tensor([ 1.2437, -0.7299, -0.5903, 0.2708]) tensor([0.5964, 0.0829, 0.0953, 0.2254]) -Greedy action tensor([ 1.2580, -0.6394, -0.2649, 0.0575]) tensor([0.5991, 0.0898, 0.1307, 0.1804]) -Greedy action tensor([ 1.2599, -0.0619, -0.1585, -0.1564]) tensor([0.5710, 0.1523, 0.1382, 0.1385]) -Greedy action tensor([ 1.0485, -0.0103, -0.1947, -0.1410]) tensor([0.5156, 0.1788, 0.1487, 0.1569]) -Greedy action tensor([ 1.3990, -0.0400, -0.1434, -0.0776]) tensor([0.5954, 0.1412, 0.1273, 0.1360]) -Greedy action tensor([ 1.5393, -0.3652, -0.3104, -0.1682]) tensor([0.6723, 0.1001, 0.1057, 0.1219]) -Greedy action tensor([ 1.1653, -0.5193, -0.1730, 0.1665]) tensor([0.5506, 0.1022, 0.1444, 0.2028]) -Greedy action tensor([ 1.2692, -0.4699, -0.0879, 0.2189]) tensor([0.5609, 0.0985, 0.1444, 0.1962]) -Greedy action tensor([ 1.2869, -0.5970, -0.5006, -0.1810]) tensor([0.6453, 0.0981, 0.1080, 0.1487]) -Greedy action tensor([ 1.4316, -0.1131, -0.2376, -0.1203]) tensor([0.6197, 0.1322, 0.1168, 0.1313]) -Greedy action tensor([ 1.1947, -0.5634, -0.3978, 0.3631]) tensor([0.5521, 0.0952, 0.1123, 0.2404]) -Greedy action tensor([ 0.7838, -0.4684, -0.1629, 0.1646]) tensor([0.4520, 0.1292, 0.1754, 0.2434]) -Greedy action tensor([ 0.6563, -0.3299, -0.1435, 0.0531]) tensor([0.4220, 0.1574, 0.1897, 0.2309]) -Greedy action tensor([ 1.7759, -0.6094, -0.2596, 0.2042]) tensor([0.6991, 0.0644, 0.0913, 0.1452]) -Greedy action tensor([ 1.0097, -0.5163, 0.0735, -0.2611]) tensor([0.5291, 0.1150, 0.2075, 0.1485]) -Greedy action tensor([ 0.9034, -0.4636, -0.1509, -0.1311]) tensor([0.5105, 0.1301, 0.1779, 0.1815]) -Greedy action tensor([ 0.6799, -0.5575, -0.1145, 0.0950]) tensor([0.4350, 0.1262, 0.1965, 0.2423]) -Greedy action tensor([ 0.8936, 0.0061, -0.0031, 0.0502]) tensor([0.4445, 0.1830, 0.1813, 0.1912]) -Greedy action tensor([ 0.7337, -0.4210, -0.0545, -0.1110]) tensor([0.4546, 0.1433, 0.2067, 0.1954]) -Greedy action tensor([ 1.3521, -0.6400, -0.0969, 0.1032]) tensor([0.6031, 0.0823, 0.1416, 0.1730]) -Greedy action tensor([ 1.1086, -0.4915, -0.2363, -0.1792]) tensor([0.5753, 0.1161, 0.1499, 0.1587]) -Greedy action tensor([ 0.5803, -0.3289, 0.1337, -0.1933]) tensor([0.3994, 0.1609, 0.2555, 0.1842]) -Greedy action tensor([ 0.9625, -0.4116, -0.1075, -0.2401]) tensor([0.5273, 0.1334, 0.1809, 0.1584]) -Greedy action tensor([ 1.4129, -0.0650, -0.1593, -0.1753]) tensor([0.6098, 0.1391, 0.1266, 0.1246]) -Greedy action tensor([ 1.0607, -0.5285, -0.2848, -0.0254]) tensor([0.5549, 0.1133, 0.1445, 0.1873]) -Greedy action tensor([ 0.8301, -0.3889, -0.3494, 0.3771]) tensor([0.4467, 0.1320, 0.1373, 0.2840]) -Greedy action tensor([ 1.0196, -0.4700, -0.2569, 0.1247]) tensor([0.5227, 0.1178, 0.1458, 0.2136]) -Greedy action tensor([ 0.7411, 0.0868, -0.1760, -0.0491]) tensor([0.4214, 0.2190, 0.1684, 0.1912]) -Greedy action tensor([ 0.7886, -0.5823, -0.0463, -0.1389]) tensor([0.4800, 0.1219, 0.2083, 0.1899]) -Greedy action tensor([ 0.7704, -0.3557, -0.1388, -0.1435]) tensor([0.4699, 0.1524, 0.1893, 0.1884]) -Greedy action tensor([ 1.5270, -0.5289, -0.1960, -0.0687]) tensor([0.6626, 0.0848, 0.1183, 0.1343]) -Greedy action tensor([ 0.9253, -0.2319, 0.1145, -0.1798]) tensor([0.4785, 0.1504, 0.2127, 0.1584]) -Greedy action tensor([ 1.8556, -0.6173, -0.2151, 0.1131]) tensor([0.7218, 0.0609, 0.0910, 0.1264]) -Greedy action tensor([ 1.0090, -0.5188, -0.2276, -0.1790]) tensor([0.5518, 0.1197, 0.1602, 0.1682]) -Greedy action tensor([ 1.3191, -0.1817, -0.2029, -0.1809]) tensor([0.6008, 0.1340, 0.1311, 0.1341]) -Greedy action tensor([ 1.1735, -0.5993, -0.0668, 0.3935]) tensor([0.5215, 0.0886, 0.1509, 0.2391]) -Greedy action tensor([ 1.1906, -0.7320, -0.5383, 0.1081]) tensor([0.6015, 0.0880, 0.1068, 0.2038]) -Greedy action tensor([ 0.8488, -0.5546, 0.0971, 0.0405]) tensor([0.4623, 0.1136, 0.2180, 0.2060]) -Greedy action tensor([ 0.8541, -0.0702, 0.0388, -0.0360]) tensor([0.4445, 0.1764, 0.1967, 0.1825]) -Greedy action tensor([ 0.8791, -0.4845, -0.4158, 0.2811]) tensor([0.4809, 0.1230, 0.1317, 0.2644]) -Greedy action tensor([ 0.8405, -0.6582, -0.1605, 0.0170]) tensor([0.4927, 0.1101, 0.1811, 0.2162]) -Greedy action tensor([ 0.9742, -0.3479, -0.2593, -0.1361]) tensor([0.5299, 0.1412, 0.1543, 0.1746]) -Greedy action tensor([ 0.9194, -0.3292, -0.1625, -0.1326]) tensor([0.5063, 0.1453, 0.1716, 0.1768]) -Greedy action tensor([ 1.1700, -0.4092, -0.0886, -0.1249]) tensor([0.5668, 0.1169, 0.1610, 0.1553]) -Greedy action tensor([ 0.9580, -0.4692, -0.1610, 0.0577]) tensor([0.5068, 0.1216, 0.1655, 0.2060]) -Greedy action tensor([ 1.1059, -0.1808, -0.1782, -0.1938]) tensor([0.5477, 0.1513, 0.1517, 0.1493]) -Greedy action tensor([ 0.7389, -0.5387, -0.2316, 0.1233]) tensor([0.4550, 0.1268, 0.1724, 0.2458]) -Greedy action tensor([ 0.4661, -0.4010, 0.0159, -0.0176]) tensor([0.3739, 0.1571, 0.2384, 0.2305]) -Greedy action tensor([ 0.8731, -0.6241, -0.0844, 0.1109]) tensor([0.4821, 0.1079, 0.1851, 0.2250]) -Greedy action tensor([ 0.7966, -0.5001, -0.3677, -0.1228]) tensor([0.5040, 0.1378, 0.1573, 0.2010]) -Greedy action tensor([ 2.0228, -0.7047, -0.4607, 0.5166]) tensor([0.7296, 0.0477, 0.0609, 0.1618]) -Greedy action tensor([ 1.2911, -0.4736, -0.2817, -0.1879]) tensor([0.6225, 0.1066, 0.1291, 0.1418]) -Greedy action tensor([ 1.5354, -0.2568, -0.4165, -0.1511]) tensor([0.6695, 0.1115, 0.0951, 0.1240]) -Greedy action tensor([ 1.2942, -0.5418, -0.0468, 0.1038]) tensor([0.5797, 0.0924, 0.1516, 0.1763]) -Greedy action tensor([ 0.9966, -0.2365, -0.1418, -0.2067]) tensor([0.5230, 0.1524, 0.1676, 0.1570]) -Greedy action tensor([ 0.9894, -0.4380, -0.2731, -0.0510]) tensor([0.5330, 0.1279, 0.1508, 0.1883]) -Greedy action tensor([ 1.0510, -0.4712, -0.2997, 0.0741]) tensor([0.5394, 0.1177, 0.1397, 0.2031]) -Greedy action tensor([ 0.6752, -0.3288, -0.0996, 0.0742]) tensor([0.4210, 0.1543, 0.1940, 0.2308]) -Greedy action tensor([ 0.9497, -0.8746, -0.5700, 0.2777]) tensor([0.5289, 0.0853, 0.1157, 0.2701]) -Greedy action tensor([ 1.3918, -0.7142, -0.2551, 0.1514]) tensor([0.6236, 0.0759, 0.1201, 0.1804]) -Greedy action tensor([ 0.6965, -0.4330, -0.1400, -0.0875]) tensor([0.4519, 0.1461, 0.1958, 0.2063]) -Greedy action tensor([ 0.8463, -0.5817, -0.2150, 0.1103]) tensor([0.4843, 0.1161, 0.1676, 0.2320]) -Greedy action tensor([ 1.1743, -0.3895, -0.2733, -0.0954]) tensor([0.5796, 0.1213, 0.1363, 0.1628]) -Greedy action tensor([ 1.1769, -0.5387, -0.3615, -0.0707]) tensor([0.5946, 0.1069, 0.1277, 0.1708]) -Greedy action tensor([ 0.8292, -0.5780, -0.5283, 0.0845]) tensor([0.5058, 0.1238, 0.1302, 0.2402]) -Greedy action tensor([ 1.2344, -0.2040, -0.3442, -0.2037]) tensor([0.5949, 0.1412, 0.1227, 0.1412]) -Greedy action tensor([ 0.6817, -0.3247, -0.0256, -0.1100]) tensor([0.4326, 0.1581, 0.2133, 0.1960]) -Greedy action tensor([1.0432, 0.1372, 0.7505, 1.2136]) tensor([0.2997, 0.1211, 0.2237, 0.3554]) -Greedy action tensor([ 1.1725, -0.4398, 0.7143, 0.9715]) tensor([0.3774, 0.0753, 0.2387, 0.3087]) -Greedy action tensor([ 1.1103, -0.7625, 0.5477, 0.8142]) tensor([0.4053, 0.0623, 0.2309, 0.3015]) -Greedy action tensor([ 1.1606, -0.2307, 0.6970, 0.5523]) tensor([0.4129, 0.1027, 0.2597, 0.2247]) -Greedy action tensor([ 1.7193, -0.5202, 0.7186, 2.0513]) tensor([0.3487, 0.0371, 0.1282, 0.4860]) -Greedy action tensor([ 1.0705, -0.7560, 0.3738, 0.5063]) tensor([0.4488, 0.0723, 0.2236, 0.2553]) -Greedy action tensor([ 1.1634, -0.7438, 0.1895, 1.1273]) tensor([0.4015, 0.0596, 0.1516, 0.3873]) -Greedy action tensor([ 2.0541, -0.3378, 0.7699, 1.1027]) tensor([0.5699, 0.0521, 0.1578, 0.2201]) -Greedy action tensor([ 1.4763, -0.6676, 0.4911, 1.3063]) tensor([0.4284, 0.0502, 0.1600, 0.3614]) -Greedy action tensor([ 1.2872, -0.6753, 1.0551, 0.5942]) tensor([0.4109, 0.0577, 0.3258, 0.2055]) -Greedy action tensor([ 1.1446, -0.2424, 1.1195, 1.2182]) tensor([0.3029, 0.0757, 0.2954, 0.3260]) -Greedy action tensor([ 1.6840, -0.2383, 0.8387, 1.8754]) tensor([0.3589, 0.0525, 0.1541, 0.4346]) -Greedy action tensor([ 0.9543, -0.0010, 0.5387, 0.2110]) tensor([0.3968, 0.1526, 0.2619, 0.1887]) -Greedy action tensor([ 1.9587, -0.3780, 0.2853, 0.7347]) tensor([0.6336, 0.0612, 0.1189, 0.1863]) -Greedy action tensor([ 2.1624, -0.9426, 0.5709, 1.5904]) tensor([0.5516, 0.0247, 0.1123, 0.3113]) -Greedy action tensor([ 1.8658, -0.3417, 1.1084, 2.0967]) tensor([0.3523, 0.0387, 0.1652, 0.4438]) -Greedy action tensor([ 1.9279, -0.6005, 0.9820, 1.7433]) tensor([0.4349, 0.0347, 0.1689, 0.3616]) -Greedy action tensor([ 1.3489, -0.7447, 1.1273, 1.2091]) tensor([0.3579, 0.0441, 0.2868, 0.3112]) -Greedy action tensor([ 2.3624, -0.2620, 1.5270, 0.8747]) tensor([0.5774, 0.0418, 0.2504, 0.1304]) -Greedy action tensor([ 1.9799, -0.7170, 1.5253, 1.2016]) tensor([0.4627, 0.0312, 0.2937, 0.2125]) -Greedy action tensor([2.0024, 0.5307, 0.4525, 1.2333]) tensor([0.5249, 0.1205, 0.1114, 0.2432]) -Greedy action tensor([ 2.0435, -0.6745, 0.6374, 1.4319]) tensor([0.5395, 0.0356, 0.1322, 0.2927]) -Greedy action tensor([ 1.8314, -0.8067, -0.2547, 0.7650]) tensor([0.6494, 0.0464, 0.0806, 0.2235]) -Greedy action tensor([ 0.1651, -0.8022, 0.2073, 0.9924]) tensor([0.2123, 0.0807, 0.2214, 0.4855]) -Greedy action tensor([1.2681, 0.4441, 1.5428, 0.3985]) tensor([0.3151, 0.1382, 0.4147, 0.1321]) -Greedy action tensor([ 1.1740, -0.5429, 1.4102, 1.0678]) tensor([0.2989, 0.0537, 0.3786, 0.2688]) -Greedy action tensor([ 1.3759, -0.1426, -0.3488, -0.0600]) tensor([0.6116, 0.1340, 0.1090, 0.1455]) -Greedy action tensor([ 1.7615, -0.1205, 0.3554, -0.0326]) tensor([0.6395, 0.0974, 0.1567, 0.1063]) -Greedy action tensor([1.4576, 0.0517, 1.2794, 1.1731]) tensor([0.3528, 0.0865, 0.2952, 0.2654]) -Greedy action tensor([ 1.6826, -0.4561, 1.4871, 1.0289]) tensor([0.4064, 0.0479, 0.3343, 0.2114]) -Greedy action tensor([ 1.3561, -0.4184, 0.9660, 1.6412]) tensor([0.3148, 0.0534, 0.2131, 0.4187]) -Greedy action tensor([ 0.7686, -0.6790, 1.5961, 1.0549]) tensor([0.2060, 0.0484, 0.4713, 0.2743]) -Greedy action tensor([ 1.5035, -0.0450, -0.5337, -0.7295]) tensor([0.6896, 0.1466, 0.0899, 0.0739]) -Greedy action tensor([1.4414, 0.3683, 0.8179, 1.1494]) tensor([0.3810, 0.1303, 0.2042, 0.2845]) -Greedy action tensor([ 1.1914, -0.2888, 0.7070, 0.5617]) tensor([0.4208, 0.0958, 0.2593, 0.2242]) -Greedy action tensor([ 1.9392, -0.3370, 0.2643, 1.2571]) tensor([0.5569, 0.0572, 0.1043, 0.2816]) -Greedy action tensor([1.4027, 0.2718, 0.2994, 0.4600]) tensor([0.4892, 0.1579, 0.1623, 0.1906]) -Greedy action tensor([ 1.5032, -1.6294, 1.0671, 0.3040]) tensor([0.5021, 0.0219, 0.3246, 0.1514]) -Greedy action tensor([ 1.0381, -0.8947, 1.0505, 0.8764]) tensor([0.3325, 0.0481, 0.3366, 0.2828]) -Greedy action tensor([ 1.5585, -0.5520, 1.3964, 1.2494]) tensor([0.3696, 0.0448, 0.3143, 0.2713]) -Greedy action tensor([ 2.0797, -0.5345, 0.4552, 1.6037]) tensor([0.5287, 0.0387, 0.1042, 0.3284]) -Greedy action tensor([ 1.6328, -0.3129, 0.0683, 0.7791]) tensor([0.5624, 0.0804, 0.1177, 0.2395]) -Greedy action tensor([1.1328, 0.1211, 0.7267, 0.9035]) tensor([0.3540, 0.1287, 0.2358, 0.2815]) -Greedy action tensor([ 2.5282, -0.3859, 0.4177, 1.7327]) tensor([0.6147, 0.0334, 0.0745, 0.2775]) -Greedy action tensor([ 1.7055, -0.5353, 0.3872, 1.1975]) tensor([0.5062, 0.0538, 0.1354, 0.3045]) -Greedy action tensor([ 1.1693, -0.2053, -0.6808, 0.5574]) tensor([0.5122, 0.1296, 0.0805, 0.2778]) -Greedy action tensor([1.5506, 0.2311, 1.5054, 1.0205]) tensor([0.3557, 0.0951, 0.3399, 0.2093]) -Greedy action tensor([ 2.1331, -1.2696, 1.3071, 1.2994]) tensor([0.5248, 0.0175, 0.2298, 0.2280]) -Greedy action tensor([ 1.4115, -0.4144, 0.1017, 1.0050]) tensor([0.4769, 0.0768, 0.1287, 0.3176]) -Greedy action tensor([ 1.7205, -1.2021, 0.9417, 1.8109]) tensor([0.3835, 0.0206, 0.1760, 0.4198]) -Greedy action tensor([1.6332, 0.4734, 1.0981, 1.9317]) tensor([0.3080, 0.0966, 0.1803, 0.4151]) -Greedy action tensor([ 1.4574, -0.2461, 1.0478, 0.3940]) tensor([0.4564, 0.0831, 0.3030, 0.1576]) -Greedy action tensor([ 1.8898, -0.4824, 1.1595, 1.3257]) tensor([0.4664, 0.0435, 0.2247, 0.2653]) -Greedy action tensor([ 1.7893, -0.0455, 0.8322, 0.8310]) tensor([0.5189, 0.0828, 0.1992, 0.1990]) -Greedy action tensor([ 1.4491, -0.3511, 0.3554, 0.8216]) tensor([0.4916, 0.0812, 0.1647, 0.2625]) -Greedy action tensor([ 1.3934, -0.0064, -0.2915, 0.2654]) tensor([0.5695, 0.1405, 0.1056, 0.1844]) -Greedy action tensor([ 1.3107, -0.3763, 1.1789, 0.9511]) tensor([0.3624, 0.0671, 0.3176, 0.2529]) -Greedy action tensor([ 2.3147, -0.1723, 0.6649, 1.6425]) tensor([0.5600, 0.0466, 0.1076, 0.2859]) -Greedy action tensor([ 0.7694, -0.6860, 0.7942, 0.3637]) tensor([0.3419, 0.0798, 0.3505, 0.2279]) -Greedy action tensor([1.4570, 0.5005, 1.2818, 1.0522]) tensor([0.3459, 0.1329, 0.2904, 0.2308]) -Greedy action tensor([ 2.2745, -0.5526, 1.4165, 0.8811]) tensor([0.5776, 0.0342, 0.2449, 0.1434]) -Greedy action tensor([ 1.7137, -0.5805, -0.0313, 0.4376]) tensor([0.6432, 0.0649, 0.1123, 0.1795]) -Greedy action tensor([ 0.8866, -0.3280, 0.6715, 1.2583]) tensor([0.2814, 0.0835, 0.2269, 0.4081]) -Greedy action tensor([ 1.3725, 0.6340, -0.3685, 0.7002]) tensor([0.4622, 0.2208, 0.0810, 0.2360]) -Greedy action tensor([ 1.6425, -0.4845, 0.9298, 0.9810]) tensor([0.4705, 0.0561, 0.2307, 0.2428]) -Greedy action tensor([ 1.0992, -1.2022, 0.3945, 1.2194]) tensor([0.3674, 0.0368, 0.1816, 0.4143]) -Greedy action tensor([ 1.6848, -1.3139, 0.2163, 1.5173]) tensor([0.4704, 0.0235, 0.1083, 0.3979]) -Greedy action tensor([ 1.6721, -0.3499, 1.1054, 1.6170]) tensor([0.3779, 0.0500, 0.2144, 0.3577]) -Greedy action tensor([ 0.9783, -0.5088, 0.4726, 1.0623]) tensor([0.3428, 0.0775, 0.2068, 0.3729]) -Greedy action tensor([ 1.0570, -0.1714, 1.3247, 0.7568]) tensor([0.2994, 0.0876, 0.3913, 0.2217]) -Greedy action tensor([ 2.0668, -1.1207, -0.0794, 0.9091]) tensor([0.6792, 0.0280, 0.0794, 0.2134]) -Greedy action tensor([ 1.3021, -0.2178, 1.2637, 0.6425]) tensor([0.3706, 0.0811, 0.3567, 0.1916]) -Greedy action tensor([ 1.9744, -0.1103, 0.3436, 1.3777]) tensor([0.5345, 0.0665, 0.1047, 0.2944]) -Greedy action tensor([ 1.6704, -0.4547, 1.4056, 1.0569]) tensor([0.4118, 0.0492, 0.3160, 0.2230]) -Greedy action tensor([ 0.9473, 0.0069, -0.7229, 1.5594]) tensor([0.2921, 0.1141, 0.0550, 0.5388]) -Greedy action tensor([ 1.7860, 0.5029, -0.1201, 0.2992]) tensor([0.6054, 0.1678, 0.0900, 0.1369]) -Greedy action tensor([ 1.2004, -0.7040, -0.2909, 0.7342]) tensor([0.4997, 0.0744, 0.1125, 0.3135]) -Greedy action tensor([1.6584, 0.2544, 0.4850, 1.0377]) tensor([0.4779, 0.1174, 0.1478, 0.2569]) -Greedy action tensor([ 1.4223, -0.6165, 0.2650, 1.3200]) tensor([0.4260, 0.0555, 0.1339, 0.3846]) -Greedy action tensor([ 0.8084, -0.5978, 1.0750, 1.1243]) tensor([0.2550, 0.0625, 0.3328, 0.3497]) -Greedy action tensor([1.4367, 0.3939, 0.3405, 0.2011]) tensor([0.5057, 0.1783, 0.1690, 0.1470]) -Greedy action tensor([ 1.7462, -0.6366, 0.2773, 1.1793]) tensor([0.5292, 0.0488, 0.1218, 0.3002]) -Greedy action tensor([-1.0766, -0.6401, 0.4035, 0.3627]) tensor([0.0896, 0.1387, 0.3937, 0.3780]) -Greedy action tensor([-2.0062, -0.9422, 0.3357, -0.2213]) tensor([0.0494, 0.1430, 0.5134, 0.2942]) -Greedy action tensor([-1.9431, -0.9774, 0.2931, -0.2872]) tensor([0.0549, 0.1442, 0.5135, 0.2874]) -Greedy action tensor([-1.7090, -0.7069, 0.2270, -0.4623]) tensor([0.0708, 0.1927, 0.4904, 0.2461]) -Greedy action tensor([-1.7438, -0.8374, 0.0369, -0.3915]) tensor([0.0753, 0.1865, 0.4470, 0.2912]) -Greedy action tensor([-1.7243, -0.8142, 0.2814, -0.4990]) tensor([0.0698, 0.1735, 0.5189, 0.2378]) -Greedy action tensor([-1.6632, -0.5985, 0.4041, -0.0656]) tensor([0.0597, 0.1732, 0.4720, 0.2951]) -Greedy action tensor([-1.6417, -0.5181, 0.5784, -0.0337]) tensor([0.0547, 0.1683, 0.5038, 0.2732]) -Greedy action tensor([-1.4069, -0.5865, 0.3804, 0.1294]) tensor([0.0720, 0.1635, 0.4300, 0.3345]) -Greedy action tensor([-0.5956, 0.3093, -0.1339, -0.0168]) tensor([0.1462, 0.3612, 0.2319, 0.2607]) -Greedy action tensor([-1.9798, -0.8982, 0.4161, -0.2861]) tensor([0.0491, 0.1448, 0.5390, 0.2671]) -Greedy action tensor([-1.9182, -0.8633, 0.1185, -0.3741]) tensor([0.0616, 0.1770, 0.4726, 0.2887]) -Greedy action tensor([-1.9988, -0.9441, 0.3510, -0.1709]) tensor([0.0486, 0.1395, 0.5095, 0.3023]) -Greedy action tensor([-1.1156, -0.2908, -0.3515, -0.2145]) tensor([0.1267, 0.2891, 0.2721, 0.3120]) -Greedy action tensor([-0.7273, -0.6547, 0.3852, 0.1552]) tensor([0.1327, 0.1427, 0.4038, 0.3208]) -Greedy action tensor([-1.7872, -0.5487, 0.8814, 0.1777]) tensor([0.0385, 0.1327, 0.5545, 0.2743]) -Greedy action tensor([-1.5441, 0.1331, 0.6699, -0.5687]) tensor([0.0551, 0.2947, 0.5041, 0.1461]) -Greedy action tensor([-1.4549, -0.6402, -0.1291, -0.3669]) tensor([0.1001, 0.2260, 0.3768, 0.2971]) -Greedy action tensor([-1.0211, -0.2633, 0.2200, -0.2567]) tensor([0.1144, 0.2441, 0.3958, 0.2457]) -Greedy action tensor([-1.2482, -0.2452, 0.0864, -0.3974]) tensor([0.1014, 0.2763, 0.3850, 0.2373]) -Greedy action tensor([-1.6582, -0.4902, 0.4703, -0.0149]) tensor([0.0562, 0.1808, 0.4723, 0.2907]) -Greedy action tensor([-1.2982, -0.5778, 0.3649, 0.1551]) tensor([0.0793, 0.1630, 0.4184, 0.3392]) -Greedy action tensor([-1.4858, -0.1064, 0.7107, -0.6127]) tensor([0.0611, 0.2428, 0.5497, 0.1463]) -Greedy action tensor([-1.1617, 0.0647, 0.5754, -0.5853]) tensor([0.0843, 0.2872, 0.4786, 0.1499]) -Greedy action tensor([-1.3691, -0.6292, 0.4778, -0.1026]) tensor([0.0770, 0.1614, 0.4883, 0.2733]) -Greedy action tensor([-1.9381, -0.7219, 0.3057, -0.2291]) tensor([0.0517, 0.1746, 0.4879, 0.2858]) -Greedy action tensor([-2.0608, -0.8653, 0.6406, -0.0136]) tensor([0.0371, 0.1226, 0.5529, 0.2874]) -Greedy action tensor([-0.9147, -0.1437, 0.3734, -0.2632]) tensor([0.1149, 0.2483, 0.4165, 0.2204]) -Greedy action tensor([-1.0824, -0.6300, 0.5824, 0.0308]) tensor([0.0917, 0.1442, 0.4848, 0.2793]) -Greedy action tensor([-1.2314, -0.6621, 0.5525, 0.5065]) tensor([0.0694, 0.1227, 0.4132, 0.3947]) -Greedy action tensor([-1.7286, -0.9782, 0.1078, -0.7690]) tensor([0.0833, 0.1765, 0.5227, 0.2175]) -Greedy action tensor([-1.0142, 0.2199, 0.0831, -0.2773]) tensor([0.1050, 0.3608, 0.3147, 0.2195]) -Greedy action tensor([-1.9525, -0.7974, 0.3160, -0.1549]) tensor([0.0503, 0.1597, 0.4863, 0.3037]) -Greedy action tensor([-0.7352, -0.6059, 0.3431, -0.1356]) tensor([0.1450, 0.1650, 0.4261, 0.2640]) -Greedy action tensor([-1.7274, -0.5975, 0.8088, 0.1242]) tensor([0.0433, 0.1340, 0.5469, 0.2758]) -Greedy action tensor([-2.0514, -0.8472, 0.6701, -0.0263]) tensor([0.0369, 0.1230, 0.5607, 0.2794]) -Greedy action tensor([-2.0419, -0.7688, 0.9493, 0.1352]) tensor([0.0300, 0.1073, 0.5978, 0.2649]) -Greedy action tensor([-0.9068, 0.3680, 0.3565, -0.3935]) tensor([0.1022, 0.3656, 0.3615, 0.1707]) -Greedy action tensor([-1.6932, -0.5068, 0.5991, -0.0526]) tensor([0.0517, 0.1694, 0.5120, 0.2668]) -Greedy action tensor([-0.9405, -0.1231, 0.0484, -0.1720]) tensor([0.1233, 0.2792, 0.3315, 0.2659]) -Greedy action tensor([-2.0144, -0.9645, 0.3730, -0.2361]) tensor([0.0484, 0.1383, 0.5268, 0.2865]) -Greedy action tensor([-1.0298, -0.1192, -0.3416, -0.2453]) tensor([0.1304, 0.3242, 0.2596, 0.2858]) -Greedy action tensor([-1.9920, -0.5715, 0.7448, -0.0148]) tensor([0.0360, 0.1489, 0.5553, 0.2598]) -Greedy action tensor([-2.0081, -0.6625, 0.8248, 0.0961]) tensor([0.0333, 0.1279, 0.5658, 0.2730]) -Greedy action tensor([-0.7071, -0.3416, 0.3105, -0.1523]) tensor([0.1439, 0.2074, 0.3981, 0.2506]) -Greedy action tensor([-1.2146, -0.3204, 0.5293, -0.2101]) tensor([0.0841, 0.2056, 0.4808, 0.2295]) -Greedy action tensor([-0.4556, 0.1073, 0.1388, 0.0501]) tensor([0.1606, 0.2820, 0.2910, 0.2663]) -Greedy action tensor([-1.0140, -0.6094, 0.3359, 0.2974]) tensor([0.0993, 0.1489, 0.3831, 0.3687]) -Greedy action tensor([-1.9853, -0.6990, 0.5023, -0.0084]) tensor([0.0419, 0.1516, 0.5040, 0.3025]) -Greedy action tensor([-1.2404, 0.3187, 0.4580, -0.4131]) tensor([0.0740, 0.3520, 0.4046, 0.1693]) -Greedy action tensor([-1.8228, -0.9161, 0.2206, -0.3541]) tensor([0.0644, 0.1594, 0.4967, 0.2796]) -Greedy action tensor([-1.7201, -0.7137, 0.1581, -0.0969]) tensor([0.0652, 0.1783, 0.4263, 0.3303]) -Greedy action tensor([-1.5208, 0.0628, 0.4813, -0.1927]) tensor([0.0586, 0.2858, 0.4343, 0.2213]) -Greedy action tensor([-2.0408, -0.8616, 0.5475, -0.1528]) tensor([0.0414, 0.1346, 0.5507, 0.2734]) -Greedy action tensor([-1.8492, -0.9820, 0.1857, -0.4335]) tensor([0.0660, 0.1571, 0.5050, 0.2719]) -Greedy action tensor([-2.0517, -0.9299, 0.6552, 0.0284]) tensor([0.0370, 0.1135, 0.5537, 0.2959]) -Greedy action tensor([-2.0509, -0.8336, 0.7684, 0.0554]) tensor([0.0341, 0.1151, 0.5710, 0.2799]) -Greedy action tensor([-1.5433, -0.5169, 0.7269, -0.2689]) tensor([0.0587, 0.1637, 0.5679, 0.2098]) -Greedy action tensor([-2.0033, -0.9313, 0.9918, 0.3562]) tensor([0.0290, 0.0847, 0.5794, 0.3069]) -Greedy action tensor([-2.0391, -0.7126, 0.8407, 0.1499]) tensor([0.0317, 0.1196, 0.5653, 0.2833]) -Greedy action tensor([-1.0510, -0.6105, 0.3527, 0.4063]) tensor([0.0916, 0.1423, 0.3728, 0.3933]) -Greedy action tensor([-1.9968, -0.8132, 0.3674, -0.0887]) tensor([0.0462, 0.1509, 0.4914, 0.3115]) -Greedy action tensor([-1.6219, -0.9154, 0.0423, -0.4965]) tensor([0.0878, 0.1780, 0.4637, 0.2705]) -Greedy action tensor([-1.0676, -0.5762, 0.2894, 0.3729]) tensor([0.0931, 0.1522, 0.3616, 0.3931]) -Greedy action tensor([-0.2846, -0.2374, 0.1634, 0.1349]) tensor([0.1947, 0.2042, 0.3048, 0.2963]) -Greedy action tensor([-1.0559, -0.5920, 0.3865, 0.0378]) tensor([0.1020, 0.1622, 0.4314, 0.3044]) -Greedy action tensor([-1.2774, -0.5964, 0.3280, 0.1611]) tensor([0.0822, 0.1624, 0.4092, 0.3463]) -Greedy action tensor([-1.2386, 0.0382, 0.3368, -0.1668]) tensor([0.0810, 0.2906, 0.3917, 0.2367]) -Greedy action tensor([-1.4660, -0.2009, 0.0159, -0.5219]) tensor([0.0868, 0.3077, 0.3822, 0.2232]) -Greedy action tensor([-1.9804, -0.9838, 0.4468, -0.2297]) tensor([0.0481, 0.1303, 0.5447, 0.2769]) -Greedy action tensor([-1.6823, -0.8187, -0.0214, -0.4258]) tensor([0.0823, 0.1952, 0.4333, 0.2892]) -Greedy action tensor([-2.0075, -0.8329, 0.3403, -0.1303]) tensor([0.0471, 0.1524, 0.4927, 0.3078]) -Greedy action tensor([-1.7075, -0.8042, 0.1627, -0.3817]) tensor([0.0729, 0.1798, 0.4729, 0.2744]) -Greedy action tensor([-1.5165, -0.5630, 0.4390, 0.0910]) tensor([0.0639, 0.1658, 0.4515, 0.3188]) -Greedy action tensor([-2.0355, -0.9224, 0.5678, -0.1176]) tensor([0.0411, 0.1249, 0.5546, 0.2794]) -Greedy action tensor([-1.7645, -1.0545, 0.1557, -0.6196]) tensor([0.0769, 0.1565, 0.5249, 0.2417]) -Greedy action tensor([-1.9540, -0.5946, 0.9399, 0.0947]) tensor([0.0326, 0.1268, 0.5881, 0.2526]) -Greedy action tensor([-1.2880, -0.5993, 0.5703, 0.3479]) tensor([0.0688, 0.1370, 0.4411, 0.3532]) -Greedy action tensor([-1.8617, -0.5375, 0.3030, -0.2783]) tensor([0.0545, 0.2049, 0.4750, 0.2656]) -Greedy action tensor([-1.8182, -0.6868, 0.5649, -0.0770]) tensor([0.0484, 0.1502, 0.5251, 0.2763]) -Greedy action tensor([-2.0463, -0.7929, 0.8926, 0.1049]) tensor([0.0313, 0.1095, 0.5906, 0.2687]) -Greedy action tensor([ 0.2665, 0.0078, 0.2076, -0.1292]) tensor([0.2952, 0.2279, 0.2783, 0.1987]) -Greedy action tensor([ 0.1807, -0.0208, 0.1661, -0.2318]) tensor([0.2886, 0.2359, 0.2844, 0.1911]) -Greedy action tensor([ 0.8672, -0.5534, -0.0199, -0.6496]) tensor([0.5340, 0.1290, 0.2199, 0.1172]) -Greedy action tensor([ 0.4186, -0.0945, 0.0744, -0.4418]) tensor([0.3663, 0.2192, 0.2596, 0.1549]) -Greedy action tensor([ 0.6897, -0.2511, -0.0874, -0.6209]) tensor([0.4718, 0.1841, 0.2169, 0.1272]) -Greedy action tensor([ 0.4848, 0.1121, 0.1016, -0.1995]) tensor([0.3478, 0.2396, 0.2371, 0.1755]) -Greedy action tensor([ 0.6893, -0.0430, -0.1264, -0.3746]) tensor([0.4409, 0.2120, 0.1950, 0.1521]) -Greedy action tensor([ 0.5451, -0.1751, -0.0475, -0.2371]) tensor([0.4005, 0.1949, 0.2214, 0.1832]) -Greedy action tensor([ 0.7258, -0.3376, 0.1162, -0.5713]) tensor([0.4625, 0.1597, 0.2514, 0.1264]) -Greedy action tensor([ 0.3059, 0.1879, 0.1793, -0.3047]) tensor([0.3019, 0.2683, 0.2659, 0.1639]) -Greedy action tensor([ 0.2978, 0.1028, 0.1461, -0.3111]) tensor([0.3100, 0.2551, 0.2663, 0.1686]) -Greedy action tensor([ 0.2913, 0.0718, 0.1445, -0.2122]) tensor([0.3057, 0.2455, 0.2640, 0.1848]) -Greedy action tensor([ 0.3499, -0.0537, 0.1321, -0.3402]) tensor([0.3363, 0.2246, 0.2705, 0.1687]) -Greedy action tensor([ 0.7012, -0.3088, -0.0384, -0.6115]) tensor([0.4738, 0.1726, 0.2261, 0.1275]) -Greedy action tensor([ 0.3653, -0.1763, 0.0747, -0.3666]) tensor([0.3558, 0.2070, 0.2661, 0.1711]) -Greedy action tensor([ 0.4478, 0.0788, -0.0397, -0.1597]) tensor([0.3508, 0.2426, 0.2155, 0.1911]) -Greedy action tensor([ 0.4022, -0.0343, 0.1885, -0.1767]) tensor([0.3317, 0.2144, 0.2679, 0.1859]) -Greedy action tensor([ 0.5562, -0.0047, 0.0472, -0.3608]) tensor([0.3889, 0.2219, 0.2337, 0.1554]) -Greedy action tensor([ 0.5945, -0.0588, 0.0225, -0.3673]) tensor([0.4054, 0.2109, 0.2288, 0.1549]) -Greedy action tensor([ 0.3790, -0.0297, 0.2640, -0.1616]) tensor([0.3186, 0.2118, 0.2840, 0.1856]) -Greedy action tensor([ 0.3920, -0.0445, 0.1473, -0.2129]) tensor([0.3361, 0.2172, 0.2631, 0.1835]) -Greedy action tensor([ 0.8414, -0.4433, -0.0955, -0.5014]) tensor([0.5182, 0.1434, 0.2031, 0.1353]) -Greedy action tensor([ 0.5677, -0.0560, 0.0582, -0.2465]) tensor([0.3876, 0.2078, 0.2329, 0.1717]) -Greedy action tensor([ 0.9828, -0.4871, -0.1890, -0.6119]) tensor([0.5738, 0.1319, 0.1778, 0.1165]) -Greedy action tensor([ 0.3574, -0.0167, 0.0583, -0.3370]) tensor([0.3415, 0.2349, 0.2532, 0.1705]) -Greedy action tensor([ 0.1302, -0.0357, 0.2078, -0.0997]) tensor([0.2686, 0.2276, 0.2903, 0.2135]) -Greedy action tensor([ 0.6078, -0.0699, 0.0678, -0.4323]) tensor([0.4092, 0.2078, 0.2384, 0.1446]) -Greedy action tensor([0.4500, 0.2260, 0.0086, 0.0081]) tensor([0.3241, 0.2591, 0.2084, 0.2083]) -Greedy action tensor([ 0.4412, 0.0843, -0.0069, -0.0208]) tensor([0.3369, 0.2357, 0.2152, 0.2122]) -Greedy action tensor([ 0.3446, -0.0878, 0.0870, -0.4575]) tensor([0.3484, 0.2261, 0.2693, 0.1562]) -Greedy action tensor([ 0.4623, -0.0677, 0.1096, -0.2880]) tensor([0.3618, 0.2130, 0.2543, 0.1709]) -Greedy action tensor([ 0.2412, 0.0095, 0.1269, -0.3255]) tensor([0.3075, 0.2439, 0.2742, 0.1744]) -Greedy action tensor([ 0.9862, -0.4988, -0.0837, -0.6815]) tensor([0.5687, 0.1288, 0.1951, 0.1073]) -Greedy action tensor([0.2599, 0.0965, 0.0902, 0.0340]) tensor([0.2864, 0.2433, 0.2418, 0.2285]) -Greedy action tensor([ 0.2193, 0.0935, 0.1971, -0.1704]) tensor([0.2827, 0.2493, 0.2765, 0.1915]) -Greedy action tensor([ 0.2934, 0.0746, 0.2149, -0.0399]) tensor([0.2903, 0.2333, 0.2684, 0.2080]) -Greedy action tensor([ 0.5934, -0.0643, 0.0970, -0.2849]) tensor([0.3934, 0.2038, 0.2394, 0.1634]) -Greedy action tensor([ 0.4029, 0.0590, -0.0507, -0.0016]) tensor([0.3320, 0.2354, 0.2110, 0.2216]) -Greedy action tensor([ 0.4193, -0.2644, 0.1245, -0.3298]) tensor([0.3673, 0.1854, 0.2736, 0.1737]) -Greedy action tensor([ 0.2478, -0.1044, 0.1480, -0.2174]) tensor([0.3090, 0.2173, 0.2797, 0.1941]) -Greedy action tensor([ 0.3632, -0.0469, 0.1125, -0.2218]) tensor([0.3334, 0.2213, 0.2595, 0.1858]) -Greedy action tensor([ 0.3677, 0.1765, 0.1894, -0.0724]) tensor([0.3024, 0.2498, 0.2530, 0.1948]) -Greedy action tensor([ 0.2583, 0.0424, 0.0807, -0.2179]) tensor([0.3063, 0.2469, 0.2565, 0.1903]) -Greedy action tensor([ 0.9553, -0.5287, -0.0204, -0.6675]) tensor([0.5553, 0.1259, 0.2093, 0.1096]) -Greedy action tensor([ 0.3931, 0.0845, 0.1222, -0.2677]) tensor([0.3318, 0.2437, 0.2531, 0.1714]) -Greedy action tensor([ 1.0642, -0.5291, -0.0561, -0.6412]) tensor([0.5844, 0.1188, 0.1906, 0.1062]) -Greedy action tensor([ 0.2448, 0.0880, 0.0885, -0.0178]) tensor([0.2874, 0.2457, 0.2458, 0.2210]) -Greedy action tensor([ 0.3673, -0.0627, 0.1762, -0.1888]) tensor([0.3279, 0.2133, 0.2708, 0.1880]) -Greedy action tensor([ 0.5101, -0.1685, 0.2243, -0.4539]) tensor([0.3788, 0.1922, 0.2846, 0.1444]) -Greedy action tensor([ 0.3661, -0.1693, 0.0131, -0.2793]) tensor([0.3556, 0.2082, 0.2498, 0.1865]) -Greedy action tensor([ 0.3743, 0.0686, 0.2358, -0.2616]) tensor([0.3188, 0.2348, 0.2776, 0.1688]) -Greedy action tensor([ 0.4278, 0.1269, 0.0469, -0.3146]) tensor([0.3449, 0.2553, 0.2357, 0.1642]) -Greedy action tensor([ 0.2180, -0.0280, 0.1412, -0.0604]) tensor([0.2886, 0.2257, 0.2673, 0.2185]) -Greedy action tensor([ 0.3706, 0.0221, 0.1745, -0.3909]) tensor([0.3339, 0.2357, 0.2745, 0.1559]) -Greedy action tensor([ 0.3315, 0.0911, 0.1404, -0.1071]) tensor([0.3070, 0.2414, 0.2536, 0.1980]) -Greedy action tensor([ 0.2407, 0.0371, 0.2088, -0.0559]) tensor([0.2835, 0.2313, 0.2746, 0.2107]) -Greedy action tensor([ 0.4300, 0.1058, 0.1562, -0.1847]) tensor([0.3306, 0.2391, 0.2515, 0.1788]) -Greedy action tensor([ 0.3862, -0.0528, 0.1319, -0.3483]) tensor([0.3448, 0.2223, 0.2674, 0.1654]) -Greedy action tensor([ 0.3850, -0.0177, 0.1665, -0.1781]) tensor([0.3288, 0.2198, 0.2642, 0.1872]) -Greedy action tensor([ 0.7776, -0.2696, -0.0601, -0.6627]) tensor([0.4949, 0.1737, 0.2142, 0.1172]) -Greedy action tensor([ 0.1991, 0.0177, 0.1401, -0.2675]) tensor([0.2938, 0.2450, 0.2769, 0.1842]) -Greedy action tensor([ 0.4198, -0.0925, 0.2341, -0.4375]) tensor([0.3504, 0.2099, 0.2910, 0.1487]) -Greedy action tensor([ 0.4534, -0.0248, -0.0669, -0.2554]) tensor([0.3695, 0.2290, 0.2196, 0.1819]) -Greedy action tensor([ 1.0910, -0.7383, 0.1029, -0.8352]) tensor([0.5958, 0.0956, 0.2218, 0.0868]) -Greedy action tensor([0.3545, 0.1866, 0.0174, 0.0295]) tensor([0.3047, 0.2576, 0.2175, 0.2202]) -Greedy action tensor([ 0.6813, -0.2734, 0.0091, -0.3782]) tensor([0.4460, 0.1717, 0.2277, 0.1546]) -Greedy action tensor([ 0.5765, 0.0185, 0.0718, -0.4119]) tensor([0.3924, 0.2246, 0.2369, 0.1461]) -Greedy action tensor([ 0.5555, 0.0669, 0.0849, -0.3215]) tensor([0.3768, 0.2311, 0.2353, 0.1568]) -Greedy action tensor([ 0.2920, 0.1003, 0.1448, -0.1988]) tensor([0.3030, 0.2501, 0.2615, 0.1854]) -Greedy action tensor([ 0.2945, -0.1298, 0.1179, -0.3622]) tensor([0.3321, 0.2173, 0.2783, 0.1722]) -Greedy action tensor([ 0.1961, -0.0315, 0.1705, -0.2311]) tensor([0.2921, 0.2326, 0.2847, 0.1906]) -Greedy action tensor([ 0.3535, -0.2413, -0.2226, -0.4174]) tensor([0.3881, 0.2141, 0.2182, 0.1796]) -Greedy action tensor([ 1.0031, -0.4203, -0.0667, -0.7213]) tensor([0.5675, 0.1367, 0.1947, 0.1012]) -Greedy action tensor([ 0.2833, 0.0601, 0.1957, -0.1557]) tensor([0.2975, 0.2380, 0.2726, 0.1918]) -Greedy action tensor([ 0.3251, -0.1533, 0.1886, -0.3792]) tensor([0.3348, 0.2075, 0.2921, 0.1656]) -Greedy action tensor([ 0.3820, 0.2253, 0.1083, -0.1434]) tensor([0.3118, 0.2666, 0.2372, 0.1844]) -Greedy action tensor([ 0.2831, 0.1244, 0.1327, -0.0944]) tensor([0.2942, 0.2510, 0.2531, 0.2017]) -Greedy action tensor([ 0.3171, 0.0865, 0.0665, -0.1690]) tensor([0.3137, 0.2491, 0.2442, 0.1929]) -Greedy action tensor([ 0.5660, 0.0390, 0.0319, -0.3181]) tensor([0.3861, 0.2280, 0.2264, 0.1595]) -Greedy action tensor([ 0.1092, 0.0301, 0.1737, -0.0289]) tensor([0.2590, 0.2393, 0.2762, 0.2256]) -Greedy action tensor([ 0.5476, -0.0233, 0.0708, -0.3300]) tensor([0.3844, 0.2172, 0.2386, 0.1598]) -Greedy action tensor([ 0.2361, -0.0103, 0.0965, -0.0987]) tensor([0.2970, 0.2322, 0.2583, 0.2125]) -Greedy action tensor([ 1.2513, -0.4436, -0.2266, -0.0859]) tensor([0.5972, 0.1097, 0.1362, 0.1568]) -Greedy action tensor([ 0.8313, -0.5661, -0.3447, 0.1114]) tensor([0.4896, 0.1210, 0.1510, 0.2383]) -Greedy action tensor([ 0.7360, -0.4583, -0.1776, -0.0699]) tensor([0.4650, 0.1408, 0.1865, 0.2077]) -Greedy action tensor([ 1.5406, -0.5457, -0.3806, 0.3219]) tensor([0.6385, 0.0793, 0.0935, 0.1887]) -Greedy action tensor([ 0.7011, -0.4740, -0.0936, 0.0234]) tensor([0.4409, 0.1361, 0.1991, 0.2239]) -Greedy action tensor([ 0.8314, -0.5945, 0.1026, -0.2832]) tensor([0.4876, 0.1172, 0.2353, 0.1600]) -Greedy action tensor([ 1.4005, -0.3608, -0.0547, -0.1737]) tensor([0.6202, 0.1066, 0.1447, 0.1285]) -Greedy action tensor([ 0.6943, -0.4157, -0.1825, -0.0106]) tensor([0.4464, 0.1471, 0.1858, 0.2206]) -Greedy action tensor([ 1.2242, -0.6128, -0.1681, -0.0133]) tensor([0.5890, 0.0938, 0.1464, 0.1709]) -Greedy action tensor([ 1.5776, -0.4931, -0.6417, -0.0880]) tensor([0.7023, 0.0886, 0.0763, 0.1328]) -Greedy action tensor([ 1.2676, -0.6713, 0.0232, 0.2235]) tensor([0.5606, 0.0806, 0.1615, 0.1973]) -Greedy action tensor([ 1.6377, -0.1788, -0.4418, -0.1744]) tensor([0.6892, 0.1121, 0.0861, 0.1126]) -Greedy action tensor([ 1.2038, -0.4470, -0.2459, -0.1518]) tensor([0.5937, 0.1139, 0.1393, 0.1531]) -Greedy action tensor([ 0.5698, -0.2887, -0.1297, -0.0859]) tensor([0.4099, 0.1737, 0.2036, 0.2128]) -Greedy action tensor([ 1.0781, -0.6765, -0.2212, 0.1703]) tensor([0.5408, 0.0935, 0.1475, 0.2182]) -Greedy action tensor([ 1.1958, -0.4029, 0.0018, -0.1549]) tensor([0.5668, 0.1146, 0.1718, 0.1468]) -Greedy action tensor([ 1.1404, -0.5045, -0.4604, 0.0789]) tensor([0.5745, 0.1109, 0.1159, 0.1987]) -Greedy action tensor([ 1.5154, -0.5597, -0.5843, -0.1153]) tensor([0.6926, 0.0870, 0.0848, 0.1356]) -Greedy action tensor([ 1.4112, -0.3716, -0.4485, -0.2194]) tensor([0.6580, 0.1107, 0.1025, 0.1288]) -Greedy action tensor([ 1.6307, -0.5136, -0.4627, 0.0423]) tensor([0.6922, 0.0811, 0.0853, 0.1414]) -Greedy action tensor([ 1.3586, -0.4094, -0.4992, -0.2743]) tensor([0.6570, 0.1121, 0.1025, 0.1284]) -Greedy action tensor([ 1.1010, -0.4682, -0.2666, 0.1491]) tensor([0.5409, 0.1126, 0.1378, 0.2088]) -Greedy action tensor([ 0.6133, -0.5005, -0.1558, -0.1218]) tensor([0.4403, 0.1446, 0.2041, 0.2111]) -Greedy action tensor([ 0.9503, -0.3302, -0.0153, -0.2541]) tensor([0.5106, 0.1419, 0.1944, 0.1531]) -Greedy action tensor([ 0.7456, -0.6552, -0.2738, 0.1466]) tensor([0.4637, 0.1143, 0.1673, 0.2547]) -Greedy action tensor([ 0.9579, -0.6704, -0.4980, 0.3301]) tensor([0.5094, 0.1000, 0.1188, 0.2719]) -Greedy action tensor([ 0.8437, -0.7233, -0.2635, 0.1170]) tensor([0.4944, 0.1032, 0.1634, 0.2390]) -Greedy action tensor([ 0.7903, -0.1610, 0.0745, -0.2712]) tensor([0.4503, 0.1739, 0.2201, 0.1558]) -Greedy action tensor([ 1.0937, -0.3864, -0.1082, -0.1329]) tensor([0.5490, 0.1250, 0.1650, 0.1610]) -Greedy action tensor([ 0.8804, -0.2813, -0.1690, 0.1268]) tensor([0.4686, 0.1467, 0.1641, 0.2206]) -Greedy action tensor([ 0.7842, -0.6600, -0.2752, 0.2808]) tensor([0.4572, 0.1079, 0.1585, 0.2764]) -Greedy action tensor([ 1.0905, -0.6267, -0.0915, 0.2092]) tensor([0.5262, 0.0945, 0.1614, 0.2180]) -Greedy action tensor([ 1.2469, -0.5517, 0.0344, 0.0077]) tensor([0.5706, 0.0945, 0.1697, 0.1652]) -Greedy action tensor([ 1.0038, -0.3898, -0.3067, -0.1429]) tensor([0.5448, 0.1352, 0.1469, 0.1731]) -Greedy action tensor([ 0.6896, -0.5372, -0.1902, 0.1362]) tensor([0.4380, 0.1284, 0.1817, 0.2518]) -Greedy action tensor([ 0.9621, -0.3755, -0.2483, 0.0378]) tensor([0.5109, 0.1341, 0.1523, 0.2027]) -Greedy action tensor([ 0.6766, -0.2436, -0.3833, 0.1184]) tensor([0.4316, 0.1720, 0.1495, 0.2470]) -Greedy action tensor([ 1.1934, -0.3123, -0.0495, -0.0806]) tensor([0.5586, 0.1239, 0.1612, 0.1563]) -Greedy action tensor([ 0.7055, -0.5475, 0.0164, 0.0226]) tensor([0.4361, 0.1246, 0.2190, 0.2203]) -Greedy action tensor([ 1.4564, -0.6671, 0.0410, 0.3446]) tensor([0.5912, 0.0707, 0.1436, 0.1945]) -Greedy action tensor([ 0.7949, -0.5578, -0.1185, 0.1035]) tensor([0.4628, 0.1197, 0.1857, 0.2318]) -Greedy action tensor([ 1.2372, -0.5736, -0.2800, 0.3153]) tensor([0.5616, 0.0918, 0.1232, 0.2234]) -Greedy action tensor([ 1.8877, -0.5804, -0.3741, 0.0490]) tensor([0.7419, 0.0629, 0.0773, 0.1180]) -Greedy action tensor([ 1.4904, -0.6095, -0.4813, -0.0829]) tensor([0.6807, 0.0834, 0.0948, 0.1412]) -Greedy action tensor([ 0.7582, -0.1171, 0.1155, -0.0605]) tensor([0.4195, 0.1748, 0.2206, 0.1850]) -Greedy action tensor([ 0.9532, -0.3274, -0.1569, 0.0788]) tensor([0.4939, 0.1373, 0.1628, 0.2060]) -Greedy action tensor([ 1.4945, -0.6505, -0.2720, 0.2087]) tensor([0.6392, 0.0748, 0.1093, 0.1767]) -Greedy action tensor([ 1.1804, -0.5811, -0.6166, -0.2169]) tensor([0.6310, 0.1084, 0.1046, 0.1560]) -Greedy action tensor([ 0.6628, -0.5388, -0.1703, 0.1495]) tensor([0.4285, 0.1288, 0.1863, 0.2564]) -Greedy action tensor([ 0.7754, -0.4248, -0.1675, 0.1065]) tensor([0.4540, 0.1367, 0.1768, 0.2325]) -Greedy action tensor([ 0.8682, -0.3666, -0.1797, -0.1577]) tensor([0.5000, 0.1454, 0.1753, 0.1792]) -Greedy action tensor([ 1.2284, -0.5283, -0.2708, 0.1898]) tensor([0.5715, 0.0986, 0.1276, 0.2023]) -Greedy action tensor([ 0.5427, -0.5298, 0.0662, -0.0741]) tensor([0.3996, 0.1367, 0.2481, 0.2156]) -Greedy action tensor([ 0.8586, -0.3774, -0.2633, 0.0742]) tensor([0.4825, 0.1402, 0.1571, 0.2202]) -Greedy action tensor([ 0.8793, -0.2110, -0.2741, 0.0799]) tensor([0.4759, 0.1600, 0.1502, 0.2140]) -Greedy action tensor([ 1.4031, -0.6263, -0.2598, 0.1427]) tensor([0.6232, 0.0819, 0.1182, 0.1767]) -Greedy action tensor([ 1.2250, -0.5156, 0.1619, 0.1821]) tensor([0.5338, 0.0936, 0.1844, 0.1881]) -Greedy action tensor([ 0.7142, -0.4995, -0.1226, 0.1431]) tensor([0.4357, 0.1294, 0.1887, 0.2461]) -Greedy action tensor([ 0.5351, -0.1567, -0.0259, -0.0715]) tensor([0.3822, 0.1914, 0.2181, 0.2084]) -Greedy action tensor([ 0.7939, -0.3401, -0.0340, -0.1103]) tensor([0.4622, 0.1487, 0.2020, 0.1871]) -Greedy action tensor([ 0.8308, -0.4429, -0.0834, 0.1754]) tensor([0.4546, 0.1272, 0.1822, 0.2360]) -Greedy action tensor([ 0.9434, -0.5669, -0.3546, -0.0611]) tensor([0.5376, 0.1187, 0.1468, 0.1969]) -Greedy action tensor([ 0.8039, -0.5503, -0.1154, 0.0793]) tensor([0.4670, 0.1206, 0.1862, 0.2263]) -Greedy action tensor([ 0.8200, -0.5360, -0.2103, 0.1098]) tensor([0.4748, 0.1224, 0.1695, 0.2334]) -Greedy action tensor([ 1.0212, -0.6217, -0.4123, 0.2904]) tensor([0.5226, 0.1011, 0.1246, 0.2517]) -Greedy action tensor([ 0.9263, -0.4267, -0.1384, -0.0398]) tensor([0.5041, 0.1303, 0.1738, 0.1918]) -Greedy action tensor([ 0.9926, -0.5624, -0.3764, -0.0225]) tensor([0.5471, 0.1155, 0.1392, 0.1982]) -Greedy action tensor([ 0.9641, -0.4267, -0.0908, 0.1235]) tensor([0.4930, 0.1227, 0.1717, 0.2127]) -Greedy action tensor([ 0.5721, -0.3883, -0.0285, -0.0799]) tensor([0.4078, 0.1561, 0.2237, 0.2125]) -Greedy action tensor([ 0.9101, -0.3984, -0.1947, 0.1344]) tensor([0.4850, 0.1311, 0.1607, 0.2233]) -Greedy action tensor([ 0.5378, -0.3510, -0.0201, -0.0497]) tensor([0.3938, 0.1619, 0.2254, 0.2188]) -Greedy action tensor([ 0.8528, -0.4266, 0.0471, -0.0112]) tensor([0.4659, 0.1296, 0.2082, 0.1964]) -Greedy action tensor([ 0.8843, -0.6879, -0.1274, 0.1632]) tensor([0.4860, 0.1009, 0.1767, 0.2363]) -Greedy action tensor([ 1.1803, -0.1998, -0.2043, -0.2170]) tensor([0.5717, 0.1438, 0.1432, 0.1414]) -Greedy action tensor([ 1.2559, -0.5918, -0.2160, 0.3778]) tensor([0.5547, 0.0874, 0.1273, 0.2305]) -Greedy action tensor([ 1.2192, -0.4841, -0.3334, -0.0862]) tensor([0.6006, 0.1094, 0.1272, 0.1628]) -Greedy action tensor([ 0.4401, -0.2191, 0.2480, -0.1757]) tensor([0.3469, 0.1794, 0.2863, 0.1874]) -Greedy action tensor([ 0.8783, -0.1379, -0.0215, -0.1533]) tensor([0.4706, 0.1703, 0.1914, 0.1677]) -Greedy action tensor([ 1.1288, -0.5871, -0.3854, -0.1214]) tensor([0.5930, 0.1066, 0.1305, 0.1699]) -Greedy action tensor([ 1.6125, -0.5844, -0.5580, -0.1469]) tensor([0.7156, 0.0795, 0.0817, 0.1232]) -Greedy action tensor([ 0.8880, -0.5872, -0.4790, 0.0056]) tensor([0.5270, 0.1206, 0.1343, 0.2181]) -Greedy action tensor([-1.7317, -0.7591, 0.0050, -0.2849]) tensor([0.0737, 0.1949, 0.4184, 0.3131]) -Greedy action tensor([-2.0314, -0.8877, 0.4872, -0.1231]) tensor([0.0429, 0.1348, 0.5329, 0.2894]) -Greedy action tensor([-2.0123, -0.7034, 0.6502, -0.0344]) tensor([0.0381, 0.1410, 0.5457, 0.2752]) -Greedy action tensor([-1.7557, -0.6899, 0.2081, -0.1498]) tensor([0.0625, 0.1813, 0.4451, 0.3112]) -Greedy action tensor([-2.0357, -0.9548, 0.5063, -0.1514]) tensor([0.0430, 0.1269, 0.5468, 0.2833]) -Greedy action tensor([-1.9150, -0.6831, 0.8146, 0.0937]) tensor([0.0368, 0.1260, 0.5633, 0.2739]) -Greedy action tensor([-0.8145, -0.5600, 0.2663, 0.0843]) tensor([0.1300, 0.1676, 0.3831, 0.3193]) -Greedy action tensor([-2.0462, -0.8505, 0.5430, -0.0505]) tensor([0.0400, 0.1323, 0.5331, 0.2945]) -Greedy action tensor([-1.7527, -0.6716, 0.1999, -0.3012]) tensor([0.0655, 0.1931, 0.4617, 0.2797]) -Greedy action tensor([-1.8277, -0.4788, 0.6708, -0.0061]) tensor([0.0431, 0.1661, 0.5243, 0.2665]) -Greedy action tensor([-1.5129, -0.1884, 0.5338, -0.2444]) tensor([0.0623, 0.2342, 0.4821, 0.2214]) -Greedy action tensor([-1.0525, -0.5330, 0.3484, -0.0432]) tensor([0.1054, 0.1773, 0.4280, 0.2893]) -Greedy action tensor([-0.7357, -0.3480, 0.2801, -0.0389]) tensor([0.1381, 0.2035, 0.3813, 0.2772]) -Greedy action tensor([-1.1916, -0.4072, -0.2235, -0.2216]) tensor([0.1182, 0.2589, 0.3111, 0.3118]) -Greedy action tensor([-1.8267, -1.0000, 0.3620, -0.5020]) tensor([0.0626, 0.1431, 0.5588, 0.2355]) -Greedy action tensor([-1.9809, -0.6627, 1.0125, 0.0659]) tensor([0.0308, 0.1152, 0.6152, 0.2387]) -Greedy action tensor([-1.8800, -0.9646, 0.3995, -0.2515]) tensor([0.0545, 0.1360, 0.5321, 0.2775]) -Greedy action tensor([-1.8233, -0.4717, 0.6376, -0.0220]) tensor([0.0442, 0.1707, 0.5175, 0.2676]) -Greedy action tensor([-0.8124, -0.5577, 0.3107, 0.1376]) tensor([0.1258, 0.1623, 0.3867, 0.3252]) -Greedy action tensor([-1.7138, -0.4189, 0.5409, -0.0172]) tensor([0.0509, 0.1859, 0.4854, 0.2778]) -Greedy action tensor([-1.8139, -0.5991, 0.8911, 0.3249]) tensor([0.0360, 0.1212, 0.5377, 0.3052]) -Greedy action tensor([-1.8238, -0.8314, -0.0285, -0.3666]) tensor([0.0714, 0.1925, 0.4297, 0.3064]) -Greedy action tensor([-2.0357, -0.8313, 0.7393, -0.0452]) tensor([0.0361, 0.1204, 0.5792, 0.2643]) -Greedy action tensor([-1.9702, -0.6630, 1.0195, 0.2776]) tensor([0.0294, 0.1086, 0.5840, 0.2781]) -Greedy action tensor([-1.7353, -0.8170, 0.1478, -0.4061]) tensor([0.0722, 0.1808, 0.4744, 0.2727]) -Greedy action tensor([-1.7851, -0.6270, 0.4230, -0.3919]) tensor([0.0578, 0.1839, 0.5256, 0.2327]) -Greedy action tensor([-1.7365, -0.9405, 0.1771, -0.5310]) tensor([0.0750, 0.1663, 0.5083, 0.2504]) -Greedy action tensor([-1.4530, -0.0783, -0.0978, -0.4310]) tensor([0.0861, 0.3406, 0.3340, 0.2393]) -Greedy action tensor([-1.0873, -0.5872, 0.3075, 0.1122]) tensor([0.1000, 0.1649, 0.4033, 0.3318]) -Greedy action tensor([-1.9684, -0.8896, 0.2812, -0.2002]) tensor([0.0519, 0.1525, 0.4918, 0.3039]) -Greedy action tensor([-1.8275, -0.9594, 0.3028, -0.3769]) tensor([0.0622, 0.1483, 0.5239, 0.2655]) -Greedy action tensor([-1.8948, -0.5033, 0.6505, -0.1483]) tensor([0.0425, 0.1711, 0.5424, 0.2440]) -Greedy action tensor([-1.4791, -0.1249, 0.4161, -0.1197]) tensor([0.0648, 0.2512, 0.4315, 0.2525]) -Greedy action tensor([-2.0541, -0.8604, 0.7158, 0.0065]) tensor([0.0356, 0.1174, 0.5677, 0.2793]) -Greedy action tensor([-1.7224, -0.9437, 0.5003, -0.2976]) tensor([0.0604, 0.1315, 0.5572, 0.2509]) -Greedy action tensor([-2.0531, -0.8443, 0.7272, -0.0101]) tensor([0.0355, 0.1188, 0.5720, 0.2737]) -Greedy action tensor([-0.9860, -0.1485, -0.3722, -0.3182]) tensor([0.1407, 0.3251, 0.2599, 0.2743]) -Greedy action tensor([-1.3968, -0.5483, 0.4471, 0.2742]) tensor([0.0668, 0.1560, 0.4221, 0.3551]) -Greedy action tensor([-1.9459, -0.6589, 0.6024, 0.1568]) tensor([0.0391, 0.1415, 0.4995, 0.3199]) -Greedy action tensor([-1.0918, -0.5871, -0.1643, -0.3995]) tensor([0.1392, 0.2306, 0.3520, 0.2782]) -Greedy action tensor([-1.8905, -1.0020, 0.3255, -0.4835]) tensor([0.0599, 0.1457, 0.5496, 0.2448]) -Greedy action tensor([-1.9033, -0.9014, 0.1869, -0.5558]) tensor([0.0639, 0.1739, 0.5164, 0.2457]) -Greedy action tensor([-1.7515, -0.5022, 0.5615, -0.0367]) tensor([0.0496, 0.1731, 0.5015, 0.2757]) -Greedy action tensor([-0.8852, -0.4556, 0.4038, -0.2387]) tensor([0.1238, 0.1903, 0.4494, 0.2364]) -Greedy action tensor([-1.5612, -0.3885, 0.4016, -0.1455]) tensor([0.0646, 0.2088, 0.4602, 0.2663]) -Greedy action tensor([-1.6779, -0.5981, 0.2202, -0.2447]) tensor([0.0675, 0.1988, 0.4506, 0.2831]) -Greedy action tensor([-1.3857, -0.6307, 0.4118, 0.0362]) tensor([0.0751, 0.1599, 0.4535, 0.3115]) -Greedy action tensor([-0.6131, -0.3300, 0.2768, -0.1245]) tensor([0.1564, 0.2076, 0.3809, 0.2550]) -Greedy action tensor([-1.9725, -0.9420, 0.3966, -0.2394]) tensor([0.0496, 0.1391, 0.5305, 0.2808]) -Greedy action tensor([-1.8975, -0.9433, 0.4073, -0.2463]) tensor([0.0531, 0.1379, 0.5322, 0.2768]) -Greedy action tensor([-2.0456, -0.7059, 0.8679, 0.1635]) tensor([0.0309, 0.1180, 0.5695, 0.2816]) -Greedy action tensor([-1.0019, -0.3423, -0.4012, -0.1560]) tensor([0.1411, 0.2729, 0.2573, 0.3288]) -Greedy action tensor([-1.9065, -0.9902, 0.3062, -0.3080]) tensor([0.0569, 0.1422, 0.5198, 0.2812]) -Greedy action tensor([-1.8641, -0.8439, 0.1613, -0.3344]) tensor([0.0626, 0.1737, 0.4746, 0.2891]) -Greedy action tensor([-2.0164, -0.7946, 0.4000, -0.0856]) tensor([0.0445, 0.1509, 0.4982, 0.3065]) -Greedy action tensor([-1.3627, -0.5461, 0.4436, -0.0649]) tensor([0.0769, 0.1739, 0.4679, 0.2814]) -Greedy action tensor([-1.4554, 0.1948, 0.5641, -0.4270]) tensor([0.0605, 0.3149, 0.4555, 0.1691]) -Greedy action tensor([-1.8009, -0.8984, 0.3324, -0.2866]) tensor([0.0608, 0.1499, 0.5131, 0.2763]) -Greedy action tensor([-1.5232, -0.5924, 0.0336, -0.2344]) tensor([0.0840, 0.2130, 0.3983, 0.3047]) -Greedy action tensor([-1.9793, -0.7562, 1.0339, 0.2488]) tensor([0.0294, 0.0998, 0.5980, 0.2727]) -Greedy action tensor([-1.1530, 0.0793, -0.1697, 0.1980]) tensor([0.0912, 0.3128, 0.2438, 0.3522]) -Greedy action tensor([-1.9133, -0.8037, 0.3319, -0.1957]) tensor([0.0525, 0.1592, 0.4957, 0.2925]) -Greedy action tensor([-1.4228, -0.6501, -0.0355, -0.0885]) tensor([0.0912, 0.1975, 0.3651, 0.3462]) -Greedy action tensor([-1.9339, -0.8829, 0.3909, -0.1807]) tensor([0.0504, 0.1440, 0.5149, 0.2907]) -Greedy action tensor([-1.4902, -0.5861, 0.5559, -0.3121]) tensor([0.0692, 0.1709, 0.5353, 0.2247]) -Greedy action tensor([-2.0232, -0.8458, 0.5861, 0.0036]) tensor([0.0393, 0.1277, 0.5345, 0.2985]) -Greedy action tensor([-2.0080, -0.9420, 0.5067, -0.1516]) tensor([0.0441, 0.1281, 0.5454, 0.2824]) -Greedy action tensor([-1.6282, -0.5691, 0.5417, -0.0644]) tensor([0.0574, 0.1656, 0.5028, 0.2743]) -Greedy action tensor([-2.0537e+00, -8.5406e-01, 7.3284e-01, -1.9987e-03]) tensor([0.0353, 0.1172, 0.5728, 0.2747]) -Greedy action tensor([-1.0778, -0.3341, -0.1210, -0.0951]) tensor([0.1194, 0.2511, 0.3107, 0.3189]) -Greedy action tensor([-1.9443, -0.8096, 0.2730, -0.1933]) tensor([0.0525, 0.1633, 0.4819, 0.3023]) -Greedy action tensor([-1.7750, -0.6799, 0.2768, -0.2761]) tensor([0.0615, 0.1840, 0.4789, 0.2755]) -Greedy action tensor([-1.7694, -0.8718, 0.2044, -0.4724]) tensor([0.0699, 0.1715, 0.5030, 0.2556]) -Greedy action tensor([-0.9922, -0.5200, 0.5744, 0.1378]) tensor([0.0953, 0.1529, 0.4567, 0.2951]) -Greedy action tensor([-1.5734, -0.3182, 0.4499, -0.0099]) tensor([0.0594, 0.2083, 0.4489, 0.2834]) -Greedy action tensor([-0.7166, -0.0172, 0.2689, -0.0376]) tensor([0.1305, 0.2626, 0.3496, 0.2573]) -Greedy action tensor([-1.9224, -0.5366, 0.6739, -0.0889]) tensor([0.0405, 0.1621, 0.5438, 0.2536]) -Greedy action tensor([-1.4685, 0.0396, 0.0288, -0.5085]) tensor([0.0794, 0.3586, 0.3547, 0.2073]) -Greedy action tensor([-1.9886, -0.8771, 0.3940, -0.1432]) tensor([0.0472, 0.1433, 0.5109, 0.2986]) -Greedy action tensor([-1.7335, -0.5058, 0.6964, 0.2271]) tensor([0.0437, 0.1492, 0.4965, 0.3105]) -Greedy action tensor([-1.8988, -0.9475, 0.2314, -0.3210]) tensor([0.0593, 0.1537, 0.4995, 0.2875]) -Greedy action tensor([ 1.6375, -0.6627, 1.0439, 1.2515]) tensor([0.4288, 0.0430, 0.2368, 0.2914]) -Greedy action tensor([1.6173, 0.0183, 1.1390, 0.6825]) tensor([0.4515, 0.0913, 0.2799, 0.1773]) -Greedy action tensor([ 1.4287, -0.8993, -0.4840, 1.4290]) tensor([0.4453, 0.0434, 0.0658, 0.4455]) -Greedy action tensor([1.6895, 0.0093, 0.4178, 1.4170]) tensor([0.4488, 0.0836, 0.1258, 0.3418]) -Greedy action tensor([1.4133, 0.0410, 1.0624, 0.1665]) tensor([0.4454, 0.1129, 0.3136, 0.1280]) -Greedy action tensor([ 1.2086, -0.4029, 0.1434, 1.2480]) tensor([0.3869, 0.0772, 0.1334, 0.4025]) -Greedy action tensor([ 1.3996, 0.7503, -1.5887, 0.7072]) tensor([0.4824, 0.2520, 0.0243, 0.2414]) -Greedy action tensor([ 1.6598, -1.4499, 0.9444, 1.1914]) tensor([0.4630, 0.0207, 0.2264, 0.2899]) -Greedy action tensor([ 1.0862, -0.0031, 0.5788, 0.8037]) tensor([0.3714, 0.1250, 0.2236, 0.2800]) -Greedy action tensor([ 1.6634, -0.8500, -0.1747, 0.9398]) tensor([0.5797, 0.0469, 0.0922, 0.2811]) -Greedy action tensor([ 1.1714, -0.0360, 1.1883, 1.9197]) tensor([0.2258, 0.0675, 0.2296, 0.4771]) -Greedy action tensor([ 1.3145, -1.0508, 1.0925, 0.6542]) tensor([0.4147, 0.0389, 0.3321, 0.2143]) -Greedy action tensor([ 1.6859, -0.9001, 1.2470, 0.7987]) tensor([0.4691, 0.0353, 0.3024, 0.1932]) -Greedy action tensor([1.7763, 0.1310, 0.2027, 0.3716]) tensor([0.6077, 0.1172, 0.1260, 0.1491]) -Greedy action tensor([ 1.5535, 0.2959, -1.1234, 0.7140]) tensor([0.5602, 0.1593, 0.0385, 0.2420]) -Greedy action tensor([1.2986, 0.3417, 0.2451, 0.7739]) tensor([0.4302, 0.1652, 0.1500, 0.2546]) -Greedy action tensor([ 1.5465, -0.0611, 0.0161, 1.2259]) tensor([0.4667, 0.0935, 0.1010, 0.3387]) -Greedy action tensor([ 0.5618, 0.4567, -0.6061, 0.8013]) tensor([0.2872, 0.2586, 0.0893, 0.3649]) -Greedy action tensor([ 0.7972, -1.1548, 0.7483, 0.9756]) tensor([0.3040, 0.0432, 0.2895, 0.3634]) -Greedy action tensor([ 1.0323, 0.1684, -0.2429, 1.7398]) tensor([0.2681, 0.1130, 0.0749, 0.5440]) -Greedy action tensor([ 1.5910, -0.3316, 0.5186, 1.1813]) tensor([0.4646, 0.0679, 0.1590, 0.3084]) -Greedy action tensor([ 1.2110, -0.9343, -0.3637, 0.7044]) tensor([0.5190, 0.0607, 0.1075, 0.3127]) -Greedy action tensor([ 1.3946, -1.1044, -0.0321, 0.8010]) tensor([0.5335, 0.0438, 0.1281, 0.2946]) -Greedy action tensor([ 1.5916, -0.0487, 0.9093, 0.9365]) tensor([0.4507, 0.0874, 0.2278, 0.2341]) -Greedy action tensor([ 1.9924, -0.2987, 0.5571, 1.8408]) tensor([0.4549, 0.0460, 0.1083, 0.3909]) -Greedy action tensor([ 1.5076, 0.1163, -0.5136, 2.1983]) tensor([0.2962, 0.0737, 0.0392, 0.5909]) -Greedy action tensor([ 0.8336, -0.4047, 1.3368, 1.0908]) tensor([0.2360, 0.0684, 0.3903, 0.3052]) -Greedy action tensor([ 0.7065, -0.5684, 0.9038, 0.1116]) tensor([0.3280, 0.0917, 0.3995, 0.1809]) -Greedy action tensor([ 0.9617, -0.4050, 1.4011, 0.5794]) tensor([0.2866, 0.0731, 0.4448, 0.1955]) -Greedy action tensor([ 1.3360, -0.7770, 0.9674, 0.7155]) tensor([0.4255, 0.0514, 0.2943, 0.2288]) -Greedy action tensor([ 1.3104, -0.2756, 0.6545, 0.3993]) tensor([0.4704, 0.0963, 0.2441, 0.1891]) -Greedy action tensor([ 1.1779, -1.0059, 0.6825, 0.4978]) tensor([0.4487, 0.0505, 0.2734, 0.2273]) -Greedy action tensor([ 1.3818, -0.0976, 1.2844, 0.5914]) tensor([0.3863, 0.0880, 0.3505, 0.1753]) -Greedy action tensor([ 1.9903, -0.6774, 1.1002, 0.9352]) tensor([0.5470, 0.0380, 0.2246, 0.1904]) -Greedy action tensor([ 1.9278, -0.4919, 0.4400, 1.2871]) tensor([0.5430, 0.0483, 0.1226, 0.2861]) -Greedy action tensor([ 1.4983, -0.2595, 1.2576, 0.9620]) tensor([0.3932, 0.0678, 0.3091, 0.2300]) -Greedy action tensor([ 1.7798, -1.2517, 1.1302, 1.3058]) tensor([0.4560, 0.0220, 0.2381, 0.2839]) -Greedy action tensor([ 1.3414, -1.2166, 1.2551, 0.4851]) tensor([0.4133, 0.0320, 0.3791, 0.1755]) -Greedy action tensor([ 1.8277, -0.3107, 1.5692, 1.0262]) tensor([0.4276, 0.0504, 0.3302, 0.1918]) -Greedy action tensor([ 1.0144, -0.6069, -0.0391, 1.0234]) tensor([0.3913, 0.0773, 0.1365, 0.3949]) -Greedy action tensor([ 2.0729, 0.0667, -0.2120, 0.7546]) tensor([0.6649, 0.0894, 0.0677, 0.1779]) -Greedy action tensor([ 2.0634, -0.0451, 0.8971, 1.9530]) tensor([0.4295, 0.0521, 0.1338, 0.3846]) -Greedy action tensor([ 1.3024, -0.3308, 0.7383, 1.3494]) tensor([0.3556, 0.0694, 0.2023, 0.3727]) -Greedy action tensor([ 1.6272, -0.2036, -0.0154, 0.8716]) tensor([0.5484, 0.0879, 0.1061, 0.2576]) -Greedy action tensor([ 1.5201, -0.0190, 0.5453, 0.8355]) tensor([0.4771, 0.1024, 0.1800, 0.2406]) -Greedy action tensor([ 1.4944, -0.5045, 0.4607, 0.8484]) tensor([0.4962, 0.0672, 0.1765, 0.2601]) -Greedy action tensor([ 0.9030, -0.0557, 0.1413, 0.8190]) tensor([0.3611, 0.1384, 0.1686, 0.3320]) -Greedy action tensor([1.7951, 0.0584, 0.7735, 1.5626]) tensor([0.4294, 0.0756, 0.1546, 0.3403]) -Greedy action tensor([ 0.9525, -0.4844, 0.6453, 1.0782]) tensor([0.3218, 0.0765, 0.2367, 0.3649]) -Greedy action tensor([ 2.4595, -0.8746, 0.4563, 1.9085]) tensor([0.5724, 0.0204, 0.0772, 0.3299]) -Greedy action tensor([ 0.9026, -0.4907, 0.7718, 1.1879]) tensor([0.2894, 0.0718, 0.2539, 0.3849]) -Greedy action tensor([ 1.2697, -0.5538, 1.2050, 0.5446]) tensor([0.3871, 0.0625, 0.3629, 0.1875]) -Greedy action tensor([ 1.5316, -1.4021, -0.0281, 1.3397]) tensor([0.4788, 0.0255, 0.1006, 0.3951]) -Greedy action tensor([ 1.4526, -1.0046, 1.5488, 1.0970]) tensor([0.3463, 0.0297, 0.3813, 0.2427]) -Greedy action tensor([ 0.9081, -0.8032, 0.9642, 0.3635]) tensor([0.3548, 0.0641, 0.3753, 0.2058]) -Greedy action tensor([ 1.1478, -0.0136, 1.1195, 1.8635]) tensor([0.2309, 0.0723, 0.2245, 0.4723]) -Greedy action tensor([ 0.6713, -0.3019, 0.4896, 1.8513]) tensor([0.1829, 0.0691, 0.1526, 0.5954]) -Greedy action tensor([ 0.9612, -0.0948, 1.0200, 1.0344]) tensor([0.2870, 0.0998, 0.3044, 0.3088]) -Greedy action tensor([ 2.1433, -0.7690, 0.4808, 1.1858]) tensor([0.6143, 0.0334, 0.1165, 0.2358]) -Greedy action tensor([ 1.7703, -0.3132, 1.1099, 1.4445]) tensor([0.4232, 0.0527, 0.2186, 0.3055]) -Greedy action tensor([ 1.0130, -0.5657, 1.0365, 0.2298]) tensor([0.3722, 0.0768, 0.3810, 0.1700]) -Greedy action tensor([ 1.3451, -1.1986, 0.5530, 1.0700]) tensor([0.4365, 0.0343, 0.1977, 0.3315]) -Greedy action tensor([ 1.3484, -0.5696, 0.3615, 1.2395]) tensor([0.4138, 0.0608, 0.1542, 0.3711]) -Greedy action tensor([1.6786, 0.0896, 0.0928, 1.3496]) tensor([0.4698, 0.0959, 0.0962, 0.3381]) -Greedy action tensor([ 1.4749, -0.5884, 0.2485, 1.2259]) tensor([0.4545, 0.0577, 0.1333, 0.3544]) -Greedy action tensor([ 1.1959, -0.1595, 0.9109, 0.8264]) tensor([0.3702, 0.0955, 0.2784, 0.2559]) -Greedy action tensor([ 1.8159, -0.7542, 0.6984, 1.2227]) tensor([0.5112, 0.0391, 0.1672, 0.2825]) -Greedy action tensor([1.0850, 0.3840, 1.0426, 0.9591]) tensor([0.2997, 0.1487, 0.2873, 0.2643]) -Greedy action tensor([ 1.4357, -0.6591, 0.5872, 1.1239]) tensor([0.4379, 0.0539, 0.1875, 0.3207]) -Greedy action tensor([ 1.2571, -0.1341, 0.7783, 0.7990]) tensor([0.3999, 0.0995, 0.2477, 0.2529]) -Greedy action tensor([1.8923, 0.3029, 0.0929, 1.2951]) tensor([0.5209, 0.1063, 0.0862, 0.2867]) -Greedy action tensor([ 1.8853, -0.6163, 1.7430, 0.9742]) tensor([0.4253, 0.0349, 0.3689, 0.1710]) -Greedy action tensor([ 1.3304, -1.0356, 0.2141, 0.6905]) tensor([0.5132, 0.0482, 0.1681, 0.2706]) -Greedy action tensor([ 1.1667, -0.0942, 1.2890, 1.5096]) tensor([0.2616, 0.0741, 0.2957, 0.3686]) -Greedy action tensor([ 2.2337, -0.6093, 0.6500, 1.9542]) tensor([0.4951, 0.0288, 0.1016, 0.3744]) -Greedy action tensor([ 1.9158, -0.2755, 0.5966, 0.8720]) tensor([0.5776, 0.0646, 0.1544, 0.2034]) -Greedy action tensor([ 1.3451, -0.3481, -0.4192, 1.1252]) tensor([0.4634, 0.0852, 0.0794, 0.3719]) -Greedy action tensor([ 1.7386, -0.3904, 0.4526, 0.7654]) tensor([0.5639, 0.0671, 0.1559, 0.2131]) -Greedy action tensor([ 1.7273, -0.3564, 0.1272, 0.6356]) tensor([0.6017, 0.0749, 0.1215, 0.2020]) -Greedy action tensor([ 0.4093, -1.1089, 1.4065, 1.1103]) tensor([0.1682, 0.0369, 0.4559, 0.3390]) -Greedy action tensor([ 2.0953, -0.5275, 1.5722, 1.6316]) tensor([0.4359, 0.0316, 0.2583, 0.2741]) -Greedy action tensor([ 2.2285, -0.7303, 0.1411, 1.0217]) tensor([0.6780, 0.0352, 0.0841, 0.2028]) -Greedy action tensor([ 0.3717, 0.2268, 0.0744, -0.1920]) tensor([0.3148, 0.2723, 0.2338, 0.1791]) -Greedy action tensor([ 0.3928, -0.1796, 0.1500, -0.2093]) tensor([0.3453, 0.1948, 0.2708, 0.1891]) -Greedy action tensor([ 0.2156, 0.0399, 0.0968, -0.1806]) tensor([0.2941, 0.2467, 0.2612, 0.1979]) -Greedy action tensor([ 0.4534, 0.2027, 0.1773, -0.0148]) tensor([0.3162, 0.2460, 0.2399, 0.1980]) -Greedy action tensor([ 0.4311, 0.1728, 0.0329, -0.2014]) tensor([0.3361, 0.2596, 0.2257, 0.1786]) -Greedy action tensor([ 0.2013, -0.0209, 0.2144, -0.1264]) tensor([0.2829, 0.2266, 0.2867, 0.2039]) -Greedy action tensor([ 0.2527, 0.1393, 0.1441, -0.3022]) tensor([0.2973, 0.2654, 0.2667, 0.1707]) -Greedy action tensor([ 0.6563, -0.1267, 0.0505, -0.1787]) tensor([0.4104, 0.1876, 0.2239, 0.1781]) -Greedy action tensor([ 0.3564, -0.0064, 0.2251, -0.3061]) tensor([0.3238, 0.2253, 0.2840, 0.1669]) -Greedy action tensor([ 0.7060, -0.1801, 0.0258, -0.4042]) tensor([0.4448, 0.1834, 0.2253, 0.1466]) -Greedy action tensor([ 0.6379, 0.0036, -0.0250, -0.3180]) tensor([0.4115, 0.2182, 0.2121, 0.1582]) -Greedy action tensor([ 0.3087, 0.0192, 0.2454, -0.2126]) tensor([0.3048, 0.2282, 0.2861, 0.1810]) -Greedy action tensor([ 0.4942, -0.2603, 0.0317, -0.4806]) tensor([0.4037, 0.1898, 0.2542, 0.1523]) -Greedy action tensor([ 0.3494, 0.1919, 0.1537, -0.1000]) tensor([0.3017, 0.2577, 0.2481, 0.1925]) -Greedy action tensor([ 0.2913, 0.0994, 0.1140, -0.0544]) tensor([0.2967, 0.2449, 0.2485, 0.2100]) -Greedy action tensor([ 0.6603, -0.2579, -0.0366, -0.3533]) tensor([0.4424, 0.1766, 0.2204, 0.1606]) -Greedy action tensor([ 0.6588, -0.0935, -0.0704, -0.2857]) tensor([0.4269, 0.2012, 0.2059, 0.1660]) -Greedy action tensor([ 1.0191, -0.6349, -0.0330, -0.6690]) tensor([0.5796, 0.1109, 0.2024, 0.1071]) -Greedy action tensor([ 0.3264, 0.0945, 0.0817, -0.0087]) tensor([0.3039, 0.2410, 0.2379, 0.2173]) -Greedy action tensor([ 0.3351, -0.0632, 0.0719, -0.3796]) tensor([0.3414, 0.2292, 0.2624, 0.1670]) -Greedy action tensor([ 0.2135, 0.1142, 0.1254, -0.1264]) tensor([0.2830, 0.2563, 0.2592, 0.2015]) -Greedy action tensor([ 0.3549, 0.1754, 0.1556, -0.0849]) tensor([0.3031, 0.2533, 0.2483, 0.1953]) -Greedy action tensor([ 0.1622, 0.0723, -0.1489, -0.3536]) tensor([0.3083, 0.2818, 0.2259, 0.1841]) -Greedy action tensor([ 0.4570, -0.1039, -0.0089, -0.3402]) tensor([0.3775, 0.2155, 0.2369, 0.1701]) -Greedy action tensor([ 0.7777, -0.1837, -0.0420, -0.5311]) tensor([0.4778, 0.1827, 0.2105, 0.1291]) -Greedy action tensor([ 0.3389, 0.1378, 0.1367, -0.2520]) tensor([0.3136, 0.2565, 0.2562, 0.1737]) -Greedy action tensor([ 0.2524, 0.1532, 0.0760, -0.2011]) tensor([0.2959, 0.2680, 0.2481, 0.1880]) -Greedy action tensor([ 0.1358, 0.0101, 0.1212, -0.2692]) tensor([0.2829, 0.2495, 0.2788, 0.1887]) -Greedy action tensor([ 0.2674, -0.0902, 0.0104, -0.1345]) tensor([0.3183, 0.2226, 0.2462, 0.2130]) -Greedy action tensor([ 0.8386, -0.2473, -0.0941, -0.5899]) tensor([0.5074, 0.1713, 0.1997, 0.1216]) -Greedy action tensor([ 0.5697, -0.0601, 0.0343, -0.4908]) tensor([0.4058, 0.2162, 0.2375, 0.1405]) -Greedy action tensor([ 0.3441, 0.0479, 0.1802, -0.0394]) tensor([0.3055, 0.2271, 0.2593, 0.2082]) -Greedy action tensor([ 0.3887, -0.1187, -0.0610, -0.3699]) tensor([0.3692, 0.2223, 0.2355, 0.1729]) -Greedy action tensor([ 0.2043, 0.0401, 0.1776, -0.1020]) tensor([0.2810, 0.2385, 0.2736, 0.2069]) -Greedy action tensor([ 0.3376, -0.0682, 0.1626, -0.3143]) tensor([0.3304, 0.2202, 0.2773, 0.1721]) -Greedy action tensor([ 0.5170, -0.1014, -0.0646, -0.2768]) tensor([0.3922, 0.2113, 0.2192, 0.1773]) -Greedy action tensor([ 0.2737, 0.2130, 0.2126, -0.1784]) tensor([0.2843, 0.2675, 0.2674, 0.1809]) -Greedy action tensor([ 0.6103, -0.2584, 0.1232, -0.5922]) tensor([0.4284, 0.1797, 0.2632, 0.1287]) -Greedy action tensor([ 0.3840, -0.1646, 0.0333, -0.4393]) tensor([0.3675, 0.2123, 0.2588, 0.1613]) -Greedy action tensor([ 0.4755, -0.2376, 0.1417, -0.4986]) tensor([0.3870, 0.1897, 0.2772, 0.1461]) -Greedy action tensor([ 0.7355, -0.2434, 0.0435, -0.3364]) tensor([0.4507, 0.1693, 0.2256, 0.1543]) -Greedy action tensor([ 0.8097, -0.3175, -0.1567, -0.5965]) tensor([0.5130, 0.1662, 0.1951, 0.1257]) -Greedy action tensor([ 0.3463, 0.1259, 0.1343, -0.2580]) tensor([0.3167, 0.2541, 0.2562, 0.1731]) -Greedy action tensor([ 0.2906, -0.1382, 0.1567, -0.2904]) tensor([0.3241, 0.2111, 0.2835, 0.1813]) -Greedy action tensor([ 0.9184, -0.5648, -0.0794, -0.5701]) tensor([0.5491, 0.1246, 0.2024, 0.1239]) -Greedy action tensor([ 0.6747, -0.1476, -0.0225, -0.3933]) tensor([0.4384, 0.1926, 0.2183, 0.1507]) -Greedy action tensor([ 0.2611, 0.0665, 0.0188, -0.1559]) tensor([0.3061, 0.2520, 0.2402, 0.2017]) -Greedy action tensor([ 0.1207, 0.0105, -0.0583, -0.0530]) tensor([0.2799, 0.2507, 0.2340, 0.2353]) -Greedy action tensor([ 0.3718, 0.0277, 0.1982, -0.1404]) tensor([0.3176, 0.2251, 0.2670, 0.1903]) -Greedy action tensor([ 0.3725, -0.0907, 0.1651, -0.3013]) tensor([0.3388, 0.2132, 0.2753, 0.1727]) -Greedy action tensor([ 0.2677, 0.0868, 0.2264, -0.2134]) tensor([0.2931, 0.2446, 0.2812, 0.1812]) -Greedy action tensor([ 0.2175, -0.0146, 0.1894, -0.1685]) tensor([0.2903, 0.2302, 0.2822, 0.1973]) -Greedy action tensor([ 0.1881, -0.0032, 0.1314, -0.1146]) tensor([0.2849, 0.2353, 0.2692, 0.2105]) -Greedy action tensor([ 0.7617, -0.3130, -0.1277, -0.4457]) tensor([0.4875, 0.1664, 0.2003, 0.1457]) -Greedy action tensor([ 0.4576, -0.1353, 0.1193, -0.4221]) tensor([0.3730, 0.2062, 0.2660, 0.1548]) -Greedy action tensor([ 0.3344, -0.1116, -0.0463, -0.2680]) tensor([0.3483, 0.2230, 0.2380, 0.1907]) -Greedy action tensor([ 0.2762, 0.0122, 0.2098, -0.2272]) tensor([0.3023, 0.2321, 0.2829, 0.1827]) -Greedy action tensor([ 0.2028, -0.0295, 0.0329, -0.3218]) tensor([0.3098, 0.2456, 0.2614, 0.1833]) -Greedy action tensor([ 0.2866, -0.0074, 0.1781, -0.2242]) tensor([0.3084, 0.2298, 0.2767, 0.1851]) -Greedy action tensor([ 0.4514, -0.0662, 0.0764, -0.2914]) tensor([0.3624, 0.2160, 0.2491, 0.1724]) -Greedy action tensor([ 0.3537, 0.0788, 0.2064, -0.1293]) tensor([0.3087, 0.2345, 0.2664, 0.1904]) -Greedy action tensor([ 0.6073, -0.1196, -0.0377, -0.4289]) tensor([0.4232, 0.2046, 0.2220, 0.1502]) -Greedy action tensor([ 0.6586, -0.4277, -0.1203, -0.6507]) tensor([0.4839, 0.1633, 0.2221, 0.1307]) -Greedy action tensor([ 0.2244, 0.0583, 0.0689, -0.2859]) tensor([0.3027, 0.2564, 0.2591, 0.1817]) -Greedy action tensor([ 0.2762, 0.1162, 0.1193, -0.2876]) tensor([0.3053, 0.2601, 0.2609, 0.1737]) -Greedy action tensor([ 0.4948, -0.2776, 0.1382, -0.3308]) tensor([0.3846, 0.1777, 0.2693, 0.1685]) -Greedy action tensor([ 0.1836, -0.0122, 0.2193, -0.1147]) tensor([0.2777, 0.2283, 0.2878, 0.2061]) -Greedy action tensor([ 0.3456, -0.1976, 0.1102, -0.3976]) tensor([0.3513, 0.2041, 0.2776, 0.1671]) -Greedy action tensor([ 0.1909, 0.0521, 0.2036, -0.2032]) tensor([0.2811, 0.2447, 0.2847, 0.1895]) -Greedy action tensor([ 0.4186, 0.0456, 0.0368, -0.3736]) tensor([0.3541, 0.2439, 0.2417, 0.1603]) -Greedy action tensor([ 0.7756, -0.2093, -0.1233, -0.6194]) tensor([0.4930, 0.1841, 0.2007, 0.1222]) -Greedy action tensor([ 0.5060, 0.1014, 0.0279, -0.2451]) tensor([0.3624, 0.2418, 0.2247, 0.1710]) -Greedy action tensor([ 0.3693, -0.1197, 0.2229, -0.2058]) tensor([0.3290, 0.2017, 0.2842, 0.1851]) -Greedy action tensor([ 0.6632, -0.0423, -0.0698, -0.3613]) tensor([0.4286, 0.2117, 0.2059, 0.1539]) -Greedy action tensor([ 0.3387, 0.0267, 0.2471, -0.1802]) tensor([0.3087, 0.2259, 0.2817, 0.1837]) -Greedy action tensor([ 0.3446, 0.0203, 0.1802, -0.2076]) tensor([0.3177, 0.2297, 0.2696, 0.1829]) -Greedy action tensor([ 0.4882, 0.0419, 0.0493, -0.2889]) tensor([0.3644, 0.2332, 0.2349, 0.1675]) -Greedy action tensor([ 0.6617, -0.0935, 0.0437, -0.1665]) tensor([0.4089, 0.1921, 0.2204, 0.1786]) -Greedy action tensor([ 0.3507, -0.0746, 0.0877, -0.2174]) tensor([0.3346, 0.2187, 0.2572, 0.1896]) -Greedy action tensor([ 0.5724, -0.1084, 0.0329, -0.2790]) tensor([0.3974, 0.2012, 0.2317, 0.1696]) -Greedy action tensor([ 0.5962, -0.4300, 0.2115, -0.5818]) tensor([0.4261, 0.1527, 0.2900, 0.1312]) -Greedy action tensor([ 1.2571, -0.4694, -0.4999, -0.1611]) tensor([0.6279, 0.1117, 0.1084, 0.1520]) -Greedy action tensor([ 1.9359, -0.5793, -0.3402, 0.3663]) tensor([0.7185, 0.0581, 0.0738, 0.1496]) -Greedy action tensor([ 1.3019, -0.3333, 0.0147, -0.1721]) tensor([0.5882, 0.1147, 0.1624, 0.1347]) -Greedy action tensor([ 0.6219, -0.5921, -0.2886, 0.1079]) tensor([0.4353, 0.1293, 0.1751, 0.2603]) -Greedy action tensor([ 8.3264e-01, -4.9609e-01, -5.4859e-02, -3.7123e-05]) tensor([0.4736, 0.1254, 0.1950, 0.2060]) -Greedy action tensor([ 0.9305, -0.5521, 0.1383, 0.1372]) tensor([0.4690, 0.1065, 0.2124, 0.2121]) -Greedy action tensor([ 0.9920, -0.6413, -0.2500, 0.1849]) tensor([0.5181, 0.1012, 0.1496, 0.2311]) -Greedy action tensor([ 0.8763, -0.1727, -0.1533, -0.0927]) tensor([0.4792, 0.1679, 0.1711, 0.1818]) -Greedy action tensor([ 0.5253, -0.5021, -0.0767, -0.0871]) tensor([0.4085, 0.1462, 0.2238, 0.2215]) -Greedy action tensor([ 0.5994, -0.3416, 0.0103, 0.0556]) tensor([0.3959, 0.1545, 0.2197, 0.2299]) -Greedy action tensor([ 1.7905, -0.5331, -0.1454, 0.1462]) tensor([0.6967, 0.0682, 0.1005, 0.1346]) -Greedy action tensor([ 1.3769, -0.5647, -0.2713, 0.1908]) tensor([0.6093, 0.0874, 0.1172, 0.1861]) -Greedy action tensor([ 1.2486, -0.3675, -0.5930, 0.0490]) tensor([0.6029, 0.1198, 0.0956, 0.1817]) -Greedy action tensor([ 0.7670, -0.3413, -0.1989, 0.0106]) tensor([0.4587, 0.1514, 0.1746, 0.2153]) -Greedy action tensor([ 0.7737, -0.5167, 0.1504, -0.0337]) tensor([0.4430, 0.1219, 0.2375, 0.1976]) -Greedy action tensor([ 1.3993, -0.6037, -0.4562, -0.1219]) tensor([0.6624, 0.0894, 0.1036, 0.1447]) -Greedy action tensor([ 1.4737, -0.5554, -0.3464, 0.0170]) tensor([0.6551, 0.0861, 0.1061, 0.1526]) -Greedy action tensor([ 0.7408, -0.5877, 0.0353, -0.0448]) tensor([0.4516, 0.1196, 0.2230, 0.2058]) -Greedy action tensor([ 0.9429, -0.5735, -0.1464, 0.1890]) tensor([0.4935, 0.1083, 0.1660, 0.2322]) -Greedy action tensor([ 0.9740, -0.4216, -0.2859, 0.1635]) tensor([0.5061, 0.1253, 0.1436, 0.2250]) -Greedy action tensor([ 0.4932, -0.4391, 0.0646, -0.0698]) tensor([0.3825, 0.1506, 0.2492, 0.2178]) -Greedy action tensor([ 1.8124, -0.6467, -0.6770, 0.0394]) tensor([0.7472, 0.0639, 0.0620, 0.1269]) -Greedy action tensor([ 1.7713, -0.6012, -0.4654, -0.0723]) tensor([0.7362, 0.0686, 0.0786, 0.1165]) -Greedy action tensor([ 0.9655, -0.4097, -0.2272, 0.0974]) tensor([0.5061, 0.1279, 0.1535, 0.2124]) -Greedy action tensor([ 0.7848, -0.4091, -0.0398, -0.1151]) tensor([0.4655, 0.1411, 0.2041, 0.1893]) -Greedy action tensor([ 1.0359, -0.3391, 0.0061, -0.2359]) tensor([0.5290, 0.1338, 0.1889, 0.1483]) -Greedy action tensor([ 0.7191, -0.5235, -0.3066, -0.0123]) tensor([0.4698, 0.1356, 0.1685, 0.2261]) -Greedy action tensor([ 1.0448, -0.5706, -0.2301, 0.0347]) tensor([0.5428, 0.1079, 0.1517, 0.1977]) -Greedy action tensor([ 0.9187, -0.3990, -0.2295, 0.0036]) tensor([0.5037, 0.1349, 0.1598, 0.2017]) -Greedy action tensor([ 0.6509, -0.1407, -0.0673, -0.0780]) tensor([0.4127, 0.1870, 0.2012, 0.1991]) -Greedy action tensor([ 1.0203, -0.3884, -0.3267, 0.1052]) tensor([0.5249, 0.1283, 0.1365, 0.2102]) -Greedy action tensor([ 1.1298, -0.6223, -0.2646, 0.1817]) tensor([0.5528, 0.0959, 0.1371, 0.2142]) -Greedy action tensor([ 1.8295, -0.6732, -0.2238, 0.3973]) tensor([0.6902, 0.0565, 0.0885, 0.1648]) -Greedy action tensor([ 1.3643, -0.5809, -0.0750, 0.1585]) tensor([0.5954, 0.0851, 0.1412, 0.1783]) -Greedy action tensor([ 0.9295, -0.6131, -0.2789, 0.1479]) tensor([0.5076, 0.1085, 0.1516, 0.2323]) -Greedy action tensor([ 0.9215, -0.4187, -0.0957, -0.0181]) tensor([0.4965, 0.1300, 0.1795, 0.1940]) -Greedy action tensor([ 1.3313, -0.3835, -0.0238, -0.1343]) tensor([0.5992, 0.1079, 0.1545, 0.1384]) -Greedy action tensor([ 0.9400, -0.4039, -0.2176, 0.2746]) tensor([0.4787, 0.1249, 0.1504, 0.2461]) -Greedy action tensor([ 1.1884, -0.5326, -0.2354, 0.0309]) tensor([0.5767, 0.1032, 0.1389, 0.1812]) -Greedy action tensor([ 0.9577, -0.3093, -0.0440, -0.2601]) tensor([0.5142, 0.1448, 0.1888, 0.1521]) -Greedy action tensor([ 1.3246, -0.2290, -0.0458, -0.1370]) tensor([0.5891, 0.1246, 0.1497, 0.1366]) -Greedy action tensor([ 0.7495, -0.4922, -0.1113, 0.0903]) tensor([0.4486, 0.1296, 0.1897, 0.2321]) -Greedy action tensor([ 0.7456, -0.5394, -0.2262, -0.1182]) tensor([0.4816, 0.1332, 0.1822, 0.2030]) -Greedy action tensor([ 1.5742, -0.4655, -0.2880, -0.0785]) tensor([0.6771, 0.0881, 0.1052, 0.1297]) -Greedy action tensor([ 1.1617, -0.1364, 0.0813, -0.1856]) tensor([0.5341, 0.1458, 0.1813, 0.1388]) -Greedy action tensor([ 1.3830, -0.5252, -0.2090, 0.3400]) tensor([0.5868, 0.0870, 0.1194, 0.2068]) -Greedy action tensor([ 1.1491, -0.1894, -0.0458, -0.2194]) tensor([0.5496, 0.1441, 0.1664, 0.1399]) -Greedy action tensor([ 1.3139, -0.4137, -0.4642, -0.1915]) tensor([0.6375, 0.1133, 0.1077, 0.1415]) -Greedy action tensor([ 0.7369, -0.5279, -0.0466, -0.1048]) tensor([0.4608, 0.1301, 0.2105, 0.1986]) -Greedy action tensor([ 1.0195, -0.4351, -0.1381, -0.2319]) tensor([0.5453, 0.1273, 0.1714, 0.1560]) -Greedy action tensor([ 0.8539, -0.6185, -0.4281, 0.0417]) tensor([0.5126, 0.1176, 0.1422, 0.2276]) -Greedy action tensor([ 0.9789, -0.4662, -0.0298, -0.2222]) tensor([0.5260, 0.1240, 0.1918, 0.1582]) -Greedy action tensor([ 1.3814, -0.2500, -0.1850, -0.2206]) tensor([0.6227, 0.1218, 0.1300, 0.1255]) -Greedy action tensor([ 1.1911, -0.7147, -0.2666, 0.1212]) tensor([0.5799, 0.0862, 0.1350, 0.1989]) -Greedy action tensor([ 0.7718, -0.4412, -0.0765, 0.0050]) tensor([0.4566, 0.1358, 0.1955, 0.2121]) -Greedy action tensor([ 0.6670, -0.1712, -0.1287, -0.0303]) tensor([0.4199, 0.1816, 0.1895, 0.2091]) -Greedy action tensor([ 0.9020, -0.3101, -0.2343, 0.2978]) tensor([0.4619, 0.1374, 0.1483, 0.2524]) -Greedy action tensor([ 1.0974, -0.7032, -0.1594, 0.1248]) tensor([0.5471, 0.0904, 0.1557, 0.2068]) -Greedy action tensor([ 0.7960, -0.5376, 0.1063, -0.0673]) tensor([0.4572, 0.1205, 0.2294, 0.1928]) -Greedy action tensor([ 0.7353, 0.0340, 0.0064, -0.0433]) tensor([0.4103, 0.2035, 0.1979, 0.1883]) -Greedy action tensor([ 0.9164, -0.5558, 0.0042, 0.1257]) tensor([0.4797, 0.1101, 0.1927, 0.2176]) -Greedy action tensor([ 0.5481, -0.3480, 0.1575, -0.1768]) tensor([0.3892, 0.1589, 0.2634, 0.1885]) -Greedy action tensor([ 1.4128, -0.3648, -0.0257, 0.1458]) tensor([0.5924, 0.1001, 0.1406, 0.1669]) -Greedy action tensor([ 0.7144, -0.6739, -0.3361, 0.1351]) tensor([0.4631, 0.1155, 0.1620, 0.2594]) -Greedy action tensor([ 0.7134, -0.4733, -0.1324, 0.0026]) tensor([0.4493, 0.1371, 0.1928, 0.2207]) -Greedy action tensor([ 0.7259, -0.4951, -0.0433, 0.0510]) tensor([0.4410, 0.1301, 0.2043, 0.2246]) -Greedy action tensor([ 0.8822, -0.0270, 0.0397, -0.0820]) tensor([0.4515, 0.1819, 0.1944, 0.1722]) -Greedy action tensor([ 0.8584, -0.5418, -0.2337, -0.0359]) tensor([0.5023, 0.1238, 0.1685, 0.2054]) -Greedy action tensor([ 1.2127, -0.4841, -0.2962, -0.2124]) tensor([0.6079, 0.1114, 0.1344, 0.1462]) -Greedy action tensor([ 0.4531, -0.4714, 0.1076, -0.0904]) tensor([0.3724, 0.1477, 0.2636, 0.2163]) -Greedy action tensor([ 2.1384, -0.6962, -0.3832, 0.4885]) tensor([0.7512, 0.0441, 0.0603, 0.1443]) -Greedy action tensor([ 1.4659, -0.6070, -0.1734, -0.0796]) tensor([0.6522, 0.0821, 0.1266, 0.1391]) -Greedy action tensor([ 1.4731, -0.4798, -0.4659, -0.1173]) tensor([0.6714, 0.0952, 0.0966, 0.1368]) -Greedy action tensor([ 1.1638, -0.5693, 0.0157, 0.2288]) tensor([0.5301, 0.0937, 0.1682, 0.2081]) -Greedy action tensor([ 1.0186, -0.6696, -0.0678, 0.3354]) tensor([0.4933, 0.0912, 0.1664, 0.2491]) -Greedy action tensor([ 0.8238, -0.7679, -0.4139, 0.0885]) tensor([0.5069, 0.1032, 0.1470, 0.2430]) -Greedy action tensor([ 1.4423, -0.6162, -0.5575, 0.3030]) tensor([0.6317, 0.0806, 0.0855, 0.2022]) -Greedy action tensor([ 0.9118, -0.7577, -0.4632, 0.1799]) tensor([0.5202, 0.0980, 0.1315, 0.2502]) -Greedy action tensor([ 1.1626, -0.0377, 0.0933, -0.0027]) tensor([0.5112, 0.1539, 0.1755, 0.1594]) -Greedy action tensor([ 0.5605, -0.6105, -0.2388, 0.0250]) tensor([0.4264, 0.1322, 0.1917, 0.2496]) -Greedy action tensor([ 1.1068, -0.1485, 0.1469, -0.1857]) tensor([0.5148, 0.1467, 0.1971, 0.1413]) -Greedy action tensor([-1.3205, -0.4027, 0.5521, -0.1423]) tensor([0.0754, 0.1889, 0.4907, 0.2450]) -Greedy action tensor([-0.6459, -0.4945, 0.0506, 0.0053]) tensor([0.1643, 0.1911, 0.3296, 0.3150]) -Greedy action tensor([-1.2067, -0.4452, 0.3093, -0.1913]) tensor([0.0956, 0.2048, 0.4355, 0.2640]) -Greedy action tensor([-1.8619, -0.9135, 0.2186, -0.3351]) tensor([0.0618, 0.1594, 0.4946, 0.2843]) -Greedy action tensor([-1.8818, -0.5612, 0.4280, -0.1477]) tensor([0.0488, 0.1829, 0.4918, 0.2765]) -Greedy action tensor([-1.0717, -0.1779, -0.1474, -0.0685]) tensor([0.1151, 0.2813, 0.2899, 0.3138]) -Greedy action tensor([-1.8567, -0.9829, 0.0662, -0.6161]) tensor([0.0730, 0.1750, 0.4995, 0.2525]) -Greedy action tensor([-1.5679, -0.6879, 0.6118, 0.3638]) tensor([0.0522, 0.1259, 0.4617, 0.3603]) -Greedy action tensor([-2.0324, -0.9064, 0.3979, -0.1264]) tensor([0.0451, 0.1391, 0.5125, 0.3034]) -Greedy action tensor([-1.4859, -0.6020, 0.0153, -0.2487]) tensor([0.0881, 0.2132, 0.3952, 0.3035]) -Greedy action tensor([-0.4409, 0.4181, -0.2089, 0.2038]) tensor([0.1532, 0.3617, 0.1932, 0.2919]) -Greedy action tensor([-1.6510, -0.5529, 0.5249, -0.0445]) tensor([0.0562, 0.1685, 0.4951, 0.2802]) -Greedy action tensor([-1.9807, -0.8347, 0.4429, -0.0675]) tensor([0.0450, 0.1416, 0.5083, 0.3051]) -Greedy action tensor([-1.7837, -0.2982, 0.1862, -0.3137]) tensor([0.0590, 0.2608, 0.4233, 0.2568]) -Greedy action tensor([-1.6448, -0.6984, 0.0828, -0.2258]) tensor([0.0750, 0.1932, 0.4219, 0.3099]) -Greedy action tensor([-1.6151, -0.5034, 0.5651, 0.1910]) tensor([0.0527, 0.1602, 0.4663, 0.3208]) -Greedy action tensor([-1.0076, -0.5188, 0.4100, -0.0697]) tensor([0.1074, 0.1751, 0.4432, 0.2743]) -Greedy action tensor([-1.9743, -0.9115, 0.4042, -0.2889]) tensor([0.0498, 0.1442, 0.5374, 0.2687]) -Greedy action tensor([-1.9389, -0.9778, 0.2046, -0.3618]) tensor([0.0589, 0.1539, 0.5022, 0.2850]) -Greedy action tensor([-1.8171, -0.6243, 0.2747, -0.2673]) tensor([0.0585, 0.1927, 0.4735, 0.2754]) -Greedy action tensor([-2.0279, -0.9504, 0.5651, -0.1332]) tensor([0.0417, 0.1226, 0.5581, 0.2776]) -Greedy action tensor([-1.7038, -0.7058, 0.0310, -0.4397]) tensor([0.0774, 0.2100, 0.4387, 0.2740]) -Greedy action tensor([-1.9145, -0.4359, 0.6510, -0.1759]) tensor([0.0415, 0.1821, 0.5401, 0.2362]) -Greedy action tensor([-1.6709, -0.5717, 0.0626, -0.4215]) tensor([0.0760, 0.2283, 0.4304, 0.2653]) -Greedy action tensor([-1.9845, -0.6949, 0.8272, 0.0917]) tensor([0.0342, 0.1242, 0.5689, 0.2727]) -Greedy action tensor([-1.6361, -0.4738, 0.0130, -0.4455]) tensor([0.0788, 0.2520, 0.4100, 0.2592]) -Greedy action tensor([-2.0480, -0.9029, 0.5632, -0.0670]) tensor([0.0400, 0.1257, 0.5445, 0.2899]) -Greedy action tensor([-1.9406, -0.8146, 0.5834, -0.0218]) tensor([0.0428, 0.1319, 0.5338, 0.2915]) -Greedy action tensor([-1.8040, -0.6901, 0.3171, -0.2305]) tensor([0.0581, 0.1770, 0.4846, 0.2803]) -Greedy action tensor([-1.9309, -0.8937, 0.3754, -0.0832]) tensor([0.0495, 0.1396, 0.4968, 0.3141]) -Greedy action tensor([-0.9334, -0.2823, -0.5412, -0.3475]) tensor([0.1614, 0.3096, 0.2390, 0.2900]) -Greedy action tensor([-1.7342, -0.4873, 0.5652, -0.0690]) tensor([0.0507, 0.1763, 0.5051, 0.2679]) -Greedy action tensor([-1.3294, -0.5408, 0.4310, -0.1932]) tensor([0.0824, 0.1814, 0.4794, 0.2568]) -Greedy action tensor([-0.8394, -0.1951, 0.1287, -0.2283]) tensor([0.1355, 0.2581, 0.3568, 0.2496]) -Greedy action tensor([-1.6713, -0.7889, 0.2002, -0.4510]) tensor([0.0752, 0.1817, 0.4885, 0.2547]) -Greedy action tensor([-1.9818, -0.5990, 0.8078, -0.0507]) tensor([0.0355, 0.1416, 0.5780, 0.2449]) -Greedy action tensor([-1.3172, -0.6500, 0.5142, 0.0969]) tensor([0.0752, 0.1465, 0.4692, 0.3091]) -Greedy action tensor([-1.4785, 0.2142, 0.3618, -0.2003]) tensor([0.0613, 0.3329, 0.3859, 0.2199]) -Greedy action tensor([-1.4337, -0.0070, 0.6835, -0.6213]) tensor([0.0636, 0.2648, 0.5283, 0.1433]) -Greedy action tensor([-1.0033, -0.4143, 0.6049, -0.3878]) tensor([0.1037, 0.1868, 0.5177, 0.1918]) -Greedy action tensor([-1.4881, -0.5490, 0.3267, -0.1536]) tensor([0.0741, 0.1895, 0.4550, 0.2814]) -Greedy action tensor([-1.1436, -0.5818, -0.3426, -0.4581]) tensor([0.1435, 0.2518, 0.3198, 0.2849]) -Greedy action tensor([-0.9608, -0.5878, 0.4256, 0.1124]) tensor([0.1066, 0.1549, 0.4266, 0.3119]) -Greedy action tensor([-0.8977, -0.4334, 0.4459, -0.3383]) tensor([0.1224, 0.1947, 0.4689, 0.2141]) -Greedy action tensor([-1.0853, -0.5107, 0.5750, -0.3487]) tensor([0.0988, 0.1754, 0.5195, 0.2063]) -Greedy action tensor([-1.8739, -0.4558, 0.6605, -0.0616]) tensor([0.0419, 0.1730, 0.5284, 0.2567]) -Greedy action tensor([-1.2482, -0.6989, 0.2815, 0.1834]) tensor([0.0867, 0.1502, 0.4003, 0.3629]) -Greedy action tensor([-1.1883, 0.5524, 0.2946, -0.1238]) tensor([0.0714, 0.4070, 0.3146, 0.2070]) -Greedy action tensor([-1.9338, -0.8554, 0.2061, -0.2049]) tensor([0.0553, 0.1627, 0.4702, 0.3117]) -Greedy action tensor([-0.9084, -0.6458, 0.3864, 0.1864]) tensor([0.1119, 0.1455, 0.4083, 0.3343]) -Greedy action tensor([-2.0350, -0.9237, 0.5496, -0.1274]) tensor([0.0416, 0.1264, 0.5517, 0.2803]) -Greedy action tensor([-1.7389e+00, -4.9503e-01, 5.7279e-01, -1.6637e-03]) tensor([0.0494, 0.1714, 0.4985, 0.2807]) -Greedy action tensor([-0.7888, -0.4254, 0.4343, -0.0656]) tensor([0.1266, 0.1821, 0.4303, 0.2610]) -Greedy action tensor([-1.6533, -0.6785, 0.1078, -0.3886]) tensor([0.0769, 0.2037, 0.4472, 0.2722]) -Greedy action tensor([-1.4632, -0.6905, -0.1437, -0.4277]) tensor([0.1028, 0.2227, 0.3848, 0.2897]) -Greedy action tensor([-1.0009, -0.1414, -0.4037, -0.0629]) tensor([0.1293, 0.3054, 0.2349, 0.3303]) -Greedy action tensor([-1.7256, -0.4752, 0.4754, -0.1557]) tensor([0.0545, 0.1905, 0.4928, 0.2622]) -Greedy action tensor([-1.5862, -0.8260, -0.0340, -0.4558]) tensor([0.0913, 0.1952, 0.4309, 0.2826]) -Greedy action tensor([-1.9673, -0.6118, 0.6218, -0.1401]) tensor([0.0410, 0.1589, 0.5455, 0.2546]) -Greedy action tensor([-1.0034, -0.1063, -0.4145, 0.0432]) tensor([0.1234, 0.3027, 0.2224, 0.3515]) -Greedy action tensor([-1.5251, -0.8651, -0.1121, -0.9690]) tensor([0.1138, 0.2202, 0.4675, 0.1985]) -Greedy action tensor([-2.0048, -0.9290, 1.0812, 0.2671]) tensor([0.0282, 0.0826, 0.6163, 0.2730]) -Greedy action tensor([-1.6286, -0.0363, 0.5465, -0.2473]) tensor([0.0535, 0.2629, 0.4708, 0.2129]) -Greedy action tensor([-1.2208, -0.4516, -0.2164, -0.5329]) tensor([0.1269, 0.2739, 0.3466, 0.2525]) -Greedy action tensor([-1.9726, -0.8078, 0.3541, -0.1716]) tensor([0.0488, 0.1563, 0.4996, 0.2953]) -Greedy action tensor([-1.7717, -0.9931, 0.1412, -0.5250]) tensor([0.0745, 0.1622, 0.5043, 0.2590]) -Greedy action tensor([-1.8419, -0.6011, 0.5986, -0.0053]) tensor([0.0450, 0.1557, 0.5168, 0.2825]) -Greedy action tensor([-1.8371, -0.7445, 0.0618, -0.2996]) tensor([0.0653, 0.1947, 0.4361, 0.3039]) -Greedy action tensor([-1.9424, -0.6890, 0.8494, 0.1214]) tensor([0.0349, 0.1221, 0.5685, 0.2745]) -Greedy action tensor([-2.0275, -0.9209, 0.4081, -0.1493]) tensor([0.0455, 0.1375, 0.5195, 0.2975]) -Greedy action tensor([-1.0530, -0.4461, 0.3242, 0.0157]) tensor([0.1030, 0.1889, 0.4082, 0.2999]) -Greedy action tensor([-0.8245, 0.1051, 0.2330, -0.1180]) tensor([0.1185, 0.3002, 0.3412, 0.2402]) -Greedy action tensor([-1.8097, -0.5313, 0.7312, 0.0137]) tensor([0.0426, 0.1530, 0.5406, 0.2638]) -Greedy action tensor([-1.4521, -0.4727, 0.2371, -0.1111]) tensor([0.0775, 0.2064, 0.4198, 0.2963]) -Greedy action tensor([-1.3970, -0.3740, 0.3091, -0.1252]) tensor([0.0778, 0.2163, 0.4284, 0.2775]) -Greedy action tensor([-1.7970, -0.8020, 0.4230, -0.1744]) tensor([0.0556, 0.1504, 0.5121, 0.2818]) -Greedy action tensor([-0.8365, -0.1734, -0.1851, -0.2646]) tensor([0.1508, 0.2927, 0.2893, 0.2672]) -Greedy action tensor([-1.7255, -0.7886, 0.2109, -0.3875]) tensor([0.0699, 0.1785, 0.4850, 0.2666]) -Greedy action tensor([-1.2214, -0.0447, 0.4512, -0.4127]) tensor([0.0846, 0.2745, 0.4508, 0.1900]) -Greedy action tensor([-2.0262, -0.9188, 0.5074, -0.1412]) tensor([0.0431, 0.1304, 0.5428, 0.2838]) -Greedy action tensor([-0.9066, 0.3053, 0.0951, -0.1905]) tensor([0.1095, 0.3680, 0.2983, 0.2242]) -Greedy action tensor([ 1.4297, -0.3558, 1.0042, 1.2653]) tensor([0.3746, 0.0628, 0.2448, 0.3178]) -Greedy action tensor([ 1.9504, -0.9341, 1.0818, 1.2222]) tensor([0.5107, 0.0285, 0.2143, 0.2465]) -Greedy action tensor([ 1.1779, -0.2564, 1.5332, 0.5654]) tensor([0.3118, 0.0743, 0.4449, 0.1690]) -Greedy action tensor([ 1.9951, -0.2124, 0.6961, 1.0950]) tensor([0.5589, 0.0615, 0.1525, 0.2272]) -Greedy action tensor([1.2606, 0.2096, 0.9553, 1.4996]) tensor([0.2979, 0.1042, 0.2195, 0.3784]) -Greedy action tensor([1.0554, 0.2566, 0.9670, 0.9059]) tensor([0.3099, 0.1394, 0.2837, 0.2669]) -Greedy action tensor([ 1.6881, -0.6391, 0.3581, 1.1641]) tensor([0.5117, 0.0499, 0.1353, 0.3030]) -Greedy action tensor([ 1.2272, -0.1070, 0.2303, 0.9102]) tensor([0.4236, 0.1116, 0.1563, 0.3085]) -Greedy action tensor([ 1.4553, -0.9088, 0.0284, 0.6458]) tensor([0.5621, 0.0529, 0.1349, 0.2502]) -Greedy action tensor([ 1.3328, -0.1343, 1.4865, 1.4737]) tensor([0.2819, 0.0650, 0.3287, 0.3245]) -Greedy action tensor([ 2.0448, -1.3389, 0.9497, 1.2280]) tensor([0.5524, 0.0187, 0.1848, 0.2441]) -Greedy action tensor([ 1.3200, 0.3469, -0.8263, 0.3595]) tensor([0.5326, 0.2013, 0.0623, 0.2038]) -Greedy action tensor([ 1.0418, -0.6164, 0.0943, 0.5449]) tensor([0.4573, 0.0871, 0.1773, 0.2782]) -Greedy action tensor([ 1.8390, -0.4887, 0.7128, 1.8632]) tensor([0.4088, 0.0399, 0.1326, 0.4188]) -Greedy action tensor([ 1.2706, -0.4100, 1.0948, 0.6348]) tensor([0.3915, 0.0729, 0.3283, 0.2073]) -Greedy action tensor([ 1.3158, -0.4245, 0.9043, 1.5414]) tensor([0.3235, 0.0568, 0.2144, 0.4054]) -Greedy action tensor([ 1.6657, -0.1395, 1.2088, 1.3233]) tensor([0.3988, 0.0656, 0.2525, 0.2832]) -Greedy action tensor([ 0.6348, -0.1123, 0.2428, 0.4397]) tensor([0.3365, 0.1594, 0.2273, 0.2768]) -Greedy action tensor([1.3060, 0.7578, 0.0355, 0.7317]) tensor([0.4129, 0.2387, 0.1159, 0.2325]) -Greedy action tensor([ 0.7812, -1.3084, 0.3757, 0.8911]) tensor([0.3441, 0.0426, 0.2294, 0.3840]) -Greedy action tensor([ 1.3389, -0.3925, 0.5580, 1.6700]) tensor([0.3303, 0.0585, 0.1513, 0.4600]) -Greedy action tensor([ 2.1993, -0.5166, 0.9391, 1.8359]) tensor([0.4890, 0.0323, 0.1387, 0.3400]) -Greedy action tensor([1.8483, 0.2664, 0.8261, 0.7972]) tensor([0.5222, 0.1074, 0.1879, 0.1825]) -Greedy action tensor([ 1.5419, -0.0248, 0.1784, 1.1800]) tensor([0.4628, 0.0966, 0.1184, 0.3223]) -Greedy action tensor([ 1.3459, -0.4940, -0.1778, 1.0882]) tensor([0.4652, 0.0739, 0.1014, 0.3595]) -Greedy action tensor([ 1.6674, -0.2731, 1.2711, 1.8916]) tensor([0.3260, 0.0468, 0.2193, 0.4079]) -Greedy action tensor([ 2.0753, -0.4821, 0.7963, 1.2529]) tensor([0.5570, 0.0432, 0.1550, 0.2447]) -Greedy action tensor([ 1.1024, 0.0885, -0.2395, 0.5440]) tensor([0.4553, 0.1652, 0.1190, 0.2605]) -Greedy action tensor([ 1.3566, -0.6552, 1.6950, 1.0680]) tensor([0.3043, 0.0407, 0.4269, 0.2280]) -Greedy action tensor([ 2.0153, -1.0410, 0.7145, 1.2159]) tensor([0.5653, 0.0266, 0.1539, 0.2542]) -Greedy action tensor([ 1.2408, -0.7163, 0.3425, 0.4714]) tensor([0.4971, 0.0702, 0.2024, 0.2303]) -Greedy action tensor([ 1.8909, -0.4162, 0.7470, 1.6424]) tensor([0.4550, 0.0453, 0.1449, 0.3548]) -Greedy action tensor([ 0.9599, -0.8694, 0.3316, 1.6280]) tensor([0.2744, 0.0440, 0.1464, 0.5352]) -Greedy action tensor([ 1.5834, -0.5963, 1.2842, 0.4400]) tensor([0.4601, 0.0520, 0.3412, 0.1467]) -Greedy action tensor([ 2.0356, -1.3228, 0.3933, 1.9576]) tensor([0.4644, 0.0162, 0.0899, 0.4295]) -Greedy action tensor([1.8496, 0.0150, 0.1668, 1.4140]) tensor([0.5019, 0.0801, 0.0933, 0.3247]) -Greedy action tensor([ 2.0178, -0.9476, 1.6906, 1.3630]) tensor([0.4363, 0.0225, 0.3145, 0.2267]) -Greedy action tensor([ 1.0026, -0.4425, 0.5189, 1.2923]) tensor([0.3137, 0.0739, 0.1934, 0.4191]) -Greedy action tensor([ 1.4301, -0.5878, -0.7715, 1.6338]) tensor([0.4049, 0.0538, 0.0448, 0.4964]) -Greedy action tensor([0.9405, 0.4486, 1.0354, 1.4045]) tensor([0.2325, 0.1422, 0.2556, 0.3697]) -Greedy action tensor([1.0449, 0.8262, 0.2958, 1.4014]) tensor([0.2699, 0.2169, 0.1276, 0.3856]) -Greedy action tensor([ 1.1525, -0.9074, 1.7944, 1.4738]) tensor([0.2269, 0.0289, 0.4312, 0.3129]) -Greedy action tensor([ 2.0876, -0.8102, 0.8087, 1.7522]) tensor([0.4881, 0.0269, 0.1359, 0.3491]) -Greedy action tensor([ 2.1860, -0.4672, -0.4148, 0.7556]) tensor([0.7226, 0.0509, 0.0536, 0.1729]) -Greedy action tensor([ 2.0450, -0.5374, 0.8043, 1.5973]) tensor([0.4990, 0.0377, 0.1443, 0.3189]) -Greedy action tensor([ 1.2971, -0.1727, -0.1594, 0.6987]) tensor([0.4968, 0.1143, 0.1158, 0.2731]) -Greedy action tensor([1.4221, 0.3487, 1.0706, 0.5699]) tensor([0.4045, 0.1383, 0.2846, 0.1725]) -Greedy action tensor([ 1.0355, -0.3513, -0.6817, 1.5389]) tensor([0.3243, 0.0810, 0.0582, 0.5365]) -Greedy action tensor([ 2.3175, -0.5072, 0.6113, 0.7985]) tensor([0.6850, 0.0406, 0.1244, 0.1500]) -Greedy action tensor([ 1.4109, -0.3365, 0.2777, 1.2716]) tensor([0.4226, 0.0736, 0.1361, 0.3677]) -Greedy action tensor([ 1.4776, -0.6768, 0.4907, 1.3184]) tensor([0.4271, 0.0495, 0.1592, 0.3642]) -Greedy action tensor([ 1.2122, -0.1564, 1.0436, 1.4041]) tensor([0.3020, 0.0769, 0.2552, 0.3659]) -Greedy action tensor([ 1.6230, -0.8970, 1.2619, 0.9653]) tensor([0.4357, 0.0351, 0.3036, 0.2257]) -Greedy action tensor([ 1.8150, -0.4219, 1.2825, 1.2728]) tensor([0.4395, 0.0469, 0.2580, 0.2556]) -Greedy action tensor([ 1.3843, -0.6836, 0.4716, 1.2687]) tensor([0.4134, 0.0523, 0.1660, 0.3683]) -Greedy action tensor([ 1.4192, -0.1328, 1.5434, 0.9194]) tensor([0.3389, 0.0718, 0.3837, 0.2056]) -Greedy action tensor([1.0461, 0.0102, 1.3122, 1.1865]) tensor([0.2624, 0.0931, 0.3424, 0.3020]) -Greedy action tensor([1.8017, 0.0563, 0.8303, 1.2201]) tensor([0.4735, 0.0827, 0.1792, 0.2647]) -Greedy action tensor([ 1.2737, -0.9729, 1.7664, 0.9875]) tensor([0.2862, 0.0303, 0.4685, 0.2150]) -Greedy action tensor([ 1.4129, -0.7644, 0.0559, 0.8083]) tensor([0.5216, 0.0591, 0.1343, 0.2850]) -Greedy action tensor([ 1.1413, -1.0367, 0.4503, 1.3599]) tensor([0.3498, 0.0396, 0.1753, 0.4353]) -Greedy action tensor([ 1.5191, -0.2523, 1.1324, 0.6432]) tensor([0.4413, 0.0751, 0.2998, 0.1838]) -Greedy action tensor([ 1.9118, -0.3580, 0.8680, 1.7365]) tensor([0.4358, 0.0450, 0.1534, 0.3657]) -Greedy action tensor([ 1.8649, -0.4619, 0.1956, 1.1328]) tensor([0.5660, 0.0552, 0.1066, 0.2722]) -Greedy action tensor([ 1.4770, 0.1313, -0.4557, 0.7730]) tensor([0.5264, 0.1370, 0.0762, 0.2604]) -Greedy action tensor([ 1.6537, -0.0389, 1.2658, 1.2334]) tensor([0.3969, 0.0731, 0.2693, 0.2607]) -Greedy action tensor([1.6686, 0.1945, 1.4879, 1.1783]) tensor([0.3737, 0.0856, 0.3119, 0.2289]) -Greedy action tensor([ 1.4441, -0.1198, 0.9443, 1.0473]) tensor([0.4019, 0.0841, 0.2438, 0.2702]) -Greedy action tensor([ 1.4839, 0.1372, -0.5821, 0.8238]) tensor([0.5253, 0.1366, 0.0666, 0.2715]) -Greedy action tensor([ 1.1555, -0.7299, 1.8648, 0.7445]) tensor([0.2599, 0.0394, 0.5283, 0.1723]) -Greedy action tensor([ 1.6273, -1.0987, 1.7521, 1.4256]) tensor([0.3316, 0.0217, 0.3757, 0.2710]) -Greedy action tensor([ 1.8772, -0.0657, -0.2980, 1.1119]) tensor([0.5807, 0.0832, 0.0660, 0.2701]) -Greedy action tensor([ 1.6414, -0.4169, 0.6502, 1.0849]) tensor([0.4826, 0.0616, 0.1791, 0.2766]) -Greedy action tensor([ 1.9090, -0.9713, -0.0961, 0.7605]) tensor([0.6632, 0.0372, 0.0893, 0.2103]) -Greedy action tensor([ 2.1410, -1.3149, 0.9805, 1.4317]) tensor([0.5444, 0.0172, 0.1706, 0.2678]) -Greedy action tensor([ 1.9153, -0.3629, 0.5822, 0.6122]) tensor([0.6106, 0.0626, 0.1610, 0.1659]) -Greedy action tensor([1.6765, 0.3834, 0.3373, 2.4557]) tensor([0.2691, 0.0738, 0.0705, 0.5866]) -Greedy action tensor([1.7330, 0.8567, 0.6590, 1.8058]) tensor([0.3529, 0.1469, 0.1206, 0.3796]) -Greedy action tensor([1.2690, 0.3577, 0.0643, 1.2440]) tensor([0.3735, 0.1502, 0.1120, 0.3643]) -Greedy action tensor([ 1.2066, -0.9746, 0.7598, 1.1554]) tensor([0.3700, 0.0418, 0.2367, 0.3515]) -Greedy action tensor([1.3630, 0.1116, 0.9782, 1.6144]) tensor([0.3075, 0.0880, 0.2092, 0.3953]) -Greedy action tensor([ 1.3486, -0.3872, 1.4615, 1.0250]) tensor([0.3312, 0.0584, 0.3708, 0.2396]) -Greedy action tensor([ 0.4338, 0.2361, 0.0234, -0.2877]) tensor([0.3367, 0.2763, 0.2234, 0.1636]) -Greedy action tensor([ 0.2473, 0.0456, 0.1378, -0.1573]) tensor([0.2958, 0.2418, 0.2651, 0.1973]) -Greedy action tensor([ 0.2592, 0.0797, 0.1583, -0.0880]) tensor([0.2902, 0.2425, 0.2623, 0.2050]) -Greedy action tensor([ 0.4435, 0.0095, 0.2013, -0.3617]) tensor([0.3473, 0.2250, 0.2725, 0.1552]) -Greedy action tensor([ 0.3430, -0.1531, 0.1139, -0.3235]) tensor([0.3427, 0.2087, 0.2726, 0.1760]) -Greedy action tensor([ 0.3062, 0.2090, 0.0765, -0.2377]) tensor([0.3046, 0.2764, 0.2421, 0.1768]) -Greedy action tensor([ 0.2737, -0.0260, 0.2564, -0.1074]) tensor([0.2935, 0.2175, 0.2885, 0.2005]) -Greedy action tensor([ 0.2125, 0.0057, 0.1792, -0.2581]) tensor([0.2937, 0.2388, 0.2841, 0.1834]) -Greedy action tensor([ 0.9705, -0.4699, 0.0026, -0.5955]) tensor([0.5478, 0.1297, 0.2081, 0.1144]) -Greedy action tensor([ 0.8541, -0.4390, -0.0757, -0.5601]) tensor([0.5230, 0.1435, 0.2064, 0.1271]) -Greedy action tensor([ 0.3502, -0.0142, 0.1191, -0.2084]) tensor([0.3268, 0.2270, 0.2593, 0.1869]) -Greedy action tensor([ 0.2679, -0.0424, 0.1499, -0.1895]) tensor([0.3072, 0.2253, 0.2730, 0.1945]) -Greedy action tensor([ 0.3873, 0.0326, 0.2062, -0.3676]) tensor([0.3327, 0.2333, 0.2776, 0.1564]) -Greedy action tensor([ 0.3673, 0.0822, 0.0544, -0.1284]) tensor([0.3234, 0.2432, 0.2365, 0.1970]) -Greedy action tensor([ 0.5877, -0.0321, -0.0025, -0.2784]) tensor([0.3980, 0.2141, 0.2206, 0.1674]) -Greedy action tensor([ 0.3129, 0.1649, 0.1942, -0.3362]) tensor([0.3055, 0.2635, 0.2713, 0.1596]) -Greedy action tensor([ 0.2496, 0.1832, 0.1377, -0.2746]) tensor([0.2922, 0.2735, 0.2613, 0.1730]) -Greedy action tensor([ 0.3124, -0.0572, 0.2699, -0.2596]) tensor([0.3112, 0.2150, 0.2982, 0.1756]) -Greedy action tensor([ 0.3679, 0.0469, 0.2141, -0.1468]) tensor([0.3144, 0.2281, 0.2696, 0.1879]) -Greedy action tensor([ 0.6761, -0.4222, 0.1708, -0.6049]) tensor([0.4516, 0.1506, 0.2724, 0.1254]) -Greedy action tensor([ 1.0024, -0.6158, -0.1218, -0.7588]) tensor([0.5900, 0.1170, 0.1917, 0.1014]) -Greedy action tensor([ 0.4007, -0.2133, 0.0068, -0.4380]) tensor([0.3777, 0.2044, 0.2547, 0.1633]) -Greedy action tensor([ 0.3208, 0.1486, 0.1304, -0.0460]) tensor([0.2975, 0.2504, 0.2459, 0.2062]) -Greedy action tensor([ 0.3549, 0.1872, 0.0990, -0.3351]) tensor([0.3204, 0.2709, 0.2480, 0.1607]) -Greedy action tensor([ 0.2805, 0.0141, 0.1914, -0.2006]) tensor([0.3031, 0.2322, 0.2773, 0.1874]) -Greedy action tensor([ 0.4679, -0.0431, -0.0044, -0.2124]) tensor([0.3663, 0.2198, 0.2284, 0.1855]) -Greedy action tensor([ 0.1629, 0.0445, -0.0555, -0.2092]) tensor([0.2957, 0.2627, 0.2377, 0.2038]) -Greedy action tensor([ 0.3201, -0.0569, 0.1796, -0.2307]) tensor([0.3194, 0.2190, 0.2775, 0.1841]) -Greedy action tensor([ 0.4208, -0.2152, 0.1085, -0.3769]) tensor([0.3688, 0.1952, 0.2699, 0.1661]) -Greedy action tensor([ 0.9436, -0.3515, -0.1639, -0.7185]) tensor([0.5574, 0.1527, 0.1842, 0.1058]) -Greedy action tensor([ 0.5073, -0.1576, 0.1335, -0.4710]) tensor([0.3878, 0.1995, 0.2669, 0.1458]) -Greedy action tensor([ 0.6123, -0.3855, 0.1000, -0.5501]) tensor([0.4385, 0.1617, 0.2627, 0.1371]) -Greedy action tensor([ 0.5880, -0.1032, -0.0101, -0.4741]) tensor([0.4173, 0.2090, 0.2294, 0.1443]) -Greedy action tensor([ 0.3898, 0.1449, 0.0501, -0.0061]) tensor([0.3157, 0.2471, 0.2248, 0.2125]) -Greedy action tensor([ 0.4353, 0.1273, 0.1526, -0.2202]) tensor([0.3325, 0.2443, 0.2506, 0.1726]) -Greedy action tensor([ 0.3852, -0.1356, 0.1137, -0.3039]) tensor([0.3499, 0.2078, 0.2667, 0.1756]) -Greedy action tensor([ 0.4635, 0.0963, 0.1464, -0.4071]) tensor([0.3522, 0.2439, 0.2565, 0.1475]) -Greedy action tensor([ 0.3082, 0.1792, -0.0637, -0.2890]) tensor([0.3207, 0.2818, 0.2210, 0.1765]) -Greedy action tensor([ 0.5252, 0.0188, 0.0738, -0.3119]) tensor([0.3742, 0.2255, 0.2383, 0.1620]) -Greedy action tensor([ 0.3128, -0.0282, 0.1697, -0.3836]) tensor([0.3251, 0.2312, 0.2817, 0.1620]) -Greedy action tensor([ 0.2426, 0.0827, 0.1187, -0.2719]) tensor([0.3000, 0.2557, 0.2650, 0.1793]) -Greedy action tensor([ 0.8901, -0.6126, 0.0072, -0.6085]) tensor([0.5378, 0.1197, 0.2224, 0.1202]) -Greedy action tensor([ 0.3571, 0.1576, -0.0364, -0.2260]) tensor([0.3276, 0.2684, 0.2211, 0.1829]) -Greedy action tensor([ 0.2883, -0.0470, 0.1916, -0.1203]) tensor([0.3042, 0.2175, 0.2761, 0.2022]) -Greedy action tensor([ 0.4359, 0.1245, 0.1049, -0.1993]) tensor([0.3355, 0.2458, 0.2410, 0.1778]) -Greedy action tensor([ 0.7364, -0.2446, -0.0476, -0.4674]) tensor([0.4691, 0.1759, 0.2142, 0.1408]) -Greedy action tensor([ 0.3627, 0.1024, 0.1984, -0.1978]) tensor([0.3135, 0.2416, 0.2660, 0.1790]) -Greedy action tensor([ 0.5818, 0.1530, -0.1101, -0.2134]) tensor([0.3841, 0.2502, 0.1923, 0.1734]) -Greedy action tensor([ 0.2719, -0.0080, 0.1495, -0.1748]) tensor([0.3048, 0.2304, 0.2697, 0.1950]) -Greedy action tensor([ 0.3070, 0.0848, 0.0985, -0.4207]) tensor([0.3230, 0.2587, 0.2622, 0.1560]) -Greedy action tensor([ 0.5304, -0.0067, 0.0724, -0.2734]) tensor([0.3753, 0.2193, 0.2374, 0.1680]) -Greedy action tensor([ 0.6328, -0.1987, -0.0862, -0.4800]) tensor([0.4442, 0.1934, 0.2164, 0.1460]) -Greedy action tensor([ 0.3894, 0.1339, 0.1412, -0.2810]) tensor([0.3261, 0.2526, 0.2544, 0.1668]) -Greedy action tensor([ 0.3546, 0.0349, 0.1702, -0.2094]) tensor([0.3198, 0.2323, 0.2660, 0.1819]) -Greedy action tensor([ 0.4022, -0.0424, 0.1538, -0.2451]) tensor([0.3396, 0.2177, 0.2649, 0.1778]) -Greedy action tensor([ 0.5445, -0.1184, 0.1399, -0.4998]) tensor([0.3946, 0.2033, 0.2633, 0.1389]) -Greedy action tensor([ 0.4431, 0.2774, 0.0917, -0.2052]) tensor([0.3253, 0.2756, 0.2289, 0.1701]) -Greedy action tensor([ 0.3042, -0.0349, 0.1624, -0.1924]) tensor([0.3136, 0.2234, 0.2721, 0.1909]) -Greedy action tensor([ 0.3717, -0.1541, 0.1882, -0.3822]) tensor([0.3456, 0.2042, 0.2876, 0.1626]) -Greedy action tensor([ 0.2445, 0.1711, 0.2005, -0.1939]) tensor([0.2832, 0.2631, 0.2710, 0.1827]) -Greedy action tensor([ 0.4091, -0.1079, 0.1650, -0.2397]) tensor([0.3445, 0.2055, 0.2699, 0.1801]) -Greedy action tensor([ 0.3580, -0.0839, -0.0026, -0.4125]) tensor([0.3568, 0.2293, 0.2488, 0.1651]) -Greedy action tensor([ 0.2899, 0.0511, 0.0559, -0.4164]) tensor([0.3255, 0.2563, 0.2576, 0.1606]) -Greedy action tensor([ 0.2978, 0.0290, 0.1495, -0.2996]) tensor([0.3148, 0.2406, 0.2714, 0.1732]) -Greedy action tensor([ 0.3328, 0.1445, 0.1159, -0.2344]) tensor([0.3125, 0.2588, 0.2515, 0.1772]) -Greedy action tensor([ 0.3489, 0.1576, 0.1649, -0.2249]) tensor([0.3104, 0.2564, 0.2583, 0.1749]) -Greedy action tensor([ 0.4274, 0.1741, 0.1681, -0.1783]) tensor([0.3233, 0.2509, 0.2494, 0.1764]) -Greedy action tensor([ 0.2324, 0.1260, 0.2531, -0.1652]) tensor([0.2784, 0.2503, 0.2842, 0.1871]) -Greedy action tensor([ 0.6825, -0.5978, 0.1847, -0.6987]) tensor([0.4679, 0.1301, 0.2844, 0.1176]) -Greedy action tensor([ 0.7237, -0.3567, 0.0223, -0.5446]) tensor([0.4725, 0.1604, 0.2343, 0.1329]) -Greedy action tensor([ 0.5120, 0.0529, -0.0659, -0.2611]) tensor([0.3767, 0.2380, 0.2114, 0.1739]) -Greedy action tensor([ 0.3887, -0.0478, 0.2288, -0.3382]) tensor([0.3354, 0.2167, 0.2858, 0.1621]) -Greedy action tensor([ 0.4352, -0.1811, 0.2599, -0.4709]) tensor([0.3593, 0.1940, 0.3015, 0.1452]) -Greedy action tensor([ 0.1989, -0.0421, 0.0049, -0.2039]) tensor([0.3051, 0.2397, 0.2513, 0.2039]) -Greedy action tensor([ 0.6321, -0.1640, 0.0246, -0.4670]) tensor([0.4294, 0.1937, 0.2339, 0.1431]) -Greedy action tensor([ 0.2506, -0.0026, -0.1488, -0.3668]) tensor([0.3348, 0.2600, 0.2246, 0.1806]) -Greedy action tensor([ 0.4671, 0.0898, 0.1132, -0.2786]) tensor([0.3494, 0.2396, 0.2453, 0.1658]) -Greedy action tensor([ 0.1999, -0.0334, 0.1029, -0.1338]) tensor([0.2928, 0.2318, 0.2657, 0.2097]) -Greedy action tensor([ 0.2522, 0.1896, 0.0927, -0.2294]) tensor([0.2933, 0.2755, 0.2500, 0.1812]) -Greedy action tensor([ 0.3625, 0.1790, 0.0059, -0.2195]) tensor([0.3235, 0.2693, 0.2265, 0.1808]) -Greedy action tensor([ 1.1072, -0.6725, -0.0496, -0.7446]) tensor([0.6097, 0.1028, 0.1918, 0.0957]) -Greedy action tensor([ 1.2566, -0.1217, 0.1300, -0.1394]) tensor([0.5483, 0.1382, 0.1777, 0.1358]) -Greedy action tensor([ 1.4560, -0.4659, -0.2726, -0.1233]) tensor([0.6536, 0.0956, 0.1160, 0.1347]) -Greedy action tensor([ 1.1349, -0.4329, -0.3181, 0.0053]) tensor([0.5664, 0.1181, 0.1325, 0.1830]) -Greedy action tensor([ 0.6508, -0.4799, -0.1168, -0.0201]) tensor([0.4351, 0.1405, 0.2019, 0.2224]) -Greedy action tensor([ 1.1244, -0.3949, -0.2787, -0.0602]) tensor([0.5648, 0.1236, 0.1388, 0.1728]) -Greedy action tensor([ 1.2660, -0.6535, -0.4686, -0.0617]) tensor([0.6296, 0.0924, 0.1111, 0.1669]) -Greedy action tensor([ 1.2141, -0.3490, -0.0505, -0.1903]) tensor([0.5756, 0.1206, 0.1625, 0.1413]) -Greedy action tensor([ 0.9604, -0.4797, -0.3580, 0.1972]) tensor([0.5074, 0.1202, 0.1358, 0.2366]) -Greedy action tensor([ 1.5075, -0.1449, -0.2667, -0.1727]) tensor([0.6462, 0.1238, 0.1096, 0.1204]) -Greedy action tensor([ 1.5965, -0.4340, -0.4074, -0.2286]) tensor([0.7006, 0.0920, 0.0945, 0.1129]) -Greedy action tensor([ 1.0630, -0.7514, -0.2124, 0.0784]) tensor([0.5507, 0.0897, 0.1538, 0.2057]) -Greedy action tensor([ 1.0005, -0.4370, -0.2470, -0.1530]) tensor([0.5434, 0.1291, 0.1561, 0.1715]) -Greedy action tensor([ 1.0689, -0.5384, -0.0663, 0.4283]) tensor([0.4881, 0.0978, 0.1568, 0.2572]) -Greedy action tensor([ 0.6785, -0.7279, -0.3126, 0.0904]) tensor([0.4605, 0.1128, 0.1709, 0.2558]) -Greedy action tensor([ 0.7893, -0.6293, -0.2789, 0.2211]) tensor([0.4646, 0.1125, 0.1597, 0.2632]) -Greedy action tensor([ 1.3981, -0.1283, -0.2005, -0.2054]) tensor([0.6170, 0.1341, 0.1248, 0.1241]) -Greedy action tensor([ 0.9044, -0.7470, -0.2928, 0.3042]) tensor([0.4896, 0.0939, 0.1479, 0.2686]) -Greedy action tensor([ 1.2556, -0.8766, -0.5768, 0.2305]) tensor([0.6107, 0.0724, 0.0977, 0.2191]) -Greedy action tensor([ 1.4152, -0.5462, -0.0876, 0.2424]) tensor([0.5979, 0.0841, 0.1330, 0.1850]) -Greedy action tensor([ 1.2702, -0.5764, -0.3261, 0.0588]) tensor([0.6030, 0.0951, 0.1222, 0.1796]) -Greedy action tensor([ 0.8529, -0.5302, -0.0359, 0.1933]) tensor([0.4589, 0.1151, 0.1887, 0.2373]) -Greedy action tensor([ 1.6539, -0.5392, -0.3137, 0.1570]) tensor([0.6779, 0.0756, 0.0948, 0.1517]) -Greedy action tensor([ 1.3904, -0.6647, -0.2628, 0.2137]) tensor([0.6143, 0.0787, 0.1176, 0.1894]) -Greedy action tensor([ 0.7530, -0.4745, -0.1215, 0.1791]) tensor([0.4399, 0.1289, 0.1835, 0.2478]) -Greedy action tensor([ 0.6738, -0.2369, -0.0990, 0.0814]) tensor([0.4138, 0.1664, 0.1910, 0.2288]) -Greedy action tensor([ 2.1382, -0.5086, -0.3471, 0.1351]) tensor([0.7757, 0.0550, 0.0646, 0.1047]) -Greedy action tensor([ 1.1794, -0.6007, -0.6391, 0.2181]) tensor([0.5837, 0.0984, 0.0947, 0.2232]) -Greedy action tensor([ 0.9257, -0.0599, -0.0350, -0.0625]) tensor([0.4699, 0.1754, 0.1798, 0.1749]) -Greedy action tensor([ 1.4026, -0.3770, 0.1114, -0.0198]) tensor([0.5936, 0.1001, 0.1632, 0.1431]) -Greedy action tensor([ 1.2134, -0.5007, -0.1047, 0.2927]) tensor([0.5417, 0.0976, 0.1450, 0.2157]) -Greedy action tensor([ 1.3276, -0.6580, -0.4300, 0.3590]) tensor([0.5919, 0.0813, 0.1021, 0.2247]) -Greedy action tensor([ 1.4245, -0.7048, -0.4238, 0.2862]) tensor([0.6263, 0.0745, 0.0986, 0.2006]) -Greedy action tensor([ 0.7292, -0.5711, -0.2152, 0.1830]) tensor([0.4463, 0.1216, 0.1736, 0.2585]) -Greedy action tensor([ 1.1350, -0.3651, -0.4147, -0.0046]) tensor([0.5697, 0.1271, 0.1210, 0.1823]) -Greedy action tensor([ 0.9455, -0.4400, -0.2597, 0.1582]) tensor([0.4988, 0.1248, 0.1495, 0.2270]) -Greedy action tensor([ 1.3774, -0.5937, -0.4163, 0.1682]) tensor([0.6234, 0.0868, 0.1037, 0.1860]) -Greedy action tensor([ 1.2437, -0.5594, -0.3716, -0.1086]) tensor([0.6164, 0.1016, 0.1226, 0.1594]) -Greedy action tensor([ 1.2343, -0.7361, -0.1935, 0.1767]) tensor([0.5792, 0.0807, 0.1389, 0.2012]) -Greedy action tensor([ 0.9733, -0.7527, -0.3716, 0.1323]) tensor([0.5348, 0.0952, 0.1393, 0.2307]) -Greedy action tensor([ 1.4048, -0.1291, 0.0354, -0.1334]) tensor([0.5936, 0.1280, 0.1509, 0.1275]) -Greedy action tensor([ 1.1567, -0.6728, -0.2631, 0.2076]) tensor([0.5589, 0.0897, 0.1351, 0.2163]) -Greedy action tensor([ 1.1370, -0.4979, -0.0573, 0.2306]) tensor([0.5258, 0.1025, 0.1593, 0.2124]) -Greedy action tensor([ 0.8272, -0.2439, 0.0748, -0.1566]) tensor([0.4571, 0.1566, 0.2154, 0.1709]) -Greedy action tensor([ 0.6504, -0.2114, 0.0690, -0.1227]) tensor([0.4093, 0.1729, 0.2289, 0.1889]) -Greedy action tensor([ 1.2344, -0.4849, -0.2171, -0.1872]) tensor([0.6043, 0.1083, 0.1415, 0.1458]) -Greedy action tensor([ 0.7077, -0.4187, -0.0303, -0.1513]) tensor([0.4493, 0.1457, 0.2148, 0.1903]) -Greedy action tensor([ 0.6286, -0.4646, -0.1464, -0.0281]) tensor([0.4321, 0.1448, 0.1990, 0.2241]) -Greedy action tensor([ 0.9197, -0.6394, -0.5133, -0.0180]) tensor([0.5434, 0.1143, 0.1296, 0.2127]) -Greedy action tensor([ 1.4066, -0.4301, -0.3648, -0.2056]) tensor([0.6541, 0.1042, 0.1113, 0.1305]) -Greedy action tensor([ 1.2215, -0.1436, -0.1807, -0.1112]) tensor([0.5665, 0.1447, 0.1394, 0.1494]) -Greedy action tensor([ 1.1938, -0.4832, -0.4234, 0.1679]) tensor([0.5735, 0.1072, 0.1138, 0.2056]) -Greedy action tensor([ 1.2922, -0.4651, -0.0894, 0.1739]) tensor([0.5713, 0.0985, 0.1435, 0.1867]) -Greedy action tensor([ 1.1817, -0.5643, -0.3495, -0.0693]) tensor([0.5963, 0.1040, 0.1290, 0.1707]) -Greedy action tensor([ 1.3002, -0.4389, -0.2476, -0.0206]) tensor([0.6041, 0.1061, 0.1285, 0.1613]) -Greedy action tensor([ 1.3964, -0.6373, -0.1136, 0.0322]) tensor([0.6221, 0.0814, 0.1374, 0.1590]) -Greedy action tensor([ 0.6439, -0.2216, -0.0701, -0.1790]) tensor([0.4256, 0.1791, 0.2084, 0.1869]) -Greedy action tensor([ 1.2568, -0.1457, 0.0824, -0.2051]) tensor([0.5597, 0.1377, 0.1729, 0.1297]) -Greedy action tensor([ 0.8246, -0.5630, -0.2674, 0.2092]) tensor([0.4704, 0.1175, 0.1579, 0.2542]) -Greedy action tensor([ 1.6594, -0.7143, -0.5700, 0.1875]) tensor([0.6992, 0.0651, 0.0752, 0.1605]) -Greedy action tensor([ 1.0646, -0.5887, 0.1143, 0.1623]) tensor([0.5041, 0.0965, 0.1949, 0.2045]) -Greedy action tensor([ 1.2112, -0.3923, -0.3646, -0.1730]) tensor([0.6029, 0.1213, 0.1247, 0.1510]) -Greedy action tensor([ 0.8229, -0.6104, -0.4393, -0.1182]) tensor([0.5231, 0.1248, 0.1480, 0.2041]) -Greedy action tensor([ 0.9148, -0.6196, -0.2946, 0.2007]) tensor([0.4991, 0.1076, 0.1489, 0.2444]) -Greedy action tensor([ 1.0461, -0.9453, -0.6802, 0.2455]) tensor([0.5670, 0.0774, 0.1009, 0.2547]) -Greedy action tensor([ 1.2505, -0.2680, -0.1667, -0.1962]) tensor([0.5894, 0.1291, 0.1429, 0.1387]) -Greedy action tensor([ 0.8575, -0.6180, -0.3994, 0.1614]) tensor([0.4971, 0.1137, 0.1414, 0.2478]) -Greedy action tensor([ 1.3578, -0.6171, -0.2136, 0.2094]) tensor([0.6011, 0.0834, 0.1249, 0.1906]) -Greedy action tensor([ 1.1921, -0.7465, -0.5041, 0.3110]) tensor([0.5742, 0.0826, 0.1053, 0.2379]) -Greedy action tensor([ 0.9107, -0.5147, -0.3060, -0.1628]) tensor([0.5324, 0.1280, 0.1577, 0.1820]) -Greedy action tensor([ 1.6094, -0.3738, -0.4589, -0.1792]) tensor([0.6987, 0.0962, 0.0883, 0.1168]) -Greedy action tensor([ 1.2427, -0.4702, -0.2122, -0.1562]) tensor([0.6022, 0.1086, 0.1406, 0.1487]) -Greedy action tensor([ 0.7345, -0.2820, -0.0963, -0.1675]) tensor([0.4539, 0.1642, 0.1977, 0.1842]) -Greedy action tensor([ 1.0679, -0.3647, -0.2692, -0.0828]) tensor([0.5502, 0.1313, 0.1445, 0.1741]) -Greedy action tensor([ 1.0726, -0.4327, -0.1851, 0.1459]) tensor([0.5257, 0.1167, 0.1495, 0.2081]) -Greedy action tensor([ 0.9202, -0.3645, -0.2795, -0.0161]) tensor([0.5076, 0.1405, 0.1529, 0.1990]) -Greedy action tensor([ 0.7013, -0.3231, -0.1027, -0.0521]) tensor([0.4391, 0.1576, 0.1965, 0.2067]) -Greedy action tensor([ 1.1272, -0.3775, -0.1063, -0.1209]) tensor([0.5554, 0.1234, 0.1618, 0.1594]) -Greedy action tensor([ 1.2215, -0.4896, -0.4726, -0.0944]) tensor([0.6125, 0.1107, 0.1126, 0.1643]) -Greedy action tensor([ 1.5914, -0.6569, -0.1461, 0.2658]) tensor([0.6464, 0.0682, 0.1137, 0.1717]) -Greedy action tensor([ 1.1265, -0.3949, -0.1325, -0.0168]) tensor([0.5491, 0.1199, 0.1559, 0.1750]) -Greedy action tensor([ 1.1967, -0.6673, -0.5768, 0.0359]) tensor([0.6105, 0.0947, 0.1036, 0.1912]) -Greedy action tensor([-0.7339, -0.5066, 0.2972, -0.0181]) tensor([0.1407, 0.1767, 0.3947, 0.2879]) -Greedy action tensor([-1.4809, -0.5128, 0.4054, 0.1338]) tensor([0.0656, 0.1726, 0.4323, 0.3295]) -Greedy action tensor([-1.9933, -0.8908, 0.4472, -0.1456]) tensor([0.0458, 0.1379, 0.5257, 0.2906]) -Greedy action tensor([-1.8043, -0.7581, 0.0728, -0.3176]) tensor([0.0675, 0.1923, 0.4414, 0.2987]) -Greedy action tensor([-1.9399, -1.0091, 0.4420, -0.0823]) tensor([0.0481, 0.1221, 0.5212, 0.3085]) -Greedy action tensor([-1.9727, -0.9317, 0.5039, -0.1875]) tensor([0.0461, 0.1305, 0.5486, 0.2748]) -Greedy action tensor([-1.3814, -0.5521, 0.4172, 0.0543]) tensor([0.0739, 0.1693, 0.4463, 0.3105]) -Greedy action tensor([-1.8698, -0.9843, 0.4157, -0.3707]) tensor([0.0564, 0.1367, 0.5544, 0.2525]) -Greedy action tensor([-0.8303, -0.5713, 0.2323, 0.1959]) tensor([0.1253, 0.1624, 0.3626, 0.3497]) -Greedy action tensor([-1.3216, -0.3411, 0.5522, -0.4228]) tensor([0.0791, 0.2110, 0.5155, 0.1944]) -Greedy action tensor([-1.0189, -0.6496, -0.4650, -0.5047]) tensor([0.1707, 0.2469, 0.2970, 0.2854]) -Greedy action tensor([-1.2119, -0.6862, 0.5981, 0.6742]) tensor([0.0650, 0.1099, 0.3969, 0.4283]) -Greedy action tensor([-1.7548, -0.4774, 0.5833, -0.0447]) tensor([0.0488, 0.1752, 0.5060, 0.2700]) -Greedy action tensor([-1.8106, -0.7009, 0.2507, -0.2988]) tensor([0.0609, 0.1847, 0.4783, 0.2761]) -Greedy action tensor([-1.0608, -0.6257, 0.3840, 0.4121]) tensor([0.0897, 0.1386, 0.3804, 0.3913]) -Greedy action tensor([-1.1243, -0.4636, -0.4053, -0.3836]) tensor([0.1411, 0.2732, 0.2896, 0.2960]) -Greedy action tensor([-1.2652, -0.8954, 0.7127, -0.2971]) tensor([0.0812, 0.1176, 0.5872, 0.2139]) -Greedy action tensor([-1.6394, -0.5691, 0.5503, -0.0886]) tensor([0.0569, 0.1660, 0.5086, 0.2685]) -Greedy action tensor([-0.6881, -0.2563, -0.2252, -0.0911]) tensor([0.1682, 0.2590, 0.2672, 0.3056]) -Greedy action tensor([-1.0683, -0.3537, 0.1586, -0.2994]) tensor([0.1161, 0.2373, 0.3961, 0.2505]) -Greedy action tensor([-1.8015, -0.9742, 0.1150, -0.7119]) tensor([0.0766, 0.1752, 0.5206, 0.2277]) -Greedy action tensor([-1.5587, -0.5397, 0.4540, 0.0591]) tensor([0.0614, 0.1700, 0.4592, 0.3094]) -Greedy action tensor([-1.8643, -0.8270, 0.0475, -0.3323]) tensor([0.0657, 0.1855, 0.4447, 0.3041]) -Greedy action tensor([-1.9595, -0.9415, 0.3112, -0.4073]) tensor([0.0550, 0.1523, 0.5329, 0.2598]) -Greedy action tensor([-1.3810, -0.5026, -0.0329, -0.2326]) tensor([0.0961, 0.2312, 0.3698, 0.3029]) -Greedy action tensor([-1.5057, -0.3776, 0.5101, -0.1589]) tensor([0.0648, 0.2001, 0.4861, 0.2490]) -Greedy action tensor([-1.7568, -0.6882, 0.2269, -0.3908]) tensor([0.0662, 0.1928, 0.4814, 0.2596]) -Greedy action tensor([-1.2944, -0.4654, -0.2149, -0.5078]) tensor([0.1186, 0.2718, 0.3491, 0.2605]) -Greedy action tensor([-1.5376, -0.2905, 0.5069, -0.1933]) tensor([0.0623, 0.2170, 0.4816, 0.2391]) -Greedy action tensor([-1.8062, -0.4567, 0.6003, -0.1054]) tensor([0.0467, 0.1799, 0.5178, 0.2556]) -Greedy action tensor([-0.9803, -0.6525, 0.2695, 0.1278]) tensor([0.1123, 0.1558, 0.3918, 0.3401]) -Greedy action tensor([-1.2973, -0.6792, 0.1641, -0.4290]) tensor([0.1047, 0.1943, 0.4515, 0.2495]) -Greedy action tensor([-1.9693, -0.9119, 0.4557, -0.1098]) tensor([0.0463, 0.1333, 0.5232, 0.2972]) -Greedy action tensor([-1.5966, -0.6490, 0.1134, -0.4919]) tensor([0.0825, 0.2127, 0.4559, 0.2489]) -Greedy action tensor([-0.8586, -0.3615, -0.0022, -0.1766]) tensor([0.1433, 0.2357, 0.3375, 0.2835]) -Greedy action tensor([-1.9649, -0.7215, 0.4177, -0.2185]) tensor([0.0475, 0.1648, 0.5150, 0.2726]) -Greedy action tensor([-1.9335, -0.7418, 0.8411, 0.1609]) tensor([0.0352, 0.1157, 0.5636, 0.2855]) -Greedy action tensor([-1.8034, -0.5092, 0.6096, -0.0909]) tensor([0.0468, 0.1708, 0.5228, 0.2595]) -Greedy action tensor([-0.5681, -0.5417, 0.2551, 0.2502]) tensor([0.1522, 0.1563, 0.3466, 0.3449]) -Greedy action tensor([-0.6650, -0.2275, 0.1871, -0.0430]) tensor([0.1480, 0.2293, 0.3470, 0.2757]) -Greedy action tensor([-1.0768, -0.6380, 0.2543, 0.2335]) tensor([0.0996, 0.1544, 0.3769, 0.3691]) -Greedy action tensor([-1.6711, -0.7207, 0.1917, 0.0444]) tensor([0.0642, 0.1660, 0.4132, 0.3566]) -Greedy action tensor([-1.9194, -0.7271, 0.6721, 0.0458]) tensor([0.0404, 0.1330, 0.5387, 0.2880]) -Greedy action tensor([-1.8763, -0.4676, 0.3948, -0.1632]) tensor([0.0492, 0.2012, 0.4767, 0.2729]) -Greedy action tensor([-1.0528, -0.6114, 0.5160, 0.1086]) tensor([0.0948, 0.1474, 0.4551, 0.3028]) -Greedy action tensor([-1.9338, -0.8450, 0.6629, 0.0388]) tensor([0.0407, 0.1209, 0.5460, 0.2925]) -Greedy action tensor([-2.0298, -0.8953, 0.5265, -0.1213]) tensor([0.0421, 0.1310, 0.5429, 0.2840]) -Greedy action tensor([-2.0355, -0.9001, 0.5054, -0.1284]) tensor([0.0425, 0.1322, 0.5392, 0.2861]) -Greedy action tensor([-1.9274, -0.5921, 0.8705, 0.1077]) tensor([0.0346, 0.1317, 0.5685, 0.2651]) -Greedy action tensor([-1.5788, -0.8086, -0.0171, -0.1721]) tensor([0.0833, 0.1799, 0.3969, 0.3399]) -Greedy action tensor([-1.8395, -0.7707, 0.2485, -0.2868]) tensor([0.0599, 0.1743, 0.4830, 0.2828]) -Greedy action tensor([-0.8889, 0.0069, 0.1225, -0.1516]) tensor([0.1206, 0.2955, 0.3317, 0.2522]) -Greedy action tensor([-1.0734, -0.5823, 0.4197, 0.0228]) tensor([0.0992, 0.1622, 0.4416, 0.2970]) -Greedy action tensor([-1.3176, -0.3692, 0.0941, -0.3729]) tensor([0.0975, 0.2517, 0.4000, 0.2508]) -Greedy action tensor([-1.2144, -0.2538, 0.2188, -0.3921]) tensor([0.0992, 0.2592, 0.4158, 0.2257]) -Greedy action tensor([-2.0079, -0.6631, 0.8039, 0.0209]) tensor([0.0344, 0.1320, 0.5722, 0.2615]) -Greedy action tensor([-1.1593, -0.6461, 0.3212, 0.3681]) tensor([0.0857, 0.1431, 0.3766, 0.3946]) -Greedy action tensor([-1.9108, -0.8983, 0.7026, -0.0311]) tensor([0.0418, 0.1149, 0.5698, 0.2736]) -Greedy action tensor([-1.7184, -0.7007, 0.2324, -0.3771]) tensor([0.0684, 0.1892, 0.4810, 0.2615]) -Greedy action tensor([-0.9921, -0.2104, 0.1830, -0.2776]) tensor([0.1181, 0.2581, 0.3825, 0.2413]) -Greedy action tensor([-1.8813, -0.6295, 0.8655, 0.2094]) tensor([0.0355, 0.1241, 0.5533, 0.2871]) -Greedy action tensor([-1.8371, -0.8298, 0.2129, -0.3251]) tensor([0.0623, 0.1707, 0.4842, 0.2827]) -Greedy action tensor([-2.0070, -0.9527, 0.4701, -0.2862]) tensor([0.0468, 0.1343, 0.5573, 0.2616]) -Greedy action tensor([-1.6738, -0.6803, 0.3057, -0.3334]) tensor([0.0677, 0.1830, 0.4904, 0.2588]) -Greedy action tensor([-1.1967, -0.5620, 0.3727, 0.0667]) tensor([0.0891, 0.1680, 0.4278, 0.3151]) -Greedy action tensor([-1.8523, -0.8077, 0.2048, -0.3319]) tensor([0.0616, 0.1750, 0.4817, 0.2817]) -Greedy action tensor([-1.3795, -0.6149, 0.0195, -0.1411]) tensor([0.0939, 0.2017, 0.3804, 0.3240]) -Greedy action tensor([-1.4749, -0.8764, 0.5193, -0.4254]) tensor([0.0768, 0.1397, 0.5641, 0.2193]) -Greedy action tensor([-2.0360, -0.7079, 0.7395, 0.0427]) tensor([0.0347, 0.1310, 0.5569, 0.2774]) -Greedy action tensor([-1.3689, -0.5416, 0.3924, 0.0734]) tensor([0.0750, 0.1715, 0.4364, 0.3172]) -Greedy action tensor([-1.6037, -0.5381, 0.7117, -0.0908]) tensor([0.0538, 0.1563, 0.5454, 0.2445]) -Greedy action tensor([-2.0502, -0.8155, 0.6841, 0.0357]) tensor([0.0359, 0.1233, 0.5522, 0.2887]) -Greedy action tensor([-1.9137, -0.6130, 0.8316, 0.1540]) tensor([0.0355, 0.1304, 0.5531, 0.2809]) -Greedy action tensor([-0.8094, -0.2477, -0.5253, -0.1875]) tensor([0.1682, 0.2950, 0.2235, 0.3133]) -Greedy action tensor([-0.9347, -0.1152, -0.4486, -0.1314]) tensor([0.1403, 0.3184, 0.2281, 0.3133]) -Greedy action tensor([-1.9582, -0.5536, 0.3278, -0.1692]) tensor([0.0479, 0.1950, 0.4708, 0.2864]) -Greedy action tensor([-1.5501, -0.5337, 0.4580, 0.0573]) tensor([0.0617, 0.1705, 0.4598, 0.3080]) -Greedy action tensor([-2.0194, -0.8177, 0.6805, 0.1199]) tensor([0.0361, 0.1201, 0.5372, 0.3067]) -Greedy action tensor([-1.1234, -0.6062, 0.0915, -0.3245]) tensor([0.1209, 0.2028, 0.4075, 0.2688]) -Greedy action tensor([-1.3985, 0.3333, 0.6134, -0.5938]) tensor([0.0611, 0.3453, 0.4570, 0.1366]) -Greedy action tensor([-0.9326, -0.4488, 0.2760, -0.3719]) tensor([0.1295, 0.2100, 0.4336, 0.2268]) -Greedy action tensor([ 1.7382, -0.3252, 0.4410, 0.9700]) tensor([0.5364, 0.0681, 0.1466, 0.2488]) -Greedy action tensor([ 1.3239, -0.2115, -0.3913, 1.3797]) tensor([0.4077, 0.0878, 0.0734, 0.4311]) -Greedy action tensor([ 0.7341, -0.7092, 0.8191, -0.1702]) tensor([0.3663, 0.0865, 0.3989, 0.1483]) -Greedy action tensor([ 1.9483, -0.2610, 0.3226, 0.7725]) tensor([0.6192, 0.0680, 0.1218, 0.1910]) -Greedy action tensor([ 1.0842, -0.1016, 0.7729, 0.8769]) tensor([0.3508, 0.1072, 0.2569, 0.2851]) -Greedy action tensor([ 1.3153, -0.6261, 0.9745, 0.9347]) tensor([0.3940, 0.0565, 0.2802, 0.2693]) -Greedy action tensor([ 1.0784, 0.3559, -0.0407, 0.6272]) tensor([0.4083, 0.1983, 0.1333, 0.2600]) -Greedy action tensor([ 1.3748, -1.5166, 1.6328, 1.0038]) tensor([0.3290, 0.0183, 0.4258, 0.2270]) -Greedy action tensor([ 1.6587, -0.9411, -0.6937, 1.0081]) tensor([0.5913, 0.0439, 0.0563, 0.3085]) -Greedy action tensor([ 0.8350, -0.3211, 1.0081, 0.4821]) tensor([0.3119, 0.0982, 0.3708, 0.2191]) -Greedy action tensor([ 1.7744, -1.1331, 0.6321, 1.5429]) tensor([0.4615, 0.0252, 0.1473, 0.3661]) -Greedy action tensor([ 1.2381, -0.6744, 1.0124, 0.2254]) tensor([0.4331, 0.0640, 0.3456, 0.1573]) -Greedy action tensor([ 2.1427, -0.5960, -0.5105, 1.2567]) tensor([0.6463, 0.0418, 0.0455, 0.2664]) -Greedy action tensor([ 1.3301, -0.6540, 0.8703, 0.5831]) tensor([0.4459, 0.0613, 0.2815, 0.2113]) -Greedy action tensor([1.6736, 0.7049, 1.0882, 1.0809]) tensor([0.4017, 0.1525, 0.2237, 0.2221]) -Greedy action tensor([1.3212, 0.0280, 0.8971, 0.7987]) tensor([0.3965, 0.1088, 0.2595, 0.2352]) -Greedy action tensor([ 1.3553, 0.0367, -0.3752, 0.9022]) tensor([0.4807, 0.1286, 0.0852, 0.3056]) -Greedy action tensor([ 1.0308, -0.3062, 1.1939, 0.8542]) tensor([0.3051, 0.0801, 0.3591, 0.2557]) -Greedy action tensor([ 1.2759, -0.7697, 1.2160, 0.3613]) tensor([0.4046, 0.0523, 0.3810, 0.1621]) -Greedy action tensor([ 1.2693, -0.1534, 1.1489, 1.0050]) tensor([0.3454, 0.0833, 0.3062, 0.2652]) -Greedy action tensor([ 1.6525, 0.0704, -0.0344, 0.7643]) tensor([0.5549, 0.1141, 0.1027, 0.2283]) -Greedy action tensor([ 0.8040, -1.3739, 0.6287, 0.4697]) tensor([0.3748, 0.0425, 0.3145, 0.2683]) -Greedy action tensor([ 1.7501, -1.0976, 0.4663, 1.6017]) tensor([0.4552, 0.0264, 0.1261, 0.3924]) -Greedy action tensor([ 1.4495, -1.2295, 1.3791, 1.4793]) tensor([0.3299, 0.0226, 0.3075, 0.3399]) -Greedy action tensor([ 1.5137, -0.5818, 1.5198, 1.2489]) tensor([0.3452, 0.0425, 0.3474, 0.2649]) -Greedy action tensor([1.6986, 0.1436, 0.0540, 0.7523]) tensor([0.5579, 0.1178, 0.1077, 0.2166]) -Greedy action tensor([ 1.3597, -0.7496, 0.2437, 1.0407]) tensor([0.4596, 0.0558, 0.1506, 0.3341]) -Greedy action tensor([ 1.7771, -0.1783, 0.3302, 0.7894]) tensor([0.5717, 0.0809, 0.1345, 0.2129]) -Greedy action tensor([ 0.8590, -0.2903, 0.7011, 0.9435]) tensor([0.3068, 0.0972, 0.2620, 0.3339]) -Greedy action tensor([ 0.9956, -0.4429, 0.5488, 1.2379]) tensor([0.3173, 0.0753, 0.2030, 0.4043]) -Greedy action tensor([ 1.0906, -0.6419, 0.5930, 1.1902]) tensor([0.3461, 0.0612, 0.2104, 0.3823]) -Greedy action tensor([ 1.9185, -0.4935, 0.3358, 0.9315]) tensor([0.5996, 0.0537, 0.1232, 0.2235]) -Greedy action tensor([ 1.4281, -0.9759, 1.4906, 0.3346]) tensor([0.4016, 0.0363, 0.4275, 0.1346]) -Greedy action tensor([ 2.1294, -0.2703, -0.3737, 1.3167]) tensor([0.6187, 0.0561, 0.0506, 0.2745]) -Greedy action tensor([1.9169, 0.0063, 0.9401, 1.4740]) tensor([0.4615, 0.0683, 0.1738, 0.2964]) -Greedy action tensor([ 1.9627, -0.1082, 0.0971, 1.2235]) tensor([0.5687, 0.0717, 0.0880, 0.2716]) -Greedy action tensor([ 1.8259, -0.5437, 1.5429, 0.8184]) tensor([0.4520, 0.0423, 0.3406, 0.1651]) -Greedy action tensor([2.3863e+00, 1.4168e-03, 9.2642e-01, 1.4873e+00]) tensor([0.5776, 0.0532, 0.1342, 0.2351]) -Greedy action tensor([ 2.0692, -0.4181, 1.3285, 1.6029]) tensor([0.4572, 0.0380, 0.2180, 0.2868]) -Greedy action tensor([ 1.3621, -0.0631, 0.0034, 0.4807]) tensor([0.5231, 0.1258, 0.1344, 0.2167]) -Greedy action tensor([ 1.4247, -0.3514, 0.7896, 0.7455]) tensor([0.4533, 0.0767, 0.2402, 0.2298]) -Greedy action tensor([ 1.4150, -0.1771, 0.5109, 1.4406]) tensor([0.3796, 0.0772, 0.1537, 0.3895]) -Greedy action tensor([ 1.5741, -0.2988, -0.8657, 0.4142]) tensor([0.6433, 0.0989, 0.0561, 0.2017]) -Greedy action tensor([ 1.9397, 0.3470, -0.2182, 0.9019]) tensor([0.5977, 0.1216, 0.0691, 0.2117]) -Greedy action tensor([ 1.2311, -0.7472, 0.7550, 1.3795]) tensor([0.3425, 0.0474, 0.2128, 0.3973]) -Greedy action tensor([ 0.6387, -1.2530, 0.9448, 0.1266]) tensor([0.3217, 0.0485, 0.4370, 0.1928]) -Greedy action tensor([ 2.3644, -0.3246, 1.1437, 1.2236]) tensor([0.5943, 0.0404, 0.1754, 0.1899]) -Greedy action tensor([ 1.5736, -0.3127, -0.5547, 0.8694]) tensor([0.5665, 0.0859, 0.0674, 0.2801]) -Greedy action tensor([ 1.6206, -0.4787, 1.3625, 1.1368]) tensor([0.3982, 0.0488, 0.3076, 0.2455]) -Greedy action tensor([ 1.5916, -0.8147, 0.3594, 1.0065]) tensor([0.5158, 0.0465, 0.1504, 0.2873]) -Greedy action tensor([ 1.8570, -0.3269, 0.8778, 1.6788]) tensor([0.4301, 0.0484, 0.1615, 0.3599]) -Greedy action tensor([1.4803, 0.2069, 0.3983, 1.3631]) tensor([0.3987, 0.1116, 0.1351, 0.3546]) -Greedy action tensor([ 1.1224, -0.3486, 0.8944, 0.3122]) tensor([0.4048, 0.0930, 0.3222, 0.1800]) -Greedy action tensor([ 1.4077, -0.8427, -0.1803, 1.2154]) tensor([0.4684, 0.0494, 0.0957, 0.3865]) -Greedy action tensor([ 1.8894, -1.0696, 1.6622, 1.0845]) tensor([0.4356, 0.0226, 0.3471, 0.1948]) -Greedy action tensor([ 1.3852, 0.5922, -0.5122, 0.8916]) tensor([0.4519, 0.2045, 0.0678, 0.2759]) -Greedy action tensor([ 1.7851, -0.2920, -0.7282, 1.1808]) tensor([0.5705, 0.0715, 0.0462, 0.3118]) -Greedy action tensor([ 1.2239, -0.7784, 1.1461, 0.0091]) tensor([0.4243, 0.0573, 0.3925, 0.1259]) -Greedy action tensor([ 1.9776, -1.0147, -0.2823, 1.4770]) tensor([0.5680, 0.0285, 0.0593, 0.3443]) -Greedy action tensor([ 1.2167, 0.5351, -0.2439, 1.2938]) tensor([0.3548, 0.1795, 0.0824, 0.3833]) -Greedy action tensor([ 1.3665, -0.9326, -0.1236, 0.8733]) tensor([0.5164, 0.0518, 0.1164, 0.3154]) -Greedy action tensor([ 1.8607, -0.1782, 1.2849, 1.3814]) tensor([0.4326, 0.0563, 0.2432, 0.2679]) -Greedy action tensor([ 1.7393, -0.1563, 0.5269, 0.4226]) tensor([0.5828, 0.0876, 0.1734, 0.1562]) -Greedy action tensor([ 1.7167, -0.5468, -0.1097, 1.2616]) tensor([0.5265, 0.0547, 0.0848, 0.3340]) -Greedy action tensor([ 1.1512, 0.1930, -0.1189, 0.7864]) tensor([0.4240, 0.1626, 0.1190, 0.2944]) -Greedy action tensor([2.1931, 0.3501, 0.0268, 0.0698]) tensor([0.7181, 0.1137, 0.0823, 0.0859]) -Greedy action tensor([1.7570, 0.1933, 0.4988, 1.1224]) tensor([0.4941, 0.1035, 0.1404, 0.2620]) -Greedy action tensor([ 1.0617, -0.0229, -0.2194, 0.8602]) tensor([0.4110, 0.1389, 0.1141, 0.3360]) -Greedy action tensor([ 1.8049, -0.2881, 0.6544, 1.1554]) tensor([0.5097, 0.0628, 0.1613, 0.2662]) -Greedy action tensor([ 1.4175, -0.6135, 0.6690, 1.5189]) tensor([0.3689, 0.0484, 0.1745, 0.4082]) -Greedy action tensor([1.4518, 0.1522, 1.1513, 0.5344]) tensor([0.4145, 0.1130, 0.3069, 0.1656]) -Greedy action tensor([ 1.3297, 0.2431, -0.6984, 2.1588]) tensor([0.2660, 0.0897, 0.0350, 0.6093]) -Greedy action tensor([1.5677, 0.1686, 0.3202, 0.4992]) tensor([0.5326, 0.1315, 0.1530, 0.1830]) -Greedy action tensor([ 1.1280, -0.8824, 1.4401, 1.1144]) tensor([0.2868, 0.0384, 0.3918, 0.2829]) -Greedy action tensor([ 1.9312, -0.7255, 1.6095, 0.2398]) tensor([0.5052, 0.0355, 0.3662, 0.0931]) -Greedy action tensor([ 1.5354, -0.5943, 1.0041, 0.9530]) tensor([0.4415, 0.0525, 0.2595, 0.2466]) -Greedy action tensor([ 0.9710, -0.0550, 0.4081, 1.4385]) tensor([0.2838, 0.1017, 0.1616, 0.4529]) -Greedy action tensor([ 1.3872, -0.2521, 1.0206, 1.3272]) tensor([0.3535, 0.0686, 0.2450, 0.3329]) -Greedy action tensor([ 1.7829, -0.6323, 1.1273, 0.9785]) tensor([0.4864, 0.0435, 0.2525, 0.2176]) -Greedy action tensor([ 1.5650, -0.7814, 0.3054, 0.8618]) tensor([0.5335, 0.0511, 0.1514, 0.2641]) -Greedy action tensor([ 1.9588, 0.0386, -0.2591, 0.4456]) tensor([0.6777, 0.0993, 0.0738, 0.1492]) -Greedy action tensor([ 1.2871, -0.0227, 1.0644, 1.8350]) tensor([0.2632, 0.0710, 0.2106, 0.4552]) -Greedy action tensor([ 0.3220, -0.0102, 0.0912, -0.3345]) tensor([0.3300, 0.2368, 0.2620, 0.1712]) -Greedy action tensor([ 0.2410, 0.0826, 0.2027, -0.1557]) tensor([0.2867, 0.2447, 0.2759, 0.1928]) -Greedy action tensor([ 0.3179, 0.0446, 0.1106, -0.2757]) tensor([0.3199, 0.2434, 0.2600, 0.1767]) -Greedy action tensor([ 0.6712, -0.2395, 0.0141, -0.4395]) tensor([0.4445, 0.1788, 0.2304, 0.1464]) -Greedy action tensor([ 0.5505, -0.0342, 0.0074, -0.4341]) tensor([0.3981, 0.2219, 0.2313, 0.1487]) -Greedy action tensor([ 0.2733, -0.0285, 0.1238, -0.1647]) tensor([0.3081, 0.2278, 0.2653, 0.1988]) -Greedy action tensor([ 0.5731, -0.0580, 0.0445, -0.2840]) tensor([0.3928, 0.2090, 0.2315, 0.1667]) -Greedy action tensor([ 0.7026, -0.1771, 0.0381, -0.3763]) tensor([0.4407, 0.1828, 0.2267, 0.1498]) -Greedy action tensor([ 0.7305, -0.2518, -0.1133, -0.5166]) tensor([0.4781, 0.1790, 0.2056, 0.1374]) -Greedy action tensor([ 0.2592, 0.0699, 0.1133, -0.0377]) tensor([0.2911, 0.2409, 0.2516, 0.2164]) -Greedy action tensor([ 0.3488, -0.0191, 0.2222, -0.1470]) tensor([0.3142, 0.2175, 0.2769, 0.1914]) -Greedy action tensor([ 0.2080, 0.0038, 0.0038, -0.2079]) tensor([0.3039, 0.2478, 0.2478, 0.2005]) -Greedy action tensor([ 0.6776, -0.2316, -0.0577, -0.5822]) tensor([0.4617, 0.1860, 0.2213, 0.1310]) -Greedy action tensor([ 0.6102, -0.0224, 0.0160, -0.3850]) tensor([0.4077, 0.2166, 0.2250, 0.1507]) -Greedy action tensor([ 0.3329, -0.1927, -0.0533, -0.2881]) tensor([0.3561, 0.2105, 0.2420, 0.1914]) -Greedy action tensor([ 0.5877, -0.3938, -0.0175, -0.5073]) tensor([0.4434, 0.1662, 0.2421, 0.1483]) -Greedy action tensor([ 0.6440, -0.0706, -0.0421, -0.3729]) tensor([0.4247, 0.2078, 0.2138, 0.1536]) -Greedy action tensor([ 0.9263, -0.4608, 0.0372, -0.7371]) tensor([0.5404, 0.1350, 0.2221, 0.1024]) -Greedy action tensor([ 0.1410, 0.0198, 0.0877, -0.2097]) tensor([0.2826, 0.2504, 0.2680, 0.1990]) -Greedy action tensor([ 1.0538, -0.6108, 0.0126, -0.6674]) tensor([0.5810, 0.1100, 0.2051, 0.1039]) -Greedy action tensor([ 0.4448, -0.0853, 0.1541, -0.4467]) tensor([0.3641, 0.2143, 0.2723, 0.1493]) -Greedy action tensor([ 0.2760, -0.0071, 0.0408, -0.1659]) tensor([0.3138, 0.2364, 0.2480, 0.2017]) -Greedy action tensor([ 0.3546, -0.0548, 0.1292, -0.3193]) tensor([0.3365, 0.2234, 0.2686, 0.1715]) -Greedy action tensor([ 1.1575, -0.5017, -0.0559, -0.6436]) tensor([0.6051, 0.1151, 0.1798, 0.0999]) -Greedy action tensor([ 0.4612, -0.2232, -0.1290, -0.4132]) tensor([0.4039, 0.2037, 0.2239, 0.1685]) -Greedy action tensor([ 0.3704, 0.0998, 0.0886, -0.3012]) tensor([0.3302, 0.2519, 0.2491, 0.1687]) -Greedy action tensor([ 0.4732, 0.1141, 0.1001, -0.2994]) tensor([0.3510, 0.2451, 0.2417, 0.1621]) -Greedy action tensor([ 0.3572, 0.0578, 0.2180, -0.0859]) tensor([0.3074, 0.2278, 0.2674, 0.1973]) -Greedy action tensor([ 0.9391, -0.4806, -0.2309, -0.7228]) tensor([0.5741, 0.1388, 0.1782, 0.1089]) -Greedy action tensor([ 0.7731, -0.3360, -0.1120, -0.6115]) tensor([0.5018, 0.1655, 0.2071, 0.1257]) -Greedy action tensor([ 0.1834, -0.0045, 0.0403, -0.1980]) tensor([0.2960, 0.2453, 0.2565, 0.2022]) -Greedy action tensor([ 0.3335, 0.0215, 0.0394, -0.3287]) tensor([0.3341, 0.2446, 0.2490, 0.1723]) -Greedy action tensor([ 0.3595, -0.1261, 0.0723, -0.3951]) tensor([0.3526, 0.2170, 0.2646, 0.1658]) -Greedy action tensor([ 0.4800, -0.0840, 0.1051, -0.2933]) tensor([0.3679, 0.2093, 0.2529, 0.1698]) -Greedy action tensor([ 0.3991, 0.1562, 0.1745, -0.2591]) tensor([0.3225, 0.2529, 0.2576, 0.1670]) -Greedy action tensor([ 0.8619, -0.3694, -0.0555, -0.6891]) tensor([0.5254, 0.1534, 0.2099, 0.1114]) -Greedy action tensor([ 0.4504, -0.1671, -0.1015, -0.3584]) tensor([0.3905, 0.2106, 0.2249, 0.1739]) -Greedy action tensor([ 0.3904, 0.1640, 0.0070, -0.2591]) tensor([0.3332, 0.2657, 0.2271, 0.1740]) -Greedy action tensor([ 0.7596, -0.3928, -0.1404, -0.4782]) tensor([0.4969, 0.1570, 0.2020, 0.1441]) -Greedy action tensor([ 0.3673, 0.0066, 0.1366, -0.2205]) tensor([0.3282, 0.2288, 0.2606, 0.1823]) -Greedy action tensor([ 0.2821, 0.1906, 0.0741, -0.2775]) tensor([0.3034, 0.2769, 0.2464, 0.1734]) -Greedy action tensor([ 0.3303, 0.0538, 0.0953, -0.2153]) tensor([0.3196, 0.2424, 0.2527, 0.1852]) -Greedy action tensor([ 0.4250, 0.1206, 0.0764, -0.1514]) tensor([0.3328, 0.2454, 0.2348, 0.1870]) -Greedy action tensor([ 0.2094, 0.0467, 0.1842, -0.0843]) tensor([0.2801, 0.2380, 0.2731, 0.2088]) -Greedy action tensor([ 0.1408, 0.0843, 0.0915, -0.2126]) tensor([0.2778, 0.2626, 0.2645, 0.1951]) -Greedy action tensor([ 0.2885, 0.0686, 0.2050, -0.1905]) tensor([0.2992, 0.2402, 0.2753, 0.1853]) -Greedy action tensor([ 0.3191, 0.1102, 0.1188, -0.1587]) tensor([0.3077, 0.2497, 0.2518, 0.1908]) -Greedy action tensor([ 0.3671, 0.1671, 0.1011, -0.2896]) tensor([0.3222, 0.2638, 0.2469, 0.1671]) -Greedy action tensor([ 0.3713, -0.1427, -0.0874, -0.3649]) tensor([0.3691, 0.2208, 0.2333, 0.1768]) -Greedy action tensor([ 0.5128, -0.1180, 0.1513, -0.4511]) tensor([0.3831, 0.2039, 0.2669, 0.1461]) -Greedy action tensor([ 0.3440, 0.0965, 0.1259, -0.1464]) tensor([0.3128, 0.2442, 0.2515, 0.1915]) -Greedy action tensor([ 0.5401, -0.2913, 0.0738, -0.4734]) tensor([0.4122, 0.1795, 0.2586, 0.1496]) -Greedy action tensor([ 0.5966, 0.0216, 0.1000, -0.4006]) tensor([0.3937, 0.2215, 0.2396, 0.1452]) -Greedy action tensor([ 0.2901, 0.1515, 0.1302, -0.1899]) tensor([0.2993, 0.2605, 0.2550, 0.1852]) -Greedy action tensor([ 0.3283, 0.1088, 0.0926, -0.1116]) tensor([0.3089, 0.2480, 0.2441, 0.1990]) -Greedy action tensor([ 0.3193, -0.0770, 0.0167, -0.1868]) tensor([0.3317, 0.2232, 0.2451, 0.2000]) -Greedy action tensor([ 0.5498, -0.3499, 0.0738, -0.5476]) tensor([0.4234, 0.1722, 0.2631, 0.1413]) -Greedy action tensor([ 0.5868, -0.2902, 0.1246, -0.6087]) tensor([0.4258, 0.1771, 0.2682, 0.1288]) -Greedy action tensor([ 0.3373, 0.1059, 0.1835, -0.1095]) tensor([0.3039, 0.2411, 0.2606, 0.1944]) -Greedy action tensor([ 0.3284, 0.1414, 0.1350, -0.2328]) tensor([0.3101, 0.2573, 0.2556, 0.1770]) -Greedy action tensor([ 0.3091, -0.1225, -0.0336, -0.2282]) tensor([0.3397, 0.2206, 0.2411, 0.1985]) -Greedy action tensor([ 0.5117, -0.3079, -0.1166, -0.4917]) tensor([0.4272, 0.1882, 0.2279, 0.1566]) -Greedy action tensor([ 0.4797, 0.1311, -0.0746, -0.3270]) tensor([0.3668, 0.2588, 0.2107, 0.1637]) -Greedy action tensor([ 0.2743, 0.0526, 0.0641, -0.3424]) tensor([0.3173, 0.2542, 0.2572, 0.1713]) -Greedy action tensor([ 0.4688, -0.1210, 0.1982, -0.4615]) tensor([0.3688, 0.2044, 0.2813, 0.1454]) -Greedy action tensor([ 0.6147, -0.3112, -0.0233, -0.5929]) tensor([0.4498, 0.1782, 0.2376, 0.1344]) -Greedy action tensor([ 0.3482, 0.1407, 0.1635, -0.1310]) tensor([0.3064, 0.2490, 0.2548, 0.1898]) -Greedy action tensor([ 0.7686, -0.3526, -0.0227, -0.3720]) tensor([0.4765, 0.1553, 0.2160, 0.1523]) -Greedy action tensor([ 0.3649, 0.1053, 0.1475, -0.2237]) tensor([0.3194, 0.2464, 0.2570, 0.1773]) -Greedy action tensor([ 0.2472, 0.0848, 0.1974, -0.1454]) tensor([0.2876, 0.2445, 0.2737, 0.1942]) -Greedy action tensor([ 0.2647, 0.1226, 0.1091, -0.1342]) tensor([0.2946, 0.2556, 0.2521, 0.1977]) -Greedy action tensor([ 0.4769, -0.0457, 0.0964, -0.4732]) tensor([0.3755, 0.2226, 0.2567, 0.1452]) -Greedy action tensor([ 0.5350, -0.0364, 0.0042, -0.3386]) tensor([0.3891, 0.2197, 0.2288, 0.1624]) -Greedy action tensor([ 0.2257, 0.0305, 0.1674, -0.0885]) tensor([0.2860, 0.2353, 0.2698, 0.2089]) -Greedy action tensor([ 0.2700, 0.1284, 0.1009, -0.3056]) tensor([0.3054, 0.2651, 0.2579, 0.1717]) -Greedy action tensor([ 0.4180, 0.0114, -0.0323, -0.2757]) tensor([0.3568, 0.2376, 0.2274, 0.1783]) -Greedy action tensor([ 0.6634, -0.2060, 0.0698, -0.5383]) tensor([0.4401, 0.1845, 0.2431, 0.1323]) -Greedy action tensor([ 0.4979, -0.1401, 0.0316, -0.2923]) tensor([0.3832, 0.2025, 0.2404, 0.1739]) -Greedy action tensor([ 0.9094, -0.4257, -0.0480, -0.7935]) tensor([0.5467, 0.1439, 0.2099, 0.0996]) -Greedy action tensor([ 0.2471, -0.0166, 0.2251, -0.1303]) tensor([0.2914, 0.2238, 0.2850, 0.1998]) -Greedy action tensor([ 0.2459, 0.0604, 0.1879, -0.1907]) tensor([0.2923, 0.2428, 0.2759, 0.1889]) -Greedy action tensor([ 0.5717, -0.4230, -0.2713, -0.0810]) tensor([0.4309, 0.1593, 0.1855, 0.2243]) -Greedy action tensor([ 0.8262, -0.2278, -0.1455, 0.1572]) tensor([0.4466, 0.1556, 0.1690, 0.2287]) -Greedy action tensor([ 1.3330, -0.4858, -0.3042, 0.3698]) tensor([0.5752, 0.0933, 0.1119, 0.2196]) -Greedy action tensor([ 1.2967, -0.7767, -0.4162, -0.2260]) tensor([0.6561, 0.0825, 0.1183, 0.1431]) -Greedy action tensor([ 1.2484, -0.5490, -0.2134, -0.2183]) tensor([0.6141, 0.1018, 0.1424, 0.1417]) -Greedy action tensor([ 0.7629, 0.0723, -0.1344, -0.1544]) tensor([0.4332, 0.2172, 0.1766, 0.1731]) -Greedy action tensor([ 1.6832, -0.5453, -0.3641, 0.1993]) tensor([0.6833, 0.0736, 0.0882, 0.1549]) -Greedy action tensor([ 0.4907, -0.4097, -0.1220, -0.0348]) tensor([0.3938, 0.1600, 0.2134, 0.2328]) -Greedy action tensor([ 1.4618, -0.3356, -0.2997, 0.0376]) tensor([0.6336, 0.1050, 0.1088, 0.1525]) -Greedy action tensor([ 1.3644, -0.5349, -0.2684, 0.1691]) tensor([0.6069, 0.0908, 0.1186, 0.1837]) -Greedy action tensor([ 1.3879, -0.5543, 0.0824, 0.3024]) tensor([0.5707, 0.0818, 0.1547, 0.1928]) -Greedy action tensor([ 1.2906, -0.5529, -0.4783, -0.1419]) tensor([0.6380, 0.1010, 0.1088, 0.1523]) -Greedy action tensor([ 0.9809, -0.4808, 0.1245, -0.2495]) tensor([0.5132, 0.1190, 0.2179, 0.1499]) -Greedy action tensor([ 1.2014, -0.1032, -0.0247, -0.0967]) tensor([0.5441, 0.1476, 0.1597, 0.1486]) -Greedy action tensor([ 1.3125, -0.5580, -0.2899, -0.1845]) tensor([0.6332, 0.0975, 0.1275, 0.1417]) -Greedy action tensor([ 0.9184, -0.6155, -0.2227, 0.2169]) tensor([0.4924, 0.1062, 0.1573, 0.2441]) -Greedy action tensor([ 0.7474, -0.4619, -0.1364, 0.1607]) tensor([0.4410, 0.1316, 0.1822, 0.2453]) -Greedy action tensor([ 1.0240, -0.2322, -0.1041, -0.2442]) tensor([0.5292, 0.1507, 0.1713, 0.1489]) -Greedy action tensor([ 0.8388, -0.5926, -0.2963, 0.2601]) tensor([0.4715, 0.1127, 0.1515, 0.2643]) -Greedy action tensor([ 0.5711, -0.3461, -0.1297, -0.1037]) tensor([0.4158, 0.1662, 0.2063, 0.2117]) -Greedy action tensor([ 1.2246, -0.4878, -0.4938, -0.1089]) tensor([0.6160, 0.1111, 0.1105, 0.1624]) -Greedy action tensor([ 1.1282, -0.6935, -0.3267, 0.1268]) tensor([0.5674, 0.0918, 0.1324, 0.2084]) -Greedy action tensor([ 0.6609, -0.4848, -0.3039, 0.0093]) tensor([0.4504, 0.1432, 0.1716, 0.2347]) -Greedy action tensor([ 1.0406, -0.6548, -0.3686, 0.2887]) tensor([0.5265, 0.0966, 0.1286, 0.2482]) -Greedy action tensor([ 0.7881, -0.5275, 0.0510, 0.0320]) tensor([0.4512, 0.1211, 0.2159, 0.2118]) -Greedy action tensor([ 1.0144, -0.4895, -0.1912, -0.1667]) tensor([0.5468, 0.1215, 0.1638, 0.1678]) -Greedy action tensor([ 0.9722, -0.3599, -0.2277, -0.1511]) tensor([0.5290, 0.1396, 0.1593, 0.1720]) -Greedy action tensor([ 0.5545, -0.2931, -0.0418, -0.0089]) tensor([0.3924, 0.1681, 0.2161, 0.2234]) -Greedy action tensor([ 0.7964, -0.3241, -0.1202, 0.0578]) tensor([0.4538, 0.1480, 0.1815, 0.2168]) -Greedy action tensor([ 0.7957, -0.4903, -0.1381, 0.0683]) tensor([0.4646, 0.1284, 0.1826, 0.2245]) -Greedy action tensor([ 0.8061, -0.5903, -0.1908, 0.1629]) tensor([0.4668, 0.1155, 0.1723, 0.2454]) -Greedy action tensor([ 1.3120, -0.5140, -0.1357, 0.2277]) tensor([0.5766, 0.0929, 0.1356, 0.1950]) -Greedy action tensor([ 0.6762, -0.2804, -0.0960, -0.0530]) tensor([0.4295, 0.1650, 0.1984, 0.2071]) -Greedy action tensor([ 1.2657, -0.1330, 0.0784, -0.2121]) tensor([0.5618, 0.1387, 0.1714, 0.1282]) -Greedy action tensor([ 0.8551, -0.3110, -0.1159, -0.0493]) tensor([0.4773, 0.1487, 0.1808, 0.1932]) -Greedy action tensor([ 0.6207, -0.5273, -0.0923, -0.1210]) tensor([0.4379, 0.1389, 0.2146, 0.2086]) -Greedy action tensor([ 1.3134, -0.7302, -0.4937, 0.1678]) tensor([0.6204, 0.0804, 0.1018, 0.1973]) -Greedy action tensor([ 1.0376, -0.3429, -0.0387, -0.2869]) tensor([0.5381, 0.1353, 0.1834, 0.1431]) -Greedy action tensor([ 1.4323, -0.5310, -0.6165, -0.1981]) tensor([0.6825, 0.0958, 0.0880, 0.1337]) -Greedy action tensor([ 1.1671, -0.4604, -0.2791, 0.2173]) tensor([0.5499, 0.1080, 0.1295, 0.2127]) -Greedy action tensor([ 1.1020, -0.4217, -0.2381, 0.1745]) tensor([0.5333, 0.1162, 0.1396, 0.2109]) -Greedy action tensor([ 0.8516, -0.4112, -0.2208, 0.0938]) tensor([0.4776, 0.1351, 0.1634, 0.2239]) -Greedy action tensor([ 1.0205, -0.2754, -0.3151, -0.0400]) tensor([0.5311, 0.1453, 0.1397, 0.1839]) -Greedy action tensor([ 1.2096, -0.6359, -0.0190, 0.2548]) tensor([0.5448, 0.0860, 0.1595, 0.2097]) -Greedy action tensor([ 1.5749, -0.5150, -0.0627, 0.3827]) tensor([0.6166, 0.0763, 0.1199, 0.1872]) -Greedy action tensor([ 1.6662, -0.5873, -0.4055, -0.2465]) tensor([0.7253, 0.0762, 0.0914, 0.1071]) -Greedy action tensor([ 0.7229, -0.4770, -0.1577, -0.0303]) tensor([0.4573, 0.1378, 0.1896, 0.2153]) -Greedy action tensor([ 1.5333, -0.7313, -0.1205, 0.1319]) tensor([0.6488, 0.0674, 0.1241, 0.1597]) -Greedy action tensor([ 1.6975, -0.4901, -0.3804, -0.1994]) tensor([0.7208, 0.0809, 0.0902, 0.1081]) -Greedy action tensor([ 1.1651, -0.4584, -0.2413, -0.1336]) tensor([0.5830, 0.1150, 0.1429, 0.1591]) -Greedy action tensor([ 0.8584, -0.3928, -0.4829, -0.1010]) tensor([0.5179, 0.1482, 0.1354, 0.1984]) -Greedy action tensor([ 0.5853, -0.3909, -0.0804, 0.0441]) tensor([0.4044, 0.1524, 0.2078, 0.2354]) -Greedy action tensor([ 0.8881, -0.7840, -0.3549, 0.0545]) tensor([0.5233, 0.0983, 0.1510, 0.2274]) -Greedy action tensor([ 0.8270, -0.3842, -0.2258, -0.0171]) tensor([0.4815, 0.1434, 0.1680, 0.2070]) -Greedy action tensor([ 1.3650, -0.4444, -0.5086, -0.0792]) tensor([0.6438, 0.1054, 0.0989, 0.1519]) -Greedy action tensor([ 1.1619, -0.5427, -0.2086, 0.2311]) tensor([0.5464, 0.0994, 0.1388, 0.2154]) -Greedy action tensor([ 1.3940, -0.5279, -0.3200, 0.0994]) tensor([0.6248, 0.0914, 0.1126, 0.1712]) -Greedy action tensor([ 1.0213, -0.6667, -0.2094, 0.2839]) tensor([0.5114, 0.0946, 0.1494, 0.2447]) -Greedy action tensor([ 1.4413, -0.6704, -0.3992, -0.0587]) tensor([0.6654, 0.0805, 0.1056, 0.1485]) -Greedy action tensor([ 1.4186, -0.4310, -0.1783, -0.0677]) tensor([0.6305, 0.0992, 0.1277, 0.1426]) -Greedy action tensor([ 0.9341, -0.2239, -0.1393, -0.1472]) tensor([0.5012, 0.1574, 0.1713, 0.1700]) -Greedy action tensor([ 1.0632, -0.6384, 0.0513, 0.1108]) tensor([0.5177, 0.0944, 0.1882, 0.1997]) -Greedy action tensor([ 0.7285, -0.4720, -0.0836, -0.0281]) tensor([0.4516, 0.1360, 0.2005, 0.2119]) -Greedy action tensor([ 0.7110, -0.3085, 0.1019, -0.1266]) tensor([0.4278, 0.1543, 0.2327, 0.1851]) -Greedy action tensor([ 0.6502, -0.5534, -0.1727, 0.0763]) tensor([0.4343, 0.1303, 0.1907, 0.2446]) -Greedy action tensor([ 0.8773, -0.4846, -0.0903, 0.1459]) tensor([0.4723, 0.1210, 0.1795, 0.2273]) -Greedy action tensor([ 0.7777, -0.7506, -0.2866, 0.0536]) tensor([0.4886, 0.1060, 0.1686, 0.2369]) -Greedy action tensor([ 0.8550, -0.5064, -0.0261, 0.0018]) tensor([0.4770, 0.1222, 0.1976, 0.2032]) -Greedy action tensor([ 1.0543, -0.2265, 0.1349, -0.1983]) tensor([0.5096, 0.1416, 0.2032, 0.1456]) -Greedy action tensor([ 0.6741, -0.3739, 0.1702, -0.1317]) tensor([0.4164, 0.1460, 0.2516, 0.1860]) -Greedy action tensor([ 0.9149, -0.6672, -0.3113, 0.1985]) tensor([0.5032, 0.1034, 0.1476, 0.2458]) -Greedy action tensor([ 1.2206, -0.5222, -0.3628, -0.1172]) tensor([0.6087, 0.1066, 0.1250, 0.1597]) -Greedy action tensor([ 0.9731, -0.3562, -0.0872, 0.1204]) tensor([0.4909, 0.1299, 0.1700, 0.2092]) -Greedy action tensor([ 1.0526, -0.4601, -0.1373, 0.4449]) tensor([0.4833, 0.1065, 0.1471, 0.2632]) -Greedy action tensor([ 1.0282, -0.0806, -0.0207, -0.1399]) tensor([0.5022, 0.1657, 0.1759, 0.1562]) -Greedy action tensor([ 1.1905, -0.1118, 0.0525, -0.0755]) tensor([0.5335, 0.1451, 0.1710, 0.1504]) -Greedy action tensor([ 1.8489, -0.5187, -0.1461, 0.0889]) tensor([0.7134, 0.0668, 0.0970, 0.1227]) -Greedy action tensor([ 1.5199, -0.5811, -0.2400, 0.0108]) tensor([0.6598, 0.0807, 0.1135, 0.1459]) -Greedy action tensor([ 1.4794, -0.5580, -0.3664, -0.0655]) tensor([0.6660, 0.0868, 0.1052, 0.1421]) -Greedy action tensor([ 0.6684, -0.4341, -0.1769, 0.0325]) tensor([0.4365, 0.1449, 0.1875, 0.2311]) -Greedy action tensor([ 0.7954, -0.5777, -0.2519, 0.2401]) tensor([0.4591, 0.1163, 0.1611, 0.2635]) -Greedy action tensor([-2.0180, -0.9158, 0.3577, -0.1430]) tensor([0.0470, 0.1414, 0.5053, 0.3063]) -Greedy action tensor([-1.7286, 0.0616, 0.3876, -0.3000]) tensor([0.0514, 0.3078, 0.4264, 0.2144]) -Greedy action tensor([-1.2784, -0.6633, -0.1987, -0.4591]) tensor([0.1240, 0.2294, 0.3651, 0.2814]) -Greedy action tensor([-1.4299, -0.5565, 0.3252, -0.4197]) tensor([0.0839, 0.2008, 0.4850, 0.2303]) -Greedy action tensor([-1.5975, -0.5738, 0.7704, 0.0667]) tensor([0.0507, 0.1410, 0.5408, 0.2676]) -Greedy action tensor([-1.9446, -0.6371, 0.4564, -0.0772]) tensor([0.0450, 0.1665, 0.4970, 0.2915]) -Greedy action tensor([-1.2179, 0.1670, 0.5167, -0.4866]) tensor([0.0785, 0.3136, 0.4448, 0.1631]) -Greedy action tensor([-2.0082, -0.7169, 0.6972, 0.0284]) tensor([0.0367, 0.1334, 0.5488, 0.2811]) -Greedy action tensor([-1.3608, -0.6010, 0.3802, 0.1223]) tensor([0.0755, 0.1614, 0.4305, 0.3326]) -Greedy action tensor([-1.1091, -0.3041, -0.2249, 0.4187]) tensor([0.0974, 0.2179, 0.2358, 0.4489]) -Greedy action tensor([-1.0386, -0.3718, 0.2330, -0.1712]) tensor([0.1124, 0.2190, 0.4010, 0.2676]) -Greedy action tensor([-1.7472, -0.4942, 0.7447, 0.2000]) tensor([0.0424, 0.1484, 0.5122, 0.2971]) -Greedy action tensor([-1.0184, -0.5947, 0.5956, 0.1962]) tensor([0.0916, 0.1399, 0.4600, 0.3085]) -Greedy action tensor([-1.4230, -0.7131, -0.2347, -0.3831]) tensor([0.1094, 0.2224, 0.3589, 0.3094]) -Greedy action tensor([-1.9373, -0.6422, 0.8763, 0.1265]) tensor([0.0343, 0.1251, 0.5709, 0.2697]) -Greedy action tensor([-1.8268, -0.9620, 0.1601, -0.6695]) tensor([0.0722, 0.1715, 0.5266, 0.2297]) -Greedy action tensor([-1.6313, -0.7440, 1.0877, 0.3452]) tensor([0.0387, 0.0941, 0.5875, 0.2796]) -Greedy action tensor([-0.8098, -0.4338, 0.4490, -0.2395]) tensor([0.1291, 0.1880, 0.4546, 0.2283]) -Greedy action tensor([-0.6847, -0.5850, 0.2999, 0.3989]) tensor([0.1293, 0.1428, 0.3460, 0.3820]) -Greedy action tensor([-1.7848, -0.7118, 0.0913, -0.3053]) tensor([0.0674, 0.1970, 0.4398, 0.2958]) -Greedy action tensor([-1.7897, -0.5329, 0.7285, 0.0313]) tensor([0.0433, 0.1521, 0.5371, 0.2675]) -Greedy action tensor([-1.8361, -0.5229, 0.7095, -0.0735]) tensor([0.0429, 0.1596, 0.5473, 0.2501]) -Greedy action tensor([-1.8919, -0.6962, 0.3413, -0.2197]) tensor([0.0527, 0.1744, 0.4921, 0.2808]) -Greedy action tensor([-0.9789, -0.6878, 0.4866, -0.0379]) tensor([0.1083, 0.1450, 0.4691, 0.2776]) -Greedy action tensor([-1.9283, -0.6199, 0.5349, -0.0147]) tensor([0.0431, 0.1594, 0.5057, 0.2919]) -Greedy action tensor([-2.0071, -0.6576, 0.8228, 0.0582]) tensor([0.0337, 0.1299, 0.5708, 0.2657]) -Greedy action tensor([-1.9598, -0.7054, 0.6877, 0.0212]) tensor([0.0386, 0.1355, 0.5457, 0.2802]) -Greedy action tensor([-2.0500, -0.9092, 0.5988, -0.0862]) tensor([0.0394, 0.1232, 0.5567, 0.2806]) -Greedy action tensor([-1.8422, -0.8436, 0.2657, -0.1911]) tensor([0.0583, 0.1582, 0.4797, 0.3038]) -Greedy action tensor([-1.3900, -0.6251, 0.0069, 0.1990]) tensor([0.0827, 0.1777, 0.3344, 0.4052]) -Greedy action tensor([-1.8103, -0.9156, 0.5894, -0.2070]) tensor([0.0515, 0.1259, 0.5670, 0.2557]) -Greedy action tensor([-0.9376, 0.0289, -0.2971, 0.1498]) tensor([0.1177, 0.3095, 0.2234, 0.3493]) -Greedy action tensor([-2.0427, -0.8775, 0.6596, 0.0204]) tensor([0.0370, 0.1188, 0.5525, 0.2916]) -Greedy action tensor([-0.7974, -0.3076, 0.3967, -0.2622]) tensor([0.1309, 0.2136, 0.4320, 0.2235]) -Greedy action tensor([-2.0092, -0.8862, 0.8784, 0.3429]) tensor([0.0307, 0.0945, 0.5518, 0.3230]) -Greedy action tensor([-2.0250, -0.7846, 0.4808, -0.0702]) tensor([0.0421, 0.1454, 0.5154, 0.2971]) -Greedy action tensor([-1.7079, -0.7869, 0.1932, -0.3142]) tensor([0.0703, 0.1765, 0.4702, 0.2831]) -Greedy action tensor([-1.9344, -0.7804, 0.6982, 0.0214]) tensor([0.0398, 0.1261, 0.5531, 0.2811]) -Greedy action tensor([-1.7479, -0.4767, 0.6125, 0.0731]) tensor([0.0469, 0.1671, 0.4965, 0.2895]) -Greedy action tensor([-0.8329, -0.0044, -0.1285, -0.1354]) tensor([0.1366, 0.3128, 0.2763, 0.2744]) -Greedy action tensor([-0.7692, -0.5808, 0.1889, 0.3716]) tensor([0.1259, 0.1520, 0.3282, 0.3940]) -Greedy action tensor([-1.0316, -0.6601, 0.5508, 0.5008]) tensor([0.0837, 0.1214, 0.4074, 0.3875]) -Greedy action tensor([-1.8325, -0.6397, 0.3109, -0.1676]) tensor([0.0552, 0.1820, 0.4709, 0.2918]) -Greedy action tensor([-1.6819, -0.8088, -0.0544, -0.4135]) tensor([0.0831, 0.1989, 0.4228, 0.2953]) -Greedy action tensor([-1.2725, -0.4198, 0.4092, -0.2418]) tensor([0.0868, 0.2036, 0.4664, 0.2432]) -Greedy action tensor([-1.5679, -0.5755, 0.5232, -0.1181]) tensor([0.0623, 0.1680, 0.5042, 0.2655]) -Greedy action tensor([-1.8298, -0.7800, 0.1465, -0.2807]) tensor([0.0634, 0.1811, 0.4573, 0.2983]) -Greedy action tensor([-1.5726, -0.5728, 0.4877, -0.0102]) tensor([0.0612, 0.1664, 0.4804, 0.2920]) -Greedy action tensor([-1.4915, -0.5497, 0.4252, 0.0631]) tensor([0.0662, 0.1699, 0.4503, 0.3135]) -Greedy action tensor([-1.7612, -0.6194, 0.8510, 0.2877]) tensor([0.0392, 0.1227, 0.5340, 0.3040]) -Greedy action tensor([-0.9593, -0.2151, -0.4820, -0.3209]) tensor([0.1513, 0.3184, 0.2438, 0.2865]) -Greedy action tensor([-2.0445, -0.8591, 0.5986, -0.0900]) tensor([0.0394, 0.1289, 0.5537, 0.2781]) -Greedy action tensor([-1.8367, -0.9097, 0.0713, -0.4082]) tensor([0.0693, 0.1750, 0.4668, 0.2890]) -Greedy action tensor([-2.0116, -0.7503, 0.9032, 0.1268]) tensor([0.0318, 0.1122, 0.5863, 0.2697]) -Greedy action tensor([-1.6434, -0.5059, 0.5607, 0.1551]) tensor([0.0520, 0.1623, 0.4715, 0.3143]) -Greedy action tensor([-1.1766, -0.6569, -0.0246, -0.4812]) tensor([0.1274, 0.2142, 0.4031, 0.2553]) -Greedy action tensor([-1.7721, -0.4979, 0.5106, -0.1886]) tensor([0.0519, 0.1857, 0.5092, 0.2531]) -Greedy action tensor([-0.8152, -0.6123, 0.2872, 0.4441]) tensor([0.1142, 0.1398, 0.3438, 0.4022]) -Greedy action tensor([-1.2198, -0.5849, 0.2728, 0.2998]) tensor([0.0840, 0.1585, 0.3736, 0.3839]) -Greedy action tensor([-1.3705, -0.6329, 0.4916, 0.0652]) tensor([0.0728, 0.1523, 0.4688, 0.3061]) -Greedy action tensor([-1.9957, -0.6796, 0.9224, 0.1175]) tensor([0.0317, 0.1183, 0.5873, 0.2626]) -Greedy action tensor([-1.7677, -0.5168, 0.5813, -0.0795]) tensor([0.0491, 0.1714, 0.5140, 0.2655]) -Greedy action tensor([-1.9983, -0.9422, 0.3337, -0.2369]) tensor([0.0500, 0.1438, 0.5151, 0.2911]) -Greedy action tensor([-1.9583, -0.5907, 0.7585, -0.1111]) tensor([0.0379, 0.1487, 0.5732, 0.2402]) -Greedy action tensor([-1.3698, -0.6640, -0.0955, -0.5325]) tensor([0.1122, 0.2273, 0.4013, 0.2592]) -Greedy action tensor([-1.9693, -0.8127, 0.1897, -0.3326]) tensor([0.0556, 0.1768, 0.4818, 0.2858]) -Greedy action tensor([-1.9726, -0.9013, 0.3725, -0.1844]) tensor([0.0492, 0.1436, 0.5132, 0.2940]) -Greedy action tensor([-1.7994, -0.5489, 0.4942, 0.0682]) tensor([0.0479, 0.1673, 0.4748, 0.3101]) -Greedy action tensor([-0.8596, 0.3481, 0.2201, -0.1952]) tensor([0.1083, 0.3624, 0.3188, 0.2105]) -Greedy action tensor([-1.9490, -0.6342, 0.4855, -0.1271]) tensor([0.0448, 0.1669, 0.5113, 0.2771]) -Greedy action tensor([-1.6439, -0.9794, 0.2111, -0.5728]) tensor([0.0816, 0.1586, 0.5216, 0.2382]) -Greedy action tensor([-1.8455, -0.6083, 0.8595, 0.1910]) tensor([0.0369, 0.1273, 0.5526, 0.2832]) -Greedy action tensor([-1.5109, -0.6949, -0.0604, -0.3007]) tensor([0.0919, 0.2078, 0.3920, 0.3083]) -Greedy action tensor([-1.5829, -0.5995, 0.2834, -0.2573]) tensor([0.0719, 0.1923, 0.4650, 0.2708]) -Greedy action tensor([-1.5481, -0.3239, 0.2993, -0.2228]) tensor([0.0689, 0.2345, 0.4372, 0.2594]) -Greedy action tensor([-0.6417, -0.3520, 0.2413, 0.0407]) tensor([0.1485, 0.1984, 0.3591, 0.2939]) -Greedy action tensor([-0.3777, -0.1466, 0.1406, 0.0993]) tensor([0.1802, 0.2270, 0.3025, 0.2903]) -Greedy action tensor([-1.1882, -0.1642, 0.4907, -0.3726]) tensor([0.0877, 0.2441, 0.4700, 0.1982]) -Greedy action tensor([-0.9408, -0.5432, 0.2479, 0.1764]) tensor([0.1133, 0.1686, 0.3719, 0.3462]) -Greedy action tensor([-2.0402, -0.8720, 0.6067, -0.1399]) tensor([0.0400, 0.1286, 0.5641, 0.2674]) -Greedy action tensor([-1.9376, -0.9643, 0.3741, -0.2957]) tensor([0.0529, 0.1400, 0.5339, 0.2732]) -Greedy action tensor([ 0.4010, -0.0847, 0.2120, -0.3572]) tensor([0.3435, 0.2113, 0.2843, 0.1609]) -Greedy action tensor([ 0.7598, -0.3438, -0.0515, -0.4510]) tensor([0.4822, 0.1599, 0.2142, 0.1437]) -Greedy action tensor([ 0.4312, 0.1200, 0.1091, -0.1729]) tensor([0.3329, 0.2439, 0.2412, 0.1820]) -Greedy action tensor([ 0.3436, -0.1290, 0.1885, -0.0838]) tensor([0.3193, 0.1990, 0.2734, 0.2082]) -Greedy action tensor([ 0.6324, -0.0154, 0.0144, -0.4069]) tensor([0.4139, 0.2166, 0.2231, 0.1464]) -Greedy action tensor([ 0.2957, 0.0578, 0.1883, -0.1678]) tensor([0.3016, 0.2378, 0.2709, 0.1897]) -Greedy action tensor([ 0.8320, -0.5637, 0.0590, -0.5618]) tensor([0.5109, 0.1265, 0.2358, 0.1268]) -Greedy action tensor([ 0.4229, -0.1554, 0.2072, -0.3450]) tensor([0.3533, 0.1981, 0.2847, 0.1639]) -Greedy action tensor([ 0.4676, -0.1029, 0.1038, -0.3920]) tensor([0.3726, 0.2106, 0.2590, 0.1578]) -Greedy action tensor([ 0.5793, -0.1688, -0.0889, -0.2654]) tensor([0.4140, 0.1959, 0.2122, 0.1779]) -Greedy action tensor([ 0.4013, 0.1655, 0.1236, -0.0568]) tensor([0.3145, 0.2484, 0.2382, 0.1989]) -Greedy action tensor([ 0.5223, -0.0044, 0.0571, -0.2971]) tensor([0.3760, 0.2221, 0.2362, 0.1657]) -Greedy action tensor([ 0.4251, 0.1315, 0.0774, -0.1519]) tensor([0.3318, 0.2474, 0.2344, 0.1864]) -Greedy action tensor([ 0.2205, -0.1277, 0.0987, -0.3495]) tensor([0.3168, 0.2236, 0.2804, 0.1792]) -Greedy action tensor([ 0.3047, -0.1525, -0.0690, -0.2326]) tensor([0.3442, 0.2179, 0.2369, 0.2011]) -Greedy action tensor([ 0.5474, 0.0950, -0.0914, -0.2994]) tensor([0.3857, 0.2453, 0.2036, 0.1654]) -Greedy action tensor([ 0.8166, -0.3257, -0.0021, -0.6281]) tensor([0.5010, 0.1599, 0.2210, 0.1181]) -Greedy action tensor([ 0.2613, 0.0303, 0.2088, -0.1905]) tensor([0.2959, 0.2349, 0.2808, 0.1884]) -Greedy action tensor([ 0.2611, 0.0591, 0.1025, -0.0922]) tensor([0.2965, 0.2423, 0.2530, 0.2082]) -Greedy action tensor([ 0.7631, -0.2428, -0.0471, -0.4995]) tensor([0.4777, 0.1747, 0.2125, 0.1351]) -Greedy action tensor([ 0.2535, -0.0858, 0.0042, -0.3790]) tensor([0.3308, 0.2356, 0.2578, 0.1757]) -Greedy action tensor([ 0.9920, -0.4001, -0.1421, -0.7817]) tensor([0.5747, 0.1429, 0.1849, 0.0975]) -Greedy action tensor([ 0.3611, 0.0679, 0.1994, -0.0817]) tensor([0.3088, 0.2303, 0.2627, 0.1983]) -Greedy action tensor([ 0.3440, 0.0784, 0.2515, -0.0483]) tensor([0.2982, 0.2286, 0.2718, 0.2014]) -Greedy action tensor([ 0.3794, 0.1155, 0.0373, -0.2512]) tensor([0.3322, 0.2551, 0.2359, 0.1768]) -Greedy action tensor([ 0.2609, -0.0859, 0.1861, -0.1348]) tensor([0.3023, 0.2137, 0.2805, 0.2035]) -Greedy action tensor([ 0.2458, -0.0904, 0.2324, -0.2109]) tensor([0.2999, 0.2143, 0.2959, 0.1899]) -Greedy action tensor([ 0.3776, 0.0607, 0.2186, -0.1767]) tensor([0.3169, 0.2308, 0.2703, 0.1820]) -Greedy action tensor([ 0.2526, -0.0690, 0.0144, -0.2194]) tensor([0.3188, 0.2311, 0.2512, 0.1989]) -Greedy action tensor([ 0.4481, -0.0417, 0.1073, -0.3997]) tensor([0.3633, 0.2226, 0.2584, 0.1556]) -Greedy action tensor([ 0.3771, 0.0367, 0.1916, -0.2627]) tensor([0.3258, 0.2318, 0.2706, 0.1718]) -Greedy action tensor([ 0.5189, 0.1186, 0.0096, -0.3255]) tensor([0.3703, 0.2481, 0.2225, 0.1591]) -Greedy action tensor([ 0.4144, 0.1239, 0.1492, -0.2221]) tensor([0.3285, 0.2457, 0.2520, 0.1738]) -Greedy action tensor([ 0.2895, 0.0685, 0.2314, -0.0565]) tensor([0.2896, 0.2322, 0.2733, 0.2049]) -Greedy action tensor([ 0.4381, -0.2783, -0.0026, -0.5128]) tensor([0.3971, 0.1940, 0.2556, 0.1534]) -Greedy action tensor([ 0.3199, 0.1215, 0.1875, -0.2074]) tensor([0.3043, 0.2495, 0.2666, 0.1796]) -Greedy action tensor([ 0.5443, -0.1104, -0.0427, -0.2381]) tensor([0.3948, 0.2051, 0.2195, 0.1806]) -Greedy action tensor([ 0.3046, -0.1931, 0.1895, -0.3646]) tensor([0.3321, 0.2019, 0.2960, 0.1701]) -Greedy action tensor([ 0.8121, -0.3502, -0.1796, -0.5949]) tensor([0.5185, 0.1622, 0.1923, 0.1270]) -Greedy action tensor([ 0.1305, 0.0327, -0.0713, -0.0021]) tensor([0.2778, 0.2519, 0.2270, 0.2433]) -Greedy action tensor([ 0.2863, 0.0391, 0.1824, -0.1913]) tensor([0.3028, 0.2365, 0.2729, 0.1878]) -Greedy action tensor([ 0.5592, -0.1908, 0.2495, -0.6186]) tensor([0.3978, 0.1879, 0.2918, 0.1225]) -Greedy action tensor([ 0.4098, -0.0345, 0.1001, -0.3585]) tensor([0.3523, 0.2259, 0.2584, 0.1634]) -Greedy action tensor([ 0.8418, -0.5274, 0.0263, -0.5649]) tensor([0.5150, 0.1310, 0.2278, 0.1261]) -Greedy action tensor([ 0.3315, 0.1945, 0.1259, -0.2586]) tensor([0.3086, 0.2691, 0.2513, 0.1710]) -Greedy action tensor([ 0.9250, -0.5435, -0.1487, -0.6525]) tensor([0.5623, 0.1295, 0.1921, 0.1161]) -Greedy action tensor([ 0.7232, -0.3325, 0.0452, -0.4572]) tensor([0.4624, 0.1609, 0.2347, 0.1420]) -Greedy action tensor([ 0.2761, 0.1416, 0.1338, -0.1746]) tensor([0.2960, 0.2587, 0.2567, 0.1886]) -Greedy action tensor([ 0.5346, -0.0840, 0.0311, -0.3277]) tensor([0.3898, 0.2100, 0.2356, 0.1646]) -Greedy action tensor([ 0.1367, -0.0662, 0.1895, -0.0434]) tensor([0.2698, 0.2203, 0.2845, 0.2254]) -Greedy action tensor([ 0.4509, -0.1261, -0.0270, -0.3750]) tensor([0.3817, 0.2144, 0.2367, 0.1671]) -Greedy action tensor([ 0.2045, 0.0301, 0.1318, -0.1303]) tensor([0.2869, 0.2410, 0.2668, 0.2053]) -Greedy action tensor([ 0.7193, -0.2398, -0.1833, -0.4510]) tensor([0.4764, 0.1826, 0.1932, 0.1478]) -Greedy action tensor([ 0.7360, -0.2382, -0.0843, -0.4019]) tensor([0.4677, 0.1765, 0.2059, 0.1499]) -Greedy action tensor([ 0.3939, -0.0217, 0.1339, -0.1952]) tensor([0.3349, 0.2210, 0.2582, 0.1858]) -Greedy action tensor([ 0.6161, -0.3472, -0.1065, -0.5182]) tensor([0.4569, 0.1744, 0.2218, 0.1469]) -Greedy action tensor([ 0.6746, -0.2010, 0.0884, -0.3018]) tensor([0.4256, 0.1773, 0.2368, 0.1603]) -Greedy action tensor([ 0.4339, 0.1119, 0.0914, -0.1729]) tensor([0.3356, 0.2432, 0.2383, 0.1829]) -Greedy action tensor([ 0.2958, -0.0198, 0.0786, -0.1620]) tensor([0.3158, 0.2303, 0.2541, 0.1998]) -Greedy action tensor([ 0.8695, -0.4086, -0.1501, -0.6652]) tensor([0.5391, 0.1502, 0.1945, 0.1162]) -Greedy action tensor([ 0.2419, 0.0857, 0.1931, -0.1418]) tensor([0.2866, 0.2452, 0.2730, 0.1953]) -Greedy action tensor([ 0.4504, 0.1804, 0.0909, -0.0801]) tensor([0.3279, 0.2503, 0.2289, 0.1929]) -Greedy action tensor([ 0.3603, -0.0201, 0.0682, -0.3023]) tensor([0.3395, 0.2321, 0.2535, 0.1750]) -Greedy action tensor([ 0.2897, -0.0623, 0.1313, -0.3332]) tensor([0.3233, 0.2274, 0.2759, 0.1734]) -Greedy action tensor([ 0.2092, 0.0246, 0.0678, -0.2332]) tensor([0.2992, 0.2488, 0.2598, 0.1922]) -Greedy action tensor([ 0.3429, 0.0862, 0.1918, -0.1053]) tensor([0.3056, 0.2364, 0.2628, 0.1952]) -Greedy action tensor([ 0.3633, 0.1359, 0.1201, -0.1744]) tensor([0.3160, 0.2517, 0.2478, 0.1846]) -Greedy action tensor([ 0.2061, 0.1744, 0.1625, -0.2023]) tensor([0.2785, 0.2698, 0.2666, 0.1851]) -Greedy action tensor([ 0.6061, -0.2461, -0.0163, -0.6378]) tensor([0.4442, 0.1894, 0.2384, 0.1280]) -Greedy action tensor([ 0.3393, 0.1455, 0.1593, -0.1376]) tensor([0.3049, 0.2512, 0.2547, 0.1893]) -Greedy action tensor([ 0.3806, -0.1822, 0.1381, -0.3218]) tensor([0.3509, 0.1999, 0.2753, 0.1738]) -Greedy action tensor([ 0.4485, -0.1666, 0.2052, -0.4296]) tensor([0.3649, 0.1973, 0.2861, 0.1517]) -Greedy action tensor([ 0.3802, -0.0818, 0.1496, -0.2207]) tensor([0.3364, 0.2120, 0.2671, 0.1845]) -Greedy action tensor([ 0.3331, 0.2841, 0.0804, -0.2387]) tensor([0.3036, 0.2891, 0.2358, 0.1714]) -Greedy action tensor([ 1.0995, -0.6200, -0.0483, -0.6893]) tensor([0.6011, 0.1077, 0.1908, 0.1005]) -Greedy action tensor([ 0.7070, -0.3350, -0.0029, -0.5174]) tensor([0.4676, 0.1650, 0.2299, 0.1375]) -Greedy action tensor([ 0.5909, -0.2629, 0.0146, -0.4208]) tensor([0.4253, 0.1811, 0.2390, 0.1546]) -Greedy action tensor([ 0.0939, -0.0033, 0.1159, -0.0434]) tensor([0.2631, 0.2387, 0.2689, 0.2293]) -Greedy action tensor([ 0.3255, -0.0215, 0.0534, -0.0860]) tensor([0.3194, 0.2257, 0.2433, 0.2116]) -Greedy action tensor([ 0.8221, -0.5385, 0.0279, -0.6311]) tensor([0.5149, 0.1321, 0.2327, 0.1204]) -Greedy action tensor([ 0.3704, -0.2748, 0.1159, -0.2807]) tensor([0.3544, 0.1859, 0.2748, 0.1848]) -Greedy action tensor([ 1.4085, -0.5796, 1.0752, 0.6600]) tensor([0.4298, 0.0589, 0.3080, 0.2033]) -Greedy action tensor([ 1.5790, -0.4475, 0.3060, 0.8190]) tensor([0.5321, 0.0701, 0.1490, 0.2488]) -Greedy action tensor([ 0.3905, -0.6960, 1.2453, 1.2616]) tensor([0.1645, 0.0555, 0.3868, 0.3932]) -Greedy action tensor([1.6978, 0.1214, 1.0370, 0.1836]) tensor([0.5146, 0.1064, 0.2658, 0.1132]) -Greedy action tensor([ 1.3996, -1.5828, 0.7229, 1.1275]) tensor([0.4309, 0.0218, 0.2190, 0.3282]) -Greedy action tensor([ 1.1658, 0.5289, -0.8724, 0.4754]) tensor([0.4628, 0.2448, 0.0603, 0.2321]) -Greedy action tensor([ 1.0720, -0.5529, 0.2400, 0.7550]) tensor([0.4237, 0.0834, 0.1844, 0.3085]) -Greedy action tensor([ 1.9200, -1.1604, 1.1424, 1.5041]) tensor([0.4618, 0.0212, 0.2122, 0.3047]) -Greedy action tensor([ 2.1345, -0.3129, 0.7158, 0.6535]) tensor([0.6427, 0.0556, 0.1555, 0.1462]) -Greedy action tensor([ 1.3866, -0.2376, 1.3143, 0.9318]) tensor([0.3621, 0.0714, 0.3368, 0.2298]) -Greedy action tensor([ 0.8497, -0.5790, 0.7593, 0.7933]) tensor([0.3227, 0.0773, 0.2949, 0.3050]) -Greedy action tensor([ 1.4214, 0.2982, -0.0356, 0.8721]) tensor([0.4683, 0.1523, 0.1091, 0.2703]) -Greedy action tensor([ 1.0003, -0.7180, 0.5792, 0.8114]) tensor([0.3754, 0.0673, 0.2464, 0.3108]) -Greedy action tensor([1.4865, 0.1208, 1.1832, 2.0317]) tensor([0.2689, 0.0686, 0.1986, 0.4639]) -Greedy action tensor([ 1.3678, 0.4455, -0.5267, -0.1368]) tensor([0.5649, 0.2246, 0.0850, 0.1255]) -Greedy action tensor([ 1.9044, -0.0843, 0.5656, 1.0139]) tensor([0.5526, 0.0756, 0.1449, 0.2268]) -Greedy action tensor([ 1.5458, -0.8438, 0.0730, 0.5619]) tensor([0.5900, 0.0541, 0.1353, 0.2206]) -Greedy action tensor([ 0.9386, -0.2097, 0.8516, 0.1438]) tensor([0.3724, 0.1181, 0.3413, 0.1682]) -Greedy action tensor([1.8422, 0.2274, 1.5657, 1.2106]) tensor([0.4018, 0.0799, 0.3047, 0.2136]) -Greedy action tensor([ 1.9882, -0.2929, 0.3358, 1.1946]) tensor([0.5727, 0.0585, 0.1097, 0.2590]) -Greedy action tensor([1.7365, 0.3350, 0.0141, 1.2725]) tensor([0.4869, 0.1199, 0.0870, 0.3062]) -Greedy action tensor([ 1.0221, -0.1280, 1.3933, 0.4479]) tensor([0.3004, 0.0951, 0.4354, 0.1692]) -Greedy action tensor([ 1.2802, -0.0656, 1.3542, 1.8323]) tensor([0.2455, 0.0639, 0.2643, 0.4263]) -Greedy action tensor([ 1.4876, -0.4937, 1.5410, 0.8667]) tensor([0.3663, 0.0505, 0.3864, 0.1969]) -Greedy action tensor([ 1.2097, -0.1485, 0.8212, 0.6696]) tensor([0.3972, 0.1021, 0.2693, 0.2314]) -Greedy action tensor([ 1.8282, -0.6804, 1.6294, 0.3246]) tensor([0.4709, 0.0383, 0.3860, 0.1047]) -Greedy action tensor([ 2.1522, -0.2282, 1.1219, 1.9250]) tensor([0.4452, 0.0412, 0.1589, 0.3547]) -Greedy action tensor([ 1.7222, -0.0355, -0.4412, 1.4288]) tensor([0.4919, 0.0848, 0.0565, 0.3668]) -Greedy action tensor([ 1.9032, -0.6471, 1.1644, 1.8787]) tensor([0.3950, 0.0308, 0.1887, 0.3854]) -Greedy action tensor([ 1.5972, -0.1795, 0.7233, 1.2601]) tensor([0.4347, 0.0736, 0.1814, 0.3103]) -Greedy action tensor([ 0.7237, -1.0028, 0.4609, 0.9807]) tensor([0.3087, 0.0549, 0.2373, 0.3991]) -Greedy action tensor([ 2.2438, -0.0269, 0.9076, 0.8641]) tensor([0.6182, 0.0638, 0.1625, 0.1556]) -Greedy action tensor([ 1.8586, 0.8585, -0.5851, 0.3374]) tensor([0.5977, 0.2199, 0.0519, 0.1306]) -Greedy action tensor([ 1.6196, -0.5328, 0.3039, 0.9485]) tensor([0.5275, 0.0613, 0.1415, 0.2696]) -Greedy action tensor([1.5970, 0.3473, 0.1544, 1.1849]) tensor([0.4576, 0.1312, 0.1081, 0.3031]) -Greedy action tensor([ 0.8668, -0.2133, 0.1580, 0.9086]) tensor([0.3479, 0.1181, 0.1712, 0.3627]) -Greedy action tensor([ 1.5072, -0.6305, 0.9697, 0.5995]) tensor([0.4749, 0.0560, 0.2775, 0.1916]) -Greedy action tensor([ 1.3297, -0.9872, 1.1790, 1.2270]) tensor([0.3495, 0.0345, 0.3006, 0.3154]) -Greedy action tensor([ 1.2882, -0.7968, 1.4348, 0.7781]) tensor([0.3469, 0.0431, 0.4017, 0.2083]) -Greedy action tensor([ 1.5772, -0.2584, 0.2529, 1.1071]) tensor([0.4877, 0.0778, 0.1297, 0.3048]) -Greedy action tensor([ 1.5252, -0.3620, 0.4659, 1.3857]) tensor([0.4223, 0.0640, 0.1464, 0.3673]) -Greedy action tensor([ 1.1407, -0.3935, 0.9134, 0.9925]) tensor([0.3479, 0.0750, 0.2772, 0.3000]) -Greedy action tensor([ 1.4733, -0.1485, 0.2443, 1.1523]) tensor([0.4514, 0.0892, 0.1321, 0.3274]) -Greedy action tensor([ 1.2642, 0.1153, -0.3604, 1.3101]) tensor([0.3905, 0.1238, 0.0769, 0.4088]) -Greedy action tensor([ 1.6539, -0.7188, 0.9825, 1.5608]) tensor([0.3976, 0.0371, 0.2031, 0.3622]) -Greedy action tensor([ 1.2045, -0.2403, 1.1302, 0.1126]) tensor([0.4000, 0.0943, 0.3714, 0.1342]) -Greedy action tensor([ 1.6274, -0.1966, 0.4366, 0.7967]) tensor([0.5260, 0.0849, 0.1599, 0.2292]) -Greedy action tensor([ 1.2629, -0.6517, 0.8820, 0.6245]) tensor([0.4239, 0.0625, 0.2897, 0.2239]) -Greedy action tensor([ 1.5680, 0.5391, -0.5566, 0.6105]) tensor([0.5374, 0.1921, 0.0642, 0.2063]) -Greedy action tensor([ 1.4309, -0.9294, 0.2946, 0.8407]) tensor([0.5077, 0.0479, 0.1630, 0.2814]) -Greedy action tensor([ 1.6840, -0.7428, 0.7497, 1.2590]) tensor([0.4684, 0.0414, 0.1840, 0.3062]) -Greedy action tensor([1.7515, 0.2079, 0.8238, 0.9532]) tensor([0.4856, 0.1037, 0.1921, 0.2186]) -Greedy action tensor([0.8222, 0.3662, 0.1375, 0.6228]) tensor([0.3381, 0.2143, 0.1705, 0.2770]) -Greedy action tensor([ 1.4556, -0.7216, 0.2270, 1.7675]) tensor([0.3607, 0.0409, 0.1056, 0.4928]) -Greedy action tensor([ 1.1275, -0.3393, 0.6982, 1.5503]) tensor([0.2934, 0.0677, 0.1910, 0.4479]) -Greedy action tensor([ 1.0245, -0.3920, 0.9980, 0.9203]) tensor([0.3208, 0.0778, 0.3124, 0.2890]) -Greedy action tensor([ 1.2965, -0.0869, 0.8555, 0.9709]) tensor([0.3822, 0.0958, 0.2459, 0.2760]) -Greedy action tensor([ 0.8385, -0.7041, 0.5811, 1.5776]) tensor([0.2450, 0.0524, 0.1894, 0.5131]) -Greedy action tensor([ 1.2782, -0.2943, 0.6115, 0.8100]) tensor([0.4261, 0.0884, 0.2187, 0.2668]) -Greedy action tensor([ 1.4349, -0.5929, 1.5256, 0.6017]) tensor([0.3758, 0.0495, 0.4114, 0.1633]) -Greedy action tensor([ 2.0323, -0.0445, 0.2954, 0.9156]) tensor([0.6140, 0.0769, 0.1081, 0.2010]) -Greedy action tensor([ 2.1292, -0.0923, 1.1491, 1.0293]) tensor([0.5505, 0.0597, 0.2066, 0.1833]) -Greedy action tensor([ 2.4598, -0.2511, 0.5786, 1.7095]) tensor([0.5913, 0.0393, 0.0901, 0.2792]) -Greedy action tensor([ 1.9713, -0.3877, 0.8273, 1.5987]) tensor([0.4757, 0.0450, 0.1515, 0.3278]) -Greedy action tensor([ 1.7565, -1.0783, 1.3354, 0.6538]) tensor([0.4885, 0.0287, 0.3206, 0.1622]) -Greedy action tensor([ 1.3587, 0.3596, -0.4324, -0.0817]) tensor([0.5644, 0.2078, 0.0941, 0.1337]) -Greedy action tensor([ 1.1257, -0.1287, 1.0633, 1.4568]) tensor([0.2764, 0.0789, 0.2597, 0.3850]) -Greedy action tensor([ 0.9135, -1.5920, 1.5581, 0.9086]) tensor([0.2511, 0.0205, 0.4785, 0.2499]) -Greedy action tensor([ 2.1444, -0.7431, 0.9253, 1.7375]) tensor([0.4958, 0.0276, 0.1465, 0.3301]) -Greedy action tensor([ 1.5636, -0.5997, 1.0906, 1.9353]) tensor([0.3136, 0.0361, 0.1954, 0.4549]) -Greedy action tensor([ 1.1083, -0.2011, 0.7680, 1.1603]) tensor([0.3295, 0.0890, 0.2345, 0.3471]) -Greedy action tensor([ 1.2578e+00, -7.6270e-04, 9.4863e-01, 1.0738e+00]) tensor([0.3509, 0.0997, 0.2576, 0.2919]) -Greedy action tensor([ 1.6840, 0.1185, -0.7476, 0.9577]) tensor([0.5616, 0.1174, 0.0494, 0.2717]) -Greedy action tensor([ 1.0742, -0.1509, 0.5212, 0.6660]) tensor([0.3947, 0.1159, 0.2270, 0.2624]) -Greedy action tensor([ 1.5185, -0.0908, 1.2989, 0.7837]) tensor([0.4028, 0.0806, 0.3234, 0.1932]) -Greedy action tensor([ 1.2084, -0.9471, 1.4004, 1.0079]) tensor([0.3179, 0.0368, 0.3852, 0.2601]) -Greedy action tensor([ 1.3367, -0.5837, 1.1081, 1.6593]) tensor([0.3009, 0.0441, 0.2394, 0.4155]) -Greedy action tensor([ 1.4941, -1.4040, 1.1957, 1.1880]) tensor([0.3947, 0.0218, 0.2929, 0.2906]) -Greedy action tensor([ 1.2073, 0.4514, -0.2056, 1.3317]) tensor([0.3514, 0.1650, 0.0856, 0.3980]) -Greedy action tensor([ 1.4142, -0.6951, 2.2947, 1.0697]) tensor([0.2357, 0.0286, 0.5686, 0.1670]) -Greedy action tensor([ 1.5866, -0.3899, 0.6617, 0.8527]) tensor([0.4962, 0.0688, 0.1968, 0.2382]) -Greedy action tensor([ 0.8978, -1.4267, 0.2314, 0.5560]) tensor([0.4307, 0.0421, 0.2212, 0.3060]) -Greedy action tensor([ 1.8437, -0.7907, -0.1439, 0.1848]) tensor([0.7147, 0.0513, 0.0979, 0.1360]) -Greedy action tensor([ 1.1550, -0.2588, -0.2809, 0.1294]) tensor([0.5435, 0.1322, 0.1293, 0.1949]) -Greedy action tensor([ 1.0536, -0.6685, 0.0564, 0.2006]) tensor([0.5067, 0.0905, 0.1869, 0.2159]) -Greedy action tensor([ 0.7647, -0.2755, -0.0381, -0.2104]) tensor([0.4590, 0.1622, 0.2057, 0.1731]) -Greedy action tensor([ 1.4995, -0.4286, 0.1346, 0.1117]) tensor([0.6059, 0.0881, 0.1548, 0.1512]) -Greedy action tensor([ 0.6931, -0.4874, -0.0284, -0.0859]) tensor([0.4441, 0.1364, 0.2158, 0.2038]) -Greedy action tensor([ 0.9633, -0.7693, -0.5477, 0.2220]) tensor([0.5336, 0.0944, 0.1178, 0.2543]) -Greedy action tensor([ 1.4081, -0.7258, -0.4274, 0.2425]) tensor([0.6291, 0.0745, 0.1004, 0.1961]) -Greedy action tensor([ 1.1798, -0.5673, -0.3334, 0.2161]) tensor([0.5631, 0.0981, 0.1240, 0.2148]) -Greedy action tensor([ 1.2186, -0.4942, -0.4209, -0.0761]) tensor([0.6066, 0.1094, 0.1177, 0.1662]) -Greedy action tensor([ 1.1078, -0.4599, -0.5165, 0.1667]) tensor([0.5569, 0.1161, 0.1097, 0.2173]) -Greedy action tensor([ 0.9004, -0.5492, -0.1788, -0.0691]) tensor([0.5118, 0.1201, 0.1740, 0.1941]) -Greedy action tensor([0.7513, 0.0252, 0.0497, 0.0395]) tensor([0.4048, 0.1958, 0.2007, 0.1987]) -Greedy action tensor([ 0.7351, -0.5756, -0.0912, 0.1615]) tensor([0.4404, 0.1187, 0.1927, 0.2482]) -Greedy action tensor([ 1.0387, -0.5375, -0.0329, 0.0107]) tensor([0.5244, 0.1084, 0.1796, 0.1876]) -Greedy action tensor([ 1.1828, 0.0860, -0.2723, -0.1534]) tensor([0.5464, 0.1825, 0.1275, 0.1436]) -Greedy action tensor([ 1.4911, -0.4773, -0.5186, -0.1909]) tensor([0.6851, 0.0957, 0.0918, 0.1274]) -Greedy action tensor([ 1.2657, -0.5090, -0.2756, 0.0017]) tensor([0.6002, 0.1018, 0.1285, 0.1696]) -Greedy action tensor([ 1.0897, -0.3719, -0.2585, 0.0913]) tensor([0.5376, 0.1247, 0.1396, 0.1981]) -Greedy action tensor([ 1.3942, -0.2135, -0.0559, -0.1716]) tensor([0.6083, 0.1219, 0.1427, 0.1271]) -Greedy action tensor([ 0.8167, -0.5389, 0.0902, -0.0531]) tensor([0.4629, 0.1193, 0.2239, 0.1940]) -Greedy action tensor([ 1.6022, -0.7572, -0.3150, -0.0386]) tensor([0.6967, 0.0658, 0.1024, 0.1350]) -Greedy action tensor([ 1.4606, -0.4098, -0.4517, -0.0783]) tensor([0.6595, 0.1016, 0.0974, 0.1415]) -Greedy action tensor([ 0.8153, -0.5965, -0.2007, 0.1210]) tensor([0.4750, 0.1158, 0.1720, 0.2372]) -Greedy action tensor([ 0.7944, -0.2044, -0.1808, 0.1599]) tensor([0.4394, 0.1619, 0.1657, 0.2330]) -Greedy action tensor([ 0.7269, -0.4252, 0.2606, -0.0864]) tensor([0.4190, 0.1324, 0.2628, 0.1858]) -Greedy action tensor([ 2.0142, -0.6896, -0.3866, 0.4216]) tensor([0.7348, 0.0492, 0.0666, 0.1495]) -Greedy action tensor([ 0.7775, -0.4248, -0.3170, 0.1846]) tensor([0.4571, 0.1373, 0.1530, 0.2526]) -Greedy action tensor([ 0.6703, -0.3824, -0.0428, -0.0049]) tensor([0.4259, 0.1486, 0.2087, 0.2168]) -Greedy action tensor([ 1.4459, -0.5812, -0.5763, -0.1239]) tensor([0.6793, 0.0895, 0.0899, 0.1413]) -Greedy action tensor([ 0.7667, -0.4135, -0.0400, 0.1026]) tensor([0.4409, 0.1354, 0.1968, 0.2269]) -Greedy action tensor([ 0.9989, -0.4767, -0.0686, -0.2219]) tensor([0.5355, 0.1224, 0.1841, 0.1580]) -Greedy action tensor([ 1.5941, -0.4966, -0.1893, -0.0602]) tensor([0.6744, 0.0833, 0.1133, 0.1290]) -Greedy action tensor([ 1.0289, -0.0509, 0.1461, -0.1527]) tensor([0.4854, 0.1649, 0.2008, 0.1489]) -Greedy action tensor([ 0.9690, -0.6225, -0.3510, 0.1896]) tensor([0.5183, 0.1055, 0.1385, 0.2377]) -Greedy action tensor([ 1.4049, -0.6923, -0.6146, 0.2291]) tensor([0.6394, 0.0785, 0.0849, 0.1973]) -Greedy action tensor([ 0.7904, -0.7480, -0.4145, -0.0225]) tensor([0.5107, 0.1097, 0.1531, 0.2265]) -Greedy action tensor([ 1.1783, -0.4580, 0.0160, -0.0450]) tensor([0.5550, 0.1081, 0.1736, 0.1633]) -Greedy action tensor([ 1.0289, -0.4379, -0.3179, 0.1549]) tensor([0.5241, 0.1209, 0.1363, 0.2187]) -Greedy action tensor([ 1.7903, -0.4763, -0.4157, 0.0985]) tensor([0.7153, 0.0742, 0.0788, 0.1318]) -Greedy action tensor([ 1.0753, -0.0697, 0.1817, -0.1820]) tensor([0.4970, 0.1582, 0.2034, 0.1414]) -Greedy action tensor([ 1.5218, -0.0583, -0.4077, -0.2070]) tensor([0.6542, 0.1347, 0.0950, 0.1161]) -Greedy action tensor([ 0.9645, -0.3768, -0.0358, -0.2486]) tensor([0.5191, 0.1357, 0.1909, 0.1543]) -Greedy action tensor([ 0.8759, -0.2248, 0.0488, -0.0807]) tensor([0.4642, 0.1544, 0.2030, 0.1784]) -Greedy action tensor([ 0.5655, -0.5216, -0.0935, 0.0711]) tensor([0.4058, 0.1368, 0.2099, 0.2475]) -Greedy action tensor([ 0.7296, -0.4556, 0.0150, 0.0397]) tensor([0.4354, 0.1331, 0.2131, 0.2184]) -Greedy action tensor([ 1.0298, -0.5043, -0.0327, -0.0738]) tensor([0.5283, 0.1139, 0.1826, 0.1752]) -Greedy action tensor([ 1.3776, -0.2133, -0.0968, -0.0696]) tensor([0.5996, 0.1221, 0.1372, 0.1410]) -Greedy action tensor([ 0.8557, 0.0622, -0.0836, -0.0077]) tensor([0.4415, 0.1997, 0.1726, 0.1862]) -Greedy action tensor([ 1.2673, -0.8195, -0.2188, 0.0121]) tensor([0.6115, 0.0759, 0.1383, 0.1743]) -Greedy action tensor([ 0.9240, -0.5843, -0.1741, 0.0876]) tensor([0.5030, 0.1113, 0.1677, 0.2179]) -Greedy action tensor([ 0.8693, -0.4464, -0.2827, 0.2335]) tensor([0.4731, 0.1269, 0.1495, 0.2505]) -Greedy action tensor([ 1.0238, -0.0787, -0.0920, -0.1153]) tensor([0.5051, 0.1677, 0.1655, 0.1617]) -Greedy action tensor([ 0.6685, -0.0334, 0.1216, -0.0703]) tensor([0.3919, 0.1942, 0.2268, 0.1872]) -Greedy action tensor([ 1.2432, -0.6357, -0.1159, 0.1745]) tensor([0.5704, 0.0871, 0.1465, 0.1959]) -Greedy action tensor([ 1.2856, -0.6404, -0.5944, 0.2060]) tensor([0.6105, 0.0890, 0.0932, 0.2074]) -Greedy action tensor([ 0.6258, -0.4717, -0.1122, 0.1503]) tensor([0.4110, 0.1371, 0.1965, 0.2554]) -Greedy action tensor([ 1.0151, -0.5445, -0.0712, 0.1367]) tensor([0.5094, 0.1071, 0.1719, 0.2116]) -Greedy action tensor([ 1.0833, -0.5158, -0.1332, -0.0845]) tensor([0.5527, 0.1117, 0.1637, 0.1719]) -Greedy action tensor([ 0.9678, -0.6137, -0.4172, 0.0526]) tensor([0.5387, 0.1108, 0.1348, 0.2157]) -Greedy action tensor([ 8.9907e-01, -4.7759e-01, -2.6770e-01, 3.2424e-04]) tensor([0.5074, 0.1281, 0.1580, 0.2065]) -Greedy action tensor([ 0.4636, -0.2624, 0.0664, 0.0212]) tensor([0.3573, 0.1729, 0.2402, 0.2296]) -Greedy action tensor([ 1.1215, -0.7280, -0.2421, -0.1426]) tensor([0.5898, 0.0928, 0.1508, 0.1666]) -Greedy action tensor([ 1.4191, -0.7465, -0.6772, 0.4008]) tensor([0.6255, 0.0717, 0.0769, 0.2259]) -Greedy action tensor([ 1.2970, -0.4049, -0.3856, -0.1416]) tensor([0.6229, 0.1136, 0.1158, 0.1478]) -Greedy action tensor([ 1.5586, -0.4152, -0.3860, 0.1641]) tensor([0.6536, 0.0908, 0.0935, 0.1621]) -Greedy action tensor([ 1.1944, -0.3025, -0.0056, -0.3588]) tensor([0.5758, 0.1289, 0.1734, 0.1218]) -Greedy action tensor([ 1.7961, -0.5997, -0.2597, 0.1865]) tensor([0.7047, 0.0642, 0.0902, 0.1409]) -Greedy action tensor([ 1.3027, -0.6845, -0.2183, 0.2365]) tensor([0.5883, 0.0806, 0.1285, 0.2026]) -Greedy action tensor([ 0.7865, -0.6180, -0.1303, 0.0949]) tensor([0.4660, 0.1144, 0.1863, 0.2333]) -Greedy action tensor([ 0.6975, -0.2673, -0.0204, 0.0775]) tensor([0.4155, 0.1583, 0.2027, 0.2235]) -Greedy action tensor([ 0.4296, -0.5416, 0.0678, -0.1312]) tensor([0.3780, 0.1431, 0.2632, 0.2157]) -Greedy action tensor([ 1.2208, -0.5069, -0.3514, -0.1945]) tensor([0.6142, 0.1091, 0.1275, 0.1492]) -Greedy action tensor([ 0.6038, -0.3781, -0.0990, 0.0610]) tensor([0.4080, 0.1528, 0.2020, 0.2371]) -Greedy action tensor([ 1.6280, -0.6147, -0.4430, 0.2811]) tensor([0.6701, 0.0711, 0.0845, 0.1743]) -Greedy action tensor([ 0.8880, -0.4827, -0.0110, 0.0664]) tensor([0.4760, 0.1209, 0.1937, 0.2093]) -Greedy action tensor([ 0.8036, -0.4210, -0.1009, 0.0488]) tensor([0.4611, 0.1355, 0.1866, 0.2168]) -Greedy action tensor([ 0.8429, -0.5388, -0.1084, 0.2305]) tensor([0.4588, 0.1152, 0.1772, 0.2487]) -Greedy action tensor([ 0.7369, -0.3868, -0.0970, 0.0930]) tensor([0.4377, 0.1423, 0.1901, 0.2299]) -Greedy action tensor([ 1.6584e+00, -4.5644e-01, 1.5940e-03, 1.3506e-01]) tensor([0.6539, 0.0789, 0.1247, 0.1425]) -Greedy action tensor([ 1.3280, -0.5191, -0.5466, 0.0590]) tensor([0.6280, 0.0990, 0.0964, 0.1766]) -Greedy action tensor([ 0.4501, -0.0540, 0.2294, -0.5124]) tensor([0.3587, 0.2167, 0.2876, 0.1370]) -Greedy action tensor([ 0.2475, 0.2175, -0.0566, -0.3432]) tensor([0.3065, 0.2975, 0.2262, 0.1698]) -Greedy action tensor([ 0.4792, 0.0243, -0.0212, -0.3661]) tensor([0.3745, 0.2376, 0.2271, 0.1608]) -Greedy action tensor([ 0.6854, 0.0234, -0.1224, -0.3887]) tensor([0.4342, 0.2240, 0.1936, 0.1483]) -Greedy action tensor([ 0.2474, -0.0541, -0.1588, -0.3385]) tensor([0.3375, 0.2497, 0.2249, 0.1879]) -Greedy action tensor([ 0.8436, -0.4515, 0.0302, -0.6174]) tensor([0.5130, 0.1405, 0.2275, 0.1190]) -Greedy action tensor([ 0.4014, -0.2976, 0.1641, -0.5047]) tensor([0.3718, 0.1848, 0.2932, 0.1502]) -Greedy action tensor([ 0.3153, 0.0351, 0.0192, -0.0072]) tensor([0.3102, 0.2344, 0.2307, 0.2247]) -Greedy action tensor([ 0.4350, -0.1859, 0.0612, -0.3840]) tensor([0.3750, 0.2016, 0.2581, 0.1653]) -Greedy action tensor([ 0.3534, 0.0956, 0.1586, -0.0403]) tensor([0.3058, 0.2363, 0.2517, 0.2063]) -Greedy action tensor([ 0.3737, -0.0219, 0.0392, -0.2105]) tensor([0.3394, 0.2285, 0.2429, 0.1892]) -Greedy action tensor([ 0.5170, -0.1166, -0.1163, -0.5028]) tensor([0.4128, 0.2191, 0.2192, 0.1489]) -Greedy action tensor([ 0.4378, -0.1449, 0.1624, -0.5961]) tensor([0.3741, 0.2089, 0.2840, 0.1330]) -Greedy action tensor([ 0.4558, -0.1521, 0.0999, -0.1924]) tensor([0.3613, 0.1967, 0.2531, 0.1889]) -Greedy action tensor([ 0.3810, 0.1157, 0.1657, -0.2231]) tensor([0.3205, 0.2458, 0.2584, 0.1752]) -Greedy action tensor([ 0.6747, -0.3265, -0.0167, -0.2982]) tensor([0.4452, 0.1636, 0.2230, 0.1683]) -Greedy action tensor([ 0.3517, -0.1242, 0.0702, -0.3907]) tensor([0.3506, 0.2179, 0.2646, 0.1669]) -Greedy action tensor([ 0.5705, -0.0123, -0.0402, -0.3544]) tensor([0.4004, 0.2235, 0.2174, 0.1588]) -Greedy action tensor([ 0.3719, 0.1592, 0.1943, -0.2160]) tensor([0.3124, 0.2525, 0.2616, 0.1735]) -Greedy action tensor([ 0.7734, -0.3214, -0.0290, -0.5204]) tensor([0.4861, 0.1627, 0.2179, 0.1333]) -Greedy action tensor([ 0.3916, 0.0146, 0.1242, -0.1306]) tensor([0.3285, 0.2253, 0.2514, 0.1949]) -Greedy action tensor([ 0.4103, -0.1569, 0.0889, -0.4435]) tensor([0.3679, 0.2086, 0.2668, 0.1567]) -Greedy action tensor([ 0.3425, -0.2384, 0.1412, -0.2731]) tensor([0.3428, 0.1917, 0.2803, 0.1852]) -Greedy action tensor([ 0.3496, 0.2785, 0.2104, -0.1618]) tensor([0.2940, 0.2738, 0.2558, 0.1763]) -Greedy action tensor([ 1.2516, -0.7460, 0.0109, -0.7661]) tensor([0.6419, 0.0871, 0.1856, 0.0854]) -Greedy action tensor([ 0.1806, 0.0381, -0.0801, -0.0002]) tensor([0.2880, 0.2498, 0.2219, 0.2404]) -Greedy action tensor([ 0.1599, -0.0143, 0.0258, -0.2018]) tensor([0.2932, 0.2463, 0.2564, 0.2042]) -Greedy action tensor([ 0.4714, -0.0312, 0.0302, -0.1480]) tensor([0.3589, 0.2171, 0.2309, 0.1932]) -Greedy action tensor([ 0.2841, -0.1408, 0.1443, -0.2362]) tensor([0.3208, 0.2097, 0.2789, 0.1906]) -Greedy action tensor([ 0.3964, 0.1641, 0.1570, -0.2006]) tensor([0.3195, 0.2532, 0.2514, 0.1758]) -Greedy action tensor([ 0.9671, -0.5705, -0.0887, -0.5871]) tensor([0.5636, 0.1211, 0.1961, 0.1191]) -Greedy action tensor([ 0.4400, 0.1818, 0.1068, -0.2739]) tensor([0.3357, 0.2593, 0.2406, 0.1644]) -Greedy action tensor([ 0.3429, -0.1846, -0.1821, -0.3733]) tensor([0.3745, 0.2210, 0.2215, 0.1830]) -Greedy action tensor([ 0.8155, -0.2621, -0.0731, -0.6439]) tensor([0.5040, 0.1716, 0.2073, 0.1171]) -Greedy action tensor([ 0.5425, -0.0257, -0.0371, -0.2772]) tensor([0.3895, 0.2207, 0.2182, 0.1716]) -Greedy action tensor([ 0.3683, 0.1333, 0.1979, -0.1583]) tensor([0.3101, 0.2452, 0.2615, 0.1832]) -Greedy action tensor([ 0.5434, 0.0148, 0.0561, -0.3415]) tensor([0.3822, 0.2253, 0.2348, 0.1578]) -Greedy action tensor([ 1.1598, -0.6749, 0.1444, -0.7976]) tensor([0.6013, 0.0960, 0.2178, 0.0849]) -Greedy action tensor([ 0.3286, 0.1710, 0.0318, -0.2503]) tensor([0.3167, 0.2705, 0.2353, 0.1775]) -Greedy action tensor([ 0.5501, -0.1182, 0.0986, -0.2614]) tensor([0.3856, 0.1976, 0.2455, 0.1713]) -Greedy action tensor([ 0.3158, 0.1811, 0.0672, -0.2647]) tensor([0.3112, 0.2720, 0.2427, 0.1741]) -Greedy action tensor([ 0.2695, 0.0315, 0.0928, -0.1845]) tensor([0.3066, 0.2417, 0.2570, 0.1947]) -Greedy action tensor([ 0.2068, 0.0570, 0.2230, -0.2798]) tensor([0.2864, 0.2465, 0.2910, 0.1760]) -Greedy action tensor([ 0.2727, 0.0157, 0.1749, -0.2087]) tensor([0.3032, 0.2345, 0.2749, 0.1873]) -Greedy action tensor([ 0.4427, 0.0065, 0.0333, -0.2436]) tensor([0.3554, 0.2297, 0.2360, 0.1789]) -Greedy action tensor([ 0.5472, 0.0831, 0.0578, -0.2814]) tensor([0.3734, 0.2347, 0.2289, 0.1630]) -Greedy action tensor([ 0.4577, -0.1940, 0.1942, -0.3660]) tensor([0.3665, 0.1910, 0.2816, 0.1608]) -Greedy action tensor([ 0.6203, -0.1536, 0.0709, -0.4894]) tensor([0.4223, 0.1948, 0.2438, 0.1392]) -Greedy action tensor([ 0.8898, -0.3142, -0.1615, -0.7247]) tensor([0.5410, 0.1623, 0.1891, 0.1076]) -Greedy action tensor([ 0.2967, -0.0556, -0.0955, -0.1220]) tensor([0.3293, 0.2315, 0.2225, 0.2167]) -Greedy action tensor([ 0.4319, 0.0005, 0.1321, -0.2292]) tensor([0.3440, 0.2235, 0.2549, 0.1776]) -Greedy action tensor([ 0.4022, 0.2068, 0.1821, -0.2384]) tensor([0.3173, 0.2610, 0.2546, 0.1672]) -Greedy action tensor([ 0.6337, -0.3296, -0.0143, -0.4885]) tensor([0.4484, 0.1711, 0.2345, 0.1460]) -Greedy action tensor([ 0.5730, -0.3606, -0.0578, -0.5619]) tensor([0.4451, 0.1750, 0.2369, 0.1431]) -Greedy action tensor([ 0.1977, -0.0306, 0.1647, -0.1532]) tensor([0.2884, 0.2295, 0.2790, 0.2030]) -Greedy action tensor([ 0.3858, 0.0614, 0.2491, -0.4205]) tensor([0.3288, 0.2377, 0.2867, 0.1468]) -Greedy action tensor([ 0.3401, 0.1582, 0.1077, -0.2358]) tensor([0.3136, 0.2615, 0.2486, 0.1763]) -Greedy action tensor([ 0.5452, -0.1566, 0.0900, -0.3989]) tensor([0.3970, 0.1968, 0.2518, 0.1544]) -Greedy action tensor([ 0.2575, 0.0885, -0.0432, 0.1182]) tensor([0.2895, 0.2444, 0.2143, 0.2518]) -Greedy action tensor([ 0.2892, 0.1433, 0.2829, -0.2529]) tensor([0.2907, 0.2513, 0.2889, 0.1691]) -Greedy action tensor([ 0.7343, -0.1825, 0.0487, -0.5840]) tensor([0.4606, 0.1841, 0.2320, 0.1232]) -Greedy action tensor([ 0.7266, -0.5232, 0.0288, -0.5549]) tensor([0.4850, 0.1390, 0.2414, 0.1347]) -Greedy action tensor([ 0.3618, 0.0632, 0.1215, -0.1076]) tensor([0.3171, 0.2352, 0.2494, 0.1983]) -Greedy action tensor([ 0.8483, -0.4455, -0.0224, -0.4381]) tensor([0.5078, 0.1393, 0.2126, 0.1403]) -Greedy action tensor([ 0.4383, 0.0468, 0.0148, -0.2931]) tensor([0.3556, 0.2404, 0.2328, 0.1711]) -Greedy action tensor([ 0.5699, -0.2246, 0.2458, -0.4396]) tensor([0.3938, 0.1779, 0.2848, 0.1435]) -Greedy action tensor([ 1.0293, -0.4490, -0.0797, -0.8117]) tensor([0.5826, 0.1328, 0.1922, 0.0924]) -Greedy action tensor([ 0.2770, 0.0802, 0.0471, -0.2349]) tensor([0.3110, 0.2554, 0.2471, 0.1864]) -Greedy action tensor([ 0.2772, 0.0454, 0.1207, -0.3106]) tensor([0.3121, 0.2475, 0.2669, 0.1734]) -Greedy action tensor([ 0.1609, 0.2068, 0.1772, -0.2028]) tensor([0.2661, 0.2786, 0.2704, 0.1849]) -Greedy action tensor([ 0.5777, -0.0302, -0.0021, -0.3282]) tensor([0.3986, 0.2170, 0.2232, 0.1611]) -Greedy action tensor([ 0.1443, -0.0154, 0.2505, -0.0802]) tensor([0.2657, 0.2265, 0.2955, 0.2123]) -Greedy action tensor([ 0.9485, -0.5619, 0.0907, -0.6712]) tensor([0.5426, 0.1198, 0.2301, 0.1074]) -Greedy action tensor([ 0.3657, 0.0243, 0.1936, -0.3941]) tensor([0.3311, 0.2353, 0.2787, 0.1549]) -Greedy action tensor([ 0.3905, 0.0542, 0.1698, -0.3613]) tensor([0.3347, 0.2391, 0.2684, 0.1578]) -Greedy action tensor([ 0.6439, -0.2044, -0.1694, -0.5668]) tensor([0.4609, 0.1973, 0.2044, 0.1374]) -Greedy action tensor([ 0.1975, 0.0271, 0.1296, -0.2517]) tensor([0.2928, 0.2469, 0.2735, 0.1868]) -Greedy action tensor([ 0.4506, -0.0175, 0.0960, -0.3809]) tensor([0.3619, 0.2266, 0.2539, 0.1576]) -Greedy action tensor([ 0.5999, -0.1109, -0.0369, -0.3632]) tensor([0.4163, 0.2045, 0.2202, 0.1589]) -Greedy action tensor([ 0.2124, 0.2049, 0.1280, -0.3311]) tensor([0.2864, 0.2842, 0.2632, 0.1663]) -Greedy action tensor([ 0.2262, 0.1187, -0.0398, 0.0354]) tensor([0.2865, 0.2573, 0.2196, 0.2367]) -Greedy action tensor([-0.6037, -0.4881, 0.2527, 0.0184]) tensor([0.1577, 0.1771, 0.3714, 0.2938]) -Greedy action tensor([-1.0640, -0.3269, -0.4071, -0.1540]) tensor([0.1333, 0.2785, 0.2571, 0.3311]) -Greedy action tensor([-1.3995, -0.6509, 0.0393, -0.2666]) tensor([0.0958, 0.2026, 0.4040, 0.2975]) -Greedy action tensor([-1.3104, -0.5252, 0.6176, -0.0485]) tensor([0.0735, 0.1612, 0.5055, 0.2597]) -Greedy action tensor([-1.4704, -0.4897, 0.4508, -0.0528]) tensor([0.0684, 0.1823, 0.4670, 0.2822]) -Greedy action tensor([-1.3708, -0.5929, 0.3704, 0.1450]) tensor([0.0744, 0.1620, 0.4246, 0.3389]) -Greedy action tensor([-1.7763, -0.7927, 0.2084, -0.3926]) tensor([0.0669, 0.1790, 0.4871, 0.2670]) -Greedy action tensor([-1.4970, -0.5805, 0.4393, 0.0711]) tensor([0.0657, 0.1642, 0.4552, 0.3150]) -Greedy action tensor([-1.8818, -0.5900, 0.4730, -0.1798]) tensor([0.0484, 0.1762, 0.5100, 0.2655]) -Greedy action tensor([-1.4383, -0.5662, 0.5062, 0.0184]) tensor([0.0682, 0.1630, 0.4763, 0.2925]) -Greedy action tensor([-1.8069, -0.4896, 0.1648, -0.4380]) tensor([0.0631, 0.2356, 0.4533, 0.2480]) -Greedy action tensor([-1.9005, -0.7320, 0.7285, 0.0137]) tensor([0.0402, 0.1294, 0.5576, 0.2728]) -Greedy action tensor([-1.1191, 0.0400, 0.0972, -0.4213]) tensor([0.1045, 0.3330, 0.3526, 0.2099]) -Greedy action tensor([-1.6634, -0.7278, 0.7672, -0.0173]) tensor([0.0497, 0.1268, 0.5654, 0.2580]) -Greedy action tensor([-1.8189, -0.9309, 0.0564, -0.4794]) tensor([0.0726, 0.1765, 0.4737, 0.2772]) -Greedy action tensor([-1.3271, -0.4370, -0.2324, -0.4238]) tensor([0.1125, 0.2739, 0.3361, 0.2775]) -Greedy action tensor([-2.0174, -0.6940, 0.9674, 0.1658]) tensor([0.0299, 0.1124, 0.5920, 0.2656]) -Greedy action tensor([-1.5118, -0.5322, 0.4155, 0.1335]) tensor([0.0636, 0.1695, 0.4372, 0.3297]) -Greedy action tensor([-2.0084, -0.8446, 0.5454, -0.0466]) tensor([0.0414, 0.1325, 0.5319, 0.2943]) -Greedy action tensor([-1.1716, -0.4842, 0.3612, -0.0696]) tensor([0.0941, 0.1871, 0.4357, 0.2832]) -Greedy action tensor([-1.7665, -0.6666, 0.2523, -0.1100]) tensor([0.0596, 0.1791, 0.4489, 0.3124]) -Greedy action tensor([-1.6186, -0.6579, 0.2113, -0.2626]) tensor([0.0728, 0.1904, 0.4541, 0.2827]) -Greedy action tensor([-1.9151, -0.5198, 0.8309, 0.0643]) tensor([0.0359, 0.1449, 0.5593, 0.2599]) -Greedy action tensor([-1.8749, -1.0093, 0.2099, -0.5104]) tensor([0.0652, 0.1550, 0.5245, 0.2553]) -Greedy action tensor([-0.8634, 0.0698, -0.5482, 0.0755]) tensor([0.1339, 0.3404, 0.1835, 0.3423]) -Greedy action tensor([-1.8821, -0.6691, 0.5258, 0.0413]) tensor([0.0448, 0.1507, 0.4978, 0.3067]) -Greedy action tensor([-0.3112, -0.2572, 0.1673, 0.1614]) tensor([0.1896, 0.2002, 0.3060, 0.3042]) -Greedy action tensor([-1.6178, -0.7973, 0.8125, -0.2248]) tensor([0.0536, 0.1217, 0.6089, 0.2158]) -Greedy action tensor([-2.0333, -0.8417, 0.6950, 0.0239]) tensor([0.0365, 0.1201, 0.5582, 0.2853]) -Greedy action tensor([-1.4611, -0.5477, 0.4356, 0.2184]) tensor([0.0644, 0.1606, 0.4294, 0.3455]) -Greedy action tensor([-1.4904, -0.4897, 0.1522, -0.1819]) tensor([0.0794, 0.2161, 0.4106, 0.2939]) -Greedy action tensor([-1.9770, -0.6250, 0.7131, -0.1150]) tensor([0.0384, 0.1485, 0.5659, 0.2472]) -Greedy action tensor([-1.1910, -0.5188, 0.4992, -0.2609]) tensor([0.0916, 0.1795, 0.4967, 0.2322]) -Greedy action tensor([-1.5335, -0.1854, 0.5958, -0.3682]) tensor([0.0607, 0.2338, 0.5107, 0.1948]) -Greedy action tensor([-1.7189, -0.7682, 0.1990, -0.3628]) tensor([0.0701, 0.1813, 0.4768, 0.2719]) -Greedy action tensor([-1.1963, -0.2157, 0.5611, -0.5687]) tensor([0.0882, 0.2352, 0.5114, 0.1652]) -Greedy action tensor([-1.9429, -0.9694, 0.3692, -0.3525]) tensor([0.0536, 0.1419, 0.5414, 0.2631]) -Greedy action tensor([-1.6415, -0.5551, 0.5239, 0.0027]) tensor([0.0560, 0.1659, 0.4882, 0.2899]) -Greedy action tensor([-0.9754, 0.0517, -0.0314, -0.2123]) tensor([0.1175, 0.3283, 0.3021, 0.2521]) -Greedy action tensor([-1.5685, -0.5505, 0.4685, 0.0355]) tensor([0.0609, 0.1687, 0.4673, 0.3031]) -Greedy action tensor([-0.8535, 0.3422, 0.3210, -0.3252]) tensor([0.1082, 0.3578, 0.3503, 0.1836]) -Greedy action tensor([-1.4009, 0.0038, 0.1132, -0.5522]) tensor([0.0836, 0.3408, 0.3802, 0.1954]) -Greedy action tensor([-0.9330, -0.5092, 0.5107, -0.0206]) tensor([0.1081, 0.1651, 0.4578, 0.2691]) -Greedy action tensor([-1.4822, -0.4084, 0.2052, -0.1792]) tensor([0.0769, 0.2249, 0.4154, 0.2828]) -Greedy action tensor([-1.1104, -0.4223, -0.2727, -0.4061]) tensor([0.1366, 0.2717, 0.3156, 0.2762]) -Greedy action tensor([-0.9351, 0.3064, -0.3997, 0.3087]) tensor([0.1038, 0.3591, 0.1772, 0.3599]) -Greedy action tensor([-1.9998, -0.7747, 1.2438, 0.3367]) tensor([0.0248, 0.0843, 0.6347, 0.2562]) -Greedy action tensor([-1.1522, -0.3030, 0.4046, -0.4068]) tensor([0.0981, 0.2295, 0.4656, 0.2068]) -Greedy action tensor([-0.9984, 0.3592, 0.0540, -0.1776]) tensor([0.0998, 0.3878, 0.2858, 0.2267]) -Greedy action tensor([-1.9328, -0.8390, 0.1567, -0.2853]) tensor([0.0579, 0.1730, 0.4682, 0.3009]) -Greedy action tensor([-1.5909, -0.6365, 0.3062, -0.4127]) tensor([0.0740, 0.1922, 0.4934, 0.2404]) -Greedy action tensor([-1.7155, -0.9891, 0.0944, -0.8059]) tensor([0.0858, 0.1773, 0.5240, 0.2130]) -Greedy action tensor([-1.3962, -0.6615, -0.2199, -0.4083]) tensor([0.1110, 0.2313, 0.3598, 0.2980]) -Greedy action tensor([-1.5044, -0.3247, 0.4586, -0.2713]) tensor([0.0675, 0.2197, 0.4809, 0.2318]) -Greedy action tensor([-1.5490, -0.5787, 0.4651, 0.0624]) tensor([0.0619, 0.1635, 0.4642, 0.3103]) -Greedy action tensor([-1.7070, -0.7912, 0.2342, -0.4158]) tensor([0.0709, 0.1772, 0.4940, 0.2579]) -Greedy action tensor([-1.9885, -0.9120, 0.3261, -0.3263]) tensor([0.0517, 0.1518, 0.5237, 0.2727]) -Greedy action tensor([-1.9656, -0.9792, 0.4492, -0.1921]) tensor([0.0482, 0.1292, 0.5389, 0.2838]) -Greedy action tensor([-1.9817, -0.7296, 0.7459, 0.0183]) tensor([0.0368, 0.1287, 0.5627, 0.2718]) -Greedy action tensor([-1.9056, -0.9686, 0.2114, -0.4410]) tensor([0.0618, 0.1577, 0.5132, 0.2673]) -Greedy action tensor([-1.7888, -0.5473, 0.7640, -0.0667]) tensor([0.0437, 0.1511, 0.5608, 0.2444]) -Greedy action tensor([-1.8065, -0.7089, 0.2309, -0.0900]) tensor([0.0580, 0.1739, 0.4451, 0.3229]) -Greedy action tensor([-1.5899, -0.6987, -0.0993, -0.3662]) tensor([0.0887, 0.2162, 0.3937, 0.3015]) -Greedy action tensor([-1.4667, -0.6316, 0.4823, -0.0675]) tensor([0.0695, 0.1603, 0.4884, 0.2818]) -Greedy action tensor([-1.5768, -0.5326, 0.6449, -0.0669]) tensor([0.0568, 0.1615, 0.5243, 0.2573]) -Greedy action tensor([-1.3280, -0.5785, 0.3534, 0.1354]) tensor([0.0781, 0.1652, 0.4194, 0.3373]) -Greedy action tensor([-1.0082, -0.5513, 0.5389, -0.3807]) tensor([0.1093, 0.1726, 0.5134, 0.2047]) -Greedy action tensor([-1.5614, -0.5867, 0.5488, -0.1242]) tensor([0.0621, 0.1645, 0.5121, 0.2613]) -Greedy action tensor([-1.8673, -0.5733, 0.4951, -0.1782]) tensor([0.0484, 0.1764, 0.5134, 0.2619]) -Greedy action tensor([-1.8639, -0.9549, 0.2491, -0.3656]) tensor([0.0616, 0.1529, 0.5098, 0.2757]) -Greedy action tensor([-1.6597, -0.1685, 0.4395, -0.1156]) tensor([0.0547, 0.2429, 0.4462, 0.2561]) -Greedy action tensor([-2.0358, -0.9161, 0.6372, -0.1280]) tensor([0.0395, 0.1212, 0.5728, 0.2665]) -Greedy action tensor([-1.8368, -0.5618, 0.7991, 0.0597]) tensor([0.0397, 0.1420, 0.5539, 0.2644]) -Greedy action tensor([-2.0080, -0.8881, 0.2883, -0.2047]) tensor([0.0498, 0.1527, 0.4951, 0.3024]) -Greedy action tensor([-1.6720, -0.5354, 0.5254, -0.0206]) tensor([0.0545, 0.1700, 0.4910, 0.2844]) -Greedy action tensor([-1.9403, -0.8622, 0.1307, -0.3553]) tensor([0.0597, 0.1755, 0.4736, 0.2913]) -Greedy action tensor([-0.9952, -0.3745, -0.4048, -0.1779]) tensor([0.1443, 0.2685, 0.2604, 0.3268]) -Greedy action tensor([-0.6553, -0.5945, 0.2815, 0.3658]) tensor([0.1353, 0.1438, 0.3453, 0.3756]) -Greedy action tensor([-2.0000, -0.9694, 0.5054, -0.2772]) tensor([0.0462, 0.1295, 0.5657, 0.2587]) -Greedy action tensor([-1.5498, -0.2718, 0.5441, -0.2344]) tensor([0.0609, 0.2184, 0.4939, 0.2268]) -Greedy action tensor([-0.8685, -0.4917, 0.4510, -0.2136]) tensor([0.1231, 0.1794, 0.4605, 0.2369]) -Greedy action tensor([ 1.3734, -0.2995, 0.6581, 0.7337]) tensor([0.4537, 0.0852, 0.2219, 0.2393]) -Greedy action tensor([ 1.2089, -0.7683, 0.9421, 0.8078]) tensor([0.3885, 0.0538, 0.2975, 0.2601]) -Greedy action tensor([ 1.2051, -0.2974, -0.5491, -0.1154]) tensor([0.6015, 0.1339, 0.1041, 0.1606]) -Greedy action tensor([ 0.6114, -1.1023, 0.7249, 0.4223]) tensor([0.3197, 0.0576, 0.3581, 0.2646]) -Greedy action tensor([ 1.2774, -0.5772, -0.0115, 0.8380]) tensor([0.4816, 0.0754, 0.1327, 0.3103]) -Greedy action tensor([ 1.8373, -0.7881, 1.0377, 1.2413]) tensor([0.4824, 0.0349, 0.2168, 0.2658]) -Greedy action tensor([ 1.2476, -0.5264, 0.8021, 0.6564]) tensor([0.4231, 0.0718, 0.2710, 0.2342]) -Greedy action tensor([ 0.9700, -0.5409, 1.2551, 0.9799]) tensor([0.2808, 0.0620, 0.3735, 0.2837]) -Greedy action tensor([ 1.5445, -0.3695, 0.6308, 2.2514]) tensor([0.2796, 0.0412, 0.1121, 0.5670]) -Greedy action tensor([ 1.3768, -0.2560, -0.1467, 1.0364]) tensor([0.4706, 0.0919, 0.1026, 0.3348]) -Greedy action tensor([ 1.3634, -0.3918, -0.1852, 1.0623]) tensor([0.4705, 0.0813, 0.1000, 0.3482]) -Greedy action tensor([ 0.9426, -0.8452, 0.8164, 0.9603]) tensor([0.3261, 0.0546, 0.2874, 0.3319]) -Greedy action tensor([ 1.6833, -0.6194, 1.5055, 1.6561]) tensor([0.3436, 0.0344, 0.2876, 0.3344]) -Greedy action tensor([ 2.1249, -0.8032, 0.9921, 1.6735]) tensor([0.4969, 0.0266, 0.1601, 0.3164]) -Greedy action tensor([ 0.8113, -0.5923, 1.1065, 0.5713]) tensor([0.2962, 0.0728, 0.3980, 0.2330]) -Greedy action tensor([ 1.8918, 0.0957, -0.3835, 0.6515]) tensor([0.6419, 0.1065, 0.0660, 0.1857]) -Greedy action tensor([ 1.5662, -0.6092, 1.2470, 0.8539]) tensor([0.4290, 0.0487, 0.3118, 0.2104]) -Greedy action tensor([ 1.7202, -0.4660, 1.2683, 0.5939]) tensor([0.4824, 0.0542, 0.3070, 0.1564]) -Greedy action tensor([ 1.7314, -0.4798, 1.1794, 1.7389]) tensor([0.3714, 0.0407, 0.2138, 0.3741]) -Greedy action tensor([ 0.4993, -0.1929, 0.4935, 1.1390]) tensor([0.2278, 0.1140, 0.2264, 0.4318]) -Greedy action tensor([ 0.6727, 0.2666, 0.4203, -0.7028]) tensor([0.3709, 0.2471, 0.2882, 0.0937]) -Greedy action tensor([ 0.7988, -0.3144, 0.4995, 0.7132]) tensor([0.3347, 0.1100, 0.2481, 0.3072]) -Greedy action tensor([ 1.4848, 0.4841, -1.0667, 0.7943]) tensor([0.5136, 0.1888, 0.0400, 0.2575]) -Greedy action tensor([ 1.5249, -0.0138, 0.8920, 1.2227]) tensor([0.4024, 0.0864, 0.2137, 0.2975]) -Greedy action tensor([ 1.9395, -0.3720, 0.7150, 1.5795]) tensor([0.4783, 0.0474, 0.1406, 0.3337]) -Greedy action tensor([ 1.1022, -0.3728, 0.8363, 0.8122]) tensor([0.3645, 0.0834, 0.2794, 0.2727]) -Greedy action tensor([ 2.0667, -0.3383, 0.6672, 1.7467]) tensor([0.4847, 0.0438, 0.1196, 0.3520]) -Greedy action tensor([ 1.0667, -1.0712, 0.1579, 0.7884]) tensor([0.4390, 0.0518, 0.1769, 0.3323]) -Greedy action tensor([ 1.4730, -1.3069, 0.4297, 0.6465]) tensor([0.5400, 0.0335, 0.1902, 0.2363]) -Greedy action tensor([ 2.2118, -0.6182, 0.3909, 1.0320]) tensor([0.6543, 0.0386, 0.1059, 0.2011]) -Greedy action tensor([ 1.5602, -1.2111, 1.1022, 1.6081]) tensor([0.3644, 0.0228, 0.2305, 0.3823]) -Greedy action tensor([ 1.6823, -0.8130, 0.8192, 0.7219]) tensor([0.5299, 0.0437, 0.2236, 0.2028]) -Greedy action tensor([ 0.3336, -0.0164, 0.5664, 1.2511]) tensor([0.1828, 0.1288, 0.2308, 0.4576]) -Greedy action tensor([1.6411, 0.1579, 0.3537, 1.9739]) tensor([0.3451, 0.0783, 0.0952, 0.4813]) -Greedy action tensor([ 1.3377, -0.4276, 0.6235, 1.6259]) tensor([0.3339, 0.0571, 0.1635, 0.4455]) -Greedy action tensor([ 1.8265, -0.0955, -0.0516, 1.4958]) tensor([0.4956, 0.0725, 0.0758, 0.3561]) -Greedy action tensor([2.0925, 0.5154, 1.2697, 1.6228]) tensor([0.4403, 0.0910, 0.1934, 0.2753]) -Greedy action tensor([ 1.0625, -0.1235, 0.5958, 0.8959]) tensor([0.3598, 0.1099, 0.2256, 0.3046]) -Greedy action tensor([0.7465, 0.0436, 0.7275, 1.1440]) tensor([0.2523, 0.1249, 0.2475, 0.3754]) -Greedy action tensor([ 1.2597, -0.1791, 0.8182, 0.7006]) tensor([0.4078, 0.0967, 0.2623, 0.2332]) -Greedy action tensor([ 1.4698, -0.8894, 0.4889, 1.6158]) tensor([0.3807, 0.0360, 0.1428, 0.4406]) -Greedy action tensor([1.4169, 0.0194, 1.0333, 1.3355]) tensor([0.3508, 0.0867, 0.2391, 0.3234]) -Greedy action tensor([ 1.1632, -0.5387, 1.2558, 0.8567]) tensor([0.3316, 0.0605, 0.3638, 0.2441]) -Greedy action tensor([ 1.3217, -0.9772, 1.0986, 0.6413]) tensor([0.4155, 0.0417, 0.3324, 0.2104]) -Greedy action tensor([ 0.9824, -0.7814, 1.0991, -0.0093]) tensor([0.3751, 0.0643, 0.4215, 0.1391]) -Greedy action tensor([ 1.9750, -0.8230, 1.1773, 1.6954]) tensor([0.4410, 0.0269, 0.1986, 0.3335]) -Greedy action tensor([ 2.0374, -0.2802, 0.5886, 1.9124]) tensor([0.4513, 0.0445, 0.1060, 0.3983]) -Greedy action tensor([1.4988, 0.3084, 0.7085, 0.7111]) tensor([0.4519, 0.1374, 0.2050, 0.2056]) -Greedy action tensor([ 0.6128, -1.1955, 0.9967, 0.2191]) tensor([0.3024, 0.0496, 0.4440, 0.2040]) -Greedy action tensor([1.2178, 0.0181, 1.5098, 0.4795]) tensor([0.3207, 0.0966, 0.4294, 0.1533]) -Greedy action tensor([ 1.7258, -0.8500, 1.3152, 1.4755]) tensor([0.3972, 0.0302, 0.2634, 0.3092]) -Greedy action tensor([ 1.4501, -0.4476, 0.0449, 1.4507]) tensor([0.4174, 0.0626, 0.1024, 0.4176]) -Greedy action tensor([ 1.2130, -0.7551, 1.1386, 0.5160]) tensor([0.3897, 0.0544, 0.3618, 0.1941]) -Greedy action tensor([ 1.3724, -0.1233, 0.5712, 0.1147]) tensor([0.5109, 0.1145, 0.2293, 0.1453]) -Greedy action tensor([1.1235, 0.3991, 0.8912, 1.7867]) tensor([0.2371, 0.1149, 0.1879, 0.4601]) -Greedy action tensor([ 1.4575, -0.5807, 1.2121, 0.8842]) tensor([0.4038, 0.0526, 0.3160, 0.2276]) -Greedy action tensor([ 1.6955, -0.9270, -0.2952, 1.3249]) tensor([0.5264, 0.0382, 0.0719, 0.3634]) -Greedy action tensor([ 1.0706, 0.2270, -0.7625, 0.0601]) tensor([0.5117, 0.2201, 0.0818, 0.1863]) -Greedy action tensor([ 1.4531, -0.4363, -0.6027, 0.9929]) tensor([0.5235, 0.0791, 0.0670, 0.3304]) -Greedy action tensor([1.1245, 0.2989, 0.1374, 1.0691]) tensor([0.3627, 0.1589, 0.1352, 0.3432]) -Greedy action tensor([ 1.1370, -0.9186, 0.3333, 0.5514]) tensor([0.4689, 0.0600, 0.2099, 0.2611]) -Greedy action tensor([ 1.2060, -0.2790, 1.4029, 0.6223]) tensor([0.3331, 0.0755, 0.4056, 0.1858]) -Greedy action tensor([ 1.2798, -0.5441, 0.5200, 0.8171]) tensor([0.4427, 0.0715, 0.2071, 0.2787]) -Greedy action tensor([ 1.7038, -0.6316, 0.7783, 2.0050]) tensor([0.3515, 0.0340, 0.1393, 0.4751]) -Greedy action tensor([1.0100, 0.3762, 0.7226, 1.9079]) tensor([0.2112, 0.1120, 0.1584, 0.5183]) -Greedy action tensor([ 1.9313, 0.1768, -0.0991, 0.3106]) tensor([0.6658, 0.1152, 0.0874, 0.1317]) -Greedy action tensor([0.8298, 0.3370, 1.0811, 0.4655]) tensor([0.2785, 0.1701, 0.3580, 0.1934]) -Greedy action tensor([ 0.6465, -0.4785, -0.0612, 0.4524]) tensor([0.3786, 0.1229, 0.1866, 0.3118]) -Greedy action tensor([ 1.3830, -0.6565, 1.1861, 1.1692]) tensor([0.3625, 0.0472, 0.2977, 0.2927]) -Greedy action tensor([ 2.0389, -0.6381, 0.8363, 1.4505]) tensor([0.5196, 0.0357, 0.1561, 0.2885]) -Greedy action tensor([ 1.3151, -0.0983, 0.4445, 1.0974]) tensor([0.4055, 0.0986, 0.1698, 0.3261]) -Greedy action tensor([ 1.2785, -0.3201, 1.3183, 0.6614]) tensor([0.3594, 0.0727, 0.3740, 0.1939]) -Greedy action tensor([ 1.4836, -1.4248, 1.4823, 1.2668]) tensor([0.3498, 0.0191, 0.3494, 0.2817]) -Greedy action tensor([ 0.9492, -0.8729, 0.4003, 0.5190]) tensor([0.4185, 0.0677, 0.2417, 0.2722]) -Greedy action tensor([1.5902, 0.1586, 0.8863, 1.7897]) tensor([0.3385, 0.0809, 0.1674, 0.4132]) -Greedy action tensor([0.8982, 0.1447, 0.5554, 0.0613]) tensor([0.3826, 0.1801, 0.2716, 0.1657]) -Greedy action tensor([ 1.9564, -0.7509, 0.8733, 1.3938]) tensor([0.5063, 0.0338, 0.1714, 0.2885]) -Greedy action tensor([ 1.7526, -0.3226, -0.1646, 1.6246]) tensor([0.4646, 0.0583, 0.0683, 0.4088]) -Greedy action tensor([1.4094, 0.0321, 0.9315, 1.8153]) tensor([0.2965, 0.0748, 0.1838, 0.4449]) -Greedy action tensor([ 1.9257, -1.2008, 0.3088, 1.9816]) tensor([0.4348, 0.0191, 0.0863, 0.4598]) -Greedy action tensor([ 1.0526, -1.5834, 0.6789, 0.5816]) tensor([0.4194, 0.0300, 0.2887, 0.2619]) -Greedy action tensor([ 1.5072, -0.0413, 0.4631, 0.8743]) tensor([0.4772, 0.1014, 0.1680, 0.2534]) -Greedy action tensor([ 0.4038, -0.4038, 0.1305, -0.0957]) tensor([0.3554, 0.1585, 0.2704, 0.2157]) -Greedy action tensor([ 0.7960, -0.3546, -0.2328, 0.1481]) tensor([0.4552, 0.1440, 0.1627, 0.2381]) -Greedy action tensor([ 0.8645, -0.1196, -0.0313, 0.1149]) tensor([0.4435, 0.1658, 0.1811, 0.2096]) -Greedy action tensor([ 0.8113, -0.4744, -0.3019, -0.0786]) tensor([0.4961, 0.1372, 0.1630, 0.2038]) -Greedy action tensor([ 0.8383, -0.4001, 0.3021, -0.2384]) tensor([0.4514, 0.1308, 0.2640, 0.1538]) -Greedy action tensor([ 1.0896, 0.0183, -0.1083, -0.0973]) tensor([0.5129, 0.1757, 0.1548, 0.1565]) -Greedy action tensor([ 1.0907, -0.3995, -0.4194, 0.1746]) tensor([0.5416, 0.1220, 0.1196, 0.2167]) -Greedy action tensor([ 1.2510, -0.2603, -0.4779, -0.2004]) tensor([0.6126, 0.1352, 0.1087, 0.1435]) -Greedy action tensor([ 1.6944, -0.5076, -0.2237, 0.3779]) tensor([0.6555, 0.0725, 0.0963, 0.1757]) -Greedy action tensor([ 1.6683, -0.5976, -0.2637, 0.2922]) tensor([0.6662, 0.0691, 0.0965, 0.1682]) -Greedy action tensor([ 0.7626, -0.4434, -0.1818, 0.1608]) tensor([0.4472, 0.1339, 0.1739, 0.2450]) -Greedy action tensor([ 1.4565, -0.5421, -0.3532, -0.1931]) tensor([0.6705, 0.0909, 0.1098, 0.1288]) -Greedy action tensor([ 0.9897, -0.4549, -0.1919, 0.1846]) tensor([0.5026, 0.1185, 0.1542, 0.2247]) -Greedy action tensor([ 1.1731, -0.7125, -0.4524, 0.1912]) tensor([0.5803, 0.0881, 0.1142, 0.2174]) -Greedy action tensor([ 1.5134, -0.0936, -0.1347, -0.1146]) tensor([0.6292, 0.1262, 0.1211, 0.1235]) -Greedy action tensor([ 1.5272, -0.5721, -0.2466, 0.2960]) tensor([0.6313, 0.0774, 0.1071, 0.1843]) -Greedy action tensor([ 1.5084, -0.6049, -0.2285, 0.0103]) tensor([0.6577, 0.0795, 0.1158, 0.1470]) -Greedy action tensor([ 1.2052, -0.0347, 0.0683, -0.1484]) tensor([0.5352, 0.1549, 0.1717, 0.1382]) -Greedy action tensor([ 1.3324, -0.5223, -0.4026, -0.2035]) tensor([0.6459, 0.1011, 0.1139, 0.1390]) -Greedy action tensor([ 1.0243, -0.4476, -0.1354, 0.0530]) tensor([0.5204, 0.1194, 0.1632, 0.1970]) -Greedy action tensor([ 1.4413, -0.2117, -0.2169, -0.1504]) tensor([0.6307, 0.1208, 0.1201, 0.1284]) -Greedy action tensor([ 1.3966, -0.3960, -0.2549, -0.0278]) tensor([0.6254, 0.1042, 0.1199, 0.1505]) -Greedy action tensor([ 1.1039, -0.2808, -0.0429, -0.1926]) tensor([0.5430, 0.1360, 0.1725, 0.1485]) -Greedy action tensor([ 1.2735, -0.4443, -0.1546, 0.1997]) tensor([0.5679, 0.1019, 0.1362, 0.1940]) -Greedy action tensor([ 0.7190, -0.3525, -0.1693, 0.0600]) tensor([0.4403, 0.1508, 0.1811, 0.2278]) -Greedy action tensor([ 1.0941, -0.5218, -0.2743, 0.1629]) tensor([0.5413, 0.1076, 0.1378, 0.2133]) -Greedy action tensor([ 0.7034, -0.5649, -0.3602, 0.0737]) tensor([0.4631, 0.1303, 0.1599, 0.2467]) -Greedy action tensor([ 0.5497, -0.4794, 0.0241, -0.1253]) tensor([0.4069, 0.1454, 0.2406, 0.2072]) -Greedy action tensor([ 1.2842, -0.5989, -0.0518, 0.2005]) tensor([0.5703, 0.0868, 0.1499, 0.1930]) -Greedy action tensor([ 0.7603, -0.5225, -0.0286, 0.0402]) tensor([0.4508, 0.1250, 0.2048, 0.2194]) -Greedy action tensor([ 0.7096, -0.4055, 0.0113, -0.1764]) tensor([0.4469, 0.1465, 0.2223, 0.1843]) -Greedy action tensor([ 1.1708, -0.2720, -0.2417, -0.1438]) tensor([0.5719, 0.1351, 0.1393, 0.1536]) -Greedy action tensor([ 1.6504, -0.7428, -0.3607, 0.2525]) tensor([0.6792, 0.0620, 0.0909, 0.1678]) -Greedy action tensor([ 1.1685, -0.6692, -0.6189, 0.2417]) tensor([0.5806, 0.0924, 0.0972, 0.2298]) -Greedy action tensor([ 0.6988, -0.5442, 0.0559, -0.0480]) tensor([0.4370, 0.1261, 0.2298, 0.2071]) -Greedy action tensor([ 0.8923, -0.5281, 0.0675, -0.0877]) tensor([0.4866, 0.1176, 0.2133, 0.1826]) -Greedy action tensor([ 0.9980, -0.6684, -0.1456, 0.2338]) tensor([0.5068, 0.0957, 0.1615, 0.2360]) -Greedy action tensor([ 1.4151, -0.5414, -0.4535, 0.1491]) tensor([0.6339, 0.0896, 0.0978, 0.1787]) -Greedy action tensor([ 1.4723, -0.4011, -0.4795, -0.1892]) tensor([0.6732, 0.1034, 0.0956, 0.1278]) -Greedy action tensor([ 1.5433, -0.2150, -0.0556, -0.1233]) tensor([0.6397, 0.1102, 0.1293, 0.1208]) -Greedy action tensor([ 1.1030, -0.3953, -0.0430, -0.2050]) tensor([0.5519, 0.1234, 0.1755, 0.1492]) -Greedy action tensor([ 0.8906, -0.6057, -0.1440, 0.2158]) tensor([0.4788, 0.1072, 0.1702, 0.2438]) -Greedy action tensor([ 1.3800, -0.5639, -0.3929, 0.2922]) tensor([0.6061, 0.0868, 0.1029, 0.2042]) -Greedy action tensor([ 1.1928, -0.4831, -0.2632, -0.1045]) tensor([0.5905, 0.1105, 0.1377, 0.1614]) -Greedy action tensor([ 1.1534, -0.5901, -0.1101, 0.5672]) tensor([0.4965, 0.0868, 0.1403, 0.2763]) -Greedy action tensor([ 1.4243, -0.3536, -0.1563, -0.1314]) tensor([0.6306, 0.1066, 0.1298, 0.1331]) -Greedy action tensor([ 0.8481, -0.4205, -0.2246, 0.0024]) tensor([0.4872, 0.1370, 0.1667, 0.2091]) -Greedy action tensor([ 1.1424, -0.2345, -0.1748, -0.0246]) tensor([0.5460, 0.1378, 0.1463, 0.1700]) -Greedy action tensor([ 0.7657, -0.6694, -0.2683, 0.0693]) tensor([0.4780, 0.1138, 0.1700, 0.2382]) -Greedy action tensor([ 0.7887, -0.1707, -0.0643, 0.0880]) tensor([0.4337, 0.1662, 0.1848, 0.2152]) -Greedy action tensor([ 1.3480, -0.6344, -0.1903, -0.0074]) tensor([0.6210, 0.0855, 0.1334, 0.1601]) -Greedy action tensor([ 1.2574, -0.7103, -0.3181, 0.1998]) tensor([0.5903, 0.0825, 0.1221, 0.2050]) -Greedy action tensor([ 1.6735, -0.5363, -0.3754, 0.1321]) tensor([0.6884, 0.0755, 0.0887, 0.1474]) -Greedy action tensor([ 1.2239, -0.5182, -0.2332, -0.2557]) tensor([0.6113, 0.1071, 0.1424, 0.1392]) -Greedy action tensor([ 1.5048, -0.7265, -0.6361, 0.2461]) tensor([0.6627, 0.0712, 0.0779, 0.1882]) -Greedy action tensor([ 0.7359, -0.3467, -0.1181, -0.1358]) tensor([0.4581, 0.1552, 0.1950, 0.1916]) -Greedy action tensor([ 0.9849, -0.4934, -0.0997, -0.2607]) tensor([0.5394, 0.1230, 0.1823, 0.1552]) -Greedy action tensor([ 0.6966, -0.0777, -0.2344, 0.0629]) tensor([0.4191, 0.1932, 0.1652, 0.2224]) -Greedy action tensor([ 0.6876, -0.3489, -0.0024, -0.0429]) tensor([0.4277, 0.1517, 0.2145, 0.2060]) -Greedy action tensor([ 1.0443, -0.7382, -0.2503, -0.0513]) tensor([0.5629, 0.0947, 0.1542, 0.1882]) -Greedy action tensor([ 0.6472, -0.6739, -0.2210, 0.1157]) tensor([0.4397, 0.1173, 0.1846, 0.2584]) -Greedy action tensor([ 0.4002, -0.1247, 0.0467, -0.0965]) tensor([0.3446, 0.2038, 0.2419, 0.2097]) -Greedy action tensor([ 0.7369, -0.5184, -0.1085, 0.0034]) tensor([0.4557, 0.1299, 0.1957, 0.2188]) -Greedy action tensor([ 0.5386, -0.5381, 0.0219, 0.0187]) tensor([0.3950, 0.1346, 0.2356, 0.2348]) -Greedy action tensor([ 0.6979, -0.5292, -0.2243, 0.0242]) tensor([0.4544, 0.1332, 0.1807, 0.2317]) -Greedy action tensor([ 0.6506, -0.3550, -0.0200, -0.0598]) tensor([0.4222, 0.1544, 0.2159, 0.2075]) -Greedy action tensor([ 0.8571, -0.4492, -0.2044, 0.1084]) tensor([0.4785, 0.1296, 0.1655, 0.2263]) -Greedy action tensor([ 0.8259, -0.5804, -0.0354, 0.1209]) tensor([0.4626, 0.1134, 0.1955, 0.2286]) -Greedy action tensor([ 1.8767, -0.6663, -0.1997, 0.1985]) tensor([0.7190, 0.0565, 0.0902, 0.1343]) -Greedy action tensor([ 0.7303, -0.5211, -0.2611, -0.0782]) tensor([0.4756, 0.1361, 0.1765, 0.2119]) -Greedy action tensor([ 1.4917, -0.5110, -0.3290, 0.0387]) tensor([0.6533, 0.0882, 0.1058, 0.1528]) -Greedy action tensor([ 0.7865, -0.4884, -0.0209, 0.1500]) tensor([0.4435, 0.1239, 0.1978, 0.2347]) -Greedy action tensor([ 0.9730, -0.4840, -0.3294, -0.0426]) tensor([0.5356, 0.1248, 0.1456, 0.1940]) -Greedy action tensor([ 1.4173, -0.4128, -0.4173, -0.0697]) tensor([0.6468, 0.1037, 0.1033, 0.1462]) -Greedy action tensor([ 0.8945, -0.4152, -0.2317, 0.1114]) tensor([0.4875, 0.1316, 0.1581, 0.2228]) -Greedy action tensor([ 1.2356, -0.5844, -0.0600, 0.1823]) tensor([0.5604, 0.0908, 0.1534, 0.1954]) -Greedy action tensor([ 1.2689, -0.4516, -0.4537, 0.1635]) tensor([0.5922, 0.1060, 0.1058, 0.1961]) -Greedy action tensor([ 1.2332, -0.3324, 0.0104, -0.2258]) tensor([0.5761, 0.1204, 0.1696, 0.1339]) -Greedy action tensor([ 1.2275, -0.1083, -0.1982, -0.0523]) tensor([0.5614, 0.1476, 0.1349, 0.1561]) -Greedy action tensor([ 0.9645, -0.3439, -0.1421, -0.0422]) tensor([0.5086, 0.1374, 0.1682, 0.1858]) -Greedy action tensor([ 1.5532, -0.2431, -0.3478, -0.0284]) tensor([0.6575, 0.1091, 0.0982, 0.1352]) -Greedy action tensor([ 0.3491, -0.1239, 0.1483, -0.3961]) tensor([0.3430, 0.2137, 0.2806, 0.1628]) -Greedy action tensor([ 0.2189, 0.0911, 0.1202, -0.1608]) tensor([0.2882, 0.2536, 0.2611, 0.1971]) -Greedy action tensor([ 0.2802, 0.2939, 0.1357, -0.2196]) tensor([0.2869, 0.2908, 0.2483, 0.1740]) -Greedy action tensor([ 0.4338, 0.0542, 0.0335, -0.1542]) tensor([0.3437, 0.2351, 0.2303, 0.1909]) -Greedy action tensor([ 0.4779, -0.2993, -0.0463, -0.4510]) tensor([0.4087, 0.1879, 0.2420, 0.1614]) -Greedy action tensor([ 0.5272, -0.0598, 0.0319, -0.2736]) tensor([0.3825, 0.2127, 0.2331, 0.1717]) -Greedy action tensor([ 0.4382, -0.0329, 0.1380, -0.3585]) tensor([0.3551, 0.2217, 0.2630, 0.1601]) -Greedy action tensor([ 1.1223, -0.6642, -0.0387, -0.6124]) tensor([0.6034, 0.1011, 0.1890, 0.1065]) -Greedy action tensor([ 0.1807, -0.1051, -0.0039, -0.0800]) tensor([0.2982, 0.2241, 0.2479, 0.2298]) -Greedy action tensor([ 0.7005, -0.1978, -0.0402, -0.3940]) tensor([0.4507, 0.1836, 0.2149, 0.1509]) -Greedy action tensor([ 0.3079, 0.0440, 0.2478, -0.1109]) tensor([0.2970, 0.2281, 0.2796, 0.1953]) -Greedy action tensor([ 0.5770, -0.2526, -0.0446, -0.4902]) tensor([0.4315, 0.1883, 0.2318, 0.1484]) -Greedy action tensor([ 0.4993, -0.0136, 0.0448, -0.2212]) tensor([0.3676, 0.2201, 0.2334, 0.1789]) -Greedy action tensor([ 0.5398, -0.0490, 0.0731, -0.3664]) tensor([0.3867, 0.2146, 0.2425, 0.1562]) -Greedy action tensor([ 0.3578, 0.0330, 0.1165, -0.3521]) tensor([0.3333, 0.2409, 0.2619, 0.1639]) -Greedy action tensor([ 0.3256, 0.1853, 0.0739, -0.2082]) tensor([0.3093, 0.2688, 0.2405, 0.1814]) -Greedy action tensor([ 0.4953, 0.0115, -0.0944, -0.2610]) tensor([0.3787, 0.2335, 0.2100, 0.1778]) -Greedy action tensor([ 0.4876, -0.1037, -0.0230, -0.3384]) tensor([0.3859, 0.2136, 0.2316, 0.1689]) -Greedy action tensor([ 0.5463, 0.0496, 0.0986, -0.2375]) tensor([0.3698, 0.2250, 0.2363, 0.1689]) -Greedy action tensor([ 0.3567, 0.1092, -0.0495, -0.0216]) tensor([0.3193, 0.2493, 0.2127, 0.2187]) -Greedy action tensor([ 0.3001, 0.1705, 0.1016, -0.2333]) tensor([0.3044, 0.2674, 0.2496, 0.1786]) -Greedy action tensor([ 0.4260, -0.1744, 0.1998, -0.4616]) tensor([0.3626, 0.1989, 0.2892, 0.1493]) -Greedy action tensor([ 0.2149, -0.0028, 0.1897, -0.1507]) tensor([0.2879, 0.2316, 0.2807, 0.1997]) -Greedy action tensor([ 0.8070, -0.2529, -0.0844, -0.7093]) tensor([0.5061, 0.1753, 0.2075, 0.1111]) -Greedy action tensor([ 0.7372, -0.2539, -0.0234, -0.6751]) tensor([0.4803, 0.1783, 0.2245, 0.1170]) -Greedy action tensor([ 0.1311, 0.1262, 0.1877, -0.2061]) tensor([0.2655, 0.2642, 0.2809, 0.1895]) -Greedy action tensor([ 0.5376, -0.2218, 0.1144, -0.5078]) tensor([0.4041, 0.1891, 0.2647, 0.1421]) -Greedy action tensor([ 0.2188, 0.1452, 0.1729, -0.2284]) tensor([0.2838, 0.2637, 0.2711, 0.1815]) -Greedy action tensor([ 0.3262, 0.1644, 0.1374, -0.0515]) tensor([0.2973, 0.2529, 0.2461, 0.2038]) -Greedy action tensor([ 0.3750, 0.2211, 0.1269, -0.1861]) tensor([0.3117, 0.2672, 0.2432, 0.1779]) -Greedy action tensor([ 0.9458, -0.5241, -0.0243, -0.7633]) tensor([0.5586, 0.1285, 0.2118, 0.1011]) -Greedy action tensor([ 0.8208, -0.3568, 0.0604, -0.6186]) tensor([0.4969, 0.1530, 0.2323, 0.1178]) -Greedy action tensor([ 0.3164, 0.1063, 0.2228, -0.2638]) tensor([0.3048, 0.2470, 0.2776, 0.1706]) -Greedy action tensor([ 0.4247, 0.2422, 0.1017, -0.0032]) tensor([0.3116, 0.2596, 0.2256, 0.2031]) -Greedy action tensor([ 0.5312, -0.1841, -0.0646, -0.3760]) tensor([0.4092, 0.2001, 0.2255, 0.1652]) -Greedy action tensor([ 0.6637, -0.0680, 0.0122, -0.4733]) tensor([0.4305, 0.2071, 0.2244, 0.1381]) -Greedy action tensor([ 0.4443, 0.1559, -0.0031, -0.1902]) tensor([0.3426, 0.2568, 0.2190, 0.1816]) -Greedy action tensor([ 0.6537, 0.0511, 0.0202, -0.3420]) tensor([0.4086, 0.2236, 0.2168, 0.1510]) -Greedy action tensor([ 0.3309, 0.2103, -0.0833, -0.3438]) tensor([0.3272, 0.2900, 0.2162, 0.1666]) -Greedy action tensor([ 0.7921, -0.3394, -0.0915, -0.6420]) tensor([0.5065, 0.1634, 0.2093, 0.1207]) -Greedy action tensor([ 1.2086, -0.7439, -0.0566, -0.8550]) tensor([0.6447, 0.0915, 0.1819, 0.0819]) -Greedy action tensor([ 0.3184, 0.2039, 0.1552, -0.2716]) tensor([0.3034, 0.2706, 0.2577, 0.1682]) -Greedy action tensor([ 0.4570, -0.2389, 0.2601, -0.5156]) tensor([0.3707, 0.1848, 0.3044, 0.1401]) -Greedy action tensor([ 0.3511, 0.1223, 0.1444, -0.1475]) tensor([0.3109, 0.2473, 0.2529, 0.1889]) -Greedy action tensor([ 0.3168, 0.1170, 0.1766, -0.1978]) tensor([0.3043, 0.2492, 0.2645, 0.1819]) -Greedy action tensor([ 0.4719, -0.0553, 0.1239, -0.2847]) tensor([0.3616, 0.2134, 0.2553, 0.1697]) -Greedy action tensor([ 0.4621, -0.0158, -0.0014, -0.3382]) tensor([0.3706, 0.2298, 0.2331, 0.1665]) -Greedy action tensor([ 0.3106, -0.1666, 0.1539, -0.2352]) tensor([0.3273, 0.2031, 0.2799, 0.1897]) -Greedy action tensor([ 0.5747, -0.0308, 0.0458, -0.3666]) tensor([0.3960, 0.2162, 0.2333, 0.1545]) -Greedy action tensor([ 0.7105, -0.1482, -0.0347, -0.4722]) tensor([0.4536, 0.1922, 0.2153, 0.1390]) -Greedy action tensor([ 0.5573, -0.0600, 0.0649, -0.4465]) tensor([0.3973, 0.2143, 0.2428, 0.1456]) -Greedy action tensor([ 0.3418, 0.1606, -0.0205, -0.0213]) tensor([0.3100, 0.2586, 0.2158, 0.2156]) -Greedy action tensor([ 0.8899, -0.4712, -0.0633, -0.6661]) tensor([0.5397, 0.1384, 0.2081, 0.1139]) -Greedy action tensor([ 0.2997, -0.0326, 0.2778, -0.0785]) tensor([0.2958, 0.2122, 0.2894, 0.2027]) -Greedy action tensor([ 0.4670, -0.0634, 0.0370, -0.4406]) tensor([0.3784, 0.2227, 0.2462, 0.1527]) -Greedy action tensor([ 0.4498, -0.0847, 0.0772, -0.2706]) tensor([0.3621, 0.2122, 0.2495, 0.1762]) -Greedy action tensor([ 0.2015, -0.0350, 0.0175, -0.2322]) tensor([0.3059, 0.2414, 0.2545, 0.1982]) -Greedy action tensor([ 0.6158, -0.0317, 0.0200, -0.3246]) tensor([0.4057, 0.2123, 0.2236, 0.1584]) -Greedy action tensor([ 0.1727, 0.0241, 0.0669, -0.0951]) tensor([0.2836, 0.2444, 0.2551, 0.2169]) -Greedy action tensor([ 0.2438, 0.1083, 0.2054, -0.1023]) tensor([0.2823, 0.2465, 0.2716, 0.1997]) -Greedy action tensor([ 0.2828, 0.1133, 0.1921, -0.1559]) tensor([0.2939, 0.2481, 0.2684, 0.1895]) -Greedy action tensor([ 0.1896, 0.1501, 0.1770, -0.2135]) tensor([0.2765, 0.2658, 0.2730, 0.1847]) -Greedy action tensor([ 2.9068e-01, 7.9989e-05, 1.3421e-02, -4.3892e-01]) tensor([0.3347, 0.2503, 0.2537, 0.1614]) -Greedy action tensor([ 0.3184, -0.0208, 0.2237, -0.1097]) tensor([0.3055, 0.2176, 0.2779, 0.1991]) -Greedy action tensor([ 0.2193, 0.0683, 0.0957, -0.2427]) tensor([0.2964, 0.2549, 0.2620, 0.1868]) -Greedy action tensor([ 0.2468, 0.0742, 0.1782, -0.3283]) tensor([0.2996, 0.2521, 0.2797, 0.1686]) -Greedy action tensor([ 0.6899, -0.4126, 0.1106, -0.6295]) tensor([0.4630, 0.1538, 0.2594, 0.1238]) -Greedy action tensor([ 0.2343, 0.0768, 0.0782, -0.2348]) tensor([0.2998, 0.2561, 0.2565, 0.1876]) -Greedy action tensor([ 0.4818, -0.2297, 0.1432, -0.3734]) tensor([0.3804, 0.1867, 0.2711, 0.1617]) -Greedy action tensor([ 0.2410, 0.0634, 0.1587, -0.2168]) tensor([0.2949, 0.2469, 0.2716, 0.1866]) -Greedy action tensor([ 0.3202, 0.1683, 0.1482, -0.2642]) tensor([0.3069, 0.2636, 0.2584, 0.1711]) -Greedy action tensor([ 0.7442, -0.2706, -0.1976, -0.4217]) tensor([0.4845, 0.1756, 0.1889, 0.1510]) -Greedy action tensor([ 0.3083, -0.0395, 0.0781, -0.2426]) tensor([0.3250, 0.2295, 0.2582, 0.1873]) -Greedy action tensor([ 0.3955, 0.0306, 0.2515, -0.4102]) tensor([0.3326, 0.2309, 0.2880, 0.1486]) -Greedy action tensor([ 0.4574, -0.1698, 0.1444, -0.4026]) tensor([0.3720, 0.1987, 0.2720, 0.1574]) -Greedy action tensor([ 0.2632, 0.2753, 0.0117, -0.2471]) tensor([0.2950, 0.2986, 0.2294, 0.1771]) -Greedy action tensor([ 0.4679, 0.0081, 0.0647, -0.1213]) tensor([0.3503, 0.2212, 0.2341, 0.1944]) -Greedy action tensor([ 0.2145, 0.0348, 0.1189, -0.2203]) tensor([0.2948, 0.2463, 0.2679, 0.1909]) -Greedy action tensor([ 0.3828, 0.1457, 0.0908, -0.2389]) tensor([0.3255, 0.2567, 0.2430, 0.1748]) -Greedy action tensor([ 0.3214, -0.0200, 0.1998, -0.3068]) tensor([0.3195, 0.2271, 0.2829, 0.1705]) -Greedy action tensor([ 0.3167, -0.1024, -0.0299, -0.2218]) tensor([0.3392, 0.2231, 0.2398, 0.1979]) -Greedy action tensor([-1.9311, -0.7318, 0.2235, -0.2162]) tensor([0.0541, 0.1794, 0.4662, 0.3004]) -Greedy action tensor([-1.0815, -0.5991, 0.4356, 0.5167]) tensor([0.0825, 0.1336, 0.3761, 0.4078]) -Greedy action tensor([-1.7358, -0.7460, 0.0856, -0.2500]) tensor([0.0700, 0.1883, 0.4325, 0.3092]) -Greedy action tensor([-1.9105, -0.6802, 0.7827, 0.0244]) tensor([0.0383, 0.1310, 0.5657, 0.2650]) -Greedy action tensor([-1.9110, -0.5832, 0.5304, -0.1641]) tensor([0.0455, 0.1715, 0.5223, 0.2608]) -Greedy action tensor([-1.3969, 0.0722, 0.6771, -0.6841]) tensor([0.0652, 0.2832, 0.5186, 0.1329]) -Greedy action tensor([-1.7628, -0.5407, 0.1961, -0.4172]) tensor([0.0652, 0.2215, 0.4627, 0.2506]) -Greedy action tensor([-2.0195, -0.7217, 0.7948, 0.1296]) tensor([0.0334, 0.1224, 0.5575, 0.2867]) -Greedy action tensor([-1.6397, -0.4441, 0.5342, -0.1138]) tensor([0.0565, 0.1868, 0.4968, 0.2599]) -Greedy action tensor([-0.2966, -0.1356, 0.2070, 0.3010]) tensor([0.1771, 0.2080, 0.2930, 0.3219]) -Greedy action tensor([-1.8999, -0.9819, 0.4355, -0.3249]) tensor([0.0536, 0.1341, 0.5535, 0.2588]) -Greedy action tensor([-1.3130, -0.4486, 0.4866, -0.2823]) tensor([0.0818, 0.1942, 0.4947, 0.2293]) -Greedy action tensor([-1.5551, -0.4767, 0.4183, 0.1400]) tensor([0.0603, 0.1773, 0.4339, 0.3285]) -Greedy action tensor([-1.5162, -0.7067, 0.7768, -0.0850]) tensor([0.0577, 0.1296, 0.5714, 0.2413]) -Greedy action tensor([-0.8408, -0.5815, 0.2680, 0.0784]) tensor([0.1276, 0.1654, 0.3869, 0.3200]) -Greedy action tensor([-0.9577, -0.6402, -0.2106, -0.3545]) tensor([0.1584, 0.2176, 0.3344, 0.2896]) -Greedy action tensor([-1.9459, -0.5888, 0.7438, -0.0538]) tensor([0.0381, 0.1480, 0.5611, 0.2527]) -Greedy action tensor([-0.8425, 0.4359, 0.0931, -0.2284]) tensor([0.1113, 0.3995, 0.2836, 0.2056]) -Greedy action tensor([-1.7220, -0.4798, 0.7824, 0.3412]) tensor([0.0407, 0.1410, 0.4980, 0.3203]) -Greedy action tensor([-2.0019, -0.9585, 0.6672, 0.0177]) tensor([0.0388, 0.1100, 0.5592, 0.2920]) -Greedy action tensor([-1.5073, -0.5115, 0.3025, -0.1458]) tensor([0.0729, 0.1973, 0.4453, 0.2845]) -Greedy action tensor([-0.9418, -0.5129, 0.5088, -0.3759]) tensor([0.1168, 0.1794, 0.4982, 0.2057]) -Greedy action tensor([-1.7545, -0.6734, 0.2779, -0.2394]) tensor([0.0620, 0.1828, 0.4732, 0.2821]) -Greedy action tensor([-0.8635, -0.3674, -0.4459, -0.1845]) tensor([0.1631, 0.2678, 0.2476, 0.3216]) -Greedy action tensor([-2.0475, -0.9101, 0.6268, -0.1230]) tensor([0.0393, 0.1224, 0.5693, 0.2690]) -Greedy action tensor([-1.6175, -0.6336, 0.3280, -0.3680]) tensor([0.0706, 0.1889, 0.4941, 0.2464]) -Greedy action tensor([-1.1042, -0.6641, 0.4390, 0.3782]) tensor([0.0859, 0.1335, 0.4022, 0.3784]) -Greedy action tensor([-2.0540, -0.8374, 0.6969, 0.0025]) tensor([0.0359, 0.1212, 0.5622, 0.2807]) -Greedy action tensor([-0.8910, -0.1633, -0.1127, -0.0970]) tensor([0.1340, 0.2775, 0.2919, 0.2965]) -Greedy action tensor([-2.0027, -0.9589, 0.3501, -0.2817]) tensor([0.0501, 0.1424, 0.5272, 0.2803]) -Greedy action tensor([-2.0086, -0.6404, 0.6021, -0.0711]) tensor([0.0393, 0.1542, 0.5341, 0.2724]) -Greedy action tensor([-1.3543, -0.6745, 0.5573, 0.4837]) tensor([0.0624, 0.1232, 0.4222, 0.3922]) -Greedy action tensor([-1.5782, -0.5645, 0.4842, 0.0088]) tensor([0.0606, 0.1669, 0.4764, 0.2961]) -Greedy action tensor([-1.6692, -0.6609, 0.1978, -0.4273]) tensor([0.0731, 0.2005, 0.4731, 0.2532]) -Greedy action tensor([-1.9257, -0.7548, 0.2281, -0.1949]) tensor([0.0541, 0.1744, 0.4661, 0.3053]) -Greedy action tensor([-1.8643, -0.6140, 0.7496, 0.0941]) tensor([0.0396, 0.1384, 0.5411, 0.2809]) -Greedy action tensor([-1.5695, -0.6492, 0.6919, -0.0665]) tensor([0.0568, 0.1426, 0.5452, 0.2554]) -Greedy action tensor([-1.3113, -0.0953, 0.3880, -0.1140]) tensor([0.0760, 0.2565, 0.4158, 0.2517]) -Greedy action tensor([-1.6998, -0.5110, 0.6055, 0.1196]) tensor([0.0488, 0.1603, 0.4896, 0.3012]) -Greedy action tensor([-0.7261, -0.4976, 0.2734, -0.0586]) tensor([0.1444, 0.1815, 0.3924, 0.2816]) -Greedy action tensor([-1.2065, -0.5597, 0.3975, 0.0115]) tensor([0.0888, 0.1695, 0.4415, 0.3001]) -Greedy action tensor([-1.0423, -0.6677, 0.3759, 0.3270]) tensor([0.0951, 0.1383, 0.3927, 0.3739]) -Greedy action tensor([-1.8208, -0.4363, 0.6024, -0.1380]) tensor([0.0462, 0.1844, 0.5210, 0.2484]) -Greedy action tensor([-2.0279, -0.9413, 0.7547, 0.0801]) tensor([0.0353, 0.1045, 0.5699, 0.2903]) -Greedy action tensor([-1.7501, -0.5615, 0.4875, -0.1227]) tensor([0.0534, 0.1751, 0.4999, 0.2716]) -Greedy action tensor([-1.9095, -0.5838, 0.8411, 0.0722]) tensor([0.0361, 0.1360, 0.5656, 0.2622]) -Greedy action tensor([-1.9129, -0.6865, 0.5403, 0.0184]) tensor([0.0436, 0.1486, 0.5069, 0.3008]) -Greedy action tensor([-1.6937, -0.5018, 0.2222, -0.3188]) tensor([0.0665, 0.2190, 0.4516, 0.2629]) -Greedy action tensor([-1.9148, -0.9921, 0.2959, -0.4300]) tensor([0.0586, 0.1475, 0.5350, 0.2589]) -Greedy action tensor([-1.0475, -0.2925, -0.3968, -0.1194]) tensor([0.1320, 0.2809, 0.2531, 0.3340]) -Greedy action tensor([-1.3434, -0.3462, 0.3930, -0.5577]) tensor([0.0863, 0.2340, 0.4902, 0.1894]) -Greedy action tensor([-1.2390, -0.0517, 0.5984, -0.5677]) tensor([0.0799, 0.2619, 0.5018, 0.1564]) -Greedy action tensor([-1.3571, -0.2497, 0.2479, 0.1388]) tensor([0.0743, 0.2247, 0.3696, 0.3314]) -Greedy action tensor([-1.0752, -0.6063, 0.4496, 0.5104]) tensor([0.0828, 0.1324, 0.3805, 0.4043]) -Greedy action tensor([-1.3404, -0.6070, 0.5107, -0.3646]) tensor([0.0826, 0.1721, 0.5261, 0.2192]) -Greedy action tensor([-2.0595, -0.8091, 0.7302, 0.0851]) tensor([0.0341, 0.1191, 0.5554, 0.2914]) -Greedy action tensor([-1.4544, -0.5532, 0.4420, 0.1913]) tensor([0.0653, 0.1609, 0.4352, 0.3387]) -Greedy action tensor([-0.9599, 0.0095, -0.4031, -0.1382]) tensor([0.1306, 0.3444, 0.2279, 0.2971]) -Greedy action tensor([-1.9593, -0.6598, 0.8579, 0.0564]) tensor([0.0346, 0.1269, 0.5788, 0.2597]) -Greedy action tensor([-1.8044, -0.6840, 0.0880, -0.3411]) tensor([0.0666, 0.2041, 0.4417, 0.2876]) -Greedy action tensor([-1.9843, -0.8832, 0.3715, -0.1838]) tensor([0.0485, 0.1459, 0.5118, 0.2937]) -Greedy action tensor([-2.0552, -0.7548, 0.8586, 0.1800]) tensor([0.0308, 0.1131, 0.5679, 0.2881]) -Greedy action tensor([-1.9718, -0.9476, 0.3328, -0.2626]) tensor([0.0517, 0.1441, 0.5184, 0.2858]) -Greedy action tensor([-1.8648, -0.7526, 0.2735, -0.1797]) tensor([0.0558, 0.1697, 0.4735, 0.3010]) -Greedy action tensor([-1.8950, -0.8078, 0.2565, -0.2529]) tensor([0.0564, 0.1673, 0.4849, 0.2914]) -Greedy action tensor([-1.9304, -1.0058, 0.3380, -0.1874]) tensor([0.0529, 0.1334, 0.5113, 0.3024]) -Greedy action tensor([-1.9828, -0.9878, 0.4263, -0.3248]) tensor([0.0498, 0.1347, 0.5540, 0.2614]) -Greedy action tensor([-1.8498, -0.9934, 0.2058, -0.4740]) tensor([0.0661, 0.1557, 0.5165, 0.2617]) -Greedy action tensor([-1.1108, -0.0537, 0.5216, -0.4489]) tensor([0.0915, 0.2633, 0.4680, 0.1773]) -Greedy action tensor([-2.0318, -0.8879, 0.5503, -0.0575]) tensor([0.0407, 0.1278, 0.5384, 0.2932]) -Greedy action tensor([-1.5832, -0.5456, 0.4948, -0.0270]) tensor([0.0604, 0.1705, 0.4826, 0.2864]) -Greedy action tensor([-1.3024, 0.0816, 0.1140, 0.1222]) tensor([0.0754, 0.3008, 0.3106, 0.3132]) -Greedy action tensor([-2.0165, -0.9421, 0.5576, -0.0908]) tensor([0.0418, 0.1225, 0.5488, 0.2869]) -Greedy action tensor([-1.5309, -0.4806, 0.4676, 0.2268]) tensor([0.0587, 0.1678, 0.4331, 0.3404]) -Greedy action tensor([-1.5497, -0.5513, 0.0540, -0.4143]) tensor([0.0848, 0.2300, 0.4214, 0.2638]) -Greedy action tensor([-1.7614, -0.6696, 0.2355, -0.2434]) tensor([0.0629, 0.1873, 0.4630, 0.2868]) -Greedy action tensor([-1.4476, -0.4800, 0.2055, -0.2659]) tensor([0.0825, 0.2172, 0.4311, 0.2691]) -Greedy action tensor([-0.7080, -0.2790, 0.3480, -0.2081]) tensor([0.1417, 0.2175, 0.4073, 0.2335]) -Greedy action tensor([-1.1138, -0.3315, 0.6611, -0.5670]) tensor([0.0925, 0.2022, 0.5456, 0.1598]) -Greedy action tensor([-2.0189, -0.8011, 0.8608, 0.0750]) tensor([0.0330, 0.1115, 0.5877, 0.2678]) -Greedy action tensor([-1.8782, -0.5729, 0.8172, 0.0516]) tensor([0.0379, 0.1398, 0.5613, 0.2610]) -Greedy action tensor([ 1.7072, -0.1816, 1.3699, 0.7128]) tensor([0.4475, 0.0677, 0.3193, 0.1655]) -Greedy action tensor([ 0.6107, -0.3852, 0.7252, 0.9070]) tensor([0.2607, 0.0963, 0.2923, 0.3506]) -Greedy action tensor([ 0.6891, -0.6286, 0.6551, 0.5908]) tensor([0.3184, 0.0853, 0.3078, 0.2886]) -Greedy action tensor([ 1.3317, -0.6682, 0.9543, 1.0534]) tensor([0.3879, 0.0525, 0.2659, 0.2937]) -Greedy action tensor([2.4431, 0.0066, 1.4994, 1.1292]) tensor([0.5729, 0.0501, 0.2230, 0.1540]) -Greedy action tensor([ 1.8898, -0.4141, 0.2327, 0.7858]) tensor([0.6165, 0.0616, 0.1176, 0.2044]) -Greedy action tensor([2.0595, 0.2865, 0.8724, 1.1297]) tensor([0.5349, 0.0908, 0.1632, 0.2111]) -Greedy action tensor([ 1.0459, -0.5156, 0.9559, 0.5924]) tensor([0.3624, 0.0760, 0.3312, 0.2303]) -Greedy action tensor([ 1.3010, -0.2999, 0.2580, 1.4733]) tensor([0.3647, 0.0736, 0.1285, 0.4333]) -Greedy action tensor([ 2.0793, -0.0073, 0.3946, 1.8944]) tensor([0.4671, 0.0580, 0.0867, 0.3883]) -Greedy action tensor([ 1.4609, 0.3660, -0.0833, 0.4362]) tensor([0.5244, 0.1755, 0.1119, 0.1882]) -Greedy action tensor([ 1.6258, -0.0966, 1.0912, 0.8813]) tensor([0.4465, 0.0798, 0.2616, 0.2121]) -Greedy action tensor([ 1.8431, -0.3732, 0.8386, 1.4697]) tensor([0.4622, 0.0504, 0.1693, 0.3182]) -Greedy action tensor([ 2.2746, -0.7721, -0.1928, 1.3579]) tensor([0.6527, 0.0310, 0.0553, 0.2610]) -Greedy action tensor([ 2.0742, -0.5374, 1.4059, 1.6834]) tensor([0.4420, 0.0324, 0.2266, 0.2990]) -Greedy action tensor([ 1.7714, -0.6195, 0.9128, 0.9654]) tensor([0.5097, 0.0467, 0.2160, 0.2277]) -Greedy action tensor([ 1.3182, -0.1607, 1.5454, 0.6283]) tensor([0.3350, 0.0764, 0.4205, 0.1681]) -Greedy action tensor([ 1.0084, -0.9701, 0.8980, 0.6624]) tensor([0.3648, 0.0504, 0.3267, 0.2581]) -Greedy action tensor([ 0.5814, -1.7204, 0.8393, 0.0793]) tensor([0.3334, 0.0334, 0.4315, 0.2018]) -Greedy action tensor([1.5972, 0.0647, 0.6557, 1.1671]) tensor([0.4432, 0.0957, 0.1729, 0.2883]) -Greedy action tensor([ 1.8572, -0.8300, 1.0120, 1.1286]) tensor([0.5050, 0.0344, 0.2169, 0.2437]) -Greedy action tensor([ 1.5182, -0.5034, 1.1108, 0.7472]) tensor([0.4424, 0.0586, 0.2944, 0.2046]) -Greedy action tensor([ 1.9484, -0.1397, 1.1910, 0.8149]) tensor([0.5223, 0.0647, 0.2449, 0.1681]) -Greedy action tensor([ 0.7089, -0.0156, -0.6737, 1.6384]) tensor([0.2343, 0.1135, 0.0588, 0.5935]) -Greedy action tensor([1.0619, 0.2853, 0.3761, 0.7773]) tensor([0.3682, 0.1694, 0.1854, 0.2770]) -Greedy action tensor([ 1.2413, -1.2929, 0.9283, 0.4309]) tensor([0.4434, 0.0352, 0.3243, 0.1972]) -Greedy action tensor([ 2.1386, -0.1506, 1.4592, 1.2613]) tensor([0.4940, 0.0501, 0.2504, 0.2055]) -Greedy action tensor([ 1.6479, -0.6405, -0.1444, 1.4904]) tensor([0.4712, 0.0478, 0.0785, 0.4025]) -Greedy action tensor([ 1.6513, -0.2879, 0.6856, 1.0798]) tensor([0.4786, 0.0688, 0.1822, 0.2703]) -Greedy action tensor([1.8845, 0.2673, 0.9552, 0.5122]) tensor([0.5415, 0.1075, 0.2138, 0.1373]) -Greedy action tensor([ 1.3038, 0.0657, -0.3739, 1.0049]) tensor([0.4508, 0.1307, 0.0842, 0.3343]) -Greedy action tensor([ 1.4871, 0.0681, -0.3835, 1.7274]) tensor([0.3749, 0.0907, 0.0577, 0.4767]) -Greedy action tensor([ 1.5012, 0.3008, -0.6062, 1.1938]) tensor([0.4634, 0.1395, 0.0563, 0.3408]) -Greedy action tensor([ 1.3694, -0.5223, 1.1551, 0.8498]) tensor([0.3917, 0.0591, 0.3162, 0.2330]) -Greedy action tensor([ 1.3644, 0.3198, -0.0230, 1.0136]) tensor([0.4337, 0.1526, 0.1083, 0.3054]) -Greedy action tensor([ 1.3365, -0.4032, 0.7185, 1.1977]) tensor([0.3869, 0.0679, 0.2085, 0.3367]) -Greedy action tensor([ 1.2960, -1.4591, 1.6741, 1.0955]) tensor([0.2993, 0.0190, 0.4368, 0.2449]) -Greedy action tensor([ 1.9702, -0.2899, 0.3362, 0.7035]) tensor([0.6324, 0.0660, 0.1234, 0.1782]) -Greedy action tensor([ 1.6778, -0.9568, 1.1202, 1.5748]) tensor([0.3927, 0.0282, 0.2249, 0.3543]) -Greedy action tensor([ 1.9661, -1.0492, 1.3475, 1.3389]) tensor([0.4713, 0.0231, 0.2539, 0.2517]) -Greedy action tensor([ 2.2055, -0.6500, 0.7504, 0.8228]) tensor([0.6486, 0.0373, 0.1514, 0.1627]) -Greedy action tensor([ 1.5075, -0.1932, 1.0725, 0.9216]) tensor([0.4190, 0.0765, 0.2712, 0.2332]) -Greedy action tensor([ 1.5146, -0.8289, 0.5084, 0.9340]) tensor([0.4948, 0.0475, 0.1809, 0.2769]) -Greedy action tensor([ 1.6907, -0.5327, 0.7256, 0.5388]) tensor([0.5540, 0.0600, 0.2110, 0.1751]) -Greedy action tensor([ 1.1625, -0.8788, 1.5652, 0.3104]) tensor([0.3276, 0.0425, 0.4901, 0.1397]) -Greedy action tensor([1.7019, 0.2164, 0.0179, 0.3998]) tensor([0.5938, 0.1344, 0.1102, 0.1615]) -Greedy action tensor([ 1.3469, -0.5652, -0.0266, 1.0563]) tensor([0.4654, 0.0688, 0.1178, 0.3480]) -Greedy action tensor([1.4661, 0.4779, 1.1127, 0.7390]) tensor([0.3909, 0.1455, 0.2746, 0.1890]) -Greedy action tensor([ 1.3286, -0.3755, 0.1770, 0.9761]) tensor([0.4544, 0.0827, 0.1436, 0.3194]) -Greedy action tensor([ 1.5757, -0.5415, 1.4700, 1.0115]) tensor([0.3863, 0.0465, 0.3475, 0.2197]) -Greedy action tensor([ 0.8698, -0.4743, 0.4696, 2.0022]) tensor([0.1987, 0.0518, 0.1331, 0.6164]) -Greedy action tensor([ 1.8234, -0.4189, 0.7544, 0.8396]) tensor([0.5484, 0.0582, 0.1883, 0.2050]) -Greedy action tensor([ 1.3898, -0.3193, -0.1051, 0.7071]) tensor([0.5234, 0.0947, 0.1174, 0.2645]) -Greedy action tensor([ 1.4925, -0.3779, 0.5631, 1.5425]) tensor([0.3846, 0.0593, 0.1518, 0.4043]) -Greedy action tensor([ 1.8542, -0.0106, 0.9677, 1.9594]) tensor([0.3734, 0.0579, 0.1539, 0.4148]) -Greedy action tensor([1.6366, 0.2681, 1.1136, 1.1990]) tensor([0.4011, 0.1021, 0.2378, 0.2590]) -Greedy action tensor([ 1.6631, -1.2053, 0.3084, 1.3804]) tensor([0.4834, 0.0275, 0.1247, 0.3644]) -Greedy action tensor([1.5916, 0.1358, 1.0675, 1.3101]) tensor([0.3876, 0.0904, 0.2295, 0.2925]) -Greedy action tensor([ 1.5186, -0.3626, 1.0275, 0.9198]) tensor([0.4322, 0.0659, 0.2645, 0.2375]) -Greedy action tensor([ 1.7453, -0.0285, 0.9717, 1.6322]) tensor([0.3962, 0.0672, 0.1828, 0.3538]) -Greedy action tensor([ 1.6667, -0.5058, 0.7096, 1.1101]) tensor([0.4829, 0.0550, 0.1854, 0.2767]) -Greedy action tensor([ 1.7781, -0.1403, 0.2261, 1.1201]) tensor([0.5329, 0.0783, 0.1129, 0.2760]) -Greedy action tensor([ 1.3580, -0.9818, 1.1518, 1.2965]) tensor([0.3508, 0.0338, 0.2855, 0.3299]) -Greedy action tensor([ 0.7344, -0.2878, 0.7122, 0.9916]) tensor([0.2754, 0.0991, 0.2693, 0.3562]) -Greedy action tensor([ 1.6183, -0.3781, 0.7429, 1.1764]) tensor([0.4555, 0.0619, 0.1898, 0.2928]) -Greedy action tensor([ 1.4259, -0.0430, -0.7664, 1.0916]) tensor([0.4860, 0.1119, 0.0543, 0.3479]) -Greedy action tensor([ 1.2912, -0.2145, 0.9307, 1.5928]) tensor([0.3057, 0.0678, 0.2132, 0.4133]) -Greedy action tensor([ 1.1051, -0.9602, 1.4084, 0.7049]) tensor([0.3173, 0.0402, 0.4298, 0.2127]) -Greedy action tensor([ 1.2774, -0.5893, 1.0567, 1.6612]) tensor([0.2920, 0.0452, 0.2342, 0.4286]) -Greedy action tensor([ 1.8516, 0.4093, -0.3206, 1.9835]) tensor([0.4014, 0.0949, 0.0457, 0.4580]) -Greedy action tensor([1.3943, 0.7456, 0.0337, 1.0218]) tensor([0.4052, 0.2118, 0.1039, 0.2791]) -Greedy action tensor([ 1.0951, -0.5878, 0.7811, 0.3486]) tensor([0.4183, 0.0777, 0.3056, 0.1983]) -Greedy action tensor([ 1.2910, 0.0599, -0.2371, 0.8521]) tensor([0.4643, 0.1356, 0.1007, 0.2994]) -Greedy action tensor([ 1.0693, -0.9186, 0.7621, 0.9949]) tensor([0.3570, 0.0489, 0.2626, 0.3315]) -Greedy action tensor([ 1.0469, -0.1318, -0.0493, 0.0116]) tensor([0.5008, 0.1541, 0.1673, 0.1778]) -Greedy action tensor([ 2.0533, -1.5293, 1.0423, 1.6815]) tensor([0.4805, 0.0134, 0.1748, 0.3313]) -Greedy action tensor([ 2.1986, -0.7531, 0.3543, 1.9954]) tensor([0.4935, 0.0258, 0.0780, 0.4027]) -Greedy action tensor([1.5562, 0.1784, 0.3791, 0.9101]) tensor([0.4798, 0.1210, 0.1478, 0.2514]) -Greedy action tensor([ 1.3305, -0.4013, 1.0745, 1.1245]) tensor([0.3617, 0.0640, 0.2800, 0.2943]) -Greedy action tensor([ 1.2134, -0.2642, 1.2123, 0.7067]) tensor([0.3534, 0.0806, 0.3530, 0.2129]) -Greedy action tensor([ 0.4396, -1.0369, 0.3786, 0.6103]) tensor([0.2980, 0.0681, 0.2804, 0.3535]) -Greedy action tensor([ 1.2059, 0.1139, -0.0818, 1.1006]) tensor([0.3982, 0.1336, 0.1099, 0.3584]) -Greedy action tensor([ 0.6179, -0.4977, -0.0772, 0.0707]) tensor([0.4158, 0.1362, 0.2075, 0.2405]) -Greedy action tensor([ 0.9300, -0.5403, -0.2546, 0.2305]) tensor([0.4920, 0.1131, 0.1505, 0.2444]) -Greedy action tensor([ 1.1684, -0.1344, -0.2107, -0.1528]) tensor([0.5585, 0.1518, 0.1406, 0.1490]) -Greedy action tensor([ 0.9173, 0.0424, -0.0538, -0.0092]) tensor([0.4563, 0.1902, 0.1728, 0.1807]) -Greedy action tensor([ 1.2982, -0.4641, -0.4333, -0.1377]) tensor([0.6303, 0.1082, 0.1116, 0.1499]) -Greedy action tensor([ 0.9063, -0.4418, 0.0688, 0.1280]) tensor([0.4648, 0.1207, 0.2011, 0.2134]) -Greedy action tensor([ 0.6384, -0.3318, -0.0201, -0.0104]) tensor([0.4133, 0.1567, 0.2140, 0.2160]) -Greedy action tensor([ 0.8122, -0.4322, -0.2327, 0.1343]) tensor([0.4656, 0.1342, 0.1638, 0.2364]) -Greedy action tensor([ 1.2298, -0.5072, -0.1662, 0.3089]) tensor([0.5489, 0.0966, 0.1359, 0.2186]) -Greedy action tensor([ 1.1016, -0.7124, -0.2220, 0.1853]) tensor([0.5467, 0.0891, 0.1455, 0.2187]) -Greedy action tensor([ 1.0163, -0.6407, -0.2273, 0.1030]) tensor([0.5318, 0.1014, 0.1534, 0.2134]) -Greedy action tensor([ 1.5474, -0.4438, -0.4265, -0.1355]) tensor([0.6843, 0.0934, 0.0951, 0.1272]) -Greedy action tensor([ 0.9833, -0.0620, -0.0922, -0.2034]) tensor([0.5005, 0.1760, 0.1707, 0.1528]) -Greedy action tensor([ 1.0666, -0.6810, -0.4008, 0.2547]) tensor([0.5409, 0.0942, 0.1247, 0.2402]) -Greedy action tensor([ 1.0557, -0.4576, -0.0943, 0.2339]) tensor([0.5059, 0.1114, 0.1602, 0.2224]) -Greedy action tensor([ 0.9077, -0.4607, -0.0843, 0.1499]) tensor([0.4775, 0.1215, 0.1771, 0.2238]) -Greedy action tensor([ 0.8857, -0.4120, -0.1403, 0.1783]) tensor([0.4707, 0.1286, 0.1687, 0.2320]) -Greedy action tensor([ 1.2331, -0.3308, 0.1559, -0.1589]) tensor([0.5560, 0.1164, 0.1894, 0.1382]) -Greedy action tensor([ 1.4604, -0.3433, -0.5919, -0.1676]) tensor([0.6714, 0.1106, 0.0862, 0.1318]) -Greedy action tensor([ 0.8945, -0.4708, -0.4112, 0.0874]) tensor([0.5070, 0.1294, 0.1374, 0.2262]) -Greedy action tensor([ 1.4186, -0.3400, -0.3442, -0.1541]) tensor([0.6446, 0.1111, 0.1106, 0.1337]) -Greedy action tensor([ 0.7483, -0.3967, -0.1424, -0.1270]) tensor([0.4661, 0.1483, 0.1913, 0.1943]) -Greedy action tensor([ 0.7075, -0.5052, -0.1203, 0.0695]) tensor([0.4420, 0.1314, 0.1931, 0.2335]) -Greedy action tensor([ 1.1172, -0.1327, 0.0933, -0.0875]) tensor([0.5140, 0.1473, 0.1846, 0.1541]) -Greedy action tensor([ 0.9402, -0.1796, 0.0509, -0.1329]) tensor([0.4809, 0.1570, 0.1976, 0.1645]) -Greedy action tensor([ 1.0406, -0.4237, -0.2897, -0.0967]) tensor([0.5506, 0.1273, 0.1456, 0.1766]) -Greedy action tensor([ 0.6280, -0.5052, -0.0767, -0.0768]) tensor([0.4328, 0.1394, 0.2139, 0.2139]) -Greedy action tensor([ 1.4866, -0.5258, -0.6208, -0.1795]) tensor([0.6924, 0.0926, 0.0842, 0.1309]) -Greedy action tensor([ 1.0029, -0.6463, -0.4545, 0.2561]) tensor([0.5266, 0.1012, 0.1226, 0.2495]) -Greedy action tensor([ 0.6107, -0.2472, -0.0395, 0.0611]) tensor([0.3963, 0.1681, 0.2069, 0.2287]) -Greedy action tensor([ 1.4677, -0.6436, -0.1403, -0.0430]) tensor([0.6485, 0.0785, 0.1299, 0.1432]) -Greedy action tensor([ 0.7449, -0.2891, -0.1562, 0.0855]) tensor([0.4388, 0.1560, 0.1782, 0.2269]) -Greedy action tensor([ 1.4955, -0.3408, -0.3137, -0.2749]) tensor([0.6696, 0.1067, 0.1097, 0.1140]) -Greedy action tensor([ 0.8961, -0.6058, -0.2809, 0.1102]) tensor([0.5034, 0.1121, 0.1551, 0.2294]) -Greedy action tensor([ 8.6379e-01, 3.6153e-02, 2.1487e-05, -6.7720e-02]) tensor([0.4439, 0.1940, 0.1871, 0.1749]) -Greedy action tensor([ 0.9301, -0.0153, -0.0575, 0.0562]) tensor([0.4591, 0.1784, 0.1710, 0.1916]) -Greedy action tensor([ 1.0362, -0.5087, -0.0906, 0.2168]) tensor([0.5055, 0.1078, 0.1638, 0.2228]) -Greedy action tensor([ 0.8965, -0.6019, -0.0122, 0.1214]) tensor([0.4791, 0.1071, 0.1931, 0.2207]) -Greedy action tensor([ 1.0640, -0.3090, -0.2710, -0.2000]) tensor([0.5559, 0.1408, 0.1463, 0.1570]) -Greedy action tensor([ 1.4177, -0.4174, 0.0516, 0.0687]) tensor([0.5973, 0.0953, 0.1524, 0.1550]) -Greedy action tensor([ 1.6125, -0.5826, -0.4003, -0.0900]) tensor([0.7007, 0.0780, 0.0936, 0.1277]) -Greedy action tensor([ 1.6233, -0.4377, -0.5275, -0.0730]) tensor([0.7007, 0.0892, 0.0816, 0.1285]) -Greedy action tensor([ 0.8886, -0.4364, -0.1435, -0.1762]) tensor([0.5084, 0.1351, 0.1811, 0.1753]) -Greedy action tensor([ 0.8432, -0.6348, -0.2441, -0.0671]) tensor([0.5082, 0.1159, 0.1713, 0.2045]) -Greedy action tensor([ 1.1032, -0.5301, -0.2875, 0.1589]) tensor([0.5455, 0.1065, 0.1358, 0.2122]) -Greedy action tensor([ 1.4263, -0.4740, -0.5618, -0.1879]) tensor([0.6732, 0.1007, 0.0922, 0.1340]) -Greedy action tensor([ 0.7447, -0.3934, -0.1466, 0.0344]) tensor([0.4500, 0.1442, 0.1846, 0.2212]) -Greedy action tensor([ 0.9695, -0.4363, -0.1608, -0.0139]) tensor([0.5149, 0.1262, 0.1663, 0.1926]) -Greedy action tensor([ 0.9268, -0.5556, -0.1224, 0.0355]) tensor([0.5031, 0.1143, 0.1762, 0.2064]) -Greedy action tensor([ 1.7044, -0.1677, -0.2494, -0.0593]) tensor([0.6817, 0.1048, 0.0966, 0.1169]) -Greedy action tensor([ 1.4932, -0.1386, -0.1263, -0.1301]) tensor([0.6286, 0.1229, 0.1245, 0.1240]) -Greedy action tensor([ 1.2794, -0.4167, -0.2230, 0.1196]) tensor([0.5815, 0.1067, 0.1295, 0.1823]) -Greedy action tensor([ 0.7428, -0.3546, -0.0944, 0.0218]) tensor([0.4439, 0.1481, 0.1922, 0.2158]) -Greedy action tensor([ 0.7269, -0.4625, 0.0806, 0.1651]) tensor([0.4169, 0.1269, 0.2185, 0.2377]) -Greedy action tensor([ 1.6767, -0.3108, -0.5394, -0.1211]) tensor([0.7084, 0.0971, 0.0772, 0.1173]) -Greedy action tensor([ 1.1729, -0.5173, -0.3092, 0.1937]) tensor([0.5595, 0.1032, 0.1271, 0.2101]) -Greedy action tensor([ 0.8685, -0.4299, 0.2081, -0.3218]) tensor([0.4776, 0.1304, 0.2468, 0.1453]) -Greedy action tensor([ 1.0174, -0.0348, -0.0977, 0.0237]) tensor([0.4885, 0.1706, 0.1602, 0.1808]) -Greedy action tensor([ 1.2121, -0.0655, -0.0580, -0.1564]) tensor([0.5513, 0.1536, 0.1548, 0.1403]) -Greedy action tensor([ 1.5109, -0.7197, -0.2879, 0.2583]) tensor([0.6415, 0.0689, 0.1062, 0.1833]) -Greedy action tensor([ 0.4762, -0.4758, 0.0139, -0.1222]) tensor([0.3898, 0.1504, 0.2455, 0.2143]) -Greedy action tensor([ 1.3706, -0.7171, -0.0403, 0.1724]) tensor([0.5989, 0.0743, 0.1461, 0.1807]) -Greedy action tensor([ 0.6562, -0.6198, -0.2276, 0.1926]) tensor([0.4308, 0.1202, 0.1780, 0.2710]) -Greedy action tensor([ 0.7478, -0.3561, -0.1441, 0.2044]) tensor([0.4306, 0.1428, 0.1765, 0.2501]) -Greedy action tensor([ 0.7941, -0.2594, -0.1085, 0.1255]) tensor([0.4412, 0.1538, 0.1789, 0.2261]) -Greedy action tensor([ 0.9286, -0.4594, -0.1935, -0.1250]) tensor([0.5198, 0.1297, 0.1692, 0.1812]) -Greedy action tensor([ 0.4764, -0.4852, -0.0470, 0.0393]) tensor([0.3816, 0.1459, 0.2261, 0.2465]) -Greedy action tensor([ 1.0064, -0.2718, -0.1742, -0.1052]) tensor([0.5223, 0.1455, 0.1604, 0.1718]) -Greedy action tensor([ 1.2331, -0.0976, -0.2250, -0.0636]) tensor([0.5648, 0.1493, 0.1314, 0.1544]) -Greedy action tensor([ 0.7506, -0.4253, -0.1319, 0.0717]) tensor([0.4485, 0.1384, 0.1856, 0.2275]) -Greedy action tensor([ 0.9062, -0.0167, -0.0869, -0.0418]) tensor([0.4640, 0.1844, 0.1719, 0.1798]) -Greedy action tensor([ 0.7127, -0.3399, -0.0533, -0.1037]) tensor([0.4433, 0.1547, 0.2061, 0.1959]) -Greedy action tensor([ 1.1106, -0.4834, 0.0481, -0.2534]) tensor([0.5542, 0.1126, 0.1915, 0.1417]) -Greedy action tensor([ 0.8031, -0.7072, -0.3481, 0.1762]) tensor([0.4828, 0.1066, 0.1527, 0.2579]) -Greedy action tensor([ 0.4950, -0.4023, 0.0030, -0.0251]) tensor([0.3826, 0.1560, 0.2339, 0.2275]) -Greedy action tensor([ 1.1617, -0.4419, -0.2814, 0.1340]) tensor([0.5570, 0.1121, 0.1316, 0.1993]) -Greedy action tensor([ 1.3329, -0.5755, -0.2012, 0.1958]) tensor([0.5936, 0.0880, 0.1280, 0.1904]) -Greedy action tensor([ 0.5364, -0.4321, -0.0575, -0.0258]) tensor([0.3997, 0.1518, 0.2207, 0.2278]) -Greedy action tensor([ 0.5062, -0.5581, -0.1743, 0.0277]) tensor([0.4047, 0.1396, 0.2049, 0.2508]) -Greedy action tensor([ 1.4594, -0.5658, -0.6198, -0.1519]) tensor([0.6865, 0.0906, 0.0858, 0.1370]) -Greedy action tensor([ 1.1669, -0.4690, -0.3076, 0.0130]) tensor([0.5750, 0.1120, 0.1316, 0.1814]) -Greedy action tensor([ 0.4846, 0.0214, 0.0224, -0.2010]) tensor([0.3619, 0.2278, 0.2280, 0.1823]) -Greedy action tensor([ 0.3838, 0.1067, 0.1705, -0.2233]) tensor([0.3215, 0.2437, 0.2597, 0.1752]) -Greedy action tensor([ 0.4102, -0.0608, 0.0994, -0.1558]) tensor([0.3419, 0.2135, 0.2505, 0.1941]) -Greedy action tensor([ 0.2918, 0.1078, 0.1674, -0.0391]) tensor([0.2913, 0.2423, 0.2572, 0.2092]) -Greedy action tensor([ 0.4132, -0.0549, 0.1086, -0.2813]) tensor([0.3493, 0.2187, 0.2576, 0.1744]) -Greedy action tensor([ 0.9083, -0.2790, -0.0689, -0.5995]) tensor([0.5255, 0.1603, 0.1978, 0.1164]) -Greedy action tensor([ 0.4250, -0.1580, 0.0437, -0.3340]) tensor([0.3691, 0.2060, 0.2521, 0.1728]) -Greedy action tensor([ 0.7213, -0.2068, -0.0973, -0.5630]) tensor([0.4732, 0.1871, 0.2087, 0.1310]) -Greedy action tensor([ 0.5902, -0.2741, 0.1565, -0.5948]) tensor([0.4210, 0.1774, 0.2729, 0.1287]) -Greedy action tensor([ 0.6835, -0.2530, -0.0963, -0.3943]) tensor([0.4564, 0.1789, 0.2093, 0.1554]) -Greedy action tensor([ 0.4846, 0.0561, 0.1125, -0.2481]) tensor([0.3544, 0.2309, 0.2443, 0.1703]) -Greedy action tensor([ 0.3963, -0.1910, -0.0179, -0.4342]) tensor([0.3770, 0.2095, 0.2492, 0.1643]) -Greedy action tensor([ 0.4756, -0.1574, 0.2338, -0.5929]) tensor([0.3760, 0.1996, 0.2952, 0.1292]) -Greedy action tensor([ 0.4746, -0.1408, 0.2122, -0.5048]) tensor([0.3724, 0.2013, 0.2865, 0.1398]) -Greedy action tensor([ 0.4437, 0.0340, 0.0518, -0.3033]) tensor([0.3554, 0.2360, 0.2402, 0.1684]) -Greedy action tensor([ 0.5043, 0.0874, 0.0610, -0.3441]) tensor([0.3664, 0.2415, 0.2352, 0.1569]) -Greedy action tensor([ 0.3765, -0.0648, 0.0673, -0.0874]) tensor([0.3326, 0.2140, 0.2442, 0.2092]) -Greedy action tensor([ 0.3189, -0.1569, -0.0695, -0.4688]) tensor([0.3630, 0.2256, 0.2462, 0.1652]) -Greedy action tensor([ 0.4305, 0.0289, 0.2196, -0.3162]) tensor([0.3386, 0.2266, 0.2742, 0.1605]) -Greedy action tensor([ 0.2210, 0.0348, 0.1606, -0.1659]) tensor([0.2898, 0.2406, 0.2728, 0.1968]) -Greedy action tensor([ 0.1744, 0.0385, 0.0714, -0.1587]) tensor([0.2864, 0.2500, 0.2584, 0.2053]) -Greedy action tensor([ 0.2609, 0.2031, 0.1502, -0.1922]) tensor([0.2878, 0.2716, 0.2576, 0.1829]) -Greedy action tensor([ 0.2676, 0.2145, 0.1267, -0.2001]) tensor([0.2904, 0.2754, 0.2522, 0.1819]) -Greedy action tensor([ 0.2422, 0.0064, 0.2291, -0.0720]) tensor([0.2851, 0.2252, 0.2814, 0.2083]) -Greedy action tensor([ 0.1867, 0.0880, 0.1544, -0.1842]) tensor([0.2806, 0.2542, 0.2716, 0.1936]) -Greedy action tensor([ 0.4544, -0.0431, 0.0896, -0.1971]) tensor([0.3542, 0.2154, 0.2459, 0.1846]) -Greedy action tensor([ 0.1810, -0.0039, -0.0544, -0.0694]) tensor([0.2941, 0.2445, 0.2324, 0.2290]) -Greedy action tensor([ 0.1121, 0.0784, 0.1581, -0.1810]) tensor([0.2660, 0.2572, 0.2785, 0.1984]) -Greedy action tensor([ 0.4144, -0.1475, 0.1909, -0.4672]) tensor([0.3592, 0.2048, 0.2873, 0.1488]) -Greedy action tensor([ 0.3254, -0.0999, 0.1427, -0.2418]) tensor([0.3275, 0.2140, 0.2728, 0.1857]) -Greedy action tensor([ 0.6763, -0.1987, -0.0086, -0.3672]) tensor([0.4399, 0.1834, 0.2218, 0.1549]) -Greedy action tensor([ 0.4319, -0.1953, -0.0642, -0.4540]) tensor([0.3913, 0.2090, 0.2383, 0.1614]) -Greedy action tensor([ 0.3382, -0.0447, 0.1552, -0.2088]) tensor([0.3233, 0.2204, 0.2692, 0.1871]) -Greedy action tensor([ 0.5104, -0.1012, 0.0482, -0.2787]) tensor([0.3807, 0.2065, 0.2398, 0.1729]) -Greedy action tensor([ 0.4008, -0.1105, 0.2163, -0.3571]) tensor([0.3448, 0.2068, 0.2868, 0.1616]) -Greedy action tensor([ 0.3637, -0.0727, 0.1990, -0.4118]) tensor([0.3384, 0.2187, 0.2870, 0.1558]) -Greedy action tensor([ 0.3749, 0.1125, 0.1287, -0.1836]) tensor([0.3202, 0.2463, 0.2503, 0.1832]) -Greedy action tensor([ 0.4544, -0.1144, 0.0602, -0.3836]) tensor([0.3741, 0.2118, 0.2522, 0.1618]) -Greedy action tensor([ 1.1897, -0.6822, 0.0881, -0.7808]) tensor([0.6152, 0.0946, 0.2044, 0.0858]) -Greedy action tensor([ 0.4540, -0.0310, 0.0138, -0.1834]) tensor([0.3587, 0.2208, 0.2309, 0.1896]) -Greedy action tensor([0.3883, 0.1736, 0.1534, 0.0293]) tensor([0.3034, 0.2448, 0.2399, 0.2119]) -Greedy action tensor([ 0.3186, 0.1228, 0.1980, -0.3450]) tensor([0.3102, 0.2550, 0.2750, 0.1598]) -Greedy action tensor([ 0.4104, -0.1373, 0.0786, -0.4622]) tensor([0.3685, 0.2131, 0.2645, 0.1540]) -Greedy action tensor([ 0.6949, -0.2689, -0.0628, -0.3679]) tensor([0.4554, 0.1737, 0.2135, 0.1573]) -Greedy action tensor([ 0.1324, 0.0619, 0.1213, -0.2353]) tensor([0.2768, 0.2579, 0.2737, 0.1916]) -Greedy action tensor([ 0.2925, -0.0484, 0.0933, -0.3330]) tensor([0.3262, 0.2320, 0.2673, 0.1745]) -Greedy action tensor([ 0.4229, -0.1703, 0.0900, -0.6022]) tensor([0.3805, 0.2102, 0.2728, 0.1365]) -Greedy action tensor([ 0.7223, 0.0619, 0.0948, -0.2222]) tensor([0.4099, 0.2118, 0.2189, 0.1594]) -Greedy action tensor([ 0.5219, 0.2205, 0.0108, -0.2116]) tensor([0.3546, 0.2623, 0.2127, 0.1703]) -Greedy action tensor([ 0.3388, -0.1014, 0.2259, -0.0572]) tensor([0.3115, 0.2006, 0.2783, 0.2096]) -Greedy action tensor([ 0.2985, 0.1649, 0.1087, -0.2876]) tensor([0.3069, 0.2685, 0.2538, 0.1708]) -Greedy action tensor([ 0.2450, 0.1301, 0.1664, -0.0192]) tensor([0.2790, 0.2487, 0.2580, 0.2143]) -Greedy action tensor([ 0.5406, 0.0965, 0.0016, -0.1751]) tensor([0.3685, 0.2364, 0.2150, 0.1802]) -Greedy action tensor([ 0.3521, 0.1160, 0.1818, -0.3566]) tensor([0.3200, 0.2527, 0.2699, 0.1575]) -Greedy action tensor([ 0.2710, 0.2564, 0.0584, -0.2610]) tensor([0.2957, 0.2915, 0.2391, 0.1737]) -Greedy action tensor([ 0.3895, -0.1084, 0.1552, -0.3764]) tensor([0.3492, 0.2122, 0.2762, 0.1623]) -Greedy action tensor([ 0.3964, 0.1391, 0.0183, -0.2275]) tensor([0.3340, 0.2582, 0.2288, 0.1790]) -Greedy action tensor([ 0.7638, -0.4345, -0.0326, -0.5988]) tensor([0.4978, 0.1502, 0.2245, 0.1274]) -Greedy action tensor([ 0.2214, 0.0801, 0.2726, -0.1079]) tensor([0.2747, 0.2385, 0.2891, 0.1976]) -Greedy action tensor([ 0.6507, -0.5631, 0.1703, -0.6974]) tensor([0.4597, 0.1366, 0.2843, 0.1194]) -Greedy action tensor([ 0.5863, -0.0848, -0.0178, -0.4840]) tensor([0.4166, 0.2129, 0.2277, 0.1428]) -Greedy action tensor([ 0.3852, -0.2384, 0.0037, -0.3164]) tensor([0.3684, 0.1975, 0.2515, 0.1826]) -Greedy action tensor([ 0.3918, -0.1286, 0.1126, -0.2567]) tensor([0.3480, 0.2068, 0.2632, 0.1820]) -Greedy action tensor([ 0.2566, 0.1020, 0.0694, -0.0386]) tensor([0.2915, 0.2497, 0.2417, 0.2170]) -Greedy action tensor([ 0.1446, 0.0267, 0.0695, -0.3541]) tensor([0.2921, 0.2596, 0.2709, 0.1774]) -Greedy action tensor([ 0.2346, 0.0270, 0.0840, -0.2160]) tensor([0.3021, 0.2455, 0.2599, 0.1925]) -Greedy action tensor([ 0.4683, -0.1279, 0.0884, -0.2102]) tensor([0.3647, 0.2009, 0.2494, 0.1850]) -Greedy action tensor([ 0.3638, -0.0780, 0.0921, -0.2940]) tensor([0.3421, 0.2199, 0.2607, 0.1772]) -Greedy action tensor([ 0.5443, -0.1464, 0.1600, -0.3309]) tensor([0.3848, 0.1929, 0.2620, 0.1604]) -Greedy action tensor([ 0.2001, -0.0315, 0.2877, -0.0549]) tensor([0.2732, 0.2168, 0.2983, 0.2117]) -Greedy action tensor([ 0.4196, 0.1386, 0.1754, -0.1417]) tensor([0.3217, 0.2429, 0.2520, 0.1835]) -Greedy action tensor([ 0.6052, -0.2322, 0.0686, -0.6703]) tensor([0.4354, 0.1884, 0.2546, 0.1216]) -Greedy action tensor([ 0.2070, -0.0768, -0.0059, -0.3541]) tensor([0.3193, 0.2404, 0.2581, 0.1822]) -Greedy action tensor([ 1.0236, -0.5738, -0.0946, -0.6950]) tensor([0.5853, 0.1185, 0.1913, 0.1049]) -Greedy action tensor([ 0.4915, -0.1236, 0.0740, -0.2955]) tensor([0.3767, 0.2037, 0.2481, 0.1715]) -Greedy action tensor([ 0.3709, 0.0684, 0.0712, -0.3253]) tensor([0.3357, 0.2481, 0.2488, 0.1674]) -Greedy action tensor([ 0.1940, -0.0044, -0.0457, -0.2127]) tensor([0.3056, 0.2506, 0.2404, 0.2034]) -Greedy action tensor([ 0.5254, -0.0806, 0.0765, -0.1286]) tensor([0.3698, 0.2018, 0.2361, 0.1923]) -Greedy action tensor([ 0.5934, -0.0331, -0.1286, -0.2598]) tensor([0.4088, 0.2185, 0.1986, 0.1742]) -Greedy action tensor([ 0.2128, 0.0552, 0.2351, -0.2014]) tensor([0.2827, 0.2415, 0.2891, 0.1868]) -Greedy action tensor([ 0.5280, -0.1961, 0.0736, -0.5785]) tensor([0.4081, 0.1978, 0.2591, 0.1350]) -Greedy action tensor([-1.1063, -0.2296, -0.2145, -0.3207]) tensor([0.1244, 0.2990, 0.3036, 0.2730]) -Greedy action tensor([-0.6052, -0.5643, 0.3366, 0.5694]) tensor([0.1275, 0.1328, 0.3270, 0.4127]) -Greedy action tensor([-1.4274, 0.0475, 0.6088, -0.6587]) tensor([0.0658, 0.2878, 0.5044, 0.1420]) -Greedy action tensor([-1.3730, -0.0864, 0.2291, -0.4580]) tensor([0.0828, 0.2997, 0.4109, 0.2067]) -Greedy action tensor([-2.0273, -0.8176, 1.3073, 0.5379]) tensor([0.0220, 0.0738, 0.6179, 0.2863]) -Greedy action tensor([-1.8478, -0.5318, 0.8092, 0.1214]) tensor([0.0382, 0.1426, 0.5451, 0.2740]) -Greedy action tensor([-1.0380, -0.3329, -0.2876, 0.0383]) tensor([0.1238, 0.2506, 0.2623, 0.3633]) -Greedy action tensor([-1.2569, -0.6210, 0.3799, 0.2569]) tensor([0.0795, 0.1502, 0.4088, 0.3615]) -Greedy action tensor([-1.3677, -0.6050, 0.3580, 0.1637]) tensor([0.0747, 0.1602, 0.4196, 0.3455]) -Greedy action tensor([-1.8642, -0.8829, 0.1317, -0.3312]) tensor([0.0639, 0.1704, 0.4699, 0.2958]) -Greedy action tensor([-1.8165, -1.0117, 0.1579, -0.5227]) tensor([0.0710, 0.1588, 0.5113, 0.2589]) -Greedy action tensor([-1.2809, -0.5592, 0.3461, 0.1168]) tensor([0.0820, 0.1688, 0.4173, 0.3318]) -Greedy action tensor([-1.5643, -0.3065, 0.0336, -0.4409]) tensor([0.0798, 0.2806, 0.3943, 0.2453]) -Greedy action tensor([-1.7126, -0.8043, 0.0741, -0.4148]) tensor([0.0763, 0.1892, 0.4553, 0.2792]) -Greedy action tensor([-1.1865, -0.8277, -0.4438, -0.9621]) tensor([0.1729, 0.2475, 0.3633, 0.2164]) -Greedy action tensor([-1.5285, -0.7171, -0.1123, -0.6596]) tensor([0.1025, 0.2307, 0.4224, 0.2444]) -Greedy action tensor([-0.8546, -0.6082, 0.3284, 0.5847]) tensor([0.1024, 0.1311, 0.3344, 0.4321]) -Greedy action tensor([-1.4845, -0.1930, 0.5529, -0.3703]) tensor([0.0651, 0.2369, 0.4995, 0.1984]) -Greedy action tensor([-1.3387, -0.5738, 0.6321, -0.0885]) tensor([0.0724, 0.1555, 0.5194, 0.2527]) -Greedy action tensor([-1.8733, -0.6989, 0.2858, -0.2655]) tensor([0.0559, 0.1809, 0.4842, 0.2790]) -Greedy action tensor([-1.4191, -0.1856, 0.2452, -0.3764]) tensor([0.0797, 0.2735, 0.4208, 0.2260]) -Greedy action tensor([-2.0494, -0.8191, 0.7141, 0.0782]) tensor([0.0349, 0.1194, 0.5530, 0.2928]) -Greedy action tensor([-2.0411, -0.9314, 0.4831, -0.1266]) tensor([0.0429, 0.1302, 0.5357, 0.2912]) -Greedy action tensor([-0.9596, 0.0604, -0.1177, -0.1203]) tensor([0.1189, 0.3298, 0.2760, 0.2753]) -Greedy action tensor([-1.4716, -0.2833, 0.0315, -0.4384]) tensor([0.0863, 0.2832, 0.3880, 0.2425]) -Greedy action tensor([-2.0038, -0.7382, 0.8492, 0.0797]) tensor([0.0334, 0.1185, 0.5796, 0.2685]) -Greedy action tensor([-1.9729, -0.8021, 0.2981, -0.1967]) tensor([0.0505, 0.1627, 0.4888, 0.2980]) -Greedy action tensor([-1.4793, -0.5396, 0.4239, 0.1794]) tensor([0.0644, 0.1649, 0.4322, 0.3385]) -Greedy action tensor([-1.9940, -0.9184, 0.3274, -0.2592]) tensor([0.0505, 0.1481, 0.5149, 0.2864]) -Greedy action tensor([-2.0261, -0.8096, 0.3866, -0.1025]) tensor([0.0447, 0.1508, 0.4987, 0.3058]) -Greedy action tensor([-1.2703, -0.7745, -0.2079, -0.3911]) tensor([0.1259, 0.2067, 0.3642, 0.3032]) -Greedy action tensor([-0.8620, 0.0635, -0.5728, 0.1136]) tensor([0.1331, 0.3359, 0.1778, 0.3532]) -Greedy action tensor([-1.3244, -0.1373, 0.4132, -0.5287]) tensor([0.0821, 0.2692, 0.4667, 0.1820]) -Greedy action tensor([-1.6197, -0.7971, -0.0533, -0.3896]) tensor([0.0870, 0.1982, 0.4169, 0.2979]) -Greedy action tensor([-1.1746, -0.3578, -0.2006, -0.1656]) tensor([0.1155, 0.2615, 0.3060, 0.3169]) -Greedy action tensor([-0.9248, -0.0366, -0.3275, -0.2332]) tensor([0.1380, 0.3355, 0.2508, 0.2756]) -Greedy action tensor([-0.9393, -0.2101, -0.4283, 0.0309]) tensor([0.1355, 0.2810, 0.2259, 0.3576]) -Greedy action tensor([-1.7793, -0.8781, 0.1020, -0.4679]) tensor([0.0728, 0.1793, 0.4777, 0.2702]) -Greedy action tensor([-2.0211, -0.9511, 0.6619, 0.1148]) tensor([0.0370, 0.1079, 0.5416, 0.3134]) -Greedy action tensor([-1.5187, -0.1287, 0.5797, -0.3570]) tensor([0.0611, 0.2454, 0.4983, 0.1953]) -Greedy action tensor([-1.6807, -0.5002, 0.5253, 0.0200]) tensor([0.0532, 0.1731, 0.4826, 0.2912]) -Greedy action tensor([-1.6056, -0.4992, 0.5210, 0.0837]) tensor([0.0561, 0.1696, 0.4705, 0.3038]) -Greedy action tensor([-1.7812, -0.6728, 0.8334, 0.3558]) tensor([0.0382, 0.1158, 0.5221, 0.3239]) -Greedy action tensor([-2.0050, -0.9407, 0.4648, -0.2606]) tensor([0.0466, 0.1352, 0.5513, 0.2669]) -Greedy action tensor([-0.9641, -0.4879, -0.4076, -0.4166]) tensor([0.1644, 0.2646, 0.2868, 0.2842]) -Greedy action tensor([-1.6454, -0.4726, 0.7890, 0.4534]) tensor([0.0420, 0.1358, 0.4794, 0.3428]) -Greedy action tensor([-1.9727, -0.8429, 0.2978, -0.1702]) tensor([0.0504, 0.1560, 0.4880, 0.3056]) -Greedy action tensor([-2.0083, -0.7868, 0.5258, -0.0946]) tensor([0.0421, 0.1427, 0.5302, 0.2851]) -Greedy action tensor([-1.9530, -0.8901, 0.1985, -0.3547]) tensor([0.0574, 0.1660, 0.4931, 0.2836]) -Greedy action tensor([-1.0370, -0.5898, 0.2694, 0.1470]) tensor([0.1050, 0.1642, 0.3877, 0.3431]) -Greedy action tensor([-0.8475, -0.5301, 0.3165, -0.0401]) tensor([0.1279, 0.1757, 0.4096, 0.2868]) -Greedy action tensor([-1.9219, -0.5218, 0.6199, -0.1095]) tensor([0.0419, 0.1698, 0.5319, 0.2565]) -Greedy action tensor([-1.9391, -0.6628, 0.3768, -0.1203]) tensor([0.0479, 0.1716, 0.4853, 0.2952]) -Greedy action tensor([-1.9504, -0.5202, 0.8345, 0.0423]) tensor([0.0348, 0.1456, 0.5641, 0.2555]) -Greedy action tensor([-1.4022, -0.5809, 0.3999, 0.1969]) tensor([0.0700, 0.1592, 0.4244, 0.3464]) -Greedy action tensor([-1.7530, -0.7886, -0.0762, -0.4470]) tensor([0.0790, 0.2072, 0.4223, 0.2915]) -Greedy action tensor([-1.4372, -0.5736, 0.2890, -0.2652]) tensor([0.0818, 0.1941, 0.4599, 0.2642]) -Greedy action tensor([-1.6218, -0.7546, 0.0386, -0.2175]) tensor([0.0787, 0.1872, 0.4138, 0.3203]) -Greedy action tensor([-0.7908, -0.2518, 0.4267, -0.2807]) tensor([0.1289, 0.2210, 0.4355, 0.2147]) -Greedy action tensor([-1.7684, -0.7031, 0.0548, -0.4124]) tensor([0.0716, 0.2077, 0.4431, 0.2777]) -Greedy action tensor([-2.0318, -0.8518, 0.3856, -0.1343]) tensor([0.0452, 0.1470, 0.5066, 0.3012]) -Greedy action tensor([-1.3506, -0.5074, 0.4504, -0.0804]) tensor([0.0773, 0.1796, 0.4679, 0.2752]) -Greedy action tensor([-1.7157, -0.9647, 0.2582, -0.5961]) tensor([0.0747, 0.1584, 0.5380, 0.2289]) -Greedy action tensor([-1.5971, -0.5888, 0.5364, -0.0353]) tensor([0.0590, 0.1617, 0.4981, 0.2812]) -Greedy action tensor([-0.8337, 0.1236, -0.6079, 0.1742]) tensor([0.1316, 0.3428, 0.1650, 0.3606]) -Greedy action tensor([-0.8936, -0.6488, 0.4402, 0.8122]) tensor([0.0864, 0.1103, 0.3278, 0.4755]) -Greedy action tensor([-1.9567, -0.8251, 0.3300, -0.1820]) tensor([0.0504, 0.1563, 0.4960, 0.2973]) -Greedy action tensor([-1.9306, -0.5842, 0.8474, 0.0863]) tensor([0.0352, 0.1351, 0.5655, 0.2642]) -Greedy action tensor([-1.3676, 0.0619, 0.3467, -0.0737]) tensor([0.0696, 0.2905, 0.3862, 0.2537]) -Greedy action tensor([-1.0208, -0.3683, -0.2279, -0.2269]) tensor([0.1362, 0.2615, 0.3010, 0.3013]) -Greedy action tensor([-1.2526, 0.4464, 0.4874, -0.4425]) tensor([0.0694, 0.3794, 0.3953, 0.1560]) -Greedy action tensor([-2.0580, -0.8344, 0.8853, 0.1185]) tensor([0.0311, 0.1056, 0.5895, 0.2738]) -Greedy action tensor([-1.8304, -0.6088, 0.6823, 0.0272]) tensor([0.0432, 0.1466, 0.5332, 0.2769]) -Greedy action tensor([-1.5077, -1.1493, 0.2562, -0.7528]) tensor([0.0962, 0.1377, 0.5614, 0.2047]) -Greedy action tensor([-1.9856, -0.7759, 0.5198, -0.1004]) tensor([0.0431, 0.1446, 0.5282, 0.2841]) -Greedy action tensor([-1.7647, -0.4229, 0.5653, -0.0596]) tensor([0.0485, 0.1857, 0.4988, 0.2670]) -Greedy action tensor([-1.7575, -0.7355, 0.0814, -0.2987]) tensor([0.0696, 0.1934, 0.4377, 0.2993]) -Greedy action tensor([-1.6885, -0.7531, 0.0749, -0.3130]) tensor([0.0750, 0.1911, 0.4373, 0.2967]) -Greedy action tensor([-1.9640, -0.8073, 0.5328, -0.0314]) tensor([0.0430, 0.1369, 0.5227, 0.2974]) -Greedy action tensor([-1.8246, -0.4753, 0.6697, 0.0061]) tensor([0.0431, 0.1661, 0.5220, 0.2688]) -Greedy action tensor([-1.5152, -0.4962, 0.5708, 0.4265]) tensor([0.0532, 0.1474, 0.4285, 0.3709]) -Greedy action tensor([ 1.2384, -0.5115, 0.9693, 0.4958]) tensor([0.4143, 0.0720, 0.3166, 0.1971]) -Greedy action tensor([ 1.9239, -0.3536, 1.1314, 1.2102]) tensor([0.4890, 0.0501, 0.2214, 0.2395]) -Greedy action tensor([ 1.6484, -0.4245, 0.1435, 1.0413]) tensor([0.5283, 0.0665, 0.1173, 0.2879]) -Greedy action tensor([ 2.0000, -0.6680, 0.6715, 2.0841]) tensor([0.4129, 0.0287, 0.1094, 0.4491]) -Greedy action tensor([ 0.9122, -1.4836, -0.4435, 1.0442]) tensor([0.4016, 0.0366, 0.1035, 0.4583]) -Greedy action tensor([ 1.2448, -0.0978, 1.1842, 1.1112]) tensor([0.3250, 0.0849, 0.3058, 0.2843]) -Greedy action tensor([1.4151, 0.0462, 1.2686, 0.8751]) tensor([0.3703, 0.0942, 0.3198, 0.2158]) -Greedy action tensor([ 1.1229, -0.5778, -0.0220, 1.4955]) tensor([0.3387, 0.0618, 0.1078, 0.4917]) -Greedy action tensor([ 1.4779, -1.2682, 0.8842, 1.2673]) tensor([0.4121, 0.0264, 0.2276, 0.3339]) -Greedy action tensor([ 1.6333, 0.3551, -0.4676, 0.0599]) tensor([0.6218, 0.1732, 0.0761, 0.1289]) -Greedy action tensor([ 1.3303, -1.1273, 0.9820, 0.3992]) tensor([0.4575, 0.0392, 0.3230, 0.1803]) -Greedy action tensor([ 0.9079, -0.8621, 0.4104, 1.0594]) tensor([0.3399, 0.0579, 0.2067, 0.3955]) -Greedy action tensor([ 1.7631, -0.4812, 0.4633, 1.1650]) tensor([0.5186, 0.0550, 0.1413, 0.2851]) -Greedy action tensor([1.6128, 0.1283, 1.2663, 0.6620]) tensor([0.4310, 0.0977, 0.3048, 0.1666]) -Greedy action tensor([ 2.0097, -0.0647, 0.7940, 1.9614]) tensor([0.4210, 0.0529, 0.1248, 0.4012]) -Greedy action tensor([ 2.1858, -0.0824, 0.8630, 1.9275]) tensor([0.4668, 0.0483, 0.1244, 0.3605]) -Greedy action tensor([ 2.1781, -0.2311, -0.0908, 0.6331]) tensor([0.7109, 0.0639, 0.0735, 0.1517]) -Greedy action tensor([ 1.2603, -0.1469, 1.2630, 0.7950]) tensor([0.3478, 0.0851, 0.3487, 0.2184]) -Greedy action tensor([ 1.7428, 0.5362, -0.8826, 0.6449]) tensor([0.5865, 0.1755, 0.0425, 0.1956]) -Greedy action tensor([ 0.9607, -0.3301, 0.4276, -0.1209]) tensor([0.4544, 0.1250, 0.2666, 0.1541]) -Greedy action tensor([ 1.2489, -0.0052, 1.4118, 0.3285]) tensor([0.3496, 0.0997, 0.4114, 0.1393]) -Greedy action tensor([ 1.0819, -0.7689, 0.9000, 0.9638]) tensor([0.3473, 0.0546, 0.2895, 0.3086]) -Greedy action tensor([ 1.4012, -0.3852, 1.1015, 0.9267]) tensor([0.3951, 0.0662, 0.2928, 0.2458]) -Greedy action tensor([ 1.6692, -0.9388, 1.0502, 1.2373]) tensor([0.4422, 0.0326, 0.2381, 0.2871]) -Greedy action tensor([ 1.2947, -1.0784, -0.1519, 1.0034]) tensor([0.4817, 0.0449, 0.1134, 0.3600]) -Greedy action tensor([ 1.8300, -0.8396, 1.0953, 1.9509]) tensor([0.3735, 0.0259, 0.1791, 0.4215]) -Greedy action tensor([ 1.7109, -0.5611, 1.5481, 1.1585]) tensor([0.3955, 0.0408, 0.3361, 0.2276]) -Greedy action tensor([ 1.7369, -0.5960, 0.9247, 0.8459]) tensor([0.5125, 0.0497, 0.2275, 0.2103]) -Greedy action tensor([ 1.2420, -0.2893, 1.0912, 1.2850]) tensor([0.3205, 0.0693, 0.2756, 0.3346]) -Greedy action tensor([ 1.6568, -0.2083, 1.0381, 0.9133]) tensor([0.4610, 0.0714, 0.2483, 0.2192]) -Greedy action tensor([ 1.3643, -0.7390, 1.7953, 1.2707]) tensor([0.2800, 0.0342, 0.4309, 0.2550]) -Greedy action tensor([0.8704, 0.4560, 0.3672, 1.2837]) tensor([0.2648, 0.1749, 0.1601, 0.4003]) -Greedy action tensor([ 1.4420, 0.4010, -0.9391, 0.4221]) tensor([0.5537, 0.1955, 0.0512, 0.1997]) -Greedy action tensor([ 0.7905, -0.4510, -0.7126, 0.7125]) tensor([0.4105, 0.1186, 0.0913, 0.3796]) -Greedy action tensor([ 2.0047, -0.2728, 0.4601, 1.1523]) tensor([0.5739, 0.0589, 0.1225, 0.2447]) -Greedy action tensor([ 0.7347, -1.0679, 0.2843, 1.1398]) tensor([0.3029, 0.0499, 0.1930, 0.4541]) -Greedy action tensor([ 1.9529, 0.1768, -0.7736, 0.3789]) tensor([0.6935, 0.1174, 0.0454, 0.1437]) -Greedy action tensor([ 1.1410, -0.0074, 1.0599, 1.1280]) tensor([0.3100, 0.0983, 0.2858, 0.3059]) -Greedy action tensor([ 1.1770, -0.6284, 0.9789, 1.2626]) tensor([0.3253, 0.0535, 0.2668, 0.3544]) -Greedy action tensor([ 0.8738, -0.3207, 1.1780, 0.6565]) tensor([0.2888, 0.0875, 0.3914, 0.2324]) -Greedy action tensor([ 1.1445, -0.2216, -0.1539, 1.4563]) tensor([0.3456, 0.0882, 0.0943, 0.4720]) -Greedy action tensor([ 1.2688, 0.6308, -0.3570, 1.5045]) tensor([0.3344, 0.1766, 0.0658, 0.4232]) -Greedy action tensor([ 2.3096, -0.4278, 1.0511, 1.3254]) tensor([0.5805, 0.0376, 0.1649, 0.2170]) -Greedy action tensor([ 1.7808, -0.3306, 0.1314, 1.8907]) tensor([0.4116, 0.0498, 0.0791, 0.4594]) -Greedy action tensor([ 1.2822, -0.4709, 0.6648, 0.9425]) tensor([0.4124, 0.0715, 0.2225, 0.2937]) -Greedy action tensor([ 1.2712, 0.6158, -0.8574, 0.6745]) tensor([0.4569, 0.2372, 0.0544, 0.2516]) -Greedy action tensor([ 1.9837, -0.7693, 0.2591, 0.9406]) tensor([0.6272, 0.0400, 0.1118, 0.2210]) -Greedy action tensor([ 1.9020, -0.1422, 0.6273, 1.9872]) tensor([0.4003, 0.0518, 0.1119, 0.4359]) -Greedy action tensor([ 1.6692, -1.3483, 0.6931, 1.7753]) tensor([0.3941, 0.0193, 0.1485, 0.4382]) -Greedy action tensor([ 1.7176, 0.2753, -0.0423, 1.2658]) tensor([0.4890, 0.1156, 0.0841, 0.3113]) -Greedy action tensor([ 1.1343, -0.2097, 0.5592, 0.8140]) tensor([0.3923, 0.1023, 0.2207, 0.2848]) -Greedy action tensor([1.3652, 0.0441, 1.0423, 1.2058]) tensor([0.3517, 0.0938, 0.2546, 0.2999]) -Greedy action tensor([ 1.2380, -0.7807, 1.8768, 1.0699]) tensor([0.2583, 0.0343, 0.4892, 0.2183]) -Greedy action tensor([ 1.2712, -0.3786, 1.0388, 0.3068]) tensor([0.4227, 0.0812, 0.3350, 0.1611]) -Greedy action tensor([ 0.9436, -0.8180, -0.0840, 1.0681]) tensor([0.3756, 0.0645, 0.1344, 0.4254]) -Greedy action tensor([ 1.4344, -0.5084, 1.0603, 1.0463]) tensor([0.3985, 0.0571, 0.2741, 0.2703]) -Greedy action tensor([ 0.4518, -0.4625, -0.1038, 1.4310]) tensor([0.2157, 0.0864, 0.1237, 0.5742]) -Greedy action tensor([ 0.7939, -0.4242, 0.1830, 1.9413]) tensor([0.2005, 0.0593, 0.1088, 0.6314]) -Greedy action tensor([ 0.6117, -0.2337, 0.1618, 1.1567]) tensor([0.2637, 0.1132, 0.1682, 0.4548]) -Greedy action tensor([ 1.3079, -0.3984, 1.5734, 1.4694]) tensor([0.2731, 0.0496, 0.3562, 0.3210]) -Greedy action tensor([ 1.3134, -0.0969, 0.7253, 1.6507]) tensor([0.3124, 0.0763, 0.1735, 0.4378]) -Greedy action tensor([ 1.3636, -0.2111, 1.2556, 1.4678]) tensor([0.3111, 0.0644, 0.2792, 0.3453]) -Greedy action tensor([1.4869, 0.1052, 1.1079, 0.2811]) tensor([0.4474, 0.1124, 0.3063, 0.1340]) -Greedy action tensor([ 1.7651, -0.8418, 0.7733, 1.4720]) tensor([0.4565, 0.0337, 0.1693, 0.3405]) -Greedy action tensor([ 1.2174, -0.9110, 1.1298, 0.7431]) tensor([0.3763, 0.0448, 0.3447, 0.2342]) -Greedy action tensor([ 0.9988, -0.8778, 0.2972, 1.2589]) tensor([0.3395, 0.0520, 0.1683, 0.4403]) -Greedy action tensor([1.4083, 0.0901, 0.3385, 1.1262]) tensor([0.4228, 0.1132, 0.1451, 0.3189]) -Greedy action tensor([ 1.0623, -0.9553, 0.7497, 0.7049]) tensor([0.3900, 0.0519, 0.2853, 0.2728]) -Greedy action tensor([ 1.2714, -0.4318, 0.5439, 1.2235]) tensor([0.3819, 0.0695, 0.1845, 0.3640]) -Greedy action tensor([ 1.7499, -0.5624, 0.6038, 0.7688]) tensor([0.5581, 0.0553, 0.1774, 0.2092]) -Greedy action tensor([ 1.2609, 0.8258, -1.3657, 0.7331]) tensor([0.4330, 0.2802, 0.0313, 0.2554]) -Greedy action tensor([0.5820, 0.3566, 0.0317, 1.2562]) tensor([0.2306, 0.1840, 0.1330, 0.4525]) -Greedy action tensor([1.4707, 0.2569, 1.3273, 0.9973]) tensor([0.3589, 0.1066, 0.3109, 0.2235]) -Greedy action tensor([ 1.8327, -0.5362, 1.1292, 1.2737]) tensor([0.4629, 0.0433, 0.2291, 0.2647]) -Greedy action tensor([1.6381, 0.0751, 1.1990, 1.3269]) tensor([0.3866, 0.0810, 0.2492, 0.2832]) -Greedy action tensor([ 1.1878, 0.5433, -0.2925, 0.5722]) tensor([0.4361, 0.2290, 0.0993, 0.2357]) -Greedy action tensor([ 0.8496, -0.4126, 1.2021, 1.1122]) tensor([0.2496, 0.0707, 0.3551, 0.3246]) -Greedy action tensor([ 1.2684, -0.4230, 0.9633, 0.7216]) tensor([0.4000, 0.0737, 0.2948, 0.2315]) -Greedy action tensor([ 1.3223, 0.0293, -0.5116, 0.7967]) tensor([0.4937, 0.1355, 0.0789, 0.2919]) -Greedy action tensor([ 1.0097, -0.4633, 0.8525, 0.3297]) tensor([0.3860, 0.0885, 0.3299, 0.1956]) -Greedy action tensor([1.1446, 0.0253, 1.0887, 0.9571]) tensor([0.3224, 0.1053, 0.3049, 0.2673]) -Greedy action tensor([ 2.0218, -0.5892, 0.3937, 1.8434]) tensor([0.4747, 0.0349, 0.0932, 0.3972]) -Greedy action tensor([ 0.6167, -0.6277, -0.0667, 0.0479]) tensor([0.4239, 0.1221, 0.2140, 0.2400]) -Greedy action tensor([ 0.9205, -0.7112, -0.1093, 0.1220]) tensor([0.4993, 0.0977, 0.1783, 0.2247]) -Greedy action tensor([ 0.6048, -0.4910, 0.0901, -0.2343]) tensor([0.4230, 0.1414, 0.2528, 0.1828]) -Greedy action tensor([ 1.2119, -0.5169, -0.2281, -0.1328]) tensor([0.5970, 0.1060, 0.1414, 0.1556]) -Greedy action tensor([ 2.1024, -0.7692, -0.2998, 0.2146]) tensor([0.7701, 0.0436, 0.0697, 0.1166]) -Greedy action tensor([ 0.8911, -0.6215, -0.0114, 0.0968]) tensor([0.4813, 0.1060, 0.1952, 0.2175]) -Greedy action tensor([ 1.2806, -0.4969, -0.4168, -0.0483]) tensor([0.6184, 0.1046, 0.1133, 0.1637]) -Greedy action tensor([ 0.7999, -0.4380, -0.3412, 0.0949]) tensor([0.4754, 0.1379, 0.1519, 0.2349]) -Greedy action tensor([ 0.7689, 0.0775, -0.1886, 0.0272]) tensor([0.4235, 0.2121, 0.1626, 0.2017]) -Greedy action tensor([ 1.0856, 0.0422, -0.0540, -0.0800]) tensor([0.5040, 0.1776, 0.1613, 0.1571]) -Greedy action tensor([ 0.8113, -0.4872, -0.2398, 0.1622]) tensor([0.4662, 0.1272, 0.1630, 0.2436]) -Greedy action tensor([ 1.4927, -0.6263, -0.3060, 0.0611]) tensor([0.6559, 0.0788, 0.1086, 0.1567]) -Greedy action tensor([ 1.0630, -0.8405, -0.4687, 0.3346]) tensor([0.5412, 0.0807, 0.1170, 0.2612]) -Greedy action tensor([ 1.5546, -0.3781, -0.1610, -0.0711]) tensor([0.6573, 0.0952, 0.1182, 0.1293]) -Greedy action tensor([ 0.8027, -0.3292, -0.1778, 0.1386]) tensor([0.4520, 0.1457, 0.1696, 0.2327]) -Greedy action tensor([ 1.2135, -0.4721, -0.0683, -0.2218]) tensor([0.5879, 0.1090, 0.1632, 0.1399]) -Greedy action tensor([ 0.9818, -0.4531, -0.3609, 0.0279]) tensor([0.5306, 0.1264, 0.1386, 0.2044]) -Greedy action tensor([ 0.5697, -0.3619, 0.0122, -0.0675]) tensor([0.4007, 0.1579, 0.2295, 0.2119]) -Greedy action tensor([ 0.9265, 0.1173, -0.0624, -0.0844]) tensor([0.4585, 0.2041, 0.1706, 0.1668]) -Greedy action tensor([ 0.7793, -0.5904, -0.3447, 0.1018]) tensor([0.4792, 0.1218, 0.1557, 0.2433]) -Greedy action tensor([ 0.7730, -0.3424, -0.0467, 0.0926]) tensor([0.4396, 0.1441, 0.1937, 0.2226]) -Greedy action tensor([ 1.1817, -0.3972, -0.3016, -0.2376]) tensor([0.5970, 0.1231, 0.1355, 0.1444]) -Greedy action tensor([ 1.0600, -0.3514, -0.1468, -0.2120]) tensor([0.5485, 0.1337, 0.1641, 0.1537]) -Greedy action tensor([ 1.2563, -0.4946, -0.2378, 0.2354]) tensor([0.5687, 0.0987, 0.1277, 0.2049]) -Greedy action tensor([ 1.4050, -0.5277, 0.0047, 0.1534]) tensor([0.5962, 0.0863, 0.1470, 0.1705]) -Greedy action tensor([ 0.8694, -0.2793, -0.0945, -0.2141]) tensor([0.4910, 0.1557, 0.1873, 0.1661]) -Greedy action tensor([ 1.4498, -0.4067, -0.3270, -0.1835]) tensor([0.6576, 0.1027, 0.1113, 0.1284]) -Greedy action tensor([ 0.9469, -0.4984, -0.2548, 0.1655]) tensor([0.5015, 0.1182, 0.1508, 0.2295]) -Greedy action tensor([ 0.9733, -0.5481, -0.1586, 0.2215]) tensor([0.4969, 0.1085, 0.1602, 0.2343]) -Greedy action tensor([ 1.6830, -0.6073, -0.3366, 0.3722]) tensor([0.6651, 0.0673, 0.0883, 0.1793]) -Greedy action tensor([ 1.0628, -0.5147, -0.0362, 0.1431]) tensor([0.5159, 0.1065, 0.1719, 0.2057]) -Greedy action tensor([ 1.7576, -0.5347, -0.2647, 0.2612]) tensor([0.6862, 0.0693, 0.0908, 0.1537]) -Greedy action tensor([ 1.1846, -0.5117, -0.1581, -0.0569]) tensor([0.5769, 0.1058, 0.1506, 0.1667]) -Greedy action tensor([ 0.8997, -0.5445, -0.2542, 0.0264]) tensor([0.5079, 0.1198, 0.1602, 0.2121]) -Greedy action tensor([ 1.5399, -0.6285, -0.4864, -0.0252]) tensor([0.6872, 0.0786, 0.0906, 0.1437]) -Greedy action tensor([ 1.2676, -0.4660, -0.3702, -0.0851]) tensor([0.6136, 0.1084, 0.1193, 0.1587]) -Greedy action tensor([ 0.9597, -0.4836, -0.1304, 0.0967]) tensor([0.5014, 0.1184, 0.1686, 0.2116]) -Greedy action tensor([ 1.3598, -0.3552, -0.3506, -0.1035]) tensor([0.6280, 0.1130, 0.1136, 0.1454]) -Greedy action tensor([ 0.4878, -0.3713, -0.0108, -0.1479]) tensor([0.3905, 0.1654, 0.2372, 0.2068]) -Greedy action tensor([ 1.0682, -0.3853, -0.2323, -0.1768]) tensor([0.5574, 0.1303, 0.1518, 0.1605]) -Greedy action tensor([ 0.9713, -0.3743, -0.2057, -0.1358]) tensor([0.5266, 0.1371, 0.1623, 0.1740]) -Greedy action tensor([ 1.1234, 0.0409, -0.1591, -0.1225]) tensor([0.5253, 0.1779, 0.1457, 0.1511]) -Greedy action tensor([ 0.6144, -0.3075, -0.1534, -0.0355]) tensor([0.4195, 0.1669, 0.1947, 0.2190]) -Greedy action tensor([ 1.0906, -0.6308, -0.0872, 0.1739]) tensor([0.5300, 0.0948, 0.1632, 0.2119]) -Greedy action tensor([ 1.0180, -0.1991, 0.1717, -0.1791]) tensor([0.4933, 0.1461, 0.2116, 0.1490]) -Greedy action tensor([ 0.9254, -0.3509, -0.1402, 0.1025]) tensor([0.4848, 0.1353, 0.1670, 0.2129]) -Greedy action tensor([ 0.4755, -0.4668, -0.2229, 0.0309]) tensor([0.3955, 0.1542, 0.1967, 0.2536]) -Greedy action tensor([ 0.7195, -0.6859, -0.3641, 0.0635]) tensor([0.4756, 0.1167, 0.1609, 0.2468]) -Greedy action tensor([ 1.2947, -0.4198, -0.0172, 0.2693]) tensor([0.5531, 0.0996, 0.1490, 0.1984]) -Greedy action tensor([ 1.1490, -0.5894, -0.3565, 0.1544]) tensor([0.5657, 0.0995, 0.1255, 0.2093]) -Greedy action tensor([ 1.0439, -0.4298, -0.3082, -0.1969]) tensor([0.5628, 0.1289, 0.1456, 0.1627]) -Greedy action tensor([ 0.7512, -0.4769, 0.1406, -0.1355]) tensor([0.4448, 0.1303, 0.2416, 0.1833]) -Greedy action tensor([ 0.5329, -0.4715, -0.0574, 0.0294]) tensor([0.3961, 0.1451, 0.2195, 0.2394]) -Greedy action tensor([ 1.2363, -0.4348, -0.1905, -0.2013]) tensor([0.6004, 0.1129, 0.1441, 0.1426]) -Greedy action tensor([ 0.8035, -0.2918, -0.2729, 0.0468]) tensor([0.4663, 0.1559, 0.1589, 0.2188]) -Greedy action tensor([ 0.5894, -0.4368, 0.0833, -0.0404]) tensor([0.4010, 0.1437, 0.2417, 0.2136]) -Greedy action tensor([ 1.4606, -0.5799, -0.3543, -0.0802]) tensor([0.6636, 0.0862, 0.1081, 0.1421]) -Greedy action tensor([ 1.0868, -0.0664, 0.0550, -0.1750]) tensor([0.5115, 0.1614, 0.1823, 0.1448]) -Greedy action tensor([ 0.9652, -0.0554, 0.1040, 0.0513]) tensor([0.4579, 0.1650, 0.1935, 0.1836]) -Greedy action tensor([ 0.8523, -0.0901, 0.1813, -0.1025]) tensor([0.4375, 0.1705, 0.2236, 0.1684]) -Greedy action tensor([ 0.9034, -0.5676, -0.0919, 0.1739]) tensor([0.4804, 0.1104, 0.1776, 0.2316]) -Greedy action tensor([ 1.5887, -0.1667, -0.2796, -0.1657]) tensor([0.6666, 0.1152, 0.1029, 0.1153]) -Greedy action tensor([ 1.4147, -0.4678, -0.5695, -0.1154]) tensor([0.6639, 0.1011, 0.0913, 0.1438]) -Greedy action tensor([ 0.8186, -0.0977, 0.0665, -0.1082]) tensor([0.4411, 0.1764, 0.2079, 0.1746]) -Greedy action tensor([ 0.8199, -0.2869, 0.1739, -0.2230]) tensor([0.4531, 0.1498, 0.2375, 0.1597]) -Greedy action tensor([ 1.2921, -0.5643, -0.4487, 0.1459]) tensor([0.6063, 0.0947, 0.1063, 0.1927]) -Greedy action tensor([ 1.1022, -0.4351, -0.2467, -0.0559]) tensor([0.5591, 0.1202, 0.1451, 0.1756]) -Greedy action tensor([ 1.8201, -0.6975, -0.5791, 0.3412]) tensor([0.7146, 0.0576, 0.0649, 0.1629]) -Greedy action tensor([ 0.7226, -0.5217, 0.0519, 0.0338]) tensor([0.4345, 0.1252, 0.2222, 0.2182]) -Greedy action tensor([ 1.3613, -0.6504, -0.2156, 0.0613]) tensor([0.6200, 0.0829, 0.1281, 0.1690]) -Greedy action tensor([ 0.9433, -0.1635, 0.0800, -0.1341]) tensor([0.4778, 0.1580, 0.2015, 0.1627]) -Greedy action tensor([ 1.2466, -0.2920, -0.3520, -0.1212]) tensor([0.5982, 0.1284, 0.1210, 0.1524]) -Greedy action tensor([ 1.0830, -0.2305, -0.2148, 0.0527]) tensor([0.5266, 0.1416, 0.1438, 0.1879]) -Greedy action tensor([ 1.3735, -0.4978, -0.2679, 0.5524]) tensor([0.5594, 0.0861, 0.1084, 0.2461]) -Greedy action tensor([ 1.3235, -0.6573, -0.0112, 0.2036]) tensor([0.5789, 0.0799, 0.1524, 0.1889]) -Greedy action tensor([ 0.8893, -0.3936, -0.2431, 0.0931]) tensor([0.4877, 0.1352, 0.1572, 0.2200]) -Greedy action tensor([ 0.8897, -0.5639, -0.2751, -0.0742]) tensor([0.5189, 0.1213, 0.1619, 0.1979]) -Greedy action tensor([ 0.8676, -0.5081, 0.1129, 0.0183]) tensor([0.4650, 0.1175, 0.2186, 0.1989]) -Greedy action tensor([ 1.4911, -0.6157, -0.1832, 0.1621]) tensor([0.6354, 0.0773, 0.1191, 0.1682]) -Greedy action tensor([ 1.7850, -0.5457, -0.2605, 0.4011]) tensor([0.6770, 0.0658, 0.0875, 0.1697]) -Greedy action tensor([ 0.7526, -0.5666, -0.1533, 0.1340]) tensor([0.4524, 0.1210, 0.1829, 0.2437]) -Greedy action tensor([ 0.2181, 0.1013, 0.1761, -0.1829]) tensor([0.2842, 0.2529, 0.2725, 0.1903]) -Greedy action tensor([ 0.3928, 0.1500, 0.1757, -0.0871]) tensor([0.3117, 0.2445, 0.2509, 0.1929]) -Greedy action tensor([ 1.0200, -0.4552, 0.0512, -0.7019]) tensor([0.5596, 0.1280, 0.2124, 0.1000]) -Greedy action tensor([ 0.3225, 0.0597, 0.1519, -0.3849]) tensor([0.3221, 0.2476, 0.2715, 0.1587]) -Greedy action tensor([ 0.7960, -0.2736, -0.1909, -0.5016]) tensor([0.5027, 0.1725, 0.1874, 0.1373]) -Greedy action tensor([ 0.1377, -0.0820, 0.1624, -0.0029]) tensor([0.2705, 0.2172, 0.2773, 0.2350]) -Greedy action tensor([ 0.3358, -0.1098, 0.1510, -0.3589]) tensor([0.3366, 0.2156, 0.2798, 0.1680]) -Greedy action tensor([ 0.3715, 0.1358, 0.1221, -0.2285]) tensor([0.3207, 0.2534, 0.2499, 0.1760]) -Greedy action tensor([ 0.2774, -0.0717, 0.0091, -0.2308]) tensor([0.3256, 0.2296, 0.2489, 0.1959]) -Greedy action tensor([ 1.2779, -0.7933, 0.0122, -0.8275]) tensor([0.6536, 0.0824, 0.1844, 0.0796]) -Greedy action tensor([ 0.7664, -0.2525, -0.0565, -0.5982]) tensor([0.4865, 0.1756, 0.2136, 0.1243]) -Greedy action tensor([ 0.5735, -0.0394, 0.0715, -0.2951]) tensor([0.3896, 0.2111, 0.2358, 0.1635]) -Greedy action tensor([ 0.3313, -0.1776, 0.1318, -0.2978]) tensor([0.3386, 0.2035, 0.2774, 0.1805]) -Greedy action tensor([ 0.4734, 0.1468, 0.1816, -0.1442]) tensor([0.3325, 0.2399, 0.2483, 0.1793]) -Greedy action tensor([ 0.2860, -0.0409, 0.0364, -0.1465]) tensor([0.3176, 0.2290, 0.2474, 0.2061]) -Greedy action tensor([ 0.3324, 0.0575, 0.2503, -0.1788]) tensor([0.3048, 0.2316, 0.2808, 0.1828]) -Greedy action tensor([ 0.4113, -0.0484, 0.1746, -0.2532]) tensor([0.3407, 0.2151, 0.2689, 0.1753]) -Greedy action tensor([ 0.6865, -0.2699, 0.0269, -0.6685]) tensor([0.4631, 0.1780, 0.2395, 0.1195]) -Greedy action tensor([ 0.2440, -0.0445, 0.2154, -0.2832]) tensor([0.3020, 0.2263, 0.2935, 0.1783]) -Greedy action tensor([ 0.3605, 0.0232, 0.1720, -0.0177]) tensor([0.3099, 0.2212, 0.2567, 0.2123]) -Greedy action tensor([ 0.3655, -0.0036, 0.2176, -0.1075]) tensor([0.3148, 0.2176, 0.2715, 0.1961]) -Greedy action tensor([ 0.7424, -0.3790, -0.1083, -0.4948]) tensor([0.4895, 0.1595, 0.2090, 0.1420]) -Greedy action tensor([ 0.8060, -0.4181, 0.0614, -0.6610]) tensor([0.5001, 0.1470, 0.2375, 0.1153]) -Greedy action tensor([ 0.3046, 0.1505, 0.0255, -0.2925]) tensor([0.3160, 0.2709, 0.2391, 0.1739]) -Greedy action tensor([ 0.6486, -0.1835, -0.0429, -0.4070]) tensor([0.4378, 0.1905, 0.2193, 0.1524]) -Greedy action tensor([ 0.2506, -0.0263, 0.0564, -0.2774]) tensor([0.3153, 0.2391, 0.2597, 0.1860]) -Greedy action tensor([ 0.2474, -0.1191, 0.1486, -0.3110]) tensor([0.3153, 0.2186, 0.2857, 0.1804]) -Greedy action tensor([ 1.1381, -0.6028, 0.0481, -0.6396]) tensor([0.5950, 0.1044, 0.2001, 0.1006]) -Greedy action tensor([ 0.8575, -0.5585, 0.0133, -0.5410]) tensor([0.5210, 0.1264, 0.2240, 0.1287]) -Greedy action tensor([ 0.7089, -0.2018, 0.0392, -0.4384]) tensor([0.4481, 0.1803, 0.2294, 0.1423]) -Greedy action tensor([ 0.5548, -0.1949, 0.0138, -0.3531]) tensor([0.4068, 0.1922, 0.2368, 0.1641]) -Greedy action tensor([ 0.4697, -0.0626, -0.0028, -0.3291]) tensor([0.3759, 0.2207, 0.2343, 0.1691]) -Greedy action tensor([ 0.6727, -0.1983, 0.0651, -0.1680]) tensor([0.4176, 0.1748, 0.2274, 0.1802]) -Greedy action tensor([ 0.6133, -0.2021, -0.0109, -0.3356]) tensor([0.4228, 0.1871, 0.2265, 0.1637]) -Greedy action tensor([ 0.3517, -0.1026, 0.1746, -0.1481]) tensor([0.3248, 0.2062, 0.2720, 0.1970]) -Greedy action tensor([ 0.3058, 0.1635, -0.0095, -0.0804]) tensor([0.3052, 0.2647, 0.2227, 0.2074]) -Greedy action tensor([ 0.2416, 0.0593, 0.2734, -0.0787]) tensor([0.2784, 0.2320, 0.2874, 0.2021]) -Greedy action tensor([ 0.4110, 0.1489, 0.1022, -0.1306]) tensor([0.3241, 0.2494, 0.2380, 0.1886]) -Greedy action tensor([ 0.3967, 0.0117, -0.0278, -0.2504]) tensor([0.3499, 0.2381, 0.2289, 0.1832]) -Greedy action tensor([ 0.3750, -0.0072, 0.0534, -0.1266]) tensor([0.3319, 0.2265, 0.2406, 0.2010]) -Greedy action tensor([ 0.3867, -0.0371, 0.1281, -0.2533]) tensor([0.3385, 0.2216, 0.2614, 0.1785]) -Greedy action tensor([ 0.4621, -0.1423, 0.1901, -0.4721]) tensor([0.3702, 0.2023, 0.2821, 0.1455]) -Greedy action tensor([ 0.2953, 0.2125, 0.0655, -0.2733]) tensor([0.3047, 0.2805, 0.2422, 0.1726]) -Greedy action tensor([ 0.3858, -0.0960, 0.2210, -0.1436]) tensor([0.3274, 0.2022, 0.2776, 0.1928]) -Greedy action tensor([ 0.2523, 0.0263, 0.1031, -0.0160]) tensor([0.2921, 0.2330, 0.2516, 0.2234]) -Greedy action tensor([ 0.2466, 0.0349, 0.0841, -0.1788]) tensor([0.3019, 0.2443, 0.2566, 0.1973]) -Greedy action tensor([ 0.3684, 0.0507, 0.1680, -0.2765]) tensor([0.3256, 0.2370, 0.2665, 0.1709]) -Greedy action tensor([ 0.4557, -0.2161, 0.1713, -0.5434]) tensor([0.3800, 0.1941, 0.2860, 0.1399]) -Greedy action tensor([ 0.3475, 0.1291, 0.1894, -0.1121]) tensor([0.3040, 0.2444, 0.2596, 0.1920]) -Greedy action tensor([ 0.3709, 0.0864, 0.2010, -0.0499]) tensor([0.3074, 0.2313, 0.2594, 0.2018]) -Greedy action tensor([ 0.1656, 0.1383, 0.2004, -0.2222]) tensor([0.2712, 0.2639, 0.2808, 0.1840]) -Greedy action tensor([ 0.3991, 0.1212, 0.1197, -0.1768]) tensor([0.3251, 0.2462, 0.2459, 0.1828]) -Greedy action tensor([ 0.3495, 0.0374, 0.1344, -0.0527]) tensor([0.3118, 0.2282, 0.2514, 0.2085]) -Greedy action tensor([ 0.3025, 0.1224, 0.2129, -0.1885]) tensor([0.2975, 0.2485, 0.2720, 0.1821]) -Greedy action tensor([ 0.7931, -0.4048, -0.0996, -0.5130]) tensor([0.5045, 0.1523, 0.2066, 0.1366]) -Greedy action tensor([ 0.2953, -0.0029, 0.0070, -0.2453]) tensor([0.3253, 0.2414, 0.2438, 0.1895]) -Greedy action tensor([ 0.4600, -0.0180, 0.1888, -0.3711]) tensor([0.3549, 0.2200, 0.2706, 0.1546]) -Greedy action tensor([ 0.3811, 0.1635, 0.1030, -0.2547]) tensor([0.3235, 0.2602, 0.2450, 0.1713]) -Greedy action tensor([ 0.3994, 0.0302, 0.2231, -0.3239]) tensor([0.3317, 0.2293, 0.2781, 0.1609]) -Greedy action tensor([ 0.5997, -0.0182, 0.0414, -0.2927]) tensor([0.3967, 0.2138, 0.2270, 0.1625]) -Greedy action tensor([ 0.5626, -0.1099, -0.0380, -0.3330]) tensor([0.4053, 0.2069, 0.2223, 0.1655]) -Greedy action tensor([ 0.3929, 0.1540, 0.1212, -0.2313]) tensor([0.3241, 0.2552, 0.2470, 0.1736]) -Greedy action tensor([ 0.2354, 0.1879, 0.0991, -0.2802]) tensor([0.2921, 0.2786, 0.2549, 0.1744]) -Greedy action tensor([ 0.4833, -0.0657, 0.0637, -0.4178]) tensor([0.3787, 0.2187, 0.2489, 0.1538]) -Greedy action tensor([ 0.3818, 0.1000, 0.1045, -0.0582]) tensor([0.3168, 0.2390, 0.2401, 0.2040]) -Greedy action tensor([ 1.1144, -0.5638, 0.1213, -0.5811]) tensor([0.5745, 0.1073, 0.2128, 0.1054]) -Greedy action tensor([ 0.2832, 0.2130, 0.1568, -0.1961]) tensor([0.2913, 0.2716, 0.2567, 0.1804]) -Greedy action tensor([ 0.7053, -0.1544, -0.0815, -0.3378]) tensor([0.4482, 0.1897, 0.2041, 0.1579]) -Greedy action tensor([ 0.2659, 0.0920, 0.0903, -0.2256]) tensor([0.3039, 0.2554, 0.2549, 0.1859]) -Greedy action tensor([ 0.4708, -0.1001, 0.0656, -0.2237]) tensor([0.3661, 0.2069, 0.2442, 0.1828]) -Greedy action tensor([ 0.5005, -0.1814, 0.0361, -0.3302]) tensor([0.3891, 0.1968, 0.2446, 0.1696]) -Greedy action tensor([ 1.1239, -0.8920, 0.0186, -0.7189]) tensor([0.6163, 0.0821, 0.2040, 0.0976]) -Greedy action tensor([ 0.5124, -0.0913, 0.0883, -0.4787]) tensor([0.3887, 0.2126, 0.2544, 0.1443]) -Greedy action tensor([ 0.4526, -0.0838, 0.0772, -0.2973]) tensor([0.3644, 0.2131, 0.2503, 0.1721]) -Greedy action tensor([ 0.2623, 0.0268, 0.0346, -0.3018]) tensor([0.3169, 0.2504, 0.2524, 0.1803]) -Greedy action tensor([ 0.6401, -0.1495, -0.1319, -0.4207]) tensor([0.4420, 0.2007, 0.2042, 0.1530]) -Greedy action tensor([ 1.2587, -0.8804, 0.0162, -0.7171]) tensor([0.6472, 0.0762, 0.1868, 0.0897]) -Greedy action tensor([ 0.5951, -0.2718, 0.1159, -0.5201]) tensor([0.4224, 0.1775, 0.2616, 0.1385]) -Greedy action tensor([ 0.5365, -0.1219, -0.0425, -0.4119]) tensor([0.4056, 0.2100, 0.2273, 0.1571]) -Greedy action tensor([ 0.2908, 0.0785, 0.1077, -0.3272]) tensor([0.3144, 0.2543, 0.2618, 0.1695]) -Greedy action tensor([ 0.5550, -0.2145, 0.0301, -0.4886]) tensor([0.4155, 0.1924, 0.2458, 0.1463]) -Greedy action tensor([-1.9865, -0.9484, 0.3111, -0.2599]) tensor([0.0516, 0.1456, 0.5130, 0.2898]) -Greedy action tensor([-1.7810, -0.9888, 0.1458, -0.5275]) tensor([0.0736, 0.1626, 0.5058, 0.2580]) -Greedy action tensor([-1.8696, -0.5773, 0.2161, -0.2822]) tensor([0.0569, 0.2071, 0.4578, 0.2782]) -Greedy action tensor([-1.6165, -0.5171, 0.6543, -0.1763]) tensor([0.0558, 0.1676, 0.5408, 0.2357]) -Greedy action tensor([-0.6123, -0.4351, 0.2195, 0.1183]) tensor([0.1523, 0.1818, 0.3498, 0.3162]) -Greedy action tensor([-1.8127, -0.8736, -0.0078, -0.3911]) tensor([0.0726, 0.1856, 0.4412, 0.3007]) -Greedy action tensor([-1.5513, -0.4974, 0.5732, 0.2636]) tensor([0.0544, 0.1561, 0.4554, 0.3341]) -Greedy action tensor([-2.0253, -0.8959, 0.6420, -0.1263]) tensor([0.0397, 0.1229, 0.5721, 0.2653]) -Greedy action tensor([-0.8843, 0.4257, -0.3459, 0.1140]) tensor([0.1095, 0.4058, 0.1876, 0.2971]) -Greedy action tensor([-1.4971, -0.1612, 0.6139, -0.4447]) tensor([0.0628, 0.2388, 0.5185, 0.1799]) -Greedy action tensor([-1.9008, -0.8867, 0.7022, -0.0246]) tensor([0.0420, 0.1159, 0.5676, 0.2744]) -Greedy action tensor([-1.5302, -0.5218, 0.4313, 0.0977]) tensor([0.0627, 0.1719, 0.4459, 0.3194]) -Greedy action tensor([-1.6952, -0.7182, 0.0971, -0.4896]) tensor([0.0769, 0.2044, 0.4618, 0.2568]) -Greedy action tensor([-1.3839, -0.4477, 0.5778, -0.3626]) tensor([0.0744, 0.1898, 0.5292, 0.2066]) -Greedy action tensor([-0.9995, -0.2996, -0.4253, -0.2693]) tensor([0.1457, 0.2933, 0.2587, 0.3023]) -Greedy action tensor([-1.5609, -0.5261, 0.7564, -0.1965]) tensor([0.0559, 0.1574, 0.5677, 0.2189]) -Greedy action tensor([-1.9324, -0.8079, 0.7540, 0.0806]) tensor([0.0381, 0.1173, 0.5593, 0.2853]) -Greedy action tensor([-1.9939, -0.8054, 0.6023, 0.2109]) tensor([0.0374, 0.1226, 0.5012, 0.3388]) -Greedy action tensor([-1.4444, 0.1454, 0.5539, -0.4286]) tensor([0.0623, 0.3056, 0.4599, 0.1722]) -Greedy action tensor([-1.1807, -0.7565, 0.7079, 0.1990]) tensor([0.0763, 0.1166, 0.5041, 0.3031]) -Greedy action tensor([-1.6690, -0.6605, 0.0957, -0.2116]) tensor([0.0721, 0.1976, 0.4208, 0.3095]) -Greedy action tensor([-1.4828, -0.6387, 0.7337, 0.3671]) tensor([0.0530, 0.1233, 0.4865, 0.3372]) -Greedy action tensor([-1.8726, -0.6589, 0.8104, 0.2439]) tensor([0.0366, 0.1233, 0.5359, 0.3041]) -Greedy action tensor([-1.3732, -0.7469, 0.1637, -0.2984]) tensor([0.0957, 0.1790, 0.4450, 0.2803]) -Greedy action tensor([-0.6145, 0.1548, -0.1726, 0.0299]) tensor([0.1511, 0.3261, 0.2350, 0.2878]) -Greedy action tensor([-1.8080, -0.6713, 0.7228, 0.0614]) tensor([0.0432, 0.1345, 0.5424, 0.2799]) -Greedy action tensor([-1.2278, -0.5612, 0.3359, 0.1305]) tensor([0.0861, 0.1677, 0.4113, 0.3349]) -Greedy action tensor([-1.8533, -0.8072, 0.2642, -0.2212]) tensor([0.0579, 0.1648, 0.4812, 0.2961]) -Greedy action tensor([-1.6710, -0.9942, 0.0234, -0.8827]) tensor([0.0942, 0.1854, 0.5130, 0.2073]) -Greedy action tensor([-0.8776, -0.5859, 0.1748, 0.3323]) tensor([0.1169, 0.1565, 0.3348, 0.3919]) -Greedy action tensor([-1.3471, -0.6218, 0.3788, 0.1353]) tensor([0.0764, 0.1578, 0.4293, 0.3365]) -Greedy action tensor([-1.3770, 0.2462, 0.6313, -0.6067]) tensor([0.0638, 0.3233, 0.4752, 0.1378]) -Greedy action tensor([-1.3005, -0.5656, 0.3317, 0.1570]) tensor([0.0800, 0.1669, 0.4094, 0.3437]) -Greedy action tensor([-1.9837, -0.9599, 0.4604, -0.2150]) tensor([0.0472, 0.1315, 0.5442, 0.2770]) -Greedy action tensor([-1.8185, -0.4779, 0.6203, -0.0461]) tensor([0.0451, 0.1724, 0.5170, 0.2655]) -Greedy action tensor([-1.0940, 0.0717, 0.2065, -0.4651]) tensor([0.1025, 0.3289, 0.3764, 0.1923]) -Greedy action tensor([-1.9170, -0.6780, 0.4316, -0.1357]) tensor([0.0479, 0.1655, 0.5020, 0.2846]) -Greedy action tensor([-1.1986, -0.5781, 0.5990, -0.1074]) tensor([0.0842, 0.1566, 0.5083, 0.2508]) -Greedy action tensor([-2.0564, -0.8423, 0.6457, 0.0290]) tensor([0.0366, 0.1232, 0.5457, 0.2945]) -Greedy action tensor([-1.9844, -0.6891, 1.0087, 0.2548]) tensor([0.0294, 0.1075, 0.5869, 0.2762]) -Greedy action tensor([-0.9774, -0.6003, 0.4766, 0.9445]) tensor([0.0737, 0.1074, 0.3154, 0.5035]) -Greedy action tensor([-1.5496, 0.1869, 0.6488, -0.5392]) tensor([0.0542, 0.3080, 0.4888, 0.1490]) -Greedy action tensor([-2.0202, -0.6983, 0.7839, 0.1044]) tensor([0.0337, 0.1266, 0.5572, 0.2825]) -Greedy action tensor([-1.7705, -0.7813, 0.0629, -0.3700]) tensor([0.0714, 0.1921, 0.4467, 0.2898]) -Greedy action tensor([-0.7667, 0.3608, 0.1320, -0.1159]) tensor([0.1182, 0.3650, 0.2903, 0.2266]) -Greedy action tensor([-1.9263, -0.6462, 0.6067, 0.2018]) tensor([0.0391, 0.1406, 0.4921, 0.3282]) -Greedy action tensor([-2.0125, -0.7253, 0.7066, 0.0299]) tensor([0.0364, 0.1317, 0.5515, 0.2803]) -Greedy action tensor([-2.0185, -0.9218, 0.4014, -0.2191]) tensor([0.0470, 0.1407, 0.5283, 0.2841]) -Greedy action tensor([-1.1756, -0.6134, 0.3141, 0.2092]) tensor([0.0894, 0.1569, 0.3966, 0.3571]) -Greedy action tensor([-1.7741, -0.6480, 0.2238, -0.2599]) tensor([0.0625, 0.1927, 0.4608, 0.2841]) -Greedy action tensor([-2.0593, -0.8469, 0.7693, 0.0813]) tensor([0.0336, 0.1128, 0.5681, 0.2855]) -Greedy action tensor([-1.9697, -0.5452, 0.7155, -0.1083]) tensor([0.0381, 0.1583, 0.5585, 0.2451]) -Greedy action tensor([-1.9370, -0.9285, 0.3422, -0.1907]) tensor([0.0520, 0.1425, 0.5077, 0.2979]) -Greedy action tensor([-1.9600, -0.6128, 0.7110, -0.1138]) tensor([0.0390, 0.1500, 0.5638, 0.2471]) -Greedy action tensor([-2.0167, -0.8526, 0.5587, -0.0026]) tensor([0.0403, 0.1290, 0.5290, 0.3018]) -Greedy action tensor([-1.9756, -0.9897, 0.4119, -0.1692]) tensor([0.0484, 0.1298, 0.5270, 0.2948]) -Greedy action tensor([-1.1509, 0.0489, 0.4889, -0.5161]) tensor([0.0880, 0.2922, 0.4537, 0.1661]) -Greedy action tensor([-1.1218, -0.0178, 0.6133, -0.6227]) tensor([0.0882, 0.2661, 0.5003, 0.1454]) -Greedy action tensor([-1.1598, -0.5300, -0.1196, -0.3070]) tensor([0.1242, 0.2331, 0.3514, 0.2913]) -Greedy action tensor([-1.5417, -0.3097, 0.4997, -0.1579]) tensor([0.0620, 0.2127, 0.4778, 0.2475]) -Greedy action tensor([-1.0002, -0.0021, -0.2505, 0.1531]) tensor([0.1111, 0.3015, 0.2352, 0.3522]) -Greedy action tensor([-1.3475, -0.1989, 0.1334, -0.4266]) tensor([0.0904, 0.2851, 0.3975, 0.2270]) -Greedy action tensor([-0.9859, -0.5853, 0.3170, 0.4048]) tensor([0.0981, 0.1465, 0.3611, 0.3943]) -Greedy action tensor([-1.2842, -0.4047, 0.4299, -0.1688]) tensor([0.0832, 0.2006, 0.4622, 0.2540]) -Greedy action tensor([-1.2099, -0.0569, -0.4062, -0.2972]) tensor([0.1125, 0.3562, 0.2512, 0.2801]) -Greedy action tensor([-1.0046, -0.4635, -0.4396, -0.4521]) tensor([0.1609, 0.2764, 0.2831, 0.2796]) -Greedy action tensor([-1.6307, -0.5975, 0.7231, 0.2541]) tensor([0.0478, 0.1343, 0.5031, 0.3148]) -Greedy action tensor([-1.2251, -0.5835, 0.3106, 0.2062]) tensor([0.0853, 0.1620, 0.3960, 0.3568]) -Greedy action tensor([-1.9929, -0.9234, 0.4966, -0.2623]) tensor([0.0463, 0.1348, 0.5578, 0.2611]) -Greedy action tensor([-1.0907, -0.5920, 0.5112, -0.3249]) tensor([0.1025, 0.1687, 0.5085, 0.2204]) -Greedy action tensor([-1.0629, 0.0710, -0.3491, -0.1250]) tensor([0.1149, 0.3570, 0.2346, 0.2935]) -Greedy action tensor([-1.4272, -0.5017, 0.1980, 0.3875]) tensor([0.0678, 0.1711, 0.3446, 0.4165]) -Greedy action tensor([-0.9210, 0.2102, -0.3079, 0.0544]) tensor([0.1163, 0.3605, 0.2147, 0.3085]) -Greedy action tensor([-0.9576, -0.5968, 0.1775, 0.3668]) tensor([0.1075, 0.1541, 0.3343, 0.4041]) -Greedy action tensor([-1.6795, -0.8211, -0.1422, -0.4571]) tensor([0.0877, 0.2068, 0.4078, 0.2977]) -Greedy action tensor([-0.8601, 0.0095, -0.4961, 0.0644]) tensor([0.1361, 0.3248, 0.1959, 0.3431]) -Greedy action tensor([-1.4190, -0.6298, 0.5117, 0.2811]) tensor([0.0642, 0.1414, 0.4428, 0.3516]) -Greedy action tensor([-1.3768, -0.6056, 0.3586, 0.1786]) tensor([0.0737, 0.1593, 0.4179, 0.3491]) -Greedy action tensor([-0.8398, -0.5713, 0.1687, 0.3276]) tensor([0.1210, 0.1583, 0.3318, 0.3889]) -Greedy action tensor([-0.5653, 0.1254, -0.0295, 0.1592]) tensor([0.1478, 0.2948, 0.2525, 0.3049]) -Greedy action tensor([-1.8794, -0.9298, 0.2764, -0.3046]) tensor([0.0587, 0.1516, 0.5065, 0.2833]) -Greedy action tensor([ 1.1676, -0.1849, 1.3560, 0.5437]) tensor([0.3331, 0.0862, 0.4022, 0.1785]) -Greedy action tensor([1.3801, 0.3552, 0.1465, 1.2835]) tensor([0.3909, 0.1403, 0.1139, 0.3549]) -Greedy action tensor([ 1.5977, -0.8797, 1.0751, 0.8098]) tensor([0.4691, 0.0394, 0.2782, 0.2133]) -Greedy action tensor([ 1.4501, -0.8790, 0.2947, 0.9877]) tensor([0.4897, 0.0477, 0.1542, 0.3084]) -Greedy action tensor([ 1.5783, -0.2606, 0.5870, 1.2403]) tensor([0.4458, 0.0709, 0.1654, 0.3179]) -Greedy action tensor([ 1.3852, -0.4466, -0.7095, 0.6297]) tensor([0.5704, 0.0913, 0.0702, 0.2680]) -Greedy action tensor([1.3634, 0.1622, 0.2922, 0.1130]) tensor([0.5182, 0.1559, 0.1775, 0.1484]) -Greedy action tensor([ 1.9043, -0.1633, 1.0188, 1.3976]) tensor([0.4670, 0.0591, 0.1926, 0.2813]) -Greedy action tensor([ 1.8302, -0.7826, 0.7060, 1.6111]) tensor([0.4542, 0.0333, 0.1476, 0.3649]) -Greedy action tensor([ 0.4379, -1.1137, 0.8255, 0.5305]) tensor([0.2644, 0.0560, 0.3896, 0.2900]) -Greedy action tensor([ 0.8516, -0.1666, 0.9443, 1.1130]) tensor([0.2662, 0.0961, 0.2920, 0.3457]) -Greedy action tensor([ 1.1358, -0.0391, -0.4271, 0.1091]) tensor([0.5329, 0.1646, 0.1117, 0.1909]) -Greedy action tensor([ 2.1265, -0.0486, 0.6888, 1.0156]) tensor([0.5951, 0.0676, 0.1413, 0.1960]) -Greedy action tensor([ 1.1059, -0.4574, 0.6915, 0.9646]) tensor([0.3652, 0.0765, 0.2413, 0.3171]) -Greedy action tensor([ 2.0422, -0.5570, 0.8627, 0.9632]) tensor([0.5808, 0.0432, 0.1786, 0.1974]) -Greedy action tensor([ 1.6214, 0.1319, -0.0532, 1.1758]) tensor([0.4870, 0.1098, 0.0913, 0.3119]) -Greedy action tensor([ 1.5404, 0.0336, -0.1623, 0.8944]) tensor([0.5187, 0.1150, 0.0945, 0.2719]) -Greedy action tensor([ 1.0948, -0.3508, 0.3250, 1.0218]) tensor([0.3805, 0.0896, 0.1762, 0.3537]) -Greedy action tensor([ 1.4718, -0.2416, 1.2505, 0.9691]) tensor([0.3866, 0.0697, 0.3098, 0.2339]) -Greedy action tensor([ 0.3384, -0.1556, 0.6224, 1.0233]) tensor([0.2032, 0.1240, 0.2699, 0.4030]) -Greedy action tensor([ 1.2404, -0.4142, 0.6585, 0.8782]) tensor([0.4088, 0.0782, 0.2285, 0.2846]) -Greedy action tensor([ 1.9145, -1.2447, 0.4624, 1.1552]) tensor([0.5732, 0.0243, 0.1342, 0.2683]) -Greedy action tensor([ 1.7428, -0.3936, 0.0388, 0.9174]) tensor([0.5753, 0.0679, 0.1047, 0.2520]) -Greedy action tensor([ 1.2999, -0.7598, 1.0795, 0.2559]) tensor([0.4383, 0.0559, 0.3516, 0.1543]) -Greedy action tensor([ 0.2084, 0.5764, -1.3326, 0.5430]) tensor([0.2465, 0.3562, 0.0528, 0.3445]) -Greedy action tensor([ 1.4204, -0.4293, 1.3809, 1.2843]) tensor([0.3343, 0.0526, 0.3214, 0.2917]) -Greedy action tensor([ 0.9190, -0.4619, 1.2921, 1.0648]) tensor([0.2590, 0.0651, 0.3762, 0.2997]) -Greedy action tensor([ 1.2972, 0.6193, -0.5895, 0.4909]) tensor([0.4749, 0.2411, 0.0720, 0.2120]) -Greedy action tensor([ 1.3044, -0.0320, 0.6709, 0.9175]) tensor([0.4044, 0.1063, 0.2146, 0.2747]) -Greedy action tensor([ 1.8946, -0.0085, 0.5287, 0.4847]) tensor([0.6066, 0.0905, 0.1548, 0.1481]) -Greedy action tensor([ 0.9240, -0.7237, -0.0322, 1.0959]) tensor([0.3618, 0.0696, 0.1390, 0.4296]) -Greedy action tensor([ 1.9091, -0.5471, 0.1036, 2.0145]) tensor([0.4235, 0.0363, 0.0696, 0.4706]) -Greedy action tensor([ 1.4811, -0.1535, 0.6315, 0.8737]) tensor([0.4614, 0.0900, 0.1973, 0.2514]) -Greedy action tensor([ 1.4431, -0.3573, 1.3648, 1.0600]) tensor([0.3608, 0.0596, 0.3336, 0.2460]) -Greedy action tensor([ 2.2630, -0.2577, 1.0155, 1.8742]) tensor([0.4889, 0.0393, 0.1404, 0.3314]) -Greedy action tensor([ 1.8028, -0.0604, 1.9989, 1.1771]) tensor([0.3440, 0.0534, 0.4186, 0.1840]) -Greedy action tensor([ 1.2531, -0.7311, -0.1431, 0.9960]) tensor([0.4633, 0.0637, 0.1147, 0.3583]) -Greedy action tensor([1.6135, 0.1723, 0.5952, 0.6527]) tensor([0.5049, 0.1195, 0.1824, 0.1932]) -Greedy action tensor([ 0.9136, -1.1515, 0.8407, 1.5123]) tensor([0.2580, 0.0327, 0.2398, 0.4695]) -Greedy action tensor([ 1.2761, -1.1312, 0.5560, 0.5997]) tensor([0.4796, 0.0432, 0.2334, 0.2438]) -Greedy action tensor([ 1.6789, -0.0333, 0.4054, 0.8878]) tensor([0.5226, 0.0943, 0.1462, 0.2369]) -Greedy action tensor([ 1.7360, -0.3219, 1.1914, 1.1536]) tensor([0.4412, 0.0564, 0.2559, 0.2464]) -Greedy action tensor([1.4597, 0.5347, 1.2513, 0.7148]) tensor([0.3727, 0.1478, 0.3026, 0.1769]) -Greedy action tensor([ 1.4464, -0.2994, 1.6038, 1.1945]) tensor([0.3203, 0.0559, 0.3749, 0.2490]) -Greedy action tensor([ 1.0199, -0.4630, 0.4397, 0.3882]) tensor([0.4313, 0.0979, 0.2414, 0.2293]) -Greedy action tensor([2.2184, 0.1383, 1.1617, 1.6298]) tensor([0.4932, 0.0616, 0.1714, 0.2738]) -Greedy action tensor([ 1.0499, -0.4554, 1.3938, 1.2595]) tensor([0.2587, 0.0574, 0.3649, 0.3190]) -Greedy action tensor([1.5135, 0.6368, 0.6971, 1.1553]) tensor([0.3911, 0.1627, 0.1729, 0.2733]) -Greedy action tensor([ 1.9101, -0.7552, 1.0522, 1.0013]) tensor([0.5272, 0.0367, 0.2236, 0.2125]) -Greedy action tensor([ 1.6329, -0.0689, 1.0976, 2.0227]) tensor([0.3082, 0.0562, 0.1805, 0.4551]) -Greedy action tensor([ 2.2916, -0.7543, 1.4350, 0.4537]) tensor([0.6130, 0.0292, 0.2603, 0.0976]) -Greedy action tensor([ 1.3930, -0.4868, 0.3547, 1.3999]) tensor([0.3978, 0.0607, 0.1409, 0.4006]) -Greedy action tensor([ 1.3866, -0.1449, -0.6608, 0.4993]) tensor([0.5691, 0.1231, 0.0735, 0.2343]) -Greedy action tensor([ 2.1755, -0.4779, 0.9561, 1.9043]) tensor([0.4699, 0.0331, 0.1388, 0.3582]) -Greedy action tensor([ 2.2590, -0.2888, 0.5272, 1.9744]) tensor([0.4981, 0.0390, 0.0881, 0.3747]) -Greedy action tensor([ 1.0975, -0.8172, 0.0188, 0.9487]) tensor([0.4257, 0.0627, 0.1447, 0.3668]) -Greedy action tensor([ 1.6402, 0.2844, -0.2681, 0.7875]) tensor([0.5458, 0.1407, 0.0810, 0.2326]) -Greedy action tensor([ 0.6516, -0.0607, -0.4742, 1.0456]) tensor([0.3032, 0.1487, 0.0984, 0.4497]) -Greedy action tensor([2.2616, 0.1000, 1.0300, 2.0525]) tensor([0.4508, 0.0519, 0.1316, 0.3658]) -Greedy action tensor([ 1.4698, -0.7709, 0.2797, 1.1899]) tensor([0.4616, 0.0491, 0.1404, 0.3489]) -Greedy action tensor([ 2.4011, -0.4559, 0.9441, 1.6265]) tensor([0.5710, 0.0328, 0.1330, 0.2632]) -Greedy action tensor([ 1.2565, -0.1124, 0.9845, 1.3702]) tensor([0.3188, 0.0811, 0.2429, 0.3572]) -Greedy action tensor([ 1.3744, -0.5945, 1.3323, 1.1494]) tensor([0.3452, 0.0482, 0.3310, 0.2756]) -Greedy action tensor([0.5053, 0.2129, 0.3434, 1.0921]) tensor([0.2275, 0.1698, 0.1935, 0.4091]) -Greedy action tensor([ 0.9930, -0.6518, 0.8124, 0.8283]) tensor([0.3477, 0.0671, 0.2903, 0.2949]) -Greedy action tensor([ 1.3152, -0.1484, 0.7785, 1.4338]) tensor([0.3399, 0.0787, 0.1987, 0.3827]) -Greedy action tensor([ 1.5750, -0.4148, 0.1475, 1.5424]) tensor([0.4265, 0.0583, 0.1023, 0.4128]) -Greedy action tensor([ 1.4950, -0.4007, 0.8094, 0.9180]) tensor([0.4513, 0.0678, 0.2274, 0.2535]) -Greedy action tensor([ 1.3336, -0.1294, 0.2215, 0.9587]) tensor([0.4449, 0.1030, 0.1463, 0.3058]) -Greedy action tensor([ 1.3064, -0.7092, 1.4742, 1.1930]) tensor([0.3117, 0.0415, 0.3686, 0.2782]) -Greedy action tensor([ 1.4258, -0.4957, 0.7825, 0.5624]) tensor([0.4776, 0.0699, 0.2510, 0.2014]) -Greedy action tensor([ 1.1812, -0.0046, -0.6758, 1.6358]) tensor([0.3292, 0.1006, 0.0514, 0.5188]) -Greedy action tensor([ 1.3861, -0.5378, 1.0130, 0.7107]) tensor([0.4267, 0.0623, 0.2938, 0.2172]) -Greedy action tensor([ 1.2344, -0.5415, -0.1398, 1.5913]) tensor([0.3507, 0.0594, 0.0887, 0.5011]) -Greedy action tensor([ 0.8827, -0.5310, 0.8663, 0.7541]) tensor([0.3219, 0.0783, 0.3167, 0.2831]) -Greedy action tensor([1.4016, 0.2750, 0.6666, 0.4582]) tensor([0.4560, 0.1478, 0.2186, 0.1775]) -Greedy action tensor([ 1.4526, -0.1411, 1.1076, 1.0265]) tensor([0.3900, 0.0792, 0.2762, 0.2547]) -Greedy action tensor([ 1.2767, -0.5454, 1.7869, 1.5195]) tensor([0.2438, 0.0394, 0.4060, 0.3108]) -Greedy action tensor([ 1.5590, -0.8272, 1.5182, 1.1533]) tensor([0.3678, 0.0338, 0.3531, 0.2452]) -Greedy action tensor([ 1.1458, -1.0714, 1.0440, 0.4593]) tensor([0.3975, 0.0433, 0.3591, 0.2001]) -Greedy action tensor([ 1.4341, -0.4533, 0.7856, 0.4822]) tensor([0.4854, 0.0735, 0.2538, 0.1874]) -Greedy action tensor([ 2.0499, -0.5611, -0.0419, 0.8915]) tensor([0.6619, 0.0486, 0.0817, 0.2078]) -Greedy action tensor([ 1.0384, -0.5089, -0.1985, -0.1716]) tensor([0.5552, 0.1182, 0.1611, 0.1655]) -Greedy action tensor([ 1.0212, -0.5785, -0.1897, 0.0982]) tensor([0.5271, 0.1064, 0.1570, 0.2094]) -Greedy action tensor([ 0.8103, -0.4652, -0.1310, -0.1449]) tensor([0.4868, 0.1360, 0.1899, 0.1873]) -Greedy action tensor([ 1.0334, -0.6456, -0.2721, 0.0777]) tensor([0.5428, 0.1013, 0.1471, 0.2087]) -Greedy action tensor([ 0.7737, -0.4357, -0.0464, 0.1057]) tensor([0.4442, 0.1325, 0.1956, 0.2277]) -Greedy action tensor([ 0.8188, -0.0275, -0.0678, 0.0732]) tensor([0.4319, 0.1853, 0.1780, 0.2049]) -Greedy action tensor([ 1.4749, -0.1989, -0.2091, 0.0306]) tensor([0.6215, 0.1165, 0.1154, 0.1466]) -Greedy action tensor([ 1.3433, -0.6267, -0.1290, 0.1376]) tensor([0.5994, 0.0836, 0.1375, 0.1795]) -Greedy action tensor([ 0.8474, -0.4732, -0.3279, 0.0929]) tensor([0.4888, 0.1305, 0.1509, 0.2298]) -Greedy action tensor([ 0.9099, -0.7516, -0.1973, 0.1601]) tensor([0.5018, 0.0953, 0.1658, 0.2371]) -Greedy action tensor([ 1.3642, -0.7724, -0.4615, -0.1064]) tensor([0.6627, 0.0782, 0.1068, 0.1523]) -Greedy action tensor([ 0.8988, -0.2354, 0.0617, -0.2128]) tensor([0.4799, 0.1544, 0.2078, 0.1579]) -Greedy action tensor([ 1.7497, -0.2835, -0.3517, -0.1753]) tensor([0.7148, 0.0936, 0.0874, 0.1043]) -Greedy action tensor([ 0.9986, -0.7205, -0.2631, 0.1878]) tensor([0.5244, 0.0940, 0.1485, 0.2331]) -Greedy action tensor([ 1.7327, -0.6404, -0.1511, 0.3040]) tensor([0.6735, 0.0628, 0.1024, 0.1614]) -Greedy action tensor([ 1.1674, -0.6601, -0.3588, 0.0415]) tensor([0.5874, 0.0945, 0.1277, 0.1905]) -Greedy action tensor([ 1.8873, -0.6665, -0.3579, 0.2886]) tensor([0.7216, 0.0561, 0.0764, 0.1459]) -Greedy action tensor([ 1.2017, -0.3646, -0.3399, -0.2209]) tensor([0.6010, 0.1255, 0.1286, 0.1449]) -Greedy action tensor([ 1.1248, -0.4894, -0.1825, -0.0401]) tensor([0.5613, 0.1117, 0.1519, 0.1751]) -Greedy action tensor([ 1.0221, 0.0797, 0.0288, -0.1296]) tensor([0.4817, 0.1877, 0.1784, 0.1523]) -Greedy action tensor([ 1.4766, -0.4670, -0.5285, -0.1466]) tensor([0.6779, 0.0971, 0.0913, 0.1337]) -Greedy action tensor([ 0.9336, -0.5336, -0.1546, -0.1258]) tensor([0.5225, 0.1205, 0.1760, 0.1811]) -Greedy action tensor([ 0.9990, -0.4885, 0.2493, 0.0635]) tensor([0.4783, 0.1081, 0.2260, 0.1877]) -Greedy action tensor([ 0.9984, -0.4136, -0.0764, 0.0794]) tensor([0.5041, 0.1228, 0.1721, 0.2011]) -Greedy action tensor([ 0.6027, -0.4528, -0.0754, 0.0409]) tensor([0.4122, 0.1435, 0.2092, 0.2350]) -Greedy action tensor([ 0.6482, -0.4091, 0.0461, -0.0363]) tensor([0.4168, 0.1448, 0.2283, 0.2102]) -Greedy action tensor([ 1.0284, -0.4058, -0.3773, 0.4453]) tensor([0.4898, 0.1167, 0.1201, 0.2734]) -Greedy action tensor([ 1.0531, -0.4148, -0.1594, -0.1569]) tensor([0.5476, 0.1262, 0.1629, 0.1633]) -Greedy action tensor([ 0.5143, -0.3999, -0.1952, -0.0678]) tensor([0.4079, 0.1635, 0.2007, 0.2279]) -Greedy action tensor([ 1.0187, -0.4826, -0.2848, 0.0176]) tensor([0.5371, 0.1197, 0.1459, 0.1974]) -Greedy action tensor([ 1.2486, -0.2416, -0.2198, -0.1742]) tensor([0.5894, 0.1328, 0.1357, 0.1421]) -Greedy action tensor([ 0.8858, -0.2662, -0.1336, -0.1453]) tensor([0.4918, 0.1554, 0.1774, 0.1754]) -Greedy action tensor([ 1.5509, -0.4527, -0.1835, 0.1322]) tensor([0.6437, 0.0868, 0.1136, 0.1558]) -Greedy action tensor([ 0.7988, -0.3546, 0.1097, -0.1745]) tensor([0.4555, 0.1437, 0.2287, 0.1721]) -Greedy action tensor([ 0.8235, -0.4213, -0.0482, -0.2129]) tensor([0.4852, 0.1397, 0.2029, 0.1721]) -Greedy action tensor([ 0.9450, -0.4752, 0.0689, -0.2287]) tensor([0.5083, 0.1228, 0.2117, 0.1572]) -Greedy action tensor([ 0.9921, -0.5257, -0.0811, 0.0257]) tensor([0.5150, 0.1129, 0.1761, 0.1960]) -Greedy action tensor([ 0.7255, -0.1658, -0.0394, -0.2173]) tensor([0.4415, 0.1811, 0.2055, 0.1720]) -Greedy action tensor([ 0.9578, -0.5394, -0.1485, 0.1057]) tensor([0.5048, 0.1129, 0.1670, 0.2153]) -Greedy action tensor([ 1.6821, -0.5187, -0.3899, 0.1017]) tensor([0.6932, 0.0768, 0.0873, 0.1427]) -Greedy action tensor([ 1.5801, -0.5027, -0.5851, -0.0815]) tensor([0.6997, 0.0872, 0.0803, 0.1328]) -Greedy action tensor([ 1.0916, -0.6460, -0.4063, 0.1901]) tensor([0.5539, 0.0975, 0.1238, 0.2248]) -Greedy action tensor([ 1.2228, -0.6836, -0.4055, -0.1731]) tensor([0.6280, 0.0933, 0.1232, 0.1555]) -Greedy action tensor([ 0.8815, -0.3662, -0.1063, -0.1469]) tensor([0.4957, 0.1424, 0.1846, 0.1773]) -Greedy action tensor([ 0.7670, -0.5213, 0.0391, -0.0122]) tensor([0.4510, 0.1244, 0.2178, 0.2069]) -Greedy action tensor([ 0.8478, -0.5876, -0.1028, 0.1726]) tensor([0.4687, 0.1116, 0.1811, 0.2386]) -Greedy action tensor([ 0.7412, -0.7820, -0.5303, 0.1316]) tensor([0.4897, 0.1068, 0.1373, 0.2662]) -Greedy action tensor([ 1.3715, -0.2874, -0.3047, -0.1496]) tensor([0.6266, 0.1193, 0.1172, 0.1369]) -Greedy action tensor([ 0.6095, -0.3468, 0.1070, -0.2235]) tensor([0.4125, 0.1585, 0.2496, 0.1794]) -Greedy action tensor([ 0.9999, -0.0739, 0.0726, -0.2152]) tensor([0.4916, 0.1680, 0.1945, 0.1459]) -Greedy action tensor([ 0.9231, -0.5024, -0.0898, 0.0898]) tensor([0.4906, 0.1179, 0.1782, 0.2132]) -Greedy action tensor([ 0.5281, -0.5018, 0.1098, -0.2032]) tensor([0.4006, 0.1430, 0.2636, 0.1928]) -Greedy action tensor([ 0.6461, -0.4474, -0.0280, -0.1023]) tensor([0.4314, 0.1446, 0.2199, 0.2041]) -Greedy action tensor([ 1.4699, -0.6250, -0.1170, 0.2716]) tensor([0.6137, 0.0755, 0.1255, 0.1852]) -Greedy action tensor([ 1.5195, -0.2918, -0.2047, -0.1337]) tensor([0.6522, 0.1066, 0.1163, 0.1249]) -Greedy action tensor([ 0.8298, -0.3345, -0.1443, 0.1573]) tensor([0.4545, 0.1419, 0.1716, 0.2320]) -Greedy action tensor([ 0.8626, -0.5830, 0.0303, 0.0022]) tensor([0.4776, 0.1125, 0.2078, 0.2020]) -Greedy action tensor([ 0.9785, -0.6037, -0.1462, 0.2061]) tensor([0.5020, 0.1032, 0.1630, 0.2319]) -Greedy action tensor([ 1.1751, -0.3380, -0.1618, 0.1777]) tensor([0.5400, 0.1189, 0.1419, 0.1992]) -Greedy action tensor([ 0.9897, -0.4323, 0.0328, -0.1349]) tensor([0.5128, 0.1237, 0.1970, 0.1665]) -Greedy action tensor([ 1.3662, -0.5474, -0.3376, 0.1493]) tensor([0.6151, 0.0908, 0.1119, 0.1822]) -Greedy action tensor([ 0.8790, -0.3347, -0.2430, -0.1162]) tensor([0.5019, 0.1491, 0.1634, 0.1855]) -Greedy action tensor([ 0.9507, -0.4830, 0.0199, -0.2567]) tensor([0.5177, 0.1234, 0.2041, 0.1548]) -Greedy action tensor([ 1.4003, -0.4189, -0.2950, -0.0512]) tensor([0.6329, 0.1026, 0.1162, 0.1482]) -Greedy action tensor([ 0.9249, -0.3422, -0.0604, -0.1749]) tensor([0.5030, 0.1417, 0.1878, 0.1675]) -Greedy action tensor([ 0.7280, -0.5077, -0.1293, 0.1575]) tensor([0.4386, 0.1275, 0.1861, 0.2479]) -Greedy action tensor([ 1.5137, -0.5345, -0.4616, 0.2533]) tensor([0.6447, 0.0831, 0.0894, 0.1828]) -Greedy action tensor([ 1.0082, -0.4772, -0.0501, -0.0820]) tensor([0.5237, 0.1186, 0.1817, 0.1760]) -Greedy action tensor([ 0.6804, -0.3583, -0.1394, 0.0712]) tensor([0.4277, 0.1514, 0.1884, 0.2326]) -Greedy action tensor([ 1.5216, -0.4980, -0.4056, -0.1755]) tensor([0.6842, 0.0908, 0.0996, 0.1254]) -Greedy action tensor([ 0.8181, -0.4572, -0.3848, 0.0044]) tensor([0.4943, 0.1381, 0.1485, 0.2191]) -Greedy action tensor([ 1.6743, -0.5619, -0.3135, 0.1330]) tensor([0.6859, 0.0733, 0.0940, 0.1468]) -Greedy action tensor([ 1.4360, -0.4901, -0.1001, 0.0221]) tensor([0.6234, 0.0908, 0.1342, 0.1516]) -Greedy action tensor([ 1.0670, -0.6712, -0.4153, 0.3674]) tensor([0.5264, 0.0926, 0.1196, 0.2615]) -Greedy action tensor([ 1.1435, -0.0482, 0.0165, -0.0895]) tensor([0.5211, 0.1582, 0.1688, 0.1519]) -Greedy action tensor([ 0.9939, -0.0724, 0.0137, -0.2381]) tensor([0.4972, 0.1712, 0.1866, 0.1450]) -Greedy action tensor([ 1.7334, -0.5677, -0.3725, 0.0431]) tensor([0.7111, 0.0712, 0.0866, 0.1312]) -Greedy action tensor([ 1.1674, -0.3995, -0.2508, -0.1321]) tensor([0.5802, 0.1211, 0.1405, 0.1582]) -Greedy action tensor([ 1.7628, -0.4531, -0.5419, -0.0877]) tensor([0.7321, 0.0798, 0.0731, 0.1150]) -Greedy action tensor([ 0.9390, 0.0855, -0.1188, -0.0846]) tensor([0.4690, 0.1997, 0.1628, 0.1685]) -Greedy action tensor([ 0.5703, -0.3864, -0.1255, -0.1132]) tensor([0.4188, 0.1609, 0.2089, 0.2114]) -Greedy action tensor([ 0.2449, 0.0428, 0.0654, -0.2889]) tensor([0.3087, 0.2522, 0.2580, 0.1810]) -Greedy action tensor([ 0.7291, -0.1997, 0.0395, -0.6996]) tensor([0.4681, 0.1849, 0.2349, 0.1122]) -Greedy action tensor([ 0.2647, -0.1264, 0.1860, -0.2516]) tensor([0.3127, 0.2115, 0.2891, 0.1866]) -Greedy action tensor([ 0.4346, -0.1259, 0.2119, -0.4112]) tensor([0.3571, 0.2039, 0.2858, 0.1533]) -Greedy action tensor([ 0.6299, -0.2813, 0.0367, -0.3890]) tensor([0.4318, 0.1736, 0.2386, 0.1559]) -Greedy action tensor([ 0.4827, -0.0700, 0.1250, -0.4290]) tensor([0.3736, 0.2150, 0.2613, 0.1501]) -Greedy action tensor([ 0.3228, -0.1546, -0.0277, -0.4781]) tensor([0.3605, 0.2237, 0.2539, 0.1618]) -Greedy action tensor([ 0.4375, -0.0635, 0.0094, -0.3393]) tensor([0.3680, 0.2230, 0.2398, 0.1692]) -Greedy action tensor([ 0.6518, -0.1523, -0.0766, -0.3900]) tensor([0.4380, 0.1960, 0.2114, 0.1545]) -Greedy action tensor([ 0.3387, 0.1210, 0.0615, -0.2639]) tensor([0.3216, 0.2587, 0.2437, 0.1760]) -Greedy action tensor([ 0.7072, -0.2039, 0.0939, -0.4904]) tensor([0.4453, 0.1791, 0.2412, 0.1345]) -Greedy action tensor([ 0.3898, -0.0350, 0.0449, -0.2869]) tensor([0.3484, 0.2278, 0.2467, 0.1771]) -Greedy action tensor([ 0.8753, -0.5972, 0.0523, -0.5971]) tensor([0.5269, 0.1208, 0.2314, 0.1209]) -Greedy action tensor([ 0.7887, -0.3730, -0.1687, -0.5437]) tensor([0.5100, 0.1596, 0.1958, 0.1346]) -Greedy action tensor([ 1.0387, -0.5948, 0.0555, -0.6624]) tensor([0.5708, 0.1115, 0.2136, 0.1042]) -Greedy action tensor([ 0.2967, 0.0373, 0.1035, -0.1042]) tensor([0.3062, 0.2363, 0.2524, 0.2051]) -Greedy action tensor([ 0.3955, 0.2476, -0.0264, 0.0213]) tensor([0.3119, 0.2690, 0.2045, 0.2145]) -Greedy action tensor([ 0.3567, 0.1250, 0.0431, -0.2008]) tensor([0.3229, 0.2562, 0.2360, 0.1849]) -Greedy action tensor([ 0.4210, -0.0831, 0.0648, -0.1052]) tensor([0.3454, 0.2086, 0.2419, 0.2041]) -Greedy action tensor([ 0.3211, 0.0477, 0.2459, -0.0613]) tensor([0.2967, 0.2257, 0.2752, 0.2024]) -Greedy action tensor([ 0.3803, 0.1951, 0.1781, -0.2268]) tensor([0.3132, 0.2602, 0.2559, 0.1707]) -Greedy action tensor([ 0.1717, 0.1236, 0.0972, -0.0775]) tensor([0.2732, 0.2603, 0.2536, 0.2129]) -Greedy action tensor([ 0.3021, 0.0165, 0.1838, -0.1918]) tensor([0.3077, 0.2312, 0.2733, 0.1878]) -Greedy action tensor([ 0.2299, 0.1183, 0.1554, -0.3145]) tensor([0.2939, 0.2628, 0.2728, 0.1705]) -Greedy action tensor([ 0.2682, 0.0999, -0.0102, 0.0491]) tensor([0.2937, 0.2482, 0.2223, 0.2359]) -Greedy action tensor([ 0.3294, 0.1203, 0.1247, -0.1876]) tensor([0.3103, 0.2518, 0.2529, 0.1850]) -Greedy action tensor([ 0.1252, 0.0642, 0.1651, -0.1423]) tensor([0.2669, 0.2511, 0.2778, 0.2043]) -Greedy action tensor([ 0.5364, -0.0361, -0.0455, -0.2478]) tensor([0.3877, 0.2187, 0.2166, 0.1770]) -Greedy action tensor([ 0.4232, -0.0359, 0.1561, -0.1608]) tensor([0.3384, 0.2138, 0.2591, 0.1887]) -Greedy action tensor([ 0.4164, 0.1408, 0.1324, -0.1606]) tensor([0.3254, 0.2470, 0.2449, 0.1827]) -Greedy action tensor([ 0.6547, -0.1364, 0.0359, -0.5258]) tensor([0.4350, 0.1972, 0.2343, 0.1336]) -Greedy action tensor([ 0.3824, 0.1887, 0.1821, -0.1966]) tensor([0.3122, 0.2572, 0.2555, 0.1750]) -Greedy action tensor([ 0.2617, 0.1555, 0.0464, -0.2766]) tensor([0.3040, 0.2734, 0.2451, 0.1775]) -Greedy action tensor([ 0.5146, 0.2411, -0.0635, -0.0028]) tensor([0.3427, 0.2607, 0.1923, 0.2043]) -Greedy action tensor([ 0.3082, 0.0545, 0.1688, -0.1288]) tensor([0.3038, 0.2357, 0.2642, 0.1962]) -Greedy action tensor([ 0.4027, 0.0258, 0.1802, -0.1372]) tensor([0.3258, 0.2235, 0.2608, 0.1899]) -Greedy action tensor([ 0.4821, -0.0389, 0.1877, -0.4383]) tensor([0.3653, 0.2170, 0.2722, 0.1455]) -Greedy action tensor([ 1.1045, -0.7530, 0.1090, -0.7665]) tensor([0.5954, 0.0929, 0.2200, 0.0917]) -Greedy action tensor([ 0.5541, -0.0208, 0.0655, -0.1701]) tensor([0.3758, 0.2115, 0.2305, 0.1822]) -Greedy action tensor([ 0.3968, -0.1971, 0.0224, -0.4066]) tensor([0.3721, 0.2054, 0.2559, 0.1666]) -Greedy action tensor([ 0.5310, -0.0291, 0.0219, -0.3296]) tensor([0.3854, 0.2201, 0.2316, 0.1630]) -Greedy action tensor([ 0.3517, 0.1003, 0.1605, -0.1616]) tensor([0.3123, 0.2429, 0.2579, 0.1869]) -Greedy action tensor([ 0.2749, 0.1138, 0.1769, -0.1707]) tensor([0.2943, 0.2505, 0.2668, 0.1885]) -Greedy action tensor([ 0.2634, 0.1711, 0.1060, -0.1968]) tensor([0.2944, 0.2684, 0.2515, 0.1858]) -Greedy action tensor([ 0.3797, 0.0889, 0.0357, -0.2358]) tensor([0.3337, 0.2495, 0.2365, 0.1803]) -Greedy action tensor([ 0.5286, 0.0083, 0.0578, -0.2966]) tensor([0.3764, 0.2237, 0.2350, 0.1649]) -Greedy action tensor([ 0.2822, 0.1050, 0.2046, -0.1328]) tensor([0.2921, 0.2447, 0.2703, 0.1929]) -Greedy action tensor([ 0.3393, 0.1207, 0.1173, -0.2185]) tensor([0.3148, 0.2530, 0.2521, 0.1802]) -Greedy action tensor([ 0.2910, 0.0808, 0.1584, -0.1451]) tensor([0.3000, 0.2432, 0.2628, 0.1940]) -Greedy action tensor([ 0.2299, 0.1806, 0.1674, -0.2135]) tensor([0.2830, 0.2694, 0.2659, 0.1817]) -Greedy action tensor([ 0.2445, 0.0367, 0.2757, -0.0940]) tensor([0.2811, 0.2284, 0.2901, 0.2004]) -Greedy action tensor([ 0.7089, -0.2987, 0.0015, -0.5145]) tensor([0.4646, 0.1696, 0.2290, 0.1367]) -Greedy action tensor([ 0.4676, 0.1531, 0.0801, -0.3042]) tensor([0.3483, 0.2543, 0.2364, 0.1610]) -Greedy action tensor([ 0.3673, -0.2341, 0.1214, -0.4021]) tensor([0.3580, 0.1962, 0.2800, 0.1658]) -Greedy action tensor([ 0.1450, 0.1844, 0.1104, -0.1692]) tensor([0.2676, 0.2784, 0.2585, 0.1955]) -Greedy action tensor([ 0.2544, 0.0608, 0.0933, -0.2904]) tensor([0.3072, 0.2531, 0.2615, 0.1782]) -Greedy action tensor([ 0.7989, -0.5962, 0.0820, -0.5314]) tensor([0.4999, 0.1239, 0.2441, 0.1322]) -Greedy action tensor([ 0.4942, 0.1583, 0.0743, -0.1383]) tensor([0.3445, 0.2462, 0.2263, 0.1830]) -Greedy action tensor([ 1.0712, -0.6290, -0.1498, -0.7466]) tensor([0.6098, 0.1114, 0.1798, 0.0990]) -Greedy action tensor([0.3529, 0.0731, 0.0764, 0.0473]) tensor([0.3076, 0.2325, 0.2333, 0.2266]) -Greedy action tensor([ 0.3537, 0.1017, 0.1585, -0.0372]) tensor([0.3052, 0.2372, 0.2511, 0.2065]) -Greedy action tensor([ 0.4126, 0.1722, 0.1043, -0.1660]) tensor([0.3245, 0.2552, 0.2384, 0.1819]) -Greedy action tensor([ 0.4436, 0.0941, 0.0600, -0.1864]) tensor([0.3426, 0.2415, 0.2334, 0.1825]) -Greedy action tensor([ 0.2070, -0.0322, 0.1316, -0.1341]) tensor([0.2919, 0.2298, 0.2707, 0.2076]) -Greedy action tensor([ 1.3508, -0.8181, 0.0427, -0.8103]) tensor([0.6668, 0.0762, 0.1802, 0.0768]) -Greedy action tensor([ 0.5865, 0.1634, -0.1463, -0.3070]) tensor([0.3930, 0.2574, 0.1888, 0.1608]) -Greedy action tensor([ 0.3454, -0.0130, 0.0866, -0.4051]) tensor([0.3398, 0.2375, 0.2623, 0.1604]) -Greedy action tensor([ 0.8758, -0.4887, -0.0388, -0.5495]) tensor([0.5272, 0.1347, 0.2113, 0.1268]) -Greedy action tensor([ 0.2628, 0.0556, 0.1888, -0.1746]) tensor([0.2952, 0.2400, 0.2742, 0.1906]) -Greedy action tensor([ 0.7301, -0.1892, -0.0117, -0.2933]) tensor([0.4475, 0.1785, 0.2132, 0.1608]) -Greedy action tensor([ 0.7166, -0.1121, -0.0127, -0.4311]) tensor([0.4472, 0.1952, 0.2157, 0.1419]) -Greedy action tensor([ 0.8569, -0.3733, -0.0579, -0.6611]) tensor([0.5230, 0.1528, 0.2095, 0.1146]) -Greedy action tensor([ 0.4055, 0.0058, 0.1034, -0.2096]) tensor([0.3389, 0.2273, 0.2506, 0.1832]) -Greedy action tensor([ 0.3759, 0.0971, 0.1007, -0.2700]) tensor([0.3289, 0.2489, 0.2498, 0.1724]) -Greedy action tensor([ 1.0789, -0.4874, -0.0932, -0.7073]) tensor([0.5931, 0.1238, 0.1837, 0.0994]) -Greedy action tensor([ 0.7241, -0.1333, -0.1278, -0.2859]) tensor([0.4514, 0.1915, 0.1926, 0.1644]) -Greedy action tensor([ 0.8520, -0.4077, -0.0346, -0.4710]) tensor([0.5097, 0.1446, 0.2100, 0.1357]) -Greedy action tensor([ 0.5330, -0.1380, 0.0754, -0.2690]) tensor([0.3857, 0.1972, 0.2441, 0.1730]) -Greedy action tensor([0.2441, 0.0536, 0.2484, 0.0089]) tensor([0.2761, 0.2283, 0.2773, 0.2183]) -Greedy action tensor([ 0.3660, -0.0252, 0.0215, -0.1207]) tensor([0.3334, 0.2255, 0.2362, 0.2049]) -Greedy action tensor([ 0.2270, -0.0643, 0.1481, -0.1947]) tensor([0.3005, 0.2246, 0.2777, 0.1971]) -Greedy action tensor([ 0.4982, 0.0368, 0.1089, -0.3556]) tensor([0.3658, 0.2306, 0.2478, 0.1558]) -Greedy action tensor([-0.9572, -0.5583, 0.3993, 0.0171]) tensor([0.1108, 0.1652, 0.4303, 0.2937]) -Greedy action tensor([-1.9110, -0.8850, 0.1702, -0.3034]) tensor([0.0595, 0.1661, 0.4772, 0.2972]) -Greedy action tensor([-1.7664, -0.6261, 0.6875, 0.0989]) tensor([0.0450, 0.1408, 0.5236, 0.2906]) -Greedy action tensor([-1.9019, -0.8498, 0.1841, -0.4825]) tensor([0.0623, 0.1784, 0.5017, 0.2576]) -Greedy action tensor([-0.8156, -0.0218, -0.4871, -0.0135]) tensor([0.1464, 0.3238, 0.2033, 0.3265]) -Greedy action tensor([-0.8633, 0.3141, -0.1400, -0.0050]) tensor([0.1154, 0.3745, 0.2378, 0.2722]) -Greedy action tensor([-1.2493, 0.0041, 0.5364, -0.4674]) tensor([0.0790, 0.2768, 0.4714, 0.1728]) -Greedy action tensor([-1.8941, -0.7665, 0.2139, -0.1393]) tensor([0.0552, 0.1706, 0.4547, 0.3194]) -Greedy action tensor([-1.9710, -0.8165, 0.5036, -0.1387]) tensor([0.0448, 0.1423, 0.5327, 0.2802]) -Greedy action tensor([-1.9179, -0.9618, 0.2222, -0.3604]) tensor([0.0594, 0.1544, 0.5045, 0.2817]) -Greedy action tensor([-0.5743, -0.5535, 0.2936, -0.0480]) tensor([0.1641, 0.1675, 0.3908, 0.2777]) -Greedy action tensor([-2.0138, -0.6834, 0.8428, 0.0997]) tensor([0.0328, 0.1242, 0.5713, 0.2717]) -Greedy action tensor([-1.1936, -0.5790, 0.3554, 0.0329]) tensor([0.0912, 0.1686, 0.4293, 0.3109]) -Greedy action tensor([-1.6487, -0.5870, 0.6614, 0.0205]) tensor([0.0519, 0.1500, 0.5227, 0.2754]) -Greedy action tensor([-0.6156, -0.5393, 0.2084, 0.1274]) tensor([0.1548, 0.1670, 0.3528, 0.3254]) -Greedy action tensor([-1.3823, -0.5447, 0.3469, 0.1873]) tensor([0.0727, 0.1680, 0.4099, 0.3494]) -Greedy action tensor([-1.1297, 0.2933, 0.3469, -0.2131]) tensor([0.0831, 0.3450, 0.3640, 0.2079]) -Greedy action tensor([-1.9202, -0.5981, 0.8423, 0.1123]) tensor([0.0354, 0.1329, 0.5612, 0.2704]) -Greedy action tensor([-1.6589, -0.4998, 0.7492, -0.3114]) tensor([0.0522, 0.1664, 0.5804, 0.2009]) -Greedy action tensor([-1.4751, 0.0017, 0.6103, -0.4892]) tensor([0.0621, 0.2719, 0.4996, 0.1664]) -Greedy action tensor([-1.8613, -0.6055, 0.8132, 0.1505]) tensor([0.0377, 0.1325, 0.5475, 0.2822]) -Greedy action tensor([-2.0397, -0.7180, 0.9091, 0.1395]) tensor([0.0306, 0.1148, 0.5841, 0.2705]) -Greedy action tensor([-1.8590, -0.5336, 0.5805, -0.1151]) tensor([0.0456, 0.1715, 0.5224, 0.2606]) -Greedy action tensor([-2.0317, -0.7286, 1.1909, 0.4044]) tensor([0.0243, 0.0893, 0.6090, 0.2774]) -Greedy action tensor([-1.9724, -0.9782, 0.3491, -0.3255]) tensor([0.0524, 0.1416, 0.5340, 0.2720]) -Greedy action tensor([-1.9848, -0.9478, 0.4505, -0.2926]) tensor([0.0484, 0.1365, 0.5524, 0.2628]) -Greedy action tensor([-0.9871, -0.6512, 0.4792, 0.5234]) tensor([0.0888, 0.1242, 0.3848, 0.4022]) -Greedy action tensor([-1.8163, -0.5716, 0.8794, 0.2931]) tensor([0.0363, 0.1261, 0.5382, 0.2994]) -Greedy action tensor([-2.0335, -0.8482, 0.5104, -0.0827]) tensor([0.0416, 0.1361, 0.5296, 0.2927]) -Greedy action tensor([-1.3026, -0.5668, 0.3917, 0.3348]) tensor([0.0731, 0.1527, 0.3981, 0.3761]) -Greedy action tensor([-1.9757, -0.7375, 0.9023, 0.1327]) tensor([0.0328, 0.1132, 0.5836, 0.2703]) -Greedy action tensor([-1.9706, -0.6943, 0.6319, -0.0439]) tensor([0.0401, 0.1436, 0.5410, 0.2752]) -Greedy action tensor([-1.5628, -0.4361, 0.4643, 0.0027]) tensor([0.0607, 0.1874, 0.4612, 0.2907]) -Greedy action tensor([-1.9552, -0.6118, 0.5791, -0.1437]) tensor([0.0424, 0.1627, 0.5351, 0.2598]) -Greedy action tensor([-1.3862, -0.5660, 0.6704, 0.1483]) tensor([0.0636, 0.1444, 0.4971, 0.2949]) -Greedy action tensor([-0.9438, -0.5783, 0.5266, 0.0782]) tensor([0.1045, 0.1506, 0.4546, 0.2903]) -Greedy action tensor([-1.9108, -0.6677, 0.3665, -0.0945]) tensor([0.0491, 0.1702, 0.4788, 0.3019]) -Greedy action tensor([-1.9905, -0.6684, 0.5536, -0.0592]) tensor([0.0410, 0.1539, 0.5222, 0.2829]) -Greedy action tensor([-1.5258, -0.6574, 0.0561, -0.3104]) tensor([0.0861, 0.2051, 0.4186, 0.2902]) -Greedy action tensor([-1.4831, -0.5750, 0.4244, 0.0832]) tensor([0.0666, 0.1653, 0.4489, 0.3192]) -Greedy action tensor([-1.7823, -0.4919, 0.7193, 0.1082]) tensor([0.0426, 0.1549, 0.5201, 0.2823]) -Greedy action tensor([-1.8841, -0.9581, 0.2930, -0.3332]) tensor([0.0586, 0.1480, 0.5170, 0.2764]) -Greedy action tensor([-1.5634, -0.5670, -0.0071, -0.5028]) tensor([0.0882, 0.2389, 0.4182, 0.2547]) -Greedy action tensor([-2.0261, -0.8800, 0.6243, 0.0619]) tensor([0.0379, 0.1193, 0.5369, 0.3059]) -Greedy action tensor([-1.9378, -0.4560, 0.5082, -0.1752]) tensor([0.0439, 0.1933, 0.5069, 0.2559]) -Greedy action tensor([-1.9736, -0.9822, 0.4155, -0.3911]) tensor([0.0514, 0.1385, 0.5601, 0.2500]) -Greedy action tensor([-1.5670, -0.8524, 0.1809, -0.5959]) tensor([0.0875, 0.1788, 0.5026, 0.2311]) -Greedy action tensor([-1.3811, -0.4727, 0.7374, -0.5099]) tensor([0.0705, 0.1748, 0.5863, 0.1684]) -Greedy action tensor([-1.9831, -0.6453, 0.4091, -0.0985]) tensor([0.0448, 0.1706, 0.4898, 0.2948]) -Greedy action tensor([-1.0162, -0.0295, -0.0210, -0.0500]) tensor([0.1109, 0.2975, 0.3001, 0.2915]) -Greedy action tensor([-1.9571, -0.7913, 0.4525, 0.0037]) tensor([0.0446, 0.1430, 0.4959, 0.3166]) -Greedy action tensor([-2.0262, -0.8954, 0.7047, -0.0890]) tensor([0.0379, 0.1174, 0.5817, 0.2630]) -Greedy action tensor([-2.0538, -0.8453, 0.8027, 0.0688]) tensor([0.0332, 0.1112, 0.5780, 0.2775]) -Greedy action tensor([-1.8573, -0.6072, 0.4285, -0.1938]) tensor([0.0510, 0.1781, 0.5017, 0.2692]) -Greedy action tensor([-1.1598, -0.5684, 0.3325, 0.1052]) tensor([0.0926, 0.1673, 0.4119, 0.3282]) -Greedy action tensor([-0.8858, -0.2937, -0.4938, 0.1673]) tensor([0.1398, 0.2527, 0.2069, 0.4007]) -Greedy action tensor([-1.8674, -0.9724, 0.1451, -0.5306]) tensor([0.0679, 0.1661, 0.5077, 0.2583]) -Greedy action tensor([-1.9466, -0.9838, 0.3096, -0.3940]) tensor([0.0559, 0.1464, 0.5337, 0.2641]) -Greedy action tensor([-1.5358, -0.5424, 0.4464, 0.0763]) tensor([0.0626, 0.1691, 0.4544, 0.3139]) -Greedy action tensor([-1.2158, -0.5602, 0.6612, -0.0428]) tensor([0.0788, 0.1518, 0.5148, 0.2546]) -Greedy action tensor([-1.8262, -0.4264, 0.5953, -0.1096]) tensor([0.0457, 0.1853, 0.5147, 0.2543]) -Greedy action tensor([-1.6784, -0.6229, 0.7767, 0.3659]) tensor([0.0430, 0.1236, 0.5011, 0.3323]) -Greedy action tensor([-1.5872, -0.7741, 0.7367, 0.5295]) tensor([0.0459, 0.1036, 0.4692, 0.3814]) -Greedy action tensor([-1.1615, -0.6545, 0.3089, 0.3632]) tensor([0.0862, 0.1431, 0.3749, 0.3958]) -Greedy action tensor([-0.7340, -0.0620, 0.2226, 0.0075]) tensor([0.1305, 0.2556, 0.3398, 0.2740]) -Greedy action tensor([-1.9598, -0.9307, 0.2790, -0.3931]) tensor([0.0556, 0.1557, 0.5221, 0.2666]) -Greedy action tensor([-1.6575, -0.5115, 0.5079, 0.0367]) tensor([0.0546, 0.1718, 0.4762, 0.2973]) -Greedy action tensor([-1.5578, -0.5702, 0.7078, 0.3837]) tensor([0.0493, 0.1323, 0.4749, 0.3435]) -Greedy action tensor([-1.9631, -0.8170, 0.3593, -0.2255]) tensor([0.0499, 0.1571, 0.5092, 0.2838]) -Greedy action tensor([-1.4592, -0.5276, 0.4085, 0.2335]) tensor([0.0647, 0.1644, 0.4191, 0.3518]) -Greedy action tensor([-1.8796, -0.9020, 0.1989, -0.3218]) tensor([0.0610, 0.1621, 0.4874, 0.2896]) -Greedy action tensor([-1.6547, -0.5420, 0.5430, 0.0142]) tensor([0.0545, 0.1658, 0.4906, 0.2891]) -Greedy action tensor([-1.5051, -0.2329, 0.4768, -0.1398]) tensor([0.0635, 0.2267, 0.4610, 0.2488]) -Greedy action tensor([-0.8814, -0.6863, 0.5346, -0.0662]) tensor([0.1163, 0.1414, 0.4794, 0.2629]) -Greedy action tensor([-1.3300, -0.5494, 0.2627, 0.0098]) tensor([0.0839, 0.1832, 0.4126, 0.3204]) -Greedy action tensor([-1.1050, 0.3661, -0.1112, -0.4816]) tensor([0.1008, 0.4389, 0.2723, 0.1880]) -Greedy action tensor([-1.5073, -0.0259, 0.4994, -0.2318]) tensor([0.0609, 0.2679, 0.4531, 0.2181]) -Greedy action tensor([-1.5472, -0.4904, 0.6483, 0.3710]) tensor([0.0508, 0.1463, 0.4568, 0.3461]) -Greedy action tensor([-1.6687, -0.5135, 0.5448, 0.0333]) tensor([0.0532, 0.1688, 0.4864, 0.2916]) -Greedy action tensor([-0.9681, 0.3119, -0.0840, -0.1143]) tensor([0.1068, 0.3840, 0.2585, 0.2508]) -Greedy action tensor([-2.0455, -0.7505, 0.8790, 0.1269]) tensor([0.0312, 0.1139, 0.5810, 0.2739]) -Greedy action tensor([1.8888, 0.3341, 0.3483, 0.8234]) tensor([0.5649, 0.1193, 0.1211, 0.1947]) -Greedy action tensor([ 1.2515, -0.4953, 0.5707, 0.9596]) tensor([0.4120, 0.0718, 0.2085, 0.3077]) -Greedy action tensor([ 2.0928, -0.9256, -0.1131, 0.9149]) tensor([0.6817, 0.0333, 0.0751, 0.2099]) -Greedy action tensor([ 0.7814, -0.8989, 0.6686, 1.2696]) tensor([0.2696, 0.0502, 0.2408, 0.4393]) -Greedy action tensor([ 1.8632, 0.0961, -0.1090, 1.5111]) tensor([0.4967, 0.0849, 0.0691, 0.3493]) -Greedy action tensor([ 1.4648, -0.7041, 0.1986, 1.4079]) tensor([0.4272, 0.0488, 0.1204, 0.4036]) -Greedy action tensor([ 0.9177, -0.2778, 0.1689, 0.8374]) tensor([0.3706, 0.1121, 0.1753, 0.3420]) -Greedy action tensor([1.7657, 0.0092, 0.1296, 1.6757]) tensor([0.4384, 0.0757, 0.0854, 0.4006]) -Greedy action tensor([ 1.8130, -0.5423, 0.4323, 1.9891]) tensor([0.3939, 0.0374, 0.0990, 0.4697]) -Greedy action tensor([ 1.3613, -0.0516, -0.5639, 0.6306]) tensor([0.5345, 0.1301, 0.0780, 0.2574]) -Greedy action tensor([2.1148, 0.0301, 0.4182, 1.1951]) tensor([0.5861, 0.0729, 0.1074, 0.2336]) -Greedy action tensor([ 1.4223, -1.4157, 0.9133, 1.4284]) tensor([0.3751, 0.0220, 0.2255, 0.3774]) -Greedy action tensor([ 0.9859, -0.1601, 1.2184, 1.2575]) tensor([0.2570, 0.0817, 0.3242, 0.3371]) -Greedy action tensor([ 1.2914, -0.5063, 0.8865, 0.8821]) tensor([0.4005, 0.0664, 0.2672, 0.2660]) -Greedy action tensor([ 1.2856, -0.0668, 1.0191, 0.9283]) tensor([0.3671, 0.0949, 0.2812, 0.2568]) -Greedy action tensor([1.1437, 0.2061, 0.1325, 0.0732]) tensor([0.4766, 0.1866, 0.1734, 0.1634]) -Greedy action tensor([ 1.5797, 0.3722, -0.0438, 0.6161]) tensor([0.5326, 0.1592, 0.1050, 0.2032]) -Greedy action tensor([ 0.9760, -0.2584, 0.0166, 1.0442]) tensor([0.3643, 0.1060, 0.1396, 0.3901]) -Greedy action tensor([1.8646, 0.0702, 0.0932, 0.6755]) tensor([0.6095, 0.1013, 0.1037, 0.1856]) -Greedy action tensor([ 1.6729, -0.6624, 1.4772, 1.6206]) tensor([0.3487, 0.0337, 0.2867, 0.3309]) -Greedy action tensor([ 2.1657, -0.9286, 0.0814, 1.3048]) tensor([0.6280, 0.0284, 0.0781, 0.2655]) -Greedy action tensor([ 2.2717, -0.1295, 1.1244, 1.1340]) tensor([0.5785, 0.0524, 0.1837, 0.1854]) -Greedy action tensor([ 2.0153, -0.5626, 1.1022, 1.9788]) tensor([0.4096, 0.0311, 0.1644, 0.3949]) -Greedy action tensor([ 1.6732, -0.3684, -0.3268, 1.7284]) tensor([0.4307, 0.0559, 0.0583, 0.4551]) -Greedy action tensor([ 1.4517, -0.5170, 0.0138, 0.4215]) tensor([0.5767, 0.0805, 0.1369, 0.2058]) -Greedy action tensor([ 1.3545, -0.5229, 1.1314, 0.9870]) tensor([0.3780, 0.0578, 0.3024, 0.2617]) -Greedy action tensor([ 1.8041, -0.2255, 0.7651, 1.8048]) tensor([0.4023, 0.0529, 0.1423, 0.4026]) -Greedy action tensor([ 2.1893, -0.2598, 1.2703, 0.9330]) tensor([0.5650, 0.0488, 0.2254, 0.1608]) -Greedy action tensor([ 0.9857, -0.0812, 0.1909, 0.9732]) tensor([0.3593, 0.1236, 0.1623, 0.3548]) -Greedy action tensor([ 1.0488, 0.5753, -1.4464, 0.6890]) tensor([0.4161, 0.2592, 0.0343, 0.2904]) -Greedy action tensor([ 1.1605, -0.8223, 0.5506, 0.6337]) tensor([0.4402, 0.0606, 0.2392, 0.2599]) -Greedy action tensor([ 2.3514, -0.6838, -0.0849, 0.6375]) tensor([0.7600, 0.0365, 0.0665, 0.1369]) -Greedy action tensor([ 2.0525, -0.1305, 1.0160, 1.7256]) tensor([0.4569, 0.0515, 0.1621, 0.3295]) -Greedy action tensor([ 2.1278, -0.5049, 0.5188, 1.1731]) tensor([0.6035, 0.0434, 0.1208, 0.2323]) -Greedy action tensor([ 1.6666, -0.5433, 1.1000, 1.2779]) tensor([0.4246, 0.0466, 0.2409, 0.2879]) -Greedy action tensor([ 2.1070, -0.5319, -0.3493, 1.0113]) tensor([0.6705, 0.0479, 0.0575, 0.2241]) -Greedy action tensor([ 1.7317, 0.4728, -0.5520, 0.5396]) tensor([0.5919, 0.1681, 0.0603, 0.1797]) -Greedy action tensor([ 1.7376, -0.2902, 0.8615, 2.0830]) tensor([0.3378, 0.0445, 0.1407, 0.4771]) -Greedy action tensor([2.3518, 0.0914, 0.5652, 1.5411]) tensor([0.5826, 0.0608, 0.0976, 0.2590]) -Greedy action tensor([ 1.3279, -0.1475, 1.4485, 1.4929]) tensor([0.2828, 0.0647, 0.3190, 0.3335]) -Greedy action tensor([1.6799, 0.1114, 0.9390, 1.2609]) tensor([0.4269, 0.0889, 0.2035, 0.2807]) -Greedy action tensor([1.6689, 0.4946, 1.3231, 0.1209]) tensor([0.4486, 0.1386, 0.3174, 0.0954]) -Greedy action tensor([ 1.0294, -0.6413, 0.9123, 0.7714]) tensor([0.3508, 0.0660, 0.3121, 0.2711]) -Greedy action tensor([ 1.2070, 0.5486, -1.0217, 1.7188]) tensor([0.3036, 0.1572, 0.0327, 0.5065]) -Greedy action tensor([ 1.2184, -0.8852, 1.1976, 1.2954]) tensor([0.3143, 0.0384, 0.3079, 0.3395]) -Greedy action tensor([ 1.3595, -0.1995, 0.9551, 0.0560]) tensor([0.4653, 0.0979, 0.3105, 0.1264]) -Greedy action tensor([ 1.2729, -0.5252, 0.6456, 0.6131]) tensor([0.4511, 0.0747, 0.2409, 0.2332]) -Greedy action tensor([ 0.6971, -0.9152, 0.4029, 1.2979]) tensor([0.2654, 0.0529, 0.1977, 0.4839]) -Greedy action tensor([ 1.6820, -0.9769, -0.0739, 0.9248]) tensor([0.5842, 0.0409, 0.1009, 0.2740]) -Greedy action tensor([ 1.5510, -0.1478, -0.3049, 1.2228]) tensor([0.4856, 0.0888, 0.0759, 0.3497]) -Greedy action tensor([ 1.1361, -0.4046, -0.2036, 0.5283]) tensor([0.4949, 0.1060, 0.1296, 0.2695]) -Greedy action tensor([ 1.7293, -0.1759, -0.0472, 0.4996]) tensor([0.6210, 0.0924, 0.1051, 0.1815]) -Greedy action tensor([ 0.8649, -0.5393, 0.4076, 0.9633]) tensor([0.3353, 0.0824, 0.2123, 0.3700]) -Greedy action tensor([ 1.4998, -1.1112, 0.6954, 1.1341]) tensor([0.4515, 0.0332, 0.2020, 0.3133]) -Greedy action tensor([1.2168, 0.0210, 1.5809, 0.5986]) tensor([0.3048, 0.0922, 0.4387, 0.1643]) -Greedy action tensor([0.8457, 0.2104, 0.1609, 1.0004]) tensor([0.3124, 0.1655, 0.1575, 0.3646]) -Greedy action tensor([ 0.9424, -0.7025, 0.8476, 1.1036]) tensor([0.3051, 0.0589, 0.2775, 0.3585]) -Greedy action tensor([ 1.0228, -0.8178, 0.2917, 1.4565]) tensor([0.3142, 0.0499, 0.1512, 0.4847]) -Greedy action tensor([ 1.7623, 0.2523, -0.4115, 0.9242]) tensor([0.5659, 0.1250, 0.0644, 0.2448]) -Greedy action tensor([ 1.6053, -0.1020, 0.1955, 1.1459]) tensor([0.4861, 0.0882, 0.1187, 0.3070]) -Greedy action tensor([ 0.1661, -0.9362, -0.2329, 1.0948]) tensor([0.2205, 0.0732, 0.1480, 0.5582]) -Greedy action tensor([ 1.3221, -0.8600, 0.1626, 1.6572]) tensor([0.3540, 0.0399, 0.1110, 0.4950]) -Greedy action tensor([ 1.3918, -0.2055, 0.4256, 0.5500]) tensor([0.4966, 0.1005, 0.1889, 0.2140]) -Greedy action tensor([ 1.9999, -0.0704, 0.8358, 1.6417]) tensor([0.4679, 0.0590, 0.1461, 0.3270]) -Greedy action tensor([ 1.9652, -0.1851, 0.4747, 2.2192]) tensor([0.3801, 0.0443, 0.0856, 0.4900]) -Greedy action tensor([1.6517, 0.0552, 0.2206, 1.0338]) tensor([0.5049, 0.1023, 0.1207, 0.2722]) -Greedy action tensor([ 1.2110, 0.1766, -0.3231, 0.8052]) tensor([0.4469, 0.1589, 0.0964, 0.2979]) -Greedy action tensor([ 1.4821, 0.4006, -0.7488, 0.8202]) tensor([0.5096, 0.1728, 0.0547, 0.2629]) -Greedy action tensor([ 1.2968, -0.1279, 0.9978, 1.6493]) tensor([0.2937, 0.0707, 0.2178, 0.4178]) -Greedy action tensor([ 1.1381, -0.8166, 0.0680, 0.4535]) tensor([0.5028, 0.0712, 0.1725, 0.2536]) -Greedy action tensor([ 1.8511, 0.1842, -0.1902, -0.0200]) tensor([0.6790, 0.1282, 0.0882, 0.1045]) -Greedy action tensor([0.4622, 0.2228, 1.1809, 0.8526]) tensor([0.1881, 0.1480, 0.3859, 0.2779]) -Greedy action tensor([ 1.4976, -0.6971, 0.9903, 0.8414]) tensor([0.4480, 0.0499, 0.2697, 0.2324]) -Greedy action tensor([ 1.0265, -0.0860, 1.2596, 0.5855]) tensor([0.3091, 0.1016, 0.3903, 0.1989]) -Greedy action tensor([ 1.4480, -0.0334, 0.6567, 0.9566]) tensor([0.4362, 0.0992, 0.1977, 0.2669]) -Greedy action tensor([ 1.2454, -0.4087, 1.2412, 1.3945]) tensor([0.2987, 0.0571, 0.2975, 0.3467]) -Greedy action tensor([ 1.9773, -0.2827, 0.2178, 1.3742]) tensor([0.5484, 0.0572, 0.0944, 0.3000]) -Greedy action tensor([ 1.4926, 0.2987, -0.2914, 1.1982]) tensor([0.4513, 0.1368, 0.0758, 0.3362]) -Greedy action tensor([ 1.3668, -0.2335, 1.0847, 1.6183]) tensor([0.3085, 0.0623, 0.2326, 0.3966]) -Greedy action tensor([1.1305, 0.1699, 0.9337, 1.6211]) tensor([0.2606, 0.0997, 0.2140, 0.4256]) -Greedy action tensor([ 0.8048, -0.1369, 0.3406, 1.2920]) tensor([0.2742, 0.1070, 0.1724, 0.4464]) -Greedy action tensor([ 0.9923, -0.5247, -0.3838, 0.5347]) tensor([0.4751, 0.1042, 0.1200, 0.3007]) -Greedy action tensor([ 1.2041, -0.4704, -0.4449, -0.0858]) tensor([0.6042, 0.1132, 0.1162, 0.1663]) -Greedy action tensor([ 0.4652, -0.2554, -0.0477, -0.0073]) tensor([0.3692, 0.1796, 0.2210, 0.2302]) -Greedy action tensor([ 1.3350, -0.5305, -0.1562, -0.0635]) tensor([0.6147, 0.0952, 0.1384, 0.1518]) -Greedy action tensor([ 0.8622, -0.4034, -0.1873, 0.0787]) tensor([0.4787, 0.1350, 0.1676, 0.2187]) -Greedy action tensor([ 1.6001, -0.6828, -0.2724, 0.2149]) tensor([0.6640, 0.0677, 0.1021, 0.1662]) -Greedy action tensor([ 1.0194, -0.5666, -0.0073, 0.1648]) tensor([0.5029, 0.1030, 0.1801, 0.2140]) -Greedy action tensor([ 1.1261, -0.4171, -0.0075, -0.1275]) tensor([0.5492, 0.1173, 0.1767, 0.1568]) -Greedy action tensor([ 1.0007, -0.3547, -0.2580, -0.1783]) tensor([0.5407, 0.1394, 0.1536, 0.1663]) -Greedy action tensor([ 1.1463, -0.0203, -0.0164, -0.1375]) tensor([0.5260, 0.1638, 0.1645, 0.1457]) -Greedy action tensor([ 0.6704, -0.2388, -0.0330, 0.0734]) tensor([0.4084, 0.1646, 0.2022, 0.2248]) -Greedy action tensor([ 1.2308, -0.4535, 0.0315, -0.1617]) tensor([0.5762, 0.1069, 0.1737, 0.1432]) -Greedy action tensor([ 1.2883, -0.3675, -0.0970, -0.1118]) tensor([0.5925, 0.1131, 0.1483, 0.1461]) -Greedy action tensor([ 0.8361, -0.3420, -0.1233, 0.0848]) tensor([0.4624, 0.1423, 0.1771, 0.2181]) -Greedy action tensor([ 1.5088, -0.6516, -0.5061, -0.0392]) tensor([0.6843, 0.0789, 0.0912, 0.1455]) -Greedy action tensor([ 0.8161, -0.3907, -0.1474, 0.0347]) tensor([0.4676, 0.1399, 0.1784, 0.2141]) -Greedy action tensor([ 0.6577, -0.4100, -0.1155, 0.1024]) tensor([0.4203, 0.1445, 0.1940, 0.2412]) -Greedy action tensor([ 0.3959, -0.4138, 0.0580, -0.1211]) tensor([0.3630, 0.1616, 0.2589, 0.2165]) -Greedy action tensor([ 1.1649, -0.2006, 0.1668, -0.0018]) tensor([0.5167, 0.1319, 0.1905, 0.1609]) -Greedy action tensor([ 0.8586, -0.3463, -0.2088, -0.1267]) tensor([0.4958, 0.1486, 0.1705, 0.1851]) -Greedy action tensor([ 1.0239, -0.7485, -0.3998, 0.1679]) tensor([0.5448, 0.0926, 0.1312, 0.2315]) -Greedy action tensor([ 0.9074, -0.3558, -0.1445, 0.3695]) tensor([0.4513, 0.1276, 0.1576, 0.2635]) -Greedy action tensor([ 0.9301, -0.4279, -0.1991, 0.1688]) tensor([0.4884, 0.1256, 0.1579, 0.2281]) -Greedy action tensor([ 0.7442, -0.6072, -0.3048, 0.1088]) tensor([0.4675, 0.1210, 0.1638, 0.2477]) -Greedy action tensor([ 0.9253, -0.5276, -0.3379, 0.0360]) tensor([0.5188, 0.1213, 0.1467, 0.2132]) -Greedy action tensor([ 0.8870, -0.4760, -0.0647, 0.2029]) tensor([0.4659, 0.1192, 0.1799, 0.2350]) -Greedy action tensor([ 0.6180, -0.4227, 0.0508, -0.1461]) tensor([0.4191, 0.1480, 0.2377, 0.1952]) -Greedy action tensor([ 1.3546, -0.1312, 0.0539, -0.1147]) tensor([0.5784, 0.1309, 0.1575, 0.1331]) -Greedy action tensor([ 1.3290, -0.4996, -0.4839, 0.0875]) tensor([0.6201, 0.0996, 0.1012, 0.1792]) -Greedy action tensor([ 0.8537, -0.4870, 0.2232, -0.0399]) tensor([0.4539, 0.1188, 0.2416, 0.1857]) -Greedy action tensor([ 1.4754, -0.5145, -0.2815, 0.3049]) tensor([0.6175, 0.0844, 0.1066, 0.1915]) -Greedy action tensor([ 0.5854, -0.5589, 0.0168, 0.0464]) tensor([0.4052, 0.1290, 0.2295, 0.2363]) -Greedy action tensor([ 0.6760, -0.4788, -0.1692, 0.0916]) tensor([0.4344, 0.1369, 0.1866, 0.2422]) -Greedy action tensor([ 1.1216, -0.3858, -0.5156, -0.1549]) tensor([0.5900, 0.1307, 0.1148, 0.1646]) -Greedy action tensor([ 0.4634, -0.4090, 0.1909, -0.1225]) tensor([0.3655, 0.1528, 0.2783, 0.2034]) -Greedy action tensor([ 0.8246, -0.2058, 0.0632, -0.2634]) tensor([0.4628, 0.1652, 0.2161, 0.1559]) -Greedy action tensor([ 1.3342, -0.3692, -0.3557, -0.0732]) tensor([0.6206, 0.1130, 0.1145, 0.1519]) -Greedy action tensor([ 0.7176, -0.7171, -0.3649, 0.1688]) tensor([0.4641, 0.1105, 0.1572, 0.2681]) -Greedy action tensor([ 0.7454, -0.4316, -0.0025, -0.1531]) tensor([0.4569, 0.1408, 0.2163, 0.1860]) -Greedy action tensor([ 0.6199, -0.7595, -0.3408, 0.1032]) tensor([0.4483, 0.1128, 0.1715, 0.2674]) -Greedy action tensor([ 0.7527, -0.6527, -0.3957, 0.2133]) tensor([0.4661, 0.1143, 0.1478, 0.2718]) -Greedy action tensor([ 1.4105, -0.4401, -0.2734, -0.1318]) tensor([0.6424, 0.1010, 0.1193, 0.1374]) -Greedy action tensor([ 1.3161, -0.5319, -0.3236, -0.0758]) tensor([0.6249, 0.0985, 0.1213, 0.1554]) -Greedy action tensor([ 0.8460, -0.6430, -0.3932, 0.3294]) tensor([0.4735, 0.1068, 0.1371, 0.2825]) -Greedy action tensor([ 1.0281, -0.5820, -0.2149, 0.3238]) tensor([0.5043, 0.1008, 0.1455, 0.2494]) -Greedy action tensor([ 1.1751, -0.4995, -0.4849, -0.1849]) tensor([0.6119, 0.1147, 0.1164, 0.1571]) -Greedy action tensor([ 1.2115, -0.4032, -0.3054, -0.1423]) tensor([0.5964, 0.1187, 0.1309, 0.1540]) -Greedy action tensor([ 0.4365, -0.5093, 0.1160, -0.1426]) tensor([0.3739, 0.1452, 0.2714, 0.2095]) -Greedy action tensor([ 1.8149, -0.5642, -0.4334, 0.1885]) tensor([0.7169, 0.0664, 0.0757, 0.1410]) -Greedy action tensor([ 0.7113, -0.3409, 0.0976, -0.3156]) tensor([0.4447, 0.1553, 0.2407, 0.1593]) -Greedy action tensor([ 1.3630, -0.4853, 0.0050, 0.3272]) tensor([0.5651, 0.0890, 0.1453, 0.2006]) -Greedy action tensor([ 1.1766, -0.2203, -0.4166, -0.0765]) tensor([0.5759, 0.1425, 0.1171, 0.1645]) -Greedy action tensor([ 1.1766, -0.4529, -0.3252, 0.4099]) tensor([0.5310, 0.1041, 0.1183, 0.2467]) -Greedy action tensor([ 1.6541, -0.6761, -0.3252, 0.2624]) tensor([0.6738, 0.0655, 0.0931, 0.1675]) -Greedy action tensor([ 1.3018, -0.3524, -0.3556, -0.0877]) tensor([0.6131, 0.1173, 0.1169, 0.1528]) -Greedy action tensor([ 0.5195, -0.5157, 0.0045, -0.0966]) tensor([0.4012, 0.1425, 0.2397, 0.2167]) -Greedy action tensor([ 1.0810, -0.2579, 0.0074, -0.2265]) tensor([0.5335, 0.1398, 0.1823, 0.1443]) -Greedy action tensor([ 1.2101, -0.5570, -0.1785, 0.0541]) tensor([0.5764, 0.0985, 0.1438, 0.1814]) -Greedy action tensor([ 1.1180, -0.2895, 0.0450, -0.2000]) tensor([0.5393, 0.1320, 0.1844, 0.1443]) -Greedy action tensor([ 1.8074, -0.4522, -0.4324, 0.1420]) tensor([0.7143, 0.0746, 0.0761, 0.1351]) -Greedy action tensor([ 1.6996, -0.3898, -0.6678, -0.0699]) tensor([0.7205, 0.0892, 0.0675, 0.1228]) -Greedy action tensor([ 1.2476, -0.4460, -0.3103, -0.0476]) tensor([0.5994, 0.1102, 0.1262, 0.1641]) -Greedy action tensor([ 1.1460, -0.7549, -0.6620, 0.2559]) tensor([0.5800, 0.0867, 0.0951, 0.2382]) -Greedy action tensor([ 0.5371, -0.4340, -0.1202, -0.0175]) tensor([0.4047, 0.1532, 0.2097, 0.2324]) -Greedy action tensor([ 0.6390, -0.5491, -0.0078, -0.0244]) tensor([0.4267, 0.1301, 0.2235, 0.2198]) -Greedy action tensor([ 0.7553, -0.6599, -0.1545, -0.0534]) tensor([0.4783, 0.1162, 0.1926, 0.2130]) -Greedy action tensor([ 0.7468, -0.3881, -0.1806, -0.1188]) tensor([0.4678, 0.1504, 0.1850, 0.1968]) -Greedy action tensor([ 0.8982, -0.2296, -0.2365, -0.2187]) tensor([0.5069, 0.1641, 0.1630, 0.1659]) -Greedy action tensor([ 1.3185, -0.6221, -0.3876, 0.2370]) tensor([0.6009, 0.0863, 0.1091, 0.2037]) -Greedy action tensor([ 0.7834, -0.4952, -0.3427, -0.0284]) tensor([0.4886, 0.1360, 0.1584, 0.2169]) -Greedy action tensor([ 0.9532, -0.5782, -0.5671, -0.1407]) tensor([0.5650, 0.1222, 0.1235, 0.1892]) -Greedy action tensor([ 0.8440, -0.5517, 0.0697, -0.2216]) tensor([0.4870, 0.1206, 0.2246, 0.1678]) -Greedy action tensor([ 1.4857, -0.5663, -0.3632, 0.1269]) tensor([0.6482, 0.0833, 0.1020, 0.1665]) -Greedy action tensor([ 0.8190, -0.5701, 0.0213, -0.1201]) tensor([0.4783, 0.1192, 0.2154, 0.1870]) -Greedy action tensor([ 1.1843, -0.4419, -0.3030, 0.0759]) tensor([0.5705, 0.1122, 0.1289, 0.1883]) -Greedy action tensor([ 0.5137, -0.4445, -0.0091, -0.0754]) tensor([0.3951, 0.1515, 0.2342, 0.2192]) -Greedy action tensor([ 0.6706, -0.4754, -0.0550, 0.0052]) tensor([0.4318, 0.1373, 0.2090, 0.2220]) -Greedy action tensor([ 0.4761, -0.4121, 0.0338, -0.1189]) tensor([0.3838, 0.1579, 0.2466, 0.2117]) -Greedy action tensor([ 0.8819, -0.4305, -0.3034, 0.1039]) tensor([0.4916, 0.1323, 0.1503, 0.2258]) -Greedy action tensor([ 0.7493, -0.5915, -0.0571, -0.1711]) tensor([0.4747, 0.1242, 0.2120, 0.1891]) -Greedy action tensor([ 0.8359, -0.6996, -0.1810, 0.2098]) tensor([0.4735, 0.1020, 0.1713, 0.2532]) -Greedy action tensor([ 1.2312, -0.6967, -0.4477, 0.0764]) tensor([0.6071, 0.0883, 0.1133, 0.1913]) -Greedy action tensor([ 0.1296, -0.0573, 0.0944, -0.2674]) tensor([0.2884, 0.2392, 0.2784, 0.1939]) -Greedy action tensor([ 1.0439, -0.7672, -0.0568, -0.6548]) tensor([0.5956, 0.0974, 0.1981, 0.1089]) -Greedy action tensor([ 0.4300, -0.0857, 0.2315, -0.5041]) tensor([0.3559, 0.2125, 0.2918, 0.1398]) -Greedy action tensor([ 0.4075, 0.2160, 0.0886, -0.0518]) tensor([0.3140, 0.2593, 0.2283, 0.1984]) -Greedy action tensor([ 0.2556, 0.1015, 0.0775, -0.2564]) tensor([0.3036, 0.2603, 0.2541, 0.1820]) -Greedy action tensor([ 0.4328, -0.1446, 0.0752, -0.1703]) tensor([0.3561, 0.1999, 0.2491, 0.1948]) -Greedy action tensor([ 0.2224, 0.0205, 0.2025, -0.2516]) tensor([0.2924, 0.2389, 0.2866, 0.1820]) -Greedy action tensor([ 0.4579, 0.2322, 0.0173, -0.0091]) tensor([0.3259, 0.2600, 0.2098, 0.2043]) -Greedy action tensor([ 0.5061, 0.1680, -0.0523, -0.0543]) tensor([0.3501, 0.2497, 0.2003, 0.1999]) -Greedy action tensor([ 0.4335, -0.1568, 0.0906, -0.2978]) tensor([0.3643, 0.2019, 0.2585, 0.1753]) -Greedy action tensor([ 0.6410, -0.0980, 0.1060, -0.4209]) tensor([0.4151, 0.1983, 0.2431, 0.1435]) -Greedy action tensor([ 0.3612, -0.2325, 0.1721, -0.1627]) tensor([0.3364, 0.1858, 0.2785, 0.1993]) -Greedy action tensor([ 0.3778, -0.0432, 0.1831, -0.3274]) tensor([0.3363, 0.2207, 0.2768, 0.1661]) -Greedy action tensor([ 0.2101, 0.1066, 0.2723, -0.0453]) tensor([0.2673, 0.2411, 0.2845, 0.2071]) -Greedy action tensor([ 0.2567, 0.2489, 0.0362, -0.1706]) tensor([0.2901, 0.2879, 0.2327, 0.1892]) -Greedy action tensor([ 0.3216, 0.0762, 0.1386, -0.1356]) tensor([0.3079, 0.2409, 0.2564, 0.1949]) -Greedy action tensor([ 0.3805, -0.1540, -0.0198, -0.2550]) tensor([0.3590, 0.2103, 0.2405, 0.1901]) -Greedy action tensor([ 0.3402, -0.0535, 0.2031, -0.2789]) tensor([0.3242, 0.2187, 0.2826, 0.1745]) -Greedy action tensor([ 0.4118, -0.0404, 0.1315, -0.2817]) tensor([0.3458, 0.2200, 0.2613, 0.1728]) -Greedy action tensor([ 0.3741, -0.1534, -0.0800, -0.4009]) tensor([0.3723, 0.2197, 0.2364, 0.1715]) -Greedy action tensor([ 0.4860, -0.1879, 0.1021, -0.3648]) tensor([0.3820, 0.1947, 0.2602, 0.1631]) -Greedy action tensor([ 0.2396, -0.0324, 0.1835, -0.3036]) tensor([0.3041, 0.2317, 0.2875, 0.1767]) -Greedy action tensor([0.3348, 0.0968, 0.2321, 0.0050]) tensor([0.2933, 0.2312, 0.2647, 0.2109]) -Greedy action tensor([ 0.4074, 0.2001, 0.0829, -0.2245]) tensor([0.3260, 0.2650, 0.2357, 0.1733]) -Greedy action tensor([ 0.4457, -0.0356, 0.0917, -0.1760]) tensor([0.3500, 0.2163, 0.2457, 0.1880]) -Greedy action tensor([ 0.4159, 0.1152, 0.0147, -0.3861]) tensor([0.3499, 0.2590, 0.2342, 0.1569]) -Greedy action tensor([ 1.1555, -0.8764, 0.0212, -0.6823]) tensor([0.6204, 0.0813, 0.1995, 0.0987]) -Greedy action tensor([ 0.1130, 0.0531, 0.0553, -0.0370]) tensor([0.2669, 0.2514, 0.2520, 0.2297]) -Greedy action tensor([ 0.3868, 0.0158, -0.0130, -0.1924]) tensor([0.3424, 0.2362, 0.2295, 0.1918]) -Greedy action tensor([ 0.3362, 0.2367, 0.1019, -0.1889]) tensor([0.3041, 0.2754, 0.2406, 0.1799]) -Greedy action tensor([ 0.2091, 0.0929, 0.2665, -0.1219]) tensor([0.2727, 0.2428, 0.2888, 0.1958]) -Greedy action tensor([ 0.2601, 0.2692, -0.0539, -0.3563]) tensor([0.3049, 0.3077, 0.2227, 0.1646]) -Greedy action tensor([ 0.2890, 0.1196, 0.0824, -0.1046]) tensor([0.3001, 0.2533, 0.2441, 0.2025]) -Greedy action tensor([ 0.2996, 0.0360, 0.0230, -0.4619]) tensor([0.3340, 0.2566, 0.2533, 0.1560]) -Greedy action tensor([ 0.2352, 0.1056, 0.1948, -0.1324]) tensor([0.2832, 0.2488, 0.2720, 0.1961]) -Greedy action tensor([ 0.1161, 0.0396, -0.1149, -0.3231]) tensor([0.2972, 0.2753, 0.2359, 0.1916]) -Greedy action tensor([ 0.2042, -0.2582, -0.2141, -0.3928]) tensor([0.3523, 0.2219, 0.2319, 0.1939]) -Greedy action tensor([ 0.5045, 0.1172, -0.0650, -0.3033]) tensor([0.3717, 0.2523, 0.2103, 0.1657]) -Greedy action tensor([ 0.4050, 0.0671, 0.0658, -0.1953]) tensor([0.3362, 0.2398, 0.2395, 0.1845]) -Greedy action tensor([ 0.3956, -0.0984, -0.0040, -0.4732]) tensor([0.3703, 0.2260, 0.2484, 0.1553]) -Greedy action tensor([ 0.9289, -0.4900, -0.2349, -0.7005]) tensor([0.5713, 0.1383, 0.1784, 0.1120]) -Greedy action tensor([ 0.3290, 0.1824, 0.2014, -0.2714]) tensor([0.3037, 0.2623, 0.2673, 0.1666]) -Greedy action tensor([ 0.5187, 0.0353, -0.0836, -0.2935]) tensor([0.3834, 0.2365, 0.2099, 0.1702]) -Greedy action tensor([ 0.0347, 0.1461, 0.1558, -0.1591]) tensor([0.2457, 0.2746, 0.2773, 0.2024]) -Greedy action tensor([ 0.5676, -0.2877, 0.1585, -0.4651]) tensor([0.4089, 0.1739, 0.2716, 0.1456]) -Greedy action tensor([ 0.4077, 0.1703, 0.0103, -0.2133]) tensor([0.3336, 0.2631, 0.2242, 0.1792]) -Greedy action tensor([ 1.1225, -0.8412, -0.0839, -0.6527]) tensor([0.6215, 0.0872, 0.1860, 0.1053]) -Greedy action tensor([ 0.7447, -0.2800, -0.0372, -0.4280]) tensor([0.4704, 0.1688, 0.2152, 0.1456]) -Greedy action tensor([ 0.9905, -0.6565, 0.0999, -0.6236]) tensor([0.5549, 0.1069, 0.2277, 0.1105]) -Greedy action tensor([ 0.9146, -0.5522, -0.0872, -0.4949]) tensor([0.5428, 0.1252, 0.1994, 0.1326]) -Greedy action tensor([ 0.2759, 0.0587, 0.1859, -0.1954]) tensor([0.2991, 0.2407, 0.2734, 0.1867]) -Greedy action tensor([ 0.3114, 0.1445, -0.0920, -0.3083]) tensor([0.3276, 0.2773, 0.2189, 0.1763]) -Greedy action tensor([ 0.3878, 0.0339, 0.0885, -0.2946]) tensor([0.3391, 0.2381, 0.2514, 0.1714]) -Greedy action tensor([ 0.2730, 0.2909, -0.1012, -0.4292]) tensor([0.3124, 0.3180, 0.2149, 0.1548]) -Greedy action tensor([ 1.3389, -0.8421, 0.0801, -0.8372]) tensor([0.6621, 0.0748, 0.1880, 0.0751]) -Greedy action tensor([ 0.8748, -0.3726, -0.0979, -0.4751]) tensor([0.5196, 0.1492, 0.1964, 0.1347]) -Greedy action tensor([ 0.5567, 0.0189, -0.1493, -0.1628]) tensor([0.3899, 0.2277, 0.1925, 0.1899]) -Greedy action tensor([ 0.5187, -0.4335, -0.1394, -0.5266]) tensor([0.4434, 0.1711, 0.2296, 0.1559]) -Greedy action tensor([ 0.1822, -0.0302, 0.2576, -0.0915]) tensor([0.2742, 0.2217, 0.2956, 0.2085]) -Greedy action tensor([ 0.4110, 0.0999, 0.0009, -0.2533]) tensor([0.3435, 0.2517, 0.2280, 0.1768]) -Greedy action tensor([ 0.2449, 0.0981, 0.1562, -0.2657]) tensor([0.2960, 0.2556, 0.2708, 0.1776]) -Greedy action tensor([ 0.2432, 0.0278, 0.2049, -0.0994]) tensor([0.2875, 0.2318, 0.2767, 0.2041]) -Greedy action tensor([ 0.4847, 0.0638, 0.0226, -0.2311]) tensor([0.3603, 0.2366, 0.2270, 0.1761]) -Greedy action tensor([ 0.8794, -0.4325, -0.1460, -0.6150]) tensor([0.5399, 0.1454, 0.1936, 0.1211]) -Greedy action tensor([ 0.2590, 0.0315, 0.1772, -0.2618]) tensor([0.3019, 0.2405, 0.2782, 0.1794]) -Greedy action tensor([ 0.6151, -0.3100, -0.0258, -0.4876]) tensor([0.4434, 0.1758, 0.2336, 0.1472]) -Greedy action tensor([ 0.3592, 0.1652, 0.2072, -0.0419]) tensor([0.2983, 0.2457, 0.2562, 0.1997]) -Greedy action tensor([ 0.9056, -0.5020, -0.0221, -0.6665]) tensor([0.5412, 0.1324, 0.2140, 0.1124]) -Greedy action tensor([ 0.4168, 0.1327, 0.0765, -0.2498]) tensor([0.3358, 0.2528, 0.2390, 0.1724]) -Greedy action tensor([ 0.9483, -0.6875, -0.0361, -0.4766]) tensor([0.5528, 0.1077, 0.2066, 0.1330]) -Greedy action tensor([ 0.0042, -0.0460, -0.0162, -0.0403]) tensor([0.2572, 0.2447, 0.2521, 0.2460]) -Greedy action tensor([ 0.1834, 0.0924, 0.0283, -0.3394]) tensor([0.2974, 0.2716, 0.2547, 0.1763]) -Greedy action tensor([ 0.5571, -0.2113, -0.1072, -0.5048]) tensor([0.4303, 0.1995, 0.2214, 0.1488]) -Greedy action tensor([ 0.3853, 0.1481, 0.1891, -0.1437]) tensor([0.3125, 0.2465, 0.2568, 0.1841]) -Greedy action tensor([ 0.8062, -0.2531, -0.0986, -0.6923]) tensor([0.5064, 0.1756, 0.2049, 0.1132]) -Greedy action tensor([ 0.6247, -0.1759, -0.0638, -0.4913]) tensor([0.4388, 0.1970, 0.2204, 0.1437]) -Greedy action tensor([ 0.1396, 0.0428, 0.1276, -0.2547]) tensor([0.2801, 0.2543, 0.2768, 0.1888]) -Greedy action tensor([ 0.2035, -0.2206, -0.1218, -0.3987]) tensor([0.3420, 0.2238, 0.2470, 0.1873]) -Greedy action tensor([ 0.5142, -0.4202, -0.1130, -0.4655]) tensor([0.4343, 0.1706, 0.2320, 0.1631]) -Greedy action tensor([ 0.2571, 0.2125, 0.1003, -0.2746]) tensor([0.2942, 0.2814, 0.2515, 0.1729]) -Greedy action tensor([ 0.5091, -0.1642, 0.0317, -0.3648]) tensor([0.3925, 0.2002, 0.2435, 0.1638]) -Greedy action tensor([ 1.4991, -0.5579, 1.1371, 1.2006]) tensor([0.3897, 0.0498, 0.2713, 0.2891]) -Greedy action tensor([ 1.1905, -0.4450, 1.0881, 0.8398]) tensor([0.3569, 0.0695, 0.3222, 0.2513]) -Greedy action tensor([ 1.3133, -0.1789, 1.3976, 1.2645]) tensor([0.3063, 0.0689, 0.3332, 0.2917]) -Greedy action tensor([ 1.3866, -0.1991, 0.6935, 1.7367]) tensor([0.3201, 0.0656, 0.1601, 0.4543]) -Greedy action tensor([ 0.9074, -0.4888, 0.7440, 0.6232]) tensor([0.3509, 0.0869, 0.2980, 0.2641]) -Greedy action tensor([ 0.9628, -0.0956, 0.8508, 1.9973]) tensor([0.1978, 0.0686, 0.1769, 0.5566]) -Greedy action tensor([ 1.7972, -0.1165, 0.5516, 0.6376]) tensor([0.5718, 0.0844, 0.1645, 0.1793]) -Greedy action tensor([ 1.8249, -0.2031, -0.2425, 1.1903]) tensor([0.5592, 0.0736, 0.0707, 0.2964]) -Greedy action tensor([ 1.7676, -0.2186, 1.3994, 1.0221]) tensor([0.4341, 0.0596, 0.3004, 0.2060]) -Greedy action tensor([ 0.7460, -0.8390, 0.5375, 0.9802]) tensor([0.3048, 0.0625, 0.2474, 0.3853]) -Greedy action tensor([ 1.6423, -1.0649, -0.1629, 1.4898]) tensor([0.4785, 0.0319, 0.0787, 0.4109]) -Greedy action tensor([ 1.6705, -0.3177, 1.2183, 1.1691]) tensor([0.4204, 0.0576, 0.2675, 0.2546]) -Greedy action tensor([1.0272, 0.0102, 0.6260, 0.6375]) tensor([0.3692, 0.1335, 0.2472, 0.2500]) -Greedy action tensor([ 0.6352, -0.6163, 0.5023, 1.1522]) tensor([0.2605, 0.0745, 0.2281, 0.4369]) -Greedy action tensor([ 1.3979, -0.4730, 0.2207, 1.6269]) tensor([0.3677, 0.0566, 0.1133, 0.4624]) -Greedy action tensor([ 1.6169, -1.3145, 0.6698, 0.6495]) tensor([0.5491, 0.0293, 0.2130, 0.2087]) -Greedy action tensor([ 1.0772, -0.5339, 0.4111, 1.1798]) tensor([0.3544, 0.0708, 0.1821, 0.3927]) -Greedy action tensor([ 1.8765, -0.2561, -0.3612, 0.8293]) tensor([0.6345, 0.0752, 0.0677, 0.2226]) -Greedy action tensor([1.3418, 0.1625, 1.0482, 0.2968]) tensor([0.4158, 0.1279, 0.3100, 0.1462]) -Greedy action tensor([ 1.6151, -0.4842, -0.0995, 0.9118]) tensor([0.5563, 0.0682, 0.1002, 0.2753]) -Greedy action tensor([ 1.3403, 0.8156, -0.8311, 0.6645]) tensor([0.4516, 0.2672, 0.0515, 0.2297]) -Greedy action tensor([1.3050, 0.3019, 1.0264, 0.6760]) tensor([0.3764, 0.1380, 0.2849, 0.2007]) -Greedy action tensor([ 1.0646, 0.1026, -0.6971, 1.6004]) tensor([0.3065, 0.1171, 0.0526, 0.5237]) -Greedy action tensor([ 1.0529, 0.4237, -0.6795, 0.3282]) tensor([0.4557, 0.2429, 0.0806, 0.2208]) -Greedy action tensor([ 1.6963, -1.0654, -0.3198, 1.2545]) tensor([0.5437, 0.0344, 0.0724, 0.3495]) -Greedy action tensor([ 1.4816, -0.1340, 0.2415, 1.0330]) tensor([0.4702, 0.0935, 0.1361, 0.3003]) -Greedy action tensor([ 1.3955, -0.5229, -0.0032, 1.0700]) tensor([0.4726, 0.0694, 0.1167, 0.3413]) -Greedy action tensor([ 1.4977, -1.1004, -0.2141, 0.5626]) tensor([0.6070, 0.0452, 0.1096, 0.2383]) -Greedy action tensor([ 1.4301, 0.3193, -0.8811, 1.1184]) tensor([0.4628, 0.1524, 0.0459, 0.3389]) -Greedy action tensor([ 1.4487, -0.1254, -0.5814, 0.9149]) tensor([0.5195, 0.1076, 0.0682, 0.3046]) -Greedy action tensor([ 1.5115, -1.3910, -0.2029, 1.1182]) tensor([0.5236, 0.0287, 0.0943, 0.3533]) -Greedy action tensor([ 1.2380, -0.0364, 0.2482, 0.9114]) tensor([0.4215, 0.1178, 0.1566, 0.3040]) -Greedy action tensor([ 0.9485, 0.2224, -0.5295, 0.8476]) tensor([0.3823, 0.1849, 0.0872, 0.3456]) -Greedy action tensor([1.3800, 0.1712, 0.6256, 0.8640]) tensor([0.4227, 0.1262, 0.1988, 0.2523]) -Greedy action tensor([ 1.5570, -1.0168, 0.0447, 1.2994]) tensor([0.4832, 0.0368, 0.1065, 0.3735]) -Greedy action tensor([ 1.2827, -0.4002, 0.0391, 1.1825]) tensor([0.4204, 0.0781, 0.1212, 0.3803]) -Greedy action tensor([ 1.6872, -0.0030, -0.4772, 0.4414]) tensor([0.6301, 0.1162, 0.0723, 0.1813]) -Greedy action tensor([ 2.0509, -0.8697, -0.5821, 1.2957]) tensor([0.6267, 0.0338, 0.0450, 0.2945]) -Greedy action tensor([ 1.5033, -0.0550, -0.3875, 0.9519]) tensor([0.5161, 0.1086, 0.0779, 0.2974]) -Greedy action tensor([ 2.1460, 0.0869, -0.3763, 0.4472]) tensor([0.7190, 0.0917, 0.0577, 0.1315]) -Greedy action tensor([1.8325, 0.2697, 0.7395, 1.1251]) tensor([0.4908, 0.1028, 0.1645, 0.2419]) -Greedy action tensor([ 1.0583, 0.3921, -0.5797, 1.2862]) tensor([0.3374, 0.1733, 0.0656, 0.4238]) -Greedy action tensor([ 0.9798, 0.0986, -0.6250, 1.8674]) tensor([0.2472, 0.1024, 0.0497, 0.6006]) -Greedy action tensor([ 1.6292, 0.3371, -0.2563, 0.7499]) tensor([0.5430, 0.1492, 0.0824, 0.2254]) -Greedy action tensor([ 0.9528, -0.1764, -0.0193, 0.7099]) tensor([0.4023, 0.1300, 0.1522, 0.3155]) -Greedy action tensor([ 1.1338, -1.0809, 0.9508, 0.5967]) tensor([0.3958, 0.0432, 0.3296, 0.2313]) -Greedy action tensor([ 1.0154, -0.0829, 0.7172, 1.0364]) tensor([0.3229, 0.1077, 0.2396, 0.3298]) -Greedy action tensor([ 1.7114, -0.0198, 0.6380, 0.8624]) tensor([0.5137, 0.0910, 0.1756, 0.2198]) -Greedy action tensor([1.1147, 0.0523, 1.0965, 0.9087]) tensor([0.3183, 0.1100, 0.3126, 0.2591]) -Greedy action tensor([ 1.4981, -0.3702, -0.7712, 0.6695]) tensor([0.5902, 0.0911, 0.0610, 0.2577]) -Greedy action tensor([1.6945, 0.1982, 0.0531, 0.7529]) tensor([0.5532, 0.1239, 0.1072, 0.2158]) -Greedy action tensor([ 1.2160, -0.0808, 0.3820, 0.7819]) tensor([0.4245, 0.1161, 0.1844, 0.2750]) -Greedy action tensor([ 1.5800, -0.2537, -0.4987, 0.5846]) tensor([0.6044, 0.0966, 0.0756, 0.2234]) -Greedy action tensor([1.1477, 0.2063, 0.5938, 0.1308]) tensor([0.4298, 0.1677, 0.2470, 0.1555]) -Greedy action tensor([ 1.5871, -0.1542, 0.2518, 1.3324]) tensor([0.4518, 0.0792, 0.1189, 0.3502]) -Greedy action tensor([ 1.8267, -0.8379, -0.5560, 1.2379]) tensor([0.5824, 0.0406, 0.0538, 0.3233]) -Greedy action tensor([ 1.1436, -0.4436, 0.5577, 1.7319]) tensor([0.2807, 0.0574, 0.1563, 0.5056]) -Greedy action tensor([ 0.9983, -0.3536, -0.2138, 1.3093]) tensor([0.3423, 0.0886, 0.1019, 0.4672]) -Greedy action tensor([ 1.0497, -0.0028, 0.8298, 0.8389]) tensor([0.3377, 0.1179, 0.2710, 0.2735]) -Greedy action tensor([ 0.1121, -1.2503, 0.4466, -0.3997]) tensor([0.3074, 0.0787, 0.4296, 0.1843]) -Greedy action tensor([ 2.0439, -0.5123, -0.5114, 1.3317]) tensor([0.6076, 0.0471, 0.0472, 0.2981]) -Greedy action tensor([ 1.9228, -0.3384, 0.0646, 1.2884]) tensor([0.5585, 0.0582, 0.0871, 0.2962]) -Greedy action tensor([ 1.4351, -0.3639, -0.0683, 1.4257]) tensor([0.4204, 0.0696, 0.0935, 0.4165]) -Greedy action tensor([ 0.8185, -0.9549, 0.9046, 0.2352]) tensor([0.3549, 0.0602, 0.3868, 0.1980]) -Greedy action tensor([ 0.8540, -0.1718, -0.1650, 0.9667]) tensor([0.3523, 0.1263, 0.1271, 0.3943]) -Greedy action tensor([ 1.9869, -0.2121, -0.1006, 1.3095]) tensor([0.5738, 0.0636, 0.0711, 0.2914]) -Greedy action tensor([ 0.8984, -0.3724, -0.6737, 1.0858]) tensor([0.3711, 0.1042, 0.0771, 0.4476]) -Greedy action tensor([ 0.6108, -0.3774, -0.1987, 1.6136]) tensor([0.2201, 0.0819, 0.0980, 0.6000]) -Greedy action tensor([ 1.3383, 0.5366, -0.8735, 0.7391]) tensor([0.4745, 0.2129, 0.0520, 0.2606]) -Greedy action tensor([1.4515, 0.1764, 1.1059, 0.9235]) tensor([0.3881, 0.1084, 0.2747, 0.2289]) -Greedy action tensor([ 1.6282, -1.0645, 1.1939, 0.4672]) tensor([0.4929, 0.0334, 0.3193, 0.1544]) -Greedy action tensor([ 1.6990, -0.6247, -0.7233, 0.6815]) tensor([0.6459, 0.0632, 0.0573, 0.2335]) -Greedy action tensor([ 1.4347, -0.2513, -0.8814, 1.5231]) tensor([0.4208, 0.0780, 0.0415, 0.4597]) -Greedy action tensor([ 1.2357, 0.0440, -0.1962, 0.7337]) tensor([0.4656, 0.1414, 0.1112, 0.2818]) -Greedy action tensor([ 1.2506, -1.2240, 0.1616, 0.4986]) tensor([0.5285, 0.0445, 0.1779, 0.2491]) -Greedy action tensor([1.6456, 0.1628, 1.1274, 0.5163]) tensor([0.4660, 0.1058, 0.2776, 0.1506]) -Greedy action tensor([ 1.4619, -0.7815, -0.3679, 0.6457]) tensor([0.5853, 0.0621, 0.0939, 0.2587]) -Greedy action tensor([ 1.6062, -1.4195, -1.2777, 0.8320]) tensor([0.6388, 0.0310, 0.0357, 0.2945]) -Greedy action tensor([ 1.8280, -0.6780, 0.0228, 1.0407]) tensor([0.5878, 0.0480, 0.0967, 0.2675]) -Greedy action tensor([ 0.7425, -0.9762, 0.4262, 1.6760]) tensor([0.2246, 0.0403, 0.1637, 0.5713]) -Greedy action tensor([1.2186, 0.2481, 0.4186, 1.0688]) tensor([0.3719, 0.1409, 0.1671, 0.3201]) -Greedy action tensor([ 1.6912, -0.5182, 0.9017, 1.9123]) tensor([0.3557, 0.0390, 0.1615, 0.4437]) -Greedy action tensor([-1.7443, -0.6811, 0.7475, 0.0427]) tensor([0.0456, 0.1319, 0.5505, 0.2720]) -Greedy action tensor([-1.9417, -0.3699, 0.2878, -0.2037]) tensor([0.0481, 0.2315, 0.4470, 0.2734]) -Greedy action tensor([-1.8579, -0.6969, 0.1750, -0.2267]) tensor([0.0590, 0.1885, 0.4508, 0.3017]) -Greedy action tensor([-1.8170, -0.8376, 0.1249, -0.3982]) tensor([0.0677, 0.1803, 0.4721, 0.2798]) -Greedy action tensor([-0.6173, -0.6209, 0.1921, 0.1219]) tensor([0.1578, 0.1572, 0.3545, 0.3305]) -Greedy action tensor([-1.7367, -0.8521, 0.0050, -0.4613]) tensor([0.0787, 0.1906, 0.4490, 0.2817]) -Greedy action tensor([-1.2536, -0.6396, 0.6189, 0.1112]) tensor([0.0754, 0.1393, 0.4903, 0.2951]) -Greedy action tensor([-1.9700, -0.6927, 0.6567, 0.0355]) tensor([0.0387, 0.1388, 0.5350, 0.2875]) -Greedy action tensor([-0.9188, -0.4925, -0.0727, -0.0197]) tensor([0.1366, 0.2092, 0.3184, 0.3357]) -Greedy action tensor([-1.6970, -0.5726, 0.2628, -0.1189]) tensor([0.0624, 0.1921, 0.4430, 0.3024]) -Greedy action tensor([-1.6477, -0.5355, 0.7413, -0.1520]) tensor([0.0515, 0.1567, 0.5618, 0.2300]) -Greedy action tensor([-1.4312, -0.5386, 0.7094, -0.3042]) tensor([0.0665, 0.1624, 0.5658, 0.2053]) -Greedy action tensor([-0.7675, 0.1380, -0.6753, 0.2251]) tensor([0.1376, 0.3403, 0.1509, 0.3712]) -Greedy action tensor([-1.8691, -0.7938, 0.2071, -0.1664]) tensor([0.0575, 0.1685, 0.4584, 0.3156]) -Greedy action tensor([-1.6883, -0.7318, 0.1564, -0.0308]) tensor([0.0659, 0.1715, 0.4169, 0.3457]) -Greedy action tensor([-1.9552, -0.8074, 0.4111, -0.2988]) tensor([0.0499, 0.1572, 0.5316, 0.2614]) -Greedy action tensor([-1.9629, -0.5743, 0.4030, -0.0787]) tensor([0.0450, 0.1802, 0.4790, 0.2959]) -Greedy action tensor([-1.0294, -0.4922, 0.1740, -0.0992]) tensor([0.1166, 0.1995, 0.3884, 0.2955]) -Greedy action tensor([-2.0506, -0.7865, 0.9809, 0.2576]) tensor([0.0283, 0.1002, 0.5868, 0.2847]) -Greedy action tensor([-1.4504, -0.9209, 0.4643, -0.4858]) tensor([0.0826, 0.1403, 0.5604, 0.2167]) -Greedy action tensor([-1.3261, 0.5280, 0.2407, 0.2444]) tensor([0.0589, 0.3759, 0.2821, 0.2831]) -Greedy action tensor([-1.0163, -0.2179, -0.2617, -0.2646]) tensor([0.1339, 0.2975, 0.2847, 0.2839]) -Greedy action tensor([-1.0921, -0.6058, 0.4178, -0.0782]) tensor([0.1009, 0.1641, 0.4568, 0.2782]) -Greedy action tensor([-1.8917, -1.0275, 0.2594, -0.4089]) tensor([0.0611, 0.1449, 0.5249, 0.2691]) -Greedy action tensor([-1.5705, -0.6229, 0.2259, -0.3254]) tensor([0.0765, 0.1972, 0.4608, 0.2655]) -Greedy action tensor([-1.9989, -0.6836, 1.0267, 0.2875]) tensor([0.0284, 0.1059, 0.5859, 0.2798]) -Greedy action tensor([-0.7670, -0.5091, 0.3098, 0.0450]) tensor([0.1337, 0.1730, 0.3923, 0.3010]) -Greedy action tensor([-1.6817, -0.4690, 0.2825, -0.3424]) tensor([0.0653, 0.2197, 0.4657, 0.2493]) -Greedy action tensor([-0.6580, -0.5644, 0.1628, 0.2653]) tensor([0.1452, 0.1594, 0.3299, 0.3655]) -Greedy action tensor([-2.0249, -0.8826, 0.5080, -0.1224]) tensor([0.0427, 0.1338, 0.5374, 0.2861]) -Greedy action tensor([-1.4902, -0.6161, 0.4637, 0.0070]) tensor([0.0670, 0.1606, 0.4729, 0.2995]) -Greedy action tensor([-1.7549, -0.7389, 0.1153, -0.3713]) tensor([0.0702, 0.1940, 0.4557, 0.2801]) -Greedy action tensor([-1.7385, -0.7317, 0.1329, -0.3777]) tensor([0.0708, 0.1936, 0.4597, 0.2759]) -Greedy action tensor([-1.2641, -0.5844, 0.4857, -0.1880]) tensor([0.0858, 0.1692, 0.4934, 0.2516]) -Greedy action tensor([-1.2724, -0.5539, 0.4006, 0.0413]) tensor([0.0827, 0.1695, 0.4404, 0.3074]) -Greedy action tensor([-1.3383, -0.5542, 0.3874, 0.0932]) tensor([0.0770, 0.1686, 0.4323, 0.3221]) -Greedy action tensor([-1.9803, -0.9169, 0.3730, -0.1854]) tensor([0.0489, 0.1417, 0.5148, 0.2945]) -Greedy action tensor([-1.7419, -0.6691, 0.1704, -0.3539]) tensor([0.0680, 0.1989, 0.4605, 0.2726]) -Greedy action tensor([-1.8395, -0.7755, 0.2138, -0.4115]) tensor([0.0630, 0.1827, 0.4913, 0.2629]) -Greedy action tensor([-1.9883, -0.9507, 0.4133, -0.2669]) tensor([0.0489, 0.1380, 0.5398, 0.2734]) -Greedy action tensor([-0.9744, -0.6586, 0.2838, 0.0731]) tensor([0.1144, 0.1569, 0.4026, 0.3261]) -Greedy action tensor([-1.9885, -0.8945, 0.3734, -0.2301]) tensor([0.0490, 0.1464, 0.5201, 0.2845]) -Greedy action tensor([-1.1284, -0.6026, 0.3308, 0.0417]) tensor([0.0979, 0.1656, 0.4211, 0.3154]) -Greedy action tensor([-2.0416, -0.8495, 0.5599, -0.0713]) tensor([0.0401, 0.1320, 0.5404, 0.2875]) -Greedy action tensor([-1.6631, -0.6571, 0.7764, 0.3534]) tensor([0.0440, 0.1204, 0.5049, 0.3307]) -Greedy action tensor([-1.8072, -0.4770, 0.6378, -0.0052]) tensor([0.0447, 0.1690, 0.5153, 0.2709]) -Greedy action tensor([-1.7381, -0.7328, 0.0630, -0.4078]) tensor([0.0737, 0.2014, 0.4463, 0.2787]) -Greedy action tensor([-1.9720, -0.7324, 0.8327, 0.1250]) tensor([0.0343, 0.1186, 0.5674, 0.2796]) -Greedy action tensor([-1.1384, -0.5615, 0.3161, 0.0997]) tensor([0.0951, 0.1694, 0.4074, 0.3281]) -Greedy action tensor([-0.7846, -0.3647, 0.4299, -0.2048]) tensor([0.1303, 0.1983, 0.4388, 0.2326]) -Greedy action tensor([-1.7449, -0.7889, 0.1654, -0.4670]) tensor([0.0717, 0.1865, 0.4844, 0.2574]) -Greedy action tensor([-1.2870, -0.6079, 0.3414, 0.1437]) tensor([0.0816, 0.1610, 0.4160, 0.3414]) -Greedy action tensor([-0.3956, -0.4580, 0.1833, 0.2449]) tensor([0.1779, 0.1671, 0.3174, 0.3376]) -Greedy action tensor([-1.7506, -0.9484, 0.2153, -0.4689]) tensor([0.0716, 0.1596, 0.5110, 0.2578]) -Greedy action tensor([-1.7268, -0.8202, 0.0983, -0.4447]) tensor([0.0753, 0.1864, 0.4670, 0.2713]) -Greedy action tensor([-1.6578, -0.3589, 0.5282, -0.1070]) tensor([0.0547, 0.2005, 0.4868, 0.2580]) -Greedy action tensor([-1.2690, -0.6097, 0.4065, -0.0147]) tensor([0.0849, 0.1641, 0.4534, 0.2976]) -Greedy action tensor([-1.9200, -0.8695, 0.2907, -0.2319]) tensor([0.0544, 0.1555, 0.4960, 0.2941]) -Greedy action tensor([-1.6957, -0.5409, 0.5793, -0.1414]) tensor([0.0537, 0.1703, 0.5221, 0.2540]) -Greedy action tensor([-1.7527, -0.7817, 0.1285, -0.4450]) tensor([0.0719, 0.1900, 0.4721, 0.2660]) -Greedy action tensor([-1.6544, -0.5950, 0.8251, 0.0851]) tensor([0.0465, 0.1341, 0.5548, 0.2647]) -Greedy action tensor([-1.9693, -0.7999, 0.4941, -0.0676]) tensor([0.0441, 0.1421, 0.5182, 0.2955]) -Greedy action tensor([-0.8076, -0.6043, 0.2578, 0.1140]) tensor([0.1309, 0.1604, 0.3798, 0.3289]) -Greedy action tensor([-1.3943, -0.5716, 0.5744, -0.2713]) tensor([0.0740, 0.1685, 0.5300, 0.2275]) -Greedy action tensor([-1.7133, -0.7651, 0.1930, -0.3222]) tensor([0.0698, 0.1801, 0.4696, 0.2805]) -Greedy action tensor([-1.1588, -0.6419, 0.3156, 0.1094]) tensor([0.0943, 0.1582, 0.4121, 0.3353]) -Greedy action tensor([-1.7317, -0.8260, 0.1570, -0.4447]) tensor([0.0730, 0.1805, 0.4823, 0.2642]) -Greedy action tensor([-0.2004, -0.2707, 0.1265, -0.2096]) tensor([0.2320, 0.2163, 0.3218, 0.2299]) -Greedy action tensor([-2.0419, -0.8315, 0.5516, -0.0340]) tensor([0.0397, 0.1332, 0.5313, 0.2958]) -Greedy action tensor([-1.7517, -0.7688, 0.1543, -0.3697]) tensor([0.0695, 0.1858, 0.4677, 0.2770]) -Greedy action tensor([-0.7321, -0.2123, 0.3743, -0.0974]) tensor([0.1317, 0.2215, 0.3983, 0.2485]) -Greedy action tensor([-1.5650, -0.5165, 0.6152, 0.2874]) tensor([0.0524, 0.1496, 0.4638, 0.3342]) -Greedy action tensor([-1.2527, -0.6370, 0.3380, 0.1162]) tensor([0.0856, 0.1583, 0.4198, 0.3363]) -Greedy action tensor([-1.8619, -0.8242, 0.2352, -0.2815]) tensor([0.0594, 0.1678, 0.4840, 0.2887]) -Greedy action tensor([-1.8308, -0.9125, 0.2498, -0.3585]) tensor([0.0630, 0.1578, 0.5046, 0.2746]) -Greedy action tensor([-1.5431e+00, -6.1044e-01, 4.9369e-01, -1.1699e-03]) tensor([0.0630, 0.1600, 0.4827, 0.2943]) -Greedy action tensor([-1.9217, -0.8264, 0.2331, -0.2554]) tensor([0.0558, 0.1670, 0.4817, 0.2955]) -Greedy action tensor([-1.9496, -0.7648, 0.1630, -0.3377]) tensor([0.0570, 0.1863, 0.4711, 0.2856]) -Greedy action tensor([-1.0750, -0.5524, 0.3903, 0.5808]) tensor([0.0816, 0.1376, 0.3533, 0.4274]) -Greedy action tensor([-1.8111, -0.8086, 0.1691, -0.3830]) tensor([0.0660, 0.1800, 0.4785, 0.2755]) -Greedy action tensor([-1.8540, -0.9302, 0.2159, -0.4008]) tensor([0.0636, 0.1602, 0.5041, 0.2721]) -Greedy action tensor([ 0.5105, -0.5599, -0.1561, 0.0363]) tensor([0.4034, 0.1383, 0.2071, 0.2511]) -Greedy action tensor([ 0.7174, -0.5951, -0.3752, 0.2412]) tensor([0.4493, 0.1209, 0.1507, 0.2791]) -Greedy action tensor([ 1.1489, -0.7482, -0.7243, 0.2840]) tensor([0.5798, 0.0870, 0.0891, 0.2441]) -Greedy action tensor([ 0.8960, -0.3859, -0.2514, -0.0098]) tensor([0.5002, 0.1388, 0.1588, 0.2022]) -Greedy action tensor([ 1.2837, -0.6371, -0.2757, 0.1193]) tensor([0.5992, 0.0878, 0.1260, 0.1870]) -Greedy action tensor([ 0.7793, -0.5462, -0.3264, -0.1210]) tensor([0.4992, 0.1326, 0.1652, 0.2029]) -Greedy action tensor([ 1.3921, -0.4761, -0.4535, -0.0366]) tensor([0.6443, 0.0995, 0.1018, 0.1544]) -Greedy action tensor([ 0.8355, -0.5779, -0.2212, 0.2026]) tensor([0.4713, 0.1147, 0.1638, 0.2503]) -Greedy action tensor([ 0.4898, -0.5042, -0.0060, -0.0259]) tensor([0.3882, 0.1437, 0.2364, 0.2318]) -Greedy action tensor([ 1.2963, -0.4085, -0.2524, -0.1377]) tensor([0.6125, 0.1114, 0.1302, 0.1460]) -Greedy action tensor([ 1.6625, -0.4369, -0.5857, -0.0569]) tensor([0.7106, 0.0871, 0.0750, 0.1273]) -Greedy action tensor([ 0.5125, -0.4175, 0.1251, -0.0937]) tensor([0.3818, 0.1507, 0.2592, 0.2083]) -Greedy action tensor([ 0.8010, -0.7030, -0.2586, 0.1828]) tensor([0.4744, 0.1054, 0.1644, 0.2557]) -Greedy action tensor([ 0.6064, -0.3625, -0.4185, -0.2347]) tensor([0.4609, 0.1749, 0.1654, 0.1988]) -Greedy action tensor([ 0.8469, -0.3401, 0.0492, -0.2616]) tensor([0.4795, 0.1463, 0.2159, 0.1583]) -Greedy action tensor([ 0.6013, -0.3285, 0.0999, -0.2613]) tensor([0.4128, 0.1629, 0.2500, 0.1742]) -Greedy action tensor([ 0.8851, -0.1669, -0.0968, -0.2507]) tensor([0.4890, 0.1708, 0.1832, 0.1570]) -Greedy action tensor([ 0.7126, -0.6382, -0.3667, 0.1887]) tensor([0.4564, 0.1182, 0.1551, 0.2703]) -Greedy action tensor([ 0.5301, -0.2821, -0.0623, 0.0088]) tensor([0.3860, 0.1713, 0.2135, 0.2292]) -Greedy action tensor([ 0.6799, -0.4557, -0.2038, 0.0558]) tensor([0.4405, 0.1415, 0.1820, 0.2360]) -Greedy action tensor([ 0.7573, -0.6418, -0.3742, 0.3011]) tensor([0.4539, 0.1120, 0.1464, 0.2876]) -Greedy action tensor([ 0.8676, -0.7311, -0.4367, 0.2228]) tensor([0.5004, 0.1012, 0.1358, 0.2626]) -Greedy action tensor([ 0.8768, -0.8270, -0.4153, 0.0416]) tensor([0.5290, 0.0963, 0.1453, 0.2295]) -Greedy action tensor([ 0.4667, -0.4548, 0.0973, -0.1311]) tensor([0.3789, 0.1508, 0.2619, 0.2084]) -Greedy action tensor([ 1.5086, -0.5972, -0.2890, 0.0329]) tensor([0.6596, 0.0803, 0.1093, 0.1508]) -Greedy action tensor([ 0.1379, -0.0132, -0.3604, -0.1305]) tensor([0.3094, 0.2660, 0.1880, 0.2366]) -Greedy action tensor([ 0.5966, -0.2093, 0.0355, -0.1565]) tensor([0.4019, 0.1795, 0.2293, 0.1893]) -Greedy action tensor([ 0.8590, -0.3386, -0.2527, 0.0143]) tensor([0.4853, 0.1465, 0.1597, 0.2085]) -Greedy action tensor([ 0.4176, -0.0714, 0.0538, -0.1320]) tensor([0.3466, 0.2125, 0.2409, 0.2000]) -Greedy action tensor([ 1.1234, -0.4379, -0.2491, -0.1191]) tensor([0.5708, 0.1198, 0.1447, 0.1648]) -Greedy action tensor([ 1.6400, -0.4633, -0.5272, -0.0131]) tensor([0.7003, 0.0855, 0.0802, 0.1341]) -Greedy action tensor([ 1.0153, -0.5821, -0.8418, 0.2330]) tensor([0.5507, 0.1115, 0.0860, 0.2519]) -Greedy action tensor([ 0.5159, -0.5553, -0.0120, 0.0580]) tensor([0.3899, 0.1336, 0.2299, 0.2466]) -Greedy action tensor([ 1.1832, -0.4455, -0.1133, -0.0044]) tensor([0.5635, 0.1105, 0.1541, 0.1718]) -Greedy action tensor([ 0.7971, -0.5555, -0.3025, 0.1415]) tensor([0.4738, 0.1225, 0.1578, 0.2460]) -Greedy action tensor([ 0.6497, -0.3637, 0.0740, -0.0619]) tensor([0.4139, 0.1502, 0.2327, 0.2032]) -Greedy action tensor([ 0.5020, -0.6312, -0.1530, 0.0240]) tensor([0.4063, 0.1308, 0.2110, 0.2519]) -Greedy action tensor([ 1.2150, -0.6264, -0.3989, -0.1710]) tensor([0.6220, 0.0986, 0.1238, 0.1555]) -Greedy action tensor([ 0.8927, -0.6292, -0.2259, 0.6777]) tensor([0.4252, 0.0928, 0.1389, 0.3430]) -Greedy action tensor([ 1.0855, -0.7002, -0.3745, -0.1448]) tensor([0.5910, 0.0991, 0.1372, 0.1727]) -Greedy action tensor([ 1.2855, -0.6846, -0.5455, 0.0361]) tensor([0.6304, 0.0879, 0.1010, 0.1807]) -Greedy action tensor([ 0.4409, -0.2238, 0.1124, -0.1983]) tensor([0.3620, 0.1862, 0.2607, 0.1911]) -Greedy action tensor([ 1.3955, -0.3073, -0.2777, -0.1308]) tensor([0.6301, 0.1148, 0.1182, 0.1369]) -Greedy action tensor([ 1.0586, -0.3891, -0.0059, -0.2408]) tensor([0.5398, 0.1269, 0.1862, 0.1472]) -Greedy action tensor([ 1.1795, -0.6539, -0.6088, 1.1755]) tensor([0.4304, 0.0688, 0.0720, 0.4288]) -Greedy action tensor([ 1.7069, -0.4646, -0.5347, 0.1311]) tensor([0.7007, 0.0799, 0.0745, 0.1449]) -Greedy action tensor([ 0.7675, -0.6406, -0.3554, 0.1224]) tensor([0.4774, 0.1168, 0.1553, 0.2505]) -Greedy action tensor([ 0.8845, -0.6281, -0.3919, 0.1804]) tensor([0.5015, 0.1105, 0.1399, 0.2480]) -Greedy action tensor([ 0.5888, -0.3774, 0.1143, -0.2314]) tensor([0.4093, 0.1558, 0.2547, 0.1802]) -Greedy action tensor([ 1.0663, -0.4615, -0.1327, -0.1810]) tensor([0.5538, 0.1202, 0.1670, 0.1591]) -Greedy action tensor([ 1.8582, -0.5377, -0.1716, 0.3347]) tensor([0.6943, 0.0632, 0.0912, 0.1513]) -Greedy action tensor([ 0.9851, -0.5825, -0.3318, 0.0458]) tensor([0.5355, 0.1117, 0.1435, 0.2093]) -Greedy action tensor([ 1.0264, -0.4960, -0.2371, 0.1657]) tensor([0.5198, 0.1134, 0.1469, 0.2198]) -Greedy action tensor([ 0.8267, -0.3247, -0.2947, -0.0360]) tensor([0.4845, 0.1532, 0.1579, 0.2045]) -Greedy action tensor([ 0.9203, -0.5447, -0.2427, -0.0529]) tensor([0.5204, 0.1203, 0.1627, 0.1966]) -Greedy action tensor([ 0.8118, -0.7479, -0.3989, 0.2117]) tensor([0.4862, 0.1022, 0.1449, 0.2668]) -Greedy action tensor([ 0.8788, 0.0103, 0.0479, -0.0193]) tensor([0.4420, 0.1854, 0.1925, 0.1800]) -Greedy action tensor([ 0.6974, -0.7254, -0.3940, 0.1034]) tensor([0.4697, 0.1132, 0.1577, 0.2593]) -Greedy action tensor([ 0.6087, -0.5662, -0.0704, 0.0146]) tensor([0.4223, 0.1304, 0.2141, 0.2331]) -Greedy action tensor([ 0.4719, -0.4752, -0.0426, -0.0900]) tensor([0.3913, 0.1518, 0.2339, 0.2231]) -Greedy action tensor([ 0.8572, -0.7453, -0.2933, 0.2579]) tensor([0.4838, 0.0974, 0.1531, 0.2657]) -Greedy action tensor([ 0.8953, -0.3543, -0.0762, -0.2401]) tensor([0.5034, 0.1443, 0.1906, 0.1617]) -Greedy action tensor([ 1.3338, -0.6428, -0.4444, 0.3415]) tensor([0.5959, 0.0826, 0.1007, 0.2209]) -Greedy action tensor([ 0.9283, -0.6973, -0.3157, 0.3161]) tensor([0.4933, 0.0971, 0.1422, 0.2675]) -Greedy action tensor([ 1.1715, -0.6418, -0.3340, 0.0160]) tensor([0.5883, 0.0960, 0.1305, 0.1852]) -Greedy action tensor([ 0.7921, -0.5909, -0.1776, 0.2255]) tensor([0.4551, 0.1141, 0.1726, 0.2582]) -Greedy action tensor([ 0.7679, -0.1912, -0.1636, -0.3009]) tensor([0.4716, 0.1807, 0.1858, 0.1619]) -Greedy action tensor([ 1.0299, -0.7648, -0.6202, 0.3111]) tensor([0.5418, 0.0900, 0.1041, 0.2641]) -Greedy action tensor([ 1.0233, -0.3265, 0.1105, -0.3278]) tensor([0.5209, 0.1351, 0.2091, 0.1349]) -Greedy action tensor([ 1.4740, -0.4729, -0.3941, 0.1684]) tensor([0.6377, 0.0910, 0.0985, 0.1728]) -Greedy action tensor([ 0.7384, -0.3157, -0.2592, 0.1533]) tensor([0.4397, 0.1532, 0.1621, 0.2449]) -Greedy action tensor([ 0.7668, -0.5270, -0.2756, -0.0615]) tensor([0.4846, 0.1329, 0.1709, 0.2117]) -Greedy action tensor([ 1.1417, -0.7833, -0.2866, 0.0599]) tensor([0.5799, 0.0846, 0.1390, 0.1966]) -Greedy action tensor([ 1.0279, -0.3627, -0.5261, -0.1582]) tensor([0.5663, 0.1410, 0.1197, 0.1730]) -Greedy action tensor([ 0.7541, -0.6624, -0.1446, -0.0704]) tensor([0.4789, 0.1162, 0.1950, 0.2100]) -Greedy action tensor([ 0.4760, -0.4196, 0.2115, -0.1502]) tensor([0.3689, 0.1506, 0.2832, 0.1972]) -Greedy action tensor([ 0.8410, -0.4982, 0.1011, -0.2833]) tensor([0.4845, 0.1270, 0.2312, 0.1574]) -Greedy action tensor([ 1.3156, -0.4673, -0.2629, -0.1522]) tensor([0.6231, 0.1048, 0.1285, 0.1436]) -Greedy action tensor([ 0.8362, -0.1996, -0.5135, 0.3014]) tensor([0.4545, 0.1613, 0.1179, 0.2663]) -Greedy action tensor([ 0.9187, -0.4349, -0.1529, -0.1974]) tensor([0.5186, 0.1340, 0.1776, 0.1699]) -Greedy action tensor([ 0.9057, -0.7003, -0.2395, 0.0441]) tensor([0.5151, 0.1034, 0.1639, 0.2176]) -Greedy action tensor([ 0.3158, 0.1078, 0.1653, -0.2444]) tensor([0.3083, 0.2504, 0.2652, 0.1761]) -Greedy action tensor([ 0.4677, 0.0846, 0.0713, -0.1960]) tensor([0.3485, 0.2376, 0.2344, 0.1795]) -Greedy action tensor([ 0.7746, -0.2377, -0.0733, -0.5709]) tensor([0.4873, 0.1771, 0.2087, 0.1269]) -Greedy action tensor([ 0.1931, 0.0517, 0.1336, -0.1629]) tensor([0.2848, 0.2473, 0.2684, 0.1995]) -Greedy action tensor([ 0.4092, 0.1027, 0.1155, -0.1989]) tensor([0.3305, 0.2432, 0.2464, 0.1799]) -Greedy action tensor([ 0.8125, -0.3641, -0.0127, -0.4568]) tensor([0.4932, 0.1521, 0.2161, 0.1386]) -Greedy action tensor([ 0.3571, -0.0209, 0.2674, -0.1298]) tensor([0.3111, 0.2132, 0.2845, 0.1912]) -Greedy action tensor([ 0.6487, -0.0732, -0.0443, -0.5139]) tensor([0.4351, 0.2114, 0.2176, 0.1360]) -Greedy action tensor([ 0.4661, 0.0827, 0.0439, -0.1606]) tensor([0.3482, 0.2374, 0.2283, 0.1861]) -Greedy action tensor([ 0.3605, 0.0750, 0.1208, -0.3688]) tensor([0.3310, 0.2488, 0.2605, 0.1596]) -Greedy action tensor([ 0.1543, 0.2127, 0.0870, -0.2790]) tensor([0.2745, 0.2910, 0.2566, 0.1780]) -Greedy action tensor([ 0.4599, 0.1153, -0.0636, -0.2615]) tensor([0.3588, 0.2542, 0.2126, 0.1744]) -Greedy action tensor([ 0.3032, -0.2115, -0.1352, -0.4291]) tensor([0.3672, 0.2195, 0.2368, 0.1765]) -Greedy action tensor([ 0.2456, -0.1739, -0.0288, -0.1929]) tensor([0.3265, 0.2147, 0.2482, 0.2106]) -Greedy action tensor([ 0.2764, -0.0077, 0.1633, -0.2587]) tensor([0.3095, 0.2329, 0.2764, 0.1812]) -Greedy action tensor([ 0.2083, 0.2183, -0.1080, -0.3032]) tensor([0.2995, 0.3026, 0.2183, 0.1796]) -Greedy action tensor([ 0.2638, -0.1663, -0.1356, -0.3405]) tensor([0.3487, 0.2268, 0.2339, 0.1906]) -Greedy action tensor([ 0.2602, 0.0249, -0.0897, -0.2677]) tensor([0.3242, 0.2562, 0.2285, 0.1912]) -Greedy action tensor([ 0.3645, -0.0509, -0.0427, -0.3915]) tensor([0.3578, 0.2361, 0.2381, 0.1680]) -Greedy action tensor([ 0.2555, -0.1103, 0.0807, -0.3613]) tensor([0.3254, 0.2257, 0.2732, 0.1756]) -Greedy action tensor([ 1.0615, -0.7620, -0.0868, -0.6828]) tensor([0.6048, 0.0977, 0.1918, 0.1057]) -Greedy action tensor([ 0.2367, 0.1780, 0.1856, -0.1506]) tensor([0.2800, 0.2640, 0.2660, 0.1900]) -Greedy action tensor([ 0.6512, -0.0094, 0.0091, -0.3645]) tensor([0.4158, 0.2148, 0.2188, 0.1506]) -Greedy action tensor([ 0.1404, 0.0547, 0.1905, -0.1012]) tensor([0.2663, 0.2445, 0.2800, 0.2092]) -Greedy action tensor([ 0.4780, 0.0739, -0.1635, -0.2743]) tensor([0.3752, 0.2505, 0.1975, 0.1768]) -Greedy action tensor([ 0.5135, -0.0730, 0.0168, -0.2150]) tensor([0.3777, 0.2101, 0.2298, 0.1823]) -Greedy action tensor([ 0.5099, -0.2863, 0.1056, -0.4713]) tensor([0.4011, 0.1809, 0.2677, 0.1503]) -Greedy action tensor([ 0.2662, 0.1420, 0.0417, -0.1176]) tensor([0.2973, 0.2626, 0.2375, 0.2026]) -Greedy action tensor([ 0.5426, -0.1791, 0.0610, -0.6230]) tensor([0.4140, 0.2012, 0.2558, 0.1291]) -Greedy action tensor([ 0.6198, -0.1007, -0.0944, -0.4737]) tensor([0.4327, 0.2105, 0.2118, 0.1450]) -Greedy action tensor([ 0.2921, -0.0521, 0.1333, -0.3003]) tensor([0.3210, 0.2276, 0.2739, 0.1775]) -Greedy action tensor([ 0.1431, 0.0426, -0.0152, -0.3905]) tensor([0.2990, 0.2704, 0.2552, 0.1754]) -Greedy action tensor([ 0.4545, -0.0060, 0.1186, -0.2565]) tensor([0.3525, 0.2224, 0.2519, 0.1731]) -Greedy action tensor([ 0.6627, -0.1359, 0.0284, -0.5128]) tensor([0.4369, 0.1966, 0.2317, 0.1348]) -Greedy action tensor([ 0.6236, -0.0505, -0.0279, -0.3568]) tensor([0.4156, 0.2118, 0.2167, 0.1559]) -Greedy action tensor([ 0.6564, -0.2710, 0.0983, -0.6021]) tensor([0.4441, 0.1757, 0.2541, 0.1262]) -Greedy action tensor([ 0.1322, 0.0738, 0.1519, -0.1580]) tensor([0.2695, 0.2542, 0.2748, 0.2016]) -Greedy action tensor([ 0.4973, 0.1086, -0.0470, -0.3066]) tensor([0.3696, 0.2505, 0.2144, 0.1654]) -Greedy action tensor([ 0.3823, 0.2142, -0.1269, -0.3530]) tensor([0.3418, 0.2889, 0.2054, 0.1639]) -Greedy action tensor([ 0.6288, -0.4149, 0.1599, -0.7979]) tensor([0.4509, 0.1588, 0.2821, 0.1082]) -Greedy action tensor([ 1.1167, -0.7027, -0.1634, -0.6900]) tensor([0.6233, 0.1010, 0.1733, 0.1023]) -Greedy action tensor([ 0.4567, -0.1955, -0.0609, -0.4146]) tensor([0.3944, 0.2055, 0.2351, 0.1650]) -Greedy action tensor([ 0.1961, -0.1082, -0.1541, -0.3088]) tensor([0.3283, 0.2422, 0.2313, 0.1982]) -Greedy action tensor([ 0.1704, 0.1735, 0.1383, -0.2166]) tensor([0.2739, 0.2748, 0.2653, 0.1860]) -Greedy action tensor([ 0.3701, 0.1818, 0.1527, -0.1624]) tensor([0.3106, 0.2573, 0.2499, 0.1823]) -Greedy action tensor([ 0.6034, -0.2337, 0.0808, -0.4504]) tensor([0.4211, 0.1823, 0.2497, 0.1468]) -Greedy action tensor([ 0.3106, 0.0767, 0.1837, -0.2342]) tensor([0.3075, 0.2433, 0.2708, 0.1783]) -Greedy action tensor([ 0.4379, -0.1506, 0.0365, -0.3775]) tensor([0.3750, 0.2082, 0.2510, 0.1659]) -Greedy action tensor([ 0.3826, 0.1304, 0.1461, -0.1400]) tensor([0.3165, 0.2460, 0.2499, 0.1877]) -Greedy action tensor([ 0.3093, 0.1561, 0.1744, -0.2609]) tensor([0.3033, 0.2602, 0.2650, 0.1715]) -Greedy action tensor([ 0.3095, -0.0109, -0.0969, -0.3757]) tensor([0.3453, 0.2507, 0.2300, 0.1740]) -Greedy action tensor([ 0.3645, 0.1025, 0.1537, -0.2571]) tensor([0.3209, 0.2469, 0.2599, 0.1723]) -Greedy action tensor([ 0.2144, 0.0934, -0.0341, -0.3407]) tensor([0.3086, 0.2735, 0.2407, 0.1772]) -Greedy action tensor([ 0.1625, 0.0084, 0.0704, -0.2097]) tensor([0.2892, 0.2479, 0.2637, 0.1993]) -Greedy action tensor([ 0.1835, 0.0975, 0.2186, -0.1414]) tensor([0.2720, 0.2496, 0.2818, 0.1966]) -Greedy action tensor([ 0.7675, -0.3071, -0.0360, -0.6718]) tensor([0.4935, 0.1685, 0.2210, 0.1170]) -Greedy action tensor([ 0.2710, 0.1448, 0.0428, -0.2329]) tensor([0.3047, 0.2686, 0.2425, 0.1841]) -Greedy action tensor([ 0.9337, -0.5481, -0.1477, -0.7007]) tensor([0.5677, 0.1290, 0.1925, 0.1107]) -Greedy action tensor([ 0.7433, -0.3413, 0.0193, -0.6552]) tensor([0.4831, 0.1633, 0.2342, 0.1193]) -Greedy action tensor([ 0.4645, 0.0690, 0.0411, -0.2971]) tensor([0.3578, 0.2409, 0.2343, 0.1670]) -Greedy action tensor([ 0.8667, -0.3000, -0.1143, -0.6690]) tensor([0.5259, 0.1638, 0.1972, 0.1132]) -Greedy action tensor([ 0.4047, 0.2352, -0.0690, -0.2417]) tensor([0.3344, 0.2822, 0.2082, 0.1752]) -Greedy action tensor([ 0.4526, -0.3251, -0.1425, -0.3910]) tensor([0.4096, 0.1882, 0.2259, 0.1762]) -Greedy action tensor([ 0.2975, 0.1953, -0.0445, -0.3849]) tensor([0.3207, 0.2895, 0.2278, 0.1621]) -Greedy action tensor([ 1.0196, -0.5645, 0.0300, -0.6307]) tensor([0.5653, 0.1160, 0.2101, 0.1085]) -Greedy action tensor([ 0.3851, 0.0742, -0.0308, -0.1286]) tensor([0.3343, 0.2450, 0.2206, 0.2000]) -Greedy action tensor([ 0.2452, 0.0338, 0.1854, -0.2264]) tensor([0.2962, 0.2398, 0.2791, 0.1849]) -Greedy action tensor([ 0.6575, -0.1929, -0.0775, -0.4048]) tensor([0.4440, 0.1897, 0.2129, 0.1535]) -Greedy action tensor([ 0.4023, -0.0559, 0.1566, -0.1894]) tensor([0.3369, 0.2131, 0.2635, 0.1865]) -Greedy action tensor([ 0.2716, 0.1940, 0.1608, -0.2596]) tensor([0.2934, 0.2715, 0.2626, 0.1725]) -Greedy action tensor([ 0.2311, 0.2336, 0.0622, -0.2932]) tensor([0.2908, 0.2915, 0.2456, 0.1721]) -Greedy action tensor([ 0.3442, 0.1858, 0.1692, -0.2084]) tensor([0.3060, 0.2611, 0.2569, 0.1761]) -Greedy action tensor([ 1.0435, -0.6020, -0.2320, -0.7229]) tensor([0.6086, 0.1174, 0.1700, 0.1040]) -Greedy action tensor([ 0.3028, 0.0877, 0.0253, -0.3414]) tensor([0.3237, 0.2611, 0.2453, 0.1700]) -Greedy action tensor([ 0.3173, 0.1378, 0.1345, -0.2550]) tensor([0.3093, 0.2585, 0.2576, 0.1745]) -Greedy action tensor([ 0.4985, -0.1389, 0.0058, -0.5148]) tensor([0.3996, 0.2112, 0.2441, 0.1451]) -Greedy action tensor([ 1.1973, -0.8518, -0.0259, -0.7244]) tensor([0.6372, 0.0821, 0.1875, 0.0932]) -Greedy action tensor([ 0.3129, -0.0906, -0.1647, -0.3867]) tensor([0.3591, 0.2398, 0.2227, 0.1784]) -Greedy action tensor([ 0.3775, 0.0392, -0.0644, -0.1432]) tensor([0.3390, 0.2417, 0.2179, 0.2014]) -Greedy action tensor([ 0.2535, 0.1268, 0.1686, -0.2022]) tensor([0.2912, 0.2566, 0.2675, 0.1846]) -Greedy action tensor([-0.0428, -0.0191, -0.0090, -0.1749]) tensor([0.2542, 0.2602, 0.2629, 0.2227]) -Greedy action tensor([0.5893, 0.4868, 0.2504, 0.3654]) tensor([0.2929, 0.2643, 0.2087, 0.2341]) -Greedy action tensor([ 1.0672, 0.3179, -0.2665, 0.5164]) tensor([0.4324, 0.2044, 0.1139, 0.2493]) -Greedy action tensor([1.9058, 0.0683, 0.3313, 0.6118]) tensor([0.6096, 0.0970, 0.1262, 0.1671]) -Greedy action tensor([1.2761, 0.0641, 0.3945, 0.8638]) tensor([0.4213, 0.1254, 0.1744, 0.2789]) -Greedy action tensor([ 1.0074, -0.3999, -0.4207, 1.4068]) tensor([0.3361, 0.0823, 0.0806, 0.5011]) -Greedy action tensor([ 1.9350, -0.0959, 0.4514, 1.2983]) tensor([0.5299, 0.0695, 0.1202, 0.2803]) -Greedy action tensor([ 1.0161, -0.8434, 0.0436, 1.2047]) tensor([0.3648, 0.0568, 0.1379, 0.4405]) -Greedy action tensor([ 2.1490, -0.2677, -0.2244, 0.8761]) tensor([0.6838, 0.0610, 0.0637, 0.1915]) -Greedy action tensor([ 0.0886, 0.5848, 0.7275, -0.5687]) tensor([0.1978, 0.3249, 0.3747, 0.1025]) -Greedy action tensor([ 2.2791, -0.6713, -0.1378, 0.9580]) tensor([0.7100, 0.0372, 0.0633, 0.1895]) -Greedy action tensor([1.2433, 0.1543, 0.5407, 0.3701]) tensor([0.4446, 0.1496, 0.2202, 0.1856]) -Greedy action tensor([ 1.2908, 0.4893, -0.2061, 1.5239]) tensor([0.3407, 0.1529, 0.0763, 0.4301]) -Greedy action tensor([ 1.5211, 0.1518, -0.3131, 1.1902]) tensor([0.4690, 0.1193, 0.0749, 0.3369]) -Greedy action tensor([ 2.2588, -0.1841, -0.3942, 0.9033]) tensor([0.7066, 0.0614, 0.0498, 0.1822]) -Greedy action tensor([ 1.4123, -1.0273, -0.2499, 0.9469]) tensor([0.5250, 0.0458, 0.0996, 0.3296]) -Greedy action tensor([ 0.5582, -0.8139, 0.0284, 0.3994]) tensor([0.3710, 0.0941, 0.2184, 0.3165]) -Greedy action tensor([ 0.6590, -0.5617, -0.1216, 0.6914]) tensor([0.3589, 0.1059, 0.1644, 0.3708]) -Greedy action tensor([ 1.3515, -1.1333, 0.9478, 0.9260]) tensor([0.4159, 0.0347, 0.2777, 0.2717]) -Greedy action tensor([1.7024, 0.1881, 1.1463, 0.5893]) tensor([0.4713, 0.1037, 0.2702, 0.1548]) -Greedy action tensor([ 1.3874, -1.5012, 0.2379, 0.5348]) tensor([0.5559, 0.0309, 0.1761, 0.2370]) -Greedy action tensor([ 1.1481, 0.5624, -0.3340, 0.4120]) tensor([0.4419, 0.2460, 0.1004, 0.2117]) -Greedy action tensor([1.5047, 0.1037, 0.5733, 0.7852]) tensor([0.4701, 0.1158, 0.1852, 0.2289]) -Greedy action tensor([ 1.7506, -1.0877, -0.7236, 1.0457]) tensor([0.6109, 0.0358, 0.0515, 0.3019]) -Greedy action tensor([ 1.9226, -1.2568, -1.0941, 1.0847]) tensor([0.6565, 0.0273, 0.0321, 0.2840]) -Greedy action tensor([ 1.3985, -0.3346, 0.6279, 0.7920]) tensor([0.4577, 0.0809, 0.2118, 0.2496]) -Greedy action tensor([ 2.0607, -0.1667, 0.4188, 1.3408]) tensor([0.5592, 0.0603, 0.1083, 0.2722]) -Greedy action tensor([1.5508, 0.4308, 0.2717, 0.7454]) tensor([0.4875, 0.1591, 0.1357, 0.2178]) -Greedy action tensor([ 1.0624, -0.3939, 0.6133, 0.4319]) tensor([0.4160, 0.0970, 0.2655, 0.2215]) -Greedy action tensor([ 1.9859e+00, -6.0064e-01, 1.1910e-03, 9.5029e-01]) tensor([0.6379, 0.0480, 0.0877, 0.2265]) -Greedy action tensor([ 0.4700, -0.5351, 0.6148, 0.5366]) tensor([0.2785, 0.1019, 0.3219, 0.2977]) -Greedy action tensor([ 1.8030, 0.4229, -0.7590, 1.6440]) tensor([0.4584, 0.1153, 0.0354, 0.3910]) -Greedy action tensor([ 0.8492, -1.5027, -0.1960, 0.5063]) tensor([0.4637, 0.0441, 0.1631, 0.3291]) -Greedy action tensor([ 0.9821, -0.0738, -0.5188, 1.8105]) tensor([0.2590, 0.0901, 0.0577, 0.5931]) -Greedy action tensor([ 0.4861, -0.4404, 0.2987, 0.4512]) tensor([0.3134, 0.1241, 0.2599, 0.3027]) -Greedy action tensor([1.9119, 0.1116, 0.4180, 0.9541]) tensor([0.5639, 0.0932, 0.1266, 0.2164]) -Greedy action tensor([ 1.5631, -0.4898, 0.1436, 0.9830]) tensor([0.5181, 0.0665, 0.1253, 0.2901]) -Greedy action tensor([ 1.4265, -0.2174, 0.0574, 1.1792]) tensor([0.4487, 0.0867, 0.1141, 0.3504]) -Greedy action tensor([ 1.2493, -0.0605, 0.4373, 1.8427]) tensor([0.2838, 0.0766, 0.1260, 0.5137]) -Greedy action tensor([ 1.5213, 0.3701, -0.6312, 0.2962]) tensor([0.5793, 0.1832, 0.0673, 0.1702]) -Greedy action tensor([ 1.7371, -0.8074, -0.2390, 1.0407]) tensor([0.5829, 0.0458, 0.0808, 0.2905]) -Greedy action tensor([ 1.4365, -0.3804, -0.6899, 1.5829]) tensor([0.4099, 0.0666, 0.0489, 0.4745]) -Greedy action tensor([ 1.4996, -0.1383, 0.5143, 0.9572]) tensor([0.4653, 0.0905, 0.1737, 0.2705]) -Greedy action tensor([ 1.5572, -0.4328, 0.5002, 0.9373]) tensor([0.4945, 0.0676, 0.1718, 0.2661]) -Greedy action tensor([ 0.9425, -1.2620, 0.8489, 0.8674]) tensor([0.3391, 0.0374, 0.3088, 0.3146]) -Greedy action tensor([ 2.2034, -0.0376, -0.5087, 0.9332]) tensor([0.6880, 0.0732, 0.0457, 0.1932]) -Greedy action tensor([ 1.6977, -0.9559, -0.2489, 1.4077]) tensor([0.5098, 0.0359, 0.0728, 0.3815]) -Greedy action tensor([ 1.8337, -0.2617, 0.0360, 1.3958]) tensor([0.5170, 0.0636, 0.0857, 0.3337]) -Greedy action tensor([0.2739, 0.3318, 0.1284, 0.3090]) tensor([0.2525, 0.2676, 0.2183, 0.2615]) -Greedy action tensor([ 1.5171, -0.0639, 0.8435, 0.6820]) tensor([0.4652, 0.0957, 0.2372, 0.2018]) -Greedy action tensor([ 0.5003, -0.1083, 0.1238, 0.1071]) tensor([0.3442, 0.1873, 0.2362, 0.2323]) -Greedy action tensor([1.4799, 0.3844, 0.2649, 0.5182]) tensor([0.4967, 0.1661, 0.1474, 0.1898]) -Greedy action tensor([ 1.0272, 0.2409, -0.5952, 0.3550]) tensor([0.4622, 0.2105, 0.0913, 0.2360]) -Greedy action tensor([ 0.8912, 0.0231, -1.1757, 1.1915]) tensor([0.3452, 0.1449, 0.0437, 0.4662]) -Greedy action tensor([ 1.1966, -0.0478, 0.3409, 1.4175]) tensor([0.3378, 0.0973, 0.1436, 0.4213]) -Greedy action tensor([ 2.0279, -1.1214, -0.4357, 0.9781]) tensor([0.6766, 0.0290, 0.0576, 0.2368]) -Greedy action tensor([ 1.0054, 0.2112, -0.1625, -0.4332]) tensor([0.4999, 0.2259, 0.1555, 0.1186]) -Greedy action tensor([ 1.3573, -0.2413, 0.0185, 1.1120]) tensor([0.4451, 0.0900, 0.1167, 0.3482]) -Greedy action tensor([ 1.7223, -0.3919, 0.4195, 0.9343]) tensor([0.5413, 0.0654, 0.1471, 0.2462]) -Greedy action tensor([ 1.6496, 0.1007, -0.7640, 0.5663]) tensor([0.6096, 0.1295, 0.0546, 0.2063]) -Greedy action tensor([ 1.0432, -0.3351, 0.5346, 0.9139]) tensor([0.3660, 0.0922, 0.2201, 0.3216]) -Greedy action tensor([1.6316, 0.0942, 1.1835, 0.7032]) tensor([0.4447, 0.0956, 0.2841, 0.1757]) -Greedy action tensor([ 1.7170, -0.6193, -0.1158, 1.0813]) tensor([0.5598, 0.0541, 0.0896, 0.2965]) -Greedy action tensor([ 1.3568, -1.0290, 0.1642, 0.6618]) tensor([0.5278, 0.0486, 0.1602, 0.2634]) -Greedy action tensor([ 0.4531, 0.5049, -0.5039, 0.5582]) tensor([0.2818, 0.2968, 0.1082, 0.3131]) -Greedy action tensor([ 1.8734, -0.4003, -0.2193, 1.1677]) tensor([0.5814, 0.0598, 0.0717, 0.2871]) -Greedy action tensor([ 1.0259, -0.4683, 0.7181, 0.4762]) tensor([0.3942, 0.0885, 0.2898, 0.2275]) -Greedy action tensor([ 1.4612, -0.0474, 0.4619, 0.7554]) tensor([0.4801, 0.1062, 0.1767, 0.2370]) -Greedy action tensor([ 0.9886, -0.9334, -0.1817, 0.8207]) tensor([0.4344, 0.0636, 0.1348, 0.3673]) -Greedy action tensor([ 1.4438, -0.9613, -0.1030, 1.0386]) tensor([0.5076, 0.0458, 0.1081, 0.3385]) -Greedy action tensor([ 1.3638, 0.1014, -0.0929, 1.4636]) tensor([0.3816, 0.1080, 0.0889, 0.4216]) -Greedy action tensor([ 0.6116, 0.6616, -1.3883, 1.0040]) tensor([0.2727, 0.2867, 0.0369, 0.4037]) -Greedy action tensor([1.3113, 0.3634, 0.3501, 0.9797]) tensor([0.4020, 0.1558, 0.1537, 0.2885]) -Greedy action tensor([ 1.6902, -0.3028, 0.8665, 0.9862]) tensor([0.4832, 0.0658, 0.2120, 0.2390]) -Greedy action tensor([ 1.4025, -0.8084, 0.1345, 1.1571]) tensor([0.4601, 0.0504, 0.1295, 0.3600]) -Greedy action tensor([ 1.7357, 0.1373, -0.7328, 0.9629]) tensor([0.5719, 0.1156, 0.0484, 0.2640]) -Greedy action tensor([ 1.3090, -1.0555, 0.5911, 0.6783]) tensor([0.4731, 0.0445, 0.2307, 0.2518]) -Greedy action tensor([ 0.1193, -0.4300, 0.1233, 0.9674]) tensor([0.2034, 0.1174, 0.2042, 0.4750]) -Greedy action tensor([ 1.0604, 0.5688, -0.9166, 0.6221]) tensor([0.4175, 0.2554, 0.0578, 0.2693]) -Greedy action tensor([ 1.4158, 0.0719, -1.0370, 1.7108]) tensor([0.3717, 0.0970, 0.0320, 0.4993]) -Greedy action tensor([ 0.8925, -0.1842, 0.0406, 1.7277]) tensor([0.2455, 0.0837, 0.1048, 0.5660]) -Greedy action tensor([ 1.0299, -0.2716, -0.4303, 1.4960]) tensor([0.3228, 0.0878, 0.0749, 0.5144]) -Greedy action tensor([ 1.1246, -0.6473, 0.6184, 1.5647]) tensor([0.3007, 0.0511, 0.1813, 0.4669]) -Greedy action tensor([-0.9203, -0.4793, 0.2656, 0.4862]) tensor([0.1009, 0.1568, 0.3304, 0.4119]) -Greedy action tensor([-1.3373, -0.6600, -0.1296, -0.2110]) tensor([0.1064, 0.2095, 0.3560, 0.3282]) -Greedy action tensor([-0.6201, -0.1646, 0.2734, -0.0755]) tensor([0.1483, 0.2338, 0.3623, 0.2556]) -Greedy action tensor([-2.0379, -0.9245, 0.4922, -0.1554]) tensor([0.0432, 0.1314, 0.5419, 0.2835]) -Greedy action tensor([-1.3709, -0.8285, 0.5650, -0.3608]) tensor([0.0807, 0.1388, 0.5591, 0.2215]) -Greedy action tensor([-1.4809, -0.5834, 0.5446, -0.3295]) tensor([0.0704, 0.1728, 0.5339, 0.2228]) -Greedy action tensor([-0.7414, -0.5984, 0.2549, 0.3281]) tensor([0.1286, 0.1484, 0.3483, 0.3747]) -Greedy action tensor([-0.8248, 0.4620, 0.0968, -0.2019]) tensor([0.1111, 0.4024, 0.2793, 0.2072]) -Greedy action tensor([-1.9061, -0.6963, 0.2091, -0.3047]) tensor([0.0568, 0.1905, 0.4710, 0.2818]) -Greedy action tensor([-1.1163, -0.6891, 0.4943, 0.2113]) tensor([0.0884, 0.1355, 0.4426, 0.3335]) -Greedy action tensor([-0.4269, -0.4656, 0.1906, 0.2362]) tensor([0.1737, 0.1671, 0.3221, 0.3371]) -Greedy action tensor([-1.8745, -0.9081, 0.2217, -0.3226]) tensor([0.0607, 0.1595, 0.4935, 0.2864]) -Greedy action tensor([-1.7877, -0.7646, 0.1613, -0.3857]) tensor([0.0673, 0.1871, 0.4723, 0.2733]) -Greedy action tensor([-1.7537, -0.7434, 0.1109, -0.4136]) tensor([0.0713, 0.1959, 0.4603, 0.2725]) -Greedy action tensor([-1.7154, -0.7801, 0.0862, -0.3441]) tensor([0.0738, 0.1881, 0.4473, 0.2909]) -Greedy action tensor([-2.0041, -0.9165, 0.3435, -0.2360]) tensor([0.0493, 0.1463, 0.5156, 0.2888]) -Greedy action tensor([-2.0056, -0.9710, 0.4737, -0.2569]) tensor([0.0465, 0.1309, 0.5552, 0.2674]) -Greedy action tensor([-1.5509, -0.6739, 0.0441, -0.2531]) tensor([0.0834, 0.2004, 0.4109, 0.3053]) -Greedy action tensor([-1.8448, -0.6677, 0.4407, -0.2358]) tensor([0.0524, 0.1701, 0.5154, 0.2620]) -Greedy action tensor([-0.9966, -0.6041, 0.4342, -0.0949]) tensor([0.1096, 0.1622, 0.4582, 0.2700]) -Greedy action tensor([-1.3261, -0.5550, 0.3645, 0.1073]) tensor([0.0783, 0.1692, 0.4244, 0.3281]) -Greedy action tensor([-0.7944, -0.5896, 0.1760, 0.2747]) tensor([0.1285, 0.1578, 0.3392, 0.3744]) -Greedy action tensor([-2.0397, -0.7540, 0.7716, 0.1029]) tensor([0.0336, 0.1215, 0.5587, 0.2862]) -Greedy action tensor([-0.9930, -0.5555, 0.3029, 0.0486]) tensor([0.1107, 0.1714, 0.4044, 0.3136]) -Greedy action tensor([-2.0365, -0.7989, 0.5617, -0.0317]) tensor([0.0395, 0.1362, 0.5310, 0.2933]) -Greedy action tensor([-1.6714, -0.5042, 0.5054, 0.0153]) tensor([0.0542, 0.1743, 0.4784, 0.2930]) -Greedy action tensor([-1.8590, -0.9933, 0.0861, -0.5009]) tensor([0.0701, 0.1667, 0.4905, 0.2727]) -Greedy action tensor([-0.4475, -0.4998, 0.1878, 0.1732]) tensor([0.1755, 0.1666, 0.3313, 0.3265]) -Greedy action tensor([-1.9604, -0.8134, 0.2546, -0.2378]) tensor([0.0529, 0.1665, 0.4845, 0.2961]) -Greedy action tensor([-2.0335, -0.9548, 0.4897, -0.1510]) tensor([0.0435, 0.1280, 0.5426, 0.2859]) -Greedy action tensor([-0.7458, -0.6155, 0.1839, 0.2095]) tensor([0.1375, 0.1567, 0.3484, 0.3575]) -Greedy action tensor([-0.3486, -0.5009, 0.2786, 0.3117]) tensor([0.1765, 0.1515, 0.3304, 0.3416]) -Greedy action tensor([-1.3797, -0.6003, -0.1844, -0.4287]) tensor([0.1102, 0.2403, 0.3642, 0.2853]) -Greedy action tensor([-1.4386, -0.5370, 0.1543, -0.3098]) tensor([0.0872, 0.2147, 0.4286, 0.2695]) -Greedy action tensor([-2.0367, -0.8388, 0.5172, -0.1196]) tensor([0.0417, 0.1382, 0.5363, 0.2837]) -Greedy action tensor([-1.9301, -0.9559, 0.1768, -0.3775]) tensor([0.0603, 0.1596, 0.4955, 0.2846]) -Greedy action tensor([-1.7837, -0.7459, 0.1784, -0.3864]) tensor([0.0667, 0.1884, 0.4749, 0.2699]) -Greedy action tensor([-1.4327, -0.5012, 0.6333, -0.3943]) tensor([0.0701, 0.1781, 0.5537, 0.1981]) -Greedy action tensor([-2.0312, -0.8372, 0.4974, -0.1397]) tensor([0.0426, 0.1406, 0.5342, 0.2825]) -Greedy action tensor([-1.6798, -1.0100, 0.0188, -0.8001]) tensor([0.0923, 0.1804, 0.5047, 0.2226]) -Greedy action tensor([-1.0287, -0.5592, 0.3810, -0.0781]) tensor([0.1077, 0.1723, 0.4412, 0.2788]) -Greedy action tensor([-1.9047, -0.6261, 0.8780, 0.1827]) tensor([0.0347, 0.1246, 0.5609, 0.2798]) -Greedy action tensor([-1.2117, -0.5451, 0.3301, 0.2266]) tensor([0.0845, 0.1646, 0.3949, 0.3560]) -Greedy action tensor([-1.5886, -0.5905, 0.5305, -0.0731]) tensor([0.0603, 0.1636, 0.5018, 0.2744]) -Greedy action tensor([-2.0056, -0.9568, 0.5023, -0.2557]) tensor([0.0457, 0.1304, 0.5610, 0.2629]) -Greedy action tensor([-0.4888, -0.5189, 0.1878, 0.2158]) tensor([0.1678, 0.1628, 0.3300, 0.3394]) -Greedy action tensor([-1.6203, -0.5813, 0.5373, -0.0703]) tensor([0.0582, 0.1644, 0.5033, 0.2741]) -Greedy action tensor([-1.0268, -0.6912, 0.5415, 0.2741]) tensor([0.0920, 0.1287, 0.4415, 0.3379]) -Greedy action tensor([-1.3222, -0.1470, -0.0107, -0.3514]) tensor([0.0944, 0.3058, 0.3505, 0.2493]) -Greedy action tensor([-2.0051, -0.8917, 0.2865, -0.2328]) tensor([0.0505, 0.1536, 0.4990, 0.2969]) -Greedy action tensor([-1.7319, -0.7712, 0.1510, -0.4322]) tensor([0.0722, 0.1886, 0.4744, 0.2648]) -Greedy action tensor([-1.6316, -0.5140, 0.5627, 0.1708]) tensor([0.0524, 0.1601, 0.4699, 0.3176]) -Greedy action tensor([-1.7711e+00, -4.9059e-01, 6.1439e-01, -1.1611e-03]) tensor([0.0469, 0.1687, 0.5093, 0.2752]) -Greedy action tensor([-0.6798, -0.5678, 0.1595, 0.2910]) tensor([0.1414, 0.1581, 0.3272, 0.3733]) -Greedy action tensor([-1.9532, -0.6797, 0.4716, -0.1687]) tensor([0.0458, 0.1637, 0.5176, 0.2729]) -Greedy action tensor([-1.6812, -0.9260, 0.0558, -0.6510]) tensor([0.0861, 0.1833, 0.4893, 0.2413]) -Greedy action tensor([-1.6515, -0.5445, 0.5953, -0.2260]) tensor([0.0567, 0.1715, 0.5361, 0.2358]) -Greedy action tensor([-0.6675, -0.5140, 0.2898, -0.0898]) tensor([0.1526, 0.1779, 0.3975, 0.2720]) -Greedy action tensor([-1.4988, -0.6847, 0.1178, -0.4016]) tensor([0.0886, 0.1999, 0.4461, 0.2654]) -Greedy action tensor([-1.6764, -0.6151, 0.4698, -0.2941]) tensor([0.0609, 0.1759, 0.5206, 0.2425]) -Greedy action tensor([-1.7202, -0.8216, 0.0936, -0.4360]) tensor([0.0758, 0.1861, 0.4646, 0.2736]) -Greedy action tensor([-1.9748, -0.9040, 0.3184, -0.2252]) tensor([0.0511, 0.1490, 0.5060, 0.2938]) -Greedy action tensor([-1.9924, -0.7987, 0.3704, -0.1382]) tensor([0.0469, 0.1548, 0.4985, 0.2997]) -Greedy action tensor([-1.6379, -0.5088, 0.5274, 0.0909]) tensor([0.0542, 0.1677, 0.4726, 0.3055]) -Greedy action tensor([-1.9309, -0.9154, 0.5251, -0.2243]) tensor([0.0478, 0.1319, 0.5570, 0.2633]) -Greedy action tensor([-1.0074, -0.5720, 0.4191, -0.3065]) tensor([0.1146, 0.1771, 0.4772, 0.2310]) -Greedy action tensor([-0.7884, -0.5246, 0.2988, -0.0018]) tensor([0.1340, 0.1744, 0.3974, 0.2942]) -Greedy action tensor([-1.3605, -0.6620, 0.4453, 0.1021]) tensor([0.0746, 0.1499, 0.4537, 0.3219]) -Greedy action tensor([-1.0007, -0.5690, 0.2769, 0.2742]) tensor([0.1030, 0.1587, 0.3697, 0.3687]) -Greedy action tensor([-1.7026, -0.7548, 0.1422, -0.3013]) tensor([0.0716, 0.1847, 0.4530, 0.2907]) -Greedy action tensor([-0.7145, -0.6155, 0.1933, 0.1801]) tensor([0.1423, 0.1571, 0.3526, 0.3480]) -Greedy action tensor([-1.5604, -0.4791, 0.6749, 0.4885]) tensor([0.0475, 0.1400, 0.4440, 0.3685]) -Greedy action tensor([-1.9272, -0.5603, 0.4272, -0.1190]) tensor([0.0464, 0.1820, 0.4886, 0.2830]) -Greedy action tensor([-1.9161, -0.9564, 0.2991, -0.3292]) tensor([0.0566, 0.1478, 0.5188, 0.2768]) -Greedy action tensor([-2.0487, -0.8488, 0.8203, 0.1161]) tensor([0.0326, 0.1083, 0.5748, 0.2842]) -Greedy action tensor([-1.4809, -0.4702, 0.3518, -0.2961]) tensor([0.0754, 0.2071, 0.4711, 0.2465]) -Greedy action tensor([-1.7524, -0.6050, 0.7729, 0.2405]) tensor([0.0417, 0.1313, 0.5210, 0.3059]) -Greedy action tensor([-0.9216, -0.3212, 0.4582, -0.1305]) tensor([0.1111, 0.2025, 0.4414, 0.2450]) -Greedy action tensor([-1.8310, -0.8927, 0.2258, -0.3558]) tensor([0.0635, 0.1623, 0.4966, 0.2776]) -Greedy action tensor([-1.9438, -0.6117, 0.4462, -0.0411]) tensor([0.0446, 0.1691, 0.4871, 0.2992]) -Greedy action tensor([-1.7775, -0.4844, 0.7615, 0.2308]) tensor([0.0404, 0.1472, 0.5116, 0.3009]) -Greedy action tensor([ 0.8865, -0.5608, -0.3186, 0.0782]) tensor([0.5049, 0.1188, 0.1513, 0.2250]) -Greedy action tensor([ 1.4542, -0.5287, -0.3762, 0.0232]) tensor([0.6506, 0.0896, 0.1043, 0.1555]) -Greedy action tensor([ 0.9969, -0.9472, -0.6368, 0.2599]) tensor([0.5504, 0.0788, 0.1074, 0.2634]) -Greedy action tensor([ 0.8535, -0.7026, -0.3460, 0.2726]) tensor([0.4827, 0.1018, 0.1455, 0.2700]) -Greedy action tensor([ 0.6382, -0.5992, -0.1329, -0.1556]) tensor([0.4535, 0.1316, 0.2098, 0.2051]) -Greedy action tensor([ 0.3429, -0.1949, -0.3705, 0.1902]) tensor([0.3410, 0.1992, 0.1671, 0.2927]) -Greedy action tensor([ 0.9229, -0.6312, -0.1377, -0.0681]) tensor([0.5184, 0.1096, 0.1795, 0.1925]) -Greedy action tensor([ 1.2685, -0.5408, -0.3491, -0.1331]) tensor([0.6217, 0.1018, 0.1233, 0.1531]) -Greedy action tensor([ 1.1876, -0.6566, -0.6897, 1.2488]) tensor([0.4212, 0.0666, 0.0644, 0.4478]) -Greedy action tensor([ 0.9080, -0.4336, -0.4642, 0.4490]) tensor([0.4658, 0.1218, 0.1181, 0.2944]) -Greedy action tensor([ 1.6291, -0.7793, -0.5545, 0.2609]) tensor([0.6863, 0.0617, 0.0773, 0.1747]) -Greedy action tensor([ 0.8937, -0.2069, -0.2388, 0.0912]) tensor([0.4755, 0.1582, 0.1532, 0.2131]) -Greedy action tensor([ 0.8138, -0.0655, -0.0673, -0.1348]) tensor([0.4511, 0.1873, 0.1869, 0.1747]) -Greedy action tensor([ 1.5116, -0.4985, -0.4398, 0.1373]) tensor([0.6540, 0.0876, 0.0929, 0.1655]) -Greedy action tensor([ 0.4789, -0.4357, 0.2038, -0.1734]) tensor([0.3730, 0.1495, 0.2833, 0.1943]) -Greedy action tensor([ 1.0388, -0.8013, -0.4292, 0.1852]) tensor([0.5509, 0.0875, 0.1269, 0.2346]) -Greedy action tensor([ 1.1420, -0.6911, -0.4429, 0.1291]) tensor([0.5787, 0.0925, 0.1186, 0.2102]) -Greedy action tensor([ 0.3510, -0.0574, 0.1001, -0.0890]) tensor([0.3239, 0.2153, 0.2521, 0.2086]) -Greedy action tensor([ 0.7985, -0.7561, -0.1307, 0.1225]) tensor([0.4729, 0.0999, 0.1867, 0.2405]) -Greedy action tensor([ 1.1175, -0.4468, -0.3091, -0.0884]) tensor([0.5718, 0.1196, 0.1373, 0.1712]) -Greedy action tensor([ 1.3002, -0.4873, -0.3082, -0.0659]) tensor([0.6163, 0.1031, 0.1234, 0.1572]) -Greedy action tensor([ 1.3006, -0.5955, -0.3642, 0.0026]) tensor([0.6202, 0.0931, 0.1173, 0.1693]) -Greedy action tensor([ 0.9525, -0.4739, -0.3440, 0.2449]) tensor([0.4984, 0.1197, 0.1363, 0.2456]) -Greedy action tensor([ 1.1161, -0.4993, -0.1925, 0.2939]) tensor([0.5240, 0.1042, 0.1416, 0.2303]) -Greedy action tensor([ 0.6743, -0.6085, -0.2839, 0.2162]) tensor([0.4360, 0.1209, 0.1673, 0.2758]) -Greedy action tensor([ 0.5036, -0.2282, -0.5390, -0.3599]) tensor([0.4434, 0.2133, 0.1563, 0.1870]) -Greedy action tensor([ 1.2552, -0.6695, -0.2070, 0.0499]) tensor([0.5962, 0.0870, 0.1382, 0.1786]) -Greedy action tensor([ 0.8829, -0.7670, -0.3096, 0.2651]) tensor([0.4915, 0.0944, 0.1491, 0.2650]) -Greedy action tensor([ 0.6745, -0.4256, -0.0619, -0.1738]) tensor([0.4465, 0.1486, 0.2138, 0.1912]) -Greedy action tensor([ 1.2178, -0.5927, -0.4535, 0.2762]) tensor([0.5742, 0.0939, 0.1079, 0.2239]) -Greedy action tensor([ 1.0864, -0.5962, -0.1827, 0.2514]) tensor([0.5261, 0.0978, 0.1479, 0.2282]) -Greedy action tensor([ 1.1385, -0.2884, 0.0281, -0.2881]) tensor([0.5526, 0.1327, 0.1820, 0.1327]) -Greedy action tensor([ 1.3158, -0.5842, -0.4804, -0.1371]) tensor([0.6454, 0.0965, 0.1071, 0.1509]) -Greedy action tensor([ 1.4857, -0.6416, -0.1347, 0.1508]) tensor([0.6329, 0.0754, 0.1252, 0.1666]) -Greedy action tensor([ 1.0553, -0.7338, -0.2303, 0.1679]) tensor([0.5390, 0.0901, 0.1490, 0.2219]) -Greedy action tensor([ 0.8985, -0.5576, -0.3612, 0.1906]) tensor([0.4976, 0.1160, 0.1412, 0.2452]) -Greedy action tensor([ 0.6805, -0.5054, -0.3216, 0.5059]) tensor([0.3980, 0.1216, 0.1461, 0.3343]) -Greedy action tensor([ 1.7279, -0.4293, -0.4440, 0.0762]) tensor([0.7036, 0.0814, 0.0802, 0.1349]) -Greedy action tensor([ 0.4395, -0.5113, -0.1209, -0.0218]) tensor([0.3864, 0.1493, 0.2206, 0.2436]) -Greedy action tensor([ 0.7896, -0.3692, -0.2559, -0.1622]) tensor([0.4875, 0.1530, 0.1714, 0.1882]) -Greedy action tensor([ 1.0713, -0.4660, -0.2710, -0.1085]) tensor([0.5607, 0.1205, 0.1465, 0.1723]) -Greedy action tensor([ 1.0050, -0.4625, -0.4423, 0.1015]) tensor([0.5345, 0.1232, 0.1257, 0.2166]) -Greedy action tensor([ 1.8611, -0.2915, -0.4735, -0.1137]) tensor([0.7398, 0.0859, 0.0716, 0.1027]) -Greedy action tensor([ 0.7103, -0.3223, -0.3271, 0.0530]) tensor([0.4487, 0.1598, 0.1590, 0.2325]) -Greedy action tensor([ 1.2100, -0.3807, -0.1258, -0.1201]) tensor([0.5776, 0.1177, 0.1519, 0.1528]) -Greedy action tensor([ 0.4987, -0.5535, -0.0680, -0.0213]) tensor([0.3982, 0.1391, 0.2260, 0.2368]) -Greedy action tensor([ 1.2608, -0.4675, -0.1534, 0.0329]) tensor([0.5836, 0.1036, 0.1419, 0.1709]) -Greedy action tensor([ 0.3229, -0.2220, 0.2027, -0.1076]) tensor([0.3208, 0.1861, 0.2845, 0.2086]) -Greedy action tensor([ 0.6230, -0.4850, 0.0030, -0.1629]) tensor([0.4303, 0.1421, 0.2315, 0.1961]) -Greedy action tensor([ 1.0723, -0.5165, -0.2391, -0.1187]) tensor([0.5626, 0.1149, 0.1516, 0.1710]) -Greedy action tensor([ 1.4428, -0.6365, -0.3568, -0.0818]) tensor([0.6631, 0.0829, 0.1097, 0.1444]) -Greedy action tensor([ 0.9145, -0.4523, -0.2751, 0.3360]) tensor([0.4717, 0.1202, 0.1436, 0.2645]) -Greedy action tensor([ 1.0938, -0.8909, -0.5360, -0.0829]) tensor([0.6091, 0.0837, 0.1194, 0.1878]) -Greedy action tensor([ 1.0449, -0.3782, -0.3002, 0.0447]) tensor([0.5350, 0.1289, 0.1394, 0.1968]) -Greedy action tensor([ 0.5335, -0.3983, -0.1149, -0.1067]) tensor([0.4092, 0.1612, 0.2139, 0.2157]) -Greedy action tensor([ 1.1580, -0.3961, -0.2111, 0.1553]) tensor([0.5457, 0.1153, 0.1388, 0.2002]) -Greedy action tensor([ 0.8561, -0.4148, -0.2264, 0.0975]) tensor([0.4790, 0.1344, 0.1623, 0.2243]) -Greedy action tensor([ 1.6168, -0.3742, -0.3421, -0.1201]) tensor([0.6879, 0.0939, 0.0970, 0.1211]) -Greedy action tensor([ 0.9758, -0.3626, -0.4660, 0.5122]) tensor([0.4700, 0.1233, 0.1112, 0.2956]) -Greedy action tensor([ 0.7059, -0.4989, -0.1556, -0.1046]) tensor([0.4615, 0.1383, 0.1950, 0.2052]) -Greedy action tensor([ 0.6433, -0.4579, 0.0324, -0.0563]) tensor([0.4216, 0.1402, 0.2289, 0.2094]) -Greedy action tensor([ 1.2735, -0.5496, -0.2423, 0.1189]) tensor([0.5895, 0.0952, 0.1295, 0.1858]) -Greedy action tensor([ 1.4294, -0.7765, -0.0225, 0.1170]) tensor([0.6198, 0.0683, 0.1451, 0.1668]) -Greedy action tensor([ 1.3335, -0.7339, -0.4950, 0.3071]) tensor([0.6077, 0.0769, 0.0976, 0.2178]) -Greedy action tensor([ 1.0919, -0.2268, 0.1652, -0.3019]) tensor([0.5231, 0.1399, 0.2071, 0.1298]) -Greedy action tensor([ 1.3571, -0.4359, -0.6095, -0.0301]) tensor([0.6426, 0.1070, 0.0899, 0.1605]) -Greedy action tensor([ 1.3999, -0.4703, -0.5651, -0.0621]) tensor([0.6553, 0.1010, 0.0918, 0.1519]) -Greedy action tensor([ 0.8753, -0.5408, -0.2286, 0.2125]) tensor([0.4785, 0.1161, 0.1587, 0.2467]) -Greedy action tensor([ 0.6127, -0.6681, -0.1264, 0.0075]) tensor([0.4345, 0.1207, 0.2075, 0.2372]) -Greedy action tensor([ 1.2237, -0.5225, -0.0890, 0.3370]) tensor([0.5389, 0.0940, 0.1450, 0.2220]) -Greedy action tensor([ 1.4275, -0.6555, -0.3306, 0.1281]) tensor([0.6371, 0.0794, 0.1098, 0.1737]) -Greedy action tensor([ 0.7314, -0.3687, -0.4746, -0.0256]) tensor([0.4759, 0.1584, 0.1425, 0.2232]) -Greedy action tensor([ 0.5595, -0.2122, 0.1231, -0.2615]) tensor([0.3924, 0.1814, 0.2536, 0.1726]) -Greedy action tensor([ 0.8712, -0.4819, -0.3850, 0.3095]) tensor([0.4732, 0.1223, 0.1347, 0.2698]) -Greedy action tensor([ 0.7938, -0.7824, -0.3162, 0.0344]) tensor([0.4989, 0.1032, 0.1644, 0.2335]) -Greedy action tensor([ 1.1695, -0.5390, -0.7165, 0.7338]) tensor([0.5052, 0.0915, 0.0766, 0.3267]) -Greedy action tensor([ 0.7521, -0.5552, -0.2340, 0.1096]) tensor([0.4609, 0.1247, 0.1719, 0.2424]) -Greedy action tensor([ 0.3477, -0.2194, -0.5681, -0.2098]) tensor([0.3937, 0.2233, 0.1576, 0.2254]) -Greedy action tensor([ 1.3655, -0.4943, -0.0642, -0.1843]) tensor([0.6221, 0.0969, 0.1489, 0.1321]) -Greedy action tensor([ 1.6676, -0.4206, -0.6629, -0.1035]) tensor([0.7187, 0.0891, 0.0699, 0.1223]) -Greedy action tensor([ 0.9665, -0.7271, -0.4850, 0.0787]) tensor([0.5466, 0.1005, 0.1280, 0.2249]) -Greedy action tensor([ 0.3202, 0.1331, 0.1739, -0.2680]) tensor([0.3078, 0.2553, 0.2659, 0.1710]) -Greedy action tensor([ 0.3241, 0.1520, 0.1563, -0.1912]) tensor([0.3044, 0.2563, 0.2574, 0.1818]) -Greedy action tensor([ 0.5672, -0.2297, -0.0551, -0.5370]) tensor([0.4313, 0.1944, 0.2314, 0.1429]) -Greedy action tensor([ 0.4171, -0.1854, -0.0088, -0.4657]) tensor([0.3825, 0.2094, 0.2499, 0.1582]) -Greedy action tensor([ 0.9715, -0.7655, -0.0542, -0.6028]) tensor([0.5741, 0.1011, 0.2059, 0.1189]) -Greedy action tensor([ 0.2970, 0.0363, 0.0517, -0.1216]) tensor([0.3114, 0.2400, 0.2437, 0.2049]) -Greedy action tensor([ 0.3791, -0.0351, 0.1917, -0.1522]) tensor([0.3249, 0.2147, 0.2694, 0.1910]) -Greedy action tensor([ 0.6386, -0.2772, 0.1391, -0.3170]) tensor([0.4181, 0.1673, 0.2537, 0.1608]) -Greedy action tensor([ 0.1342, 0.1727, 0.2049, -0.1706]) tensor([0.2597, 0.2699, 0.2788, 0.1915]) -Greedy action tensor([ 0.5080, -0.1783, -0.0139, -0.4384]) tensor([0.4024, 0.2026, 0.2388, 0.1562]) -Greedy action tensor([ 0.9703, -0.5008, 0.0191, -0.7360]) tensor([0.5563, 0.1278, 0.2149, 0.1010]) -Greedy action tensor([ 0.1854, 0.0841, 0.2033, -0.0568]) tensor([0.2698, 0.2438, 0.2747, 0.2118]) -Greedy action tensor([ 0.3892, -0.0292, -0.0477, -0.2740]) tensor([0.3547, 0.2334, 0.2291, 0.1827]) -Greedy action tensor([ 0.3504, 0.1006, 0.1225, -0.1266]) tensor([0.3129, 0.2437, 0.2491, 0.1942]) -Greedy action tensor([ 0.4424, 0.0277, 0.1002, -0.2359]) tensor([0.3474, 0.2295, 0.2468, 0.1763]) -Greedy action tensor([ 0.4999, -0.0072, 0.1646, -0.2685]) tensor([0.3596, 0.2165, 0.2571, 0.1667]) -Greedy action tensor([ 0.3935, -0.2220, 0.1387, -0.3048]) tensor([0.3555, 0.1921, 0.2755, 0.1768]) -Greedy action tensor([ 0.8454, -0.6529, -0.1187, -0.5604]) tensor([0.5405, 0.1208, 0.2061, 0.1325]) -Greedy action tensor([ 0.2384, -0.2593, -0.1438, -0.4309]) tensor([0.3568, 0.2169, 0.2435, 0.1827]) -Greedy action tensor([ 0.4919, -0.2162, 0.1127, -0.3064]) tensor([0.3807, 0.1875, 0.2605, 0.1713]) -Greedy action tensor([ 0.6546, -0.3158, 0.0339, -0.2651]) tensor([0.4319, 0.1637, 0.2322, 0.1722]) -Greedy action tensor([ 0.7259, -0.2163, -0.0358, -0.5146]) tensor([0.4660, 0.1816, 0.2176, 0.1348]) -Greedy action tensor([ 0.2483, 0.0559, 0.2149, -0.1053]) tensor([0.2862, 0.2361, 0.2768, 0.2009]) -Greedy action tensor([ 0.7030, -0.3962, 0.0264, -0.3947]) tensor([0.4597, 0.1532, 0.2337, 0.1534]) -Greedy action tensor([ 0.7164, -0.4013, -0.1572, -0.5997]) tensor([0.4969, 0.1625, 0.2074, 0.1332]) -Greedy action tensor([ 0.2887, 0.1331, -0.0209, -0.2340]) tensor([0.3142, 0.2689, 0.2306, 0.1863]) -Greedy action tensor([ 0.3244, 0.1693, -0.0135, -0.1190]) tensor([0.3114, 0.2667, 0.2221, 0.1999]) -Greedy action tensor([ 1.0709, -0.7061, -0.0173, -0.6306]) tensor([0.5923, 0.1002, 0.1995, 0.1080]) -Greedy action tensor([ 0.7671, -0.2590, -0.1382, -0.5112]) tensor([0.4899, 0.1756, 0.1981, 0.1364]) -Greedy action tensor([ 0.1645, -0.1477, 0.0243, -0.3685]) tensor([0.3137, 0.2296, 0.2726, 0.1841]) -Greedy action tensor([ 0.2996, -0.1064, 0.0178, -0.3481]) tensor([0.3397, 0.2263, 0.2563, 0.1777]) -Greedy action tensor([ 1.1381, -0.6114, 0.0280, -0.6456]) tensor([0.5983, 0.1040, 0.1972, 0.1005]) -Greedy action tensor([ 0.2687, -0.1912, -0.0272, -0.4349]) tensor([0.3484, 0.2200, 0.2592, 0.1724]) -Greedy action tensor([ 0.4905, 0.0673, 0.0395, -0.2101]) tensor([0.3587, 0.2349, 0.2285, 0.1780]) -Greedy action tensor([ 0.4698, 0.0652, -0.0320, -0.2737]) tensor([0.3639, 0.2428, 0.2203, 0.1730]) -Greedy action tensor([ 0.5300, -0.2779, 0.0624, -0.4815]) tensor([0.4105, 0.1830, 0.2572, 0.1493]) -Greedy action tensor([ 0.6507, 0.0607, 0.1740, -0.3197]) tensor([0.3915, 0.2170, 0.2431, 0.1484]) -Greedy action tensor([ 0.1529, 0.0491, 0.0700, -0.2186]) tensor([0.2848, 0.2567, 0.2621, 0.1964]) -Greedy action tensor([ 0.4374, -0.0307, 0.1796, -0.6313]) tensor([0.3646, 0.2283, 0.2818, 0.1252]) -Greedy action tensor([ 0.1943, 0.0713, 0.1473, -0.1987]) tensor([0.2846, 0.2517, 0.2715, 0.1921]) -Greedy action tensor([ 0.5408, -0.1725, 0.0639, -0.5730]) tensor([0.4100, 0.2009, 0.2545, 0.1346]) -Greedy action tensor([ 0.2676, 0.0168, 0.2492, -0.1130]) tensor([0.2904, 0.2260, 0.2851, 0.1985]) -Greedy action tensor([ 0.1990, 0.0687, 0.2595, -0.0968]) tensor([0.2714, 0.2383, 0.2884, 0.2019]) -Greedy action tensor([ 0.3699, 0.0515, -0.0086, -0.2571]) tensor([0.3394, 0.2468, 0.2325, 0.1813]) -Greedy action tensor([ 1.0012, -0.5847, -0.0341, -0.6156]) tensor([0.5687, 0.1164, 0.2019, 0.1129]) -Greedy action tensor([ 0.5147, -0.2266, 0.0239, -0.2182]) tensor([0.3892, 0.1855, 0.2383, 0.1870]) -Greedy action tensor([ 0.3300, -0.0553, 0.0528, -0.2779]) tensor([0.3353, 0.2281, 0.2541, 0.1826]) -Greedy action tensor([ 0.3144, 0.1160, 0.0269, -0.0619]) tensor([0.3071, 0.2518, 0.2303, 0.2108]) -Greedy action tensor([ 0.7496, -0.6040, -0.1997, -0.5672]) tensor([0.5226, 0.1350, 0.2023, 0.1401]) -Greedy action tensor([ 0.2409, 0.1129, -0.0473, -0.0271]) tensor([0.2946, 0.2592, 0.2208, 0.2253]) -Greedy action tensor([ 0.5012, 0.0674, 0.0184, -0.3843]) tensor([0.3735, 0.2420, 0.2304, 0.1541]) -Greedy action tensor([ 0.5545, -0.5574, -0.1174, -0.5499]) tensor([0.4606, 0.1515, 0.2352, 0.1526]) -Greedy action tensor([ 0.2922, 0.1392, 0.1392, -0.1368]) tensor([0.2970, 0.2548, 0.2548, 0.1934]) -Greedy action tensor([ 0.2936, 0.1096, 0.1919, -0.0905]) tensor([0.2927, 0.2435, 0.2644, 0.1994]) -Greedy action tensor([ 0.3251, 0.1030, 0.1771, -0.3103]) tensor([0.3132, 0.2508, 0.2701, 0.1659]) -Greedy action tensor([ 0.4499, -0.0740, 0.2561, -0.2872]) tensor([0.3455, 0.2046, 0.2846, 0.1653]) -Greedy action tensor([ 0.7406, -0.4470, -0.0786, -0.6026]) tensor([0.4983, 0.1520, 0.2197, 0.1301]) -Greedy action tensor([ 0.2601, 0.1897, -0.0175, -0.3101]) tensor([0.3072, 0.2863, 0.2327, 0.1737]) -Greedy action tensor([ 0.9502, -0.4322, -0.0970, -0.3977]) tensor([0.5371, 0.1348, 0.1885, 0.1395]) -Greedy action tensor([ 0.9091, -0.4079, -0.1735, -0.5908]) tensor([0.5465, 0.1464, 0.1851, 0.1220]) -Greedy action tensor([ 0.5853, -0.1364, -0.1332, -0.4637]) tensor([0.4303, 0.2091, 0.2098, 0.1507]) -Greedy action tensor([ 0.2891, 0.0864, 0.0972, -0.2213]) tensor([0.3084, 0.2518, 0.2546, 0.1851]) -Greedy action tensor([ 1.1054, -0.6478, -0.0988, -0.6701]) tensor([0.6088, 0.1055, 0.1826, 0.1031]) -Greedy action tensor([ 0.8953, -0.5210, -0.0988, -0.4828]) tensor([0.5363, 0.1301, 0.1985, 0.1352]) -Greedy action tensor([ 0.2382, 0.0099, 0.0599, -0.1529]) tensor([0.3022, 0.2405, 0.2529, 0.2044]) -Greedy action tensor([ 0.1641, 0.1349, 0.1263, -0.2024]) tensor([0.2757, 0.2677, 0.2655, 0.1911]) -Greedy action tensor([ 0.5159, -0.2921, -0.1293, -0.4417]) tensor([0.4248, 0.1894, 0.2228, 0.1630]) -Greedy action tensor([ 2.8055e-01, 2.1456e-04, 4.3329e-02, -2.3199e-01]) tensor([0.3181, 0.2404, 0.2510, 0.1906]) -Greedy action tensor([ 0.5651, -0.5374, -0.0257, -0.6514]) tensor([0.4582, 0.1522, 0.2538, 0.1358]) -Greedy action tensor([ 0.4197, -0.1173, -0.0202, -0.4410]) tensor([0.3771, 0.2204, 0.2429, 0.1595]) -Greedy action tensor([ 0.3005, 0.1310, -0.0513, -0.1510]) tensor([0.3140, 0.2651, 0.2209, 0.2000]) -Greedy action tensor([ 0.6922, -0.0922, -0.0351, -0.5244]) tensor([0.4473, 0.2041, 0.2161, 0.1325]) -Greedy action tensor([ 0.5025, 0.0629, 0.1345, -0.1048]) tensor([0.3471, 0.2236, 0.2402, 0.1891]) -Greedy action tensor([ 0.3145, 0.0917, 0.1256, -0.2322]) tensor([0.3118, 0.2495, 0.2581, 0.1805]) -Greedy action tensor([ 0.4687, -0.2002, -0.1711, -0.2397]) tensor([0.3949, 0.2023, 0.2083, 0.1945]) -Greedy action tensor([ 0.3177, 0.2298, 0.0587, -0.2078]) tensor([0.3050, 0.2793, 0.2354, 0.1803]) -Greedy action tensor([ 0.7497, -0.1942, 0.0165, -0.3802]) tensor([0.4561, 0.1775, 0.2191, 0.1473]) -Greedy action tensor([ 1.1992, -0.6753, 0.0926, -0.6416]) tensor([0.6087, 0.0934, 0.2013, 0.0966]) -Greedy action tensor([ 0.2251, 0.1469, 0.1213, -0.1760]) tensor([0.2861, 0.2645, 0.2579, 0.1915]) -Greedy action tensor([ 0.4837, -0.0550, 0.0202, -0.1323]) tensor([0.3633, 0.2120, 0.2285, 0.1962]) -Greedy action tensor([ 0.1865, 0.2565, 0.0898, -0.2656]) tensor([0.2765, 0.2966, 0.2510, 0.1759]) -Greedy action tensor([ 1.7246, -0.1098, 0.3281, 0.9761]) tensor([0.5318, 0.0849, 0.1316, 0.2516]) -Greedy action tensor([1.1069, 0.2585, 0.2695, 1.1843]) tensor([0.3400, 0.1455, 0.1472, 0.3673]) -Greedy action tensor([ 2.2407, -0.8063, -0.3748, 1.0984]) tensor([0.6946, 0.0330, 0.0508, 0.2216]) -Greedy action tensor([ 1.5008, -0.1367, 0.9027, 0.7117]) tensor([0.4549, 0.0884, 0.2501, 0.2066]) -Greedy action tensor([1.7122, 0.0284, 0.9892, 1.9803]) tensor([0.3357, 0.0623, 0.1629, 0.4390]) -Greedy action tensor([ 0.9890, -0.5902, -0.6280, 1.4347]) tensor([0.3371, 0.0695, 0.0669, 0.5265]) -Greedy action tensor([-0.0692, -1.4106, 1.4818, -0.8852]) tensor([0.1558, 0.0407, 0.7346, 0.0689]) -Greedy action tensor([ 1.3062, -1.2858, 0.0928, 0.5497]) tensor([0.5431, 0.0407, 0.1614, 0.2549]) -Greedy action tensor([ 1.0332, -0.4259, 0.3513, 1.3946]) tensor([0.3151, 0.0732, 0.1593, 0.4523]) -Greedy action tensor([1.6351, 0.0908, 1.1589, 1.2151]) tensor([0.4013, 0.0857, 0.2493, 0.2637]) -Greedy action tensor([ 1.0924, -0.0818, -0.5241, 1.5963]) tensor([0.3162, 0.0977, 0.0628, 0.5233]) -Greedy action tensor([ 1.8476, -0.0418, 0.0518, 0.6784]) tensor([0.6143, 0.0929, 0.1020, 0.1908]) -Greedy action tensor([ 1.1324, 0.5701, -0.6548, 0.4971]) tensor([0.4411, 0.2514, 0.0739, 0.2337]) -Greedy action tensor([ 0.4970, 0.8549, -1.1578, 0.5483]) tensor([0.2722, 0.3893, 0.0520, 0.2865]) -Greedy action tensor([ 1.0287, -0.8393, 0.2816, 0.9022]) tensor([0.3985, 0.0615, 0.1888, 0.3512]) -Greedy action tensor([0.4704, 0.0238, 0.6143, 0.9880]) tensor([0.2236, 0.1431, 0.2582, 0.3752]) -Greedy action tensor([ 1.9585, -0.5935, -0.0919, 0.9133]) tensor([0.6418, 0.0500, 0.0826, 0.2256]) -Greedy action tensor([ 1.9838, 0.5295, -0.1292, 0.1981]) tensor([0.6570, 0.1534, 0.0794, 0.1102]) -Greedy action tensor([ 1.0749, -0.3550, -0.0488, 0.4201]) tensor([0.4799, 0.1148, 0.1560, 0.2493]) -Greedy action tensor([ 1.5460, -0.4599, 0.3658, 0.7159]) tensor([0.5326, 0.0716, 0.1636, 0.2322]) -Greedy action tensor([ 1.3131, -0.1322, -0.3009, 0.5617]) tensor([0.5245, 0.1236, 0.1044, 0.2474]) -Greedy action tensor([ 0.7298, -1.2802, 0.3822, 0.6310]) tensor([0.3641, 0.0488, 0.2572, 0.3299]) -Greedy action tensor([ 1.5530, 0.0976, -0.9619, 1.1885]) tensor([0.4978, 0.1161, 0.0403, 0.3458]) -Greedy action tensor([ 1.4904, -0.5536, 1.5906, 1.0685]) tensor([0.3459, 0.0448, 0.3824, 0.2269]) -Greedy action tensor([ 1.2867, 0.8144, -1.5390, 0.6445]) tensor([0.4527, 0.2823, 0.0268, 0.2382]) -Greedy action tensor([ 1.4666, -0.2020, 0.6337, 0.8215]) tensor([0.4656, 0.0878, 0.2024, 0.2442]) -Greedy action tensor([ 1.7232, 0.2507, -0.2969, 0.8579]) tensor([0.5609, 0.1286, 0.0744, 0.2361]) -Greedy action tensor([ 0.6195, -0.0828, -0.3360, 0.2369]) tensor([0.3903, 0.1934, 0.1501, 0.2662]) -Greedy action tensor([ 0.8957, 0.0912, -1.0809, 1.0324]) tensor([0.3660, 0.1637, 0.0507, 0.4196]) -Greedy action tensor([ 1.5305, -0.0442, 0.3469, 0.4727]) tensor([0.5375, 0.1113, 0.1646, 0.1866]) -Greedy action tensor([0.4965, 0.0856, 0.5106, 2.1644]) tensor([0.1253, 0.0831, 0.1271, 0.6644]) -Greedy action tensor([ 1.9286, -0.4018, 0.6915, 1.0205]) tensor([0.5584, 0.0543, 0.1621, 0.2252]) -Greedy action tensor([ 0.9238, -0.3142, 0.5243, 0.4171]) tensor([0.3902, 0.1131, 0.2617, 0.2351]) -Greedy action tensor([ 1.3107, -0.6509, 0.1044, 1.4118]) tensor([0.3927, 0.0552, 0.1175, 0.4345]) -Greedy action tensor([0.9813, 0.2614, 0.9503, 0.3565]) tensor([0.3343, 0.1627, 0.3241, 0.1790]) -Greedy action tensor([ 2.0168, -0.9809, 0.8039, 1.6249]) tensor([0.4943, 0.0247, 0.1470, 0.3341]) -Greedy action tensor([ 1.0614, 0.4152, -0.3331, 0.7216]) tensor([0.4026, 0.2110, 0.0998, 0.2866]) -Greedy action tensor([ 1.0764, -0.4152, 0.4876, 1.1082]) tensor([0.3556, 0.0800, 0.1973, 0.3671]) -Greedy action tensor([ 1.0989, 0.5413, -1.4001, 0.2427]) tensor([0.4809, 0.2753, 0.0395, 0.2043]) -Greedy action tensor([ 0.2253, -1.2571, -0.1518, 1.4815]) tensor([0.1843, 0.0419, 0.1264, 0.6474]) -Greedy action tensor([1.0433, 0.6531, 0.0167, 1.0615]) tensor([0.3275, 0.2217, 0.1173, 0.3335]) -Greedy action tensor([ 1.5641, -0.2020, -0.8770, 1.4079]) tensor([0.4732, 0.0809, 0.0412, 0.4047]) -Greedy action tensor([ 1.4591, -1.1874, 0.2230, 1.5179]) tensor([0.4129, 0.0293, 0.1199, 0.4379]) -Greedy action tensor([ 1.2201, -0.4456, 1.7011, 1.2398]) tensor([0.2613, 0.0494, 0.4227, 0.2665]) -Greedy action tensor([ 0.3190, -0.4491, 0.8368, 0.4865]) tensor([0.2312, 0.1073, 0.3881, 0.2734]) -Greedy action tensor([ 2.2935, -0.1195, -0.2520, 0.8711]) tensor([0.7097, 0.0635, 0.0557, 0.1711]) -Greedy action tensor([ 1.4095, -0.9214, -0.0235, 0.8875]) tensor([0.5184, 0.0504, 0.1237, 0.3076]) -Greedy action tensor([ 1.9918, -0.3493, 0.4626, 1.7222]) tensor([0.4815, 0.0463, 0.1044, 0.3678]) -Greedy action tensor([1.9131, 0.2325, 0.6279, 1.5087]) tensor([0.4694, 0.0874, 0.1298, 0.3133]) -Greedy action tensor([ 1.4998, -0.2224, 0.5337, 0.7595]) tensor([0.4911, 0.0877, 0.1869, 0.2342]) -Greedy action tensor([ 0.7106, 0.1858, -0.3162, 0.2280]) tensor([0.3896, 0.2305, 0.1395, 0.2404]) -Greedy action tensor([ 1.3506, -0.1185, 0.1438, 1.0592]) tensor([0.4393, 0.1011, 0.1314, 0.3282]) -Greedy action tensor([0.4090, 0.4826, 0.1532, 0.2976]) tensor([0.2670, 0.2874, 0.2067, 0.2389]) -Greedy action tensor([ 1.1947, 0.0916, -1.3455, 1.6674]) tensor([0.3317, 0.1101, 0.0262, 0.5321]) -Greedy action tensor([ 0.9676, -0.2117, 0.3943, 1.1532]) tensor([0.3252, 0.1000, 0.1833, 0.3915]) -Greedy action tensor([ 0.6899, -1.1642, -0.6074, 0.5512]) tensor([0.4347, 0.0681, 0.1188, 0.3784]) -Greedy action tensor([ 1.3060, -0.6481, 0.7841, 1.3632]) tensor([0.3579, 0.0507, 0.2124, 0.3790]) -Greedy action tensor([ 0.5685, 0.0179, -0.9787, 1.2468]) tensor([0.2660, 0.1534, 0.0566, 0.5241]) -Greedy action tensor([ 1.3502, -0.9110, -0.0847, 1.1382]) tensor([0.4648, 0.0484, 0.1107, 0.3760]) -Greedy action tensor([ 0.7145, 0.4909, -0.3189, 0.1461]) tensor([0.3674, 0.2938, 0.1307, 0.2081]) -Greedy action tensor([ 0.8270, -0.8494, 0.0408, 0.9663]) tensor([0.3582, 0.0670, 0.1632, 0.4117]) -Greedy action tensor([ 0.6378, 0.2777, -0.8640, 1.0311]) tensor([0.2939, 0.2050, 0.0655, 0.4356]) -Greedy action tensor([0.3282, 0.2278, 0.8173, 0.1175]) tensor([0.2301, 0.2081, 0.3753, 0.1864]) -Greedy action tensor([ 1.0882, -0.0392, -0.2261, -0.1393]) tensor([0.5304, 0.1718, 0.1425, 0.1554]) -Greedy action tensor([ 1.2537, -0.3911, 0.8902, 0.4380]) tensor([0.4291, 0.0828, 0.2983, 0.1898]) -Greedy action tensor([ 1.2287, -0.9078, -0.2528, 1.2438]) tensor([0.4236, 0.0500, 0.0963, 0.4301]) -Greedy action tensor([ 1.8845, -0.4051, -0.1597, 0.9058]) tensor([0.6224, 0.0631, 0.0806, 0.2339]) -Greedy action tensor([ 1.5808, -0.5193, 1.0224, 0.9276]) tensor([0.4515, 0.0553, 0.2583, 0.2349]) -Greedy action tensor([ 0.9298, -0.5903, -0.5411, 0.8043]) tensor([0.4291, 0.0938, 0.0986, 0.3785]) -Greedy action tensor([ 1.6797, -0.5421, 0.0375, 0.6930]) tensor([0.5971, 0.0647, 0.1156, 0.2226]) -Greedy action tensor([ 0.9958, -0.0459, 0.3781, 0.6005]) tensor([0.3898, 0.1375, 0.2102, 0.2625]) -Greedy action tensor([0.9054, 0.0097, 0.9448, 1.5452]) tensor([0.2302, 0.0940, 0.2394, 0.4364]) -Greedy action tensor([ 1.2164, -0.6663, 0.6948, 0.8784]) tensor([0.4067, 0.0619, 0.2414, 0.2900]) -Greedy action tensor([ 1.3329, -0.2458, 0.4435, 1.0342]) tensor([0.4239, 0.0874, 0.1742, 0.3145]) -Greedy action tensor([ 0.9602, -0.1610, -0.1157, 1.4411]) tensor([0.3045, 0.0992, 0.1038, 0.4925]) -Greedy action tensor([ 1.5038, 0.0429, -0.2441, 0.9888]) tensor([0.4991, 0.1158, 0.0869, 0.2982]) -Greedy action tensor([ 1.1168, 0.2793, -0.8519, 0.7522]) tensor([0.4411, 0.1909, 0.0616, 0.3064]) -Greedy action tensor([ 1.2714, -0.5802, -0.3380, 0.4448]) tensor([0.5572, 0.0875, 0.1115, 0.2438]) -Greedy action tensor([ 0.9653, -1.1416, 0.8897, 0.3226]) tensor([0.3884, 0.0472, 0.3601, 0.2042]) -Greedy action tensor([ 1.2237, -0.8410, -0.4498, 1.1043]) tensor([0.4542, 0.0576, 0.0852, 0.4030]) -Greedy action tensor([0.9318, 0.3460, 0.5548, 1.1689]) tensor([0.2849, 0.1586, 0.1954, 0.3611]) -Greedy action tensor([1.5491, 0.3744, 0.6255, 1.0773]) tensor([0.4292, 0.1326, 0.1704, 0.2678]) -Greedy action tensor([-1.4030, -0.5146, 0.7201, -0.4311]) tensor([0.0693, 0.1685, 0.5791, 0.1831]) -Greedy action tensor([-0.7699, -0.5930, 0.3528, 0.4963]) tensor([0.1135, 0.1354, 0.3487, 0.4025]) -Greedy action tensor([-2.0013, -0.7284, 0.7567, 0.0728]) tensor([0.0353, 0.1262, 0.5572, 0.2812]) -Greedy action tensor([-1.8464, -0.7593, 0.1914, -0.3264]) tensor([0.0617, 0.1829, 0.4734, 0.2820]) -Greedy action tensor([-1.8759, -0.8124, 0.1777, -0.3230]) tensor([0.0609, 0.1764, 0.4748, 0.2878]) -Greedy action tensor([-0.9545, -0.2355, 0.5180, -0.5244]) tensor([0.1117, 0.2293, 0.4872, 0.1718]) -Greedy action tensor([-1.8617, -0.9385, 0.1116, -0.4474]) tensor([0.0675, 0.1698, 0.4853, 0.2775]) -Greedy action tensor([-1.9576, -0.9154, 0.1819, -0.3205]) tensor([0.0572, 0.1623, 0.4862, 0.2942]) -Greedy action tensor([-0.8434, -0.1730, 0.4109, -0.2288]) tensor([0.1203, 0.2353, 0.4219, 0.2225]) -Greedy action tensor([-1.8515, -0.8634, 0.1807, -0.3539]) tensor([0.0633, 0.1701, 0.4833, 0.2832]) -Greedy action tensor([-1.8432, -0.7426, 0.1511, -0.3377]) tensor([0.0631, 0.1895, 0.4633, 0.2841]) -Greedy action tensor([-2.0122, -0.8800, 0.3664, -0.1492]) tensor([0.0469, 0.1454, 0.5057, 0.3020]) -Greedy action tensor([-1.8235, -0.8398, 0.1500, -0.3868]) tensor([0.0663, 0.1774, 0.4773, 0.2790]) -Greedy action tensor([-2.0244, -0.9384, 0.4395, -0.1349]) tensor([0.0448, 0.1327, 0.5262, 0.2963]) -Greedy action tensor([-1.7396, -0.7753, 0.1283, -0.4301]) tensor([0.0725, 0.1900, 0.4691, 0.2684]) -Greedy action tensor([-1.0331, -0.2506, -0.1480, -0.1275]) tensor([0.1237, 0.2705, 0.2998, 0.3060]) -Greedy action tensor([-1.0835, -0.6632, 0.3763, -0.1335]) tensor([0.1062, 0.1617, 0.4573, 0.2747]) -Greedy action tensor([-1.3172, -0.5416, 0.4187, 0.1947]) tensor([0.0747, 0.1623, 0.4240, 0.3389]) -Greedy action tensor([-1.9899, -0.9064, 0.3086, -0.2166]) tensor([0.0505, 0.1492, 0.5029, 0.2974]) -Greedy action tensor([-0.4266, -0.4764, 0.1844, 0.2161]) tensor([0.1756, 0.1671, 0.3235, 0.3339]) -Greedy action tensor([-1.6038, -0.5866, 0.5208, -0.0371]) tensor([0.0591, 0.1634, 0.4945, 0.2830]) -Greedy action tensor([-1.5497, -0.4379, 0.4973, -0.0794]) tensor([0.0620, 0.1884, 0.4800, 0.2696]) -Greedy action tensor([-0.8431, -0.5398, 0.3754, 0.0518]) tensor([0.1222, 0.1655, 0.4133, 0.2990]) -Greedy action tensor([-1.4099, -0.6138, 0.4609, 0.3899]) tensor([0.0635, 0.1407, 0.4121, 0.3838]) -Greedy action tensor([-1.9751, -0.8803, 0.2895, -0.2136]) tensor([0.0515, 0.1538, 0.4953, 0.2995]) -Greedy action tensor([-1.4271, -0.6375, 0.4336, 0.0603]) tensor([0.0711, 0.1567, 0.4573, 0.3148]) -Greedy action tensor([-1.7643, -0.7812, 0.1514, -0.3993]) tensor([0.0695, 0.1859, 0.4723, 0.2723]) -Greedy action tensor([-0.8963, -0.4957, 0.2420, 0.2098]) tensor([0.1158, 0.1728, 0.3614, 0.3500]) -Greedy action tensor([-1.9440, -0.9092, 0.2886, -0.2457]) tensor([0.0538, 0.1513, 0.5012, 0.2937]) -Greedy action tensor([-1.7845, -0.6386, 0.2974, -0.2861]) tensor([0.0601, 0.1890, 0.4820, 0.2689]) -Greedy action tensor([-2.0079, -0.9509, 0.4052, -0.2526]) tensor([0.0480, 0.1382, 0.5361, 0.2777]) -Greedy action tensor([-1.7393, -0.7990, 0.1450, -0.4114]) tensor([0.0719, 0.1840, 0.4730, 0.2711]) -Greedy action tensor([-1.7281, -0.5270, 0.5603, -0.0707]) tensor([0.0515, 0.1711, 0.5074, 0.2700]) -Greedy action tensor([-1.7161, -0.7576, 0.2367, -0.3485]) tensor([0.0686, 0.1788, 0.4834, 0.2692]) -Greedy action tensor([-1.8145, 0.0552, 0.6519, 0.2415]) tensor([0.0369, 0.2395, 0.4350, 0.2886]) -Greedy action tensor([-1.4322, -0.4836, 0.7678, -0.5536]) tensor([0.0666, 0.1720, 0.6011, 0.1603]) -Greedy action tensor([-1.2709, -0.5598, 0.3998, 0.0279]) tensor([0.0832, 0.1694, 0.4424, 0.3050]) -Greedy action tensor([-2.0058, -0.8033, 0.3299, -0.1874]) tensor([0.0480, 0.1598, 0.4963, 0.2959]) -Greedy action tensor([-1.7863, -0.5888, 0.1488, -0.3878]) tensor([0.0654, 0.2166, 0.4530, 0.2649]) -Greedy action tensor([-1.7645, -0.7226, 0.1646, -0.3780]) tensor([0.0679, 0.1926, 0.4677, 0.2718]) -Greedy action tensor([-1.8833, -0.6423, 0.8559, 0.1611]) tensor([0.0362, 0.1251, 0.5595, 0.2793]) -Greedy action tensor([-1.3107, -0.6607, 0.4267, 0.2147]) tensor([0.0758, 0.1452, 0.4307, 0.3484]) -Greedy action tensor([-0.3716, -0.4171, 0.1917, 0.1939]) tensor([0.1827, 0.1746, 0.3210, 0.3217]) -Greedy action tensor([-0.9821, 0.7377, 0.1433, -0.1467]) tensor([0.0835, 0.4664, 0.2574, 0.1926]) -Greedy action tensor([-2.0024, -0.8796, 0.3442, -0.1586]) tensor([0.0480, 0.1475, 0.5014, 0.3032]) -Greedy action tensor([-1.4716, -0.8132, 0.4534, -0.4605]) tensor([0.0798, 0.1541, 0.5468, 0.2193]) -Greedy action tensor([-0.8243, 0.1955, 0.3728, -0.3668]) tensor([0.1154, 0.3200, 0.3821, 0.1824]) -Greedy action tensor([-1.7250, -0.5298, 0.6292, 0.0254]) tensor([0.0486, 0.1605, 0.5114, 0.2796]) -Greedy action tensor([-1.5765, -0.5200, 0.6677, 0.4002]) tensor([0.0487, 0.1401, 0.4595, 0.3517]) -Greedy action tensor([-2.0164, -0.9572, 0.4774, -0.2433]) tensor([0.0457, 0.1318, 0.5534, 0.2691]) -Greedy action tensor([-1.2050, -0.3984, 0.3088, -0.3743]) tensor([0.0992, 0.2223, 0.4508, 0.2277]) -Greedy action tensor([-2.0054, -0.7002, 0.8092, 0.0183]) tensor([0.0346, 0.1274, 0.5765, 0.2615]) -Greedy action tensor([-1.1792, -0.5659, 0.3113, 0.1653]) tensor([0.0899, 0.1660, 0.3992, 0.3449]) -Greedy action tensor([-1.6749, -0.4658, 0.4303, -0.1670]) tensor([0.0586, 0.1962, 0.4807, 0.2645]) -Greedy action tensor([-1.3004, -0.5294, 0.3395, 0.3820]) tensor([0.0730, 0.1579, 0.3764, 0.3927]) -Greedy action tensor([-0.3178, -0.3842, 0.1843, 0.2049]) tensor([0.1896, 0.1774, 0.3132, 0.3198]) -Greedy action tensor([-1.3102, -0.6174, 0.5175, -0.2223]) tensor([0.0821, 0.1640, 0.5104, 0.2435]) -Greedy action tensor([-1.6233, -0.5903, 0.6061, 0.1858]) tensor([0.0521, 0.1463, 0.4838, 0.3178]) -Greedy action tensor([-1.0804, -0.5620, 0.2920, 0.4563]) tensor([0.0887, 0.1490, 0.3499, 0.4124]) -Greedy action tensor([-0.8902, -0.5599, 0.2570, 0.0894]) tensor([0.1219, 0.1696, 0.3839, 0.3246]) -Greedy action tensor([-1.6612, -0.5699, 0.5069, 0.1724]) tensor([0.0527, 0.1569, 0.4607, 0.3297]) -Greedy action tensor([-1.1198, 0.4857, 0.2547, -0.0727]) tensor([0.0782, 0.3896, 0.3093, 0.2229]) -Greedy action tensor([-1.6585, -0.4958, 0.7805, -0.4114]) tensor([0.0522, 0.1671, 0.5988, 0.1818]) -Greedy action tensor([-0.9406, -0.5739, 0.1951, 0.3337]) tensor([0.1095, 0.1580, 0.3409, 0.3916]) -Greedy action tensor([-1.8481, -0.4820, 0.6243, -0.1057]) tensor([0.0445, 0.1744, 0.5271, 0.2540]) -Greedy action tensor([-1.8788, -0.9237, 0.2459, -0.3585]) tensor([0.0605, 0.1571, 0.5060, 0.2765]) -Greedy action tensor([-0.2594, 0.1910, 0.1258, 0.2988]) tensor([0.1728, 0.2711, 0.2540, 0.3020]) -Greedy action tensor([-1.4067, -0.7131, 0.4478, 0.3142]) tensor([0.0668, 0.1336, 0.4265, 0.3732]) -Greedy action tensor([-1.0068, -0.6418, 0.2388, 0.2398]) tensor([0.1064, 0.1533, 0.3699, 0.3703]) -Greedy action tensor([-1.6373, -0.6055, 0.7362, 0.2138]) tensor([0.0478, 0.1342, 0.5135, 0.3045]) -Greedy action tensor([-0.6646, -0.6172, 0.3550, 0.3754]) tensor([0.1307, 0.1371, 0.3624, 0.3698]) -Greedy action tensor([-1.7826, -0.3723, 1.0147, 0.7552]) tensor([0.0293, 0.1200, 0.4803, 0.3705]) -Greedy action tensor([-0.7586, -0.5621, 0.3004, 0.4972]) tensor([0.1161, 0.1413, 0.3348, 0.4077]) -Greedy action tensor([-1.9422, -0.6498, 0.8830, 0.1427]) tensor([0.0338, 0.1232, 0.5707, 0.2722]) -Greedy action tensor([-1.9321, -0.7583, 0.3930, -0.1488]) tensor([0.0490, 0.1584, 0.5011, 0.2915]) -Greedy action tensor([-1.0303, -0.6135, -0.1633, -0.3625]) tensor([0.1460, 0.2216, 0.3476, 0.2848]) -Greedy action tensor([-1.7053, -0.7180, 0.2505, -0.3356]) tensor([0.0681, 0.1827, 0.4813, 0.2679]) -Greedy action tensor([-0.6377, -0.5985, 0.3259, 0.0727]) tensor([0.1493, 0.1553, 0.3915, 0.3039]) -Greedy action tensor([-0.9504, -0.5827, 0.2422, 0.3476]) tensor([0.1064, 0.1536, 0.3505, 0.3895]) -Greedy action tensor([-1.5781, -0.4727, 0.3001, -0.2278]) tensor([0.0693, 0.2095, 0.4536, 0.2676]) -Greedy action tensor([-1.3102, -0.5716, 0.5924, -0.3866]) tensor([0.0812, 0.1700, 0.5443, 0.2045]) -Greedy action tensor([ 0.6991, -0.2936, -0.4506, -0.1378]) tensor([0.4716, 0.1748, 0.1494, 0.2042]) -Greedy action tensor([ 1.1121, -0.2459, 0.0077, -0.2055]) tensor([0.5387, 0.1385, 0.1785, 0.1443]) -Greedy action tensor([ 0.8053, -0.3200, -0.2867, -0.2541]) tensor([0.4983, 0.1617, 0.1672, 0.1728]) -Greedy action tensor([ 1.7077, -0.2680, -0.5999, 0.0599]) tensor([0.6990, 0.0969, 0.0695, 0.1345]) -Greedy action tensor([ 0.7602, -0.1428, -0.1401, 0.0299]) tensor([0.4360, 0.1767, 0.1772, 0.2100]) -Greedy action tensor([ 1.0704, -0.4127, -0.5401, -0.4260]) tensor([0.6058, 0.1375, 0.1210, 0.1357]) -Greedy action tensor([ 0.5394, -0.6096, -0.1265, -0.0924]) tensor([0.4233, 0.1342, 0.2175, 0.2250]) -Greedy action tensor([ 0.8974, -0.6155, -0.2414, -0.0247]) tensor([0.5159, 0.1137, 0.1652, 0.2052]) -Greedy action tensor([ 0.8380, -0.5406, -0.2139, 0.1501]) tensor([0.4753, 0.1197, 0.1660, 0.2389]) -Greedy action tensor([ 0.7886, -0.7155, -0.2706, 0.2198]) tensor([0.4684, 0.1041, 0.1624, 0.2652]) -Greedy action tensor([ 0.7238, -0.5233, -0.3305, 0.1367]) tensor([0.4563, 0.1311, 0.1590, 0.2536]) -Greedy action tensor([ 0.9246, 0.1310, -0.1847, -0.0122]) tensor([0.4600, 0.2080, 0.1517, 0.1803]) -Greedy action tensor([ 0.9594, -0.4143, -0.4496, -0.0185]) tensor([0.5337, 0.1351, 0.1304, 0.2007]) -Greedy action tensor([ 0.7071, -0.5931, -0.2778, 0.1613]) tensor([0.4494, 0.1224, 0.1678, 0.2604]) -Greedy action tensor([ 1.0374, -0.4146, -0.2659, -0.1286]) tensor([0.5502, 0.1288, 0.1495, 0.1715]) -Greedy action tensor([ 0.9562, -0.4370, 0.1000, 0.0966]) tensor([0.4770, 0.1184, 0.2026, 0.2019]) -Greedy action tensor([ 1.1806, -0.3310, -0.7213, -0.0949]) tensor([0.6064, 0.1337, 0.0905, 0.1694]) -Greedy action tensor([ 1.2403, -0.4001, -0.2675, 0.0039]) tensor([0.5863, 0.1137, 0.1298, 0.1703]) -Greedy action tensor([ 0.8749, -0.3977, -0.0364, 0.1115]) tensor([0.4655, 0.1304, 0.1871, 0.2170]) -Greedy action tensor([ 1.4742, -0.4476, -0.4020, 0.1852]) tensor([0.6349, 0.0929, 0.0972, 0.1749]) -Greedy action tensor([ 0.9837, -0.5775, -0.4652, 0.2277]) tensor([0.5224, 0.1096, 0.1227, 0.2453]) -Greedy action tensor([ 0.8093, -0.4612, -0.2120, 0.2296]) tensor([0.4544, 0.1275, 0.1636, 0.2545]) -Greedy action tensor([ 0.9053, -0.7256, -0.4452, 0.2958]) tensor([0.5004, 0.0979, 0.1297, 0.2720]) -Greedy action tensor([ 1.1872, -0.2945, -0.1365, -0.0473]) tensor([0.5604, 0.1274, 0.1492, 0.1631]) -Greedy action tensor([ 1.4581, -0.1223, -0.0406, 0.0018]) tensor([0.6015, 0.1239, 0.1344, 0.1402]) -Greedy action tensor([ 1.0172, 0.0042, -0.0644, -0.0283]) tensor([0.4869, 0.1768, 0.1651, 0.1712]) -Greedy action tensor([ 1.6494, -0.4187, -0.4755, 0.1393]) tensor([0.6818, 0.0862, 0.0814, 0.1506]) -Greedy action tensor([ 0.9016, -0.6296, -0.2577, 0.0696]) tensor([0.5089, 0.1101, 0.1596, 0.2215]) -Greedy action tensor([ 1.6412, -0.5298, -0.4317, 0.0192]) tensor([0.6957, 0.0794, 0.0875, 0.1374]) -Greedy action tensor([ 0.8421, -0.4442, -0.0243, 0.0384]) tensor([0.4663, 0.1288, 0.1961, 0.2088]) -Greedy action tensor([ 1.1254, -0.0424, -0.0840, -0.1059]) tensor([0.5259, 0.1636, 0.1569, 0.1535]) -Greedy action tensor([ 0.5864, -0.3808, 0.0221, -0.1145]) tensor([0.4090, 0.1555, 0.2326, 0.2029]) -Greedy action tensor([ 1.0694, -0.8538, -0.2145, 0.0332]) tensor([0.5625, 0.0822, 0.1558, 0.1996]) -Greedy action tensor([ 0.6577, -0.2257, -0.2008, -0.1948]) tensor([0.4418, 0.1826, 0.1872, 0.1884]) -Greedy action tensor([ 0.7642, -0.5310, -0.2217, -0.0096]) tensor([0.4743, 0.1299, 0.1770, 0.2188]) -Greedy action tensor([ 1.1376, -0.1765, -0.1205, -0.1802]) tensor([0.5492, 0.1476, 0.1561, 0.1471]) -Greedy action tensor([ 0.8404, -0.6243, -0.5647, 0.1763]) tensor([0.5022, 0.1161, 0.1232, 0.2585]) -Greedy action tensor([ 1.1270, -0.3138, -0.0141, -0.2319]) tensor([0.5515, 0.1306, 0.1762, 0.1417]) -Greedy action tensor([ 1.5088, -0.5946, -0.3627, 0.0858]) tensor([0.6592, 0.0805, 0.1014, 0.1589]) -Greedy action tensor([ 1.2152, -0.2934, -0.2740, 0.0340]) tensor([0.5702, 0.1261, 0.1286, 0.1750]) -Greedy action tensor([ 0.8418, -0.5691, -0.3002, 0.2979]) tensor([0.4665, 0.1138, 0.1489, 0.2708]) -Greedy action tensor([ 0.4836, -0.3079, 0.1907, -0.2263]) tensor([0.3716, 0.1684, 0.2773, 0.1827]) -Greedy action tensor([ 1.4311, -0.4266, -0.3230, -0.1248]) tensor([0.6493, 0.1013, 0.1124, 0.1370]) -Greedy action tensor([ 0.4296, -0.5790, -0.1579, 0.0693]) tensor([0.3820, 0.1393, 0.2123, 0.2664]) -Greedy action tensor([ 1.1030, -0.5121, -0.3826, 0.5498]) tensor([0.4999, 0.0994, 0.1132, 0.2875]) -Greedy action tensor([ 0.6611, -0.5182, -0.0366, 0.0378]) tensor([0.4271, 0.1313, 0.2126, 0.2290]) -Greedy action tensor([ 1.0434, -0.5675, -0.3992, 0.0835]) tensor([0.5498, 0.1098, 0.1299, 0.2105]) -Greedy action tensor([ 0.4404, -0.3237, 0.1477, -0.0145]) tensor([0.3513, 0.1636, 0.2622, 0.2229]) -Greedy action tensor([ 0.7319, -0.8173, -0.3362, 0.1313]) tensor([0.4752, 0.1009, 0.1633, 0.2606]) -Greedy action tensor([ 0.7427, -0.5294, -0.0371, 0.1375]) tensor([0.4377, 0.1227, 0.2007, 0.2390]) -Greedy action tensor([ 1.3682, -0.4357, -0.2211, 0.3661]) tensor([0.5761, 0.0949, 0.1176, 0.2115]) -Greedy action tensor([ 0.9265, -0.4670, -0.1888, 0.1109]) tensor([0.4955, 0.1230, 0.1624, 0.2192]) -Greedy action tensor([ 0.9110, -0.7136, -0.4444, 0.2219]) tensor([0.5110, 0.1007, 0.1318, 0.2565]) -Greedy action tensor([ 0.7516, -0.0112, -0.1254, 0.0599]) tensor([0.4196, 0.1957, 0.1746, 0.2101]) -Greedy action tensor([ 1.1064, -0.5599, -0.5959, -0.0835]) tensor([0.5969, 0.1128, 0.1088, 0.1816]) -Greedy action tensor([ 0.8638, -0.8757, -0.3171, 0.1323]) tensor([0.5092, 0.0894, 0.1563, 0.2450]) -Greedy action tensor([ 1.6384, -0.7597, -0.5607, 0.1503]) tensor([0.7005, 0.0637, 0.0777, 0.1582]) -Greedy action tensor([ 0.7471, -0.3673, -0.1345, 0.0930]) tensor([0.4421, 0.1450, 0.1831, 0.2298]) -Greedy action tensor([ 1.4174, -0.5570, -0.3511, -0.0022]) tensor([0.6446, 0.0895, 0.1100, 0.1559]) -Greedy action tensor([ 1.0775, -0.5808, -0.0943, 0.2618]) tensor([0.5148, 0.0980, 0.1595, 0.2277]) -Greedy action tensor([ 0.5026, -0.3555, -0.0696, -0.1575]) tensor([0.3992, 0.1692, 0.2253, 0.2063]) -Greedy action tensor([ 1.1828, -0.4926, -0.2574, -0.1637]) tensor([0.5937, 0.1112, 0.1406, 0.1545]) -Greedy action tensor([ 0.3464, -0.0846, -0.0603, -0.3156]) tensor([0.3532, 0.2295, 0.2352, 0.1822]) -Greedy action tensor([ 0.3971, -0.2326, -0.2973, 0.1777]) tensor([0.3527, 0.1879, 0.1761, 0.2832]) -Greedy action tensor([ 0.9815, -0.5434, -0.0919, -0.2105]) tensor([0.5367, 0.1168, 0.1835, 0.1630]) -Greedy action tensor([ 0.9932, -0.3767, -0.4221, -0.0987]) tensor([0.5457, 0.1387, 0.1325, 0.1831]) -Greedy action tensor([ 0.3526, -0.2882, -0.4578, 0.0621]) tensor([0.3677, 0.1937, 0.1635, 0.2750]) -Greedy action tensor([ 0.7056, -0.5135, -0.1755, 0.0842]) tensor([0.4450, 0.1315, 0.1844, 0.2391]) -Greedy action tensor([ 1.2427, -0.5389, -0.2139, 0.2572]) tensor([0.5635, 0.0949, 0.1313, 0.2103]) -Greedy action tensor([ 0.8503, 0.0944, -0.1021, 0.0076]) tensor([0.4375, 0.2054, 0.1688, 0.1883]) -Greedy action tensor([ 0.5097, -0.0861, 0.0611, -0.1795]) tensor([0.3715, 0.2047, 0.2372, 0.1865]) -Greedy action tensor([ 1.0575, -0.7322, -0.3941, 0.1741]) tensor([0.5511, 0.0920, 0.1291, 0.2278]) -Greedy action tensor([ 1.3352, -0.5323, -0.1633, 0.3531]) tensor([0.5706, 0.0882, 0.1275, 0.2137]) -Greedy action tensor([ 0.6566, -0.5437, -0.1301, 0.0132]) tensor([0.4382, 0.1320, 0.1995, 0.2303]) -Greedy action tensor([ 1.0808, -0.5505, -0.1470, 0.3829]) tensor([0.5035, 0.0985, 0.1475, 0.2505]) -Greedy action tensor([ 1.5961e+00, -3.7995e-01, -5.2941e-01, 4.8259e-04]) tensor([0.6846, 0.0949, 0.0817, 0.1388]) -Greedy action tensor([ 1.0940, -0.3840, -0.3936, 0.0201]) tensor([0.5569, 0.1270, 0.1258, 0.1903]) -Greedy action tensor([ 0.5370, -0.3176, 0.1625, -0.2139]) tensor([0.3869, 0.1646, 0.2660, 0.1826]) -Greedy action tensor([ 1.5140, -0.4385, -0.5050, -0.0764]) tensor([0.6763, 0.0960, 0.0898, 0.1379]) -Greedy action tensor([ 1.1641, -0.7220, -0.3609, 0.8611]) tensor([0.4744, 0.0719, 0.1032, 0.3504]) -Greedy action tensor([ 0.7609, -0.6117, -0.1238, 0.0183]) tensor([0.4668, 0.1183, 0.1927, 0.2221]) -Greedy action tensor([ 0.6459, -0.1501, -0.0554, -0.4308]) tensor([0.4371, 0.1972, 0.2168, 0.1489]) -Greedy action tensor([ 0.7527, -0.2295, -0.1486, -0.6905]) tensor([0.4959, 0.1857, 0.2013, 0.1171]) -Greedy action tensor([ 0.4868, -0.1043, -0.0516, -0.4267]) tensor([0.3939, 0.2181, 0.2299, 0.1580]) -Greedy action tensor([ 0.2414, -0.0206, 0.1577, -0.1798]) tensor([0.2989, 0.2300, 0.2749, 0.1962]) -Greedy action tensor([ 0.3689, -0.0323, 0.1533, -0.3354]) tensor([0.3367, 0.2254, 0.2714, 0.1665]) -Greedy action tensor([ 0.2474, 0.0420, 0.1636, -0.2870]) tensor([0.3012, 0.2453, 0.2770, 0.1765]) -Greedy action tensor([ 0.7635, -0.2173, -0.0489, -0.6158]) tensor([0.4830, 0.1811, 0.2143, 0.1216]) -Greedy action tensor([ 0.3640, 0.1965, 0.2009, -0.2510]) tensor([0.3090, 0.2614, 0.2625, 0.1671]) -Greedy action tensor([ 0.7275, -0.1612, -0.1718, -0.5252]) tensor([0.4753, 0.1954, 0.1934, 0.1358]) -Greedy action tensor([ 0.3902, 0.1368, 0.0603, -0.1123]) tensor([0.3225, 0.2504, 0.2319, 0.1952]) -Greedy action tensor([ 0.8574, -0.5752, -0.0587, -0.4465]) tensor([0.5235, 0.1250, 0.2094, 0.1421]) -Greedy action tensor([ 0.3849, 0.1310, 0.0509, -0.2036]) tensor([0.3282, 0.2546, 0.2350, 0.1822]) -Greedy action tensor([ 1.3519, -0.8133, 0.0706, -0.9021]) tensor([0.6678, 0.0766, 0.1854, 0.0701]) -Greedy action tensor([ 0.4096, -0.3403, -0.1255, -0.4444]) tensor([0.4026, 0.1902, 0.2358, 0.1714]) -Greedy action tensor([ 0.3512, 0.0868, 0.0893, -0.2576]) tensor([0.3245, 0.2491, 0.2498, 0.1765]) -Greedy action tensor([ 0.5024, -0.5042, -0.1289, -0.4333]) tensor([0.4367, 0.1596, 0.2323, 0.1713]) -Greedy action tensor([ 0.6208, -0.3165, 0.0482, -0.5620]) tensor([0.4421, 0.1731, 0.2493, 0.1354]) -Greedy action tensor([ 0.6305, -0.0741, 0.0565, -0.3173]) tensor([0.4090, 0.2022, 0.2304, 0.1585]) -Greedy action tensor([ 0.4404, -0.1041, 0.1271, -0.2633]) tensor([0.3564, 0.2067, 0.2605, 0.1763]) -Greedy action tensor([ 0.6477, -0.2683, 0.1948, -0.3955]) tensor([0.4187, 0.1675, 0.2662, 0.1475]) -Greedy action tensor([ 0.5635, -0.1139, -0.0108, -0.5614]) tensor([0.4174, 0.2120, 0.2351, 0.1355]) -Greedy action tensor([ 1.0248, -0.6120, -0.1019, -0.6045]) tensor([0.5832, 0.1135, 0.1890, 0.1143]) -Greedy action tensor([ 0.6150, -0.0136, -0.1211, -0.3089]) tensor([0.4151, 0.2214, 0.1988, 0.1648]) -Greedy action tensor([ 0.3084, 0.1485, 0.1915, -0.2323]) tensor([0.3008, 0.2564, 0.2676, 0.1752]) -Greedy action tensor([ 0.7674, -0.3729, -0.1184, -0.5252]) tensor([0.4983, 0.1593, 0.2055, 0.1368]) -Greedy action tensor([ 0.6144, -0.1326, 0.0248, -0.2703]) tensor([0.4096, 0.1941, 0.2272, 0.1691]) -Greedy action tensor([ 0.1740, 0.1052, 0.0636, -0.3388]) tensor([0.2917, 0.2723, 0.2612, 0.1747]) -Greedy action tensor([ 0.3612, 0.1558, -0.0944, -0.3467]) tensor([0.3400, 0.2769, 0.2156, 0.1675]) -Greedy action tensor([ 0.5471, -0.2368, 0.0193, -0.3765]) tensor([0.4092, 0.1869, 0.2414, 0.1625]) -Greedy action tensor([ 0.2348, 0.2352, -0.0660, -0.3341]) tensor([0.3024, 0.3025, 0.2239, 0.1712]) -Greedy action tensor([ 1.0024, -0.4256, 0.0354, -0.6274]) tensor([0.5507, 0.1320, 0.2094, 0.1079]) -Greedy action tensor([ 0.7221, -0.4871, -0.0787, -0.3477]) tensor([0.4783, 0.1428, 0.2148, 0.1641]) -Greedy action tensor([ 0.4597, -0.2417, -0.0461, -0.3589]) tensor([0.3937, 0.1952, 0.2374, 0.1736]) -Greedy action tensor([ 0.3270, 0.0033, 0.1008, -0.3161]) tensor([0.3282, 0.2375, 0.2618, 0.1725]) -Greedy action tensor([ 0.8663, -0.1443, 0.1665, -0.4769]) tensor([0.4713, 0.1716, 0.2341, 0.1230]) -Greedy action tensor([ 0.2044, -0.0100, 0.0359, -0.4182]) tensor([0.3136, 0.2531, 0.2650, 0.1683]) -Greedy action tensor([ 0.7627, -0.4577, -0.2175, -0.5386]) tensor([0.5148, 0.1519, 0.1932, 0.1401]) -Greedy action tensor([ 0.1932, -0.0037, 0.0246, -0.2974]) tensor([0.3050, 0.2505, 0.2577, 0.1868]) -Greedy action tensor([ 0.5670, -0.4360, 0.1265, -0.5737]) tensor([0.4292, 0.1574, 0.2763, 0.1372]) -Greedy action tensor([ 0.2722, -0.1124, 0.0093, -0.3351]) tensor([0.3340, 0.2273, 0.2568, 0.1819]) -Greedy action tensor([ 0.2243, 0.1067, 0.0241, -0.1638]) tensor([0.2953, 0.2626, 0.2418, 0.2004]) -Greedy action tensor([ 0.2039, -0.0700, 0.2266, -0.3129]) tensor([0.2959, 0.2250, 0.3027, 0.1765]) -Greedy action tensor([ 0.9388, -0.6561, -0.0044, -0.3365]) tensor([0.5343, 0.1084, 0.2080, 0.1493]) -Greedy action tensor([ 0.4577, -0.1300, 0.0171, -0.3935]) tensor([0.3808, 0.2116, 0.2451, 0.1626]) -Greedy action tensor([ 0.5147, -0.2758, -0.0845, -0.4086]) tensor([0.4166, 0.1890, 0.2289, 0.1655]) -Greedy action tensor([ 0.4272, -0.0873, 0.2113, -0.3784]) tensor([0.3508, 0.2097, 0.2827, 0.1568]) -Greedy action tensor([ 0.6653, -0.4399, -0.1652, -0.5697]) tensor([0.4860, 0.1609, 0.2118, 0.1413]) -Greedy action tensor([ 0.3666, -0.0728, -0.0187, -0.3165]) tensor([0.3534, 0.2277, 0.2404, 0.1785]) -Greedy action tensor([ 0.7786, -0.7890, -0.1172, -0.7597]) tensor([0.5460, 0.1139, 0.2229, 0.1172]) -Greedy action tensor([ 0.3980, 0.0404, 0.1649, -0.3361]) tensor([0.3365, 0.2354, 0.2666, 0.1615]) -Greedy action tensor([ 0.5863, -0.2670, 0.1340, -0.4400]) tensor([0.4131, 0.1760, 0.2628, 0.1480]) -Greedy action tensor([ 0.1172, 0.0968, 0.1229, -0.1426]) tensor([0.2662, 0.2608, 0.2677, 0.2053]) -Greedy action tensor([ 0.6247, -0.1613, -0.0260, -0.4085]) tensor([0.4286, 0.1953, 0.2236, 0.1525]) -Greedy action tensor([ 0.4908, 0.1257, -0.0309, -0.1701]) tensor([0.3566, 0.2476, 0.2117, 0.1842]) -Greedy action tensor([ 0.3362, -0.1026, 0.1571, -0.2212]) tensor([0.3275, 0.2112, 0.2738, 0.1876]) -Greedy action tensor([ 0.5475, 0.1250, -0.1402, -0.3224]) tensor([0.3880, 0.2543, 0.1951, 0.1626]) -Greedy action tensor([ 0.4824, -0.3828, -0.0278, -0.5306]) tensor([0.4194, 0.1766, 0.2518, 0.1523]) -Greedy action tensor([ 0.1592, 0.1050, 0.1984, -0.1061]) tensor([0.2664, 0.2523, 0.2770, 0.2043]) -Greedy action tensor([ 0.8066, -0.1832, -0.1115, -0.6004]) tensor([0.4961, 0.1844, 0.1981, 0.1215]) -Greedy action tensor([ 0.5210, -0.2813, 0.1043, -0.4415]) tensor([0.4017, 0.1801, 0.2648, 0.1534]) -Greedy action tensor([ 0.4677, 0.0930, 0.1710, -0.2794]) tensor([0.3443, 0.2367, 0.2559, 0.1631]) -Greedy action tensor([ 0.1740, 0.1203, 0.1207, -0.2073]) tensor([0.2794, 0.2648, 0.2649, 0.1908]) -Greedy action tensor([ 0.6657, -0.2523, -0.0609, -0.5097]) tensor([0.4563, 0.1822, 0.2206, 0.1409]) -Greedy action tensor([ 1.0845, -0.8552, -0.1201, -0.7112]) tensor([0.6213, 0.0893, 0.1863, 0.1031]) -Greedy action tensor([ 0.1685, 0.1326, 0.0378, -0.3365]) tensor([0.2902, 0.2800, 0.2547, 0.1751]) -Greedy action tensor([ 0.8989, -0.4239, -0.2329, -0.6696]) tensor([0.5564, 0.1482, 0.1794, 0.1159]) -Greedy action tensor([ 0.4231, -0.0197, 0.0662, -0.1190]) tensor([0.3420, 0.2197, 0.2394, 0.1989]) -Greedy action tensor([ 0.2893, 0.1022, 0.0660, -0.0552]) tensor([0.2996, 0.2485, 0.2396, 0.2123]) -Greedy action tensor([ 0.1815, 0.1158, 0.2033, -0.1120]) tensor([0.2700, 0.2528, 0.2759, 0.2013]) -Greedy action tensor([ 0.3891, 0.0642, 0.0138, -0.1207]) tensor([0.3322, 0.2400, 0.2283, 0.1995]) -Greedy action tensor([ 0.7360, -0.4069, 0.0841, -0.7768]) tensor([0.4854, 0.1548, 0.2529, 0.1069]) -Greedy action tensor([ 0.3253, 0.2172, 0.0924, -0.2615]) tensor([0.3081, 0.2765, 0.2441, 0.1713]) -Greedy action tensor([ 0.2487, -0.0724, -0.0901, -0.4824]) tensor([0.3425, 0.2485, 0.2441, 0.1649]) -Greedy action tensor([ 0.7262, -0.3869, 0.0180, -0.5886]) tensor([0.4786, 0.1572, 0.2357, 0.1285]) -Greedy action tensor([ 0.7282, -0.1721, -0.1335, -0.5958]) tensor([0.4773, 0.1940, 0.2016, 0.1270]) -Greedy action tensor([ 0.3608, 0.1356, 0.1848, -0.1230]) tensor([0.3074, 0.2454, 0.2578, 0.1895]) -Greedy action tensor([ 0.3707, -0.3412, -0.2063, -0.4142]) tensor([0.3987, 0.1956, 0.2239, 0.1819]) -Greedy action tensor([ 0.7795, -0.5470, 0.0846, -0.7842]) tensor([0.5066, 0.1345, 0.2529, 0.1061]) -Greedy action tensor([ 0.1417, 0.0384, 0.1715, -0.1442]) tensor([0.2715, 0.2448, 0.2797, 0.2040]) -Greedy action tensor([ 0.7633, -0.1053, -0.1240, -0.5276]) tensor([0.4748, 0.1992, 0.1955, 0.1306]) -Greedy action tensor([ 0.4937, -0.1973, 0.0841, -0.3607]) tensor([0.3860, 0.1934, 0.2563, 0.1643]) -Greedy action tensor([0.9613, 0.1588, 0.5408, 0.4730]) tensor([0.3678, 0.1649, 0.2416, 0.2257]) -Greedy action tensor([ 1.3663, -0.3370, 0.0376, 1.1625]) tensor([0.4420, 0.0805, 0.1171, 0.3605]) -Greedy action tensor([ 0.9184, 0.0566, -1.0512, 0.7139]) tensor([0.4207, 0.1777, 0.0587, 0.3429]) -Greedy action tensor([ 0.8726, -0.0099, 0.0556, 1.6323]) tensor([0.2504, 0.1036, 0.1106, 0.5353]) -Greedy action tensor([ 0.0206, 0.6206, 1.5882, -0.7904]) tensor([0.1240, 0.2260, 0.5948, 0.0551]) -Greedy action tensor([ 1.7793, -0.8532, -0.3097, 1.3160]) tensor([0.5480, 0.0394, 0.0678, 0.3448]) -Greedy action tensor([ 0.9675, -1.4042, 0.6889, 0.3509]) tensor([0.4184, 0.0390, 0.3167, 0.2259]) -Greedy action tensor([ 1.2133, -0.4803, 0.7537, 0.4517]) tensor([0.4382, 0.0806, 0.2767, 0.2046]) -Greedy action tensor([ 1.7866, -0.7864, -0.3420, 1.4582]) tensor([0.5221, 0.0398, 0.0621, 0.3759]) -Greedy action tensor([ 1.8453, -1.2010, -0.4251, 1.0886]) tensor([0.6173, 0.0293, 0.0638, 0.2896]) -Greedy action tensor([ 0.7755, -0.4978, 1.6843, 0.5052]) tensor([0.2210, 0.0619, 0.5484, 0.1687]) -Greedy action tensor([ 1.2134, -0.7544, -0.5115, 1.0519]) tensor([0.4611, 0.0644, 0.0822, 0.3923]) -Greedy action tensor([ 1.6452, -0.3606, 0.6437, 0.9717]) tensor([0.4971, 0.0669, 0.1826, 0.2535]) -Greedy action tensor([ 1.8892, -0.9213, -0.0698, 1.0303]) tensor([0.6155, 0.0370, 0.0868, 0.2607]) -Greedy action tensor([ 1.6538, -1.1966, 0.0600, 0.8112]) tensor([0.5912, 0.0342, 0.1201, 0.2546]) -Greedy action tensor([ 1.2250, -0.0882, 1.4257, 0.6513]) tensor([0.3274, 0.0880, 0.4001, 0.1845]) -Greedy action tensor([ 0.5125, -0.1701, 0.4622, -0.2918]) tensor([0.3444, 0.1740, 0.3275, 0.1541]) -Greedy action tensor([ 0.9487, 0.0777, 0.6598, -1.0270]) tensor([0.4336, 0.1815, 0.3248, 0.0601]) -Greedy action tensor([ 0.3467, -0.4530, 0.0989, 0.6832]) tensor([0.2755, 0.1238, 0.2150, 0.3857]) -Greedy action tensor([ 0.8582, -0.2417, 0.0162, 0.4787]) tensor([0.4085, 0.1360, 0.1760, 0.2795]) -Greedy action tensor([ 1.7500, -0.6253, 1.0190, 0.7435]) tensor([0.5155, 0.0479, 0.2482, 0.1884]) -Greedy action tensor([ 0.4401, -1.2116, 0.8645, 0.0404]) tensor([0.2949, 0.0565, 0.4508, 0.1977]) -Greedy action tensor([ 1.2584, -0.7873, 0.8158, 0.3092]) tensor([0.4632, 0.0599, 0.2976, 0.1793]) -Greedy action tensor([ 0.4489, -0.2451, -0.7502, 1.1542]) tensor([0.2614, 0.1306, 0.0788, 0.5292]) -Greedy action tensor([ 0.8899, -0.5088, 0.0348, 1.4737]) tensor([0.2886, 0.0713, 0.1227, 0.5174]) -Greedy action tensor([ 1.6713, -0.0270, 0.8873, 1.0435]) tensor([0.4601, 0.0842, 0.2101, 0.2456]) -Greedy action tensor([ 0.9847, -0.3371, -0.0167, 0.5369]) tensor([0.4399, 0.1173, 0.1616, 0.2811]) -Greedy action tensor([ 0.6874, 0.2087, -1.2875, 0.8642]) tensor([0.3388, 0.2099, 0.0470, 0.4043]) -Greedy action tensor([ 1.1276, -1.4776, 0.7873, 0.5267]) tensor([0.4285, 0.0317, 0.3049, 0.2350]) -Greedy action tensor([ 1.6050, 0.5776, -0.3105, -0.0436]) tensor([0.5891, 0.2109, 0.0868, 0.1133]) -Greedy action tensor([ 1.0024, 0.1496, -0.3016, -0.4504]) tensor([0.5177, 0.2207, 0.1405, 0.1211]) -Greedy action tensor([ 0.8227, -0.0929, -0.7421, 0.9995]) tensor([0.3568, 0.1428, 0.0746, 0.4258]) -Greedy action tensor([0.0451, 0.3290, 0.7572, 0.4286]) tensor([0.1714, 0.2277, 0.3494, 0.2515]) -Greedy action tensor([ 1.1729, -0.7675, 0.3495, -0.0116]) tensor([0.5295, 0.0761, 0.2324, 0.1620]) -Greedy action tensor([ 1.3158, -0.4364, 0.9987, 1.6210]) tensor([0.3069, 0.0532, 0.2235, 0.4164]) -Greedy action tensor([ 0.6925, -0.8589, 0.6445, 0.4691]) tensor([0.3373, 0.0715, 0.3215, 0.2697]) -Greedy action tensor([ 1.5589, -0.8500, -0.3416, 1.1070]) tensor([0.5331, 0.0479, 0.0797, 0.3393]) -Greedy action tensor([ 1.7412, 0.4908, -0.5416, 1.3313]) tensor([0.4873, 0.1396, 0.0497, 0.3234]) -Greedy action tensor([ 1.1489, -0.1852, 0.4089, 0.2445]) tensor([0.4662, 0.1228, 0.2224, 0.1887]) -Greedy action tensor([ 1.1029, 0.2418, -0.3058, 0.7898]) tensor([0.4170, 0.1762, 0.1019, 0.3049]) -Greedy action tensor([ 0.5535, -0.2977, 0.3429, 0.8137]) tensor([0.2830, 0.1208, 0.2292, 0.3671]) -Greedy action tensor([ 2.0236, -0.3650, 0.2038, 0.9840]) tensor([0.6221, 0.0571, 0.1008, 0.2200]) -Greedy action tensor([1.3454, 0.4767, 0.2018, 1.4516]) tensor([0.3509, 0.1472, 0.1118, 0.3902]) -Greedy action tensor([-0.2657, 0.7101, 0.9891, -0.6732]) tensor([0.1278, 0.3391, 0.4481, 0.0850]) -Greedy action tensor([ 1.4514, -1.1052, -0.3654, -0.1938]) tensor([0.6978, 0.0541, 0.1134, 0.1347]) -Greedy action tensor([ 1.3877, -0.6547, 0.9266, 0.7210]) tensor([0.4398, 0.0571, 0.2773, 0.2258]) -Greedy action tensor([ 1.5559, -0.1147, 0.1400, 1.6071]) tensor([0.4027, 0.0758, 0.0977, 0.4238]) -Greedy action tensor([1.5463, 0.6034, 0.3881, 0.5257]) tensor([0.4845, 0.1887, 0.1522, 0.1746]) -Greedy action tensor([ 1.3226, -0.9927, -0.8013, 0.5210]) tensor([0.5999, 0.0592, 0.0717, 0.2691]) -Greedy action tensor([1.5607, 0.3197, 0.7367, 1.5478]) tensor([0.3683, 0.1065, 0.1616, 0.3636]) -Greedy action tensor([1.6655, 0.0549, 0.6945, 0.6472]) tensor([0.5156, 0.1030, 0.1952, 0.1862]) -Greedy action tensor([ 0.9999, -0.5137, 1.8871, 0.8144]) tensor([0.2233, 0.0491, 0.5422, 0.1855]) -Greedy action tensor([ 1.8486, -0.3598, 0.5195, 0.7639]) tensor([0.5839, 0.0642, 0.1546, 0.1974]) -Greedy action tensor([ 1.1980, -0.2037, -0.9285, 1.5342]) tensor([0.3617, 0.0890, 0.0431, 0.5062]) -Greedy action tensor([ 2.0313, -0.0352, 0.1397, 1.3029]) tensor([0.5681, 0.0719, 0.0857, 0.2742]) -Greedy action tensor([1.0984, 0.2174, 0.8315, 0.9454]) tensor([0.3291, 0.1364, 0.2520, 0.2824]) -Greedy action tensor([ 1.8762, -0.0575, -0.2209, 1.1803]) tensor([0.5662, 0.0819, 0.0695, 0.2823]) -Greedy action tensor([ 0.2875, -1.1278, -0.8627, -0.0290]) tensor([0.4370, 0.1061, 0.1384, 0.3185]) -Greedy action tensor([ 1.7898, -0.1759, -0.0870, 0.7002]) tensor([0.6137, 0.0860, 0.0939, 0.2064]) -Greedy action tensor([ 1.9832, -0.2338, 0.4781, 1.5055]) tensor([0.5125, 0.0558, 0.1138, 0.3179]) -Greedy action tensor([1.1561, 0.5280, 0.7180, 1.1235]) tensor([0.3178, 0.1696, 0.2051, 0.3076]) -Greedy action tensor([ 0.4598, 0.2011, -0.3855, 0.6305]) tensor([0.2952, 0.2279, 0.1268, 0.3502]) -Greedy action tensor([1.8804, 0.5313, 1.1482, 1.0097]) tensor([0.4632, 0.1202, 0.2227, 0.1939]) -Greedy action tensor([ 1.4230, -0.3153, 1.1206, 0.9545]) tensor([0.3936, 0.0692, 0.2909, 0.2464]) -Greedy action tensor([ 1.1801, -0.0590, -0.2606, 0.6873]) tensor([0.4679, 0.1355, 0.1108, 0.2858]) -Greedy action tensor([ 1.3356, 0.4534, -0.1853, 0.3972]) tensor([0.4941, 0.2045, 0.1080, 0.1934]) -Greedy action tensor([ 0.7166, -1.1621, -0.0454, 0.1416]) tensor([0.4582, 0.0700, 0.2139, 0.2579]) -Greedy action tensor([ 0.9584, -0.8113, -0.2120, 1.1831]) tensor([0.3660, 0.0624, 0.1135, 0.4582]) -Greedy action tensor([ 2.0897, -0.6355, 0.1746, 1.1373]) tensor([0.6255, 0.0410, 0.0922, 0.2413]) -Greedy action tensor([ 0.4913, 0.1576, -1.4656, 0.7785]) tensor([0.3135, 0.2245, 0.0443, 0.4177]) -Greedy action tensor([ 1.3508, -0.4065, 0.4992, 0.8602]) tensor([0.4522, 0.0780, 0.1930, 0.2768]) -Greedy action tensor([ 1.7385, -1.3541, -0.1148, 0.4663]) tensor([0.6746, 0.0306, 0.1057, 0.1890]) -Greedy action tensor([0.4912, 0.4127, 0.0061, 0.7619]) tensor([0.2597, 0.2401, 0.1599, 0.3404]) -Greedy action tensor([ 1.6088, -0.1407, 0.1185, 1.9789]) tensor([0.3512, 0.0611, 0.0791, 0.5086]) -Greedy action tensor([ 1.4387, -0.4862, 0.3711, 0.9507]) tensor([0.4754, 0.0694, 0.1634, 0.2918]) -Greedy action tensor([ 0.8565, 0.1111, -0.1498, 0.2829]) tensor([0.4160, 0.1974, 0.1521, 0.2344]) -Greedy action tensor([ 1.6459, -0.0793, 0.8800, 1.0591]) tensor([0.4547, 0.0810, 0.2114, 0.2529]) -Greedy action tensor([ 2.0425, -0.4641, -0.1187, 0.7946]) tensor([0.6739, 0.0550, 0.0776, 0.1935]) -Greedy action tensor([ 0.8096, -0.0163, -0.5918, 1.0259]) tensor([0.3418, 0.1497, 0.0842, 0.4244]) -Greedy action tensor([ 0.6809, -0.5171, 0.4965, 0.5527]) tensor([0.3319, 0.1002, 0.2760, 0.2920]) -Greedy action tensor([ 1.7930, -0.4227, 0.2273, 1.1002]) tensor([0.5500, 0.0600, 0.1149, 0.2751]) -Greedy action tensor([ 0.0283, 0.4545, 0.0669, -0.0080]) tensor([0.2205, 0.3377, 0.2292, 0.2126]) -Greedy action tensor([ 0.6959, -0.3239, -0.3296, 0.4696]) tensor([0.3973, 0.1433, 0.1425, 0.3169]) -Greedy action tensor([ 0.8383, -0.5879, -0.3362, 0.1906]) tensor([0.4825, 0.1159, 0.1491, 0.2525]) -Greedy action tensor([ 0.8647, -0.6980, -0.1039, 0.2056]) tensor([0.4747, 0.0995, 0.1802, 0.2456]) -Greedy action tensor([ 0.7876, -0.5862, -0.1067, -0.1878]) tensor([0.4904, 0.1242, 0.2005, 0.1849]) -Greedy action tensor([ 1.0125, -0.8727, -0.6010, 0.2956]) tensor([0.5437, 0.0825, 0.1083, 0.2655]) -Greedy action tensor([ 1.5013, -0.4721, -0.3338, 0.3896]) tensor([0.6144, 0.0854, 0.0981, 0.2021]) -Greedy action tensor([ 0.5775, -0.2414, -0.1720, -0.1105]) tensor([0.4139, 0.1825, 0.1956, 0.2080]) -Greedy action tensor([ 1.0881, -0.5408, -0.0145, -0.0202]) tensor([0.5381, 0.1056, 0.1787, 0.1776]) -Greedy action tensor([ 0.9031, -0.6235, -0.7204, 0.0731]) tensor([0.5404, 0.1174, 0.1066, 0.2356]) -Greedy action tensor([ 0.6886, -0.6634, -0.2870, 0.0266]) tensor([0.4648, 0.1202, 0.1752, 0.2397]) -Greedy action tensor([ 0.5223, -0.3908, 0.0376, -0.1940]) tensor([0.3991, 0.1601, 0.2458, 0.1950]) -Greedy action tensor([ 1.4113, -0.6990, -0.1245, -0.0068]) tensor([0.6335, 0.0768, 0.1364, 0.1534]) -Greedy action tensor([ 1.5090, -0.3918, -0.5677, -0.1014]) tensor([0.6782, 0.1013, 0.0850, 0.1355]) -Greedy action tensor([ 0.9587, -0.4579, -0.2540, -0.0396]) tensor([0.5240, 0.1271, 0.1558, 0.1931]) -Greedy action tensor([ 1.3610, -0.4016, -0.3608, -0.0235]) tensor([0.6247, 0.1072, 0.1117, 0.1564]) -Greedy action tensor([ 1.1712, -0.5877, -0.0713, 0.1360]) tensor([0.5506, 0.0948, 0.1589, 0.1956]) -Greedy action tensor([ 0.9665, -0.4456, -0.4423, -0.1207]) tensor([0.5479, 0.1335, 0.1339, 0.1847]) -Greedy action tensor([ 0.9011, -0.7887, -0.4456, 0.2100]) tensor([0.5140, 0.0949, 0.1337, 0.2575]) -Greedy action tensor([ 1.1029, -0.2060, -0.1317, -0.1463]) tensor([0.5412, 0.1462, 0.1575, 0.1552]) -Greedy action tensor([ 0.8351, -0.5333, -0.2487, -0.1747]) tensor([0.5109, 0.1300, 0.1729, 0.1861]) -Greedy action tensor([ 1.3443, -0.7200, -0.5502, 0.4393]) tensor([0.5946, 0.0755, 0.0894, 0.2405]) -Greedy action tensor([ 1.3463, -0.6132, -0.3646, 1.0454]) tensor([0.4850, 0.0683, 0.0876, 0.3590]) -Greedy action tensor([ 1.3122, -0.5405, -0.1140, 0.1636]) tensor([0.5834, 0.0915, 0.1401, 0.1850]) -Greedy action tensor([ 1.0728, -0.4912, -0.0872, -0.1944]) tensor([0.5542, 0.1160, 0.1737, 0.1561]) -Greedy action tensor([ 1.5922, -0.0699, -0.1124, -0.2029]) tensor([0.6503, 0.1234, 0.1183, 0.1080]) -Greedy action tensor([ 0.6204, -0.4000, -0.3031, -0.2106]) tensor([0.4560, 0.1643, 0.1811, 0.1986]) -Greedy action tensor([ 1.0229, -0.4075, -0.6439, 0.9277]) tensor([0.4279, 0.1024, 0.0808, 0.3890]) -Greedy action tensor([ 1.0628, -0.5718, -0.0341, 0.1089]) tensor([0.5224, 0.1019, 0.1744, 0.2013]) -Greedy action tensor([ 1.2765, -0.7172, -0.3354, 0.4457]) tensor([0.5645, 0.0769, 0.1126, 0.2460]) -Greedy action tensor([ 1.4376, -0.4635, -0.4347, 0.0568]) tensor([0.6433, 0.0961, 0.0989, 0.1617]) -Greedy action tensor([ 0.7975, -0.3879, -0.1639, -0.1326]) tensor([0.4802, 0.1468, 0.1836, 0.1894]) -Greedy action tensor([ 0.8030, -0.5647, -0.1551, -0.1015]) tensor([0.4895, 0.1247, 0.1878, 0.1981]) -Greedy action tensor([ 1.6967, -0.5585, -0.4528, 0.2060]) tensor([0.6913, 0.0725, 0.0806, 0.1557]) -Greedy action tensor([ 1.3954, -0.5087, -0.4543, 0.0252]) tensor([0.6409, 0.0955, 0.1008, 0.1628]) -Greedy action tensor([ 0.8449, -0.4489, -0.7946, 0.5306]) tensor([0.4548, 0.1247, 0.0883, 0.3322]) -Greedy action tensor([ 0.8412, -0.3849, -0.3200, 0.1843]) tensor([0.4706, 0.1381, 0.1473, 0.2440]) -Greedy action tensor([ 1.4240, -0.8127, -0.6230, 0.3738]) tensor([0.6306, 0.0674, 0.0814, 0.2206]) -Greedy action tensor([ 0.8300, -0.8068, -0.3129, 0.2052]) tensor([0.4881, 0.0950, 0.1556, 0.2613]) -Greedy action tensor([ 0.8202, -0.3028, -0.4521, -0.2458]) tensor([0.5129, 0.1668, 0.1437, 0.1766]) -Greedy action tensor([ 1.0970, -0.6377, -0.4940, 0.6618]) tensor([0.4933, 0.0870, 0.1005, 0.3192]) -Greedy action tensor([ 0.5300, -0.4864, -0.3329, 0.1486]) tensor([0.4054, 0.1467, 0.1711, 0.2769]) -Greedy action tensor([ 0.2779, 0.0328, -0.0060, -0.2372]) tensor([0.3192, 0.2498, 0.2403, 0.1907]) -Greedy action tensor([ 0.6034, -0.2653, -0.3353, 0.1650]) tensor([0.4072, 0.1708, 0.1593, 0.2627]) -Greedy action tensor([ 1.1998, -0.5391, 0.1026, 0.2185]) tensor([0.5307, 0.0933, 0.1771, 0.1989]) -Greedy action tensor([ 0.7555, -0.4447, -0.2724, -0.0015]) tensor([0.4699, 0.1415, 0.1681, 0.2204]) -Greedy action tensor([ 0.8588, -0.5313, -0.6016, 0.0444]) tensor([0.5197, 0.1294, 0.1206, 0.2302]) -Greedy action tensor([ 0.2513, -0.2065, -0.4371, -0.0765]) tensor([0.3502, 0.2216, 0.1759, 0.2523]) -Greedy action tensor([ 0.4001, -0.3740, -0.0074, -0.0788]) tensor([0.3642, 0.1679, 0.2423, 0.2256]) -Greedy action tensor([ 0.7850, -0.3438, 0.1548, -0.2044]) tensor([0.4489, 0.1452, 0.2390, 0.1669]) -Greedy action tensor([ 1.4481, -0.4168, -0.4851, -0.0989]) tensor([0.6612, 0.1024, 0.0957, 0.1408]) -Greedy action tensor([ 1.1536, -0.5123, -0.1662, -0.1356]) tensor([0.5775, 0.1092, 0.1543, 0.1591]) -Greedy action tensor([ 0.6545, -0.4741, -0.4725, 0.2641]) tensor([0.4302, 0.1392, 0.1394, 0.2912]) -Greedy action tensor([ 0.5408, -0.2287, -0.6146, 0.3492]) tensor([0.3841, 0.1779, 0.1210, 0.3171]) -Greedy action tensor([ 1.2078, -0.4777, -0.2170, 0.3016]) tensor([0.5465, 0.1013, 0.1315, 0.2208]) -Greedy action tensor([ 0.8390, -0.5458, -0.0111, 0.0206]) tensor([0.4720, 0.1182, 0.2017, 0.2082]) -Greedy action tensor([ 1.1620, -0.5142, -0.2359, -0.1045]) tensor([0.5827, 0.1090, 0.1440, 0.1642]) -Greedy action tensor([ 1.0614, -0.5842, -0.4666, 0.2045]) tensor([0.5452, 0.1052, 0.1183, 0.2314]) -Greedy action tensor([ 0.7224, -0.5059, -0.1795, -0.1284]) tensor([0.4704, 0.1377, 0.1909, 0.2009]) -Greedy action tensor([ 0.5300, -0.4541, 0.0948, -0.1866]) tensor([0.3985, 0.1490, 0.2579, 0.1946]) -Greedy action tensor([ 1.2131, -0.3549, -0.0669, -0.0492]) tensor([0.5651, 0.1178, 0.1571, 0.1599]) -Greedy action tensor([ 0.9610, -0.6032, 0.0408, 0.2769]) tensor([0.4734, 0.0991, 0.1886, 0.2389]) -Greedy action tensor([ 0.7430, -0.3082, -0.4214, -0.3272]) tensor([0.4989, 0.1744, 0.1557, 0.1711]) -Greedy action tensor([ 0.5955, -0.4562, -0.2982, 0.3698]) tensor([0.3912, 0.1367, 0.1600, 0.3121]) -Greedy action tensor([ 1.1635, -0.3548, -0.1318, -0.2029]) tensor([0.5721, 0.1253, 0.1566, 0.1459]) -Greedy action tensor([ 0.7425, -0.4677, -0.1525, -0.1642]) tensor([0.4738, 0.1413, 0.1936, 0.1913]) -Greedy action tensor([ 0.3981, 0.0782, 0.0257, -0.3146]) tensor([0.3442, 0.2499, 0.2372, 0.1687]) -Greedy action tensor([ 0.9178, -0.5046, 0.0148, -0.2320]) tensor([0.5094, 0.1228, 0.2065, 0.1613]) -Greedy action tensor([ 0.4329, -0.1282, 0.1149, -0.1259]) tensor([0.3484, 0.1988, 0.2535, 0.1993]) -Greedy action tensor([ 0.9599, -0.5681, -0.0041, -0.1504]) tensor([0.5187, 0.1126, 0.1978, 0.1709]) -Greedy action tensor([ 0.8047, -0.4095, -0.1337, -0.0576]) tensor([0.4738, 0.1407, 0.1854, 0.2001]) -Greedy action tensor([ 1.0952, -0.4657, -0.1971, -0.2302]) tensor([0.5713, 0.1199, 0.1569, 0.1518]) -Greedy action tensor([ 1.3165, -0.3514, -0.2038, -0.1130]) tensor([0.6073, 0.1146, 0.1328, 0.1454]) -Greedy action tensor([ 1.0570, -0.4656, -0.1569, -0.2318]) tensor([0.5584, 0.1218, 0.1659, 0.1539]) -Greedy action tensor([ 0.8850, -0.5819, -0.0762, -0.0154]) tensor([0.4952, 0.1142, 0.1894, 0.2013]) -Greedy action tensor([ 0.8570, -0.4112, -0.5590, -0.2301]) tensor([0.5373, 0.1512, 0.1304, 0.1812]) -Greedy action tensor([ 0.9521, -0.4225, -0.2163, 0.0896]) tensor([0.5035, 0.1274, 0.1565, 0.2126]) -Greedy action tensor([ 0.5562, -0.0780, -0.0415, -0.2435]) tensor([0.3953, 0.2096, 0.2174, 0.1777]) -Greedy action tensor([ 1.3968, -0.6878, -0.6198, 0.3340]) tensor([0.6239, 0.0776, 0.0830, 0.2155]) -Greedy action tensor([ 1.2834, -0.6241, -0.2297, 0.2133]) tensor([0.5842, 0.0867, 0.1287, 0.2004]) -Greedy action tensor([ 1.0637, -0.4473, -0.4243, 0.4795]) tensor([0.4990, 0.1101, 0.1127, 0.2782]) -Greedy action tensor([ 0.5982, -0.5641, -0.1916, 0.1836]) tensor([0.4120, 0.1289, 0.1870, 0.2722]) -Greedy action tensor([-1.9871, -0.9467, 0.4317, -0.1054]) tensor([0.0462, 0.1309, 0.5194, 0.3035]) -Greedy action tensor([-1.3731, -0.5923, 0.4602, 0.4361]) tensor([0.0643, 0.1405, 0.4024, 0.3928]) -Greedy action tensor([-0.7641, -0.4212, 0.4137, -0.2213]) tensor([0.1355, 0.1910, 0.4402, 0.2333]) -Greedy action tensor([-1.3338, -0.6404, 0.3671, 0.1310]) tensor([0.0781, 0.1562, 0.4278, 0.3379]) -Greedy action tensor([-1.8320, -0.6842, 0.2278, -0.2712]) tensor([0.0597, 0.1880, 0.4681, 0.2842]) -Greedy action tensor([-1.8097, -0.6512, 0.3382, -0.3614]) tensor([0.0588, 0.1873, 0.5037, 0.2502]) -Greedy action tensor([-1.8466, -0.4218, 0.6599, -0.0354]) tensor([0.0425, 0.1766, 0.5210, 0.2599]) -Greedy action tensor([-1.6773, -0.5013, 1.0254, 0.7123]) tensor([0.0333, 0.1078, 0.4962, 0.3628]) -Greedy action tensor([-1.9521, -0.8439, 0.4940, -0.1181]) tensor([0.0458, 0.1387, 0.5287, 0.2867]) -Greedy action tensor([-1.1820, 0.0056, 0.4983, -0.4139]) tensor([0.0847, 0.2778, 0.4548, 0.1827]) -Greedy action tensor([-1.9049, -0.9852, 0.1703, -0.4006]) tensor([0.0626, 0.1570, 0.4986, 0.2817]) -Greedy action tensor([-0.6691, -0.5657, 0.2765, 0.4172]) tensor([0.1308, 0.1450, 0.3367, 0.3875]) -Greedy action tensor([-1.4426, -0.4878, 0.0192, -0.3993]) tensor([0.0930, 0.2417, 0.4013, 0.2641]) -Greedy action tensor([-2.0050, -0.9177, 0.3559, -0.1901]) tensor([0.0483, 0.1432, 0.5119, 0.2965]) -Greedy action tensor([-1.8969, -0.9091, 0.2534, -0.3041]) tensor([0.0582, 0.1562, 0.4995, 0.2861]) -Greedy action tensor([-1.6531, -0.5969, 0.4104, -0.3319]) tensor([0.0645, 0.1855, 0.5081, 0.2419]) -Greedy action tensor([-0.9081, -0.3330, 0.5199, -0.4696]) tensor([0.1177, 0.2092, 0.4907, 0.1824]) -Greedy action tensor([-0.8361, -0.7396, 0.6269, -0.0663]) tensor([0.1166, 0.1284, 0.5034, 0.2517]) -Greedy action tensor([-1.3677, -0.4783, 0.7278, -0.5283]) tensor([0.0721, 0.1754, 0.5858, 0.1668]) -Greedy action tensor([-1.9651, -0.5339, 0.8584, 0.1931]) tensor([0.0326, 0.1364, 0.5489, 0.2822]) -Greedy action tensor([-1.5254, -0.4864, 0.5228, -0.1753]) tensor([0.0648, 0.1831, 0.5023, 0.2499]) -Greedy action tensor([-1.2182, -0.5686, 0.3195, 0.2686]) tensor([0.0834, 0.1597, 0.3881, 0.3688]) -Greedy action tensor([-1.8571, -0.6333, 0.1758, -0.3357]) tensor([0.0602, 0.2046, 0.4596, 0.2756]) -Greedy action tensor([-1.0074, -0.5370, 0.3921, 0.1563]) tensor([0.1015, 0.1624, 0.4113, 0.3249]) -Greedy action tensor([-1.5580e+00, -5.2364e-01, 4.7281e-01, 1.6350e-04]) tensor([0.0618, 0.1738, 0.4709, 0.2935]) -Greedy action tensor([-1.9605, -0.6744, 0.9364, 0.2262]) tensor([0.0316, 0.1144, 0.5726, 0.2815]) -Greedy action tensor([-2.0470, -0.9241, 0.4763, -0.1240]) tensor([0.0428, 0.1314, 0.5333, 0.2925]) -Greedy action tensor([-0.5930, -0.5494, 0.1682, 0.2647]) tensor([0.1528, 0.1596, 0.3272, 0.3603]) -Greedy action tensor([-0.3599, 0.0250, 0.1270, 0.0855]) tensor([0.1767, 0.2597, 0.2876, 0.2759]) -Greedy action tensor([-1.9552, -0.6747, 0.3625, -0.2164]) tensor([0.0489, 0.1760, 0.4966, 0.2784]) -Greedy action tensor([-1.6909, -0.7415, 0.0490, -0.4212]) tensor([0.0779, 0.2012, 0.4436, 0.2772]) -Greedy action tensor([-1.5836, -0.5979, 0.5169, -0.0205]) tensor([0.0602, 0.1612, 0.4915, 0.2871]) -Greedy action tensor([-1.3833, -0.3526, 0.7470, 0.9064]) tensor([0.0453, 0.1269, 0.3810, 0.4468]) -Greedy action tensor([-1.2742, -0.1107, 0.4772, -0.4490]) tensor([0.0817, 0.2614, 0.4706, 0.1864]) -Greedy action tensor([-1.5410, -0.4442, 0.6638, 0.6085]) tensor([0.0462, 0.1384, 0.4190, 0.3965]) -Greedy action tensor([-1.8633, -0.9284, 0.1990, -0.4042]) tensor([0.0636, 0.1621, 0.5005, 0.2738]) -Greedy action tensor([-1.4174, -0.9672, 0.3618, -0.5319]) tensor([0.0916, 0.1437, 0.5427, 0.2220]) -Greedy action tensor([-2.0184, -0.9022, 0.5033, -0.1523]) tensor([0.0435, 0.1329, 0.5421, 0.2814]) -Greedy action tensor([-1.5272, -0.4824, 0.7810, -0.5239]) tensor([0.0601, 0.1710, 0.6048, 0.1640]) -Greedy action tensor([-1.8336, -0.5534, 0.6779, -0.0266]) tensor([0.0435, 0.1563, 0.5355, 0.2647]) -Greedy action tensor([-1.1867, -0.5497, 0.2830, 0.2029]) tensor([0.0889, 0.1680, 0.3864, 0.3567]) -Greedy action tensor([-1.6481, -0.7890, -0.1169, -0.4207]) tensor([0.0877, 0.2072, 0.4057, 0.2994]) -Greedy action tensor([-1.8174, -0.5311, 0.7763, 0.0476]) tensor([0.0409, 0.1480, 0.5471, 0.2640]) -Greedy action tensor([-1.1253, -0.4961, 0.3106, 0.5227]) tensor([0.0815, 0.1528, 0.3424, 0.4233]) -Greedy action tensor([-1.7867, -0.4776, 0.5715, -0.0633]) tensor([0.0479, 0.1774, 0.5064, 0.2684]) -Greedy action tensor([-1.6800e+00, -5.0193e-01, 5.2179e-01, -1.1059e-03]) tensor([0.0536, 0.1742, 0.4848, 0.2874]) -Greedy action tensor([-1.6148, -0.5302, 0.4811, 0.0465]) tensor([0.0576, 0.1704, 0.4686, 0.3034]) -Greedy action tensor([-2.0017, -0.6399, 0.9339, 0.2071]) tensor([0.0305, 0.1189, 0.5735, 0.2772]) -Greedy action tensor([-1.8550, -0.9690, 0.1644, -0.4071]) tensor([0.0657, 0.1594, 0.4952, 0.2796]) -Greedy action tensor([-0.8923, -0.5118, 0.4962, 0.2230]) tensor([0.1050, 0.1536, 0.4210, 0.3204]) -Greedy action tensor([-2.0009, -0.8004, 0.3850, -0.1573]) tensor([0.0465, 0.1544, 0.5053, 0.2938]) -Greedy action tensor([-2.0452e+00, -8.2206e-01, 6.3570e-01, -1.5833e-03]) tensor([0.0374, 0.1272, 0.5465, 0.2889]) -Greedy action tensor([-1.1410, -0.3365, 0.5386, 0.9796]) tensor([0.0590, 0.1320, 0.3167, 0.4922]) -Greedy action tensor([-0.7474, -0.5757, 0.2679, 0.3854]) tensor([0.1242, 0.1475, 0.3428, 0.3856]) -Greedy action tensor([-1.7167, -0.7930, 0.0217, -0.4186]) tensor([0.0777, 0.1957, 0.4420, 0.2846]) -Greedy action tensor([-1.7462, -0.7486, 0.1498, -0.3969]) tensor([0.0703, 0.1906, 0.4681, 0.2710]) -Greedy action tensor([-1.2055, -0.5885, 0.3373, 0.1090]) tensor([0.0889, 0.1647, 0.4156, 0.3308]) -Greedy action tensor([-1.7456, -0.5264, 0.8565, 0.3682]) tensor([0.0382, 0.1294, 0.5158, 0.3165]) -Greedy action tensor([-0.4133, -0.3716, 0.1910, 0.0471]) tensor([0.1832, 0.1910, 0.3353, 0.2904]) -Greedy action tensor([-0.8305, 0.0037, 0.3062, -0.1517]) tensor([0.1192, 0.2745, 0.3714, 0.2350]) -Greedy action tensor([-1.5197, -0.5502, 0.7216, -0.2215]) tensor([0.0599, 0.1578, 0.5630, 0.2193]) -Greedy action tensor([-1.8834, -0.5513, 0.2404, -0.2700]) tensor([0.0550, 0.2085, 0.4602, 0.2762]) -Greedy action tensor([-1.5372, 0.2621, 0.6016, -0.4640]) tensor([0.0542, 0.3275, 0.4599, 0.1584]) -Greedy action tensor([-1.7588, -0.9423, 0.2028, -0.4646]) tensor([0.0713, 0.1614, 0.5071, 0.2602]) -Greedy action tensor([-1.4853, -0.5522, 0.5945, 0.3013]) tensor([0.0571, 0.1452, 0.4569, 0.3408]) -Greedy action tensor([-1.3616, -0.5809, 0.3346, 0.2313]) tensor([0.0738, 0.1611, 0.4023, 0.3628]) -Greedy action tensor([-1.9362, -0.4378, 0.7482, 0.0173]) tensor([0.0368, 0.1646, 0.5390, 0.2595]) -Greedy action tensor([-1.9295, -0.5763, 0.4375, -0.1523]) tensor([0.0466, 0.1804, 0.4973, 0.2757]) -Greedy action tensor([-0.8345, -0.4243, 0.6842, 1.0290]) tensor([0.0740, 0.1115, 0.3377, 0.4768]) -Greedy action tensor([-1.6849, -0.6206, 0.5072, -0.1696]) tensor([0.0575, 0.1666, 0.5145, 0.2615]) -Greedy action tensor([-1.7884, -0.7974, 0.3330, -0.3073]) tensor([0.0608, 0.1639, 0.5076, 0.2676]) -Greedy action tensor([-0.5984, -0.5933, 0.1800, 0.2020]) tensor([0.1560, 0.1568, 0.3398, 0.3474]) -Greedy action tensor([-1.0588, -0.5413, 0.2423, 0.3624]) tensor([0.0953, 0.1599, 0.3501, 0.3947]) -Greedy action tensor([-1.9963, -0.6849, 0.8683, 0.1051]) tensor([0.0329, 0.1220, 0.5764, 0.2687]) -Greedy action tensor([-1.0023, -0.0507, -0.7166, -0.2313]) tensor([0.1412, 0.3657, 0.1879, 0.3053]) -Greedy action tensor([-1.6391, -0.6749, 0.3995, -0.4349]) tensor([0.0683, 0.1792, 0.5247, 0.2278]) -Greedy action tensor([-1.9899, -0.8677, 0.3847, -0.1088]) tensor([0.0468, 0.1437, 0.5027, 0.3069]) -Greedy action tensor([-1.7301, -1.0388, 0.0613, -0.6675]) tensor([0.0841, 0.1679, 0.5045, 0.2434]) -Greedy action tensor([-1.0458, -0.5414, 0.3210, 0.0486]) tensor([0.1045, 0.1731, 0.4101, 0.3123]) -Greedy action tensor([-1.9305, -0.4812, 0.8331, 0.1489]) tensor([0.0343, 0.1463, 0.5446, 0.2748]) -Greedy action tensor([-1.5308, -0.5108, 0.8970, 0.6613]) tensor([0.0416, 0.1153, 0.4711, 0.3721]) -Greedy action tensor([ 0.1677, 0.1219, -0.0246, -0.3172]) tensor([0.2945, 0.2813, 0.2429, 0.1813]) -Greedy action tensor([ 0.1728, 0.0918, 0.2201, -0.1464]) tensor([0.2705, 0.2494, 0.2836, 0.1966]) -Greedy action tensor([ 0.8404, -0.3810, -0.1286, -0.5062]) tensor([0.5169, 0.1524, 0.1962, 0.1345]) -Greedy action tensor([ 0.5186, -0.1371, -0.0894, -0.2092]) tensor([0.3927, 0.2038, 0.2138, 0.1897]) -Greedy action tensor([ 0.4753, 0.1247, 0.0138, -0.1771]) tensor([0.3502, 0.2467, 0.2207, 0.1824]) -Greedy action tensor([ 0.7667, -0.2071, -0.1527, -0.3974]) tensor([0.4788, 0.1808, 0.1909, 0.1495]) -Greedy action tensor([ 0.7648, -0.4133, -0.1036, -0.4708]) tensor([0.4955, 0.1525, 0.2079, 0.1440]) -Greedy action tensor([ 0.6626, -0.4147, -0.0568, -0.4616]) tensor([0.4646, 0.1582, 0.2263, 0.1510]) -Greedy action tensor([ 0.4323, -0.2628, -0.2035, -0.4397]) tensor([0.4087, 0.2040, 0.2164, 0.1709]) -Greedy action tensor([ 0.4160, 0.0531, 0.0923, -0.1244]) tensor([0.3331, 0.2318, 0.2410, 0.1941]) -Greedy action tensor([ 0.3319, 0.1014, 0.1922, -0.1146]) tensor([0.3027, 0.2404, 0.2632, 0.1937]) -Greedy action tensor([ 0.7545, -0.3996, -0.0079, -0.6447]) tensor([0.4929, 0.1554, 0.2300, 0.1217]) -Greedy action tensor([ 0.6108, -0.2483, -0.0091, -0.7397]) tensor([0.4503, 0.1907, 0.2423, 0.1167]) -Greedy action tensor([ 0.9946, -0.5716, -0.0508, -0.6060]) tensor([0.5675, 0.1185, 0.1995, 0.1145]) -Greedy action tensor([ 0.3653, 0.1871, -0.0055, -0.0376]) tensor([0.3130, 0.2619, 0.2160, 0.2092]) -Greedy action tensor([ 0.5904, 0.0774, -0.0169, -0.2107]) tensor([0.3857, 0.2310, 0.2102, 0.1731]) -Greedy action tensor([ 0.4516, -0.1015, 0.1140, -0.2271]) tensor([0.3577, 0.2057, 0.2552, 0.1814]) -Greedy action tensor([ 0.5068, -0.2342, 0.0657, -0.5376]) tensor([0.4045, 0.1928, 0.2603, 0.1424]) -Greedy action tensor([ 0.4488, -0.3487, 0.0953, -0.3635]) tensor([0.3851, 0.1735, 0.2704, 0.1709]) -Greedy action tensor([ 0.9045, -0.4947, -0.1980, -0.6536]) tensor([0.5588, 0.1379, 0.1856, 0.1177]) -Greedy action tensor([ 0.8791, -0.3826, -0.0902, -0.4345]) tensor([0.5178, 0.1466, 0.1964, 0.1392]) -Greedy action tensor([ 0.3203, 0.1400, 0.1308, -0.1575]) tensor([0.3046, 0.2544, 0.2520, 0.1889]) -Greedy action tensor([ 0.2716, 0.1134, 0.1905, -0.2151]) tensor([0.2950, 0.2518, 0.2720, 0.1813]) -Greedy action tensor([ 1.1196, -0.6096, -0.0307, -0.7374]) tensor([0.6060, 0.1075, 0.1918, 0.0946]) -Greedy action tensor([ 0.4429, -0.1669, 0.0469, -0.4384]) tensor([0.3801, 0.2066, 0.2558, 0.1575]) -Greedy action tensor([ 3.4053e-01, -1.6249e-01, -2.0035e-04, -4.6416e-01]) tensor([0.3619, 0.2188, 0.2574, 0.1619]) -Greedy action tensor([ 0.8429, -0.8472, 0.0118, -0.7270]) tensor([0.5470, 0.1009, 0.2383, 0.1138]) -Greedy action tensor([ 0.4209, -0.0124, -0.0509, -0.1909]) tensor([0.3553, 0.2304, 0.2217, 0.1927]) -Greedy action tensor([ 0.3572, 0.0961, 0.0020, -0.1773]) tensor([0.3271, 0.2519, 0.2293, 0.1917]) -Greedy action tensor([ 0.5894, -0.2989, 0.1529, -0.6495]) tensor([0.4260, 0.1752, 0.2753, 0.1234]) -Greedy action tensor([ 0.3398, -0.0590, 0.0387, -0.2327]) tensor([0.3361, 0.2256, 0.2487, 0.1896]) -Greedy action tensor([ 0.2545, 0.0972, 0.0616, -0.1374]) tensor([0.2981, 0.2547, 0.2458, 0.2014]) -Greedy action tensor([ 0.3834, -0.0530, 0.0575, -0.2027]) tensor([0.3419, 0.2210, 0.2468, 0.1903]) -Greedy action tensor([ 1.0709, -0.6882, -0.1099, -0.4656]) tensor([0.5902, 0.1016, 0.1812, 0.1270]) -Greedy action tensor([ 0.4828, -0.3556, 0.1356, -0.3575]) tensor([0.3890, 0.1682, 0.2749, 0.1679]) -Greedy action tensor([ 0.2699, -0.1015, 0.0995, -0.2046]) tensor([0.3169, 0.2186, 0.2673, 0.1972]) -Greedy action tensor([ 0.1313, -0.0225, 0.1037, -0.1767]) tensor([0.2805, 0.2405, 0.2729, 0.2061]) -Greedy action tensor([ 0.3437, 0.1134, 0.1690, -0.1722]) tensor([0.3095, 0.2458, 0.2599, 0.1848]) -Greedy action tensor([ 0.4125, 0.1859, 0.0580, -0.2461]) tensor([0.3315, 0.2643, 0.2326, 0.1716]) -Greedy action tensor([ 0.5882, -0.1509, -0.0953, -0.2771]) tensor([0.4161, 0.1987, 0.2101, 0.1751]) -Greedy action tensor([ 0.2540, 0.2610, 0.0649, -0.2254]) tensor([0.2895, 0.2916, 0.2396, 0.1793]) -Greedy action tensor([ 0.5267, -0.2192, 0.0719, -0.3933]) tensor([0.3988, 0.1892, 0.2531, 0.1589]) -Greedy action tensor([ 0.4588, 0.1640, -0.0435, 0.0057]) tensor([0.3350, 0.2494, 0.2027, 0.2129]) -Greedy action tensor([ 0.3209, -0.0248, 0.1978, -0.1777]) tensor([0.3126, 0.2212, 0.2764, 0.1899]) -Greedy action tensor([ 0.6605, -0.0912, -0.1193, -0.3525]) tensor([0.4361, 0.2056, 0.1999, 0.1584]) -Greedy action tensor([ 0.3247, 0.2365, 0.1675, -0.2279]) tensor([0.2989, 0.2737, 0.2554, 0.1720]) -Greedy action tensor([ 0.9836, -0.4763, 0.0035, -0.7421]) tensor([0.5601, 0.1301, 0.2102, 0.0997]) -Greedy action tensor([ 0.4724, 0.1181, 0.0918, -0.0395]) tensor([0.3351, 0.2351, 0.2290, 0.2008]) -Greedy action tensor([ 0.2481, 0.1415, 0.2489, -0.1436]) tensor([0.2797, 0.2514, 0.2799, 0.1890]) -Greedy action tensor([ 0.4636, 0.0620, 0.0901, -0.3615]) tensor([0.3577, 0.2394, 0.2462, 0.1567]) -Greedy action tensor([ 0.3743, -0.0145, 0.1401, -0.3940]) tensor([0.3409, 0.2311, 0.2698, 0.1581]) -Greedy action tensor([ 0.6165, -0.3139, -0.0319, -0.3244]) tensor([0.4334, 0.1709, 0.2266, 0.1691]) -Greedy action tensor([ 0.2823, -0.0973, 0.0521, -0.1814]) tensor([0.3218, 0.2202, 0.2556, 0.2024]) -Greedy action tensor([ 0.5133, -0.0040, 0.0934, -0.5083]) tensor([0.3827, 0.2281, 0.2514, 0.1378]) -Greedy action tensor([ 0.3338, 0.0808, 0.1620, -0.2028]) tensor([0.3122, 0.2424, 0.2629, 0.1825]) -Greedy action tensor([ 0.3712, 0.1702, 0.1580, -0.2717]) tensor([0.3173, 0.2595, 0.2564, 0.1668]) -Greedy action tensor([ 0.1789, 0.1618, 0.1477, -0.1626]) tensor([0.2730, 0.2684, 0.2646, 0.1940]) -Greedy action tensor([ 0.3488, 0.2276, -0.0657, 0.0791]) tensor([0.3021, 0.2676, 0.1996, 0.2307]) -Greedy action tensor([ 0.2939, -0.1117, 0.0795, -0.4372]) tensor([0.3384, 0.2256, 0.2731, 0.1629]) -Greedy action tensor([ 0.3993, 0.1513, 0.2022, -0.1824]) tensor([0.3164, 0.2469, 0.2598, 0.1769]) -Greedy action tensor([ 0.3329, 0.1948, 0.1553, -0.2339]) tensor([0.3053, 0.2659, 0.2556, 0.1732]) -Greedy action tensor([ 0.3038, 0.0765, 0.0356, -0.2806]) tensor([0.3206, 0.2554, 0.2452, 0.1787]) -Greedy action tensor([ 0.1060, -0.1402, 0.1740, -0.2077]) tensor([0.2791, 0.2182, 0.2987, 0.2040]) -Greedy action tensor([ 0.6316, -0.2455, 0.0914, -0.4701]) tensor([0.4290, 0.1785, 0.2500, 0.1426]) -Greedy action tensor([ 0.4656, -0.1753, 0.2195, -0.4602]) tensor([0.3697, 0.1948, 0.2890, 0.1465]) -Greedy action tensor([ 0.2100, 0.0148, 0.0506, -0.2417]) tensor([0.3019, 0.2484, 0.2575, 0.1922]) -Greedy action tensor([ 0.7627, -0.5404, -0.1433, -0.5969]) tensor([0.5174, 0.1406, 0.2091, 0.1329]) -Greedy action tensor([ 0.8163, -0.5097, -0.1534, -0.6695]) tensor([0.5345, 0.1419, 0.2027, 0.1210]) -Greedy action tensor([ 0.4948, -0.1403, 0.0052, -0.3293]) tensor([0.3874, 0.2053, 0.2374, 0.1699]) -Greedy action tensor([ 0.2545, 0.0103, -0.0586, -0.4004]) tensor([0.3296, 0.2582, 0.2410, 0.1712]) -Greedy action tensor([ 0.3316, 0.0318, 0.0165, -0.3193]) tensor([0.3342, 0.2476, 0.2439, 0.1743]) -Greedy action tensor([ 0.6632, -0.2335, 0.0438, -0.2343]) tensor([0.4248, 0.1733, 0.2287, 0.1732]) -Greedy action tensor([ 0.6230, -0.1854, -0.0833, -0.4773]) tensor([0.4402, 0.1961, 0.2172, 0.1465]) -Greedy action tensor([ 0.6715, -0.2489, -0.1049, -0.4142]) tensor([0.4553, 0.1814, 0.2095, 0.1538]) -Greedy action tensor([ 7.5050e-01, -7.8416e-01, -5.7417e-04, -8.4745e-01]) tensor([0.5292, 0.1141, 0.2497, 0.1071]) -Greedy action tensor([ 0.3588, 0.0822, 0.1391, -0.2416]) tensor([0.3216, 0.2439, 0.2581, 0.1764]) -Greedy action tensor([ 0.3454, -0.0899, 0.1140, -0.3812]) tensor([0.3420, 0.2213, 0.2713, 0.1654]) -Greedy action tensor([ 0.4116, -0.0489, -0.0172, -0.2707]) tensor([0.3587, 0.2263, 0.2336, 0.1813]) -Greedy action tensor([ 0.5192, -0.3250, -0.0200, -0.4681]) tensor([0.4192, 0.1802, 0.2445, 0.1562]) -Greedy action tensor([ 0.3228, 0.1366, 0.0809, -0.2627]) tensor([0.3153, 0.2617, 0.2475, 0.1755]) -Greedy action tensor([ 0.3005, 0.1155, -0.0449, -0.1917]) tensor([0.3174, 0.2638, 0.2247, 0.1940]) -Greedy action tensor([1.9584, 0.0573, 0.4999, 0.7965]) tensor([0.5900, 0.0881, 0.1372, 0.1846]) -Greedy action tensor([ 1.6566, 0.4203, 0.1533, -0.1320]) tensor([0.5952, 0.1729, 0.1324, 0.0995]) -Greedy action tensor([ 1.3945, -0.2058, 0.1053, 1.0482]) tensor([0.4578, 0.0924, 0.1261, 0.3238]) -Greedy action tensor([ 1.3686, -0.4231, -0.1646, 0.3623]) tensor([0.5721, 0.0953, 0.1235, 0.2091]) -Greedy action tensor([ 1.2216, -0.9216, 0.3325, 0.5242]) tensor([0.4935, 0.0579, 0.2029, 0.2457]) -Greedy action tensor([ 1.2525, 0.1422, -0.8117, 1.0920]) tensor([0.4332, 0.1427, 0.0550, 0.3690]) -Greedy action tensor([ 1.6180, -0.2205, 0.2330, 1.6888]) tensor([0.4028, 0.0641, 0.1008, 0.4323]) -Greedy action tensor([ 1.3299, -0.2832, 0.9801, 0.7773]) tensor([0.4033, 0.0804, 0.2842, 0.2321]) -Greedy action tensor([ 1.3372, -0.0833, 1.1754, 0.8896]) tensor([0.3661, 0.0885, 0.3114, 0.2340]) -Greedy action tensor([ 1.1641, -0.7737, 1.2141, 0.1409]) tensor([0.3914, 0.0564, 0.4115, 0.1407]) -Greedy action tensor([ 0.8107, -0.0563, -0.2504, 0.9535]) tensor([0.3425, 0.1439, 0.1185, 0.3951]) -Greedy action tensor([ 1.6753, -0.9559, -0.2801, 1.1139]) tensor([0.5606, 0.0404, 0.0793, 0.3197]) -Greedy action tensor([ 1.0601, -0.4805, 0.6696, 1.0994]) tensor([0.3412, 0.0731, 0.2309, 0.3549]) -Greedy action tensor([ 0.2095, 0.6451, -0.1122, 0.7071]) tensor([0.2034, 0.3145, 0.1475, 0.3346]) -Greedy action tensor([ 1.0570, -0.2606, -1.1488, 1.6161]) tensor([0.3198, 0.0856, 0.0352, 0.5594]) -Greedy action tensor([ 0.2412, -0.9581, 0.1896, 1.1480]) tensor([0.2115, 0.0638, 0.2009, 0.5238]) -Greedy action tensor([ 0.3378, 0.3170, -0.3729, 0.4250]) tensor([0.2808, 0.2750, 0.1379, 0.3063]) -Greedy action tensor([ 0.8498, -1.7785, -0.2756, 0.5801]) tensor([0.4629, 0.0334, 0.1502, 0.3535]) -Greedy action tensor([ 0.1549, -0.7158, 0.4967, 0.3100]) tensor([0.2504, 0.1048, 0.3524, 0.2924]) -Greedy action tensor([ 1.6126, -0.5862, 0.3664, 1.7375]) tensor([0.3950, 0.0438, 0.1136, 0.4476]) -Greedy action tensor([1.4244, 0.2181, 0.0069, 1.0157]) tensor([0.4533, 0.1357, 0.1098, 0.3012]) -Greedy action tensor([ 0.2815, 0.1272, -0.4104, 0.7202]) tensor([0.2559, 0.2193, 0.1281, 0.3968]) -Greedy action tensor([ 1.0576, -0.8009, 1.0791, 0.3304]) tensor([0.3758, 0.0586, 0.3840, 0.1816]) -Greedy action tensor([ 2.0726, -0.1096, -0.6380, 0.6988]) tensor([0.6981, 0.0787, 0.0464, 0.1767]) -Greedy action tensor([ 0.9757, -0.8361, 0.1073, 0.6565]) tensor([0.4330, 0.0707, 0.1817, 0.3146]) -Greedy action tensor([ 0.8853, 0.6864, -0.7835, 0.7608]) tensor([0.3459, 0.2835, 0.0652, 0.3054]) -Greedy action tensor([ 1.5437, -0.5167, 1.1636, 1.6402]) tensor([0.3434, 0.0437, 0.2348, 0.3781]) -Greedy action tensor([ 0.6427, -0.6850, 0.0111, 0.1350]) tensor([0.4169, 0.1105, 0.2217, 0.2509]) -Greedy action tensor([0.1117, 0.4707, 0.1415, 0.2956]) tensor([0.2144, 0.3070, 0.2209, 0.2577]) -Greedy action tensor([ 8.1413e-01, 6.9642e-04, -4.5149e-01, 6.5939e-01]) tensor([0.3873, 0.1717, 0.1092, 0.3318]) -Greedy action tensor([ 1.8788, -0.7807, -0.1716, 0.8046]) tensor([0.6492, 0.0454, 0.0836, 0.2218]) -Greedy action tensor([0.2484, 0.4109, 1.4859, 0.0422]) tensor([0.1553, 0.1827, 0.5355, 0.1264]) -Greedy action tensor([-0.2004, 0.1192, 1.5450, 0.3421]) tensor([0.1018, 0.1401, 0.5830, 0.1751]) -Greedy action tensor([ 1.6659, -0.5656, 0.9478, 0.9139]) tensor([0.4839, 0.0520, 0.2360, 0.2281]) -Greedy action tensor([ 2.0469, -1.3350, 0.4934, 1.9467]) tensor([0.4651, 0.0158, 0.0984, 0.4208]) -Greedy action tensor([ 1.1958, -1.5573, 0.7396, 1.0126]) tensor([0.3953, 0.0252, 0.2505, 0.3291]) -Greedy action tensor([ 1.5679, -0.0339, -0.9960, 0.7410]) tensor([0.5828, 0.1174, 0.0449, 0.2549]) -Greedy action tensor([ 2.1523, -0.3645, 0.4905, 0.9095]) tensor([0.6414, 0.0518, 0.1217, 0.1851]) -Greedy action tensor([ 0.7047, -0.1665, -0.0165, 1.4479]) tensor([0.2495, 0.1044, 0.1213, 0.5247]) -Greedy action tensor([ 1.7831, -0.7526, 0.2680, 1.4830]) tensor([0.4903, 0.0388, 0.1078, 0.3632]) -Greedy action tensor([0.4098, 0.6429, 0.5537, 0.2196]) tensor([0.2356, 0.2975, 0.2721, 0.1948]) -Greedy action tensor([ 0.3764, 0.7454, -0.5673, 0.4671]) tensor([0.2544, 0.3680, 0.0990, 0.2786]) -Greedy action tensor([1.6540, 0.7812, 0.9750, 1.2457]) tensor([0.3861, 0.1613, 0.1958, 0.2567]) -Greedy action tensor([ 1.7444, -0.5537, 1.0730, 1.6921]) tensor([0.3906, 0.0392, 0.1996, 0.3706]) -Greedy action tensor([ 1.2794, -0.3827, 0.6180, 1.5064]) tensor([0.3378, 0.0641, 0.1743, 0.4238]) -Greedy action tensor([ 1.2705, -0.1943, 0.2959, 1.2090]) tensor([0.3923, 0.0907, 0.1480, 0.3689]) -Greedy action tensor([ 1.2973, -0.4104, -0.1822, 1.3451]) tensor([0.4068, 0.0738, 0.0927, 0.4268]) -Greedy action tensor([ 1.1294, 0.9476, -1.2892, 0.4803]) tensor([0.4089, 0.3410, 0.0364, 0.2137]) -Greedy action tensor([ 0.7808, -1.0997, 0.2255, 0.8535]) tensor([0.3569, 0.0544, 0.2048, 0.3838]) -Greedy action tensor([ 0.9801, -0.5970, 0.6278, 0.7733]) tensor([0.3673, 0.0759, 0.2582, 0.2987]) -Greedy action tensor([0.7916, 0.6574, 0.0446, 0.9012]) tensor([0.2887, 0.2524, 0.1368, 0.3221]) -Greedy action tensor([ 0.6478, 0.0667, -0.2738, 0.4772]) tensor([0.3571, 0.1997, 0.1421, 0.3011]) -Greedy action tensor([ 1.0149, -0.8110, -1.1352, 1.4629]) tensor([0.3518, 0.0567, 0.0410, 0.5506]) -Greedy action tensor([ 2.3039, -0.3681, 0.7019, 1.7552]) tensor([0.5410, 0.0374, 0.1090, 0.3126]) -Greedy action tensor([ 0.9867, 0.2601, -0.0766, -0.2963]) tensor([0.4748, 0.2296, 0.1640, 0.1316]) -Greedy action tensor([ 2.0552, -0.1168, 0.7648, 1.7728]) tensor([0.4666, 0.0532, 0.1284, 0.3518]) -Greedy action tensor([ 0.7568, -0.0947, -0.8714, 0.6136]) tensor([0.4017, 0.1714, 0.0788, 0.3481]) -Greedy action tensor([ 0.8084, -0.0685, -0.6741, 0.3853]) tensor([0.4351, 0.1810, 0.0988, 0.2850]) -Greedy action tensor([1.4244, 0.1092, 0.1000, 1.8341]) tensor([0.3289, 0.0883, 0.0875, 0.4954]) -Greedy action tensor([ 0.5658, 1.2376, -0.2323, 0.2334]) tensor([0.2424, 0.4746, 0.1091, 0.1739]) -Greedy action tensor([ 1.5429, -0.9125, 0.4572, 1.0904]) tensor([0.4856, 0.0417, 0.1639, 0.3088]) -Greedy action tensor([ 1.3528, 0.1076, -1.2717, 1.4321]) tensor([0.4093, 0.1178, 0.0297, 0.4432]) -Greedy action tensor([1.7160, 0.2402, 1.4295, 0.6976]) tensor([0.4272, 0.0977, 0.3208, 0.1543]) -Greedy action tensor([ 1.1086, -0.1536, 0.8735, 0.2277]) tensor([0.4019, 0.1138, 0.3177, 0.1666]) -Greedy action tensor([ 1.1800, -1.2192, 0.8338, 0.4855]) tensor([0.4353, 0.0395, 0.3079, 0.2173]) -Greedy action tensor([ 1.6087, -0.4304, 1.4279, 0.8793]) tensor([0.4087, 0.0532, 0.3411, 0.1971]) -Greedy action tensor([-0.0024, 0.7014, -0.9794, 0.8107]) tensor([0.1769, 0.3576, 0.0666, 0.3989]) -Greedy action tensor([ 0.8650, 0.5236, -1.0499, 0.8423]) tensor([0.3526, 0.2507, 0.0520, 0.3447]) -Greedy action tensor([ 1.3798, 0.3807, -0.5670, 0.1033]) tensor([0.5587, 0.2057, 0.0797, 0.1559]) -Greedy action tensor([ 1.2669, -0.1307, 0.2652, 0.8645]) tensor([0.4380, 0.1083, 0.1609, 0.2929]) -Greedy action tensor([ 1.7977, -0.1893, 0.7980, 0.9719]) tensor([0.5147, 0.0706, 0.1894, 0.2254]) -Greedy action tensor([ 1.4959, -1.1392, 1.4911, 0.8567]) tensor([0.3854, 0.0276, 0.3836, 0.2034]) -Greedy action tensor([ 1.4767, -1.3977, 0.0213, 0.5151]) tensor([0.5981, 0.0338, 0.1395, 0.2286]) -Greedy action tensor([ 1.5260, -0.3781, -0.1686, 0.9956]) tensor([0.5206, 0.0775, 0.0956, 0.3063]) -Greedy action tensor([1.1491, 0.3619, 0.6426, 0.7729]) tensor([0.3644, 0.1658, 0.2196, 0.2502]) -Greedy action tensor([ 2.0359, -0.8681, 0.4838, 0.7692]) tensor([0.6458, 0.0354, 0.1368, 0.1820]) -Greedy action tensor([ 1.2272, -1.6711, 0.6131, 0.8622]) tensor([0.4366, 0.0241, 0.2362, 0.3031]) -Greedy action tensor([ 1.0427, -0.0887, 0.3471, 0.9142]) tensor([0.3703, 0.1194, 0.1847, 0.3256]) -Greedy action tensor([ 1.2185, 0.5915, -0.3786, -0.1279]) tensor([0.5008, 0.2675, 0.1014, 0.1303]) -Greedy action tensor([ 0.7322, -0.4625, 1.2308, 1.0077]) tensor([0.2344, 0.0710, 0.3859, 0.3087]) -Greedy action tensor([ 0.6514, 0.7177, -0.3777, 0.8867]) tensor([0.2709, 0.2895, 0.0968, 0.3428]) -Greedy action tensor([ 1.2138, -0.7328, 0.3012, 1.1773]) tensor([0.3987, 0.0569, 0.1601, 0.3844]) -Greedy action tensor([ 0.8151, -0.7933, -0.4059, 0.0783]) tensor([0.5067, 0.1014, 0.1494, 0.2425]) -Greedy action tensor([ 1.5480, -0.3511, -0.5806, -0.0637]) tensor([0.6811, 0.1020, 0.0811, 0.1359]) -Greedy action tensor([ 0.9554, -0.7122, -0.2159, 0.1704]) tensor([0.5116, 0.0965, 0.1586, 0.2333]) -Greedy action tensor([ 1.2710, -0.6472, -0.3008, 0.3061]) tensor([0.5762, 0.0846, 0.1197, 0.2195]) -Greedy action tensor([ 1.2935, -0.8161, -0.5957, 0.6748]) tensor([0.5521, 0.0670, 0.0835, 0.2974]) -Greedy action tensor([ 1.1535, -0.6283, -0.4448, -0.0649]) tensor([0.6001, 0.1010, 0.1214, 0.1775]) -Greedy action tensor([ 0.7199, -0.4764, -0.6812, 0.4025]) tensor([0.4392, 0.1328, 0.1082, 0.3198]) -Greedy action tensor([ 0.5212, -0.4019, -0.8731, 0.1659]) tensor([0.4262, 0.1693, 0.1057, 0.2988]) -Greedy action tensor([ 0.9948, -0.5355, -0.4592, 0.6720]) tensor([0.4599, 0.0996, 0.1075, 0.3330]) -Greedy action tensor([ 0.7288, -0.6706, -0.2053, -0.0695]) tensor([0.4785, 0.1181, 0.1880, 0.2154]) -Greedy action tensor([ 0.7864, -0.2246, -0.4263, -0.1035]) tensor([0.4826, 0.1756, 0.1435, 0.1982]) -Greedy action tensor([ 1.0361, -0.6258, -0.1133, -0.0854]) tensor([0.5457, 0.1036, 0.1729, 0.1778]) -Greedy action tensor([ 1.4285, -0.8254, -0.3549, -0.1447]) tensor([0.6755, 0.0709, 0.1135, 0.1401]) -Greedy action tensor([ 1.0252, -0.5774, -0.4226, 0.2046]) tensor([0.5329, 0.1073, 0.1253, 0.2346]) -Greedy action tensor([ 1.1285, -0.3693, -0.2889, 0.0212]) tensor([0.5567, 0.1245, 0.1349, 0.1839]) -Greedy action tensor([ 0.9130, -0.4267, 0.0935, 0.0698]) tensor([0.4689, 0.1228, 0.2066, 0.2018]) -Greedy action tensor([ 1.4170, -0.4346, -0.4502, -0.0143]) tensor([0.6449, 0.1012, 0.0997, 0.1541]) -Greedy action tensor([ 0.9161, -0.5478, -0.5726, 0.6314]) tensor([0.4526, 0.1047, 0.1021, 0.3405]) -Greedy action tensor([ 0.9087, -0.5289, -0.8339, 0.0972]) tensor([0.5386, 0.1279, 0.0943, 0.2392]) -Greedy action tensor([ 0.6911, -0.4256, -0.2758, 0.0982]) tensor([0.4424, 0.1448, 0.1682, 0.2445]) -Greedy action tensor([ 0.7413, -0.7280, -0.3451, -0.0436]) tensor([0.4941, 0.1137, 0.1667, 0.2254]) -Greedy action tensor([ 1.2441, -0.7155, -0.4470, 0.3153]) tensor([0.5813, 0.0819, 0.1071, 0.2296]) -Greedy action tensor([ 1.3150, -0.4185, -0.3379, -0.1936]) tensor([0.6292, 0.1112, 0.1205, 0.1392]) -Greedy action tensor([ 1.1649, -0.5815, -0.6306, 0.6015]) tensor([0.5236, 0.0913, 0.0869, 0.2981]) -Greedy action tensor([ 0.6987, -0.4851, -0.5031, 0.0476]) tensor([0.4699, 0.1438, 0.1413, 0.2450]) -Greedy action tensor([ 0.4763, -0.5265, -0.0934, 0.0515]) tensor([0.3866, 0.1418, 0.2187, 0.2528]) -Greedy action tensor([ 0.5135, -0.4237, -0.2205, 0.0168]) tensor([0.4032, 0.1579, 0.1935, 0.2453]) -Greedy action tensor([ 1.1215, -0.5891, -0.0374, -0.1015]) tensor([0.5590, 0.1010, 0.1754, 0.1645]) -Greedy action tensor([ 1.0961, -0.6144, -0.1006, -0.1715]) tensor([0.5667, 0.1025, 0.1713, 0.1595]) -Greedy action tensor([ 0.8385, -0.4813, 0.1739, 0.0187]) tensor([0.4500, 0.1202, 0.2315, 0.1982]) -Greedy action tensor([ 1.4934, -0.8476, -0.2819, 0.5998]) tensor([0.5971, 0.0575, 0.1012, 0.2443]) -Greedy action tensor([ 0.7828, -0.4784, -0.1814, 0.1433]) tensor([0.4562, 0.1292, 0.1739, 0.2407]) -Greedy action tensor([ 0.7132, -0.3614, 0.1847, -0.1023]) tensor([0.4214, 0.1439, 0.2484, 0.1864]) -Greedy action tensor([ 0.8665, -0.4204, -0.3045, -0.3145]) tensor([0.5282, 0.1459, 0.1638, 0.1622]) -Greedy action tensor([ 0.7993, -0.7924, -0.3456, 0.0389]) tensor([0.5027, 0.1023, 0.1600, 0.2350]) -Greedy action tensor([ 1.4210, -0.3879, -0.1489, 0.0603]) tensor([0.6141, 0.1006, 0.1278, 0.1575]) -Greedy action tensor([ 1.0604, -0.6264, -0.5838, 0.6476]) tensor([0.4902, 0.0907, 0.0947, 0.3244]) -Greedy action tensor([ 1.2862, -0.7234, -0.6495, -0.0751]) tensor([0.6516, 0.0873, 0.0940, 0.1670]) -Greedy action tensor([ 0.9597, -0.5593, -0.4285, 0.3347]) tensor([0.4991, 0.1093, 0.1245, 0.2671]) -Greedy action tensor([ 0.9779, -0.3600, 0.0456, -0.2683]) tensor([0.5145, 0.1350, 0.2025, 0.1480]) -Greedy action tensor([ 0.5425, -0.4621, -0.1180, 0.1019]) tensor([0.3958, 0.1450, 0.2045, 0.2548]) -Greedy action tensor([ 0.8099, -0.4264, -0.1995, -0.0530]) tensor([0.4815, 0.1399, 0.1755, 0.2032]) -Greedy action tensor([ 1.2570, -0.5650, -0.5202, 0.4543]) tensor([0.5621, 0.0909, 0.0951, 0.2519]) -Greedy action tensor([ 0.8464, -0.2512, -0.2296, 0.0409]) tensor([0.4714, 0.1573, 0.1607, 0.2106]) -Greedy action tensor([ 1.1703, -0.1138, -0.0047, -0.2338]) tensor([0.5461, 0.1512, 0.1686, 0.1341]) -Greedy action tensor([ 1.2876, -0.5794, -0.3310, 0.2478]) tensor([0.5861, 0.0906, 0.1161, 0.2072]) -Greedy action tensor([ 0.7507, -0.3831, 0.1085, -0.0885]) tensor([0.4386, 0.1411, 0.2308, 0.1895]) -Greedy action tensor([ 0.8952, -0.7079, -0.3865, 0.2310]) tensor([0.5016, 0.1010, 0.1392, 0.2582]) -Greedy action tensor([ 1.9585, -0.3158, -0.4326, 0.1798]) tensor([0.7335, 0.0755, 0.0671, 0.1239]) -Greedy action tensor([ 0.7991, -0.3946, -0.0742, -0.0438]) tensor([0.4649, 0.1409, 0.1941, 0.2001]) -Greedy action tensor([ 0.6030, -0.2747, 0.2952, -0.2443]) tensor([0.3877, 0.1612, 0.2850, 0.1662]) -Greedy action tensor([ 0.7417, -0.4288, -0.8385, 0.4347]) tensor([0.4441, 0.1378, 0.0915, 0.3267]) -Greedy action tensor([ 0.8843, -0.3839, -0.4647, 0.5473]) tensor([0.4435, 0.1248, 0.1151, 0.3166]) -Greedy action tensor([ 1.2074, -0.6218, -0.4870, 0.1688]) tensor([0.5889, 0.0945, 0.1082, 0.2084]) -Greedy action tensor([ 0.7792, -0.4517, -0.1485, 0.3600]) tensor([0.4264, 0.1245, 0.1686, 0.2804]) -Greedy action tensor([ 0.8227, -0.5429, -0.4119, 0.3400]) tensor([0.4623, 0.1180, 0.1345, 0.2853]) -Greedy action tensor([ 1.1481, -0.4519, -0.4064, -0.1836]) tensor([0.5962, 0.1204, 0.1260, 0.1574]) -Greedy action tensor([ 0.9085, -0.2492, -0.5337, 0.2167]) tensor([0.4875, 0.1532, 0.1153, 0.2441]) -Greedy action tensor([ 0.5210, -0.2321, -0.7256, 0.2367]) tensor([0.3982, 0.1875, 0.1145, 0.2997]) -Greedy action tensor([ 1.0293, -0.7097, -0.1675, 0.4912]) tensor([0.4850, 0.0852, 0.1466, 0.2832]) -Greedy action tensor([ 1.3487, -0.3228, -0.2217, 0.0364]) tensor([0.6006, 0.1129, 0.1249, 0.1617]) -Greedy action tensor([ 0.8039, -0.5400, -0.3805, 0.4069]) tensor([0.4466, 0.1165, 0.1366, 0.3003]) -Greedy action tensor([ 1.6669, -0.4529, -0.1256, 0.3271]) tensor([0.6458, 0.0775, 0.1076, 0.1691]) -Greedy action tensor([ 1.1895, -0.2428, -0.1953, -0.1233]) tensor([0.5688, 0.1358, 0.1424, 0.1530]) -Greedy action tensor([ 0.8235, -0.2630, -0.2267, 0.0431]) tensor([0.4661, 0.1573, 0.1631, 0.2136]) -Greedy action tensor([ 1.2400, -0.3179, -0.2667, 0.1781]) tensor([0.5624, 0.1184, 0.1247, 0.1945]) -Greedy action tensor([ 0.6766, -0.4437, -0.1628, -0.0773]) tensor([0.4487, 0.1464, 0.1938, 0.2111]) -Greedy action tensor([ 1.3616, -0.9022, -0.7120, 0.6805]) tensor([0.5761, 0.0599, 0.0724, 0.2916]) -Greedy action tensor([ 0.6979, -0.2636, -0.6224, 0.6804]) tensor([0.3799, 0.1453, 0.1015, 0.3733]) -Greedy action tensor([ 0.9485, -0.6014, -0.1823, -0.0304]) tensor([0.5234, 0.1111, 0.1689, 0.1966]) -Greedy action tensor([ 0.7171, -0.3864, -0.3163, 0.2802]) tensor([0.4285, 0.1421, 0.1525, 0.2769]) -Greedy action tensor([ 1.1730, -0.0736, -0.0701, -0.1397]) tensor([0.5420, 0.1558, 0.1564, 0.1458]) -Greedy action tensor([ 0.9908, -0.6797, -0.4769, 0.2893]) tensor([0.5223, 0.0983, 0.1204, 0.2590]) -Greedy action tensor([ 0.4656, -0.4553, 0.1790, -0.1355]) tensor([0.3708, 0.1476, 0.2784, 0.2033]) -Greedy action tensor([ 1.4504, -0.6298, -0.2688, -0.0549]) tensor([0.6553, 0.0818, 0.1174, 0.1454]) -Greedy action tensor([ 1.1968, -0.4129, -0.3327, 0.0196]) tensor([0.5798, 0.1159, 0.1256, 0.1787]) -Greedy action tensor([ 1.2543, -0.4042, -0.2773, -0.0828]) tensor([0.5991, 0.1141, 0.1295, 0.1573]) -Greedy action tensor([ 0.8942, -0.2877, -0.5571, -0.2547]) tensor([0.5382, 0.1651, 0.1261, 0.1706]) -Greedy action tensor([ 1.0480, -0.5336, -0.2273, 0.0815]) tensor([0.5361, 0.1102, 0.1497, 0.2039]) -Greedy action tensor([ 0.5742, -0.3125, -0.0416, 0.0433]) tensor([0.3936, 0.1622, 0.2127, 0.2315]) -Greedy action tensor([ 1.6665, -0.4594, -0.3148, 0.1640]) tensor([0.6758, 0.0806, 0.0932, 0.1504]) -Greedy action tensor([ 0.3656, 0.1430, 0.1674, -0.2404]) tensor([0.3158, 0.2528, 0.2591, 0.1723]) -Greedy action tensor([ 0.5932, -0.1612, 0.0634, -0.3240]) tensor([0.4067, 0.1913, 0.2394, 0.1625]) -Greedy action tensor([ 0.4990, -0.3654, 0.2261, -0.5048]) tensor([0.3923, 0.1653, 0.2986, 0.1438]) -Greedy action tensor([ 0.4624, -0.1117, 0.0917, -0.2759]) tensor([0.3661, 0.2062, 0.2527, 0.1750]) -Greedy action tensor([ 0.6192, -0.1065, -0.0760, -0.3909]) tensor([0.4261, 0.2062, 0.2126, 0.1552]) -Greedy action tensor([ 0.2677, 0.2107, 0.1227, -0.1713]) tensor([0.2895, 0.2735, 0.2504, 0.1866]) -Greedy action tensor([ 0.5011, -0.1340, 0.0077, -0.3496]) tensor([0.3895, 0.2064, 0.2378, 0.1664]) -Greedy action tensor([ 0.3744, -0.1902, 0.0873, -0.3941]) tensor([0.3594, 0.2043, 0.2697, 0.1666]) -Greedy action tensor([ 0.3793, -0.1232, 0.1431, -0.2936]) tensor([0.3442, 0.2083, 0.2718, 0.1757]) -Greedy action tensor([ 0.3170, 0.0602, 0.1054, -0.2493]) tensor([0.3174, 0.2455, 0.2569, 0.1802]) -Greedy action tensor([ 0.8622, -0.6798, 0.0534, -0.7788]) tensor([0.5396, 0.1155, 0.2403, 0.1046]) -Greedy action tensor([ 0.4558, 0.0410, 0.0042, -0.2400]) tensor([0.3577, 0.2363, 0.2277, 0.1784]) -Greedy action tensor([ 0.6116, -0.4032, 0.0436, -0.7179]) tensor([0.4558, 0.1652, 0.2583, 0.1206]) -Greedy action tensor([ 0.4101, -0.2192, 0.2420, -0.4261]) tensor([0.3557, 0.1896, 0.3006, 0.1541]) -Greedy action tensor([ 0.1930, -0.0011, 0.1116, -0.3186]) tensor([0.2990, 0.2462, 0.2756, 0.1792]) -Greedy action tensor([ 0.3821, 0.1198, 0.1394, -0.0344]) tensor([0.3112, 0.2394, 0.2442, 0.2052]) -Greedy action tensor([ 0.5856, -0.4075, -0.0138, -0.4585]) tensor([0.4402, 0.1631, 0.2417, 0.1550]) -Greedy action tensor([ 0.2757, 0.0984, 0.0316, -0.3429]) tensor([0.3165, 0.2651, 0.2479, 0.1705]) -Greedy action tensor([ 0.7346, -0.1777, -0.0554, -0.4190]) tensor([0.4606, 0.1850, 0.2091, 0.1453]) -Greedy action tensor([ 0.4339, -0.3315, 0.1147, -0.5204]) tensor([0.3880, 0.1805, 0.2820, 0.1494]) -Greedy action tensor([ 0.2053, 0.0491, 0.0493, -0.1967]) tensor([0.2959, 0.2531, 0.2531, 0.1979]) -Greedy action tensor([ 0.4038, -0.0198, -0.0509, -0.3129]) tensor([0.3600, 0.2357, 0.2285, 0.1758]) -Greedy action tensor([ 0.3962, 0.0867, 0.0414, -0.1861]) tensor([0.3340, 0.2451, 0.2343, 0.1866]) -Greedy action tensor([ 0.3325, 0.0349, -0.0451, -0.4414]) tensor([0.3461, 0.2570, 0.2373, 0.1596]) -Greedy action tensor([ 1.1701, -0.6540, 0.0224, -0.7567]) tensor([0.6156, 0.0993, 0.1954, 0.0896]) -Greedy action tensor([ 0.6111, -0.1863, 0.0854, -0.5870]) tensor([0.4267, 0.1922, 0.2523, 0.1288]) -Greedy action tensor([ 0.6569, -0.3758, -0.0048, -0.4758]) tensor([0.4558, 0.1623, 0.2351, 0.1468]) -Greedy action tensor([ 0.7223, -0.5812, -0.1998, -0.6199]) tensor([0.5180, 0.1407, 0.2060, 0.1353]) -Greedy action tensor([ 0.5490, -0.4460, 0.1623, -0.3526]) tensor([0.4074, 0.1506, 0.2767, 0.1653]) -Greedy action tensor([ 0.3720, 0.2093, -0.0266, -0.4468]) tensor([0.3376, 0.2869, 0.2266, 0.1489]) -Greedy action tensor([ 0.2531, -0.1976, -0.0347, -0.3838]) tensor([0.3429, 0.2185, 0.2572, 0.1814]) -Greedy action tensor([ 0.3908, -0.0834, 0.0308, -0.2945]) tensor([0.3541, 0.2204, 0.2470, 0.1784]) -Greedy action tensor([ 0.7275, -0.3313, 0.0074, -0.4465]) tensor([0.4667, 0.1619, 0.2271, 0.1443]) -Greedy action tensor([ 0.5913, -0.3802, -0.0507, -0.4982]) tensor([0.4462, 0.1689, 0.2348, 0.1501]) -Greedy action tensor([ 0.4453, 0.0682, 0.0047, -0.1244]) tensor([0.3454, 0.2369, 0.2223, 0.1954]) -Greedy action tensor([ 1.2692, -0.8695, -0.0476, -0.8657]) tensor([0.6649, 0.0783, 0.1782, 0.0786]) -Greedy action tensor([ 0.4424, -0.3113, -0.1192, -0.4369]) tensor([0.4072, 0.1916, 0.2322, 0.1690]) -Greedy action tensor([ 0.5322, -0.1005, 0.0342, -0.2716]) tensor([0.3866, 0.2054, 0.2350, 0.1731]) -Greedy action tensor([ 1.3534, -0.9101, 0.0628, -0.7688]) tensor([0.6672, 0.0694, 0.1835, 0.0799]) -Greedy action tensor([ 0.8192, -0.0371, -0.1828, -0.4322]) tensor([0.4812, 0.2044, 0.1767, 0.1377]) -Greedy action tensor([ 0.4464, -0.1330, 0.0273, -0.4185]) tensor([0.3789, 0.2123, 0.2492, 0.1596]) -Greedy action tensor([ 0.4586, -0.1937, -0.0468, -0.3246]) tensor([0.3874, 0.2018, 0.2337, 0.1770]) -Greedy action tensor([ 0.6307, -0.2703, 0.0563, -0.3858]) tensor([0.4290, 0.1742, 0.2415, 0.1552]) -Greedy action tensor([ 0.6027, 0.0937, -0.0340, -0.3871]) tensor([0.3997, 0.2403, 0.2115, 0.1486]) -Greedy action tensor([ 0.4355, -0.2272, 0.1269, -0.4218]) tensor([0.3739, 0.1928, 0.2746, 0.1587]) -Greedy action tensor([ 0.6207, -0.2040, -0.0354, -0.3469]) tensor([0.4278, 0.1876, 0.2220, 0.1626]) -Greedy action tensor([ 0.3333, 0.0541, 0.0198, -0.1398]) tensor([0.3215, 0.2432, 0.2350, 0.2003]) -Greedy action tensor([ 0.3673, -0.1958, 0.0824, -0.3242]) tensor([0.3543, 0.2018, 0.2665, 0.1775]) -Greedy action tensor([ 0.5224, -0.2653, 0.1049, -0.5346]) tensor([0.4063, 0.1848, 0.2676, 0.1412]) -Greedy action tensor([ 0.3766, 0.0287, 0.2407, -0.2014]) tensor([0.3185, 0.2249, 0.2780, 0.1787]) -Greedy action tensor([ 0.3725, -0.1348, 0.0729, -0.4427]) tensor([0.3590, 0.2161, 0.2660, 0.1589]) -Greedy action tensor([ 0.6170, -0.3089, 0.0306, -0.4350]) tensor([0.4345, 0.1721, 0.2417, 0.1517]) -Greedy action tensor([ 0.6126, -0.1008, -0.0161, -0.3160]) tensor([0.4135, 0.2026, 0.2205, 0.1634]) -Greedy action tensor([ 0.4065, -0.1504, -0.0176, -0.3089]) tensor([0.3682, 0.2109, 0.2409, 0.1800]) -Greedy action tensor([ 0.8416, -0.2394, -0.1787, -0.3415]) tensor([0.4985, 0.1691, 0.1797, 0.1527]) -Greedy action tensor([ 0.2986, -0.0722, -0.0282, -0.4564]) tensor([0.3470, 0.2395, 0.2503, 0.1631]) -Greedy action tensor([ 0.6959, -0.4100, 0.1679, -0.7077]) tensor([0.4616, 0.1527, 0.2722, 0.1134]) -Greedy action tensor([ 0.8682, -0.3546, -0.0184, -0.3493]) tensor([0.4994, 0.1470, 0.2058, 0.1478]) -Greedy action tensor([ 0.5390, -0.3285, 0.3082, -0.4129]) tensor([0.3846, 0.1615, 0.3054, 0.1485]) -Greedy action tensor([ 0.4322, -0.2749, 0.1570, -0.4081]) tensor([0.3726, 0.1837, 0.2829, 0.1608]) -Greedy action tensor([ 0.5301, 0.0624, -0.0175, -0.4013]) tensor([0.3848, 0.2411, 0.2225, 0.1516]) -Greedy action tensor([ 0.5989, -0.4520, -0.0438, -0.4575]) tensor([0.4498, 0.1573, 0.2365, 0.1564]) -Greedy action tensor([ 0.4309, 0.1735, 0.1216, -0.1711]) tensor([0.3274, 0.2531, 0.2403, 0.1793]) -Greedy action tensor([ 0.3736, 0.0084, 0.1299, -0.2507]) tensor([0.3319, 0.2303, 0.2601, 0.1778]) -Greedy action tensor([ 0.4085, 0.2440, 0.1291, -0.1926]) tensor([0.3172, 0.2691, 0.2399, 0.1739]) -Greedy action tensor([ 0.3956, 0.1417, 0.0364, -0.1907]) tensor([0.3300, 0.2560, 0.2304, 0.1836]) -Greedy action tensor([ 0.5453, -0.0778, -0.0559, -0.3364]) tensor([0.4002, 0.2146, 0.2194, 0.1657]) -Greedy action tensor([ 0.3067, -0.1311, -0.0490, -0.3283]) tensor([0.3477, 0.2244, 0.2436, 0.1842]) -Greedy action tensor([ 1.2075, -0.6987, -0.0378, -0.7558]) tensor([0.6342, 0.0943, 0.1825, 0.0890]) -Greedy action tensor([ 0.8162, -0.4642, -0.0330, -0.4820]) tensor([0.5054, 0.1405, 0.2162, 0.1380]) -Greedy action tensor([ 0.2633, -0.0678, 0.0631, -0.3977]) tensor([0.3275, 0.2352, 0.2681, 0.1691]) -Greedy action tensor([ 0.8769, -0.5368, -0.0771, -0.5890]) tensor([0.5378, 0.1308, 0.2072, 0.1242]) -Greedy action tensor([ 0.4165, 0.0542, 0.0422, -0.3812]) tensor([0.3528, 0.2456, 0.2427, 0.1589]) -Greedy action tensor([ 0.3379, 0.0936, 0.1609, -0.1004]) tensor([0.3062, 0.2398, 0.2565, 0.1975]) -Greedy action tensor([ 1.0355, -0.7432, 0.0102, -0.7824]) tensor([0.5917, 0.0999, 0.2123, 0.0961]) -Greedy action tensor([ 0.6298, -0.5082, 0.0313, -0.6042]) tensor([0.4627, 0.1483, 0.2543, 0.1347]) -Greedy action tensor([ 0.3335, 0.1985, 0.2466, -0.1208]) tensor([0.2919, 0.2551, 0.2676, 0.1853]) -Greedy action tensor([ 0.3673, -0.1162, -0.0507, -0.3311]) tensor([0.3607, 0.2224, 0.2375, 0.1794]) -Greedy action tensor([ 0.3940, 0.0838, 0.1220, -0.0739]) tensor([0.3204, 0.2349, 0.2441, 0.2006]) -Greedy action tensor([ 0.2772, 0.2513, 0.1667, -0.2464]) tensor([0.2888, 0.2814, 0.2586, 0.1711]) -Greedy action tensor([ 0.8157, -0.4496, -0.0605, -0.7680]) tensor([0.5253, 0.1482, 0.2187, 0.1078]) -Greedy action tensor([-1.3446, -0.3618, 0.2546, -0.3698]) tensor([0.0887, 0.2371, 0.4391, 0.2352]) -Greedy action tensor([-1.6474, -0.4501, 0.7371, -0.5095]) tensor([0.0547, 0.1811, 0.5936, 0.1706]) -Greedy action tensor([-0.4186, -0.4373, 0.1855, 0.1359]) tensor([0.1801, 0.1768, 0.3295, 0.3136]) -Greedy action tensor([-1.1066, 0.1893, 0.2918, 0.4382]) tensor([0.0747, 0.2729, 0.3024, 0.3500]) -Greedy action tensor([-1.2013, -0.5911, 0.4619, 0.2608]) tensor([0.0804, 0.1481, 0.4244, 0.3471]) -Greedy action tensor([-0.4592, -0.4889, 0.1845, 0.1886]) tensor([0.1728, 0.1678, 0.3290, 0.3304]) -Greedy action tensor([-0.4447, -0.4534, 0.1962, 0.2098]) tensor([0.1720, 0.1705, 0.3265, 0.3310]) -Greedy action tensor([-1.6664, -0.6443, 0.2116, -0.3963]) tensor([0.0720, 0.2002, 0.4712, 0.2566]) -Greedy action tensor([-1.9119, -0.7254, 0.7883, 0.1039]) tensor([0.0375, 0.1228, 0.5581, 0.2815]) -Greedy action tensor([-1.3192, -0.5189, 0.3839, 0.0390]) tensor([0.0793, 0.1766, 0.4356, 0.3085]) -Greedy action tensor([-1.5478, -0.3942, 0.3641, -0.2902]) tensor([0.0692, 0.2193, 0.4682, 0.2433]) -Greedy action tensor([-1.7681, -0.7397, 0.0454, -0.3083]) tensor([0.0703, 0.1965, 0.4308, 0.3025]) -Greedy action tensor([-1.7942, -0.4236, 0.7124, 0.3014]) tensor([0.0395, 0.1554, 0.4841, 0.3210]) -Greedy action tensor([-1.8294, -0.9849, 0.1779, -0.4668]) tensor([0.0681, 0.1585, 0.5072, 0.2662]) -Greedy action tensor([-1.9864, -0.9447, 0.3544, -0.2460]) tensor([0.0502, 0.1422, 0.5215, 0.2861]) -Greedy action tensor([-1.3868, -0.5337, 0.4470, -0.0053]) tensor([0.0736, 0.1728, 0.4606, 0.2930]) -Greedy action tensor([-1.8516, -0.6926, 0.4790, 0.0185]) tensor([0.0477, 0.1520, 0.4907, 0.3096]) -Greedy action tensor([-1.3372, -0.5348, 0.3937, 0.0096]) tensor([0.0786, 0.1754, 0.4438, 0.3022]) -Greedy action tensor([-1.4257, -0.4913, 0.6935, 0.5828]) tensor([0.0518, 0.1317, 0.4308, 0.3857]) -Greedy action tensor([-1.0176, -0.5414, 0.4957, -0.2414]) tensor([0.1072, 0.1726, 0.4870, 0.2331]) -Greedy action tensor([-1.8572, -0.8201, 0.0134, -0.3594]) tensor([0.0676, 0.1908, 0.4391, 0.3025]) -Greedy action tensor([-1.3334, -0.5185, 0.3894, 0.4032]) tensor([0.0688, 0.1554, 0.3852, 0.3906]) -Greedy action tensor([-1.7541, -0.7600, 0.1876, -0.4135]) tensor([0.0690, 0.1864, 0.4809, 0.2636]) -Greedy action tensor([-1.2136, -0.5435, 0.6356, -0.3986]) tensor([0.0864, 0.1690, 0.5493, 0.1953]) -Greedy action tensor([-1.5484, -0.5396, 0.4667, 0.0029]) tensor([0.0627, 0.1718, 0.4700, 0.2956]) -Greedy action tensor([-1.5532, -0.5804, -0.0714, -0.4802]) tensor([0.0912, 0.2411, 0.4012, 0.2665]) -Greedy action tensor([-1.0738, -0.5943, 0.3524, 0.1116]) tensor([0.0995, 0.1607, 0.4142, 0.3256]) -Greedy action tensor([-0.8938, -0.5943, 0.2636, 0.3003]) tensor([0.1132, 0.1528, 0.3603, 0.3737]) -Greedy action tensor([-1.7648, -0.4625, 0.6042, 0.2052]) tensor([0.0444, 0.1632, 0.4742, 0.3182]) -Greedy action tensor([-1.7511, -0.5723, 0.1488, -0.3856]) tensor([0.0673, 0.2188, 0.4501, 0.2638]) -Greedy action tensor([-1.5336, -0.5480, 0.9251, 0.8528]) tensor([0.0381, 0.1021, 0.4454, 0.4144]) -Greedy action tensor([-1.0543, -0.1027, 0.1978, -0.3266]) tensor([0.1092, 0.2828, 0.3819, 0.2261]) -Greedy action tensor([-1.5166, -0.5472, 0.5192, -0.0808]) tensor([0.0645, 0.1701, 0.4942, 0.2712]) -Greedy action tensor([-2.0236, -0.7089, 0.7761, 0.0498]) tensor([0.0343, 0.1279, 0.5646, 0.2731]) -Greedy action tensor([-1.9891, -0.9174, 0.2488, -0.2445]) tensor([0.0526, 0.1536, 0.4929, 0.3010]) -Greedy action tensor([-0.9030, -0.1965, -0.4743, -0.1215]) tensor([0.1482, 0.3004, 0.2276, 0.3238]) -Greedy action tensor([-1.6556, -0.6269, 0.7724, -0.0332]) tensor([0.0495, 0.1385, 0.5612, 0.2508]) -Greedy action tensor([-1.7691, -0.8920, 0.1864, -0.4538]) tensor([0.0704, 0.1693, 0.4978, 0.2624]) -Greedy action tensor([-1.7523, -0.4035, 0.5542, -0.0564]) tensor([0.0492, 0.1894, 0.4935, 0.2680]) -Greedy action tensor([-1.4941, -0.6289, 0.2117, -0.3729]) tensor([0.0837, 0.1988, 0.4607, 0.2568]) -Greedy action tensor([-1.8785, -0.9374, 0.1268, -0.4371]) tensor([0.0657, 0.1684, 0.4882, 0.2777]) -Greedy action tensor([-1.9365, -0.7379, 0.5037, -0.0621]) tensor([0.0448, 0.1486, 0.5144, 0.2921]) -Greedy action tensor([-1.0197, -0.5682, 0.2355, 0.3496]) tensor([0.0999, 0.1569, 0.3504, 0.3928]) -Greedy action tensor([-1.9723, -0.9297, 0.2675, -0.2944]) tensor([0.0538, 0.1526, 0.5054, 0.2881]) -Greedy action tensor([-1.4175, -0.4457, 0.5267, 0.5226]) tensor([0.0568, 0.1502, 0.3973, 0.3956]) -Greedy action tensor([-1.6394, -0.6770, 0.8212, -0.0433]) tensor([0.0493, 0.1292, 0.5780, 0.2435]) -Greedy action tensor([-1.5000, -0.5497, 0.4210, 0.1513]) tensor([0.0640, 0.1655, 0.4369, 0.3336]) -Greedy action tensor([-1.7527, -0.6654, 0.2535, -0.2830]) tensor([0.0635, 0.1883, 0.4721, 0.2761]) -Greedy action tensor([-1.4890, -0.5240, 0.4426, 0.1084]) tensor([0.0647, 0.1697, 0.4462, 0.3194]) -Greedy action tensor([-1.4392, -0.5489, 0.4130, 0.0560]) tensor([0.0701, 0.1707, 0.4467, 0.3126]) -Greedy action tensor([-1.6968, -0.4790, 0.6156, 0.2234]) tensor([0.0469, 0.1587, 0.4741, 0.3203]) -Greedy action tensor([-1.9375, -0.6171, 0.9073, 0.1858]) tensor([0.0330, 0.1236, 0.5675, 0.2759]) -Greedy action tensor([-0.7130, -0.1536, 0.1898, -0.0986]) tensor([0.1415, 0.2477, 0.3491, 0.2617]) -Greedy action tensor([-0.9543, -0.1064, 0.5100, -0.5041]) tensor([0.1084, 0.2530, 0.4686, 0.1700]) -Greedy action tensor([-1.7262, -0.6197, 0.2132, -0.3079]) tensor([0.0662, 0.2001, 0.4603, 0.2734]) -Greedy action tensor([-1.3370, -0.2679, 0.0774, -0.1108]) tensor([0.0874, 0.2547, 0.3598, 0.2981]) -Greedy action tensor([-1.3728, -0.4543, 0.4417, -0.0705]) tensor([0.0751, 0.1881, 0.4608, 0.2761]) -Greedy action tensor([-2.0047, -0.8048, 0.4553, -0.1092]) tensor([0.0441, 0.1464, 0.5161, 0.2935]) -Greedy action tensor([-1.6093, -0.5198, 0.6049, 0.2054]) tensor([0.0519, 0.1543, 0.4751, 0.3187]) -Greedy action tensor([-1.0116, -0.5940, 0.2899, 0.0708]) tensor([0.1094, 0.1660, 0.4018, 0.3228]) -Greedy action tensor([-1.8121, -0.8282, 0.3455, -0.2057]) tensor([0.0578, 0.1545, 0.4997, 0.2880]) -Greedy action tensor([-1.2779, -0.4806, 0.3565, 0.3560]) tensor([0.0742, 0.1648, 0.3806, 0.3804]) -Greedy action tensor([-1.6858, -0.8970, -0.0804, -0.4878]) tensor([0.0870, 0.1915, 0.4333, 0.2883]) -Greedy action tensor([-1.0657, -0.5294, 0.4566, -0.3664]) tensor([0.1075, 0.1837, 0.4925, 0.2163]) -Greedy action tensor([-1.5739, -0.5236, 0.7173, -0.2653]) tensor([0.0573, 0.1638, 0.5667, 0.2121]) -Greedy action tensor([-1.8692, -0.7639, 0.2154, -0.3005]) tensor([0.0593, 0.1791, 0.4769, 0.2847]) -Greedy action tensor([-1.9868, -0.8639, 0.3703, -0.1292]) tensor([0.0475, 0.1461, 0.5019, 0.3045]) -Greedy action tensor([-2.0130, -0.9583, 0.5714, -0.0629]) tensor([0.0414, 0.1189, 0.5487, 0.2910]) -Greedy action tensor([-1.7261, -0.7010, 0.1801, -0.3316]) tensor([0.0687, 0.1916, 0.4624, 0.2772]) -Greedy action tensor([-1.0362, -0.5478, 0.5603, -0.3657]) tensor([0.1050, 0.1712, 0.5184, 0.2054]) -Greedy action tensor([-1.6703, -0.4933, 0.5850, 0.1910]) tensor([0.0495, 0.1605, 0.4718, 0.3182]) -Greedy action tensor([-1.6771, -0.5024, 0.5164, -0.0032]) tensor([0.0539, 0.1746, 0.4837, 0.2877]) -Greedy action tensor([-0.8100, -0.5688, 0.3350, -0.0079]) tensor([0.1308, 0.1665, 0.4110, 0.2917]) -Greedy action tensor([ 0.1425, -0.1008, 0.3947, 0.8004]) tensor([0.1999, 0.1567, 0.2573, 0.3860]) -Greedy action tensor([-0.6375, -0.5454, 0.2176, 0.2166]) tensor([0.1471, 0.1613, 0.3460, 0.3456]) -Greedy action tensor([-1.9996, -0.9504, 0.3575, -0.2751]) tensor([0.0499, 0.1426, 0.5274, 0.2801]) -Greedy action tensor([-1.8472, -0.4447, 0.6159, -0.1288]) tensor([0.0447, 0.1816, 0.5246, 0.2491]) -Greedy action tensor([-1.9452, -0.8832, 0.9021, 0.2507]) tensor([0.0332, 0.0960, 0.5724, 0.2984]) -Greedy action tensor([-1.8055, -0.5908, 0.1632, -0.3749]) tensor([0.0636, 0.2144, 0.4558, 0.2661]) -Greedy action tensor([-0.8097, -0.5782, 0.2883, 0.0202]) tensor([0.1324, 0.1669, 0.3970, 0.3036]) -Greedy action tensor([-1.1621, -0.5574, 0.5225, 0.3811]) tensor([0.0775, 0.1419, 0.4178, 0.3627]) -Greedy action tensor([ 0.8601, -0.0709, 0.2165, 0.7877]) tensor([0.3509, 0.1383, 0.1844, 0.3264]) -Greedy action tensor([ 0.4436, 0.5085, 0.0244, -0.1217]) tensor([0.3037, 0.3241, 0.1997, 0.1726]) -Greedy action tensor([ 1.9323, -0.9338, -0.4046, 0.9465]) tensor([0.6550, 0.0373, 0.0633, 0.2444]) -Greedy action tensor([ 1.3770, -0.1161, 0.2260, 1.0402]) tensor([0.4434, 0.0996, 0.1403, 0.3167]) -Greedy action tensor([ 1.3711, -0.2952, 0.0859, 1.1108]) tensor([0.4472, 0.0845, 0.1237, 0.3447]) -Greedy action tensor([ 0.1453, 0.1266, -0.0234, 0.2920]) tensor([0.2510, 0.2463, 0.2120, 0.2906]) -Greedy action tensor([ 1.3063, 0.2335, -0.0079, 0.6970]) tensor([0.4641, 0.1588, 0.1247, 0.2524]) -Greedy action tensor([ 0.8611, -0.2269, 1.1688, 1.2337]) tensor([0.2410, 0.0812, 0.3279, 0.3499]) -Greedy action tensor([ 1.4071, -0.3324, 0.1886, 0.9591]) tensor([0.4739, 0.0832, 0.1401, 0.3028]) -Greedy action tensor([ 1.0997, -0.1377, -0.1358, 0.2295]) tensor([0.5001, 0.1451, 0.1454, 0.2095]) -Greedy action tensor([1.7524, 0.1720, 0.2079, 0.9771]) tensor([0.5320, 0.1095, 0.1135, 0.2450]) -Greedy action tensor([ 1.3953, 0.5263, -0.3062, 0.4872]) tensor([0.4987, 0.2092, 0.0910, 0.2011]) -Greedy action tensor([ 0.6533, -0.6941, 0.0867, 1.3656]) tensor([0.2587, 0.0672, 0.1468, 0.5273]) -Greedy action tensor([ 0.3067, -0.9781, 0.6385, -0.1816]) tensor([0.3045, 0.0843, 0.4243, 0.1869]) -Greedy action tensor([ 2.1429, -0.8326, -0.2046, 0.7721]) tensor([0.7140, 0.0364, 0.0683, 0.1813]) -Greedy action tensor([ 0.7592, -1.1015, -0.3939, -0.2863]) tensor([0.5486, 0.0853, 0.1732, 0.1929]) -Greedy action tensor([ 1.1223, -1.4275, -0.0948, 1.2241]) tensor([0.4030, 0.0315, 0.1193, 0.4462]) -Greedy action tensor([ 1.3173, -1.2056, 0.6842, 0.3239]) tensor([0.5047, 0.0405, 0.2680, 0.1869]) -Greedy action tensor([ 1.7389, -0.0896, 0.2099, 1.3704]) tensor([0.4833, 0.0776, 0.1048, 0.3343]) -Greedy action tensor([ 1.9117, -0.6356, -0.0478, 0.9984]) tensor([0.6171, 0.0483, 0.0870, 0.2476]) -Greedy action tensor([0.8402, 0.3937, 0.5432, 0.6241]) tensor([0.3136, 0.2007, 0.2330, 0.2527]) -Greedy action tensor([ 0.9198, 0.1722, -0.3442, 0.6988]) tensor([0.3910, 0.1851, 0.1105, 0.3134]) -Greedy action tensor([ 1.4473, -0.4127, 0.8939, 0.8970]) tensor([0.4334, 0.0675, 0.2492, 0.2500]) -Greedy action tensor([ 1.0707, 0.2702, -0.7218, 0.7909]) tensor([0.4216, 0.1894, 0.0702, 0.3188]) -Greedy action tensor([ 0.0694, 0.8325, -0.1540, 1.0251]) tensor([0.1528, 0.3277, 0.1222, 0.3973]) -Greedy action tensor([ 1.3294, -0.1715, 1.4933, 1.2250]) tensor([0.3029, 0.0675, 0.3568, 0.2728]) -Greedy action tensor([ 1.2682, -0.4681, 0.6505, 0.7777]) tensor([0.4296, 0.0757, 0.2316, 0.2631]) -Greedy action tensor([ 0.5682, -0.6424, 0.2643, 1.6547]) tensor([0.2000, 0.0596, 0.1476, 0.5928]) -Greedy action tensor([ 0.3315, 0.8583, 0.2285, -0.0343]) tensor([0.2331, 0.3948, 0.2103, 0.1617]) -Greedy action tensor([ 0.9916, -0.0228, 0.2142, 0.8851]) tensor([0.3675, 0.1333, 0.1689, 0.3304]) -Greedy action tensor([ 1.8043, -0.4211, 0.7492, 0.8876]) tensor([0.5388, 0.0582, 0.1876, 0.2154]) -Greedy action tensor([ 1.2291, -0.9938, 0.3906, 0.4379]) tensor([0.5015, 0.0543, 0.2168, 0.2273]) -Greedy action tensor([ 0.9491, -0.3584, 0.3785, 0.2124]) tensor([0.4321, 0.1169, 0.2442, 0.2068]) -Greedy action tensor([ 0.6608, -0.0043, 0.0172, 0.2229]) tensor([0.3724, 0.1915, 0.1957, 0.2404]) -Greedy action tensor([ 1.9484, -0.1148, 0.1320, 1.4765]) tensor([0.5226, 0.0664, 0.0850, 0.3260]) -Greedy action tensor([ 1.1635, 0.6334, -1.1613, 0.7155]) tensor([0.4301, 0.2531, 0.0421, 0.2748]) -Greedy action tensor([ 0.7283, -0.0455, -0.6764, 1.6115]) tensor([0.2424, 0.1118, 0.0595, 0.5863]) -Greedy action tensor([ 1.1397, 1.0607, -0.8988, 0.0904]) tensor([0.4159, 0.3843, 0.0542, 0.1456]) -Greedy action tensor([ 1.3285, -1.0103, 1.0396, 0.4467]) tensor([0.4426, 0.0427, 0.3315, 0.1832]) -Greedy action tensor([ 1.7169e+00, -9.7426e-01, 1.3172e-04, 8.2761e-01]) tensor([0.6030, 0.0409, 0.1083, 0.2478]) -Greedy action tensor([ 0.6646, 0.8944, -0.3406, 0.1995]) tensor([0.3075, 0.3869, 0.1125, 0.1931]) -Greedy action tensor([ 0.7099, 0.5414, -0.9070, 0.7914]) tensor([0.3197, 0.2701, 0.0635, 0.3468]) -Greedy action tensor([ 0.7585, -0.2898, -0.2447, 0.9462]) tensor([0.3420, 0.1199, 0.1254, 0.4126]) -Greedy action tensor([ 0.1225, -0.0755, 0.8064, -0.2685]) tensor([0.2233, 0.1832, 0.4425, 0.1510]) -Greedy action tensor([ 2.0903, -0.5039, 0.7842, 1.7383]) tensor([0.4881, 0.0365, 0.1322, 0.3433]) -Greedy action tensor([ 1.1730, 0.4152, -0.6964, 0.3400]) tensor([0.4860, 0.2278, 0.0749, 0.2113]) -Greedy action tensor([ 1.5158, -0.8105, 1.1384, 0.3311]) tensor([0.4787, 0.0467, 0.3282, 0.1464]) -Greedy action tensor([ 2.2151, -0.5376, -0.0528, 0.8018]) tensor([0.7089, 0.0452, 0.0734, 0.1725]) -Greedy action tensor([ 0.7428, 0.1825, -0.9057, 0.3714]) tensor([0.4076, 0.2328, 0.0784, 0.2812]) -Greedy action tensor([ 2.1625, -0.2456, -0.9546, 0.6791]) tensor([0.7347, 0.0661, 0.0325, 0.1667]) -Greedy action tensor([ 0.5092, 0.0490, -0.9119, 1.0984]) tensor([0.2721, 0.1717, 0.0657, 0.4905]) -Greedy action tensor([ 1.1716, 0.5535, -1.1309, 0.8901]) tensor([0.4178, 0.2252, 0.0418, 0.3153]) -Greedy action tensor([ 1.4450, -0.2297, 0.7506, 1.0745]) tensor([0.4207, 0.0788, 0.2101, 0.2904]) -Greedy action tensor([ 0.9214, 0.5562, -1.0575, 0.8090]) tensor([0.3668, 0.2546, 0.0507, 0.3279]) -Greedy action tensor([ 1.3389, 0.2044, -0.2013, 1.1852]) tensor([0.4178, 0.1344, 0.0895, 0.3583]) -Greedy action tensor([ 1.7105, -1.0729, -0.1659, 0.5023]) tensor([0.6606, 0.0408, 0.1012, 0.1973]) -Greedy action tensor([ 1.2106, -0.4049, -0.4593, 1.5635]) tensor([0.3559, 0.0707, 0.0670, 0.5064]) -Greedy action tensor([ 0.5897, 0.4651, 0.0187, -0.4579]) tensor([0.3573, 0.3155, 0.2019, 0.1253]) -Greedy action tensor([ 0.5595, 0.8201, -0.4353, 1.0496]) tensor([0.2326, 0.3018, 0.0860, 0.3797]) -Greedy action tensor([ 1.3579, -0.2837, 0.0911, 0.9088]) tensor([0.4731, 0.0916, 0.1333, 0.3019]) -Greedy action tensor([ 1.2446, -0.6379, 0.1794, -0.0711]) tensor([0.5665, 0.0862, 0.1953, 0.1520]) -Greedy action tensor([ 1.6313, -1.0228, -0.5431, 0.8293]) tensor([0.6126, 0.0431, 0.0696, 0.2747]) -Greedy action tensor([ 0.1860, -0.0873, -0.2647, 0.2929]) tensor([0.2848, 0.2167, 0.1815, 0.3170]) -Greedy action tensor([ 1.0102, -0.3954, -0.7406, 1.5910]) tensor([0.3119, 0.0765, 0.0542, 0.5575]) -Greedy action tensor([ 0.3717, -0.2786, 0.1786, 1.0846]) tensor([0.2280, 0.1190, 0.1879, 0.4651]) -Greedy action tensor([0.8961, 1.1714, 0.2383, 0.5499]) tensor([0.2823, 0.3718, 0.1462, 0.1997]) -Greedy action tensor([ 0.9458, -0.2141, -0.4950, 0.6396]) tensor([0.4374, 0.1371, 0.1035, 0.3220]) -Greedy action tensor([ 1.4100, -1.1559, 0.4822, 1.0712]) tensor([0.4577, 0.0352, 0.1810, 0.3262]) -Greedy action tensor([ 1.0593, -0.6352, 0.5666, 0.5662]) tensor([0.4157, 0.0764, 0.2540, 0.2539]) -Greedy action tensor([ 1.7196, -0.2100, -0.8207, 1.4411]) tensor([0.5048, 0.0733, 0.0398, 0.3821]) -Greedy action tensor([ 1.9853, -0.7453, 0.2287, 1.1810]) tensor([0.5934, 0.0387, 0.1024, 0.2655]) -Greedy action tensor([ 1.0483, -0.2642, 0.7757, -0.8290]) tensor([0.4580, 0.1233, 0.3487, 0.0701]) -Greedy action tensor([ 1.7667, -0.6494, 0.7099, 1.1365]) tensor([0.5078, 0.0453, 0.1765, 0.2704]) -Greedy action tensor([ 0.4024, -0.7558, 1.0238, 0.1522]) tensor([0.2529, 0.0794, 0.4708, 0.1969]) -Greedy action tensor([ 0.7532, -1.0435, 0.3895, 1.0160]) tensor([0.3163, 0.0525, 0.2199, 0.4114]) -Greedy action tensor([ 1.9327, -0.1604, -0.3341, 0.9907]) tensor([0.6185, 0.0763, 0.0641, 0.2411]) -Greedy action tensor([ 1.2699, -0.5478, 1.0856, 1.6444]) tensor([0.2900, 0.0471, 0.2412, 0.4217]) -Greedy action tensor([ 1.1781, -0.4046, -0.3047, -0.0473]) tensor([0.5794, 0.1190, 0.1315, 0.1701]) -Greedy action tensor([ 1.1238, 0.3413, -0.0331, 1.5311]) tensor([0.3054, 0.1396, 0.0960, 0.4589]) -Greedy action tensor([ 0.8362, 0.1608, -0.7072, 1.5440]) tensor([0.2665, 0.1356, 0.0569, 0.5409]) -Greedy action tensor([ 0.8330, -0.2591, 0.6956, 0.1643]) tensor([0.3677, 0.1234, 0.3205, 0.1884]) -Greedy action tensor([ 0.3609, 0.1893, -0.0916, -0.4103]) tensor([0.3400, 0.2864, 0.2163, 0.1572]) -Greedy action tensor([ 0.0479, 0.1243, 0.1496, -0.1750]) tensor([0.2508, 0.2708, 0.2777, 0.2007]) -Greedy action tensor([ 0.2594, 0.0531, -0.0615, -0.1122]) tensor([0.3097, 0.2520, 0.2247, 0.2136]) -Greedy action tensor([ 0.5395, -0.3431, -0.0858, -0.5188]) tensor([0.4356, 0.1802, 0.2331, 0.1512]) -Greedy action tensor([ 0.2022, 0.1726, 0.1163, -0.1478]) tensor([0.2783, 0.2702, 0.2554, 0.1961]) -Greedy action tensor([ 1.1837, -0.6295, 0.0828, -0.6134]) tensor([0.6019, 0.0982, 0.2002, 0.0998]) -Greedy action tensor([ 0.3112, 0.1275, 0.0128, -0.1557]) tensor([0.3124, 0.2600, 0.2318, 0.1958]) -Greedy action tensor([ 0.4201, 0.0761, 0.0631, -0.4606]) tensor([0.3542, 0.2511, 0.2479, 0.1468]) -Greedy action tensor([ 0.4763, -0.0394, 0.0777, -0.2353]) tensor([0.3624, 0.2164, 0.2433, 0.1779]) -Greedy action tensor([ 0.3714, 0.1027, 0.0630, -0.2331]) tensor([0.3284, 0.2510, 0.2412, 0.1794]) -Greedy action tensor([ 0.1091, 0.1239, 0.1524, -0.1574]) tensor([0.2614, 0.2653, 0.2730, 0.2003]) -Greedy action tensor([ 0.4253, -0.0785, 0.0657, -0.3355]) tensor([0.3611, 0.2182, 0.2520, 0.1687]) -Greedy action tensor([ 1.0302, -0.6510, -0.1108, -0.5020]) tensor([0.5808, 0.1081, 0.1856, 0.1255]) -Greedy action tensor([ 1.0812, -0.8053, -0.0331, -0.7434]) tensor([0.6094, 0.0924, 0.2000, 0.0983]) -Greedy action tensor([ 0.7135, -0.2112, 0.0601, -0.3421]) tensor([0.4415, 0.1751, 0.2297, 0.1536]) -Greedy action tensor([ 0.4588, -0.2545, -0.1332, -0.3754]) tensor([0.4036, 0.1978, 0.2233, 0.1753]) -Greedy action tensor([ 0.3012, -0.0097, 0.0918, -0.3692]) tensor([0.3273, 0.2398, 0.2655, 0.1674]) -Greedy action tensor([ 0.5596, -0.3253, 0.0195, -0.4150]) tensor([0.4214, 0.1740, 0.2456, 0.1590]) -Greedy action tensor([ 0.9198, -0.4750, -0.2305, -0.6911]) tensor([0.5668, 0.1405, 0.1794, 0.1132]) -Greedy action tensor([ 0.6084, -0.2230, 0.0655, -0.5890]) tensor([0.4313, 0.1878, 0.2506, 0.1303]) -Greedy action tensor([ 0.9916, -0.6029, -0.0204, -0.7006]) tensor([0.5712, 0.1160, 0.2076, 0.1052]) -Greedy action tensor([ 0.3574, 0.1450, 0.1602, -0.0574]) tensor([0.3039, 0.2458, 0.2495, 0.2007]) -Greedy action tensor([ 0.4457, 0.0592, 0.0065, -0.4100]) tensor([0.3638, 0.2472, 0.2345, 0.1546]) -Greedy action tensor([ 0.5929, -0.0188, -0.1450, -0.1755]) tensor([0.4025, 0.2184, 0.1924, 0.1867]) -Greedy action tensor([ 0.4039, -0.1940, 0.0866, -0.4895]) tensor([0.3721, 0.2047, 0.2709, 0.1523]) -Greedy action tensor([ 1.1522, -0.8795, -0.0969, -0.6947]) tensor([0.6347, 0.0832, 0.1820, 0.1001]) -Greedy action tensor([ 0.3862, 0.1028, -0.0205, -0.1112]) tensor([0.3303, 0.2488, 0.2200, 0.2009]) -Greedy action tensor([ 0.3731, 0.1434, 0.0127, -0.2241]) tensor([0.3287, 0.2612, 0.2292, 0.1809]) -Greedy action tensor([ 0.4065, -0.3057, 0.0627, -0.2853]) tensor([0.3703, 0.1817, 0.2626, 0.1854]) -Greedy action tensor([ 0.5863, -0.2507, 0.0245, -0.2045]) tensor([0.4071, 0.1763, 0.2321, 0.1846]) -Greedy action tensor([ 0.3852, 0.1586, 0.1751, -0.0216]) tensor([0.3055, 0.2435, 0.2476, 0.2034]) -Greedy action tensor([ 0.4750, -0.1160, -0.0494, -0.3404]) tensor([0.3864, 0.2140, 0.2287, 0.1710]) -Greedy action tensor([ 0.3848, 0.1027, 0.0837, -0.2532]) tensor([0.3308, 0.2495, 0.2448, 0.1748]) -Greedy action tensor([ 0.5794, -0.2450, -0.0080, -0.5348]) tensor([0.4306, 0.1888, 0.2393, 0.1413]) -Greedy action tensor([ 0.2454, 0.1893, 0.0801, -0.2521]) tensor([0.2940, 0.2780, 0.2492, 0.1788]) -Greedy action tensor([ 0.3285, -0.0265, -0.0168, -0.1555]) tensor([0.3305, 0.2318, 0.2340, 0.2037]) -Greedy action tensor([ 0.5355, -0.0419, 0.0184, -0.3199]) tensor([0.3872, 0.2173, 0.2309, 0.1646]) -Greedy action tensor([ 0.7230, -0.4076, -0.0464, -0.3821]) tensor([0.4723, 0.1525, 0.2188, 0.1564]) -Greedy action tensor([ 0.5421, -0.1719, -0.0054, -0.4584]) tensor([0.4105, 0.2010, 0.2375, 0.1510]) -Greedy action tensor([ 0.4509, -0.0050, 0.0303, -0.3503]) tensor([0.3651, 0.2314, 0.2397, 0.1638]) -Greedy action tensor([ 0.6427, -0.2696, -0.0454, -0.4543]) tensor([0.4468, 0.1794, 0.2245, 0.1492]) -Greedy action tensor([ 0.5435, -0.0325, -0.0067, -0.3083]) tensor([0.3898, 0.2191, 0.2248, 0.1663]) -Greedy action tensor([ 0.5798, -0.2944, 0.1390, -0.7545]) tensor([0.4303, 0.1795, 0.2769, 0.1133]) -Greedy action tensor([ 0.6473, -0.5024, -0.1306, -0.5504]) tensor([0.4812, 0.1524, 0.2211, 0.1453]) -Greedy action tensor([ 0.5093, -0.2425, -0.0465, -0.4129]) tensor([0.4094, 0.1930, 0.2348, 0.1628]) -Greedy action tensor([ 0.3839, 0.2116, 0.0909, -0.1944]) tensor([0.3176, 0.2673, 0.2369, 0.1781]) -Greedy action tensor([ 0.7611, -0.3163, -0.0789, -0.3718]) tensor([0.4775, 0.1626, 0.2061, 0.1538]) -Greedy action tensor([ 0.7707, -0.4873, -0.0531, -0.6910]) tensor([0.5116, 0.1454, 0.2244, 0.1186]) -Greedy action tensor([ 0.7392, -0.4327, 0.1129, -0.7026]) tensor([0.4806, 0.1489, 0.2569, 0.1137]) -Greedy action tensor([ 0.5026, -0.3285, 0.1566, -0.2678]) tensor([0.3838, 0.1671, 0.2715, 0.1776]) -Greedy action tensor([ 0.8570, -0.2775, 0.0403, -0.5922]) tensor([0.5004, 0.1609, 0.2211, 0.1175]) -Greedy action tensor([ 0.7782, -0.1863, -0.0971, -0.5036]) tensor([0.4818, 0.1837, 0.2008, 0.1337]) -Greedy action tensor([ 1.1116, -0.7398, -0.0868, -0.7141]) tensor([0.6174, 0.0969, 0.1862, 0.0995]) -Greedy action tensor([ 0.4051, -0.2026, 0.0024, -0.3937]) tensor([0.3755, 0.2045, 0.2510, 0.1689]) -Greedy action tensor([ 0.2462, -0.1696, 0.0703, -0.1651]) tensor([0.3163, 0.2087, 0.2653, 0.2097]) -Greedy action tensor([ 0.3146, -0.2271, -0.0601, -0.3247]) tensor([0.3575, 0.2080, 0.2458, 0.1887]) -Greedy action tensor([ 0.7940, -0.2693, 0.0733, -0.7784]) tensor([0.4904, 0.1693, 0.2385, 0.1018]) -Greedy action tensor([ 0.9347, -0.4698, 0.0326, -0.7070]) tensor([0.5420, 0.1331, 0.2199, 0.1050]) -Greedy action tensor([ 0.5195, -0.3375, 0.3535, -0.4560]) tensor([0.3776, 0.1603, 0.3198, 0.1423]) -Greedy action tensor([ 0.5786, -0.0261, 0.0425, -0.3778]) tensor([0.3975, 0.2171, 0.2326, 0.1528]) -Greedy action tensor([ 0.3212, -0.0527, 0.0625, -0.4075]) tensor([0.3398, 0.2338, 0.2624, 0.1640]) -Greedy action tensor([ 0.6165, -0.3136, 0.0966, -0.6544]) tensor([0.4406, 0.1738, 0.2620, 0.1236]) -Greedy action tensor([ 1.0326, -0.6954, -0.2193, -0.7859]) tensor([0.6150, 0.1093, 0.1759, 0.0998]) -Greedy action tensor([ 0.7067, -0.3340, 0.0528, -0.4343]) tensor([0.4561, 0.1611, 0.2372, 0.1457]) -Greedy action tensor([ 0.3013, 0.0023, -0.0315, -0.2417]) tensor([0.3290, 0.2440, 0.2359, 0.1911]) -Greedy action tensor([ 0.3258, 0.0391, 0.0035, -0.3429]) tensor([0.3347, 0.2513, 0.2425, 0.1715]) -Greedy action tensor([ 0.2981, 0.1422, 0.0255, -0.1152]) tensor([0.3050, 0.2610, 0.2322, 0.2018]) -Greedy action tensor([ 0.5828, -0.4016, 0.1104, -0.6274]) tensor([0.4357, 0.1628, 0.2716, 0.1299]) -Greedy action tensor([ 0.5677, 0.0552, 0.0025, -0.2560]) tensor([0.3837, 0.2299, 0.2180, 0.1684]) -Greedy action tensor([ 0.4093, 0.0367, 0.0311, -0.1990]) tensor([0.3427, 0.2361, 0.2347, 0.1865]) -Greedy action tensor([ 0.4278, -0.0543, 0.0460, -0.3792]) tensor([0.3641, 0.2248, 0.2486, 0.1625]) -Greedy action tensor([ 0.4137, 0.1657, 0.1291, -0.2509]) tensor([0.3282, 0.2561, 0.2469, 0.1688]) -Greedy action tensor([ 0.2036, 0.1360, 0.0468, -0.1277]) tensor([0.2851, 0.2665, 0.2437, 0.2047]) -Greedy action tensor([ 0.3609, -0.0547, 0.0241, -0.2578]) tensor([0.3433, 0.2266, 0.2452, 0.1849]) -Greedy action tensor([ 0.4831, -0.0719, -0.0941, -0.2545]) tensor([0.3826, 0.2196, 0.2148, 0.1830]) -Greedy action tensor([ 0.7270, -0.2557, -0.1194, -0.3752]) tensor([0.4683, 0.1753, 0.2009, 0.1555]) -Greedy action tensor([ 0.4856, -0.2767, 0.0390, -0.3904]) tensor([0.3964, 0.1850, 0.2536, 0.1651]) -Greedy action tensor([ 0.5861, -0.3027, 0.0456, -0.5912]) tensor([0.4345, 0.1786, 0.2530, 0.1339]) -Greedy action tensor([ 1.2262, -0.6311, 0.0414, -0.6410]) tensor([0.6186, 0.0966, 0.1892, 0.0956]) -Greedy action tensor([ 0.0798, 0.2090, 0.1537, -0.2000]) tensor([0.2518, 0.2866, 0.2712, 0.1904]) -Greedy action tensor([ 0.4241, 0.0409, -0.0427, -0.3673]) tensor([0.3621, 0.2468, 0.2270, 0.1641]) -Greedy action tensor([ 1.1301, -0.6005, -0.3605, -0.1820]) tensor([0.5982, 0.1060, 0.1347, 0.1611]) -Greedy action tensor([ 0.7540, -0.4745, -0.1108, 0.0250]) tensor([0.4553, 0.1333, 0.1918, 0.2196]) -Greedy action tensor([ 1.3949, -0.6032, -0.2601, 0.1193]) tensor([0.6227, 0.0844, 0.1190, 0.1739]) -Greedy action tensor([ 0.7311, -0.2574, 0.0525, -0.2559]) tensor([0.4440, 0.1652, 0.2253, 0.1655]) -Greedy action tensor([ 0.9591, -0.5412, -0.1048, 0.0623]) tensor([0.5061, 0.1129, 0.1746, 0.2064]) -Greedy action tensor([ 1.4216, -0.7607, -0.4233, 0.4649]) tensor([0.6042, 0.0681, 0.0955, 0.2321]) -Greedy action tensor([ 1.1874, -0.7205, -0.4570, 0.1217]) tensor([0.5931, 0.0880, 0.1145, 0.2043]) -Greedy action tensor([ 1.1257, -0.1062, 0.1253, -0.1873]) tensor([0.5185, 0.1513, 0.1907, 0.1395]) -Greedy action tensor([ 0.7398, -0.4398, 0.0738, 0.0648]) tensor([0.4291, 0.1319, 0.2205, 0.2185]) -Greedy action tensor([ 0.7465, -0.4870, -0.2194, 0.2220]) tensor([0.4417, 0.1287, 0.1681, 0.2614]) -Greedy action tensor([ 1.0489, -0.4621, -0.3728, -0.1540]) tensor([0.5674, 0.1252, 0.1369, 0.1704]) -Greedy action tensor([ 0.5927, -0.4506, 0.2524, -0.2329]) tensor([0.3997, 0.1408, 0.2844, 0.1751]) -Greedy action tensor([ 1.1285, -0.7161, -0.4085, 0.4818]) tensor([0.5272, 0.0833, 0.1133, 0.2761]) -Greedy action tensor([ 0.3681, -0.4406, 0.0914, -0.0961]) tensor([0.3531, 0.1573, 0.2677, 0.2219]) -Greedy action tensor([ 1.1540, -0.3101, -0.0060, -0.2008]) tensor([0.5547, 0.1283, 0.1739, 0.1431]) -Greedy action tensor([ 0.4330, -0.4444, 0.0280, -0.0591]) tensor([0.3712, 0.1544, 0.2476, 0.2269]) -Greedy action tensor([ 0.6610, -0.6525, -0.2152, 0.1495]) tensor([0.4377, 0.1177, 0.1822, 0.2624]) -Greedy action tensor([ 1.3010, -0.7018, -0.6788, 0.2321]) tensor([0.6187, 0.0835, 0.0854, 0.2124]) -Greedy action tensor([ 0.6124, -0.3986, 0.1469, -0.0303]) tensor([0.3972, 0.1445, 0.2494, 0.2089]) -Greedy action tensor([ 0.6238, -0.1170, -0.4670, 0.3557]) tensor([0.3880, 0.1850, 0.1303, 0.2967]) -Greedy action tensor([ 1.6398, -0.3873, -0.6514, 0.0221]) tensor([0.6987, 0.0920, 0.0707, 0.1386]) -Greedy action tensor([ 1.3476, -0.3865, -0.4086, -0.1380]) tensor([0.6347, 0.1121, 0.1096, 0.1437]) -Greedy action tensor([ 0.7505, -0.3446, 0.0299, -0.2630]) tensor([0.4579, 0.1532, 0.2227, 0.1662]) -Greedy action tensor([ 1.3067, -0.5847, -0.3098, -0.1347]) tensor([0.6305, 0.0951, 0.1252, 0.1492]) -Greedy action tensor([ 0.7501, -0.6226, -0.0916, -0.0550]) tensor([0.4692, 0.1189, 0.2022, 0.2097]) -Greedy action tensor([ 0.4515, -0.4627, 0.0486, -0.1416]) tensor([0.3814, 0.1529, 0.2549, 0.2108]) -Greedy action tensor([ 1.1731, -0.6566, -0.4840, 0.8849]) tensor([0.4760, 0.0764, 0.0908, 0.3568]) -Greedy action tensor([ 1.4006, -0.5635, -0.1921, 0.1876]) tensor([0.6094, 0.0855, 0.1239, 0.1812]) -Greedy action tensor([ 1.2909, -0.5723, -0.5469, 0.4559]) tensor([0.5720, 0.0888, 0.0910, 0.2482]) -Greedy action tensor([ 1.4693, -0.6143, -0.3007, -0.1710]) tensor([0.6717, 0.0836, 0.1144, 0.1303]) -Greedy action tensor([ 1.0815, -0.3468, -0.4226, 0.3746]) tensor([0.5115, 0.1226, 0.1137, 0.2523]) -Greedy action tensor([ 0.4769, -0.4625, -0.0636, -0.0030]) tensor([0.3858, 0.1508, 0.2247, 0.2387]) -Greedy action tensor([ 0.9123, -0.5566, -0.3134, 0.1509]) tensor([0.5023, 0.1156, 0.1475, 0.2346]) -Greedy action tensor([ 1.0468, -0.6521, -0.2083, 0.3317]) tensor([0.5110, 0.0934, 0.1457, 0.2499]) -Greedy action tensor([ 0.9138, -0.7099, -0.3944, 0.2480]) tensor([0.5047, 0.0995, 0.1364, 0.2594]) -Greedy action tensor([ 1.6517, -0.7904, -0.2068, 0.1481]) tensor([0.6825, 0.0594, 0.1064, 0.1517]) -Greedy action tensor([ 0.8434, -0.4255, -0.0702, -0.1324]) tensor([0.4856, 0.1365, 0.1948, 0.1830]) -Greedy action tensor([ 0.8249, -0.5001, -0.0212, 0.1054]) tensor([0.4583, 0.1218, 0.1967, 0.2232]) -Greedy action tensor([ 1.2471, -0.4758, -0.2792, -0.0592]) tensor([0.6000, 0.1071, 0.1304, 0.1625]) -Greedy action tensor([ 1.0146, -0.5412, -0.6123, 0.2257]) tensor([0.5371, 0.1133, 0.1056, 0.2440]) -Greedy action tensor([ 1.4056, -0.5042, -0.1109, 0.4078]) tensor([0.5759, 0.0853, 0.1264, 0.2123]) -Greedy action tensor([ 0.7538, -0.3473, -0.2939, -0.1618]) tensor([0.4800, 0.1596, 0.1683, 0.1921]) -Greedy action tensor([ 1.3224, -0.4048, -0.2256, -0.2533]) tensor([0.6261, 0.1113, 0.1331, 0.1295]) -Greedy action tensor([ 0.8897, -0.4312, -0.3964, -0.0288]) tensor([0.5148, 0.1374, 0.1423, 0.2055]) -Greedy action tensor([ 0.9364, -0.5057, -0.2870, 0.3379]) tensor([0.4807, 0.1137, 0.1414, 0.2642]) -Greedy action tensor([ 1.0571, -0.5109, -0.0076, -0.2212]) tensor([0.5459, 0.1138, 0.1882, 0.1520]) -Greedy action tensor([ 0.6652, -0.5284, -0.1800, 0.1862]) tensor([0.4252, 0.1289, 0.1826, 0.2633]) -Greedy action tensor([ 1.1805, -0.5497, -0.4680, 0.2484]) tensor([0.5671, 0.1005, 0.1091, 0.2233]) -Greedy action tensor([ 0.6824, -0.2597, -0.2145, -0.3312]) tensor([0.4628, 0.1804, 0.1888, 0.1680]) -Greedy action tensor([ 1.2632, -0.2885, 0.0483, -0.0950]) tensor([0.5663, 0.1200, 0.1681, 0.1456]) -Greedy action tensor([ 1.0212, -0.4646, -0.2035, 0.3001]) tensor([0.4984, 0.1128, 0.1465, 0.2423]) -Greedy action tensor([ 0.7481, -0.5579, -0.2566, 0.2012]) tensor([0.4513, 0.1223, 0.1653, 0.2612]) -Greedy action tensor([ 1.1236, -0.7558, -0.2652, -0.0702]) tensor([0.5865, 0.0895, 0.1462, 0.1777]) -Greedy action tensor([ 0.9957, -0.5228, -0.0577, -0.2447]) tensor([0.5385, 0.1180, 0.1878, 0.1558]) -Greedy action tensor([ 0.8655, -0.5573, -0.0728, 0.0496]) tensor([0.4820, 0.1162, 0.1886, 0.2132]) -Greedy action tensor([ 1.0992, -0.6461, -0.6593, 0.8803]) tensor([0.4651, 0.0812, 0.0801, 0.3736]) -Greedy action tensor([ 0.9390, -0.5198, -0.1659, 0.1844]) tensor([0.4917, 0.1143, 0.1629, 0.2312]) -Greedy action tensor([ 0.9609, -0.5500, -0.5293, 0.1216]) tensor([0.5325, 0.1175, 0.1200, 0.2300]) -Greedy action tensor([ 1.1280, -0.4276, -0.2057, -0.1698]) tensor([0.5722, 0.1208, 0.1508, 0.1563]) -Greedy action tensor([ 1.3652, -0.6394, -0.2911, 0.1704]) tensor([0.6141, 0.0827, 0.1172, 0.1859]) -Greedy action tensor([ 0.9885, -0.6212, -0.5864, 0.2807]) tensor([0.5264, 0.1053, 0.1090, 0.2594]) -Greedy action tensor([ 1.3871, -0.5945, -0.3926, 0.4559]) tensor([0.5880, 0.0811, 0.0992, 0.2317]) -Greedy action tensor([ 0.5287, -0.0868, -0.0592, -0.2692]) tensor([0.3928, 0.2122, 0.2182, 0.1768]) -Greedy action tensor([ 2.0699, -0.5901, -0.3725, 0.2785]) tensor([0.7555, 0.0528, 0.0657, 0.1260]) -Greedy action tensor([ 0.7218, -0.3771, -0.5461, -0.2757]) tensor([0.5042, 0.1680, 0.1419, 0.1859]) -Greedy action tensor([ 1.3607, -0.5116, -0.0842, 0.2113]) tensor([0.5861, 0.0901, 0.1382, 0.1857]) -Greedy action tensor([ 0.6016, -0.3761, -0.4127, 0.0956]) tensor([0.4270, 0.1606, 0.1549, 0.2575]) -Greedy action tensor([ 0.1306, -0.1374, -0.4282, 0.0994]) tensor([0.3025, 0.2314, 0.1730, 0.2932]) -Greedy action tensor([ 0.6674, -0.4455, 0.0108, -0.3785]) tensor([0.4548, 0.1495, 0.2359, 0.1598]) -Greedy action tensor([ 1.1425, -0.6069, -0.4451, 1.1052]) tensor([0.4270, 0.0743, 0.0873, 0.4114]) -Greedy action tensor([ 0.3574, -0.3757, -0.2364, -0.1020]) tensor([0.3753, 0.1803, 0.2073, 0.2371]) -Greedy action tensor([ 1.0367, -0.6965, -0.6150, 0.8674]) tensor([0.4519, 0.0799, 0.0866, 0.3815]) -Greedy action tensor([ 0.8274, -0.2468, 0.0272, -0.1784]) tensor([0.4637, 0.1584, 0.2083, 0.1696]) -Greedy action tensor([ 1.0025, -0.4833, -0.2631, -0.1281]) tensor([0.5461, 0.1236, 0.1540, 0.1763]) -Greedy action tensor([ 0.6023, -0.3151, -0.3626, 0.3765]) tensor([0.3878, 0.1550, 0.1478, 0.3094]) -Greedy action tensor([ 1.1364, -0.5985, -0.5754, 1.0842]) tensor([0.4336, 0.0765, 0.0783, 0.4116]) -Greedy action tensor([ 1.4152, -0.7370, -0.2777, 0.5213]) tensor([0.5851, 0.0680, 0.1076, 0.2393]) -Greedy action tensor([ 0.8647, -0.6841, -0.4441, 0.2338]) tensor([0.4963, 0.1055, 0.1341, 0.2641]) -Greedy action tensor([ 0.8350, -0.3877, -0.5863, 0.6710]) tensor([0.4193, 0.1235, 0.1012, 0.3559]) -Greedy action tensor([ 0.6686, -0.6135, -0.1532, 0.0542]) tensor([0.4429, 0.1229, 0.1947, 0.2396]) -Greedy action tensor([ 0.8110, -0.3841, -0.4932, -0.0287]) tensor([0.4985, 0.1509, 0.1353, 0.2153]) -Greedy action tensor([-2.0121, -0.8406, 1.1731, 0.5351]) tensor([0.0243, 0.0784, 0.5871, 0.3102]) -Greedy action tensor([-0.8371, -0.5745, 0.1891, 0.2469]) tensor([0.1243, 0.1616, 0.3468, 0.3674]) -Greedy action tensor([-1.8341, -0.9556, 0.2646, -0.4014]) tensor([0.0635, 0.1528, 0.5177, 0.2660]) -Greedy action tensor([-1.9745, -0.6018, 1.1532, 0.3934]) tensor([0.0260, 0.1026, 0.5937, 0.2777]) -Greedy action tensor([-1.6875, -0.2705, 0.1831, -0.4727]) tensor([0.0667, 0.2752, 0.4332, 0.2248]) -Greedy action tensor([-1.2108, -0.5580, 0.4330, 0.7664]) tensor([0.0653, 0.1254, 0.3378, 0.4715]) -Greedy action tensor([-1.6023, -0.3656, 0.5576, 0.2551]) tensor([0.0512, 0.1764, 0.4441, 0.3282]) -Greedy action tensor([-0.8869, -0.5533, 0.1867, 0.3541]) tensor([0.1139, 0.1590, 0.3332, 0.3939]) -Greedy action tensor([-1.7239, -0.6269, 0.1678, -0.4219]) tensor([0.0699, 0.2094, 0.4636, 0.2571]) -Greedy action tensor([-1.7399, -0.4899, 0.5757, 0.0072]) tensor([0.0491, 0.1714, 0.4976, 0.2818]) -Greedy action tensor([-1.7783, -0.4780, 0.5866, -0.0284]) tensor([0.0475, 0.1742, 0.5052, 0.2731]) -Greedy action tensor([-1.4185, -0.5325, 0.4000, 0.1710]) tensor([0.0690, 0.1674, 0.4253, 0.3383]) -Greedy action tensor([-1.8393, -0.9606, 0.2839, -0.4388]) tensor([0.0632, 0.1522, 0.5282, 0.2564]) -Greedy action tensor([-1.5651, -0.6286, 0.4210, -0.2917]) tensor([0.0694, 0.1770, 0.5057, 0.2479]) -Greedy action tensor([-2.0191, -0.7738, 0.5773, -0.0547]) tensor([0.0400, 0.1388, 0.5362, 0.2850]) -Greedy action tensor([-1.8212, -0.8153, 0.1524, -0.3653]) tensor([0.0657, 0.1797, 0.4729, 0.2818]) -Greedy action tensor([-1.7554, -0.5447, 0.5186, -0.0220]) tensor([0.0507, 0.1701, 0.4925, 0.2868]) -Greedy action tensor([-1.7785, -0.5571, 0.2956, -0.2036]) tensor([0.0582, 0.1974, 0.4632, 0.2812]) -Greedy action tensor([-1.9448, -0.7138, 0.8198, -0.0183]) tensor([0.0368, 0.1261, 0.5843, 0.2528]) -Greedy action tensor([-1.6921, -0.7481, 0.2579, -0.3134]) tensor([0.0686, 0.1764, 0.4825, 0.2725]) -Greedy action tensor([-1.6990, -0.4359, 0.0371, -0.4080]) tensor([0.0722, 0.2554, 0.4098, 0.2626]) -Greedy action tensor([-1.6686, -0.5305, 0.7660, -0.1063]) tensor([0.0493, 0.1537, 0.5621, 0.2349]) -Greedy action tensor([-1.1763, -0.5822, 0.4375, 0.2786]) tensor([0.0825, 0.1495, 0.4144, 0.3536]) -Greedy action tensor([-1.7016, -0.8931, 0.5968, -0.2101]) tensor([0.0567, 0.1272, 0.5643, 0.2518]) -Greedy action tensor([-0.6672, -0.5544, 0.1648, 0.2920]) tensor([0.1423, 0.1593, 0.3270, 0.3714]) -Greedy action tensor([-1.7942, -0.8213, 0.1850, -0.3817]) tensor([0.0667, 0.1765, 0.4828, 0.2740]) -Greedy action tensor([-1.6926, -0.7233, 0.2641, -0.5224]) tensor([0.0718, 0.1892, 0.5078, 0.2313]) -Greedy action tensor([-1.7706, -0.4969, 0.5930, -0.1013]) tensor([0.0488, 0.1742, 0.5182, 0.2588]) -Greedy action tensor([-1.3954, -0.5840, 0.0116, -0.3427]) tensor([0.0980, 0.2207, 0.4003, 0.2809]) -Greedy action tensor([-1.1734, -0.6321, 0.5957, 0.0494]) tensor([0.0835, 0.1434, 0.4896, 0.2835]) -Greedy action tensor([-1.2868, -0.5731, 0.3425, 0.2342]) tensor([0.0786, 0.1605, 0.4010, 0.3599]) -Greedy action tensor([-1.6666, -0.3851, 0.6349, 0.3150]) tensor([0.0458, 0.1649, 0.4572, 0.3321]) -Greedy action tensor([-0.6583, -0.5658, 0.2657, 0.2665]) tensor([0.1401, 0.1537, 0.3530, 0.3533]) -Greedy action tensor([-0.7699, -0.5981, 0.2316, 0.3138]) tensor([0.1271, 0.1510, 0.3461, 0.3758]) -Greedy action tensor([-1.8258, -0.4502, 0.8483, 0.3168]) tensor([0.0357, 0.1414, 0.5182, 0.3046]) -Greedy action tensor([-1.0129, -0.3220, 0.5747, -0.5678]) tensor([0.1058, 0.2112, 0.5178, 0.1652]) -Greedy action tensor([-1.7296, -0.4903, 0.6140, 0.0997]) tensor([0.0474, 0.1636, 0.4937, 0.2952]) -Greedy action tensor([-2.0326, -0.8259, 1.4171, 0.7574]) tensor([0.0192, 0.0641, 0.6043, 0.3124]) -Greedy action tensor([-1.3658, -0.3618, 0.4313, -0.3384]) tensor([0.0796, 0.2174, 0.4805, 0.2225]) -Greedy action tensor([-1.8931, -0.4735, 1.1063, 0.6187]) tensor([0.0266, 0.1102, 0.5348, 0.3284]) -Greedy action tensor([-1.3411, -0.6034, 0.8064, 0.9349]) tensor([0.0467, 0.0978, 0.4003, 0.4552]) -Greedy action tensor([-1.9529, -0.8957, 0.3183, -0.2798]) tensor([0.0529, 0.1523, 0.5128, 0.2820]) -Greedy action tensor([-1.6139, -0.9201, 0.1985, -0.5709]) tensor([0.0836, 0.1673, 0.5120, 0.2372]) -Greedy action tensor([-1.6167, -0.6248, 0.7346, 0.0221]) tensor([0.0517, 0.1394, 0.5428, 0.2662]) -Greedy action tensor([-1.4301, -0.5999, 0.5321, 0.2601]) tensor([0.0632, 0.1449, 0.4495, 0.3424]) -Greedy action tensor([-1.3350, -0.5243, 0.3483, 0.2535]) tensor([0.0739, 0.1663, 0.3979, 0.3619]) -Greedy action tensor([-1.7004, -0.5724, 0.5754, -0.0734]) tensor([0.0529, 0.1634, 0.5147, 0.2690]) -Greedy action tensor([-1.7877, -0.4326, 0.5850, -0.0745]) tensor([0.0473, 0.1833, 0.5071, 0.2623]) -Greedy action tensor([-0.8834, 0.0267, 0.2718, -0.1833]) tensor([0.1153, 0.2865, 0.3660, 0.2322]) -Greedy action tensor([-1.9612, -0.8278, 0.1104, -0.2818]) tensor([0.0575, 0.1785, 0.4560, 0.3081]) -Greedy action tensor([-1.2169, -0.4654, 0.3427, 0.3777]) tensor([0.0781, 0.1656, 0.3715, 0.3848]) -Greedy action tensor([-1.5235, -0.5697, 0.5344, 0.3070]) tensor([0.0566, 0.1470, 0.4433, 0.3531]) -Greedy action tensor([-2.0166, -0.8540, 0.4352, -0.1347]) tensor([0.0447, 0.1429, 0.5189, 0.2935]) -Greedy action tensor([-1.4483, -0.3789, 0.8238, 0.9332]) tensor([0.0409, 0.1192, 0.3970, 0.4429]) -Greedy action tensor([-1.9719, -0.9524, 0.4249, -0.2159]) tensor([0.0487, 0.1349, 0.5347, 0.2817]) -Greedy action tensor([-1.8119, -0.7883, 0.1146, -0.3706]) tensor([0.0672, 0.1871, 0.4615, 0.2841]) -Greedy action tensor([-1.1332, -0.3203, 0.1418, -0.3340]) tensor([0.1104, 0.2489, 0.3951, 0.2455]) -Greedy action tensor([-1.8154, -0.7856, 0.3798, -0.2277]) tensor([0.0566, 0.1584, 0.5082, 0.2768]) -Greedy action tensor([-1.5758, -0.5086, 0.4715, 0.0312]) tensor([0.0601, 0.1747, 0.4655, 0.2997]) -Greedy action tensor([-1.6043, -0.5467, 0.4877, 0.0245]) tensor([0.0586, 0.1686, 0.4743, 0.2985]) -Greedy action tensor([-0.9036, -0.4207, 0.4232, 1.1891]) tensor([0.0690, 0.1118, 0.2600, 0.5592]) -Greedy action tensor([-1.9147, -0.5936, 0.8719, 0.3920]) tensor([0.0322, 0.1208, 0.5232, 0.3237]) -Greedy action tensor([-2.0079, -0.9488, 0.5031, -0.1826]) tensor([0.0446, 0.1287, 0.5497, 0.2769]) -Greedy action tensor([-1.4261, -0.6348, 0.4302, 0.0633]) tensor([0.0712, 0.1571, 0.4558, 0.3158]) -Greedy action tensor([-0.6051, -0.5639, 0.2969, 0.3040]) tensor([0.1431, 0.1491, 0.3526, 0.3552]) -Greedy action tensor([-2.0381, -0.9040, 0.4427, -0.1297]) tensor([0.0439, 0.1363, 0.5241, 0.2957]) -Greedy action tensor([-1.9580, -0.5955, 0.9018, 0.1320]) tensor([0.0328, 0.1283, 0.5734, 0.2655]) -Greedy action tensor([-1.8159, -0.7017, 0.2413, -0.3207]) tensor([0.0612, 0.1866, 0.4791, 0.2731]) -Greedy action tensor([-1.1326, -0.5769, 0.2365, 0.2990]) tensor([0.0921, 0.1605, 0.3620, 0.3854]) -Greedy action tensor([-1.7793, -0.5347, 1.1240, 0.6327]) tensor([0.0295, 0.1025, 0.5385, 0.3295]) -Greedy action tensor([-0.8546, -0.2651, 0.3455, -0.1078]) tensor([0.1215, 0.2190, 0.4033, 0.2563]) -Greedy action tensor([-1.9930, -0.8959, 0.7080, -0.0188]) tensor([0.0383, 0.1148, 0.5709, 0.2760]) -Greedy action tensor([-1.9355, -0.8051, 0.2439, -0.2507]) tensor([0.0546, 0.1690, 0.4823, 0.2941]) -Greedy action tensor([-1.8859, -0.6707, 0.2079, -0.2839]) tensor([0.0573, 0.1932, 0.4651, 0.2844]) -Greedy action tensor([-1.3610, -1.0493, 0.0730, -0.7591]) tensor([0.1192, 0.1628, 0.5002, 0.2177]) -Greedy action tensor([-1.8759, -0.6579, 0.8524, 0.1813]) tensor([0.0363, 0.1229, 0.5564, 0.2844]) -Greedy action tensor([-1.3863, -0.5474, 0.4740, 0.4387]) tensor([0.0627, 0.1451, 0.4031, 0.3891]) -Greedy action tensor([-1.7216, -0.4923, 1.2842, 0.9573]) tensor([0.0255, 0.0872, 0.5155, 0.3717]) -Greedy action tensor([-1.8258, -0.9048, 0.0874, -0.3823]) tensor([0.0689, 0.1730, 0.4665, 0.2917]) -Greedy action tensor([-0.2096, -0.2587, 0.3122, 0.6897]) tensor([0.1641, 0.1562, 0.2765, 0.4032]) -Greedy action tensor([-0.8855, -0.3713, -0.5145, -0.3232]) tensor([0.1702, 0.2846, 0.2466, 0.2986]) -Greedy action tensor([ 0.5759, 0.3640, -0.1714, 0.7075]) tensor([0.2921, 0.2363, 0.1384, 0.3332]) -Greedy action tensor([ 1.2880, -0.3291, 0.1403, -0.0090]) tensor([0.5589, 0.1109, 0.1774, 0.1528]) -Greedy action tensor([ 1.4460, -0.9174, 0.0784, 1.3187]) tensor([0.4486, 0.0422, 0.1143, 0.3950]) -Greedy action tensor([ 1.6733, -0.4136, 1.6997, 0.6782]) tensor([0.3967, 0.0492, 0.4074, 0.1467]) -Greedy action tensor([1.0355, 0.0640, 0.2410, 1.1759]) tensor([0.3355, 0.1270, 0.1516, 0.3860]) -Greedy action tensor([ 1.9586, -0.2861, 0.1881, 1.5672]) tensor([0.5122, 0.0543, 0.0872, 0.3463]) -Greedy action tensor([ 1.2608, -1.3396, 0.1474, 1.1366]) tensor([0.4375, 0.0325, 0.1437, 0.3864]) -Greedy action tensor([ 1.5140, -0.1716, -0.0575, 1.9914]) tensor([0.3328, 0.0617, 0.0691, 0.5364]) -Greedy action tensor([1.2479, 0.1414, 1.0263, 1.0462]) tensor([0.3391, 0.1121, 0.2717, 0.2771]) -Greedy action tensor([ 0.0647, -0.2840, 1.4366, 0.4401]) tensor([0.1408, 0.0993, 0.5550, 0.2049]) -Greedy action tensor([1.0471, 1.0170, 0.2966, 0.5589]) tensor([0.3272, 0.3175, 0.1545, 0.2008]) -Greedy action tensor([0.8413, 0.0303, 0.4961, 0.5078]) tensor([0.3486, 0.1549, 0.2468, 0.2497]) -Greedy action tensor([ 1.5683, -0.2345, 1.6172, 0.7110]) tensor([0.3789, 0.0625, 0.3979, 0.1608]) -Greedy action tensor([ 1.6388, -0.2212, -0.3510, 0.8954]) tensor([0.5656, 0.0881, 0.0773, 0.2690]) -Greedy action tensor([ 0.8802, -1.4409, -0.7911, 0.5777]) tensor([0.4938, 0.0485, 0.0928, 0.3649]) -Greedy action tensor([ 1.7532, -0.9437, 0.0134, 0.6080]) tensor([0.6406, 0.0432, 0.1124, 0.2038]) -Greedy action tensor([ 1.2816, -0.1624, -1.2901, 1.4191]) tensor([0.4065, 0.0959, 0.0311, 0.4665]) -Greedy action tensor([ 0.7661, -0.4779, 0.2658, 0.9667]) tensor([0.3209, 0.0925, 0.1945, 0.3921]) -Greedy action tensor([ 0.6530, -0.2025, -0.7231, 1.2161]) tensor([0.2912, 0.1238, 0.0736, 0.5114]) -Greedy action tensor([ 0.8499, -0.2016, 0.4449, 0.5642]) tensor([0.3613, 0.1262, 0.2410, 0.2715]) -Greedy action tensor([ 0.9380, 0.4124, -0.4737, 0.8377]) tensor([0.3650, 0.2158, 0.0890, 0.3302]) -Greedy action tensor([ 1.7491, 0.5353, -0.6121, 0.5159]) tensor([0.5943, 0.1765, 0.0560, 0.1731]) -Greedy action tensor([ 1.0242, -0.1115, 0.8474, 1.3380]) tensor([0.2835, 0.0910, 0.2375, 0.3880]) -Greedy action tensor([ 1.0485, -0.1101, 1.3570, 0.1836]) tensor([0.3230, 0.1014, 0.4397, 0.1360]) -Greedy action tensor([ 0.7398, -0.5774, 0.3437, -0.2205]) tensor([0.4304, 0.1153, 0.2896, 0.1647]) -Greedy action tensor([ 0.3874, -0.1836, 0.3069, 0.1844]) tensor([0.3027, 0.1710, 0.2793, 0.2471]) -Greedy action tensor([ 1.4866, 0.0234, -0.6145, 1.1044]) tensor([0.4911, 0.1137, 0.0601, 0.3351]) -Greedy action tensor([ 1.1052, 0.0632, -0.6120, 0.6370]) tensor([0.4633, 0.1634, 0.0832, 0.2901]) -Greedy action tensor([ 1.2631, 0.0272, -0.0570, 1.7013]) tensor([0.3218, 0.0935, 0.0860, 0.4988]) -Greedy action tensor([ 1.4708, -0.4076, 1.1222, 0.2900]) tensor([0.4618, 0.0706, 0.3259, 0.1418]) -Greedy action tensor([ 0.5331, -0.7785, 1.1776, 0.3474]) tensor([0.2497, 0.0673, 0.4757, 0.2074]) -Greedy action tensor([ 1.4222, 0.1502, -0.3667, 0.8002]) tensor([0.5040, 0.1412, 0.0842, 0.2706]) -Greedy action tensor([1.5774, 0.1926, 0.2409, 1.0863]) tensor([0.4706, 0.1178, 0.1237, 0.2880]) -Greedy action tensor([ 0.3729, -1.0578, 0.3941, 0.4715]) tensor([0.2972, 0.0711, 0.3036, 0.3281]) -Greedy action tensor([ 0.1723, 1.1617, -0.0309, 0.7939]) tensor([0.1570, 0.4224, 0.1282, 0.2924]) -Greedy action tensor([ 1.8087, -0.4558, 1.3487, 1.0760]) tensor([0.4513, 0.0469, 0.2849, 0.2169]) -Greedy action tensor([ 1.3912, 0.1160, -0.4384, 0.7333]) tensor([0.5108, 0.1427, 0.0820, 0.2646]) -Greedy action tensor([0.7001, 0.3701, 0.2080, 0.1352]) tensor([0.3450, 0.2480, 0.2109, 0.1961]) -Greedy action tensor([-0.1391, 0.7113, -0.4029, 0.7769]) tensor([0.1513, 0.3542, 0.1162, 0.3782]) -Greedy action tensor([0.3754, 0.1125, 0.2037, 0.0070]) tensor([0.3028, 0.2328, 0.2550, 0.2095]) -Greedy action tensor([0.4534, 0.3110, 0.6034, 1.2166]) tensor([0.1933, 0.1676, 0.2245, 0.4146]) -Greedy action tensor([ 0.8682, 1.0893, -0.1984, 0.0034]) tensor([0.3319, 0.4141, 0.1142, 0.1398]) -Greedy action tensor([ 0.7786, -0.3414, -0.4180, 1.6084]) tensor([0.2550, 0.0832, 0.0771, 0.5847]) -Greedy action tensor([1.7409, 0.1216, 0.4931, 1.8101]) tensor([0.3911, 0.0775, 0.1123, 0.4191]) -Greedy action tensor([1.1118, 0.5348, 0.3715, 0.2517]) tensor([0.4062, 0.2281, 0.1937, 0.1719]) -Greedy action tensor([ 0.6487, 0.1488, -0.3078, 1.0784]) tensor([0.2835, 0.1719, 0.1089, 0.4356]) -Greedy action tensor([ 1.5480, -0.0457, 0.8047, 0.9645]) tensor([0.4471, 0.0908, 0.2126, 0.2495]) -Greedy action tensor([0.8652, 0.1022, 0.4728, 0.1404]) tensor([0.3808, 0.1775, 0.2572, 0.1845]) -Greedy action tensor([ 0.9797, -0.5761, 0.9683, -0.0585]) tensor([0.3916, 0.0826, 0.3871, 0.1387]) -Greedy action tensor([ 1.3135, -0.2442, -0.9899, 1.6673]) tensor([0.3656, 0.0770, 0.0365, 0.5208]) -Greedy action tensor([ 1.2095, -1.4406, 0.5263, 0.4159]) tensor([0.4931, 0.0348, 0.2490, 0.2230]) -Greedy action tensor([ 0.3641, 0.4591, 0.2505, -0.6495]) tensor([0.2980, 0.3277, 0.2661, 0.1082]) -Greedy action tensor([ 0.6346, -0.7323, -0.4764, 1.2897]) tensor([0.2849, 0.0726, 0.0938, 0.5486]) -Greedy action tensor([ 0.3617, 0.7031, -0.5430, 0.7477]) tensor([0.2335, 0.3285, 0.0945, 0.3435]) -Greedy action tensor([ 1.5796, 0.3185, -0.3724, 1.2473]) tensor([0.4667, 0.1322, 0.0663, 0.3348]) -Greedy action tensor([ 0.8114, 0.7776, -0.6443, 0.6667]) tensor([0.3262, 0.3154, 0.0761, 0.2823]) -Greedy action tensor([0.7167, 0.3119, 1.2332, 0.1144]) tensor([0.2570, 0.1715, 0.4308, 0.1407]) -Greedy action tensor([ 0.8862, -0.3463, 0.1390, 1.6827]) tensor([0.2511, 0.0732, 0.1189, 0.5568]) -Greedy action tensor([1.1007, 0.2040, 1.2042, 0.3592]) tensor([0.3341, 0.1363, 0.3705, 0.1591]) -Greedy action tensor([ 0.7322, 0.5457, -0.5924, 0.0039]) tensor([0.3878, 0.3218, 0.1031, 0.1872]) -Greedy action tensor([ 1.0819, 0.9037, -0.0341, 0.7111]) tensor([0.3503, 0.2931, 0.1148, 0.2418]) -Greedy action tensor([1.0278, 0.3053, 0.2245, 1.0373]) tensor([0.3398, 0.1650, 0.1522, 0.3430]) -Greedy action tensor([ 1.7277, 0.0032, -0.5991, 1.5275]) tensor([0.4775, 0.0851, 0.0466, 0.3908]) -Greedy action tensor([ 0.4536, -0.9495, 0.0201, 0.7696]) tensor([0.3062, 0.0753, 0.1985, 0.4200]) -Greedy action tensor([ 0.7117, 0.4693, -0.6993, 1.1779]) tensor([0.2760, 0.2166, 0.0673, 0.4400]) -Greedy action tensor([ 1.2641, 0.4589, -0.3009, 1.1719]) tensor([0.3894, 0.1741, 0.0814, 0.3551]) -Greedy action tensor([ 1.0268, -0.2347, 0.0606, 0.2099]) tensor([0.4749, 0.1345, 0.1807, 0.2098]) -Greedy action tensor([ 1.7273, -0.8274, 1.4373, 0.8538]) tensor([0.4457, 0.0346, 0.3335, 0.1861]) -Greedy action tensor([0.8473, 0.4219, 0.2704, 0.4634]) tensor([0.3453, 0.2256, 0.1939, 0.2352]) -Greedy action tensor([ 0.3587, 0.0397, -0.1771, 1.0412]) tensor([0.2331, 0.1694, 0.1364, 0.4612]) -Greedy action tensor([ 1.1111, -0.3897, -0.2604, 0.2754]) tensor([0.5235, 0.1167, 0.1328, 0.2270]) -Greedy action tensor([ 0.9670, -0.2278, 0.6285, 2.0415]) tensor([0.2023, 0.0612, 0.1442, 0.5923]) -Greedy action tensor([ 0.6297, -0.2474, 0.7719, -0.0701]) tensor([0.3262, 0.1357, 0.3761, 0.1620]) -Greedy action tensor([ 0.5660, 0.2339, -0.5659, 0.4719]) tensor([0.3390, 0.2432, 0.1093, 0.3085]) -Greedy action tensor([ 0.8224, -0.5232, -0.8657, 1.2365]) tensor([0.3380, 0.0880, 0.0625, 0.5115]) -Greedy action tensor([1.1334, 0.1228, 0.7757, 1.1742]) tensor([0.3221, 0.1172, 0.2252, 0.3355]) -Greedy action tensor([ 1.2627, -0.3659, 0.7335, 1.2586]) tensor([0.3596, 0.0705, 0.2118, 0.3581]) -Greedy action tensor([-0.0258, 1.5025, -0.1034, 0.0852]) tensor([0.1307, 0.6024, 0.1209, 0.1460]) -Greedy action tensor([ 1.3932, -0.0973, 0.0450, 1.0683]) tensor([0.4530, 0.1020, 0.1176, 0.3273]) -Greedy action tensor([ 0.4069, -0.0867, -0.1765, 0.6569]) tensor([0.2897, 0.1768, 0.1616, 0.3719]) -Greedy action tensor([0.3945, 0.1456, 0.3428, 0.4093]) tensor([0.2671, 0.2082, 0.2536, 0.2711]) -Greedy action tensor([ 1.1383, -0.5405, 0.7553, 1.5862]) tensor([0.2912, 0.0543, 0.1986, 0.4558]) -Greedy action tensor([ 1.4001, -0.8644, -0.2503, 0.2067]) tensor([0.6254, 0.0650, 0.1200, 0.1896]) -Greedy action tensor([ 1.0270, -0.1328, -0.1187, -0.1695]) tensor([0.5171, 0.1621, 0.1644, 0.1563]) -Greedy action tensor([ 0.6938, -0.3485, -0.1780, -0.2756]) tensor([0.4651, 0.1640, 0.1945, 0.1764]) -Greedy action tensor([ 0.9278, -0.4674, -0.2370, 0.0268]) tensor([0.5087, 0.1260, 0.1587, 0.2066]) -Greedy action tensor([ 1.2947, -0.4869, -0.1903, -0.1920]) tensor([0.6169, 0.1039, 0.1397, 0.1395]) -Greedy action tensor([ 0.4699, -0.2955, -0.5220, -0.1694]) tensor([0.4231, 0.1968, 0.1569, 0.2232]) -Greedy action tensor([ 0.7510, -0.3760, -0.2304, 0.0512]) tensor([0.4555, 0.1476, 0.1707, 0.2262]) -Greedy action tensor([ 0.7909, -0.7525, -0.2668, 0.2524]) tensor([0.4663, 0.0996, 0.1619, 0.2722]) -Greedy action tensor([ 1.0300, -0.5420, -0.0987, 0.1151]) tensor([0.5177, 0.1075, 0.1674, 0.2074]) -Greedy action tensor([ 0.9327, -0.3716, -0.7689, 0.4228]) tensor([0.4868, 0.1321, 0.0888, 0.2923]) -Greedy action tensor([ 1.0760, -0.5482, -0.4351, 0.1943]) tensor([0.5459, 0.1076, 0.1205, 0.2261]) -Greedy action tensor([ 1.6308, -0.3190, -0.3844, 0.4205]) tensor([0.6354, 0.0904, 0.0847, 0.1894]) -Greedy action tensor([ 0.4007, -0.3346, 0.1756, -0.0720]) tensor([0.3447, 0.1652, 0.2752, 0.2149]) -Greedy action tensor([ 0.4317, -0.1779, 0.1737, -0.1724]) tensor([0.3493, 0.1899, 0.2699, 0.1909]) -Greedy action tensor([ 1.1430, -0.4107, -0.0269, -0.2149]) tensor([0.5621, 0.1189, 0.1745, 0.1446]) -Greedy action tensor([ 0.4598, -0.0559, -0.5142, -0.2931]) tensor([0.4089, 0.2441, 0.1544, 0.1926]) -Greedy action tensor([ 0.8612, -0.4219, -0.1607, 0.2509]) tensor([0.4587, 0.1271, 0.1651, 0.2491]) -Greedy action tensor([ 1.4100, -0.4801, -0.2568, -0.0814]) tensor([0.6390, 0.0965, 0.1207, 0.1438]) -Greedy action tensor([ 0.4145, -0.2329, -0.3039, 0.1048]) tensor([0.3643, 0.1907, 0.1776, 0.2673]) -Greedy action tensor([ 0.3040, -0.0314, -0.6363, 0.0967]) tensor([0.3427, 0.2450, 0.1338, 0.2785]) -Greedy action tensor([ 1.2301, -0.5191, -0.1079, -0.1800]) tensor([0.5951, 0.1035, 0.1561, 0.1453]) -Greedy action tensor([ 0.1751, -0.0010, -0.2199, -0.0682]) tensor([0.3034, 0.2544, 0.2044, 0.2379]) -Greedy action tensor([ 0.6983, -0.3181, -0.0161, -0.2516]) tensor([0.4468, 0.1617, 0.2187, 0.1728]) -Greedy action tensor([ 0.7979, -0.6915, -0.3967, 0.2136]) tensor([0.4794, 0.1081, 0.1452, 0.2673]) -Greedy action tensor([ 1.2360, -0.4245, -0.1806, 0.0268]) tensor([0.5777, 0.1098, 0.1401, 0.1724]) -Greedy action tensor([ 0.9870, -0.4194, -0.4057, 0.4224]) tensor([0.4850, 0.1188, 0.1205, 0.2757]) -Greedy action tensor([ 0.6683, -0.3576, 0.1138, -0.2211]) tensor([0.4267, 0.1529, 0.2451, 0.1753]) -Greedy action tensor([ 0.7998, -0.2598, -0.5378, 0.6621]) tensor([0.4032, 0.1397, 0.1058, 0.3513]) -Greedy action tensor([ 1.2128, -0.6853, -0.2434, -0.1440]) tensor([0.6096, 0.0913, 0.1421, 0.1570]) -Greedy action tensor([ 1.1500, -0.6128, -0.5504, 0.2330]) tensor([0.5702, 0.0978, 0.1041, 0.2279]) -Greedy action tensor([ 0.9109, -0.3260, -0.2372, 0.3945]) tensor([0.4537, 0.1317, 0.1439, 0.2707]) -Greedy action tensor([ 1.6418, -0.8082, -0.6054, 0.2418]) tensor([0.6951, 0.0600, 0.0735, 0.1714]) -Greedy action tensor([ 0.9486, -0.3512, -0.2986, 0.0616]) tensor([0.5071, 0.1382, 0.1457, 0.2089]) -Greedy action tensor([ 0.3870, -0.2927, -0.2593, 0.1596]) tensor([0.3537, 0.1792, 0.1853, 0.2817]) -Greedy action tensor([ 1.6275, -0.5380, -0.3283, 0.1157]) tensor([0.6772, 0.0777, 0.0958, 0.1493]) -Greedy action tensor([ 1.0572, -0.3534, -0.3983, 0.4964]) tensor([0.4883, 0.1191, 0.1139, 0.2787]) -Greedy action tensor([ 1.3245, -0.4813, -0.3788, -0.0979]) tensor([0.6299, 0.1035, 0.1147, 0.1519]) -Greedy action tensor([ 0.9614, -0.6171, -0.2439, 0.0482]) tensor([0.5243, 0.1082, 0.1571, 0.2104]) -Greedy action tensor([ 0.7602, -0.5382, -0.1746, 0.0428]) tensor([0.4643, 0.1267, 0.1823, 0.2266]) -Greedy action tensor([ 0.7907, -0.4641, -0.6489, 0.1632]) tensor([0.4864, 0.1387, 0.1153, 0.2597]) -Greedy action tensor([ 0.8409, -0.4496, -0.3328, -0.0917]) tensor([0.5056, 0.1391, 0.1563, 0.1990]) -Greedy action tensor([ 0.5896, -0.4504, -0.1198, -0.0176]) tensor([0.4184, 0.1479, 0.2058, 0.2279]) -Greedy action tensor([ 1.1770, -0.3082, -0.3612, -0.1371]) tensor([0.5848, 0.1324, 0.1256, 0.1571]) -Greedy action tensor([ 0.6384, -0.5241, 0.0440, -0.2173]) tensor([0.4368, 0.1366, 0.2410, 0.1856]) -Greedy action tensor([ 0.6607, -0.5202, 0.0986, -0.1216]) tensor([0.4284, 0.1315, 0.2442, 0.1959]) -Greedy action tensor([ 1.0156, -0.7407, -0.6057, 0.8846]) tensor([0.4449, 0.0768, 0.0879, 0.3903]) -Greedy action tensor([ 0.6123, -0.3796, -0.2298, 0.2682]) tensor([0.3983, 0.1477, 0.1716, 0.2824]) -Greedy action tensor([ 0.4630, -0.3514, -0.3608, -0.1543]) tensor([0.4130, 0.1829, 0.1812, 0.2228]) -Greedy action tensor([ 1.0786, -0.6528, -0.2727, 0.0410]) tensor([0.5586, 0.0989, 0.1446, 0.1979]) -Greedy action tensor([ 0.8139, -0.5482, -0.0111, 0.0486]) tensor([0.4630, 0.1186, 0.2029, 0.2154]) -Greedy action tensor([ 1.4878, -0.4802, -0.3106, 0.0918]) tensor([0.6440, 0.0900, 0.1066, 0.1594]) -Greedy action tensor([ 1.0225, -0.5785, -0.7336, 0.4749]) tensor([0.5121, 0.1033, 0.0885, 0.2962]) -Greedy action tensor([ 0.5448, -0.2241, -0.3335, -0.1748]) tensor([0.4227, 0.1959, 0.1756, 0.2058]) -Greedy action tensor([ 0.9847, -0.1174, 0.0999, -0.2515]) tensor([0.4913, 0.1632, 0.2028, 0.1427]) -Greedy action tensor([ 1.3496, -0.8082, -0.3408, -0.0466]) tensor([0.6462, 0.0747, 0.1192, 0.1600]) -Greedy action tensor([ 0.9767, -0.2932, 0.0952, -0.2840]) tensor([0.5054, 0.1420, 0.2093, 0.1433]) -Greedy action tensor([ 0.9770, -0.6540, -0.3542, 0.2531]) tensor([0.5142, 0.1007, 0.1358, 0.2493]) -Greedy action tensor([ 0.9261, -0.5252, 0.0642, -0.2640]) tensor([0.5100, 0.1195, 0.2154, 0.1551]) -Greedy action tensor([ 1.1346, -0.2843, -0.3960, 0.1836]) tensor([0.5421, 0.1312, 0.1173, 0.2094]) -Greedy action tensor([ 0.7919, -0.5427, -0.3323, 0.5671]) tensor([0.4190, 0.1103, 0.1361, 0.3346]) -Greedy action tensor([ 0.8009, -0.3378, -0.2357, 0.4070]) tensor([0.4256, 0.1363, 0.1510, 0.2871]) -Greedy action tensor([ 1.0436, -0.4867, -0.4255, 0.0270]) tensor([0.5530, 0.1197, 0.1273, 0.2001]) -Greedy action tensor([ 1.1766, -0.6155, -0.2717, -0.0508]) tensor([0.5901, 0.0983, 0.1387, 0.1729]) -Greedy action tensor([ 1.0315, -0.0478, -0.2237, -0.3445]) tensor([0.5326, 0.1810, 0.1518, 0.1345]) -Greedy action tensor([ 1.2062, -0.5657, 0.1419, 0.2254]) tensor([0.5291, 0.0900, 0.1825, 0.1984]) -Greedy action tensor([ 1.0393, -0.5507, -0.0516, -0.1852]) tensor([0.5453, 0.1112, 0.1832, 0.1603]) -Greedy action tensor([ 1.2025, -0.4760, -0.2730, -0.0866]) tensor([0.5914, 0.1104, 0.1352, 0.1629]) -Greedy action tensor([ 0.7512, -0.3134, -0.0631, -0.0393]) tensor([0.4462, 0.1539, 0.1976, 0.2024]) -Greedy action tensor([ 0.6498, -0.3497, -0.1664, -0.2017]) tensor([0.4470, 0.1645, 0.1976, 0.1908]) -Greedy action tensor([ 1.0713, -0.3320, -0.5150, -0.4883]) tensor([0.6022, 0.1480, 0.1233, 0.1266]) -Greedy action tensor([ 1.0743, -0.4693, -0.2276, -0.0439]) tensor([0.5517, 0.1179, 0.1501, 0.1803]) -Greedy action tensor([ 1.2468, -0.8192, -0.3937, 0.3172]) tensor([0.5830, 0.0739, 0.1130, 0.2301]) -Greedy action tensor([ 1.2677, -0.6556, -0.2721, 0.0820]) tensor([0.6002, 0.0877, 0.1287, 0.1834]) -Greedy action tensor([ 0.8138, -0.3082, -0.4055, 0.0140]) tensor([0.4830, 0.1573, 0.1427, 0.2171]) -Greedy action tensor([ 0.8651, -0.5786, -0.0856, 0.0427]) tensor([0.4850, 0.1145, 0.1874, 0.2131]) -Greedy action tensor([ 0.6705, -0.2265, -0.4335, -0.0580]) tensor([0.4500, 0.1835, 0.1492, 0.2172]) -Greedy action tensor([ 0.8005, -0.4937, -0.0581, -0.0119]) tensor([0.4669, 0.1280, 0.1979, 0.2072]) -Greedy action tensor([ 0.4030, -0.1738, -0.1066, -0.1376]) tensor([0.3643, 0.2046, 0.2189, 0.2122]) -Greedy action tensor([ 1.3007, -0.6473, -0.2454, 0.2288]) tensor([0.5889, 0.0840, 0.1255, 0.2016]) -Greedy action tensor([ 1.5572, -0.5779, -0.2897, -0.1008]) tensor([0.6819, 0.0806, 0.1076, 0.1299]) -Greedy action tensor([ 0.8454, -0.2933, -0.1231, -0.1809]) tensor([0.4859, 0.1556, 0.1845, 0.1741]) -Greedy action tensor([ 0.4264, 0.1645, 0.0889, -0.2745]) tensor([0.3356, 0.2583, 0.2395, 0.1665]) -Greedy action tensor([ 0.3287, 0.0935, -0.0126, -0.1508]) tensor([0.3205, 0.2533, 0.2278, 0.1984]) -Greedy action tensor([ 0.3888, -0.0798, 0.1833, -0.2862]) tensor([0.3391, 0.2122, 0.2761, 0.1726]) -Greedy action tensor([ 0.5576, -0.0682, -0.0394, -0.1819]) tensor([0.3902, 0.2087, 0.2148, 0.1863]) -Greedy action tensor([ 0.1639, 0.0126, 0.1319, -0.1636]) tensor([0.2818, 0.2422, 0.2729, 0.2031]) -Greedy action tensor([ 0.3083, -0.0897, 0.0557, -0.1528]) tensor([0.3248, 0.2181, 0.2523, 0.2048]) -Greedy action tensor([ 0.3918, 0.1942, 0.1758, -0.1572]) tensor([0.3121, 0.2561, 0.2515, 0.1803]) -Greedy action tensor([ 0.4501, 0.0511, -0.0600, -0.1396]) tensor([0.3539, 0.2374, 0.2125, 0.1962]) -Greedy action tensor([ 0.2259, 0.1476, 0.1105, -0.2767]) tensor([0.2923, 0.2703, 0.2605, 0.1769]) -Greedy action tensor([ 0.3489, -0.3078, -0.1661, -0.3724]) tensor([0.3843, 0.1993, 0.2296, 0.1868]) -Greedy action tensor([ 0.4307, 0.0947, 0.0549, -0.2454]) tensor([0.3437, 0.2456, 0.2360, 0.1748]) -Greedy action tensor([ 0.8555, -0.4698, -0.1482, -0.6580]) tensor([0.5398, 0.1434, 0.1979, 0.1188]) -Greedy action tensor([ 0.3966, 0.0722, -0.1022, -0.1505]) tensor([0.3438, 0.2485, 0.2088, 0.1989]) -Greedy action tensor([ 0.4790, 0.1157, 0.0817, -0.2501]) tensor([0.3509, 0.2440, 0.2359, 0.1692]) -Greedy action tensor([ 0.6383, -0.4048, -0.1613, -0.5708]) tensor([0.4761, 0.1678, 0.2140, 0.1421]) -Greedy action tensor([ 0.3692, 0.0076, -0.1126, -0.5739]) tensor([0.3699, 0.2576, 0.2285, 0.1440]) -Greedy action tensor([ 0.6992, -0.4437, 0.1022, -0.5232]) tensor([0.4621, 0.1474, 0.2544, 0.1361]) -Greedy action tensor([ 0.4664, -0.1489, -0.0838, -0.2827]) tensor([0.3861, 0.2087, 0.2227, 0.1825]) -Greedy action tensor([ 0.1462, 0.0536, 0.1157, -0.0391]) tensor([0.2694, 0.2455, 0.2613, 0.2238]) -Greedy action tensor([ 0.7651, -0.3724, 0.0012, -0.5363]) tensor([0.4858, 0.1557, 0.2263, 0.1322]) -Greedy action tensor([ 8.4077e-01, -3.6564e-01, -2.7658e-04, -5.5106e-01]) tensor([0.5053, 0.1512, 0.2179, 0.1256]) -Greedy action tensor([ 0.6269, -0.0967, -0.1146, -0.3164]) tensor([0.4254, 0.2063, 0.2026, 0.1656]) -Greedy action tensor([ 0.5615, -0.3868, -0.0864, -0.6290]) tensor([0.4515, 0.1749, 0.2362, 0.1373]) -Greedy action tensor([ 3.6893e-01, -1.8625e-02, -4.0755e-05, -4.0494e-01]) tensor([0.3532, 0.2397, 0.2442, 0.1629]) -Greedy action tensor([ 0.3699, -0.2612, 0.0231, -0.3662]) tensor([0.3679, 0.1957, 0.2601, 0.1762]) -Greedy action tensor([ 0.4418, -0.1140, -0.0155, -0.2571]) tensor([0.3699, 0.2122, 0.2341, 0.1839]) -Greedy action tensor([ 0.0211, 0.0216, 0.0636, -0.1652]) tensor([0.2581, 0.2583, 0.2694, 0.2143]) -Greedy action tensor([ 0.5403, -0.1823, 0.0064, -0.3372]) tensor([0.4020, 0.1952, 0.2357, 0.1672]) -Greedy action tensor([ 0.3511, 0.1743, 0.1461, -0.1530]) tensor([0.3071, 0.2573, 0.2501, 0.1855]) -Greedy action tensor([ 0.2383, 0.1386, 0.1211, -0.2080]) tensor([0.2912, 0.2635, 0.2590, 0.1863]) -Greedy action tensor([ 0.5411, -0.3677, -0.0783, -0.3586]) tensor([0.4259, 0.1716, 0.2293, 0.1732]) -Greedy action tensor([ 0.5017, 0.1381, -0.0757, -0.0454]) tensor([0.3527, 0.2452, 0.1980, 0.2041]) -Greedy action tensor([ 0.4354, -0.0579, 0.2116, -0.2658]) tensor([0.3441, 0.2101, 0.2751, 0.1707]) -Greedy action tensor([ 0.6909, -0.1977, -0.0720, -0.4737]) tensor([0.4567, 0.1878, 0.2130, 0.1425]) -Greedy action tensor([ 0.4749, 0.0756, 0.0064, -0.3368]) tensor([0.3648, 0.2447, 0.2284, 0.1620]) -Greedy action tensor([ 0.8264, -0.4028, -0.1330, -0.3279]) tensor([0.5023, 0.1469, 0.1924, 0.1584]) -Greedy action tensor([ 0.4988, -0.3778, -0.0538, -0.5989]) tensor([0.4301, 0.1790, 0.2475, 0.1435]) -Greedy action tensor([ 0.4499, -0.3134, 0.1409, -0.2419]) tensor([0.3702, 0.1726, 0.2718, 0.1854]) -Greedy action tensor([ 0.3565, 0.1584, 0.1428, -0.2182]) tensor([0.3134, 0.2571, 0.2531, 0.1764]) -Greedy action tensor([ 0.6669, -0.4234, -0.0853, -0.6361]) tensor([0.4810, 0.1617, 0.2267, 0.1307]) -Greedy action tensor([ 0.2876, 0.1957, -0.0515, -0.3263]) tensor([0.3159, 0.2881, 0.2250, 0.1710]) -Greedy action tensor([ 0.6148, -0.1955, 0.0675, -0.2570]) tensor([0.4096, 0.1822, 0.2370, 0.1713]) -Greedy action tensor([ 0.3023, 0.2024, 0.0639, -0.1367]) tensor([0.2996, 0.2711, 0.2361, 0.1932]) -Greedy action tensor([ 0.5086, -0.2849, -0.0328, -0.3442]) tensor([0.4064, 0.1838, 0.2365, 0.1732]) -Greedy action tensor([ 0.3988, -0.0230, -0.0679, -0.1504]) tensor([0.3496, 0.2293, 0.2192, 0.2019]) -Greedy action tensor([ 0.4704, -0.0683, -0.0879, -0.4266]) tensor([0.3901, 0.2276, 0.2232, 0.1591]) -Greedy action tensor([ 0.3262, 0.1001, 0.0796, -0.2554]) tensor([0.3187, 0.2542, 0.2490, 0.1781]) -Greedy action tensor([ 0.3219, 0.2282, 0.1232, -0.2320]) tensor([0.3026, 0.2755, 0.2480, 0.1739]) -Greedy action tensor([ 0.4435, -0.0578, -0.1382, -0.1251]) tensor([0.3662, 0.2218, 0.2047, 0.2074]) -Greedy action tensor([ 0.9499, -0.4435, -0.0531, -0.3851]) tensor([0.5324, 0.1322, 0.1953, 0.1401]) -Greedy action tensor([ 0.2545, 0.1488, 0.1688, -0.2013]) tensor([0.2897, 0.2607, 0.2659, 0.1837]) -Greedy action tensor([ 0.3597, 0.2810, 0.1618, -0.2255]) tensor([0.3029, 0.2800, 0.2485, 0.1687]) -Greedy action tensor([ 0.3858, -0.2701, 0.1108, -0.2936]) tensor([0.3590, 0.1863, 0.2727, 0.1820]) -Greedy action tensor([ 0.3220, -0.0441, 0.1011, -0.1978]) tensor([0.3236, 0.2244, 0.2595, 0.1924]) -Greedy action tensor([ 0.5961, -0.1223, -0.0601, -0.1953]) tensor([0.4066, 0.1982, 0.2109, 0.1843]) -Greedy action tensor([ 0.3257, 0.3323, 0.1412, -0.1988]) tensor([0.2915, 0.2935, 0.2424, 0.1726]) -Greedy action tensor([ 0.6385, -0.5152, 0.2325, -0.5947]) tensor([0.4399, 0.1388, 0.2931, 0.1282]) -Greedy action tensor([ 0.5859, -0.2027, -0.0041, -0.5378]) tensor([0.4285, 0.1947, 0.2375, 0.1393]) -Greedy action tensor([ 0.8129, -0.2808, -0.0127, -0.5334]) tensor([0.4918, 0.1648, 0.2154, 0.1280]) -Greedy action tensor([ 0.8936, -0.4570, 0.0897, -0.6597]) tensor([0.5213, 0.1351, 0.2333, 0.1103]) -Greedy action tensor([ 0.4772, -0.0056, -0.0723, -0.1577]) tensor([0.3671, 0.2265, 0.2119, 0.1945]) -Greedy action tensor([ 0.8193, -0.3572, 0.0050, -0.4567]) tensor([0.4925, 0.1519, 0.2182, 0.1375]) -Greedy action tensor([ 0.8699, -0.6374, -0.0464, -0.5923]) tensor([0.5396, 0.1195, 0.2158, 0.1250]) -Greedy action tensor([ 0.3886, 0.1051, 0.0806, -0.2940]) tensor([0.3341, 0.2516, 0.2455, 0.1688]) -Greedy action tensor([ 0.4762, -0.1497, -0.0664, -0.3783]) tensor([0.3935, 0.2104, 0.2287, 0.1674]) -Greedy action tensor([ 0.4662, 0.0015, 0.0427, -0.2572]) tensor([0.3613, 0.2270, 0.2365, 0.1752]) -Greedy action tensor([ 0.3769, 0.2306, 0.1486, -0.2174]) tensor([0.3114, 0.2690, 0.2478, 0.1718]) -Greedy action tensor([ 0.2281, 0.0602, -0.0352, -0.2117]) tensor([0.3069, 0.2595, 0.2359, 0.1977]) -Greedy action tensor([ 0.5508, -0.3799, 0.1216, -0.5384]) tensor([0.4198, 0.1655, 0.2733, 0.1413]) -Greedy action tensor([ 0.4781, -0.1580, -0.0360, -0.1408]) tensor([0.3751, 0.1986, 0.2243, 0.2020]) -Greedy action tensor([ 0.7277, -0.1612, 0.0944, -0.4080]) tensor([0.4419, 0.1816, 0.2346, 0.1419]) -Greedy action tensor([ 0.7189, -0.4515, 0.1450, -0.6988]) tensor([0.4726, 0.1466, 0.2662, 0.1145]) -Greedy action tensor([ 0.2012, -0.0549, 0.0739, -0.2535]) tensor([0.3040, 0.2353, 0.2677, 0.1929]) -Greedy action tensor([ 0.5827, -0.3717, -0.0088, -0.5328]) tensor([0.4413, 0.1699, 0.2442, 0.1446]) -Greedy action tensor([ 0.1641, 0.1312, 0.1191, -0.2451]) tensor([0.2787, 0.2697, 0.2665, 0.1851]) -Greedy action tensor([ 0.3533, 0.1818, 0.1483, -0.0543]) tensor([0.3010, 0.2536, 0.2452, 0.2002]) -Greedy action tensor([ 0.1220, 0.0141, 0.1603, -0.1556]) tensor([0.2707, 0.2430, 0.2813, 0.2051]) -Greedy action tensor([ 0.5882, -0.5099, 0.0585, -0.6706]) tensor([0.4533, 0.1512, 0.2669, 0.1287]) -Greedy action tensor([ 0.3123, -0.0851, 0.1457, -0.3414]) tensor([0.3291, 0.2212, 0.2786, 0.1712]) -Greedy action tensor([ 0.6286, -0.1894, 0.1872, -0.4880]) tensor([0.4146, 0.1830, 0.2667, 0.1357]) -Greedy action tensor([ 0.5421, 0.0466, -0.0446, -0.3188]) tensor([0.3864, 0.2354, 0.2149, 0.1633]) -Greedy action tensor([-1.8784, -0.4880, 0.7983, 0.1492]) tensor([0.0368, 0.1479, 0.5355, 0.2798]) -Greedy action tensor([-0.7682, 0.1846, 0.1314, -0.3309]) tensor([0.1316, 0.3412, 0.3235, 0.2037]) -Greedy action tensor([-1.6514, -0.5295, 0.5228, 0.0235]) tensor([0.0549, 0.1687, 0.4831, 0.2933]) -Greedy action tensor([-1.9549, -0.8987, 0.6227, -0.1659]) tensor([0.0434, 0.1249, 0.5718, 0.2599]) -Greedy action tensor([-1.9607, -0.8695, 0.3301, -0.2236]) tensor([0.0512, 0.1524, 0.5057, 0.2907]) -Greedy action tensor([-0.6616, -0.6690, 0.4778, 0.0742]) tensor([0.1388, 0.1378, 0.4337, 0.2897]) -Greedy action tensor([-2.0341, -0.8815, 0.5960, -0.1137]) tensor([0.0402, 0.1273, 0.5580, 0.2744]) -Greedy action tensor([-2.0162, -0.8756, 0.2989, -0.2062]) tensor([0.0491, 0.1536, 0.4972, 0.3000]) -Greedy action tensor([-1.7537, -0.7411, 0.1657, -0.3354]) tensor([0.0680, 0.1873, 0.4638, 0.2810]) -Greedy action tensor([-1.8263, -0.8174, 0.1246, -0.3731]) tensor([0.0664, 0.1822, 0.4673, 0.2841]) -Greedy action tensor([-1.0211, -0.5687, 0.3036, 0.1560]) tensor([0.1044, 0.1641, 0.3927, 0.3388]) -Greedy action tensor([-0.3273, -0.2413, 0.2049, 0.2627]) tensor([0.1787, 0.1947, 0.3042, 0.3223]) -Greedy action tensor([-1.2745, -0.3067, 0.4244, 0.6576]) tensor([0.0625, 0.1645, 0.3417, 0.4314]) -Greedy action tensor([-1.2065, -0.5234, 0.4492, 0.5555]) tensor([0.0712, 0.1410, 0.3730, 0.4148]) -Greedy action tensor([-0.7351, -0.5795, 0.2878, 0.4412]) tensor([0.1221, 0.1426, 0.3395, 0.3958]) -Greedy action tensor([-1.4067, -0.6978, 0.7006, -0.3244]) tensor([0.0704, 0.1430, 0.5789, 0.2077]) -Greedy action tensor([-0.8525, -0.4229, 0.1183, 0.0985]) tensor([0.1288, 0.1979, 0.3400, 0.3333]) -Greedy action tensor([-1.9815, -0.6424, 0.5307, -0.0105]) tensor([0.0411, 0.1569, 0.5070, 0.2951]) -Greedy action tensor([-1.0785, -0.2109, 0.4940, -0.3100]) tensor([0.0966, 0.2299, 0.4653, 0.2082]) -Greedy action tensor([-1.9992, -0.9258, 0.3758, -0.1420]) tensor([0.0474, 0.1388, 0.5100, 0.3038]) -Greedy action tensor([-1.6952, -0.4109, 0.5347, 0.0019]) tensor([0.0516, 0.1865, 0.4801, 0.2818]) -Greedy action tensor([-1.8415, -0.4967, 0.7240, 0.0950]) tensor([0.0404, 0.1549, 0.5249, 0.2798]) -Greedy action tensor([-0.7631, -0.3796, -0.6069, -0.2958]) tensor([0.1911, 0.2805, 0.2234, 0.3050]) -Greedy action tensor([-0.9959, -0.0502, 0.4879, -0.4627]) tensor([0.1032, 0.2657, 0.4551, 0.1759]) -Greedy action tensor([-0.7522, -0.5733, 0.1578, 0.3273]) tensor([0.1312, 0.1569, 0.3259, 0.3861]) -Greedy action tensor([-1.9130, -0.7272, 0.2620, -0.2267]) tensor([0.0541, 0.1772, 0.4764, 0.2923]) -Greedy action tensor([-2.0120, -0.6516, 0.8530, 0.1057]) tensor([0.0325, 0.1267, 0.5705, 0.2702]) -Greedy action tensor([-1.1812, -0.2473, -0.1811, -0.1671]) tensor([0.1109, 0.2821, 0.3014, 0.3057]) -Greedy action tensor([-0.3783, -0.1638, 0.1843, 0.2663]) tensor([0.1695, 0.2101, 0.2975, 0.3229]) -Greedy action tensor([-0.7569, -0.6204, 0.2784, 0.1519]) tensor([0.1343, 0.1540, 0.3783, 0.3334]) -Greedy action tensor([-1.8314, -0.4830, 0.6421, -0.0579]) tensor([0.0442, 0.1704, 0.5248, 0.2606]) -Greedy action tensor([-0.8958, -0.5553, 0.4959, -0.3262]) tensor([0.1220, 0.1715, 0.4908, 0.2157]) -Greedy action tensor([-1.7741, -0.6957, 0.0926, -0.3626]) tensor([0.0689, 0.2026, 0.4457, 0.2827]) -Greedy action tensor([-1.9845, -0.8141, 0.2795, -0.2109]) tensor([0.0507, 0.1633, 0.4875, 0.2985]) -Greedy action tensor([-1.5034, -0.3173, 0.8181, 0.8364]) tensor([0.0403, 0.1318, 0.4102, 0.4178]) -Greedy action tensor([-1.3569, -0.5323, 0.4306, 0.4764]) tensor([0.0645, 0.1471, 0.3852, 0.4033]) -Greedy action tensor([-1.5112, -0.0156, 0.1024, -0.2535]) tensor([0.0714, 0.3187, 0.3586, 0.2512]) -Greedy action tensor([-1.8534, -0.4999, 0.9640, 0.3978]) tensor([0.0322, 0.1245, 0.5380, 0.3054]) -Greedy action tensor([-1.7666, -0.4835, 0.5677, -0.0694]) tensor([0.0490, 0.1769, 0.5063, 0.2677]) -Greedy action tensor([-0.5289, -0.4205, 0.1912, 0.4588]) tensor([0.1459, 0.1626, 0.2998, 0.3917]) -Greedy action tensor([-1.5701, -0.6104, 0.7550, 0.0143]) tensor([0.0534, 0.1395, 0.5465, 0.2606]) -Greedy action tensor([-2.0040, -0.8969, 0.2761, -0.2245]) tensor([0.0507, 0.1533, 0.4956, 0.3004]) -Greedy action tensor([-1.9731, -0.9270, 0.2217, -0.2510]) tensor([0.0543, 0.1545, 0.4874, 0.3038]) -Greedy action tensor([-1.9328, -0.6842, 0.4266, -0.2021]) tensor([0.0483, 0.1683, 0.5110, 0.2725]) -Greedy action tensor([-1.3688, -0.6553, 0.0846, -0.1520]) tensor([0.0935, 0.1909, 0.4000, 0.3157]) -Greedy action tensor([-2.0279, -0.8735, 0.4334, -0.0751]) tensor([0.0436, 0.1383, 0.5109, 0.3072]) -Greedy action tensor([-1.6642, -0.4041, 0.6842, 0.4356]) tensor([0.0432, 0.1522, 0.4520, 0.3525]) -Greedy action tensor([-1.2699, -0.5447, 0.3384, 0.1808]) tensor([0.0811, 0.1675, 0.4052, 0.3461]) -Greedy action tensor([-1.7375, -0.7903, 0.1336, -0.3877]) tensor([0.0718, 0.1851, 0.4663, 0.2769]) -Greedy action tensor([-1.6401, -0.4750, 0.5127, 0.0390]) tensor([0.0550, 0.1764, 0.4736, 0.2949]) -Greedy action tensor([-1.1235, -0.4998, 0.6345, -0.5092]) tensor([0.0951, 0.1774, 0.5517, 0.1758]) -Greedy action tensor([-1.1320, -0.6748, 0.2639, 0.1561]) tensor([0.0976, 0.1542, 0.3942, 0.3539]) -Greedy action tensor([-1.5132, -0.5266, 0.6412, -0.3929]) tensor([0.0651, 0.1745, 0.5610, 0.1995]) -Greedy action tensor([-1.7634, -0.3921, 0.8654, 0.6510]) tensor([0.0334, 0.1314, 0.4622, 0.3730]) -Greedy action tensor([-0.7609, -0.1702, 0.3792, -0.0330]) tensor([0.1250, 0.2256, 0.3907, 0.2587]) -Greedy action tensor([-1.9064, -0.5002, 1.1585, 0.6305]) tensor([0.0255, 0.1042, 0.5474, 0.3228]) -Greedy action tensor([-1.8900, -0.4622, 0.6417, -0.1465]) tensor([0.0426, 0.1777, 0.5360, 0.2437]) -Greedy action tensor([-1.5084, -0.4126, 0.4769, 0.1774]) tensor([0.0600, 0.1795, 0.4368, 0.3237]) -Greedy action tensor([-1.9688, -0.4958, 0.9954, 0.2429]) tensor([0.0295, 0.1288, 0.5721, 0.2696]) -Greedy action tensor([-1.9921, -0.9463, 0.4382, -0.2432]) tensor([0.0477, 0.1358, 0.5422, 0.2743]) -Greedy action tensor([-1.9766, -0.8373, 0.4584, -0.1389]) tensor([0.0458, 0.1432, 0.5231, 0.2879]) -Greedy action tensor([-0.7506, 0.1987, 0.2748, -0.2415]) tensor([0.1244, 0.3215, 0.3470, 0.2070]) -Greedy action tensor([-1.2804, 0.0164, -0.1857, -0.3064]) tensor([0.0971, 0.3553, 0.2903, 0.2573]) -Greedy action tensor([-1.8711, -0.4374, 0.6202, -0.1332]) tensor([0.0436, 0.1827, 0.5261, 0.2477]) -Greedy action tensor([-1.1744, -0.7543, 0.7151, -0.1515]) tensor([0.0839, 0.1277, 0.5551, 0.2333]) -Greedy action tensor([-0.4869, -0.5185, 0.2371, -0.0488]) tensor([0.1792, 0.1736, 0.3696, 0.2777]) -Greedy action tensor([-0.4011, -0.4897, 0.2223, 0.2697]) tensor([0.1743, 0.1595, 0.3252, 0.3410]) -Greedy action tensor([-1.2091, -0.4861, -0.2189, -0.3129]) tensor([0.1219, 0.2512, 0.3281, 0.2987]) -Greedy action tensor([-1.8727, -0.9961, 0.1651, -0.4435]) tensor([0.0656, 0.1575, 0.5031, 0.2738]) -Greedy action tensor([-1.4373, -0.2980, 0.5509, 0.5065]) tensor([0.0543, 0.1697, 0.3966, 0.3794]) -Greedy action tensor([-1.5592, -0.3005, 0.6224, 0.4651]) tensor([0.0477, 0.1680, 0.4229, 0.3613]) -Greedy action tensor([-1.0611, -0.6234, 0.2787, 0.1233]) tensor([0.1038, 0.1608, 0.3962, 0.3392]) -Greedy action tensor([-1.1054, 0.4326, 0.4933, -0.5433]) tensor([0.0809, 0.3768, 0.4003, 0.1420]) -Greedy action tensor([-2.0192, -0.9326, 0.5699, -0.0775]) tensor([0.0412, 0.1222, 0.5491, 0.2874]) -Greedy action tensor([-1.1523, -0.5653, 0.4194, -0.1144]) tensor([0.0958, 0.1723, 0.4613, 0.2705]) -Greedy action tensor([-1.6157, -0.5020, 0.4751, 0.0741]) tensor([0.0570, 0.1735, 0.4609, 0.3086]) -Greedy action tensor([-1.3856, -0.3195, 0.6863, 0.8642]) tensor([0.0469, 0.1361, 0.3723, 0.4447]) -Greedy action tensor([-2.0395, -0.8882, 0.4660, -0.0805]) tensor([0.0425, 0.1345, 0.5212, 0.3017]) -Greedy action tensor([-0.9395, 0.4982, -0.2559, 0.4378]) tensor([0.0896, 0.3775, 0.1776, 0.3553]) -Greedy action tensor([-1.8311, -0.4805, 0.6524, -0.0373]) tensor([0.0438, 0.1689, 0.5243, 0.2631]) -Greedy action tensor([-0.9349, -0.5881, 0.2885, 0.0606]) tensor([0.1174, 0.1660, 0.3990, 0.3176]) -Greedy action tensor([ 1.2182, 0.9720, -1.0000, 0.5992]) tensor([0.4117, 0.3218, 0.0448, 0.2217]) -Greedy action tensor([1.0425, 0.4271, 0.0533, 0.5281]) tensor([0.3984, 0.2153, 0.1481, 0.2382]) -Greedy action tensor([0.7201, 0.0418, 0.3554, 0.1346]) tensor([0.3625, 0.1840, 0.2517, 0.2018]) -Greedy action tensor([ 1.4986, -0.0938, 0.8994, 1.1796]) tensor([0.4033, 0.0820, 0.2215, 0.2932]) -Greedy action tensor([ 2.0973, -0.1463, 0.5832, 1.2336]) tensor([0.5722, 0.0607, 0.1259, 0.2412]) -Greedy action tensor([1.3006, 0.3464, 0.5847, 0.5758]) tensor([0.4240, 0.1633, 0.2072, 0.2054]) -Greedy action tensor([0.7975, 0.7393, 0.1425, 0.9044]) tensor([0.2797, 0.2638, 0.1453, 0.3112]) -Greedy action tensor([ 1.6557, -0.0460, 0.9588, 0.9925]) tensor([0.4554, 0.0831, 0.2269, 0.2346]) -Greedy action tensor([ 0.5608, -0.0166, 0.3261, 0.7993]) tensor([0.2761, 0.1550, 0.2184, 0.3505]) -Greedy action tensor([ 1.5303, -1.2441, 0.1476, 1.2166]) tensor([0.4892, 0.0305, 0.1227, 0.3575]) -Greedy action tensor([ 1.4230, -0.6708, 1.2743, 0.5932]) tensor([0.4130, 0.0509, 0.3559, 0.1801]) -Greedy action tensor([ 1.3764, -0.3574, -0.2214, 1.2856]) tensor([0.4363, 0.0770, 0.0883, 0.3984]) -Greedy action tensor([ 0.3987, -0.2265, 0.0852, 1.6471]) tensor([0.1739, 0.0931, 0.1271, 0.6060]) -Greedy action tensor([ 1.9810, -0.4736, 2.0331, 0.5372]) tensor([0.4210, 0.0362, 0.4435, 0.0994]) -Greedy action tensor([ 1.0019, -0.1733, 0.5558, 0.2133]) tensor([0.4161, 0.1285, 0.2663, 0.1891]) -Greedy action tensor([ 0.0473, -0.0437, -0.6552, 1.1527]) tensor([0.1842, 0.1682, 0.0912, 0.5564]) -Greedy action tensor([ 0.6037, 0.2093, -0.6654, 0.5938]) tensor([0.3395, 0.2289, 0.0954, 0.3362]) -Greedy action tensor([ 1.2003, 0.4928, -0.3234, 0.3502]) tensor([0.4677, 0.2305, 0.1019, 0.1999]) -Greedy action tensor([ 0.5082, -1.1505, 0.7079, 1.4825]) tensor([0.1976, 0.0376, 0.2413, 0.5235]) -Greedy action tensor([ 0.8523, -0.2022, 0.7626, 0.5418]) tensor([0.3338, 0.1163, 0.3052, 0.2447]) -Greedy action tensor([0.6088, 0.2645, 1.3049, 0.0645]) tensor([0.2328, 0.1650, 0.4670, 0.1351]) -Greedy action tensor([ 0.7493, -1.6797, 0.4898, 1.3305]) tensor([0.2741, 0.0242, 0.2115, 0.4902]) -Greedy action tensor([0.5808, 0.1615, 0.4604, 0.4052]) tensor([0.2956, 0.1943, 0.2621, 0.2480]) -Greedy action tensor([ 1.5393, 0.2220, -0.2734, 0.5278]) tensor([0.5572, 0.1492, 0.0909, 0.2026]) -Greedy action tensor([ 0.3338, -0.1428, 0.9124, 1.1252]) tensor([0.1782, 0.1107, 0.3179, 0.3933]) -Greedy action tensor([ 1.2539, -0.2413, 1.2740, 0.8891]) tensor([0.3403, 0.0763, 0.3472, 0.2363]) -Greedy action tensor([ 0.5219, -0.4929, 0.4830, 0.2848]) tensor([0.3212, 0.1164, 0.3090, 0.2534]) -Greedy action tensor([ 1.7876, -1.0095, 0.3484, 1.1233]) tensor([0.5517, 0.0336, 0.1308, 0.2839]) -Greedy action tensor([ 1.6942, -0.4605, -0.3107, 0.8168]) tensor([0.6001, 0.0696, 0.0808, 0.2496]) -Greedy action tensor([ 0.8236, 0.0467, -1.0447, 1.9907]) tensor([0.2072, 0.0953, 0.0320, 0.6656]) -Greedy action tensor([ 1.0710, 0.5926, -0.2228, 1.0697]) tensor([0.3457, 0.2142, 0.0948, 0.3452]) -Greedy action tensor([-0.0593, 0.0412, 1.1261, -0.1790]) tensor([0.1596, 0.1765, 0.5223, 0.1416]) -Greedy action tensor([ 0.8908, -0.3222, 0.4917, -0.3120]) tensor([0.4408, 0.1310, 0.2958, 0.1324]) -Greedy action tensor([ 1.3798, -0.0687, 1.0261, 0.8512]) tensor([0.3958, 0.0930, 0.2779, 0.2333]) -Greedy action tensor([ 1.1919, -0.3778, -0.0429, 0.4431]) tensor([0.5071, 0.1055, 0.1475, 0.2398]) -Greedy action tensor([ 1.1088, -0.0648, 0.5418, 1.4240]) tensor([0.3080, 0.0952, 0.1747, 0.4221]) -Greedy action tensor([ 0.7976, -1.3855, -0.8632, 1.2625]) tensor([0.3455, 0.0389, 0.0656, 0.5499]) -Greedy action tensor([ 1.1805, 0.5127, -0.4186, 0.5448]) tensor([0.4455, 0.2285, 0.0900, 0.2359]) -Greedy action tensor([ 1.1486, -0.6423, -0.7633, 0.9968]) tensor([0.4600, 0.0767, 0.0680, 0.3953]) -Greedy action tensor([ 0.6409, 0.1610, -0.8671, 0.8934]) tensor([0.3197, 0.1979, 0.0708, 0.4116]) -Greedy action tensor([ 1.5758, 0.6764, -0.8111, 0.4095]) tensor([0.5524, 0.2247, 0.0508, 0.1721]) -Greedy action tensor([ 0.5185, -1.0744, 1.5265, 0.0256]) tensor([0.2196, 0.0446, 0.6017, 0.1341]) -Greedy action tensor([ 1.6678, 0.1219, -0.2720, 1.5577]) tensor([0.4439, 0.0946, 0.0638, 0.3976]) -Greedy action tensor([ 1.0056, -0.6754, 0.9040, 0.0985]) tensor([0.4011, 0.0747, 0.3623, 0.1619]) -Greedy action tensor([ 1.2918, 0.4925, -0.5550, 1.4579]) tensor([0.3587, 0.1613, 0.0566, 0.4235]) -Greedy action tensor([-0.4303, 0.6630, 1.0543, -0.7797]) tensor([0.1099, 0.3278, 0.4848, 0.0775]) -Greedy action tensor([ 1.0675, -0.3520, 0.8413, 1.1614]) tensor([0.3187, 0.0771, 0.2542, 0.3501]) -Greedy action tensor([1.6316, 0.0159, 1.7632, 0.4148]) tensor([0.3794, 0.0754, 0.4328, 0.1124]) -Greedy action tensor([ 1.3124, -0.4921, 1.2577, 0.7240]) tensor([0.3750, 0.0617, 0.3551, 0.2082]) -Greedy action tensor([ 1.3598, 0.2905, -0.2936, 0.7115]) tensor([0.4860, 0.1668, 0.0930, 0.2542]) -Greedy action tensor([1.0958, 0.7215, 0.7081, 0.3273]) tensor([0.3533, 0.2430, 0.2398, 0.1639]) -Greedy action tensor([ 0.7018, -1.1760, 1.5828, -0.0389]) tensor([0.2473, 0.0378, 0.5969, 0.1179]) -Greedy action tensor([ 0.5029, -0.5070, 0.0850, 0.1564]) tensor([0.3663, 0.1334, 0.2412, 0.2590]) -Greedy action tensor([ 2.2755, -0.7538, 0.8935, 1.7437]) tensor([0.5299, 0.0256, 0.1331, 0.3114]) -Greedy action tensor([ 1.6112, -1.1055, 0.8024, 0.7791]) tensor([0.5137, 0.0340, 0.2288, 0.2235]) -Greedy action tensor([ 1.3893, -0.1054, 1.2836, 1.1894]) tensor([0.3398, 0.0762, 0.3057, 0.2782]) -Greedy action tensor([ 1.2577, 0.6696, -0.7135, 0.6610]) tensor([0.4454, 0.2474, 0.0620, 0.2452]) -Greedy action tensor([ 0.9012, -0.4971, 0.3006, 0.5872]) tensor([0.3959, 0.0978, 0.2171, 0.2892]) -Greedy action tensor([ 0.2868, 0.6057, -0.9801, 0.0346]) tensor([0.2912, 0.4005, 0.0820, 0.2263]) -Greedy action tensor([ 1.0856, -0.4645, 0.3831, 1.3587]) tensor([0.3309, 0.0702, 0.1639, 0.4349]) -Greedy action tensor([ 0.8271, 1.0179, -0.1451, -0.0134]) tensor([0.3311, 0.4007, 0.1253, 0.1429]) -Greedy action tensor([ 1.3362, -0.2926, 0.1699, 1.7128]) tensor([0.3373, 0.0662, 0.1051, 0.4915]) -Greedy action tensor([0.9614, 0.2347, 0.5322, 0.2052]) tensor([0.3840, 0.1857, 0.2500, 0.1803]) -Greedy action tensor([-0.3476, -1.0011, -0.3441, 1.7312]) tensor([0.0951, 0.0495, 0.0954, 0.7601]) -Greedy action tensor([ 1.5810, -0.0712, 0.0591, 0.6793]) tensor([0.5507, 0.1055, 0.1202, 0.2235]) -Greedy action tensor([ 0.6152, -0.3527, -0.3263, 1.3777]) tensor([0.2555, 0.0971, 0.0997, 0.5477]) -Greedy action tensor([0.5566, 0.0495, 0.2205, 0.1541]) tensor([0.3350, 0.2017, 0.2394, 0.2240]) -Greedy action tensor([1.1680, 0.7721, 0.0665, 0.6109]) tensor([0.3878, 0.2611, 0.1289, 0.2222]) -Greedy action tensor([ 1.5547, -1.2255, -0.1455, 0.5330]) tensor([0.6232, 0.0387, 0.1138, 0.2243]) -Greedy action tensor([ 1.1133, -0.1806, 0.1018, 0.4206]) tensor([0.4677, 0.1282, 0.1701, 0.2340]) -Greedy action tensor([ 0.1851, -0.0924, 0.2366, 0.9999]) tensor([0.1973, 0.1495, 0.2077, 0.4456]) -Greedy action tensor([ 0.9320, -0.9557, 1.3367, -0.3293]) tensor([0.3409, 0.0516, 0.5109, 0.0966]) -Greedy action tensor([ 1.3719, -0.0584, 1.1043, 1.3871]) tensor([0.3312, 0.0792, 0.2534, 0.3362]) -Greedy action tensor([0.5074, 0.0493, 0.5209, 0.5599]) tensor([0.2703, 0.1709, 0.2739, 0.2848]) -Greedy action tensor([ 0.9175, 0.5221, -0.5580, 0.5976]) tensor([0.3805, 0.2562, 0.0870, 0.2763]) -Greedy action tensor([ 1.0375, 0.1277, -0.9262, 0.8526]) tensor([0.4212, 0.1696, 0.0591, 0.3501]) -Greedy action tensor([ 0.3779, 0.1773, 1.4796, -0.0099]) tensor([0.1816, 0.1486, 0.5465, 0.1232]) -Greedy action tensor([ 0.9126, 0.9861, -0.2918, 0.3308]) tensor([0.3407, 0.3667, 0.1022, 0.1904]) -Greedy action tensor([ 0.3141, -0.7023, -0.3531, 2.1000]) tensor([0.1276, 0.0462, 0.0654, 0.7608]) -Greedy action tensor([ 1.0946, -1.1005, 0.5970, 0.6393]) tensor([0.4249, 0.0473, 0.2583, 0.2695]) -Greedy action tensor([ 1.5449, -0.3885, 0.6328, 1.8579]) tensor([0.3432, 0.0496, 0.1379, 0.4693]) -Greedy action tensor([ 1.0131, 0.5466, -0.5985, 0.6191]) tensor([0.3998, 0.2508, 0.0798, 0.2696]) -Greedy action tensor([ 0.6749, -0.5326, -0.3533, -0.1914]) tensor([0.4814, 0.1439, 0.1722, 0.2025]) -Greedy action tensor([ 1.0859, -0.4349, -0.3315, -0.1126]) tensor([0.5674, 0.1240, 0.1375, 0.1711]) -Greedy action tensor([ 0.8137, -0.3818, -0.6720, 0.3269]) tensor([0.4665, 0.1412, 0.1056, 0.2867]) -Greedy action tensor([ 1.1969, -0.4337, -0.2982, 0.1234]) tensor([0.5676, 0.1111, 0.1273, 0.1940]) -Greedy action tensor([ 0.7705, -0.6447, -0.3260, 0.2132]) tensor([0.4652, 0.1130, 0.1554, 0.2664]) -Greedy action tensor([ 1.0193, -0.5729, -0.0283, 0.0546]) tensor([0.5167, 0.1051, 0.1812, 0.1969]) -Greedy action tensor([ 1.5788, -0.3305, -0.2564, -0.0165]) tensor([0.6620, 0.0981, 0.1056, 0.1343]) -Greedy action tensor([ 0.4929, -0.3766, -0.3433, 0.2408]) tensor([0.3803, 0.1594, 0.1648, 0.2955]) -Greedy action tensor([ 1.3198, -0.7036, -0.4262, -0.1054]) tensor([0.6463, 0.0855, 0.1128, 0.1554]) -Greedy action tensor([ 0.6608, -0.3704, -0.1383, -0.0754]) tensor([0.4376, 0.1560, 0.1968, 0.2096]) -Greedy action tensor([ 1.0431, -0.5359, -0.0396, -0.2076]) tensor([0.5461, 0.1126, 0.1850, 0.1564]) -Greedy action tensor([ 0.1926, -0.0260, -0.5279, 0.2104]) tensor([0.3023, 0.2429, 0.1471, 0.3077]) -Greedy action tensor([ 1.4544, -0.7414, -0.4313, 0.4838]) tensor([0.6091, 0.0678, 0.0924, 0.2308]) -Greedy action tensor([ 1.0465, -0.3653, -0.0896, 0.1006]) tensor([0.5120, 0.1248, 0.1644, 0.1988]) -Greedy action tensor([ 1.3563, -0.4534, -0.4896, 0.1290]) tensor([0.6193, 0.1014, 0.0978, 0.1815]) -Greedy action tensor([ 1.2971, -0.3555, -0.2201, -0.1270]) tensor([0.6055, 0.1160, 0.1328, 0.1458]) -Greedy action tensor([ 1.3963, -0.3870, -0.2844, -0.1717]) tensor([0.6399, 0.1075, 0.1192, 0.1334]) -Greedy action tensor([ 1.0291, -0.4249, -0.0838, -0.0210]) tensor([0.5230, 0.1222, 0.1719, 0.1830]) -Greedy action tensor([ 0.9911, -0.5988, -0.2256, 0.2662]) tensor([0.5039, 0.1028, 0.1493, 0.2441]) -Greedy action tensor([ 0.8922, -0.2931, -0.1344, 0.2785]) tensor([0.4535, 0.1386, 0.1624, 0.2455]) -Greedy action tensor([ 0.7165, -0.2427, -0.5485, -0.1974]) tensor([0.4839, 0.1854, 0.1366, 0.1940]) -Greedy action tensor([ 1.1220, -0.6134, -0.6743, 0.5281]) tensor([0.5279, 0.0931, 0.0876, 0.2915]) -Greedy action tensor([ 1.3262, -0.6632, -0.5079, 0.2868]) tensor([0.6060, 0.0829, 0.0968, 0.2143]) -Greedy action tensor([ 0.6068, -0.4338, -0.3944, 0.3365]) tensor([0.4026, 0.1422, 0.1479, 0.3072]) -Greedy action tensor([ 0.5168, -0.1087, 0.3179, -0.0512]) tensor([0.3423, 0.1831, 0.2806, 0.1940]) -Greedy action tensor([ 1.1131, -0.3684, -0.3830, 0.3007]) tensor([0.5277, 0.1199, 0.1182, 0.2342]) -Greedy action tensor([ 2.0149, -0.5094, -0.3948, 0.1190]) tensor([0.7575, 0.0607, 0.0681, 0.1138]) -Greedy action tensor([ 1.4978, -0.4064, -0.3316, -0.0255]) tensor([0.6547, 0.0975, 0.1051, 0.1427]) -Greedy action tensor([ 1.1088, -0.2779, 0.0086, -0.1972]) tensor([0.5395, 0.1348, 0.1795, 0.1461]) -Greedy action tensor([ 0.8413, -0.1538, 0.1208, -0.2180]) tensor([0.4539, 0.1678, 0.2208, 0.1574]) -Greedy action tensor([ 1.3639, -0.7954, -0.8416, 0.3955]) tensor([0.6229, 0.0719, 0.0686, 0.2365]) -Greedy action tensor([ 0.7640, -0.4084, -0.1742, 0.0837]) tensor([0.4530, 0.1403, 0.1773, 0.2294]) -Greedy action tensor([ 0.6237, -0.6319, 0.0178, -0.0725]) tensor([0.4294, 0.1223, 0.2343, 0.2140]) -Greedy action tensor([ 0.5721, -0.5775, -0.2411, 0.1034]) tensor([0.4191, 0.1328, 0.1858, 0.2623]) -Greedy action tensor([ 1.2138, -0.3965, -0.1561, -0.0580]) tensor([0.5766, 0.1152, 0.1465, 0.1616]) -Greedy action tensor([ 0.9284, -0.3812, -0.4474, 0.1824]) tensor([0.5008, 0.1352, 0.1265, 0.2375]) -Greedy action tensor([ 0.8908, -0.6895, -0.3334, 0.1915]) tensor([0.5008, 0.1031, 0.1472, 0.2489]) -Greedy action tensor([ 0.4601, -0.3319, 0.0287, -0.1154]) tensor([0.3752, 0.1700, 0.2438, 0.2110]) -Greedy action tensor([ 0.8061, -0.2835, -0.6419, 0.5112]) tensor([0.4318, 0.1452, 0.1015, 0.3215]) -Greedy action tensor([ 1.1439, -0.1475, 0.1068, -0.0694]) tensor([0.5191, 0.1427, 0.1840, 0.1543]) -Greedy action tensor([ 1.1411, -0.8166, -0.4461, 0.7469]) tensor([0.4951, 0.0699, 0.1012, 0.3338]) -Greedy action tensor([ 1.4119, -0.7716, -0.5788, 0.2563]) tensor([0.6394, 0.0720, 0.0873, 0.2013]) -Greedy action tensor([ 1.0921, -0.5125, 0.0405, 0.2182]) tensor([0.5082, 0.1021, 0.1776, 0.2121]) -Greedy action tensor([ 0.5202, -0.3273, -0.5634, -0.1361]) tensor([0.4375, 0.1875, 0.1481, 0.2270]) -Greedy action tensor([ 0.9830, -0.3765, -0.4976, 0.6821]) tensor([0.4496, 0.1154, 0.1023, 0.3327]) -Greedy action tensor([ 1.1534, -0.7800, -0.5996, 0.5105]) tensor([0.5424, 0.0785, 0.0940, 0.2852]) -Greedy action tensor([ 0.3135, -0.2843, -0.0478, 0.0545]) tensor([0.3313, 0.1822, 0.2308, 0.2557]) -Greedy action tensor([ 0.8098, -0.5838, -0.6467, 0.3388]) tensor([0.4749, 0.1179, 0.1107, 0.2965]) -Greedy action tensor([ 0.7474, -0.3766, -0.5522, 0.3886]) tensor([0.4355, 0.1415, 0.1187, 0.3042]) -Greedy action tensor([ 0.7100, -0.3065, -0.0840, -0.0656]) tensor([0.4397, 0.1591, 0.1988, 0.2024]) -Greedy action tensor([ 0.3747, -0.1239, -0.6658, 0.4857]) tensor([0.3249, 0.1973, 0.1148, 0.3630]) -Greedy action tensor([ 0.9417, -0.5459, -0.3377, 0.0860]) tensor([0.5184, 0.1171, 0.1442, 0.2203]) -Greedy action tensor([ 0.8222, -0.3571, -0.1832, 0.0689]) tensor([0.4664, 0.1434, 0.1706, 0.2196]) -Greedy action tensor([ 0.7309, -0.3428, -0.1635, 0.0142]) tensor([0.4466, 0.1526, 0.1826, 0.2181]) -Greedy action tensor([ 0.8937, -0.2533, -0.0979, -0.1124]) tensor([0.4868, 0.1546, 0.1806, 0.1780]) -Greedy action tensor([ 1.3028, -0.6433, -0.2273, -0.0162]) tensor([0.6147, 0.0878, 0.1331, 0.1644]) -Greedy action tensor([ 0.8987, -0.4328, -0.2376, -0.0995]) tensor([0.5119, 0.1352, 0.1643, 0.1886]) -Greedy action tensor([ 1.0455, -0.5775, -0.5720, 0.8066]) tensor([0.4580, 0.0904, 0.0909, 0.3607]) -Greedy action tensor([ 0.5605, -0.0901, -0.2258, -0.3241]) tensor([0.4184, 0.2183, 0.1906, 0.1727]) -Greedy action tensor([ 1.3070, -0.2165, -0.6561, 0.1591]) tensor([0.5968, 0.1301, 0.0838, 0.1894]) -Greedy action tensor([ 1.4301, -0.5603, -0.2080, -0.0185]) tensor([0.6386, 0.0873, 0.1241, 0.1500]) -Greedy action tensor([ 1.1010, -0.6052, -0.0573, -0.0095]) tensor([0.5480, 0.0995, 0.1721, 0.1805]) -Greedy action tensor([ 0.9275, -0.5251, -0.0309, -0.1001]) tensor([0.5063, 0.1184, 0.1942, 0.1812]) -Greedy action tensor([ 1.1654, -0.3840, -0.0647, -0.1813]) tensor([0.5667, 0.1203, 0.1656, 0.1474]) -Greedy action tensor([ 1.1931, -0.5258, -0.6236, -0.0838]) tensor([0.6170, 0.1106, 0.1003, 0.1721]) -Greedy action tensor([ 1.0744, -0.3923, 0.2018, -0.0328]) tensor([0.5053, 0.1166, 0.2111, 0.1670]) -Greedy action tensor([ 0.7928, -0.5222, -0.5171, 0.0063]) tensor([0.5016, 0.1347, 0.1353, 0.2284]) -Greedy action tensor([ 0.5940, -0.3831, -0.4257, -0.0321]) tensor([0.4402, 0.1657, 0.1588, 0.2354]) -Greedy action tensor([ 0.8918, -0.4231, -0.8740, 0.3488]) tensor([0.4949, 0.1329, 0.0847, 0.2875]) -Greedy action tensor([ 1.2227, -0.7829, -0.4069, 0.7792]) tensor([0.5070, 0.0682, 0.0994, 0.3254]) -Greedy action tensor([ 0.7138, -0.3523, -0.6276, -0.3456]) tensor([0.5122, 0.1764, 0.1339, 0.1776]) -Greedy action tensor([ 1.3048, -0.5938, 0.0757, 0.3517]) tensor([0.5471, 0.0819, 0.1601, 0.2109]) -Greedy action tensor([ 0.5917, -0.5614, -0.0351, -0.1763]) tensor([0.4322, 0.1364, 0.2309, 0.2005]) -Greedy action tensor([ 1.2477, -0.8086, -0.4725, 0.9505]) tensor([0.4878, 0.0624, 0.0873, 0.3624]) -Greedy action tensor([ 0.9234, -0.4824, -0.7412, 0.7297]) tensor([0.4428, 0.1086, 0.0838, 0.3648]) -Greedy action tensor([ 1.3286, -0.3620, -0.2436, -0.1870]) tensor([0.6205, 0.1144, 0.1288, 0.1363]) -Greedy action tensor([ 0.6396, -0.0523, -0.1427, -0.2599]) tensor([0.4229, 0.2117, 0.1934, 0.1720]) -Greedy action tensor([ 1.4288, -0.2913, -0.0951, 0.0560]) tensor([0.6059, 0.1085, 0.1320, 0.1535]) -Greedy action tensor([ 0.6361, -0.4157, 0.0480, -0.0087]) tensor([0.4116, 0.1438, 0.2286, 0.2160]) -Greedy action tensor([ 0.5658, -0.1958, -0.1257, -0.3349]) tensor([0.4212, 0.1967, 0.2110, 0.1711]) -Greedy action tensor([ 1.4741, -0.7534, -0.3072, 0.3800]) tensor([0.6207, 0.0669, 0.1045, 0.2078]) -Greedy action tensor([ 0.4533, -0.0279, -0.0081, -0.4623]) tensor([0.3775, 0.2333, 0.2380, 0.1511]) -Greedy action tensor([ 0.4044, 0.1590, -0.0163, -0.1255]) tensor([0.3303, 0.2584, 0.2169, 0.1944]) -Greedy action tensor([ 0.6810, -0.3135, -0.0965, -0.5997]) tensor([0.4745, 0.1755, 0.2181, 0.1319]) -Greedy action tensor([ 0.6557, -0.0981, 0.1044, -0.5591]) tensor([0.4267, 0.2008, 0.2459, 0.1266]) -Greedy action tensor([ 0.4184, -0.0476, 0.0907, -0.2695]) tensor([0.3508, 0.2201, 0.2528, 0.1763]) -Greedy action tensor([ 0.6369, -0.3230, 0.0901, -0.7466]) tensor([0.4520, 0.1731, 0.2616, 0.1133]) -Greedy action tensor([ 0.7041, -0.4695, 0.0405, -0.7258]) tensor([0.4846, 0.1499, 0.2496, 0.1160]) -Greedy action tensor([ 0.3049, -0.1051, 0.2035, -0.3442]) tensor([0.3237, 0.2148, 0.2924, 0.1691]) -Greedy action tensor([ 0.4892, 0.1159, -0.0744, -0.3749]) tensor([0.3733, 0.2570, 0.2124, 0.1573]) -Greedy action tensor([ 0.6797, -0.6281, -0.1141, -0.6799]) tensor([0.5052, 0.1366, 0.2284, 0.1297]) -Greedy action tensor([ 1.1667, -0.6805, 0.1360, -0.6834]) tensor([0.5982, 0.0943, 0.2134, 0.0941]) -Greedy action tensor([ 0.8320, -0.3907, -0.1447, -0.4866]) tensor([0.5159, 0.1519, 0.1942, 0.1380]) -Greedy action tensor([ 0.3121, 0.1693, -0.0712, -0.3280]) tensor([0.3251, 0.2819, 0.2216, 0.1714]) -Greedy action tensor([ 0.2655, 0.1606, 0.0517, -0.2704]) tensor([0.3037, 0.2734, 0.2452, 0.1777]) -Greedy action tensor([ 0.8372, -0.6311, 0.1241, -0.8116]) tensor([0.5228, 0.1204, 0.2562, 0.1005]) -Greedy action tensor([ 0.4739, 0.1399, 0.0590, -0.2223]) tensor([0.3478, 0.2491, 0.2297, 0.1734]) -Greedy action tensor([ 0.2598, 0.1722, 0.1532, -0.2601]) tensor([0.2933, 0.2687, 0.2636, 0.1744]) -Greedy action tensor([-0.0353, -0.0863, -0.0618, -0.2507]) tensor([0.2681, 0.2547, 0.2611, 0.2161]) -Greedy action tensor([ 0.3683, 0.0312, -0.0372, -0.1669]) tensor([0.3371, 0.2407, 0.2248, 0.1974]) -Greedy action tensor([ 0.4752, -0.1796, 0.1340, -0.4597]) tensor([0.3812, 0.1981, 0.2710, 0.1497]) -Greedy action tensor([ 0.7564, -0.2975, -0.0222, -0.6703]) tensor([0.4883, 0.1702, 0.2242, 0.1173]) -Greedy action tensor([ 0.1747, 0.1051, 0.2182, -0.1422]) tensor([0.2699, 0.2517, 0.2818, 0.1966]) -Greedy action tensor([ 0.4782, -0.3183, -0.0958, -0.3448]) tensor([0.4076, 0.1838, 0.2296, 0.1790]) -Greedy action tensor([ 0.6776, -0.5608, 0.3289, -0.4602]) tensor([0.4318, 0.1252, 0.3047, 0.1384]) -Greedy action tensor([ 0.6668, -0.3822, -0.0957, -0.5576]) tensor([0.4738, 0.1660, 0.2210, 0.1393]) -Greedy action tensor([ 0.8581, -0.8061, -0.1279, -0.9121]) tensor([0.5771, 0.1093, 0.2153, 0.0983]) -Greedy action tensor([ 0.7704, 0.2952, -0.1416, -0.4199]) tensor([0.4296, 0.2671, 0.1726, 0.1307]) -Greedy action tensor([ 0.3625, 0.0177, 0.1355, -0.2226]) tensor([0.3265, 0.2313, 0.2602, 0.1819]) -Greedy action tensor([ 0.5813, -0.5177, -0.0061, -0.6452]) tensor([0.4582, 0.1527, 0.2547, 0.1344]) -Greedy action tensor([ 0.9853, -0.4042, 0.1217, -0.8156]) tensor([0.5447, 0.1357, 0.2296, 0.0899]) -Greedy action tensor([ 0.5632, 0.1546, -0.0601, -0.1042]) tensor([0.3685, 0.2449, 0.1976, 0.1890]) -Greedy action tensor([ 0.4479, -0.1785, 0.0448, -0.3253]) tensor([0.3753, 0.2006, 0.2508, 0.1732]) -Greedy action tensor([ 0.3541, -0.1639, 0.0132, -0.3746]) tensor([0.3585, 0.2136, 0.2549, 0.1730]) -Greedy action tensor([ 0.1191, 0.1679, 0.0662, -0.2522]) tensor([0.2711, 0.2847, 0.2572, 0.1870]) -Greedy action tensor([ 0.3874, -0.0445, -0.0632, -0.1349]) tensor([0.3473, 0.2255, 0.2213, 0.2060]) -Greedy action tensor([ 0.3571, -0.2808, 0.1448, -0.4404]) tensor([0.3587, 0.1896, 0.2901, 0.1616]) -Greedy action tensor([ 0.3464, 0.1145, -0.0165, -0.2827]) tensor([0.3309, 0.2625, 0.2302, 0.1764]) -Greedy action tensor([ 0.2271, 0.0593, 0.0022, -0.2481]) tensor([0.3062, 0.2589, 0.2445, 0.1904]) -Greedy action tensor([ 0.2496, -0.0032, 0.1478, -0.0177]) tensor([0.2903, 0.2254, 0.2622, 0.2222]) -Greedy action tensor([ 0.4953, -0.2811, 0.0398, -0.4254]) tensor([0.4012, 0.1846, 0.2544, 0.1598]) -Greedy action tensor([ 0.1022, -0.0225, 0.0227, -0.2718]) tensor([0.2862, 0.2526, 0.2643, 0.1969]) -Greedy action tensor([ 0.6487, -0.4156, -0.0058, -0.5465]) tensor([0.4614, 0.1592, 0.2398, 0.1396]) -Greedy action tensor([ 0.3530, -0.2471, 0.0534, -0.2260]) tensor([0.3508, 0.1925, 0.2600, 0.1966]) -Greedy action tensor([ 0.5834, -0.4352, -0.0517, -0.4602]) tensor([0.4458, 0.1610, 0.2362, 0.1570]) -Greedy action tensor([ 0.5572, -0.3281, 0.1120, -0.4913]) tensor([0.4160, 0.1717, 0.2665, 0.1458]) -Greedy action tensor([ 0.6972, -0.3556, -0.2756, -0.5220]) tensor([0.4944, 0.1725, 0.1869, 0.1461]) -Greedy action tensor([ 0.4412, 0.1612, -0.0299, -0.2361]) tensor([0.3462, 0.2617, 0.2162, 0.1759]) -Greedy action tensor([ 0.4286, 0.1418, -0.0913, -0.3390]) tensor([0.3560, 0.2672, 0.2116, 0.1652]) -Greedy action tensor([ 0.6308, -0.6181, -0.0373, -0.6781]) tensor([0.4832, 0.1386, 0.2477, 0.1305]) -Greedy action tensor([ 0.7356, -0.2468, 0.0379, -0.5988]) tensor([0.4683, 0.1753, 0.2331, 0.1233]) -Greedy action tensor([ 0.2752, -0.0908, 0.0817, -0.2362]) tensor([0.3208, 0.2225, 0.2644, 0.1924]) -Greedy action tensor([ 0.5658, -0.2389, 0.0679, -0.3332]) tensor([0.4062, 0.1817, 0.2469, 0.1653]) -Greedy action tensor([ 0.4197, -0.2859, 0.0619, -0.5576]) tensor([0.3892, 0.1922, 0.2721, 0.1465]) -Greedy action tensor([ 0.6764, -0.5619, -0.1548, -0.5579]) tensor([0.4959, 0.1438, 0.2160, 0.1443]) -Greedy action tensor([ 0.4470, -0.0524, 0.0482, -0.1846]) tensor([0.3559, 0.2160, 0.2389, 0.1893]) -Greedy action tensor([ 1.2132, -0.7718, 0.0297, -0.9197]) tensor([0.6402, 0.0879, 0.1960, 0.0759]) -Greedy action tensor([ 0.2776, 0.1342, 0.1376, -0.1848]) tensor([0.2971, 0.2574, 0.2583, 0.1871]) -Greedy action tensor([ 0.9883, -0.3391, 0.0141, -0.7056]) tensor([0.5475, 0.1452, 0.2067, 0.1006]) -Greedy action tensor([ 0.3106, -0.0938, 0.0963, -0.3383]) tensor([0.3337, 0.2227, 0.2693, 0.1744]) -Greedy action tensor([ 0.6417, -0.1639, -0.0196, -0.4240]) tensor([0.4334, 0.1936, 0.2237, 0.1493]) -Greedy action tensor([ 0.5274, 0.0184, -0.0246, -0.2734]) tensor([0.3808, 0.2289, 0.2193, 0.1710]) -Greedy action tensor([ 0.4326, 0.0256, 0.1688, -0.3019]) tensor([0.3432, 0.2285, 0.2636, 0.1647]) -Greedy action tensor([ 0.4306, 0.2436, 0.1180, -0.1698]) tensor([0.3216, 0.2667, 0.2352, 0.1764]) -Greedy action tensor([ 0.5057, -0.0258, 0.0708, -0.2136]) tensor([0.3674, 0.2159, 0.2378, 0.1789]) -Greedy action tensor([ 0.6323, -0.4610, -0.2535, -0.5612]) tensor([0.4877, 0.1634, 0.2011, 0.1478]) -Greedy action tensor([ 0.6942, -0.5594, 0.2285, -0.6883]) tensor([0.4621, 0.1319, 0.2900, 0.1160]) -Greedy action tensor([ 0.1389, 0.1572, 0.2102, -0.2100]) tensor([0.2633, 0.2682, 0.2828, 0.1857]) -Greedy action tensor([ 0.4184, -0.0275, 0.0708, -0.3733]) tensor([0.3572, 0.2287, 0.2523, 0.1618]) -Greedy action tensor([ 0.3573, 0.0621, 0.0620, -0.2630]) tensor([0.3304, 0.2460, 0.2459, 0.1777]) -Greedy action tensor([ 0.2113, -0.0272, 0.1786, -0.2713]) tensor([0.2965, 0.2336, 0.2870, 0.1830]) -Greedy action tensor([ 0.4897, -0.0222, 0.0783, -0.3750]) tensor([0.3727, 0.2234, 0.2470, 0.1570]) -Greedy action tensor([ 0.4697, -0.1528, 0.0367, -0.4974]) tensor([0.3898, 0.2092, 0.2528, 0.1482]) -Greedy action tensor([ 0.4380, -0.0824, -0.0736, -0.1297]) tensor([0.3622, 0.2153, 0.2172, 0.2053]) -Greedy action tensor([ 0.7797, -0.5915, -0.0965, -0.6220]) tensor([0.5218, 0.1324, 0.2173, 0.1285]) -Greedy action tensor([ 0.6829, -0.1969, 0.0051, -0.3747]) tensor([0.4405, 0.1828, 0.2237, 0.1530]) -Greedy action tensor([ 0.6828, -0.2215, -0.1318, -0.5308]) tensor([0.4663, 0.1887, 0.2065, 0.1385]) -Greedy action tensor([ 0.2027, 0.1900, 0.0986, -0.2182]) tensor([0.2821, 0.2785, 0.2542, 0.1852]) -Greedy action tensor([ 0.4424, -0.2563, 0.1396, -0.3504]) tensor([0.3719, 0.1850, 0.2748, 0.1683]) -Greedy action tensor([ 0.4167, 0.0207, -0.0077, -0.1504]) tensor([0.3455, 0.2325, 0.2260, 0.1959]) -Greedy action tensor([ 0.2994, 0.1456, 0.2448, -0.2316]) tensor([0.2948, 0.2528, 0.2791, 0.1733]) -Greedy action tensor([ 0.6082, -0.2583, -0.0482, -0.1683]) tensor([0.4168, 0.1752, 0.2162, 0.1917]) -Greedy action tensor([-1.0828, -0.2970, 0.1547, -0.0246]) tensor([0.1050, 0.2304, 0.3620, 0.3026]) -Greedy action tensor([-1.3835, -1.0368, 0.0978, -0.8779]) tensor([0.1181, 0.1670, 0.5193, 0.1957]) -Greedy action tensor([-1.3899, -0.5758, 0.3677, 0.1629]) tensor([0.0726, 0.1638, 0.4208, 0.3429]) -Greedy action tensor([-1.8102, -0.4824, 0.5932, -0.0975]) tensor([0.0468, 0.1765, 0.5174, 0.2593]) -Greedy action tensor([-0.8843, 0.8192, 0.0565, 0.3552]) tensor([0.0799, 0.4391, 0.2048, 0.2761]) -Greedy action tensor([-0.8168, -0.5546, 0.3109, 0.2091]) tensor([0.1223, 0.1589, 0.3777, 0.3411]) -Greedy action tensor([-1.9541, -0.4827, 0.9380, 0.2214]) tensor([0.0311, 0.1353, 0.5601, 0.2736]) -Greedy action tensor([-2.0035, -0.8345, 0.3019, -0.1923]) tensor([0.0491, 0.1581, 0.4924, 0.3004]) -Greedy action tensor([-0.7860, -0.4351, 0.3299, -0.1625]) tensor([0.1363, 0.1936, 0.4160, 0.2542]) -Greedy action tensor([-1.7970, -0.4379, 0.5874, -0.1154]) tensor([0.0474, 0.1843, 0.5139, 0.2545]) -Greedy action tensor([-1.8525, -0.3973, 0.2893, -0.2637]) tensor([0.0535, 0.2292, 0.4554, 0.2619]) -Greedy action tensor([-1.4407, -0.4635, 0.8679, 0.7557]) tensor([0.0440, 0.1170, 0.4430, 0.3960]) -Greedy action tensor([-1.3032, -0.5662, 0.4187, 0.0547]) tensor([0.0795, 0.1662, 0.4450, 0.3092]) -Greedy action tensor([-1.3523, -1.0835, 0.0447, -0.8412]) tensor([0.1247, 0.1632, 0.5042, 0.2079]) -Greedy action tensor([-1.8410, -0.6573, 0.7075, 0.1118]) tensor([0.0415, 0.1355, 0.5306, 0.2924]) -Greedy action tensor([-1.1293, -0.5702, 0.4453, -0.1387]) tensor([0.0974, 0.1703, 0.4701, 0.2622]) -Greedy action tensor([-1.9094, -0.5990, 0.8121, 0.0634]) tensor([0.0369, 0.1368, 0.5610, 0.2653]) -Greedy action tensor([-1.6178, -0.4316, 0.5886, 0.2885]) tensor([0.0498, 0.1630, 0.4522, 0.3350]) -Greedy action tensor([-1.2130, -0.5393, 0.3093, 0.3520]) tensor([0.0811, 0.1591, 0.3718, 0.3880]) -Greedy action tensor([-0.7073, -0.2096, 0.3492, -0.2466]) tensor([0.1407, 0.2315, 0.4048, 0.2231]) -Greedy action tensor([-0.8793, -0.5505, 0.2053, 0.2846]) tensor([0.1170, 0.1625, 0.3460, 0.3746]) -Greedy action tensor([-1.8027, -0.4029, 0.5833, -0.0815]) tensor([0.0465, 0.1884, 0.5052, 0.2599]) -Greedy action tensor([-1.3550, -0.5818, 0.4003, 0.3441]) tensor([0.0693, 0.1502, 0.4012, 0.3792]) -Greedy action tensor([-1.7336, -0.4882, 0.6686, 0.2155]) tensor([0.0444, 0.1541, 0.4900, 0.3115]) -Greedy action tensor([-1.4153, 0.2289, -0.2606, -0.4068]) tensor([0.0827, 0.4281, 0.2624, 0.2267]) -Greedy action tensor([-1.1984, -0.0305, 0.3428, 0.5371]) tensor([0.0687, 0.2209, 0.3208, 0.3896]) -Greedy action tensor([-1.9691, -0.9208, 0.2400, -0.2623]) tensor([0.0541, 0.1544, 0.4931, 0.2984]) -Greedy action tensor([-1.6684, -0.4464, 0.8772, 0.7130]) tensor([0.0358, 0.1214, 0.4560, 0.3869]) -Greedy action tensor([-1.6125, -0.6363, 0.4735, -0.1477]) tensor([0.0624, 0.1655, 0.5022, 0.2699]) -Greedy action tensor([-1.7181, -0.6577, 0.8451, 0.4586]) tensor([0.0389, 0.1124, 0.5053, 0.3433]) -Greedy action tensor([-1.6610, -0.4651, 0.5187, 0.0342]) tensor([0.0538, 0.1778, 0.4755, 0.2929]) -Greedy action tensor([-1.2152, -0.4871, 0.6975, -0.5540]) tensor([0.0849, 0.1758, 0.5748, 0.1644]) -Greedy action tensor([-1.8347, -0.4756, 0.6116, -0.1189]) tensor([0.0455, 0.1769, 0.5248, 0.2528]) -Greedy action tensor([-1.6653, -0.3224, 0.5061, -0.0283]) tensor([0.0534, 0.2044, 0.4680, 0.2742]) -Greedy action tensor([-1.6520, -0.3068, 0.5443, -0.1804]) tensor([0.0550, 0.2111, 0.4944, 0.2395]) -Greedy action tensor([-0.9786, -0.5548, 0.1974, 0.4108]) tensor([0.1022, 0.1562, 0.3314, 0.4102]) -Greedy action tensor([-1.7509, -0.4277, 0.6554, 0.1918]) tensor([0.0438, 0.1645, 0.4860, 0.3057]) -Greedy action tensor([-1.9700, -0.6666, 0.9157, 0.1608]) tensor([0.0322, 0.1187, 0.5776, 0.2715]) -Greedy action tensor([-0.2841, -0.1399, 0.8572, 1.4150]) tensor([0.0930, 0.1074, 0.2911, 0.5085]) -Greedy action tensor([-2.0582, -0.8032, 1.0429, 0.3053]) tensor([0.0268, 0.0939, 0.5948, 0.2845]) -Greedy action tensor([-1.6374, -0.5054, 0.1938, -0.3151]) tensor([0.0709, 0.2201, 0.4428, 0.2662]) -Greedy action tensor([-0.5012, -0.2483, 0.2117, 0.0438]) tensor([0.1652, 0.2128, 0.3371, 0.2850]) -Greedy action tensor([-1.5076, -0.6247, 0.5015, -0.0191]) tensor([0.0653, 0.1580, 0.4872, 0.2895]) -Greedy action tensor([-1.1739, -0.6446, 0.4364, 0.4150]) tensor([0.0794, 0.1347, 0.3972, 0.3887]) -Greedy action tensor([-0.9473, 0.1157, 0.6224, 0.9424]) tensor([0.0653, 0.1890, 0.3137, 0.4320]) -Greedy action tensor([-0.7596, 0.1044, 0.1135, -0.0228]) tensor([0.1273, 0.3020, 0.3048, 0.2659]) -Greedy action tensor([-1.8170, -0.4805, 0.6236, -0.0732]) tensor([0.0454, 0.1729, 0.5217, 0.2599]) -Greedy action tensor([-2.0111, -0.8889, 0.4093, -0.1716]) tensor([0.0463, 0.1421, 0.5205, 0.2912]) -Greedy action tensor([-0.9208, -0.3123, 0.2971, 0.7273]) tensor([0.0876, 0.1610, 0.2961, 0.4553]) -Greedy action tensor([-1.6976, -0.5380, 1.0569, 0.8212]) tensor([0.0309, 0.0987, 0.4862, 0.3841]) -Greedy action tensor([-1.2250, -0.3529, 0.3171, 0.3120]) tensor([0.0786, 0.1881, 0.3676, 0.3657]) -Greedy action tensor([-1.5921, -0.5266, 0.5494, 0.1142]) tensor([0.0558, 0.1619, 0.4749, 0.3073]) -Greedy action tensor([-1.5624, -0.5879, 0.6466, 0.3313]) tensor([0.0515, 0.1366, 0.4694, 0.3425]) -Greedy action tensor([-1.1937, -0.4852, 0.3454, 0.4060]) tensor([0.0791, 0.1606, 0.3686, 0.3916]) -Greedy action tensor([-1.9199, -0.6823, 0.5209, -0.0168]) tensor([0.0442, 0.1523, 0.5073, 0.2963]) -Greedy action tensor([-1.2784, -0.5769, 0.3076, 0.2317]) tensor([0.0805, 0.1623, 0.3930, 0.3643]) -Greedy action tensor([-1.8254, -0.4717, 0.6285, -0.0895]) tensor([0.0451, 0.1746, 0.5245, 0.2558]) -Greedy action tensor([-1.1406, -0.6383, 0.2787, 0.1874]) tensor([0.0947, 0.1565, 0.3915, 0.3573]) -Greedy action tensor([-1.4180, -0.5826, 0.5431, -0.1930]) tensor([0.0724, 0.1669, 0.5144, 0.2464]) -Greedy action tensor([-1.9350, -0.4462, 0.6602, -0.1761]) tensor([0.0406, 0.1799, 0.5438, 0.2357]) -Greedy action tensor([-1.8880, -0.6916, 0.2756, -0.2161]) tensor([0.0545, 0.1805, 0.4747, 0.2903]) -Greedy action tensor([-1.5477, -0.3505, 0.7003, 0.6616]) tensor([0.0437, 0.1446, 0.4137, 0.3980]) -Greedy action tensor([-1.4536, -0.6690, 0.4746, -0.3603]) tensor([0.0766, 0.1679, 0.5269, 0.2286]) -Greedy action tensor([-1.6796, -0.4531, 0.5304, -0.0027]) tensor([0.0530, 0.1806, 0.4830, 0.2834]) -Greedy action tensor([-0.9259, -0.3807, 0.3356, 0.0675]) tensor([0.1117, 0.1926, 0.3942, 0.3015]) -Greedy action tensor([-1.5198, -0.6372, 0.3381, 0.1355]) tensor([0.0664, 0.1605, 0.4256, 0.3475]) -Greedy action tensor([-1.8459, -0.7501, 0.6994, 0.0660]) tensor([0.0425, 0.1273, 0.5423, 0.2879]) -Greedy action tensor([-1.6885, -0.5361, 0.5401, -0.0537]) tensor([0.0538, 0.1704, 0.4998, 0.2760]) -Greedy action tensor([-1.8087, -0.8688, 0.1897, -0.3994]) tensor([0.0665, 0.1703, 0.4908, 0.2723]) -Greedy action tensor([-0.9905, -0.4203, 0.3707, 0.9842]) tensor([0.0721, 0.1275, 0.2812, 0.5193]) -Greedy action tensor([-1.8441, -0.4718, 0.6734, 0.0113]) tensor([0.0421, 0.1662, 0.5223, 0.2694]) -Greedy action tensor([-1.5869, -0.4478, 0.7441, 0.6830]) tensor([0.0415, 0.1297, 0.4271, 0.4018]) -Greedy action tensor([-1.1395, -0.5463, 0.3761, 0.0549]) tensor([0.0938, 0.1697, 0.4269, 0.3096]) -Greedy action tensor([-1.1542, -0.5554, 0.2926, 0.2574]) tensor([0.0895, 0.1629, 0.3804, 0.3672]) -Greedy action tensor([-1.1448, -0.3317, 0.0224, -0.3300]) tensor([0.1146, 0.2584, 0.3682, 0.2588]) -Greedy action tensor([-1.9403, -0.8827, 0.1795, -0.2838]) tensor([0.0573, 0.1650, 0.4773, 0.3004]) -Greedy action tensor([-1.7226, -0.5140, 0.5551, -0.0835]) tensor([0.0519, 0.1739, 0.5066, 0.2675]) -Greedy action tensor([-1.9534, -0.4994, 0.9143, 0.1740]) tensor([0.0320, 0.1369, 0.5627, 0.2684]) -Greedy action tensor([-1.0755, -0.5838, 0.2859, 0.7112]) tensor([0.0800, 0.1307, 0.3120, 0.4773]) -Greedy action tensor([-0.6150, -0.5663, 0.2029, 0.2700]) tensor([0.1484, 0.1558, 0.3362, 0.3596]) -Greedy action tensor([-1.5219, -0.5275, 0.4540, 0.2104]) tensor([0.0604, 0.1631, 0.4353, 0.3412]) -Greedy action tensor([ 0.5171, 0.2944, -0.3052, 0.7517]) tensor([0.2854, 0.2284, 0.1254, 0.3608]) -Greedy action tensor([ 1.1649, -0.1008, 0.1278, 0.3367]) tensor([0.4823, 0.1360, 0.1710, 0.2107]) -Greedy action tensor([ 0.6699, 1.5081, -0.3374, 0.7054]) tensor([0.2122, 0.4905, 0.0775, 0.2198]) -Greedy action tensor([1.6597, 0.1180, 1.1753, 1.6207]) tensor([0.3582, 0.0767, 0.2207, 0.3445]) -Greedy action tensor([1.2630, 0.2537, 0.2088, 1.1503]) tensor([0.3837, 0.1398, 0.1337, 0.3428]) -Greedy action tensor([ 1.1440, -0.2372, 0.2944, 0.4563]) tensor([0.4584, 0.1152, 0.1960, 0.2304]) -Greedy action tensor([ 1.7357, -0.2995, -0.0587, 0.6535]) tensor([0.6113, 0.0799, 0.1016, 0.2072]) -Greedy action tensor([ 0.7031, 0.0039, -0.1534, -0.1043]) tensor([0.4223, 0.2099, 0.1794, 0.1884]) -Greedy action tensor([ 1.4742, -0.0032, 0.4577, 1.1282]) tensor([0.4352, 0.0993, 0.1575, 0.3079]) -Greedy action tensor([ 1.9559, -1.3049, -0.1391, 0.7597]) tensor([0.6832, 0.0262, 0.0841, 0.2066]) -Greedy action tensor([ 0.7512, 0.7247, -0.0247, -0.2539]) tensor([0.3571, 0.3478, 0.1644, 0.1307]) -Greedy action tensor([ 0.7965, -0.6276, 1.1322, 0.5064]) tensor([0.2952, 0.0711, 0.4129, 0.2208]) -Greedy action tensor([ 1.1088, -0.0032, 1.0799, 1.0296]) tensor([0.3101, 0.1020, 0.3013, 0.2865]) -Greedy action tensor([ 0.3670, -1.1846, -0.3358, 0.7163]) tensor([0.3200, 0.0678, 0.1584, 0.4538]) -Greedy action tensor([ 1.0605, 0.2466, -0.1478, 0.4852]) tensor([0.4340, 0.1923, 0.1296, 0.2441]) -Greedy action tensor([ 1.3942, 0.4252, -0.1681, 0.4512]) tensor([0.5054, 0.1918, 0.1060, 0.1968]) -Greedy action tensor([ 1.0755, -0.3534, 0.7021, 0.8584]) tensor([0.3659, 0.0877, 0.2519, 0.2945]) -Greedy action tensor([ 1.1667, -0.1195, 0.1572, 0.4072]) tensor([0.4742, 0.1310, 0.1728, 0.2219]) -Greedy action tensor([ 2.0520, 0.2689, -0.3205, 1.9768]) tensor([0.4569, 0.0768, 0.0426, 0.4237]) -Greedy action tensor([1.2223, 0.2925, 0.4487, 0.6866]) tensor([0.4096, 0.1616, 0.1890, 0.2397]) -Greedy action tensor([ 0.3489, -0.3847, 0.2515, -0.2701]) tensor([0.3418, 0.1641, 0.3101, 0.1840]) -Greedy action tensor([-0.1666, 0.2054, 0.0086, 0.0164]) tensor([0.2065, 0.2995, 0.2460, 0.2479]) -Greedy action tensor([ 0.6126, 0.0519, -0.4388, 0.2629]) tensor([0.3809, 0.2174, 0.1331, 0.2685]) -Greedy action tensor([ 1.3081, 0.2480, -0.6515, 0.7516]) tensor([0.4853, 0.1681, 0.0684, 0.2782]) -Greedy action tensor([1.1595, 0.3292, 0.5734, 1.1929]) tensor([0.3304, 0.1440, 0.1839, 0.3416]) -Greedy action tensor([0.0914, 0.6192, 0.6246, 0.4722]) tensor([0.1706, 0.2891, 0.2907, 0.2496]) -Greedy action tensor([1.0848, 0.1792, 0.0610, 0.8678]) tensor([0.3893, 0.1574, 0.1399, 0.3134]) -Greedy action tensor([-0.4461, 0.4763, 0.4157, -0.4725]) tensor([0.1458, 0.3668, 0.3453, 0.1420]) -Greedy action tensor([ 0.7392, 0.2604, -0.3938, -0.2659]) tensor([0.4334, 0.2685, 0.1396, 0.1586]) -Greedy action tensor([ 1.0805, 0.5577, -0.3238, 0.9730]) tensor([0.3654, 0.2167, 0.0897, 0.3282]) -Greedy action tensor([0.8785, 0.7017, 0.7475, 0.8309]) tensor([0.2726, 0.2284, 0.2391, 0.2599]) -Greedy action tensor([ 0.7131, -0.4732, -0.1898, 1.0090]) tensor([0.3273, 0.0999, 0.1327, 0.4400]) -Greedy action tensor([ 1.0639, -0.1296, 0.1788, 0.4227]) tensor([0.4459, 0.1352, 0.1840, 0.2349]) -Greedy action tensor([ 0.8957, -0.9879, -0.3020, 0.6711]) tensor([0.4439, 0.0675, 0.1340, 0.3546]) -Greedy action tensor([1.4707, 0.2059, 0.0424, 0.8478]) tensor([0.4858, 0.1371, 0.1165, 0.2606]) -Greedy action tensor([ 0.8164, -0.3957, 0.1014, 1.0627]) tensor([0.3262, 0.0971, 0.1596, 0.4172]) -Greedy action tensor([ 0.9370, -0.3466, 0.3854, 0.2008]) tensor([0.4288, 0.1188, 0.2470, 0.2054]) -Greedy action tensor([1.2469, 0.9713, 1.0446, 0.1712]) tensor([0.3428, 0.2602, 0.2800, 0.1169]) -Greedy action tensor([ 0.3999, 0.7012, -0.2317, 0.3411]) tensor([0.2614, 0.3532, 0.1390, 0.2464]) -Greedy action tensor([ 1.1651, -0.3133, -0.7923, -0.0788]) tensor([0.6033, 0.1376, 0.0852, 0.1739]) -Greedy action tensor([ 2.2928, -0.8740, 0.5659, 0.4119]) tensor([0.7286, 0.0307, 0.1296, 0.1111]) -Greedy action tensor([ 0.8330, -0.4393, -0.2157, 1.1051]) tensor([0.3398, 0.0952, 0.1190, 0.4460]) -Greedy action tensor([1.5167, 0.1905, 1.6075, 0.8344]) tensor([0.3489, 0.0926, 0.3821, 0.1764]) -Greedy action tensor([ 1.2784, -0.3914, 0.6743, 1.8101]) tensor([0.2910, 0.0548, 0.1590, 0.4952]) -Greedy action tensor([ 1.3760, -1.1248, -0.2897, 1.3023]) tensor([0.4546, 0.0373, 0.0859, 0.4222]) -Greedy action tensor([ 0.6201, 1.1335, -0.1055, -0.0818]) tensor([0.2739, 0.4577, 0.1326, 0.1358]) -Greedy action tensor([0.1125, 0.3778, 0.1091, 1.1479]) tensor([0.1635, 0.2132, 0.1629, 0.4604]) -Greedy action tensor([1.2354, 0.7426, 0.0645, 0.8336]) tensor([0.3861, 0.2359, 0.1197, 0.2583]) -Greedy action tensor([ 1.2957, -1.0251, -0.7089, 0.6545]) tensor([0.5683, 0.0558, 0.0766, 0.2993]) -Greedy action tensor([ 0.6468, 0.5509, -0.8070, 0.9654]) tensor([0.2843, 0.2583, 0.0664, 0.3910]) -Greedy action tensor([ 0.5064, -0.9121, 0.0980, 1.7174]) tensor([0.1900, 0.0460, 0.1263, 0.6377]) -Greedy action tensor([ 1.4052, -0.8888, 0.3101, 0.1156]) tensor([0.5845, 0.0590, 0.1955, 0.1610]) -Greedy action tensor([ 1.1369, -0.3379, 0.2531, -0.6838]) tensor([0.5543, 0.1268, 0.2291, 0.0898]) -Greedy action tensor([ 1.7658, -1.1860, 0.5809, 1.6872]) tensor([0.4381, 0.0229, 0.1340, 0.4050]) -Greedy action tensor([ 0.5610, 0.6519, -0.1952, 1.1605]) tensor([0.2280, 0.2497, 0.1070, 0.4152]) -Greedy action tensor([-0.0571, -0.3474, -1.4477, 0.8111]) tensor([0.2283, 0.1708, 0.0568, 0.5440]) -Greedy action tensor([ 1.0780, -0.9233, 1.1743, 0.5408]) tensor([0.3545, 0.0479, 0.3904, 0.2072]) -Greedy action tensor([ 0.7371, -0.8590, 1.0998, 0.6806]) tensor([0.2789, 0.0565, 0.4009, 0.2636]) -Greedy action tensor([ 1.9402, -0.7184, -0.3011, 0.1986]) tensor([0.7399, 0.0518, 0.0787, 0.1296]) -Greedy action tensor([ 0.8282, 0.4283, -0.8488, 0.8098]) tensor([0.3522, 0.2361, 0.0658, 0.3458]) -Greedy action tensor([ 1.2454, -0.4314, 0.9716, 0.3668]) tensor([0.4232, 0.0791, 0.3219, 0.1758]) -Greedy action tensor([ 0.7015, -0.5200, 0.2336, 1.1031]) tensor([0.2928, 0.0863, 0.1834, 0.4375]) -Greedy action tensor([ 1.2064, -0.1960, 1.4346, 0.7126]) tensor([0.3213, 0.0790, 0.4036, 0.1961]) -Greedy action tensor([ 1.9934, -0.5322, 1.5800, 1.0625]) tensor([0.4682, 0.0375, 0.3097, 0.1846]) -Greedy action tensor([ 1.0689, -1.8181, 0.3899, 0.6500]) tensor([0.4503, 0.0251, 0.2284, 0.2962]) -Greedy action tensor([ 0.8651, -0.1108, -0.1898, 0.9005]) tensor([0.3622, 0.1365, 0.1261, 0.3752]) -Greedy action tensor([ 2.0171, -0.3681, -0.7663, 1.5816]) tensor([0.5553, 0.0511, 0.0343, 0.3593]) -Greedy action tensor([ 1.3246, -0.0830, -0.1530, 1.0137]) tensor([0.4534, 0.1110, 0.1034, 0.3322]) -Greedy action tensor([ 0.4291, -0.7365, -0.0356, 2.2808]) tensor([0.1203, 0.0375, 0.0756, 0.7666]) -Greedy action tensor([ 1.3735, 0.9392, -0.4064, 0.5552]) tensor([0.4429, 0.2869, 0.0747, 0.1954]) -Greedy action tensor([ 1.8328, -0.0110, 0.2233, 0.8647]) tensor([0.5754, 0.0910, 0.1151, 0.2185]) -Greedy action tensor([ 0.9942, 0.5240, -0.1553, 0.7639]) tensor([0.3655, 0.2284, 0.1158, 0.2903]) -Greedy action tensor([ 0.3962, -0.2656, 0.6218, 1.9634]) tensor([0.1322, 0.0682, 0.1657, 0.6338]) -Greedy action tensor([ 1.1368, 0.5022, -0.2357, 1.0262]) tensor([0.3733, 0.1979, 0.0946, 0.3342]) -Greedy action tensor([ 1.2978, -0.9025, 1.5004, 1.0449]) tensor([0.3214, 0.0356, 0.3935, 0.2495]) -Greedy action tensor([ 0.6301, 0.9518, 0.2241, -0.6533]) tensor([0.3009, 0.4151, 0.2005, 0.0834]) -Greedy action tensor([1.6442, 0.2748, 0.3838, 0.9806]) tensor([0.4872, 0.1239, 0.1381, 0.2509]) -Greedy action tensor([ 1.3090, -0.2006, -0.6099, 0.2838]) tensor([0.5792, 0.1280, 0.0850, 0.2078]) -Greedy action tensor([1.5208, 0.1286, 0.0731, 1.1345]) tensor([0.4623, 0.1149, 0.1087, 0.3142]) -Greedy action tensor([ 1.7354, -0.8521, 0.8905, 0.5804]) tensor([0.5495, 0.0413, 0.2361, 0.1731]) -Greedy action tensor([ 1.3082, -0.8980, 0.3533, 0.1128]) tensor([0.5563, 0.0613, 0.2141, 0.1683]) -Greedy action tensor([1.6269, 0.9375, 0.1548, 0.4843]) tensor([0.4877, 0.2448, 0.1119, 0.1556]) -Greedy action tensor([ 1.0498, -0.6209, -0.2678, 0.1323]) tensor([0.5390, 0.1014, 0.1443, 0.2153]) -Greedy action tensor([ 0.4678, -0.4570, 0.0451, -0.0988]) tensor([0.3818, 0.1514, 0.2502, 0.2166]) -Greedy action tensor([ 1.3704, -0.5258, -0.2271, -0.0018]) tensor([0.6226, 0.0935, 0.1260, 0.1579]) -Greedy action tensor([ 0.4456, -0.4781, 0.0198, -0.0382]) tensor([0.3750, 0.1489, 0.2450, 0.2312]) -Greedy action tensor([ 0.6507, -0.6142, -0.0418, 0.0179]) tensor([0.4322, 0.1220, 0.2163, 0.2295]) -Greedy action tensor([ 0.4995, -0.3334, 0.0647, -0.0829]) tensor([0.3787, 0.1647, 0.2452, 0.2115]) -Greedy action tensor([ 1.0597, -0.4744, -0.1386, -0.2298]) tensor([0.5578, 0.1203, 0.1683, 0.1536]) -Greedy action tensor([ 0.9457, -0.5279, -0.2870, 0.4755]) tensor([0.4661, 0.1068, 0.1359, 0.2913]) -Greedy action tensor([ 0.8978, -0.4453, -0.1761, -0.1489]) tensor([0.5118, 0.1336, 0.1749, 0.1797]) -Greedy action tensor([ 1.1587, -0.7390, -0.4901, 0.2065]) tensor([0.5787, 0.0867, 0.1113, 0.2233]) -Greedy action tensor([ 0.9750, -0.6831, -0.2091, 0.6130]) tensor([0.4560, 0.0869, 0.1396, 0.3175]) -Greedy action tensor([ 0.8122, -0.7066, -0.2493, 0.2647]) tensor([0.4666, 0.1022, 0.1614, 0.2699]) -Greedy action tensor([ 1.3627, -0.5511, -0.3317, -0.0185]) tensor([0.6319, 0.0932, 0.1161, 0.1588]) -Greedy action tensor([ 0.7317, -0.6496, -0.2164, 0.1378]) tensor([0.4564, 0.1147, 0.1769, 0.2520]) -Greedy action tensor([ 1.1569, -0.3819, -0.0949, 0.2401]) tensor([0.5262, 0.1129, 0.1505, 0.2104]) -Greedy action tensor([ 0.9934, -0.4537, 0.1139, 0.1910]) tensor([0.4765, 0.1121, 0.1978, 0.2136]) -Greedy action tensor([ 0.9688, -0.2278, -0.2242, -0.2924]) tensor([0.5294, 0.1600, 0.1606, 0.1500]) -Greedy action tensor([ 0.7553, -0.4330, -0.1824, 0.0313]) tensor([0.4585, 0.1397, 0.1795, 0.2223]) -Greedy action tensor([ 1.2055, -0.5227, -0.3198, -0.0296]) tensor([0.5931, 0.1053, 0.1290, 0.1725]) -Greedy action tensor([ 0.5559, -0.2728, -0.6383, -0.2476]) tensor([0.4572, 0.1996, 0.1385, 0.2047]) -Greedy action tensor([ 1.4314, -0.4269, -0.0768, 0.2594]) tensor([0.5928, 0.0924, 0.1312, 0.1836]) -Greedy action tensor([ 1.0348, -0.7042, -0.5714, 0.8072]) tensor([0.4603, 0.0809, 0.0923, 0.3665]) -Greedy action tensor([ 0.6892, -0.3719, -0.6849, 0.1069]) tensor([0.4634, 0.1604, 0.1173, 0.2589]) -Greedy action tensor([ 1.5097, -0.4205, -0.2633, 0.4332]) tensor([0.6039, 0.0876, 0.1026, 0.2058]) -Greedy action tensor([ 6.1401e-01, -8.2322e-02, 3.5232e-04, 1.6478e-03]) tensor([0.3873, 0.1930, 0.2097, 0.2100]) -Greedy action tensor([ 1.0693, -0.4128, 0.0147, -0.0714]) tensor([0.5277, 0.1199, 0.1838, 0.1686]) -Greedy action tensor([ 1.0669, -0.5944, -0.0943, 0.3612]) tensor([0.5008, 0.0951, 0.1568, 0.2473]) -Greedy action tensor([ 0.7495, -0.5268, -0.0732, -0.1393]) tensor([0.4696, 0.1311, 0.2063, 0.1931]) -Greedy action tensor([ 0.9742, -0.5946, -0.5805, 0.5443]) tensor([0.4831, 0.1006, 0.1020, 0.3143]) -Greedy action tensor([ 0.4468, -0.4039, -0.5159, 0.1649]) tensor([0.3901, 0.1666, 0.1490, 0.2943]) -Greedy action tensor([ 0.5845, -0.2302, -0.3234, -0.0527]) tensor([0.4211, 0.1864, 0.1699, 0.2226]) -Greedy action tensor([ 0.5379, -0.2343, 0.0492, -0.1818]) tensor([0.3903, 0.1803, 0.2394, 0.1900]) -Greedy action tensor([ 0.7824, -0.2926, -0.7400, -0.2310]) tensor([0.5202, 0.1775, 0.1135, 0.1888]) -Greedy action tensor([ 1.5793, -0.4320, -0.2992, 0.0298]) tensor([0.6671, 0.0893, 0.1019, 0.1417]) -Greedy action tensor([ 1.5219, -0.3685, -0.4205, 0.0156]) tensor([0.6596, 0.0996, 0.0946, 0.1463]) -Greedy action tensor([ 3.8937e-01, -3.5663e-01, 3.8138e-04, 3.4176e-02]) tensor([0.3505, 0.1662, 0.2376, 0.2457]) -Greedy action tensor([ 0.6355, -0.5330, -0.2099, 0.1158]) tensor([0.4283, 0.1331, 0.1839, 0.2547]) -Greedy action tensor([ 1.4100, -0.4693, -0.1897, 0.3221]) tensor([0.5912, 0.0903, 0.1194, 0.1992]) -Greedy action tensor([ 1.0820, -0.4219, -0.2075, 0.5345]) tensor([0.4817, 0.1071, 0.1327, 0.2786]) -Greedy action tensor([ 0.4590, -0.4166, -0.3807, -0.0162]) tensor([0.4048, 0.1687, 0.1748, 0.2517]) -Greedy action tensor([ 0.7679, -0.4722, -0.3752, 0.0355]) tensor([0.4787, 0.1385, 0.1526, 0.2301]) -Greedy action tensor([ 0.8954, -0.6266, -0.3415, 0.3280]) tensor([0.4818, 0.1052, 0.1399, 0.2732]) -Greedy action tensor([ 0.8904, -0.4147, -0.2101, 0.1199]) tensor([0.4839, 0.1312, 0.1610, 0.2239]) -Greedy action tensor([ 1.1746, -0.5482, -0.5029, -0.1165]) tensor([0.6096, 0.1089, 0.1139, 0.1676]) -Greedy action tensor([ 1.0177, -0.6692, -0.4535, -0.1186]) tensor([0.5761, 0.1066, 0.1323, 0.1849]) -Greedy action tensor([ 1.2883, -0.8115, -0.6303, 0.9150]) tensor([0.5108, 0.0626, 0.0750, 0.3517]) -Greedy action tensor([ 0.5296, -0.1728, -0.0441, -0.2830]) tensor([0.3996, 0.1980, 0.2251, 0.1773]) -Greedy action tensor([ 0.9002, -0.4459, 0.1320, -0.1475]) tensor([0.4820, 0.1254, 0.2236, 0.1691]) -Greedy action tensor([ 1.0797, -0.3583, -0.3203, 0.0379]) tensor([0.5444, 0.1293, 0.1342, 0.1921]) -Greedy action tensor([ 0.6716, -0.3340, -0.0654, -0.0699]) tensor([0.4309, 0.1576, 0.2062, 0.2053]) -Greedy action tensor([ 0.9175, -0.6898, -0.2623, 0.2933]) tensor([0.4894, 0.0981, 0.1504, 0.2621]) -Greedy action tensor([ 1.1738, -0.7332, -0.4415, 0.7234]) tensor([0.5039, 0.0748, 0.1002, 0.3211]) -Greedy action tensor([ 1.2388, -0.4782, 0.0117, 0.3194]) tensor([0.5343, 0.0960, 0.1566, 0.2131]) -Greedy action tensor([ 0.7310, -0.4042, -0.0568, 0.0677]) tensor([0.4364, 0.1402, 0.1985, 0.2248]) -Greedy action tensor([ 1.4026, -0.6985, -0.1448, -0.2160]) tensor([0.6522, 0.0798, 0.1388, 0.1292]) -Greedy action tensor([ 1.1514, -0.2386, 0.1391, -0.3023]) tensor([0.5417, 0.1349, 0.1968, 0.1266]) -Greedy action tensor([ 1.0253, -0.5848, -0.6810, 0.9896]) tensor([0.4262, 0.0852, 0.0774, 0.4113]) -Greedy action tensor([ 0.7844, -0.2627, -0.6784, 0.1508]) tensor([0.4732, 0.1661, 0.1096, 0.2511]) -Greedy action tensor([ 1.2866, -0.1947, -0.0861, -0.0660]) tensor([0.5749, 0.1307, 0.1457, 0.1487]) -Greedy action tensor([ 0.9181, -0.4594, -0.2802, 0.1077]) tensor([0.5003, 0.1262, 0.1510, 0.2225]) -Greedy action tensor([ 1.5536, -0.7514, -0.3159, 0.5586]) tensor([0.6159, 0.0614, 0.0950, 0.2277]) -Greedy action tensor([ 0.9054, -0.3600, -0.3552, 0.4125]) tensor([0.4595, 0.1296, 0.1303, 0.2807]) -Greedy action tensor([ 1.0115, -0.3437, -0.1525, -0.2363]) tensor([0.5384, 0.1389, 0.1681, 0.1546]) -Greedy action tensor([ 1.5892e+00, -1.5806e-01, -4.2428e-01, -1.0052e-03]) tensor([0.6615, 0.1153, 0.0883, 0.1349]) -Greedy action tensor([ 1.2504, -0.2789, -0.3991, 0.1196]) tensor([0.5775, 0.1251, 0.1110, 0.1864]) -Greedy action tensor([ 0.9707, -0.3655, -0.4224, 0.5129]) tensor([0.4665, 0.1226, 0.1158, 0.2951]) -Greedy action tensor([ 1.0230, -0.3898, -0.5997, 0.5147]) tensor([0.4896, 0.1192, 0.0966, 0.2945]) -Greedy action tensor([ 0.3882, -0.3224, -0.0706, -0.0631]) tensor([0.3623, 0.1780, 0.2290, 0.2307]) -Greedy action tensor([ 1.3787, -0.5868, -0.2265, 0.3369]) tensor([0.5904, 0.0827, 0.1186, 0.2083]) -Greedy action tensor([ 1.0895, -0.5988, -0.4921, -0.1006]) tensor([0.5901, 0.1091, 0.1213, 0.1795]) -Greedy action tensor([ 0.6863, -0.6080, -0.2416, 0.2329]) tensor([0.4339, 0.1189, 0.1715, 0.2757]) -Greedy action tensor([ 0.9412, -0.4859, -0.8225, 0.2697]) tensor([0.5202, 0.1249, 0.0892, 0.2658]) -Greedy action tensor([ 0.8514, -0.3100, -0.3884, 0.0459]) tensor([0.4880, 0.1528, 0.1412, 0.2180]) -Greedy action tensor([ 1.1077, -0.3406, -0.3110, 0.1963]) tensor([0.5322, 0.1251, 0.1288, 0.2139]) -Greedy action tensor([ 1.0164, -0.4730, -0.7185, 0.1195]) tensor([0.5525, 0.1246, 0.0975, 0.2254]) -Greedy action tensor([ 1.0053, -0.5543, -0.3289, 0.1380]) tensor([0.5281, 0.1110, 0.1391, 0.2218]) -Greedy action tensor([ 0.3035, -0.2299, -0.1899, -0.0959]) tensor([0.3487, 0.2045, 0.2129, 0.2339]) -Greedy action tensor([ 0.9566, -0.6413, -0.4365, 0.2452]) tensor([0.5150, 0.1042, 0.1279, 0.2529]) -Greedy action tensor([ 1.4896, -0.2947, -0.3887, -0.1518]) tensor([0.6603, 0.1109, 0.1009, 0.1279]) -Greedy action tensor([ 0.4421, -0.4614, 0.0157, -0.0030]) tensor([0.3705, 0.1501, 0.2419, 0.2374]) -Greedy action tensor([ 0.8296, -0.4716, -0.3239, 0.0213]) tensor([0.4918, 0.1339, 0.1552, 0.2192]) -Greedy action tensor([ 0.2505, -0.1111, 0.0541, -0.2836]) tensor([0.3221, 0.2244, 0.2647, 0.1888]) -Greedy action tensor([ 0.4017, 0.0371, -0.0130, -0.3510]) tensor([0.3539, 0.2457, 0.2337, 0.1667]) -Greedy action tensor([ 0.3332, -0.0105, -0.0490, -0.1860]) tensor([0.3348, 0.2374, 0.2285, 0.1992]) -Greedy action tensor([ 1.1142, -0.6213, -0.0556, -0.5926]) tensor([0.5995, 0.1057, 0.1861, 0.1088]) -Greedy action tensor([ 0.6285, -0.5342, 0.2970, -0.6958]) tensor([0.4354, 0.1361, 0.3126, 0.1158]) -Greedy action tensor([ 0.8566, -0.6182, 0.0822, -0.6139]) tensor([0.5209, 0.1192, 0.2401, 0.1197]) -Greedy action tensor([ 0.3132, 0.0342, 0.1453, -0.2247]) tensor([0.3139, 0.2375, 0.2654, 0.1833]) -Greedy action tensor([ 0.6105, -0.1621, 0.0148, -0.3883]) tensor([0.4199, 0.1939, 0.2315, 0.1547]) -Greedy action tensor([ 0.4608, -0.3281, 0.2256, -0.4552]) tensor([0.3781, 0.1718, 0.2988, 0.1513]) -Greedy action tensor([ 0.2575, 0.1523, 0.1633, -0.1912]) tensor([0.2900, 0.2610, 0.2639, 0.1851]) -Greedy action tensor([ 0.5487, -0.2481, 0.1063, -0.4924]) tensor([0.4088, 0.1843, 0.2626, 0.1443]) -Greedy action tensor([ 0.4078, -0.1296, 0.2049, -0.3148]) tensor([0.3465, 0.2024, 0.2829, 0.1682]) -Greedy action tensor([ 0.3552, 0.1535, 0.1295, -0.2705]) tensor([0.3174, 0.2595, 0.2533, 0.1698]) -Greedy action tensor([ 0.5757, -0.2329, 0.1504, -0.3255]) tensor([0.3992, 0.1778, 0.2609, 0.1621]) -Greedy action tensor([ 0.5117, -0.1095, -0.0054, -0.3672]) tensor([0.3923, 0.2108, 0.2339, 0.1629]) -Greedy action tensor([ 0.6383, -0.1572, -0.0317, -0.3379]) tensor([0.4274, 0.1929, 0.2187, 0.1610]) -Greedy action tensor([ 0.4319, -0.0481, 0.1254, -0.3377]) tensor([0.3549, 0.2196, 0.2612, 0.1644]) -Greedy action tensor([ 0.1070, 0.0933, 0.1193, -0.1895]) tensor([0.2672, 0.2636, 0.2705, 0.1987]) -Greedy action tensor([ 1.0148, -0.7348, 0.0107, -0.6820]) tensor([0.5802, 0.1009, 0.2126, 0.1063]) -Greedy action tensor([ 0.4640, -0.2250, 0.2027, -0.4677]) tensor([0.3751, 0.1883, 0.2888, 0.1477]) -Greedy action tensor([ 0.4159, 0.0183, 0.0066, -0.1331]) tensor([0.3432, 0.2306, 0.2279, 0.1982]) -Greedy action tensor([ 0.1911, 0.0465, 0.1679, -0.2265]) tensor([0.2856, 0.2472, 0.2791, 0.1881]) -Greedy action tensor([ 0.2954, -0.1875, 0.0724, -0.4516]) tensor([0.3459, 0.2134, 0.2768, 0.1639]) -Greedy action tensor([ 0.2368, 0.0148, -0.1013, -0.3030]) tensor([0.3229, 0.2586, 0.2303, 0.1882]) -Greedy action tensor([ 0.2945, 0.1477, -0.0161, -0.2267]) tensor([0.3134, 0.2707, 0.2298, 0.1861]) -Greedy action tensor([ 0.3893, 0.0527, 0.0685, -0.2469]) tensor([0.3368, 0.2405, 0.2444, 0.1783]) -Greedy action tensor([ 0.6641, -0.4535, 0.0867, -0.7034]) tensor([0.4666, 0.1526, 0.2619, 0.1189]) -Greedy action tensor([ 0.5207, 0.1315, -0.0973, -0.2047]) tensor([0.3703, 0.2509, 0.1996, 0.1793]) -Greedy action tensor([ 0.4774, 0.1801, 0.0497, -0.2718]) tensor([0.3487, 0.2590, 0.2274, 0.1649]) -Greedy action tensor([ 0.3934, -0.2912, 0.0676, -0.2955]) tensor([0.3665, 0.1848, 0.2646, 0.1840]) -Greedy action tensor([ 0.1747, 0.1893, 0.1285, -0.2645]) tensor([0.2767, 0.2808, 0.2642, 0.1784]) -Greedy action tensor([ 0.8242, -0.3770, -0.0432, -0.6269]) tensor([0.5115, 0.1539, 0.2148, 0.1198]) -Greedy action tensor([ 0.9280, -0.3921, 0.0843, -0.6999]) tensor([0.5281, 0.1411, 0.2271, 0.1037]) -Greedy action tensor([ 1.0349, -0.6802, -0.1701, -0.7016]) tensor([0.6040, 0.1087, 0.1810, 0.1064]) -Greedy action tensor([ 0.4994, 0.0062, 0.0310, -0.2528]) tensor([0.3693, 0.2255, 0.2312, 0.1741]) -Greedy action tensor([ 0.7636, -0.3315, -0.0968, -0.4607]) tensor([0.4875, 0.1631, 0.2062, 0.1433]) -Greedy action tensor([ 0.5521, 0.1024, -0.0863, -0.0957]) tensor([0.3718, 0.2372, 0.1964, 0.1946]) -Greedy action tensor([ 0.4408, -0.0460, -0.0699, -0.3401]) tensor([0.3741, 0.2300, 0.2245, 0.1714]) -Greedy action tensor([ 0.1066, 0.1356, -0.1192, -0.1323]) tensor([0.2766, 0.2848, 0.2207, 0.2179]) -Greedy action tensor([ 0.4844, 0.0992, 0.0938, -0.1850]) tensor([0.3486, 0.2371, 0.2358, 0.1785]) -Greedy action tensor([ 0.6696, -0.0262, -0.0744, -0.3322]) tensor([0.4272, 0.2130, 0.2030, 0.1569]) -Greedy action tensor([ 1.0689, -0.5574, -0.1567, -0.6964]) tensor([0.6019, 0.1184, 0.1767, 0.1030]) -Greedy action tensor([ 0.5376, -0.3593, 0.1393, -0.5566]) tensor([0.4142, 0.1689, 0.2781, 0.1387]) -Greedy action tensor([ 0.4575, -0.0516, -0.0444, -0.2623]) tensor([0.3713, 0.2232, 0.2248, 0.1808]) -Greedy action tensor([ 0.8869, -0.1127, -0.0317, -0.2054]) tensor([0.4756, 0.1750, 0.1898, 0.1595]) -Greedy action tensor([ 0.3735, -0.3559, 0.0995, -0.3394]) tensor([0.3659, 0.1765, 0.2782, 0.1794]) -Greedy action tensor([ 0.5236, -0.2394, -0.0316, -0.2405]) tensor([0.3990, 0.1861, 0.2290, 0.1859]) -Greedy action tensor([ 0.7171, -0.6095, -0.1558, -0.6521]) tensor([0.5162, 0.1370, 0.2156, 0.1313]) -Greedy action tensor([ 1.0050, -0.8263, -0.1673, -0.6591]) tensor([0.6027, 0.0966, 0.1866, 0.1141]) -Greedy action tensor([ 0.7128, -0.4902, -0.0209, -0.5823]) tensor([0.4868, 0.1462, 0.2337, 0.1333]) -Greedy action tensor([ 0.1572, -0.0369, 0.0342, -0.1539]) tensor([0.2907, 0.2394, 0.2570, 0.2129]) -Greedy action tensor([ 0.3153, 0.0004, -0.0970, -0.1452]) tensor([0.3308, 0.2414, 0.2190, 0.2087]) -Greedy action tensor([ 0.2091, 0.0822, -0.0421, -0.1692]) tensor([0.2991, 0.2634, 0.2326, 0.2049]) -Greedy action tensor([ 0.6892, -0.5880, -0.0631, -0.5647]) tensor([0.4913, 0.1370, 0.2315, 0.1402]) -Greedy action tensor([ 0.4267, -0.1786, -0.0782, -0.3786]) tensor([0.3851, 0.2103, 0.2325, 0.1721]) -Greedy action tensor([ 0.8659, -0.1968, 0.0311, -0.5173]) tensor([0.4925, 0.1702, 0.2138, 0.1235]) -Greedy action tensor([ 0.4004, 0.0134, -0.0897, -0.0988]) tensor([0.3450, 0.2343, 0.2113, 0.2094]) -Greedy action tensor([ 1.0551, -0.5470, 0.0682, -0.5431]) tensor([0.5629, 0.1134, 0.2098, 0.1139]) -Greedy action tensor([ 0.5116, 0.2112, 0.0038, -0.1518]) tensor([0.3500, 0.2592, 0.2106, 0.1803]) -Greedy action tensor([ 0.3472, -0.0637, -0.0309, -0.4216]) tensor([0.3556, 0.2358, 0.2437, 0.1649]) -Greedy action tensor([ 0.6656, -0.2809, 0.0204, -0.4457]) tensor([0.4461, 0.1731, 0.2340, 0.1468]) -Greedy action tensor([ 0.4746, -0.2306, -0.0994, -0.2534]) tensor([0.3937, 0.1945, 0.2217, 0.1901]) -Greedy action tensor([ 0.3507, 0.1537, -0.0886, -0.3490]) tensor([0.3376, 0.2772, 0.2175, 0.1677]) -Greedy action tensor([ 0.5066, -0.0763, -0.0129, -0.1490]) tensor([0.3742, 0.2089, 0.2226, 0.1943]) -Greedy action tensor([ 0.4040, 0.0786, 0.1544, -0.2260]) tensor([0.3296, 0.2381, 0.2568, 0.1755]) -Greedy action tensor([ 0.5432, -0.0827, 0.1523, -0.3374]) tensor([0.3808, 0.2037, 0.2576, 0.1579]) -Greedy action tensor([ 0.3021, -0.0263, 0.0764, -0.2101]) tensor([0.3208, 0.2310, 0.2560, 0.1922]) -Greedy action tensor([ 1.0003, -0.4548, -0.1459, -0.6992]) tensor([0.5767, 0.1346, 0.1833, 0.1054]) -Greedy action tensor([ 0.2231, 0.1540, 0.1159, -0.1932]) tensor([0.2864, 0.2673, 0.2573, 0.1889]) -Greedy action tensor([ 0.5789, -0.2484, -0.0489, -0.3091]) tensor([0.4197, 0.1835, 0.2240, 0.1727]) -Greedy action tensor([ 0.5138, -0.2092, 0.0385, -0.4921]) tensor([0.4044, 0.1963, 0.2514, 0.1479]) -Greedy action tensor([ 0.4572, -0.0316, 0.1286, -0.3059]) tensor([0.3572, 0.2191, 0.2572, 0.1665]) -Greedy action tensor([ 0.4758, -0.0233, 0.1027, -0.3479]) tensor([0.3657, 0.2220, 0.2518, 0.1605]) -Greedy action tensor([ 1.0868, -0.5700, 0.0864, -0.5213]) tensor([0.5686, 0.1085, 0.2091, 0.1139]) -Greedy action tensor([ 0.2390, 0.1088, 0.0757, -0.2112]) tensor([0.2972, 0.2609, 0.2524, 0.1895]) -Greedy action tensor([ 0.2598, -0.0563, 0.0243, -0.3309]) tensor([0.3254, 0.2372, 0.2571, 0.1803]) -Greedy action tensor([ 0.5066, -0.4105, -0.0395, -0.5191]) tensor([0.4278, 0.1710, 0.2478, 0.1534]) -Greedy action tensor([ 0.2908, -0.0939, 0.1099, -0.2450]) tensor([0.3225, 0.2195, 0.2692, 0.1888]) -Greedy action tensor([ 0.1393, 0.2202, 0.0781, -0.2549]) tensor([0.2703, 0.2931, 0.2543, 0.1823]) -Greedy action tensor([ 0.7598, -0.4050, 0.0016, -0.3602]) tensor([0.4746, 0.1481, 0.2224, 0.1549]) -Greedy action tensor([ 0.5339, -0.3530, -0.0502, -0.5021]) tensor([0.4302, 0.1772, 0.2399, 0.1527]) -Greedy action tensor([-1.7703, -0.5360, 0.2919, -0.2585]) tensor([0.0594, 0.2041, 0.4671, 0.2694]) -Greedy action tensor([-1.9003, -0.6035, 0.8523, 0.1473]) tensor([0.0356, 0.1302, 0.5583, 0.2759]) -Greedy action tensor([-1.6457, -0.4774, 0.5457, 0.1363]) tensor([0.0523, 0.1683, 0.4683, 0.3110]) -Greedy action tensor([-1.2307, -0.6125, 0.4833, -0.1997]) tensor([0.0892, 0.1655, 0.4952, 0.2501]) -Greedy action tensor([-1.7683, -0.4915, 0.5672, -0.0581]) tensor([0.0489, 0.1753, 0.5054, 0.2704]) -Greedy action tensor([-2.0112, -0.9758, 0.8014, 0.3146]) tensor([0.0326, 0.0917, 0.5424, 0.3333]) -Greedy action tensor([-0.7862, -0.1838, -0.8693, -0.1484]) tensor([0.1773, 0.3239, 0.1632, 0.3356]) -Greedy action tensor([-1.8052, -0.2401, 0.7895, 0.2632]) tensor([0.0369, 0.1766, 0.4944, 0.2921]) -Greedy action tensor([-1.5820, -0.5654, 0.0352, -0.4225]) tensor([0.0834, 0.2305, 0.4202, 0.2659]) -Greedy action tensor([-1.4057, -0.3037, 0.6885, 0.8784]) tensor([0.0456, 0.1372, 0.3699, 0.4473]) -Greedy action tensor([-1.4718, -0.3371, 1.1601, 1.0490]) tensor([0.0328, 0.1021, 0.4565, 0.4085]) -Greedy action tensor([-2.0520, -0.8784, 0.6645, -0.0361]) tensor([0.0372, 0.1203, 0.5630, 0.2794]) -Greedy action tensor([-1.8669, -0.9188, 0.0579, -0.4301]) tensor([0.0683, 0.1763, 0.4681, 0.2873]) -Greedy action tensor([-1.9982, -0.9524, 0.3377, -0.2679]) tensor([0.0504, 0.1435, 0.5214, 0.2846]) -Greedy action tensor([-1.6363, -1.0663, -0.0400, -0.8660]) tensor([0.1014, 0.1793, 0.5003, 0.2190]) -Greedy action tensor([-1.1932, -0.5207, -0.3697, -0.3108]) tensor([0.1306, 0.2560, 0.2977, 0.3157]) -Greedy action tensor([-1.5664, -0.5276, 0.4625, 0.0212]) tensor([0.0613, 0.1731, 0.4659, 0.2997]) -Greedy action tensor([-1.7869, -0.7061, 0.3020, -0.4282]) tensor([0.0628, 0.1852, 0.5075, 0.2445]) -Greedy action tensor([-0.6575, -0.5770, 0.2244, -0.0476]) tensor([0.1577, 0.1710, 0.3810, 0.2903]) -Greedy action tensor([-1.3338, -0.5928, 0.1116, -0.0078]) tensor([0.0900, 0.1889, 0.3821, 0.3390]) -Greedy action tensor([-0.6487, 0.2079, 0.2633, -0.0531]) tensor([0.1306, 0.3075, 0.3250, 0.2369]) -Greedy action tensor([-1.8689, -0.4612, 0.6316, -0.1243]) tensor([0.0435, 0.1777, 0.5300, 0.2489]) -Greedy action tensor([-1.5204, -0.5115, 0.5145, 0.3203]) tensor([0.0565, 0.1550, 0.4324, 0.3561]) -Greedy action tensor([-1.7030, -0.9226, 0.1402, -0.4582]) tensor([0.0771, 0.1682, 0.4870, 0.2677]) -Greedy action tensor([-1.0763, -0.5202, 0.2055, 0.4185]) tensor([0.0925, 0.1614, 0.3334, 0.4126]) -Greedy action tensor([-1.4631, -0.4117, 0.1911, -0.4816]) tensor([0.0850, 0.2433, 0.4447, 0.2269]) -Greedy action tensor([-1.1659, -0.5053, 1.0762, 1.2149]) tensor([0.0432, 0.0836, 0.4064, 0.4669]) -Greedy action tensor([-1.2792, -0.5172, 0.4327, 0.6046]) tensor([0.0655, 0.1404, 0.3630, 0.4311]) -Greedy action tensor([-1.5551, -0.4793, 0.7315, -0.5266]) tensor([0.0603, 0.1770, 0.5939, 0.1688]) -Greedy action tensor([-0.8055, -0.2263, -0.3631, -0.1003]) tensor([0.1571, 0.2804, 0.2445, 0.3180]) -Greedy action tensor([-0.6233, -0.3175, 0.3743, 0.2153]) tensor([0.1354, 0.1839, 0.3673, 0.3133]) -Greedy action tensor([-1.9174, -0.8420, 0.4281, -0.0712]) tensor([0.0483, 0.1416, 0.5042, 0.3060]) -Greedy action tensor([-1.9731, -0.5263, 1.2800, 0.6705]) tensor([0.0221, 0.0940, 0.5726, 0.3113]) -Greedy action tensor([-0.5239, 0.6000, 0.0766, 0.6694]) tensor([0.1087, 0.3345, 0.1982, 0.3586]) -Greedy action tensor([-0.4221, -0.4742, 0.1880, 0.2075]) tensor([0.1765, 0.1675, 0.3248, 0.3312]) -Greedy action tensor([-2.0294, -0.8405, 0.4980, -0.0780]) tensor([0.0419, 0.1377, 0.5252, 0.2952]) -Greedy action tensor([-1.6891, -0.4262, 0.9841, 0.8220]) tensor([0.0319, 0.1128, 0.4622, 0.3931]) -Greedy action tensor([-1.9038, -0.4155, 0.7776, 0.1944]) tensor([0.0355, 0.1572, 0.5182, 0.2892]) -Greedy action tensor([-0.8112, -0.1957, -0.3372, 0.0108]) tensor([0.1485, 0.2749, 0.2386, 0.3380]) -Greedy action tensor([-1.0540, -0.5826, 0.2665, 0.4501]) tensor([0.0922, 0.1477, 0.3453, 0.4149]) -Greedy action tensor([-1.4192, -0.3054, 0.6919, 0.7868]) tensor([0.0468, 0.1424, 0.3862, 0.4246]) -Greedy action tensor([-1.6377, -0.4430, 0.5242, 0.0794]) tensor([0.0539, 0.1780, 0.4681, 0.3000]) -Greedy action tensor([-1.8138, -0.4301, 0.5980, -0.1047]) tensor([0.0462, 0.1841, 0.5148, 0.2549]) -Greedy action tensor([-1.9940, -0.5979, 1.3180, 0.5816]) tensor([0.0219, 0.0885, 0.6015, 0.2880]) -Greedy action tensor([-0.7062, -0.5711, 0.1700, 0.3127]) tensor([0.1367, 0.1564, 0.3283, 0.3786]) -Greedy action tensor([-0.3641, 0.0798, 0.8694, 1.6241]) tensor([0.0752, 0.1172, 0.2582, 0.5493]) -Greedy action tensor([-1.7443, -0.4950, 0.5916, -0.0371]) tensor([0.0492, 0.1715, 0.5083, 0.2711]) -Greedy action tensor([-1.3232, -0.2626, 0.4324, 0.6035]) tensor([0.0605, 0.1746, 0.3498, 0.4151]) -Greedy action tensor([-1.9004, -0.5853, 1.0920, 0.4963]) tensor([0.0281, 0.1045, 0.5592, 0.3082]) -Greedy action tensor([-1.4652, -0.5049, 0.4407, 0.1615]) tensor([0.0648, 0.1694, 0.4360, 0.3298]) -Greedy action tensor([-1.0699, -0.5884, 0.2892, 0.2263]) tensor([0.0984, 0.1592, 0.3829, 0.3595]) -Greedy action tensor([-1.5839, -0.4755, 0.5959, 0.2265]) tensor([0.0527, 0.1596, 0.4658, 0.3219]) -Greedy action tensor([-1.4691, 0.1840, 0.0272, -0.5417]) tensor([0.0757, 0.3952, 0.3379, 0.1913]) -Greedy action tensor([-1.5042, -0.5517, 0.4425, 0.0360]) tensor([0.0655, 0.1698, 0.4590, 0.3057]) -Greedy action tensor([-2.0308, -0.7780, 1.3143, 0.6264]) tensor([0.0212, 0.0743, 0.6019, 0.3025]) -Greedy action tensor([-1.9520, -0.6233, 0.8724, 0.3918]) tensor([0.0312, 0.1178, 0.5258, 0.3252]) -Greedy action tensor([-1.6550, -0.4744, 1.0528, 0.7358]) tensor([0.0331, 0.1079, 0.4970, 0.3620]) -Greedy action tensor([-1.1884, -0.2057, 0.3299, 0.1635]) tensor([0.0826, 0.2208, 0.3772, 0.3194]) -Greedy action tensor([-1.3100, -0.4831, 0.4078, 0.1913]) tensor([0.0749, 0.1713, 0.4175, 0.3362]) -Greedy action tensor([ 0.0857, -0.0931, 0.3145, 0.8274]) tensor([0.1926, 0.1610, 0.2421, 0.4043]) -Greedy action tensor([-1.1142, -0.6612, 0.4758, 0.5457]) tensor([0.0785, 0.1235, 0.3850, 0.4129]) -Greedy action tensor([-1.9666, -0.6789, 0.8608, 0.0644]) tensor([0.0343, 0.1244, 0.5798, 0.2615]) -Greedy action tensor([-1.7063, -0.4946, 0.5439, 0.0321]) tensor([0.0512, 0.1719, 0.4857, 0.2911]) -Greedy action tensor([-1.7108, -0.5780, 1.0132, 0.8053]) tensor([0.0315, 0.0978, 0.4804, 0.3902]) -Greedy action tensor([-1.2823, -0.5827, 0.3201, 0.2313]) tensor([0.0799, 0.1608, 0.3965, 0.3628]) -Greedy action tensor([-1.5626, -0.5210, 0.4774, 0.1463]) tensor([0.0587, 0.1662, 0.4511, 0.3240]) -Greedy action tensor([-1.4594, -0.2831, 0.0747, -0.4725]) tensor([0.0865, 0.2804, 0.4010, 0.2320]) -Greedy action tensor([-1.7723, -0.5033, 0.6475, 0.0767]) tensor([0.0451, 0.1606, 0.5075, 0.2868]) -Greedy action tensor([-1.9879, -0.6135, 1.2749, 0.5345]) tensor([0.0230, 0.0908, 0.6000, 0.2862]) -Greedy action tensor([-1.7896, -0.6503, 0.7452, 0.1941]) tensor([0.0417, 0.1302, 0.5254, 0.3028]) -Greedy action tensor([-1.4465, -0.4859, 0.4605, -0.2339]) tensor([0.0729, 0.1906, 0.4912, 0.2453]) -Greedy action tensor([-1.3643, -0.0132, 0.3614, -0.5080]) tensor([0.0779, 0.3009, 0.4377, 0.1835]) -Greedy action tensor([-1.8980, -0.4533, 0.9911, 0.4837]) tensor([0.0294, 0.1246, 0.5281, 0.3179]) -Greedy action tensor([-1.9666, -0.5693, 0.9298, 0.1045]) tensor([0.0322, 0.1301, 0.5825, 0.2552]) -Greedy action tensor([-2.0302, -0.8586, 0.5041, -0.0972]) tensor([0.0421, 0.1359, 0.5310, 0.2910]) -Greedy action tensor([-0.8882, -0.7529, 0.5672, 1.1423]) tensor([0.0712, 0.0815, 0.3051, 0.5422]) -Greedy action tensor([-1.3059, -0.3265, 0.4977, 0.6480]) tensor([0.0596, 0.1586, 0.3616, 0.4203]) -Greedy action tensor([-0.8042, -0.3740, 1.1816, 1.5240]) tensor([0.0498, 0.0766, 0.3627, 0.5109]) -Greedy action tensor([-1.8755, -0.4679, 0.7168, 0.0384]) tensor([0.0396, 0.1620, 0.5296, 0.2688]) -Greedy action tensor([-1.7801, -0.8301, 0.0645, -0.3929]) tensor([0.0719, 0.1858, 0.4546, 0.2877]) -Greedy action tensor([-1.4227, -0.7909, 0.3752, 0.5164]) tensor([0.0630, 0.1185, 0.3804, 0.4381]) -Greedy action tensor([ 1.2668, -0.7705, 0.5246, 1.3766]) tensor([0.3673, 0.0479, 0.1749, 0.4099]) -Greedy action tensor([ 0.9895, -1.7473, 0.0041, 0.7186]) tensor([0.4544, 0.0294, 0.1696, 0.3466]) -Greedy action tensor([ 1.3794, -2.1568, 0.0783, 0.2292]) tensor([0.6181, 0.0180, 0.1683, 0.1957]) -Greedy action tensor([ 0.7550, -0.7205, 0.8668, 0.4994]) tensor([0.3204, 0.0733, 0.3583, 0.2481]) -Greedy action tensor([ 1.0707, -0.5766, 1.0999, 1.4365]) tensor([0.2729, 0.0526, 0.2810, 0.3935]) -Greedy action tensor([ 0.6015, -0.8385, 0.2606, 0.8897]) tensor([0.3047, 0.0722, 0.2167, 0.4064]) -Greedy action tensor([ 0.7282, 0.1900, 0.1588, -0.2845]) tensor([0.3979, 0.2323, 0.2252, 0.1445]) -Greedy action tensor([0.2230, 0.8640, 1.0319, 0.2021]) tensor([0.1633, 0.3100, 0.3667, 0.1599]) -Greedy action tensor([ 0.5453, 0.4176, -0.7666, 0.7791]) tensor([0.2930, 0.2579, 0.0789, 0.3702]) -Greedy action tensor([ 0.3175, -0.1300, -0.9881, -0.0529]) tensor([0.3845, 0.2458, 0.1042, 0.2655]) -Greedy action tensor([ 1.7930, -0.1791, 1.2236, 1.4247]) tensor([0.4172, 0.0581, 0.2361, 0.2887]) -Greedy action tensor([ 1.1920, -0.8206, 1.1344, 1.1259]) tensor([0.3318, 0.0443, 0.3133, 0.3106]) -Greedy action tensor([ 1.1034, -0.3459, -0.6551, 1.1461]) tensor([0.4081, 0.0958, 0.0703, 0.4259]) -Greedy action tensor([ 1.1111, -0.2979, 0.4654, -0.0592]) tensor([0.4810, 0.1176, 0.2522, 0.1492]) -Greedy action tensor([ 1.9458, -1.2708, -0.1149, 1.1636]) tensor([0.6154, 0.0247, 0.0784, 0.2815]) -Greedy action tensor([ 1.8083, -0.5158, 0.1940, 1.5488]) tensor([0.4835, 0.0473, 0.0962, 0.3730]) -Greedy action tensor([ 1.1808, -0.1957, 0.8859, 1.2401]) tensor([0.3270, 0.0826, 0.2435, 0.3470]) -Greedy action tensor([1.0993, 0.0622, 1.0349, 0.3072]) tensor([0.3643, 0.1291, 0.3416, 0.1650]) -Greedy action tensor([ 0.0320, -0.5577, 0.1264, 1.5298]) tensor([0.1403, 0.0778, 0.1542, 0.6276]) -Greedy action tensor([ 0.9977, -1.3695, -0.0179, 0.8955]) tensor([0.4239, 0.0397, 0.1536, 0.3828]) -Greedy action tensor([ 1.4722, -0.5909, 1.1683, 0.5652]) tensor([0.4408, 0.0560, 0.3253, 0.1780]) -Greedy action tensor([ 1.0371, 0.0988, -0.1315, 0.0914]) tensor([0.4784, 0.1872, 0.1487, 0.1858]) -Greedy action tensor([ 0.9982, 0.0971, -0.4748, 0.4462]) tensor([0.4523, 0.1837, 0.1037, 0.2604]) -Greedy action tensor([ 1.5128, -0.5636, 0.3730, 1.9420]) tensor([0.3354, 0.0421, 0.1073, 0.5152]) -Greedy action tensor([ 6.1339e-01, -2.2047e-01, -5.2676e-05, 1.4292e+00]) tensor([0.2360, 0.1025, 0.1278, 0.5337]) -Greedy action tensor([ 2.2440, -0.3655, 0.8619, 1.0894]) tensor([0.6098, 0.0449, 0.1531, 0.1922]) -Greedy action tensor([ 1.3787, -1.2957, -0.0262, 0.9830]) tensor([0.5031, 0.0347, 0.1235, 0.3387]) -Greedy action tensor([ 0.7180, -0.1023, 0.8741, 0.8123]) tensor([0.2697, 0.1187, 0.3152, 0.2964]) -Greedy action tensor([ 0.8470, 0.8066, -0.2968, 0.2812]) tensor([0.3513, 0.3373, 0.1119, 0.1995]) -Greedy action tensor([ 0.9345, -0.5252, 0.2092, 1.3174]) tensor([0.3142, 0.0730, 0.1521, 0.4607]) -Greedy action tensor([ 1.2219, -0.4854, 0.4003, 1.5718]) tensor([0.3289, 0.0597, 0.1446, 0.4668]) -Greedy action tensor([ 0.9192, -1.3390, 0.7978, 0.7389]) tensor([0.3539, 0.0370, 0.3135, 0.2956]) -Greedy action tensor([ 1.0714, -0.4876, 1.3195, 0.3984]) tensor([0.3331, 0.0701, 0.4269, 0.1699]) -Greedy action tensor([1.0543, 0.1217, 1.1792, 0.6248]) tensor([0.3147, 0.1239, 0.3566, 0.2048]) -Greedy action tensor([ 0.5989, -0.4489, 0.5797, 0.1447]) tensor([0.3371, 0.1182, 0.3307, 0.2140]) -Greedy action tensor([ 1.4131, 0.2375, -0.0172, 1.4584]) tensor([0.3855, 0.1190, 0.0922, 0.4033]) -Greedy action tensor([ 0.7949, -0.3707, 0.4785, 0.0919]) tensor([0.3944, 0.1229, 0.2874, 0.1953]) -Greedy action tensor([ 0.5938, -1.4978, -0.2769, 0.8595]) tensor([0.3513, 0.0434, 0.1471, 0.4582]) -Greedy action tensor([ 1.4153, -1.6198, 0.7917, 0.5793]) tensor([0.4957, 0.0238, 0.2657, 0.2148]) -Greedy action tensor([0.0471, 0.0474, 0.9711, 1.1248]) tensor([0.1341, 0.1341, 0.3378, 0.3940]) -Greedy action tensor([ 0.7523, -1.2398, 0.3752, -0.1496]) tensor([0.4488, 0.0612, 0.3078, 0.1821]) -Greedy action tensor([1.0044, 0.1197, 0.2011, 0.4196]) tensor([0.4136, 0.1707, 0.1852, 0.2305]) -Greedy action tensor([ 1.4490, -0.2848, 0.4593, 0.6024]) tensor([0.5058, 0.0893, 0.1880, 0.2169]) -Greedy action tensor([ 1.6431, 0.7672, 0.5613, -0.5055]) tensor([0.5342, 0.2225, 0.1811, 0.0623]) -Greedy action tensor([-0.3300, -0.3878, 0.1965, -0.3720]) tensor([0.2176, 0.2054, 0.3684, 0.2086]) -Greedy action tensor([ 0.6556, -0.4780, 1.0486, 0.1045]) tensor([0.2959, 0.0952, 0.4383, 0.1705]) -Greedy action tensor([ 0.9602, -1.6630, 0.7127, 0.3668]) tensor([0.4157, 0.0302, 0.3245, 0.2296]) -Greedy action tensor([ 1.3368, -1.0962, 0.9190, 0.8898]) tensor([0.4192, 0.0368, 0.2760, 0.2681]) -Greedy action tensor([1.2842, 0.3041, 0.2812, 0.6926]) tensor([0.4356, 0.1635, 0.1598, 0.2411]) -Greedy action tensor([ 0.6760, -1.4599, 0.8572, -0.1682]) tensor([0.3641, 0.0430, 0.4364, 0.1565]) -Greedy action tensor([0.4076, 0.3088, 0.4856, 0.7090]) tensor([0.2305, 0.2088, 0.2492, 0.3116]) -Greedy action tensor([ 1.6013, -1.7649, -0.1872, 0.3433]) tensor([0.6730, 0.0232, 0.1125, 0.1913]) -Greedy action tensor([ 0.6926, -0.6139, 0.5369, 0.3524]) tensor([0.3523, 0.0954, 0.3015, 0.2507]) -Greedy action tensor([ 0.8300, -0.7161, 0.2954, 0.3570]) tensor([0.4129, 0.0880, 0.2419, 0.2573]) -Greedy action tensor([1.0604, 0.5510, 0.6296, 1.0592]) tensor([0.3077, 0.1849, 0.2000, 0.3074]) -Greedy action tensor([ 0.6423, 0.1126, -0.4768, -0.2039]) tensor([0.4266, 0.2511, 0.1393, 0.1830]) -Greedy action tensor([0.6820, 0.6123, 0.2279, 0.1565]) tensor([0.3166, 0.2952, 0.2010, 0.1872]) -Greedy action tensor([ 1.2687, 0.9466, -1.1404, 1.0685]) tensor([0.3798, 0.2752, 0.0341, 0.3109]) -Greedy action tensor([ 1.0610, -0.2235, 0.4234, 0.7423]) tensor([0.3949, 0.1093, 0.2087, 0.2871]) -Greedy action tensor([ 1.5250, 0.1533, -0.2177, 0.6879]) tensor([0.5372, 0.1363, 0.0940, 0.2326]) -Greedy action tensor([ 0.7540, 0.1780, -0.1836, 0.5630]) tensor([0.3597, 0.2022, 0.1409, 0.2972]) -Greedy action tensor([ 1.3972, 0.4582, -0.6120, 0.2590]) tensor([0.5418, 0.2119, 0.0727, 0.1736]) -Greedy action tensor([ 1.3716, -0.2821, 1.0127, 1.4367]) tensor([0.3382, 0.0647, 0.2362, 0.3609]) -Greedy action tensor([ 1.0365, -1.1216, 0.6778, 0.8802]) tensor([0.3746, 0.0433, 0.2617, 0.3204]) -Greedy action tensor([ 1.8281, -0.2171, 0.1445, 0.8370]) tensor([0.5930, 0.0767, 0.1101, 0.2201]) -Greedy action tensor([ 0.7212, -0.7346, 0.3970, 0.6152]) tensor([0.3502, 0.0817, 0.2532, 0.3149]) -Greedy action tensor([0.8712, 0.0910, 0.0333, 1.2566]) tensor([0.2975, 0.1364, 0.1287, 0.4374]) -Greedy action tensor([ 0.7953, -0.6318, 1.0352, 1.2158]) tensor([0.2479, 0.0595, 0.3151, 0.3775]) -Greedy action tensor([ 0.8339, 0.3927, -0.8310, 0.6384]) tensor([0.3767, 0.2423, 0.0713, 0.3098]) -Greedy action tensor([ 1.1739, -0.2180, -0.2325, 0.6525]) tensor([0.4791, 0.1191, 0.1174, 0.2844]) -Greedy action tensor([ 0.0825, 1.2702, 0.7120, -0.3835]) tensor([0.1474, 0.4834, 0.2766, 0.0925]) -Greedy action tensor([ 0.9337, -0.2716, 0.5065, -0.2562]) tensor([0.4432, 0.1328, 0.2891, 0.1349]) -Greedy action tensor([ 1.2841, -0.0896, 0.5661, 0.8221]) tensor([0.4218, 0.1068, 0.2057, 0.2657]) -Greedy action tensor([ 0.7385, -0.8535, 1.3907, 0.7192]) tensor([0.2437, 0.0496, 0.4677, 0.2390]) -Greedy action tensor([ 0.8495, -0.5890, 0.4963, 0.4508]) tensor([0.3830, 0.0909, 0.2690, 0.2571]) -Greedy action tensor([ 1.0324, -1.1162, 0.6806, 1.8171]) tensor([0.2493, 0.0291, 0.1753, 0.5463]) -Greedy action tensor([0.1309, 0.7500, 0.0279, 0.2586]) tensor([0.2043, 0.3794, 0.1843, 0.2321]) -Greedy action tensor([ 0.8906, -0.1614, 0.2158, 0.2316]) tensor([0.4209, 0.1470, 0.2143, 0.2178]) -Greedy action tensor([ 0.8728, -0.1692, 1.0273, 0.7657]) tensor([0.2925, 0.1032, 0.3414, 0.2628]) -Greedy action tensor([ 0.7883, 0.9130, -0.1660, -0.1296]) tensor([0.3428, 0.3883, 0.1320, 0.1369]) -Greedy action tensor([ 1.5946, -0.5333, 1.5734, 0.2093]) tensor([0.4258, 0.0507, 0.4169, 0.1066]) -Greedy action tensor([ 0.2112, -0.1489, 0.2970, 0.0925]) tensor([0.2721, 0.1898, 0.2965, 0.2416]) -Greedy action tensor([ 0.8767, -0.3463, -0.4342, 0.0473]) tensor([0.5000, 0.1471, 0.1348, 0.2181]) -Greedy action tensor([ 0.6222, -0.4085, -0.0253, -0.1553]) tensor([0.4274, 0.1525, 0.2237, 0.1964]) -Greedy action tensor([ 1.1293, -0.3122, -0.0378, -0.2148]) tensor([0.5529, 0.1308, 0.1721, 0.1442]) -Greedy action tensor([ 1.4381, -0.4269, -0.4249, -0.2812]) tensor([0.6715, 0.1040, 0.1042, 0.1203]) -Greedy action tensor([ 0.4747, -0.0834, 0.1044, -0.2113]) tensor([0.3615, 0.2069, 0.2496, 0.1820]) -Greedy action tensor([ 0.6476, -0.4476, -0.5127, -0.0511]) tensor([0.4662, 0.1559, 0.1461, 0.2318]) -Greedy action tensor([ 0.7281, -0.4599, -0.7625, 0.4795]) tensor([0.4329, 0.1320, 0.0975, 0.3376]) -Greedy action tensor([ 1.4118, -0.5728, -0.0768, 0.2242]) tensor([0.5995, 0.0824, 0.1353, 0.1828]) -Greedy action tensor([ 0.6704, -0.4004, -0.5980, -0.1344]) tensor([0.4828, 0.1655, 0.1358, 0.2159]) -Greedy action tensor([ 1.0394, -0.7196, -0.6665, 0.7378]) tensor([0.4777, 0.0823, 0.0867, 0.3533]) -Greedy action tensor([ 0.9618, -0.0339, -0.0369, -0.0598]) tensor([0.4767, 0.1761, 0.1756, 0.1716]) -Greedy action tensor([ 1.2324, -0.7240, -0.1353, 0.0074]) tensor([0.5918, 0.0837, 0.1507, 0.1738]) -Greedy action tensor([ 1.9734, -0.1499, -0.3630, -0.0293]) tensor([0.7400, 0.0885, 0.0715, 0.0999]) -Greedy action tensor([ 1.1590, 0.0253, 0.0294, -0.0150]) tensor([0.5117, 0.1647, 0.1654, 0.1582]) -Greedy action tensor([ 1.1565, -0.4576, -0.2942, -0.0272]) tensor([0.5748, 0.1144, 0.1347, 0.1760]) -Greedy action tensor([ 0.9435, -0.4400, -0.3731, -0.2078]) tensor([0.5450, 0.1366, 0.1461, 0.1723]) -Greedy action tensor([ 0.7086, -0.2984, -0.1279, 0.1034]) tensor([0.4265, 0.1558, 0.1848, 0.2329]) -Greedy action tensor([ 0.5862, -0.2072, 0.0761, -0.2359]) tensor([0.4013, 0.1815, 0.2409, 0.1764]) -Greedy action tensor([ 1.7211, -0.3300, -0.3529, -0.1656]) tensor([0.7113, 0.0915, 0.0894, 0.1078]) -Greedy action tensor([ 0.8925, -0.3924, -0.5728, 0.5811]) tensor([0.4464, 0.1235, 0.1031, 0.3269]) -Greedy action tensor([ 1.5453, -0.9486, -0.2986, 0.0226]) tensor([0.6854, 0.0566, 0.1084, 0.1495]) -Greedy action tensor([ 1.2439, -0.5179, 0.1516, 0.0018]) tensor([0.5568, 0.0956, 0.1868, 0.1608]) -Greedy action tensor([ 1.3390, -0.6921, -0.2559, 0.0490]) tensor([0.6214, 0.0815, 0.1261, 0.1710]) -Greedy action tensor([ 0.9850, -0.3095, -0.4639, 0.6912]) tensor([0.4436, 0.1216, 0.1042, 0.3307]) -Greedy action tensor([ 0.7075, -0.6576, -0.2602, 0.2329]) tensor([0.4430, 0.1131, 0.1683, 0.2756]) -Greedy action tensor([ 1.0259, -0.3921, 0.1243, 0.1377]) tensor([0.4856, 0.1176, 0.1971, 0.1998]) -Greedy action tensor([ 1.0856, -0.4680, -0.4658, 0.1259]) tensor([0.5536, 0.1171, 0.1173, 0.2120]) -Greedy action tensor([ 1.1739, -0.4802, 0.0438, 0.1354]) tensor([0.5353, 0.1024, 0.1729, 0.1895]) -Greedy action tensor([ 1.2725, -0.5348, -0.3803, -0.2308]) tensor([0.6337, 0.1040, 0.1214, 0.1409]) -Greedy action tensor([ 0.6957, -0.2018, -0.4327, 0.0492]) tensor([0.4435, 0.1808, 0.1435, 0.2323]) -Greedy action tensor([ 1.4986, -0.1613, -0.3264, 0.1647]) tensor([0.6193, 0.1178, 0.0998, 0.1631]) -Greedy action tensor([ 1.2165, -0.5185, -0.2133, -0.0471]) tensor([0.5888, 0.1039, 0.1409, 0.1664]) -Greedy action tensor([ 0.6250, -0.2937, -0.2710, -0.1607]) tensor([0.4419, 0.1763, 0.1804, 0.2014]) -Greedy action tensor([ 1.2393, -0.6628, -0.4701, -0.0643]) tensor([0.6243, 0.0932, 0.1130, 0.1695]) -Greedy action tensor([ 0.8960, -0.2604, -0.2444, 0.1947]) tensor([0.4694, 0.1477, 0.1501, 0.2328]) -Greedy action tensor([ 0.5617, -0.4673, -0.0703, -0.0104]) tensor([0.4076, 0.1457, 0.2167, 0.2300]) -Greedy action tensor([ 1.2731, -0.3455, -0.3185, 0.2003]) tensor([0.5734, 0.1136, 0.1168, 0.1961]) -Greedy action tensor([ 0.6846, -0.3433, -0.3242, 0.1210]) tensor([0.4364, 0.1561, 0.1591, 0.2484]) -Greedy action tensor([ 0.8558, -0.4300, -0.4971, 0.7644]) tensor([0.4086, 0.1129, 0.1056, 0.3729]) -Greedy action tensor([ 1.7800, -0.2768, -0.4081, -0.1165]) tensor([0.7194, 0.0920, 0.0807, 0.1080]) -Greedy action tensor([ 1.1094, -0.5238, -0.1137, 0.2490]) tensor([0.5228, 0.1021, 0.1539, 0.2212]) -Greedy action tensor([ 0.7223, -0.4494, -0.1415, 0.1350]) tensor([0.4372, 0.1355, 0.1843, 0.2430]) -Greedy action tensor([ 0.9843, -0.1017, -0.3147, 0.1402]) tensor([0.4901, 0.1654, 0.1337, 0.2107]) -Greedy action tensor([ 0.4869, -0.3340, -0.3859, 0.0495]) tensor([0.3994, 0.1758, 0.1669, 0.2579]) -Greedy action tensor([ 0.8624, -0.4568, -0.1736, 0.1195]) tensor([0.4767, 0.1274, 0.1692, 0.2268]) -Greedy action tensor([ 1.1414, -0.8276, -0.5130, 0.5902]) tensor([0.5244, 0.0732, 0.1003, 0.3022]) -Greedy action tensor([ 1.4308, -0.5766, -0.4445, -0.1924]) tensor([0.6734, 0.0905, 0.1032, 0.1328]) -Greedy action tensor([ 0.6564, -0.4763, -0.1247, -0.1329]) tensor([0.4476, 0.1442, 0.2049, 0.2033]) -Greedy action tensor([ 1.0563, -0.7458, -0.2376, 0.1087]) tensor([0.5474, 0.0903, 0.1501, 0.2122]) -Greedy action tensor([ 1.0917, -0.4102, -0.3075, -0.0561]) tensor([0.5596, 0.1246, 0.1381, 0.1776]) -Greedy action tensor([ 1.4133, -0.7653, -0.4357, 0.1444]) tensor([0.6444, 0.0729, 0.1014, 0.1812]) -Greedy action tensor([ 0.5289, -0.4119, -0.5163, -0.1796]) tensor([0.4476, 0.1747, 0.1574, 0.2204]) -Greedy action tensor([ 0.8750, -0.3176, -0.1866, -0.2444]) tensor([0.5061, 0.1536, 0.1751, 0.1652]) -Greedy action tensor([ 0.3636, -0.3562, -0.3112, 0.0813]) tensor([0.3636, 0.1770, 0.1852, 0.2742]) -Greedy action tensor([ 0.1280, 0.0330, -0.2764, -0.1425]) tensor([0.2994, 0.2723, 0.1998, 0.2285]) -Greedy action tensor([ 1.1292, -0.4603, -0.4443, -0.0694]) tensor([0.5838, 0.1191, 0.1210, 0.1761]) -Greedy action tensor([ 1.0085, -0.4216, -0.7716, -0.0605]) tensor([0.5710, 0.1366, 0.0963, 0.1961]) -Greedy action tensor([ 1.4063, -0.6606, -0.2575, 0.1752]) tensor([0.6219, 0.0787, 0.1178, 0.1816]) -Greedy action tensor([ 1.4241, -0.6290, -0.2501, 0.2687]) tensor([0.6132, 0.0787, 0.1150, 0.1931]) -Greedy action tensor([ 1.0530, -0.3402, -0.4957, 0.1310]) tensor([0.5381, 0.1336, 0.1144, 0.2140]) -Greedy action tensor([ 0.6311, -0.4771, -0.2854, 0.0110]) tensor([0.4409, 0.1456, 0.1763, 0.2372]) -Greedy action tensor([ 1.2671, -0.6617, -0.5034, 0.3137]) tensor([0.5879, 0.0854, 0.1001, 0.2266]) -Greedy action tensor([ 0.9134, -0.3878, -0.4236, -0.0961]) tensor([0.5265, 0.1433, 0.1383, 0.1919]) -Greedy action tensor([ 0.8757, -0.6600, 0.1241, 0.0535]) tensor([0.4703, 0.1013, 0.2218, 0.2067]) -Greedy action tensor([ 1.2326, -0.5026, 0.0097, 0.2885]) tensor([0.5377, 0.0948, 0.1583, 0.2092]) -Greedy action tensor([ 0.6271, -0.4611, 0.1045, 0.0015]) tensor([0.4057, 0.1367, 0.2406, 0.2170]) -Greedy action tensor([ 1.2573, -0.1947, -0.0890, 0.1179]) tensor([0.5512, 0.1290, 0.1434, 0.1764]) -Greedy action tensor([ 1.3985, -0.3887, -0.6173, -0.2165]) tensor([0.6669, 0.1117, 0.0888, 0.1326]) -Greedy action tensor([ 0.4407, -0.4241, -0.4858, -0.0301]) tensor([0.4096, 0.1725, 0.1622, 0.2558]) -Greedy action tensor([ 0.5176, -0.0945, -0.0648, -0.1572]) tensor([0.3831, 0.2077, 0.2140, 0.1951]) -Greedy action tensor([ 1.6318, -0.9192, -0.4002, 0.2916]) tensor([0.6799, 0.0530, 0.0891, 0.1780]) -Greedy action tensor([ 1.3533, -0.5579, -0.3702, 0.1437]) tensor([0.6155, 0.0910, 0.1098, 0.1836]) -Greedy action tensor([ 0.7419, -0.5458, -0.1933, 0.0576]) tensor([0.4602, 0.1270, 0.1806, 0.2322]) -Greedy action tensor([ 1.0467, -0.6434, -0.2371, 0.3266]) tensor([0.5133, 0.0947, 0.1422, 0.2498]) -Greedy action tensor([ 1.0461, -0.4630, -0.2087, -0.0344]) tensor([0.5418, 0.1198, 0.1545, 0.1839]) -Greedy action tensor([ 0.8604, -0.4898, -0.0693, -0.0793]) tensor([0.4891, 0.1268, 0.1930, 0.1911]) -Greedy action tensor([ 0.9321, -0.4954, -0.4263, -0.0713]) tensor([0.5366, 0.1287, 0.1379, 0.1967]) -Greedy action tensor([ 1.0592, -0.3341, -0.3552, -0.1827]) tensor([0.5618, 0.1395, 0.1365, 0.1623]) -Greedy action tensor([ 1.0843, -0.6562, -0.6286, 0.6774]) tensor([0.4947, 0.0868, 0.0892, 0.3293]) -Greedy action tensor([ 1.3223, -0.0586, 0.0076, -0.0794]) tensor([0.5662, 0.1423, 0.1521, 0.1394]) -Greedy action tensor([ 1.1737, -0.5299, -0.5356, 0.1460]) tensor([0.5811, 0.1058, 0.1052, 0.2079]) -Greedy action tensor([ 0.7938, -0.5520, -0.0531, -0.7994]) tensor([0.5285, 0.1376, 0.2266, 0.1074]) -Greedy action tensor([ 1.1333e+00, -8.5496e-01, 5.2404e-04, -6.0356e-01]) tensor([0.6116, 0.0837, 0.1970, 0.1077]) -Greedy action tensor([ 0.5664, -0.1712, -0.0336, -0.3576]) tensor([0.4125, 0.1973, 0.2264, 0.1637]) -Greedy action tensor([ 0.3967, -0.1330, -0.0656, -0.3015]) tensor([0.3682, 0.2168, 0.2319, 0.1832]) -Greedy action tensor([ 0.8886, -0.3796, -0.1431, -0.5965]) tensor([0.5364, 0.1509, 0.1912, 0.1215]) -Greedy action tensor([ 0.4725, -0.0393, -0.1506, -0.2189]) tensor([0.3793, 0.2274, 0.2034, 0.1900]) -Greedy action tensor([ 0.4327, -0.0937, 0.0422, -0.2328]) tensor([0.3595, 0.2124, 0.2433, 0.1848]) -Greedy action tensor([ 1.1670, -0.5872, -0.0722, -0.4814]) tensor([0.6042, 0.1046, 0.1750, 0.1162]) -Greedy action tensor([ 0.5066, -0.0532, -0.0879, -0.2208]) tensor([0.3837, 0.2192, 0.2117, 0.1854]) -Greedy action tensor([ 0.3923, -0.0220, -0.0574, -0.1699]) tensor([0.3486, 0.2304, 0.2224, 0.1987]) -Greedy action tensor([ 0.5984, -0.2728, 0.0827, -0.3707]) tensor([0.4175, 0.1747, 0.2493, 0.1584]) -Greedy action tensor([ 0.5767, -0.0751, -0.0539, -0.3528]) tensor([0.4085, 0.2129, 0.2174, 0.1612]) -Greedy action tensor([ 0.6015, -0.2063, -0.1144, -0.2098]) tensor([0.4204, 0.1874, 0.2055, 0.1868]) -Greedy action tensor([ 0.8220, -0.6130, -0.0927, -0.5970]) tensor([0.5317, 0.1266, 0.2130, 0.1287]) -Greedy action tensor([ 0.8696, -0.4057, -0.0562, -0.4990]) tensor([0.5181, 0.1447, 0.2053, 0.1318]) -Greedy action tensor([ 0.2438, 0.1104, -0.0428, -0.2086]) tensor([0.3066, 0.2683, 0.2302, 0.1950]) -Greedy action tensor([ 0.2150, 0.1250, 0.1223, -0.2268]) tensor([0.2883, 0.2635, 0.2628, 0.1854]) -Greedy action tensor([ 0.4773, -0.0291, -0.0011, -0.1804]) tensor([0.3649, 0.2199, 0.2262, 0.1890]) -Greedy action tensor([ 0.3257, -0.1415, 0.0313, -0.4085]) tensor([0.3507, 0.2198, 0.2613, 0.1683]) -Greedy action tensor([ 0.2472, 0.1234, -0.0588, -0.2438]) tensor([0.3094, 0.2734, 0.2279, 0.1894]) -Greedy action tensor([ 0.5840, 0.0458, 0.0458, -0.3246]) tensor([0.3890, 0.2271, 0.2271, 0.1568]) -Greedy action tensor([ 0.7608, -0.4336, -0.0666, -0.7482]) tensor([0.5099, 0.1544, 0.2229, 0.1128]) -Greedy action tensor([ 0.1984, -0.1447, -0.0474, -0.3015]) tensor([0.3228, 0.2290, 0.2524, 0.1958]) -Greedy action tensor([ 0.7379, -0.6341, -0.1864, -0.6279]) tensor([0.5248, 0.1331, 0.2082, 0.1339]) -Greedy action tensor([ 0.1411, -0.0129, 0.1362, -0.1817]) tensor([0.2796, 0.2397, 0.2782, 0.2025]) -Greedy action tensor([ 0.3949, -0.2272, 0.1537, -0.3135]) tensor([0.3553, 0.1907, 0.2791, 0.1749]) -Greedy action tensor([ 0.8818, -0.3257, 0.0746, -0.7279]) tensor([0.5141, 0.1537, 0.2294, 0.1028]) -Greedy action tensor([ 0.5779, -0.1184, 0.0354, -0.3176]) tensor([0.4019, 0.2003, 0.2336, 0.1641]) -Greedy action tensor([ 0.3078, 0.0956, 0.0632, -0.2473]) tensor([0.3159, 0.2555, 0.2473, 0.1813]) -Greedy action tensor([ 0.6281, -0.0175, 0.0079, -0.3250]) tensor([0.4085, 0.2142, 0.2197, 0.1575]) -Greedy action tensor([ 0.2964, 0.0744, 0.0587, -0.2258]) tensor([0.3142, 0.2517, 0.2477, 0.1864]) -Greedy action tensor([ 1.0862e+00, -6.5804e-01, 7.1687e-04, -9.4571e-01]) tensor([0.6084, 0.1063, 0.2055, 0.0798]) -Greedy action tensor([ 0.2862, 0.2157, -0.0703, -0.0811]) tensor([0.3008, 0.2803, 0.2106, 0.2083]) -Greedy action tensor([ 0.2507, -0.0305, 0.0981, -0.2784]) tensor([0.3123, 0.2357, 0.2681, 0.1840]) -Greedy action tensor([ 0.0702, -0.0086, 0.1643, -0.1366]) tensor([0.2607, 0.2409, 0.2864, 0.2120]) -Greedy action tensor([ 0.4998, -0.0777, -0.0097, -0.1970]) tensor([0.3759, 0.2110, 0.2259, 0.1873]) -Greedy action tensor([ 0.4885, -0.3577, 0.0480, -0.5929]) tensor([0.4146, 0.1779, 0.2669, 0.1406]) -Greedy action tensor([ 0.7483, -0.4238, 0.0278, -0.4061]) tensor([0.4736, 0.1467, 0.2304, 0.1493]) -Greedy action tensor([ 0.8369, -0.4639, -0.0657, -0.5401]) tensor([0.5181, 0.1411, 0.2101, 0.1307]) -Greedy action tensor([ 0.5300, -0.2509, -0.0694, -0.2905]) tensor([0.4086, 0.1871, 0.2244, 0.1799]) -Greedy action tensor([ 0.1907, 0.0970, 0.1137, -0.2373]) tensor([0.2867, 0.2610, 0.2654, 0.1869]) -Greedy action tensor([ 0.5678, -0.5317, 0.2788, -0.6009]) tensor([0.4179, 0.1392, 0.3130, 0.1299]) -Greedy action tensor([ 0.8699, -0.4111, 0.0361, -0.5727]) tensor([0.5132, 0.1426, 0.2229, 0.1213]) -Greedy action tensor([ 0.5693, -0.2790, -0.0413, -0.4693]) tensor([0.4301, 0.1841, 0.2335, 0.1522]) -Greedy action tensor([ 1.1483, -0.7061, -0.0777, -0.6741]) tensor([0.6205, 0.0971, 0.1821, 0.1003]) -Greedy action tensor([ 0.5221, 0.0502, -0.0696, -0.2252]) tensor([0.3772, 0.2353, 0.2088, 0.1787]) -Greedy action tensor([ 0.6292, -0.2603, 0.1071, -0.4854]) tensor([0.4288, 0.1762, 0.2544, 0.1407]) -Greedy action tensor([ 0.5635, -0.0911, -0.0645, -0.1220]) tensor([0.3910, 0.2032, 0.2087, 0.1970]) -Greedy action tensor([ 0.1023, 0.1137, 0.1392, -0.1820]) tensor([0.2631, 0.2661, 0.2729, 0.1980]) -Greedy action tensor([ 0.9915, -0.6608, -0.1164, -0.9044]) tensor([0.5981, 0.1146, 0.1975, 0.0898]) -Greedy action tensor([ 0.1286, -0.0260, -0.0595, -0.3732]) tensor([0.3039, 0.2604, 0.2518, 0.1840]) -Greedy action tensor([ 0.5958, -0.2767, -0.0819, -0.3689]) tensor([0.4335, 0.1812, 0.2201, 0.1652]) -Greedy action tensor([ 0.7189, -0.6673, -0.2094, -0.7475]) tensor([0.5330, 0.1333, 0.2107, 0.1230]) -Greedy action tensor([ 0.4025, 0.1488, 0.0543, -0.3534]) tensor([0.3388, 0.2629, 0.2392, 0.1591]) -Greedy action tensor([ 0.6680, -0.2898, -0.1129, -0.2503]) tensor([0.4462, 0.1712, 0.2044, 0.1781]) -Greedy action tensor([ 0.1684, 0.1423, -0.0403, -0.0262]) tensor([0.2771, 0.2700, 0.2249, 0.2281]) -Greedy action tensor([ 0.6677, -0.5438, -0.1758, -0.5992]) tensor([0.4976, 0.1482, 0.2141, 0.1402]) -Greedy action tensor([ 0.5613, 0.0053, 0.0505, -0.2838]) tensor([0.3842, 0.2203, 0.2305, 0.1650]) -Greedy action tensor([ 0.7662, -0.5395, -0.0093, -0.7664]) tensor([0.5135, 0.1391, 0.2364, 0.1109]) -Greedy action tensor([ 0.9707, -0.6452, -0.1642, -0.5711]) tensor([0.5766, 0.1146, 0.1854, 0.1234]) -Greedy action tensor([ 0.5023, -0.1352, -0.1026, -0.2245]) tensor([0.3909, 0.2066, 0.2135, 0.1890]) -Greedy action tensor([ 0.6114, -0.2881, -0.0499, -0.2507]) tensor([0.4264, 0.1734, 0.2201, 0.1801]) -Greedy action tensor([ 0.2997, 0.1477, 0.1926, -0.2647]) tensor([0.3006, 0.2583, 0.2701, 0.1710]) -Greedy action tensor([ 0.3682, 0.1542, 0.1884, -0.1628]) tensor([0.3095, 0.2499, 0.2586, 0.1820]) -Greedy action tensor([ 0.7690, -0.6326, -0.0497, -0.6575]) tensor([0.5188, 0.1277, 0.2288, 0.1246]) -Greedy action tensor([ 0.2874, 0.1673, -0.0556, -0.0022]) tensor([0.2990, 0.2651, 0.2122, 0.2238]) -Greedy action tensor([ 0.4164, -0.3688, 0.0415, -0.4476]) tensor([0.3899, 0.1778, 0.2680, 0.1643]) -Greedy action tensor([ 0.8713, -0.7511, -0.2793, -0.8856]) tensor([0.5930, 0.1171, 0.1876, 0.1023]) -Greedy action tensor([ 0.6850, -0.2966, -0.0162, -0.3891]) tensor([0.4520, 0.1694, 0.2242, 0.1544]) -Greedy action tensor([ 0.5128, -0.1190, 0.0294, -0.1524]) tensor([0.3756, 0.1997, 0.2316, 0.1931]) -Greedy action tensor([ 0.3556, -0.0161, -0.0573, -0.3093]) tensor([0.3490, 0.2406, 0.2309, 0.1795]) -Greedy action tensor([ 0.3096, 0.0253, 0.0319, -0.2760]) tensor([0.3261, 0.2454, 0.2470, 0.1815]) -Greedy action tensor([ 0.5942, 0.0075, 0.0364, -0.1986]) tensor([0.3874, 0.2155, 0.2218, 0.1753]) -Greedy action tensor([ 0.1003, 0.1076, -0.0732, -0.0092]) tensor([0.2671, 0.2690, 0.2245, 0.2394]) -Greedy action tensor([ 0.6172, -0.4520, -0.1983, -0.5493]) tensor([0.4768, 0.1637, 0.2110, 0.1485]) -Greedy action tensor([ 0.3822, 0.0457, 0.1503, -0.2908]) tensor([0.3314, 0.2367, 0.2628, 0.1691]) -Greedy action tensor([ 0.5606, -0.3356, 0.0793, -0.6511]) tensor([0.4303, 0.1756, 0.2659, 0.1281]) -Greedy action tensor([ 1.1143, -0.7865, -0.0717, -0.7139]) tensor([0.6190, 0.0925, 0.1891, 0.0995]) -Greedy action tensor([ 0.4784, 0.1009, 0.1777, -0.2219]) tensor([0.3422, 0.2346, 0.2533, 0.1699]) -Greedy action tensor([ 0.7306, -0.4649, -0.1149, -0.6472]) tensor([0.5040, 0.1525, 0.2164, 0.1271]) -Greedy action tensor([ 0.7405, -0.2452, -0.0768, -0.2955]) tensor([0.4609, 0.1720, 0.2035, 0.1636]) -Greedy action tensor([-1.2167, -0.5945, 0.3095, 0.2328]) tensor([0.0853, 0.1589, 0.3924, 0.3634]) -Greedy action tensor([-1.7851, -0.4456, 0.5850, -0.0708]) tensor([0.0475, 0.1812, 0.5078, 0.2636]) -Greedy action tensor([-1.6414, -0.5209, 0.5680, 0.1277]) tensor([0.0525, 0.1610, 0.4784, 0.3080]) -Greedy action tensor([-1.0923, -0.6571, 0.5907, 0.6583]) tensor([0.0731, 0.1129, 0.3933, 0.4208]) -Greedy action tensor([-1.8560, -0.4840, 0.7187, 0.0471]) tensor([0.0404, 0.1591, 0.5298, 0.2707]) -Greedy action tensor([-1.3940, -0.5699, 0.4782, 0.2664]) tensor([0.0665, 0.1515, 0.4322, 0.3497]) -Greedy action tensor([-2.0194, -0.9355, 0.3615, -0.1459]) tensor([0.0470, 0.1389, 0.5081, 0.3060]) -Greedy action tensor([-1.5140, -0.4867, 0.8619, 0.7901]) tensor([0.0407, 0.1137, 0.4380, 0.4076]) -Greedy action tensor([-1.5431, -0.5805, 0.4735, 0.0293]) tensor([0.0627, 0.1642, 0.4710, 0.3021]) -Greedy action tensor([-0.9736, -0.4683, 0.5054, 0.7771]) tensor([0.0781, 0.1294, 0.3427, 0.4497]) -Greedy action tensor([-1.8750, -0.6628, 0.2717, -0.2363]) tensor([0.0554, 0.1860, 0.4736, 0.2850]) -Greedy action tensor([-1.6033, -0.5950, 0.5325, -0.0614]) tensor([0.0592, 0.1624, 0.5015, 0.2769]) -Greedy action tensor([-1.2949, -0.4403, 0.3546, 0.0573]) tensor([0.0805, 0.1892, 0.4190, 0.3113]) -Greedy action tensor([-1.2765, 0.4184, 0.5229, -0.5435]) tensor([0.0686, 0.3737, 0.4149, 0.1428]) -Greedy action tensor([-1.8573, -0.4701, 0.6632, -0.0496]) tensor([0.0425, 0.1701, 0.5284, 0.2590]) -Greedy action tensor([-1.5489, -0.8891, -0.0053, -0.7068]) tensor([0.1006, 0.1947, 0.4711, 0.2336]) -Greedy action tensor([-1.8156, -0.2653, 0.3007, -0.2953]) tensor([0.0538, 0.2536, 0.4466, 0.2461]) -Greedy action tensor([-1.3751, -0.7648, 0.6856, -0.1216]) tensor([0.0704, 0.1297, 0.5531, 0.2468]) -Greedy action tensor([-0.9370, -0.1947, -0.2865, -0.2797]) tensor([0.1440, 0.3024, 0.2759, 0.2778]) -Greedy action tensor([-1.6596, -0.5041, 0.5492, -0.0181]) tensor([0.0542, 0.1722, 0.4936, 0.2799]) -Greedy action tensor([-1.4808, -0.6228, 0.4606, 0.0391]) tensor([0.0671, 0.1583, 0.4677, 0.3069]) -Greedy action tensor([-1.8381, 0.1221, 0.5469, -0.0657]) tensor([0.0403, 0.2858, 0.4371, 0.2369]) -Greedy action tensor([-1.3256, -0.6126, 0.5890, 0.7483]) tensor([0.0562, 0.1147, 0.3816, 0.4475]) -Greedy action tensor([-2.0095, -0.8856, 0.7150, 0.0066]) tensor([0.0373, 0.1147, 0.5682, 0.2798]) -Greedy action tensor([-1.7859, -0.8502, 0.1429, -0.5918]) tensor([0.0728, 0.1856, 0.5011, 0.2404]) -Greedy action tensor([-1.0229, -0.2794, -0.4318, -0.3098]) tensor([0.1439, 0.3027, 0.2599, 0.2936]) -Greedy action tensor([-1.0648, -0.6469, 0.5298, 0.0799]) tensor([0.0945, 0.1435, 0.4653, 0.2967]) -Greedy action tensor([-1.7752, -0.6008, 0.9033, 0.4020]) tensor([0.0362, 0.1172, 0.5272, 0.3194]) -Greedy action tensor([-1.1575, -0.4040, 0.6143, 1.0235]) tensor([0.0560, 0.1189, 0.3293, 0.4958]) -Greedy action tensor([-1.4668, -0.5257, 0.5032, 0.4228]) tensor([0.0576, 0.1477, 0.4133, 0.3814]) -Greedy action tensor([-1.5954, -0.5490, 0.5879, 0.2204]) tensor([0.0530, 0.1509, 0.4704, 0.3257]) -Greedy action tensor([-1.3410, -0.2262, 0.0695, -0.4030]) tensor([0.0934, 0.2849, 0.3829, 0.2387]) -Greedy action tensor([-1.6250, -0.4201, 0.5863, 0.2711]) tensor([0.0497, 0.1658, 0.4536, 0.3309]) -Greedy action tensor([-1.6930, -0.4826, 0.7187, -0.3000]) tensor([0.0512, 0.1717, 0.5709, 0.2061]) -Greedy action tensor([-1.1545, -0.5169, 0.3671, 0.2858]) tensor([0.0855, 0.1618, 0.3916, 0.3611]) -Greedy action tensor([-1.8777, -0.4335, 0.7001, 0.0623]) tensor([0.0394, 0.1671, 0.5191, 0.2744]) -Greedy action tensor([-1.1861, 0.0878, -0.3768, -0.1646]) tensor([0.1042, 0.3724, 0.2340, 0.2894]) -Greedy action tensor([-1.2756, -0.5506, 0.3375, 0.2303]) tensor([0.0794, 0.1640, 0.3986, 0.3581]) -Greedy action tensor([-1.6380, -0.5145, 0.5321, -0.0831]) tensor([0.0569, 0.1751, 0.4985, 0.2695]) -Greedy action tensor([-1.3316, 0.2717, 0.3804, -0.0607]) tensor([0.0663, 0.3297, 0.3675, 0.2364]) -Greedy action tensor([-1.9460, -0.4862, 1.2664, 0.6974]) tensor([0.0226, 0.0974, 0.5619, 0.3181]) -Greedy action tensor([-0.5543, -0.5355, 0.1856, 0.1915]) tensor([0.1607, 0.1637, 0.3368, 0.3388]) -Greedy action tensor([-1.1718, -0.5977, 0.4520, 0.0962]) tensor([0.0877, 0.1557, 0.4449, 0.3117]) -Greedy action tensor([-1.8100, -0.3630, 1.1224, 0.7640]) tensor([0.0269, 0.1144, 0.5055, 0.3532]) -Greedy action tensor([-1.6681, -1.0535, -0.0172, -0.8038]) tensor([0.0958, 0.1772, 0.4995, 0.2275]) -Greedy action tensor([-1.8311, -0.4713, 0.6062, -0.0956]) tensor([0.0454, 0.1770, 0.5199, 0.2577]) -Greedy action tensor([-1.8466, -0.4482, 0.6274, -0.0880]) tensor([0.0440, 0.1782, 0.5224, 0.2555]) -Greedy action tensor([-1.6564, -0.5018, 0.9556, 0.5913]) tensor([0.0367, 0.1164, 0.4998, 0.3472]) -Greedy action tensor([-1.5550, -0.8746, 0.5211, -0.2375]) tensor([0.0681, 0.1345, 0.5431, 0.2543]) -Greedy action tensor([-1.6744, -0.0772, 0.6288, 0.3107]) tensor([0.0431, 0.2127, 0.4308, 0.3134]) -Greedy action tensor([-0.6259, 0.8008, -0.1131, 0.1988]) tensor([0.1097, 0.4569, 0.1832, 0.2502]) -Greedy action tensor([-1.5701, -0.5055, 0.5252, 0.1826]) tensor([0.0562, 0.1629, 0.4567, 0.3242]) -Greedy action tensor([-1.9286, -0.9695, 0.6662, 0.1554]) tensor([0.0399, 0.1042, 0.5349, 0.3209]) -Greedy action tensor([-1.4564, -0.4601, 0.4701, 0.2351]) tensor([0.0625, 0.1693, 0.4291, 0.3392]) -Greedy action tensor([-1.8746, -0.3955, 0.6182, -0.1297]) tensor([0.0431, 0.1891, 0.5211, 0.2467]) -Greedy action tensor([-1.6288, -0.4996, 0.4790, 0.0367]) tensor([0.0568, 0.1756, 0.4673, 0.3003]) -Greedy action tensor([-1.3569, -0.5582, 0.3559, 0.2902]) tensor([0.0716, 0.1592, 0.3972, 0.3719]) -Greedy action tensor([-1.8537, -0.6883, 0.6637, 0.0129]) tensor([0.0433, 0.1390, 0.5373, 0.2803]) -Greedy action tensor([-1.5509, -0.5240, 0.6229, 0.4752]) tensor([0.0496, 0.1385, 0.4359, 0.3760]) -Greedy action tensor([-1.9691, -0.5770, 0.8331, 0.0169]) tensor([0.0347, 0.1397, 0.5724, 0.2531]) -Greedy action tensor([-1.7032, -0.6435, 0.2235, -0.2507]) tensor([0.0666, 0.1920, 0.4570, 0.2844]) -Greedy action tensor([-2.0319, -0.8385, 0.5473, -0.1419]) tensor([0.0415, 0.1368, 0.5471, 0.2746]) -Greedy action tensor([-1.6679, -0.5454, 0.6346, 0.1156]) tensor([0.0499, 0.1535, 0.4994, 0.2972]) -Greedy action tensor([-1.9219, -0.7356, 0.2326, -0.2658]) tensor([0.0551, 0.1806, 0.4755, 0.2888]) -Greedy action tensor([-0.1602, -0.3224, 0.2121, 0.2997]) tensor([0.2047, 0.1740, 0.2970, 0.3242]) -Greedy action tensor([-1.9639, -0.6605, 0.5860, 0.0097]) tensor([0.0405, 0.1492, 0.5188, 0.2915]) -Greedy action tensor([-1.6735, -0.4352, 0.5247, -0.0531]) tensor([0.0540, 0.1863, 0.4866, 0.2731]) -Greedy action tensor([-1.2854, -0.3746, 1.3208, 1.2143]) tensor([0.0342, 0.0851, 0.4637, 0.4169]) -Greedy action tensor([-2.0659, -0.8161, 1.4256, 0.6710]) tensor([0.0190, 0.0661, 0.6223, 0.2926]) -Greedy action tensor([-0.3395, 1.0911, 0.1604, 0.5973]) tensor([0.1066, 0.4457, 0.1757, 0.2720]) -Greedy action tensor([-1.3263, -0.3924, 0.5677, 0.8112]) tensor([0.0536, 0.1363, 0.3560, 0.4542]) -Greedy action tensor([-1.1463, -0.5461, 0.3572, 0.5358]) tensor([0.0788, 0.1435, 0.3542, 0.4235]) -Greedy action tensor([-1.8167, -0.7625, 0.2079, -0.3403]) tensor([0.0632, 0.1814, 0.4787, 0.2767]) -Greedy action tensor([-1.1159, -0.6027, 0.3555, 0.1556]) tensor([0.0944, 0.1577, 0.4112, 0.3367]) -Greedy action tensor([-1.7131, -0.6324, 0.4733, -0.1172]) tensor([0.0562, 0.1657, 0.5007, 0.2774]) -Greedy action tensor([-0.3023, -0.1674, 0.5560, 1.1950]) tensor([0.1114, 0.1275, 0.2629, 0.4981]) -Greedy action tensor([-1.8514, -0.4981, 0.6727, -0.0771]) tensor([0.0430, 0.1665, 0.5368, 0.2537]) -Greedy action tensor([-1.7763, -0.4600, 0.7549, 0.2593]) tensor([0.0401, 0.1494, 0.5037, 0.3068]) -Greedy action tensor([-1.9228, -0.6108, 0.8292, 0.0973]) tensor([0.0358, 0.1330, 0.5613, 0.2700]) -Greedy action tensor([-1.1499, -0.6740, 0.6813, 1.0717]) tensor([0.0553, 0.0891, 0.3454, 0.5103]) -Greedy action tensor([-1.4391, -0.2484, 0.9317, 0.9570]) tensor([0.0385, 0.1266, 0.4121, 0.4227]) -Greedy action tensor([-0.1264, -0.0395, 1.0429, -0.1939]) tensor([0.1601, 0.1747, 0.5156, 0.1497]) -Greedy action tensor([ 1.8168, -0.3776, 0.7049, 2.1844]) tensor([0.3467, 0.0386, 0.1140, 0.5007]) -Greedy action tensor([1.2796, 0.0232, 0.3911, 1.0914]) tensor([0.3961, 0.1128, 0.1629, 0.3282]) -Greedy action tensor([ 1.3885, -0.5948, 0.6596, 1.9754]) tensor([0.2925, 0.0403, 0.1411, 0.5261]) -Greedy action tensor([ 0.6331, 0.7600, -0.4678, 1.1617]) tensor([0.2401, 0.2726, 0.0799, 0.4074]) -Greedy action tensor([ 0.9287, -1.0933, 0.6844, 0.9332]) tensor([0.3424, 0.0453, 0.2682, 0.3440]) -Greedy action tensor([ 0.7104, 0.7738, -0.3026, 0.4645]) tensor([0.3115, 0.3318, 0.1131, 0.2436]) -Greedy action tensor([ 1.1529, -0.8633, 0.4798, 0.8530]) tensor([0.4194, 0.0559, 0.2140, 0.3108]) -Greedy action tensor([ 1.5468, 0.2106, -0.5648, 0.6975]) tensor([0.5520, 0.1451, 0.0668, 0.2361]) -Greedy action tensor([ 1.0216, -0.3370, 0.9978, 1.4815]) tensor([0.2620, 0.0673, 0.2558, 0.4149]) -Greedy action tensor([ 1.5202, 0.0867, -0.1535, 1.0810]) tensor([0.4830, 0.1152, 0.0906, 0.3113]) -Greedy action tensor([ 1.4232, -0.5126, 1.0071, 1.4863]) tensor([0.3485, 0.0503, 0.2299, 0.3713]) -Greedy action tensor([ 0.8542, -1.2714, 0.5489, -0.1771]) tensor([0.4519, 0.0539, 0.3330, 0.1611]) -Greedy action tensor([ 1.2393, -1.1317, 1.7985, 0.6175]) tensor([0.2959, 0.0276, 0.5176, 0.1589]) -Greedy action tensor([ 0.5578, 0.2653, -0.0592, 0.3077]) tensor([0.3263, 0.2436, 0.1761, 0.2541]) -Greedy action tensor([ 1.2972, -1.1458, 0.6048, 0.9585]) tensor([0.4348, 0.0378, 0.2176, 0.3099]) -Greedy action tensor([0.3390, 0.4050, 0.0576, 0.9010]) tensor([0.2185, 0.2334, 0.1649, 0.3833]) -Greedy action tensor([ 0.8243, 0.0653, -1.1042, 1.1485]) tensor([0.3337, 0.1562, 0.0485, 0.4615]) -Greedy action tensor([ 1.5399, -0.0953, 1.1508, 0.8994]) tensor([0.4167, 0.0812, 0.2824, 0.2196]) -Greedy action tensor([0.8311, 0.3839, 0.5050, 1.7032]) tensor([0.2104, 0.1345, 0.1518, 0.5032]) -Greedy action tensor([ 1.7851, -0.8002, 1.1628, 0.3927]) tensor([0.5375, 0.0405, 0.2885, 0.1336]) -Greedy action tensor([ 1.7271, -1.3963, 0.5430, 0.0393]) tensor([0.6515, 0.0287, 0.1994, 0.1205]) -Greedy action tensor([ 0.9646, -0.1136, 1.1638, 1.9524]) tensor([0.1906, 0.0648, 0.2327, 0.5119]) -Greedy action tensor([ 1.7926, -0.8429, 0.0723, 1.2193]) tensor([0.5512, 0.0395, 0.0987, 0.3107]) -Greedy action tensor([ 0.5099, 0.2690, -0.3488, -0.7209]) tensor([0.3997, 0.3142, 0.1694, 0.1167]) -Greedy action tensor([ 0.2691, -0.6912, -0.2156, 0.4227]) tensor([0.3160, 0.1209, 0.1946, 0.3685]) -Greedy action tensor([ 1.3966, -0.2123, 1.4277, 1.0808]) tensor([0.3377, 0.0676, 0.3484, 0.2463]) -Greedy action tensor([ 1.1534, -0.9528, -0.3070, 1.6417]) tensor([0.3352, 0.0408, 0.0778, 0.5462]) -Greedy action tensor([ 1.1184, -0.0210, 0.1897, 0.9414]) tensor([0.3917, 0.1254, 0.1547, 0.3282]) -Greedy action tensor([ 1.1926, -0.2169, 0.2275, 0.3408]) tensor([0.4874, 0.1190, 0.1857, 0.2079]) -Greedy action tensor([ 1.2324, 0.8319, -0.9580, 0.3629]) tensor([0.4543, 0.3044, 0.0508, 0.1904]) -Greedy action tensor([ 0.9123, 0.0066, -0.4419, 0.2994]) tensor([0.4537, 0.1834, 0.1171, 0.2458]) -Greedy action tensor([1.3809, 0.2361, 0.1524, 1.6292]) tensor([0.3457, 0.1100, 0.1012, 0.4431]) -Greedy action tensor([ 0.9802, 0.2462, -0.7594, 0.3830]) tensor([0.4533, 0.2176, 0.0796, 0.2495]) -Greedy action tensor([0.5735, 0.3652, 0.0018, 0.8528]) tensor([0.2704, 0.2195, 0.1526, 0.3575]) -Greedy action tensor([ 1.9075, 0.2322, -0.2542, 0.6205]) tensor([0.6335, 0.1186, 0.0729, 0.1749]) -Greedy action tensor([ 2.5576, -0.2130, 0.7070, 1.4153]) tensor([0.6498, 0.0407, 0.1021, 0.2073]) -Greedy action tensor([ 1.3001, -0.8010, -0.6413, 0.6341]) tensor([0.5619, 0.0687, 0.0806, 0.2887]) -Greedy action tensor([1.6440, 0.4200, 0.9801, 0.9818]) tensor([0.4302, 0.1265, 0.2215, 0.2219]) -Greedy action tensor([ 1.0785, 0.4054, -0.1965, 0.9485]) tensor([0.3749, 0.1912, 0.1047, 0.3292]) -Greedy action tensor([ 0.6818, 0.4150, -0.1273, 0.2797]) tensor([0.3472, 0.2659, 0.1546, 0.2323]) -Greedy action tensor([ 1.3436, 1.1366, -1.3467, 0.5914]) tensor([0.4251, 0.3456, 0.0289, 0.2004]) -Greedy action tensor([ 1.5026, 0.7461, -0.6944, 0.3915]) tensor([0.5237, 0.2458, 0.0582, 0.1724]) -Greedy action tensor([ 1.0903, 0.2657, -0.3657, 0.8459]) tensor([0.4074, 0.1786, 0.0950, 0.3190]) -Greedy action tensor([1.7394, 0.3049, 0.3282, 0.7829]) tensor([0.5358, 0.1276, 0.1307, 0.2059]) -Greedy action tensor([ 1.5842, -0.6808, 1.2072, 0.5375]) tensor([0.4671, 0.0485, 0.3204, 0.1640]) -Greedy action tensor([ 1.0632, -0.4356, 0.7948, 0.2243]) tensor([0.4132, 0.0923, 0.3159, 0.1786]) -Greedy action tensor([-0.4240, 0.7419, 1.0822, -0.3726]) tensor([0.1023, 0.3284, 0.4615, 0.1077]) -Greedy action tensor([ 0.4581, 0.4223, -0.6847, 0.0578]) tensor([0.3385, 0.3266, 0.1080, 0.2269]) -Greedy action tensor([ 0.7872, -0.0463, -0.6334, 0.2719]) tensor([0.4399, 0.1911, 0.1063, 0.2627]) -Greedy action tensor([ 1.2175, 0.2124, -0.6717, 0.9582]) tensor([0.4369, 0.1599, 0.0661, 0.3371]) -Greedy action tensor([ 0.4329, -0.1507, -0.5798, 1.1030]) tensor([0.2580, 0.1440, 0.0937, 0.5043]) -Greedy action tensor([ 0.2139, 1.2728, 0.0437, -0.1702]) tensor([0.1849, 0.5332, 0.1560, 0.1259]) -Greedy action tensor([1.6073, 0.0433, 0.1832, 0.6224]) tensor([0.5484, 0.1148, 0.1320, 0.2048]) -Greedy action tensor([ 1.1017, -0.6914, 1.8187, 0.7080]) tensor([0.2571, 0.0428, 0.5267, 0.1734]) -Greedy action tensor([0.2169, 0.3319, 0.4925, 0.7939]) tensor([0.1916, 0.2149, 0.2524, 0.3411]) -Greedy action tensor([ 1.5876, -0.2349, 1.2456, 0.6863]) tensor([0.4390, 0.0709, 0.3118, 0.1783]) -Greedy action tensor([ 1.6451, -0.3776, 0.7175, 1.3524]) tensor([0.4397, 0.0582, 0.1739, 0.3282]) -Greedy action tensor([ 1.1025, -0.1245, 0.6498, 0.7027]) tensor([0.3847, 0.1128, 0.2446, 0.2579]) -Greedy action tensor([ 0.9520, -0.8353, 0.7058, 0.2180]) tensor([0.4117, 0.0689, 0.3218, 0.1976]) -Greedy action tensor([1.0939, 0.3412, 1.1453, 0.4455]) tensor([0.3282, 0.1546, 0.3455, 0.1716]) -Greedy action tensor([1.7029, 0.0192, 0.9528, 1.3205]) tensor([0.4273, 0.0793, 0.2018, 0.2915]) -Greedy action tensor([ 1.7447, -0.7889, 0.3193, 1.1840]) tensor([0.5290, 0.0420, 0.1272, 0.3019]) -Greedy action tensor([ 1.5486, -0.3813, 1.6512, 1.5370]) tensor([0.3085, 0.0448, 0.3418, 0.3049]) -Greedy action tensor([1.2218, 0.1315, 1.4666, 0.6110]) tensor([0.3168, 0.1065, 0.4047, 0.1720]) -Greedy action tensor([ 1.1791, -0.1769, 0.6433, 0.5090]) tensor([0.4247, 0.1094, 0.2485, 0.2173]) -Greedy action tensor([1.1243, 0.4620, 0.0519, 0.7172]) tensor([0.3963, 0.2043, 0.1356, 0.2638]) -Greedy action tensor([ 0.8261, -0.6653, 0.7871, -0.0351]) tensor([0.3832, 0.0862, 0.3686, 0.1620]) -Greedy action tensor([0.9151, 0.1391, 0.3972, 0.0546]) tensor([0.4034, 0.1857, 0.2403, 0.1706]) -Greedy action tensor([ 1.2633, -1.6690, 0.4552, 0.6702]) tensor([0.4874, 0.0260, 0.2173, 0.2694]) -Greedy action tensor([ 0.3090, -0.1112, 1.3022, 0.6594]) tensor([0.1731, 0.1137, 0.4674, 0.2458]) -Greedy action tensor([ 0.5389, -1.2297, 0.3538, 0.9742]) tensor([0.2819, 0.0481, 0.2343, 0.4357]) -Greedy action tensor([ 0.4610, -0.0315, -0.1344, 0.7262]) tensor([0.2885, 0.1763, 0.1591, 0.3761]) -Greedy action tensor([ 0.7014, -0.2390, 1.2838, -0.0246]) tensor([0.2729, 0.1066, 0.4885, 0.1320]) -Greedy action tensor([ 1.7759, -0.1252, 0.2388, 1.6858]) tensor([0.4389, 0.0656, 0.0944, 0.4011]) -Greedy action tensor([ 1.1720, -0.2017, -0.0446, 0.5810]) tensor([0.4755, 0.1204, 0.1409, 0.2633]) -Greedy action tensor([0.9306, 0.0136, 0.9668, 0.0634]) tensor([0.3501, 0.1399, 0.3629, 0.1471]) -Greedy action tensor([ 1.0660, -0.6891, -0.0573, 1.3375]) tensor([0.3559, 0.0615, 0.1157, 0.4669]) -Greedy action tensor([ 0.3788, 0.4493, 0.0700, -0.3496]) tensor([0.3039, 0.3261, 0.2232, 0.1467]) -Greedy action tensor([1.9113, 0.1476, 0.3171, 1.6296]) tensor([0.4697, 0.0805, 0.0954, 0.3544]) -Greedy action tensor([ 1.6673, -0.8491, 1.1300, 0.1481]) tensor([0.5308, 0.0429, 0.3102, 0.1162]) -Greedy action tensor([0.4925, 0.2710, 0.3574, 0.1676]) tensor([0.2943, 0.2359, 0.2571, 0.2127]) -Greedy action tensor([ 0.2362, 0.2248, 0.1602, -0.2773]) tensor([0.2846, 0.2814, 0.2638, 0.1703]) -Greedy action tensor([ 0.7104, -0.2425, -0.0207, -0.4147]) tensor([0.4563, 0.1760, 0.2196, 0.1481]) -Greedy action tensor([ 1.1555, -0.8727, -0.0258, -0.7023]) tensor([0.6272, 0.0825, 0.1925, 0.0978]) -Greedy action tensor([ 0.6975, -0.6064, -0.0572, -0.6017]) tensor([0.4964, 0.1348, 0.2334, 0.1354]) -Greedy action tensor([ 9.1532e-01, -4.4997e-01, -6.6769e-04, -5.3021e-01]) tensor([0.5288, 0.1350, 0.2116, 0.1246]) -Greedy action tensor([ 0.7501, -0.4351, 0.0954, -0.6608]) tensor([0.4833, 0.1477, 0.2511, 0.1179]) -Greedy action tensor([ 1.1201, -0.6236, 0.1232, -0.5883]) tensor([0.5797, 0.1014, 0.2139, 0.1050]) -Greedy action tensor([ 0.7225, -0.1434, 0.2309, -0.2659]) tensor([0.4159, 0.1750, 0.2544, 0.1548]) -Greedy action tensor([ 0.7182, -0.6231, -0.0908, -0.6378]) tensor([0.5090, 0.1331, 0.2267, 0.1312]) -Greedy action tensor([ 0.6040, -0.1065, -0.0095, -0.1379]) tensor([0.3986, 0.1959, 0.2158, 0.1898]) -Greedy action tensor([ 0.9288, -0.5403, -0.1339, -0.6428]) tensor([0.5607, 0.1290, 0.1937, 0.1165]) -Greedy action tensor([ 0.3823, 0.0134, 0.0510, -0.1220]) tensor([0.3319, 0.2295, 0.2383, 0.2004]) -Greedy action tensor([ 0.3491, -0.0719, 0.1804, -0.2215]) tensor([0.3261, 0.2141, 0.2755, 0.1843]) -Greedy action tensor([ 1.1117, -0.6104, -0.0665, -0.6569]) tensor([0.6035, 0.1078, 0.1858, 0.1029]) -Greedy action tensor([ 0.4559, -0.0153, 0.1321, -0.2133]) tensor([0.3497, 0.2183, 0.2530, 0.1791]) -Greedy action tensor([ 0.5455, 0.0542, -0.0071, -0.2631]) tensor([0.3798, 0.2324, 0.2186, 0.1692]) -Greedy action tensor([ 0.3868, 0.2091, -0.0389, -0.0595]) tensor([0.3194, 0.2674, 0.2087, 0.2044]) -Greedy action tensor([ 0.4945, -0.2313, 0.0339, -0.3636]) tensor([0.3939, 0.1906, 0.2485, 0.1670]) -Greedy action tensor([ 0.5772, -0.4998, -0.0453, -0.5823]) tensor([0.4564, 0.1555, 0.2449, 0.1432]) -Greedy action tensor([ 1.0553, -0.8500, -0.1605, -0.6433]) tensor([0.6142, 0.0914, 0.1821, 0.1124]) -Greedy action tensor([ 0.3368, 0.1506, 0.0166, -0.2951]) tensor([0.3239, 0.2688, 0.2351, 0.1722]) -Greedy action tensor([ 0.5013, -0.0367, 0.0339, -0.1737]) tensor([0.3677, 0.2147, 0.2304, 0.1872]) -Greedy action tensor([ 0.4166, -0.1665, 0.0017, -0.3726]) tensor([0.3741, 0.2088, 0.2471, 0.1699]) -Greedy action tensor([ 0.2316, -0.0110, 0.0268, -0.1638]) tensor([0.3055, 0.2397, 0.2490, 0.2058]) -Greedy action tensor([ 0.4234, -0.1275, -0.0175, -0.2372]) tensor([0.3654, 0.2107, 0.2351, 0.1888]) -Greedy action tensor([ 0.6113, -0.3022, -0.0340, -0.5428]) tensor([0.4462, 0.1790, 0.2341, 0.1407]) -Greedy action tensor([ 0.4077, -0.2500, 0.0899, -0.4155]) tensor([0.3725, 0.1930, 0.2711, 0.1635]) -Greedy action tensor([ 0.5806, -0.0110, -0.0392, -0.2210]) tensor([0.3937, 0.2179, 0.2118, 0.1766]) -Greedy action tensor([ 0.7677, -0.2121, 0.0367, -0.3520]) tensor([0.4580, 0.1719, 0.2205, 0.1495]) -Greedy action tensor([ 0.8245, -0.5314, -0.0415, -0.4934]) tensor([0.5139, 0.1324, 0.2161, 0.1376]) -Greedy action tensor([ 0.3890, -0.2335, 0.1569, -0.2303]) tensor([0.3487, 0.1871, 0.2765, 0.1877]) -Greedy action tensor([ 0.5244, -0.3119, 0.1167, -0.3735]) tensor([0.3991, 0.1729, 0.2654, 0.1626]) -Greedy action tensor([ 1.0798, -0.6616, 0.0178, -0.5396]) tensor([0.5817, 0.1020, 0.2011, 0.1152]) -Greedy action tensor([ 0.5640, -0.4196, -0.0393, -0.4885]) tensor([0.4405, 0.1647, 0.2410, 0.1538]) -Greedy action tensor([ 0.4761, -0.1498, -0.0092, -0.2886]) tensor([0.3823, 0.2045, 0.2353, 0.1779]) -Greedy action tensor([ 0.3033, 0.0985, -0.0632, -0.1879]) tensor([0.3205, 0.2612, 0.2222, 0.1961]) -Greedy action tensor([ 0.4136, 0.2794, 0.1823, -0.2735]) tensor([0.3154, 0.2758, 0.2503, 0.1586]) -Greedy action tensor([ 0.5447, -0.1282, -0.0687, -0.1484]) tensor([0.3919, 0.1999, 0.2122, 0.1959]) -Greedy action tensor([ 0.7349, -0.3120, 0.1919, -0.3687]) tensor([0.4418, 0.1551, 0.2567, 0.1465]) -Greedy action tensor([ 0.4033, 0.2435, 0.0775, -0.2438]) tensor([0.3228, 0.2751, 0.2330, 0.1690]) -Greedy action tensor([ 0.1053, 0.1805, 0.1307, -0.2654]) tensor([0.2636, 0.2842, 0.2703, 0.1819]) -Greedy action tensor([ 0.6577, -0.2064, -0.1186, -0.4596]) tensor([0.4528, 0.1908, 0.2083, 0.1481]) -Greedy action tensor([ 0.8170, -0.4622, -0.1481, -0.5164]) tensor([0.5201, 0.1447, 0.1981, 0.1371]) -Greedy action tensor([ 0.9171, -0.7535, -0.0252, -0.5844]) tensor([0.5553, 0.1045, 0.2164, 0.1237]) -Greedy action tensor([ 0.4159, 0.1652, -0.0594, -0.1649]) tensor([0.3379, 0.2630, 0.2101, 0.1890]) -Greedy action tensor([ 0.3984, 0.0384, 0.0085, -0.3418]) tensor([0.3507, 0.2446, 0.2374, 0.1673]) -Greedy action tensor([ 0.9685, -0.3231, 0.0088, -0.7352]) tensor([0.5435, 0.1494, 0.2082, 0.0989]) -Greedy action tensor([ 0.3115, 0.1308, -0.0056, -0.2646]) tensor([0.3200, 0.2671, 0.2330, 0.1799]) -Greedy action tensor([ 0.3620, -0.1116, -0.0989, -0.4564]) tensor([0.3711, 0.2311, 0.2341, 0.1637]) -Greedy action tensor([ 0.2823, -0.0846, 0.1204, -0.4094]) tensor([0.3285, 0.2276, 0.2794, 0.1645]) -Greedy action tensor([ 0.6115, -0.2524, 0.0164, -0.2272]) tensor([0.4157, 0.1752, 0.2293, 0.1797]) -Greedy action tensor([ 0.6617, -0.2971, 0.1119, -0.7415]) tensor([0.4533, 0.1738, 0.2616, 0.1114]) -Greedy action tensor([ 0.7150, -0.4718, -0.1546, -0.4723]) tensor([0.4928, 0.1504, 0.2065, 0.1503]) -Greedy action tensor([ 0.5466, -0.1713, -0.0517, -0.2776]) tensor([0.4039, 0.1970, 0.2220, 0.1771]) -Greedy action tensor([ 0.4592, -0.0095, -0.0808, -0.1326]) tensor([0.3621, 0.2266, 0.2110, 0.2003]) -Greedy action tensor([ 0.4362, -0.1661, -0.1125, -0.3683]) tensor([0.3887, 0.2128, 0.2246, 0.1739]) -Greedy action tensor([ 0.7709, -0.5107, 0.0189, -0.4195]) tensor([0.4871, 0.1352, 0.2296, 0.1481]) -Greedy action tensor([ 0.3680, -0.2082, 0.0434, -0.5218]) tensor([0.3710, 0.2085, 0.2681, 0.1524]) -Greedy action tensor([ 0.4788, -0.4118, 0.2880, -0.3726]) tensor([0.3754, 0.1541, 0.3102, 0.1602]) -Greedy action tensor([ 0.3833, -0.2181, 0.1733, -0.2514]) tensor([0.3462, 0.1897, 0.2806, 0.1835]) -Greedy action tensor([ 0.7648, -0.4265, -0.0272, -0.6926]) tensor([0.5026, 0.1527, 0.2277, 0.1170]) -Greedy action tensor([ 0.2903, 0.1538, -0.0169, -0.2893]) tensor([0.3156, 0.2754, 0.2322, 0.1768]) -Greedy action tensor([ 0.8057, -0.6203, 0.0385, -0.7928]) tensor([0.5244, 0.1260, 0.2435, 0.1060]) -Greedy action tensor([ 0.7251, -0.4779, -0.0623, -0.6714]) tensor([0.4993, 0.1499, 0.2272, 0.1236]) -Greedy action tensor([ 0.7492, -0.3570, -0.0874, -0.3920]) tensor([0.4800, 0.1588, 0.2079, 0.1533]) -Greedy action tensor([ 0.7039, -0.3227, -0.1319, -0.2181]) tensor([0.4567, 0.1636, 0.1980, 0.1816]) -Greedy action tensor([ 0.4260, -0.3468, 0.1397, -0.3548]) tensor([0.3744, 0.1729, 0.2812, 0.1715]) -Greedy action tensor([ 0.3621, -0.0826, -0.0966, -0.1482]) tensor([0.3480, 0.2231, 0.2200, 0.2089]) -Greedy action tensor([ 0.8245, -0.3236, -0.1292, -0.4314]) tensor([0.5032, 0.1596, 0.1939, 0.1433]) -Greedy action tensor([ 0.8322, -0.4523, 0.3535, -0.5240]) tensor([0.4643, 0.1285, 0.2876, 0.1196]) -Greedy action tensor([ 0.4337, -0.0647, 0.0154, -0.4279]) tensor([0.3720, 0.2260, 0.2448, 0.1572]) -Greedy action tensor([ 0.5333, -0.1609, 0.0144, -0.4594]) tensor([0.4057, 0.2026, 0.2414, 0.1503]) -Greedy action tensor([ 0.6097, -0.3655, -0.0285, -0.4991]) tensor([0.4474, 0.1687, 0.2363, 0.1476]) -Greedy action tensor([ 0.5718, -0.3996, -0.1096, -0.5221]) tensor([0.4506, 0.1706, 0.2279, 0.1509]) -Greedy action tensor([ 0.4056, 0.2174, 0.1632, -0.1142]) tensor([0.3117, 0.2583, 0.2446, 0.1854]) -Greedy action tensor([ 0.5823, -0.4462, 0.0539, -0.7341]) tensor([0.4514, 0.1614, 0.2662, 0.1210]) -Greedy action tensor([ 0.2566, -0.1202, 0.1420, -0.2768]) tensor([0.3160, 0.2168, 0.2818, 0.1854]) -Greedy action tensor([ 0.7203, -0.2652, -0.0238, -0.4297]) tensor([0.4619, 0.1724, 0.2195, 0.1462]) -Greedy action tensor([ 0.2906, -0.2074, 0.1404, -0.2872]) tensor([0.3301, 0.2006, 0.2841, 0.1852]) -Greedy action tensor([ 1.1727, -0.7858, -0.0294, -0.3811]) tensor([0.6049, 0.0853, 0.1818, 0.1279]) -Greedy action tensor([ 0.5233, -0.0049, -0.0759, -0.3718]) tensor([0.3926, 0.2315, 0.2156, 0.1604]) -Greedy action tensor([ 1.8812, -0.9548, -0.0661, 0.2169]) tensor([0.7191, 0.0422, 0.1026, 0.1361]) -Greedy action tensor([ 0.7564, -0.2545, -0.2019, 0.4680]) tensor([0.4005, 0.1457, 0.1536, 0.3002]) -Greedy action tensor([ 1.0139, -0.3693, -0.2348, 0.1173]) tensor([0.5140, 0.1289, 0.1474, 0.2097]) -Greedy action tensor([ 0.6113, -0.1342, -0.0606, 0.0426]) tensor([0.3919, 0.1860, 0.2002, 0.2219]) -Greedy action tensor([ 0.6722, -0.4931, -0.0139, -0.1697]) tensor([0.4452, 0.1388, 0.2242, 0.1918]) -Greedy action tensor([ 1.1568, -0.6493, -0.7000, 0.6433]) tensor([0.5211, 0.0856, 0.0814, 0.3119]) -Greedy action tensor([ 1.2208, -0.4107, -0.2751, -0.1418]) tensor([0.5968, 0.1167, 0.1337, 0.1528]) -Greedy action tensor([ 1.1023, -0.4805, -0.1929, 0.0339]) tensor([0.5486, 0.1127, 0.1502, 0.1885]) -Greedy action tensor([ 1.5160, -0.6973, -0.1727, 0.1577]) tensor([0.6447, 0.0705, 0.1191, 0.1657]) -Greedy action tensor([ 0.9899, -0.4991, 0.1652, 0.0224]) tensor([0.4892, 0.1104, 0.2145, 0.1859]) -Greedy action tensor([ 1.2399, -0.5337, -0.4263, -0.1880]) tensor([0.6256, 0.1062, 0.1182, 0.1500]) -Greedy action tensor([ 0.8488, -0.3271, -0.2166, -0.0530]) tensor([0.4857, 0.1498, 0.1674, 0.1971]) -Greedy action tensor([ 0.9777, -0.3002, -0.4270, -0.0232]) tensor([0.5287, 0.1473, 0.1297, 0.1943]) -Greedy action tensor([ 1.2565, -0.4743, -0.2627, 0.2914]) tensor([0.5628, 0.0997, 0.1232, 0.2144]) -Greedy action tensor([ 1.1666, -0.4191, -0.8373, 0.1698]) tensor([0.5853, 0.1199, 0.0789, 0.2160]) -Greedy action tensor([ 0.8687, -0.4962, -0.4280, 0.3343]) tensor([0.4729, 0.1208, 0.1293, 0.2771]) -Greedy action tensor([ 0.7721, 0.1159, 0.0531, -0.0342]) tensor([0.4077, 0.2116, 0.1987, 0.1821]) -Greedy action tensor([ 1.1887, -0.3231, -0.1876, -0.0428]) tensor([0.5666, 0.1249, 0.1431, 0.1654]) -Greedy action tensor([ 1.2790, -0.4758, -0.4107, 0.1837]) tensor([0.5910, 0.1022, 0.1091, 0.1977]) -Greedy action tensor([ 0.8118, -0.1479, -0.1154, -0.0517]) tensor([0.4545, 0.1741, 0.1798, 0.1916]) -Greedy action tensor([ 1.3404, -0.1635, -0.1073, -0.2389]) tensor([0.6011, 0.1336, 0.1413, 0.1239]) -Greedy action tensor([ 0.8911, -0.5395, -0.5253, 0.3912]) tensor([0.4789, 0.1145, 0.1162, 0.2905]) -Greedy action tensor([ 1.1383, -0.6143, -0.4282, 0.4479]) tensor([0.5309, 0.0920, 0.1108, 0.2662]) -Greedy action tensor([ 0.7896, -0.5518, -0.0743, 0.1508]) tensor([0.4523, 0.1183, 0.1906, 0.2388]) -Greedy action tensor([ 1.3818, -0.5350, -0.1580, -0.3333]) tensor([0.6487, 0.0954, 0.1391, 0.1167]) -Greedy action tensor([ 0.8981, -0.4937, -0.6660, 0.8662]) tensor([0.4121, 0.1025, 0.0862, 0.3992]) -Greedy action tensor([ 1.1351, -0.4775, -0.4898, 0.4871]) tensor([0.5210, 0.1039, 0.1026, 0.2725]) -Greedy action tensor([ 0.6477, 0.0816, -0.0169, -0.2288]) tensor([0.4002, 0.2272, 0.2059, 0.1666]) -Greedy action tensor([ 0.9272, -0.4387, 0.0101, -0.0849]) tensor([0.4955, 0.1264, 0.1980, 0.1801]) -Greedy action tensor([ 1.6383, -0.6483, -0.1503, 0.2813]) tensor([0.6552, 0.0666, 0.1095, 0.1687]) -Greedy action tensor([ 2.1151, -0.2457, -0.5509, 0.0108]) tensor([0.7777, 0.0734, 0.0541, 0.0948]) -Greedy action tensor([ 1.8508, -0.3731, -0.4781, -0.1267]) tensor([0.7441, 0.0805, 0.0725, 0.1030]) -Greedy action tensor([ 1.1234, -0.6437, -0.2839, 0.0681]) tensor([0.5670, 0.0969, 0.1388, 0.1974]) -Greedy action tensor([ 1.0886, -0.4590, -0.1505, -0.0174]) tensor([0.5455, 0.1160, 0.1580, 0.1805]) -Greedy action tensor([ 0.9638, -0.2501, -0.5691, -0.1125]) tensor([0.5394, 0.1602, 0.1165, 0.1839]) -Greedy action tensor([ 0.9452, -0.1673, 0.2001, -0.2669]) tensor([0.4760, 0.1565, 0.2259, 0.1416]) -Greedy action tensor([ 1.5934, -0.3875, -0.3670, 0.1934]) tensor([0.6556, 0.0904, 0.0923, 0.1617]) -Greedy action tensor([ 1.3237, -0.5760, -0.4120, 0.5912]) tensor([0.5535, 0.0828, 0.0976, 0.2661]) -Greedy action tensor([ 1.2647, -0.3795, 0.0103, 0.0894]) tensor([0.5596, 0.1081, 0.1596, 0.1727]) -Greedy action tensor([ 0.6642, -0.5159, -0.0638, -0.1317]) tensor([0.4462, 0.1371, 0.2154, 0.2013]) -Greedy action tensor([ 0.7634, -0.4560, -0.7886, 0.4770]) tensor([0.4428, 0.1308, 0.0938, 0.3325]) -Greedy action tensor([ 1.0664, -0.5670, -0.7756, 0.3456]) tensor([0.5435, 0.1061, 0.0861, 0.2643]) -Greedy action tensor([ 1.2457, -0.5908, -0.2412, 0.3368]) tensor([0.5591, 0.0891, 0.1264, 0.2253]) -Greedy action tensor([ 0.7502, -0.5478, -0.0491, -0.0049]) tensor([0.4561, 0.1245, 0.2051, 0.2143]) -Greedy action tensor([ 1.1925, -0.5274, -0.3267, 0.4129]) tensor([0.5386, 0.0965, 0.1179, 0.2470]) -Greedy action tensor([ 0.5308, 0.0174, -0.4415, 0.0332]) tensor([0.3869, 0.2315, 0.1463, 0.2352]) -Greedy action tensor([ 0.7426, -0.3903, -0.9160, 0.0343]) tensor([0.4987, 0.1606, 0.0950, 0.2456]) -Greedy action tensor([ 1.3131, -0.4996, -0.5791, 0.9814]) tensor([0.4922, 0.0803, 0.0742, 0.3533]) -Greedy action tensor([ 0.8258, -0.5316, -0.5554, 0.1990]) tensor([0.4895, 0.1260, 0.1230, 0.2615]) -Greedy action tensor([ 0.7566, -0.4049, -0.1422, 0.2450]) tensor([0.4311, 0.1349, 0.1755, 0.2585]) -Greedy action tensor([ 0.8040, -0.4499, -0.6175, -0.2549]) tensor([0.5337, 0.1523, 0.1288, 0.1851]) -Greedy action tensor([ 1.5185, -0.5453, -0.4902, -0.3290]) tensor([0.7048, 0.0895, 0.0946, 0.1111]) -Greedy action tensor([ 0.3671, 0.1174, -0.1261, -0.2635]) tensor([0.3422, 0.2666, 0.2090, 0.1822]) -Greedy action tensor([ 0.9110, -0.3797, -0.3315, 0.0606]) tensor([0.5023, 0.1382, 0.1450, 0.2146]) -Greedy action tensor([ 1.5901, -0.4335, -0.3562, 0.1691]) tensor([0.6594, 0.0872, 0.0942, 0.1592]) -Greedy action tensor([ 1.1210, -0.7606, -0.7139, 0.5782]) tensor([0.5282, 0.0805, 0.0843, 0.3070]) -Greedy action tensor([ 0.9920, -0.2494, -0.0284, 0.0235]) tensor([0.4928, 0.1424, 0.1776, 0.1871]) -Greedy action tensor([ 0.2385, 0.2820, -0.0242, -0.2428]) tensor([0.2914, 0.3044, 0.2241, 0.1801]) -Greedy action tensor([ 0.8319, 0.0556, 0.0777, -0.0526]) tensor([0.4267, 0.1964, 0.2007, 0.1762]) -Greedy action tensor([ 1.2129, -0.1578, -0.2531, -0.0762]) tensor([0.5681, 0.1443, 0.1311, 0.1565]) -Greedy action tensor([ 0.6090, -0.0339, 0.0570, 0.0537]) tensor([0.3738, 0.1965, 0.2152, 0.2145]) -Greedy action tensor([ 1.6438, -0.5665, -0.4087, 0.3443]) tensor([0.6619, 0.0726, 0.0850, 0.1805]) -Greedy action tensor([ 0.8540, -0.4238, -0.4217, 0.1381]) tensor([0.4886, 0.1361, 0.1364, 0.2388]) -Greedy action tensor([ 0.9399, -0.1468, 0.0280, -0.1516]) tensor([0.4820, 0.1626, 0.1936, 0.1618]) -Greedy action tensor([ 0.4142, -0.3716, 0.2459, -0.1947]) tensor([0.3515, 0.1602, 0.2971, 0.1912]) -Greedy action tensor([ 1.0162, -0.7608, 0.0198, 0.0393]) tensor([0.5222, 0.0883, 0.1928, 0.1966]) -Greedy action tensor([ 1.5348, -0.6838, 0.0187, -0.0285]) tensor([0.6503, 0.0707, 0.1428, 0.1362]) -Greedy action tensor([ 0.8625, -0.5073, -0.6687, 0.4710]) tensor([0.4659, 0.1184, 0.1008, 0.3150]) -Greedy action tensor([ 0.4030, -0.1300, -0.3702, -0.3190]) tensor([0.3946, 0.2316, 0.1821, 0.1917]) -Greedy action tensor([ 0.3991, -0.2261, 0.2850, -0.1374]) tensor([0.3320, 0.1777, 0.2962, 0.1942]) -Greedy action tensor([ 1.0663, -0.5628, -0.3466, 0.6242]) tensor([0.4803, 0.0942, 0.1169, 0.3087]) -Greedy action tensor([ 0.8188, -0.2731, -0.2697, -0.1040]) tensor([0.4832, 0.1621, 0.1627, 0.1920]) -Greedy action tensor([ 0.8489, -0.5201, -0.5741, 0.3492]) tensor([0.4757, 0.1210, 0.1146, 0.2886]) -Greedy action tensor([ 0.9572, -0.4180, -0.2318, 0.1266]) tensor([0.5017, 0.1268, 0.1528, 0.2186]) -Greedy action tensor([ 1.4114, 0.0572, -0.1955, -0.0372]) tensor([0.5905, 0.1524, 0.1184, 0.1387]) -Greedy action tensor([ 1.2005, -0.6593, -0.7378, 1.0016]) tensor([0.4718, 0.0735, 0.0679, 0.3868]) -Greedy action tensor([ 1.0323, -0.5090, -0.1254, 0.1842]) tensor([0.5111, 0.1094, 0.1606, 0.2189]) -Greedy action tensor([ 0.5381, -0.3932, -0.2560, 0.0760]) tensor([0.4039, 0.1592, 0.1825, 0.2544]) -Greedy action tensor([ 0.8282, -0.5702, -0.3612, 0.1926]) tensor([0.4806, 0.1187, 0.1463, 0.2545]) -Greedy action tensor([ 0.6958, -0.4938, -0.2806, 0.2378]) tensor([0.4322, 0.1315, 0.1628, 0.2734]) -Greedy action tensor([ 0.8251, -0.2796, -0.4508, 0.1189]) tensor([0.4753, 0.1575, 0.1327, 0.2346]) -Greedy action tensor([-0.7758, -0.2832, -0.0534, 0.1773]) tensor([0.1372, 0.2245, 0.2825, 0.3558]) -Greedy action tensor([-1.1906, -0.3552, 0.3754, 0.4885]) tensor([0.0743, 0.1714, 0.3558, 0.3985]) -Greedy action tensor([-1.7744, -0.4590, 0.5859, -0.0842]) tensor([0.0482, 0.1797, 0.5108, 0.2613]) -Greedy action tensor([-1.9136, -0.7698, 0.2194, -0.2747]) tensor([0.0564, 0.1770, 0.4761, 0.2905]) -Greedy action tensor([-1.7123, -0.5116, 0.5500, 0.0085]) tensor([0.0512, 0.1702, 0.4922, 0.2864]) -Greedy action tensor([-1.9661, -0.9568, 0.2778, -0.2295]) tensor([0.0530, 0.1455, 0.5002, 0.3012]) -Greedy action tensor([-1.1710, -0.4928, 0.4261, 0.7221]) tensor([0.0687, 0.1354, 0.3394, 0.4564]) -Greedy action tensor([-1.9028, -0.4234, 0.8494, 0.3007]) tensor([0.0332, 0.1457, 0.5204, 0.3006]) -Greedy action tensor([-1.8103, -0.5630, 0.9129, 0.4147]) tensor([0.0345, 0.1202, 0.5258, 0.3195]) -Greedy action tensor([-1.8482, -0.4668, 0.6234, -0.1125]) tensor([0.0445, 0.1769, 0.5264, 0.2522]) -Greedy action tensor([-1.7567, -0.5224, 0.4079, -0.2309]) tensor([0.0564, 0.1936, 0.4909, 0.2591]) -Greedy action tensor([-1.7389, -0.7474, 0.1238, -0.3634]) tensor([0.0710, 0.1913, 0.4570, 0.2808]) -Greedy action tensor([-1.6929, -0.4839, 0.6379, 0.2503]) tensor([0.0463, 0.1550, 0.4758, 0.3230]) -Greedy action tensor([-1.3457, -0.4587, 0.7043, 0.9410]) tensor([0.0475, 0.1154, 0.3692, 0.4678]) -Greedy action tensor([-0.8412, -0.0251, 0.3665, -0.1756]) tensor([0.1169, 0.2644, 0.3912, 0.2275]) -Greedy action tensor([-1.6264, -0.3114, 1.0742, 0.8889]) tensor([0.0313, 0.1165, 0.4655, 0.3868]) -Greedy action tensor([-1.3395, -0.4540, 0.4618, 0.5082]) tensor([0.0632, 0.1532, 0.3827, 0.4009]) -Greedy action tensor([-2.0010, -0.8574, 0.5860, 0.0643]) tensor([0.0395, 0.1240, 0.5250, 0.3116]) -Greedy action tensor([-1.8551, -0.6254, 0.7853, 0.1107]) tensor([0.0391, 0.1337, 0.5481, 0.2791]) -Greedy action tensor([-1.4298, 0.3487, 0.1030, -0.0827]) tensor([0.0649, 0.3845, 0.3007, 0.2498]) -Greedy action tensor([-0.5279, 0.7521, 0.1080, 0.5061]) tensor([0.1076, 0.3868, 0.2031, 0.3025]) -Greedy action tensor([-1.0408, -0.7013, 0.9287, 1.3425]) tensor([0.0490, 0.0688, 0.3511, 0.5311]) -Greedy action tensor([-1.0014, -0.5906, 0.4263, -0.1657]) tensor([0.1113, 0.1679, 0.4641, 0.2567]) -Greedy action tensor([-1.0325, -0.5717, 0.1949, 0.3973]) tensor([0.0983, 0.1558, 0.3353, 0.4106]) -Greedy action tensor([-1.8947, -0.9313, 0.1682, -0.3492]) tensor([0.0618, 0.1620, 0.4863, 0.2899]) -Greedy action tensor([-0.0693, -0.4248, 0.2748, 0.2872]) tensor([0.2203, 0.1544, 0.3108, 0.3146]) -Greedy action tensor([-1.8221, -0.4969, 0.8144, 0.1342]) tensor([0.0388, 0.1459, 0.5412, 0.2742]) -Greedy action tensor([-1.2744, -0.4646, 0.4710, 0.7191]) tensor([0.0613, 0.1377, 0.3511, 0.4499]) -Greedy action tensor([-1.2075, 0.4151, 0.4239, -0.5819]) tensor([0.0766, 0.3883, 0.3918, 0.1433]) -Greedy action tensor([-1.6725, -0.5012, 0.8836, 0.6315]) tensor([0.0369, 0.1189, 0.4750, 0.3692]) -Greedy action tensor([-0.8892, -0.6163, 0.4252, -0.1877]) tensor([0.1242, 0.1631, 0.4623, 0.2504]) -Greedy action tensor([-0.4383, -0.3245, 0.1487, 0.0838]) tensor([0.1784, 0.1999, 0.3209, 0.3007]) -Greedy action tensor([-1.9134, -0.6163, 1.1730, 0.5653]) tensor([0.0260, 0.0951, 0.5690, 0.3099]) -Greedy action tensor([-1.3954, -0.3313, 0.3763, 0.2365]) tensor([0.0671, 0.1946, 0.3949, 0.3434]) -Greedy action tensor([-1.8438, -0.6716, 0.7044, 0.0814]) tensor([0.0419, 0.1353, 0.5356, 0.2873]) -Greedy action tensor([-1.5848, 0.8948, 0.3921, 0.0670]) tensor([0.0394, 0.4704, 0.2846, 0.2056]) -Greedy action tensor([-1.9429, -0.8614, 0.1627, -0.2683]) tensor([0.0572, 0.1685, 0.4693, 0.3050]) -Greedy action tensor([-2.0029, -0.7247, 0.7071, 0.0663]) tensor([0.0363, 0.1304, 0.5458, 0.2875]) -Greedy action tensor([-1.6626, -1.0102, -0.1500, -0.9133]) tensor([0.1044, 0.2005, 0.4741, 0.2210]) -Greedy action tensor([-1.1983, -0.5585, -0.1955, -0.2275]) tensor([0.1210, 0.2295, 0.3299, 0.3196]) -Greedy action tensor([-1.5475, -0.5261, 0.4428, 0.1083]) tensor([0.0612, 0.1700, 0.4481, 0.3207]) -Greedy action tensor([-1.9903, -0.6714, 0.7382, -0.0922]) tensor([0.0374, 0.1399, 0.5729, 0.2497]) -Greedy action tensor([-1.8144, -0.4534, 0.9270, 0.5693]) tensor([0.0320, 0.1248, 0.4962, 0.3470]) -Greedy action tensor([-1.9312, -0.9856, 0.1588, -0.3955]) tensor([0.0613, 0.1579, 0.4959, 0.2849]) -Greedy action tensor([-0.9972, -0.4984, 0.2421, 0.3546]) tensor([0.1004, 0.1653, 0.3466, 0.3878]) -Greedy action tensor([-1.5647, -0.4289, 0.5438, 0.3236]) tensor([0.0527, 0.1642, 0.4344, 0.3486]) -Greedy action tensor([-1.3616, -0.5501, 0.4599, -0.0490]) tensor([0.0761, 0.1712, 0.4701, 0.2826]) -Greedy action tensor([-0.8017, -0.1685, 0.4235, -0.2274]) tensor([0.1240, 0.2336, 0.4222, 0.2202]) -Greedy action tensor([-1.7917, -0.5907, 0.2129, -0.2185]) tensor([0.0604, 0.2006, 0.4480, 0.2910]) -Greedy action tensor([-1.8726, -0.4826, 0.6563, -0.0945]) tensor([0.0426, 0.1710, 0.5342, 0.2521]) -Greedy action tensor([-1.5549, -0.4657, 0.7012, 0.6629]) tensor([0.0440, 0.1309, 0.4204, 0.4046]) -Greedy action tensor([-1.8599, -0.6111, 0.1423, -0.3403]) tensor([0.0607, 0.2118, 0.4499, 0.2776]) -Greedy action tensor([-1.3583, -0.5364, 0.4048, 0.4382]) tensor([0.0661, 0.1503, 0.3853, 0.3983]) -Greedy action tensor([-1.8847, -0.6233, 0.6257, -0.0704]) tensor([0.0435, 0.1536, 0.5357, 0.2671]) -Greedy action tensor([-0.5407, -0.5518, 0.1827, 0.2042]) tensor([0.1624, 0.1606, 0.3348, 0.3421]) -Greedy action tensor([-1.5118, -0.4392, 0.7498, 0.5567]) tensor([0.0467, 0.1364, 0.4478, 0.3692]) -Greedy action tensor([-1.1062, -0.5678, 0.2535, 0.3051]) tensor([0.0934, 0.1600, 0.3637, 0.3830]) -Greedy action tensor([-1.4913, -0.0801, 0.3723, 0.1178]) tensor([0.0604, 0.2478, 0.3896, 0.3021]) -Greedy action tensor([-1.8716, -0.4129, 0.6247, -0.1380]) tensor([0.0433, 0.1862, 0.5255, 0.2451]) -Greedy action tensor([-1.9958, -0.9436, 0.5334, -0.1886]) tensor([0.0444, 0.1273, 0.5575, 0.2708]) -Greedy action tensor([-0.9655, -0.5717, 0.3496, 0.3330]) tensor([0.1013, 0.1502, 0.3774, 0.3711]) -Greedy action tensor([-1.7807, -0.3584, 0.6878, 0.1856]) tensor([0.0415, 0.1721, 0.4899, 0.2965]) -Greedy action tensor([-1.5934, -0.5880, 0.5727, 0.0746]) tensor([0.0563, 0.1539, 0.4913, 0.2985]) -Greedy action tensor([-1.3733, -0.4194, 0.4550, 0.4723]) tensor([0.0619, 0.1607, 0.3853, 0.3920]) -Greedy action tensor([-1.6719, -0.5100, 1.0605, 0.8502]) tensor([0.0312, 0.0998, 0.4800, 0.3890]) -Greedy action tensor([-1.6580, -0.4809, 1.2273, 0.9862]) tensor([0.0276, 0.0896, 0.4944, 0.3885]) -Greedy action tensor([-1.5974, -0.4145, 0.7116, 0.5918]) tensor([0.0430, 0.1403, 0.4327, 0.3839]) -Greedy action tensor([-1.1795, -0.5631, 0.4203, -0.0661]) tensor([0.0922, 0.1707, 0.4565, 0.2806]) -Greedy action tensor([-1.3954, -0.5690, 0.0043, -0.5580]) tensor([0.1036, 0.2368, 0.4201, 0.2394]) -Greedy action tensor([-1.3702, -0.5228, 0.7251, -0.4288]) tensor([0.0713, 0.1664, 0.5795, 0.1828]) -Greedy action tensor([-1.1212, -0.5827, 0.4524, 0.8283]) tensor([0.0687, 0.1177, 0.3313, 0.4824]) -Greedy action tensor([-0.7477, -0.0028, 0.3474, -0.2121]) tensor([0.1281, 0.2699, 0.3831, 0.2189]) -Greedy action tensor([-1.9650, -0.5101, 0.8406, 0.1706]) tensor([0.0330, 0.1415, 0.5461, 0.2794]) -Greedy action tensor([-0.9094, -0.6570, 0.3308, 0.1131]) tensor([0.1173, 0.1510, 0.4055, 0.3262]) -Greedy action tensor([-1.2687, -0.3792, 0.9278, 1.1252]) tensor([0.0428, 0.1041, 0.3846, 0.4686]) -Greedy action tensor([-1.1586, -0.7106, 0.2999, -0.1982]) tensor([0.1055, 0.1652, 0.4536, 0.2757]) -Greedy action tensor([-1.0909, -0.5782, 0.4119, 0.8846]) tensor([0.0696, 0.1162, 0.3127, 0.5016]) -Greedy action tensor([-1.5237, 0.0111, 0.6016, -0.5238]) tensor([0.0598, 0.2773, 0.5005, 0.1624]) -Greedy action tensor([-1.4612, -0.5451, 0.4534, 0.1615]) tensor([0.0651, 0.1628, 0.4420, 0.3301]) -Greedy action tensor([-0.7944, -0.5559, 0.4170, 0.4359]) tensor([0.1105, 0.1403, 0.3711, 0.3782]) -Greedy action tensor([-1.5479, -0.4518, 1.1217, 1.0304]) tensor([0.0316, 0.0947, 0.4568, 0.4169]) -Greedy action tensor([ 1.4463, -0.1623, -0.1166, -0.0275]) tensor([0.6102, 0.1221, 0.1279, 0.1398]) -Greedy action tensor([ 1.0115, -0.5595, -0.4667, 1.0339]) tensor([0.4067, 0.0845, 0.0928, 0.4160]) -Greedy action tensor([ 0.7484, 0.0060, -0.0722, -0.1367]) tensor([0.4294, 0.2044, 0.1890, 0.1772]) -Greedy action tensor([ 0.7153, -0.3347, -0.1211, 0.2011]) tensor([0.4200, 0.1470, 0.1820, 0.2511]) -Greedy action tensor([ 0.9674, -0.5367, -0.6176, 0.2761]) tensor([0.5187, 0.1153, 0.1063, 0.2598]) -Greedy action tensor([ 0.6652, -0.2305, -0.7187, -0.4521]) tensor([0.5035, 0.2056, 0.1262, 0.1647]) -Greedy action tensor([ 1.0797, -0.6619, -0.6556, 0.6650]) tensor([0.4970, 0.0871, 0.0876, 0.3283]) -Greedy action tensor([ 1.0963, -0.3023, -0.5053, 0.3313]) tensor([0.5225, 0.1290, 0.1053, 0.2431]) -Greedy action tensor([ 1.0987, -0.5074, 0.0277, -0.0275]) tensor([0.5354, 0.1075, 0.1835, 0.1736]) -Greedy action tensor([ 0.8743, -0.1038, -0.6809, 0.0981]) tensor([0.4885, 0.1837, 0.1031, 0.2248]) -Greedy action tensor([ 0.8959, -0.2673, -0.2650, 0.1247]) tensor([0.4789, 0.1497, 0.1500, 0.2215]) -Greedy action tensor([ 1.0501, -0.2704, -0.3733, 0.2668]) tensor([0.5090, 0.1359, 0.1226, 0.2325]) -Greedy action tensor([ 0.9272, -0.4866, -0.4729, 0.1736]) tensor([0.5101, 0.1241, 0.1258, 0.2401]) -Greedy action tensor([ 1.9152, -0.4260, -0.2936, 0.1933]) tensor([0.7221, 0.0695, 0.0793, 0.1291]) -Greedy action tensor([ 0.9502, -0.3962, -0.4841, 0.3622]) tensor([0.4869, 0.1267, 0.1160, 0.2704]) -Greedy action tensor([ 1.0171, -0.6960, -0.6913, 0.8762]) tensor([0.4484, 0.0809, 0.0812, 0.3895]) -Greedy action tensor([ 0.9063, -0.4242, -0.3309, -0.2588]) tensor([0.5358, 0.1416, 0.1555, 0.1671]) -Greedy action tensor([ 1.1420, -0.6280, -0.5312, 0.1824]) tensor([0.5744, 0.0978, 0.1078, 0.2200]) -Greedy action tensor([ 0.5752, -0.4710, 0.1171, -0.1598]) tensor([0.4060, 0.1426, 0.2568, 0.1947]) -Greedy action tensor([ 1.3470, -0.5151, -0.7278, 0.5186]) tensor([0.5822, 0.0904, 0.0731, 0.2543]) -Greedy action tensor([ 0.9271, -0.2859, -0.5145, -0.1198]) tensor([0.5305, 0.1577, 0.1255, 0.1862]) -Greedy action tensor([ 0.9203, -0.4541, -0.5951, 0.1203]) tensor([0.5203, 0.1316, 0.1143, 0.2338]) -Greedy action tensor([ 1.3936, -0.4442, -0.0276, 0.2086]) tensor([0.5860, 0.0933, 0.1415, 0.1792]) -Greedy action tensor([ 0.8748, -0.7237, -0.0500, 0.0476]) tensor([0.4912, 0.0993, 0.1948, 0.2148]) -Greedy action tensor([ 1.3859, -0.3207, -0.3125, -0.1129]) tensor([0.6298, 0.1143, 0.1152, 0.1407]) -Greedy action tensor([ 1.2483, -0.4880, -0.3837, 0.0730]) tensor([0.5951, 0.1048, 0.1164, 0.1837]) -Greedy action tensor([ 1.1911, -0.5025, -0.3244, 0.0279]) tensor([0.5827, 0.1071, 0.1280, 0.1821]) -Greedy action tensor([ 0.9739, -0.4790, -0.5802, 0.1296]) tensor([0.5333, 0.1247, 0.1127, 0.2292]) -Greedy action tensor([ 0.7939, -0.4288, -0.2081, 0.1464]) tensor([0.4577, 0.1347, 0.1680, 0.2395]) -Greedy action tensor([ 0.7225, -0.4288, -0.2476, 0.0523]) tensor([0.4531, 0.1433, 0.1718, 0.2318]) -Greedy action tensor([ 0.9815, -0.6295, -0.2844, 0.7454]) tensor([0.4403, 0.0879, 0.1241, 0.3477]) -Greedy action tensor([ 0.8333, -0.6084, -0.2431, 0.3597]) tensor([0.4545, 0.1075, 0.1549, 0.2831]) -Greedy action tensor([ 0.8279, -0.3334, -0.7447, 0.1305]) tensor([0.4954, 0.1551, 0.1028, 0.2467]) -Greedy action tensor([ 0.6881, 0.0045, -0.3513, -0.4634]) tensor([0.4598, 0.2321, 0.1626, 0.1454]) -Greedy action tensor([ 1.2848, -0.6494, -0.6106, 0.7871]) tensor([0.5256, 0.0760, 0.0790, 0.3195]) -Greedy action tensor([ 0.9506, -0.3091, -0.5039, -0.1997]) tensor([0.5453, 0.1547, 0.1273, 0.1726]) -Greedy action tensor([ 0.9962, -0.1218, -0.0790, -0.1438]) tensor([0.5030, 0.1644, 0.1716, 0.1609]) -Greedy action tensor([ 0.5533, -0.5824, 0.0118, -0.0879]) tensor([0.4116, 0.1322, 0.2395, 0.2168]) -Greedy action tensor([ 1.4070, -0.6681, -0.3024, 0.1009]) tensor([0.6340, 0.0796, 0.1147, 0.1717]) -Greedy action tensor([ 0.4437, -0.1626, -0.4732, -0.2411]) tensor([0.4083, 0.2227, 0.1632, 0.2059]) -Greedy action tensor([ 0.7642, -0.6488, -0.2839, 0.7079]) tensor([0.3938, 0.0959, 0.1381, 0.3723]) -Greedy action tensor([ 0.6703, -0.4128, -0.0624, -0.0151]) tensor([0.4305, 0.1457, 0.2069, 0.2169]) -Greedy action tensor([ 1.3521, -0.4026, -0.1219, 0.1768]) tensor([0.5846, 0.1011, 0.1339, 0.1805]) -Greedy action tensor([ 1.0357e+00, -6.6339e-01, -2.8689e-01, -5.8489e-04]) tensor([0.5543, 0.1014, 0.1477, 0.1967]) -Greedy action tensor([ 0.6558, -0.4728, -0.5947, 0.1441]) tensor([0.4526, 0.1464, 0.1296, 0.2713]) -Greedy action tensor([ 0.9170, -0.3944, -0.0554, 0.1188]) tensor([0.4767, 0.1284, 0.1803, 0.2146]) -Greedy action tensor([ 1.7329, -0.2279, -0.3916, -0.0900]) tensor([0.7033, 0.0990, 0.0840, 0.1136]) -Greedy action tensor([ 1.2816, -0.8964, -0.0016, 0.1805]) tensor([0.5804, 0.0657, 0.1609, 0.1930]) -Greedy action tensor([ 0.9513, -0.5741, -0.7783, 0.5774]) tensor([0.4801, 0.1044, 0.0851, 0.3303]) -Greedy action tensor([ 0.9588, -0.3607, -0.4083, -0.0348]) tensor([0.5284, 0.1412, 0.1347, 0.1957]) -Greedy action tensor([ 0.9792, -0.5632, -0.3366, 0.5181]) tensor([0.4733, 0.1012, 0.1270, 0.2985]) -Greedy action tensor([ 0.5392, -0.2907, -0.1445, 0.1065]) tensor([0.3862, 0.1684, 0.1949, 0.2505]) -Greedy action tensor([ 1.0571, -0.6047, 0.1061, -0.0632]) tensor([0.5257, 0.0998, 0.2031, 0.1715]) -Greedy action tensor([ 0.9114, -0.4083, -0.0911, -0.1414]) tensor([0.5043, 0.1347, 0.1850, 0.1760]) -Greedy action tensor([ 1.5165, -0.8588, -0.4516, 0.2961]) tensor([0.6545, 0.0609, 0.0915, 0.1932]) -Greedy action tensor([ 1.1551, -0.6610, -0.4464, 0.4577]) tensor([0.5370, 0.0874, 0.1083, 0.2674]) -Greedy action tensor([ 0.8468, -0.4175, -0.4725, -0.2129]) tensor([0.5273, 0.1489, 0.1410, 0.1827]) -Greedy action tensor([ 0.8178, -0.3331, -0.3212, 0.3862]) tensor([0.4375, 0.1384, 0.1400, 0.2841]) -Greedy action tensor([ 0.7633, -0.4282, -0.1665, 0.1149]) tensor([0.4502, 0.1367, 0.1777, 0.2354]) -Greedy action tensor([ 0.6535, -0.4257, 0.1589, -0.2864]) tensor([0.4273, 0.1452, 0.2606, 0.1669]) -Greedy action tensor([ 0.7937, -0.5940, 0.0541, 0.0073]) tensor([0.4582, 0.1144, 0.2187, 0.2087]) -Greedy action tensor([ 0.7229, -0.4297, -0.1441, 0.0744]) tensor([0.4427, 0.1398, 0.1860, 0.2315]) -Greedy action tensor([ 0.5133, -0.5421, -0.0564, -0.1334]) tensor([0.4102, 0.1428, 0.2321, 0.2149]) -Greedy action tensor([ 0.8563, -0.3425, -0.0373, -0.0110]) tensor([0.4693, 0.1415, 0.1920, 0.1972]) -Greedy action tensor([ 1.1635, -0.6066, -0.2705, -0.0418]) tensor([0.5854, 0.0997, 0.1395, 0.1754]) -Greedy action tensor([ 0.9568, -0.5346, -0.2934, 0.1329]) tensor([0.5128, 0.1154, 0.1469, 0.2250]) -Greedy action tensor([ 0.6590, -0.4061, 0.1584, -0.2090]) tensor([0.4218, 0.1454, 0.2557, 0.1771]) -Greedy action tensor([ 0.6451, -0.1035, -0.1319, -0.3097]) tensor([0.4315, 0.2041, 0.1984, 0.1661]) -Greedy action tensor([ 1.5196, -0.4522, -0.3515, 0.1672]) tensor([0.6444, 0.0897, 0.0992, 0.1667]) -Greedy action tensor([ 1.1303, -0.6411, -0.4598, 0.6108]) tensor([0.5079, 0.0864, 0.1036, 0.3021]) -Greedy action tensor([ 1.1374, -0.0902, -0.0066, -0.1005]) tensor([0.5259, 0.1541, 0.1675, 0.1525]) -Greedy action tensor([ 1.2220, -0.7250, -0.4931, -0.0206]) tensor([0.6206, 0.0886, 0.1117, 0.1791]) -Greedy action tensor([ 0.5673, -0.4450, -0.1607, 0.0569]) tensor([0.4088, 0.1485, 0.1974, 0.2453]) -Greedy action tensor([ 0.9308, -0.3558, -0.2293, 0.0868]) tensor([0.4951, 0.1368, 0.1552, 0.2129]) -Greedy action tensor([ 0.4597, -0.3617, 0.0669, -0.1804]) tensor([0.3785, 0.1665, 0.2555, 0.1996]) -Greedy action tensor([ 1.1959e+00, -4.3891e-01, -4.1983e-01, -3.6542e-04]) tensor([0.5896, 0.1150, 0.1172, 0.1783]) -Greedy action tensor([ 1.0644, -0.1329, -0.0038, -0.1711]) tensor([0.5164, 0.1560, 0.1775, 0.1501]) -Greedy action tensor([ 0.9048, -0.3988, -0.2375, 0.0914]) tensor([0.4916, 0.1335, 0.1569, 0.2180]) -Greedy action tensor([ 1.0174, -0.3783, -0.5035, -0.2941]) tensor([0.5762, 0.1427, 0.1259, 0.1552]) -Greedy action tensor([0.7264, 0.1131, 0.0743, 0.0094]) tensor([0.3920, 0.2123, 0.2042, 0.1914]) -Greedy action tensor([ 0.9948, -0.4621, -0.1250, -0.1566]) tensor([0.5332, 0.1242, 0.1740, 0.1686]) -Greedy action tensor([ 0.4356, 0.0930, 0.0685, -0.1475]) tensor([0.3377, 0.2398, 0.2340, 0.1885]) -Greedy action tensor([ 0.5641, -0.2991, 0.0494, -0.5090]) tensor([0.4235, 0.1786, 0.2531, 0.1448]) -Greedy action tensor([ 0.3964, 0.1109, 0.0568, -0.2502]) tensor([0.3347, 0.2516, 0.2383, 0.1753]) -Greedy action tensor([ 0.5592, -0.2955, -0.0953, -0.4043]) tensor([0.4298, 0.1828, 0.2234, 0.1640]) -Greedy action tensor([ 0.1821, 0.0951, 0.0644, -0.2281]) tensor([0.2883, 0.2642, 0.2562, 0.1913]) -Greedy action tensor([ 0.4495, 0.0069, 0.1586, -0.4377]) tensor([0.3569, 0.2293, 0.2668, 0.1470]) -Greedy action tensor([ 0.5282, -0.3233, 0.1297, -0.6847]) tensor([0.4174, 0.1782, 0.2803, 0.1241]) -Greedy action tensor([ 0.3987, -0.2527, -0.0293, -0.5031]) tensor([0.3878, 0.2021, 0.2527, 0.1574]) -Greedy action tensor([ 0.3565, -0.0085, 0.0701, -0.2935]) tensor([0.3370, 0.2340, 0.2531, 0.1759]) -Greedy action tensor([ 0.4043, -0.0797, -0.0136, -0.1982]) tensor([0.3543, 0.2184, 0.2333, 0.1940]) -Greedy action tensor([ 0.5560, -0.3810, 0.0602, -0.7627]) tensor([0.4408, 0.1727, 0.2685, 0.1179]) -Greedy action tensor([ 0.5839, -0.2831, -0.0590, -0.4320]) tensor([0.4333, 0.1821, 0.2278, 0.1569]) -Greedy action tensor([ 0.6487, -0.1275, -0.0377, -0.2612]) tensor([0.4226, 0.1945, 0.2128, 0.1701]) -Greedy action tensor([ 0.3933, -0.0818, 0.3655, -0.2871]) tensor([0.3225, 0.2005, 0.3136, 0.1633]) -Greedy action tensor([ 0.9075, -0.6200, -0.1638, -0.5291]) tensor([0.5564, 0.1208, 0.1906, 0.1323]) -Greedy action tensor([ 0.1896, 0.0703, -0.0305, -0.1871]) tensor([0.2962, 0.2629, 0.2377, 0.2032]) -Greedy action tensor([ 0.4939, -0.4026, 0.0286, -0.3809]) tensor([0.4077, 0.1663, 0.2560, 0.1700]) -Greedy action tensor([ 0.8263, -0.2859, -0.0657, -0.2713]) tensor([0.4825, 0.1587, 0.1978, 0.1610]) -Greedy action tensor([ 0.4245, -0.2175, -0.0079, -0.3802]) tensor([0.3813, 0.2007, 0.2475, 0.1705]) -Greedy action tensor([ 0.3625, -0.2260, -0.0119, -0.3494]) tensor([0.3658, 0.2031, 0.2516, 0.1795]) -Greedy action tensor([ 0.4982, -0.4052, 0.2252, -0.4675]) tensor([0.3926, 0.1591, 0.2988, 0.1495]) -Greedy action tensor([ 1.1500, -0.7106, 0.0945, -0.6987]) tensor([0.6020, 0.0937, 0.2095, 0.0948]) -Greedy action tensor([ 0.6714, -0.2556, -0.0173, -0.3175]) tensor([0.4405, 0.1743, 0.2213, 0.1639]) -Greedy action tensor([ 0.4552, -0.2207, -0.1916, -0.3523]) tensor([0.4035, 0.2053, 0.2113, 0.1799]) -Greedy action tensor([ 0.1903, 0.1553, 0.0263, -0.3146]) tensor([0.2926, 0.2825, 0.2483, 0.1766]) -Greedy action tensor([ 0.5359, -0.1915, 0.0427, -0.5277]) tensor([0.4100, 0.1981, 0.2504, 0.1415]) -Greedy action tensor([ 0.2628, -0.2218, 0.1460, -0.2762]) tensor([0.3237, 0.1994, 0.2880, 0.1888]) -Greedy action tensor([ 0.9077, -0.7702, 0.0027, -0.6170]) tensor([0.5528, 0.1032, 0.2236, 0.1203]) -Greedy action tensor([ 0.3751, -0.0885, -0.0932, -0.4378]) tensor([0.3706, 0.2331, 0.2320, 0.1644]) -Greedy action tensor([ 1.0224, -0.6283, -0.0887, -0.6728]) tensor([0.5866, 0.1126, 0.1931, 0.1077]) -Greedy action tensor([ 1.0800, -0.5099, 0.0636, -0.7753]) tensor([0.5806, 0.1184, 0.2101, 0.0908]) -Greedy action tensor([ 0.2723, 0.0630, 0.0357, -0.2159]) tensor([0.3111, 0.2524, 0.2456, 0.1909]) -Greedy action tensor([ 0.2114, -0.1301, 0.0241, -0.2970]) tensor([0.3183, 0.2262, 0.2640, 0.1915]) -Greedy action tensor([ 0.4692, -0.2217, 0.0355, -0.2663]) tensor([0.3805, 0.1907, 0.2466, 0.1823]) -Greedy action tensor([ 0.4254, -0.0230, 0.0160, -0.1718]) tensor([0.3505, 0.2239, 0.2327, 0.1929]) -Greedy action tensor([ 0.2493, 0.2402, 0.1007, -0.3034]) tensor([0.2917, 0.2891, 0.2514, 0.1678]) -Greedy action tensor([ 0.4061, -0.2821, 0.2867, -0.4028]) tensor([0.3527, 0.1772, 0.3130, 0.1571]) -Greedy action tensor([ 0.3908, 0.0683, 0.0064, -0.1684]) tensor([0.3359, 0.2433, 0.2287, 0.1920]) -Greedy action tensor([ 0.1810, 0.1695, 0.1449, -0.2369]) tensor([0.2769, 0.2737, 0.2671, 0.1823]) -Greedy action tensor([ 0.4030, 0.1639, 0.1421, -0.0740]) tensor([0.3146, 0.2477, 0.2424, 0.1953]) -Greedy action tensor([ 0.8242, -0.4123, 0.0785, -0.4280]) tensor([0.4876, 0.1416, 0.2313, 0.1394]) -Greedy action tensor([ 0.3734, -0.2710, -0.0550, -0.4616]) tensor([0.3831, 0.2011, 0.2496, 0.1662]) -Greedy action tensor([ 0.3328, 0.0791, 0.0780, -0.2278]) tensor([0.3203, 0.2485, 0.2483, 0.1829]) -Greedy action tensor([ 0.8414, -0.4434, -0.0016, -0.3776]) tensor([0.4993, 0.1382, 0.2149, 0.1476]) -Greedy action tensor([ 0.5027, -0.2777, 0.1586, -0.5807]) tensor([0.3991, 0.1829, 0.2829, 0.1351]) -Greedy action tensor([ 0.4703, 0.0722, 0.0535, -0.2202]) tensor([0.3531, 0.2371, 0.2328, 0.1770]) -Greedy action tensor([ 0.7348, -0.3954, -0.0527, -0.4423]) tensor([0.4794, 0.1548, 0.2181, 0.1477]) -Greedy action tensor([-0.0910, -0.1762, -0.0417, -0.1920]) tensor([0.2582, 0.2371, 0.2713, 0.2334]) -Greedy action tensor([ 0.3302, 0.1574, 0.1236, -0.1638]) tensor([0.3063, 0.2577, 0.2491, 0.1869]) -Greedy action tensor([ 0.8175, -0.1286, -0.0570, -0.3462]) tensor([0.4722, 0.1833, 0.1969, 0.1475]) -Greedy action tensor([ 0.4034, -0.2546, -0.0135, -0.4863]) tensor([0.3864, 0.2001, 0.2547, 0.1587]) -Greedy action tensor([ 0.6721, -0.2278, 0.0112, -0.4584]) tensor([0.4453, 0.1811, 0.2299, 0.1438]) -Greedy action tensor([ 0.7041, -0.2578, -0.0252, -0.2982]) tensor([0.4482, 0.1713, 0.2161, 0.1645]) -Greedy action tensor([ 0.6897, -0.4228, 0.1980, -0.2922]) tensor([0.4320, 0.1420, 0.2642, 0.1618]) -Greedy action tensor([ 0.3592, 0.1355, 0.1778, -0.2775]) tensor([0.3162, 0.2528, 0.2637, 0.1673]) -Greedy action tensor([ 0.4684, 0.0776, 0.0281, -0.3304]) tensor([0.3610, 0.2442, 0.2324, 0.1624]) -Greedy action tensor([ 0.4757, -0.2129, 0.0604, -0.4629]) tensor([0.3916, 0.1967, 0.2585, 0.1532]) -Greedy action tensor([ 0.1205, -0.0273, -0.0134, -0.2247]) tensor([0.2902, 0.2504, 0.2539, 0.2055]) -Greedy action tensor([ 0.4375, -0.1900, -0.0146, -0.2761]) tensor([0.3759, 0.2007, 0.2392, 0.1842]) -Greedy action tensor([ 0.8259, -0.3317, 0.0152, -0.7138]) tensor([0.5068, 0.1593, 0.2253, 0.1087]) -Greedy action tensor([ 0.5120, -0.3459, 0.0921, -0.4056]) tensor([0.4031, 0.1709, 0.2649, 0.1610]) -Greedy action tensor([ 0.3197, 0.0288, 0.0378, -0.2881]) tensor([0.3282, 0.2454, 0.2476, 0.1788]) -Greedy action tensor([ 0.5809, -0.1767, 0.0055, -0.2182]) tensor([0.4031, 0.1890, 0.2267, 0.1813]) -Greedy action tensor([ 0.2428, -0.0388, 0.1184, -0.3639]) tensor([0.3142, 0.2371, 0.2774, 0.1713]) -Greedy action tensor([ 0.4026, -0.2551, 0.0887, -0.5148]) tensor([0.3776, 0.1956, 0.2759, 0.1509]) -Greedy action tensor([ 0.6082, -0.3277, 0.0801, -0.3791]) tensor([0.4247, 0.1666, 0.2505, 0.1582]) -Greedy action tensor([ 0.3934, 0.0641, -0.1230, -0.0493]) tensor([0.3380, 0.2432, 0.2017, 0.2171]) -Greedy action tensor([ 0.3571, 0.0745, 0.1667, -0.2973]) tensor([0.3226, 0.2432, 0.2666, 0.1677]) -Greedy action tensor([ 0.3349, -0.1276, 0.1008, -0.2352]) tensor([0.3348, 0.2109, 0.2650, 0.1893]) -Greedy action tensor([ 0.8922, -0.5282, 0.0398, -0.5795]) tensor([0.5270, 0.1273, 0.2247, 0.1210]) -Greedy action tensor([ 0.5345, -0.1497, -0.0423, -0.2570]) tensor([0.3969, 0.2002, 0.2229, 0.1799]) -Greedy action tensor([ 0.8323, -0.5808, 0.0625, -0.3865]) tensor([0.4995, 0.1216, 0.2313, 0.1476]) -Greedy action tensor([ 0.5834, -0.1770, -0.0228, -0.3913]) tensor([0.4184, 0.1956, 0.2282, 0.1579]) -Greedy action tensor([ 0.4141, 0.0203, 0.0019, -0.0454]) tensor([0.3369, 0.2272, 0.2231, 0.2128]) -Greedy action tensor([ 0.4706, -0.3729, 0.0228, -0.4418]) tensor([0.4047, 0.1741, 0.2586, 0.1625]) -Greedy action tensor([ 0.6084, -0.3787, -0.0320, -0.4329]) tensor([0.4439, 0.1654, 0.2340, 0.1567]) -Greedy action tensor([ 0.4521, -0.1728, 0.0220, -0.3707]) tensor([0.3810, 0.2039, 0.2478, 0.1673]) -Greedy action tensor([ 0.4819, -0.2828, -0.1053, -0.2896]) tensor([0.4026, 0.1874, 0.2238, 0.1861]) -Greedy action tensor([ 0.7552, -0.3098, -0.1114, -0.3792]) tensor([0.4792, 0.1652, 0.2014, 0.1541]) -Greedy action tensor([ 0.6209, -0.3736, -0.0583, -0.4564]) tensor([0.4510, 0.1668, 0.2287, 0.1536]) -Greedy action tensor([ 0.6641, -0.0794, -0.0488, -0.3436]) tensor([0.4291, 0.2040, 0.2103, 0.1566]) -Greedy action tensor([ 1.5247, -1.0726, 0.4899, 1.0164]) tensor([0.4923, 0.0367, 0.1749, 0.2961]) -Greedy action tensor([ 0.9217, -0.6302, -0.0721, 1.4195]) tensor([0.3099, 0.0656, 0.1147, 0.5098]) -Greedy action tensor([ 1.0940, 0.2978, -0.2753, 0.5300]) tensor([0.4397, 0.1983, 0.1118, 0.2502]) -Greedy action tensor([ 0.3440, 0.6999, -0.2341, 1.0711]) tensor([0.1977, 0.2822, 0.1109, 0.4091]) -Greedy action tensor([ 0.5245, 0.2160, -0.0198, -0.2257]) tensor([0.3588, 0.2636, 0.2082, 0.1695]) -Greedy action tensor([ 0.9771, 0.6318, -0.4337, 0.5223]) tensor([0.3866, 0.2737, 0.0943, 0.2453]) -Greedy action tensor([ 0.9534, -0.4300, -0.3006, 1.2812]) tensor([0.3420, 0.0857, 0.0976, 0.4747]) -Greedy action tensor([ 0.2665, 0.6294, -0.7177, 0.9136]) tensor([0.2118, 0.3045, 0.0792, 0.4046]) -Greedy action tensor([ 0.4414, 0.1468, -0.3115, -0.5284]) tensor([0.3853, 0.2870, 0.1815, 0.1461]) -Greedy action tensor([ 1.0281, -0.0622, -0.2198, 0.6152]) tensor([0.4376, 0.1471, 0.1257, 0.2896]) -Greedy action tensor([1.3597, 0.0077, 0.0671, 0.3380]) tensor([0.5282, 0.1367, 0.1450, 0.1901]) -Greedy action tensor([ 0.7811, 0.0055, 0.2973, -0.8925]) tensor([0.4416, 0.2033, 0.2722, 0.0828]) -Greedy action tensor([ 0.2597, 0.5338, -0.4900, 0.4147]) tensor([0.2528, 0.3325, 0.1195, 0.2952]) -Greedy action tensor([ 1.2318, -0.0518, 0.0353, 0.1813]) tensor([0.5184, 0.1436, 0.1567, 0.1813]) -Greedy action tensor([ 0.7917, 0.3053, -0.2040, -0.1201]) tensor([0.4191, 0.2577, 0.1548, 0.1684]) -Greedy action tensor([ 0.6736, -0.1904, -0.3484, 1.4279]) tensor([0.2559, 0.1079, 0.0921, 0.5441]) -Greedy action tensor([ 0.1639, 0.5938, -0.1116, 0.3838]) tensor([0.2202, 0.3384, 0.1671, 0.2743]) -Greedy action tensor([ 0.5278, -1.2265, 0.2420, 2.0858]) tensor([0.1498, 0.0259, 0.1126, 0.7116]) -Greedy action tensor([ 1.5661, 1.0478, -0.7779, 0.8914]) tensor([0.4544, 0.2706, 0.0436, 0.2314]) -Greedy action tensor([ 1.7287, 0.1960, -0.0924, 1.3046]) tensor([0.4921, 0.1063, 0.0796, 0.3220]) -Greedy action tensor([ 0.9516, -0.3027, 1.1721, 0.5451]) tensor([0.3127, 0.0892, 0.3899, 0.2082]) -Greedy action tensor([0.1042, 0.1807, 0.8556, 0.8523]) tensor([0.1584, 0.1710, 0.3358, 0.3347]) -Greedy action tensor([1.7683e+00, 1.2594e-04, 1.2930e+00, 1.0300e+00]) tensor([0.4405, 0.0752, 0.2738, 0.2105]) -Greedy action tensor([ 1.3518, 0.3767, -0.3379, 0.3703]) tensor([0.5164, 0.1948, 0.0953, 0.1935]) -Greedy action tensor([ 1.3978, -0.9488, 0.6643, 1.2082]) tensor([0.4161, 0.0398, 0.1998, 0.3443]) -Greedy action tensor([ 0.6519, -1.4710, -0.1365, 0.3973]) tensor([0.4256, 0.0509, 0.1935, 0.3299]) -Greedy action tensor([ 0.6827, -0.1483, -0.0509, 0.0622]) tensor([0.4076, 0.1776, 0.1957, 0.2191]) -Greedy action tensor([ 1.8273, 0.3507, -0.0780, 0.5349]) tensor([0.6054, 0.1383, 0.0901, 0.1662]) -Greedy action tensor([0.4771, 0.1500, 0.9778, 0.3065]) tensor([0.2373, 0.1711, 0.3915, 0.2001]) -Greedy action tensor([ 1.0991, 0.1060, -0.3246, 1.9397]) tensor([0.2545, 0.0943, 0.0613, 0.5899]) -Greedy action tensor([0.9944, 0.7697, 0.2119, 0.3144]) tensor([0.3620, 0.2891, 0.1655, 0.1834]) -Greedy action tensor([ 0.3260, 0.2952, -0.6576, 0.9588]) tensor([0.2366, 0.2294, 0.0885, 0.4455]) -Greedy action tensor([-0.0395, -0.2099, -0.8420, 1.6442]) tensor([0.1303, 0.1098, 0.0584, 0.7015]) -Greedy action tensor([ 0.8286, -1.0634, 0.0941, 0.6165]) tensor([0.4099, 0.0618, 0.1967, 0.3316]) -Greedy action tensor([ 0.3510, 0.3012, -0.0736, -0.0022]) tensor([0.3023, 0.2876, 0.1977, 0.2124]) -Greedy action tensor([ 1.1213, 0.6888, -0.4909, 0.3178]) tensor([0.4355, 0.2826, 0.0869, 0.1950]) -Greedy action tensor([ 1.0470, 0.3393, -0.2573, 0.3222]) tensor([0.4447, 0.2192, 0.1207, 0.2154]) -Greedy action tensor([ 0.4528, -0.3329, 0.0049, 0.6526]) tensor([0.3016, 0.1375, 0.1927, 0.3683]) -Greedy action tensor([ 0.6433, -0.0930, 1.3068, 1.9095]) tensor([0.1435, 0.0687, 0.2786, 0.5091]) -Greedy action tensor([ 0.4757, -0.7719, 0.5918, 0.2061]) tensor([0.3151, 0.0905, 0.3538, 0.2406]) -Greedy action tensor([ 0.5765, 0.8005, 1.5877, -0.4911]) tensor([0.1871, 0.2341, 0.5144, 0.0643]) -Greedy action tensor([ 1.5670, -0.5311, 0.1325, 0.4721]) tensor([0.5898, 0.0724, 0.1405, 0.1973]) -Greedy action tensor([ 0.8107, -1.5182, 1.3390, 0.1356]) tensor([0.3028, 0.0295, 0.5135, 0.1542]) -Greedy action tensor([ 0.9449, -0.5089, 0.3315, 0.0698]) tensor([0.4562, 0.1066, 0.2470, 0.1901]) -Greedy action tensor([ 1.0617, 0.3270, -0.3055, 0.0483]) tensor([0.4768, 0.2287, 0.1215, 0.1731]) -Greedy action tensor([ 1.5359, -0.6454, 0.1397, 1.2912]) tensor([0.4666, 0.0527, 0.1155, 0.3653]) -Greedy action tensor([ 1.9924, -0.4271, -0.3814, 1.1232]) tensor([0.6245, 0.0556, 0.0582, 0.2618]) -Greedy action tensor([ 1.3530, -0.3115, 1.3412, 1.0369]) tensor([0.3440, 0.0651, 0.3400, 0.2508]) -Greedy action tensor([ 0.8091, -1.2098, 1.1066, 0.1487]) tensor([0.3338, 0.0443, 0.4494, 0.1724]) -Greedy action tensor([ 1.5584, -0.8431, -0.3898, 1.1780]) tensor([0.5217, 0.0473, 0.0744, 0.3566]) -Greedy action tensor([-0.1841, -0.8497, 0.5169, 0.1057]) tensor([0.2055, 0.1056, 0.4143, 0.2746]) -Greedy action tensor([ 0.3821, 0.2135, -0.4262, 0.3182]) tensor([0.3097, 0.2617, 0.1380, 0.2906]) -Greedy action tensor([ 0.7104, 0.2526, 0.8109, -0.3941]) tensor([0.3258, 0.2061, 0.3602, 0.1079]) -Greedy action tensor([ 0.7106, 0.1257, -0.8472, 1.4712]) tensor([0.2559, 0.1426, 0.0539, 0.5476]) -Greedy action tensor([ 1.7888, -0.6210, 0.5396, 1.5708]) tensor([0.4586, 0.0412, 0.1315, 0.3687]) -Greedy action tensor([-0.3585, -1.5418, -0.0807, 0.7267]) tensor([0.1790, 0.0548, 0.2363, 0.5299]) -Greedy action tensor([1.4434, 0.0529, 1.2260, 1.8704]) tensor([0.2789, 0.0694, 0.2244, 0.4274]) -Greedy action tensor([ 0.2477, 1.0913, 1.2084, -0.6414]) tensor([0.1575, 0.3661, 0.4116, 0.0647]) -Greedy action tensor([ 1.4766, -0.0881, -0.2344, 1.6115]) tensor([0.3946, 0.0825, 0.0713, 0.4516]) -Greedy action tensor([1.3281, 0.2700, 0.4676, 0.4468]) tensor([0.4578, 0.1589, 0.1936, 0.1896]) -Greedy action tensor([ 0.5818, -0.3128, -0.2555, 1.2311]) tensor([0.2663, 0.1088, 0.1153, 0.5097]) -Greedy action tensor([0.9832, 0.6670, 0.0144, 0.0664]) tensor([0.3987, 0.2906, 0.1513, 0.1594]) -Greedy action tensor([ 0.0394, 0.3425, -0.5358, 1.0826]) tensor([0.1738, 0.2353, 0.0978, 0.4932]) -Greedy action tensor([ 1.2052, -1.2564, -0.7069, 0.7712]) tensor([0.5316, 0.0453, 0.0786, 0.3445]) -Greedy action tensor([0.4653, 0.0301, 0.3578, 0.8231]) tensor([0.2515, 0.1628, 0.2259, 0.3598]) -Greedy action tensor([ 0.5632, 0.9775, -0.1880, 0.5585]) tensor([0.2512, 0.3802, 0.1185, 0.2500]) -Greedy action tensor([ 0.8337, 0.1385, -0.3968, 0.2698]) tensor([0.4237, 0.2114, 0.1238, 0.2411]) -Greedy action tensor([ 1.0394, 0.4171, -0.5515, 1.1505]) tensor([0.3499, 0.1878, 0.0713, 0.3910]) -Greedy action tensor([ 0.5607, -0.0561, -0.4611, 1.5849]) tensor([0.2135, 0.1152, 0.0768, 0.5945]) -Greedy action tensor([ 1.3700, 0.2422, -0.4123, 1.2739]) tensor([0.4166, 0.1349, 0.0701, 0.3784]) -Greedy action tensor([ 0.3840, -1.2746, 0.1547, 0.2545]) tensor([0.3492, 0.0665, 0.2776, 0.3067]) -Greedy action tensor([0.7882, 0.1672, 0.0448, 0.0052]) tensor([0.4049, 0.2176, 0.1925, 0.1850]) -Greedy action tensor([ 1.2400, 0.9500, -0.5243, 0.4858]) tensor([0.4184, 0.3131, 0.0717, 0.1968]) -Greedy action tensor([ 1.0864, -0.8001, 0.9678, 0.2351]) tensor([0.4054, 0.0615, 0.3601, 0.1730]) -Greedy action tensor([ 0.7107, 0.3474, -0.2091, 0.0869]) tensor([0.3803, 0.2644, 0.1516, 0.2038]) -Greedy action tensor([ 1.0016, -0.4047, 0.8756, -0.0105]) tensor([0.4016, 0.0984, 0.3540, 0.1460]) -Greedy action tensor([0.5913, 0.2345, 1.5281, 0.7916]) tensor([0.1827, 0.1279, 0.4662, 0.2232]) -Greedy action tensor([ 0.6889, 0.6643, 0.4287, -0.0217]) tensor([0.3089, 0.3013, 0.2381, 0.1517]) -Greedy action tensor([ 1.8000, -1.2280, 1.2044, 0.9826]) tensor([0.4899, 0.0237, 0.2701, 0.2163]) -Greedy action tensor([0.3986, 0.3112, 0.5380, 0.5027]) tensor([0.2395, 0.2194, 0.2753, 0.2658]) -Greedy action tensor([ 1.0315, -0.1325, 0.4043, 1.0928]) tensor([0.3437, 0.1073, 0.1836, 0.3654]) -Greedy action tensor([ 1.2901, -1.0333, 0.1588, 1.1944]) tensor([0.4293, 0.0420, 0.1385, 0.3901]) -Greedy action tensor([-1.9555, -0.8787, 0.2818, -0.2431]) tensor([0.0531, 0.1558, 0.4971, 0.2941]) -Greedy action tensor([-1.7743, -0.4719, 1.2876, 0.8672]) tensor([0.0250, 0.0918, 0.5331, 0.3501]) -Greedy action tensor([-1.8770, -0.4471, 0.6576, -0.0874]) tensor([0.0421, 0.1757, 0.5304, 0.2518]) -Greedy action tensor([-1.2012, -0.6089, 0.3194, 0.2999]) tensor([0.0843, 0.1523, 0.3854, 0.3780]) -Greedy action tensor([-1.3186, -0.5536, 0.5932, -0.3697]) tensor([0.0800, 0.1720, 0.5413, 0.2067]) -Greedy action tensor([-1.9226, -0.4531, 0.6569, -0.1649]) tensor([0.0411, 0.1786, 0.5420, 0.2383]) -Greedy action tensor([-1.8683, -0.2967, 0.2281, -0.3048]) tensor([0.0534, 0.2571, 0.4345, 0.2550]) -Greedy action tensor([-1.2179, -0.5558, 0.4700, 0.8190]) tensor([0.0624, 0.1211, 0.3377, 0.4788]) -Greedy action tensor([-1.9003, -0.4460, 0.6416, -0.1578]) tensor([0.0422, 0.1807, 0.5361, 0.2410]) -Greedy action tensor([-0.9623, -0.6352, 0.4641, 0.9170]) tensor([0.0763, 0.1059, 0.3178, 0.4999]) -Greedy action tensor([-1.3481, -0.7406, 0.6035, -0.4113]) tensor([0.0805, 0.1477, 0.5665, 0.2053]) -Greedy action tensor([-1.4749, -0.2280, -0.0153, -0.4301]) tensor([0.0860, 0.2993, 0.3702, 0.2445]) -Greedy action tensor([-1.5798, -0.4782, 1.1097, 0.9918]) tensor([0.0314, 0.0946, 0.4627, 0.4113]) -Greedy action tensor([-0.2798, -0.1977, 0.1857, 0.2251]) tensor([0.1874, 0.2035, 0.2986, 0.3105]) -Greedy action tensor([-1.9866, -0.7950, 0.5413, -0.0788]) tensor([0.0424, 0.1398, 0.5318, 0.2860]) -Greedy action tensor([-1.8270, -0.4523, 0.5989, -0.1065]) tensor([0.0458, 0.1809, 0.5176, 0.2557]) -Greedy action tensor([-1.7513, -0.2931, 0.1549, -0.2999]) tensor([0.0614, 0.2638, 0.4128, 0.2620]) -Greedy action tensor([-1.3638, -0.5834, 0.6286, -0.1871]) tensor([0.0727, 0.1586, 0.5330, 0.2357]) -Greedy action tensor([-1.5750, -0.5456, 0.4751, 0.0852]) tensor([0.0594, 0.1663, 0.4616, 0.3126]) -Greedy action tensor([-1.4717, -0.3922, 0.5538, 0.4957]) tensor([0.0535, 0.1576, 0.4059, 0.3830]) -Greedy action tensor([-1.1180, -0.6892, 0.0501, -0.2884]) tensor([0.1243, 0.1909, 0.3998, 0.2850]) -Greedy action tensor([-1.7511, -0.5156, 0.5670, -0.0776]) tensor([0.0502, 0.1726, 0.5097, 0.2675]) -Greedy action tensor([-1.5143, -0.5387, 0.4557, 0.1314]) tensor([0.0625, 0.1657, 0.4479, 0.3239]) -Greedy action tensor([-0.9466, -0.5055, 0.5285, 1.1941]) tensor([0.0648, 0.1007, 0.2833, 0.5512]) -Greedy action tensor([-1.1534, -0.3469, 0.8654, 1.2009]) tensor([0.0469, 0.1052, 0.3535, 0.4944]) -Greedy action tensor([-0.6750, -0.5781, 0.1831, 0.2747]) tensor([0.1419, 0.1564, 0.3348, 0.3669]) -Greedy action tensor([-0.9155, -0.6385, 0.2762, 0.1453]) tensor([0.1176, 0.1552, 0.3873, 0.3398]) -Greedy action tensor([-0.9116, -0.3337, 0.7419, 1.2977]) tensor([0.0584, 0.1041, 0.3053, 0.5322]) -Greedy action tensor([-1.4808, -0.4828, 0.4414, 0.0797]) tensor([0.0653, 0.1772, 0.4465, 0.3110]) -Greedy action tensor([-0.7998, -0.4726, 0.4418, -0.2942]) tensor([0.1332, 0.1848, 0.4611, 0.2209]) -Greedy action tensor([-0.9288, -0.5680, 0.2050, 0.3246]) tensor([0.1106, 0.1586, 0.3436, 0.3872]) -Greedy action tensor([-1.2133, -0.3578, 0.8078, 1.1036]) tensor([0.0475, 0.1118, 0.3586, 0.4821]) -Greedy action tensor([-1.8514, -0.4627, 0.6171, -0.1024]) tensor([0.0443, 0.1777, 0.5232, 0.2548]) -Greedy action tensor([-1.6327, -0.4942, 0.5507, 0.1785]) tensor([0.0523, 0.1633, 0.4644, 0.3200]) -Greedy action tensor([-1.3496, -0.8093, -0.2052, -0.4569]) tensor([0.1205, 0.2068, 0.3784, 0.2942]) -Greedy action tensor([-1.1660, -0.5879, 0.2483, 0.3811]) tensor([0.0863, 0.1538, 0.3548, 0.4052]) -Greedy action tensor([-1.2118, -0.4227, 0.2034, -0.2400]) tensor([0.1004, 0.2210, 0.4133, 0.2653]) -Greedy action tensor([-1.0720, -0.6020, 0.2269, 0.2906]) tensor([0.0983, 0.1573, 0.3603, 0.3840]) -Greedy action tensor([-1.2028, -0.1864, 0.6398, 0.9079]) tensor([0.0546, 0.1507, 0.3444, 0.4503]) -Greedy action tensor([-1.7846, -0.7874, 0.0805, -0.4095]) tensor([0.0708, 0.1919, 0.4572, 0.2801]) -Greedy action tensor([-1.8270, -0.5081, 0.3604, -0.1422]) tensor([0.0525, 0.1964, 0.4680, 0.2831]) -Greedy action tensor([-1.6393, -0.2564, 0.4790, -0.0132]) tensor([0.0544, 0.2168, 0.4523, 0.2765]) -Greedy action tensor([-1.6693, -0.3949, 0.5049, -0.0063]) tensor([0.0536, 0.1918, 0.4717, 0.2829]) -Greedy action tensor([-1.9347, -0.9015, 0.2475, -0.2502]) tensor([0.0554, 0.1555, 0.4908, 0.2983]) -Greedy action tensor([-1.9694, -0.6957, 0.9307, 0.1763]) tensor([0.0320, 0.1142, 0.5807, 0.2731]) -Greedy action tensor([-1.5148, -0.5329, 0.4248, 0.1564]) tensor([0.0627, 0.1674, 0.4363, 0.3336]) -Greedy action tensor([-1.9894, -0.8132, 0.9987, 0.4103]) tensor([0.0285, 0.0923, 0.5653, 0.3139]) -Greedy action tensor([-1.9969, -0.6643, 0.8529, 0.0624]) tensor([0.0334, 0.1267, 0.5778, 0.2621]) -Greedy action tensor([-1.9361, -0.8412, 0.2393, -0.2655]) tensor([0.0552, 0.1650, 0.4862, 0.2935]) -Greedy action tensor([-1.6393, -0.7547, 0.5270, 0.0255]) tensor([0.0574, 0.1389, 0.5006, 0.3031]) -Greedy action tensor([-1.7139, -0.6626, 0.3166, -0.2019]) tensor([0.0624, 0.1787, 0.4757, 0.2832]) -Greedy action tensor([-1.6301, -0.5189, 0.5004, 0.0089]) tensor([0.0568, 0.1725, 0.4782, 0.2925]) -Greedy action tensor([-0.4735, -0.4706, 0.1748, 0.1314]) tensor([0.1740, 0.1745, 0.3328, 0.3187]) -Greedy action tensor([-1.4226, -0.5474, 0.7176, -0.2800]) tensor([0.0665, 0.1596, 0.5654, 0.2085]) -Greedy action tensor([-1.7376, -0.5369, 0.5829, -0.0394]) tensor([0.0501, 0.1664, 0.5099, 0.2736]) -Greedy action tensor([-1.4321, -0.6135, 0.4328, 0.0654]) tensor([0.0705, 0.1597, 0.4548, 0.3150]) -Greedy action tensor([-1.9498, 0.2444, 0.6094, 0.1504]) tensor([0.0322, 0.2888, 0.4160, 0.2629]) -Greedy action tensor([-1.4067, -0.9488, 0.5992, 0.9506]) tensor([0.0486, 0.0768, 0.3612, 0.5133]) -Greedy action tensor([-1.8015, -0.4787, 0.5906, -0.0760]) tensor([0.0469, 0.1762, 0.5133, 0.2636]) -Greedy action tensor([-1.7033, -0.5082, 0.5342, -0.0360]) tensor([0.0527, 0.1741, 0.4939, 0.2793]) -Greedy action tensor([-1.0501, -0.5554, 0.2508, 0.5460]) tensor([0.0889, 0.1458, 0.3266, 0.4387]) -Greedy action tensor([-1.8278, -0.3595, 0.1565, -0.3692]) tensor([0.0591, 0.2567, 0.4300, 0.2542]) -Greedy action tensor([-1.3952, -0.6113, 0.4003, 0.1694]) tensor([0.0715, 0.1565, 0.4304, 0.3416]) -Greedy action tensor([-1.0812, -0.5042, 1.3298, 1.3794]) tensor([0.0390, 0.0695, 0.4347, 0.4568]) -Greedy action tensor([-1.3678, -0.5838, 0.7235, -0.1494]) tensor([0.0682, 0.1493, 0.5519, 0.2306]) -Greedy action tensor([-1.9898, -0.9660, 0.2799, -0.2937]) tensor([0.0529, 0.1472, 0.5116, 0.2883]) -Greedy action tensor([-0.5804, -0.5890, 0.2533, 0.0722]) tensor([0.1609, 0.1596, 0.3704, 0.3091]) -Greedy action tensor([-0.8451, -0.7159, 0.4534, 0.3406]) tensor([0.1102, 0.1254, 0.4037, 0.3607]) -Greedy action tensor([-1.7591, -0.6391, 1.1908, 0.8063]) tensor([0.0276, 0.0847, 0.5281, 0.3595]) -Greedy action tensor([-1.7888, -0.3728, 0.5737, -0.0661]) tensor([0.0469, 0.1931, 0.4976, 0.2624]) -Greedy action tensor([-1.5031, -0.5128, 0.4547, 0.1512]) tensor([0.0625, 0.1682, 0.4426, 0.3267]) -Greedy action tensor([-0.2319, -0.0802, 0.1703, 0.2229]) tensor([0.1910, 0.2223, 0.2856, 0.3010]) -Greedy action tensor([-0.7861, -0.7676, 0.5149, -0.2993]) tensor([0.1366, 0.1392, 0.5019, 0.2223]) -Greedy action tensor([-1.7748, -0.4727, 0.5757, -0.0648]) tensor([0.0483, 0.1777, 0.5069, 0.2671]) -Greedy action tensor([-0.7033, -0.3387, 0.2937, 0.7495]) tensor([0.1061, 0.1528, 0.2876, 0.4536]) -Greedy action tensor([-1.7815, -0.5679, 0.1067, -0.3776]) tensor([0.0665, 0.2237, 0.4392, 0.2706]) -Greedy action tensor([-1.7371, -0.4966, 1.1456, 0.8416]) tensor([0.0282, 0.0974, 0.5032, 0.3713]) -Greedy action tensor([-1.2759, -0.5357, 0.3539, 0.2541]) tensor([0.0780, 0.1636, 0.3981, 0.3603]) -Greedy action tensor([-1.4253, -0.5767, 0.4790, 0.1361]) tensor([0.0675, 0.1577, 0.4532, 0.3216]) -Greedy action tensor([-1.8251, -0.5731, 0.8438, 0.2878]) tensor([0.0368, 0.1286, 0.5304, 0.3042]) -Greedy action tensor([-2.0230, -0.8095, 0.3892, -0.1321]) tensor([0.0451, 0.1519, 0.5038, 0.2991]) -Greedy action tensor([ 0.1446, 0.1977, 0.1268, -0.2136]) tensor([0.2677, 0.2823, 0.2630, 0.1871]) -Greedy action tensor([ 0.5072, -0.1393, 0.0105, -0.5027]) tensor([0.4005, 0.2098, 0.2438, 0.1459]) -Greedy action tensor([ 0.2918, -0.1013, 0.0520, -0.1961]) tensor([0.3251, 0.2194, 0.2558, 0.1996]) -Greedy action tensor([ 0.4202, 0.0120, -0.0989, -0.1302]) tensor([0.3525, 0.2344, 0.2098, 0.2033]) -Greedy action tensor([ 0.5934, -0.3450, -0.0795, -0.3404]) tensor([0.4358, 0.1705, 0.2224, 0.1713]) -Greedy action tensor([ 0.2426, -0.2139, 0.0057, -0.2834]) tensor([0.3318, 0.2102, 0.2618, 0.1961]) -Greedy action tensor([ 0.6941, -0.1739, 0.0280, -0.4306]) tensor([0.4428, 0.1859, 0.2275, 0.1438]) -Greedy action tensor([ 0.6514, -0.5190, -0.0407, -0.5135]) tensor([0.4711, 0.1462, 0.2358, 0.1470]) -Greedy action tensor([ 0.6912, -0.1220, -0.0018, -0.4569]) tensor([0.4423, 0.1962, 0.2212, 0.1403]) -Greedy action tensor([ 0.5375, -0.4396, 0.2063, -0.4625]) tensor([0.4061, 0.1529, 0.2916, 0.1494]) -Greedy action tensor([ 0.8285, -0.3836, -0.0631, -0.3719]) tensor([0.4978, 0.1482, 0.2041, 0.1499]) -Greedy action tensor([ 1.1634, -0.7153, 0.0248, -1.0172]) tensor([0.6305, 0.0963, 0.2019, 0.0712]) -Greedy action tensor([ 0.3317, 0.1318, 0.2123, -0.1189]) tensor([0.2991, 0.2449, 0.2654, 0.1906]) -Greedy action tensor([ 0.5724, -0.1583, 0.1182, -0.3582]) tensor([0.3983, 0.1918, 0.2529, 0.1571]) -Greedy action tensor([ 0.2205, -0.0701, 0.0465, -0.1915]) tensor([0.3076, 0.2301, 0.2585, 0.2038]) -Greedy action tensor([ 0.3786, 0.0022, 0.1606, -0.2460]) tensor([0.3305, 0.2268, 0.2657, 0.1770]) -Greedy action tensor([ 5.2212e-01, 1.4761e-04, 4.2020e-02, -3.3933e-01]) tensor([0.3796, 0.2252, 0.2348, 0.1604]) -Greedy action tensor([ 0.8116, -0.4001, -0.0202, -0.4373]) tensor([0.4951, 0.1474, 0.2155, 0.1420]) -Greedy action tensor([ 0.4866, -0.1580, -0.0536, -0.4191]) tensor([0.3981, 0.2090, 0.2320, 0.1609]) -Greedy action tensor([ 0.2512, -0.0322, -0.0536, -0.2161]) tensor([0.3208, 0.2416, 0.2365, 0.2010]) -Greedy action tensor([ 0.9149, -0.5077, 0.0640, -0.6873]) tensor([0.5349, 0.1290, 0.2284, 0.1078]) -Greedy action tensor([ 0.4665, 0.3326, 0.1609, -0.1446]) tensor([0.3170, 0.2773, 0.2336, 0.1721]) -Greedy action tensor([ 0.7258, -0.4345, -0.0704, -0.4860]) tensor([0.4849, 0.1520, 0.2187, 0.1444]) -Greedy action tensor([ 0.7678, -0.4448, -0.0241, -0.5839]) tensor([0.4977, 0.1480, 0.2255, 0.1288]) -Greedy action tensor([ 0.2362, 0.0711, 0.2347, -0.0924]) tensor([0.2804, 0.2377, 0.2800, 0.2019]) -Greedy action tensor([ 0.4069, -0.0956, 0.0818, -0.3271]) tensor([0.3562, 0.2155, 0.2573, 0.1710]) -Greedy action tensor([ 0.5476, -0.0431, 0.0950, -0.3612]) tensor([0.3857, 0.2136, 0.2453, 0.1554]) -Greedy action tensor([ 0.4047, -0.2615, -0.0599, -0.2952]) tensor([0.3790, 0.1947, 0.2381, 0.1882]) -Greedy action tensor([ 0.9048, -0.4261, 0.0708, -0.4759]) tensor([0.5128, 0.1355, 0.2227, 0.1289]) -Greedy action tensor([ 0.5686, -0.3183, -0.0783, -0.5892]) tensor([0.4445, 0.1831, 0.2328, 0.1396]) -Greedy action tensor([ 0.8999, -0.2889, 0.1021, -0.4938]) tensor([0.4992, 0.1521, 0.2248, 0.1239]) -Greedy action tensor([ 0.5929, -0.2269, 0.0199, -0.3629]) tensor([0.4186, 0.1844, 0.2360, 0.1610]) -Greedy action tensor([ 0.4089, 0.1763, 0.1288, -0.2370]) tensor([0.3255, 0.2579, 0.2460, 0.1706]) -Greedy action tensor([ 0.6237, -0.2730, -0.0470, -0.3721]) tensor([0.4369, 0.1782, 0.2234, 0.1614]) -Greedy action tensor([ 0.5728, 0.0598, -0.0169, -0.3540]) tensor([0.3923, 0.2349, 0.2175, 0.1553]) -Greedy action tensor([-0.0237, -0.0187, 0.0661, -0.0836]) tensor([0.2475, 0.2487, 0.2707, 0.2331]) -Greedy action tensor([ 0.3587, 0.0248, -0.0125, -0.1065]) tensor([0.3296, 0.2360, 0.2274, 0.2070]) -Greedy action tensor([ 0.6382, 0.0159, -0.1162, -0.2939]) tensor([0.4165, 0.2236, 0.1959, 0.1640]) -Greedy action tensor([ 0.3253, 0.1589, 0.1764, -0.2209]) tensor([0.3042, 0.2575, 0.2621, 0.1762]) -Greedy action tensor([ 0.6406, -0.5436, 0.0040, -0.6111]) tensor([0.4715, 0.1443, 0.2494, 0.1348]) -Greedy action tensor([ 0.4868, -0.1863, -0.1044, -0.2237]) tensor([0.3914, 0.1996, 0.2167, 0.1923]) -Greedy action tensor([ 0.5154, 0.1510, 0.1371, -0.1930]) tensor([0.3482, 0.2419, 0.2385, 0.1714]) -Greedy action tensor([ 0.6654, -0.2268, 0.0119, -0.3615]) tensor([0.4371, 0.1791, 0.2274, 0.1565]) -Greedy action tensor([ 0.1971, -0.1333, 0.1191, -0.3460]) tensor([0.3101, 0.2229, 0.2868, 0.1802]) -Greedy action tensor([ 0.0996, -0.0648, 0.1345, -0.1593]) tensor([0.2735, 0.2321, 0.2832, 0.2111]) -Greedy action tensor([ 0.2227, -0.0064, 0.0607, -0.1436]) tensor([0.2995, 0.2382, 0.2547, 0.2076]) -Greedy action tensor([ 0.6637, -0.3475, -0.0191, -0.6274]) tensor([0.4664, 0.1697, 0.2356, 0.1283]) -Greedy action tensor([ 0.2930, 0.0155, -0.0490, -0.0514]) tensor([0.3148, 0.2385, 0.2236, 0.2231]) -Greedy action tensor([ 0.5968, -0.3666, 0.0670, -0.6243]) tensor([0.4415, 0.1685, 0.2599, 0.1302]) -Greedy action tensor([ 0.5061, -0.3849, 0.0767, -0.3255]) tensor([0.4006, 0.1643, 0.2607, 0.1744]) -Greedy action tensor([ 0.9266, -0.4798, -0.1347, -0.4682]) tensor([0.5438, 0.1332, 0.1882, 0.1348]) -Greedy action tensor([ 0.2074, 0.0275, 0.1756, -0.3036]) tensor([0.2938, 0.2454, 0.2846, 0.1762]) -Greedy action tensor([ 0.3080, 0.0453, 0.0287, -0.1162]) tensor([0.3145, 0.2418, 0.2379, 0.2058]) -Greedy action tensor([ 0.7730, -0.3267, -0.0040, -0.3968]) tensor([0.4755, 0.1583, 0.2186, 0.1476]) -Greedy action tensor([ 0.5336, -0.3363, 0.1745, -0.3103]) tensor([0.3926, 0.1645, 0.2741, 0.1688]) -Greedy action tensor([ 0.5073, -0.0131, -0.0265, -0.1227]) tensor([0.3686, 0.2190, 0.2161, 0.1963]) -Greedy action tensor([ 0.4290, 0.1278, 0.0092, -0.2716]) tensor([0.3456, 0.2557, 0.2271, 0.1715]) -Greedy action tensor([ 0.7873, -0.2990, 0.0784, -0.5377]) tensor([0.4772, 0.1610, 0.2349, 0.1268]) -Greedy action tensor([ 1.2112, -0.4018, 0.1649, -0.7228]) tensor([0.5899, 0.1176, 0.2072, 0.0853]) -Greedy action tensor([ 0.4057, -0.0149, 0.1798, -0.3022]) tensor([0.3393, 0.2228, 0.2707, 0.1672]) -Greedy action tensor([ 0.3706, -0.1306, -0.0780, -0.2775]) tensor([0.3613, 0.2189, 0.2307, 0.1890]) -Greedy action tensor([ 0.3053, -0.0504, 0.0565, -0.3638]) tensor([0.3342, 0.2341, 0.2606, 0.1711]) -Greedy action tensor([ 0.4419, -0.1911, 0.0583, -0.3200]) tensor([0.3733, 0.1982, 0.2543, 0.1742]) -Greedy action tensor([ 0.1963, -0.0138, 0.0672, -0.2955]) tensor([0.3030, 0.2455, 0.2663, 0.1853]) -Greedy action tensor([ 0.5888, -0.3167, -0.2098, -0.2749]) tensor([0.4394, 0.1777, 0.1977, 0.1852]) -Greedy action tensor([ 0.9767, -0.4743, 0.0792, -0.6326]) tensor([0.5429, 0.1272, 0.2213, 0.1086]) -Greedy action tensor([ 0.6517, -0.1477, -0.0471, -0.3031]) tensor([0.4289, 0.1928, 0.2132, 0.1651]) -Greedy action tensor([ 0.4631, -0.0162, -0.0241, -0.2971]) tensor([0.3702, 0.2292, 0.2274, 0.1731]) -Greedy action tensor([ 0.4994, -0.3636, 0.3604, -0.5587]) tensor([0.3789, 0.1598, 0.3297, 0.1315]) -Greedy action tensor([ 0.8605, -0.5134, 0.0284, -0.4435]) tensor([0.5103, 0.1292, 0.2220, 0.1385]) -Greedy action tensor([ 0.7072, -0.7179, -0.1945, -0.7090]) tensor([0.5294, 0.1273, 0.2149, 0.1284]) -Greedy action tensor([ 0.9080, -0.4848, -0.1420, -0.6051]) tensor([0.5499, 0.1366, 0.1924, 0.1211]) -Greedy action tensor([ 0.3874, 0.0934, 0.1139, -0.1667]) tensor([0.3246, 0.2419, 0.2469, 0.1865]) -Greedy action tensor([ 0.5854, -0.1827, -0.0040, -0.4067]) tensor([0.4185, 0.1941, 0.2321, 0.1552]) -Greedy action tensor([ 0.6688, -0.2209, -0.1171, -0.1837]) tensor([0.4361, 0.1792, 0.1988, 0.1859]) -Greedy action tensor([ 0.5678, -0.3509, -0.1659, -0.3748]) tensor([0.4408, 0.1759, 0.2116, 0.1717]) -Greedy action tensor([ 0.2692, 0.1600, -0.0354, -0.3088]) tensor([0.3130, 0.2806, 0.2308, 0.1756]) -Greedy action tensor([ 0.3653, -0.0786, -0.0057, -0.2609]) tensor([0.3489, 0.2238, 0.2408, 0.1865]) -Greedy action tensor([ 0.6162, -0.3817, -0.0455, -0.5572]) tensor([0.4558, 0.1680, 0.2352, 0.1410]) -Greedy action tensor([ 0.4071, 0.1315, -0.0046, -0.3224]) tensor([0.3444, 0.2614, 0.2281, 0.1660]) -Greedy action tensor([ 0.6668, -0.1803, -0.0955, -0.3248]) tensor([0.4413, 0.1891, 0.2059, 0.1637]) -Greedy action tensor([ 0.8321, -0.0092, -0.1786, 0.1825]) tensor([0.4315, 0.1861, 0.1571, 0.2254]) -Greedy action tensor([ 0.1832, -1.0630, 0.3138, 0.1762]) tensor([0.2924, 0.0841, 0.3332, 0.2903]) -Greedy action tensor([ 0.7356, -0.1742, 0.2955, 1.8254]) tensor([0.1992, 0.0802, 0.1283, 0.5923]) -Greedy action tensor([ 1.7698, -0.8204, 1.4871, 0.7945]) tensor([0.4533, 0.0340, 0.3417, 0.1710]) -Greedy action tensor([-0.0113, -0.7142, 0.8320, 0.8943]) tensor([0.1589, 0.0787, 0.3693, 0.3931]) -Greedy action tensor([ 0.4083, -0.0191, 1.2561, 0.0660]) tensor([0.2129, 0.1389, 0.4970, 0.1512]) -Greedy action tensor([ 0.0233, -1.3576, -0.2972, 0.2236]) tensor([0.3126, 0.0786, 0.2269, 0.3819]) -Greedy action tensor([ 0.5448, -0.0589, 0.2473, -0.2311]) tensor([0.3637, 0.1989, 0.2701, 0.1674]) -Greedy action tensor([0.9841, 0.1019, 0.4388, 0.4785]) tensor([0.3851, 0.1594, 0.2232, 0.2323]) -Greedy action tensor([ 0.7380, -1.0200, -0.8389, -0.0268]) tensor([0.5422, 0.0935, 0.1120, 0.2523]) -Greedy action tensor([ 0.8665, -1.2168, -0.2504, 0.8189]) tensor([0.4157, 0.0518, 0.1361, 0.3964]) -Greedy action tensor([-0.2035, 0.1045, 0.0038, -0.6351]) tensor([0.2358, 0.3209, 0.2901, 0.1532]) -Greedy action tensor([ 0.9356, 0.4751, -0.4861, 1.0175]) tensor([0.3381, 0.2133, 0.0816, 0.3670]) -Greedy action tensor([ 1.1130, 0.4626, -0.8679, 0.8349]) tensor([0.4137, 0.2159, 0.0571, 0.3133]) -Greedy action tensor([ 1.0966, -0.3684, 0.9195, 0.5255]) tensor([0.3797, 0.0877, 0.3181, 0.2145]) -Greedy action tensor([ 1.4006, -0.9208, 0.9269, 1.1462]) tensor([0.4006, 0.0393, 0.2495, 0.3106]) -Greedy action tensor([ 0.9601, -0.1808, 0.8884, 1.0837]) tensor([0.2957, 0.0945, 0.2752, 0.3346]) -Greedy action tensor([ 1.3421, -0.0169, 0.6486, 0.9054]) tensor([0.4162, 0.1069, 0.2080, 0.2689]) -Greedy action tensor([ 1.0365, 0.2393, -0.1917, 0.1462]) tensor([0.4643, 0.2092, 0.1360, 0.1906]) -Greedy action tensor([ 1.2471, -0.3975, 1.1234, 0.8174]) tensor([0.3666, 0.0708, 0.3240, 0.2386]) -Greedy action tensor([ 1.8041, -1.1446, 1.2477, 0.2331]) tensor([0.5454, 0.0286, 0.3127, 0.1134]) -Greedy action tensor([ 0.3151, -0.0286, -0.2171, 0.5667]) tensor([0.2791, 0.1980, 0.1639, 0.3590]) -Greedy action tensor([ 1.6107, -0.3936, 1.5039, 0.5220]) tensor([0.4219, 0.0569, 0.3792, 0.1420]) -Greedy action tensor([ 1.0059, -0.7218, 1.8392, 0.0328]) tensor([0.2593, 0.0461, 0.5966, 0.0980]) -Greedy action tensor([ 0.8780, -1.1927, 0.9065, -0.0976]) tensor([0.3950, 0.0498, 0.4064, 0.1489]) -Greedy action tensor([ 0.5559, -1.3335, 0.4486, 0.6051]) tensor([0.3226, 0.0488, 0.2898, 0.3389]) -Greedy action tensor([ 0.2224, 0.3709, -0.6111, 1.3618]) tensor([0.1748, 0.2028, 0.0760, 0.5464]) -Greedy action tensor([ 1.2400, -0.2882, 1.7432, 0.5105]) tensor([0.2982, 0.0647, 0.4933, 0.1438]) -Greedy action tensor([-0.0015, -0.3024, 0.5193, 0.6551]) tensor([0.1869, 0.1383, 0.3145, 0.3603]) -Greedy action tensor([1.0054, 0.5740, 0.7476, 0.4273]) tensor([0.3352, 0.2177, 0.2590, 0.1880]) -Greedy action tensor([ 1.1283, -1.4367, 1.0963, 0.4706]) tensor([0.3901, 0.0300, 0.3778, 0.2021]) -Greedy action tensor([0.3269, 0.2543, 1.2674, 0.0804]) tensor([0.1897, 0.1764, 0.4857, 0.1482]) -Greedy action tensor([ 1.0756, -0.7983, 0.6842, 1.1055]) tensor([0.3496, 0.0537, 0.2364, 0.3603]) -Greedy action tensor([ 0.5998, 0.1249, -0.2701, 0.7721]) tensor([0.3097, 0.1926, 0.1298, 0.3679]) -Greedy action tensor([ 1.3669, -0.1086, 0.6781, 1.2168]) tensor([0.3859, 0.0882, 0.1938, 0.3321]) -Greedy action tensor([ 1.4691, -0.1278, 1.3834, 1.1275]) tensor([0.3532, 0.0715, 0.3242, 0.2510]) -Greedy action tensor([ 0.2046, -1.1334, -0.1096, 1.3965]) tensor([0.1892, 0.0496, 0.1382, 0.6230]) -Greedy action tensor([ 0.0899, 0.8818, -0.0842, 0.3484]) tensor([0.1872, 0.4132, 0.1573, 0.2424]) -Greedy action tensor([ 1.5358, 0.3655, -0.5782, 1.1227]) tensor([0.4779, 0.1483, 0.0577, 0.3162]) -Greedy action tensor([ 1.1561, -0.4400, 0.6184, 1.1622]) tensor([0.3581, 0.0726, 0.2091, 0.3602]) -Greedy action tensor([-0.1500, 0.6766, 0.2823, -0.5431]) tensor([0.1818, 0.4155, 0.2801, 0.1227]) -Greedy action tensor([ 1.0198, -0.3638, -0.1222, 1.2506]) tensor([0.3534, 0.0886, 0.1128, 0.4452]) -Greedy action tensor([ 1.0879, -0.5486, 1.1360, 0.9715]) tensor([0.3191, 0.0621, 0.3348, 0.2840]) -Greedy action tensor([ 0.3126, -0.3775, 0.5025, -0.1794]) tensor([0.3010, 0.1510, 0.3640, 0.1840]) -Greedy action tensor([ 0.2087, 0.7439, 0.2547, -0.6707]) tensor([0.2398, 0.4095, 0.2511, 0.0995]) -Greedy action tensor([ 1.2694, -0.3705, 0.4065, 0.8825]) tensor([0.4357, 0.0845, 0.1838, 0.2959]) -Greedy action tensor([ 1.9169, -1.3960, 0.0358, 0.7217]) tensor([0.6705, 0.0244, 0.1022, 0.2029]) -Greedy action tensor([ 0.4906, -1.1920, -0.5708, 0.6236]) tensor([0.3740, 0.0695, 0.1294, 0.4271]) -Greedy action tensor([1.4118, 0.1600, 1.0869, 0.3323]) tensor([0.4258, 0.1218, 0.3077, 0.1447]) -Greedy action tensor([ 0.7773, -0.3388, -0.5397, 0.5154]) tensor([0.4228, 0.1385, 0.1133, 0.3254]) -Greedy action tensor([ 1.5227, -0.5676, 0.3584, 1.5449]) tensor([0.4068, 0.0503, 0.1270, 0.4159]) -Greedy action tensor([0.3558, 0.3806, 1.4062, 0.8817]) tensor([0.1521, 0.1559, 0.4347, 0.2573]) -Greedy action tensor([ 0.4425, -0.2983, 0.2911, 1.5508]) tensor([0.1864, 0.0889, 0.1602, 0.5646]) -Greedy action tensor([ 0.9133, 0.0876, -0.5040, -0.2714]) tensor([0.5035, 0.2205, 0.1220, 0.1540]) -Greedy action tensor([1.4199, 0.5099, 0.4621, 1.7474]) tensor([0.3151, 0.1268, 0.1209, 0.4372]) -Greedy action tensor([ 0.9194, -0.6093, 0.8949, 0.4645]) tensor([0.3537, 0.0767, 0.3452, 0.2244]) -Greedy action tensor([ 0.8897, -1.3175, 0.3065, 0.2625]) tensor([0.4541, 0.0500, 0.2534, 0.2425]) -Greedy action tensor([0.6027, 0.9579, 1.2051, 0.3505]) tensor([0.1988, 0.2836, 0.3631, 0.1545]) -Greedy action tensor([ 0.5248, 0.3766, -0.9273, 0.1951]) tensor([0.3552, 0.3063, 0.0831, 0.2554]) -Greedy action tensor([ 1.0485, -0.6480, 0.5215, 1.0022]) tensor([0.3665, 0.0672, 0.2164, 0.3499]) -Greedy action tensor([ 1.3288, 0.0253, -0.2956, 1.6413]) tensor([0.3527, 0.0958, 0.0695, 0.4821]) -Greedy action tensor([-0.3046, 0.3731, 0.4795, 0.9710]) tensor([0.1144, 0.2253, 0.2506, 0.4097]) -Greedy action tensor([ 0.6649, 0.5924, -0.7731, -0.3379]) tensor([0.3946, 0.3670, 0.0937, 0.1448]) -Greedy action tensor([-1.4978, 0.3805, -0.1449, 0.1755]) tensor([0.0597, 0.3908, 0.2311, 0.3184]) -Greedy action tensor([0.6268, 0.1808, 0.6152, 0.7426]) tensor([0.2666, 0.1707, 0.2635, 0.2993]) -Greedy action tensor([ 0.5042, -0.5904, 0.0340, 0.8568]) tensor([0.2957, 0.0990, 0.1847, 0.4206]) -Greedy action tensor([0.8092, 0.2351, 1.1312, 0.9714]) tensor([0.2428, 0.1367, 0.3350, 0.2855]) -Greedy action tensor([ 1.0351, -1.3675, -0.2018, 0.7035]) tensor([0.4765, 0.0431, 0.1383, 0.3420]) -Greedy action tensor([ 1.1481, -0.4065, -0.1613, 0.8382]) tensor([0.4515, 0.0954, 0.1219, 0.3312]) -Greedy action tensor([ 0.9238, -0.1728, -0.2203, 0.8423]) tensor([0.3885, 0.1297, 0.1237, 0.3581]) -Greedy action tensor([ 0.7537, -0.4654, 0.8427, 0.2805]) tensor([0.3320, 0.0981, 0.3630, 0.2069]) -Greedy action tensor([ 1.0619, -0.0139, 1.2556, 0.5053]) tensor([0.3197, 0.1090, 0.3880, 0.1832]) -Greedy action tensor([ 2.1303, -0.4328, 1.1737, 0.6618]) tensor([0.5912, 0.0456, 0.2271, 0.1361]) -Greedy action tensor([ 1.0424, 0.5469, -0.6060, 0.6836]) tensor([0.4000, 0.2437, 0.0769, 0.2794]) -Greedy action tensor([ 2.0377, 0.0260, -0.0272, 0.3421]) tensor([0.6925, 0.0926, 0.0878, 0.1271]) -Greedy action tensor([ 1.7461, -0.0303, 0.7718, 1.3998]) tensor([0.4436, 0.0751, 0.1675, 0.3138]) -Greedy action tensor([ 1.2078, -1.2733, 1.8631, 0.1044]) tensor([0.2993, 0.0250, 0.5764, 0.0993]) -Greedy action tensor([ 0.9368, 0.0261, -0.1658, 0.5809]) tensor([0.4107, 0.1652, 0.1364, 0.2877]) -Greedy action tensor([ 1.6836, -1.0536, -0.0184, 0.7442]) tensor([0.6105, 0.0395, 0.1113, 0.2386]) -Greedy action tensor([-0.2534, 0.3518, 0.7733, -0.7343]) tensor([0.1602, 0.2934, 0.4473, 0.0990]) -Greedy action tensor([ 1.1524, 0.0792, -0.2885, 0.5560]) tensor([0.4696, 0.1606, 0.1112, 0.2587]) -Greedy action tensor([ 1.2593, -1.5887, -0.0394, 0.7234]) tensor([0.5219, 0.0302, 0.1424, 0.3054]) -Greedy action tensor([ 0.7222, -0.4994, -0.1327, -0.0265]) tensor([0.4560, 0.1344, 0.1939, 0.2157]) -Greedy action tensor([ 0.9892, -0.5007, -0.5697, 0.3135]) tensor([0.5143, 0.1159, 0.1082, 0.2617]) -Greedy action tensor([ 0.4989, -0.1571, -0.3594, 0.3698]) tensor([0.3544, 0.1839, 0.1502, 0.3115]) -Greedy action tensor([ 1.0371, -0.6667, -0.4220, 0.3535]) tensor([0.5211, 0.0948, 0.1211, 0.2630]) -Greedy action tensor([ 1.1849, -0.6582, -0.4862, -0.0457]) tensor([0.6103, 0.0966, 0.1148, 0.1783]) -Greedy action tensor([ 0.6290, -0.2056, -0.5023, -0.0239]) tensor([0.4391, 0.1906, 0.1417, 0.2286]) -Greedy action tensor([ 0.3339, -0.4347, 0.0095, -0.0551]) tensor([0.3491, 0.1619, 0.2524, 0.2366]) -Greedy action tensor([ 0.4922, -0.0274, -0.0736, -0.3649]) tensor([0.3866, 0.2299, 0.2195, 0.1640]) -Greedy action tensor([ 0.9617, -0.0929, -0.5803, -0.2713]) tensor([0.5395, 0.1879, 0.1154, 0.1572]) -Greedy action tensor([ 1.0861, -0.0519, 0.1468, -0.1569]) tensor([0.5000, 0.1602, 0.1955, 0.1443]) -Greedy action tensor([ 0.9986, -0.6158, -0.7054, 0.6010]) tensor([0.4871, 0.0969, 0.0886, 0.3273]) -Greedy action tensor([ 1.3498, -0.4062, -0.1826, 0.0580]) tensor([0.6011, 0.1038, 0.1299, 0.1652]) -Greedy action tensor([ 0.8649, -0.3233, -0.1793, -0.0165]) tensor([0.4829, 0.1472, 0.1699, 0.2000]) -Greedy action tensor([ 0.4848, -0.2315, -0.3265, 0.2915]) tensor([0.3627, 0.1772, 0.1611, 0.2990]) -Greedy action tensor([ 0.1975, -0.0122, -0.2532, -0.2324]) tensor([0.3227, 0.2617, 0.2056, 0.2100]) -Greedy action tensor([ 1.4182, -0.3632, -0.4903, 0.2972]) tensor([0.6088, 0.1025, 0.0903, 0.1984]) -Greedy action tensor([ 1.0602, -0.3599, -0.1732, 0.2008]) tensor([0.5111, 0.1235, 0.1489, 0.2164]) -Greedy action tensor([ 1.0918, -0.4309, -0.0997, -0.1022]) tensor([0.5480, 0.1195, 0.1665, 0.1660]) -Greedy action tensor([ 1.3287, -0.6871, -0.3233, -0.0225]) tensor([0.6314, 0.0841, 0.1210, 0.1635]) -Greedy action tensor([ 0.4901, -0.1246, -0.0875, -0.2272]) tensor([0.3861, 0.2088, 0.2167, 0.1884]) -Greedy action tensor([ 0.8877, -0.6161, 0.1317, -0.0350]) tensor([0.4786, 0.1064, 0.2247, 0.1902]) -Greedy action tensor([ 0.8929, -0.2089, -0.3266, -0.1423]) tensor([0.5043, 0.1676, 0.1490, 0.1791]) -Greedy action tensor([ 0.8026, -0.6314, -0.5491, 0.0726]) tensor([0.5053, 0.1204, 0.1308, 0.2435]) -Greedy action tensor([ 0.8752, -0.6958, -0.4848, 0.0519]) tensor([0.5253, 0.1092, 0.1348, 0.2306]) -Greedy action tensor([ 1.1278, -0.3215, -0.0480, 0.0310]) tensor([0.5327, 0.1250, 0.1644, 0.1779]) -Greedy action tensor([ 0.8904, -0.3607, -0.6090, 0.5010]) tensor([0.4573, 0.1309, 0.1021, 0.3098]) -Greedy action tensor([ 0.8819, -0.4948, 0.1998, 0.0292]) tensor([0.4578, 0.1156, 0.2315, 0.1952]) -Greedy action tensor([ 0.9455, -0.5980, -0.4103, 0.4411]) tensor([0.4819, 0.1029, 0.1242, 0.2910]) -Greedy action tensor([ 0.4405, -0.1842, -0.4657, -0.1261]) tensor([0.3989, 0.2136, 0.1612, 0.2263]) -Greedy action tensor([ 1.0062, -0.4488, 0.2252, 0.0245]) tensor([0.4840, 0.1130, 0.2217, 0.1813]) -Greedy action tensor([ 0.9715, -0.5774, -0.5088, 0.8158]) tensor([0.4356, 0.0925, 0.0991, 0.3728]) -Greedy action tensor([ 1.2016, -0.6199, -0.3239, -0.1662]) tensor([0.6120, 0.0990, 0.1331, 0.1559]) -Greedy action tensor([ 1.1945, -0.3558, -0.4429, 0.2608]) tensor([0.5556, 0.1179, 0.1081, 0.2184]) -Greedy action tensor([ 1.1291, -0.7717, -0.2316, 0.4139]) tensor([0.5277, 0.0789, 0.1353, 0.2581]) -Greedy action tensor([ 0.8358, -0.2697, -0.5281, 0.3740]) tensor([0.4511, 0.1493, 0.1153, 0.2842]) -Greedy action tensor([ 1.6811, -0.6345, -0.4415, -0.0675]) tensor([0.7182, 0.0709, 0.0860, 0.1250]) -Greedy action tensor([ 1.2471, -0.6519, -0.3398, 0.2163]) tensor([0.5844, 0.0875, 0.1196, 0.2085]) -Greedy action tensor([ 0.1838, -0.1980, -0.5550, -0.0674]) tensor([0.3403, 0.2323, 0.1626, 0.2647]) -Greedy action tensor([ 1.4195, -0.1702, -0.0171, -0.1570]) tensor([0.6066, 0.1237, 0.1442, 0.1254]) -Greedy action tensor([ 0.4801, -0.2313, -0.4313, 0.1641]) tensor([0.3814, 0.1873, 0.1533, 0.2781]) -Greedy action tensor([ 1.0340, -0.4375, -0.0851, 0.3619]) tensor([0.4838, 0.1111, 0.1580, 0.2471]) -Greedy action tensor([ 1.1002, -0.2184, -0.2727, -0.1102]) tensor([0.5498, 0.1471, 0.1393, 0.1639]) -Greedy action tensor([ 1.3882, -0.3309, -0.3757, 0.1548]) tensor([0.6091, 0.1092, 0.1044, 0.1774]) -Greedy action tensor([ 1.4582, -0.5780, -0.4587, 0.1836]) tensor([0.6422, 0.0838, 0.0944, 0.1795]) -Greedy action tensor([ 0.7563, -0.5222, -0.3054, 0.2035]) tensor([0.4546, 0.1266, 0.1572, 0.2616]) -Greedy action tensor([ 0.5959, -0.2157, -0.3451, -0.0845]) tensor([0.4272, 0.1897, 0.1667, 0.2163]) -Greedy action tensor([ 0.9112, -0.4357, -0.4370, -0.1078]) tensor([0.5317, 0.1383, 0.1381, 0.1919]) -Greedy action tensor([ 0.7123, -0.4226, -0.1137, 0.0093]) tensor([0.4436, 0.1426, 0.1942, 0.2196]) -Greedy action tensor([ 1.4180, -0.3564, -0.4307, -0.1068]) tensor([0.6474, 0.1098, 0.1019, 0.1409]) -Greedy action tensor([ 1.2542, -0.5863, -0.2199, 0.1451]) tensor([0.5822, 0.0924, 0.1333, 0.1920]) -Greedy action tensor([ 1.2664, -0.7462, -0.5343, 0.3800]) tensor([0.5845, 0.0781, 0.0965, 0.2409]) -Greedy action tensor([ 0.3763, -0.2878, -0.7265, -0.0506]) tensor([0.4001, 0.2060, 0.1328, 0.2611]) -Greedy action tensor([ 0.8437, -0.4683, -0.3419, 0.0522]) tensor([0.4931, 0.1328, 0.1507, 0.2235]) -Greedy action tensor([ 1.1318, -0.1965, -0.2330, -0.1278]) tensor([0.5543, 0.1468, 0.1416, 0.1573]) -Greedy action tensor([ 0.8959, -0.3735, -0.5832, 0.1029]) tensor([0.5099, 0.1433, 0.1162, 0.2307]) -Greedy action tensor([ 0.9556, -0.6113, 0.1100, 0.0171]) tensor([0.4928, 0.1028, 0.2116, 0.1928]) -Greedy action tensor([ 1.1527, -0.4626, -0.3855, 0.3698]) tensor([0.5346, 0.1063, 0.1148, 0.2443]) -Greedy action tensor([ 1.3516, -0.6194, -0.2153, 0.2688]) tensor([0.5929, 0.0826, 0.1237, 0.2008]) -Greedy action tensor([ 0.1919, -0.0899, -0.3962, 0.1567]) tensor([0.3053, 0.2304, 0.1696, 0.2948]) -Greedy action tensor([ 1.3741, -0.4335, -0.1837, 0.0520]) tensor([0.6093, 0.1000, 0.1283, 0.1624]) -Greedy action tensor([ 0.7890, -0.5059, -0.0740, -0.1188]) tensor([0.4764, 0.1305, 0.2010, 0.1922]) -Greedy action tensor([ 1.2953, -0.4600, -0.8107, -0.1950]) tensor([0.6579, 0.1137, 0.0801, 0.1482]) -Greedy action tensor([ 1.2480, -0.4188, 0.0577, 0.0076]) tensor([0.5611, 0.1060, 0.1707, 0.1623]) -Greedy action tensor([ 0.5803, -0.4065, -0.3411, 0.0620]) tensor([0.4226, 0.1575, 0.1682, 0.2517]) -Greedy action tensor([ 0.7798, -0.0248, -0.1619, -0.5486]) tensor([0.4757, 0.2128, 0.1855, 0.1260]) -Greedy action tensor([ 0.8146, -0.6931, -0.0613, 0.1481]) tensor([0.4648, 0.1029, 0.1936, 0.2387]) -Greedy action tensor([ 0.7714, -0.4894, -0.0090, -0.1947]) tensor([0.4712, 0.1336, 0.2159, 0.1793]) -Greedy action tensor([ 1.0341, -0.3054, -0.1491, 0.5148]) tensor([0.4623, 0.1211, 0.1416, 0.2750]) -Greedy action tensor([ 0.8336, -0.5805, -0.3701, -0.0506]) tensor([0.5112, 0.1243, 0.1534, 0.2111]) -Greedy action tensor([ 1.2455, -0.1452, -0.0624, -0.2041]) tensor([0.5702, 0.1419, 0.1542, 0.1338]) -Greedy action tensor([ 0.7881, -0.2273, -0.3046, -0.2319]) tensor([0.4859, 0.1760, 0.1629, 0.1752]) -Greedy action tensor([ 0.8959, -0.5751, -0.1943, 0.1280]) tensor([0.4927, 0.1132, 0.1656, 0.2286]) -Greedy action tensor([ 0.7553, -0.5405, -0.3112, 0.1898]) tensor([0.4575, 0.1252, 0.1575, 0.2599]) -Greedy action tensor([ 0.6544, -0.4278, -0.5798, 0.1161]) tensor([0.4517, 0.1531, 0.1315, 0.2637]) -Greedy action tensor([ 0.8214, -0.4772, -0.2849, 0.1071]) tensor([0.4777, 0.1304, 0.1580, 0.2339]) -Greedy action tensor([ 0.9999, -0.7328, -0.4493, 0.4430]) tensor([0.5039, 0.0891, 0.1183, 0.2887]) -Greedy action tensor([ 1.3173, -0.5440, -0.3836, 0.6365]) tensor([0.5422, 0.0843, 0.0990, 0.2745]) -Greedy action tensor([ 0.7644, -0.3379, -0.1371, -0.3080]) tensor([0.4807, 0.1596, 0.1951, 0.1645]) -Greedy action tensor([ 1.4617, -0.6133, -0.0872, 0.1103]) tensor([0.6262, 0.0786, 0.1331, 0.1621]) -Greedy action tensor([ 0.4748, -0.2231, -0.4591, -0.0664]) tensor([0.4044, 0.2012, 0.1589, 0.2354]) -Greedy action tensor([ 0.7114, -0.5189, -0.3344, 0.1288]) tensor([0.4541, 0.1327, 0.1596, 0.2536]) -Greedy action tensor([-1.8033, -0.4118, 0.6141, -0.0078]) tensor([0.0449, 0.1806, 0.5039, 0.2705]) -Greedy action tensor([-1.5185, -0.5303, 1.0057, 0.3915]) tensor([0.0436, 0.1172, 0.5445, 0.2946]) -Greedy action tensor([-1.7883, -0.6070, 0.0873, -0.3692]) tensor([0.0670, 0.2184, 0.4374, 0.2771]) -Greedy action tensor([-1.3943, 0.1225, 0.0734, -0.3882]) tensor([0.0792, 0.3608, 0.3435, 0.2165]) -Greedy action tensor([-0.8961, -0.4773, 0.5020, -0.3578]) tensor([0.1208, 0.1836, 0.4888, 0.2069]) -Greedy action tensor([-1.9842, -0.7281, 0.8871, 0.1056]) tensor([0.0331, 0.1161, 0.5837, 0.2672]) -Greedy action tensor([-1.3592, -0.4677, 0.9717, 1.1347]) tensor([0.0387, 0.0944, 0.3982, 0.4687]) -Greedy action tensor([-1.7628, 0.4864, 0.4510, 0.0291]) tensor([0.0390, 0.3699, 0.3570, 0.2341]) -Greedy action tensor([-0.5790, -0.5340, 0.1734, 0.2210]) tensor([0.1564, 0.1636, 0.3319, 0.3481]) -Greedy action tensor([-1.9554, -0.9617, 0.4185, -0.3465]) tensor([0.0514, 0.1390, 0.5525, 0.2571]) -Greedy action tensor([-1.7276, -0.9799, 0.1332, -0.5486]) tensor([0.0782, 0.1651, 0.5026, 0.2542]) -Greedy action tensor([-1.1012, -0.4026, 0.5558, 1.0295]) tensor([0.0600, 0.1206, 0.3145, 0.5050]) -Greedy action tensor([-1.1938, -0.4922, 0.0827, -0.2899]) tensor([0.1103, 0.2224, 0.3951, 0.2722]) -Greedy action tensor([-2.0441, -0.8654, 0.8156, 0.0502]) tensor([0.0335, 0.1090, 0.5853, 0.2722]) -Greedy action tensor([-1.2580, -0.5247, 1.2814, 1.3248]) tensor([0.0345, 0.0718, 0.4371, 0.4566]) -Greedy action tensor([-1.9379, -0.4763, 0.7786, 0.0553]) tensor([0.0360, 0.1553, 0.5446, 0.2642]) -Greedy action tensor([-1.5166, -0.5174, 0.4364, 0.1279]) tensor([0.0627, 0.1704, 0.4421, 0.3248]) -Greedy action tensor([-1.9727, -0.6467, 0.9066, 0.1054]) tensor([0.0327, 0.1232, 0.5826, 0.2615]) -Greedy action tensor([-1.5964, -0.3611, 0.7727, 0.6897]) tensor([0.0401, 0.1378, 0.4281, 0.3940]) -Greedy action tensor([-0.9636, 0.7556, 0.1405, 0.3488]) tensor([0.0751, 0.4192, 0.2266, 0.2791]) -Greedy action tensor([-1.2990, -0.5477, 0.7932, 0.9270]) tensor([0.0488, 0.1035, 0.3955, 0.4522]) -Greedy action tensor([-1.4845, -0.4573, 0.6468, 0.7121]) tensor([0.0471, 0.1317, 0.3972, 0.4240]) -Greedy action tensor([-1.7574, -0.4998, 0.5718, -0.0480]) tensor([0.0492, 0.1731, 0.5056, 0.2720]) -Greedy action tensor([-1.4339, -0.1251, 0.8122, 0.6585]) tensor([0.0449, 0.1663, 0.4246, 0.3641]) -Greedy action tensor([-0.1746, -0.4266, 0.2623, 0.2730]) tensor([0.2045, 0.1590, 0.3166, 0.3200]) -Greedy action tensor([-1.2280, -0.4337, 0.2712, 0.4848]) tensor([0.0756, 0.1672, 0.3383, 0.4189]) -Greedy action tensor([-0.7420, -0.5704, 0.1679, 0.2894]) tensor([0.1338, 0.1588, 0.3323, 0.3752]) -Greedy action tensor([-2.0059, -0.8647, 0.3072, -0.1962]) tensor([0.0492, 0.1539, 0.4967, 0.3003]) -Greedy action tensor([-1.1213, -0.4563, -0.3551, -0.1282]) tensor([0.1283, 0.2494, 0.2760, 0.3463]) -Greedy action tensor([-1.7094, -0.5202, 0.6200, 0.0580]) tensor([0.0490, 0.1609, 0.5032, 0.2869]) -Greedy action tensor([-1.4877, -0.5395, 0.4237, 0.1412]) tensor([0.0648, 0.1671, 0.4380, 0.3302]) -Greedy action tensor([-1.0368, -0.2987, 0.6020, -0.5577]) tensor([0.1015, 0.2123, 0.5224, 0.1638]) -Greedy action tensor([-1.5104, -0.5983, 0.5876, 0.3141]) tensor([0.0561, 0.1396, 0.4569, 0.3475]) -Greedy action tensor([-2.0148, -0.8237, 0.8746, 0.1275]) tensor([0.0325, 0.1069, 0.5840, 0.2767]) -Greedy action tensor([-0.8405, 0.6963, 0.2442, -0.2374]) tensor([0.0958, 0.4455, 0.2835, 0.1751]) -Greedy action tensor([-0.9147, -0.5547, 0.4261, 0.6793]) tensor([0.0895, 0.1282, 0.3419, 0.4404]) -Greedy action tensor([-1.0845, -0.2498, 0.6253, -0.5947]) tensor([0.0956, 0.2202, 0.5283, 0.1560]) -Greedy action tensor([-1.1752, -0.4354, 0.8551, 1.0327]) tensor([0.0505, 0.1058, 0.3845, 0.4592]) -Greedy action tensor([-1.6195, -0.5401, 0.5043, 0.0220]) tensor([0.0572, 0.1685, 0.4787, 0.2956]) -Greedy action tensor([-0.6749, -0.1547, 0.2628, -0.0977]) tensor([0.1425, 0.2397, 0.3640, 0.2538]) -Greedy action tensor([-1.5513, -0.4166, -0.0570, -0.3330]) tensor([0.0837, 0.2603, 0.3730, 0.2830]) -Greedy action tensor([-1.9574, -0.4599, 0.9578, 0.3310]) tensor([0.0296, 0.1323, 0.5462, 0.2918]) -Greedy action tensor([-0.7722, -0.5113, 0.3608, -0.0387]) tensor([0.1336, 0.1734, 0.4148, 0.2782]) -Greedy action tensor([-1.5026, -0.7799, 0.6930, 0.5262]) tensor([0.0509, 0.1048, 0.4573, 0.3870]) -Greedy action tensor([-1.9414, -0.6093, 1.0354, 0.3956]) tensor([0.0288, 0.1090, 0.5645, 0.2977]) -Greedy action tensor([-1.8654, -0.4712, 0.6289, -0.1362]) tensor([0.0439, 0.1770, 0.5317, 0.2474]) -Greedy action tensor([-1.3085, -0.5206, 0.5451, 0.6166]) tensor([0.0608, 0.1338, 0.3883, 0.4171]) -Greedy action tensor([-1.4129, -0.2912, -0.0395, 0.7622]) tensor([0.0594, 0.1825, 0.2347, 0.5233]) -Greedy action tensor([-0.9622, -0.5683, 0.2949, 0.1438]) tensor([0.1109, 0.1644, 0.3897, 0.3351]) -Greedy action tensor([-1.2855, -0.3956, 0.7690, 0.9731]) tensor([0.0481, 0.1170, 0.3750, 0.4599]) -Greedy action tensor([-0.4111, -0.3582, 0.2441, 0.6301]) tensor([0.1468, 0.1548, 0.2826, 0.4158]) -Greedy action tensor([-1.4081, -0.5367, 0.3976, 0.2798]) tensor([0.0672, 0.1606, 0.4088, 0.3634]) -Greedy action tensor([-1.5230, -0.5429, 0.5682, 0.3807]) tensor([0.0541, 0.1443, 0.4383, 0.3633]) -Greedy action tensor([-1.7547, -0.4390, 0.8126, 0.5287]) tensor([0.0363, 0.1352, 0.4727, 0.3558]) -Greedy action tensor([-1.6341, -0.4314, 0.5855, 0.2469]) tensor([0.0498, 0.1657, 0.4580, 0.3265]) -Greedy action tensor([-1.2340, -0.5763, 0.4691, 0.7522]) tensor([0.0637, 0.1229, 0.3496, 0.4639]) -Greedy action tensor([-0.5194, -0.4401, 0.2342, 0.1048]) tensor([0.1646, 0.1782, 0.3498, 0.3073]) -Greedy action tensor([-1.6328, -0.3834, 0.5025, 0.2528]) tensor([0.0512, 0.1785, 0.4330, 0.3373]) -Greedy action tensor([-1.9851, -0.6102, 1.0477, 0.1848]) tensor([0.0290, 0.1147, 0.6022, 0.2541]) -Greedy action tensor([-1.8442, -0.9181, 0.3451, -0.2343]) tensor([0.0573, 0.1446, 0.5115, 0.2866]) -Greedy action tensor([-1.0911, -0.5391, 0.3189, 0.7360]) tensor([0.0766, 0.1331, 0.3139, 0.4764]) -Greedy action tensor([-1.4685, -0.5105, 0.4333, 0.0599]) tensor([0.0670, 0.1747, 0.4491, 0.3091]) -Greedy action tensor([-1.0581, -0.0121, 0.2349, 0.4571]) tensor([0.0831, 0.2364, 0.3026, 0.3779]) -Greedy action tensor([-1.7066, -0.4554, 0.5354, -0.0351]) tensor([0.0520, 0.1818, 0.4895, 0.2767]) -Greedy action tensor([-1.5315, -0.5488, 0.4410, 0.0829]) tensor([0.0630, 0.1682, 0.4525, 0.3163]) -Greedy action tensor([-1.8668, -0.5123, 1.3232, 0.8071]) tensor([0.0229, 0.0887, 0.5563, 0.3320]) -Greedy action tensor([-0.9230, -0.6200, 0.4495, 0.8563]) tensor([0.0818, 0.1108, 0.3227, 0.4847]) -Greedy action tensor([-1.1642, -0.6032, 0.2606, 0.2678]) tensor([0.0901, 0.1579, 0.3746, 0.3773]) -Greedy action tensor([-0.8180, -0.5198, 1.1841, 1.5584]) tensor([0.0487, 0.0657, 0.3609, 0.5247]) -Greedy action tensor([-1.7618, -0.6811, 0.3046, -0.1809]) tensor([0.0599, 0.1764, 0.4728, 0.2909]) -Greedy action tensor([-1.8553, -0.4499, 0.6954, 0.0114]) tensor([0.0411, 0.1674, 0.5261, 0.2655]) -Greedy action tensor([-0.9458, -0.6382, -0.0356, -0.3242]) tensor([0.1491, 0.2028, 0.3705, 0.2776]) -Greedy action tensor([-1.1635, -0.5757, 0.4244, 0.6552]) tensor([0.0722, 0.1299, 0.3531, 0.4448]) -Greedy action tensor([-1.4303, -0.5780, 0.4597, 0.1560]) tensor([0.0673, 0.1579, 0.4457, 0.3290]) -Greedy action tensor([-1.3087, -0.5711, 0.3685, 0.2298]) tensor([0.0763, 0.1596, 0.4085, 0.3556]) -Greedy action tensor([-0.7130, -0.6109, 0.2884, 0.1351]) tensor([0.1396, 0.1546, 0.3799, 0.3259]) -Greedy action tensor([-1.9256, -0.4501, 0.6544, -0.1696]) tensor([0.0411, 0.1795, 0.5418, 0.2377]) -Greedy action tensor([-1.4908, -0.2997, 1.2394, 1.0057]) tensor([0.0315, 0.1036, 0.4828, 0.3822]) -Greedy action tensor([-1.9204, 0.3415, 0.7127, 0.3776]) tensor([0.0290, 0.2785, 0.4037, 0.2888]) -Greedy action tensor([-1.5229, -0.4288, 0.6043, 0.5063]) tensor([0.0500, 0.1494, 0.4199, 0.3807]) -Greedy action tensor([-1.8156, -0.4850, 0.6288, -0.0463]) tensor([0.0451, 0.1706, 0.5197, 0.2646]) -Greedy action tensor([ 0.7570, -0.2993, -0.1033, -0.2138]) tensor([0.4652, 0.1618, 0.1968, 0.1762]) -Greedy action tensor([ 0.2159, -0.0801, -0.1448, -0.2103]) tensor([0.3232, 0.2404, 0.2253, 0.2111]) -Greedy action tensor([ 0.3056, -0.2060, -0.0101, -0.4164]) tensor([0.3553, 0.2130, 0.2591, 0.1726]) -Greedy action tensor([ 0.4797, -0.1782, -0.0587, -0.3238]) tensor([0.3923, 0.2032, 0.2289, 0.1756]) -Greedy action tensor([ 0.7903, -0.4412, -0.0262, -0.4511]) tensor([0.4944, 0.1443, 0.2185, 0.1429]) -Greedy action tensor([ 0.4160, -0.0587, -0.0598, -0.3579]) tensor([0.3697, 0.2300, 0.2297, 0.1705]) -Greedy action tensor([ 0.1839, 0.0633, 0.0806, -0.2509]) tensor([0.2911, 0.2580, 0.2625, 0.1884]) -Greedy action tensor([ 1.0816, -0.6773, -0.0200, -0.7409]) tensor([0.6002, 0.1034, 0.1995, 0.0970]) -Greedy action tensor([ 0.8348, -0.2534, 0.1294, -0.5581]) tensor([0.4810, 0.1620, 0.2376, 0.1195]) -Greedy action tensor([ 0.7987, -0.5100, -0.0380, -0.8079]) tensor([0.5252, 0.1419, 0.2275, 0.1054]) -Greedy action tensor([ 0.5839, -0.5293, 0.2850, -0.6944]) tensor([0.4258, 0.1399, 0.3158, 0.1186]) -Greedy action tensor([ 0.6445, -0.4453, 0.0907, -0.5397]) tensor([0.4511, 0.1517, 0.2593, 0.1380]) -Greedy action tensor([ 3.9990e-01, -3.9843e-04, 4.8284e-03, -2.9963e-01]) tensor([0.3520, 0.2359, 0.2371, 0.1749]) -Greedy action tensor([ 0.7416, -0.3055, -0.0987, -0.2804]) tensor([0.4668, 0.1638, 0.2015, 0.1680]) -Greedy action tensor([ 0.2952, 0.2705, 0.1177, -0.2740]) tensor([0.2959, 0.2887, 0.2478, 0.1675]) -Greedy action tensor([ 0.7860, -0.1896, 0.0250, -0.3755]) tensor([0.4636, 0.1747, 0.2166, 0.1451]) -Greedy action tensor([ 0.2912, 0.1587, 0.1595, -0.1886]) tensor([0.2966, 0.2598, 0.2600, 0.1836]) -Greedy action tensor([ 0.5481, -0.1518, 0.0665, -0.5104]) tensor([0.4063, 0.2018, 0.2510, 0.1410]) -Greedy action tensor([ 0.7050, -0.4324, -0.0471, -0.5655]) tensor([0.4825, 0.1547, 0.2274, 0.1354]) -Greedy action tensor([ 0.5088, 0.0732, 0.0582, -0.2118]) tensor([0.3609, 0.2335, 0.2300, 0.1756]) -Greedy action tensor([ 0.2426, -0.1528, 0.2007, -0.2718]) tensor([0.3096, 0.2085, 0.2969, 0.1851]) -Greedy action tensor([ 0.5772, -0.1820, -0.0010, -0.2656]) tensor([0.4066, 0.1903, 0.2281, 0.1750]) -Greedy action tensor([ 0.3611, -0.1707, 0.0495, -0.3548]) tensor([0.3561, 0.2092, 0.2607, 0.1740]) -Greedy action tensor([ 0.3884, -0.1786, -0.2090, -0.4202]) tensor([0.3902, 0.2213, 0.2147, 0.1738]) -Greedy action tensor([ 4.2761e-01, 3.9210e-02, -1.1215e-02, -3.4272e-04]) tensor([0.3362, 0.2280, 0.2168, 0.2191]) -Greedy action tensor([ 0.7429, -0.3653, -0.0454, -0.5473]) tensor([0.4854, 0.1603, 0.2207, 0.1336]) -Greedy action tensor([ 0.6065, -0.4382, -0.0423, -0.4518]) tensor([0.4501, 0.1584, 0.2353, 0.1562]) -Greedy action tensor([ 0.4401, -0.2215, 0.1399, -0.3456]) tensor([0.3687, 0.1902, 0.2730, 0.1680]) -Greedy action tensor([ 1.1167, -0.5149, 0.0583, -0.7443]) tensor([0.5889, 0.1152, 0.2043, 0.0916]) -Greedy action tensor([ 0.7270, -0.3379, 0.1155, -0.6120]) tensor([0.4652, 0.1604, 0.2524, 0.1219]) -Greedy action tensor([ 0.4012, 0.0579, 0.0970, -0.3663]) tensor([0.3435, 0.2437, 0.2534, 0.1594]) -Greedy action tensor([ 0.5859, -0.0224, 0.0989, -0.1324]) tensor([0.3779, 0.2057, 0.2322, 0.1842]) -Greedy action tensor([ 0.2026, -0.0534, 0.1640, -0.1788]) tensor([0.2925, 0.2264, 0.2814, 0.1997]) -Greedy action tensor([ 0.9023, -0.7939, -0.2012, -0.7543]) tensor([0.5862, 0.1075, 0.1944, 0.1118]) -Greedy action tensor([ 0.2863, -0.0300, -0.1571, -0.3887]) tensor([0.3472, 0.2531, 0.2229, 0.1768]) -Greedy action tensor([ 0.7518, -0.4777, 0.0107, -0.5837]) tensor([0.4921, 0.1439, 0.2345, 0.1294]) -Greedy action tensor([ 0.6624, -0.2630, 0.0133, -0.3547]) tensor([0.4385, 0.1738, 0.2291, 0.1586]) -Greedy action tensor([ 0.8077, -0.1282, 0.1088, -0.4590]) tensor([0.4606, 0.1807, 0.2290, 0.1298]) -Greedy action tensor([ 0.5725, -0.4391, 0.2100, -0.5605]) tensor([0.4199, 0.1527, 0.2922, 0.1352]) -Greedy action tensor([ 0.3767, -0.2167, 0.0482, -0.3572]) tensor([0.3633, 0.2007, 0.2616, 0.1744]) -Greedy action tensor([ 0.4292, -0.0045, 0.0072, -0.1369]) tensor([0.3482, 0.2257, 0.2284, 0.1977]) -Greedy action tensor([ 0.4117, 0.1500, 0.1144, -0.3081]) tensor([0.3334, 0.2566, 0.2477, 0.1623]) -Greedy action tensor([ 0.8438, -0.3081, 0.1448, -0.5416]) tensor([0.4846, 0.1532, 0.2409, 0.1213]) -Greedy action tensor([ 0.6423, -0.2865, 0.0014, -0.3512]) tensor([0.4363, 0.1723, 0.2298, 0.1615]) -Greedy action tensor([ 0.6946, -0.3829, -0.0353, -0.5031]) tensor([0.4708, 0.1603, 0.2269, 0.1421]) -Greedy action tensor([ 0.6102, -0.1107, 0.1529, -0.2962]) tensor([0.3963, 0.1927, 0.2508, 0.1601]) -Greedy action tensor([ 0.2231, 0.0010, 0.0462, -0.2865]) tensor([0.3087, 0.2472, 0.2587, 0.1854]) -Greedy action tensor([-0.1014, 0.0416, -0.0789, -0.1626]) tensor([0.2429, 0.2802, 0.2484, 0.2285]) -Greedy action tensor([ 0.5282, -0.2573, -0.0636, -0.3555]) tensor([0.4128, 0.1882, 0.2284, 0.1706]) -Greedy action tensor([ 0.5270, -0.1592, -0.0789, -0.0739]) tensor([0.3850, 0.1938, 0.2100, 0.2111]) -Greedy action tensor([ 0.8670, -0.5750, -0.1039, -0.3767]) tensor([0.5254, 0.1242, 0.1990, 0.1515]) -Greedy action tensor([ 0.3326, 0.1648, 0.1965, -0.2148]) tensor([0.3033, 0.2565, 0.2647, 0.1755]) -Greedy action tensor([ 0.5977, -0.0693, -0.0655, -0.2837]) tensor([0.4094, 0.2101, 0.2109, 0.1696]) -Greedy action tensor([ 0.5933, -0.2274, -0.1474, -0.5420]) tensor([0.4468, 0.1966, 0.2130, 0.1436]) -Greedy action tensor([ 0.5865, -0.1159, -0.1112, -0.4315]) tensor([0.4247, 0.2104, 0.2114, 0.1535]) -Greedy action tensor([ 0.9931, -0.6183, 0.0248, -0.8425]) tensor([0.5751, 0.1148, 0.2184, 0.0917]) -Greedy action tensor([ 0.8905, -0.7067, -0.1244, -0.3906]) tensor([0.5427, 0.1099, 0.1967, 0.1507]) -Greedy action tensor([ 0.4516, -0.0937, -0.0593, -0.2336]) tensor([0.3726, 0.2160, 0.2236, 0.1878]) -Greedy action tensor([ 0.2089, 0.1585, 0.0964, -0.2746]) tensor([0.2889, 0.2747, 0.2582, 0.1782]) -Greedy action tensor([ 0.6672, -0.0618, -0.0274, -0.5244]) tensor([0.4376, 0.2111, 0.2184, 0.1329]) -Greedy action tensor([ 0.3761, 0.0755, 0.0944, -0.1284]) tensor([0.3227, 0.2389, 0.2435, 0.1949]) -Greedy action tensor([ 0.7020, -0.1620, 0.0668, -0.3593]) tensor([0.4353, 0.1835, 0.2306, 0.1506]) -Greedy action tensor([ 0.4884, -0.1215, 0.1321, -0.1473]) tensor([0.3606, 0.1959, 0.2525, 0.1910]) -Greedy action tensor([ 0.8665, -0.4791, 0.0549, -0.6518]) tensor([0.5199, 0.1354, 0.2309, 0.1139]) -Greedy action tensor([ 0.6962, -0.5795, -0.1132, -0.6224]) tensor([0.5020, 0.1402, 0.2235, 0.1343]) -Greedy action tensor([ 0.2973, 0.1952, -0.0295, -0.2525]) tensor([0.3124, 0.2820, 0.2253, 0.1803]) -Greedy action tensor([ 0.2539, 0.0648, 0.0376, -0.3040]) tensor([0.3120, 0.2582, 0.2513, 0.1786]) -Greedy action tensor([ 0.7145, -0.4537, -0.0132, -0.2966]) tensor([0.4634, 0.1441, 0.2239, 0.1686]) -Greedy action tensor([ 0.3144, 0.0400, -0.0342, -0.1575]) tensor([0.3237, 0.2460, 0.2284, 0.2019]) -Greedy action tensor([ 0.4473, -0.0384, -0.1084, -0.2548]) tensor([0.3725, 0.2292, 0.2137, 0.1846]) -Greedy action tensor([ 0.3403, -0.1054, 0.2349, -0.2352]) tensor([0.3223, 0.2064, 0.2901, 0.1813]) -Greedy action tensor([ 0.7852, -0.4045, 0.1678, -0.6642]) tensor([0.4812, 0.1464, 0.2595, 0.1129]) -Greedy action tensor([ 0.3883, 0.0278, 0.0567, -0.4506]) tensor([0.3512, 0.2449, 0.2521, 0.1518]) -Greedy action tensor([ 0.5302, -0.2680, -0.0752, -0.3931]) tensor([0.4179, 0.1881, 0.2281, 0.1660]) -Greedy action tensor([ 0.3886, 0.0727, 0.1236, -0.2301]) tensor([0.3295, 0.2402, 0.2528, 0.1775]) -Greedy action tensor([ 0.6705, -0.1117, 0.0660, -0.5946]) tensor([0.4374, 0.2001, 0.2390, 0.1235]) -Greedy action tensor([ 0.5619, -0.2185, -0.1168, -0.4570]) tensor([0.4298, 0.1970, 0.2180, 0.1552]) -Greedy action tensor([ 0.5685, -0.3339, 0.0804, -0.4981]) tensor([0.4231, 0.1716, 0.2597, 0.1456]) -Greedy action tensor([ 0.5144, -0.0557, -0.0615, -0.2043]) tensor([0.3824, 0.2162, 0.2150, 0.1864]) -Greedy action tensor([ 0.6589, -0.5308, -0.2483, -0.5698]) tensor([0.4998, 0.1521, 0.2018, 0.1463]) -Greedy action tensor([ 0.9403, -0.3353, 0.0861, -0.7319]) tensor([0.5283, 0.1475, 0.2249, 0.0992]) -Greedy action tensor([ 0.5759, -0.0565, 0.1295, 0.5454]) tensor([0.3183, 0.1691, 0.2037, 0.3088]) -Greedy action tensor([ 1.3397, -0.4150, -0.5271, 0.9877]) tensor([0.4924, 0.0852, 0.0761, 0.3463]) -Greedy action tensor([ 1.4871, -0.0487, 0.1712, 1.4419]) tensor([0.4099, 0.0883, 0.1100, 0.3918]) -Greedy action tensor([ 1.0763, -0.9772, 1.4591, 0.5515]) tensor([0.3138, 0.0403, 0.4602, 0.1857]) -Greedy action tensor([ 1.2103, -0.5549, 0.8083, 1.4588]) tensor([0.3203, 0.0548, 0.2143, 0.4106]) -Greedy action tensor([0.6321, 0.2949, 0.1340, 1.2845]) tensor([0.2358, 0.1683, 0.1433, 0.4527]) -Greedy action tensor([ 0.8964, -1.3282, 0.1906, 0.6945]) tensor([0.4134, 0.0447, 0.2041, 0.3378]) -Greedy action tensor([-0.0402, 0.6005, 1.1466, -0.4548]) tensor([0.1463, 0.2777, 0.4794, 0.0966]) -Greedy action tensor([ 0.3529, 0.9244, -0.5130, 0.3284]) tensor([0.2399, 0.4250, 0.1009, 0.2342]) -Greedy action tensor([ 1.3212, 0.9021, -0.0066, 1.1525]) tensor([0.3613, 0.2376, 0.0958, 0.3053]) -Greedy action tensor([ 1.6553, -0.9540, 1.3839, 1.5515]) tensor([0.3653, 0.0269, 0.2785, 0.3293]) -Greedy action tensor([ 0.9088, -0.5700, -0.3113, 0.2093]) tensor([0.4951, 0.1128, 0.1461, 0.2460]) -Greedy action tensor([ 9.4856e-01, -1.6702e+00, 3.7763e-04, 1.5038e+00]) tensor([0.3122, 0.0228, 0.1210, 0.5440]) -Greedy action tensor([ 0.1577, 0.9195, -0.1494, -0.0397]) tensor([0.2128, 0.4559, 0.1566, 0.1747]) -Greedy action tensor([ 0.8829, 0.1766, -0.3957, 1.4078]) tensor([0.2888, 0.1425, 0.0804, 0.4882]) -Greedy action tensor([ 0.4518, 0.6371, -0.3438, 0.4452]) tensor([0.2741, 0.3299, 0.1237, 0.2723]) -Greedy action tensor([ 0.5946, -0.3942, 1.0452, 0.7519]) tensor([0.2432, 0.0905, 0.3817, 0.2846]) -Greedy action tensor([ 0.8574, 0.9004, 0.6875, -0.5928]) tensor([0.3203, 0.3344, 0.2702, 0.0751]) -Greedy action tensor([ 0.7283, 0.5740, -0.7431, 0.7304]) tensor([0.3237, 0.2775, 0.0743, 0.3244]) -Greedy action tensor([ 1.6843, -0.9758, 0.2956, 1.2712]) tensor([0.5048, 0.0353, 0.1259, 0.3340]) -Greedy action tensor([ 1.0456, -1.2127, -0.5438, 1.8982]) tensor([0.2736, 0.0286, 0.0558, 0.6419]) -Greedy action tensor([ 0.7042, -0.8205, 0.5165, 1.0655]) tensor([0.2872, 0.0625, 0.2381, 0.4122]) -Greedy action tensor([ 0.8278, 0.3546, -0.4909, 0.2973]) tensor([0.4034, 0.2513, 0.1079, 0.2373]) -Greedy action tensor([ 0.5326, -0.1005, 1.2925, 1.7132]) tensor([0.1444, 0.0767, 0.3087, 0.4702]) -Greedy action tensor([ 1.3282, -1.6344, 1.6825, 0.9140]) tensor([0.3187, 0.0165, 0.4542, 0.2106]) -Greedy action tensor([ 0.8729, 0.6899, -1.6925, -0.1455]) tensor([0.4404, 0.3667, 0.0339, 0.1590]) -Greedy action tensor([ 1.6166, -1.0610, 0.8115, 1.6480]) tensor([0.3925, 0.0270, 0.1755, 0.4050]) -Greedy action tensor([ 0.3903, 0.3462, -0.2520, 0.3689]) tensor([0.2889, 0.2764, 0.1520, 0.2827]) -Greedy action tensor([ 0.8485, -0.6088, -0.4145, 0.7982]) tensor([0.4054, 0.0944, 0.1147, 0.3855]) -Greedy action tensor([ 0.3723, -0.0750, 0.1543, 0.2497]) tensor([0.3005, 0.1921, 0.2416, 0.2658]) -Greedy action tensor([1.3985, 0.0783, 0.1700, 1.0117]) tensor([0.4466, 0.1193, 0.1307, 0.3034]) -Greedy action tensor([ 1.2295, -1.0208, 1.5360, 0.6255]) tensor([0.3321, 0.0350, 0.4513, 0.1816]) -Greedy action tensor([0.5696, 0.3693, 0.5166, 0.1339]) tensor([0.2929, 0.2398, 0.2778, 0.1895]) -Greedy action tensor([ 1.2342, -0.3380, -0.7710, 0.1176]) tensor([0.5990, 0.1243, 0.0806, 0.1961]) -Greedy action tensor([ 0.8107, -0.0758, 0.7081, 0.6048]) tensor([0.3196, 0.1317, 0.2885, 0.2602]) -Greedy action tensor([ 0.4325, 0.4833, 1.2609, -0.2099]) tensor([0.2054, 0.2161, 0.4704, 0.1081]) -Greedy action tensor([1.3147, 0.0174, 1.1335, 0.7058]) tensor([0.3771, 0.1031, 0.3146, 0.2052]) -Greedy action tensor([ 0.7256, -0.6422, -0.3010, 1.2449]) tensor([0.3036, 0.0773, 0.1088, 0.5103]) -Greedy action tensor([ 0.9998, 1.1283, -0.1126, -0.0688]) tensor([0.3560, 0.4048, 0.1170, 0.1223]) -Greedy action tensor([-0.2950, 0.7820, 0.7578, 0.1073]) tensor([0.1205, 0.3538, 0.3454, 0.1802]) -Greedy action tensor([ 0.8059, -0.9510, 0.1229, 1.2972]) tensor([0.3019, 0.0521, 0.1525, 0.4935]) -Greedy action tensor([ 0.5823, -0.4988, 0.9947, 1.5572]) tensor([0.1818, 0.0617, 0.2746, 0.4819]) -Greedy action tensor([ 1.6524, -0.3166, 0.8187, 0.9327]) tensor([0.4852, 0.0677, 0.2108, 0.2363]) -Greedy action tensor([1.1277, 0.1890, 0.2671, 1.1047]) tensor([0.3582, 0.1401, 0.1515, 0.3501]) -Greedy action tensor([ 0.9276, 0.2197, -0.1182, 1.9748]) tensor([0.2130, 0.1050, 0.0749, 0.6071]) -Greedy action tensor([ 0.6471, -2.0765, -0.0252, 0.0282]) tensor([0.4729, 0.0310, 0.2414, 0.2547]) -Greedy action tensor([ 1.4613, -0.4576, 0.9142, 1.3158]) tensor([0.3861, 0.0567, 0.2234, 0.3338]) -Greedy action tensor([ 0.3263, 0.1008, 0.2422, -0.3612]) tensor([0.3105, 0.2478, 0.2855, 0.1562]) -Greedy action tensor([ 0.3537, 0.2019, 0.5084, -0.1514]) tensor([0.2755, 0.2367, 0.3216, 0.1662]) -Greedy action tensor([ 0.9862, -1.4155, 0.2903, 0.0982]) tensor([0.4998, 0.0453, 0.2492, 0.2057]) -Greedy action tensor([ 0.7188, -0.2500, 0.3728, 0.9362]) tensor([0.3003, 0.1140, 0.2125, 0.3732]) -Greedy action tensor([ 1.7648, -0.0136, 0.7835, 1.4466]) tensor([0.4403, 0.0744, 0.1650, 0.3203]) -Greedy action tensor([ 1.0618, -0.0381, -0.3314, 1.0478]) tensor([0.3895, 0.1297, 0.0967, 0.3841]) -Greedy action tensor([ 0.7446, 0.2419, -0.5780, 0.6358]) tensor([0.3612, 0.2185, 0.0963, 0.3240]) -Greedy action tensor([ 1.2641, -0.9724, 0.7691, 0.8021]) tensor([0.4262, 0.0455, 0.2598, 0.2685]) -Greedy action tensor([ 1.4232, -0.3419, 1.0932, 1.2809]) tensor([0.3627, 0.0621, 0.2607, 0.3146]) -Greedy action tensor([ 0.9676, 0.0782, -0.5085, 0.0101]) tensor([0.4942, 0.2031, 0.1129, 0.1897]) -Greedy action tensor([ 1.4455, -0.9819, 0.6647, 0.5398]) tensor([0.5127, 0.0453, 0.2348, 0.2072]) -Greedy action tensor([ 0.1154, -1.1758, -0.2900, 0.9520]) tensor([0.2353, 0.0647, 0.1569, 0.5432]) -Greedy action tensor([ 1.1280, -0.3127, -0.6566, 0.2000]) tensor([0.5556, 0.1315, 0.0933, 0.2196]) -Greedy action tensor([ 1.2899, 1.0481, -0.9846, 1.3615]) tensor([0.3376, 0.2651, 0.0347, 0.3626]) -Greedy action tensor([0.6178, 0.3589, 0.3866, 0.0451]) tensor([0.3195, 0.2467, 0.2536, 0.1802]) -Greedy action tensor([ 1.0280, -1.2643, 0.7511, 0.3801]) tensor([0.4198, 0.0424, 0.3182, 0.2196]) -Greedy action tensor([ 0.3703, -0.1022, -0.0550, 0.7604]) tensor([0.2664, 0.1661, 0.1741, 0.3935]) -Greedy action tensor([ 0.4393, 1.0296, 0.4043, -0.2400]) tensor([0.2338, 0.4219, 0.2258, 0.1185]) -Greedy action tensor([ 0.4670, -0.7708, 0.4210, 0.6416]) tensor([0.2910, 0.0844, 0.2780, 0.3466]) -Greedy action tensor([-0.0149, 0.3245, -0.5065, -0.0528]) tensor([0.2514, 0.3529, 0.1537, 0.2420]) -Greedy action tensor([ 0.8477, -0.0353, 0.1216, 0.1750]) tensor([0.4154, 0.1718, 0.2009, 0.2120]) -Greedy action tensor([ 1.5185, -0.6204, -0.5785, 0.6908]) tensor([0.5961, 0.0702, 0.0732, 0.2605]) -Greedy action tensor([ 0.7536, -0.0166, -0.3874, 0.5587]) tensor([0.3838, 0.1777, 0.1226, 0.3159]) -Greedy action tensor([ 1.2217, 0.7497, -0.3858, 0.5432]) tensor([0.4289, 0.2675, 0.0859, 0.2176]) -Greedy action tensor([ 1.3216, -0.5087, 0.1672, 1.8170]) tensor([0.3209, 0.0515, 0.1011, 0.5265]) -Greedy action tensor([ 0.8216, -0.2408, -0.8189, 1.3086]) tensor([0.3158, 0.1091, 0.0612, 0.5139]) -Greedy action tensor([ 0.5178, -1.7173, 0.1787, 1.3636]) tensor([0.2410, 0.0258, 0.1717, 0.5615]) -Greedy action tensor([ 1.3152, -0.2931, 0.6618, 1.0821]) tensor([0.3980, 0.0797, 0.2071, 0.3152]) -Greedy action tensor([ 1.1402, 0.5018, -0.3723, 0.5531]) tensor([0.4339, 0.2292, 0.0956, 0.2412]) -Greedy action tensor([ 0.9954, -0.5344, 1.9897, 0.8087]) tensor([0.2106, 0.0456, 0.5691, 0.1747]) -Greedy action tensor([ 0.0163, 0.6922, 0.8527, -0.6269]) tensor([0.1724, 0.3390, 0.3980, 0.0906]) -Greedy action tensor([ 0.3860, 0.4436, -0.0012, -0.6776]) tensor([0.3243, 0.3435, 0.2202, 0.1120]) -Greedy action tensor([1.4621, 0.1868, 0.5929, 1.3565]) tensor([0.3849, 0.1075, 0.1614, 0.3463]) -Greedy action tensor([ 0.8490, 0.6410, -0.0229, 0.6015]) tensor([0.3321, 0.2697, 0.1389, 0.2593]) -Greedy action tensor([ 0.7895, -1.3080, 1.4656, 0.4603]) tensor([0.2626, 0.0322, 0.5163, 0.1889]) -Greedy action tensor([ 1.2054, -0.3512, -0.3108, -0.2631]) tensor([0.6022, 0.1270, 0.1322, 0.1387]) -Greedy action tensor([ 0.9381, -0.1464, -0.1274, -0.3151]) tensor([0.5081, 0.1718, 0.1751, 0.1451]) -Greedy action tensor([ 1.1837, -0.2642, -0.1205, -0.2652]) tensor([0.5743, 0.1350, 0.1559, 0.1349]) -Greedy action tensor([ 1.3552, -0.4863, -0.2480, -0.1690]) tensor([0.6339, 0.1005, 0.1276, 0.1381]) -Greedy action tensor([ 1.3262, -0.7293, -0.3189, 0.1712]) tensor([0.6112, 0.0783, 0.1180, 0.1926]) -Greedy action tensor([ 1.1573, -0.6773, -0.4666, 0.3472]) tensor([0.5550, 0.0886, 0.1094, 0.2469]) -Greedy action tensor([ 0.8115, -0.0397, -0.1564, -0.2116]) tensor([0.4616, 0.1971, 0.1754, 0.1659]) -Greedy action tensor([ 1.0086, -0.5410, -0.7388, 0.5099]) tensor([0.5015, 0.1065, 0.0874, 0.3046]) -Greedy action tensor([ 0.9185, -0.5570, -0.5338, 0.6986]) tensor([0.4414, 0.1009, 0.1033, 0.3543]) -Greedy action tensor([ 0.6819, -0.2967, -0.1414, 0.0692]) tensor([0.4243, 0.1595, 0.1863, 0.2299]) -Greedy action tensor([ 0.9585, -0.4659, -0.3441, -0.0271]) tensor([0.5303, 0.1276, 0.1442, 0.1979]) -Greedy action tensor([ 1.4149, -0.7372, -0.3925, 0.2197]) tensor([0.6317, 0.0734, 0.1037, 0.1912]) -Greedy action tensor([ 1.3560, -0.3483, -0.0211, -0.0904]) tensor([0.5989, 0.1089, 0.1511, 0.1410]) -Greedy action tensor([ 0.7493, -0.4104, -0.1706, -0.0180]) tensor([0.4595, 0.1441, 0.1831, 0.2133]) -Greedy action tensor([ 0.7916, -0.6024, -0.6003, 0.1985]) tensor([0.4880, 0.1211, 0.1213, 0.2697]) -Greedy action tensor([ 1.0775, -0.6468, -0.4913, 0.3061]) tensor([0.5408, 0.0964, 0.1127, 0.2501]) -Greedy action tensor([ 0.9478, -0.5141, -0.2520, 0.0511]) tensor([0.5152, 0.1194, 0.1552, 0.2102]) -Greedy action tensor([ 0.8756, -0.5101, -0.1288, -0.0968]) tensor([0.5013, 0.1254, 0.1836, 0.1896]) -Greedy action tensor([ 0.6845, -0.1208, -0.0711, -0.1019]) tensor([0.4216, 0.1884, 0.1980, 0.1920]) -Greedy action tensor([ 1.0898, -0.4907, -0.5882, 0.3114]) tensor([0.5400, 0.1112, 0.1008, 0.2480]) -Greedy action tensor([ 0.8923, -0.3525, -0.0648, 0.1334]) tensor([0.4673, 0.1346, 0.1794, 0.2188]) -Greedy action tensor([ 0.9866, -0.4174, -0.4810, 0.4215]) tensor([0.4891, 0.1201, 0.1127, 0.2780]) -Greedy action tensor([ 1.0553, -0.2532, 0.0604, -0.2821]) tensor([0.5256, 0.1420, 0.1944, 0.1380]) -Greedy action tensor([ 0.9276, -0.7089, -0.5784, 0.4787]) tensor([0.4867, 0.0947, 0.1079, 0.3107]) -Greedy action tensor([ 1.1939, -0.7480, -0.6369, 0.5746]) tensor([0.5429, 0.0779, 0.0870, 0.2922]) -Greedy action tensor([ 1.2578, -0.3790, -0.1197, -0.0940]) tensor([0.5863, 0.1141, 0.1479, 0.1517]) -Greedy action tensor([ 1.3148, -0.4894, -0.3256, 0.0504]) tensor([0.6094, 0.1003, 0.1182, 0.1721]) -Greedy action tensor([ 1.3093, -0.4851, -0.3301, -0.0455]) tensor([0.6179, 0.1027, 0.1199, 0.1594]) -Greedy action tensor([ 0.9262, -0.5952, -0.2010, 0.4230]) tensor([0.4658, 0.1017, 0.1509, 0.2816]) -Greedy action tensor([ 0.8329, -0.5885, -0.2657, 0.6399]) tensor([0.4168, 0.1006, 0.1389, 0.3437]) -Greedy action tensor([ 1.5324, -0.3412, -0.4466, 0.2747]) tensor([0.6345, 0.0974, 0.0877, 0.1804]) -Greedy action tensor([ 0.8145, -0.3572, -0.4112, -0.1718]) tensor([0.5060, 0.1568, 0.1485, 0.1887]) -Greedy action tensor([ 1.3508, -0.6310, -0.3833, 0.3312]) tensor([0.5970, 0.0823, 0.1054, 0.2154]) -Greedy action tensor([ 1.0169, -0.5259, -0.5593, 0.2795]) tensor([0.5266, 0.1126, 0.1089, 0.2519]) -Greedy action tensor([ 1.4050, -0.9728, -0.7059, 0.5568]) tensor([0.6090, 0.0565, 0.0738, 0.2608]) -Greedy action tensor([ 1.3443, -0.6981, -0.2206, 0.2980]) tensor([0.5917, 0.0768, 0.1237, 0.2078]) -Greedy action tensor([ 1.4476, -0.3093, -0.3150, 0.0028]) tensor([0.6329, 0.1092, 0.1086, 0.1492]) -Greedy action tensor([ 0.6880, -0.5177, -0.5857, 0.2795]) tensor([0.4456, 0.1335, 0.1247, 0.2962]) -Greedy action tensor([ 1.0345, -0.6609, -0.2680, 0.2156]) tensor([0.5274, 0.0968, 0.1434, 0.2325]) -Greedy action tensor([ 1.0467, -0.4311, -0.2336, -0.0319]) tensor([0.5417, 0.1236, 0.1506, 0.1842]) -Greedy action tensor([ 1.1639, -0.4608, -0.0976, -0.0915]) tensor([0.5665, 0.1116, 0.1605, 0.1614]) -Greedy action tensor([ 0.5149, -0.0007, -0.1673, -0.4056]) tensor([0.3998, 0.2388, 0.2021, 0.1593]) -Greedy action tensor([ 0.6564, -0.3989, -0.0081, -0.1478]) tensor([0.4329, 0.1507, 0.2227, 0.1937]) -Greedy action tensor([ 1.0707, -0.6631, -0.6491, 0.8286]) tensor([0.4671, 0.0825, 0.0837, 0.3667]) -Greedy action tensor([ 1.6770, -0.7881, -0.6184, 0.0641]) tensor([0.7220, 0.0614, 0.0727, 0.1439]) -Greedy action tensor([ 1.1384, -0.2288, -0.0160, -0.2935]) tensor([0.5528, 0.1409, 0.1743, 0.1320]) -Greedy action tensor([ 0.7193, -0.5332, -0.1335, -0.0085]) tensor([0.4556, 0.1302, 0.1942, 0.2200]) -Greedy action tensor([ 0.6150, -0.0929, -0.4395, 0.1016]) tensor([0.4099, 0.2020, 0.1428, 0.2453]) -Greedy action tensor([ 0.9126, -0.3843, -0.0569, 0.0383]) tensor([0.4831, 0.1321, 0.1832, 0.2015]) -Greedy action tensor([ 0.7193, -0.2552, -0.2758, 0.3707]) tensor([0.4077, 0.1539, 0.1507, 0.2877]) -Greedy action tensor([ 1.4649, -0.3904, -0.1203, 0.0959]) tensor([0.6189, 0.0968, 0.1268, 0.1574]) -Greedy action tensor([ 0.8660, -0.4566, -0.2767, 0.1215]) tensor([0.4854, 0.1293, 0.1548, 0.2305]) -Greedy action tensor([ 1.2905, -0.6906, -0.8542, 0.5737]) tensor([0.5736, 0.0791, 0.0672, 0.2801]) -Greedy action tensor([ 0.9670, -0.5556, -0.4137, 0.0442]) tensor([0.5356, 0.1168, 0.1347, 0.2129]) -Greedy action tensor([ 0.5123, -0.4112, -0.5447, 0.2131]) tensor([0.4022, 0.1597, 0.1398, 0.2982]) -Greedy action tensor([ 1.0666, -0.5479, -0.5648, 0.4744]) tensor([0.5134, 0.1022, 0.1005, 0.2840]) -Greedy action tensor([ 0.9512, -0.6020, -0.4394, 0.0989]) tensor([0.5300, 0.1121, 0.1319, 0.2260]) -Greedy action tensor([ 1.0485, -0.4914, -0.0421, -0.0596]) tensor([0.5317, 0.1140, 0.1787, 0.1756]) -Greedy action tensor([ 1.2709, -0.0804, -0.0866, -0.2120]) tensor([0.5737, 0.1485, 0.1476, 0.1302]) -Greedy action tensor([ 0.8225, -0.3493, -0.0304, -0.1820]) tensor([0.4757, 0.1474, 0.2027, 0.1742]) -Greedy action tensor([ 1.5004, -0.6296, -0.3280, 0.2012]) tensor([0.6442, 0.0766, 0.1035, 0.1757]) -Greedy action tensor([ 0.1371, -0.0645, -0.2760, 0.0538]) tensor([0.2942, 0.2405, 0.1946, 0.2707]) -Greedy action tensor([ 0.9197, -0.6855, -0.4566, 0.1676]) tensor([0.5195, 0.1044, 0.1312, 0.2449]) -Greedy action tensor([ 1.4875, -0.4027, -0.2763, 0.1547]) tensor([0.6305, 0.0952, 0.1080, 0.1663]) -Greedy action tensor([ 0.8984, -0.6107, -0.1515, 0.2793]) tensor([0.4741, 0.1048, 0.1659, 0.2552]) -Greedy action tensor([ 1.3995, -0.4507, -0.2349, 0.0589]) tensor([0.6196, 0.0974, 0.1209, 0.1621]) -Greedy action tensor([ 1.1621, -0.6869, -0.6505, 0.5773]) tensor([0.5325, 0.0838, 0.0869, 0.2967]) -Greedy action tensor([ 0.9258, -0.7063, -0.7198, 0.5384]) tensor([0.4837, 0.0946, 0.0933, 0.3284]) -Greedy action tensor([ 0.6803, -0.4961, -0.5208, 0.3685]) tensor([0.4271, 0.1317, 0.1285, 0.3127]) -Greedy action tensor([ 0.8798, 0.0645, 0.0099, -0.0117]) tensor([0.4402, 0.1948, 0.1845, 0.1805]) -Greedy action tensor([ 0.9726, -0.6379, -0.3784, 0.2717]) tensor([0.5115, 0.1022, 0.1325, 0.2538]) -Greedy action tensor([ 1.2871, -0.6469, -0.2452, 0.2821]) tensor([0.5792, 0.0837, 0.1251, 0.2120]) -Greedy action tensor([ 1.0401e+00, -4.3224e-01, 5.6891e-04, -2.0931e-01]) tensor([0.5349, 0.1227, 0.1891, 0.1533]) -Greedy action tensor([ 1.0589, -0.3783, -0.1519, 0.1374]) tensor([0.5172, 0.1229, 0.1541, 0.2058]) -Greedy action tensor([ 1.0459, -0.5514, -0.2061, 0.0881]) tensor([0.5342, 0.1081, 0.1527, 0.2050]) -Greedy action tensor([ 0.8175, -0.4752, -0.6202, 0.7205]) tensor([0.4133, 0.1135, 0.0981, 0.3751]) -Greedy action tensor([ 0.2837, -0.2528, -0.1978, -0.0324]) tensor([0.3411, 0.1995, 0.2107, 0.2487]) -Greedy action tensor([ 1.3711, -0.8424, -0.3544, 0.1506]) tensor([0.6319, 0.0691, 0.1125, 0.1865]) -Greedy action tensor([ 0.8243, -0.4422, -0.1604, 0.2617]) tensor([0.4494, 0.1267, 0.1679, 0.2561]) -Greedy action tensor([ 0.9869, 0.1292, 0.0296, -0.6382]) tensor([0.4988, 0.2115, 0.1915, 0.0982]) -Greedy action tensor([ 1.2730, -0.5894, -0.3060, 0.0805]) tensor([0.6006, 0.0933, 0.1238, 0.1823]) -Greedy action tensor([-1.3296, -0.3372, 0.0073, -0.1034]) tensor([0.0916, 0.2472, 0.3489, 0.3123]) -Greedy action tensor([-1.6231, -0.7030, 0.8203, 0.5660]) tensor([0.0418, 0.1048, 0.4807, 0.3728]) -Greedy action tensor([-1.9463, -0.5027, 0.9443, 0.2257]) tensor([0.0312, 0.1323, 0.5623, 0.2741]) -Greedy action tensor([-1.7103, -0.7533, 0.1203, -0.5464]) tensor([0.0767, 0.1996, 0.4782, 0.2455]) -Greedy action tensor([-1.3000, -0.2847, 0.6851, 0.9392]) tensor([0.0490, 0.1351, 0.3564, 0.4595]) -Greedy action tensor([-1.4760, -0.5046, 0.7210, -0.4174]) tensor([0.0644, 0.1702, 0.5797, 0.1857]) -Greedy action tensor([-1.9160, -0.4124, 0.9965, 0.5134]) tensor([0.0284, 0.1276, 0.5220, 0.3220]) -Greedy action tensor([-1.7347, -0.0578, 0.5475, 0.1233]) tensor([0.0443, 0.2371, 0.4344, 0.2842]) -Greedy action tensor([-1.3621, -0.5467, 0.3664, 0.2523]) tensor([0.0718, 0.1624, 0.4047, 0.3611]) -Greedy action tensor([-1.8438, -0.4813, 0.6204, -0.1161]) tensor([0.0449, 0.1753, 0.5274, 0.2525]) -Greedy action tensor([-0.9255, -0.4118, 0.6926, 1.3060]) tensor([0.0587, 0.0982, 0.2962, 0.5469]) -Greedy action tensor([-1.6770, -0.4677, 0.3501, 0.1366]) tensor([0.0553, 0.1854, 0.4200, 0.3393]) -Greedy action tensor([-1.9028, -0.9926, 1.0233, -0.1125]) tensor([0.0355, 0.0883, 0.6631, 0.2130]) -Greedy action tensor([-0.8749, -0.5512, 0.2005, 0.2703]) tensor([0.1182, 0.1635, 0.3466, 0.3717]) -Greedy action tensor([-0.9017, -0.4935, 0.4706, -0.3190]) tensor([0.1214, 0.1826, 0.4787, 0.2174]) -Greedy action tensor([-1.9348, -0.5618, 0.6728, -0.1070]) tensor([0.0404, 0.1596, 0.5485, 0.2515]) -Greedy action tensor([-1.8401, -0.4845, 0.6309, -0.0887]) tensor([0.0445, 0.1726, 0.5265, 0.2564]) -Greedy action tensor([-0.0026, -0.1056, 0.6755, 1.0454]) tensor([0.1487, 0.1342, 0.2930, 0.4241]) -Greedy action tensor([-1.9207, -0.4900, 1.1865, 0.6456]) tensor([0.0247, 0.1031, 0.5513, 0.3210]) -Greedy action tensor([-1.8045, -0.6347, 0.8267, 0.2674]) tensor([0.0384, 0.1236, 0.5332, 0.3048]) -Greedy action tensor([-1.6342, -0.5467, 0.5156, -0.1873]) tensor([0.0595, 0.1766, 0.5109, 0.2530]) -Greedy action tensor([-0.9602, -0.0994, 0.2482, -0.2849]) tensor([0.1152, 0.2725, 0.3858, 0.2264]) -Greedy action tensor([-1.6748, -0.4757, 0.5672, 0.1076]) tensor([0.0508, 0.1686, 0.4784, 0.3021]) -Greedy action tensor([-1.9501, -0.8309, 0.2768, -0.2280]) tensor([0.0528, 0.1618, 0.4898, 0.2956]) -Greedy action tensor([-1.5667, -0.5922, 1.2424, 1.0617]) tensor([0.0293, 0.0777, 0.4867, 0.4063]) -Greedy action tensor([-1.3461, -0.3563, 1.0367, 1.1022]) tensor([0.0383, 0.1031, 0.4152, 0.4433]) -Greedy action tensor([-1.2888, -0.7200, 0.3161, 0.3118]) tensor([0.0787, 0.1391, 0.3919, 0.3902]) -Greedy action tensor([-1.7832, -0.6492, 0.3542, -0.2334]) tensor([0.0578, 0.1797, 0.4901, 0.2723]) -Greedy action tensor([-1.6959, -0.4671, 0.5628, 0.0093]) tensor([0.0513, 0.1753, 0.4911, 0.2823]) -Greedy action tensor([-0.5507, -0.5418, 0.1883, 0.1806]) tensor([0.1618, 0.1632, 0.3388, 0.3362]) -Greedy action tensor([-1.6418, -0.6874, 0.1928, -0.2805]) tensor([0.0727, 0.1887, 0.4551, 0.2835]) -Greedy action tensor([-1.8927, -0.4625, 0.6368, -0.1436]) tensor([0.0426, 0.1780, 0.5345, 0.2449]) -Greedy action tensor([-1.2007, -0.4591, 0.5446, 1.0084]) tensor([0.0558, 0.1171, 0.3194, 0.5078]) -Greedy action tensor([-1.4860, -0.5449, 0.5627, 0.4607]) tensor([0.0546, 0.1398, 0.4233, 0.3823]) -Greedy action tensor([-1.3012, -0.3171, 0.3250, 0.1019]) tensor([0.0780, 0.2086, 0.3964, 0.3171]) -Greedy action tensor([-1.8883, -0.6286, 0.9098, 0.2113]) tensor([0.0344, 0.1211, 0.5640, 0.2805]) -Greedy action tensor([-1.9205, -0.4501, 0.6510, -0.1673]) tensor([0.0413, 0.1797, 0.5405, 0.2385]) -Greedy action tensor([-1.9357, -0.9599, 0.3661, -0.1874]) tensor([0.0516, 0.1368, 0.5153, 0.2963]) -Greedy action tensor([-1.1028, -0.5435, -0.0114, 0.3441]) tensor([0.1002, 0.1753, 0.2985, 0.4259]) -Greedy action tensor([-1.8197, -0.4797, 0.5958, -0.0827]) tensor([0.0461, 0.1760, 0.5160, 0.2618]) -Greedy action tensor([-1.1736, -0.5632, 0.2858, 0.2754]) tensor([0.0877, 0.1615, 0.3774, 0.3735]) -Greedy action tensor([-1.8995, -0.4568, 0.6475, -0.1500]) tensor([0.0421, 0.1782, 0.5376, 0.2422]) -Greedy action tensor([-0.8859, -0.5673, 0.2528, 0.1063]) tensor([0.1220, 0.1678, 0.3810, 0.3291]) -Greedy action tensor([-2.0330, -0.8619, 0.8291, 0.0642]) tensor([0.0335, 0.1080, 0.5859, 0.2727]) -Greedy action tensor([-1.7460, 0.1252, 0.3595, -0.2055]) tensor([0.0491, 0.3188, 0.4030, 0.2291]) -Greedy action tensor([-0.9452, -0.6062, 0.2345, 0.1919]) tensor([0.1140, 0.1600, 0.3708, 0.3553]) -Greedy action tensor([-1.0366, -0.6319, 0.3087, 0.9631]) tensor([0.0729, 0.1092, 0.2797, 0.5382]) -Greedy action tensor([-1.9259, -0.6474, 1.0563, 0.4359]) tensor([0.0286, 0.1028, 0.5648, 0.3037]) -Greedy action tensor([-1.8382, -0.4411, 0.6401, -0.0643]) tensor([0.0437, 0.1769, 0.5215, 0.2578]) -Greedy action tensor([-1.6110, -0.6602, 0.7740, 0.4832]) tensor([0.0443, 0.1147, 0.4812, 0.3598]) -Greedy action tensor([-0.8587, -0.1011, 0.2612, 0.0445]) tensor([0.1154, 0.2462, 0.3537, 0.2847]) -Greedy action tensor([-0.6775, -0.3734, 1.0317, 1.5940]) tensor([0.0569, 0.0771, 0.3144, 0.5516]) -Greedy action tensor([-1.8057, -0.4884, 0.5935, -0.1042]) tensor([0.0471, 0.1759, 0.5188, 0.2582]) -Greedy action tensor([-0.9325, -0.6276, 0.1886, 0.3638]) tensor([0.1101, 0.1494, 0.3379, 0.4026]) -Greedy action tensor([-0.8302, -0.5110, 0.1940, 0.4445]) tensor([0.1144, 0.1575, 0.3187, 0.4094]) -Greedy action tensor([-1.8743, -0.4691, 0.7028, 0.1525]) tensor([0.0387, 0.1578, 0.5096, 0.2939]) -Greedy action tensor([-1.6714, -0.5474, 0.5301, -0.0123]) tensor([0.0544, 0.1675, 0.4920, 0.2860]) -Greedy action tensor([-0.9347, 0.9563, 0.3112, -0.2226]) tensor([0.0761, 0.5042, 0.2645, 0.1551]) -Greedy action tensor([-1.8891, -0.6022, 0.9984, 0.3303]) tensor([0.0315, 0.1140, 0.5649, 0.2896]) -Greedy action tensor([-1.2666, -0.3721, 0.8869, 1.1147]) tensor([0.0437, 0.1069, 0.3765, 0.4729]) -Greedy action tensor([-1.6961, -0.4832, 0.5309, 0.0372]) tensor([0.0518, 0.1743, 0.4805, 0.2933]) -Greedy action tensor([-1.5760, -0.4737, 0.0600, -0.4173]) tensor([0.0811, 0.2442, 0.4164, 0.2583]) -Greedy action tensor([-1.4184, -0.5756, 0.1507, -0.3335]) tensor([0.0902, 0.2096, 0.4332, 0.2670]) -Greedy action tensor([-1.6488, -1.0675, 0.0044, -0.7990]) tensor([0.0966, 0.1728, 0.5046, 0.2260]) -Greedy action tensor([-1.6309, -0.6024, 0.6102, 0.2398]) tensor([0.0508, 0.1420, 0.4775, 0.3297]) -Greedy action tensor([-1.8516, -0.6359, 0.8055, 0.2371]) tensor([0.0375, 0.1263, 0.5338, 0.3024]) -Greedy action tensor([-1.5331, -0.5586, 0.5706, 0.3161]) tensor([0.0549, 0.1456, 0.4503, 0.3491]) -Greedy action tensor([-1.4503, -0.5802, 0.5050, 0.2104]) tensor([0.0636, 0.1519, 0.4496, 0.3349]) -Greedy action tensor([-0.8244, 0.4316, 0.2136, -0.1019]) tensor([0.1064, 0.3738, 0.3006, 0.2192]) -Greedy action tensor([-1.3642, -0.2278, 0.4327, 0.4898]) tensor([0.0605, 0.1885, 0.3648, 0.3862]) -Greedy action tensor([-1.1113, -0.7073, 0.3238, -0.1221]) tensor([0.1065, 0.1596, 0.4474, 0.2865]) -Greedy action tensor([-1.6315, -0.5116, 0.5298, 0.1538]) tensor([0.0535, 0.1638, 0.4641, 0.3187]) -Greedy action tensor([-1.1743, -0.5800, 0.2757, 0.2420]) tensor([0.0893, 0.1618, 0.3807, 0.3681]) -Greedy action tensor([-0.9181, -0.3583, 0.5159, -0.4930]) tensor([0.1180, 0.2065, 0.4950, 0.1805]) -Greedy action tensor([-1.3236, -0.7313, 0.1136, 0.0367]) tensor([0.0916, 0.1657, 0.3856, 0.3571]) -Greedy action tensor([-1.0299, -0.5559, 0.2833, 0.6526]) tensor([0.0854, 0.1373, 0.3177, 0.4596]) -Greedy action tensor([-1.3935, -0.5255, 0.4096, 0.6572]) tensor([0.0581, 0.1383, 0.3523, 0.4513]) -Greedy action tensor([-0.5434, -0.1413, 0.7766, 1.4387]) tensor([0.0741, 0.1108, 0.2774, 0.5378]) -Greedy action tensor([-1.0390, -0.6198, 0.6266, -0.1349]) tensor([0.0973, 0.1480, 0.5145, 0.2403]) -Greedy action tensor([-1.0332, -0.4781, 0.3382, 0.6341]) tensor([0.0835, 0.1454, 0.3289, 0.4422]) -Greedy action tensor([-1.0632, -0.0744, -0.2994, 0.1310]) tensor([0.1095, 0.2942, 0.2349, 0.3613]) -Greedy action tensor([ 0.5281, -0.0848, -0.0824, -0.2849]) tensor([0.3955, 0.2143, 0.2148, 0.1754]) -Greedy action tensor([ 0.3535, 0.1986, 0.1079, -0.2643]) tensor([0.3147, 0.2695, 0.2461, 0.1697]) -Greedy action tensor([ 0.3468, 0.0448, 0.0114, -0.1797]) tensor([0.3284, 0.2428, 0.2348, 0.1940]) -Greedy action tensor([ 0.4079, -0.2063, 0.0274, -0.3630]) tensor([0.3721, 0.2014, 0.2543, 0.1721]) -Greedy action tensor([ 0.4646, -0.1758, -0.0334, -0.4748]) tensor([0.3959, 0.2087, 0.2406, 0.1548]) -Greedy action tensor([ 0.3987, 0.0909, 0.0429, -0.1980]) tensor([0.3349, 0.2461, 0.2346, 0.1844]) -Greedy action tensor([ 0.5349, -0.2577, -0.0212, -0.4411]) tensor([0.4162, 0.1884, 0.2386, 0.1568]) -Greedy action tensor([ 1.2098, -0.7926, 0.0243, -0.7228]) tensor([0.6308, 0.0852, 0.1928, 0.0913]) -Greedy action tensor([ 0.3353, -0.2492, 0.0558, -0.2262]) tensor([0.3468, 0.1933, 0.2622, 0.1978]) -Greedy action tensor([ 1.0313, -0.6021, -0.0186, -0.7422]) tensor([0.5831, 0.1139, 0.2041, 0.0990]) -Greedy action tensor([ 3.7665e-01, -1.4759e-01, 1.1162e-04, -3.2605e-01]) tensor([0.3606, 0.2135, 0.2474, 0.1786]) -Greedy action tensor([ 0.4781, -0.2909, 0.1825, -0.3012]) tensor([0.3750, 0.1738, 0.2791, 0.1720]) -Greedy action tensor([ 0.2644, -0.1478, 0.0170, -0.2554]) tensor([0.3292, 0.2180, 0.2570, 0.1957]) -Greedy action tensor([ 0.6599, -0.4456, -0.1329, -0.4206]) tensor([0.4710, 0.1559, 0.2132, 0.1599]) -Greedy action tensor([ 0.7935, -0.6120, 0.2802, -0.7671]) tensor([0.4869, 0.1194, 0.2914, 0.1023]) -Greedy action tensor([ 0.2975, -0.0464, 0.1508, -0.1656]) tensor([0.3123, 0.2214, 0.2697, 0.1965]) -Greedy action tensor([ 0.6772, -0.5841, -0.2139, -0.5232]) tensor([0.5014, 0.1420, 0.2057, 0.1509]) -Greedy action tensor([ 0.6353, -0.0989, 0.1366, -0.2526]) tensor([0.4002, 0.1921, 0.2431, 0.1647]) -Greedy action tensor([ 0.3452, 0.1325, 0.0108, -0.2077]) tensor([0.3226, 0.2608, 0.2309, 0.1856]) -Greedy action tensor([ 0.6234, -0.0416, -0.0401, -0.0569]) tensor([0.3944, 0.2028, 0.2031, 0.1997]) -Greedy action tensor([ 0.6084, -0.3972, 0.1626, -0.6582]) tensor([0.4371, 0.1599, 0.2799, 0.1232]) -Greedy action tensor([ 0.2328, 0.1058, 0.1177, -0.2188]) tensor([0.2934, 0.2584, 0.2615, 0.1868]) -Greedy action tensor([ 0.4288, -0.2332, 0.2238, -0.4097]) tensor([0.3619, 0.1867, 0.2949, 0.1565]) -Greedy action tensor([ 0.4098, 0.1101, 0.0613, -0.2378]) tensor([0.3367, 0.2495, 0.2376, 0.1762]) -Greedy action tensor([ 0.3373, 0.0013, -0.0387, -0.5651]) tensor([0.3563, 0.2546, 0.2446, 0.1445]) -Greedy action tensor([ 0.6336, -0.1983, -0.0996, -0.4061]) tensor([0.4407, 0.1918, 0.2117, 0.1558]) -Greedy action tensor([ 0.9106, -0.3658, 0.0189, -0.6790]) tensor([0.5283, 0.1474, 0.2166, 0.1078]) -Greedy action tensor([ 0.8384, -0.4230, -0.0573, -0.5493]) tensor([0.5151, 0.1459, 0.2103, 0.1286]) -Greedy action tensor([ 0.7294, -0.2025, 0.0204, -0.2993]) tensor([0.4458, 0.1755, 0.2194, 0.1593]) -Greedy action tensor([ 0.3570, 0.0253, -0.0248, -0.0480]) tensor([0.3260, 0.2340, 0.2226, 0.2174]) -Greedy action tensor([ 0.4137, 0.0081, -0.0615, -0.3335]) tensor([0.3621, 0.2413, 0.2251, 0.1715]) -Greedy action tensor([ 0.8666, -0.4752, 0.0219, -0.3967]) tensor([0.5066, 0.1324, 0.2177, 0.1432]) -Greedy action tensor([ 0.9875, -0.2900, 0.0922, -0.4844]) tensor([0.5217, 0.1454, 0.2131, 0.1197]) -Greedy action tensor([ 0.4191, -0.0510, 0.0179, -0.2557]) tensor([0.3567, 0.2229, 0.2388, 0.1816]) -Greedy action tensor([ 0.3476, -0.3010, -0.1692, -0.5071]) tensor([0.3930, 0.2055, 0.2344, 0.1672]) -Greedy action tensor([ 0.9035, -0.0453, -0.0342, -0.5146]) tensor([0.4948, 0.1916, 0.1937, 0.1198]) -Greedy action tensor([ 0.4262, -0.2534, 0.1025, -0.2377]) tensor([0.3643, 0.1846, 0.2635, 0.1875]) -Greedy action tensor([ 0.4992, -0.2907, -0.0421, -0.4134]) tensor([0.4103, 0.1862, 0.2388, 0.1647]) -Greedy action tensor([ 0.6237, -0.1120, -0.1509, -0.4701]) tensor([0.4396, 0.2106, 0.2026, 0.1472]) -Greedy action tensor([ 0.5920, -0.6630, 0.0685, -0.6827]) tensor([0.4636, 0.1322, 0.2746, 0.1296]) -Greedy action tensor([ 0.6176, -0.1560, -0.1059, -0.2295]) tensor([0.4211, 0.1942, 0.2042, 0.1805]) -Greedy action tensor([ 0.9022, -0.5387, 0.1249, -0.7074]) tensor([0.5273, 0.1248, 0.2424, 0.1054]) -Greedy action tensor([ 0.2950, 0.0201, -0.0282, -0.3641]) tensor([0.3333, 0.2531, 0.2412, 0.1724]) -Greedy action tensor([ 0.4598, -0.0980, -0.0344, -0.1873]) tensor([0.3695, 0.2115, 0.2254, 0.1935]) -Greedy action tensor([ 0.2020, 0.0610, 0.0073, -0.3110]) tensor([0.3039, 0.2639, 0.2502, 0.1820]) -Greedy action tensor([ 0.4877, 0.1039, 0.0506, -0.2545]) tensor([0.3567, 0.2430, 0.2304, 0.1698]) -Greedy action tensor([ 0.5519, -0.0386, -0.0537, -0.3165]) tensor([0.3969, 0.2199, 0.2166, 0.1666]) -Greedy action tensor([ 0.5546, -0.3577, 0.0172, -0.5324]) tensor([0.4305, 0.1729, 0.2515, 0.1452]) -Greedy action tensor([ 0.5939, -0.3573, -0.0238, -0.2859]) tensor([0.4273, 0.1651, 0.2304, 0.1773]) -Greedy action tensor([ 0.4717, -0.0398, 0.1351, -0.2629]) tensor([0.3580, 0.2146, 0.2557, 0.1717]) -Greedy action tensor([ 0.5831, -0.2245, -0.0955, -0.3265]) tensor([0.4245, 0.1893, 0.2153, 0.1709]) -Greedy action tensor([ 0.9059, -0.5806, 0.2439, -0.6862]) tensor([0.5140, 0.1163, 0.2651, 0.1046]) -Greedy action tensor([ 0.3338, -0.0886, 0.0845, -0.2070]) tensor([0.3314, 0.2173, 0.2583, 0.1930]) -Greedy action tensor([ 0.3092, -0.0536, -0.0725, -0.0299]) tensor([0.3235, 0.2251, 0.2209, 0.2305]) -Greedy action tensor([ 0.4145, -0.0405, 0.1137, -0.4678]) tensor([0.3586, 0.2275, 0.2654, 0.1484]) -Greedy action tensor([ 0.0652, -0.0265, 0.1223, -0.0548]) tensor([0.2592, 0.2365, 0.2744, 0.2299]) -Greedy action tensor([ 0.8946, -0.6362, 0.0997, -0.6196]) tensor([0.5297, 0.1146, 0.2392, 0.1165]) -Greedy action tensor([ 0.3081, 0.0847, 0.1059, -0.2381]) tensor([0.3129, 0.2503, 0.2556, 0.1812]) -Greedy action tensor([ 0.3212, 0.0140, -0.1538, -0.1662]) tensor([0.3365, 0.2475, 0.2093, 0.2067]) -Greedy action tensor([ 0.7435, -0.0208, -0.0931, -0.2925]) tensor([0.4437, 0.2066, 0.1922, 0.1575]) -Greedy action tensor([ 1.0009, -0.4917, -0.0913, -0.5226]) tensor([0.5624, 0.1264, 0.1887, 0.1226]) -Greedy action tensor([ 0.5636, -0.0471, -0.1183, -0.2839]) tensor([0.4037, 0.2192, 0.2041, 0.1730]) -Greedy action tensor([ 0.7461, -0.1919, -0.0475, -0.3030]) tensor([0.4558, 0.1784, 0.2061, 0.1597]) -Greedy action tensor([ 0.3740, -0.2085, 0.1348, -0.2799]) tensor([0.3489, 0.1949, 0.2747, 0.1815]) -Greedy action tensor([ 0.6076, -0.2412, 0.0257, -0.4388]) tensor([0.4277, 0.1830, 0.2390, 0.1502]) -Greedy action tensor([ 0.4739, -0.3748, -0.0836, -0.3862]) tensor([0.4126, 0.1766, 0.2363, 0.1746]) -Greedy action tensor([ 0.7466, -0.5165, 0.2165, -0.5540]) tensor([0.4665, 0.1319, 0.2745, 0.1271]) -Greedy action tensor([ 0.3280, 0.0879, 0.0658, -0.2342]) tensor([0.3199, 0.2516, 0.2461, 0.1823]) -Greedy action tensor([ 0.7619, -0.1367, -0.0417, -0.1171]) tensor([0.4405, 0.1793, 0.1972, 0.1829]) -Greedy action tensor([ 0.4695, -0.2151, -0.0808, -0.1786]) tensor([0.3840, 0.1936, 0.2215, 0.2009]) -Greedy action tensor([ 0.7278, -0.3505, -0.0515, -0.2854]) tensor([0.4625, 0.1573, 0.2122, 0.1679]) -Greedy action tensor([ 0.2690, 0.1970, 0.1653, -0.2120]) tensor([0.2899, 0.2697, 0.2613, 0.1792]) -Greedy action tensor([ 0.4274, -0.2202, 0.0654, -0.2641]) tensor([0.3676, 0.1924, 0.2560, 0.1841]) -Greedy action tensor([ 0.5220, -0.3688, 0.1439, -0.3868]) tensor([0.4002, 0.1642, 0.2742, 0.1613]) -Greedy action tensor([ 0.2193, 0.1259, -0.0374, -0.2336]) tensor([0.3012, 0.2743, 0.2330, 0.1915]) -Greedy action tensor([ 0.8764, -0.5107, -0.0780, -0.3818]) tensor([0.5211, 0.1302, 0.2006, 0.1481]) -Greedy action tensor([ 0.7263, -0.5376, -0.1041, -0.4754]) tensor([0.4953, 0.1399, 0.2159, 0.1489]) -Greedy action tensor([ 0.6051, -0.3956, -0.0314, -0.2816]) tensor([0.4331, 0.1592, 0.2292, 0.1784]) -Greedy action tensor([ 0.4995, 0.1123, -0.0411, 0.0032]) tensor([0.3484, 0.2366, 0.2029, 0.2121]) -Greedy action tensor([ 0.6896, -0.6126, 0.0203, -0.5831]) tensor([0.4845, 0.1317, 0.2481, 0.1357]) -Greedy action tensor([ 1.0614, -0.6200, -0.0310, -0.5999]) tensor([0.5843, 0.1088, 0.1960, 0.1110]) -Greedy action tensor([ 0.2221, 0.1453, -0.6726, 0.5653]) tensor([0.2671, 0.2473, 0.1092, 0.3764]) -Greedy action tensor([ 0.7108, -0.1734, -0.1142, 1.1749]) tensor([0.2905, 0.1200, 0.1273, 0.4621]) -Greedy action tensor([ 1.0119, -0.7803, 1.1072, 1.4062]) tensor([0.2667, 0.0444, 0.2933, 0.3956]) -Greedy action tensor([ 0.2105, 0.1107, 1.1472, -0.3661]) tensor([0.1993, 0.1803, 0.5084, 0.1120]) -Greedy action tensor([ 0.7029, 0.0600, -0.2148, 0.0411]) tensor([0.4096, 0.2154, 0.1636, 0.2113]) -Greedy action tensor([ 0.3396, -0.1602, 1.0698, 0.6549]) tensor([0.1979, 0.1201, 0.4108, 0.2713]) -Greedy action tensor([ 0.8351, 0.0122, -0.5071, 1.4676]) tensor([0.2791, 0.1226, 0.0729, 0.5254]) -Greedy action tensor([ 1.6335, -0.8858, 1.2522, 0.6530]) tensor([0.4676, 0.0376, 0.3194, 0.1754]) -Greedy action tensor([ 1.3600, -1.4235, 0.7246, 0.9513]) tensor([0.4433, 0.0274, 0.2348, 0.2945]) -Greedy action tensor([1.3609, 0.0275, 0.4396, 0.7655]) tensor([0.4519, 0.1191, 0.1799, 0.2491]) -Greedy action tensor([ 0.4610, -0.6003, -0.3228, 0.9930]) tensor([0.2853, 0.0987, 0.1303, 0.4857]) -Greedy action tensor([0.4575, 0.0701, 0.7062, 0.0805]) tensor([0.2742, 0.1861, 0.3516, 0.1881]) -Greedy action tensor([ 0.9326, -0.5252, -0.1245, 0.0366]) tensor([0.5029, 0.1170, 0.1747, 0.2053]) -Greedy action tensor([-0.0208, -1.3489, -0.9518, 0.9561]) tensor([0.2317, 0.0614, 0.0913, 0.6155]) -Greedy action tensor([ 0.3075, -0.7069, 0.5131, 0.9649]) tensor([0.2212, 0.0802, 0.2717, 0.4269]) -Greedy action tensor([ 0.4064, -0.5876, 0.3943, 1.4360]) tensor([0.1939, 0.0718, 0.1915, 0.5428]) -Greedy action tensor([ 1.3492, -0.3108, -0.5280, 1.6190]) tensor([0.3770, 0.0717, 0.0577, 0.4937]) -Greedy action tensor([ 1.0156, -0.8396, 0.9443, 1.0304]) tensor([0.3223, 0.0504, 0.3001, 0.3271]) -Greedy action tensor([1.9372, 0.4985, 0.1237, 0.5689]) tensor([0.6043, 0.1434, 0.0985, 0.1538]) -Greedy action tensor([0.4532, 0.0222, 0.0394, 0.0125]) tensor([0.3385, 0.2200, 0.2238, 0.2178]) -Greedy action tensor([ 1.4463, -0.2045, 0.5963, 1.0788]) tensor([0.4326, 0.0830, 0.1849, 0.2995]) -Greedy action tensor([ 1.7364, -0.9205, 0.6186, 1.7976]) tensor([0.4065, 0.0285, 0.1329, 0.4321]) -Greedy action tensor([ 0.8142, -0.1810, 0.4475, 0.8831]) tensor([0.3191, 0.1179, 0.2211, 0.3418]) -Greedy action tensor([ 0.4170, 0.4332, -0.6897, 0.3716]) tensor([0.3028, 0.3077, 0.1001, 0.2893]) -Greedy action tensor([ 1.6501, -1.1776, 0.4330, 1.4179]) tensor([0.4656, 0.0275, 0.1378, 0.3691]) -Greedy action tensor([ 0.7721, -1.1469, 0.5008, 1.2591]) tensor([0.2828, 0.0415, 0.2156, 0.4602]) -Greedy action tensor([ 0.8593, -0.3693, 0.9419, 0.2644]) tensor([0.3413, 0.0999, 0.3706, 0.1882]) -Greedy action tensor([ 0.9796, -1.1391, 1.4486, 0.9040]) tensor([0.2743, 0.0330, 0.4384, 0.2543]) -Greedy action tensor([ 1.0696, -0.7105, 0.8348, 1.2029]) tensor([0.3224, 0.0544, 0.2549, 0.3683]) -Greedy action tensor([ 0.6866, -1.0587, -0.4678, 0.9406]) tensor([0.3599, 0.0628, 0.1134, 0.4639]) -Greedy action tensor([ 1.1194, -0.3539, 0.9881, 0.6088]) tensor([0.3695, 0.0847, 0.3241, 0.2218]) -Greedy action tensor([ 1.4201, -0.2180, 1.7190, 0.8182]) tensor([0.3236, 0.0629, 0.4363, 0.1772]) -Greedy action tensor([ 1.2358, -0.2572, 1.4744, 0.7078]) tensor([0.3243, 0.0729, 0.4116, 0.1912]) -Greedy action tensor([ 0.6686, -0.2664, -0.0038, -0.2021]) tensor([0.4307, 0.1691, 0.2199, 0.1803]) -Greedy action tensor([ 0.2824, -0.5092, 1.1664, 0.4217]) tensor([0.1991, 0.0902, 0.4819, 0.2288]) -Greedy action tensor([ 0.8821, -0.3892, -0.1202, 1.3447]) tensor([0.3091, 0.0867, 0.1134, 0.4908]) -Greedy action tensor([ 1.2942, -0.1586, 0.9464, 0.3549]) tensor([0.4290, 0.1004, 0.3030, 0.1677]) -Greedy action tensor([-0.0723, -0.5456, -0.4575, 1.1742]) tensor([0.1730, 0.1077, 0.1177, 0.6016]) -Greedy action tensor([ 0.3716, -0.0902, 0.6069, -0.3993]) tensor([0.2978, 0.1877, 0.3768, 0.1378]) -Greedy action tensor([ 1.1919, -0.1360, 0.2344, 1.5817]) tensor([0.3199, 0.0848, 0.1228, 0.4725]) -Greedy action tensor([-0.1638, -0.0116, 0.1502, 0.3485]) tensor([0.1922, 0.2238, 0.2631, 0.3208]) -Greedy action tensor([ 0.0420, 0.5239, 0.8132, -0.4093]) tensor([0.1846, 0.2988, 0.3991, 0.1175]) -Greedy action tensor([0.9979, 0.7795, 0.2883, 0.3215]) tensor([0.3566, 0.2867, 0.1754, 0.1813]) -Greedy action tensor([ 1.0128, -0.1129, -0.5362, 0.4223]) tensor([0.4783, 0.1552, 0.1016, 0.2650]) -Greedy action tensor([ 0.7790, -0.0872, 0.5406, 0.4257]) tensor([0.3436, 0.1445, 0.2707, 0.2413]) -Greedy action tensor([7.5450e-01, 2.8006e-01, 6.1268e-04, 1.1874e+00]) tensor([0.2751, 0.1712, 0.1295, 0.4242]) -Greedy action tensor([ 0.6460, 0.4337, -0.9419, 1.3082]) tensor([0.2530, 0.2046, 0.0517, 0.4906]) -Greedy action tensor([-0.4756, 0.3588, 0.5981, 0.2792]) tensor([0.1197, 0.2756, 0.3502, 0.2545]) -Greedy action tensor([ 0.2078, 0.6351, -0.6132, 0.5693]) tensor([0.2268, 0.3478, 0.0998, 0.3256]) -Greedy action tensor([ 1.7269, -0.5586, 1.4744, 1.9398]) tensor([0.3210, 0.0326, 0.2493, 0.3971]) -Greedy action tensor([ 0.6665, -0.4212, 1.5306, 0.6456]) tensor([0.2133, 0.0719, 0.5060, 0.2088]) -Greedy action tensor([ 0.6414, 0.0706, -0.0672, 0.8242]) tensor([0.3069, 0.1734, 0.1511, 0.3685]) -Greedy action tensor([ 0.6242, -0.4358, -0.1573, 0.6076]) tensor([0.3587, 0.1243, 0.1642, 0.3528]) -Greedy action tensor([ 0.8066, 0.2327, -0.5208, 0.4034]) tensor([0.4005, 0.2256, 0.1062, 0.2676]) -Greedy action tensor([ 0.4147, -1.7423, -0.2554, 0.1015]) tensor([0.4240, 0.0490, 0.2170, 0.3100]) -Greedy action tensor([-0.1086, -0.2465, -0.5302, 0.7659]) tensor([0.2031, 0.1769, 0.1332, 0.4868]) -Greedy action tensor([-0.0314, -0.8666, 0.3757, 1.2598]) tensor([0.1521, 0.0660, 0.2286, 0.5533]) -Greedy action tensor([1.0320, 0.1102, 0.5992, 1.1952]) tensor([0.3102, 0.1234, 0.2012, 0.3652]) -Greedy action tensor([ 0.4682, 0.5142, -0.4114, 1.6627]) tensor([0.1735, 0.1817, 0.0720, 0.5729]) -Greedy action tensor([ 0.1042, -0.6256, 0.2494, 0.4987]) tensor([0.2426, 0.1169, 0.2805, 0.3599]) -Greedy action tensor([0.7354, 0.1727, 0.9528, 0.4460]) tensor([0.2808, 0.1600, 0.3490, 0.2102]) -Greedy action tensor([1.1744, 0.3055, 0.2077, 1.4440]) tensor([0.3216, 0.1349, 0.1223, 0.4211]) -Greedy action tensor([ 1.0748, -1.1137, 1.3257, 0.3184]) tensor([0.3488, 0.0391, 0.4483, 0.1637]) -Greedy action tensor([ 0.0939, -0.2239, 0.2590, 0.2837]) tensor([0.2429, 0.1768, 0.2865, 0.2937]) -Greedy action tensor([ 1.1590, -0.4150, 0.7477, 1.5177]) tensor([0.3029, 0.0628, 0.2008, 0.4336]) -Greedy action tensor([ 0.2521, -0.9814, -0.0755, 1.3092]) tensor([0.2045, 0.0596, 0.1474, 0.5886]) -Greedy action tensor([ 0.8790, -0.0258, 0.0511, -0.9711]) tensor([0.5003, 0.2024, 0.2186, 0.0787]) -Greedy action tensor([ 1.9229, -0.6465, 1.1045, 0.9788]) tensor([0.5244, 0.0402, 0.2314, 0.2040]) -Greedy action tensor([ 0.8637, -0.3610, -0.3306, 1.2196]) tensor([0.3307, 0.0972, 0.1002, 0.4720]) -Greedy action tensor([ 1.6046, -0.5275, 1.4170, 1.3981]) tensor([0.3622, 0.0430, 0.3002, 0.2946]) -Greedy action tensor([ 1.5172, -0.8761, 0.6885, 1.7520]) tensor([0.3581, 0.0327, 0.1563, 0.4529]) -Greedy action tensor([ 1.3004, -0.4443, 0.5457, 1.0579]) tensor([0.4116, 0.0719, 0.1935, 0.3230]) -Greedy action tensor([ 0.5038, 0.4018, -0.2236, -0.0054]) tensor([0.3347, 0.3023, 0.1617, 0.2012]) -Greedy action tensor([ 0.5141, -1.0680, 0.2436, 0.5243]) tensor([0.3357, 0.0690, 0.2561, 0.3391]) -Greedy action tensor([ 0.6990, -0.8303, 0.2798, -0.2281]) tensor([0.4405, 0.0955, 0.2897, 0.1743]) -Greedy action tensor([ 0.9830, -0.1253, -0.2622, 0.5757]) tensor([0.4379, 0.1446, 0.1261, 0.2914]) -Greedy action tensor([ 1.0594, 0.7235, -0.8080, 0.8266]) tensor([0.3757, 0.2685, 0.0581, 0.2977]) -Greedy action tensor([ 1.2927, -0.9764, 1.1132, 0.1748]) tensor([0.4413, 0.0456, 0.3688, 0.1443]) -Greedy action tensor([ 1.2198, -1.4364, -0.0088, 0.6541]) tensor([0.5179, 0.0364, 0.1516, 0.2941]) -Greedy action tensor([ 1.2570, -0.2514, 0.7089, 0.5424]) tensor([0.4369, 0.0967, 0.2526, 0.2138]) -Greedy action tensor([ 0.5131, 0.4694, -0.7270, 1.8588]) tensor([0.1643, 0.1573, 0.0475, 0.6309]) -Greedy action tensor([ 0.9162, -0.6040, 0.9213, 1.4607]) tensor([0.2533, 0.0554, 0.2546, 0.4367]) -Greedy action tensor([ 0.6771, -0.1528, 0.0055, 0.0339]) tensor([0.4044, 0.1764, 0.2066, 0.2126]) -Greedy action tensor([ 9.8045e-01, -3.8538e-01, -2.4221e-01, 3.6784e-04]) tensor([0.5195, 0.1326, 0.1530, 0.1950]) -Greedy action tensor([ 0.7458, -0.5750, -0.1195, 0.1500]) tensor([0.4466, 0.1192, 0.1880, 0.2462]) -Greedy action tensor([ 1.3001, -0.6229, -0.3535, -0.0303]) tensor([0.6243, 0.0912, 0.1195, 0.1650]) -Greedy action tensor([ 1.3728, -0.5113, -0.2805, 0.4190]) tensor([0.5785, 0.0879, 0.1107, 0.2229]) -Greedy action tensor([ 1.2332, -0.3159, -0.1138, 0.1277]) tensor([0.5545, 0.1178, 0.1442, 0.1836]) -Greedy action tensor([ 1.0271, -0.3877, -0.5710, 0.4271]) tensor([0.5015, 0.1218, 0.1014, 0.2752]) -Greedy action tensor([ 0.5975, -0.4598, 0.0124, -0.0404]) tensor([0.4110, 0.1428, 0.2290, 0.2172]) -Greedy action tensor([ 1.0798, -0.6126, -0.3699, 0.8309]) tensor([0.4549, 0.0837, 0.1067, 0.3546]) -Greedy action tensor([ 0.9404, -0.0657, -0.1663, -0.1175]) tensor([0.4894, 0.1789, 0.1618, 0.1699]) -Greedy action tensor([ 0.2963, -0.2319, -0.3531, 0.0518]) tensor([0.3454, 0.2037, 0.1804, 0.2705]) -Greedy action tensor([ 0.9567, 0.0793, 0.0438, -0.2177]) tensor([0.4703, 0.1956, 0.1888, 0.1453]) -Greedy action tensor([ 1.2946, -0.4634, -0.1947, -0.1432]) tensor([0.6115, 0.1054, 0.1379, 0.1452]) -Greedy action tensor([ 1.3782, -0.8578, -0.6449, 0.6892]) tensor([0.5743, 0.0614, 0.0759, 0.2884]) -Greedy action tensor([ 0.8231, -0.3942, -0.6618, 0.1979]) tensor([0.4860, 0.1439, 0.1101, 0.2601]) -Greedy action tensor([ 0.3604, 0.1286, 0.1370, -0.2344]) tensor([0.3180, 0.2522, 0.2543, 0.1754]) -Greedy action tensor([ 0.8337, -0.4463, -0.0182, 0.1057]) tensor([0.4571, 0.1271, 0.1950, 0.2207]) -Greedy action tensor([ 0.9152, -0.2100, -0.2277, -0.4385]) tensor([0.5258, 0.1707, 0.1677, 0.1358]) -Greedy action tensor([ 0.8716, -0.3211, -0.4948, 0.6979]) tensor([0.4168, 0.1265, 0.1063, 0.3504]) -Greedy action tensor([ 0.6657, -0.3376, -0.3716, 0.4761]) tensor([0.3924, 0.1439, 0.1391, 0.3246]) -Greedy action tensor([ 0.4838, -0.3543, -0.2379, 0.0866]) tensor([0.3860, 0.1669, 0.1876, 0.2595]) -Greedy action tensor([ 1.0768, -0.7248, -0.6596, 0.3177]) tensor([0.5527, 0.0912, 0.0974, 0.2587]) -Greedy action tensor([ 1.5228, -0.3555, -0.3061, -0.0498]) tensor([0.6575, 0.1005, 0.1056, 0.1364]) -Greedy action tensor([ 1.4019, -0.5299, -0.4634, 0.1380]) tensor([0.6320, 0.0916, 0.0979, 0.1786]) -Greedy action tensor([ 1.1351, -0.6870, -0.2972, 0.4445]) tensor([0.5258, 0.0850, 0.1255, 0.2636]) -Greedy action tensor([ 1.1050, -0.6924, -0.4835, 0.9024]) tensor([0.4573, 0.0758, 0.0934, 0.3735]) -Greedy action tensor([ 0.8256, -0.4578, 0.1040, -0.1188]) tensor([0.4647, 0.1288, 0.2258, 0.1807]) -Greedy action tensor([ 1.0844, -0.3564, -0.1355, 0.3053]) tensor([0.5023, 0.1189, 0.1483, 0.2305]) -Greedy action tensor([ 0.4010, -0.1966, -0.5624, 0.2371]) tensor([0.3596, 0.1978, 0.1372, 0.3053]) -Greedy action tensor([ 0.9008, -0.4883, -0.3533, 0.3044]) tensor([0.4795, 0.1196, 0.1368, 0.2641]) -Greedy action tensor([ 1.2852, -0.6293, -0.2685, 0.5889]) tensor([0.5384, 0.0794, 0.1139, 0.2684]) -Greedy action tensor([ 0.9057, -0.5065, -0.6414, -0.0410]) tensor([0.5422, 0.1321, 0.1154, 0.2104]) -Greedy action tensor([ 0.5387, -0.3235, -0.1132, -0.0264]) tensor([0.3981, 0.1681, 0.2075, 0.2263]) -Greedy action tensor([ 0.7539, -0.2735, -0.4636, -0.2452]) tensor([0.4945, 0.1770, 0.1464, 0.1821]) -Greedy action tensor([ 0.4763, -0.1838, -0.5361, 0.0030]) tensor([0.3995, 0.2065, 0.1452, 0.2489]) -Greedy action tensor([ 1.0975, -0.5305, -0.2367, 0.0333]) tensor([0.5541, 0.1088, 0.1459, 0.1912]) -Greedy action tensor([ 1.1483, -0.1804, -0.1104, -0.1847]) tensor([0.5517, 0.1461, 0.1567, 0.1455]) -Greedy action tensor([ 0.9448, -0.5924, -0.3009, 0.2867]) tensor([0.4949, 0.1064, 0.1424, 0.2563]) -Greedy action tensor([ 1.7466, -0.4489, -0.4757, -0.0895]) tensor([0.7251, 0.0807, 0.0786, 0.1156]) -Greedy action tensor([ 0.1657, 0.2060, -0.5318, -0.3125]) tensor([0.3166, 0.3296, 0.1576, 0.1962]) -Greedy action tensor([ 1.1808, -0.7321, -0.2195, 0.1144]) tensor([0.5752, 0.0849, 0.1418, 0.1980]) -Greedy action tensor([ 0.4045, -0.2240, -0.2636, -0.2050]) tensor([0.3862, 0.2060, 0.1980, 0.2099]) -Greedy action tensor([ 0.7178, -0.2248, -0.1109, -0.0841]) tensor([0.4396, 0.1713, 0.1919, 0.1972]) -Greedy action tensor([ 0.8446, 0.2291, 0.1726, -0.3542]) tensor([0.4251, 0.2297, 0.2171, 0.1282]) -Greedy action tensor([ 1.2761, -0.6058, -0.1219, 0.0568]) tensor([0.5900, 0.0899, 0.1458, 0.1743]) -Greedy action tensor([ 0.9003, -0.5976, -0.2448, 0.1661]) tensor([0.4946, 0.1106, 0.1574, 0.2374]) -Greedy action tensor([ 1.0122, -0.4221, -0.1404, 0.0042]) tensor([0.5211, 0.1242, 0.1646, 0.1902]) -Greedy action tensor([ 0.5939, -0.3725, -0.2337, -0.2533]) tensor([0.4452, 0.1694, 0.1946, 0.1908]) -Greedy action tensor([ 0.4383, -0.3389, 0.0404, -0.1756]) tensor([0.3742, 0.1720, 0.2513, 0.2025]) -Greedy action tensor([ 1.2850, -0.1068, -0.0506, 0.0042]) tensor([0.5588, 0.1389, 0.1470, 0.1553]) -Greedy action tensor([ 0.9194, -0.3432, -0.4399, -0.1389]) tensor([0.5300, 0.1500, 0.1361, 0.1839]) -Greedy action tensor([ 1.0220, -0.2815, -0.2923, 0.3143]) tensor([0.4919, 0.1336, 0.1321, 0.2424]) -Greedy action tensor([ 0.7094, -0.5057, -0.0714, 0.1670]) tensor([0.4281, 0.1270, 0.1961, 0.2488]) -Greedy action tensor([ 0.7875, -0.1786, 0.0202, -0.2019]) tensor([0.4511, 0.1717, 0.2094, 0.1677]) -Greedy action tensor([ 1.1154, -0.4855, -0.1396, 0.2984]) tensor([0.5185, 0.1046, 0.1478, 0.2291]) -Greedy action tensor([ 0.9908, -0.5430, -0.7723, 0.2276]) tensor([0.5396, 0.1164, 0.0925, 0.2515]) -Greedy action tensor([ 0.8358, -0.1726, -0.1013, 0.3799]) tensor([0.4183, 0.1526, 0.1639, 0.2652]) -Greedy action tensor([ 1.1109, -0.4825, -0.4007, 0.1076]) tensor([0.5585, 0.1135, 0.1232, 0.2048]) -Greedy action tensor([ 1.4370, -0.5233, -0.5753, 0.2821]) tensor([0.6291, 0.0886, 0.0841, 0.1982]) -Greedy action tensor([ 0.7977, -0.2391, -0.1857, -0.0664]) tensor([0.4651, 0.1649, 0.1740, 0.1960]) -Greedy action tensor([ 1.0089, -0.6150, -0.5509, 0.8484]) tensor([0.4427, 0.0873, 0.0930, 0.3770]) -Greedy action tensor([ 0.7584, 0.0240, -0.1554, -0.1396]) tensor([0.4370, 0.2097, 0.1752, 0.1780]) -Greedy action tensor([ 1.2043, -0.6876, -0.3235, 0.1320]) tensor([0.5848, 0.0882, 0.1269, 0.2001]) -Greedy action tensor([ 0.9558, -0.2694, -0.1738, -0.0253]) tensor([0.5021, 0.1475, 0.1623, 0.1882]) -Greedy action tensor([ 1.1443, -0.4783, -0.0973, -0.2291]) tensor([0.5749, 0.1135, 0.1661, 0.1456]) -Greedy action tensor([ 0.9214, -0.4581, -0.0050, -0.2557]) tensor([0.5113, 0.1287, 0.2025, 0.1576]) -Greedy action tensor([ 1.8984, -1.2858, -0.3203, 0.6869]) tensor([0.6906, 0.0286, 0.0751, 0.2056]) -Greedy action tensor([ 1.2117, -0.4377, -0.4452, 0.3236]) tensor([0.5573, 0.1071, 0.1063, 0.2293]) -Greedy action tensor([ 0.9582, -0.5012, 0.1037, 0.1601]) tensor([0.4744, 0.1102, 0.2018, 0.2136]) -Greedy action tensor([ 0.8414, -0.5223, -0.3707, 0.5540]) tensor([0.4341, 0.1110, 0.1292, 0.3257]) -Greedy action tensor([ 0.6215, -0.5491, -0.1148, 0.0644]) tensor([0.4234, 0.1313, 0.2027, 0.2425]) -Greedy action tensor([ 1.2216, -0.5336, -0.1433, 0.4630]) tensor([0.5273, 0.0911, 0.1347, 0.2469]) -Greedy action tensor([ 0.8617, -0.0743, -0.3012, -0.2028]) tensor([0.4879, 0.1913, 0.1525, 0.1683]) -Greedy action tensor([ 1.4964, -0.4579, -0.2998, -0.2203]) tensor([0.6724, 0.0953, 0.1116, 0.1208]) -Greedy action tensor([ 0.9957, -0.5681, -0.3042, 0.7330]) tensor([0.4443, 0.0930, 0.1211, 0.3416]) -Greedy action tensor([ 1.1808, -0.5786, -0.3347, 0.1703]) tensor([0.5695, 0.0980, 0.1251, 0.2073]) -Greedy action tensor([ 1.0239, -0.4031, -0.3609, 0.0349]) tensor([0.5369, 0.1289, 0.1344, 0.1997]) -Greedy action tensor([ 1.2827, -0.7573, -0.6558, 0.6890]) tensor([0.5476, 0.0712, 0.0788, 0.3024]) -Greedy action tensor([ 1.0940, -0.1504, -0.0148, 0.0151]) tensor([0.5107, 0.1471, 0.1685, 0.1736]) -Greedy action tensor([ 1.0301, -0.4042, -0.1664, 0.1387]) tensor([0.5127, 0.1222, 0.1550, 0.2102]) -Greedy action tensor([ 0.9612, -0.3115, -0.0579, 0.0131]) tensor([0.4930, 0.1381, 0.1779, 0.1910]) -Greedy action tensor([-1.0154, 0.2563, 0.2402, 0.3800]) tensor([0.0826, 0.2945, 0.2898, 0.3332]) -Greedy action tensor([-1.3305, -0.5141, 0.3761, 0.2667]) tensor([0.0729, 0.1650, 0.4019, 0.3602]) -Greedy action tensor([-0.6145, -0.3442, 0.8641, 1.4000]) tensor([0.0705, 0.0923, 0.3091, 0.5282]) -Greedy action tensor([-1.5728, -0.6379, 0.7400, 0.0567]) tensor([0.0533, 0.1358, 0.5388, 0.2721]) -Greedy action tensor([-1.8052, -0.7041, 0.0810, -0.3029]) tensor([0.0663, 0.1993, 0.4369, 0.2976]) -Greedy action tensor([-1.2087, -0.5904, 0.2852, 0.2228]) tensor([0.0870, 0.1614, 0.3875, 0.3641]) -Greedy action tensor([-1.0823, -0.5004, 0.6642, 1.2383]) tensor([0.0535, 0.0957, 0.3066, 0.5443]) -Greedy action tensor([-0.7865, -0.0202, -0.1788, -0.0396]) tensor([0.1409, 0.3031, 0.2587, 0.2973]) -Greedy action tensor([-1.7012, -0.7346, 0.5238, -0.0261]) tensor([0.0549, 0.1443, 0.5078, 0.2930]) -Greedy action tensor([-1.1247, -0.3997, 0.3313, 0.5045]) tensor([0.0803, 0.1658, 0.3444, 0.4095]) -Greedy action tensor([-1.9820, -0.7464, 1.5050, 0.8066]) tensor([0.0187, 0.0644, 0.6123, 0.3045]) -Greedy action tensor([-1.9649, -0.6361, 1.4258, 0.7601]) tensor([0.0201, 0.0760, 0.5971, 0.3069]) -Greedy action tensor([-2.0242, -0.7827, 0.7718, -0.0487]) tensor([0.0357, 0.1234, 0.5839, 0.2570]) -Greedy action tensor([-1.0576, -0.3263, 0.6529, 0.9349]) tensor([0.0627, 0.1303, 0.3470, 0.4600]) -Greedy action tensor([-1.8270, -0.5156, 0.7441, -0.1188]) tensor([0.0429, 0.1592, 0.5611, 0.2368]) -Greedy action tensor([-1.8011, -0.4617, 0.5882, -0.1006]) tensor([0.0472, 0.1800, 0.5144, 0.2583]) -Greedy action tensor([-1.9547, -0.4252, 0.9938, 0.4306]) tensor([0.0281, 0.1298, 0.5365, 0.3055]) -Greedy action tensor([-1.3172, -0.5278, 0.3480, 0.3788]) tensor([0.0717, 0.1580, 0.3792, 0.3911]) -Greedy action tensor([-1.7123, -0.6417, 0.1483, -0.3907]) tensor([0.0710, 0.2070, 0.4561, 0.2660]) -Greedy action tensor([-1.0490, -0.3880, 0.1826, -0.3336]) tensor([0.1189, 0.2303, 0.4075, 0.2432]) -Greedy action tensor([-1.5139, -0.3072, 0.4487, 0.0358]) tensor([0.0618, 0.2067, 0.4402, 0.2913]) -Greedy action tensor([-1.9275, -0.9129, 0.6870, 0.1465]) tensor([0.0394, 0.1087, 0.5383, 0.3136]) -Greedy action tensor([-1.7244, -0.5126, 0.6164, 0.0310]) tensor([0.0487, 0.1636, 0.5060, 0.2818]) -Greedy action tensor([-0.3589, -0.3969, 0.1777, 0.2842]) tensor([0.1794, 0.1727, 0.3067, 0.3412]) -Greedy action tensor([-2.0175, -0.9328, 0.3411, -0.2150]) tensor([0.0485, 0.1436, 0.5134, 0.2944]) -Greedy action tensor([-1.9118, -0.4629, 0.6605, -0.1527]) tensor([0.0414, 0.1762, 0.5420, 0.2404]) -Greedy action tensor([-1.4599, -0.5387, 0.4138, 0.2000]) tensor([0.0654, 0.1644, 0.4261, 0.3441]) -Greedy action tensor([-1.0582, -0.6077, 0.2499, 0.2457]) tensor([0.1005, 0.1577, 0.3717, 0.3701]) -Greedy action tensor([-1.4813, -0.5559, 0.4959, 0.2135]) tensor([0.0618, 0.1558, 0.4461, 0.3364]) -Greedy action tensor([-1.6811, -0.4880, 0.5391, -0.0155]) tensor([0.0532, 0.1754, 0.4900, 0.2814]) -Greedy action tensor([-1.0083, -0.5812, 0.2820, 0.1877]) tensor([0.1056, 0.1618, 0.3836, 0.3491]) -Greedy action tensor([-1.2120, -0.5346, 0.6369, -0.4447]) tensor([0.0871, 0.1716, 0.5536, 0.1877]) -Greedy action tensor([-0.5851, 0.7657, -0.8078, -0.0112]) tensor([0.1345, 0.5192, 0.1076, 0.2387]) -Greedy action tensor([-0.7519, -0.5184, 0.2430, 0.1887]) tensor([0.1328, 0.1678, 0.3592, 0.3402]) -Greedy action tensor([-0.7721, 1.0215, 0.3006, 0.2205]) tensor([0.0792, 0.4758, 0.2314, 0.2136]) -Greedy action tensor([-1.6545, -1.0639, -0.0036, -0.7934]) tensor([0.0963, 0.1739, 0.5020, 0.2279]) -Greedy action tensor([-1.4906, -0.3391, 0.4511, -0.4332]) tensor([0.0714, 0.2257, 0.4975, 0.2054]) -Greedy action tensor([-1.2973, -0.5294, 0.3275, 0.2619]) tensor([0.0770, 0.1659, 0.3909, 0.3661]) -Greedy action tensor([-0.7511, 0.3370, 0.2169, 0.4638]) tensor([0.1003, 0.2977, 0.2640, 0.3380]) -Greedy action tensor([-1.1043, -0.5038, 0.5092, 0.8121]) tensor([0.0683, 0.1245, 0.3429, 0.4642]) -Greedy action tensor([-1.8729, -0.4702, 0.6436, -0.1088]) tensor([0.0429, 0.1746, 0.5318, 0.2506]) -Greedy action tensor([-1.5952, -0.3682, 0.8063, 0.7561]) tensor([0.0385, 0.1314, 0.4254, 0.4046]) -Greedy action tensor([-1.9605, -0.9245, 0.3033, -0.1812]) tensor([0.0516, 0.1455, 0.4968, 0.3060]) -Greedy action tensor([-1.8795, -0.9279, 0.6765, -0.0795]) tensor([0.0444, 0.1150, 0.5720, 0.2686]) -Greedy action tensor([-0.5584, -0.5334, 0.3032, 0.4025]) tensor([0.1427, 0.1463, 0.3378, 0.3731]) -Greedy action tensor([-0.9569, -0.6137, 0.2578, 0.2152]) tensor([0.1110, 0.1565, 0.3741, 0.3585]) -Greedy action tensor([-1.7609, -0.4926, 0.5688, -0.0331]) tensor([0.0489, 0.1738, 0.5022, 0.2751]) -Greedy action tensor([-1.9228, -0.4105, 1.0360, 0.5795]) tensor([0.0270, 0.1226, 0.5206, 0.3298]) -Greedy action tensor([-0.8315, -0.3515, 0.4651, 1.2308]) tensor([0.0707, 0.1143, 0.2587, 0.5563]) -Greedy action tensor([-1.4524, -0.6209, 0.5294, 0.1491]) tensor([0.0645, 0.1481, 0.4677, 0.3198]) -Greedy action tensor([-1.4827, -0.3219, 0.9193, 0.9426]) tensor([0.0377, 0.1203, 0.4161, 0.4259]) -Greedy action tensor([-0.7053, -0.0462, 0.6710, 1.3843]) tensor([0.0668, 0.1291, 0.2645, 0.5397]) -Greedy action tensor([-1.7408, -0.7261, 0.1880, -0.4280]) tensor([0.0697, 0.1921, 0.4793, 0.2589]) -Greedy action tensor([-1.1808, -0.5074, 0.3558, 0.4561]) tensor([0.0784, 0.1538, 0.3646, 0.4031]) -Greedy action tensor([-1.7891, -0.4104, 0.5761, -0.0774]) tensor([0.0473, 0.1877, 0.5033, 0.2618]) -Greedy action tensor([-1.6982, -0.4995, 0.5318, -0.0291]) tensor([0.0528, 0.1752, 0.4914, 0.2805]) -Greedy action tensor([-1.7873, -0.3407, 0.5850, -0.0409]) tensor([0.0461, 0.1957, 0.4940, 0.2642]) -Greedy action tensor([-1.6671, -0.7015, 0.1437, -0.2753]) tensor([0.0727, 0.1908, 0.4443, 0.2922]) -Greedy action tensor([-0.6646, -0.5065, 0.2563, -0.0009]) tensor([0.1509, 0.1768, 0.3791, 0.2931]) -Greedy action tensor([-1.5281, -1.0513, 0.0093, -0.8328]) tensor([0.1079, 0.1738, 0.5020, 0.2163]) -Greedy action tensor([-0.8414, -0.5181, 0.3998, 0.2267]) tensor([0.1143, 0.1579, 0.3953, 0.3325]) -Greedy action tensor([-1.3809, -0.5744, 0.3656, 0.1542]) tensor([0.0734, 0.1645, 0.4211, 0.3409]) -Greedy action tensor([ 0.0719, -0.0064, 0.9151, 1.5353]) tensor([0.1167, 0.1079, 0.2712, 0.5042]) -Greedy action tensor([-1.6136, -0.3796, 0.6665, 0.4673]) tensor([0.0450, 0.1546, 0.4400, 0.3605]) -Greedy action tensor([-1.8987, -0.4580, 0.6477, -0.1461]) tensor([0.0421, 0.1778, 0.5372, 0.2429]) -Greedy action tensor([-1.9973, -0.6209, 1.0604, 0.3537]) tensor([0.0272, 0.1078, 0.5792, 0.2857]) -Greedy action tensor([-1.4378, -0.5030, 1.3825, 1.1927]) tensor([0.0292, 0.0744, 0.4906, 0.4058]) -Greedy action tensor([-0.5313, -0.4767, 0.1918, 0.1141]) tensor([0.1660, 0.1753, 0.3421, 0.3165]) -Greedy action tensor([-1.5048, -0.4233, 0.5675, 0.4289]) tensor([0.0532, 0.1568, 0.4223, 0.3677]) -Greedy action tensor([-1.1782, -0.5238, -0.3389, -0.3288]) tensor([0.1320, 0.2539, 0.3055, 0.3086]) -Greedy action tensor([-2.0409, -0.9512, 0.9203, 0.4225]) tensor([0.0285, 0.0849, 0.5514, 0.3352]) -Greedy action tensor([-1.8278, -0.4503, 0.6159, -0.1368]) tensor([0.0457, 0.1810, 0.5257, 0.2477]) -Greedy action tensor([-1.0087, -0.5736, 0.2515, 0.4315]) tensor([0.0972, 0.1501, 0.3426, 0.4101]) -Greedy action tensor([-1.5396, -0.4583, 1.3874, 1.0475]) tensor([0.0278, 0.0821, 0.5199, 0.3701]) -Greedy action tensor([-1.6386, -0.4021, 0.9819, 0.8670]) tensor([0.0329, 0.1131, 0.4515, 0.4025]) -Greedy action tensor([-1.6415, -0.4602, 0.5473, 0.1291]) tensor([0.0525, 0.1710, 0.4683, 0.3082]) -Greedy action tensor([-1.5650, -0.5432, 0.4763, 0.0693]) tensor([0.0602, 0.1673, 0.4638, 0.3087]) -Greedy action tensor([-1.4704, -0.5146, 0.5480, 0.3346]) tensor([0.0581, 0.1512, 0.4374, 0.3533]) -Greedy action tensor([-1.6710, -0.6650, 0.2976, -0.1470]) tensor([0.0646, 0.1766, 0.4624, 0.2964]) -Greedy action tensor([-1.6330, -0.5100, 0.5097, 0.0300]) tensor([0.0560, 0.1720, 0.4769, 0.2952]) -Greedy action tensor([-1.9323, -0.8012, 0.6165, 0.1868]) tensor([0.0397, 0.1229, 0.5073, 0.3301]) -Greedy action tensor([ 0.2924, -0.0572, 0.0043, -0.2436]) tensor([0.3290, 0.2319, 0.2466, 0.1925]) -Greedy action tensor([ 0.3193, 0.2086, -0.1829, -0.1812]) tensor([0.3219, 0.2882, 0.1948, 0.1951]) -Greedy action tensor([ 0.8311, -0.4011, -0.0367, -0.3346]) tensor([0.4943, 0.1441, 0.2075, 0.1541]) -Greedy action tensor([ 0.4252, -0.1708, -0.0531, -0.2341]) tensor([0.3720, 0.2050, 0.2306, 0.1924]) -Greedy action tensor([ 0.6420, -0.5925, -0.0473, -0.3537]) tensor([0.4625, 0.1346, 0.2321, 0.1709]) -Greedy action tensor([ 0.3803, 0.0554, -0.0321, -0.1496]) tensor([0.3363, 0.2430, 0.2227, 0.1980]) -Greedy action tensor([ 0.5154, -0.1412, -0.0384, -0.3949]) tensor([0.4007, 0.2078, 0.2303, 0.1612]) -Greedy action tensor([ 0.4921, -0.2013, 0.0221, -0.2539]) tensor([0.3847, 0.1923, 0.2405, 0.1825]) -Greedy action tensor([ 0.5559, -0.1749, -0.0827, -0.1440]) tensor([0.3990, 0.1921, 0.2107, 0.1982]) -Greedy action tensor([ 0.2907, 0.2176, 0.2498, -0.2514]) tensor([0.2881, 0.2678, 0.2766, 0.1675]) -Greedy action tensor([ 0.5055, -0.2842, 0.1628, -0.6238]) tensor([0.4021, 0.1825, 0.2854, 0.1300]) -Greedy action tensor([ 0.2042, -0.0869, 0.0897, -0.3269]) tensor([0.3099, 0.2316, 0.2763, 0.1822]) -Greedy action tensor([ 0.0999, -0.1917, 0.0568, -0.2032]) tensor([0.2904, 0.2170, 0.2782, 0.2145]) -Greedy action tensor([ 0.1981, 0.0191, 0.0397, -0.2433]) tensor([0.3001, 0.2509, 0.2561, 0.1930]) -Greedy action tensor([ 0.5872, -0.3652, -0.0941, -0.4923]) tensor([0.4481, 0.1729, 0.2267, 0.1523]) -Greedy action tensor([ 0.3363, -0.1192, 0.0497, -0.2592]) tensor([0.3406, 0.2160, 0.2557, 0.1877]) -Greedy action tensor([ 0.4710, 0.1071, -0.0153, -0.1833]) tensor([0.3534, 0.2456, 0.2173, 0.1837]) -Greedy action tensor([ 0.6614, -0.3227, -0.0459, -0.2376]) tensor([0.4398, 0.1644, 0.2168, 0.1790]) -Greedy action tensor([ 0.4134, -0.0731, 0.0187, -0.4208]) tensor([0.3672, 0.2258, 0.2475, 0.1595]) -Greedy action tensor([ 0.5462, -0.0654, 0.0188, -0.2616]) tensor([0.3878, 0.2104, 0.2289, 0.1729]) -Greedy action tensor([ 0.3959, -0.1821, 0.0563, -0.3789]) tensor([0.3658, 0.2052, 0.2605, 0.1686]) -Greedy action tensor([ 0.4489, 0.0157, -0.0636, -0.1273]) tensor([0.3559, 0.2308, 0.2132, 0.2001]) -Greedy action tensor([ 0.5450, -0.2274, 0.0381, -0.5875]) tensor([0.4190, 0.1936, 0.2524, 0.1350]) -Greedy action tensor([ 0.8381, -0.7946, 0.0582, -0.7487]) tensor([0.5381, 0.1051, 0.2467, 0.1101]) -Greedy action tensor([ 0.3641, -0.2333, 0.0856, -0.2348]) tensor([0.3501, 0.1926, 0.2650, 0.1923]) -Greedy action tensor([ 0.4054, 0.1785, -0.0119, -0.0101]) tensor([0.3209, 0.2558, 0.2114, 0.2118]) -Greedy action tensor([ 0.4859, -0.2479, 0.0684, -0.4549]) tensor([0.3954, 0.1898, 0.2604, 0.1543]) -Greedy action tensor([ 0.9255, -0.6669, -0.1357, -0.6752]) tensor([0.5710, 0.1162, 0.1976, 0.1152]) -Greedy action tensor([ 0.3599, -0.1064, 0.0041, -0.2000]) tensor([0.3449, 0.2164, 0.2416, 0.1970]) -Greedy action tensor([ 0.5084, -0.1158, -0.0201, -0.2742]) tensor([0.3872, 0.2074, 0.2283, 0.1770]) -Greedy action tensor([ 1.0804, -0.7442, 0.0857, -0.7570]) tensor([0.5916, 0.0954, 0.2188, 0.0942]) -Greedy action tensor([ 0.4405, 0.1049, -0.0277, -0.1156]) tensor([0.3431, 0.2453, 0.2148, 0.1968]) -Greedy action tensor([ 0.2103, 0.0085, 0.0922, -0.1605]) tensor([0.2944, 0.2407, 0.2617, 0.2032]) -Greedy action tensor([ 0.4119, 0.0750, -0.0081, -0.3587]) tensor([0.3529, 0.2520, 0.2319, 0.1633]) -Greedy action tensor([ 0.6878, -0.3943, -0.1815, -0.4132]) tensor([0.4783, 0.1621, 0.2005, 0.1591]) -Greedy action tensor([ 0.5888, -0.2948, 0.0317, -0.3694]) tensor([0.4220, 0.1744, 0.2417, 0.1619]) -Greedy action tensor([ 0.9922, -0.3428, -0.0642, -0.4440]) tensor([0.5409, 0.1423, 0.1881, 0.1287]) -Greedy action tensor([ 1.2257e+00, -8.2609e-01, 9.5263e-04, -8.0475e-01]) tensor([0.6437, 0.0827, 0.1891, 0.0845]) -Greedy action tensor([ 0.3267, 0.0602, 0.0188, -0.2405]) tensor([0.3259, 0.2497, 0.2395, 0.1848]) -Greedy action tensor([ 0.4024, 0.0969, 0.0232, -0.1699]) tensor([0.3350, 0.2468, 0.2292, 0.1890]) -Greedy action tensor([ 0.7110, -0.2014, -0.0484, -0.4878]) tensor([0.4606, 0.1850, 0.2155, 0.1389]) -Greedy action tensor([ 0.4336, -0.1937, 0.1326, -0.6158]) tensor([0.3811, 0.2035, 0.2820, 0.1334]) -Greedy action tensor([ 0.5552, -0.0324, 0.0707, -0.4167]) tensor([0.3921, 0.2179, 0.2416, 0.1484]) -Greedy action tensor([ 0.4683, -0.2674, -0.0963, -0.4860]) tensor([0.4110, 0.1970, 0.2337, 0.1583]) -Greedy action tensor([ 0.6190, -0.1328, -0.0884, -0.1176]) tensor([0.4093, 0.1930, 0.2018, 0.1959]) -Greedy action tensor([ 0.5137, -0.1435, -0.0169, -0.3255]) tensor([0.3939, 0.2042, 0.2317, 0.1702]) -Greedy action tensor([ 0.5641, -0.1151, -0.0453, -0.3338]) tensor([0.4068, 0.2063, 0.2212, 0.1657]) -Greedy action tensor([ 0.2670, 0.1201, -0.1475, -0.0804]) tensor([0.3095, 0.2672, 0.2045, 0.2187]) -Greedy action tensor([ 0.4345, -0.0621, -0.1216, -0.1023]) tensor([0.3614, 0.2200, 0.2073, 0.2113]) -Greedy action tensor([ 0.6711, -0.1255, 0.1457, -0.5613]) tensor([0.4285, 0.1932, 0.2534, 0.1249]) -Greedy action tensor([ 0.5540, -0.0130, 0.1610, -0.3601]) tensor([0.3783, 0.2146, 0.2554, 0.1517]) -Greedy action tensor([ 0.2575, -0.0749, 0.2473, -0.1796]) tensor([0.2982, 0.2139, 0.2952, 0.1926]) -Greedy action tensor([ 0.4635, -0.2130, 0.1500, -0.2871]) tensor([0.3688, 0.1875, 0.2696, 0.1741]) -Greedy action tensor([ 0.5159, -0.2488, 0.1274, -0.5830]) tensor([0.4038, 0.1879, 0.2738, 0.1345]) -Greedy action tensor([ 0.3490, -0.0414, -0.0608, -0.4632]) tensor([0.3591, 0.2431, 0.2384, 0.1594]) -Greedy action tensor([0.4746, 0.0186, 0.0130, 0.0293]) tensor([0.3443, 0.2182, 0.2170, 0.2206]) -Greedy action tensor([ 1.0150, -0.5768, -0.1024, -0.5580]) tensor([0.5753, 0.1171, 0.1882, 0.1193]) -Greedy action tensor([ 0.5054, 0.0237, 0.0533, -0.3034]) tensor([0.3704, 0.2288, 0.2357, 0.1650]) -Greedy action tensor([ 0.3671, 0.3307, -0.0702, -0.3498]) tensor([0.3228, 0.3112, 0.2084, 0.1576]) -Greedy action tensor([ 0.3380, 0.1194, 0.0124, -0.2836]) tensor([0.3265, 0.2624, 0.2358, 0.1754]) -Greedy action tensor([ 0.3875, 0.1668, 0.1576, -0.1935]) tensor([0.3169, 0.2541, 0.2518, 0.1772]) -Greedy action tensor([ 1.0119, -0.5216, 0.0452, -0.6754]) tensor([0.5614, 0.1211, 0.2135, 0.1039]) -Greedy action tensor([ 0.8678, -0.6135, -0.0905, -0.5436]) tensor([0.5392, 0.1226, 0.2068, 0.1315]) -Greedy action tensor([ 0.8229, -0.3057, -0.2186, -0.6930]) tensor([0.5274, 0.1706, 0.1861, 0.1158]) -Greedy action tensor([ 0.8939, -0.4749, 0.0082, -0.5522]) tensor([0.5257, 0.1337, 0.2168, 0.1238]) -Greedy action tensor([ 0.8277, -0.3588, -0.0020, -0.4398]) tensor([0.4943, 0.1509, 0.2156, 0.1392]) -Greedy action tensor([ 0.6848, -0.1191, -0.0371, -0.3449]) tensor([0.4366, 0.1954, 0.2121, 0.1559]) -Greedy action tensor([ 0.5565, -0.2665, -0.1159, -0.3574]) tensor([0.4254, 0.1868, 0.2172, 0.1706]) -Greedy action tensor([ 0.3286, 0.0863, -0.0040, -0.0082]) tensor([0.3109, 0.2440, 0.2230, 0.2220]) -Greedy action tensor([ 0.5738, -0.2027, 0.0641, -0.4673]) tensor([0.4143, 0.1906, 0.2489, 0.1463]) -Greedy action tensor([ 0.2022, -0.1386, 0.0497, -0.2685]) tensor([0.3131, 0.2226, 0.2688, 0.1955]) -Greedy action tensor([ 0.2626, -0.1207, 0.0673, -0.3274]) tensor([0.3269, 0.2229, 0.2690, 0.1812]) -Greedy action tensor([ 0.2926, -0.0042, 0.0485, -0.2016]) tensor([0.3188, 0.2369, 0.2498, 0.1945]) -Greedy action tensor([ 0.7534, -0.4598, 0.1272, -0.3998]) tensor([0.4657, 0.1384, 0.2489, 0.1470]) -Greedy action tensor([ 0.2498, -0.0154, -0.0755, -0.2265]) tensor([0.3215, 0.2466, 0.2322, 0.1997]) -Greedy action tensor([ 0.4898, -0.3424, 0.1160, -0.3603]) tensor([0.3921, 0.1706, 0.2698, 0.1676]) -Greedy action tensor([ 0.5572, -0.1647, 0.1071, -0.3661]) tensor([0.3967, 0.1927, 0.2529, 0.1576]) -Greedy action tensor([ 0.5215, 0.1884, 0.0161, -0.1565]) tensor([0.3537, 0.2535, 0.2134, 0.1795]) -Greedy action tensor([ 0.5141, -0.1438, 0.0105, -0.3034]) tensor([0.3900, 0.2020, 0.2357, 0.1722]) -Greedy action tensor([ 0.5202, -0.2691, 0.0057, -0.2401]) tensor([0.3969, 0.1803, 0.2373, 0.1856]) -Greedy action tensor([ 0.7202, -0.1477, 0.1308, -0.3860]) tensor([0.4338, 0.1821, 0.2406, 0.1435]) -Greedy action tensor([-0.0815, -1.6775, 0.1622, -0.0518]) tensor([0.2850, 0.0578, 0.3636, 0.2936]) -Greedy action tensor([ 0.6671, 0.0454, -1.0583, 1.4723]) tensor([0.2530, 0.1359, 0.0451, 0.5661]) -Greedy action tensor([ 1.0840, -0.1004, 1.2883, 1.2434]) tensor([0.2699, 0.0826, 0.3310, 0.3165]) -Greedy action tensor([ 1.3224, -1.3529, 0.5954, 0.7908]) tensor([0.4673, 0.0322, 0.2259, 0.2746]) -Greedy action tensor([0.9208, 0.3469, 0.4206, 0.2833]) tensor([0.3706, 0.2088, 0.2247, 0.1959]) -Greedy action tensor([ 0.9993, -0.7831, 1.2766, -0.3668]) tensor([0.3646, 0.0613, 0.4811, 0.0930]) -Greedy action tensor([ 2.3841, -0.6302, 0.9799, 2.2348]) tensor([0.4638, 0.0228, 0.1139, 0.3995]) -Greedy action tensor([ 0.8273, -0.1441, -0.2229, 0.9890]) tensor([0.3444, 0.1304, 0.1205, 0.4048]) -Greedy action tensor([ 1.2887, -0.2774, -0.1181, -0.1672]) tensor([0.5928, 0.1238, 0.1452, 0.1382]) -Greedy action tensor([ 0.7734, 0.0990, -0.4377, 1.8904]) tensor([0.2056, 0.1048, 0.0613, 0.6284]) -Greedy action tensor([ 1.0091, -1.1011, 0.0452, 0.0592]) tensor([0.5293, 0.0642, 0.2019, 0.2047]) -Greedy action tensor([ 1.5440, -0.7386, -0.1642, 1.7781]) tensor([0.3926, 0.0401, 0.0711, 0.4962]) -Greedy action tensor([ 0.2886, 1.1225, -0.0622, -0.2854]) tensor([0.2188, 0.5038, 0.1541, 0.1233]) -Greedy action tensor([ 1.5382, -1.1117, -0.1401, 1.9636]) tensor([0.3587, 0.0253, 0.0670, 0.5490]) -Greedy action tensor([ 0.5511, 0.4652, -1.5136, 0.3246]) tensor([0.3519, 0.3229, 0.0446, 0.2806]) -Greedy action tensor([ 0.3043, 0.1948, 0.7525, -0.4479]) tensor([0.2543, 0.2279, 0.3980, 0.1198]) -Greedy action tensor([-0.2212, 0.5551, 0.1855, -0.7532]) tensor([0.1900, 0.4130, 0.2854, 0.1116]) -Greedy action tensor([ 1.2916, 0.1607, 1.2236, -0.0025]) tensor([0.3951, 0.1275, 0.3691, 0.1083]) -Greedy action tensor([ 1.1109, -0.6767, 0.7253, 0.1475]) tensor([0.4486, 0.0751, 0.3051, 0.1712]) -Greedy action tensor([ 0.5573, 0.8643, -1.0928, 0.8453]) tensor([0.2574, 0.3499, 0.0494, 0.3433]) -Greedy action tensor([ 1.0732, -0.8146, 0.0133, 0.6857]) tensor([0.4594, 0.0696, 0.1592, 0.3118]) -Greedy action tensor([ 1.8943, -0.7611, 1.2253, 1.0984]) tensor([0.4917, 0.0346, 0.2519, 0.2219]) -Greedy action tensor([ 1.7281, 0.1237, -0.8100, 1.4006]) tensor([0.4998, 0.1005, 0.0395, 0.3602]) -Greedy action tensor([ 0.9934, 0.4390, -0.4493, 0.6119]) tensor([0.4010, 0.2304, 0.0948, 0.2738]) -Greedy action tensor([ 0.4226, -0.2301, -0.3519, 0.4880]) tensor([0.3280, 0.1708, 0.1512, 0.3501]) -Greedy action tensor([ 0.2639, 0.0820, 0.5400, -0.0111]) tensor([0.2557, 0.2131, 0.3370, 0.1942]) -Greedy action tensor([0.1339, 0.4825, 0.8975, 0.4213]) tensor([0.1696, 0.2403, 0.3640, 0.2261]) -Greedy action tensor([ 0.4342, -0.7817, 0.1059, 2.3964]) tensor([0.1095, 0.0325, 0.0789, 0.7792]) -Greedy action tensor([1.1354, 0.5513, 0.1747, 0.7575]) tensor([0.3809, 0.2124, 0.1457, 0.2610]) -Greedy action tensor([ 0.2693, 0.5574, -0.4803, 1.0704]) tensor([0.1986, 0.2650, 0.0939, 0.4425]) -Greedy action tensor([0.6281, 0.0316, 0.6543, 0.3419]) tensor([0.3004, 0.1655, 0.3084, 0.2257]) -Greedy action tensor([ 0.4731, -0.2066, 0.1950, 0.5716]) tensor([0.2969, 0.1505, 0.2249, 0.3277]) -Greedy action tensor([ 1.6091, -0.3302, 1.1904, -0.5569]) tensor([0.5218, 0.0750, 0.3433, 0.0598]) -Greedy action tensor([ 1.2763, -0.6093, -0.1648, 0.5022]) tensor([0.5407, 0.0820, 0.1280, 0.2493]) -Greedy action tensor([ 1.7157, -1.3696, -0.1204, 1.1906]) tensor([0.5566, 0.0254, 0.0887, 0.3292]) -Greedy action tensor([ 1.2591, -0.6001, 0.6413, -0.2329]) tensor([0.5209, 0.0812, 0.2808, 0.1172]) -Greedy action tensor([0.2695, 0.3011, 0.1513, 0.3569]) tensor([0.2493, 0.2573, 0.2215, 0.2720]) -Greedy action tensor([ 1.6069, -1.6512, 1.1302, 1.2018]) tensor([0.4299, 0.0165, 0.2669, 0.2867]) -Greedy action tensor([ 1.7383, -0.4324, 0.8693, 1.3587]) tensor([0.4509, 0.0515, 0.1891, 0.3085]) -Greedy action tensor([ 0.0054, -0.6219, -0.6592, 1.2321]) tensor([0.1832, 0.0978, 0.0942, 0.6247]) -Greedy action tensor([ 0.7483, 0.0956, -1.5877, 0.5212]) tensor([0.4142, 0.2157, 0.0401, 0.3301]) -Greedy action tensor([ 1.8957, -0.4685, 1.0540, 1.4251]) tensor([0.4652, 0.0437, 0.2005, 0.2906]) -Greedy action tensor([ 1.2444, -0.1134, 1.3371, 0.9109]) tensor([0.3257, 0.0838, 0.3573, 0.2333]) -Greedy action tensor([ 1.0926, -0.2685, 1.3334, -0.1418]) tensor([0.3546, 0.0909, 0.4512, 0.1032]) -Greedy action tensor([ 0.6216, -1.8305, 0.4361, 0.6468]) tensor([0.3399, 0.0293, 0.2823, 0.3485]) -Greedy action tensor([ 0.5159, 0.0335, 0.5077, -0.0199]) tensor([0.3130, 0.1932, 0.3105, 0.1832]) -Greedy action tensor([ 0.9442, 0.3073, -0.3714, 0.5991]) tensor([0.3991, 0.2111, 0.1071, 0.2826]) -Greedy action tensor([ 0.6119, -0.7235, 0.3283, 0.9024]) tensor([0.2982, 0.0785, 0.2246, 0.3987]) -Greedy action tensor([ 1.1746, -0.0325, 1.3432, 0.5672]) tensor([0.3303, 0.0988, 0.3910, 0.1799]) -Greedy action tensor([ 0.6401, -0.5290, 0.5451, 0.2929]) tensor([0.3417, 0.1061, 0.3107, 0.2415]) -Greedy action tensor([ 0.1450, 1.2710, -0.3646, 0.7148]) tensor([0.1550, 0.4779, 0.0931, 0.2740]) -Greedy action tensor([ 0.3326, 0.6477, -0.5958, 0.5185]) tensor([0.2519, 0.3452, 0.0995, 0.3034]) -Greedy action tensor([ 0.9533, -0.0587, -0.1485, 0.5729]) tensor([0.4203, 0.1528, 0.1396, 0.2873]) -Greedy action tensor([ 0.1692, -0.2499, -0.5362, 0.5616]) tensor([0.2753, 0.1811, 0.1360, 0.4076]) -Greedy action tensor([ 0.7929, 0.7825, -0.0802, -0.0246]) tensor([0.3510, 0.3474, 0.1466, 0.1550]) -Greedy action tensor([ 0.6402, -0.6398, -0.9697, 2.1608]) tensor([0.1652, 0.0459, 0.0330, 0.7558]) -Greedy action tensor([ 1.6420, 0.7239, -0.3141, 0.9298]) tensor([0.4923, 0.1966, 0.0696, 0.2415]) -Greedy action tensor([1.0271, 0.7185, 0.8907, 0.0487]) tensor([0.3352, 0.2462, 0.2925, 0.1260]) -Greedy action tensor([ 0.8360, -1.1487, 0.6368, 0.8309]) tensor([0.3388, 0.0466, 0.2776, 0.3371]) -Greedy action tensor([ 0.6331, -1.0055, 0.7771, 0.7387]) tensor([0.2890, 0.0561, 0.3337, 0.3211]) -Greedy action tensor([ 0.6247, 0.8447, -0.4662, 0.7183]) tensor([0.2717, 0.3386, 0.0913, 0.2984]) -Greedy action tensor([ 0.9493, 0.9703, -0.5973, 0.8200]) tensor([0.3212, 0.3281, 0.0684, 0.2823]) -Greedy action tensor([ 0.3488, 0.0095, 1.0153, -0.1248]) tensor([0.2335, 0.1663, 0.4547, 0.1454]) -Greedy action tensor([-0.3438, -1.1053, 0.0132, 1.6800]) tensor([0.0956, 0.0446, 0.1366, 0.7232]) -Greedy action tensor([ 0.7962, 0.2917, -1.0777, 1.5200]) tensor([0.2618, 0.1581, 0.0402, 0.5399]) -Greedy action tensor([ 0.6910, -0.4815, -0.1252, 0.7398]) tensor([0.3569, 0.1105, 0.1578, 0.3748]) -Greedy action tensor([ 0.2674, -0.7290, 0.3692, 0.1703]) tensor([0.2955, 0.1091, 0.3272, 0.2682]) -Greedy action tensor([ 1.5928, -1.4430, 0.2056, 1.1335]) tensor([0.5183, 0.0249, 0.1294, 0.3274]) -Greedy action tensor([ 1.4525, -1.4330, 0.4940, 0.5342]) tensor([0.5439, 0.0304, 0.2086, 0.2171]) -Greedy action tensor([ 0.9247, -0.0428, 0.2484, 0.5044]) tensor([0.3929, 0.1493, 0.1998, 0.2581]) -Greedy action tensor([ 0.6804, -0.9731, 0.7257, 0.4513]) tensor([0.3297, 0.0631, 0.3450, 0.2622]) -Greedy action tensor([ 1.0381, -1.0917, 0.7285, 0.9772]) tensor([0.3580, 0.0425, 0.2627, 0.3368]) -Greedy action tensor([1.2503, 0.1225, 0.4248, 0.3503]) tensor([0.4612, 0.1493, 0.2020, 0.1875]) -Greedy action tensor([ 0.9883, -0.2707, 0.8296, 0.6856]) tensor([0.3477, 0.0987, 0.2967, 0.2569]) -Greedy action tensor([ 0.6801, -0.6415, 1.0904, 0.6711]) tensor([0.2656, 0.0708, 0.4003, 0.2632]) -Greedy action tensor([ 0.7398, 0.2024, -0.3923, 0.5282]) tensor([0.3682, 0.2151, 0.1187, 0.2980]) -Greedy action tensor([0.7657, 0.0773, 0.6751, 1.2586]) tensor([0.2467, 0.1240, 0.2254, 0.4039]) -Greedy action tensor([0.7579, 0.0936, 1.1096, 0.5593]) tensor([0.2662, 0.1370, 0.3785, 0.2183]) -Greedy action tensor([ 1.9049, -0.7675, 0.7061, 1.3650]) tensor([0.5119, 0.0354, 0.1544, 0.2984]) -Greedy action tensor([ 1.2128, -0.3320, 0.9579, 0.5254]) tensor([0.4014, 0.0856, 0.3111, 0.2019]) -Greedy action tensor([ 0.6000, -0.0379, 0.1619, 0.3258]) tensor([0.3408, 0.1801, 0.2199, 0.2591]) -Greedy action tensor([ 0.8449, -0.5061, 1.9045, 0.3061]) tensor([0.2115, 0.0548, 0.6103, 0.1234]) -Greedy action tensor([ 0.6470, -0.5181, -0.3552, 0.2015]) tensor([0.4311, 0.1345, 0.1583, 0.2761]) -Greedy action tensor([ 1.1929, -0.3642, -0.2393, 0.4706]) tensor([0.5167, 0.1089, 0.1234, 0.2510]) -Greedy action tensor([ 1.0547, -0.5634, -0.2944, -0.2334]) tensor([0.5769, 0.1144, 0.1497, 0.1591]) -Greedy action tensor([ 1.0710, -0.7860, -0.1233, -0.0396]) tensor([0.5592, 0.0873, 0.1694, 0.1842]) -Greedy action tensor([ 0.7313, -0.3265, -0.5921, 0.0861]) tensor([0.4677, 0.1624, 0.1245, 0.2454]) -Greedy action tensor([ 1.1862, -0.5998, -0.4135, -0.0375]) tensor([0.6011, 0.1008, 0.1214, 0.1768]) -Greedy action tensor([ 0.3911, -0.0929, -0.7927, 0.5107]) tensor([0.3279, 0.2021, 0.1004, 0.3696]) -Greedy action tensor([ 1.0578, -0.3144, -0.1224, -0.0041]) tensor([0.5245, 0.1330, 0.1611, 0.1814]) -Greedy action tensor([ 0.6827, -0.3319, -0.1047, 0.2217]) tensor([0.4085, 0.1481, 0.1859, 0.2576]) -Greedy action tensor([ 1.2151, -0.3733, -0.4144, 0.3014]) tensor([0.5552, 0.1134, 0.1088, 0.2226]) -Greedy action tensor([ 1.3860, -0.6073, -0.3336, 0.2913]) tensor([0.6061, 0.0826, 0.1086, 0.2028]) -Greedy action tensor([ 0.2881, -0.2716, -0.1634, 0.0397]) tensor([0.3347, 0.1912, 0.2131, 0.2611]) -Greedy action tensor([ 0.4981, -0.1147, -0.3286, 0.0948]) tensor([0.3777, 0.2047, 0.1653, 0.2524]) -Greedy action tensor([ 0.9069, -0.3774, -0.2500, 0.2896]) tensor([0.4693, 0.1299, 0.1476, 0.2532]) -Greedy action tensor([ 1.0075, -0.5588, -0.2580, 0.2230]) tensor([0.5136, 0.1072, 0.1449, 0.2343]) -Greedy action tensor([ 1.2229, -0.4539, -0.4110, -0.1554]) tensor([0.6119, 0.1144, 0.1194, 0.1542]) -Greedy action tensor([ 0.8029, -0.3711, -0.3391, 0.1567]) tensor([0.4646, 0.1436, 0.1483, 0.2435]) -Greedy action tensor([ 0.5157, -0.1366, -0.1488, -0.1321]) tensor([0.3908, 0.2036, 0.2011, 0.2045]) -Greedy action tensor([ 1.2250, -0.5853, -0.3455, 0.2433]) tensor([0.5727, 0.0937, 0.1191, 0.2146]) -Greedy action tensor([ 0.8060, -0.3800, -0.5097, 0.6513]) tensor([0.4114, 0.1257, 0.1104, 0.3525]) -Greedy action tensor([ 1.1572, -0.3632, -0.1879, 0.0127]) tensor([0.5563, 0.1216, 0.1449, 0.1771]) -Greedy action tensor([ 0.9700, -0.6585, -0.2266, 0.3255]) tensor([0.4942, 0.0970, 0.1494, 0.2594]) -Greedy action tensor([ 1.0156, -0.3015, -0.1552, 0.2151]) tensor([0.4933, 0.1322, 0.1530, 0.2215]) -Greedy action tensor([ 0.6523, -0.3388, -0.6749, 0.2438]) tensor([0.4346, 0.1613, 0.1153, 0.2888]) -Greedy action tensor([ 1.0455, -0.4521, 0.0713, 0.0445]) tensor([0.5080, 0.1136, 0.1918, 0.1867]) -Greedy action tensor([ 1.0647, -0.4671, -0.1756, -0.0908]) tensor([0.5493, 0.1187, 0.1589, 0.1730]) -Greedy action tensor([ 0.4482, -0.2784, -0.5288, -0.0984]) tensor([0.4100, 0.1983, 0.1543, 0.2374]) -Greedy action tensor([ 1.0473, -0.0632, -0.0717, -0.1044]) tensor([0.5071, 0.1670, 0.1656, 0.1603]) -Greedy action tensor([ 0.7480, -0.2944, -0.2392, -0.1126]) tensor([0.4655, 0.1641, 0.1735, 0.1969]) -Greedy action tensor([ 0.8690, -0.4512, -0.0675, 0.2145]) tensor([0.4590, 0.1226, 0.1799, 0.2385]) -Greedy action tensor([ 1.2310, -0.6679, -0.4673, 0.6514]) tensor([0.5283, 0.0791, 0.0967, 0.2959]) -Greedy action tensor([ 1.1630, -0.3643, -0.1397, -0.0017]) tensor([0.5553, 0.1206, 0.1509, 0.1733]) -Greedy action tensor([ 1.0019, -0.2803, 0.0615, 0.1734]) tensor([0.4752, 0.1318, 0.1855, 0.2075]) -Greedy action tensor([ 1.3571, -0.0724, -0.0842, -0.1621]) tensor([0.5900, 0.1413, 0.1396, 0.1291]) -Greedy action tensor([ 1.0860, -0.2547, -0.2230, 0.1398]) tensor([0.5208, 0.1363, 0.1407, 0.2022]) -Greedy action tensor([ 2.0713, -1.0537, -0.2475, 0.4886]) tensor([0.7420, 0.0326, 0.0730, 0.1524]) -Greedy action tensor([ 0.7100, -0.4913, -0.5580, 0.4271]) tensor([0.4281, 0.1288, 0.1205, 0.3226]) -Greedy action tensor([ 1.1290, -0.6390, -0.0159, -0.0344]) tensor([0.5551, 0.0947, 0.1767, 0.1734]) -Greedy action tensor([ 1.2276, -0.4957, -0.6165, -0.1342]) tensor([0.6278, 0.1121, 0.0993, 0.1608]) -Greedy action tensor([ 0.8956, -0.3300, -0.0901, -0.0279]) tensor([0.4845, 0.1422, 0.1808, 0.1924]) -Greedy action tensor([ 1.0909, -0.6580, -0.3315, 0.2279]) tensor([0.5444, 0.0947, 0.1313, 0.2297]) -Greedy action tensor([ 0.9337, -0.4198, -0.1207, 0.0135]) tensor([0.4987, 0.1288, 0.1737, 0.1987]) -Greedy action tensor([ 0.8583, -0.3367, -0.0541, -0.0747]) tensor([0.4767, 0.1443, 0.1914, 0.1875]) -Greedy action tensor([ 0.6788, -0.0086, -0.1020, 0.0350]) tensor([0.4022, 0.2023, 0.1842, 0.2113]) -Greedy action tensor([ 0.9513, -0.8074, -0.1477, 0.3829]) tensor([0.4826, 0.0831, 0.1608, 0.2734]) -Greedy action tensor([ 0.6871, -0.1158, 0.2357, -0.4082]) tensor([0.4134, 0.1852, 0.2632, 0.1382]) -Greedy action tensor([ 1.0786, -0.7270, -0.3556, 0.0868]) tensor([0.5638, 0.0927, 0.1344, 0.2091]) -Greedy action tensor([ 0.5943, -0.2815, 0.0547, -0.2258]) tensor([0.4099, 0.1707, 0.2389, 0.1805]) -Greedy action tensor([ 0.6539, -0.2527, -0.3871, 0.1234]) tensor([0.4264, 0.1722, 0.1506, 0.2509]) -Greedy action tensor([ 1.1978, -0.2148, -0.2775, 0.1378]) tensor([0.5499, 0.1339, 0.1258, 0.1905]) -Greedy action tensor([ 0.7830, -0.5972, -0.1299, 0.4038]) tensor([0.4278, 0.1076, 0.1717, 0.2928]) -Greedy action tensor([ 0.7080, -0.3074, -0.2063, -0.2330]) tensor([0.4644, 0.1682, 0.1861, 0.1812]) -Greedy action tensor([ 0.8557, -0.3883, -0.5495, 0.2374]) tensor([0.4825, 0.1391, 0.1184, 0.2600]) -Greedy action tensor([ 0.9439, -0.3556, -0.5206, -0.1485]) tensor([0.5437, 0.1483, 0.1257, 0.1824]) -Greedy action tensor([ 1.1213, -0.4333, -0.2505, 0.3259]) tensor([0.5218, 0.1102, 0.1324, 0.2356]) -Greedy action tensor([ 1.6838, -0.7162, -0.3653, 0.1455]) tensor([0.6972, 0.0632, 0.0898, 0.1497]) -Greedy action tensor([ 0.2347, -0.0855, -0.2836, -0.2066]) tensor([0.3373, 0.2449, 0.2009, 0.2169]) -Greedy action tensor([ 1.1672, -0.5093, -0.2175, -0.2052]) tensor([0.5914, 0.1106, 0.1481, 0.1499]) -Greedy action tensor([ 1.5800, -0.4003, -0.3220, -0.0646]) tensor([0.6755, 0.0932, 0.1008, 0.1304]) -Greedy action tensor([ 1.1039, -0.5843, -0.4677, 0.2750]) tensor([0.5467, 0.1011, 0.1136, 0.2386]) -Greedy action tensor([ 0.5905, 0.0448, -0.0225, -0.3398]) tensor([0.3975, 0.2303, 0.2153, 0.1568]) -Greedy action tensor([ 0.7078, -0.2995, -0.1307, 0.0666]) tensor([0.4303, 0.1571, 0.1860, 0.2266]) -Greedy action tensor([ 1.0701, -0.7534, -0.3048, 0.0368]) tensor([0.5649, 0.0912, 0.1428, 0.2010]) -Greedy action tensor([ 1.0523, -0.4052, 0.0188, -0.2230]) tensor([0.5354, 0.1246, 0.1905, 0.1495]) -Greedy action tensor([ 0.5894, -0.2807, -0.0934, 0.0795]) tensor([0.3961, 0.1659, 0.2001, 0.2379]) -Greedy action tensor([ 0.6454, -0.3612, 0.0844, -0.0540]) tensor([0.4110, 0.1502, 0.2345, 0.2042]) -Greedy action tensor([ 0.9672, -0.2599, -0.2663, 0.1795]) tensor([0.4904, 0.1438, 0.1428, 0.2231]) -Greedy action tensor([ 1.0141, -0.4722, -0.7571, -0.2567]) tensor([0.5963, 0.1349, 0.1015, 0.1673]) -Greedy action tensor([ 1.5836, -0.4263, -0.4256, 0.2296]) tensor([0.6552, 0.0878, 0.0879, 0.1692]) -Greedy action tensor([ 1.0020, -0.4994, 0.0261, 0.0953]) tensor([0.4991, 0.1112, 0.1881, 0.2016]) -Greedy action tensor([ 0.9790, -0.3961, -0.1515, -0.1046]) tensor([0.5224, 0.1321, 0.1687, 0.1768]) -Greedy action tensor([ 1.1626, -0.6447, -0.3155, -0.0270]) tensor([0.5895, 0.0967, 0.1344, 0.1794]) -Greedy action tensor([ 1.1466, -0.6703, -0.2755, 0.9056]) tensor([0.4567, 0.0742, 0.1102, 0.3589]) -Greedy action tensor([ 0.7874, -0.2332, -0.1099, -0.2709]) tensor([0.4728, 0.1704, 0.1927, 0.1641]) -Greedy action tensor([ 0.2853, -0.2800, -0.4311, 0.0684]) tensor([0.3494, 0.1986, 0.1707, 0.2813]) -Greedy action tensor([ 0.8474, -0.2847, -0.5905, -0.3433]) tensor([0.5365, 0.1730, 0.1274, 0.1631]) -Greedy action tensor([ 0.7534, -0.2822, -0.2769, 0.1229]) tensor([0.4456, 0.1582, 0.1590, 0.2372]) -Greedy action tensor([ 0.7988, -0.2486, -0.6193, -0.3808]) tensor([0.5262, 0.1846, 0.1274, 0.1618]) -Greedy action tensor([ 1.3962, -0.3701, -0.1687, -0.1598]) tensor([0.6285, 0.1074, 0.1314, 0.1326]) -Greedy action tensor([ 1.1648, -0.4045, -0.4785, 0.7783]) tensor([0.4806, 0.1000, 0.0929, 0.3265]) -Greedy action tensor([ 0.9622, -0.4990, -0.2430, 0.1787]) tensor([0.5029, 0.1167, 0.1507, 0.2297]) -Greedy action tensor([-0.6685, -0.5560, 0.2718, 0.3954]) tensor([0.1320, 0.1477, 0.3379, 0.3824]) -Greedy action tensor([-1.3079, -0.9432, 0.5503, -0.2946]) tensor([0.0862, 0.1241, 0.5524, 0.2373]) -Greedy action tensor([-1.6795, -0.4605, 1.3346, 0.9838]) tensor([0.0256, 0.0865, 0.5210, 0.3668]) -Greedy action tensor([-0.6603, -0.5479, 0.2124, 0.2480]) tensor([0.1430, 0.1600, 0.3423, 0.3547]) -Greedy action tensor([-1.6347, -0.5381, 0.5957, 0.0403]) tensor([0.0537, 0.1606, 0.4992, 0.2865]) -Greedy action tensor([-1.4333, -0.4490, 0.3794, 0.2070]) tensor([0.0668, 0.1789, 0.4096, 0.3447]) -Greedy action tensor([-1.4142, -0.4022, 1.2419, 1.1296]) tensor([0.0326, 0.0895, 0.4636, 0.4143]) -Greedy action tensor([-1.2333, -0.4950, 0.7676, 1.1031]) tensor([0.0480, 0.1004, 0.3550, 0.4965]) -Greedy action tensor([-0.9189, -0.2895, 0.5191, 1.0368]) tensor([0.0706, 0.1325, 0.2975, 0.4993]) -Greedy action tensor([-1.2576, -0.5780, 0.3191, 0.2102]) tensor([0.0823, 0.1624, 0.3982, 0.3571]) -Greedy action tensor([-1.6936, -0.5150, 0.5282, -0.0127]) tensor([0.0531, 0.1725, 0.4895, 0.2850]) -Greedy action tensor([-1.1897, -0.5946, 0.3254, 0.1143]) tensor([0.0905, 0.1641, 0.4119, 0.3335]) -Greedy action tensor([-1.9033, -0.7386, 0.3420, -0.1799]) tensor([0.0519, 0.1665, 0.4905, 0.2911]) -Greedy action tensor([-1.4196, -0.4994, 0.4139, 0.0455]) tensor([0.0710, 0.1781, 0.4439, 0.3071]) -Greedy action tensor([-2.0136, -0.5279, 0.7026, 0.1496]) tensor([0.0342, 0.1511, 0.5172, 0.2975]) -Greedy action tensor([-1.4736, -0.5316, 0.4289, 0.1664]) tensor([0.0648, 0.1663, 0.4346, 0.3343]) -Greedy action tensor([-1.7777, -0.9027, 0.4502, -0.2988]) tensor([0.0586, 0.1406, 0.5438, 0.2571]) -Greedy action tensor([-1.1569, -0.2772, 0.5957, 0.9861]) tensor([0.0565, 0.1361, 0.3259, 0.4815]) -Greedy action tensor([-1.1290, -0.6969, 0.4341, 0.9548]) tensor([0.0651, 0.1004, 0.3110, 0.5235]) -Greedy action tensor([-1.5310, -0.3986, 0.7520, 0.7474]) tensor([0.0422, 0.1311, 0.4143, 0.4124]) -Greedy action tensor([-1.5109, -0.4820, 0.4690, 0.1368]) tensor([0.0616, 0.1723, 0.4461, 0.3200]) -Greedy action tensor([-1.2841, -0.6098, 0.4705, 0.1298]) tensor([0.0778, 0.1527, 0.4497, 0.3199]) -Greedy action tensor([-1.2821, -0.3302, 0.6096, 0.8847]) tensor([0.0528, 0.1367, 0.3499, 0.4607]) -Greedy action tensor([-1.6252, -1.0487, -0.1101, -0.9261]) tensor([0.1070, 0.1905, 0.4871, 0.2154]) -Greedy action tensor([-1.1500, -0.5180, 0.3370, 0.6171]) tensor([0.0760, 0.1430, 0.3362, 0.4448]) -Greedy action tensor([-1.5794, -0.3187, 0.2836, -0.2187]) tensor([0.0672, 0.2373, 0.4333, 0.2622]) -Greedy action tensor([-1.5772, -0.6161, 0.5344, 0.2006]) tensor([0.0562, 0.1469, 0.4643, 0.3325]) -Greedy action tensor([-1.4470, -0.5269, 0.4748, 0.4143]) tensor([0.0596, 0.1496, 0.4073, 0.3834]) -Greedy action tensor([-1.4384, -0.5219, 0.4112, 0.1945]) tensor([0.0668, 0.1670, 0.4245, 0.3418]) -Greedy action tensor([-1.5622, -0.3076, 0.8697, 0.8299]) tensor([0.0373, 0.1307, 0.4243, 0.4077]) -Greedy action tensor([-1.8447, -0.4661, 0.6225, -0.1143]) tensor([0.0446, 0.1772, 0.5263, 0.2519]) -Greedy action tensor([-1.4553, -0.2071, -0.0204, -0.3747]) tensor([0.0860, 0.2996, 0.3611, 0.2534]) -Greedy action tensor([-1.8284, -0.3885, 0.9157, 0.5482]) tensor([0.0317, 0.1338, 0.4931, 0.3414]) -Greedy action tensor([-1.9693, -0.8383, 0.2527, -0.1984]) tensor([0.0521, 0.1614, 0.4805, 0.3060]) -Greedy action tensor([-1.9273, -0.8665, 0.1239, -0.3150]) tensor([0.0599, 0.1732, 0.4663, 0.3006]) -Greedy action tensor([-1.6053, -0.5456, 0.5579, 0.1685]) tensor([0.0541, 0.1562, 0.4708, 0.3189]) -Greedy action tensor([-0.8214, -0.3429, 0.4927, 1.2239]) tensor([0.0711, 0.1147, 0.2646, 0.5496]) -Greedy action tensor([-1.8254, -0.5310, 0.7355, 0.0246]) tensor([0.0417, 0.1523, 0.5405, 0.2655]) -Greedy action tensor([-1.8497, -0.4339, 0.6091, -0.1243]) tensor([0.0446, 0.1837, 0.5213, 0.2504]) -Greedy action tensor([-1.3867, 0.1680, 0.2488, 0.1735]) tensor([0.0640, 0.3029, 0.3285, 0.3046]) -Greedy action tensor([-1.2763, -0.5065, 0.3629, 0.5175]) tensor([0.0698, 0.1508, 0.3596, 0.4198]) -Greedy action tensor([-1.5047, -0.5364, 0.4713, 0.0895]) tensor([0.0634, 0.1670, 0.4574, 0.3122]) -Greedy action tensor([-1.8067, -0.5350, 1.4229, 0.9287]) tensor([0.0221, 0.0788, 0.5584, 0.3407]) -Greedy action tensor([-1.9978, -0.9213, 0.6669, -0.0706]) tensor([0.0397, 0.1166, 0.5707, 0.2730]) -Greedy action tensor([-1.5504, -0.5462, 0.4641, 0.0217]) tensor([0.0623, 0.1701, 0.4673, 0.3002]) -Greedy action tensor([-0.9080, 0.3424, -0.1110, 0.3214]) tensor([0.0987, 0.3447, 0.2191, 0.3375]) -Greedy action tensor([-1.2996, -0.6397, 0.4630, 0.3104]) tensor([0.0726, 0.1405, 0.4234, 0.3634]) -Greedy action tensor([-0.9196, -0.5892, 0.2686, 0.3703]) tensor([0.1075, 0.1496, 0.3526, 0.3904]) -Greedy action tensor([-1.0738, -0.5958, 0.2454, 0.2998]) tensor([0.0971, 0.1565, 0.3631, 0.3833]) -Greedy action tensor([-1.6308, -0.5642, 0.5169, -0.0373]) tensor([0.0575, 0.1671, 0.4925, 0.2830]) -Greedy action tensor([-0.6861, -0.5400, 0.1895, 0.3323]) tensor([0.1365, 0.1580, 0.3276, 0.3779]) -Greedy action tensor([-0.5360, -0.5247, 0.2151, 0.2983]) tensor([0.1554, 0.1572, 0.3294, 0.3580]) -Greedy action tensor([-1.3300, -0.5697, 0.3565, 0.1556]) tensor([0.0772, 0.1651, 0.4168, 0.3409]) -Greedy action tensor([-0.1459, 1.1563, -0.1399, 0.2217]) tensor([0.1403, 0.5159, 0.1411, 0.2026]) -Greedy action tensor([-1.2488, -0.1930, 0.4486, 0.2719]) tensor([0.0719, 0.2066, 0.3925, 0.3289]) -Greedy action tensor([-1.9193, -0.7071, 0.1918, -0.2844]) tensor([0.0563, 0.1894, 0.4653, 0.2890]) -Greedy action tensor([-1.4618, -0.5453, 0.4095, 0.1483]) tensor([0.0667, 0.1667, 0.4331, 0.3335]) -Greedy action tensor([-1.1094, -0.5516, 0.5588, -0.3432]) tensor([0.0980, 0.1712, 0.5198, 0.2109]) -Greedy action tensor([-1.8478, -0.4646, 0.6191, -0.1256]) tensor([0.0447, 0.1783, 0.5268, 0.2502]) -Greedy action tensor([-1.9462, -0.4631, 1.0819, 0.4842]) tensor([0.0267, 0.1177, 0.5519, 0.3036]) -Greedy action tensor([-1.5595, -0.4176, 0.5182, 0.2570]) tensor([0.0547, 0.1715, 0.4371, 0.3367]) -Greedy action tensor([-1.8779, -0.5956, 0.3237, -0.2620]) tensor([0.0535, 0.1930, 0.4840, 0.2694]) -Greedy action tensor([-1.8319, -0.4701, 1.2670, 0.8430]) tensor([0.0240, 0.0939, 0.5332, 0.3489]) -Greedy action tensor([-1.4570, -0.5206, 0.8008, 0.8773]) tensor([0.0427, 0.1088, 0.4080, 0.4404]) -Greedy action tensor([-1.7551, -0.4705, 0.7533, 0.2864]) tensor([0.0406, 0.1469, 0.4994, 0.3131]) -Greedy action tensor([-1.9411, -0.5522, 0.7644, 0.0929]) tensor([0.0362, 0.1452, 0.5418, 0.2768]) -Greedy action tensor([-1.5381, -0.5183, 0.4500, 0.0529]) tensor([0.0626, 0.1735, 0.4569, 0.3071]) -Greedy action tensor([-1.3078, 0.2851, 0.2631, 0.2431]) tensor([0.0647, 0.3184, 0.3115, 0.3053]) -Greedy action tensor([-2.0442, -0.7998, 0.7721, 0.1060]) tensor([0.0336, 0.1166, 0.5614, 0.2884]) -Greedy action tensor([-1.1641, -0.2827, 1.0468, 1.2234]) tensor([0.0427, 0.1031, 0.3895, 0.4648]) -Greedy action tensor([-1.8796, -0.9711, 0.1388, -0.4726]) tensor([0.0663, 0.1644, 0.4987, 0.2706]) -Greedy action tensor([-0.2601, 0.2763, 0.0634, -0.0480]) tensor([0.1877, 0.3209, 0.2594, 0.2320]) -Greedy action tensor([-1.8445, -0.4832, 0.6280, -0.0988]) tensor([0.0445, 0.1735, 0.5272, 0.2548]) -Greedy action tensor([-2.0085, -0.6066, 0.8113, 0.1422]) tensor([0.0329, 0.1335, 0.5513, 0.2823]) -Greedy action tensor([-1.1717, -0.6372, 0.3141, 0.2475]) tensor([0.0888, 0.1516, 0.3924, 0.3672]) -Greedy action tensor([-1.4622, -0.4782, 0.4838, 0.3655]) tensor([0.0592, 0.1583, 0.4144, 0.3681]) -Greedy action tensor([-1.9181, -0.6610, 1.0839, 0.4022]) tensor([0.0287, 0.1010, 0.5780, 0.2923]) -Greedy action tensor([-1.8809, -0.4570, 0.6727, -0.0548]) tensor([0.0413, 0.1715, 0.5308, 0.2564]) -Greedy action tensor([-1.3212, -0.5232, 0.6197, 0.8379]) tensor([0.0531, 0.1178, 0.3695, 0.4596]) -Greedy action tensor([-1.8273, -0.6395, 0.3203, -0.2270]) tensor([0.0562, 0.1843, 0.4812, 0.2784]) -Greedy action tensor([-1.8837, -0.5326, 0.7828, 0.0864]) tensor([0.0378, 0.1462, 0.5446, 0.2714]) -Greedy action tensor([ 0.0304, -0.0264, 0.1451, -0.2283]) tensor([0.2605, 0.2461, 0.2922, 0.2011]) -Greedy action tensor([ 0.5752, -0.3004, 0.0519, -0.2311]) tensor([0.4072, 0.1697, 0.2413, 0.1818]) -Greedy action tensor([ 0.4559, 0.1669, 0.1603, -0.2235]) tensor([0.3333, 0.2497, 0.2480, 0.1690]) -Greedy action tensor([ 0.4037, 0.0292, -0.0644, -0.1657]) tensor([0.3473, 0.2388, 0.2175, 0.1965]) -Greedy action tensor([ 0.4444, -0.0075, 0.0358, -0.1127]) tensor([0.3480, 0.2215, 0.2313, 0.1993]) -Greedy action tensor([ 0.6414, 0.1132, -0.0688, -0.1946]) tensor([0.3977, 0.2345, 0.1955, 0.1724]) -Greedy action tensor([ 0.3650, 0.1725, -0.1026, -0.0950]) tensor([0.3244, 0.2676, 0.2032, 0.2048]) -Greedy action tensor([ 0.2622, 0.1023, 0.1168, -0.1113]) tensor([0.2937, 0.2503, 0.2539, 0.2021]) -Greedy action tensor([ 0.5719, 0.1597, -0.0151, -0.1206]) tensor([0.3679, 0.2436, 0.2045, 0.1841]) -Greedy action tensor([ 0.7584, -0.5433, 0.0497, -0.5314]) tensor([0.4903, 0.1334, 0.2414, 0.1350]) -Greedy action tensor([ 0.5954, -0.0395, 0.1795, -0.3952]) tensor([0.3905, 0.2069, 0.2576, 0.1450]) -Greedy action tensor([ 0.5352, -0.0180, 0.0613, -0.4184]) tensor([0.3871, 0.2226, 0.2410, 0.1492]) -Greedy action tensor([ 0.8350, -0.6296, -0.3397, -0.8704]) tensor([0.5808, 0.1343, 0.1794, 0.1055]) -Greedy action tensor([ 0.3906, -0.0837, 0.0284, -0.4621]) tensor([0.3643, 0.2267, 0.2536, 0.1553]) -Greedy action tensor([ 0.6709, -0.1280, 0.0523, -0.3942]) tensor([0.4286, 0.1928, 0.2309, 0.1477]) -Greedy action tensor([ 0.8932, -0.5474, -0.1326, -0.7482]) tensor([0.5590, 0.1324, 0.2004, 0.1083]) -Greedy action tensor([ 0.5846, -0.3393, -0.0925, -0.4619]) tensor([0.4432, 0.1759, 0.2252, 0.1556]) -Greedy action tensor([ 1.0556, -0.9935, -0.0140, -0.3256]) tensor([0.5803, 0.0748, 0.1991, 0.1458]) -Greedy action tensor([ 0.8033, -0.5591, -0.0526, -0.7628]) tensor([0.5292, 0.1355, 0.2248, 0.1105]) -Greedy action tensor([ 0.2850, -0.0301, -0.0683, -0.2289]) tensor([0.3300, 0.2408, 0.2318, 0.1974]) -Greedy action tensor([ 0.3910, -0.0159, -0.0416, -0.1692]) tensor([0.3466, 0.2307, 0.2248, 0.1979]) -Greedy action tensor([ 0.4384, -0.1794, 0.0385, -0.4743]) tensor([0.3830, 0.2065, 0.2568, 0.1538]) -Greedy action tensor([ 0.3705, -0.0868, 0.0840, -0.3166]) tensor([0.3464, 0.2193, 0.2601, 0.1742]) -Greedy action tensor([ 0.4599, -0.0713, -0.0588, -0.1609]) tensor([0.3676, 0.2161, 0.2188, 0.1976]) -Greedy action tensor([ 0.9696, -0.6394, -0.0689, -0.5080]) tensor([0.5611, 0.1123, 0.1986, 0.1280]) -Greedy action tensor([ 0.6288, -0.0018, 0.0954, -0.2876]) tensor([0.3970, 0.2113, 0.2329, 0.1588]) -Greedy action tensor([ 0.7774, -0.4434, 0.1036, -0.4000]) tensor([0.4733, 0.1396, 0.2413, 0.1458]) -Greedy action tensor([ 0.5207, -0.0895, -0.0234, -0.1721]) tensor([0.3811, 0.2070, 0.2212, 0.1906]) -Greedy action tensor([ 0.7483, -0.3030, -0.0214, -0.3710]) tensor([0.4675, 0.1634, 0.2165, 0.1526]) -Greedy action tensor([ 0.6451, -0.3499, 0.2214, -0.4133]) tensor([0.4217, 0.1559, 0.2761, 0.1463]) -Greedy action tensor([ 0.3690, -0.1378, -0.0193, -0.1527]) tensor([0.3479, 0.2096, 0.2360, 0.2065]) -Greedy action tensor([ 1.2895, -1.1381, 0.0158, -0.6713]) tensor([0.6628, 0.0585, 0.1854, 0.0933]) -Greedy action tensor([ 0.0292, -0.1271, -0.0735, -0.3077]) tensor([0.2880, 0.2464, 0.2599, 0.2057]) -Greedy action tensor([ 0.8983, -0.3673, -0.0037, -0.3532]) tensor([0.5066, 0.1429, 0.2056, 0.1449]) -Greedy action tensor([ 0.5306, -0.1451, -0.0453, -0.3518]) tensor([0.4024, 0.2048, 0.2263, 0.1665]) -Greedy action tensor([ 0.5905, -0.1399, -0.0471, -0.3491]) tensor([0.4165, 0.2006, 0.2201, 0.1628]) -Greedy action tensor([ 0.2030, 0.0208, 0.0726, -0.1110]) tensor([0.2906, 0.2422, 0.2550, 0.2123]) -Greedy action tensor([ 0.3351, -0.0481, 0.0107, -0.1401]) tensor([0.3304, 0.2252, 0.2389, 0.2054]) -Greedy action tensor([ 0.5398, 0.0553, 0.0259, -0.2313]) tensor([0.3736, 0.2301, 0.2235, 0.1728]) -Greedy action tensor([ 1.1297, -0.7190, -0.0515, -0.6259]) tensor([0.6108, 0.0962, 0.1875, 0.1055]) -Greedy action tensor([ 0.6502, -0.4707, -0.0012, -0.6373]) tensor([0.4710, 0.1535, 0.2455, 0.1300]) -Greedy action tensor([ 0.4262, 0.0055, 0.1241, -0.1880]) tensor([0.3405, 0.2236, 0.2517, 0.1842]) -Greedy action tensor([ 0.4244, 0.0848, -0.2052, -0.1624]) tensor([0.3570, 0.2542, 0.1902, 0.1985]) -Greedy action tensor([ 0.4770, -0.2550, -0.1157, -0.2886]) tensor([0.4002, 0.1925, 0.2212, 0.1861]) -Greedy action tensor([ 0.7472, -0.6336, -0.1088, -0.5706]) tensor([0.5144, 0.1293, 0.2186, 0.1377]) -Greedy action tensor([ 0.2777, 0.0537, -0.0050, -0.1869]) tensor([0.3143, 0.2512, 0.2369, 0.1975]) -Greedy action tensor([ 0.5555, -0.4565, -0.1239, -0.4627]) tensor([0.4481, 0.1629, 0.2271, 0.1619]) -Greedy action tensor([ 0.4772, -0.4257, 0.2371, -0.8311]) tensor([0.4061, 0.1646, 0.3194, 0.1098]) -Greedy action tensor([ 0.4692, 0.1746, -0.1022, -0.2526]) tensor([0.3577, 0.2664, 0.2020, 0.1738]) -Greedy action tensor([ 0.6869, -0.3292, -0.0702, -0.4291]) tensor([0.4633, 0.1677, 0.2173, 0.1518]) -Greedy action tensor([ 0.7170, -0.4215, 0.0092, -0.5158]) tensor([0.4752, 0.1522, 0.2341, 0.1385]) -Greedy action tensor([0.3825, 0.2303, 0.0822, 0.0212]) tensor([0.3034, 0.2606, 0.2247, 0.2114]) -Greedy action tensor([ 0.5151, -0.2860, 0.0672, -0.3976]) tensor([0.4017, 0.1803, 0.2567, 0.1613]) -Greedy action tensor([ 0.5722, -0.3741, 0.0865, -0.5084]) tensor([0.4268, 0.1657, 0.2626, 0.1449]) -Greedy action tensor([ 0.9232, -0.2991, 0.1097, -0.7995]) tensor([0.5218, 0.1537, 0.2313, 0.0932]) -Greedy action tensor([ 0.4202, 0.2057, 0.1453, -0.0635]) tensor([0.3142, 0.2535, 0.2387, 0.1937]) -Greedy action tensor([ 0.5257, -0.1526, -0.0497, -0.2556]) tensor([0.3956, 0.2008, 0.2225, 0.1811]) -Greedy action tensor([ 0.9211, -0.3512, 0.0835, -0.7043]) tensor([0.5236, 0.1467, 0.2266, 0.1031]) -Greedy action tensor([ 0.3096, -0.1526, 0.0889, -0.4991]) tensor([0.3476, 0.2189, 0.2787, 0.1548]) -Greedy action tensor([ 0.1872, -0.0633, 0.2036, -0.2113]) tensor([0.2885, 0.2246, 0.2933, 0.1937]) -Greedy action tensor([ 0.8161, -0.4591, -0.2525, -0.6460]) tensor([0.5392, 0.1506, 0.1852, 0.1250]) -Greedy action tensor([ 1.1971, -1.0208, -0.0331, -0.4858]) tensor([0.6302, 0.0686, 0.1841, 0.1171]) -Greedy action tensor([ 0.5710, -0.0381, 0.1086, -0.2069]) tensor([0.3798, 0.2066, 0.2392, 0.1745]) -Greedy action tensor([ 0.3366, -0.1197, 0.2168, -0.4092]) tensor([0.3339, 0.2116, 0.2962, 0.1584]) -Greedy action tensor([ 0.4790, -0.2791, 0.1300, -0.4566]) tensor([0.3897, 0.1826, 0.2749, 0.1529]) -Greedy action tensor([ 0.8365, -0.3888, -0.0254, -0.3801]) tensor([0.4969, 0.1459, 0.2099, 0.1472]) -Greedy action tensor([ 1.0468, -0.5318, 0.0037, -0.5513]) tensor([0.5679, 0.1171, 0.2001, 0.1149]) -Greedy action tensor([ 0.4419, -0.3859, 0.1154, -0.3200]) tensor([0.3809, 0.1665, 0.2748, 0.1778]) -Greedy action tensor([ 0.9641, -0.9101, -0.1959, -0.6753]) tensor([0.6020, 0.0924, 0.1887, 0.1169]) -Greedy action tensor([ 0.3216, 0.0715, 0.0343, -0.2386]) tensor([0.3226, 0.2512, 0.2420, 0.1842]) -Greedy action tensor([ 1.1913, -0.7637, 0.0810, -0.6442]) tensor([0.6133, 0.0868, 0.2020, 0.0978]) -Greedy action tensor([ 0.5148, -0.1481, -0.0100, -0.3639]) tensor([0.3965, 0.2043, 0.2346, 0.1647]) -Greedy action tensor([ 0.3138, 0.1252, 0.0057, -0.0140]) tensor([0.3046, 0.2522, 0.2238, 0.2194]) -Greedy action tensor([ 0.6392, -0.0130, 0.0850, -0.2845]) tensor([0.4012, 0.2090, 0.2305, 0.1593]) -Greedy action tensor([ 0.5192, -0.3286, 0.1101, -0.5585]) tensor([0.4110, 0.1761, 0.2730, 0.1399]) -Greedy action tensor([ 0.2966, 0.3061, 0.1039, -0.2073]) tensor([0.2908, 0.2936, 0.2399, 0.1757]) -Greedy action tensor([ 1.1316, -0.4054, 0.0081, -0.3700]) tensor([0.5672, 0.1220, 0.1844, 0.1264]) -Greedy action tensor([ 0.6278, -0.1033, -0.0568, -0.1648]) tensor([0.4101, 0.1974, 0.2068, 0.1856]) -Greedy action tensor([ 0.8832, -0.3198, 0.0293, -0.6668]) tensor([0.5159, 0.1549, 0.2197, 0.1095]) -Greedy action tensor([ 0.9270, -0.7669, -0.1190, -0.3500]) tensor([0.5513, 0.1013, 0.1937, 0.1537]) -Greedy action tensor([ 0.6665, -0.3846, -0.0973, -0.6008]) tensor([0.4769, 0.1667, 0.2222, 0.1343]) -Greedy action tensor([0.8288, 0.2577, 0.8575, 0.8907]) tensor([0.2734, 0.1544, 0.2814, 0.2908]) -Greedy action tensor([ 0.6703, 0.9699, -0.4987, -0.2459]) tensor([0.3268, 0.4410, 0.1015, 0.1307]) -Greedy action tensor([-0.0471, 0.0767, 0.0642, 0.8248]) tensor([0.1773, 0.2006, 0.1981, 0.4239]) -Greedy action tensor([ 0.0617, -1.6360, 0.4670, -0.0565]) tensor([0.2800, 0.0513, 0.4199, 0.2488]) -Greedy action tensor([ 1.1431, 0.4825, -1.3015, 0.4182]) tensor([0.4790, 0.2474, 0.0416, 0.2320]) -Greedy action tensor([ 0.5562, -0.1880, -0.8976, 0.7916]) tensor([0.3362, 0.1598, 0.0786, 0.4255]) -Greedy action tensor([ 1.0319, 0.1771, -0.2596, 0.3591]) tensor([0.4524, 0.1924, 0.1243, 0.2309]) -Greedy action tensor([ 1.1430, 0.2673, -0.8953, 0.6655]) tensor([0.4614, 0.1922, 0.0601, 0.2862]) -Greedy action tensor([0.3413, 0.2347, 0.0581, 0.4913]) tensor([0.2622, 0.2357, 0.1975, 0.3046]) -Greedy action tensor([ 1.2082, -0.7715, 0.2477, 0.7865]) tensor([0.4594, 0.0635, 0.1758, 0.3013]) -Greedy action tensor([ 1.4523, -0.4429, 0.7612, 1.5033]) tensor([0.3699, 0.0556, 0.1853, 0.3892]) -Greedy action tensor([ 0.6932, -0.0317, -0.5318, 0.9038]) tensor([0.3320, 0.1608, 0.0975, 0.4098]) -Greedy action tensor([ 0.5914, 0.6842, -0.1306, 1.1691]) tensor([0.2291, 0.2514, 0.1113, 0.4082]) -Greedy action tensor([ 1.7288, -0.0511, 0.5716, 1.2649]) tensor([0.4735, 0.0799, 0.1489, 0.2978]) -Greedy action tensor([ 1.1028, -0.5508, -0.4345, 1.2077]) tensor([0.3973, 0.0760, 0.0854, 0.4412]) -Greedy action tensor([0.3783, 0.3923, 0.7119, 0.1273]) tensor([0.2388, 0.2421, 0.3333, 0.1858]) -Greedy action tensor([ 0.6433, -0.4766, 0.0508, 0.3824]) tensor([0.3774, 0.1232, 0.2087, 0.2907]) -Greedy action tensor([ 1.1775, 0.3818, -0.5564, 1.3481]) tensor([0.3554, 0.1604, 0.0628, 0.4215]) -Greedy action tensor([ 0.6842, -0.5364, -0.3505, 2.1322]) tensor([0.1693, 0.0500, 0.0602, 0.7205]) -Greedy action tensor([ 0.6107, 0.2909, -1.2024, 1.6752]) tensor([0.2088, 0.1517, 0.0341, 0.6055]) -Greedy action tensor([ 0.2274, 0.4258, -0.0477, 0.5125]) tensor([0.2321, 0.2830, 0.1763, 0.3086]) -Greedy action tensor([ 1.1114, -0.3771, 0.5117, 1.4477]) tensor([0.3150, 0.0711, 0.1729, 0.4410]) -Greedy action tensor([ 1.0379, -0.1172, -0.8514, 1.9845]) tensor([0.2473, 0.0779, 0.0374, 0.6374]) -Greedy action tensor([ 1.1144, -1.5964, -0.2751, 0.5927]) tensor([0.5238, 0.0348, 0.1305, 0.3109]) -Greedy action tensor([ 1.4897, -0.0416, -0.0291, 1.9001]) tensor([0.3398, 0.0735, 0.0744, 0.5123]) -Greedy action tensor([ 1.7560, -0.2971, 0.5822, 1.2723]) tensor([0.4869, 0.0625, 0.1505, 0.3001]) -Greedy action tensor([ 0.5325, -0.0015, 0.1793, 0.3321]) tensor([0.3218, 0.1887, 0.2261, 0.2634]) -Greedy action tensor([ 0.4853, 0.5326, -0.3229, -0.0671]) tensor([0.3258, 0.3415, 0.1452, 0.1875]) -Greedy action tensor([ 0.6240, -0.5440, 1.7608, 0.0732]) tensor([0.1998, 0.0621, 0.6228, 0.1152]) -Greedy action tensor([ 0.9709, 0.8178, -0.3223, 0.0392]) tensor([0.3958, 0.3396, 0.1086, 0.1559]) -Greedy action tensor([1.6482, 0.1114, 0.3355, 0.1356]) tensor([0.5867, 0.1262, 0.1579, 0.1293]) -Greedy action tensor([ 1.8611, -0.4115, 0.9887, 1.1294]) tensor([0.4995, 0.0515, 0.2088, 0.2403]) -Greedy action tensor([ 1.0822, 0.1582, -1.1159, 1.6792]) tensor([0.3008, 0.1194, 0.0334, 0.5464]) -Greedy action tensor([ 0.7891, -0.6492, 0.5005, 0.1903]) tensor([0.3943, 0.0936, 0.2955, 0.2167]) -Greedy action tensor([1.3805, 0.1844, 1.1574, 0.8688]) tensor([0.3701, 0.1119, 0.2961, 0.2219]) -Greedy action tensor([ 0.0733, 0.0940, 0.7314, -0.6168]) tensor([0.2245, 0.2292, 0.4336, 0.1126]) -Greedy action tensor([0.9179, 0.0540, 0.8285, 0.2456]) tensor([0.3513, 0.1481, 0.3213, 0.1794]) -Greedy action tensor([ 0.8659, 0.1159, -0.4672, 1.1365]) tensor([0.3282, 0.1550, 0.0865, 0.4302]) -Greedy action tensor([0.2490, 0.3107, 0.4310, 0.0181]) tensor([0.2465, 0.2622, 0.2957, 0.1957]) -Greedy action tensor([0.3257, 0.9836, 0.0833, 0.6080]) tensor([0.1984, 0.3829, 0.1556, 0.2631]) -Greedy action tensor([0.2829, 0.3049, 0.4384, 0.0301]) tensor([0.2521, 0.2577, 0.2945, 0.1958]) -Greedy action tensor([ 2.1369, -0.1796, 1.3021, 0.8260]) tensor([0.5549, 0.0547, 0.2408, 0.1496]) -Greedy action tensor([ 1.3568, -0.0016, -0.6137, 0.5593]) tensor([0.5414, 0.1392, 0.0755, 0.2439]) -Greedy action tensor([ 0.5260, -1.3509, 0.4872, -0.0830]) tensor([0.3761, 0.0576, 0.3618, 0.2046]) -Greedy action tensor([ 0.3635, 0.4611, 0.3922, -0.3899]) tensor([0.2776, 0.3061, 0.2857, 0.1307]) -Greedy action tensor([0.7084, 0.4630, 1.1559, 1.0691]) tensor([0.2092, 0.1636, 0.3272, 0.3000]) -Greedy action tensor([ 1.2424, 0.6890, -0.2404, 0.8420]) tensor([0.4045, 0.2326, 0.0918, 0.2710]) -Greedy action tensor([-0.3139, 0.1633, -0.4309, -0.7032]) tensor([0.2393, 0.3857, 0.2129, 0.1621]) -Greedy action tensor([ 0.7914, -1.0265, -0.5411, 1.1162]) tensor([0.3559, 0.0578, 0.0939, 0.4924]) -Greedy action tensor([ 0.0296, 0.4728, -0.2981, 0.7009]) tensor([0.1910, 0.2976, 0.1376, 0.3738]) -Greedy action tensor([ 0.5833, 0.0993, -0.6735, 0.7815]) tensor([0.3205, 0.1975, 0.0912, 0.3908]) -Greedy action tensor([ 2.8146e-01, -7.7523e-01, -9.6844e-04, -9.9496e-01]) tensor([0.4201, 0.1460, 0.3167, 0.1172]) -Greedy action tensor([ 1.0667, -1.1417, 0.8030, 0.2989]) tensor([0.4270, 0.0469, 0.3280, 0.1981]) -Greedy action tensor([-0.8726, -0.1854, -0.7561, -0.0303]) tensor([0.1554, 0.3090, 0.1746, 0.3609]) -Greedy action tensor([ 0.3976, -0.0947, 1.0077, 0.7935]) tensor([0.2025, 0.1238, 0.3728, 0.3009]) -Greedy action tensor([ 1.2157, -0.4001, 0.0277, 1.4095]) tensor([0.3680, 0.0731, 0.1122, 0.4467]) -Greedy action tensor([ 1.4609, 0.1257, -0.4652, 0.6200]) tensor([0.5434, 0.1430, 0.0792, 0.2344]) -Greedy action tensor([ 1.8307, -1.3947, 0.9732, 0.7820]) tensor([0.5512, 0.0219, 0.2338, 0.1931]) -Greedy action tensor([ 0.2793, 0.1222, -0.7028, 0.6443]) tensor([0.2725, 0.2329, 0.1021, 0.3925]) -Greedy action tensor([ 1.3955, -0.0890, 1.4845, 1.0040]) tensor([0.3338, 0.0757, 0.3649, 0.2257]) -Greedy action tensor([ 0.6883, -0.9800, -0.0421, 2.2361]) tensor([0.1569, 0.0296, 0.0756, 0.7379]) -Greedy action tensor([0.9829, 0.2017, 1.5926, 1.0830]) tensor([0.2271, 0.1040, 0.4179, 0.2510]) -Greedy action tensor([ 0.3967, 1.0604, -0.7319, 0.2287]) tensor([0.2433, 0.4724, 0.0787, 0.2056]) -Greedy action tensor([ 0.1803, 0.6210, -0.5672, 0.4167]) tensor([0.2329, 0.3619, 0.1103, 0.2950]) -Greedy action tensor([ 1.2877, -0.5869, 0.0847, 0.8234]) tensor([0.4802, 0.0737, 0.1442, 0.3019]) -Greedy action tensor([ 1.0785, 0.5469, -0.6031, 0.9917]) tensor([0.3717, 0.2184, 0.0692, 0.3408]) -Greedy action tensor([ 0.1121, 0.7368, -0.2534, 0.4138]) tensor([0.2035, 0.3801, 0.1412, 0.2752]) -Greedy action tensor([ 0.6373, -0.9726, -0.4433, 1.3962]) tensor([0.2721, 0.0544, 0.0923, 0.5812]) -Greedy action tensor([0.6318, 0.7362, 0.3022, 0.8175]) tensor([0.2479, 0.2752, 0.1783, 0.2985]) -Greedy action tensor([ 0.7612, -1.5728, -0.6052, 0.3149]) tensor([0.5020, 0.0486, 0.1280, 0.3213]) -Greedy action tensor([ 0.8234, -0.6752, 0.3594, 1.3403]) tensor([0.2834, 0.0633, 0.1782, 0.4751]) -Greedy action tensor([1.0993, 0.2530, 0.0526, 0.3950]) tensor([0.4397, 0.1886, 0.1544, 0.2174]) -Greedy action tensor([ 1.0338, -0.3429, 0.2208, -0.0988]) tensor([0.4955, 0.1251, 0.2198, 0.1596]) -Greedy action tensor([ 1.3831, 0.3263, -0.7133, 0.1471]) tensor([0.5678, 0.1974, 0.0698, 0.1650]) -Greedy action tensor([ 1.1697, 1.0262, -0.3003, 0.4830]) tensor([0.3847, 0.3333, 0.0884, 0.1936]) -Greedy action tensor([ 1.3546e-01, -2.2657e+00, -1.9789e-05, 7.9290e-01]) tensor([0.2568, 0.0233, 0.2243, 0.4956]) -Greedy action tensor([ 0.7152, -0.3414, -0.4427, 0.9187]) tensor([0.3463, 0.1204, 0.1088, 0.4245]) -Greedy action tensor([ 1.8069, -0.3237, 1.2423, 0.8034]) tensor([0.4869, 0.0578, 0.2768, 0.1785]) -Greedy action tensor([ 0.7833, -0.4026, 1.4783, 1.3875]) tensor([0.1946, 0.0594, 0.3899, 0.3561]) -Greedy action tensor([ 1.0094, 0.1742, -0.4416, 0.4200]) tensor([0.4499, 0.1952, 0.1054, 0.2495]) -Greedy action tensor([ 0.5793, -0.8172, 0.7121, 0.9097]) tensor([0.2645, 0.0655, 0.3020, 0.3680]) -Greedy action tensor([0.8982, 0.2775, 0.2359, 0.5710]) tensor([0.3605, 0.1938, 0.1859, 0.2599]) -Greedy action tensor([ 0.9006, -0.0620, -0.2343, 0.1718]) tensor([0.4575, 0.1747, 0.1471, 0.2207]) -Greedy action tensor([ 1.0436, -0.2956, -0.0461, 0.3535]) tensor([0.4762, 0.1248, 0.1602, 0.2388]) -Greedy action tensor([ 0.6061, -0.5997, -0.1826, 0.0140]) tensor([0.4334, 0.1298, 0.1970, 0.2398]) -Greedy action tensor([ 1.5039, -0.3287, -0.6621, -0.1742]) tensor([0.6843, 0.1095, 0.0784, 0.1278]) -Greedy action tensor([ 1.7781, -0.2555, -0.5334, 0.0619]) tensor([0.7094, 0.0928, 0.0703, 0.1275]) -Greedy action tensor([ 1.6861, -0.5192, -0.2554, 0.3029]) tensor([0.6647, 0.0733, 0.0954, 0.1667]) -Greedy action tensor([ 0.5936, -0.1179, 0.0700, -0.2748]) tensor([0.3995, 0.1961, 0.2367, 0.1677]) -Greedy action tensor([ 0.9148, 0.0765, 0.0198, -0.1259]) tensor([0.4557, 0.1971, 0.1862, 0.1610]) -Greedy action tensor([ 0.7816, -0.4711, -0.0698, -0.2081]) tensor([0.4798, 0.1371, 0.2048, 0.1783]) -Greedy action tensor([ 1.6420, -0.6366, -0.2935, 0.2278]) tensor([0.6712, 0.0687, 0.0969, 0.1632]) -Greedy action tensor([ 1.2092, -0.4630, 0.0362, -0.1054]) tensor([0.5663, 0.1064, 0.1752, 0.1521]) -Greedy action tensor([ 0.9028, -0.6240, -0.1736, 0.2671]) tensor([0.4790, 0.1041, 0.1633, 0.2537]) -Greedy action tensor([ 1.2292, -0.5808, -0.0190, 0.0227]) tensor([0.5715, 0.0935, 0.1640, 0.1710]) -Greedy action tensor([ 1.3420, -0.6367, -0.2884, 0.0553]) tensor([0.6210, 0.0859, 0.1216, 0.1715]) -Greedy action tensor([ 0.8707, -0.2916, -0.4148, -0.1926]) tensor([0.5169, 0.1617, 0.1429, 0.1785]) -Greedy action tensor([ 0.8418, -0.4470, -0.7009, 0.2544]) tensor([0.4890, 0.1348, 0.1045, 0.2717]) -Greedy action tensor([ 1.2689, -0.4299, -0.5365, -0.1722]) tensor([0.6313, 0.1155, 0.1038, 0.1494]) -Greedy action tensor([ 0.9340, -0.4382, -0.1048, 0.3946]) tensor([0.4565, 0.1157, 0.1615, 0.2662]) -Greedy action tensor([ 0.5735, -0.5036, -0.2935, 0.0907]) tensor([0.4206, 0.1432, 0.1767, 0.2595]) -Greedy action tensor([ 0.7521, -0.3439, -0.2588, 0.4020]) tensor([0.4162, 0.1391, 0.1514, 0.2933]) -Greedy action tensor([ 1.3049, -0.2346, -0.2561, -0.1791]) tensor([0.6056, 0.1299, 0.1271, 0.1373]) -Greedy action tensor([ 0.6299, -0.2512, -0.3390, 0.4554]) tensor([0.3797, 0.1573, 0.1441, 0.3189]) -Greedy action tensor([ 0.9794, -0.1746, -0.2076, -0.1775]) tensor([0.5168, 0.1630, 0.1577, 0.1625]) -Greedy action tensor([ 1.5307, -0.3938, -0.4126, -0.0750]) tensor([0.6712, 0.0980, 0.0961, 0.1347]) -Greedy action tensor([ 0.5072, -0.0418, -0.7267, -0.0233]) tensor([0.4070, 0.2351, 0.1185, 0.2394]) -Greedy action tensor([ 1.1415, -0.2657, -0.2326, -0.2148]) tensor([0.5696, 0.1395, 0.1442, 0.1467]) -Greedy action tensor([ 1.0341, -0.5223, -0.1545, 0.2382]) tensor([0.5085, 0.1072, 0.1549, 0.2294]) -Greedy action tensor([ 1.0253, -0.3081, -0.1966, -0.0519]) tensor([0.5266, 0.1388, 0.1552, 0.1794]) -Greedy action tensor([ 1.1098, -0.6194, -0.3298, 0.3657]) tensor([0.5292, 0.0939, 0.1254, 0.2515]) -Greedy action tensor([ 1.1482, -0.0576, -0.0755, 0.0865]) tensor([0.5156, 0.1544, 0.1517, 0.1783]) -Greedy action tensor([ 1.5725, -0.4113, -0.2827, -0.0577]) tensor([0.6712, 0.0923, 0.1050, 0.1315]) -Greedy action tensor([ 0.6390, -0.2074, -0.2826, -0.1471]) tensor([0.4381, 0.1879, 0.1743, 0.1996]) -Greedy action tensor([ 0.6279, -0.0311, -0.3354, -0.0550]) tensor([0.4159, 0.2152, 0.1587, 0.2101]) -Greedy action tensor([ 0.6999, -0.2560, -0.2593, 0.2273]) tensor([0.4182, 0.1608, 0.1603, 0.2607]) -Greedy action tensor([ 0.6993, -0.1244, -0.0646, -0.0507]) tensor([0.4207, 0.1846, 0.1960, 0.1987]) -Greedy action tensor([ 0.3823, -0.2698, 0.0678, -0.1381]) tensor([0.3514, 0.1831, 0.2566, 0.2089]) -Greedy action tensor([ 1.6592, -0.5311, -0.2939, 0.1282]) tensor([0.6803, 0.0761, 0.0965, 0.1472]) -Greedy action tensor([ 0.8276, 0.1299, 0.0282, -0.3101]) tensor([0.4409, 0.2195, 0.1983, 0.1414]) -Greedy action tensor([ 1.0324, 0.0926, -0.0084, 0.0940]) tensor([0.4684, 0.1830, 0.1654, 0.1833]) -Greedy action tensor([ 1.3233, -0.4674, -0.5146, -0.0455]) tensor([0.6327, 0.1056, 0.1007, 0.1610]) -Greedy action tensor([ 1.0038, -0.3283, -0.2463, 0.0522]) tensor([0.5164, 0.1363, 0.1479, 0.1994]) -Greedy action tensor([ 1.0090, -0.4813, -0.0857, 0.2366]) tensor([0.4946, 0.1114, 0.1655, 0.2285]) -Greedy action tensor([ 1.8666, -1.0345, -0.4980, 0.3941]) tensor([0.7255, 0.0399, 0.0682, 0.1664]) -Greedy action tensor([ 0.6785, -0.4079, -0.2117, -0.0045]) tensor([0.4438, 0.1498, 0.1822, 0.2242]) -Greedy action tensor([ 1.4983, -0.4055, -0.0474, 0.2697]) tensor([0.6043, 0.0900, 0.1288, 0.1769]) -Greedy action tensor([ 1.3223, -0.7022, -0.6294, 0.4380]) tensor([0.5927, 0.0783, 0.0842, 0.2448]) -Greedy action tensor([ 0.5737, -0.2059, -0.5201, 0.1510]) tensor([0.4084, 0.1873, 0.1368, 0.2676]) -Greedy action tensor([ 0.7955, -0.3086, -0.6408, 0.9802]) tensor([0.3607, 0.1196, 0.0858, 0.4339]) -Greedy action tensor([ 0.8751, -0.5866, -0.3962, 0.6494]) tensor([0.4328, 0.1004, 0.1214, 0.3454]) -Greedy action tensor([ 1.1142, -0.4682, -0.2375, 0.1983]) tensor([0.5363, 0.1102, 0.1388, 0.2146]) -Greedy action tensor([ 1.1646, -0.0218, -0.1928, -0.0781]) tensor([0.5402, 0.1649, 0.1390, 0.1559]) -Greedy action tensor([ 0.8934, -0.4552, -0.3328, 0.1719]) tensor([0.4904, 0.1273, 0.1439, 0.2384]) -Greedy action tensor([ 1.8077, -0.4761, -0.2293, 0.0871]) tensor([0.7086, 0.0722, 0.0924, 0.1268]) -Greedy action tensor([ 1.6478, -0.4866, -0.2890, -0.0407]) tensor([0.6909, 0.0818, 0.0996, 0.1277]) -Greedy action tensor([ 0.9287, -0.3736, -0.1808, -0.0133]) tensor([0.5021, 0.1365, 0.1656, 0.1958]) -Greedy action tensor([ 1.2013, -0.0697, -0.1811, 0.0038]) tensor([0.5454, 0.1530, 0.1369, 0.1647]) -Greedy action tensor([ 1.4339, -0.5026, -0.2053, -0.2752]) tensor([0.6582, 0.0949, 0.1278, 0.1191]) -Greedy action tensor([ 0.7731, -0.1807, -0.3451, -0.1367]) tensor([0.4729, 0.1822, 0.1546, 0.1904]) -Greedy action tensor([ 0.4818, -0.3804, -1.1067, 0.3942]) tensor([0.3933, 0.1661, 0.0803, 0.3603]) -Greedy action tensor([ 1.4848, -0.5714, -0.2503, 0.0256]) tensor([0.6507, 0.0833, 0.1148, 0.1513]) -Greedy action tensor([ 0.8917, -0.2993, -0.2244, 0.0602]) tensor([0.4838, 0.1470, 0.1585, 0.2107]) -Greedy action tensor([ 1.2275, -0.3373, -0.1603, 0.0835]) tensor([0.5627, 0.1177, 0.1405, 0.1792]) -Greedy action tensor([ 0.5458, -0.2099, -0.2159, -0.2045]) tensor([0.4151, 0.1950, 0.1938, 0.1960]) -Greedy action tensor([ 1.0133, -0.5033, -0.0913, -0.1237]) tensor([0.5343, 0.1173, 0.1770, 0.1714]) -Greedy action tensor([ 0.6983, -0.1732, -0.0544, 0.0994]) tensor([0.4100, 0.1715, 0.1932, 0.2253]) -Greedy action tensor([ 1.3978, 0.0345, -0.0974, 0.4467]) tensor([0.5358, 0.1371, 0.1201, 0.2070]) -Greedy action tensor([ 0.7116, -0.6205, 0.0078, -0.0391]) tensor([0.4483, 0.1183, 0.2218, 0.2116]) -Greedy action tensor([ 1.4698, -0.6951, -0.2320, 0.1840]) tensor([0.6355, 0.0729, 0.1159, 0.1757]) -Greedy action tensor([ 1.4246, -0.7435, -0.5201, 0.3981]) tensor([0.6189, 0.0708, 0.0885, 0.2217]) -Greedy action tensor([ 0.4863, -0.2479, -0.2563, -0.2584]) tensor([0.4114, 0.1974, 0.1958, 0.1954]) -Greedy action tensor([ 0.6209, -0.1660, 0.0279, 0.0432]) tensor([0.3892, 0.1772, 0.2151, 0.2184]) -Greedy action tensor([ 1.3105, -0.4218, -0.3133, 0.0587]) tensor([0.6024, 0.1066, 0.1188, 0.1723]) -Greedy action tensor([ 1.0313, -0.5007, -0.6686, 0.3053]) tensor([0.5312, 0.1148, 0.0971, 0.2570]) -Greedy action tensor([ 0.6777, -0.3801, -0.2626, 0.0665]) tensor([0.4385, 0.1523, 0.1712, 0.2380]) -Greedy action tensor([ 0.9222, -0.3687, -0.4294, 0.3609]) tensor([0.4752, 0.1307, 0.1230, 0.2711]) -Greedy action tensor([ 1.2076, -0.7391, -0.2712, 0.8652]) tensor([0.4806, 0.0686, 0.1095, 0.3413]) -Greedy action tensor([ 0.8883, -0.3728, 0.2823, -0.1257]) tensor([0.4563, 0.1293, 0.2489, 0.1655]) -Greedy action tensor([ 1.4342, -0.7405, -0.3994, -0.0479]) tensor([0.6664, 0.0757, 0.1065, 0.1514]) -Greedy action tensor([ 1.0802, -0.5896, -0.8412, -0.0769]) tensor([0.6064, 0.1142, 0.0888, 0.1906]) -Greedy action tensor([ 1.0243, -0.2259, -0.2976, -0.0350]) tensor([0.5264, 0.1508, 0.1403, 0.1825]) -Greedy action tensor([ 1.0525, -0.6675, -0.3058, 0.5681]) tensor([0.4873, 0.0873, 0.1253, 0.3002]) -Greedy action tensor([-1.7322, -0.5386, 0.7583, 0.3812]) tensor([0.0406, 0.1339, 0.4897, 0.3359]) -Greedy action tensor([-1.2006, -0.5343, -0.2035, -0.4822]) tensor([0.1297, 0.2526, 0.3516, 0.2661]) -Greedy action tensor([-0.9252, -0.1428, 0.8176, 1.2936]) tensor([0.0553, 0.1208, 0.3157, 0.5082]) -Greedy action tensor([-1.2464, -0.1286, 0.5701, 0.5502]) tensor([0.0616, 0.1883, 0.3788, 0.3713]) -Greedy action tensor([-1.2253, -0.5738, 0.3202, 0.2576]) tensor([0.0832, 0.1597, 0.3904, 0.3667]) -Greedy action tensor([-1.8295, -0.4707, 0.6093, -0.1158]) tensor([0.0457, 0.1777, 0.5232, 0.2534]) -Greedy action tensor([-0.8702, -0.5643, 0.3218, 0.0782]) tensor([0.1215, 0.1649, 0.4000, 0.3136]) -Greedy action tensor([-1.5377, -0.5377, 0.6559, 0.5658]) tensor([0.0479, 0.1302, 0.4295, 0.3925]) -Greedy action tensor([-1.9105, -0.6392, 0.2013, -0.2835]) tensor([0.0558, 0.1990, 0.4612, 0.2840]) -Greedy action tensor([-0.3885, -0.4033, 0.2061, 0.2293]) tensor([0.1769, 0.1743, 0.3206, 0.3281]) -Greedy action tensor([-1.4735, -0.3215, 0.6911, 0.7099]) tensor([0.0460, 0.1455, 0.4005, 0.4081]) -Greedy action tensor([-1.9548, -0.4294, 1.0195, 0.4607]) tensor([0.0275, 0.1264, 0.5383, 0.3078]) -Greedy action tensor([-1.2080, -0.4238, 0.4946, 0.7519]) tensor([0.0634, 0.1388, 0.3478, 0.4499]) -Greedy action tensor([-1.3375, 0.4313, 0.3307, 0.4139]) tensor([0.0558, 0.3270, 0.2958, 0.3214]) -Greedy action tensor([-1.7634, -0.5371, 1.0851, 0.7366]) tensor([0.0295, 0.1007, 0.5099, 0.3599]) -Greedy action tensor([-1.7906, -0.4218, 0.6939, 0.3329]) tensor([0.0395, 0.1554, 0.4744, 0.3306]) -Greedy action tensor([-1.1665, -0.5624, 0.3731, 0.2262]) tensor([0.0868, 0.1589, 0.4048, 0.3495]) -Greedy action tensor([-1.5968, -0.5099, 0.4820, 0.0406]) tensor([0.0585, 0.1734, 0.4675, 0.3006]) -Greedy action tensor([-0.9616, -0.6267, 0.3053, 0.1420]) tensor([0.1116, 0.1560, 0.3961, 0.3364]) -Greedy action tensor([-0.8292, -0.5612, 0.2211, 0.2794]) tensor([0.1220, 0.1595, 0.3488, 0.3697]) -Greedy action tensor([-1.5881, -0.7662, 0.8787, 0.5924]) tensor([0.0418, 0.0951, 0.4929, 0.3702]) -Greedy action tensor([-1.3628, -0.4862, 0.9071, 1.0842]) tensor([0.0406, 0.0975, 0.3929, 0.4690]) -Greedy action tensor([-1.0468, -0.5990, 0.3620, 0.5372]) tensor([0.0867, 0.1357, 0.3548, 0.4227]) -Greedy action tensor([-1.6248, -0.7598, 0.1443, -0.4362]) tensor([0.0798, 0.1897, 0.4684, 0.2621]) -Greedy action tensor([-1.9851, 0.2480, 0.5740, -0.0106]) tensor([0.0328, 0.3063, 0.4244, 0.2365]) -Greedy action tensor([-1.2204, -0.5453, 0.3405, 0.3041]) tensor([0.0812, 0.1594, 0.3866, 0.3728]) -Greedy action tensor([-0.9382, 0.1047, 0.4663, 1.1870]) tensor([0.0614, 0.1742, 0.2501, 0.5142]) -Greedy action tensor([-1.3040, -0.5886, 0.5441, 0.1251]) tensor([0.0737, 0.1507, 0.4679, 0.3077]) -Greedy action tensor([-1.4210, -0.4546, 0.5196, 0.4966]) tensor([0.0575, 0.1511, 0.4003, 0.3912]) -Greedy action tensor([-1.8959, -0.9774, 0.1420, -0.3899]) tensor([0.0637, 0.1597, 0.4892, 0.2874]) -Greedy action tensor([-1.6338, -0.4998, 0.5212, 0.0264]) tensor([0.0556, 0.1727, 0.4794, 0.2923]) -Greedy action tensor([-1.8479, -0.4588, 0.6283, -0.0941]) tensor([0.0441, 0.1768, 0.5244, 0.2547]) -Greedy action tensor([-1.2286, -0.5592, 0.3201, 0.2100]) tensor([0.0842, 0.1645, 0.3963, 0.3550]) -Greedy action tensor([-0.8172, -0.1450, 0.3656, 0.7738]) tensor([0.0898, 0.1760, 0.2932, 0.4410]) -Greedy action tensor([-1.7773, -0.4896, 0.5778, -0.0595]) tensor([0.0482, 0.1748, 0.5083, 0.2687]) -Greedy action tensor([-1.6513, -0.5136, 0.5084, -0.0176]) tensor([0.0558, 0.1742, 0.4840, 0.2860]) -Greedy action tensor([-1.9931, -0.5814, 1.2553, 0.5234]) tensor([0.0231, 0.0949, 0.5955, 0.2864]) -Greedy action tensor([-0.8353, -0.7785, 0.6752, -0.0382]) tensor([0.1136, 0.1202, 0.5143, 0.2520]) -Greedy action tensor([-1.6126, -0.4598, 0.8046, 0.7119]) tensor([0.0391, 0.1237, 0.4380, 0.3992]) -Greedy action tensor([-1.3875, -0.4874, 0.7347, 0.9081]) tensor([0.0460, 0.1131, 0.3841, 0.4568]) -Greedy action tensor([-1.0943, -0.4497, 0.5528, 1.1107]) tensor([0.0583, 0.1110, 0.3024, 0.5283]) -Greedy action tensor([-1.6915, -0.5610, 0.6879, 0.0707]) tensor([0.0483, 0.1495, 0.5211, 0.2811]) -Greedy action tensor([-1.8894, -0.6519, 0.8636, 0.1778]) tensor([0.0357, 0.1229, 0.5596, 0.2818]) -Greedy action tensor([-2.0341, -0.9304, 0.4886, -0.1716]) tensor([0.0436, 0.1316, 0.5438, 0.2810]) -Greedy action tensor([-1.6919, -0.7524, 0.3009, -0.2207]) tensor([0.0656, 0.1678, 0.4811, 0.2856]) -Greedy action tensor([-1.2916, -0.5649, 0.3234, 0.3557]) tensor([0.0752, 0.1556, 0.3784, 0.3908]) -Greedy action tensor([-1.9152, -0.5197, 0.7102, 0.1705]) tensor([0.0372, 0.1501, 0.5134, 0.2993]) -Greedy action tensor([-1.8243, -0.4794, 0.9005, 0.4454]) tensor([0.0336, 0.1289, 0.5124, 0.3251]) -Greedy action tensor([-1.9537, -0.8962, 0.2697, -0.2168]) tensor([0.0532, 0.1532, 0.4915, 0.3021]) -Greedy action tensor([-1.9670, -0.6263, 1.3587, 0.6743]) tensor([0.0214, 0.0819, 0.5961, 0.3006]) -Greedy action tensor([-1.9540, -0.8931, 0.3631, -0.0958]) tensor([0.0489, 0.1413, 0.4962, 0.3136]) -Greedy action tensor([-1.9128, -0.7873, 0.3324, -0.0628]) tensor([0.0503, 0.1550, 0.4749, 0.3198]) -Greedy action tensor([-1.7070, -0.2823, 0.9194, 0.7051]) tensor([0.0332, 0.1379, 0.4587, 0.3702]) -Greedy action tensor([-1.1441, -0.7459, -0.2760, -0.6226]) tensor([0.1525, 0.2271, 0.3634, 0.2570]) -Greedy action tensor([ 0.0854, -0.0963, 0.6309, 1.1131]) tensor([0.1574, 0.1312, 0.2715, 0.4398]) -Greedy action tensor([-0.7308, -0.5655, 0.1699, 0.2900]) tensor([0.1348, 0.1591, 0.3319, 0.3742]) -Greedy action tensor([-1.1572, -0.5596, 0.5401, 0.9821]) tensor([0.0596, 0.1084, 0.3255, 0.5064]) -Greedy action tensor([-1.3600, -0.5580, 0.3556, 0.1506]) tensor([0.0751, 0.1674, 0.4174, 0.3401]) -Greedy action tensor([-1.9726, -0.4641, 1.0677, 0.4399]) tensor([0.0266, 0.1202, 0.5563, 0.2969]) -Greedy action tensor([-1.6743, -0.5153, 0.9365, 0.7379]) tensor([0.0345, 0.1101, 0.4700, 0.3854]) -Greedy action tensor([-1.6006, -0.8068, -0.1726, -0.5049]) tensor([0.0964, 0.2132, 0.4020, 0.2884]) -Greedy action tensor([-1.4993, -0.1471, 0.8663, 0.8204]) tensor([0.0389, 0.1505, 0.4146, 0.3960]) -Greedy action tensor([-1.4072, -0.5529, 0.3926, 0.2130]) tensor([0.0692, 0.1626, 0.4185, 0.3497]) -Greedy action tensor([-0.8752, -0.6572, 0.2154, 0.2009]) tensor([0.1227, 0.1525, 0.3650, 0.3598]) -Greedy action tensor([-1.7996, -0.4499, 0.5887, -0.1104]) tensor([0.0472, 0.1822, 0.5147, 0.2558]) -Greedy action tensor([-1.7969, -0.4884, 0.5952, -0.0648]) tensor([0.0470, 0.1738, 0.5137, 0.2655]) -Greedy action tensor([-1.3939, -0.5007, 0.4840, 0.5005]) tensor([0.0601, 0.1469, 0.3932, 0.3998]) -Greedy action tensor([-2.0175, -0.8183, 0.4844, -0.1369]) tensor([0.0433, 0.1437, 0.5288, 0.2841]) -Greedy action tensor([-1.9239, -0.4245, 1.0393, 0.5351]) tensor([0.0274, 0.1226, 0.5299, 0.3201]) -Greedy action tensor([-1.8185, -0.4860, 0.6938, 0.0168]) tensor([0.0428, 0.1620, 0.5273, 0.2679]) -Greedy action tensor([-1.8590, -0.4311, 0.6145, -0.1280]) tensor([0.0441, 0.1839, 0.5231, 0.2490]) -Greedy action tensor([-1.2514, -0.6297, 0.3570, 0.0142]) tensor([0.0877, 0.1633, 0.4381, 0.3109]) -Greedy action tensor([-1.4014, -0.4976, 0.4060, 0.0169]) tensor([0.0730, 0.1803, 0.4451, 0.3016]) -Greedy action tensor([-0.8208, -0.6979, 0.5732, 0.3081]) tensor([0.1081, 0.1222, 0.4356, 0.3342]) -Greedy action tensor([-1.1436, -0.5892, 0.4113, 0.6328]) tensor([0.0747, 0.1301, 0.3537, 0.4415]) -Greedy action tensor([-1.8627, -0.4886, 0.6713, -0.0865]) tensor([0.0426, 0.1684, 0.5372, 0.2518]) -Greedy action tensor([-1.8624, -0.7723, 0.9910, 0.6259]) tensor([0.0300, 0.0892, 0.5199, 0.3609]) -Greedy action tensor([-1.2887, 0.4619, 0.3792, 0.1699]) tensor([0.0611, 0.3520, 0.3240, 0.2628]) -Greedy action tensor([-1.1091, 0.1612, -0.0936, 0.3512]) tensor([0.0860, 0.3063, 0.2374, 0.3703]) -Greedy action tensor([-1.1454, -0.5641, 0.2715, 0.3123]) tensor([0.0892, 0.1595, 0.3680, 0.3833]) -Greedy action tensor([-1.1333, -0.4530, 0.4310, 0.8865]) tensor([0.0654, 0.1291, 0.3126, 0.4929]) -Greedy action tensor([ 0.3878, 0.0484, 0.1593, -0.2832]) tensor([0.3312, 0.2359, 0.2636, 0.1693]) -Greedy action tensor([ 0.6122, -0.2191, -0.0099, -0.2176]) tensor([0.4152, 0.1808, 0.2229, 0.1811]) -Greedy action tensor([ 0.8809, -0.4039, -0.0677, -0.7610]) tensor([0.5383, 0.1490, 0.2085, 0.1042]) -Greedy action tensor([ 5.9938e-01, -3.1727e-01, -1.9103e-04, -3.6991e-01]) tensor([0.4295, 0.1717, 0.2358, 0.1629]) -Greedy action tensor([ 0.4095, -0.2142, 0.2005, -0.3824]) tensor([0.3571, 0.1914, 0.2897, 0.1618]) -Greedy action tensor([ 0.3480, 0.1400, 0.1351, -0.1252]) tensor([0.3083, 0.2504, 0.2492, 0.1921]) -Greedy action tensor([ 0.9574, -0.6558, 0.0152, -0.5130]) tensor([0.5498, 0.1095, 0.2143, 0.1264]) -Greedy action tensor([ 0.7172, -0.1816, 0.0473, -0.3518]) tensor([0.4421, 0.1799, 0.2262, 0.1518]) -Greedy action tensor([ 0.2440, 0.0041, 0.0499, -0.0444]) tensor([0.2976, 0.2342, 0.2451, 0.2231]) -Greedy action tensor([ 0.4809, -0.1306, -0.0435, -0.3242]) tensor([0.3874, 0.2102, 0.2293, 0.1732]) -Greedy action tensor([ 0.4451, -0.3256, 0.2411, -0.4106]) tensor([0.3700, 0.1712, 0.3017, 0.1572]) -Greedy action tensor([ 0.3980, -0.0216, 0.0770, -0.3641]) tensor([0.3510, 0.2307, 0.2546, 0.1638]) -Greedy action tensor([ 0.2886, 0.0724, 0.0852, -0.3206]) tensor([0.3159, 0.2545, 0.2578, 0.1718]) -Greedy action tensor([ 0.4073, 0.0797, 0.1341, -0.2828]) tensor([0.3352, 0.2416, 0.2551, 0.1681]) -Greedy action tensor([ 0.3190, -0.2516, 0.0896, -0.4479]) tensor([0.3540, 0.2001, 0.2815, 0.1644]) -Greedy action tensor([ 0.2543, -0.1599, 0.0732, -0.4167]) tensor([0.3326, 0.2198, 0.2775, 0.1700]) -Greedy action tensor([ 0.4388, -0.1057, -0.0925, -0.4285]) tensor([0.3864, 0.2242, 0.2271, 0.1623]) -Greedy action tensor([ 0.3550, 0.0597, -0.0208, -0.2201]) tensor([0.3340, 0.2486, 0.2294, 0.1879]) -Greedy action tensor([ 0.4353, -0.2154, 0.2620, -0.2931]) tensor([0.3515, 0.1834, 0.2955, 0.1696]) -Greedy action tensor([ 0.6518, -0.3334, -0.0806, -0.4358]) tensor([0.4564, 0.1704, 0.2194, 0.1538]) -Greedy action tensor([ 0.6276, -0.0930, 0.0779, -0.2554]) tensor([0.4037, 0.1964, 0.2330, 0.1669]) -Greedy action tensor([ 0.3782, 0.1358, -0.0973, 0.0219]) tensor([0.3219, 0.2526, 0.2001, 0.2254]) -Greedy action tensor([ 0.4489, -0.1239, -0.0849, -0.1586]) tensor([0.3710, 0.2093, 0.2176, 0.2021]) -Greedy action tensor([ 0.7599, -0.1027, -0.0031, -0.4278]) tensor([0.4559, 0.1924, 0.2126, 0.1390]) -Greedy action tensor([ 0.3206, -0.0590, 0.1614, -0.1911]) tensor([0.3188, 0.2181, 0.2719, 0.1911]) -Greedy action tensor([ 0.4568, -0.1563, -0.0113, -0.3816]) tensor([0.3846, 0.2083, 0.2408, 0.1663]) -Greedy action tensor([ 0.4839, -0.2561, -0.0021, -0.3020]) tensor([0.3925, 0.1873, 0.2414, 0.1788]) -Greedy action tensor([ 0.6035, -0.1989, -0.0524, -0.4298]) tensor([0.4305, 0.1930, 0.2234, 0.1532]) -Greedy action tensor([ 1.2045, -0.8451, -0.0284, -0.7664]) tensor([0.6412, 0.0826, 0.1869, 0.0893]) -Greedy action tensor([ 0.4992, -0.1683, -0.0337, -0.3990]) tensor([0.3989, 0.2046, 0.2341, 0.1624]) -Greedy action tensor([ 0.9473, -0.6516, -0.0427, -0.7980]) tensor([0.5720, 0.1156, 0.2125, 0.0999]) -Greedy action tensor([ 0.3633, -0.0576, -0.0651, -0.2292]) tensor([0.3495, 0.2295, 0.2277, 0.1933]) -Greedy action tensor([ 0.2915, -0.0073, 0.1450, -0.1543]) tensor([0.3081, 0.2285, 0.2661, 0.1973]) -Greedy action tensor([ 0.9345, -0.5110, -0.0718, -0.4589]) tensor([0.5407, 0.1274, 0.1977, 0.1342]) -Greedy action tensor([ 0.4799, 0.1332, -0.0406, -0.2732]) tensor([0.3607, 0.2551, 0.2143, 0.1699]) -Greedy action tensor([ 0.5248, -0.2752, 0.0196, -0.3323]) tensor([0.4037, 0.1814, 0.2436, 0.1713]) -Greedy action tensor([ 0.2629, 0.1202, -0.0721, -0.3799]) tensor([0.3217, 0.2790, 0.2301, 0.1692]) -Greedy action tensor([ 0.3226, -0.0788, -0.0333, -0.5411]) tensor([0.3582, 0.2398, 0.2510, 0.1510]) -Greedy action tensor([ 0.6118, 0.1998, -0.2031, -0.0670]) tensor([0.3828, 0.2535, 0.1695, 0.1942]) -Greedy action tensor([ 0.4357, -0.0013, 0.0130, -0.2533]) tensor([0.3567, 0.2304, 0.2338, 0.1791]) -Greedy action tensor([ 0.6279, -0.0995, 0.0537, -0.2015]) tensor([0.4028, 0.1946, 0.2268, 0.1757]) -Greedy action tensor([ 0.8892, -0.7632, -0.2389, -0.7311]) tensor([0.5837, 0.1118, 0.1889, 0.1155]) -Greedy action tensor([ 0.5568, -0.1986, -0.1952, -0.2804]) tensor([0.4212, 0.1979, 0.1986, 0.1823]) -Greedy action tensor([ 0.6670, -0.2248, -0.1085, -0.5820]) tensor([0.4635, 0.1900, 0.2135, 0.1329]) -Greedy action tensor([ 0.4657, -0.2495, 0.1495, -0.5434]) tensor([0.3872, 0.1894, 0.2822, 0.1412]) -Greedy action tensor([ 0.5736, 0.1347, -0.0873, -0.1867]) tensor([0.3804, 0.2453, 0.1965, 0.1779]) -Greedy action tensor([ 0.6579, -0.2060, -0.0938, -0.1402]) tensor([0.4268, 0.1799, 0.2012, 0.1921]) -Greedy action tensor([ 0.3382, -0.0034, 0.1178, -0.2513]) tensor([0.3260, 0.2317, 0.2615, 0.1808]) -Greedy action tensor([ 0.4684, -0.2442, 0.0395, -0.6684]) tensor([0.4061, 0.1991, 0.2645, 0.1303]) -Greedy action tensor([ 0.8529, -0.5887, 0.0708, -0.6576]) tensor([0.5223, 0.1235, 0.2389, 0.1153]) -Greedy action tensor([ 1.1735, -0.9478, 0.0190, -0.6242]) tensor([0.6247, 0.0749, 0.1969, 0.1035]) -Greedy action tensor([ 0.6834, -0.4283, -0.1330, -0.3822]) tensor([0.4727, 0.1555, 0.2089, 0.1629]) -Greedy action tensor([ 0.8299, -0.2657, -0.0667, -0.3451]) tensor([0.4876, 0.1630, 0.1989, 0.1506]) -Greedy action tensor([ 0.4977, -0.1407, 0.1256, -0.4922]) tensor([0.3862, 0.2040, 0.2662, 0.1435]) -Greedy action tensor([ 0.9632, -0.7181, -0.1510, -0.6649]) tensor([0.5846, 0.1088, 0.1918, 0.1148]) -Greedy action tensor([ 0.1865, 0.2638, -0.0825, -0.3068]) tensor([0.2894, 0.3127, 0.2212, 0.1767]) -Greedy action tensor([ 0.3208, -0.0410, -0.0388, -0.3654]) tensor([0.3451, 0.2403, 0.2409, 0.1738]) -Greedy action tensor([ 0.6901, -0.5373, 0.2611, -0.7027]) tensor([0.4561, 0.1337, 0.2970, 0.1133]) -Greedy action tensor([ 0.4826, -0.0767, -0.0504, -0.2838]) tensor([0.3812, 0.2179, 0.2237, 0.1772]) -Greedy action tensor([ 0.3373, -0.0402, 0.2193, -0.3263]) tensor([0.3237, 0.2219, 0.2877, 0.1667]) -Greedy action tensor([ 0.2447, -0.0924, 0.0307, -0.1288]) tensor([0.3116, 0.2224, 0.2515, 0.2145]) -Greedy action tensor([ 0.4676, 0.1465, -0.1035, -0.0713]) tensor([0.3480, 0.2524, 0.1966, 0.2030]) -Greedy action tensor([ 0.5726, -0.2612, 0.0850, -0.6009]) tensor([0.4241, 0.1842, 0.2605, 0.1312]) -Greedy action tensor([ 0.2470, 0.1876, -0.0227, -0.3092]) tensor([0.3050, 0.2873, 0.2329, 0.1748]) -Greedy action tensor([ 0.6991, -0.4406, 0.1960, -0.4967]) tensor([0.4490, 0.1436, 0.2715, 0.1358]) -Greedy action tensor([ 0.7919, -0.4685, 0.0584, -0.4514]) tensor([0.4873, 0.1382, 0.2340, 0.1405]) -Greedy action tensor([ 0.8595, -0.4103, 0.0849, -0.7334]) tensor([0.5141, 0.1444, 0.2369, 0.1045]) -Greedy action tensor([ 0.5566, -0.1931, -0.0030, -0.3225]) tensor([0.4067, 0.1921, 0.2324, 0.1688]) -Greedy action tensor([ 0.4348, -0.4118, 0.0632, -0.4802]) tensor([0.3970, 0.1703, 0.2738, 0.1590]) -Greedy action tensor([ 0.4434, -0.2500, -0.1033, -0.3362]) tensor([0.3941, 0.1970, 0.2281, 0.1807]) -Greedy action tensor([ 0.4687, 0.1446, -0.1110, 0.1066]) tensor([0.3356, 0.2427, 0.1880, 0.2337]) -Greedy action tensor([ 0.6911, -0.4424, -0.0650, -0.4787]) tensor([0.4758, 0.1532, 0.2234, 0.1477]) -Greedy action tensor([ 0.5619, 0.0043, -0.0337, -0.1880]) tensor([0.3852, 0.2205, 0.2123, 0.1820]) -Greedy action tensor([ 0.4514, -0.3009, -0.0566, -0.4534]) tensor([0.4036, 0.1902, 0.2429, 0.1633]) -Greedy action tensor([ 0.4445, 0.0328, 0.1130, -0.2107]) tensor([0.3449, 0.2285, 0.2476, 0.1791]) -Greedy action tensor([ 0.6606, -0.5619, 0.0035, -0.7686]) tensor([0.4872, 0.1435, 0.2526, 0.1167]) -Greedy action tensor([ 0.3444, -0.2326, 0.0190, -0.1670]) tensor([0.3468, 0.1948, 0.2505, 0.2080]) -Greedy action tensor([ 0.5307, -0.3773, 0.0076, -0.4343]) tensor([0.4207, 0.1697, 0.2493, 0.1603]) -Greedy action tensor([ 0.9397, -0.3651, 0.0647, -0.5265]) tensor([0.5211, 0.1413, 0.2172, 0.1203]) -Greedy action tensor([ 0.5390, -0.2152, -0.0957, -0.3945]) tensor([0.4178, 0.1965, 0.2215, 0.1643]) -Greedy action tensor([ 0.7772, -0.7087, -0.0726, -0.7077]) tensor([0.5318, 0.1204, 0.2273, 0.1205]) -Greedy action tensor([ 2.0180, -1.1245, 0.1279, 0.8431]) tensor([0.6653, 0.0287, 0.1005, 0.2055]) -Greedy action tensor([ 0.9753, -0.1696, 0.6318, 1.1983]) tensor([0.3051, 0.0971, 0.2164, 0.3813]) -Greedy action tensor([ 0.0592, 1.1674, -0.1147, 0.7522]) tensor([0.1456, 0.4409, 0.1223, 0.2911]) -Greedy action tensor([-0.0264, 0.8682, -0.4341, 0.1960]) tensor([0.1866, 0.4563, 0.1241, 0.2330]) -Greedy action tensor([0.4617, 1.0566, 0.1497, 0.5111]) tensor([0.2176, 0.3945, 0.1593, 0.2286]) -Greedy action tensor([ 0.8721, -0.3851, 0.9602, 0.8592]) tensor([0.2973, 0.0846, 0.3247, 0.2935]) -Greedy action tensor([1.6318, 0.5699, 1.3356, 0.6145]) tensor([0.4080, 0.1411, 0.3034, 0.1475]) -Greedy action tensor([ 0.5684, 0.7944, -0.2032, -0.5232]) tensor([0.3277, 0.4108, 0.1515, 0.1100]) -Greedy action tensor([0.9124, 1.2184, 0.4177, 0.2760]) tensor([0.2860, 0.3883, 0.1744, 0.1513]) -Greedy action tensor([-0.3175, 0.1547, 0.4553, 1.1293]) tensor([0.1109, 0.1778, 0.2401, 0.4712]) -Greedy action tensor([ 0.2509, -1.2353, -0.1461, 0.1449]) tensor([0.3574, 0.0809, 0.2403, 0.3215]) -Greedy action tensor([ 0.5590, 0.0479, -0.5386, 0.8561]) tensor([0.3049, 0.1829, 0.1017, 0.4104]) -Greedy action tensor([ 0.5331, -1.0097, 0.4468, 0.0726]) tensor([0.3620, 0.0774, 0.3321, 0.2284]) -Greedy action tensor([-0.5829, -0.7103, -0.1908, 0.4083]) tensor([0.1651, 0.1454, 0.2444, 0.4450]) -Greedy action tensor([ 0.8632, -1.0702, 0.3177, 1.4073]) tensor([0.2901, 0.0420, 0.1681, 0.4998]) -Greedy action tensor([ 1.1975, -0.2790, -0.2725, 1.5483]) tensor([0.3474, 0.0794, 0.0799, 0.4934]) -Greedy action tensor([ 0.7620, -1.1946, -0.5776, 0.7714]) tensor([0.4145, 0.0586, 0.1086, 0.4184]) -Greedy action tensor([ 1.5345, -0.5603, 0.1056, 0.6387]) tensor([0.5647, 0.0695, 0.1353, 0.2305]) -Greedy action tensor([ 1.8993, 0.5773, -1.0870, 0.7836]) tensor([0.6080, 0.1621, 0.0307, 0.1992]) -Greedy action tensor([ 1.3497, 0.4338, -0.4162, 0.3831]) tensor([0.5124, 0.2050, 0.0876, 0.1949]) -Greedy action tensor([ 1.0357, -0.4161, -0.7035, 0.9053]) tensor([0.4372, 0.1024, 0.0768, 0.3837]) -Greedy action tensor([ 0.9813, -1.3839, -0.4188, 1.2167]) tensor([0.3837, 0.0360, 0.0946, 0.4856]) -Greedy action tensor([ 1.3526, -0.1529, -0.3954, 0.4557]) tensor([0.5544, 0.1230, 0.0965, 0.2261]) -Greedy action tensor([ 0.6818, 0.8116, -0.7632, 1.0547]) tensor([0.2613, 0.2976, 0.0616, 0.3795]) -Greedy action tensor([ 0.6582, -0.6501, -0.3214, 1.8167]) tensor([0.2070, 0.0559, 0.0777, 0.6593]) -Greedy action tensor([ 0.5308, -0.0379, 1.4787, 1.8037]) tensor([0.1296, 0.0734, 0.3343, 0.4627]) -Greedy action tensor([ 1.5188, -0.3219, 0.7222, 1.3246]) tensor([0.4110, 0.0652, 0.1853, 0.3385]) -Greedy action tensor([ 0.9063, -1.1878, 0.4810, 0.4708]) tensor([0.4126, 0.0508, 0.2697, 0.2669]) -Greedy action tensor([ 1.2486, -0.8723, -0.1438, 0.6990]) tensor([0.5140, 0.0616, 0.1277, 0.2967]) -Greedy action tensor([ 1.3800, -0.9055, 1.5366, 0.1996]) tensor([0.3878, 0.0395, 0.4536, 0.1191]) -Greedy action tensor([ 0.1673, -0.8517, 0.3721, -0.5014]) tensor([0.3225, 0.1164, 0.3958, 0.1653]) -Greedy action tensor([ 1.2573, -0.5775, 0.4023, 1.6663]) tensor([0.3236, 0.0517, 0.1376, 0.4871]) -Greedy action tensor([ 1.3018, -0.2594, 0.8168, 0.6733]) tensor([0.4239, 0.0890, 0.2610, 0.2261]) -Greedy action tensor([ 1.2064, -1.1630, 0.7888, 1.4755]) tensor([0.3267, 0.0306, 0.2152, 0.4276]) -Greedy action tensor([ 0.3595, 0.6024, -0.0764, 0.1258]) tensor([0.2693, 0.3434, 0.1742, 0.2132]) -Greedy action tensor([ 1.7961, -0.5745, 0.0657, 0.5047]) tensor([0.6470, 0.0605, 0.1147, 0.1779]) -Greedy action tensor([0.6206, 0.1657, 0.4796, 0.5190]) tensor([0.2936, 0.1863, 0.2550, 0.2652]) -Greedy action tensor([-0.4496, -0.1285, -0.3245, 0.7467]) tensor([0.1466, 0.2022, 0.1662, 0.4850]) -Greedy action tensor([-0.0973, -0.0407, 0.8467, -0.1405]) tensor([0.1790, 0.1894, 0.4601, 0.1714]) -Greedy action tensor([ 1.7395, -1.2873, -0.2036, 1.0517]) tensor([0.5902, 0.0286, 0.0846, 0.2967]) -Greedy action tensor([0.4366, 0.4570, 0.5683, 0.8567]) tensor([0.2135, 0.2179, 0.2436, 0.3250]) -Greedy action tensor([ 0.8263, -1.0995, -0.4142, 1.4270]) tensor([0.3069, 0.0447, 0.0888, 0.5596]) -Greedy action tensor([ 1.3995, -1.0778, 0.3186, 1.1904]) tensor([0.4475, 0.0376, 0.1518, 0.3631]) -Greedy action tensor([ 0.1770, -0.7369, -0.0055, 0.3158]) tensor([0.2956, 0.1185, 0.2463, 0.3396]) -Greedy action tensor([ 1.2152, -0.8410, 0.1373, 0.7753]) tensor([0.4734, 0.0606, 0.1611, 0.3049]) -Greedy action tensor([ 1.0214, 0.1296, -0.7362, 0.3623]) tensor([0.4763, 0.1952, 0.0821, 0.2464]) -Greedy action tensor([0.3019, 0.3253, 0.6144, 0.3910]) tensor([0.2230, 0.2283, 0.3048, 0.2438]) -Greedy action tensor([0.7048, 0.5639, 0.3040, 0.2807]) tensor([0.3132, 0.2720, 0.2098, 0.2049]) -Greedy action tensor([ 0.8785, -0.2638, 0.9921, 1.1488]) tensor([0.2667, 0.0851, 0.2988, 0.3495]) -Greedy action tensor([ 0.6275, -0.2437, 0.3890, 0.3873]) tensor([0.3341, 0.1398, 0.2632, 0.2628]) -Greedy action tensor([1.2038, 0.0871, 1.0571, 1.1436]) tensor([0.3192, 0.1045, 0.2757, 0.3006]) -Greedy action tensor([ 0.9162, 0.7796, -0.5534, 0.4870]) tensor([0.3632, 0.3168, 0.0835, 0.2365]) -Greedy action tensor([ 1.2898, -0.6599, -0.0065, 0.6868]) tensor([0.5094, 0.0725, 0.1394, 0.2787]) -Greedy action tensor([ 0.8684, -0.0354, 1.3595, 0.4280]) tensor([0.2715, 0.1100, 0.4437, 0.1748]) -Greedy action tensor([ 0.4636, -0.8604, -0.8342, 2.4247]) tensor([0.1157, 0.0308, 0.0316, 0.8220]) -Greedy action tensor([ 0.9646, -0.2354, 0.6626, 1.4274]) tensor([0.2756, 0.0830, 0.2037, 0.4377]) -Greedy action tensor([ 1.4632, -0.6053, 0.4582, 0.8662]) tensor([0.4895, 0.0619, 0.1792, 0.2695]) -Greedy action tensor([ 1.5383, -0.2086, 0.7322, 0.6315]) tensor([0.4939, 0.0861, 0.2206, 0.1994]) -Greedy action tensor([ 1.0536, -0.4345, 1.5233, 1.2862]) tensor([0.2447, 0.0552, 0.3914, 0.3087]) -Greedy action tensor([ 1.0182, -1.3269, 0.2446, 1.4924]) tensor([0.3161, 0.0303, 0.1458, 0.5078]) -Greedy action tensor([ 0.8110, -1.9132, 0.0812, 1.0701]) tensor([0.3517, 0.0231, 0.1695, 0.4557]) -Greedy action tensor([ 0.1246, 0.6813, 0.9940, -0.5802]) tensor([0.1778, 0.3102, 0.4241, 0.0879]) -Greedy action tensor([ 0.8630, 0.6768, -0.4977, 0.7091]) tensor([0.3397, 0.2820, 0.0871, 0.2912]) -Greedy action tensor([ 0.3095, -0.2808, 1.7353, 0.3708]) tensor([0.1475, 0.0818, 0.6139, 0.1569]) -Greedy action tensor([ 0.7329, 0.8958, -0.6114, 0.1743]) tensor([0.3323, 0.3910, 0.0866, 0.1901]) -Greedy action tensor([ 0.9579, -0.3700, 0.3977, -0.0412]) tensor([0.4537, 0.1202, 0.2591, 0.1670]) -Greedy action tensor([ 0.8276, -0.0629, 1.1555, 1.1924]) tensor([0.2359, 0.0968, 0.3275, 0.3398]) -Greedy action tensor([-1.0999, 0.0024, 0.6045, 0.0848]) tensor([0.0783, 0.2356, 0.4303, 0.2559]) -Greedy action tensor([ 1.1064, -0.7402, 0.8177, 0.9576]) tensor([0.3612, 0.0570, 0.2706, 0.3113]) -Greedy action tensor([ 0.3869, -0.3108, 0.6659, 0.1541]) tensor([0.2769, 0.1378, 0.3660, 0.2194]) -Greedy action tensor([ 0.7609, -0.9442, 1.2137, 0.0673]) tensor([0.3073, 0.0559, 0.4833, 0.1536]) -Greedy action tensor([ 1.4401, -0.4001, -0.5317, 1.2273]) tensor([0.4748, 0.0754, 0.0661, 0.3838]) -Greedy action tensor([ 0.9799, -1.0776, 0.0572, 0.2989]) tensor([0.4923, 0.0629, 0.1957, 0.2492]) -Greedy action tensor([-0.4698, 1.0215, 0.4345, -0.8336]) tensor([0.1162, 0.5161, 0.2870, 0.0807]) -Greedy action tensor([ 0.6704, -0.2194, 1.0718, 1.3400]) tensor([0.2058, 0.0845, 0.3075, 0.4021]) -Greedy action tensor([ 1.5558, -0.3654, 0.5988, 0.7786]) tensor([0.5025, 0.0736, 0.1930, 0.2310]) -Greedy action tensor([-0.3856, 0.3132, -0.4695, 0.9155]) tensor([0.1315, 0.2645, 0.1209, 0.4831]) -Greedy action tensor([ 1.3872, 0.3067, -0.1575, -0.1057]) tensor([0.5626, 0.1910, 0.1200, 0.1264]) -Greedy action tensor([ 1.7757, 0.2073, -0.2454, 1.3335]) tensor([0.5042, 0.1051, 0.0668, 0.3240]) -Greedy action tensor([-0.2958, -1.3562, -0.8938, 1.6705]) tensor([0.1106, 0.0383, 0.0608, 0.7903]) -Greedy action tensor([ 1.1954, -0.4888, 0.1495, 0.9734]) tensor([0.4277, 0.0794, 0.1503, 0.3426]) -Greedy action tensor([ 1.1709, -0.6102, 1.8812, 1.0516]) tensor([0.2445, 0.0412, 0.4974, 0.2170]) -Greedy action tensor([ 1.6076, -0.6417, -0.2225, 0.2155]) tensor([0.6603, 0.0696, 0.1059, 0.1641]) -Greedy action tensor([ 1.3296, -0.6975, -0.5442, 1.0720]) tensor([0.4859, 0.0640, 0.0746, 0.3755]) -Greedy action tensor([ 1.1366, -0.4030, -0.1902, -0.0883]) tensor([0.5638, 0.1209, 0.1496, 0.1656]) -Greedy action tensor([ 0.7850, -0.4522, -0.2662, 0.1013]) tensor([0.4663, 0.1353, 0.1630, 0.2354]) -Greedy action tensor([ 1.1945, -0.7253, -0.4024, 0.6827]) tensor([0.5132, 0.0753, 0.1039, 0.3076]) -Greedy action tensor([ 1.0329, -0.2980, -0.0047, -0.3105]) tensor([0.5321, 0.1406, 0.1885, 0.1388]) -Greedy action tensor([ 1.1698, -0.7646, -0.3577, 0.6395]) tensor([0.5128, 0.0741, 0.1113, 0.3018]) -Greedy action tensor([ 0.8454, -0.6730, -0.1490, 0.4294]) tensor([0.4447, 0.0974, 0.1645, 0.2934]) -Greedy action tensor([ 0.3952, -0.5392, -0.3143, -0.0309]) tensor([0.3940, 0.1548, 0.1938, 0.2573]) -Greedy action tensor([ 0.8382, -0.6610, -0.2622, 0.2426]) tensor([0.4745, 0.1060, 0.1579, 0.2616]) -Greedy action tensor([ 1.5482, -0.4334, -0.0889, 0.2323]) tensor([0.6248, 0.0861, 0.1215, 0.1676]) -Greedy action tensor([ 1.2427, -0.6500, -0.2846, 0.0168]) tensor([0.6019, 0.0907, 0.1307, 0.1767]) -Greedy action tensor([ 0.2343, -0.1394, -0.4094, 0.0870]) tensor([0.3250, 0.2237, 0.1708, 0.2805]) -Greedy action tensor([ 1.0857, -0.3660, -0.1816, 0.1804]) tensor([0.5208, 0.1220, 0.1466, 0.2106]) -Greedy action tensor([ 0.9711, -0.4187, -0.0129, 0.0312]) tensor([0.4966, 0.1237, 0.1856, 0.1940]) -Greedy action tensor([ 1.2101, -0.6446, -0.3312, 0.0615]) tensor([0.5925, 0.0927, 0.1269, 0.1879]) -Greedy action tensor([ 1.4600, -0.6124, -0.3343, 0.3271]) tensor([0.6195, 0.0780, 0.1030, 0.1995]) -Greedy action tensor([ 0.9461, -0.5062, -0.2362, 0.2024]) tensor([0.4960, 0.1161, 0.1521, 0.2358]) -Greedy action tensor([ 1.4178, -0.6395, -0.2005, 0.1824]) tensor([0.6185, 0.0790, 0.1226, 0.1798]) -Greedy action tensor([ 0.8247, -0.2517, -0.1736, -0.2486]) tensor([0.4875, 0.1662, 0.1797, 0.1667]) -Greedy action tensor([ 1.6366, -0.4530, -0.3273, 0.2298]) tensor([0.6627, 0.0820, 0.0930, 0.1623]) -Greedy action tensor([ 0.9098, -0.2765, -0.0053, 0.3238]) tensor([0.4420, 0.1350, 0.1770, 0.2460]) -Greedy action tensor([ 1.2164, -0.7074, 0.0786, -0.0744]) tensor([0.5742, 0.0839, 0.1840, 0.1579]) -Greedy action tensor([ 1.0613, -0.7122, -0.4636, 0.5643]) tensor([0.5011, 0.0851, 0.1091, 0.3048]) -Greedy action tensor([ 0.8514, -0.3091, -0.4756, -0.0747]) tensor([0.5064, 0.1587, 0.1343, 0.2006]) -Greedy action tensor([ 1.8846, -0.2138, -0.4756, 0.3300]) tensor([0.7001, 0.0859, 0.0661, 0.1479]) -Greedy action tensor([ 1.2819, -0.2858, -0.3861, -0.2249]) tensor([0.6178, 0.1288, 0.1165, 0.1369]) -Greedy action tensor([ 1.2162, -0.7235, -0.4172, 0.6297]) tensor([0.5276, 0.0758, 0.1030, 0.2935]) -Greedy action tensor([ 1.4281, -0.5238, -0.0440, 0.1907]) tensor([0.6018, 0.0855, 0.1381, 0.1746]) -Greedy action tensor([ 0.9991, -0.5165, -0.2976, 0.0736]) tensor([0.5293, 0.1163, 0.1447, 0.2098]) -Greedy action tensor([ 0.7413, -0.0491, -0.0406, -0.0042]) tensor([0.4192, 0.1901, 0.1918, 0.1989]) -Greedy action tensor([ 1.5694, -0.5869, -0.2756, 0.0028]) tensor([0.6745, 0.0781, 0.1066, 0.1408]) -Greedy action tensor([ 0.3665, -0.3162, 0.1065, -0.1667]) tensor([0.3493, 0.1765, 0.2693, 0.2049]) -Greedy action tensor([ 1.3680, -0.8041, -0.2811, 0.5334]) tensor([0.5746, 0.0655, 0.1105, 0.2494]) -Greedy action tensor([ 1.2426, -0.3833, -0.5278, -0.2465]) tensor([0.6279, 0.1235, 0.1069, 0.1416]) -Greedy action tensor([ 0.7584, -0.3802, -0.1052, 0.1812]) tensor([0.4341, 0.1390, 0.1830, 0.2438]) -Greedy action tensor([ 1.7902, -0.1386, -0.4711, 0.1786]) tensor([0.6901, 0.1003, 0.0719, 0.1377]) -Greedy action tensor([ 0.9240, 0.1673, 0.0212, -0.3446]) tensor([0.4639, 0.2176, 0.1881, 0.1304]) -Greedy action tensor([ 0.9568, -0.8155, -0.4761, 0.1911]) tensor([0.5337, 0.0907, 0.1274, 0.2482]) -Greedy action tensor([ 0.9792, -0.5217, -0.7965, 0.5276]) tensor([0.4929, 0.1099, 0.0835, 0.3138]) -Greedy action tensor([ 1.0840, -0.4641, -0.3113, 0.3035]) tensor([0.5212, 0.1108, 0.1291, 0.2388]) -Greedy action tensor([ 1.1386, -0.7270, -0.1228, 0.1797]) tensor([0.5490, 0.0850, 0.1555, 0.2105]) -Greedy action tensor([ 0.9072, -0.4455, -0.9681, -0.1583]) tensor([0.5693, 0.1472, 0.0873, 0.1962]) -Greedy action tensor([ 1.0117, -0.4725, -0.1343, 0.3925]) tensor([0.4801, 0.1088, 0.1526, 0.2585]) -Greedy action tensor([ 0.7530, -0.3446, -0.5069, 0.7137]) tensor([0.3878, 0.1294, 0.1100, 0.3728]) -Greedy action tensor([ 1.2037, -0.4886, -0.2563, 0.4796]) tensor([0.5260, 0.0968, 0.1222, 0.2550]) -Greedy action tensor([ 0.4536, 0.3503, 0.0995, -0.4693]) tensor([0.3332, 0.3005, 0.2339, 0.1324]) -Greedy action tensor([ 1.7182, -0.9402, -0.1606, -0.0079]) tensor([0.7139, 0.0500, 0.1091, 0.1270]) -Greedy action tensor([ 1.2001, 0.0604, -0.1543, -0.2317]) tensor([0.5504, 0.1761, 0.1421, 0.1315]) -Greedy action tensor([ 0.6617, -0.3281, -0.3062, -0.0885]) tensor([0.4497, 0.1671, 0.1708, 0.2124]) -Greedy action tensor([ 1.0995, -0.1582, -0.5270, -0.2803]) tensor([0.5772, 0.1641, 0.1135, 0.1452]) -Greedy action tensor([ 0.8595, -0.4967, -0.0873, -0.0959]) tensor([0.4925, 0.1269, 0.1911, 0.1895]) -Greedy action tensor([ 0.7232, -0.2848, 0.0398, -0.0647]) tensor([0.4302, 0.1570, 0.2172, 0.1956]) -Greedy action tensor([ 0.3335, -0.2589, -0.3484, 0.1031]) tensor([0.3505, 0.1938, 0.1772, 0.2784]) -Greedy action tensor([ 0.8992, -0.3185, -0.0389, 0.0952]) tensor([0.4684, 0.1386, 0.1833, 0.2096]) -Greedy action tensor([ 1.4208, -0.6024, -0.0047, 0.0966]) tensor([0.6102, 0.0807, 0.1467, 0.1623]) -Greedy action tensor([ 1.0096, -0.5991, -0.4813, 0.6209]) tensor([0.4754, 0.0952, 0.1071, 0.3223]) -Greedy action tensor([ 1.1995, -0.6877, -0.4602, 0.1950]) tensor([0.5855, 0.0887, 0.1114, 0.2144]) -Greedy action tensor([ 1.0419, -0.5302, -0.5917, 1.0315]) tensor([0.4180, 0.0868, 0.0816, 0.4136]) -Greedy action tensor([ 0.6369, -0.2407, -0.1394, -0.2386]) tensor([0.4362, 0.1814, 0.2007, 0.1817]) -Greedy action tensor([ 1.4251, -0.5640, -0.1144, 0.0194]) tensor([0.6264, 0.0857, 0.1344, 0.1536]) -Greedy action tensor([ 0.3271, 0.1074, -0.0555, -0.2703]) tensor([0.3295, 0.2645, 0.2247, 0.1813]) -Greedy action tensor([ 0.8941, -0.5487, -0.6501, 0.5553]) tensor([0.4625, 0.1093, 0.0987, 0.3295]) -Greedy action tensor([ 0.9882, -0.4804, -0.2917, 0.0292]) tensor([0.5287, 0.1217, 0.1470, 0.2026]) -Greedy action tensor([ 1.0815, -0.4162, -0.0839, -0.1550]) tensor([0.5477, 0.1225, 0.1708, 0.1590]) -Greedy action tensor([ 0.2962, -0.0738, 0.0090, -0.0707]) tensor([0.3191, 0.2204, 0.2394, 0.2211]) -Greedy action tensor([ 1.0299, -0.2893, -0.0498, -0.2236]) tensor([0.5284, 0.1413, 0.1795, 0.1509]) -Greedy action tensor([ 1.0737, 0.0496, -0.6407, 0.1291]) tensor([0.5187, 0.1863, 0.0934, 0.2017]) -Greedy action tensor([ 1.3432, -0.5888, -0.3614, 0.2028]) tensor([0.6074, 0.0880, 0.1104, 0.1942]) -Greedy action tensor([ 1.1690, -0.3406, -0.5901, 0.9939]) tensor([0.4479, 0.0990, 0.0771, 0.3760]) -Greedy action tensor([ 0.7379, -0.5017, -0.0823, 0.1767]) tensor([0.4347, 0.1259, 0.1914, 0.2480]) -Greedy action tensor([ 0.7869, -0.1379, -0.0338, -0.0718]) tensor([0.4424, 0.1755, 0.1947, 0.1874]) -Greedy action tensor([ 1.1399, -0.5351, -0.7126, 0.1268]) tensor([0.5857, 0.1097, 0.0919, 0.2127]) -Greedy action tensor([ 0.5406, -0.3441, -0.5735, 0.3005]) tensor([0.3956, 0.1633, 0.1299, 0.3112]) -Greedy action tensor([ 1.1621, -0.4192, -0.5205, 0.4809]) tensor([0.5270, 0.1084, 0.0980, 0.2666]) -Greedy action tensor([ 1.2860, -0.6922, -0.2031, 0.3822]) tensor([0.5653, 0.0782, 0.1275, 0.2290]) -Greedy action tensor([ 1.2474, -0.4891, -0.0453, 0.1477]) tensor([0.5607, 0.0988, 0.1539, 0.1867]) -Greedy action tensor([ 1.2147, -0.6501, -0.1136, 0.1456]) tensor([0.5672, 0.0879, 0.1503, 0.1947]) -Greedy action tensor([ 1.4038, -0.8925, -0.0274, 0.3515]) tensor([0.5921, 0.0596, 0.1415, 0.2067]) -Greedy action tensor([ 1.2377, -0.4931, 0.0603, 0.1864]) tensor([0.5450, 0.0965, 0.1679, 0.1905]) -Greedy action tensor([ 1.4470, -0.8631, -0.4700, 0.3863]) tensor([0.6280, 0.0623, 0.0923, 0.2174]) -Greedy action tensor([-1.1003, -0.4490, 0.4638, 1.0349]) tensor([0.0619, 0.1187, 0.2958, 0.5236]) -Greedy action tensor([-1.4523, -0.5279, 0.4790, 0.4135]) tensor([0.0592, 0.1493, 0.4087, 0.3828]) -Greedy action tensor([-1.1868, -0.2825, 0.6920, -0.6722]) tensor([0.0855, 0.2113, 0.5600, 0.1431]) -Greedy action tensor([-1.0292, -0.5848, 0.2241, 0.2900]) tensor([0.1020, 0.1591, 0.3573, 0.3816]) -Greedy action tensor([-1.6611, -0.4847, 0.5434, 0.0619]) tensor([0.0529, 0.1715, 0.4794, 0.2962]) -Greedy action tensor([-1.7019, -0.5798, 0.6892, 0.1772]) tensor([0.0464, 0.1426, 0.5071, 0.3039]) -Greedy action tensor([-1.0827, -0.5574, 0.2638, 0.2664]) tensor([0.0963, 0.1628, 0.3700, 0.3710]) -Greedy action tensor([-1.7995, -0.5261, 1.0655, 0.7060]) tensor([0.0291, 0.1040, 0.5106, 0.3564]) -Greedy action tensor([-0.8277, -0.3493, 0.4691, 1.0030]) tensor([0.0799, 0.1290, 0.2924, 0.4987]) -Greedy action tensor([-1.7249, -0.4673, 0.6092, 0.0399]) tensor([0.0484, 0.1701, 0.4991, 0.2825]) -Greedy action tensor([-1.9051, -0.5994, 0.6212, 0.0110]) tensor([0.0417, 0.1538, 0.5213, 0.2832]) -Greedy action tensor([-1.1520, -0.5251, 0.2996, 0.3155]) tensor([0.0871, 0.1630, 0.3719, 0.3779]) -Greedy action tensor([-1.4489, 0.2149, 0.5672, -0.6389]) tensor([0.0624, 0.3292, 0.4682, 0.1402]) -Greedy action tensor([-0.5099, -0.4479, 0.2124, 0.0468]) tensor([0.1704, 0.1813, 0.3509, 0.2974]) -Greedy action tensor([-1.8186, -0.4811, 0.6075, -0.0966]) tensor([0.0460, 0.1754, 0.5209, 0.2576]) -Greedy action tensor([-1.2047, -0.4290, 0.6187, 1.0497]) tensor([0.0529, 0.1150, 0.3278, 0.5044]) -Greedy action tensor([-1.8129, -0.6827, 1.3997, 0.8894]) tensor([0.0228, 0.0706, 0.5665, 0.3401]) -Greedy action tensor([-0.2969, 0.0104, 0.1755, 0.2856]) tensor([0.1738, 0.2363, 0.2787, 0.3112]) -Greedy action tensor([-1.8695, -0.9866, 0.2543, -0.4352]) tensor([0.0626, 0.1513, 0.5234, 0.2627]) -Greedy action tensor([-1.3042, -0.5796, 0.3887, 0.0364]) tensor([0.0812, 0.1675, 0.4412, 0.3102]) -Greedy action tensor([-1.5290, -0.4850, 1.1882, 0.9912]) tensor([0.0318, 0.0904, 0.4820, 0.3958]) -Greedy action tensor([-1.0474, -0.5515, 0.4194, 0.8194]) tensor([0.0744, 0.1221, 0.3224, 0.4810]) -Greedy action tensor([-1.1812, -0.4367, 0.7304, 1.1274]) tensor([0.0502, 0.1056, 0.3394, 0.5048]) -Greedy action tensor([-1.7039, -0.4547, 0.6601, 0.2477]) tensor([0.0451, 0.1574, 0.4798, 0.3177]) -Greedy action tensor([-1.6853, -0.5005, 0.5451, 0.0057]) tensor([0.0526, 0.1721, 0.4897, 0.2855]) -Greedy action tensor([-1.0978, -0.3422, 0.5214, 1.0722]) tensor([0.0590, 0.1257, 0.2981, 0.5171]) -Greedy action tensor([-1.3969, -0.5654, 0.3711, 0.1545]) tensor([0.0721, 0.1655, 0.4223, 0.3401]) -Greedy action tensor([-1.4141, -0.4682, 1.1624, 1.1600]) tensor([0.0335, 0.0863, 0.4406, 0.4396]) -Greedy action tensor([-1.5983, -0.4943, 0.5129, -0.0414]) tensor([0.0588, 0.1772, 0.4852, 0.2788]) -Greedy action tensor([-1.9603, -0.7247, 1.0301, 0.0425]) tensor([0.0315, 0.1084, 0.6267, 0.2334]) -Greedy action tensor([-1.6950, -0.4875, 0.5665, 0.0531]) tensor([0.0508, 0.1699, 0.4875, 0.2918]) -Greedy action tensor([-0.3540, -0.4859, 0.2511, 0.2959]) tensor([0.1778, 0.1559, 0.3257, 0.3406]) -Greedy action tensor([-1.5110, -0.4138, 0.6630, 0.6516]) tensor([0.0466, 0.1394, 0.4093, 0.4047]) -Greedy action tensor([-1.7117, -0.5047, 0.5495, -0.0064]) tensor([0.0514, 0.1720, 0.4935, 0.2831]) -Greedy action tensor([-1.3211, -0.5620, 0.3474, 0.1506]) tensor([0.0781, 0.1669, 0.4145, 0.3404]) -Greedy action tensor([-1.9146, -0.9873, 0.1906, -0.4785]) tensor([0.0627, 0.1586, 0.5150, 0.2637]) -Greedy action tensor([-2.0404, -0.9305, 0.5091, -0.1306]) tensor([0.0424, 0.1286, 0.5427, 0.2863]) -Greedy action tensor([-1.5107, -0.5069, 0.7036, -0.5188]) tensor([0.0642, 0.1751, 0.5876, 0.1731]) -Greedy action tensor([-0.0109, -0.2130, 0.1993, 0.2676]) tensor([0.2287, 0.1869, 0.2822, 0.3022]) -Greedy action tensor([-1.9583, -0.5200, 1.0754, 0.3722]) tensor([0.0276, 0.1162, 0.5727, 0.2835]) -Greedy action tensor([-1.0187, -0.3274, 0.4709, 0.8757]) tensor([0.0710, 0.1418, 0.3150, 0.4722]) -Greedy action tensor([-1.6864, -0.5429, 0.5495, -0.0150]) tensor([0.0532, 0.1668, 0.4973, 0.2828]) -Greedy action tensor([-1.6297, -0.2137, 0.4837, -0.0512]) tensor([0.0548, 0.2258, 0.4536, 0.2657]) -Greedy action tensor([-1.7309, -0.3842, 0.1056, -0.4159]) tensor([0.0674, 0.2590, 0.4227, 0.2509]) -Greedy action tensor([-1.4146, -0.3481, 0.5521, 0.5855]) tensor([0.0542, 0.1575, 0.3876, 0.4007]) -Greedy action tensor([-1.2655, -0.4333, 0.3898, 0.5191]) tensor([0.0690, 0.1586, 0.3613, 0.4111]) -Greedy action tensor([-1.0902, -0.4272, 0.1922, -0.2320]) tensor([0.1123, 0.2179, 0.4049, 0.2649]) -Greedy action tensor([-1.0685, -0.4785, 0.3135, 0.5588]) tensor([0.0842, 0.1519, 0.3353, 0.4286]) -Greedy action tensor([-1.8678, -0.5306, 0.7238, -0.0897]) tensor([0.0415, 0.1582, 0.5545, 0.2458]) -Greedy action tensor([-1.2005, -0.5557, 0.3088, 0.2385]) tensor([0.0859, 0.1636, 0.3884, 0.3621]) -Greedy action tensor([-1.6682, -0.5055, 0.5095, -0.0070]) tensor([0.0547, 0.1749, 0.4825, 0.2879]) -Greedy action tensor([-1.5251, -0.5141, 0.4628, 0.1441]) tensor([0.0611, 0.1680, 0.4463, 0.3245]) -Greedy action tensor([-1.8474, -0.9931, 0.1175, -0.4334]) tensor([0.0685, 0.1610, 0.4888, 0.2817]) -Greedy action tensor([-1.5821, -0.4370, 0.5899, -0.4683]) tensor([0.0626, 0.1969, 0.5497, 0.1908]) -Greedy action tensor([-1.7875, -0.6237, 0.7291, 0.2071]) tensor([0.0418, 0.1338, 0.5174, 0.3070]) -Greedy action tensor([-1.5225, -0.5242, 0.4475, 0.1114]) tensor([0.0625, 0.1695, 0.4480, 0.3201]) -Greedy action tensor([-1.5192, -0.2379, 0.1923, -0.5728]) tensor([0.0787, 0.2832, 0.4355, 0.2026]) -Greedy action tensor([-1.6855, -0.4157, 0.6463, 0.2995]) tensor([0.0452, 0.1608, 0.4652, 0.3288]) -Greedy action tensor([-1.2349, -0.5972, 0.2947, 0.2155]) tensor([0.0849, 0.1607, 0.3921, 0.3622]) -Greedy action tensor([-1.0595, -0.4209, 0.4992, 0.9876]) tensor([0.0650, 0.1230, 0.3088, 0.5032]) -Greedy action tensor([-1.3115, -0.5242, 0.6758, 1.0036]) tensor([0.0485, 0.1066, 0.3538, 0.4911]) -Greedy action tensor([-1.2311, -0.3587, 0.5763, 0.9717]) tensor([0.0539, 0.1291, 0.3288, 0.4882]) -Greedy action tensor([-1.5805, -0.4642, 0.5581, 0.3524]) tensor([0.0514, 0.1570, 0.4364, 0.3552]) -Greedy action tensor([-1.7903, -0.7014, 0.1819, -0.2989]) tensor([0.0641, 0.1904, 0.4607, 0.2848]) -Greedy action tensor([-1.3584, -0.4985, 0.4075, 0.3135]) tensor([0.0688, 0.1626, 0.4023, 0.3662]) -Greedy action tensor([-0.8283, -0.3696, 1.2893, 1.5174]) tensor([0.0469, 0.0742, 0.3896, 0.4894]) -Greedy action tensor([-1.9371, -0.6169, 1.2632, 0.6166]) tensor([0.0237, 0.0889, 0.5824, 0.3050]) -Greedy action tensor([-1.0308, -0.3957, 1.3363, 1.4023]) tensor([0.0401, 0.0756, 0.4276, 0.4567]) -Greedy action tensor([-1.4530, -0.3496, 0.8124, 0.9128]) tensor([0.0411, 0.1240, 0.3965, 0.4383]) -Greedy action tensor([-1.8563, -0.8977, 0.2002, -0.2903]) tensor([0.0617, 0.1609, 0.4822, 0.2953]) -Greedy action tensor([-1.4207, -0.6122, 0.4249, -0.1082]) tensor([0.0752, 0.1689, 0.4764, 0.2795]) -Greedy action tensor([-1.4723, -0.5474, 0.4165, 0.0997]) tensor([0.0669, 0.1687, 0.4423, 0.3222]) -Greedy action tensor([-1.2394, -0.5830, 0.2868, 0.2916]) tensor([0.0823, 0.1587, 0.3786, 0.3804]) -Greedy action tensor([-1.9065, -0.6776, 1.1899, 0.6090]) tensor([0.0257, 0.0878, 0.5685, 0.3180]) -Greedy action tensor([-1.1176, -0.6030, 0.6857, 1.1308]) tensor([0.0549, 0.0918, 0.3332, 0.5200]) -Greedy action tensor([-1.7735, -0.7523, 0.0494, -0.3512]) tensor([0.0709, 0.1967, 0.4386, 0.2938]) -Greedy action tensor([-1.3915, -0.5855, 0.4965, 0.4223]) tensor([0.0626, 0.1401, 0.4134, 0.3839]) -Greedy action tensor([-0.5354, -0.1189, 0.2684, 0.2376]) tensor([0.1446, 0.2193, 0.3230, 0.3132]) -Greedy action tensor([-1.2224, -0.4589, 0.5190, 0.5996]) tensor([0.0665, 0.1427, 0.3795, 0.4113]) -Greedy action tensor([-0.9288, -0.5668, 0.3581, -0.0245]) tensor([0.1173, 0.1684, 0.4247, 0.2897]) -Greedy action tensor([-1.5237, -0.4791, 0.1815, -0.7150]) tensor([0.0863, 0.2452, 0.4748, 0.1937]) -Greedy action tensor([ 0.4922, -0.2161, -0.2207, -0.2763]) tensor([0.4088, 0.2013, 0.2004, 0.1895]) -Greedy action tensor([ 0.5735, -0.2698, -0.0180, -0.3188]) tensor([0.4178, 0.1798, 0.2312, 0.1712]) -Greedy action tensor([ 0.8752, -0.6467, 0.0289, -0.4642]) tensor([0.5238, 0.1143, 0.2247, 0.1372]) -Greedy action tensor([ 0.6188, -0.1922, -0.0920, -0.1913]) tensor([0.4201, 0.1867, 0.2064, 0.1869]) -Greedy action tensor([ 0.7832, -0.5259, 0.0383, -0.5419]) tensor([0.4974, 0.1343, 0.2361, 0.1322]) -Greedy action tensor([ 0.6417, -0.1944, 0.0180, -0.3563]) tensor([0.4277, 0.1854, 0.2292, 0.1577]) -Greedy action tensor([ 0.3141, 0.0181, -0.0274, -0.1506]) tensor([0.3244, 0.2413, 0.2305, 0.2038]) -Greedy action tensor([ 0.3706, -0.2032, 0.1593, -0.3666]) tensor([0.3507, 0.1976, 0.2839, 0.1678]) -Greedy action tensor([ 0.4830, -0.2141, -0.0270, -0.3368]) tensor([0.3939, 0.1961, 0.2365, 0.1735]) -Greedy action tensor([ 0.4428, -0.0348, 0.0392, -0.2365]) tensor([0.3577, 0.2219, 0.2390, 0.1814]) -Greedy action tensor([ 0.5232, -0.2526, 0.0655, -0.3020]) tensor([0.3951, 0.1819, 0.2500, 0.1731]) -Greedy action tensor([ 0.5173, -0.4114, 0.0610, -0.3978]) tensor([0.4117, 0.1626, 0.2608, 0.1649]) -Greedy action tensor([ 0.5345, -0.3211, -0.0465, -0.4938]) tensor([0.4270, 0.1815, 0.2388, 0.1527]) -Greedy action tensor([ 0.6661, -0.3620, -0.0399, -0.5556]) tensor([0.4660, 0.1667, 0.2300, 0.1373]) -Greedy action tensor([ 0.4329, 0.0565, 0.1285, -0.2974]) tensor([0.3442, 0.2362, 0.2538, 0.1658]) -Greedy action tensor([ 0.3709, -0.0164, -0.2597, -0.4291]) tensor([0.3759, 0.2552, 0.2001, 0.1689]) -Greedy action tensor([ 0.7219, -0.6145, -0.0101, -0.5209]) tensor([0.4921, 0.1293, 0.2366, 0.1420]) -Greedy action tensor([ 0.5005, -0.1262, 0.0630, -0.2955]) tensor([0.3801, 0.2031, 0.2454, 0.1715]) -Greedy action tensor([ 0.3619, -0.1213, -0.0648, -0.3076]) tensor([0.3595, 0.2218, 0.2347, 0.1841]) -Greedy action tensor([ 0.2680, -0.0114, -0.0651, -0.2566]) tensor([0.3263, 0.2468, 0.2339, 0.1931]) -Greedy action tensor([ 0.4445, -0.1911, -0.1177, -0.3850]) tensor([0.3943, 0.2088, 0.2248, 0.1720]) -Greedy action tensor([ 0.1477, 0.2406, -0.0700, -0.1489]) tensor([0.2743, 0.3011, 0.2207, 0.2039]) -Greedy action tensor([ 0.3169, 0.1085, 0.0502, -0.0859]) tensor([0.3081, 0.2501, 0.2359, 0.2059]) -Greedy action tensor([ 0.4630, -0.1945, -0.1523, -0.1785]) tensor([0.3868, 0.2004, 0.2091, 0.2037]) -Greedy action tensor([ 0.7684, -0.5754, -0.2055, -0.4997]) tensor([0.5209, 0.1359, 0.1967, 0.1466]) -Greedy action tensor([ 0.6721, -0.3717, 0.0298, -0.4623]) tensor([0.4546, 0.1601, 0.2391, 0.1462]) -Greedy action tensor([ 0.7718, -0.1295, -0.0479, -0.4057]) tensor([0.4641, 0.1884, 0.2045, 0.1430]) -Greedy action tensor([ 0.7373, -0.6385, -0.1035, -0.7272]) tensor([0.5221, 0.1319, 0.2252, 0.1207]) -Greedy action tensor([ 0.5481, -0.1283, -0.0551, -0.5628]) tensor([0.4193, 0.2132, 0.2294, 0.1381]) -Greedy action tensor([ 0.5184, -0.1500, 0.1372, -0.3940]) tensor([0.3850, 0.1973, 0.2630, 0.1546]) -Greedy action tensor([ 0.6261, -0.4207, 0.0625, -0.3822]) tensor([0.4376, 0.1536, 0.2491, 0.1597]) -Greedy action tensor([ 0.3178, 0.0640, 0.0016, -0.2880]) tensor([0.3278, 0.2543, 0.2390, 0.1789]) -Greedy action tensor([ 0.4593, 0.0120, 0.0182, -0.0738]) tensor([0.3485, 0.2228, 0.2242, 0.2045]) -Greedy action tensor([ 0.2800, -0.0651, -0.0588, -0.2854]) tensor([0.3346, 0.2369, 0.2384, 0.1901]) -Greedy action tensor([ 0.7130, -0.3062, 0.0509, -0.0704]) tensor([0.4285, 0.1547, 0.2210, 0.1958]) -Greedy action tensor([ 0.4018, -0.2458, 0.1777, -0.3957]) tensor([0.3606, 0.1887, 0.2882, 0.1624]) -Greedy action tensor([ 0.9963, -0.7525, -0.0447, -0.6259]) tensor([0.5799, 0.1009, 0.2048, 0.1145]) -Greedy action tensor([ 0.4202, -0.0493, -0.0187, -0.1813]) tensor([0.3548, 0.2219, 0.2288, 0.1945]) -Greedy action tensor([ 0.7524, -0.4058, -0.0134, -0.4025]) tensor([0.4775, 0.1500, 0.2220, 0.1505]) -Greedy action tensor([ 0.9475, -0.2140, -0.0425, -0.3929]) tensor([0.5138, 0.1608, 0.1909, 0.1345]) -Greedy action tensor([ 0.6427, -0.3319, 0.1408, -0.5753]) tensor([0.4389, 0.1656, 0.2657, 0.1298]) -Greedy action tensor([ 0.4133, -0.1235, -0.0488, -0.3013]) tensor([0.3698, 0.2162, 0.2330, 0.1810]) -Greedy action tensor([ 0.6096, -0.4350, -0.1323, -0.4613]) tensor([0.4607, 0.1621, 0.2194, 0.1579]) -Greedy action tensor([ 0.1294, -0.0950, 0.0880, -0.1272]) tensor([0.2831, 0.2262, 0.2716, 0.2190]) -Greedy action tensor([ 0.8554, -0.5790, -0.1840, -0.4760]) tensor([0.5388, 0.1284, 0.1905, 0.1423]) -Greedy action tensor([ 0.8563, -0.5059, -0.0629, -0.4450]) tensor([0.5189, 0.1329, 0.2070, 0.1412]) -Greedy action tensor([ 0.1079, 0.0632, 0.1014, -0.1937]) tensor([0.2711, 0.2592, 0.2693, 0.2005]) -Greedy action tensor([ 0.5026, -0.0614, 0.0667, -0.1349]) tensor([0.3644, 0.2073, 0.2357, 0.1926]) -Greedy action tensor([ 0.4938, -0.1946, -0.0345, -0.3001]) tensor([0.3931, 0.1975, 0.2318, 0.1777]) -Greedy action tensor([ 0.9750, -0.4098, 0.0614, -0.4531]) tensor([0.5288, 0.1324, 0.2121, 0.1268]) -Greedy action tensor([ 0.5890, -0.1349, -0.1220, -0.2044]) tensor([0.4118, 0.1997, 0.2023, 0.1863]) -Greedy action tensor([ 0.5247, -0.3679, -0.1574, -0.4460]) tensor([0.4359, 0.1786, 0.2204, 0.1651]) -Greedy action tensor([ 0.6423, -0.2022, 0.0098, -0.3379]) tensor([0.4280, 0.1840, 0.2274, 0.1606]) -Greedy action tensor([ 0.6577, -0.4497, 0.0634, -0.3083]) tensor([0.4419, 0.1460, 0.2439, 0.1682]) -Greedy action tensor([ 0.4130, -0.2439, 0.1658, -0.2884]) tensor([0.3577, 0.1855, 0.2794, 0.1774]) -Greedy action tensor([ 0.2632, 0.2224, -0.0460, -0.2974]) tensor([0.3063, 0.2940, 0.2248, 0.1748]) -Greedy action tensor([ 0.4182, 0.0766, -0.0101, -0.5817]) tensor([0.3663, 0.2603, 0.2387, 0.1348]) -Greedy action tensor([ 0.5559, -0.2524, -0.2937, -0.2110]) tensor([0.4278, 0.1906, 0.1829, 0.1987]) -Greedy action tensor([ 0.6601, -0.2367, -0.0727, -0.2184]) tensor([0.4341, 0.1770, 0.2086, 0.1803]) -Greedy action tensor([ 0.5196, -0.2514, 0.1851, -0.4159]) tensor([0.3890, 0.1799, 0.2784, 0.1527]) -Greedy action tensor([ 0.5423, 0.0969, -0.0205, -0.0989]) tensor([0.3654, 0.2341, 0.2081, 0.1924]) -Greedy action tensor([ 0.7103, 0.0595, -0.0941, -0.2719]) tensor([0.4267, 0.2226, 0.1909, 0.1598]) -Greedy action tensor([ 0.3227, -0.1752, 0.0679, -0.1919]) tensor([0.3355, 0.2039, 0.2600, 0.2005]) -Greedy action tensor([ 0.2322, -0.2592, 0.0776, -0.3147]) tensor([0.3282, 0.2008, 0.2812, 0.1899]) -Greedy action tensor([ 0.5111, -0.1910, -0.1607, -0.2774]) tensor([0.4064, 0.2014, 0.2076, 0.1847]) -Greedy action tensor([ 0.3938, 0.2074, 0.0812, -0.0354]) tensor([0.3113, 0.2583, 0.2277, 0.2027]) -Greedy action tensor([ 0.6405, -0.1214, 0.1521, -0.2137]) tensor([0.3991, 0.1863, 0.2448, 0.1698]) -Greedy action tensor([ 0.3648, -0.1572, 0.0263, -0.1704]) tensor([0.3458, 0.2052, 0.2465, 0.2025]) -Greedy action tensor([ 0.2208, 0.1992, 0.2220, -0.1516]) tensor([0.2726, 0.2667, 0.2729, 0.1878]) -Greedy action tensor([ 1.1107, -0.8987, -0.0539, -0.6753]) tensor([0.6197, 0.0831, 0.1934, 0.1039]) -Greedy action tensor([ 0.5016, -0.1774, -0.0598, -0.2609]) tensor([0.3931, 0.1993, 0.2242, 0.1834]) -Greedy action tensor([ 0.4935, -0.5020, -0.1091, -0.4664]) tensor([0.4348, 0.1607, 0.2380, 0.1665]) -Greedy action tensor([ 0.8944, -0.1450, -0.0876, -0.4599]) tensor([0.5034, 0.1780, 0.1886, 0.1300]) -Greedy action tensor([ 0.3590, -0.0585, 0.1026, -0.3044]) tensor([0.3393, 0.2235, 0.2625, 0.1747]) -Greedy action tensor([ 1.1213, -0.6825, 0.0209, -0.6118]) tensor([0.5973, 0.0984, 0.1988, 0.1056]) -Greedy action tensor([ 0.3875, -0.0279, 0.0949, -0.3537]) tensor([0.3469, 0.2290, 0.2589, 0.1653]) -Greedy action tensor([ 0.4072, 0.2060, -0.0168, -0.0700]) tensor([0.3233, 0.2644, 0.2116, 0.2006]) -Greedy action tensor([ 0.4045, 0.1803, -0.0060, -0.1186]) tensor([0.3273, 0.2616, 0.2171, 0.1940]) -Greedy action tensor([ 0.7770, -0.5418, -0.2197, -0.4520]) tensor([0.5184, 0.1386, 0.1913, 0.1517]) -Greedy action tensor([ 0.2962, 0.1265, 0.0370, -0.2339]) tensor([0.3121, 0.2634, 0.2408, 0.1837]) -Greedy action tensor([ 0.6982, -0.1368, -0.0167, -0.2408]) tensor([0.4321, 0.1875, 0.2114, 0.1690]) -Greedy action tensor([ 1.1480, -0.7488, 0.7557, 1.4229]) tensor([0.3183, 0.0478, 0.2150, 0.4190]) -Greedy action tensor([-0.8130, 0.0013, -0.2491, -0.0659]) tensor([0.1403, 0.3168, 0.2466, 0.2962]) -Greedy action tensor([1.4225, 0.5713, 0.2709, 1.4483]) tensor([0.3611, 0.1542, 0.1142, 0.3706]) -Greedy action tensor([1.6317, 0.0728, 0.6514, 0.8708]) tensor([0.4871, 0.1025, 0.1828, 0.2276]) -Greedy action tensor([ 0.8747, -0.3070, -0.1116, -0.1086]) tensor([0.4869, 0.1494, 0.1816, 0.1821]) -Greedy action tensor([-0.0847, 0.3050, 1.6697, 0.0144]) tensor([0.1068, 0.1577, 0.6175, 0.1180]) -Greedy action tensor([ 1.0966, 0.3226, -0.4226, 1.7490]) tensor([0.2777, 0.1281, 0.0608, 0.5334]) -Greedy action tensor([ 0.6324, -0.8449, -0.1577, 2.0020]) tensor([0.1781, 0.0406, 0.0808, 0.7005]) -Greedy action tensor([ 0.8598, -0.4386, 0.9034, 0.9798]) tensor([0.2903, 0.0792, 0.3032, 0.3273]) -Greedy action tensor([ 0.1764, 1.4350, 0.8962, -0.2173]) tensor([0.1379, 0.4856, 0.2834, 0.0931]) -Greedy action tensor([2.2533, 0.7945, 0.8345, 1.5929]) tensor([0.5022, 0.1168, 0.1215, 0.2595]) -Greedy action tensor([0.9079, 0.9220, 0.9768, 0.0232]) tensor([0.2858, 0.2899, 0.3062, 0.1180]) -Greedy action tensor([ 0.0607, 0.3970, -0.0020, -0.2007]) tensor([0.2434, 0.3407, 0.2286, 0.1874]) -Greedy action tensor([0.4204, 0.6125, 0.4898, 0.9554]) tensor([0.2004, 0.2428, 0.2148, 0.3421]) -Greedy action tensor([ 1.4261, -0.2263, 0.0269, 1.3811]) tensor([0.4176, 0.0800, 0.1031, 0.3993]) -Greedy action tensor([ 0.5939, 1.1693, -0.4836, 0.3518]) tensor([0.2562, 0.4555, 0.0872, 0.2011]) -Greedy action tensor([1.2460, 0.4516, 0.9035, 0.9406]) tensor([0.3450, 0.1559, 0.2449, 0.2542]) -Greedy action tensor([-0.7437, -0.2579, -0.5029, 0.1063]) tensor([0.1603, 0.2606, 0.2040, 0.3751]) -Greedy action tensor([-0.0209, -0.9877, 0.6565, 1.3276]) tensor([0.1389, 0.0528, 0.2734, 0.5349]) -Greedy action tensor([ 1.5707, 0.8045, -0.8071, 0.6120]) tensor([0.5152, 0.2395, 0.0478, 0.1975]) -Greedy action tensor([ 0.9344, -0.4062, -1.1807, 1.6909]) tensor([0.2846, 0.0745, 0.0343, 0.6065]) -Greedy action tensor([ 1.8566, -0.4483, 0.3853, 2.1566]) tensor([0.3732, 0.0372, 0.0857, 0.5038]) -Greedy action tensor([1.1675, 1.2209, 1.1336, 0.0134]) tensor([0.2997, 0.3161, 0.2897, 0.0945]) -Greedy action tensor([ 1.8198, -0.3146, 0.7894, 1.1880]) tensor([0.4983, 0.0590, 0.1778, 0.2649]) -Greedy action tensor([ 0.8905, 0.1203, -0.4624, 0.9076]) tensor([0.3651, 0.1690, 0.0944, 0.3714]) -Greedy action tensor([ 2.0236, -0.5648, 0.9782, 1.2528]) tensor([0.5293, 0.0398, 0.1861, 0.2449]) -Greedy action tensor([0.7573, 0.2462, 0.7185, 0.9568]) tensor([0.2644, 0.1586, 0.2543, 0.3227]) -Greedy action tensor([-0.0583, 1.7668, 0.3896, -0.4707]) tensor([0.1060, 0.6578, 0.1660, 0.0702]) -Greedy action tensor([ 0.7936, 0.5863, -0.5625, 0.4809]) tensor([0.3569, 0.2901, 0.0920, 0.2611]) -Greedy action tensor([ 0.5306, 0.3327, -0.2497, -0.8025]) tensor([0.3933, 0.3227, 0.1803, 0.1037]) -Greedy action tensor([ 0.3355, 0.5039, -1.1389, 0.8284]) tensor([0.2470, 0.2922, 0.0565, 0.4043]) -Greedy action tensor([ 0.5662, 0.6688, -0.0788, -0.1040]) tensor([0.3180, 0.3524, 0.1669, 0.1627]) -Greedy action tensor([-0.0062, 1.0189, 0.0263, 0.1624]) tensor([0.1666, 0.4642, 0.1721, 0.1971]) -Greedy action tensor([ 1.0852, 1.1219, -0.8043, 0.4239]) tensor([0.3697, 0.3835, 0.0559, 0.1908]) -Greedy action tensor([ 1.6540, 0.3289, -0.3537, 0.6234]) tensor([0.5692, 0.1513, 0.0764, 0.2031]) -Greedy action tensor([ 1.2724, 0.1371, -0.3932, 0.8305]) tensor([0.4644, 0.1492, 0.0878, 0.2985]) -Greedy action tensor([ 1.0775, -0.3106, 0.1238, 0.3439]) tensor([0.4728, 0.1180, 0.1822, 0.2270]) -Greedy action tensor([ 0.8478, 0.0567, -0.5269, 0.8307]) tensor([0.3719, 0.1686, 0.0940, 0.3655]) -Greedy action tensor([ 0.5088, 1.1237, -0.6790, -0.0297]) tensor([0.2675, 0.4948, 0.0816, 0.1561]) -Greedy action tensor([0.0116, 0.2152, 1.0306, 0.4573]) tensor([0.1525, 0.1869, 0.4224, 0.2381]) -Greedy action tensor([1.1303, 0.8633, 0.6117, 0.2095]) tensor([0.3624, 0.2775, 0.2158, 0.1443]) -Greedy action tensor([ 1.7558, -0.6267, 1.0512, 0.4698]) tensor([0.5368, 0.0496, 0.2653, 0.1483]) -Greedy action tensor([ 2.0224, -0.2970, 0.9916, 0.2959]) tensor([0.6124, 0.0602, 0.2185, 0.1089]) -Greedy action tensor([ 0.9921, -0.7728, 0.1146, 1.0415]) tensor([0.3791, 0.0649, 0.1577, 0.3983]) -Greedy action tensor([0.5145, 0.3615, 0.1817, 1.5661]) tensor([0.1839, 0.1578, 0.1319, 0.5264]) -Greedy action tensor([0.6418, 0.3085, 0.4634, 1.3725]) tensor([0.2160, 0.1548, 0.1807, 0.4485]) -Greedy action tensor([0.8465, 0.0276, 0.2715, 1.1152]) tensor([0.3019, 0.1331, 0.1699, 0.3950]) -Greedy action tensor([ 1.1155, 0.0267, 0.8594, -0.0367]) tensor([0.4121, 0.1387, 0.3190, 0.1302]) -Greedy action tensor([ 0.3406, 0.7465, -0.3296, 1.4549]) tensor([0.1650, 0.2476, 0.0844, 0.5029]) -Greedy action tensor([ 1.0413, -0.0999, -0.6396, 0.9411]) tensor([0.4149, 0.1325, 0.0773, 0.3753]) -Greedy action tensor([-0.3535, 0.4766, 0.1486, 0.0731]) tensor([0.1544, 0.3541, 0.2551, 0.2365]) -Greedy action tensor([ 1.3617, 0.5081, -0.3057, 0.6770]) tensor([0.4719, 0.2010, 0.0891, 0.2380]) -Greedy action tensor([1.6857, 0.3053, 1.1637, 0.4271]) tensor([0.4697, 0.1181, 0.2787, 0.1334]) -Greedy action tensor([ 0.7057, 0.5952, -0.4918, 0.7521]) tensor([0.3082, 0.2759, 0.0931, 0.3228]) -Greedy action tensor([ 1.2313, -0.4981, 0.0895, -0.0488]) tensor([0.5635, 0.1000, 0.1799, 0.1567]) -Greedy action tensor([ 0.3817, 0.0187, -0.8200, 1.2023]) tensor([0.2343, 0.1630, 0.0704, 0.5323]) -Greedy action tensor([ 0.9749, 0.4747, -0.6255, 0.2632]) tensor([0.4350, 0.2638, 0.0878, 0.2135]) -Greedy action tensor([ 0.6793, -0.1562, 0.5851, 1.7470]) tensor([0.1904, 0.0826, 0.1733, 0.5538]) -Greedy action tensor([ 1.1009, -0.0227, -0.9841, 0.6900]) tensor([0.4734, 0.1539, 0.0588, 0.3139]) -Greedy action tensor([ 0.8730, -0.1296, -0.1762, 0.3174]) tensor([0.4365, 0.1602, 0.1529, 0.2504]) -Greedy action tensor([ 1.1791, -0.0454, -0.2987, -0.1389]) tensor([0.5587, 0.1642, 0.1275, 0.1496]) -Greedy action tensor([ 0.3243, -0.9868, -0.1592, 0.3575]) tensor([0.3425, 0.0923, 0.2112, 0.3540]) -Greedy action tensor([ 1.4393, -0.4761, 1.3813, 0.6918]) tensor([0.3899, 0.0574, 0.3680, 0.1847]) -Greedy action tensor([ 0.1407, 0.0102, -0.3068, 1.5089]) tensor([0.1552, 0.1362, 0.0992, 0.6095]) -Greedy action tensor([ 0.6656, 0.9626, -0.0999, -0.3176]) tensor([0.3140, 0.4225, 0.1460, 0.1175]) -Greedy action tensor([ 0.9641, 1.1346, -0.1869, 0.8096]) tensor([0.2977, 0.3530, 0.0942, 0.2551]) -Greedy action tensor([ 0.7784, 0.6891, -0.4882, 1.0751]) tensor([0.2823, 0.2582, 0.0796, 0.3799]) -Greedy action tensor([ 1.1040, -0.4744, 0.9918, 0.3880]) tensor([0.3863, 0.0797, 0.3453, 0.1888]) -Greedy action tensor([ 0.1856, -1.4955, -0.3099, 2.7139]) tensor([0.0698, 0.0130, 0.0425, 0.8747]) -Greedy action tensor([ 0.8560, -0.4795, 0.5133, 0.7845]) tensor([0.3444, 0.0906, 0.2444, 0.3206]) -Greedy action tensor([ 0.9192, -1.6715, -0.2180, 1.6989]) tensor([0.2796, 0.0210, 0.0897, 0.6098]) -Greedy action tensor([ 1.8999, -0.2870, 1.5194, 1.1951]) tensor([0.4367, 0.0490, 0.2985, 0.2158]) -Greedy action tensor([ 1.7070, -0.5638, 0.6124, 0.4416]) tensor([0.5814, 0.0600, 0.1946, 0.1640]) -Greedy action tensor([ 0.7212, -0.1825, -0.6714, 0.2234]) tensor([0.4422, 0.1791, 0.1099, 0.2688]) -Greedy action tensor([ 0.9405, -0.6977, 0.9905, 0.5406]) tensor([0.3429, 0.0666, 0.3605, 0.2299]) -Greedy action tensor([ 1.0386, -0.0262, 0.5270, 0.6780]) tensor([0.3786, 0.1305, 0.2270, 0.2640]) -Greedy action tensor([ 0.1451, 0.2157, 1.1112, -0.8427]) tensor([0.1971, 0.2115, 0.5179, 0.0734]) -Greedy action tensor([ 0.9621, -0.2056, -0.1660, 0.8304]) tensor([0.3982, 0.1239, 0.1289, 0.3491]) -Greedy action tensor([ 0.8010, -1.0359, 0.5165, 0.4781]) tensor([0.3794, 0.0604, 0.2855, 0.2747]) -Greedy action tensor([-0.0669, 0.6789, 0.0510, 1.9160]) tensor([0.0870, 0.1834, 0.0979, 0.6318]) -Greedy action tensor([ 1.0052, -0.0279, -0.4145, 0.8232]) tensor([0.4113, 0.1464, 0.0994, 0.3429]) -Greedy action tensor([ 0.5087, -1.1793, -0.4441, 0.3965]) tensor([0.4058, 0.0750, 0.1565, 0.3627]) -Greedy action tensor([ 0.5334, -0.4860, -0.2570, -0.0412]) tensor([0.4206, 0.1518, 0.1908, 0.2368]) -Greedy action tensor([ 0.6503, -0.4484, 0.1384, 0.0198]) tensor([0.4057, 0.1352, 0.2431, 0.2160]) -Greedy action tensor([ 1.0750, -0.4664, -0.2548, 0.3747]) tensor([0.5063, 0.1084, 0.1339, 0.2514]) -Greedy action tensor([ 0.7356, -0.3988, -0.0538, 0.0025]) tensor([0.4432, 0.1425, 0.2013, 0.2129]) -Greedy action tensor([ 0.6734, -0.3947, -0.5715, 0.1470]) tensor([0.4500, 0.1546, 0.1296, 0.2658]) -Greedy action tensor([ 0.5457, -0.3097, -0.2831, 0.2071]) tensor([0.3884, 0.1651, 0.1696, 0.2769]) -Greedy action tensor([ 1.1209, -0.4262, -0.6810, 0.4403]) tensor([0.5307, 0.1130, 0.0876, 0.2687]) -Greedy action tensor([ 1.0149, -0.2702, -0.0530, -0.2592]) tensor([0.5263, 0.1456, 0.1809, 0.1472]) -Greedy action tensor([ 0.9899, -0.2053, -0.1924, 0.0798]) tensor([0.4971, 0.1504, 0.1524, 0.2001]) -Greedy action tensor([ 1.2116, -0.5246, -0.0136, -0.0812]) tensor([0.5733, 0.1010, 0.1684, 0.1574]) -Greedy action tensor([ 1.1088, -0.7220, -0.4695, 0.8199]) tensor([0.4727, 0.0758, 0.0975, 0.3540]) -Greedy action tensor([ 0.9500, -0.2883, -0.3086, -0.1288]) tensor([0.5225, 0.1514, 0.1484, 0.1777]) -Greedy action tensor([ 0.9180, -0.5869, -0.0912, 0.1131]) tensor([0.4917, 0.1092, 0.1792, 0.2199]) -Greedy action tensor([ 0.7913, -0.2874, -0.1686, 0.2201]) tensor([0.4371, 0.1486, 0.1674, 0.2469]) -Greedy action tensor([ 0.3278, -0.3297, -0.2684, 0.0717]) tensor([0.3517, 0.1822, 0.1938, 0.2723]) -Greedy action tensor([ 1.1438, -0.6510, 0.0048, 0.1769]) tensor([0.5357, 0.0890, 0.1715, 0.2037]) -Greedy action tensor([ 0.6218, -0.5490, -0.0640, 0.2093]) tensor([0.4039, 0.1253, 0.2034, 0.2674]) -Greedy action tensor([ 0.7307, -0.1558, -0.2744, -0.2929]) tensor([0.4679, 0.1928, 0.1712, 0.1681]) -Greedy action tensor([ 0.8512, -0.3291, -0.5010, 0.0817]) tensor([0.4928, 0.1514, 0.1275, 0.2283]) -Greedy action tensor([ 1.0891, -0.4385, -0.2755, -0.0737]) tensor([0.5602, 0.1216, 0.1431, 0.1751]) -Greedy action tensor([ 1.2755, -0.1287, -0.0455, -0.1631]) tensor([0.5715, 0.1404, 0.1525, 0.1356]) -Greedy action tensor([ 1.2099, -0.3879, -0.3263, 0.4067]) tensor([0.5361, 0.1085, 0.1154, 0.2401]) -Greedy action tensor([ 0.6171, -0.1186, -0.3367, -0.3144]) tensor([0.4428, 0.2122, 0.1706, 0.1745]) -Greedy action tensor([ 1.4724, -0.5346, -0.3693, 0.2743]) tensor([0.6271, 0.0843, 0.0994, 0.1892]) -Greedy action tensor([ 1.5080, -0.7688, -0.6280, 0.3288]) tensor([0.6543, 0.0671, 0.0773, 0.2012]) -Greedy action tensor([ 0.8843, -0.5495, -0.2650, 0.5657]) tensor([0.4381, 0.1044, 0.1388, 0.3186]) -Greedy action tensor([ 1.5780, -0.6790, -0.5277, 0.4660]) tensor([0.6429, 0.0673, 0.0783, 0.2115]) -Greedy action tensor([ 1.0662, -0.3324, -0.7884, 0.3284]) tensor([0.5315, 0.1312, 0.0832, 0.2541]) -Greedy action tensor([ 0.5700, -0.2954, 0.0821, -0.1221]) tensor([0.3944, 0.1660, 0.2421, 0.1974]) -Greedy action tensor([ 0.9193, -0.3842, -0.2360, 0.5075]) tensor([0.4446, 0.1208, 0.1400, 0.2946]) -Greedy action tensor([ 1.1913, -0.3499, -0.1860, 0.0910]) tensor([0.5558, 0.1190, 0.1402, 0.1850]) -Greedy action tensor([ 1.1655, -0.6287, -0.4628, 0.0024]) tensor([0.5970, 0.0993, 0.1172, 0.1866]) -Greedy action tensor([ 1.2295, -0.5657, -0.5893, 0.0939]) tensor([0.6062, 0.1007, 0.0983, 0.1947]) -Greedy action tensor([ 0.8965, -0.4412, -0.2866, 0.4049]) tensor([0.4586, 0.1204, 0.1405, 0.2805]) -Greedy action tensor([ 0.9889, -0.3096, -0.5063, -0.1890]) tensor([0.5540, 0.1512, 0.1242, 0.1706]) -Greedy action tensor([ 0.7989, -0.4067, -0.1185, 0.0429]) tensor([0.4611, 0.1381, 0.1842, 0.2165]) -Greedy action tensor([ 1.1304, -0.6193, -0.1381, 0.0620]) tensor([0.5560, 0.0966, 0.1564, 0.1910]) -Greedy action tensor([ 0.8872, -0.2578, 0.0214, -0.1930]) tensor([0.4811, 0.1531, 0.2024, 0.1634]) -Greedy action tensor([ 1.1518, -0.3516, -0.4621, -0.1620]) tensor([0.5916, 0.1316, 0.1178, 0.1590]) -Greedy action tensor([ 0.9489, -0.7331, -0.3670, 0.4117]) tensor([0.4905, 0.0912, 0.1316, 0.2867]) -Greedy action tensor([ 1.1130, -0.1755, -0.6914, -0.0735]) tensor([0.5729, 0.1579, 0.0943, 0.1749]) -Greedy action tensor([ 0.5309, -0.0168, 0.2927, -0.3497]) tensor([0.3596, 0.2080, 0.2834, 0.1491]) -Greedy action tensor([ 1.0278, -0.3671, -0.0874, -0.2642]) tensor([0.5404, 0.1339, 0.1772, 0.1485]) -Greedy action tensor([ 0.8489, -0.5590, -0.5283, -0.1888]) tensor([0.5402, 0.1322, 0.1363, 0.1914]) -Greedy action tensor([ 0.7732, -0.2171, -0.4618, -0.2117]) tensor([0.4912, 0.1825, 0.1429, 0.1835]) -Greedy action tensor([ 0.8909, -0.5111, -0.6077, 0.5377]) tensor([0.4604, 0.1133, 0.1029, 0.3234]) -Greedy action tensor([ 0.8599, -0.5922, -0.4748, 0.8173]) tensor([0.4072, 0.0953, 0.1072, 0.3902]) -Greedy action tensor([ 0.5755, -0.2980, 0.0741, -0.0474]) tensor([0.3907, 0.1631, 0.2366, 0.2096]) -Greedy action tensor([ 0.4028, -0.3602, -0.5437, 0.4283]) tensor([0.3472, 0.1619, 0.1347, 0.3562]) -Greedy action tensor([ 1.2534, -0.3148, -0.4519, 0.0393]) tensor([0.5927, 0.1235, 0.1077, 0.1760]) -Greedy action tensor([ 0.7370, -0.2809, -0.7242, 0.3479]) tensor([0.4404, 0.1591, 0.1021, 0.2984]) -Greedy action tensor([ 1.1133, -0.5460, -0.4203, 0.6996]) tensor([0.4837, 0.0920, 0.1044, 0.3198]) -Greedy action tensor([ 0.8722, -0.5704, -0.3112, 0.5258]) tensor([0.4445, 0.1050, 0.1361, 0.3144]) -Greedy action tensor([ 1.0952, -0.7220, -0.1747, 0.6081]) tensor([0.4860, 0.0790, 0.1365, 0.2986]) -Greedy action tensor([ 0.7390, -0.4656, -0.0719, -0.1893]) tensor([0.4674, 0.1401, 0.2077, 0.1847]) -Greedy action tensor([ 1.7678, -0.8985, -0.6525, -0.2974]) tensor([0.7781, 0.0541, 0.0692, 0.0987]) -Greedy action tensor([ 1.2471, -0.7072, -0.3846, 0.5080]) tensor([0.5510, 0.0781, 0.1078, 0.2631]) -Greedy action tensor([0.9143, 0.0093, 0.0086, 0.1408]) tensor([0.4405, 0.1782, 0.1781, 0.2032]) -Greedy action tensor([ 0.8062, -0.3711, -0.0950, -0.1736]) tensor([0.4786, 0.1474, 0.1943, 0.1797]) -Greedy action tensor([ 0.4036, -0.1756, -0.1006, -0.1390]) tensor([0.3642, 0.2041, 0.2200, 0.2117]) -Greedy action tensor([ 0.4696, -0.2761, -0.2419, -0.0143]) tensor([0.3873, 0.1838, 0.1901, 0.2388]) -Greedy action tensor([ 0.9459, -0.2761, -0.1759, -0.1452]) tensor([0.5112, 0.1506, 0.1665, 0.1717]) -Greedy action tensor([ 0.4224, -0.4726, -0.4607, 0.3068]) tensor([0.3686, 0.1506, 0.1524, 0.3284]) -Greedy action tensor([ 0.7610, -0.1781, -0.1839, 0.0176]) tensor([0.4434, 0.1734, 0.1724, 0.2108]) -Greedy action tensor([ 1.2216, -0.3019, -0.2561, 0.3008]) tensor([0.5422, 0.1182, 0.1237, 0.2159]) -Greedy action tensor([ 0.7042, -0.6305, -0.1318, 0.0799]) tensor([0.4480, 0.1179, 0.1942, 0.2399]) -Greedy action tensor([ 0.2283, -0.1500, -0.3771, 0.1507]) tensor([0.3168, 0.2170, 0.1730, 0.2932]) -Greedy action tensor([ 1.1986, -0.4728, -0.3485, -0.0905]) tensor([0.5965, 0.1121, 0.1270, 0.1644]) -Greedy action tensor([ 1.3259, -0.5998, -0.3012, -0.1993]) tensor([0.6411, 0.0935, 0.1260, 0.1395]) -Greedy action tensor([ 1.2659, -0.5328, -0.4260, 0.0818]) tensor([0.6040, 0.1000, 0.1112, 0.1848]) -Greedy action tensor([ 1.2821, -0.4970, -0.1536, 0.1702]) tensor([0.5762, 0.0972, 0.1371, 0.1895]) -Greedy action tensor([ 0.9667, -0.7511, -0.3358, 0.3265]) tensor([0.5054, 0.0907, 0.1374, 0.2665]) -Greedy action tensor([ 1.0613, -0.5855, -0.4963, 0.4520]) tensor([0.5136, 0.0990, 0.1082, 0.2793]) -Greedy action tensor([ 0.8885, -0.3616, -0.3704, -0.1325]) tensor([0.5180, 0.1484, 0.1471, 0.1866]) -Greedy action tensor([ 1.1654, -0.5433, 0.0375, 0.0257]) tensor([0.5480, 0.0992, 0.1774, 0.1753]) -Greedy action tensor([ 1.1973, -0.6812, -0.1180, 0.0617]) tensor([0.5739, 0.0877, 0.1540, 0.1843]) -Greedy action tensor([ 0.8269, -0.5070, -0.3848, 0.1072]) tensor([0.4883, 0.1286, 0.1454, 0.2377]) -Greedy action tensor([ 1.4417, -0.5921, -0.0468, 0.2860]) tensor([0.5983, 0.0783, 0.1350, 0.1884]) -Greedy action tensor([ 0.7342, -0.0762, -0.0308, 0.0552]) tensor([0.4137, 0.1840, 0.1925, 0.2098]) -Greedy action tensor([ 0.8726, -0.1111, -0.2741, 0.0449]) tensor([0.4698, 0.1757, 0.1492, 0.2053]) -Greedy action tensor([ 1.1873, -0.3798, -0.3601, -0.0937]) tensor([0.5885, 0.1228, 0.1252, 0.1635]) -Greedy action tensor([-1.7063, -0.3449, 0.8873, 0.7100]) tensor([0.0339, 0.1323, 0.4537, 0.3800]) -Greedy action tensor([-1.4956, -0.5247, 0.4338, 0.1597]) tensor([0.0634, 0.1675, 0.4369, 0.3321]) -Greedy action tensor([-1.1060, -0.5581, 0.2575, 0.5335]) tensor([0.0848, 0.1467, 0.3316, 0.4370]) -Greedy action tensor([-1.4839, -0.3681, 0.4395, -0.0332]) tensor([0.0660, 0.2013, 0.4514, 0.2814]) -Greedy action tensor([-1.6055, -0.3551, 0.6437, 0.4340]) tensor([0.0462, 0.1612, 0.4377, 0.3549]) -Greedy action tensor([-1.7389, -0.5853, 0.8460, 0.4436]) tensor([0.0380, 0.1205, 0.5043, 0.3372]) -Greedy action tensor([-1.8461, -0.8422, 0.2219, -0.2910]) tensor([0.0611, 0.1667, 0.4830, 0.2892]) -Greedy action tensor([-1.8672, -0.8954, 0.2941, -0.1784]) tensor([0.0564, 0.1490, 0.4895, 0.3052]) -Greedy action tensor([-1.2703, -0.3822, 0.6867, 0.9708]) tensor([0.0502, 0.1221, 0.3554, 0.4723]) -Greedy action tensor([-1.9536, -0.7244, 0.8862, 0.4394]) tensor([0.0308, 0.1053, 0.5269, 0.3371]) -Greedy action tensor([-1.8954, -0.4530, 0.6484, -0.1417]) tensor([0.0421, 0.1783, 0.5363, 0.2433]) -Greedy action tensor([-1.6958, -0.5163, 1.0925, 0.8774]) tensor([0.0297, 0.0968, 0.4835, 0.3899]) -Greedy action tensor([-1.7117, -0.5057, 0.5391, -0.0305]) tensor([0.0521, 0.1739, 0.4944, 0.2797]) -Greedy action tensor([-1.3028, -0.5718, 0.3306, 0.2592]) tensor([0.0771, 0.1602, 0.3949, 0.3677]) -Greedy action tensor([-1.3087, -0.5861, 0.3346, 0.2692]) tensor([0.0765, 0.1575, 0.3955, 0.3705]) -Greedy action tensor([-1.5809, -0.2558, 0.4290, 0.1179]) tensor([0.0565, 0.2127, 0.4218, 0.3090]) -Greedy action tensor([-1.5654, -0.5182, 0.5466, 0.2229]) tensor([0.0553, 0.1575, 0.4568, 0.3305]) -Greedy action tensor([-2.0184, -0.5920, 1.0178, 0.4009]) tensor([0.0269, 0.1118, 0.5594, 0.3019]) -Greedy action tensor([-1.7742, -0.4302, 0.5887, -0.0732]) tensor([0.0478, 0.1831, 0.5074, 0.2617]) -Greedy action tensor([-0.2166, 1.1842, 0.1038, 0.4675]) tensor([0.1188, 0.4821, 0.1637, 0.2354]) -Greedy action tensor([-1.9654, -0.6467, 0.5303, -0.1675]) tensor([0.0437, 0.1632, 0.5296, 0.2636]) -Greedy action tensor([-1.7626, -0.7128, 0.1771, -0.4256]) tensor([0.0684, 0.1954, 0.4758, 0.2604]) -Greedy action tensor([-1.8090, -0.5232, 0.3115, -0.1850]) tensor([0.0555, 0.2007, 0.4624, 0.2814]) -Greedy action tensor([-1.4876, -0.4660, 1.0282, 1.0141]) tensor([0.0353, 0.0980, 0.4364, 0.4303]) -Greedy action tensor([-0.8753, -0.3499, 0.1970, -0.2450]) tensor([0.1335, 0.2257, 0.3901, 0.2507]) -Greedy action tensor([-0.8494, 0.0219, 0.2672, -0.1203]) tensor([0.1174, 0.2806, 0.3586, 0.2434]) -Greedy action tensor([-1.6232, -0.3104, 0.6567, 0.3526]) tensor([0.0461, 0.1712, 0.4504, 0.3323]) -Greedy action tensor([-0.7134, 0.2848, 0.2251, -0.0163]) tensor([0.1208, 0.3278, 0.3088, 0.2426]) -Greedy action tensor([-1.5401, -0.5598, 0.4795, 0.1485]) tensor([0.0602, 0.1604, 0.4536, 0.3258]) -Greedy action tensor([-1.5624, -0.7465, 0.9708, 0.7295]) tensor([0.0388, 0.0878, 0.4891, 0.3842]) -Greedy action tensor([-1.3569, -0.5577, 0.3774, 0.0833]) tensor([0.0763, 0.1696, 0.4321, 0.3220]) -Greedy action tensor([-1.4921, -0.4740, 0.4910, -0.0551]) tensor([0.0656, 0.1816, 0.4767, 0.2761]) -Greedy action tensor([-1.8292, -0.4087, 0.3658, -0.1111]) tensor([0.0508, 0.2102, 0.4560, 0.2830]) -Greedy action tensor([-1.6686, -0.4266, 0.5205, -0.0580]) tensor([0.0544, 0.1882, 0.4853, 0.2721]) -Greedy action tensor([-1.3554, -0.5658, 0.3814, 0.1611]) tensor([0.0744, 0.1639, 0.4226, 0.3390]) -Greedy action tensor([-1.1535, -0.5392, 0.2878, 0.2744]) tensor([0.0889, 0.1644, 0.3758, 0.3709]) -Greedy action tensor([-1.8549, -0.4680, 0.2061, -0.3387]) tensor([0.0574, 0.2299, 0.4511, 0.2616]) -Greedy action tensor([-0.9734, -0.0494, 0.4846, -0.4547]) tensor([0.1053, 0.2653, 0.4525, 0.1769]) -Greedy action tensor([-1.6652, -0.5034, 0.5168, 0.0050]) tensor([0.0544, 0.1739, 0.4825, 0.2892]) -Greedy action tensor([-1.6843, -0.5948, 0.2320, -0.2120]) tensor([0.0661, 0.1965, 0.4492, 0.2882]) -Greedy action tensor([-1.2876, -0.5523, 0.3453, 0.2042]) tensor([0.0791, 0.1649, 0.4046, 0.3514]) -Greedy action tensor([-1.8866, -0.6196, 0.9116, 0.2436]) tensor([0.0340, 0.1208, 0.5587, 0.2865]) -Greedy action tensor([-0.3130, 0.4033, 0.1165, 0.2408]) tensor([0.1582, 0.3237, 0.2430, 0.2752]) -Greedy action tensor([-5.1874e-01, -2.4025e-01, 1.4341e-01, 4.8876e-06]) tensor([0.1683, 0.2224, 0.3264, 0.2828]) -Greedy action tensor([-1.9102, -0.4206, 0.6660, -0.1271]) tensor([0.0408, 0.1808, 0.5360, 0.2425]) -Greedy action tensor([-1.0775, -0.5752, 0.2552, 0.3503]) tensor([0.0942, 0.1557, 0.3572, 0.3929]) -Greedy action tensor([-1.7619, -0.5549, 0.6860, -0.0650]) tensor([0.0468, 0.1565, 0.5413, 0.2554]) -Greedy action tensor([-1.9263, -0.4125, 0.6565, -0.1555]) tensor([0.0406, 0.1843, 0.5368, 0.2383]) -Greedy action tensor([-1.8653, -0.4707, 0.6346, -0.1289]) tensor([0.0437, 0.1762, 0.5321, 0.2480]) -Greedy action tensor([-1.8520, -0.4842, 0.6348, -0.0987]) tensor([0.0440, 0.1728, 0.5291, 0.2541]) -Greedy action tensor([-1.9561, -0.8097, 0.2707, -0.1972]) tensor([0.0520, 0.1637, 0.4822, 0.3020]) -Greedy action tensor([-1.9481, -0.4013, 0.6253, 0.1128]) tensor([0.0375, 0.1762, 0.4918, 0.2946]) -Greedy action tensor([-1.8803, -0.8126, 0.1180, -0.3337]) tensor([0.0626, 0.1820, 0.4616, 0.2938]) -Greedy action tensor([-2.0067, 0.8250, 0.6004, 0.4239]) tensor([0.0233, 0.3957, 0.3161, 0.2649]) -Greedy action tensor([-1.9367, -0.4850, 0.8808, 0.2261]) tensor([0.0326, 0.1391, 0.5451, 0.2832]) -Greedy action tensor([-1.7242, -0.4765, 0.6207, 0.1585]) tensor([0.0465, 0.1621, 0.4855, 0.3058]) -Greedy action tensor([-1.5609, -0.2776, 0.5064, 0.2679]) tensor([0.0534, 0.1926, 0.4218, 0.3323]) -Greedy action tensor([-1.8567, -0.4849, 1.2745, 0.7877]) tensor([0.0239, 0.0940, 0.5463, 0.3358]) -Greedy action tensor([-1.2569, -0.5196, 0.8540, 0.9821]) tensor([0.0482, 0.1008, 0.3983, 0.4527]) -Greedy action tensor([-1.3618, -0.5387, 0.4325, 0.3650]) tensor([0.0670, 0.1527, 0.4033, 0.3770]) -Greedy action tensor([-1.6589, -0.4152, 0.7055, 0.5056]) tensor([0.0420, 0.1456, 0.4467, 0.3657]) -Greedy action tensor([-1.4776, 0.3114, 0.4271, 0.1666]) tensor([0.0530, 0.3169, 0.3558, 0.2742]) -Greedy action tensor([-0.9017, -0.5982, 0.1778, 0.3487]) tensor([0.1138, 0.1541, 0.3349, 0.3973]) -Greedy action tensor([-1.9065, -0.6614, 0.8249, 0.0560]) tensor([0.0371, 0.1289, 0.5699, 0.2641]) -Greedy action tensor([-1.1772, 0.1923, 0.3675, -0.4617]) tensor([0.0857, 0.3372, 0.4018, 0.1753]) -Greedy action tensor([-1.5428, -0.5116, 0.4839, 0.1443]) tensor([0.0595, 0.1670, 0.4518, 0.3217]) -Greedy action tensor([-1.9146, -0.4319, 1.0471, 0.5862]) tensor([0.0271, 0.1193, 0.5235, 0.3302]) -Greedy action tensor([-1.6114, -0.4002, 0.5065, 0.0746]) tensor([0.0553, 0.1858, 0.4601, 0.2987]) -Greedy action tensor([-1.9663, -0.7585, 0.6807, 0.0168]) tensor([0.0389, 0.1301, 0.5486, 0.2824]) -Greedy action tensor([-0.9411, -0.5824, 0.2518, 0.4492]) tensor([0.1026, 0.1469, 0.3383, 0.4121]) -Greedy action tensor([-1.6107, -0.5269, 0.4658, -0.1073]) tensor([0.0609, 0.1799, 0.4855, 0.2737]) -Greedy action tensor([-1.2236, -0.6245, 0.4107, 0.1645]) tensor([0.0837, 0.1523, 0.4288, 0.3352]) -Greedy action tensor([-1.9818, -0.4890, 1.1045, 0.4470]) tensor([0.0258, 0.1150, 0.5659, 0.2932]) -Greedy action tensor([-1.7970, -0.9094, -0.0647, -0.6162]) tensor([0.0810, 0.1969, 0.4582, 0.2639]) -Greedy action tensor([-1.4351, -0.4919, 0.4288, 0.1495]) tensor([0.0671, 0.1724, 0.4330, 0.3275]) -Greedy action tensor([-1.6156, -0.5007, 0.7894, 0.5014]) tensor([0.0427, 0.1301, 0.4727, 0.3545]) -Greedy action tensor([-1.3073, -0.5944, 0.7229, 1.0002]) tensor([0.0483, 0.0985, 0.3678, 0.4853]) -Greedy action tensor([-1.1997, -0.5364, 0.3509, 0.1608]) tensor([0.0866, 0.1680, 0.4080, 0.3374]) -Greedy action tensor([-1.8122, -0.6127, 0.1542, -0.3540]) tensor([0.0634, 0.2105, 0.4533, 0.2727]) -Greedy action tensor([-1.1117, -0.6160, 0.3438, 0.2445]) tensor([0.0925, 0.1519, 0.3966, 0.3591]) -Greedy action tensor([-0.8668, -0.5681, 0.1922, 0.3481]) tensor([0.1163, 0.1567, 0.3352, 0.3918]) -Greedy action tensor([ 0.6748, -0.4407, 0.1640, -0.4919]) tensor([0.4466, 0.1464, 0.2680, 0.1391]) -Greedy action tensor([ 0.5639, -0.2393, -0.0882, -0.3891]) tensor([0.4247, 0.1902, 0.2213, 0.1638]) -Greedy action tensor([ 0.3519, -0.0038, 0.0642, -0.0929]) tensor([0.3235, 0.2266, 0.2426, 0.2073]) -Greedy action tensor([ 0.9437, -0.5338, -0.0894, -0.4366]) tensor([0.5448, 0.1243, 0.1939, 0.1370]) -Greedy action tensor([ 0.9821, -0.8099, -0.0743, -0.4142]) tensor([0.5676, 0.0946, 0.1974, 0.1405]) -Greedy action tensor([ 0.8037, -0.7387, -0.0276, -0.5633]) tensor([0.5252, 0.1123, 0.2287, 0.1338]) -Greedy action tensor([ 0.4761, -0.2817, -0.0168, -0.3286]) tensor([0.3957, 0.1855, 0.2418, 0.1770]) -Greedy action tensor([ 0.8287, -0.3183, -0.0692, -0.4670]) tensor([0.5003, 0.1589, 0.2038, 0.1369]) -Greedy action tensor([ 0.3034, 0.0801, 0.1096, -0.1018]) tensor([0.3039, 0.2431, 0.2504, 0.2027]) -Greedy action tensor([ 0.5317, -0.2603, -0.1635, -0.4239]) tensor([0.4280, 0.1938, 0.2136, 0.1646]) -Greedy action tensor([ 0.6160, -0.4381, -0.0663, -0.4228]) tensor([0.4529, 0.1579, 0.2289, 0.1603]) -Greedy action tensor([ 0.4193, 0.0141, 0.0345, -0.3426]) tensor([0.3553, 0.2370, 0.2418, 0.1659]) -Greedy action tensor([ 0.5820, 0.0664, 0.0438, -0.1296]) tensor([0.3743, 0.2235, 0.2185, 0.1837]) -Greedy action tensor([ 0.6522, 0.0641, 0.1714, -0.4672]) tensor([0.4000, 0.2221, 0.2473, 0.1306]) -Greedy action tensor([ 0.5690, -0.1933, 0.2131, -0.3882]) tensor([0.3920, 0.1829, 0.2746, 0.1505]) -Greedy action tensor([ 0.6209, -0.0951, -0.1000, -0.2810]) tensor([0.4200, 0.2053, 0.2043, 0.1704]) -Greedy action tensor([ 0.9545, -0.6922, 0.1187, -0.9071]) tensor([0.5613, 0.1081, 0.2433, 0.0872]) -Greedy action tensor([ 0.4779, -0.1275, -0.0336, -0.2355]) tensor([0.3794, 0.2071, 0.2275, 0.1859]) -Greedy action tensor([ 0.8070, -0.4587, -0.1777, -0.5388]) tensor([0.5219, 0.1472, 0.1950, 0.1359]) -Greedy action tensor([ 0.4315, -0.2234, 0.0417, -0.3829]) tensor([0.3788, 0.1968, 0.2566, 0.1678]) -Greedy action tensor([ 1.0896, -0.7094, -0.0134, -0.5205]) tensor([0.5892, 0.0975, 0.1955, 0.1178]) -Greedy action tensor([ 1.1102, -0.3451, -0.0880, -0.4503]) tensor([0.5730, 0.1337, 0.1729, 0.1204]) -Greedy action tensor([ 0.2777, -0.0460, -0.0665, -0.1624]) tensor([0.3251, 0.2352, 0.2304, 0.2093]) -Greedy action tensor([ 0.3890, 0.1177, 0.0638, -0.2041]) tensor([0.3292, 0.2510, 0.2378, 0.1819]) -Greedy action tensor([ 0.4390, -0.0278, -0.0409, -0.1306]) tensor([0.3557, 0.2230, 0.2201, 0.2012]) -Greedy action tensor([ 0.8348, -0.7093, -0.2470, -0.8117]) tensor([0.5730, 0.1223, 0.1942, 0.1104]) -Greedy action tensor([ 0.8181, -0.1705, -0.0508, -0.3806]) tensor([0.4778, 0.1778, 0.2004, 0.1441]) -Greedy action tensor([ 0.6283, 0.2339, -0.2153, -0.1817]) tensor([0.3923, 0.2644, 0.1687, 0.1745]) -Greedy action tensor([ 0.7528, -0.1631, 0.0324, -0.2521]) tensor([0.4439, 0.1776, 0.2160, 0.1625]) -Greedy action tensor([ 0.5236, -0.5501, 0.1927, -0.4356]) tensor([0.4093, 0.1399, 0.2940, 0.1568]) -Greedy action tensor([ 0.4837, 0.0924, -0.0306, -0.2816]) tensor([0.3650, 0.2468, 0.2183, 0.1698]) -Greedy action tensor([ 0.6313, -0.4639, 0.1866, -0.4001]) tensor([0.4288, 0.1434, 0.2749, 0.1529]) -Greedy action tensor([ 0.6109, -0.1668, -0.0828, -0.3312]) tensor([0.4257, 0.1956, 0.2127, 0.1659]) -Greedy action tensor([ 0.6257, -0.2374, 0.0455, -0.1474]) tensor([0.4093, 0.1727, 0.2291, 0.1889]) -Greedy action tensor([ 0.5617, -0.3041, 0.0736, -0.3608]) tensor([0.4112, 0.1730, 0.2524, 0.1635]) -Greedy action tensor([ 0.8128, -0.5803, -0.2207, -0.5382]) tensor([0.5368, 0.1333, 0.1910, 0.1390]) -Greedy action tensor([ 0.6367, -0.3229, -0.0401, -0.7727]) tensor([0.4683, 0.1794, 0.2380, 0.1144]) -Greedy action tensor([ 0.4257, 0.1233, 0.0513, -0.2748]) tensor([0.3421, 0.2528, 0.2353, 0.1698]) -Greedy action tensor([ 0.6485, -0.5177, 0.2351, -0.5688]) tensor([0.4407, 0.1373, 0.2915, 0.1305]) -Greedy action tensor([ 0.3038, 0.0763, 0.0559, -0.2597]) tensor([0.3178, 0.2532, 0.2481, 0.1809]) -Greedy action tensor([ 0.6237, -0.3360, 0.0851, -0.4653]) tensor([0.4342, 0.1663, 0.2534, 0.1461]) -Greedy action tensor([ 0.3236, -0.1621, 0.0470, -0.2527]) tensor([0.3406, 0.2096, 0.2583, 0.1914]) -Greedy action tensor([ 0.8111, -0.5089, -0.0481, -0.2841]) tensor([0.4938, 0.1319, 0.2091, 0.1652]) -Greedy action tensor([ 0.9052, -0.6309, 0.0479, -0.6181]) tensor([0.5384, 0.1159, 0.2284, 0.1174]) -Greedy action tensor([ 0.5695, -0.1809, 0.2074, -0.4349]) tensor([0.3945, 0.1863, 0.2747, 0.1445]) -Greedy action tensor([ 0.6000, -0.4075, -0.0462, -0.4546]) tensor([0.4469, 0.1632, 0.2342, 0.1557]) -Greedy action tensor([ 0.5156, -0.3414, -0.0388, -0.4919]) tensor([0.4230, 0.1795, 0.2430, 0.1545]) -Greedy action tensor([ 0.7604, -0.3669, -0.0758, -0.2702]) tensor([0.4730, 0.1532, 0.2050, 0.1688]) -Greedy action tensor([ 0.3936, 0.0229, -0.0309, -0.3574]) tensor([0.3551, 0.2451, 0.2323, 0.1676]) -Greedy action tensor([ 0.3048, 0.0956, 0.1058, -0.1568]) tensor([0.3066, 0.2488, 0.2513, 0.1933]) -Greedy action tensor([ 0.2302, 0.1089, -0.0470, -0.0622]) tensor([0.2950, 0.2613, 0.2236, 0.2202]) -Greedy action tensor([ 0.3903, -0.3579, 0.0976, -0.3310]) tensor([0.3696, 0.1749, 0.2758, 0.1797]) -Greedy action tensor([ 0.3782, 0.1009, -0.0414, 0.0271]) tensor([0.3206, 0.2430, 0.2107, 0.2257]) -Greedy action tensor([ 0.4564, -0.0609, 0.0285, -0.1732]) tensor([0.3596, 0.2144, 0.2344, 0.1916]) -Greedy action tensor([ 0.4550, 0.1891, 0.0713, -0.0660]) tensor([0.3288, 0.2520, 0.2240, 0.1953]) -Greedy action tensor([ 0.5227, -0.3427, 0.0950, -0.3142]) tensor([0.3990, 0.1679, 0.2602, 0.1728]) -Greedy action tensor([ 0.3670, 0.0811, 0.0653, -0.1352]) tensor([0.3230, 0.2427, 0.2389, 0.1955]) -Greedy action tensor([ 0.7270, -0.4873, -0.0216, -0.5583]) tensor([0.4886, 0.1451, 0.2311, 0.1351]) -Greedy action tensor([ 0.3768, -0.0656, 0.0645, -0.2740]) tensor([0.3453, 0.2219, 0.2527, 0.1801]) -Greedy action tensor([ 0.1736, 0.0586, 0.0225, -0.1304]) tensor([0.2866, 0.2555, 0.2464, 0.2115]) -Greedy action tensor([ 0.2673, -0.0680, 0.0139, -0.3213]) tensor([0.3283, 0.2347, 0.2548, 0.1822]) -Greedy action tensor([ 0.7922, -0.2913, 0.0369, -0.4463]) tensor([0.4766, 0.1613, 0.2240, 0.1381]) -Greedy action tensor([ 0.5087, 0.0042, -0.0221, -0.4043]) tensor([0.3856, 0.2328, 0.2268, 0.1547]) -Greedy action tensor([ 0.7068, -0.3878, -0.1222, -0.7561]) tensor([0.4993, 0.1671, 0.2179, 0.1156]) -Greedy action tensor([ 1.2999, -1.1330, 0.0388, -0.8031]) tensor([0.6697, 0.0588, 0.1898, 0.0818]) -Greedy action tensor([ 0.2777, -0.0068, 0.1465, -0.2703]) tensor([0.3118, 0.2346, 0.2734, 0.1802]) -Greedy action tensor([ 0.2756, 0.0059, 0.0592, -0.2239]) tensor([0.3149, 0.2404, 0.2536, 0.1911]) -Greedy action tensor([ 0.3059, -0.1544, -0.0838, -0.3190]) tensor([0.3517, 0.2219, 0.2382, 0.1882]) -Greedy action tensor([ 0.4477, -0.2066, 0.1357, -0.3144]) tensor([0.3679, 0.1912, 0.2693, 0.1717]) -Greedy action tensor([ 0.3978, -0.1049, 0.0709, -0.3915]) tensor([0.3597, 0.2176, 0.2594, 0.1634]) -Greedy action tensor([ 0.4506, -0.3891, -0.0571, -0.4078]) tensor([0.4069, 0.1757, 0.2449, 0.1725]) -Greedy action tensor([ 0.4615, -0.2471, 0.1036, -0.5701]) tensor([0.3925, 0.1932, 0.2744, 0.1399]) -Greedy action tensor([ 0.5968, -0.1681, -0.0264, -0.5608]) tensor([0.4318, 0.2010, 0.2315, 0.1357]) -Greedy action tensor([ 0.5444, -0.0651, 0.2586, -0.2752]) tensor([0.3655, 0.1987, 0.2747, 0.1611]) -Greedy action tensor([ 0.5482, -0.0250, -0.0084, -0.2153]) tensor([0.3842, 0.2166, 0.2202, 0.1790]) -Greedy action tensor([ 0.2392, -0.0322, -0.1035, -0.2670]) tensor([0.3252, 0.2479, 0.2308, 0.1960]) -Greedy action tensor([ 0.6746, -0.1720, 0.1276, -0.2047]) tensor([0.4128, 0.1770, 0.2389, 0.1713]) -Greedy action tensor([ 0.5200, -0.0493, 0.0616, -0.3386]) tensor([0.3814, 0.2158, 0.2411, 0.1616]) -Greedy action tensor([ 0.5805, -0.4092, 0.0205, -0.5815]) tensor([0.4433, 0.1648, 0.2532, 0.1387]) -Greedy action tensor([ 0.7284, -0.3415, -0.1256, -0.4408]) tensor([0.4809, 0.1650, 0.2047, 0.1494]) -Greedy action tensor([ 0.6955, -0.1815, 0.0064, -0.2704]) tensor([0.4350, 0.1810, 0.2184, 0.1656]) -Greedy action tensor([ 1.1097, -0.5339, -0.3166, 0.1128]) tensor([0.5548, 0.1072, 0.1333, 0.2047]) -Greedy action tensor([ 0.9360, -0.3618, -0.3934, -0.0672]) tensor([0.5251, 0.1434, 0.1390, 0.1926]) -Greedy action tensor([ 0.8489, -0.4111, -0.4837, 0.3482]) tensor([0.4644, 0.1317, 0.1225, 0.2814]) -Greedy action tensor([ 0.5682, -0.2433, -0.2416, -0.2916]) tensor([0.4325, 0.1921, 0.1924, 0.1830]) -Greedy action tensor([ 0.8252, 0.0373, 0.0509, -0.2359]) tensor([0.4421, 0.2011, 0.2038, 0.1530]) -Greedy action tensor([ 0.6521, -0.0572, -0.1090, -0.5289]) tensor([0.4413, 0.2171, 0.2061, 0.1355]) -Greedy action tensor([ 1.0381, -0.3734, -0.3054, -0.0371]) tensor([0.5417, 0.1321, 0.1414, 0.1849]) -Greedy action tensor([ 0.9621, -0.0337, -0.1441, -0.3470]) tensor([0.5075, 0.1875, 0.1679, 0.1371]) -Greedy action tensor([ 1.8654, -0.4730, -0.3427, 0.1277]) tensor([0.7234, 0.0698, 0.0795, 0.1273]) -Greedy action tensor([ 0.9942, -0.2929, 0.0371, -0.1054]) tensor([0.5017, 0.1385, 0.1927, 0.1671]) -Greedy action tensor([ 0.3432, -0.0659, -0.1511, 0.1051]) tensor([0.3265, 0.2169, 0.1992, 0.2574]) -Greedy action tensor([ 0.9112, -0.7314, -0.0350, 0.1110]) tensor([0.4924, 0.0953, 0.1912, 0.2212]) -Greedy action tensor([ 0.8239, -0.5364, -0.4916, 0.1606]) tensor([0.4902, 0.1258, 0.1315, 0.2525]) -Greedy action tensor([ 1.1035, -0.4803, -0.1007, 0.1891]) tensor([0.5247, 0.1077, 0.1574, 0.2103]) -Greedy action tensor([ 1.1824, -0.7112, -0.4413, 0.3323]) tensor([0.5633, 0.0848, 0.1111, 0.2408]) -Greedy action tensor([ 1.1095, -0.3761, -0.2690, -0.0614]) tensor([0.5592, 0.1266, 0.1409, 0.1734]) -Greedy action tensor([ 1.3106, -0.2449, -0.2908, -0.0554]) tensor([0.5996, 0.1266, 0.1209, 0.1530]) -Greedy action tensor([ 0.2132, -0.1721, -0.4417, 0.2931]) tensor([0.3046, 0.2072, 0.1582, 0.3299]) -Greedy action tensor([ 0.3936, -0.4296, 0.1255, -0.1071]) tensor([0.3559, 0.1562, 0.2722, 0.2157]) -Greedy action tensor([ 1.6180, -0.3165, -0.3866, 0.2521]) tensor([0.6517, 0.0942, 0.0878, 0.1663]) -Greedy action tensor([ 1.1349, -0.4756, 0.1338, 0.1726]) tensor([0.5130, 0.1025, 0.1885, 0.1960]) -Greedy action tensor([ 1.1718, -0.4714, -0.3917, 0.4164]) tensor([0.5340, 0.1033, 0.1118, 0.2509]) -Greedy action tensor([ 0.9479, -0.2721, -0.0219, -0.1791]) tensor([0.5004, 0.1477, 0.1897, 0.1621]) -Greedy action tensor([ 1.1882, -0.4533, 0.0446, 0.2212]) tensor([0.5284, 0.1023, 0.1684, 0.2009]) -Greedy action tensor([ 0.5499, -0.4740, -0.3566, 0.0144]) tensor([0.4258, 0.1529, 0.1720, 0.2493]) -Greedy action tensor([ 1.8304, -0.5313, -0.5133, -0.0023]) tensor([0.7406, 0.0698, 0.0711, 0.1185]) -Greedy action tensor([ 0.9436, -0.5503, -0.0163, 0.0886]) tensor([0.4920, 0.1104, 0.1884, 0.2092]) -Greedy action tensor([ 0.7928, -0.2441, -0.1478, 0.1956]) tensor([0.4357, 0.1545, 0.1701, 0.2398]) -Greedy action tensor([ 1.3274, -0.4683, -0.3678, -0.0467]) tensor([0.6240, 0.1036, 0.1145, 0.1579]) -Greedy action tensor([ 0.4381, -0.1882, -0.2154, 0.0320]) tensor([0.3675, 0.1965, 0.1912, 0.2448]) -Greedy action tensor([ 0.7333, 0.0425, 0.2329, -0.2456]) tensor([0.4027, 0.2018, 0.2442, 0.1513]) -Greedy action tensor([ 1.0153, -0.3380, 0.0130, -0.1299]) tensor([0.5145, 0.1329, 0.1888, 0.1637]) -Greedy action tensor([ 1.0866, -0.3744, -0.2064, 0.0482]) tensor([0.5375, 0.1247, 0.1475, 0.1903]) -Greedy action tensor([ 0.8372, -0.2750, -0.2061, -0.0213]) tensor([0.4751, 0.1562, 0.1674, 0.2013]) -Greedy action tensor([ 1.0216, -0.5921, -0.2141, 0.2934]) tensor([0.5070, 0.1010, 0.1473, 0.2447]) -Greedy action tensor([ 1.4257, -0.4940, 0.0369, 0.1655]) tensor([0.5954, 0.0873, 0.1485, 0.1688]) -Greedy action tensor([ 1.2965, -0.6798, -0.4121, 0.2272]) tensor([0.6013, 0.0833, 0.1089, 0.2064]) -Greedy action tensor([ 0.8630, -0.5945, -0.3950, 0.1822]) tensor([0.4943, 0.1151, 0.1405, 0.2502]) -Greedy action tensor([ 1.0940, -0.5353, -0.4863, 0.7320]) tensor([0.4766, 0.0934, 0.0981, 0.3318]) -Greedy action tensor([ 1.0484, -0.3935, -0.2332, -0.1882]) tensor([0.5542, 0.1311, 0.1538, 0.1609]) -Greedy action tensor([ 1.1928, -0.2931, -0.5515, -0.0655]) tensor([0.5934, 0.1343, 0.1037, 0.1686]) -Greedy action tensor([ 1.0314, -0.3581, -0.5223, 0.4104]) tensor([0.5005, 0.1247, 0.1058, 0.2690]) -Greedy action tensor([ 1.3395, -0.4361, -0.6284, -0.0494]) tensor([0.6416, 0.1087, 0.0897, 0.1600]) -Greedy action tensor([ 8.6071e-01, -1.1021e-04, 1.8305e-01, -1.7237e-01]) tensor([0.4373, 0.1849, 0.2221, 0.1557]) -Greedy action tensor([ 0.6433, -0.1428, -0.5220, -0.2886]) tensor([0.4627, 0.2108, 0.1443, 0.1822]) -Greedy action tensor([ 1.3264, -0.4802, -0.3223, 0.4831]) tensor([0.5597, 0.0919, 0.1076, 0.2408]) -Greedy action tensor([ 1.0892, -0.3249, -0.3302, 0.2420]) tensor([0.5226, 0.1271, 0.1264, 0.2240]) -Greedy action tensor([ 0.9125, -0.6873, -0.6085, 0.5592]) tensor([0.4711, 0.0951, 0.1029, 0.3309]) -Greedy action tensor([ 0.7722, -0.3958, -0.3814, 0.1111]) tensor([0.4667, 0.1451, 0.1472, 0.2410]) -Greedy action tensor([ 0.8779, -0.3384, -0.4981, 0.0732]) tensor([0.5010, 0.1485, 0.1265, 0.2240]) -Greedy action tensor([ 1.6677, -1.0382, 0.1251, 0.1816]) tensor([0.6636, 0.0443, 0.1419, 0.1501]) -Greedy action tensor([ 0.5754, -0.1580, 0.0436, -0.1939]) tensor([0.3951, 0.1897, 0.2321, 0.1830]) -Greedy action tensor([ 0.2459, -0.1414, -0.2159, -0.1939]) tensor([0.3386, 0.2299, 0.2134, 0.2181]) -Greedy action tensor([ 0.3879, -0.4186, 0.0601, -0.1047]) tensor([0.3600, 0.1607, 0.2594, 0.2200]) -Greedy action tensor([ 0.7578, -0.1864, -0.0286, -0.0481]) tensor([0.4364, 0.1698, 0.1988, 0.1950]) -Greedy action tensor([ 0.4342, 0.0777, -0.1792, 0.0737]) tensor([0.3402, 0.2382, 0.1842, 0.2373]) -Greedy action tensor([ 1.5696, -0.6210, -0.2788, 0.0059]) tensor([0.6763, 0.0756, 0.1065, 0.1416]) -Greedy action tensor([ 1.0325, -0.4071, -0.3036, 0.0951]) tensor([0.5287, 0.1253, 0.1390, 0.2070]) -Greedy action tensor([ 0.9357, -0.3112, 0.0770, -0.2104]) tensor([0.4929, 0.1416, 0.2088, 0.1567]) -Greedy action tensor([ 0.5240, -0.3160, 0.1606, 0.0388]) tensor([0.3646, 0.1574, 0.2535, 0.2245]) -Greedy action tensor([ 1.2571, -0.5855, -0.1792, 0.1894]) tensor([0.5747, 0.0910, 0.1367, 0.1976]) -Greedy action tensor([ 0.7057, -0.2882, -0.2636, -0.2281]) tensor([0.4667, 0.1728, 0.1771, 0.1835]) -Greedy action tensor([ 0.5786, -0.4362, 0.0343, -0.1052]) tensor([0.4086, 0.1481, 0.2371, 0.2062]) -Greedy action tensor([ 0.7900, -0.3311, -0.4013, -0.1621]) tensor([0.4961, 0.1617, 0.1507, 0.1915]) -Greedy action tensor([ 0.7609, -0.3770, -0.4311, -0.0054]) tensor([0.4787, 0.1534, 0.1454, 0.2225]) -Greedy action tensor([ 1.4056, 0.1052, -0.1395, -0.0738]) tensor([0.5836, 0.1590, 0.1245, 0.1329]) -Greedy action tensor([ 0.7555, -0.0671, -0.2603, -0.0304]) tensor([0.4431, 0.1946, 0.1604, 0.2019]) -Greedy action tensor([ 1.2631, -0.3088, -0.3729, -0.0392]) tensor([0.5973, 0.1240, 0.1163, 0.1624]) -Greedy action tensor([ 1.2369, -0.3459, -0.1053, 0.2243]) tensor([0.5465, 0.1122, 0.1428, 0.1985]) -Greedy action tensor([ 1.1942, -0.4959, 0.0953, 0.0685]) tensor([0.5428, 0.1002, 0.1809, 0.1761]) -Greedy action tensor([ 1.2445, -0.7048, -0.3027, 0.1557]) tensor([0.5911, 0.0841, 0.1258, 0.1990]) -Greedy action tensor([ 1.7844, -0.4579, -0.4505, -0.2005]) tensor([0.7404, 0.0786, 0.0792, 0.1017]) -Greedy action tensor([ 1.1505, -0.1058, -0.0616, -0.0758]) tensor([0.5332, 0.1518, 0.1587, 0.1564]) -Greedy action tensor([ 0.8652, -0.4840, -0.4977, 0.3324]) tensor([0.4757, 0.1234, 0.1217, 0.2792]) -Greedy action tensor([ 0.5330, -0.1903, -0.3471, 0.1096]) tensor([0.3914, 0.1899, 0.1623, 0.2563]) -Greedy action tensor([ 1.7574, -0.6804, -0.1955, 0.6533]) tensor([0.6407, 0.0560, 0.0909, 0.2124]) -Greedy action tensor([ 1.5652, -0.5219, -0.1579, 0.1229]) tensor([0.6498, 0.0806, 0.1160, 0.1536]) -Greedy action tensor([ 0.8279, -0.4433, -0.4724, -0.0327]) tensor([0.5061, 0.1420, 0.1379, 0.2140]) -Greedy action tensor([ 1.1233, -0.3860, -0.2736, 0.0849]) tensor([0.5487, 0.1213, 0.1357, 0.1942]) -Greedy action tensor([ 1.0423, -0.5577, -0.6380, 0.2353]) tensor([0.5451, 0.1101, 0.1016, 0.2432]) -Greedy action tensor([ 0.4070, -0.3871, 0.0250, -0.0881]) tensor([0.3644, 0.1647, 0.2487, 0.2221]) -Greedy action tensor([ 0.9984, 0.0323, -0.7262, 0.5203]) tensor([0.4590, 0.1747, 0.0818, 0.2845]) -Greedy action tensor([ 0.9179, -0.4613, 0.5144, 0.9659]) tensor([0.3368, 0.0848, 0.2250, 0.3534]) -Greedy action tensor([ 1.0794, -0.2812, -0.4995, 1.9170]) tensor([0.2650, 0.0680, 0.0546, 0.6124]) -Greedy action tensor([ 1.6436, -1.3864, 0.9356, 0.3932]) tensor([0.5472, 0.0264, 0.2696, 0.1567]) -Greedy action tensor([-0.1396, 0.2534, 0.0894, 0.3683]) tensor([0.1852, 0.2743, 0.2328, 0.3077]) -Greedy action tensor([ 1.3405, -0.9486, -0.3175, 0.7306]) tensor([0.5449, 0.0552, 0.1038, 0.2961]) -Greedy action tensor([ 0.8774, -0.1572, -0.6697, 1.3214]) tensor([0.3198, 0.1136, 0.0681, 0.4985]) -Greedy action tensor([ 0.2321, -0.4126, 0.2652, 0.6222]) tensor([0.2478, 0.1300, 0.2561, 0.3660]) -Greedy action tensor([ 0.3362, -0.6453, -0.7124, -0.3797]) tensor([0.4517, 0.1693, 0.1583, 0.2208]) -Greedy action tensor([ 1.1188, -0.6425, -0.7786, 0.7013]) tensor([0.5049, 0.0868, 0.0757, 0.3326]) -Greedy action tensor([ 1.4087, -1.1305, 0.3963, 1.3589]) tensor([0.4178, 0.0330, 0.1518, 0.3975]) -Greedy action tensor([-0.3524, 0.6401, 0.8707, 0.1235]) tensor([0.1149, 0.3099, 0.3903, 0.1849]) -Greedy action tensor([ 0.7228, -1.0027, 1.2377, 0.4093]) tensor([0.2791, 0.0497, 0.4671, 0.2040]) -Greedy action tensor([ 0.6824, -0.8826, 0.5235, 1.2067]) tensor([0.2666, 0.0557, 0.2274, 0.4503]) -Greedy action tensor([ 0.7358, -0.5807, 1.2945, -0.7072]) tensor([0.3074, 0.0824, 0.5375, 0.0726]) -Greedy action tensor([ 0.1451, -0.3236, 1.2745, 0.2189]) tensor([0.1725, 0.1080, 0.5338, 0.1857]) -Greedy action tensor([-0.5931, -0.1662, -0.4865, 0.5734]) tensor([0.1459, 0.2235, 0.1623, 0.4683]) -Greedy action tensor([ 0.0492, -0.5154, 0.8337, 1.0516]) tensor([0.1542, 0.0877, 0.3379, 0.4202]) -Greedy action tensor([1.5535, 0.3428, 0.6301, 1.0968]) tensor([0.4295, 0.1280, 0.1706, 0.2720]) -Greedy action tensor([ 1.7761, -1.2934, -0.6714, 0.9777]) tensor([0.6317, 0.0293, 0.0547, 0.2843]) -Greedy action tensor([ 0.6803, 0.5184, -0.5773, 0.6860]) tensor([0.3184, 0.2708, 0.0905, 0.3202]) -Greedy action tensor([1.1903, 0.2467, 0.3538, 0.5238]) tensor([0.4281, 0.1666, 0.1855, 0.2198]) -Greedy action tensor([ 1.3259, -0.6218, 0.6082, 0.8352]) tensor([0.4459, 0.0636, 0.2175, 0.2730]) -Greedy action tensor([ 0.7579, 0.2904, -0.0210, 0.4475]) tensor([0.3548, 0.2223, 0.1628, 0.2601]) -Greedy action tensor([-0.5532, -0.0386, 1.1328, 0.0504]) tensor([0.1010, 0.1690, 0.5453, 0.1847]) -Greedy action tensor([-0.5930, -1.6098, -0.8656, 0.6389]) tensor([0.1802, 0.0652, 0.1372, 0.6175]) -Greedy action tensor([ 0.4205, 0.8821, -0.5037, 1.1497]) tensor([0.1977, 0.3138, 0.0785, 0.4100]) -Greedy action tensor([ 1.0870, -0.9353, 0.5998, 0.9786]) tensor([0.3782, 0.0501, 0.2324, 0.3394]) -Greedy action tensor([ 1.3079, -0.1732, 0.0142, 0.8523]) tensor([0.4682, 0.1065, 0.1284, 0.2969]) -Greedy action tensor([-0.1589, -0.4663, -0.4028, 1.2240]) tensor([0.1537, 0.1130, 0.1204, 0.6128]) -Greedy action tensor([ 1.7334, -0.4738, 0.8727, 0.5429]) tensor([0.5444, 0.0599, 0.2302, 0.1655]) -Greedy action tensor([ 1.4998, -0.7432, 0.7622, 1.1859]) tensor([0.4320, 0.0458, 0.2066, 0.3156]) -Greedy action tensor([ 0.9244, 0.4477, -0.4755, 2.2660]) tensor([0.1757, 0.1091, 0.0433, 0.6719]) -Greedy action tensor([ 1.8636, -0.1078, 0.7068, 0.6063]) tensor([0.5753, 0.0801, 0.1809, 0.1636]) -Greedy action tensor([ 0.6669, 0.7867, -0.3454, 0.4263]) tensor([0.3052, 0.3440, 0.1109, 0.2399]) -Greedy action tensor([ 1.0673, -0.0658, 0.2171, 0.6418]) tensor([0.4162, 0.1340, 0.1778, 0.2720]) -Greedy action tensor([0.8427, 0.7489, 0.5060, 0.5853]) tensor([0.2943, 0.2680, 0.2102, 0.2275]) -Greedy action tensor([0.4703, 0.1341, 1.0484, 0.3647]) tensor([0.2274, 0.1625, 0.4054, 0.2046]) -Greedy action tensor([ 0.7945, -0.1086, 0.9833, -0.1986]) tensor([0.3352, 0.1358, 0.4048, 0.1242]) -Greedy action tensor([-0.0718, 0.1713, 0.8796, 0.2130]) tensor([0.1615, 0.2059, 0.4180, 0.2146]) -Greedy action tensor([0.7920, 0.5945, 0.6892, 0.4485]) tensor([0.2913, 0.2391, 0.2629, 0.2066]) -Greedy action tensor([ 1.3056, -1.0698, -0.0165, 1.1005]) tensor([0.4600, 0.0428, 0.1226, 0.3747]) -Greedy action tensor([1.7946, 0.0931, 0.8009, 0.4859]) tensor([0.5486, 0.1001, 0.2031, 0.1482]) -Greedy action tensor([ 0.8347, 0.1781, -0.0725, 0.2640]) tensor([0.4020, 0.2085, 0.1623, 0.2272]) -Greedy action tensor([ 0.0167, -0.3107, 0.4629, 0.0485]) tensor([0.2317, 0.1670, 0.3620, 0.2392]) -Greedy action tensor([ 1.4025, -0.4213, 0.8933, 2.1809]) tensor([0.2538, 0.0410, 0.1525, 0.5527]) -Greedy action tensor([-0.0694, 0.5902, 0.0423, 0.0353]) tensor([0.1937, 0.3746, 0.2166, 0.2151]) -Greedy action tensor([ 1.5577, -0.8323, 0.6908, 0.8394]) tensor([0.5001, 0.0458, 0.2102, 0.2439]) -Greedy action tensor([ 0.9392, -0.6769, 0.3717, 0.2009]) tensor([0.4457, 0.0886, 0.2527, 0.2130]) -Greedy action tensor([ 0.6880, -1.0976, -0.1467, -0.2098]) tensor([0.4977, 0.0835, 0.2160, 0.2028]) -Greedy action tensor([ 0.7765, -0.9698, 0.8125, 0.6178]) tensor([0.3263, 0.0569, 0.3383, 0.2784]) -Greedy action tensor([0.6092, 0.0842, 0.0644, 0.0843]) tensor([0.3619, 0.2141, 0.2099, 0.2141]) -Greedy action tensor([ 1.0298, -0.0068, -0.1935, 0.0824]) tensor([0.4910, 0.1741, 0.1445, 0.1904]) -Greedy action tensor([0.2910, 0.4076, 0.9388, 0.2989]) tensor([0.1983, 0.2228, 0.3790, 0.1999]) -Greedy action tensor([0.1016, 0.2072, 0.2849, 1.7807]) tensor([0.1153, 0.1281, 0.1385, 0.6181]) -Greedy action tensor([ 2.0448, -0.0400, 1.2454, 1.1071]) tensor([0.5088, 0.0633, 0.2288, 0.1992]) -Greedy action tensor([ 0.3204, -0.2974, -0.1457, -0.0151]) tensor([0.3470, 0.1871, 0.2177, 0.2481]) -Greedy action tensor([ 1.0383, -0.9427, 0.1051, 1.3976]) tensor([0.3374, 0.0465, 0.1327, 0.4833]) -Greedy action tensor([ 0.0827, -1.9038, 0.5969, 0.1138]) tensor([0.2604, 0.0357, 0.4354, 0.2686]) -Greedy action tensor([ 0.7618, 1.0817, -1.3351, 0.4692]) tensor([0.3080, 0.4242, 0.0378, 0.2299]) -Greedy action tensor([ 0.8479, 0.6344, -0.4035, 0.7794]) tensor([0.3303, 0.2668, 0.0945, 0.3084]) -Greedy action tensor([ 0.9716, 0.2070, -0.6212, 0.3525]) tensor([0.4530, 0.2109, 0.0921, 0.2439]) -Greedy action tensor([ 1.8203, -0.2288, 0.7470, 1.7510]) tensor([0.4160, 0.0536, 0.1422, 0.3881]) -Greedy action tensor([ 0.1016, -1.7161, -0.1548, 0.9273]) tensor([0.2370, 0.0385, 0.1834, 0.5412]) -Greedy action tensor([ 0.7859, 0.3385, 0.3218, -0.1971]) tensor([0.3785, 0.2420, 0.2380, 0.1416]) -Greedy action tensor([ 1.0641, -0.7901, 1.3233, 0.8663]) tensor([0.3055, 0.0478, 0.3959, 0.2507]) -Greedy action tensor([ 1.0444, -0.1356, 0.1661, 0.5675]) tensor([0.4267, 0.1311, 0.1773, 0.2649]) -Greedy action tensor([0.9677, 0.1601, 1.1144, 0.0888]) tensor([0.3312, 0.1477, 0.3836, 0.1375]) -Greedy action tensor([0.9749, 0.6740, 0.0333, 0.9901]) tensor([0.3179, 0.2353, 0.1240, 0.3228]) -Greedy action tensor([ 0.6989, -0.1011, 1.0460, 0.5765]) tensor([0.2667, 0.1198, 0.3774, 0.2360]) -Greedy action tensor([ 1.5977, 0.2938, 1.0670, -0.1897]) tensor([0.4933, 0.1339, 0.2902, 0.0826]) -Greedy action tensor([0.7301, 0.3569, 0.8332, 0.7596]) tensor([0.2613, 0.1799, 0.2897, 0.2691]) -Greedy action tensor([ 0.8467, 0.2037, -1.1092, 0.5426]) tensor([0.4158, 0.2186, 0.0588, 0.3068]) -Greedy action tensor([ 1.4051, -0.4229, -0.6957, 1.1278]) tensor([0.4900, 0.0788, 0.0600, 0.3713]) -Greedy action tensor([0.9888, 0.2722, 0.5075, 1.0375]) tensor([0.3168, 0.1547, 0.1958, 0.3326]) -Greedy action tensor([1.5970, 0.1888, 1.2425, 1.0446]) tensor([0.3966, 0.0970, 0.2782, 0.2282]) -Greedy action tensor([-0.4203, -0.0427, -0.8451, 0.8368]) tensor([0.1509, 0.2201, 0.0987, 0.5304]) -Greedy action tensor([ 0.7215, 0.4088, 1.8071, -0.0176]) tensor([0.1934, 0.1415, 0.5727, 0.0924]) -Greedy action tensor([ 0.8254, -0.5598, -0.5196, 1.4428]) tensor([0.2972, 0.0744, 0.0774, 0.5510]) -Greedy action tensor([-0.4818, 0.0458, 0.7452, 0.2831]) tensor([0.1211, 0.2053, 0.4132, 0.2603]) -Greedy action tensor([0.8250, 0.2929, 1.4284, 0.3919]) tensor([0.2460, 0.1445, 0.4499, 0.1596]) -Greedy action tensor([ 1.4562, -0.6114, 1.5548, 0.6768]) tensor([0.3719, 0.0470, 0.4104, 0.1706]) -Greedy action tensor([-0.6081, -0.5778, 0.2038, 0.1279]) tensor([0.1570, 0.1618, 0.3535, 0.3277]) -Greedy action tensor([-1.2212, 0.0055, 0.3913, -0.6346]) tensor([0.0891, 0.3038, 0.4469, 0.1602]) -Greedy action tensor([-1.5570, -0.3681, 0.8586, 0.8798]) tensor([0.0372, 0.1220, 0.4160, 0.4249]) -Greedy action tensor([-1.6355, -0.5225, 0.5341, -0.0672]) tensor([0.0568, 0.1729, 0.4975, 0.2727]) -Greedy action tensor([-1.6409, -1.0732, 0.0085, -0.7902]) tensor([0.0970, 0.1711, 0.5048, 0.2271]) -Greedy action tensor([-1.8924, -0.4339, 0.6347, -0.1458]) tensor([0.0425, 0.1826, 0.5315, 0.2435]) -Greedy action tensor([-1.8617, -0.4641, 0.6297, -0.1051]) tensor([0.0436, 0.1765, 0.5271, 0.2528]) -Greedy action tensor([-0.2512, 0.2755, 0.2046, 0.4217]) tensor([0.1605, 0.2718, 0.2532, 0.3146]) -Greedy action tensor([-1.6643, -0.6157, 0.0329, -0.3990]) tensor([0.0778, 0.2220, 0.4246, 0.2757]) -Greedy action tensor([-1.5294, -0.6772, 1.0749, 0.9037]) tensor([0.0354, 0.0830, 0.4785, 0.4032]) -Greedy action tensor([-1.2424, -0.3541, 0.4514, 0.7390]) tensor([0.0620, 0.1508, 0.3374, 0.4498]) -Greedy action tensor([-1.3230, -0.3848, 0.3924, 0.3371]) tensor([0.0696, 0.1778, 0.3867, 0.3659]) -Greedy action tensor([-1.9646, -0.5101, 1.2440, 0.6050]) tensor([0.0232, 0.0994, 0.5743, 0.3031]) -Greedy action tensor([-1.1061, -0.6586, 0.3599, 0.1224]) tensor([0.0970, 0.1517, 0.4201, 0.3313]) -Greedy action tensor([-1.1555, -0.5841, 0.2553, 0.2768]) tensor([0.0904, 0.1601, 0.3707, 0.3787]) -Greedy action tensor([-1.3021, -0.1375, 0.4156, 0.0107]) tensor([0.0741, 0.2375, 0.4129, 0.2754]) -Greedy action tensor([-1.5089, -0.4143, 1.0350, 1.0292]) tensor([0.0340, 0.1017, 0.4334, 0.4308]) -Greedy action tensor([-1.5473, -0.5750, 0.6434, 0.3981]) tensor([0.0511, 0.1350, 0.4566, 0.3573]) -Greedy action tensor([-1.0336, -0.5930, 0.2135, 0.3583]) tensor([0.0994, 0.1545, 0.3461, 0.4000]) -Greedy action tensor([-0.7658, -0.2682, 0.0219, 0.5454]) tensor([0.1169, 0.1923, 0.2570, 0.4338]) -Greedy action tensor([-1.6753, -0.4991, 0.5303, 0.0132]) tensor([0.0534, 0.1731, 0.4846, 0.2889]) -Greedy action tensor([-1.1366, -0.5097, 0.4878, 0.9552]) tensor([0.0623, 0.1166, 0.3163, 0.5047]) -Greedy action tensor([-1.8826, -0.5630, 0.8295, 0.4554]) tensor([0.0332, 0.1241, 0.4993, 0.3435]) -Greedy action tensor([-1.7502, -0.3756, 0.9466, 0.7487]) tensor([0.0313, 0.1237, 0.4642, 0.3808]) -Greedy action tensor([-1.7944, -0.4725, 0.5897, -0.1007]) tensor([0.0475, 0.1783, 0.5157, 0.2585]) -Greedy action tensor([-1.6405, -0.5059, 0.5002, 0.0191]) tensor([0.0560, 0.1740, 0.4759, 0.2942]) -Greedy action tensor([-1.6437, -0.4604, 1.1477, 0.9525]) tensor([0.0294, 0.0961, 0.4798, 0.3947]) -Greedy action tensor([-1.6195, -0.5885, 0.6567, 0.4056]) tensor([0.0473, 0.1328, 0.4612, 0.3587]) -Greedy action tensor([-1.3836, -0.4475, 0.4867, 0.4964]) tensor([0.0603, 0.1537, 0.3911, 0.3949]) -Greedy action tensor([-1.7077, -0.3673, 0.8382, 0.6697]) tensor([0.0353, 0.1348, 0.4499, 0.3801]) -Greedy action tensor([-1.7035, -0.5193, 0.5606, -0.1471]) tensor([0.0537, 0.1754, 0.5164, 0.2545]) -Greedy action tensor([-1.9198, -0.3725, 0.6635, -0.1314]) tensor([0.0401, 0.1886, 0.5313, 0.2400]) -Greedy action tensor([-1.3340, -0.3411, 0.4553, -0.1222]) tensor([0.0767, 0.2069, 0.4589, 0.2575]) -Greedy action tensor([-1.8856, -0.7338, 0.4617, -0.0934]) tensor([0.0485, 0.1534, 0.5071, 0.2911]) -Greedy action tensor([-1.5668, -0.5564, 0.2162, -0.2136]) tensor([0.0737, 0.2025, 0.4385, 0.2853]) -Greedy action tensor([-1.4827, -0.3979, 0.5768, 0.5414]) tensor([0.0516, 0.1528, 0.4048, 0.3908]) -Greedy action tensor([-1.9355, -0.5339, 1.3061, 0.7054]) tensor([0.0224, 0.0909, 0.5726, 0.3140]) -Greedy action tensor([-1.0917, -0.5736, 0.2430, 0.3343]) tensor([0.0940, 0.1578, 0.3570, 0.3912]) -Greedy action tensor([-1.6489, -0.4559, 0.5370, 0.0847]) tensor([0.0530, 0.1748, 0.4719, 0.3002]) -Greedy action tensor([-1.4888, -0.5645, 1.0040, 0.9841]) tensor([0.0364, 0.0917, 0.4403, 0.4316]) -Greedy action tensor([-1.8730, -0.5575, 0.8133, 0.1462]) tensor([0.0371, 0.1384, 0.5449, 0.2796]) -Greedy action tensor([-1.6385, -0.0192, 0.4837, 0.1028]) tensor([0.0497, 0.2512, 0.4153, 0.2838]) -Greedy action tensor([-0.6526, -0.5592, 0.1643, 0.2948]) tensor([0.1441, 0.1582, 0.3261, 0.3716]) -Greedy action tensor([-1.2781, -0.4253, 0.3741, 0.5151]) tensor([0.0686, 0.1610, 0.3581, 0.4123]) -Greedy action tensor([-1.8685, -0.6233, 1.2911, 0.7689]) tensor([0.0238, 0.0827, 0.5608, 0.3327]) -Greedy action tensor([-1.1075, -0.2919, 0.6133, 1.0445]) tensor([0.0573, 0.1295, 0.3202, 0.4929]) -Greedy action tensor([-1.8222, -0.4817, 0.6025, -0.0918]) tensor([0.0459, 0.1756, 0.5192, 0.2593]) -Greedy action tensor([-1.9555, -0.9290, 0.1853, -0.3202]) tensor([0.0574, 0.1602, 0.4881, 0.2944]) -Greedy action tensor([-1.0987, 0.5035, 0.0586, -0.5374]) tensor([0.0918, 0.4555, 0.2919, 0.1608]) -Greedy action tensor([-1.8201, -1.0106, 0.0186, -0.6052]) tensor([0.0775, 0.1741, 0.4873, 0.2611]) -Greedy action tensor([-1.8322, -0.4689, 0.6477, -0.0789]) tensor([0.0442, 0.1728, 0.5278, 0.2552]) -Greedy action tensor([-1.9284, -0.5361, 0.7485, 0.1892]) tensor([0.0359, 0.1444, 0.5216, 0.2981]) -Greedy action tensor([-1.1936, -0.5795, 0.2829, 0.2293]) tensor([0.0879, 0.1625, 0.3849, 0.3648]) -Greedy action tensor([-1.6474, -0.4993, 0.5248, 0.0433]) tensor([0.0545, 0.1718, 0.4782, 0.2955]) -Greedy action tensor([-1.9705, -0.5130, 1.2387, 0.6305]) tensor([0.0230, 0.0987, 0.5688, 0.3096]) -Greedy action tensor([-1.8527, -0.4446, 0.6609, -0.1053]) tensor([0.0431, 0.1764, 0.5328, 0.2476]) -Greedy action tensor([-1.6297, -0.4901, 0.4852, 0.0437]) tensor([0.0564, 0.1761, 0.4671, 0.3004]) -Greedy action tensor([-1.3785, -0.5313, 0.0697, -0.4178]) tensor([0.0980, 0.2287, 0.4171, 0.2562]) -Greedy action tensor([-1.4689, -0.5448, 0.4146, 0.0944]) tensor([0.0672, 0.1694, 0.4422, 0.3211]) -Greedy action tensor([-1.4760, -0.1122, 0.8820, 0.8152]) tensor([0.0394, 0.1542, 0.4167, 0.3897]) -Greedy action tensor([-1.7412, -0.3441, 0.8237, 0.5661]) tensor([0.0356, 0.1439, 0.4628, 0.3577]) -Greedy action tensor([-1.5516, -0.4847, 0.2124, -0.3606]) tensor([0.0767, 0.2230, 0.4478, 0.2525]) -Greedy action tensor([-1.5000, -0.5046, 0.4740, 0.2418]) tensor([0.0602, 0.1629, 0.4334, 0.3436]) -Greedy action tensor([-1.6003, -0.6876, 0.5092, 0.0928]) tensor([0.0582, 0.1451, 0.4801, 0.3166]) -Greedy action tensor([-1.4829, -0.4876, 0.4241, 0.1326]) tensor([0.0646, 0.1749, 0.4353, 0.3252]) -Greedy action tensor([-0.8251, -0.6178, 0.1870, 0.2623]) tensor([0.1258, 0.1548, 0.3462, 0.3732]) -Greedy action tensor([-1.4290, -0.4042, 0.4067, 0.0431]) tensor([0.0694, 0.1933, 0.4349, 0.3024]) -Greedy action tensor([-1.8439, -0.9189, 0.1846, -0.4288]) tensor([0.0656, 0.1655, 0.4988, 0.2701]) -Greedy action tensor([0.2986, 0.0219, 0.5676, 0.9505]) tensor([0.2006, 0.1521, 0.2625, 0.3849]) -Greedy action tensor([-1.7617, -0.4417, 0.6557, 0.0804]) tensor([0.0449, 0.1681, 0.5037, 0.2833]) -Greedy action tensor([-1.7536, -0.4720, 0.5841, 0.0074]) tensor([0.0481, 0.1734, 0.4985, 0.2800]) -Greedy action tensor([-1.3516, -0.2555, 0.2146, -0.5127]) tensor([0.0901, 0.2697, 0.4316, 0.2086]) -Greedy action tensor([-1.3219, -0.1114, 0.2609, -0.2435]) tensor([0.0822, 0.2758, 0.4002, 0.2417]) -Greedy action tensor([-1.4851, -0.3198, 0.9720, 0.9716]) tensor([0.0363, 0.1164, 0.4237, 0.4235]) -Greedy action tensor([-1.3526, -0.5614, 0.6648, 0.8053]) tensor([0.0516, 0.1138, 0.3880, 0.4465]) -Greedy action tensor([-0.6137, 0.3168, 0.1418, -0.0103]) tensor([0.1335, 0.3384, 0.2841, 0.2440]) -Greedy action tensor([-1.3423, -0.4813, 0.5299, -0.2504]) tensor([0.0778, 0.1841, 0.5061, 0.2319]) -Greedy action tensor([-0.5855, -0.1311, 0.4892, 1.2622]) tensor([0.0844, 0.1329, 0.2472, 0.5355]) -Greedy action tensor([-0.8244, -0.6403, 0.2981, 0.0090]) tensor([0.1320, 0.1587, 0.4056, 0.3038]) -Greedy action tensor([-0.5406, -0.3759, 0.3052, 0.5785]) tensor([0.1321, 0.1557, 0.3077, 0.4044]) -Greedy action tensor([-0.8293, -0.5786, 0.3435, 0.4686]) tensor([0.1090, 0.1400, 0.3521, 0.3990]) -Greedy action tensor([ 0.7736, -0.5216, 0.2528, -0.5510]) tensor([0.4686, 0.1283, 0.2784, 0.1246]) -Greedy action tensor([ 0.7870, 0.4682, -0.2521, -0.3184]) tensor([0.4146, 0.3014, 0.1467, 0.1373]) -Greedy action tensor([ 6.4010e-01, -6.0810e-01, 4.3768e-04, -5.2178e-01]) tensor([0.4701, 0.1349, 0.2479, 0.1471]) -Greedy action tensor([ 0.3568, -0.0681, -0.1743, -0.2515]) tensor([0.3589, 0.2347, 0.2110, 0.1954]) -Greedy action tensor([ 0.4572, -0.0457, 0.0106, -0.2880]) tensor([0.3678, 0.2224, 0.2353, 0.1745]) -Greedy action tensor([ 0.6135, -0.2668, -0.0053, -0.2599]) tensor([0.4218, 0.1749, 0.2272, 0.1761]) -Greedy action tensor([ 0.6574, -0.4540, 0.0377, -0.6146]) tensor([0.4656, 0.1533, 0.2506, 0.1305]) -Greedy action tensor([ 0.6935, -0.3742, -0.0664, -0.2373]) tensor([0.4533, 0.1559, 0.2121, 0.1787]) -Greedy action tensor([ 0.6483, -0.5198, 0.0375, -0.6569]) tensor([0.4706, 0.1463, 0.2555, 0.1276]) -Greedy action tensor([ 0.4181, 0.0825, 0.0635, -0.1815]) tensor([0.3372, 0.2411, 0.2365, 0.1851]) -Greedy action tensor([ 0.4370, -0.0970, -0.0521, -0.2341]) tensor([0.3689, 0.2163, 0.2262, 0.1886]) -Greedy action tensor([ 0.8094, -0.3778, 0.0060, -0.4878]) tensor([0.4935, 0.1506, 0.2210, 0.1349]) -Greedy action tensor([ 0.6347, -0.2956, -0.0124, -0.5402]) tensor([0.4491, 0.1771, 0.2351, 0.1387]) -Greedy action tensor([ 0.2554, -0.1216, -0.0171, -0.4750]) tensor([0.3414, 0.2342, 0.2600, 0.1645]) -Greedy action tensor([ 0.6449, -0.3497, 0.0446, -0.3452]) tensor([0.4367, 0.1615, 0.2396, 0.1622]) -Greedy action tensor([ 0.6900, -0.1598, 0.0463, -0.2485]) tensor([0.4266, 0.1824, 0.2241, 0.1669]) -Greedy action tensor([ 0.9403, -0.5452, -0.1301, -0.5762]) tensor([0.5590, 0.1266, 0.1917, 0.1227]) -Greedy action tensor([ 0.6334, -0.4995, 0.0139, -0.5217]) tensor([0.4597, 0.1481, 0.2474, 0.1448]) -Greedy action tensor([ 0.3635, -0.0673, -0.0530, -0.1257]) tensor([0.3422, 0.2224, 0.2256, 0.2098]) -Greedy action tensor([ 0.8981, -0.3881, 0.1281, -0.5354]) tensor([0.5056, 0.1397, 0.2341, 0.1206]) -Greedy action tensor([ 0.5241, 0.0796, 0.0050, -0.3881]) tensor([0.3791, 0.2431, 0.2256, 0.1523]) -Greedy action tensor([ 0.7266, -0.6166, -0.2634, -0.5316]) tensor([0.5217, 0.1362, 0.1939, 0.1483]) -Greedy action tensor([ 0.4398, -0.2424, -0.0197, -0.2297]) tensor([0.3775, 0.1908, 0.2384, 0.1933]) -Greedy action tensor([ 0.3793, -0.1029, -0.1173, -0.4008]) tensor([0.3725, 0.2300, 0.2267, 0.1707]) -Greedy action tensor([ 0.6591, -0.4238, 0.0047, -0.5974]) tensor([0.4666, 0.1580, 0.2425, 0.1328]) -Greedy action tensor([ 0.6885, -0.5022, 0.0623, -0.7271]) tensor([0.4804, 0.1461, 0.2569, 0.1166]) -Greedy action tensor([ 0.4896, -0.2585, 0.2864, -0.4946]) tensor([0.3755, 0.1777, 0.3065, 0.1403]) -Greedy action tensor([ 0.2419, -0.1496, 0.0349, -0.3557]) tensor([0.3290, 0.2224, 0.2675, 0.1810]) -Greedy action tensor([ 0.2120, 0.1347, 0.1725, -0.3577]) tensor([0.2896, 0.2681, 0.2784, 0.1638]) -Greedy action tensor([ 0.4019, -0.0660, -0.0127, -0.1621]) tensor([0.3502, 0.2193, 0.2313, 0.1992]) -Greedy action tensor([ 0.7694, -0.2558, -0.0259, -0.3302]) tensor([0.4666, 0.1674, 0.2106, 0.1554]) -Greedy action tensor([ 0.6755, -0.1256, 0.0622, -0.5331]) tensor([0.4369, 0.1961, 0.2366, 0.1305]) -Greedy action tensor([ 0.6189, -0.2306, -0.0112, -0.3067]) tensor([0.4244, 0.1815, 0.2260, 0.1682]) -Greedy action tensor([ 0.4317, 0.0217, -0.0411, -0.1676]) tensor([0.3526, 0.2340, 0.2198, 0.1936]) -Greedy action tensor([ 1.0221, -0.3838, 0.0461, -0.7917]) tensor([0.5602, 0.1373, 0.2111, 0.0913]) -Greedy action tensor([ 0.5321, -0.2124, -0.1203, -0.3598]) tensor([0.4157, 0.1974, 0.2165, 0.1704]) -Greedy action tensor([ 0.3658, 0.2580, -0.0996, 0.0311]) tensor([0.3085, 0.2770, 0.1937, 0.2208]) -Greedy action tensor([ 0.2908, -0.0843, -0.1894, -0.1151]) tensor([0.3365, 0.2312, 0.2081, 0.2242]) -Greedy action tensor([ 0.3595, -0.0282, -0.1123, -0.1939]) tensor([0.3475, 0.2358, 0.2168, 0.1998]) -Greedy action tensor([ 1.2793, -1.0960, 0.0280, -0.6816]) tensor([0.6580, 0.0612, 0.1883, 0.0926]) -Greedy action tensor([ 0.5650, 0.3621, -0.2058, -0.3322]) tensor([0.3722, 0.3038, 0.1722, 0.1517]) -Greedy action tensor([ 0.4627, 0.0021, 0.0354, -0.2787]) tensor([0.3624, 0.2286, 0.2364, 0.1726]) -Greedy action tensor([ 0.5307, 0.0975, -0.0297, -0.1205]) tensor([0.3649, 0.2366, 0.2083, 0.1902]) -Greedy action tensor([ 4.2080e-01, -2.2091e-04, 2.7009e-02, -4.4387e-01]) tensor([0.3634, 0.2385, 0.2451, 0.1530]) -Greedy action tensor([ 0.1169, 0.1624, 0.1637, -0.2032]) tensor([0.2617, 0.2739, 0.2743, 0.1900]) -Greedy action tensor([ 0.4895, -0.2146, -0.0015, -0.4433]) tensor([0.4000, 0.1978, 0.2448, 0.1574]) -Greedy action tensor([ 0.2732, 0.0298, 0.1376, -0.3861]) tensor([0.3150, 0.2470, 0.2751, 0.1629]) -Greedy action tensor([ 0.5996, -0.4150, -0.0066, -0.6685]) tensor([0.4567, 0.1656, 0.2491, 0.1285]) -Greedy action tensor([ 0.5430, -0.2044, -0.0368, -0.2198]) tensor([0.4000, 0.1894, 0.2240, 0.1865]) -Greedy action tensor([ 0.2831, -0.1880, 0.0950, -0.3756]) tensor([0.3367, 0.2102, 0.2789, 0.1742]) -Greedy action tensor([ 0.3847, 0.0429, -0.0139, -0.3309]) tensor([0.3483, 0.2475, 0.2338, 0.1703]) -Greedy action tensor([ 0.4509, -0.1289, -0.0500, -0.2742]) tensor([0.3773, 0.2113, 0.2287, 0.1827]) -Greedy action tensor([ 0.2568, -0.0926, 0.2870, -0.2551]) tensor([0.2998, 0.2114, 0.3090, 0.1797]) -Greedy action tensor([ 0.5707, -0.1252, -0.1053, -0.2256]) tensor([0.4068, 0.2028, 0.2069, 0.1835]) -Greedy action tensor([ 0.6539, -0.3844, 0.1070, -0.4158]) tensor([0.4394, 0.1556, 0.2543, 0.1508]) -Greedy action tensor([ 0.4875, -0.3070, -0.0762, -0.1990]) tensor([0.3962, 0.1790, 0.2254, 0.1994]) -Greedy action tensor([ 0.6073, -0.2334, -0.0619, -0.2784]) tensor([0.4244, 0.1831, 0.2174, 0.1751]) -Greedy action tensor([ 0.2509, 0.0425, -0.0130, -0.1647]) tensor([0.3086, 0.2506, 0.2371, 0.2037]) -Greedy action tensor([ 0.6573, -0.4430, 0.0547, -0.6864]) tensor([0.4671, 0.1554, 0.2557, 0.1218]) -Greedy action tensor([ 0.2417, 0.1279, -0.1091, -0.1952]) tensor([0.3084, 0.2752, 0.2171, 0.1992]) -Greedy action tensor([ 0.6882, -0.2260, -0.0509, -0.1448]) tensor([0.4323, 0.1733, 0.2064, 0.1880]) -Greedy action tensor([ 0.6869, -0.1136, 0.0290, -0.1811]) tensor([0.4190, 0.1882, 0.2170, 0.1759]) -Greedy action tensor([ 0.8228, -0.3539, 0.0772, -0.4843]) tensor([0.4870, 0.1501, 0.2310, 0.1318]) -Greedy action tensor([ 0.2824, -0.0080, 0.0972, -0.3073]) tensor([0.3191, 0.2387, 0.2652, 0.1770]) -Greedy action tensor([ 0.7407, -0.5359, -0.1514, -0.2586]) tensor([0.4862, 0.1356, 0.1992, 0.1790]) -Greedy action tensor([ 0.3019, -0.0389, -0.0842, -0.1177]) tensor([0.3281, 0.2333, 0.2230, 0.2156]) -Greedy action tensor([ 0.1154, -0.0283, 0.1293, -0.2220]) tensor([0.2783, 0.2410, 0.2821, 0.1986]) -Greedy action tensor([ 0.4818, -0.0361, -0.0078, -0.3219]) tensor([0.3765, 0.2243, 0.2307, 0.1685]) -Greedy action tensor([ 0.2632, 0.0875, 0.0288, -0.3396]) tensor([0.3147, 0.2640, 0.2490, 0.1722]) -Greedy action tensor([ 0.2463, 0.1779, 0.0834, -0.2770]) tensor([0.2962, 0.2766, 0.2517, 0.1755]) -Greedy action tensor([ 0.4746, -0.0919, 0.0498, -0.2015]) tensor([0.3663, 0.2079, 0.2395, 0.1863]) -Greedy action tensor([ 0.6681, -0.1744, 0.2152, -0.4183]) tensor([0.4160, 0.1791, 0.2645, 0.1404]) -Greedy action tensor([ 0.5189, -0.2090, -0.0042, -0.2377]) tensor([0.3929, 0.1898, 0.2329, 0.1844]) -Greedy action tensor([ 0.0988, -0.0341, -0.2222, -0.4584]) tensor([0.3151, 0.2759, 0.2286, 0.1805]) -Greedy action tensor([ 0.4303, -0.1608, -0.0841, -0.4642]) tensor([0.3906, 0.2163, 0.2335, 0.1597]) -Greedy action tensor([ 0.7171, -0.4268, 0.1594, -0.3723]) tensor([0.4489, 0.1430, 0.2570, 0.1510]) -Greedy action tensor([ 0.3409, -0.1967, 0.1522, -0.4227]) tensor([0.3474, 0.2030, 0.2877, 0.1619]) -Greedy action tensor([ 0.4125, 0.0904, 0.0444, -0.1173]) tensor([0.3327, 0.2411, 0.2303, 0.1959]) -Greedy action tensor([ 0.3089, 0.0128, 0.1178, -0.2232]) tensor([0.3167, 0.2356, 0.2616, 0.1860]) -Greedy action tensor([ 0.6658, -0.2201, 0.0101, -0.3076]) tensor([0.4330, 0.1786, 0.2248, 0.1636]) -Greedy action tensor([ 0.8690, -0.7884, -0.2798, -0.8449]) tensor([0.5925, 0.1129, 0.1878, 0.1067]) -Greedy action tensor([ 0.8526, 0.0171, 0.1067, -0.1631]) tensor([0.4405, 0.1910, 0.2089, 0.1595]) -Greedy action tensor([ 1.3684, -0.3900, -0.1835, 0.0712]) tensor([0.6033, 0.1040, 0.1278, 0.1649]) -Greedy action tensor([ 1.0345, -0.4011, -0.3620, -0.0074]) tensor([0.5440, 0.1295, 0.1346, 0.1919]) -Greedy action tensor([ 1.1866, -0.3125, -0.3890, -0.3180]) tensor([0.6052, 0.1352, 0.1252, 0.1344]) -Greedy action tensor([ 1.2050, -0.5277, -0.3648, -0.2162]) tensor([0.6149, 0.1087, 0.1280, 0.1484]) -Greedy action tensor([ 1.2789, -0.4432, -0.3390, -0.1201]) tensor([0.6158, 0.1100, 0.1221, 0.1520]) -Greedy action tensor([ 0.4760, 0.2074, -0.0797, -0.2273]) tensor([0.3530, 0.2698, 0.2025, 0.1747]) -Greedy action tensor([ 0.9493, -0.0640, 0.2036, -0.1710]) tensor([0.4622, 0.1678, 0.2193, 0.1508]) -Greedy action tensor([ 0.3881, -0.0797, -0.3490, -0.0776]) tensor([0.3660, 0.2292, 0.1751, 0.2297]) -Greedy action tensor([ 1.1080, -0.1802, -0.6327, -0.2913]) tensor([0.5889, 0.1624, 0.1033, 0.1453]) -Greedy action tensor([ 0.6403, -0.2816, -0.6857, 0.7434]) tensor([0.3608, 0.1435, 0.0958, 0.3999]) -Greedy action tensor([ 0.2385, -0.0857, -0.5256, -0.0952]) tensor([0.3442, 0.2489, 0.1603, 0.2466]) -Greedy action tensor([ 1.0704, -0.2300, -0.2571, 0.0599]) tensor([0.5259, 0.1433, 0.1394, 0.1914]) -Greedy action tensor([ 1.2476, -0.5460, -0.3171, 0.0028]) tensor([0.6011, 0.1000, 0.1257, 0.1731]) -Greedy action tensor([ 0.8683, -0.5577, -0.0501, -0.0140]) tensor([0.4870, 0.1170, 0.1944, 0.2016]) -Greedy action tensor([ 0.8764, -0.5600, -0.2612, 0.1063]) tensor([0.4947, 0.1176, 0.1586, 0.2290]) -Greedy action tensor([ 1.5829, -0.7291, -0.3287, 0.3516]) tensor([0.6499, 0.0644, 0.0961, 0.1897]) -Greedy action tensor([ 0.4784, -0.1764, -0.0749, -0.0620]) tensor([0.3735, 0.1941, 0.2148, 0.2176]) -Greedy action tensor([ 1.0712, -0.3914, -0.1191, -0.1238]) tensor([0.5439, 0.1260, 0.1654, 0.1647]) -Greedy action tensor([ 0.9841, -0.0853, -0.1163, -0.2114]) tensor([0.5054, 0.1735, 0.1682, 0.1529]) -Greedy action tensor([ 0.7333, -0.4216, -0.1034, -0.0223]) tensor([0.4509, 0.1421, 0.1953, 0.2118]) -Greedy action tensor([ 0.7715, -0.4699, -0.4827, 0.1369]) tensor([0.4752, 0.1373, 0.1356, 0.2519]) -Greedy action tensor([ 0.9521, -0.3689, -0.3711, 0.5334]) tensor([0.4564, 0.1218, 0.1215, 0.3003]) -Greedy action tensor([ 0.9426, -0.2511, -0.2447, 0.1600]) tensor([0.4842, 0.1467, 0.1477, 0.2214]) -Greedy action tensor([ 1.0765, -0.6345, -0.4779, 1.0491]) tensor([0.4228, 0.0764, 0.0894, 0.4114]) -Greedy action tensor([ 0.6237, -0.2392, -0.1476, -0.1511]) tensor([0.4264, 0.1799, 0.1972, 0.1965]) -Greedy action tensor([ 0.8221, -0.1607, -0.6002, -0.2943]) tensor([0.5147, 0.1926, 0.1241, 0.1685]) -Greedy action tensor([ 1.1899, -0.4765, -0.2723, 0.1022]) tensor([0.5690, 0.1075, 0.1318, 0.1917]) -Greedy action tensor([ 1.4283, -0.2166, -0.3303, -0.0059]) tensor([0.6236, 0.1204, 0.1074, 0.1486]) -Greedy action tensor([ 1.0203, -0.1299, 0.0310, -0.1276]) tensor([0.4986, 0.1578, 0.1854, 0.1582]) -Greedy action tensor([ 1.1553, -0.4955, -0.5154, 0.6848]) tensor([0.4988, 0.0957, 0.0938, 0.3116]) -Greedy action tensor([ 0.9835, -0.3164, -0.2680, 0.1755]) tensor([0.4989, 0.1360, 0.1427, 0.2224]) -Greedy action tensor([ 0.9633, -0.1774, -0.1590, -0.0449]) tensor([0.4975, 0.1590, 0.1620, 0.1815]) -Greedy action tensor([ 1.0304, -0.5642, -0.3808, -0.1224]) tensor([0.5673, 0.1152, 0.1383, 0.1791]) -Greedy action tensor([ 1.3162, -0.7243, -0.3401, 0.6787]) tensor([0.5407, 0.0703, 0.1032, 0.2858]) -Greedy action tensor([ 1.3900, -0.7434, -0.4849, 0.1375]) tensor([0.6420, 0.0760, 0.0985, 0.1835]) -Greedy action tensor([ 0.9505, 0.0769, 0.0891, -0.0439]) tensor([0.4525, 0.1889, 0.1912, 0.1674]) -Greedy action tensor([ 1.2025, -0.5561, 0.1519, 0.1987]) tensor([0.5295, 0.0912, 0.1852, 0.1941]) -Greedy action tensor([ 1.4067, -0.6251, -0.2960, 0.0956]) tensor([0.6318, 0.0828, 0.1151, 0.1703]) -Greedy action tensor([ 1.2424, -0.3015, -0.0900, 0.0631]) tensor([0.5603, 0.1196, 0.1478, 0.1723]) -Greedy action tensor([ 1.0256, -0.1585, -0.4993, 0.4994]) tensor([0.4729, 0.1447, 0.1029, 0.2794]) -Greedy action tensor([ 0.7093, -0.2645, -0.5088, -0.2459]) tensor([0.4859, 0.1835, 0.1437, 0.1869]) -Greedy action tensor([ 0.7351, -0.2801, 0.1057, -0.0943]) tensor([0.4289, 0.1554, 0.2286, 0.1871]) -Greedy action tensor([ 0.7677, -0.5547, -0.1647, -0.1107]) tensor([0.4818, 0.1284, 0.1896, 0.2002]) -Greedy action tensor([ 1.0044, -0.6765, -0.2415, 0.2186]) tensor([0.5182, 0.0965, 0.1491, 0.2362]) -Greedy action tensor([ 0.9953, -0.4830, -0.2370, 0.1149]) tensor([0.5170, 0.1179, 0.1508, 0.2144]) -Greedy action tensor([ 1.0393, -0.1445, 0.0287, -0.1618]) tensor([0.5074, 0.1553, 0.1847, 0.1526]) -Greedy action tensor([ 0.7568, -0.3075, -0.3207, 0.4465]) tensor([0.4135, 0.1426, 0.1408, 0.3032]) -Greedy action tensor([ 1.4190, -0.4617, -0.2445, 0.3106]) tensor([0.5981, 0.0912, 0.1133, 0.1974]) -Greedy action tensor([ 0.9045, -0.3533, -0.2973, -0.2064]) tensor([0.5224, 0.1485, 0.1571, 0.1720]) -Greedy action tensor([ 0.9048, -0.3336, -0.2276, 0.1937]) tensor([0.4755, 0.1378, 0.1532, 0.2335]) -Greedy action tensor([ 1.0887, -0.6129, -0.2054, 0.4124]) tensor([0.5089, 0.0928, 0.1395, 0.2588]) -Greedy action tensor([ 1.2549, -0.4202, -0.1355, -0.1571]) tensor([0.5953, 0.1115, 0.1482, 0.1450]) -Greedy action tensor([ 1.0904, -0.3696, -0.1997, 0.1828]) tensor([0.5233, 0.1215, 0.1440, 0.2111]) -Greedy action tensor([ 0.7766, -0.5552, -0.6557, 0.1568]) tensor([0.4900, 0.1294, 0.1170, 0.2636]) -Greedy action tensor([ 1.1745, -0.6102, -0.3063, 0.1005]) tensor([0.5757, 0.0966, 0.1310, 0.1967]) -Greedy action tensor([ 1.1060, -0.2621, -0.4152, 0.2399]) tensor([0.5281, 0.1345, 0.1154, 0.2221]) -Greedy action tensor([ 1.2829, -0.5502, -0.4771, 0.4905]) tensor([0.5603, 0.0896, 0.0964, 0.2537]) -Greedy action tensor([ 0.6353, -0.5390, -0.1441, 0.1699]) tensor([0.4174, 0.1290, 0.1915, 0.2621]) -Greedy action tensor([ 1.0826, -0.2652, -0.1799, 0.0615]) tensor([0.5255, 0.1365, 0.1487, 0.1893]) -Greedy action tensor([ 1.3283, -0.1251, -0.1774, 0.0483]) tensor([0.5768, 0.1348, 0.1280, 0.1604]) -Greedy action tensor([ 0.4926, -0.2902, 0.1745, -0.0974]) tensor([0.3651, 0.1669, 0.2656, 0.2024]) -Greedy action tensor([ 0.9176, -0.3499, 0.1626, -0.0097]) tensor([0.4657, 0.1311, 0.2189, 0.1842]) -Greedy action tensor([ 1.2504, -0.6887, -0.3904, 0.1766]) tensor([0.5955, 0.0856, 0.1154, 0.2035]) -Greedy action tensor([ 0.8683, -0.0445, -0.0019, -0.2926]) tensor([0.4687, 0.1882, 0.1963, 0.1468]) -Greedy action tensor([ 1.1121, -0.6263, -0.0836, 0.1342]) tensor([0.5393, 0.0948, 0.1631, 0.2028]) -Greedy action tensor([ 0.9126, -0.4103, -0.3935, 0.1974]) tensor([0.4935, 0.1314, 0.1337, 0.2414]) -Greedy action tensor([ 1.1734, -0.4811, -0.1403, 0.0275]) tensor([0.5624, 0.1075, 0.1512, 0.1788]) -Greedy action tensor([ 0.8766, -0.4458, -0.1670, 0.0579]) tensor([0.4855, 0.1294, 0.1710, 0.2141]) -Greedy action tensor([ 1.2718, -0.6763, -0.5085, 1.0497]) tensor([0.4735, 0.0675, 0.0798, 0.3792]) -Greedy action tensor([ 1.3111, -0.7067, -0.1615, 0.2716]) tensor([0.5828, 0.0775, 0.1336, 0.2061]) -Greedy action tensor([ 1.0339, -0.2679, -0.3639, 0.1392]) tensor([0.5187, 0.1411, 0.1282, 0.2120]) -Greedy action tensor([ 0.8203, -0.0225, -0.3316, -0.3605]) tensor([0.4870, 0.2096, 0.1539, 0.1495]) -Greedy action tensor([ 1.2920, -0.5683, -0.2587, 0.1929]) tensor([0.5879, 0.0915, 0.1247, 0.1959]) -Greedy action tensor([ 0.4179, -0.0893, 0.1503, -0.0214]) tensor([0.3320, 0.1999, 0.2541, 0.2140]) -Greedy action tensor([ 1.9622, -0.4288, -0.5465, -0.0680]) tensor([0.7667, 0.0702, 0.0624, 0.1007]) -Greedy action tensor([ 1.0151, -0.3845, -0.1749, -0.1422]) tensor([0.5361, 0.1323, 0.1631, 0.1685]) -Greedy action tensor([ 1.0996, -0.4895, -0.1715, 0.0325]) tensor([0.5469, 0.1116, 0.1534, 0.1881]) -Greedy action tensor([ 0.4598, -0.3315, -0.0613, -0.1595]) tensor([0.3868, 0.1753, 0.2297, 0.2082]) -Greedy action tensor([ 0.8230, -0.2417, -0.3975, 0.2674]) tensor([0.4517, 0.1558, 0.1333, 0.2592]) -Greedy action tensor([ 0.4488, -0.1592, 0.1885, -0.1812]) tensor([0.3512, 0.1912, 0.2707, 0.1870]) -Greedy action tensor([ 1.2600, -0.8873, 0.9290, 1.7755]) tensor([0.2849, 0.0333, 0.2046, 0.4771]) -Greedy action tensor([ 1.4353, -0.7791, 0.8339, 0.9462]) tensor([0.4404, 0.0481, 0.2414, 0.2701]) -Greedy action tensor([ 1.5700, -1.5846, 0.3857, 0.8458]) tensor([0.5455, 0.0233, 0.1669, 0.2644]) -Greedy action tensor([ 0.8424, -0.4653, 0.2693, 1.6666]) tensor([0.2430, 0.0657, 0.1370, 0.5542]) -Greedy action tensor([ 1.5901, -0.7547, 1.1208, 1.4913]) tensor([0.3806, 0.0365, 0.2381, 0.3448]) -Greedy action tensor([ 1.0698, 0.1725, -0.8414, 1.6796]) tensor([0.2945, 0.1201, 0.0436, 0.5419]) -Greedy action tensor([ 1.4990, -0.5001, 1.7621, 1.3579]) tensor([0.3026, 0.0410, 0.3937, 0.2628]) -Greedy action tensor([ 0.4789, -0.6846, -0.3708, 1.9704]) tensor([0.1617, 0.0505, 0.0691, 0.7186]) -Greedy action tensor([ 0.6424, -0.7734, 0.6413, 0.7245]) tensor([0.3005, 0.0730, 0.3002, 0.3263]) -Greedy action tensor([ 0.7122, -1.0505, -0.3228, 0.1783]) tensor([0.4732, 0.0812, 0.1681, 0.2775]) -Greedy action tensor([0.5128, 0.4784, 1.2677, 0.1472]) tensor([0.2089, 0.2018, 0.4444, 0.1449]) -Greedy action tensor([ 0.4538, -0.4824, 0.9834, -0.1600]) tensor([0.2754, 0.1080, 0.4676, 0.1491]) -Greedy action tensor([1.5432, 0.1269, 0.2241, 0.6115]) tensor([0.5252, 0.1274, 0.1404, 0.2069]) -Greedy action tensor([ 0.2730, 0.2125, -0.2884, 0.8609]) tensor([0.2319, 0.2183, 0.1323, 0.4175]) -Greedy action tensor([ 0.9501, -0.5872, 0.3807, 0.4075]) tensor([0.4234, 0.0910, 0.2396, 0.2461]) -Greedy action tensor([ 1.1825, -1.7097, 1.4109, 0.6151]) tensor([0.3473, 0.0193, 0.4365, 0.1969]) -Greedy action tensor([-0.2797, -0.0652, -0.8109, 1.6126]) tensor([0.1057, 0.1310, 0.0621, 0.7012]) -Greedy action tensor([ 1.7036, -1.6485, 0.3668, 1.7071]) tensor([0.4346, 0.0152, 0.1142, 0.4361]) -Greedy action tensor([ 0.5254, -0.0624, 0.8818, -0.2978]) tensor([0.2922, 0.1623, 0.4173, 0.1283]) -Greedy action tensor([ 0.6734, -1.0737, 0.7816, 0.3687]) tensor([0.3305, 0.0576, 0.3682, 0.2437]) -Greedy action tensor([ 0.4371, -0.4656, -0.5928, 0.5444]) tensor([0.3477, 0.1410, 0.1242, 0.3871]) -Greedy action tensor([ 0.6334, 0.6135, -0.1261, 0.6173]) tensor([0.2914, 0.2856, 0.1363, 0.2867]) -Greedy action tensor([ 1.6040, -1.0720, 0.0029, 1.1334]) tensor([0.5277, 0.0363, 0.1064, 0.3296]) -Greedy action tensor([ 0.7886, -0.2722, -0.0626, 0.9547]) tensor([0.3386, 0.1172, 0.1445, 0.3997]) -Greedy action tensor([ 1.1779, -0.7551, 1.3626, 1.0011]) tensor([0.3139, 0.0454, 0.3776, 0.2630]) -Greedy action tensor([ 1.6935, -0.9753, 0.2238, 0.6120]) tensor([0.6103, 0.0423, 0.1404, 0.2070]) -Greedy action tensor([ 1.6514, -0.3472, 1.5526, 0.5709]) tensor([0.4200, 0.0569, 0.3805, 0.1426]) -Greedy action tensor([ 1.1667, -0.4399, 0.7917, 0.3658]) tensor([0.4279, 0.0858, 0.2941, 0.1921]) -Greedy action tensor([ 0.5982, 0.2624, -0.0992, 2.3413]) tensor([0.1261, 0.0902, 0.0628, 0.7209]) -Greedy action tensor([ 0.7106, -1.9712, -0.2640, 0.5605]) tensor([0.4336, 0.0297, 0.1636, 0.3731]) -Greedy action tensor([ 0.7557, 0.4556, -0.1960, 0.4177]) tensor([0.3521, 0.2608, 0.1359, 0.2511]) -Greedy action tensor([ 0.3731, 0.1807, -0.4708, 1.5391]) tensor([0.1830, 0.1510, 0.0787, 0.5873]) -Greedy action tensor([ 0.6740, -0.4995, -0.4899, 1.0381]) tensor([0.3267, 0.1011, 0.1020, 0.4702]) -Greedy action tensor([ 0.6499, -0.2563, -0.3325, 0.5033]) tensor([0.3785, 0.1529, 0.1417, 0.3269]) -Greedy action tensor([ 0.4467, 0.2609, -0.6036, 0.9870]) tensor([0.2566, 0.2131, 0.0898, 0.4405]) -Greedy action tensor([ 1.0250, -0.0545, 0.6840, 0.8759]) tensor([0.3434, 0.1167, 0.2442, 0.2958]) -Greedy action tensor([ 0.5520, 0.9155, -0.2841, 1.1403]) tensor([0.2140, 0.3078, 0.0928, 0.3854]) -Greedy action tensor([ 1.3646, 0.4762, -0.2400, 0.5149]) tensor([0.4902, 0.2016, 0.0985, 0.2096]) -Greedy action tensor([ 1.3521, -1.6482, -0.4011, 0.9257]) tensor([0.5331, 0.0265, 0.0923, 0.3480]) -Greedy action tensor([-0.2810, 0.0500, 1.4955, -0.3072]) tensor([0.1078, 0.1501, 0.6370, 0.1050]) -Greedy action tensor([ 1.3297, 0.5447, -0.8040, 0.7357]) tensor([0.4702, 0.2145, 0.0557, 0.2596]) -Greedy action tensor([ 1.7549, -0.5157, -0.2163, 1.5729]) tensor([0.4817, 0.0497, 0.0671, 0.4015]) -Greedy action tensor([0.6287, 0.5691, 0.0679, 0.2190]) tensor([0.3148, 0.2966, 0.1797, 0.2090]) -Greedy action tensor([ 1.1661, -2.0858, 0.4942, 0.6621]) tensor([0.4643, 0.0180, 0.2372, 0.2805]) -Greedy action tensor([ 0.2953, 0.7586, 0.5322, -0.2120]) tensor([0.2243, 0.3565, 0.2842, 0.1350]) -Greedy action tensor([ 0.0952, -0.9928, -0.0193, 0.2018]) tensor([0.2993, 0.1008, 0.2669, 0.3330]) -Greedy action tensor([ 0.9278, -1.0802, 0.7404, 1.5056]) tensor([0.2670, 0.0358, 0.2214, 0.4758]) -Greedy action tensor([1.6287, 1.1391, 0.8208, 1.1653]) tensor([0.3720, 0.2280, 0.1659, 0.2341]) -Greedy action tensor([ 0.4771, -2.3376, -0.2309, 0.5834]) tensor([0.3753, 0.0225, 0.1849, 0.4174]) -Greedy action tensor([ 1.1564, -0.0887, 0.5770, 1.0871]) tensor([0.3596, 0.1035, 0.2014, 0.3355]) -Greedy action tensor([ 0.6577, 0.1919, -0.1246, 0.0580]) tensor([0.3797, 0.2383, 0.1736, 0.2084]) -Greedy action tensor([ 0.2371, -0.1969, 0.9241, 1.7819]) tensor([0.1202, 0.0779, 0.2388, 0.5632]) -Greedy action tensor([-0.5972, -1.7843, -0.6199, 1.9945]) tensor([0.0640, 0.0195, 0.0625, 0.8540]) -Greedy action tensor([ 0.7977, 0.1893, -0.9635, 0.8945]) tensor([0.3549, 0.1931, 0.0610, 0.3910]) -Greedy action tensor([ 1.2300, 0.4315, -0.6918, 0.8975]) tensor([0.4322, 0.1945, 0.0633, 0.3100]) -Greedy action tensor([ 0.3016, 0.0838, -0.7168, 0.8305]) tensor([0.2589, 0.2082, 0.0935, 0.4394]) -Greedy action tensor([ 0.1878, -0.1307, -0.2619, -0.1118]) tensor([0.3219, 0.2341, 0.2053, 0.2386]) -Greedy action tensor([ 0.1351, -0.2157, -0.5263, 0.2652]) tensor([0.2977, 0.2096, 0.1536, 0.3391]) -Greedy action tensor([ 1.0885, 0.6123, -0.3641, 0.1465]) tensor([0.4454, 0.2767, 0.1042, 0.1737]) -Greedy action tensor([ 0.9955, -0.4485, -0.0739, 0.2455]) tensor([0.4874, 0.1150, 0.1673, 0.2302]) -Greedy action tensor([ 0.6012, -0.2061, -0.5801, 0.9719]) tensor([0.3123, 0.1393, 0.0958, 0.4525]) -Greedy action tensor([ 1.9094, -0.0026, -0.4638, 0.5864]) tensor([0.6634, 0.0980, 0.0618, 0.1767]) -Greedy action tensor([ 0.9904, 0.6334, -0.3504, 0.5410]) tensor([0.3847, 0.2692, 0.1007, 0.2454]) -Greedy action tensor([ 0.1611, -0.5618, -0.5552, 1.0953]) tensor([0.2213, 0.1074, 0.1081, 0.5632]) -Greedy action tensor([ 0.9375, 0.3021, -0.4312, 0.6611]) tensor([0.3933, 0.2083, 0.1001, 0.2983]) -Greedy action tensor([ 1.2093, -0.5066, 1.7586, 0.9407]) tensor([0.2720, 0.0489, 0.4712, 0.2079]) -Greedy action tensor([ 0.8278, 0.8644, -0.7045, 0.8379]) tensor([0.3064, 0.3178, 0.0662, 0.3095]) -Greedy action tensor([-0.0355, -1.2280, -0.3037, 1.6754]) tensor([0.1315, 0.0399, 0.1006, 0.7279]) -Greedy action tensor([-0.1233, -0.3110, -0.8998, 0.4198]) tensor([0.2494, 0.2067, 0.1147, 0.4292]) -Greedy action tensor([ 0.4480, -1.5241, -0.2603, 0.3509]) tensor([0.3938, 0.0548, 0.1940, 0.3574]) -Greedy action tensor([ 0.3893, 0.1239, -0.0180, -0.5954]) tensor([0.3564, 0.2733, 0.2372, 0.1331]) -Greedy action tensor([0.6229, 0.8470, 0.1602, 1.2705]) tensor([0.2087, 0.2611, 0.1314, 0.3988]) -Greedy action tensor([ 0.9911, 0.4480, -0.7014, 0.5587]) tensor([0.4143, 0.2406, 0.0762, 0.2688]) -Greedy action tensor([0.5738, 0.3763, 0.0931, 1.1705]) tensor([0.2350, 0.1929, 0.1453, 0.4268]) -Greedy action tensor([ 0.8616, 0.1724, 1.0449, -0.1887]) tensor([0.3275, 0.1644, 0.3935, 0.1146]) -Greedy action tensor([ 1.4134, -0.5711, 1.1212, 0.1534]) tensor([0.4613, 0.0634, 0.3444, 0.1309]) -Greedy action tensor([-0.3685, 0.7594, 0.6562, -0.9037]) tensor([0.1340, 0.4140, 0.3734, 0.0785]) -Greedy action tensor([ 0.4842, -0.2609, -0.4391, 0.7214]) tensor([0.3185, 0.1512, 0.1265, 0.4038]) -Greedy action tensor([ 0.6011, -0.0955, -0.3539, 0.8521]) tensor([0.3156, 0.1573, 0.1214, 0.4057]) -Greedy action tensor([0.0898, 0.0432, 0.5954, 1.2076]) tensor([0.1499, 0.1431, 0.2485, 0.4585]) -Greedy action tensor([ 0.3329, -1.1187, -0.0981, 0.8854]) tensor([0.2761, 0.0647, 0.1794, 0.4798]) -Greedy action tensor([ 0.9869, 0.2066, -0.2128, 0.3508]) tensor([0.4369, 0.2002, 0.1316, 0.2313]) -Greedy action tensor([-1.8821, -0.4992, 0.8563, 0.2672]) tensor([0.0345, 0.1373, 0.5327, 0.2956]) -Greedy action tensor([-0.7452, -0.5959, 1.0424, 1.6419]) tensor([0.0526, 0.0611, 0.3142, 0.5722]) -Greedy action tensor([-1.8594, -0.8281, 0.1766, -0.3864]) tensor([0.0632, 0.1772, 0.4840, 0.2756]) -Greedy action tensor([-0.1501, 0.4263, 0.6434, 1.5750]) tensor([0.0943, 0.1678, 0.2085, 0.5293]) -Greedy action tensor([-1.2920, -0.5410, 0.4387, -0.0368]) tensor([0.0815, 0.1727, 0.4600, 0.2859]) -Greedy action tensor([-1.3714, -0.1738, 1.1193, 1.0114]) tensor([0.0367, 0.1217, 0.4435, 0.3981]) -Greedy action tensor([-1.8268, -0.6657, 0.4603, -0.2418]) tensor([0.0529, 0.1688, 0.5204, 0.2579]) -Greedy action tensor([-0.4278, -0.3537, 1.1478, 1.6293]) tensor([0.0679, 0.0731, 0.3281, 0.5310]) -Greedy action tensor([-1.6970, -0.4436, 0.7601, 0.3868]) tensor([0.0413, 0.1447, 0.4821, 0.3319]) -Greedy action tensor([-1.6232, -0.4873, 0.6970, -0.4569]) tensor([0.0571, 0.1779, 0.5815, 0.1834]) -Greedy action tensor([-1.5326, -0.4347, 1.0975, 1.0324]) tensor([0.0324, 0.0971, 0.4494, 0.4211]) -Greedy action tensor([-1.8761, -0.4604, 0.6301, -0.1416]) tensor([0.0434, 0.1788, 0.5320, 0.2459]) -Greedy action tensor([-1.7810, -0.8886, 0.5309, -0.1339]) tensor([0.0534, 0.1303, 0.5390, 0.2772]) -Greedy action tensor([-1.1643, -0.5083, 0.7474, 1.2015]) tensor([0.0492, 0.0947, 0.3325, 0.5236]) -Greedy action tensor([-2.0196, -0.7534, 1.2397, 0.5709]) tensor([0.0228, 0.0808, 0.5928, 0.3037]) -Greedy action tensor([-1.7838, -0.7191, 0.0502, -0.3266]) tensor([0.0692, 0.2006, 0.4331, 0.2971]) -Greedy action tensor([-1.6886, 0.3779, 0.4545, 0.1360]) tensor([0.0423, 0.3343, 0.3609, 0.2625]) -Greedy action tensor([-1.9515, -0.4243, 0.9783, 0.4082]) tensor([0.0286, 0.1319, 0.5362, 0.3032]) -Greedy action tensor([-1.2552, -0.5508, 0.3149, 0.2182]) tensor([0.0820, 0.1659, 0.3942, 0.3579]) -Greedy action tensor([-0.3794, -0.3804, 0.1826, 0.1580]) tensor([0.1830, 0.1828, 0.3210, 0.3132]) -Greedy action tensor([-1.8953, -0.4603, 0.6459, -0.1473]) tensor([0.0423, 0.1777, 0.5371, 0.2430]) -Greedy action tensor([-1.5720, -0.5387, 0.5324, 0.2461]) tensor([0.0550, 0.1546, 0.4513, 0.3390]) -Greedy action tensor([-1.9655, -0.5364, 1.2806, 0.5958]) tensor([0.0228, 0.0953, 0.5863, 0.2956]) -Greedy action tensor([-1.7867, -0.0717, 0.5363, -0.1327]) tensor([0.0455, 0.2527, 0.4641, 0.2377]) -Greedy action tensor([-1.0772, -0.5647, 0.4738, 0.9239]) tensor([0.0676, 0.1129, 0.3190, 0.5004]) -Greedy action tensor([-0.8051, 0.0817, 0.8404, 1.3038]) tensor([0.0593, 0.1441, 0.3076, 0.4890]) -Greedy action tensor([-0.4292, -0.3199, 1.1430, 1.5899]) tensor([0.0691, 0.0771, 0.3330, 0.5207]) -Greedy action tensor([-1.3245, -0.5339, 0.2595, 0.4918]) tensor([0.0703, 0.1550, 0.3426, 0.4322]) -Greedy action tensor([-1.1978, 0.5609, 0.0078, 0.0915]) tensor([0.0726, 0.4214, 0.2424, 0.2636]) -Greedy action tensor([-1.8051, -0.4673, 0.6176, -0.0651]) tensor([0.0459, 0.1749, 0.5176, 0.2615]) -Greedy action tensor([-1.0288, -0.6055, 0.3759, 0.3981]) tensor([0.0929, 0.1418, 0.3784, 0.3869]) -Greedy action tensor([-1.2019, -0.5563, 0.4035, 0.1908]) tensor([0.0839, 0.1601, 0.4180, 0.3379]) -Greedy action tensor([-1.1944, -0.6431, 0.6069, 0.4913]) tensor([0.0705, 0.1223, 0.4269, 0.3803]) -Greedy action tensor([-1.2660, -0.5954, 0.5169, 0.6454]) tensor([0.0638, 0.1248, 0.3796, 0.4317]) -Greedy action tensor([-1.8521, -0.7200, 0.7018, -0.0819]) tensor([0.0438, 0.1359, 0.5631, 0.2572]) -Greedy action tensor([-1.6574, -0.9980, 0.2523, -0.6045]) tensor([0.0797, 0.1541, 0.5379, 0.2283]) -Greedy action tensor([-1.0982, -0.4486, 0.5128, 0.8924]) tensor([0.0656, 0.1256, 0.3285, 0.4802]) -Greedy action tensor([-1.8736, -0.4608, 0.6500, -0.0948]) tensor([0.0425, 0.1748, 0.5307, 0.2520]) -Greedy action tensor([-1.3337, -0.5035, 0.5337, -0.2818]) tensor([0.0792, 0.1816, 0.5125, 0.2267]) -Greedy action tensor([-1.7257, 0.9847, 0.5384, 0.2927]) tensor([0.0301, 0.4531, 0.2900, 0.2268]) -Greedy action tensor([-1.6452, -0.3387, 0.7488, 0.5099]) tensor([0.0412, 0.1521, 0.4513, 0.3554]) -Greedy action tensor([-1.0134, -0.2755, 1.0132, 1.2659]) tensor([0.0489, 0.1023, 0.3711, 0.4777]) -Greedy action tensor([-2.0360, -0.9033, 0.5255, -0.1521]) tensor([0.0423, 0.1313, 0.5480, 0.2783]) -Greedy action tensor([-1.3756, -0.5994, 0.3697, 0.1352]) tensor([0.0745, 0.1618, 0.4264, 0.3373]) -Greedy action tensor([-1.6743, -0.5098, 0.5198, 0.0410]) tensor([0.0534, 0.1710, 0.4789, 0.2967]) -Greedy action tensor([-0.5132, -0.3429, 0.2089, 0.4401]) tensor([0.1462, 0.1734, 0.3011, 0.3793]) -Greedy action tensor([-1.5797, -0.2037, 0.5879, 0.3063]) tensor([0.0493, 0.1951, 0.4306, 0.3250]) -Greedy action tensor([-1.8556, -0.4501, 0.6190, -0.1337]) tensor([0.0443, 0.1808, 0.5267, 0.2481]) -Greedy action tensor([-1.3456, -0.2312, 1.1260, 1.0596]) tensor([0.0371, 0.1130, 0.4390, 0.4109]) -Greedy action tensor([-1.5099, -0.4642, 0.5711, 0.4669]) tensor([0.0524, 0.1491, 0.4200, 0.3784]) -Greedy action tensor([-1.7162, -0.5786, 1.4198, 1.0162]) tensor([0.0235, 0.0734, 0.5414, 0.3616]) -Greedy action tensor([-1.8558, -0.6466, 0.8741, 0.2715]) tensor([0.0356, 0.1194, 0.5461, 0.2989]) -Greedy action tensor([-0.3265, 1.3137, 0.4040, 0.5940]) tensor([0.0931, 0.4800, 0.1933, 0.2337]) -Greedy action tensor([-0.7567, -0.6507, 0.5400, -0.1596]) tensor([0.1318, 0.1466, 0.4821, 0.2395]) -Greedy action tensor([-0.9828, -0.5879, 0.3124, 0.3766]) tensor([0.0997, 0.1480, 0.3641, 0.3882]) -Greedy action tensor([-1.7635, -0.4836, 0.4177, -0.0482]) tensor([0.0526, 0.1892, 0.4659, 0.2924]) -Greedy action tensor([-1.2446, -0.5405, 0.4437, 0.7158]) tensor([0.0644, 0.1302, 0.3483, 0.4572]) -Greedy action tensor([-1.4175, -0.7090, 0.4658, 0.2474]) tensor([0.0672, 0.1364, 0.4416, 0.3549]) -Greedy action tensor([-1.2494, -0.4055, 1.2527, 1.2118]) tensor([0.0367, 0.0853, 0.4480, 0.4300]) -Greedy action tensor([-1.8267, -0.2646, 0.0880, -0.4134]) tensor([0.0600, 0.2862, 0.4072, 0.2466]) -Greedy action tensor([-1.9260, -0.4508, 0.6559, -0.1718]) tensor([0.0410, 0.1794, 0.5425, 0.2371]) -Greedy action tensor([-1.1495, -0.5623, 0.4487, 0.7185]) tensor([0.0703, 0.1265, 0.3477, 0.4554]) -Greedy action tensor([-1.5679, -0.4504, 0.5936, 0.2574]) tensor([0.0528, 0.1614, 0.4584, 0.3275]) -Greedy action tensor([-1.1668, -0.5973, 0.3169, 0.1664]) tensor([0.0912, 0.1611, 0.4019, 0.3458]) -Greedy action tensor([-1.6742, 0.9928, 0.7326, 0.1039]) tensor([0.0309, 0.4442, 0.3424, 0.1826]) -Greedy action tensor([-1.8828, -0.9721, 0.1350, -0.4957]) tensor([0.0666, 0.1656, 0.5011, 0.2667]) -Greedy action tensor([-1.8615, -0.4749, 0.6501, -0.1100]) tensor([0.0433, 0.1733, 0.5338, 0.2496]) -Greedy action tensor([-0.3112, -0.1216, 0.7082, 1.4771]) tensor([0.0912, 0.1103, 0.2529, 0.5456]) -Greedy action tensor([-1.9353, -0.7715, 1.3066, 0.7686]) tensor([0.0224, 0.0716, 0.5720, 0.3340]) -Greedy action tensor([-1.5669, -0.3542, 0.4764, 0.1082]) tensor([0.0574, 0.1930, 0.4430, 0.3065]) -Greedy action tensor([-1.9691, -0.5639, 0.9589, 0.0912]) tensor([0.0316, 0.1289, 0.5912, 0.2483]) -Greedy action tensor([-1.5366, -0.4047, 0.6090, 0.4784]) tensor([0.0496, 0.1539, 0.4242, 0.3723]) -Greedy action tensor([-1.4387, 0.1878, 0.3479, 0.0255]) tensor([0.0611, 0.3105, 0.3644, 0.2640]) -Greedy action tensor([-1.1882, -0.4657, 0.9725, 1.2534]) tensor([0.0431, 0.0887, 0.3736, 0.4947]) -Greedy action tensor([-0.7320, -0.7242, 0.3778, 0.2524]) tensor([0.1296, 0.1306, 0.3931, 0.3468]) -Greedy action tensor([-0.2641, -0.4577, 0.3032, 0.3773]) tensor([0.1823, 0.1502, 0.3214, 0.3461]) -Greedy action tensor([-1.3148, -0.7041, 0.6347, 0.1565]) tensor([0.0703, 0.1295, 0.4940, 0.3062]) -Greedy action tensor([-1.2107, -0.4929, 0.4268, 0.7814]) tensor([0.0644, 0.1321, 0.3313, 0.4723]) -Greedy action tensor([-1.2964, -0.6362, -0.2019, -0.2614]) tensor([0.1144, 0.2215, 0.3419, 0.3222]) -Greedy action tensor([-1.7481, -0.6423, 0.0091, -0.4185]) tensor([0.0735, 0.2222, 0.4263, 0.2780]) -Greedy action tensor([-1.7631, -0.4602, 0.5827, -0.0436]) tensor([0.0483, 0.1778, 0.5043, 0.2696]) -Greedy action tensor([ 0.7061, -0.3848, -0.1196, -0.4508]) tensor([0.4789, 0.1609, 0.2097, 0.1506]) -Greedy action tensor([ 0.4449, 0.0073, -0.0401, -0.2053]) tensor([0.3593, 0.2320, 0.2212, 0.1875]) -Greedy action tensor([ 0.3741, 0.1214, -0.0687, -0.0997]) tensor([0.3288, 0.2554, 0.2112, 0.2047]) -Greedy action tensor([ 1.1215, -0.8765, -0.1145, -0.5298]) tensor([0.6181, 0.0838, 0.1796, 0.1185]) -Greedy action tensor([ 0.7940, -0.4089, -0.1520, -0.6146]) tensor([0.5173, 0.1554, 0.2009, 0.1265]) -Greedy action tensor([ 0.5731, -0.2162, -0.0441, -0.4448]) tensor([0.4246, 0.1928, 0.2291, 0.1534]) -Greedy action tensor([ 0.5059, -0.2555, 0.0390, -0.2877]) tensor([0.3928, 0.1834, 0.2462, 0.1776]) -Greedy action tensor([ 0.8831, -0.3078, -0.1549, -0.5245]) tensor([0.5255, 0.1597, 0.1861, 0.1286]) -Greedy action tensor([ 0.3379, 0.0009, 0.0562, -0.0774]) tensor([0.3196, 0.2282, 0.2412, 0.2110]) -Greedy action tensor([ 0.7528, -0.4501, -0.0471, -0.4431]) tensor([0.4873, 0.1463, 0.2190, 0.1474]) -Greedy action tensor([ 0.8073, -0.1771, 0.1065, -0.5154]) tensor([0.4681, 0.1749, 0.2323, 0.1247]) -Greedy action tensor([ 0.5587, -0.3673, -0.0781, -0.3681]) tensor([0.4308, 0.1707, 0.2279, 0.1706]) -Greedy action tensor([ 0.4957, -0.2035, 0.0335, -0.3040]) tensor([0.3881, 0.1929, 0.2445, 0.1745]) -Greedy action tensor([ 0.6309, -0.4984, -0.0398, -0.2895]) tensor([0.4478, 0.1448, 0.2290, 0.1784]) -Greedy action tensor([ 0.8271, -0.5026, 0.0888, -0.4697]) tensor([0.4961, 0.1312, 0.2371, 0.1356]) -Greedy action tensor([ 0.5653, 0.0176, -0.0310, -0.3556]) tensor([0.3957, 0.2288, 0.2180, 0.1575]) -Greedy action tensor([ 0.6816, -0.1362, 0.2750, -0.3293]) tensor([0.4047, 0.1786, 0.2695, 0.1472]) -Greedy action tensor([ 0.5041, -0.0154, 0.1102, -0.1922]) tensor([0.3613, 0.2149, 0.2437, 0.1801]) -Greedy action tensor([ 0.4366, 0.1899, 0.0736, -0.2489]) tensor([0.3355, 0.2621, 0.2334, 0.1690]) -Greedy action tensor([ 0.5295, -0.1088, -0.1167, -0.1678]) tensor([0.3921, 0.2071, 0.2055, 0.1953]) -Greedy action tensor([ 0.4625, -0.0582, -0.0059, -0.1182]) tensor([0.3598, 0.2137, 0.2252, 0.2013]) -Greedy action tensor([ 0.2565, -0.1328, -0.1239, -0.5001]) tensor([0.3533, 0.2394, 0.2415, 0.1658]) -Greedy action tensor([ 0.4900, -0.0576, 0.0387, -0.5203]) tensor([0.3877, 0.2242, 0.2469, 0.1412]) -Greedy action tensor([ 0.3571, 0.0605, -0.0128, -0.3078]) tensor([0.3392, 0.2521, 0.2343, 0.1744]) -Greedy action tensor([ 0.7708, -0.3307, 0.0055, -0.3231]) tensor([0.4689, 0.1559, 0.2182, 0.1571]) -Greedy action tensor([ 0.9893, -0.5365, -0.1217, -0.4924]) tensor([0.5637, 0.1226, 0.1856, 0.1281]) -Greedy action tensor([ 0.5820, -0.3318, -0.1300, -0.5305]) tensor([0.4504, 0.1806, 0.2210, 0.1480]) -Greedy action tensor([ 0.5103, -0.1487, -0.0207, -0.3082]) tensor([0.3927, 0.2032, 0.2309, 0.1732]) -Greedy action tensor([ 8.3275e-01, -3.7307e-01, 7.5400e-05, -5.3917e-01]) tensor([0.5030, 0.1506, 0.2188, 0.1276]) -Greedy action tensor([ 0.6968, -0.2168, -0.0181, -0.2754]) tensor([0.4408, 0.1768, 0.2157, 0.1667]) -Greedy action tensor([ 0.5395, -0.3014, -0.0683, -0.3528]) tensor([0.4192, 0.1808, 0.2283, 0.1717]) -Greedy action tensor([ 0.7127, -0.4259, 0.0751, -0.4869]) tensor([0.4651, 0.1490, 0.2458, 0.1401]) -Greedy action tensor([ 0.4562, -0.1044, 0.2241, -0.3680]) tensor([0.3569, 0.2037, 0.2829, 0.1565]) -Greedy action tensor([ 0.8722, 0.2395, -0.1267, -0.2875]) tensor([0.4519, 0.2400, 0.1664, 0.1417]) -Greedy action tensor([ 0.9890, -0.5013, 0.1111, -0.7909]) tensor([0.5526, 0.1245, 0.2297, 0.0932]) -Greedy action tensor([ 0.5142, -0.0461, -0.0323, -0.1708]) tensor([0.3768, 0.2152, 0.2181, 0.1899]) -Greedy action tensor([ 0.3493, 0.1778, -0.0451, 0.0193]) tensor([0.3091, 0.2604, 0.2084, 0.2222]) -Greedy action tensor([ 0.4494, -0.1748, -0.0802, -0.3858]) tensor([0.3909, 0.2094, 0.2302, 0.1696]) -Greedy action tensor([ 0.2840, 0.0320, 0.0179, -0.1436]) tensor([0.3129, 0.2432, 0.2398, 0.2041]) -Greedy action tensor([ 0.1764, 0.2429, 0.0624, -0.3944]) tensor([0.2836, 0.3031, 0.2530, 0.1602]) -Greedy action tensor([ 0.6044, -0.1490, -0.0436, -0.1744]) tensor([0.4077, 0.1919, 0.2133, 0.1871]) -Greedy action tensor([ 0.0825, -0.1239, 0.0076, -0.2792]) tensor([0.2909, 0.2366, 0.2699, 0.2026]) -Greedy action tensor([ 0.6333, -0.5300, 0.1183, -0.3248]) tensor([0.4360, 0.1362, 0.2605, 0.1673]) -Greedy action tensor([ 0.2863, -0.1473, 0.0653, -0.1587]) tensor([0.3236, 0.2097, 0.2594, 0.2073]) -Greedy action tensor([ 0.4864, 0.1875, 0.0244, -0.4276]) tensor([0.3607, 0.2675, 0.2272, 0.1446]) -Greedy action tensor([ 0.5430, -0.0164, -0.0438, -0.2144]) tensor([0.3851, 0.2201, 0.2142, 0.1806]) -Greedy action tensor([ 0.3846, 0.2424, -0.0287, -0.0801]) tensor([0.3167, 0.2747, 0.2095, 0.1990]) -Greedy action tensor([ 0.4134, 0.0348, 0.0122, -0.2611]) tensor([0.3492, 0.2391, 0.2338, 0.1779]) -Greedy action tensor([ 0.3383, 0.0928, 0.0419, -0.2599]) tensor([0.3251, 0.2544, 0.2417, 0.1788]) -Greedy action tensor([ 0.5708, -0.1584, 0.1713, -0.3008]) tensor([0.3889, 0.1876, 0.2608, 0.1627]) -Greedy action tensor([ 0.9349, -0.4927, 0.0172, -0.4572]) tensor([0.5297, 0.1271, 0.2116, 0.1317]) -Greedy action tensor([ 0.6167, -0.3658, -0.0835, -0.4727]) tensor([0.4530, 0.1696, 0.2249, 0.1524]) -Greedy action tensor([ 0.1283, 0.2105, 0.0447, -0.2411]) tensor([0.2705, 0.2937, 0.2488, 0.1870]) -Greedy action tensor([ 0.7210, -0.6309, -0.0671, -0.7038]) tensor([0.5118, 0.1324, 0.2327, 0.1231]) -Greedy action tensor([ 0.9978, -0.6657, 0.0524, -0.4180]) tensor([0.5492, 0.1041, 0.2134, 0.1333]) -Greedy action tensor([ 1.0776, -0.4826, 0.0380, -0.4541]) tensor([0.5619, 0.1180, 0.1987, 0.1215]) -Greedy action tensor([ 0.5715, -0.3668, 0.1172, -0.5353]) tensor([0.4243, 0.1660, 0.2694, 0.1403]) -Greedy action tensor([ 0.6377, -0.4593, -0.0295, -0.4569]) tensor([0.4584, 0.1530, 0.2352, 0.1534]) -Greedy action tensor([ 0.5770, 0.1539, -0.1558, -0.1261]) tensor([0.3801, 0.2490, 0.1827, 0.1882]) -Greedy action tensor([ 0.6534, -0.3469, -0.0766, -0.3744]) tensor([0.4530, 0.1666, 0.2183, 0.1621]) -Greedy action tensor([ 0.4089, -0.1734, -0.0398, -0.3764]) tensor([0.3769, 0.2106, 0.2406, 0.1719]) -Greedy action tensor([ 0.5502, -0.2923, -0.1394, -0.2425]) tensor([0.4193, 0.1806, 0.2104, 0.1898]) -Greedy action tensor([ 0.2215, 0.1799, 0.0259, -0.1078]) tensor([0.2856, 0.2740, 0.2349, 0.2055]) -Greedy action tensor([ 0.4948, -0.1603, -0.0245, -0.4363]) tensor([0.3987, 0.2070, 0.2372, 0.1571]) -Greedy action tensor([ 0.0944, -0.0173, -0.0175, -0.0647]) tensor([0.2746, 0.2456, 0.2455, 0.2342]) -Greedy action tensor([ 0.6920, 0.1674, -0.0716, 0.0247]) tensor([0.3890, 0.2302, 0.1813, 0.1996]) -Greedy action tensor([ 0.6169, -0.4673, -0.1205, -0.5827]) tensor([0.4722, 0.1597, 0.2259, 0.1423]) -Greedy action tensor([ 0.5776, -0.4310, -0.0674, -0.5408]) tensor([0.4512, 0.1646, 0.2368, 0.1475]) -Greedy action tensor([ 0.8286, -0.6584, -0.0796, -0.4534]) tensor([0.5244, 0.1186, 0.2115, 0.1455]) -Greedy action tensor([ 0.8788, -0.4888, -0.0729, -0.4245]) tensor([0.5229, 0.1332, 0.2019, 0.1420]) -Greedy action tensor([-0.0219, 0.1724, -0.0170, -0.3025]) tensor([0.2516, 0.3056, 0.2528, 0.1900]) -Greedy action tensor([ 0.9179, -0.4837, 0.0278, -0.7308]) tensor([0.5408, 0.1331, 0.2221, 0.1040]) -Greedy action tensor([ 0.3652, 0.2250, -0.1106, -0.3303]) tensor([0.3345, 0.2907, 0.2079, 0.1669]) -Greedy action tensor([ 0.7384, -0.5865, -0.0845, -0.3768]) tensor([0.4919, 0.1308, 0.2160, 0.1613]) -Greedy action tensor([ 0.4925, -0.0623, -0.0137, -0.2812]) tensor([0.3790, 0.2176, 0.2285, 0.1748]) -Greedy action tensor([ 0.5873, -0.1854, 0.0015, -0.3166]) tensor([0.4126, 0.1905, 0.2297, 0.1671]) -Greedy action tensor([ 0.5371, -0.2549, -0.0901, -0.4653]) tensor([0.4248, 0.1924, 0.2269, 0.1559]) -Greedy action tensor([ 0.7325, -0.3047, -0.0022, -0.4176]) tensor([0.4650, 0.1648, 0.2230, 0.1472]) -Greedy action tensor([ 0.4064, 0.1150, -0.0384, -0.2443]) tensor([0.3437, 0.2568, 0.2203, 0.1793]) -Greedy action tensor([ 1.2201, -1.0347, -0.0158, -0.6078]) tensor([0.6426, 0.0674, 0.1867, 0.1033]) -Greedy action tensor([ 0.6011, -0.2821, -0.0099, -0.4435]) tensor([0.4332, 0.1791, 0.2352, 0.1524]) -Greedy action tensor([ 1.2775, -0.7517, -0.6252, 0.4839]) tensor([0.5771, 0.0759, 0.0861, 0.2610]) -Greedy action tensor([ 0.4916, -0.0187, -0.1622, 0.3291]) tensor([0.3367, 0.2021, 0.1751, 0.2862]) -Greedy action tensor([ 1.2560, -0.2861, -0.2656, 0.2051]) tensor([0.5612, 0.1201, 0.1225, 0.1962]) -Greedy action tensor([ 0.6387, -0.3707, -0.0632, -0.0325]) tensor([0.4217, 0.1537, 0.2090, 0.2155]) -Greedy action tensor([ 1.2004, -0.4749, -0.2573, -0.0553]) tensor([0.5865, 0.1098, 0.1365, 0.1671]) -Greedy action tensor([ 0.2872, -0.1818, 0.0984, -0.1488]) tensor([0.3226, 0.2018, 0.2671, 0.2086]) -Greedy action tensor([ 0.9914, -0.6477, -0.5143, 0.4771]) tensor([0.4965, 0.0964, 0.1102, 0.2969]) -Greedy action tensor([ 1.0449, -0.4839, 0.1160, 0.0834]) tensor([0.5015, 0.1087, 0.1981, 0.1917]) -Greedy action tensor([ 0.7319, -0.1831, -0.0734, 0.0481]) tensor([0.4251, 0.1703, 0.1900, 0.2146]) -Greedy action tensor([ 0.9253, -0.3147, -0.1878, 0.0855]) tensor([0.4879, 0.1412, 0.1603, 0.2107]) -Greedy action tensor([ 1.0475, -0.4348, -0.1935, -0.2838]) tensor([0.5617, 0.1276, 0.1624, 0.1484]) -Greedy action tensor([ 1.4204, -0.7012, -0.5301, 1.2569]) tensor([0.4737, 0.0568, 0.0674, 0.4022]) -Greedy action tensor([ 0.6913, -0.2112, -0.5279, -0.3541]) tensor([0.4872, 0.1976, 0.1440, 0.1713]) -Greedy action tensor([ 1.2047, -0.2084, -0.4354, -0.2403]) tensor([0.5977, 0.1455, 0.1159, 0.1409]) -Greedy action tensor([ 0.9476, -0.5327, -0.1087, 0.0354]) tensor([0.5058, 0.1151, 0.1759, 0.2032]) -Greedy action tensor([ 0.5439, -0.3127, -0.3999, -0.0009]) tensor([0.4178, 0.1774, 0.1626, 0.2423]) -Greedy action tensor([ 1.3638, -0.4177, -0.0395, 0.1313]) tensor([0.5863, 0.0987, 0.1441, 0.1709]) -Greedy action tensor([ 0.9608, -0.3157, -0.4928, 0.0024]) tensor([0.5274, 0.1471, 0.1233, 0.2022]) -Greedy action tensor([ 0.8976, -0.5081, -0.7360, 0.1051]) tensor([0.5282, 0.1295, 0.1031, 0.2391]) -Greedy action tensor([ 0.7865, -0.3884, -0.2804, -0.0869]) tensor([0.4830, 0.1492, 0.1662, 0.2017]) -Greedy action tensor([ 0.7244, -0.3502, -0.1352, -0.1877]) tensor([0.4616, 0.1576, 0.1954, 0.1854]) -Greedy action tensor([ 1.0553, -0.2654, -0.2348, -0.1435]) tensor([0.5424, 0.1448, 0.1493, 0.1636]) -Greedy action tensor([ 1.3650, -0.2829, -0.3763, -0.1079]) tensor([0.6262, 0.1205, 0.1098, 0.1436]) -Greedy action tensor([ 1.2467, -0.4545, -0.2586, 0.1456]) tensor([0.5757, 0.1050, 0.1278, 0.1914]) -Greedy action tensor([ 0.8909, -0.3512, -0.2762, 0.3741]) tensor([0.4553, 0.1315, 0.1417, 0.2715]) -Greedy action tensor([ 0.7121, -0.3419, 0.1075, -0.1438]) tensor([0.4311, 0.1502, 0.2355, 0.1832]) -Greedy action tensor([ 1.3381, -0.8818, -0.4228, 0.6270]) tensor([0.5645, 0.0613, 0.0970, 0.2772]) -Greedy action tensor([ 0.5324, -0.2708, -0.1537, 0.2249]) tensor([0.3722, 0.1667, 0.1874, 0.2737]) -Greedy action tensor([ 1.0022, -0.5775, 0.1425, -0.1948]) tensor([0.5178, 0.1067, 0.2192, 0.1564]) -Greedy action tensor([ 0.6338, -0.1028, 0.0081, -0.0212]) tensor([0.3948, 0.1890, 0.2112, 0.2051]) -Greedy action tensor([ 0.7872, -0.3556, -0.3404, 0.3860]) tensor([0.4325, 0.1379, 0.1400, 0.2896]) -Greedy action tensor([ 1.0685, -0.5317, -0.1608, -0.1770]) tensor([0.5611, 0.1133, 0.1641, 0.1615]) -Greedy action tensor([ 1.6972, -0.6726, -0.5559, 0.5708]) tensor([0.6567, 0.0614, 0.0690, 0.2129]) -Greedy action tensor([ 1.0438, -0.7282, 0.0087, -0.1466]) tensor([0.5467, 0.0929, 0.1942, 0.1662]) -Greedy action tensor([ 1.2991, -0.3472, -0.2969, -0.1552]) tensor([0.6139, 0.1183, 0.1244, 0.1434]) -Greedy action tensor([ 0.8979, -0.3660, -0.2117, 0.1429]) tensor([0.4802, 0.1357, 0.1583, 0.2257]) -Greedy action tensor([ 1.0045, -0.2769, -0.1935, 0.0890]) tensor([0.5051, 0.1402, 0.1524, 0.2022]) -Greedy action tensor([ 1.6899, -0.7285, -0.1597, 0.3831]) tensor([0.6592, 0.0587, 0.1037, 0.1784]) -Greedy action tensor([ 0.8352, -0.3283, -0.1334, -0.0411]) tensor([0.4743, 0.1482, 0.1801, 0.1975]) -Greedy action tensor([ 0.8131, -0.1998, -0.5447, -0.1420]) tensor([0.4987, 0.1811, 0.1283, 0.1919]) -Greedy action tensor([ 0.8662, -0.6434, -0.3911, 0.1322]) tensor([0.5037, 0.1113, 0.1433, 0.2418]) -Greedy action tensor([ 0.8380, -0.6056, -0.2818, 0.2844]) tensor([0.4679, 0.1105, 0.1527, 0.2690]) -Greedy action tensor([ 1.4772, -0.6343, -0.2046, 0.1000]) tensor([0.6413, 0.0776, 0.1193, 0.1618]) -Greedy action tensor([ 1.1597, -0.6876, -0.3370, 0.7377]) tensor([0.4908, 0.0774, 0.1099, 0.3219]) -Greedy action tensor([ 1.4021, -0.0680, -0.1276, 0.0922]) tensor([0.5826, 0.1339, 0.1262, 0.1572]) -Greedy action tensor([ 1.2797, -0.5100, -0.1016, -0.0052]) tensor([0.5900, 0.0985, 0.1482, 0.1632]) -Greedy action tensor([ 1.4843, -0.6545, -0.1397, 0.1199]) tensor([0.6368, 0.0750, 0.1255, 0.1627]) -Greedy action tensor([ 1.3966, -0.2720, -0.2950, -0.1071]) tensor([0.6269, 0.1182, 0.1155, 0.1394]) -Greedy action tensor([ 1.4642, -0.2137, -0.3839, -0.0089]) tensor([0.6355, 0.1187, 0.1001, 0.1457]) -Greedy action tensor([ 0.9589, -0.2585, -0.1389, -0.0515]) tensor([0.5016, 0.1485, 0.1673, 0.1826]) -Greedy action tensor([ 1.0179, -0.3307, -0.1447, -0.0541]) tensor([0.5223, 0.1356, 0.1633, 0.1788]) -Greedy action tensor([ 1.4583, -0.4608, -0.3916, 0.1827]) tensor([0.6316, 0.0927, 0.0993, 0.1764]) -Greedy action tensor([ 1.1136, -0.0842, -0.3134, -0.2221]) tensor([0.5541, 0.1673, 0.1330, 0.1457]) -Greedy action tensor([ 1.6679, -0.4186, -0.3689, 0.1905]) tensor([0.6744, 0.0837, 0.0880, 0.1539]) -Greedy action tensor([ 1.1997, -0.6052, -0.0120, 0.2580]) tensor([0.5399, 0.0888, 0.1607, 0.2105]) -Greedy action tensor([ 1.2599, -0.5543, -0.4574, 0.5182]) tensor([0.5498, 0.0896, 0.0987, 0.2619]) -Greedy action tensor([ 0.8191, -0.4117, -0.2524, 0.2458]) tensor([0.4549, 0.1329, 0.1558, 0.2564]) -Greedy action tensor([ 1.2889, -0.4129, -0.4284, 0.1031]) tensor([0.5998, 0.1094, 0.1077, 0.1832]) -Greedy action tensor([ 1.2562, -0.3376, -0.1051, 0.1138]) tensor([0.5623, 0.1142, 0.1441, 0.1794]) -Greedy action tensor([ 0.9912, -0.4112, -0.5111, -0.2970]) tensor([0.5733, 0.1410, 0.1276, 0.1581]) -Greedy action tensor([ 1.2315, -0.5522, -0.3494, 0.1746]) tensor([0.5809, 0.0976, 0.1196, 0.2019]) -Greedy action tensor([ 1.4239, -0.6261, -0.3073, 0.1515]) tensor([0.6305, 0.0812, 0.1116, 0.1767]) -Greedy action tensor([ 0.7980, 0.0323, -0.0514, -0.1482]) tensor([0.4384, 0.2039, 0.1875, 0.1702]) -Greedy action tensor([ 1.0628, -0.3232, 0.2392, -0.3204]) tensor([0.5155, 0.1289, 0.2263, 0.1293]) -Greedy action tensor([ 1.2852, -0.6528, -0.5777, 0.6662]) tensor([0.5441, 0.0784, 0.0845, 0.2930]) -Greedy action tensor([ 1.2120, -0.3746, -0.5219, 0.5528]) tensor([0.5267, 0.1078, 0.0930, 0.2725]) -Greedy action tensor([ 1.0246, -0.5757, -0.4332, 0.1484]) tensor([0.5403, 0.1090, 0.1257, 0.2249]) -Greedy action tensor([ 0.5549, -0.0603, -0.5078, 0.0215]) tensor([0.4044, 0.2186, 0.1397, 0.2372]) -Greedy action tensor([ 0.8668, -0.6844, 0.0950, -0.0144]) tensor([0.4788, 0.1015, 0.2213, 0.1984]) -Greedy action tensor([ 0.9734, -0.4934, -0.5068, 0.1826]) tensor([0.5231, 0.1207, 0.1191, 0.2372]) -Greedy action tensor([ 0.2887, -0.1937, -0.4853, 0.3218]) tensor([0.3213, 0.1983, 0.1482, 0.3321]) -Greedy action tensor([ 0.7850, 0.0720, -0.0590, -0.4583]) tensor([0.4528, 0.2219, 0.1947, 0.1306]) -Greedy action tensor([ 0.3281, -0.0551, -0.4819, 0.1814]) tensor([0.3344, 0.2280, 0.1488, 0.2888]) -Greedy action tensor([ 0.6579, -0.2246, 0.2728, 0.1516]) tensor([0.3708, 0.1534, 0.2523, 0.2235]) -Greedy action tensor([ 0.8011, -0.3338, -0.0837, -0.0823]) tensor([0.4656, 0.1497, 0.1922, 0.1925]) -Greedy action tensor([ 0.8919, -0.3620, -0.2977, 0.1205]) tensor([0.4873, 0.1391, 0.1483, 0.2253]) -Greedy action tensor([ 1.1323, -0.4081, -0.0967, 0.0089]) tensor([0.5458, 0.1170, 0.1597, 0.1775]) -Greedy action tensor([ 1.0166, -0.2318, -0.2682, 0.0265]) tensor([0.5167, 0.1483, 0.1430, 0.1920]) -Greedy action tensor([ 1.3378, -0.4011, -0.1366, 0.2148]) tensor([0.5781, 0.1016, 0.1323, 0.1880]) -Greedy action tensor([ 1.2562, -0.7538, -0.4335, 0.7342]) tensor([0.5230, 0.0701, 0.0965, 0.3103]) -Greedy action tensor([ 1.3150, -0.3829, -0.1983, 0.4445]) tensor([0.5489, 0.1005, 0.1208, 0.2298]) -Greedy action tensor([ 0.1571, -0.5791, -0.5599, 2.2423]) tensor([0.0999, 0.0478, 0.0488, 0.8035]) -Greedy action tensor([ 0.9140, -0.7443, 0.6298, 0.5184]) tensor([0.3822, 0.0728, 0.2877, 0.2573]) -Greedy action tensor([ 0.8116, -0.7811, 0.4877, 0.4985]) tensor([0.3762, 0.0765, 0.2721, 0.2751]) -Greedy action tensor([ 1.1134, -0.2593, 0.8745, 0.4993]) tensor([0.3873, 0.0982, 0.3050, 0.2096]) -Greedy action tensor([ 1.0236, -0.2259, -0.5263, 0.5104]) tensor([0.4768, 0.1367, 0.1012, 0.2854]) -Greedy action tensor([ 1.6810, 0.3707, -0.7163, 0.8577]) tensor([0.5557, 0.1499, 0.0505, 0.2439]) -Greedy action tensor([ 1.4405, 0.9612, -0.5236, 1.5746]) tensor([0.3445, 0.2133, 0.0483, 0.3939]) -Greedy action tensor([-0.1067, 0.7769, 1.2442, 0.1248]) tensor([0.1171, 0.2833, 0.4520, 0.1476]) -Greedy action tensor([-0.1074, -1.4620, -0.5965, 0.6405]) tensor([0.2510, 0.0648, 0.1539, 0.5303]) -Greedy action tensor([1.2768, 0.1262, 0.4025, 1.2571]) tensor([0.3685, 0.1166, 0.1537, 0.3612]) -Greedy action tensor([ 0.6490, -0.2369, 0.1564, 0.1827]) tensor([0.3773, 0.1556, 0.2305, 0.2367]) -Greedy action tensor([ 1.6005, -1.3870, 1.2302, 0.9244]) tensor([0.4445, 0.0224, 0.3070, 0.2261]) -Greedy action tensor([ 0.4844, 0.3996, 0.3139, -0.0805]) tensor([0.3003, 0.2759, 0.2532, 0.1707]) -Greedy action tensor([ 0.7112, -0.2254, 0.5863, 0.6387]) tensor([0.3121, 0.1223, 0.2754, 0.2902]) -Greedy action tensor([ 0.7430, -0.0443, -0.2031, -0.2869]) tensor([0.4545, 0.2068, 0.1764, 0.1623]) -Greedy action tensor([ 1.4050, 0.1740, -0.5649, 1.0428]) tensor([0.4700, 0.1372, 0.0656, 0.3272]) -Greedy action tensor([ 0.8108, -1.5948, -0.3404, 1.0958]) tensor([0.3655, 0.0330, 0.1156, 0.4860]) -Greedy action tensor([ 0.7891, -0.5659, -0.1199, 0.6842]) tensor([0.3904, 0.1007, 0.1573, 0.3516]) -Greedy action tensor([ 0.1275, 1.1511, -0.6767, 0.7759]) tensor([0.1628, 0.4531, 0.0728, 0.3113]) -Greedy action tensor([0.6369, 0.1276, 1.1154, 0.3389]) tensor([0.2527, 0.1519, 0.4078, 0.1876]) -Greedy action tensor([ 0.5957, -0.3488, 1.1406, 1.8051]) tensor([0.1547, 0.0602, 0.2667, 0.5184]) -Greedy action tensor([-0.0065, 0.2746, 0.0601, -0.0241]) tensor([0.2285, 0.3027, 0.2443, 0.2245]) -Greedy action tensor([ 0.4856, -0.9175, 0.9615, 1.3593]) tensor([0.1904, 0.0468, 0.3065, 0.4563]) -Greedy action tensor([ 1.4767, 1.4190, -0.1779, 0.9930]) tensor([0.3634, 0.3430, 0.0695, 0.2240]) -Greedy action tensor([ 1.0197, -1.1423, 1.1631, 0.7926]) tensor([0.3261, 0.0375, 0.3764, 0.2599]) -Greedy action tensor([ 1.8992, -1.1923, 0.0558, 0.5574]) tensor([0.6825, 0.0310, 0.1080, 0.1784]) -Greedy action tensor([ 2.2640, -0.7439, 1.1131, 1.6127]) tensor([0.5299, 0.0262, 0.1676, 0.2763]) -Greedy action tensor([ 0.6944, 0.7164, -0.7215, 1.8051]) tensor([0.1886, 0.1928, 0.0458, 0.5728]) -Greedy action tensor([ 0.7368, 0.4609, -0.8404, 0.0709]) tensor([0.4034, 0.3061, 0.0833, 0.2072]) -Greedy action tensor([ 0.8749, -0.1472, -0.4484, 1.0516]) tensor([0.3547, 0.1276, 0.0944, 0.4232]) -Greedy action tensor([ 0.8242, -0.4889, -0.3364, 0.4047]) tensor([0.4465, 0.1201, 0.1399, 0.2935]) -Greedy action tensor([-0.1728, 0.2219, 0.5668, 0.1410]) tensor([0.1681, 0.2495, 0.3522, 0.2301]) -Greedy action tensor([ 0.6935, -0.1958, 0.0135, 0.4587]) tensor([0.3692, 0.1517, 0.1871, 0.2920]) -Greedy action tensor([ 0.9219, -0.9588, 1.3066, 1.9819]) tensor([0.1816, 0.0277, 0.2667, 0.5240]) -Greedy action tensor([ 0.5788, 0.1266, 0.2998, -0.5156]) tensor([0.3666, 0.2333, 0.2774, 0.1227]) -Greedy action tensor([-0.1099, 0.0056, 0.2051, 0.3349]) tensor([0.1979, 0.2221, 0.2712, 0.3088]) -Greedy action tensor([ 1.4954e+00, -7.1499e-01, 4.0612e-01, -9.0349e-04]) tensor([0.5988, 0.0657, 0.2015, 0.1341]) -Greedy action tensor([ 0.5291, 0.4771, -0.3829, 0.3987]) tensor([0.3097, 0.2940, 0.1244, 0.2718]) -Greedy action tensor([0.9094, 0.3910, 0.8991, 1.1543]) tensor([0.2589, 0.1542, 0.2562, 0.3307]) -Greedy action tensor([ 1.2692, -0.7885, 0.7850, 1.5257]) tensor([0.3293, 0.0421, 0.2029, 0.4256]) -Greedy action tensor([ 1.5618, -0.2232, 0.8186, 0.5091]) tensor([0.5019, 0.0842, 0.2387, 0.1752]) -Greedy action tensor([-0.1164, 0.7318, -0.1336, -0.2465]) tensor([0.1924, 0.4494, 0.1892, 0.1690]) -Greedy action tensor([-0.6522, 0.2185, -0.0951, -0.2386]) tensor([0.1505, 0.3594, 0.2626, 0.2275]) -Greedy action tensor([ 0.7919, 1.0164, -0.6323, 0.2291]) tensor([0.3266, 0.4088, 0.0786, 0.1860]) -Greedy action tensor([ 1.4268, -0.1449, 0.5345, 0.6390]) tensor([0.4826, 0.1002, 0.1977, 0.2195]) -Greedy action tensor([ 1.1179, -0.2681, 0.1427, 0.2620]) tensor([0.4873, 0.1219, 0.1838, 0.2071]) -Greedy action tensor([ 0.6939, -0.6392, -0.2171, 0.7163]) tensor([0.3720, 0.0981, 0.1496, 0.3804]) -Greedy action tensor([0.2446, 0.4301, 0.8807, 0.4833]) tensor([0.1865, 0.2245, 0.3523, 0.2367]) -Greedy action tensor([ 0.4617, -0.0067, -0.2975, -0.2394]) tensor([0.3861, 0.2417, 0.1807, 0.1915]) -Greedy action tensor([0.0554, 0.7144, 0.8698, 0.9655]) tensor([0.1303, 0.2518, 0.2942, 0.3237]) -Greedy action tensor([ 0.1931, -0.5038, -0.3275, 1.9674]) tensor([0.1252, 0.0624, 0.0744, 0.7381]) -Greedy action tensor([ 0.3081, -1.6956, -0.0965, 1.1648]) tensor([0.2405, 0.0324, 0.1605, 0.5665]) -Greedy action tensor([1.2397, 0.2631, 0.3121, 0.0851]) tensor([0.4791, 0.1804, 0.1895, 0.1510]) -Greedy action tensor([ 1.2277, -1.4432, 0.4890, 0.9569]) tensor([0.4330, 0.0300, 0.2068, 0.3302]) -Greedy action tensor([ 1.5879, 0.1365, -0.1549, 0.3895]) tensor([0.5845, 0.1369, 0.1023, 0.1763]) -Greedy action tensor([ 0.5720, 1.4327, -0.5601, 0.9095]) tensor([0.1965, 0.4647, 0.0633, 0.2754]) -Greedy action tensor([ 0.0964, -1.0922, -0.9670, 1.3513]) tensor([0.1939, 0.0591, 0.0669, 0.6801]) -Greedy action tensor([0.3508, 0.1134, 0.6241, 1.0568]) tensor([0.1950, 0.1538, 0.2563, 0.3950]) -Greedy action tensor([ 0.5387, 0.2646, -0.0588, -0.0617]) tensor([0.3498, 0.2659, 0.1924, 0.1919]) -Greedy action tensor([-0.2342, 0.7247, -0.2605, 1.1683]) tensor([0.1156, 0.3017, 0.1126, 0.4701]) -Greedy action tensor([ 1.3846, 0.6579, -0.1938, 0.3188]) tensor([0.4916, 0.2377, 0.1014, 0.1693]) -Greedy action tensor([ 1.0882, -0.4775, 0.9383, 0.3143]) tensor([0.3951, 0.0826, 0.3401, 0.1822]) -Greedy action tensor([ 0.3787, 0.8240, -1.2054, 0.5490]) tensor([0.2530, 0.3950, 0.0519, 0.3000]) -Greedy action tensor([ 1.7081, -0.0268, 0.8325, 1.2608]) tensor([0.4479, 0.0790, 0.1866, 0.2864]) -Greedy action tensor([ 1.9983, -0.6688, 1.0135, 0.6943]) tensor([0.5833, 0.0405, 0.2179, 0.1583]) -Greedy action tensor([ 0.9781, -1.2048, -0.3718, 1.2647]) tensor([0.3698, 0.0417, 0.0959, 0.4926]) -Greedy action tensor([-0.0781, 0.5185, -0.0293, 0.8216]) tensor([0.1581, 0.2871, 0.1660, 0.3888]) -Greedy action tensor([ 0.8162, -0.7755, -0.0717, 0.8970]) tensor([0.3705, 0.0754, 0.1525, 0.4016]) -Greedy action tensor([ 0.7430, -0.0862, -1.0197, 1.0272]) tensor([0.3405, 0.1486, 0.0584, 0.4524]) -Greedy action tensor([ 1.3114, -1.2148, 1.4523, 1.4824]) tensor([0.2926, 0.0234, 0.3369, 0.3472]) -Greedy action tensor([ 0.7081, 0.5200, -0.4680, 0.0923]) tensor([0.3735, 0.3095, 0.1152, 0.2018]) -Greedy action tensor([0.0105, 1.1378, 0.5547, 0.1395]) tensor([0.1439, 0.4443, 0.2480, 0.1637]) -Greedy action tensor([ 1.2658, -1.0436, 0.3250, 0.2953]) tensor([0.5352, 0.0532, 0.2089, 0.2028]) -Greedy action tensor([ 1.8104, 1.3371, -0.6933, 0.8383]) tensor([0.4801, 0.2991, 0.0393, 0.1816]) -Greedy action tensor([ 1.1986, -2.3993, -0.3646, 0.3037]) tensor([0.6077, 0.0166, 0.1273, 0.2483]) -Greedy action tensor([ 1.1347, 0.2756, -0.1768, 0.2223]) tensor([0.4774, 0.2022, 0.1286, 0.1917]) -Greedy action tensor([0.8899, 0.3122, 0.3077, 0.2261]) tensor([0.3796, 0.2130, 0.2120, 0.1954]) -Greedy action tensor([0.5462, 0.3180, 0.2775, 0.6561]) tensor([0.2720, 0.2165, 0.2079, 0.3036]) -Greedy action tensor([ 1.2885, -0.1296, -0.5323, 0.1247]) tensor([0.5826, 0.1411, 0.0943, 0.1819]) -Greedy action tensor([ 0.8046, -2.4872, -0.1807, 0.1629]) tensor([0.5163, 0.0192, 0.1927, 0.2718]) -Greedy action tensor([-1.4038, 1.2942, 0.5248, -0.1342]) tensor([0.0380, 0.5649, 0.2617, 0.1354]) -Greedy action tensor([ 0.8336, 0.6102, -0.3318, 0.2728]) tensor([0.3728, 0.2982, 0.1162, 0.2128]) -Greedy action tensor([ 0.4096, -0.0837, 0.0801, -0.5858]) tensor([0.3704, 0.2262, 0.2665, 0.1369]) -Greedy action tensor([ 0.3096, 0.0287, -0.0426, -0.2432]) tensor([0.3296, 0.2489, 0.2318, 0.1896]) -Greedy action tensor([ 0.6930, -0.3239, -0.0148, -0.3854]) tensor([0.4557, 0.1648, 0.2245, 0.1550]) -Greedy action tensor([ 0.8014, -0.2723, 0.1935, -0.3214]) tensor([0.4522, 0.1545, 0.2462, 0.1471]) -Greedy action tensor([ 0.4643, -0.2430, -0.0384, -0.4204]) tensor([0.3983, 0.1964, 0.2409, 0.1644]) -Greedy action tensor([ 0.6197, -0.3288, 0.0148, -0.4504]) tensor([0.4393, 0.1701, 0.2399, 0.1507]) -Greedy action tensor([ 0.9428, -0.6539, -0.2097, -0.6570]) tensor([0.5813, 0.1177, 0.1836, 0.1174]) -Greedy action tensor([ 0.8547, -0.3007, 0.0858, -0.7133]) tensor([0.5033, 0.1585, 0.2333, 0.1049]) -Greedy action tensor([ 0.3426, 0.0603, 0.0128, -0.0779]) tensor([0.3195, 0.2409, 0.2297, 0.2098]) -Greedy action tensor([ 0.2392, 0.0713, 0.0730, -0.5019]) tensor([0.3156, 0.2668, 0.2672, 0.1504]) -Greedy action tensor([ 0.8072, -0.1572, 0.0535, -0.5571]) tensor([0.4745, 0.1809, 0.2233, 0.1213]) -Greedy action tensor([ 0.7890, -0.5551, 0.2543, -0.7677]) tensor([0.4860, 0.1267, 0.2847, 0.1025]) -Greedy action tensor([ 0.5390, -0.2080, -0.0345, -0.4715]) tensor([0.4164, 0.1973, 0.2347, 0.1516]) -Greedy action tensor([ 0.3728, 0.2080, 0.1292, -0.1750]) tensor([0.3115, 0.2642, 0.2442, 0.1801]) -Greedy action tensor([ 0.3354, 0.1794, 0.0899, -0.2296]) tensor([0.3119, 0.2668, 0.2440, 0.1773]) -Greedy action tensor([ 0.3185, 0.2183, -0.0044, -0.1329]) tensor([0.3062, 0.2771, 0.2217, 0.1950]) -Greedy action tensor([ 0.2773, 0.0905, 0.0080, -0.2294]) tensor([0.3129, 0.2596, 0.2390, 0.1885]) -Greedy action tensor([ 0.8203, -0.4132, -0.0310, -0.4456]) tensor([0.5000, 0.1456, 0.2134, 0.1410]) -Greedy action tensor([ 0.7760, -0.3261, 0.0228, -0.6744]) tensor([0.4908, 0.1630, 0.2311, 0.1151]) -Greedy action tensor([ 0.4213, -0.2084, -0.0416, -0.2406]) tensor([0.3734, 0.1989, 0.2350, 0.1926]) -Greedy action tensor([ 0.7022, -0.3251, 0.1091, -0.4161]) tensor([0.4469, 0.1600, 0.2470, 0.1461]) -Greedy action tensor([ 0.8266, -0.3405, -0.1107, -0.5903]) tensor([0.5140, 0.1600, 0.2013, 0.1246]) -Greedy action tensor([ 0.7425, -0.4591, -0.2444, -0.4091]) tensor([0.5026, 0.1511, 0.1873, 0.1589]) -Greedy action tensor([ 0.3799, -0.0278, -0.0519, -0.1274]) tensor([0.3429, 0.2281, 0.2226, 0.2065]) -Greedy action tensor([ 0.6295, -0.2626, -0.0264, -0.5813]) tensor([0.4491, 0.1840, 0.2331, 0.1338]) -Greedy action tensor([ 0.4874, -0.2497, -0.0586, -0.3692]) tensor([0.4028, 0.1928, 0.2333, 0.1710]) -Greedy action tensor([ 0.5088, 0.1110, 0.0314, -0.2346]) tensor([0.3613, 0.2427, 0.2242, 0.1718]) -Greedy action tensor([ 0.5699, -0.4434, -0.0215, -0.4008]) tensor([0.4357, 0.1581, 0.2411, 0.1650]) -Greedy action tensor([ 0.7330, -0.1394, -0.1599, -0.2628]) tensor([0.4552, 0.1902, 0.1864, 0.1682]) -Greedy action tensor([ 0.8685, -0.4661, -0.0275, -0.6328]) tensor([0.5279, 0.1390, 0.2155, 0.1176]) -Greedy action tensor([ 0.8240, -0.0323, -0.0125, -0.4546]) tensor([0.4681, 0.1988, 0.2028, 0.1303]) -Greedy action tensor([ 0.8880, -0.3784, 0.2329, -0.3299]) tensor([0.4768, 0.1344, 0.2477, 0.1411]) -Greedy action tensor([ 0.5137, -0.2265, 0.0249, -0.3732]) tensor([0.3996, 0.1906, 0.2451, 0.1646]) -Greedy action tensor([ 0.5918, -0.1803, 0.0220, -0.1980]) tensor([0.4030, 0.1862, 0.2279, 0.1829]) -Greedy action tensor([ 0.4162, -0.2005, -0.0178, -0.1868]) tensor([0.3656, 0.1974, 0.2369, 0.2001]) -Greedy action tensor([ 0.4159, -0.1982, -0.0612, -0.1886]) tensor([0.3693, 0.1998, 0.2292, 0.2018]) -Greedy action tensor([ 0.9644, -0.7205, 0.2555, -0.6073]) tensor([0.5304, 0.0984, 0.2611, 0.1102]) -Greedy action tensor([ 0.2133, -0.0297, 0.0167, -0.1028]) tensor([0.2999, 0.2352, 0.2463, 0.2186]) -Greedy action tensor([ 0.5958, -0.5213, -0.0276, -0.6035]) tensor([0.4619, 0.1512, 0.2477, 0.1392]) -Greedy action tensor([ 0.6932, -0.5766, -0.0572, -0.5270]) tensor([0.4882, 0.1371, 0.2305, 0.1441]) -Greedy action tensor([ 0.6024, -0.2044, -0.0169, -0.3151]) tensor([0.4195, 0.1872, 0.2258, 0.1676]) -Greedy action tensor([ 0.5169, 0.1322, -0.1186, -0.2240]) tensor([0.3722, 0.2533, 0.1971, 0.1774]) -Greedy action tensor([ 0.4827, 0.1497, -0.0430, -0.0923]) tensor([0.3484, 0.2497, 0.2059, 0.1960]) -Greedy action tensor([ 0.5398, -0.4099, -0.0102, -0.4194]) tensor([0.4261, 0.1648, 0.2458, 0.1633]) -Greedy action tensor([ 0.6005, -0.4653, 0.2463, -0.6616]) tensor([0.4293, 0.1479, 0.3013, 0.1215]) -Greedy action tensor([ 0.8074, -0.4633, 0.1095, -0.7560]) tensor([0.5031, 0.1412, 0.2503, 0.1054]) -Greedy action tensor([ 0.8050, -0.2756, 0.3242, -0.4039]) tensor([0.4432, 0.1504, 0.2740, 0.1323]) -Greedy action tensor([ 0.2725, -0.0159, -0.0581, -0.2286]) tensor([0.3253, 0.2438, 0.2337, 0.1971]) -Greedy action tensor([ 0.5156, -0.3299, 0.0297, -0.5029]) tensor([0.4157, 0.1785, 0.2557, 0.1501]) -Greedy action tensor([ 0.1550, 0.0584, 0.1280, -0.2247]) tensor([0.2805, 0.2546, 0.2730, 0.1919]) -Greedy action tensor([ 0.5392, -0.3054, 0.0701, -0.4741]) tensor([0.4135, 0.1777, 0.2587, 0.1501]) -Greedy action tensor([ 0.3956, 0.0756, -0.0107, -0.2486]) tensor([0.3428, 0.2489, 0.2283, 0.1800]) -Greedy action tensor([ 0.5356, -0.1743, 0.0709, -0.2594]) tensor([0.3889, 0.1912, 0.2443, 0.1756]) -Greedy action tensor([ 0.4750, -0.0864, -0.1255, -0.1004]) tensor([0.3729, 0.2127, 0.2046, 0.2098]) -Greedy action tensor([ 0.5913, -0.2218, 0.0332, -0.3303]) tensor([0.4143, 0.1837, 0.2371, 0.1648]) -Greedy action tensor([ 0.7503, -0.2456, 0.0711, -0.2158]) tensor([0.4431, 0.1637, 0.2246, 0.1686]) -Greedy action tensor([ 0.4849, -0.0513, 0.0022, -0.0320]) tensor([0.3573, 0.2090, 0.2205, 0.2131]) -Greedy action tensor([ 0.5607, -0.4328, 0.1790, -0.4641]) tensor([0.4146, 0.1535, 0.2831, 0.1488]) -Greedy action tensor([ 0.8780, -0.8346, -0.2216, -0.7839]) tensor([0.5871, 0.1059, 0.1955, 0.1114]) -Greedy action tensor([ 0.7550, -0.4162, -0.0423, -0.6901]) tensor([0.5010, 0.1553, 0.2257, 0.1181]) -Greedy action tensor([ 0.4812, -0.2224, -0.0835, -0.4265]) tensor([0.4054, 0.2006, 0.2305, 0.1636]) -Greedy action tensor([ 0.5585, -0.4437, 0.0455, -0.5623]) tensor([0.4363, 0.1602, 0.2612, 0.1423]) -Greedy action tensor([ 0.2815, 0.0355, -0.0725, -0.1818]) tensor([0.3212, 0.2512, 0.2255, 0.2021]) -Greedy action tensor([ 0.8131, -0.4426, -0.0764, -0.3457]) tensor([0.4976, 0.1418, 0.2045, 0.1562]) -Greedy action tensor([ 0.3180, 0.0549, 0.0798, -0.1083]) tensor([0.3116, 0.2395, 0.2455, 0.2034]) -Greedy action tensor([ 0.5545, -0.1545, -0.0347, -0.2208]) tensor([0.3988, 0.1963, 0.2212, 0.1837]) -Greedy action tensor([ 0.4793, -0.0331, 0.0941, -0.0789]) tensor([0.3507, 0.2101, 0.2386, 0.2007]) -Greedy action tensor([ 0.4411, -0.0633, 0.0265, -0.3046]) tensor([0.3651, 0.2205, 0.2412, 0.1732]) -Greedy action tensor([ 0.4507, -0.0898, 0.0246, -0.3364]) tensor([0.3717, 0.2165, 0.2427, 0.1692]) -Greedy action tensor([ 0.6884, -0.4287, 0.0063, -0.4953]) tensor([0.4675, 0.1530, 0.2364, 0.1431]) -Greedy action tensor([ 0.5313, -0.0992, -0.0161, -0.3008]) tensor([0.3928, 0.2091, 0.2272, 0.1709]) -Greedy action tensor([ 0.3848, 0.1456, -0.0146, -0.2463]) tensor([0.3344, 0.2633, 0.2243, 0.1779]) -Greedy action tensor([ 0.3355, -0.0072, -0.0559, -0.3917]) tensor([0.3485, 0.2474, 0.2356, 0.1684]) -Greedy action tensor([ 0.3694, 0.1389, -0.0010, -0.3295]) tensor([0.3354, 0.2663, 0.2316, 0.1667]) -Greedy action tensor([ 0.6712, -0.2474, 0.1421, -0.3553]) tensor([0.4262, 0.1701, 0.2511, 0.1527]) -Greedy action tensor([ 0.7093, -0.4993, 0.0245, -0.5271]) tensor([0.4777, 0.1427, 0.2409, 0.1387]) -Greedy action tensor([ 0.8739, -0.5553, -0.0255, -0.4693]) tensor([0.5243, 0.1256, 0.2133, 0.1368]) -Greedy action tensor([ 0.5331, -0.1491, 0.2065, -0.3797]) tensor([0.3805, 0.1923, 0.2745, 0.1527]) -Greedy action tensor([ 0.8181, -0.0976, -0.0315, -0.3582]) tensor([0.4681, 0.1874, 0.2002, 0.1444]) -Greedy action tensor([ 0.5307, -0.2793, -0.0430, -0.3117]) tensor([0.4100, 0.1824, 0.2310, 0.1766]) -Greedy action tensor([ 0.5916, -0.2976, -0.0084, -0.3327]) tensor([0.4243, 0.1744, 0.2329, 0.1684]) -Greedy action tensor([-1.7774, -0.5014, 0.7348, 0.1192]) tensor([0.0424, 0.1519, 0.5230, 0.2826]) -Greedy action tensor([-1.1084, -0.6684, 0.4786, 0.2488]) tensor([0.0883, 0.1371, 0.4316, 0.3430]) -Greedy action tensor([-1.8387, -0.4759, 0.6188, -0.1119]) tensor([0.0450, 0.1760, 0.5258, 0.2532]) -Greedy action tensor([-1.5632, -0.1305, 0.7307, 0.5712]) tensor([0.0425, 0.1779, 0.4209, 0.3588]) -Greedy action tensor([-1.1309, -0.1044, 0.7031, 1.0211]) tensor([0.0536, 0.1497, 0.3356, 0.4612]) -Greedy action tensor([-0.6084, -0.0681, 0.8450, 1.4342]) tensor([0.0680, 0.1167, 0.2909, 0.5244]) -Greedy action tensor([-0.8961, -0.5813, 0.6131, 1.2517]) tensor([0.0647, 0.0886, 0.2926, 0.5541]) -Greedy action tensor([-1.1821, -0.2212, 0.9440, 1.2092]) tensor([0.0436, 0.1140, 0.3657, 0.4767]) -Greedy action tensor([-1.6255, -0.2627, 0.4501, 0.0253]) tensor([0.0553, 0.2160, 0.4406, 0.2881]) -Greedy action tensor([-1.7240, -0.5470, 0.5180, -0.1024]) tensor([0.0534, 0.1733, 0.5028, 0.2704]) -Greedy action tensor([-1.2332, -0.2892, 0.7911, 1.1046]) tensor([0.0465, 0.1195, 0.3521, 0.4818]) -Greedy action tensor([-1.2384, -0.4249, 0.4986, 0.7115]) tensor([0.0626, 0.1413, 0.3558, 0.4403]) -Greedy action tensor([-0.9651, -0.6951, 0.8876, 1.4011]) tensor([0.0517, 0.0677, 0.3297, 0.5509]) -Greedy action tensor([-1.1044, -0.0399, 0.1022, -0.3661]) tensor([0.1071, 0.3106, 0.3581, 0.2242]) -Greedy action tensor([-1.2828, -0.6213, 0.5331, 0.7718]) tensor([0.0592, 0.1147, 0.3640, 0.4621]) -Greedy action tensor([-1.1564, -0.5648, 0.2479, 0.4254]) tensor([0.0852, 0.1539, 0.3468, 0.4142]) -Greedy action tensor([-1.7575, -0.5274, 0.5948, -0.1319]) tensor([0.0500, 0.1710, 0.5252, 0.2539]) -Greedy action tensor([-1.0740, -0.5948, 0.2208, 0.3633]) tensor([0.0955, 0.1542, 0.3485, 0.4019]) -Greedy action tensor([-1.6744, -0.3719, 0.5171, 0.0274]) tensor([0.0523, 0.1925, 0.4683, 0.2869]) -Greedy action tensor([-0.4117, -0.3941, 0.1468, 0.1020]) tensor([0.1839, 0.1872, 0.3215, 0.3074]) -Greedy action tensor([-0.6873, -0.3873, 0.1884, -0.0650]) tensor([0.1512, 0.2041, 0.3630, 0.2817]) -Greedy action tensor([-1.9128, -0.7942, 0.2400, -0.2160]) tensor([0.0552, 0.1688, 0.4750, 0.3010]) -Greedy action tensor([-1.7933, -0.4158, 0.8431, 0.5185]) tensor([0.0345, 0.1366, 0.4811, 0.3478]) -Greedy action tensor([-1.3012, -0.5450, 0.4664, 0.6951]) tensor([0.0612, 0.1303, 0.3582, 0.4503]) -Greedy action tensor([-1.6764, -0.4381, 0.5208, 0.0155]) tensor([0.0530, 0.1827, 0.4767, 0.2876]) -Greedy action tensor([-1.8867, -0.5539, 1.0707, 0.4390]) tensor([0.0292, 0.1106, 0.5616, 0.2986]) -Greedy action tensor([-1.8261, -0.4569, 0.6043, -0.1131]) tensor([0.0458, 0.1800, 0.5203, 0.2539]) -Greedy action tensor([-1.6149, -0.6094, 0.7740, 0.3784]) tensor([0.0455, 0.1244, 0.4961, 0.3340]) -Greedy action tensor([-1.6343, -0.4997, 0.5048, 0.0238]) tensor([0.0560, 0.1742, 0.4757, 0.2941]) -Greedy action tensor([-1.5065, -0.1674, 0.6266, -0.4162]) tensor([0.0616, 0.2351, 0.5200, 0.1833]) -Greedy action tensor([-1.0824, -0.2852, 0.5853, 0.9291]) tensor([0.0625, 0.1388, 0.3314, 0.4674]) -Greedy action tensor([-1.8093, -0.7571, 0.1795, -0.3906]) tensor([0.0654, 0.1872, 0.4775, 0.2700]) -Greedy action tensor([-1.7985, -0.4990, 0.6854, 0.0305]) tensor([0.0437, 0.1603, 0.5239, 0.2721]) -Greedy action tensor([-0.9594, 0.4621, 0.0596, -0.5324]) tensor([0.1059, 0.4386, 0.2933, 0.1623]) -Greedy action tensor([-1.4603, -0.4388, 0.5332, 0.4180]) tensor([0.0566, 0.1573, 0.4157, 0.3704]) -Greedy action tensor([-1.2817, -0.5764, 0.3180, 0.2240]) tensor([0.0801, 0.1622, 0.3967, 0.3611]) -Greedy action tensor([-1.1482, -0.4039, 0.9615, 1.2510]) tensor([0.0447, 0.0941, 0.3687, 0.4925]) -Greedy action tensor([-1.0440, -0.5635, 0.2559, 0.3938]) tensor([0.0953, 0.1540, 0.3495, 0.4012]) -Greedy action tensor([-1.7104, -0.5523, 0.5526, -0.0801]) tensor([0.0529, 0.1684, 0.5085, 0.2701]) -Greedy action tensor([-1.4033, -0.5525, 0.3532, 0.2185]) tensor([0.0704, 0.1649, 0.4080, 0.3566]) -Greedy action tensor([-1.6888, -0.4717, 0.5409, -0.0210]) tensor([0.0527, 0.1780, 0.4900, 0.2793]) -Greedy action tensor([-0.3702, -0.1257, 0.9579, 1.5178]) tensor([0.0790, 0.1009, 0.2982, 0.5219]) -Greedy action tensor([-1.2501, -0.3562, 0.6213, 1.0057]) tensor([0.0513, 0.1255, 0.3334, 0.4898]) -Greedy action tensor([-1.1381, -0.5642, 1.2186, 1.4186]) tensor([0.0381, 0.0677, 0.4025, 0.4917]) -Greedy action tensor([-1.9243, -0.4534, 0.6542, -0.1681]) tensor([0.0411, 0.1790, 0.5418, 0.2381]) -Greedy action tensor([-1.7808, -0.4704, 0.5882, -0.0910]) tensor([0.0480, 0.1781, 0.5135, 0.2603]) -Greedy action tensor([-1.9322, -0.6906, 0.2764, -0.2369]) tensor([0.0526, 0.1820, 0.4788, 0.2866]) -Greedy action tensor([-1.7101, -0.5205, 0.5431, -0.0414]) tensor([0.0523, 0.1719, 0.4981, 0.2776]) -Greedy action tensor([-1.8980, -0.4801, 1.2644, 0.7401]) tensor([0.0234, 0.0966, 0.5528, 0.3272]) -Greedy action tensor([-1.7591, -0.5089, 0.9552, 0.5486]) tensor([0.0337, 0.1178, 0.5093, 0.3392]) -Greedy action tensor([-1.8795, -0.6757, 0.2565, -0.2277]) tensor([0.0555, 0.1850, 0.4699, 0.2896]) -Greedy action tensor([-0.7458, -0.3271, 0.7046, 1.3867]) tensor([0.0657, 0.0999, 0.2802, 0.5542]) -Greedy action tensor([-1.3671, -0.5323, 0.3931, 0.2552]) tensor([0.0705, 0.1625, 0.4099, 0.3571]) -Greedy action tensor([-1.5648, -0.5033, 0.4906, 0.0135]) tensor([0.0604, 0.1747, 0.4720, 0.2929]) -Greedy action tensor([-0.7017, -0.2621, 0.8116, 1.4753]) tensor([0.0628, 0.0975, 0.2854, 0.5542]) -Greedy action tensor([-1.5147, -0.3930, 0.7551, 0.7770]) tensor([0.0423, 0.1299, 0.4094, 0.4184]) -Greedy action tensor([-1.8848, -0.4448, 0.6906, -0.0612]) tensor([0.0407, 0.1719, 0.5351, 0.2523]) -Greedy action tensor([-1.4647, 0.8641, 0.4652, 0.3745]) tensor([0.0409, 0.4199, 0.2818, 0.2574]) -Greedy action tensor([-1.9531, -0.5398, 0.6840, -0.1540]) tensor([0.0398, 0.1636, 0.5561, 0.2406]) -Greedy action tensor([-0.7621, 0.0962, 0.3481, 0.8722]) tensor([0.0868, 0.2048, 0.2635, 0.4449]) -Greedy action tensor([-1.4306, -0.5002, 0.6880, 0.7125]) tensor([0.0491, 0.1244, 0.4082, 0.4183]) -Greedy action tensor([-1.4047, -0.6110, 1.3748, 1.2645]) tensor([0.0296, 0.0655, 0.4774, 0.4275]) -Greedy action tensor([-1.5145, -0.3755, 0.8969, 0.9217]) tensor([0.0374, 0.1170, 0.4175, 0.4280]) -Greedy action tensor([-1.8737, -0.4525, 0.7234, 0.0750]) tensor([0.0391, 0.1619, 0.5247, 0.2743]) -Greedy action tensor([-1.0236, -0.5801, 0.2309, 0.2962]) tensor([0.1020, 0.1589, 0.3575, 0.3816]) -Greedy action tensor([-0.7911, -0.5106, 0.3441, 0.1819]) tensor([0.1237, 0.1638, 0.3850, 0.3274]) -Greedy action tensor([-1.4653, -0.6524, 1.4320, 1.2634]) tensor([0.0273, 0.0614, 0.4940, 0.4173]) -Greedy action tensor([-1.8787, -0.4211, 0.9021, 0.2801]) tensor([0.0332, 0.1428, 0.5362, 0.2878]) -Greedy action tensor([-1.1639, -0.5398, 0.3939, 0.3116]) tensor([0.0834, 0.1557, 0.3961, 0.3648]) -Greedy action tensor([-1.8049, -0.6711, 0.1504, -0.2784]) tensor([0.0634, 0.1970, 0.4479, 0.2917]) -Greedy action tensor([-1.1844, -0.4917, 0.5409, 0.9216]) tensor([0.0594, 0.1188, 0.3336, 0.4882]) -Greedy action tensor([-1.3012, -0.5678, 0.3155, 0.2293]) tensor([0.0785, 0.1634, 0.3953, 0.3627]) -Greedy action tensor([-1.3637, -0.3574, 0.9352, 1.0591]) tensor([0.0400, 0.1095, 0.3989, 0.4515]) -Greedy action tensor([-0.2631, -0.2879, 0.1926, 0.2241]) tensor([0.1930, 0.1883, 0.3045, 0.3142]) -Greedy action tensor([-2.0097, -0.7386, 0.8538, 0.0300]) tensor([0.0336, 0.1197, 0.5885, 0.2582]) -Greedy action tensor([-0.4412, -0.4685, 0.1870, 0.1352]) tensor([0.1777, 0.1729, 0.3331, 0.3163]) -Greedy action tensor([-1.8087, -0.4662, 0.5993, -0.1056]) tensor([0.0467, 0.1786, 0.5185, 0.2562]) -Greedy action tensor([-1.4944, 0.0235, 0.5830, -0.4183]) tensor([0.0607, 0.2769, 0.4845, 0.1780]) -Greedy action tensor([-1.8681, -0.4625, 0.6257, -0.1345]) tensor([0.0438, 0.1785, 0.5299, 0.2478]) -Greedy action tensor([-1.6616, -0.3812, 0.5070, -0.0130]) tensor([0.0539, 0.1940, 0.4716, 0.2804]) -Greedy action tensor([-1.1437, 0.1364, 0.4080, -0.6450]) tensor([0.0912, 0.3281, 0.4305, 0.1502]) -Greedy action tensor([ 1.0850, -0.4821, -0.2552, 0.1346]) tensor([0.5385, 0.1124, 0.1410, 0.2082]) -Greedy action tensor([ 0.9524, -0.4376, -0.3724, 0.6313]) tensor([0.4464, 0.1112, 0.1187, 0.3238]) -Greedy action tensor([ 0.7313, -0.2419, 0.0200, -0.2027]) tensor([0.4421, 0.1671, 0.2171, 0.1737]) -Greedy action tensor([ 1.3915, -0.8295, -0.1260, 0.0915]) tensor([0.6249, 0.0678, 0.1370, 0.1703]) -Greedy action tensor([ 0.8224, -0.0813, -0.0545, -0.0867]) tensor([0.4496, 0.1821, 0.1871, 0.1812]) -Greedy action tensor([ 0.6319, -0.2676, -0.6519, 0.3001]) tensor([0.4164, 0.1694, 0.1153, 0.2988]) -Greedy action tensor([ 1.0598, -0.4844, -0.3564, 0.2425]) tensor([0.5269, 0.1125, 0.1278, 0.2327]) -Greedy action tensor([ 0.9608, -0.4327, 0.0240, -0.0748]) tensor([0.5012, 0.1244, 0.1964, 0.1779]) -Greedy action tensor([ 1.0999, -0.6943, -0.3605, 0.2900]) tensor([0.5425, 0.0902, 0.1259, 0.2414]) -Greedy action tensor([ 1.0261, -0.5695, 0.0376, 0.0484]) tensor([0.5125, 0.1039, 0.1907, 0.1928]) -Greedy action tensor([ 1.0716, -0.3675, -0.2304, 0.0950]) tensor([0.5303, 0.1258, 0.1442, 0.1997]) -Greedy action tensor([ 1.1343, -0.5079, -0.2775, 0.2934]) tensor([0.5352, 0.1036, 0.1304, 0.2308]) -Greedy action tensor([ 1.7238, -0.3619, -0.2402, 0.0165]) tensor([0.6916, 0.0859, 0.0970, 0.1254]) -Greedy action tensor([ 1.1470, -0.6788, -0.5292, 0.5453]) tensor([0.5274, 0.0850, 0.0987, 0.2890]) -Greedy action tensor([ 0.7747, -0.5888, -0.3965, 0.4915]) tensor([0.4312, 0.1103, 0.1337, 0.3248]) -Greedy action tensor([ 0.4552, 0.1420, -0.0800, -0.3452]) tensor([0.3616, 0.2643, 0.2117, 0.1624]) -Greedy action tensor([ 1.6612, -0.8717, -0.4136, 0.0639]) tensor([0.7105, 0.0564, 0.0892, 0.1438]) -Greedy action tensor([ 0.5234, -0.1153, -0.4935, 0.3971]) tensor([0.3609, 0.1905, 0.1305, 0.3181]) -Greedy action tensor([ 0.9112, -0.4417, -0.2367, -0.1218]) tensor([0.5177, 0.1338, 0.1643, 0.1843]) -Greedy action tensor([ 0.9827, -0.4785, -0.6042, 0.0695]) tensor([0.5442, 0.1262, 0.1113, 0.2183]) -Greedy action tensor([ 1.4832, -0.6779, -0.3974, -0.0356]) tensor([0.6726, 0.0775, 0.1026, 0.1473]) -Greedy action tensor([ 0.3287, -0.1461, 0.3278, -0.2606]) tensor([0.3149, 0.1959, 0.3146, 0.1747]) -Greedy action tensor([ 1.3153, -0.4296, -0.0381, 0.1637]) tensor([0.5717, 0.0999, 0.1477, 0.1807]) -Greedy action tensor([ 1.0993, -0.6033, -0.2493, 0.0722]) tensor([0.5556, 0.1012, 0.1442, 0.1989]) -Greedy action tensor([ 1.1500, -0.3529, -0.1778, -0.1400]) tensor([0.5673, 0.1262, 0.1504, 0.1561]) -Greedy action tensor([ 1.4383, -0.2035, -0.4014, 0.2734]) tensor([0.6008, 0.1163, 0.0954, 0.1874]) -Greedy action tensor([ 0.8916, -0.5742, -0.4849, 0.4630]) tensor([0.4684, 0.1082, 0.1183, 0.3052]) -Greedy action tensor([ 1.3199, -0.5800, -0.5838, 0.2778]) tensor([0.6056, 0.0906, 0.0902, 0.2136]) -Greedy action tensor([ 1.3323, -0.7916, -0.2780, 0.1463]) tensor([0.6155, 0.0736, 0.1230, 0.1880]) -Greedy action tensor([ 1.3424, -0.8513, -0.5110, 0.6805]) tensor([0.5605, 0.0625, 0.0878, 0.2892]) -Greedy action tensor([ 0.5809, -0.2277, -0.3908, 0.1941]) tensor([0.3995, 0.1780, 0.1512, 0.2713]) -Greedy action tensor([ 0.8971, -0.4649, -0.2370, 0.6449]) tensor([0.4246, 0.1088, 0.1366, 0.3300]) -Greedy action tensor([ 0.8526, -0.1818, -0.0153, -0.0414]) tensor([0.4578, 0.1627, 0.1922, 0.1872]) -Greedy action tensor([ 0.5857, -0.2543, -0.6502, 0.2805]) tensor([0.4066, 0.1755, 0.1182, 0.2997]) -Greedy action tensor([ 0.9534, -0.6205, -0.2365, 0.2939]) tensor([0.4929, 0.1022, 0.1500, 0.2549]) -Greedy action tensor([ 0.9719, -0.5321, -0.6433, 0.5949]) tensor([0.4746, 0.1055, 0.0944, 0.3255]) -Greedy action tensor([ 1.0369, -0.4905, -0.3502, 0.0310]) tensor([0.5457, 0.1185, 0.1363, 0.1996]) -Greedy action tensor([ 0.9208, -0.3210, -0.6831, -0.0903]) tensor([0.5394, 0.1558, 0.1085, 0.1963]) -Greedy action tensor([ 1.0910, -0.6351, -0.2794, 0.2870]) tensor([0.5320, 0.0947, 0.1351, 0.2381]) -Greedy action tensor([ 1.3089, -0.4141, -0.4007, 0.2548]) tensor([0.5855, 0.1045, 0.1059, 0.2040]) -Greedy action tensor([ 0.8928, -0.3842, -0.6011, 0.2696]) tensor([0.4903, 0.1367, 0.1101, 0.2629]) -Greedy action tensor([ 0.7963, -0.2188, -0.2564, -0.2564]) tensor([0.4854, 0.1759, 0.1694, 0.1694]) -Greedy action tensor([ 0.8447, -0.4624, -0.2831, 0.3691]) tensor([0.4513, 0.1221, 0.1461, 0.2805]) -Greedy action tensor([ 0.9506, -0.2772, -0.2074, 0.2364]) tensor([0.4770, 0.1397, 0.1498, 0.2335]) -Greedy action tensor([ 1.3777, -0.7477, -0.2591, 0.0201]) tensor([0.6364, 0.0760, 0.1238, 0.1637]) -Greedy action tensor([ 0.8918, -0.0261, -0.2689, -0.2516]) tensor([0.4923, 0.1966, 0.1542, 0.1569]) -Greedy action tensor([ 1.3559, -0.6367, -0.4594, 0.1536]) tensor([0.6251, 0.0852, 0.1018, 0.1879]) -Greedy action tensor([ 0.9263, -0.5991, -0.5528, 0.6572]) tensor([0.4526, 0.0985, 0.1031, 0.3458]) -Greedy action tensor([ 1.5930, -0.3984, -0.5558, -0.0312]) tensor([0.6896, 0.0941, 0.0804, 0.1359]) -Greedy action tensor([ 1.2442, -0.4360, -0.1838, 0.0059]) tensor([0.5827, 0.1086, 0.1397, 0.1689]) -Greedy action tensor([ 1.2871, -0.5349, -0.4446, 0.0996]) tensor([0.6084, 0.0984, 0.1077, 0.1855]) -Greedy action tensor([ 1.2105, -0.4417, -0.3777, -0.0718]) tensor([0.5976, 0.1145, 0.1221, 0.1658]) -Greedy action tensor([ 1.2259, -0.4204, -0.0662, 0.0070]) tensor([0.5672, 0.1093, 0.1558, 0.1676]) -Greedy action tensor([ 1.2969, -0.5796, -0.2762, 0.3148]) tensor([0.5764, 0.0883, 0.1195, 0.2159]) -Greedy action tensor([ 1.3694, -0.4931, -0.1403, -0.0310]) tensor([0.6162, 0.0957, 0.1362, 0.1519]) -Greedy action tensor([ 1.0337, -0.2509, -0.2538, 0.3876]) tensor([0.4815, 0.1333, 0.1329, 0.2524]) -Greedy action tensor([ 1.0335, -0.5111, -0.1463, 0.2084]) tensor([0.5105, 0.1089, 0.1569, 0.2237]) -Greedy action tensor([ 1.2965, -0.3642, -0.1076, -0.0620]) tensor([0.5908, 0.1123, 0.1451, 0.1519]) -Greedy action tensor([ 0.8837, -0.2224, -0.0298, -0.0393]) tensor([0.4696, 0.1554, 0.1884, 0.1866]) -Greedy action tensor([ 0.7315, -0.4538, -0.1842, 0.0023]) tensor([0.4570, 0.1397, 0.1829, 0.2204]) -Greedy action tensor([ 0.7101, -0.1706, -0.0422, -0.1250]) tensor([0.4311, 0.1787, 0.2032, 0.1870]) -Greedy action tensor([ 1.2362, -0.5985, -0.5686, 0.6837]) tensor([0.5264, 0.0840, 0.0866, 0.3029]) -Greedy action tensor([ 0.9157, -0.5725, -0.5999, 0.1344]) tensor([0.5254, 0.1186, 0.1154, 0.2405]) -Greedy action tensor([ 0.6566, -0.2559, 0.2090, -0.1938]) tensor([0.4052, 0.1627, 0.2590, 0.1731]) -Greedy action tensor([ 0.8914, -0.3636, -0.1808, -0.2080]) tensor([0.5101, 0.1454, 0.1746, 0.1699]) -Greedy action tensor([ 1.3251, -0.4946, -0.4268, -0.2093]) tensor([0.6447, 0.1045, 0.1118, 0.1390]) -Greedy action tensor([ 1.1130, -0.3768, -0.0972, 0.3538]) tensor([0.5021, 0.1132, 0.1497, 0.2350]) -Greedy action tensor([ 1.2700, -0.6281, -0.3699, 0.6135]) tensor([0.5369, 0.0805, 0.1042, 0.2785]) -Greedy action tensor([ 1.6062, -0.3596, -0.4138, 0.2440]) tensor([0.6541, 0.0916, 0.0868, 0.1675]) -Greedy action tensor([ 0.7452, 0.2377, -0.2759, -0.4003]) tensor([0.4386, 0.2640, 0.1580, 0.1395]) -Greedy action tensor([ 1.4560, -0.5710, -0.0459, 0.1694]) tensor([0.6132, 0.0808, 0.1366, 0.1694]) -Greedy action tensor([ 0.5499, -0.1451, -0.6218, 0.1713]) tensor([0.4010, 0.2001, 0.1242, 0.2746]) -Greedy action tensor([ 0.8815, -0.3403, -0.6418, 0.6676]) tensor([0.4310, 0.1270, 0.0940, 0.3480]) -Greedy action tensor([ 0.6729, -0.0713, 0.1345, -0.2443]) tensor([0.4068, 0.1933, 0.2374, 0.1626]) -Greedy action tensor([ 1.1188, -0.3058, -0.4247, 0.1332]) tensor([0.5472, 0.1317, 0.1169, 0.2042]) -Greedy action tensor([ 0.9971, -0.5332, -0.4585, 0.0913]) tensor([0.5394, 0.1168, 0.1258, 0.2180]) -Greedy action tensor([ 0.8446, -0.3238, -0.2211, -0.0884]) tensor([0.4881, 0.1517, 0.1681, 0.1920]) -Greedy action tensor([ 0.9719, -0.7698, -0.4362, 0.5061]) tensor([0.4884, 0.0856, 0.1195, 0.3065]) -Greedy action tensor([ 1.3521, -0.0407, -0.2659, 0.0162]) tensor([0.5849, 0.1453, 0.1160, 0.1538]) -Greedy action tensor([ 0.8147, -0.4007, -0.1398, -0.1036]) tensor([0.4806, 0.1425, 0.1850, 0.1919]) -Greedy action tensor([ 1.2676, -0.4780, -0.4425, 0.0723]) tensor([0.6031, 0.1053, 0.1091, 0.1825]) -Greedy action tensor([ 1.1173, -0.5836, 1.3331, 0.9070]) tensor([0.3092, 0.0564, 0.3837, 0.2506]) -Greedy action tensor([ 0.2861, 0.1732, 0.8802, -1.2463]) tensor([0.2551, 0.2278, 0.4620, 0.0551]) -Greedy action tensor([ 1.2802, -0.4636, -1.0562, 1.5293]) tensor([0.3915, 0.0685, 0.0378, 0.5022]) -Greedy action tensor([1.4012, 0.6122, 0.2628, 1.2001]) tensor([0.3857, 0.1752, 0.1236, 0.3155]) -Greedy action tensor([ 1.5148, -0.0898, 1.0059, 0.7386]) tensor([0.4420, 0.0888, 0.2657, 0.2034]) -Greedy action tensor([ 1.2958, -0.6508, 1.0743, 0.6760]) tensor([0.4029, 0.0575, 0.3228, 0.2168]) -Greedy action tensor([ 1.9270, -0.2236, 1.5128, 0.8989]) tensor([0.4684, 0.0545, 0.3095, 0.1675]) -Greedy action tensor([ 0.8249, 0.4918, -0.5885, 0.8163]) tensor([0.3388, 0.2428, 0.0824, 0.3359]) -Greedy action tensor([ 0.5964, -0.8933, 1.2326, 0.4958]) tensor([0.2488, 0.0561, 0.4701, 0.2250]) -Greedy action tensor([ 0.5764, -1.0601, 0.0217, 1.1418]) tensor([0.2834, 0.0552, 0.1627, 0.4988]) -Greedy action tensor([ 1.1161, -0.6392, 0.9411, 0.6772]) tensor([0.3764, 0.0651, 0.3159, 0.2427]) -Greedy action tensor([ 1.4747, -1.1017, 1.6724, 1.2745]) tensor([0.3212, 0.0244, 0.3914, 0.2629]) -Greedy action tensor([0.7771, 0.2363, 0.8939, 0.2256]) tensor([0.3047, 0.1774, 0.3424, 0.1755]) -Greedy action tensor([ 1.1745, -0.4264, 1.1880, 0.2865]) tensor([0.3807, 0.0768, 0.3859, 0.1566]) -Greedy action tensor([ 1.7256, -1.0788, 0.2361, 1.0151]) tensor([0.5626, 0.0341, 0.1269, 0.2765]) -Greedy action tensor([ 0.9451, -0.0375, -0.3422, 0.6084]) tensor([0.4229, 0.1583, 0.1167, 0.3020]) -Greedy action tensor([ 0.4553, -0.2548, 0.1288, 0.8895]) tensor([0.2662, 0.1309, 0.1920, 0.4109]) -Greedy action tensor([0.8327, 0.4404, 0.0163, 1.6221]) tensor([0.2315, 0.1564, 0.1023, 0.5098]) -Greedy action tensor([ 0.4231, 0.2935, -0.8649, 0.2450]) tensor([0.3343, 0.2937, 0.0922, 0.2798]) -Greedy action tensor([-0.2734, 0.2345, -0.4358, 0.1473]) tensor([0.1986, 0.3300, 0.1688, 0.3025]) -Greedy action tensor([-0.0070, -0.3477, 0.0752, -0.3672]) tensor([0.2862, 0.2035, 0.3107, 0.1996]) -Greedy action tensor([0.8975, 0.5281, 0.8495, 0.9497]) tensor([0.2704, 0.1869, 0.2577, 0.2849]) -Greedy action tensor([ 0.6663, -0.2387, -0.2852, -0.1356]) tensor([0.4466, 0.1807, 0.1725, 0.2003]) -Greedy action tensor([ 0.7129, -2.2030, -0.8569, 0.1912]) tensor([0.5389, 0.0292, 0.1121, 0.3198]) -Greedy action tensor([1.2398, 0.4328, 0.6713, 0.8860]) tensor([0.3684, 0.1644, 0.2086, 0.2586]) -Greedy action tensor([ 0.8843, 0.0189, -1.1342, 1.1541]) tensor([0.3492, 0.1470, 0.0464, 0.4574]) -Greedy action tensor([ 0.5322, 0.2106, -0.4347, 0.9185]) tensor([0.2796, 0.2027, 0.1063, 0.4114]) -Greedy action tensor([ 1.3996, -0.4489, 1.2636, 0.2077]) tensor([0.4284, 0.0675, 0.3740, 0.1301]) -Greedy action tensor([ 1.1433, -0.6898, 1.2375, 1.0806]) tensor([0.3127, 0.0500, 0.3436, 0.2937]) -Greedy action tensor([ 1.1144, -0.1848, 1.2281, 0.2964]) tensor([0.3528, 0.0962, 0.3953, 0.1557]) -Greedy action tensor([ 0.3392, -0.1843, 1.2874, 0.0898]) tensor([0.2019, 0.1196, 0.5211, 0.1573]) -Greedy action tensor([ 0.7577, -0.6749, -0.5572, 2.1562]) tensor([0.1800, 0.0430, 0.0483, 0.7287]) -Greedy action tensor([ 1.1762, -0.6000, 1.2910, 0.4306]) tensor([0.3616, 0.0612, 0.4056, 0.1716]) -Greedy action tensor([-0.1487, 1.3854, 0.6809, -0.2151]) tensor([0.1128, 0.5231, 0.2586, 0.1055]) -Greedy action tensor([ 0.3246, -0.7379, -0.1795, 0.4559]) tensor([0.3236, 0.1118, 0.1955, 0.3690]) -Greedy action tensor([ 0.9332, -0.2722, -0.5148, 0.3308]) tensor([0.4803, 0.1439, 0.1129, 0.2629]) -Greedy action tensor([ 0.6318, -0.1357, -0.4699, 0.9705]) tensor([0.3125, 0.1451, 0.1039, 0.4385]) -Greedy action tensor([ 1.1047, -0.6178, 0.8164, 0.8973]) tensor([0.3648, 0.0652, 0.2735, 0.2965]) -Greedy action tensor([ 1.3977, -1.2374, 0.7465, 1.4771]) tensor([0.3737, 0.0268, 0.1949, 0.4046]) -Greedy action tensor([ 0.7021, -0.8240, 0.4195, 0.6613]) tensor([0.3412, 0.0742, 0.2572, 0.3275]) -Greedy action tensor([ 1.1774, -0.8296, 0.5659, 0.9975]) tensor([0.3980, 0.0535, 0.2160, 0.3325]) -Greedy action tensor([-0.5954, -1.4801, 0.6447, 1.2136]) tensor([0.0911, 0.0376, 0.3150, 0.5563]) -Greedy action tensor([ 1.6283, -0.4917, 0.2701, 1.7582]) tensor([0.3975, 0.0477, 0.1022, 0.4526]) -Greedy action tensor([ 1.1466, -0.9953, 1.5861, 0.9117]) tensor([0.2890, 0.0339, 0.4485, 0.2285]) -Greedy action tensor([ 1.4243, -0.8141, 0.0077, 2.1996]) tensor([0.2840, 0.0303, 0.0689, 0.6168]) -Greedy action tensor([ 1.2995, -1.0688, 0.8032, -0.2246]) tensor([0.5208, 0.0488, 0.3170, 0.1134]) -Greedy action tensor([ 1.0456, -0.1222, 1.5840, 0.7783]) tensor([0.2639, 0.0821, 0.4521, 0.2020]) -Greedy action tensor([-0.1055, -0.3249, 1.3944, 0.2207]) tensor([0.1304, 0.1047, 0.5843, 0.1807]) -Greedy action tensor([ 0.1591, 0.8258, 0.4573, -1.2276]) tensor([0.2200, 0.4286, 0.2964, 0.0550]) -Greedy action tensor([ 0.0725, 0.8216, -0.5356, 1.7540]) tensor([0.1107, 0.2342, 0.0603, 0.5949]) -Greedy action tensor([ 0.3794, 0.3079, 0.0536, -0.1078]) tensor([0.3061, 0.2849, 0.2210, 0.1880]) -Greedy action tensor([ 0.3995, -0.6916, 0.3612, 0.2783]) tensor([0.3141, 0.1055, 0.3023, 0.2782]) -Greedy action tensor([ 0.0800, 0.3852, 0.1748, -0.3001]) tensor([0.2415, 0.3277, 0.2655, 0.1652]) -Greedy action tensor([0.8360, 0.0999, 0.1723, 1.0301]) tensor([0.3117, 0.1493, 0.1605, 0.3785]) -Greedy action tensor([-0.0054, -1.3860, 0.2857, -0.0470]) tensor([0.2818, 0.0709, 0.3770, 0.2703]) -Greedy action tensor([ 2.2023, -0.6180, 1.1809, 0.9643]) tensor([0.5849, 0.0349, 0.2106, 0.1696]) -Greedy action tensor([ 2.2727, -0.1340, 1.1825, 1.3560]) tensor([0.5476, 0.0493, 0.1841, 0.2190]) -Greedy action tensor([ 0.5906, -0.8763, -0.2271, 1.1527]) tensor([0.2918, 0.0673, 0.1288, 0.5120]) -Greedy action tensor([0.8998, 0.1348, 1.6247, 1.0431]) tensor([0.2135, 0.0993, 0.4408, 0.2464]) -Greedy action tensor([-0.3501, -1.6079, -0.2893, 1.1964]) tensor([0.1420, 0.0404, 0.1509, 0.6667]) -Greedy action tensor([ 1.0741, 0.6850, -0.4180, -0.0218]) tensor([0.4471, 0.3030, 0.1005, 0.1494]) -Greedy action tensor([1.1409, 0.3485, 0.3426, 0.8883]) tensor([0.3732, 0.1690, 0.1680, 0.2899]) -Greedy action tensor([ 1.5636, 0.4906, -0.0112, 0.3150]) tensor([0.5447, 0.1863, 0.1128, 0.1563]) -Greedy action tensor([ 0.4942, -1.0553, 0.5904, 0.5069]) tensor([0.3007, 0.0638, 0.3310, 0.3045]) -Greedy action tensor([ 1.7657, -1.0911, 0.6372, 1.5586]) tensor([0.4558, 0.0262, 0.1475, 0.3706]) -Greedy action tensor([ 1.4952, 0.1587, -0.8538, 0.8225]) tensor([0.5352, 0.1406, 0.0511, 0.2731]) -Greedy action tensor([ 1.5659, -0.0970, 0.7368, 1.1553]) tensor([0.4368, 0.0828, 0.1906, 0.2897]) -Greedy action tensor([ 1.0419, 0.6274, -1.1157, 0.8901]) tensor([0.3794, 0.2507, 0.0439, 0.3260]) -Greedy action tensor([ 1.2230, -1.0137, -0.1996, 1.1531]) tensor([0.4385, 0.0468, 0.1057, 0.4089]) -Greedy action tensor([ 1.0566, -0.9032, 0.4310, 0.4486]) tensor([0.4504, 0.0635, 0.2409, 0.2452]) -Greedy action tensor([ 1.2104, -0.5635, 1.1164, 1.0714]) tensor([0.3389, 0.0575, 0.3086, 0.2950]) -Greedy action tensor([0.5667, 0.4873, 0.3085, 0.4880]) tensor([0.2762, 0.2551, 0.2134, 0.2553]) -Greedy action tensor([0.8178, 0.1397, 0.0174, 0.2755]) tensor([0.3940, 0.2000, 0.1770, 0.2291]) -Greedy action tensor([ 0.8931, -0.2954, 0.8754, 1.4231]) tensor([0.2509, 0.0764, 0.2465, 0.4262]) -Greedy action tensor([ 1.5074, -1.1739, -0.1566, 1.2174]) tensor([0.4985, 0.0341, 0.0944, 0.3730]) -Greedy action tensor([ 1.3912, -0.3952, 0.3950, 1.2367]) tensor([0.4178, 0.0700, 0.1543, 0.3580]) -Greedy action tensor([-1.0659, -0.1291, -0.0099, 0.5464]) tensor([0.0874, 0.2230, 0.2513, 0.4383]) -Greedy action tensor([0.7273, 0.2817, 0.1675, 0.5343]) tensor([0.3293, 0.2109, 0.1882, 0.2715]) -Greedy action tensor([ 1.0864, -0.5809, 0.4863, 0.2841]) tensor([0.4575, 0.0864, 0.2510, 0.2051]) -Greedy action tensor([ 0.4761, -0.9596, 0.5989, 0.0674]) tensor([0.3297, 0.0785, 0.3728, 0.2191]) -Greedy action tensor([ 0.5156, -0.0606, -0.0884, 0.9329]) tensor([0.2757, 0.1550, 0.1507, 0.4185]) -Greedy action tensor([ 1.1057, -0.1988, 0.9941, 0.7959]) tensor([0.3449, 0.0936, 0.3085, 0.2530]) -Greedy action tensor([ 0.7229, -0.4200, 0.0235, -0.3032]) tensor([0.4599, 0.1467, 0.2285, 0.1648]) -Greedy action tensor([ 0.4923, -0.1197, -0.1612, -0.1105]) tensor([0.3832, 0.2078, 0.1993, 0.2097]) -Greedy action tensor([ 0.8167, -0.3641, 0.2544, -0.5547]) tensor([0.4693, 0.1441, 0.2675, 0.1191]) -Greedy action tensor([ 0.9773, -0.6792, 0.0454, -0.3092]) tensor([0.5374, 0.1025, 0.2116, 0.1485]) -Greedy action tensor([ 0.5545, -0.1033, 0.0269, -0.2532]) tensor([0.3916, 0.2028, 0.2310, 0.1746]) -Greedy action tensor([ 0.5417, 0.0081, -0.0476, -0.3819]) tensor([0.3940, 0.2311, 0.2185, 0.1564]) -Greedy action tensor([ 0.6982, -0.0204, -0.0808, -0.3325]) tensor([0.4342, 0.2116, 0.1992, 0.1549]) -Greedy action tensor([ 1.0011, -0.5681, -0.1541, -0.9218]) tensor([0.5990, 0.1247, 0.1887, 0.0876]) -Greedy action tensor([ 0.6646, -0.3165, -0.0984, -0.3178]) tensor([0.4514, 0.1692, 0.2104, 0.1690]) -Greedy action tensor([ 0.3864, -0.1108, -0.3164, -0.3573]) tensor([0.3878, 0.2359, 0.1920, 0.1843]) -Greedy action tensor([ 0.5580, -0.2424, 0.0094, -0.2203]) tensor([0.4022, 0.1807, 0.2324, 0.1847]) -Greedy action tensor([ 0.4848, -0.3808, 0.3042, -0.4373]) tensor([0.3769, 0.1586, 0.3146, 0.1499]) -Greedy action tensor([ 0.4488, -0.0728, -0.0602, -0.2166]) tensor([0.3692, 0.2191, 0.2219, 0.1898]) -Greedy action tensor([ 0.7959, -0.5471, -0.1347, -0.4556]) tensor([0.5151, 0.1345, 0.2031, 0.1474]) -Greedy action tensor([ 0.4409, -0.0726, -0.0769, -0.2806]) tensor([0.3731, 0.2233, 0.2223, 0.1813]) -Greedy action tensor([ 0.4242, 0.0515, -0.0489, -0.1369]) tensor([0.3469, 0.2390, 0.2161, 0.1979]) -Greedy action tensor([ 0.5424, -0.0796, -0.1296, -0.2157]) tensor([0.3974, 0.2134, 0.2030, 0.1862]) -Greedy action tensor([ 0.2553, -0.2259, 0.1501, -0.1890]) tensor([0.3165, 0.1956, 0.2849, 0.2030]) -Greedy action tensor([ 0.5807, -0.3971, -0.0121, -0.6193]) tensor([0.4484, 0.1687, 0.2479, 0.1351]) -Greedy action tensor([ 0.6670, -0.3323, -0.1622, -0.4349]) tensor([0.4680, 0.1723, 0.2042, 0.1555]) -Greedy action tensor([ 0.9008, -0.2148, 0.0367, -0.4622]) tensor([0.4987, 0.1635, 0.2102, 0.1276]) -Greedy action tensor([ 0.5385, -0.2122, -0.0584, -0.2135]) tensor([0.4010, 0.1893, 0.2207, 0.1890]) -Greedy action tensor([ 0.3579, 0.2400, 0.0604, -0.1569]) tensor([0.3097, 0.2752, 0.2300, 0.1851]) -Greedy action tensor([ 0.5187, -0.2862, 0.0336, -0.2463]) tensor([0.3955, 0.1769, 0.2435, 0.1841]) -Greedy action tensor([ 0.2777, 0.1578, -0.0474, -0.2935]) tensor([0.3150, 0.2794, 0.2276, 0.1779]) -Greedy action tensor([ 0.4473, 0.2353, -0.0618, -0.2148]) tensor([0.3418, 0.2765, 0.2054, 0.1763]) -Greedy action tensor([ 0.6320, -0.0152, -0.0126, -0.1028]) tensor([0.3956, 0.2071, 0.2076, 0.1897]) -Greedy action tensor([ 0.5537, -0.3428, -0.0409, -0.2019]) tensor([0.4116, 0.1679, 0.2271, 0.1933]) -Greedy action tensor([ 0.7624, -0.5222, 0.0140, -0.5144]) tensor([0.4929, 0.1364, 0.2332, 0.1375]) -Greedy action tensor([ 0.3621, -0.0166, -0.0582, -0.1532]) tensor([0.3402, 0.2330, 0.2235, 0.2032]) -Greedy action tensor([ 0.5556, -0.2791, -0.0066, -0.2759]) tensor([0.4099, 0.1779, 0.2337, 0.1785]) -Greedy action tensor([0.1948, 0.0499, 0.1205, 0.0502]) tensor([0.2733, 0.2364, 0.2537, 0.2365]) -Greedy action tensor([ 0.5423, 0.0481, 0.0162, -0.1555]) tensor([0.3706, 0.2261, 0.2190, 0.1844]) -Greedy action tensor([ 0.4067, 0.0360, 0.1163, -0.3344]) tensor([0.3431, 0.2368, 0.2566, 0.1635]) -Greedy action tensor([ 0.3876, -0.0492, -0.0649, -0.1939]) tensor([0.3520, 0.2274, 0.2239, 0.1968]) -Greedy action tensor([ 0.3651, -0.0556, -0.0182, -0.1946]) tensor([0.3437, 0.2257, 0.2343, 0.1964]) -Greedy action tensor([ 0.7029, -0.1553, 0.0753, -0.3522]) tensor([0.4337, 0.1838, 0.2315, 0.1510]) -Greedy action tensor([ 0.4669, -0.1571, -0.2444, -0.3663]) tensor([0.4063, 0.2177, 0.1995, 0.1766]) -Greedy action tensor([ 0.5167, -0.2160, -0.0618, -0.1993]) tensor([0.3952, 0.1900, 0.2216, 0.1932]) -Greedy action tensor([ 0.1878, -0.0460, 0.0059, -0.3327]) tensor([0.3106, 0.2459, 0.2590, 0.1846]) -Greedy action tensor([ 0.5253, -0.0045, 0.0714, -0.1637]) tensor([0.3668, 0.2160, 0.2330, 0.1842]) -Greedy action tensor([ 0.4876, -0.1461, 0.0746, -0.3421]) tensor([0.3805, 0.2019, 0.2517, 0.1659]) -Greedy action tensor([ 0.6424, -0.2282, 0.2477, -0.4699]) tensor([0.4130, 0.1729, 0.2783, 0.1358]) -Greedy action tensor([ 0.5941, 0.1229, -0.0599, -0.1625]) tensor([0.3826, 0.2389, 0.1990, 0.1796]) -Greedy action tensor([ 0.7879, -0.5555, -0.0150, -0.3676]) tensor([0.4941, 0.1289, 0.2214, 0.1556]) -Greedy action tensor([ 0.7311, -0.2991, 0.0673, -0.4468]) tensor([0.4588, 0.1638, 0.2362, 0.1413]) -Greedy action tensor([ 0.6927, -0.2090, -0.0125, -0.3596]) tensor([0.4446, 0.1805, 0.2197, 0.1552]) -Greedy action tensor([ 0.7153, -0.5042, 0.0046, -0.5356]) tensor([0.4824, 0.1425, 0.2370, 0.1381]) -Greedy action tensor([ 0.9758, -0.5157, 0.0392, -0.4385]) tensor([0.5376, 0.1210, 0.2107, 0.1307]) -Greedy action tensor([ 0.6599, -0.2724, -0.1148, -0.1960]) tensor([0.4387, 0.1727, 0.2022, 0.1864]) -Greedy action tensor([ 0.9329, -0.4242, 0.1629, -0.6826]) tensor([0.5210, 0.1341, 0.2412, 0.1036]) -Greedy action tensor([ 0.4251, 0.0201, -0.0501, 0.0092]) tensor([0.3392, 0.2262, 0.2109, 0.2237]) -Greedy action tensor([ 0.7248, -0.2286, 0.0434, -0.3630]) tensor([0.4488, 0.1730, 0.2270, 0.1512]) -Greedy action tensor([ 0.3358, 0.0665, 0.1335, -0.2492]) tensor([0.3187, 0.2435, 0.2603, 0.1775]) -Greedy action tensor([ 0.4829, -0.2519, -0.0558, -0.3652]) tensor([0.4014, 0.1925, 0.2342, 0.1719]) -Greedy action tensor([ 0.6939, -0.5565, 0.3664, -0.8487]) tensor([0.4503, 0.1289, 0.3245, 0.0963]) -Greedy action tensor([ 0.6559, -0.2731, -0.0411, -0.3734]) tensor([0.4444, 0.1755, 0.2213, 0.1588]) -Greedy action tensor([ 0.8107, -0.7496, 0.0786, -0.6035]) tensor([0.5170, 0.1086, 0.2486, 0.1257]) -Greedy action tensor([ 0.4383, 0.2296, 0.0795, -0.1128]) tensor([0.3240, 0.2630, 0.2263, 0.1867]) -Greedy action tensor([ 0.3964, -0.2872, -0.1032, -0.3822]) tensor([0.3890, 0.1964, 0.2360, 0.1786]) -Greedy action tensor([ 0.6261, -0.5766, -0.0133, -0.6607]) tensor([0.4752, 0.1428, 0.2508, 0.1312]) -Greedy action tensor([ 0.5316, -0.2264, 0.0082, -0.4033]) tensor([0.4075, 0.1910, 0.2415, 0.1600]) -Greedy action tensor([ 0.6931, -0.5417, -0.1319, -0.6562]) tensor([0.5029, 0.1463, 0.2204, 0.1305]) -Greedy action tensor([ 0.7990, -0.2485, -0.0161, -0.4360]) tensor([0.4798, 0.1683, 0.2123, 0.1395]) -Greedy action tensor([ 0.5339, -0.1135, -0.0008, -0.2556]) tensor([0.3901, 0.2042, 0.2285, 0.1771]) -Greedy action tensor([ 0.8210, -0.5839, 0.0508, -0.4758]) tensor([0.5046, 0.1238, 0.2336, 0.1380]) -Greedy action tensor([ 0.5151, -0.4134, -0.0685, -0.5195]) tensor([0.4332, 0.1712, 0.2417, 0.1539]) -Greedy action tensor([ 0.4371, 0.1703, -0.0109, -0.0417]) tensor([0.3307, 0.2532, 0.2113, 0.2049]) -Greedy action tensor([ 0.3548, 0.0131, -0.0150, -0.1614]) tensor([0.3335, 0.2370, 0.2304, 0.1991]) -Greedy action tensor([ 0.2625, -0.0235, -0.0477, -0.2979]) tensor([0.3273, 0.2459, 0.2400, 0.1869]) -Greedy action tensor([ 0.4331, -0.0076, 0.0183, -0.2034]) tensor([0.3529, 0.2272, 0.2331, 0.1868]) -Greedy action tensor([ 0.4137, 0.2122, -0.1703, -0.0106]) tensor([0.3301, 0.2699, 0.1841, 0.2160]) -Greedy action tensor([ 0.8462, -0.7369, -0.0261, -0.2776]) tensor([0.5133, 0.1054, 0.2145, 0.1668]) -Greedy action tensor([ 0.6327, -0.0663, -0.0512, -0.3589]) tensor([0.4215, 0.2095, 0.2127, 0.1564]) -Greedy action tensor([ 0.7334, -0.0914, -0.0313, -0.3662]) tensor([0.4471, 0.1960, 0.2081, 0.1489]) -Greedy action tensor([ 0.4576, 0.0326, -0.0155, -0.0882]) tensor([0.3501, 0.2289, 0.2181, 0.2029]) -Greedy action tensor([ 0.3354, 0.1694, -0.0787, 0.0495]) tensor([0.3068, 0.2599, 0.2028, 0.2305]) -Greedy action tensor([ 0.4739, 0.0599, 0.0419, -0.2101]) tensor([0.3553, 0.2348, 0.2306, 0.1793]) -Greedy action tensor([ 0.1868, -0.0460, 0.0446, -0.2012]) tensor([0.2996, 0.2374, 0.2599, 0.2032]) -Greedy action tensor([ 0.7536, -0.1736, -0.0418, -0.1863]) tensor([0.4469, 0.1768, 0.2017, 0.1746]) -Greedy action tensor([ 0.4144, 0.1567, 0.0104, -0.1629]) tensor([0.3331, 0.2574, 0.2224, 0.1870]) -Greedy action tensor([-1.3204, -0.4480, 0.5368, 0.7630]) tensor([0.0561, 0.1342, 0.3593, 0.4504]) -Greedy action tensor([-1.5026, -0.5405, 0.4299, 0.0815]) tensor([0.0649, 0.1700, 0.4485, 0.3166]) -Greedy action tensor([-1.7355, -0.5307, 0.6525, 0.0465]) tensor([0.0472, 0.1576, 0.5145, 0.2807]) -Greedy action tensor([-1.6274, -0.5743, 0.6650, 0.1836]) tensor([0.0503, 0.1442, 0.4979, 0.3077]) -Greedy action tensor([-1.4628, -0.2594, 0.8467, 0.7678]) tensor([0.0422, 0.1405, 0.4247, 0.3925]) -Greedy action tensor([-1.7861, -0.3624, 0.5765, -0.0588]) tensor([0.0467, 0.1941, 0.4963, 0.2629]) -Greedy action tensor([-1.2325, -0.5249, 0.4464, 0.5196]) tensor([0.0706, 0.1433, 0.3786, 0.4074]) -Greedy action tensor([-1.4555, -0.5636, 0.4382, 0.2726]) tensor([0.0636, 0.1553, 0.4228, 0.3583]) -Greedy action tensor([-0.7327, -0.1285, 0.4736, 1.0521]) tensor([0.0824, 0.1509, 0.2755, 0.4912]) -Greedy action tensor([-1.2748, -0.4773, 0.5969, 0.7204]) tensor([0.0586, 0.1300, 0.3807, 0.4307]) -Greedy action tensor([-1.9397, -0.7864, 0.2379, -0.2557]) tensor([0.0544, 0.1724, 0.4801, 0.2931]) -Greedy action tensor([-1.6235, -0.4074, 0.9310, 0.7787]) tensor([0.0354, 0.1193, 0.4548, 0.3906]) -Greedy action tensor([-0.9620, -0.1417, 0.7263, 1.2824]) tensor([0.0552, 0.1254, 0.2986, 0.5208]) -Greedy action tensor([-1.0753, 0.1562, 0.3746, 0.4282]) tensor([0.0758, 0.2598, 0.3233, 0.3411]) -Greedy action tensor([-1.7085, -0.3551, 0.8316, 0.6214]) tensor([0.0359, 0.1391, 0.4557, 0.3693]) -Greedy action tensor([-1.9366, -0.5761, 1.0455, 0.3280]) tensor([0.0292, 0.1138, 0.5759, 0.2811]) -Greedy action tensor([-1.4720, -0.5491, 0.4004, 0.1363]) tensor([0.0666, 0.1676, 0.4331, 0.3326]) -Greedy action tensor([-1.3555, -0.5341, 0.3886, 0.3983]) tensor([0.0677, 0.1539, 0.3873, 0.3911]) -Greedy action tensor([-2.0125, -0.6163, 0.8772, 0.2826]) tensor([0.0303, 0.1226, 0.5459, 0.3012]) -Greedy action tensor([-0.5786, -0.5855, 0.3749, 0.1093]) tensor([0.1520, 0.1510, 0.3945, 0.3025]) -Greedy action tensor([-1.0015, -0.4911, 0.4083, 0.6916]) tensor([0.0820, 0.1366, 0.3357, 0.4457]) -Greedy action tensor([-1.5492, -0.4980, 0.4642, 0.0249]) tensor([0.0618, 0.1769, 0.4629, 0.2984]) -Greedy action tensor([-1.6432, -0.4799, 0.1549, -0.3283]) tensor([0.0716, 0.2292, 0.4324, 0.2667]) -Greedy action tensor([-1.7943, -0.7988, 0.2519, -0.2931]) tensor([0.0628, 0.1698, 0.4857, 0.2816]) -Greedy action tensor([-1.5458, -0.7151, 0.9723, 0.9400]) tensor([0.0361, 0.0828, 0.4477, 0.4334]) -Greedy action tensor([-1.5504, -0.4094, 0.4973, 0.1868]) tensor([0.0569, 0.1782, 0.4413, 0.3235]) -Greedy action tensor([-1.3282, -1.0769, 0.6972, 1.1209]) tensor([0.0466, 0.0600, 0.3535, 0.5399]) -Greedy action tensor([-0.9651, 0.0899, 0.2790, -0.3213]) tensor([0.1082, 0.3106, 0.3753, 0.2059]) -Greedy action tensor([-1.8876, -0.9273, 0.2469, -0.3024]) tensor([0.0590, 0.1542, 0.4988, 0.2880]) -Greedy action tensor([-0.4011, -0.3379, 1.0362, 1.5712]) tensor([0.0743, 0.0791, 0.3127, 0.5339]) -Greedy action tensor([-1.5471, -0.5807, 0.4940, 0.1645]) tensor([0.0593, 0.1558, 0.4565, 0.3284]) -Greedy action tensor([-1.7019, -0.5168, 0.5313, -0.0116]) tensor([0.0526, 0.1720, 0.4905, 0.2850]) -Greedy action tensor([-1.4788, -0.5293, 0.4273, 0.1397]) tensor([0.0651, 0.1683, 0.4381, 0.3285]) -Greedy action tensor([-1.1689, 0.0227, 0.3158, 0.4455]) tensor([0.0728, 0.2398, 0.3214, 0.3660]) -Greedy action tensor([-1.6415, -0.6398, 0.6714, -0.0495]) tensor([0.0534, 0.1453, 0.5392, 0.2622]) -Greedy action tensor([-2.0441, -0.9564, 0.6166, -0.0329]) tensor([0.0388, 0.1153, 0.5557, 0.2902]) -Greedy action tensor([-1.4058, -0.5529, 0.4624, 0.2133]) tensor([0.0672, 0.1578, 0.4355, 0.3395]) -Greedy action tensor([-1.9342, -0.5968, 0.2235, -0.2428]) tensor([0.0529, 0.2017, 0.4581, 0.2873]) -Greedy action tensor([-1.1113, -0.5863, 0.7193, 1.2295]) tensor([0.0518, 0.0875, 0.3229, 0.5378]) -Greedy action tensor([-1.9138, -0.5138, 0.9004, 0.2454]) tensor([0.0329, 0.1334, 0.5487, 0.2850]) -Greedy action tensor([-1.8150, -0.4747, 0.6269, -0.0384]) tensor([0.0450, 0.1719, 0.5172, 0.2659]) -Greedy action tensor([-1.4495, -0.0241, 0.2481, -0.4277]) tensor([0.0746, 0.3105, 0.4076, 0.2073]) -Greedy action tensor([-1.5007, -0.3361, 1.0640, 0.9863]) tensor([0.0342, 0.1096, 0.4447, 0.4115]) -Greedy action tensor([-0.6270, -0.3201, 0.1527, -0.0143]) tensor([0.1566, 0.2129, 0.3415, 0.2890]) -Greedy action tensor([-0.6784, -0.6184, 0.2284, 0.0864]) tensor([0.1495, 0.1588, 0.3703, 0.3213]) -Greedy action tensor([-0.9060, -0.1501, 0.3445, 0.3370]) tensor([0.0991, 0.2111, 0.3462, 0.3436]) -Greedy action tensor([-1.0669, -0.3732, 0.6755, 1.1724]) tensor([0.0553, 0.1106, 0.3155, 0.5186]) -Greedy action tensor([-0.9923, -0.5631, 0.4238, 0.9378]) tensor([0.0738, 0.1134, 0.3042, 0.5086]) -Greedy action tensor([-0.8903, -0.0167, 0.5960, 1.1722]) tensor([0.0638, 0.1528, 0.2819, 0.5016]) -Greedy action tensor([-1.0992, -0.4832, 0.5901, -0.4681]) tensor([0.0986, 0.1825, 0.5337, 0.1853]) -Greedy action tensor([-1.0298, -0.3180, 1.2054, 1.3564]) tensor([0.0430, 0.0876, 0.4019, 0.4675]) -Greedy action tensor([-1.1380, -0.5881, 0.3057, 0.5758]) tensor([0.0799, 0.1384, 0.3384, 0.4433]) -Greedy action tensor([-1.1878, -0.5496, 0.3234, 0.2793]) tensor([0.0850, 0.1610, 0.3853, 0.3687]) -Greedy action tensor([-1.4533, -0.5549, 0.6257, 0.5908]) tensor([0.0522, 0.1281, 0.4170, 0.4027]) -Greedy action tensor([-1.4049, -0.3231, 0.7024, 0.9154]) tensor([0.0447, 0.1320, 0.3680, 0.4553]) -Greedy action tensor([-1.5053, -0.0809, 0.6340, -0.4855]) tensor([0.0609, 0.2530, 0.5172, 0.1688]) -Greedy action tensor([-1.9073, -0.3912, 0.4939, -0.0462]) tensor([0.0434, 0.1978, 0.4794, 0.2793]) -Greedy action tensor([-1.5460, -0.4272, 0.5730, 0.3541]) tensor([0.0524, 0.1605, 0.4364, 0.3506]) -Greedy action tensor([-1.7087, -0.5086, 0.5462, -0.0186]) tensor([0.0519, 0.1723, 0.4947, 0.2812]) -Greedy action tensor([-1.4820, -0.6551, 0.6568, 0.1751]) tensor([0.0588, 0.1343, 0.4988, 0.3081]) -Greedy action tensor([-0.8536, 0.1093, 0.1120, 0.2536]) tensor([0.1079, 0.2825, 0.2833, 0.3264]) -Greedy action tensor([-1.0315, -0.5001, 1.0104, 1.4035]) tensor([0.0458, 0.0780, 0.3531, 0.5231]) -Greedy action tensor([-2.0051, -0.7898, 0.7360, 0.0580]) tensor([0.0360, 0.1215, 0.5588, 0.2837]) -Greedy action tensor([-0.9973, -0.5334, 0.3764, 0.2271]) tensor([0.1006, 0.1599, 0.3973, 0.3422]) -Greedy action tensor([-1.7219, -0.4772, 0.5839, 0.0947]) tensor([0.0484, 0.1681, 0.4857, 0.2978]) -Greedy action tensor([-2.0466, -0.6944, 0.9323, 0.3661]) tensor([0.0280, 0.1083, 0.5509, 0.3128]) -Greedy action tensor([-0.5072, -0.4948, 0.2335, 0.3495]) tensor([0.1547, 0.1566, 0.3244, 0.3643]) -Greedy action tensor([-1.7875, -0.4747, 0.5958, -0.0879]) tensor([0.0476, 0.1767, 0.5155, 0.2602]) -Greedy action tensor([-1.3161, -0.4028, 0.9394, 1.1050]) tensor([0.0412, 0.1026, 0.3927, 0.4635]) -Greedy action tensor([-1.6271, -0.5069, 0.5310, -0.0939]) tensor([0.0576, 0.1767, 0.4987, 0.2670]) -Greedy action tensor([-1.8418, -0.4068, 0.8222, 0.3462]) tensor([0.0351, 0.1475, 0.5041, 0.3132]) -Greedy action tensor([-0.3363, 0.0540, 0.8720, 1.5469]) tensor([0.0806, 0.1192, 0.2700, 0.5302]) -Greedy action tensor([-0.3865, 1.1551, -0.2353, 0.1577]) tensor([0.1168, 0.5459, 0.1359, 0.2013]) -Greedy action tensor([-1.5702, -0.4269, 0.5903, 0.5113]) tensor([0.0480, 0.1506, 0.4165, 0.3849]) -Greedy action tensor([-0.0562, 0.1187, 0.9906, 1.4638]) tensor([0.1040, 0.1239, 0.2964, 0.4757]) -Greedy action tensor([-1.0464, -0.5038, 0.3002, 0.4488]) tensor([0.0907, 0.1560, 0.3487, 0.4046]) -Greedy action tensor([-1.4608, -0.3187, 0.5371, 0.1154]) tensor([0.0612, 0.1917, 0.4512, 0.2959]) -Greedy action tensor([-0.8865, -0.6283, -0.6282, -0.5327]) tensor([0.1994, 0.2582, 0.2582, 0.2841]) -Greedy action tensor([-0.8039, 0.1645, -0.0518, -0.0971]) tensor([0.1285, 0.3384, 0.2726, 0.2605]) -Greedy action tensor([-1.8004, -0.6790, 0.2135, -0.3760]) tensor([0.0636, 0.1953, 0.4767, 0.2644]) -Greedy action tensor([-1.8076e+00, -8.2427e-01, 4.6706e-04, -6.0387e-01]) tensor([0.0763, 0.2040, 0.4654, 0.2543]) -Greedy action tensor([ 1.1818, -0.6120, -0.4601, 0.4394]) tensor([0.5447, 0.0906, 0.1055, 0.2593]) -Greedy action tensor([ 0.7954, -0.1208, -0.2212, -0.2024]) tensor([0.4694, 0.1878, 0.1698, 0.1730]) -Greedy action tensor([ 0.6228, -0.0016, -0.1710, 0.3052]) tensor([0.3682, 0.1972, 0.1665, 0.2680]) -Greedy action tensor([ 0.8646, -0.1641, -0.3988, -0.1694]) tensor([0.5011, 0.1791, 0.1416, 0.1782]) -Greedy action tensor([ 1.2950, -0.3715, -0.1070, 0.4062]) tensor([0.5417, 0.1023, 0.1333, 0.2227]) -Greedy action tensor([ 0.6488, -0.3957, -0.2916, -0.0444]) tensor([0.4460, 0.1569, 0.1741, 0.2230]) -Greedy action tensor([ 0.4958, 0.0502, -0.3190, -0.3604]) tensor([0.3987, 0.2554, 0.1765, 0.1694]) -Greedy action tensor([ 1.4277, 0.0374, -0.2305, -0.0112]) tensor([0.5964, 0.1485, 0.1136, 0.1415]) -Greedy action tensor([ 0.5319, -0.2211, -0.2993, -0.3108]) tensor([0.4279, 0.2015, 0.1864, 0.1842]) -Greedy action tensor([ 0.8545, -0.6288, -0.1217, 0.3206]) tensor([0.4566, 0.1036, 0.1720, 0.2677]) -Greedy action tensor([ 1.1160, -0.6045, -0.6218, 0.6537]) tensor([0.5039, 0.0902, 0.0886, 0.3173]) -Greedy action tensor([ 0.7421, 0.0531, -0.5172, -0.3315]) tensor([0.4700, 0.2360, 0.1334, 0.1606]) -Greedy action tensor([ 0.6963, -0.2281, -0.5805, 0.1119]) tensor([0.4478, 0.1777, 0.1249, 0.2496]) -Greedy action tensor([ 0.9427, -0.4395, -0.1340, -0.1636]) tensor([0.5202, 0.1306, 0.1772, 0.1721]) -Greedy action tensor([ 0.7162, -0.5595, -0.0353, -0.1014]) tensor([0.4561, 0.1274, 0.2151, 0.2014]) -Greedy action tensor([ 1.0664, -0.2108, -0.2589, -0.0480]) tensor([0.5340, 0.1489, 0.1419, 0.1752]) -Greedy action tensor([ 1.3964, -0.7327, -0.3389, 0.0699]) tensor([0.6407, 0.0762, 0.1130, 0.1701]) -Greedy action tensor([ 2.2535, -0.2726, -0.3087, 0.0515]) tensor([0.7888, 0.0631, 0.0608, 0.0872]) -Greedy action tensor([ 1.0805, -0.7211, -0.1853, 0.6239]) tensor([0.4807, 0.0793, 0.1356, 0.3045]) -Greedy action tensor([ 1.2504, -0.4943, -0.5726, 0.7086]) tensor([0.5214, 0.0911, 0.0842, 0.3033]) -Greedy action tensor([ 1.1737, -0.2650, -0.1985, -0.0390]) tensor([0.5592, 0.1327, 0.1418, 0.1663]) -Greedy action tensor([ 1.2210, -0.2013, -0.0503, -0.0652]) tensor([0.5562, 0.1341, 0.1560, 0.1537]) -Greedy action tensor([ 0.6730, -0.2535, -0.4571, -0.2430]) tensor([0.4719, 0.1869, 0.1524, 0.1888]) -Greedy action tensor([ 8.2820e-01, -3.4139e-04, -8.3411e-02, -9.1678e-02]) tensor([0.4470, 0.1952, 0.1796, 0.1782]) -Greedy action tensor([ 1.2431, -0.5052, 0.1363, 0.0537]) tensor([0.5528, 0.0962, 0.1828, 0.1683]) -Greedy action tensor([ 1.5892, -0.4364, -0.5256, 0.2795]) tensor([0.6568, 0.0866, 0.0793, 0.1773]) -Greedy action tensor([ 1.3128, -0.3930, -0.0745, -0.2303]) tensor([0.6079, 0.1104, 0.1518, 0.1299]) -Greedy action tensor([ 0.8760, -0.4305, -0.3070, 0.4883]) tensor([0.4433, 0.1200, 0.1358, 0.3008]) -Greedy action tensor([ 1.4932, -0.6318, -0.3397, 0.3115]) tensor([0.6304, 0.0753, 0.1008, 0.1934]) -Greedy action tensor([ 0.9468, -0.5224, -0.4478, 0.7089]) tensor([0.4413, 0.1015, 0.1094, 0.3478]) -Greedy action tensor([ 1.0353, -0.4720, -0.6215, 0.2741]) tensor([0.5321, 0.1179, 0.1015, 0.2486]) -Greedy action tensor([ 1.1219, -0.5871, -0.6520, -0.0749]) tensor([0.6050, 0.1095, 0.1026, 0.1828]) -Greedy action tensor([ 1.1424, -0.4691, 0.0356, 0.0288]) tensor([0.5380, 0.1074, 0.1779, 0.1767]) -Greedy action tensor([ 0.6940, -0.1018, 0.0892, -0.2571]) tensor([0.4195, 0.1893, 0.2291, 0.1621]) -Greedy action tensor([ 1.2144, -0.7370, -0.4502, 0.7575]) tensor([0.5090, 0.0723, 0.0963, 0.3223]) -Greedy action tensor([ 1.0476, -0.6619, -0.3666, 0.0743]) tensor([0.5550, 0.1004, 0.1349, 0.2097]) -Greedy action tensor([ 0.7624, -0.2566, -0.0796, 0.0506]) tensor([0.4381, 0.1581, 0.1888, 0.2150]) -Greedy action tensor([ 1.1687, -0.4458, -0.2275, -0.1842]) tensor([0.5865, 0.1167, 0.1452, 0.1516]) -Greedy action tensor([ 0.6051, 0.0324, -0.0995, -0.4138]) tensor([0.4134, 0.2331, 0.2043, 0.1492]) -Greedy action tensor([ 1.0711, -0.4965, -0.3475, 0.0502]) tensor([0.5522, 0.1152, 0.1337, 0.1989]) -Greedy action tensor([ 1.0352, -0.7728, -0.3909, 0.1865]) tensor([0.5458, 0.0895, 0.1311, 0.2336]) -Greedy action tensor([ 1.5158, -0.7039, -0.1585, 0.0185]) tensor([0.6580, 0.0715, 0.1233, 0.1472]) -Greedy action tensor([ 1.3749e+00, 1.7381e-04, -3.9680e-01, -3.5220e-01]) tensor([0.6247, 0.1580, 0.1062, 0.1111]) -Greedy action tensor([ 1.0604, -0.3729, -0.1709, -0.1408]) tensor([0.5461, 0.1303, 0.1594, 0.1643]) -Greedy action tensor([ 1.2036, 0.0643, -0.0785, 0.0945]) tensor([0.5189, 0.1661, 0.1440, 0.1711]) -Greedy action tensor([ 0.3901, -0.0363, -0.0142, -0.2639]) tensor([0.3521, 0.2299, 0.2350, 0.1831]) -Greedy action tensor([ 0.7794, -0.2487, 0.1590, -0.1844]) tensor([0.4392, 0.1571, 0.2362, 0.1675]) -Greedy action tensor([ 1.3756, -0.2806, -0.1372, -0.2337]) tensor([0.6206, 0.1185, 0.1367, 0.1242]) -Greedy action tensor([ 0.9051, -0.5072, -0.2244, 0.1381]) tensor([0.4923, 0.1199, 0.1591, 0.2286]) -Greedy action tensor([ 1.4174, -0.7530, -0.4155, 0.1855]) tensor([0.6386, 0.0729, 0.1021, 0.1863]) -Greedy action tensor([ 0.7331, 0.0790, -0.0912, -0.1869]) tensor([0.4243, 0.2206, 0.1861, 0.1691]) -Greedy action tensor([ 0.5844, -0.3361, -0.3066, -0.0996]) tensor([0.4323, 0.1722, 0.1773, 0.2181]) -Greedy action tensor([ 1.0854, -0.3538, -0.3117, -0.0833]) tensor([0.5570, 0.1321, 0.1378, 0.1731]) -Greedy action tensor([ 0.9252, -0.2565, 0.0257, -0.1478]) tensor([0.4865, 0.1492, 0.1979, 0.1664]) -Greedy action tensor([ 1.4690, -0.5529, -0.0592, 0.1994]) tensor([0.6134, 0.0812, 0.1331, 0.1723]) -Greedy action tensor([ 0.9174, -0.0938, -0.5952, 0.4238]) tensor([0.4557, 0.1658, 0.1004, 0.2782]) -Greedy action tensor([ 0.9674, -0.3361, -0.1500, 0.3390]) tensor([0.4690, 0.1274, 0.1534, 0.2502]) -Greedy action tensor([ 0.9719, -0.6179, -0.2974, 0.2905]) tensor([0.5023, 0.1024, 0.1412, 0.2541]) -Greedy action tensor([ 1.1394, -0.5722, -0.2119, -0.1411]) tensor([0.5823, 0.1051, 0.1508, 0.1618]) -Greedy action tensor([ 0.8072, -0.2376, -0.1231, 0.0351]) tensor([0.4529, 0.1593, 0.1786, 0.2092]) -Greedy action tensor([ 0.6130, -0.3136, -0.3294, 0.1106]) tensor([0.4183, 0.1656, 0.1630, 0.2531]) -Greedy action tensor([ 1.2378, -0.4450, -0.0987, 0.1250]) tensor([0.5627, 0.1046, 0.1478, 0.1849]) -Greedy action tensor([ 0.8876, -0.6305, -0.2487, 0.0690]) tensor([0.5048, 0.1106, 0.1620, 0.2226]) -Greedy action tensor([ 0.7501, -0.0563, -0.1138, -0.1432]) tensor([0.4391, 0.1960, 0.1851, 0.1797]) -Greedy action tensor([ 0.8510, -0.3205, -0.1447, 0.1781]) tensor([0.4567, 0.1415, 0.1687, 0.2330]) -Greedy action tensor([ 0.8357, -0.1077, -0.2876, -0.1085]) tensor([0.4754, 0.1851, 0.1546, 0.1849]) -Greedy action tensor([ 1.4960, -0.4224, -0.1160, -0.0189]) tensor([0.6385, 0.0938, 0.1274, 0.1404]) -Greedy action tensor([ 1.5755, -0.3343, -0.2876, -0.2605]) tensor([0.6836, 0.1013, 0.1061, 0.1090]) -Greedy action tensor([ 1.8403, -0.1847, -0.6122, 0.2238]) tensor([0.7059, 0.0932, 0.0608, 0.1402]) -Greedy action tensor([ 1.0888, -0.5304, -0.0538, 0.2338]) tensor([0.5148, 0.1020, 0.1642, 0.2190]) -Greedy action tensor([ 1.5705, -0.6625, -0.3243, 0.1029]) tensor([0.6720, 0.0720, 0.1010, 0.1549]) -Greedy action tensor([ 0.7756, -0.0683, -0.0780, -0.0078]) tensor([0.4324, 0.1859, 0.1841, 0.1975]) -Greedy action tensor([ 0.9293, -0.3356, -0.3014, 0.1533]) tensor([0.4915, 0.1387, 0.1436, 0.2262]) -Greedy action tensor([ 1.3416, -0.6934, -0.1737, 0.3373]) tensor([0.5825, 0.0761, 0.1280, 0.2134]) -Greedy action tensor([ 0.9506, -0.2707, -0.1345, -0.0015]) tensor([0.4954, 0.1461, 0.1674, 0.1912]) -Greedy action tensor([ 1.4343, -0.5495, -0.5665, 0.1951]) tensor([0.6400, 0.0880, 0.0866, 0.1854]) -Greedy action tensor([ 1.3326, -0.6703, 0.1184, 0.0459]) tensor([0.5855, 0.0790, 0.1739, 0.1617]) -Greedy action tensor([ 1.2788, -0.3176, -0.1692, -0.0954]) tensor([0.5915, 0.1198, 0.1390, 0.1497]) -Greedy action tensor([ 0.9741, -0.3036, -0.3384, -0.1302]) tensor([0.5321, 0.1483, 0.1432, 0.1764]) -Greedy action tensor([ 1.2762, -0.5318, -0.4983, 0.6621]) tensor([0.5334, 0.0875, 0.0904, 0.2887]) -Greedy action tensor([ 0.8186, -0.4547, -0.2434, -0.0736]) tensor([0.4913, 0.1375, 0.1699, 0.2013]) -Greedy action tensor([ 0.5468, -0.0133, 1.1630, 0.1079]) tensor([0.2458, 0.1404, 0.4552, 0.1585]) -Greedy action tensor([-0.0368, 0.2262, 0.8038, 0.6483]) tensor([0.1515, 0.1970, 0.3510, 0.3005]) -Greedy action tensor([ 1.4003, -0.2129, -0.0845, 1.0747]) tensor([0.4656, 0.0928, 0.1055, 0.3362]) -Greedy action tensor([ 0.1632, -0.0297, 1.0845, 0.0220]) tensor([0.1921, 0.1584, 0.4827, 0.1668]) -Greedy action tensor([ 0.3588, -0.1334, -1.1554, 0.8839]) tensor([0.2839, 0.1736, 0.0625, 0.4800]) -Greedy action tensor([ 0.1260, -1.1043, 0.2944, 0.4554]) tensor([0.2587, 0.0756, 0.3061, 0.3596]) -Greedy action tensor([ 1.4145, -0.1687, -0.2564, 0.9658]) tensor([0.4922, 0.1011, 0.0926, 0.3142]) -Greedy action tensor([1.0281, 0.3076, 0.8759, 1.8734]) tensor([0.2139, 0.1041, 0.1837, 0.4982]) -Greedy action tensor([-0.7409, 0.1135, -0.5336, 0.3912]) tensor([0.1302, 0.3059, 0.1601, 0.4038]) -Greedy action tensor([-0.2709, -0.8829, 0.1593, 1.0141]) tensor([0.1494, 0.0810, 0.2297, 0.5399]) -Greedy action tensor([-0.1442, 0.4583, 0.4904, 0.8004]) tensor([0.1373, 0.2508, 0.2589, 0.3530]) -Greedy action tensor([ 1.4813, -0.6386, 0.4796, 0.8031]) tensor([0.5013, 0.0602, 0.1841, 0.2544]) -Greedy action tensor([ 0.9707, 0.3193, -0.1123, 0.4373]) tensor([0.4087, 0.2131, 0.1384, 0.2398]) -Greedy action tensor([-0.0112, -0.8464, -0.8159, -0.1421]) tensor([0.3625, 0.1573, 0.1621, 0.3181]) -Greedy action tensor([ 1.1140, -0.2683, 0.2875, 0.6621]) tensor([0.4301, 0.1080, 0.1882, 0.2737]) -Greedy action tensor([ 0.8297, -0.3432, 0.9811, 0.7233]) tensor([0.2966, 0.0918, 0.3450, 0.2666]) -Greedy action tensor([ 0.1028, -0.2508, -0.0329, 0.0165]) tensor([0.2863, 0.2010, 0.2500, 0.2627]) -Greedy action tensor([0.4371, 0.1190, 0.6246, 0.9767]) tensor([0.2151, 0.1565, 0.2595, 0.3690]) -Greedy action tensor([ 1.1770, -0.5462, 0.6356, 0.2855]) tensor([0.4607, 0.0822, 0.2681, 0.1889]) -Greedy action tensor([ 1.0805, -0.0148, -0.0958, 0.3264]) tensor([0.4732, 0.1583, 0.1459, 0.2226]) -Greedy action tensor([ 1.1495, -1.3995, 0.8369, 1.3150]) tensor([0.3345, 0.0261, 0.2447, 0.3947]) -Greedy action tensor([ 1.3146, 0.8311, -0.5813, 0.5360]) tensor([0.4493, 0.2770, 0.0675, 0.2062]) -Greedy action tensor([ 1.0925, -0.4139, 0.1415, 1.8313]) tensor([0.2702, 0.0599, 0.1044, 0.5656]) -Greedy action tensor([ 0.3004, 0.2353, -0.0962, 1.3422]) tensor([0.1837, 0.1721, 0.1236, 0.5206]) -Greedy action tensor([-0.5089, -1.8039, -0.2021, 0.8843]) tensor([0.1501, 0.0411, 0.2040, 0.6047]) -Greedy action tensor([ 1.5234, -0.3070, 1.3420, 1.1529]) tensor([0.3725, 0.0597, 0.3107, 0.2571]) -Greedy action tensor([ 0.7183, 0.1423, -0.8728, 1.0849]) tensor([0.3116, 0.1752, 0.0635, 0.4497]) -Greedy action tensor([ 1.6463, 1.2002, -0.4701, 0.8943]) tensor([0.4480, 0.2868, 0.0540, 0.2112]) -Greedy action tensor([ 0.9420, -0.2317, -0.4993, 0.5288]) tensor([0.4530, 0.1401, 0.1072, 0.2997]) -Greedy action tensor([ 0.5482, -0.8909, 0.5938, 1.7431]) tensor([0.1790, 0.0424, 0.1873, 0.5912]) -Greedy action tensor([ 1.3677, -1.0732, -1.0129, 0.9792]) tensor([0.5383, 0.0469, 0.0498, 0.3650]) -Greedy action tensor([ 1.9651, -0.2817, -0.4688, 1.5136]) tensor([0.5464, 0.0578, 0.0479, 0.3479]) -Greedy action tensor([ 1.2012, -0.0442, 0.1769, 0.4807]) tensor([0.4687, 0.1349, 0.1683, 0.2280]) -Greedy action tensor([ 0.9417, -1.6948, -0.0772, 0.6522]) tensor([0.4585, 0.0328, 0.1655, 0.3432]) -Greedy action tensor([ 0.8760, 0.0514, -0.3205, 0.6567]) tensor([0.3931, 0.1723, 0.1188, 0.3157]) -Greedy action tensor([ 0.2621, -0.8462, -0.8458, 0.8276]) tensor([0.2923, 0.0965, 0.0965, 0.5146]) -Greedy action tensor([-0.3719, -0.1565, -0.5855, 0.3567]) tensor([0.1953, 0.2423, 0.1577, 0.4047]) -Greedy action tensor([0.0477, 0.2947, 0.1515, 0.2430]) tensor([0.2171, 0.2780, 0.2409, 0.2640]) -Greedy action tensor([ 0.5441, 0.0369, -1.1827, 0.4145]) tensor([0.3762, 0.2265, 0.0669, 0.3304]) -Greedy action tensor([-0.4805, 0.3986, 0.0973, -0.1686]) tensor([0.1525, 0.3674, 0.2718, 0.2083]) -Greedy action tensor([ 0.4300, -0.3951, 1.6176, -0.7410]) tensor([0.1989, 0.0872, 0.6523, 0.0617]) -Greedy action tensor([ 1.1380, 0.1186, -0.7849, 2.0580]) tensor([0.2490, 0.0898, 0.0364, 0.6248]) -Greedy action tensor([ 1.8499, -0.8197, 0.9099, 0.6875]) tensor([0.5641, 0.0391, 0.2204, 0.1764]) -Greedy action tensor([ 1.1685, -1.1799, -0.3294, 0.7581]) tensor([0.5044, 0.0482, 0.1128, 0.3346]) -Greedy action tensor([ 0.6855, -1.0099, 0.9981, 0.0107]) tensor([0.3268, 0.0600, 0.4468, 0.1664]) -Greedy action tensor([-0.0934, 1.4554, 0.1813, 0.4645]) tensor([0.1140, 0.5367, 0.1501, 0.1992]) -Greedy action tensor([ 0.6454, -0.0526, 1.6017, 1.0594]) tensor([0.1782, 0.0887, 0.4636, 0.2696]) -Greedy action tensor([ 1.4386, -0.0591, 0.8872, 1.5191]) tensor([0.3468, 0.0776, 0.1998, 0.3759]) -Greedy action tensor([-0.0743, 0.2290, -0.0846, 0.1395]) tensor([0.2182, 0.2955, 0.2160, 0.2702]) -Greedy action tensor([0.9695, 0.1082, 0.0033, 0.4190]) tensor([0.4202, 0.1776, 0.1599, 0.2423]) -Greedy action tensor([ 0.3628, 0.9009, 0.2691, -0.1932]) tensor([0.2383, 0.4081, 0.2170, 0.1367]) -Greedy action tensor([ 0.6526, 0.5330, -0.3236, 2.0271]) tensor([0.1608, 0.1427, 0.0606, 0.6359]) -Greedy action tensor([ 1.4029, -0.8763, -0.7823, 1.2891]) tensor([0.4745, 0.0486, 0.0534, 0.4235]) -Greedy action tensor([ 0.1328, -0.0755, 0.7518, 0.3006]) tensor([0.2061, 0.1673, 0.3828, 0.2438]) -Greedy action tensor([0.9406, 0.2977, 0.9586, 0.7469]) tensor([0.2969, 0.1561, 0.3023, 0.2446]) -Greedy action tensor([ 1.8584, -0.2136, 0.8100, 1.5422]) tensor([0.4534, 0.0571, 0.1589, 0.3305]) -Greedy action tensor([-0.1331, -0.0611, -0.1223, 0.8218]) tensor([0.1759, 0.1891, 0.1778, 0.4571]) -Greedy action tensor([ 1.0171, -1.4423, -0.2311, 1.6247]) tensor([0.3117, 0.0266, 0.0895, 0.5722]) -Greedy action tensor([ 1.7076, -0.2808, 1.2824, 1.0673]) tensor([0.4315, 0.0591, 0.2820, 0.2274]) -Greedy action tensor([ 1.7953, -0.0997, 1.2481, 0.7853]) tensor([0.4778, 0.0718, 0.2764, 0.1740]) -Greedy action tensor([1.4308, 0.1368, 0.5600, 1.9206]) tensor([0.3008, 0.0825, 0.1259, 0.4909]) -Greedy action tensor([ 0.5352, -0.9847, 0.9150, 0.6914]) tensor([0.2598, 0.0568, 0.3797, 0.3037]) -Greedy action tensor([1.1899, 0.3225, 0.5239, 1.0347]) tensor([0.3584, 0.1505, 0.1841, 0.3069]) -Greedy action tensor([ 0.9845, 0.0286, -0.2430, 0.6662]) tensor([0.4158, 0.1599, 0.1218, 0.3025]) -Greedy action tensor([-0.1000, 0.4404, 0.5896, -0.2713]) tensor([0.1801, 0.3092, 0.3589, 0.1517]) -Greedy action tensor([ 0.8351, -0.3171, -0.0809, 0.8929]) tensor([0.3603, 0.1138, 0.1442, 0.3817]) -Greedy action tensor([ 0.7815, -1.3957, -0.1740, 1.4298]) tensor([0.2932, 0.0332, 0.1128, 0.5607]) -Greedy action tensor([1.8037, 0.0983, 0.6116, 0.8488]) tensor([0.5347, 0.0972, 0.1623, 0.2058]) -Greedy action tensor([ 0.7204, 0.0750, -0.0025, 2.1642]) tensor([0.1601, 0.0840, 0.0777, 0.6782]) -Greedy action tensor([0.6108, 0.2374, 0.1196, 0.2778]) tensor([0.3314, 0.2282, 0.2028, 0.2376]) -Greedy action tensor([ 0.5460, -1.9488, 0.9608, -0.5122]) tensor([0.3397, 0.0280, 0.5144, 0.1179]) -Greedy action tensor([ 0.8454, -0.4856, 0.1270, 1.3884]) tensor([0.2879, 0.0761, 0.1404, 0.4956]) -Greedy action tensor([ 1.0321, -1.5323, 1.0012, 0.6813]) tensor([0.3635, 0.0280, 0.3525, 0.2560]) -Greedy action tensor([ 1.1052e+00, -1.6434e-01, 3.6353e-01, 3.5872e-04]) tensor([0.4788, 0.1345, 0.2281, 0.1586]) -Greedy action tensor([ 0.5395, -0.1928, 1.0992, 0.3643]) tensor([0.2457, 0.1181, 0.4300, 0.2062]) -Greedy action tensor([0.7219, 0.7761, 2.0809, 0.2651]) tensor([0.1519, 0.1604, 0.5914, 0.0962]) -Greedy action tensor([ 1.2610, 0.1447, -0.6646, 1.0825]) tensor([0.4329, 0.1418, 0.0631, 0.3622]) -Greedy action tensor([0.6177, 0.4225, 0.8647, 0.3525]) tensor([0.2584, 0.2126, 0.3308, 0.1982]) -Greedy action tensor([ 0.6659, -0.1409, 0.5945, 0.1741]) tensor([0.3346, 0.1493, 0.3115, 0.2046]) -Greedy action tensor([ 0.8108, -0.4812, 0.4688, 0.8191]) tensor([0.3341, 0.0918, 0.2373, 0.3369]) -Greedy action tensor([ 1.0189, -1.5958, 0.5401, 0.6917]) tensor([0.4143, 0.0303, 0.2567, 0.2987]) -Greedy action tensor([0.1036, 1.4312, 0.4797, 0.2959]) tensor([0.1344, 0.5069, 0.1958, 0.1629]) -Greedy action tensor([ 0.5383, 0.1015, 0.1469, -0.2042]) tensor([0.3574, 0.2309, 0.2416, 0.1701]) -Greedy action tensor([ 0.8634, -0.2694, 0.0458, -0.5859]) tensor([0.5004, 0.1612, 0.2209, 0.1175]) -Greedy action tensor([ 0.2109, -0.0335, 0.0756, -0.3494]) tensor([0.3098, 0.2426, 0.2706, 0.1769]) -Greedy action tensor([ 0.4814, -0.3398, 0.0051, -0.3760]) tensor([0.4024, 0.1770, 0.2499, 0.1707]) -Greedy action tensor([ 0.5570, -0.3372, 0.0297, -0.5068]) tensor([0.4266, 0.1744, 0.2518, 0.1472]) -Greedy action tensor([ 0.7705, -0.2585, -0.0812, -0.2661]) tensor([0.4676, 0.1671, 0.1995, 0.1658]) -Greedy action tensor([ 0.6507, -0.2709, 0.0962, -0.3240]) tensor([0.4256, 0.1693, 0.2445, 0.1606]) -Greedy action tensor([ 0.4091, -0.0733, -0.0817, -0.2684]) tensor([0.3653, 0.2255, 0.2236, 0.1855]) -Greedy action tensor([ 0.3869, 0.0522, 0.1611, -0.2556]) tensor([0.3290, 0.2354, 0.2625, 0.1731]) -Greedy action tensor([ 0.7525, -0.2760, -0.0258, -0.4903]) tensor([0.4750, 0.1698, 0.2181, 0.1371]) -Greedy action tensor([ 1.1895, -0.9707, -0.0399, -0.6970]) tensor([0.6413, 0.0739, 0.1876, 0.0972]) -Greedy action tensor([ 0.1894, 0.1094, -0.0496, -0.3192]) tensor([0.3019, 0.2787, 0.2378, 0.1816]) -Greedy action tensor([ 0.3688, -0.0672, -0.0057, -0.4002]) tensor([0.3574, 0.2311, 0.2458, 0.1657]) -Greedy action tensor([ 0.4693, -0.2952, -0.0553, -0.3551]) tensor([0.4007, 0.1865, 0.2371, 0.1757]) -Greedy action tensor([ 0.3170, -0.3125, 0.3076, -0.3495]) tensor([0.3293, 0.1755, 0.3262, 0.1691]) -Greedy action tensor([ 0.8921, -0.2332, -0.0535, -0.4920]) tensor([0.5093, 0.1653, 0.1978, 0.1276]) -Greedy action tensor([ 0.4054, -0.0292, 0.0749, -0.1866]) tensor([0.3426, 0.2218, 0.2461, 0.1895]) -Greedy action tensor([ 0.3894, -0.1011, -0.0196, -0.5115]) tensor([0.3727, 0.2282, 0.2476, 0.1514]) -Greedy action tensor([ 0.5268, -0.2567, 0.1559, -0.3273]) tensor([0.3887, 0.1776, 0.2683, 0.1655]) -Greedy action tensor([ 0.4404, -0.2472, 0.0465, -0.3480]) tensor([0.3800, 0.1910, 0.2563, 0.1727]) -Greedy action tensor([ 0.4947, -0.2247, 0.0424, -0.2811]) tensor([0.3871, 0.1885, 0.2462, 0.1782]) -Greedy action tensor([ 0.6064, -0.1141, 0.0174, -0.2833]) tensor([0.4078, 0.1984, 0.2263, 0.1675]) -Greedy action tensor([ 0.5495, 0.1094, 0.0499, -0.1373]) tensor([0.3631, 0.2338, 0.2203, 0.1827]) -Greedy action tensor([ 0.3351, -0.1384, 0.0674, -0.1833]) tensor([0.3352, 0.2088, 0.2565, 0.1996]) -Greedy action tensor([ 0.9500, -0.3828, -0.0429, -0.3780]) tensor([0.5265, 0.1389, 0.1951, 0.1395]) -Greedy action tensor([ 0.8580, -0.2769, 0.2976, -0.3875]) tensor([0.4587, 0.1474, 0.2619, 0.1320]) -Greedy action tensor([ 0.9143, -0.3325, -0.0389, -0.8089]) tensor([0.5401, 0.1552, 0.2082, 0.0964]) -Greedy action tensor([ 0.6617, -0.3928, 0.1327, -0.4606]) tensor([0.4419, 0.1539, 0.2604, 0.1438]) -Greedy action tensor([ 0.5162, -0.3178, 0.1874, -0.4140]) tensor([0.3924, 0.1704, 0.2824, 0.1548]) -Greedy action tensor([ 0.5764, -0.2969, -0.0726, -0.3924]) tensor([0.4311, 0.1800, 0.2253, 0.1636]) -Greedy action tensor([ 0.3271, 0.0473, 0.0955, -0.1784]) tensor([0.3172, 0.2398, 0.2516, 0.1913]) -Greedy action tensor([ 0.4891, -0.2299, 0.0646, -0.2637]) tensor([0.3828, 0.1865, 0.2504, 0.1803]) -Greedy action tensor([ 0.2950, -0.1058, 0.0544, -0.4841]) tensor([0.3431, 0.2298, 0.2697, 0.1574]) -Greedy action tensor([ 0.5666, -0.1259, -0.0639, -0.1900]) tensor([0.3997, 0.2000, 0.2128, 0.1876]) -Greedy action tensor([ 0.6829, -0.2036, -0.0417, -0.0384]) tensor([0.4197, 0.1730, 0.2033, 0.2040]) -Greedy action tensor([ 0.6192, -0.1621, -0.0008, -0.3681]) tensor([0.4222, 0.1933, 0.2271, 0.1573]) -Greedy action tensor([ 0.3763, 0.0512, -0.0472, -0.0898]) tensor([0.3328, 0.2404, 0.2179, 0.2088]) -Greedy action tensor([ 0.8748, -0.6629, -0.1254, -0.4713]) tensor([0.5426, 0.1166, 0.1996, 0.1412]) -Greedy action tensor([ 0.9012, -0.7286, -0.1037, -0.6950]) tensor([0.5667, 0.1110, 0.2074, 0.1148]) -Greedy action tensor([ 0.4664, -0.1576, -0.0709, -0.5473]) tensor([0.4027, 0.2158, 0.2353, 0.1461]) -Greedy action tensor([ 0.4718, -0.1921, 0.1505, -0.2360]) tensor([0.3659, 0.1884, 0.2654, 0.1803]) -Greedy action tensor([ 0.5446, -0.1940, 0.0683, -0.5134]) tensor([0.4088, 0.1953, 0.2539, 0.1419]) -Greedy action tensor([ 0.6263, -0.1871, -0.0242, -0.2675]) tensor([0.4212, 0.1867, 0.2198, 0.1723]) -Greedy action tensor([ 0.8706, -0.1180, -0.0411, -0.5492]) tensor([0.4961, 0.1846, 0.1993, 0.1199]) -Greedy action tensor([ 0.1808, 0.1228, -0.1491, -0.3751]) tensor([0.3090, 0.2916, 0.2222, 0.1772]) -Greedy action tensor([ 0.6132, -0.3500, 0.0715, -0.4941]) tensor([0.4359, 0.1664, 0.2536, 0.1441]) -Greedy action tensor([ 0.4694, -0.1782, -0.1463, -0.3364]) tensor([0.3984, 0.2085, 0.2152, 0.1780]) -Greedy action tensor([ 0.4297, -0.0688, 0.0077, -0.4702]) tensor([0.3746, 0.2275, 0.2456, 0.1523]) -Greedy action tensor([ 0.7416, -0.3939, -0.0172, -0.2571]) tensor([0.4634, 0.1489, 0.2170, 0.1707]) -Greedy action tensor([ 0.6101, -0.3056, 0.0149, -0.2889]) tensor([0.4240, 0.1697, 0.2338, 0.1725]) -Greedy action tensor([ 0.6985, -0.4118, 0.0140, -0.5713]) tensor([0.4729, 0.1558, 0.2385, 0.1328]) -Greedy action tensor([ 0.5528, 0.0035, 0.0125, -0.0582]) tensor([0.3700, 0.2136, 0.2156, 0.2008]) -Greedy action tensor([ 0.7697, -0.3291, -0.0774, -0.6489]) tensor([0.4990, 0.1663, 0.2139, 0.1208]) -Greedy action tensor([ 0.5234, -0.1956, -0.1241, -0.1934]) tensor([0.4002, 0.1950, 0.2094, 0.1954]) -Greedy action tensor([ 0.8455, -0.7966, -0.0484, -0.4815]) tensor([0.5354, 0.1036, 0.2190, 0.1420]) -Greedy action tensor([ 0.8840, -0.5259, 0.0633, -0.3656]) tensor([0.5074, 0.1239, 0.2233, 0.1454]) -Greedy action tensor([ 0.3193, 0.2501, 0.0649, -0.2388]) tensor([0.3048, 0.2844, 0.2363, 0.1744]) -Greedy action tensor([ 0.6413, -0.4708, -0.2364, -0.3873]) tensor([0.4757, 0.1564, 0.1978, 0.1701]) -Greedy action tensor([ 0.6706, -0.3563, -0.0340, -0.2263]) tensor([0.4424, 0.1584, 0.2187, 0.1804]) -Greedy action tensor([ 0.7167, -0.4898, 0.1182, -0.4429]) tensor([0.4624, 0.1384, 0.2542, 0.1450]) -Greedy action tensor([ 0.7578, -0.3522, 0.2260, -0.5432]) tensor([0.4568, 0.1505, 0.2684, 0.1244]) -Greedy action tensor([ 1.1020, -0.8520, 0.0357, -0.4678]) tensor([0.5903, 0.0837, 0.2032, 0.1228]) -Greedy action tensor([ 0.5532, -0.1762, -0.1270, -0.2297]) tensor([0.4089, 0.1972, 0.2071, 0.1869]) -Greedy action tensor([ 0.3125, 0.1546, -0.0037, -0.2072]) tensor([0.3147, 0.2687, 0.2294, 0.1872]) -Greedy action tensor([ 0.2315, -0.0263, 0.0799, -0.1635]) tensor([0.3025, 0.2338, 0.2599, 0.2038]) -Greedy action tensor([ 0.6201, -0.1925, -0.0828, -0.2602]) tensor([0.4249, 0.1885, 0.2104, 0.1762]) -Greedy action tensor([ 0.4426, -0.1968, -0.0412, -0.2615]) tensor([0.3790, 0.2000, 0.2336, 0.1874]) -Greedy action tensor([ 0.3829, -0.2457, -0.1276, -0.5115]) tensor([0.3933, 0.2098, 0.2361, 0.1608]) -Greedy action tensor([ 0.4869, -0.1396, -0.0911, -0.2825]) tensor([0.3908, 0.2089, 0.2193, 0.1811]) -Greedy action tensor([ 0.2795, -0.0565, 0.1343, -0.2893]) tensor([0.3179, 0.2272, 0.2749, 0.1800]) -Greedy action tensor([ 0.5078, 0.1532, 0.0836, -0.1809]) tensor([0.3499, 0.2454, 0.2289, 0.1757]) -Greedy action tensor([ 0.5598, -0.1561, 0.0802, -0.4273]) tensor([0.4031, 0.1971, 0.2496, 0.1502]) -Greedy action tensor([ 0.1865, 0.2696, -0.0367, -0.2920]) tensor([0.2852, 0.3099, 0.2281, 0.1767]) -Greedy action tensor([ 0.8679, -0.7009, 0.0721, -0.9340]) tensor([0.5481, 0.1142, 0.2473, 0.0904]) -Greedy action tensor([ 0.9058, -0.4717, -0.0251, -0.7943]) tensor([0.5467, 0.1379, 0.2155, 0.0999]) -Greedy action tensor([ 0.7425, -0.2801, -0.1024, -0.4863]) tensor([0.4803, 0.1728, 0.2064, 0.1406]) -Greedy action tensor([ 0.3291, -0.2093, 0.1327, -0.1777]) tensor([0.3325, 0.1941, 0.2732, 0.2003]) -Greedy action tensor([ 1.3242, -0.9019, -0.1102, -1.0200]) tensor([0.6934, 0.0749, 0.1652, 0.0665]) -Greedy action tensor([ 0.4826, -0.0125, 0.0247, -0.2994]) tensor([0.3704, 0.2258, 0.2343, 0.1695]) -Greedy action tensor([ 0.9548, -0.5936, 0.0996, -0.5351]) tensor([0.5367, 0.1141, 0.2282, 0.1210]) -Greedy action tensor([ 0.6867, -0.3397, 0.0030, -0.3642]) tensor([0.4519, 0.1619, 0.2281, 0.1580]) -Greedy action tensor([-1.0727, -0.7474, 0.6978, -0.2581]) tensor([0.0951, 0.1316, 0.5585, 0.2147]) -Greedy action tensor([-1.2187, 0.0642, -0.2448, -0.2910]) tensor([0.1022, 0.3687, 0.2707, 0.2585]) -Greedy action tensor([-1.0895, -0.6539, 0.3188, 0.0833]) tensor([0.1014, 0.1567, 0.4144, 0.3275]) -Greedy action tensor([-1.1423, -0.2714, 0.6498, 0.9771]) tensor([0.0564, 0.1348, 0.3388, 0.4699]) -Greedy action tensor([-1.8427, -0.4932, 1.3225, 0.8986]) tensor([0.0227, 0.0875, 0.5378, 0.3520]) -Greedy action tensor([-1.5939, -0.3517, 0.5897, 0.4016]) tensor([0.0483, 0.1673, 0.4290, 0.3554]) -Greedy action tensor([-0.6817, 0.0911, 0.2813, 0.0655]) tensor([0.1266, 0.2743, 0.3317, 0.2673]) -Greedy action tensor([-1.1974, -0.5475, 0.5647, 0.8586]) tensor([0.0604, 0.1157, 0.3518, 0.4721]) -Greedy action tensor([-1.3148, -1.0644, 0.7802, 1.0898]) tensor([0.0465, 0.0598, 0.3782, 0.5154]) -Greedy action tensor([-0.9610, 0.4236, 0.1277, -0.1097]) tensor([0.0970, 0.3875, 0.2882, 0.2273]) -Greedy action tensor([-0.2551, -0.1879, 1.0701, 1.7499]) tensor([0.0754, 0.0807, 0.2838, 0.5601]) -Greedy action tensor([-1.2016, -0.2515, 0.3808, 0.6727]) tensor([0.0668, 0.1728, 0.3251, 0.4353]) -Greedy action tensor([-1.8015, -0.5040, 0.7831, 0.1510]) tensor([0.0401, 0.1466, 0.5311, 0.2822]) -Greedy action tensor([-1.4340, -0.3263, 0.9163, 0.9265]) tensor([0.0398, 0.1206, 0.4177, 0.4219]) -Greedy action tensor([-1.5385, -0.4980, 0.7081, -0.3644]) tensor([0.0605, 0.1713, 0.5723, 0.1958]) -Greedy action tensor([-1.4769, -0.5390, 0.4173, 0.0843]) tensor([0.0668, 0.1707, 0.4442, 0.3183]) -Greedy action tensor([-1.7197, -0.4656, 1.0522, 0.7619]) tensor([0.0308, 0.1080, 0.4927, 0.3685]) -Greedy action tensor([-0.7459, -0.5352, 0.2820, 0.2167]) tensor([0.1307, 0.1614, 0.3655, 0.3424]) -Greedy action tensor([-1.6543, -0.4643, 0.5147, -0.0137]) tensor([0.0550, 0.1807, 0.4809, 0.2835]) -Greedy action tensor([-1.3596, -0.2869, 0.9578, 1.0353]) tensor([0.0399, 0.1167, 0.4053, 0.4380]) -Greedy action tensor([-1.9476, -0.6544, 0.8442, 0.1013]) tensor([0.0348, 0.1269, 0.5680, 0.2702]) -Greedy action tensor([-1.5175, -0.3617, 0.7204, 0.7070]) tensor([0.0439, 0.1393, 0.4111, 0.4057]) -Greedy action tensor([-1.6599, -0.5208, 0.5438, 0.0576]) tensor([0.0533, 0.1666, 0.4830, 0.2970]) -Greedy action tensor([-1.9449, -0.5906, 1.0664, 0.4110]) tensor([0.0280, 0.1084, 0.5684, 0.2952]) -Greedy action tensor([-1.9156, -0.8910, 0.1098, -0.3544]) tensor([0.0620, 0.1727, 0.4699, 0.2954]) -Greedy action tensor([-1.2782, -0.5446, 0.5074, 0.4120]) tensor([0.0691, 0.1440, 0.4122, 0.3747]) -Greedy action tensor([-1.3422, -0.6016, 0.4184, 0.3007]) tensor([0.0710, 0.1489, 0.4130, 0.3671]) -Greedy action tensor([-1.6771, -0.5024, 0.5472, 0.0346]) tensor([0.0526, 0.1702, 0.4861, 0.2911]) -Greedy action tensor([-1.5148, -0.4893, 0.8043, 0.8247]) tensor([0.0411, 0.1146, 0.4178, 0.4265]) -Greedy action tensor([-1.0962, -0.5863, 0.2683, 0.2124]) tensor([0.0973, 0.1620, 0.3807, 0.3600]) -Greedy action tensor([-1.3848, -0.3181, 0.3265, 0.1818]) tensor([0.0703, 0.2042, 0.3890, 0.3366]) -Greedy action tensor([-1.7063, -0.4836, 0.6865, 0.1727]) tensor([0.0457, 0.1552, 0.5000, 0.2991]) -Greedy action tensor([-1.8076, -0.3514, 0.5892, -0.0769]) tensor([0.0456, 0.1957, 0.5012, 0.2575]) -Greedy action tensor([-1.9285, -0.6543, 0.7805, 0.0116]) tensor([0.0377, 0.1347, 0.5655, 0.2621]) -Greedy action tensor([-1.6747, -0.5050, 0.5209, 0.0286]) tensor([0.0535, 0.1723, 0.4805, 0.2937]) -Greedy action tensor([-1.6278, -0.5229, 0.4973, 0.0078]) tensor([0.0571, 0.1723, 0.4778, 0.2929]) -Greedy action tensor([-1.6786, -0.4942, 0.5275, 0.0362]) tensor([0.0529, 0.1729, 0.4803, 0.2939]) -Greedy action tensor([-1.7305, -0.4838, 0.8415, 0.4764]) tensor([0.0375, 0.1305, 0.4911, 0.3409]) -Greedy action tensor([-1.6024, -0.2222, 0.7751, 0.6410]) tensor([0.0397, 0.1579, 0.4281, 0.3743]) -Greedy action tensor([-1.7899, -0.4720, 0.6679, 0.0787]) tensor([0.0437, 0.1632, 0.5102, 0.2830]) -Greedy action tensor([-1.6843, -0.7601, 0.0108, -0.4679]) tensor([0.0810, 0.2042, 0.4413, 0.2735]) -Greedy action tensor([-0.3789, -0.5906, 0.3780, 0.1642]) tensor([0.1766, 0.1429, 0.3765, 0.3040]) -Greedy action tensor([-1.7998, -0.4735, 0.6050, -0.0879]) tensor([0.0468, 0.1762, 0.5180, 0.2591]) -Greedy action tensor([-1.2576, -0.8094, 0.0834, -0.1424]) tensor([0.1059, 0.1659, 0.4050, 0.3232]) -Greedy action tensor([-1.4504, -0.5604, 0.3890, 0.1456]) tensor([0.0682, 0.1661, 0.4292, 0.3365]) -Greedy action tensor([-1.9276, -0.6428, 0.9073, 0.2094]) tensor([0.0332, 0.1200, 0.5654, 0.2814]) -Greedy action tensor([-1.6462, -0.6456, 0.5404, -0.3021]) tensor([0.0608, 0.1652, 0.5410, 0.2330]) -Greedy action tensor([-1.7663, -0.5743, 0.0554, -0.3671]) tensor([0.0688, 0.2267, 0.4256, 0.2789]) -Greedy action tensor([-1.8699, -0.4568, 0.6331, -0.1254]) tensor([0.0434, 0.1782, 0.5301, 0.2483]) -Greedy action tensor([-1.9063, -0.7277, 0.1875, -0.2757]) tensor([0.0572, 0.1860, 0.4645, 0.2923]) -Greedy action tensor([-1.3200, -0.7708, -0.1542, -0.5854]) tensor([0.1246, 0.2158, 0.3998, 0.2598]) -Greedy action tensor([-1.3709, -0.6304, 0.3830, 0.1160]) tensor([0.0752, 0.1577, 0.4344, 0.3327]) -Greedy action tensor([-1.1009, -0.2073, 0.6010, 1.0064]) tensor([0.0583, 0.1425, 0.3197, 0.4795]) -Greedy action tensor([-0.7897, -0.4310, 0.1814, -0.2202]) tensor([0.1462, 0.2093, 0.3861, 0.2584]) -Greedy action tensor([-1.9634, -0.4469, 1.0013, 0.4296]) tensor([0.0279, 0.1269, 0.5402, 0.3050]) -Greedy action tensor([-1.5565, -0.3444, 0.8990, 0.9123]) tensor([0.0359, 0.1208, 0.4188, 0.4244]) -Greedy action tensor([-1.9694, -0.6804, 0.3939, -0.1773]) tensor([0.0470, 0.1707, 0.4999, 0.2824]) -Greedy action tensor([-0.1848, -0.1560, 0.4347, 0.8697]) tensor([0.1480, 0.1523, 0.2749, 0.4248]) -Greedy action tensor([-1.8622, -0.4661, 0.1562, -0.3219]) tensor([0.0580, 0.2344, 0.4368, 0.2708]) -Greedy action tensor([-1.0223, -0.5645, 0.1805, 0.5471]) tensor([0.0933, 0.1475, 0.3108, 0.4484]) -Greedy action tensor([-1.6702, -0.4693, 0.5613, 0.0775]) tensor([0.0516, 0.1715, 0.4806, 0.2963]) -Greedy action tensor([-1.5510, -0.3240, 0.4962, 0.0935]) tensor([0.0577, 0.1968, 0.4468, 0.2987]) -Greedy action tensor([-1.8560, -0.4635, 0.6392, -0.1083]) tensor([0.0437, 0.1758, 0.5296, 0.2508]) -Greedy action tensor([-1.6938, -0.4598, 0.6980, 0.2261]) tensor([0.0451, 0.1548, 0.4927, 0.3074]) -Greedy action tensor([-1.9673, -0.5437, 1.0867, 0.3165]) tensor([0.0277, 0.1148, 0.5862, 0.2714]) -Greedy action tensor([-1.6476, -1.0672, -0.0270, -0.8372]) tensor([0.0991, 0.1771, 0.5010, 0.2228]) -Greedy action tensor([-1.1236, -0.6203, 0.7023, 0.5603]) tensor([0.0702, 0.1161, 0.4357, 0.3780]) -Greedy action tensor([-1.9834, -0.5427, 1.2837, 0.5764]) tensor([0.0225, 0.0951, 0.5910, 0.2913]) -Greedy action tensor([-1.0049, -0.5431, 0.2999, 0.7085]) tensor([0.0846, 0.1342, 0.3119, 0.4693]) -Greedy action tensor([-1.4262, -0.5838, 0.3832, 0.1550]) tensor([0.0700, 0.1625, 0.4273, 0.3402]) -Greedy action tensor([-1.6333, -0.5264, 0.5073, 0.0583]) tensor([0.0557, 0.1685, 0.4736, 0.3023]) -Greedy action tensor([-1.1620, -0.7021, 1.3019, 1.4622]) tensor([0.0356, 0.0563, 0.4178, 0.4904]) -Greedy action tensor([-1.6906, -0.5137, 0.9485, 0.7232]) tensor([0.0340, 0.1103, 0.4759, 0.3799]) -Greedy action tensor([-1.7857, -0.4842, 0.5936, -0.0646]) tensor([0.0475, 0.1745, 0.5126, 0.2654]) -Greedy action tensor([-0.6275, 0.9170, 0.0702, 0.0680]) tensor([0.1031, 0.4831, 0.2071, 0.2067]) -Greedy action tensor([-1.5543, -0.7119, 0.0217, -0.5505]) tensor([0.0919, 0.2133, 0.4442, 0.2506]) -Greedy action tensor([-1.0282, -0.5410, 0.3561, 0.3272]) tensor([0.0953, 0.1551, 0.3803, 0.3694]) -Greedy action tensor([-1.9060, -0.8526, 0.2453, -0.2651]) tensor([0.0567, 0.1627, 0.4877, 0.2928]) -Greedy action tensor([-0.8868, -0.2750, 0.6243, 1.1095]) tensor([0.0679, 0.1251, 0.3075, 0.4996]) -Greedy action tensor([-1.9249, -0.4582, 0.6789, -0.1506]) tensor([0.0404, 0.1752, 0.5462, 0.2383]) -Greedy action tensor([-1.1809, -0.4405, 0.7502, 1.1336]) tensor([0.0497, 0.1043, 0.3429, 0.5031]) -Greedy action tensor([ 1.0714, -0.2350, -0.6312, -0.1714]) tensor([0.5742, 0.1555, 0.1046, 0.1657]) -Greedy action tensor([ 1.8016, -0.6160, -0.2913, -0.1371]) tensor([0.7373, 0.0657, 0.0909, 0.1061]) -Greedy action tensor([ 0.7953, -0.2363, -0.3123, 0.0199]) tensor([0.4657, 0.1660, 0.1538, 0.2145]) -Greedy action tensor([ 1.3375, -0.4747, -0.1120, -0.1764]) tensor([0.6180, 0.1009, 0.1450, 0.1360]) -Greedy action tensor([ 1.0514, -0.5040, 0.2587, -0.0616]) tensor([0.5019, 0.1060, 0.2272, 0.1649]) -Greedy action tensor([ 1.0593, -0.4020, -0.2273, 0.5044]) tensor([0.4802, 0.1114, 0.1326, 0.2757]) -Greedy action tensor([ 0.5672, -0.4468, -0.6346, 0.5422]) tensor([0.3790, 0.1375, 0.1139, 0.3696]) -Greedy action tensor([ 0.8936, -0.3543, -0.1216, 0.2207]) tensor([0.4630, 0.1329, 0.1678, 0.2363]) -Greedy action tensor([ 1.2687, -0.5939, -0.3658, 0.1582]) tensor([0.5953, 0.0924, 0.1161, 0.1961]) -Greedy action tensor([ 0.7246, -0.4524, -0.1669, 0.1141]) tensor([0.4422, 0.1363, 0.1813, 0.2402]) -Greedy action tensor([ 0.9920, -0.3055, -0.0206, -0.0777]) tensor([0.5051, 0.1380, 0.1835, 0.1733]) -Greedy action tensor([ 0.5404, -0.1916, -0.0449, 0.0291]) tensor([0.3791, 0.1823, 0.2112, 0.2274]) -Greedy action tensor([ 0.9431, 0.0162, -0.2277, 0.1082]) tensor([0.4673, 0.1850, 0.1449, 0.2028]) -Greedy action tensor([ 0.8983, -0.5397, -0.3095, 0.3706]) tensor([0.4703, 0.1117, 0.1406, 0.2775]) -Greedy action tensor([ 0.9354, -0.3609, -0.2273, 0.3185]) tensor([0.4704, 0.1287, 0.1471, 0.2538]) -Greedy action tensor([ 0.6414, -0.1141, -0.2536, -0.1380]) tensor([0.4279, 0.2010, 0.1748, 0.1963]) -Greedy action tensor([ 0.5890, -0.1974, -0.0015, -0.0248]) tensor([0.3920, 0.1786, 0.2172, 0.2122]) -Greedy action tensor([ 0.8743, -0.2788, 0.1541, -0.4375]) tensor([0.4827, 0.1524, 0.2349, 0.1300]) -Greedy action tensor([ 0.5783, -0.2789, 0.1279, -0.1200]) tensor([0.3908, 0.1658, 0.2490, 0.1944]) -Greedy action tensor([ 1.1029, -0.4089, -0.2337, -0.1528]) tensor([0.5656, 0.1247, 0.1486, 0.1611]) -Greedy action tensor([ 1.5153, -0.3194, -0.3043, 0.0070]) tensor([0.6481, 0.1035, 0.1050, 0.1434]) -Greedy action tensor([ 0.9780, -0.3623, -0.2358, 0.0527]) tensor([0.5114, 0.1339, 0.1519, 0.2027]) -Greedy action tensor([ 1.4634, -0.3134, -0.2473, -0.2312]) tensor([0.6521, 0.1103, 0.1178, 0.1198]) -Greedy action tensor([ 1.0986, -0.6961, -0.5325, 1.0542]) tensor([0.4313, 0.0717, 0.0844, 0.4126]) -Greedy action tensor([ 0.4400, -0.4079, -0.0662, -0.0006]) tensor([0.3739, 0.1601, 0.2254, 0.2406]) -Greedy action tensor([ 1.0230, -0.6629, -0.2676, 0.3581]) tensor([0.5064, 0.0938, 0.1393, 0.2605]) -Greedy action tensor([ 1.2454, -0.3950, 0.1022, 0.1490]) tensor([0.5415, 0.1050, 0.1726, 0.1809]) -Greedy action tensor([ 0.9834, -0.7978, -0.6345, 0.7492]) tensor([0.4634, 0.0780, 0.0919, 0.3666]) -Greedy action tensor([ 0.9043, -0.5906, -0.4432, 0.7803]) tensor([0.4224, 0.0947, 0.1098, 0.3731]) -Greedy action tensor([ 1.3432, -0.3661, -0.1245, 0.0739]) tensor([0.5908, 0.1069, 0.1362, 0.1660]) -Greedy action tensor([ 1.7045, -0.1867, -0.5550, 0.1360]) tensor([0.6832, 0.1031, 0.0713, 0.1424]) -Greedy action tensor([ 1.2218, -0.4213, -0.2575, 0.4156]) tensor([0.5354, 0.1035, 0.1220, 0.2391]) -Greedy action tensor([ 0.6081, -0.0132, 0.1042, -0.1845]) tensor([0.3855, 0.2071, 0.2329, 0.1745]) -Greedy action tensor([ 1.2345, -0.7517, -0.3217, 0.5353]) tensor([0.5420, 0.0744, 0.1143, 0.2693]) -Greedy action tensor([ 0.3589, -0.2083, -0.4599, 0.0248]) tensor([0.3671, 0.2082, 0.1619, 0.2628]) -Greedy action tensor([ 1.2212, -0.6151, -0.4362, 0.1536]) tensor([0.5904, 0.0941, 0.1125, 0.2030]) -Greedy action tensor([ 1.1713, -0.5307, -0.1699, -0.0494]) tensor([0.5751, 0.1049, 0.1504, 0.1697]) -Greedy action tensor([ 0.6906, 0.0755, -0.0200, -0.1408]) tensor([0.4053, 0.2191, 0.1991, 0.1765]) -Greedy action tensor([ 0.8639, -0.3504, -0.2319, 0.5284]) tensor([0.4262, 0.1265, 0.1425, 0.3047]) -Greedy action tensor([ 1.0606, -0.0971, -0.1064, -0.1461]) tensor([0.5196, 0.1632, 0.1617, 0.1554]) -Greedy action tensor([ 1.2757, -0.4078, -0.3385, 0.0911]) tensor([0.5915, 0.1099, 0.1177, 0.1809]) -Greedy action tensor([ 0.5129, -0.1655, -0.0019, -0.1786]) tensor([0.3837, 0.1947, 0.2293, 0.1922]) -Greedy action tensor([ 1.1479, -0.3850, -0.4372, 0.1050]) tensor([0.5639, 0.1218, 0.1156, 0.1987]) -Greedy action tensor([ 1.1416, -0.3422, -0.1152, -0.1086]) tensor([0.5562, 0.1261, 0.1583, 0.1593]) -Greedy action tensor([ 1.0675, -0.3820, -0.5155, 0.2717]) tensor([0.5288, 0.1241, 0.1086, 0.2386]) -Greedy action tensor([ 0.5968, -0.0978, -0.2989, -0.0434]) tensor([0.4107, 0.2051, 0.1677, 0.2165]) -Greedy action tensor([ 0.4743, -0.1273, -0.3530, -0.1103]) tensor([0.3933, 0.2155, 0.1720, 0.2192]) -Greedy action tensor([ 0.8990, -0.6401, -0.0170, 0.0742]) tensor([0.4871, 0.1045, 0.1949, 0.2135]) -Greedy action tensor([ 1.3064, -0.6860, -0.4472, 0.3870]) tensor([0.5854, 0.0798, 0.1014, 0.2334]) -Greedy action tensor([ 1.6842, -0.5246, -0.4300, -0.1222]) tensor([0.7170, 0.0787, 0.0866, 0.1178]) -Greedy action tensor([ 0.9164, -0.4371, -0.6039, 0.8122]) tensor([0.4205, 0.1086, 0.0919, 0.3789]) -Greedy action tensor([ 0.9155, -0.4198, -0.2653, 0.0334]) tensor([0.5040, 0.1326, 0.1547, 0.2086]) -Greedy action tensor([ 1.4011, -0.6305, -0.1894, -0.0920]) tensor([0.6412, 0.0841, 0.1307, 0.1441]) -Greedy action tensor([ 1.0707, -0.5481, -0.3057, 0.5753]) tensor([0.4855, 0.0962, 0.1226, 0.2958]) -Greedy action tensor([ 0.4069, -0.0470, -0.2143, -0.3244]) tensor([0.3768, 0.2393, 0.2025, 0.1814]) -Greedy action tensor([ 0.3670, -0.1186, -0.3065, -0.2567]) tensor([0.3758, 0.2312, 0.1916, 0.2014]) -Greedy action tensor([ 1.0206, -0.5960, -0.3641, 0.7229]) tensor([0.4563, 0.0906, 0.1143, 0.3388]) -Greedy action tensor([ 0.8398, -0.4192, -0.2848, 0.5473]) tensor([0.4246, 0.1206, 0.1379, 0.3169]) -Greedy action tensor([ 1.0184, -0.3574, -0.1930, 0.0276]) tensor([0.5204, 0.1315, 0.1550, 0.1932]) -Greedy action tensor([ 0.8906, -0.5018, -0.4607, 0.2808]) tensor([0.4876, 0.1212, 0.1262, 0.2650]) -Greedy action tensor([ 0.5098, -0.0399, -0.3743, -0.1360]) tensor([0.3977, 0.2295, 0.1643, 0.2085]) -Greedy action tensor([ 1.0384, -0.2794, -0.3794, 0.1404]) tensor([0.5216, 0.1396, 0.1263, 0.2125]) -Greedy action tensor([ 1.6455, -0.4468, -0.5204, 0.2283]) tensor([0.6755, 0.0834, 0.0774, 0.1637]) -Greedy action tensor([ 0.9094, -0.3259, -0.1272, 0.1323]) tensor([0.4750, 0.1381, 0.1685, 0.2184]) -Greedy action tensor([ 0.8031, -0.2578, -0.3158, -0.2780]) tensor([0.4970, 0.1720, 0.1623, 0.1686]) -Greedy action tensor([ 1.1439, -0.4530, -0.3686, 0.3892]) tensor([0.5283, 0.1070, 0.1164, 0.2484]) -Greedy action tensor([ 0.9213, -0.5315, -0.2708, 0.4588]) tensor([0.4614, 0.1079, 0.1401, 0.2906]) -Greedy action tensor([ 1.1270, -0.7282, -0.2901, 0.5209]) tensor([0.5143, 0.0804, 0.1247, 0.2806]) -Greedy action tensor([ 1.4005, -0.1814, -0.4060, 0.2439]) tensor([0.5937, 0.1221, 0.0975, 0.1867]) -Greedy action tensor([ 1.1100, -0.2355, -0.2524, 0.1656]) tensor([0.5248, 0.1367, 0.1344, 0.2041]) -Greedy action tensor([ 0.9928, -0.6461, -0.4436, 0.8653]) tensor([0.4325, 0.0840, 0.1028, 0.3807]) -Greedy action tensor([ 0.9232, -0.3502, -0.3220, -0.0334]) tensor([0.5123, 0.1434, 0.1475, 0.1968]) -Greedy action tensor([ 0.7208, -0.4109, 0.1011, -0.0249]) tensor([0.4283, 0.1381, 0.2305, 0.2032]) -Greedy action tensor([ 1.2721, -0.3345, -0.2906, -0.0914]) tensor([0.6003, 0.1204, 0.1258, 0.1535]) -Greedy action tensor([ 0.9744, -0.2571, -0.3623, -0.1872]) tensor([0.5355, 0.1563, 0.1407, 0.1676]) -Greedy action tensor([ 1.3028, -0.8611, -0.6014, 0.6873]) tensor([0.5543, 0.0637, 0.0826, 0.2995]) -Greedy action tensor([ 0.6583, -0.3346, -0.0433, -0.0681]) tensor([0.4255, 0.1577, 0.2110, 0.2058]) -Greedy action tensor([ 0.9675, -0.3112, -0.5206, 0.9257]) tensor([0.4060, 0.1130, 0.0917, 0.3893]) -Greedy action tensor([ 1.0014, -0.2411, -0.0696, -0.0354]) tensor([0.5036, 0.1454, 0.1725, 0.1786]) -Greedy action tensor([ 1.1705, 0.0147, -0.1769, -0.3896]) tensor([0.5603, 0.1764, 0.1456, 0.1177]) -Greedy action tensor([ 0.4686, -0.2554, 0.0123, -0.0838]) tensor([0.3712, 0.1800, 0.2352, 0.2136]) -Greedy action tensor([-0.0358, 0.8915, -0.3205, -0.0481]) tensor([0.1898, 0.4798, 0.1428, 0.1875]) -Greedy action tensor([ 0.1313, -0.0889, 1.2732, -0.1982]) tensor([0.1769, 0.1419, 0.5540, 0.1272]) -Greedy action tensor([ 1.2275, -0.5011, 1.1531, 0.7378]) tensor([0.3678, 0.0653, 0.3415, 0.2254]) -Greedy action tensor([-0.1643, 0.3310, 0.4419, -0.3843]) tensor([0.1895, 0.3110, 0.3474, 0.1521]) -Greedy action tensor([ 1.7217, -0.4123, -0.5759, 0.4393]) tensor([0.6683, 0.0791, 0.0672, 0.1854]) -Greedy action tensor([-0.3790, 0.4893, -0.3913, 0.6338]) tensor([0.1404, 0.3345, 0.1387, 0.3865]) -Greedy action tensor([ 1.8755, -0.2340, 2.2621, 1.2068]) tensor([0.3220, 0.0391, 0.4740, 0.1650]) -Greedy action tensor([ 0.8941, -0.1900, -1.0641, 1.1841]) tensor([0.3551, 0.1201, 0.0501, 0.4746]) -Greedy action tensor([ 0.4711, 0.0573, -0.8476, 0.5030]) tensor([0.3377, 0.2233, 0.0903, 0.3487]) -Greedy action tensor([ 0.0220, -0.3067, 1.3022, 0.6389]) tensor([0.1395, 0.1004, 0.5017, 0.2584]) -Greedy action tensor([ 0.9236, -0.5002, 1.0042, 0.6773]) tensor([0.3219, 0.0775, 0.3489, 0.2516]) -Greedy action tensor([ 0.1762, -1.2441, 0.1354, 1.4529]) tensor([0.1728, 0.0418, 0.1659, 0.6195]) -Greedy action tensor([ 0.6970, -1.8009, 0.1318, 0.5755]) tensor([0.3943, 0.0324, 0.2241, 0.3492]) -Greedy action tensor([ 1.3730, -0.3709, -0.0200, 1.1343]) tensor([0.4523, 0.0791, 0.1123, 0.3563]) -Greedy action tensor([ 1.0505, -1.4543, 0.1109, -0.0763]) tensor([0.5566, 0.0455, 0.2175, 0.1804]) -Greedy action tensor([ 0.8432, 1.0956, -0.4802, 0.9533]) tensor([0.2725, 0.3507, 0.0725, 0.3042]) -Greedy action tensor([ 1.9030, -0.9389, -0.0337, 0.8825]) tensor([0.6398, 0.0373, 0.0923, 0.2306]) -Greedy action tensor([ 0.8379, -0.6121, 0.9943, 0.3367]) tensor([0.3323, 0.0779, 0.3885, 0.2013]) -Greedy action tensor([ 1.9319, -0.3736, 0.6054, 2.2551]) tensor([0.3641, 0.0363, 0.0966, 0.5030]) -Greedy action tensor([ 0.8891, 0.6102, -0.0375, 1.6381]) tensor([0.2343, 0.1773, 0.0928, 0.4956]) -Greedy action tensor([ 1.2411, 0.2335, -0.1838, 0.5614]) tensor([0.4734, 0.1728, 0.1139, 0.2399]) -Greedy action tensor([ 1.4555, -0.3046, 0.0665, 2.9155]) tensor([0.1746, 0.0300, 0.0435, 0.7518]) -Greedy action tensor([ 1.0652, -1.3641, 0.9498, -0.1317]) tensor([0.4384, 0.0386, 0.3906, 0.1324]) -Greedy action tensor([ 1.0549, 0.9624, -0.8070, -0.1098]) tensor([0.4203, 0.3832, 0.0653, 0.1312]) -Greedy action tensor([-0.4365, 0.7317, 0.4288, -0.1878]) tensor([0.1270, 0.4085, 0.3017, 0.1628]) -Greedy action tensor([ 1.5308, -0.2557, 0.2553, 0.7454]) tensor([0.5255, 0.0881, 0.1468, 0.2396]) -Greedy action tensor([ 0.9704, -0.4997, 0.5420, 1.0432]) tensor([0.3382, 0.0777, 0.2203, 0.3637]) -Greedy action tensor([ 0.0670, -0.0324, 0.1875, 1.5029]) tensor([0.1382, 0.1251, 0.1559, 0.5808]) -Greedy action tensor([1.7676, 0.5012, 0.2389, 0.4820]) tensor([0.5633, 0.1588, 0.1221, 0.1558]) -Greedy action tensor([ 0.7546, 1.0005, -0.4483, 0.5253]) tensor([0.2964, 0.3790, 0.0890, 0.2356]) -Greedy action tensor([-0.1126, -1.7966, 1.8489, -0.2841]) tensor([0.1094, 0.0203, 0.7781, 0.0922]) -Greedy action tensor([1.1252, 0.8186, 0.2184, 0.4591]) tensor([0.3769, 0.2773, 0.1522, 0.1936]) -Greedy action tensor([ 1.8736, -0.7134, 0.6061, 0.6430]) tensor([0.6065, 0.0456, 0.1707, 0.1772]) -Greedy action tensor([ 0.4897, -0.8657, 0.3270, 0.1843]) tensor([0.3516, 0.0906, 0.2988, 0.2590]) -Greedy action tensor([1.0310, 0.4042, 0.7306, 1.1422]) tensor([0.2948, 0.1575, 0.2183, 0.3295]) -Greedy action tensor([ 0.9779, -2.2699, -0.2276, 1.5590]) tensor([0.3199, 0.0124, 0.0958, 0.5719]) -Greedy action tensor([ 1.7963, 0.3754, 1.1465, -0.0422]) tensor([0.5201, 0.1256, 0.2716, 0.0827]) -Greedy action tensor([0.1359, 0.4857, 0.5229, 0.8808]) tensor([0.1667, 0.2365, 0.2455, 0.3512]) -Greedy action tensor([-0.1347, -2.0895, -0.2305, 0.6274]) tensor([0.2385, 0.0338, 0.2167, 0.5110]) -Greedy action tensor([ 1.6244, -0.5004, 0.5620, 1.3519]) tensor([0.4491, 0.0537, 0.1552, 0.3420]) -Greedy action tensor([-0.4174, -0.4807, -1.1239, -0.2720]) tensor([0.2787, 0.2616, 0.1375, 0.3223]) -Greedy action tensor([ 0.2424, -0.2913, -0.2547, 1.3068]) tensor([0.1963, 0.1151, 0.1194, 0.5692]) -Greedy action tensor([ 0.4098, 0.2156, -0.6588, 0.8496]) tensor([0.2689, 0.2214, 0.0924, 0.4174]) -Greedy action tensor([ 0.6733, -0.0141, -0.3584, 0.6238]) tensor([0.3558, 0.1789, 0.1268, 0.3386]) -Greedy action tensor([ 0.6868, -2.3602, 0.1136, 0.9151]) tensor([0.3487, 0.0166, 0.1966, 0.4381]) -Greedy action tensor([ 1.1243, -0.8984, 1.9399, 0.9745]) tensor([0.2351, 0.0311, 0.5314, 0.2024]) -Greedy action tensor([ 1.2075, -1.0039, -0.3387, 0.3109]) tensor([0.5779, 0.0633, 0.1231, 0.2357]) -Greedy action tensor([-0.1312, -0.0416, -0.4808, -0.1217]) tensor([0.2626, 0.2872, 0.1851, 0.2651]) -Greedy action tensor([ 0.9346, -0.9344, 0.3415, 0.9621]) tensor([0.3657, 0.0564, 0.2021, 0.3758]) -Greedy action tensor([ 0.5792, 0.0384, -0.1338, 1.0107]) tensor([0.2768, 0.1612, 0.1357, 0.4262]) -Greedy action tensor([ 0.3673, -0.8005, 0.1846, 0.0061]) tensor([0.3520, 0.1095, 0.2932, 0.2453]) -Greedy action tensor([ 0.1687, 0.0480, -0.5589, -0.0634]) tensor([0.3162, 0.2803, 0.1528, 0.2507]) -Greedy action tensor([ 1.5354, -0.0508, -0.4388, 1.1097]) tensor([0.5008, 0.1025, 0.0695, 0.3272]) -Greedy action tensor([ 0.2699, -0.3530, -0.1822, 1.2413]) tensor([0.2077, 0.1114, 0.1322, 0.5487]) -Greedy action tensor([ 0.1981, -1.1774, 0.3434, -0.3241]) tensor([0.3331, 0.0842, 0.3852, 0.1976]) -Greedy action tensor([ 0.2220, 0.2354, 0.0606, -0.0050]) tensor([0.2731, 0.2768, 0.2324, 0.2177]) -Greedy action tensor([ 0.9499, -0.0560, 0.1808, 1.1861]) tensor([0.3231, 0.1181, 0.1497, 0.4091]) -Greedy action tensor([-0.2651, 0.0069, 0.8267, 0.4111]) tensor([0.1378, 0.1808, 0.4105, 0.2709]) -Greedy action tensor([ 1.0491, 0.4421, -0.5298, 0.2005]) tensor([0.4589, 0.2501, 0.0946, 0.1964]) -Greedy action tensor([ 0.8010, 0.0988, -0.1867, 0.5890]) tensor([0.3736, 0.1851, 0.1391, 0.3022]) -Greedy action tensor([ 0.8065, -1.3997, 0.2817, 0.7466]) tensor([0.3783, 0.0417, 0.2238, 0.3563]) -Greedy action tensor([ 1.0752, 0.5708, -1.0707, 1.0872]) tensor([0.3659, 0.2210, 0.0428, 0.3703]) -Greedy action tensor([ 1.2333, 0.1756, -0.6851, 1.2631]) tensor([0.3962, 0.1376, 0.0582, 0.4081]) -Greedy action tensor([ 0.2941, -0.9670, -0.8479, 0.7266]) tensor([0.3181, 0.0901, 0.1015, 0.4902]) -Greedy action tensor([-0.3423, -0.3283, -0.0801, -1.3745]) tensor([0.2725, 0.2763, 0.3542, 0.0971]) -Greedy action tensor([ 0.5009, -0.4516, -1.1030, 1.5010]) tensor([0.2323, 0.0896, 0.0467, 0.6314]) -Greedy action tensor([0.7937, 0.3035, 0.5887, 1.2102]) tensor([0.2536, 0.1553, 0.2066, 0.3846]) -Greedy action tensor([0.7965, 0.5399, 0.8030, 0.0049]) tensor([0.3093, 0.2393, 0.3113, 0.1401]) -Greedy action tensor([0.5540, 0.0681, 0.5410, 1.0336]) tensor([0.2371, 0.1458, 0.2340, 0.3830]) -Greedy action tensor([ 1.7580, -0.4772, 0.5075, 1.4210]) tensor([0.4745, 0.0508, 0.1359, 0.3388]) -Greedy action tensor([ 0.7367, -0.8009, 0.1700, 0.5085]) tensor([0.3879, 0.0833, 0.2201, 0.3087]) -Greedy action tensor([ 1.5519, -0.6576, 1.0891, 0.4344]) tensor([0.4839, 0.0531, 0.3047, 0.1583]) -Greedy action tensor([-0.2122, 0.5240, -0.5592, 0.2171]) tensor([0.1876, 0.3917, 0.1326, 0.2882]) -Greedy action tensor([ 0.8071, 0.5451, -0.7236, 1.2505]) tensor([0.2822, 0.2171, 0.0611, 0.4396]) -Greedy action tensor([ 1.1242, -0.7118, 0.5076, 1.5706]) tensor([0.3066, 0.0489, 0.1655, 0.4791]) -Greedy action tensor([-0.0463, 0.3422, 0.7821, 0.8878]) tensor([0.1368, 0.2018, 0.3132, 0.3482]) -Greedy action tensor([-0.0320, -0.4080, -0.4200, -0.2404]) tensor([0.3148, 0.2161, 0.2136, 0.2556]) -Greedy action tensor([ 0.1154, 0.2135, -0.5195, 1.1198]) tensor([0.1864, 0.2057, 0.0988, 0.5091]) -Greedy action tensor([ 1.1174, -0.4474, 0.4319, 0.7304]) tensor([0.4181, 0.0874, 0.2106, 0.2839]) -Greedy action tensor([ 0.4146, 0.4260, 0.8786, -0.3213]) tensor([0.2450, 0.2479, 0.3897, 0.1174]) -Greedy action tensor([-0.0796, 0.4566, -0.3998, -0.7246]) tensor([0.2525, 0.4317, 0.1833, 0.1325]) -Greedy action tensor([ 1.0196, 0.2148, 0.2654, -0.0611]) tensor([0.4431, 0.1981, 0.2084, 0.1504]) -Greedy action tensor([ 0.5662, -0.4051, -0.0543, -0.4540]) tensor([0.4392, 0.1663, 0.2362, 0.1584]) -Greedy action tensor([ 0.8581, -0.3866, 0.0728, -0.4483]) tensor([0.4963, 0.1430, 0.2263, 0.1344]) -Greedy action tensor([ 0.2346, 0.0366, 0.1328, -0.2000]) tensor([0.2966, 0.2434, 0.2679, 0.1921]) -Greedy action tensor([ 0.4353, -0.1720, -0.0761, -0.1675]) tensor([0.3715, 0.2024, 0.2228, 0.2033]) -Greedy action tensor([ 0.1962, 0.2414, 0.0832, -0.2533]) tensor([0.2795, 0.2925, 0.2497, 0.1783]) -Greedy action tensor([ 0.4663, 0.0533, 0.0261, -0.2331]) tensor([0.3568, 0.2361, 0.2298, 0.1773]) -Greedy action tensor([ 0.4937, -0.1628, -0.0928, -0.1656]) tensor([0.3858, 0.2001, 0.2146, 0.1995]) -Greedy action tensor([ 0.3440, -0.3499, 0.2517, -0.4359]) tensor([0.3484, 0.1741, 0.3177, 0.1597]) -Greedy action tensor([ 0.7553, -0.2178, -0.0494, -0.5714]) tensor([0.4784, 0.1808, 0.2139, 0.1269]) -Greedy action tensor([ 0.6397, -0.1670, -0.0972, -0.1113]) tensor([0.4172, 0.1862, 0.1997, 0.1969]) -Greedy action tensor([ 0.4655, -0.2985, 0.1192, -0.3672]) tensor([0.3835, 0.1786, 0.2712, 0.1667]) -Greedy action tensor([ 0.9198, -0.3852, -0.0433, -0.6162]) tensor([0.5353, 0.1452, 0.2043, 0.1152]) -Greedy action tensor([ 0.4245, -0.1309, 0.1376, -0.4530]) tensor([0.3649, 0.2094, 0.2739, 0.1517]) -Greedy action tensor([ 0.3854, -0.0987, -0.0556, -0.2445]) tensor([0.3581, 0.2207, 0.2304, 0.1908]) -Greedy action tensor([ 0.5508, -0.1689, 0.1063, -0.4688]) tensor([0.4018, 0.1956, 0.2576, 0.1450]) -Greedy action tensor([ 0.2098, 0.1240, -0.0076, 0.0495]) tensor([0.2798, 0.2568, 0.2251, 0.2383]) -Greedy action tensor([ 0.7967, -0.3741, -0.0091, -0.4211]) tensor([0.4872, 0.1511, 0.2176, 0.1441]) -Greedy action tensor([ 0.5002, -0.2133, -0.0700, -0.3633]) tensor([0.4037, 0.1978, 0.2283, 0.1702]) -Greedy action tensor([ 0.3788, 0.0254, -0.1465, -0.1961]) tensor([0.3501, 0.2459, 0.2070, 0.1970]) -Greedy action tensor([ 0.4387, 0.0514, -0.0613, -0.4292]) tensor([0.3696, 0.2510, 0.2242, 0.1552]) -Greedy action tensor([ 0.4842, -0.1251, -0.0817, -0.1608]) tensor([0.3793, 0.2063, 0.2154, 0.1990]) -Greedy action tensor([ 0.6189, -0.0660, 0.1147, -0.1096]) tensor([0.3860, 0.1946, 0.2331, 0.1863]) -Greedy action tensor([ 0.9951, -0.3262, 0.0853, -0.6939]) tensor([0.5394, 0.1439, 0.2171, 0.0996]) -Greedy action tensor([ 0.2981, -0.0097, -0.0245, -0.1826]) tensor([0.3249, 0.2388, 0.2353, 0.2009]) -Greedy action tensor([ 0.4251, 0.2474, -0.1831, -0.1462]) tensor([0.3394, 0.2841, 0.1848, 0.1917]) -Greedy action tensor([ 0.7164, -0.1638, -0.0967, -0.2981]) tensor([0.4503, 0.1867, 0.1997, 0.1633]) -Greedy action tensor([ 0.4365, -0.0384, -0.0544, -0.3644]) tensor([0.3727, 0.2318, 0.2281, 0.1673]) -Greedy action tensor([ 0.8477, -0.5793, -0.1185, -0.3397]) tensor([0.5193, 0.1247, 0.1976, 0.1584]) -Greedy action tensor([ 0.9275, -0.2795, 0.0572, -0.8505]) tensor([0.5300, 0.1585, 0.2220, 0.0896]) -Greedy action tensor([ 0.6760, 0.1772, 0.0050, -0.6655]) tensor([0.4202, 0.2552, 0.2148, 0.1099]) -Greedy action tensor([ 0.7017, -0.2546, 0.0496, -0.5658]) tensor([0.4573, 0.1757, 0.2382, 0.1287]) -Greedy action tensor([ 0.5422, -0.4034, -0.1329, -0.4381]) tensor([0.4400, 0.1709, 0.2240, 0.1651]) -Greedy action tensor([ 0.6119, -0.4576, 0.2024, -0.4395]) tensor([0.4243, 0.1456, 0.2818, 0.1483]) -Greedy action tensor([ 0.6553, -0.6355, -0.0687, -0.6747]) tensor([0.4940, 0.1359, 0.2395, 0.1306]) -Greedy action tensor([ 0.6042, -0.0495, 0.0158, -0.2129]) tensor([0.3973, 0.2066, 0.2206, 0.1755]) -Greedy action tensor([ 0.2940, 0.1460, 0.1046, -0.2876]) tensor([0.3078, 0.2655, 0.2547, 0.1721]) -Greedy action tensor([ 0.5858, -0.3251, 0.0957, -0.2824]) tensor([0.4108, 0.1652, 0.2516, 0.1724]) -Greedy action tensor([ 0.2762, -0.2658, 0.1483, -0.2081]) tensor([0.3249, 0.1890, 0.2859, 0.2002]) -Greedy action tensor([ 0.9006, -0.5712, 0.1082, -0.1879]) tensor([0.4953, 0.1137, 0.2243, 0.1668]) -Greedy action tensor([ 0.5787, 0.0972, -0.0363, -0.0386]) tensor([0.3707, 0.2290, 0.2004, 0.1999]) -Greedy action tensor([ 0.3321, 0.3509, 0.1269, -0.2756]) tensor([0.2960, 0.3017, 0.2411, 0.1612]) -Greedy action tensor([ 0.5387, -0.2515, 0.0143, -0.2385]) tensor([0.3991, 0.1811, 0.2363, 0.1835]) -Greedy action tensor([ 0.3219, 0.0814, 0.0750, -0.1126]) tensor([0.3110, 0.2445, 0.2430, 0.2014]) -Greedy action tensor([ 0.7252, -0.4677, -0.0195, -0.3130]) tensor([0.4690, 0.1423, 0.2227, 0.1661]) -Greedy action tensor([ 0.5270, -0.3060, 0.0024, -0.4315]) tensor([0.4150, 0.1804, 0.2455, 0.1591]) -Greedy action tensor([ 0.2898, 0.2177, 0.0555, -0.2260]) tensor([0.3013, 0.2804, 0.2384, 0.1799]) -Greedy action tensor([ 0.4923, 0.0369, -0.1010, -0.1258]) tensor([0.3669, 0.2327, 0.2027, 0.1977]) -Greedy action tensor([ 0.1331, -0.0134, -0.0191, -0.2883]) tensor([0.2960, 0.2556, 0.2542, 0.1942]) -Greedy action tensor([ 0.3173, 0.0421, -0.1129, -0.2128]) tensor([0.3335, 0.2533, 0.2169, 0.1963]) -Greedy action tensor([ 0.2968, 0.0582, -0.0834, -0.0128]) tensor([0.3120, 0.2458, 0.2133, 0.2289]) -Greedy action tensor([ 0.8981, -0.4943, -0.0713, -0.4702]) tensor([0.5313, 0.1320, 0.2015, 0.1352]) -Greedy action tensor([ 0.4198, -0.1309, -0.1058, -0.3166]) tensor([0.3778, 0.2178, 0.2234, 0.1809]) -Greedy action tensor([ 0.7616, -0.3996, -0.0680, -0.4386]) tensor([0.4877, 0.1527, 0.2127, 0.1469]) -Greedy action tensor([ 0.8122, -0.5198, -0.1872, -0.6966]) tensor([0.5396, 0.1424, 0.1986, 0.1194]) -Greedy action tensor([ 0.3650, -0.0363, 0.0372, -0.1538]) tensor([0.3350, 0.2243, 0.2414, 0.1994]) -Greedy action tensor([ 0.6413, 0.0285, -0.1519, -0.3866]) tensor([0.4252, 0.2304, 0.1923, 0.1521]) -Greedy action tensor([ 0.4943, -0.2095, -0.0408, -0.3655]) tensor([0.3994, 0.1976, 0.2339, 0.1691]) -Greedy action tensor([ 0.7082, -0.4007, 0.0184, -0.5185]) tensor([0.4706, 0.1553, 0.2361, 0.1380]) -Greedy action tensor([ 0.8375, -0.2732, -0.1184, -0.2020]) tensor([0.4837, 0.1593, 0.1860, 0.1711]) -Greedy action tensor([ 0.9036, -0.6688, -0.0547, -0.4434]) tensor([0.5402, 0.1121, 0.2072, 0.1405]) -Greedy action tensor([ 0.5119, 0.1449, 0.0828, -0.0430]) tensor([0.3427, 0.2374, 0.2231, 0.1968]) -Greedy action tensor([ 0.4607, -0.2025, 0.0275, -0.3084]) tensor([0.3807, 0.1961, 0.2468, 0.1764]) -Greedy action tensor([ 0.4157, 0.0564, 0.0114, -0.1155]) tensor([0.3386, 0.2364, 0.2260, 0.1990]) -Greedy action tensor([ 0.8177, -0.2691, -0.0148, -0.4252]) tensor([0.4853, 0.1637, 0.2111, 0.1400]) -Greedy action tensor([ 0.3110, 0.2189, 0.0536, -0.2319]) tensor([0.3062, 0.2792, 0.2367, 0.1779]) -Greedy action tensor([ 0.6930, -0.1767, -0.0256, -0.4312]) tensor([0.4481, 0.1878, 0.2185, 0.1456]) -Greedy action tensor([ 0.7414, -0.3211, 0.1053, -0.4243]) tensor([0.4573, 0.1580, 0.2421, 0.1426]) -Greedy action tensor([ 0.6150, -0.4639, 0.0473, -0.5850]) tensor([0.4529, 0.1540, 0.2567, 0.1364]) -Greedy action tensor([ 0.2566, -0.0378, -0.0512, -0.1017]) tensor([0.3146, 0.2343, 0.2312, 0.2198]) -Greedy action tensor([ 0.5568, 0.2618, 0.0612, -0.2520]) tensor([0.3573, 0.2660, 0.2176, 0.1591]) -Greedy action tensor([ 0.5879, -0.2471, 0.1004, -0.3557]) tensor([0.4103, 0.1780, 0.2520, 0.1597]) -Greedy action tensor([ 1.0723, -0.6477, -0.0387, -0.6465]) tensor([0.5926, 0.1061, 0.1951, 0.1062]) -Greedy action tensor([ 0.6573, 0.1004, -0.0014, -0.3946]) tensor([0.4099, 0.2348, 0.2121, 0.1432]) -Greedy action tensor([ 0.0981, -0.0625, 0.1569, -0.2458]) tensor([0.2761, 0.2352, 0.2929, 0.1958]) -Greedy action tensor([ 0.1669, 0.0850, 0.1729, -0.2414]) tensor([0.2784, 0.2565, 0.2801, 0.1851]) -Greedy action tensor([ 0.9000, -0.2881, 0.0559, -0.6170]) tensor([0.5117, 0.1560, 0.2200, 0.1123]) -Greedy action tensor([ 0.4562, -0.1171, -0.0587, -0.1853]) tensor([0.3721, 0.2097, 0.2223, 0.1959]) -Greedy action tensor([ 0.4591, 0.0295, -0.0361, -0.3424]) tensor([0.3692, 0.2402, 0.2250, 0.1656]) -Greedy action tensor([ 0.6049, -0.0949, -0.1679, -0.5172]) tensor([0.4378, 0.2175, 0.2022, 0.1426]) -Greedy action tensor([ 0.4213, 0.0836, -0.0698, -0.1557]) tensor([0.3464, 0.2471, 0.2120, 0.1945]) -Greedy action tensor([ 0.4222, 0.1416, -0.0596, -0.3026]) tensor([0.3500, 0.2643, 0.2162, 0.1695]) -Greedy action tensor([-1.5844, -0.5088, 0.4820, 0.0577]) tensor([0.0588, 0.1725, 0.4647, 0.3040]) -Greedy action tensor([-1.7888, -0.2737, 0.5434, -0.0783]) tensor([0.0468, 0.2128, 0.4817, 0.2587]) -Greedy action tensor([-1.9344, -0.6765, 1.0131, 0.2941]) tensor([0.0304, 0.1071, 0.5799, 0.2826]) -Greedy action tensor([-0.8869, -0.0691, 0.3758, -0.3985]) tensor([0.1186, 0.2687, 0.4193, 0.1933]) -Greedy action tensor([-0.8921, -0.3784, 0.5493, 1.2368]) tensor([0.0653, 0.1092, 0.2762, 0.5493]) -Greedy action tensor([-1.0232, -0.0907, 0.6108, 1.0540]) tensor([0.0601, 0.1526, 0.3078, 0.4795]) -Greedy action tensor([-1.8171, -0.4766, 0.7509, 0.2126]) tensor([0.0393, 0.1500, 0.5119, 0.2988]) -Greedy action tensor([-1.7426, -0.4736, 0.5544, -0.0420]) tensor([0.0501, 0.1781, 0.4977, 0.2741]) -Greedy action tensor([-1.8247, -0.5246, 0.1690, -0.3129]) tensor([0.0604, 0.2218, 0.4437, 0.2741]) -Greedy action tensor([-1.7333, -0.4054, 0.9701, 0.7364]) tensor([0.0317, 0.1197, 0.4737, 0.3749]) -Greedy action tensor([-1.1673, 0.6888, 0.1582, 0.1026]) tensor([0.0679, 0.4346, 0.2557, 0.2418]) -Greedy action tensor([-1.8647, -0.8166, 0.1998, -0.3447]) tensor([0.0613, 0.1749, 0.4834, 0.2804]) -Greedy action tensor([-0.9781, -0.6546, 0.3086, 0.2167]) tensor([0.1075, 0.1485, 0.3891, 0.3549]) -Greedy action tensor([-1.6320, -0.3665, 0.3331, -0.1694]) tensor([0.0625, 0.2216, 0.4460, 0.2699]) -Greedy action tensor([-0.9983, 0.3885, 0.2468, -0.4478]) tensor([0.0979, 0.3920, 0.3402, 0.1698]) -Greedy action tensor([-1.6608, -0.4992, 0.8462, 0.5446]) tensor([0.0392, 0.1251, 0.4804, 0.3553]) -Greedy action tensor([-1.3856, -0.4231, 0.0185, -0.3120]) tensor([0.0942, 0.2466, 0.3836, 0.2756]) -Greedy action tensor([-1.9734, -0.5327, 1.1126, 0.3358]) tensor([0.0269, 0.1136, 0.5887, 0.2708]) -Greedy action tensor([-1.6124, -1.0011, 0.2264, -0.5663]) tensor([0.0835, 0.1538, 0.5250, 0.2376]) -Greedy action tensor([-1.7888, -0.4418, 0.5946, -0.0549]) tensor([0.0468, 0.1801, 0.5078, 0.2652]) -Greedy action tensor([-1.8587, -0.5069, 0.9610, 0.3859]) tensor([0.0322, 0.1244, 0.5398, 0.3037]) -Greedy action tensor([-1.1943, -0.5032, 0.4314, -0.1046]) tensor([0.0905, 0.1806, 0.4598, 0.2691]) -Greedy action tensor([-0.8807, -0.4143, 0.3187, 0.8191]) tensor([0.0878, 0.1400, 0.2914, 0.4807]) -Greedy action tensor([-1.7234, -0.4421, 0.5665, 0.0251]) tensor([0.0495, 0.1781, 0.4883, 0.2842]) -Greedy action tensor([-1.7588, -0.5010, 0.5987, -0.0560]) tensor([0.0486, 0.1710, 0.5135, 0.2668]) -Greedy action tensor([-1.3915, -0.5694, 0.3813, 0.1036]) tensor([0.0734, 0.1670, 0.4322, 0.3274]) -Greedy action tensor([-1.7966, -0.7400, 0.0837, -0.2825]) tensor([0.0668, 0.1921, 0.4377, 0.3035]) -Greedy action tensor([-1.2275, -0.8050, -0.4576, -0.5023]) tensor([0.1481, 0.2260, 0.3199, 0.3059]) -Greedy action tensor([-1.1480, -0.4663, 0.3550, 0.4845]) tensor([0.0794, 0.1571, 0.3571, 0.4064]) -Greedy action tensor([-1.7058, -1.0624, 0.0047, -0.7451]) tensor([0.0905, 0.1722, 0.5007, 0.2366]) -Greedy action tensor([-0.7663, -0.6543, 1.0721, 1.5924]) tensor([0.0527, 0.0589, 0.3312, 0.5572]) -Greedy action tensor([-1.8998, -0.4501, 0.6450, -0.1489]) tensor([0.0421, 0.1793, 0.5362, 0.2424]) -Greedy action tensor([-1.6171, -0.4993, 0.5621, 0.2452]) tensor([0.0517, 0.1582, 0.4571, 0.3330]) -Greedy action tensor([-0.7638, -0.6045, 1.0941, 1.5921]) tensor([0.0523, 0.0613, 0.3351, 0.5514]) -Greedy action tensor([-1.7782, -0.4536, 0.6336, 0.0422]) tensor([0.0453, 0.1703, 0.5050, 0.2795]) -Greedy action tensor([-0.9624, -0.5859, 0.1847, 0.3972]) tensor([0.1053, 0.1534, 0.3314, 0.4099]) -Greedy action tensor([-0.9274, -0.1634, 0.4648, -0.3082]) tensor([0.1108, 0.2378, 0.4457, 0.2058]) -Greedy action tensor([-1.3265, -0.2605, 0.7467, 0.8838]) tensor([0.0477, 0.1385, 0.3791, 0.4348]) -Greedy action tensor([-1.2412, -0.5125, 0.4792, 0.6594]) tensor([0.0652, 0.1350, 0.3640, 0.4358]) -Greedy action tensor([-1.8615, -0.5379, 0.1650, -0.3151]) tensor([0.0587, 0.2205, 0.4453, 0.2755]) -Greedy action tensor([-0.8873, -0.3272, 0.9542, 1.4080]) tensor([0.0527, 0.0922, 0.3322, 0.5229]) -Greedy action tensor([-1.8035, -0.4790, 0.6397, 0.0048]) tensor([0.0447, 0.1681, 0.5145, 0.2727]) -Greedy action tensor([-1.9966, -0.9289, 0.5326, -0.2281]) tensor([0.0448, 0.1304, 0.5621, 0.2627]) -Greedy action tensor([-1.3386, -0.6087, 0.4527, 0.1677]) tensor([0.0736, 0.1528, 0.4415, 0.3321]) -Greedy action tensor([-1.7516, -1.0522, 0.0185, -0.7125]) tensor([0.0854, 0.1719, 0.5014, 0.2414]) -Greedy action tensor([-0.8942, -0.4573, 0.7686, 1.4085]) tensor([0.0561, 0.0868, 0.2959, 0.5611]) -Greedy action tensor([-1.7798, -0.5500, 0.7612, 0.0612]) tensor([0.0427, 0.1461, 0.5420, 0.2692]) -Greedy action tensor([-1.9530, -0.7105, 0.8427, 0.2327]) tensor([0.0336, 0.1165, 0.5507, 0.2992]) -Greedy action tensor([-1.7145, -0.5401, 0.6548, 0.0743]) tensor([0.0478, 0.1548, 0.5113, 0.2861]) -Greedy action tensor([-1.6352, -0.5203, 0.4937, 0.0191]) tensor([0.0565, 0.1724, 0.4753, 0.2957]) -Greedy action tensor([-1.2479, -0.5861, 0.3004, 0.2948]) tensor([0.0812, 0.1573, 0.3818, 0.3797]) -Greedy action tensor([-1.6648, -0.4849, 0.9940, 0.7123]) tensor([0.0341, 0.1110, 0.4872, 0.3676]) -Greedy action tensor([-1.7568, -0.3343, 0.9085, 0.6954]) tensor([0.0321, 0.1332, 0.4616, 0.3730]) -Greedy action tensor([-1.7866, -0.4501, 0.5897, -0.1129]) tensor([0.0478, 0.1821, 0.5150, 0.2551]) -Greedy action tensor([-1.4831, -0.5347, 0.5043, 0.2736]) tensor([0.0600, 0.1548, 0.4377, 0.3475]) -Greedy action tensor([-1.8766, -0.4579, 0.6334, -0.1398]) tensor([0.0433, 0.1787, 0.5323, 0.2457]) -Greedy action tensor([-2.0356, -0.7154, 0.8564, 0.2205]) tensor([0.0309, 0.1158, 0.5579, 0.2954]) -Greedy action tensor([-1.5733, -0.4910, 0.4827, 0.0958]) tensor([0.0586, 0.1729, 0.4577, 0.3109]) -Greedy action tensor([-1.8704, -0.2863, 0.5963, -0.1315]) tensor([0.0428, 0.2088, 0.5047, 0.2437]) -Greedy action tensor([-0.8738, -0.1266, 0.4740, -0.4480]) tensor([0.1178, 0.2486, 0.4533, 0.1803]) -Greedy action tensor([-1.6452, -0.4491, 0.6490, 0.3784]) tensor([0.0459, 0.1518, 0.4551, 0.3472]) -Greedy action tensor([-1.4563, -0.4207, 0.5558, 0.4567]) tensor([0.0553, 0.1559, 0.4139, 0.3749]) -Greedy action tensor([-1.4784, -0.3411, 0.4347, -0.0247]) tensor([0.0659, 0.2055, 0.4465, 0.2820]) -Greedy action tensor([-1.8125, -0.4840, 0.6050, -0.1028]) tensor([0.0465, 0.1754, 0.5213, 0.2568]) -Greedy action tensor([-0.2605, 0.5524, 0.3648, 1.0585]) tensor([0.1128, 0.2544, 0.2109, 0.4219]) -Greedy action tensor([-0.9609, 0.4919, -0.0696, -0.0522]) tensor([0.0981, 0.4194, 0.2392, 0.2434]) -Greedy action tensor([-1.7476, -0.4626, 0.5951, 0.0265]) tensor([0.0478, 0.1728, 0.4976, 0.2818]) -Greedy action tensor([-0.7155, -0.4018, 1.1146, 1.5552]) tensor([0.0547, 0.0748, 0.3409, 0.5296]) -Greedy action tensor([-0.9619, -0.6613, 0.8418, 1.3780]) tensor([0.0532, 0.0718, 0.3229, 0.5520]) -Greedy action tensor([-1.6443, -0.5030, 0.4966, 0.0422]) tensor([0.0554, 0.1736, 0.4716, 0.2994]) -Greedy action tensor([-1.2940, -0.5404, 0.4291, -0.0045]) tensor([0.0809, 0.1719, 0.4533, 0.2938]) -Greedy action tensor([-1.8475, -0.4626, 0.6649, -0.0426]) tensor([0.0427, 0.1706, 0.5269, 0.2597]) -Greedy action tensor([-1.8373, -0.3641, 1.2075, 0.7917]) tensor([0.0249, 0.1085, 0.5221, 0.3445]) -Greedy action tensor([-0.9176, -0.4794, 0.4416, 1.1596]) tensor([0.0693, 0.1074, 0.2699, 0.5534]) -Greedy action tensor([-1.7552, -0.6244, 1.5235, 0.9216]) tensor([0.0221, 0.0686, 0.5875, 0.3218]) -Greedy action tensor([-1.3317, -0.5551, 0.3534, 0.1456]) tensor([0.0772, 0.1679, 0.4165, 0.3384]) -Greedy action tensor([-1.5318, -0.5317, 0.4498, 0.0663]) tensor([0.0628, 0.1708, 0.4558, 0.3106]) -Greedy action tensor([-1.8644, -0.7749, 1.2331, 0.6562]) tensor([0.0259, 0.0771, 0.5744, 0.3226]) -Greedy action tensor([-1.9246, -0.7972, 0.3905, -0.0774]) tensor([0.0486, 0.1502, 0.4926, 0.3085]) -Greedy action tensor([-0.5510, -0.5204, 0.1728, 0.2297]) tensor([0.1593, 0.1643, 0.3286, 0.3478]) -Greedy action tensor([-0.9655, -0.3465, 0.6794, 1.2750]) tensor([0.0574, 0.1065, 0.2971, 0.5390]) -Greedy action tensor([ 0.9984, -0.2059, -0.1828, 0.0051]) tensor([0.5058, 0.1517, 0.1552, 0.1873]) -Greedy action tensor([ 1.8649, 0.0468, -0.2292, -0.0449]) tensor([0.6975, 0.1132, 0.0859, 0.1033]) -Greedy action tensor([ 0.8458, -0.2332, -0.5036, 0.4908]) tensor([0.4347, 0.1478, 0.1128, 0.3048]) -Greedy action tensor([ 0.9667, -0.1559, -0.3271, -0.0325]) tensor([0.5082, 0.1654, 0.1394, 0.1871]) -Greedy action tensor([ 1.0947, -0.3142, -0.2765, 0.0680]) tensor([0.5387, 0.1317, 0.1367, 0.1930]) -Greedy action tensor([ 1.5700, -0.3451, -0.3501, 0.3339]) tensor([0.6311, 0.0930, 0.0925, 0.1834]) -Greedy action tensor([ 0.6767, -0.2745, -0.2011, 0.0162]) tensor([0.4313, 0.1666, 0.1793, 0.2228]) -Greedy action tensor([ 0.4489, -0.1668, -0.6919, 0.0702]) tensor([0.3930, 0.2123, 0.1256, 0.2691]) -Greedy action tensor([ 1.0906, -0.8846, -0.3844, 0.1515]) tensor([0.5687, 0.0789, 0.1301, 0.2223]) -Greedy action tensor([ 1.1748, -0.2482, -0.1921, -0.1683]) tensor([0.5692, 0.1372, 0.1451, 0.1486]) -Greedy action tensor([ 1.2790, -0.3900, -0.0710, 0.0048]) tensor([0.5789, 0.1091, 0.1501, 0.1619]) -Greedy action tensor([ 1.5004, -0.1499, -0.2198, 0.0759]) tensor([0.6205, 0.1191, 0.1111, 0.1493]) -Greedy action tensor([ 0.8769, -0.3746, -0.3065, -0.0281]) tensor([0.5008, 0.1433, 0.1534, 0.2026]) -Greedy action tensor([ 0.6481, -0.3150, 0.0413, -0.1666]) tensor([0.4220, 0.1611, 0.2300, 0.1869]) -Greedy action tensor([ 1.6444, -0.5982, -0.2175, 0.6721]) tensor([0.6099, 0.0648, 0.0948, 0.2306]) -Greedy action tensor([ 1.0008, -0.6683, -0.5913, 0.5895]) tensor([0.4867, 0.0917, 0.0990, 0.3226]) -Greedy action tensor([ 1.4203, -0.3452, -0.3997, 0.1727]) tensor([0.6172, 0.1056, 0.1000, 0.1772]) -Greedy action tensor([ 1.4214, -0.7220, -0.3204, 0.4052]) tensor([0.6044, 0.0709, 0.1059, 0.2188]) -Greedy action tensor([ 1.3688, -0.2858, -0.2791, 0.1741]) tensor([0.5930, 0.1134, 0.1141, 0.1795]) -Greedy action tensor([ 1.4343, -0.2808, -0.1843, 0.2011]) tensor([0.5990, 0.1078, 0.1187, 0.1745]) -Greedy action tensor([ 0.8893, -0.3429, -0.6258, -0.1965]) tensor([0.5408, 0.1577, 0.1189, 0.1826]) -Greedy action tensor([ 0.8967, -0.3532, -0.6596, 0.9256]) tensor([0.3958, 0.1134, 0.0835, 0.4074]) -Greedy action tensor([ 1.2165, -0.3295, -0.1156, -0.0363]) tensor([0.5673, 0.1209, 0.1497, 0.1621]) -Greedy action tensor([ 0.5385, 0.0176, 0.0696, -0.2012]) tensor([0.3708, 0.2202, 0.2320, 0.1770]) -Greedy action tensor([ 1.1702, -0.9201, -0.5229, 0.8837]) tensor([0.4858, 0.0601, 0.0894, 0.3648]) -Greedy action tensor([ 0.8453, -0.3590, -0.3080, 0.2470]) tensor([0.4618, 0.1385, 0.1457, 0.2539]) -Greedy action tensor([ 0.4956, -0.4078, 0.0352, -0.1951]) tensor([0.3941, 0.1597, 0.2487, 0.1975]) -Greedy action tensor([ 0.9431, -0.4841, -0.5032, 0.0038]) tensor([0.5358, 0.1286, 0.1262, 0.2095]) -Greedy action tensor([ 0.6365, -0.3627, -0.6460, -0.0167]) tensor([0.4617, 0.1700, 0.1280, 0.2403]) -Greedy action tensor([ 0.5440, -0.1985, -0.3881, 0.3427]) tensor([0.3721, 0.1771, 0.1465, 0.3043]) -Greedy action tensor([ 0.5841, -0.2196, -0.4524, 0.0400]) tensor([0.4197, 0.1879, 0.1489, 0.2436]) -Greedy action tensor([ 1.1832, -0.5871, -0.2889, 0.6074]) tensor([0.5097, 0.0868, 0.1169, 0.2866]) -Greedy action tensor([ 1.2280, -0.5302, -0.4974, 0.3835]) tensor([0.5617, 0.0968, 0.1000, 0.2414]) -Greedy action tensor([ 1.2890, -0.6589, -0.1634, 0.3973]) tensor([0.5597, 0.0798, 0.1310, 0.2295]) -Greedy action tensor([ 1.0128, -0.5237, -0.2141, 0.1736]) tensor([0.5154, 0.1109, 0.1511, 0.2227]) -Greedy action tensor([ 0.9593, -0.3241, -0.0674, 0.4175]) tensor([0.4511, 0.1250, 0.1616, 0.2624]) -Greedy action tensor([ 0.9106, -0.1712, -0.3059, -0.0379]) tensor([0.4944, 0.1676, 0.1465, 0.1915]) -Greedy action tensor([ 1.2473, -0.2488, -0.1677, -0.2169]) tensor([0.5889, 0.1319, 0.1431, 0.1362]) -Greedy action tensor([ 1.2482, -0.5006, -0.4014, 0.0392]) tensor([0.6007, 0.1045, 0.1154, 0.1793]) -Greedy action tensor([ 1.2363, -0.4772, 0.1165, -0.1218]) tensor([0.5670, 0.1022, 0.1850, 0.1458]) -Greedy action tensor([ 1.0236, -0.4466, -0.4775, 0.0578]) tensor([0.5454, 0.1254, 0.1216, 0.2076]) -Greedy action tensor([ 1.0872, -0.4718, -0.2319, -0.1789]) tensor([0.5683, 0.1195, 0.1519, 0.1602]) -Greedy action tensor([ 0.9893, -0.2769, 0.0605, -0.1360]) tensor([0.4996, 0.1408, 0.1974, 0.1622]) -Greedy action tensor([ 1.4397, -0.3561, -0.2993, -0.0860]) tensor([0.6414, 0.1065, 0.1127, 0.1395]) -Greedy action tensor([ 1.2554, -0.5986, -0.5568, 0.8784]) tensor([0.4985, 0.0781, 0.0814, 0.3420]) -Greedy action tensor([ 1.1285, -0.7458, -0.1140, 0.0936]) tensor([0.5564, 0.0854, 0.1606, 0.1976]) -Greedy action tensor([ 0.4607, -0.1424, -0.3978, -0.1434]) tensor([0.3972, 0.2173, 0.1683, 0.2171]) -Greedy action tensor([ 0.8240, -0.5370, -0.0572, 0.0139]) tensor([0.4727, 0.1212, 0.1958, 0.2103]) -Greedy action tensor([ 0.6872, -0.2721, 0.0166, 0.1752]) tensor([0.4010, 0.1536, 0.2051, 0.2403]) -Greedy action tensor([ 1.2026, -0.3319, -0.3046, -0.0914]) tensor([0.5844, 0.1260, 0.1295, 0.1602]) -Greedy action tensor([ 1.6001, -0.7214, -0.0979, 0.5194]) tensor([0.6171, 0.0606, 0.1130, 0.2094]) -Greedy action tensor([ 1.1524, -0.4308, -0.2869, 0.7620]) tensor([0.4719, 0.0969, 0.1119, 0.3194]) -Greedy action tensor([ 0.6062, -0.4267, -0.0809, 0.1646]) tensor([0.3997, 0.1423, 0.2010, 0.2570]) -Greedy action tensor([ 0.8404, -0.4925, -0.4129, 0.4959]) tensor([0.4429, 0.1168, 0.1265, 0.3138]) -Greedy action tensor([ 1.0679, -0.6013, -0.3666, 0.2359]) tensor([0.5371, 0.1012, 0.1280, 0.2337]) -Greedy action tensor([ 0.9725, 0.1682, -0.0333, -0.1811]) tensor([0.4698, 0.2102, 0.1718, 0.1482]) -Greedy action tensor([ 1.2579, -0.5041, -0.3626, 0.1597]) tensor([0.5872, 0.1008, 0.1162, 0.1958]) -Greedy action tensor([ 1.4669, -0.3309, -0.0652, -0.2370]) tensor([0.6395, 0.1059, 0.1382, 0.1164]) -Greedy action tensor([ 1.2357, -0.4781, -0.4577, 0.5079]) tensor([0.5414, 0.0976, 0.0996, 0.2615]) -Greedy action tensor([ 1.1178, -0.5590, -0.0720, 0.3855]) tensor([0.5071, 0.0948, 0.1543, 0.2438]) -Greedy action tensor([ 0.8950, -0.0453, 0.2156, -0.2178]) tensor([0.4492, 0.1754, 0.2277, 0.1476]) -Greedy action tensor([ 0.6404, -0.3230, -0.2299, -0.2234]) tensor([0.4500, 0.1717, 0.1885, 0.1897]) -Greedy action tensor([ 0.8058, 0.3414, -0.1873, -0.5385]) tensor([0.4425, 0.2781, 0.1639, 0.1154]) -Greedy action tensor([ 1.3396, -0.4167, -0.1496, 0.3424]) tensor([0.5659, 0.0977, 0.1276, 0.2088]) -Greedy action tensor([ 1.5438, -0.6894, -0.0486, 0.2368]) tensor([0.6324, 0.0678, 0.1287, 0.1711]) -Greedy action tensor([ 0.2037, -0.2596, -0.1771, -0.0248]) tensor([0.3217, 0.2024, 0.2198, 0.2560]) -Greedy action tensor([1.1965, 0.0577, 0.0122, 0.0666]) tensor([0.5130, 0.1643, 0.1570, 0.1657]) -Greedy action tensor([ 1.1480, -0.4158, -0.0519, -0.1339]) tensor([0.5593, 0.1171, 0.1685, 0.1552]) -Greedy action tensor([ 0.7752, -0.2516, -0.3146, -0.2018]) tensor([0.4829, 0.1729, 0.1624, 0.1818]) -Greedy action tensor([ 1.4952, -0.3290, -0.5701, -0.1133]) tensor([0.6719, 0.1084, 0.0852, 0.1345]) -Greedy action tensor([ 0.9226, -0.2707, -0.6343, -0.1368]) tensor([0.5374, 0.1630, 0.1133, 0.1863]) -Greedy action tensor([ 1.0064, -0.3549, -0.1150, -0.1554]) tensor([0.5277, 0.1353, 0.1719, 0.1651]) -Greedy action tensor([ 1.0618, 0.0281, -0.2555, 0.0141]) tensor([0.5065, 0.1802, 0.1357, 0.1777]) -Greedy action tensor([ 0.5322, 0.0145, 0.0842, -0.3201]) tensor([0.3758, 0.2239, 0.2401, 0.1602]) -Greedy action tensor([ 0.9726, -0.3019, -0.5599, 0.8733]) tensor([0.4165, 0.1164, 0.0900, 0.3771]) -Greedy action tensor([ 1.6681, -0.7264, -0.2656, 0.3610]) tensor([0.6638, 0.0606, 0.0960, 0.1796]) -Greedy action tensor([ 0.9372, -0.3917, -0.1402, -0.0614]) tensor([0.5067, 0.1342, 0.1725, 0.1867]) -Greedy action tensor([ 1.5890, -0.5664, -0.3417, 0.5042]) tensor([0.6254, 0.0725, 0.0907, 0.2114]) -Greedy action tensor([ 0.8233, -0.3946, -0.7441, 0.9812]) tensor([0.3738, 0.1106, 0.0780, 0.4377]) -Greedy action tensor([ 9.8627e-01, -7.4540e-01, -6.9025e-04, -4.6554e-03]) tensor([0.5206, 0.0921, 0.1940, 0.1933]) -Greedy action tensor([ 1.0900, -0.6054, -0.5236, 0.1000]) tensor([0.5700, 0.1046, 0.1135, 0.2118]) -Greedy action tensor([-0.0241, 0.1400, 0.3519, -0.4245]) tensor([0.2323, 0.2737, 0.3383, 0.1556]) -Greedy action tensor([-0.0178, -0.6768, 0.9080, 0.4781]) tensor([0.1760, 0.0910, 0.4441, 0.2889]) -Greedy action tensor([ 0.9224, 0.5523, -0.1865, 0.8608]) tensor([0.3377, 0.2333, 0.1114, 0.3176]) -Greedy action tensor([ 0.2070, -0.2446, -0.1122, 2.2191]) tensor([0.1016, 0.0647, 0.0738, 0.7599]) -Greedy action tensor([ 1.2030, -0.5011, 0.3773, 0.9175]) tensor([0.4217, 0.0767, 0.1847, 0.3169]) -Greedy action tensor([ 1.7729, -0.0269, 0.5824, 0.5605]) tensor([0.5660, 0.0936, 0.1721, 0.1684]) -Greedy action tensor([ 1.2308, -0.5659, -0.2226, 0.2417]) tensor([0.5645, 0.0936, 0.1320, 0.2099]) -Greedy action tensor([ 1.1427, -0.2855, 0.0224, 0.7415]) tensor([0.4473, 0.1072, 0.1459, 0.2995]) -Greedy action tensor([0.8835, 0.2357, 0.2779, 1.0107]) tensor([0.3120, 0.1633, 0.1703, 0.3544]) -Greedy action tensor([ 0.7575, 0.2461, -0.5129, 0.3442]) tensor([0.3934, 0.2359, 0.1104, 0.2602]) -Greedy action tensor([ 0.9922, 0.5026, -0.4127, 0.9692]) tensor([0.3527, 0.2161, 0.0865, 0.3447]) -Greedy action tensor([ 0.0444, 0.4326, 1.0114, -0.2829]) tensor([0.1717, 0.2531, 0.4515, 0.1238]) -Greedy action tensor([-0.7594, -1.1983, 0.7425, 0.0879]) tensor([0.1181, 0.0761, 0.5302, 0.2755]) -Greedy action tensor([0.7753, 0.2850, 0.4252, 0.7553]) tensor([0.3033, 0.1858, 0.2137, 0.2973]) -Greedy action tensor([ 0.1639, 0.1901, 1.0802, -0.7750]) tensor([0.2034, 0.2087, 0.5084, 0.0795]) -Greedy action tensor([ 1.2131, -0.2079, 0.7059, 0.9996]) tensor([0.3772, 0.0911, 0.2271, 0.3046]) -Greedy action tensor([ 0.8087, 0.4431, -0.1842, 0.4661]) tensor([0.3605, 0.2501, 0.1336, 0.2559]) -Greedy action tensor([ 1.0227, 0.6374, -0.7322, 1.0442]) tensor([0.3478, 0.2366, 0.0601, 0.3554]) -Greedy action tensor([ 0.5730, -1.0578, 1.1221, 0.0303]) tensor([0.2850, 0.0558, 0.4935, 0.1656]) -Greedy action tensor([ 0.9708, -0.0202, -0.2434, 0.2936]) tensor([0.4595, 0.1706, 0.1365, 0.2335]) -Greedy action tensor([ 0.2894, 0.4255, -0.3090, 0.2951]) tensor([0.2702, 0.3096, 0.1485, 0.2717]) -Greedy action tensor([1.1731, 0.0149, 0.4129, 0.4989]) tensor([0.4365, 0.1371, 0.2041, 0.2224]) -Greedy action tensor([ 0.8119, -1.3614, -0.7479, 1.9562]) tensor([0.2240, 0.0255, 0.0471, 0.7034]) -Greedy action tensor([1.1960, 0.0679, 0.9068, 1.1844]) tensor([0.3267, 0.1057, 0.2446, 0.3229]) -Greedy action tensor([ 1.1517, -0.7754, -0.8813, 0.8335]) tensor([0.4990, 0.0726, 0.0653, 0.3630]) -Greedy action tensor([ 0.9323, -1.3223, 0.2647, 1.1073]) tensor([0.3560, 0.0373, 0.1826, 0.4241]) -Greedy action tensor([ 1.8810, -0.2437, 0.9012, 1.3143]) tensor([0.4849, 0.0579, 0.1820, 0.2751]) -Greedy action tensor([ 0.6828, -0.0719, 0.9039, -0.2117]) tensor([0.3198, 0.1504, 0.3990, 0.1308]) -Greedy action tensor([ 1.4729, -0.5179, 0.0127, 0.6358]) tensor([0.5550, 0.0758, 0.1289, 0.2403]) -Greedy action tensor([ 0.5790, -0.7179, 0.7856, -0.5040]) tensor([0.3519, 0.0962, 0.4327, 0.1192]) -Greedy action tensor([-0.8940, -1.0188, 0.4170, 1.3313]) tensor([0.0673, 0.0594, 0.2498, 0.6234]) -Greedy action tensor([ 0.7927, -1.3379, 0.1024, 0.5544]) tensor([0.4152, 0.0493, 0.2082, 0.3272]) -Greedy action tensor([ 0.6070, 0.7811, -0.2870, 0.0686]) tensor([0.3142, 0.3739, 0.1285, 0.1834]) -Greedy action tensor([ 0.9939, -0.6802, -0.1757, 1.1663]) tensor([0.3723, 0.0698, 0.1156, 0.4423]) -Greedy action tensor([-0.3199, 0.1763, 0.9934, 0.2069]) tensor([0.1242, 0.2039, 0.4617, 0.2103]) -Greedy action tensor([ 1.1152, -0.1311, 0.9707, 0.3631]) tensor([0.3810, 0.1096, 0.3298, 0.1796]) -Greedy action tensor([-0.3263, -0.1667, 0.7074, 0.2034]) tensor([0.1496, 0.1755, 0.4207, 0.2541]) -Greedy action tensor([ 1.2120, 0.5375, -1.0021, 0.0993]) tensor([0.5135, 0.2616, 0.0561, 0.1688]) -Greedy action tensor([ 1.1823, 1.1100, -0.4903, 0.0177]) tensor([0.4115, 0.3828, 0.0773, 0.1284]) -Greedy action tensor([ 0.5320, 0.9134, -0.2187, 0.1885]) tensor([0.2743, 0.4017, 0.1295, 0.1946]) -Greedy action tensor([ 0.5527, -1.4985, 1.9447, 0.8944]) tensor([0.1525, 0.0196, 0.6133, 0.2146]) -Greedy action tensor([0.1287, 0.3182, 0.1609, 0.4004]) tensor([0.2196, 0.2654, 0.2268, 0.2882]) -Greedy action tensor([ 0.6488, 0.4278, 0.7379, -0.1465]) tensor([0.2988, 0.2396, 0.3267, 0.1349]) -Greedy action tensor([-0.7031, -0.6405, -0.9258, 1.0118]) tensor([0.1188, 0.1264, 0.0950, 0.6598]) -Greedy action tensor([ 2.3157, -1.3522, 0.3412, 1.5087]) tensor([0.6209, 0.0159, 0.0862, 0.2770]) -Greedy action tensor([-0.3365, 0.4563, 0.4374, -0.3092]) tensor([0.1561, 0.3449, 0.3385, 0.1604]) -Greedy action tensor([1.2193, 0.1186, 0.6478, 2.2435]) tensor([0.2136, 0.0710, 0.1206, 0.5948]) -Greedy action tensor([ 1.4245, -0.2120, 0.3690, 1.5043]) tensor([0.3808, 0.0741, 0.1325, 0.4125]) -Greedy action tensor([ 1.0803, 0.1026, 1.4452, -0.0228]) tensor([0.3176, 0.1195, 0.4575, 0.1054]) -Greedy action tensor([ 1.4906, -0.4044, 0.6392, 0.0350]) tensor([0.5524, 0.0830, 0.2358, 0.1288]) -Greedy action tensor([ 0.6207, -0.8284, -0.6353, 1.8985]) tensor([0.1958, 0.0460, 0.0557, 0.7025]) -Greedy action tensor([ 1.8019, -0.7048, 1.1309, 0.4753]) tensor([0.5382, 0.0439, 0.2751, 0.1428]) -Greedy action tensor([0.5721, 0.5786, 0.2841, 1.5373]) tensor([0.1858, 0.1870, 0.1393, 0.4878]) -Greedy action tensor([ 0.5061, -0.3134, 1.0932, 0.4649]) tensor([0.2381, 0.1049, 0.4284, 0.2285]) -Greedy action tensor([ 0.7541, -0.6023, 0.1432, -0.4048]) tensor([0.4730, 0.1218, 0.2568, 0.1484]) -Greedy action tensor([ 0.6901, -0.3755, 0.3490, 0.1972]) tensor([0.3750, 0.1292, 0.2667, 0.2291]) -Greedy action tensor([-0.0841, -2.0892, -0.4353, 1.0451]) tensor([0.2028, 0.0273, 0.1427, 0.6272]) -Greedy action tensor([0.3714, 0.1304, 0.6482, 0.6246]) tensor([0.2276, 0.1789, 0.3002, 0.2932]) -Greedy action tensor([-0.0551, 0.9657, -0.3238, 0.4704]) tensor([0.1605, 0.4454, 0.1227, 0.2714]) -Greedy action tensor([-0.9571, -0.9364, -0.1874, 0.5146]) tensor([0.1171, 0.1196, 0.2529, 0.5103]) -Greedy action tensor([0.7962, 0.2471, 0.3822, 0.1420]) tensor([0.3625, 0.2094, 0.2396, 0.1885]) -Greedy action tensor([ 0.4960, -0.4489, -0.0269, -0.4579]) tensor([0.4225, 0.1642, 0.2505, 0.1628]) -Greedy action tensor([ 1.3679, 0.1372, -0.3906, 1.6327]) tensor([0.3613, 0.1055, 0.0623, 0.4709]) -Greedy action tensor([ 0.5824, -0.4043, -0.2968, 0.5873]) tensor([0.3580, 0.1335, 0.1486, 0.3598]) -Greedy action tensor([ 1.3856, -0.1293, 0.3360, -0.1089]) tensor([0.5573, 0.1225, 0.1951, 0.1250]) -Greedy action tensor([-0.5319, -0.1349, 0.9760, -0.1165]) tensor([0.1174, 0.1746, 0.5302, 0.1778]) -Greedy action tensor([ 1.8022, -1.1557, 0.0160, 0.8669]) tensor([0.6204, 0.0322, 0.1040, 0.2435]) -Greedy action tensor([0.3796, 0.1408, 0.1636, 0.4699]) tensor([0.2712, 0.2136, 0.2185, 0.2968]) -Greedy action tensor([ 0.1016, -0.6808, -0.1467, 0.2396]) tensor([0.2954, 0.1351, 0.2304, 0.3391]) -Greedy action tensor([ 0.4897, 0.3519, -0.6360, 0.4974]) tensor([0.3122, 0.2720, 0.1013, 0.3146]) -Greedy action tensor([ 0.2416, -0.0146, -0.9921, -0.5195]) tensor([0.3949, 0.3056, 0.1150, 0.1845]) -Greedy action tensor([0.6271, 0.1965, 1.0384, 0.0654]) tensor([0.2682, 0.1743, 0.4046, 0.1529]) -Greedy action tensor([ 0.7793, -0.8743, 0.1743, 0.1310]) tensor([0.4424, 0.0847, 0.2416, 0.2313]) -Greedy action tensor([ 1.2263, -0.3266, 0.7611, 1.3786]) tensor([0.3329, 0.0704, 0.2090, 0.3876]) -Greedy action tensor([ 1.0447, 1.0619, -1.1744, 0.4695]) tensor([0.3719, 0.3784, 0.0404, 0.2092]) -Greedy action tensor([ 0.5963, -0.3688, 1.7925, 0.1896]) tensor([0.1868, 0.0712, 0.6177, 0.1244]) -Greedy action tensor([ 0.3583, 0.0887, -0.6667, 0.7257]) tensor([0.2804, 0.2141, 0.1006, 0.4049]) -Greedy action tensor([ 0.3586, -1.4630, -0.1845, 1.9816]) tensor([0.1468, 0.0238, 0.0853, 0.7441]) -Greedy action tensor([ 0.7135, -0.9574, -0.5516, 1.7376]) tensor([0.2350, 0.0442, 0.0663, 0.6544]) -Greedy action tensor([ 1.5235, -0.9237, 1.5344, 1.2166]) tensor([0.3530, 0.0305, 0.3568, 0.2597]) -Greedy action tensor([-0.3695, 0.4130, 0.6379, -0.1989]) tensor([0.1406, 0.3075, 0.3851, 0.1668]) -Greedy action tensor([ 0.9115, 0.1800, -0.2112, 0.4534]) tensor([0.4100, 0.1973, 0.1334, 0.2593]) -Greedy action tensor([ 0.7536, -0.2721, 0.2061, -0.2222]) tensor([0.4322, 0.1550, 0.2500, 0.1629]) -Greedy action tensor([ 0.6026, -0.0563, -0.0576, -0.3238]) tensor([0.4115, 0.2129, 0.2126, 0.1629]) -Greedy action tensor([ 0.3207, -0.0356, -0.0089, -0.2937]) tensor([0.3378, 0.2366, 0.2429, 0.1827]) -Greedy action tensor([ 0.4176, -0.0130, -0.0354, -0.1286]) tensor([0.3490, 0.2269, 0.2219, 0.2022]) -Greedy action tensor([ 0.9002, -0.6863, -0.0677, -0.5134]) tensor([0.5471, 0.1120, 0.2078, 0.1331]) -Greedy action tensor([ 0.3695, -0.0798, 0.0782, -0.2978]) tensor([0.3450, 0.2202, 0.2578, 0.1770]) -Greedy action tensor([ 0.4563, -0.1342, -0.1285, -0.2564]) tensor([0.3844, 0.2130, 0.2142, 0.1885]) -Greedy action tensor([ 0.9408, -0.5842, -0.1151, -0.5978]) tensor([0.5617, 0.1222, 0.1954, 0.1206]) -Greedy action tensor([ 0.5111, 0.1032, -0.0290, -0.0681]) tensor([0.3561, 0.2368, 0.2075, 0.1996]) -Greedy action tensor([ 0.0614, 0.0053, 0.0845, -0.2308]) tensor([0.2691, 0.2545, 0.2754, 0.2010]) -Greedy action tensor([ 0.5404, 0.0465, -0.0158, -0.0246]) tensor([0.3634, 0.2217, 0.2084, 0.2065]) -Greedy action tensor([ 0.2410, 0.1994, 0.0182, -0.2004]) tensor([0.2939, 0.2819, 0.2352, 0.1890]) -Greedy action tensor([ 0.5280, -0.1232, -0.0119, -0.3279]) tensor([0.3954, 0.2062, 0.2304, 0.1680]) -Greedy action tensor([ 0.7297, -0.3589, -0.0109, -0.3427]) tensor([0.4639, 0.1562, 0.2212, 0.1587]) -Greedy action tensor([ 0.7600, -0.1648, 0.1177, -0.3522]) tensor([0.4441, 0.1762, 0.2337, 0.1461]) -Greedy action tensor([ 0.6535, -0.3742, -0.0819, -0.3204]) tensor([0.4515, 0.1616, 0.2164, 0.1705]) -Greedy action tensor([ 0.4955, -0.2967, -0.0312, -0.4706]) tensor([0.4125, 0.1868, 0.2436, 0.1570]) -Greedy action tensor([ 0.4461, -0.0367, -0.0812, -0.2706]) tensor([0.3710, 0.2289, 0.2189, 0.1812]) -Greedy action tensor([ 0.5019, -0.2979, 0.0522, -0.4582]) tensor([0.4048, 0.1819, 0.2582, 0.1550]) -Greedy action tensor([ 0.6083, -0.0390, 0.0233, -0.1516]) tensor([0.3924, 0.2054, 0.2186, 0.1835]) -Greedy action tensor([ 0.7717, -0.3215, 0.0166, -0.4589]) tensor([0.4768, 0.1598, 0.2241, 0.1393]) -Greedy action tensor([ 0.5161, -0.3639, 0.1858, -0.4545]) tensor([0.3980, 0.1651, 0.2861, 0.1508]) -Greedy action tensor([ 0.5780, -0.2781, -0.0311, -0.2048]) tensor([0.4122, 0.1751, 0.2242, 0.1885]) -Greedy action tensor([ 0.6355, -0.0393, 0.0889, -0.3203]) tensor([0.4044, 0.2060, 0.2341, 0.1555]) -Greedy action tensor([ 0.7728, -0.4074, 0.1674, -0.4391]) tensor([0.4650, 0.1428, 0.2538, 0.1384]) -Greedy action tensor([ 0.3060, 0.3118, 0.1915, -0.2499]) tensor([0.2881, 0.2898, 0.2569, 0.1652]) -Greedy action tensor([ 0.3329, 0.1214, -0.2134, -0.3682]) tensor([0.3467, 0.2806, 0.2008, 0.1720]) -Greedy action tensor([ 0.2297, 0.2020, 0.0581, -0.2422]) tensor([0.2908, 0.2828, 0.2450, 0.1814]) -Greedy action tensor([ 0.2752, 0.3214, 0.0831, -0.2435]) tensor([0.2884, 0.3020, 0.2380, 0.1717]) -Greedy action tensor([ 0.8124, -0.6224, -0.1725, -0.7237]) tensor([0.5474, 0.1304, 0.2044, 0.1178]) -Greedy action tensor([ 0.8087, -0.0992, -0.0617, -0.2367]) tensor([0.4601, 0.1856, 0.1927, 0.1617]) -Greedy action tensor([ 0.3516, 0.1212, -0.0100, -0.2995]) tensor([0.3320, 0.2637, 0.2312, 0.1731]) -Greedy action tensor([ 0.6640, -0.4045, 0.1532, -0.4871]) tensor([0.4425, 0.1520, 0.2655, 0.1400]) -Greedy action tensor([ 0.4091, -0.3113, 0.1880, -0.3386]) tensor([0.3621, 0.1762, 0.2903, 0.1714]) -Greedy action tensor([ 0.5058, -0.1669, 0.2277, -0.3944]) tensor([0.3740, 0.1908, 0.2832, 0.1520]) -Greedy action tensor([ 0.4187, -0.1035, 0.0257, -0.4820]) tensor([0.3739, 0.2218, 0.2524, 0.1519]) -Greedy action tensor([ 0.6065, -0.1966, -0.0397, -0.2665]) tensor([0.4185, 0.1874, 0.2193, 0.1748]) -Greedy action tensor([ 0.2543, -0.1375, -0.0450, -0.3013]) tensor([0.3344, 0.2260, 0.2479, 0.1918]) -Greedy action tensor([ 0.8490, -0.2853, 0.0679, -0.5303]) tensor([0.4923, 0.1583, 0.2254, 0.1239]) -Greedy action tensor([ 0.7680, -0.0954, -0.0745, -0.3783]) tensor([0.4608, 0.1943, 0.1984, 0.1464]) -Greedy action tensor([ 0.6564, -0.0118, -0.1119, -0.2140]) tensor([0.4175, 0.2140, 0.1936, 0.1748]) -Greedy action tensor([ 0.9828, -0.8661, -0.0679, -0.6835]) tensor([0.5896, 0.0928, 0.2062, 0.1114]) -Greedy action tensor([ 0.4423, 0.0323, 0.1379, -0.1786]) tensor([0.3403, 0.2258, 0.2510, 0.1829]) -Greedy action tensor([ 0.4913, -0.2634, -0.0169, -0.2498]) tensor([0.3924, 0.1845, 0.2361, 0.1870]) -Greedy action tensor([ 0.5318, -0.1862, -0.0584, -0.0611]) tensor([0.3854, 0.1880, 0.2136, 0.2130]) -Greedy action tensor([ 0.6046, -0.4110, -0.1463, -0.5104]) tensor([0.4625, 0.1675, 0.2183, 0.1517]) -Greedy action tensor([ 0.4244, -0.1237, 0.0465, -0.1847]) tensor([0.3562, 0.2059, 0.2441, 0.1937]) -Greedy action tensor([ 0.5904, -0.5607, -0.0307, -0.3156]) tensor([0.4429, 0.1401, 0.2380, 0.1790]) -Greedy action tensor([ 0.5344, -0.1190, 0.0252, -0.3840]) tensor([0.3968, 0.2064, 0.2384, 0.1584]) -Greedy action tensor([ 0.5534, -0.2502, -0.0650, -0.3255]) tensor([0.4164, 0.1864, 0.2243, 0.1729]) -Greedy action tensor([ 0.7303, -0.4516, 0.0368, -0.5067]) tensor([0.4769, 0.1463, 0.2384, 0.1384]) -Greedy action tensor([ 0.3055, 0.0040, 0.0952, -0.2866]) tensor([0.3223, 0.2384, 0.2611, 0.1782]) -Greedy action tensor([ 0.5590, -0.0365, -0.0350, -0.3197]) tensor([0.3970, 0.2189, 0.2192, 0.1649]) -Greedy action tensor([ 0.4955, -0.1078, -0.0780, -0.3568]) tensor([0.3942, 0.2156, 0.2221, 0.1681]) -Greedy action tensor([ 0.7104, -0.3607, 0.0542, -0.2590]) tensor([0.4463, 0.1529, 0.2315, 0.1693]) -Greedy action tensor([ 0.6424, -0.2500, -0.0242, -0.3032]) tensor([0.4326, 0.1772, 0.2221, 0.1680]) -Greedy action tensor([ 0.5318, -0.0235, -0.0822, -0.0931]) tensor([0.3773, 0.2165, 0.2042, 0.2020]) -Greedy action tensor([ 0.8148, -0.5868, -0.0797, -0.5538]) tensor([0.5237, 0.1289, 0.2141, 0.1333]) -Greedy action tensor([ 0.5025, -0.2182, -0.0464, -0.5532]) tensor([0.4146, 0.2017, 0.2395, 0.1443]) -Greedy action tensor([ 0.5836, -0.0141, 0.0036, -0.1690]) tensor([0.3874, 0.2131, 0.2169, 0.1825]) -Greedy action tensor([ 0.6099, -0.4217, 0.0108, -0.2948]) tensor([0.4328, 0.1543, 0.2378, 0.1752]) -Greedy action tensor([ 1.2754, -1.2359, -0.0346, -0.7693]) tensor([0.6755, 0.0548, 0.1823, 0.0874]) -Greedy action tensor([ 0.5054, -0.0379, -0.0530, -0.3605]) tensor([0.3886, 0.2257, 0.2223, 0.1635]) -Greedy action tensor([0.2757, 0.0709, 0.0448, 0.0657]) tensor([0.2925, 0.2383, 0.2322, 0.2371]) -Greedy action tensor([ 0.8165, -0.5284, 0.0685, -0.6296]) tensor([0.5078, 0.1323, 0.2403, 0.1196]) -Greedy action tensor([ 0.4052, 0.0455, 0.0645, -0.1560]) tensor([0.3356, 0.2342, 0.2387, 0.1915]) -Greedy action tensor([ 0.6930, -0.5745, -0.0765, -0.5245]) tensor([0.4900, 0.1380, 0.2270, 0.1450]) -Greedy action tensor([ 0.5294, -0.2239, -0.1378, -0.3300]) tensor([0.4154, 0.1956, 0.2132, 0.1759]) -Greedy action tensor([ 0.4033, -0.1594, -0.0159, -0.2427]) tensor([0.3635, 0.2070, 0.2390, 0.1905]) -Greedy action tensor([ 0.5333, -0.1229, -0.0570, -0.5335]) tensor([0.4137, 0.2147, 0.2293, 0.1424]) -Greedy action tensor([ 0.1747, 0.1908, -0.0564, 0.0928]) tensor([0.2680, 0.2724, 0.2127, 0.2469]) -Greedy action tensor([ 0.9497, -0.2267, 0.1400, -0.8049]) tensor([0.5191, 0.1601, 0.2310, 0.0898]) -Greedy action tensor([ 1.1400, -0.6643, -0.0308, -0.5941]) tensor([0.6056, 0.0997, 0.1878, 0.1069]) -Greedy action tensor([ 0.4958, 0.0606, 0.0170, -0.2388]) tensor([0.3641, 0.2356, 0.2256, 0.1747]) -Greedy action tensor([ 0.5892, -0.2422, -0.0308, -0.3310]) tensor([0.4216, 0.1836, 0.2268, 0.1680]) -Greedy action tensor([ 0.5033, -0.2053, -0.0548, -0.2725]) tensor([0.3960, 0.1950, 0.2267, 0.1823]) -Greedy action tensor([ 0.5567, -0.3622, -0.0871, -0.3769]) tensor([0.4315, 0.1722, 0.2267, 0.1696]) -Greedy action tensor([ 0.7002, -0.4357, 0.0104, -0.3626]) tensor([0.4612, 0.1481, 0.2314, 0.1593]) -Greedy action tensor([ 0.7373, -0.1741, 0.2160, -0.5279]) tensor([0.4390, 0.1765, 0.2607, 0.1239]) -Greedy action tensor([ 0.8072, -0.3855, 0.0082, -0.4275]) tensor([0.4892, 0.1484, 0.2200, 0.1423]) -Greedy action tensor([ 0.3931, -0.1812, -0.0347, -0.2508]) tensor([0.3649, 0.2055, 0.2379, 0.1917]) -Greedy action tensor([-1.7029, -0.5270, 0.5476, -0.0233]) tensor([0.0524, 0.1697, 0.4971, 0.2809]) -Greedy action tensor([-1.7855, -0.3886, 0.5810, -0.0786]) tensor([0.0471, 0.1906, 0.5025, 0.2598]) -Greedy action tensor([-1.0635, -0.6046, 0.2425, 0.4246]) tensor([0.0934, 0.1478, 0.3449, 0.4138]) -Greedy action tensor([-1.3584, -0.3709, -0.0328, -0.0978]) tensor([0.0911, 0.2446, 0.3430, 0.3214]) -Greedy action tensor([-1.3659, -0.0062, 0.2462, 0.2108]) tensor([0.0678, 0.2641, 0.3399, 0.3281]) -Greedy action tensor([-1.3153, -0.5728, 0.3611, 0.2296]) tensor([0.0761, 0.1600, 0.4070, 0.3569]) -Greedy action tensor([-1.8516, -0.4250, 0.6271, -0.1110]) tensor([0.0439, 0.1827, 0.5233, 0.2501]) -Greedy action tensor([-0.5217, -0.4946, 0.1070, -0.0025]) tensor([0.1791, 0.1840, 0.3358, 0.3010]) -Greedy action tensor([-1.6150, -0.3382, 0.7845, 0.6754]) tensor([0.0392, 0.1407, 0.4324, 0.3877]) -Greedy action tensor([-1.1982, -0.0545, 0.3531, -0.2916]) tensor([0.0883, 0.2769, 0.4163, 0.2185]) -Greedy action tensor([-0.4444, -0.0958, 0.6906, 1.4661]) tensor([0.0814, 0.1154, 0.2533, 0.5500]) -Greedy action tensor([-1.7760, -0.6108, 0.1053, -0.4590]) tensor([0.0690, 0.2211, 0.4525, 0.2574]) -Greedy action tensor([-1.7411, -1.0187, 0.0658, -0.6733]) tensor([0.0829, 0.1708, 0.5051, 0.2412]) -Greedy action tensor([-1.7251, -0.4712, 0.7463, 0.4017]) tensor([0.0404, 0.1417, 0.4787, 0.3392]) -Greedy action tensor([-1.9399, -0.8409, 0.2880, -0.1310]) tensor([0.0516, 0.1548, 0.4787, 0.3149]) -Greedy action tensor([-1.2503, -0.5965, 0.5382, 0.7941]) tensor([0.0601, 0.1156, 0.3597, 0.4646]) -Greedy action tensor([-0.8293, -0.1786, 0.2977, -0.1399]) tensor([0.1251, 0.2397, 0.3860, 0.2492]) -Greedy action tensor([-1.9039, -0.7202, 0.7555, -0.1075]) tensor([0.0407, 0.1329, 0.5812, 0.2452]) -Greedy action tensor([-0.8176, 0.1273, 0.3110, -0.2115]) tensor([0.1177, 0.3027, 0.3638, 0.2157]) -Greedy action tensor([-0.7753, -0.5681, 0.2226, 0.1674]) tensor([0.1332, 0.1638, 0.3612, 0.3418]) -Greedy action tensor([-1.1038, 0.2152, 0.4571, -0.4289]) tensor([0.0872, 0.3261, 0.4154, 0.1713]) -Greedy action tensor([-1.8580, -0.4670, 0.6218, -0.1290]) tensor([0.0443, 0.1779, 0.5284, 0.2494]) -Greedy action tensor([-0.9186, -0.0751, -0.3812, -0.1454]) tensor([0.1388, 0.3227, 0.2376, 0.3008]) -Greedy action tensor([-1.9473, -0.4979, 0.9572, 0.2200]) tensor([0.0310, 0.1321, 0.5661, 0.2708]) -Greedy action tensor([-1.6575, -0.6316, 0.4322, 0.1440]) tensor([0.0558, 0.1556, 0.4508, 0.3379]) -Greedy action tensor([-0.9823, -0.4182, 0.4254, -0.2076]) tensor([0.1109, 0.1950, 0.4533, 0.2407]) -Greedy action tensor([-1.3269, -0.7016, 0.3355, -0.0558]) tensor([0.0854, 0.1596, 0.4504, 0.3045]) -Greedy action tensor([-0.5897, 0.6824, -0.9744, 0.1728]) tensor([0.1353, 0.4827, 0.0921, 0.2900]) -Greedy action tensor([-0.9766, -0.3694, 0.7684, 1.2859]) tensor([0.0550, 0.1010, 0.3152, 0.5288]) -Greedy action tensor([-1.4434, -0.5474, 0.4758, -0.0666]) tensor([0.0703, 0.1722, 0.4790, 0.2785]) -Greedy action tensor([-2.0171, -0.7087, 1.1948, 0.3836]) tensor([0.0247, 0.0912, 0.6121, 0.2720]) -Greedy action tensor([-1.1476, -0.5750, 0.2520, 0.3724]) tensor([0.0877, 0.1555, 0.3556, 0.4011]) -Greedy action tensor([-0.8986, -0.5813, 0.3319, 0.4378]) tensor([0.1041, 0.1430, 0.3565, 0.3963]) -Greedy action tensor([-1.3861, -0.5373, 0.3452, 0.2073]) tensor([0.0719, 0.1681, 0.4062, 0.3539]) -Greedy action tensor([-1.6628, -0.4395, 0.5172, -0.0447]) tensor([0.0547, 0.1858, 0.4837, 0.2758]) -Greedy action tensor([-1.3245, -0.5413, 0.3134, 0.3249]) tensor([0.0739, 0.1617, 0.3800, 0.3844]) -Greedy action tensor([-1.1298, -0.2630, 0.8374, 0.9712]) tensor([0.0535, 0.1272, 0.3823, 0.4370]) -Greedy action tensor([-1.3206, -0.4869, 0.7035, 0.7593]) tensor([0.0530, 0.1220, 0.4010, 0.4240]) -Greedy action tensor([-1.5801, -0.5956, 0.2844, -0.0272]) tensor([0.0673, 0.1802, 0.4344, 0.3181]) -Greedy action tensor([-1.4243, -0.6149, 0.3858, 0.1155]) tensor([0.0713, 0.1602, 0.4359, 0.3326]) -Greedy action tensor([-1.4171, -0.5812, 0.8998, 0.9776]) tensor([0.0410, 0.0945, 0.4155, 0.4491]) -Greedy action tensor([-1.3165, -0.0914, 0.1685, -0.3369]) tensor([0.0871, 0.2965, 0.3845, 0.2319]) -Greedy action tensor([-1.9195, -0.7938, 0.2197, -0.2684]) tensor([0.0562, 0.1733, 0.4774, 0.2930]) -Greedy action tensor([-1.3984, -0.3667, 0.9289, 1.0654]) tensor([0.0388, 0.1087, 0.3972, 0.4553]) -Greedy action tensor([-1.4531, -0.5862, 0.4227, 0.1121]) tensor([0.0681, 0.1620, 0.4443, 0.3257]) -Greedy action tensor([-1.0955, -0.5342, 0.5618, 1.1663]) tensor([0.0568, 0.0996, 0.2981, 0.5455]) -Greedy action tensor([-1.9335, -0.9039, 0.1016, -0.4277]) tensor([0.0627, 0.1754, 0.4795, 0.2824]) -Greedy action tensor([-1.6646, -0.6556, 0.1899, -0.2698]) tensor([0.0706, 0.1936, 0.4510, 0.2848]) -Greedy action tensor([-1.8097, -0.4901, 0.7283, 0.0988]) tensor([0.0414, 0.1550, 0.5242, 0.2793]) -Greedy action tensor([-1.9152, -0.9160, 0.2175, -0.2610]) tensor([0.0575, 0.1563, 0.4854, 0.3008]) -Greedy action tensor([-1.7723, -0.4522, 0.7239, 0.2768]) tensor([0.0406, 0.1519, 0.4925, 0.3150]) -Greedy action tensor([-1.8477, -0.9264, 0.4124, -0.1712]) tensor([0.0542, 0.1362, 0.5196, 0.2899]) -Greedy action tensor([-1.7750, -0.6002, 0.0861, -0.4465]) tensor([0.0692, 0.2241, 0.4452, 0.2614]) -Greedy action tensor([-1.8730, -0.4315, 0.6221, -0.1371]) tensor([0.0434, 0.1836, 0.5265, 0.2464]) -Greedy action tensor([-1.0216, -0.4484, 0.4959, 1.1433]) tensor([0.0623, 0.1105, 0.2842, 0.5430]) -Greedy action tensor([-1.4402, -0.4854, 0.1207, -0.1695]) tensor([0.0839, 0.2179, 0.3994, 0.2988]) -Greedy action tensor([-1.5044, -0.0446, 0.0435, 0.6265]) tensor([0.0543, 0.2336, 0.2551, 0.4570]) -Greedy action tensor([-1.6814, -0.4494, 0.5755, -0.1596]) tensor([0.0539, 0.1847, 0.5147, 0.2468]) -Greedy action tensor([-1.0730, -0.5614, 0.3726, 0.2672]) tensor([0.0932, 0.1554, 0.3955, 0.3559]) -Greedy action tensor([-1.6361, -0.4967, 0.6663, 0.2566]) tensor([0.0482, 0.1505, 0.4816, 0.3197]) -Greedy action tensor([-1.5287, -0.4551, 1.4914, 1.0405]) tensor([0.0267, 0.0781, 0.5468, 0.3484]) -Greedy action tensor([-0.7265, -0.6437, 0.2079, 0.1258]) tensor([0.1433, 0.1557, 0.3649, 0.3361]) -Greedy action tensor([-0.4173, -0.3997, 0.1940, 0.2063]) tensor([0.1746, 0.1777, 0.3218, 0.3258]) -Greedy action tensor([-0.9339, -0.4465, 0.9097, 1.2877]) tensor([0.0550, 0.0896, 0.3478, 0.5076]) -Greedy action tensor([-1.7207, -0.4633, 0.5983, 0.0238]) tensor([0.0490, 0.1723, 0.4982, 0.2805]) -Greedy action tensor([-1.5615, -0.5476, 0.4736, 0.1592]) tensor([0.0588, 0.1622, 0.4502, 0.3288]) -Greedy action tensor([-1.5049, -0.6102, 0.4023, -0.1523]) tensor([0.0712, 0.1742, 0.4794, 0.2753]) -Greedy action tensor([-0.7391, 0.3684, 0.0366, 0.3549]) tensor([0.1089, 0.3295, 0.2365, 0.3251]) -Greedy action tensor([-1.9201, -0.4633, 0.6582, -0.1574]) tensor([0.0412, 0.1767, 0.5423, 0.2399]) -Greedy action tensor([-0.9397, -0.5246, 0.5286, -0.3456]) tensor([0.1154, 0.1747, 0.5009, 0.2090]) -Greedy action tensor([-1.2507, -0.5731, 0.4265, 0.0994]) tensor([0.0821, 0.1617, 0.4394, 0.3168]) -Greedy action tensor([-1.2043, -0.6005, 0.2508, 0.3386]) tensor([0.0848, 0.1551, 0.3634, 0.3967]) -Greedy action tensor([-1.6049, -0.7783, 0.2756, -0.4463]) tensor([0.0768, 0.1754, 0.5033, 0.2445]) -Greedy action tensor([-1.7799, -0.4791, 0.5989, -0.0796]) tensor([0.0478, 0.1754, 0.5154, 0.2615]) -Greedy action tensor([-0.8484, -0.5367, 0.2002, 0.2973]) tensor([0.1196, 0.1633, 0.3412, 0.3760]) -Greedy action tensor([-0.7693, -0.5496, 0.2370, 0.2315]) tensor([0.1298, 0.1617, 0.3552, 0.3532]) -Greedy action tensor([-1.9600, -0.6249, 1.4465, 0.7734]) tensor([0.0199, 0.0755, 0.5991, 0.3056]) -Greedy action tensor([-1.3861, -0.0561, 0.1881, -0.5940]) tensor([0.0846, 0.3200, 0.4085, 0.1869]) -Greedy action tensor([-1.6943, -0.1742, 0.6466, 0.2200]) tensor([0.0440, 0.2011, 0.4568, 0.2982]) -Greedy action tensor([-1.1703, -0.4506, 0.3590, 0.6337]) tensor([0.0728, 0.1494, 0.3358, 0.4420]) -Greedy action tensor([-1.7470, -0.5441, 0.7476, 0.0040]) tensor([0.0450, 0.1500, 0.5456, 0.2594]) -Greedy action tensor([ 0.9926, -0.3897, -0.2206, 0.0707]) tensor([0.5139, 0.1290, 0.1527, 0.2044]) -Greedy action tensor([ 1.4861, -0.2738, -0.3767, 0.1604]) tensor([0.6278, 0.1080, 0.0975, 0.1667]) -Greedy action tensor([ 0.9079, -0.2021, -0.3637, 0.1362]) tensor([0.4826, 0.1590, 0.1353, 0.2231]) -Greedy action tensor([ 0.9688, -0.0440, -0.3387, -0.0245]) tensor([0.4990, 0.1812, 0.1350, 0.1848]) -Greedy action tensor([ 0.7477, -0.1064, -0.5660, -0.4871]) tensor([0.5037, 0.2144, 0.1354, 0.1465]) -Greedy action tensor([ 1.3208, -0.4852, -0.3175, 0.1480]) tensor([0.5995, 0.0985, 0.1165, 0.1855]) -Greedy action tensor([ 1.2469, -0.5911, -0.1071, -0.2538]) tensor([0.6096, 0.0970, 0.1574, 0.1359]) -Greedy action tensor([ 0.6126, -0.2659, -0.2727, -0.0350]) tensor([0.4253, 0.1767, 0.1755, 0.2225]) -Greedy action tensor([ 1.2281, -0.5736, -0.7545, 0.6681]) tensor([0.5336, 0.0881, 0.0735, 0.3048]) -Greedy action tensor([ 0.9568, -0.3096, -0.4657, -0.1552]) tensor([0.5400, 0.1522, 0.1302, 0.1776]) -Greedy action tensor([ 0.8560, -0.2364, -0.4692, -0.0313]) tensor([0.4968, 0.1666, 0.1320, 0.2046]) -Greedy action tensor([ 0.7009, -0.4433, 0.1581, -0.1978]) tensor([0.4335, 0.1381, 0.2519, 0.1765]) -Greedy action tensor([ 0.4708, -0.3202, -0.3100, 0.3388]) tensor([0.3587, 0.1626, 0.1643, 0.3144]) -Greedy action tensor([ 1.2752, -0.5541, -0.3505, 0.5148]) tensor([0.5480, 0.0880, 0.1078, 0.2562]) -Greedy action tensor([ 0.4991, -0.2391, 0.0867, -0.1796]) tensor([0.3777, 0.1805, 0.2501, 0.1916]) -Greedy action tensor([ 0.9451, -0.6227, -0.0534, -0.0384]) tensor([0.5126, 0.1069, 0.1888, 0.1917]) -Greedy action tensor([ 1.1300, -0.6511, -0.1402, 0.0611]) tensor([0.5578, 0.0940, 0.1566, 0.1916]) -Greedy action tensor([ 0.8135, -0.4006, 0.0578, 0.1248]) tensor([0.4407, 0.1309, 0.2070, 0.2214]) -Greedy action tensor([ 0.6507, -0.2790, -0.1912, -0.1792]) tensor([0.4422, 0.1745, 0.1905, 0.1928]) -Greedy action tensor([ 0.9021, -0.5119, 0.1026, -0.1999]) tensor([0.4938, 0.1201, 0.2220, 0.1641]) -Greedy action tensor([ 1.4715, -0.2321, -0.3497, 0.1645]) tensor([0.6194, 0.1127, 0.1002, 0.1676]) -Greedy action tensor([ 0.8032, -0.4511, -0.1921, -0.0846]) tensor([0.4839, 0.1380, 0.1789, 0.1992]) -Greedy action tensor([ 1.2010, -0.1790, -0.0263, -0.0913]) tensor([0.5497, 0.1383, 0.1611, 0.1509]) -Greedy action tensor([ 0.8787, -0.2514, 0.0131, -0.0889]) tensor([0.4708, 0.1521, 0.1981, 0.1789]) -Greedy action tensor([ 1.1669, -0.5790, -0.2175, 0.1287]) tensor([0.5621, 0.0981, 0.1408, 0.1990]) -Greedy action tensor([ 0.9453, -0.3244, -0.2908, 0.0698]) tensor([0.5030, 0.1413, 0.1461, 0.2096]) -Greedy action tensor([ 1.4340, -0.5843, -0.4237, 0.2377]) tensor([0.6284, 0.0835, 0.0981, 0.1900]) -Greedy action tensor([ 0.9057, -0.5187, -0.5516, 0.9196]) tensor([0.4020, 0.0967, 0.0936, 0.4076]) -Greedy action tensor([ 0.9929, -0.0260, 0.0055, -0.2782]) tensor([0.4965, 0.1792, 0.1850, 0.1393]) -Greedy action tensor([ 0.4667, -0.3912, -0.2080, -0.0290]) tensor([0.3933, 0.1668, 0.2003, 0.2396]) -Greedy action tensor([ 1.1999, -0.5296, -0.2535, -0.0395]) tensor([0.5880, 0.1043, 0.1375, 0.1703]) -Greedy action tensor([ 1.5682, -0.2846, -0.1125, 0.1051]) tensor([0.6351, 0.0996, 0.1183, 0.1470]) -Greedy action tensor([ 1.2180, -0.5369, -0.1016, -0.0177]) tensor([0.5778, 0.0999, 0.1544, 0.1679]) -Greedy action tensor([ 1.3319, -0.4006, -0.3504, 0.4349]) tensor([0.5648, 0.0999, 0.1050, 0.2303]) -Greedy action tensor([ 0.9791, -0.5415, -0.5560, 0.6597]) tensor([0.4628, 0.1012, 0.0997, 0.3363]) -Greedy action tensor([ 1.0387, -0.5872, -0.5750, 0.5058]) tensor([0.5043, 0.0992, 0.1004, 0.2960]) -Greedy action tensor([ 1.2212, -0.6273, -0.1764, 0.2851]) tensor([0.5565, 0.0876, 0.1376, 0.2182]) -Greedy action tensor([ 0.8095, -0.2923, -0.2258, 0.2020]) tensor([0.4480, 0.1489, 0.1591, 0.2440]) -Greedy action tensor([ 1.1594, -0.4864, -0.3770, 0.4126]) tensor([0.5314, 0.1025, 0.1143, 0.2518]) -Greedy action tensor([ 0.6662, -0.3119, -0.1207, 0.0784]) tensor([0.4190, 0.1575, 0.1907, 0.2328]) -Greedy action tensor([ 1.4766, -0.0486, 0.2591, -0.5122]) tensor([0.6059, 0.1318, 0.1793, 0.0829]) -Greedy action tensor([ 0.9133, -0.3708, -0.5629, 0.2408]) tensor([0.4961, 0.1374, 0.1134, 0.2532]) -Greedy action tensor([ 0.5764, 0.0621, 0.0141, -0.3471]) tensor([0.3899, 0.2331, 0.2222, 0.1548]) -Greedy action tensor([ 1.4758, -0.2717, -0.3215, 0.2340]) tensor([0.6139, 0.1070, 0.1018, 0.1773]) -Greedy action tensor([ 0.4874, -0.4463, 0.1159, -0.0485]) tensor([0.3748, 0.1473, 0.2585, 0.2193]) -Greedy action tensor([ 1.2568, -0.2242, -0.2579, 0.1082]) tensor([0.5668, 0.1289, 0.1246, 0.1797]) -Greedy action tensor([ 0.6578, -0.2427, 0.0123, -0.2372]) tensor([0.4275, 0.1737, 0.2242, 0.1747]) -Greedy action tensor([ 1.2556, -0.4883, -0.3147, -0.0206]) tensor([0.6017, 0.1052, 0.1251, 0.1679]) -Greedy action tensor([ 0.8530, -0.3041, -0.1919, 0.3095]) tensor([0.4451, 0.1399, 0.1565, 0.2584]) -Greedy action tensor([ 0.6868, -0.5192, -0.0766, 0.1389]) tensor([0.4267, 0.1277, 0.1989, 0.2467]) -Greedy action tensor([ 1.5692, -0.6265, -0.1806, 0.5575]) tensor([0.6065, 0.0675, 0.1054, 0.2205]) -Greedy action tensor([ 1.1451, -0.4955, -0.6080, 0.2178]) tensor([0.5673, 0.1100, 0.0983, 0.2244]) -Greedy action tensor([ 1.6860, -0.5326, -0.2389, 0.4196]) tensor([0.6508, 0.0708, 0.0949, 0.1834]) -Greedy action tensor([ 0.9984, -0.5833, -0.0671, 0.1605]) tensor([0.5043, 0.1037, 0.1738, 0.2182]) -Greedy action tensor([ 0.6963, -0.3793, -0.2975, 0.6271]) tensor([0.3782, 0.1290, 0.1400, 0.3529]) -Greedy action tensor([ 1.0381, -0.6271, -0.4225, 0.4178]) tensor([0.5104, 0.0966, 0.1185, 0.2745]) -Greedy action tensor([ 1.1534, -0.6047, -0.0315, 0.2355]) tensor([0.5326, 0.0918, 0.1629, 0.2127]) -Greedy action tensor([ 1.0437, -0.6947, 0.0330, 0.2908]) tensor([0.4973, 0.0874, 0.1810, 0.2342]) -Greedy action tensor([ 0.9874, -0.6493, -0.2110, 0.3227]) tensor([0.4973, 0.0968, 0.1500, 0.2558]) -Greedy action tensor([ 1.0316, -0.6329, -0.2449, 0.1409]) tensor([0.5323, 0.1008, 0.1485, 0.2184]) -Greedy action tensor([ 1.2449, -0.3392, -0.2874, -0.0807]) tensor([0.5929, 0.1216, 0.1281, 0.1575]) -Greedy action tensor([ 0.8274, 0.0664, -0.3091, -0.3141]) tensor([0.4745, 0.2217, 0.1523, 0.1515]) -Greedy action tensor([ 0.9446, -0.3897, -0.2730, 0.0724]) tensor([0.5057, 0.1332, 0.1497, 0.2114]) -Greedy action tensor([ 1.4570, -0.7140, -0.2861, -0.0098]) tensor([0.6580, 0.0751, 0.1151, 0.1518]) -Greedy action tensor([ 0.7683, -0.1914, 0.0523, -0.0761]) tensor([0.4345, 0.1664, 0.2123, 0.1868]) -Greedy action tensor([ 0.7589, -0.4307, -0.2355, 0.1472]) tensor([0.4511, 0.1373, 0.1669, 0.2447]) -Greedy action tensor([ 1.4541, -0.3896, -0.2196, 0.1061]) tensor([0.6228, 0.0986, 0.1168, 0.1618]) -Greedy action tensor([ 0.4532, -0.4204, -0.4647, -0.0813]) tensor([0.4162, 0.1737, 0.1662, 0.2439]) -Greedy action tensor([ 0.8943, -0.4886, -0.9622, 0.7402]) tensor([0.4416, 0.1108, 0.0690, 0.3786]) -Greedy action tensor([ 0.9331, -0.6349, -0.0737, 0.1807]) tensor([0.4890, 0.1019, 0.1787, 0.2304]) -Greedy action tensor([ 1.0056, -0.3619, -0.0941, 0.0713]) tensor([0.5049, 0.1286, 0.1681, 0.1984]) -Greedy action tensor([ 1.9277, -0.6178, -0.4262, 0.2265]) tensor([0.7375, 0.0578, 0.0701, 0.1346]) -Greedy action tensor([ 1.5986, -0.4288, -0.3703, 0.1585]) tensor([0.6631, 0.0873, 0.0926, 0.1571]) -Greedy action tensor([ 1.0917, -0.3697, -0.5565, 0.2484]) tensor([0.5392, 0.1250, 0.1037, 0.2320]) -Greedy action tensor([ 1.4826, 0.0642, -0.0449, -0.1749]) tensor([0.6061, 0.1467, 0.1316, 0.1155]) -Greedy action tensor([ 1.5239, -0.6938, -0.3730, -0.0186]) tensor([0.6790, 0.0739, 0.1019, 0.1452]) -Greedy action tensor([ 0.5570, 0.0006, -0.0060, -0.3392]) tensor([0.3920, 0.2247, 0.2233, 0.1600]) -Greedy action tensor([ 0.9423, -0.3101, 0.1252, -0.4096]) tensor([0.5034, 0.1439, 0.2224, 0.1303]) -Greedy action tensor([ 1.3134, -0.5687, -0.2591, 0.2523]) tensor([0.5862, 0.0893, 0.1216, 0.2029]) -Greedy action tensor([ 0.6588, -0.0690, -0.2140, -0.0619]) tensor([0.4189, 0.2023, 0.1750, 0.2038]) -Greedy action tensor([ 2.0605, -0.5199, -0.3673, 0.2056]) tensor([0.7573, 0.0574, 0.0668, 0.1185]) -Greedy action tensor([ 1.4414, 1.3112, -0.5565, 1.2363]) tensor([0.3536, 0.3104, 0.0480, 0.2880]) -Greedy action tensor([ 0.3154, -0.7247, -0.5929, 2.2814]) tensor([0.1124, 0.0397, 0.0453, 0.8026]) -Greedy action tensor([ 0.4331, -0.0162, -0.9340, 1.6363]) tensor([0.1914, 0.1222, 0.0488, 0.6376]) -Greedy action tensor([ 0.9943, -0.4370, 0.5025, 1.6460]) tensor([0.2653, 0.0634, 0.1622, 0.5091]) -Greedy action tensor([ 1.5597, -0.0561, 0.8633, 1.4776]) tensor([0.3819, 0.0759, 0.1903, 0.3518]) -Greedy action tensor([ 0.4557, -0.5510, 0.5242, 0.5687]) tensor([0.2812, 0.1028, 0.3012, 0.3149]) -Greedy action tensor([ 1.2753, 0.5269, -0.5944, 1.0441]) tensor([0.4131, 0.1954, 0.0637, 0.3278]) -Greedy action tensor([ 0.4520, -0.5748, 1.0724, 0.5521]) tensor([0.2313, 0.0828, 0.4302, 0.2557]) -Greedy action tensor([ 0.2094, -1.0391, 0.2093, 0.3157]) tensor([0.2942, 0.0844, 0.2942, 0.3272]) -Greedy action tensor([ 0.3315, -0.8502, -0.2248, 1.8474]) tensor([0.1554, 0.0477, 0.0891, 0.7078]) -Greedy action tensor([0.3036, 0.8332, 0.3128, 0.4920]) tensor([0.2035, 0.3455, 0.2053, 0.2457]) -Greedy action tensor([ 0.5960, -0.4694, 0.0372, 0.8551]) tensor([0.3113, 0.1073, 0.1780, 0.4034]) -Greedy action tensor([ 0.8570, -1.5837, 0.2562, 0.5833]) tensor([0.4174, 0.0364, 0.2289, 0.3174]) -Greedy action tensor([1.0427, 0.4279, 1.1567, 0.2467]) tensor([0.3213, 0.1737, 0.3601, 0.1449]) -Greedy action tensor([-0.1342, 0.1787, 0.9585, 0.1280]) tensor([0.1504, 0.2056, 0.4485, 0.1955]) -Greedy action tensor([ 0.6433, -0.4717, 0.1855, 0.0336]) tensor([0.3993, 0.1310, 0.2527, 0.2170]) -Greedy action tensor([-0.0566, 0.4718, -0.0566, -0.4858]) tensor([0.2300, 0.3902, 0.2300, 0.1498]) -Greedy action tensor([0.8892, 0.0937, 0.5490, 1.2176]) tensor([0.2815, 0.1271, 0.2004, 0.3910]) -Greedy action tensor([ 0.9875, -0.8544, 0.0214, 0.8578]) tensor([0.4137, 0.0656, 0.1574, 0.3633]) -Greedy action tensor([ 1.7813e+00, -3.9232e-04, 1.0908e+00, 1.5262e+00]) tensor([0.4091, 0.0689, 0.2051, 0.3170]) -Greedy action tensor([-0.2160, -1.1286, -0.5165, 1.1953]) tensor([0.1602, 0.0643, 0.1186, 0.6569]) -Greedy action tensor([ 0.8887, -0.7058, 1.4118, 0.9017]) tensor([0.2562, 0.0520, 0.4323, 0.2595]) -Greedy action tensor([ 0.7568, 0.4708, -0.3721, 0.1453]) tensor([0.3821, 0.2870, 0.1236, 0.2073]) -Greedy action tensor([ 0.6015, -0.0160, 2.0999, 0.5804]) tensor([0.1430, 0.0771, 0.6399, 0.1400]) -Greedy action tensor([ 0.9298, -0.9455, 1.4880, -0.0858]) tensor([0.3065, 0.0470, 0.5356, 0.1110]) -Greedy action tensor([-0.2500, 0.7816, 0.8084, -0.9479]) tensor([0.1392, 0.3905, 0.4011, 0.0693]) -Greedy action tensor([ 0.3214, 0.6259, 0.5603, -0.4969]) tensor([0.2459, 0.3334, 0.3122, 0.1085]) -Greedy action tensor([ 1.4983, -0.4904, 0.4176, 0.2774]) tensor([0.5646, 0.0773, 0.1916, 0.1665]) -Greedy action tensor([ 1.3620, -0.9693, 1.1084, 1.3869]) tensor([0.3450, 0.0335, 0.2678, 0.3537]) -Greedy action tensor([ 0.7836, -0.0692, -1.2707, 1.1449]) tensor([0.3345, 0.1426, 0.0429, 0.4801]) -Greedy action tensor([ 0.9328, -0.1012, 0.6470, 0.2408]) tensor([0.3835, 0.1364, 0.2882, 0.1920]) -Greedy action tensor([-0.0020, -0.1147, 1.0188, -0.1917]) tensor([0.1819, 0.1626, 0.5050, 0.1505]) -Greedy action tensor([ 0.3496, 0.8203, -0.1583, 0.6834]) tensor([0.2174, 0.3481, 0.1308, 0.3036]) -Greedy action tensor([ 1.2468, -0.3394, 1.0381, 1.6085]) tensor([0.2897, 0.0593, 0.2351, 0.4159]) -Greedy action tensor([ 1.3753, 0.5535, -0.0997, 0.9952]) tensor([0.4251, 0.1869, 0.0973, 0.2907]) -Greedy action tensor([-0.3809, -0.3303, -0.4938, 0.5691]) tensor([0.1808, 0.1902, 0.1615, 0.4675]) -Greedy action tensor([ 0.2975, -0.8617, -0.1940, 0.2839]) tensor([0.3434, 0.1077, 0.2101, 0.3388]) -Greedy action tensor([1.5211, 0.1320, 0.0392, 0.8844]) tensor([0.4986, 0.1243, 0.1133, 0.2638]) -Greedy action tensor([ 0.1668, -0.1503, -0.2664, -0.5079]) tensor([0.3465, 0.2523, 0.2247, 0.1765]) -Greedy action tensor([ 1.7527, -1.0651, 0.1156, 1.1864]) tensor([0.5489, 0.0328, 0.1068, 0.3115]) -Greedy action tensor([ 1.1329, -0.0682, -0.7863, 0.3785]) tensor([0.5214, 0.1569, 0.0765, 0.2452]) -Greedy action tensor([ 0.1179, -0.3960, 1.6993, 0.3027]) tensor([0.1305, 0.0781, 0.6344, 0.1570]) -Greedy action tensor([ 1.0119, -0.3236, 1.2638, 1.0817]) tensor([0.2761, 0.0726, 0.3552, 0.2961]) -Greedy action tensor([ 1.3899, 0.8177, -0.7130, 0.6569]) tensor([0.4615, 0.2604, 0.0564, 0.2217]) -Greedy action tensor([ 1.1366, 0.3656, -0.3642, 0.3165]) tensor([0.4704, 0.2176, 0.1049, 0.2072]) -Greedy action tensor([-0.2650, -1.0837, -0.7135, 0.1078]) tensor([0.2832, 0.1249, 0.1808, 0.4111]) -Greedy action tensor([0.8871, 0.4931, 0.0990, 0.3951]) tensor([0.3649, 0.2461, 0.1659, 0.2231]) -Greedy action tensor([ 0.7759, 0.4502, -0.8407, 1.0079]) tensor([0.3143, 0.2269, 0.0624, 0.3964]) -Greedy action tensor([ 0.9565, -0.1203, -0.1519, 1.2766]) tensor([0.3281, 0.1118, 0.1083, 0.4519]) -Greedy action tensor([ 0.8313, 0.2392, 1.6051, -0.0722]) tensor([0.2423, 0.1341, 0.5254, 0.0982]) -Greedy action tensor([ 0.9365, 0.7354, -0.4322, 0.9637]) tensor([0.3226, 0.2638, 0.0821, 0.3315]) -Greedy action tensor([ 1.5936, -0.2655, 0.9014, 0.7016]) tensor([0.4840, 0.0754, 0.2422, 0.1984]) -Greedy action tensor([-0.2967, -0.9854, 1.5036, 1.0179]) tensor([0.0887, 0.0445, 0.5366, 0.3302]) -Greedy action tensor([ 1.4242, -0.0829, 0.2728, 1.1781]) tensor([0.4311, 0.0955, 0.1363, 0.3371]) -Greedy action tensor([ 0.9770, 0.2365, -0.1521, 0.9455]) tensor([0.3611, 0.1722, 0.1168, 0.3499]) -Greedy action tensor([0.0140, 0.4684, 1.7152, 0.1075]) tensor([0.1092, 0.1721, 0.5987, 0.1200]) -Greedy action tensor([0.4530, 0.9992, 0.5282, 0.6930]) tensor([0.1970, 0.3402, 0.2124, 0.2504]) -Greedy action tensor([ 0.4384, -0.5481, 0.0701, 1.1739]) tensor([0.2409, 0.0898, 0.1667, 0.5026]) -Greedy action tensor([ 0.2047, 0.4653, -0.6897, 0.8701]) tensor([0.2150, 0.2790, 0.0879, 0.4182]) -Greedy action tensor([ 0.7836, 0.1869, -0.3796, 1.0907]) tensor([0.3103, 0.1709, 0.0970, 0.4219]) -Greedy action tensor([ 0.9653, 0.5580, 0.6810, -0.5953]) tensor([0.3805, 0.2532, 0.2864, 0.0799]) -Greedy action tensor([ 0.3413, -0.3769, 0.8832, -0.5647]) tensor([0.2769, 0.1350, 0.4761, 0.1119]) -Greedy action tensor([ 0.8515, -0.2691, -0.7999, -0.8161]) tensor([0.5860, 0.1911, 0.1124, 0.1106]) -Greedy action tensor([ 1.2744, -0.1640, 0.7418, 0.0690]) tensor([0.4708, 0.1117, 0.2764, 0.1411]) -Greedy action tensor([ 0.5789, 0.1901, -0.3509, 0.2669]) tensor([0.3566, 0.2417, 0.1407, 0.2610]) -Greedy action tensor([-0.4395, 0.9682, 0.5685, 0.0517]) tensor([0.1057, 0.4319, 0.2896, 0.1727]) -Greedy action tensor([ 0.8511, 0.3633, -0.4900, 0.7724]) tensor([0.3572, 0.2193, 0.0934, 0.3301]) -Greedy action tensor([ 0.0909, 0.3560, 0.6089, -0.7874]) tensor([0.2274, 0.2964, 0.3817, 0.0945]) -Greedy action tensor([ 1.2737, -1.0326, 1.3283, 1.0086]) tensor([0.3421, 0.0341, 0.3613, 0.2625]) -Greedy action tensor([ 1.5058, -0.2259, 0.0467, 0.7026]) tensor([0.5384, 0.0953, 0.1252, 0.2411]) -Greedy action tensor([ 0.9468, -0.6733, 0.2526, 0.9498]) tensor([0.3703, 0.0733, 0.1850, 0.3714]) -Greedy action tensor([0.8903, 1.3489, 0.9233, 0.0847]) tensor([0.2462, 0.3894, 0.2544, 0.1100]) -Greedy action tensor([ 0.7955, -0.3694, -0.3005, 1.5151]) tensor([0.2703, 0.0843, 0.0903, 0.5551]) -Greedy action tensor([1.6121, 0.0475, 0.2556, 1.4896]) tensor([0.4253, 0.0890, 0.1095, 0.3762]) -Greedy action tensor([ 1.6185, -0.6256, 0.7287, 1.5005]) tensor([0.4157, 0.0441, 0.1708, 0.3695]) -Greedy action tensor([0.4099, 0.8899, 0.0228, 0.2386]) tensor([0.2417, 0.3906, 0.1641, 0.2036]) -Greedy action tensor([-0.5580, -1.3685, -0.5022, 0.3893]) tensor([0.1968, 0.0875, 0.2081, 0.5075]) -Greedy action tensor([ 1.0425, -0.6570, 1.1105, 0.9124]) tensor([0.3194, 0.0584, 0.3418, 0.2804]) -Greedy action tensor([ 1.9058, -0.9078, 0.9007, 2.0947]) tensor([0.3797, 0.0228, 0.1390, 0.4586]) -Greedy action tensor([ 0.4385, 0.6171, 0.4208, -0.1120]) tensor([0.2663, 0.3184, 0.2617, 0.1536]) -Greedy action tensor([0.4665, 0.4940, 0.1003, 0.6573]) tensor([0.2544, 0.2615, 0.1764, 0.3078]) -Greedy action tensor([0.7920, 0.1113, 0.1409, 0.8387]) tensor([0.3252, 0.1646, 0.1696, 0.3407]) -Greedy action tensor([ 0.3949, 0.0303, -0.0274, -0.1565]) tensor([0.3417, 0.2373, 0.2240, 0.1969]) -Greedy action tensor([ 0.4093, -0.0174, 0.0019, -0.1962]) tensor([0.3492, 0.2279, 0.2323, 0.1906]) -Greedy action tensor([ 0.3855, -0.0524, 0.1092, -0.1881]) tensor([0.3370, 0.2175, 0.2556, 0.1899]) -Greedy action tensor([ 0.6542, -0.1874, -0.0889, -0.2395]) tensor([0.4318, 0.1861, 0.2054, 0.1767]) -Greedy action tensor([ 0.0837, 0.0977, 0.1262, -0.1631]) tensor([0.2605, 0.2642, 0.2718, 0.2035]) -Greedy action tensor([ 1.2388, -1.0536, -0.0217, -0.6020]) tensor([0.6480, 0.0655, 0.1837, 0.1028]) -Greedy action tensor([ 0.6330, 0.0026, -0.0765, -0.3085]) tensor([0.4142, 0.2205, 0.2037, 0.1615]) -Greedy action tensor([0.3369, 0.1971, 0.0763, 0.0840]) tensor([0.2927, 0.2545, 0.2255, 0.2273]) -Greedy action tensor([ 1.0180, -0.8013, -0.0473, -0.4977]) tensor([0.5792, 0.0939, 0.1996, 0.1272]) -Greedy action tensor([ 0.4560, 0.3507, -0.0058, -0.3063]) tensor([0.3337, 0.3003, 0.2103, 0.1557]) -Greedy action tensor([ 0.8263, -0.4707, 0.0784, -0.7090]) tensor([0.5097, 0.1393, 0.2412, 0.1098]) -Greedy action tensor([ 0.4118, 0.0579, -0.0113, -0.0678]) tensor([0.3360, 0.2359, 0.2201, 0.2080]) -Greedy action tensor([ 0.4211, 0.1979, -0.0542, -0.2185]) tensor([0.3391, 0.2713, 0.2108, 0.1789]) -Greedy action tensor([ 0.4065, -0.4235, 0.3593, -0.4838]) tensor([0.3571, 0.1557, 0.3406, 0.1466]) -Greedy action tensor([ 3.7018e-01, 7.7078e-02, -2.2446e-04, -1.4659e-01]) tensor([0.3297, 0.2460, 0.2277, 0.1967]) -Greedy action tensor([ 0.8944, -0.8864, -0.0405, -0.4115]) tensor([0.5458, 0.0920, 0.2143, 0.1479]) -Greedy action tensor([ 0.0079, 0.1035, 0.0570, -0.1500]) tensor([0.2497, 0.2748, 0.2623, 0.2132]) -Greedy action tensor([ 0.2022, -0.1793, -0.1012, -0.4120]) tensor([0.3376, 0.2305, 0.2492, 0.1827]) -Greedy action tensor([ 0.4886, -0.0904, -0.0549, -0.1065]) tensor([0.3714, 0.2081, 0.2157, 0.2048]) -Greedy action tensor([ 0.5053, -0.1465, -0.0784, -0.5311]) tensor([0.4109, 0.2141, 0.2292, 0.1458]) -Greedy action tensor([ 0.7022, -0.4305, 0.0714, -0.4341]) tensor([0.4597, 0.1481, 0.2446, 0.1476]) -Greedy action tensor([ 0.4740, -0.1375, -0.0610, -0.2327]) tensor([0.3815, 0.2070, 0.2234, 0.1882]) -Greedy action tensor([ 0.5803, -0.1223, 0.0885, -0.6387]) tensor([0.4163, 0.2062, 0.2546, 0.1230]) -Greedy action tensor([ 0.9138, -0.2783, -0.1120, -0.6349]) tensor([0.5334, 0.1619, 0.1912, 0.1134]) -Greedy action tensor([ 0.5986, -0.3041, -0.2284, -0.2702]) tensor([0.4420, 0.1792, 0.1933, 0.1854]) -Greedy action tensor([ 0.3147, -0.0266, -0.0783, -0.0776]) tensor([0.3266, 0.2322, 0.2205, 0.2207]) -Greedy action tensor([ 0.5976, 0.0481, -0.1325, -0.2025]) tensor([0.3987, 0.2301, 0.1921, 0.1791]) -Greedy action tensor([ 0.2414, -0.1158, 0.0234, -0.2040]) tensor([0.3180, 0.2225, 0.2557, 0.2037]) -Greedy action tensor([ 0.3885, -0.0360, 0.0022, -0.5078]) tensor([0.3647, 0.2386, 0.2479, 0.1488]) -Greedy action tensor([ 0.9654, -0.4005, 0.0635, -0.5785]) tensor([0.5335, 0.1361, 0.2165, 0.1139]) -Greedy action tensor([ 0.2110, 0.0546, -0.1016, -0.1653]) tensor([0.3055, 0.2613, 0.2235, 0.2097]) -Greedy action tensor([ 0.5125, -0.1432, -0.0193, -0.2693]) tensor([0.3900, 0.2024, 0.2291, 0.1784]) -Greedy action tensor([ 0.7955, -0.2943, -0.0818, -0.3479]) tensor([0.4829, 0.1624, 0.2008, 0.1539]) -Greedy action tensor([ 0.5908, -0.4470, -0.1497, -0.4533]) tensor([0.4581, 0.1623, 0.2184, 0.1612]) -Greedy action tensor([ 0.5017, -0.3002, -0.0401, -0.5378]) tensor([0.4195, 0.1881, 0.2440, 0.1483]) -Greedy action tensor([ 0.3502, 0.1334, -0.1386, 0.0159]) tensor([0.3190, 0.2569, 0.1957, 0.2284]) -Greedy action tensor([ 0.7951, -0.3793, -0.1447, -0.3636]) tensor([0.4966, 0.1535, 0.1940, 0.1559]) -Greedy action tensor([ 0.8975, -0.1537, 0.1181, -0.1497]) tensor([0.4632, 0.1619, 0.2124, 0.1625]) -Greedy action tensor([ 0.4151, 0.0166, -0.0100, -0.2966]) tensor([0.3551, 0.2384, 0.2322, 0.1743]) -Greedy action tensor([ 0.4598, -0.1981, -0.0661, -0.2501]) tensor([0.3845, 0.1992, 0.2273, 0.1891]) -Greedy action tensor([ 0.4074, 0.1467, 0.0345, -0.0029]) tensor([0.3202, 0.2467, 0.2206, 0.2125]) -Greedy action tensor([ 0.7964, -0.4699, -0.1030, -0.4466]) tensor([0.5058, 0.1426, 0.2058, 0.1459]) -Greedy action tensor([ 0.6751, -0.6038, 0.1099, -0.3308]) tensor([0.4520, 0.1258, 0.2569, 0.1653]) -Greedy action tensor([ 0.8253, -0.6267, -0.0266, -0.8007]) tensor([0.5384, 0.1260, 0.2297, 0.1059]) -Greedy action tensor([ 0.4007, -0.1130, 0.1912, -0.3104]) tensor([0.3448, 0.2063, 0.2796, 0.1693]) -Greedy action tensor([ 0.6512, -0.2526, 0.1807, -0.4481]) tensor([0.4232, 0.1714, 0.2644, 0.1410]) -Greedy action tensor([ 0.3420, -0.1904, 0.1328, -0.1657]) tensor([0.3333, 0.1957, 0.2704, 0.2006]) -Greedy action tensor([ 0.4071, 0.0781, 0.0514, -0.2979]) tensor([0.3431, 0.2469, 0.2404, 0.1695]) -Greedy action tensor([ 0.8095, -0.0608, 0.1178, -0.0469]) tensor([0.4266, 0.1787, 0.2136, 0.1812]) -Greedy action tensor([ 0.6516, -0.3661, 0.1068, -0.5722]) tensor([0.4473, 0.1617, 0.2594, 0.1316]) -Greedy action tensor([ 0.6216, -0.5257, -0.1076, -0.5847]) tensor([0.4764, 0.1513, 0.2298, 0.1426]) -Greedy action tensor([ 0.5295, -0.3429, -0.2096, -0.4109]) tensor([0.4375, 0.1828, 0.2089, 0.1708]) -Greedy action tensor([ 0.5528, -0.3530, 0.2316, -0.4946]) tensor([0.4032, 0.1630, 0.2924, 0.1415]) -Greedy action tensor([ 0.7649, -0.4729, 0.0411, -0.6240]) tensor([0.4940, 0.1433, 0.2395, 0.1232]) -Greedy action tensor([ 0.2867, -0.0835, -0.0711, -0.3288]) tensor([0.3413, 0.2357, 0.2386, 0.1844]) -Greedy action tensor([ 0.3923, -0.1464, -0.0098, -0.3753]) tensor([0.3681, 0.2148, 0.2462, 0.1709]) -Greedy action tensor([ 0.4428, -0.1981, -0.0298, -0.2377]) tensor([0.3764, 0.1983, 0.2347, 0.1906]) -Greedy action tensor([ 0.3671, 0.2218, -0.0130, -0.1917]) tensor([0.3205, 0.2771, 0.2191, 0.1833]) -Greedy action tensor([ 1.1225, -0.6527, -0.0176, -0.5320]) tensor([0.5951, 0.1008, 0.1903, 0.1138]) -Greedy action tensor([ 1.0311, -0.6156, 0.0312, -0.6612]) tensor([0.5732, 0.1104, 0.2109, 0.1055]) -Greedy action tensor([ 0.8207, -0.4956, 0.0252, -0.3340]) tensor([0.4915, 0.1318, 0.2218, 0.1549]) -Greedy action tensor([ 0.7390, -0.4820, -0.1172, -0.6090]) tensor([0.5052, 0.1490, 0.2146, 0.1312]) -Greedy action tensor([ 0.5421, -0.1535, 0.0540, -0.2238]) tensor([0.3880, 0.1935, 0.2381, 0.1804]) -Greedy action tensor([ 0.4824, 0.0854, 0.0352, -0.1884]) tensor([0.3542, 0.2382, 0.2265, 0.1811]) -Greedy action tensor([ 0.5139, -0.1081, -0.0463, -0.2381]) tensor([0.3877, 0.2081, 0.2214, 0.1828]) -Greedy action tensor([ 0.6379, -0.2907, 0.1013, -0.4853]) tensor([0.4338, 0.1714, 0.2537, 0.1411]) -Greedy action tensor([ 0.6318, -0.4344, -0.1278, -0.5719]) tensor([0.4734, 0.1630, 0.2215, 0.1421]) -Greedy action tensor([ 0.2892, 0.0606, 0.0496, -0.3879]) tensor([0.3235, 0.2574, 0.2546, 0.1644]) -Greedy action tensor([ 0.6467, -0.3028, 0.0728, -0.5416]) tensor([0.4435, 0.1716, 0.2498, 0.1351]) -Greedy action tensor([ 0.5178, -0.1676, -0.0057, -0.0677]) tensor([0.3769, 0.1899, 0.2233, 0.2099]) -Greedy action tensor([ 0.6617, -0.0053, -0.1153, -0.1912]) tensor([0.4168, 0.2139, 0.1916, 0.1776]) -Greedy action tensor([ 0.2459, 0.2530, -0.0613, -0.3606]) tensor([0.3041, 0.3063, 0.2237, 0.1658]) -Greedy action tensor([ 0.8603, -0.7230, -0.2073, -0.6867]) tensor([0.5675, 0.1165, 0.1951, 0.1208]) -Greedy action tensor([ 0.6164, 0.2008, 0.1157, -0.1864]) tensor([0.3684, 0.2432, 0.2233, 0.1651]) -Greedy action tensor([ 0.7046, 0.3470, -0.2788, -0.1301]) tensor([0.3988, 0.2789, 0.1492, 0.1731]) -Greedy action tensor([ 0.1133, -0.1431, 0.0979, -0.1308]) tensor([0.2823, 0.2185, 0.2780, 0.2212]) -Greedy action tensor([ 0.7913, -0.2287, 0.0601, -0.4888]) tensor([0.4717, 0.1701, 0.2271, 0.1311]) -Greedy action tensor([ 0.3373, 0.1524, -0.1484, -0.3114]) tensor([0.3368, 0.2799, 0.2072, 0.1760]) -Greedy action tensor([ 0.6576, -0.2240, 0.0354, -0.2781]) tensor([0.4268, 0.1767, 0.2291, 0.1674]) -Greedy action tensor([ 0.3302, -0.0936, 0.1179, -0.2223]) tensor([0.3291, 0.2154, 0.2661, 0.1894]) -Greedy action tensor([ 0.6951, -0.3297, -0.0028, -0.3889]) tensor([0.4556, 0.1635, 0.2268, 0.1541]) -Greedy action tensor([ 1.1749, -0.6017, -0.6383, 1.0935]) tensor([0.4436, 0.0751, 0.0724, 0.4089]) -Greedy action tensor([ 0.7862, -0.4262, -0.0907, 0.1988]) tensor([0.4407, 0.1311, 0.1833, 0.2449]) -Greedy action tensor([ 0.6018, -0.3152, -0.3610, -0.1577]) tensor([0.4445, 0.1777, 0.1697, 0.2080]) -Greedy action tensor([ 1.2715, -0.4760, -0.1832, 0.1362]) tensor([0.5784, 0.1008, 0.1350, 0.1858]) -Greedy action tensor([ 1.1212, -0.7404, -0.5053, 0.7400]) tensor([0.4914, 0.0764, 0.0966, 0.3356]) -Greedy action tensor([ 1.2322, -0.4630, -0.1193, 0.1303]) tensor([0.5635, 0.1034, 0.1459, 0.1872]) -Greedy action tensor([ 0.9922, -0.1586, 0.1101, -0.2470]) tensor([0.4951, 0.1566, 0.2049, 0.1434]) -Greedy action tensor([ 0.9901, -0.1669, -0.3578, 0.1112]) tensor([0.5027, 0.1580, 0.1306, 0.2087]) -Greedy action tensor([ 1.3832, -0.2745, -0.2990, -0.0559]) tensor([0.6197, 0.1181, 0.1152, 0.1470]) -Greedy action tensor([ 0.9421, -0.1365, -0.1646, -0.3043]) tensor([0.5107, 0.1737, 0.1688, 0.1468]) -Greedy action tensor([ 1.1344, -0.7576, 0.0344, 0.1347]) tensor([0.5401, 0.0814, 0.1798, 0.1987]) -Greedy action tensor([ 0.6261, -0.4799, 0.0611, -0.0572]) tensor([0.4159, 0.1376, 0.2364, 0.2100]) -Greedy action tensor([ 1.3236, -0.6836, -0.1761, -0.0381]) tensor([0.6197, 0.0833, 0.1383, 0.1588]) -Greedy action tensor([ 1.0895, -0.6357, 0.0392, 0.0353]) tensor([0.5329, 0.0949, 0.1864, 0.1857]) -Greedy action tensor([ 1.7128, -0.5089, -0.0530, -0.1531]) tensor([0.6972, 0.0756, 0.1193, 0.1079]) -Greedy action tensor([ 0.9721, -0.4025, -0.1477, -0.2147]) tensor([0.5306, 0.1342, 0.1732, 0.1619]) -Greedy action tensor([ 0.9302, 0.0124, -0.0274, -0.3423]) tensor([0.4846, 0.1936, 0.1860, 0.1358]) -Greedy action tensor([ 0.9431, -0.3345, -0.1314, 0.1560]) tensor([0.4818, 0.1343, 0.1645, 0.2193]) -Greedy action tensor([ 1.0957, -0.1576, -0.4229, -0.0363]) tensor([0.5474, 0.1563, 0.1199, 0.1765]) -Greedy action tensor([ 1.3165, -0.7480, -0.2735, 0.4282]) tensor([0.5740, 0.0728, 0.1170, 0.2361]) -Greedy action tensor([ 1.4051, -0.7115, -0.4381, 0.6456]) tensor([0.5725, 0.0690, 0.0906, 0.2679]) -Greedy action tensor([ 1.2256, -0.4647, -0.4554, -0.1123]) tensor([0.6123, 0.1130, 0.1140, 0.1607]) -Greedy action tensor([ 0.8570, -0.5572, -0.2016, 0.5158]) tensor([0.4346, 0.1057, 0.1508, 0.3090]) -Greedy action tensor([ 0.7937, -0.0673, 0.2311, 0.0356]) tensor([0.4063, 0.1718, 0.2315, 0.1904]) -Greedy action tensor([ 1.1617, -0.5620, -0.2249, -0.0630]) tensor([0.5807, 0.1036, 0.1451, 0.1706]) -Greedy action tensor([ 1.3037, -0.2502, -0.1472, -0.0049]) tensor([0.5828, 0.1232, 0.1366, 0.1575]) -Greedy action tensor([ 0.9421, -0.4522, -0.1618, -0.0379]) tensor([0.5115, 0.1269, 0.1696, 0.1920]) -Greedy action tensor([ 0.6146, -0.5869, 0.0227, -0.0682]) tensor([0.4239, 0.1275, 0.2345, 0.2141]) -Greedy action tensor([ 0.4529, -0.1799, -0.3367, -0.0841]) tensor([0.3891, 0.2067, 0.1767, 0.2275]) -Greedy action tensor([ 0.7764, -0.1514, -0.0061, -0.1564]) tensor([0.4452, 0.1760, 0.2036, 0.1752]) -Greedy action tensor([ 1.1187, -0.3753, -0.7881, -0.0763]) tensor([0.5967, 0.1340, 0.0887, 0.1806]) -Greedy action tensor([ 1.0297, -0.5790, -0.3232, 0.4451]) tensor([0.4960, 0.0993, 0.1282, 0.2765]) -Greedy action tensor([ 0.8771, -0.7760, -0.3767, 0.3080]) tensor([0.4895, 0.0937, 0.1397, 0.2771]) -Greedy action tensor([ 1.3016, -0.4874, -0.5359, 0.4731]) tensor([0.5672, 0.0948, 0.0903, 0.2477]) -Greedy action tensor([ 1.3134, -0.4194, -0.5283, 0.4365]) tensor([0.5710, 0.1009, 0.0905, 0.2376]) -Greedy action tensor([ 0.6123, -0.3798, -0.7847, 0.6344]) tensor([0.3787, 0.1404, 0.0937, 0.3872]) -Greedy action tensor([ 0.4782, -0.4072, 0.0098, -0.2050]) tensor([0.3931, 0.1622, 0.2461, 0.1985]) -Greedy action tensor([ 0.6680, -0.3176, -0.5742, 0.1999]) tensor([0.4370, 0.1631, 0.1262, 0.2737]) -Greedy action tensor([ 1.4129, -0.7083, -0.2850, 0.3992]) tensor([0.6003, 0.0720, 0.1099, 0.2178]) -Greedy action tensor([ 0.9515, -0.5055, -0.0183, -0.4199]) tensor([0.5360, 0.1248, 0.2032, 0.1360]) -Greedy action tensor([ 1.4304, -0.5534, -0.2906, -0.1945]) tensor([0.6608, 0.0909, 0.1182, 0.1301]) -Greedy action tensor([ 1.0390, -0.1032, -0.1690, -0.2278]) tensor([0.5264, 0.1680, 0.1573, 0.1483]) -Greedy action tensor([ 0.9411, -0.7872, -0.1618, 0.5163]) tensor([0.4622, 0.0821, 0.1534, 0.3023]) -Greedy action tensor([ 0.3521, -0.2281, -0.4076, -0.0927]) tensor([0.3747, 0.2098, 0.1753, 0.2402]) -Greedy action tensor([ 1.3795, -0.8180, -0.1860, 0.5799]) tensor([0.5651, 0.0628, 0.1181, 0.2540]) -Greedy action tensor([ 1.9236, -0.4699, -0.3382, 0.3865]) tensor([0.7090, 0.0647, 0.0738, 0.1524]) -Greedy action tensor([ 0.5698, -0.1142, -0.0370, -0.0120]) tensor([0.3833, 0.1934, 0.2090, 0.2143]) -Greedy action tensor([ 1.0987, -0.2017, 0.0690, -0.0152]) tensor([0.5108, 0.1391, 0.1824, 0.1677]) -Greedy action tensor([ 1.5573, -0.2550, -0.3714, -0.0612]) tensor([0.6637, 0.1084, 0.0965, 0.1315]) -Greedy action tensor([ 1.0644, -0.3615, -0.3812, -0.2849]) tensor([0.5763, 0.1385, 0.1358, 0.1495]) -Greedy action tensor([ 1.4813, -0.4968, -0.1819, 0.0947]) tensor([0.6338, 0.0877, 0.1201, 0.1584]) -Greedy action tensor([ 0.6897, -0.2402, 0.0613, -0.1689]) tensor([0.4252, 0.1678, 0.2268, 0.1802]) -Greedy action tensor([ 1.3465, -0.5025, -0.1451, 0.0623]) tensor([0.6027, 0.0949, 0.1356, 0.1669]) -Greedy action tensor([ 1.0879, -0.2694, -0.1045, 0.0718]) tensor([0.5201, 0.1338, 0.1578, 0.1883]) -Greedy action tensor([ 1.1578, -0.4808, -0.3976, 0.2831]) tensor([0.5488, 0.1066, 0.1158, 0.2288]) -Greedy action tensor([ 0.9108, -0.3159, -0.2168, 0.1248]) tensor([0.4825, 0.1415, 0.1562, 0.2198]) -Greedy action tensor([ 1.4037, -0.2080, -0.2390, -0.1923]) tensor([0.6267, 0.1251, 0.1212, 0.1270]) -Greedy action tensor([ 0.4539, -0.2766, -0.0568, -0.0323]) tensor([0.3708, 0.1786, 0.2225, 0.2280]) -Greedy action tensor([ 0.8511, 0.0322, -0.2711, 0.0650]) tensor([0.4500, 0.1984, 0.1465, 0.2050]) -Greedy action tensor([ 1.3957, -0.8378, -0.3763, 0.1277]) tensor([0.6416, 0.0688, 0.1091, 0.1806]) -Greedy action tensor([ 1.6619, -0.0696, -0.4332, 0.1601]) tensor([0.6567, 0.1162, 0.0808, 0.1463]) -Greedy action tensor([ 1.0435, -0.6165, -0.1912, 0.0326]) tensor([0.5420, 0.1031, 0.1577, 0.1972]) -Greedy action tensor([ 0.6845, -0.4547, -0.4241, 0.5415]) tensor([0.3973, 0.1272, 0.1311, 0.3444]) -Greedy action tensor([ 0.9882, -0.3931, -0.6597, 0.4497]) tensor([0.4933, 0.1239, 0.0949, 0.2879]) -Greedy action tensor([ 0.9719, -0.0982, 0.0194, -0.0420]) tensor([0.4781, 0.1640, 0.1845, 0.1735]) -Greedy action tensor([ 1.2052, -0.6413, -0.4005, 0.6153]) tensor([0.5228, 0.0825, 0.1049, 0.2898]) -Greedy action tensor([ 0.8639, -0.5682, -0.4007, 0.2780]) tensor([0.4813, 0.1149, 0.1359, 0.2679]) -Greedy action tensor([ 0.5324, -0.2176, -0.3319, 0.3199]) tensor([0.3701, 0.1748, 0.1559, 0.2992]) -Greedy action tensor([ 0.6501, -0.0142, -0.5297, -0.4194]) tensor([0.4618, 0.2377, 0.1420, 0.1585]) -Greedy action tensor([ 0.6917, -0.3603, -0.2273, -0.2935]) tensor([0.4714, 0.1646, 0.1880, 0.1760]) -Greedy action tensor([ 1.5090, -0.4248, -0.2106, 0.1801]) tensor([0.6295, 0.0910, 0.1128, 0.1667]) -Greedy action tensor([ 1.5769, -0.3442, 0.0140, 0.1628]) tensor([0.6253, 0.0916, 0.1310, 0.1521]) -Greedy action tensor([ 1.2473, -0.3657, -0.3533, 0.1777]) tensor([0.5733, 0.1143, 0.1157, 0.1967]) -Greedy action tensor([ 1.3160, -0.4600, -0.2506, 0.1434]) tensor([0.5925, 0.1003, 0.1237, 0.1834]) -Greedy action tensor([ 1.4244, -0.1458, -0.2660, 0.0874]) tensor([0.6042, 0.1257, 0.1114, 0.1587]) -Greedy action tensor([ 0.8787, -0.3875, 0.1054, -0.1757]) tensor([0.4781, 0.1348, 0.2206, 0.1665]) -Greedy action tensor([ 0.7829, -0.2060, -0.5828, -0.1399]) tensor([0.4939, 0.1837, 0.1261, 0.1963]) -Greedy action tensor([ 0.7813, -0.0252, -0.1431, -0.1162]) tensor([0.4443, 0.1983, 0.1763, 0.1811]) -Greedy action tensor([ 1.0737, -0.2155, -0.1658, 0.2404]) tensor([0.5001, 0.1378, 0.1448, 0.2174]) -Greedy action tensor([ 1.0785, -0.3276, -0.4248, 0.5120]) tensor([0.4914, 0.1204, 0.1093, 0.2789]) -Greedy action tensor([ 1.2915, -0.3371, -0.3180, 0.2096]) tensor([0.5763, 0.1131, 0.1153, 0.1953]) -Greedy action tensor([-1.2914, -0.4538, 0.7244, 0.9318]) tensor([0.0499, 0.1152, 0.3743, 0.4606]) -Greedy action tensor([-1.3687, -0.5659, 0.5109, 0.5715]) tensor([0.0597, 0.1333, 0.3913, 0.4157]) -Greedy action tensor([-0.3313, -0.2505, 1.1740, 1.6341]) tensor([0.0728, 0.0790, 0.3282, 0.5200]) -Greedy action tensor([-1.4848, -0.3110, 0.6754, 0.7344]) tensor([0.0452, 0.1463, 0.3923, 0.4162]) -Greedy action tensor([-0.7423, -0.5434, 0.2925, -0.0018]) tensor([0.1402, 0.1711, 0.3947, 0.2940]) -Greedy action tensor([-1.2055, -0.6958, 0.9877, 1.2788]) tensor([0.0423, 0.0705, 0.3795, 0.5077]) -Greedy action tensor([-1.1607, -0.5561, 0.2593, 0.3274]) tensor([0.0878, 0.1606, 0.3630, 0.3886]) -Greedy action tensor([-1.2034, -0.5302, 0.3190, 0.2923]) tensor([0.0833, 0.1633, 0.3817, 0.3717]) -Greedy action tensor([-1.6339, -0.4487, 0.4852, 0.0149]) tensor([0.0562, 0.1838, 0.4677, 0.2922]) -Greedy action tensor([-1.5950, -0.4903, 0.5358, 0.1625]) tensor([0.0548, 0.1655, 0.4618, 0.3179]) -Greedy action tensor([-0.8738, -0.5380, 0.2530, 0.5057]) tensor([0.1057, 0.1479, 0.3263, 0.4201]) -Greedy action tensor([-0.7882, -0.5544, 0.2459, 0.1556]) tensor([0.1308, 0.1652, 0.3679, 0.3361]) -Greedy action tensor([-0.9555, 0.1503, 0.2841, 0.3398]) tensor([0.0899, 0.2715, 0.3104, 0.3282]) -Greedy action tensor([-1.7704, -0.5458, 0.5583, -0.0897]) tensor([0.0499, 0.1698, 0.5123, 0.2680]) -Greedy action tensor([-1.6420, -0.2876, 0.5194, 0.1160]) tensor([0.0517, 0.2001, 0.4486, 0.2996]) -Greedy action tensor([-1.9086, -0.8461, 0.2540, -0.1993]) tensor([0.0552, 0.1598, 0.4800, 0.3050]) -Greedy action tensor([-1.8317, -0.4791, 0.6265, -0.0727]) tensor([0.0447, 0.1730, 0.5226, 0.2597]) -Greedy action tensor([-1.7080, -0.7471, 0.0703, -0.1428]) tensor([0.0698, 0.1826, 0.4135, 0.3341]) -Greedy action tensor([-1.4782, -0.1728, 0.3528, 0.1506]) tensor([0.0624, 0.2302, 0.3893, 0.3181]) -Greedy action tensor([-1.3037, -0.5069, 0.3758, 0.0580]) tensor([0.0801, 0.1777, 0.4296, 0.3126]) -Greedy action tensor([-1.5134, -0.5613, 0.6752, 0.5734]) tensor([0.0486, 0.1260, 0.4337, 0.3917]) -Greedy action tensor([-0.6208, -0.5472, 0.1616, 0.2836]) tensor([0.1485, 0.1599, 0.3248, 0.3669]) -Greedy action tensor([-0.3019, -0.3790, 1.0475, 1.6630]) tensor([0.0774, 0.0717, 0.2985, 0.5524]) -Greedy action tensor([-1.5194, -0.5330, 0.5221, 0.0060]) tensor([0.0626, 0.1678, 0.4820, 0.2877]) -Greedy action tensor([-1.1940, -0.3575, 0.7276, 0.9696]) tensor([0.0531, 0.1225, 0.3626, 0.4618]) -Greedy action tensor([-1.9107, -0.4688, 0.7151, -0.0798]) tensor([0.0396, 0.1672, 0.5464, 0.2468]) -Greedy action tensor([-1.6170, -0.4412, 0.5733, 0.2589]) tensor([0.0507, 0.1645, 0.4536, 0.3312]) -Greedy action tensor([-1.9898, -0.6122, 1.1281, 0.4935]) tensor([0.0253, 0.1003, 0.5715, 0.3030]) -Greedy action tensor([-1.4129, -0.0729, 0.6518, -0.5854]) tensor([0.0667, 0.2548, 0.5259, 0.1526]) -Greedy action tensor([-1.6157, -0.4910, 0.5138, 0.0966]) tensor([0.0555, 0.1708, 0.4664, 0.3073]) -Greedy action tensor([-0.8755, -0.2499, 0.4671, -0.4252]) tensor([0.1210, 0.2261, 0.4632, 0.1898]) -Greedy action tensor([-1.7885, -0.5991, 0.5157, -0.1291]) tensor([0.0511, 0.1680, 0.5121, 0.2688]) -Greedy action tensor([-1.4559, -0.4571, 0.9336, 0.9136]) tensor([0.0395, 0.1072, 0.4309, 0.4224]) -Greedy action tensor([-1.5511, -0.5788, 0.7354, 0.5140]) tensor([0.0468, 0.1237, 0.4605, 0.3690]) -Greedy action tensor([-0.6866, -0.5178, 1.1453, 1.5937]) tensor([0.0549, 0.0650, 0.3430, 0.5371]) -Greedy action tensor([-1.4143, -0.1504, 0.6830, 0.8004]) tensor([0.0458, 0.1620, 0.3729, 0.4193]) -Greedy action tensor([-1.7620, -0.3635, 0.8222, 0.5872]) tensor([0.0347, 0.1407, 0.4605, 0.3641]) -Greedy action tensor([-0.7058, -0.5091, 0.2195, 0.5006]) tensor([0.1237, 0.1506, 0.3122, 0.4135]) -Greedy action tensor([-1.2439, -0.5435, 0.4227, 0.6805]) tensor([0.0660, 0.1329, 0.3492, 0.4519]) -Greedy action tensor([-1.6510, -0.9061, 0.0488, -0.6819]) tensor([0.0892, 0.1878, 0.4880, 0.2350]) -Greedy action tensor([-1.5319, -0.0981, 0.8160, 0.7703]) tensor([0.0390, 0.1635, 0.4079, 0.3896]) -Greedy action tensor([-0.3760, 0.8783, -0.1321, 0.0473]) tensor([0.1368, 0.4796, 0.1746, 0.2089]) -Greedy action tensor([-1.2380, -0.4073, 0.5990, 0.9521]) tensor([0.0540, 0.1240, 0.3392, 0.4828]) -Greedy action tensor([-1.8393, -0.9399, 0.1494, -0.3554]) tensor([0.0659, 0.1620, 0.4815, 0.2906]) -Greedy action tensor([-0.9261, -0.6872, 0.5340, -0.2502]) tensor([0.1171, 0.1487, 0.5042, 0.2301]) -Greedy action tensor([-1.4078, -0.5797, 0.5217, 0.0528]) tensor([0.0690, 0.1580, 0.4754, 0.2975]) -Greedy action tensor([-0.8565, -0.2754, 0.4246, 0.9800]) tensor([0.0790, 0.1412, 0.2843, 0.4955]) -Greedy action tensor([-1.3236, -0.7167, 0.3907, -0.2889]) tensor([0.0893, 0.1638, 0.4957, 0.2512]) -Greedy action tensor([-1.2664, -0.3086, 0.9406, 1.1360]) tensor([0.0421, 0.1098, 0.3828, 0.4654]) -Greedy action tensor([-1.5546, -0.5965, 1.1040, 0.9537]) tensor([0.0331, 0.0864, 0.4732, 0.4072]) -Greedy action tensor([-1.7045, -0.6495, 0.3660, -0.1696]) tensor([0.0608, 0.1747, 0.4822, 0.2823]) -Greedy action tensor([-2.0236, -0.8467, 0.4056, -0.1473]) tensor([0.0452, 0.1466, 0.5130, 0.2951]) -Greedy action tensor([-0.7196, -0.4871, 1.2641, 1.5338]) tensor([0.0525, 0.0662, 0.3816, 0.4997]) -Greedy action tensor([-1.9298, -0.7182, 1.0188, 0.1556]) tensor([0.0318, 0.1067, 0.6060, 0.2556]) -Greedy action tensor([-2.0335, -0.9502, 0.6585, 0.0366]) tensor([0.0375, 0.1109, 0.5541, 0.2975]) -Greedy action tensor([-1.3656, -0.2257, 0.5492, -0.3268]) tensor([0.0728, 0.2276, 0.4939, 0.2057]) -Greedy action tensor([-1.9020, -0.2974, 0.5100, 0.0031]) tensor([0.0419, 0.2086, 0.4677, 0.2817]) -Greedy action tensor([-1.9105, -0.6750, 0.9333, 0.2621]) tensor([0.0329, 0.1132, 0.5651, 0.2888]) -Greedy action tensor([-1.7833, 0.5452, 0.5788, -0.0285]) tensor([0.0362, 0.3710, 0.3837, 0.2091]) -Greedy action tensor([-1.4574, -0.5840, 1.1294, 1.0506]) tensor([0.0345, 0.0827, 0.4588, 0.4240]) -Greedy action tensor([-1.5048, -0.6955, 1.4297, 1.2484]) tensor([0.0265, 0.0595, 0.4983, 0.4157]) -Greedy action tensor([-1.7929, -0.4845, 0.5963, -0.0478]) tensor([0.0469, 0.1735, 0.5112, 0.2685]) -Greedy action tensor([-1.6489, -0.6561, 0.1678, -0.4276]) tensor([0.0755, 0.2038, 0.4645, 0.2561]) -Greedy action tensor([-1.1142, 0.4150, 0.3597, -0.1736]) tensor([0.0797, 0.3679, 0.3481, 0.2042]) -Greedy action tensor([-1.7925, -0.5138, 0.6418, -0.0582]) tensor([0.0462, 0.1658, 0.5265, 0.2615]) -Greedy action tensor([-1.0992, -0.6240, 0.2193, 0.2420]) tensor([0.0983, 0.1581, 0.3675, 0.3760]) -Greedy action tensor([-1.9508, -0.6888, 0.8256, 0.0754]) tensor([0.0355, 0.1254, 0.5700, 0.2692]) -Greedy action tensor([-1.5731, -0.6396, 1.2152, 0.8378]) tensor([0.0323, 0.0822, 0.5253, 0.3602]) -Greedy action tensor([-1.2598, -0.4304, 1.0591, 1.2261]) tensor([0.0393, 0.0900, 0.3991, 0.4716]) -Greedy action tensor([-1.8167, -0.8329, 0.1029, -0.4033]) tensor([0.0685, 0.1832, 0.4669, 0.2814]) -Greedy action tensor([-0.7170, -0.1827, 0.1726, 0.0974]) tensor([0.1352, 0.2306, 0.3290, 0.3052]) -Greedy action tensor([-1.5203, -0.6719, 1.0567, 0.9295]) tensor([0.0356, 0.0832, 0.4686, 0.4126]) -Greedy action tensor([-1.9136, -0.4514, 0.6509, -0.1615]) tensor([0.0415, 0.1792, 0.5397, 0.2395]) -Greedy action tensor([-1.3321, -0.5071, 0.3857, 0.0215]) tensor([0.0786, 0.1793, 0.4379, 0.3042]) -Greedy action tensor([-0.5780, -0.5730, 0.2482, 0.1130]) tensor([0.1591, 0.1599, 0.3635, 0.3175]) -Greedy action tensor([-1.9984, -0.8674, 0.3963, -0.1795]) tensor([0.0471, 0.1460, 0.5165, 0.2904]) -Greedy action tensor([-0.9191, -0.6096, 0.1755, 0.3143]) tensor([0.1138, 0.1551, 0.3402, 0.3908]) -Greedy action tensor([-1.7348, -0.4849, 0.5780, -0.0396]) tensor([0.0499, 0.1741, 0.5041, 0.2718]) -Greedy action tensor([-1.5922, -0.4498, 0.5483, 0.2248]) tensor([0.0532, 0.1668, 0.4525, 0.3275]) -Greedy action tensor([-1.0571, -0.5663, 0.4142, 0.9812]) tensor([0.0682, 0.1114, 0.2969, 0.5235]) -Greedy action tensor([-1.8345, -0.4667, 0.6089, -0.1168]) tensor([0.0454, 0.1784, 0.5230, 0.2531]) -Greedy action tensor([ 0.2690, 0.3471, -1.0937, 0.7768]) tensor([0.2501, 0.2704, 0.0640, 0.4155]) -Greedy action tensor([ 0.5441, 0.5369, -0.0852, 1.3842]) tensor([0.2065, 0.2050, 0.1101, 0.4784]) -Greedy action tensor([ 0.5124, -1.7209, -0.2683, 1.3409]) tensor([0.2594, 0.0278, 0.1188, 0.5940]) -Greedy action tensor([1.4923, 0.1219, 0.2696, 1.0188]) tensor([0.4606, 0.1170, 0.1356, 0.2868]) -Greedy action tensor([ 0.1941, 0.5854, -0.6304, 0.7426]) tensor([0.2152, 0.3182, 0.0943, 0.3723]) -Greedy action tensor([ 0.8739, -0.5011, 0.7758, 1.4385]) tensor([0.2552, 0.0645, 0.2314, 0.4489]) -Greedy action tensor([ 1.2183, -1.1118, 1.6290, 0.2045]) tensor([0.3369, 0.0328, 0.5080, 0.1222]) -Greedy action tensor([ 0.2938, 0.7953, -0.4106, 0.3395]) tensor([0.2385, 0.3938, 0.1179, 0.2497]) -Greedy action tensor([ 1.3586, -0.1220, -0.0881, 0.2552]) tensor([0.5572, 0.1268, 0.1311, 0.1849]) -Greedy action tensor([ 0.6541, 0.6696, -0.4925, 1.3483]) tensor([0.2306, 0.2343, 0.0733, 0.4618]) -Greedy action tensor([ 1.5461, -1.9087, 0.3424, 0.6640]) tensor([0.5729, 0.0181, 0.1719, 0.2371]) -Greedy action tensor([0.3404, 0.5972, 0.4393, 0.4157]) tensor([0.2235, 0.2889, 0.2467, 0.2409]) -Greedy action tensor([0.0153, 1.5431, 0.4834, 0.3183]) tensor([0.1168, 0.5384, 0.1866, 0.1582]) -Greedy action tensor([ 0.9441, -0.1546, -0.1351, 0.1441]) tensor([0.4711, 0.1570, 0.1601, 0.2117]) -Greedy action tensor([-0.2108, -0.2725, -0.2902, 0.8477]) tensor([0.1740, 0.1636, 0.1608, 0.5016]) -Greedy action tensor([ 0.3438, 0.4454, -0.0333, 0.7024]) tensor([0.2367, 0.2621, 0.1624, 0.3388]) -Greedy action tensor([ 0.1857, -0.1482, 0.4152, 0.8257]) tensor([0.2053, 0.1470, 0.2583, 0.3894]) -Greedy action tensor([-0.3228, 0.3283, 1.4195, -0.4349]) tensor([0.1050, 0.2014, 0.5997, 0.0939]) -Greedy action tensor([-0.0885, 0.1674, -0.0304, 0.5727]) tensor([0.1891, 0.2442, 0.2004, 0.3663]) -Greedy action tensor([ 1.3765, -0.5629, 0.0236, 1.4563]) tensor([0.4024, 0.0579, 0.1040, 0.4358]) -Greedy action tensor([-0.0293, -0.2022, -0.0740, 0.7226]) tensor([0.2033, 0.1710, 0.1944, 0.4312]) -Greedy action tensor([ 0.7446, -1.5646, 0.8471, 1.0696]) tensor([0.2785, 0.0277, 0.3085, 0.3854]) -Greedy action tensor([ 0.3578, -0.9754, 0.8538, 0.7746]) tensor([0.2261, 0.0596, 0.3713, 0.3430]) -Greedy action tensor([ 0.4449, 0.6904, 0.5414, -0.1046]) tensor([0.2527, 0.3230, 0.2783, 0.1459]) -Greedy action tensor([ 1.4587, -0.0950, 0.3407, 0.8415]) tensor([0.4813, 0.1018, 0.1573, 0.2596]) -Greedy action tensor([ 1.1927, -0.3450, -0.3174, 0.6680]) tensor([0.4932, 0.1060, 0.1089, 0.2919]) -Greedy action tensor([ 0.1820, -0.0462, -0.0681, -0.6164]) tensor([0.3306, 0.2632, 0.2574, 0.1488]) -Greedy action tensor([0.7972, 0.2603, 0.0561, 0.9872]) tensor([0.3058, 0.1787, 0.1457, 0.3698]) -Greedy action tensor([-0.1584, -0.4439, -0.4567, 0.5876]) tensor([0.2173, 0.1633, 0.1612, 0.4582]) -Greedy action tensor([ 1.2641, 0.5877, -0.0199, 0.6755]) tensor([0.4273, 0.2172, 0.1183, 0.2372]) -Greedy action tensor([ 0.3617, 1.7577, -0.4024, 0.4225]) tensor([0.1523, 0.6150, 0.0709, 0.1618]) -Greedy action tensor([ 0.8518, -0.1573, 0.8033, 0.7801]) tensor([0.3079, 0.1122, 0.2933, 0.2866]) -Greedy action tensor([ 0.3293, -0.1766, 0.1803, -0.0454]) tensor([0.3172, 0.1913, 0.2733, 0.2181]) -Greedy action tensor([1.5576, 0.5163, 1.5198, 1.3419]) tensor([0.3203, 0.1131, 0.3084, 0.2582]) -Greedy action tensor([ 0.5645, 0.3482, -0.1484, 1.7653]) tensor([0.1780, 0.1434, 0.0872, 0.5914]) -Greedy action tensor([ 0.9472, -0.2619, 0.0577, 0.4657]) tensor([0.4297, 0.1282, 0.1765, 0.2655]) -Greedy action tensor([ 0.4191, -1.0200, 0.8489, 0.6259]) tensor([0.2498, 0.0592, 0.3839, 0.3071]) -Greedy action tensor([0.8920, 0.2636, 0.5034, 0.3044]) tensor([0.3614, 0.1928, 0.2450, 0.2008]) -Greedy action tensor([1.1723, 0.4553, 1.7008, 0.7817]) tensor([0.2590, 0.1264, 0.4393, 0.1752]) -Greedy action tensor([-0.1511, -0.3084, -1.2281, 1.0215]) tensor([0.1843, 0.1575, 0.0628, 0.5954]) -Greedy action tensor([1.1412, 0.7241, 0.0225, 1.2927]) tensor([0.3175, 0.2092, 0.1037, 0.3695]) -Greedy action tensor([ 1.5874, -0.8774, 0.4171, 0.7282]) tensor([0.5498, 0.0467, 0.1706, 0.2328]) -Greedy action tensor([-0.1868, -1.2223, 0.1318, 0.2917]) tensor([0.2302, 0.0817, 0.3166, 0.3715]) -Greedy action tensor([ 0.7931, -0.0062, -0.2210, 0.4143]) tensor([0.4005, 0.1801, 0.1453, 0.2742]) -Greedy action tensor([ 0.7140, 0.9235, -0.1575, 0.3007]) tensor([0.3019, 0.3722, 0.1263, 0.1997]) -Greedy action tensor([ 0.0567, -0.4383, 0.2891, -0.5439]) tensor([0.2924, 0.1783, 0.3689, 0.1604]) -Greedy action tensor([ 1.6793, -0.2546, 1.4611, 0.7228]) tensor([0.4287, 0.0620, 0.3446, 0.1647]) -Greedy action tensor([ 1.0499, 0.2496, -0.7524, 0.7995]) tensor([0.4180, 0.1877, 0.0689, 0.3254]) -Greedy action tensor([1.2760, 0.5874, 0.7273, 0.9918]) tensor([0.3530, 0.1773, 0.2039, 0.2657]) -Greedy action tensor([ 1.5165, -0.7148, 0.6132, 1.0610]) tensor([0.4658, 0.0500, 0.1888, 0.2954]) -Greedy action tensor([ 1.1556, -0.2096, 1.0709, 0.6435]) tensor([0.3606, 0.0921, 0.3313, 0.2161]) -Greedy action tensor([ 0.4630, -0.9382, 0.4771, 1.2481]) tensor([0.2246, 0.0553, 0.2278, 0.4924]) -Greedy action tensor([ 1.1777, -0.2140, 0.4569, 0.3720]) tensor([0.4583, 0.1140, 0.2229, 0.2048]) -Greedy action tensor([ 1.0869, -1.3064, 0.1316, 0.3918]) tensor([0.5063, 0.0462, 0.1948, 0.2527]) -Greedy action tensor([ 1.0070, -0.6363, 0.4894, 0.8636]) tensor([0.3766, 0.0728, 0.2244, 0.3262]) -Greedy action tensor([ 0.0164, 0.5045, 0.3531, -0.5777]) tensor([0.2183, 0.3556, 0.3056, 0.1205]) -Greedy action tensor([1.4358, 0.1834, 0.0923, 1.5231]) tensor([0.3791, 0.1083, 0.0989, 0.4137]) -Greedy action tensor([ 0.0671, -0.7951, -0.0102, -0.2681]) tensor([0.3265, 0.1378, 0.3022, 0.2335]) -Greedy action tensor([ 0.7991, 0.4391, -0.1213, 0.1637]) tensor([0.3809, 0.2657, 0.1517, 0.2017]) -Greedy action tensor([ 0.1122, -1.2133, 1.0417, 0.4007]) tensor([0.1948, 0.0518, 0.4935, 0.2600]) -Greedy action tensor([ 1.5871, -0.5310, -0.3494, 0.2593]) tensor([0.6538, 0.0786, 0.0943, 0.1733]) -Greedy action tensor([ 1.1485, 0.0338, -0.6311, 0.2192]) tensor([0.5287, 0.1734, 0.0892, 0.2087]) -Greedy action tensor([ 0.8519, -1.2941, -0.1648, 0.2935]) tensor([0.4876, 0.0570, 0.1764, 0.2790]) -Greedy action tensor([ 1.1345, -0.3717, 1.5558, 1.0051]) tensor([0.2759, 0.0612, 0.4205, 0.2424]) -Greedy action tensor([ 1.1660, -0.0894, -0.1114, 1.2213]) tensor([0.3816, 0.1087, 0.1064, 0.4033]) -Greedy action tensor([0.8515, 0.2305, 0.2876, 0.1038]) tensor([0.3876, 0.2083, 0.2205, 0.1835]) -Greedy action tensor([ 1.1093, -0.4274, 0.8580, 0.8234]) tensor([0.3644, 0.0784, 0.2834, 0.2738]) -Greedy action tensor([ 1.0800, 1.5783, -0.3047, 0.7546]) tensor([0.2763, 0.4549, 0.0692, 0.1996]) -Greedy action tensor([0.6411, 0.3600, 1.2097, 1.3772]) tensor([0.1783, 0.1346, 0.3148, 0.3723]) -Greedy action tensor([ 0.2273, 0.3377, -1.3330, 0.9982]) tensor([0.2228, 0.2488, 0.0468, 0.4816]) -Greedy action tensor([ 2.2311, -0.7441, 0.2888, 2.0888]) tensor([0.4850, 0.0248, 0.0695, 0.4207]) -Greedy action tensor([ 0.0359, -0.8019, -0.3173, 1.3370]) tensor([0.1722, 0.0745, 0.1209, 0.6324]) -Greedy action tensor([ 1.4812, -0.4814, 0.9772, 0.6431]) tensor([0.4593, 0.0645, 0.2775, 0.1987]) -Greedy action tensor([1.4988, 0.6207, 0.7758, 0.4661]) tensor([0.4431, 0.1841, 0.2150, 0.1578]) -Greedy action tensor([ 0.2083, -0.2702, 0.2158, 0.8857]) tensor([0.2176, 0.1348, 0.2192, 0.4284]) -Greedy action tensor([-0.0882, 0.1605, 0.2192, 1.3474]) tensor([0.1275, 0.1635, 0.1734, 0.5357]) -Greedy action tensor([ 0.7397, -1.3523, 0.1246, -0.4274]) tensor([0.5063, 0.0625, 0.2737, 0.1576]) -Greedy action tensor([0.0735, 0.2161, 1.0642, 0.4812]) tensor([0.1575, 0.1816, 0.4241, 0.2368]) -Greedy action tensor([1.5181, 0.2320, 1.2650, 0.3871]) tensor([0.4210, 0.1163, 0.3269, 0.1358]) -Greedy action tensor([ 0.4079, 0.3329, 0.0599, -0.3949]) tensor([0.3245, 0.3010, 0.2291, 0.1454]) -Greedy action tensor([ 0.6172, -0.6809, -0.5027, -0.2007]) tensor([0.4900, 0.1338, 0.1599, 0.2163]) -Greedy action tensor([ 1.7432, -0.1122, 1.7038, 0.7048]) tensor([0.4046, 0.0633, 0.3889, 0.1432]) -Greedy action tensor([ 0.6648, -0.0076, -0.0455, -0.3134]) tensor([0.4205, 0.2147, 0.2067, 0.1581]) -Greedy action tensor([ 0.5713, -0.0065, -0.0324, -0.3709]) tensor([0.4004, 0.2247, 0.2189, 0.1560]) -Greedy action tensor([ 0.4064, -0.0221, -0.0510, -0.0982]) tensor([0.3462, 0.2256, 0.2191, 0.2090]) -Greedy action tensor([ 0.2837, 0.2153, 0.0407, -0.0085]) tensor([0.2886, 0.2695, 0.2264, 0.2155]) -Greedy action tensor([ 0.7422, -0.5154, -0.0816, -0.3574]) tensor([0.4864, 0.1383, 0.2134, 0.1620]) -Greedy action tensor([ 1.0134, -0.6388, 0.1416, -0.4839]) tensor([0.5454, 0.1045, 0.2281, 0.1220]) -Greedy action tensor([ 0.4669, -0.0419, 0.0285, -0.1118]) tensor([0.3563, 0.2142, 0.2298, 0.1997]) -Greedy action tensor([ 0.2578, -0.1788, -0.0221, -0.3494]) tensor([0.3393, 0.2193, 0.2565, 0.1849]) -Greedy action tensor([ 0.2219, 0.0544, 0.0658, -0.1176]) tensor([0.2930, 0.2478, 0.2506, 0.2086]) -Greedy action tensor([ 0.9114, -0.4214, 0.0652, -0.5000]) tensor([0.5164, 0.1362, 0.2215, 0.1259]) -Greedy action tensor([ 0.7738, -0.4377, 0.1186, -0.7005]) tensor([0.4888, 0.1455, 0.2538, 0.1119]) -Greedy action tensor([ 0.5895, -0.1942, 0.1218, -0.3170]) tensor([0.4021, 0.1836, 0.2519, 0.1624]) -Greedy action tensor([ 0.4550, -0.1685, 0.0737, -0.1770]) tensor([0.3636, 0.1949, 0.2483, 0.1932]) -Greedy action tensor([ 0.4416, 0.0889, -0.0776, -0.3266]) tensor([0.3621, 0.2545, 0.2154, 0.1680]) -Greedy action tensor([ 0.6567, -0.5761, 0.0050, -0.6720]) tensor([0.4814, 0.1403, 0.2509, 0.1275]) -Greedy action tensor([ 0.5879, -0.3742, 0.0380, -0.4709]) tensor([0.4337, 0.1657, 0.2502, 0.1504]) -Greedy action tensor([ 0.4564, -0.3879, 0.1107, -0.3600]) tensor([0.3877, 0.1666, 0.2744, 0.1713]) -Greedy action tensor([ 0.5761, -0.3954, -0.0241, -0.4223]) tensor([0.4356, 0.1649, 0.2390, 0.1605]) -Greedy action tensor([ 0.4337, -0.0662, -0.1589, -0.3811]) tensor([0.3843, 0.2331, 0.2125, 0.1701]) -Greedy action tensor([ 0.7243, -0.2335, -0.0472, -0.4924]) tensor([0.4668, 0.1791, 0.2158, 0.1383]) -Greedy action tensor([ 0.2767, -0.2600, -0.0017, -0.2233]) tensor([0.3392, 0.1983, 0.2568, 0.2057]) -Greedy action tensor([ 0.6779, 0.0602, -0.0755, -0.3710]) tensor([0.4237, 0.2284, 0.1994, 0.1484]) -Greedy action tensor([ 0.9107, -0.6371, -0.0883, -0.4089]) tensor([0.5411, 0.1151, 0.1993, 0.1446]) -Greedy action tensor([ 0.7137, -0.5650, -0.1987, -0.5007]) tensor([0.5059, 0.1408, 0.2031, 0.1502]) -Greedy action tensor([ 0.8102, -0.3057, -0.0266, -0.4068]) tensor([0.4862, 0.1593, 0.2106, 0.1440]) -Greedy action tensor([ 0.3659, 0.2397, -0.0980, -0.3224]) tensor([0.3319, 0.2926, 0.2087, 0.1668]) -Greedy action tensor([ 0.5455, -0.3463, 0.2529, -0.5732]) tensor([0.4028, 0.1651, 0.3006, 0.1316]) -Greedy action tensor([ 0.7387, -0.7258, -0.1001, -0.3729]) tensor([0.5019, 0.1160, 0.2169, 0.1651]) -Greedy action tensor([ 0.4337, 0.1350, 0.0173, -0.2316]) tensor([0.3430, 0.2544, 0.2262, 0.1764]) -Greedy action tensor([ 0.5033, -0.3095, -0.0700, -0.3812]) tensor([0.4132, 0.1833, 0.2329, 0.1706]) -Greedy action tensor([ 0.6342, -0.5231, 0.0158, -0.2815]) tensor([0.4438, 0.1395, 0.2391, 0.1776]) -Greedy action tensor([ 0.5101, -0.1616, -0.0234, -0.2445]) tensor([0.3895, 0.1990, 0.2284, 0.1831]) -Greedy action tensor([ 0.6450, -0.4646, -0.0625, -0.4797]) tensor([0.4657, 0.1535, 0.2295, 0.1512]) -Greedy action tensor([0.3248, 0.1765, 0.0009, 0.1129]) tensor([0.2946, 0.2540, 0.2131, 0.2383]) -Greedy action tensor([ 0.7049, -0.6037, 0.1899, -0.7652]) tensor([0.4767, 0.1288, 0.2848, 0.1096]) -Greedy action tensor([ 0.5487, -0.0596, -0.0595, -0.1832]) tensor([0.3892, 0.2118, 0.2118, 0.1872]) -Greedy action tensor([ 0.2110, 0.2801, -0.0540, -0.1511]) tensor([0.2829, 0.3031, 0.2170, 0.1970]) -Greedy action tensor([ 0.4109, 0.0242, 0.0250, -0.2430]) tensor([0.3473, 0.2359, 0.2361, 0.1806]) -Greedy action tensor([ 0.1057, 0.0259, -0.0025, -0.3433]) tensor([0.2891, 0.2669, 0.2594, 0.1845]) -Greedy action tensor([ 0.7532, -0.0391, -0.0397, -0.2395]) tensor([0.4394, 0.1990, 0.1988, 0.1628]) -Greedy action tensor([ 0.6510, -0.3232, -0.0758, -0.3399]) tensor([0.4480, 0.1691, 0.2166, 0.1663]) -Greedy action tensor([ 0.5350, -0.2882, -0.0448, -0.4200]) tensor([0.4195, 0.1842, 0.2349, 0.1614]) -Greedy action tensor([ 0.3046, -0.1575, 0.0213, -0.2340]) tensor([0.3371, 0.2123, 0.2539, 0.1967]) -Greedy action tensor([ 0.2780, 0.2252, -0.0599, -0.3284]) tensor([0.3118, 0.2958, 0.2224, 0.1700]) -Greedy action tensor([ 0.5240, -0.3446, -0.0237, -0.3039]) tensor([0.4107, 0.1723, 0.2375, 0.1795]) -Greedy action tensor([ 0.4686, -0.2052, 0.0054, -0.2975]) tensor([0.3840, 0.1958, 0.2417, 0.1785]) -Greedy action tensor([ 0.7013, -0.1425, -0.1258, -0.5699]) tensor([0.4656, 0.2002, 0.2036, 0.1306]) -Greedy action tensor([ 0.6400, -0.6652, 0.3315, -0.9283]) tensor([0.4516, 0.1225, 0.3318, 0.0941]) -Greedy action tensor([ 0.6814, -0.4828, -0.0965, -0.2070]) tensor([0.4581, 0.1430, 0.2104, 0.1884]) -Greedy action tensor([ 0.2645, 0.1480, 0.0565, -0.0486]) tensor([0.2913, 0.2592, 0.2366, 0.2130]) -Greedy action tensor([ 0.3848, -0.1735, -0.0797, -0.2771]) tensor([0.3681, 0.2106, 0.2313, 0.1899]) -Greedy action tensor([ 0.6934, -0.4520, 0.1500, -0.4697]) tensor([0.4522, 0.1438, 0.2626, 0.1413]) -Greedy action tensor([ 0.5698, -0.3142, 0.0063, -0.2321]) tensor([0.4114, 0.1700, 0.2342, 0.1845]) -Greedy action tensor([ 0.5336, -0.4248, -0.1116, -0.3714]) tensor([0.4324, 0.1658, 0.2268, 0.1749]) -Greedy action tensor([ 0.8051, -0.7423, -0.1861, -0.7300]) tensor([0.5557, 0.1183, 0.2063, 0.1197]) -Greedy action tensor([ 0.4086, -0.1227, 0.1219, -0.4030]) tensor([0.3594, 0.2112, 0.2698, 0.1596]) -Greedy action tensor([ 0.3291, 0.0997, 0.0323, -0.1426]) tensor([0.3162, 0.2514, 0.2350, 0.1973]) -Greedy action tensor([ 0.5216, 0.0950, 0.0028, -0.2580]) tensor([0.3695, 0.2412, 0.2199, 0.1694]) -Greedy action tensor([ 0.5794, -0.2580, -0.1354, -0.5408]) tensor([0.4448, 0.1925, 0.2176, 0.1451]) -Greedy action tensor([ 0.5861, 0.1018, 0.0923, -0.2924]) tensor([0.3785, 0.2332, 0.2310, 0.1572]) -Greedy action tensor([ 0.5512, -0.1840, 0.1335, -0.3061]) tensor([0.3903, 0.1871, 0.2570, 0.1656]) -Greedy action tensor([ 0.4084, -0.1637, 0.2173, -0.4300]) tensor([0.3543, 0.1999, 0.2926, 0.1532]) -Greedy action tensor([ 0.3506, 0.3288, -0.2333, -0.3860]) tensor([0.3317, 0.3245, 0.1850, 0.1588]) -Greedy action tensor([ 0.6536, -0.4127, -0.0471, -0.3861]) tensor([0.4558, 0.1569, 0.2262, 0.1611]) -Greedy action tensor([ 0.6935, -0.2443, 0.0371, -0.3721]) tensor([0.4435, 0.1736, 0.2301, 0.1528]) -Greedy action tensor([ 0.4646, 0.0035, -0.0799, -0.0780]) tensor([0.3582, 0.2259, 0.2078, 0.2082]) -Greedy action tensor([ 0.5903, -0.0646, -0.0909, -0.2184]) tensor([0.4047, 0.2102, 0.2048, 0.1803]) -Greedy action tensor([ 0.7501, -0.5536, 0.0600, -0.5849]) tensor([0.4911, 0.1333, 0.2463, 0.1292]) -Greedy action tensor([ 0.4219, 0.1008, 0.0134, -0.1224]) tensor([0.3367, 0.2442, 0.2238, 0.1954]) -Greedy action tensor([ 0.4803, -0.1618, -0.0225, -0.1940]) tensor([0.3787, 0.1993, 0.2290, 0.1930]) -Greedy action tensor([ 0.4968, -0.2474, -0.0202, -0.3743]) tensor([0.4016, 0.1908, 0.2395, 0.1681]) -Greedy action tensor([ 0.7769, -0.2407, -0.0650, -0.4047]) tensor([0.4764, 0.1722, 0.2053, 0.1462]) -Greedy action tensor([ 0.3974, 0.2634, -0.2050, -0.0444]) tensor([0.3263, 0.2853, 0.1786, 0.2098]) -Greedy action tensor([ 0.2735, -0.0858, 0.1075, -0.1295]) tensor([0.3112, 0.2173, 0.2636, 0.2080]) -Greedy action tensor([ 0.6058, -0.3258, -0.0989, -0.4664]) tensor([0.4483, 0.1766, 0.2216, 0.1535]) -Greedy action tensor([ 0.2700, 0.1655, -0.0089, -0.1248]) tensor([0.3002, 0.2704, 0.2271, 0.2023]) -Greedy action tensor([ 1.3041, -0.7844, 0.1846, -0.9462]) tensor([0.6428, 0.0796, 0.2098, 0.0677]) -Greedy action tensor([ 0.2275, 0.0436, -0.0571, -0.0403]) tensor([0.2986, 0.2484, 0.2246, 0.2284]) -Greedy action tensor([ 0.5838, -0.1676, -0.0939, -0.1590]) tensor([0.4073, 0.1921, 0.2068, 0.1938]) -Greedy action tensor([ 0.7234, -0.2433, -0.0858, -0.3264]) tensor([0.4596, 0.1748, 0.2047, 0.1609]) -Greedy action tensor([ 0.7985, -0.6751, -0.0926, -0.6665]) tensor([0.5347, 0.1225, 0.2193, 0.1235]) -Greedy action tensor([ 0.6334, -0.3756, -0.1359, 0.1905]) tensor([0.4048, 0.1476, 0.1876, 0.2600]) -Greedy action tensor([ 1.0200, -0.2704, -0.0452, -0.1554]) tensor([0.5185, 0.1427, 0.1787, 0.1601]) -Greedy action tensor([ 1.3482, -0.3765, -0.2096, 0.0165]) tensor([0.6050, 0.1078, 0.1274, 0.1597]) -Greedy action tensor([ 1.4111, -0.5675, -0.3276, -0.0932]) tensor([0.6510, 0.0900, 0.1144, 0.1446]) -Greedy action tensor([ 0.6866, -0.1740, -0.0894, -0.1605]) tensor([0.4326, 0.1829, 0.1991, 0.1854]) -Greedy action tensor([ 0.8674, 0.0537, -0.0849, -0.1473]) tensor([0.4563, 0.2022, 0.1761, 0.1654]) -Greedy action tensor([ 0.7568, -0.2387, -0.6480, -0.0705]) tensor([0.4873, 0.1801, 0.1196, 0.2130]) -Greedy action tensor([ 0.8536, -0.6581, 0.1185, -0.0515]) tensor([0.4752, 0.1048, 0.2278, 0.1922]) -Greedy action tensor([ 0.9752, -0.4828, -0.2805, -0.2088]) tensor([0.5484, 0.1276, 0.1562, 0.1678]) -Greedy action tensor([ 0.8909, -0.5004, -0.0312, 0.0327]) tensor([0.4830, 0.1201, 0.1921, 0.2048]) -Greedy action tensor([ 1.3990, -0.4110, -0.1411, 0.0135]) tensor([0.6142, 0.1005, 0.1316, 0.1537]) -Greedy action tensor([ 0.6943, -0.1993, 0.0849, -0.2387]) tensor([0.4262, 0.1744, 0.2317, 0.1677]) -Greedy action tensor([ 1.1406, -0.6011, -0.2781, -0.0593]) tensor([0.5819, 0.1020, 0.1408, 0.1753]) -Greedy action tensor([ 0.7227, -0.4079, -0.3528, -0.1420]) tensor([0.4796, 0.1548, 0.1636, 0.2020]) -Greedy action tensor([ 0.2377, -0.2133, -0.1354, 0.0630]) tensor([0.3159, 0.2012, 0.2175, 0.2653]) -Greedy action tensor([ 1.3488, -0.6098, 0.0403, 0.2812]) tensor([0.5698, 0.0804, 0.1540, 0.1959]) -Greedy action tensor([ 2.2699, -0.3020, -0.4797, -0.1194]) tensor([0.8117, 0.0620, 0.0519, 0.0744]) -Greedy action tensor([ 0.6927, -0.3401, 0.0059, -0.0322]) tensor([0.4267, 0.1519, 0.2147, 0.2067]) -Greedy action tensor([ 0.6427, -0.4398, -0.2399, 0.2880]) tensor([0.4075, 0.1380, 0.1686, 0.2858]) -Greedy action tensor([ 0.7062, -0.4353, -0.1730, 0.0657]) tensor([0.4422, 0.1412, 0.1836, 0.2330]) -Greedy action tensor([ 1.3073, -0.6724, -0.0288, 0.2092]) tensor([0.5765, 0.0796, 0.1516, 0.1923]) -Greedy action tensor([ 0.9468, -0.5500, -0.3018, 0.5480]) tensor([0.4583, 0.1026, 0.1315, 0.3076]) -Greedy action tensor([ 1.3072, -0.7594, -0.3750, 0.0599]) tensor([0.6251, 0.0791, 0.1162, 0.1796]) -Greedy action tensor([ 1.3766, 0.0098, -0.2160, -0.1399]) tensor([0.5960, 0.1519, 0.1212, 0.1308]) -Greedy action tensor([ 0.6510, -0.4505, -0.6063, 0.1148]) tensor([0.4542, 0.1510, 0.1292, 0.2657]) -Greedy action tensor([ 1.1479, -0.3541, -0.3127, 0.1041]) tensor([0.5534, 0.1232, 0.1284, 0.1949]) -Greedy action tensor([ 1.3150, -0.8548, -0.6060, 0.4688]) tensor([0.5918, 0.0676, 0.0867, 0.2539]) -Greedy action tensor([ 1.1594, -0.5242, -0.3883, 0.4913]) tensor([0.5233, 0.0972, 0.1113, 0.2683]) -Greedy action tensor([ 0.9658, -0.1731, -0.3003, 0.3762]) tensor([0.4637, 0.1485, 0.1307, 0.2571]) -Greedy action tensor([ 0.8659, -0.3096, -0.2646, 0.4004]) tensor([0.4426, 0.1366, 0.1429, 0.2779]) -Greedy action tensor([ 0.7467, -0.3844, -0.0157, 0.1260]) tensor([0.4298, 0.1387, 0.2005, 0.2310]) -Greedy action tensor([ 0.8978, -0.2916, -0.1711, 0.3729]) tensor([0.4465, 0.1359, 0.1533, 0.2642]) -Greedy action tensor([ 1.0614, -0.5065, -0.4725, 0.5113]) tensor([0.4997, 0.1042, 0.1078, 0.2883]) -Greedy action tensor([ 1.4975, -0.5786, -0.3657, 0.1687]) tensor([0.6471, 0.0812, 0.1004, 0.1713]) -Greedy action tensor([ 0.8843, -0.2046, -0.0570, -0.1076]) tensor([0.4767, 0.1605, 0.1860, 0.1768]) -Greedy action tensor([ 1.3346, -0.4851, -0.0837, 0.1705]) tensor([0.5826, 0.0944, 0.1411, 0.1819]) -Greedy action tensor([ 1.3248, -0.1442, -0.2532, -0.1154]) tensor([0.5976, 0.1375, 0.1233, 0.1416]) -Greedy action tensor([ 1.4802, -0.8255, -0.1304, 0.2141]) tensor([0.6324, 0.0630, 0.1263, 0.1783]) -Greedy action tensor([ 0.8984, -0.3211, -0.4261, 0.3519]) tensor([0.4672, 0.1380, 0.1242, 0.2705]) -Greedy action tensor([ 1.4395, -0.6449, -0.3249, 0.2851]) tensor([0.6208, 0.0772, 0.1063, 0.1957]) -Greedy action tensor([ 1.3239, -0.4126, -0.5276, 0.3415]) tensor([0.5856, 0.1031, 0.0919, 0.2193]) -Greedy action tensor([ 0.8369, -0.0360, -0.3724, -0.1875]) tensor([0.4819, 0.2013, 0.1438, 0.1730]) -Greedy action tensor([ 0.9042, -0.4725, 0.0488, 0.1565]) tensor([0.4649, 0.1173, 0.1976, 0.2201]) -Greedy action tensor([ 1.3147, -0.5566, -0.1454, -0.1485]) tensor([0.6182, 0.0952, 0.1436, 0.1431]) -Greedy action tensor([ 1.2982, -0.2826, 0.0754, 0.0199]) tensor([0.5622, 0.1157, 0.1655, 0.1566]) -Greedy action tensor([ 0.8072, -0.4993, -0.2796, 0.5405]) tensor([0.4212, 0.1141, 0.1421, 0.3226]) -Greedy action tensor([ 1.5057, -0.7083, -0.5300, 0.0966]) tensor([0.6738, 0.0736, 0.0880, 0.1646]) -Greedy action tensor([ 0.5713, -0.3145, -0.1184, 0.1691]) tensor([0.3871, 0.1597, 0.1942, 0.2590]) -Greedy action tensor([ 1.0533, -0.6392, -0.3073, 0.4474]) tensor([0.5035, 0.0927, 0.1291, 0.2747]) -Greedy action tensor([ 0.7929, -0.6965, -0.0441, 0.2869]) tensor([0.4422, 0.0997, 0.1915, 0.2666]) -Greedy action tensor([ 0.3000, -0.4581, -0.3827, -0.0383]) tensor([0.3722, 0.1744, 0.1881, 0.2654]) -Greedy action tensor([ 1.1282, -0.4111, -0.2295, -0.2449]) tensor([0.5797, 0.1244, 0.1491, 0.1468]) -Greedy action tensor([ 0.7312, -0.4753, -0.1678, 0.3316]) tensor([0.4207, 0.1259, 0.1712, 0.2821]) -Greedy action tensor([ 1.1456, -0.3879, -0.2098, -0.0226]) tensor([0.5604, 0.1209, 0.1445, 0.1742]) -Greedy action tensor([ 1.3293, -0.4317, -0.4473, 0.5702]) tensor([0.5527, 0.0950, 0.0935, 0.2587]) -Greedy action tensor([ 1.1043, -0.4015, -0.3603, 0.6910]) tensor([0.4729, 0.1049, 0.1093, 0.3128]) -Greedy action tensor([ 1.3633, -0.2921, -0.4577, 0.2878]) tensor([0.5903, 0.1128, 0.0956, 0.2014]) -Greedy action tensor([ 0.6231, -0.4201, -0.3134, 0.3913]) tensor([0.3941, 0.1389, 0.1545, 0.3126]) -Greedy action tensor([ 1.0460, -0.3317, -0.0704, -0.1139]) tensor([0.5282, 0.1332, 0.1730, 0.1656]) -Greedy action tensor([ 0.8791, -0.3539, -0.1933, 0.2071]) tensor([0.4664, 0.1359, 0.1596, 0.2382]) -Greedy action tensor([ 1.0733, -0.4363, -0.3462, -0.0601]) tensor([0.5603, 0.1238, 0.1355, 0.1804]) -Greedy action tensor([ 0.7744, 0.2621, 0.1231, -0.3016]) tensor([0.4063, 0.2434, 0.2118, 0.1385]) -Greedy action tensor([ 1.0575, -0.6737, -0.2755, 0.4553]) tensor([0.5029, 0.0891, 0.1326, 0.2754]) -Greedy action tensor([ 1.0346, -0.6946, -0.2238, 0.4968]) tensor([0.4889, 0.0867, 0.1389, 0.2855]) -Greedy action tensor([ 1.3449, -0.5417, -0.3508, 0.2288]) tensor([0.6014, 0.0912, 0.1104, 0.1970]) -Greedy action tensor([ 1.4010, -0.3108, -0.6507, -0.0566]) tensor([0.6486, 0.1171, 0.0834, 0.1510]) -Greedy action tensor([ 0.7714, -0.3309, -0.2436, 0.1024]) tensor([0.4532, 0.1505, 0.1642, 0.2321]) -Greedy action tensor([ 1.0358, -0.3022, -0.1355, -0.0233]) tensor([0.5211, 0.1367, 0.1615, 0.1807]) -Greedy action tensor([ 0.8233, 0.0510, -0.2328, -0.2162]) tensor([0.4622, 0.2135, 0.1608, 0.1635]) -Greedy action tensor([ 1.3968, -0.5940, -0.4620, 0.3699]) tensor([0.6059, 0.0828, 0.0944, 0.2170]) -Greedy action tensor([ 1.4791, -0.9814, -0.4589, 0.0655]) tensor([0.6790, 0.0580, 0.0978, 0.1652]) -Greedy action tensor([ 1.0716, -0.2695, -0.1761, -0.4581]) tensor([0.5665, 0.1482, 0.1627, 0.1227]) -Greedy action tensor([ 0.8616, -0.1439, -0.1384, 0.1847]) tensor([0.4461, 0.1632, 0.1641, 0.2267]) -Greedy action tensor([ 0.6589, -0.1922, -0.5115, 0.1589]) tensor([0.4267, 0.1822, 0.1324, 0.2588]) -Greedy action tensor([ 0.7843, -0.2627, -0.0198, -0.0735]) tensor([0.4499, 0.1579, 0.2013, 0.1908]) -Greedy action tensor([ 0.9290, -0.6498, -0.1260, 0.1696]) tensor([0.4945, 0.1020, 0.1722, 0.2314]) -Greedy action tensor([ 1.6251, -0.8374, 0.0380, 0.1353]) tensor([0.6600, 0.0562, 0.1350, 0.1488]) -Greedy action tensor([ 1.2465, 0.0520, -0.0315, 0.0379]) tensor([0.5319, 0.1611, 0.1482, 0.1588]) -Greedy action tensor([ 1.1690, -0.2159, -0.3320, 0.1779]) tensor([0.5422, 0.1357, 0.1208, 0.2012]) -Greedy action tensor([ 1.0455, -0.6287, -0.3238, 0.6338]) tensor([0.4752, 0.0891, 0.1208, 0.3149]) -Greedy action tensor([ 0.9796, -0.5689, -0.2715, 0.0049]) tensor([0.5330, 0.1133, 0.1525, 0.2011]) -Greedy action tensor([-1.8829, -0.4937, 0.7090, -0.0894]) tensor([0.0410, 0.1646, 0.5479, 0.2466]) -Greedy action tensor([-1.8368, -0.4554, 0.6838, 0.0724]) tensor([0.0414, 0.1647, 0.5146, 0.2792]) -Greedy action tensor([-1.8640, -0.8735, 0.0221, -0.3605]) tensor([0.0676, 0.1821, 0.4460, 0.3042]) -Greedy action tensor([-1.8336, -0.8674, 0.5584, -0.1912]) tensor([0.0507, 0.1332, 0.5542, 0.2619]) -Greedy action tensor([-0.9853, -0.5860, 0.2115, 0.3421]) tensor([0.1045, 0.1558, 0.3458, 0.3940]) -Greedy action tensor([-1.7899, -0.4764, 0.6646, -0.3085]) tensor([0.0482, 0.1792, 0.5608, 0.2119]) -Greedy action tensor([-1.8705, -0.4723, 0.6654, -0.0798]) tensor([0.0422, 0.1710, 0.5335, 0.2532]) -Greedy action tensor([-1.2892, -0.6616, 0.3398, 0.9788]) tensor([0.0567, 0.1062, 0.2892, 0.5479]) -Greedy action tensor([-1.7409, -1.0297, 0.0128, -0.7185]) tensor([0.0863, 0.1757, 0.4983, 0.2398]) -Greedy action tensor([-0.5922, -0.3320, 1.1319, 1.5547]) tensor([0.0607, 0.0788, 0.3406, 0.5198]) -Greedy action tensor([-1.6705, -0.4584, 0.5262, -0.0482]) tensor([0.0543, 0.1824, 0.4883, 0.2749]) -Greedy action tensor([-1.0744, -0.6789, 0.2720, -0.2836]) tensor([0.1172, 0.1740, 0.4504, 0.2584]) -Greedy action tensor([-2.0007, -0.6597, 1.0741, 0.4387]) tensor([0.0264, 0.1008, 0.5706, 0.3023]) -Greedy action tensor([-1.9771, -0.6529, 1.1199, 0.3820]) tensor([0.0267, 0.1003, 0.5906, 0.2824]) -Greedy action tensor([-1.3548, -0.8185, -0.3159, -0.5999]) tensor([0.1305, 0.2231, 0.3688, 0.2776]) -Greedy action tensor([-1.4335, -0.4076, 0.7725, 0.9311]) tensor([0.0425, 0.1187, 0.3862, 0.4526]) -Greedy action tensor([-1.2664, -0.9016, 0.6938, -0.3005]) tensor([0.0822, 0.1184, 0.5835, 0.2159]) -Greedy action tensor([-1.2744, -0.4472, 1.1979, 1.2516]) tensor([0.0362, 0.0827, 0.4287, 0.4524]) -Greedy action tensor([-1.7481, -0.4682, 0.5740, -0.0583]) tensor([0.0495, 0.1779, 0.5045, 0.2681]) -Greedy action tensor([-1.4587, -0.7087, 0.8786, 0.8170]) tensor([0.0431, 0.0912, 0.4461, 0.4195]) -Greedy action tensor([-1.4424, -0.7510, -0.2955, -0.4306]) tensor([0.1124, 0.2244, 0.3539, 0.3092]) -Greedy action tensor([-1.8978, -0.6943, 0.8841, 0.1879]) tensor([0.0350, 0.1168, 0.5660, 0.2822]) -Greedy action tensor([-1.8107, -0.4684, 0.6522, 0.0425]) tensor([0.0436, 0.1668, 0.5116, 0.2781]) -Greedy action tensor([-1.8940, -0.4510, 0.6408, -0.1524]) tensor([0.0425, 0.1797, 0.5355, 0.2423]) -Greedy action tensor([-1.4785, -0.3556, 0.9499, 0.8955]) tensor([0.0382, 0.1175, 0.4336, 0.4107]) -Greedy action tensor([-1.2240, -0.5974, 0.4183, 0.0320]) tensor([0.0866, 0.1620, 0.4474, 0.3040]) -Greedy action tensor([-1.2272, -0.3551, 0.5516, 0.9090]) tensor([0.0562, 0.1345, 0.3331, 0.4762]) -Greedy action tensor([-1.3616, -0.5656, 0.3537, 0.2044]) tensor([0.0737, 0.1634, 0.4098, 0.3530]) -Greedy action tensor([-1.9582, -0.6496, 0.8297, 0.0410]) tensor([0.0353, 0.1306, 0.5735, 0.2606]) -Greedy action tensor([-0.9844, -0.3569, 0.0492, 0.0796]) tensor([0.1165, 0.2182, 0.3276, 0.3377]) -Greedy action tensor([-2.0283, -0.7472, 1.3064, 0.6591]) tensor([0.0211, 0.0760, 0.5927, 0.3102]) -Greedy action tensor([-1.7002, -0.4866, 0.6857, 0.2001]) tensor([0.0456, 0.1535, 0.4958, 0.3051]) -Greedy action tensor([-1.5622, -0.4023, 0.4468, 0.1016]) tensor([0.0591, 0.1885, 0.4405, 0.3119]) -Greedy action tensor([-1.7267, -0.4894, 0.5803, -0.0317]) tensor([0.0502, 0.1729, 0.5038, 0.2732]) -Greedy action tensor([-0.2915, -0.0951, 1.0329, 1.5486]) tensor([0.0815, 0.0992, 0.3063, 0.5131]) -Greedy action tensor([-1.0460, -0.5788, 0.2428, 0.3794]) tensor([0.0963, 0.1537, 0.3494, 0.4006]) -Greedy action tensor([-0.4593, -0.5207, 0.2174, 0.2534]) tensor([0.1681, 0.1581, 0.3308, 0.3429]) -Greedy action tensor([-1.4625, -0.5125, 0.4545, 0.2570]) tensor([0.0626, 0.1619, 0.4259, 0.3496]) -Greedy action tensor([-1.5700, -0.5280, 0.5974, -0.4166]) tensor([0.0635, 0.1801, 0.5550, 0.2013]) -Greedy action tensor([-1.0397, -0.5668, 0.2276, 0.3613]) tensor([0.0979, 0.1571, 0.3477, 0.3974]) -Greedy action tensor([-1.6531, -0.2686, 0.5170, 0.0627]) tensor([0.0518, 0.2067, 0.4535, 0.2879]) -Greedy action tensor([-1.4717, 0.0140, 0.7588, -0.7636]) tensor([0.0597, 0.2637, 0.5554, 0.1212]) -Greedy action tensor([-1.6419, -1.0615, -0.0456, -0.8505]) tensor([0.1007, 0.1800, 0.4971, 0.2222]) -Greedy action tensor([-1.7695, -0.6287, 0.4295, -0.2553]) tensor([0.0565, 0.1769, 0.5096, 0.2570]) -Greedy action tensor([-0.8290, -0.7900, 0.6671, -0.1945]) tensor([0.1192, 0.1239, 0.5321, 0.2248]) -Greedy action tensor([-1.2974, -0.8023, 0.3834, 0.6066]) tensor([0.0679, 0.1114, 0.3647, 0.4559]) -Greedy action tensor([-1.2387, -0.5788, 0.8022, 1.1212]) tensor([0.0471, 0.0912, 0.3627, 0.4990]) -Greedy action tensor([-1.5152, -0.3697, 0.6357, 0.4938]) tensor([0.0495, 0.1557, 0.4255, 0.3692]) -Greedy action tensor([-1.9029, -0.8584, 0.2266, -0.2077]) tensor([0.0565, 0.1606, 0.4752, 0.3078]) -Greedy action tensor([-1.8789, -0.5219, 0.2644, -0.2072]) tensor([0.0534, 0.2074, 0.4552, 0.2841]) -Greedy action tensor([-1.9440, -0.4520, 0.6651, -0.1804]) tensor([0.0402, 0.1788, 0.5464, 0.2346]) -Greedy action tensor([-0.9998, -0.4016, 1.1165, 1.3958]) tensor([0.0453, 0.0823, 0.3757, 0.4967]) -Greedy action tensor([-0.4397, 0.5142, 0.0933, 0.1332]) tensor([0.1414, 0.3670, 0.2409, 0.2507]) -Greedy action tensor([-1.2773, -0.6174, 0.6750, 0.9209]) tensor([0.0527, 0.1019, 0.3710, 0.4744]) -Greedy action tensor([-0.6925, 0.1103, 0.2424, 0.3039]) tensor([0.1178, 0.2630, 0.3001, 0.3191]) -Greedy action tensor([-0.6042, -0.5554, 0.3138, 0.4299]) tensor([0.1357, 0.1425, 0.3399, 0.3818]) -Greedy action tensor([-2.0760, -0.8321, 1.2668, 0.6112]) tensor([0.0211, 0.0731, 0.5963, 0.3096]) -Greedy action tensor([-1.3344, -0.5550, 0.5864, -0.1978]) tensor([0.0762, 0.1661, 0.5202, 0.2375]) -Greedy action tensor([-1.4789, -0.4189, 0.9014, 0.9761]) tensor([0.0380, 0.1096, 0.4103, 0.4421]) -Greedy action tensor([-1.3299, -0.3943, 1.0411, 1.1398]) tensor([0.0383, 0.0977, 0.4106, 0.4533]) -Greedy action tensor([-1.2596, -0.2517, 0.3154, 0.1053]) tensor([0.0801, 0.2194, 0.3869, 0.3136]) -Greedy action tensor([-1.4294, -0.5209, 0.4110, 0.0589]) tensor([0.0704, 0.1746, 0.4433, 0.3117]) -Greedy action tensor([-1.9326, -0.4607, 0.6700, -0.1659]) tensor([0.0405, 0.1764, 0.5463, 0.2368]) -Greedy action tensor([-1.2978, -0.3504, 0.5231, 0.5658]) tensor([0.0617, 0.1592, 0.3813, 0.3979]) -Greedy action tensor([-1.8911, -0.5993, 0.4137, -0.2163]) tensor([0.0500, 0.1820, 0.5011, 0.2669]) -Greedy action tensor([-0.8748, -0.8986, 0.6029, -0.4258]) tensor([0.1262, 0.1232, 0.5530, 0.1977]) -Greedy action tensor([-0.2945, -0.4993, 0.3283, 0.3112]) tensor([0.1814, 0.1478, 0.3382, 0.3325]) -Greedy action tensor([-0.6701, 0.2138, 0.1002, 0.0989]) tensor([0.1292, 0.3128, 0.2792, 0.2788]) -Greedy action tensor([-0.8432, -0.4755, 1.0099, 1.5850]) tensor([0.0496, 0.0716, 0.3164, 0.5623]) -Greedy action tensor([-0.4197, 0.0595, 0.2263, 0.3451]) tensor([0.1499, 0.2421, 0.2860, 0.3221]) -Greedy action tensor([-1.4098, -0.4181, 0.4340, 0.2617]) tensor([0.0652, 0.1758, 0.4121, 0.3469]) -Greedy action tensor([-1.4720, -0.5982, 0.6099, 0.4258]) tensor([0.0553, 0.1325, 0.4434, 0.3688]) -Greedy action tensor([-1.4354, -0.5377, 0.4178, 0.1262]) tensor([0.0685, 0.1681, 0.4370, 0.3265]) -Greedy action tensor([-1.8418, -0.6992, 0.0585, -0.3451]) tensor([0.0654, 0.2050, 0.4374, 0.2922]) -Greedy action tensor([-1.8470, -0.4698, 0.6881, -0.0186]) tensor([0.0420, 0.1665, 0.5300, 0.2614]) -Greedy action tensor([-1.6988, -0.4362, 0.5359, -0.0584]) tensor([0.0525, 0.1857, 0.4909, 0.2709]) -Greedy action tensor([-0.9976, -0.6112, 0.6683, -0.3960]) tensor([0.1043, 0.1535, 0.5518, 0.1904]) -Greedy action tensor([-0.8355, 0.2620, 0.3224, -0.3644]) tensor([0.1139, 0.3412, 0.3625, 0.1824]) -Greedy action tensor([-1.2274, -0.4330, 0.3898, 0.2884]) tensor([0.0781, 0.1728, 0.3935, 0.3556]) -Greedy action tensor([-1.4817, -0.5775, 1.0356, 0.8566]) tensor([0.0381, 0.0942, 0.4726, 0.3951]) -Greedy action tensor([-0.9692, 0.1203, 0.3305, -0.1921]) tensor([0.1019, 0.3028, 0.3737, 0.2216]) -Greedy action tensor([ 0.9336, 0.0024, -0.1644, 0.8950]) tensor([0.3718, 0.1465, 0.1240, 0.3577]) -Greedy action tensor([ 0.2249, -0.8656, 0.2877, 0.1053]) tensor([0.3041, 0.1022, 0.3238, 0.2698]) -Greedy action tensor([0.8819, 0.3123, 0.8852, 0.7174]) tensor([0.2926, 0.1656, 0.2936, 0.2482]) -Greedy action tensor([ 0.9642, 0.5361, 1.2150, -0.0230]) tensor([0.3022, 0.1969, 0.3883, 0.1126]) -Greedy action tensor([ 0.1249, 0.0031, 0.1576, -0.4071]) tensor([0.2852, 0.2525, 0.2947, 0.1675]) -Greedy action tensor([ 0.0598, -0.8717, 1.2555, 0.1913]) tensor([0.1712, 0.0675, 0.5660, 0.1953]) -Greedy action tensor([-0.0680, 0.2197, 0.2634, -0.0663]) tensor([0.2115, 0.2820, 0.2946, 0.2119]) -Greedy action tensor([ 0.4140, -1.0220, -0.9500, -0.2207]) tensor([0.4942, 0.1175, 0.1263, 0.2620]) -Greedy action tensor([0.8752, 0.0024, 0.4385, 0.8980]) tensor([0.3239, 0.1353, 0.2093, 0.3314]) -Greedy action tensor([ 0.5122, -0.3177, -0.0363, 0.3517]) tensor([0.3490, 0.1522, 0.2016, 0.2972]) -Greedy action tensor([0.4805, 0.4656, 0.6465, 1.3418]) tensor([0.1808, 0.1781, 0.2134, 0.4277]) -Greedy action tensor([ 1.4160, -1.0738, 0.5922, 0.5598]) tensor([0.5138, 0.0426, 0.2254, 0.2182]) -Greedy action tensor([ 0.3382, -0.0571, -1.0786, 0.7085]) tensor([0.2973, 0.2002, 0.0721, 0.4305]) -Greedy action tensor([-0.4252, -0.2014, -1.0895, -0.2883]) tensor([0.2556, 0.3197, 0.1315, 0.2931]) -Greedy action tensor([ 0.1990, -0.8173, -0.1188, 1.6543]) tensor([0.1568, 0.0568, 0.1141, 0.6722]) -Greedy action tensor([-0.1349, -1.5912, -0.6900, 0.5182]) tensor([0.2682, 0.0625, 0.1539, 0.5153]) -Greedy action tensor([ 1.2176, -0.2797, 0.8396, 0.5547]) tensor([0.4125, 0.0923, 0.2827, 0.2126]) -Greedy action tensor([ 1.1124, -0.9110, 1.4772, 0.5833]) tensor([0.3163, 0.0418, 0.4555, 0.1863]) -Greedy action tensor([ 0.6126, -0.0729, 0.4717, -0.2390]) tensor([0.3573, 0.1800, 0.3103, 0.1525]) -Greedy action tensor([ 0.2764, -0.9854, 1.4146, 0.9605]) tensor([0.1566, 0.0443, 0.4887, 0.3104]) -Greedy action tensor([ 0.8038, 0.3431, 0.7799, -0.2084]) tensor([0.3366, 0.2124, 0.3287, 0.1223]) -Greedy action tensor([-0.2365, -0.2318, -0.0270, 0.2295]) tensor([0.2070, 0.2080, 0.2552, 0.3298]) -Greedy action tensor([ 0.7806, -0.9128, 1.1317, 1.0047]) tensor([0.2593, 0.0477, 0.3684, 0.3245]) -Greedy action tensor([ 0.9798, 0.1988, -0.1116, 0.1598]) tensor([0.4476, 0.2050, 0.1503, 0.1971]) -Greedy action tensor([ 1.3701, -1.4208, -0.5938, 1.5010]) tensor([0.4271, 0.0262, 0.0599, 0.4868]) -Greedy action tensor([ 0.3276, 0.0547, 0.3098, -0.3630]) tensor([0.3082, 0.2346, 0.3027, 0.1545]) -Greedy action tensor([ 0.5072, 0.4132, -0.2998, 0.7895]) tensor([0.2715, 0.2472, 0.1212, 0.3601]) -Greedy action tensor([ 1.8232, -0.1686, 1.2398, 0.5098]) tensor([0.5093, 0.0695, 0.2842, 0.1370]) -Greedy action tensor([ 0.1657, -1.2186, -0.1391, -0.4464]) tensor([0.3953, 0.0990, 0.2914, 0.2143]) -Greedy action tensor([ 1.6499, -0.5037, 1.5888, 0.8676]) tensor([0.3978, 0.0462, 0.3742, 0.1819]) -Greedy action tensor([-0.4900, -0.5814, -0.2043, -0.8684]) tensor([0.2546, 0.2323, 0.3387, 0.1744]) -Greedy action tensor([ 0.6898, 0.3270, -0.6914, 1.8905]) tensor([0.1898, 0.1320, 0.0477, 0.6305]) -Greedy action tensor([ 1.0915, -0.4775, 0.9593, 0.5897]) tensor([0.3718, 0.0774, 0.3257, 0.2251]) -Greedy action tensor([-0.1882, 1.4263, 0.2667, -0.4839]) tensor([0.1198, 0.6022, 0.1888, 0.0891]) -Greedy action tensor([ 1.3993, -1.0351, 0.4069, 1.4620]) tensor([0.3964, 0.0347, 0.1469, 0.4220]) -Greedy action tensor([ 0.9400, -0.6336, 0.6223, 0.4343]) tensor([0.3940, 0.0817, 0.2867, 0.2376]) -Greedy action tensor([ 1.1250, -0.0898, 1.5466, 0.5953]) tensor([0.2933, 0.0870, 0.4470, 0.1727]) -Greedy action tensor([ 1.1045, 0.8147, -0.1135, 0.4867]) tensor([0.3871, 0.2897, 0.1145, 0.2087]) -Greedy action tensor([ 0.7658, 0.3344, -0.4445, 0.6048]) tensor([0.3573, 0.2321, 0.1065, 0.3041]) -Greedy action tensor([ 1.6377, -0.3009, 1.4243, 0.0984]) tensor([0.4616, 0.0664, 0.3729, 0.0990]) -Greedy action tensor([ 0.1733, -0.0351, -0.1709, 1.3480]) tensor([0.1737, 0.1410, 0.1231, 0.5622]) -Greedy action tensor([0.3998, 0.3914, 0.2763, 0.0715]) tensor([0.2781, 0.2758, 0.2458, 0.2003]) -Greedy action tensor([-0.4009, -1.9753, -0.6486, 0.4206]) tensor([0.2346, 0.0486, 0.1832, 0.5336]) -Greedy action tensor([ 0.6532, 0.1509, 0.9024, -0.4009]) tensor([0.3090, 0.1870, 0.3964, 0.1077]) -Greedy action tensor([ 1.0060, -0.1685, 0.8453, 0.7015]) tensor([0.3451, 0.1066, 0.2938, 0.2545]) -Greedy action tensor([0.6408, 0.5193, 0.1002, 0.3716]) tensor([0.3094, 0.2740, 0.1802, 0.2364]) -Greedy action tensor([ 0.1016, 0.2392, -0.7991, 1.1691]) tensor([0.1831, 0.2101, 0.0744, 0.5324]) -Greedy action tensor([ 1.0223, -0.4656, 0.9778, 1.0944]) tensor([0.3070, 0.0693, 0.2937, 0.3300]) -Greedy action tensor([ 0.7445, -0.0055, 1.0870, -0.1105]) tensor([0.3025, 0.1429, 0.4260, 0.1286]) -Greedy action tensor([ 0.8021, -0.5143, -0.2935, -0.2124]) tensor([0.5089, 0.1364, 0.1701, 0.1845]) -Greedy action tensor([-0.6283, 0.8316, 0.3259, -0.3627]) tensor([0.1086, 0.4677, 0.2820, 0.1417]) -Greedy action tensor([1.2778, 0.9714, 1.1092, 0.6277]) tensor([0.3223, 0.2372, 0.2723, 0.1682]) -Greedy action tensor([ 1.2253, -1.4133, 0.0763, 0.6769]) tensor([0.5086, 0.0363, 0.1612, 0.2939]) -Greedy action tensor([ 0.9044, -0.1009, 0.1458, 0.6982]) tensor([0.3777, 0.1382, 0.1769, 0.3073]) -Greedy action tensor([ 1.4660, -0.9899, 0.3450, 1.4227]) tensor([0.4220, 0.0362, 0.1376, 0.4042]) -Greedy action tensor([ 0.8635, 1.0523, -0.2936, -0.0210]) tensor([0.3407, 0.4115, 0.1071, 0.1407]) -Greedy action tensor([-0.5175, 0.0343, 1.7229, -0.5912]) tensor([0.0766, 0.1329, 0.7194, 0.0711]) -Greedy action tensor([ 0.3600, -0.5348, 0.5243, 0.4651]) tensor([0.2704, 0.1105, 0.3187, 0.3004]) -Greedy action tensor([ 0.7179, -0.9085, -0.4629, 0.9478]) tensor([0.3620, 0.0712, 0.1112, 0.4556]) -Greedy action tensor([ 0.5350, -0.8291, 0.1537, -0.2653]) tensor([0.4188, 0.1070, 0.2860, 0.1881]) -Greedy action tensor([1.2810, 1.4472, 0.6374, 0.2271]) tensor([0.3274, 0.3865, 0.1720, 0.1141]) -Greedy action tensor([ 0.9630, -0.1892, -0.2552, 0.7660]) tensor([0.4110, 0.1299, 0.1216, 0.3375]) -Greedy action tensor([-0.2817, -0.2448, -0.7459, 1.0598]) tensor([0.1541, 0.1598, 0.0968, 0.5893]) -Greedy action tensor([-0.6316, -0.3495, -1.0928, 0.6645]) tensor([0.1513, 0.2006, 0.0954, 0.5528]) -Greedy action tensor([ 0.5055, 1.8331, -0.2035, 0.2851]) tensor([0.1648, 0.6218, 0.0811, 0.1322]) -Greedy action tensor([-0.0155, 0.2426, 0.2139, 0.2233]) tensor([0.2074, 0.2684, 0.2609, 0.2633]) -Greedy action tensor([ 0.4564, 0.3846, -0.2795, -0.0485]) tensor([0.3319, 0.3089, 0.1590, 0.2003]) -Greedy action tensor([ 1.3527, -0.0322, -0.5212, 0.7775]) tensor([0.5085, 0.1273, 0.0781, 0.2861]) -Greedy action tensor([0.4018, 0.4607, 0.0140, 1.6452]) tensor([0.1611, 0.1709, 0.1093, 0.5587]) -Greedy action tensor([-0.8232, -0.8603, -0.8259, 0.6554]) tensor([0.1361, 0.1311, 0.1357, 0.5970]) -Greedy action tensor([ 0.9333, -0.6440, 0.0663, 1.3165]) tensor([0.3232, 0.0668, 0.1358, 0.4742]) -Greedy action tensor([ 0.7437, 0.8479, -1.3981, 0.3596]) tensor([0.3438, 0.3816, 0.0404, 0.2342]) -Greedy action tensor([ 0.2620, -0.6919, -0.7510, 2.0230]) tensor([0.1322, 0.0509, 0.0480, 0.7689]) -Greedy action tensor([-0.1278, -0.0230, 1.0732, 0.3864]) tensor([0.1407, 0.1563, 0.4677, 0.2353]) -Greedy action tensor([1.1747, 0.1915, 0.5250, 0.6654]) tensor([0.4004, 0.1498, 0.2091, 0.2406]) -Greedy action tensor([ 0.1784, -0.0478, 1.3691, -0.0267]) tensor([0.1694, 0.1351, 0.5574, 0.1380]) -Greedy action tensor([ 0.8562, -0.8499, 1.1136, -0.0472]) tensor([0.3472, 0.0630, 0.4491, 0.1407]) -Greedy action tensor([-0.2326, -1.4637, 1.8475, -0.0599]) tensor([0.0954, 0.0278, 0.7635, 0.1133]) -Greedy action tensor([ 1.2976, -1.1110, 1.0449, -0.2991]) tensor([0.4833, 0.0435, 0.3753, 0.0979]) -Greedy action tensor([-0.1172, -0.5710, -0.7101, -0.0310]) tensor([0.3051, 0.1938, 0.1686, 0.3325]) -Greedy action tensor([ 1.0584, -1.4435, 1.0463, 0.8068]) tensor([0.3512, 0.0288, 0.3470, 0.2731]) -Greedy action tensor([ 1.0131, 0.2158, 0.1486, -0.1531]) tensor([0.4580, 0.2064, 0.1929, 0.1427]) -Greedy action tensor([ 0.8220, -0.6563, 0.1631, -0.8259]) tensor([0.5160, 0.1177, 0.2670, 0.0993]) -Greedy action tensor([ 0.7384, -0.2145, -0.0717, -0.2687]) tensor([0.4554, 0.1756, 0.2026, 0.1664]) -Greedy action tensor([ 0.7378, 0.1392, -0.0182, -0.6668]) tensor([0.4416, 0.2427, 0.2073, 0.1084]) -Greedy action tensor([ 0.6697, -0.3518, -0.1545, -0.2875]) tensor([0.4582, 0.1650, 0.2009, 0.1759]) -Greedy action tensor([ 0.5255, 0.2570, -0.1577, -0.2749]) tensor([0.3678, 0.2812, 0.1858, 0.1652]) -Greedy action tensor([ 0.5946, -0.2825, -0.0018, -0.2955]) tensor([0.4206, 0.1750, 0.2317, 0.1727]) -Greedy action tensor([ 0.7467, -0.4287, 0.2297, -0.4920]) tensor([0.4556, 0.1406, 0.2717, 0.1320]) -Greedy action tensor([ 0.9573, -0.3712, 0.0714, -0.4288]) tensor([0.5189, 0.1374, 0.2140, 0.1297]) -Greedy action tensor([ 0.6681, -0.4914, 0.2489, -0.5168]) tensor([0.4392, 0.1377, 0.2888, 0.1343]) -Greedy action tensor([ 0.7461, -0.4358, 0.0240, -0.4006]) tensor([0.4739, 0.1453, 0.2302, 0.1506]) -Greedy action tensor([ 0.3208, -0.0042, -0.0239, -0.4594]) tensor([0.3461, 0.2501, 0.2452, 0.1586]) -Greedy action tensor([ 0.4450, -0.2381, -0.1065, -0.3568]) tensor([0.3953, 0.1996, 0.2277, 0.1773]) -Greedy action tensor([ 0.6621, -0.2004, -0.0249, -0.3706]) tensor([0.4384, 0.1850, 0.2205, 0.1561]) -Greedy action tensor([ 0.5408, 0.0023, -0.0115, -0.1151]) tensor([0.3734, 0.2179, 0.2149, 0.1938]) -Greedy action tensor([ 1.0532, -0.2667, 0.0034, -0.5412]) tensor([0.5494, 0.1468, 0.1923, 0.1115]) -Greedy action tensor([ 0.4155, 0.1493, 0.0296, -0.0864]) tensor([0.3277, 0.2511, 0.2228, 0.1984]) -Greedy action tensor([ 0.9952, -0.5305, -0.0426, -0.5314]) tensor([0.5590, 0.1216, 0.1980, 0.1215]) -Greedy action tensor([ 0.5635, -0.4791, -0.0177, -0.5055]) tensor([0.4434, 0.1563, 0.2480, 0.1523]) -Greedy action tensor([ 1.0549, -0.7345, -0.1346, -0.5495]) tensor([0.5979, 0.0999, 0.1820, 0.1202]) -Greedy action tensor([ 0.7202, -0.4168, 0.0156, -0.1271]) tensor([0.4457, 0.1430, 0.2203, 0.1910]) -Greedy action tensor([ 1.2365, -1.0330, -0.0261, -0.5637]) tensor([0.6445, 0.0666, 0.1823, 0.1065]) -Greedy action tensor([ 0.5024, -0.2453, 0.0021, -0.4870]) tensor([0.4079, 0.1931, 0.2473, 0.1516]) -Greedy action tensor([ 0.4368, -0.2248, -0.0073, -0.3414]) tensor([0.3822, 0.1972, 0.2451, 0.1755]) -Greedy action tensor([ 0.3180, -0.3083, 0.1943, -0.2864]) tensor([0.3373, 0.1803, 0.2981, 0.1843]) -Greedy action tensor([ 0.5618, -0.2216, 0.1564, -0.5527]) tensor([0.4079, 0.1863, 0.2719, 0.1338]) -Greedy action tensor([ 0.4882, -0.1968, 0.0254, -0.4397]) tensor([0.3954, 0.1993, 0.2489, 0.1563]) -Greedy action tensor([ 0.4580, -0.0213, 0.0884, -0.2211]) tensor([0.3550, 0.2198, 0.2453, 0.1800]) -Greedy action tensor([ 0.7012, -0.4393, -0.0199, -0.3293]) tensor([0.4624, 0.1478, 0.2248, 0.1650]) -Greedy action tensor([ 0.3469, 0.1178, 0.0450, -0.1681]) tensor([0.3193, 0.2539, 0.2361, 0.1908]) -Greedy action tensor([ 0.7061, -0.4195, 0.0593, -0.3356]) tensor([0.4543, 0.1474, 0.2379, 0.1603]) -Greedy action tensor([ 0.3866, 0.0770, -0.0662, -0.0071]) tensor([0.3285, 0.2410, 0.2089, 0.2216]) -Greedy action tensor([ 0.3286, 0.2097, 0.1131, -0.2134]) tensor([0.3053, 0.2711, 0.2461, 0.1775]) -Greedy action tensor([ 0.2093, -0.0060, -0.0753, -0.1904]) tensor([0.3097, 0.2497, 0.2330, 0.2076]) -Greedy action tensor([ 0.4586, -0.2446, -0.0515, -0.2973]) tensor([0.3899, 0.1930, 0.2341, 0.1831]) -Greedy action tensor([ 0.4732, -0.4362, 0.1025, -0.5690]) tensor([0.4089, 0.1647, 0.2822, 0.1442]) -Greedy action tensor([ 0.8360, -0.6650, -0.0816, -0.3527]) tensor([0.5189, 0.1157, 0.2073, 0.1581]) -Greedy action tensor([ 0.3025, 0.1709, 0.0173, -0.3005]) tensor([0.3149, 0.2761, 0.2368, 0.1723]) -Greedy action tensor([ 0.5935, -0.0093, -0.0665, -0.3768]) tensor([0.4093, 0.2240, 0.2116, 0.1551]) -Greedy action tensor([ 0.7817, -0.7163, -0.0442, -0.3027]) tensor([0.5001, 0.1118, 0.2190, 0.1691]) -Greedy action tensor([ 0.4421, -0.1110, 0.0733, -0.2242]) tensor([0.3597, 0.2069, 0.2487, 0.1847]) -Greedy action tensor([ 0.5834, -0.4229, 0.0248, -0.4209]) tensor([0.4341, 0.1587, 0.2483, 0.1590]) -Greedy action tensor([ 0.7177, -0.3092, 0.0714, -0.2899]) tensor([0.4450, 0.1594, 0.2332, 0.1625]) -Greedy action tensor([ 0.5041, -0.1850, -0.0055, -0.2218]) tensor([0.3866, 0.1941, 0.2322, 0.1871]) -Greedy action tensor([ 0.4669, 0.1154, -0.0878, -0.2424]) tensor([0.3610, 0.2540, 0.2073, 0.1776]) -Greedy action tensor([ 0.6271, -0.2332, -0.1171, -0.3591]) tensor([0.4403, 0.1863, 0.2092, 0.1642]) -Greedy action tensor([ 0.4575, -0.0570, 0.1007, -0.3986]) tensor([0.3673, 0.2196, 0.2571, 0.1560]) -Greedy action tensor([ 0.4765, -0.1877, -0.2861, -0.1756]) tensor([0.3997, 0.2057, 0.1864, 0.2082]) -Greedy action tensor([ 0.7548, -0.3367, 0.0281, -0.5347]) tensor([0.4774, 0.1603, 0.2308, 0.1315]) -Greedy action tensor([ 0.6766, -0.0999, 0.0162, -0.1373]) tensor([0.4133, 0.1901, 0.2135, 0.1831]) -Greedy action tensor([ 0.4190, 0.1038, -0.0834, -0.0964]) tensor([0.3411, 0.2489, 0.2064, 0.2037]) -Greedy action tensor([ 0.6199, -0.3352, 0.1401, -0.5533]) tensor([0.4323, 0.1663, 0.2676, 0.1338]) -Greedy action tensor([ 0.5146, -0.3010, -0.0732, -0.2789]) tensor([0.4081, 0.1805, 0.2267, 0.1846]) -Greedy action tensor([ 0.6010, -0.4085, -0.0125, -0.2791]) tensor([0.4309, 0.1570, 0.2333, 0.1787]) -Greedy action tensor([ 0.9427, -0.3402, -0.0436, -0.5175]) tensor([0.5313, 0.1473, 0.1981, 0.1234]) -Greedy action tensor([ 0.7241, -0.2585, -0.0409, -0.4623]) tensor([0.4662, 0.1745, 0.2169, 0.1423]) -Greedy action tensor([ 0.5337, -0.1417, 0.0456, -0.2234]) tensor([0.3858, 0.1964, 0.2368, 0.1810]) -Greedy action tensor([ 0.3991, 0.1303, 0.0547, -0.1991]) tensor([0.3308, 0.2529, 0.2344, 0.1819]) -Greedy action tensor([ 0.2165, -0.1544, 0.0803, -0.2608]) tensor([0.3141, 0.2168, 0.2741, 0.1949]) -Greedy action tensor([ 0.6075, -0.3176, -0.1223, -0.3043]) tensor([0.4385, 0.1739, 0.2114, 0.1762]) -Greedy action tensor([ 0.4402, -0.0810, 0.0575, -0.1907]) tensor([0.3561, 0.2115, 0.2429, 0.1895]) -Greedy action tensor([ 1.0418, -0.5998, 0.1985, -0.7326]) tensor([0.5576, 0.1080, 0.2399, 0.0946]) -Greedy action tensor([ 0.6985, -0.4176, 0.0071, -0.5088]) tensor([0.4700, 0.1540, 0.2354, 0.1406]) -Greedy action tensor([ 0.3151, 0.0955, -0.1172, 0.0237]) tensor([0.3126, 0.2510, 0.2029, 0.2336]) -Greedy action tensor([ 0.5484, -0.2929, -0.0803, -0.3499]) tensor([0.4217, 0.1818, 0.2248, 0.1717]) -Greedy action tensor([ 0.3343, -0.2656, 0.1745, -0.3399]) tensor([0.3436, 0.1886, 0.2928, 0.1751]) -Greedy action tensor([ 0.5996, -0.1226, -0.0339, -0.3332]) tensor([0.4150, 0.2015, 0.2202, 0.1633]) -Greedy action tensor([ 0.3963, 0.0860, -0.0244, -0.1692]) tensor([0.3381, 0.2479, 0.2220, 0.1921]) -Greedy action tensor([ 0.5327, -0.1451, 0.0643, -0.3078]) tensor([0.3898, 0.1979, 0.2440, 0.1682]) -Greedy action tensor([ 1.1884, -1.1335, 0.0662, -0.8883]) tensor([0.6456, 0.0633, 0.2102, 0.0809]) -Greedy action tensor([ 0.3179, 0.1604, 0.0561, -0.2209]) tensor([0.3118, 0.2663, 0.2400, 0.1819]) -Greedy action tensor([ 0.5189, -0.2904, -0.0809, -0.4056]) tensor([0.4183, 0.1862, 0.2296, 0.1659]) -Greedy action tensor([ 0.4715, -0.2721, 0.0017, -0.4029]) tensor([0.3972, 0.1888, 0.2483, 0.1657]) -Greedy action tensor([ 0.5434, -0.4460, 0.2274, -0.5188]) tensor([0.4087, 0.1520, 0.2980, 0.1413]) -Greedy action tensor([ 0.3602, -0.1355, -0.0925, -0.3591]) tensor([0.3660, 0.2230, 0.2327, 0.1783]) -Greedy action tensor([ 1.3710e+00, -1.2541e+00, -1.1851e-03, -6.8775e-01]) tensor([0.6880, 0.0498, 0.1744, 0.0878]) -Greedy action tensor([ 0.7297, -0.4480, 0.0348, -0.5088]) tensor([0.4769, 0.1469, 0.2380, 0.1382]) -Greedy action tensor([ 0.4608, 0.1481, -0.0470, -0.1155]) tensor([0.3454, 0.2526, 0.2079, 0.1941]) -Greedy action tensor([ 0.6145, -0.3463, -0.0566, -0.4269]) tensor([0.4451, 0.1703, 0.2275, 0.1571]) -Greedy action tensor([ 0.3523, -0.0894, -0.0476, -0.1874]) tensor([0.3453, 0.2220, 0.2315, 0.2013]) -Greedy action tensor([ 0.7088, -0.4407, -0.0307, -0.3856]) tensor([0.4697, 0.1488, 0.2242, 0.1572]) -Greedy action tensor([ 0.1570, 0.1492, 0.0672, -0.2583]) tensor([0.2804, 0.2782, 0.2563, 0.1851]) -Greedy action tensor([ 0.8080, -0.2326, -0.0953, -0.1498]) tensor([0.4668, 0.1649, 0.1892, 0.1791]) -Greedy action tensor([ 0.7517, -0.1005, -0.4537, -0.4438]) tensor([0.4929, 0.2102, 0.1477, 0.1491]) -Greedy action tensor([ 1.7391, -0.6539, -0.1566, -0.2464]) tensor([0.7252, 0.0663, 0.1089, 0.0996]) -Greedy action tensor([ 1.1009, -0.2716, -0.0408, -0.2061]) tensor([0.5425, 0.1375, 0.1732, 0.1468]) -Greedy action tensor([ 0.9049, -0.2687, -0.3903, 0.3587]) tensor([0.4625, 0.1430, 0.1266, 0.2678]) -Greedy action tensor([ 0.8135, -0.3417, -0.0713, 0.1118]) tensor([0.4497, 0.1417, 0.1856, 0.2229]) -Greedy action tensor([ 0.6651, -0.5267, -0.3422, 0.1449]) tensor([0.4418, 0.1342, 0.1614, 0.2626]) -Greedy action tensor([ 0.5705, -0.2375, -0.3472, -0.1154]) tensor([0.4257, 0.1898, 0.1701, 0.2144]) -Greedy action tensor([ 0.3125, -0.3600, -0.0284, 0.1087]) tensor([0.3293, 0.1681, 0.2341, 0.2685]) -Greedy action tensor([ 0.8838, -0.3928, 0.0315, -0.0907]) tensor([0.4801, 0.1339, 0.2047, 0.1812]) -Greedy action tensor([ 0.9889, -0.6376, -0.0924, 0.2039]) tensor([0.5020, 0.0987, 0.1703, 0.2290]) -Greedy action tensor([ 0.9531, -0.6545, -0.4463, 0.1334]) tensor([0.5297, 0.1061, 0.1307, 0.2334]) -Greedy action tensor([ 1.2238, -0.7077, -0.3727, 0.1458]) tensor([0.5925, 0.0859, 0.1200, 0.2016]) -Greedy action tensor([ 0.8639, -0.6380, -0.3330, 0.3547]) tensor([0.4704, 0.1048, 0.1421, 0.2827]) -Greedy action tensor([ 1.2147, -0.4334, -0.3105, -0.1029]) tensor([0.5960, 0.1147, 0.1297, 0.1596]) -Greedy action tensor([ 1.7006, -0.7764, -0.4136, -0.0489]) tensor([0.7254, 0.0609, 0.0876, 0.1261]) -Greedy action tensor([ 1.3630, -0.4880, -0.4063, 0.3230]) tensor([0.5949, 0.0934, 0.1014, 0.2103]) -Greedy action tensor([ 1.2318, -0.7388, -0.1894, 0.6400]) tensor([0.5170, 0.0721, 0.1248, 0.2861]) -Greedy action tensor([ 1.5233, -0.4658, -0.3095, 0.3171]) tensor([0.6265, 0.0857, 0.1002, 0.1875]) -Greedy action tensor([ 1.0401, -0.3774, -0.2634, 0.3239]) tensor([0.4994, 0.1210, 0.1356, 0.2440]) -Greedy action tensor([ 1.2757, -0.7582, -0.4606, 0.9763]) tensor([0.4882, 0.0639, 0.0860, 0.3619]) -Greedy action tensor([ 1.1646, -0.2999, -0.2029, -0.0156]) tensor([0.5577, 0.1289, 0.1421, 0.1713]) -Greedy action tensor([ 0.8313, -0.3779, 0.0574, -0.0804]) tensor([0.4626, 0.1381, 0.2134, 0.1859]) -Greedy action tensor([ 1.2787, -0.5019, 0.0040, 0.0550]) tensor([0.5740, 0.0967, 0.1604, 0.1688]) -Greedy action tensor([ 0.7683, -0.3393, -0.2396, -0.0171]) tensor([0.4648, 0.1536, 0.1697, 0.2119]) -Greedy action tensor([ 1.1053, -0.0657, -0.0804, -0.2333]) tensor([0.5325, 0.1651, 0.1627, 0.1396]) -Greedy action tensor([ 1.2486, -0.0726, -0.3173, 0.0948]) tensor([0.5583, 0.1490, 0.1166, 0.1761]) -Greedy action tensor([ 0.5669, -0.4481, -0.0055, -0.1301]) tensor([0.4124, 0.1495, 0.2327, 0.2054]) -Greedy action tensor([ 1.0059, -0.5206, -0.3118, 0.4577]) tensor([0.4847, 0.1053, 0.1298, 0.2802]) -Greedy action tensor([ 1.6512, -0.9229, -0.0915, 0.3832]) tensor([0.6525, 0.0497, 0.1142, 0.1836]) -Greedy action tensor([ 1.1268, -0.1233, -0.5877, -0.1550]) tensor([0.5734, 0.1643, 0.1032, 0.1591]) -Greedy action tensor([ 0.9667, -0.5218, -0.4705, 0.6952]) tensor([0.4493, 0.1014, 0.1068, 0.3425]) -Greedy action tensor([ 0.7717, -0.3445, 0.0501, 0.1511]) tensor([0.4253, 0.1393, 0.2067, 0.2287]) -Greedy action tensor([ 0.9179, -0.3668, 0.0538, 0.1486]) tensor([0.4626, 0.1280, 0.1950, 0.2144]) -Greedy action tensor([ 1.0493, -0.7352, -0.2914, 0.6107]) tensor([0.4820, 0.0809, 0.1261, 0.3109]) -Greedy action tensor([ 0.7679, -0.1302, -0.0332, -0.2576]) tensor([0.4515, 0.1839, 0.2026, 0.1619]) -Greedy action tensor([ 1.3436, -0.7487, -0.3688, 0.2141]) tensor([0.6146, 0.0758, 0.1109, 0.1986]) -Greedy action tensor([ 0.9493, -0.4701, -0.5739, 0.8058]) tensor([0.4299, 0.1040, 0.0937, 0.3724]) -Greedy action tensor([ 1.3385, -0.4737, -0.1118, 0.2201]) tensor([0.5799, 0.0947, 0.1360, 0.1895]) -Greedy action tensor([ 1.2622, -0.6419, -0.3839, 0.1236]) tensor([0.6017, 0.0896, 0.1160, 0.1927]) -Greedy action tensor([ 1.5662, -0.7019, -0.2098, -0.0871]) tensor([0.6830, 0.0707, 0.1156, 0.1307]) -Greedy action tensor([ 0.7695, -0.4411, -0.5635, 0.8030]) tensor([0.3852, 0.1148, 0.1016, 0.3984]) -Greedy action tensor([ 0.9409, -0.2501, -0.5447, 0.0064]) tensor([0.5200, 0.1580, 0.1177, 0.2042]) -Greedy action tensor([ 0.7090, -0.2798, -0.0621, -0.3374]) tensor([0.4575, 0.1702, 0.2116, 0.1607]) -Greedy action tensor([ 0.8302, -0.5957, -0.0868, 0.2004]) tensor([0.4603, 0.1106, 0.1840, 0.2452]) -Greedy action tensor([ 1.3246, -0.6202, -0.4454, 0.4393]) tensor([0.5794, 0.0829, 0.0987, 0.2391]) -Greedy action tensor([ 0.9590, -0.1794, 0.0742, -0.2315]) tensor([0.4909, 0.1572, 0.2026, 0.1493]) -Greedy action tensor([ 1.2499, -0.2774, -0.3231, -0.1155]) tensor([0.5953, 0.1293, 0.1235, 0.1520]) -Greedy action tensor([ 1.0745e+00, -4.9033e-01, -2.3098e-01, 3.3382e-04]) tensor([0.5489, 0.1148, 0.1488, 0.1875]) -Greedy action tensor([ 1.2614, -0.4017, -0.2512, 0.0046]) tensor([0.5902, 0.1119, 0.1300, 0.1679]) -Greedy action tensor([ 1.2449, -0.3159, -0.3326, -0.1583]) tensor([0.6016, 0.1263, 0.1242, 0.1479]) -Greedy action tensor([ 0.7902, -0.2755, -0.3682, -0.1881]) tensor([0.4915, 0.1693, 0.1543, 0.1848]) -Greedy action tensor([ 0.9368, -0.4139, -0.0639, -0.2416]) tensor([0.5169, 0.1339, 0.1900, 0.1591]) -Greedy action tensor([ 0.6778, 0.2284, -0.1739, -0.5657]) tensor([0.4250, 0.2711, 0.1813, 0.1226]) -Greedy action tensor([ 1.1377, -0.6689, -0.0869, 0.0103]) tensor([0.5612, 0.0922, 0.1649, 0.1818]) -Greedy action tensor([ 0.9301, -0.5842, -0.3763, 0.4595]) tensor([0.4727, 0.1040, 0.1280, 0.2953]) -Greedy action tensor([ 1.4727, -0.6350, -0.3081, 0.2634]) tensor([0.6296, 0.0765, 0.1061, 0.1879]) -Greedy action tensor([ 0.9114, -0.3106, -0.2514, -0.1849]) tensor([0.5151, 0.1518, 0.1610, 0.1721]) -Greedy action tensor([ 0.8848, -0.1970, -0.0410, -0.2007]) tensor([0.4824, 0.1635, 0.1911, 0.1629]) -Greedy action tensor([ 1.3493, -0.4999, -0.1456, 0.1480]) tensor([0.5944, 0.0935, 0.1333, 0.1788]) -Greedy action tensor([ 1.1963, -0.5647, -0.1691, -0.2246]) tensor([0.5993, 0.1030, 0.1530, 0.1447]) -Greedy action tensor([ 1.3271, -0.4678, -0.6019, 0.5878]) tensor([0.5590, 0.0929, 0.0812, 0.2669]) -Greedy action tensor([ 0.8503, -0.1336, -0.3662, -0.1472]) tensor([0.4905, 0.1834, 0.1453, 0.1809]) -Greedy action tensor([ 0.7065, -0.3188, 0.1448, -0.0948]) tensor([0.4206, 0.1509, 0.2398, 0.1887]) -Greedy action tensor([ 0.6615, -0.5401, -0.3614, 0.0989]) tensor([0.4484, 0.1348, 0.1612, 0.2555]) -Greedy action tensor([ 1.0780, -0.5415, -0.2089, 0.5172]) tensor([0.4890, 0.0968, 0.1350, 0.2791]) -Greedy action tensor([ 0.7684, -0.5244, 0.0044, -0.0481]) tensor([0.4582, 0.1258, 0.2135, 0.2025]) -Greedy action tensor([ 0.8644, -0.3134, -0.0870, 0.1206]) tensor([0.4609, 0.1419, 0.1780, 0.2191]) -Greedy action tensor([ 1.3854, -0.4534, -0.4735, 0.6288]) tensor([0.5605, 0.0891, 0.0874, 0.2630]) -Greedy action tensor([ 1.0107, -0.6889, -0.5126, 0.9848]) tensor([0.4210, 0.0769, 0.0918, 0.4103]) -Greedy action tensor([ 1.2933, -0.2261, -0.1971, 0.1211]) tensor([0.5702, 0.1248, 0.1284, 0.1766]) -Greedy action tensor([ 0.9213, 0.1072, 0.1076, -0.3109]) tensor([0.4591, 0.2034, 0.2035, 0.1339]) -Greedy action tensor([ 1.0013, -0.4476, -0.2646, 0.3865]) tensor([0.4860, 0.1141, 0.1371, 0.2628]) -Greedy action tensor([ 0.7901, -0.2178, -0.4000, 0.4480]) tensor([0.4202, 0.1534, 0.1278, 0.2985]) -Greedy action tensor([ 1.1439, -0.2717, -0.1765, 0.0088]) tensor([0.5461, 0.1326, 0.1458, 0.1755]) -Greedy action tensor([ 1.2523, -0.5702, -0.5249, 0.5480]) tensor([0.5479, 0.0886, 0.0927, 0.2709]) -Greedy action tensor([ 0.9029, -0.5597, -0.2332, 0.1690]) tensor([0.4920, 0.1139, 0.1580, 0.2361]) -Greedy action tensor([ 0.9320, -0.3330, -0.1509, 0.2725]) tensor([0.4677, 0.1320, 0.1584, 0.2419]) -Greedy action tensor([ 0.7925, -0.3645, -0.0415, -0.2933]) tensor([0.4793, 0.1507, 0.2082, 0.1618]) -Greedy action tensor([ 0.9059, -0.3849, -0.3221, 0.2794]) tensor([0.4756, 0.1308, 0.1393, 0.2542]) -Greedy action tensor([ 1.3110, -0.5687, -0.3312, 0.0335]) tensor([0.6154, 0.0939, 0.1191, 0.1715]) -Greedy action tensor([-1.5259, -0.4938, 0.4611, 0.1396]) tensor([0.0610, 0.1713, 0.4450, 0.3227]) -Greedy action tensor([-1.1938, -0.5896, 0.2748, 0.2492]) tensor([0.0877, 0.1604, 0.3808, 0.3711]) -Greedy action tensor([-1.4246, -0.4548, 0.4553, 0.3112]) tensor([0.0630, 0.1663, 0.4131, 0.3576]) -Greedy action tensor([-1.9604, -0.8530, 0.2854, -0.2124]) tensor([0.0520, 0.1575, 0.4916, 0.2988]) -Greedy action tensor([-1.0162, -0.1775, 0.5360, -0.5354]) tensor([0.1036, 0.2397, 0.4892, 0.1676]) -Greedy action tensor([-2.0520, -0.8569, 0.8970, 0.3325]) tensor([0.0292, 0.0965, 0.5574, 0.3169]) -Greedy action tensor([-1.4869, -0.4922, 0.6028, 0.5517]) tensor([0.0514, 0.1389, 0.4152, 0.3945]) -Greedy action tensor([-2.0342, -0.6420, 1.1180, 0.5190]) tensor([0.0242, 0.0975, 0.5668, 0.3114]) -Greedy action tensor([-1.9309, -0.5007, 1.3476, 0.7429]) tensor([0.0216, 0.0904, 0.5742, 0.3137]) -Greedy action tensor([-1.8534, -0.4888, 0.6338, -0.1114]) tensor([0.0441, 0.1728, 0.5310, 0.2520]) -Greedy action tensor([-1.8857, -0.4020, 0.9415, 0.4860]) tensor([0.0303, 0.1335, 0.5117, 0.3245]) -Greedy action tensor([-1.5667, -0.3523, -0.4010, -1.1704]) tensor([0.1103, 0.3717, 0.3540, 0.1640]) -Greedy action tensor([-0.6022, -0.4986, 0.2021, 0.0746]) tensor([0.1584, 0.1757, 0.3541, 0.3117]) -Greedy action tensor([-0.7524, 0.0171, -0.4615, -0.1665]) tensor([0.1589, 0.3430, 0.2126, 0.2855]) -Greedy action tensor([-1.3023, -0.3281, 1.0064, 1.0718]) tensor([0.0409, 0.1083, 0.4115, 0.4393]) -Greedy action tensor([-0.7447, 0.0040, 0.9606, 1.4930]) tensor([0.0556, 0.1175, 0.3059, 0.5210]) -Greedy action tensor([-1.8332, -0.4035, 0.7155, 0.1714]) tensor([0.0394, 0.1645, 0.5037, 0.2924]) -Greedy action tensor([-1.4190, -0.4772, 1.0776, 0.9727]) tensor([0.0375, 0.0963, 0.4558, 0.4104]) -Greedy action tensor([-1.0300, -0.5535, 0.3157, 0.6164]) tensor([0.0859, 0.1384, 0.3300, 0.4458]) -Greedy action tensor([-1.7551, -0.4898, 0.5874, -0.0546]) tensor([0.0490, 0.1735, 0.5094, 0.2681]) -Greedy action tensor([-0.5853, -0.2596, 0.8731, 1.5605]) tensor([0.0656, 0.0909, 0.2822, 0.5612]) -Greedy action tensor([-0.7890, -0.6238, 0.2880, 0.0355]) tensor([0.1352, 0.1595, 0.3970, 0.3084]) -Greedy action tensor([-1.0205, -0.4897, 0.9554, 1.3889]) tensor([0.0475, 0.0808, 0.3428, 0.5288]) -Greedy action tensor([-1.2608, -0.6012, 1.1822, 1.3355]) tensor([0.0359, 0.0694, 0.4131, 0.4815]) -Greedy action tensor([-1.5408, -0.5559, 0.6203, 0.2354]) tensor([0.0547, 0.1466, 0.4752, 0.3234]) -Greedy action tensor([-1.6011, -0.3968, 0.7896, 0.6689]) tensor([0.0401, 0.1337, 0.4380, 0.3882]) -Greedy action tensor([-1.3608, -0.5710, 0.3545, 0.2016]) tensor([0.0739, 0.1628, 0.4108, 0.3525]) -Greedy action tensor([-0.6216, -0.5830, 0.0365, 0.0385]) tensor([0.1693, 0.1760, 0.3270, 0.3277]) -Greedy action tensor([-1.2404, -0.5494, 0.2878, 0.2643]) tensor([0.0826, 0.1648, 0.3807, 0.3719]) -Greedy action tensor([-1.8707, -0.4712, 0.6327, -0.1274]) tensor([0.0435, 0.1763, 0.5316, 0.2486]) -Greedy action tensor([-0.9561, -0.6098, 0.2140, 0.2679]) tensor([0.1107, 0.1565, 0.3566, 0.3763]) -Greedy action tensor([-0.9781, -0.4482, 0.4450, -0.3237]) tensor([0.1140, 0.1936, 0.4731, 0.2193]) -Greedy action tensor([-1.9819, -0.6289, 0.7562, 0.1566]) tensor([0.0347, 0.1343, 0.5365, 0.2945]) -Greedy action tensor([-0.5904, -0.5226, 0.1999, 0.2031]) tensor([0.1542, 0.1650, 0.3398, 0.3410]) -Greedy action tensor([-0.9856, -0.0856, 1.1687, 1.2770]) tensor([0.0461, 0.1134, 0.3975, 0.4430]) -Greedy action tensor([-1.4638, -0.5643, 0.4350, 0.1835]) tensor([0.0652, 0.1604, 0.4356, 0.3388]) -Greedy action tensor([-0.3374, 0.0871, 0.1049, 0.1119]) tensor([0.1769, 0.2705, 0.2753, 0.2773]) -Greedy action tensor([-1.3913, -0.5925, 0.3786, 0.1292]) tensor([0.0732, 0.1626, 0.4295, 0.3347]) -Greedy action tensor([-1.7234, -0.2279, 0.5210, -0.0173]) tensor([0.0490, 0.2187, 0.4624, 0.2699]) -Greedy action tensor([-0.8276, -0.5909, 0.3984, 0.2070]) tensor([0.1178, 0.1493, 0.4014, 0.3315]) -Greedy action tensor([-1.1718, -0.6214, 0.2965, 0.2398]) tensor([0.0895, 0.1551, 0.3884, 0.3670]) -Greedy action tensor([-1.2433, -0.5301, 0.4014, 0.2997]) tensor([0.0775, 0.1582, 0.4016, 0.3627]) -Greedy action tensor([-1.4702, -0.6728, 0.6926, 0.4006]) tensor([0.0543, 0.1206, 0.4724, 0.3527]) -Greedy action tensor([-0.5558, 0.5728, 0.2563, 0.6972]) tensor([0.1016, 0.3140, 0.2288, 0.3556]) -Greedy action tensor([-1.4729, -0.4636, 0.5237, 0.4068]) tensor([0.0566, 0.1554, 0.4170, 0.3710]) -Greedy action tensor([-1.0389, -0.5781, 0.4558, 0.7512]) tensor([0.0767, 0.1216, 0.3420, 0.4596]) -Greedy action tensor([-1.1015, -0.5262, -0.2962, -0.3635]) tensor([0.1407, 0.2501, 0.3148, 0.2943]) -Greedy action tensor([-0.5786, -0.6363, 0.4336, 0.1083]) tensor([0.1496, 0.1412, 0.4117, 0.2974]) -Greedy action tensor([-1.0794, 0.6460, 0.3937, 0.9221]) tensor([0.0544, 0.3055, 0.2374, 0.4027]) -Greedy action tensor([-1.5354, -0.9693, 0.3419, -0.4512]) tensor([0.0816, 0.1437, 0.5333, 0.2413]) -Greedy action tensor([-1.0072, -0.4575, 0.5163, 1.1720]) tensor([0.0619, 0.1072, 0.2839, 0.5470]) -Greedy action tensor([-1.8390, -0.4553, 0.6106, -0.0968]) tensor([0.0449, 0.1790, 0.5198, 0.2562]) -Greedy action tensor([-1.4551, -0.4506, 0.8354, 0.8131]) tensor([0.0430, 0.1173, 0.4245, 0.4152]) -Greedy action tensor([-1.2705, -0.6769, -0.7341, -1.3473]) tensor([0.1836, 0.3324, 0.3139, 0.1700]) -Greedy action tensor([-1.6136, -0.5001, 0.4960, 0.0369]) tensor([0.0571, 0.1740, 0.4711, 0.2977]) -Greedy action tensor([-1.2149, -0.5108, 0.3313, 0.3693]) tensor([0.0794, 0.1606, 0.3728, 0.3872]) -Greedy action tensor([-1.3159, -0.5819, 0.3357, 0.1884]) tensor([0.0781, 0.1628, 0.4074, 0.3517]) -Greedy action tensor([-1.1241, 0.6230, 0.5178, 0.8367]) tensor([0.0526, 0.3019, 0.2717, 0.3738]) -Greedy action tensor([-1.9373, -0.5541, 0.5053, -0.1205]) tensor([0.0442, 0.1761, 0.5080, 0.2717]) -Greedy action tensor([-0.5291, 1.0244, 0.1361, 0.1984]) tensor([0.1026, 0.4853, 0.1996, 0.2125]) -Greedy action tensor([-0.6110, -0.1389, 0.6035, 1.4182]) tensor([0.0736, 0.1181, 0.2481, 0.5602]) -Greedy action tensor([-1.3658, -0.1714, 0.2038, -0.2703]) tensor([0.0827, 0.2729, 0.3972, 0.2472]) -Greedy action tensor([-1.0338, -0.3114, 0.6696, 1.2226]) tensor([0.0552, 0.1138, 0.3034, 0.5275]) -Greedy action tensor([-0.2939, -0.1003, 1.1828, 1.6878]) tensor([0.0722, 0.0876, 0.3162, 0.5239]) -Greedy action tensor([-1.4592, -0.6138, 0.4331, 0.0563]) tensor([0.0689, 0.1605, 0.4571, 0.3136]) -Greedy action tensor([-1.7707, -0.7468, 0.0849, -0.2878]) tensor([0.0686, 0.1909, 0.4385, 0.3021]) -Greedy action tensor([-1.5978, -0.5130, 0.5106, 0.1561]) tensor([0.0556, 0.1646, 0.4583, 0.3215]) -Greedy action tensor([-1.4810, -0.2761, 0.4324, 0.2801]) tensor([0.0591, 0.1970, 0.4002, 0.3437]) -Greedy action tensor([-1.5257, -0.3957, 0.6375, 0.5773]) tensor([0.0477, 0.1475, 0.4145, 0.3903]) -Greedy action tensor([-1.4340, 0.7508, 0.4072, 0.0588]) tensor([0.0484, 0.4306, 0.3054, 0.2156]) -Greedy action tensor([-1.0405, -0.4768, 0.9672, 1.2700]) tensor([0.0493, 0.0866, 0.3671, 0.4970]) -Greedy action tensor([-1.9287, -0.6736, 1.1978, 0.4832]) tensor([0.0260, 0.0912, 0.5927, 0.2901]) -Greedy action tensor([-1.6178, -0.5755, 0.5192, 0.0573]) tensor([0.0567, 0.1607, 0.4802, 0.3025]) -Greedy action tensor([-1.3794, -0.5319, 0.4031, 0.3648]) tensor([0.0667, 0.1556, 0.3963, 0.3814]) -Greedy action tensor([-1.3871, -0.5925, 0.4332, 0.1884]) tensor([0.0703, 0.1557, 0.4341, 0.3399]) -Greedy action tensor([-1.5073, -0.5436, 0.4192, 0.1486]) tensor([0.0636, 0.1667, 0.4366, 0.3331]) -Greedy action tensor([-1.9113, -0.4786, 0.8635, 0.2597]) tensor([0.0333, 0.1397, 0.5346, 0.2923]) -Greedy action tensor([-0.4693, -0.4974, 0.2126, 0.0838]) tensor([0.1758, 0.1709, 0.3477, 0.3056]) -Greedy action tensor([-1.3453, -0.6376, 0.5350, 0.3526]) tensor([0.0665, 0.1349, 0.4356, 0.3630]) -Greedy action tensor([-2.0083, -0.9562, 0.3918, -0.1861]) tensor([0.0475, 0.1359, 0.5231, 0.2935]) -Greedy action tensor([-1.0661, 0.0454, 0.1660, -0.4116]) tensor([0.1065, 0.3236, 0.3651, 0.2049]) -Greedy action tensor([ 0.3973, -0.1467, -0.1203, -0.1695]) tensor([0.3645, 0.2115, 0.2172, 0.2068]) -Greedy action tensor([ 0.6109, -0.1855, -0.0394, -0.3656]) tensor([0.4256, 0.1919, 0.2221, 0.1603]) -Greedy action tensor([ 0.2652, 0.1426, 0.0974, -0.1011]) tensor([0.2921, 0.2584, 0.2470, 0.2025]) -Greedy action tensor([ 0.3274, -0.0617, 0.0206, -0.1182]) tensor([0.3274, 0.2219, 0.2409, 0.2097]) -Greedy action tensor([ 0.2856, 0.1250, 0.0354, -0.2265]) tensor([0.3096, 0.2637, 0.2411, 0.1855]) -Greedy action tensor([ 9.9529e-01, -6.6617e-01, -5.6249e-04, -7.0330e-01]) tensor([0.5740, 0.1090, 0.2120, 0.1050]) -Greedy action tensor([ 0.4188, -0.2757, -0.0565, -0.6038]) tensor([0.4031, 0.2013, 0.2506, 0.1450]) -Greedy action tensor([ 0.6879, -0.6301, -0.0043, -0.7724]) tensor([0.4999, 0.1338, 0.2502, 0.1161]) -Greedy action tensor([ 0.8185, -0.2794, 0.0400, -0.2272]) tensor([0.4664, 0.1556, 0.2141, 0.1639]) -Greedy action tensor([ 0.5742, -0.1926, -0.1037, -0.1683]) tensor([0.4085, 0.1897, 0.2074, 0.1944]) -Greedy action tensor([ 0.7121, -0.5250, 0.0534, -0.6431]) tensor([0.4841, 0.1405, 0.2506, 0.1248]) -Greedy action tensor([ 1.0068, -0.6200, -0.0466, -0.5089]) tensor([0.5666, 0.1114, 0.1976, 0.1245]) -Greedy action tensor([ 0.6895, -0.3164, -0.0318, -0.4386]) tensor([0.4597, 0.1681, 0.2235, 0.1488]) -Greedy action tensor([ 0.3696, -0.2069, 0.3501, -0.2990]) tensor([0.3273, 0.1839, 0.3210, 0.1677]) -Greedy action tensor([ 0.7935, -0.5845, 0.0293, -0.5030]) tensor([0.5022, 0.1266, 0.2339, 0.1373]) -Greedy action tensor([ 0.3548, -0.0864, -0.1485, -0.1487]) tensor([0.3506, 0.2255, 0.2119, 0.2119]) -Greedy action tensor([ 0.4270, -0.2172, -0.0278, -0.2141]) tensor([0.3722, 0.1955, 0.2362, 0.1961]) -Greedy action tensor([ 0.4256, -0.0770, 0.1391, -0.2638]) tensor([0.3499, 0.2117, 0.2628, 0.1756]) -Greedy action tensor([ 0.5629, -0.1106, -0.0596, -0.3191]) tensor([0.4064, 0.2072, 0.2181, 0.1682]) -Greedy action tensor([ 0.5362, 0.0744, 0.0160, -0.1204]) tensor([0.3645, 0.2297, 0.2167, 0.1890]) -Greedy action tensor([ 0.5724, -0.3070, 0.0092, -0.4181]) tensor([0.4245, 0.1762, 0.2417, 0.1577]) -Greedy action tensor([ 0.8489, -0.6052, -0.0311, -0.7232]) tensor([0.5388, 0.1259, 0.2235, 0.1119]) -Greedy action tensor([ 1.1437, -0.8598, -0.0553, -0.4697]) tensor([0.6114, 0.0825, 0.1843, 0.1218]) -Greedy action tensor([ 0.5636, -0.3221, 0.1280, -0.3663]) tensor([0.4075, 0.1681, 0.2636, 0.1608]) -Greedy action tensor([ 0.2946, 0.1993, 0.1280, -0.1625]) tensor([0.2951, 0.2683, 0.2498, 0.1868]) -Greedy action tensor([ 0.3978, 0.0507, -0.0073, -0.1284]) tensor([0.3373, 0.2384, 0.2250, 0.1993]) -Greedy action tensor([ 0.7247, -0.1817, 0.1606, -0.7070]) tensor([0.4521, 0.1826, 0.2572, 0.1080]) -Greedy action tensor([ 0.4214, -0.1076, -0.0358, -0.1484]) tensor([0.3587, 0.2113, 0.2271, 0.2029]) -Greedy action tensor([ 0.8260, -0.2779, -0.0187, -0.2602]) tensor([0.4765, 0.1580, 0.2047, 0.1608]) -Greedy action tensor([ 0.6566, -0.4317, -0.0382, -0.3536]) tensor([0.4545, 0.1531, 0.2269, 0.1655]) -Greedy action tensor([ 0.9563, -0.6371, 0.0965, -0.5600]) tensor([0.5417, 0.1101, 0.2293, 0.1189]) -Greedy action tensor([ 0.4649, -0.1702, 0.0697, -0.2102]) tensor([0.3686, 0.1954, 0.2483, 0.1877]) -Greedy action tensor([ 0.7868, -0.4494, -0.0199, -0.4185]) tensor([0.4911, 0.1426, 0.2192, 0.1471]) -Greedy action tensor([ 0.5037, 0.4324, -0.1330, -0.1799]) tensor([0.3373, 0.3141, 0.1784, 0.1702]) -Greedy action tensor([ 0.9534, -0.4790, 0.0201, -0.5099]) tensor([0.5366, 0.1281, 0.2110, 0.1242]) -Greedy action tensor([ 0.6803, -0.3138, -0.0072, -0.2405]) tensor([0.4403, 0.1629, 0.2214, 0.1753]) -Greedy action tensor([ 0.4227, 0.0586, 0.0443, -0.1142]) tensor([0.3373, 0.2344, 0.2311, 0.1972]) -Greedy action tensor([ 0.0814, 0.0892, 0.0008, -0.2191]) tensor([0.2724, 0.2745, 0.2513, 0.2017]) -Greedy action tensor([ 0.4819, 0.1315, 0.1159, -0.1871]) tensor([0.3436, 0.2420, 0.2383, 0.1760]) -Greedy action tensor([ 0.5454, -0.2000, -0.1067, -0.2962]) tensor([0.4121, 0.1956, 0.2147, 0.1776]) -Greedy action tensor([ 0.7755, -0.3145, 0.0030, -0.4866]) tensor([0.4805, 0.1616, 0.2219, 0.1360]) -Greedy action tensor([ 0.4588, -0.2887, -0.0840, -0.3862]) tensor([0.4025, 0.1906, 0.2339, 0.1729]) -Greedy action tensor([ 0.9225, -0.6092, -0.1547, -0.7083]) tensor([0.5706, 0.1233, 0.1943, 0.1117]) -Greedy action tensor([ 0.5258, -0.2342, -0.0168, -0.2770]) tensor([0.4005, 0.1873, 0.2328, 0.1794]) -Greedy action tensor([ 0.8149, -0.5363, 0.0297, -0.4226]) tensor([0.4987, 0.1291, 0.2274, 0.1447]) -Greedy action tensor([ 0.3922, -0.0889, 0.1330, -0.2339]) tensor([0.3419, 0.2114, 0.2639, 0.1828]) -Greedy action tensor([ 0.7660, -0.3372, 0.1494, -0.2665]) tensor([0.4489, 0.1489, 0.2423, 0.1599]) -Greedy action tensor([ 0.6071, 0.0677, -0.1130, -0.2271]) tensor([0.3994, 0.2329, 0.1944, 0.1734]) -Greedy action tensor([ 0.6800, -0.4048, 0.0428, -0.4547]) tensor([0.4570, 0.1544, 0.2416, 0.1469]) -Greedy action tensor([ 0.4043, -0.1660, -0.1557, -0.3223]) tensor([0.3817, 0.2158, 0.2180, 0.1846]) -Greedy action tensor([ 0.6584, -0.2866, -0.0682, -0.2106]) tensor([0.4364, 0.1696, 0.2110, 0.1830]) -Greedy action tensor([ 0.8521, -0.6856, -0.1019, -0.3595]) tensor([0.5269, 0.1132, 0.2030, 0.1569]) -Greedy action tensor([ 0.6131, -0.2070, -0.0120, -0.3225]) tensor([0.4223, 0.1860, 0.2260, 0.1657]) -Greedy action tensor([ 0.2884, -0.0472, 0.0587, -0.3083]) tensor([0.3268, 0.2336, 0.2597, 0.1799]) -Greedy action tensor([ 0.5999, -0.2476, 0.0067, -0.4964]) tensor([0.4319, 0.1851, 0.2387, 0.1443]) -Greedy action tensor([ 1.1517, -1.0271, -0.1425, -0.8943]) tensor([0.6594, 0.0746, 0.1807, 0.0852]) -Greedy action tensor([ 0.5885, -0.1828, -0.0787, -0.2882]) tensor([0.4181, 0.1933, 0.2145, 0.1740]) -Greedy action tensor([ 0.7182, -0.4192, 0.0301, -0.3322]) tensor([0.4602, 0.1476, 0.2313, 0.1610]) -Greedy action tensor([ 0.8348, -0.4016, 0.0974, -0.4963]) tensor([0.4919, 0.1429, 0.2353, 0.1300]) -Greedy action tensor([ 0.3965, -0.2845, 0.0199, -0.2930]) tensor([0.3712, 0.1879, 0.2547, 0.1863]) -Greedy action tensor([ 1.0959, -0.8461, -0.0313, -0.6875]) tensor([0.6115, 0.0877, 0.1981, 0.1028]) -Greedy action tensor([ 0.5536, 0.0799, 0.0204, -0.1194]) tensor([0.3677, 0.2290, 0.2157, 0.1876]) -Greedy action tensor([ 0.5020, -0.2778, -0.1020, -0.3983]) tensor([0.4147, 0.1901, 0.2267, 0.1685]) -Greedy action tensor([ 0.6286, -0.3777, -0.0568, -0.2917]) tensor([0.4410, 0.1612, 0.2222, 0.1757]) -Greedy action tensor([ 0.3625, 0.1234, 0.0609, -0.3497]) tensor([0.3314, 0.2609, 0.2451, 0.1626]) -Greedy action tensor([ 0.4888, -0.0456, 0.0690, -0.2573]) tensor([0.3680, 0.2156, 0.2418, 0.1745]) -Greedy action tensor([ 0.7010, -0.3803, -0.0336, -0.2713]) tensor([0.4552, 0.1544, 0.2183, 0.1721]) -Greedy action tensor([ 0.4908, 0.1499, 0.1299, -0.2060]) tensor([0.3441, 0.2447, 0.2398, 0.1714]) -Greedy action tensor([ 0.8539, -0.2446, -0.1511, -0.3991]) tensor([0.5038, 0.1679, 0.1844, 0.1439]) -Greedy action tensor([ 0.4514, -0.0122, 0.0022, -0.4897]) tensor([0.3763, 0.2367, 0.2401, 0.1468]) -Greedy action tensor([ 0.5808, -0.1923, 0.0690, -0.4711]) tensor([0.4149, 0.1915, 0.2487, 0.1449]) -Greedy action tensor([ 0.2369, 0.0849, 0.1536, -0.2620]) tensor([0.2953, 0.2537, 0.2717, 0.1793]) -Greedy action tensor([ 0.7152, -0.2532, 0.1194, -0.5122]) tensor([0.4497, 0.1707, 0.2478, 0.1318]) -Greedy action tensor([ 0.5716, -0.2910, -0.1284, -0.3773]) tensor([0.4337, 0.1830, 0.2154, 0.1679]) -Greedy action tensor([ 1.0020, -0.4431, -0.0181, -0.4569]) tensor([0.5468, 0.1289, 0.1972, 0.1271]) -Greedy action tensor([ 0.6511, -0.2971, -0.0773, -0.3010]) tensor([0.4432, 0.1717, 0.2140, 0.1711]) -Greedy action tensor([ 0.2491, 0.1462, -0.0769, -0.0688]) tensor([0.2984, 0.2692, 0.2153, 0.2171]) -Greedy action tensor([ 0.5808, 0.0301, 0.0579, -0.2000]) tensor([0.3806, 0.2194, 0.2256, 0.1743]) -Greedy action tensor([ 0.8140, -0.4218, 0.0706, -0.6721]) tensor([0.5019, 0.1459, 0.2387, 0.1136]) -Greedy action tensor([ 0.3718, -0.1097, 0.0801, -0.2606]) tensor([0.3453, 0.2133, 0.2579, 0.1835]) -Greedy action tensor([ 0.5124, -0.2645, -0.1632, -0.3645]) tensor([0.4193, 0.1928, 0.2134, 0.1745]) -Greedy action tensor([ 1.2039, -0.8963, 1.1831, 1.5299]) tensor([0.2868, 0.0351, 0.2809, 0.3973]) -Greedy action tensor([ 1.2671, 0.5922, -0.6318, -0.5280]) tensor([0.5479, 0.2790, 0.0820, 0.0910]) -Greedy action tensor([ 0.7787, -0.2883, -0.6105, 1.1434]) tensor([0.3296, 0.1134, 0.0822, 0.4748]) -Greedy action tensor([-0.3508, -0.6126, 0.0397, 1.0357]) tensor([0.1380, 0.1062, 0.2039, 0.5520]) -Greedy action tensor([ 1.3019, -0.0531, 1.2091, 1.3062]) tensor([0.3151, 0.0813, 0.2872, 0.3165]) -Greedy action tensor([ 0.3155, -0.1538, -0.4139, 0.6694]) tensor([0.2831, 0.1771, 0.1365, 0.4033]) -Greedy action tensor([0.2627, 0.4153, 0.2724, 0.7703]) tensor([0.2068, 0.2409, 0.2088, 0.3435]) -Greedy action tensor([ 0.7221, 0.2555, -0.6081, 0.3019]) tensor([0.3924, 0.2461, 0.1038, 0.2578]) -Greedy action tensor([ 0.1949, -1.3369, 2.0574, 0.2207]) tensor([0.1152, 0.0249, 0.7417, 0.1182]) -Greedy action tensor([-0.3680, -1.5363, 0.1633, 1.1614]) tensor([0.1311, 0.0408, 0.2230, 0.6051]) -Greedy action tensor([ 1.2585, -0.9799, 0.6611, 0.3086]) tensor([0.4893, 0.0522, 0.2693, 0.1893]) -Greedy action tensor([ 1.8364, -0.9201, 0.2011, 1.2059]) tensor([0.5584, 0.0355, 0.1088, 0.2973]) -Greedy action tensor([ 1.2291, -0.0326, 0.9779, 1.0816]) tensor([0.3420, 0.0968, 0.2660, 0.2951]) -Greedy action tensor([-0.0262, -0.1132, 0.9651, 0.1963]) tensor([0.1706, 0.1564, 0.4598, 0.2131]) -Greedy action tensor([ 0.3434, -0.5263, -1.0786, 0.1337]) tensor([0.4047, 0.1696, 0.0976, 0.3281]) -Greedy action tensor([-0.4442, -0.2772, -0.3333, -1.1235]) tensor([0.2627, 0.3105, 0.2936, 0.1332]) -Greedy action tensor([ 1.2247, -0.1676, 0.1805, 1.1344]) tensor([0.3978, 0.0988, 0.1400, 0.3634]) -Greedy action tensor([-0.3990, -0.7461, -0.5669, 0.3429]) tensor([0.2149, 0.1519, 0.1817, 0.4514]) -Greedy action tensor([ 1.0929, -1.1167, 1.4774, 0.1069]) tensor([0.3388, 0.0372, 0.4976, 0.1264]) -Greedy action tensor([ 0.8650, -0.9163, -0.1285, 0.9670]) tensor([0.3779, 0.0636, 0.1399, 0.4185]) -Greedy action tensor([1.3336, 0.0147, 1.0675, 0.4417]) tensor([0.4092, 0.1094, 0.3136, 0.1677]) -Greedy action tensor([ 1.9561, 0.3456, -0.2685, 0.1428]) tensor([0.6798, 0.1358, 0.0735, 0.1109]) -Greedy action tensor([ 1.8274, -0.6764, 0.9848, 2.0436]) tensor([0.3631, 0.0297, 0.1564, 0.4508]) -Greedy action tensor([ 0.5866, 0.3442, -0.7101, -0.2505]) tensor([0.4014, 0.3150, 0.1098, 0.1738]) -Greedy action tensor([ 0.4658, 0.3665, -0.5637, 0.6711]) tensor([0.2865, 0.2594, 0.1023, 0.3518]) -Greedy action tensor([-0.5987, 0.2010, 0.5627, 0.3372]) tensor([0.1115, 0.2481, 0.3562, 0.2843]) -Greedy action tensor([ 0.1050, -0.7647, 0.0948, 0.0199]) tensor([0.3005, 0.1260, 0.2975, 0.2760]) -Greedy action tensor([ 1.1047, -0.4115, 1.3597, 0.8767]) tensor([0.3025, 0.0664, 0.3903, 0.2408]) -Greedy action tensor([ 1.4089, -0.0871, 1.1397, -0.1531]) tensor([0.4550, 0.1019, 0.3476, 0.0954]) -Greedy action tensor([0.9092, 0.6138, 0.9985, 0.1731]) tensor([0.3015, 0.2244, 0.3297, 0.1444]) -Greedy action tensor([ 1.1188, 0.1263, -0.1076, 0.4331]) tensor([0.4613, 0.1710, 0.1353, 0.2324]) -Greedy action tensor([ 1.2902, 0.4479, -0.6449, 0.6709]) tensor([0.4732, 0.2038, 0.0683, 0.2547]) -Greedy action tensor([ 0.9679, 0.1209, -0.4481, 0.9474]) tensor([0.3772, 0.1617, 0.0915, 0.3695]) -Greedy action tensor([ 0.3967, 0.6509, -0.6309, 0.4122]) tensor([0.2730, 0.3520, 0.0977, 0.2773]) -Greedy action tensor([ 0.1935, -1.7087, 1.2674, 0.2233]) tensor([0.1958, 0.0292, 0.5732, 0.2018]) -Greedy action tensor([ 2.2326, -0.5526, 1.2539, 1.8142]) tensor([0.4772, 0.0295, 0.1793, 0.3140]) -Greedy action tensor([ 0.5141, -0.2644, 0.2688, 0.1781]) tensor([0.3383, 0.1553, 0.2647, 0.2417]) -Greedy action tensor([ 0.4571, -0.3802, 0.5195, 0.0854]) tensor([0.3138, 0.1358, 0.3340, 0.2164]) -Greedy action tensor([ 1.1333, -0.7732, 1.1241, 0.1586]) tensor([0.3973, 0.0590, 0.3937, 0.1499]) -Greedy action tensor([ 1.1744, 0.5443, 0.0830, -0.5521]) tensor([0.4887, 0.2603, 0.1641, 0.0869]) -Greedy action tensor([ 0.5061, 0.9268, -1.2516, 0.1406]) tensor([0.2950, 0.4494, 0.0509, 0.2047]) -Greedy action tensor([ 1.7706, 0.3245, -0.0033, 1.5786]) tensor([0.4483, 0.1056, 0.0761, 0.3700]) -Greedy action tensor([0.2050, 0.5481, 0.0647, 0.4019]) tensor([0.2224, 0.3135, 0.1933, 0.2708]) -Greedy action tensor([ 1.0114, -1.0900, 0.4188, 0.0703]) tensor([0.4842, 0.0592, 0.2677, 0.1889]) -Greedy action tensor([-0.4663, 0.4365, -0.2562, 1.1374]) tensor([0.1034, 0.2550, 0.1276, 0.5140]) -Greedy action tensor([ 0.7902, -0.3516, 0.0712, 0.0714]) tensor([0.4360, 0.1392, 0.2124, 0.2124]) -Greedy action tensor([ 0.0141, -0.3150, -0.3315, 0.5864]) tensor([0.2381, 0.1713, 0.1685, 0.4220]) -Greedy action tensor([ 0.8177, -0.0171, -0.2529, 0.8589]) tensor([0.3548, 0.1540, 0.1216, 0.3697]) -Greedy action tensor([1.0230, 0.4319, 0.0745, 0.9446]) tensor([0.3490, 0.1932, 0.1352, 0.3226]) -Greedy action tensor([ 1.2117, 0.1658, 0.1678, -0.0421]) tensor([0.5028, 0.1767, 0.1770, 0.1435]) -Greedy action tensor([ 0.6271, 0.1072, -0.1787, 0.9542]) tensor([0.2917, 0.1734, 0.1303, 0.4046]) -Greedy action tensor([ 0.5398, -0.0504, 1.2507, 0.5352]) tensor([0.2181, 0.1209, 0.4440, 0.2171]) -Greedy action tensor([ 0.3114, -0.3792, 0.6963, -0.7077]) tensor([0.3002, 0.1505, 0.4411, 0.1083]) -Greedy action tensor([-0.0768, -0.0681, -0.6883, 0.3953]) tensor([0.2407, 0.2428, 0.1306, 0.3859]) -Greedy action tensor([ 0.1517, -1.4030, -0.4582, 1.9207]) tensor([0.1312, 0.0277, 0.0713, 0.7697]) -Greedy action tensor([ 0.3224, -0.8149, -0.7386, 1.6793]) tensor([0.1802, 0.0578, 0.0624, 0.6997]) -Greedy action tensor([-0.0625, 1.0967, 0.3817, 0.3161]) tensor([0.1388, 0.4423, 0.2164, 0.2026]) -Greedy action tensor([ 1.2462, -1.3388, 0.2358, 1.9012]) tensor([0.2972, 0.0224, 0.1082, 0.5722]) -Greedy action tensor([-0.0505, 0.2568, 0.1836, 0.3874]) tensor([0.1933, 0.2629, 0.2443, 0.2995]) -Greedy action tensor([-0.4538, -0.6576, -0.6345, 1.2123]) tensor([0.1259, 0.1027, 0.1051, 0.6663]) -Greedy action tensor([ 0.3044, 0.1997, 1.1586, -0.4790]) tensor([0.2125, 0.1913, 0.4991, 0.0971]) -Greedy action tensor([ 1.6494, 0.6044, -1.1405, 0.6036]) tensor([0.5667, 0.1993, 0.0348, 0.1992]) -Greedy action tensor([-0.0845, 0.0732, 0.6355, -0.2684]) tensor([0.1977, 0.2315, 0.4062, 0.1645]) -Greedy action tensor([ 0.2046, 0.0935, -0.7766, 1.6154]) tensor([0.1570, 0.1405, 0.0589, 0.6436]) -Greedy action tensor([ 0.2156, -0.3970, 1.0614, 0.3228]) tensor([0.2006, 0.1087, 0.4674, 0.2233]) -Greedy action tensor([ 0.9099, -0.2606, 1.0916, 0.5496]) tensor([0.3118, 0.0967, 0.3740, 0.2175]) -Greedy action tensor([-0.2874, 0.1594, -1.1519, 0.7604]) tensor([0.1714, 0.2679, 0.0722, 0.4886]) -Greedy action tensor([ 1.1235, 0.2319, -0.9841, 2.1327]) tensor([0.2339, 0.0959, 0.0284, 0.6417]) -Greedy action tensor([ 1.6481, -0.2641, -0.0797, 1.1205]) tensor([0.5221, 0.0771, 0.0928, 0.3080]) -Greedy action tensor([ 1.3737, -1.1622, 0.7781, 0.4432]) tensor([0.4939, 0.0391, 0.2722, 0.1948]) -Greedy action tensor([ 2.2390, -0.4432, 1.5269, 1.6766]) tensor([0.4697, 0.0321, 0.2305, 0.2677]) -Greedy action tensor([-0.1940, -0.1555, 0.8494, -0.0730]) tensor([0.1665, 0.1730, 0.4726, 0.1879]) -Greedy action tensor([0.9833, 0.4275, 0.1786, 0.7701]) tensor([0.3535, 0.2028, 0.1581, 0.2856]) -Greedy action tensor([0.7005, 0.9830, 0.3920, 0.7046]) tensor([0.2460, 0.3263, 0.1807, 0.2470]) -Greedy action tensor([ 0.6484, 0.1140, 0.8825, -0.2093]) tensor([0.3054, 0.1790, 0.3860, 0.1296]) -Greedy action tensor([ 0.2417, -0.5528, 0.9798, 0.0410]) tensor([0.2293, 0.1036, 0.4796, 0.1876]) -Greedy action tensor([ 0.8885, 1.0591, -0.4933, -0.2984]) tensor([0.3647, 0.4325, 0.0916, 0.1113]) -Greedy action tensor([ 0.5081, -2.0723, 0.4369, 1.2791]) tensor([0.2399, 0.0182, 0.2234, 0.5186]) -Greedy action tensor([ 0.0148, -0.0730, 0.1623, 0.9144]) tensor([0.1807, 0.1655, 0.2094, 0.4443]) -Greedy action tensor([0.4853, 0.7205, 0.1179, 0.8810]) tensor([0.2251, 0.2847, 0.1559, 0.3343]) -Greedy action tensor([ 0.4561, 0.5033, -0.2820, 0.6813]) tensor([0.2646, 0.2774, 0.1265, 0.3315]) -Greedy action tensor([0.4857, 0.1550, 0.7462, 0.9167]) tensor([0.2195, 0.1577, 0.2849, 0.3379]) -Greedy action tensor([ 0.9020, -0.5458, -0.3134, 0.1450]) tensor([0.4998, 0.1175, 0.1482, 0.2345]) -Greedy action tensor([ 1.6501, -0.8687, -0.3792, 0.2229]) tensor([0.6887, 0.0555, 0.0905, 0.1653]) -Greedy action tensor([ 0.6658, -0.2955, -0.4194, 0.1687]) tensor([0.4295, 0.1642, 0.1451, 0.2612]) -Greedy action tensor([ 1.2689, -0.3456, -0.2606, -0.1579]) tensor([0.6040, 0.1202, 0.1308, 0.1450]) -Greedy action tensor([ 0.6299, -0.2283, -0.1599, -0.0552]) tensor([0.4198, 0.1780, 0.1906, 0.2116]) -Greedy action tensor([ 0.9385, -0.4197, -0.1364, 0.2563]) tensor([0.4753, 0.1222, 0.1622, 0.2403]) -Greedy action tensor([ 1.2578, -0.0057, -0.0503, -0.1075]) tensor([0.5530, 0.1563, 0.1495, 0.1412]) -Greedy action tensor([ 1.0888, -0.5768, -0.1178, 0.1674]) tensor([0.5302, 0.1002, 0.1586, 0.2110]) -Greedy action tensor([ 1.3584, -0.6498, -0.2587, 0.4926]) tensor([0.5703, 0.0766, 0.1132, 0.2399]) -Greedy action tensor([ 0.6767, 0.0595, -0.4528, -0.4035]) tensor([0.4541, 0.2450, 0.1468, 0.1542]) -Greedy action tensor([ 1.0510, -0.4394, -0.4262, 0.2840]) tensor([0.5214, 0.1175, 0.1190, 0.2421]) -Greedy action tensor([ 0.9355, -0.0230, -0.2610, -0.2865]) tensor([0.5050, 0.1936, 0.1526, 0.1488]) -Greedy action tensor([ 0.9220, -0.3470, -0.0786, 0.1736]) tensor([0.4713, 0.1325, 0.1733, 0.2230]) -Greedy action tensor([ 0.8980, -0.3751, -0.2093, 0.1148]) tensor([0.4837, 0.1354, 0.1598, 0.2210]) -Greedy action tensor([ 1.5741e+00, -8.0945e-01, -3.4135e-01, -1.3871e-03]) tensor([0.6914, 0.0638, 0.1018, 0.1430]) -Greedy action tensor([ 0.9645, -0.0916, -0.1589, -0.0817]) tensor([0.4940, 0.1718, 0.1606, 0.1735]) -Greedy action tensor([ 1.2179, -0.3331, -0.4672, -0.3339]) tensor([0.6214, 0.1317, 0.1152, 0.1316]) -Greedy action tensor([ 0.7680, -0.2454, -0.2509, 0.0262]) tensor([0.4545, 0.1650, 0.1641, 0.2165]) -Greedy action tensor([ 1.0198, -0.5522, -0.2359, 0.3565]) tensor([0.4981, 0.1034, 0.1419, 0.2566]) -Greedy action tensor([ 0.8225, -0.4165, -0.3887, -0.0542]) tensor([0.4991, 0.1446, 0.1486, 0.2077]) -Greedy action tensor([ 0.9827, -0.4552, -0.3847, 0.5466]) tensor([0.4676, 0.1110, 0.1191, 0.3023]) -Greedy action tensor([ 0.8188, -0.3541, -0.1559, -0.0781]) tensor([0.4774, 0.1477, 0.1801, 0.1947]) -Greedy action tensor([ 1.0795, -0.5020, -0.6375, 0.4910]) tensor([0.5154, 0.1060, 0.0926, 0.2861]) -Greedy action tensor([ 0.9692, -0.3608, 0.0255, 0.0758]) tensor([0.4847, 0.1282, 0.1887, 0.1984]) -Greedy action tensor([ 1.1944, -0.6229, -0.5514, 0.2142]) tensor([0.5840, 0.0949, 0.1019, 0.2192]) -Greedy action tensor([ 1.3179, -0.5820, -0.3889, 0.5124]) tensor([0.5625, 0.0841, 0.1021, 0.2513]) -Greedy action tensor([ 1.3823, -0.5560, -0.3552, 0.6162]) tensor([0.5603, 0.0807, 0.0986, 0.2605]) -Greedy action tensor([ 1.3007, -0.4501, -0.2801, 0.3894]) tensor([0.5613, 0.0975, 0.1155, 0.2257]) -Greedy action tensor([ 0.6114, -0.2204, -0.0781, -0.0087]) tensor([0.4040, 0.1759, 0.2028, 0.2173]) -Greedy action tensor([ 1.3570, -0.7472, -0.5829, 0.4683]) tensor([0.5964, 0.0727, 0.0857, 0.2452]) -Greedy action tensor([ 1.3921, -0.2371, -0.1220, -0.0314]) tensor([0.6035, 0.1183, 0.1328, 0.1454]) -Greedy action tensor([ 1.8533, -0.5329, -0.2370, 0.1451]) tensor([0.7159, 0.0659, 0.0885, 0.1297]) -Greedy action tensor([ 1.2433, -0.6347, -0.1437, 0.5566]) tensor([0.5247, 0.0802, 0.1311, 0.2640]) -Greedy action tensor([ 1.0509, -0.6946, -0.0733, 0.1659]) tensor([0.5230, 0.0913, 0.1699, 0.2158]) -Greedy action tensor([ 1.6867, -0.6042, -0.2041, 0.1201]) tensor([0.6845, 0.0693, 0.1033, 0.1429]) -Greedy action tensor([ 1.3525, -0.4754, -0.1066, -0.0398]) tensor([0.6091, 0.0979, 0.1416, 0.1514]) -Greedy action tensor([ 0.5651, -0.3394, -0.2695, 0.0885]) tensor([0.4066, 0.1646, 0.1765, 0.2524]) -Greedy action tensor([ 0.5726, -0.4612, 0.1370, -0.1354]) tensor([0.4008, 0.1425, 0.2592, 0.1974]) -Greedy action tensor([ 2.2269, -0.5519, -0.6344, 0.0350]) tensor([0.8123, 0.0505, 0.0465, 0.0907]) -Greedy action tensor([ 0.6996, -0.1859, -0.0725, 0.3263]) tensor([0.3902, 0.1609, 0.1803, 0.2686]) -Greedy action tensor([ 0.7669, -0.5472, -0.2169, 0.5408]) tensor([0.4098, 0.1101, 0.1532, 0.3269]) -Greedy action tensor([ 0.8318, -0.2064, -0.4640, 0.4238]) tensor([0.4362, 0.1544, 0.1194, 0.2900]) -Greedy action tensor([ 1.5277, -0.4866, 0.0910, 0.1218]) tensor([0.6187, 0.0825, 0.1471, 0.1517]) -Greedy action tensor([ 0.8264, 0.0107, 0.0643, -0.0991]) tensor([0.4338, 0.1919, 0.2024, 0.1719]) -Greedy action tensor([ 1.3329, -0.4936, -0.4353, -0.1164]) tensor([0.6384, 0.1028, 0.1089, 0.1499]) -Greedy action tensor([ 0.9080, 0.2436, 0.1507, -0.0819]) tensor([0.4246, 0.2185, 0.1991, 0.1578]) -Greedy action tensor([ 1.1804, -0.7797, -0.4193, 0.8356]) tensor([0.4875, 0.0687, 0.0985, 0.3453]) -Greedy action tensor([ 1.0365, 0.2402, 0.0227, -0.2760]) tensor([0.4801, 0.2165, 0.1742, 0.1292]) -Greedy action tensor([ 1.1815, -0.4956, -0.2828, 0.3789]) tensor([0.5358, 0.1002, 0.1239, 0.2401]) -Greedy action tensor([ 0.9602, -0.4996, -0.1148, -0.1216]) tensor([0.5229, 0.1215, 0.1784, 0.1772]) -Greedy action tensor([ 1.4676, -0.7339, -0.0794, 0.1148]) tensor([0.6321, 0.0699, 0.1346, 0.1634]) -Greedy action tensor([ 0.9969, -0.1784, -0.4310, 0.4793]) tensor([0.4663, 0.1440, 0.1118, 0.2779]) -Greedy action tensor([ 0.7887, -0.1027, 0.1476, -0.1945]) tensor([0.4327, 0.1775, 0.2279, 0.1619]) -Greedy action tensor([ 1.6647, -0.6413, -0.2583, 0.2951]) tensor([0.6667, 0.0664, 0.0974, 0.1695]) -Greedy action tensor([ 1.0400, -0.4717, 0.0130, 0.1468]) tensor([0.5030, 0.1109, 0.1801, 0.2059]) -Greedy action tensor([ 1.1656, -0.3469, -0.1347, -0.1643]) tensor([0.5690, 0.1254, 0.1550, 0.1505]) -Greedy action tensor([ 0.8863, -0.4382, -0.1733, -0.0978]) tensor([0.5034, 0.1339, 0.1745, 0.1882]) -Greedy action tensor([ 0.2236, 0.0087, -0.5237, 0.1482]) tensor([0.3118, 0.2515, 0.1477, 0.2891]) -Greedy action tensor([ 0.5036, -0.3772, -0.1790, -0.0028]) tensor([0.3964, 0.1643, 0.2003, 0.2389]) -Greedy action tensor([ 1.1227, -0.6658, -0.3592, 0.2843]) tensor([0.5474, 0.0915, 0.1244, 0.2367]) -Greedy action tensor([ 1.4896, -0.4644, -0.3621, 0.0463]) tensor([0.6515, 0.0923, 0.1023, 0.1539]) -Greedy action tensor([ 1.1535, -0.8081, -0.6332, 0.8752]) tensor([0.4842, 0.0681, 0.0811, 0.3666]) -Greedy action tensor([ 1.4542, -0.8787, -0.2831, 0.6460]) tensor([0.5819, 0.0564, 0.1024, 0.2593]) -Greedy action tensor([ 0.7091, -0.1639, -0.6300, 0.3728]) tensor([0.4177, 0.1745, 0.1095, 0.2984]) -Greedy action tensor([ 1.4060, -0.3714, -0.2092, 0.0950]) tensor([0.6107, 0.1033, 0.1214, 0.1646]) -Greedy action tensor([ 1.3068, -0.3955, -0.2984, 0.3642]) tensor([0.5641, 0.1028, 0.1133, 0.2198]) -Greedy action tensor([ 1.2189, -0.2527, -0.1005, -0.2011]) tensor([0.5752, 0.1320, 0.1537, 0.1390]) -Greedy action tensor([ 1.6978, -0.1156, -0.8218, 0.0218]) tensor([0.6990, 0.1140, 0.0563, 0.1308]) -Greedy action tensor([ 0.9886, -0.5649, -0.3424, 0.0713]) tensor([0.5332, 0.1128, 0.1409, 0.2131]) -Greedy action tensor([ 1.1426, -0.4635, -0.2921, 0.3368]) tensor([0.5303, 0.1064, 0.1263, 0.2369]) -Greedy action tensor([ 1.0574, -0.3956, -0.0393, -0.0335]) tensor([0.5253, 0.1229, 0.1754, 0.1764]) -Greedy action tensor([ 0.6946, -0.4774, -0.1899, 0.0407]) tensor([0.4459, 0.1381, 0.1841, 0.2319]) -Greedy action tensor([ 2.1785, -0.4055, -0.6803, 0.1550]) tensor([0.7905, 0.0597, 0.0453, 0.1045]) -Greedy action tensor([ 1.2460, -0.6396, -0.2743, 0.0684]) tensor([0.5958, 0.0904, 0.1303, 0.1835]) -Greedy action tensor([ 1.0971, -0.2453, -0.0195, 0.1672]) tensor([0.5042, 0.1317, 0.1651, 0.1990]) -Greedy action tensor([ 0.9073, -0.6287, -0.4948, 0.2905]) tensor([0.4998, 0.1076, 0.1230, 0.2697]) -Greedy action tensor([ 1.4413, -0.4047, -0.3466, 0.7425]) tensor([0.5487, 0.0866, 0.0918, 0.2728]) -Greedy action tensor([ 1.0327, -0.5320, -0.2663, 0.2101]) tensor([0.5205, 0.1089, 0.1420, 0.2286]) -Greedy action tensor([ 1.1433, -0.5033, 0.0142, -0.0451]) tensor([0.5492, 0.1058, 0.1776, 0.1674]) -Greedy action tensor([ 1.2012, -0.0096, -0.1032, 0.1171]) tensor([0.5242, 0.1562, 0.1422, 0.1773]) -Greedy action tensor([ 1.0611, -0.2576, -0.1218, 0.0948]) tensor([0.5117, 0.1369, 0.1568, 0.1947]) -Greedy action tensor([-1.6193, -0.7901, 0.9571, 0.7248]) tensor([0.0372, 0.0853, 0.4895, 0.3880]) -Greedy action tensor([-1.0621, -0.4268, 0.4971, 0.9504]) tensor([0.0661, 0.1248, 0.3144, 0.4947]) -Greedy action tensor([-0.9415, 0.0200, 0.3805, -0.5219]) tensor([0.1125, 0.2943, 0.4220, 0.1712]) -Greedy action tensor([-1.6602, -0.5223, 0.5171, 0.0258]) tensor([0.0545, 0.1701, 0.4810, 0.2943]) -Greedy action tensor([-1.2619, -0.5804, 0.3043, 0.2460]) tensor([0.0814, 0.1609, 0.3899, 0.3678]) -Greedy action tensor([-1.6966, -0.4548, 0.5687, 0.0379]) tensor([0.0506, 0.1752, 0.4875, 0.2867]) -Greedy action tensor([-1.8212, -0.4773, 0.6092, -0.0737]) tensor([0.0456, 0.1748, 0.5180, 0.2617]) -Greedy action tensor([-1.7858, -0.4974, 1.0070, 0.6392]) tensor([0.0310, 0.1124, 0.5062, 0.3504]) -Greedy action tensor([-1.9143, -0.4567, 0.6504, -0.1629]) tensor([0.0416, 0.1786, 0.5403, 0.2396]) -Greedy action tensor([-1.0443, -0.5370, 0.2825, 0.5100]) tensor([0.0896, 0.1488, 0.3377, 0.4239]) -Greedy action tensor([-0.5590, 0.4847, 0.0818, 0.0272]) tensor([0.1327, 0.3769, 0.2519, 0.2385]) -Greedy action tensor([-1.3581, -0.6292, 0.5490, 0.5990]) tensor([0.0592, 0.1228, 0.3988, 0.4192]) -Greedy action tensor([-1.5958, -0.7621, 0.8327, -0.0583]) tensor([0.0518, 0.1193, 0.5878, 0.2411]) -Greedy action tensor([-0.3574, 0.4487, 0.5518, 1.4087]) tensor([0.0864, 0.1935, 0.2146, 0.5055]) -Greedy action tensor([-1.8352, -0.4419, 0.6183, -0.0885]) tensor([0.0447, 0.1799, 0.5193, 0.2561]) -Greedy action tensor([-1.2055, -0.5842, 0.4170, -0.0428]) tensor([0.0899, 0.1673, 0.4553, 0.2875]) -Greedy action tensor([-1.0275, -0.5609, 0.4861, -0.3287]) tensor([0.1093, 0.1743, 0.4966, 0.2198]) -Greedy action tensor([-0.8919, 0.3988, 0.7638, 1.2515]) tensor([0.0543, 0.1976, 0.2846, 0.4635]) -Greedy action tensor([-1.6543, -0.6464, 1.0116, 0.7187]) tensor([0.0347, 0.0950, 0.4985, 0.3719]) -Greedy action tensor([-1.8322, -0.8982, 0.0383, -0.4505]) tensor([0.0713, 0.1815, 0.4631, 0.2840]) -Greedy action tensor([-1.8160, -0.4440, 0.5956, -0.1030]) tensor([0.0462, 0.1822, 0.5153, 0.2563]) -Greedy action tensor([-0.4604, -0.4099, 0.1657, 0.1410]) tensor([0.1740, 0.1830, 0.3255, 0.3175]) -Greedy action tensor([-0.8960, -0.6125, 0.3449, -0.1182]) tensor([0.1256, 0.1667, 0.4343, 0.2733]) -Greedy action tensor([-1.7905, -0.4780, 0.5957, -0.0423]) tensor([0.0469, 0.1742, 0.5097, 0.2693]) -Greedy action tensor([-1.1932, -0.2582, 0.9528, 1.0418]) tensor([0.0466, 0.1188, 0.3987, 0.4358]) -Greedy action tensor([-1.4673, -0.5211, 0.4373, 0.2035]) tensor([0.0641, 0.1650, 0.4303, 0.3406]) -Greedy action tensor([-0.3727, -0.5337, 0.2996, 0.4336]) tensor([0.1653, 0.1407, 0.3238, 0.3702]) -Greedy action tensor([-1.9902, -0.7713, 0.3076, -0.2034]) tensor([0.0492, 0.1666, 0.4901, 0.2940]) -Greedy action tensor([-1.7862, -0.4203, 0.5808, -0.1063]) tensor([0.0477, 0.1871, 0.5091, 0.2561]) -Greedy action tensor([-2.0343, -0.9533, 0.5440, -0.0438]) tensor([0.0409, 0.1206, 0.5390, 0.2995]) -Greedy action tensor([-1.9471, -0.6382, 0.9497, 0.1661]) tensor([0.0322, 0.1191, 0.5826, 0.2661]) -Greedy action tensor([-1.7406, -0.5896, 0.7629, 0.0797]) tensor([0.0443, 0.1401, 0.5419, 0.2737]) -Greedy action tensor([-1.9263, -0.9475, 0.1997, -0.3330]) tensor([0.0590, 0.1569, 0.4941, 0.2901]) -Greedy action tensor([-1.0029, -0.5015, 0.7875, 1.3433]) tensor([0.0524, 0.0865, 0.3139, 0.5472]) -Greedy action tensor([-1.4037, -0.6074, 0.4934, -0.0534]) tensor([0.0728, 0.1614, 0.4851, 0.2808]) -Greedy action tensor([-1.9366, -0.7861, 0.6032, 0.1451]) tensor([0.0402, 0.1271, 0.5100, 0.3226]) -Greedy action tensor([-1.8140, -0.7978, -0.0672, -0.4031]) tensor([0.0735, 0.2032, 0.4218, 0.3015]) -Greedy action tensor([-1.3073, -0.9785, 0.3757, -0.4420]) tensor([0.0986, 0.1369, 0.5304, 0.2341]) -Greedy action tensor([-1.0037, -0.7414, 0.8594, 0.6710]) tensor([0.0710, 0.0923, 0.4576, 0.3791]) -Greedy action tensor([-1.1004, -0.5259, 0.6164, 1.0468]) tensor([0.0592, 0.1051, 0.3293, 0.5064]) -Greedy action tensor([-0.4784, 1.0686, 0.3328, 1.0740]) tensor([0.0789, 0.3707, 0.1776, 0.3727]) -Greedy action tensor([-1.9739, -0.6411, 0.9385, 0.2099]) tensor([0.0312, 0.1182, 0.5737, 0.2769]) -Greedy action tensor([-1.0726, -0.0729, 0.5396, -0.5426]) tensor([0.0959, 0.2605, 0.4807, 0.1629]) -Greedy action tensor([-1.0138, -0.6331, 0.3824, 0.4583]) tensor([0.0921, 0.1347, 0.3719, 0.4013]) -Greedy action tensor([-0.9111, -0.5029, 0.2147, 0.3952]) tensor([0.1078, 0.1621, 0.3322, 0.3979]) -Greedy action tensor([-0.9796, -0.6062, 0.2935, 0.1513]) tensor([0.1096, 0.1592, 0.3915, 0.3396]) -Greedy action tensor([-1.9369, -0.6745, 0.7961, 0.0718]) tensor([0.0365, 0.1291, 0.5620, 0.2724]) -Greedy action tensor([-0.9355, -0.6067, 0.3717, -0.0527]) tensor([0.1176, 0.1634, 0.4347, 0.2843]) -Greedy action tensor([-1.2626, -0.5293, 0.3376, 0.1707]) tensor([0.0818, 0.1703, 0.4051, 0.3429]) -Greedy action tensor([-1.7282, -0.4826, 0.5760, -0.0300]) tensor([0.0501, 0.1741, 0.5019, 0.2738]) -Greedy action tensor([-1.7611, -0.4634, 0.7952, 0.3998]) tensor([0.0381, 0.1396, 0.4914, 0.3309]) -Greedy action tensor([-1.5520, -0.5109, 0.4779, 0.1084]) tensor([0.0599, 0.1695, 0.4557, 0.3149]) -Greedy action tensor([-1.8387, -0.4919, 0.7420, 0.0050]) tensor([0.0410, 0.1578, 0.5419, 0.2593]) -Greedy action tensor([-0.6583, -0.2328, 0.6431, 1.3189]) tensor([0.0745, 0.1140, 0.2737, 0.5379]) -Greedy action tensor([-1.2146, -0.5723, 0.2960, 0.2528]) tensor([0.0850, 0.1615, 0.3849, 0.3686]) -Greedy action tensor([-1.5798, -0.5798, 1.4189, 1.1474]) tensor([0.0256, 0.0696, 0.5135, 0.3914]) -Greedy action tensor([-1.7377, -0.5334, 0.5412, -0.1064]) tensor([0.0521, 0.1736, 0.5084, 0.2660]) -Greedy action tensor([-1.1969, -0.5693, 0.3010, 0.2990]) tensor([0.0847, 0.1586, 0.3787, 0.3780]) -Greedy action tensor([-1.9722, -0.8539, 0.1469, -0.3130]) tensor([0.0567, 0.1735, 0.4719, 0.2979]) -Greedy action tensor([-1.7185, -0.7670, 0.1971, -0.4852]) tensor([0.0724, 0.1875, 0.4916, 0.2485]) -Greedy action tensor([-0.8181, -0.5559, 0.5428, 0.6653]) tensor([0.0943, 0.1225, 0.3676, 0.4156]) -Greedy action tensor([-1.2561, -0.6136, 0.5076, 0.2052]) tensor([0.0766, 0.1457, 0.4471, 0.3305]) -Greedy action tensor([-1.7657, -0.8229, -0.1166, -0.4080]) tensor([0.0790, 0.2028, 0.4110, 0.3071]) -Greedy action tensor([-1.2397, -0.2772, -0.0907, -0.3863]) tensor([0.1096, 0.2871, 0.3459, 0.2574]) -Greedy action tensor([-1.6949, -0.3678, 0.6289, 0.0528]) tensor([0.0483, 0.1819, 0.4928, 0.2770]) -Greedy action tensor([-1.2781, -0.0258, 0.7784, 0.9413]) tensor([0.0465, 0.1626, 0.3633, 0.4276]) -Greedy action tensor([-1.3983, -0.4603, 0.7312, -0.5451]) tensor([0.0699, 0.1785, 0.5876, 0.1640]) -Greedy action tensor([-1.6923, -0.4921, 0.6268, -0.2586]) tensor([0.0535, 0.1778, 0.5442, 0.2245]) -Greedy action tensor([-1.9139, -0.7314, 0.7601, 0.0235]) tensor([0.0389, 0.1269, 0.5641, 0.2701]) -Greedy action tensor([-1.4801, 0.5897, 0.4543, 0.1975]) tensor([0.0472, 0.3738, 0.3265, 0.2525]) -Greedy action tensor([-0.5099, -0.4051, 0.2319, 0.6027]) tensor([0.1379, 0.1531, 0.2895, 0.4195]) -Greedy action tensor([-1.3720, -0.5928, 0.4531, 0.1791]) tensor([0.0709, 0.1546, 0.4400, 0.3345]) -Greedy action tensor([-1.1969, -0.2015, 0.7736, 1.0347]) tensor([0.0495, 0.1340, 0.3553, 0.4612]) -Greedy action tensor([-1.7713, -0.9148, 0.1575, -0.5322]) tensor([0.0731, 0.1720, 0.5027, 0.2522]) -Greedy action tensor([-1.6286, -0.5296, 0.4934, 0.0472]) tensor([0.0565, 0.1696, 0.4718, 0.3020]) -Greedy action tensor([-1.6940, -0.4716, 0.5435, -0.0306]) tensor([0.0525, 0.1783, 0.4920, 0.2771]) -Greedy action tensor([-1.7371, -0.7150, -0.0236, -0.4218]) tensor([0.0766, 0.2129, 0.4251, 0.2854]) -Greedy action tensor([-1.5149, -0.7584, -0.2652, -0.4497]) tensor([0.1050, 0.2238, 0.3665, 0.3047]) -Greedy action tensor([-1.6611, -0.8268, 0.0019, -0.4361]) tensor([0.0835, 0.1922, 0.4402, 0.2841]) -Greedy action tensor([-1.0083, -0.1396, -0.3565, -0.3296]) tensor([0.1375, 0.3277, 0.2638, 0.2710]) -Greedy action tensor([-1.2196, -0.5001, 0.3725, 0.5725]) tensor([0.0716, 0.1470, 0.3518, 0.4297]) -Greedy action tensor([ 0.4395, -0.2215, 0.1386, -0.3333]) tensor([0.3679, 0.1900, 0.2723, 0.1699]) -Greedy action tensor([ 0.1875, 0.2111, -0.0964, -0.3785]) tensor([0.2990, 0.3061, 0.2251, 0.1698]) -Greedy action tensor([ 0.3822, -0.1635, -0.1535, -0.3482]) tensor([0.3779, 0.2190, 0.2212, 0.1820]) -Greedy action tensor([ 1.0935, -0.5505, -0.0562, -0.5375]) tensor([0.5863, 0.1133, 0.1857, 0.1148]) -Greedy action tensor([ 0.9738, -0.7925, -0.0875, -0.4119]) tensor([0.5659, 0.0967, 0.1958, 0.1416]) -Greedy action tensor([ 0.5468, -0.0659, 0.0541, -0.4648]) tensor([0.3974, 0.2153, 0.2428, 0.1445]) -Greedy action tensor([ 0.6392, -0.4296, -0.1570, -0.3782]) tensor([0.4638, 0.1593, 0.2092, 0.1677]) -Greedy action tensor([ 0.5035, -0.4404, -0.1008, -0.4397]) tensor([0.4301, 0.1674, 0.2350, 0.1675]) -Greedy action tensor([ 0.6117, -0.1148, -0.0214, -0.2774]) tensor([0.4123, 0.1994, 0.2189, 0.1695]) -Greedy action tensor([ 0.4917, -0.0844, 0.0139, -0.0843]) tensor([0.3644, 0.2048, 0.2260, 0.2048]) -Greedy action tensor([ 0.4347, -0.1319, -0.1150, -0.3181]) tensor([0.3823, 0.2169, 0.2207, 0.1801]) -Greedy action tensor([ 0.7062, -0.2669, -0.1244, -0.3940]) tensor([0.4659, 0.1761, 0.2030, 0.1550]) -Greedy action tensor([ 0.5653, -0.1589, 0.0386, -0.2326]) tensor([0.3960, 0.1919, 0.2338, 0.1783]) -Greedy action tensor([ 0.8264, -0.4950, 0.0019, -0.6976]) tensor([0.5200, 0.1387, 0.2280, 0.1133]) -Greedy action tensor([ 0.6254, -0.3339, 0.0143, -0.3108]) tensor([0.4314, 0.1653, 0.2341, 0.1692]) -Greedy action tensor([ 0.6243, 0.3247, -0.1346, -0.1867]) tensor([0.3768, 0.2793, 0.1764, 0.1675]) -Greedy action tensor([ 0.4854, -0.3156, 0.0994, -0.4954]) tensor([0.3994, 0.1793, 0.2715, 0.1498]) -Greedy action tensor([ 0.3186, -0.1166, 0.1306, -0.3514]) tensor([0.3347, 0.2166, 0.2774, 0.1713]) -Greedy action tensor([ 0.9695, -0.5041, 0.0689, -0.3206]) tensor([0.5234, 0.1199, 0.2127, 0.1441]) -Greedy action tensor([ 0.6130, -0.1282, -0.1384, -0.5290]) tensor([0.4410, 0.2102, 0.2080, 0.1408]) -Greedy action tensor([ 0.6322, -0.2657, 0.0014, -0.5934]) tensor([0.4478, 0.1824, 0.2383, 0.1315]) -Greedy action tensor([ 0.6810, -0.2608, 0.2370, -0.4829]) tensor([0.4267, 0.1664, 0.2737, 0.1332]) -Greedy action tensor([ 0.4682, -0.1848, -0.0471, -0.1627]) tensor([0.3774, 0.1964, 0.2254, 0.2008]) -Greedy action tensor([ 0.6975, -0.1488, -0.0493, -0.1324]) tensor([0.4275, 0.1834, 0.2026, 0.1864]) -Greedy action tensor([ 0.3107, 0.1884, 0.0475, -0.2388]) tensor([0.3095, 0.2739, 0.2379, 0.1787]) -Greedy action tensor([ 0.6219, -0.1985, -0.0247, -0.3656]) tensor([0.4280, 0.1884, 0.2242, 0.1594]) -Greedy action tensor([ 0.6402, 0.0655, -0.0499, -0.2520]) tensor([0.4042, 0.2275, 0.2027, 0.1656]) -Greedy action tensor([ 0.5374, -0.3900, -0.0827, -0.4137]) tensor([0.4311, 0.1705, 0.2319, 0.1665]) -Greedy action tensor([ 0.3989, 0.0200, 0.0330, -0.2720]) tensor([0.3461, 0.2369, 0.2400, 0.1769]) -Greedy action tensor([ 0.5728, -0.1960, -0.0496, -0.1879]) tensor([0.4053, 0.1879, 0.2175, 0.1894]) -Greedy action tensor([ 0.6822, -0.4127, 0.0911, -0.3411]) tensor([0.4449, 0.1489, 0.2464, 0.1599]) -Greedy action tensor([ 0.4219, -0.0467, -0.0330, -0.3045]) tensor([0.3644, 0.2281, 0.2312, 0.1763]) -Greedy action tensor([ 0.7080, -0.1932, -0.0478, -0.3733]) tensor([0.4515, 0.1833, 0.2120, 0.1531]) -Greedy action tensor([ 0.4078, 0.0249, 0.0144, -0.1596]) tensor([0.3421, 0.2332, 0.2308, 0.1939]) -Greedy action tensor([ 0.8791, -0.5942, -0.0990, -0.4622]) tensor([0.5357, 0.1228, 0.2014, 0.1401]) -Greedy action tensor([ 0.9401, -0.5397, -0.0502, -0.5033]) tensor([0.5449, 0.1241, 0.2024, 0.1287]) -Greedy action tensor([ 0.7346, -0.4464, 0.0621, -0.5177]) tensor([0.4755, 0.1459, 0.2427, 0.1359]) -Greedy action tensor([ 0.4979, -0.0494, -0.0728, -0.0762]) tensor([0.3694, 0.2137, 0.2088, 0.2081]) -Greedy action tensor([ 0.3072, 0.1731, -0.0004, -0.2971]) tensor([0.3168, 0.2771, 0.2329, 0.1731]) -Greedy action tensor([ 0.5348, -0.2332, 0.1537, -0.4034]) tensor([0.3940, 0.1828, 0.2691, 0.1542]) -Greedy action tensor([ 0.7273, -0.3290, 0.0226, -0.7623]) tensor([0.4837, 0.1682, 0.2391, 0.1091]) -Greedy action tensor([ 0.2789, 0.2596, -0.0362, -0.1651]) tensor([0.2983, 0.2926, 0.2177, 0.1914]) -Greedy action tensor([ 0.9123, -0.7855, 0.0149, -0.6685]) tensor([0.5566, 0.1019, 0.2269, 0.1146]) -Greedy action tensor([ 0.9495, -0.4861, -0.1016, -0.4386]) tensor([0.5444, 0.1295, 0.1903, 0.1358]) -Greedy action tensor([ 0.6153, 0.1058, -0.0122, -0.2863]) tensor([0.3936, 0.2365, 0.2101, 0.1598]) -Greedy action tensor([ 0.5233, 0.1514, -0.1512, -0.0519]) tensor([0.3621, 0.2497, 0.1845, 0.2037]) -Greedy action tensor([ 0.8202, -0.0928, -0.0067, -0.1612]) tensor([0.4518, 0.1813, 0.1976, 0.1693]) -Greedy action tensor([ 0.1925, 0.0474, -0.0651, -0.0035]) tensor([0.2890, 0.2500, 0.2234, 0.2376]) -Greedy action tensor([ 0.2488, 0.2841, -0.0645, -0.0622]) tensor([0.2857, 0.2960, 0.2089, 0.2094]) -Greedy action tensor([ 0.5082, 0.2444, -0.0693, -0.0245]) tensor([0.3429, 0.2634, 0.1925, 0.2013]) -Greedy action tensor([ 0.6006, -0.2802, 0.1120, -0.3974]) tensor([0.4173, 0.1729, 0.2560, 0.1538]) -Greedy action tensor([ 0.5525, -0.3336, -0.1153, -0.3942]) tensor([0.4323, 0.1782, 0.2217, 0.1677]) -Greedy action tensor([ 0.8320, -0.5243, -0.0957, -0.3556]) tensor([0.5107, 0.1316, 0.2020, 0.1557]) -Greedy action tensor([ 0.6503, -0.0016, -0.1201, -0.3265]) tensor([0.4236, 0.2208, 0.1961, 0.1595]) -Greedy action tensor([ 0.5472, -0.0705, 0.0013, -0.2041]) tensor([0.3861, 0.2082, 0.2237, 0.1821]) -Greedy action tensor([ 0.8736, -0.7842, -0.2141, -0.8456]) tensor([0.5859, 0.1116, 0.1974, 0.1050]) -Greedy action tensor([ 0.4928, -0.3670, -0.0402, -0.4219]) tensor([0.4148, 0.1756, 0.2434, 0.1662]) -Greedy action tensor([ 0.6351, -0.0103, -0.0109, -0.3456]) tensor([0.4126, 0.2164, 0.2163, 0.1548]) -Greedy action tensor([ 0.4701, -0.3700, -0.0816, -0.4907]) tensor([0.4184, 0.1806, 0.2410, 0.1601]) -Greedy action tensor([ 0.6731, -0.1972, -0.0350, 0.0123]) tensor([0.4119, 0.1725, 0.2029, 0.2127]) -Greedy action tensor([ 0.5067, -0.2832, -0.0166, -0.3078]) tensor([0.4017, 0.1823, 0.2381, 0.1779]) -Greedy action tensor([ 0.6014, -0.1991, 0.0498, -0.1234]) tensor([0.3985, 0.1790, 0.2295, 0.1930]) -Greedy action tensor([ 0.5226, -0.3458, -0.1247, -0.4390]) tensor([0.4300, 0.1805, 0.2251, 0.1644]) -Greedy action tensor([ 0.7145, -0.3379, -0.1680, -0.4147]) tensor([0.4793, 0.1673, 0.1983, 0.1550]) -Greedy action tensor([ 0.3885, -0.0160, 0.0072, -0.3896]) tensor([0.3559, 0.2375, 0.2431, 0.1635]) -Greedy action tensor([ 0.5194, -0.1907, 0.2705, -0.4415]) tensor([0.3768, 0.1852, 0.2938, 0.1441]) -Greedy action tensor([ 0.2793, 0.0457, 0.0513, -0.1756]) tensor([0.3103, 0.2457, 0.2471, 0.1969]) -Greedy action tensor([ 0.4499, -0.0392, -0.0392, -0.1828]) tensor([0.3626, 0.2224, 0.2224, 0.1926]) -Greedy action tensor([ 0.3273, 0.0632, 0.0215, -0.1173]) tensor([0.3179, 0.2441, 0.2341, 0.2038]) -Greedy action tensor([ 0.5030, -0.2583, 0.0153, -0.3410]) tensor([0.3982, 0.1860, 0.2445, 0.1712]) -Greedy action tensor([ 0.4774, 0.1256, -0.1116, -0.2017]) tensor([0.3616, 0.2544, 0.2007, 0.1834]) -Greedy action tensor([ 0.3924, 0.0676, -0.0224, 0.0181]) tensor([0.3256, 0.2353, 0.2151, 0.2240]) -Greedy action tensor([ 0.6812, -0.4792, 0.0575, -0.4552]) tensor([0.4608, 0.1444, 0.2469, 0.1479]) -Greedy action tensor([ 0.4733, 0.0275, -0.0371, -0.1621]) tensor([0.3610, 0.2311, 0.2167, 0.1912]) -Greedy action tensor([ 0.5535, -0.2154, 0.0235, -0.3621]) tensor([0.4078, 0.1890, 0.2400, 0.1632]) -Greedy action tensor([ 1.0560, -0.4428, 0.1099, -0.5072]) tensor([0.5491, 0.1227, 0.2132, 0.1150]) -Greedy action tensor([ 0.7096, -0.2818, 0.0199, -0.2733]) tensor([0.4450, 0.1651, 0.2233, 0.1665]) -Greedy action tensor([ 0.6672, 0.0305, -0.1233, -0.1638]) tensor([0.4135, 0.2188, 0.1876, 0.1801]) -Greedy action tensor([ 0.6637, -0.1224, 0.0576, -0.2704]) tensor([0.4177, 0.1903, 0.2278, 0.1641]) -Greedy action tensor([ 0.5861, -0.1502, -0.1074, -0.3236]) tensor([0.4199, 0.2011, 0.2099, 0.1691]) -Greedy action tensor([ 0.9107, -0.0477, 0.0591, -1.1418]) tensor([0.5158, 0.1978, 0.2201, 0.0662]) -Greedy action tensor([ 1.5002, -0.4865, 0.0333, 2.0209]) tensor([0.3278, 0.0450, 0.0756, 0.5517]) -Greedy action tensor([1.0909, 0.3501, 0.0732, 0.7135]) tensor([0.3962, 0.1889, 0.1432, 0.2717]) -Greedy action tensor([-0.2298, -2.3277, -0.4286, 1.1364]) tensor([0.1706, 0.0209, 0.1398, 0.6687]) -Greedy action tensor([ 0.6027, -0.1011, 0.0699, 0.6260]) tensor([0.3220, 0.1593, 0.1890, 0.3296]) -Greedy action tensor([ 0.3066, -0.3335, -0.2227, 0.3730]) tensor([0.3140, 0.1655, 0.1849, 0.3355]) -Greedy action tensor([-0.1548, -0.8712, -0.6112, 2.0655]) tensor([0.0882, 0.0431, 0.0559, 0.8127]) -Greedy action tensor([ 1.2162, -0.4770, 0.5248, 1.8092]) tensor([0.2862, 0.0526, 0.1433, 0.5178]) -Greedy action tensor([-0.3060, 1.0964, -0.2023, -0.3544]) tensor([0.1403, 0.5704, 0.1557, 0.1337]) -Greedy action tensor([ 1.1009, 0.1938, 0.7312, -0.1447]) tensor([0.4197, 0.1694, 0.2900, 0.1208]) -Greedy action tensor([ 0.6890, -0.4393, -0.1292, 0.3629]) tensor([0.4022, 0.1301, 0.1775, 0.2902]) -Greedy action tensor([-0.4891, -1.0750, 0.8189, 0.0234]) tensor([0.1444, 0.0804, 0.5341, 0.2411]) -Greedy action tensor([-0.7558, -0.6183, -1.6919, 0.0449]) tensor([0.2098, 0.2407, 0.0823, 0.4672]) -Greedy action tensor([-0.2925, -1.1692, 0.7131, 0.9908]) tensor([0.1289, 0.0536, 0.3523, 0.4651]) -Greedy action tensor([-0.7921, -0.8144, 1.0033, -0.1322]) tensor([0.1007, 0.0984, 0.6062, 0.1947]) -Greedy action tensor([0.6560, 0.7168, 0.8217, 0.5295]) tensor([0.2425, 0.2577, 0.2862, 0.2137]) -Greedy action tensor([-0.4865, 1.4109, 0.9454, -0.7547]) tensor([0.0792, 0.5284, 0.3318, 0.0606]) -Greedy action tensor([-0.4240, 0.1339, -0.5543, 1.0806]) tensor([0.1230, 0.2150, 0.1080, 0.5540]) -Greedy action tensor([0.5116, 0.5986, 0.7818, 0.4258]) tensor([0.2315, 0.2526, 0.3034, 0.2125]) -Greedy action tensor([ 1.6100, -0.7245, -0.3323, 1.1349]) tensor([0.5371, 0.0520, 0.0770, 0.3339]) -Greedy action tensor([ 0.8124, -0.6304, -0.8823, 0.6929]) tensor([0.4334, 0.1024, 0.0796, 0.3846]) -Greedy action tensor([ 0.9743, -0.1029, 0.5984, 1.0380]) tensor([0.3233, 0.1101, 0.2220, 0.3446]) -Greedy action tensor([ 0.2768, -1.1689, -0.4837, 1.2885]) tensor([0.2246, 0.0529, 0.1050, 0.6176]) -Greedy action tensor([ 0.0431, -0.0922, 1.0205, -1.1583]) tensor([0.2070, 0.1808, 0.5500, 0.0623]) -Greedy action tensor([ 1.9397, -1.2049, 0.2997, 1.0956]) tensor([0.5999, 0.0258, 0.1164, 0.2579]) -Greedy action tensor([ 1.4788, 0.0097, 0.4245, -0.0709]) tensor([0.5584, 0.1285, 0.1946, 0.1186]) -Greedy action tensor([ 1.1315, -0.5969, 1.2737, 1.4622]) tensor([0.2687, 0.0477, 0.3097, 0.3740]) -Greedy action tensor([ 1.0888, -0.9469, 1.2174, 0.0188]) tensor([0.3830, 0.0500, 0.4356, 0.1314]) -Greedy action tensor([ 2.4050, -0.5566, 0.6978, 0.8113]) tensor([0.6962, 0.0360, 0.1263, 0.1415]) -Greedy action tensor([ 1.1772, -0.9353, 0.0283, 0.3926]) tensor([0.5279, 0.0638, 0.1673, 0.2409]) -Greedy action tensor([2.7004, 0.3043, 0.9080, 0.7468]) tensor([0.7146, 0.0651, 0.1190, 0.1013]) -Greedy action tensor([-0.2609, 0.3147, 0.1433, 0.6866]) tensor([0.1459, 0.2594, 0.2185, 0.3762]) -Greedy action tensor([-0.1472, -1.0157, 0.0151, 0.5828]) tensor([0.2141, 0.0898, 0.2518, 0.4442]) -Greedy action tensor([0.6316, 0.0658, 1.3421, 0.2272]) tensor([0.2342, 0.1330, 0.4766, 0.1563]) -Greedy action tensor([ 0.8595, -0.0193, 0.3681, 1.0930]) tensor([0.3040, 0.1262, 0.1859, 0.3839]) -Greedy action tensor([ 0.8790, 0.4555, 1.0890, -0.1301]) tensor([0.3074, 0.2013, 0.3793, 0.1121]) -Greedy action tensor([ 1.4747, -0.9974, 1.1485, 1.1666]) tensor([0.3936, 0.0332, 0.2840, 0.2892]) -Greedy action tensor([1.3099, 0.3047, 0.5878, 0.6742]) tensor([0.4199, 0.1537, 0.2040, 0.2224]) -Greedy action tensor([ 1.0036, -0.0870, -0.4191, 1.1558]) tensor([0.3648, 0.1226, 0.0879, 0.4247]) -Greedy action tensor([0.9974, 0.7743, 0.2310, 0.4797]) tensor([0.3496, 0.2797, 0.1624, 0.2083]) -Greedy action tensor([ 0.7422, -0.4197, 1.3185, 0.2764]) tensor([0.2688, 0.0841, 0.4784, 0.1687]) -Greedy action tensor([ 0.8328, -1.7579, 0.0285, 0.6607]) tensor([0.4230, 0.0317, 0.1892, 0.3561]) -Greedy action tensor([ 0.7659, -1.7108, -1.0117, 0.8869]) tensor([0.4199, 0.0353, 0.0710, 0.4739]) -Greedy action tensor([ 0.7624, -0.3613, 0.5156, 0.5845]) tensor([0.3397, 0.1104, 0.2654, 0.2844]) -Greedy action tensor([ 0.5756, -0.1654, 0.8287, 0.0334]) tensor([0.2988, 0.1424, 0.3849, 0.1738]) -Greedy action tensor([ 0.8732, -0.3675, 1.4057, 0.8646]) tensor([0.2510, 0.0726, 0.4275, 0.2489]) -Greedy action tensor([ 1.1530, -0.7033, 0.0519, 1.2827]) tensor([0.3806, 0.0595, 0.1266, 0.4333]) -Greedy action tensor([ 0.7259, -0.6140, -0.7077, 1.4949]) tensor([0.2734, 0.0716, 0.0652, 0.5898]) -Greedy action tensor([ 1.3452, 0.7202, -0.2918, 1.6384]) tensor([0.3257, 0.1743, 0.0634, 0.4366]) -Greedy action tensor([ 0.9229, -1.4627, 0.8023, 0.0142]) tensor([0.4199, 0.0386, 0.3722, 0.1693]) -Greedy action tensor([1.4331, 0.2585, 1.3841, 0.5312]) tensor([0.3750, 0.1158, 0.3570, 0.1522]) -Greedy action tensor([ 0.0652, -0.6434, 0.4637, 1.2940]) tensor([0.1563, 0.0769, 0.2328, 0.5340]) -Greedy action tensor([-0.1180, -1.8095, 0.4783, 0.9315]) tensor([0.1708, 0.0315, 0.3100, 0.4878]) -Greedy action tensor([-0.9142, 0.3398, 0.4116, 1.2339]) tensor([0.0594, 0.2081, 0.2236, 0.5089]) -Greedy action tensor([ 1.0799, -0.1429, 1.5657, 0.1475]) tensor([0.3018, 0.0889, 0.4905, 0.1188]) -Greedy action tensor([ 0.5116, 0.4101, -0.8341, 0.7867]) tensor([0.2873, 0.2596, 0.0748, 0.3783]) -Greedy action tensor([ 0.8978, -0.1018, 1.0827, -0.1169]) tensor([0.3409, 0.1255, 0.4101, 0.1236]) -Greedy action tensor([ 1.0366, 0.3716, -0.7533, 1.4637]) tensor([0.3111, 0.1600, 0.0520, 0.4769]) -Greedy action tensor([ 0.7133, -1.1759, 1.3260, 1.3072]) tensor([0.2080, 0.0314, 0.3839, 0.3767]) -Greedy action tensor([-1.2630, 0.2132, 0.4519, -0.8886]) tensor([0.0807, 0.3533, 0.4486, 0.1174]) -Greedy action tensor([ 0.0383, 0.1613, -0.2687, 1.4812]) tensor([0.1408, 0.1593, 0.1036, 0.5962]) -Greedy action tensor([1.2568, 0.1139, 1.3203, 1.3868]) tensor([0.2838, 0.0905, 0.3024, 0.3232]) -Greedy action tensor([ 1.1801, -1.4091, 1.3733, 0.6058]) tensor([0.3507, 0.0263, 0.4255, 0.1975]) -Greedy action tensor([-0.1458, -0.3759, 0.3937, 1.3102]) tensor([0.1282, 0.1019, 0.2199, 0.5500]) -Greedy action tensor([1.3549, 0.1775, 1.5981, 0.2790]) tensor([0.3420, 0.1053, 0.4361, 0.1166]) -Greedy action tensor([-0.2194, -0.0528, 1.8392, 0.2103]) tensor([0.0866, 0.1022, 0.6782, 0.1330]) -Greedy action tensor([ 1.2481, 0.2303, -0.4907, 0.8531]) tensor([0.4523, 0.1635, 0.0795, 0.3047]) -Greedy action tensor([ 1.2624, -0.8778, 0.5675, 0.4456]) tensor([0.4858, 0.0571, 0.2424, 0.2146]) -Greedy action tensor([1.2359, 1.7216, 0.1959, 0.7370]) tensor([0.2789, 0.4532, 0.0986, 0.1693]) -Greedy action tensor([ 1.1179, -0.0325, 0.2078, -0.3894]) tensor([0.5153, 0.1631, 0.2074, 0.1142]) -Greedy action tensor([-0.3040, -1.9531, -0.4104, 1.1735]) tensor([0.1545, 0.0297, 0.1389, 0.6769]) -Greedy action tensor([1.0119, 0.1722, 1.4324, 0.8556]) tensor([0.2625, 0.1133, 0.3997, 0.2245]) -Greedy action tensor([ 0.7664, 0.8111, 0.1150, -0.1099]) tensor([0.3352, 0.3505, 0.1747, 0.1395]) -Greedy action tensor([ 1.3888, -0.6635, 1.3871, -0.6598]) tensor([0.4433, 0.0569, 0.4426, 0.0572]) -Greedy action tensor([ 1.7614, -0.4549, -0.6189, 0.9458]) tensor([0.6083, 0.0663, 0.0563, 0.2691]) -Greedy action tensor([0.4151, 0.8326, 1.4832, 0.7026]) tensor([0.1479, 0.2245, 0.4304, 0.1972]) -Greedy action tensor([0.1317, 1.4856, 0.0356, 0.5167]) tensor([0.1379, 0.5341, 0.1253, 0.2027]) -Greedy action tensor([ 0.1792, 1.2665, 1.1726, -0.0818]) tensor([0.1345, 0.3988, 0.3631, 0.1036]) -Greedy action tensor([-0.5088, -0.8807, -0.5182, 0.2450]) tensor([0.2081, 0.1435, 0.2062, 0.4423]) -Greedy action tensor([ 0.5522, -1.0283, 2.2541, 0.1700]) tensor([0.1356, 0.0279, 0.7439, 0.0926]) -Greedy action tensor([ 1.3644, -1.4731, 1.0605, 1.5071]) tensor([0.3390, 0.0199, 0.2502, 0.3910]) -Greedy action tensor([ 0.9271, 0.7830, -0.0385, -0.1191]) tensor([0.3849, 0.3333, 0.1466, 0.1352]) -Greedy action tensor([ 0.8274, -0.4025, 0.1442, 1.0102]) tensor([0.3336, 0.0975, 0.1684, 0.4005]) -Greedy action tensor([ 1.3135, -0.6101, -0.2907, 0.2286]) tensor([0.5934, 0.0867, 0.1193, 0.2005]) -Greedy action tensor([ 0.7417, -0.1632, -0.2166, -0.0555]) tensor([0.4467, 0.1807, 0.1713, 0.2013]) -Greedy action tensor([ 1.3912, -0.4958, -0.1695, 0.0287]) tensor([0.6182, 0.0937, 0.1298, 0.1583]) -Greedy action tensor([ 1.3623, -0.2917, -0.1926, 0.1700]) tensor([0.5862, 0.1121, 0.1238, 0.1779]) -Greedy action tensor([ 1.3961, -0.5835, -0.3038, 0.0954]) tensor([0.6277, 0.0867, 0.1147, 0.1709]) -Greedy action tensor([ 1.0675, -0.5464, -0.2402, -0.0785]) tensor([0.5594, 0.1114, 0.1513, 0.1779]) -Greedy action tensor([ 1.1618, -0.3966, -0.1164, -0.0196]) tensor([0.5568, 0.1172, 0.1551, 0.1709]) -Greedy action tensor([ 1.0795, -0.3372, -0.3769, 0.2885]) tensor([0.5184, 0.1257, 0.1208, 0.2350]) -Greedy action tensor([ 1.2056, -0.0081, -0.0167, -0.0652]) tensor([0.5341, 0.1587, 0.1573, 0.1499]) -Greedy action tensor([ 1.2421, -0.8007, -0.3354, 0.6093]) tensor([0.5355, 0.0694, 0.1106, 0.2844]) -Greedy action tensor([ 1.2227, -0.3972, -0.1357, 0.0244]) tensor([0.5693, 0.1127, 0.1463, 0.1717]) -Greedy action tensor([ 1.0436, -0.4297, -0.4004, 0.2243]) tensor([0.5247, 0.1202, 0.1238, 0.2312]) -Greedy action tensor([ 1.4813, -0.8104, -0.2589, 0.7056]) tensor([0.5757, 0.0582, 0.1010, 0.2650]) -Greedy action tensor([ 1.0481, -0.5220, -0.2112, 0.1560]) tensor([0.5258, 0.1094, 0.1493, 0.2155]) -Greedy action tensor([ 1.1283, -0.3324, -0.1147, 0.1302]) tensor([0.5293, 0.1228, 0.1527, 0.1951]) -Greedy action tensor([ 0.9683, -0.2322, -0.1954, 0.0128]) tensor([0.5005, 0.1507, 0.1563, 0.1925]) -Greedy action tensor([ 0.8466, -0.4202, 0.0577, 0.0700]) tensor([0.4554, 0.1283, 0.2069, 0.2094]) -Greedy action tensor([ 0.9507, 0.0141, -0.1943, -0.0566]) tensor([0.4818, 0.1889, 0.1533, 0.1760]) -Greedy action tensor([ 1.3310, -0.9975, -0.8430, 0.8386]) tensor([0.5487, 0.0535, 0.0624, 0.3354]) -Greedy action tensor([ 1.2857, -0.2959, -0.5829, 0.0108]) tensor([0.6100, 0.1254, 0.0941, 0.1705]) -Greedy action tensor([ 1.1509, -0.4864, -0.4849, 0.0361]) tensor([0.5823, 0.1133, 0.1134, 0.1910]) -Greedy action tensor([ 1.1275, -0.4564, -0.1459, 0.0721]) tensor([0.5455, 0.1119, 0.1527, 0.1899]) -Greedy action tensor([ 0.8293, -0.2848, -0.2431, 0.0684]) tensor([0.4678, 0.1535, 0.1601, 0.2186]) -Greedy action tensor([ 1.3528, -0.7807, -0.4110, 0.2002]) tensor([0.6228, 0.0738, 0.1067, 0.1967]) -Greedy action tensor([ 1.3809, -0.3953, -0.1462, 0.0977]) tensor([0.6011, 0.1018, 0.1305, 0.1666]) -Greedy action tensor([ 0.6703, -0.4307, -0.1018, 0.2767]) tensor([0.4050, 0.1347, 0.1871, 0.2732]) -Greedy action tensor([ 0.8859, -0.3564, -0.2923, 0.2195]) tensor([0.4739, 0.1368, 0.1459, 0.2434]) -Greedy action tensor([ 1.3238, -0.7058, -0.3456, 0.2849]) tensor([0.5975, 0.0785, 0.1125, 0.2114]) -Greedy action tensor([ 0.8789, -0.0500, -0.3443, -0.3279]) tensor([0.5029, 0.1986, 0.1480, 0.1505]) -Greedy action tensor([ 0.9044, -0.2085, -0.1609, 0.0761]) tensor([0.4739, 0.1557, 0.1633, 0.2070]) -Greedy action tensor([ 0.7063, -0.2432, -0.2393, -0.1237]) tensor([0.4522, 0.1750, 0.1757, 0.1972]) -Greedy action tensor([ 0.5859, -0.3982, -0.0443, 0.2940]) tensor([0.3769, 0.1409, 0.2007, 0.2815]) -Greedy action tensor([ 1.2640, -0.4046, -0.4300, 0.2719]) tensor([0.5737, 0.1081, 0.1054, 0.2127]) -Greedy action tensor([ 1.3997, -0.4104, -0.2142, -0.0206]) tensor([0.6233, 0.1020, 0.1241, 0.1506]) -Greedy action tensor([ 1.5563, -0.6959, -0.3071, 0.3738]) tensor([0.6382, 0.0671, 0.0990, 0.1956]) -Greedy action tensor([ 0.9245, -0.3987, -0.1367, 0.4551]) tensor([0.4469, 0.1190, 0.1546, 0.2795]) -Greedy action tensor([ 0.9985, -0.3446, -0.4129, 0.7082]) tensor([0.4439, 0.1159, 0.1082, 0.3320]) -Greedy action tensor([ 1.2158, -0.5381, 0.0907, 0.0365]) tensor([0.5540, 0.0959, 0.1798, 0.1703]) -Greedy action tensor([ 0.5725, -0.0797, 0.1740, -0.1198]) tensor([0.3714, 0.1935, 0.2493, 0.1859]) -Greedy action tensor([ 0.9654, -0.7548, -0.4762, 0.2435]) tensor([0.5259, 0.0942, 0.1244, 0.2555]) -Greedy action tensor([ 0.6672, -0.5183, -0.0926, 0.0371]) tensor([0.4337, 0.1325, 0.2029, 0.2309]) -Greedy action tensor([ 1.8365, -0.0886, -0.4817, 0.3333]) tensor([0.6818, 0.0994, 0.0671, 0.1516]) -Greedy action tensor([ 0.9643, -0.5334, -0.0322, -0.0733]) tensor([0.5136, 0.1149, 0.1896, 0.1820]) -Greedy action tensor([ 0.8745, -0.3193, -0.6336, 0.1467]) tensor([0.4982, 0.1510, 0.1103, 0.2406]) -Greedy action tensor([ 1.1455, -0.3922, -0.3199, 0.1776]) tensor([0.5477, 0.1177, 0.1265, 0.2081]) -Greedy action tensor([ 0.8288, -0.1234, -0.1603, -0.4340]) tensor([0.4900, 0.1891, 0.1822, 0.1386]) -Greedy action tensor([ 0.8199, -0.2967, -0.1889, 0.0584]) tensor([0.4632, 0.1516, 0.1689, 0.2163]) -Greedy action tensor([ 1.1971, -0.5869, -0.2009, 0.1667]) tensor([0.5644, 0.0948, 0.1395, 0.2014]) -Greedy action tensor([ 0.7623, -0.2847, -0.0423, -0.2548]) tensor([0.4630, 0.1625, 0.2071, 0.1674]) -Greedy action tensor([ 1.2076, -0.7733, -0.4169, 0.4164]) tensor([0.5592, 0.0771, 0.1102, 0.2535]) -Greedy action tensor([ 1.1300, -0.5130, 0.0522, 0.2190]) tensor([0.5166, 0.0999, 0.1758, 0.2077]) -Greedy action tensor([ 0.7462, -0.4426, -0.0875, 0.1408]) tensor([0.4377, 0.1333, 0.1901, 0.2389]) -Greedy action tensor([ 0.7657, -0.3700, -0.3569, -0.1363]) tensor([0.4872, 0.1565, 0.1586, 0.1977]) -Greedy action tensor([ 1.3439, -0.5019, -0.3703, 0.1468]) tensor([0.6097, 0.0963, 0.1098, 0.1842]) -Greedy action tensor([ 1.2030, -0.3665, -0.2839, 0.2222]) tensor([0.5527, 0.1151, 0.1249, 0.2073]) -Greedy action tensor([ 1.0798, -0.3331, -0.3399, 0.0518]) tensor([0.5426, 0.1321, 0.1312, 0.1941]) -Greedy action tensor([ 0.9849, -0.2390, -0.0591, 0.0415]) tensor([0.4913, 0.1445, 0.1730, 0.1913]) -Greedy action tensor([ 0.9192, -0.3578, -0.1247, 0.1719]) tensor([0.4751, 0.1325, 0.1673, 0.2251]) -Greedy action tensor([ 0.6967, -0.1230, -0.1225, -0.4552]) tensor([0.4551, 0.2005, 0.2006, 0.1438]) -Greedy action tensor([ 1.1464, -0.3564, -0.4028, 0.4340]) tensor([0.5194, 0.1156, 0.1103, 0.2547]) -Greedy action tensor([ 1.0751, -0.5916, -0.4255, 0.3284]) tensor([0.5303, 0.1002, 0.1183, 0.2513]) -Greedy action tensor([ 1.3253, -0.7083, 0.1011, 0.0845]) tensor([0.5834, 0.0763, 0.1715, 0.1687]) -Greedy action tensor([ 0.9773, -0.2671, -0.0470, -0.1414]) tensor([0.5066, 0.1460, 0.1819, 0.1655]) -Greedy action tensor([ 1.4319, -0.4089, -0.3490, -0.2207]) tensor([0.6585, 0.1045, 0.1109, 0.1261]) -Greedy action tensor([ 1.0970, -0.6610, 0.2216, 0.0198]) tensor([0.5182, 0.0893, 0.2159, 0.1765]) -Greedy action tensor([ 0.5070, -0.4271, -0.3729, -0.0888]) tensor([0.4239, 0.1666, 0.1759, 0.2336]) -Greedy action tensor([ 1.3513, -0.6272, -0.2066, 0.3208]) tensor([0.5863, 0.0811, 0.1235, 0.2092]) -Greedy action tensor([ 1.1086, -0.4220, -0.5185, 0.7077]) tensor([0.4802, 0.1039, 0.0943, 0.3216]) -Greedy action tensor([ 1.3803, -0.4340, -0.3145, 0.1098]) tensor([0.6145, 0.1001, 0.1129, 0.1725]) -Greedy action tensor([ 0.8503, -0.2311, -0.0015, -0.1726]) tensor([0.4705, 0.1596, 0.2008, 0.1692]) -Greedy action tensor([ 0.5453, -0.2987, 0.1340, 0.0628]) tensor([0.3690, 0.1587, 0.2446, 0.2278]) -Greedy action tensor([ 1.1939, -0.2353, -0.2607, 0.3498]) tensor([0.5255, 0.1259, 0.1227, 0.2259]) -Greedy action tensor([ 0.5988, 0.0044, -0.4132, 0.1950]) tensor([0.3871, 0.2137, 0.1407, 0.2585]) -Greedy action tensor([ 1.0380, -0.3243, -0.4169, -0.1116]) tensor([0.5536, 0.1418, 0.1292, 0.1754]) -Greedy action tensor([ 0.7969, -0.4653, -0.1524, 0.2721]) tensor([0.4421, 0.1251, 0.1711, 0.2616]) -Greedy action tensor([ 0.4569, -0.1774, -0.0073, -0.0709]) tensor([0.3638, 0.1929, 0.2287, 0.2146]) -Greedy action tensor([ 1.1618, -0.4483, -0.3171, 0.0186]) tensor([0.5725, 0.1144, 0.1305, 0.1825]) -Greedy action tensor([ 1.7863, -0.3019, -0.1524, 0.1411]) tensor([0.6846, 0.0848, 0.0985, 0.1321]) -Greedy action tensor([ 1.1312, -0.2252, -0.4041, 0.2232]) tensor([0.5330, 0.1373, 0.1148, 0.2150]) -Greedy action tensor([ 1.0767, -0.5356, -0.1652, 0.1284]) tensor([0.5332, 0.1063, 0.1540, 0.2065]) -Greedy action tensor([ 1.1857, -0.4802, -0.4442, -0.0968]) tensor([0.6016, 0.1137, 0.1179, 0.1668]) -Greedy action tensor([-1.5428, -0.2375, 0.8661, 0.8168]) tensor([0.0379, 0.1397, 0.4213, 0.4010]) -Greedy action tensor([-0.7513, -0.5011, -0.3902, -0.2269]) tensor([0.1849, 0.2374, 0.2653, 0.3124]) -Greedy action tensor([-1.3410, -0.5182, 0.4720, 0.4882]) tensor([0.0640, 0.1456, 0.3920, 0.3984]) -Greedy action tensor([-1.6107, -0.2631, 0.5866, 0.2014]) tensor([0.0501, 0.1927, 0.4507, 0.3066]) -Greedy action tensor([-0.4922, 0.0276, 0.7631, 1.5120]) tensor([0.0735, 0.1236, 0.2578, 0.5452]) -Greedy action tensor([-0.0248, -0.1111, 0.9108, 1.7251]) tensor([0.0978, 0.0898, 0.2494, 0.5630]) -Greedy action tensor([-1.9204, -0.4068, 0.6418, -0.1637]) tensor([0.0412, 0.1870, 0.5335, 0.2384]) -Greedy action tensor([-1.3732, -0.3808, 0.6183, 0.7328]) tensor([0.0520, 0.1402, 0.3808, 0.4270]) -Greedy action tensor([-0.7582, -0.6187, 0.2017, 0.2269]) tensor([0.1344, 0.1545, 0.3510, 0.3600]) -Greedy action tensor([-0.4662, -0.1083, 0.9194, 1.5849]) tensor([0.0704, 0.1007, 0.2814, 0.5475]) -Greedy action tensor([-1.2238, -0.5653, 0.3121, 0.3001]) tensor([0.0822, 0.1588, 0.3818, 0.3772]) -Greedy action tensor([-1.6312, 0.0831, 0.4334, 0.0348]) tensor([0.0507, 0.2815, 0.3996, 0.2682]) -Greedy action tensor([-1.6164, -0.4482, 0.7522, 0.6634]) tensor([0.0405, 0.1304, 0.4329, 0.3962]) -Greedy action tensor([-1.8647, -0.9875, 0.4054, -0.1722]) tensor([0.0540, 0.1298, 0.5228, 0.2934]) -Greedy action tensor([-0.8217, 0.0075, 0.3210, 0.9051]) tensor([0.0830, 0.1902, 0.2602, 0.4666]) -Greedy action tensor([-1.1844, -0.5669, 0.3839, 0.0842]) tensor([0.0892, 0.1654, 0.4281, 0.3172]) -Greedy action tensor([-1.1432, -0.5387, 0.3206, -0.1806]) tensor([0.1023, 0.1873, 0.4423, 0.2680]) -Greedy action tensor([-1.8860, -0.4498, 1.0306, 0.5219]) tensor([0.0287, 0.1208, 0.5311, 0.3193]) -Greedy action tensor([-1.5653, -0.5997, 0.7132, -0.1970]) tensor([0.0577, 0.1517, 0.5637, 0.2269]) -Greedy action tensor([-1.9757, -0.6294, 0.8264, 0.0263]) tensor([0.0348, 0.1338, 0.5737, 0.2577]) -Greedy action tensor([-1.4842, -0.5446, 0.4180, 0.1334]) tensor([0.0654, 0.1672, 0.4379, 0.3295]) -Greedy action tensor([-1.5601, -0.1290, 0.3076, 0.0361]) tensor([0.0603, 0.2522, 0.3902, 0.2974]) -Greedy action tensor([-0.2431, -0.0369, 0.8916, 1.7072]) tensor([0.0808, 0.0994, 0.2514, 0.5684]) -Greedy action tensor([-1.1502, -0.3656, 0.5312, 0.9460]) tensor([0.0599, 0.1312, 0.3217, 0.4871]) -Greedy action tensor([-1.2942, -0.5837, 0.3283, 0.2044]) tensor([0.0795, 0.1618, 0.4028, 0.3559]) -Greedy action tensor([-1.7060, -0.4060, 0.5373, -0.0464]) tensor([0.0517, 0.1896, 0.4870, 0.2717]) -Greedy action tensor([-0.8691, -0.5863, 0.2514, 0.3926]) tensor([0.1120, 0.1487, 0.3436, 0.3957]) -Greedy action tensor([-1.3624, -0.5825, 0.3693, -0.2564]) tensor([0.0844, 0.1840, 0.4767, 0.2550]) -Greedy action tensor([-1.4132, -0.0153, 0.2880, 0.1797]) tensor([0.0647, 0.2620, 0.3548, 0.3184]) -Greedy action tensor([-1.6114, -0.5160, 1.0528, 0.7577]) tensor([0.0344, 0.1030, 0.4945, 0.3681]) -Greedy action tensor([-1.2002, -0.0215, 0.6584, 0.9447]) tensor([0.0521, 0.1692, 0.3340, 0.4447]) -Greedy action tensor([-1.4282, -0.5701, 0.4522, 0.0603]) tensor([0.0697, 0.1644, 0.4570, 0.3088]) -Greedy action tensor([-1.7102, -0.5294, 0.5583, -0.0155]) tensor([0.0516, 0.1682, 0.4990, 0.2812]) -Greedy action tensor([-2.0278, -0.9693, 0.4526, -0.2294]) tensor([0.0457, 0.1318, 0.5463, 0.2762]) -Greedy action tensor([-1.6391, -0.5082, 0.4914, 0.0522]) tensor([0.0557, 0.1727, 0.4692, 0.3024]) -Greedy action tensor([-0.7879, -0.4859, 1.2259, 1.5640]) tensor([0.0491, 0.0665, 0.3682, 0.5162]) -Greedy action tensor([-1.4831, -0.5741, 0.4865, -0.0374]) tensor([0.0671, 0.1666, 0.4812, 0.2850]) -Greedy action tensor([-1.1154, -0.6545, 0.3030, 0.1236]) tensor([0.0983, 0.1559, 0.4062, 0.3395]) -Greedy action tensor([-0.8575, -0.2987, 0.2162, 0.3466]) tensor([0.1110, 0.1941, 0.3248, 0.3701]) -Greedy action tensor([-1.1682, -0.3099, 0.6471, -0.6551]) tensor([0.0895, 0.2112, 0.5498, 0.1495]) -Greedy action tensor([-1.7971, -0.4705, 0.2675, -0.1493]) tensor([0.0560, 0.2111, 0.4417, 0.2911]) -Greedy action tensor([-0.6456, 0.0517, 0.0462, 0.2138]) tensor([0.1357, 0.2726, 0.2711, 0.3206]) -Greedy action tensor([-1.5589, -0.4529, 1.1636, 1.0465]) tensor([0.0305, 0.0922, 0.4643, 0.4130]) -Greedy action tensor([-1.8549, -0.5142, 1.1550, 0.6255]) tensor([0.0270, 0.1031, 0.5475, 0.3224]) -Greedy action tensor([-1.2181, -0.5611, 0.4386, 0.7651]) tensor([0.0648, 0.1250, 0.3396, 0.4707]) -Greedy action tensor([-1.4280, -0.3791, -0.0415, -0.4527]) tensor([0.0952, 0.2717, 0.3808, 0.2524]) -Greedy action tensor([-1.4963, -0.5863, 0.4891, 0.0841]) tensor([0.0640, 0.1590, 0.4661, 0.3109]) -Greedy action tensor([-1.6150, -0.5501, 0.6635, 0.3638]) tensor([0.0479, 0.1388, 0.4672, 0.3462]) -Greedy action tensor([-1.1577, -0.6107, 0.3801, 0.2356]) tensor([0.0876, 0.1514, 0.4079, 0.3530]) -Greedy action tensor([-1.9348, -0.9144, 0.3092, -0.1278]) tensor([0.0518, 0.1438, 0.4887, 0.3157]) -Greedy action tensor([-1.6125, -0.0555, 0.3979, 0.0541]) tensor([0.0540, 0.2564, 0.4035, 0.2861]) -Greedy action tensor([-1.9983, -0.5220, 0.8246, 0.2101]) tensor([0.0319, 0.1398, 0.5375, 0.2907]) -Greedy action tensor([-1.5130, -0.5722, 0.5558, 0.2460]) tensor([0.0579, 0.1482, 0.4580, 0.3360]) -Greedy action tensor([-1.6786, -0.4970, 0.6758, 0.3367]) tensor([0.0449, 0.1462, 0.4724, 0.3365]) -Greedy action tensor([-1.8543, -0.4477, 0.6170, -0.1288]) tensor([0.0444, 0.1811, 0.5253, 0.2492]) -Greedy action tensor([-1.6551, -0.4494, 0.6152, -0.2468]) tensor([0.0552, 0.1844, 0.5346, 0.2258]) -Greedy action tensor([-1.8685, -0.4799, 0.6804, -0.0668]) tensor([0.0419, 0.1680, 0.5361, 0.2540]) -Greedy action tensor([-1.3636, -0.5671, 0.3664, 0.1671]) tensor([0.0742, 0.1645, 0.4185, 0.3428]) -Greedy action tensor([-1.7294, -0.5470, 1.2174, 0.8901]) tensor([0.0270, 0.0881, 0.5142, 0.3707]) -Greedy action tensor([-0.8522, -0.3810, 0.7585, 1.3735]) tensor([0.0593, 0.0950, 0.2968, 0.5489]) -Greedy action tensor([-1.2801, -0.1195, 0.9812, 1.0204]) tensor([0.0421, 0.1343, 0.4037, 0.4199]) -Greedy action tensor([-1.6844, -0.4757, 0.5384, -0.0245]) tensor([0.0531, 0.1778, 0.4900, 0.2791]) -Greedy action tensor([-1.3428, -0.5312, 0.3959, 0.2710]) tensor([0.0716, 0.1612, 0.4075, 0.3596]) -Greedy action tensor([-0.9632, -0.4570, 0.2784, 0.5562]) tensor([0.0935, 0.1552, 0.3238, 0.4275]) -Greedy action tensor([-1.5605, -0.5169, 0.7834, 0.6548]) tensor([0.0427, 0.1212, 0.4449, 0.3912]) -Greedy action tensor([-1.7921, -0.4560, 0.6222, -0.0402]) tensor([0.0460, 0.1749, 0.5141, 0.2651]) -Greedy action tensor([-1.0715, -0.4482, 0.4744, 0.9402]) tensor([0.0665, 0.1241, 0.3121, 0.4973]) -Greedy action tensor([-1.8007, -0.4752, 0.6057, -0.0748]) tensor([0.0466, 0.1753, 0.5166, 0.2616]) -Greedy action tensor([-1.6092, -0.5265, 0.5519, 0.1166]) tensor([0.0548, 0.1618, 0.4757, 0.3078]) -Greedy action tensor([-1.7184, -0.4819, 0.5964, 0.0807]) tensor([0.0485, 0.1671, 0.4911, 0.2932]) -Greedy action tensor([-1.1593, -0.5850, 0.2544, 0.3229]) tensor([0.0886, 0.1573, 0.3642, 0.3900]) -Greedy action tensor([-1.5620, -0.5651, 0.4746, 0.0143]) tensor([0.0617, 0.1672, 0.4728, 0.2984]) -Greedy action tensor([-0.0171, 0.2152, 0.8642, 1.5016]) tensor([0.1082, 0.1365, 0.2612, 0.4941]) -Greedy action tensor([-1.7343, -0.5104, 0.5529, -0.0407]) tensor([0.0508, 0.1727, 0.5002, 0.2763]) -Greedy action tensor([-1.7186, -0.7428, -0.0023, -0.2949]) tensor([0.0748, 0.1985, 0.4162, 0.3106]) -Greedy action tensor([-1.8215, -0.4832, 0.6309, -0.0833]) tensor([0.0452, 0.1724, 0.5252, 0.2571]) -Greedy action tensor([-1.7137, -0.4749, 0.5342, -0.0231]) tensor([0.0517, 0.1784, 0.4895, 0.2804]) -Greedy action tensor([-1.4744, 0.1826, 0.3003, 0.1605]) tensor([0.0579, 0.3036, 0.3415, 0.2970]) -Greedy action tensor([-1.7598, -0.5109, 0.5932, -0.0228]) tensor([0.0483, 0.1686, 0.5085, 0.2746]) -Greedy action tensor([-1.6269, -0.5223, 0.4907, 0.0421]) tensor([0.0567, 0.1711, 0.4713, 0.3009]) -Greedy action tensor([-0.7093, -0.4246, 1.0661, 1.5276]) tensor([0.0568, 0.0756, 0.3354, 0.5322]) -Greedy action tensor([ 0.5733, -0.1177, -0.0922, -0.3003]) tensor([0.4111, 0.2060, 0.2113, 0.1716]) -Greedy action tensor([ 0.8534, -0.1999, -0.0252, -0.3403]) tensor([0.4837, 0.1687, 0.2009, 0.1466]) -Greedy action tensor([ 0.4573, -0.2442, 0.1542, -0.4701]) tensor([0.3802, 0.1885, 0.2808, 0.1504]) -Greedy action tensor([ 0.4492, 0.1579, 0.1688, -0.1491]) tensor([0.3276, 0.2448, 0.2475, 0.1801]) -Greedy action tensor([ 0.3500, 0.0030, 0.1885, -0.1737]) tensor([0.3175, 0.2244, 0.2701, 0.1880]) -Greedy action tensor([ 0.7381, -0.2493, -0.0561, -0.3051]) tensor([0.4594, 0.1711, 0.2076, 0.1619]) -Greedy action tensor([ 0.7826, -0.3098, 0.1466, -0.7143]) tensor([0.4788, 0.1606, 0.2535, 0.1072]) -Greedy action tensor([ 0.4922, -0.1078, 0.0063, -0.3846]) tensor([0.3876, 0.2127, 0.2384, 0.1613]) -Greedy action tensor([ 0.4106, -0.0158, 0.0332, -0.2126]) tensor([0.3479, 0.2271, 0.2385, 0.1865]) -Greedy action tensor([ 1.1878, -0.8726, -0.0264, -0.6618]) tensor([0.6323, 0.0806, 0.1877, 0.0995]) -Greedy action tensor([ 0.5568, -0.3164, 0.0467, -0.4251]) tensor([0.4179, 0.1745, 0.2509, 0.1566]) -Greedy action tensor([ 0.3855, 0.0398, -0.0703, -0.0739]) tensor([0.3363, 0.2380, 0.2132, 0.2124]) -Greedy action tensor([ 0.1883, 0.1916, -0.1396, 0.0578]) tensor([0.2777, 0.2786, 0.2000, 0.2437]) -Greedy action tensor([ 0.5789, -0.4552, -0.1154, -0.4083]) tensor([0.4489, 0.1596, 0.2242, 0.1673]) -Greedy action tensor([ 0.3201, 0.0864, -0.0544, -0.1540]) tensor([0.3224, 0.2552, 0.2217, 0.2007]) -Greedy action tensor([ 0.4872, -0.2065, 0.0395, -0.0481]) tensor([0.3671, 0.1834, 0.2346, 0.2149]) -Greedy action tensor([ 0.9959, -0.7037, -0.1675, -0.9523]) tensor([0.6106, 0.1116, 0.1908, 0.0870]) -Greedy action tensor([ 1.0618, -0.8692, -0.0333, -0.5867]) tensor([0.5981, 0.0867, 0.2001, 0.1150]) -Greedy action tensor([ 0.4846, -0.2442, -0.0124, -0.4343]) tensor([0.4016, 0.1938, 0.2443, 0.1602]) -Greedy action tensor([ 0.4025, -0.1064, 0.0519, -0.4724]) tensor([0.3673, 0.2208, 0.2587, 0.1531]) -Greedy action tensor([ 0.6982, -0.4918, -0.0961, -0.3277]) tensor([0.4729, 0.1439, 0.2137, 0.1695]) -Greedy action tensor([ 0.6583, -0.1859, 0.0890, -0.4010]) tensor([0.4269, 0.1835, 0.2416, 0.1480]) -Greedy action tensor([ 0.4888, -0.1554, -0.0722, -0.2606]) tensor([0.3893, 0.2044, 0.2222, 0.1840]) -Greedy action tensor([ 0.6465, -0.3463, -0.0130, -0.2454]) tensor([0.4353, 0.1613, 0.2251, 0.1784]) -Greedy action tensor([ 0.4912, -0.2406, 0.0152, -0.3143]) tensor([0.3923, 0.1887, 0.2437, 0.1753]) -Greedy action tensor([ 0.6885, -0.2306, -0.0994, -0.3619]) tensor([0.4538, 0.1810, 0.2064, 0.1588]) -Greedy action tensor([ 0.5716, -0.2670, -0.0900, -0.3628]) tensor([0.4271, 0.1847, 0.2204, 0.1678]) -Greedy action tensor([ 0.3366, -0.3067, 0.1878, -0.2628]) tensor([0.3406, 0.1790, 0.2935, 0.1870]) -Greedy action tensor([ 0.5012, -0.3519, -0.0371, -0.4310]) tensor([0.4161, 0.1773, 0.2429, 0.1638]) -Greedy action tensor([ 0.7347, -0.2793, 0.0551, -0.2542]) tensor([0.4461, 0.1618, 0.2261, 0.1659]) -Greedy action tensor([ 0.5583, -0.3854, -0.0643, -0.3075]) tensor([0.4262, 0.1659, 0.2287, 0.1793]) -Greedy action tensor([ 0.2602, 0.1869, -0.0734, -0.3175]) tensor([0.3118, 0.2898, 0.2234, 0.1750]) -Greedy action tensor([ 0.5433, -0.3155, -0.0338, -0.2802]) tensor([0.4125, 0.1748, 0.2316, 0.1811]) -Greedy action tensor([ 0.5162, -0.4200, 0.2993, -0.6499]) tensor([0.3986, 0.1563, 0.3209, 0.1242]) -Greedy action tensor([ 0.5896, -0.0784, -0.0676, -0.2132]) tensor([0.4034, 0.2068, 0.2091, 0.1807]) -Greedy action tensor([ 0.4869, 0.1002, -0.0640, -0.1671]) tensor([0.3603, 0.2447, 0.2077, 0.1873]) -Greedy action tensor([ 0.5400, -0.4195, -0.0860, -0.4687]) tensor([0.4381, 0.1678, 0.2343, 0.1598]) -Greedy action tensor([ 0.6788, 0.0353, 0.0408, -0.5152]) tensor([0.4243, 0.2230, 0.2242, 0.1286]) -Greedy action tensor([ 0.9211, -0.5546, -0.0302, -0.4559]) tensor([0.5356, 0.1224, 0.2069, 0.1351]) -Greedy action tensor([ 0.4645, 0.0430, -0.0517, -0.2902]) tensor([0.3672, 0.2409, 0.2192, 0.1727]) -Greedy action tensor([ 0.7933, -0.4288, -0.0127, -0.3710]) tensor([0.4870, 0.1435, 0.2175, 0.1520]) -Greedy action tensor([ 0.8776, -0.5788, 0.0284, -0.4464]) tensor([0.5190, 0.1209, 0.2220, 0.1381]) -Greedy action tensor([ 0.0925, -0.1022, 0.0963, -0.2263]) tensor([0.2814, 0.2316, 0.2825, 0.2046]) -Greedy action tensor([ 0.5544, 0.0399, 0.1505, -0.3233]) tensor([0.3730, 0.2230, 0.2490, 0.1551]) -Greedy action tensor([ 0.6522, -0.2540, -0.0750, -0.1397]) tensor([0.4273, 0.1726, 0.2065, 0.1936]) -Greedy action tensor([ 0.4515, 0.0210, 0.0316, -0.4526]) tensor([0.3687, 0.2397, 0.2423, 0.1493]) -Greedy action tensor([ 0.3615, 0.0068, -0.1013, -0.0725]) tensor([0.3357, 0.2355, 0.2113, 0.2175]) -Greedy action tensor([ 0.8131, -0.4721, -0.0052, -0.3543]) tensor([0.4929, 0.1363, 0.2174, 0.1534]) -Greedy action tensor([ 0.0412, -0.0531, 0.2309, -0.1414]) tensor([0.2530, 0.2303, 0.3059, 0.2108]) -Greedy action tensor([ 0.5433, -0.2026, -0.0203, -0.3165]) tensor([0.4054, 0.1923, 0.2307, 0.1716]) -Greedy action tensor([ 0.4000, 0.2173, -0.0005, -0.2916]) tensor([0.3329, 0.2773, 0.2230, 0.1667]) -Greedy action tensor([ 0.4163, 0.1220, -0.0866, -0.1408]) tensor([0.3421, 0.2549, 0.2069, 0.1960]) -Greedy action tensor([ 0.2934, -0.2042, 0.1449, -0.2212]) tensor([0.3260, 0.1982, 0.2810, 0.1948]) -Greedy action tensor([ 0.7933, -0.3147, 0.0241, -0.6598]) tensor([0.4932, 0.1629, 0.2286, 0.1153]) -Greedy action tensor([ 0.3137, -0.1304, 0.2312, -0.2069]) tensor([0.3168, 0.2032, 0.2917, 0.1882]) -Greedy action tensor([ 0.4569, -0.2864, -0.0594, -0.4049]) tensor([0.4009, 0.1906, 0.2392, 0.1693]) -Greedy action tensor([ 1.1669, -0.7609, 0.2294, -0.8689]) tensor([0.5997, 0.0872, 0.2348, 0.0783]) -Greedy action tensor([ 0.4804, -0.1269, -0.0875, -0.3720]) tensor([0.3940, 0.2147, 0.2233, 0.1680]) -Greedy action tensor([ 0.6469, -0.4185, 0.0057, -0.3602]) tensor([0.4471, 0.1541, 0.2355, 0.1633]) -Greedy action tensor([ 0.5831, -0.3939, -0.0264, -0.6818]) tensor([0.4541, 0.1709, 0.2469, 0.1282]) -Greedy action tensor([ 0.4723, -0.0325, 0.0996, -0.1899]) tensor([0.3561, 0.2150, 0.2453, 0.1836]) -Greedy action tensor([ 0.8367, -0.3294, -0.0119, -0.3829]) tensor([0.4914, 0.1531, 0.2103, 0.1451]) -Greedy action tensor([ 0.5954, -0.2314, -0.0982, -0.2287]) tensor([0.4209, 0.1841, 0.2103, 0.1846]) -Greedy action tensor([ 0.7860, -0.6104, -0.0610, -0.4464]) tensor([0.5082, 0.1258, 0.2179, 0.1482]) -Greedy action tensor([ 0.8591, -0.4317, 0.0539, -0.3342]) tensor([0.4937, 0.1358, 0.2207, 0.1497]) -Greedy action tensor([ 0.4092, 0.2335, 0.0872, -0.1123]) tensor([0.3167, 0.2657, 0.2295, 0.1880]) -Greedy action tensor([ 0.6382, -0.0672, -0.0379, -0.1892]) tensor([0.4099, 0.2024, 0.2085, 0.1792]) -Greedy action tensor([ 0.8174, -0.2414, 0.0353, -0.5477]) tensor([0.4855, 0.1684, 0.2221, 0.1240]) -Greedy action tensor([ 0.6211, -0.3931, 0.1188, -0.5871]) tensor([0.4412, 0.1600, 0.2670, 0.1318]) -Greedy action tensor([ 0.3880, -0.0057, 0.0004, -0.2000]) tensor([0.3438, 0.2319, 0.2333, 0.1910]) -Greedy action tensor([ 0.6684, -0.1860, -0.0984, -0.3163]) tensor([0.4418, 0.1880, 0.2052, 0.1650]) -Greedy action tensor([ 0.2244, 0.2892, -0.0103, -0.3049]) tensor([0.2901, 0.3095, 0.2294, 0.1709]) -Greedy action tensor([ 0.7879, -0.2605, -0.1357, -0.2354]) tensor([0.4746, 0.1664, 0.1885, 0.1706]) -Greedy action tensor([ 0.5332, -0.1746, -0.1123, -0.3310]) tensor([0.4101, 0.2021, 0.2150, 0.1728]) -Greedy action tensor([ 0.3278, -0.0395, 0.0424, -0.0687]) tensor([0.3208, 0.2222, 0.2412, 0.2158]) -Greedy action tensor([ 0.8744, -0.2809, -0.0407, -0.3361]) tensor([0.4967, 0.1564, 0.1989, 0.1480]) -Greedy action tensor([ 0.9061, -0.8751, -0.1133, -0.6412]) tensor([0.5740, 0.0967, 0.2071, 0.1222]) -Greedy action tensor([ 0.9054, -0.5292, 0.0845, -0.6693]) tensor([0.5304, 0.1263, 0.2334, 0.1098]) -Greedy action tensor([ 0.6736, 0.1027, 0.0846, -0.0641]) tensor([0.3849, 0.2175, 0.2136, 0.1841]) -Greedy action tensor([ 0.4831, -0.4040, 0.0185, -0.6436]) tensor([0.4229, 0.1742, 0.2658, 0.1371]) -Greedy action tensor([ 0.6295, -0.4676, 0.0195, -0.5034]) tensor([0.4547, 0.1518, 0.2470, 0.1464]) -Greedy action tensor([ 1.7294, 0.4233, -0.4539, 1.0644]) tensor([0.5269, 0.1427, 0.0594, 0.2710]) -Greedy action tensor([ 0.4748, -0.0975, -0.4139, -0.5897]) tensor([0.4310, 0.2432, 0.1772, 0.1486]) -Greedy action tensor([-0.2863, -0.7676, -0.1323, 0.5910]) tensor([0.1927, 0.1191, 0.2248, 0.4634]) -Greedy action tensor([ 0.8995, 0.0706, -0.4057, 0.7026]) tensor([0.3954, 0.1726, 0.1072, 0.3247]) -Greedy action tensor([-0.2647, 0.0274, 0.5433, 0.8524]) tensor([0.1309, 0.1753, 0.2937, 0.4001]) -Greedy action tensor([ 0.4961, -0.9981, -0.1531, 0.8450]) tensor([0.3160, 0.0709, 0.1651, 0.4480]) -Greedy action tensor([ 0.0035, -0.4857, 1.0380, 0.7100]) tensor([0.1550, 0.0950, 0.4360, 0.3141]) -Greedy action tensor([ 0.5856, -0.0190, 0.5841, -0.2091]) tensor([0.3337, 0.1823, 0.3332, 0.1507]) -Greedy action tensor([ 0.4411, 0.5233, -0.3741, 0.4837]) tensor([0.2800, 0.3040, 0.1239, 0.2922]) -Greedy action tensor([ 1.5539, -1.0589, 0.4884, 1.1911]) tensor([0.4731, 0.0347, 0.1630, 0.3292]) -Greedy action tensor([ 1.2849, -1.4615, 1.8400, 1.0555]) tensor([0.2777, 0.0178, 0.4838, 0.2208]) -Greedy action tensor([ 1.9443, -0.2251, -0.0101, 0.4470]) tensor([0.6758, 0.0772, 0.0957, 0.1512]) -Greedy action tensor([ 0.5271, -0.1794, 0.5204, 1.2421]) tensor([0.2207, 0.1089, 0.2192, 0.4512]) -Greedy action tensor([ 0.7728, -0.9948, -0.4689, 0.5504]) tensor([0.4424, 0.0755, 0.1278, 0.3542]) -Greedy action tensor([ 1.9100, -0.7002, 0.8439, 1.2397]) tensor([0.5183, 0.0381, 0.1785, 0.2651]) -Greedy action tensor([ 0.4909, -0.3364, 0.5354, 0.5613]) tensor([0.2812, 0.1230, 0.2940, 0.3018]) -Greedy action tensor([0.6865, 0.1869, 0.7634, 0.4673]) tensor([0.2865, 0.1739, 0.3095, 0.2301]) -Greedy action tensor([ 1.7601, -1.5463, -0.4452, 1.2358]) tensor([0.5751, 0.0211, 0.0634, 0.3404]) -Greedy action tensor([ 1.2336, -0.5167, -0.0535, 1.2866]) tensor([0.3993, 0.0694, 0.1102, 0.4211]) -Greedy action tensor([ 1.1830, -0.8000, 0.9142, -0.2124]) tensor([0.4652, 0.0640, 0.3555, 0.1152]) -Greedy action tensor([ 0.3502, 0.1560, 0.2355, -0.2880]) tensor([0.3083, 0.2539, 0.2749, 0.1629]) -Greedy action tensor([ 1.2561, -0.1873, 0.1528, 1.4026]) tensor([0.3669, 0.0866, 0.1217, 0.4248]) -Greedy action tensor([-0.1618, -0.0420, 0.3165, -0.2498]) tensor([0.2148, 0.2421, 0.3465, 0.1967]) -Greedy action tensor([ 1.2594, -0.2491, 0.4236, 0.9235]) tensor([0.4220, 0.0934, 0.1830, 0.3016]) -Greedy action tensor([-0.0612, -1.2557, 0.4855, 0.8594]) tensor([0.1805, 0.0547, 0.3118, 0.4531]) -Greedy action tensor([ 0.8905, -0.8052, 0.9806, 0.0029]) tensor([0.3718, 0.0682, 0.4069, 0.1531]) -Greedy action tensor([ 0.9045, -0.2398, 0.2472, 1.0359]) tensor([0.3359, 0.1070, 0.1741, 0.3831]) -Greedy action tensor([ 0.9654, -2.0235, 0.8990, 1.0697]) tensor([0.3230, 0.0163, 0.3022, 0.3585]) -Greedy action tensor([ 0.3336, -0.1993, 0.1413, 0.8040]) tensor([0.2492, 0.1463, 0.2056, 0.3989]) -Greedy action tensor([0.6321, 0.5759, 0.0272, 0.1644]) tensor([0.3207, 0.3032, 0.1752, 0.2009]) -Greedy action tensor([ 1.3727, 0.4794, -0.2315, 0.4520]) tensor([0.4979, 0.2038, 0.1001, 0.1983]) -Greedy action tensor([ 1.8514, -0.0768, 0.4627, 1.5400]) tensor([0.4701, 0.0684, 0.1172, 0.3443]) -Greedy action tensor([ 0.5378, -0.6353, 1.4023, 0.3453]) tensor([0.2218, 0.0686, 0.5266, 0.1830]) -Greedy action tensor([-0.2041, -0.2462, -0.4933, 1.2230]) tensor([0.1455, 0.1395, 0.1089, 0.6061]) -Greedy action tensor([ 1.1894, -1.6420, 0.7382, 0.4720]) tensor([0.4579, 0.0270, 0.2916, 0.2235]) -Greedy action tensor([ 0.7734, -0.0425, 0.2596, -0.4832]) tensor([0.4301, 0.1902, 0.2573, 0.1224]) -Greedy action tensor([1.3870, 0.5258, 0.4850, 0.8150]) tensor([0.4179, 0.1766, 0.1696, 0.2359]) -Greedy action tensor([ 1.3474, 1.0700, -0.3741, 0.2570]) tensor([0.4400, 0.3334, 0.0787, 0.1479]) -Greedy action tensor([ 1.1430, -1.0974, -0.1679, 0.6156]) tensor([0.5086, 0.0541, 0.1371, 0.3001]) -Greedy action tensor([ 2.0823, -1.5571, 0.4476, 1.0646]) tensor([0.6318, 0.0166, 0.1232, 0.2284]) -Greedy action tensor([ 0.0084, -0.4169, 0.1984, 0.1118]) tensor([0.2518, 0.1646, 0.3045, 0.2792]) -Greedy action tensor([ 0.4945, 0.0018, 1.4167, -0.0269]) tensor([0.2119, 0.1295, 0.5329, 0.1258]) -Greedy action tensor([ 1.5221, -0.7887, 1.4733, 0.1014]) tensor([0.4361, 0.0433, 0.4153, 0.1053]) -Greedy action tensor([ 1.8025, -0.2290, 1.1947, 1.3320]) tensor([0.4347, 0.0570, 0.2367, 0.2716]) -Greedy action tensor([ 0.2308, -0.7949, 1.2428, 0.5893]) tensor([0.1805, 0.0647, 0.4965, 0.2583]) -Greedy action tensor([ 0.9085, -0.8584, 0.8982, 1.0875]) tensor([0.2979, 0.0509, 0.2949, 0.3563]) -Greedy action tensor([ 0.3012, 0.0139, -0.0646, -0.0710]) tensor([0.3192, 0.2395, 0.2214, 0.2200]) -Greedy action tensor([ 2.0886, -0.3481, -0.9344, 0.9292]) tensor([0.6898, 0.0603, 0.0336, 0.2163]) -Greedy action tensor([ 1.2003, 0.5474, -0.0110, 1.9580]) tensor([0.2531, 0.1317, 0.0754, 0.5398]) -Greedy action tensor([ 0.5314, -0.1961, 1.0175, 1.2863]) tensor([0.1910, 0.0923, 0.3105, 0.4063]) -Greedy action tensor([-0.4481, 0.2113, -0.0215, 0.1112]) tensor([0.1609, 0.3111, 0.2465, 0.2815]) -Greedy action tensor([ 1.0773, -1.1043, -0.0114, 0.8845]) tensor([0.4397, 0.0496, 0.1480, 0.3626]) -Greedy action tensor([ 1.3207, -1.7784, -0.5446, 0.8954]) tensor([0.5395, 0.0243, 0.0835, 0.3526]) -Greedy action tensor([ 1.0491, -1.5159, -0.3570, 0.4737]) tensor([0.5306, 0.0408, 0.1301, 0.2985]) -Greedy action tensor([ 0.4068, 0.0403, -0.7042, 1.0554]) tensor([0.2541, 0.1761, 0.0837, 0.4861]) -Greedy action tensor([ 0.8979, -0.4644, -0.3016, 1.3530]) tensor([0.3191, 0.0817, 0.0962, 0.5030]) -Greedy action tensor([-0.5832, -0.4951, -0.2463, 0.1448]) tensor([0.1797, 0.1963, 0.2517, 0.3722]) -Greedy action tensor([0.5098, 0.3407, 0.5315, 0.5175]) tensor([0.2581, 0.2180, 0.2638, 0.2601]) -Greedy action tensor([ 1.2260, -1.3556, 1.5198, 0.8974]) tensor([0.3188, 0.0241, 0.4276, 0.2295]) -Greedy action tensor([-0.1767, -0.7736, -0.3171, -0.5979]) tensor([0.3251, 0.1790, 0.2825, 0.2134]) -Greedy action tensor([-0.1643, 1.0885, 1.4502, 0.0114]) tensor([0.0933, 0.3266, 0.4689, 0.1112]) -Greedy action tensor([ 0.2002, -0.0293, 0.8090, 1.2419]) tensor([0.1546, 0.1229, 0.2843, 0.4382]) -Greedy action tensor([ 1.3321, -0.2077, 0.9089, -0.1044]) tensor([0.4746, 0.1018, 0.3108, 0.1128]) -Greedy action tensor([ 0.6497, -0.1762, -0.3008, 1.1707]) tensor([0.2851, 0.1248, 0.1102, 0.4799]) -Greedy action tensor([ 0.8372, -0.5194, 1.6229, 0.7401]) tensor([0.2294, 0.0591, 0.5033, 0.2082]) -Greedy action tensor([ 0.8766, -0.9486, -0.5030, 1.2873]) tensor([0.3424, 0.0552, 0.0862, 0.5163]) -Greedy action tensor([ 0.4544, -0.7886, -0.5350, 0.6385]) tensor([0.3493, 0.1008, 0.1299, 0.4200]) -Greedy action tensor([-0.0426, 1.1027, 1.2389, -0.2068]) tensor([0.1164, 0.3658, 0.4191, 0.0987]) -Greedy action tensor([ 0.7678, -1.4265, 0.2457, 0.6433]) tensor([0.3864, 0.0431, 0.2293, 0.3412]) -Greedy action tensor([ 0.9645, -1.3750, 1.1393, -0.0574]) tensor([0.3777, 0.0364, 0.4499, 0.1360]) -Greedy action tensor([ 1.6253, -1.0808, 0.9814, 1.6703]) tensor([0.3791, 0.0253, 0.1991, 0.3965]) -Greedy action tensor([ 0.9387, -0.1975, 1.5143, 0.0421]) tensor([0.2851, 0.0915, 0.5070, 0.1163]) -Greedy action tensor([ 2.2304, -0.7338, 1.4692, 0.2431]) tensor([0.6039, 0.0312, 0.2821, 0.0828]) -Greedy action tensor([ 0.4507, -0.9452, -0.6414, 1.3018]) tensor([0.2548, 0.0631, 0.0855, 0.5967]) -Greedy action tensor([ 0.1486, 0.3698, 1.3453, -0.0379]) tensor([0.1566, 0.1953, 0.5181, 0.1299]) -Greedy action tensor([0.6003, 0.5608, 0.1053, 1.5584]) tensor([0.1931, 0.1857, 0.1177, 0.5035]) -Greedy action tensor([0.5808, 0.3069, 0.7832, 2.0849]) tensor([0.1336, 0.1016, 0.1636, 0.6012]) -Greedy action tensor([-0.0224, -0.5975, -0.5887, 0.6846]) tensor([0.2405, 0.1353, 0.1365, 0.4877]) -Greedy action tensor([0.3328, 0.0858, 0.2085, 0.5405]) tensor([0.2567, 0.2005, 0.2267, 0.3160]) -Greedy action tensor([ 1.2441, 0.5817, -0.7091, 0.0945]) tensor([0.5065, 0.2612, 0.0718, 0.1605]) -Greedy action tensor([-0.1877, 0.3436, 1.0890, 0.2305]) tensor([0.1281, 0.2180, 0.4593, 0.1946]) -Greedy action tensor([ 1.1894, -0.9734, -0.4353, 1.1253]) tensor([0.4445, 0.0511, 0.0875, 0.4169]) -Greedy action tensor([ 1.3302, -0.6816, -0.2075, 0.0424]) tensor([0.6156, 0.0823, 0.1323, 0.1698]) -Greedy action tensor([ 1.0328, -0.0858, -0.2349, -0.0727]) tensor([0.5156, 0.1685, 0.1452, 0.1707]) -Greedy action tensor([ 1.2259, -0.4797, -0.4148, 0.1641]) tensor([0.5809, 0.1055, 0.1126, 0.2009]) -Greedy action tensor([ 0.9122, -0.2890, -0.2604, 0.2029]) tensor([0.4757, 0.1431, 0.1472, 0.2340]) -Greedy action tensor([ 0.7833, -0.4068, -0.5144, -0.1453]) tensor([0.5070, 0.1542, 0.1385, 0.2003]) -Greedy action tensor([ 1.5210, 0.2114, -0.0827, 0.1476]) tensor([0.5799, 0.1565, 0.1167, 0.1469]) -Greedy action tensor([ 1.4137, -0.8122, -0.5038, 0.1937]) tensor([0.6451, 0.0696, 0.0948, 0.1904]) -Greedy action tensor([ 0.9918, -0.5403, -0.2137, 0.0151]) tensor([0.5285, 0.1142, 0.1583, 0.1990]) -Greedy action tensor([ 1.0972, -0.3244, 0.0389, -0.0256]) tensor([0.5225, 0.1261, 0.1813, 0.1700]) -Greedy action tensor([ 1.3072, -0.4922, -0.3553, -0.0533]) tensor([0.6205, 0.1026, 0.1177, 0.1592]) -Greedy action tensor([ 1.0306, -0.4737, -0.1162, -0.2162]) tensor([0.5473, 0.1216, 0.1738, 0.1573]) -Greedy action tensor([ 0.6238, -0.3554, -0.0765, 0.0190]) tensor([0.4135, 0.1553, 0.2053, 0.2259]) -Greedy action tensor([ 0.9882, -0.3600, 0.0754, -0.0985]) tensor([0.5004, 0.1300, 0.2009, 0.1688]) -Greedy action tensor([ 1.4519, -0.3064, -0.4181, 0.0321]) tensor([0.6377, 0.1099, 0.0983, 0.1542]) -Greedy action tensor([ 0.9430, -0.5195, -0.2841, 0.0850]) tensor([0.5131, 0.1189, 0.1504, 0.2176]) -Greedy action tensor([ 0.8655, -0.4161, -0.1543, 0.2652]) tensor([0.4573, 0.1269, 0.1649, 0.2509]) -Greedy action tensor([ 1.5411, -0.9250, -0.3671, 0.2735]) tensor([0.6602, 0.0561, 0.0979, 0.1858]) -Greedy action tensor([ 1.4115, -0.3912, -0.3930, 0.1485]) tensor([0.6203, 0.1022, 0.1021, 0.1754]) -Greedy action tensor([ 1.2486, -0.4117, 0.1409, -0.1518]) tensor([0.5660, 0.1076, 0.1869, 0.1395]) -Greedy action tensor([ 0.6619, -0.3632, -0.0979, -0.1708]) tensor([0.4422, 0.1586, 0.2068, 0.1923]) -Greedy action tensor([ 1.2800, -0.3591, -0.2437, 0.0573]) tensor([0.5860, 0.1138, 0.1277, 0.1725]) -Greedy action tensor([ 0.8219, -0.5886, -0.4049, 0.6007]) tensor([0.4276, 0.1043, 0.1254, 0.3427]) -Greedy action tensor([ 1.4996, -0.7424, -0.3683, 0.6369]) tensor([0.5943, 0.0631, 0.0918, 0.2508]) -Greedy action tensor([ 0.9946, -0.4379, -0.1809, 0.4125]) tensor([0.4748, 0.1133, 0.1466, 0.2653]) -Greedy action tensor([ 0.4672, -0.1736, -0.2997, -0.2518]) tensor([0.4035, 0.2126, 0.1874, 0.1966]) -Greedy action tensor([ 1.1416, -0.5041, -0.1503, 0.2612]) tensor([0.5313, 0.1025, 0.1460, 0.2203]) -Greedy action tensor([ 0.7184, -0.2814, -0.1678, -0.2495]) tensor([0.4629, 0.1703, 0.1908, 0.1759]) -Greedy action tensor([ 1.2533, -0.4395, -0.0405, 0.0317]) tensor([0.5704, 0.1050, 0.1564, 0.1681]) -Greedy action tensor([ 1.0518, -0.6907, -0.2707, 0.7876]) tensor([0.4526, 0.0792, 0.1206, 0.3475]) -Greedy action tensor([ 0.4843, -0.2100, -0.0922, 0.1594]) tensor([0.3592, 0.1794, 0.2018, 0.2596]) -Greedy action tensor([ 1.9531, -0.4884, -0.4221, -0.0687]) tensor([0.7619, 0.0663, 0.0709, 0.1009]) -Greedy action tensor([ 1.3868, -0.3062, -0.0666, 0.1213]) tensor([0.5883, 0.1082, 0.1375, 0.1659]) -Greedy action tensor([ 0.8903, -0.4233, -0.3769, 0.2756]) tensor([0.4782, 0.1286, 0.1347, 0.2586]) -Greedy action tensor([ 1.0216, -0.4515, -0.5669, 0.8556]) tensor([0.4385, 0.1005, 0.0896, 0.3714]) -Greedy action tensor([ 1.2995, -0.6472, -0.4327, 0.6120]) tensor([0.5487, 0.0783, 0.0971, 0.2759]) -Greedy action tensor([ 0.7794, -0.3170, -0.1296, -0.0599]) tensor([0.4610, 0.1540, 0.1858, 0.1992]) -Greedy action tensor([ 1.4558, -0.1876, -0.4925, 0.0917]) tensor([0.6284, 0.1215, 0.0896, 0.1606]) -Greedy action tensor([ 1.2624, -0.7418, -0.3187, 0.2514]) tensor([0.5867, 0.0791, 0.1207, 0.2135]) -Greedy action tensor([ 1.6805, -0.7474, -0.0821, 0.0104]) tensor([0.6906, 0.0609, 0.1185, 0.1300]) -Greedy action tensor([ 0.6242, -0.2191, -0.2942, -0.0611]) tensor([0.4286, 0.1844, 0.1711, 0.2160]) -Greedy action tensor([ 1.0797, -0.2856, -0.0726, -0.1795]) tensor([0.5391, 0.1376, 0.1703, 0.1530]) -Greedy action tensor([ 1.1888, -0.6383, -0.1637, 0.1908]) tensor([0.5593, 0.0900, 0.1446, 0.2061]) -Greedy action tensor([ 1.4337, -0.5311, -0.2016, 0.0832]) tensor([0.6273, 0.0879, 0.1223, 0.1625]) -Greedy action tensor([ 1.2395, -0.1300, -0.4911, -0.2168]) tensor([0.6008, 0.1527, 0.1064, 0.1400]) -Greedy action tensor([ 0.9061, -0.4608, -0.2024, 0.1040]) tensor([0.4918, 0.1254, 0.1623, 0.2205]) -Greedy action tensor([ 1.2937, -0.5263, -0.4473, 0.3359]) tensor([0.5810, 0.0941, 0.1019, 0.2230]) -Greedy action tensor([ 1.3208, -0.5268, -0.1824, -0.0392]) tensor([0.6110, 0.0963, 0.1359, 0.1568]) -Greedy action tensor([ 0.8398, -0.4803, -0.3403, 0.3894]) tensor([0.4521, 0.1208, 0.1389, 0.2882]) -Greedy action tensor([ 1.0920, -0.5184, -0.2248, 0.3686]) tensor([0.5121, 0.1023, 0.1372, 0.2484]) -Greedy action tensor([ 0.9900, -0.2728, 0.1234, -0.0910]) tensor([0.4896, 0.1385, 0.2058, 0.1661]) -Greedy action tensor([ 1.0895, -0.0333, -0.7405, -0.4310]) tensor([0.5867, 0.1909, 0.0941, 0.1283]) -Greedy action tensor([ 0.3476, -0.3838, -0.4510, 0.2584]) tensor([0.3514, 0.1691, 0.1581, 0.3214]) -Greedy action tensor([ 1.3100, -0.4063, -0.2918, -0.1530]) tensor([0.6200, 0.1114, 0.1250, 0.1436]) -Greedy action tensor([ 1.0166, -0.7188, -0.0902, 0.0178]) tensor([0.5333, 0.0940, 0.1763, 0.1964]) -Greedy action tensor([ 0.8245, -0.6946, -0.1850, 0.1159]) tensor([0.4818, 0.1055, 0.1756, 0.2372]) -Greedy action tensor([ 0.5356, -0.1686, -0.2345, 0.1542]) tensor([0.3787, 0.1873, 0.1753, 0.2586]) -Greedy action tensor([ 0.8368, -0.4242, -0.1052, 0.0298]) tensor([0.4718, 0.1337, 0.1839, 0.2105]) -Greedy action tensor([ 0.9795, -0.5705, 0.0106, 0.0950]) tensor([0.4988, 0.1059, 0.1893, 0.2060]) -Greedy action tensor([ 1.1320, -0.3814, -0.4949, 0.4098]) tensor([0.5257, 0.1157, 0.1033, 0.2553]) -Greedy action tensor([ 1.2837, -0.8908, -0.3020, 0.7417]) tensor([0.5263, 0.0598, 0.1078, 0.3061]) -Greedy action tensor([ 1.3734, -0.7415, -0.2421, 0.2477]) tensor([0.6083, 0.0734, 0.1209, 0.1974]) -Greedy action tensor([ 0.6979, -0.7086, -0.0384, -0.0403]) tensor([0.4541, 0.1113, 0.2175, 0.2171]) -Greedy action tensor([ 1.2042, -0.5849, -0.2367, -0.0497]) tensor([0.5920, 0.0989, 0.1401, 0.1689]) -Greedy action tensor([ 1.8773, -0.5439, -0.5536, -0.2405]) tensor([0.7710, 0.0685, 0.0678, 0.0927]) -Greedy action tensor([ 0.7355, -0.2452, -0.1813, 0.2298]) tensor([0.4205, 0.1577, 0.1681, 0.2536]) -Greedy action tensor([ 1.2180, -0.6491, -0.2873, 0.0148]) tensor([0.5964, 0.0922, 0.1324, 0.1791]) -Greedy action tensor([ 1.1526, -0.2958, -0.2477, -0.1415]) tensor([0.5696, 0.1338, 0.1404, 0.1561]) -Greedy action tensor([ 1.1063, -0.7143, -0.3232, 0.4697]) tensor([0.5180, 0.0839, 0.1240, 0.2741]) -Greedy action tensor([ 0.4865, -0.0396, -0.1229, -0.2590]) tensor([0.3833, 0.2265, 0.2084, 0.1819]) -Greedy action tensor([ 0.5914, -0.3313, -0.2420, 0.3898]) tensor([0.3774, 0.1500, 0.1640, 0.3085]) -Greedy action tensor([ 0.8265, -0.5108, -0.0577, -0.0953]) tensor([0.4823, 0.1266, 0.1992, 0.1919]) -Greedy action tensor([ 0.8876, -0.6581, 0.0012, 0.0929]) tensor([0.4815, 0.1026, 0.1984, 0.2175]) -Greedy action tensor([ 1.1366, -0.5174, 0.0100, 0.0144]) tensor([0.5432, 0.1039, 0.1761, 0.1768]) -Greedy action tensor([ 1.0654, -0.5263, -0.2301, -0.0237]) tensor([0.5513, 0.1122, 0.1509, 0.1855]) -Greedy action tensor([ 0.9926, -0.2709, -0.2124, 0.1649]) tensor([0.4952, 0.1400, 0.1484, 0.2164]) -Greedy action tensor([ 1.1970, -0.3614, -0.1887, 0.0078]) tensor([0.5665, 0.1192, 0.1417, 0.1725]) -Greedy action tensor([ 1.2867, -0.4703, -0.3691, 0.2624]) tensor([0.5805, 0.1002, 0.1109, 0.2084]) -Greedy action tensor([ 0.4822, -0.1292, -0.2081, -0.1744]) tensor([0.3902, 0.2117, 0.1957, 0.2024]) -Greedy action tensor([ 1.3744, -0.2382, -0.3883, 0.1896]) tensor([0.5964, 0.1189, 0.1023, 0.1824]) -Greedy action tensor([ 1.0636, -0.7030, -0.1565, -0.0485]) tensor([0.5571, 0.0952, 0.1645, 0.1832]) -Greedy action tensor([ 1.7290, -0.6079, -0.1887, 0.2169]) tensor([0.6830, 0.0660, 0.1004, 0.1506]) -Greedy action tensor([ 0.3574, 0.0184, 0.0519, -0.2363]) tensor([0.3332, 0.2374, 0.2455, 0.1840]) -Greedy action tensor([ 0.6362, 0.2954, -0.2126, -0.3233]) tensor([0.3965, 0.2820, 0.1697, 0.1519]) -Greedy action tensor([ 0.2773, -0.0252, -0.0574, 0.0543]) tensor([0.3073, 0.2271, 0.2199, 0.2458]) -Greedy action tensor([ 0.9086, -0.5629, 0.0195, -0.5163]) tensor([0.5316, 0.1220, 0.2185, 0.1279]) -Greedy action tensor([ 0.2342, -0.2856, 0.0560, -0.2938]) tensor([0.3310, 0.1968, 0.2770, 0.1952]) -Greedy action tensor([ 0.5665, -0.0814, -0.0489, -0.3724]) tensor([0.4074, 0.2131, 0.2202, 0.1593]) -Greedy action tensor([ 0.3180, 0.0553, -0.0702, -0.2281]) tensor([0.3304, 0.2541, 0.2241, 0.1914]) -Greedy action tensor([ 0.2675, 0.0493, -0.1474, -0.1396]) tensor([0.3195, 0.2569, 0.2110, 0.2126]) -Greedy action tensor([ 0.8344, -0.5573, 0.0068, -0.4345]) tensor([0.5084, 0.1264, 0.2222, 0.1429]) -Greedy action tensor([ 0.4241, -0.1431, -0.0152, -0.2294]) tensor([0.3661, 0.2076, 0.2359, 0.1904]) -Greedy action tensor([ 0.8574, -0.5766, 0.0943, -0.6045]) tensor([0.5164, 0.1231, 0.2408, 0.1197]) -Greedy action tensor([ 0.5654, 0.0888, -0.1105, -0.3063]) tensor([0.3925, 0.2437, 0.1997, 0.1642]) -Greedy action tensor([ 0.6175, -0.2014, 0.0417, -0.3146]) tensor([0.4172, 0.1839, 0.2346, 0.1643]) -Greedy action tensor([ 0.3872, -0.1181, 0.0932, -0.2634]) tensor([0.3484, 0.2102, 0.2596, 0.1818]) -Greedy action tensor([ 0.3823, -0.3331, 0.0738, -0.5119]) tensor([0.3799, 0.1858, 0.2790, 0.1553]) -Greedy action tensor([ 0.6433, -0.0093, -0.1790, -0.2004]) tensor([0.4184, 0.2179, 0.1838, 0.1800]) -Greedy action tensor([ 0.5778, -0.1371, 0.0947, -0.2066]) tensor([0.3902, 0.1909, 0.2407, 0.1781]) -Greedy action tensor([ 0.3968, -0.0756, -0.0427, -0.1929]) tensor([0.3543, 0.2209, 0.2283, 0.1965]) -Greedy action tensor([ 0.5680, -0.2781, -0.0130, -0.3403]) tensor([0.4181, 0.1794, 0.2339, 0.1686]) -Greedy action tensor([ 0.4799, -0.3809, 0.1070, -0.4906]) tensor([0.4015, 0.1698, 0.2766, 0.1521]) -Greedy action tensor([ 0.5246, -0.0574, -0.0541, -0.2315]) tensor([0.3863, 0.2158, 0.2166, 0.1813]) -Greedy action tensor([ 0.9778, -0.3046, -0.0651, -0.4328]) tensor([0.5337, 0.1480, 0.1881, 0.1302]) -Greedy action tensor([ 0.9085, -0.5543, -0.0685, -0.4126]) tensor([0.5334, 0.1235, 0.2008, 0.1423]) -Greedy action tensor([ 0.6206, -0.2478, -0.0372, -0.5273]) tensor([0.4435, 0.1861, 0.2297, 0.1407]) -Greedy action tensor([ 0.3810, -0.0260, -0.1097, -0.0564]) tensor([0.3420, 0.2277, 0.2094, 0.2209]) -Greedy action tensor([ 0.4813, -0.0949, -0.0613, -0.2792]) tensor([0.3830, 0.2153, 0.2226, 0.1790]) -Greedy action tensor([ 0.4368, -0.0851, 0.0171, -0.2419]) tensor([0.3626, 0.2152, 0.2383, 0.1839]) -Greedy action tensor([ 0.1841, -0.0231, 0.1013, -0.1495]) tensor([0.2899, 0.2356, 0.2668, 0.2077]) -Greedy action tensor([ 0.4962, -0.3947, -0.1824, -0.4732]) tensor([0.4354, 0.1786, 0.2209, 0.1651]) -Greedy action tensor([ 0.2281, -0.2755, 0.1881, -0.3889]) tensor([0.3221, 0.1947, 0.3095, 0.1738]) -Greedy action tensor([ 0.5906, -0.2450, -0.0626, -0.2023]) tensor([0.4155, 0.1802, 0.2162, 0.1881]) -Greedy action tensor([ 0.4947, -0.1798, 0.0414, -0.2981]) tensor([0.3850, 0.1961, 0.2447, 0.1742]) -Greedy action tensor([ 0.2977, -0.0509, 0.0947, -0.2904]) tensor([0.3250, 0.2293, 0.2653, 0.1805]) -Greedy action tensor([ 0.4194, -0.1576, -0.0748, -0.2710]) tensor([0.3741, 0.2101, 0.2282, 0.1876]) -Greedy action tensor([ 0.5398, 0.2666, -0.0605, -0.1782]) tensor([0.3575, 0.2720, 0.1961, 0.1744]) -Greedy action tensor([ 0.4498, -0.0230, -0.0604, -0.1689]) tensor([0.3620, 0.2256, 0.2174, 0.1950]) -Greedy action tensor([ 0.5010, -0.1813, 0.1681, -0.3153]) tensor([0.3753, 0.1897, 0.2690, 0.1659]) -Greedy action tensor([ 0.4377, -0.0189, -0.0158, -0.1949]) tensor([0.3571, 0.2262, 0.2269, 0.1897]) -Greedy action tensor([ 0.5458, 0.0037, -0.0983, -0.1714]) tensor([0.3854, 0.2241, 0.2024, 0.1881]) -Greedy action tensor([ 3.1412e-01, 4.0267e-02, -2.1945e-04, -4.6661e-01]) tensor([0.3391, 0.2579, 0.2477, 0.1553]) -Greedy action tensor([ 0.4304, -0.2311, -0.0651, -0.1503]) tensor([0.3725, 0.1922, 0.2269, 0.2084]) -Greedy action tensor([ 0.6108, -0.3655, -0.1032, -0.3321]) tensor([0.4433, 0.1670, 0.2171, 0.1727]) -Greedy action tensor([ 0.3475, 0.3333, -0.0842, -0.2010]) tensor([0.3112, 0.3068, 0.2021, 0.1798]) -Greedy action tensor([ 0.4750, -0.1689, -0.0673, -0.2995]) tensor([0.3895, 0.2046, 0.2264, 0.1795]) -Greedy action tensor([ 0.4842, 0.0235, -0.0858, -0.1749]) tensor([0.3685, 0.2325, 0.2084, 0.1906]) -Greedy action tensor([ 0.6461, -0.0862, 0.1650, -0.0727]) tensor([0.3867, 0.1859, 0.2390, 0.1884]) -Greedy action tensor([ 0.9239, -0.7733, -0.0185, -0.5485]) tensor([0.5549, 0.1016, 0.2162, 0.1273]) -Greedy action tensor([ 0.4991, -0.2419, -0.1157, -0.2529]) tensor([0.4018, 0.1915, 0.2173, 0.1894]) -Greedy action tensor([ 0.6376, -0.3197, 0.0790, -0.3213]) tensor([0.4275, 0.1641, 0.2445, 0.1639]) -Greedy action tensor([ 0.3978, 0.0240, 0.0461, -0.2962]) tensor([0.3459, 0.2380, 0.2433, 0.1728]) -Greedy action tensor([ 0.5794, -0.2785, -0.0357, -0.5508]) tensor([0.4371, 0.1854, 0.2363, 0.1412]) -Greedy action tensor([ 0.2743, 0.0908, -0.1174, -0.2240]) tensor([0.3209, 0.2671, 0.2169, 0.1950]) -Greedy action tensor([ 0.8223, -0.5555, 0.0393, -0.4217]) tensor([0.5007, 0.1262, 0.2288, 0.1443]) -Greedy action tensor([ 0.7439, -0.3953, -0.0818, -0.3471]) tensor([0.4776, 0.1529, 0.2091, 0.1604]) -Greedy action tensor([ 0.6855, -0.3085, -0.0125, -0.3210]) tensor([0.4478, 0.1657, 0.2228, 0.1637]) -Greedy action tensor([ 0.4833, -0.1408, 0.1232, -0.2914]) tensor([0.3712, 0.1989, 0.2589, 0.1710]) -Greedy action tensor([ 0.5882, -0.3859, -0.1085, -0.4489]) tensor([0.4484, 0.1693, 0.2234, 0.1589]) -Greedy action tensor([ 0.5369, -0.1332, -0.0291, -0.3491]) tensor([0.4013, 0.2053, 0.2279, 0.1655]) -Greedy action tensor([ 0.9710, -0.1891, -0.0864, -0.3503]) tensor([0.5188, 0.1626, 0.1802, 0.1384]) -Greedy action tensor([ 0.7298, -0.0133, -0.0161, -0.2388]) tensor([0.4293, 0.2042, 0.2036, 0.1630]) -Greedy action tensor([ 0.5005, 0.0135, -0.0568, -0.1537]) tensor([0.3694, 0.2270, 0.2116, 0.1920]) -Greedy action tensor([ 0.2228, 0.1384, 0.0582, -0.1982]) tensor([0.2921, 0.2684, 0.2478, 0.1917]) -Greedy action tensor([ 0.5307, -0.2714, 0.2912, -0.4929]) tensor([0.3854, 0.1728, 0.3033, 0.1385]) -Greedy action tensor([ 0.0202, 0.1444, -0.0605, -0.0913]) tensor([0.2532, 0.2867, 0.2336, 0.2265]) -Greedy action tensor([ 1.3271, -1.0513, 0.0497, -0.9925]) tensor([0.6804, 0.0631, 0.1897, 0.0669]) -Greedy action tensor([ 0.2862, -0.0110, 0.0466, -0.1023]) tensor([0.3117, 0.2316, 0.2453, 0.2114]) -Greedy action tensor([ 0.7755, -0.3736, -0.0880, -0.5292]) tensor([0.4976, 0.1577, 0.2098, 0.1350]) -Greedy action tensor([ 0.6680, 0.0087, 0.0484, -0.5717]) tensor([0.4265, 0.2206, 0.2295, 0.1234]) -Greedy action tensor([ 0.3205, 0.1356, 0.0519, -0.0926]) tensor([0.3070, 0.2552, 0.2347, 0.2031]) -Greedy action tensor([ 0.9291, -0.8412, -0.1053, -0.3484]) tensor([0.5542, 0.0944, 0.1970, 0.1545]) -Greedy action tensor([ 0.4295, -0.2052, -0.0667, -0.3545]) tensor([0.3853, 0.2042, 0.2346, 0.1759]) -Greedy action tensor([ 0.7532, -0.3673, 0.0081, -0.3495]) tensor([0.4689, 0.1529, 0.2226, 0.1557]) -Greedy action tensor([ 0.9551, 0.2694, -0.1499, -0.3706]) tensor([0.4761, 0.2398, 0.1577, 0.1264]) -Greedy action tensor([ 0.5953, -0.2736, -0.0894, -0.2107]) tensor([0.4219, 0.1769, 0.2127, 0.1884]) -Greedy action tensor([ 0.8139, -0.3870, 0.0882, -0.6047]) tensor([0.4934, 0.1485, 0.2388, 0.1194]) -Greedy action tensor([ 0.5413, -0.1655, 0.0985, -0.2618]) tensor([0.3871, 0.1909, 0.2486, 0.1734]) -Greedy action tensor([ 0.8060, -0.3819, -0.0858, -0.4597]) tensor([0.5008, 0.1527, 0.2053, 0.1412]) -Greedy action tensor([ 0.6055, -0.1244, -0.0921, -0.2860]) tensor([0.4185, 0.2017, 0.2083, 0.1716]) -Greedy action tensor([ 0.7449, -0.5083, 0.1352, -0.7374]) tensor([0.4863, 0.1389, 0.2643, 0.1105]) -Greedy action tensor([ 0.5517, -0.5406, 0.2827, -0.5484]) tensor([0.4111, 0.1379, 0.3142, 0.1368]) -Greedy action tensor([ 0.7015, -0.5061, -0.0862, -0.6781]) tensor([0.4986, 0.1491, 0.2268, 0.1255]) -Greedy action tensor([-1.3884, -0.5938, 0.9759, 0.9909]) tensor([0.0406, 0.0898, 0.4315, 0.4381]) -Greedy action tensor([-0.9326, -0.2851, 0.8950, 1.3039]) tensor([0.0541, 0.1033, 0.3363, 0.5062]) -Greedy action tensor([-1.2789, -0.5316, 0.3025, 0.2583]) tensor([0.0792, 0.1672, 0.3851, 0.3685]) -Greedy action tensor([-0.7565, -0.1832, 0.0697, 0.1642]) tensor([0.1321, 0.2344, 0.3018, 0.3317]) -Greedy action tensor([-1.8015, -0.5493, 0.7074, 0.0134]) tensor([0.0436, 0.1526, 0.5360, 0.2678]) -Greedy action tensor([-0.4233, -0.4797, 0.2874, 0.2934]) tensor([0.1659, 0.1568, 0.3376, 0.3397]) -Greedy action tensor([-1.5910, -0.4791, 0.3896, -0.0654]) tensor([0.0630, 0.1914, 0.4562, 0.2895]) -Greedy action tensor([-1.0502, -0.6033, 0.4602, 0.8059]) tensor([0.0741, 0.1159, 0.3357, 0.4743]) -Greedy action tensor([-1.7129, -0.3950, 0.5361, -0.0727]) tensor([0.0516, 0.1929, 0.4893, 0.2662]) -Greedy action tensor([-1.3777, -0.5215, 0.3798, 0.2038]) tensor([0.0714, 0.1680, 0.4137, 0.3469]) -Greedy action tensor([-1.8148, -0.9036, 0.2308, -0.3491]) tensor([0.0643, 0.1599, 0.4973, 0.2785]) -Greedy action tensor([-1.8708, -0.4318, 0.6329, -0.1241]) tensor([0.0431, 0.1819, 0.5275, 0.2474]) -Greedy action tensor([-1.2512, -0.7661, 0.7657, 0.1769]) tensor([0.0699, 0.1135, 0.5252, 0.2915]) -Greedy action tensor([-0.5501, -0.1091, 0.6045, 1.2213]) tensor([0.0862, 0.1339, 0.2734, 0.5066]) -Greedy action tensor([-0.8224, 0.1837, -0.2245, -0.0920]) tensor([0.1311, 0.3585, 0.2383, 0.2721]) -Greedy action tensor([-0.2891, -0.2340, 1.1395, 1.6631]) tensor([0.0753, 0.0796, 0.3144, 0.5307]) -Greedy action tensor([-1.8063, -0.4489, 0.5967, -0.0855]) tensor([0.0464, 0.1805, 0.5135, 0.2596]) -Greedy action tensor([-1.8288, -0.4804, 0.6492, -0.0401]) tensor([0.0440, 0.1693, 0.5238, 0.2629]) -Greedy action tensor([-1.6088, -0.5612, 0.6009, 0.1470]) tensor([0.0533, 0.1520, 0.4860, 0.3087]) -Greedy action tensor([-0.8482, 0.4410, 0.2307, -0.1327]) tensor([0.1040, 0.3775, 0.3059, 0.2127]) -Greedy action tensor([-0.1961, 0.2938, 0.1879, 0.3894]) tensor([0.1696, 0.2768, 0.2490, 0.3046]) -Greedy action tensor([-1.0799, -0.2591, 0.5575, -0.6047]) tensor([0.0998, 0.2267, 0.5130, 0.1605]) -Greedy action tensor([-1.4509, -0.5934, 0.4038, 0.1195]) tensor([0.0687, 0.1619, 0.4390, 0.3304]) -Greedy action tensor([-1.6017, -0.7522, 0.3027, -0.0037]) tensor([0.0667, 0.1559, 0.4478, 0.3296]) -Greedy action tensor([-1.4308, -0.5527, 0.4073, 0.1881]) tensor([0.0678, 0.1633, 0.4264, 0.3425]) -Greedy action tensor([-1.8590, -0.4010, 0.5818, -0.0074]) tensor([0.0432, 0.1856, 0.4960, 0.2752]) -Greedy action tensor([-1.3923, -0.4715, 0.5180, 0.5845]) tensor([0.0572, 0.1436, 0.3863, 0.4129]) -Greedy action tensor([0.0504, 0.1051, 0.8530, 1.7653]) tensor([0.1016, 0.1073, 0.2267, 0.5644]) -Greedy action tensor([-2.0599, -0.9104, 1.2021, 0.5708]) tensor([0.0227, 0.0715, 0.5913, 0.3145]) -Greedy action tensor([-0.3666, -0.1890, 1.0290, 1.5952]) tensor([0.0749, 0.0895, 0.3026, 0.5330]) -Greedy action tensor([-0.7305, 0.9409, -0.0534, 0.3566]) tensor([0.0889, 0.4727, 0.1749, 0.2635]) -Greedy action tensor([-1.3136, -0.0621, 0.0463, -0.4643]) tensor([0.0932, 0.3258, 0.3631, 0.2179]) -Greedy action tensor([-1.6663, -0.5059, 0.5027, 0.0165]) tensor([0.0546, 0.1742, 0.4776, 0.2937]) -Greedy action tensor([-1.8418, -0.4719, 0.6087, -0.1284]) tensor([0.0453, 0.1782, 0.5252, 0.2513]) -Greedy action tensor([-1.5682, -0.0132, 0.6652, 0.6149]) tensor([0.0418, 0.1978, 0.3898, 0.3707]) -Greedy action tensor([-1.8482, -0.6611, 0.0761, -0.3371]) tensor([0.0639, 0.2093, 0.4375, 0.2894]) -Greedy action tensor([-0.7656, -0.5361, 0.8303, 1.1596]) tensor([0.0712, 0.0895, 0.3512, 0.4881]) -Greedy action tensor([-0.3650, -0.2544, 0.2053, 0.6698]) tensor([0.1492, 0.1667, 0.2640, 0.4201]) -Greedy action tensor([-1.0203, -0.5707, 0.2411, 0.3515]) tensor([0.0996, 0.1561, 0.3516, 0.3927]) -Greedy action tensor([-1.8019, -0.4925, 0.5921, -0.0871]) tensor([0.0471, 0.1746, 0.5165, 0.2618]) -Greedy action tensor([-1.7948, -0.4881, 1.2953, 0.9020]) tensor([0.0241, 0.0890, 0.5295, 0.3574]) -Greedy action tensor([-0.6316, 0.9981, 0.7250, 1.2701]) tensor([0.0599, 0.3059, 0.2328, 0.4014]) -Greedy action tensor([-1.5037, -0.5385, 0.4168, 0.1421]) tensor([0.0640, 0.1679, 0.4365, 0.3316]) -Greedy action tensor([-0.4814, -0.1507, 0.8159, 1.4711]) tensor([0.0764, 0.1063, 0.2794, 0.5380]) -Greedy action tensor([-1.0768, 0.3933, 0.2883, 0.0634]) tensor([0.0807, 0.3510, 0.3160, 0.2523]) -Greedy action tensor([-0.5745, -0.5551, 0.2073, 0.1612]) tensor([0.1589, 0.1621, 0.3473, 0.3317]) -Greedy action tensor([-1.6320, -0.4951, 0.5391, 0.0910]) tensor([0.0541, 0.1686, 0.4743, 0.3030]) -Greedy action tensor([-1.0461, -0.5456, 0.2156, 0.3731]) tensor([0.0969, 0.1599, 0.3424, 0.4008]) -Greedy action tensor([-1.1210, -0.3426, 0.5226, 0.9948]) tensor([0.0601, 0.1308, 0.3108, 0.4983]) -Greedy action tensor([-1.4581, -0.3327, 0.2732, 0.0087]) tensor([0.0711, 0.2191, 0.4016, 0.3083]) -Greedy action tensor([-0.3926, -0.4531, 0.2215, 0.2151]) tensor([0.1778, 0.1673, 0.3285, 0.3264]) -Greedy action tensor([-0.7258, -0.5358, 0.2697, 0.1003]) tensor([0.1389, 0.1680, 0.3759, 0.3173]) -Greedy action tensor([-1.1255, -0.5000, 0.2717, 0.6422]) tensor([0.0783, 0.1464, 0.3167, 0.4587]) -Greedy action tensor([-1.6915e+00, -4.5895e-01, 5.3651e-01, -8.0323e-04]) tensor([0.0523, 0.1793, 0.4851, 0.2834]) -Greedy action tensor([-1.8412, -0.4498, 0.6210, -0.1089]) tensor([0.0446, 0.1795, 0.5236, 0.2523]) -Greedy action tensor([-1.7412, -0.4961, 0.5778, -0.0588]) tensor([0.0500, 0.1735, 0.5079, 0.2687]) -Greedy action tensor([-1.3284, -0.5006, 0.4653, -0.1339]) tensor([0.0794, 0.1816, 0.4770, 0.2620]) -Greedy action tensor([-0.8452, -0.5002, 0.9074, 1.4779]) tensor([0.0544, 0.0768, 0.3138, 0.5551]) -Greedy action tensor([-1.3835, -0.0327, 0.5352, 0.6141]) tensor([0.0525, 0.2027, 0.3577, 0.3871]) -Greedy action tensor([-1.3665, -0.6073, 0.5543, 0.5274]) tensor([0.0602, 0.1286, 0.4110, 0.4001]) -Greedy action tensor([-1.5690, -0.5004, 0.4942, 0.0660]) tensor([0.0591, 0.1722, 0.4654, 0.3033]) -Greedy action tensor([-1.1193, -0.7695, 0.6340, 0.2787]) tensor([0.0817, 0.1159, 0.4717, 0.3306]) -Greedy action tensor([-1.7183, -0.3609, 0.5168, -0.0413]) tensor([0.0511, 0.1984, 0.4773, 0.2732]) -Greedy action tensor([-1.3640, -0.7556, 0.7583, 0.5777]) tensor([0.0551, 0.1012, 0.4599, 0.3839]) -Greedy action tensor([-1.3851, -0.5625, 0.8610, 1.0295]) tensor([0.0418, 0.0952, 0.3952, 0.4678]) -Greedy action tensor([-1.7130, -0.4599, 0.8041, 0.5349]) tensor([0.0379, 0.1328, 0.4701, 0.3592]) -Greedy action tensor([-1.3788, -0.5929, 0.8667, 0.9766]) tensor([0.0431, 0.0947, 0.4074, 0.4548]) -Greedy action tensor([-1.4567, -0.4708, 0.6020, 0.6227]) tensor([0.0512, 0.1373, 0.4015, 0.4099]) -Greedy action tensor([-0.9881, -0.5630, 1.0072, 1.4574]) tensor([0.0467, 0.0714, 0.3433, 0.5386]) -Greedy action tensor([-1.7782, -0.6087, 0.4931, -0.0917]) tensor([0.0518, 0.1668, 0.5019, 0.2796]) -Greedy action tensor([-1.6590, -0.3958, 1.0272, 0.9107]) tensor([0.0310, 0.1096, 0.4547, 0.4047]) -Greedy action tensor([-1.2116, -0.5657, 0.3734, 0.6094]) tensor([0.0716, 0.1366, 0.3494, 0.4424]) -Greedy action tensor([-1.8979, -0.8308, 0.2164, -0.2859]) tensor([0.0581, 0.1690, 0.4815, 0.2914]) -Greedy action tensor([-1.1470, -0.5541, 0.2410, 0.3708]) tensor([0.0879, 0.1590, 0.3522, 0.4010]) -Greedy action tensor([-1.8965, -0.6744, 0.6067, -0.0306]) tensor([0.0433, 0.1471, 0.5296, 0.2800]) -Greedy action tensor([-1.3901, -0.1197, -0.1711, -0.4234]) tensor([0.0946, 0.3368, 0.3200, 0.2486]) -Greedy action tensor([-1.5796, -0.5141, 0.8311, 0.5428]) tensor([0.0427, 0.1241, 0.4763, 0.3569]) -Greedy action tensor([-1.0979, 0.3961, 0.4890, -0.4820]) tensor([0.0820, 0.3653, 0.4009, 0.1518]) -Greedy action tensor([-0.7353, -0.3977, 0.2265, 0.2887]) tensor([0.1282, 0.1796, 0.3353, 0.3569]) -Greedy action tensor([-0.5860, 0.2558, 0.2506, 0.4447]) tensor([0.1186, 0.2752, 0.2738, 0.3324]) -Greedy action tensor([-0.8763, -0.3991, 1.1372, 1.4675]) tensor([0.0487, 0.0785, 0.3650, 0.5078]) -Greedy action tensor([ 0.6714, 0.3817, 0.2351, -0.1396]) tensor([0.3522, 0.2636, 0.2277, 0.1565]) -Greedy action tensor([-0.7677, 1.0677, 0.0627, 0.0433]) tensor([0.0847, 0.5306, 0.1942, 0.1905]) -Greedy action tensor([ 0.3585, 0.9431, 0.4319, -0.9552]) tensor([0.2416, 0.4335, 0.2600, 0.0649]) -Greedy action tensor([ 0.6294, 0.5459, -0.3806, 1.2534]) tensor([0.2409, 0.2216, 0.0878, 0.4497]) -Greedy action tensor([1.1700, 0.5429, 0.2548, 1.2200]) tensor([0.3349, 0.1789, 0.1341, 0.3521]) -Greedy action tensor([ 1.1822, -0.2601, 1.7237, 1.7857]) tensor([0.2090, 0.0494, 0.3593, 0.3823]) -Greedy action tensor([ 1.7616, -0.6470, 0.4144, 1.2556]) tensor([0.5121, 0.0461, 0.1331, 0.3087]) -Greedy action tensor([-0.0603, 0.1514, -0.3443, 0.2606]) tensor([0.2290, 0.2830, 0.1724, 0.3156]) -Greedy action tensor([ 0.7888, -0.8758, 2.1144, -0.0626]) tensor([0.1859, 0.0352, 0.6996, 0.0793]) -Greedy action tensor([ 2.0356, -1.2448, -0.0451, 0.8609]) tensor([0.6796, 0.0256, 0.0849, 0.2100]) -Greedy action tensor([-0.5528, -0.2079, -0.9849, 1.4206]) tensor([0.0975, 0.1377, 0.0633, 0.7015]) -Greedy action tensor([ 1.4211, -0.1967, 1.4956, 1.1516]) tensor([0.3290, 0.0653, 0.3545, 0.2513]) -Greedy action tensor([ 0.4380, -1.3725, -0.1020, 2.3351]) tensor([0.1189, 0.0194, 0.0693, 0.7924]) -Greedy action tensor([ 1.0285, 0.1841, -0.5631, 0.9788]) tensor([0.3869, 0.1663, 0.0788, 0.3681]) -Greedy action tensor([ 1.6119, -0.7279, 0.3083, 1.2085]) tensor([0.4912, 0.0473, 0.1334, 0.3281]) -Greedy action tensor([ 0.3476, 0.2863, -0.6280, 1.0532]) tensor([0.2303, 0.2166, 0.0868, 0.4663]) -Greedy action tensor([ 0.9082, -0.6580, 0.4888, -0.0387]) tensor([0.4436, 0.0926, 0.2917, 0.1721]) -Greedy action tensor([ 0.6555, 0.4148, -0.3304, -0.0749]) tensor([0.3787, 0.2976, 0.1413, 0.1824]) -Greedy action tensor([ 1.1337, -0.4421, 2.3251, 0.7757]) tensor([0.1924, 0.0398, 0.6333, 0.1345]) -Greedy action tensor([ 0.3949, -0.7334, 1.6353, -0.5948]) tensor([0.1941, 0.0628, 0.6710, 0.0721]) -Greedy action tensor([ 0.3037, 0.2552, -0.8582, 1.2667]) tensor([0.2047, 0.1950, 0.0640, 0.5362]) -Greedy action tensor([ 1.0869, -0.3964, -0.6442, -0.0544]) tensor([0.5803, 0.1316, 0.1028, 0.1853]) -Greedy action tensor([ 0.2464, -0.3199, 0.2409, -0.4469]) tensor([0.3266, 0.1854, 0.3248, 0.1633]) -Greedy action tensor([ 0.4844, -0.4290, 1.0131, -0.0307]) tensor([0.2706, 0.1086, 0.4592, 0.1617]) -Greedy action tensor([-0.0304, -0.0879, -0.1419, 1.7039]) tensor([0.1176, 0.1110, 0.1052, 0.6662]) -Greedy action tensor([ 0.6470, -0.5072, -0.6541, 0.7441]) tensor([0.3718, 0.1172, 0.1012, 0.4097]) -Greedy action tensor([ 0.6697, -0.1627, 0.5817, 0.5096]) tensor([0.3122, 0.1358, 0.2859, 0.2660]) -Greedy action tensor([ 0.6753, 0.0962, -0.4027, -0.3891]) tensor([0.4453, 0.2495, 0.1515, 0.1536]) -Greedy action tensor([ 0.3824, 0.1771, 0.7890, -1.0669]) tensor([0.2816, 0.2294, 0.4229, 0.0661]) -Greedy action tensor([ 0.0434, -1.2945, -0.6424, 0.4553]) tensor([0.3053, 0.0801, 0.1538, 0.4609]) -Greedy action tensor([ 0.5603, -0.2232, -0.3754, 0.3354]) tensor([0.3777, 0.1725, 0.1482, 0.3016]) -Greedy action tensor([ 1.0192, -0.6063, 0.5049, 1.0461]) tensor([0.3544, 0.0697, 0.2119, 0.3640]) -Greedy action tensor([ 0.2649, -0.9964, -0.3513, 0.7097]) tensor([0.2956, 0.0837, 0.1596, 0.4611]) -Greedy action tensor([-0.2979, -1.8845, -0.4083, 0.3594]) tensor([0.2482, 0.0508, 0.2222, 0.4788]) -Greedy action tensor([-0.8315, -0.8017, 0.6249, 0.4132]) tensor([0.1021, 0.1052, 0.4381, 0.3545]) -Greedy action tensor([0.2523, 0.0950, 1.4891, 0.1842]) tensor([0.1604, 0.1371, 0.5526, 0.1499]) -Greedy action tensor([-0.0463, 0.6258, 0.8226, 0.0818]) tensor([0.1543, 0.3023, 0.3680, 0.1754]) -Greedy action tensor([ 0.4768, -0.1283, -0.8631, 1.0581]) tensor([0.2781, 0.1518, 0.0728, 0.4973]) -Greedy action tensor([ 0.1720, -0.2990, 0.3608, -0.8794]) tensor([0.3143, 0.1962, 0.3796, 0.1098]) -Greedy action tensor([-0.0150, -0.7910, 2.1273, -0.5130]) tensor([0.0945, 0.0435, 0.8047, 0.0574]) -Greedy action tensor([ 0.6701, -1.2143, -0.1398, 1.1100]) tensor([0.3175, 0.0482, 0.1413, 0.4930]) -Greedy action tensor([0.4569, 0.5730, 0.0017, 0.3844]) tensor([0.2712, 0.3046, 0.1720, 0.2522]) -Greedy action tensor([0.4924, 0.6540, 0.4351, 0.6141]) tensor([0.2353, 0.2766, 0.2222, 0.2658]) -Greedy action tensor([ 0.7198, -1.2925, 0.0435, 0.3935]) tensor([0.4231, 0.0566, 0.2151, 0.3053]) -Greedy action tensor([ 1.0404, -0.7471, 0.9519, 1.0510]) tensor([0.3233, 0.0541, 0.2959, 0.3267]) -Greedy action tensor([ 1.3853, -0.8842, 0.0319, 0.7480]) tensor([0.5290, 0.0547, 0.1367, 0.2797]) -Greedy action tensor([1.2993, 0.0798, 0.6308, 0.7069]) tensor([0.4236, 0.1251, 0.2171, 0.2342]) -Greedy action tensor([ 0.0526, -1.9449, -0.4729, 0.3987]) tensor([0.3184, 0.0432, 0.1883, 0.4501]) -Greedy action tensor([ 0.3340, -0.3339, -0.8158, 1.4773]) tensor([0.2014, 0.1032, 0.0638, 0.6316]) -Greedy action tensor([ 1.1541, 0.7413, -0.7704, 0.4678]) tensor([0.4327, 0.2863, 0.0631, 0.2178]) -Greedy action tensor([ 1.2773, -0.0063, -0.5063, 1.0047]) tensor([0.4532, 0.1256, 0.0761, 0.3451]) -Greedy action tensor([ 0.2857, -1.6932, -0.7649, 0.9262]) tensor([0.2954, 0.0408, 0.1033, 0.5605]) -Greedy action tensor([ 1.7007, -0.7851, -0.2989, 1.2395]) tensor([0.5408, 0.0450, 0.0732, 0.3410]) -Greedy action tensor([0.9194, 0.1691, 0.7812, 0.8617]) tensor([0.3042, 0.1437, 0.2650, 0.2872]) -Greedy action tensor([ 0.3669, -0.5231, 0.7528, 0.7556]) tensor([0.2295, 0.0943, 0.3376, 0.3386]) -Greedy action tensor([ 0.8963, -1.2910, 2.1419, 0.7874]) tensor([0.1823, 0.0205, 0.6337, 0.1635]) -Greedy action tensor([ 0.5207, 0.7697, -0.2018, 1.0265]) tensor([0.2259, 0.2898, 0.1097, 0.3746]) -Greedy action tensor([ 1.8751, -0.9610, 0.1885, 1.7381]) tensor([0.4726, 0.0277, 0.0875, 0.4121]) -Greedy action tensor([ 0.7145, 0.3164, -0.0576, -0.2382]) tensor([0.3969, 0.2666, 0.1834, 0.1531]) -Greedy action tensor([ 0.1253, -0.9121, 0.9281, -0.2563]) tensor([0.2342, 0.0830, 0.5228, 0.1599]) -Greedy action tensor([-0.0057, -0.3190, -0.5382, 1.4712]) tensor([0.1493, 0.1092, 0.0877, 0.6539]) -Greedy action tensor([ 0.4957, -0.8489, -0.0136, 1.0268]) tensor([0.2807, 0.0732, 0.1687, 0.4774]) -Greedy action tensor([ 0.9182, 0.7242, -0.7499, -0.0412]) tensor([0.4175, 0.3439, 0.0787, 0.1599]) -Greedy action tensor([ 0.4371, 0.5517, 0.0697, -0.0653]) tensor([0.2925, 0.3280, 0.2025, 0.1770]) -Greedy action tensor([ 0.5249, -0.4319, -0.0672, 0.5728]) tensor([0.3348, 0.1286, 0.1852, 0.3513]) -Greedy action tensor([ 1.3751, 0.1135, -0.2026, 0.5816]) tensor([0.5150, 0.1458, 0.1063, 0.2329]) -Greedy action tensor([0.9775, 1.2561, 0.1706, 0.4143]) tensor([0.2997, 0.3960, 0.1337, 0.1706]) -Greedy action tensor([ 0.5946, -1.1319, -0.3178, 2.1631]) tensor([0.1568, 0.0279, 0.0630, 0.7524]) -Greedy action tensor([ 0.9552, 0.3887, -0.1768, 0.5413]) tensor([0.3920, 0.2225, 0.1264, 0.2591]) -Greedy action tensor([-0.3554, 0.8973, 0.8125, -0.3063]) tensor([0.1141, 0.3993, 0.3668, 0.1198]) -Greedy action tensor([ 1.2842, -0.3529, 0.3363, 0.1370]) tensor([0.5264, 0.1024, 0.2040, 0.1672]) -Greedy action tensor([-1.1656, 0.2066, -0.6439, -0.4674]) tensor([0.1158, 0.4565, 0.1950, 0.2327]) -Greedy action tensor([0.7510, 0.2578, 1.2616, 1.2322]) tensor([0.2043, 0.1248, 0.3404, 0.3305]) -Greedy action tensor([ 0.3094, 0.2156, -0.5783, -0.6329]) tensor([0.3688, 0.3357, 0.1518, 0.1437]) -Greedy action tensor([ 0.1325, -1.0167, 0.1654, 0.6081]) tensor([0.2526, 0.0800, 0.2610, 0.4064]) -Greedy action tensor([ 0.8071, -0.9242, 0.9863, 2.1183]) tensor([0.1644, 0.0291, 0.1966, 0.6099]) -Greedy action tensor([-0.4499, -0.2375, -0.3810, 0.2152]) tensor([0.1904, 0.2354, 0.2040, 0.3702]) -Greedy action tensor([1.1759, 0.1148, 0.0322, 0.8213]) tensor([0.4226, 0.1463, 0.1347, 0.2965]) -Greedy action tensor([ 1.1913, -0.3845, 0.7543, 0.7530]) tensor([0.4003, 0.0828, 0.2586, 0.2583]) -Greedy action tensor([0.5336, 0.6550, 1.0384, 0.3612]) tensor([0.2161, 0.2440, 0.3580, 0.1819]) -Greedy action tensor([ 0.9096, 0.3418, -0.3046, 0.0539]) tensor([0.4369, 0.2476, 0.1297, 0.1857]) -Greedy action tensor([ 0.6335, 0.2007, 0.2469, -0.0924]) tensor([0.3556, 0.2307, 0.2416, 0.1721]) -Greedy action tensor([ 1.1574, -0.3446, -0.3660, -0.0289]) tensor([0.5727, 0.1275, 0.1248, 0.1749]) -Greedy action tensor([ 0.8435, -0.3955, -0.0575, -0.1137]) tensor([0.4808, 0.1393, 0.1953, 0.1846]) -Greedy action tensor([ 1.0528, -0.0944, -0.2431, -0.3290]) tensor([0.5428, 0.1724, 0.1485, 0.1363]) -Greedy action tensor([ 0.8611, -0.1480, -0.4209, 0.0660]) tensor([0.4777, 0.1741, 0.1325, 0.2157]) -Greedy action tensor([ 1.5963, -0.2896, -0.0082, 0.0461]) tensor([0.6390, 0.0969, 0.1284, 0.1356]) -Greedy action tensor([ 0.9829, -0.2489, -0.5379, 0.3747]) tensor([0.4867, 0.1420, 0.1064, 0.2649]) -Greedy action tensor([ 0.4266, -0.2696, -0.4336, 0.0818]) tensor([0.3802, 0.1895, 0.1609, 0.2694]) -Greedy action tensor([ 0.9112, -0.5084, -0.1043, 0.1433]) tensor([0.4836, 0.1169, 0.1752, 0.2244]) -Greedy action tensor([ 0.9010, -0.3217, -0.0826, -0.0362]) tensor([0.4854, 0.1429, 0.1815, 0.1901]) -Greedy action tensor([ 0.8514, -0.3787, -0.1719, 0.0184]) tensor([0.4793, 0.1401, 0.1723, 0.2084]) -Greedy action tensor([ 1.8292, -0.5427, -0.7264, 0.1984]) tensor([0.7317, 0.0683, 0.0568, 0.1432]) -Greedy action tensor([ 0.9374, -0.5008, -0.5260, 0.7899]) tensor([0.4289, 0.1018, 0.0993, 0.3701]) -Greedy action tensor([ 0.7706, -0.3467, -0.0121, -0.0889]) tensor([0.4530, 0.1482, 0.2071, 0.1918]) -Greedy action tensor([ 1.2791, -0.5098, 0.1831, -0.0269]) tensor([0.5642, 0.0943, 0.1886, 0.1529]) -Greedy action tensor([ 1.3472, -0.1471, -0.2957, -0.1064]) tensor([0.6055, 0.1359, 0.1171, 0.1415]) -Greedy action tensor([ 1.3824, -0.7717, -0.1160, 0.2036]) tensor([0.6071, 0.0704, 0.1357, 0.1868]) -Greedy action tensor([ 1.2574, -0.3662, -0.4971, 0.3572]) tensor([0.5628, 0.1110, 0.0974, 0.2288]) -Greedy action tensor([ 1.0438, -0.5426, -0.3004, 0.4874]) tensor([0.4905, 0.1004, 0.1279, 0.2812]) -Greedy action tensor([ 1.2771, -0.7405, -0.4144, 0.7290]) tensor([0.5276, 0.0702, 0.0972, 0.3050]) -Greedy action tensor([ 0.7590, -0.1971, -0.1608, 0.2126]) tensor([0.4234, 0.1627, 0.1688, 0.2451]) -Greedy action tensor([ 0.9296, 0.0015, 0.0961, -0.3891]) tensor([0.4768, 0.1885, 0.2072, 0.1275]) -Greedy action tensor([ 0.8922, -0.2680, -0.3709, 0.2884]) tensor([0.4667, 0.1463, 0.1320, 0.2551]) -Greedy action tensor([ 0.6154, -0.4546, -0.1750, 0.2457]) tensor([0.4020, 0.1379, 0.1824, 0.2777]) -Greedy action tensor([ 0.8173, -0.2302, 0.0549, -0.0071]) tensor([0.4433, 0.1555, 0.2068, 0.1944]) -Greedy action tensor([ 0.9255, -0.2563, -0.1895, 0.1307]) tensor([0.4793, 0.1470, 0.1572, 0.2165]) -Greedy action tensor([ 1.1805, -0.7213, -0.2360, 0.6597]) tensor([0.5036, 0.0752, 0.1221, 0.2991]) -Greedy action tensor([ 0.8366, -0.0122, -0.0528, -0.2356]) tensor([0.4585, 0.1962, 0.1884, 0.1569]) -Greedy action tensor([ 0.9755, -0.6045, -0.6716, 0.8622]) tensor([0.4364, 0.0899, 0.0841, 0.3897]) -Greedy action tensor([ 1.1471, -0.3653, -0.3250, 0.0484]) tensor([0.5608, 0.1236, 0.1287, 0.1869]) -Greedy action tensor([ 1.3785, -0.4916, -0.4454, 0.5844]) tensor([0.5658, 0.0872, 0.0913, 0.2557]) -Greedy action tensor([ 0.9505, -0.3327, 0.2561, -0.1392]) tensor([0.4733, 0.1312, 0.2364, 0.1592]) -Greedy action tensor([ 0.9381, -0.4673, -0.4478, -0.0352]) tensor([0.5338, 0.1309, 0.1335, 0.2017]) -Greedy action tensor([ 0.5000, -0.2487, -0.0781, 0.0664]) tensor([0.3728, 0.1763, 0.2092, 0.2417]) -Greedy action tensor([ 0.9480, -0.6962, -0.1974, 0.2356]) tensor([0.4996, 0.0965, 0.1589, 0.2450]) -Greedy action tensor([ 1.0221, -0.4812, -0.1896, -0.1527]) tensor([0.5468, 0.1216, 0.1628, 0.1689]) -Greedy action tensor([ 1.3393, -0.4823, -0.1708, -0.1194]) tensor([0.6191, 0.1001, 0.1367, 0.1440]) -Greedy action tensor([ 1.0834, -0.3302, -0.4693, -0.2718]) tensor([0.5838, 0.1420, 0.1236, 0.1506]) -Greedy action tensor([ 1.1778, -0.3594, -0.2851, -0.0058]) tensor([0.5705, 0.1227, 0.1321, 0.1747]) -Greedy action tensor([ 1.0130, -0.2377, -0.3229, 0.0636]) tensor([0.5165, 0.1479, 0.1358, 0.1999]) -Greedy action tensor([ 1.0364, -0.6683, -0.4752, 0.8384]) tensor([0.4499, 0.0818, 0.0992, 0.3691]) -Greedy action tensor([ 0.8342, -0.3668, -0.0063, -0.0011]) tensor([0.4616, 0.1389, 0.1992, 0.2002]) -Greedy action tensor([ 1.7596, -0.5798, -0.2157, 0.3271]) tensor([0.6785, 0.0654, 0.0941, 0.1620]) -Greedy action tensor([ 0.8214, -0.3843, 0.2005, -0.1049]) tensor([0.4478, 0.1341, 0.2407, 0.1774]) -Greedy action tensor([ 1.6480, -0.2613, -0.2145, -0.1112]) tensor([0.6777, 0.1004, 0.1052, 0.1167]) -Greedy action tensor([ 0.9105, -0.5978, -0.3244, 0.2665]) tensor([0.4908, 0.1086, 0.1428, 0.2578]) -Greedy action tensor([ 0.8554, -0.3499, -0.3135, -0.3767]) tensor([0.5258, 0.1575, 0.1634, 0.1534]) -Greedy action tensor([ 0.8600, -0.3035, -0.1106, -0.1152]) tensor([0.4835, 0.1510, 0.1832, 0.1823]) -Greedy action tensor([ 1.0801, -0.7364, -0.3272, 0.2266]) tensor([0.5454, 0.0887, 0.1335, 0.2323]) -Greedy action tensor([ 1.2383, -0.5250, -0.4570, -0.0831]) tensor([0.6166, 0.1057, 0.1132, 0.1645]) -Greedy action tensor([ 0.9126, -0.4803, -0.2453, 0.3521]) tensor([0.4687, 0.1164, 0.1472, 0.2676]) -Greedy action tensor([ 1.7200, -1.0065, -0.5650, 0.2407]) tensor([0.7168, 0.0469, 0.0730, 0.1633]) -Greedy action tensor([ 1.1392, -0.4024, 0.0442, 0.0392]) tensor([0.5315, 0.1138, 0.1778, 0.1769]) -Greedy action tensor([ 0.8117, -0.3734, -0.2607, 0.2471]) tensor([0.4512, 0.1379, 0.1544, 0.2565]) -Greedy action tensor([ 0.4983, 0.0210, -0.1186, -0.0503]) tensor([0.3652, 0.2266, 0.1971, 0.2110]) -Greedy action tensor([ 1.1251, -0.2467, -0.1282, -0.1059]) tensor([0.5461, 0.1385, 0.1559, 0.1595]) -Greedy action tensor([ 0.8574, -0.2805, -0.0357, -0.0769]) tensor([0.4711, 0.1510, 0.1929, 0.1851]) -Greedy action tensor([ 0.6793, -0.1243, 0.2148, -0.2133]) tensor([0.4023, 0.1801, 0.2528, 0.1648]) -Greedy action tensor([ 0.7293, -0.2629, -0.1080, -0.0339]) tensor([0.4406, 0.1633, 0.1907, 0.2054]) -Greedy action tensor([ 0.7270, -0.3910, -0.0729, 0.0733]) tensor([0.4355, 0.1424, 0.1957, 0.2265]) -Greedy action tensor([ 0.5568, 0.0064, 0.0680, -0.1337]) tensor([0.3715, 0.2143, 0.2279, 0.1863]) -Greedy action tensor([ 0.9168, -0.5076, 0.0517, 0.1721]) tensor([0.4680, 0.1126, 0.1971, 0.2223]) -Greedy action tensor([ 0.9353, -0.3682, -0.3365, 0.1338]) tensor([0.4999, 0.1358, 0.1401, 0.2243]) -Greedy action tensor([ 0.9396, -0.1472, -0.2673, 0.1105]) tensor([0.4824, 0.1627, 0.1443, 0.2105]) -Greedy action tensor([ 0.9036, 0.0101, -0.5046, -0.2128]) tensor([0.5047, 0.2065, 0.1235, 0.1653]) -Greedy action tensor([ 1.1084, -0.1827, -0.1385, -0.2443]) tensor([0.5492, 0.1510, 0.1578, 0.1420]) -Greedy action tensor([ 1.2498, -0.3577, -0.2484, 0.1483]) tensor([0.5694, 0.1141, 0.1273, 0.1892]) -Greedy action tensor([ 1.8471, -0.1098, -0.4508, 0.0850]) tensor([0.7075, 0.1000, 0.0711, 0.1215]) -Greedy action tensor([ 1.4182, -0.0984, -0.2098, -0.0940]) tensor([0.6112, 0.1341, 0.1200, 0.1347]) -Greedy action tensor([ 0.8609, -0.3720, -0.2640, 0.2326]) tensor([0.4652, 0.1356, 0.1510, 0.2482]) -Greedy action tensor([ 0.9906, -0.4411, -0.7187, 0.1769]) tensor([0.5367, 0.1282, 0.0971, 0.2379]) -Greedy action tensor([ 1.5545, -0.6783, -0.5252, -0.0224]) tensor([0.6950, 0.0745, 0.0869, 0.1436]) -Greedy action tensor([ 1.4837, -0.3866, -0.2997, 0.2452]) tensor([0.6204, 0.0956, 0.1043, 0.1798]) -Greedy action tensor([ 0.8814, -0.4155, -0.2345, 0.1651]) tensor([0.4786, 0.1308, 0.1568, 0.2338]) -Greedy action tensor([ 0.6281, -0.3021, -0.2717, 0.0747]) tensor([0.4209, 0.1660, 0.1711, 0.2420]) -Greedy action tensor([ 1.2635, -0.1696, -0.0974, -0.0214]) tensor([0.5644, 0.1347, 0.1447, 0.1562]) -Greedy action tensor([ 1.1362, -0.3456, -0.1091, 0.2139]) tensor([0.5228, 0.1188, 0.1505, 0.2079]) -Greedy action tensor([ 1.0924, -0.2745, -0.2420, 0.1380]) tensor([0.5254, 0.1339, 0.1383, 0.2023]) -Greedy action tensor([ 1.5296, -0.7707, -0.2727, -0.2396]) tensor([0.6966, 0.0698, 0.1149, 0.1187]) -Greedy action tensor([ 1.4123, -0.8265, -0.2681, 0.6515]) tensor([0.5681, 0.0606, 0.1058, 0.2655]) -Greedy action tensor([ 1.6692, -0.1925, -0.1770, 0.0679]) tensor([0.6601, 0.1026, 0.1042, 0.1331]) -Greedy action tensor([ 0.6989, -0.2420, -0.2915, -0.1117]) tensor([0.4533, 0.1769, 0.1683, 0.2015]) -Greedy action tensor([ 0.5073, -0.1304, -0.0321, -0.0968]) tensor([0.3762, 0.1988, 0.2194, 0.2056]) -Greedy action tensor([ 0.2550, -0.0221, 0.0954, -0.2751]) tensor([0.3126, 0.2369, 0.2665, 0.1840]) -Greedy action tensor([ 0.9007, -0.4266, -0.0435, -0.4874]) tensor([0.5253, 0.1393, 0.2043, 0.1311]) -Greedy action tensor([ 0.6130, 0.4051, -0.1627, -0.3314]) tensor([0.3757, 0.3052, 0.1730, 0.1461]) -Greedy action tensor([ 0.5602, -0.1786, -0.0943, -0.2881]) tensor([0.4123, 0.1969, 0.2143, 0.1765]) -Greedy action tensor([ 0.4862, -0.2444, 0.1222, -0.3910]) tensor([0.3857, 0.1858, 0.2680, 0.1604]) -Greedy action tensor([ 0.4726, -0.2068, 0.0733, -0.3635]) tensor([0.3830, 0.1941, 0.2569, 0.1660]) -Greedy action tensor([ 0.6021, -0.0286, 0.0192, -0.2281]) tensor([0.3958, 0.2106, 0.2210, 0.1726]) -Greedy action tensor([ 0.4233, -0.3921, 0.0427, -0.4027]) tensor([0.3901, 0.1726, 0.2666, 0.1708]) -Greedy action tensor([ 0.5408, -0.1527, 0.1013, -0.1685]) tensor([0.3794, 0.1896, 0.2444, 0.1866]) -Greedy action tensor([ 0.7053, -0.4348, -0.0306, -0.5469]) tensor([0.4797, 0.1534, 0.2298, 0.1371]) -Greedy action tensor([ 0.7234, -0.2177, -0.0982, -0.3243]) tensor([0.4586, 0.1789, 0.2016, 0.1608]) -Greedy action tensor([ 0.7850, -0.5548, 0.0544, -0.5617]) tensor([0.4991, 0.1307, 0.2404, 0.1298]) -Greedy action tensor([ 0.5905, -0.4082, -0.0132, -0.1848]) tensor([0.4209, 0.1551, 0.2302, 0.1939]) -Greedy action tensor([ 0.6971, -0.2710, 0.1271, -0.4292]) tensor([0.4406, 0.1674, 0.2492, 0.1429]) -Greedy action tensor([ 0.9199, -0.2835, 0.0854, -0.5085]) tensor([0.5066, 0.1521, 0.2199, 0.1214]) -Greedy action tensor([ 0.2276, 0.3569, -0.0416, 0.1279]) tensor([0.2627, 0.2989, 0.2007, 0.2377]) -Greedy action tensor([ 0.3579, 0.0928, 0.1301, -0.3105]) tensor([0.3251, 0.2494, 0.2589, 0.1666]) -Greedy action tensor([ 0.8004, -0.5033, -0.1917, -0.4668]) tensor([0.5198, 0.1411, 0.1927, 0.1464]) -Greedy action tensor([ 0.4928, -0.1633, -0.0397, -0.2560]) tensor([0.3877, 0.2012, 0.2277, 0.1834]) -Greedy action tensor([ 0.4814, -0.2617, -0.0540, -0.2562]) tensor([0.3938, 0.1873, 0.2305, 0.1883]) -Greedy action tensor([ 0.8517, -0.4410, 0.0832, -0.3093]) tensor([0.4875, 0.1338, 0.2260, 0.1527]) -Greedy action tensor([ 0.6045, -0.5637, 0.2310, -0.4928]) tensor([0.4286, 0.1333, 0.2950, 0.1431]) -Greedy action tensor([ 0.6253, -0.3500, -0.1111, -0.2492]) tensor([0.4400, 0.1659, 0.2107, 0.1835]) -Greedy action tensor([ 0.5834, 0.0050, -0.0859, -0.2559]) tensor([0.3992, 0.2239, 0.2044, 0.1725]) -Greedy action tensor([ 0.4518, -0.1766, 0.0051, -0.2834]) tensor([0.3770, 0.2011, 0.2412, 0.1807]) -Greedy action tensor([ 0.5162, -0.2069, 0.0235, -0.4205]) tensor([0.4019, 0.1950, 0.2456, 0.1575]) -Greedy action tensor([ 0.3876, -0.2397, 0.1375, -0.3710]) tensor([0.3596, 0.1920, 0.2800, 0.1684]) -Greedy action tensor([ 0.5458, -0.4156, 0.0441, -0.3114]) tensor([0.4146, 0.1585, 0.2510, 0.1759]) -Greedy action tensor([ 0.5000, 0.1225, 0.0237, -0.1431]) tensor([0.3531, 0.2421, 0.2193, 0.1856]) -Greedy action tensor([ 0.3643, 0.0917, -0.0648, -0.1819]) tensor([0.3343, 0.2545, 0.2176, 0.1936]) -Greedy action tensor([ 0.5702, -0.1958, 0.0109, -0.4038]) tensor([0.4142, 0.1926, 0.2368, 0.1564]) -Greedy action tensor([ 0.6569, -0.3337, -0.0910, -0.4063]) tensor([0.4566, 0.1696, 0.2162, 0.1577]) -Greedy action tensor([ 0.6912, -0.2927, 0.0470, -0.4279]) tensor([0.4493, 0.1680, 0.2359, 0.1467]) -Greedy action tensor([ 0.8799, -0.5892, -0.1301, -0.5046]) tensor([0.5421, 0.1248, 0.1974, 0.1358]) -Greedy action tensor([ 0.3629, 0.0598, -0.0103, -0.1619]) tensor([0.3313, 0.2446, 0.2281, 0.1960]) -Greedy action tensor([ 0.7367, -0.6007, -0.0562, -0.6095]) tensor([0.5063, 0.1329, 0.2291, 0.1317]) -Greedy action tensor([ 0.7301, -0.5746, 0.1622, -0.7154]) tensor([0.4823, 0.1308, 0.2733, 0.1136]) -Greedy action tensor([ 0.3966, 0.0016, -0.0171, -0.1013]) tensor([0.3398, 0.2289, 0.2247, 0.2065]) -Greedy action tensor([ 1.0353, -0.5373, -0.0623, -0.7082]) tensor([0.5827, 0.1209, 0.1944, 0.1019]) -Greedy action tensor([ 0.3841, 0.0413, -0.0766, -0.1134]) tensor([0.3391, 0.2407, 0.2139, 0.2062]) -Greedy action tensor([ 0.7200, -0.3252, -0.0695, -0.3351]) tensor([0.4643, 0.1633, 0.2108, 0.1616]) -Greedy action tensor([ 0.9172, -0.4264, 0.0122, -0.5832]) tensor([0.5295, 0.1382, 0.2142, 0.1181]) -Greedy action tensor([ 0.8728, -0.5657, -0.2286, -0.5910]) tensor([0.5552, 0.1318, 0.1846, 0.1285]) -Greedy action tensor([ 0.5126, -0.3783, 0.1971, -0.4423]) tensor([0.3961, 0.1625, 0.2889, 0.1524]) -Greedy action tensor([ 0.6817, -0.3887, 0.1149, -0.4154]) tensor([0.4456, 0.1528, 0.2528, 0.1488]) -Greedy action tensor([ 0.0734, -0.0753, -0.1193, -0.1764]) tensor([0.2886, 0.2487, 0.2380, 0.2248]) -Greedy action tensor([ 0.2809, -0.1009, -0.1073, -0.3954]) tensor([0.3485, 0.2379, 0.2364, 0.1772]) -Greedy action tensor([ 0.6703, -0.2629, -0.0121, -0.2229]) tensor([0.4333, 0.1704, 0.2190, 0.1774]) -Greedy action tensor([ 0.5442, -0.1910, -0.1279, -0.3035]) tensor([0.4135, 0.1982, 0.2111, 0.1771]) -Greedy action tensor([ 0.6439, -0.4847, -0.0718, -0.4891]) tensor([0.4685, 0.1516, 0.2290, 0.1509]) -Greedy action tensor([ 0.4837, 0.0192, 0.2736, -0.5200]) tensor([0.3565, 0.2240, 0.2889, 0.1306]) -Greedy action tensor([ 0.4294, -0.1905, -0.0510, -0.2839]) tensor([0.3779, 0.2033, 0.2337, 0.1852]) -Greedy action tensor([ 0.6264, 0.1200, -0.0641, -0.3411]) tensor([0.4026, 0.2426, 0.2018, 0.1530]) -Greedy action tensor([ 0.6121, -0.2503, 0.0140, -0.5720]) tensor([0.4390, 0.1853, 0.2414, 0.1343]) -Greedy action tensor([ 1.3624, -1.1624, 0.0926, -0.5412]) tensor([0.6622, 0.0530, 0.1860, 0.0987]) -Greedy action tensor([ 0.5993, -0.2354, 0.0353, -0.2563]) tensor([0.4119, 0.1787, 0.2343, 0.1751]) -Greedy action tensor([ 0.4712, -0.1070, -0.1055, -0.3323]) tensor([0.3890, 0.2182, 0.2185, 0.1742]) -Greedy action tensor([ 0.3241, -0.1538, 0.2014, -0.2577]) tensor([0.3264, 0.2024, 0.2887, 0.1824]) -Greedy action tensor([ 0.5843, -0.0349, 0.0732, -0.3795]) tensor([0.3969, 0.2137, 0.2381, 0.1514]) -Greedy action tensor([ 8.9949e-01, -3.5043e-01, -8.7014e-04, -5.1756e-01]) tensor([0.5167, 0.1480, 0.2100, 0.1253]) -Greedy action tensor([ 0.4543, 0.0220, 0.0540, -0.3047]) tensor([0.3588, 0.2329, 0.2404, 0.1679]) -Greedy action tensor([ 0.1534, -0.1482, -0.2566, -0.4739]) tensor([0.3405, 0.2518, 0.2259, 0.1818]) -Greedy action tensor([ 0.4426, -0.2901, -0.0594, -0.3881]) tensor([0.3966, 0.1906, 0.2401, 0.1728]) -Greedy action tensor([ 0.8861, -0.5870, -0.0681, -0.5843]) tensor([0.5423, 0.1243, 0.2088, 0.1246]) -Greedy action tensor([ 1.0741, -0.8991, -0.2180, -0.7512]) tensor([0.6350, 0.0883, 0.1744, 0.1023]) -Greedy action tensor([ 0.7029, -0.5321, 0.0679, -0.3561]) tensor([0.4613, 0.1342, 0.2445, 0.1600]) -Greedy action tensor([ 0.6016, -0.3782, -0.0324, -0.2042]) tensor([0.4251, 0.1596, 0.2255, 0.1899]) -Greedy action tensor([ 0.6006, 0.0561, 0.0259, -0.3597]) tensor([0.3959, 0.2297, 0.2228, 0.1515]) -Greedy action tensor([ 0.9837, -0.6842, 0.0068, -0.5879]) tensor([0.5641, 0.1064, 0.2124, 0.1172]) -Greedy action tensor([ 0.3137, -0.0025, -0.2085, -0.0549]) tensor([0.3318, 0.2419, 0.1968, 0.2295]) -Greedy action tensor([ 0.3543, 0.2299, -0.0626, -0.0565]) tensor([0.3120, 0.2755, 0.2056, 0.2069]) -Greedy action tensor([ 0.6744, -0.2015, -0.0763, -0.1451]) tensor([0.4293, 0.1788, 0.2027, 0.1892]) -Greedy action tensor([ 0.3518, -0.2069, -0.1220, -0.3444]) tensor([0.3713, 0.2124, 0.2312, 0.1851]) -Greedy action tensor([ 0.4911, -0.3046, -0.1520, -0.4167]) tensor([0.4201, 0.1896, 0.2208, 0.1695]) -Greedy action tensor([ 0.3647, 0.0465, -0.0038, -0.3823]) tensor([0.3457, 0.2514, 0.2391, 0.1638]) -Greedy action tensor([ 0.5820, -0.0619, -0.1203, -0.3636]) tensor([0.4151, 0.2180, 0.2057, 0.1612]) -Greedy action tensor([ 0.4679, -0.1017, -0.1093, -0.0425]) tensor([0.3666, 0.2074, 0.2059, 0.2201]) -Greedy action tensor([ 1.0605, -0.2811, 0.1186, -0.6471]) tensor([0.5457, 0.1427, 0.2127, 0.0989]) -Greedy action tensor([ 0.5753, -0.1975, -0.0883, -0.2961]) tensor([0.4175, 0.1928, 0.2150, 0.1747]) -Greedy action tensor([ 0.6774, -0.2286, -0.0423, -0.4545]) tensor([0.4518, 0.1826, 0.2200, 0.1457]) -Greedy action tensor([-1.3435, -0.6269, 0.7835, 0.8138]) tensor([0.0498, 0.1019, 0.4177, 0.4306]) -Greedy action tensor([-1.6281, -0.5947, -0.0564, -0.3738]) tensor([0.0824, 0.2317, 0.3969, 0.2890]) -Greedy action tensor([-0.9003, -0.4667, 0.7612, 1.3919]) tensor([0.0565, 0.0871, 0.2975, 0.5589]) -Greedy action tensor([-1.9083, -0.5658, 0.7591, 0.1299]) tensor([0.0372, 0.1423, 0.5353, 0.2853]) -Greedy action tensor([-1.5523, -0.1860, 0.3978, 0.4188]) tensor([0.0523, 0.2050, 0.3675, 0.3753]) -Greedy action tensor([-0.8146, -0.2354, 1.3747, 1.3861]) tensor([0.0482, 0.0860, 0.4304, 0.4354]) -Greedy action tensor([-1.9136, -0.8927, 0.2423, -0.2307]) tensor([0.0562, 0.1560, 0.4853, 0.3024]) -Greedy action tensor([-0.9333, -0.0820, -0.1467, -0.4459]) tensor([0.1395, 0.3269, 0.3064, 0.2272]) -Greedy action tensor([-1.9048, -0.4577, 0.6780, -0.1300]) tensor([0.0410, 0.1743, 0.5428, 0.2419]) -Greedy action tensor([-1.8598, -0.4357, 0.8647, 0.3847]) tensor([0.0335, 0.1392, 0.5110, 0.3162]) -Greedy action tensor([-1.2485, 0.2667, -0.4345, -0.5091]) tensor([0.1010, 0.4595, 0.2279, 0.2115]) -Greedy action tensor([-2.0177, -0.8609, 0.3402, -0.2531]) tensor([0.0486, 0.1545, 0.5133, 0.2836]) -Greedy action tensor([-1.0327, -0.6827, 0.6356, 1.1447]) tensor([0.0604, 0.0858, 0.3205, 0.5333]) -Greedy action tensor([-1.7371, -0.5016, 0.5461, -0.0328]) tensor([0.0506, 0.1742, 0.4967, 0.2784]) -Greedy action tensor([-0.4140, -0.1636, 1.0512, 1.6131]) tensor([0.0704, 0.0904, 0.3047, 0.5345]) -Greedy action tensor([-1.9247, -0.9284, 0.6707, 0.1087]) tensor([0.0404, 0.1094, 0.5415, 0.3087]) -Greedy action tensor([-1.0073, -0.5169, 0.2201, 0.6020]) tensor([0.0905, 0.1479, 0.3090, 0.4526]) -Greedy action tensor([-1.4169, -0.4154, 1.0849, 1.0747]) tensor([0.0357, 0.0972, 0.4358, 0.4313]) -Greedy action tensor([-1.3441, -0.5859, 0.7227, 0.9153]) tensor([0.0485, 0.1036, 0.3833, 0.4647]) -Greedy action tensor([-2.0126, -0.9254, 0.6694, -0.0060]) tensor([0.0384, 0.1140, 0.5617, 0.2859]) -Greedy action tensor([-1.4210, -0.4451, 1.0020, 1.0837]) tensor([0.0368, 0.0976, 0.4151, 0.4504]) -Greedy action tensor([-1.7224, -0.3675, 0.5182, -0.0438]) tensor([0.0509, 0.1974, 0.4787, 0.2729]) -Greedy action tensor([-1.0787, -0.5583, 0.2654, 0.5405]) tensor([0.0865, 0.1455, 0.3315, 0.4365]) -Greedy action tensor([-1.5898, -0.4634, 0.6430, 0.2565]) tensor([0.0506, 0.1562, 0.4723, 0.3209]) -Greedy action tensor([-1.4742, -0.4990, 0.4711, 0.3508]) tensor([0.0593, 0.1574, 0.4152, 0.3681]) -Greedy action tensor([-1.1452, -0.5702, 0.3787, 0.3111]) tensor([0.0858, 0.1524, 0.3937, 0.3680]) -Greedy action tensor([-0.8920, -0.4556, 0.6553, 1.3046]) tensor([0.0616, 0.0953, 0.2893, 0.5538]) -Greedy action tensor([-1.8273e+00, -4.7793e-01, 6.8410e-01, -1.2767e-04]) tensor([0.0427, 0.1648, 0.5267, 0.2657]) -Greedy action tensor([-1.5785, -0.6151, 0.1584, -0.0619]) tensor([0.0722, 0.1891, 0.4099, 0.3288]) -Greedy action tensor([-0.2416, -0.1397, 1.0171, 1.6893]) tensor([0.0798, 0.0884, 0.2811, 0.5506]) -Greedy action tensor([-1.7607, -0.6721, 1.1866, 0.6211]) tensor([0.0295, 0.0877, 0.5629, 0.3198]) -Greedy action tensor([-1.1742, -0.4341, 0.6255, -0.5761]) tensor([0.0912, 0.1912, 0.5517, 0.1659]) -Greedy action tensor([-1.6334, -0.0818, 0.4576, 0.0047]) tensor([0.0527, 0.2489, 0.4269, 0.2714]) -Greedy action tensor([-0.8249, -0.5435, 0.2290, 0.2842]) tensor([0.1216, 0.1611, 0.3488, 0.3686]) -Greedy action tensor([-1.2425, -0.6076, 0.5356, 0.5366]) tensor([0.0679, 0.1281, 0.4018, 0.4022]) -Greedy action tensor([-1.0834, -0.5807, 0.2984, 0.1118]) tensor([0.1006, 0.1663, 0.4006, 0.3324]) -Greedy action tensor([-1.0415, -0.6750, 0.0632, -0.4923]) tensor([0.1390, 0.2006, 0.4196, 0.2408]) -Greedy action tensor([-1.4252, -0.5725, 0.4064, 0.1323]) tensor([0.0698, 0.1636, 0.4355, 0.3311]) -Greedy action tensor([-1.8440, -0.4467, 0.6286, -0.0922]) tensor([0.0441, 0.1784, 0.5230, 0.2544]) -Greedy action tensor([-1.4601, -0.4863, 0.9950, 1.0433]) tensor([0.0363, 0.0962, 0.4232, 0.4442]) -Greedy action tensor([-1.4925, -0.6146, 0.1747, -0.3213]) tensor([0.0838, 0.2017, 0.4441, 0.2704]) -Greedy action tensor([-1.0110, -0.5558, 0.3931, -0.0923]) tensor([0.1092, 0.1722, 0.4448, 0.2738]) -Greedy action tensor([-1.5425, -0.1726, 0.6092, 0.5429]) tensor([0.0463, 0.1823, 0.3984, 0.3729]) -Greedy action tensor([-1.5441, -0.4501, 0.4689, 0.1380]) tensor([0.0594, 0.1772, 0.4443, 0.3191]) -Greedy action tensor([-1.5125, -0.4206, 0.4925, 0.3039]) tensor([0.0570, 0.1697, 0.4230, 0.3503]) -Greedy action tensor([-1.8716, -0.4105, 0.6323, -0.1270]) tensor([0.0430, 0.1853, 0.5257, 0.2460]) -Greedy action tensor([-1.7576, -0.4697, 0.6018, -0.0138]) tensor([0.0478, 0.1732, 0.5058, 0.2733]) -Greedy action tensor([-1.5601, -0.4687, 0.8493, 0.5704]) tensor([0.0425, 0.1266, 0.4730, 0.3579]) -Greedy action tensor([-1.4491, -0.3870, 0.6178, 0.5320]) tensor([0.0525, 0.1519, 0.4149, 0.3807]) -Greedy action tensor([-1.9764, -0.5385, 0.9367, 0.1594]) tensor([0.0312, 0.1312, 0.5738, 0.2637]) -Greedy action tensor([-1.4323, -0.4024, 0.6064, 0.7248]) tensor([0.0497, 0.1392, 0.3816, 0.4296]) -Greedy action tensor([-1.7277, -0.3815, 0.6449, 0.0958]) tensor([0.0460, 0.1766, 0.4929, 0.2846]) -Greedy action tensor([-1.6802, -0.3994, 0.6078, 0.2178]) tensor([0.0473, 0.1704, 0.4665, 0.3158]) -Greedy action tensor([-0.9003, 0.6174, 0.0266, 0.1870]) tensor([0.0905, 0.4126, 0.2286, 0.2683]) -Greedy action tensor([-1.0064, -0.5478, 0.3027, 0.7182]) tensor([0.0841, 0.1330, 0.3113, 0.4716]) -Greedy action tensor([-1.4162, -0.3926, 0.6935, 0.6407]) tensor([0.0504, 0.1402, 0.4154, 0.3940]) -Greedy action tensor([-1.9190, -0.4499, 0.9888, 0.4615]) tensor([0.0290, 0.1261, 0.5313, 0.3136]) -Greedy action tensor([-1.7860, -0.4371, 0.6021, -0.0583]) tensor([0.0468, 0.1803, 0.5096, 0.2633]) -Greedy action tensor([-1.8082, -0.3439, 0.5841, -0.0878]) tensor([0.0458, 0.1979, 0.5006, 0.2557]) -Greedy action tensor([-1.7909, -0.4690, 0.6153, -0.0545]) tensor([0.0465, 0.1743, 0.5154, 0.2638]) -Greedy action tensor([-1.5979, -0.4225, 0.6924, 0.5706]) tensor([0.0437, 0.1417, 0.4321, 0.3825]) -Greedy action tensor([-1.7828, -0.8013, 0.1473, -0.1585]) tensor([0.0640, 0.1707, 0.4407, 0.3246]) -Greedy action tensor([-1.1715, -0.5487, 0.2878, 0.4707]) tensor([0.0811, 0.1511, 0.3489, 0.4189]) -Greedy action tensor([-1.2136, -0.6583, 0.3319, 0.2580]) tensor([0.0848, 0.1478, 0.3978, 0.3695]) -Greedy action tensor([-0.7584, -0.6279, 0.2931, -0.0490]) tensor([0.1422, 0.1620, 0.4069, 0.2890]) -Greedy action tensor([-1.2955, -0.4072, 0.4036, 0.4504]) tensor([0.0683, 0.1661, 0.3738, 0.3917]) -Greedy action tensor([-1.8202, -0.4038, 0.7869, 0.3918]) tensor([0.0359, 0.1482, 0.4875, 0.3284]) -Greedy action tensor([-1.5633, -0.3335, 0.4281, 0.0588]) tensor([0.0595, 0.2035, 0.4358, 0.3012]) -Greedy action tensor([-1.4833, -0.5382, 0.4174, 0.0853]) tensor([0.0664, 0.1708, 0.4441, 0.3187]) -Greedy action tensor([-1.5485, -0.5758, 0.4711, 0.0805]) tensor([0.0614, 0.1625, 0.4629, 0.3132]) -Greedy action tensor([-0.6772, 0.9842, 0.0252, 0.4008]) tensor([0.0891, 0.4692, 0.1798, 0.2618]) -Greedy action tensor([-0.8427, -0.5190, 0.4263, -0.0689]) tensor([0.1233, 0.1705, 0.4388, 0.2674]) -Greedy action tensor([-1.1574, -0.2896, 0.5251, 0.9261]) tensor([0.0596, 0.1418, 0.3203, 0.4783]) -Greedy action tensor([-1.2005, -0.3858, 0.6271, 0.9976]) tensor([0.0541, 0.1222, 0.3364, 0.4873]) -Greedy action tensor([-2.0146, 0.4162, 0.4345, -0.0143]) tensor([0.0319, 0.3628, 0.3695, 0.2359]) -Greedy action tensor([-1.6479, -0.4286, 0.5843, 0.3345]) tensor([0.0477, 0.1614, 0.4446, 0.3463]) -Greedy action tensor([-1.6188, -0.5171, 0.4916, 0.0362]) tensor([0.0572, 0.1720, 0.4717, 0.2991]) -Greedy action tensor([-1.8624, -0.4612, 0.6318, -0.1345]) tensor([0.0439, 0.1781, 0.5312, 0.2469]) -Greedy action tensor([-1.9108, -0.4502, 0.6481, -0.1588]) tensor([0.0417, 0.1796, 0.5385, 0.2403]) -Greedy action tensor([-1.0541, -0.5055, 0.4007, 0.7032]) tensor([0.0781, 0.1351, 0.3344, 0.4525]) -Greedy action tensor([-1.4985, -0.5331, 0.4299, 0.0798]) tensor([0.0651, 0.1711, 0.4481, 0.3157]) -Greedy action tensor([ 0.3062, 0.1997, -0.8277, -0.0695]) tensor([0.3439, 0.3092, 0.1107, 0.2362]) -Greedy action tensor([ 0.4751, -0.9729, 0.3149, -0.2922]) tensor([0.3920, 0.0921, 0.3339, 0.1820]) -Greedy action tensor([ 0.9805, -1.0191, 0.2806, 0.7903]) tensor([0.4067, 0.0551, 0.2020, 0.3363]) -Greedy action tensor([ 0.8548, -0.1124, -0.7026, 0.4096]) tensor([0.4481, 0.1704, 0.0944, 0.2871]) -Greedy action tensor([-0.2716, -1.9222, 0.2518, 1.2869]) tensor([0.1310, 0.0252, 0.2212, 0.6226]) -Greedy action tensor([ 1.0946, -0.0834, 0.4754, 1.1776]) tensor([0.3410, 0.1050, 0.1836, 0.3705]) -Greedy action tensor([ 1.0348, -0.3442, 0.7398, 0.7038]) tensor([0.3684, 0.0928, 0.2743, 0.2646]) -Greedy action tensor([-0.2772, 1.8166, 1.2406, -0.8954]) tensor([0.0703, 0.5709, 0.3209, 0.0379]) -Greedy action tensor([ 0.7717, 0.4001, -0.9535, -0.1355]) tensor([0.4402, 0.3036, 0.0784, 0.1777]) -Greedy action tensor([-2.8991e-02, -1.1883e-03, -1.6472e+00, 7.2045e-01]) tensor([0.2303, 0.2368, 0.0457, 0.4873]) -Greedy action tensor([ 0.8893, -0.5766, 1.0632, -1.1368]) tensor([0.3918, 0.0904, 0.4662, 0.0516]) -Greedy action tensor([ 0.4382, -1.0784, 0.0117, 0.6396]) tensor([0.3231, 0.0709, 0.2109, 0.3951]) -Greedy action tensor([ 1.5814, -0.2064, 1.3144, 1.4759]) tensor([0.3530, 0.0591, 0.2703, 0.3177]) -Greedy action tensor([ 2.3015, -0.6913, 0.8935, 1.3670]) tensor([0.5926, 0.0297, 0.1450, 0.2327]) -Greedy action tensor([ 1.3920, -1.0798, 0.7095, 1.4986]) tensor([0.3700, 0.0312, 0.1870, 0.4117]) -Greedy action tensor([-0.0076, -0.2311, -0.5188, 1.3027]) tensor([0.1638, 0.1310, 0.0982, 0.6071]) -Greedy action tensor([0.3741, 0.2360, 0.0189, 1.5649]) tensor([0.1706, 0.1486, 0.1196, 0.5612]) -Greedy action tensor([1.3839, 0.1057, 0.8630, 1.2616]) tensor([0.3627, 0.1010, 0.2154, 0.3209]) -Greedy action tensor([ 0.8954, 0.5729, -0.5044, 0.5145]) tensor([0.3767, 0.2729, 0.0929, 0.2574]) -Greedy action tensor([ 0.9034, -0.0513, 0.3408, 0.1502]) tensor([0.4123, 0.1587, 0.2349, 0.1941]) -Greedy action tensor([1.2635, 1.2599, 0.2485, 0.7213]) tensor([0.3401, 0.3389, 0.1233, 0.1978]) -Greedy action tensor([ 0.6019, -0.0456, -0.5920, 1.1675]) tensor([0.2788, 0.1459, 0.0845, 0.4908]) -Greedy action tensor([1.5864, 0.4157, 0.1349, 0.5145]) tensor([0.5300, 0.1644, 0.1241, 0.1815]) -Greedy action tensor([-0.0440, 0.6210, -0.6688, 0.9412]) tensor([0.1624, 0.3158, 0.0869, 0.4349]) -Greedy action tensor([-0.7826, -1.7912, -0.3539, 0.8450]) tensor([0.1251, 0.0456, 0.1921, 0.6371]) -Greedy action tensor([0.9390, 0.9573, 0.1880, 0.2320]) tensor([0.3352, 0.3414, 0.1582, 0.1653]) -Greedy action tensor([ 0.9131, -0.1475, 0.3354, 0.6313]) tensor([0.3757, 0.1301, 0.2108, 0.2834]) -Greedy action tensor([ 1.5228, -0.5237, 0.0176, 1.1279]) tensor([0.4938, 0.0638, 0.1096, 0.3327]) -Greedy action tensor([ 1.0841, -0.4405, 1.1816, 0.5540]) tensor([0.3438, 0.0748, 0.3790, 0.2024]) -Greedy action tensor([1.3664, 0.2047, 0.3311, 0.0203]) tensor([0.5186, 0.1623, 0.1842, 0.1350]) -Greedy action tensor([ 0.2279, -0.4156, -0.4992, -0.2043]) tensor([0.3762, 0.1977, 0.1818, 0.2442]) -Greedy action tensor([ 2.5189, -0.5201, 0.6048, 1.6877]) tensor([0.6132, 0.0294, 0.0904, 0.2671]) -Greedy action tensor([ 1.6929, 0.6770, -0.2271, 0.5708]) tensor([0.5452, 0.1974, 0.0799, 0.1775]) -Greedy action tensor([ 0.5053, -1.6357, 0.6084, -0.2954]) tensor([0.3738, 0.0439, 0.4144, 0.1678]) -Greedy action tensor([0.3249, 1.3049, 0.9355, 0.0795]) tensor([0.1590, 0.4237, 0.2928, 0.1244]) -Greedy action tensor([0.3921, 0.2015, 0.5691, 0.3002]) tensor([0.2543, 0.2102, 0.3035, 0.2320]) -Greedy action tensor([ 0.3815, -1.6503, 0.7824, -0.0799]) tensor([0.3073, 0.0403, 0.4588, 0.1937]) -Greedy action tensor([-0.0180, 0.8484, -0.0043, -0.0817]) tensor([0.1876, 0.4462, 0.1902, 0.1760]) -Greedy action tensor([ 2.0123, -0.9362, 1.3285, 0.8112]) tensor([0.5382, 0.0282, 0.2716, 0.1619]) -Greedy action tensor([-0.0022, -0.3117, -1.4080, -0.3729]) tensor([0.3746, 0.2749, 0.0918, 0.2586]) -Greedy action tensor([ 0.5856, -0.5712, -0.5587, 1.0312]) tensor([0.3131, 0.0984, 0.0997, 0.4888]) -Greedy action tensor([ 1.4139, -0.1462, 0.6499, 1.0326]) tensor([0.4239, 0.0891, 0.1975, 0.2895]) -Greedy action tensor([ 0.9250, -0.0534, 0.7873, 0.7205]) tensor([0.3266, 0.1228, 0.2845, 0.2662]) -Greedy action tensor([ 0.6068, -0.0816, 0.3058, -0.3421]) tensor([0.3803, 0.1910, 0.2814, 0.1472]) -Greedy action tensor([0.2501, 1.3563, 0.6175, 0.2176]) tensor([0.1554, 0.4698, 0.2244, 0.1504]) -Greedy action tensor([ 0.4822, -0.5051, -1.2267, 0.4895]) tensor([0.3905, 0.1455, 0.0707, 0.3934]) -Greedy action tensor([-0.3988, 0.7408, 0.6857, -0.5798]) tensor([0.1263, 0.3947, 0.3736, 0.1054]) -Greedy action tensor([ 1.3921, -0.4771, 0.8777, 0.6784]) tensor([0.4461, 0.0688, 0.2667, 0.2185]) -Greedy action tensor([0.2747, 0.0998, 0.2789, 0.3257]) tensor([0.2567, 0.2155, 0.2578, 0.2701]) -Greedy action tensor([ 0.7372, -0.1787, 0.9042, 0.6968]) tensor([0.2823, 0.1130, 0.3336, 0.2711]) -Greedy action tensor([ 0.2484, 0.9276, 0.3524, -0.3055]) tensor([0.2147, 0.4235, 0.2383, 0.1234]) -Greedy action tensor([ 0.9743, -2.2459, -0.1343, 0.7022]) tensor([0.4691, 0.0187, 0.1548, 0.3574]) -Greedy action tensor([ 0.7472, -1.6785, 0.2147, 0.1645]) tensor([0.4477, 0.0396, 0.2628, 0.2499]) -Greedy action tensor([ 0.4908, 0.3363, -1.4673, 0.2752]) tensor([0.3566, 0.3056, 0.0503, 0.2875]) -Greedy action tensor([ 0.0182, -2.5231, 0.0415, 0.8838]) tensor([0.2233, 0.0176, 0.2285, 0.5306]) -Greedy action tensor([ 1.4939, -0.4135, 0.6611, 1.1056]) tensor([0.4422, 0.0656, 0.1923, 0.2999]) -Greedy action tensor([ 1.1575, -0.2356, 0.6427, 0.2116]) tensor([0.4476, 0.1111, 0.2675, 0.1738]) -Greedy action tensor([1.0600, 0.1137, 0.4118, 0.1478]) tensor([0.4324, 0.1678, 0.2261, 0.1737]) -Greedy action tensor([ 0.1170, -1.7255, -0.3722, 0.8922]) tensor([0.2536, 0.0402, 0.1555, 0.5507]) -Greedy action tensor([-0.0885, -0.2013, -0.7244, 0.6270]) tensor([0.2238, 0.1999, 0.1185, 0.4577]) -Greedy action tensor([ 0.3175, -0.3112, 0.0374, -0.6518]) tensor([0.3748, 0.1999, 0.2832, 0.1422]) -Greedy action tensor([ 1.3673, -0.7652, 1.9627, -0.1516]) tensor([0.3173, 0.0376, 0.5756, 0.0695]) -Greedy action tensor([ 0.0751, -0.6281, 0.0577, 0.8140]) tensor([0.2187, 0.1083, 0.2150, 0.4580]) -Greedy action tensor([ 0.6656, -0.3343, 2.2962, 1.0512]) tensor([0.1259, 0.0463, 0.6428, 0.1851]) -Greedy action tensor([ 0.1348, -0.5222, -0.4693, 0.0961]) tensor([0.3304, 0.1713, 0.1806, 0.3178]) -Greedy action tensor([1.1777, 0.4242, 0.5729, 0.3123]) tensor([0.4102, 0.1931, 0.2240, 0.1726]) -Greedy action tensor([ 1.4196, -0.8974, 0.2909, 1.8005]) tensor([0.3466, 0.0342, 0.1121, 0.5072]) -Greedy action tensor([1.2215, 0.3012, 0.5329, 1.0314]) tensor([0.3666, 0.1461, 0.1842, 0.3031]) -Greedy action tensor([ 0.8095, 0.1870, -0.2622, 1.1623]) tensor([0.3028, 0.1625, 0.1037, 0.4310]) -Greedy action tensor([ 0.1185, 0.7958, -0.8987, 0.6764]) tensor([0.1969, 0.3877, 0.0712, 0.3441]) -Greedy action tensor([ 0.0089, 0.3061, 1.0633, -0.1368]) tensor([0.1645, 0.2214, 0.4720, 0.1422]) -Greedy action tensor([ 0.9595, -0.1694, -0.8739, -0.1140]) tensor([0.5479, 0.1772, 0.0876, 0.1873]) -Greedy action tensor([1.2830, 0.1636, 0.6570, 0.2903]) tensor([0.4481, 0.1463, 0.2396, 0.1660]) -Greedy action tensor([0.7791, 0.0442, 1.4912, 0.8773]) tensor([0.2164, 0.1038, 0.4411, 0.2387]) -Greedy action tensor([-1.1892, -0.8366, 0.5296, 0.4877]) tensor([0.0749, 0.1066, 0.4178, 0.4007]) -Greedy action tensor([ 0.6095, -1.0820, 0.9269, 1.1138]) tensor([0.2373, 0.0437, 0.3260, 0.3930]) -Greedy action tensor([0.2954, 1.4487, 0.5064, 0.3685]) tensor([0.1543, 0.4890, 0.1906, 0.1660]) -Greedy action tensor([ 1.2684, -0.4772, 1.2455, 0.6899]) tensor([0.3686, 0.0643, 0.3603, 0.2067]) -Greedy action tensor([ 1.4540, -0.2834, 0.6692, 1.6228]) tensor([0.3551, 0.0625, 0.1620, 0.4204]) -Greedy action tensor([ 1.2046, -1.2047, 0.6513, 0.2477]) tensor([0.4880, 0.0439, 0.2806, 0.1874]) -Greedy action tensor([ 1.4126, 0.9282, -0.1300, 1.2966]) tensor([0.3676, 0.2265, 0.0786, 0.3273]) -Greedy action tensor([ 0.5679, -2.5496, 0.6270, 0.8065]) tensor([0.2963, 0.0131, 0.3144, 0.3762]) -Greedy action tensor([ 0.7912, -0.1976, 0.1490, -0.2556]) tensor([0.4446, 0.1654, 0.2339, 0.1561]) -Greedy action tensor([ 0.7857, -0.6304, -0.4769, 0.0230]) tensor([0.5020, 0.1218, 0.1420, 0.2341]) -Greedy action tensor([ 0.6224, -0.5693, -0.1214, 0.0644]) tensor([0.4253, 0.1292, 0.2021, 0.2434]) -Greedy action tensor([ 1.4296, -0.5566, -0.4274, 0.4504]) tensor([0.5992, 0.0822, 0.0936, 0.2251]) -Greedy action tensor([ 1.6996, -0.8217, -0.2419, -0.1295]) tensor([0.7223, 0.0580, 0.1037, 0.1160]) -Greedy action tensor([ 0.5268, -0.1763, -0.1825, -0.0368]) tensor([0.3912, 0.1937, 0.1925, 0.2227]) -Greedy action tensor([ 1.0480, -0.6609, -0.1518, -0.2386]) tensor([0.5687, 0.1030, 0.1713, 0.1571]) -Greedy action tensor([ 1.0525, -0.7595, -0.5363, 1.0039]) tensor([0.4310, 0.0704, 0.0880, 0.4106]) -Greedy action tensor([ 0.3211, -0.1185, -0.0012, -0.0310]) tensor([0.3255, 0.2097, 0.2358, 0.2289]) -Greedy action tensor([ 1.3299, -0.4160, -0.1381, 0.0840]) tensor([0.5908, 0.1031, 0.1361, 0.1700]) -Greedy action tensor([ 0.8467, 0.0162, -0.2238, 0.2250]) tensor([0.4318, 0.1882, 0.1480, 0.2319]) -Greedy action tensor([ 0.8262, -0.4076, -0.1442, -0.0173]) tensor([0.4761, 0.1386, 0.1804, 0.2048]) -Greedy action tensor([ 0.9063, 0.0311, -0.0758, 0.1871]) tensor([0.4389, 0.1829, 0.1644, 0.2138]) -Greedy action tensor([ 0.8562, -0.1136, -0.0315, 0.1383]) tensor([0.4389, 0.1664, 0.1806, 0.2141]) -Greedy action tensor([ 0.6882, -0.1747, -0.1776, -0.0065]) tensor([0.4270, 0.1802, 0.1797, 0.2132]) -Greedy action tensor([ 1.0583, -0.4753, 0.0079, -0.2339]) tensor([0.5434, 0.1172, 0.1901, 0.1493]) -Greedy action tensor([ 1.1181, -0.4283, -0.0280, 0.1942]) tensor([0.5187, 0.1105, 0.1649, 0.2059]) -Greedy action tensor([ 0.8353, -0.3389, -0.1922, 0.1202]) tensor([0.4638, 0.1433, 0.1660, 0.2269]) -Greedy action tensor([ 1.2192, 0.0137, 0.0335, -0.0182]) tensor([0.5276, 0.1581, 0.1612, 0.1531]) -Greedy action tensor([ 0.9678, -0.2326, 0.0354, -0.0606]) tensor([0.4873, 0.1467, 0.1918, 0.1742]) -Greedy action tensor([ 1.4638, -0.1814, -0.6178, 0.1811]) tensor([0.6270, 0.1210, 0.0782, 0.1738]) -Greedy action tensor([ 1.2935, -0.4810, -0.4405, 0.3123]) tensor([0.5810, 0.0985, 0.1026, 0.2178]) -Greedy action tensor([ 0.4497, -0.1379, -0.1539, -0.1961]) tensor([0.3807, 0.2115, 0.2082, 0.1996]) -Greedy action tensor([ 0.7509, -0.6403, -0.3218, 0.2103]) tensor([0.4601, 0.1145, 0.1574, 0.2680]) -Greedy action tensor([ 0.6815, -0.0567, 0.1320, -0.2694]) tensor([0.4096, 0.1958, 0.2364, 0.1582]) -Greedy action tensor([ 0.9437, -0.1894, -0.0362, -0.0945]) tensor([0.4875, 0.1570, 0.1830, 0.1726]) -Greedy action tensor([ 0.9470, -0.5044, -0.1587, 0.1846]) tensor([0.4922, 0.1153, 0.1629, 0.2296]) -Greedy action tensor([ 0.6126, -0.0537, 0.0230, -0.3008]) tensor([0.4050, 0.2080, 0.2246, 0.1625]) -Greedy action tensor([ 0.8966, -0.1389, -0.1300, 0.0044]) tensor([0.4710, 0.1672, 0.1687, 0.1930]) -Greedy action tensor([ 0.8192, -0.3591, -0.2028, 0.3196]) tensor([0.4397, 0.1353, 0.1582, 0.2668]) -Greedy action tensor([ 1.1859, -0.5134, -0.4100, 0.7517]) tensor([0.4918, 0.0899, 0.0997, 0.3186]) -Greedy action tensor([ 1.3887, -0.4485, -0.5280, -0.1052]) tensor([0.6532, 0.1040, 0.0961, 0.1467]) -Greedy action tensor([ 0.9757, -0.2836, -0.4529, -0.0381]) tensor([0.5301, 0.1505, 0.1270, 0.1924]) -Greedy action tensor([ 1.3021, -0.4424, -0.1897, 0.1241]) tensor([0.5856, 0.1023, 0.1317, 0.1803]) -Greedy action tensor([ 1.7377, -0.0264, -0.1611, 0.1427]) tensor([0.6562, 0.1124, 0.0983, 0.1331]) -Greedy action tensor([ 0.3978, -0.2153, -0.2439, 0.0284]) tensor([0.3624, 0.1963, 0.1908, 0.2505]) -Greedy action tensor([ 1.0504, -0.0619, -0.0461, -0.2380]) tensor([0.5159, 0.1696, 0.1723, 0.1422]) -Greedy action tensor([ 0.9490, -0.5512, -0.2157, 0.4322]) tensor([0.4692, 0.1047, 0.1464, 0.2798]) -Greedy action tensor([ 1.0202, -0.1546, -0.2327, -0.0481]) tensor([0.5160, 0.1594, 0.1474, 0.1773]) -Greedy action tensor([ 0.4615, -0.3584, -0.1583, 0.0640]) tensor([0.3773, 0.1662, 0.2030, 0.2535]) -Greedy action tensor([ 0.5441, -0.2291, -0.1443, -0.1975]) tensor([0.4098, 0.1891, 0.2059, 0.1952]) -Greedy action tensor([ 0.8307, -0.2108, -0.0938, 0.0280]) tensor([0.4550, 0.1606, 0.1805, 0.2039]) -Greedy action tensor([ 1.0327, -0.2519, -0.2362, 0.3008]) tensor([0.4905, 0.1357, 0.1379, 0.2359]) -Greedy action tensor([ 1.0383, -0.6613, -0.3751, 0.2837]) tensor([0.5273, 0.0964, 0.1283, 0.2480]) -Greedy action tensor([ 1.2869, -0.5091, -0.4161, 0.3135]) tensor([0.5794, 0.0962, 0.1055, 0.2189]) -Greedy action tensor([ 1.1297, -0.4263, -0.3063, 0.5874]) tensor([0.4925, 0.1039, 0.1172, 0.2864]) -Greedy action tensor([ 1.1053, -0.6127, -0.2080, -0.1862]) tensor([0.5803, 0.1041, 0.1561, 0.1595]) -Greedy action tensor([ 0.8489, -0.5441, -0.2646, 0.5240]) tensor([0.4349, 0.1080, 0.1428, 0.3143]) -Greedy action tensor([ 1.2380, -0.3958, -0.0632, 0.0343]) tensor([0.5658, 0.1104, 0.1540, 0.1698]) -Greedy action tensor([ 1.0780, -0.7550, -0.3478, 0.7167]) tensor([0.4769, 0.0763, 0.1146, 0.3323]) -Greedy action tensor([ 0.8933, -0.3330, -0.4601, 0.4985]) tensor([0.4493, 0.1318, 0.1161, 0.3028]) -Greedy action tensor([ 1.2689, -0.2198, -0.1787, 0.0700]) tensor([0.5674, 0.1280, 0.1334, 0.1711]) -Greedy action tensor([ 0.8960, -0.4312, -0.1058, -0.0385]) tensor([0.4938, 0.1310, 0.1813, 0.1940]) -Greedy action tensor([ 0.5960, -0.1337, -0.6644, 0.1831]) tensor([0.4120, 0.1986, 0.1168, 0.2726]) -Greedy action tensor([ 1.6019, -0.3772, -0.3913, 0.0782]) tensor([0.6701, 0.0926, 0.0913, 0.1460]) -Greedy action tensor([ 1.1154, -0.3816, 0.0039, -0.1568]) tensor([0.5455, 0.1221, 0.1795, 0.1529]) -Greedy action tensor([ 0.7014, -0.1579, -0.1929, -0.0700]) tensor([0.4358, 0.1845, 0.1782, 0.2015]) -Greedy action tensor([ 1.3918, -0.6425, -0.3070, 0.5857]) tensor([0.5681, 0.0743, 0.1039, 0.2537]) -Greedy action tensor([ 1.0370, -0.2273, -0.0278, -0.0668]) tensor([0.5105, 0.1442, 0.1760, 0.1693]) -Greedy action tensor([ 1.5783, -0.2596, -0.2074, 0.1792]) tensor([0.6355, 0.1011, 0.1066, 0.1568]) -Greedy action tensor([ 0.6042, -0.5025, -0.2716, 0.1702]) tensor([0.4175, 0.1381, 0.1739, 0.2705]) -Greedy action tensor([ 1.6250, -0.8174, -0.3360, 0.4196]) tensor([0.6548, 0.0569, 0.0921, 0.1962]) -Greedy action tensor([ 1.3582, -0.6901, -0.1739, 0.0867]) tensor([0.6152, 0.0793, 0.1329, 0.1725]) -Greedy action tensor([ 0.6518, -0.2181, -0.2159, 0.2100]) tensor([0.4029, 0.1688, 0.1692, 0.2590]) -Greedy action tensor([ 1.5474, -0.9188, -0.0504, -0.0269]) tensor([0.6692, 0.0568, 0.1354, 0.1386]) -Greedy action tensor([ 0.9139, 0.0884, 0.0482, -0.3195]) tensor([0.4651, 0.2037, 0.1957, 0.1355]) -Greedy action tensor([ 1.1825, -0.6911, -0.2122, 0.1020]) tensor([0.5744, 0.0882, 0.1424, 0.1950]) -Greedy action tensor([ 0.9979, -0.4363, -0.3656, -0.0505]) tensor([0.5422, 0.1292, 0.1386, 0.1900]) -Greedy action tensor([ 1.2376, -0.3235, -0.2309, 0.0061]) tensor([0.5773, 0.1212, 0.1330, 0.1685]) -Greedy action tensor([ 0.6838, -0.4354, -0.2419, 0.2574]) tensor([0.4210, 0.1374, 0.1668, 0.2748]) -Greedy action tensor([ 1.2176, 0.0262, -0.1474, 0.2324]) tensor([0.5174, 0.1572, 0.1322, 0.1932]) -Greedy action tensor([ 0.9692, -0.6363, -0.5495, 0.5312]) tensor([0.4843, 0.0972, 0.1060, 0.3125]) -Greedy action tensor([ 0.7127, -0.2779, -0.2399, 0.3338]) tensor([0.4096, 0.1521, 0.1580, 0.2804]) -Greedy action tensor([ 0.7269, -0.0515, 0.1971, -0.1778]) tensor([0.4077, 0.1872, 0.2401, 0.1650]) -Greedy action tensor([ 1.0600, -0.4879, -0.1272, 0.1706]) tensor([0.5185, 0.1103, 0.1582, 0.2130]) -Greedy action tensor([ 1.1834, -0.4705, -0.3541, 0.2397]) tensor([0.5570, 0.1066, 0.1197, 0.2168]) -Greedy action tensor([ 1.4962, -0.8838, -0.3114, 0.5413]) tensor([0.6092, 0.0564, 0.0999, 0.2345]) -Greedy action tensor([ 0.7613, -0.1294, 0.0691, -0.2346]) tensor([0.4386, 0.1800, 0.2195, 0.1620]) -Greedy action tensor([ 1.3763, -0.1841, -0.2851, -0.0234]) tensor([0.6073, 0.1276, 0.1153, 0.1498]) -Greedy action tensor([ 1.0263, -0.3917, -0.4929, 0.5701]) tensor([0.4774, 0.1156, 0.1045, 0.3025]) -Greedy action tensor([ 0.9126, -0.3303, -0.2704, 0.3450]) tensor([0.4626, 0.1335, 0.1417, 0.2622]) -Greedy action tensor([ 0.3021, 0.0127, 0.0298, -0.1717]) tensor([0.3192, 0.2390, 0.2431, 0.1987]) -Greedy action tensor([ 0.6040, -0.1189, 0.2207, -0.2193]) tensor([0.3837, 0.1862, 0.2616, 0.1685]) -Greedy action tensor([ 0.6135, -0.2435, 0.1300, -0.6003]) tensor([0.4277, 0.1815, 0.2637, 0.1271]) -Greedy action tensor([ 0.7542, -0.4652, -0.0022, -0.4681]) tensor([0.4856, 0.1434, 0.2279, 0.1430]) -Greedy action tensor([ 0.8342, -0.4280, 0.1826, -0.4435]) tensor([0.4801, 0.1359, 0.2502, 0.1338]) -Greedy action tensor([ 0.5389, -0.4359, 0.1636, -0.5810]) tensor([0.4183, 0.1578, 0.2874, 0.1365]) -Greedy action tensor([ 0.8570, -0.5081, 0.0014, -0.5032]) tensor([0.5163, 0.1318, 0.2194, 0.1325]) -Greedy action tensor([ 0.3857, -0.1230, -0.1495, -0.1359]) tensor([0.3597, 0.2163, 0.2106, 0.2135]) -Greedy action tensor([ 0.8132, -0.3357, 0.0828, -0.3595]) tensor([0.4743, 0.1504, 0.2285, 0.1468]) -Greedy action tensor([ 0.9118, -0.5188, 0.2011, -0.8883]) tensor([0.5275, 0.1262, 0.2592, 0.0872]) -Greedy action tensor([ 0.2929, -0.0789, 0.0388, -0.1408]) tensor([0.3212, 0.2215, 0.2491, 0.2082]) -Greedy action tensor([ 0.7763, -0.5011, 0.0591, -0.3201]) tensor([0.4760, 0.1327, 0.2323, 0.1590]) -Greedy action tensor([ 0.5517, -0.3339, 0.0973, -0.3370]) tensor([0.4068, 0.1678, 0.2582, 0.1673]) -Greedy action tensor([ 0.1445, 0.1862, 0.0004, -0.1344]) tensor([0.2729, 0.2845, 0.2362, 0.2064]) -Greedy action tensor([ 0.5127, 0.2110, 0.1423, -0.2814]) tensor([0.3470, 0.2566, 0.2396, 0.1568]) -Greedy action tensor([ 0.5696, -0.0036, -0.0574, -0.2789]) tensor([0.3959, 0.2232, 0.2115, 0.1695]) -Greedy action tensor([ 0.8177, -0.4659, 0.0455, -0.7011]) tensor([0.5107, 0.1415, 0.2360, 0.1118]) -Greedy action tensor([ 0.5851, -0.2481, -0.0344, -0.3488]) tensor([0.4227, 0.1837, 0.2275, 0.1661]) -Greedy action tensor([ 0.5333, -0.4297, -0.2216, -0.5327]) tensor([0.4553, 0.1738, 0.2140, 0.1568]) -Greedy action tensor([ 0.6119, -0.2013, -0.0657, -0.3367]) tensor([0.4276, 0.1896, 0.2172, 0.1656]) -Greedy action tensor([ 1.2032, -1.0333, -0.0684, -0.5065]) tensor([0.6377, 0.0681, 0.1788, 0.1154]) -Greedy action tensor([ 0.4083, -0.1909, -0.0356, -0.3613]) tensor([0.3768, 0.2070, 0.2417, 0.1745]) -Greedy action tensor([ 1.1894, -0.7319, 0.0779, -0.5486]) tensor([0.6056, 0.0887, 0.1993, 0.1065]) -Greedy action tensor([ 0.7706, -0.6440, -0.0106, -0.7131]) tensor([0.5187, 0.1261, 0.2375, 0.1177]) -Greedy action tensor([ 0.6143, -0.4131, -0.0207, -0.4438]) tensor([0.4474, 0.1602, 0.2371, 0.1553]) -Greedy action tensor([ 2.8351e-01, -1.2220e-04, 1.7467e-02, -4.0964e-01]) tensor([0.3312, 0.2494, 0.2538, 0.1656]) -Greedy action tensor([ 0.4474, -0.0420, 0.0760, -0.2773]) tensor([0.3588, 0.2199, 0.2475, 0.1738]) -Greedy action tensor([ 1.0007, -0.8505, -0.0164, -0.7732]) tensor([0.5923, 0.0930, 0.2142, 0.1005]) -Greedy action tensor([ 1.0843, -0.8817, -0.0036, -0.4778]) tensor([0.5929, 0.0830, 0.1998, 0.1243]) -Greedy action tensor([ 0.7234, -0.3385, -0.0989, -0.2733]) tensor([0.4642, 0.1605, 0.2040, 0.1713]) -Greedy action tensor([ 0.6250, -0.2026, -0.0831, -0.2380]) tensor([0.4253, 0.1859, 0.2095, 0.1794]) -Greedy action tensor([ 0.5413, -0.0758, 0.0747, -0.2953]) tensor([0.3846, 0.2075, 0.2412, 0.1666]) -Greedy action tensor([ 0.3456, -0.0044, 0.1516, -0.2446]) tensor([0.3244, 0.2286, 0.2672, 0.1798]) -Greedy action tensor([ 0.7463, -0.3915, 0.0165, -0.7778]) tensor([0.4950, 0.1587, 0.2386, 0.1078]) -Greedy action tensor([ 0.4918, -0.0508, 0.0273, -0.2611]) tensor([0.3731, 0.2168, 0.2344, 0.1757]) -Greedy action tensor([ 0.8227, -0.2799, -0.1415, -0.5899]) tensor([0.5110, 0.1697, 0.1949, 0.1244]) -Greedy action tensor([-0.0993, 0.0129, -0.1347, -0.3520]) tensor([0.2590, 0.2898, 0.2500, 0.2012]) -Greedy action tensor([ 0.2760, 0.0264, -0.0441, -0.2089]) tensor([0.3204, 0.2497, 0.2326, 0.1973]) -Greedy action tensor([ 0.4120, 0.1020, 0.0498, -0.2543]) tensor([0.3398, 0.2492, 0.2365, 0.1745]) -Greedy action tensor([ 0.6251, 0.0966, -0.1814, -0.0220]) tensor([0.3907, 0.2303, 0.1744, 0.2045]) -Greedy action tensor([ 0.5100, -0.2609, -0.0686, -0.3480]) tensor([0.4086, 0.1890, 0.2291, 0.1733]) -Greedy action tensor([ 0.4232, -0.0246, -0.0067, -0.2092]) tensor([0.3545, 0.2265, 0.2306, 0.1884]) -Greedy action tensor([ 0.4511, -0.1276, 0.0130, -0.2102]) tensor([0.3674, 0.2060, 0.2370, 0.1896]) -Greedy action tensor([ 0.6388, -0.5169, -0.0040, -0.4201]) tensor([0.4572, 0.1439, 0.2404, 0.1586]) -Greedy action tensor([ 0.2987, 0.0758, -0.0358, -0.1483]) tensor([0.3169, 0.2536, 0.2268, 0.2027]) -Greedy action tensor([ 0.2000, 0.2013, 0.1245, -0.2634]) tensor([0.2811, 0.2815, 0.2606, 0.1768]) -Greedy action tensor([ 0.2737, -0.0441, 0.0962, -0.4223]) tensor([0.3264, 0.2375, 0.2733, 0.1627]) -Greedy action tensor([ 1.1331, -1.0255, 0.0057, -0.5035]) tensor([0.6120, 0.0707, 0.1982, 0.1191]) -Greedy action tensor([ 0.4214, 0.1085, -0.0907, 0.1116]) tensor([0.3263, 0.2387, 0.1956, 0.2394]) -Greedy action tensor([ 0.5954, -0.0916, -0.0193, -0.4460]) tensor([0.4172, 0.2099, 0.2256, 0.1473]) -Greedy action tensor([ 0.5790, -0.1233, -0.0448, -0.1282]) tensor([0.3961, 0.1963, 0.2123, 0.1953]) -Greedy action tensor([ 0.4335, -0.1572, 0.0469, -0.2121]) tensor([0.3626, 0.2009, 0.2464, 0.1902]) -Greedy action tensor([ 0.8217, -0.3021, 0.0663, -0.4026]) tensor([0.4788, 0.1556, 0.2249, 0.1407]) -Greedy action tensor([ 0.4242, -0.0862, -0.0242, -0.1739]) tensor([0.3586, 0.2152, 0.2290, 0.1972]) -Greedy action tensor([ 0.5810, 0.0202, -0.1115, -0.1466]) tensor([0.3915, 0.2235, 0.1959, 0.1891]) -Greedy action tensor([ 0.2533, -0.2028, 0.0601, -0.3608]) tensor([0.3334, 0.2113, 0.2749, 0.1804]) -Greedy action tensor([ 0.4409, -0.1903, -0.0684, -0.2061]) tensor([0.3764, 0.2002, 0.2262, 0.1971]) -Greedy action tensor([ 0.6794, -0.4373, -0.0745, -0.3486]) tensor([0.4639, 0.1519, 0.2183, 0.1659]) -Greedy action tensor([ 0.8401, -0.4126, 0.0389, -0.4238]) tensor([0.4958, 0.1417, 0.2225, 0.1401]) -Greedy action tensor([ 0.3898, -0.0677, 0.0515, -0.2600]) tensor([0.3487, 0.2207, 0.2486, 0.1821]) -Greedy action tensor([ 0.6605, -0.3980, 0.1056, -0.5101]) tensor([0.4482, 0.1555, 0.2573, 0.1390]) -Greedy action tensor([ 0.6406, -0.2433, 0.0790, -0.3593]) tensor([0.4253, 0.1757, 0.2425, 0.1565]) -Greedy action tensor([ 0.3864, -0.2615, -0.1529, -0.4763]) tensor([0.3955, 0.2069, 0.2307, 0.1669]) -Greedy action tensor([ 0.3663, 0.1838, 0.1356, -0.1511]) tensor([0.3102, 0.2585, 0.2463, 0.1849]) -Greedy action tensor([ 0.3098, 0.0925, -0.0610, -0.1970]) tensor([0.3229, 0.2598, 0.2228, 0.1945]) -Greedy action tensor([ 0.7683, -0.2732, 0.1356, -0.5527]) tensor([0.4649, 0.1641, 0.2469, 0.1241]) -Greedy action tensor([ 0.5781, 0.1555, -0.2470, -0.0909]) tensor([0.3838, 0.2515, 0.1682, 0.1966]) -Greedy action tensor([ 0.4974, -0.3631, -0.1280, -0.5389]) tensor([0.4324, 0.1829, 0.2313, 0.1534]) -Greedy action tensor([ 0.3321, -0.0753, -0.1557, -0.2379]) tensor([0.3515, 0.2339, 0.2158, 0.1988]) -Greedy action tensor([ 0.2787, 0.0561, -0.0100, -0.1583]) tensor([0.3129, 0.2505, 0.2345, 0.2021]) -Greedy action tensor([ 0.5693, -0.2116, 0.0214, -0.3774]) tensor([0.4125, 0.1889, 0.2385, 0.1601]) -Greedy action tensor([ 0.6845, -0.2437, -0.0165, -0.2488]) tensor([0.4377, 0.1730, 0.2171, 0.1721]) -Greedy action tensor([ 0.8453, -0.3216, 0.0068, -0.3478]) tensor([0.4885, 0.1521, 0.2112, 0.1482]) -Greedy action tensor([ 0.6343, -0.4619, -0.0165, -0.4189]) tensor([0.4536, 0.1516, 0.2366, 0.1582]) -Greedy action tensor([ 0.6671, -0.6817, -0.1557, -0.5149]) tensor([0.4986, 0.1294, 0.2190, 0.1529]) -Greedy action tensor([ 0.8587, -0.5831, -0.0983, -0.3271]) tensor([0.5192, 0.1228, 0.1994, 0.1586]) -Greedy action tensor([ 0.5438, -0.3066, -0.0473, -0.3666]) tensor([0.4196, 0.1793, 0.2323, 0.1688]) -Greedy action tensor([ 0.4239, -0.0674, -0.0811, -0.3448]) tensor([0.3733, 0.2284, 0.2253, 0.1731]) -Greedy action tensor([ 0.5844, -0.4656, -0.0022, -0.3514]) tensor([0.4351, 0.1522, 0.2420, 0.1707]) -Greedy action tensor([ 0.3608, -0.0241, -0.0393, -0.2407]) tensor([0.3450, 0.2348, 0.2312, 0.1890]) -Greedy action tensor([ 0.9044, -0.6064, -0.1192, -0.5922]) tensor([0.5544, 0.1224, 0.1992, 0.1241]) -Greedy action tensor([-1.0561, -0.5284, 0.9288, 1.3608]) tensor([0.0472, 0.0800, 0.3436, 0.5292]) -Greedy action tensor([-1.2529, -0.4292, 0.7046, -0.6092]) tensor([0.0815, 0.1858, 0.5774, 0.1552]) -Greedy action tensor([-1.4127, -0.2681, 0.3044, 0.1582]) tensor([0.0689, 0.2163, 0.3835, 0.3313]) -Greedy action tensor([-0.5264, -0.5009, 0.2013, 0.1585]) tensor([0.1645, 0.1687, 0.3405, 0.3263]) -Greedy action tensor([-1.9777, -0.5236, 0.9016, 0.4208]) tensor([0.0293, 0.1256, 0.5222, 0.3229]) -Greedy action tensor([-1.2593, -0.5334, 0.8479, 1.1992]) tensor([0.0435, 0.0899, 0.3579, 0.5086]) -Greedy action tensor([-1.6121, -0.6367, 0.7049, 0.0377]) tensor([0.0526, 0.1396, 0.5339, 0.2739]) -Greedy action tensor([-1.6544, -0.6742, -0.0836, -0.4250]) tensor([0.0841, 0.2240, 0.4044, 0.2875]) -Greedy action tensor([-1.7263, -0.4417, 0.5583, -0.0756]) tensor([0.0509, 0.1839, 0.5000, 0.2652]) -Greedy action tensor([-1.8715, -0.6615, 0.7379, 0.0562]) tensor([0.0403, 0.1351, 0.5476, 0.2770]) -Greedy action tensor([-1.4158, -0.6846, 0.6851, 0.7531]) tensor([0.0500, 0.1039, 0.4087, 0.4374]) -Greedy action tensor([-1.5541, 0.0595, 0.4907, -0.2027]) tensor([0.0568, 0.2851, 0.4388, 0.2193]) -Greedy action tensor([-0.9833, -0.4635, 0.2984, 0.8763]) tensor([0.0787, 0.1324, 0.2836, 0.5054]) -Greedy action tensor([-0.5094, -0.4795, 0.2208, 0.1442]) tensor([0.1659, 0.1709, 0.3443, 0.3189]) -Greedy action tensor([-0.9928, -0.5407, 0.2051, 0.3949]) tensor([0.1011, 0.1589, 0.3350, 0.4050]) -Greedy action tensor([-1.8314e+00, -8.1114e-01, 9.0036e-04, -3.7266e-01]) tensor([0.0698, 0.1937, 0.4362, 0.3003]) -Greedy action tensor([-1.2556, 0.5253, 0.2554, 0.0218]) tensor([0.0664, 0.3943, 0.3010, 0.2383]) -Greedy action tensor([-1.7367, -0.4730, 0.5551, -0.0582]) tensor([0.0505, 0.1788, 0.4999, 0.2707]) -Greedy action tensor([-1.6531, -0.4870, 0.7072, 0.2586]) tensor([0.0464, 0.1488, 0.4912, 0.3136]) -Greedy action tensor([-1.2002, -0.4586, 0.5550, 0.7651]) tensor([0.0624, 0.1310, 0.3611, 0.4455]) -Greedy action tensor([-1.1075, -0.4669, 0.6331, 1.1212]) tensor([0.0559, 0.1061, 0.3187, 0.5193]) -Greedy action tensor([-1.9098, -0.4280, 0.6483, -0.1590]) tensor([0.0415, 0.1828, 0.5364, 0.2393]) -Greedy action tensor([-1.4830, -0.5531, 0.4097, 0.1175]) tensor([0.0661, 0.1675, 0.4388, 0.3276]) -Greedy action tensor([-1.7508, -0.4062, 0.5533, -0.0542]) tensor([0.0492, 0.1889, 0.4932, 0.2686]) -Greedy action tensor([-0.7166, -0.4931, 0.1979, 0.1617]) tensor([0.1398, 0.1748, 0.3489, 0.3365]) -Greedy action tensor([-1.7952, -0.4820, 0.5966, -0.0731]) tensor([0.0471, 0.1750, 0.5146, 0.2634]) -Greedy action tensor([-1.2144, -0.4288, 1.0166, 1.1748]) tensor([0.0427, 0.0937, 0.3977, 0.4659]) -Greedy action tensor([-1.3512, -0.5004, 0.4619, 0.5941]) tensor([0.0607, 0.1422, 0.3722, 0.4248]) -Greedy action tensor([-1.8305, -0.4736, 0.6856, -0.0115]) tensor([0.0427, 0.1658, 0.5284, 0.2632]) -Greedy action tensor([-1.9273, -0.4159, 0.6702, -0.1517]) tensor([0.0402, 0.1823, 0.5401, 0.2374]) -Greedy action tensor([-1.8246, -0.1729, 0.5789, -0.0615]) tensor([0.0433, 0.2257, 0.4787, 0.2523]) -Greedy action tensor([-1.0160, -0.5662, 0.5498, -0.2040]) tensor([0.1041, 0.1632, 0.4982, 0.2344]) -Greedy action tensor([-1.2998, -0.5778, 0.3343, 0.1478]) tensor([0.0804, 0.1655, 0.4121, 0.3420]) -Greedy action tensor([-1.8025, -0.4457, 0.5912, -0.0965]) tensor([0.0468, 0.1820, 0.5132, 0.2580]) -Greedy action tensor([-1.8401, -0.4300, 0.6066, -0.1125]) tensor([0.0449, 0.1839, 0.5186, 0.2526]) -Greedy action tensor([-1.8577, -0.9783, 0.3350, -0.2952]) tensor([0.0583, 0.1406, 0.5227, 0.2783]) -Greedy action tensor([-1.5134, -0.3731, 0.7767, 0.7345]) tensor([0.0426, 0.1333, 0.4208, 0.4034]) -Greedy action tensor([-1.6999, -0.4859, 0.5251, 0.0135]) tensor([0.0522, 0.1757, 0.4827, 0.2894]) -Greedy action tensor([-1.6223, -0.4902, 0.5459, -0.1355]) tensor([0.0579, 0.1797, 0.5063, 0.2561]) -Greedy action tensor([-1.4755, -0.7267, -0.0545, -0.4464]) tensor([0.0995, 0.2103, 0.4119, 0.2783]) -Greedy action tensor([-1.4958, -0.4768, 0.3969, 0.0185]) tensor([0.0669, 0.1853, 0.4438, 0.3040]) -Greedy action tensor([-1.7094, -0.4376, 0.5433, -0.0878]) tensor([0.0522, 0.1864, 0.4970, 0.2644]) -Greedy action tensor([-0.9335, 0.1763, -0.3649, -0.1947]) tensor([0.1267, 0.3844, 0.2237, 0.2652]) -Greedy action tensor([-1.3378, -0.3094, 0.6520, 0.6990]) tensor([0.0533, 0.1489, 0.3895, 0.4083]) -Greedy action tensor([-2.0308, -0.8740, 0.6476, 0.0990]) tensor([0.0368, 0.1171, 0.5363, 0.3098]) -Greedy action tensor([-1.7300, -0.5537, 0.8242, 0.3895]) tensor([0.0393, 0.1275, 0.5058, 0.3274]) -Greedy action tensor([-0.9151, -0.5683, 0.1667, 0.4514]) tensor([0.1077, 0.1523, 0.3177, 0.4223]) -Greedy action tensor([-1.7486, -0.5489, 0.7447, 0.1115]) tensor([0.0438, 0.1453, 0.5297, 0.2812]) -Greedy action tensor([-1.6099, -0.5039, 0.9841, 0.7694]) tensor([0.0355, 0.1072, 0.4745, 0.3829]) -Greedy action tensor([-0.9393, -0.3549, 1.2107, 1.4339]) tensor([0.0452, 0.0811, 0.3883, 0.4854]) -Greedy action tensor([-1.7039, -0.6987, 0.2361, -0.3868]) tensor([0.0693, 0.1894, 0.4824, 0.2588]) -Greedy action tensor([-1.4776, -0.9304, 0.6067, -0.1339]) tensor([0.0685, 0.1184, 0.5506, 0.2625]) -Greedy action tensor([-2.0413, -0.8936, 0.5577, -0.0981]) tensor([0.0407, 0.1282, 0.5471, 0.2840]) -Greedy action tensor([-1.3332, -0.6077, 0.4878, 0.1900]) tensor([0.0723, 0.1494, 0.4467, 0.3317]) -Greedy action tensor([-1.5307, -0.5706, 0.4865, 0.0594]) tensor([0.0624, 0.1629, 0.4689, 0.3059]) -Greedy action tensor([-1.9018, -0.7744, 0.3592, -0.2996]) tensor([0.0536, 0.1656, 0.5145, 0.2662]) -Greedy action tensor([-1.8718, -0.3259, 0.6312, -0.1049]) tensor([0.0421, 0.1975, 0.5142, 0.2463]) -Greedy action tensor([-0.9703, -0.3674, 0.4033, -0.1828]) tensor([0.1114, 0.2036, 0.4401, 0.2449]) -Greedy action tensor([-1.5353, -0.4123, 0.4370, 0.0783]) tensor([0.0614, 0.1888, 0.4414, 0.3084]) -Greedy action tensor([-1.6707, -0.4503, 0.5808, 0.0836]) tensor([0.0508, 0.1723, 0.4831, 0.2938]) -Greedy action tensor([-0.0323, -0.1517, 0.2187, 0.3294]) tensor([0.2170, 0.1926, 0.2789, 0.3115]) -Greedy action tensor([-1.8008e+00, -3.0950e-01, 6.1547e-01, -3.0655e-04]) tensor([0.0441, 0.1957, 0.4936, 0.2666]) -Greedy action tensor([-1.1488, -0.3387, 0.3006, -0.4085]) tensor([0.1041, 0.2340, 0.4436, 0.2183]) -Greedy action tensor([-1.7113, -0.4988, 0.5693, 0.0351]) tensor([0.0503, 0.1691, 0.4921, 0.2884]) -Greedy action tensor([-1.2962, -0.5493, 0.4799, 0.5459]) tensor([0.0652, 0.1377, 0.3854, 0.4117]) -Greedy action tensor([-1.6323, -0.5033, 0.4909, 0.0642]) tensor([0.0558, 0.1727, 0.4668, 0.3046]) -Greedy action tensor([-1.6472, -0.5668, 0.6151, 0.0812]) tensor([0.0521, 0.1536, 0.5007, 0.2936]) -Greedy action tensor([-0.9102, -0.4418, 0.5180, -0.4721]) tensor([0.1202, 0.1920, 0.5014, 0.1863]) -Greedy action tensor([-1.4177, 0.0081, 0.2665, -0.2794]) tensor([0.0732, 0.3044, 0.3941, 0.2283]) -Greedy action tensor([-1.7813, -0.6560, 0.2440, -0.2297]) tensor([0.0611, 0.1881, 0.4627, 0.2881]) -Greedy action tensor([-0.8673, -0.0784, 0.5128, 1.2686]) tensor([0.0639, 0.1407, 0.2542, 0.5412]) -Greedy action tensor([-1.2059, -0.5754, 0.2885, 0.2346]) tensor([0.0865, 0.1625, 0.3856, 0.3654]) -Greedy action tensor([-1.3710, -0.3461, 0.5814, 0.6724]) tensor([0.0539, 0.1502, 0.3798, 0.4160]) -Greedy action tensor([-1.9361, -0.4609, 0.7747, 0.0297]) tensor([0.0363, 0.1587, 0.5459, 0.2592]) -Greedy action tensor([-1.7551, -0.5206, 0.5276, -0.1419]) tensor([0.0519, 0.1784, 0.5090, 0.2606]) -Greedy action tensor([-1.5287, -0.5628, 0.4972, 0.1161]) tensor([0.0610, 0.1603, 0.4627, 0.3160]) -Greedy action tensor([-1.8732, -0.7722, 0.1735, -0.2798]) tensor([0.0600, 0.1804, 0.4644, 0.2952]) -Greedy action tensor([-1.5604, -0.4645, 0.4986, -0.1435]) tensor([0.0627, 0.1875, 0.4913, 0.2585]) -Greedy action tensor([-1.7945, -0.5022, 0.0795, -0.3550]) tensor([0.0650, 0.2368, 0.4237, 0.2744]) -Greedy action tensor([-0.9848, -0.4039, 0.5665, -0.5670]) tensor([0.1108, 0.1981, 0.5228, 0.1683]) -Greedy action tensor([-1.5169, -0.5358, 0.5475, 0.3157]) tensor([0.0562, 0.1499, 0.4428, 0.3512]) -Greedy action tensor([1.2886, 0.5730, 1.1851, 1.5322]) tensor([0.2727, 0.1334, 0.2459, 0.3480]) -Greedy action tensor([ 0.4908, -0.1578, 0.3718, 0.4456]) tensor([0.2971, 0.1553, 0.2637, 0.2839]) -Greedy action tensor([1.1956, 1.0402, 0.4704, 0.5698]) tensor([0.3478, 0.2977, 0.1684, 0.1860]) -Greedy action tensor([ 0.2889, 0.0561, -0.0372, 1.4436]) tensor([0.1758, 0.1393, 0.1269, 0.5579]) -Greedy action tensor([ 0.8299, -0.3298, -0.3098, 1.6723]) tensor([0.2528, 0.0793, 0.0809, 0.5870]) -Greedy action tensor([ 1.7538, -0.2929, 0.9268, -0.3677]) tensor([0.5930, 0.0766, 0.2593, 0.0711]) -Greedy action tensor([-0.0068, 1.0104, -0.3365, 0.7316]) tensor([0.1520, 0.4205, 0.1093, 0.3182]) -Greedy action tensor([0.8663, 0.4061, 0.2530, 1.2603]) tensor([0.2736, 0.1727, 0.1482, 0.4056]) -Greedy action tensor([ 0.4440, 0.7680, -0.6011, 0.5297]) tensor([0.2615, 0.3616, 0.0920, 0.2849]) -Greedy action tensor([ 0.8613, -0.1308, -1.3874, 1.5158]) tensor([0.2941, 0.1090, 0.0310, 0.5659]) -Greedy action tensor([ 1.9508, -0.3425, 1.7575, 0.6505]) tensor([0.4550, 0.0459, 0.3751, 0.1240]) -Greedy action tensor([ 0.8259, -1.9422, -0.6048, 1.3585]) tensor([0.3327, 0.0209, 0.0796, 0.5668]) -Greedy action tensor([ 0.0263, -0.2930, -0.3072, 1.9563]) tensor([0.1072, 0.0779, 0.0768, 0.7382]) -Greedy action tensor([ 0.8034, -1.1735, 1.1201, 0.1194]) tensor([0.3316, 0.0459, 0.4552, 0.1673]) -Greedy action tensor([0.4141, 0.3921, 0.5569, 0.1676]) tensor([0.2555, 0.2500, 0.2948, 0.1997]) -Greedy action tensor([ 0.9719, -0.0868, -0.2001, 1.1179]) tensor([0.3554, 0.1233, 0.1101, 0.4112]) -Greedy action tensor([0.7318, 0.6963, 0.1272, 0.0157]) tensor([0.3333, 0.3217, 0.1821, 0.1629]) -Greedy action tensor([ 0.2968, 0.5780, -0.2753, 0.0813]) tensor([0.2706, 0.3585, 0.1527, 0.2182]) -Greedy action tensor([0.5665, 0.7981, 1.0382, 0.1796]) tensor([0.2201, 0.2775, 0.3528, 0.1495]) -Greedy action tensor([-0.2705, -0.3170, 0.4807, 0.0862]) tensor([0.1817, 0.1735, 0.3852, 0.2596]) -Greedy action tensor([ 0.5857, -0.2081, 0.3870, 0.4127]) tensor([0.3212, 0.1452, 0.2633, 0.2702]) -Greedy action tensor([ 0.5687, -0.7159, -0.3386, 1.1865]) tensor([0.2829, 0.0783, 0.1142, 0.5247]) -Greedy action tensor([1.4444, 0.3690, 0.4230, 0.5858]) tensor([0.4706, 0.1605, 0.1695, 0.1994]) -Greedy action tensor([ 1.2920, -1.4479, 0.2254, 1.3393]) tensor([0.4070, 0.0263, 0.1401, 0.4267]) -Greedy action tensor([-0.5561, 0.4674, -1.0348, -0.0553]) tensor([0.1652, 0.4598, 0.1024, 0.2726]) -Greedy action tensor([-0.4020, 0.1096, -0.4009, 0.4561]) tensor([0.1659, 0.2767, 0.1661, 0.3913]) -Greedy action tensor([ 0.0707, 0.2170, -0.0439, 0.0539]) tensor([0.2480, 0.2871, 0.2211, 0.2438]) -Greedy action tensor([ 0.7385, 0.2239, -0.0265, 0.3154]) tensor([0.3679, 0.2199, 0.1712, 0.2410]) -Greedy action tensor([-0.2884, -2.2569, -0.1184, 0.9623]) tensor([0.1719, 0.0240, 0.2037, 0.6004]) -Greedy action tensor([0.8674, 0.3747, 1.0851, 0.2887]) tensor([0.2928, 0.1789, 0.3641, 0.1642]) -Greedy action tensor([ 0.8780, -0.6633, 0.3900, 1.4922]) tensor([0.2720, 0.0582, 0.1670, 0.5028]) -Greedy action tensor([ 0.3305, -0.4134, 0.1251, 0.4616]) tensor([0.2916, 0.1386, 0.2374, 0.3324]) -Greedy action tensor([ 0.2798, -0.7489, 0.0760, 1.5002]) tensor([0.1798, 0.0643, 0.1466, 0.6093]) -Greedy action tensor([-0.3068, -0.3662, 0.7747, 0.4266]) tensor([0.1434, 0.1351, 0.4229, 0.2986]) -Greedy action tensor([ 0.1242, -1.3838, -0.9136, 0.4315]) tensor([0.3407, 0.0754, 0.1207, 0.4632]) -Greedy action tensor([-0.8036, 0.5531, 0.4081, -1.4474]) tensor([0.1141, 0.4429, 0.3831, 0.0599]) -Greedy action tensor([ 1.8115, -0.3643, -0.2853, 1.6660]) tensor([0.4760, 0.0540, 0.0585, 0.4115]) -Greedy action tensor([-0.1315, 0.4231, 0.6734, 0.2966]) tensor([0.1536, 0.2674, 0.3434, 0.2356]) -Greedy action tensor([ 0.9101, 0.3083, -0.3616, 0.1710]) tensor([0.4337, 0.2376, 0.1216, 0.2071]) -Greedy action tensor([ 0.3483, 0.7474, 0.2029, -0.2868]) tensor([0.2574, 0.3836, 0.2226, 0.1364]) -Greedy action tensor([ 0.1445, -1.3980, -0.9256, 2.0924]) tensor([0.1167, 0.0250, 0.0400, 0.8184]) -Greedy action tensor([-0.1163, -1.3378, -0.0448, 1.3471]) tensor([0.1495, 0.0441, 0.1606, 0.6459]) -Greedy action tensor([ 1.6928, -0.9425, 0.8877, 1.1899]) tensor([0.4709, 0.0338, 0.2105, 0.2848]) -Greedy action tensor([ 2.1097, -1.1437, 1.4452, 1.3952]) tensor([0.4896, 0.0189, 0.2519, 0.2396]) -Greedy action tensor([0.4140, 0.0366, 1.1681, 0.6069]) tensor([0.1990, 0.1365, 0.4231, 0.2414]) -Greedy action tensor([ 0.3155, 0.4023, -0.7438, 0.5849]) tensor([0.2669, 0.2911, 0.0925, 0.3494]) -Greedy action tensor([ 0.6665, -2.3805, 0.1334, 1.6569]) tensor([0.2311, 0.0110, 0.1356, 0.6223]) -Greedy action tensor([ 0.4435, -1.5161, -0.1126, 0.5176]) tensor([0.3582, 0.0505, 0.2054, 0.3858]) -Greedy action tensor([ 1.2786, -0.0320, 0.1723, 0.7773]) tensor([0.4533, 0.1222, 0.1499, 0.2746]) -Greedy action tensor([ 1.0511, 1.1981, -0.1444, 0.5768]) tensor([0.3243, 0.3757, 0.0981, 0.2018]) -Greedy action tensor([ 1.9907, -0.6843, 1.0158, 1.4771]) tensor([0.4891, 0.0337, 0.1845, 0.2927]) -Greedy action tensor([ 0.4719, -0.3276, -0.1443, 0.4174]) tensor([0.3405, 0.1531, 0.1839, 0.3225]) -Greedy action tensor([-0.3598, -0.3163, -0.2232, 0.4696]) tensor([0.1824, 0.1905, 0.2091, 0.4180]) -Greedy action tensor([-0.2849, 0.3598, 0.0884, 0.0652]) tensor([0.1731, 0.3298, 0.2514, 0.2457]) -Greedy action tensor([0.5113, 0.6312, 0.2650, 0.6985]) tensor([0.2430, 0.2740, 0.1900, 0.2930]) -Greedy action tensor([ 1.1210, -0.2061, 1.6084, 1.2963]) tensor([0.2448, 0.0649, 0.3986, 0.2917]) -Greedy action tensor([ 0.2279, 0.1948, -0.1195, -0.3079]) tensor([0.3068, 0.2968, 0.2168, 0.1796]) -Greedy action tensor([ 1.9966, -0.2254, 1.1067, 0.1858]) tensor([0.5943, 0.0644, 0.2441, 0.0972]) -Greedy action tensor([ 1.0541, -0.0780, -0.4282, 1.2397]) tensor([0.3632, 0.1171, 0.0825, 0.4373]) -Greedy action tensor([ 0.5284, -0.3688, 0.5317, 1.2032]) tensor([0.2286, 0.0932, 0.2293, 0.4489]) -Greedy action tensor([ 1.4540, -1.0295, 0.7985, 0.7113]) tensor([0.4811, 0.0401, 0.2498, 0.2289]) -Greedy action tensor([-0.7181, -0.9368, -0.4122, 1.6497]) tensor([0.0723, 0.0581, 0.0981, 0.7715]) -Greedy action tensor([0.6411, 0.1895, 1.4737, 0.4779]) tensor([0.2090, 0.1330, 0.4805, 0.1775]) -Greedy action tensor([ 0.7787, -0.7796, 0.7567, 0.9654]) tensor([0.2946, 0.0620, 0.2882, 0.3551]) -Greedy action tensor([ 1.1141, -0.8771, -0.4337, 1.0518]) tensor([0.4369, 0.0597, 0.0929, 0.4105]) -Greedy action tensor([ 0.3076, -0.4936, -0.6070, -0.0619]) tensor([0.3936, 0.1766, 0.1577, 0.2720]) -Greedy action tensor([1.0155, 0.6405, 0.0286, 0.2879]) tensor([0.3932, 0.2703, 0.1466, 0.1900]) -Greedy action tensor([ 0.6466, -0.2342, 0.1854, 0.9601]) tensor([0.2930, 0.1214, 0.1847, 0.4009]) -Greedy action tensor([0.6618, 0.4117, 0.6668, 0.3362]) tensor([0.2852, 0.2221, 0.2867, 0.2060]) -Greedy action tensor([-0.1928, 0.2444, 0.7975, -0.4638]) tensor([0.1666, 0.2579, 0.4484, 0.1270]) -Greedy action tensor([ 0.4474, -0.1290, 0.4703, 0.5579]) tensor([0.2701, 0.1518, 0.2764, 0.3017]) -Greedy action tensor([-0.2560, -1.5211, 0.2960, -0.0340]) tensor([0.2343, 0.0661, 0.4070, 0.2926]) -Greedy action tensor([ 1.0861, -1.0420, 1.1723, 0.2315]) tensor([0.3796, 0.0452, 0.4137, 0.1615]) -Greedy action tensor([ 0.8781, -1.4074, 0.4904, 0.4193]) tensor([0.4145, 0.0422, 0.2813, 0.2620]) -Greedy action tensor([ 1.1318, -0.1233, 1.2801, -0.1189]) tensor([0.3661, 0.1044, 0.4247, 0.1048]) -Greedy action tensor([1.4903, 0.5344, 0.2528, 0.5083]) tensor([0.4880, 0.1876, 0.1416, 0.1828]) -Greedy action tensor([ 0.5338, -0.0910, -0.5813, 1.9442]) tensor([0.1678, 0.0898, 0.0550, 0.6874]) -Greedy action tensor([-0.0863, 1.5343, 0.9944, -0.4314]) tensor([0.1030, 0.5206, 0.3035, 0.0729]) -Greedy action tensor([ 0.8297, 0.3790, -0.8344, 1.5879]) tensor([0.2525, 0.1609, 0.0478, 0.5389]) -Greedy action tensor([ 1.0903, -0.9948, -0.2880, 0.7514]) tensor([0.4787, 0.0595, 0.1206, 0.3411]) -Greedy action tensor([-0.0960, 0.1101, -0.6973, 1.1730]) tensor([0.1579, 0.1940, 0.0865, 0.5616]) -Greedy action tensor([ 0.1126, -0.4324, -1.0685, 0.1400]) tensor([0.3431, 0.1990, 0.1053, 0.3526]) -Greedy action tensor([ 0.6845, -0.2689, -0.7908, 0.8804]) tensor([0.3533, 0.1362, 0.0808, 0.4298]) -Greedy action tensor([ 1.0552, -0.6498, -0.4354, 0.4299]) tensor([0.5149, 0.0936, 0.1160, 0.2755]) -Greedy action tensor([ 1.0420, -0.1342, 0.1492, -0.0516]) tensor([0.4871, 0.1502, 0.1995, 0.1632]) -Greedy action tensor([ 1.0078, -0.3221, -0.3049, 0.1178]) tensor([0.5143, 0.1361, 0.1384, 0.2112]) -Greedy action tensor([ 0.9659, -0.4304, -0.7531, 0.1897]) tensor([0.5300, 0.1312, 0.0950, 0.2439]) -Greedy action tensor([ 0.6097, -0.2059, -0.2159, 0.1819]) tensor([0.3949, 0.1747, 0.1730, 0.2575]) -Greedy action tensor([ 1.4967, -0.1234, -0.0954, 0.0917]) tensor([0.6073, 0.1202, 0.1236, 0.1490]) -Greedy action tensor([ 1.3644, -0.7982, -0.3997, 0.1975]) tensor([0.6259, 0.0720, 0.1072, 0.1949]) -Greedy action tensor([ 1.2321, -0.6319, -0.0177, 0.2120]) tensor([0.5549, 0.0860, 0.1590, 0.2001]) -Greedy action tensor([ 1.3347, -0.1942, -0.4075, 0.3387]) tensor([0.5678, 0.1231, 0.0994, 0.2097]) -Greedy action tensor([ 1.4762, -0.7269, -0.3991, 0.3332]) tensor([0.6319, 0.0698, 0.0969, 0.2015]) -Greedy action tensor([ 1.2815, -0.5291, -0.0608, 0.0791]) tensor([0.5796, 0.0948, 0.1514, 0.1742]) -Greedy action tensor([ 1.1288, -0.6429, -0.2452, 0.2336]) tensor([0.5460, 0.0928, 0.1382, 0.2230]) -Greedy action tensor([ 1.0779, -0.4126, -0.6630, 0.5486]) tensor([0.5026, 0.1132, 0.0881, 0.2960]) -Greedy action tensor([ 0.5027, -0.3989, -0.0696, 0.0276]) tensor([0.3858, 0.1566, 0.2177, 0.2399]) -Greedy action tensor([ 1.8503, -0.1371, -0.6589, 0.2793]) tensor([0.7012, 0.0961, 0.0570, 0.1457]) -Greedy action tensor([ 0.6369, -0.2161, -0.4103, 0.4951]) tensor([0.3781, 0.1611, 0.1327, 0.3281]) -Greedy action tensor([ 1.0108, -0.5899, 0.0511, -0.2867]) tensor([0.5382, 0.1086, 0.2062, 0.1471]) -Greedy action tensor([ 1.0796, -0.2984, 0.0386, -0.2863]) tensor([0.5375, 0.1355, 0.1898, 0.1372]) -Greedy action tensor([ 0.5489, -0.5754, 0.1326, 0.0296]) tensor([0.3877, 0.1260, 0.2557, 0.2307]) -Greedy action tensor([ 1.3030, -0.4736, -0.4165, 0.5182]) tensor([0.5541, 0.0938, 0.0993, 0.2528]) -Greedy action tensor([ 0.4966, -0.1035, -0.6141, 0.4067]) tensor([0.3582, 0.1965, 0.1179, 0.3274]) -Greedy action tensor([ 1.0909, -0.6826, -0.4922, 0.6624]) tensor([0.4935, 0.0838, 0.1013, 0.3215]) -Greedy action tensor([ 1.0990, -0.6193, -0.2679, 0.1067]) tensor([0.5540, 0.0994, 0.1412, 0.2054]) -Greedy action tensor([ 0.4055, -0.2731, -0.1676, 0.2017]) tensor([0.3464, 0.1757, 0.1953, 0.2825]) -Greedy action tensor([ 0.9108, -0.2722, 0.0049, -0.2080]) tensor([0.4909, 0.1504, 0.1984, 0.1604]) -Greedy action tensor([ 1.5368, -0.3405, -0.3576, 0.4354]) tensor([0.6113, 0.0935, 0.0919, 0.2032]) -Greedy action tensor([ 1.1971, -0.7760, -0.3886, 0.5680]) tensor([0.5328, 0.0741, 0.1091, 0.2840]) -Greedy action tensor([ 1.1729, -0.5685, -0.2506, 0.5279]) tensor([0.5153, 0.0903, 0.1241, 0.2703]) -Greedy action tensor([ 1.2916, -0.3068, -0.0423, 0.1552]) tensor([0.5597, 0.1132, 0.1475, 0.1797]) -Greedy action tensor([ 0.9703, -0.6377, -0.1873, 0.1271]) tensor([0.5142, 0.1030, 0.1616, 0.2213]) -Greedy action tensor([ 1.5300, -0.5924, -0.2907, 0.0811]) tensor([0.6594, 0.0790, 0.1068, 0.1549]) -Greedy action tensor([ 0.8961, -0.2007, -0.2653, 0.0136]) tensor([0.4853, 0.1620, 0.1519, 0.2008]) -Greedy action tensor([ 0.8360, -0.3467, -0.0527, 0.0525]) tensor([0.4599, 0.1409, 0.1891, 0.2101]) -Greedy action tensor([ 2.1581, -0.0189, -0.4898, 0.4715]) tensor([0.7303, 0.0828, 0.0517, 0.1352]) -Greedy action tensor([ 1.9772, -0.2749, -0.5950, -0.1197]) tensor([0.7667, 0.0806, 0.0585, 0.0942]) -Greedy action tensor([ 0.7907, -0.0087, -0.0541, 0.0929]) tensor([0.4207, 0.1891, 0.1808, 0.2094]) -Greedy action tensor([ 1.3573, -0.7560, -0.1560, 0.3274]) tensor([0.5889, 0.0712, 0.1297, 0.2103]) -Greedy action tensor([ 0.8326, -0.2374, -0.1654, 0.0706]) tensor([0.4591, 0.1575, 0.1692, 0.2143]) -Greedy action tensor([ 0.8600, -0.1098, -0.4308, 0.0720]) tensor([0.4742, 0.1798, 0.1304, 0.2156]) -Greedy action tensor([ 1.4932, -0.2611, -0.2269, 0.2295]) tensor([0.6117, 0.1058, 0.1095, 0.1729]) -Greedy action tensor([ 1.5576, -0.5005, -0.1085, 0.3239]) tensor([0.6219, 0.0794, 0.1175, 0.1811]) -Greedy action tensor([ 1.4037, -0.6855, -0.3216, 0.1690]) tensor([0.6278, 0.0777, 0.1118, 0.1826]) -Greedy action tensor([ 1.4557, -0.0468, -0.3934, -0.0240]) tensor([0.6220, 0.1384, 0.0979, 0.1416]) -Greedy action tensor([ 1.3897, -0.3243, -0.2281, 0.0704]) tensor([0.6076, 0.1095, 0.1205, 0.1624]) -Greedy action tensor([ 0.7187, -0.4008, 0.0836, -0.2026]) tensor([0.4436, 0.1448, 0.2350, 0.1766]) -Greedy action tensor([ 1.6339, -0.4210, -0.5167, 0.3210]) tensor([0.6607, 0.0846, 0.0769, 0.1778]) -Greedy action tensor([ 1.6424, -0.3300, -0.1492, 0.1006]) tensor([0.6580, 0.0915, 0.1097, 0.1408]) -Greedy action tensor([ 1.3582, -0.4901, -0.2840, -0.0043]) tensor([0.6223, 0.0980, 0.1204, 0.1593]) -Greedy action tensor([ 0.5596, -0.0760, 0.1676, -0.2377]) tensor([0.3765, 0.1994, 0.2544, 0.1697]) -Greedy action tensor([ 0.9408, -0.0237, 0.0075, -0.3908]) tensor([0.4906, 0.1870, 0.1929, 0.1295]) -Greedy action tensor([ 0.8724, 0.1893, -0.0166, -0.2363]) tensor([0.4452, 0.2249, 0.1830, 0.1469]) -Greedy action tensor([ 0.7179, -0.7062, -0.0386, -0.0357]) tensor([0.4586, 0.1104, 0.2152, 0.2158]) -Greedy action tensor([ 1.1601, -0.2821, 0.0237, -0.0433]) tensor([0.5383, 0.1273, 0.1728, 0.1616]) -Greedy action tensor([ 0.9301, -0.5525, -0.5129, 0.5184]) tensor([0.4704, 0.1068, 0.1111, 0.3117]) -Greedy action tensor([ 1.4906, -0.5346, 0.0244, 0.1062]) tensor([0.6199, 0.0818, 0.1431, 0.1553]) -Greedy action tensor([ 0.3793, -0.0443, -0.2420, -0.1303]) tensor([0.3581, 0.2344, 0.1924, 0.2151]) -Greedy action tensor([ 1.4493, -0.5595, -0.3242, 0.0300]) tensor([0.6469, 0.0868, 0.1098, 0.1565]) -Greedy action tensor([ 0.5733, -0.2807, 0.2214, -0.1585]) tensor([0.3831, 0.1631, 0.2695, 0.1843]) -Greedy action tensor([ 0.7387, -0.0751, -0.5686, -0.1806]) tensor([0.4734, 0.2098, 0.1281, 0.1888]) -Greedy action tensor([ 1.2765, -0.7777, -0.3098, 0.5858]) tensor([0.5452, 0.0699, 0.1116, 0.2733]) -Greedy action tensor([ 1.3779, -0.2781, -0.5003, 0.0034]) tensor([0.6263, 0.1196, 0.0957, 0.1584]) -Greedy action tensor([ 0.9471, -0.6327, -0.0134, -0.1088]) tensor([0.5164, 0.1064, 0.1976, 0.1796]) -Greedy action tensor([ 0.9739, -0.3861, -0.0385, -0.1712]) tensor([0.5160, 0.1324, 0.1875, 0.1642]) -Greedy action tensor([ 0.5488, -0.3269, -0.4199, -0.2021]) tensor([0.4409, 0.1837, 0.1674, 0.2081]) -Greedy action tensor([ 1.0184, -0.6025, -0.3317, -0.2130]) tensor([0.5718, 0.1131, 0.1482, 0.1669]) -Greedy action tensor([ 1.1262, -0.3034, -0.0172, 0.1717]) tensor([0.5146, 0.1232, 0.1640, 0.1981]) -Greedy action tensor([ 0.8353, -0.1244, -0.3608, 0.0214]) tensor([0.4698, 0.1799, 0.1421, 0.2082]) -Greedy action tensor([ 1.3313, -0.4868, -0.2822, 0.0857]) tensor([0.6063, 0.0984, 0.1208, 0.1745]) -Greedy action tensor([ 1.7353, -0.4310, -0.2059, -0.1327]) tensor([0.7079, 0.0811, 0.1016, 0.1093]) -Greedy action tensor([ 1.2192, -0.0830, -0.3071, 0.1076]) tensor([0.5500, 0.1495, 0.1195, 0.1810]) -Greedy action tensor([ 0.8725, -0.4872, 0.0040, 0.1896]) tensor([0.4584, 0.1177, 0.1923, 0.2316]) -Greedy action tensor([ 1.0704, -0.6171, -0.3300, 0.4401]) tensor([0.5092, 0.0942, 0.1255, 0.2711]) -Greedy action tensor([ 0.8884, -0.5422, -0.1945, 0.1066]) tensor([0.4913, 0.1175, 0.1664, 0.2248]) -Greedy action tensor([ 1.0296, -0.0467, -0.1634, -0.2104]) tensor([0.5172, 0.1763, 0.1569, 0.1497]) -Greedy action tensor([ 1.2005, -0.3510, -0.4340, 0.0063]) tensor([0.5848, 0.1239, 0.1141, 0.1772]) -Greedy action tensor([ 0.7773, -0.5226, -0.2393, -0.0333]) tensor([0.4810, 0.1311, 0.1740, 0.2139]) -Greedy action tensor([ 0.8568, -0.4220, -0.3207, 0.3266]) tensor([0.4598, 0.1280, 0.1416, 0.2706]) -Greedy action tensor([ 0.7301, -0.1837, -0.1322, 0.0702]) tensor([0.4273, 0.1714, 0.1804, 0.2209]) -Greedy action tensor([ 1.4995, -0.8527, 0.1889, 0.2653]) tensor([0.6039, 0.0575, 0.1628, 0.1758]) -Greedy action tensor([ 1.4762, -0.2688, -0.3858, 0.2416]) tensor([0.6169, 0.1077, 0.0958, 0.1795]) -Greedy action tensor([ 0.7716, -0.6449, -0.0441, -0.2854]) tensor([0.4920, 0.1193, 0.2177, 0.1710]) -Greedy action tensor([ 0.5240, -0.3726, -0.0060, -0.2327]) tensor([0.4056, 0.1654, 0.2387, 0.1903]) -Greedy action tensor([ 0.4503, -0.2191, -0.0141, -0.5275]) tensor([0.3974, 0.2034, 0.2497, 0.1495]) -Greedy action tensor([ 0.2999, 0.0438, 0.0417, -0.1491]) tensor([0.3140, 0.2431, 0.2425, 0.2004]) -Greedy action tensor([ 0.4197, -0.2472, -0.0563, -0.5469]) tensor([0.3976, 0.2041, 0.2470, 0.1512]) -Greedy action tensor([ 0.6224, -0.4723, 0.0981, -0.6133]) tensor([0.4510, 0.1509, 0.2670, 0.1311]) -Greedy action tensor([ 0.6670, -0.3346, -0.1905, -0.3425]) tensor([0.4638, 0.1704, 0.1968, 0.1690]) -Greedy action tensor([ 0.3820, -0.1635, -0.0795, -0.4215]) tensor([0.3763, 0.2181, 0.2372, 0.1685]) -Greedy action tensor([ 0.4875, -0.0267, -0.0735, -0.1565]) tensor([0.3712, 0.2220, 0.2118, 0.1950]) -Greedy action tensor([ 0.3737, -0.0089, 0.0538, -0.1774]) tensor([0.3351, 0.2285, 0.2433, 0.1931]) -Greedy action tensor([ 0.3385, -0.1141, -0.1549, -0.3257]) tensor([0.3622, 0.2303, 0.2211, 0.1864]) -Greedy action tensor([ 0.5920, -0.2094, 0.0544, -0.2754]) tensor([0.4077, 0.1829, 0.2382, 0.1712]) -Greedy action tensor([ 0.7918, -0.4891, -0.0990, -0.3836]) tensor([0.5008, 0.1391, 0.2055, 0.1546]) -Greedy action tensor([ 0.4002, -0.3395, 0.1151, -0.4213]) tensor([0.3747, 0.1788, 0.2817, 0.1648]) -Greedy action tensor([ 4.2723e-01, -7.7535e-02, 8.4788e-05, -1.9018e-01]) tensor([0.3577, 0.2159, 0.2334, 0.1929]) -Greedy action tensor([ 0.8717, -0.2714, -0.0637, -0.3300]) tensor([0.4970, 0.1585, 0.1950, 0.1495]) -Greedy action tensor([ 0.6999, -0.3609, -0.0816, -0.2677]) tensor([0.4579, 0.1585, 0.2096, 0.1740]) -Greedy action tensor([ 0.6847, -0.4031, 0.1579, -0.5180]) tensor([0.4489, 0.1512, 0.2651, 0.1348]) -Greedy action tensor([ 0.7749, -0.2210, -0.0994, -0.2885]) tensor([0.4691, 0.1733, 0.1957, 0.1620]) -Greedy action tensor([ 0.7556, -0.3652, 0.1517, -0.5138]) tensor([0.4643, 0.1514, 0.2538, 0.1305]) -Greedy action tensor([ 0.4226, -0.1318, -0.1114, -0.1969]) tensor([0.3705, 0.2128, 0.2172, 0.1994]) -Greedy action tensor([ 0.3759, 0.0584, -0.0508, -0.2396]) tensor([0.3424, 0.2492, 0.2234, 0.1850]) -Greedy action tensor([ 0.6624, -0.4656, 0.0117, -0.3181]) tensor([0.4503, 0.1458, 0.2349, 0.1689]) -Greedy action tensor([ 0.7134, -0.2489, -0.1651, -0.3599]) tensor([0.4674, 0.1786, 0.1942, 0.1598]) -Greedy action tensor([ 0.8012, -0.1378, -0.0947, -0.4285]) tensor([0.4781, 0.1869, 0.1952, 0.1398]) -Greedy action tensor([ 0.5727, -0.3561, 0.1579, -0.4724]) tensor([0.4154, 0.1641, 0.2744, 0.1461]) -Greedy action tensor([ 0.6177, -0.3030, 0.0980, -0.6307]) tensor([0.4386, 0.1747, 0.2609, 0.1259]) -Greedy action tensor([ 0.3054, -0.0899, 0.0529, -0.3050]) tensor([0.3341, 0.2250, 0.2595, 0.1815]) -Greedy action tensor([ 0.7149, -0.4222, -0.0844, -0.4909]) tensor([0.4831, 0.1550, 0.2172, 0.1447]) -Greedy action tensor([ 0.4886, -0.0719, 0.1424, -0.7054]) tensor([0.3874, 0.2212, 0.2740, 0.1174]) -Greedy action tensor([ 0.6612, -0.3306, 0.0740, -0.6695]) tensor([0.4564, 0.1693, 0.2537, 0.1206]) -Greedy action tensor([ 0.4107, -0.1948, 0.1218, -0.3337]) tensor([0.3610, 0.1971, 0.2704, 0.1715]) -Greedy action tensor([ 0.8121, -0.4949, 0.1254, -0.6106]) tensor([0.4963, 0.1343, 0.2498, 0.1196]) -Greedy action tensor([ 0.7060, -0.3887, 0.0398, -0.3370]) tensor([0.4544, 0.1521, 0.2334, 0.1601]) -Greedy action tensor([ 0.3402, -0.2503, -0.0065, -0.2095]) tensor([0.3523, 0.1952, 0.2491, 0.2033]) -Greedy action tensor([ 0.2374, 0.2202, -0.0623, -0.3106]) tensor([0.3028, 0.2977, 0.2244, 0.1751]) -Greedy action tensor([ 0.5381, -0.2247, -0.0590, -0.2779]) tensor([0.4067, 0.1897, 0.2238, 0.1798]) -Greedy action tensor([ 0.4976, 0.0484, -0.0529, -0.2336]) tensor([0.3709, 0.2367, 0.2139, 0.1785]) -Greedy action tensor([ 0.5931, -0.1568, -0.0120, -0.3143]) tensor([0.4129, 0.1950, 0.2254, 0.1666]) -Greedy action tensor([ 0.4549, -0.1567, -0.0038, -0.2542]) tensor([0.3750, 0.2034, 0.2370, 0.1845]) -Greedy action tensor([ 0.5193, 0.2071, -0.0091, -0.0593]) tensor([0.3470, 0.2539, 0.2046, 0.1945]) -Greedy action tensor([ 0.2838, -0.1120, -0.0577, -0.2476]) tensor([0.3365, 0.2265, 0.2392, 0.1978]) -Greedy action tensor([ 0.5533, -0.2325, 0.0018, -0.1629]) tensor([0.3967, 0.1808, 0.2286, 0.1939]) -Greedy action tensor([ 0.8523, -0.4353, 0.1593, -0.5236]) tensor([0.4929, 0.1360, 0.2465, 0.1245]) -Greedy action tensor([ 0.9205, -0.5281, 0.0465, -0.5049]) tensor([0.5284, 0.1241, 0.2205, 0.1270]) -Greedy action tensor([ 0.5165, -0.2061, -0.0888, -0.4445]) tensor([0.4143, 0.2011, 0.2262, 0.1585]) -Greedy action tensor([ 0.3234, -0.1594, -0.0232, -0.3577]) tensor([0.3533, 0.2180, 0.2498, 0.1788]) -Greedy action tensor([ 1.0324, -0.8961, -0.0261, -0.4504]) tensor([0.5816, 0.0845, 0.2018, 0.1320]) -Greedy action tensor([ 0.3407, 0.0170, 0.0292, -0.1978]) tensor([0.3290, 0.2380, 0.2410, 0.1920]) -Greedy action tensor([ 0.4503, 0.1002, -0.1571, -0.1398]) tensor([0.3567, 0.2513, 0.1943, 0.1977]) -Greedy action tensor([ 0.9352, -0.3196, -0.0507, -0.6381]) tensor([0.5360, 0.1528, 0.2000, 0.1111]) -Greedy action tensor([ 0.9918, -0.5010, -0.0034, -0.7591]) tensor([0.5656, 0.1271, 0.2091, 0.0982]) -Greedy action tensor([ 0.3516, -0.1191, -0.0136, -0.1544]) tensor([0.3423, 0.2138, 0.2376, 0.2064]) -Greedy action tensor([ 1.0802, -0.9459, 0.0637, -0.4686]) tensor([0.5861, 0.0773, 0.2121, 0.1245]) -Greedy action tensor([ 1.0180, -0.5486, 0.0443, -0.6305]) tensor([0.5622, 0.1174, 0.2123, 0.1081]) -Greedy action tensor([ 0.3689, -0.1520, -0.1036, -0.3588]) tensor([0.3703, 0.2200, 0.2309, 0.1789]) -Greedy action tensor([ 0.8406, -0.2674, 0.0805, -0.4362]) tensor([0.4815, 0.1590, 0.2252, 0.1343]) -Greedy action tensor([ 0.8404, -0.5944, -0.0448, -0.4521]) tensor([0.5194, 0.1237, 0.2143, 0.1426]) -Greedy action tensor([ 0.8775, -0.6419, 0.0597, -0.8375]) tensor([0.5434, 0.1189, 0.2399, 0.0978]) -Greedy action tensor([ 0.6531, -0.4156, 0.1000, -0.4239]) tensor([0.4426, 0.1520, 0.2546, 0.1508]) -Greedy action tensor([ 8.6274e-01, -5.5484e-01, -6.0463e-04, -3.7458e-01]) tensor([0.5117, 0.1240, 0.2158, 0.1485]) -Greedy action tensor([ 0.6646, -0.4940, -0.1209, -0.4424]) tensor([0.4761, 0.1495, 0.2171, 0.1574]) -Greedy action tensor([ 1.0208, -0.2775, -0.1832, -0.3488]) tensor([0.5473, 0.1494, 0.1642, 0.1391]) -Greedy action tensor([ 0.6603, -0.5103, -0.1323, -0.4484]) tensor([0.4778, 0.1482, 0.2163, 0.1577]) -Greedy action tensor([ 0.7996, -0.3273, -0.0748, -0.3007]) tensor([0.4822, 0.1563, 0.2011, 0.1605]) -Greedy action tensor([ 0.5040, -0.3628, -0.0560, -0.4020]) tensor([0.4174, 0.1754, 0.2384, 0.1687]) -Greedy action tensor([ 0.6340, -0.0377, 0.0178, -0.7124]) tensor([0.4327, 0.2210, 0.2337, 0.1126]) -Greedy action tensor([ 0.5427, -0.0319, 0.0322, -0.3890]) tensor([0.3911, 0.2201, 0.2347, 0.1540]) -Greedy action tensor([ 0.4791, -0.0560, -0.1492, -0.1102]) tensor([0.3740, 0.2190, 0.1995, 0.2075]) -Greedy action tensor([ 0.8357, -0.5689, 0.1827, -0.8729]) tensor([0.5136, 0.1261, 0.2673, 0.0930]) -Greedy action tensor([ 0.7391, -0.6266, -0.1123, -0.6839]) tensor([0.5200, 0.1327, 0.2220, 0.1253]) -Greedy action tensor([ 1.1189, -0.3310, 0.1113, -0.8130]) tensor([0.5732, 0.1345, 0.2093, 0.0830]) -Greedy action tensor([ 0.5595, -0.5164, -0.0393, -0.5382]) tensor([0.4496, 0.1533, 0.2471, 0.1500]) -Greedy action tensor([ 0.1839, 0.0546, -0.0752, -0.3510]) tensor([0.3090, 0.2715, 0.2385, 0.1810]) -Greedy action tensor([ 0.7063, -0.1909, 0.0035, -0.2872]) tensor([0.4399, 0.1793, 0.2178, 0.1629]) -Greedy action tensor([ 0.2943, -0.0699, -0.0690, -0.2810]) tensor([0.3387, 0.2353, 0.2355, 0.1905]) -Greedy action tensor([ 0.2292, 0.2482, 0.0083, -0.1891]) tensor([0.2874, 0.2929, 0.2305, 0.1892]) -Greedy action tensor([ 0.8337, -0.1591, -0.0295, -0.5067]) tensor([0.4868, 0.1804, 0.2054, 0.1274]) -Greedy action tensor([ 0.2622, 0.0609, -0.0213, -0.1232]) tensor([0.3076, 0.2515, 0.2317, 0.2092]) -Greedy action tensor([ 0.8158, -0.6374, -0.2172, -0.6162]) tensor([0.5469, 0.1279, 0.1947, 0.1306]) -Greedy action tensor([ 0.3071, -0.0014, -0.0448, -0.2076]) tensor([0.3294, 0.2420, 0.2317, 0.1969]) -Greedy action tensor([-0.6046, -0.4986, 0.2163, 0.2431]) tensor([0.1488, 0.1655, 0.3382, 0.3474]) -Greedy action tensor([-1.9275, -0.4541, 0.6586, -0.1652]) tensor([0.0409, 0.1784, 0.5427, 0.2381]) -Greedy action tensor([-1.0945, -0.6075, 0.3645, 0.3398]) tensor([0.0899, 0.1463, 0.3866, 0.3772]) -Greedy action tensor([-1.3292, -0.5737, 0.4180, 0.5290]) tensor([0.0654, 0.1393, 0.3756, 0.4196]) -Greedy action tensor([-1.2030, -0.5628, 0.2835, 0.3128]) tensor([0.0842, 0.1598, 0.3725, 0.3835]) -Greedy action tensor([-1.2117, -0.5152, 0.3103, 0.2192]) tensor([0.0850, 0.1705, 0.3892, 0.3553]) -Greedy action tensor([-1.2769, -0.3416, 0.5485, 0.4946]) tensor([0.0640, 0.1630, 0.3969, 0.3761]) -Greedy action tensor([-1.9444, -0.8081, 0.2766, -0.2922]) tensor([0.0539, 0.1679, 0.4968, 0.2813]) -Greedy action tensor([-1.3628, -0.5630, 0.3927, 0.2071]) tensor([0.0724, 0.1610, 0.4188, 0.3478]) -Greedy action tensor([-1.8824, -0.4747, 0.6834, -0.0954]) tensor([0.0415, 0.1698, 0.5406, 0.2481]) -Greedy action tensor([-1.4748, -0.5158, 0.5475, 0.4769]) tensor([0.0549, 0.1433, 0.4150, 0.3867]) -Greedy action tensor([-1.8798, -0.5116, 0.7555, -0.0736]) tensor([0.0401, 0.1574, 0.5587, 0.2439]) -Greedy action tensor([-0.7612, 0.3788, -0.2998, 0.2223]) tensor([0.1192, 0.3728, 0.1892, 0.3188]) -Greedy action tensor([-1.6770, -0.4512, 1.1368, 0.9276]) tensor([0.0289, 0.0984, 0.4818, 0.3909]) -Greedy action tensor([-1.5701, -0.5175, 0.5516, 0.2600]) tensor([0.0542, 0.1553, 0.4524, 0.3380]) -Greedy action tensor([-1.8051, -0.4893, 0.6977, 0.0498]) tensor([0.0429, 0.1597, 0.5235, 0.2739]) -Greedy action tensor([-1.1589, -0.4999, 0.3283, 0.4616]) tensor([0.0806, 0.1557, 0.3565, 0.4073]) -Greedy action tensor([-1.6478, -0.5016, 1.3839, 1.0403]) tensor([0.0253, 0.0795, 0.5238, 0.3715]) -Greedy action tensor([-1.0681, -0.5528, 0.2792, 0.5223]) tensor([0.0875, 0.1465, 0.3367, 0.4293]) -Greedy action tensor([-1.2220, -0.5588, 0.3145, 0.2755]) tensor([0.0829, 0.1609, 0.3854, 0.3707]) -Greedy action tensor([-1.0916, -0.6110, 0.4431, 0.8494]) tensor([0.0703, 0.1137, 0.3262, 0.4898]) -Greedy action tensor([-1.9984, -0.9573, 0.4645, -0.1321]) tensor([0.0454, 0.1285, 0.5327, 0.2934]) -Greedy action tensor([-1.1753, -0.3944, 0.6372, 1.0588]) tensor([0.0536, 0.1171, 0.3285, 0.5008]) -Greedy action tensor([-1.3694, -0.5868, 0.5610, 0.5300]) tensor([0.0597, 0.1305, 0.4112, 0.3987]) -Greedy action tensor([-1.2277, -0.5408, 0.6731, -0.5492]) tensor([0.0858, 0.1706, 0.5744, 0.1692]) -Greedy action tensor([-1.1125, -0.6128, 0.5472, 0.9954]) tensor([0.0620, 0.1021, 0.3258, 0.5101]) -Greedy action tensor([-1.4727, -0.5539, 0.4074, 0.2003]) tensor([0.0650, 0.1629, 0.4259, 0.3462]) -Greedy action tensor([-1.5810, -1.0382, 0.1351, -0.6161]) tensor([0.0917, 0.1578, 0.5100, 0.2406]) -Greedy action tensor([-1.0280, 0.5610, 0.0520, 0.0718]) tensor([0.0844, 0.4135, 0.2486, 0.2535]) -Greedy action tensor([-2.1043, -0.9601, 1.2639, 0.6255]) tensor([0.0206, 0.0647, 0.5985, 0.3161]) -Greedy action tensor([-1.5672, -0.4953, 0.5042, 0.1468]) tensor([0.0574, 0.1678, 0.4559, 0.3189]) -Greedy action tensor([-1.2827, 0.1948, 0.6332, -0.7092]) tensor([0.0717, 0.3141, 0.4870, 0.1272]) -Greedy action tensor([-1.8313, -0.4664, 0.6184, -0.1116]) tensor([0.0453, 0.1773, 0.5246, 0.2528]) -Greedy action tensor([-0.9697, -0.3835, 0.1396, 1.1872]) tensor([0.0691, 0.1242, 0.2095, 0.5972]) -Greedy action tensor([-1.8973, -0.4679, 0.6595, -0.1387]) tensor([0.0419, 0.1749, 0.5401, 0.2431]) -Greedy action tensor([-1.3013, -0.4360, 0.8375, 1.1459]) tensor([0.0427, 0.1014, 0.3624, 0.4934]) -Greedy action tensor([-1.8485, -0.4692, 0.6198, -0.1236]) tensor([0.0447, 0.1774, 0.5272, 0.2507]) -Greedy action tensor([-0.9959, -0.6579, 1.2317, 1.4909]) tensor([0.0422, 0.0592, 0.3914, 0.5072]) -Greedy action tensor([-1.4724, -0.0831, 0.5131, 0.5256]) tensor([0.0508, 0.2040, 0.3703, 0.3749]) -Greedy action tensor([-0.9810, 0.0105, 0.8777, 1.1593]) tensor([0.0537, 0.1448, 0.3447, 0.4568]) -Greedy action tensor([-1.7572, -0.4601, 0.5677, -0.0444]) tensor([0.0490, 0.1791, 0.5006, 0.2714]) -Greedy action tensor([-1.7649, -0.4499, 0.7345, 0.3031]) tensor([0.0403, 0.1501, 0.4908, 0.3188]) -Greedy action tensor([-0.9297, -0.2039, 0.1254, 0.5687]) tensor([0.0960, 0.1984, 0.2758, 0.4297]) -Greedy action tensor([-1.5793, -0.7750, 0.0851, -0.6273]) tensor([0.0900, 0.2012, 0.4755, 0.2332]) -Greedy action tensor([-1.5197, -0.5908, 0.4475, 0.0538]) tensor([0.0645, 0.1633, 0.4612, 0.3111]) -Greedy action tensor([-1.7722, -0.3136, 0.6082, -0.0547]) tensor([0.0461, 0.1983, 0.4986, 0.2569]) -Greedy action tensor([-1.9569, -0.6790, 1.3561, 0.6235]) tensor([0.0221, 0.0793, 0.6069, 0.2917]) -Greedy action tensor([-1.2673, -0.5900, 1.0934, 1.2903]) tensor([0.0378, 0.0744, 0.4003, 0.4875]) -Greedy action tensor([-1.8423, -0.4913, 0.7462, 0.0332]) tensor([0.0405, 0.1564, 0.5389, 0.2642]) -Greedy action tensor([-1.9734, -0.4407, 0.8942, 0.2244]) tensor([0.0310, 0.1437, 0.5459, 0.2794]) -Greedy action tensor([-1.2370, -0.4390, 0.9271, 1.1003]) tensor([0.0449, 0.0997, 0.3908, 0.4647]) -Greedy action tensor([-0.1772, -0.0626, 1.0721, 1.6952]) tensor([0.0826, 0.0926, 0.2879, 0.5369]) -Greedy action tensor([-0.9751, -0.5487, 0.3912, -0.1006]) tensor([0.1130, 0.1731, 0.4430, 0.2709]) -Greedy action tensor([-1.0153, 0.6225, -0.0019, 0.1018]) tensor([0.0837, 0.4303, 0.2304, 0.2556]) -Greedy action tensor([-1.6278, -0.4474, 0.4682, 0.0644]) tensor([0.0561, 0.1827, 0.4564, 0.3048]) -Greedy action tensor([-1.6958, -0.3431, 0.5369, 0.0268]) tensor([0.0505, 0.1954, 0.4711, 0.2829]) -Greedy action tensor([-1.3662, -0.5747, 0.5946, 0.6461]) tensor([0.0562, 0.1240, 0.3993, 0.4204]) -Greedy action tensor([-0.3671, 0.0249, 1.0931, 1.6702]) tensor([0.0692, 0.1024, 0.2979, 0.5306]) -Greedy action tensor([-1.7444, -0.5020, 0.5602, -0.0461]) tensor([0.0501, 0.1736, 0.5023, 0.2739]) -Greedy action tensor([-1.7673, -0.3864, 0.5780, -0.0648]) tensor([0.0478, 0.1903, 0.4993, 0.2625]) -Greedy action tensor([-1.1717, -0.3645, 0.8265, 1.1109]) tensor([0.0490, 0.1098, 0.3612, 0.4800]) -Greedy action tensor([-1.9393, -0.4473, 0.8637, 0.2263]) tensor([0.0326, 0.1450, 0.5380, 0.2844]) -Greedy action tensor([-1.9769, -0.5877, 1.3737, 0.6371]) tensor([0.0212, 0.0850, 0.6044, 0.2894]) -Greedy action tensor([-1.4936, -0.5994, 0.4563, 0.1063]) tensor([0.0648, 0.1585, 0.4556, 0.3210]) -Greedy action tensor([-0.4439, -0.1012, 0.9243, 1.6131]) tensor([0.0706, 0.0995, 0.2774, 0.5525]) -Greedy action tensor([-1.3071, -0.4949, 0.3322, 0.3770]) tensor([0.0725, 0.1634, 0.3735, 0.3906]) -Greedy action tensor([-1.0317, -0.0938, 0.1673, 0.2890]) tensor([0.0942, 0.2406, 0.3124, 0.3528]) -Greedy action tensor([-1.3153e+00, -1.2031e-03, 4.2950e-01, -1.9033e-01]) tensor([0.0739, 0.2751, 0.4232, 0.2277]) -Greedy action tensor([-1.8567, -0.4595, 0.6875, -0.1035]) tensor([0.0425, 0.1717, 0.5407, 0.2451]) -Greedy action tensor([-1.1526, -0.5448, 0.2530, 0.2955]) tensor([0.0895, 0.1644, 0.3651, 0.3810]) -Greedy action tensor([-1.6841, -0.4724, 0.6924, -0.3656]) tensor([0.0530, 0.1781, 0.5708, 0.1981]) -Greedy action tensor([-1.0507, -0.5687, 0.2138, 0.2949]) tensor([0.1000, 0.1619, 0.3541, 0.3840]) -Greedy action tensor([-1.2884, -0.6885, 0.6340, 0.2173]) tensor([0.0706, 0.1286, 0.4827, 0.3181]) -Greedy action tensor([-2.0252, -0.8842, 0.4152, -0.1589]) tensor([0.0453, 0.1418, 0.5200, 0.2929]) -Greedy action tensor([-1.7612, -0.9322, -0.2506, -0.6230]) tensor([0.0914, 0.2094, 0.4140, 0.2853]) -Greedy action tensor([-1.9481, -0.8481, 0.0351, -0.3725]) tensor([0.0621, 0.1866, 0.4512, 0.3002]) -Greedy action tensor([-1.0817, -0.2027, 0.7517, 1.1217]) tensor([0.0534, 0.1287, 0.3342, 0.4838]) -Greedy action tensor([-2.0415, -0.9440, 0.7425, -0.0106]) tensor([0.0360, 0.1078, 0.5821, 0.2741]) -Greedy action tensor([-1.8902, -0.4551, 0.6384, -0.1552]) tensor([0.0427, 0.1795, 0.5356, 0.2422]) -Greedy action tensor([-0.8707, -0.1970, 0.3300, -0.1497]) tensor([0.1199, 0.2352, 0.3983, 0.2466]) -Greedy action tensor([-1.7301, -0.5039, 0.5631, -0.0036]) tensor([0.0502, 0.1710, 0.4969, 0.2820]) -Greedy action tensor([0.9336, 0.0864, 0.8232, 1.5930]) tensor([0.2349, 0.1007, 0.2103, 0.4542]) -Greedy action tensor([ 0.4930, 0.7565, -0.1930, 1.1568]) tensor([0.2107, 0.2742, 0.1061, 0.4091]) -Greedy action tensor([ 0.7261, 0.2690, 0.5921, -0.1924]) tensor([0.3440, 0.2178, 0.3009, 0.1373]) -Greedy action tensor([ 0.7398, 0.0918, -0.4022, 1.8319]) tensor([0.2073, 0.1085, 0.0662, 0.6180]) -Greedy action tensor([ 0.2479, 0.5062, -0.2742, 0.7836]) tensor([0.2176, 0.2817, 0.1291, 0.3717]) -Greedy action tensor([ 0.7164, -0.4721, -0.2267, 0.8953]) tensor([0.3460, 0.1054, 0.1347, 0.4138]) -Greedy action tensor([ 1.0477, -0.1767, 0.9868, 1.1649]) tensor([0.2977, 0.0875, 0.2801, 0.3347]) -Greedy action tensor([ 0.2533, -0.5762, 0.8522, 0.5528]) tensor([0.2171, 0.0947, 0.3952, 0.2930]) -Greedy action tensor([0.7468, 0.4441, 0.3885, 0.0498]) tensor([0.3406, 0.2517, 0.2381, 0.1697]) -Greedy action tensor([ 6.5519e-01, -5.6581e-01, 1.2916e+00, 9.7662e-04]) tensor([0.2699, 0.0796, 0.5101, 0.1403]) -Greedy action tensor([ 0.9081, 0.1296, -0.4168, 0.4970]) tensor([0.4188, 0.1923, 0.1113, 0.2776]) -Greedy action tensor([1.1064, 0.4349, 0.4071, 0.9399]) tensor([0.3503, 0.1790, 0.1741, 0.2966]) -Greedy action tensor([ 0.8663, -0.5411, 0.4670, 1.1072]) tensor([0.3137, 0.0768, 0.2104, 0.3991]) -Greedy action tensor([ 0.5478, -0.5450, 0.3099, 1.2248]) tensor([0.2444, 0.0819, 0.1927, 0.4810]) -Greedy action tensor([ 1.0332, -0.8757, 0.7262, 1.0559]) tensor([0.3440, 0.0510, 0.2531, 0.3519]) -Greedy action tensor([ 1.8214, -0.8740, -0.0337, 0.9490]) tensor([0.6091, 0.0411, 0.0953, 0.2545]) -Greedy action tensor([ 0.7665, -0.3481, -0.1228, 1.2963]) tensor([0.2909, 0.0954, 0.1195, 0.4941]) -Greedy action tensor([ 1.2208, 1.2412, -0.1479, 1.0911]) tensor([0.3171, 0.3236, 0.0807, 0.2786]) -Greedy action tensor([ 0.7818, -0.6712, 1.1809, 0.5164]) tensor([0.2864, 0.0670, 0.4269, 0.2197]) -Greedy action tensor([-0.0488, 0.4163, 0.0331, 0.4777]) tensor([0.1862, 0.2965, 0.2021, 0.3152]) -Greedy action tensor([0.4026, 0.4173, 1.2796, 0.3295]) tensor([0.1870, 0.1897, 0.4494, 0.1738]) -Greedy action tensor([-0.0929, -0.3396, 0.8690, 0.3838]) tensor([0.1664, 0.1300, 0.4355, 0.2681]) -Greedy action tensor([ 0.4263, -1.8306, 0.5314, 0.2523]) tensor([0.3272, 0.0343, 0.3635, 0.2750]) -Greedy action tensor([-0.0927, 0.7874, 0.2006, 0.6198]) tensor([0.1472, 0.3550, 0.1974, 0.3003]) -Greedy action tensor([ 1.5295, -0.1056, 0.0643, 0.6932]) tensor([0.5378, 0.1048, 0.1243, 0.2331]) -Greedy action tensor([ 0.7955, -0.9448, -0.1699, 0.5187]) tensor([0.4321, 0.0758, 0.1645, 0.3276]) -Greedy action tensor([ 0.6987, -1.0364, 1.4166, 0.6419]) tensor([0.2397, 0.0423, 0.4915, 0.2265]) -Greedy action tensor([-0.5199, 0.7034, 0.7756, -0.8178]) tensor([0.1137, 0.3865, 0.4154, 0.0844]) -Greedy action tensor([-0.0741, -1.9501, 0.0101, 2.0667]) tensor([0.0930, 0.0143, 0.1012, 0.7915]) -Greedy action tensor([ 1.5349, -0.9272, -0.5990, 0.7622]) tensor([0.6005, 0.0512, 0.0711, 0.2773]) -Greedy action tensor([ 0.4624, -1.2441, 0.3692, -0.0473]) tensor([0.3713, 0.0674, 0.3383, 0.2230]) -Greedy action tensor([ 0.4692, -0.1748, -0.5368, 0.7745]) tensor([0.3079, 0.1617, 0.1126, 0.4178]) -Greedy action tensor([1.4346, 0.1191, 1.4952, 0.4687]) tensor([0.3688, 0.0990, 0.3918, 0.1404]) -Greedy action tensor([0.1079, 1.0319, 0.0828, 0.8592]) tensor([0.1512, 0.3809, 0.1474, 0.3205]) -Greedy action tensor([ 1.4754, 0.4372, -0.2475, -0.0750]) tensor([0.5731, 0.2029, 0.1023, 0.1216]) -Greedy action tensor([ 1.4088, -0.6957, 0.7592, 1.3399]) tensor([0.3880, 0.0473, 0.2026, 0.3621]) -Greedy action tensor([ 1.9080, -0.2544, 1.4100, 0.8858]) tensor([0.4802, 0.0552, 0.2918, 0.1728]) -Greedy action tensor([0.4858, 0.2838, 0.1946, 0.4587]) tensor([0.2827, 0.2310, 0.2113, 0.2751]) -Greedy action tensor([ 1.5121, -0.9514, 0.9027, 1.8750]) tensor([0.3261, 0.0278, 0.1773, 0.4688]) -Greedy action tensor([ 0.7464, -0.3346, 2.0337, 0.4250]) tensor([0.1758, 0.0596, 0.6370, 0.1275]) -Greedy action tensor([ 1.1221, 0.1828, -1.1186, 2.1879]) tensor([0.2273, 0.0888, 0.0242, 0.6597]) -Greedy action tensor([ 0.9361, -0.1138, 0.1151, 1.0631]) tensor([0.3418, 0.1196, 0.1504, 0.3881]) -Greedy action tensor([0.9993, 0.3771, 0.2731, 0.0699]) tensor([0.4140, 0.2222, 0.2003, 0.1635]) -Greedy action tensor([ 0.6919, 0.9929, -0.2204, -0.1206]) tensor([0.3128, 0.4227, 0.1256, 0.1388]) -Greedy action tensor([-0.7026, -0.1541, 0.3374, 0.8446]) tensor([0.0975, 0.1687, 0.2758, 0.4580]) -Greedy action tensor([0.9243, 0.3255, 1.3644, 2.1058]) tensor([0.1572, 0.0864, 0.2441, 0.5123]) -Greedy action tensor([ 0.4348, 0.0291, -0.1936, 0.2134]) tensor([0.3332, 0.2221, 0.1777, 0.2670]) -Greedy action tensor([ 1.1810, 0.8447, -0.1030, 0.5451]) tensor([0.3967, 0.2834, 0.1099, 0.2100]) -Greedy action tensor([ 1.3306, -0.4592, 0.4935, 1.1513]) tensor([0.4105, 0.0686, 0.1777, 0.3432]) -Greedy action tensor([ 0.0950, -0.2241, -0.4646, -0.5734]) tensor([0.3558, 0.2586, 0.2033, 0.1823]) -Greedy action tensor([ 1.1592, -0.5371, 1.4565, 0.3449]) tensor([0.3364, 0.0617, 0.4529, 0.1490]) -Greedy action tensor([ 0.7600, -0.2367, -0.9026, 1.0096]) tensor([0.3518, 0.1299, 0.0667, 0.4516]) -Greedy action tensor([ 0.2926, -0.3648, 1.8804, 0.8479]) tensor([0.1226, 0.0636, 0.6001, 0.2137]) -Greedy action tensor([ 0.0283, 0.7838, 0.5163, -0.1227]) tensor([0.1780, 0.3789, 0.2900, 0.1531]) -Greedy action tensor([ 1.3850, -0.4517, 0.3740, 0.6706]) tensor([0.4969, 0.0792, 0.1808, 0.2432]) -Greedy action tensor([ 0.1677, -1.4622, -0.5768, 0.0392]) tensor([0.3921, 0.0768, 0.1862, 0.3448]) -Greedy action tensor([ 0.6240, -0.6846, 0.9560, 0.9439]) tensor([0.2475, 0.0669, 0.3449, 0.3408]) -Greedy action tensor([ 0.6913, -0.8756, 1.0060, 0.4008]) tensor([0.3006, 0.0627, 0.4118, 0.2248]) -Greedy action tensor([0.7794, 0.7297, 0.7221, 1.4873]) tensor([0.2030, 0.1932, 0.1917, 0.4121]) -Greedy action tensor([ 0.5515, 0.5714, -0.7841, 1.2816]) tensor([0.2294, 0.2341, 0.0603, 0.4762]) -Greedy action tensor([1.2090, 0.1647, 0.2952, 0.7867]) tensor([0.4152, 0.1461, 0.1665, 0.2722]) -Greedy action tensor([ 1.0078, -0.4025, 0.1350, 1.4414]) tensor([0.3120, 0.0762, 0.1304, 0.4814]) -Greedy action tensor([ 0.6940, -1.2516, 0.2471, 1.0457]) tensor([0.3121, 0.0446, 0.1996, 0.4437]) -Greedy action tensor([-0.3413, 0.5169, 0.3462, -0.7304]) tensor([0.1660, 0.3915, 0.3301, 0.1125]) -Greedy action tensor([ 0.6615, -0.0931, -0.3503, 0.1724]) tensor([0.4087, 0.1922, 0.1486, 0.2506]) -Greedy action tensor([ 0.0046, 0.8609, 0.4385, -0.1109]) tensor([0.1728, 0.4067, 0.2666, 0.1539]) -Greedy action tensor([ 0.1877, -0.6568, 0.3289, 0.6123]) tensor([0.2433, 0.1046, 0.2802, 0.3720]) -Greedy action tensor([ 0.3494, -2.4018, -0.0480, 0.8764]) tensor([0.2916, 0.0186, 0.1959, 0.4939]) -Greedy action tensor([-0.0125, 0.5630, 1.8444, -0.2208]) tensor([0.1001, 0.1779, 0.6408, 0.0812]) -Greedy action tensor([ 0.4124, 0.1471, -0.5492, 1.2792]) tensor([0.2208, 0.1694, 0.0844, 0.5254]) -Greedy action tensor([ 0.0685, 0.9827, -0.2950, 0.9846]) tensor([0.1495, 0.3729, 0.1039, 0.3737]) -Greedy action tensor([ 0.4319, 0.7780, -0.0424, 0.4630]) tensor([0.2458, 0.3475, 0.1530, 0.2536]) -Greedy action tensor([ 0.1646, -1.4770, -0.2442, 0.6411]) tensor([0.2883, 0.0558, 0.1916, 0.4643]) -Greedy action tensor([ 0.6501, -0.1309, -0.9343, 0.8375]) tensor([0.3485, 0.1596, 0.0715, 0.4204]) -Greedy action tensor([1.8698, 0.0980, 0.8382, 0.9130]) tensor([0.5234, 0.0890, 0.1866, 0.2011]) -Greedy action tensor([-0.2467, -1.6209, 1.0415, 0.6305]) tensor([0.1373, 0.0347, 0.4979, 0.3301]) -Greedy action tensor([-0.3671, 0.4002, 0.5913, 0.8703]) tensor([0.1086, 0.2339, 0.2832, 0.3743]) -Greedy action tensor([ 1.3023, -0.6197, 1.0890, 0.8156]) tensor([0.3893, 0.0570, 0.3145, 0.2393]) -Greedy action tensor([ 1.3435, -0.0587, 0.1048, 1.0322]) tensor([0.4408, 0.1085, 0.1277, 0.3229]) -Greedy action tensor([ 1.2887, -0.6529, 1.0283, 1.4079]) tensor([0.3289, 0.0472, 0.2535, 0.3705]) -Greedy action tensor([ 0.7245, -0.6155, -0.3712, 0.7910]) tensor([0.3753, 0.0983, 0.1254, 0.4010]) -Greedy action tensor([ 0.1253, -0.4911, 0.4054, 0.8030]) tensor([0.2069, 0.1117, 0.2738, 0.4075]) -Greedy action tensor([ 2.0217, -0.5386, -0.5560, 0.3731]) tensor([0.7432, 0.0574, 0.0564, 0.1429]) -Greedy action tensor([ 0.5381, -0.0892, -0.0198, -0.2502]) tensor([0.3905, 0.2085, 0.2235, 0.1775]) -Greedy action tensor([ 0.4162, -0.2980, -0.3419, -0.1086]) tensor([0.3922, 0.1920, 0.1838, 0.2321]) -Greedy action tensor([ 0.8538, -0.3738, -0.4787, 0.6994]) tensor([0.4143, 0.1214, 0.1093, 0.3550]) -Greedy action tensor([ 1.3361, -0.5334, -0.2481, -0.0145]) tensor([0.6179, 0.0953, 0.1267, 0.1601]) -Greedy action tensor([ 0.5962, -0.4322, 0.0305, -0.0684]) tensor([0.4098, 0.1465, 0.2328, 0.2108]) -Greedy action tensor([ 0.6884, -0.1492, -0.4947, -0.2378]) tensor([0.4683, 0.2027, 0.1435, 0.1855]) -Greedy action tensor([ 0.7868, -0.5865, -0.2238, 0.3096]) tensor([0.4469, 0.1132, 0.1627, 0.2773]) -Greedy action tensor([ 0.7667, -0.5398, 0.1176, 0.1016]) tensor([0.4334, 0.1173, 0.2264, 0.2229]) -Greedy action tensor([ 0.8935, -0.3655, -0.3291, 0.5228]) tensor([0.4408, 0.1252, 0.1298, 0.3043]) -Greedy action tensor([ 1.1547, -0.1206, -0.2191, -0.0756]) tensor([0.5480, 0.1531, 0.1387, 0.1601]) -Greedy action tensor([ 0.9389, -0.3784, -0.2337, -0.0011]) tensor([0.5081, 0.1361, 0.1573, 0.1985]) -Greedy action tensor([ 0.9522, -0.2574, -0.4338, 0.1981]) tensor([0.4953, 0.1478, 0.1239, 0.2330]) -Greedy action tensor([ 0.9671, -0.4192, -0.3140, 0.2815]) tensor([0.4922, 0.1231, 0.1367, 0.2480]) -Greedy action tensor([ 1.5786, -1.0009, -0.3132, 0.4676]) tensor([0.6427, 0.0487, 0.0969, 0.2116]) -Greedy action tensor([ 1.4700, -0.7429, -0.1589, 0.2480]) tensor([0.6249, 0.0684, 0.1226, 0.1841]) -Greedy action tensor([ 1.7833, -0.9751, -0.4999, 0.1691]) tensor([0.7329, 0.0465, 0.0747, 0.1459]) -Greedy action tensor([ 1.9846, -0.7672, -0.2621, 0.6031]) tensor([0.7039, 0.0449, 0.0744, 0.1768]) -Greedy action tensor([ 0.9601, -0.4586, -0.2316, 0.2885]) tensor([0.4862, 0.1177, 0.1477, 0.2484]) -Greedy action tensor([ 0.8323, -0.6389, -0.0965, 0.2662]) tensor([0.4561, 0.1048, 0.1802, 0.2590]) -Greedy action tensor([ 0.9137, -0.5313, -0.5351, 0.2996]) tensor([0.4971, 0.1172, 0.1167, 0.2690]) -Greedy action tensor([ 1.6194, -0.9575, -0.1090, 0.1233]) tensor([0.6768, 0.0514, 0.1202, 0.1516]) -Greedy action tensor([ 1.1124, -0.6584, -0.0606, 0.3456]) tensor([0.5144, 0.0875, 0.1592, 0.2389]) -Greedy action tensor([ 0.2362, 0.0771, 0.0925, -0.1480]) tensor([0.2941, 0.2508, 0.2548, 0.2003]) -Greedy action tensor([ 1.4566, -0.7271, -0.3451, 0.1948]) tensor([0.6407, 0.0722, 0.1057, 0.1814]) -Greedy action tensor([ 1.3005, -0.4800, -0.1790, 0.1462]) tensor([0.5842, 0.0985, 0.1331, 0.1842]) -Greedy action tensor([ 1.3220, -0.3331, -0.2142, 0.1581]) tensor([0.5819, 0.1112, 0.1252, 0.1817]) -Greedy action tensor([ 1.3450, -0.5840, -0.4021, 0.6157]) tensor([0.5550, 0.0806, 0.0967, 0.2676]) -Greedy action tensor([ 0.8200, -0.0839, -0.1484, -0.0940]) tensor([0.4575, 0.1853, 0.1737, 0.1834]) -Greedy action tensor([ 1.0452, -0.2837, -0.0762, -0.0604]) tensor([0.5204, 0.1378, 0.1696, 0.1723]) -Greedy action tensor([ 0.7803, -0.4256, -0.3359, 0.4872]) tensor([0.4214, 0.1262, 0.1380, 0.3144]) -Greedy action tensor([ 1.3155, -0.7038, -0.1062, 0.4576]) tensor([0.5561, 0.0738, 0.1342, 0.2358]) -Greedy action tensor([ 0.7751, -0.3703, -0.1278, 0.0944]) tensor([0.4485, 0.1427, 0.1818, 0.2271]) -Greedy action tensor([ 0.9802, -0.3206, -0.2629, -0.0902]) tensor([0.5253, 0.1430, 0.1515, 0.1801]) -Greedy action tensor([ 0.8458, -0.2644, -0.1993, 0.0517]) tensor([0.4688, 0.1545, 0.1649, 0.2119]) -Greedy action tensor([ 0.8164, -0.2986, -0.0937, -0.1798]) tensor([0.4763, 0.1562, 0.1917, 0.1759]) -Greedy action tensor([ 1.2060, -0.3169, -0.3371, -0.3677]) tensor([0.6101, 0.1330, 0.1304, 0.1265]) -Greedy action tensor([ 1.0140, -0.4694, -0.3414, 0.3967]) tensor([0.4941, 0.1121, 0.1274, 0.2665]) -Greedy action tensor([ 0.8167, -0.5397, -0.3684, 0.3857]) tensor([0.4518, 0.1164, 0.1381, 0.2937]) -Greedy action tensor([ 0.9418, -0.5087, -0.0803, 0.1832]) tensor([0.4848, 0.1137, 0.1745, 0.2271]) -Greedy action tensor([ 1.0481, -0.5258, -0.0268, 0.1409]) tensor([0.5122, 0.1062, 0.1748, 0.2068]) -Greedy action tensor([ 0.5365, -0.3235, -0.0968, -0.0281]) tensor([0.3964, 0.1677, 0.2104, 0.2254]) -Greedy action tensor([ 1.6291, -0.8326, -0.5230, 0.1800]) tensor([0.6962, 0.0594, 0.0809, 0.1635]) -Greedy action tensor([ 1.4694, -0.6836, -0.1408, -0.0431]) tensor([0.6509, 0.0756, 0.1301, 0.1434]) -Greedy action tensor([ 1.0069, -0.4569, -0.0483, -0.0146]) tensor([0.5156, 0.1193, 0.1795, 0.1856]) -Greedy action tensor([ 1.3303, -0.3525, -0.5566, -0.0932]) tensor([0.6336, 0.1178, 0.0960, 0.1526]) -Greedy action tensor([ 0.2101, 0.1405, -0.1873, -0.0006]) tensor([0.2928, 0.2732, 0.1968, 0.2372]) -Greedy action tensor([ 1.6606, -0.3840, -0.3326, -0.0700]) tensor([0.6931, 0.0897, 0.0944, 0.1228]) -Greedy action tensor([ 1.1946, 0.0819, -0.0080, -0.0242]) tensor([0.5196, 0.1708, 0.1561, 0.1536]) -Greedy action tensor([ 1.0215, -0.7481, -0.3008, 0.6843]) tensor([0.4650, 0.0792, 0.1239, 0.3319]) -Greedy action tensor([ 0.8068, -0.4208, -0.2803, 0.5085]) tensor([0.4215, 0.1235, 0.1421, 0.3128]) -Greedy action tensor([ 1.1750, -0.3506, -0.0433, 0.1124]) tensor([0.5380, 0.1170, 0.1591, 0.1859]) -Greedy action tensor([ 0.6730, -0.4593, -0.2016, 0.2570]) tensor([0.4168, 0.1344, 0.1738, 0.2750]) -Greedy action tensor([ 0.9652, -0.5053, -0.4751, 0.2535]) tensor([0.5109, 0.1174, 0.1210, 0.2507]) -Greedy action tensor([ 1.0763, -0.7782, -0.4899, 0.9253]) tensor([0.4494, 0.0703, 0.0939, 0.3864]) -Greedy action tensor([ 0.3893, -0.0772, -0.3843, 0.2465]) tensor([0.3383, 0.2122, 0.1561, 0.2933]) -Greedy action tensor([ 1.2741, -0.4593, -0.5910, 0.3001]) tensor([0.5851, 0.1034, 0.0906, 0.2209]) -Greedy action tensor([ 0.6929, -0.5092, -0.4538, 0.6714]) tensor([0.3851, 0.1157, 0.1223, 0.3769]) -Greedy action tensor([ 1.4017, -0.5451, -0.3259, 0.0034]) tensor([0.6380, 0.0911, 0.1134, 0.1576]) -Greedy action tensor([ 1.1883, -0.3584, -0.1905, 0.1098]) tensor([0.5540, 0.1180, 0.1395, 0.1884]) -Greedy action tensor([ 0.8665, -0.3531, -0.0804, 0.0294]) tensor([0.4725, 0.1396, 0.1833, 0.2046]) -Greedy action tensor([ 1.1897, -0.3644, -0.0313, -0.1125]) tensor([0.5624, 0.1189, 0.1659, 0.1529]) -Greedy action tensor([ 0.8198, -0.3594, -0.3034, 0.1010]) tensor([0.4717, 0.1451, 0.1534, 0.2299]) -Greedy action tensor([ 0.7685, -0.4703, -0.6494, -0.1505]) tensor([0.5179, 0.1501, 0.1254, 0.2066]) -Greedy action tensor([ 0.9009, 0.0149, -0.5838, -0.0743]) tensor([0.4960, 0.2045, 0.1124, 0.1871]) -Greedy action tensor([ 1.2861, -0.2824, -0.1577, -0.0437]) tensor([0.5852, 0.1219, 0.1381, 0.1548]) -Greedy action tensor([ 0.5854, -0.5171, 0.0507, 0.0039]) tensor([0.4037, 0.1340, 0.2365, 0.2257]) -Greedy action tensor([ 0.9826, 0.0196, 0.1551, -0.1921]) tensor([0.4700, 0.1794, 0.2054, 0.1452]) -Greedy action tensor([ 0.8152, -0.3401, -0.1382, -0.0934]) tensor([0.4754, 0.1497, 0.1832, 0.1916]) -Greedy action tensor([ 1.1499, -0.3889, -0.2050, -0.0157]) tensor([0.5604, 0.1203, 0.1446, 0.1747]) -Greedy action tensor([ 1.3669, -0.7933, -0.0239, 0.1754]) tensor([0.5995, 0.0691, 0.1492, 0.1821]) -Greedy action tensor([ 1.3652, -0.3735, -0.1458, 0.0407]) tensor([0.6015, 0.1057, 0.1328, 0.1600]) -Greedy action tensor([ 1.0122, -0.1763, -0.2696, -0.2981]) tensor([0.5400, 0.1645, 0.1499, 0.1456]) -Greedy action tensor([ 2.0454, -0.5654, -0.2334, 0.0772]) tensor([0.7601, 0.0559, 0.0778, 0.1062]) -Greedy action tensor([ 0.5040, -0.0319, -0.0151, 0.0794]) tensor([0.3528, 0.2065, 0.2099, 0.2308]) -Greedy action tensor([ 1.0239, -0.0990, -0.3745, -0.3053]) tensor([0.5444, 0.1771, 0.1345, 0.1441]) -Greedy action tensor([ 1.1050, -0.7903, -0.3740, 0.5067]) tensor([0.5187, 0.0779, 0.1182, 0.2852]) -Greedy action tensor([ 1.4852, -0.9076, -0.1714, 0.3524]) tensor([0.6233, 0.0570, 0.1189, 0.2008]) -Greedy action tensor([ 1.7597, -0.2230, -0.4622, 0.1531]) tensor([0.6912, 0.0952, 0.0749, 0.1386]) -Greedy action tensor([ 1.3818, -0.3662, -0.1620, -0.0912]) tensor([0.6185, 0.1077, 0.1321, 0.1418]) -Greedy action tensor([ 1.3632, -0.7178, -0.3114, 0.3326]) tensor([0.5992, 0.0748, 0.1123, 0.2138]) -Greedy action tensor([0.2444, 0.1194, 0.0304, 0.1523]) tensor([0.2776, 0.2450, 0.2242, 0.2532]) -Greedy action tensor([ 0.5308, -0.1415, -0.1302, -0.1583]) tensor([0.3954, 0.2019, 0.2042, 0.1985]) -Greedy action tensor([ 0.8753, -0.4153, 0.0297, -0.3175]) tensor([0.4981, 0.1370, 0.2138, 0.1511]) -Greedy action tensor([ 0.5603, 0.2622, -0.2200, -0.0392]) tensor([0.3637, 0.2700, 0.1667, 0.1997]) -Greedy action tensor([ 0.9799, -0.4820, 0.0716, -0.7603]) tensor([0.5523, 0.1280, 0.2227, 0.0969]) -Greedy action tensor([ 0.4080, -0.0470, 0.0217, -0.2393]) tensor([0.3524, 0.2236, 0.2395, 0.1845]) -Greedy action tensor([ 0.9664, -0.3815, -0.0088, -0.3852]) tensor([0.5275, 0.1370, 0.1989, 0.1365]) -Greedy action tensor([ 1.0253, -0.7008, 0.0210, -0.4796]) tensor([0.5662, 0.1008, 0.2074, 0.1257]) -Greedy action tensor([ 0.7185, -0.4957, -0.0201, -0.5048]) tensor([0.4833, 0.1435, 0.2309, 0.1422]) -Greedy action tensor([ 0.3436, -0.0808, -0.1332, -0.2887]) tensor([0.3563, 0.2331, 0.2212, 0.1893]) -Greedy action tensor([ 0.3688, 0.1874, 0.0987, -0.2696]) tensor([0.3199, 0.2669, 0.2442, 0.1690]) -Greedy action tensor([ 0.6221, -0.3763, -0.1825, -0.3517]) tensor([0.4559, 0.1680, 0.2039, 0.1722]) -Greedy action tensor([ 0.6606, -0.3717, -0.0892, -0.3145]) tensor([0.4533, 0.1615, 0.2142, 0.1710]) -Greedy action tensor([ 0.7046, -0.4721, -0.0811, -0.5400]) tensor([0.4873, 0.1502, 0.2221, 0.1404]) -Greedy action tensor([ 0.3755, 0.2458, -0.0886, -0.2503]) tensor([0.3287, 0.2888, 0.2067, 0.1758]) -Greedy action tensor([ 0.8320, -0.4523, -0.2148, -0.4329]) tensor([0.5235, 0.1449, 0.1838, 0.1478]) -Greedy action tensor([ 0.3118, -0.0386, -0.1494, -0.2512]) tensor([0.3443, 0.2425, 0.2171, 0.1961]) -Greedy action tensor([ 0.5589, -0.3778, 0.0167, -0.4243]) tensor([0.4260, 0.1669, 0.2477, 0.1594]) -Greedy action tensor([ 0.5509, 0.2079, 0.1015, -0.2696]) tensor([0.3587, 0.2545, 0.2289, 0.1579]) -Greedy action tensor([ 0.7451, -0.3998, -0.0029, -0.4519]) tensor([0.4776, 0.1520, 0.2261, 0.1443]) -Greedy action tensor([ 0.4234, -0.4115, 0.2388, -0.4242]) tensor([0.3712, 0.1611, 0.3086, 0.1590]) -Greedy action tensor([ 0.4083, 0.1603, -0.0070, -0.2514]) tensor([0.3381, 0.2639, 0.2232, 0.1748]) -Greedy action tensor([ 0.2677, -0.0119, 0.0906, -0.3152]) tensor([0.3172, 0.2399, 0.2658, 0.1771]) -Greedy action tensor([ 0.4631, 0.1037, 0.0956, -0.1465]) tensor([0.3408, 0.2379, 0.2360, 0.1853]) -Greedy action tensor([ 0.6329, -0.4632, -0.1299, -0.2118]) tensor([0.4484, 0.1498, 0.2091, 0.1927]) -Greedy action tensor([ 0.5268, -0.1077, -0.0673, -0.1993]) tensor([0.3897, 0.2066, 0.2151, 0.1885]) -Greedy action tensor([ 0.2228, 0.0655, 0.1069, -0.2780]) tensor([0.2984, 0.2550, 0.2658, 0.1809]) -Greedy action tensor([ 1.0541, -0.5159, -0.0660, -0.6368]) tensor([0.5819, 0.1211, 0.1898, 0.1073]) -Greedy action tensor([ 0.8885, -0.5659, -0.0715, -0.5181]) tensor([0.5372, 0.1255, 0.2057, 0.1316]) -Greedy action tensor([ 0.9193, -0.2281, 0.1180, -0.5094]) tensor([0.4986, 0.1583, 0.2237, 0.1195]) -Greedy action tensor([ 0.6962, -0.1547, 0.0203, -0.1366]) tensor([0.4218, 0.1801, 0.2146, 0.1834]) -Greedy action tensor([ 0.7308, -0.3929, 0.1745, -0.3343]) tensor([0.4458, 0.1449, 0.2556, 0.1537]) -Greedy action tensor([ 0.7551, -0.4262, 0.0562, -0.6448]) tensor([0.4877, 0.1496, 0.2424, 0.1203]) -Greedy action tensor([ 0.3563, -0.1666, -0.1146, -0.2637]) tensor([0.3630, 0.2152, 0.2266, 0.1952]) -Greedy action tensor([ 0.6584, -0.6454, 0.0241, -0.4414]) tensor([0.4684, 0.1272, 0.2484, 0.1560]) -Greedy action tensor([ 0.6807, -0.1091, -0.0476, -0.3618]) tensor([0.4368, 0.1983, 0.2109, 0.1540]) -Greedy action tensor([ 0.4406, -0.1086, -0.1814, -0.0474]) tensor([0.3666, 0.2117, 0.1968, 0.2250]) -Greedy action tensor([ 0.7592, -0.7732, -0.0835, -0.4143]) tensor([0.5113, 0.1104, 0.2201, 0.1581]) -Greedy action tensor([ 0.6202, -0.4408, -0.2153, -0.3899]) tensor([0.4664, 0.1614, 0.2023, 0.1699]) -Greedy action tensor([ 0.6579, -0.2680, 0.0917, -0.5041]) tensor([0.4392, 0.1740, 0.2493, 0.1374]) -Greedy action tensor([ 0.7840, -0.4530, -0.0384, -0.5368]) tensor([0.5009, 0.1454, 0.2201, 0.1337]) -Greedy action tensor([ 0.3448, 0.1799, -0.0009, -0.0493]) tensor([0.3096, 0.2625, 0.2191, 0.2088]) -Greedy action tensor([ 0.4847, -0.2225, -0.1414, -0.2996]) tensor([0.4026, 0.1985, 0.2152, 0.1837]) -Greedy action tensor([ 0.4384, -0.2086, 0.1135, -0.5583]) tensor([0.3824, 0.2002, 0.2763, 0.1411]) -Greedy action tensor([ 0.3174, 0.2781, -0.2105, -0.3529]) tensor([0.3265, 0.3139, 0.1926, 0.1670]) -Greedy action tensor([ 0.8268, -0.2332, 0.3630, -0.3172]) tensor([0.4359, 0.1510, 0.2742, 0.1389]) -Greedy action tensor([ 0.7210, -0.4157, -0.0851, -0.2441]) tensor([0.4655, 0.1493, 0.2079, 0.1773]) -Greedy action tensor([ 0.4746, -0.2048, 0.1282, -0.6514]) tensor([0.3939, 0.1997, 0.2786, 0.1278]) -Greedy action tensor([ 0.9950, -0.8581, -0.0601, -0.5834]) tensor([0.5844, 0.0916, 0.2035, 0.1206]) -Greedy action tensor([ 0.4346, -0.1073, -0.0232, -0.2199]) tensor([0.3658, 0.2127, 0.2314, 0.1901]) -Greedy action tensor([ 0.7744, -0.3408, -0.0246, -0.3643]) tensor([0.4767, 0.1563, 0.2144, 0.1526]) -Greedy action tensor([ 0.5132, -0.1953, -0.0072, -0.5274]) tensor([0.4099, 0.2018, 0.2436, 0.1448]) -Greedy action tensor([ 1.1012, -0.7202, -0.0639, -0.9996]) tensor([0.6266, 0.1014, 0.1954, 0.0767]) -Greedy action tensor([ 0.5316, 0.0665, -0.0315, -0.1084]) tensor([0.3670, 0.2305, 0.2090, 0.1935]) -Greedy action tensor([ 0.3295, 0.1341, -0.0464, -0.1230]) tensor([0.3179, 0.2615, 0.2183, 0.2022]) -Greedy action tensor([ 0.5302, 0.0808, -0.0934, -0.1698]) tensor([0.3744, 0.2389, 0.2007, 0.1859]) -Greedy action tensor([ 0.4382, 0.1728, 0.0496, -0.2128]) tensor([0.3371, 0.2585, 0.2286, 0.1758]) -Greedy action tensor([ 0.4398, 0.0556, -0.1460, -0.1205]) tensor([0.3560, 0.2425, 0.1982, 0.2033]) -Greedy action tensor([ 0.5127, 0.4859, -0.2531, -0.3825]) tensor([0.3513, 0.3419, 0.1633, 0.1435]) -Greedy action tensor([ 0.5860, 0.2335, -0.1115, -0.3989]) tensor([0.3885, 0.2730, 0.1934, 0.1451]) -Greedy action tensor([ 0.8233, -0.5593, -0.1768, -0.6051]) tensor([0.5381, 0.1350, 0.1979, 0.1290]) -Greedy action tensor([ 0.9161, -0.6128, 0.0451, -0.4840]) tensor([0.5314, 0.1152, 0.2224, 0.1310]) -Greedy action tensor([ 0.5938, 0.0614, -0.1415, -0.2777]) tensor([0.4024, 0.2363, 0.1929, 0.1683]) -Greedy action tensor([ 0.3176, 0.2275, -0.0823, -0.3313]) tensor([0.3219, 0.2941, 0.2158, 0.1682]) -Greedy action tensor([ 0.8049, -0.4660, 0.0160, -0.5979]) tensor([0.5048, 0.1417, 0.2294, 0.1241]) -Greedy action tensor([ 0.6446, -0.2799, -0.0256, -0.2369]) tensor([0.4306, 0.1708, 0.2203, 0.1783]) -Greedy action tensor([ 0.7999, -0.2911, 0.1354, -0.4641]) tensor([0.4688, 0.1575, 0.2412, 0.1325]) -Greedy action tensor([ 0.7221, -0.4452, 0.1184, -0.5040]) tensor([0.4648, 0.1447, 0.2541, 0.1364]) -Greedy action tensor([ 0.5539, -0.0678, -0.1058, -0.3103]) tensor([0.4040, 0.2170, 0.2088, 0.1702]) -Greedy action tensor([ 0.3631, -0.0312, 0.1133, -0.2065]) tensor([0.3312, 0.2233, 0.2580, 0.1874]) -Greedy action tensor([ 0.9931, -0.4757, -0.0637, -0.3543]) tensor([0.5442, 0.1253, 0.1891, 0.1414]) -Greedy action tensor([ 1.0170, -0.1733, 0.0621, -0.7508]) tensor([0.5377, 0.1635, 0.2069, 0.0918]) -Greedy action tensor([ 0.7345, -0.4611, -0.1501, -0.3388]) tensor([0.4861, 0.1471, 0.2007, 0.1662]) -Greedy action tensor([ 0.9492, -0.5690, 0.0944, -0.5722]) tensor([0.5368, 0.1176, 0.2283, 0.1172]) -Greedy action tensor([ 0.8278, -0.6464, -0.0634, -0.4447]) tensor([0.5210, 0.1193, 0.2137, 0.1460]) -Greedy action tensor([ 0.3217, -0.1471, 0.0763, -0.3226]) tensor([0.3409, 0.2133, 0.2667, 0.1790]) -Greedy action tensor([ 0.3638, 0.2941, 0.0974, -0.1527]) tensor([0.3035, 0.2830, 0.2325, 0.1810]) -Greedy action tensor([ 0.7087, -0.5181, 0.1802, -0.4384]) tensor([0.4545, 0.1333, 0.2679, 0.1443]) -Greedy action tensor([ 0.7489, -0.3172, -0.0641, -0.1663]) tensor([0.4570, 0.1574, 0.2027, 0.1830]) -Greedy action tensor([ 0.6230, -0.0931, -0.1482, -0.2622]) tensor([0.4231, 0.2067, 0.1957, 0.1746]) -Greedy action tensor([ 0.7812, -0.5470, -0.2078, -0.5014]) tensor([0.5224, 0.1384, 0.1943, 0.1449]) -Greedy action tensor([-1.0410, -0.5251, 0.2671, 0.2956]) tensor([0.0982, 0.1645, 0.3634, 0.3739]) -Greedy action tensor([-1.5100, -0.4660, 0.4338, 0.0682]) tensor([0.0638, 0.1813, 0.4457, 0.3092]) -Greedy action tensor([-1.3760, -0.3388, 0.4632, 0.3802]) tensor([0.0629, 0.1774, 0.3956, 0.3641]) -Greedy action tensor([-1.2660, -0.5140, 0.7606, 0.9113]) tensor([0.0512, 0.1086, 0.3885, 0.4517]) -Greedy action tensor([-1.7483, -0.2314, 0.5131, -0.0477]) tensor([0.0485, 0.2209, 0.4651, 0.2655]) -Greedy action tensor([-1.9854, -0.8300, 0.7537, -0.0191]) tensor([0.0373, 0.1185, 0.5775, 0.2667]) -Greedy action tensor([-1.4566, -0.4537, 0.5777, -0.2903]) tensor([0.0686, 0.1869, 0.5244, 0.2201]) -Greedy action tensor([-1.6725, -0.8185, -0.0229, -0.5115]) tensor([0.0851, 0.2000, 0.4431, 0.2718]) -Greedy action tensor([-1.1570, -0.3286, 0.8437, 1.2250]) tensor([0.0465, 0.1064, 0.3438, 0.5033]) -Greedy action tensor([-1.4570, -0.5506, 0.4290, 0.2754]) tensor([0.0636, 0.1574, 0.4193, 0.3596]) -Greedy action tensor([-0.8479, 0.6889, -0.3762, -0.0946]) tensor([0.1067, 0.4959, 0.1709, 0.2265]) -Greedy action tensor([-1.6442, -0.5097, 0.5225, 0.0439]) tensor([0.0548, 0.1704, 0.4784, 0.2964]) -Greedy action tensor([-0.6552, -0.5903, 1.1063, 1.6102]) tensor([0.0571, 0.0609, 0.3322, 0.5498]) -Greedy action tensor([-1.5092, -0.5082, 0.4166, 0.1596]) tensor([0.0629, 0.1713, 0.4318, 0.3340]) -Greedy action tensor([-1.6981, -0.5125, 0.6870, 0.2800]) tensor([0.0447, 0.1463, 0.4857, 0.3233]) -Greedy action tensor([-1.4574, -0.5329, 0.2433, -0.0345]) tensor([0.0761, 0.1917, 0.4166, 0.3156]) -Greedy action tensor([-0.3992, 0.0530, 0.7880, 1.6444]) tensor([0.0737, 0.1158, 0.2416, 0.5689]) -Greedy action tensor([-1.4543, -0.4247, 1.0883, 0.9351]) tensor([0.0365, 0.1021, 0.4636, 0.3978]) -Greedy action tensor([-1.4242, -0.4448, 0.6844, 0.7963]) tensor([0.0474, 0.1261, 0.3901, 0.4364]) -Greedy action tensor([-1.6815, -0.2615, 0.5267, 0.0758]) tensor([0.0499, 0.2065, 0.4542, 0.2893]) -Greedy action tensor([-1.5983, -0.2435, 0.4311, 0.0431]) tensor([0.0567, 0.2196, 0.4312, 0.2925]) -Greedy action tensor([-0.8427, 0.6653, 0.2490, 1.0021]) tensor([0.0675, 0.3048, 0.2010, 0.4268]) -Greedy action tensor([-1.0255, -0.2964, 0.4857, 0.8338]) tensor([0.0713, 0.1478, 0.3232, 0.4577]) -Greedy action tensor([-0.3403, 0.1934, 0.7025, 1.6750]) tensor([0.0767, 0.1307, 0.2175, 0.5751]) -Greedy action tensor([-1.1366, -0.5408, 0.3652, -0.0063]) tensor([0.0961, 0.1745, 0.4317, 0.2977]) -Greedy action tensor([-0.9752, -0.6936, 0.3108, 0.3486]) tensor([0.1031, 0.1366, 0.3730, 0.3873]) -Greedy action tensor([-1.2075, -0.5915, 0.2745, 0.2825]) tensor([0.0855, 0.1584, 0.3765, 0.3796]) -Greedy action tensor([-0.5842, -0.2976, 0.9914, 1.6373]) tensor([0.0610, 0.0813, 0.2950, 0.5627]) -Greedy action tensor([-0.8872, -0.3104, 0.8617, 1.3959]) tensor([0.0545, 0.0971, 0.3135, 0.5348]) -Greedy action tensor([-0.7377, -0.5028, 1.2091, 1.5951]) tensor([0.0511, 0.0646, 0.3578, 0.5265]) -Greedy action tensor([-1.3021, -0.5677, 0.3238, 0.3096]) tensor([0.0759, 0.1581, 0.3857, 0.3803]) -Greedy action tensor([-0.4278, -0.4544, 0.1956, 0.2342]) tensor([0.1731, 0.1685, 0.3228, 0.3355]) -Greedy action tensor([-1.8149, -0.4679, 0.5932, -0.1117]) tensor([0.0466, 0.1793, 0.5181, 0.2560]) -Greedy action tensor([-1.8550, -0.4742, 0.6427, -0.0945]) tensor([0.0436, 0.1734, 0.5296, 0.2534]) -Greedy action tensor([-1.0550, -0.5155, 0.5884, -0.3950]) tensor([0.1018, 0.1746, 0.5266, 0.1970]) -Greedy action tensor([-1.3820, -0.4277, 0.4400, 0.0785]) tensor([0.0710, 0.1843, 0.4389, 0.3058]) -Greedy action tensor([-1.4304, -0.0856, 0.4706, 0.4172]) tensor([0.0559, 0.2147, 0.3744, 0.3549]) -Greedy action tensor([-1.4544, -0.5752, 0.4126, 0.1051]) tensor([0.0683, 0.1646, 0.4420, 0.3250]) -Greedy action tensor([-0.8587, -0.2976, 0.7995, 1.3895]) tensor([0.0572, 0.1003, 0.3005, 0.5420]) -Greedy action tensor([-1.9698, -0.5815, 1.3793, 0.6007]) tensor([0.0215, 0.0861, 0.6116, 0.2808]) -Greedy action tensor([-0.8986, -0.6366, 0.2285, 0.2062]) tensor([0.1190, 0.1546, 0.3673, 0.3591]) -Greedy action tensor([-1.6433, -0.5167, 0.5805, 0.0958]) tensor([0.0526, 0.1622, 0.4859, 0.2993]) -Greedy action tensor([-0.8820, -0.4523, 1.1768, 1.4811]) tensor([0.0476, 0.0732, 0.3732, 0.5060]) -Greedy action tensor([-0.5217, -0.5037, 0.1884, 0.2327]) tensor([0.1618, 0.1648, 0.3292, 0.3441]) -Greedy action tensor([-1.5955, -0.6931, 0.3425, -0.4541]) tensor([0.0739, 0.1821, 0.5128, 0.2312]) -Greedy action tensor([-1.9281, -0.6448, 0.5671, 0.0725]) tensor([0.0415, 0.1496, 0.5025, 0.3065]) -Greedy action tensor([-1.7889, -0.5103, 0.6286, -0.0438]) tensor([0.0464, 0.1668, 0.5209, 0.2659]) -Greedy action tensor([-1.3737, -0.6319, 0.4938, -0.0118]) tensor([0.0742, 0.1558, 0.4803, 0.2897]) -Greedy action tensor([-1.8720, -0.4619, 0.6451, -0.1222]) tensor([0.0430, 0.1762, 0.5332, 0.2475]) -Greedy action tensor([-1.7706, -0.4604, 0.5737, -0.0764]) tensor([0.0486, 0.1802, 0.5067, 0.2645]) -Greedy action tensor([-1.7624, -0.1941, 0.5515, -0.0584]) tensor([0.0467, 0.2241, 0.4724, 0.2567]) -Greedy action tensor([-1.8222, -0.3765, 0.5997, -0.0856]) tensor([0.0451, 0.1913, 0.5078, 0.2559]) -Greedy action tensor([-0.9717, -0.5628, 0.4122, -0.2285]) tensor([0.1163, 0.1750, 0.4641, 0.2445]) -Greedy action tensor([-0.9236, -0.6203, 0.2231, 0.1163]) tensor([0.1200, 0.1626, 0.3778, 0.3396]) -Greedy action tensor([-1.7100, -0.5803, 0.5064, -0.2009]) tensor([0.0562, 0.1739, 0.5157, 0.2542]) -Greedy action tensor([-1.7451, -0.4941, 0.5719, -0.0066]) tensor([0.0492, 0.1719, 0.4991, 0.2798]) -Greedy action tensor([-1.0735, -0.2875, 0.9524, 1.2691]) tensor([0.0472, 0.1036, 0.3579, 0.4913]) -Greedy action tensor([-0.2611, 0.3375, 0.7023, 1.6139]) tensor([0.0836, 0.1521, 0.2191, 0.5452]) -Greedy action tensor([-1.8873, -0.4123, 0.6307, -0.1444]) tensor([0.0426, 0.1861, 0.5281, 0.2433]) -Greedy action tensor([-1.4534, -0.4397, 0.5924, 0.4591]) tensor([0.0548, 0.1509, 0.4236, 0.3708]) -Greedy action tensor([-1.2183, -0.5430, 0.2860, 0.2552]) tensor([0.0845, 0.1661, 0.3805, 0.3689]) -Greedy action tensor([-1.7559, -0.8340, -0.0144, -0.4573]) tensor([0.0776, 0.1951, 0.4429, 0.2844]) -Greedy action tensor([-1.9293, -0.8611, 0.2442, -0.2263]) tensor([0.0550, 0.1600, 0.4832, 0.3018]) -Greedy action tensor([-1.2333, -0.5432, 0.3496, 0.5030]) tensor([0.0739, 0.1473, 0.3596, 0.4193]) -Greedy action tensor([-1.8459, -1.0007, 0.0050, -0.5322]) tensor([0.0746, 0.1736, 0.4746, 0.2773]) -Greedy action tensor([-1.8857, -0.8854, 0.2080, -0.2985]) tensor([0.0598, 0.1626, 0.4852, 0.2924]) -Greedy action tensor([-1.6330, -0.8811, -0.3638, -0.6730]) tensor([0.1076, 0.2283, 0.3830, 0.2811]) -Greedy action tensor([-1.4498, -0.4584, 0.5228, 0.4320]) tensor([0.0573, 0.1544, 0.4120, 0.3763]) -Greedy action tensor([-1.2516, -0.5733, 0.4293, 0.6973]) tensor([0.0651, 0.1283, 0.3496, 0.4570]) -Greedy action tensor([-1.3031, -0.4682, 0.7772, -0.6495]) tensor([0.0756, 0.1741, 0.6050, 0.1453]) -Greedy action tensor([-1.3927, -0.5519, 0.5710, 0.5819]) tensor([0.0567, 0.1314, 0.4038, 0.4082]) -Greedy action tensor([-2.0489, -0.7523, 1.2560, 0.4924]) tensor([0.0224, 0.0820, 0.6109, 0.2847]) -Greedy action tensor([-1.2783, 0.2614, -0.9206, -1.2492]) tensor([0.1231, 0.5741, 0.1761, 0.1268]) -Greedy action tensor([-1.0832, 0.4615, 0.6409, 1.0508]) tensor([0.0507, 0.2374, 0.2840, 0.4279]) -Greedy action tensor([-1.6953, -0.5038, 0.5276, -0.0261]) tensor([0.0531, 0.1748, 0.4903, 0.2818]) -Greedy action tensor([-0.5062, -0.3303, 0.2085, 0.1854]) tensor([0.1604, 0.1913, 0.3279, 0.3204]) -Greedy action tensor([-2.0079e+00, -9.0041e-01, 5.3913e-01, -7.6783e-04]) tensor([0.0413, 0.1249, 0.5268, 0.3070]) -Greedy action tensor([-1.6218, -0.5532, 0.5225, 0.0374]) tensor([0.0565, 0.1645, 0.4822, 0.2969]) -Greedy action tensor([-0.7679, -0.3556, 1.1406, 1.5074]) tensor([0.0527, 0.0796, 0.3552, 0.5126]) -Greedy action tensor([-1.8579, -0.6014, 0.3731, -0.2427]) tensor([0.0530, 0.1864, 0.4938, 0.2668]) -Greedy action tensor([-1.9069, -0.6685, 0.8095, -0.0751]) tensor([0.0387, 0.1336, 0.5858, 0.2419]) -Greedy action tensor([ 0.2811, 0.6683, -0.4509, 1.6463]) tensor([0.1456, 0.2144, 0.0700, 0.5701]) -Greedy action tensor([ 0.8703, -1.7503, 1.3893, -0.1691]) tensor([0.3219, 0.0234, 0.5409, 0.1138]) -Greedy action tensor([ 1.4681, -0.8335, 1.3056, 0.9854]) tensor([0.3895, 0.0390, 0.3311, 0.2404]) -Greedy action tensor([ 1.9114, -0.6827, 1.0012, 1.4846]) tensor([0.4696, 0.0351, 0.1890, 0.3064]) -Greedy action tensor([ 0.0255, -0.7316, 0.7139, 0.4068]) tensor([0.2031, 0.0953, 0.4043, 0.2974]) -Greedy action tensor([ 0.6973, 0.6174, -0.1722, 0.2650]) tensor([0.3343, 0.3086, 0.1401, 0.2170]) -Greedy action tensor([ 0.9888, -0.7124, 1.4627, 0.1042]) tensor([0.3123, 0.0570, 0.5017, 0.1290]) -Greedy action tensor([ 0.2377, -0.8133, 0.1415, 0.9048]) tensor([0.2377, 0.0831, 0.2159, 0.4632]) -Greedy action tensor([ 2.1063, -1.4999, 0.2458, 0.9358]) tensor([0.6698, 0.0182, 0.1042, 0.2078]) -Greedy action tensor([ 0.7467, 0.4240, -0.5690, 0.1457]) tensor([0.3936, 0.2850, 0.1056, 0.2158]) -Greedy action tensor([ 0.5603, -0.3427, 0.1552, 1.1244]) tensor([0.2611, 0.1058, 0.1741, 0.4590]) -Greedy action tensor([1.2248, 0.6850, 0.4100, 1.5614]) tensor([0.2919, 0.1701, 0.1292, 0.4087]) -Greedy action tensor([-1.0579, -0.1909, 1.1498, 0.1537]) tensor([0.0632, 0.1503, 0.5744, 0.2121]) -Greedy action tensor([ 0.7379, 0.3917, -0.0359, 1.7580]) tensor([0.2023, 0.1431, 0.0933, 0.5612]) -Greedy action tensor([ 1.3200, -0.4801, 1.7021, 0.6359]) tensor([0.3190, 0.0527, 0.4674, 0.1609]) -Greedy action tensor([ 0.7669, -1.3483, -0.7785, 0.2961]) tensor([0.5106, 0.0616, 0.1089, 0.3189]) -Greedy action tensor([ 0.6867, 0.0181, 1.5946, -1.1130]) tensor([0.2406, 0.1233, 0.5964, 0.0398]) -Greedy action tensor([ 1.7418, -0.5752, 0.9039, 0.2985]) tensor([0.5658, 0.0558, 0.2448, 0.1336]) -Greedy action tensor([0.8833, 0.1924, 1.3032, 0.7590]) tensor([0.2560, 0.1283, 0.3896, 0.2261]) -Greedy action tensor([ 0.8857, 0.5750, 1.4526, -0.5036]) tensor([0.2670, 0.1957, 0.4707, 0.0666]) -Greedy action tensor([ 1.1748, -0.6550, -0.4318, 1.3065]) tensor([0.3997, 0.0641, 0.0802, 0.4560]) -Greedy action tensor([ 0.7920, -0.1537, -0.1228, 1.1820]) tensor([0.3062, 0.1189, 0.1227, 0.4522]) -Greedy action tensor([ 0.4620, -2.7253, 0.0607, 0.9077]) tensor([0.3056, 0.0126, 0.2046, 0.4772]) -Greedy action tensor([ 1.2127, 1.2982, -0.3750, -0.0389]) tensor([0.3876, 0.4222, 0.0792, 0.1109]) -Greedy action tensor([0.1770, 0.7519, 0.3649, 0.0512]) tensor([0.2055, 0.3652, 0.2480, 0.1812]) -Greedy action tensor([ 0.5605, -1.4301, 0.6196, 0.3082]) tensor([0.3362, 0.0459, 0.3567, 0.2612]) -Greedy action tensor([ 1.2541, -1.0249, 1.3378, 0.8276]) tensor([0.3518, 0.0360, 0.3825, 0.2296]) -Greedy action tensor([ 0.1528, 0.0078, -0.5731, 1.3328]) tensor([0.1785, 0.1544, 0.0864, 0.5808]) -Greedy action tensor([ 0.0701, -0.4331, 0.4134, 0.6385]) tensor([0.2092, 0.1265, 0.2949, 0.3694]) -Greedy action tensor([ 0.2938, 0.4976, 1.6306, -0.5687]) tensor([0.1549, 0.1899, 0.5898, 0.0654]) -Greedy action tensor([ 0.8035, -0.1260, -0.0980, 1.6468]) tensor([0.2424, 0.0957, 0.0984, 0.5634]) -Greedy action tensor([ 0.3264, -0.0453, 0.6074, 0.9208]) tensor([0.2072, 0.1429, 0.2744, 0.3755]) -Greedy action tensor([ 0.4694, -2.0130, 0.6974, 0.2159]) tensor([0.3210, 0.0268, 0.4031, 0.2491]) -Greedy action tensor([-0.3989, 1.2142, 1.0195, -0.5776]) tensor([0.0910, 0.4568, 0.3760, 0.0761]) -Greedy action tensor([ 1.3605, -1.0662, 2.0181, 0.1627]) tensor([0.3012, 0.0266, 0.5813, 0.0909]) -Greedy action tensor([ 1.5974, -0.0517, -0.0825, 2.1638]) tensor([0.3184, 0.0612, 0.0593, 0.5610]) -Greedy action tensor([ 1.3707, -0.3223, 1.5595, 0.8065]) tensor([0.3378, 0.0621, 0.4080, 0.1921]) -Greedy action tensor([-0.1763, 0.2644, 0.2547, 0.5067]) tensor([0.1647, 0.2559, 0.2534, 0.3260]) -Greedy action tensor([ 0.9511, 0.5121, -0.5477, 0.5819]) tensor([0.3907, 0.2519, 0.0873, 0.2701]) -Greedy action tensor([ 1.0545, -1.1301, 0.6021, 0.6761]) tensor([0.4109, 0.0462, 0.2614, 0.2815]) -Greedy action tensor([ 1.2087, -0.6884, 1.0650, 0.5831]) tensor([0.3920, 0.0588, 0.3395, 0.2097]) -Greedy action tensor([ 0.0042, -1.7977, 0.2800, -0.4978]) tensor([0.3238, 0.0534, 0.4267, 0.1960]) -Greedy action tensor([ 0.8941, -2.3714, 0.1989, 0.2389]) tensor([0.4863, 0.0186, 0.2426, 0.2525]) -Greedy action tensor([ 1.3862, 0.7664, -0.4728, 0.1082]) tensor([0.5070, 0.2728, 0.0790, 0.1412]) -Greedy action tensor([-0.2664, -1.1933, 0.4946, 0.3357]) tensor([0.1865, 0.0738, 0.3992, 0.3405]) -Greedy action tensor([ 1.0282, -0.3432, 0.0712, 1.6128]) tensor([0.2914, 0.0739, 0.1119, 0.5228]) -Greedy action tensor([ 0.2518, 0.2985, -0.5571, 0.5821]) tensor([0.2574, 0.2697, 0.1146, 0.3582]) -Greedy action tensor([-0.1775, -0.6855, 1.0912, 0.5174]) tensor([0.1396, 0.0840, 0.4966, 0.2798]) -Greedy action tensor([ 0.9675, -0.6541, -0.3431, 1.6290]) tensor([0.2937, 0.0580, 0.0792, 0.5691]) -Greedy action tensor([1.0457, 0.0884, 0.8112, 0.9443]) tensor([0.3248, 0.1247, 0.2569, 0.2935]) -Greedy action tensor([-0.8885, -1.8498, 0.5504, -0.1410]) tensor([0.1297, 0.0496, 0.5468, 0.2739]) -Greedy action tensor([ 0.3105, -0.0470, -0.3355, 0.7980]) tensor([0.2596, 0.1816, 0.1361, 0.4227]) -Greedy action tensor([ 0.8686, -0.9145, 0.6634, 1.0948]) tensor([0.3090, 0.0519, 0.2517, 0.3874]) -Greedy action tensor([ 0.4230, -0.0808, -0.4502, 1.3048]) tensor([0.2254, 0.1362, 0.0941, 0.5443]) -Greedy action tensor([ 1.0225, 0.1190, -0.4670, 0.8983]) tensor([0.3978, 0.1612, 0.0897, 0.3513]) -Greedy action tensor([-0.3442, -1.2227, 0.9253, -0.0494]) tensor([0.1583, 0.0658, 0.5634, 0.2126]) -Greedy action tensor([-0.8049, 0.8062, 0.9931, -0.1729]) tensor([0.0718, 0.3596, 0.4335, 0.1351]) -Greedy action tensor([ 1.3218, -0.2624, -0.7530, 2.0230]) tensor([0.2988, 0.0613, 0.0375, 0.6024]) -Greedy action tensor([ 0.3587, -0.0089, 0.0465, 0.1912]) tensor([0.3058, 0.2117, 0.2238, 0.2586]) -Greedy action tensor([ 1.0037, -0.0229, 0.4266, -0.2378]) tensor([0.4528, 0.1622, 0.2542, 0.1308]) -Greedy action tensor([ 1.2869, -0.6254, 1.2825, 0.3234]) tensor([0.3960, 0.0585, 0.3943, 0.1511]) -Greedy action tensor([ 1.1241, -0.3147, 0.6720, 1.1439]) tensor([0.3456, 0.0820, 0.2199, 0.3525]) -Greedy action tensor([1.9311, 0.7361, 1.1212, 0.8255]) tensor([0.4811, 0.1456, 0.2140, 0.1592]) -Greedy action tensor([ 0.7469, -2.0291, 0.1346, 1.4962]) tensor([0.2688, 0.0167, 0.1457, 0.5687]) -Greedy action tensor([ 0.0023, -1.9343, -0.6986, 0.4415]) tensor([0.3133, 0.0452, 0.1554, 0.4861]) -Greedy action tensor([ 0.8043, -0.9408, 1.4884, 0.9684]) tensor([0.2307, 0.0403, 0.4572, 0.2718]) -Greedy action tensor([ 1.3162, -1.3878, 0.4743, 0.1145]) tensor([0.5560, 0.0372, 0.2396, 0.1672]) -Greedy action tensor([ 0.1068, 0.2927, -0.1390, 0.0905]) tensor([0.2519, 0.3033, 0.1970, 0.2478]) -Greedy action tensor([ 1.9677, -0.0228, -0.5333, 1.1513]) tensor([0.6022, 0.0823, 0.0494, 0.2662]) -Greedy action tensor([ 1.0268, -0.0479, 0.2176, 0.7166]) tensor([0.3968, 0.1355, 0.1767, 0.2910]) -Greedy action tensor([-0.1245, 0.5565, 0.3543, -0.3511]) tensor([0.1856, 0.3668, 0.2996, 0.1480]) -Greedy action tensor([ 0.8560, 0.5771, -0.3829, 0.3981]) tensor([0.3733, 0.2824, 0.1081, 0.2361]) -Greedy action tensor([-0.8740, 0.6187, 0.9660, -0.7427]) tensor([0.0776, 0.3453, 0.4886, 0.0885]) -Greedy action tensor([ 0.9208, -0.7966, 1.4854, 0.9665]) tensor([0.2509, 0.0451, 0.4413, 0.2627]) -Greedy action tensor([1.1541, 0.3072, 0.0430, 0.0813]) tensor([0.4762, 0.2042, 0.1568, 0.1629]) -Greedy action tensor([-0.3364, 0.2088, 0.1407, -0.0097]) tensor([0.1747, 0.3014, 0.2816, 0.2423]) -Greedy action tensor([ 0.6402, 0.4904, 0.0647, -0.2787]) tensor([0.3543, 0.3050, 0.1993, 0.1414]) -Greedy action tensor([-0.3915, -1.3927, -0.4128, 0.7551]) tensor([0.1820, 0.0669, 0.1782, 0.5729]) -Greedy action tensor([ 0.6889, -0.0525, -0.2875, -0.1006]) tensor([0.4334, 0.2065, 0.1633, 0.1968]) -Greedy action tensor([ 0.9528, -0.2828, 0.0056, 0.1819]) tensor([0.4671, 0.1358, 0.1811, 0.2161]) -Greedy action tensor([0.7073, 0.0055, 1.0641, 0.3059]) tensor([0.2782, 0.1379, 0.3976, 0.1863]) -Greedy action tensor([0.8599, 0.8831, 1.0561, 0.1181]) tensor([0.2691, 0.2754, 0.3274, 0.1281]) -Greedy action tensor([ 1.0672, -0.5258, -0.3940, 0.4986]) tensor([0.4996, 0.1016, 0.1159, 0.2829]) -Greedy action tensor([ 1.2487, -0.5617, -0.3014, 0.3278]) tensor([0.5637, 0.0922, 0.1196, 0.2244]) -Greedy action tensor([ 1.3178, -0.8562, -0.4808, 0.8168]) tensor([0.5305, 0.0603, 0.0878, 0.3214]) -Greedy action tensor([ 1.2546, -0.1375, -0.2956, 0.0505]) tensor([0.5679, 0.1412, 0.1205, 0.1704]) -Greedy action tensor([ 1.7199, -0.5358, -0.3431, -0.0302]) tensor([0.7114, 0.0746, 0.0904, 0.1236]) -Greedy action tensor([ 1.5440, -0.8622, 0.0135, 0.3560]) tensor([0.6206, 0.0559, 0.1343, 0.1892]) -Greedy action tensor([ 1.1226, -0.5025, -0.2293, 0.6184]) tensor([0.4855, 0.0956, 0.1256, 0.2932]) -Greedy action tensor([ 0.9974, -0.4138, -0.2900, 0.2978]) tensor([0.4959, 0.1209, 0.1369, 0.2463]) -Greedy action tensor([ 1.4163, -0.7382, -0.4814, 0.1810]) tensor([0.6424, 0.0745, 0.0963, 0.1868]) -Greedy action tensor([ 0.8057, -0.1931, -0.4578, 0.4424]) tensor([0.4262, 0.1570, 0.1205, 0.2964]) -Greedy action tensor([ 0.6311, -0.4775, -1.0308, 0.5916]) tensor([0.4031, 0.1330, 0.0765, 0.3874]) -Greedy action tensor([ 1.9463, -1.2110, -0.2733, 0.2988]) tensor([0.7442, 0.0317, 0.0809, 0.1433]) -Greedy action tensor([ 1.3417, -0.5579, -0.4824, 0.1130]) tensor([0.6236, 0.0933, 0.1006, 0.1825]) -Greedy action tensor([ 1.0622, -0.4391, -0.1550, 0.4456]) tensor([0.4858, 0.1082, 0.1438, 0.2622]) -Greedy action tensor([ 0.9875, -0.5357, -0.6403, 0.2626]) tensor([0.5267, 0.1148, 0.1034, 0.2551]) -Greedy action tensor([ 0.6428, -0.5507, -0.0136, 0.0382]) tensor([0.4223, 0.1280, 0.2190, 0.2307]) -Greedy action tensor([ 0.9945, -0.6140, -0.4569, 0.1773]) tensor([0.5330, 0.1067, 0.1249, 0.2354]) -Greedy action tensor([ 1.3899, -0.6597, -0.0664, 0.1482]) tensor([0.6058, 0.0780, 0.1412, 0.1750]) -Greedy action tensor([ 0.7464, -0.1658, -0.1613, 0.0154]) tensor([0.4373, 0.1757, 0.1765, 0.2106]) -Greedy action tensor([ 1.6906, -0.8952, -0.1080, 0.3724]) tensor([0.6629, 0.0499, 0.1097, 0.1774]) -Greedy action tensor([ 0.6393, -0.0202, -0.1671, -0.1905]) tensor([0.4167, 0.2155, 0.1861, 0.1817]) -Greedy action tensor([ 1.2981, -0.5647, -0.4272, 0.2951]) tensor([0.5882, 0.0913, 0.1048, 0.2157]) -Greedy action tensor([ 1.1956, -0.1145, -0.0329, 0.2792]) tensor([0.5096, 0.1375, 0.1492, 0.2038]) -Greedy action tensor([ 1.2236, -0.2949, -0.1542, 0.0752]) tensor([0.5592, 0.1225, 0.1410, 0.1773]) -Greedy action tensor([ 0.5967, -0.1709, -0.1361, -0.0371]) tensor([0.4040, 0.1875, 0.1942, 0.2143]) -Greedy action tensor([ 1.2921, -0.3560, -0.4803, 0.2056]) tensor([0.5883, 0.1132, 0.1000, 0.1985]) -Greedy action tensor([ 0.6148, -0.2368, -0.0751, 0.3333]) tensor([0.3727, 0.1591, 0.1870, 0.2813]) -Greedy action tensor([ 1.5915, -0.3262, -0.3060, 0.1107]) tensor([0.6560, 0.0964, 0.0984, 0.1492]) -Greedy action tensor([ 0.9548, -0.4094, 0.0210, -0.1261]) tensor([0.5030, 0.1286, 0.1977, 0.1707]) -Greedy action tensor([ 1.2005, -0.3756, -0.1156, -0.1577]) tensor([0.5773, 0.1194, 0.1548, 0.1484]) -Greedy action tensor([ 0.5900, -0.5979, -0.0088, 0.0171]) tensor([0.4135, 0.1261, 0.2272, 0.2332]) -Greedy action tensor([ 1.2771, -0.6657, -0.0746, 0.3333]) tensor([0.5583, 0.0800, 0.1445, 0.2172]) -Greedy action tensor([ 1.3130, -0.3221, -0.0696, 0.1890]) tensor([0.5647, 0.1101, 0.1417, 0.1835]) -Greedy action tensor([ 1.7080, -0.4511, -0.1061, 0.3270]) tensor([0.6537, 0.0755, 0.1065, 0.1643]) -Greedy action tensor([ 1.0549, -0.5167, -0.2247, -0.0102]) tensor([0.5463, 0.1135, 0.1520, 0.1883]) -Greedy action tensor([ 0.8745, -0.5697, 0.0193, -0.1892]) tensor([0.4984, 0.1176, 0.2119, 0.1720]) -Greedy action tensor([ 1.4525e+00, -4.4197e-01, -2.3499e-01, -6.6560e-04]) tensor([0.6373, 0.0958, 0.1179, 0.1490]) -Greedy action tensor([ 0.4590, -0.1550, -0.1678, 0.0560]) tensor([0.3645, 0.1972, 0.1947, 0.2436]) -Greedy action tensor([ 1.1932, -0.1555, 0.1269, -0.0683]) tensor([0.5299, 0.1376, 0.1824, 0.1501]) -Greedy action tensor([ 1.5464, -1.1573, -0.4231, 0.6192]) tensor([0.6242, 0.0418, 0.0871, 0.2470]) -Greedy action tensor([ 0.8848, -0.3048, -0.2396, 0.0899]) tensor([0.4806, 0.1463, 0.1561, 0.2170]) -Greedy action tensor([ 0.5535, -0.2790, -0.4805, -0.0923]) tensor([0.4320, 0.1879, 0.1536, 0.2265]) -Greedy action tensor([ 1.0159, -0.5398, 0.0498, 0.0557]) tensor([0.5065, 0.1069, 0.1927, 0.1939]) -Greedy action tensor([ 0.9626, -0.5139, -0.5594, -0.0481]) tensor([0.5523, 0.1262, 0.1205, 0.2010]) -Greedy action tensor([ 1.3771, -0.6909, -0.2469, 0.3534]) tensor([0.5942, 0.0751, 0.1171, 0.2135]) -Greedy action tensor([ 1.0079, -0.0921, 0.0342, -0.0205]) tensor([0.4835, 0.1610, 0.1826, 0.1729]) -Greedy action tensor([ 1.1023, -0.5616, -0.4262, 0.3744]) tensor([0.5293, 0.1003, 0.1148, 0.2556]) -Greedy action tensor([ 0.2364, -0.1504, -0.2813, 0.2473]) tensor([0.3043, 0.2067, 0.1813, 0.3076]) -Greedy action tensor([ 0.7836, -0.5916, 0.0956, 0.1004]) tensor([0.4424, 0.1118, 0.2223, 0.2234]) -Greedy action tensor([ 0.6032, -0.1732, -0.2660, -0.0856]) tensor([0.4199, 0.1932, 0.1761, 0.2109]) -Greedy action tensor([ 1.4585, -0.6770, -0.2007, 0.1954]) tensor([0.6284, 0.0743, 0.1196, 0.1777]) -Greedy action tensor([ 0.5963, -0.1894, 0.1021, -0.2004]) tensor([0.3974, 0.1811, 0.2424, 0.1791]) -Greedy action tensor([ 0.4001, -0.3783, 0.1225, -0.1099]) tensor([0.3550, 0.1630, 0.2689, 0.2131]) -Greedy action tensor([ 1.6849, -0.9556, -0.0315, 0.4032]) tensor([0.6542, 0.0467, 0.1176, 0.1816]) -Greedy action tensor([ 0.3469, -0.3200, -0.1261, 0.1397]) tensor([0.3391, 0.1740, 0.2113, 0.2756]) -Greedy action tensor([ 1.0891, -0.6115, -0.3217, 0.2546]) tensor([0.5375, 0.0981, 0.1311, 0.2333]) -Greedy action tensor([ 1.0461, -0.2648, -0.2022, 0.3057]) tensor([0.4918, 0.1326, 0.1411, 0.2345]) -Greedy action tensor([ 0.7821, 0.0863, -0.0740, -0.3546]) tensor([0.4456, 0.2222, 0.1893, 0.1430]) -Greedy action tensor([ 0.7785, 0.1539, -0.2116, -0.3353]) tensor([0.4474, 0.2395, 0.1662, 0.1469]) -Greedy action tensor([ 1.0351, 0.1097, 0.0368, -0.3996]) tensor([0.4992, 0.1979, 0.1840, 0.1189]) -Greedy action tensor([ 0.8241, -0.3987, 0.0782, 0.1726]) tensor([0.4367, 0.1286, 0.2071, 0.2276]) -Greedy action tensor([ 0.9349, -0.3244, -0.4256, -0.1167]) tensor([0.5292, 0.1502, 0.1358, 0.1849]) -Greedy action tensor([ 1.0615, -0.2630, -0.4232, 0.0269]) tensor([0.5412, 0.1439, 0.1226, 0.1923]) -Greedy action tensor([ 0.8306, -0.3879, -0.5767, 0.7372]) tensor([0.4080, 0.1206, 0.0999, 0.3716]) -Greedy action tensor([ 1.0377, -0.4280, -0.2806, 0.3445]) tensor([0.5004, 0.1155, 0.1339, 0.2502]) -Greedy action tensor([ 1.5616, -0.3708, -0.4814, 0.5522]) tensor([0.6102, 0.0884, 0.0791, 0.2224]) -Greedy action tensor([ 1.3295, -0.5219, -0.2389, 0.1688]) tensor([0.5957, 0.0935, 0.1241, 0.1866]) -Greedy action tensor([ 1.1865, -0.3304, -0.1947, 0.2175]) tensor([0.5405, 0.1186, 0.1358, 0.2051]) -Greedy action tensor([ 1.2170, -0.4595, -0.5521, 0.5390]) tensor([0.5361, 0.1003, 0.0914, 0.2722]) -Greedy action tensor([ 1.4056, -0.4644, -0.5151, 0.3483]) tensor([0.6068, 0.0935, 0.0889, 0.2108]) -Greedy action tensor([ 0.5770, -0.3706, -0.2042, -0.1097]) tensor([0.4258, 0.1651, 0.1949, 0.2143]) -Greedy action tensor([ 0.6898, -0.4171, -0.1843, 0.0826]) tensor([0.4362, 0.1442, 0.1820, 0.2377]) -Greedy action tensor([ 0.8994, -0.4821, -0.2404, 0.3750]) tensor([0.4623, 0.1161, 0.1479, 0.2737]) -Greedy action tensor([ 0.3163, 0.1095, -0.1155, -0.2164]) tensor([0.3279, 0.2667, 0.2129, 0.1925]) -Greedy action tensor([ 1.3395, -0.0289, 0.0094, 0.0167]) tensor([0.5601, 0.1426, 0.1481, 0.1492]) -Greedy action tensor([ 0.7501, -0.2853, 0.1374, -0.0913]) tensor([0.4295, 0.1525, 0.2328, 0.1852]) -Greedy action tensor([ 1.2280, -0.1793, -0.2009, 0.3594]) tensor([0.5252, 0.1286, 0.1258, 0.2204]) -Greedy action tensor([ 1.5647, -0.4366, -0.1769, 0.2217]) tensor([0.6363, 0.0860, 0.1115, 0.1661]) -Greedy action tensor([ 2.2042, -0.8429, -0.3561, 0.6529]) tensor([0.7481, 0.0355, 0.0578, 0.1586]) -Greedy action tensor([ 0.7173, -0.5255, -0.4119, 0.3223]) tensor([0.4375, 0.1263, 0.1414, 0.2948]) -Greedy action tensor([ 1.2821, -0.2260, 0.0046, 0.1653]) tensor([0.5472, 0.1211, 0.1525, 0.1791]) -Greedy action tensor([ 0.7320, -0.0226, -0.0292, -0.3246]) tensor([0.4376, 0.2058, 0.2044, 0.1521]) -Greedy action tensor([ 0.7795, -0.2987, -0.0882, -0.3286]) tensor([0.4784, 0.1628, 0.2009, 0.1580]) -Greedy action tensor([ 1.0808, -0.5161, 0.0483, -0.6217]) tensor([0.5744, 0.1163, 0.2046, 0.1047]) -Greedy action tensor([ 0.7680, -0.6003, 0.0191, -0.3041]) tensor([0.4832, 0.1230, 0.2285, 0.1654]) -Greedy action tensor([ 0.5495, -0.2012, 0.1897, -0.4312]) tensor([0.3929, 0.1855, 0.2742, 0.1474]) -Greedy action tensor([ 0.3393, 0.1982, 0.0169, -0.2809]) tensor([0.3194, 0.2774, 0.2314, 0.1718]) -Greedy action tensor([ 0.4440, -0.0729, 0.0362, -0.2424]) tensor([0.3617, 0.2157, 0.2406, 0.1821]) -Greedy action tensor([ 0.6276, -0.4761, -0.1387, -0.4003]) tensor([0.4642, 0.1540, 0.2157, 0.1661]) -Greedy action tensor([ 0.6278, 0.0393, -0.1480, 0.0107]) tensor([0.3914, 0.2173, 0.1802, 0.2111]) -Greedy action tensor([ 0.6396, -0.0815, -0.1141, -0.2513]) tensor([0.4225, 0.2054, 0.1988, 0.1733]) -Greedy action tensor([ 0.4946, 0.4962, -0.2151, -0.3656]) tensor([0.3429, 0.3434, 0.1686, 0.1451]) -Greedy action tensor([ 0.7886, -0.3724, -0.0815, -0.2632]) tensor([0.4804, 0.1505, 0.2013, 0.1678]) -Greedy action tensor([ 0.5681, -0.5157, -0.0240, -0.5905]) tensor([0.4534, 0.1534, 0.2508, 0.1423]) -Greedy action tensor([ 0.8851, -0.6325, -0.1429, -0.5630]) tensor([0.5519, 0.1210, 0.1974, 0.1297]) -Greedy action tensor([ 0.2783, -0.1248, -0.0576, -0.3714]) tensor([0.3442, 0.2300, 0.2460, 0.1797]) -Greedy action tensor([ 0.8052, -0.3984, -0.0010, -0.5035]) tensor([0.4958, 0.1488, 0.2214, 0.1340]) -Greedy action tensor([ 0.5632, -0.2161, 0.0728, -0.3715]) tensor([0.4059, 0.1862, 0.2485, 0.1594]) -Greedy action tensor([ 0.6424, -0.6352, 0.0655, -0.4059]) tensor([0.4564, 0.1272, 0.2564, 0.1600]) -Greedy action tensor([ 0.6900, -0.2875, 0.0010, -0.0169]) tensor([0.4217, 0.1587, 0.2117, 0.2080]) -Greedy action tensor([ 0.2191, 0.0028, -0.0900, -0.3563]) tensor([0.3224, 0.2597, 0.2367, 0.1813]) -Greedy action tensor([ 0.8164, -0.6812, -0.0232, -0.4930]) tensor([0.5193, 0.1162, 0.2243, 0.1402]) -Greedy action tensor([ 0.6765, -0.2739, 0.0782, -0.3887]) tensor([0.4384, 0.1695, 0.2410, 0.1511]) -Greedy action tensor([ 0.5984, -0.0785, -0.0737, -0.2941]) tensor([0.4118, 0.2093, 0.2103, 0.1687]) -Greedy action tensor([ 0.3429, -0.2044, -0.0201, -0.1180]) tensor([0.3443, 0.1991, 0.2395, 0.2171]) -Greedy action tensor([ 0.4259, 0.0438, -0.0155, -0.4104]) tensor([0.3625, 0.2473, 0.2331, 0.1571]) -Greedy action tensor([ 0.8064, -0.1512, 0.0063, -0.6831]) tensor([0.4858, 0.1864, 0.2183, 0.1095]) -Greedy action tensor([ 0.8552, -0.4199, 0.0132, -0.3309]) tensor([0.4961, 0.1386, 0.2138, 0.1515]) -Greedy action tensor([ 0.3253, 0.0281, -0.1555, -0.0460]) tensor([0.3278, 0.2435, 0.2026, 0.2261]) -Greedy action tensor([ 0.8124, -0.6091, -0.0801, -0.5305]) tensor([0.5230, 0.1262, 0.2142, 0.1365]) -Greedy action tensor([ 0.5273, -0.3129, 0.0171, -0.2488]) tensor([0.4013, 0.1732, 0.2409, 0.1847]) -Greedy action tensor([ 0.6270, -0.4063, -0.0419, -0.3102]) tensor([0.4425, 0.1575, 0.2267, 0.1733]) -Greedy action tensor([ 0.5685, -0.1642, 0.1011, -0.5675]) tensor([0.4118, 0.1979, 0.2581, 0.1322]) -Greedy action tensor([ 0.5094, -0.2946, 0.0639, -0.3410]) tensor([0.3976, 0.1779, 0.2546, 0.1699]) -Greedy action tensor([ 0.4475, -0.2009, -0.0354, -0.2923]) tensor([0.3821, 0.1998, 0.2358, 0.1823]) -Greedy action tensor([ 1.0699, -0.3496, -0.0291, -0.4940]) tensor([0.5604, 0.1355, 0.1867, 0.1173]) -Greedy action tensor([ 0.4503, -0.1078, -0.0067, -0.2494]) tensor([0.3701, 0.2118, 0.2343, 0.1838]) -Greedy action tensor([ 0.5014, -0.2987, -0.0762, -0.3259]) tensor([0.4085, 0.1835, 0.2293, 0.1786]) -Greedy action tensor([ 0.3375, 0.0823, -0.0116, -0.2735]) tensor([0.3308, 0.2563, 0.2333, 0.1796]) -Greedy action tensor([ 0.6067, -0.0038, -0.0643, -0.2461]) tensor([0.4032, 0.2189, 0.2061, 0.1718]) -Greedy action tensor([ 0.1706, 0.2633, 0.0288, -0.2078]) tensor([0.2740, 0.3006, 0.2378, 0.1877]) -Greedy action tensor([ 0.9001, -0.7269, 0.1104, -0.6945]) tensor([0.5395, 0.1060, 0.2449, 0.1095]) -Greedy action tensor([ 1.0436, -0.9243, 0.0298, -0.6128]) tensor([0.5905, 0.0825, 0.2143, 0.1127]) -Greedy action tensor([ 0.4722, 0.0105, 0.0400, -0.2029]) tensor([0.3586, 0.2260, 0.2328, 0.1826]) -Greedy action tensor([ 0.4669, -0.2153, 0.1841, -0.2315]) tensor([0.3628, 0.1834, 0.2734, 0.1804]) -Greedy action tensor([ 0.7306, -0.5082, -0.1046, -0.6020]) tensor([0.5032, 0.1458, 0.2183, 0.1327]) -Greedy action tensor([ 0.3046, -0.0555, -0.1913, -0.2149]) tensor([0.3447, 0.2404, 0.2099, 0.2050]) -Greedy action tensor([ 0.1697, 0.0581, 0.0727, -0.1701]) tensor([0.2846, 0.2545, 0.2583, 0.2026]) -Greedy action tensor([ 0.7361, -0.5350, -0.0259, -0.3293]) tensor([0.4781, 0.1341, 0.2231, 0.1647]) -Greedy action tensor([ 0.3321, 0.1804, 0.0631, -0.1861]) tensor([0.3106, 0.2669, 0.2374, 0.1850]) -Greedy action tensor([ 2.2085e-01, -1.0936e-04, 1.2794e-02, -4.6835e-02]) tensor([0.2959, 0.2373, 0.2404, 0.2264]) -Greedy action tensor([ 0.6389, -0.3335, 0.1986, -0.4558]) tensor([0.4243, 0.1605, 0.2732, 0.1420]) -Greedy action tensor([ 0.5791, -0.1629, -0.0161, -0.3039]) tensor([0.4096, 0.1951, 0.2259, 0.1694]) -Greedy action tensor([ 0.3044, 0.0242, -0.0129, -0.1481]) tensor([0.3205, 0.2422, 0.2334, 0.2039]) -Greedy action tensor([ 0.8071, 0.1876, -0.1163, -0.3427]) tensor([0.4440, 0.2390, 0.1764, 0.1406]) -Greedy action tensor([ 0.3194, 0.0384, -0.0469, -0.0377]) tensor([0.3177, 0.2399, 0.2202, 0.2223]) -Greedy action tensor([ 0.5911, -0.2697, 0.0467, -0.4280]) tensor([0.4230, 0.1789, 0.2454, 0.1527]) -Greedy action tensor([ 0.7102, -0.1865, -0.0962, -0.3378]) tensor([0.4535, 0.1850, 0.2025, 0.1590]) -Greedy action tensor([ 0.6010, -0.4032, -0.1278, -0.5479]) tensor([0.4617, 0.1691, 0.2228, 0.1464]) -Greedy action tensor([ 0.6595, -0.2679, -0.0911, -0.1719]) tensor([0.4342, 0.1718, 0.2050, 0.1891]) -Greedy action tensor([ 0.6691, -0.3859, 0.0256, -0.4759]) tensor([0.4562, 0.1589, 0.2397, 0.1452]) -Greedy action tensor([ 0.8472, -0.6008, -0.0949, -0.4867]) tensor([0.5296, 0.1245, 0.2064, 0.1395]) -Greedy action tensor([ 0.4306, 0.0134, 0.0126, -0.1406]) tensor([0.3470, 0.2286, 0.2284, 0.1960]) -Greedy action tensor([ 0.6969, -0.3096, -0.0673, -0.3943]) tensor([0.4615, 0.1687, 0.2149, 0.1550]) -Greedy action tensor([ 0.6273, -0.3975, -0.1456, -0.3650]) tensor([0.4563, 0.1638, 0.2107, 0.1692]) -Greedy action tensor([ 0.6262, 0.1185, -0.0515, -0.2047]) tensor([0.3929, 0.2365, 0.1995, 0.1712]) -Greedy action tensor([ 0.6441, -0.3190, -0.0116, -0.2888]) tensor([0.4359, 0.1664, 0.2263, 0.1715]) -Greedy action tensor([ 0.7145, -0.6462, -0.0731, -0.5395]) tensor([0.5008, 0.1284, 0.2278, 0.1429]) -Greedy action tensor([ 0.4879, -0.2728, -0.0871, -0.2422]) tensor([0.3981, 0.1860, 0.2240, 0.1918]) -Greedy action tensor([ 0.9902, -0.3702, -0.0362, -0.7681]) tensor([0.5595, 0.1435, 0.2005, 0.0964]) -Greedy action tensor([ 0.3101, -0.1647, -0.1053, -0.2798]) tensor([0.3525, 0.2193, 0.2327, 0.1954]) -Greedy action tensor([ 0.7101, -0.1126, -0.0571, -0.4835]) tensor([0.4532, 0.1991, 0.2104, 0.1374]) -Greedy action tensor([ 0.1940, 0.0804, 0.1117, -0.2132]) tensor([0.2874, 0.2566, 0.2647, 0.1913]) -Greedy action tensor([ 0.3606, 0.1598, -0.0577, -0.2427]) tensor([0.3308, 0.2706, 0.2177, 0.1809]) -Greedy action tensor([ 0.8121, -0.2219, -0.0894, -0.2020]) tensor([0.4707, 0.1674, 0.1911, 0.1708]) -Greedy action tensor([ 0.6718, -0.4689, -0.1384, -0.5494]) tensor([0.4856, 0.1552, 0.2160, 0.1432]) -Greedy action tensor([ 0.2553, -0.0476, 0.1054, -0.1231]) tensor([0.3045, 0.2249, 0.2621, 0.2085]) -Greedy action tensor([ 0.9097, -0.1712, 0.0026, -0.4858]) tensor([0.5023, 0.1704, 0.2028, 0.1244]) -Greedy action tensor([ 0.5398, 0.0411, 0.0329, -0.3633]) tensor([0.3824, 0.2322, 0.2304, 0.1550]) -Greedy action tensor([ 0.6460, -0.3563, -0.0187, -0.3939]) tensor([0.4474, 0.1642, 0.2302, 0.1582]) -Greedy action tensor([ 0.7396, -0.3255, -0.0055, -0.3503]) tensor([0.4639, 0.1599, 0.2202, 0.1560]) -Greedy action tensor([ 0.7781, -0.3235, 0.0865, -0.5794]) tensor([0.4784, 0.1590, 0.2396, 0.1231]) -Greedy action tensor([-1.1996, -0.4427, 0.2429, 0.3723]) tensor([0.0821, 0.1750, 0.3474, 0.3954]) -Greedy action tensor([-1.4491, -0.3489, 0.5505, 0.5888]) tensor([0.0524, 0.1576, 0.3874, 0.4025]) -Greedy action tensor([-1.4707, -0.3441, 0.7719, 0.8649]) tensor([0.0419, 0.1294, 0.3950, 0.4336]) -Greedy action tensor([-1.1580, -0.5416, 0.3930, -0.0523]) tensor([0.0944, 0.1749, 0.4454, 0.2853]) -Greedy action tensor([-1.1940, -0.5843, 1.1016, 1.3142]) tensor([0.0399, 0.0734, 0.3964, 0.4903]) -Greedy action tensor([-0.7847, -0.1914, 0.6591, 1.3449]) tensor([0.0647, 0.1171, 0.2741, 0.5441]) -Greedy action tensor([-1.4781, -0.4124, 0.7200, 0.8198]) tensor([0.0437, 0.1270, 0.3940, 0.4353]) -Greedy action tensor([-1.2895, 0.8898, 0.2750, -0.2084]) tensor([0.0569, 0.5032, 0.2721, 0.1678]) -Greedy action tensor([-1.2760, -0.4796, 0.3954, 0.5796]) tensor([0.0670, 0.1485, 0.3562, 0.4283]) -Greedy action tensor([-1.8812, -0.6598, 0.3618, -0.1977]) tensor([0.0521, 0.1767, 0.4908, 0.2805]) -Greedy action tensor([-1.1200, -0.5824, 0.2364, 0.3458]) tensor([0.0915, 0.1567, 0.3553, 0.3964]) -Greedy action tensor([-0.6516, -0.4650, 1.1597, 1.5382]) tensor([0.0579, 0.0698, 0.3546, 0.5177]) -Greedy action tensor([-1.5394, -0.5504, 0.4647, 0.1444]) tensor([0.0606, 0.1630, 0.4498, 0.3265]) -Greedy action tensor([-1.5538, -0.5261, 0.4707, 0.1707]) tensor([0.0589, 0.1646, 0.4460, 0.3304]) -Greedy action tensor([-1.6587, -0.2528, 0.8052, 0.5413]) tensor([0.0387, 0.1578, 0.4545, 0.3491]) -Greedy action tensor([-0.6748, -0.0501, 0.1776, 0.2676]) tensor([0.1285, 0.2401, 0.3015, 0.3299]) -Greedy action tensor([-1.5315, -0.7475, 0.3943, 0.1079]) tensor([0.0658, 0.1441, 0.4513, 0.3389]) -Greedy action tensor([-1.3145, -0.5468, 0.4152, 0.0669]) tensor([0.0783, 0.1687, 0.4414, 0.3116]) -Greedy action tensor([-0.9417, -0.4867, 0.5237, 1.2383]) tensor([0.0635, 0.1001, 0.2749, 0.5616]) -Greedy action tensor([-1.3175, -0.5998, 0.4444, 0.4392]) tensor([0.0682, 0.1398, 0.3971, 0.3950]) -Greedy action tensor([-0.8764, 0.6609, 0.1838, 0.2817]) tensor([0.0853, 0.3968, 0.2463, 0.2716]) -Greedy action tensor([-1.0666, -0.6113, 0.2223, 0.3132]) tensor([0.0982, 0.1549, 0.3565, 0.3904]) -Greedy action tensor([-1.8366, -1.0193, 0.0365, -0.5662]) tensor([0.0750, 0.1698, 0.4881, 0.2671]) -Greedy action tensor([-0.5316, -0.4739, 0.1572, 0.1847]) tensor([0.1640, 0.1737, 0.3266, 0.3357]) -Greedy action tensor([-0.8965, 0.2733, -0.0565, -0.4710]) tensor([0.1239, 0.3993, 0.2871, 0.1897]) -Greedy action tensor([-1.6333, -0.4123, 0.4809, -0.0019]) tensor([0.0562, 0.1907, 0.4657, 0.2874]) -Greedy action tensor([-1.3044, -0.5049, 1.1969, 1.2640]) tensor([0.0351, 0.0781, 0.4285, 0.4582]) -Greedy action tensor([-1.9365, -0.5778, 1.2219, 0.4845]) tensor([0.0252, 0.0981, 0.5930, 0.2837]) -Greedy action tensor([-1.6720, -0.7280, 0.4900, 0.2132]) tensor([0.0531, 0.1364, 0.4610, 0.3495]) -Greedy action tensor([-1.6794, -0.5065, 0.5551, 0.0466]) tensor([0.0521, 0.1684, 0.4868, 0.2928]) -Greedy action tensor([-1.5036, -0.5588, 0.6370, 0.5211]) tensor([0.0509, 0.1309, 0.4328, 0.3854]) -Greedy action tensor([-1.0631, 0.1499, 0.2286, 0.6742]) tensor([0.0731, 0.2458, 0.2659, 0.4152]) -Greedy action tensor([-1.6605, -0.3691, 0.6593, 0.3779]) tensor([0.0445, 0.1618, 0.4524, 0.3414]) -Greedy action tensor([-1.5476, -0.2762, 0.8112, 0.7776]) tensor([0.0394, 0.1405, 0.4169, 0.4031]) -Greedy action tensor([-1.5654, -1.1096, -0.0382, -0.8853]) tensor([0.1092, 0.1723, 0.5029, 0.2156]) -Greedy action tensor([-1.7668, -0.5041, 0.7645, 0.1774]) tensor([0.0415, 0.1467, 0.5217, 0.2901]) -Greedy action tensor([-1.4361, -0.5316, 0.4951, 0.3221]) tensor([0.0618, 0.1528, 0.4266, 0.3588]) -Greedy action tensor([-1.6065, -0.4192, 0.5922, 0.1779]) tensor([0.0520, 0.1703, 0.4683, 0.3095]) -Greedy action tensor([-1.2271, -0.5352, 0.4080, 0.5900]) tensor([0.0700, 0.1399, 0.3592, 0.4309]) -Greedy action tensor([-1.6915, -0.5368, 0.5963, 0.0451]) tensor([0.0508, 0.1610, 0.5000, 0.2882]) -Greedy action tensor([-1.3552, -0.6212, 0.8372, 0.8101]) tensor([0.0482, 0.1004, 0.4315, 0.4200]) -Greedy action tensor([-1.3307, -0.3995, 0.4727, -0.1791]) tensor([0.0783, 0.1987, 0.4753, 0.2477]) -Greedy action tensor([-1.9325, -0.4565, 0.6633, -0.1731]) tensor([0.0407, 0.1779, 0.5452, 0.2362]) -Greedy action tensor([-1.9920, -0.9239, 0.2818, -0.2580]) tensor([0.0518, 0.1508, 0.5037, 0.2936]) -Greedy action tensor([-2.0074, -0.7888, 0.5116, 0.0098]) tensor([0.0411, 0.1391, 0.5106, 0.3092]) -Greedy action tensor([-1.5254, -0.4934, 0.5364, 0.2445]) tensor([0.0570, 0.1600, 0.4482, 0.3347]) -Greedy action tensor([-0.9926, -0.6167, 0.2395, 0.5409]) tensor([0.0951, 0.1384, 0.3259, 0.4406]) -Greedy action tensor([-1.3537, 0.3894, 0.6688, -0.7164]) tensor([0.0619, 0.3536, 0.4675, 0.1170]) -Greedy action tensor([-1.8255e+00, -2.3415e-01, 6.0556e-01, -1.8615e-04]) tensor([0.0426, 0.2091, 0.4842, 0.2642]) -Greedy action tensor([-1.6959, -0.5316, 0.5444, -0.0024]) tensor([0.0525, 0.1683, 0.4935, 0.2857]) -Greedy action tensor([-0.5452, 0.2136, 0.5998, 1.4474]) tensor([0.0735, 0.1569, 0.2309, 0.5388]) -Greedy action tensor([-1.7126, -0.3787, 0.5418, 0.0019]) tensor([0.0503, 0.1909, 0.4794, 0.2794]) -Greedy action tensor([-1.7951, -0.8082, 0.1668, -0.3997]) tensor([0.0674, 0.1809, 0.4795, 0.2722]) -Greedy action tensor([-1.5664, -0.5462, 0.8992, 0.6657]) tensor([0.0402, 0.1116, 0.4734, 0.3748]) -Greedy action tensor([-1.9026, -0.4922, 0.7762, 0.0677]) tensor([0.0373, 0.1527, 0.5428, 0.2673]) -Greedy action tensor([-1.4950, -0.4992, 0.4814, 0.1844]) tensor([0.0614, 0.1662, 0.4431, 0.3293]) -Greedy action tensor([-1.8589, -0.4286, 0.6232, -0.1290]) tensor([0.0439, 0.1834, 0.5252, 0.2475]) -Greedy action tensor([-1.8612, -0.4332, 0.6597, -0.0675]) tensor([0.0423, 0.1765, 0.5266, 0.2545]) -Greedy action tensor([-1.5320, -0.2482, 0.3944, 0.0950]) tensor([0.0604, 0.2180, 0.4145, 0.3072]) -Greedy action tensor([-1.6969, -0.4553, 0.5636, 0.1111]) tensor([0.0496, 0.1718, 0.4759, 0.3027]) -Greedy action tensor([-1.8909, -0.9813, 0.2517, -0.3961]) tensor([0.0607, 0.1508, 0.5176, 0.2708]) -Greedy action tensor([-1.3771, -0.5626, 0.3547, 0.3081]) tensor([0.0699, 0.1579, 0.3951, 0.3771]) -Greedy action tensor([-1.5978, -0.4943, 0.6691, -0.2767]) tensor([0.0574, 0.1731, 0.5542, 0.2152]) -Greedy action tensor([-1.4457, -0.5277, 0.4005, 0.2485]) tensor([0.0654, 0.1639, 0.4146, 0.3561]) -Greedy action tensor([-1.6266, -0.5127, 0.4843, 0.0270]) tensor([0.0571, 0.1738, 0.4710, 0.2981]) -Greedy action tensor([-1.6450, -0.5090, 0.5157, 0.0565]) tensor([0.0547, 0.1704, 0.4749, 0.3000]) -Greedy action tensor([-0.9111, -0.6392, 0.2775, 0.1725]) tensor([0.1170, 0.1535, 0.3839, 0.3456]) -Greedy action tensor([-0.1494, -0.1952, 0.4172, 0.8715]) tensor([0.1540, 0.1471, 0.2714, 0.4275]) -Greedy action tensor([-1.7434, -0.7130, 0.4175, -0.0453]) tensor([0.0557, 0.1562, 0.4836, 0.3045]) -Greedy action tensor([-1.5527, -0.0021, 0.4371, -0.0904]) tensor([0.0577, 0.2718, 0.4217, 0.2488]) -Greedy action tensor([-1.8315, -0.6890, 1.5906, 0.9491]) tensor([0.0196, 0.0616, 0.6019, 0.3169]) -Greedy action tensor([-1.3822, -0.6433, 0.4066, 0.0666]) tensor([0.0750, 0.1570, 0.4486, 0.3193]) -Greedy action tensor([-1.0012, -0.6236, 0.3788, 0.3620]) tensor([0.0967, 0.1410, 0.3843, 0.3779]) -Greedy action tensor([-1.2574, -0.1482, 0.3254, 0.1559]) tensor([0.0769, 0.2331, 0.3742, 0.3159]) -Greedy action tensor([-1.3395, -0.6139, 0.3462, 0.2072]) tensor([0.0760, 0.1570, 0.4101, 0.3569]) -Greedy action tensor([-1.1109, -0.5324, 1.1443, 1.2643]) tensor([0.0433, 0.0773, 0.4133, 0.4660]) -Greedy action tensor([-1.8563, -0.4170, 0.6547, -0.0877]) tensor([0.0427, 0.1803, 0.5264, 0.2506]) -Greedy action tensor([-1.8662, -0.6696, 0.4304, 0.0042]) tensor([0.0482, 0.1595, 0.4793, 0.3130]) -Greedy action tensor([-1.9903, -0.3978, 0.9887, 0.4709]) tensor([0.0268, 0.1318, 0.5273, 0.3142]) -Greedy action tensor([-1.8172, -0.3376, 0.6007, -0.0560]) tensor([0.0446, 0.1957, 0.5003, 0.2594]) -Greedy action tensor([-1.6731, -0.7299, 0.2790, -0.2104]) tensor([0.0670, 0.1720, 0.4718, 0.2892]) -Greedy action tensor([-0.1167, -0.2195, -0.3234, 0.2678]) tensor([0.2390, 0.2156, 0.1944, 0.3510]) -Greedy action tensor([ 1.0208, 1.4506, -0.4151, 0.4682]) tensor([0.2985, 0.4587, 0.0710, 0.1718]) -Greedy action tensor([ 0.4104, 0.4772, 0.8901, -0.1676]) tensor([0.2355, 0.2518, 0.3805, 0.1321]) -Greedy action tensor([ 1.3021, -0.3728, 1.2684, 0.4161]) tensor([0.3896, 0.0730, 0.3767, 0.1607]) -Greedy action tensor([ 0.7414, -1.7582, 1.0424, 0.5196]) tensor([0.3092, 0.0254, 0.4178, 0.2477]) -Greedy action tensor([ 1.3860, -0.3484, 0.7390, 2.2818]) tensor([0.2410, 0.0425, 0.1262, 0.5903]) -Greedy action tensor([-0.5040, -1.1649, -0.9781, -0.1550]) tensor([0.2812, 0.1452, 0.1750, 0.3986]) -Greedy action tensor([ 0.1335, -0.1243, 0.9406, 0.3011]) tensor([0.1924, 0.1487, 0.4313, 0.2276]) -Greedy action tensor([ 0.7179, -0.3654, 0.4399, 1.5908]) tensor([0.2227, 0.0754, 0.1687, 0.5332]) -Greedy action tensor([ 1.1079, -1.2976, 0.6314, 0.7835]) tensor([0.4108, 0.0371, 0.2551, 0.2970]) -Greedy action tensor([ 0.3068, -0.2126, -0.3202, 0.4848]) tensor([0.3008, 0.1790, 0.1607, 0.3595]) -Greedy action tensor([-0.8683, 0.1894, -0.5740, -0.0262]) tensor([0.1326, 0.3818, 0.1779, 0.3077]) -Greedy action tensor([ 1.6504, -0.2521, 0.9320, 0.1563]) tensor([0.5373, 0.0802, 0.2619, 0.1206]) -Greedy action tensor([ 1.2396, 0.4291, -0.0840, 0.6107]) tensor([0.4456, 0.1981, 0.1186, 0.2376]) -Greedy action tensor([ 1.2113, -0.7334, 0.9595, 0.6279]) tensor([0.4035, 0.0577, 0.3137, 0.2251]) -Greedy action tensor([ 0.1888, -0.8647, 0.3539, 1.0162]) tensor([0.2077, 0.0724, 0.2449, 0.4750]) -Greedy action tensor([ 0.3709, -0.4702, -0.2202, 0.6781]) tensor([0.2990, 0.1289, 0.1656, 0.4065]) -Greedy action tensor([-0.4613, -0.2040, -0.0750, -0.0317]) tensor([0.1886, 0.2440, 0.2776, 0.2899]) -Greedy action tensor([ 1.3963, 0.6029, 0.7241, -0.0474]) tensor([0.4548, 0.2057, 0.2322, 0.1073]) -Greedy action tensor([-0.3482, -0.1981, -0.0289, 0.0910]) tensor([0.1965, 0.2283, 0.2704, 0.3048]) -Greedy action tensor([ 0.8072, 0.3190, -0.6403, 0.7091]) tensor([0.3629, 0.2227, 0.0853, 0.3290]) -Greedy action tensor([1.8325, 0.6456, 0.8467, 0.1765]) tensor([0.5350, 0.1633, 0.1996, 0.1021]) -Greedy action tensor([ 1.0497, -0.7201, 0.6307, 1.1853]) tensor([0.3363, 0.0573, 0.2212, 0.3852]) -Greedy action tensor([ 0.4401, 0.7265, 0.0190, -0.1132]) tensor([0.2807, 0.3737, 0.1842, 0.1614]) -Greedy action tensor([ 0.3015, -1.1532, 1.2244, -0.2928]) tensor([0.2325, 0.0543, 0.5850, 0.1283]) -Greedy action tensor([ 0.4252, 0.4294, 0.5904, -0.2023]) tensor([0.2690, 0.2701, 0.3173, 0.1436]) -Greedy action tensor([0.5906, 1.0507, 0.3565, 0.3268]) tensor([0.2413, 0.3823, 0.1910, 0.1854]) -Greedy action tensor([ 0.5165, -1.3782, -0.2108, 1.0088]) tensor([0.3058, 0.0460, 0.1478, 0.5004]) -Greedy action tensor([ 1.2956, -1.5379, 0.3707, 1.4105]) tensor([0.3880, 0.0228, 0.1539, 0.4353]) -Greedy action tensor([0.7382, 0.3784, 0.5986, 0.6693]) tensor([0.2856, 0.1993, 0.2484, 0.2666]) -Greedy action tensor([ 1.1964, -0.0580, -1.2700, 1.0166]) tensor([0.4534, 0.1293, 0.0385, 0.3788]) -Greedy action tensor([ 1.2222, 0.4062, -0.6567, 0.7297]) tensor([0.4533, 0.2004, 0.0692, 0.2770]) -Greedy action tensor([-0.0717, 0.2013, -0.2504, -1.1109]) tensor([0.2854, 0.3750, 0.2387, 0.1009]) -Greedy action tensor([ 1.0677, 1.1013, 0.0602, -0.2510]) tensor([0.3750, 0.3878, 0.1369, 0.1003]) -Greedy action tensor([ 1.4519, 0.7244, -0.0825, 0.2272]) tensor([0.5019, 0.2425, 0.1082, 0.1475]) -Greedy action tensor([ 0.3600, 0.4231, -1.1506, 0.4761]) tensor([0.2934, 0.3124, 0.0648, 0.3294]) -Greedy action tensor([-0.3347, 1.3189, -0.1912, 0.4371]) tensor([0.1048, 0.5476, 0.1210, 0.2267]) -Greedy action tensor([ 1.1045, -0.9640, -0.5159, 1.5158]) tensor([0.3530, 0.0446, 0.0698, 0.5326]) -Greedy action tensor([-0.4701, -0.1578, -0.8609, 0.4007]) tensor([0.1841, 0.2516, 0.1245, 0.4398]) -Greedy action tensor([ 0.4051, 1.1813, -0.6890, 0.0620]) tensor([0.2371, 0.5153, 0.0794, 0.1682]) -Greedy action tensor([ 0.6492, -0.8980, 1.5781, -0.0780]) tensor([0.2365, 0.0503, 0.5988, 0.1143]) -Greedy action tensor([ 0.0022, -0.5152, 0.1808, -0.6092]) tensor([0.2999, 0.1788, 0.3586, 0.1627]) -Greedy action tensor([ 0.6324, -0.2540, -0.6655, 1.4945]) tensor([0.2467, 0.1017, 0.0674, 0.5842]) -Greedy action tensor([1.1860, 0.3950, 0.3064, 0.3671]) tensor([0.4330, 0.1963, 0.1797, 0.1909]) -Greedy action tensor([ 0.0509, 0.5774, -0.2155, 1.0896]) tensor([0.1591, 0.2694, 0.1219, 0.4496]) -Greedy action tensor([ 1.0458, -0.5611, 0.6196, 1.4000]) tensor([0.3050, 0.0612, 0.1992, 0.4346]) -Greedy action tensor([ 0.8145, -0.6425, 0.4857, 0.1465]) tensor([0.4056, 0.0945, 0.2920, 0.2080]) -Greedy action tensor([ 0.9312, 0.3700, -0.8005, 1.2351]) tensor([0.3223, 0.1839, 0.0570, 0.4368]) -Greedy action tensor([ 1.6271, -0.7452, -0.5186, 1.3041]) tensor([0.5170, 0.0482, 0.0605, 0.3743]) -Greedy action tensor([0.5021, 0.1813, 1.3673, 0.9903]) tensor([0.1745, 0.1266, 0.4145, 0.2843]) -Greedy action tensor([1.4971, 0.1409, 0.0604, 0.8937]) tensor([0.4897, 0.1262, 0.1164, 0.2678]) -Greedy action tensor([ 1.6530, 0.1525, 0.7460, -0.1893]) tensor([0.5602, 0.1249, 0.2262, 0.0888]) -Greedy action tensor([ 1.5426, -0.4230, -0.5474, 0.8796]) tensor([0.5621, 0.0787, 0.0695, 0.2897]) -Greedy action tensor([0.7701, 0.0405, 1.2228, 0.2237]) tensor([0.2752, 0.1327, 0.4328, 0.1594]) -Greedy action tensor([1.7873, 0.1258, 0.1941, 0.5478]) tensor([0.5943, 0.1128, 0.1208, 0.1721]) -Greedy action tensor([ 0.8682, -0.8433, 0.6133, 1.8195]) tensor([0.2200, 0.0397, 0.1705, 0.5697]) -Greedy action tensor([ 0.9871, -0.9686, 1.1238, -0.3180]) tensor([0.3908, 0.0553, 0.4480, 0.1060]) -Greedy action tensor([-0.1717, -2.0633, -0.0626, 0.8047]) tensor([0.2032, 0.0307, 0.2266, 0.5395]) -Greedy action tensor([ 0.4011, -1.2932, 1.3725, 1.7961]) tensor([0.1272, 0.0234, 0.3361, 0.5133]) -Greedy action tensor([1.5708, 0.4457, 0.8466, 1.0544]) tensor([0.4156, 0.1349, 0.2015, 0.2480]) -Greedy action tensor([ 0.0205, -1.6998, -0.4786, 1.2357]) tensor([0.1939, 0.0347, 0.1177, 0.6537]) -Greedy action tensor([ 0.1989, 0.2302, -0.0686, 0.0526]) tensor([0.2731, 0.2818, 0.2090, 0.2360]) -Greedy action tensor([ 0.4735, -1.1194, 0.1829, 0.7517]) tensor([0.3056, 0.0621, 0.2286, 0.4037]) -Greedy action tensor([ 1.2133, -0.7202, 0.2417, 1.1458]) tensor([0.4069, 0.0589, 0.1540, 0.3803]) -Greedy action tensor([ 0.2907, -0.3223, 0.8975, -0.3361]) tensor([0.2557, 0.1385, 0.4691, 0.1366]) -Greedy action tensor([ 0.4011, -0.9284, 1.0642, -0.2317]) tensor([0.2676, 0.0708, 0.5194, 0.1421]) -Greedy action tensor([ 0.8045, -0.5871, 0.4645, -0.2229]) tensor([0.4313, 0.1073, 0.3070, 0.1544]) -Greedy action tensor([1.8079, 0.3627, 0.7230, 0.6826]) tensor([0.5268, 0.1242, 0.1780, 0.1710]) -Greedy action tensor([ 0.7885, 0.0833, 1.2605, -0.3760]) tensor([0.2933, 0.1449, 0.4702, 0.0915]) -Greedy action tensor([-0.2325, 0.2973, 0.6821, -0.8970]) tensor([0.1752, 0.2975, 0.4372, 0.0901]) -Greedy action tensor([ 1.0508, -1.1605, 0.3435, 1.5189]) tensor([0.3125, 0.0342, 0.1541, 0.4991]) -Greedy action tensor([ 2.4693e-01, 5.0790e-02, 4.9566e-01, -4.7567e-04]) tensor([0.2574, 0.2115, 0.3301, 0.2010]) -Greedy action tensor([ 0.0820, -0.2079, 0.2269, -0.0474]) tensor([0.2644, 0.1978, 0.3056, 0.2323]) -Greedy action tensor([ 1.3807, -0.4685, 1.8070, 0.8629]) tensor([0.3044, 0.0479, 0.4663, 0.1814]) -Greedy action tensor([ 0.4839, 0.3027, -0.3392, 0.9340]) tensor([0.2603, 0.2172, 0.1143, 0.4083]) -Greedy action tensor([0.4041, 0.3302, 0.6248, 0.2858]) tensor([0.2461, 0.2285, 0.3068, 0.2186]) -Greedy action tensor([ 1.5512, -1.5712, -0.1391, 0.6224]) tensor([0.6160, 0.0271, 0.1136, 0.2433]) -Greedy action tensor([0.2118, 0.2111, 1.3214, 0.7105]) tensor([0.1497, 0.1496, 0.4541, 0.2465]) -Greedy action tensor([0.7484, 0.6346, 0.9408, 0.6180]) tensor([0.2511, 0.2241, 0.3044, 0.2204]) -Greedy action tensor([ 1.1959, -1.2711, -0.1456, 1.0186]) tensor([0.4579, 0.0388, 0.1197, 0.3835]) -Greedy action tensor([ 1.5666, -0.2329, 0.0035, -0.1150]) tensor([0.6406, 0.1060, 0.1342, 0.1192]) -Greedy action tensor([ 0.0028, -1.6433, 0.7517, 1.0952]) tensor([0.1590, 0.0307, 0.3362, 0.4741]) -Greedy action tensor([ 0.7445, -0.2727, -0.0790, 0.1483]) tensor([0.4253, 0.1538, 0.1866, 0.2343]) -Greedy action tensor([ 1.0721, -0.3189, -0.1464, 0.1506]) tensor([0.5148, 0.1281, 0.1522, 0.2048]) -Greedy action tensor([ 0.9889, -0.4401, -0.1199, 0.2870]) tensor([0.4842, 0.1160, 0.1598, 0.2400]) -Greedy action tensor([ 1.3916, -0.7365, -0.3583, -0.0123]) tensor([0.6500, 0.0774, 0.1130, 0.1597]) -Greedy action tensor([ 1.2990, -0.3327, -0.2046, -0.0252]) tensor([0.5938, 0.1162, 0.1320, 0.1580]) -Greedy action tensor([ 0.8056, -0.5783, -0.3436, 0.6606]) tensor([0.4111, 0.1030, 0.1303, 0.3556]) -Greedy action tensor([ 0.5645, -0.3869, -0.1001, 0.1614]) tensor([0.3893, 0.1503, 0.2003, 0.2601]) -Greedy action tensor([ 0.8405, -0.3193, -0.2486, -0.1522]) tensor([0.4949, 0.1552, 0.1665, 0.1834]) -Greedy action tensor([ 1.1365, -0.4256, -0.2166, -0.3726]) tensor([0.5920, 0.1241, 0.1530, 0.1309]) -Greedy action tensor([ 0.6983, -0.2833, 0.2582, -0.1049]) tensor([0.4054, 0.1519, 0.2611, 0.1816]) -Greedy action tensor([ 1.4103, -0.5476, -0.1294, 0.3093]) tensor([0.5924, 0.0836, 0.1270, 0.1970]) -Greedy action tensor([ 1.2899, -0.3599, -0.0293, 0.0833]) tensor([0.5686, 0.1092, 0.1520, 0.1701]) -Greedy action tensor([ 0.9007, -0.3406, -0.3865, -0.1805]) tensor([0.5251, 0.1518, 0.1450, 0.1781]) -Greedy action tensor([ 0.9392, -0.2054, 0.0192, 0.2932]) tensor([0.4462, 0.1421, 0.1778, 0.2339]) -Greedy action tensor([ 1.0509, -0.5073, -0.3094, 0.2878]) tensor([0.5172, 0.1089, 0.1327, 0.2412]) -Greedy action tensor([ 1.4294, -0.1128, -0.6737, -0.2348]) tensor([0.6556, 0.1402, 0.0800, 0.1241]) -Greedy action tensor([ 1.1042, -0.4346, -0.2103, -0.1648]) tensor([0.5668, 0.1217, 0.1522, 0.1593]) -Greedy action tensor([ 0.6848, -0.5603, -0.0061, -0.0419]) tensor([0.4400, 0.1267, 0.2205, 0.2128]) -Greedy action tensor([ 1.2061, -0.3009, -0.1474, 0.1453]) tensor([0.5476, 0.1213, 0.1415, 0.1896]) -Greedy action tensor([ 1.3979, -0.7778, -0.3179, 0.6687]) tensor([0.5632, 0.0639, 0.1013, 0.2716]) -Greedy action tensor([ 1.6636, -0.7303, -0.2370, 0.2303]) tensor([0.6760, 0.0617, 0.1011, 0.1612]) -Greedy action tensor([ 1.4118, -0.6985, -0.0890, 0.1290]) tensor([0.6167, 0.0748, 0.1375, 0.1710]) -Greedy action tensor([ 1.0205, -0.5016, -0.4449, 0.3530]) tensor([0.5096, 0.1112, 0.1177, 0.2614]) -Greedy action tensor([ 1.4007, -0.3387, 0.0467, 0.2186]) tensor([0.5746, 0.1009, 0.1484, 0.1762]) -Greedy action tensor([ 0.4145, -0.2824, 0.0946, -0.0920]) tensor([0.3537, 0.1762, 0.2569, 0.2132]) -Greedy action tensor([ 1.1930, -0.2285, -0.4549, -0.1043]) tensor([0.5858, 0.1414, 0.1127, 0.1601]) -Greedy action tensor([ 0.8274, -0.3360, -0.2531, -0.0692]) tensor([0.4855, 0.1517, 0.1648, 0.1980]) -Greedy action tensor([ 1.1506, -0.6605, -0.2833, -0.1352]) tensor([0.5958, 0.0974, 0.1420, 0.1647]) -Greedy action tensor([ 1.1468, -0.3505, -0.5674, 0.0428]) tensor([0.5762, 0.1289, 0.1038, 0.1911]) -Greedy action tensor([ 0.9854, -0.7029, -0.0785, 0.2062]) tensor([0.5028, 0.0929, 0.1735, 0.2307]) -Greedy action tensor([ 0.5644, -0.2268, -0.1122, 0.0035]) tensor([0.3949, 0.1790, 0.2007, 0.2254]) -Greedy action tensor([ 0.9366, -0.5089, -0.3652, 0.3679]) tensor([0.4822, 0.1136, 0.1312, 0.2730]) -Greedy action tensor([ 1.0025, -0.5921, -0.3751, 0.2457]) tensor([0.5196, 0.1055, 0.1310, 0.2438]) -Greedy action tensor([ 1.1991, -0.7788, -0.4517, 0.5301]) tensor([0.5427, 0.0751, 0.1041, 0.2780]) -Greedy action tensor([ 1.0128, -0.3822, -0.2299, 0.1348]) tensor([0.5123, 0.1270, 0.1478, 0.2129]) -Greedy action tensor([ 1.2934, -0.4777, -0.2150, -0.0797]) tensor([0.6080, 0.1034, 0.1345, 0.1540]) -Greedy action tensor([ 0.8099, -0.5998, -0.1367, 0.0659]) tensor([0.4745, 0.1159, 0.1841, 0.2255]) -Greedy action tensor([ 0.8351, -0.3608, -0.1438, -0.2579]) tensor([0.4967, 0.1502, 0.1866, 0.1665]) -Greedy action tensor([ 1.0362, -0.1467, -0.1453, 0.0427]) tensor([0.5042, 0.1545, 0.1547, 0.1867]) -Greedy action tensor([ 0.8880, -0.3271, 0.0246, -0.1110]) tensor([0.4792, 0.1422, 0.2021, 0.1765]) -Greedy action tensor([ 0.5190, -0.2670, -0.0430, 0.1302]) tensor([0.3699, 0.1685, 0.2108, 0.2507]) -Greedy action tensor([ 1.2265, -0.5607, -0.6471, 0.5558]) tensor([0.5458, 0.0914, 0.0838, 0.2791]) -Greedy action tensor([ 1.0398, -0.3735, -0.7195, 0.4552]) tensor([0.5069, 0.1233, 0.0873, 0.2825]) -Greedy action tensor([ 1.4592, -0.3726, -0.4700, 0.1749]) tensor([0.6320, 0.1012, 0.0918, 0.1750]) -Greedy action tensor([ 1.0881, -0.4640, -0.2382, 0.2985]) tensor([0.5178, 0.1097, 0.1375, 0.2351]) -Greedy action tensor([ 0.3291, -0.2262, -0.3595, 0.1197]) tensor([0.3464, 0.1988, 0.1740, 0.2809]) -Greedy action tensor([ 1.2743, -0.5080, -0.2662, -0.0705]) tensor([0.6086, 0.1024, 0.1304, 0.1586]) -Greedy action tensor([ 1.3171, -0.3319, -0.2408, 0.2170]) tensor([0.5761, 0.1108, 0.1213, 0.1918]) -Greedy action tensor([ 1.0776, -0.4799, -0.3566, 0.3960]) tensor([0.5116, 0.1078, 0.1219, 0.2588]) -Greedy action tensor([ 1.3154, -0.5091, 0.0851, 0.3076]) tensor([0.5499, 0.0887, 0.1607, 0.2007]) -Greedy action tensor([ 1.1830, -0.0782, 0.0080, -0.3543]) tensor([0.5534, 0.1568, 0.1709, 0.1190]) -Greedy action tensor([ 0.9890, -0.0677, -0.0258, -0.0699]) tensor([0.4862, 0.1690, 0.1762, 0.1686]) -Greedy action tensor([ 1.3029, -0.7234, -0.3720, 0.3178]) tensor([0.5908, 0.0779, 0.1107, 0.2206]) -Greedy action tensor([ 0.9770, -0.4288, -0.3508, 0.6286]) tensor([0.4513, 0.1106, 0.1196, 0.3185]) -Greedy action tensor([ 1.6302, -0.0776, -0.1009, -0.1314]) tensor([0.6535, 0.1185, 0.1157, 0.1123]) -Greedy action tensor([ 1.4353, -0.7678, 0.0265, 0.1856]) tensor([0.6092, 0.0673, 0.1489, 0.1746]) -Greedy action tensor([ 1.3354, -0.3812, -0.3177, 0.1832]) tensor([0.5927, 0.1065, 0.1135, 0.1873]) -Greedy action tensor([ 1.4566, -0.9083, -0.3411, 0.8154]) tensor([0.5598, 0.0526, 0.0928, 0.2948]) -Greedy action tensor([ 0.7596, -0.3156, -0.3978, 0.1550]) tensor([0.4542, 0.1550, 0.1427, 0.2481]) -Greedy action tensor([ 1.0390, -0.5887, -0.3703, 0.2965]) tensor([0.5217, 0.1025, 0.1275, 0.2483]) -Greedy action tensor([ 1.4994, -0.7777, -0.0466, 0.4765]) tensor([0.5969, 0.0612, 0.1272, 0.2146]) -Greedy action tensor([ 1.1893, -0.5413, -0.3891, 0.1705]) tensor([0.5732, 0.1016, 0.1183, 0.2069]) -Greedy action tensor([ 1.0512, -0.3259, -0.2136, -0.2455]) tensor([0.5531, 0.1395, 0.1561, 0.1512]) -Greedy action tensor([ 1.3533, -0.6028, -0.2260, 0.2258]) tensor([0.5983, 0.0846, 0.1233, 0.1938]) -Greedy action tensor([ 0.7238, -0.2686, -0.1309, 0.0535]) tensor([0.4333, 0.1606, 0.1844, 0.2217]) -Greedy action tensor([ 1.1901, -0.5105, -0.1340, 0.1427]) tensor([0.5557, 0.1015, 0.1478, 0.1950]) -Greedy action tensor([ 1.2280, -0.6504, -0.1310, 0.1699]) tensor([0.5692, 0.0870, 0.1462, 0.1976]) -Greedy action tensor([ 1.1142, -0.3700, -0.2950, -0.1861]) tensor([0.5736, 0.1300, 0.1401, 0.1563]) -Greedy action tensor([ 0.4379, 0.0297, 0.0271, -0.2955]) tensor([0.3561, 0.2367, 0.2361, 0.1710]) -Greedy action tensor([ 1.6437, -0.6292, -0.3164, 0.5014]) tensor([0.6398, 0.0659, 0.0901, 0.2042]) -Greedy action tensor([ 0.5143, -0.2730, -0.0195, -0.0018]) tensor([0.3790, 0.1725, 0.2223, 0.2262]) -Greedy action tensor([ 0.3594, 0.1148, 0.0279, -0.2797]) tensor([0.3302, 0.2585, 0.2370, 0.1743]) -Greedy action tensor([ 1.2883, -0.3261, 0.0444, -0.0701]) tensor([0.5733, 0.1141, 0.1652, 0.1474]) -Greedy action tensor([ 1.0522, -0.7534, -0.3582, 0.8231]) tensor([0.4538, 0.0746, 0.1107, 0.3609]) -Greedy action tensor([ 0.9768, -0.1541, 0.0432, -0.0897]) tensor([0.4854, 0.1567, 0.1908, 0.1671]) -Greedy action tensor([ 1.1616, -0.3512, -0.5342, 0.3092]) tensor([0.5464, 0.1204, 0.1002, 0.2330]) -Greedy action tensor([ 0.7362, -0.2839, -0.3223, 0.2017]) tensor([0.4360, 0.1572, 0.1513, 0.2555]) -Greedy action tensor([ 0.7764, -0.2246, 0.1238, -0.1880]) tensor([0.4406, 0.1619, 0.2294, 0.1680]) -Greedy action tensor([ 0.3156, -0.0379, -0.2720, 0.1322]) tensor([0.3236, 0.2272, 0.1798, 0.2694]) -Greedy action tensor([ 1.3390, -0.6477, -0.1136, 0.2674]) tensor([0.5836, 0.0800, 0.1365, 0.1998]) -Greedy action tensor([ 1.8185, -0.6972, -0.2611, 0.3034]) tensor([0.7015, 0.0567, 0.0877, 0.1542]) -Greedy action tensor([ 0.5884, -0.2797, -0.0987, -0.4186]) tensor([0.4370, 0.1835, 0.2199, 0.1597]) -Greedy action tensor([ 0.4138, -0.0656, -0.0763, -0.2103]) tensor([0.3613, 0.2237, 0.2213, 0.1936]) -Greedy action tensor([ 0.3798, -0.0517, 0.0185, -0.1983]) tensor([0.3440, 0.2234, 0.2397, 0.1930]) -Greedy action tensor([ 0.7331, -0.4565, 0.0501, -0.3370]) tensor([0.4646, 0.1414, 0.2347, 0.1593]) -Greedy action tensor([ 0.5068, -0.2122, 0.1100, -0.2378]) tensor([0.3796, 0.1849, 0.2552, 0.1803]) -Greedy action tensor([ 0.5896, -0.2316, -0.0118, -0.3450]) tensor([0.4201, 0.1848, 0.2302, 0.1650]) -Greedy action tensor([ 0.5177, -0.2882, -0.0896, -0.3596]) tensor([0.4154, 0.1856, 0.2263, 0.1728]) -Greedy action tensor([ 0.5912, -0.2556, -0.0533, -0.2095]) tensor([0.4162, 0.1785, 0.2185, 0.1869]) -Greedy action tensor([ 0.4704, -0.5424, 0.2312, -0.4077]) tensor([0.3897, 0.1415, 0.3068, 0.1620]) -Greedy action tensor([ 0.6436, -0.2934, -0.1262, -0.3232]) tensor([0.4474, 0.1753, 0.2072, 0.1701]) -Greedy action tensor([ 0.9694, -0.2745, -0.1038, -0.4031]) tensor([0.5309, 0.1530, 0.1815, 0.1346]) -Greedy action tensor([ 0.3963, 0.2570, -0.1241, -0.1937]) tensor([0.3313, 0.2882, 0.1969, 0.1836]) -Greedy action tensor([ 0.3673, 0.0221, 0.1213, -0.1869]) tensor([0.3263, 0.2311, 0.2551, 0.1875]) -Greedy action tensor([ 0.5118, -0.2239, -0.0811, -0.3386]) tensor([0.4067, 0.1948, 0.2248, 0.1737]) -Greedy action tensor([ 0.5761, -0.3375, -0.0910, -0.2047]) tensor([0.4215, 0.1691, 0.2163, 0.1931]) -Greedy action tensor([ 0.9003, -0.7836, -0.1919, -0.5177]) tensor([0.5671, 0.1053, 0.1903, 0.1374]) -Greedy action tensor([ 0.7432, -0.4325, -0.0312, -0.4067]) tensor([0.4793, 0.1479, 0.2210, 0.1518]) -Greedy action tensor([ 0.4119, -0.0502, -0.0774, -0.0946]) tensor([0.3514, 0.2214, 0.2154, 0.2118]) -Greedy action tensor([ 0.2001, 0.0833, -0.0565, -0.2012]) tensor([0.3000, 0.2670, 0.2321, 0.2009]) -Greedy action tensor([ 0.7338, -0.5486, 0.0696, -0.4602]) tensor([0.4773, 0.1324, 0.2457, 0.1446]) -Greedy action tensor([ 0.9158, -0.4960, -0.1625, -0.7349]) tensor([0.5631, 0.1372, 0.1916, 0.1081]) -Greedy action tensor([ 8.3746e-01, -5.4506e-01, 6.5976e-04, -5.3908e-01]) tensor([0.5164, 0.1296, 0.2236, 0.1304]) -Greedy action tensor([ 0.4002, -0.0780, 0.0260, -0.2128]) tensor([0.3509, 0.2175, 0.2414, 0.1901]) -Greedy action tensor([ 0.4061, 0.1502, -0.1321, -0.0024]) tensor([0.3308, 0.2561, 0.1931, 0.2199]) -Greedy action tensor([ 0.6821, -0.3787, -0.0572, -0.6336]) tensor([0.4780, 0.1655, 0.2282, 0.1283]) -Greedy action tensor([ 0.4564, -0.1707, -0.0336, -0.2032]) tensor([0.3754, 0.2005, 0.2300, 0.1941]) -Greedy action tensor([ 0.6534, -0.4640, -0.0922, -0.4124]) tensor([0.4660, 0.1524, 0.2211, 0.1605]) -Greedy action tensor([ 0.5877, -0.3508, -0.0362, -0.3987]) tensor([0.4348, 0.1701, 0.2330, 0.1621]) -Greedy action tensor([ 0.3542, -0.2225, -0.1584, -0.4192]) tensor([0.3814, 0.2142, 0.2284, 0.1760]) -Greedy action tensor([ 0.2303, -0.0717, -0.0809, -0.4932]) tensor([0.3382, 0.2500, 0.2478, 0.1640]) -Greedy action tensor([ 0.3228, 0.0385, 0.0606, -0.1928]) tensor([0.3206, 0.2413, 0.2467, 0.1914]) -Greedy action tensor([ 0.7397, -0.6537, 0.0277, -0.4647]) tensor([0.4905, 0.1218, 0.2407, 0.1471]) -Greedy action tensor([ 0.6322, -0.4183, -0.1221, -0.3172]) tensor([0.4531, 0.1585, 0.2131, 0.1753]) -Greedy action tensor([ 0.6181, -0.4738, -0.0354, -0.1864]) tensor([0.4342, 0.1457, 0.2259, 0.1942]) -Greedy action tensor([ 0.3297, -0.0215, -0.1097, -0.2879]) tensor([0.3463, 0.2437, 0.2232, 0.1868]) -Greedy action tensor([ 0.2438, -0.0561, 0.1133, -0.3190]) tensor([0.3137, 0.2324, 0.2753, 0.1787]) -Greedy action tensor([ 0.5199, -0.2068, -0.0225, -0.1589]) tensor([0.3888, 0.1880, 0.2260, 0.1972]) -Greedy action tensor([ 0.5514, -0.1747, -0.0626, -0.3684]) tensor([0.4126, 0.1996, 0.2233, 0.1645]) -Greedy action tensor([ 0.3243, -0.0053, -0.0085, -0.2111]) tensor([0.3310, 0.2380, 0.2373, 0.1938]) -Greedy action tensor([ 0.7333, -0.5361, -0.1183, -0.4779]) tensor([0.4986, 0.1401, 0.2128, 0.1485]) -Greedy action tensor([ 0.4375, -0.3639, 0.1470, -0.3323]) tensor([0.3760, 0.1687, 0.2812, 0.1741]) -Greedy action tensor([ 0.8500, -0.1831, 0.0063, -0.4526]) tensor([0.4860, 0.1730, 0.2090, 0.1321]) -Greedy action tensor([ 0.5118, -0.3420, -0.2232, -0.6690]) tensor([0.4520, 0.1925, 0.2167, 0.1388]) -Greedy action tensor([ 0.5090, -0.0146, -0.0569, -0.1138]) tensor([0.3708, 0.2197, 0.2106, 0.1989]) -Greedy action tensor([ 0.1324, -0.1221, 0.1040, -0.1308]) tensor([0.2844, 0.2205, 0.2765, 0.2186]) -Greedy action tensor([ 0.7910, -0.7527, -0.0745, -0.7510]) tensor([0.5410, 0.1156, 0.2277, 0.1158]) -Greedy action tensor([ 1.0722, -0.2624, -0.0084, -0.4172]) tensor([0.5470, 0.1440, 0.1856, 0.1233]) -Greedy action tensor([ 0.1287, -0.1625, 0.0994, -0.2061]) tensor([0.2912, 0.2176, 0.2828, 0.2084]) -Greedy action tensor([ 0.8378, -0.5966, 0.0923, -0.3614]) tensor([0.4965, 0.1183, 0.2356, 0.1497]) -Greedy action tensor([ 0.5243, -0.5528, 0.1718, -0.4686]) tensor([0.4143, 0.1411, 0.2912, 0.1535]) -Greedy action tensor([ 0.7834, -0.5673, -0.1579, -0.5115]) tensor([0.5200, 0.1347, 0.2029, 0.1424]) -Greedy action tensor([ 0.8013, -0.4481, -0.0906, -0.3928]) tensor([0.5001, 0.1434, 0.2050, 0.1515]) -Greedy action tensor([ 0.6067, -0.3993, -0.0853, -0.2706]) tensor([0.4382, 0.1602, 0.2193, 0.1822]) -Greedy action tensor([ 0.3190, -0.2214, 0.0166, -0.3839]) tensor([0.3550, 0.2068, 0.2624, 0.1758]) -Greedy action tensor([0.3220, 0.2208, 0.0477, 0.0802]) tensor([0.2899, 0.2620, 0.2204, 0.2277]) -Greedy action tensor([ 0.3588, -0.0825, -0.0942, -0.1611]) tensor([0.3480, 0.2238, 0.2212, 0.2069]) -Greedy action tensor([ 0.6507, -0.2909, -0.0593, -0.4014]) tensor([0.4483, 0.1748, 0.2204, 0.1565]) -Greedy action tensor([ 0.7689, 0.0328, -0.1395, -0.3209]) tensor([0.4508, 0.2159, 0.1817, 0.1516]) -Greedy action tensor([ 0.5282, -0.3017, -0.0796, -0.5076]) tensor([0.4282, 0.1867, 0.2331, 0.1520]) -Greedy action tensor([ 0.5160, -0.2907, 0.0280, -0.4850]) tensor([0.4119, 0.1838, 0.2529, 0.1514]) -Greedy action tensor([ 0.9885, -0.4246, -0.0310, -0.2381]) tensor([0.5270, 0.1283, 0.1901, 0.1546]) -Greedy action tensor([ 0.5842, -0.2803, -0.0363, -0.2332]) tensor([0.4166, 0.1755, 0.2240, 0.1839]) -Greedy action tensor([ 0.4858, -0.1694, -0.0020, -0.2634]) tensor([0.3837, 0.1993, 0.2356, 0.1814]) -Greedy action tensor([ 0.4681, 0.1345, 0.0486, -0.3455]) tensor([0.3550, 0.2543, 0.2334, 0.1574]) -Greedy action tensor([ 1.0467, -0.9876, -0.0635, -0.5491]) tensor([0.6013, 0.0786, 0.1981, 0.1219]) -Greedy action tensor([ 0.6958, -0.2702, -0.0804, -0.2606]) tensor([0.4494, 0.1711, 0.2068, 0.1727]) -Greedy action tensor([ 0.7940, -0.4315, -0.0251, -0.5450]) tensor([0.5009, 0.1471, 0.2208, 0.1313]) -Greedy action tensor([ 0.8393, -0.6546, -0.0079, -0.4812]) tensor([0.5208, 0.1169, 0.2232, 0.1391]) -Greedy action tensor([ 1.1746, -0.7878, -0.0150, -0.7577]) tensor([0.6291, 0.0884, 0.1915, 0.0911]) -Greedy action tensor([ 0.6110, -0.5770, 0.0147, -0.3244]) tensor([0.4448, 0.1356, 0.2450, 0.1746]) -Greedy action tensor([ 0.7181, -0.3982, -0.1534, -0.3703]) tensor([0.4802, 0.1572, 0.2009, 0.1617]) -Greedy action tensor([ 0.7469, -0.2214, -0.1170, -0.5345]) tensor([0.4810, 0.1826, 0.2027, 0.1336]) -Greedy action tensor([ 0.5666, -0.4003, 0.0909, -0.4606]) tensor([0.4238, 0.1612, 0.2634, 0.1517]) -Greedy action tensor([ 0.5291, 0.2356, -0.1209, -0.2147]) tensor([0.3646, 0.2718, 0.1903, 0.1733]) -Greedy action tensor([ 0.9869, -0.5785, 0.1234, -0.6215]) tensor([0.5462, 0.1142, 0.2303, 0.1094]) -Greedy action tensor([ 0.6111, -0.4224, 0.0564, -0.3258]) tensor([0.4307, 0.1532, 0.2473, 0.1688]) -Greedy action tensor([ 0.6662, -0.3483, -0.0440, -0.3099]) tensor([0.4483, 0.1625, 0.2203, 0.1689]) -Greedy action tensor([ 0.8924, -0.6738, 0.1567, -0.3598]) tensor([0.5066, 0.1058, 0.2427, 0.1448]) -Greedy action tensor([ 0.5157, -0.0798, 0.0998, -0.3203]) tensor([0.3782, 0.2085, 0.2495, 0.1639]) -Greedy action tensor([ 0.6747, -0.4030, -0.0450, -0.4728]) tensor([0.4663, 0.1587, 0.2270, 0.1480]) -Greedy action tensor([ 0.8316, -0.7984, -0.1054, -0.3168]) tensor([0.5250, 0.1029, 0.2057, 0.1665]) -Greedy action tensor([-1.8046, -0.4541, 0.5941, -0.1129]) tensor([0.0470, 0.1812, 0.5169, 0.2549]) -Greedy action tensor([-1.4854, -0.5115, 0.5154, 0.3137]) tensor([0.0585, 0.1550, 0.4328, 0.3537]) -Greedy action tensor([-1.8779, -0.4574, 0.6561, -0.0845]) tensor([0.0421, 0.1743, 0.5306, 0.2530]) -Greedy action tensor([-1.4843, -0.0478, 0.5175, 0.3081]) tensor([0.0537, 0.2260, 0.3977, 0.3226]) -Greedy action tensor([-1.8755, -0.4516, 0.6807, -0.0201]) tensor([0.0409, 0.1700, 0.5274, 0.2617]) -Greedy action tensor([-1.3440, -0.7189, 0.8132, 0.8445]) tensor([0.0489, 0.0914, 0.4231, 0.4365]) -Greedy action tensor([-0.7802, -0.6890, 1.0119, 1.6369]) tensor([0.0518, 0.0567, 0.3108, 0.5807]) -Greedy action tensor([-1.5897, -0.5410, 0.4992, 0.0221]) tensor([0.0590, 0.1685, 0.4767, 0.2958]) -Greedy action tensor([-1.7640, -0.7302, 0.2350, -0.2889]) tensor([0.0642, 0.1807, 0.4743, 0.2808]) -Greedy action tensor([-2.0042, -0.4802, 0.8842, 0.3105]) tensor([0.0297, 0.1363, 0.5334, 0.3006]) -Greedy action tensor([-1.4683, -0.4913, 0.7610, -0.5197]) tensor([0.0644, 0.1710, 0.5983, 0.1662]) -Greedy action tensor([-0.8194, -0.5116, 0.3103, 0.3730]) tensor([0.1143, 0.1555, 0.3537, 0.3766]) -Greedy action tensor([-1.3391, -0.5133, 0.4137, 0.4587]) tensor([0.0663, 0.1513, 0.3824, 0.4000]) -Greedy action tensor([-1.0668, -0.2515, 0.5074, 0.9205]) tensor([0.0650, 0.1469, 0.3138, 0.4743]) -Greedy action tensor([-1.7234, -0.5218, 0.9428, 0.4646]) tensor([0.0362, 0.1204, 0.5207, 0.3228]) -Greedy action tensor([-1.4730, -0.5565, 0.4116, 0.1034]) tensor([0.0670, 0.1676, 0.4412, 0.3242]) -Greedy action tensor([-1.3241, -0.5538, 0.3524, 0.2410]) tensor([0.0752, 0.1626, 0.4023, 0.3599]) -Greedy action tensor([-1.0201, -0.6276, 1.2412, 1.4853]) tensor([0.0411, 0.0609, 0.3945, 0.5035]) -Greedy action tensor([-1.8973, -0.4698, 0.6640, -0.1325]) tensor([0.0417, 0.1740, 0.5406, 0.2437]) -Greedy action tensor([-1.0405, -0.0111, 0.2926, -0.0467]) tensor([0.0971, 0.2720, 0.3685, 0.2624]) -Greedy action tensor([-2.0140, -0.8085, 0.8729, 0.0424]) tensor([0.0332, 0.1109, 0.5961, 0.2598]) -Greedy action tensor([-1.1375, -0.5488, 0.3116, 0.3570]) tensor([0.0868, 0.1564, 0.3698, 0.3870]) -Greedy action tensor([-1.8586, -0.4721, 0.7206, 0.0248]) tensor([0.0404, 0.1616, 0.5325, 0.2655]) -Greedy action tensor([-1.8831, -0.4094, 0.6913, 0.0567]) tensor([0.0393, 0.1716, 0.5157, 0.2734]) -Greedy action tensor([-1.4411, 0.0322, 0.4034, 0.4015]) tensor([0.0556, 0.2424, 0.3514, 0.3507]) -Greedy action tensor([-1.0082, -0.3187, 0.6891, 1.2207]) tensor([0.0564, 0.1123, 0.3077, 0.5236]) -Greedy action tensor([-1.7576, -0.6351, 1.3944, 0.9248]) tensor([0.0238, 0.0730, 0.5557, 0.3475]) -Greedy action tensor([-1.2556, -0.5557, 0.7335, 1.0350]) tensor([0.0495, 0.0997, 0.3618, 0.4891]) -Greedy action tensor([-1.3274, 0.3324, 0.3174, -0.0239]) tensor([0.0661, 0.3478, 0.3426, 0.2435]) -Greedy action tensor([-2.0210, -0.8443, 0.5559, -0.0156]) tensor([0.0403, 0.1306, 0.5299, 0.2992]) -Greedy action tensor([-0.9327, -0.0646, -0.4401, -0.4430]) tensor([0.1504, 0.3582, 0.2461, 0.2454]) -Greedy action tensor([-1.2494, 0.4761, 0.2419, 0.0520]) tensor([0.0679, 0.3811, 0.3016, 0.2494]) -Greedy action tensor([-1.9698, -0.4899, 1.1507, 0.4910]) tensor([0.0251, 0.1105, 0.5698, 0.2946]) -Greedy action tensor([-0.9496, -0.5999, 0.1805, 0.3551]) tensor([0.1087, 0.1542, 0.3365, 0.4007]) -Greedy action tensor([-1.2383, -0.5171, 1.1765, 1.3053]) tensor([0.0371, 0.0763, 0.4148, 0.4718]) -Greedy action tensor([-0.7476, -0.5421, 0.5066, 0.6918]) tensor([0.1005, 0.1234, 0.3522, 0.4239]) -Greedy action tensor([-1.8195, -0.4263, 0.6053, -0.0940]) tensor([0.0456, 0.1836, 0.5150, 0.2559]) -Greedy action tensor([-1.5613, 0.1762, 0.3619, 0.0509]) tensor([0.0539, 0.3065, 0.3691, 0.2704]) -Greedy action tensor([-1.9194, -0.4870, 0.7931, 0.0229]) tensor([0.0367, 0.1538, 0.5533, 0.2561]) -Greedy action tensor([-1.5720, -0.5122, 0.5854, 0.3255]) tensor([0.0521, 0.1503, 0.4504, 0.3473]) -Greedy action tensor([-0.0556, -0.0777, 1.0047, 1.5893]) tensor([0.0995, 0.0974, 0.2874, 0.5157]) -Greedy action tensor([-0.9515, -0.6212, 0.5089, 0.0636]) tensor([0.1057, 0.1471, 0.4554, 0.2917]) -Greedy action tensor([-0.5311, -0.4070, 0.2121, 0.2105]) tensor([0.1579, 0.1787, 0.3320, 0.3314]) -Greedy action tensor([-1.2959, -0.4002, 0.7304, 1.0760]) tensor([0.0460, 0.1126, 0.3487, 0.4927]) -Greedy action tensor([-1.5436, -0.5110, 0.5345, 0.1974]) tensor([0.0571, 0.1605, 0.4565, 0.3259]) -Greedy action tensor([-1.1441, -0.3681, 0.6213, 0.7145]) tensor([0.0648, 0.1408, 0.3787, 0.4157]) -Greedy action tensor([-1.3614, -0.4386, 0.5042, 0.4683]) tensor([0.0617, 0.1553, 0.3986, 0.3845]) -Greedy action tensor([-1.0478, -0.5822, 1.2729, 1.4716]) tensor([0.0397, 0.0632, 0.4041, 0.4930]) -Greedy action tensor([-1.5013, -0.4356, 0.7261, 0.7848]) tensor([0.0435, 0.1261, 0.4030, 0.4274]) -Greedy action tensor([-1.4336, -0.8427, -0.2880, -0.4956]) tensor([0.1176, 0.2123, 0.3697, 0.3004]) -Greedy action tensor([-1.6447, -0.5871, 0.5088, -0.0431]) tensor([0.0573, 0.1650, 0.4935, 0.2842]) -Greedy action tensor([-1.4544, -0.6987, 0.4779, 0.2592]) tensor([0.0642, 0.1366, 0.4431, 0.3561]) -Greedy action tensor([-1.7722, -0.4781, 0.7759, 0.2636]) tensor([0.0399, 0.1454, 0.5095, 0.3052]) -Greedy action tensor([-1.1559, -0.3641, 0.2438, 0.2864]) tensor([0.0870, 0.1921, 0.3528, 0.3681]) -Greedy action tensor([-1.6829, -0.5305, 0.5328, -0.0910]) tensor([0.0548, 0.1735, 0.5024, 0.2692]) -Greedy action tensor([-1.8005, -0.3863, 0.5725, -0.0934]) tensor([0.0468, 0.1926, 0.5024, 0.2582]) -Greedy action tensor([-1.3856, -0.1467, 0.5532, 0.6578]) tensor([0.0523, 0.1805, 0.3635, 0.4036]) -Greedy action tensor([-1.2403, -0.5632, 0.2939, 0.3342]) tensor([0.0804, 0.1583, 0.3730, 0.3883]) -Greedy action tensor([-1.7570, -0.5402, 1.2130, 0.8730]) tensor([0.0265, 0.0895, 0.5165, 0.3676]) -Greedy action tensor([-1.5115, -0.5197, 0.4500, 0.1442]) tensor([0.0623, 0.1681, 0.4432, 0.3264]) -Greedy action tensor([-0.7604, -0.3794, 0.8379, 1.4954]) tensor([0.0590, 0.0864, 0.2917, 0.5630]) -Greedy action tensor([-0.9081, -0.5781, 0.1894, 0.3328]) tensor([0.1130, 0.1572, 0.3387, 0.3910]) -Greedy action tensor([-1.8101, -0.4820, 0.7985, 0.1701]) tensor([0.0391, 0.1474, 0.5305, 0.2830]) -Greedy action tensor([-0.9135, -0.3319, 0.5127, 1.1679]) tensor([0.0668, 0.1195, 0.2781, 0.5356]) -Greedy action tensor([-1.3306, -0.3191, 0.4843, -0.2555]) tensor([0.0780, 0.2145, 0.4790, 0.2286]) -Greedy action tensor([-1.8739, -0.4178, 0.6183, -0.1346]) tensor([0.0433, 0.1859, 0.5240, 0.2468]) -Greedy action tensor([-1.5447, 0.0751, 0.6153, 0.4103]) tensor([0.0459, 0.2319, 0.3980, 0.3242]) -Greedy action tensor([-1.0703, -0.1285, -0.1511, -0.3330]) tensor([0.1225, 0.3142, 0.3072, 0.2561]) -Greedy action tensor([-1.5898, -0.6164, 0.7124, 0.3743]) tensor([0.0481, 0.1274, 0.4813, 0.3432]) -Greedy action tensor([-1.3561, 0.4759, 0.4930, -0.5003]) tensor([0.0627, 0.3915, 0.3983, 0.1475]) -Greedy action tensor([-1.9495, -0.7388, 0.0871, -0.3154]) tensor([0.0583, 0.1957, 0.4470, 0.2989]) -Greedy action tensor([-1.4098, -0.3576, 0.9887, 1.0645]) tensor([0.0374, 0.1071, 0.4116, 0.4440]) -Greedy action tensor([-1.9830, -0.7822, 0.6123, -0.0456]) tensor([0.0405, 0.1347, 0.5433, 0.2814]) -Greedy action tensor([-1.1408, -0.4006, 0.5241, 0.9009]) tensor([0.0622, 0.1303, 0.3286, 0.4789]) -Greedy action tensor([-1.5967, -0.2864, 0.9685, 0.7734]) tensor([0.0352, 0.1305, 0.4577, 0.3766]) -Greedy action tensor([-1.8034, -0.4840, 0.6250, -0.0290]) tensor([0.0455, 0.1702, 0.5160, 0.2683]) -Greedy action tensor([-1.1404, -0.5719, 0.2703, 0.3378]) tensor([0.0889, 0.1569, 0.3644, 0.3898]) -Greedy action tensor([-1.1014, -0.6476, -0.2100, -0.3964]) tensor([0.1421, 0.2237, 0.3465, 0.2876]) -Greedy action tensor([-1.8134, -0.4054, 0.8738, 0.4380]) tensor([0.0342, 0.1396, 0.5017, 0.3245]) -Greedy action tensor([-0.2598, 1.0405, -0.8894, -0.2089]) tensor([0.1599, 0.5867, 0.0852, 0.1682]) -Greedy action tensor([-0.4543, -0.4980, 0.1927, 0.1763]) tensor([0.1740, 0.1666, 0.3324, 0.3270]) -Greedy action tensor([-0.0268, 1.1209, 0.2303, -0.6077]) tensor([0.1666, 0.5248, 0.2154, 0.0932]) -Greedy action tensor([0.5546, 0.5114, 1.0517, 0.2968]) tensor([0.2286, 0.2189, 0.3758, 0.1767]) -Greedy action tensor([ 1.2149, -0.7424, -0.0836, 1.2028]) tensor([0.4163, 0.0588, 0.1136, 0.4113]) -Greedy action tensor([ 0.4786, 0.5567, 2.5424, -0.2822]) tensor([0.0959, 0.1037, 0.7555, 0.0448]) -Greedy action tensor([ 0.7787, -1.1242, 1.6698, -0.3794]) tensor([0.2563, 0.0382, 0.6249, 0.0805]) -Greedy action tensor([ 0.4969, -1.4906, 0.4429, 0.1589]) tensor([0.3574, 0.0490, 0.3387, 0.2549]) -Greedy action tensor([ 0.6546, -0.0867, 0.1689, 0.7201]) tensor([0.3165, 0.1508, 0.1947, 0.3380]) -Greedy action tensor([ 0.4566, -0.1151, -0.7739, 1.0644]) tensor([0.2708, 0.1529, 0.0791, 0.4972]) -Greedy action tensor([-0.4048, -0.8456, -0.1549, -0.2315]) tensor([0.2429, 0.1563, 0.3119, 0.2889]) -Greedy action tensor([ 0.8932, 0.1093, -1.1359, 1.3360]) tensor([0.3179, 0.1452, 0.0418, 0.4951]) -Greedy action tensor([-0.6683, 0.4035, 0.6792, -0.1355]) tensor([0.1056, 0.3083, 0.4062, 0.1799]) -Greedy action tensor([ 0.9538, -0.2687, 0.4549, 0.4372]) tensor([0.4003, 0.1179, 0.2430, 0.2388]) -Greedy action tensor([0.2076, 1.4158, 0.0064, 0.0800]) tensor([0.1654, 0.5537, 0.1353, 0.1456]) -Greedy action tensor([-0.2464, 0.3300, 1.3729, 0.1640]) tensor([0.1071, 0.1906, 0.5408, 0.1614]) -Greedy action tensor([ 0.4537, -2.0615, 0.4915, -0.3341]) tensor([0.3885, 0.0314, 0.4034, 0.1767]) -Greedy action tensor([ 0.4404, -1.2781, 0.4675, 0.5719]) tensor([0.2987, 0.0536, 0.3069, 0.3407]) -Greedy action tensor([1.3557, 0.1166, 0.4744, 2.3499]) tensor([0.2269, 0.0657, 0.0940, 0.6133]) -Greedy action tensor([ 1.6938, -0.1058, -0.4394, 1.0180]) tensor([0.5579, 0.0923, 0.0661, 0.2838]) -Greedy action tensor([ 1.6395, -0.7031, 0.2556, 0.7761]) tensor([0.5655, 0.0543, 0.1417, 0.2385]) -Greedy action tensor([ 1.5807, 0.1316, -0.0671, 0.2708]) tensor([0.5892, 0.1383, 0.1134, 0.1590]) -Greedy action tensor([ 0.9619, -0.2185, 0.4637, -0.3134]) tensor([0.4558, 0.1400, 0.2769, 0.1273]) -Greedy action tensor([ 0.8219, -0.8846, -0.5263, 0.5253]) tensor([0.4578, 0.0831, 0.1189, 0.3403]) -Greedy action tensor([-0.2442, -1.7920, -0.8419, 0.4958]) tensor([0.2592, 0.0551, 0.1426, 0.5432]) -Greedy action tensor([-0.3897, 0.6063, 0.0731, 0.2376]) tensor([0.1395, 0.3777, 0.2216, 0.2612]) -Greedy action tensor([0.9397, 0.5059, 0.4627, 0.8362]) tensor([0.3154, 0.2044, 0.1958, 0.2844]) -Greedy action tensor([ 0.5872, 0.2145, -1.2336, 1.3954]) tensor([0.2442, 0.1682, 0.0395, 0.5480]) -Greedy action tensor([-0.2774, 0.0865, 0.1948, -1.1526]) tensor([0.2243, 0.3227, 0.3596, 0.0935]) -Greedy action tensor([-0.0866, -0.6564, -0.2858, 0.5436]) tensor([0.2346, 0.1327, 0.1922, 0.4405]) -Greedy action tensor([ 0.3964, -0.1835, 0.7023, 0.3608]) tensor([0.2575, 0.1442, 0.3497, 0.2485]) -Greedy action tensor([ 0.8106, -0.1257, -0.7415, 1.5091]) tensor([0.2766, 0.1085, 0.0586, 0.5563]) -Greedy action tensor([ 1.2313, -0.1884, 1.8904, -0.4531]) tensor([0.2976, 0.0719, 0.5753, 0.0552]) -Greedy action tensor([-0.0099, -1.0678, 1.8043, 0.3835]) tensor([0.1115, 0.0387, 0.6844, 0.1653]) -Greedy action tensor([ 0.7581, -0.0078, -0.7548, -0.4997]) tensor([0.5077, 0.2361, 0.1118, 0.1443]) -Greedy action tensor([1.7009, 0.0494, 0.0363, 0.8798]) tensor([0.5491, 0.1053, 0.1039, 0.2416]) -Greedy action tensor([ 0.3180, 0.6490, -0.3189, -0.0537]) tensor([0.2769, 0.3856, 0.1465, 0.1910]) -Greedy action tensor([-0.3557, 0.3699, 0.1135, -0.0161]) tensor([0.1648, 0.3404, 0.2634, 0.2314]) -Greedy action tensor([ 0.8232, -1.1233, 0.7630, -0.2462]) tensor([0.4119, 0.0588, 0.3879, 0.1414]) -Greedy action tensor([ 0.1896, -1.9008, -0.5538, 0.2291]) tensor([0.3789, 0.0468, 0.1802, 0.3941]) -Greedy action tensor([ 0.2707, 0.0857, -0.2549, 0.4921]) tensor([0.2725, 0.2265, 0.1611, 0.3400]) -Greedy action tensor([0.7483, 0.7346, 0.7610, 0.5815]) tensor([0.2600, 0.2565, 0.2634, 0.2201]) -Greedy action tensor([ 1.1725, -0.7529, 0.8001, 1.4892]) tensor([0.3118, 0.0455, 0.2148, 0.4279]) -Greedy action tensor([-0.0797, -0.3907, -0.4294, 0.1332]) tensor([0.2721, 0.1994, 0.1918, 0.3367]) -Greedy action tensor([ 2.1740, -0.4995, 0.5329, 1.5680]) tensor([0.5530, 0.0382, 0.1072, 0.3017]) -Greedy action tensor([ 0.6676, 0.4987, -0.5930, 1.0867]) tensor([0.2741, 0.2315, 0.0777, 0.4168]) -Greedy action tensor([ 0.5976, 0.5378, -0.4318, 0.9140]) tensor([0.2724, 0.2566, 0.0973, 0.3738]) -Greedy action tensor([0.3068, 0.5139, 0.2663, 1.7023]) tensor([0.1384, 0.1702, 0.1329, 0.5586]) -Greedy action tensor([ 0.7070, -0.0702, -0.5198, 1.6575]) tensor([0.2304, 0.1059, 0.0676, 0.5961]) -Greedy action tensor([ 0.9709, -0.4220, 1.4875, -0.3549]) tensor([0.3135, 0.0779, 0.5254, 0.0832]) -Greedy action tensor([ 1.1708, 0.2268, -0.4676, 1.1933]) tensor([0.3837, 0.1493, 0.0746, 0.3924]) -Greedy action tensor([1.3067, 0.2795, 0.3199, 0.4623]) tensor([0.4628, 0.1657, 0.1725, 0.1989]) -Greedy action tensor([ 1.1282, -0.0445, -0.1140, 0.8253]) tensor([0.4279, 0.1325, 0.1236, 0.3161]) -Greedy action tensor([-0.2730, -2.0686, -0.3296, 0.5710]) tensor([0.2254, 0.0374, 0.2130, 0.5242]) -Greedy action tensor([ 0.6448, 0.5377, -0.7592, 0.0400]) tensor([0.3717, 0.3340, 0.0913, 0.2030]) -Greedy action tensor([1.1431, 1.0355, 0.2019, 0.6085]) tensor([0.3479, 0.3124, 0.1358, 0.2039]) -Greedy action tensor([-0.1015, -0.2464, -0.6174, -0.5920]) tensor([0.3253, 0.2814, 0.1942, 0.1992]) -Greedy action tensor([0.8901, 0.9636, 0.0636, 0.3054]) tensor([0.3256, 0.3504, 0.1425, 0.1815]) -Greedy action tensor([ 1.2516, 0.7014, -0.4757, 0.2751]) tensor([0.4692, 0.2707, 0.0834, 0.1767]) -Greedy action tensor([ 1.2898, -0.2241, -0.3224, 0.7270]) tensor([0.5027, 0.1106, 0.1003, 0.2864]) -Greedy action tensor([ 0.1237, -0.6386, 0.2251, 0.8448]) tensor([0.2160, 0.1008, 0.2390, 0.4442]) -Greedy action tensor([ 0.6117, -0.0968, 1.0845, 1.4804]) tensor([0.1825, 0.0898, 0.2928, 0.4349]) -Greedy action tensor([ 1.1768, 0.1762, -0.8371, 0.8101]) tensor([0.4558, 0.1676, 0.0608, 0.3158]) -Greedy action tensor([ 0.1583, -0.6469, -0.7171, 0.6850]) tensor([0.2811, 0.1257, 0.1171, 0.4761]) -Greedy action tensor([ 0.1799, -0.6276, 0.8436, -0.1572]) tensor([0.2438, 0.1087, 0.4735, 0.1740]) -Greedy action tensor([ 0.4360, -0.0522, 0.6239, -0.1761]) tensor([0.2974, 0.1825, 0.3589, 0.1612]) -Greedy action tensor([1.2533, 0.4671, 1.0795, 0.5859]) tensor([0.3560, 0.1622, 0.2992, 0.1826]) -Greedy action tensor([ 0.6799, -0.2017, -0.8994, 0.6156]) tensor([0.3909, 0.1619, 0.0806, 0.3666]) -Greedy action tensor([-0.6844, -0.9071, -1.0747, 1.2867]) tensor([0.1036, 0.0829, 0.0701, 0.7434]) -Greedy action tensor([ 1.2639, -0.3442, -0.5670, 0.7627]) tensor([0.5086, 0.1018, 0.0815, 0.3081]) -Greedy action tensor([-0.3067, -0.9808, 0.0702, -0.1946]) tensor([0.2447, 0.1247, 0.3568, 0.2738]) -Greedy action tensor([0.5082, 0.2903, 0.5678, 0.1884]) tensor([0.2784, 0.2239, 0.2955, 0.2022]) -Greedy action tensor([ 0.1893, 0.1239, 1.1352, -0.1310]) tensor([0.1909, 0.1788, 0.4917, 0.1386]) -Greedy action tensor([ 0.7804, -1.2299, 0.6693, 0.8226]) tensor([0.3255, 0.0436, 0.2913, 0.3396]) -Greedy action tensor([ 0.7879, -0.1579, -0.2907, 1.9411]) tensor([0.2042, 0.0793, 0.0694, 0.6470]) -Greedy action tensor([1.0344, 0.3196, 0.6336, 0.7101]) tensor([0.3470, 0.1698, 0.2324, 0.2509]) -Greedy action tensor([ 0.3021, 0.3076, -0.2365, 0.8200]) tensor([0.2343, 0.2356, 0.1367, 0.3933]) -Greedy action tensor([ 0.5661, -2.4851, -0.4149, 0.6792]) tensor([0.3934, 0.0186, 0.1475, 0.4405]) -Greedy action tensor([ 0.9659, -1.0095, 1.5795, 0.4492]) tensor([0.2792, 0.0387, 0.5156, 0.1665]) -Greedy action tensor([ 1.0657, 0.1588, -0.4665, 1.0853]) tensor([0.3788, 0.1530, 0.0819, 0.3863]) -Greedy action tensor([ 1.3465, -0.4312, -0.0759, 0.4479]) tensor([0.5503, 0.0930, 0.1327, 0.2240]) -Greedy action tensor([ 0.2450, 0.1902, 0.0065, -0.1008]) tensor([0.2905, 0.2750, 0.2289, 0.2056]) -Greedy action tensor([ 0.1778, -0.2106, -1.1177, 0.4672]) tensor([0.3042, 0.2063, 0.0833, 0.4063]) -Greedy action tensor([0.4143, 0.0711, 0.4570, 1.5270]) tensor([0.1725, 0.1224, 0.1801, 0.5250]) -Greedy action tensor([ 1.5870, -0.7077, -0.2251, 0.4137]) tensor([0.6355, 0.0641, 0.1038, 0.1966]) -Greedy action tensor([ 1.0030, -0.5773, -0.2371, 0.1866]) tensor([0.5162, 0.1063, 0.1494, 0.2282]) -Greedy action tensor([ 1.4868, -0.4705, -0.4213, 0.2202]) tensor([0.6364, 0.0899, 0.0944, 0.1793]) -Greedy action tensor([ 1.2387, -0.4448, -0.0643, 0.2093]) tensor([0.5511, 0.1024, 0.1497, 0.1969]) -Greedy action tensor([ 0.9623, -0.6413, -0.5518, 0.8514]) tensor([0.4317, 0.0869, 0.0950, 0.3864]) -Greedy action tensor([ 1.3714, -0.6544, -0.0898, 0.2448]) tensor([0.5924, 0.0781, 0.1374, 0.1920]) -Greedy action tensor([ 1.4797, -0.7566, -0.3788, 0.4586]) tensor([0.6162, 0.0658, 0.0961, 0.2219]) -Greedy action tensor([ 0.9402, -0.4075, -0.3347, -0.0994]) tensor([0.5283, 0.1373, 0.1476, 0.1868]) -Greedy action tensor([ 0.7704, -0.4467, -0.0035, 0.1712]) tensor([0.4336, 0.1284, 0.2000, 0.2381]) -Greedy action tensor([ 1.0088, -0.3231, -0.1765, 0.3988]) tensor([0.4732, 0.1249, 0.1447, 0.2572]) -Greedy action tensor([ 0.4378, -0.2339, 0.1922, -0.1121]) tensor([0.3484, 0.1780, 0.2726, 0.2010]) -Greedy action tensor([ 1.1138, -0.2858, -0.0974, 0.3608]) tensor([0.4961, 0.1224, 0.1478, 0.2337]) -Greedy action tensor([ 1.2524, -0.7514, -0.4974, 1.0718]) tensor([0.4666, 0.0629, 0.0811, 0.3895]) -Greedy action tensor([ 0.4215, -0.1688, 0.0751, -0.0492]) tensor([0.3465, 0.1920, 0.2451, 0.2164]) -Greedy action tensor([ 1.1045, -0.0518, 0.1192, -0.1068]) tensor([0.5036, 0.1584, 0.1880, 0.1500]) -Greedy action tensor([ 2.1001, -0.5380, -0.3130, 0.1417]) tensor([0.7680, 0.0549, 0.0688, 0.1083]) -Greedy action tensor([ 0.8381, -0.1249, -0.0646, -0.1258]) tensor([0.4611, 0.1760, 0.1870, 0.1759]) -Greedy action tensor([ 0.8516, -0.3532, 0.0469, 0.1348]) tensor([0.4474, 0.1341, 0.2001, 0.2185]) -Greedy action tensor([ 1.4161, -0.3521, -0.0508, 0.0146]) tensor([0.6070, 0.1036, 0.1400, 0.1495]) -Greedy action tensor([ 0.6040, -0.3879, -0.2920, 0.1659]) tensor([0.4125, 0.1530, 0.1684, 0.2662]) -Greedy action tensor([ 0.5699, -0.3268, -0.3043, 0.5002]) tensor([0.3626, 0.1479, 0.1513, 0.3382]) -Greedy action tensor([ 1.0760, 0.0660, 0.0649, -0.3433]) tensor([0.5076, 0.1849, 0.1847, 0.1228]) -Greedy action tensor([ 0.6921, -0.4122, -0.1784, 0.3104]) tensor([0.4110, 0.1362, 0.1721, 0.2806]) -Greedy action tensor([ 1.3716, -0.4329, -0.1868, 0.2942]) tensor([0.5829, 0.0959, 0.1227, 0.1985]) -Greedy action tensor([ 0.6208, -0.3303, -0.5631, 0.4104]) tensor([0.3996, 0.1544, 0.1223, 0.3238]) -Greedy action tensor([ 1.3700, -0.6133, -0.1092, -0.1366]) tensor([0.6301, 0.0867, 0.1436, 0.1397]) -Greedy action tensor([ 0.8906, -0.2547, -0.5350, 0.5573]) tensor([0.4395, 0.1398, 0.1057, 0.3150]) -Greedy action tensor([ 1.3326, -0.1599, 0.1457, -0.0100]) tensor([0.5583, 0.1255, 0.1704, 0.1458]) -Greedy action tensor([ 1.3788, -0.4624, -0.2324, 0.0607]) tensor([0.6150, 0.0976, 0.1228, 0.1646]) -Greedy action tensor([ 1.5140, -0.6604, -0.1138, 0.4680]) tensor([0.6019, 0.0684, 0.1182, 0.2115]) -Greedy action tensor([ 0.6083, -0.3419, -0.1215, 0.0350]) tensor([0.4111, 0.1590, 0.1982, 0.2317]) -Greedy action tensor([ 0.7137, -0.6108, -0.3451, 0.0916]) tensor([0.4652, 0.1237, 0.1614, 0.2497]) -Greedy action tensor([ 1.1844, -0.1195, -0.1309, -0.2165]) tensor([0.5598, 0.1520, 0.1503, 0.1379]) -Greedy action tensor([ 1.2540, -0.6336, 0.0306, 0.0838]) tensor([0.5695, 0.0862, 0.1676, 0.1767]) -Greedy action tensor([ 1.2255, -0.0858, -0.3844, -0.0411]) tensor([0.5711, 0.1539, 0.1142, 0.1609]) -Greedy action tensor([ 1.2667, -0.8112, -0.2861, 0.0030]) tensor([0.6175, 0.0773, 0.1307, 0.1745]) -Greedy action tensor([ 0.9774, -0.7162, 0.0957, 0.1234]) tensor([0.4942, 0.0909, 0.2046, 0.2104]) -Greedy action tensor([ 0.6171, -0.2897, -0.2511, 0.0061]) tensor([0.4226, 0.1706, 0.1774, 0.2294]) -Greedy action tensor([ 0.3933, 0.2913, -0.3126, -0.4042]) tensor([0.3512, 0.3172, 0.1734, 0.1582]) -Greedy action tensor([ 1.0570, -0.3445, -0.2119, 0.2358]) tensor([0.5083, 0.1252, 0.1429, 0.2236]) -Greedy action tensor([ 1.0749, -0.3807, -0.5469, -0.0311]) tensor([0.5677, 0.1324, 0.1121, 0.1878]) -Greedy action tensor([ 1.6645, -0.7115, -0.4265, 0.4958]) tensor([0.6548, 0.0608, 0.0809, 0.2035]) -Greedy action tensor([ 1.2713, -0.7031, -0.1425, 0.1632]) tensor([0.5840, 0.0811, 0.1420, 0.1928]) -Greedy action tensor([ 0.6047, 0.0648, -0.0891, -0.0193]) tensor([0.3819, 0.2226, 0.1908, 0.2046]) -Greedy action tensor([ 6.0282e-01, -2.4957e-01, -6.0475e-01, -4.5818e-04]) tensor([0.4401, 0.1876, 0.1315, 0.2407]) -Greedy action tensor([ 0.4802, -0.2771, 0.0154, 0.0257]) tensor([0.3660, 0.1716, 0.2300, 0.2323]) -Greedy action tensor([ 2.0287, -0.5581, -0.5126, -0.0781]) tensor([0.7839, 0.0590, 0.0617, 0.0953]) -Greedy action tensor([ 0.9701, -0.6117, -0.2073, 0.2081]) tensor([0.5049, 0.1038, 0.1556, 0.2357]) -Greedy action tensor([ 0.8701, -0.6217, -0.2406, 0.2617]) tensor([0.4765, 0.1072, 0.1569, 0.2593]) -Greedy action tensor([ 0.7034, -0.2742, -0.0167, 0.0722]) tensor([0.4176, 0.1571, 0.2032, 0.2221]) -Greedy action tensor([ 0.6976, -0.5005, 0.1168, -0.1833]) tensor([0.4394, 0.1326, 0.2458, 0.1821]) -Greedy action tensor([ 0.6350, -0.2882, -0.3581, -0.4703]) tensor([0.4765, 0.1893, 0.1765, 0.1578]) -Greedy action tensor([ 1.1401, -0.7119, -0.2397, 0.1317]) tensor([0.5639, 0.0885, 0.1419, 0.2057]) -Greedy action tensor([ 1.1585, -0.5792, -0.2379, 0.0951]) tensor([0.5654, 0.0995, 0.1399, 0.1952]) -Greedy action tensor([ 0.9643, -0.7510, -0.3459, 0.4276]) tensor([0.4916, 0.0884, 0.1326, 0.2874]) -Greedy action tensor([ 0.9288, -0.5108, -0.3988, 0.1313]) tensor([0.5121, 0.1214, 0.1358, 0.2307]) -Greedy action tensor([ 1.1662, -0.7913, -0.0129, 0.1621]) tensor([0.5509, 0.0778, 0.1694, 0.2018]) -Greedy action tensor([ 0.6799, -0.3753, 0.2387, -0.0178]) tensor([0.4017, 0.1399, 0.2584, 0.2000]) -Greedy action tensor([ 0.9675, -0.2744, -0.2519, 0.1539]) tensor([0.4932, 0.1425, 0.1457, 0.2186]) -Greedy action tensor([ 1.2711, -0.5087, -0.3550, 0.4364]) tensor([0.5558, 0.0937, 0.1093, 0.2412]) -Greedy action tensor([ 1.2856, -0.7305, 0.0760, 0.3856]) tensor([0.5441, 0.0724, 0.1623, 0.2212]) -Greedy action tensor([ 0.6010, -0.2846, -0.2583, 0.3291]) tensor([0.3849, 0.1588, 0.1630, 0.2933]) -Greedy action tensor([ 0.5170, -0.1512, 0.1935, -0.1959]) tensor([0.3668, 0.1880, 0.2654, 0.1798]) -Greedy action tensor([ 1.0032, -0.3668, -0.4384, 0.5922]) tensor([0.4643, 0.1180, 0.1098, 0.3078]) -Greedy action tensor([ 1.4062, -0.7118, -0.2466, 0.4272]) tensor([0.5926, 0.0713, 0.1135, 0.2226]) -Greedy action tensor([ 1.0750, -0.2254, -0.1111, -0.0736]) tensor([0.5277, 0.1438, 0.1612, 0.1673]) -Greedy action tensor([ 0.9588, -0.4503, -0.1125, 0.1436]) tensor([0.4927, 0.1204, 0.1688, 0.2181]) -Greedy action tensor([ 0.9159, -0.0335, -0.3100, 0.1680]) tensor([0.4643, 0.1797, 0.1363, 0.2198]) -Greedy action tensor([ 1.5310, -0.2977, -0.2984, 0.4740]) tensor([0.5993, 0.0963, 0.0962, 0.2082]) -Greedy action tensor([ 1.1576, -0.2141, -0.3742, 0.0056]) tensor([0.5600, 0.1420, 0.1210, 0.1769]) -Greedy action tensor([ 1.2635, -0.5629, -0.0310, 0.5766]) tensor([0.5160, 0.0831, 0.1414, 0.2596]) -Greedy action tensor([ 1.1506, -0.3333, -0.2190, -0.0947]) tensor([0.5653, 0.1282, 0.1437, 0.1627]) -Greedy action tensor([ 0.8951, -0.3727, -0.1667, 0.1001]) tensor([0.4810, 0.1354, 0.1664, 0.2172]) -Greedy action tensor([ 1.2049, -0.4411, -0.3121, -0.1806]) tensor([0.6015, 0.1160, 0.1320, 0.1505]) -Greedy action tensor([ 1.6528, -0.5797, -0.3316, 0.8437]) tensor([0.5917, 0.0635, 0.0813, 0.2635]) -Greedy action tensor([ 0.7110, -0.1680, -0.4426, 0.0999]) tensor([0.4398, 0.1826, 0.1388, 0.2387]) -Greedy action tensor([ 1.2087, -0.5316, -0.2381, 0.4463]) tensor([0.5327, 0.0935, 0.1253, 0.2485]) -Greedy action tensor([ 1.6184, -0.1166, -0.4604, 0.0182]) tensor([0.6652, 0.1173, 0.0832, 0.1343]) -Greedy action tensor([ 1.0578, -0.1852, -0.3104, 0.1913]) tensor([0.5093, 0.1469, 0.1297, 0.2141]) -Greedy action tensor([ 0.5297, -0.3651, -0.1357, 0.1289]) tensor([0.3857, 0.1576, 0.1983, 0.2584]) -Greedy action tensor([ 1.0333, -0.0562, -0.0343, -0.1759]) tensor([0.5054, 0.1700, 0.1738, 0.1508]) -Greedy action tensor([ 1.0481, -0.3410, 0.0153, -0.4933]) tensor([0.5496, 0.1370, 0.1957, 0.1177]) -Greedy action tensor([ 0.5118, -0.2154, 0.0421, -0.3141]) tensor([0.3927, 0.1898, 0.2455, 0.1719]) -Greedy action tensor([ 0.4144, 0.0188, -0.0033, -0.3953]) tensor([0.3601, 0.2425, 0.2372, 0.1603]) -Greedy action tensor([ 0.5157, -0.1149, -0.1111, -0.2071]) tensor([0.3919, 0.2086, 0.2094, 0.1902]) -Greedy action tensor([ 0.7764, -0.5037, -0.1154, -0.5029]) tensor([0.5086, 0.1414, 0.2085, 0.1415]) -Greedy action tensor([ 0.2008, -0.0774, 0.1360, -0.2162]) tensor([0.2982, 0.2258, 0.2795, 0.1965]) -Greedy action tensor([ 0.3364, -0.0383, -0.0342, -0.3474]) tensor([0.3469, 0.2385, 0.2395, 0.1751]) -Greedy action tensor([ 0.6411, -0.0602, -0.0661, -0.3117]) tensor([0.4211, 0.2088, 0.2076, 0.1624]) -Greedy action tensor([ 0.7174, -0.0930, -0.0157, -0.3663]) tensor([0.4418, 0.1965, 0.2122, 0.1495]) -Greedy action tensor([ 1.0232, -0.7481, -0.1669, -0.5847]) tensor([0.5972, 0.1016, 0.1816, 0.1196]) -Greedy action tensor([ 0.3683, -0.1428, -0.0456, -0.2343]) tensor([0.3561, 0.2136, 0.2354, 0.1949]) -Greedy action tensor([ 0.6884, -0.5589, -0.0238, -0.6203]) tensor([0.4883, 0.1403, 0.2395, 0.1319]) -Greedy action tensor([ 0.3325, -0.1424, 0.2029, -0.3523]) tensor([0.3328, 0.2070, 0.2924, 0.1678]) -Greedy action tensor([ 1.0541, -0.8044, 0.0049, -0.5897]) tensor([0.5885, 0.0917, 0.2061, 0.1137]) -Greedy action tensor([ 0.4538, -0.1872, -0.0576, -0.3983]) tensor([0.3917, 0.2063, 0.2349, 0.1671]) -Greedy action tensor([ 0.0604, -0.0662, 0.0534, -0.1126]) tensor([0.2692, 0.2372, 0.2673, 0.2264]) -Greedy action tensor([ 0.7860, -0.5163, -0.0011, -0.4746]) tensor([0.4974, 0.1352, 0.2264, 0.1410]) -Greedy action tensor([ 0.2734, 0.1005, 0.0602, -0.1949]) tensor([0.3053, 0.2568, 0.2467, 0.1912]) -Greedy action tensor([ 0.4181, -0.3108, 0.0445, -0.4209]) tensor([0.3842, 0.1853, 0.2644, 0.1660]) -Greedy action tensor([ 0.4064, -0.0470, -0.0444, -0.4108]) tensor([0.3684, 0.2341, 0.2347, 0.1627]) -Greedy action tensor([ 0.8528, -0.2322, -0.0426, -0.4184]) tensor([0.4934, 0.1667, 0.2015, 0.1384]) -Greedy action tensor([ 0.9739, -0.6901, -0.0121, -0.2288]) tensor([0.5368, 0.1017, 0.2003, 0.1612]) -Greedy action tensor([ 0.9157, -0.2663, -0.0795, -0.5639]) tensor([0.5252, 0.1611, 0.1941, 0.1196]) -Greedy action tensor([ 0.6085, -0.3674, -0.0316, -0.4182]) tensor([0.4420, 0.1666, 0.2331, 0.1583]) -Greedy action tensor([ 0.7114, -0.1877, -0.0617, -0.1462]) tensor([0.4362, 0.1775, 0.2013, 0.1850]) -Greedy action tensor([ 0.3025, 0.2256, 0.0636, -0.3044]) tensor([0.3069, 0.2842, 0.2417, 0.1673]) -Greedy action tensor([ 0.5991, -0.3518, 0.0698, -0.5593]) tensor([0.4368, 0.1688, 0.2573, 0.1372]) -Greedy action tensor([ 0.3126, -0.1676, 0.1312, -0.2458]) tensor([0.3306, 0.2045, 0.2757, 0.1891]) -Greedy action tensor([0.3679, 0.1005, 0.1374, 0.0165]) tensor([0.3064, 0.2345, 0.2434, 0.2157]) -Greedy action tensor([ 0.7904, -0.0598, -0.0870, -0.2869]) tensor([0.4579, 0.1957, 0.1904, 0.1559]) -Greedy action tensor([ 0.3894, -0.0317, -0.0680, -0.1312]) tensor([0.3468, 0.2276, 0.2195, 0.2061]) -Greedy action tensor([ 0.6150, -0.5474, 0.2421, -0.5736]) tensor([0.4336, 0.1356, 0.2987, 0.1321]) -Greedy action tensor([ 0.7068, -0.2204, 0.0549, -0.2469]) tensor([0.4344, 0.1719, 0.2263, 0.1674]) -Greedy action tensor([ 0.2794, 0.0120, -0.2980, -0.1105]) tensor([0.3329, 0.2548, 0.1869, 0.2254]) -Greedy action tensor([ 0.6648, -0.4209, -0.0878, -0.3465]) tensor([0.4603, 0.1554, 0.2169, 0.1674]) -Greedy action tensor([ 0.6679, -0.5002, -0.0332, -0.4348]) tensor([0.4675, 0.1454, 0.2319, 0.1552]) -Greedy action tensor([ 0.7223, -0.3368, 0.0658, -0.2249]) tensor([0.4438, 0.1539, 0.2302, 0.1721]) -Greedy action tensor([ 0.5400, -0.2815, -0.0232, -0.2962]) tensor([0.4094, 0.1801, 0.2331, 0.1774]) -Greedy action tensor([ 0.3249, 0.1383, -0.0656, -0.1204]) tensor([0.3178, 0.2636, 0.2150, 0.2036]) -Greedy action tensor([ 0.4458, 0.2208, -0.1533, -0.0929]) tensor([0.3411, 0.2724, 0.1874, 0.1991]) -Greedy action tensor([ 0.4176, -0.2751, 0.2526, -0.4010]) tensor([0.3585, 0.1794, 0.3040, 0.1581]) -Greedy action tensor([ 0.8483, -0.3629, 0.1247, -0.4264]) tensor([0.4849, 0.1444, 0.2352, 0.1355]) -Greedy action tensor([ 0.3862, -0.0918, 0.0773, -0.2416]) tensor([0.3463, 0.2147, 0.2542, 0.1848]) -Greedy action tensor([ 0.4831, 0.0579, -0.2430, -0.1189]) tensor([0.3724, 0.2434, 0.1802, 0.2040]) -Greedy action tensor([ 0.4341, -0.0262, 0.2945, -0.5003]) tensor([0.3456, 0.2181, 0.3006, 0.1358]) -Greedy action tensor([ 0.3736, -0.0727, -0.0173, -0.2133]) tensor([0.3481, 0.2228, 0.2355, 0.1936]) -Greedy action tensor([ 0.5683, -0.3270, 0.1052, -0.3217]) tensor([0.4084, 0.1668, 0.2570, 0.1677]) -Greedy action tensor([ 0.4053, -0.1887, 0.0495, -0.2340]) tensor([0.3597, 0.1986, 0.2520, 0.1898]) -Greedy action tensor([ 0.6792, -0.3866, -0.1149, -0.3573]) tensor([0.4649, 0.1601, 0.2101, 0.1649]) -Greedy action tensor([ 0.9910, -0.3146, -0.0113, -0.4483]) tensor([0.5333, 0.1445, 0.1957, 0.1264]) -Greedy action tensor([ 0.3687, 0.1634, 0.0397, -0.1857]) tensor([0.3217, 0.2620, 0.2315, 0.1848]) -Greedy action tensor([ 0.7257, -0.6049, -0.1369, -0.5672]) tensor([0.5100, 0.1348, 0.2152, 0.1400]) -Greedy action tensor([ 0.8541, -0.2195, 0.1029, -0.6383]) tensor([0.4906, 0.1677, 0.2315, 0.1103]) -Greedy action tensor([ 0.6076, -0.2293, 0.1965, -0.6244]) tensor([0.4188, 0.1814, 0.2776, 0.1222]) -Greedy action tensor([ 0.6804, -0.5109, 0.2785, -0.8085]) tensor([0.4549, 0.1382, 0.3043, 0.1026]) -Greedy action tensor([ 0.5418, -0.2565, -0.0864, -0.1551]) tensor([0.4029, 0.1814, 0.2150, 0.2007]) -Greedy action tensor([ 0.5918, -0.4676, -0.0278, -0.1173]) tensor([0.4207, 0.1458, 0.2264, 0.2070]) -Greedy action tensor([ 0.7190, -0.2639, -0.1307, -0.4734]) tensor([0.4750, 0.1778, 0.2031, 0.1442]) -Greedy action tensor([ 0.4005, 0.0977, 0.1732, -0.1859]) tensor([0.3234, 0.2389, 0.2577, 0.1799]) -Greedy action tensor([ 0.4973, -0.0793, -0.2466, -0.1587]) tensor([0.3912, 0.2198, 0.1859, 0.2030]) -Greedy action tensor([ 0.8020, -0.5322, 0.1102, -0.7195]) tensor([0.5044, 0.1329, 0.2526, 0.1102]) -Greedy action tensor([ 0.4198, 0.0831, -0.0808, -0.1070]) tensor([0.3435, 0.2453, 0.2083, 0.2029]) -Greedy action tensor([ 0.3836, -0.0267, -0.0272, -0.3042]) tensor([0.3535, 0.2345, 0.2344, 0.1777]) -Greedy action tensor([ 0.7433, -0.6947, 0.0330, -0.5491]) tensor([0.4991, 0.1185, 0.2453, 0.1371]) -Greedy action tensor([ 0.5628, -0.3903, -0.0247, -0.3700]) tensor([0.4283, 0.1651, 0.2380, 0.1685]) -Greedy action tensor([ 8.1791e-01, -6.1217e-01, 4.6077e-04, -4.9444e-01]) tensor([0.5128, 0.1227, 0.2264, 0.1380]) -Greedy action tensor([ 0.4888, -0.1816, -0.0005, -0.4077]) tensor([0.3949, 0.2020, 0.2421, 0.1611]) -Greedy action tensor([ 0.6195, -0.0405, -0.0599, -0.2098]) tensor([0.4065, 0.2101, 0.2061, 0.1774]) -Greedy action tensor([ 0.6857, -0.3606, -0.1799, -0.2803]) tensor([0.4645, 0.1632, 0.1955, 0.1768]) -Greedy action tensor([ 0.5739, -0.2161, 0.0166, -0.2201]) tensor([0.4035, 0.1831, 0.2311, 0.1824]) -Greedy action tensor([ 0.7148, -0.2043, -0.0425, -0.6738]) tensor([0.4723, 0.1884, 0.2215, 0.1178]) -Greedy action tensor([ 0.7269, -0.2404, -0.0469, -0.0276]) tensor([0.4326, 0.1644, 0.1995, 0.2034]) -Greedy action tensor([ 0.4981, -0.1856, 0.1489, -0.2932]) tensor([0.3755, 0.1895, 0.2648, 0.1702]) -Greedy action tensor([ 0.4986, -0.3489, 0.0069, -0.5174]) tensor([0.4163, 0.1784, 0.2546, 0.1507]) -Greedy action tensor([ 0.8288, -0.3660, -0.0135, -0.3094]) tensor([0.4869, 0.1474, 0.2097, 0.1560]) -Greedy action tensor([ 0.4488, -0.0162, 0.1155, -0.3339]) tensor([0.3569, 0.2242, 0.2557, 0.1632]) -Greedy action tensor([ 0.8408, -0.3415, -0.1125, -0.4357]) tensor([0.5073, 0.1555, 0.1956, 0.1416]) -Greedy action tensor([ 0.7837, -0.4177, -0.1133, -0.6680]) tensor([0.5147, 0.1548, 0.2099, 0.1205]) -Greedy action tensor([ 0.3232, 0.1963, 0.0075, -0.1917]) tensor([0.3118, 0.2746, 0.2274, 0.1863]) -Greedy action tensor([ 0.5652, -0.3790, -0.1335, -0.3487]) tensor([0.4372, 0.1701, 0.2174, 0.1753]) -Greedy action tensor([ 0.7415, -0.3854, -0.0648, -0.3935]) tensor([0.4780, 0.1549, 0.2134, 0.1536]) -Greedy action tensor([-1.8048, -0.4923, 0.6542, -0.0130]) tensor([0.0446, 0.1658, 0.5218, 0.2678]) -Greedy action tensor([-0.9892, -0.5345, 0.2336, 0.1826]) tensor([0.1087, 0.1713, 0.3692, 0.3509]) -Greedy action tensor([-1.5282, -0.0655, 0.6953, 0.5536]) tensor([0.0443, 0.1912, 0.4093, 0.3552]) -Greedy action tensor([-1.7995, -0.4849, 0.5833, -0.0730]) tensor([0.0472, 0.1758, 0.5116, 0.2654]) -Greedy action tensor([-1.8645, -0.4707, 0.6816, -0.0416]) tensor([0.0417, 0.1681, 0.5321, 0.2581]) -Greedy action tensor([-1.2108, -0.3460, 0.5117, 0.7710]) tensor([0.0616, 0.1463, 0.3450, 0.4471]) -Greedy action tensor([-1.5838, -0.5144, 0.4829, 0.1069]) tensor([0.0580, 0.1691, 0.4583, 0.3147]) -Greedy action tensor([-1.5593, -0.5211, 0.4556, 0.1410]) tensor([0.0595, 0.1681, 0.4465, 0.3259]) -Greedy action tensor([-1.3600, -0.6308, 0.5313, 0.2953]) tensor([0.0670, 0.1388, 0.4438, 0.3505]) -Greedy action tensor([-1.4746, -0.5401, 1.4866, 1.1303]) tensor([0.0275, 0.0699, 0.5308, 0.3717]) -Greedy action tensor([-1.9073, -0.6221, 0.9346, 0.2326]) tensor([0.0330, 0.1195, 0.5667, 0.2808]) -Greedy action tensor([-1.2644, -0.5072, 0.7720, 1.0176]) tensor([0.0486, 0.1035, 0.3721, 0.4757]) -Greedy action tensor([-1.4960, -0.0065, 0.3881, 0.0318]) tensor([0.0602, 0.2668, 0.3959, 0.2772]) -Greedy action tensor([-1.2006, -0.6228, 0.2820, 0.2861]) tensor([0.0861, 0.1535, 0.3794, 0.3810]) -Greedy action tensor([-1.4756, -0.6220, 1.4101, 1.2311]) tensor([0.0276, 0.0648, 0.4943, 0.4133]) -Greedy action tensor([-0.1266, 0.0306, 0.8715, 1.7230]) tensor([0.0890, 0.1041, 0.2414, 0.5656]) -Greedy action tensor([-1.3215, -0.3791, 0.5526, 0.6786]) tensor([0.0572, 0.1469, 0.3729, 0.4230]) -Greedy action tensor([-1.6903, -0.3840, 0.5468, 0.0088]) tensor([0.0512, 0.1891, 0.4796, 0.2801]) -Greedy action tensor([-1.2211, -0.5754, 0.4140, -0.0944]) tensor([0.0899, 0.1715, 0.4612, 0.2774]) -Greedy action tensor([-1.9459, -0.4473, 0.6663, -0.1802]) tensor([0.0401, 0.1794, 0.5463, 0.2343]) -Greedy action tensor([-1.2116, -0.5951, 0.6925, 1.0342]) tensor([0.0526, 0.0974, 0.3531, 0.4969]) -Greedy action tensor([-1.6698, -0.4923, 0.5417, 0.0189]) tensor([0.0532, 0.1728, 0.4859, 0.2881]) -Greedy action tensor([-0.4795, -0.2297, 0.9934, 1.5919]) tensor([0.0686, 0.0880, 0.2991, 0.5442]) -Greedy action tensor([-0.8961, -0.4887, 0.9086, 1.4504]) tensor([0.0525, 0.0790, 0.3194, 0.5491]) -Greedy action tensor([-1.0727, -0.8616, 1.0423, 1.4167]) tensor([0.0443, 0.0547, 0.3671, 0.5339]) -Greedy action tensor([-1.7543, -0.9597, 0.1069, -0.4742]) tensor([0.0755, 0.1672, 0.4857, 0.2716]) -Greedy action tensor([-1.4838, -0.5352, 0.6788, 0.4953]) tensor([0.0513, 0.1323, 0.4456, 0.3709]) -Greedy action tensor([-1.9213, -0.4554, 0.6612, -0.1566]) tensor([0.0410, 0.1775, 0.5422, 0.2393]) -Greedy action tensor([-0.7700, -0.3913, -0.3253, -0.3557]) tensor([0.1807, 0.2639, 0.2819, 0.2735]) -Greedy action tensor([-1.2601, -0.4501, 0.2430, 0.2758]) tensor([0.0807, 0.1815, 0.3629, 0.3750]) -Greedy action tensor([ 0.1084, -0.0751, 0.9636, 1.6896]) tensor([0.1106, 0.0920, 0.2600, 0.5374]) -Greedy action tensor([-0.6445, -0.4038, 0.2500, 0.3265]) tensor([0.1359, 0.1729, 0.3324, 0.3588]) -Greedy action tensor([-1.4381, -0.5742, 0.4062, 0.0876]) tensor([0.0700, 0.1660, 0.4424, 0.3217]) -Greedy action tensor([-1.4834, -0.6121, 0.6727, 0.6248]) tensor([0.0494, 0.1180, 0.4263, 0.4064]) -Greedy action tensor([-0.9359, -0.5363, 0.3041, 0.8000]) tensor([0.0861, 0.1283, 0.2974, 0.4883]) -Greedy action tensor([-0.8205, -0.3475, 0.3821, 1.1891]) tensor([0.0747, 0.1198, 0.2485, 0.5570]) -Greedy action tensor([-1.8394, -0.4691, 0.6292, -0.0664]) tensor([0.0442, 0.1739, 0.5217, 0.2602]) -Greedy action tensor([-1.8308, -0.4565, 0.6127, -0.1258]) tensor([0.0455, 0.1799, 0.5241, 0.2504]) -Greedy action tensor([-1.7218, -0.9947, -0.0178, -0.7799]) tensor([0.0899, 0.1859, 0.4938, 0.2304]) -Greedy action tensor([-1.2202, -0.1510, 0.4630, -0.6470]) tensor([0.0903, 0.2631, 0.4863, 0.1603]) -Greedy action tensor([-1.8582, -0.4295, 0.6184, -0.1317]) tensor([0.0441, 0.1839, 0.5244, 0.2477]) -Greedy action tensor([-0.9940, -0.1214, 0.1265, 0.4564]) tensor([0.0932, 0.2231, 0.2859, 0.3977]) -Greedy action tensor([-1.0650, -0.4529, 0.3992, 0.9247]) tensor([0.0691, 0.1273, 0.2986, 0.5050]) -Greedy action tensor([-1.0213, -0.5682, 1.0542, 1.4093]) tensor([0.0456, 0.0718, 0.3637, 0.5188]) -Greedy action tensor([-1.1786, -0.5820, 0.2931, 0.3050]) tensor([0.0863, 0.1568, 0.3762, 0.3807]) -Greedy action tensor([-1.6081, -0.1994, 0.4370, 0.0595]) tensor([0.0552, 0.2257, 0.4266, 0.2925]) -Greedy action tensor([-1.1449, -0.5933, 0.2525, 0.2614]) tensor([0.0921, 0.1598, 0.3724, 0.3757]) -Greedy action tensor([-1.9498, -0.7582, 0.7682, 0.1530]) tensor([0.0362, 0.1192, 0.5483, 0.2964]) -Greedy action tensor([-1.8059, -0.0226, 0.5164, -0.0816]) tensor([0.0439, 0.2614, 0.4482, 0.2464]) -Greedy action tensor([-0.9043, -0.6015, 0.9847, 1.5310]) tensor([0.0491, 0.0664, 0.3244, 0.5602]) -Greedy action tensor([-1.1637, -0.6119, 0.2497, 0.2416]) tensor([0.0915, 0.1590, 0.3763, 0.3732]) -Greedy action tensor([-1.8142, -0.6762, 0.1199, -0.2483]) tensor([0.0632, 0.1972, 0.4371, 0.3025]) -Greedy action tensor([-1.3091, -0.5724, 1.0132, 1.2349]) tensor([0.0384, 0.0803, 0.3920, 0.4893]) -Greedy action tensor([-1.9017, -0.9528, 0.1443, -0.3313]) tensor([0.0620, 0.1601, 0.4797, 0.2981]) -Greedy action tensor([-1.5750, -0.4009, 0.6396, 0.4133]) tensor([0.0483, 0.1563, 0.4425, 0.3529]) -Greedy action tensor([-1.7510, -0.5126, 0.7803, 0.2093]) tensor([0.0415, 0.1430, 0.5211, 0.2944]) -Greedy action tensor([-1.4293, -0.2446, 0.8923, 0.9333]) tensor([0.0399, 0.1304, 0.4064, 0.4234]) -Greedy action tensor([-1.2342, -0.4919, 1.2565, 1.3096]) tensor([0.0358, 0.0753, 0.4326, 0.4562]) -Greedy action tensor([-1.6574, -0.6830, -0.0046, -0.3920]) tensor([0.0805, 0.2134, 0.4206, 0.2855]) -Greedy action tensor([-1.2286, -0.5229, 0.5906, -0.4343]) tensor([0.0877, 0.1776, 0.5407, 0.1940]) -Greedy action tensor([-1.3385, -0.4972, 0.3251, 0.1934]) tensor([0.0756, 0.1754, 0.3991, 0.3499]) -Greedy action tensor([-1.6490, -0.5012, 0.5304, -0.0057]) tensor([0.0551, 0.1735, 0.4867, 0.2847]) -Greedy action tensor([-1.2944, 0.7038, 0.2872, -0.1712]) tensor([0.0613, 0.4522, 0.2981, 0.1885]) -Greedy action tensor([-0.9258, -0.3861, 0.3987, 1.0447]) tensor([0.0733, 0.1257, 0.2755, 0.5256]) -Greedy action tensor([-1.6153, -0.5013, 0.5391, 0.1048]) tensor([0.0548, 0.1669, 0.4724, 0.3060]) -Greedy action tensor([-0.9950, -0.5494, 0.4713, 1.0444]) tensor([0.0686, 0.1071, 0.2972, 0.5271]) -Greedy action tensor([-0.5692, -0.5426, 0.2310, -0.1095]) tensor([0.1713, 0.1760, 0.3814, 0.2713]) -Greedy action tensor([-1.3340, -0.5359, 0.3352, 0.3451]) tensor([0.0720, 0.1599, 0.3822, 0.3859]) -Greedy action tensor([-1.2774, -0.5332, 0.3342, 0.4579]) tensor([0.0725, 0.1527, 0.3635, 0.4113]) -Greedy action tensor([-1.3375, -0.5565, 0.6148, 0.6956]) tensor([0.0560, 0.1222, 0.3943, 0.4275]) -Greedy action tensor([-1.3832, -0.8187, -0.1635, -0.3889]) tensor([0.1130, 0.1988, 0.3827, 0.3055]) -Greedy action tensor([-1.5716, -0.3788, 0.7588, 0.6049]) tensor([0.0427, 0.1409, 0.4395, 0.3768]) -Greedy action tensor([-1.3338, -0.6329, 1.1932, 1.2738]) tensor([0.0344, 0.0693, 0.4301, 0.4662]) -Greedy action tensor([-1.8606, -0.6444, 1.3942, 0.7849]) tensor([0.0225, 0.0760, 0.5839, 0.3175]) -Greedy action tensor([-1.3830, -0.6222, 0.4698, -0.3555]) tensor([0.0812, 0.1738, 0.5180, 0.2269]) -Greedy action tensor([-1.8173, -0.4705, 0.6393, -0.0129]) tensor([0.0443, 0.1702, 0.5165, 0.2690]) -Greedy action tensor([-1.8111, -0.4801, 0.5881, -0.0786]) tensor([0.0466, 0.1764, 0.5134, 0.2636]) -Greedy action tensor([-1.3170, -0.5242, 0.9867, 1.0104]) tensor([0.0426, 0.0941, 0.4265, 0.4367]) -Greedy action tensor([-0.5652, -0.6327, 0.8863, 1.5923]) tensor([0.0673, 0.0629, 0.2874, 0.5823]) -Greedy action tensor([-1.9002, -0.7324, 0.7651, 0.2233]) tensor([0.0371, 0.1193, 0.5333, 0.3102]) -Greedy action tensor([-1.9200, -0.4457, 0.6553, -0.1577]) tensor([0.0411, 0.1795, 0.5399, 0.2395]) -Greedy action tensor([ 0.4469, 0.0058, 0.0997, -0.2108]) tensor([0.3487, 0.2243, 0.2464, 0.1806]) -Greedy action tensor([ 0.1664, -0.1395, -0.0557, -0.4828]) tensor([0.3268, 0.2407, 0.2617, 0.1708]) -Greedy action tensor([ 0.4360, -0.2783, 0.0146, -0.5219]) tensor([0.3954, 0.1935, 0.2594, 0.1517]) -Greedy action tensor([ 0.5096, -0.1041, -0.1848, -0.2266]) tensor([0.3969, 0.2148, 0.1982, 0.1901]) -Greedy action tensor([ 0.7256, -0.2563, -0.0152, -0.2622]) tensor([0.4497, 0.1685, 0.2144, 0.1675]) -Greedy action tensor([ 0.5129, -0.1341, 0.0302, -0.2904]) tensor([0.3863, 0.2023, 0.2384, 0.1730]) -Greedy action tensor([ 0.7544, -0.2141, 0.0901, -0.3111]) tensor([0.4467, 0.1696, 0.2299, 0.1539]) -Greedy action tensor([ 0.3328, 0.0515, -0.0812, -0.2939]) tensor([0.3390, 0.2559, 0.2241, 0.1811]) -Greedy action tensor([ 0.6608, -0.0136, 0.0614, -0.1838]) tensor([0.4019, 0.2047, 0.2207, 0.1727]) -Greedy action tensor([ 0.4736, -0.2985, -0.0732, -0.1484]) tensor([0.3879, 0.1792, 0.2245, 0.2083]) -Greedy action tensor([ 0.5428, -0.2210, -0.0400, -0.1078]) tensor([0.3928, 0.1830, 0.2193, 0.2049]) -Greedy action tensor([ 1.0791, -0.7652, -0.1751, -0.7104]) tensor([0.6209, 0.0982, 0.1772, 0.1037]) -Greedy action tensor([ 0.7594, -0.3977, -0.1015, -0.5458]) tensor([0.4979, 0.1565, 0.2105, 0.1350]) -Greedy action tensor([ 0.3671, 0.0953, -0.0278, -0.0380]) tensor([0.3223, 0.2456, 0.2172, 0.2149]) -Greedy action tensor([ 0.1960, -0.0828, -0.1128, -0.3668]) tensor([0.3267, 0.2472, 0.2399, 0.1861]) -Greedy action tensor([ 0.4205, -0.1283, 0.0169, -0.6174]) tensor([0.3846, 0.2222, 0.2569, 0.1362]) -Greedy action tensor([ 0.6793, -0.2648, 0.0094, -0.4114]) tensor([0.4471, 0.1739, 0.2288, 0.1502]) -Greedy action tensor([ 0.4331, -0.0465, -0.0077, -0.1709]) tensor([0.3560, 0.2204, 0.2291, 0.1946]) -Greedy action tensor([ 0.7285, -0.5115, -0.1913, -0.5794]) tensor([0.5106, 0.1478, 0.2035, 0.1381]) -Greedy action tensor([ 0.7122, -0.3414, -0.0180, -0.3330]) tensor([0.4583, 0.1598, 0.2208, 0.1611]) -Greedy action tensor([ 1.0226, -0.2145, -0.0510, -0.2918]) tensor([0.5261, 0.1527, 0.1798, 0.1413]) -Greedy action tensor([ 0.8383, -0.5796, -0.0946, -0.4406]) tensor([0.5225, 0.1266, 0.2055, 0.1454]) -Greedy action tensor([ 0.5304, -0.2708, 0.0027, -0.3864]) tensor([0.4101, 0.1840, 0.2419, 0.1639]) -Greedy action tensor([ 0.5083, -0.2580, 0.0386, -0.4140]) tensor([0.4020, 0.1868, 0.2513, 0.1598]) -Greedy action tensor([ 0.5726, -0.0506, -0.0941, -0.2043]) tensor([0.3985, 0.2137, 0.2046, 0.1832]) -Greedy action tensor([ 0.7041, -0.2000, -0.0928, -0.3733]) tensor([0.4553, 0.1844, 0.2052, 0.1550]) -Greedy action tensor([ 1.1330, -0.9397, 0.0099, -0.5626]) tensor([0.6118, 0.0770, 0.1990, 0.1123]) -Greedy action tensor([ 0.5541, -0.0722, -0.0354, -0.3226]) tensor([0.3991, 0.2134, 0.2214, 0.1661]) -Greedy action tensor([ 0.1672, 0.3072, -0.0840, -0.2111]) tensor([0.2768, 0.3183, 0.2153, 0.1896]) -Greedy action tensor([ 0.7326, -0.4177, -0.0763, -0.5224]) tensor([0.4885, 0.1546, 0.2176, 0.1393]) -Greedy action tensor([ 0.2391, -0.0255, 0.0468, -0.2242]) tensor([0.3104, 0.2382, 0.2561, 0.1953]) -Greedy action tensor([ 0.6936, -0.3313, -0.0441, -0.1762]) tensor([0.4432, 0.1591, 0.2120, 0.1857]) -Greedy action tensor([ 0.3910, -0.0994, -0.0305, -0.2881]) tensor([0.3603, 0.2206, 0.2364, 0.1827]) -Greedy action tensor([ 0.6227, -0.4600, 0.1371, -0.4574]) tensor([0.4360, 0.1477, 0.2683, 0.1480]) -Greedy action tensor([ 0.2683, 0.1856, 0.0812, -0.5067]) tensor([0.3115, 0.2867, 0.2583, 0.1435]) -Greedy action tensor([ 0.4236, -0.0427, -0.0029, -0.2633]) tensor([0.3593, 0.2254, 0.2345, 0.1808]) -Greedy action tensor([ 0.4917, -0.0668, 0.0077, -0.1414]) tensor([0.3677, 0.2104, 0.2266, 0.1952]) -Greedy action tensor([ 0.7865, -0.3867, -0.0186, -0.6054]) tensor([0.4987, 0.1543, 0.2230, 0.1240]) -Greedy action tensor([ 0.4062, -0.0706, -0.0729, -0.2410]) tensor([0.3618, 0.2246, 0.2241, 0.1894]) -Greedy action tensor([ 1.0276, -0.5235, 0.0440, -0.6553]) tensor([0.5644, 0.1197, 0.2111, 0.1049]) -Greedy action tensor([ 0.5693, -0.1627, 0.0146, -0.2277]) tensor([0.3991, 0.1919, 0.2292, 0.1798]) -Greedy action tensor([ 0.8966, -0.5694, -0.0385, -0.4464]) tensor([0.5307, 0.1225, 0.2083, 0.1385]) -Greedy action tensor([ 0.7574, -0.2469, 0.0494, -0.4534]) tensor([0.4636, 0.1698, 0.2284, 0.1381]) -Greedy action tensor([ 0.2494, -0.2574, 0.2208, -0.3476]) tensor([0.3200, 0.1928, 0.3110, 0.1762]) -Greedy action tensor([ 0.7627, -0.5919, -0.0582, -0.5785]) tensor([0.5103, 0.1317, 0.2246, 0.1335]) -Greedy action tensor([ 0.6248, -0.4154, -0.0605, -0.4384]) tensor([0.4540, 0.1604, 0.2288, 0.1568]) -Greedy action tensor([ 0.7514, -0.5287, 0.0363, -0.3734]) tensor([0.4780, 0.1329, 0.2338, 0.1552]) -Greedy action tensor([ 0.4162, 0.0571, -0.0343, -0.1696]) tensor([0.3457, 0.2414, 0.2204, 0.1925]) -Greedy action tensor([ 0.2901, -0.1013, -0.1558, -0.2864]) tensor([0.3474, 0.2349, 0.2224, 0.1952]) -Greedy action tensor([ 0.8757, -0.4199, 0.0232, -0.5357]) tensor([0.5144, 0.1408, 0.2193, 0.1254]) -Greedy action tensor([ 0.4471, -0.1239, 0.0618, -0.2986]) tensor([0.3677, 0.2077, 0.2501, 0.1744]) -Greedy action tensor([ 0.8100, -0.3982, -0.0934, -0.5862]) tensor([0.5124, 0.1531, 0.2076, 0.1268]) -Greedy action tensor([ 0.3089, 0.1915, 0.0120, -0.1392]) tensor([0.3057, 0.2718, 0.2272, 0.1953]) -Greedy action tensor([ 0.5583, 0.1522, 0.0251, -0.3735]) tensor([0.3778, 0.2517, 0.2217, 0.1488]) -Greedy action tensor([ 0.3917, -0.0046, 0.1330, -0.2842]) tensor([0.3386, 0.2278, 0.2614, 0.1722]) -Greedy action tensor([ 0.4101, -0.0483, -0.0533, -0.2072]) tensor([0.3570, 0.2257, 0.2246, 0.1926]) -Greedy action tensor([ 0.9499, -0.8092, -0.0080, -0.6367]) tensor([0.5680, 0.0978, 0.2179, 0.1162]) -Greedy action tensor([ 0.4704, -0.2003, -0.0103, -0.2939]) tensor([0.3853, 0.1970, 0.2382, 0.1794]) -Greedy action tensor([ 0.4598, 0.2395, -0.1332, -0.2580]) tensor([0.3518, 0.2822, 0.1944, 0.1716]) -Greedy action tensor([ 0.5503, -0.1594, -0.0447, -0.3264]) tensor([0.4066, 0.1999, 0.2243, 0.1692]) -Greedy action tensor([ 0.7022, -0.5141, -0.1698, -0.5350]) tensor([0.4989, 0.1478, 0.2086, 0.1448]) -Greedy action tensor([ 0.5786, -0.2925, -0.0809, -0.3314]) tensor([0.4277, 0.1790, 0.2212, 0.1722]) -Greedy action tensor([ 0.4518, 0.1749, -0.0530, -0.0666]) tensor([0.3382, 0.2564, 0.2041, 0.2014]) -Greedy action tensor([ 0.8961, -0.6311, -0.0243, -0.4251]) tensor([0.5313, 0.1154, 0.2116, 0.1417]) -Greedy action tensor([ 0.5495, -0.3572, 0.0421, -0.2487]) tensor([0.4072, 0.1644, 0.2451, 0.1833]) -Greedy action tensor([ 0.6269, -0.4504, -0.0804, -0.4130]) tensor([0.4573, 0.1557, 0.2254, 0.1616]) -Greedy action tensor([ 0.7803, -0.6359, -0.0792, -0.6359]) tensor([0.5239, 0.1271, 0.2218, 0.1271]) -Greedy action tensor([ 0.5402, -0.3854, 0.0078, -0.5285]) tensor([0.4298, 0.1703, 0.2523, 0.1476]) -Greedy action tensor([ 0.6434, -0.2262, 0.1044, -0.4372]) tensor([0.4270, 0.1790, 0.2491, 0.1449]) -Greedy action tensor([ 0.9949, -0.5247, 0.0416, -0.5223]) tensor([0.5484, 0.1200, 0.2114, 0.1203]) -Greedy action tensor([ 0.3321, -0.0276, -0.0269, -0.1964]) tensor([0.3349, 0.2337, 0.2339, 0.1974]) -Greedy action tensor([ 0.8306, -0.4623, -0.0719, -0.3033]) tensor([0.4995, 0.1371, 0.2026, 0.1608]) -Greedy action tensor([ 0.6369, -0.1749, -0.0504, -0.1549]) tensor([0.4167, 0.1850, 0.2096, 0.1888]) -Greedy action tensor([ 0.8776, -0.4936, 0.0023, -0.5627]) tensor([0.5243, 0.1331, 0.2185, 0.1242]) -Greedy action tensor([ 0.4677, -0.1697, -0.0602, -0.2498]) tensor([0.3837, 0.2028, 0.2263, 0.1872]) -Greedy action tensor([ 0.5798, -0.2743, -0.1759, -0.2115]) tensor([0.4258, 0.1812, 0.2000, 0.1930]) -Greedy action tensor([ 0.8171, -0.6139, -0.0909, -0.5042]) tensor([0.5238, 0.1252, 0.2113, 0.1397]) -Greedy action tensor([ 0.3488, -0.0653, -0.0570, -0.3011]) tensor([0.3509, 0.2320, 0.2339, 0.1832]) -Greedy action tensor([ 0.5372, -0.2849, -0.0196, -0.4533]) tensor([0.4195, 0.1844, 0.2404, 0.1558]) -Greedy action tensor([ 0.4946, -0.2253, -0.0291, -0.5559]) tensor([0.4117, 0.2004, 0.2439, 0.1440]) -Greedy action tensor([ 0.7419, -0.5732, 0.3256, -0.8224]) tensor([0.4679, 0.1256, 0.3086, 0.0979]) -Greedy action tensor([ 0.5992, -0.2731, -0.1818, 0.1449]) tensor([0.3983, 0.1665, 0.1824, 0.2529]) -Greedy action tensor([ 1.7088, -0.5143, -0.2646, 0.2116]) tensor([0.6798, 0.0736, 0.0945, 0.1521]) -Greedy action tensor([ 0.9944, -0.1784, -0.5146, -0.0993]) tensor([0.5360, 0.1659, 0.1185, 0.1796]) -Greedy action tensor([ 1.3573, -0.7771, -0.0166, 0.3964]) tensor([0.5701, 0.0675, 0.1443, 0.2181]) -Greedy action tensor([ 1.1821, -0.3725, -0.1376, 0.1803]) tensor([0.5418, 0.1145, 0.1448, 0.1990]) -Greedy action tensor([ 1.3407, -0.2507, -0.3294, 0.0574]) tensor([0.5992, 0.1220, 0.1128, 0.1660]) -Greedy action tensor([ 1.9638, -0.8774, -0.4660, 0.5027]) tensor([0.7255, 0.0423, 0.0639, 0.1683]) -Greedy action tensor([ 1.5967, -0.7221, -0.2225, 0.5253]) tensor([0.6238, 0.0614, 0.1012, 0.2137]) -Greedy action tensor([ 1.4305, -0.9012, -0.2172, 0.7871]) tensor([0.5509, 0.0535, 0.1061, 0.2895]) -Greedy action tensor([ 1.0164, -0.3485, -0.4598, 0.4963]) tensor([0.4811, 0.1229, 0.1099, 0.2860]) -Greedy action tensor([ 1.1927, -0.3013, -0.2776, 0.0739]) tensor([0.5615, 0.1260, 0.1291, 0.1834]) -Greedy action tensor([ 1.4563, -0.7450, -0.3504, 0.2874]) tensor([0.6307, 0.0698, 0.1036, 0.1960]) -Greedy action tensor([ 1.2329, -0.4918, -0.2706, 0.0145]) tensor([0.5895, 0.1051, 0.1311, 0.1743]) -Greedy action tensor([ 1.4536, -0.3023, -0.2742, 0.0305]) tensor([0.6284, 0.1086, 0.1116, 0.1514]) -Greedy action tensor([ 0.8114, -0.3871, -0.0356, -0.1161]) tensor([0.4704, 0.1419, 0.2017, 0.1861]) -Greedy action tensor([ 0.8984, -0.5624, -0.3824, 0.5409]) tensor([0.4526, 0.1050, 0.1257, 0.3166]) -Greedy action tensor([ 0.9386, 0.0613, -0.0103, -0.4539]) tensor([0.4874, 0.2027, 0.1887, 0.1211]) -Greedy action tensor([ 2.0168, -0.2897, -0.5991, -0.0696]) tensor([0.7711, 0.0768, 0.0564, 0.0957]) -Greedy action tensor([ 0.6649, -0.2021, -0.6135, 0.3992]) tensor([0.4056, 0.1704, 0.1130, 0.3110]) -Greedy action tensor([ 1.3652, -0.2915, -0.2228, 0.1131]) tensor([0.5949, 0.1135, 0.1216, 0.1701]) -Greedy action tensor([ 1.1237, -0.4301, -0.1980, -0.1842]) tensor([0.5719, 0.1209, 0.1525, 0.1546]) -Greedy action tensor([ 1.2002, -0.6948, -0.4401, 0.5389]) tensor([0.5375, 0.0808, 0.1042, 0.2775]) -Greedy action tensor([ 1.3585, -0.5085, -0.2586, 0.3529]) tensor([0.5818, 0.0899, 0.1155, 0.2128]) -Greedy action tensor([ 0.5639, -0.4493, -0.3718, 0.0666]) tensor([0.4231, 0.1536, 0.1660, 0.2573]) -Greedy action tensor([ 1.0722, 0.0696, -0.1205, -0.0131]) tensor([0.4980, 0.1827, 0.1511, 0.1682]) -Greedy action tensor([ 0.6764, -0.3422, -0.2412, 0.0878]) tensor([0.4318, 0.1559, 0.1725, 0.2397]) -Greedy action tensor([ 1.0230, -0.3765, -0.0524, -0.3495]) tensor([0.5431, 0.1340, 0.1853, 0.1377]) -Greedy action tensor([ 1.6062, -0.8025, -0.3098, 0.3635]) tensor([0.6554, 0.0589, 0.0965, 0.1892]) -Greedy action tensor([ 1.1408, -0.6166, -0.5362, 0.0532]) tensor([0.5895, 0.1017, 0.1102, 0.1987]) -Greedy action tensor([ 0.6649, -0.4634, -0.3260, 0.3377]) tensor([0.4140, 0.1340, 0.1537, 0.2984]) -Greedy action tensor([ 1.2802, -0.4934, -0.1680, 0.1302]) tensor([0.5809, 0.0986, 0.1365, 0.1839]) -Greedy action tensor([ 0.6880, 0.2130, 0.0560, -0.4125]) tensor([0.4022, 0.2501, 0.2138, 0.1338]) -Greedy action tensor([ 0.5252, -0.2827, -0.1778, 0.1082]) tensor([0.3846, 0.1715, 0.1904, 0.2535]) -Greedy action tensor([ 1.1885, -0.5350, -0.2400, -0.1456]) tensor([0.5947, 0.1061, 0.1425, 0.1566]) -Greedy action tensor([ 1.1976, -0.4932, -0.2095, 0.5174]) tensor([0.5166, 0.0952, 0.1265, 0.2617]) -Greedy action tensor([ 1.0629, -0.1138, -0.4576, -0.2680]) tensor([0.5583, 0.1721, 0.1221, 0.1475]) -Greedy action tensor([ 1.2518, -0.6701, -0.3356, 0.5069]) tensor([0.5478, 0.0802, 0.1120, 0.2601]) -Greedy action tensor([ 0.6866, -0.1304, -0.1817, -0.0065]) tensor([0.4235, 0.1871, 0.1777, 0.2117]) -Greedy action tensor([ 1.1852, -0.6192, -0.0634, 0.3329]) tensor([0.5325, 0.0876, 0.1528, 0.2271]) -Greedy action tensor([ 1.8226, -0.6072, -0.0173, 0.3008]) tensor([0.6825, 0.0601, 0.1084, 0.1490]) -Greedy action tensor([ 0.9635, -0.5985, -0.3866, 0.5295]) tensor([0.4724, 0.0991, 0.1224, 0.3061]) -Greedy action tensor([ 1.1314, -0.2409, -0.4375, -0.2226]) tensor([0.5814, 0.1474, 0.1211, 0.1501]) -Greedy action tensor([ 1.2228, -0.6113, -0.3396, 0.2841]) tensor([0.5680, 0.0907, 0.1191, 0.2222]) -Greedy action tensor([ 1.1620, -0.7433, -0.3605, 0.6818]) tensor([0.5036, 0.0749, 0.1099, 0.3116]) -Greedy action tensor([ 1.3904, -0.3495, -0.1991, 0.2803]) tensor([0.5851, 0.1027, 0.1194, 0.1928]) -Greedy action tensor([ 0.6292, -0.1902, -0.1042, -0.2220]) tensor([0.4259, 0.1877, 0.2046, 0.1818]) -Greedy action tensor([ 0.9022, -0.4405, -0.3718, 0.1945]) tensor([0.4917, 0.1284, 0.1375, 0.2423]) -Greedy action tensor([ 0.7484, -0.1909, -0.3951, -0.1129]) tensor([0.4690, 0.1833, 0.1495, 0.1982]) -Greedy action tensor([ 1.3091, -0.7890, -0.0515, -0.0568]) tensor([0.6119, 0.0751, 0.1569, 0.1561]) -Greedy action tensor([ 1.1687, -0.5720, -0.1456, 0.1273]) tensor([0.5565, 0.0976, 0.1495, 0.1964]) -Greedy action tensor([ 1.0997, 0.0526, -0.3558, -0.1594]) tensor([0.5353, 0.1879, 0.1249, 0.1520]) -Greedy action tensor([ 1.2840, -0.5058, -0.2217, 0.5424]) tensor([0.5361, 0.0895, 0.1189, 0.2554]) -Greedy action tensor([ 1.6778, -0.6176, -0.2344, 0.1197]) tensor([0.6854, 0.0690, 0.1013, 0.1443]) -Greedy action tensor([ 1.0348, -0.2797, -0.2392, 0.2647]) tensor([0.4972, 0.1336, 0.1391, 0.2302]) -Greedy action tensor([ 0.5550, -0.1642, 0.0328, -0.1545]) tensor([0.3888, 0.1894, 0.2306, 0.1912]) -Greedy action tensor([ 0.9613, -0.2836, -0.1118, -0.1058]) tensor([0.5066, 0.1459, 0.1732, 0.1743]) -Greedy action tensor([ 0.8545, -0.5080, 0.0311, 0.0762]) tensor([0.4642, 0.1188, 0.2038, 0.2132]) -Greedy action tensor([ 0.9726, -0.2053, -0.1064, -0.4072]) tensor([0.5265, 0.1621, 0.1790, 0.1325]) -Greedy action tensor([ 1.0880, -0.5365, -0.2871, -0.1403]) tensor([0.5738, 0.1131, 0.1451, 0.1680]) -Greedy action tensor([ 1.0249, -0.2178, 0.0213, 0.0324]) tensor([0.4936, 0.1425, 0.1809, 0.1830]) -Greedy action tensor([ 1.1095, -0.3992, -0.3331, 0.2662]) tensor([0.5297, 0.1172, 0.1252, 0.2279]) -Greedy action tensor([ 0.6481, -0.0185, 0.1216, -0.2046]) tensor([0.3952, 0.2029, 0.2334, 0.1685]) -Greedy action tensor([ 1.0772, -0.5824, -0.3660, 0.8380]) tensor([0.4517, 0.0859, 0.1067, 0.3556]) -Greedy action tensor([ 1.5238, -1.0649, -0.1671, 0.4048]) tensor([0.6305, 0.0474, 0.1162, 0.2059]) -Greedy action tensor([ 1.0651, -0.5536, -0.1658, 0.1603]) tensor([0.5278, 0.1046, 0.1541, 0.2135]) -Greedy action tensor([ 1.2154, -0.6667, -0.1927, 0.3198]) tensor([0.5539, 0.0843, 0.1355, 0.2262]) -Greedy action tensor([ 1.2536, -0.6942, -0.2448, 0.4150]) tensor([0.5561, 0.0793, 0.1243, 0.2404]) -Greedy action tensor([ 0.6753, -0.3888, -0.5800, 0.3642]) tensor([0.4232, 0.1460, 0.1206, 0.3101]) -Greedy action tensor([ 1.3002, -0.4101, -0.0848, 0.2595]) tensor([0.5604, 0.1013, 0.1403, 0.1979]) -Greedy action tensor([ 1.1936, -0.5327, -0.2559, 0.2359]) tensor([0.5567, 0.0990, 0.1306, 0.2136]) -Greedy action tensor([ 0.9837, -0.6488, -0.2410, 0.5952]) tensor([0.4614, 0.0902, 0.1356, 0.3129]) -Greedy action tensor([ 0.9194, -0.7024, -0.3202, 0.4816]) tensor([0.4689, 0.0926, 0.1358, 0.3027]) -Greedy action tensor([ 0.9077, -0.5597, -0.5573, 0.6518]) tensor([0.4473, 0.1031, 0.1034, 0.3463]) -Greedy action tensor([ 1.1810, -0.0776, -0.0693, -0.2126]) tensor([0.5499, 0.1562, 0.1575, 0.1365]) -Greedy action tensor([ 1.6878, -0.7868, -0.2134, 0.3385]) tensor([0.6698, 0.0564, 0.1001, 0.1738]) -Greedy action tensor([ 0.7403, -0.6577, -0.0443, -0.0749]) tensor([0.4660, 0.1151, 0.2126, 0.2062]) -Greedy action tensor([ 0.9956, 0.1325, 0.0383, -0.0931]) tensor([0.4668, 0.1969, 0.1792, 0.1571]) -Greedy action tensor([ 1.1679, -0.4519, -0.1985, 0.5876]) tensor([0.4968, 0.0983, 0.1267, 0.2781]) -Greedy action tensor([ 1.2855, -0.3108, -0.2306, 0.4975]) tensor([0.5328, 0.1080, 0.1170, 0.2423]) -Greedy action tensor([ 0.3135, -0.3515, -0.2228, 0.3425]) tensor([0.3196, 0.1644, 0.1869, 0.3290]) -Greedy action tensor([ 0.7679, -0.2887, -0.4237, 0.5113]) tensor([0.4124, 0.1433, 0.1252, 0.3190]) -Greedy action tensor([ 0.2457, -0.0188, 0.7815, 0.0452]) tensor([0.2328, 0.1787, 0.3979, 0.1905]) -Greedy action tensor([ 1.1513, -0.9973, 1.6133, 0.0203]) tensor([0.3304, 0.0385, 0.5244, 0.1066]) -Greedy action tensor([ 0.3218, -1.5456, 0.1953, -0.0196]) tensor([0.3641, 0.0563, 0.3208, 0.2588]) -Greedy action tensor([1.4773, 0.0774, 0.2428, 0.4259]) tensor([0.5299, 0.1307, 0.1542, 0.1852]) -Greedy action tensor([ 0.9951, -0.9881, 0.6279, 1.2233]) tensor([0.3240, 0.0446, 0.2244, 0.4070]) -Greedy action tensor([ 0.4452, 0.5863, -0.4010, 0.7158]) tensor([0.2570, 0.2959, 0.1103, 0.3368]) -Greedy action tensor([ 0.2312, -1.4749, -0.0621, 0.0905]) tensor([0.3576, 0.0649, 0.2667, 0.3107]) -Greedy action tensor([ 1.2911, -0.3979, 1.3190, -0.4853]) tensor([0.4198, 0.0775, 0.4317, 0.0710]) -Greedy action tensor([ 1.8433, -0.8331, 1.1524, -0.1005]) tensor([0.5837, 0.0402, 0.2925, 0.0836]) -Greedy action tensor([ 0.6881, 0.1185, 0.8703, -0.7661]) tensor([0.3334, 0.1886, 0.4001, 0.0779]) -Greedy action tensor([-0.5738, -0.1213, 0.4379, -0.3482]) tensor([0.1521, 0.2391, 0.4183, 0.1906]) -Greedy action tensor([ 1.2000, -0.5237, 1.4502, 0.6040]) tensor([0.3318, 0.0592, 0.4262, 0.1828]) -Greedy action tensor([ 1.9322, -1.9576, -0.5583, 1.1604]) tensor([0.6388, 0.0131, 0.0529, 0.2952]) -Greedy action tensor([ 0.7930, 0.2630, -0.8503, 1.4740]) tensor([0.2661, 0.1566, 0.0515, 0.5258]) -Greedy action tensor([ 0.1757, -0.2038, -0.5329, 0.9324]) tensor([0.2321, 0.1588, 0.1143, 0.4947]) -Greedy action tensor([ 1.6761, -0.2475, 0.4350, 1.6879]) tensor([0.4087, 0.0597, 0.1181, 0.4135]) -Greedy action tensor([ 0.8613, 0.2114, 1.7236, -0.6136]) tensor([0.2427, 0.1267, 0.5750, 0.0555]) -Greedy action tensor([ 0.2984, 0.3338, -0.5621, 1.4679]) tensor([0.1761, 0.1824, 0.0745, 0.5670]) -Greedy action tensor([ 0.7615, -1.0259, 0.5224, 1.7036]) tensor([0.2212, 0.0370, 0.1742, 0.5676]) -Greedy action tensor([ 0.6779, -0.4737, -0.5274, 0.7535]) tensor([0.3712, 0.1173, 0.1112, 0.4003]) -Greedy action tensor([-0.4303, -1.2795, 0.3092, 0.7812]) tensor([0.1453, 0.0622, 0.3044, 0.4881]) -Greedy action tensor([ 0.5519, -1.7866, 0.3887, -0.1026]) tensor([0.4056, 0.0391, 0.3445, 0.2108]) -Greedy action tensor([ 0.5284, -0.3382, 1.3988, 0.0721]) tensor([0.2251, 0.0946, 0.5376, 0.1426]) -Greedy action tensor([ 1.2020, -0.6616, 0.1810, 0.8913]) tensor([0.4448, 0.0690, 0.1602, 0.3260]) -Greedy action tensor([ 0.9514, -0.9160, 0.1916, 0.7223]) tensor([0.4136, 0.0639, 0.1935, 0.3289]) -Greedy action tensor([ 1.4657, -0.9375, 0.9074, 1.5420]) tensor([0.3647, 0.0330, 0.2087, 0.3936]) -Greedy action tensor([1.3622, 0.3320, 0.8703, 0.0779]) tensor([0.4454, 0.1590, 0.2723, 0.1233]) -Greedy action tensor([ 0.9725, -0.5773, 0.8923, 0.8429]) tensor([0.3318, 0.0704, 0.3062, 0.2915]) -Greedy action tensor([ 1.2249, -0.7081, 1.5780, 0.5501]) tensor([0.3249, 0.0470, 0.4625, 0.1655]) -Greedy action tensor([ 0.1796, 0.4271, 0.2396, -1.0668]) tensor([0.2755, 0.3528, 0.2925, 0.0792]) -Greedy action tensor([ 1.1038, 0.4013, -0.5861, 0.6615]) tensor([0.4306, 0.2133, 0.0795, 0.2767]) -Greedy action tensor([-0.1413, 0.5412, -0.5450, 0.9127]) tensor([0.1535, 0.3037, 0.1025, 0.4403]) -Greedy action tensor([ 1.5154, -0.9479, 1.3773, 0.9621]) tensor([0.3951, 0.0336, 0.3441, 0.2272]) -Greedy action tensor([-1.0571, -1.1321, -0.9380, -0.9427]) tensor([0.2395, 0.2222, 0.2698, 0.2685]) -Greedy action tensor([ 0.8690, -1.0677, 1.6705, 0.4255]) tensor([0.2491, 0.0359, 0.5552, 0.1598]) -Greedy action tensor([ 1.3282, 1.1134, -1.0921, 0.1705]) tensor([0.4525, 0.3651, 0.0402, 0.1422]) -Greedy action tensor([ 1.4410, -0.0672, 0.6165, 1.2938]) tensor([0.3964, 0.0877, 0.1738, 0.3421]) -Greedy action tensor([ 0.8912, -0.0676, 1.0177, 0.5356]) tensor([0.3107, 0.1191, 0.3526, 0.2177]) -Greedy action tensor([ 1.2403, -1.2523, -0.4252, 1.4204]) tensor([0.4050, 0.0335, 0.0766, 0.4849]) -Greedy action tensor([ 0.5864, -0.3693, -0.4793, 1.6241]) tensor([0.2197, 0.0845, 0.0757, 0.6201]) -Greedy action tensor([-0.7094, -0.4235, 1.1129, 0.3433]) tensor([0.0879, 0.1169, 0.5435, 0.2517]) -Greedy action tensor([-0.0805, 1.0383, 0.7636, 0.8804]) tensor([0.1111, 0.3401, 0.2584, 0.2904]) -Greedy action tensor([ 1.3139, -1.4751, 1.1836, 1.7591]) tensor([0.2857, 0.0176, 0.2508, 0.4459]) -Greedy action tensor([ 0.6851, -0.7917, 0.4960, 1.0355]) tensor([0.2877, 0.0657, 0.2381, 0.4085]) -Greedy action tensor([-0.0910, -0.2149, 1.3253, 0.0034]) tensor([0.1408, 0.1244, 0.5802, 0.1547]) -Greedy action tensor([ 0.4657, -0.2020, 0.1444, -0.2167]) tensor([0.3645, 0.1869, 0.2643, 0.1842]) -Greedy action tensor([ 0.5608, -0.1065, 0.6520, 0.0913]) tensor([0.3092, 0.1587, 0.3387, 0.1934]) -Greedy action tensor([ 1.6569, -0.3268, 0.3266, 1.4846]) tensor([0.4457, 0.0613, 0.1178, 0.3751]) -Greedy action tensor([ 1.0573, -1.0303, 0.6609, -0.2374]) tensor([0.4829, 0.0599, 0.3249, 0.1323]) -Greedy action tensor([ 0.3071, 0.5070, -0.0452, 0.3904]) tensor([0.2493, 0.3045, 0.1753, 0.2710]) -Greedy action tensor([ 1.0287, -0.3919, -0.6460, -0.0262]) tensor([0.5627, 0.1359, 0.1054, 0.1959]) -Greedy action tensor([ 0.8459, 1.0968, -0.0791, -0.3172]) tensor([0.3340, 0.4292, 0.1324, 0.1044]) -Greedy action tensor([-0.1147, -0.6591, -0.3460, 0.2913]) tensor([0.2581, 0.1497, 0.2048, 0.3873]) -Greedy action tensor([ 0.1786, -0.9537, 1.2979, 1.1839]) tensor([0.1405, 0.0453, 0.4303, 0.3839]) -Greedy action tensor([ 1.0674, -0.5497, 1.2414, 0.1878]) tensor([0.3567, 0.0708, 0.4245, 0.1480]) -Greedy action tensor([ 1.5703, -0.3093, -0.0494, 1.2737]) tensor([0.4776, 0.0729, 0.0945, 0.3550]) -Greedy action tensor([0.1276, 0.8516, 0.5061, 0.5053]) tensor([0.1672, 0.3448, 0.2441, 0.2439]) -Greedy action tensor([-0.9603, 0.1825, -0.7145, 0.9306]) tensor([0.0831, 0.2604, 0.1062, 0.5503]) -Greedy action tensor([-0.0125, -1.4359, 0.2670, 0.2573]) tensor([0.2582, 0.0622, 0.3414, 0.3382]) -Greedy action tensor([ 0.7892, -0.3084, -0.9274, 1.7128]) tensor([0.2480, 0.0828, 0.0446, 0.6246]) -Greedy action tensor([ 1.2540, 0.3491, 1.8950, -0.4082]) tensor([0.2863, 0.1158, 0.5435, 0.0543]) -Greedy action tensor([0.5348, 0.1751, 0.3929, 0.4480]) tensor([0.2872, 0.2004, 0.2492, 0.2633]) -Greedy action tensor([ 1.5325, -0.7200, 1.8209, 1.2675]) tensor([0.3119, 0.0328, 0.4161, 0.2393]) -Greedy action tensor([ 1.3865, -0.0802, 1.2594, 0.6028]) tensor([0.3894, 0.0898, 0.3429, 0.1778]) -Greedy action tensor([ 1.2979, -1.9415, 0.8282, 1.4804]) tensor([0.3491, 0.0137, 0.2182, 0.4190]) -Greedy action tensor([ 0.5419, 0.0211, -1.1322, 0.8977]) tensor([0.3117, 0.1851, 0.0584, 0.4448]) -Greedy action tensor([-0.7836, 0.2391, 0.0484, 0.6366]) tensor([0.0979, 0.2722, 0.2249, 0.4050]) -Greedy action tensor([ 1.1718, -0.3746, 1.4511, 0.7871]) tensor([0.3110, 0.0662, 0.4112, 0.2116]) -Greedy action tensor([ 0.6728, -0.7724, 0.1606, 2.5567]) tensor([0.1188, 0.0280, 0.0712, 0.7819]) -Greedy action tensor([ 1.3561, -1.5677, 0.7743, 0.5555]) tensor([0.4850, 0.0261, 0.2711, 0.2178]) -Greedy action tensor([ 1.9846, -0.6382, 0.4356, 1.6040]) tensor([0.5080, 0.0369, 0.1079, 0.3472]) -Greedy action tensor([ 1.1864, 1.4347, -0.5022, 0.8422]) tensor([0.3149, 0.4037, 0.0582, 0.2232]) -Greedy action tensor([ 1.0215, 0.5711, -1.0466, 0.6389]) tensor([0.4088, 0.2606, 0.0517, 0.2789]) -Greedy action tensor([ 1.2633, 0.1371, -0.4374, 0.0067]) tensor([0.5582, 0.1810, 0.1019, 0.1589]) -Greedy action tensor([0.8126, 0.4764, 1.0712, 0.8726]) tensor([0.2456, 0.1755, 0.3181, 0.2608]) -Greedy action tensor([ 0.6918, -0.5366, 0.7086, 0.2150]) tensor([0.3412, 0.0999, 0.3470, 0.2118]) -Greedy action tensor([ 0.2075, -1.4529, 0.3559, 0.0748]) tensor([0.3100, 0.0589, 0.3596, 0.2715]) -Greedy action tensor([ 0.3169, -1.5515, 1.5702, 0.6978]) tensor([0.1634, 0.0252, 0.5722, 0.2391]) -Greedy action tensor([-0.2245, 0.4795, -0.3428, 0.2508]) tensor([0.1812, 0.3664, 0.1610, 0.2915]) -Greedy action tensor([ 0.2804, -0.7761, 0.0184, 1.0388]) tensor([0.2352, 0.0818, 0.1810, 0.5021]) -Greedy action tensor([ 0.3681, 0.2610, 0.5428, -0.3578]) tensor([0.2799, 0.2514, 0.3333, 0.1354]) -Greedy action tensor([-0.4759, 0.9360, 0.9503, -0.0810]) tensor([0.0930, 0.3817, 0.3872, 0.1381]) -Greedy action tensor([-0.9925, -0.6266, 0.8351, 1.3333]) tensor([0.0529, 0.0763, 0.3291, 0.5417]) -Greedy action tensor([-1.5580, -0.5682, 0.4715, 0.0758]) tensor([0.0609, 0.1638, 0.4633, 0.3119]) -Greedy action tensor([-1.1672, -0.4344, 0.5951, -0.5070]) tensor([0.0922, 0.1919, 0.5373, 0.1785]) -Greedy action tensor([-0.9961, -0.6151, 0.8710, 1.4377]) tensor([0.0492, 0.0720, 0.3181, 0.5607]) -Greedy action tensor([-0.2855, 0.1245, 0.2145, 0.3953]) tensor([0.1631, 0.2458, 0.2689, 0.3222]) -Greedy action tensor([-1.4709, -0.5134, 0.5021, 0.2933]) tensor([0.0601, 0.1566, 0.4324, 0.3509]) -Greedy action tensor([-1.2473, -0.3595, 1.0545, 1.1686]) tensor([0.0406, 0.0987, 0.4058, 0.4549]) -Greedy action tensor([-1.5776, -0.5995, 0.5759, 0.1523]) tensor([0.0558, 0.1485, 0.4809, 0.3148]) -Greedy action tensor([-0.9536, -0.6035, 0.2404, 0.3658]) tensor([0.1057, 0.1500, 0.3489, 0.3954]) -Greedy action tensor([-0.8279, -0.3821, 0.7594, 1.3804]) tensor([0.0604, 0.0944, 0.2955, 0.5498]) -Greedy action tensor([-1.2090, -0.2525, 0.2387, 0.1852]) tensor([0.0841, 0.2189, 0.3578, 0.3392]) -Greedy action tensor([-0.5334, -0.4867, 0.1767, 0.1874]) tensor([0.1629, 0.1707, 0.3314, 0.3350]) -Greedy action tensor([-1.7424, 0.1416, 0.4643, -0.0783]) tensor([0.0456, 0.2998, 0.4140, 0.2406]) -Greedy action tensor([-1.3374, 0.4510, 0.2714, 0.4506]) tensor([0.0557, 0.3331, 0.2783, 0.3329]) -Greedy action tensor([-1.8080, -0.4700, 0.6452, 0.0261]) tensor([0.0441, 0.1679, 0.5122, 0.2758]) -Greedy action tensor([-0.9126, -0.5856, 0.2034, 0.3326]) tensor([0.1122, 0.1556, 0.3425, 0.3897]) -Greedy action tensor([-1.7047, -0.4951, 0.5384, 0.0114]) tensor([0.0517, 0.1733, 0.4873, 0.2877]) -Greedy action tensor([-1.4249, 0.8410, 0.3295, 0.1167]) tensor([0.0474, 0.4570, 0.2740, 0.2215]) -Greedy action tensor([-1.3759, -0.1384, 0.3308, 0.2866]) tensor([0.0657, 0.2263, 0.3618, 0.3462]) -Greedy action tensor([-1.6334, -0.4058, 0.4862, 0.0097]) tensor([0.0558, 0.1905, 0.4649, 0.2887]) -Greedy action tensor([-1.7908, -0.5124, 1.3334, 0.9140]) tensor([0.0237, 0.0849, 0.5378, 0.3536]) -Greedy action tensor([-1.6815, -0.5112, 0.5742, 0.0711]) tensor([0.0512, 0.1650, 0.4885, 0.2953]) -Greedy action tensor([-1.0672, -0.4736, 0.4686, 0.9017]) tensor([0.0684, 0.1238, 0.3178, 0.4900]) -Greedy action tensor([-1.1902, -0.5243, 0.4432, 0.6820]) tensor([0.0686, 0.1336, 0.3515, 0.4463]) -Greedy action tensor([-1.8734, -0.4671, 0.7518, 0.0488]) tensor([0.0389, 0.1586, 0.5367, 0.2657]) -Greedy action tensor([-1.8663, -0.2421, 0.6348, -0.0564]) tensor([0.0410, 0.2081, 0.5002, 0.2506]) -Greedy action tensor([-1.5935, -0.5634, 0.5823, 0.0627]) tensor([0.0560, 0.1569, 0.4935, 0.2935]) -Greedy action tensor([-1.6234, -0.4982, 0.5719, 0.0754]) tensor([0.0540, 0.1663, 0.4847, 0.2950]) -Greedy action tensor([-1.8456, -1.0322, 0.9400, -0.0445]) tensor([0.0392, 0.0884, 0.6351, 0.2373]) -Greedy action tensor([-1.6404, 0.2808, 0.5711, -0.3649]) tensor([0.0487, 0.3325, 0.4445, 0.1743]) -Greedy action tensor([-1.8499, -0.3734, 0.6052, -0.1336]) tensor([0.0443, 0.1938, 0.5156, 0.2463]) -Greedy action tensor([-1.1913, -0.0769, 0.4655, 0.6962]) tensor([0.0629, 0.1918, 0.3299, 0.4154]) -Greedy action tensor([-1.5911, -0.5855, 0.5040, 0.0679]) tensor([0.0584, 0.1597, 0.4748, 0.3070]) -Greedy action tensor([-1.2268, -0.4419, 0.5333, -0.3287]) tensor([0.0873, 0.1913, 0.5072, 0.2142]) -Greedy action tensor([-1.5400, -0.4529, 0.4557, 0.0885]) tensor([0.0609, 0.1806, 0.4481, 0.3104]) -Greedy action tensor([-0.2456, -0.1383, 1.1251, 1.6837]) tensor([0.0773, 0.0861, 0.3044, 0.5322]) -Greedy action tensor([-0.9670, 0.3799, -0.3808, 0.1256]) tensor([0.1039, 0.3995, 0.1867, 0.3098]) -Greedy action tensor([-1.2837, -0.2008, 0.7752, 1.0116]) tensor([0.0460, 0.1360, 0.3609, 0.4571]) -Greedy action tensor([-1.7811, -0.7257, 0.1257, -0.2500]) tensor([0.0657, 0.1887, 0.4421, 0.3036]) -Greedy action tensor([-1.9435, -0.4912, 1.2249, 0.6215]) tensor([0.0238, 0.1016, 0.5653, 0.3092]) -Greedy action tensor([-0.9116, -0.3957, 0.3796, 1.0138]) tensor([0.0759, 0.1272, 0.2762, 0.5207]) -Greedy action tensor([-0.9478, -0.4649, 0.4571, 1.1706]) tensor([0.0666, 0.1079, 0.2714, 0.5540]) -Greedy action tensor([-1.5173, -0.5025, 0.5800, 0.3577]) tensor([0.0543, 0.1497, 0.4421, 0.3539]) -Greedy action tensor([-1.2750, -0.5729, 0.4244, 0.6676]) tensor([0.0647, 0.1305, 0.3537, 0.4511]) -Greedy action tensor([-1.3566, -0.6463, 0.3360, 0.0126]) tensor([0.0806, 0.1641, 0.4382, 0.3171]) -Greedy action tensor([-1.3056, -0.5775, 0.4452, -0.0226]) tensor([0.0804, 0.1665, 0.4630, 0.2900]) -Greedy action tensor([-0.6541, -0.5476, 0.1699, 0.3084]) tensor([0.1427, 0.1587, 0.3252, 0.3735]) -Greedy action tensor([-1.8055, -0.3006, 0.5753, -0.0681]) tensor([0.0455, 0.2047, 0.4915, 0.2583]) -Greedy action tensor([-1.9282, -0.5009, 1.1654, 0.5385]) tensor([0.0256, 0.1068, 0.5654, 0.3021]) -Greedy action tensor([-0.4700, -0.4407, 0.1424, -0.0263]) tensor([0.1841, 0.1895, 0.3396, 0.2868]) -Greedy action tensor([-1.8706, -0.8819, -0.2386, -0.5887]) tensor([0.0806, 0.2167, 0.4123, 0.2905]) -Greedy action tensor([-1.7044, -0.2768, 0.5527, 0.0168]) tensor([0.0492, 0.2052, 0.4704, 0.2752]) -Greedy action tensor([-1.1403, -0.4925, 0.3224, 0.6440]) tensor([0.0758, 0.1450, 0.3275, 0.4517]) -Greedy action tensor([-1.2193, -0.5552, 0.6539, 1.0571]) tensor([0.0521, 0.1012, 0.3391, 0.5076]) -Greedy action tensor([-1.3349, -0.1092, -0.2467, -0.3407]) tensor([0.0992, 0.3380, 0.2946, 0.2682]) -Greedy action tensor([-1.6123, -0.5414, 0.5363, 0.1270]) tensor([0.0550, 0.1605, 0.4714, 0.3131]) -Greedy action tensor([-0.9484, -0.6936, 0.4799, 0.5156]) tensor([0.0927, 0.1196, 0.3868, 0.4009]) -Greedy action tensor([-1.8672, -0.4372, 0.6291, -0.1345]) tensor([0.0435, 0.1819, 0.5284, 0.2462]) -Greedy action tensor([-1.8686, -0.4419, 0.6262, -0.1400]) tensor([0.0436, 0.1817, 0.5288, 0.2458]) -Greedy action tensor([-0.8178, -0.5599, 0.2082, 0.2826]) tensor([0.1236, 0.1600, 0.3449, 0.3715]) -Greedy action tensor([-0.4885, -0.3416, 0.1481, 0.0645]) tensor([0.1728, 0.2002, 0.3266, 0.3004]) -Greedy action tensor([-1.9815, -0.6004, 1.3321, 0.5809]) tensor([0.0220, 0.0876, 0.6050, 0.2854]) -Greedy action tensor([-0.6441, -0.1622, 0.5966, 1.3711]) tensor([0.0736, 0.1192, 0.2546, 0.5525]) -Greedy action tensor([-1.0270, -0.5703, 0.4620, -0.2205]) tensor([0.1081, 0.1707, 0.4791, 0.2421]) -Greedy action tensor([-0.7294, -0.5558, 0.1906, 0.1632]) tensor([0.1400, 0.1666, 0.3514, 0.3419]) -Greedy action tensor([-0.4338, -0.4870, 0.2048, 0.2611]) tensor([0.1711, 0.1622, 0.3240, 0.3427]) -Greedy action tensor([-0.2702, 0.0803, 0.8704, 1.6233]) tensor([0.0820, 0.1165, 0.2566, 0.5449]) -Greedy action tensor([-1.2618, -0.0461, 0.9479, 1.0158]) tensor([0.0430, 0.1451, 0.3922, 0.4197]) -Greedy action tensor([-0.8961, -0.5782, 0.1734, 0.3400]) tensor([0.1145, 0.1574, 0.3338, 0.3943]) -Greedy action tensor([-1.1913, -0.7013, 0.8442, 1.1476]) tensor([0.0484, 0.0790, 0.3706, 0.5020]) -Greedy action tensor([-0.4714, -0.0873, 1.0969, 1.5998]) tensor([0.0658, 0.0966, 0.3157, 0.5220]) -Greedy action tensor([-1.5580, 0.0203, 0.5973, 0.4810]) tensor([0.0451, 0.2187, 0.3895, 0.3467]) -Greedy action tensor([-0.8621, -0.1553, 0.9208, 1.2955]) tensor([0.0567, 0.1150, 0.3374, 0.4908]) -Greedy action tensor([-1.7157, -0.5882, 1.2000, 0.8508]) tensor([0.0281, 0.0868, 0.5190, 0.3661]) -Greedy action tensor([-1.5251, -0.6168, 0.6881, 0.0032]) tensor([0.0580, 0.1439, 0.5306, 0.2675]) -Greedy action tensor([-1.9051, -0.4431, 0.6421, -0.1580]) tensor([0.0420, 0.1811, 0.5361, 0.2408]) -Greedy action tensor([-1.5194, -0.5675, 0.5390, 0.1570]) tensor([0.0596, 0.1545, 0.4671, 0.3188]) -Greedy action tensor([-1.0268, -0.5635, 0.3721, 0.0874]) tensor([0.1032, 0.1641, 0.4182, 0.3146]) -Greedy action tensor([-1.8792, -0.4808, 0.6603, -0.1176]) tensor([0.0425, 0.1720, 0.5383, 0.2473]) -Greedy action tensor([-1.8309, -0.4876, 0.6318, -0.0752]) tensor([0.0447, 0.1714, 0.5250, 0.2589]) -Greedy action tensor([-0.7391, -0.3466, 0.7773, 1.3184]) tensor([0.0673, 0.0996, 0.3065, 0.5266]) -Greedy action tensor([ 0.8642, -0.6763, -0.1275, -0.5308]) tensor([0.5455, 0.1169, 0.2024, 0.1352]) -Greedy action tensor([ 0.6315, -0.3653, 0.0572, -0.4350]) tensor([0.4393, 0.1621, 0.2474, 0.1512]) -Greedy action tensor([ 0.4798, 0.0782, 0.1425, -0.0590]) tensor([0.3371, 0.2256, 0.2406, 0.1967]) -Greedy action tensor([ 0.2484, -0.0739, 0.0285, -0.1587]) tensor([0.3132, 0.2269, 0.2514, 0.2085]) -Greedy action tensor([ 0.3903, 0.0194, -0.0629, -0.2269]) tensor([0.3490, 0.2409, 0.2218, 0.1883]) -Greedy action tensor([ 0.3178, 0.0214, -0.0333, -0.1838]) tensor([0.3276, 0.2435, 0.2306, 0.1983]) -Greedy action tensor([ 0.8112, -0.3958, -0.0465, -0.5684]) tensor([0.5064, 0.1514, 0.2148, 0.1274]) -Greedy action tensor([ 0.3094, 0.0742, -0.0421, -0.2136]) tensor([0.3240, 0.2561, 0.2280, 0.1920]) -Greedy action tensor([ 0.2171, 0.1895, -0.0737, -0.2996]) tensor([0.3015, 0.2933, 0.2254, 0.1798]) -Greedy action tensor([ 0.7319, -0.3517, 0.2583, -0.8906]) tensor([0.4633, 0.1568, 0.2885, 0.0915]) -Greedy action tensor([ 0.1816, -0.1121, -0.0574, -0.3164]) tensor([0.3184, 0.2374, 0.2507, 0.1935]) -Greedy action tensor([ 0.4686, 0.0439, -0.0477, -0.2599]) tensor([0.3658, 0.2393, 0.2183, 0.1766]) -Greedy action tensor([ 0.8367, -0.6911, -0.1047, -0.6398]) tensor([0.5448, 0.1182, 0.2125, 0.1244]) -Greedy action tensor([0.4585, 0.0819, 0.1530, 0.1327]) tensor([0.3180, 0.2182, 0.2343, 0.2296]) -Greedy action tensor([ 0.8022, -0.6102, -0.1970, -0.7692]) tensor([0.5496, 0.1339, 0.2023, 0.1142]) -Greedy action tensor([ 0.8873, -0.3835, 0.0330, -0.4404]) tensor([0.5073, 0.1423, 0.2159, 0.1345]) -Greedy action tensor([ 0.6818, 0.1778, -0.0027, -0.3959]) tensor([0.4084, 0.2467, 0.2060, 0.1390]) -Greedy action tensor([ 0.5155, -0.3448, -0.0907, -0.3584]) tensor([0.4192, 0.1773, 0.2286, 0.1749]) -Greedy action tensor([ 0.7239, -0.2943, 0.1550, -0.3992]) tensor([0.4439, 0.1604, 0.2513, 0.1444]) -Greedy action tensor([ 0.6244, -0.2882, 0.0832, -0.4333]) tensor([0.4291, 0.1722, 0.2497, 0.1490]) -Greedy action tensor([ 0.5445, -0.3988, -0.0139, -0.1732]) tensor([0.4083, 0.1590, 0.2336, 0.1992]) -Greedy action tensor([ 0.8291, -0.5587, 0.1518, -0.7061]) tensor([0.5068, 0.1265, 0.2575, 0.1092]) -Greedy action tensor([ 0.5114, -0.2392, 0.0712, -0.2814]) tensor([0.3893, 0.1838, 0.2507, 0.1762]) -Greedy action tensor([ 0.3613, 0.0347, 0.0170, -0.1784]) tensor([0.3319, 0.2394, 0.2352, 0.1935]) -Greedy action tensor([ 0.7086, -0.5195, -0.0180, -0.4678]) tensor([0.4797, 0.1405, 0.2319, 0.1479]) -Greedy action tensor([ 0.6823, -0.2193, 0.0061, -0.2740]) tensor([0.4350, 0.1766, 0.2212, 0.1672]) -Greedy action tensor([ 0.3651, -0.2132, 0.2358, -0.4144]) tensor([0.3451, 0.1935, 0.3032, 0.1582]) -Greedy action tensor([ 0.4048, -0.2822, 0.0558, -0.5480]) tensor([0.3855, 0.1939, 0.2719, 0.1487]) -Greedy action tensor([ 0.3295, -0.2130, 0.0062, -0.1736]) tensor([0.3437, 0.1998, 0.2487, 0.2078]) -Greedy action tensor([ 0.6926, -0.3370, -0.0516, -0.2598]) tensor([0.4508, 0.1610, 0.2142, 0.1739]) -Greedy action tensor([ 0.6966, -0.4254, -0.1260, -0.3788]) tensor([0.4748, 0.1546, 0.2086, 0.1620]) -Greedy action tensor([ 0.5249, -0.3422, 0.1784, -0.2816]) tensor([0.3885, 0.1633, 0.2748, 0.1734]) -Greedy action tensor([ 0.4082, -0.3243, 0.1239, -0.4805]) tensor([0.3781, 0.1818, 0.2846, 0.1555]) -Greedy action tensor([ 0.1328, 0.2089, 0.0633, -0.2583]) tensor([0.2711, 0.2926, 0.2529, 0.1834]) -Greedy action tensor([ 0.4404, -0.1212, -0.0209, -0.3952]) tensor([0.3796, 0.2165, 0.2393, 0.1646]) -Greedy action tensor([ 0.8561, -0.5907, 0.1860, -0.4297]) tensor([0.4942, 0.1163, 0.2529, 0.1366]) -Greedy action tensor([ 0.4502, -0.2859, 0.1270, -0.4401]) tensor([0.3827, 0.1833, 0.2770, 0.1571]) -Greedy action tensor([ 0.2466, 0.0134, -0.0686, -0.2081]) tensor([0.3168, 0.2509, 0.2312, 0.2011]) -Greedy action tensor([ 0.5643, -0.1702, -0.0578, -0.2205]) tensor([0.4044, 0.1940, 0.2171, 0.1845]) -Greedy action tensor([ 0.6117, -0.2093, -0.0602, -0.0017]) tensor([0.4012, 0.1766, 0.2049, 0.2173]) -Greedy action tensor([ 0.0850, -0.0114, -0.0252, -0.1852]) tensor([0.2804, 0.2546, 0.2511, 0.2140]) -Greedy action tensor([ 0.6899, -0.0679, -0.0625, -0.0063]) tensor([0.4101, 0.1922, 0.1933, 0.2044]) -Greedy action tensor([ 0.8496, -0.5955, -0.1131, -0.4401]) tensor([0.5283, 0.1245, 0.2017, 0.1455]) -Greedy action tensor([ 0.5622, -0.4243, -0.0855, -0.3545]) tensor([0.4356, 0.1624, 0.2279, 0.1741]) -Greedy action tensor([ 0.5594, -0.0692, -0.0521, -0.1306]) tensor([0.3880, 0.2069, 0.2105, 0.1946]) -Greedy action tensor([ 0.3645, -0.1617, -0.0867, -0.6316]) tensor([0.3851, 0.2275, 0.2452, 0.1422]) -Greedy action tensor([ 0.6222, -0.0238, -0.0581, -0.5041]) tensor([0.4247, 0.2226, 0.2151, 0.1377]) -Greedy action tensor([ 0.7143, -0.3660, 0.0340, -0.4362]) tensor([0.4624, 0.1570, 0.2342, 0.1464]) -Greedy action tensor([ 0.3633, -0.0251, 0.0252, -0.1620]) tensor([0.3353, 0.2274, 0.2391, 0.1983]) -Greedy action tensor([ 0.6886, -0.5240, -0.0219, -0.6365]) tensor([0.4867, 0.1448, 0.2392, 0.1294]) -Greedy action tensor([ 0.4138, 0.0893, 0.1413, -0.2195]) tensor([0.3317, 0.2397, 0.2525, 0.1761]) -Greedy action tensor([ 1.0270, -0.8523, -0.0225, -0.4075]) tensor([0.5744, 0.0877, 0.2011, 0.1368]) -Greedy action tensor([ 0.5121, -0.1906, -0.0714, -0.3870]) tensor([0.4065, 0.2013, 0.2268, 0.1654]) -Greedy action tensor([ 0.7757, -0.2186, 0.0080, -0.4208]) tensor([0.4681, 0.1732, 0.2172, 0.1415]) -Greedy action tensor([ 0.4119, 0.0650, -0.0430, -0.1335]) tensor([0.3423, 0.2420, 0.2172, 0.1984]) -Greedy action tensor([ 0.4174, 0.0385, -0.0768, -0.4602]) tensor([0.3689, 0.2526, 0.2251, 0.1534]) -Greedy action tensor([ 0.4582, -0.1188, 0.0183, -0.2113]) tensor([0.3680, 0.2066, 0.2370, 0.1884]) -Greedy action tensor([ 0.3202, 0.2064, -0.0468, -0.3141]) tensor([0.3210, 0.2865, 0.2224, 0.1702]) -Greedy action tensor([ 0.5684, -0.3860, -0.0915, -0.4499]) tensor([0.4419, 0.1701, 0.2284, 0.1596]) -Greedy action tensor([ 0.3353, -0.0337, -0.0965, -0.2066]) tensor([0.3422, 0.2366, 0.2222, 0.1990]) -Greedy action tensor([ 0.6551, -0.2775, -0.0266, -0.2996]) tensor([0.4378, 0.1723, 0.2214, 0.1685]) -Greedy action tensor([ 0.6683, -0.6548, -0.1546, -0.7325]) tensor([0.5123, 0.1364, 0.2250, 0.1262]) -Greedy action tensor([ 0.9277, -0.5502, -0.0294, -0.5327]) tensor([0.5422, 0.1237, 0.2082, 0.1259]) -Greedy action tensor([ 0.6033, -0.2272, -0.0446, -0.2829]) tensor([0.4217, 0.1838, 0.2206, 0.1739]) -Greedy action tensor([ 0.5024, -0.3274, -0.0095, -0.5453]) tensor([0.4191, 0.1828, 0.2512, 0.1470]) -Greedy action tensor([ 0.6271, -0.1862, -0.0153, -0.4071]) tensor([0.4301, 0.1907, 0.2263, 0.1529]) -Greedy action tensor([ 0.8525, -0.4648, -0.1001, -0.7596]) tensor([0.5396, 0.1446, 0.2082, 0.1076]) -Greedy action tensor([ 0.4723, 0.0084, 0.0082, -0.1907]) tensor([0.3606, 0.2268, 0.2267, 0.1858]) -Greedy action tensor([ 0.3084, -0.0171, -0.0505, -0.4318]) tensor([0.3451, 0.2492, 0.2410, 0.1646]) -Greedy action tensor([ 0.5608, -0.4029, 0.1253, -0.4396]) tensor([0.4173, 0.1592, 0.2700, 0.1535]) -Greedy action tensor([ 0.6348, -0.4435, 0.1041, -0.5293]) tensor([0.4463, 0.1518, 0.2625, 0.1393]) -Greedy action tensor([ 0.5923, 0.0362, 0.0127, -0.1401]) tensor([0.3825, 0.2194, 0.2142, 0.1839]) -Greedy action tensor([ 0.5301, -0.0784, 0.0452, -0.4182]) tensor([0.3926, 0.2136, 0.2417, 0.1521]) -Greedy action tensor([ 0.2320, 0.3170, -0.0107, -0.2399]) tensor([0.2860, 0.3113, 0.2243, 0.1784]) -Greedy action tensor([ 0.7847, -0.5829, -0.2591, -0.5004]) tensor([0.5310, 0.1352, 0.1870, 0.1469]) -Greedy action tensor([ 0.6895, -0.0343, -0.0809, -0.2625]) tensor([0.4285, 0.2078, 0.1983, 0.1654]) -Greedy action tensor([ 0.4608, 0.0182, -0.0739, -0.1947]) tensor([0.3640, 0.2338, 0.2132, 0.1890]) -Greedy action tensor([ 0.2300, 0.0167, -0.0374, -0.2045]) tensor([0.3105, 0.2508, 0.2376, 0.2011]) -Greedy action tensor([ 0.6331, -0.1019, 0.0312, -0.1432]) tensor([0.4020, 0.1928, 0.2202, 0.1850]) -Greedy action tensor([ 0.4580, -0.0026, 0.0620, -0.2865]) tensor([0.3598, 0.2270, 0.2422, 0.1709]) -Greedy action tensor([ 1.0320, -0.7147, 0.0306, -0.8577]) tensor([0.5907, 0.1030, 0.2170, 0.0893]) -Greedy action tensor([ 1.3245, -0.4841, -0.2595, 0.0312]) tensor([0.6085, 0.0997, 0.1248, 0.1669]) -Greedy action tensor([ 1.3373, -0.6314, -0.3449, 0.0979]) tensor([0.6191, 0.0865, 0.1151, 0.1793]) -Greedy action tensor([ 1.3082, -0.4919, -0.3137, 0.2061]) tensor([0.5900, 0.0975, 0.1165, 0.1960]) -Greedy action tensor([ 0.6656, -0.3787, -0.5552, -0.1042]) tensor([0.4739, 0.1668, 0.1398, 0.2195]) -Greedy action tensor([ 0.9983, -0.5754, -0.1307, 0.0445]) tensor([0.5219, 0.1082, 0.1688, 0.2011]) -Greedy action tensor([ 1.2022, 0.2540, 0.0071, -0.1157]) tensor([0.5108, 0.1979, 0.1546, 0.1367]) -Greedy action tensor([ 0.8859, -0.4103, 0.2268, -0.1573]) tensor([0.4666, 0.1276, 0.2414, 0.1644]) -Greedy action tensor([ 0.7595, -0.0080, -0.3347, -0.2627]) tensor([0.4632, 0.2150, 0.1551, 0.1667]) -Greedy action tensor([ 1.1910, -0.2711, -0.1794, 0.0486]) tensor([0.5541, 0.1284, 0.1407, 0.1768]) -Greedy action tensor([ 0.7549, -0.3239, -0.2824, -0.2336]) tensor([0.4839, 0.1645, 0.1715, 0.1801]) -Greedy action tensor([ 0.2690, -0.2859, -0.0059, 0.0299]) tensor([0.3204, 0.1839, 0.2434, 0.2523]) -Greedy action tensor([ 1.1107, -0.8748, -0.0647, 0.2399]) tensor([0.5363, 0.0736, 0.1656, 0.2245]) -Greedy action tensor([ 1.2058, -0.3554, -0.2759, 0.1112]) tensor([0.5644, 0.1185, 0.1283, 0.1889]) -Greedy action tensor([ 1.5640, -0.4893, -0.4098, 0.3950]) tensor([0.6337, 0.0813, 0.0880, 0.1969]) -Greedy action tensor([ 1.0575, 0.1066, -0.1284, 0.0018]) tensor([0.4902, 0.1894, 0.1498, 0.1706]) -Greedy action tensor([ 0.8982, -0.5783, -0.1057, -0.0101]) tensor([0.5005, 0.1143, 0.1834, 0.2018]) -Greedy action tensor([ 1.2511, -0.1702, -0.2821, 0.1261]) tensor([0.5612, 0.1355, 0.1211, 0.1822]) -Greedy action tensor([ 1.1435, -0.7354, -0.2396, 0.0789]) tensor([0.5720, 0.0874, 0.1434, 0.1972]) -Greedy action tensor([ 1.5554, -0.9657, -0.4162, 0.5968]) tensor([0.6238, 0.0501, 0.0869, 0.2392]) -Greedy action tensor([ 1.3064, -0.2243, 0.2199, -0.1958]) tensor([0.5629, 0.1218, 0.1899, 0.1253]) -Greedy action tensor([ 0.8123, -0.0131, -0.3954, -0.0891]) tensor([0.4666, 0.2044, 0.1395, 0.1895]) -Greedy action tensor([ 1.7975, -0.7526, -0.0519, 0.4614]) tensor([0.6674, 0.0521, 0.1050, 0.1754]) -Greedy action tensor([ 0.7273, 0.1542, 0.2263, -0.1463]) tensor([0.3865, 0.2179, 0.2342, 0.1614]) -Greedy action tensor([ 0.6519, -0.4881, -0.0070, 0.2853]) tensor([0.3952, 0.1264, 0.2045, 0.2739]) -Greedy action tensor([ 1.1878, -0.6522, -0.2111, 0.5383]) tensor([0.5187, 0.0824, 0.1280, 0.2709]) -Greedy action tensor([ 1.6530, -0.7905, -0.2840, 0.0030]) tensor([0.7027, 0.0610, 0.1013, 0.1350]) -Greedy action tensor([ 1.7350, -0.3899, -0.5111, 0.0662]) tensor([0.7074, 0.0845, 0.0748, 0.1333]) -Greedy action tensor([ 1.2401, -0.0773, -0.2358, 0.0644]) tensor([0.5540, 0.1484, 0.1266, 0.1710]) -Greedy action tensor([ 0.6531, -0.3786, -0.1789, 0.2743]) tensor([0.4038, 0.1439, 0.1757, 0.2765]) -Greedy action tensor([ 1.4999, -0.2031, -0.5506, -0.1327]) tensor([0.6639, 0.1209, 0.0854, 0.1297]) -Greedy action tensor([ 1.5113, -0.4875, -0.2719, 0.2610]) tensor([0.6289, 0.0852, 0.1057, 0.1801]) -Greedy action tensor([ 1.1661, -0.4600, -0.4151, 0.4351]) tensor([0.5308, 0.1044, 0.1092, 0.2556]) -Greedy action tensor([ 1.6155, -0.7555, -0.4108, 0.7661]) tensor([0.6050, 0.0565, 0.0798, 0.2588]) -Greedy action tensor([ 0.8145, -0.4583, -0.0473, 0.3245]) tensor([0.4319, 0.1210, 0.1825, 0.2646]) -Greedy action tensor([ 1.0410, -0.8709, -0.3001, -0.1147]) tensor([0.5800, 0.0857, 0.1517, 0.1826]) -Greedy action tensor([ 0.5886, -0.4457, -0.1250, 0.1641]) tensor([0.4001, 0.1422, 0.1960, 0.2617]) -Greedy action tensor([ 0.6974, -0.3654, -0.0719, -0.0826]) tensor([0.4411, 0.1524, 0.2044, 0.2022]) -Greedy action tensor([ 0.7627, -0.1611, -0.0546, 0.1466]) tensor([0.4204, 0.1669, 0.1857, 0.2270]) -Greedy action tensor([ 0.9491, -0.0704, 0.0943, -0.0904]) tensor([0.4673, 0.1686, 0.1988, 0.1653]) -Greedy action tensor([ 1.1110, -0.3651, -0.2555, 0.2308]) tensor([0.5268, 0.1204, 0.1343, 0.2185]) -Greedy action tensor([ 1.1111e+00, -3.8603e-01, 9.4246e-03, -4.2375e-04]) tensor([0.5305, 0.1187, 0.1763, 0.1746]) -Greedy action tensor([ 1.2359, -0.4169, -0.1309, 0.2279]) tensor([0.5521, 0.1057, 0.1407, 0.2015]) -Greedy action tensor([ 1.3069, -0.6611, -0.2652, 0.3644]) tensor([0.5757, 0.0805, 0.1195, 0.2243]) -Greedy action tensor([ 1.1869, -0.6755, -0.0921, 0.2186]) tensor([0.5515, 0.0856, 0.1535, 0.2094]) -Greedy action tensor([ 0.9454, -0.7439, -0.3270, 0.0285]) tensor([0.5363, 0.0990, 0.1503, 0.2144]) -Greedy action tensor([ 1.5063, -0.5827, -0.2180, 0.2918]) tensor([0.6254, 0.0774, 0.1115, 0.1857]) -Greedy action tensor([ 0.8421, -0.2559, -0.5267, -0.2170]) tensor([0.5169, 0.1724, 0.1315, 0.1792]) -Greedy action tensor([ 0.6854, -0.3316, -0.2911, 0.3948]) tensor([0.4022, 0.1455, 0.1515, 0.3008]) -Greedy action tensor([ 1.3179, -0.8383, -0.2556, 0.3418]) tensor([0.5883, 0.0681, 0.1220, 0.2217]) -Greedy action tensor([ 0.5023, -0.2739, -0.3307, 0.4093]) tensor([0.3564, 0.1640, 0.1549, 0.3247]) -Greedy action tensor([ 1.5708, -0.2516, -0.5447, 0.2696]) tensor([0.6433, 0.1040, 0.0776, 0.1751]) -Greedy action tensor([ 1.4747, -0.4446, -0.1977, 0.0438]) tensor([0.6355, 0.0932, 0.1193, 0.1519]) -Greedy action tensor([ 0.5652, -0.4328, 0.0426, -0.0711]) tensor([0.4015, 0.1480, 0.2381, 0.2125]) -Greedy action tensor([ 1.5014, -0.5282, -0.3248, 0.3278]) tensor([0.6243, 0.0820, 0.1005, 0.1931]) -Greedy action tensor([ 1.1952, -0.6842, -0.0627, 0.4095]) tensor([0.5283, 0.0807, 0.1502, 0.2408]) -Greedy action tensor([ 1.3293, -0.3812, -0.0957, 0.1687]) tensor([0.5765, 0.1042, 0.1387, 0.1806]) -Greedy action tensor([ 1.7761, -0.7058, -0.3248, 0.6134]) tensor([0.6585, 0.0550, 0.0806, 0.2059]) -Greedy action tensor([ 0.9153, -0.5949, -0.2477, -0.0791]) tensor([0.5254, 0.1160, 0.1642, 0.1944]) -Greedy action tensor([ 1.3083, -0.7726, -0.2542, 0.3994]) tensor([0.5756, 0.0718, 0.1207, 0.2319]) -Greedy action tensor([ 1.3454, -0.6471, -0.3538, 0.1313]) tensor([0.6187, 0.0844, 0.1131, 0.1838]) -Greedy action tensor([ 1.2001, -0.4184, -0.2930, -0.1388]) tensor([0.5935, 0.1176, 0.1333, 0.1556]) -Greedy action tensor([ 1.3059, -0.3749, -0.3077, -0.1145]) tensor([0.6146, 0.1145, 0.1224, 0.1485]) -Greedy action tensor([ 0.5165, -0.1618, -0.3392, 0.1813]) tensor([0.3777, 0.1917, 0.1605, 0.2701]) -Greedy action tensor([ 1.3169, -0.4020, -0.5719, 0.2067]) tensor([0.6024, 0.1080, 0.0911, 0.1985]) -Greedy action tensor([ 1.8697, -0.1610, -0.3124, 0.2931]) tensor([0.6893, 0.0905, 0.0778, 0.1425]) -Greedy action tensor([ 0.7980, -0.4265, 0.0825, -0.1398]) tensor([0.4599, 0.1352, 0.2249, 0.1800]) -Greedy action tensor([ 0.8510, -0.2979, -0.0548, -0.0870]) tensor([0.4733, 0.1500, 0.1913, 0.1853]) -Greedy action tensor([ 1.0164, -0.5957, -0.1615, -0.1648]) tensor([0.5512, 0.1099, 0.1697, 0.1692]) -Greedy action tensor([ 1.0412, -0.4041, -0.3047, 0.4240]) tensor([0.4913, 0.1158, 0.1279, 0.2650]) -Greedy action tensor([ 0.3669, -0.0791, -0.0697, -0.2637]) tensor([0.3548, 0.2271, 0.2293, 0.1888]) -Greedy action tensor([ 1.9878, -0.8315, -0.4027, 0.2276]) tensor([0.7557, 0.0451, 0.0692, 0.1300]) -Greedy action tensor([ 1.4043, -0.2609, -0.2953, -0.0359]) tensor([0.6216, 0.1176, 0.1136, 0.1472]) -Greedy action tensor([ 0.8830, -0.0905, -0.2929, 0.1614]) tensor([0.4604, 0.1739, 0.1420, 0.2237]) -Greedy action tensor([ 0.4419, -0.3966, -0.1661, 0.0410]) tensor([0.3779, 0.1634, 0.2057, 0.2531]) -Greedy action tensor([ 0.8925, -0.2000, 0.0909, -0.0982]) tensor([0.4640, 0.1556, 0.2081, 0.1723]) -Greedy action tensor([ 1.2538, 0.0510, -0.1296, 0.0945]) tensor([0.5363, 0.1611, 0.1345, 0.1682]) -Greedy action tensor([ 0.8969, -0.0012, 0.0703, -0.1319]) tensor([0.4541, 0.1850, 0.1987, 0.1623]) -Greedy action tensor([ 0.8263, -0.1539, -0.1154, 0.1325]) tensor([0.4415, 0.1657, 0.1722, 0.2206]) -Greedy action tensor([ 1.9752, -0.9550, -0.4495, 0.2840]) tensor([0.7540, 0.0403, 0.0667, 0.1390]) -Greedy action tensor([ 1.1586, -0.5529, -0.4645, -0.1978]) tensor([0.6114, 0.1104, 0.1206, 0.1575]) -Greedy action tensor([ 0.7073, -0.2111, -0.0958, 0.0454]) tensor([0.4232, 0.1689, 0.1896, 0.2183]) -Greedy action tensor([ 0.9586, -1.4271, 1.0166, -0.6977]) tensor([0.4269, 0.0393, 0.4524, 0.0815]) -Greedy action tensor([ 0.4826, -1.3881, 1.3126, 0.8914]) tensor([0.2019, 0.0311, 0.4631, 0.3039]) -Greedy action tensor([-0.2408, -0.1168, -1.4966, 0.6362]) tensor([0.2075, 0.2348, 0.0591, 0.4986]) -Greedy action tensor([ 1.1577, 0.7999, 0.0723, -0.3148]) tensor([0.4412, 0.3085, 0.1490, 0.1012]) -Greedy action tensor([ 0.4622, 0.5781, -0.4658, 1.7179]) tensor([0.1659, 0.1863, 0.0656, 0.5823]) -Greedy action tensor([-0.3653, -2.2243, -0.3429, 1.5741]) tensor([0.1095, 0.0171, 0.1120, 0.7615]) -Greedy action tensor([0.5898, 0.2367, 0.1834, 1.2580]) tensor([0.2315, 0.1627, 0.1542, 0.4516]) -Greedy action tensor([ 0.9413, -0.1354, 0.5408, -0.2932]) tensor([0.4345, 0.1480, 0.2911, 0.1264]) -Greedy action tensor([1.6346, 0.0538, 0.5275, 0.5629]) tensor([0.5323, 0.1095, 0.1759, 0.1823]) -Greedy action tensor([ 0.2925, 0.0113, -0.7675, 0.1328]) tensor([0.3386, 0.2556, 0.1173, 0.2886]) -Greedy action tensor([ 0.0550, -0.8004, 1.8395, 0.0679]) tensor([0.1191, 0.0506, 0.7096, 0.1207]) -Greedy action tensor([2.2687, 1.1686, 0.7301, 1.6767]) tensor([0.4760, 0.1584, 0.1022, 0.2633]) -Greedy action tensor([0.5423, 0.0221, 0.6249, 1.1184]) tensor([0.2242, 0.1333, 0.2435, 0.3989]) -Greedy action tensor([1.7793, 0.4568, 0.6801, 1.1207]) tensor([0.4723, 0.1259, 0.1573, 0.2445]) -Greedy action tensor([ 0.3239, -0.1335, -0.5259, 0.3424]) tensor([0.3248, 0.2056, 0.1388, 0.3308]) -Greedy action tensor([ 1.9644, -0.6016, 0.0627, 1.4346]) tensor([0.5510, 0.0423, 0.0823, 0.3244]) -Greedy action tensor([ 0.8384, -0.1957, -1.0999, 1.2297]) tensor([0.3358, 0.1194, 0.0483, 0.4965]) -Greedy action tensor([-0.5701, -1.0096, -0.4770, -0.5770]) tensor([0.2677, 0.1725, 0.2938, 0.2659]) -Greedy action tensor([ 0.8328, -0.7394, 1.2484, 0.2490]) tensor([0.3048, 0.0633, 0.4619, 0.1700]) -Greedy action tensor([-0.6885, -1.0631, -0.6056, 0.9805]) tensor([0.1238, 0.0851, 0.1344, 0.6567]) -Greedy action tensor([ 1.0586, 0.3823, -0.2286, 0.2634]) tensor([0.4472, 0.2274, 0.1235, 0.2019]) -Greedy action tensor([ 0.2817, -1.0022, 0.0501, 1.7803]) tensor([0.1528, 0.0423, 0.1212, 0.6837]) -Greedy action tensor([0.4342, 0.9477, 0.1476, 0.3191]) tensor([0.2318, 0.3874, 0.1741, 0.2066]) -Greedy action tensor([ 0.6402, 0.5235, -0.0969, 0.5370]) tensor([0.3058, 0.2721, 0.1463, 0.2758]) -Greedy action tensor([-0.0656, -1.5819, -0.6185, 1.1956]) tensor([0.1878, 0.0412, 0.1080, 0.6629]) -Greedy action tensor([ 1.1108, 1.2993, -0.1189, 0.3765]) tensor([0.3356, 0.4052, 0.0981, 0.1610]) -Greedy action tensor([ 1.3245, -0.5739, 1.1394, 0.2111]) tensor([0.4330, 0.0649, 0.3599, 0.1422]) -Greedy action tensor([ 0.7183, 0.5355, -0.7249, 0.8884]) tensor([0.3073, 0.2559, 0.0726, 0.3642]) -Greedy action tensor([ 0.9028, -1.3375, 0.6711, 1.2209]) tensor([0.3054, 0.0325, 0.2423, 0.4198]) -Greedy action tensor([-0.7241, -0.2602, 1.1498, -0.5767]) tensor([0.0974, 0.1550, 0.6347, 0.1129]) -Greedy action tensor([ 1.4653, -0.2645, 1.3233, 1.5883]) tensor([0.3149, 0.0558, 0.2732, 0.3561]) -Greedy action tensor([ 0.0647, -1.2601, 0.1809, 2.2435]) tensor([0.0891, 0.0237, 0.1001, 0.7872]) -Greedy action tensor([ 0.7848, -1.6071, 1.2942, -0.4694]) tensor([0.3288, 0.0301, 0.5473, 0.0938]) -Greedy action tensor([ 1.2835, -0.0399, 0.2129, 0.9273]) tensor([0.4330, 0.1153, 0.1484, 0.3033]) -Greedy action tensor([ 1.2603, -0.1368, 1.1869, 1.6989]) tensor([0.2683, 0.0664, 0.2493, 0.4160]) -Greedy action tensor([0.9485, 0.1099, 0.7297, 0.5923]) tensor([0.3406, 0.1472, 0.2737, 0.2385]) -Greedy action tensor([-0.5285, -1.8609, -0.3882, 0.3385]) tensor([0.2086, 0.0550, 0.2400, 0.4964]) -Greedy action tensor([ 1.2413, -1.8222, 0.7152, 0.4545]) tensor([0.4778, 0.0223, 0.2823, 0.2175]) -Greedy action tensor([ 0.3156, -1.2386, 1.3270, 0.5539]) tensor([0.1912, 0.0404, 0.5257, 0.2426]) -Greedy action tensor([ 0.8739, 0.4224, -0.2938, 0.0106]) tensor([0.4220, 0.2687, 0.1313, 0.1780]) -Greedy action tensor([1.4634, 0.3633, 0.4443, 1.4414]) tensor([0.3742, 0.1246, 0.1351, 0.3661]) -Greedy action tensor([ 1.3481, -0.1381, 0.4345, 2.0607]) tensor([0.2727, 0.0617, 0.1094, 0.5562]) -Greedy action tensor([ 0.9056, -1.9761, 1.4611, 0.0785]) tensor([0.3090, 0.0173, 0.5385, 0.1351]) -Greedy action tensor([ 2.2572, -0.3655, 0.1436, 1.7798]) tensor([0.5513, 0.0400, 0.0666, 0.3421]) -Greedy action tensor([ 1.1782, -0.9975, 0.0660, 1.3864]) tensor([0.3740, 0.0425, 0.1230, 0.4606]) -Greedy action tensor([ 0.9230, 0.5332, -0.6811, 1.8138]) tensor([0.2317, 0.1569, 0.0466, 0.5647]) -Greedy action tensor([ 1.1375, -0.9038, 0.0719, 2.1276]) tensor([0.2400, 0.0312, 0.0827, 0.6461]) -Greedy action tensor([ 0.1956, 1.4785, 0.7392, -0.2577]) tensor([0.1436, 0.5179, 0.2473, 0.0913]) -Greedy action tensor([ 1.3423, -0.4362, -0.3538, 0.6995]) tensor([0.5325, 0.0899, 0.0976, 0.2800]) -Greedy action tensor([1.3889, 0.1220, 1.1038, 0.5263]) tensor([0.4072, 0.1147, 0.3062, 0.1719]) -Greedy action tensor([ 0.7006, -0.9509, 1.2154, 1.2456]) tensor([0.2179, 0.0418, 0.3646, 0.3758]) -Greedy action tensor([-0.5921, -1.0606, -0.2755, 0.0327]) tensor([0.2055, 0.1286, 0.2820, 0.3838]) -Greedy action tensor([1.3657, 0.0554, 1.2173, 0.4184]) tensor([0.3969, 0.1071, 0.3422, 0.1539]) -Greedy action tensor([ 0.7973, -0.1688, 0.3608, 1.6095]) tensor([0.2337, 0.0889, 0.1510, 0.5264]) -Greedy action tensor([ 1.1433, -0.2705, 0.6012, 1.4286]) tensor([0.3170, 0.0771, 0.1843, 0.4216]) -Greedy action tensor([ 0.5280, -0.2395, 0.9862, -0.6253]) tensor([0.2975, 0.1381, 0.4705, 0.0939]) -Greedy action tensor([0.6016, 0.0041, 0.4817, 1.1377]) tensor([0.2412, 0.1327, 0.2139, 0.4122]) -Greedy action tensor([ 1.7060, -0.5416, 0.4047, 0.4571]) tensor([0.6007, 0.0635, 0.1635, 0.1723]) -Greedy action tensor([ 0.4385, -0.1333, 0.0254, 2.0112]) tensor([0.1419, 0.0801, 0.0939, 0.6841]) -Greedy action tensor([ 0.7728, -0.3006, 0.8406, 1.1826]) tensor([0.2552, 0.0872, 0.2731, 0.3845]) -Greedy action tensor([ 1.3658, -0.5704, 1.2468, 2.0137]) tensor([0.2536, 0.0366, 0.2251, 0.4847]) -Greedy action tensor([ 0.2420, 0.0399, -0.6631, -0.2072]) tensor([0.3497, 0.2857, 0.1415, 0.2231]) -Greedy action tensor([ 1.7974, -0.4400, 0.6062, 1.8243]) tensor([0.4102, 0.0438, 0.1246, 0.4214]) -Greedy action tensor([0.8492, 0.1989, 1.1261, 0.8325]) tensor([0.2615, 0.1365, 0.3449, 0.2572]) -Greedy action tensor([ 0.3117, -1.0322, 1.3119, 1.0109]) tensor([0.1669, 0.0435, 0.4538, 0.3358]) -Greedy action tensor([ 0.6030, -0.4145, 0.1695, 0.9912]) tensor([0.2870, 0.1038, 0.1861, 0.4232]) -Greedy action tensor([ 1.0649, 1.6026, -0.2980, 0.8906]) tensor([0.2626, 0.4496, 0.0672, 0.2206]) -Greedy action tensor([ 0.8448, 0.5548, 0.0307, -0.0587]) tensor([0.3851, 0.2882, 0.1706, 0.1560]) -Greedy action tensor([ 0.2529, -0.3029, -0.6315, 1.3228]) tensor([0.2040, 0.1170, 0.0842, 0.5947]) -Greedy action tensor([ 0.4236, 1.2010, -0.2292, 1.3303]) tensor([0.1620, 0.3525, 0.0843, 0.4012]) -Greedy action tensor([ 1.3748, -1.2938, 0.4632, -0.0997]) tensor([0.5882, 0.0408, 0.2364, 0.1346]) -Greedy action tensor([0.5241, 0.1291, 0.5663, 0.7077]) tensor([0.2552, 0.1719, 0.2662, 0.3067]) -Greedy action tensor([ 0.2358, -1.3148, 0.7362, 0.7631]) tensor([0.2195, 0.0466, 0.3620, 0.3719]) -Greedy action tensor([ 2.4579, -0.0162, 0.5501, 1.7517]) tensor([0.5793, 0.0488, 0.0860, 0.2859]) -Greedy action tensor([ 2.1754, -0.4031, 0.5410, 1.2379]) tensor([0.6015, 0.0456, 0.1173, 0.2356]) -Greedy action tensor([ 1.4027, 0.6366, -0.8634, 0.7246]) tensor([0.4817, 0.2239, 0.0500, 0.2445]) -Greedy action tensor([1.2219, 1.3086, 0.0638, 1.1036]) tensor([0.3037, 0.3312, 0.0954, 0.2698]) -Greedy action tensor([-0.8446, -1.0208, -0.2148, -0.0542]) tensor([0.1689, 0.1416, 0.3171, 0.3723]) -Greedy action tensor([ 0.2865, -0.6271, 0.3192, -0.4431]) tensor([0.3429, 0.1375, 0.3543, 0.1653]) -Greedy action tensor([ 1.0973, 1.0281, -0.0368, 0.3000]) tensor([0.3696, 0.3449, 0.1189, 0.1665]) -Greedy action tensor([ 0.7450, -0.4271, 0.7308, 1.7970]) tensor([0.1938, 0.0600, 0.1911, 0.5550]) -Greedy action tensor([ 1.0055, -0.4177, 0.7009, 0.5415]) tensor([0.3836, 0.0924, 0.2828, 0.2412]) -Greedy action tensor([-0.3572, -0.3278, 1.1036, 1.6791]) tensor([0.0714, 0.0736, 0.3078, 0.5472]) -Greedy action tensor([-0.3740, 0.0154, 0.8391, 1.5761]) tensor([0.0777, 0.1147, 0.2614, 0.5462]) -Greedy action tensor([-1.6162, -0.4642, 0.5712, 0.1455]) tensor([0.0529, 0.1674, 0.4716, 0.3081]) -Greedy action tensor([-1.7898, -0.4773, 0.6034, -0.0309]) tensor([0.0466, 0.1730, 0.5099, 0.2704]) -Greedy action tensor([-1.6897, -0.4521, 0.7357, 0.4807]) tensor([0.0408, 0.1406, 0.4612, 0.3574]) -Greedy action tensor([-1.9395, -0.5361, 1.3946, 0.7658]) tensor([0.0208, 0.0846, 0.5835, 0.3111]) -Greedy action tensor([-0.6307, -0.3647, 1.0717, 1.5407]) tensor([0.0604, 0.0788, 0.3313, 0.5296]) -Greedy action tensor([-0.9009, -0.8406, 0.5810, 1.5240]) tensor([0.0563, 0.0598, 0.2478, 0.6362]) -Greedy action tensor([-1.4383, -0.4457, 0.5220, 0.5402]) tensor([0.0555, 0.1496, 0.3938, 0.4011]) -Greedy action tensor([-1.7336, -0.2469, 0.6897, 0.1143]) tensor([0.0434, 0.1919, 0.4895, 0.2753]) -Greedy action tensor([-1.8355, -0.4392, 0.6423, -0.0653]) tensor([0.0438, 0.1770, 0.5220, 0.2572]) -Greedy action tensor([-1.2829, -0.5974, 0.3061, 0.2925]) tensor([0.0786, 0.1561, 0.3852, 0.3800]) -Greedy action tensor([-1.9319, -0.6526, 0.7801, 0.0113]) tensor([0.0375, 0.1350, 0.5654, 0.2621]) -Greedy action tensor([-1.4063, -0.5287, 0.4392, 0.3779]) tensor([0.0637, 0.1533, 0.4035, 0.3795]) -Greedy action tensor([-2.0271, -0.5872, 0.7946, 0.2565]) tensor([0.0314, 0.1326, 0.5278, 0.3082]) -Greedy action tensor([-1.9272, -0.9437, 0.2188, -0.3930]) tensor([0.0593, 0.1586, 0.5071, 0.2750]) -Greedy action tensor([-1.8229, -0.5699, 1.4685, 0.9376]) tensor([0.0212, 0.0742, 0.5696, 0.3350]) -Greedy action tensor([-1.6769, -0.4526, 0.6452, 0.2160]) tensor([0.0471, 0.1602, 0.4801, 0.3126]) -Greedy action tensor([-1.4131, -0.6092, 0.3788, 0.1702]) tensor([0.0709, 0.1584, 0.4254, 0.3453]) -Greedy action tensor([-1.6306, 0.4913, 0.5454, -0.3799]) tensor([0.0462, 0.3855, 0.4070, 0.1613]) -Greedy action tensor([-1.7221, -0.5061, 0.6213, 0.0864]) tensor([0.0479, 0.1615, 0.4986, 0.2920]) -Greedy action tensor([-1.4400, 0.0188, 0.2673, -0.3206]) tensor([0.0721, 0.3099, 0.3973, 0.2207]) -Greedy action tensor([-1.8148, -0.4004, 0.6065, -0.0869]) tensor([0.0455, 0.1870, 0.5118, 0.2558]) -Greedy action tensor([-0.8564, -0.2617, 0.4101, -0.4340]) tensor([0.1268, 0.2298, 0.4499, 0.1935]) -Greedy action tensor([-0.6162, -0.5886, 0.1867, 0.1771]) tensor([0.1545, 0.1589, 0.3450, 0.3416]) -Greedy action tensor([-1.5345, -0.5279, 0.4307, 0.0970]) tensor([0.0626, 0.1712, 0.4465, 0.3198]) -Greedy action tensor([-1.3347, -0.4387, 0.7576, -0.6772]) tensor([0.0742, 0.1817, 0.6010, 0.1431]) -Greedy action tensor([-1.8233, -0.4451, 0.6570, -0.0311]) tensor([0.0436, 0.1732, 0.5213, 0.2619]) -Greedy action tensor([-0.8353, -0.3789, 1.2795, 1.4199]) tensor([0.0490, 0.0774, 0.4062, 0.4674]) -Greedy action tensor([-1.7962, -0.4771, 0.6149, -0.0686]) tensor([0.0465, 0.1739, 0.5181, 0.2615]) -Greedy action tensor([-1.8476, -0.2906, 0.5974, -0.1115]) tensor([0.0436, 0.2067, 0.5024, 0.2473]) -Greedy action tensor([-1.1828, 0.1643, 0.3214, 0.5715]) tensor([0.0661, 0.2543, 0.2975, 0.3821]) -Greedy action tensor([-0.8901, 0.1301, 0.1881, 0.0721]) tensor([0.1072, 0.2973, 0.3150, 0.2805]) -Greedy action tensor([-1.9163, -0.7731, 0.0755, -0.2930]) tensor([0.0605, 0.1897, 0.4432, 0.3066]) -Greedy action tensor([-1.8622, -0.4739, 0.6262, -0.1271]) tensor([0.0440, 0.1764, 0.5300, 0.2495]) -Greedy action tensor([-1.1439, -0.5420, 0.2943, 0.3159]) tensor([0.0882, 0.1609, 0.3714, 0.3795]) -Greedy action tensor([-1.4869, -0.5901, 0.4219, 0.1019]) tensor([0.0662, 0.1624, 0.4468, 0.3245]) -Greedy action tensor([-0.4545, -0.5152, 0.1859, 0.0751]) tensor([0.1806, 0.1700, 0.3427, 0.3067]) -Greedy action tensor([-1.5603, -0.5479, 0.4755, 0.0236]) tensor([0.0614, 0.1690, 0.4703, 0.2993]) -Greedy action tensor([-1.8279, 0.0591, 0.5556, -0.0770]) tensor([0.0413, 0.2727, 0.4480, 0.2380]) -Greedy action tensor([-1.9068, -0.3849, 0.6341, -0.1654]) tensor([0.0417, 0.1911, 0.5293, 0.2380]) -Greedy action tensor([-1.2725, 0.3451, 0.2969, 0.6536]) tensor([0.0565, 0.2847, 0.2713, 0.3876]) -Greedy action tensor([-1.4839, -0.4488, 0.4187, 0.1988]) tensor([0.0629, 0.1771, 0.4216, 0.3384]) -Greedy action tensor([-0.9284, -0.5733, 0.2421, 0.2967]) tensor([0.1104, 0.1575, 0.3560, 0.3760]) -Greedy action tensor([-1.4730, -0.4319, 1.3170, 1.1188]) tensor([0.0299, 0.0846, 0.4865, 0.3990]) -Greedy action tensor([-1.9817, -0.5288, 1.1988, 0.4869]) tensor([0.0243, 0.1039, 0.5848, 0.2870]) -Greedy action tensor([-1.3837, -0.3109, 0.7445, 0.9792]) tensor([0.0436, 0.1274, 0.3661, 0.4629]) -Greedy action tensor([-1.8507, -0.4727, 0.6236, -0.1222]) tensor([0.0445, 0.1765, 0.5283, 0.2506]) -Greedy action tensor([-1.4753, -0.6844, 0.6688, 0.1471]) tensor([0.0595, 0.1312, 0.5078, 0.3014]) -Greedy action tensor([-0.7181, -0.3468, 0.3460, 0.0311]) tensor([0.1340, 0.1942, 0.3883, 0.2834]) -Greedy action tensor([-0.8311, 0.3084, 0.1759, 0.5967]) tensor([0.0906, 0.2833, 0.2481, 0.3779]) -Greedy action tensor([-0.9848, -0.5823, 1.2854, 1.5054]) tensor([0.0413, 0.0617, 0.3994, 0.4977]) -Greedy action tensor([-1.9321, -0.4894, 0.6971, -0.1451]) tensor([0.0399, 0.1688, 0.5530, 0.2382]) -Greedy action tensor([-1.7100, -0.4904, 0.6223, 0.0617]) tensor([0.0486, 0.1646, 0.5008, 0.2859]) -Greedy action tensor([-0.6948, -0.5680, 0.3218, 0.2660]) tensor([0.1331, 0.1511, 0.3679, 0.3479]) -Greedy action tensor([-1.5528, -0.4168, 0.6033, 0.3238]) tensor([0.0519, 0.1615, 0.4479, 0.3387]) -Greedy action tensor([-1.8735, -0.4291, 0.6374, -0.1224]) tensor([0.0429, 0.1818, 0.5282, 0.2471]) -Greedy action tensor([-1.4868, -1.0234, -0.0249, -0.6789]) tensor([0.1093, 0.1738, 0.4717, 0.2452]) -Greedy action tensor([-1.8789, -0.4524, 0.6352, -0.1449]) tensor([0.0431, 0.1796, 0.5330, 0.2443]) -Greedy action tensor([-1.4175, -0.5130, 0.3660, 0.1562]) tensor([0.0702, 0.1734, 0.4177, 0.3387]) -Greedy action tensor([-0.4797, 0.9332, -0.0401, 0.2654]) tensor([0.1141, 0.4686, 0.1770, 0.2403]) -Greedy action tensor([-1.1825, -0.5874, 1.3191, 1.4095]) tensor([0.0352, 0.0639, 0.4301, 0.4708]) -Greedy action tensor([-1.6458, -0.6493, 0.5504, -0.0341]) tensor([0.0565, 0.1530, 0.5076, 0.2829]) -Greedy action tensor([-1.3656, -0.6119, 0.3937, 0.3476]) tensor([0.0691, 0.1467, 0.4011, 0.3831]) -Greedy action tensor([-1.2653, -0.2654, 0.7098, 1.0399]) tensor([0.0477, 0.1297, 0.3440, 0.4785]) -Greedy action tensor([-1.7523, -0.5198, 0.5710, -0.0682]) tensor([0.0499, 0.1713, 0.5098, 0.2690]) -Greedy action tensor([-1.3639, -0.5884, 0.3490, 0.2149]) tensor([0.0737, 0.1601, 0.4088, 0.3575]) -Greedy action tensor([-1.7850, -0.5800, 0.8887, 0.4786]) tensor([0.0352, 0.1173, 0.5095, 0.3381]) -Greedy action tensor([-1.6487, -0.5806, 0.8418, 0.4553]) tensor([0.0414, 0.1204, 0.4991, 0.3391]) -Greedy action tensor([-0.4199, 1.0051, 0.0218, 0.1354]) tensor([0.1183, 0.4917, 0.1839, 0.2061]) -Greedy action tensor([-1.6771, -0.2309, 0.5419, 0.1459]) tensor([0.0485, 0.2058, 0.4457, 0.3000]) -Greedy action tensor([-0.3600, 0.2443, 0.3973, 1.1760]) tensor([0.1041, 0.1905, 0.2219, 0.4835]) -Greedy action tensor([-1.6402, 0.1057, 0.4463, -0.0705]) tensor([0.0510, 0.2925, 0.4112, 0.2453]) -Greedy action tensor([-1.1186, 0.4726, 0.4470, -0.6454]) tensor([0.0813, 0.3991, 0.3891, 0.1305]) -Greedy action tensor([-1.4298, -0.1900, 0.5792, 0.7376]) tensor([0.0484, 0.1673, 0.3611, 0.4231]) -Greedy action tensor([-1.0687, -0.5765, 0.2551, 0.4979]) tensor([0.0894, 0.1463, 0.3360, 0.4283]) -Greedy action tensor([-1.6465, -0.5525, 0.6399, -0.2216]) tensor([0.0556, 0.1661, 0.5471, 0.2312]) -Greedy action tensor([-1.7564, -0.4922, 0.6824, 0.1481]) tensor([0.0440, 0.1559, 0.5045, 0.2957]) -Greedy action tensor([-1.5789, -0.5717, 0.4865, 0.0418]) tensor([0.0599, 0.1641, 0.4728, 0.3031]) -Greedy action tensor([-1.7401, -0.4349, 0.5641, -0.0607]) tensor([0.0498, 0.1838, 0.4991, 0.2672]) -Greedy action tensor([-1.8714, -0.4097, 0.6251, -0.1352]) tensor([0.0432, 0.1865, 0.5249, 0.2454]) -Greedy action tensor([ 0.5049, -0.0579, -0.1169, -0.0064]) tensor([0.3695, 0.2105, 0.1984, 0.2216]) -Greedy action tensor([ 0.3874, -0.0083, 0.0210, -0.3305]) tensor([0.3503, 0.2359, 0.2429, 0.1709]) -Greedy action tensor([ 0.6082, 0.0321, 0.0074, -0.2786]) tensor([0.3964, 0.2228, 0.2174, 0.1633]) -Greedy action tensor([ 0.6607, 0.1330, -0.1649, -0.3558]) tensor([0.4184, 0.2469, 0.1833, 0.1514]) -Greedy action tensor([ 0.8914, -0.5802, -0.1598, -0.5885]) tensor([0.5535, 0.1271, 0.1934, 0.1260]) -Greedy action tensor([ 0.5404, 0.1111, -0.1033, -0.1218]) tensor([0.3715, 0.2418, 0.1952, 0.1916]) -Greedy action tensor([ 0.7207, -0.2922, -0.0820, -0.2750]) tensor([0.4586, 0.1665, 0.2055, 0.1694]) -Greedy action tensor([ 0.3863, -0.3019, 0.2003, -0.5182]) tensor([0.3653, 0.1836, 0.3033, 0.1479]) -Greedy action tensor([ 0.3967, 0.1269, -0.1161, -0.0291]) tensor([0.3316, 0.2532, 0.1986, 0.2166]) -Greedy action tensor([ 0.8973, -0.3706, 0.0385, -0.3477]) tensor([0.5017, 0.1412, 0.2126, 0.1445]) -Greedy action tensor([ 0.4234, 0.1588, 0.0726, -0.2592]) tensor([0.3359, 0.2578, 0.2365, 0.1697]) -Greedy action tensor([ 0.7387, -0.4394, 0.0720, -0.6492]) tensor([0.4829, 0.1487, 0.2479, 0.1205]) -Greedy action tensor([ 0.6401, 0.0208, -0.0320, 0.0369]) tensor([0.3852, 0.2074, 0.1967, 0.2107]) -Greedy action tensor([ 0.4821, 0.0647, 0.1538, -0.3701]) tensor([0.3564, 0.2348, 0.2567, 0.1520]) -Greedy action tensor([ 0.7044, -0.1503, 0.1702, -0.3734]) tensor([0.4252, 0.1809, 0.2492, 0.1447]) -Greedy action tensor([ 0.8024, -0.1961, 0.0276, -0.2565]) tensor([0.4595, 0.1693, 0.2118, 0.1594]) -Greedy action tensor([ 0.7507, -0.3912, -0.0618, -0.2398]) tensor([0.4685, 0.1496, 0.2079, 0.1740]) -Greedy action tensor([ 0.7729, -0.2960, -0.1569, -0.3747]) tensor([0.4865, 0.1671, 0.1920, 0.1544]) -Greedy action tensor([ 0.2721, -0.0069, -0.0786, -0.2759]) tensor([0.3291, 0.2489, 0.2317, 0.1903]) -Greedy action tensor([ 0.8248, -0.5495, -0.1082, -0.3696]) tensor([0.5130, 0.1298, 0.2018, 0.1554]) -Greedy action tensor([ 1.0047, -0.5949, -0.1011, -0.6725]) tensor([0.5814, 0.1174, 0.1924, 0.1087]) -Greedy action tensor([ 0.4223, 0.0543, -0.0092, -0.3751]) tensor([0.3581, 0.2479, 0.2326, 0.1614]) -Greedy action tensor([ 0.4257, -0.1151, -0.0206, -0.3855]) tensor([0.3750, 0.2184, 0.2400, 0.1666]) -Greedy action tensor([ 0.8386, -0.3280, 0.0269, -0.4514]) tensor([0.4924, 0.1534, 0.2187, 0.1356]) -Greedy action tensor([ 0.4426, -0.3066, -0.0301, -0.4110]) tensor([0.3965, 0.1874, 0.2472, 0.1689]) -Greedy action tensor([ 0.5604, -0.0663, 0.0860, -0.4429]) tensor([0.3963, 0.2118, 0.2466, 0.1453]) -Greedy action tensor([ 0.4460, -0.1744, -0.0067, -0.2282]) tensor([0.3727, 0.2004, 0.2370, 0.1899]) -Greedy action tensor([ 0.7619, -0.3169, 0.0671, -0.3591]) tensor([0.4619, 0.1570, 0.2306, 0.1505]) -Greedy action tensor([ 0.8947, -0.4085, 0.0412, -0.4844]) tensor([0.5130, 0.1394, 0.2185, 0.1292]) -Greedy action tensor([ 1.1199, -0.9724, -0.0191, -0.6071]) tensor([0.6168, 0.0761, 0.1975, 0.1097]) -Greedy action tensor([ 1.0787, -1.1725, -0.0152, -0.4891]) tensor([0.6066, 0.0638, 0.2031, 0.1265]) -Greedy action tensor([ 0.4014, 0.0469, -0.0221, -0.4286]) tensor([0.3581, 0.2512, 0.2345, 0.1562]) -Greedy action tensor([ 0.4335, 0.0042, 0.0339, -0.2921]) tensor([0.3564, 0.2320, 0.2390, 0.1725]) -Greedy action tensor([ 0.4339, -0.2665, -0.0702, -0.3182]) tensor([0.3888, 0.1930, 0.2349, 0.1833]) -Greedy action tensor([ 0.2929, 0.2685, 0.0061, -0.1488]) tensor([0.2968, 0.2896, 0.2228, 0.1908]) -Greedy action tensor([ 0.9341, -1.0935, -0.0450, -0.4407]) tensor([0.5681, 0.0748, 0.2134, 0.1437]) -Greedy action tensor([ 0.5254, -0.1671, 0.0872, -0.4534]) tensor([0.3966, 0.1984, 0.2559, 0.1490]) -Greedy action tensor([ 0.4905, -0.1473, 0.0781, -0.2800]) tensor([0.3769, 0.1992, 0.2495, 0.1744]) -Greedy action tensor([ 0.5372, -0.2416, -0.1073, -0.2106]) tensor([0.4069, 0.1868, 0.2136, 0.1927]) -Greedy action tensor([ 0.5721, -0.2716, -0.0151, -0.3238]) tensor([0.4177, 0.1796, 0.2322, 0.1705]) -Greedy action tensor([ 0.3328, 0.1392, -0.0622, -0.3337]) tensor([0.3321, 0.2737, 0.2237, 0.1705]) -Greedy action tensor([ 0.6856, -0.4232, -0.0721, -0.3496]) tensor([0.4643, 0.1532, 0.2176, 0.1649]) -Greedy action tensor([ 0.3040, 0.0528, -0.0095, -0.0966]) tensor([0.3146, 0.2447, 0.2299, 0.2108]) -Greedy action tensor([ 0.8130, -0.5292, -0.0478, -0.8009]) tensor([0.5310, 0.1387, 0.2245, 0.1057]) -Greedy action tensor([0.2589, 0.0031, 0.1722, 0.0298]) tensor([0.2868, 0.2221, 0.2630, 0.2281]) -Greedy action tensor([ 0.3797, -0.1966, -0.0515, -0.1979]) tensor([0.3606, 0.2027, 0.2343, 0.2024]) -Greedy action tensor([ 0.6343, -0.4221, 0.0176, -0.1991]) tensor([0.4307, 0.1497, 0.2324, 0.1871]) -Greedy action tensor([ 0.5893, 0.0191, 0.0180, -0.1345]) tensor([0.3824, 0.2162, 0.2160, 0.1854]) -Greedy action tensor([ 0.7281, -0.3116, -0.0848, -0.3236]) tensor([0.4659, 0.1647, 0.2066, 0.1627]) -Greedy action tensor([ 0.9872, -0.7139, -0.0845, -0.5813]) tensor([0.5769, 0.1053, 0.1976, 0.1202]) -Greedy action tensor([ 0.7019, -0.3086, -0.0449, -0.2831]) tensor([0.4522, 0.1646, 0.2143, 0.1689]) -Greedy action tensor([ 0.6443, -0.3095, -0.0156, -0.3736]) tensor([0.4418, 0.1702, 0.2284, 0.1596]) -Greedy action tensor([ 0.5210, -0.1546, 0.2324, -0.5512]) tensor([0.3846, 0.1957, 0.2881, 0.1316]) -Greedy action tensor([ 0.8378, -0.6275, -0.0777, -0.3206]) tensor([0.5140, 0.1188, 0.2058, 0.1614]) -Greedy action tensor([ 0.8265, -0.5743, -0.0600, -0.6148]) tensor([0.5277, 0.1300, 0.2175, 0.1249]) -Greedy action tensor([ 0.4185, 0.1767, 0.2016, -0.3766]) tensor([0.3288, 0.2582, 0.2647, 0.1484]) -Greedy action tensor([ 0.4666, -0.1104, 0.0415, -0.2690]) tensor([0.3711, 0.2084, 0.2426, 0.1779]) -Greedy action tensor([0.4587, 0.1687, 0.0527, 0.0444]) tensor([0.3252, 0.2433, 0.2167, 0.2149]) -Greedy action tensor([ 0.3725, 0.0017, 0.1213, -0.2553]) tensor([0.3331, 0.2299, 0.2591, 0.1778]) -Greedy action tensor([ 0.4128, 0.0732, -0.0167, -0.2693]) tensor([0.3486, 0.2482, 0.2269, 0.1762]) -Greedy action tensor([ 0.0956, -0.0777, 0.0427, -0.1355]) tensor([0.2791, 0.2347, 0.2647, 0.2215]) -Greedy action tensor([ 0.5883, -0.3230, -0.0109, -0.2736]) tensor([0.4213, 0.1694, 0.2314, 0.1779]) -Greedy action tensor([ 0.3493, -0.1732, -0.0979, -0.2570]) tensor([0.3600, 0.2135, 0.2302, 0.1963]) -Greedy action tensor([ 0.3131, 0.0417, -0.0202, -0.2956]) tensor([0.3308, 0.2522, 0.2370, 0.1800]) -Greedy action tensor([ 0.8245, -0.3410, -0.0268, -0.4932]) tensor([0.4984, 0.1554, 0.2127, 0.1334]) -Greedy action tensor([ 0.6022, -0.1167, -0.0859, -0.3287]) tensor([0.4195, 0.2044, 0.2108, 0.1654]) -Greedy action tensor([ 0.3280, -0.2057, 0.0087, -0.1908]) tensor([0.3438, 0.2016, 0.2499, 0.2047]) -Greedy action tensor([ 0.2571, -0.1918, 0.1607, -0.4836]) tensor([0.3308, 0.2111, 0.3004, 0.1577]) -Greedy action tensor([ 0.7076, -0.3414, 0.0300, -0.4653]) tensor([0.4614, 0.1616, 0.2343, 0.1428]) -Greedy action tensor([ 0.5731, -0.4800, 0.1010, -0.5673]) tensor([0.4363, 0.1522, 0.2721, 0.1395]) -Greedy action tensor([ 0.9990, -0.2887, 0.0212, -0.8014]) tensor([0.5503, 0.1518, 0.2070, 0.0909]) -Greedy action tensor([ 0.8481, -0.5439, -0.0188, -0.3919]) tensor([0.5107, 0.1269, 0.2146, 0.1478]) -Greedy action tensor([ 0.4477, -0.1680, 0.0196, -0.4337]) tensor([0.3837, 0.2073, 0.2501, 0.1589]) -Greedy action tensor([ 0.5356, -0.1402, 0.0585, -0.2934]) tensor([0.3897, 0.1983, 0.2419, 0.1701]) -Greedy action tensor([ 0.1909, 0.1278, -0.1533, -0.0754]) tensor([0.2929, 0.2750, 0.2076, 0.2244]) -Greedy action tensor([ 0.5913, -0.2454, -0.1278, -0.2835]) tensor([0.4278, 0.1853, 0.2084, 0.1784]) -Greedy action tensor([ 1.0447, -0.4308, -0.0455, -0.2343]) tensor([0.5425, 0.1241, 0.1824, 0.1510]) -Greedy action tensor([ 0.5706, -0.4960, -0.0617, -0.2092]) tensor([0.4284, 0.1475, 0.2277, 0.1964]) -Greedy action tensor([ 0.4366, -0.0126, -0.0132, -0.4789]) tensor([0.3737, 0.2385, 0.2383, 0.1496]) -Greedy action tensor([ 0.6848, -0.4357, 0.0872, -0.5307]) tensor([0.4602, 0.1501, 0.2532, 0.1365]) -Greedy action tensor([ 0.3469, 0.0387, 0.0386, -0.2758]) tensor([0.3327, 0.2444, 0.2444, 0.1785]) -Greedy action tensor([ 0.4746, 0.1565, 0.0431, -0.2883]) tensor([0.3517, 0.2559, 0.2284, 0.1640]) -Greedy action tensor([ 1.3660, -0.4779, -0.1107, 0.2256]) tensor([0.5861, 0.0927, 0.1339, 0.1874]) -Greedy action tensor([ 1.4550, -0.3366, -0.5723, -0.1471]) tensor([0.6667, 0.1111, 0.0878, 0.1343]) -Greedy action tensor([ 1.5244, -1.1060, -0.2814, 0.3831]) tensor([0.6428, 0.0463, 0.1056, 0.2053]) -Greedy action tensor([ 1.1263, -0.4150, -0.0837, 0.1060]) tensor([0.5340, 0.1143, 0.1592, 0.1925]) -Greedy action tensor([ 1.0970, -0.3319, -0.3149, 0.6387]) tensor([0.4727, 0.1132, 0.1152, 0.2989]) -Greedy action tensor([ 1.2760, -0.7814, -0.1927, 0.4094]) tensor([0.5623, 0.0719, 0.1295, 0.2364]) -Greedy action tensor([ 1.5378, -0.7956, -0.2460, -0.0283]) tensor([0.6785, 0.0658, 0.1140, 0.1417]) -Greedy action tensor([ 1.4044, -0.6923, -0.3396, 0.2181]) tensor([0.6238, 0.0766, 0.1091, 0.1905]) -Greedy action tensor([ 0.8421, -0.3956, -0.4621, 0.6726]) tensor([0.4157, 0.1206, 0.1128, 0.3509]) -Greedy action tensor([ 0.7927, -0.6049, 0.2154, -0.0998]) tensor([0.4508, 0.1114, 0.2531, 0.1847]) -Greedy action tensor([ 1.1300, -0.4971, -0.2965, 0.2987]) tensor([0.5342, 0.1050, 0.1283, 0.2326]) -Greedy action tensor([ 0.9314, -0.3610, -0.0140, 0.0836]) tensor([0.4781, 0.1313, 0.1858, 0.2048]) -Greedy action tensor([ 1.0097, -0.5389, -0.4597, 0.7129]) tensor([0.4575, 0.0972, 0.1053, 0.3400]) -Greedy action tensor([ 1.4476, -0.6703, -0.2172, 0.4339]) tensor([0.5980, 0.0719, 0.1131, 0.2170]) -Greedy action tensor([ 1.1903, -0.4264, -0.2173, -0.1145]) tensor([0.5833, 0.1158, 0.1427, 0.1582]) -Greedy action tensor([ 0.8853, -0.0922, -0.3393, -0.2052]) tensor([0.4985, 0.1875, 0.1465, 0.1675]) -Greedy action tensor([ 1.2588, -0.4597, -0.1363, 0.1607]) tensor([0.5680, 0.1019, 0.1407, 0.1894]) -Greedy action tensor([ 0.8292, -0.2484, -0.3323, -0.3218]) tensor([0.5077, 0.1728, 0.1589, 0.1606]) -Greedy action tensor([ 1.2683, -0.3282, -0.0383, 0.1796]) tensor([0.5525, 0.1119, 0.1496, 0.1860]) -Greedy action tensor([ 0.9427, -0.1898, -0.1664, 0.1125]) tensor([0.4789, 0.1543, 0.1580, 0.2088]) -Greedy action tensor([ 0.9809, -0.3636, -0.3587, 0.2750]) tensor([0.4960, 0.1293, 0.1299, 0.2448]) -Greedy action tensor([ 0.8819, -0.5025, -0.2910, -0.0296]) tensor([0.5097, 0.1277, 0.1577, 0.2049]) -Greedy action tensor([ 0.8638, -0.2516, -0.2971, 0.3229]) tensor([0.4498, 0.1474, 0.1409, 0.2619]) -Greedy action tensor([ 1.0828, -0.2477, 0.1759, -0.1821]) tensor([0.5127, 0.1355, 0.2070, 0.1447]) -Greedy action tensor([ 1.0015, -0.5876, -0.1536, 0.2094]) tensor([0.5071, 0.1035, 0.1597, 0.2297]) -Greedy action tensor([ 0.7333, -0.6220, -0.8184, -0.2727]) tensor([0.5448, 0.1405, 0.1154, 0.1992]) -Greedy action tensor([ 0.8545, -0.1499, -0.1809, -0.0570]) tensor([0.4710, 0.1725, 0.1672, 0.1893]) -Greedy action tensor([ 1.1840, -0.7648, -0.3564, 1.1584]) tensor([0.4289, 0.0611, 0.0919, 0.4181]) -Greedy action tensor([ 0.6097, -0.4211, -0.1483, 0.1615]) tensor([0.4058, 0.1448, 0.1902, 0.2592]) -Greedy action tensor([ 1.1111, -0.3947, -0.2547, 0.2569]) tensor([0.5256, 0.1166, 0.1341, 0.2237]) -Greedy action tensor([ 0.7874, -0.2654, -0.2569, 0.0278]) tensor([0.4611, 0.1609, 0.1623, 0.2157]) -Greedy action tensor([ 0.6872, -0.1241, -0.3633, 0.0129]) tensor([0.4341, 0.1929, 0.1518, 0.2212]) -Greedy action tensor([ 1.4612, -0.6035, -0.2569, 0.4343]) tensor([0.6008, 0.0762, 0.1078, 0.2152]) -Greedy action tensor([ 0.8102, 0.0292, -0.1479, -0.4369]) tensor([0.4697, 0.2151, 0.1802, 0.1350]) -Greedy action tensor([ 1.2570, -0.4282, -0.3803, 0.1147]) tensor([0.5886, 0.1091, 0.1145, 0.1878]) -Greedy action tensor([ 1.5693, -0.8808, -0.3787, 0.3854]) tensor([0.6515, 0.0562, 0.0929, 0.1994]) -Greedy action tensor([ 1.1658, -0.5878, -0.2841, 0.2624]) tensor([0.5516, 0.0955, 0.1294, 0.2235]) -Greedy action tensor([ 1.0417, -0.5891, -0.2066, 0.6332]) tensor([0.4657, 0.0912, 0.1336, 0.3095]) -Greedy action tensor([ 1.3704, -0.2841, -0.4569, 0.2805]) tensor([0.5923, 0.1132, 0.0953, 0.1992]) -Greedy action tensor([ 1.0371, -0.2024, -0.0621, 0.2495]) tensor([0.4813, 0.1394, 0.1603, 0.2190]) -Greedy action tensor([ 1.3416, -0.3373, -0.0491, -0.1149]) tensor([0.5993, 0.1118, 0.1492, 0.1397]) -Greedy action tensor([ 1.0768, -0.5516, -0.4430, 0.3480]) tensor([0.5270, 0.1034, 0.1153, 0.2543]) -Greedy action tensor([ 0.6632, -0.3087, -0.2932, 0.1802]) tensor([0.4202, 0.1590, 0.1615, 0.2593]) -Greedy action tensor([ 0.8184, -0.3329, -0.6101, 0.8030]) tensor([0.3936, 0.1245, 0.0943, 0.3876]) -Greedy action tensor([ 1.1508, -0.0878, 0.1703, -0.0760]) tensor([0.5107, 0.1480, 0.1916, 0.1497]) -Greedy action tensor([ 1.5053, -0.6626, -0.1541, 0.2365]) tensor([0.6306, 0.0722, 0.1200, 0.1773]) -Greedy action tensor([ 1.3263, -0.4660, -0.1446, 0.1450]) tensor([0.5871, 0.0978, 0.1349, 0.1802]) -Greedy action tensor([ 0.8575, -0.4337, -0.1584, 0.0554]) tensor([0.4795, 0.1318, 0.1736, 0.2150]) -Greedy action tensor([ 0.9206, -0.6070, 0.0474, 0.2153]) tensor([0.4698, 0.1020, 0.1962, 0.2321]) -Greedy action tensor([ 0.4386, -0.2117, -0.4007, 0.1728]) tensor([0.3676, 0.1918, 0.1588, 0.2818]) -Greedy action tensor([ 0.8208, -0.3408, -0.2136, 0.2085]) tensor([0.4524, 0.1416, 0.1608, 0.2452]) -Greedy action tensor([ 1.2213, -0.4458, -0.2823, 0.3760]) tensor([0.5433, 0.1026, 0.1208, 0.2333]) -Greedy action tensor([ 1.4800, 0.0323, -0.2623, 0.3143]) tensor([0.5808, 0.1365, 0.1017, 0.1810]) -Greedy action tensor([ 1.1845, -0.5354, -0.2802, 0.3799]) tensor([0.5384, 0.0964, 0.1244, 0.2408]) -Greedy action tensor([ 1.2312, -0.3021, -0.2267, -0.0161]) tensor([0.5761, 0.1243, 0.1341, 0.1655]) -Greedy action tensor([ 1.2919, -0.4243, -0.1813, 0.3130]) tensor([0.5603, 0.1007, 0.1284, 0.2105]) -Greedy action tensor([ 0.7855, -0.4120, -0.3492, 0.0982]) tensor([0.4703, 0.1420, 0.1512, 0.2365]) -Greedy action tensor([ 1.2199, -0.2630, -0.0995, -0.0259]) tensor([0.5612, 0.1274, 0.1500, 0.1615]) -Greedy action tensor([ 0.9709, -0.4390, -0.2940, 0.2489]) tensor([0.4970, 0.1213, 0.1403, 0.2414]) -Greedy action tensor([ 0.8255, -0.1181, -0.5073, -0.3317]) tensor([0.5083, 0.1978, 0.1341, 0.1598]) -Greedy action tensor([ 1.2246, -0.1638, -0.1174, 0.0232]) tensor([0.5520, 0.1377, 0.1442, 0.1660]) -Greedy action tensor([ 0.9473, -0.4361, -0.1580, 0.2008]) tensor([0.4864, 0.1220, 0.1611, 0.2306]) -Greedy action tensor([ 1.0851, -0.4278, 0.0260, -0.1997]) tensor([0.5424, 0.1195, 0.1881, 0.1501]) -Greedy action tensor([ 1.3029, -0.2641, -0.3091, -0.1063]) tensor([0.6051, 0.1263, 0.1207, 0.1479]) -Greedy action tensor([ 1.6172, -0.8708, -0.3939, 0.2536]) tensor([0.6791, 0.0564, 0.0909, 0.1737]) -Greedy action tensor([ 1.0842, 0.2123, -0.1982, -0.3667]) tensor([0.5182, 0.2167, 0.1437, 0.1214]) -Greedy action tensor([ 1.5552, -0.6212, -0.1778, 0.3260]) tensor([0.6318, 0.0717, 0.1117, 0.1848]) -Greedy action tensor([ 0.7937, -0.6065, -0.0560, 0.0864]) tensor([0.4614, 0.1138, 0.1973, 0.2275]) -Greedy action tensor([ 1.0650, -0.5125, -0.4279, 0.7511]) tensor([0.4626, 0.0955, 0.1040, 0.3379]) -Greedy action tensor([ 0.6839, -0.1808, -0.4778, 0.5615]) tensor([0.3818, 0.1608, 0.1195, 0.3379]) -Greedy action tensor([ 0.6303, -0.2860, -0.3873, 0.2366]) tensor([0.4105, 0.1642, 0.1484, 0.2769]) -Greedy action tensor([ 1.4139, -0.4536, -0.3272, 0.3676]) tensor([0.5949, 0.0919, 0.1043, 0.2089]) -Greedy action tensor([0.8184, 0.0100, 0.0163, 0.0546]) tensor([0.4238, 0.1888, 0.1900, 0.1974]) -Greedy action tensor([ 1.5242, -0.6283, -0.0146, 0.0045]) tensor([0.6453, 0.0750, 0.1385, 0.1412]) -Greedy action tensor([ 0.8479, -0.5090, -0.2591, 0.0135]) tensor([0.4945, 0.1273, 0.1635, 0.2147]) -Greedy action tensor([ 1.2819, -0.8063, -0.0818, 0.3996]) tensor([0.5576, 0.0691, 0.1426, 0.2307]) -Greedy action tensor([ 0.9200, -0.4239, -0.1544, -0.0219]) tensor([0.5020, 0.1309, 0.1714, 0.1957]) -Greedy action tensor([ 0.7021, -0.2047, -0.2521, 0.0009]) tensor([0.4377, 0.1767, 0.1685, 0.2171]) -Greedy action tensor([ 0.9736, -0.2597, 0.0129, -0.3075]) tensor([0.5124, 0.1493, 0.1960, 0.1423]) -Greedy action tensor([ 1.3030, -0.8947, -0.3791, 0.0653]) tensor([0.6301, 0.0700, 0.1172, 0.1828]) -Greedy action tensor([ 1.6876, -0.2745, -0.4110, 0.1286]) tensor([0.6786, 0.0954, 0.0832, 0.1428]) -Greedy action tensor([ 1.1163, -0.1552, 2.3231, 0.9265]) tensor([0.1835, 0.0514, 0.6133, 0.1518]) -Greedy action tensor([ 0.9830, -0.5806, 1.3282, 1.1154]) tensor([0.2657, 0.0556, 0.3753, 0.3034]) -Greedy action tensor([ 0.9309, -0.7870, -0.0044, 0.0849]) tensor([0.4997, 0.0897, 0.1961, 0.2145]) -Greedy action tensor([-0.0314, -0.0392, -0.6827, 1.2224]) tensor([0.1662, 0.1649, 0.0867, 0.5823]) -Greedy action tensor([ 0.3721, -1.2454, 0.0244, 1.1923]) tensor([0.2395, 0.0475, 0.1691, 0.5439]) -Greedy action tensor([-1.3169, -1.2969, 0.0406, 0.0294]) tensor([0.1026, 0.1046, 0.3986, 0.3942]) -Greedy action tensor([ 1.5070, -1.2472, -0.5448, 0.9643]) tensor([0.5639, 0.0359, 0.0725, 0.3277]) -Greedy action tensor([-0.1571, 0.0935, 0.5380, 0.6432]) tensor([0.1535, 0.1972, 0.3076, 0.3417]) -Greedy action tensor([ 1.6847, 0.2185, -0.3388, 1.0268]) tensor([0.5317, 0.1227, 0.0703, 0.2754]) -Greedy action tensor([0.4368, 0.3879, 0.3305, 0.1948]) tensor([0.2750, 0.2619, 0.2473, 0.2159]) -Greedy action tensor([ 1.0684, -0.5638, 0.3540, -0.0326]) tensor([0.4957, 0.0969, 0.2426, 0.1648]) -Greedy action tensor([ 1.0928, -0.3199, -0.2669, 1.1930]) tensor([0.3838, 0.0935, 0.0985, 0.4242]) -Greedy action tensor([ 0.8160, -0.0270, 0.3216, 0.0466]) tensor([0.3994, 0.1719, 0.2436, 0.1850]) -Greedy action tensor([ 1.1804, -0.6565, 0.7409, 0.3602]) tensor([0.4456, 0.0710, 0.2872, 0.1962]) -Greedy action tensor([ 0.8647, -0.4597, 0.0421, 1.0947]) tensor([0.3374, 0.0897, 0.1482, 0.4247]) -Greedy action tensor([0.6998, 0.2904, 1.7834, 0.4961]) tensor([0.1840, 0.1222, 0.5437, 0.1501]) -Greedy action tensor([ 0.7870, -0.4862, 0.2889, 0.3506]) tensor([0.3946, 0.1105, 0.2398, 0.2551]) -Greedy action tensor([-5.5557e-01, 1.1609e-01, 2.1416e-04, 1.1012e+00]) tensor([0.1006, 0.1969, 0.1753, 0.5272]) -Greedy action tensor([ 1.2507, -0.5570, 0.5534, 1.2868]) tensor([0.3705, 0.0608, 0.1845, 0.3842]) -Greedy action tensor([ 1.6474, -0.1618, 0.4370, 1.1579]) tensor([0.4820, 0.0789, 0.1437, 0.2954]) -Greedy action tensor([1.0465, 0.0187, 0.0298, 0.6167]) tensor([0.4219, 0.1510, 0.1526, 0.2745]) -Greedy action tensor([ 0.9282, -0.2273, 0.1454, 0.1893]) tensor([0.4445, 0.1400, 0.2032, 0.2123]) -Greedy action tensor([ 0.4423, -1.4064, 0.9765, 1.0055]) tensor([0.2165, 0.0341, 0.3693, 0.3802]) -Greedy action tensor([ 1.7910, -1.4903, 0.0256, 0.7079]) tensor([0.6463, 0.0243, 0.1106, 0.2188]) -Greedy action tensor([-0.0950, -0.6416, 1.0441, 0.4581]) tensor([0.1552, 0.0899, 0.4850, 0.2699]) -Greedy action tensor([ 0.0310, -0.3008, -0.0236, 0.2905]) tensor([0.2525, 0.1812, 0.2391, 0.3273]) -Greedy action tensor([ 0.8364, -1.5061, -0.0518, 0.6520]) tensor([0.4275, 0.0411, 0.1759, 0.3555]) -Greedy action tensor([ 1.1819, -0.9320, -0.8556, 0.1753]) tensor([0.6186, 0.0747, 0.0806, 0.2261]) -Greedy action tensor([ 0.2238, 0.3989, -0.2966, 0.0615]) tensor([0.2750, 0.3277, 0.1635, 0.2338]) -Greedy action tensor([ 1.3841, -0.0509, 1.3686, 0.2472]) tensor([0.3931, 0.0936, 0.3871, 0.1261]) -Greedy action tensor([ 0.8739, -0.8509, 0.3562, 1.9698]) tensor([0.2098, 0.0374, 0.1250, 0.6277]) -Greedy action tensor([-0.7600, -0.8058, -0.2247, 0.4452]) tensor([0.1428, 0.1365, 0.2440, 0.4767]) -Greedy action tensor([-0.0996, 0.2459, 0.3452, 0.8057]) tensor([0.1551, 0.2192, 0.2421, 0.3836]) -Greedy action tensor([ 0.6088, -0.3225, -1.3833, 0.0047]) tensor([0.4815, 0.1897, 0.0657, 0.2631]) -Greedy action tensor([-0.4788, 0.9842, 0.6151, -0.3741]) tensor([0.1062, 0.4587, 0.3171, 0.1179]) -Greedy action tensor([-0.0799, -1.5607, -0.1299, 1.0917]) tensor([0.1850, 0.0421, 0.1760, 0.5970]) -Greedy action tensor([ 0.0896, -0.3306, -1.5014, -0.2202]) tensor([0.3855, 0.2532, 0.0785, 0.2828]) -Greedy action tensor([ 0.4340, -0.6367, 0.3576, 1.1960]) tensor([0.2267, 0.0777, 0.2100, 0.4856]) -Greedy action tensor([-0.4145, 0.8328, -0.4969, 0.8391]) tensor([0.1123, 0.3909, 0.1034, 0.3934]) -Greedy action tensor([ 1.2363, -0.6261, -0.6404, 0.6672]) tensor([0.5335, 0.0828, 0.0817, 0.3020]) -Greedy action tensor([ 1.5290, -0.2952, 0.1409, 1.3847]) tensor([0.4393, 0.0709, 0.1096, 0.3802]) -Greedy action tensor([ 1.3239, -1.1037, 0.1867, 1.6612]) tensor([0.3559, 0.0314, 0.1141, 0.4986]) -Greedy action tensor([ 0.9074, -0.1550, -0.5247, 2.0966]) tensor([0.2054, 0.0710, 0.0490, 0.6746]) -Greedy action tensor([ 0.9524, -1.0185, -0.2298, 0.2323]) tensor([0.5174, 0.0721, 0.1586, 0.2518]) -Greedy action tensor([ 0.7578, -1.2662, -0.6100, 0.8397]) tensor([0.4045, 0.0534, 0.1030, 0.4390]) -Greedy action tensor([ 0.6308, 0.1902, 0.0310, -0.1917]) tensor([0.3800, 0.2445, 0.2086, 0.1669]) -Greedy action tensor([ 0.7602, 0.3652, 0.8971, -0.3043]) tensor([0.3159, 0.2128, 0.3623, 0.1090]) -Greedy action tensor([ 0.3969, 0.2824, -0.7857, 1.0079]) tensor([0.2475, 0.2207, 0.0759, 0.4559]) -Greedy action tensor([ 0.5175, -0.3972, 0.6528, -0.3526]) tensor([0.3373, 0.1351, 0.3862, 0.1413]) -Greedy action tensor([ 0.2141, -1.7062, 0.6467, 0.4747]) tensor([0.2509, 0.0368, 0.3867, 0.3256]) -Greedy action tensor([ 1.1785, 0.2028, -0.1247, 0.2557]) tensor([0.4888, 0.1842, 0.1328, 0.1942]) -Greedy action tensor([ 0.7933, -1.3042, 1.0037, -0.1726]) tensor([0.3653, 0.0448, 0.4508, 0.1390]) -Greedy action tensor([ 1.1393, -1.4821, 0.6884, 0.4860]) tensor([0.4484, 0.0326, 0.2857, 0.2333]) -Greedy action tensor([-0.6069, -0.6520, 1.3414, 0.3356]) tensor([0.0867, 0.0828, 0.6081, 0.2224]) -Greedy action tensor([0.5699, 0.4431, 0.6737, 0.2634]) tensor([0.2684, 0.2364, 0.2977, 0.1975]) -Greedy action tensor([-0.8900, -0.7843, -1.0955, 0.8909]) tensor([0.1129, 0.1254, 0.0919, 0.6698]) -Greedy action tensor([ 0.4091, -0.9953, -0.3649, 0.0140]) tensor([0.4201, 0.1031, 0.1937, 0.2830]) -Greedy action tensor([-0.4610, -0.3352, 0.1357, -0.3576]) tensor([0.1977, 0.2242, 0.3590, 0.2192]) -Greedy action tensor([ 0.1434, -0.6497, 0.7991, 1.8861]) tensor([0.1100, 0.0498, 0.2119, 0.6283]) -Greedy action tensor([-0.2843, -2.0173, -0.3864, 0.9112]) tensor([0.1857, 0.0328, 0.1677, 0.6138]) -Greedy action tensor([ 0.2036, -0.2166, 0.2479, 0.4546]) tensor([0.2508, 0.1647, 0.2621, 0.3223]) -Greedy action tensor([ 0.7251, -0.2068, 0.0618, 0.2462]) tensor([0.3955, 0.1558, 0.2037, 0.2450]) -Greedy action tensor([ 1.0643, -1.2865, 0.1271, 1.1124]) tensor([0.3943, 0.0376, 0.1544, 0.4137]) -Greedy action tensor([ 0.7352, 0.6140, -0.1617, 0.8467]) tensor([0.2931, 0.2597, 0.1195, 0.3277]) -Greedy action tensor([0.4927, 0.8118, 0.2463, 0.8705]) tensor([0.2166, 0.2980, 0.1693, 0.3160]) -Greedy action tensor([ 0.4373, -0.4886, 1.3825, 0.7321]) tensor([0.1882, 0.0746, 0.4844, 0.2528]) -Greedy action tensor([-0.2741, 0.3881, 2.3767, -0.6596]) tensor([0.0562, 0.1090, 0.7965, 0.0382]) -Greedy action tensor([ 0.3289, -0.2680, 0.5489, -0.2822]) tensor([0.2995, 0.1649, 0.3732, 0.1625]) -Greedy action tensor([ 0.1309, -0.2741, 0.6116, -0.3031]) tensor([0.2543, 0.1696, 0.4113, 0.1648]) -Greedy action tensor([ 1.5464, -0.2703, 0.5985, 0.6409]) tensor([0.5116, 0.0832, 0.1983, 0.2069]) -Greedy action tensor([ 1.1716, -0.1239, 1.3856, 0.1690]) tensor([0.3473, 0.0951, 0.4302, 0.1274]) -Greedy action tensor([ 0.4793, -2.0887, 0.2673, -0.1132]) tensor([0.4101, 0.0314, 0.3317, 0.2267]) -Greedy action tensor([ 1.1211, -0.4681, -0.2402, 1.6210]) tensor([0.3216, 0.0656, 0.0824, 0.5303]) -Greedy action tensor([ 0.3753, 1.3095, 2.2420, -1.2608]) tensor([0.0980, 0.2494, 0.6336, 0.0191]) -Greedy action tensor([ 0.2728, 0.1784, -0.2286, 1.0823]) tensor([0.2100, 0.1911, 0.1272, 0.4718]) -Greedy action tensor([-0.6801, 0.2024, 0.1772, -0.9694]) tensor([0.1533, 0.3706, 0.3613, 0.1148]) -Greedy action tensor([ 0.4886, -0.1625, 0.2202, 1.5960]) tensor([0.1882, 0.0982, 0.1439, 0.5697]) -Greedy action tensor([ 0.2223, -1.3726, 0.3141, 0.5966]) tensor([0.2665, 0.0541, 0.2921, 0.3874]) -Greedy action tensor([ 0.9903, -0.6743, 0.7941, -0.0516]) tensor([0.4230, 0.0801, 0.3477, 0.1492]) -Greedy action tensor([ 0.1583, -0.2082, 0.2013, 0.5175]) tensor([0.2399, 0.1662, 0.2504, 0.3435]) -Greedy action tensor([ 0.5952, 0.5907, -0.1957, 1.1385]) tensor([0.2398, 0.2387, 0.1087, 0.4128]) -Greedy action tensor([-0.9079, -0.1803, 0.4609, -0.3188]) tensor([0.1136, 0.2352, 0.4465, 0.2047]) -Greedy action tensor([-1.7077, -0.5987, 1.2521, 0.8625]) tensor([0.0275, 0.0833, 0.5301, 0.3591]) -Greedy action tensor([-1.6350, -1.0692, -0.0195, -0.8338]) tensor([0.0998, 0.1757, 0.5021, 0.2224]) -Greedy action tensor([-1.1748, -0.0948, 1.2743, 1.1518]) tensor([0.0388, 0.1143, 0.4493, 0.3976]) -Greedy action tensor([-1.6795, -0.4252, 1.3682, 0.8540]) tensor([0.0262, 0.0918, 0.5519, 0.3300]) -Greedy action tensor([-1.1401, -0.5429, 0.4532, -0.2164]) tensor([0.0975, 0.1772, 0.4798, 0.2456]) -Greedy action tensor([-1.7675, -0.4904, 0.5795, -0.0455]) tensor([0.0485, 0.1738, 0.5066, 0.2712]) -Greedy action tensor([-1.2747, -0.2042, 0.7916, 1.1258]) tensor([0.0438, 0.1277, 0.3457, 0.4829]) -Greedy action tensor([-1.7922, -0.4893, 0.6106, -0.0346]) tensor([0.0464, 0.1709, 0.5134, 0.2693]) -Greedy action tensor([-0.6432, 0.2058, 0.1541, 0.1886]) tensor([0.1273, 0.2976, 0.2826, 0.2925]) -Greedy action tensor([-1.3157, -0.5107, 0.5057, 0.6316]) tensor([0.0609, 0.1362, 0.3762, 0.4267]) -Greedy action tensor([-1.6582, -0.5432, 0.5367, 0.0357]) tensor([0.0541, 0.1651, 0.4862, 0.2946]) -Greedy action tensor([-1.0340, -0.7332, 0.2759, 0.0456]) tensor([0.1111, 0.1501, 0.4118, 0.3270]) -Greedy action tensor([-0.7289, -0.5668, 0.2095, 0.2304]) tensor([0.1362, 0.1602, 0.3481, 0.3555]) -Greedy action tensor([-1.9455, -0.4809, 0.9555, 0.2516]) tensor([0.0308, 0.1330, 0.5595, 0.2767]) -Greedy action tensor([-0.3475, -0.2273, 0.1947, 0.2560]) tensor([0.1762, 0.1987, 0.3030, 0.3222]) -Greedy action tensor([-1.0806, -0.5565, 0.2575, 0.1922]) tensor([0.0993, 0.1677, 0.3785, 0.3546]) -Greedy action tensor([-1.8223, -0.5813, 0.8942, -0.0231]) tensor([0.0390, 0.1350, 0.5902, 0.2358]) -Greedy action tensor([-1.0623, -0.2969, 0.3349, 0.0046]) tensor([0.0990, 0.2129, 0.4004, 0.2878]) -Greedy action tensor([-1.1257, -0.5804, 0.4247, 0.7841]) tensor([0.0705, 0.1216, 0.3322, 0.4758]) -Greedy action tensor([-0.6424, -0.5591, 0.3152, -0.0664]) tensor([0.1545, 0.1680, 0.4026, 0.2749]) -Greedy action tensor([-0.3531, -0.5761, 0.3115, 0.1563]) tensor([0.1849, 0.1480, 0.3594, 0.3077]) -Greedy action tensor([-1.0757, -0.5960, 0.9921, 1.3533]) tensor([0.0457, 0.0739, 0.3615, 0.5189]) -Greedy action tensor([-1.7530, -0.4453, 0.5755, -0.0391]) tensor([0.0488, 0.1803, 0.5003, 0.2706]) -Greedy action tensor([-1.8782, -0.4663, 0.6442, -0.1346]) tensor([0.0430, 0.1763, 0.5351, 0.2456]) -Greedy action tensor([-1.9650, -0.4602, 1.1168, 0.5251]) tensor([0.0254, 0.1144, 0.5538, 0.3064]) -Greedy action tensor([-1.4713, -0.9210, -0.1907, -0.7427]) tensor([0.1190, 0.2063, 0.4282, 0.2465]) -Greedy action tensor([-1.1735, -0.2948, 0.9505, 1.1892]) tensor([0.0447, 0.1075, 0.3736, 0.4743]) -Greedy action tensor([-1.9217, -0.9705, 0.2036, -0.3033]) tensor([0.0588, 0.1522, 0.4924, 0.2966]) -Greedy action tensor([-1.7439, -0.6207, 0.8574, 0.3180]) tensor([0.0393, 0.1210, 0.5304, 0.3093]) -Greedy action tensor([-1.7389, -0.4722, 1.0356, 0.6264]) tensor([0.0320, 0.1137, 0.5134, 0.3410]) -Greedy action tensor([-1.7593, -0.4833, 0.5742, -0.0355]) tensor([0.0488, 0.1747, 0.5031, 0.2734]) -Greedy action tensor([-1.0672, -0.5654, -0.0273, -0.0249]) tensor([0.1203, 0.1986, 0.3402, 0.3410]) -Greedy action tensor([-1.7148, -0.4860, 0.6337, 0.0988]) tensor([0.0476, 0.1626, 0.4981, 0.2918]) -Greedy action tensor([-1.4713, -0.3826, 0.6067, 0.5503]) tensor([0.0513, 0.1523, 0.4095, 0.3870]) -Greedy action tensor([-0.8246, -0.5215, 1.1221, 1.5470]) tensor([0.0498, 0.0675, 0.3490, 0.5337]) -Greedy action tensor([-1.5663, -0.4977, 0.5089, 0.1053]) tensor([0.0581, 0.1693, 0.4632, 0.3094]) -Greedy action tensor([-0.7010, -0.2713, 0.2809, 0.5733]) tensor([0.1139, 0.1750, 0.3040, 0.4072]) -Greedy action tensor([-1.5792, -0.5346, 0.5049, -0.0460]) tensor([0.0606, 0.1721, 0.4867, 0.2806]) -Greedy action tensor([-1.2086, -0.5986, 0.2876, 0.2410]) tensor([0.0865, 0.1591, 0.3860, 0.3684]) -Greedy action tensor([-1.9243, -0.4808, 0.8602, 0.1951]) tensor([0.0336, 0.1424, 0.5442, 0.2798]) -Greedy action tensor([-0.2874, -0.0543, 1.1317, 1.6074]) tensor([0.0766, 0.0968, 0.3168, 0.5098]) -Greedy action tensor([-1.2982, -0.7865, 0.1550, -0.0924]) tensor([0.0972, 0.1622, 0.4158, 0.3247]) -Greedy action tensor([-1.6662, -0.2096, 0.5127, -0.0534]) tensor([0.0522, 0.2241, 0.4616, 0.2620]) -Greedy action tensor([-1.9365, -0.4557, 0.6608, -0.1769]) tensor([0.0406, 0.1785, 0.5451, 0.2359]) -Greedy action tensor([-0.9694, -0.3431, 0.5238, 1.1281]) tensor([0.0646, 0.1209, 0.2878, 0.5266]) -Greedy action tensor([-1.5692, -0.2377, 0.4198, 0.0891]) tensor([0.0577, 0.2183, 0.4213, 0.3027]) -Greedy action tensor([-0.5947, -0.4900, 1.0380, 1.5750]) tensor([0.0626, 0.0695, 0.3202, 0.5478]) -Greedy action tensor([-1.8568, -0.4826, 0.6302, -0.1148]) tensor([0.0441, 0.1742, 0.5301, 0.2516]) -Greedy action tensor([-1.7673, -0.3828, 0.5566, -0.0644]) tensor([0.0483, 0.1929, 0.4935, 0.2652]) -Greedy action tensor([-0.6502, 0.9116, 0.4215, 0.7542]) tensor([0.0784, 0.3736, 0.2289, 0.3192]) -Greedy action tensor([-1.9718, -0.6236, 1.1999, 0.4346]) tensor([0.0251, 0.0968, 0.5993, 0.2788]) -Greedy action tensor([-1.5564, 0.3878, 0.4058, 0.2637]) tensor([0.0470, 0.3285, 0.3344, 0.2901]) -Greedy action tensor([-0.9572, -0.5909, 0.2558, 0.2842]) tensor([0.1079, 0.1557, 0.3630, 0.3734]) -Greedy action tensor([-1.0249, -0.7609, 1.0882, 1.4910]) tensor([0.0436, 0.0567, 0.3604, 0.5393]) -Greedy action tensor([-1.5727, -0.5347, 0.4614, 0.0866]) tensor([0.0598, 0.1688, 0.4571, 0.3142]) -Greedy action tensor([-1.4727, -0.6348, 0.4086, -0.4682]) tensor([0.0793, 0.1834, 0.5206, 0.2166]) -Greedy action tensor([-0.6871, -0.5662, 0.1768, 0.2908]) tensor([0.1397, 0.1576, 0.3314, 0.3714]) -Greedy action tensor([-1.4461, -0.1260, 0.3434, 0.2305]) tensor([0.0622, 0.2329, 0.3724, 0.3326]) -Greedy action tensor([-1.0145, -0.5966, 0.6904, 1.1243]) tensor([0.0606, 0.0920, 0.3332, 0.5142]) -Greedy action tensor([-1.3850, -0.5709, 0.4786, 0.2172]) tensor([0.0682, 0.1539, 0.4395, 0.3384]) -Greedy action tensor([-1.4636, 0.6545, 0.5190, -0.4923]) tensor([0.0520, 0.4327, 0.3778, 0.1374]) -Greedy action tensor([-0.7756, -0.3281, 0.9499, 1.4483]) tensor([0.0574, 0.0898, 0.3223, 0.5305]) -Greedy action tensor([-1.2498, -0.5149, 0.3201, 0.4603]) tensor([0.0745, 0.1554, 0.3581, 0.4120]) -Greedy action tensor([-0.9985, -0.1101, 0.3336, -0.6161]) tensor([0.1151, 0.2799, 0.4362, 0.1687]) -Greedy action tensor([-1.4916, 0.0502, 0.3373, 0.1653]) tensor([0.0583, 0.2726, 0.3632, 0.3058]) -Greedy action tensor([-1.1789, -0.5289, 0.4264, 0.5943]) tensor([0.0725, 0.1390, 0.3612, 0.4273]) -Greedy action tensor([-1.6828, -0.5377, 0.7414, 0.2643]) tensor([0.0446, 0.1400, 0.5032, 0.3123]) -Greedy action tensor([-1.9068, -0.4698, 0.6558, -0.1540]) tensor([0.0418, 0.1757, 0.5416, 0.2410]) -Greedy action tensor([-1.7556, -0.5982, 0.3448, -0.1749]) tensor([0.0581, 0.1849, 0.4747, 0.2823]) -Greedy action tensor([-1.9230, -0.4552, 0.6629, -0.1604]) tensor([0.0409, 0.1775, 0.5431, 0.2384]) -Greedy action tensor([-1.1922, -0.0453, 0.9499, 1.0576]) tensor([0.0451, 0.1421, 0.3845, 0.4282]) -Greedy action tensor([-1.4572, -0.5980, 0.3998, 0.0936]) tensor([0.0691, 0.1631, 0.4422, 0.3256]) -Greedy action tensor([-1.4347, 0.2246, 0.3348, -0.3121]) tensor([0.0658, 0.3459, 0.3861, 0.2022]) -Greedy action tensor([-1.5243, -0.4962, 0.6499, 0.5167]) tensor([0.0493, 0.1378, 0.4335, 0.3794]) -Greedy action tensor([-1.2176, -0.6318, 0.2867, 0.3650]) tensor([0.0822, 0.1477, 0.3700, 0.4001]) -Greedy action tensor([-1.5508, -0.5388, 0.4361, 0.1097]) tensor([0.0613, 0.1687, 0.4473, 0.3227]) -Greedy action tensor([-0.3931, 1.0831, 0.0220, 0.4397]) tensor([0.1088, 0.4762, 0.1648, 0.2502]) -Greedy action tensor([-1.9000, -0.4562, 0.6581, -0.1400]) tensor([0.0417, 0.1768, 0.5389, 0.2426]) -Greedy action tensor([-1.1613, -0.5751, 0.3204, 0.2639]) tensor([0.0881, 0.1583, 0.3875, 0.3662]) -Greedy action tensor([-0.9261, -0.5755, 0.1827, 0.3120]) tensor([0.1124, 0.1596, 0.3405, 0.3876]) -Greedy action tensor([ 0.8922, -0.6991, 0.1188, -0.6840]) tensor([0.5342, 0.1088, 0.2465, 0.1105]) -Greedy action tensor([ 0.7251, -0.5661, -0.0811, -0.3719]) tensor([0.4865, 0.1338, 0.2173, 0.1624]) -Greedy action tensor([ 0.7138, -0.3416, 0.0245, -0.6639]) tensor([0.4757, 0.1656, 0.2388, 0.1199]) -Greedy action tensor([ 0.6859, -0.5282, 0.0039, -0.5522]) tensor([0.4779, 0.1419, 0.2416, 0.1386]) -Greedy action tensor([ 0.4112, -0.0638, -0.1100, -0.6131]) tensor([0.3884, 0.2415, 0.2306, 0.1394]) -Greedy action tensor([ 0.1591, -0.0935, 0.0155, -0.3453]) tensor([0.3080, 0.2392, 0.2668, 0.1860]) -Greedy action tensor([ 0.9634, -0.4862, -0.0461, -0.4737]) tensor([0.5445, 0.1278, 0.1984, 0.1294]) -Greedy action tensor([ 0.4086, -0.3973, 0.2427, -0.5030]) tensor([0.3710, 0.1657, 0.3143, 0.1491]) -Greedy action tensor([ 0.5586, -0.1406, 0.0226, -0.2182]) tensor([0.3934, 0.1955, 0.2302, 0.1809]) -Greedy action tensor([ 0.5715, -0.0212, -0.0373, -0.1076]) tensor([0.3840, 0.2123, 0.2089, 0.1947]) -Greedy action tensor([ 0.4628, -0.1227, 0.0261, -0.4048]) tensor([0.3812, 0.2123, 0.2463, 0.1601]) -Greedy action tensor([ 0.6237, -0.3483, -0.0087, -0.2634]) tensor([0.4308, 0.1630, 0.2289, 0.1774]) -Greedy action tensor([ 0.8492, -0.5088, 0.1571, -0.7091]) tensor([0.5081, 0.1307, 0.2543, 0.1069]) -Greedy action tensor([ 0.5198, -0.3248, 0.0711, -0.3198]) tensor([0.4000, 0.1719, 0.2554, 0.1727]) -Greedy action tensor([ 0.5490, -0.5313, 0.0613, -0.3617]) tensor([0.4245, 0.1441, 0.2607, 0.1707]) -Greedy action tensor([ 0.4489, -0.3469, 0.0832, -0.2584]) tensor([0.3791, 0.1711, 0.2630, 0.1869]) -Greedy action tensor([ 0.3471, 0.0808, -0.0082, -0.4689]) tensor([0.3437, 0.2634, 0.2409, 0.1520]) -Greedy action tensor([ 0.6562, -0.3897, -0.1571, -0.3022]) tensor([0.4591, 0.1613, 0.2036, 0.1761]) -Greedy action tensor([ 0.6692, -0.4131, -0.0770, -0.4121]) tensor([0.4646, 0.1574, 0.2203, 0.1576]) -Greedy action tensor([ 0.5826, 0.1574, -0.1076, -0.0947]) tensor([0.3755, 0.2454, 0.1883, 0.1908]) -Greedy action tensor([ 0.6844, -0.1039, 0.1534, -0.4680]) tensor([0.4240, 0.1928, 0.2493, 0.1339]) -Greedy action tensor([ 0.5899, -0.2827, -0.0686, -0.2288]) tensor([0.4208, 0.1758, 0.2178, 0.1856]) -Greedy action tensor([ 0.7318, -0.1674, 0.1349, -0.3413]) tensor([0.4349, 0.1770, 0.2394, 0.1487]) -Greedy action tensor([ 0.3127, -0.0184, -0.0332, -0.3415]) tensor([0.3395, 0.2438, 0.2402, 0.1765]) -Greedy action tensor([ 0.6405, -0.2578, 0.0098, -0.2564]) tensor([0.4260, 0.1735, 0.2267, 0.1737]) -Greedy action tensor([ 0.5255, -0.1391, -0.0141, -0.1515]) tensor([0.3838, 0.1975, 0.2237, 0.1950]) -Greedy action tensor([ 0.0123, -0.0626, 0.1856, -0.1103]) tensor([0.2499, 0.2319, 0.2972, 0.2211]) -Greedy action tensor([ 0.4689, 0.0194, -0.1096, -0.0301]) tensor([0.3564, 0.2274, 0.1998, 0.2164]) -Greedy action tensor([ 0.5834, -0.1435, -0.0252, -0.1053]) tensor([0.3953, 0.1911, 0.2151, 0.1985]) -Greedy action tensor([ 0.8924, -0.7866, 0.0117, -0.4385]) tensor([0.5361, 0.1000, 0.2222, 0.1417]) -Greedy action tensor([ 0.5217, -0.2593, -0.2165, -0.2240]) tensor([0.4149, 0.1900, 0.1983, 0.1968]) -Greedy action tensor([ 0.5268, -0.0819, 0.1409, -0.2743]) tensor([0.3742, 0.2036, 0.2544, 0.1679]) -Greedy action tensor([ 0.8712, -0.4074, -0.0870, -0.7214]) tensor([0.5361, 0.1493, 0.2056, 0.1090]) -Greedy action tensor([ 0.8819, -0.7284, 0.0741, -0.2494]) tensor([0.5080, 0.1015, 0.2265, 0.1639]) -Greedy action tensor([ 0.6404, -0.3035, -0.0195, -0.1753]) tensor([0.4258, 0.1657, 0.2201, 0.1884]) -Greedy action tensor([ 1.0096, -0.0528, -0.0289, -0.3399]) tensor([0.5105, 0.1764, 0.1807, 0.1324]) -Greedy action tensor([ 0.4022, 0.0269, 0.0008, -0.1746]) tensor([0.3427, 0.2354, 0.2294, 0.1925]) -Greedy action tensor([ 0.2314, 0.1247, -0.0427, -0.2578]) tensor([0.3056, 0.2747, 0.2323, 0.1874]) -Greedy action tensor([ 0.8800, -0.6178, -0.1483, -0.4595]) tensor([0.5425, 0.1213, 0.1940, 0.1421]) -Greedy action tensor([ 0.5487, -0.4337, -0.0292, -0.5821]) tensor([0.4428, 0.1658, 0.2485, 0.1429]) -Greedy action tensor([ 0.7994, -0.4536, -0.1124, -0.4337]) tensor([0.5054, 0.1444, 0.2030, 0.1473]) -Greedy action tensor([ 0.5001, 0.0352, -0.0777, -0.0833]) tensor([0.3640, 0.2287, 0.2042, 0.2031]) -Greedy action tensor([ 0.7569, -0.3724, -0.0946, -0.6532]) tensor([0.5015, 0.1621, 0.2140, 0.1224]) -Greedy action tensor([ 0.9180, -0.6200, -0.0414, -0.4534]) tensor([0.5401, 0.1160, 0.2069, 0.1370]) -Greedy action tensor([ 0.4489, -0.2134, -0.1325, -0.2211]) tensor([0.3866, 0.1994, 0.2162, 0.1978]) -Greedy action tensor([ 0.3639, -0.0753, 0.0391, -0.3094]) tensor([0.3476, 0.2240, 0.2512, 0.1773]) -Greedy action tensor([ 0.5124, -0.2767, 0.0214, -0.3705]) tensor([0.4033, 0.1832, 0.2468, 0.1668]) -Greedy action tensor([ 1.0544, -0.9626, -0.1045, -0.3882]) tensor([0.5941, 0.0790, 0.1864, 0.1404]) -Greedy action tensor([ 0.9948, -0.7540, -0.0315, -0.7913]) tensor([0.5883, 0.1024, 0.2108, 0.0986]) -Greedy action tensor([ 0.2059, 0.1345, -0.0994, -0.1552]) tensor([0.2972, 0.2767, 0.2190, 0.2071]) -Greedy action tensor([ 0.7177, -0.3392, 0.0628, -0.5045]) tensor([0.4626, 0.1608, 0.2403, 0.1363]) -Greedy action tensor([ 0.4941, -0.2396, -0.0421, -0.1909]) tensor([0.3892, 0.1869, 0.2277, 0.1962]) -Greedy action tensor([ 0.6600, -0.4214, -0.0442, -0.4107]) tensor([0.4595, 0.1558, 0.2272, 0.1575]) -Greedy action tensor([ 0.4104, -0.0006, -0.0733, -0.1528]) tensor([0.3510, 0.2327, 0.2164, 0.1999]) -Greedy action tensor([ 0.8200, -0.2490, 0.0178, -0.3549]) tensor([0.4761, 0.1635, 0.2134, 0.1470]) -Greedy action tensor([ 0.7397, -0.7801, -0.1642, -0.5917]) tensor([0.5297, 0.1159, 0.2145, 0.1399]) -Greedy action tensor([ 0.5581, -0.3863, -0.0756, -0.3884]) tensor([0.4333, 0.1685, 0.2299, 0.1682]) -Greedy action tensor([ 0.4642, -0.1118, 0.0597, -0.3369]) tensor([0.3734, 0.2099, 0.2492, 0.1676]) -Greedy action tensor([ 0.7949, -0.2157, -0.1004, -0.3689]) tensor([0.4797, 0.1746, 0.1959, 0.1498]) -Greedy action tensor([ 0.6797, 0.0022, 0.0797, -0.3619]) tensor([0.4150, 0.2108, 0.2278, 0.1465]) -Greedy action tensor([ 0.3077, -0.0399, -0.0682, -0.2647]) tensor([0.3382, 0.2389, 0.2322, 0.1908]) -Greedy action tensor([ 0.4654, 0.2067, 0.0598, -0.3340]) tensor([0.3462, 0.2673, 0.2308, 0.1557]) -Greedy action tensor([ 0.7034, -0.4790, 0.0580, -0.5822]) tensor([0.4745, 0.1455, 0.2489, 0.1312]) -Greedy action tensor([ 0.6666, -0.2705, -0.0690, -0.2288]) tensor([0.4387, 0.1719, 0.2102, 0.1792]) -Greedy action tensor([ 0.5650, -0.3143, 0.0587, -0.4307]) tensor([0.4189, 0.1739, 0.2525, 0.1548]) -Greedy action tensor([ 0.6413, -0.2958, -0.0336, -0.2342]) tensor([0.4315, 0.1690, 0.2197, 0.1798]) -Greedy action tensor([ 0.7537, -0.4603, -0.0612, -0.5460]) tensor([0.4969, 0.1476, 0.2200, 0.1355]) -Greedy action tensor([ 0.5394, 0.3442, -0.1540, -0.3982]) tensor([0.3685, 0.3031, 0.1842, 0.1443]) -Greedy action tensor([ 0.6972, -0.6004, 0.0961, -0.6643]) tensor([0.4813, 0.1315, 0.2639, 0.1233]) -Greedy action tensor([ 0.6821, -0.3978, 0.0586, -0.6263]) tensor([0.4660, 0.1583, 0.2498, 0.1259]) -Greedy action tensor([ 0.3415, -0.2927, -0.0620, -0.2469]) tensor([0.3632, 0.1926, 0.2426, 0.2016]) -Greedy action tensor([ 0.3579, 0.1269, 0.0198, -0.0243]) tensor([0.3135, 0.2489, 0.2236, 0.2140]) -Greedy action tensor([ 0.6673, -0.2616, -0.1420, -0.2549]) tensor([0.4469, 0.1765, 0.1989, 0.1777]) -Greedy action tensor([ 0.5599, -0.1254, -0.0860, -0.2618]) tensor([0.4052, 0.2042, 0.2124, 0.1782]) -Greedy action tensor([ 0.6125, -0.1272, -0.0448, -0.1755]) tensor([0.4081, 0.1948, 0.2115, 0.1856]) -Greedy action tensor([ 0.5029, -0.1635, 0.0494, -0.6139]) tensor([0.4038, 0.2074, 0.2566, 0.1322]) -Greedy action tensor([ 0.3891, 0.0534, -0.0131, -0.1556]) tensor([0.3374, 0.2412, 0.2257, 0.1957]) -Greedy action tensor([ 0.5689, -0.3546, -0.0689, -0.3950]) tensor([0.4335, 0.1721, 0.2291, 0.1653]) -Greedy action tensor([ 0.4818, -0.1576, 0.0551, -0.2544]) tensor([0.3760, 0.1984, 0.2454, 0.1801]) -Greedy action tensor([ 0.8749, -0.3910, 0.0198, -0.2789]) tensor([0.4944, 0.1394, 0.2102, 0.1560]) -Greedy action tensor([ 0.7716, -0.4055, -0.0709, -0.5624]) tensor([0.4994, 0.1539, 0.2151, 0.1316]) -Greedy action tensor([ 1.4936, -0.5742, -0.4713, 0.4720]) tensor([0.6148, 0.0777, 0.0862, 0.2213]) -Greedy action tensor([ 0.9686, -0.3980, 0.0547, 0.0923]) tensor([0.4826, 0.1230, 0.1935, 0.2009]) -Greedy action tensor([ 1.4636, 0.0123, -0.3887, 0.3046]) tensor([0.5865, 0.1374, 0.0920, 0.1841]) -Greedy action tensor([ 1.2784, -0.6686, 0.1639, 0.1430]) tensor([0.5580, 0.0796, 0.1831, 0.1793]) -Greedy action tensor([ 0.9743, -0.1591, -0.0549, 0.0290]) tensor([0.4836, 0.1557, 0.1728, 0.1879]) -Greedy action tensor([ 0.4899, -0.5178, -0.1198, 0.1227]) tensor([0.3844, 0.1403, 0.2089, 0.2663]) -Greedy action tensor([ 0.6139, -0.4674, -0.3809, 0.2757]) tensor([0.4129, 0.1400, 0.1527, 0.2944]) -Greedy action tensor([ 0.8616, -0.3968, -0.1198, 0.0449]) tensor([0.4760, 0.1352, 0.1784, 0.2103]) -Greedy action tensor([ 1.5271, -0.4701, -0.2198, 0.0633]) tensor([0.6488, 0.0880, 0.1131, 0.1501]) -Greedy action tensor([ 0.8968, -0.3852, 0.0134, -0.1025]) tensor([0.4857, 0.1348, 0.2008, 0.1788]) -Greedy action tensor([ 1.1120, -0.4161, -0.2399, 0.0843]) tensor([0.5454, 0.1183, 0.1411, 0.1951]) -Greedy action tensor([ 0.9642, -0.6098, -0.4884, 0.8481]) tensor([0.4289, 0.0889, 0.1003, 0.3819]) -Greedy action tensor([ 0.7687, -0.2531, -0.2829, -0.0384]) tensor([0.4639, 0.1670, 0.1621, 0.2070]) -Greedy action tensor([ 1.2133, -0.1287, -0.4742, -0.3193]) tensor([0.6016, 0.1572, 0.1113, 0.1299]) -Greedy action tensor([ 1.2749, -0.5643, -0.4362, 0.6181]) tensor([0.5382, 0.0855, 0.0972, 0.2790]) -Greedy action tensor([ 1.0963, -0.5835, -0.0011, 0.2022]) tensor([0.5184, 0.0966, 0.1730, 0.2120]) -Greedy action tensor([ 1.3842, -0.5526, -0.4383, 0.1892]) tensor([0.6217, 0.0896, 0.1005, 0.1882]) -Greedy action tensor([ 1.0175, -0.0981, -0.2474, 0.2008]) tensor([0.4874, 0.1597, 0.1376, 0.2153]) -Greedy action tensor([ 0.5872, -0.2751, -0.5329, -0.2975]) tensor([0.4627, 0.1953, 0.1510, 0.1910]) -Greedy action tensor([ 1.7897, -0.7727, -0.1157, 0.4782]) tensor([0.6688, 0.0516, 0.0995, 0.1802]) -Greedy action tensor([ 0.4072, -0.0856, -0.0594, 0.0305]) tensor([0.3420, 0.2089, 0.2145, 0.2346]) -Greedy action tensor([ 0.4564, -0.3005, -0.5254, 0.4484]) tensor([0.3526, 0.1654, 0.1321, 0.3498]) -Greedy action tensor([ 1.4233, -0.0166, -0.0500, 0.0049]) tensor([0.5854, 0.1387, 0.1342, 0.1417]) -Greedy action tensor([ 0.8575, -0.2789, -0.2875, -0.0488]) tensor([0.4894, 0.1571, 0.1557, 0.1977]) -Greedy action tensor([ 0.9463, -0.6860, 0.1536, 0.0471]) tensor([0.4866, 0.0951, 0.2202, 0.1980]) -Greedy action tensor([ 1.8245, -0.5825, -0.1603, 0.3414]) tensor([0.6875, 0.0619, 0.0945, 0.1560]) -Greedy action tensor([ 0.1339, -0.0905, -0.1606, -0.0004]) tensor([0.2925, 0.2337, 0.2179, 0.2558]) -Greedy action tensor([ 1.2903, -0.5014, -0.2332, -0.0144]) tensor([0.6039, 0.1007, 0.1316, 0.1638]) -Greedy action tensor([ 1.2646, -0.7996, -0.0932, 0.2650]) tensor([0.5707, 0.0724, 0.1468, 0.2100]) -Greedy action tensor([ 1.0382, -0.6139, -0.1260, 0.4171]) tensor([0.4899, 0.0939, 0.1529, 0.2633]) -Greedy action tensor([ 0.7459, -0.4021, -0.5621, -0.2780]) tensor([0.5136, 0.1630, 0.1389, 0.1845]) -Greedy action tensor([ 1.2799, -0.8545, -0.2199, 0.1478]) tensor([0.6010, 0.0711, 0.1341, 0.1937]) -Greedy action tensor([ 0.9021, -0.3541, -0.3138, 0.1129]) tensor([0.4913, 0.1399, 0.1456, 0.2232]) -Greedy action tensor([ 0.8074, -0.5633, -0.1625, -0.0471]) tensor([0.4858, 0.1233, 0.1842, 0.2067]) -Greedy action tensor([ 1.0533, -0.3809, -0.2795, 0.3318]) tensor([0.5030, 0.1199, 0.1327, 0.2445]) -Greedy action tensor([ 0.8127, -0.2017, 0.0618, -0.1716]) tensor([0.4528, 0.1642, 0.2137, 0.1692]) -Greedy action tensor([ 1.2028, -0.0879, -0.6413, -0.1058]) tensor([0.5871, 0.1615, 0.0928, 0.1586]) -Greedy action tensor([ 1.4677, -0.4669, -0.2814, 0.2688]) tensor([0.6173, 0.0892, 0.1074, 0.1861]) -Greedy action tensor([ 0.9642, -0.1056, -0.2603, -0.1557]) tensor([0.5094, 0.1747, 0.1497, 0.1662]) -Greedy action tensor([ 0.9671, -0.3667, -0.0333, 0.0097]) tensor([0.4963, 0.1307, 0.1825, 0.1905]) -Greedy action tensor([ 1.2163, -0.3475, -0.6652, -0.4108]) tensor([0.6418, 0.1343, 0.0978, 0.1261]) -Greedy action tensor([ 1.6282, -0.2631, -0.5277, 0.5206]) tensor([0.6262, 0.0945, 0.0725, 0.2069]) -Greedy action tensor([ 1.6832, -0.5287, -0.5361, 0.2119]) tensor([0.6907, 0.0756, 0.0751, 0.1586]) -Greedy action tensor([ 0.9188, -0.3618, -0.0251, 0.0206]) tensor([0.4821, 0.1340, 0.1876, 0.1964]) -Greedy action tensor([ 0.9807, -0.5634, -0.4333, 0.2008]) tensor([0.5222, 0.1115, 0.1270, 0.2394]) -Greedy action tensor([ 0.6641, -0.1525, -0.2535, -0.2446]) tensor([0.4455, 0.1969, 0.1780, 0.1796]) -Greedy action tensor([ 1.5348, -0.6026, -0.5288, -0.3507]) tensor([0.7160, 0.0845, 0.0909, 0.1086]) -Greedy action tensor([ 0.9170, -0.5477, -0.4577, 0.3891]) tensor([0.4822, 0.1115, 0.1219, 0.2844]) -Greedy action tensor([ 1.3482, -0.5783, -0.2490, 0.0310]) tensor([0.6188, 0.0901, 0.1253, 0.1658]) -Greedy action tensor([ 0.8054, -0.1973, 0.0149, -0.2642]) tensor([0.4622, 0.1696, 0.2096, 0.1586]) -Greedy action tensor([ 0.9306, -0.5329, -0.3770, 0.6171]) tensor([0.4479, 0.1036, 0.1211, 0.3274]) -Greedy action tensor([ 1.0170, -0.5277, -0.3664, 0.5921]) tensor([0.4722, 0.1007, 0.1184, 0.3087]) -Greedy action tensor([ 1.3100, -0.3303, -0.7004, -0.2486]) tensor([0.6501, 0.1261, 0.0871, 0.1368]) -Greedy action tensor([ 1.7621, -0.3974, -0.3233, 0.2907]) tensor([0.6806, 0.0785, 0.0846, 0.1563]) -Greedy action tensor([ 0.9087, -0.6044, -0.2310, 0.6799]) tensor([0.4281, 0.0943, 0.1370, 0.3406]) -Greedy action tensor([ 0.8044, -0.0101, -0.0772, -0.4411]) tensor([0.4663, 0.2065, 0.1931, 0.1342]) -Greedy action tensor([ 1.1143, -0.1841, -0.4086, 0.2152]) tensor([0.5269, 0.1438, 0.1149, 0.2144]) -Greedy action tensor([ 0.8119, -0.2738, -0.4410, -0.0775]) tensor([0.4916, 0.1660, 0.1404, 0.2020]) -Greedy action tensor([ 1.0288, -0.5868, -0.3037, 0.4208]) tensor([0.4983, 0.0990, 0.1314, 0.2713]) -Greedy action tensor([ 1.2896, -0.1184, -0.2026, -0.0406]) tensor([0.5767, 0.1411, 0.1297, 0.1525]) -Greedy action tensor([ 1.3437, -0.4813, -0.0985, 0.2877]) tensor([0.5729, 0.0924, 0.1354, 0.1993]) -Greedy action tensor([ 0.6414, -0.4428, -0.3634, 0.4278]) tensor([0.3981, 0.1346, 0.1458, 0.3215]) -Greedy action tensor([ 1.2203, -0.6290, -0.4382, -0.2261]) tensor([0.6316, 0.0994, 0.1203, 0.1487]) -Greedy action tensor([ 1.2114, -0.1992, -0.2388, 0.1809]) tensor([0.5448, 0.1329, 0.1278, 0.1944]) -Greedy action tensor([ 1.5019, -0.6245, 0.0272, 0.2553]) tensor([0.6114, 0.0729, 0.1399, 0.1758]) -Greedy action tensor([ 0.8741, -0.3614, -0.7198, 0.9436]) tensor([0.3897, 0.1133, 0.0792, 0.4178]) -Greedy action tensor([ 1.0177, -0.5326, -0.3970, 0.6603]) tensor([0.4641, 0.0985, 0.1128, 0.3246]) -Greedy action tensor([ 1.2775, -0.3701, -0.1005, 0.2093]) tensor([0.5592, 0.1077, 0.1410, 0.1922]) -Greedy action tensor([ 1.2096, -0.4596, -0.3156, 0.1411]) tensor([0.5716, 0.1077, 0.1244, 0.1964]) -Greedy action tensor([ 2.2420, -0.6498, -0.5805, 0.0183]) tensor([0.8176, 0.0454, 0.0486, 0.0885]) -Greedy action tensor([ 0.9582, -0.4772, -0.2945, 0.2231]) tensor([0.4992, 0.1188, 0.1426, 0.2393]) -Greedy action tensor([ 0.9780, -0.4741, -0.2553, 0.0324]) tensor([0.5225, 0.1223, 0.1522, 0.2030]) -Greedy action tensor([ 1.1001, -0.4252, 0.0912, 0.0069]) tensor([0.5216, 0.1135, 0.1902, 0.1748]) -Greedy action tensor([ 0.7697, -0.5457, -0.4477, 0.5868]) tensor([0.4172, 0.1120, 0.1235, 0.3474]) -Greedy action tensor([ 0.6315, -0.2690, -0.0418, -0.0811]) tensor([0.4155, 0.1688, 0.2119, 0.2037]) -Greedy action tensor([ 0.7444, -0.1322, -0.3478, -0.1391]) tensor([0.4619, 0.1922, 0.1550, 0.1909]) -Greedy action tensor([ 0.7613, -0.2556, 0.1225, -0.0715]) tensor([0.4302, 0.1556, 0.2271, 0.1871]) -Greedy action tensor([ 1.8134, -0.4531, -0.2980, -0.1434]) tensor([0.7320, 0.0759, 0.0886, 0.1034]) -Greedy action tensor([ 1.1008, -0.2355, -0.0604, 0.4739]) tensor([0.4739, 0.1245, 0.1484, 0.2532]) -Greedy action tensor([ 1.2337, 0.1132, 0.2776, -0.3002]) tensor([0.5191, 0.1693, 0.1996, 0.1120]) -Greedy action tensor([ 0.8989, -0.7892, -0.3681, 0.4730]) tensor([0.4718, 0.0872, 0.1329, 0.3081]) -Greedy action tensor([ 0.5984, 0.5246, -0.2274, 1.0885]) tensor([0.2501, 0.2323, 0.1095, 0.4082]) -Greedy action tensor([ 0.9049, 0.5685, -0.5353, 0.3993]) tensor([0.3915, 0.2797, 0.0927, 0.2361]) -Greedy action tensor([0.2545, 0.7115, 0.0223, 0.7259]) tensor([0.2010, 0.3175, 0.1594, 0.3221]) -Greedy action tensor([ 0.4422, 0.0973, 1.1887, -0.1291]) tensor([0.2282, 0.1616, 0.4814, 0.1289]) -Greedy action tensor([ 0.6283, 0.0656, -0.2198, 0.0046]) tensor([0.3947, 0.2248, 0.1690, 0.2115]) -Greedy action tensor([1.4990, 1.0542, 0.0159, 0.8325]) tensor([0.4199, 0.2691, 0.0953, 0.2156]) -Greedy action tensor([ 0.1958, 0.6444, -0.4159, 0.3905]) tensor([0.2313, 0.3622, 0.1255, 0.2810]) -Greedy action tensor([ 1.7135, -1.2920, 0.7625, 0.6511]) tensor([0.5613, 0.0278, 0.2169, 0.1940]) -Greedy action tensor([0.5803, 0.0842, 0.1574, 0.6019]) tensor([0.3043, 0.1853, 0.1994, 0.3110]) -Greedy action tensor([ 0.8941, -1.0869, 0.8365, 1.2281]) tensor([0.2875, 0.0397, 0.2714, 0.4015]) -Greedy action tensor([ 0.9821, 0.1478, -0.4875, 0.5916]) tensor([0.4272, 0.1855, 0.0983, 0.2891]) -Greedy action tensor([-0.1706, -0.1825, -0.1193, 1.1050]) tensor([0.1510, 0.1492, 0.1590, 0.5408]) -Greedy action tensor([0.8640, 0.1598, 0.6111, 0.1876]) tensor([0.3598, 0.1779, 0.2794, 0.1829]) -Greedy action tensor([ 0.5252, 0.9942, -0.1326, -0.1993]) tensor([0.2777, 0.4439, 0.1438, 0.1346]) -Greedy action tensor([0.7841, 1.4056, 0.8087, 0.0679]) tensor([0.2286, 0.4255, 0.2343, 0.1117]) -Greedy action tensor([-0.3459, -1.7291, -0.1657, 1.3071]) tensor([0.1304, 0.0327, 0.1561, 0.6808]) -Greedy action tensor([-0.0885, -0.1037, 1.6116, 0.3729]) tensor([0.1106, 0.1089, 0.6052, 0.1754]) -Greedy action tensor([ 0.2476, 0.4330, -0.4466, 0.2338]) tensor([0.2710, 0.3263, 0.1354, 0.2673]) -Greedy action tensor([ 0.9972, -0.2449, 0.8278, 1.0945]) tensor([0.3091, 0.0893, 0.2609, 0.3407]) -Greedy action tensor([0.5539, 1.1618, 0.3132, 0.1487]) tensor([0.2331, 0.4282, 0.1833, 0.1555]) -Greedy action tensor([ 0.0380, -0.2843, 0.4273, 0.0194]) tensor([0.2391, 0.1732, 0.3529, 0.2347]) -Greedy action tensor([ 0.1795, -0.5964, 1.4496, 0.1791]) tensor([0.1661, 0.0764, 0.5914, 0.1660]) -Greedy action tensor([ 0.7791, -0.3088, -0.0931, 0.1706]) tensor([0.4349, 0.1465, 0.1818, 0.2367]) -Greedy action tensor([ 0.6091, 0.7535, -0.4314, 0.8386]) tensor([0.2655, 0.3067, 0.0938, 0.3340]) -Greedy action tensor([ 0.3845, -2.0276, 0.1697, 0.3177]) tensor([0.3531, 0.0317, 0.2849, 0.3303]) -Greedy action tensor([0.2812, 0.0139, 0.9868, 0.3525]) tensor([0.2056, 0.1574, 0.4163, 0.2208]) -Greedy action tensor([ 1.1958, -0.6711, 0.3033, 0.8392]) tensor([0.4416, 0.0683, 0.1809, 0.3092]) -Greedy action tensor([ 1.7799, -0.9453, 0.8787, 1.8327]) tensor([0.3959, 0.0259, 0.1608, 0.4174]) -Greedy action tensor([ 1.5440, -0.7069, 1.6782, 0.5504]) tensor([0.3818, 0.0402, 0.4366, 0.1414]) -Greedy action tensor([ 0.0527, -0.9496, 0.0063, -0.5681]) tensor([0.3498, 0.1284, 0.3339, 0.1880]) -Greedy action tensor([1.1048, 0.3839, 0.1660, 1.7781]) tensor([0.2606, 0.1267, 0.1019, 0.5108]) -Greedy action tensor([0.1023, 0.3795, 1.0978, 0.6058]) tensor([0.1497, 0.1975, 0.4051, 0.2477]) -Greedy action tensor([ 0.9480, -1.4081, -0.7025, 0.1811]) tensor([0.5710, 0.0541, 0.1096, 0.2652]) -Greedy action tensor([-0.1233, 0.4854, -0.6121, 0.4601]) tensor([0.1907, 0.3505, 0.1170, 0.3418]) -Greedy action tensor([0.1240, 1.4606, 0.1947, 0.3314]) tensor([0.1407, 0.5353, 0.1510, 0.1731]) -Greedy action tensor([ 0.7349, -0.0724, -0.3028, -0.0457]) tensor([0.4428, 0.1975, 0.1569, 0.2029]) -Greedy action tensor([ 0.6419, -1.5322, 0.0682, 0.1735]) tensor([0.4342, 0.0494, 0.2446, 0.2718]) -Greedy action tensor([ 0.5386, -0.9787, 1.8149, -0.5827]) tensor([0.1950, 0.0428, 0.6987, 0.0635]) -Greedy action tensor([ 1.0865, 0.0836, -0.0574, 0.2690]) tensor([0.4702, 0.1725, 0.1498, 0.2076]) -Greedy action tensor([ 0.4197, -0.1207, -0.8084, 0.4386]) tensor([0.3455, 0.2012, 0.1012, 0.3521]) -Greedy action tensor([ 0.7735, -0.5344, 1.2210, 0.6734]) tensor([0.2674, 0.0723, 0.4183, 0.2419]) -Greedy action tensor([-0.7788, -0.1148, -1.1502, 0.4606]) tensor([0.1411, 0.2741, 0.0973, 0.4874]) -Greedy action tensor([ 1.2189, -0.4413, 1.9574, 0.1249]) tensor([0.2764, 0.0525, 0.5785, 0.0926]) -Greedy action tensor([1.4205, 0.7254, 0.0588, 1.3037]) tensor([0.3781, 0.1887, 0.0969, 0.3364]) -Greedy action tensor([ 1.1966, -1.1827, 0.1657, 1.1996]) tensor([0.4078, 0.0378, 0.1454, 0.4090]) -Greedy action tensor([ 0.2361, 0.8487, 0.7366, -0.1374]) tensor([0.1929, 0.3560, 0.3183, 0.1328]) -Greedy action tensor([ 1.4746, -0.4291, 0.4301, 0.8908]) tensor([0.4858, 0.0724, 0.1709, 0.2709]) -Greedy action tensor([ 0.8271, -1.3611, 0.1677, 0.3899]) tensor([0.4395, 0.0493, 0.2273, 0.2839]) -Greedy action tensor([-0.4816, -2.1973, 0.2305, -0.0701]) tensor([0.2115, 0.0380, 0.4312, 0.3192]) -Greedy action tensor([ 0.9495, 0.5252, 0.1902, -0.2746]) tensor([0.4139, 0.2708, 0.1937, 0.1217]) -Greedy action tensor([ 1.4766, 0.9830, -0.0331, 0.7338]) tensor([0.4334, 0.2646, 0.0958, 0.2062]) -Greedy action tensor([ 1.1863, -0.7037, 0.9423, -0.0193]) tensor([0.4476, 0.0676, 0.3507, 0.1341]) -Greedy action tensor([ 0.0588, -1.8879, 0.1550, 0.0969]) tensor([0.3046, 0.0435, 0.3354, 0.3165]) -Greedy action tensor([ 1.3097, -0.5889, 1.3702, 0.5447]) tensor([0.3735, 0.0559, 0.3968, 0.1738]) -Greedy action tensor([-0.8005, -1.3957, -0.1417, -0.0731]) tensor([0.1801, 0.0993, 0.3480, 0.3727]) -Greedy action tensor([ 1.2080, 0.3105, -0.3135, 0.2076]) tensor([0.5016, 0.2044, 0.1095, 0.1844]) -Greedy action tensor([ 0.3015, -0.3683, 0.4173, 0.5038]) tensor([0.2592, 0.1326, 0.2910, 0.3172]) -Greedy action tensor([ 0.8662, -0.9858, 0.4387, 1.6213]) tensor([0.2540, 0.0399, 0.1657, 0.5405]) -Greedy action tensor([-0.1814, -0.2814, -0.1956, -0.4043]) tensor([0.2709, 0.2451, 0.2671, 0.2168]) -Greedy action tensor([-0.3529, -0.7151, 0.5460, -0.7320]) tensor([0.2067, 0.1439, 0.5079, 0.1415]) -Greedy action tensor([-0.2306, -1.6313, -0.7474, 1.0504]) tensor([0.1837, 0.0453, 0.1096, 0.6614]) -Greedy action tensor([-0.1944, -0.3552, -0.0706, 0.2617]) tensor([0.2192, 0.1867, 0.2481, 0.3459]) -Greedy action tensor([-0.0838, -1.2688, 0.4784, -0.4842]) tensor([0.2681, 0.0820, 0.4704, 0.1796]) -Greedy action tensor([ 0.3621, 0.0087, -0.0649, 0.9226]) tensor([0.2435, 0.1710, 0.1589, 0.4265]) -Greedy action tensor([ 0.5974, 0.3604, 0.3813, -0.4333]) tensor([0.3388, 0.2673, 0.2730, 0.1209]) -Greedy action tensor([-0.0813, -0.9000, 0.6815, 0.4965]) tensor([0.1863, 0.0822, 0.3995, 0.3320]) -Greedy action tensor([-0.6784, 0.4049, -0.7539, -0.6902]) tensor([0.1704, 0.5033, 0.1580, 0.1684]) -Greedy action tensor([ 1.3403, -0.0612, 0.3905, 0.8673]) tensor([0.4432, 0.1091, 0.1714, 0.2762]) -Greedy action tensor([-0.6912, 0.6976, 0.6992, -0.2757]) tensor([0.0949, 0.3804, 0.3810, 0.1437]) -Greedy action tensor([ 0.2309, -1.8354, -0.2000, 0.4122]) tensor([0.3361, 0.0426, 0.2184, 0.4029]) -Greedy action tensor([ 0.6243, 0.0590, -0.0854, -0.2579]) tensor([0.4042, 0.2297, 0.1988, 0.1673]) -Greedy action tensor([ 0.1250, -0.9321, -0.7322, 0.7969]) tensor([0.2681, 0.0932, 0.1138, 0.5250]) -Greedy action tensor([ 0.8098, -0.4906, 0.5851, -0.3669]) tensor([0.4203, 0.1145, 0.3357, 0.1296]) -Greedy action tensor([ 0.3275, -0.0489, -0.0060, 1.0886]) tensor([0.2201, 0.1511, 0.1577, 0.4711]) -Greedy action tensor([-0.8527, -0.1506, 1.0368, 0.0884]) tensor([0.0820, 0.1654, 0.5424, 0.2101]) -Greedy action tensor([1.0931, 0.3466, 0.3179, 1.4985]) tensor([0.2912, 0.1380, 0.1341, 0.4367]) -Greedy action tensor([ 1.3382, 0.6704, -0.7453, -0.0255]) tensor([0.5283, 0.2709, 0.0658, 0.1351]) -Greedy action tensor([ 1.4013, -0.6175, 1.2434, 0.9036]) tensor([0.3854, 0.0512, 0.3291, 0.2343]) -Greedy action tensor([ 0.6092, 0.1781, -0.9016, 0.5938]) tensor([0.3502, 0.2276, 0.0773, 0.3449]) -Greedy action tensor([ 0.9640, -0.5703, 1.4121, 0.3609]) tensor([0.3005, 0.0648, 0.4703, 0.1644]) -Greedy action tensor([-0.9699, -0.6616, 0.0195, 1.2787]) tensor([0.0688, 0.0937, 0.1852, 0.6523]) -Greedy action tensor([ 0.0100, 1.2593, 0.9666, -0.3514]) tensor([0.1284, 0.4479, 0.3342, 0.0895]) -Greedy action tensor([ 0.3184, -0.1410, 0.1670, -0.2765]) tensor([0.3286, 0.2076, 0.2825, 0.1813]) -Greedy action tensor([ 1.0468, -0.8449, 0.0076, -0.3089]) tensor([0.5674, 0.0856, 0.2007, 0.1463]) -Greedy action tensor([ 0.5102, -0.4357, 0.0091, -0.4210]) tensor([0.4187, 0.1626, 0.2537, 0.1650]) -Greedy action tensor([ 0.4581, -0.1005, 0.0664, -0.1077]) tensor([0.3551, 0.2031, 0.2400, 0.2017]) -Greedy action tensor([ 0.3330, 0.2049, -0.0997, 0.0727]) tensor([0.3031, 0.2667, 0.1966, 0.2336]) -Greedy action tensor([ 1.1292, -0.6602, -0.2463, -0.3720]) tensor([0.6088, 0.1017, 0.1539, 0.1357]) -Greedy action tensor([ 0.6307, -0.3805, -0.1329, -0.4399]) tensor([0.4603, 0.1674, 0.2145, 0.1578]) -Greedy action tensor([ 0.6555, -0.5260, -0.0781, -0.6243]) tensor([0.4842, 0.1486, 0.2325, 0.1347]) -Greedy action tensor([ 0.0288, 0.0384, -0.0404, -0.0901]) tensor([0.2611, 0.2636, 0.2436, 0.2318]) -Greedy action tensor([ 0.7575, -0.4436, -0.1965, -0.6422]) tensor([0.5174, 0.1557, 0.1993, 0.1276]) -Greedy action tensor([ 0.6353, -0.0073, 0.4243, -0.1863]) tensor([0.3603, 0.1895, 0.2918, 0.1584]) -Greedy action tensor([ 0.4148, -0.1527, -0.1326, -0.2015]) tensor([0.3724, 0.2111, 0.2154, 0.2011]) -Greedy action tensor([ 0.0707, 0.1341, -0.0697, -0.0070]) tensor([0.2591, 0.2761, 0.2252, 0.2397]) -Greedy action tensor([ 0.8667, -0.7038, 0.0420, -0.5618]) tensor([0.5302, 0.1103, 0.2324, 0.1271]) -Greedy action tensor([ 1.1118, -1.1735, -0.1066, -0.6305]) tensor([0.6359, 0.0647, 0.1880, 0.1114]) -Greedy action tensor([ 0.4175, -0.1314, -0.0806, -0.2357]) tensor([0.3696, 0.2135, 0.2246, 0.1923]) -Greedy action tensor([ 0.5592, -0.0866, -0.0779, -0.1863]) tensor([0.3956, 0.2074, 0.2092, 0.1877]) -Greedy action tensor([ 0.6405, -0.4434, 0.1226, -0.5618]) tensor([0.4475, 0.1514, 0.2666, 0.1345]) -Greedy action tensor([ 0.6532, -0.3856, 0.1458, -0.5236]) tensor([0.4417, 0.1563, 0.2659, 0.1361]) -Greedy action tensor([ 0.9104, -0.5349, 0.0269, -0.4369]) tensor([0.5238, 0.1235, 0.2165, 0.1362]) -Greedy action tensor([ 0.6777, -0.4261, 0.2397, -0.6766]) tensor([0.4474, 0.1484, 0.2887, 0.1155]) -Greedy action tensor([ 0.6348, -0.3840, 0.0065, -0.3724]) tensor([0.4425, 0.1598, 0.2361, 0.1616]) -Greedy action tensor([ 0.6858, -0.4742, -0.1666, -0.4053]) tensor([0.4818, 0.1510, 0.2054, 0.1618]) -Greedy action tensor([ 0.8304, -0.3233, 0.1643, -0.7763]) tensor([0.4927, 0.1554, 0.2531, 0.0988]) -Greedy action tensor([ 0.8440, -0.1171, 0.2279, -0.2502]) tensor([0.4430, 0.1694, 0.2393, 0.1483]) -Greedy action tensor([ 0.4853, -0.1999, -0.0137, -0.1666]) tensor([0.3799, 0.1915, 0.2307, 0.1980]) -Greedy action tensor([ 0.5863, 0.0727, -0.0657, -0.2198]) tensor([0.3897, 0.2332, 0.2031, 0.1740]) -Greedy action tensor([ 1.0920, -0.4230, 0.0625, -0.5495]) tensor([0.5648, 0.1241, 0.2017, 0.1094]) -Greedy action tensor([ 0.5809, 0.1447, 0.0136, -0.2417]) tensor([0.3769, 0.2437, 0.2138, 0.1656]) -Greedy action tensor([ 1.0791, -0.8726, -0.0046, -0.4770]) tensor([0.5912, 0.0840, 0.2000, 0.1247]) -Greedy action tensor([ 0.9142, -0.3521, -0.0926, -0.2907]) tensor([0.5136, 0.1448, 0.1877, 0.1539]) -Greedy action tensor([ 0.4631, -0.3465, -0.0126, -0.2404]) tensor([0.3904, 0.1738, 0.2426, 0.1932]) -Greedy action tensor([ 0.7406, -0.4020, -0.0397, -0.3048]) tensor([0.4698, 0.1498, 0.2153, 0.1651]) -Greedy action tensor([ 0.5806, -0.1706, 0.0350, -0.4306]) tensor([0.4141, 0.1954, 0.2399, 0.1506]) -Greedy action tensor([ 0.4579, -0.0021, -0.0174, -0.2824]) tensor([0.3663, 0.2312, 0.2277, 0.1747]) -Greedy action tensor([ 0.3948, 0.0115, -0.0843, -0.2630]) tensor([0.3547, 0.2418, 0.2197, 0.1837]) -Greedy action tensor([ 0.4753, -0.2325, -0.0922, -0.2175]) tensor([0.3906, 0.1925, 0.2215, 0.1954]) -Greedy action tensor([ 0.5499, -0.2974, 0.1945, -0.4438]) tensor([0.4001, 0.1715, 0.2804, 0.1481]) -Greedy action tensor([ 0.9664, -0.5596, -0.0237, -0.5205]) tensor([0.5510, 0.1198, 0.2047, 0.1246]) -Greedy action tensor([ 0.5107, 0.0380, 0.0501, -0.1068]) tensor([0.3580, 0.2231, 0.2258, 0.1931]) -Greedy action tensor([ 0.0426, -0.0842, 0.0359, -0.1332]) tensor([0.2693, 0.2372, 0.2675, 0.2259]) -Greedy action tensor([ 1.0580, -1.2237, 0.0187, -0.4636]) tensor([0.5973, 0.0610, 0.2113, 0.1304]) -Greedy action tensor([ 0.5157, -0.1951, -0.0157, -0.2415]) tensor([0.3925, 0.1928, 0.2307, 0.1841]) -Greedy action tensor([ 0.4785, -0.1474, 0.0383, -0.2060]) tensor([0.3727, 0.1993, 0.2400, 0.1880]) -Greedy action tensor([ 0.5155, -0.1490, -0.0118, -0.5496]) tensor([0.4082, 0.2101, 0.2410, 0.1407]) -Greedy action tensor([ 0.6403, -0.4712, -0.1626, -0.6272]) tensor([0.4858, 0.1598, 0.2176, 0.1368]) -Greedy action tensor([ 0.9822, -0.6495, 0.0020, -0.5389]) tensor([0.5589, 0.1093, 0.2097, 0.1221]) -Greedy action tensor([ 0.9344, -0.6446, -0.1513, -0.5964]) tensor([0.5681, 0.1171, 0.1918, 0.1229]) -Greedy action tensor([ 0.7965, -0.4283, 0.0337, -0.5400]) tensor([0.4943, 0.1453, 0.2305, 0.1299]) -Greedy action tensor([ 0.3608, -0.3884, -0.1214, -0.3395]) tensor([0.3866, 0.1828, 0.2387, 0.1919]) -Greedy action tensor([ 0.8445, -0.1904, -0.0952, -0.4530]) tensor([0.4952, 0.1759, 0.1935, 0.1353]) -Greedy action tensor([ 1.1318, -0.8094, -0.0638, -0.5252]) tensor([0.6110, 0.0877, 0.1848, 0.1165]) -Greedy action tensor([ 0.6746, -0.1583, 0.0420, -0.3223]) tensor([0.4283, 0.1862, 0.2275, 0.1580]) -Greedy action tensor([ 0.3721, -0.2115, 0.0964, -0.2755]) tensor([0.3521, 0.1964, 0.2673, 0.1842]) -Greedy action tensor([ 1.0382, -0.4795, 0.0279, -0.7742]) tensor([0.5725, 0.1255, 0.2085, 0.0935]) -Greedy action tensor([ 0.5475, 0.0371, -0.0100, -0.2252]) tensor([0.3795, 0.2278, 0.2173, 0.1753]) -Greedy action tensor([ 0.4573, -0.0053, 0.1101, -0.3415]) tensor([0.3589, 0.2260, 0.2536, 0.1615]) -Greedy action tensor([ 0.4179, 0.1271, -0.1292, -0.0898]) tensor([0.3415, 0.2553, 0.1976, 0.2055]) -Greedy action tensor([ 0.5121, -0.5281, -0.0398, -0.5934]) tensor([0.4424, 0.1563, 0.2548, 0.1465]) -Greedy action tensor([ 0.6458, -0.4107, 0.0771, -0.7323]) tensor([0.4617, 0.1605, 0.2614, 0.1164]) -Greedy action tensor([ 0.5742, -0.4261, 0.0063, -0.2612]) tensor([0.4223, 0.1553, 0.2393, 0.1831]) -Greedy action tensor([ 0.6720, -0.5856, -0.1415, -0.5941]) tensor([0.4976, 0.1415, 0.2206, 0.1403]) -Greedy action tensor([ 0.4394, -0.0443, -0.1361, -0.0678]) tensor([0.3596, 0.2217, 0.2022, 0.2165]) -Greedy action tensor([ 1.0892, -0.4850, -0.0757, -0.6307]) tensor([0.5889, 0.1220, 0.1837, 0.1055]) -Greedy action tensor([ 0.1990, 0.1807, -0.0267, -0.2783]) tensor([0.2941, 0.2888, 0.2347, 0.1825]) -Greedy action tensor([ 0.6599, 0.0616, -0.1198, -0.2333]) tensor([0.4136, 0.2274, 0.1897, 0.1693]) -Greedy action tensor([ 0.5149, -0.2916, 0.0628, -0.4474]) tensor([0.4057, 0.1811, 0.2582, 0.1550]) -Greedy action tensor([ 0.9627, -0.6742, -0.1122, -0.5486]) tensor([0.5693, 0.1108, 0.1943, 0.1256]) -Greedy action tensor([ 0.8900, -0.6035, -0.1459, -0.6503]) tensor([0.5575, 0.1252, 0.1979, 0.1195]) -Greedy action tensor([ 0.5690, -0.3161, -0.0046, -0.2715]) tensor([0.4153, 0.1714, 0.2340, 0.1792]) -Greedy action tensor([ 0.2733, -0.0594, -0.0311, -0.1678]) tensor([0.3228, 0.2314, 0.2381, 0.2077]) -Greedy action tensor([ 0.5755, -0.4721, -0.4177, -0.4017]) tensor([0.4767, 0.1672, 0.1766, 0.1794]) -Greedy action tensor([ 0.6156, -0.2736, -0.0027, -0.2772]) tensor([0.4238, 0.1742, 0.2284, 0.1736]) -Greedy action tensor([ 0.8263, -0.4258, 0.0860, -0.4621]) tensor([0.4905, 0.1402, 0.2340, 0.1352]) -Greedy action tensor([ 0.8255, -0.6368, -0.0688, -0.3953]) tensor([0.5166, 0.1197, 0.2113, 0.1524]) -Greedy action tensor([ 0.7410, -0.1525, -0.0349, -0.2577]) tensor([0.4468, 0.1829, 0.2057, 0.1646]) -Greedy action tensor([ 0.4674, -0.2349, 0.0246, -0.4327]) tensor([0.3930, 0.1947, 0.2524, 0.1598]) -Greedy action tensor([ 0.8690, -0.4900, -0.0159, -0.4296]) tensor([0.5148, 0.1323, 0.2125, 0.1405]) -Greedy action tensor([ 0.4830, -0.5031, -0.1311, -0.2704]) tensor([0.4193, 0.1564, 0.2269, 0.1974]) -Greedy action tensor([ 0.3103, -0.0830, -0.0751, -0.2041]) tensor([0.3387, 0.2285, 0.2303, 0.2025]) -Greedy action tensor([ 0.7139, -0.4482, -0.1837, -0.5236]) tensor([0.4974, 0.1556, 0.2027, 0.1443]) -Greedy action tensor([-1.7404, -0.4909, 0.7182, 0.1551]) tensor([0.0438, 0.1528, 0.5119, 0.2915]) -Greedy action tensor([-1.2874, -0.4838, 0.9520, 1.2080]) tensor([0.0404, 0.0903, 0.3793, 0.4900]) -Greedy action tensor([-1.4713, -0.5919, 0.4273, 0.0772]) tensor([0.0676, 0.1629, 0.4514, 0.3181]) -Greedy action tensor([-1.0205, -0.5534, 0.4443, -0.2716]) tensor([0.1107, 0.1765, 0.4788, 0.2340]) -Greedy action tensor([-1.2222, -0.6110, 0.3565, 0.5411]) tensor([0.0739, 0.1363, 0.3586, 0.4312]) -Greedy action tensor([-1.0844, -0.5498, 0.3079, 0.2695]) tensor([0.0943, 0.1610, 0.3795, 0.3652]) -Greedy action tensor([-1.8213, -0.3714, 0.6361, -0.0223]) tensor([0.0435, 0.1855, 0.5080, 0.2630]) -Greedy action tensor([-2.0385, -0.8440, 0.9436, 0.0687]) tensor([0.0310, 0.1024, 0.6116, 0.2550]) -Greedy action tensor([-0.7128, -0.2758, 0.3674, 0.8876]) tensor([0.0957, 0.1482, 0.2819, 0.4743]) -Greedy action tensor([-1.0250, -0.5950, 0.3914, 0.2725]) tensor([0.0969, 0.1490, 0.3995, 0.3547]) -Greedy action tensor([-1.5259, -0.5863, 0.4644, 0.0692]) tensor([0.0633, 0.1619, 0.4630, 0.3119]) -Greedy action tensor([-0.6355, -0.1832, 0.8221, 1.4227]) tensor([0.0680, 0.1069, 0.2922, 0.5328]) -Greedy action tensor([-0.3099, 0.0477, 0.9683, 1.5070]) tensor([0.0822, 0.1175, 0.2949, 0.5054]) -Greedy action tensor([-0.7185, -0.5527, 0.1808, 0.2719]) tensor([0.1364, 0.1610, 0.3353, 0.3673]) -Greedy action tensor([-1.8599, -0.6328, 0.4887, -0.1476]) tensor([0.0490, 0.1670, 0.5127, 0.2713]) -Greedy action tensor([-1.5113, -0.4342, 0.4311, 0.1679]) tensor([0.0615, 0.1804, 0.4287, 0.3295]) -Greedy action tensor([-1.5497, -0.5363, 0.4427, 0.0776]) tensor([0.0618, 0.1703, 0.4533, 0.3146]) -Greedy action tensor([-0.8896, -0.6333, 0.2510, 0.1845]) tensor([0.1198, 0.1548, 0.3748, 0.3507]) -Greedy action tensor([-1.9778, -0.6129, 1.2402, 0.3911]) tensor([0.0246, 0.0965, 0.6155, 0.2633]) -Greedy action tensor([-1.0520, -0.6797, 0.0094, -0.5923]) tensor([0.1444, 0.2095, 0.4174, 0.2287]) -Greedy action tensor([-1.1215, 0.0224, 0.3796, 0.1666]) tensor([0.0816, 0.2562, 0.3662, 0.2960]) -Greedy action tensor([-1.8645, -0.4608, 0.6385, -0.1215]) tensor([0.0435, 0.1769, 0.5312, 0.2484]) -Greedy action tensor([-0.9987, 0.7157, 0.0604, 0.3149]) tensor([0.0760, 0.4221, 0.2192, 0.2827]) -Greedy action tensor([-1.1539, -0.5553, 0.2709, 0.2756]) tensor([0.0897, 0.1631, 0.3727, 0.3745]) -Greedy action tensor([-1.2601, -0.4932, 1.0761, 1.2865]) tensor([0.0381, 0.0820, 0.3938, 0.4861]) -Greedy action tensor([-1.8707, -0.4646, 0.6292, -0.1390]) tensor([0.0436, 0.1781, 0.5317, 0.2466]) -Greedy action tensor([-1.4412, -0.5923, 0.4230, 0.0858]) tensor([0.0695, 0.1624, 0.4482, 0.3199]) -Greedy action tensor([-1.9396, -0.7542, 0.3626, -0.1690]) tensor([0.0496, 0.1624, 0.4963, 0.2916]) -Greedy action tensor([-1.8099, -0.1480, 0.1724, -0.3513]) tensor([0.0561, 0.2956, 0.4072, 0.2412]) -Greedy action tensor([-1.1918, -0.5344, 0.4428, 0.0885]) tensor([0.0858, 0.1656, 0.4399, 0.3087]) -Greedy action tensor([-1.1019, -0.6591, 0.1615, -0.5708]) tensor([0.1283, 0.1997, 0.4538, 0.2182]) -Greedy action tensor([-1.0465, -0.6763, 0.3525, -0.0216]) tensor([0.1077, 0.1559, 0.4363, 0.3001]) -Greedy action tensor([-1.8915, -0.4675, 0.6423, -0.1374]) tensor([0.0425, 0.1765, 0.5355, 0.2455]) -Greedy action tensor([-0.7645, -0.3938, 0.9842, 1.3737]) tensor([0.0600, 0.0869, 0.3445, 0.5086]) -Greedy action tensor([-0.5561, -0.3109, 0.1870, 0.0740]) tensor([0.1598, 0.2042, 0.3359, 0.3001]) -Greedy action tensor([-0.8250, -0.5457, 0.4226, -0.2466]) tensor([0.1318, 0.1743, 0.4589, 0.2350]) -Greedy action tensor([-1.2652, -0.5758, 0.2928, 0.3865]) tensor([0.0772, 0.1538, 0.3665, 0.4025]) -Greedy action tensor([-0.4274, -0.3067, 0.1755, 0.2148]) tensor([0.1707, 0.1927, 0.3120, 0.3245]) -Greedy action tensor([-0.4036, -0.5957, 0.4486, 0.1185]) tensor([0.1708, 0.1409, 0.4004, 0.2878]) -Greedy action tensor([-1.6507, -0.6582, 1.4548, 1.1054]) tensor([0.0239, 0.0646, 0.5345, 0.3769]) -Greedy action tensor([-1.0965, -0.4672, 1.0321, 1.2430]) tensor([0.0462, 0.0866, 0.3880, 0.4792]) -Greedy action tensor([-1.3119, -0.5973, 0.4296, 0.1321]) tensor([0.0770, 0.1573, 0.4394, 0.3263]) -Greedy action tensor([-1.1142, -0.3167, 0.3226, 0.1795]) tensor([0.0903, 0.2005, 0.3799, 0.3293]) -Greedy action tensor([-1.0608, 0.4174, 0.3914, 0.6684]) tensor([0.0654, 0.2867, 0.2794, 0.3685]) -Greedy action tensor([-1.1167, -0.5612, 0.7652, 1.2546]) tensor([0.0499, 0.0871, 0.3280, 0.5350]) -Greedy action tensor([-1.8213, -0.4880, 0.6294, -0.0759]) tensor([0.0452, 0.1715, 0.5243, 0.2590]) -Greedy action tensor([-1.7791, -0.4410, 0.6470, -0.0091]) tensor([0.0455, 0.1733, 0.5144, 0.2669]) -Greedy action tensor([-1.8457, -0.4227, 0.6141, -0.1198]) tensor([0.0445, 0.1847, 0.5208, 0.2500]) -Greedy action tensor([-1.3085, -0.5595, 0.3245, 0.2124]) tensor([0.0781, 0.1651, 0.3996, 0.3572]) -Greedy action tensor([-1.5597, -0.5541, 0.4584, 0.0424]) tensor([0.0616, 0.1685, 0.4639, 0.3060]) -Greedy action tensor([-1.9698, -0.5376, 1.2844, 0.5882]) tensor([0.0227, 0.0952, 0.5886, 0.2934]) -Greedy action tensor([-1.1053, -0.4967, 0.7878, 1.1523]) tensor([0.0525, 0.0965, 0.3488, 0.5021]) -Greedy action tensor([-1.4535, -0.5223, 0.5387, 0.5082]) tensor([0.0556, 0.1411, 0.4078, 0.3955]) -Greedy action tensor([-1.8442, -0.4557, 0.6112, -0.1270]) tensor([0.0450, 0.1804, 0.5241, 0.2505]) -Greedy action tensor([-0.6547, -0.5620, 0.1706, 0.2675]) tensor([0.1450, 0.1591, 0.3311, 0.3648]) -Greedy action tensor([-0.6502, -0.6571, -0.0995, -0.0664]) tensor([0.1811, 0.1799, 0.3142, 0.3248]) -Greedy action tensor([-1.2999, -0.3425, 0.9476, 1.1360]) tensor([0.0408, 0.1063, 0.3864, 0.4665]) -Greedy action tensor([-1.6428, -0.5657, 0.6001, -0.0898]) tensor([0.0553, 0.1624, 0.5210, 0.2613]) -Greedy action tensor([-1.3064, -0.5990, 0.4353, 0.2435]) tensor([0.0744, 0.1509, 0.4244, 0.3504]) -Greedy action tensor([-1.6054, -0.4893, 0.5337, 0.1070]) tensor([0.0553, 0.1688, 0.4695, 0.3064]) -Greedy action tensor([-1.5762, -0.3426, 0.7493, 0.6775]) tensor([0.0413, 0.1419, 0.4230, 0.3937]) -Greedy action tensor([-1.6617, -0.5040, 0.5227, -0.0553]) tensor([0.0554, 0.1763, 0.4922, 0.2761]) -Greedy action tensor([-0.5705, 0.2167, 0.1407, -0.1008]) tensor([0.1463, 0.3216, 0.2980, 0.2341]) -Greedy action tensor([-1.3482, -0.3540, 1.0203, 1.1217]) tensor([0.0382, 0.1031, 0.4076, 0.4511]) -Greedy action tensor([-1.6177, -0.4605, 0.5034, 0.0158]) tensor([0.0567, 0.1803, 0.4727, 0.2903]) -Greedy action tensor([-1.8864, -0.4695, 0.7029, -0.0936]) tensor([0.0409, 0.1687, 0.5448, 0.2457]) -Greedy action tensor([-1.8912, -0.4290, 0.6393, -0.1483]) tensor([0.0424, 0.1829, 0.5324, 0.2422]) -Greedy action tensor([-1.7570, -0.4427, 0.6381, 0.1416]) tensor([0.0447, 0.1664, 0.4904, 0.2985]) -Greedy action tensor([-1.6334, -0.5268, 0.7484, -0.4165]) tensor([0.0549, 0.1659, 0.5939, 0.1853]) -Greedy action tensor([-1.2837, -0.5072, 0.3189, 0.2858]) tensor([0.0773, 0.1679, 0.3836, 0.3712]) -Greedy action tensor([-1.7958, -0.3352, 0.5782, -0.0771]) tensor([0.0462, 0.1992, 0.4966, 0.2579]) -Greedy action tensor([-1.0495, -0.4035, 0.7096, 1.0110]) tensor([0.0604, 0.1152, 0.3506, 0.4739]) -Greedy action tensor([-0.9150, -0.5044, 1.0012, 1.4789]) tensor([0.0494, 0.0744, 0.3354, 0.5408]) -Greedy action tensor([-1.7195, -0.5081, 0.5438, -0.0327]) tensor([0.0516, 0.1733, 0.4962, 0.2788]) -Greedy action tensor([-1.9272, -0.4533, 0.6695, -0.1600]) tensor([0.0406, 0.1772, 0.5446, 0.2376]) -Greedy action tensor([-1.1850, -0.6191, 0.2889, 0.2770]) tensor([0.0874, 0.1539, 0.3816, 0.3771]) -Greedy action tensor([-1.4229, -0.5391, 0.3885, 0.1687]) tensor([0.0692, 0.1675, 0.4235, 0.3399]) -Greedy action tensor([-1.4621, 0.0403, 0.3273, 0.2097]) tensor([0.0595, 0.2674, 0.3563, 0.3168]) -Greedy action tensor([-1.6894, -0.7504, 0.0106, -0.3003]) tensor([0.0767, 0.1961, 0.4197, 0.3075]) -Greedy action tensor([-1.2334, -0.4583, 0.9732, 1.1889]) tensor([0.0425, 0.0923, 0.3861, 0.4791]) -Greedy action tensor([-1.8551, -0.4586, 0.6174, -0.1325]) tensor([0.0445, 0.1797, 0.5269, 0.2489]) -Greedy action tensor([ 0.8096, -0.6917, -0.2583, -0.2342]) tensor([0.5212, 0.1161, 0.1791, 0.1835]) -Greedy action tensor([ 0.8475, 0.1706, -0.1611, 0.0803]) tensor([0.4279, 0.2174, 0.1561, 0.1987]) -Greedy action tensor([ 0.4392, -0.3694, -0.2648, 0.2579]) tensor([0.3605, 0.1606, 0.1783, 0.3007]) -Greedy action tensor([ 1.2544, -0.6566, -0.0901, 0.3028]) tensor([0.5572, 0.0824, 0.1452, 0.2151]) -Greedy action tensor([ 0.8810, -0.4716, -0.4475, 0.4946]) tensor([0.4539, 0.1174, 0.1202, 0.3085]) -Greedy action tensor([ 1.3849, -0.7207, -0.2661, 0.4833]) tensor([0.5815, 0.0708, 0.1116, 0.2361]) -Greedy action tensor([ 1.3303, -0.6535, -0.3341, 0.4637]) tensor([0.5723, 0.0787, 0.1083, 0.2406]) -Greedy action tensor([ 1.0814, -0.2716, -0.2389, -0.0043]) tensor([0.5367, 0.1387, 0.1433, 0.1812]) -Greedy action tensor([ 1.0493, -0.3638, -0.3041, 0.4237]) tensor([0.4910, 0.1195, 0.1269, 0.2626]) -Greedy action tensor([ 1.5780, -0.4475, -0.2903, 0.0086]) tensor([0.6691, 0.0883, 0.1033, 0.1393]) -Greedy action tensor([ 0.9849, 0.0862, 0.1517, -0.3221]) tensor([0.4734, 0.1927, 0.2058, 0.1281]) -Greedy action tensor([ 1.0977, -0.5369, -0.6535, 0.2906]) tensor([0.5510, 0.1075, 0.0956, 0.2459]) -Greedy action tensor([ 1.2369, -0.4499, -0.3515, 0.3620]) tensor([0.5536, 0.1025, 0.1131, 0.2308]) -Greedy action tensor([ 1.6207, -0.5060, -0.4625, 0.5118]) tensor([0.6355, 0.0758, 0.0791, 0.2096]) -Greedy action tensor([ 1.3871, -0.3721, -0.2802, 0.1765]) tensor([0.6028, 0.1038, 0.1138, 0.1796]) -Greedy action tensor([ 0.9695, -0.2173, -0.1172, -0.2338]) tensor([0.5148, 0.1571, 0.1736, 0.1545]) -Greedy action tensor([ 1.9286, 0.0708, -0.3213, 0.1427]) tensor([0.6997, 0.1092, 0.0738, 0.1173]) -Greedy action tensor([ 0.6244, -0.0064, -0.0115, -0.1929]) tensor([0.3995, 0.2126, 0.2115, 0.1764]) -Greedy action tensor([ 1.0669, -0.0470, -0.2119, 0.1813]) tensor([0.4953, 0.1626, 0.1379, 0.2043]) -Greedy action tensor([ 1.5378, -0.5518, -0.6124, 0.3416]) tensor([0.6483, 0.0802, 0.0755, 0.1960]) -Greedy action tensor([ 0.9408, -0.4454, -0.1427, -0.0812]) tensor([0.5133, 0.1283, 0.1737, 0.1847]) -Greedy action tensor([ 1.5566, -0.7402, -0.2520, 0.3621]) tensor([0.6380, 0.0642, 0.1046, 0.1932]) -Greedy action tensor([ 1.4340, -0.6045, 0.0112, 0.1233]) tensor([0.6094, 0.0794, 0.1469, 0.1643]) -Greedy action tensor([ 0.8547, -0.1285, -0.3265, 0.0533]) tensor([0.4695, 0.1757, 0.1441, 0.2107]) -Greedy action tensor([ 0.7118, -0.3967, -0.3174, 0.3040]) tensor([0.4251, 0.1403, 0.1519, 0.2827]) -Greedy action tensor([ 1.1737, -0.6013, -0.1896, 0.0532]) tensor([0.5710, 0.0968, 0.1461, 0.1862]) -Greedy action tensor([ 1.1984, -0.4811, -0.3829, 0.3046]) tensor([0.5552, 0.1035, 0.1142, 0.2271]) -Greedy action tensor([ 0.9775, -0.3554, -0.4409, 0.2419]) tensor([0.5038, 0.1328, 0.1220, 0.2414]) -Greedy action tensor([ 1.1251, -0.1720, 0.0271, 0.0854]) tensor([0.5101, 0.1394, 0.1701, 0.1803]) -Greedy action tensor([ 0.9967, -0.4390, -0.3206, 0.0632]) tensor([0.5266, 0.1253, 0.1411, 0.2070]) -Greedy action tensor([ 0.8363, -0.6199, -0.5682, 0.2990]) tensor([0.4847, 0.1130, 0.1190, 0.2832]) -Greedy action tensor([ 1.1314, -0.5186, -0.3463, 0.2774]) tensor([0.5417, 0.1040, 0.1236, 0.2306]) -Greedy action tensor([ 1.3263, -0.4442, -0.1914, 0.0967]) tensor([0.5946, 0.1012, 0.1303, 0.1739]) -Greedy action tensor([ 0.8922, -0.4858, -0.2344, 0.0161]) tensor([0.5019, 0.1265, 0.1627, 0.2090]) -Greedy action tensor([ 0.9865, -0.4121, -0.1595, 0.1475]) tensor([0.5007, 0.1237, 0.1592, 0.2164]) -Greedy action tensor([ 1.1148, -0.7082, -0.4645, 0.1785]) tensor([0.5683, 0.0918, 0.1171, 0.2228]) -Greedy action tensor([ 1.0403, -0.0230, -0.1913, -0.0647]) tensor([0.5080, 0.1754, 0.1483, 0.1683]) -Greedy action tensor([ 1.3816, -0.4031, -0.4292, 0.3253]) tensor([0.5955, 0.1000, 0.0974, 0.2071]) -Greedy action tensor([ 0.2148, -0.2954, -0.3329, -0.0799]) tensor([0.3421, 0.2054, 0.1978, 0.2548]) -Greedy action tensor([ 1.5916, -0.8073, -0.0921, 0.4011]) tensor([0.6327, 0.0575, 0.1175, 0.1924]) -Greedy action tensor([ 1.4288, -0.5678, -0.3511, 0.0978]) tensor([0.6375, 0.0866, 0.1075, 0.1684]) -Greedy action tensor([ 0.9732, -0.4319, -0.3313, 0.0589]) tensor([0.5215, 0.1280, 0.1415, 0.2090]) -Greedy action tensor([ 0.9077, 0.0388, 0.0583, -0.0558]) tensor([0.4487, 0.1882, 0.1919, 0.1712]) -Greedy action tensor([ 1.2054, -0.6326, -0.1708, 0.0320]) tensor([0.5811, 0.0925, 0.1467, 0.1797]) -Greedy action tensor([ 1.0035, -0.1223, 0.1211, -0.0936]) tensor([0.4826, 0.1566, 0.1997, 0.1611]) -Greedy action tensor([ 0.4760, -0.3362, -0.1531, 0.0375]) tensor([0.3814, 0.1693, 0.2033, 0.2460]) -Greedy action tensor([ 0.8112, -0.0148, -0.0359, -0.1147]) tensor([0.4420, 0.1935, 0.1895, 0.1751]) -Greedy action tensor([ 1.4586, -0.8190, -0.0096, 0.2868]) tensor([0.6088, 0.0624, 0.1402, 0.1886]) -Greedy action tensor([ 1.1234, -0.3468, -0.5088, 0.3452]) tensor([0.5306, 0.1220, 0.1037, 0.2437]) -Greedy action tensor([ 0.9598, -0.4066, -0.1958, 0.0586]) tensor([0.5061, 0.1291, 0.1593, 0.2055]) -Greedy action tensor([ 1.2223, 0.0668, -0.2389, -0.0143]) tensor([0.5443, 0.1714, 0.1263, 0.1580]) -Greedy action tensor([ 1.6176, -0.8503, -0.2311, 0.5267]) tensor([0.6337, 0.0537, 0.0998, 0.2129]) -Greedy action tensor([ 0.4769, -0.1995, 0.0169, -0.2076]) tensor([0.3782, 0.1923, 0.2387, 0.1907]) -Greedy action tensor([ 1.1929, -0.4680, -0.1867, 0.2152]) tensor([0.5501, 0.1045, 0.1385, 0.2069]) -Greedy action tensor([ 0.7896, -0.0745, -0.3666, 0.2236]) tensor([0.4340, 0.1829, 0.1366, 0.2465]) -Greedy action tensor([ 1.0694, -0.7812, -0.2110, 0.2434]) tensor([0.5340, 0.0839, 0.1484, 0.2338]) -Greedy action tensor([ 1.4031, -0.4982, -0.2241, 0.5712]) tensor([0.5615, 0.0839, 0.1103, 0.2444]) -Greedy action tensor([ 0.2659, -0.1882, -0.2130, -0.0287]) tensor([0.3334, 0.2117, 0.2065, 0.2483]) -Greedy action tensor([ 0.0276, -0.0458, -0.2425, 0.0524]) tensor([0.2690, 0.2499, 0.2053, 0.2757]) -Greedy action tensor([ 1.2540, -0.5513, -0.2452, 0.0223]) tensor([0.5954, 0.0979, 0.1330, 0.1737]) -Greedy action tensor([ 1.2624, -0.6149, -0.5291, 0.3674]) tensor([0.5786, 0.0885, 0.0965, 0.2364]) -Greedy action tensor([ 0.9468, -0.1878, -0.2967, 0.2400]) tensor([0.4755, 0.1529, 0.1371, 0.2345]) -Greedy action tensor([ 1.4010, -0.5384, -0.1249, 0.3092]) tensor([0.5893, 0.0847, 0.1281, 0.1978]) -Greedy action tensor([ 0.9812, -0.0177, -0.2114, -0.0124]) tensor([0.4897, 0.1804, 0.1486, 0.1813]) -Greedy action tensor([ 1.8778, -0.5158, 0.0505, 0.3017]) tensor([0.6854, 0.0626, 0.1103, 0.1417]) -Greedy action tensor([ 0.8458, -0.1027, -0.3550, -0.0147]) tensor([0.4736, 0.1835, 0.1425, 0.2003]) -Greedy action tensor([ 1.6488, -0.3644, -0.4055, 0.2613]) tensor([0.6616, 0.0884, 0.0848, 0.1652]) -Greedy action tensor([ 1.1631, -0.5295, -0.2678, 0.3451]) tensor([0.5363, 0.0987, 0.1282, 0.2367]) -Greedy action tensor([ 1.4571, -0.2792, -0.6910, 0.1807]) tensor([0.6362, 0.1121, 0.0742, 0.1775]) -Greedy action tensor([ 1.4048, -0.4270, -0.1047, 0.2363]) tensor([0.5910, 0.0946, 0.1306, 0.1837]) -Greedy action tensor([ 0.7481, -0.3870, -0.3626, 0.4631]) tensor([0.4162, 0.1338, 0.1371, 0.3130]) -Greedy action tensor([ 1.1549, -0.3620, -0.4022, 0.3233]) tensor([0.5360, 0.1176, 0.1130, 0.2334]) -Greedy action tensor([ 1.1166, -0.1520, 0.1490, 0.0222]) tensor([0.5010, 0.1409, 0.1904, 0.1677]) -Greedy action tensor([ 1.4746, -0.4964, -0.2918, 0.1647]) tensor([0.6329, 0.0882, 0.1082, 0.1708]) -Greedy action tensor([ 0.5392, -0.2110, -0.2564, -0.0482]) tensor([0.4033, 0.1905, 0.1820, 0.2242]) -Greedy action tensor([ 1.2889, -0.3666, 0.1927, -0.2693]) tensor([0.5761, 0.1101, 0.1925, 0.1213]) -Greedy action tensor([ 1.1544, 0.2896, 0.2183, -0.2892]) tensor([0.4880, 0.2055, 0.1914, 0.1152]) -Greedy action tensor([ 1.0952, -0.5872, -0.3215, 0.3138]) tensor([0.5302, 0.0986, 0.1286, 0.2427]) -Greedy action tensor([ 1.6110, -0.9074, -0.3323, 0.1528]) tensor([0.6866, 0.0553, 0.0983, 0.1597]) -Greedy action tensor([ 0.9601, -0.5645, -0.4868, 0.8409]) tensor([0.4272, 0.0930, 0.1005, 0.3792]) -Greedy action tensor([ 1.3968, -0.7884, -0.1820, 0.6432]) tensor([0.5589, 0.0628, 0.1152, 0.2630]) -Greedy action tensor([-0.8532, -0.7375, 1.6967, -0.2898]) tensor([0.0599, 0.0673, 0.7675, 0.1053]) -Greedy action tensor([-0.9422, -1.7518, -0.6359, 0.3030]) tensor([0.1593, 0.0709, 0.2164, 0.5534]) -Greedy action tensor([-0.2440, 0.5800, -0.1734, 0.0870]) tensor([0.1741, 0.3968, 0.1868, 0.2424]) -Greedy action tensor([1.6460, 0.5900, 0.3646, 1.9934]) tensor([0.3288, 0.1144, 0.0913, 0.4655]) -Greedy action tensor([0.5192, 0.5637, 0.9641, 0.3665]) tensor([0.2240, 0.2342, 0.3495, 0.1923]) -Greedy action tensor([ 0.0258, -0.9749, 0.1373, 1.2153]) tensor([0.1733, 0.0637, 0.1937, 0.5693]) -Greedy action tensor([ 1.2275, -0.8439, 1.5302, 0.1655]) tensor([0.3540, 0.0446, 0.4791, 0.1224]) -Greedy action tensor([ 1.8764, 2.1090, -0.1057, 1.1987]) tensor([0.3439, 0.4340, 0.0474, 0.1747]) -Greedy action tensor([0.9867, 0.6300, 0.9395, 0.7784]) tensor([0.2885, 0.2020, 0.2752, 0.2343]) -Greedy action tensor([ 1.1682, -0.3305, 0.6228, 0.9669]) tensor([0.3816, 0.0853, 0.2212, 0.3120]) -Greedy action tensor([ 0.2800, 0.3723, 0.3915, -0.0373]) tensor([0.2536, 0.2782, 0.2835, 0.1847]) -Greedy action tensor([ 0.4500, -0.8660, 0.2063, 1.3247]) tensor([0.2247, 0.0603, 0.1761, 0.5389]) -Greedy action tensor([ 0.5125, -1.7612, -0.1005, 1.3316]) tensor([0.2555, 0.0263, 0.1384, 0.5797]) -Greedy action tensor([-0.2799, 0.0513, -0.4788, 0.8127]) tensor([0.1614, 0.2248, 0.1323, 0.4814]) -Greedy action tensor([ 0.0607, -1.0665, 0.2598, 0.3980]) tensor([0.2535, 0.0821, 0.3093, 0.3551]) -Greedy action tensor([-0.3309, -0.8376, 0.1031, -0.1216]) tensor([0.2284, 0.1376, 0.3525, 0.2815]) -Greedy action tensor([1.3598, 0.0494, 0.8208, 1.4673]) tensor([0.3371, 0.0909, 0.1966, 0.3754]) -Greedy action tensor([ 0.2081, -0.0408, -0.5678, 1.8710]) tensor([0.1331, 0.1038, 0.0613, 0.7019]) -Greedy action tensor([ 1.3810, -0.4238, 0.8239, 0.5782]) tensor([0.4576, 0.0753, 0.2621, 0.2050]) -Greedy action tensor([-0.2768, -0.9314, 0.6221, -0.1378]) tensor([0.1951, 0.1014, 0.4793, 0.2242]) -Greedy action tensor([ 0.4359, -0.0184, 0.9748, -0.8456]) tensor([0.2758, 0.1751, 0.4726, 0.0765]) -Greedy action tensor([ 0.3904, 0.4328, -0.4190, -0.9268]) tensor([0.3628, 0.3785, 0.1615, 0.0972]) -Greedy action tensor([ 0.5737, 0.8253, 1.2974, -0.0116]) tensor([0.2039, 0.2622, 0.4204, 0.1135]) -Greedy action tensor([ 0.2906, 0.5891, 0.1758, -0.1962]) tensor([0.2595, 0.3497, 0.2313, 0.1595]) -Greedy action tensor([0.7499, 0.3183, 0.9590, 0.0907]) tensor([0.2942, 0.1911, 0.3626, 0.1522]) -Greedy action tensor([ 0.6068, 0.7490, -1.0563, 1.4205]) tensor([0.2174, 0.2507, 0.0412, 0.4906]) -Greedy action tensor([-0.3360, 0.1768, 1.6869, -0.5594]) tensor([0.0907, 0.1514, 0.6854, 0.0725]) -Greedy action tensor([ 0.9411, -0.1385, 1.0709, 0.2210]) tensor([0.3373, 0.1146, 0.3840, 0.1641]) -Greedy action tensor([ 0.8431, -1.6591, -0.4436, 0.4363]) tensor([0.4941, 0.0405, 0.1365, 0.3290]) -Greedy action tensor([ 0.6561, 0.6042, -0.7030, 1.1509]) tensor([0.2600, 0.2468, 0.0668, 0.4264]) -Greedy action tensor([ 0.1572, 1.1050, 0.0404, -0.3660]) tensor([0.1975, 0.5096, 0.1758, 0.1171]) -Greedy action tensor([-0.5186, -2.0805, -0.0854, 0.5115]) tensor([0.1801, 0.0378, 0.2777, 0.5045]) -Greedy action tensor([-0.3956, -1.0569, 1.2240, -0.9543]) tensor([0.1401, 0.0723, 0.7075, 0.0801]) -Greedy action tensor([-0.0317, -1.5395, 0.6794, 0.0724]) tensor([0.2290, 0.0507, 0.4662, 0.2541]) -Greedy action tensor([-0.5944, -1.5506, 0.9886, -0.3951]) tensor([0.1338, 0.0514, 0.6515, 0.1633]) -Greedy action tensor([ 0.6324, -0.8161, 0.3363, 0.1583]) tensor([0.3844, 0.0903, 0.2859, 0.2393]) -Greedy action tensor([ 0.1630, -0.0722, -0.1870, 0.9675]) tensor([0.2114, 0.1671, 0.1490, 0.4726]) -Greedy action tensor([ 0.8924, -1.1037, 0.5667, -0.1637]) tensor([0.4534, 0.0616, 0.3273, 0.1577]) -Greedy action tensor([ 1.0744, -1.4463, 1.1476, 0.1568]) tensor([0.3913, 0.0315, 0.4210, 0.1563]) -Greedy action tensor([ 0.8407, 0.2345, 1.5877, -0.6137]) tensor([0.2571, 0.1402, 0.5426, 0.0600]) -Greedy action tensor([ 0.6984, 0.0896, -0.1206, 0.2964]) tensor([0.3768, 0.2050, 0.1661, 0.2521]) -Greedy action tensor([ 1.1949, -0.3890, 0.1224, 1.2146]) tensor([0.3895, 0.0799, 0.1333, 0.3973]) -Greedy action tensor([ 0.5983, -1.4780, 0.6194, 0.6252]) tensor([0.3151, 0.0395, 0.3218, 0.3237]) -Greedy action tensor([1.5403, 0.7072, 0.7036, 1.3562]) tensor([0.3704, 0.1610, 0.1604, 0.3081]) -Greedy action tensor([-0.8715, -0.4301, 0.3635, 0.5706]) tensor([0.0978, 0.1521, 0.3363, 0.4137]) -Greedy action tensor([ 0.3747, 0.1984, -1.2023, 0.3954]) tensor([0.3262, 0.2735, 0.0674, 0.3330]) -Greedy action tensor([-0.0161, 0.7958, 0.3265, -0.7603]) tensor([0.1947, 0.4385, 0.2743, 0.0925]) -Greedy action tensor([ 0.4205, -0.1760, 0.8494, 1.1903]) tensor([0.1906, 0.1050, 0.2927, 0.4117]) -Greedy action tensor([-0.9062, -0.4514, -0.7870, 0.8184]) tensor([0.1074, 0.1692, 0.1210, 0.6024]) -Greedy action tensor([ 1.7081, 0.3173, -0.2082, -0.1308]) tensor([0.6431, 0.1600, 0.0946, 0.1022]) -Greedy action tensor([ 0.9309, 0.1690, 0.0604, -0.4269]) tensor([0.4667, 0.2178, 0.1954, 0.1200]) -Greedy action tensor([ 1.5730, -1.7447, 1.0841, 1.5985]) tensor([0.3738, 0.0135, 0.2292, 0.3834]) -Greedy action tensor([ 0.6491, -0.1405, -0.1228, 0.9771]) tensor([0.3026, 0.1374, 0.1398, 0.4201]) -Greedy action tensor([-0.1148, -1.3605, 0.0055, 1.9136]) tensor([0.0998, 0.0287, 0.1126, 0.7589]) -Greedy action tensor([0.3397, 0.0974, 0.0212, 0.1460]) tensor([0.2998, 0.2353, 0.2180, 0.2470]) -Greedy action tensor([ 0.9525, -1.3242, 1.3440, 0.2337]) tensor([0.3258, 0.0334, 0.4819, 0.1588]) -Greedy action tensor([ 1.3350, -1.0168, 1.1404, 1.1328]) tensor([0.3656, 0.0348, 0.3010, 0.2987]) -Greedy action tensor([ 0.2995, -2.1017, -0.3328, 0.1220]) tensor([0.4066, 0.0368, 0.2161, 0.3405]) -Greedy action tensor([ 0.2571, -0.8469, -0.3506, 1.2343]) tensor([0.2206, 0.0731, 0.1201, 0.5861]) -Greedy action tensor([ 0.6665, 0.8763, -0.3674, -0.0446]) tensor([0.3247, 0.4004, 0.1155, 0.1594]) -Greedy action tensor([ 0.9394, -0.2727, 1.2335, 0.8444]) tensor([0.2818, 0.0838, 0.3781, 0.2563]) -Greedy action tensor([ 0.8760, -0.5724, 0.3678, 0.9554]) tensor([0.3426, 0.0805, 0.2061, 0.3709]) -Greedy action tensor([1.1247, 0.0725, 0.0266, 2.4348]) tensor([0.1856, 0.0648, 0.0619, 0.6878]) -Greedy action tensor([ 0.7375, -0.6091, 0.7991, 1.5044]) tensor([0.2234, 0.0581, 0.2376, 0.4809]) -Greedy action tensor([ 0.8270, -0.1390, 0.4824, 0.7857]) tensor([0.3280, 0.1248, 0.2324, 0.3147]) -Greedy action tensor([-0.3631, 0.4478, 0.0714, -0.0604]) tensor([0.1627, 0.3660, 0.2512, 0.2202]) -Greedy action tensor([-0.1123, 1.3130, 0.2458, -0.4338]) tensor([0.1367, 0.5686, 0.1956, 0.0991]) -Greedy action tensor([ 0.4596, -1.1396, 0.7424, 1.0570]) tensor([0.2301, 0.0465, 0.3053, 0.4181]) -Greedy action tensor([-0.3043, -0.7367, 0.8622, 0.8335]) tensor([0.1253, 0.0813, 0.4024, 0.3910]) -Greedy action tensor([ 0.9189, -0.7884, 1.1027, 0.2882]) tensor([0.3430, 0.0622, 0.4122, 0.1826]) -Greedy action tensor([-0.2234, 0.1483, 0.4730, -0.4404]) tensor([0.1900, 0.2756, 0.3814, 0.1530]) -Greedy action tensor([ 1.3182, -1.2088, 0.3042, 1.1764]) tensor([0.4328, 0.0346, 0.1570, 0.3756]) -Greedy action tensor([ 2.2489, 0.1509, 0.8668, -0.0390]) tensor([0.6779, 0.0832, 0.1702, 0.0688]) -Greedy action tensor([ 0.4695, -0.6761, 0.4196, 0.6827]) tensor([0.2852, 0.0907, 0.2713, 0.3529]) -Greedy action tensor([ 0.5144, -1.7661, 0.6625, 0.3343]) tensor([0.3229, 0.0330, 0.3744, 0.2697]) -Greedy action tensor([0.6360, 0.6670, 0.4602, 0.1507]) tensor([0.2869, 0.2959, 0.2406, 0.1766]) -Greedy action tensor([ 1.0231, -0.6161, 1.0419, 0.7701]) tensor([0.3345, 0.0649, 0.3408, 0.2597]) -Greedy action tensor([ 1.6588, -1.1768, 1.9960, 1.0859]) tensor([0.3307, 0.0194, 0.4633, 0.1865]) -Greedy action tensor([ 0.2873, -0.0421, -0.3508, 0.9020]) tensor([0.2441, 0.1756, 0.1290, 0.4514]) -Greedy action tensor([0.3787, 0.3363, 0.7637, 0.1635]) tensor([0.2362, 0.2264, 0.3471, 0.1904]) -Greedy action tensor([ 0.7176, -0.8908, 0.6795, 1.3396]) tensor([0.2484, 0.0497, 0.2391, 0.4627]) -Greedy action tensor([-0.2546, 1.0903, 1.0553, 0.1123]) tensor([0.1001, 0.3843, 0.3711, 0.1445]) -Greedy action tensor([ 0.5413, -0.2123, 0.0908, -0.2692]) tensor([0.3917, 0.1844, 0.2497, 0.1742]) -Greedy action tensor([ 0.7506, -0.3190, 0.0898, -0.3761]) tensor([0.4579, 0.1571, 0.2365, 0.1484]) -Greedy action tensor([ 0.5362, 0.1207, -0.0400, -0.1964]) tensor([0.3700, 0.2442, 0.2079, 0.1778]) -Greedy action tensor([ 0.3312, -0.1321, -0.0979, -0.2781]) tensor([0.3541, 0.2228, 0.2306, 0.1925]) -Greedy action tensor([ 0.3700, 0.1547, 0.1679, -0.1705]) tensor([0.3119, 0.2515, 0.2549, 0.1817]) -Greedy action tensor([ 0.2562, 0.0649, -0.1143, -0.2243]) tensor([0.3190, 0.2634, 0.2202, 0.1973]) -Greedy action tensor([ 0.4719, -0.2593, 0.0058, -0.3087]) tensor([0.3896, 0.1875, 0.2444, 0.1785]) -Greedy action tensor([ 0.5569, -0.2979, -0.1269, -0.3008]) tensor([0.4248, 0.1807, 0.2144, 0.1802]) -Greedy action tensor([ 0.6811, -0.4115, -0.2359, -0.3798]) tensor([0.4805, 0.1611, 0.1921, 0.1663]) -Greedy action tensor([ 0.9605, -0.9510, 0.0442, -0.5150]) tensor([0.5629, 0.0832, 0.2252, 0.1287]) -Greedy action tensor([ 0.5967, 0.2032, -0.1698, -0.0965]) tensor([0.3789, 0.2556, 0.1760, 0.1894]) -Greedy action tensor([ 0.7383, -0.3344, -0.0089, -0.2773]) tensor([0.4591, 0.1571, 0.2175, 0.1663]) -Greedy action tensor([ 0.9324, -0.3354, -0.1398, -0.4693]) tensor([0.5348, 0.1505, 0.1830, 0.1316]) -Greedy action tensor([ 0.4912, -0.1528, -0.0936, -0.1416]) tensor([0.3826, 0.2010, 0.2132, 0.2032]) -Greedy action tensor([ 0.2069, 0.0369, 0.0069, -0.3715]) tensor([0.3103, 0.2618, 0.2540, 0.1740]) -Greedy action tensor([ 0.4251, -0.0731, -0.0891, -0.1122]) tensor([0.3584, 0.2178, 0.2143, 0.2094]) -Greedy action tensor([ 0.5419, -0.0293, -0.0559, -0.0726]) tensor([0.3765, 0.2127, 0.2071, 0.2037]) -Greedy action tensor([ 0.3237, 0.0582, -0.0776, -0.1635]) tensor([0.3278, 0.2514, 0.2194, 0.2014]) -Greedy action tensor([ 0.7481, -0.6319, 0.0818, -0.7634]) tensor([0.5036, 0.1267, 0.2586, 0.1111]) -Greedy action tensor([ 0.6603, -0.2313, 0.0753, -0.2343]) tensor([0.4209, 0.1726, 0.2345, 0.1720]) -Greedy action tensor([ 0.5656, -0.0413, -0.1311, -0.1804]) tensor([0.3972, 0.2165, 0.1979, 0.1884]) -Greedy action tensor([ 0.2922, 0.0055, -0.0683, -0.3151]) tensor([0.3341, 0.2508, 0.2330, 0.1820]) -Greedy action tensor([ 0.4804, -0.1616, -0.1119, -0.2282]) tensor([0.3889, 0.2046, 0.2151, 0.1914]) -Greedy action tensor([ 1.2885, -0.9947, 0.0659, -0.7219]) tensor([0.6534, 0.0666, 0.1924, 0.0875]) -Greedy action tensor([ 0.3965, 0.2596, -0.1403, -0.1104]) tensor([0.3269, 0.2851, 0.1911, 0.1969]) -Greedy action tensor([ 0.4431, -0.3261, -0.1043, -0.4492]) tensor([0.4079, 0.1890, 0.2360, 0.1671]) -Greedy action tensor([ 0.3185, 0.0091, -0.0099, -0.1454]) tensor([0.3244, 0.2381, 0.2336, 0.2040]) -Greedy action tensor([ 0.4363, -0.2488, -0.0634, -0.1447]) tensor([0.3745, 0.1888, 0.2272, 0.2095]) -Greedy action tensor([ 0.3599, 0.1088, -0.1240, -0.0154]) tensor([0.3245, 0.2525, 0.2000, 0.2230]) -Greedy action tensor([ 0.5753, -0.3332, 0.0275, -0.5386]) tensor([0.4330, 0.1745, 0.2503, 0.1421]) -Greedy action tensor([ 0.8280, -0.6028, -0.0958, -0.3003]) tensor([0.5103, 0.1220, 0.2026, 0.1651]) -Greedy action tensor([ 0.6834, -0.2803, 0.1028, -0.4091]) tensor([0.4393, 0.1676, 0.2458, 0.1473]) -Greedy action tensor([ 0.8447, -0.3847, -0.0629, -0.4602]) tensor([0.5083, 0.1487, 0.2051, 0.1379]) -Greedy action tensor([ 0.5131, -0.0436, -0.0370, -0.4083]) tensor([0.3925, 0.2249, 0.2264, 0.1562]) -Greedy action tensor([ 0.8592, -0.4235, -0.0281, -0.4943]) tensor([0.5135, 0.1424, 0.2114, 0.1327]) -Greedy action tensor([ 0.6585, -0.3761, -0.1110, -0.3530]) tensor([0.4582, 0.1628, 0.2123, 0.1666]) -Greedy action tensor([ 0.6132, -0.4472, 0.0112, -0.4736]) tensor([0.4482, 0.1552, 0.2455, 0.1512]) -Greedy action tensor([ 1.1776, -0.9712, -0.0166, -0.7065]) tensor([0.6363, 0.0742, 0.1928, 0.0967]) -Greedy action tensor([ 0.5076, -0.3009, 0.0061, -0.1403]) tensor([0.3885, 0.1731, 0.2353, 0.2032]) -Greedy action tensor([ 0.4188, -0.1445, -0.1790, -0.3430]) tensor([0.3867, 0.2201, 0.2127, 0.1805]) -Greedy action tensor([ 0.5515, -0.2828, -0.0899, -0.3342]) tensor([0.4214, 0.1830, 0.2219, 0.1738]) -Greedy action tensor([ 0.3176, -0.3196, 0.0529, -0.5165]) tensor([0.3662, 0.1937, 0.2811, 0.1590]) -Greedy action tensor([ 0.5657, -0.1848, -0.0348, -0.1374]) tensor([0.3975, 0.1877, 0.2180, 0.1968]) -Greedy action tensor([ 0.6753, -0.4075, -0.1932, -0.4463]) tensor([0.4799, 0.1625, 0.2013, 0.1563]) -Greedy action tensor([ 0.8537, -0.4428, -0.0101, -0.6214]) tensor([0.5198, 0.1422, 0.2191, 0.1189]) -Greedy action tensor([ 0.8919, -0.4034, -0.0660, -0.3793]) tensor([0.5160, 0.1413, 0.1980, 0.1447]) -Greedy action tensor([ 1.1030, -0.5660, -0.0272, -0.6754]) tensor([0.5951, 0.1121, 0.1922, 0.1005]) -Greedy action tensor([ 0.4450, -0.0208, 0.0025, -0.2432]) tensor([0.3607, 0.2264, 0.2317, 0.1812]) -Greedy action tensor([ 0.7322, -0.5455, -0.0193, -0.1383]) tensor([0.4610, 0.1285, 0.2175, 0.1930]) -Greedy action tensor([ 0.6648, -0.2763, 0.0320, -0.4534]) tensor([0.4448, 0.1736, 0.2362, 0.1454]) -Greedy action tensor([ 0.9411, -0.7201, -0.1962, -0.6069]) tensor([0.5803, 0.1102, 0.1861, 0.1234]) -Greedy action tensor([ 0.9247, -0.3544, -0.0876, -0.6105]) tensor([0.5385, 0.1498, 0.1957, 0.1160]) -Greedy action tensor([ 0.7694, -0.4777, 0.2218, -0.4744]) tensor([0.4642, 0.1334, 0.2685, 0.1338]) -Greedy action tensor([ 0.4021, -0.0496, -0.0592, -0.3993]) tensor([0.3682, 0.2344, 0.2321, 0.1652]) -Greedy action tensor([ 0.6889, -0.4348, 0.1303, -0.4993]) tensor([0.4541, 0.1476, 0.2598, 0.1384]) -Greedy action tensor([ 0.5670, -0.1570, -0.0577, -0.2394]) tensor([0.4054, 0.1965, 0.2171, 0.1810]) -Greedy action tensor([ 0.3363, -0.1023, 0.0244, -0.1900]) tensor([0.3370, 0.2173, 0.2467, 0.1991]) -Greedy action tensor([ 1.1194, -0.6833, 0.0863, -0.4101]) tensor([0.5756, 0.0949, 0.2049, 0.1247]) -Greedy action tensor([ 0.2254, -0.0152, 0.2708, -0.2044]) tensor([0.2871, 0.2257, 0.3004, 0.1868]) -Greedy action tensor([ 0.4841, -0.0614, -0.0242, -0.3079]) tensor([0.3796, 0.2200, 0.2284, 0.1720]) -Greedy action tensor([ 0.5648, -0.1469, 0.0125, -0.1504]) tensor([0.3913, 0.1921, 0.2252, 0.1914]) -Greedy action tensor([ 0.5986, -0.2787, 0.1072, -0.3482]) tensor([0.4140, 0.1722, 0.2532, 0.1606]) -Greedy action tensor([ 1.3251, -1.3250, 0.0567, -0.4905]) tensor([0.6602, 0.0466, 0.1857, 0.1074]) -Greedy action tensor([ 0.3400, 0.0809, -0.0755, -0.0014]) tensor([0.3182, 0.2456, 0.2100, 0.2262]) -Greedy action tensor([ 0.3260, -0.0313, -0.1193, -0.1853]) tensor([0.3401, 0.2380, 0.2179, 0.2040]) -Greedy action tensor([ 0.6352, -0.2553, 0.1565, -0.1980]) tensor([0.4057, 0.1665, 0.2514, 0.1763]) -Greedy action tensor([ 0.6733, -0.3802, 0.0008, -0.3166]) tensor([0.4483, 0.1563, 0.2288, 0.1666]) -Greedy action tensor([ 0.7740, -0.1241, -0.0686, -0.1598]) tensor([0.4482, 0.1826, 0.1930, 0.1762]) -Greedy action tensor([ 0.3077, 0.0134, -0.1259, -0.1252]) tensor([0.3287, 0.2449, 0.2131, 0.2132]) -Greedy action tensor([ 0.5891, -0.3197, 0.0414, -0.5174]) tensor([0.4325, 0.1743, 0.2501, 0.1430]) -Greedy action tensor([ 0.1865, -0.0935, -0.1092, -0.3741]) tensor([0.3256, 0.2461, 0.2423, 0.1859]) -Greedy action tensor([ 0.7203, -0.4068, 0.1036, -0.5558]) tensor([0.4667, 0.1512, 0.2519, 0.1303]) -Greedy action tensor([ 0.3260, 0.1719, -0.1686, -0.2182]) tensor([0.3282, 0.2813, 0.2001, 0.1904]) -Greedy action tensor([ 0.5505, -0.2053, -0.0483, -0.3582]) tensor([0.4129, 0.1939, 0.2268, 0.1664]) -Greedy action tensor([ 0.2112, 0.1341, 0.0572, -0.2033]) tensor([0.2904, 0.2688, 0.2489, 0.1918]) -Greedy action tensor([ 0.8607, -0.3578, 0.0550, -0.3826]) tensor([0.4924, 0.1456, 0.2200, 0.1420]) -Greedy action tensor([ 1.0164, -0.5155, 0.0474, -0.6714]) tensor([0.5616, 0.1214, 0.2131, 0.1039]) -Greedy action tensor([ 0.2930, -0.0423, -0.1049, -0.1597]) tensor([0.3308, 0.2366, 0.2222, 0.2104]) -Greedy action tensor([ 0.5838, -0.1046, 0.2070, -0.3643]) tensor([0.3882, 0.1950, 0.2663, 0.1504]) -Greedy action tensor([ 0.3920, 0.1005, -0.0841, -0.1268]) tensor([0.3374, 0.2521, 0.2096, 0.2009]) -Greedy action tensor([ 0.8474, -0.7189, -0.0097, -0.2995]) tensor([0.5126, 0.1070, 0.2175, 0.1628]) -Greedy action tensor([-1.1615, -0.5735, 0.2741, 0.3272]) tensor([0.0875, 0.1575, 0.3675, 0.3876]) -Greedy action tensor([-1.6594, -0.5390, 0.5421, -0.0098]) tensor([0.0546, 0.1675, 0.4937, 0.2843]) -Greedy action tensor([-1.7709, -0.4384, 0.5780, -0.0861]) tensor([0.0484, 0.1835, 0.5071, 0.2610]) -Greedy action tensor([-1.1589, -0.7128, 0.3255, -0.0157]) tensor([0.0989, 0.1545, 0.4364, 0.3102]) -Greedy action tensor([-1.5714, -0.3865, 0.6990, 0.6285]) tensor([0.0435, 0.1423, 0.4214, 0.3927]) -Greedy action tensor([-1.3787, -0.3179, 1.1524, 1.1072]) tensor([0.0351, 0.1015, 0.4414, 0.4220]) -Greedy action tensor([-1.8708, -0.4652, 0.6270, -0.1215]) tensor([0.0435, 0.1774, 0.5289, 0.2502]) -Greedy action tensor([-0.0884, -0.0698, 1.0913, 1.6324]) tensor([0.0921, 0.0938, 0.2995, 0.5146]) -Greedy action tensor([-1.9447, -0.5435, 1.1575, 0.5460]) tensor([0.0254, 0.1031, 0.5650, 0.3065]) -Greedy action tensor([-0.7028, -0.4271, 0.9599, 1.5381]) tensor([0.0588, 0.0775, 0.3103, 0.5533]) -Greedy action tensor([-1.1643, -0.5282, 0.5141, 0.9784]) tensor([0.0596, 0.1127, 0.3195, 0.5082]) -Greedy action tensor([-1.5127, -0.3864, 0.5047, 0.2633]) tensor([0.0571, 0.1762, 0.4294, 0.3373]) -Greedy action tensor([-1.0688, -0.1831, 0.4152, 0.7418]) tensor([0.0717, 0.1738, 0.3162, 0.4383]) -Greedy action tensor([-1.4327, -0.5052, 0.3881, 0.2247]) tensor([0.0669, 0.1691, 0.4132, 0.3509]) -Greedy action tensor([-1.8821, -0.4312, 0.6722, -0.0775]) tensor([0.0413, 0.1763, 0.5314, 0.2511]) -Greedy action tensor([-1.9603, -0.5008, 1.1260, 0.4677]) tensor([0.0259, 0.1117, 0.5682, 0.2942]) -Greedy action tensor([-1.2660, -0.7445, 0.0337, -0.5625]) tensor([0.1194, 0.2012, 0.4381, 0.2413]) -Greedy action tensor([-1.6848, -0.2365, 0.5503, -0.1579]) tensor([0.0521, 0.2216, 0.4867, 0.2397]) -Greedy action tensor([-1.7833, -0.5006, 0.6131, -0.0492]) tensor([0.0471, 0.1697, 0.5168, 0.2665]) -Greedy action tensor([-1.6096, -0.4799, 0.4912, 0.0611]) tensor([0.0569, 0.1760, 0.4648, 0.3023]) -Greedy action tensor([-0.5914, -0.5704, 0.2278, -0.0915]) tensor([0.1684, 0.1720, 0.3820, 0.2776]) -Greedy action tensor([-1.8427, -0.6919, 0.0755, -0.3223]) tensor([0.0643, 0.2034, 0.4380, 0.2943]) -Greedy action tensor([-1.4634, -0.9449, 1.0329, 0.8909]) tensor([0.0394, 0.0663, 0.4789, 0.4154]) -Greedy action tensor([-1.1683, -0.1153, 0.2455, 0.7083]) tensor([0.0689, 0.1976, 0.2834, 0.4501]) -Greedy action tensor([-1.6410, -0.4229, 0.5271, -0.1085]) tensor([0.0563, 0.1904, 0.4924, 0.2608]) -Greedy action tensor([-1.0343, -0.6168, 0.2260, 0.3920]) tensor([0.0980, 0.1487, 0.3455, 0.4079]) -Greedy action tensor([-1.3839, -0.4789, 0.7940, -0.6285]) tensor([0.0693, 0.1713, 0.6118, 0.1475]) -Greedy action tensor([-1.0564, -0.5397, 0.2435, 0.3757]) tensor([0.0949, 0.1592, 0.3483, 0.3975]) -Greedy action tensor([-1.4644, -0.5610, 0.5278, 0.4097]) tensor([0.0578, 0.1425, 0.4234, 0.3763]) -Greedy action tensor([-1.4328, -0.0362, 0.3043, 0.1759]) tensor([0.0636, 0.2571, 0.3614, 0.3179]) -Greedy action tensor([-1.3478, -0.3241, 0.8187, 1.0308]) tensor([0.0429, 0.1195, 0.3746, 0.4631]) -Greedy action tensor([-1.8060, -0.5234, 0.9440, 0.4152]) tensor([0.0339, 0.1224, 0.5308, 0.3128]) -Greedy action tensor([-0.1735, -0.1425, 1.1000, 1.5902]) tensor([0.0874, 0.0902, 0.3124, 0.5100]) -Greedy action tensor([-1.1947, -0.5460, 0.3028, 0.3116]) tensor([0.0841, 0.1608, 0.3759, 0.3792]) -Greedy action tensor([-0.5466, -0.3593, 1.1831, 1.5831]) tensor([0.0615, 0.0742, 0.3469, 0.5174]) -Greedy action tensor([-1.3619, -0.6158, 0.4099, 0.0846]) tensor([0.0755, 0.1593, 0.4443, 0.3209]) -Greedy action tensor([-0.9873, -0.6689, 0.2292, 0.2459]) tensor([0.1089, 0.1497, 0.3676, 0.3738]) -Greedy action tensor([-1.6986, -0.4529, 0.6025, 0.0461]) tensor([0.0495, 0.1722, 0.4947, 0.2836]) -Greedy action tensor([-1.4661, -0.6013, 1.3661, 1.1886]) tensor([0.0289, 0.0687, 0.4912, 0.4112]) -Greedy action tensor([-1.1749, -0.5599, 0.2521, 0.4377]) tensor([0.0831, 0.1537, 0.3463, 0.4169]) -Greedy action tensor([-1.9007, -0.8999, 0.1786, -0.4561]) tensor([0.0627, 0.1705, 0.5012, 0.2657]) -Greedy action tensor([-1.4221, -0.5365, 0.3775, 0.1650]) tensor([0.0696, 0.1688, 0.4211, 0.3405]) -Greedy action tensor([-1.8775, -0.4605, 0.6276, -0.1412]) tensor([0.0434, 0.1790, 0.5313, 0.2463]) -Greedy action tensor([-2.0771, -0.7746, 1.1713, 0.5425]) tensor([0.0226, 0.0833, 0.5831, 0.3109]) -Greedy action tensor([-1.3399, -0.2832, 0.8136, 0.9732]) tensor([0.0443, 0.1273, 0.3813, 0.4472]) -Greedy action tensor([-0.4852, -0.5142, 0.1868, 0.1533]) tensor([0.1717, 0.1668, 0.3363, 0.3252]) -Greedy action tensor([-1.6100, -0.5320, 0.4925, 0.0788]) tensor([0.0570, 0.1676, 0.4668, 0.3086]) -Greedy action tensor([-1.0648, -0.6076, 0.2430, 0.3045]) tensor([0.0979, 0.1547, 0.3622, 0.3852]) -Greedy action tensor([-1.0687, -0.5912, 0.6680, 0.9013]) tensor([0.0647, 0.1043, 0.3673, 0.4638]) -Greedy action tensor([-1.8047, -0.5009, 0.6479, -0.0315]) tensor([0.0451, 0.1660, 0.5235, 0.2654]) -Greedy action tensor([-0.6916, -0.0759, 0.5373, 1.3915]) tensor([0.0699, 0.1295, 0.2390, 0.5616]) -Greedy action tensor([-0.7980, -0.4188, 1.2665, 1.5453]) tensor([0.0482, 0.0704, 0.3797, 0.5018]) -Greedy action tensor([-1.4114, -0.3732, 0.3355, 0.1723]) tensor([0.0693, 0.1957, 0.3975, 0.3376]) -Greedy action tensor([-0.7138, -0.2530, 0.3853, 1.0318]) tensor([0.0884, 0.1401, 0.2652, 0.5063]) -Greedy action tensor([-1.1631, -0.5885, 0.3056, 0.1568]) tensor([0.0921, 0.1635, 0.3998, 0.3446]) -Greedy action tensor([-1.8136, -0.4188, 0.6458, -0.2335]) tensor([0.0463, 0.1869, 0.5419, 0.2249]) -Greedy action tensor([-1.7744, -0.5002, 0.5816, -0.0584]) tensor([0.0483, 0.1729, 0.5099, 0.2689]) -Greedy action tensor([-2.0423, -0.7578, 1.1310, 0.3621]) tensor([0.0253, 0.0913, 0.6036, 0.2798]) -Greedy action tensor([-1.3822, -0.5727, 0.3932, 0.1999]) tensor([0.0714, 0.1603, 0.4212, 0.3472]) -Greedy action tensor([-1.4248, -0.4723, 0.6375, 0.5304]) tensor([0.0540, 0.1400, 0.4246, 0.3815]) -Greedy action tensor([-1.7015, -0.4905, 0.7241, 0.3259]) tensor([0.0430, 0.1443, 0.4862, 0.3265]) -Greedy action tensor([-1.6721, -0.4857, 0.5507, 0.0796]) tensor([0.0519, 0.1699, 0.4791, 0.2991]) -Greedy action tensor([-1.0907, 0.3217, 0.4598, 0.9426]) tensor([0.0573, 0.2352, 0.2700, 0.4376]) -Greedy action tensor([-0.4680, -0.3672, 0.2821, 0.6113]) tensor([0.1395, 0.1544, 0.2955, 0.4106]) -Greedy action tensor([-1.7358, -0.5061, 0.6099, 0.0325]) tensor([0.0483, 0.1650, 0.5039, 0.2828]) -Greedy action tensor([-1.6258, -0.6385, 0.7526, 0.5449]) tensor([0.0430, 0.1155, 0.4643, 0.3772]) -Greedy action tensor([-1.5835, -0.4393, 0.6584, 0.5942]) tensor([0.0447, 0.1403, 0.4206, 0.3944]) -Greedy action tensor([-1.3160, -0.5436, 0.3153, 0.2651]) tensor([0.0761, 0.1648, 0.3891, 0.3700]) -Greedy action tensor([-1.9834, -0.8430, 0.3268, -0.2556]) tensor([0.0504, 0.1577, 0.5081, 0.2838]) -Greedy action tensor([-1.5358, -0.3782, 0.8093, 0.7997]) tensor([0.0401, 0.1275, 0.4182, 0.4142]) -Greedy action tensor([-0.8179, -0.6073, 0.1782, 0.3412]) tensor([0.1230, 0.1518, 0.3331, 0.3921]) -Greedy action tensor([-1.8083, -0.4160, 0.5836, -0.1069]) tensor([0.0466, 0.1877, 0.5100, 0.2557]) -Greedy action tensor([-1.9191, -0.4482, 0.6524, -0.1696]) tensor([0.0413, 0.1800, 0.5409, 0.2378]) -Greedy action tensor([-1.7126, -0.2965, 0.6377, 0.2367]) tensor([0.0442, 0.1821, 0.4634, 0.3103]) -Greedy action tensor([-1.1650, -0.5619, 0.2419, 0.4493]) tensor([0.0838, 0.1531, 0.3421, 0.4210]) -Greedy action tensor([-1.4675, -0.5346, 0.4192, 0.2633]) tensor([0.0634, 0.1610, 0.4180, 0.3576]) -Greedy action tensor([-1.1518, -0.5097, 0.3376, 0.6320]) tensor([0.0753, 0.1430, 0.3338, 0.4480]) -Greedy action tensor([-1.3346, -0.2254, 0.5218, 0.7087]) tensor([0.0551, 0.1671, 0.3527, 0.4251]) -Greedy action tensor([-1.4114, -0.7689, 1.2169, 1.2354]) tensor([0.0324, 0.0616, 0.4488, 0.4572]) -Greedy action tensor([-0.3972, -0.0747, 0.9014, 1.4549]) tensor([0.0805, 0.1112, 0.2951, 0.5132]) -Greedy action tensor([-0.7476, -0.3066, 0.3838, -0.3075]) tensor([0.1388, 0.2157, 0.4301, 0.2155]) -Greedy action tensor([ 0.9486, -0.1679, -0.4672, 0.2167]) tensor([0.4875, 0.1596, 0.1183, 0.2345]) -Greedy action tensor([ 1.5946, -0.7132, -0.2629, 0.5699]) tensor([0.6194, 0.0616, 0.0967, 0.2223]) -Greedy action tensor([ 1.4919, -0.5142, -0.1930, 0.3958]) tensor([0.6045, 0.0813, 0.1121, 0.2020]) -Greedy action tensor([ 0.8745, -0.3063, -0.4235, -0.0747]) tensor([0.5083, 0.1561, 0.1388, 0.1968]) -Greedy action tensor([ 0.5189, -0.2626, 0.1438, -0.2006]) tensor([0.3799, 0.1739, 0.2611, 0.1850]) -Greedy action tensor([ 0.9904, -0.1505, -0.3483, 0.0660]) tensor([0.5054, 0.1615, 0.1325, 0.2005]) -Greedy action tensor([ 1.0623, -0.2685, -0.7454, 0.0930]) tensor([0.5532, 0.1462, 0.0907, 0.2099]) -Greedy action tensor([ 1.0967, -0.4269, -0.2613, 0.1736]) tensor([0.5341, 0.1164, 0.1373, 0.2122]) -Greedy action tensor([ 1.4387, -0.8800, -0.4376, 0.7123]) tensor([0.5763, 0.0567, 0.0883, 0.2787]) -Greedy action tensor([ 0.7519, -0.3485, 0.0797, -0.2379]) tensor([0.4515, 0.1502, 0.2305, 0.1678]) -Greedy action tensor([ 1.6927, -0.6988, -0.4710, 0.3951]) tensor([0.6759, 0.0618, 0.0777, 0.1846]) -Greedy action tensor([ 0.9520, -0.3864, -0.2516, 0.0934]) tensor([0.5035, 0.1320, 0.1511, 0.2134]) -Greedy action tensor([ 1.2698, -0.8076, -0.3818, 0.9535]) tensor([0.4888, 0.0612, 0.0937, 0.3563]) -Greedy action tensor([ 1.9705, -0.3885, -0.1847, 0.2164]) tensor([0.7228, 0.0683, 0.0838, 0.1251]) -Greedy action tensor([ 1.5918, -0.7053, -0.0357, 0.1107]) tensor([0.6560, 0.0660, 0.1289, 0.1492]) -Greedy action tensor([ 0.9084, -0.3535, -0.3247, -0.0392]) tensor([0.5096, 0.1443, 0.1485, 0.1976]) -Greedy action tensor([ 1.4978, -0.2617, -0.2701, 0.2726]) tensor([0.6111, 0.1052, 0.1043, 0.1795]) -Greedy action tensor([ 0.8891, -0.5720, -0.0668, -0.1325]) tensor([0.5059, 0.1174, 0.1945, 0.1822]) -Greedy action tensor([ 0.8746, -0.4772, -0.1122, -0.0121]) tensor([0.4893, 0.1266, 0.1824, 0.2016]) -Greedy action tensor([ 1.5971, -0.9194, -0.4405, 0.6050]) tensor([0.6322, 0.0510, 0.0824, 0.2344]) -Greedy action tensor([ 0.9352, -0.7007, -0.3988, 0.8303]) tensor([0.4240, 0.0826, 0.1117, 0.3817]) -Greedy action tensor([ 0.3844, -0.0343, -0.1053, -0.0902]) tensor([0.3457, 0.2274, 0.2118, 0.2151]) -Greedy action tensor([ 0.5645, -0.3355, -0.0949, 0.0774]) tensor([0.3940, 0.1602, 0.2038, 0.2421]) -Greedy action tensor([ 1.0668, -0.7160, -0.6235, 0.6428]) tensor([0.4982, 0.0838, 0.0919, 0.3261]) -Greedy action tensor([ 0.5068, -0.3191, -0.0875, -0.0684]) tensor([0.3918, 0.1715, 0.2162, 0.2204]) -Greedy action tensor([ 1.3842, -0.5803, -0.1980, 0.5339]) tensor([0.5640, 0.0791, 0.1159, 0.2410]) -Greedy action tensor([ 0.9543, -0.2282, -0.5763, 0.0864]) tensor([0.5147, 0.1578, 0.1114, 0.2161]) -Greedy action tensor([ 0.8806, -0.2630, 0.0222, 0.0178]) tensor([0.4620, 0.1472, 0.1958, 0.1949]) -Greedy action tensor([ 1.1344, -0.7724, -0.5153, 0.8167]) tensor([0.4834, 0.0718, 0.0929, 0.3519]) -Greedy action tensor([ 1.2576, -0.6802, -0.2505, 0.4208]) tensor([0.5560, 0.0801, 0.1231, 0.2408]) -Greedy action tensor([ 1.7012, -0.6658, -0.3921, 0.3113]) tensor([0.6821, 0.0639, 0.0841, 0.1699]) -Greedy action tensor([ 1.0323, -0.3971, -0.2206, 0.2284]) tensor([0.5069, 0.1214, 0.1448, 0.2269]) -Greedy action tensor([ 1.2174, -0.0104, -0.3259, 0.0428]) tensor([0.5508, 0.1614, 0.1177, 0.1702]) -Greedy action tensor([ 1.9605, -0.6811, -0.2896, 0.3646]) tensor([0.7250, 0.0517, 0.0764, 0.1470]) -Greedy action tensor([ 1.7021, -0.7203, -0.1551, 0.1228]) tensor([0.6892, 0.0611, 0.1076, 0.1421]) -Greedy action tensor([ 1.2473, -0.4278, -0.2317, 0.2168]) tensor([0.5643, 0.1057, 0.1286, 0.2014]) -Greedy action tensor([ 1.0623, -0.5446, -0.3384, 0.1947]) tensor([0.5357, 0.1074, 0.1320, 0.2249]) -Greedy action tensor([ 1.0183, -0.5432, -0.1004, 0.3893]) tensor([0.4832, 0.1014, 0.1579, 0.2576]) -Greedy action tensor([ 0.8551, -0.3520, -0.3826, 0.6805]) tensor([0.4117, 0.1231, 0.1194, 0.3458]) -Greedy action tensor([ 0.7141, -0.2602, -0.4092, 0.1596]) tensor([0.4392, 0.1658, 0.1428, 0.2522]) -Greedy action tensor([ 1.7440, -1.1343, -0.5992, 1.1514]) tensor([0.5865, 0.0330, 0.0563, 0.3242]) -Greedy action tensor([ 0.8494, 0.2302, 0.0249, -0.1378]) tensor([0.4256, 0.2291, 0.1866, 0.1586]) -Greedy action tensor([ 1.4076, -0.3235, -0.0040, 0.0465]) tensor([0.5962, 0.1056, 0.1453, 0.1528]) -Greedy action tensor([ 1.4592, -0.1782, -0.5204, 0.1430]) tensor([0.6247, 0.1215, 0.0863, 0.1675]) -Greedy action tensor([ 0.9762, -0.1569, -0.4249, 0.4466]) tensor([0.4635, 0.1493, 0.1142, 0.2730]) -Greedy action tensor([ 1.1443, -0.6997, -0.4244, 0.4546]) tensor([0.5353, 0.0847, 0.1115, 0.2686]) -Greedy action tensor([ 1.1455, -0.3739, -0.0667, 0.2364]) tensor([0.5210, 0.1140, 0.1550, 0.2099]) -Greedy action tensor([ 0.9180, -0.6287, -0.6704, 0.6914]) tensor([0.4516, 0.0962, 0.0922, 0.3600]) -Greedy action tensor([ 0.9941, -0.2088, -0.6090, -0.3023]) tensor([0.5633, 0.1692, 0.1134, 0.1541]) -Greedy action tensor([ 0.2274, -0.1450, -0.6708, 0.2932]) tensor([0.3160, 0.2178, 0.1287, 0.3375]) -Greedy action tensor([ 0.9498, -0.1333, -0.1015, 0.0674]) tensor([0.4758, 0.1611, 0.1663, 0.1969]) -Greedy action tensor([ 0.9218, -0.4430, 0.0856, 0.1485]) tensor([0.4651, 0.1188, 0.2015, 0.2146]) -Greedy action tensor([ 0.4851, -0.2445, -0.1425, 0.1482]) tensor([0.3663, 0.1766, 0.1956, 0.2615]) -Greedy action tensor([ 1.5647, -0.5247, -0.0608, 0.2497]) tensor([0.6293, 0.0779, 0.1239, 0.1690]) -Greedy action tensor([ 0.8250, -0.1040, -0.2551, -0.0311]) tensor([0.4631, 0.1829, 0.1572, 0.1967]) -Greedy action tensor([ 1.4259, -0.4333, -0.2489, 0.2128]) tensor([0.6096, 0.0950, 0.1142, 0.1812]) -Greedy action tensor([ 1.5323, -0.8255, -0.3458, 0.2068]) tensor([0.6609, 0.0625, 0.1010, 0.1756]) -Greedy action tensor([ 1.3465, -0.2368, -0.1792, -0.2473]) tensor([0.6150, 0.1263, 0.1338, 0.1249]) -Greedy action tensor([ 0.9788, -0.6521, -0.1656, -0.0421]) tensor([0.5335, 0.1044, 0.1699, 0.1922]) -Greedy action tensor([ 0.8855, -0.2397, -0.6780, 0.3372]) tensor([0.4735, 0.1537, 0.0991, 0.2736]) -Greedy action tensor([ 0.4142, -0.2046, -0.2657, 0.3171]) tensor([0.3387, 0.1824, 0.1716, 0.3073]) -Greedy action tensor([ 1.0921, -0.5673, -0.1685, 0.2049]) tensor([0.5303, 0.1009, 0.1503, 0.2184]) -Greedy action tensor([ 0.3399, -0.3303, -0.1610, 0.1120]) tensor([0.3432, 0.1756, 0.2080, 0.2732]) -Greedy action tensor([ 1.2562, -0.7078, -0.2702, 0.5730]) tensor([0.5369, 0.0753, 0.1167, 0.2711]) -Greedy action tensor([ 0.7978, -0.0584, -0.5161, -0.3025]) tensor([0.4935, 0.2096, 0.1326, 0.1642]) -Greedy action tensor([ 1.7101, -0.7740, -0.3647, 0.6036]) tensor([0.6495, 0.0542, 0.0816, 0.2148]) -Greedy action tensor([ 1.6230, -0.6832, -0.4315, 0.2648]) tensor([0.6734, 0.0671, 0.0863, 0.1732]) -Greedy action tensor([ 1.2249, -0.2514, -0.1700, 0.4069]) tensor([0.5215, 0.1191, 0.1293, 0.2301]) -Greedy action tensor([ 0.8167, -0.1001, -0.1371, 0.0352]) tensor([0.4459, 0.1783, 0.1718, 0.2041]) -Greedy action tensor([ 1.5637, -1.1223, -0.4039, 0.3332]) tensor([0.6666, 0.0454, 0.0932, 0.1948]) -Greedy action tensor([ 1.2364, 0.0070, -0.1179, 0.1819]) tensor([0.5266, 0.1540, 0.1359, 0.1834]) -Greedy action tensor([ 1.3195, -0.4601, -0.2876, 0.3670]) tensor([0.5698, 0.0961, 0.1142, 0.2198]) -Greedy action tensor([ 1.4637, -0.5540, -0.1510, 0.1836]) tensor([0.6212, 0.0826, 0.1236, 0.1727]) -Greedy action tensor([ 1.2622, -0.1084, -0.1117, 0.2066]) tensor([0.5391, 0.1369, 0.1364, 0.1876]) -Greedy action tensor([ 1.4132, -0.6040, -0.0253, 0.4103]) tensor([0.5757, 0.0766, 0.1366, 0.2112]) -Greedy action tensor([ 1.0968, -0.4109, -0.3245, 0.2360]) tensor([0.5303, 0.1174, 0.1280, 0.2242]) -Greedy action tensor([ 1.3155, -0.3042, 0.0013, 0.0933]) tensor([0.5678, 0.1124, 0.1526, 0.1673]) -Greedy action tensor([ 0.7473, -0.6211, -0.4579, 0.4198]) tensor([0.4396, 0.1119, 0.1317, 0.3168]) -Greedy action tensor([ 1.2874, -0.7965, -0.3093, 0.4668]) tensor([0.5659, 0.0704, 0.1146, 0.2491]) -Greedy action tensor([ 0.6663, -0.3586, -0.2524, -0.1835]) tensor([0.4576, 0.1642, 0.1826, 0.1956]) -Greedy action tensor([ 1.2466, -0.5290, -0.1051, 0.1515]) tensor([0.5673, 0.0961, 0.1468, 0.1898]) -Greedy action tensor([ 0.5564, -0.3713, 1.3169, 0.9284]) tensor([0.2006, 0.0793, 0.4291, 0.2910]) -Greedy action tensor([ 0.4852, 0.9226, 1.6413, -0.3397]) tensor([0.1622, 0.2512, 0.5155, 0.0711]) -Greedy action tensor([-0.4066, 0.2153, 1.0196, -0.5477]) tensor([0.1267, 0.2359, 0.5274, 0.1100]) -Greedy action tensor([ 0.4482, -0.6486, 0.7253, -0.3041]) tensor([0.3201, 0.1069, 0.4222, 0.1508]) -Greedy action tensor([ 0.6771, -0.8686, 0.5418, -0.0875]) tensor([0.3918, 0.0835, 0.3422, 0.1824]) -Greedy action tensor([ 1.0558, -0.3329, 1.1804, -0.1672]) tensor([0.3736, 0.0932, 0.4232, 0.1100]) -Greedy action tensor([0.6783, 0.4716, 0.8765, 0.1195]) tensor([0.2774, 0.2256, 0.3382, 0.1587]) -Greedy action tensor([-0.1719, 0.5105, 0.8196, -0.4355]) tensor([0.1552, 0.3071, 0.4184, 0.1193]) -Greedy action tensor([ 1.5163, -0.5202, -0.3672, 1.1691]) tensor([0.5027, 0.0656, 0.0764, 0.3553]) -Greedy action tensor([ 1.3387, -0.7385, 0.9069, 0.5721]) tensor([0.4466, 0.0559, 0.2900, 0.2075]) -Greedy action tensor([-0.3083, -1.9534, 0.5815, 1.4802]) tensor([0.1041, 0.0201, 0.2534, 0.6224]) -Greedy action tensor([ 0.6151, -0.5755, -0.3344, 1.7061]) tensor([0.2142, 0.0651, 0.0829, 0.6378]) -Greedy action tensor([-0.2687, -0.8747, -0.3658, 1.3818]) tensor([0.1305, 0.0712, 0.1184, 0.6799]) -Greedy action tensor([ 1.8271, -0.4511, -0.4049, 1.5867]) tensor([0.5010, 0.0513, 0.0538, 0.3939]) -Greedy action tensor([ 1.6021, -0.3520, 0.8675, 1.7916]) tensor([0.3534, 0.0501, 0.1695, 0.4271]) -Greedy action tensor([-0.1404, -0.4484, 0.9065, 0.8320]) tensor([0.1384, 0.1017, 0.3942, 0.3658]) -Greedy action tensor([ 0.6635, -0.2332, 0.9568, 0.3727]) tensor([0.2860, 0.1167, 0.3835, 0.2138]) -Greedy action tensor([0.6551, 0.0683, 1.3522, 0.4951]) tensor([0.2264, 0.1259, 0.4547, 0.1930]) -Greedy action tensor([ 1.0846, -0.6435, 0.8323, 0.0013]) tensor([0.4361, 0.0775, 0.3388, 0.1476]) -Greedy action tensor([ 0.3677, -1.2633, 0.9150, -0.7523]) tensor([0.3076, 0.0602, 0.5318, 0.1004]) -Greedy action tensor([-0.1220, -1.5951, -0.7316, 0.6916]) tensor([0.2482, 0.0569, 0.1349, 0.5600]) -Greedy action tensor([0.0454, 0.6784, 0.6673, 1.1559]) tensor([0.1285, 0.2420, 0.2393, 0.3901]) -Greedy action tensor([ 1.0784, 0.2761, -0.6329, 0.2877]) tensor([0.4802, 0.2153, 0.0867, 0.2178]) -Greedy action tensor([-0.1659, -0.4739, 0.5305, -0.3496]) tensor([0.2186, 0.1607, 0.4387, 0.1820]) -Greedy action tensor([ 0.6966, -1.5053, -0.8276, 1.0162]) tensor([0.3697, 0.0409, 0.0805, 0.5089]) -Greedy action tensor([ 0.3868, 0.5921, 1.7441, -0.2172]) tensor([0.1501, 0.1844, 0.5834, 0.0821]) -Greedy action tensor([ 0.7246, -0.4621, -1.2121, 1.4761]) tensor([0.2801, 0.0855, 0.0404, 0.5940]) -Greedy action tensor([ 1.4848, -0.3676, 0.7322, 0.8470]) tensor([0.4637, 0.0727, 0.2185, 0.2451]) -Greedy action tensor([ 0.4186, -1.6028, 0.3839, 0.6278]) tensor([0.3002, 0.0398, 0.2900, 0.3701]) -Greedy action tensor([-0.0303, -0.3334, 0.8965, -0.2714]) tensor([0.1980, 0.1462, 0.5002, 0.1556]) -Greedy action tensor([ 1.3146, 0.7338, -0.5932, 0.4207]) tensor([0.4724, 0.2643, 0.0701, 0.1932]) -Greedy action tensor([ 1.3251, -0.3542, -0.1199, 0.4594]) tensor([0.5426, 0.1012, 0.1279, 0.2283]) -Greedy action tensor([ 0.8091, -0.4144, 0.8005, 1.1819]) tensor([0.2676, 0.0787, 0.2653, 0.3885]) -Greedy action tensor([0.2039, 0.2185, 0.9580, 0.8984]) tensor([0.1628, 0.1652, 0.3460, 0.3260]) -Greedy action tensor([0.2363, 0.5714, 0.0356, 1.6132]) tensor([0.1393, 0.1948, 0.1140, 0.5520]) -Greedy action tensor([ 1.1777, -0.5623, -0.0956, 1.4067]) tensor([0.3686, 0.0647, 0.1032, 0.4635]) -Greedy action tensor([0.4934, 0.3880, 0.4077, 0.1431]) tensor([0.2839, 0.2555, 0.2606, 0.2000]) -Greedy action tensor([ 1.3068, -0.6025, 0.9821, 1.8269]) tensor([0.2814, 0.0417, 0.2034, 0.4734]) -Greedy action tensor([ 1.2303, -0.5276, 0.9204, 0.7198]) tensor([0.3990, 0.0688, 0.2927, 0.2395]) -Greedy action tensor([ 1.6391, -1.0039, 0.7330, 1.8696]) tensor([0.3657, 0.0260, 0.1478, 0.4605]) -Greedy action tensor([ 0.7146, -0.8933, -0.0828, -0.5518]) tensor([0.5174, 0.1036, 0.2331, 0.1458]) -Greedy action tensor([1.3230, 0.3245, 0.4003, 0.7383]) tensor([0.4304, 0.1586, 0.1711, 0.2399]) -Greedy action tensor([ 0.8804, -0.4897, 1.3841, 0.0045]) tensor([0.3007, 0.0764, 0.4976, 0.1252]) -Greedy action tensor([ 0.1792, -0.2055, 0.4533, 0.0334]) tensor([0.2590, 0.1763, 0.3407, 0.2239]) -Greedy action tensor([ 1.0109, -0.1416, -0.5113, 1.0720]) tensor([0.3851, 0.1216, 0.0840, 0.4093]) -Greedy action tensor([ 0.7966, 0.2812, -0.3456, 0.7082]) tensor([0.3531, 0.2109, 0.1127, 0.3233]) -Greedy action tensor([-0.7812, 0.5910, 0.8126, -0.0936]) tensor([0.0843, 0.3327, 0.4152, 0.1678]) -Greedy action tensor([0.3831, 0.2997, 0.5941, 1.0426]) tensor([0.1965, 0.1808, 0.2427, 0.3800]) -Greedy action tensor([ 1.0248, -0.9940, 1.0122, 0.4935]) tensor([0.3693, 0.0490, 0.3646, 0.2171]) -Greedy action tensor([ 1.8522, -1.7143, -0.4887, 2.0775]) tensor([0.4207, 0.0119, 0.0405, 0.5270]) -Greedy action tensor([ 0.7210, 0.0778, -0.4864, 1.2133]) tensor([0.2890, 0.1519, 0.0864, 0.4728]) -Greedy action tensor([ 0.0222, -0.7816, -0.2274, 0.1310]) tensor([0.2992, 0.1340, 0.2331, 0.3337]) -Greedy action tensor([0.3456, 1.1109, 0.3392, 0.4667]) tensor([0.1897, 0.4078, 0.1885, 0.2141]) -Greedy action tensor([-0.9304, -2.7292, -0.4185, -0.1269]) tensor([0.1973, 0.0327, 0.3293, 0.4407]) -Greedy action tensor([ 1.1084, 0.2727, -0.6243, 1.4086]) tensor([0.3378, 0.1465, 0.0597, 0.4560]) -Greedy action tensor([-1.0750, 0.0350, 0.8856, -0.5501]) tensor([0.0780, 0.2365, 0.5538, 0.1318]) -Greedy action tensor([0.1237, 0.5346, 0.5942, 0.1728]) tensor([0.1938, 0.2923, 0.3103, 0.2036]) -Greedy action tensor([ 2.1676, -1.5437, 1.5387, 1.0181]) tensor([0.5335, 0.0130, 0.2844, 0.1690]) -Greedy action tensor([ 0.5160, -1.3224, -0.8271, 1.3716]) tensor([0.2651, 0.0422, 0.0692, 0.6236]) -Greedy action tensor([ 0.5019, -0.5764, 1.1411, 0.8572]) tensor([0.2145, 0.0730, 0.4065, 0.3060]) -Greedy action tensor([ 0.9300, 0.4695, -0.9699, 0.6562]) tensor([0.3935, 0.2483, 0.0589, 0.2993]) -Greedy action tensor([0.2845, 0.4656, 0.2249, 0.2954]) tensor([0.2409, 0.2887, 0.2269, 0.2435]) -Greedy action tensor([ 1.0299, -1.1315, -0.4194, 0.2876]) tensor([0.5477, 0.0631, 0.1286, 0.2607]) -Greedy action tensor([ 0.4519, -0.2341, -0.4452, 0.4777]) tensor([0.3404, 0.1714, 0.1388, 0.3493]) -Greedy action tensor([-0.1810, -0.6103, 0.4151, 0.2006]) tensor([0.2028, 0.1320, 0.3681, 0.2970]) -Greedy action tensor([ 0.7588, -0.0583, -0.9705, 1.2134]) tensor([0.3130, 0.1383, 0.0555, 0.4932]) -Greedy action tensor([-0.8728, -0.0639, -0.1622, 0.2662]) tensor([0.1190, 0.2672, 0.2422, 0.3717]) -Greedy action tensor([-0.6994, -0.5741, 0.9684, 0.2885]) tensor([0.0988, 0.1120, 0.5238, 0.2654]) -Greedy action tensor([ 0.5444, -0.8442, 1.0160, 0.9515]) tensor([0.2297, 0.0573, 0.3680, 0.3450]) -Greedy action tensor([-0.9659, 0.9533, 0.4579, -0.0946]) tensor([0.0696, 0.4747, 0.2892, 0.1665]) -Greedy action tensor([ 0.2504, -0.2364, 0.9468, 1.1202]) tensor([0.1665, 0.1023, 0.3340, 0.3972]) -Greedy action tensor([ 0.4911, -0.4107, -0.1119, 1.0257]) tensor([0.2732, 0.1109, 0.1495, 0.4664]) -Greedy action tensor([ 0.9543, -0.3985, 0.0436, 1.7610]) tensor([0.2563, 0.0663, 0.1031, 0.5743]) -Greedy action tensor([ 0.7637, -0.3595, -0.9970, 1.6029]) tensor([0.2624, 0.0853, 0.0451, 0.6072]) -Greedy action tensor([ 0.0730, -1.2772, 1.9022, 1.0000]) tensor([0.0998, 0.0259, 0.6219, 0.2523]) -Greedy action tensor([0.9348, 0.9549, 0.1717, 0.6234]) tensor([0.3107, 0.3170, 0.1448, 0.2275]) -Greedy action tensor([-0.3041, 0.6779, 0.2921, -0.0041]) tensor([0.1463, 0.3906, 0.2656, 0.1975]) -Greedy action tensor([-0.0727, -0.9043, 0.9065, -0.0889]) tensor([0.1968, 0.0857, 0.5239, 0.1936]) -Greedy action tensor([ 1.5395, -2.1872, -0.1536, 0.4772]) tensor([0.6436, 0.0155, 0.1184, 0.2225]) -Greedy action tensor([ 0.9242, 0.7444, -0.3273, 0.6977]) tensor([0.3426, 0.2862, 0.0980, 0.2732]) -Greedy action tensor([-0.9340, 0.1969, -0.7630, 0.4684]) tensor([0.1070, 0.3314, 0.1269, 0.4347]) -Greedy action tensor([-0.9631, -0.6615, 1.9401, 0.0138]) tensor([0.0430, 0.0582, 0.7845, 0.1143]) -Greedy action tensor([ 0.5858, -0.2921, -0.0418, -0.1875]) tensor([0.4148, 0.1724, 0.2214, 0.1914]) -Greedy action tensor([ 0.5852, -0.3059, -0.0392, -0.5291]) tensor([0.4398, 0.1804, 0.2355, 0.1443]) -Greedy action tensor([ 0.8700, -0.7003, -0.0104, -0.3088]) tensor([0.5181, 0.1078, 0.2148, 0.1594]) -Greedy action tensor([ 0.4326, -0.1284, -0.0595, -0.2009]) tensor([0.3686, 0.2104, 0.2254, 0.1956]) -Greedy action tensor([ 0.3041, 0.0346, 0.0069, -0.3934]) tensor([0.3328, 0.2542, 0.2473, 0.1657]) -Greedy action tensor([ 0.5584, -0.1651, 0.0849, -0.7337]) tensor([0.4197, 0.2036, 0.2614, 0.1153]) -Greedy action tensor([ 0.6894, -0.1894, 0.0475, -0.6931]) tensor([0.4561, 0.1894, 0.2401, 0.1145]) -Greedy action tensor([ 0.3523, 0.0840, 0.0179, -0.2246]) tensor([0.3287, 0.2514, 0.2353, 0.1846]) -Greedy action tensor([ 0.7057, -0.6217, -0.0239, -0.3082]) tensor([0.4739, 0.1257, 0.2285, 0.1719]) -Greedy action tensor([ 0.6693, -0.5157, 0.0102, -0.3759]) tensor([0.4598, 0.1406, 0.2379, 0.1617]) -Greedy action tensor([ 0.7735, -0.4956, 0.0974, -0.6380]) tensor([0.4918, 0.1382, 0.2501, 0.1199]) -Greedy action tensor([ 0.4971, -0.0962, -0.0427, -0.1268]) tensor([0.3744, 0.2068, 0.2182, 0.2006]) -Greedy action tensor([ 0.4779, -0.2390, -0.0971, -0.2974]) tensor([0.3982, 0.1944, 0.2240, 0.1834]) -Greedy action tensor([ 0.7141, -0.7278, 0.0076, -0.6788]) tensor([0.5055, 0.1195, 0.2494, 0.1255]) -Greedy action tensor([ 0.4013, -0.1248, -0.0580, -0.4272]) tensor([0.3760, 0.2222, 0.2376, 0.1642]) -Greedy action tensor([ 0.6990, -0.3104, 0.0107, -0.2502]) tensor([0.4437, 0.1617, 0.2229, 0.1717]) -Greedy action tensor([ 0.6259, -0.4103, -0.1575, -0.0612]) tensor([0.4320, 0.1533, 0.1974, 0.2173]) -Greedy action tensor([ 0.4224, -0.1813, -0.0172, -0.2319]) tensor([0.3689, 0.2017, 0.2377, 0.1917]) -Greedy action tensor([ 0.2241, -0.0083, -0.0756, -0.2600]) tensor([0.3175, 0.2516, 0.2352, 0.1956]) -Greedy action tensor([ 0.2932, 0.3419, -0.0833, 0.1094]) tensor([0.2802, 0.2942, 0.1923, 0.2332]) -Greedy action tensor([ 0.5540, -0.0497, -0.0058, -0.1895]) tensor([0.3856, 0.2108, 0.2203, 0.1833]) -Greedy action tensor([ 0.5191, -0.1656, -0.1937, -0.3776]) tensor([0.4162, 0.2099, 0.2041, 0.1698]) -Greedy action tensor([ 0.7904, -0.4084, -0.0982, -0.2104]) tensor([0.4807, 0.1450, 0.1977, 0.1767]) -Greedy action tensor([ 0.9272, -0.6461, -0.0965, -0.5463]) tensor([0.5569, 0.1155, 0.2001, 0.1276]) -Greedy action tensor([ 0.3827, -0.3377, -0.0273, -0.4183]) tensor([0.3847, 0.1872, 0.2553, 0.1727]) -Greedy action tensor([ 0.8286, -0.5845, 0.1210, -0.3799]) tensor([0.4914, 0.1196, 0.2422, 0.1468]) -Greedy action tensor([ 0.9583, -0.4974, 0.0273, -0.3868]) tensor([0.5297, 0.1235, 0.2088, 0.1380]) -Greedy action tensor([ 0.4030, -0.0701, -0.0846, -0.1273]) tensor([0.3539, 0.2205, 0.2173, 0.2083]) -Greedy action tensor([ 0.4067, -0.0088, -0.0761, -0.1750]) tensor([0.3526, 0.2327, 0.2176, 0.1971]) -Greedy action tensor([ 0.3076, -0.1592, -0.0550, -0.5089]) tensor([0.3617, 0.2268, 0.2517, 0.1599]) -Greedy action tensor([ 0.6733, -0.3710, -0.0123, -0.2475]) tensor([0.4437, 0.1561, 0.2235, 0.1767]) -Greedy action tensor([ 0.3819, 0.2024, -0.1609, -0.1145]) tensor([0.3305, 0.2762, 0.1921, 0.2012]) -Greedy action tensor([ 0.3212, -0.0910, 0.0807, -0.1790]) tensor([0.3274, 0.2168, 0.2574, 0.1985]) -Greedy action tensor([ 0.6078, -0.1988, -0.0232, -0.2434]) tensor([0.4158, 0.1856, 0.2212, 0.1775]) -Greedy action tensor([ 0.6229, -0.3416, -0.0561, -0.2636]) tensor([0.4347, 0.1657, 0.2204, 0.1791]) -Greedy action tensor([ 1.0495, -0.8320, 0.0321, -0.5272]) tensor([0.5812, 0.0886, 0.2101, 0.1201]) -Greedy action tensor([ 0.3106, 0.0258, -0.0497, -0.3905]) tensor([0.3395, 0.2553, 0.2368, 0.1684]) -Greedy action tensor([ 0.3453, -0.1087, -0.1218, -0.2426]) tensor([0.3550, 0.2254, 0.2225, 0.1972]) -Greedy action tensor([ 0.4113, 0.1191, -0.0031, -0.0928]) tensor([0.3321, 0.2479, 0.2194, 0.2006]) -Greedy action tensor([ 0.9240, -0.2494, -0.1306, -0.5086]) tensor([0.5273, 0.1631, 0.1837, 0.1259]) -Greedy action tensor([ 0.4818, -0.0316, -0.0532, -0.1621]) tensor([0.3691, 0.2209, 0.2162, 0.1939]) -Greedy action tensor([ 0.7637, -0.4719, 0.0446, -0.3937]) tensor([0.4780, 0.1389, 0.2329, 0.1502]) -Greedy action tensor([ 0.6166, -0.4631, -0.0305, -0.2905]) tensor([0.4411, 0.1498, 0.2310, 0.1781]) -Greedy action tensor([ 0.1940, 0.2296, -0.1652, -0.1907]) tensor([0.2928, 0.3034, 0.2044, 0.1993]) -Greedy action tensor([ 0.8875, -0.5387, -0.1338, -0.4056]) tensor([0.5334, 0.1281, 0.1921, 0.1464]) -Greedy action tensor([ 0.7231, -0.3897, -0.0429, -0.4254]) tensor([0.4738, 0.1557, 0.2202, 0.1502]) -Greedy action tensor([ 0.6734, -0.3285, -0.1229, -0.2278]) tensor([0.4496, 0.1651, 0.2028, 0.1826]) -Greedy action tensor([ 0.5134, 0.3178, -0.1245, -0.3662]) tensor([0.3616, 0.2973, 0.1910, 0.1500]) -Greedy action tensor([ 0.1528, 0.1685, -0.0120, 0.0844]) tensor([0.2633, 0.2675, 0.2233, 0.2459]) -Greedy action tensor([ 0.4179, 0.1457, -0.0936, -0.0427]) tensor([0.3342, 0.2546, 0.2004, 0.2108]) -Greedy action tensor([ 0.4758, -0.0736, -0.2057, -0.1097]) tensor([0.3788, 0.2187, 0.1916, 0.2109]) -Greedy action tensor([ 0.4515, -0.2942, -0.1893, -0.2743]) tensor([0.4024, 0.1909, 0.2120, 0.1947]) -Greedy action tensor([ 0.5014, -0.1670, 0.0526, -0.2865]) tensor([0.3838, 0.1967, 0.2450, 0.1745]) -Greedy action tensor([ 0.3629, -0.0911, 0.0246, -0.1799]) tensor([0.3414, 0.2168, 0.2434, 0.1984]) -Greedy action tensor([ 0.5177, -0.0138, -0.1242, -0.3224]) tensor([0.3928, 0.2309, 0.2067, 0.1696]) -Greedy action tensor([ 0.8618, -0.3987, 0.0271, -0.3479]) tensor([0.4961, 0.1406, 0.2153, 0.1480]) -Greedy action tensor([ 0.8596, -0.4865, -0.1353, -0.3131]) tensor([0.5156, 0.1342, 0.1906, 0.1596]) -Greedy action tensor([ 0.6110, -0.3805, -0.0198, -0.4348]) tensor([0.4436, 0.1646, 0.2360, 0.1559]) -Greedy action tensor([ 0.6055, 0.0681, -0.2228, 0.0078]) tensor([0.3889, 0.2272, 0.1699, 0.2139]) -Greedy action tensor([ 0.7459, -0.5011, -0.0676, -0.2959]) tensor([0.4800, 0.1379, 0.2128, 0.1693]) -Greedy action tensor([ 0.2995, -0.0931, -0.0124, -0.2201]) tensor([0.3331, 0.2249, 0.2439, 0.1981]) -Greedy action tensor([ 1.0999, -1.1671, -0.0951, -0.7045]) tensor([0.6366, 0.0660, 0.1927, 0.1048]) -Greedy action tensor([ 0.4271, -0.1622, 0.0614, -0.1375]) tensor([0.3550, 0.1969, 0.2463, 0.2018]) -Greedy action tensor([ 0.5698, -0.2226, -0.0813, -0.1914]) tensor([0.4096, 0.1854, 0.2136, 0.1913]) -Greedy action tensor([ 0.5708, -0.0792, 0.1149, -0.3818]) tensor([0.3934, 0.2054, 0.2494, 0.1518]) -Greedy action tensor([ 0.3890, -0.1614, -0.1583, -0.3206]) tensor([0.3778, 0.2179, 0.2185, 0.1858]) -Greedy action tensor([ 0.4757, -0.0997, -0.1109, -0.2010]) tensor([0.3807, 0.2141, 0.2117, 0.1935]) -Greedy action tensor([ 1.1086, -0.7431, 0.1272, -0.6163]) tensor([0.5848, 0.0918, 0.2192, 0.1042]) -Greedy action tensor([ 0.4250, -0.3131, -0.0246, -0.2985]) tensor([0.3845, 0.1838, 0.2452, 0.1865]) -Greedy action tensor([ 0.4582, 0.1106, 0.0518, -0.1671]) tensor([0.3439, 0.2429, 0.2291, 0.1840]) -Greedy action tensor([ 0.6347, -0.1795, 0.0198, -0.1250]) tensor([0.4079, 0.1807, 0.2206, 0.1908]) -Greedy action tensor([-0.0569, 0.0348, -0.0931, -0.0751]) tensor([0.2474, 0.2711, 0.2386, 0.2429]) -Greedy action tensor([ 0.4835, -0.1570, 0.0021, -0.2574]) tensor([0.3814, 0.2010, 0.2357, 0.1818]) -Greedy action tensor([ 0.9061, -0.3610, 0.0055, -0.2416]) tensor([0.4987, 0.1404, 0.2026, 0.1583]) -Greedy action tensor([ 0.5961, -0.3568, 0.0328, -0.1756]) tensor([0.4137, 0.1595, 0.2355, 0.1912]) -Greedy action tensor([ 0.6480, -0.1143, 0.0079, -0.3298]) tensor([0.4219, 0.1969, 0.2225, 0.1587]) -Greedy action tensor([ 0.5620, 0.0886, -0.1417, -0.1693]) tensor([0.3848, 0.2397, 0.1904, 0.1852]) -Greedy action tensor([ 0.4287, 0.0195, -0.1185, 0.0522]) tensor([0.3414, 0.2268, 0.1975, 0.2343]) -Greedy action tensor([ 0.2876, 0.1492, -0.1115, -0.1062]) tensor([0.3109, 0.2707, 0.2086, 0.2097]) -Greedy action tensor([ 0.4737, -0.1239, 0.0601, -0.2958]) tensor([0.3739, 0.2057, 0.2472, 0.1732]) -Greedy action tensor([ 0.8222, -0.5976, -0.0288, -0.5503]) tensor([0.5202, 0.1258, 0.2221, 0.1319]) -Greedy action tensor([ 0.6560, -0.0681, -0.2140, -0.0351]) tensor([0.4158, 0.2016, 0.1742, 0.2084]) -Greedy action tensor([ 1.8601, -0.5344, -0.3112, 0.3166]) tensor([0.7048, 0.0643, 0.0804, 0.1506]) -Greedy action tensor([ 1.7240, -0.9580, -0.6460, 0.7068]) tensor([0.6564, 0.0449, 0.0614, 0.2374]) -Greedy action tensor([ 1.3808, -0.5703, -0.3228, -0.0491]) tensor([0.6396, 0.0909, 0.1164, 0.1531]) -Greedy action tensor([ 0.8746, -0.1448, 0.0323, -0.2320]) tensor([0.4712, 0.1700, 0.2030, 0.1558]) -Greedy action tensor([ 1.6682, -0.6720, -0.4107, 0.1290]) tensor([0.6964, 0.0671, 0.0871, 0.1494]) -Greedy action tensor([ 2.2663, -1.2210, -0.3950, 0.6875]) tensor([0.7653, 0.0234, 0.0535, 0.1578]) -Greedy action tensor([ 0.5261, -0.3450, 0.0690, -0.0047]) tensor([0.3788, 0.1585, 0.2398, 0.2228]) -Greedy action tensor([ 1.6573, -0.2434, -0.4839, 0.1927]) tensor([0.6675, 0.0998, 0.0784, 0.1543]) -Greedy action tensor([ 1.2131, -0.5277, -0.0722, -0.2052]) tensor([0.5903, 0.1035, 0.1633, 0.1429]) -Greedy action tensor([ 1.0137, -0.3837, -0.2134, 0.1589]) tensor([0.5087, 0.1258, 0.1491, 0.2164]) -Greedy action tensor([ 0.7931, -0.2771, -0.2572, 0.1872]) tensor([0.4468, 0.1532, 0.1563, 0.2437]) -Greedy action tensor([ 1.2916, -0.5154, -0.3677, 0.2176]) tensor([0.5896, 0.0968, 0.1122, 0.2014]) -Greedy action tensor([ 1.3084, -0.2343, 0.2801, 0.1153]) tensor([0.5334, 0.1140, 0.1908, 0.1618]) -Greedy action tensor([ 1.4492, -0.3525, -0.2553, 0.2633]) tensor([0.6052, 0.0999, 0.1101, 0.1849]) -Greedy action tensor([ 2.0901, -1.3103, -0.4207, 0.6533]) tensor([0.7395, 0.0247, 0.0601, 0.1758]) -Greedy action tensor([ 0.9495, -0.7251, -0.0483, 0.0722]) tensor([0.5071, 0.0950, 0.1870, 0.2109]) -Greedy action tensor([ 0.6016, -0.3285, -0.2630, 0.4014]) tensor([0.3796, 0.1498, 0.1599, 0.3107]) -Greedy action tensor([ 1.0517, -0.4687, -0.0014, 0.0845]) tensor([0.5134, 0.1123, 0.1791, 0.1952]) -Greedy action tensor([ 0.9329, -0.3122, -0.2498, 0.1324]) tensor([0.4894, 0.1409, 0.1500, 0.2198]) -Greedy action tensor([ 1.3376, -0.5419, -0.1805, 0.3334]) tensor([0.5753, 0.0878, 0.1261, 0.2108]) -Greedy action tensor([ 1.5982, -0.5665, -0.2287, -0.0749]) tensor([0.6833, 0.0784, 0.1100, 0.1282]) -Greedy action tensor([ 1.7596, -0.8362, -0.2334, 0.6628]) tensor([0.6473, 0.0483, 0.0882, 0.2162]) -Greedy action tensor([1.3125, 0.1671, 0.0391, 0.0566]) tensor([0.5311, 0.1690, 0.1487, 0.1513]) -Greedy action tensor([ 1.1485, -0.4247, 0.0255, -0.1128]) tensor([0.5507, 0.1142, 0.1791, 0.1560]) -Greedy action tensor([ 1.7583, -0.8766, -0.2556, 0.6004]) tensor([0.6582, 0.0472, 0.0878, 0.2068]) -Greedy action tensor([ 1.4381, -0.7279, -0.2617, 0.0941]) tensor([0.6418, 0.0736, 0.1173, 0.1674]) -Greedy action tensor([ 1.5745, -0.5252, -0.3013, 0.1368]) tensor([0.6609, 0.0810, 0.1013, 0.1569]) -Greedy action tensor([ 0.8474, -0.2555, -0.1947, -0.3244]) tensor([0.5014, 0.1664, 0.1768, 0.1553]) -Greedy action tensor([ 0.6575, -0.1143, -0.0209, -0.1630]) tensor([0.4150, 0.1918, 0.2106, 0.1827]) -Greedy action tensor([ 0.8736, 0.0216, -0.4533, 0.4003]) tensor([0.4320, 0.1843, 0.1146, 0.2691]) -Greedy action tensor([ 0.7990, -0.0774, -0.1777, -0.3072]) tensor([0.4709, 0.1960, 0.1773, 0.1558]) -Greedy action tensor([ 1.2957, -0.3892, -0.1496, 0.1804]) tensor([0.5718, 0.1060, 0.1348, 0.1874]) -Greedy action tensor([ 1.5363, -0.6127, -0.2982, 0.2790]) tensor([0.6407, 0.0747, 0.1023, 0.1822]) -Greedy action tensor([ 0.6713, -0.2136, -0.3133, -0.2524]) tensor([0.4580, 0.1890, 0.1711, 0.1818]) -Greedy action tensor([ 0.7611, -0.3830, -0.6504, 0.6719]) tensor([0.4037, 0.1286, 0.0984, 0.3693]) -Greedy action tensor([ 0.9049, -0.2858, -0.1917, 0.1211]) tensor([0.4774, 0.1451, 0.1594, 0.2180]) -Greedy action tensor([ 1.5464, -0.5112, 0.0230, 0.0911]) tensor([0.6333, 0.0809, 0.1380, 0.1478]) -Greedy action tensor([ 0.7646, -0.3835, -0.4898, 0.6578]) tensor([0.3998, 0.1268, 0.1140, 0.3593]) -Greedy action tensor([ 0.6654, -0.3827, 0.2218, -0.0853]) tensor([0.4058, 0.1423, 0.2604, 0.1915]) -Greedy action tensor([ 0.8475, -0.2870, -0.6986, 0.5344]) tensor([0.4413, 0.1419, 0.0940, 0.3227]) -Greedy action tensor([ 0.9186, -0.0836, -0.2464, -0.0543]) tensor([0.4862, 0.1785, 0.1516, 0.1838]) -Greedy action tensor([ 1.0418, -0.7685, -0.1895, 0.3780]) tensor([0.5075, 0.0830, 0.1482, 0.2613]) -Greedy action tensor([ 1.0288, -0.0944, -0.1468, -0.3687]) tensor([0.5316, 0.1729, 0.1641, 0.1314]) -Greedy action tensor([ 0.7980, 0.0677, 0.1600, -0.3825]) tensor([0.4315, 0.2079, 0.2280, 0.1325]) -Greedy action tensor([ 0.6030, -0.1539, -0.1390, 0.0738]) tensor([0.3946, 0.1851, 0.1879, 0.2324]) -Greedy action tensor([ 1.2976, -0.4455, -0.3612, 0.1281]) tensor([0.5967, 0.1044, 0.1136, 0.1853]) -Greedy action tensor([ 0.7625, -0.0607, 0.0694, 0.0126]) tensor([0.4147, 0.1821, 0.2073, 0.1959]) -Greedy action tensor([ 0.9489, -0.2392, -0.1689, 0.0825]) tensor([0.4873, 0.1485, 0.1593, 0.2049]) -Greedy action tensor([ 1.1259, -0.5864, -0.0360, -0.1262]) tensor([0.5620, 0.1014, 0.1759, 0.1607]) -Greedy action tensor([ 1.2594, -0.3934, -0.1877, 0.1602]) tensor([0.5682, 0.1088, 0.1337, 0.1893]) -Greedy action tensor([ 1.0009, -0.5645, 0.0639, -0.1732]) tensor([0.5236, 0.1094, 0.2051, 0.1618]) -Greedy action tensor([ 0.7405, -0.0745, -0.2445, 0.2248]) tensor([0.4144, 0.1834, 0.1548, 0.2474]) -Greedy action tensor([ 0.9095, -0.4680, -0.1846, 0.0343]) tensor([0.4990, 0.1259, 0.1671, 0.2080]) -Greedy action tensor([ 1.6260, -0.6493, -0.4417, 0.3783]) tensor([0.6595, 0.0678, 0.0834, 0.1894]) -Greedy action tensor([ 1.7378, -0.9686, -0.3139, 0.5339]) tensor([0.6688, 0.0447, 0.0859, 0.2006]) -Greedy action tensor([ 1.3521, -0.0753, -0.1443, 0.0561]) tensor([0.5755, 0.1381, 0.1289, 0.1575]) -Greedy action tensor([ 0.8221, -0.3550, -0.4884, 0.5648]) tensor([0.4254, 0.1311, 0.1147, 0.3288]) -Greedy action tensor([ 1.1426, -0.8625, 0.1377, -0.1504]) tensor([0.5633, 0.0758, 0.2062, 0.1546]) -Greedy action tensor([ 1.0055, -0.2925, -0.4250, 0.4478]) tensor([0.4797, 0.1310, 0.1147, 0.2746]) -Greedy action tensor([ 1.2379, -0.2293, -0.3552, 0.4867]) tensor([0.5247, 0.1210, 0.1067, 0.2476]) -Greedy action tensor([ 1.4247, -0.1760, -0.3844, 0.1350]) tensor([0.6094, 0.1230, 0.0998, 0.1678]) -Greedy action tensor([ 1.4820, -0.4170, -0.1516, 0.1332]) tensor([0.6232, 0.0933, 0.1217, 0.1618]) -Greedy action tensor([ 0.5181, -0.0878, 0.2024, -0.0852]) tensor([0.3544, 0.1933, 0.2584, 0.1939]) -Greedy action tensor([ 1.0876, -0.0513, -0.0239, -0.0109]) tensor([0.5044, 0.1615, 0.1660, 0.1681]) -Greedy action tensor([ 1.0845, -0.1959, -0.1778, 0.1554]) tensor([0.5113, 0.1421, 0.1447, 0.2019]) -Greedy action tensor([ 0.9164, -0.1899, -0.0136, 0.0763]) tensor([0.4636, 0.1533, 0.1829, 0.2001]) -Greedy action tensor([ 1.5871, -0.1666, -0.2063, 0.3309]) tensor([0.6157, 0.1066, 0.1024, 0.1753]) -Greedy action tensor([ 1.4055, -0.7189, 0.1508, 0.4934]) tensor([0.5536, 0.0662, 0.1579, 0.2224]) -Greedy action tensor([ 1.5228, -0.4793, -0.1202, 0.3236]) tensor([0.6135, 0.0829, 0.1187, 0.1850]) -Greedy action tensor([ 1.7517, -0.3502, 0.0655, 0.2094]) tensor([0.6573, 0.0803, 0.1217, 0.1406]) -Greedy action tensor([ 0.9847, -0.7073, -0.3407, 0.5733]) tensor([0.4734, 0.0872, 0.1258, 0.3137]) -Greedy action tensor([ 1.3127, -0.3758, 0.0357, 0.1633]) tensor([0.5616, 0.1038, 0.1566, 0.1779]) -Greedy action tensor([ 0.7491, -0.4266, -0.4628, 0.5864]) tensor([0.4072, 0.1256, 0.1212, 0.3460]) -Greedy action tensor([ 1.1453, -0.5501, -0.2002, 0.0679]) tensor([0.5604, 0.1028, 0.1459, 0.1908]) -Greedy action tensor([ 1.7537, -0.9284, -0.2426, 0.5462]) tensor([0.6652, 0.0455, 0.0904, 0.1989]) -Greedy action tensor([ 0.7085, -0.3489, -0.3272, -0.2613]) tensor([0.4804, 0.1669, 0.1705, 0.1821]) -Greedy action tensor([ 1.1281, -0.1852, -0.1966, 0.1130]) tensor([0.5271, 0.1418, 0.1402, 0.1910]) -Greedy action tensor([ 1.1993, -0.5852, -0.1553, 0.2580]) tensor([0.5506, 0.0924, 0.1421, 0.2148]) -Greedy action tensor([ 1.0621, -0.3875, -0.1811, 0.1539]) tensor([0.5191, 0.1218, 0.1498, 0.2093]) -Greedy action tensor([ 0.8246, -0.3299, -0.4679, -0.4255]) tensor([0.5330, 0.1680, 0.1463, 0.1527]) -Greedy action tensor([-1.9078, -0.4636, 0.6568, -0.1531]) tensor([0.0416, 0.1765, 0.5411, 0.2407]) -Greedy action tensor([-1.1249, 0.2021, 0.4335, 0.9448]) tensor([0.0573, 0.2161, 0.2724, 0.4542]) -Greedy action tensor([-1.9306, -0.7467, 0.7948, 0.0161]) tensor([0.0377, 0.1231, 0.5752, 0.2640]) -Greedy action tensor([-1.0826, 0.4428, 0.2285, 0.0200]) tensor([0.0812, 0.3731, 0.3012, 0.2445]) -Greedy action tensor([-1.8779, -0.4334, 0.6258, -0.1416]) tensor([0.0432, 0.1832, 0.5283, 0.2453]) -Greedy action tensor([-0.9723, -0.6067, 0.2621, 0.4306]) tensor([0.1006, 0.1449, 0.3455, 0.4090]) -Greedy action tensor([-1.7429, -0.3944, 0.8479, 0.3788]) tensor([0.0377, 0.1451, 0.5027, 0.3145]) -Greedy action tensor([-0.8152, -0.5436, 1.2065, 1.5604]) tensor([0.0485, 0.0636, 0.3662, 0.5217]) -Greedy action tensor([-1.2094, -0.5252, 0.3386, 0.5144]) tensor([0.0752, 0.1492, 0.3538, 0.4218]) -Greedy action tensor([-1.3273, -0.3619, 0.7090, 0.8841]) tensor([0.0490, 0.1286, 0.3753, 0.4471]) -Greedy action tensor([-1.7313, -0.8701, 0.1695, -0.6171]) tensor([0.0763, 0.1805, 0.5106, 0.2325]) -Greedy action tensor([-1.2803, -0.5840, 0.3982, 0.1118]) tensor([0.0807, 0.1620, 0.4325, 0.3248]) -Greedy action tensor([-0.8976, -0.5636, 0.1884, 0.2999]) tensor([0.1153, 0.1611, 0.3417, 0.3819]) -Greedy action tensor([-1.0970, -0.0773, -0.1165, 0.1242]) tensor([0.1017, 0.2821, 0.2712, 0.3450]) -Greedy action tensor([-1.7275, -0.7607, 0.4529, -0.0265]) tensor([0.0557, 0.1464, 0.4928, 0.3051]) -Greedy action tensor([-1.1451, -0.6263, 1.2164, 1.4440]) tensor([0.0376, 0.0631, 0.3987, 0.5006]) -Greedy action tensor([-1.8359, -0.4806, 0.6161, -0.0997]) tensor([0.0451, 0.1750, 0.5239, 0.2561]) -Greedy action tensor([-1.9195, -0.4601, 0.6529, -0.1637]) tensor([0.0413, 0.1779, 0.5415, 0.2393]) -Greedy action tensor([-1.3978, -0.4692, 0.5378, 0.6244]) tensor([0.0555, 0.1405, 0.3846, 0.4194]) -Greedy action tensor([-1.5697, -0.8150, 1.1399, 1.0237]) tensor([0.0317, 0.0675, 0.4765, 0.4243]) -Greedy action tensor([-1.2747, -0.5055, 0.3666, 0.4366]) tensor([0.0722, 0.1557, 0.3725, 0.3996]) -Greedy action tensor([-0.3350, -0.4250, 0.1909, 0.2111]) tensor([0.1875, 0.1714, 0.3173, 0.3238]) -Greedy action tensor([-0.8422, -0.3895, 0.5672, 1.2116]) tensor([0.0691, 0.1087, 0.2830, 0.5391]) -Greedy action tensor([-1.2295, -0.5398, 0.2766, 0.3796]) tensor([0.0800, 0.1594, 0.3607, 0.3998]) -Greedy action tensor([-1.9563, -0.9072, 0.0865, -0.3774]) tensor([0.0609, 0.1739, 0.4698, 0.2954]) -Greedy action tensor([-1.0645, -0.2466, -0.2691, -0.0161]) tensor([0.1200, 0.2719, 0.2658, 0.3423]) -Greedy action tensor([-1.1789, -0.7281, 0.8004, 1.2546]) tensor([0.0472, 0.0740, 0.3413, 0.5375]) -Greedy action tensor([-1.8191, -0.5346, 0.5980, -0.1162]) tensor([0.0469, 0.1695, 0.5261, 0.2575]) -Greedy action tensor([-0.7554, 1.0792, 0.3618, 0.5028]) tensor([0.0723, 0.4526, 0.2209, 0.2543]) -Greedy action tensor([-1.6312, -0.4585, 0.5763, 0.1379]) tensor([0.0521, 0.1684, 0.4739, 0.3057]) -Greedy action tensor([-1.8808, -0.4375, 1.0510, 0.5687]) tensor([0.0281, 0.1190, 0.5273, 0.3255]) -Greedy action tensor([-1.3873, -0.2431, 0.5573, -0.4302]) tensor([0.0728, 0.2286, 0.5090, 0.1896]) -Greedy action tensor([-1.8447, -0.2956, 0.5919, -0.1159]) tensor([0.0439, 0.2067, 0.5020, 0.2474]) -Greedy action tensor([-1.8051, -0.4044, 0.6707, 0.0740]) tensor([0.0426, 0.1727, 0.5061, 0.2787]) -Greedy action tensor([-1.9175, -0.5548, 0.5946, -0.1666]) tensor([0.0435, 0.1699, 0.5362, 0.2505]) -Greedy action tensor([-0.2051, -0.3983, 0.2633, 0.2018]) tensor([0.2031, 0.1674, 0.3244, 0.3051]) -Greedy action tensor([-1.2627, -0.2969, 0.3518, 0.0040]) tensor([0.0820, 0.2153, 0.4119, 0.2909]) -Greedy action tensor([-1.6670, -0.5307, 0.5151, -0.0135]) tensor([0.0549, 0.1711, 0.4869, 0.2870]) -Greedy action tensor([-0.8757, -0.2962, -0.4156, -0.2124]) tensor([0.1585, 0.2829, 0.2511, 0.3076]) -Greedy action tensor([-1.1096, -0.5514, 0.3457, 0.0671]) tensor([0.0973, 0.1700, 0.4170, 0.3156]) -Greedy action tensor([-1.5500, -0.5569, 1.5216, 1.0424]) tensor([0.0259, 0.0699, 0.5584, 0.3458]) -Greedy action tensor([-0.4178, -0.2773, 0.3481, 0.8771]) tensor([0.1257, 0.1447, 0.2705, 0.4591]) -Greedy action tensor([-1.7579, -0.4772, 0.5586, -0.0461]) tensor([0.0493, 0.1775, 0.5000, 0.2732]) -Greedy action tensor([-2.0388, -0.9374, 0.4016, -0.1975]) tensor([0.0459, 0.1381, 0.5267, 0.2893]) -Greedy action tensor([-1.5695, -0.3145, 0.5805, 0.2153]) tensor([0.0525, 0.1841, 0.4506, 0.3128]) -Greedy action tensor([-1.9743, -0.4561, 1.0766, 0.4280]) tensor([0.0265, 0.1209, 0.5599, 0.2927]) -Greedy action tensor([-1.7092, -0.6518, -0.0045, -0.4176]) tensor([0.0768, 0.2212, 0.4225, 0.2795]) -Greedy action tensor([-1.7099, -0.5292, 0.6187, 0.0611]) tensor([0.0490, 0.1597, 0.5032, 0.2881]) -Greedy action tensor([-0.8440, -0.0644, 0.4208, -0.3734]) tensor([0.1201, 0.2620, 0.4256, 0.1923]) -Greedy action tensor([-1.7013, -0.5000, 0.5208, -0.0062]) tensor([0.0526, 0.1750, 0.4856, 0.2867]) -Greedy action tensor([-1.7235, -0.4430, 0.5354, -0.0253]) tensor([0.0509, 0.1833, 0.4875, 0.2783]) -Greedy action tensor([-0.7635, -0.5492, 0.3053, 0.6503]) tensor([0.1080, 0.1338, 0.3144, 0.4439]) -Greedy action tensor([-1.9640, -0.9294, 0.0147, -0.4313]) tensor([0.0638, 0.1795, 0.4614, 0.2954]) -Greedy action tensor([-1.6599, -0.5595, 0.5632, 0.0134]) tensor([0.0538, 0.1618, 0.4973, 0.2870]) -Greedy action tensor([-1.4603, -0.5153, 0.3987, 0.1192]) tensor([0.0674, 0.1733, 0.4323, 0.3269]) -Greedy action tensor([-1.9503, -0.4664, 1.0331, 0.3950]) tensor([0.0281, 0.1239, 0.5549, 0.2931]) -Greedy action tensor([-1.8809, -0.4078, 0.6437, -0.1185]) tensor([0.0422, 0.1843, 0.5274, 0.2461]) -Greedy action tensor([-1.0891, -0.5651, 0.2355, 0.4760]) tensor([0.0890, 0.1503, 0.3348, 0.4258]) -Greedy action tensor([-1.6921, -0.2676, 0.6153, 0.1371]) tensor([0.0467, 0.1939, 0.4688, 0.2906]) -Greedy action tensor([-1.8998, -0.4291, 0.6445, -0.1446]) tensor([0.0419, 0.1823, 0.5334, 0.2423]) -Greedy action tensor([-1.7210, -0.4144, 0.6596, 0.2744]) tensor([0.0437, 0.1616, 0.4729, 0.3217]) -Greedy action tensor([-0.7434, 0.8004, 0.1616, 0.6989]) tensor([0.0807, 0.3781, 0.1996, 0.3416]) -Greedy action tensor([-1.8657, -0.4033, 0.6785, -0.0657]) tensor([0.0415, 0.1791, 0.5284, 0.2510]) -Greedy action tensor([-1.3630, -0.5216, 0.6763, -0.3864]) tensor([0.0732, 0.1698, 0.5626, 0.1944]) -Greedy action tensor([-1.3998, -0.5016, 0.4857, 0.3301]) tensor([0.0638, 0.1565, 0.4201, 0.3596]) -Greedy action tensor([-1.0957, -0.5070, 0.3498, 0.1634]) tensor([0.0946, 0.1705, 0.4016, 0.3333]) -Greedy action tensor([-2.0789, -0.8624, 1.1426, 0.4107]) tensor([0.0241, 0.0813, 0.6040, 0.2905]) -Greedy action tensor([-0.2221, 0.2361, 0.5633, 1.6172]) tensor([0.0904, 0.1429, 0.1982, 0.5686]) -Greedy action tensor([-1.6416, -0.4786, 0.4989, 0.0326]) tensor([0.0554, 0.1774, 0.4714, 0.2957]) -Greedy action tensor([-1.9531, -0.5947, 0.5628, 0.1179]) tensor([0.0397, 0.1544, 0.4912, 0.3148]) -Greedy action tensor([-1.9399, -0.4563, 0.6748, -0.1723]) tensor([0.0401, 0.1769, 0.5481, 0.2349]) -Greedy action tensor([-1.6555, -0.4944, 0.6025, 0.2115]) tensor([0.0494, 0.1579, 0.4728, 0.3198]) -Greedy action tensor([-1.4880, -0.3767, 0.0559, -0.5559]) tensor([0.0888, 0.2698, 0.4158, 0.2256]) -Greedy action tensor([-0.8103, 0.3647, 0.5504, 1.2767]) tensor([0.0617, 0.1999, 0.2407, 0.4976]) -Greedy action tensor([-1.6444, 0.3630, 0.5069, 0.3096]) tensor([0.0415, 0.3089, 0.3567, 0.2928]) -Greedy action tensor([-1.0465, -0.6025, 0.2186, 0.3097]) tensor([0.1002, 0.1561, 0.3549, 0.3888]) -Greedy action tensor([-1.1487, -0.5515, 0.2893, 0.5592]) tensor([0.0797, 0.1448, 0.3357, 0.4397]) -Greedy action tensor([-0.6006, -0.0206, 0.1185, 0.3342]) tensor([0.1354, 0.2418, 0.2779, 0.3448]) -Greedy action tensor([-1.9328, -0.8156, 0.0056, -0.3555]) tensor([0.0631, 0.1929, 0.4385, 0.3056]) -Greedy action tensor([-1.8493, -0.9714, 0.2268, -0.4285]) tensor([0.0644, 0.1550, 0.5138, 0.2668]) -Greedy action tensor([-0.3059, 0.7803, -0.0684, 0.0805]) tensor([0.1492, 0.4421, 0.1892, 0.2196]) -Greedy action tensor([-0.2135, -1.3263, -0.6659, 0.4324]) tensor([0.2582, 0.0849, 0.1643, 0.4926]) -Greedy action tensor([ 0.8777, -0.2864, -0.7036, 0.0886]) tensor([0.5070, 0.1583, 0.1043, 0.2303]) -Greedy action tensor([-0.0403, -0.4254, 0.5439, -0.6380]) tensor([0.2485, 0.1691, 0.4457, 0.1367]) -Greedy action tensor([ 0.5296, 0.1719, -0.6505, 1.3009]) tensor([0.2399, 0.1677, 0.0737, 0.5187]) -Greedy action tensor([ 0.6509, 0.4761, -0.3190, 1.2171]) tensor([0.2512, 0.2109, 0.0953, 0.4426]) -Greedy action tensor([ 0.8379, -0.3752, 0.2588, 0.7183]) tensor([0.3643, 0.1083, 0.2042, 0.3232]) -Greedy action tensor([ 0.8279, -0.0982, -0.2369, 0.9393]) tensor([0.3498, 0.1386, 0.1206, 0.3910]) -Greedy action tensor([ 0.6995, 0.3678, -0.2098, 0.4212]) tensor([0.3475, 0.2494, 0.1400, 0.2631]) -Greedy action tensor([ 1.0040, 0.0835, -1.0589, 1.2460]) tensor([0.3572, 0.1423, 0.0454, 0.4551]) -Greedy action tensor([ 0.8476, -1.1608, -0.5372, 0.7544]) tensor([0.4356, 0.0585, 0.1091, 0.3969]) -Greedy action tensor([ 0.6199, -0.0664, -0.0545, -0.0104]) tensor([0.3929, 0.1978, 0.2001, 0.2092]) -Greedy action tensor([ 1.5630, -1.0421, 0.5243, 0.7620]) tensor([0.5329, 0.0394, 0.1886, 0.2392]) -Greedy action tensor([ 0.9604, -0.3899, -1.3537, 1.5158]) tensor([0.3225, 0.0836, 0.0319, 0.5620]) -Greedy action tensor([-0.8264, -2.4860, -0.3701, 1.3813]) tensor([0.0843, 0.0160, 0.1330, 0.7666]) -Greedy action tensor([ 2.1380, -0.5417, -0.2094, 0.5145]) tensor([0.7345, 0.0504, 0.0702, 0.1449]) -Greedy action tensor([ 1.3553, -0.2041, 1.7728, 0.9451]) tensor([0.2948, 0.0620, 0.4476, 0.1956]) -Greedy action tensor([0.4089, 1.4750, 1.3582, 0.9937]) tensor([0.1207, 0.3506, 0.3120, 0.2167]) -Greedy action tensor([ 1.0148, -0.0501, -1.4970, 1.4871]) tensor([0.3301, 0.1138, 0.0268, 0.5293]) -Greedy action tensor([ 0.8724, -1.6798, 0.7241, 0.2948]) tensor([0.3998, 0.0311, 0.3447, 0.2244]) -Greedy action tensor([ 1.3605, -0.6500, 1.7119, 0.5637]) tensor([0.3327, 0.0446, 0.4728, 0.1500]) -Greedy action tensor([-0.1542, -1.7233, -0.2949, -0.2456]) tensor([0.3345, 0.0697, 0.2906, 0.3053]) -Greedy action tensor([ 0.8918, 0.3696, -0.7385, 0.8196]) tensor([0.3677, 0.2181, 0.0720, 0.3421]) -Greedy action tensor([ 0.4285, -0.5717, 1.2415, 0.3630]) tensor([0.2194, 0.0807, 0.4945, 0.2054]) -Greedy action tensor([ 0.5781, -0.4995, 0.5453, 0.3340]) tensor([0.3235, 0.1101, 0.3130, 0.2534]) -Greedy action tensor([1.2024, 0.0065, 0.2438, 0.5780]) tensor([0.4502, 0.1361, 0.1726, 0.2411]) -Greedy action tensor([ 0.6795, -0.5139, -0.1716, -0.2725]) tensor([0.4726, 0.1433, 0.2018, 0.1824]) -Greedy action tensor([1.0788, 0.2537, 1.5887, 0.7814]) tensor([0.2600, 0.1139, 0.4329, 0.1931]) -Greedy action tensor([-0.0139, 0.0211, -0.6730, -0.4032]) tensor([0.3095, 0.3206, 0.1601, 0.2097]) -Greedy action tensor([-0.0933, -1.5753, 0.0443, 0.3120]) tensor([0.2581, 0.0586, 0.2962, 0.3871]) -Greedy action tensor([ 0.3879, -0.2034, -0.6823, 0.5426]) tensor([0.3264, 0.1807, 0.1119, 0.3810]) -Greedy action tensor([ 0.8447, 0.7238, -0.1218, 0.6031]) tensor([0.3277, 0.2904, 0.1247, 0.2573]) -Greedy action tensor([ 1.9192, -0.3470, 0.6659, 0.9375]) tensor([0.5669, 0.0588, 0.1619, 0.2124]) -Greedy action tensor([0.7609, 0.2782, 0.8060, 0.4951]) tensor([0.2916, 0.1799, 0.3050, 0.2235]) -Greedy action tensor([ 0.4005, 0.2693, -1.1260, 1.4023]) tensor([0.2076, 0.1820, 0.0451, 0.5653]) -Greedy action tensor([0.2457, 0.9815, 0.0793, 1.1332]) tensor([0.1572, 0.3280, 0.1331, 0.3818]) -Greedy action tensor([ 0.5525, -0.1216, 0.5472, 0.1649]) tensor([0.3142, 0.1601, 0.3125, 0.2132]) -Greedy action tensor([ 0.3418, -1.3606, 0.2403, 0.7920]) tensor([0.2737, 0.0499, 0.2472, 0.4292]) -Greedy action tensor([-0.4910, -1.2819, -0.8145, -0.0123]) tensor([0.2638, 0.1196, 0.1909, 0.4257]) -Greedy action tensor([ 1.4372, -0.8147, 2.3087, 0.5907]) tensor([0.2548, 0.0268, 0.6091, 0.1093]) -Greedy action tensor([ 0.8941, -0.0603, 0.7007, 0.7001]) tensor([0.3297, 0.1270, 0.2717, 0.2716]) -Greedy action tensor([ 0.6718, -0.6542, 0.1801, 0.2176]) tensor([0.3981, 0.1057, 0.2435, 0.2528]) -Greedy action tensor([1.3542, 0.4247, 0.4671, 0.7253]) tensor([0.4274, 0.1687, 0.1760, 0.2279]) -Greedy action tensor([0.6178, 0.3681, 0.4874, 0.5741]) tensor([0.2767, 0.2156, 0.2429, 0.2649]) -Greedy action tensor([ 1.4897, -0.7888, 0.9199, 1.1540]) tensor([0.4197, 0.0430, 0.2374, 0.3000]) -Greedy action tensor([ 0.4619, -0.6889, 1.7434, -0.1468]) tensor([0.1831, 0.0579, 0.6594, 0.0996]) -Greedy action tensor([ 0.3919, -1.3182, 0.9058, 0.3341]) tensor([0.2634, 0.0476, 0.4404, 0.2486]) -Greedy action tensor([ 0.8051, -1.4624, 0.3505, 0.1488]) tensor([0.4431, 0.0459, 0.2812, 0.2298]) -Greedy action tensor([1.0109, 0.5580, 1.2259, 0.0362]) tensor([0.3074, 0.1955, 0.3812, 0.1160]) -Greedy action tensor([ 0.4064, -0.7485, -0.2390, -0.5975]) tensor([0.4533, 0.1428, 0.2377, 0.1661]) -Greedy action tensor([-0.1111, -0.2538, 0.3359, -0.5053]) tensor([0.2436, 0.2112, 0.3809, 0.1642]) -Greedy action tensor([ 1.4298, -1.1364, 0.3791, 1.6364]) tensor([0.3765, 0.0289, 0.1317, 0.4629]) -Greedy action tensor([ 0.3511, 0.0063, 0.3136, -0.6209]) tensor([0.3279, 0.2323, 0.3158, 0.1240]) -Greedy action tensor([-0.1374, 1.1449, -0.2012, 1.1810]) tensor([0.1078, 0.3884, 0.1011, 0.4027]) -Greedy action tensor([ 0.6003, -0.1230, 1.0012, 0.5088]) tensor([0.2570, 0.1247, 0.3838, 0.2345]) -Greedy action tensor([ 1.5674, -0.7588, 0.1306, 0.6154]) tensor([0.5809, 0.0567, 0.1381, 0.2242]) -Greedy action tensor([ 0.2930, 0.8963, 1.1160, -0.2283]) tensor([0.1755, 0.3208, 0.3996, 0.1042]) -Greedy action tensor([ 1.1803, -0.4761, -0.0643, 1.0146]) tensor([0.4299, 0.0820, 0.1238, 0.3642]) -Greedy action tensor([ 0.4078, 0.2028, -0.3016, 1.5358]) tensor([0.1853, 0.1510, 0.0912, 0.5725]) -Greedy action tensor([-0.6967, 1.2267, 1.3979, -0.1133]) tensor([0.0563, 0.3854, 0.4574, 0.1009]) -Greedy action tensor([ 0.8794, 0.0682, -1.1032, 0.8142]) tensor([0.3970, 0.1764, 0.0547, 0.3719]) -Greedy action tensor([ 0.0274, -0.5926, 0.3671, -0.1853]) tensor([0.2666, 0.1434, 0.3744, 0.2155]) -Greedy action tensor([-0.8268, -1.7969, -1.1892, 0.8489]) tensor([0.1348, 0.0511, 0.0938, 0.7202]) -Greedy action tensor([ 1.5347, -0.2667, 0.3472, 1.9913]) tensor([0.3280, 0.0541, 0.1000, 0.5178]) -Greedy action tensor([ 0.3676, -0.5528, 1.1923, 0.5029]) tensor([0.2073, 0.0826, 0.4728, 0.2373]) -Greedy action tensor([ 0.3519, 0.2960, -0.2115, 0.8094]) tensor([0.2442, 0.2309, 0.1390, 0.3859]) -Greedy action tensor([ 0.4630, -0.0856, 1.1018, -0.4335]) tensor([0.2577, 0.1489, 0.4882, 0.1052]) -Greedy action tensor([-0.4780, 0.4037, 1.2402, -1.0482]) tensor([0.1047, 0.2527, 0.5834, 0.0592]) -Greedy action tensor([ 1.0277, -1.6224, 0.8253, 0.9146]) tensor([0.3596, 0.0254, 0.2937, 0.3212]) -Greedy action tensor([0.1056, 0.6231, 0.7506, 0.4474]) tensor([0.1669, 0.2801, 0.3181, 0.2349]) -Greedy action tensor([1.0009, 0.0346, 0.8304, 1.2655]) tensor([0.2836, 0.1079, 0.2391, 0.3694]) -Greedy action tensor([-0.1305, -1.3616, 0.3364, 0.6741]) tensor([0.1952, 0.0570, 0.3114, 0.4364]) -Greedy action tensor([ 0.4482, -1.9184, 1.6748, -0.4785]) tensor([0.2041, 0.0191, 0.6959, 0.0808]) -Greedy action tensor([1.7670, 0.1845, 1.0728, 1.4884]) tensor([0.4062, 0.0835, 0.2029, 0.3074]) -Greedy action tensor([-0.8736, -1.1554, -0.3644, 0.7160]) tensor([0.1202, 0.0907, 0.2000, 0.5891]) -Greedy action tensor([1.2780, 0.0014, 0.8213, 1.1481]) tensor([0.3583, 0.1000, 0.2270, 0.3147]) -Greedy action tensor([-0.0016, -0.1320, 0.2339, 0.3005]) tensor([0.2224, 0.1952, 0.2815, 0.3009]) -Greedy action tensor([-0.6137, -1.0830, -0.9512, 1.3946]) tensor([0.1021, 0.0639, 0.0729, 0.7611]) -Greedy action tensor([-0.5768, -0.1090, -0.4255, 0.6803]) tensor([0.1375, 0.2194, 0.1599, 0.4832]) -Greedy action tensor([ 0.8093, -0.8178, 1.3079, 1.3548]) tensor([0.2189, 0.0430, 0.3604, 0.3777]) -Greedy action tensor([ 0.4380, -0.9566, 1.1103, 0.6565]) tensor([0.2247, 0.0557, 0.4401, 0.2795]) -Greedy action tensor([ 0.8312, 1.6606, -0.7449, 0.5809]) tensor([0.2338, 0.5358, 0.0483, 0.1820]) -Greedy action tensor([ 0.1319, -0.3004, -0.0269, 0.8316]) tensor([0.2215, 0.1437, 0.1890, 0.4458]) -Greedy action tensor([ 0.6420, -0.2347, -0.1756, -0.3580]) tensor([0.4493, 0.1870, 0.1984, 0.1653]) -Greedy action tensor([ 0.6477, -0.2429, 0.1716, -0.5674]) tensor([0.4295, 0.1763, 0.2668, 0.1274]) -Greedy action tensor([ 0.3515, 0.0667, 0.0558, -0.3625]) tensor([0.3349, 0.2519, 0.2492, 0.1640]) -Greedy action tensor([ 0.5679, -0.2431, 0.1221, -0.4166]) tensor([0.4068, 0.1808, 0.2605, 0.1520]) -Greedy action tensor([ 0.5763, -0.0097, 0.1924, -0.1753]) tensor([0.3691, 0.2054, 0.2514, 0.1741]) -Greedy action tensor([ 0.5490, -0.3746, -0.1522, -0.5597]) tensor([0.4498, 0.1786, 0.2231, 0.1484]) -Greedy action tensor([ 0.3994, -0.1512, -0.1413, -0.1601]) tensor([0.3662, 0.2112, 0.2133, 0.2093]) -Greedy action tensor([ 0.6189, -0.4867, 0.1133, -0.6014]) tensor([0.4486, 0.1485, 0.2705, 0.1324]) -Greedy action tensor([ 0.3397, 0.1361, -0.0010, -0.1003]) tensor([0.3154, 0.2572, 0.2243, 0.2031]) -Greedy action tensor([ 0.7196, -0.3711, 0.0633, -0.6268]) tensor([0.4728, 0.1589, 0.2453, 0.1230]) -Greedy action tensor([ 0.9588, -0.9141, -0.0287, -0.3828]) tensor([0.5594, 0.0860, 0.2084, 0.1462]) -Greedy action tensor([ 0.8050, -0.5535, 0.2309, -0.8101]) tensor([0.4953, 0.1273, 0.2789, 0.0985]) -Greedy action tensor([ 0.7593, -0.2057, -0.1993, -0.2771]) tensor([0.4719, 0.1798, 0.1809, 0.1674]) -Greedy action tensor([ 0.9213, -0.6542, -0.2410, -0.8082]) tensor([0.5892, 0.1219, 0.1843, 0.1045]) -Greedy action tensor([ 0.5948, -0.3574, -0.1175, -0.3856]) tensor([0.4441, 0.1714, 0.2178, 0.1666]) -Greedy action tensor([ 0.3753, -0.0057, 0.0527, -0.1525]) tensor([0.3336, 0.2279, 0.2416, 0.1968]) -Greedy action tensor([ 0.6220, -0.2148, 0.1273, -0.1760]) tensor([0.4011, 0.1737, 0.2446, 0.1806]) -Greedy action tensor([ 0.6539, -0.6520, 0.2822, -0.8225]) tensor([0.4569, 0.1238, 0.3150, 0.1044]) -Greedy action tensor([ 0.6131, -0.4929, 0.1703, -0.5046]) tensor([0.4348, 0.1439, 0.2792, 0.1422]) -Greedy action tensor([ 0.8415, -0.5361, 0.0190, -0.4987]) tensor([0.5120, 0.1291, 0.2249, 0.1340]) -Greedy action tensor([ 0.1370, 0.0696, 0.0322, -0.1470]) tensor([0.2787, 0.2605, 0.2510, 0.2098]) -Greedy action tensor([ 0.7677, -0.4571, -0.0397, -0.4093]) tensor([0.4883, 0.1435, 0.2178, 0.1505]) -Greedy action tensor([ 0.7193, -0.5006, -0.1545, -0.5037]) tensor([0.4983, 0.1471, 0.2080, 0.1467]) -Greedy action tensor([ 0.7795, -0.4760, -0.0220, -0.2086]) tensor([0.4749, 0.1353, 0.2131, 0.1768]) -Greedy action tensor([ 0.9312, -0.8594, 0.2111, -0.9055]) tensor([0.5516, 0.0920, 0.2685, 0.0879]) -Greedy action tensor([ 0.4651, -0.0202, -0.1280, -0.2940]) tensor([0.3793, 0.2335, 0.2096, 0.1776]) -Greedy action tensor([ 1.0628, -0.8556, 0.0394, -0.4858]) tensor([0.5818, 0.0854, 0.2091, 0.1237]) -Greedy action tensor([ 0.4760, -0.2617, -0.0359, -0.1530]) tensor([0.3830, 0.1832, 0.2296, 0.2042]) -Greedy action tensor([ 0.7859, -0.2478, -0.0829, -0.3074]) tensor([0.4739, 0.1686, 0.1988, 0.1588]) -Greedy action tensor([ 0.9952, -0.8070, 0.0016, -0.5324]) tensor([0.5707, 0.0941, 0.2113, 0.1239]) -Greedy action tensor([ 1.0201, -0.7139, 0.0413, -0.5966]) tensor([0.5711, 0.1008, 0.2146, 0.1134]) -Greedy action tensor([ 0.6504, -0.4340, -0.0644, -0.4209]) tensor([0.4608, 0.1558, 0.2255, 0.1579]) -Greedy action tensor([ 0.5927, -0.3509, -0.0698, -0.2757]) tensor([0.4302, 0.1674, 0.2218, 0.1805]) -Greedy action tensor([ 0.2875, 0.0161, 0.0716, -0.2978]) tensor([0.3200, 0.2439, 0.2579, 0.1782]) -Greedy action tensor([ 0.2484, 0.1488, -0.0912, -0.0258]) tensor([0.2961, 0.2680, 0.2108, 0.2251]) -Greedy action tensor([ 0.5874, -0.2656, 0.1670, -0.4117]) tensor([0.4080, 0.1739, 0.2679, 0.1502]) -Greedy action tensor([ 0.7111, -0.3842, -0.0690, -0.3248]) tensor([0.4656, 0.1557, 0.2134, 0.1653]) -Greedy action tensor([ 0.3783, -0.0856, -0.1221, -0.2242]) tensor([0.3594, 0.2260, 0.2179, 0.1967]) -Greedy action tensor([ 0.5165, -0.1230, -0.0392, -0.3452]) tensor([0.3963, 0.2090, 0.2273, 0.1674]) -Greedy action tensor([ 0.3357, -0.1288, -0.0443, -0.2133]) tensor([0.3461, 0.2175, 0.2366, 0.1998]) -Greedy action tensor([ 0.2264, -0.0599, 0.1200, -0.3548]) tensor([0.3116, 0.2340, 0.2801, 0.1743]) -Greedy action tensor([ 0.5367, -0.5614, 0.0013, -0.6120]) tensor([0.4472, 0.1492, 0.2618, 0.1418]) -Greedy action tensor([ 0.6104, -0.1604, 0.0149, -0.2669]) tensor([0.4116, 0.1904, 0.2269, 0.1712]) -Greedy action tensor([ 0.6599, -0.2578, 0.0794, -0.6497]) tensor([0.4486, 0.1792, 0.2511, 0.1211]) -Greedy action tensor([ 0.4608, 0.0282, -0.2140, -0.1321]) tensor([0.3689, 0.2393, 0.1879, 0.2039]) -Greedy action tensor([ 0.3698, 0.0274, -0.0159, -0.1150]) tensor([0.3327, 0.2362, 0.2262, 0.2049]) -Greedy action tensor([ 0.8876, -0.7485, -0.2105, -0.5410]) tensor([0.5657, 0.1102, 0.1886, 0.1356]) -Greedy action tensor([ 1.1012, -1.1651, 0.0317, -0.6071]) tensor([0.6143, 0.0637, 0.2108, 0.1113]) -Greedy action tensor([ 0.9112, -0.6861, -0.1535, -0.6236]) tensor([0.5673, 0.1148, 0.1956, 0.1223]) -Greedy action tensor([ 0.3911, -0.0573, -0.0418, -0.3093]) tensor([0.3592, 0.2294, 0.2330, 0.1783]) -Greedy action tensor([ 0.2983, -0.1920, -0.0673, -0.4416]) tensor([0.3593, 0.2200, 0.2493, 0.1714]) -Greedy action tensor([ 0.5333, 0.1691, -0.0635, -0.0691]) tensor([0.3580, 0.2488, 0.1971, 0.1960]) -Greedy action tensor([ 0.7908, -0.4406, 0.0381, -0.1925]) tensor([0.4679, 0.1366, 0.2204, 0.1750]) -Greedy action tensor([ 0.7353, -0.2594, -0.0564, -0.2563]) tensor([0.4558, 0.1686, 0.2065, 0.1691]) -Greedy action tensor([ 0.7244, -0.6584, -0.1005, -0.6595]) tensor([0.5155, 0.1293, 0.2259, 0.1292]) -Greedy action tensor([ 0.2790, 0.1319, 0.0269, -0.3197]) tensor([0.3135, 0.2706, 0.2436, 0.1723]) -Greedy action tensor([ 0.8264, -0.1635, 0.0561, -0.8180]) tensor([0.4932, 0.1833, 0.2283, 0.0952]) -Greedy action tensor([ 0.6506, -0.2519, 0.0609, -0.1637]) tensor([0.4161, 0.1688, 0.2308, 0.1843]) -Greedy action tensor([ 0.6372, -0.1217, 0.0775, -0.3023]) tensor([0.4115, 0.1926, 0.2351, 0.1608]) -Greedy action tensor([ 0.7510, -0.4617, 0.0158, -0.4284]) tensor([0.4798, 0.1427, 0.2300, 0.1475]) -Greedy action tensor([ 0.7324, -0.4499, -0.0877, -0.4893]) tensor([0.4898, 0.1502, 0.2157, 0.1444]) -Greedy action tensor([ 0.3114, 0.1092, -0.1307, -0.1870]) tensor([0.3260, 0.2663, 0.2095, 0.1981]) -Greedy action tensor([ 0.5583, -0.2133, 0.0534, -0.4401]) tensor([0.4108, 0.1899, 0.2479, 0.1514]) -Greedy action tensor([ 0.8684, -0.3627, -0.1079, -0.3974]) tensor([0.5126, 0.1497, 0.1931, 0.1446]) -Greedy action tensor([ 0.5248, -0.3819, -0.0812, -0.5253]) tensor([0.4349, 0.1756, 0.2373, 0.1522]) -Greedy action tensor([ 0.4267, -0.1584, 0.1176, -0.2467]) tensor([0.3570, 0.1989, 0.2621, 0.1821]) -Greedy action tensor([ 1.2634, -1.1157, -0.0115, -0.5952]) tensor([0.6545, 0.0606, 0.1829, 0.1020]) -Greedy action tensor([ 0.0469, 0.1236, -0.0941, -0.2166]) tensor([0.2691, 0.2905, 0.2337, 0.2067]) -Greedy action tensor([ 0.3406, 0.0168, -0.0715, -0.1412]) tensor([0.3330, 0.2409, 0.2205, 0.2057]) -Greedy action tensor([ 0.6380, -0.4385, -0.1211, -0.3750]) tensor([0.4604, 0.1569, 0.2155, 0.1672]) -Greedy action tensor([ 0.6302, -0.3743, -0.1630, -0.4065]) tensor([0.4601, 0.1685, 0.2082, 0.1632]) -Greedy action tensor([ 0.6779, -0.1281, 0.1802, -0.3933]) tensor([0.4172, 0.1863, 0.2536, 0.1429]) -Greedy action tensor([ 0.2154, 0.0149, 0.0873, -0.4303]) tensor([0.3103, 0.2539, 0.2730, 0.1627]) -Greedy action tensor([ 0.2594, 0.0393, -0.1047, -0.3288]) tensor([0.3276, 0.2629, 0.2276, 0.1819]) -Greedy action tensor([ 0.4999, -0.1528, -0.0465, -0.3178]) tensor([0.3935, 0.2049, 0.2279, 0.1737]) -Greedy action tensor([ 0.8658, -0.4416, -0.0126, -0.3708]) tensor([0.5060, 0.1369, 0.2102, 0.1469]) -Greedy action tensor([ 0.8633, -0.2328, 0.0203, -0.6302]) tensor([0.5027, 0.1680, 0.2164, 0.1129]) -Greedy action tensor([ 1.0927, -0.7087, 0.0573, -0.7398]) tensor([0.5952, 0.0982, 0.2113, 0.0952]) -Greedy action tensor([ 0.6228, -0.4333, -0.0277, -0.3748]) tensor([0.4467, 0.1554, 0.2331, 0.1648]) -Greedy action tensor([ 0.3358, -0.0306, 0.0454, -0.2133]) tensor([0.3313, 0.2296, 0.2478, 0.1913]) -Greedy action tensor([ 0.3245, 0.1382, 0.1058, -0.2724]) tensor([0.3141, 0.2607, 0.2524, 0.1729]) -Greedy action tensor([ 0.7553, -0.3951, -0.4605, 0.3306]) tensor([0.4411, 0.1396, 0.1308, 0.2885]) -Greedy action tensor([ 1.4849, -0.2276, -0.3083, 0.2241]) tensor([0.6134, 0.1107, 0.1021, 0.1739]) -Greedy action tensor([ 0.7588, -0.3654, -0.0729, 0.1588]) tensor([0.4331, 0.1407, 0.1885, 0.2377]) -Greedy action tensor([ 1.8482, -0.4909, -0.3729, 0.2498]) tensor([0.7107, 0.0685, 0.0771, 0.1437]) -Greedy action tensor([ 1.3109, -0.3155, -0.3894, 0.5199]) tensor([0.5456, 0.1073, 0.0997, 0.2474]) -Greedy action tensor([ 0.8007, 0.0588, 0.0010, -0.0774]) tensor([0.4271, 0.2034, 0.1920, 0.1775]) -Greedy action tensor([ 1.4817, -0.3388, -0.3995, 0.2210]) tensor([0.6259, 0.1014, 0.0954, 0.1774]) -Greedy action tensor([ 2.0375, -0.6361, -0.1795, 0.1145]) tensor([0.7552, 0.0521, 0.0823, 0.1104]) -Greedy action tensor([ 1.0212, -0.3321, -0.2516, 0.0399]) tensor([0.5227, 0.1351, 0.1464, 0.1959]) -Greedy action tensor([ 1.4222, -0.8010, -0.0742, 0.2766]) tensor([0.6060, 0.0656, 0.1357, 0.1927]) -Greedy action tensor([ 1.6326, 0.1347, -0.1776, 0.3465]) tensor([0.6011, 0.1344, 0.0984, 0.1661]) -Greedy action tensor([ 1.4019, -0.6284, -0.3304, -0.0789]) tensor([0.6512, 0.0855, 0.1152, 0.1481]) -Greedy action tensor([ 0.6527, -0.1501, -0.2854, 0.0936]) tensor([0.4147, 0.1858, 0.1623, 0.2371]) -Greedy action tensor([ 0.8458, 0.2513, 0.0336, -0.3586]) tensor([0.4356, 0.2404, 0.1934, 0.1306]) -Greedy action tensor([ 1.2316, -0.1593, -0.2369, 0.0909]) tensor([0.5559, 0.1384, 0.1280, 0.1777]) -Greedy action tensor([ 1.4908, -0.3611, -0.2929, 0.1080]) tensor([0.6346, 0.0996, 0.1066, 0.1592]) -Greedy action tensor([ 1.2734, -0.3583, -0.4114, -0.0362]) tensor([0.6057, 0.1185, 0.1123, 0.1635]) -Greedy action tensor([ 1.0703, -0.5539, -0.3082, 0.0748]) tensor([0.5499, 0.1084, 0.1385, 0.2032]) -Greedy action tensor([ 0.9238, -0.6063, -0.5559, 0.4146]) tensor([0.4890, 0.1059, 0.1113, 0.2939]) -Greedy action tensor([ 1.3292, -0.6300, -0.4182, 0.6313]) tensor([0.5516, 0.0778, 0.0961, 0.2745]) -Greedy action tensor([ 1.3455, -0.1701, -0.6849, 0.3215]) tensor([0.5848, 0.1285, 0.0768, 0.2100]) -Greedy action tensor([ 1.4860, -0.8640, -0.0211, 0.0478]) tensor([0.6434, 0.0614, 0.1425, 0.1527]) -Greedy action tensor([ 0.3823, -0.1473, -0.0997, 0.0702]) tensor([0.3403, 0.2004, 0.2102, 0.2491]) -Greedy action tensor([ 1.1916, -0.5288, -0.1412, 0.2008]) tensor([0.5513, 0.0987, 0.1454, 0.2047]) -Greedy action tensor([ 0.9347, -0.3018, 0.1024, -0.0859]) tensor([0.4794, 0.1392, 0.2086, 0.1728]) -Greedy action tensor([ 0.5987, -0.4059, -0.1655, -0.0999]) tensor([0.4293, 0.1572, 0.1999, 0.2135]) -Greedy action tensor([ 1.6189, -0.4324, -0.0423, -0.0341]) tensor([0.6623, 0.0851, 0.1258, 0.1268]) -Greedy action tensor([ 1.0253, 0.0477, -0.3501, 0.4952]) tensor([0.4510, 0.1697, 0.1140, 0.2654]) -Greedy action tensor([ 1.5239, -0.6319, -0.1158, 0.4172]) tensor([0.6096, 0.0706, 0.1183, 0.2016]) -Greedy action tensor([ 1.2720, -0.2772, 0.0086, 0.0237]) tensor([0.5611, 0.1192, 0.1586, 0.1610]) -Greedy action tensor([ 1.0977, -0.7966, -0.5207, 1.0416]) tensor([0.4359, 0.0656, 0.0864, 0.4121]) -Greedy action tensor([ 0.7846, -0.4902, -0.0440, 0.3165]) tensor([0.4269, 0.1193, 0.1864, 0.2673]) -Greedy action tensor([ 1.0000, -0.7148, -0.2254, 0.6192]) tensor([0.4636, 0.0835, 0.1361, 0.3168]) -Greedy action tensor([ 0.9748, -0.7089, -0.2836, 0.6069]) tensor([0.4625, 0.0859, 0.1314, 0.3202]) -Greedy action tensor([ 0.8284, -0.4255, -0.2583, -0.2200]) tensor([0.5068, 0.1446, 0.1709, 0.1776]) -Greedy action tensor([ 1.5694, -0.5153, -0.4625, 0.2274]) tensor([0.6593, 0.0820, 0.0864, 0.1723]) -Greedy action tensor([ 1.4239, -0.3787, -0.1307, 0.2286]) tensor([0.5957, 0.0982, 0.1258, 0.1803]) -Greedy action tensor([ 0.6280, -0.0490, 0.0170, -0.2079]) tensor([0.4025, 0.2045, 0.2185, 0.1745]) -Greedy action tensor([ 0.7049, -0.1060, -0.2601, 0.0351]) tensor([0.4279, 0.1902, 0.1630, 0.2190]) -Greedy action tensor([ 1.3069, -0.1939, -0.3293, 0.2153]) tensor([0.5703, 0.1272, 0.1111, 0.1915]) -Greedy action tensor([ 1.3509, -0.2411, -0.1350, -0.1895]) tensor([0.6082, 0.1238, 0.1376, 0.1303]) -Greedy action tensor([ 1.5743, -0.2827, -0.3163, 0.6559]) tensor([0.5861, 0.0915, 0.0885, 0.2339]) -Greedy action tensor([ 1.2248, -0.7373, -0.4291, 0.5089]) tensor([0.5493, 0.0772, 0.1051, 0.2685]) -Greedy action tensor([ 0.8572, -0.3624, -0.1980, -0.1387]) tensor([0.4968, 0.1467, 0.1730, 0.1835]) -Greedy action tensor([ 0.9456, -0.1687, -0.2694, -0.3547]) tensor([0.5271, 0.1730, 0.1564, 0.1436]) -Greedy action tensor([ 1.4451, -0.5824, -0.2256, 0.2422]) tensor([0.6172, 0.0813, 0.1161, 0.1854]) -Greedy action tensor([ 1.7014, -0.2995, -0.5009, 0.1998]) tensor([0.6809, 0.0921, 0.0753, 0.1517]) -Greedy action tensor([ 0.9461, -0.1539, -0.0342, -0.0548]) tensor([0.4818, 0.1604, 0.1808, 0.1771]) -Greedy action tensor([ 1.3917, -0.7225, -0.4873, 0.1339]) tensor([0.6419, 0.0775, 0.0981, 0.1825]) -Greedy action tensor([ 2.1548, -0.4424, -0.4479, 0.2897]) tensor([0.7672, 0.0571, 0.0568, 0.1188]) -Greedy action tensor([ 0.5564, -0.2512, 0.1856, -0.1936]) tensor([0.3834, 0.1710, 0.2646, 0.1811]) -Greedy action tensor([ 0.5566, -0.2731, -0.2695, -0.2460]) tensor([0.4306, 0.1878, 0.1885, 0.1930]) -Greedy action tensor([ 0.8118, -0.0341, -0.0450, -0.1129]) tensor([0.4444, 0.1907, 0.1886, 0.1763]) -Greedy action tensor([ 1.0591, -0.2911, -0.2214, -0.2738]) tensor([0.5553, 0.1439, 0.1543, 0.1464]) -Greedy action tensor([ 0.6329, -0.0739, -0.3067, -0.1279]) tensor([0.4253, 0.2098, 0.1662, 0.1987]) -Greedy action tensor([ 0.5314, -0.0593, -0.4983, -0.2969]) tensor([0.4259, 0.2359, 0.1521, 0.1860]) -Greedy action tensor([ 0.7866, -0.3018, -0.3114, 0.3584]) tensor([0.4307, 0.1450, 0.1436, 0.2807]) -Greedy action tensor([ 1.0734, -0.5428, -0.3941, 0.4074]) tensor([0.5147, 0.1022, 0.1186, 0.2644]) -Greedy action tensor([ 0.9094, -0.2625, -0.3766, 0.4411]) tensor([0.4520, 0.1400, 0.1249, 0.2830]) -Greedy action tensor([ 0.6800, -0.3979, -0.5186, 0.6289]) tensor([0.3858, 0.1313, 0.1164, 0.3666]) -Greedy action tensor([ 1.2184, -0.3225, -0.3741, 0.2817]) tensor([0.5526, 0.1184, 0.1124, 0.2166]) -Greedy action tensor([ 1.2928, -0.4970, -0.1508, 0.0315]) tensor([0.5930, 0.0990, 0.1400, 0.1680]) -Greedy action tensor([ 0.4748, -0.3834, -0.1636, -0.0690]) tensor([0.3949, 0.1674, 0.2085, 0.2292]) -Greedy action tensor([ 1.0671, -0.7723, -0.3851, 0.3861]) tensor([0.5266, 0.0837, 0.1232, 0.2665]) -Greedy action tensor([ 1.2415, -0.7582, 0.0702, 0.1390]) tensor([0.5626, 0.0762, 0.1744, 0.1868]) -Greedy action tensor([ 0.8911, -0.3138, 0.0703, 0.2282]) tensor([0.4434, 0.1329, 0.1951, 0.2285]) -Greedy action tensor([ 1.1191, -0.2632, -0.2150, -0.2729]) tensor([0.5672, 0.1424, 0.1494, 0.1410]) -Greedy action tensor([ 1.3209, -0.1630, -0.3222, 0.0754]) tensor([0.5855, 0.1328, 0.1132, 0.1685]) -Greedy action tensor([ 2.1045, -0.8005, -0.3881, 0.5269]) tensor([0.7441, 0.0407, 0.0615, 0.1536]) -Greedy action tensor([ 1.7098, -0.4285, 0.0386, 0.2643]) tensor([0.6487, 0.0765, 0.1220, 0.1529]) -Greedy action tensor([ 1.0930, -0.6649, -0.0891, 0.0843]) tensor([0.5424, 0.0935, 0.1663, 0.1978]) -Greedy action tensor([ 1.0071, -0.3415, -0.4035, -0.2876]) tensor([0.5626, 0.1460, 0.1373, 0.1541]) -Greedy action tensor([ 0.4821, -0.3812, 0.0298, 0.0085]) tensor([0.3730, 0.1573, 0.2373, 0.2323]) -Greedy action tensor([ 1.3263, -0.1156, 0.1235, -0.0458]) tensor([0.5585, 0.1321, 0.1678, 0.1416]) -Greedy action tensor([ 1.4276, -0.5152, -0.3830, 0.2127]) tensor([0.6236, 0.0894, 0.1020, 0.1850]) -Greedy action tensor([ 1.1758, -0.1856, -0.2102, 0.0842]) tensor([0.5429, 0.1391, 0.1358, 0.1822]) -Greedy action tensor([ 0.9087, -0.4206, -0.1351, 0.0163]) tensor([0.4935, 0.1306, 0.1738, 0.2022]) -Greedy action tensor([ 1.5870, -0.6736, -0.1899, 0.3581]) tensor([0.6385, 0.0666, 0.1080, 0.1869]) -Greedy action tensor([ 1.2809, -0.4230, -0.2931, -0.0530]) tensor([0.6051, 0.1101, 0.1254, 0.1594]) -Greedy action tensor([ 1.3115, -0.4680, -0.2353, 0.3314]) tensor([0.5692, 0.0960, 0.1212, 0.2136]) -Greedy action tensor([ 0.6917, -0.5490, -0.3489, 0.2147]) tensor([0.4419, 0.1278, 0.1561, 0.2743]) -Greedy action tensor([-0.1681, -0.1839, 1.0919, 1.6970]) tensor([0.0836, 0.0823, 0.2946, 0.5395]) -Greedy action tensor([-1.8724, -0.3956, 0.6328, -0.1326]) tensor([0.0429, 0.1878, 0.5251, 0.2442]) -Greedy action tensor([-1.3764, -0.5383, 0.3799, 0.2898]) tensor([0.0695, 0.1606, 0.4023, 0.3676]) -Greedy action tensor([-0.8622, 0.0877, 0.2890, -0.2076]) tensor([0.1153, 0.2981, 0.3646, 0.2219]) -Greedy action tensor([-1.9073, -0.4563, 0.6497, -0.1616]) tensor([0.0419, 0.1786, 0.5398, 0.2398]) -Greedy action tensor([-0.5880, -0.5263, 0.5932, 1.4559]) tensor([0.0767, 0.0816, 0.2498, 0.5919]) -Greedy action tensor([-1.3343, -0.6016, 0.3358, 0.1979]) tensor([0.0768, 0.1598, 0.4080, 0.3554]) -Greedy action tensor([-1.8495, -0.4464, 0.6156, -0.1023]) tensor([0.0443, 0.1802, 0.5212, 0.2543]) -Greedy action tensor([-1.7710, -0.4513, 0.5724, -0.0705]) tensor([0.0485, 0.1814, 0.5048, 0.2654]) -Greedy action tensor([-1.7342, -0.5691, 0.6106, 0.0528]) tensor([0.0485, 0.1556, 0.5061, 0.2897]) -Greedy action tensor([-0.9094, -0.7949, 0.9597, 1.4812]) tensor([0.0512, 0.0574, 0.3320, 0.5593]) -Greedy action tensor([-1.7166, -0.4786, 1.1104, 0.8380]) tensor([0.0292, 0.1008, 0.4938, 0.3761]) -Greedy action tensor([-1.1302, -0.6247, 0.2667, 0.3383]) tensor([0.0906, 0.1501, 0.3661, 0.3933]) -Greedy action tensor([-1.8385, -0.4629, 0.6088, -0.1075]) tensor([0.0451, 0.1786, 0.5215, 0.2548]) -Greedy action tensor([-1.6826, -0.4813, 0.5409, -0.0239]) tensor([0.0531, 0.1767, 0.4910, 0.2791]) -Greedy action tensor([-1.6484, -0.5093, 0.4968, 0.0276]) tensor([0.0555, 0.1734, 0.4743, 0.2967]) -Greedy action tensor([-1.5254, -0.6489, 0.9702, 0.9565]) tensor([0.0364, 0.0874, 0.4411, 0.4351]) -Greedy action tensor([-1.3348, -0.5752, 0.4791, -0.1775]) tensor([0.0803, 0.1716, 0.4926, 0.2555]) -Greedy action tensor([-1.0763, -0.0863, 0.2409, 0.4270]) tensor([0.0839, 0.2258, 0.3131, 0.3772]) -Greedy action tensor([-1.7872, -0.4186, 0.6127, -0.0630]) tensor([0.0464, 0.1823, 0.5112, 0.2601]) -Greedy action tensor([-1.7886, -0.4623, 0.6502, 0.0640]) tensor([0.0442, 0.1667, 0.5070, 0.2821]) -Greedy action tensor([-1.9377, -0.4473, 0.6723, -0.1686]) tensor([0.0402, 0.1782, 0.5461, 0.2355]) -Greedy action tensor([-0.6819, -0.5660, 0.2915, 0.4976]) tensor([0.1247, 0.1400, 0.3299, 0.4055]) -Greedy action tensor([-1.0995, -0.3109, 0.5164, 0.9437]) tensor([0.0627, 0.1380, 0.3156, 0.4838]) -Greedy action tensor([-1.7231, -0.5023, 0.5772, -0.0120]) tensor([0.0502, 0.1703, 0.5013, 0.2781]) -Greedy action tensor([-1.5992, -0.5351, 0.9590, 0.8017]) tensor([0.0359, 0.1041, 0.4637, 0.3963]) -Greedy action tensor([-0.3330, -0.0250, 1.1250, 1.6587]) tensor([0.0715, 0.0973, 0.3072, 0.5240]) -Greedy action tensor([-1.4538, -0.2468, 0.8114, 0.8881]) tensor([0.0410, 0.1372, 0.3952, 0.4267]) -Greedy action tensor([-1.9005, -0.4059, 0.6369, -0.1639]) tensor([0.0420, 0.1874, 0.5318, 0.2388]) -Greedy action tensor([-1.8947, -0.2929, 0.7578, 0.1807]) tensor([0.0356, 0.1765, 0.5046, 0.2833]) -Greedy action tensor([-1.6568e+00, -5.0012e-01, 5.1800e-01, 1.6242e-03]) tensor([0.0549, 0.1744, 0.4827, 0.2880]) -Greedy action tensor([-0.6639, -0.4149, 1.0680, 1.5242]) tensor([0.0593, 0.0761, 0.3353, 0.5292]) -Greedy action tensor([-1.9849, -0.7706, 0.6278, 0.0193]) tensor([0.0393, 0.1325, 0.5363, 0.2919]) -Greedy action tensor([-1.2673, -0.6877, 0.6105, -0.4594]) tensor([0.0864, 0.1543, 0.5653, 0.1939]) -Greedy action tensor([-1.6658, -0.5521, 0.7222, 0.0863]) tensor([0.0483, 0.1471, 0.5261, 0.2785]) -Greedy action tensor([-1.4477, -0.7252, 0.6475, 0.5498]) tensor([0.0539, 0.1110, 0.4380, 0.3972]) -Greedy action tensor([-0.4051, -0.2809, 1.0094, 1.6830]) tensor([0.0698, 0.0791, 0.2874, 0.5637]) -Greedy action tensor([-1.8659, -0.4469, 0.6312, -0.1244]) tensor([0.0435, 0.1798, 0.5285, 0.2482]) -Greedy action tensor([-1.2463, -0.5154, 0.4746, -0.2115]) tensor([0.0871, 0.1809, 0.4868, 0.2452]) -Greedy action tensor([-1.4883, -0.0314, 0.4332, 0.3306]) tensor([0.0547, 0.2347, 0.3735, 0.3371]) -Greedy action tensor([-1.4970, -0.4152, 0.4179, 0.1428]) tensor([0.0629, 0.1856, 0.4271, 0.3244]) -Greedy action tensor([-1.6328, -0.3908, 0.5966, 0.0636]) tensor([0.0521, 0.1802, 0.4838, 0.2839]) -Greedy action tensor([-0.6970, -0.5699, 0.1617, 0.3020]) tensor([0.1387, 0.1575, 0.3273, 0.3766]) -Greedy action tensor([-1.5621, -0.5387, 0.4553, 0.1001]) tensor([0.0603, 0.1679, 0.4537, 0.3181]) -Greedy action tensor([-1.9149, -0.4553, 0.6627, -0.1540]) tensor([0.0412, 0.1772, 0.5421, 0.2395]) -Greedy action tensor([-0.9448, -0.6675, 0.3210, -0.4046]) tensor([0.1319, 0.1740, 0.4677, 0.2264]) -Greedy action tensor([-1.7621, -0.4890, 0.5753, -0.0488]) tensor([0.0488, 0.1745, 0.5057, 0.2709]) -Greedy action tensor([-1.9579, -0.4791, 1.1122, 0.4780]) tensor([0.0261, 0.1144, 0.5617, 0.2979]) -Greedy action tensor([-0.0195, -0.0671, 0.9718, 1.6792]) tensor([0.0989, 0.0943, 0.2664, 0.5405]) -Greedy action tensor([-1.8898, -0.7308, 0.0246, -0.3710]) tensor([0.0644, 0.2051, 0.4366, 0.2939]) -Greedy action tensor([-1.6187, -0.5122, 0.5768, 0.2157]) tensor([0.0519, 0.1569, 0.4662, 0.3249]) -Greedy action tensor([-1.0049, -0.5718, 0.3827, -0.0538]) tensor([0.1095, 0.1688, 0.4384, 0.2833]) -Greedy action tensor([-0.8204, 0.2964, 0.3075, -0.1259]) tensor([0.1093, 0.3340, 0.3377, 0.2190]) -Greedy action tensor([-1.5454, -0.4542, 0.8169, 0.6770]) tensor([0.0420, 0.1250, 0.4456, 0.3874]) -Greedy action tensor([-1.1547, 0.6849, 0.3386, 0.6423]) tensor([0.0563, 0.3540, 0.2504, 0.3393]) -Greedy action tensor([-1.8859, -0.9491, 0.1320, -0.3360]) tensor([0.0633, 0.1617, 0.4766, 0.2984]) -Greedy action tensor([-1.2773, -0.3334, 1.0290, 1.1676]) tensor([0.0398, 0.1022, 0.3993, 0.4587]) -Greedy action tensor([-1.2192, -1.0316, -0.7643, -0.9020]) tensor([0.1940, 0.2340, 0.3057, 0.2664]) -Greedy action tensor([-1.6471, -0.1882, 0.4905, -0.0151]) tensor([0.0529, 0.2276, 0.4488, 0.2707]) -Greedy action tensor([-0.0817, 0.0095, 1.0129, 1.7204]) tensor([0.0897, 0.0983, 0.2681, 0.5439]) -Greedy action tensor([-1.9600, -0.6739, 1.6027, 0.8435]) tensor([0.0177, 0.0642, 0.6254, 0.2927]) -Greedy action tensor([-1.7842, -0.4949, 0.6006, -0.0475]) tensor([0.0472, 0.1715, 0.5129, 0.2683]) -Greedy action tensor([-1.6735, -0.3911, 0.5163, 0.0183]) tensor([0.0527, 0.1901, 0.4710, 0.2862]) -Greedy action tensor([-0.5851, -0.2631, 0.9820, 1.6014]) tensor([0.0622, 0.0858, 0.2981, 0.5539]) -Greedy action tensor([-1.4517, -0.6080, 0.6161, 0.4376]) tensor([0.0560, 0.1303, 0.4431, 0.3706]) -Greedy action tensor([-1.7758, -0.5126, 0.6827, 0.0604]) tensor([0.0444, 0.1572, 0.5195, 0.2788]) -Greedy action tensor([-1.8001, -0.4222, 0.8583, 0.5183]) tensor([0.0340, 0.1349, 0.4855, 0.3456]) -Greedy action tensor([-1.1033, -0.6771, 0.0214, -0.0356]) tensor([0.1174, 0.1798, 0.3614, 0.3414]) -Greedy action tensor([-1.7189, -0.6076, 0.0157, -0.3973]) tensor([0.0743, 0.2258, 0.4212, 0.2787]) -Greedy action tensor([-0.8195, -0.4478, 0.5677, 1.1549]) tensor([0.0732, 0.1062, 0.2932, 0.5274]) -Greedy action tensor([-1.3900, -0.5148, 0.4419, 0.4187]) tensor([0.0635, 0.1524, 0.3966, 0.3875]) -Greedy action tensor([-1.6237, -0.4671, 0.6008, 0.1304]) tensor([0.0521, 0.1655, 0.4816, 0.3008]) -Greedy action tensor([-1.6593, -0.5138, 0.7640, -0.2895]) tensor([0.0517, 0.1624, 0.5827, 0.2032]) -Greedy action tensor([-1.4334, 0.5612, 0.2978, 0.3989]) tensor([0.0494, 0.3630, 0.2790, 0.3086]) -Greedy action tensor([-0.4587, -0.0065, 0.4908, 1.0089]) tensor([0.1053, 0.1655, 0.2722, 0.4570]) -Greedy action tensor([-1.8442, -0.4704, 0.6362, -0.0689]) tensor([0.0439, 0.1733, 0.5240, 0.2589]) -Greedy action tensor([-1.8763, -0.4483, 0.6316, -0.1439]) tensor([0.0433, 0.1805, 0.5315, 0.2447]) -Greedy action tensor([-1.5697, -0.3997, 0.9451, 0.8920]) tensor([0.0353, 0.1138, 0.4367, 0.4142]) -Greedy action tensor([-1.0057, -0.3620, 0.5440, 1.1679]) tensor([0.0610, 0.1160, 0.2871, 0.5359]) -Greedy action tensor([-1.0336, -0.2160, 0.6085, 1.0739]) tensor([0.0600, 0.1360, 0.3101, 0.4939]) -Greedy action tensor([-0.7208, -0.6107, 0.2155, 0.1228]) tensor([0.1430, 0.1597, 0.3648, 0.3325]) -Greedy action tensor([-0.0103, -1.1756, -0.3559, -0.9423]) tensor([0.4144, 0.1292, 0.2933, 0.1632]) -Greedy action tensor([0.1134, 0.4841, 0.4655, 0.4055]) tensor([0.1919, 0.2781, 0.2729, 0.2571]) -Greedy action tensor([ 2.0875, -0.3923, 0.0651, 1.1433]) tensor([0.6230, 0.0522, 0.0824, 0.2423]) -Greedy action tensor([-0.0135, -0.5548, -0.0187, -0.1080]) tensor([0.2868, 0.1669, 0.2853, 0.2610]) -Greedy action tensor([ 0.6463, -0.8078, 0.4122, -0.1845]) tensor([0.4064, 0.0949, 0.3216, 0.1771]) -Greedy action tensor([ 0.2470, -0.2085, -0.0178, 0.8955]) tensor([0.2318, 0.1470, 0.1779, 0.4433]) -Greedy action tensor([ 0.9748, -0.0955, -0.4351, -0.0451]) tensor([0.5134, 0.1760, 0.1254, 0.1852]) -Greedy action tensor([0.8758, 0.4700, 0.2774, 0.2796]) tensor([0.3614, 0.2409, 0.1987, 0.1991]) -Greedy action tensor([ 0.8924, -0.0381, 0.2789, -0.0478]) tensor([0.4299, 0.1695, 0.2327, 0.1679]) -Greedy action tensor([ 1.0103, -0.3722, 0.0407, 0.2795]) tensor([0.4736, 0.1188, 0.1796, 0.2280]) -Greedy action tensor([0.0199, 0.1231, 0.4625, 0.7962]) tensor([0.1713, 0.1899, 0.2666, 0.3722]) -Greedy action tensor([ 0.4403, 0.0119, 0.6134, -0.1556]) tensor([0.2949, 0.1921, 0.3506, 0.1625]) -Greedy action tensor([ 0.8886, -0.1977, 0.5063, -0.2153]) tensor([0.4253, 0.1435, 0.2902, 0.1410]) -Greedy action tensor([ 0.6707, -0.7569, 1.6275, 0.4701]) tensor([0.2145, 0.0515, 0.5585, 0.1755]) -Greedy action tensor([-0.0649, -0.9679, 0.2124, 1.9203]) tensor([0.0999, 0.0405, 0.1319, 0.7277]) -Greedy action tensor([ 0.3154, -0.0931, -0.1637, 0.8257]) tensor([0.2532, 0.1683, 0.1568, 0.4217]) -Greedy action tensor([-0.3558, -2.0491, -0.0520, 0.8961]) tensor([0.1657, 0.0305, 0.2245, 0.5794]) -Greedy action tensor([ 1.6875, -1.1844, 0.8972, 0.2776]) tensor([0.5700, 0.0323, 0.2586, 0.1392]) -Greedy action tensor([0.6603, 0.6021, 0.8175, 0.3568]) tensor([0.2596, 0.2449, 0.3038, 0.1917]) -Greedy action tensor([ 0.8677, -1.1740, 0.8784, 0.3571]) tensor([0.3649, 0.0474, 0.3688, 0.2190]) -Greedy action tensor([1.4878, 0.2298, 0.8516, 0.9695]) tensor([0.4151, 0.1180, 0.2197, 0.2472]) -Greedy action tensor([ 1.5305, -0.7318, 1.1850, -0.1963]) tensor([0.5025, 0.0523, 0.3557, 0.0894]) -Greedy action tensor([-0.6531, -1.2373, 0.2655, 2.0261]) tensor([0.0537, 0.0299, 0.1344, 0.7820]) -Greedy action tensor([ 0.1505, -1.4713, 0.4096, 0.2138]) tensor([0.2810, 0.0555, 0.3641, 0.2994]) -Greedy action tensor([ 0.9110, -0.5072, -0.5383, 1.3794]) tensor([0.3253, 0.0788, 0.0764, 0.5196]) -Greedy action tensor([-0.3862, 0.7420, 0.6819, -0.6948]) tensor([0.1293, 0.3995, 0.3762, 0.0950]) -Greedy action tensor([ 1.1331, -0.0991, -0.5172, 0.4393]) tensor([0.5042, 0.1471, 0.0968, 0.2519]) -Greedy action tensor([0.5391, 0.4092, 0.2591, 0.0706]) tensor([0.3068, 0.2694, 0.2318, 0.1920]) -Greedy action tensor([ 4.8149e-01, -1.4787e+00, 4.5612e-01, -8.4090e-04]) tensor([0.3659, 0.0515, 0.3567, 0.2259]) -Greedy action tensor([ 0.7686, -0.0020, 1.9439, 0.0175]) tensor([0.1933, 0.0894, 0.6261, 0.0912]) -Greedy action tensor([ 2.0190, -1.0042, 1.0084, 0.9056]) tensor([0.5744, 0.0279, 0.2091, 0.1886]) -Greedy action tensor([ 0.1383, 0.5803, -0.2527, 0.4420]) tensor([0.2180, 0.3392, 0.1474, 0.2954]) -Greedy action tensor([ 1.0851, -1.3220, -0.0201, 1.0670]) tensor([0.4161, 0.0375, 0.1378, 0.4086]) -Greedy action tensor([ 1.0974, -0.3139, 1.1081, 0.6602]) tensor([0.3448, 0.0841, 0.3485, 0.2227]) -Greedy action tensor([ 0.2967, -1.1536, 0.9326, 1.2019]) tensor([0.1787, 0.0419, 0.3375, 0.4419]) -Greedy action tensor([ 1.4130, -0.1329, 1.2581, 1.2005]) tensor([0.3474, 0.0740, 0.2976, 0.2809]) -Greedy action tensor([ 1.0927, -0.4511, -0.7303, 1.0323]) tensor([0.4317, 0.0922, 0.0697, 0.4064]) -Greedy action tensor([ 0.4628, -0.1979, -0.1260, 2.0328]) tensor([0.1454, 0.0751, 0.0807, 0.6988]) -Greedy action tensor([ 0.1826, 1.1585, -0.2462, 1.1211]) tensor([0.1457, 0.3868, 0.0949, 0.3726]) -Greedy action tensor([ 1.7224, -0.4824, 0.5183, 1.5251]) tensor([0.4482, 0.0494, 0.1344, 0.3680]) -Greedy action tensor([-1.0573, -0.6884, 0.5714, 0.4272]) tensor([0.0836, 0.1210, 0.4263, 0.3691]) -Greedy action tensor([ 0.0938, 0.1826, -0.0567, 1.0169]) tensor([0.1828, 0.1998, 0.1573, 0.4601]) -Greedy action tensor([-0.0473, -0.7424, -0.6735, 0.5861]) tensor([0.2553, 0.1274, 0.1365, 0.4809]) -Greedy action tensor([ 0.3364, -0.2880, -1.0434, 0.0720]) tensor([0.3914, 0.2096, 0.0985, 0.3005]) -Greedy action tensor([-0.3358, 0.0475, 1.1783, -0.9602]) tensor([0.1325, 0.1944, 0.6022, 0.0710]) -Greedy action tensor([ 0.2876, 0.3002, -0.5028, 2.0120]) tensor([0.1238, 0.1254, 0.0562, 0.6946]) -Greedy action tensor([ 0.4984, -1.4921, -0.6646, 0.9578]) tensor([0.3298, 0.0451, 0.1031, 0.5221]) -Greedy action tensor([ 0.5655, 0.3032, 0.4150, -0.5115]) tensor([0.3367, 0.2590, 0.2896, 0.1147]) -Greedy action tensor([ 0.8226, -1.7239, 0.3881, 0.7897]) tensor([0.3713, 0.0291, 0.2404, 0.3592]) -Greedy action tensor([ 0.9896, -0.0036, -0.0511, 0.3386]) tensor([0.4454, 0.1650, 0.1573, 0.2323]) -Greedy action tensor([ 1.0238, -0.8699, 1.5157, 1.2472]) tensor([0.2477, 0.0373, 0.4052, 0.3098]) -Greedy action tensor([ 0.5486, -0.2150, -0.3848, 0.8997]) tensor([0.3049, 0.1421, 0.1199, 0.4331]) -Greedy action tensor([ 0.7028, -0.0911, 1.1773, 0.9989]) tensor([0.2271, 0.1026, 0.3650, 0.3053]) -Greedy action tensor([0.0214, 0.4251, 0.2481, 0.9365]) tensor([0.1600, 0.2396, 0.2007, 0.3996]) -Greedy action tensor([ 0.2699, 0.1723, -0.0643, 1.3784]) tensor([0.1769, 0.1605, 0.1267, 0.5360]) -Greedy action tensor([ 0.3400, -1.1752, 0.5919, 0.3940]) tensor([0.2808, 0.0617, 0.3612, 0.2963]) -Greedy action tensor([ 1.1065, -0.7460, 1.3419, 0.8759]) tensor([0.3109, 0.0488, 0.3934, 0.2469]) -Greedy action tensor([1.2112, 0.4676, 0.8979, 0.5551]) tensor([0.3669, 0.1744, 0.2682, 0.1904]) -Greedy action tensor([ 1.3438, -0.3141, 0.7239, 2.1242]) tensor([0.2557, 0.0487, 0.1376, 0.5580]) -Greedy action tensor([-0.0973, -0.2143, 1.5159, 0.4662]) tensor([0.1154, 0.1027, 0.5792, 0.2027]) -Greedy action tensor([-0.9815, -0.6005, 0.5683, 0.0769]) tensor([0.0994, 0.1456, 0.4684, 0.2866]) -Greedy action tensor([1.0471, 1.1805, 0.5006, 0.4835]) tensor([0.3039, 0.3472, 0.1759, 0.1730]) -Greedy action tensor([-0.3951, -0.6782, -0.7550, 0.9371]) tensor([0.1602, 0.1207, 0.1118, 0.6072]) -Greedy action tensor([ 1.0303, -0.4788, 1.1398, 0.5471]) tensor([0.3386, 0.0749, 0.3777, 0.2088]) -Greedy action tensor([-0.2004, -0.0719, 0.5497, -1.1391]) tensor([0.2153, 0.2448, 0.4557, 0.0842]) -Greedy action tensor([ 0.0465, -0.3916, -0.6110, 1.8874]) tensor([0.1181, 0.0762, 0.0612, 0.7445]) -Greedy action tensor([ 1.6502, -0.3723, -1.0273, 0.8658]) tensor([0.6033, 0.0798, 0.0415, 0.2754]) -Greedy action tensor([-0.3149, -0.1761, -0.9162, 0.5530]) tensor([0.1969, 0.2262, 0.1079, 0.4690]) -Greedy action tensor([-1.1850, -0.5098, 0.7616, 0.0410]) tensor([0.0748, 0.1469, 0.5237, 0.2547]) -Greedy action tensor([ 0.6738, -0.4334, 1.0752, 1.6250]) tensor([0.1847, 0.0610, 0.2760, 0.4783]) -Greedy action tensor([ 1.0539, -0.0484, 0.4178, -0.3479]) tensor([0.4745, 0.1576, 0.2512, 0.1168]) -Greedy action tensor([-0.5734, 1.1367, 0.3572, -0.9812]) tensor([0.1028, 0.5682, 0.2606, 0.0684]) -Greedy action tensor([-0.4665, -0.0735, 0.4052, 0.5964]) tensor([0.1288, 0.1907, 0.3078, 0.3727]) -Greedy action tensor([-0.1094, -0.7185, -0.1951, 1.3676]) tensor([0.1462, 0.0795, 0.1342, 0.6402]) -Greedy action tensor([ 0.4135, -0.2211, 0.6803, 1.1261]) tensor([0.2051, 0.1087, 0.2678, 0.4183]) -Greedy action tensor([ 0.7224, -0.8322, -0.1855, 0.5767]) tensor([0.4034, 0.0852, 0.1627, 0.3487]) -Greedy action tensor([ 0.4937, -0.4731, -0.9460, 0.9507]) tensor([0.3128, 0.1190, 0.0741, 0.4941]) -Greedy action tensor([ 0.8999, -0.7811, 1.2007, -0.2134]) tensor([0.3490, 0.0650, 0.4714, 0.1146]) -Greedy action tensor([-0.1618, -0.8493, -0.3311, 1.4768]) tensor([0.1334, 0.0671, 0.1126, 0.6868]) -Greedy action tensor([ 0.9848, -0.1067, -0.2626, 1.5790]) tensor([0.2912, 0.0977, 0.0836, 0.5275]) -Greedy action tensor([ 0.8120, -0.8872, 0.7892, 1.5051]) tensor([0.2404, 0.0439, 0.2349, 0.4807]) -Greedy action tensor([-0.0359, 1.6058, 0.1702, 0.5957]) tensor([0.1078, 0.5569, 0.1325, 0.2028]) -Greedy action tensor([ 0.3999, -0.3662, -0.1024, -0.3415]) tensor([0.3927, 0.1825, 0.2376, 0.1871]) -Greedy action tensor([ 0.6007, -0.2746, -0.0400, -0.3832]) tensor([0.4315, 0.1798, 0.2274, 0.1613]) -Greedy action tensor([ 0.6242, -0.2062, 0.1350, -0.4319]) tensor([0.4172, 0.1819, 0.2558, 0.1451]) -Greedy action tensor([ 1.0201, -0.4248, 0.2147, -0.4777]) tensor([0.5246, 0.1237, 0.2344, 0.1173]) -Greedy action tensor([ 0.4585, -0.3040, -0.0576, -0.1367]) tensor([0.3824, 0.1784, 0.2283, 0.2109]) -Greedy action tensor([ 0.4228, -0.1558, -0.0204, -0.1228]) tensor([0.3594, 0.2015, 0.2308, 0.2083]) -Greedy action tensor([ 0.3851, 0.0122, 0.0351, -0.2274]) tensor([0.3407, 0.2346, 0.2401, 0.1846]) -Greedy action tensor([ 0.2535, -0.0823, -0.0944, -0.2930]) tensor([0.3333, 0.2383, 0.2354, 0.1930]) -Greedy action tensor([ 0.4359, -0.0277, -0.1075, -0.4246]) tensor([0.3798, 0.2389, 0.2206, 0.1606]) -Greedy action tensor([ 0.2137, 0.1765, 0.1380, -0.3206]) tensor([0.2876, 0.2771, 0.2667, 0.1686]) -Greedy action tensor([ 0.6724, -0.2930, -0.0333, -0.4393]) tensor([0.4538, 0.1728, 0.2241, 0.1493]) -Greedy action tensor([ 0.5157, -0.0018, -0.0234, -0.1387]) tensor([0.3705, 0.2208, 0.2161, 0.1926]) -Greedy action tensor([ 0.3514, -0.3054, 0.1787, -0.2394]) tensor([0.3432, 0.1780, 0.2888, 0.1901]) -Greedy action tensor([ 0.4017, 0.2107, 0.0229, -0.4064]) tensor([0.3382, 0.2794, 0.2316, 0.1508]) -Greedy action tensor([ 0.4958, -0.2868, -0.0854, -0.3009]) tensor([0.4053, 0.1853, 0.2267, 0.1827]) -Greedy action tensor([ 0.7347, -0.3032, 0.0429, -0.3003]) tensor([0.4525, 0.1603, 0.2265, 0.1607]) -Greedy action tensor([ 0.5265, -0.3718, 0.0246, -0.4696]) tensor([0.4198, 0.1710, 0.2542, 0.1550]) -Greedy action tensor([ 0.2290, -0.1636, -0.0244, -0.3979]) tensor([0.3349, 0.2262, 0.2600, 0.1789]) -Greedy action tensor([ 0.5081, -0.0885, 0.0241, -0.5026]) tensor([0.3951, 0.2176, 0.2435, 0.1438]) -Greedy action tensor([ 0.6557, 0.2375, -0.1463, -0.2295]) tensor([0.3969, 0.2613, 0.1780, 0.1638]) -Greedy action tensor([ 0.3647, 0.0355, -0.1514, -0.0470]) tensor([0.3357, 0.2415, 0.2004, 0.2224]) -Greedy action tensor([ 0.5035, 0.1421, -0.0969, -0.0493]) tensor([0.3545, 0.2470, 0.1945, 0.2040]) -Greedy action tensor([ 0.4825, -0.4826, -0.0956, -0.6826]) tensor([0.4437, 0.1690, 0.2489, 0.1384]) -Greedy action tensor([ 0.5611, -0.3800, -0.0654, -0.2433]) tensor([0.4216, 0.1645, 0.2253, 0.1886]) -Greedy action tensor([ 0.4976, -0.1508, -0.0261, -0.3841]) tensor([0.3954, 0.2067, 0.2342, 0.1637]) -Greedy action tensor([ 0.4253, -0.3382, -0.0363, -0.3472]) tensor([0.3909, 0.1822, 0.2464, 0.1805]) -Greedy action tensor([ 0.2567, -0.1735, 0.0386, -0.1819]) tensor([0.3226, 0.2098, 0.2594, 0.2081]) -Greedy action tensor([ 0.6803, -0.2694, 0.0075, -0.3399]) tensor([0.4429, 0.1713, 0.2260, 0.1597]) -Greedy action tensor([ 0.4788, 0.0284, -0.1434, -0.1952]) tensor([0.3726, 0.2375, 0.2000, 0.1899]) -Greedy action tensor([ 0.5925, -0.4591, -0.1068, -0.1901]) tensor([0.4341, 0.1517, 0.2157, 0.1985]) -Greedy action tensor([ 0.3714, -0.1235, -0.0802, -0.2604]) tensor([0.3600, 0.2195, 0.2292, 0.1914]) -Greedy action tensor([ 0.5536, -0.3315, 0.0596, -0.5909]) tensor([0.4271, 0.1763, 0.2606, 0.1360]) -Greedy action tensor([ 0.4507, -0.0549, 0.0172, -0.0588]) tensor([0.3506, 0.2115, 0.2273, 0.2106]) -Greedy action tensor([ 0.3223, -0.2867, -0.0541, -0.2587]) tensor([0.3585, 0.1950, 0.2460, 0.2005]) -Greedy action tensor([ 0.4726, -0.0217, -0.0825, -0.1034]) tensor([0.3641, 0.2221, 0.2090, 0.2047]) -Greedy action tensor([ 0.7885, -0.6048, -0.0127, -0.2915]) tensor([0.4910, 0.1219, 0.2203, 0.1667]) -Greedy action tensor([ 0.5066, -0.2234, 0.1363, -0.2110]) tensor([0.3759, 0.1811, 0.2596, 0.1834]) -Greedy action tensor([ 0.7547, -0.1581, 0.0881, -0.0528]) tensor([0.4236, 0.1700, 0.2175, 0.1889]) -Greedy action tensor([ 0.6881, -0.1777, -0.0146, -0.3081]) tensor([0.4376, 0.1841, 0.2167, 0.1616]) -Greedy action tensor([ 0.5299, -0.1405, 0.2559, -0.3705]) tensor([0.3734, 0.1910, 0.2839, 0.1517]) -Greedy action tensor([ 0.6905, -0.5033, -0.0467, -0.3147]) tensor([0.4657, 0.1411, 0.2228, 0.1704]) -Greedy action tensor([ 0.6698, -0.3166, -0.0902, -0.5327]) tensor([0.4671, 0.1742, 0.2184, 0.1403]) -Greedy action tensor([ 0.4041, -0.2644, 0.0386, -0.4397]) tensor([0.3793, 0.1944, 0.2632, 0.1631]) -Greedy action tensor([ 0.7078, -0.3227, 0.0054, -0.2765]) tensor([0.4492, 0.1603, 0.2226, 0.1679]) -Greedy action tensor([ 0.7499, -0.5986, 0.0727, -0.7198]) tensor([0.5006, 0.1300, 0.2543, 0.1151]) -Greedy action tensor([ 0.5836, -0.0065, -0.0713, -0.1843]) tensor([0.3940, 0.2184, 0.2047, 0.1828]) -Greedy action tensor([ 0.3091, -0.0757, -0.0324, -0.1948]) tensor([0.3338, 0.2272, 0.2373, 0.2017]) -Greedy action tensor([ 0.1973, -0.1872, -0.1010, -0.3651]) tensor([0.3341, 0.2275, 0.2480, 0.1904]) -Greedy action tensor([ 0.6052, -0.1945, 0.0423, -0.4279]) tensor([0.4211, 0.1893, 0.2398, 0.1499]) -Greedy action tensor([ 0.4482, -0.3101, -0.1124, -0.2171]) tensor([0.3916, 0.1835, 0.2236, 0.2013]) -Greedy action tensor([ 0.8804, -0.5126, -0.0787, -0.5325]) tensor([0.5333, 0.1324, 0.2044, 0.1298]) -Greedy action tensor([ 0.4447, -0.0480, -0.1433, -0.1256]) tensor([0.3661, 0.2236, 0.2033, 0.2070]) -Greedy action tensor([ 0.7500, -0.3332, -0.0546, -0.2877]) tensor([0.4673, 0.1582, 0.2090, 0.1655]) -Greedy action tensor([ 0.6982, -0.3483, 0.1314, -0.3191]) tensor([0.4386, 0.1540, 0.2488, 0.1586]) -Greedy action tensor([ 0.5310, -0.4276, 0.1168, -0.5228]) tensor([0.4179, 0.1602, 0.2762, 0.1457]) -Greedy action tensor([ 0.6405, -0.0762, 0.2048, -0.3326]) tensor([0.3979, 0.1943, 0.2574, 0.1504]) -Greedy action tensor([ 0.3244, 0.0248, 0.0212, -0.2185]) tensor([0.3267, 0.2421, 0.2413, 0.1898]) -Greedy action tensor([ 0.7085, -0.4224, -0.1371, -0.4194]) tensor([0.4817, 0.1555, 0.2068, 0.1560]) -Greedy action tensor([ 1.0393, -0.8570, 0.1668, -0.6407]) tensor([0.5700, 0.0856, 0.2382, 0.1062]) -Greedy action tensor([ 0.3448, -0.2514, -0.1948, -0.2641]) tensor([0.3734, 0.2057, 0.2177, 0.2031]) -Greedy action tensor([ 1.1657, -1.1544, -0.1097, -0.4381]) tensor([0.6334, 0.0622, 0.1769, 0.1274]) -Greedy action tensor([ 0.3972, -0.1068, -0.0393, -0.2519]) tensor([0.3606, 0.2179, 0.2331, 0.1884]) -Greedy action tensor([ 0.8035, -0.6144, -0.1204, -0.4619]) tensor([0.5205, 0.1261, 0.2066, 0.1468]) -Greedy action tensor([ 0.7500, -0.4119, -0.0727, -0.5695]) tensor([0.4952, 0.1549, 0.2175, 0.1324]) -Greedy action tensor([ 0.7280, -0.5877, 0.0789, -0.4127]) tensor([0.4738, 0.1271, 0.2476, 0.1514]) -Greedy action tensor([ 0.8990, -0.2802, 0.0227, -0.8171]) tensor([0.5253, 0.1615, 0.2187, 0.0944]) -Greedy action tensor([ 1.0615, -0.3612, -0.1033, -0.3892]) tensor([0.5594, 0.1349, 0.1745, 0.1311]) -Greedy action tensor([ 0.7697, -0.4953, -0.0065, -0.4099]) tensor([0.4879, 0.1377, 0.2245, 0.1500]) -Greedy action tensor([ 0.7316, -0.3812, -0.1701, -0.3828]) tensor([0.4848, 0.1593, 0.1968, 0.1591]) -Greedy action tensor([ 0.4841, -0.2222, -0.0480, -0.3309]) tensor([0.3963, 0.1955, 0.2328, 0.1754]) -Greedy action tensor([ 0.5022, -0.3643, -0.0234, -0.2609]) tensor([0.4036, 0.1697, 0.2386, 0.1882]) -Greedy action tensor([ 0.6235, -0.4326, 0.0870, -0.4614]) tensor([0.4404, 0.1532, 0.2576, 0.1488]) -Greedy action tensor([ 0.7545, -0.6358, -0.1556, -0.5387]) tensor([0.5192, 0.1293, 0.2090, 0.1425]) -Greedy action tensor([ 1.0607, -0.5272, 0.0664, -0.3511]) tensor([0.5501, 0.1124, 0.2035, 0.1340]) -Greedy action tensor([ 0.7240, -0.4843, 0.1192, -0.5641]) tensor([0.4715, 0.1409, 0.2575, 0.1301]) -Greedy action tensor([ 0.6401, -0.3311, -0.1091, -0.3645]) tensor([0.4510, 0.1707, 0.2132, 0.1651]) -Greedy action tensor([ 0.3951, -0.2552, -0.0163, -0.1875]) tensor([0.3646, 0.1903, 0.2416, 0.2036]) -Greedy action tensor([ 0.6194, -0.4296, 0.1557, -0.4087]) tensor([0.4279, 0.1499, 0.2691, 0.1531]) -Greedy action tensor([ 0.4290, -0.1325, 0.1889, -0.3587]) tensor([0.3556, 0.2028, 0.2797, 0.1618]) -Greedy action tensor([ 0.4373, -0.0929, -0.0156, -0.2245]) tensor([0.3649, 0.2148, 0.2320, 0.1883]) -Greedy action tensor([ 0.6018, -0.1156, 0.1775, -0.3984]) tensor([0.3984, 0.1944, 0.2606, 0.1465]) -Greedy action tensor([ 1.0630, -0.4331, -0.3161, -0.1169]) tensor([0.5608, 0.1256, 0.1412, 0.1723]) -Greedy action tensor([ 0.9856, -0.6793, -0.2166, 0.2024]) tensor([0.5137, 0.0972, 0.1544, 0.2347]) -Greedy action tensor([ 0.7635, -0.3986, -0.3203, 0.0975]) tensor([0.4619, 0.1445, 0.1563, 0.2373]) -Greedy action tensor([ 0.7474, -0.4159, -0.0904, 0.0950]) tensor([0.4413, 0.1379, 0.1909, 0.2298]) -Greedy action tensor([ 1.2717, -0.2829, -0.1090, 0.2268]) tensor([0.5511, 0.1164, 0.1386, 0.1939]) -Greedy action tensor([ 0.5718, -0.3396, -0.0713, 0.0975]) tensor([0.3922, 0.1576, 0.2062, 0.2441]) -Greedy action tensor([ 0.8094, -0.4368, -0.1866, 0.3843]) tensor([0.4328, 0.1245, 0.1598, 0.2829]) -Greedy action tensor([ 0.6572, -0.4426, -0.0506, -0.0728]) tensor([0.4333, 0.1443, 0.2135, 0.2088]) -Greedy action tensor([ 1.4000, -0.5329, -0.5279, 0.2879]) tensor([0.6176, 0.0894, 0.0898, 0.2031]) -Greedy action tensor([ 1.1726, -0.6741, -0.3960, 0.3169]) tensor([0.5583, 0.0881, 0.1163, 0.2373]) -Greedy action tensor([ 1.0941, -0.4228, -0.3527, 0.5562]) tensor([0.4905, 0.1076, 0.1154, 0.2864]) -Greedy action tensor([ 1.0168, -0.2781, -0.1039, -0.0499]) tensor([0.5144, 0.1409, 0.1677, 0.1770]) -Greedy action tensor([ 0.8666, -0.6280, -0.6972, 0.4126]) tensor([0.4834, 0.1084, 0.1012, 0.3070]) -Greedy action tensor([ 0.6474, -0.4345, -0.1829, -0.1549]) tensor([0.4498, 0.1525, 0.1961, 0.2017]) -Greedy action tensor([ 1.0599, -0.6789, -0.5845, 0.4058]) tensor([0.5294, 0.0930, 0.1022, 0.2753]) -Greedy action tensor([ 1.2840, -0.6967, 0.2295, 0.0560]) tensor([0.5621, 0.0775, 0.1958, 0.1646]) -Greedy action tensor([ 0.9467, -0.3493, -0.4706, 0.5752]) tensor([0.4534, 0.1241, 0.1099, 0.3127]) -Greedy action tensor([ 1.0040e+00, -8.8599e-04, -3.6276e-01, -3.7057e-02]) tensor([0.5066, 0.1854, 0.1291, 0.1789]) -Greedy action tensor([ 0.9612, -0.3895, -0.5146, 0.5312]) tensor([0.4677, 0.1212, 0.1069, 0.3042]) -Greedy action tensor([ 1.2061, -0.2253, -0.3201, 0.0371]) tensor([0.5659, 0.1352, 0.1230, 0.1758]) -Greedy action tensor([ 0.7298, -0.4541, -0.1447, 0.0017]) tensor([0.4533, 0.1388, 0.1891, 0.2189]) -Greedy action tensor([ 1.3782, -1.1078, -0.4778, 0.2903]) tensor([0.6343, 0.0528, 0.0991, 0.2137]) -Greedy action tensor([ 0.4786, -0.2776, -0.3692, 0.4000]) tensor([0.3543, 0.1663, 0.1518, 0.3276]) -Greedy action tensor([ 0.8260, -0.1733, -0.2910, -0.1682]) tensor([0.4842, 0.1782, 0.1584, 0.1792]) -Greedy action tensor([ 1.4013, -0.1995, -0.4592, -0.1046]) tensor([0.6333, 0.1278, 0.0985, 0.1405]) -Greedy action tensor([ 1.3996, -0.9072, -0.0316, 0.2584]) tensor([0.6031, 0.0601, 0.1442, 0.1927]) -Greedy action tensor([ 1.6614, -0.5186, -0.2848, 0.3270]) tensor([0.6583, 0.0744, 0.0940, 0.1733]) -Greedy action tensor([ 1.3043, -0.6741, -0.0502, 0.0501]) tensor([0.5947, 0.0822, 0.1535, 0.1696]) -Greedy action tensor([ 1.2360, -0.5136, -0.2816, 0.2009]) tensor([0.5720, 0.0994, 0.1254, 0.2032]) -Greedy action tensor([ 1.5160, -0.9093, -0.1234, 0.2797]) tensor([0.6357, 0.0562, 0.1234, 0.1847]) -Greedy action tensor([ 1.6226, -0.5056, -0.4165, 0.3199]) tensor([0.6575, 0.0783, 0.0856, 0.1787]) -Greedy action tensor([ 1.0178, -0.4399, -0.0475, -0.0345]) tensor([0.5191, 0.1208, 0.1789, 0.1812]) -Greedy action tensor([ 1.1673, -0.4430, -0.7834, -0.4013]) tensor([0.6450, 0.1289, 0.0917, 0.1344]) -Greedy action tensor([ 0.6423, -0.1195, -0.5822, 0.4132]) tensor([0.3912, 0.1826, 0.1150, 0.3111]) -Greedy action tensor([ 1.2765, -0.6977, -0.3605, 0.6484]) tensor([0.5356, 0.0744, 0.1042, 0.2858]) -Greedy action tensor([ 1.0883, -0.3890, -0.3143, 0.5926]) tensor([0.4800, 0.1096, 0.1181, 0.2924]) -Greedy action tensor([ 0.5908, -0.3388, -0.2666, 0.1689]) tensor([0.4041, 0.1595, 0.1714, 0.2650]) -Greedy action tensor([ 1.2861, -0.7576, -0.3231, 0.0380]) tensor([0.6186, 0.0801, 0.1237, 0.1776]) -Greedy action tensor([ 1.0864, -0.3879, -0.3820, -0.3174]) tensor([0.5865, 0.1343, 0.1351, 0.1441]) -Greedy action tensor([ 1.4348, -0.4321, -0.3029, 0.3248]) tensor([0.6024, 0.0931, 0.1060, 0.1985]) -Greedy action tensor([ 1.4414, -0.7066, -0.0839, 0.4607]) tensor([0.5850, 0.0683, 0.1273, 0.2194]) -Greedy action tensor([ 0.9188, -0.1760, 0.1343, -0.0491]) tensor([0.4606, 0.1541, 0.2102, 0.1750]) -Greedy action tensor([ 1.5749, -0.2755, -0.2420, 0.1611]) tensor([0.6398, 0.1006, 0.1040, 0.1556]) -Greedy action tensor([ 0.9105, -0.2383, -0.4219, 0.3742]) tensor([0.4617, 0.1464, 0.1218, 0.2701]) -Greedy action tensor([ 1.1520, -0.6711, -0.1992, 0.5071]) tensor([0.5141, 0.0830, 0.1331, 0.2698]) -Greedy action tensor([ 1.1986, -0.4010, -0.4869, 0.1151]) tensor([0.5795, 0.1170, 0.1074, 0.1961]) -Greedy action tensor([ 0.9635, -0.2537, -0.2544, -0.1220]) tensor([0.5182, 0.1534, 0.1533, 0.1750]) -Greedy action tensor([ 1.3257, -0.3920, -0.3272, 0.3071]) tensor([0.5773, 0.1036, 0.1106, 0.2085]) -Greedy action tensor([ 1.3516, -0.6943, -0.0547, 0.3250]) tensor([0.5772, 0.0746, 0.1414, 0.2068]) -Greedy action tensor([ 1.0384, -0.5210, -0.4815, 0.6547]) tensor([0.4739, 0.0996, 0.1037, 0.3229]) -Greedy action tensor([ 1.1337, -0.3573, -0.1487, 0.2776]) tensor([0.5188, 0.1168, 0.1439, 0.2204]) -Greedy action tensor([ 0.3958, -0.4496, -0.0876, 0.1873]) tensor([0.3499, 0.1502, 0.2158, 0.2841]) -Greedy action tensor([ 1.5363, -0.3318, -0.1504, 0.1456]) tensor([0.6296, 0.0972, 0.1165, 0.1567]) -Greedy action tensor([ 1.0355, -0.2285, -0.1390, 0.0888]) tensor([0.5052, 0.1427, 0.1561, 0.1960]) -Greedy action tensor([ 1.3212, -0.6320, -0.3291, 0.4236]) tensor([0.5743, 0.0814, 0.1102, 0.2340]) -Greedy action tensor([ 0.5197, -0.4573, -0.2505, 0.1867]) tensor([0.3912, 0.1473, 0.1811, 0.2804]) -Greedy action tensor([ 0.8423, -0.5492, -0.0627, 0.4046]) tensor([0.4350, 0.1082, 0.1760, 0.2808]) -Greedy action tensor([ 0.3509, -0.2006, -0.2371, 0.2173]) tensor([0.3326, 0.1916, 0.1847, 0.2910]) -Greedy action tensor([ 1.0653, -0.4812, -0.3248, 0.0504]) tensor([0.5481, 0.1167, 0.1365, 0.1986]) -Greedy action tensor([ 0.2969, -0.2658, -0.0505, 0.0806]) tensor([0.3245, 0.1848, 0.2293, 0.2614]) -Greedy action tensor([ 0.6879, 0.0212, -0.2276, -0.2360]) tensor([0.4328, 0.2222, 0.1732, 0.1718]) -Greedy action tensor([ 0.6424, -0.2089, -0.1500, 0.4731]) tensor([0.3671, 0.1567, 0.1662, 0.3099]) -Greedy action tensor([ 1.0371, -0.1399, -0.3020, -0.0669]) tensor([0.5258, 0.1621, 0.1378, 0.1743]) -Greedy action tensor([ 1.4153, -0.5652, -0.3232, 0.2496]) tensor([0.6152, 0.0849, 0.1081, 0.1918]) -Greedy action tensor([ 1.1718, -0.2460, -0.2712, 0.1192]) tensor([0.5472, 0.1326, 0.1293, 0.1910]) -Greedy action tensor([ 0.7048, -0.1036, 0.0129, -0.0376]) tensor([0.4128, 0.1840, 0.2067, 0.1965]) -Greedy action tensor([ 0.6873, 0.0960, 0.0131, -0.0012]) tensor([0.3898, 0.2158, 0.1986, 0.1958]) -Greedy action tensor([ 1.0445, -0.6699, -0.4514, 0.5408]) tensor([0.4979, 0.0897, 0.1116, 0.3009]) -Greedy action tensor([ 0.9382, -0.3848, -0.5090, 0.3407]) tensor([0.4874, 0.1298, 0.1146, 0.2682]) -Greedy action tensor([ 1.4807, -0.5708, -0.3105, 0.3301]) tensor([0.6204, 0.0798, 0.1035, 0.1963]) -Greedy action tensor([ 1.2527, -0.5209, -0.1866, -0.1579]) tensor([0.6058, 0.1028, 0.1436, 0.1478]) -Greedy action tensor([ 0.9723, -0.5483, -0.4466, 0.9448]) tensor([0.4109, 0.0898, 0.0994, 0.3998]) -Greedy action tensor([ 0.6256, -0.3844, -0.2296, 0.3569]) tensor([0.3916, 0.1426, 0.1665, 0.2993]) -Greedy action tensor([ 1.3852, -0.6036, -0.2056, 0.7557]) tensor([0.5338, 0.0730, 0.1088, 0.2844]) -Greedy action tensor([ 1.0307, -0.1499, 0.0487, -0.0658]) tensor([0.4961, 0.1524, 0.1858, 0.1657]) -Greedy action tensor([ 1.1600, -0.6302, -0.0866, 0.3758]) tensor([0.5233, 0.0874, 0.1504, 0.2389]) -Greedy action tensor([ 1.0017, -0.1748, -0.3821, 0.4192]) tensor([0.4723, 0.1456, 0.1184, 0.2638]) -Greedy action tensor([ 0.8205, -0.4349, 0.1643, 0.1325]) tensor([0.4336, 0.1236, 0.2250, 0.2179]) -Greedy action tensor([ 0.7749, -0.4994, -0.4952, 0.6200]) tensor([0.4137, 0.1157, 0.1162, 0.3544]) -Greedy action tensor([ 0.8682, 0.0224, -0.2159, 0.0626]) tensor([0.4516, 0.1938, 0.1527, 0.2018]) -Greedy action tensor([ 1.4476, -0.5802, -0.3113, 0.2119]) tensor([0.6272, 0.0826, 0.1080, 0.1823]) -Greedy action tensor([-1.7433, -0.4665, 0.5654, -0.0688]) tensor([0.0500, 0.1794, 0.5035, 0.2671]) -Greedy action tensor([-1.4233, -0.5288, 0.4128, 0.1801]) tensor([0.0681, 0.1665, 0.4270, 0.3384]) -Greedy action tensor([-1.6158, -0.5294, 0.8038, 0.7007]) tensor([0.0395, 0.1169, 0.4435, 0.4001]) -Greedy action tensor([-1.7657, -0.4946, 0.5876, -0.0360]) tensor([0.0483, 0.1720, 0.5076, 0.2721]) -Greedy action tensor([-1.6783, -0.5649, 0.7689, 0.3741]) tensor([0.0428, 0.1302, 0.4941, 0.3329]) -Greedy action tensor([-1.8130, -0.3823, 0.5867, -0.0563]) tensor([0.0455, 0.1901, 0.5010, 0.2634]) -Greedy action tensor([-1.4227, -0.4303, 1.0471, 1.0839]) tensor([0.0360, 0.0971, 0.4255, 0.4414]) -Greedy action tensor([-1.4453, -0.6297, 0.5327, 0.2014]) tensor([0.0638, 0.1442, 0.4610, 0.3310]) -Greedy action tensor([-1.8032, -0.4805, 0.5970, -0.1194]) tensor([0.0472, 0.1773, 0.5209, 0.2545]) -Greedy action tensor([-1.7663, -0.4323, 0.6150, -0.2030]) tensor([0.0490, 0.1862, 0.5306, 0.2342]) -Greedy action tensor([-1.1526, -0.4744, 0.5781, -0.4668]) tensor([0.0943, 0.1859, 0.5325, 0.1873]) -Greedy action tensor([-1.8927, -0.4268, 1.0058, 0.5138]) tensor([0.0289, 0.1253, 0.5249, 0.3209]) -Greedy action tensor([-1.1731, -0.3010, 0.5844, 0.9546]) tensor([0.0569, 0.1360, 0.3297, 0.4774]) -Greedy action tensor([-1.3359, -0.6410, 0.0233, -0.5738]) tensor([0.1106, 0.2217, 0.4307, 0.2370]) -Greedy action tensor([-1.5550, -0.1105, 0.4092, 0.0212]) tensor([0.0581, 0.2464, 0.4144, 0.2811]) -Greedy action tensor([-1.8320, -0.8364, 0.2796, -0.4897]) tensor([0.0633, 0.1713, 0.5230, 0.2423]) -Greedy action tensor([-1.3718, -0.4582, 0.4458, 0.4117]) tensor([0.0641, 0.1598, 0.3947, 0.3814]) -Greedy action tensor([-1.0570, -0.4333, 0.9659, 1.3277]) tensor([0.0470, 0.0877, 0.3552, 0.5101]) -Greedy action tensor([-0.2864, -0.1349, 1.0815, 1.5904]) tensor([0.0792, 0.0922, 0.3111, 0.5175]) -Greedy action tensor([-1.9447, -0.7116, 0.5291, -0.1799]) tensor([0.0452, 0.1550, 0.5360, 0.2638]) -Greedy action tensor([-1.6980, -0.4469, 0.5349, -0.0423]) tensor([0.0525, 0.1833, 0.4894, 0.2748]) -Greedy action tensor([-1.6196, -0.7751, 1.1056, 0.0182]) tensor([0.0421, 0.0981, 0.6430, 0.2168]) -Greedy action tensor([-1.9052, -0.4584, 0.6518, -0.1550]) tensor([0.0418, 0.1778, 0.5396, 0.2408]) -Greedy action tensor([-0.6744, -0.4709, 1.0963, 1.6066]) tensor([0.0559, 0.0685, 0.3285, 0.5471]) -Greedy action tensor([-1.2293, -0.6762, 0.9623, 1.1037]) tensor([0.0455, 0.0790, 0.4069, 0.4687]) -Greedy action tensor([-0.8383, -0.3779, 1.2209, 1.4892]) tensor([0.0484, 0.0766, 0.3792, 0.4958]) -Greedy action tensor([-1.3620, -0.4504, 0.6187, 0.6030]) tensor([0.0560, 0.1392, 0.4055, 0.3993]) -Greedy action tensor([-1.0813, -0.3169, 1.2549, 1.3311]) tensor([0.0406, 0.0871, 0.4195, 0.4528]) -Greedy action tensor([-1.7017, -0.4866, 0.5398, -0.0536]) tensor([0.0527, 0.1776, 0.4958, 0.2739]) -Greedy action tensor([-1.8188, -0.4838, 0.6006, -0.0819]) tensor([0.0460, 0.1750, 0.5175, 0.2615]) -Greedy action tensor([-1.9289, -0.6014, 0.9729, 0.2686]) tensor([0.0313, 0.1179, 0.5693, 0.2815]) -Greedy action tensor([-1.8414, -0.7360, 0.2504, -0.3389]) tensor([0.0602, 0.1818, 0.4875, 0.2705]) -Greedy action tensor([-0.8647, -0.5906, 0.1859, 0.3854]) tensor([0.1154, 0.1518, 0.3300, 0.4028]) -Greedy action tensor([-1.7544, -0.4279, 1.1685, 0.8466]) tensor([0.0271, 0.1023, 0.5048, 0.3658]) -Greedy action tensor([-1.8721, -0.4275, 0.6308, -0.1352]) tensor([0.0432, 0.1833, 0.5281, 0.2455]) -Greedy action tensor([-1.5023, 0.2997, 0.3782, -0.0032]) tensor([0.0553, 0.3350, 0.3623, 0.2474]) -Greedy action tensor([-1.8297, -0.9156, -0.2295, -0.6811]) tensor([0.0862, 0.2150, 0.4270, 0.2718]) -Greedy action tensor([-0.9755, -0.6751, 0.2606, 0.1963]) tensor([0.1109, 0.1497, 0.3816, 0.3578]) -Greedy action tensor([-1.3143, -0.1368, 0.5102, -0.6350]) tensor([0.0805, 0.2614, 0.4992, 0.1588]) -Greedy action tensor([-1.3025, -0.5577, 0.3715, 0.0664]) tensor([0.0808, 0.1702, 0.4311, 0.3178]) -Greedy action tensor([-1.4005, -0.2923, 0.4181, 0.2800]) tensor([0.0643, 0.1947, 0.3961, 0.3450]) -Greedy action tensor([-1.8245, -0.3593, 0.5974, -0.1063]) tensor([0.0451, 0.1952, 0.5082, 0.2514]) -Greedy action tensor([-1.4256, -0.5700, 0.7083, 0.6541]) tensor([0.0505, 0.1188, 0.4266, 0.4041]) -Greedy action tensor([-2.0225, -0.9749, 0.5399, 0.0313]) tensor([0.0406, 0.1158, 0.5268, 0.3168]) -Greedy action tensor([-1.9436, -0.4621, 0.7923, 0.0820]) tensor([0.0352, 0.1549, 0.5430, 0.2669]) -Greedy action tensor([-0.9922, 0.3314, 0.2734, -0.3898]) tensor([0.0987, 0.3709, 0.3500, 0.1803]) -Greedy action tensor([-1.9661, -0.5072, 1.0698, 0.3892]) tensor([0.0273, 0.1173, 0.5679, 0.2875]) -Greedy action tensor([-1.2873, -0.5886, 0.6924, 0.9352]) tensor([0.0513, 0.1032, 0.3717, 0.4738]) -Greedy action tensor([-1.7836, -0.3699, 0.8334, 0.4538]) tensor([0.0355, 0.1459, 0.4861, 0.3325]) -Greedy action tensor([-1.9183, -0.3284, 0.6312, -0.1665]) tensor([0.0409, 0.2004, 0.5231, 0.2356]) -Greedy action tensor([-1.4499, -0.5522, 0.4075, 0.1646]) tensor([0.0672, 0.1648, 0.4304, 0.3376]) -Greedy action tensor([-0.9137, -0.7191, 0.9012, 1.3203]) tensor([0.0565, 0.0687, 0.3471, 0.5277]) -Greedy action tensor([-1.4039, -0.5661, 1.1609, 1.1188]) tensor([0.0348, 0.0803, 0.4518, 0.4331]) -Greedy action tensor([-0.9323, -0.5756, 0.3520, 0.3157]) tensor([0.1050, 0.1500, 0.3793, 0.3657]) -Greedy action tensor([-0.5012, -0.0391, 1.0336, 1.5650]) tensor([0.0661, 0.1050, 0.3069, 0.5220]) -Greedy action tensor([-1.1561, -0.6137, 0.6529, 1.0808]) tensor([0.0550, 0.0946, 0.3356, 0.5148]) -Greedy action tensor([-1.5554, -0.2750, 0.7006, 0.6058]) tensor([0.0438, 0.1576, 0.4182, 0.3804]) -Greedy action tensor([-1.7234, -0.4756, 0.5939, 0.0997]) tensor([0.0480, 0.1673, 0.4874, 0.2973]) -Greedy action tensor([-1.8305, 0.8491, 0.4459, 0.1416]) tensor([0.0308, 0.4485, 0.2997, 0.2211]) -Greedy action tensor([-1.6755, -0.5085, 0.5221, 0.0086]) tensor([0.0538, 0.1727, 0.4840, 0.2896]) -Greedy action tensor([-1.8640, -0.4264, 0.6281, -0.1215]) tensor([0.0435, 0.1830, 0.5253, 0.2482]) -Greedy action tensor([-1.4402, -0.5025, 0.7079, -0.4084]) tensor([0.0670, 0.1711, 0.5740, 0.1880]) -Greedy action tensor([-0.9992, -0.5591, 0.3254, 0.3906]) tensor([0.0968, 0.1504, 0.3641, 0.3887]) -Greedy action tensor([-1.5099, -0.5026, 0.5867, 0.3386]) tensor([0.0549, 0.1502, 0.4465, 0.3484]) -Greedy action tensor([-1.1739, -0.1525, 0.4764, -0.3953]) tensor([0.0896, 0.2488, 0.4665, 0.1951]) -Greedy action tensor([-0.9845, -0.5201, 1.2104, 1.4757]) tensor([0.0430, 0.0684, 0.3858, 0.5029]) -Greedy action tensor([-1.2064, -0.5992, 0.3527, 0.0432]) tensor([0.0903, 0.1657, 0.4291, 0.3149]) -Greedy action tensor([-1.9263, -0.4484, 0.6549, -0.1686]) tensor([0.0410, 0.1797, 0.5416, 0.2377]) -Greedy action tensor([-1.1421, -0.5718, 0.2256, 0.3631]) tensor([0.0893, 0.1579, 0.3506, 0.4022]) -Greedy action tensor([-1.3545, -0.5451, 0.4936, 0.6301]) tensor([0.0593, 0.1332, 0.3763, 0.4313]) -Greedy action tensor([-1.7448, -0.4498, 0.5647, -0.1044]) tensor([0.0503, 0.1837, 0.5066, 0.2594]) -Greedy action tensor([-2.0089, -0.9601, 0.4867, -0.2467]) tensor([0.0459, 0.1309, 0.5562, 0.2671]) -Greedy action tensor([-1.8845, -0.3723, 0.6153, -0.1434]) tensor([0.0427, 0.1937, 0.5201, 0.2435]) -Greedy action tensor([-1.1413, -0.5814, 0.2330, 0.3330]) tensor([0.0903, 0.1581, 0.3570, 0.3946]) -Greedy action tensor([-1.7305, -0.4605, 0.6308, 0.0665]) tensor([0.0472, 0.1680, 0.5003, 0.2845]) -Greedy action tensor([-1.0193, -0.4691, 0.6555, 0.9580]) tensor([0.0654, 0.1133, 0.3490, 0.4723]) -Greedy action tensor([-1.1661, -0.4554, 0.3926, 0.6598]) tensor([0.0714, 0.1454, 0.3396, 0.4436]) -Greedy action tensor([-1.6692, -0.3709, 0.6311, 0.2850]) tensor([0.0461, 0.1688, 0.4598, 0.3253]) -Greedy action tensor([-0.5228, -0.2977, 1.0651, 1.5028]) tensor([0.0679, 0.0850, 0.3323, 0.5148]) -Greedy action tensor([-1.9273, -0.6674, 0.4868, -0.1381]) tensor([0.0461, 0.1625, 0.5154, 0.2759]) -Greedy action tensor([-1.2358, -0.5102, 0.5713, 0.7917]) tensor([0.0597, 0.1233, 0.3637, 0.4533]) -Greedy action tensor([ 0.3268, -0.9465, -0.4388, 2.1016]) tensor([0.1308, 0.0366, 0.0608, 0.7717]) -Greedy action tensor([ 0.9101, -0.3017, 0.0044, -0.0246]) tensor([0.4774, 0.1421, 0.1930, 0.1875]) -Greedy action tensor([ 1.5945, 1.0123, -0.3061, 0.6283]) tensor([0.4788, 0.2675, 0.0716, 0.1822]) -Greedy action tensor([-0.1151, -0.5577, -1.0300, 0.8834]) tensor([0.2102, 0.1350, 0.0842, 0.5705]) -Greedy action tensor([ 0.2033, 1.0359, 1.8418, -0.6795]) tensor([0.1129, 0.2595, 0.5810, 0.0467]) -Greedy action tensor([ 1.0352, -0.0303, 0.0220, 1.7478]) tensor([0.2669, 0.0920, 0.0969, 0.5443]) -Greedy action tensor([ 0.8137, -1.3375, 0.1999, 0.1091]) tensor([0.4647, 0.0541, 0.2515, 0.2297]) -Greedy action tensor([-0.6883, -0.3260, -0.0794, 0.8814]) tensor([0.1101, 0.1582, 0.2025, 0.5292]) -Greedy action tensor([-0.5015, -0.7608, -0.8890, -0.6281]) tensor([0.3002, 0.2316, 0.2037, 0.2645]) -Greedy action tensor([0.5622, 0.7301, 0.2537, 0.7646]) tensor([0.2414, 0.2856, 0.1774, 0.2956]) -Greedy action tensor([0.9682, 0.0622, 0.4374, 0.1158]) tensor([0.4135, 0.1671, 0.2432, 0.1763]) -Greedy action tensor([ 1.3842, -1.3925, 2.7668, 0.6970]) tensor([0.1802, 0.0112, 0.7180, 0.0906]) -Greedy action tensor([ 0.1508, -1.4253, 1.2824, -0.7697]) tensor([0.2125, 0.0439, 0.6589, 0.0846]) -Greedy action tensor([-0.6982, -0.5177, -0.7088, 0.5047]) tensor([0.1535, 0.1838, 0.1518, 0.5109]) -Greedy action tensor([ 0.1326, -1.2252, 0.7802, 0.4494]) tensor([0.2202, 0.0566, 0.4208, 0.3023]) -Greedy action tensor([ 0.3602, -0.2622, 0.8684, 0.6079]) tensor([0.2232, 0.1198, 0.3710, 0.2860]) -Greedy action tensor([ 0.0485, -0.2104, 0.2282, -0.4096]) tensor([0.2777, 0.2143, 0.3323, 0.1756]) -Greedy action tensor([ 0.8795, -0.5451, 1.7249, 0.9179]) tensor([0.2170, 0.0522, 0.5053, 0.2255]) -Greedy action tensor([-0.3614, -1.1597, -1.4118, 0.9170]) tensor([0.1855, 0.0835, 0.0649, 0.6661]) -Greedy action tensor([-0.2595, -0.2722, -0.5615, 0.1370]) tensor([0.2373, 0.2343, 0.1755, 0.3528]) -Greedy action tensor([ 0.0263, -0.7298, 1.0771, 1.6182]) tensor([0.1082, 0.0508, 0.3094, 0.5316]) -Greedy action tensor([-0.2171, -1.8925, 0.3913, 1.4185]) tensor([0.1226, 0.0230, 0.2253, 0.6292]) -Greedy action tensor([-0.4579, -1.1302, -0.2060, -0.7530]) tensor([0.2824, 0.1442, 0.3633, 0.2102]) -Greedy action tensor([-0.2104, -1.6066, 0.7421, 1.2335]) tensor([0.1238, 0.0306, 0.3209, 0.5246]) -Greedy action tensor([ 0.7700, -0.0510, 1.1591, 0.7319]) tensor([0.2578, 0.1135, 0.3805, 0.2482]) -Greedy action tensor([ 1.2731, -1.1127, 0.1409, 1.0195]) tensor([0.4566, 0.0420, 0.1472, 0.3543]) -Greedy action tensor([0.6115, 0.5442, 0.1936, 0.0307]) tensor([0.3172, 0.2965, 0.2088, 0.1774]) -Greedy action tensor([ 1.2564, -1.0284, -0.6058, 1.2694]) tensor([0.4405, 0.0448, 0.0684, 0.4463]) -Greedy action tensor([1.1177, 0.3427, 1.0503, 1.3122]) tensor([0.2770, 0.1276, 0.2589, 0.3365]) -Greedy action tensor([-0.3167, -0.0713, 0.5421, 0.7483]) tensor([0.1326, 0.1695, 0.3131, 0.3847]) -Greedy action tensor([ 0.4857, -1.5505, 1.1717, 0.6456]) tensor([0.2331, 0.0304, 0.4629, 0.2735]) -Greedy action tensor([ 0.3181, -2.1569, 0.0320, 1.0809]) tensor([0.2513, 0.0211, 0.1888, 0.5388]) -Greedy action tensor([ 0.3270, 0.4462, -0.9688, 0.8615]) tensor([0.2435, 0.2743, 0.0666, 0.4156]) -Greedy action tensor([ 1.7340, -1.1645, 0.3302, 0.6566]) tensor([0.6093, 0.0336, 0.1497, 0.2075]) -Greedy action tensor([0.2389, 0.3940, 0.0425, 1.1154]) tensor([0.1855, 0.2166, 0.1524, 0.4456]) -Greedy action tensor([ 0.5921, 0.3068, -0.4177, 0.3069]) tensor([0.3487, 0.2621, 0.1270, 0.2622]) -Greedy action tensor([0.7323, 0.0407, 0.3105, 0.0322]) tensor([0.3769, 0.1887, 0.2472, 0.1871]) -Greedy action tensor([ 0.8830, -1.8894, 0.1589, 1.2524]) tensor([0.3340, 0.0209, 0.1619, 0.4832]) -Greedy action tensor([ 0.3009, -1.3608, 1.1774, 0.0298]) tensor([0.2296, 0.0436, 0.5517, 0.1751]) -Greedy action tensor([-0.6670, 0.0897, 0.8167, 0.2308]) tensor([0.1001, 0.2133, 0.4411, 0.2455]) -Greedy action tensor([0.6122, 0.0272, 0.1576, 1.3919]) tensor([0.2287, 0.1274, 0.1452, 0.4988]) -Greedy action tensor([ 0.9578, -0.0437, 0.2497, 1.1489]) tensor([0.3257, 0.1196, 0.1604, 0.3943]) -Greedy action tensor([ 1.1118, -0.0242, -0.0135, 0.0234]) tensor([0.5044, 0.1620, 0.1637, 0.1699]) -Greedy action tensor([ 1.0615, -0.7484, -0.2789, 1.7988]) tensor([0.2844, 0.0466, 0.0744, 0.5946]) -Greedy action tensor([0.5796, 0.3066, 0.6627, 0.5392]) tensor([0.2626, 0.1999, 0.2854, 0.2522]) -Greedy action tensor([-0.0390, -0.5633, 0.1482, -0.3366]) tensor([0.2824, 0.1672, 0.3406, 0.2098]) -Greedy action tensor([ 1.1191, -1.6286, 0.3699, 0.7760]) tensor([0.4452, 0.0285, 0.2104, 0.3159]) -Greedy action tensor([ 0.8036, -1.3732, -0.0451, 0.9533]) tensor([0.3700, 0.0420, 0.1583, 0.4297]) -Greedy action tensor([ 1.8865, -0.4715, 0.8303, 1.5957]) tensor([0.4566, 0.0432, 0.1588, 0.3414]) -Greedy action tensor([ 0.2981, -1.1846, 1.0320, 0.8550]) tensor([0.1978, 0.0449, 0.4121, 0.3452]) -Greedy action tensor([ 0.0175, -0.9706, 0.4489, 0.2784]) tensor([0.2375, 0.0884, 0.3657, 0.3084]) -Greedy action tensor([-0.6054, 0.2318, 1.5270, -0.5479]) tensor([0.0781, 0.1804, 0.6588, 0.0827]) -Greedy action tensor([ 0.7001, -0.4835, -0.9471, 1.6911]) tensor([0.2385, 0.0730, 0.0459, 0.6425]) -Greedy action tensor([-0.6729, -0.5935, 0.0364, 0.7006]) tensor([0.1240, 0.1342, 0.2520, 0.4897]) -Greedy action tensor([-0.1595, -0.1413, -0.0066, 0.2441]) tensor([0.2136, 0.2176, 0.2489, 0.3199]) -Greedy action tensor([-0.2619, -1.6296, 0.1635, 1.8420]) tensor([0.0911, 0.0232, 0.1393, 0.7464]) -Greedy action tensor([-0.2608, 0.7380, 0.2853, 0.3141]) tensor([0.1385, 0.3761, 0.2392, 0.2462]) -Greedy action tensor([ 0.4692, -1.3256, -0.0539, 0.2863]) tensor([0.3858, 0.0641, 0.2287, 0.3214]) -Greedy action tensor([ 0.3899, 0.1765, -1.1161, 1.2117]) tensor([0.2323, 0.1877, 0.0515, 0.5285]) -Greedy action tensor([ 1.1452, 0.1789, -0.1488, 0.9322]) tensor([0.4060, 0.1545, 0.1113, 0.3281]) -Greedy action tensor([ 0.5035, -0.5457, -0.2692, 0.4336]) tensor([0.3644, 0.1276, 0.1682, 0.3398]) -Greedy action tensor([ 1.6216, 0.0320, -0.5857, 1.4143]) tensor([0.4702, 0.0959, 0.0517, 0.3822]) -Greedy action tensor([ 1.3391, -0.2209, 1.7912, 0.6967]) tensor([0.3023, 0.0635, 0.4751, 0.1590]) -Greedy action tensor([-0.3255, 0.6508, -0.4788, 0.8957]) tensor([0.1265, 0.3359, 0.1085, 0.4291]) -Greedy action tensor([-0.8637, 0.3286, -0.6343, 1.2636]) tensor([0.0717, 0.2363, 0.0902, 0.6018]) -Greedy action tensor([-0.6403, -1.7559, -0.4156, -0.2340]) tensor([0.2450, 0.0803, 0.3068, 0.3679]) -Greedy action tensor([ 0.2291, 0.4393, 0.8243, -0.1608]) tensor([0.2117, 0.2612, 0.3838, 0.1433]) -Greedy action tensor([ 0.0207, 0.1425, 1.3786, -0.8472]) tensor([0.1553, 0.1755, 0.6040, 0.0652]) -Greedy action tensor([1.8143, 0.1545, 0.1586, 0.1609]) tensor([0.6359, 0.1209, 0.1214, 0.1217]) -Greedy action tensor([0.6848, 0.4825, 0.6394, 0.6250]) tensor([0.2692, 0.2199, 0.2573, 0.2536]) -Greedy action tensor([ 1.1709, -0.4467, 1.0476, 0.8968]) tensor([0.3518, 0.0698, 0.3110, 0.2674]) -Greedy action tensor([-0.3788, 0.4288, -0.4139, 0.0861]) tensor([0.1724, 0.3866, 0.1665, 0.2745]) -Greedy action tensor([-0.4470, -2.2369, -0.1924, 1.0600]) tensor([0.1435, 0.0240, 0.1851, 0.6475]) -Greedy action tensor([ 1.2024, -0.4659, 0.9798, 1.4205]) tensor([0.3093, 0.0583, 0.2476, 0.3847]) -Greedy action tensor([ 1.3256, -0.5602, 0.8301, 0.2454]) tensor([0.4761, 0.0722, 0.2901, 0.1616]) -Greedy action tensor([-1.1685, -1.0157, 1.4324, 0.5167]) tensor([0.0475, 0.0554, 0.6406, 0.2564]) -Greedy action tensor([ 0.3754, -0.6293, -0.2995, 1.5759]) tensor([0.1924, 0.0705, 0.0980, 0.6392]) -Greedy action tensor([ 0.9526, 0.1856, -0.4189, 0.6283]) tensor([0.4096, 0.1902, 0.1039, 0.2962]) -Greedy action tensor([ 1.7453, -0.2531, 1.1299, 1.6246]) tensor([0.3903, 0.0529, 0.2109, 0.3459]) -Greedy action tensor([ 0.7012, 0.7330, -0.1816, 0.9209]) tensor([0.2709, 0.2796, 0.1120, 0.3374]) -Greedy action tensor([ 1.4649, -0.6617, 0.7868, 0.9586]) tensor([0.4485, 0.0535, 0.2277, 0.2703]) -Greedy action tensor([0.6151, 0.3320, 0.8461, 0.1484]) tensor([0.2747, 0.2070, 0.3461, 0.1722]) -Greedy action tensor([ 0.5192, -0.3126, -0.2708, -0.2608]) tensor([0.4260, 0.1854, 0.1933, 0.1953]) -Greedy action tensor([ 0.6967, -0.2511, -0.1168, -0.4037]) tensor([0.4622, 0.1791, 0.2049, 0.1538]) -Greedy action tensor([ 0.6866, -0.4037, 0.2284, -0.5111]) tensor([0.4405, 0.1480, 0.2785, 0.1330]) -Greedy action tensor([ 0.5594, -0.3234, -0.1382, -0.2407]) tensor([0.4236, 0.1752, 0.2109, 0.1903]) -Greedy action tensor([ 0.7186, -0.3714, 0.0258, -0.2054]) tensor([0.4478, 0.1505, 0.2240, 0.1777]) -Greedy action tensor([ 0.8765, -0.6348, 0.0028, -0.6445]) tensor([0.5387, 0.1188, 0.2248, 0.1177]) -Greedy action tensor([ 0.6741, -0.2439, -0.0428, -0.3749]) tensor([0.4468, 0.1784, 0.2182, 0.1565]) -Greedy action tensor([ 0.5471, -0.1336, -0.0483, -0.3435]) tensor([0.4052, 0.2051, 0.2234, 0.1663]) -Greedy action tensor([ 0.3707, 0.0491, -0.1283, -0.2088]) tensor([0.3458, 0.2507, 0.2099, 0.1937]) -Greedy action tensor([ 0.4507, -0.2006, 0.0080, -0.1434]) tensor([0.3682, 0.1920, 0.2365, 0.2033]) -Greedy action tensor([ 0.6704, -0.2953, 0.0143, -0.2783]) tensor([0.4373, 0.1665, 0.2269, 0.1693]) -Greedy action tensor([ 0.6248, -0.2594, 0.0482, -0.3616]) tensor([0.4259, 0.1759, 0.2393, 0.1588]) -Greedy action tensor([ 0.7446, -0.5562, -0.1652, -0.6686]) tensor([0.5213, 0.1420, 0.2099, 0.1269]) -Greedy action tensor([ 0.5693, -0.3132, -0.1251, -0.3849]) tensor([0.4351, 0.1800, 0.2173, 0.1676]) -Greedy action tensor([ 0.7258, -0.2989, -0.0657, -0.3343]) tensor([0.4633, 0.1663, 0.2100, 0.1605]) -Greedy action tensor([ 0.8179, 0.0982, -0.0814, -0.4146]) tensor([0.4576, 0.2228, 0.1862, 0.1334]) -Greedy action tensor([ 0.5479, -0.1560, -0.2466, -0.2633]) tensor([0.4183, 0.2069, 0.1890, 0.1858]) -Greedy action tensor([ 0.6268, -0.5548, -0.0389, -0.3145]) tensor([0.4523, 0.1388, 0.2324, 0.1765]) -Greedy action tensor([ 0.3591, -0.1008, -0.1248, -0.2779]) tensor([0.3602, 0.2274, 0.2220, 0.1905]) -Greedy action tensor([ 0.2505, 0.0727, 0.2123, -0.2857]) tensor([0.2955, 0.2473, 0.2844, 0.1728]) -Greedy action tensor([ 0.5868, -0.2668, 0.1168, -0.3839]) tensor([0.4116, 0.1753, 0.2572, 0.1559]) -Greedy action tensor([ 0.3644, -0.0409, -0.0212, -0.1831]) tensor([0.3418, 0.2279, 0.2325, 0.1977]) -Greedy action tensor([ 0.9799, -0.5207, 0.1231, -0.7302]) tensor([0.5469, 0.1220, 0.2322, 0.0989]) -Greedy action tensor([ 0.9099, -0.5893, -0.0595, -0.4629]) tensor([0.5388, 0.1203, 0.2044, 0.1365]) -Greedy action tensor([ 0.7526, -0.3767, -0.0842, -0.6849]) tensor([0.5015, 0.1621, 0.2172, 0.1191]) -Greedy action tensor([ 0.8951, -0.7792, -0.0678, -0.6697]) tensor([0.5623, 0.1054, 0.2147, 0.1176]) -Greedy action tensor([ 0.8763, -0.3592, -0.0446, -0.3170]) tensor([0.5020, 0.1459, 0.1999, 0.1522]) -Greedy action tensor([ 0.9329, -0.6603, -0.0417, -0.7708]) tensor([0.5673, 0.1153, 0.2141, 0.1033]) -Greedy action tensor([ 0.6806, -0.2703, 0.2436, -0.3947]) tensor([0.4213, 0.1628, 0.2722, 0.1438]) -Greedy action tensor([ 0.5435, -0.3527, 0.0456, -0.4390]) tensor([0.4184, 0.1707, 0.2543, 0.1566]) -Greedy action tensor([ 0.2761, -0.1015, 0.0371, -0.3005]) tensor([0.3295, 0.2259, 0.2595, 0.1851]) -Greedy action tensor([ 0.4257, -0.1871, 0.1038, -0.3469]) tensor([0.3665, 0.1986, 0.2656, 0.1693]) -Greedy action tensor([ 0.6964, -0.3093, 0.0329, -0.2745]) tensor([0.4426, 0.1619, 0.2279, 0.1676]) -Greedy action tensor([ 0.8720, -0.1188, -0.0485, -0.1229]) tensor([0.4674, 0.1736, 0.1862, 0.1728]) -Greedy action tensor([ 0.4122, 0.1191, 0.0055, -0.1798]) tensor([0.3373, 0.2516, 0.2246, 0.1866]) -Greedy action tensor([ 0.6352, -0.6453, 0.2294, -0.7456]) tensor([0.4554, 0.1266, 0.3035, 0.1145]) -Greedy action tensor([ 0.7379, -0.5251, -0.1247, -0.3759]) tensor([0.4918, 0.1391, 0.2076, 0.1615]) -Greedy action tensor([ 0.4176, 0.0102, 0.0453, -0.2411]) tensor([0.3482, 0.2317, 0.2400, 0.1802]) -Greedy action tensor([ 0.5040, -0.3061, -0.0764, -0.1989]) tensor([0.4001, 0.1780, 0.2239, 0.1981]) -Greedy action tensor([ 0.7491, -0.1819, 0.0219, -0.3400]) tensor([0.4517, 0.1780, 0.2183, 0.1520]) -Greedy action tensor([ 0.5140, -0.2832, 0.0006, -0.1507]) tensor([0.3901, 0.1758, 0.2334, 0.2007]) -Greedy action tensor([ 0.8546, -0.8918, -0.0864, -0.5760]) tensor([0.5544, 0.0967, 0.2163, 0.1326]) -Greedy action tensor([ 0.4390, 0.0411, -0.1134, -0.2517]) tensor([0.3638, 0.2444, 0.2094, 0.1824]) -Greedy action tensor([ 0.7566, -0.3687, 0.0204, -0.3472]) tensor([0.4684, 0.1520, 0.2243, 0.1553]) -Greedy action tensor([ 0.7726, -0.0725, -0.0271, -0.2644]) tensor([0.4477, 0.1923, 0.2012, 0.1587]) -Greedy action tensor([ 0.7393, -0.1930, 0.0827, -0.4371]) tensor([0.4503, 0.1773, 0.2335, 0.1389]) -Greedy action tensor([ 0.6515, 0.1803, -0.0046, -0.2267]) tensor([0.3908, 0.2440, 0.2028, 0.1624]) -Greedy action tensor([ 0.3388, 0.0515, -0.0760, -0.1120]) tensor([0.3281, 0.2462, 0.2167, 0.2090]) -Greedy action tensor([ 0.7382, 0.0510, -0.1185, -0.4774]) tensor([0.4496, 0.2262, 0.1909, 0.1333]) -Greedy action tensor([ 0.3314, -0.2956, 0.0534, -0.5427]) tensor([0.3692, 0.1972, 0.2796, 0.1540]) -Greedy action tensor([ 0.4517, 0.0342, 0.0163, -0.0821]) tensor([0.3458, 0.2278, 0.2237, 0.2028]) -Greedy action tensor([ 0.8490, -0.5731, 0.0865, -0.4615]) tensor([0.5057, 0.1220, 0.2359, 0.1364]) -Greedy action tensor([ 0.6355, -0.4813, -0.0821, -0.3384]) tensor([0.4560, 0.1493, 0.2225, 0.1722]) -Greedy action tensor([ 0.7028, -0.4010, 0.1612, -0.5171]) tensor([0.4528, 0.1501, 0.2634, 0.1337]) -Greedy action tensor([ 0.7151, -0.4024, 0.1791, -0.7135]) tensor([0.4647, 0.1520, 0.2719, 0.1114]) -Greedy action tensor([ 0.4538, -0.2159, 0.0136, -0.4318]) tensor([0.3894, 0.1993, 0.2507, 0.1606]) -Greedy action tensor([ 0.5238, -0.2294, -0.0430, -0.2989]) tensor([0.4036, 0.1901, 0.2290, 0.1773]) -Greedy action tensor([ 0.7782, -0.4156, -0.0627, -0.3685]) tensor([0.4873, 0.1477, 0.2102, 0.1548]) -Greedy action tensor([ 0.4783, -0.2146, 0.0927, -0.3329]) tensor([0.3810, 0.1906, 0.2591, 0.1693]) -Greedy action tensor([ 0.6239, -0.4406, -0.0575, -0.5751]) tensor([0.4646, 0.1603, 0.2350, 0.1401]) -Greedy action tensor([ 0.4298, -0.0853, -0.1113, -0.1864]) tensor([0.3677, 0.2197, 0.2140, 0.1986]) -Greedy action tensor([ 0.4959, -0.0287, 0.0106, -0.1997]) tensor([0.3695, 0.2187, 0.2275, 0.1843]) -Greedy action tensor([ 0.3898, -0.0884, -0.0708, -0.2860]) tensor([0.3624, 0.2246, 0.2286, 0.1844]) -Greedy action tensor([ 0.6748, -0.2970, -0.0705, -0.2184]) tensor([0.4420, 0.1673, 0.2098, 0.1809]) -Greedy action tensor([ 0.7468, -0.3791, -0.0213, -0.4460]) tensor([0.4781, 0.1551, 0.2218, 0.1450]) -Greedy action tensor([ 0.5590, -0.3765, 0.1425, -0.5036]) tensor([0.4171, 0.1637, 0.2750, 0.1441]) -Greedy action tensor([ 0.3232, -0.0614, 0.1064, -0.3470]) tensor([0.3336, 0.2271, 0.2686, 0.1707]) -Greedy action tensor([ 0.6666, -0.2712, 0.1472, -0.1445]) tensor([0.4114, 0.1611, 0.2447, 0.1828]) -Greedy action tensor([ 0.9129, -0.7233, -0.1033, -0.4296]) tensor([0.5501, 0.1071, 0.1991, 0.1437]) -Greedy action tensor([ 0.7029, -0.3632, 0.0280, -0.4058]) tensor([0.4580, 0.1577, 0.2332, 0.1511]) -Greedy action tensor([ 0.4632, -0.1637, -0.3283, -0.4665]) tensor([0.4198, 0.2243, 0.1902, 0.1657]) -Greedy action tensor([ 0.4663, -0.2654, -0.0684, -0.5357]) tensor([0.4108, 0.1976, 0.2407, 0.1508]) -Greedy action tensor([ 0.5850, -0.2483, -0.0270, -0.2252]) tensor([0.4129, 0.1795, 0.2239, 0.1837]) -Greedy action tensor([ 0.5630, -0.3038, -0.0833, -0.3042]) tensor([0.4229, 0.1778, 0.2216, 0.1777]) -Greedy action tensor([ 0.7761, -0.7331, -0.0259, -0.3833]) tensor([0.5042, 0.1115, 0.2261, 0.1582]) -Greedy action tensor([ 0.4481, 0.3200, 0.0803, -0.0154]) tensor([0.3124, 0.2748, 0.2163, 0.1965]) -Greedy action tensor([ 0.4032, -0.1437, -0.0701, -0.2855]) tensor([0.3698, 0.2140, 0.2304, 0.1857]) -Greedy action tensor([ 0.6540, -0.3954, -0.2109, -0.5976]) tensor([0.4861, 0.1702, 0.2047, 0.1390]) -Greedy action tensor([ 0.6967, -0.5162, 0.2824, -0.4596]) tensor([0.4400, 0.1308, 0.2908, 0.1384]) -Greedy action tensor([ 0.5893, -0.3765, -0.0436, -0.3310]) tensor([0.4329, 0.1648, 0.2299, 0.1725]) -Greedy action tensor([ 0.2632, 0.0442, -0.0146, -0.3518]) tensor([0.3224, 0.2590, 0.2442, 0.1743]) -Greedy action tensor([ 1.6140, -0.2115, -0.2236, 0.5149]) tensor([0.6048, 0.0974, 0.0963, 0.2015]) -Greedy action tensor([ 1.2075, -0.5167, -0.2487, 0.2037]) tensor([0.5624, 0.1003, 0.1311, 0.2061]) -Greedy action tensor([ 1.3614, -0.6607, -0.2313, 0.0552]) tensor([0.6224, 0.0824, 0.1266, 0.1686]) -Greedy action tensor([ 1.4360, -0.2224, -0.5954, -0.0390]) tensor([0.6450, 0.1228, 0.0846, 0.1476]) -Greedy action tensor([ 1.2062, -0.6918, -0.2452, 0.2918]) tensor([0.5603, 0.0840, 0.1312, 0.2245]) -Greedy action tensor([ 0.9410, -0.4823, -0.1167, 0.3527]) tensor([0.4665, 0.1124, 0.1620, 0.2591]) -Greedy action tensor([ 1.1688, -0.3284, -0.1839, 0.1860]) tensor([0.5386, 0.1205, 0.1393, 0.2016]) -Greedy action tensor([ 1.1069, -0.6055, -0.3837, 0.5632]) tensor([0.5035, 0.0908, 0.1134, 0.2923]) -Greedy action tensor([ 0.6718, -0.2409, -0.3481, 0.2042]) tensor([0.4187, 0.1681, 0.1510, 0.2623]) -Greedy action tensor([ 1.2030, -0.4262, -0.4327, 0.2211]) tensor([0.5664, 0.1111, 0.1103, 0.2122]) -Greedy action tensor([ 0.7843, -0.0206, -0.1465, 0.1179]) tensor([0.4247, 0.1899, 0.1674, 0.2181]) -Greedy action tensor([ 0.4776, -0.2855, 0.0443, -0.0494]) tensor([0.3697, 0.1724, 0.2397, 0.2182]) -Greedy action tensor([ 0.7607, -0.5616, -0.3113, 0.5801]) tensor([0.4092, 0.1091, 0.1401, 0.3416]) -Greedy action tensor([ 1.3483, -0.3500, -0.3165, 0.3437]) tensor([0.5752, 0.1053, 0.1089, 0.2106]) -Greedy action tensor([ 1.3163, -0.7407, -0.4667, 0.7320]) tensor([0.5395, 0.0690, 0.0907, 0.3008]) -Greedy action tensor([ 1.4202, -0.4845, 0.0655, 0.2756]) tensor([0.5796, 0.0863, 0.1496, 0.1845]) -Greedy action tensor([ 1.6699, -0.3817, -0.4127, 0.2682]) tensor([0.6670, 0.0857, 0.0831, 0.1642]) -Greedy action tensor([ 0.7423, -0.4658, -0.2822, 0.1696]) tensor([0.4501, 0.1345, 0.1616, 0.2539]) -Greedy action tensor([ 1.4416, -0.4671, -0.4168, 0.1744]) tensor([0.6306, 0.0935, 0.0983, 0.1776]) -Greedy action tensor([ 1.1816, -0.4900, -0.1335, 0.1487]) tensor([0.5518, 0.1037, 0.1481, 0.1964]) -Greedy action tensor([ 1.0506, 0.0624, -0.0166, -0.2159]) tensor([0.5005, 0.1863, 0.1721, 0.1410]) -Greedy action tensor([ 1.4841, -0.0441, -0.3415, 0.2129]) tensor([0.6030, 0.1308, 0.0971, 0.1691]) -Greedy action tensor([ 1.3080, -0.6503, -0.2129, 0.4772]) tensor([0.5570, 0.0786, 0.1217, 0.2427]) -Greedy action tensor([ 0.5463, -0.1453, -0.4586, 0.3025]) tensor([0.3773, 0.1889, 0.1381, 0.2957]) -Greedy action tensor([ 0.5958, -0.5696, -0.2448, 0.2309]) tensor([0.4102, 0.1279, 0.1770, 0.2848]) -Greedy action tensor([ 0.7782, -0.2674, -0.0399, 0.1114]) tensor([0.4336, 0.1524, 0.1913, 0.2226]) -Greedy action tensor([ 0.8124, -0.1166, -0.1145, 0.1436]) tensor([0.4342, 0.1715, 0.1718, 0.2224]) -Greedy action tensor([0.8238, 0.0566, 0.0922, 0.0678]) tensor([0.4141, 0.1923, 0.1992, 0.1944]) -Greedy action tensor([ 1.2628, -0.2389, -0.2700, 0.0333]) tensor([0.5777, 0.1287, 0.1247, 0.1689]) -Greedy action tensor([ 1.0744, -0.7388, -0.1435, -0.1412]) tensor([0.5696, 0.0929, 0.1685, 0.1689]) -Greedy action tensor([ 1.3657, 0.0256, -0.0241, 0.0470]) tensor([0.5623, 0.1472, 0.1401, 0.1504]) -Greedy action tensor([ 1.2339, -0.5732, -0.1610, 0.3100]) tensor([0.5528, 0.0907, 0.1370, 0.2194]) -Greedy action tensor([ 0.8969, -0.5785, -0.5210, 0.0850]) tensor([0.5222, 0.1194, 0.1265, 0.2319]) -Greedy action tensor([ 1.0009, -0.2346, -0.3591, 0.1545]) tensor([0.5060, 0.1471, 0.1299, 0.2170]) -Greedy action tensor([ 1.0608, 0.1642, -0.0420, -0.1107]) tensor([0.4879, 0.1990, 0.1619, 0.1512]) -Greedy action tensor([ 0.9616, -0.2316, -0.3409, 0.4533]) tensor([0.4594, 0.1393, 0.1249, 0.2764]) -Greedy action tensor([ 1.1874, -0.6156, -0.1549, 0.3654]) tensor([0.5360, 0.0883, 0.1400, 0.2356]) -Greedy action tensor([ 1.2616, -0.4007, -0.3128, 0.4396]) tensor([0.5445, 0.1033, 0.1128, 0.2394]) -Greedy action tensor([ 0.5740, -0.2623, 0.1619, -0.1891]) tensor([0.3904, 0.1691, 0.2585, 0.1820]) -Greedy action tensor([ 0.8502, -0.3966, -0.1507, 0.0356]) tensor([0.4767, 0.1370, 0.1752, 0.2111]) -Greedy action tensor([ 0.1437, -0.0191, -0.4539, -0.0824]) tensor([0.3127, 0.2658, 0.1721, 0.2495]) -Greedy action tensor([ 0.5014, 0.0144, 0.0526, -0.1542]) tensor([0.3608, 0.2217, 0.2303, 0.1873]) -Greedy action tensor([ 1.0198, -0.5681, -0.2046, 0.6277]) tensor([0.4600, 0.0940, 0.1352, 0.3108]) -Greedy action tensor([ 1.8654, -0.8695, -0.3887, 0.3987]) tensor([0.7140, 0.0463, 0.0749, 0.1647]) -Greedy action tensor([ 1.2514, -0.7301, -0.3434, -0.0457]) tensor([0.6195, 0.0854, 0.1257, 0.1693]) -Greedy action tensor([ 0.8949, -0.6063, -0.1824, 0.1716]) tensor([0.4882, 0.1088, 0.1662, 0.2368]) -Greedy action tensor([ 1.1325, -0.3266, -0.1802, -0.0122]) tensor([0.5495, 0.1277, 0.1479, 0.1749]) -Greedy action tensor([ 1.0230, -0.1781, -0.1753, 0.1148]) tensor([0.4985, 0.1500, 0.1504, 0.2010]) -Greedy action tensor([ 0.2025, -0.1169, -0.1730, 0.0161]) tensor([0.3083, 0.2240, 0.2118, 0.2559]) -Greedy action tensor([ 1.2408, 0.0546, -0.0825, 0.0985]) tensor([0.5289, 0.1615, 0.1408, 0.1688]) -Greedy action tensor([ 0.9446, -0.6718, -0.1364, 0.3603]) tensor([0.4772, 0.0948, 0.1619, 0.2661]) -Greedy action tensor([ 0.7685, -0.0246, -0.4011, 0.2789]) tensor([0.4209, 0.1904, 0.1307, 0.2580]) -Greedy action tensor([ 1.6453, -0.5666, -0.5938, 0.0720]) tensor([0.7025, 0.0769, 0.0749, 0.1457]) -Greedy action tensor([ 0.8414, -0.1748, -0.1756, -0.0630]) tensor([0.4698, 0.1701, 0.1699, 0.1902]) -Greedy action tensor([ 1.1632, -0.6917, -0.3565, 0.6695]) tensor([0.5036, 0.0788, 0.1102, 0.3074]) -Greedy action tensor([ 1.5122, -0.5449, -0.3639, -0.1052]) tensor([0.6759, 0.0864, 0.1036, 0.1341]) -Greedy action tensor([ 1.6748, 0.1440, -0.3591, 0.3553]) tensor([0.6194, 0.1340, 0.0810, 0.1655]) -Greedy action tensor([ 1.3236, -0.3489, 0.0919, 0.0499]) tensor([0.5684, 0.1067, 0.1659, 0.1590]) -Greedy action tensor([ 1.1622, -0.1136, -0.2801, 0.0077]) tensor([0.5462, 0.1525, 0.1291, 0.1722]) -Greedy action tensor([ 1.5479e+00, -5.1047e-01, -3.0764e-01, 3.4207e-04]) tensor([0.6681, 0.0853, 0.1045, 0.1421]) -Greedy action tensor([ 0.8959, -0.2707, 0.0105, -0.0466]) tensor([0.4731, 0.1473, 0.1952, 0.1843]) -Greedy action tensor([ 1.3402, -0.6499, -0.1745, 0.5134]) tensor([0.5574, 0.0762, 0.1226, 0.2438]) -Greedy action tensor([ 0.8359, -0.4391, -0.2224, 0.0383]) tensor([0.4815, 0.1345, 0.1671, 0.2169]) -Greedy action tensor([ 0.7104, -0.3229, -0.0116, 0.1570]) tensor([0.4138, 0.1472, 0.2010, 0.2379]) -Greedy action tensor([ 1.6276, -0.3764, -0.2939, 0.1045]) tensor([0.6670, 0.0899, 0.0976, 0.1454]) -Greedy action tensor([ 0.6645, -0.5597, -0.0135, 0.0792]) tensor([0.4240, 0.1247, 0.2152, 0.2361]) -Greedy action tensor([ 1.1652, -0.5539, -0.2194, 0.5488]) tensor([0.5077, 0.0910, 0.1271, 0.2741]) -Greedy action tensor([ 1.1976, -0.2433, -0.2785, 0.1308]) tensor([0.5527, 0.1308, 0.1263, 0.1902]) -Greedy action tensor([ 1.1422, -0.3219, -0.3983, 0.4042]) tensor([0.5198, 0.1202, 0.1114, 0.2485]) -Greedy action tensor([ 1.5829, -0.6790, -0.3295, 0.3240]) tensor([0.6511, 0.0678, 0.0962, 0.1849]) -Greedy action tensor([ 1.1777, -0.7164, -0.5610, 0.6390]) tensor([0.5236, 0.0788, 0.0920, 0.3055]) -Greedy action tensor([ 1.0176, -0.3961, -0.3080, 0.4217]) tensor([0.4854, 0.1181, 0.1290, 0.2675]) -Greedy action tensor([ 1.5074, -0.7304, -0.0118, 0.1878]) tensor([0.6278, 0.0670, 0.1374, 0.1678]) -Greedy action tensor([ 1.1338, -0.4181, -0.3476, 0.0248]) tensor([0.5653, 0.1197, 0.1285, 0.1865]) -Greedy action tensor([ 1.2127, -0.2589, -0.1952, 0.3278]) tensor([0.5300, 0.1216, 0.1297, 0.2187]) -Greedy action tensor([ 1.2976, -0.5276, -0.5680, 0.4935]) tensor([0.5671, 0.0914, 0.0878, 0.2538]) -Greedy action tensor([ 1.2948, -0.1730, -0.4957, -0.0049]) tensor([0.5988, 0.1380, 0.0999, 0.1633]) -Greedy action tensor([ 0.5059, -0.3759, -0.2129, 0.1987]) tensor([0.3792, 0.1570, 0.1848, 0.2789]) -Greedy action tensor([ 1.4360, -0.7208, 0.0833, -0.0031]) tensor([0.6206, 0.0718, 0.1604, 0.1472]) -Greedy action tensor([ 1.1927, -0.4295, -0.1348, 0.2533]) tensor([0.5395, 0.1065, 0.1430, 0.2109]) -Greedy action tensor([ 1.7526, -0.4805, -0.0211, 0.0687]) tensor([0.6837, 0.0733, 0.1160, 0.1269]) -Greedy action tensor([-1.7609, -0.5322, 0.6884, 0.0125]) tensor([0.0457, 0.1561, 0.5291, 0.2691]) -Greedy action tensor([-1.6600, -0.7067, 0.1259, -0.4231]) tensor([0.0769, 0.1995, 0.4587, 0.2649]) -Greedy action tensor([-1.7113, -0.5034, 0.5327, -0.0113]) tensor([0.0519, 0.1738, 0.4899, 0.2843]) -Greedy action tensor([-0.7931, -0.5437, 0.2637, -0.0940]) tensor([0.1394, 0.1789, 0.4012, 0.2805]) -Greedy action tensor([-1.7285, -0.5687, 1.3757, 0.9615]) tensor([0.0243, 0.0774, 0.5409, 0.3575]) -Greedy action tensor([-0.7851, -0.5075, 0.1977, 0.1348]) tensor([0.1333, 0.1760, 0.3562, 0.3345]) -Greedy action tensor([-1.1412, -0.4914, 0.3803, 0.6409]) tensor([0.0744, 0.1425, 0.3408, 0.4423]) -Greedy action tensor([-1.6486, -0.5315, 0.5278, -0.0847]) tensor([0.0567, 0.1732, 0.4995, 0.2707]) -Greedy action tensor([-1.8552, -0.4674, 0.6248, -0.1257]) tensor([0.0443, 0.1774, 0.5287, 0.2496]) -Greedy action tensor([-1.5279, -0.5211, 0.5935, 0.2843]) tensor([0.0549, 0.1503, 0.4583, 0.3364]) -Greedy action tensor([-1.2445, -0.5289, 0.6207, 0.9863]) tensor([0.0532, 0.1087, 0.3433, 0.4948]) -Greedy action tensor([-1.1509, 0.2712, -0.2628, -0.1325]) tensor([0.0967, 0.4007, 0.2349, 0.2677]) -Greedy action tensor([-1.1213, -0.6314, 0.2864, 0.2104]) tensor([0.0952, 0.1553, 0.3890, 0.3605]) -Greedy action tensor([-1.7363, -0.5086, 0.5768, -0.0315]) tensor([0.0499, 0.1705, 0.5048, 0.2747]) -Greedy action tensor([-1.4094, -0.3145, 0.8314, 0.9612]) tensor([0.0415, 0.1241, 0.3902, 0.4442]) -Greedy action tensor([-0.6812, 0.2800, 0.9431, 1.4816]) tensor([0.0575, 0.1504, 0.2919, 0.5002]) -Greedy action tensor([-1.8589, -0.4806, 0.6386, -0.1064]) tensor([0.0437, 0.1734, 0.5309, 0.2520]) -Greedy action tensor([-1.5408, -0.3323, 0.7384, -0.6518]) tensor([0.0604, 0.2023, 0.5903, 0.1470]) -Greedy action tensor([-1.7056, -0.4984, 0.6448, 0.1213]) tensor([0.0475, 0.1589, 0.4984, 0.2953]) -Greedy action tensor([-1.7017, -0.5974, 0.9214, 0.2079]) tensor([0.0407, 0.1229, 0.5613, 0.2750]) -Greedy action tensor([-1.6036, -0.4069, 0.5228, 0.1402]) tensor([0.0543, 0.1797, 0.4554, 0.3106]) -Greedy action tensor([-1.3703, 0.1923, 0.2234, 0.2200]) tensor([0.0641, 0.3059, 0.3155, 0.3145]) -Greedy action tensor([-1.8530, -0.3950, 0.6072, -0.1064]) tensor([0.0440, 0.1890, 0.5148, 0.2522]) -Greedy action tensor([-1.9242, -0.4557, 0.6557, -0.1674]) tensor([0.0411, 0.1785, 0.5423, 0.2381]) -Greedy action tensor([-1.6383, -0.1967, 0.4329, 0.0274]) tensor([0.0542, 0.2291, 0.4300, 0.2867]) -Greedy action tensor([-1.3012, 0.3106, 0.6294, -0.7884]) tensor([0.0686, 0.3439, 0.4730, 0.1146]) -Greedy action tensor([-1.7908, -0.6104, 0.6144, -0.0226]) tensor([0.0472, 0.1536, 0.5228, 0.2765]) -Greedy action tensor([-1.3679, -0.5485, 0.3880, 0.0747]) tensor([0.0752, 0.1708, 0.4356, 0.3184]) -Greedy action tensor([-1.9155, -0.4470, 0.6537, -0.1625]) tensor([0.0414, 0.1797, 0.5402, 0.2388]) -Greedy action tensor([-1.3096, -0.6953, 0.1979, 0.1398]) tensor([0.0860, 0.1590, 0.3885, 0.3665]) -Greedy action tensor([-0.9465, 0.0116, 0.6244, 1.0433]) tensor([0.0636, 0.1657, 0.3058, 0.4649]) -Greedy action tensor([-1.0461, -0.6439, 0.4745, 0.7341]) tensor([0.0769, 0.1150, 0.3519, 0.4562]) -Greedy action tensor([-1.8125, -0.5649, 1.0752, 0.4312]) tensor([0.0314, 0.1093, 0.5634, 0.2959]) -Greedy action tensor([-1.8979, -0.4496, 0.6418, -0.1581]) tensor([0.0423, 0.1801, 0.5365, 0.2411]) -Greedy action tensor([-1.9038, -0.5168, 1.0836, 0.5039]) tensor([0.0278, 0.1114, 0.5518, 0.3090]) -Greedy action tensor([-1.9673, -0.4893, 0.9492, 0.2196]) tensor([0.0305, 0.1338, 0.5639, 0.2718]) -Greedy action tensor([-1.3450, -0.5635, 0.3568, 0.2631]) tensor([0.0732, 0.1599, 0.4014, 0.3655]) -Greedy action tensor([-1.6625, -0.4696, 0.5751, 0.0769]) tensor([0.0516, 0.1703, 0.4840, 0.2941]) -Greedy action tensor([-1.0295, -0.5564, 0.2115, 0.5131]) tensor([0.0931, 0.1494, 0.3220, 0.4354]) -Greedy action tensor([-1.4177, -0.5471, 0.3946, 0.2702]) tensor([0.0670, 0.1601, 0.4105, 0.3624]) -Greedy action tensor([-1.7169, -0.9921, -0.0037, -0.6546]) tensor([0.0869, 0.1794, 0.4822, 0.2515]) -Greedy action tensor([-0.9684, -0.3155, 1.0520, 1.3782]) tensor([0.0478, 0.0919, 0.3606, 0.4997]) -Greedy action tensor([-1.0002, -0.6114, 0.2020, 0.3397]) tensor([0.1039, 0.1533, 0.3458, 0.3969]) -Greedy action tensor([-1.9517, -0.5856, 0.9106, 0.0173]) tensor([0.0338, 0.1325, 0.5916, 0.2421]) -Greedy action tensor([-0.3568, -0.2084, 1.0423, 1.5317]) tensor([0.0780, 0.0905, 0.3160, 0.5155]) -Greedy action tensor([-1.9097, -0.4246, 0.6656, -0.1414]) tensor([0.0410, 0.1809, 0.5381, 0.2401]) -Greedy action tensor([-1.1800, -0.4787, 0.4122, 0.7679]) tensor([0.0669, 0.1349, 0.3289, 0.4693]) -Greedy action tensor([-1.8027, -0.2499, 0.5689, -0.0475]) tensor([0.0450, 0.2126, 0.4821, 0.2603]) -Greedy action tensor([-1.7443, -0.5078, 0.5765, -0.0210]) tensor([0.0494, 0.1702, 0.5034, 0.2770]) -Greedy action tensor([-1.2369, -0.5369, 0.4707, 0.7778]) tensor([0.0624, 0.1256, 0.3441, 0.4678]) -Greedy action tensor([-1.8934, -0.3267, 0.6168, -0.1581]) tensor([0.0421, 0.2016, 0.5178, 0.2386]) -Greedy action tensor([-1.2040, -0.5851, 0.2659, 0.3614]) tensor([0.0834, 0.1549, 0.3627, 0.3990]) -Greedy action tensor([-1.8318, -0.4451, 0.6158, -0.1161]) tensor([0.0452, 0.1809, 0.5226, 0.2514]) -Greedy action tensor([-1.5757, -0.6586, 0.9634, 0.8458]) tensor([0.0365, 0.0912, 0.4618, 0.4105]) -Greedy action tensor([-0.9704, -0.5015, 0.5364, -0.4430]) tensor([0.1136, 0.1815, 0.5125, 0.1924]) -Greedy action tensor([-1.8034, -0.4897, 0.8113, 0.2185]) tensor([0.0386, 0.1434, 0.5268, 0.2912]) -Greedy action tensor([-1.7798, -0.4134, 0.5701, -0.0947]) tensor([0.0481, 0.1885, 0.5041, 0.2593]) -Greedy action tensor([-1.8545, -0.4388, 0.6816, -0.0402]) tensor([0.0419, 0.1725, 0.5288, 0.2569]) -Greedy action tensor([-1.5831, -0.5728, 0.1482, -0.5017]) tensor([0.0810, 0.2225, 0.4576, 0.2389]) -Greedy action tensor([-1.8606, -0.8152, 0.0226, -0.4338]) tensor([0.0686, 0.1950, 0.4508, 0.2856]) -Greedy action tensor([-1.9075, -0.4428, 0.6471, -0.1609]) tensor([0.0418, 0.1808, 0.5377, 0.2397]) -Greedy action tensor([-1.2790, -0.8063, -0.0774, -0.3825]) tensor([0.1193, 0.1914, 0.3968, 0.2925]) -Greedy action tensor([-1.2847, -0.5943, 0.3704, 0.4758]) tensor([0.0712, 0.1420, 0.3727, 0.4141]) -Greedy action tensor([-1.8037, -0.4485, 0.6279, 0.0028]) tensor([0.0448, 0.1735, 0.5092, 0.2725]) -Greedy action tensor([-1.7783, -0.4932, 0.5946, -0.1149]) tensor([0.0485, 0.1753, 0.5203, 0.2559]) -Greedy action tensor([-1.1637, -0.5621, 0.2750, 0.3456]) tensor([0.0865, 0.1578, 0.3645, 0.3912]) -Greedy action tensor([-1.7681, -0.5251, 0.7005, 0.0631]) tensor([0.0444, 0.1540, 0.5244, 0.2772]) -Greedy action tensor([-1.3617, -0.5702, 0.3602, 0.1980]) tensor([0.0738, 0.1627, 0.4126, 0.3509]) -Greedy action tensor([-0.3319, -0.4113, 0.1121, 0.4175]) tensor([0.1786, 0.1650, 0.2785, 0.3779]) -Greedy action tensor([-1.5999, -0.4578, 0.6252, 0.2625]) tensor([0.0504, 0.1580, 0.4667, 0.3248]) -Greedy action tensor([-1.8863, -0.4204, 0.6305, -0.1477]) tensor([0.0427, 0.1850, 0.5292, 0.2430]) -Greedy action tensor([-0.8945, -0.5822, 0.4280, 0.4179]) tensor([0.1017, 0.1390, 0.3816, 0.3778]) -Greedy action tensor([-0.1617, 1.1429, 0.0512, 0.1307]) tensor([0.1377, 0.5075, 0.1703, 0.1844]) -Greedy action tensor([-1.9275, -0.9244, 0.1349, -0.3028]) tensor([0.0600, 0.1636, 0.4718, 0.3046]) -Greedy action tensor([-1.9843, -0.7158, 0.9685, 0.1227]) tensor([0.0313, 0.1113, 0.5999, 0.2575]) -Greedy action tensor([-0.3543, -0.4756, 0.3016, 0.0736]) tensor([0.1870, 0.1657, 0.3604, 0.2869]) -Greedy action tensor([-1.4663, -1.1029, 0.7414, 1.0409]) tensor([0.0420, 0.0604, 0.3821, 0.5155]) -Greedy action tensor([-1.2338, 0.9763, 0.4895, 0.1747]) tensor([0.0505, 0.4602, 0.2829, 0.2065]) -Greedy action tensor([-1.4976, -0.4537, 1.0569, 1.0786]) tensor([0.0335, 0.0951, 0.4310, 0.4404]) -Greedy action tensor([-1.7051, -0.4907, 0.6212, 0.0925]) tensor([0.0484, 0.1632, 0.4960, 0.2924]) -Greedy action tensor([-1.3059, -0.5854, 0.3677, 0.2421]) tensor([0.0764, 0.1570, 0.4073, 0.3593]) -Greedy action tensor([ 0.4268, 0.9810, -0.0436, 0.7464]) tensor([0.2109, 0.3671, 0.1318, 0.2903]) -Greedy action tensor([0.3455, 0.1927, 0.0152, 0.5026]) tensor([0.2669, 0.2291, 0.1918, 0.3123]) -Greedy action tensor([ 0.2706, 0.8724, -0.0609, 0.6750]) tensor([0.1984, 0.3621, 0.1424, 0.2972]) -Greedy action tensor([ 0.0246, -0.1153, -0.1888, 0.9148]) tensor([0.1956, 0.1700, 0.1580, 0.4764]) -Greedy action tensor([ 0.4476, 1.3944, 0.0606, -0.2880]) tensor([0.2111, 0.5443, 0.1434, 0.1012]) -Greedy action tensor([ 1.5641, -0.1311, 1.9740, 0.6747]) tensor([0.3225, 0.0592, 0.4858, 0.1325]) -Greedy action tensor([-0.8910, 0.1063, -0.2243, -1.0081]) tensor([0.1527, 0.4140, 0.2974, 0.1358]) -Greedy action tensor([ 0.7834, -0.1061, 1.2969, 1.1488]) tensor([0.2211, 0.0908, 0.3695, 0.3186]) -Greedy action tensor([-0.0895, 0.3351, 0.3828, -0.0537]) tensor([0.1935, 0.2958, 0.3103, 0.2005]) -Greedy action tensor([-0.1422, -0.5007, 0.2829, 0.8990]) tensor([0.1650, 0.1153, 0.2524, 0.4673]) -Greedy action tensor([ 1.5494, -2.0506, 0.0782, 1.2799]) tensor([0.4949, 0.0135, 0.1136, 0.3780]) -Greedy action tensor([ 0.3103, -0.5375, -0.0850, 0.1698]) tensor([0.3366, 0.1442, 0.2267, 0.2925]) -Greedy action tensor([0.9896, 0.3630, 0.6642, 0.2954]) tensor([0.3628, 0.1939, 0.2620, 0.1812]) -Greedy action tensor([-0.5080, -1.2678, -0.6378, -0.2968]) tensor([0.2792, 0.1306, 0.2453, 0.3449]) -Greedy action tensor([ 1.2342, -1.6947, 0.5079, 1.1325]) tensor([0.4098, 0.0219, 0.1982, 0.3701]) -Greedy action tensor([ 1.7188, -0.4402, 1.0036, 1.1311]) tensor([0.4629, 0.0534, 0.2264, 0.2572]) -Greedy action tensor([-0.1259, 0.0112, -0.5626, 1.1277]) tensor([0.1588, 0.1822, 0.1026, 0.5564]) -Greedy action tensor([-0.7043, -0.4253, -1.1211, 0.1422]) tensor([0.1882, 0.2488, 0.1241, 0.4389]) -Greedy action tensor([ 0.6075, -0.8531, -0.5404, 0.7496]) tensor([0.3701, 0.0859, 0.1174, 0.4266]) -Greedy action tensor([-0.5343, -1.0660, -1.1389, 0.6397]) tensor([0.1863, 0.1094, 0.1018, 0.6025]) -Greedy action tensor([ 0.5616, -1.1097, 0.0789, -0.0827]) tensor([0.4291, 0.0807, 0.2648, 0.2253]) -Greedy action tensor([ 0.4305, -1.0390, 2.3035, 0.3596]) tensor([0.1153, 0.0265, 0.7507, 0.1075]) -Greedy action tensor([ 1.2189, -0.2132, -0.6434, 1.5910]) tensor([0.3515, 0.0839, 0.0546, 0.5100]) -Greedy action tensor([ 1.2950, -0.5655, -0.4192, 0.1130]) tensor([0.6089, 0.0947, 0.1097, 0.1867]) -Greedy action tensor([ 1.2611, -0.2827, -0.2994, 0.1924]) tensor([0.5659, 0.1209, 0.1189, 0.1944]) -Greedy action tensor([ 0.1656, 0.1616, -0.8394, 1.3207]) tensor([0.1806, 0.1799, 0.0661, 0.5734]) -Greedy action tensor([ 0.4434, -0.5218, 1.0973, 0.4605]) tensor([0.2314, 0.0881, 0.4450, 0.2354]) -Greedy action tensor([-1.9516, -0.8009, -0.6379, -0.5036]) tensor([0.0824, 0.2604, 0.3065, 0.3506]) -Greedy action tensor([-0.1133, -0.4298, 0.0784, 0.3952]) tensor([0.2173, 0.1583, 0.2632, 0.3613]) -Greedy action tensor([2.2950, 0.3434, 0.6043, 0.7298]) tensor([0.6513, 0.0925, 0.1201, 0.1361]) -Greedy action tensor([ 1.3289, -0.5642, 1.5815, 1.3866]) tensor([0.2859, 0.0431, 0.3681, 0.3029]) -Greedy action tensor([ 0.5925, -0.3368, 1.2204, 0.7807]) tensor([0.2234, 0.0882, 0.4186, 0.2697]) -Greedy action tensor([1.3126, 0.0780, 1.1684, 0.4971]) tensor([0.3847, 0.1119, 0.3331, 0.1702]) -Greedy action tensor([ 0.4386, -0.5834, 0.9044, 0.6400]) tensor([0.2394, 0.0862, 0.3815, 0.2929]) -Greedy action tensor([ 1.7336, -0.0372, 0.5115, 1.6781]) tensor([0.4148, 0.0706, 0.1222, 0.3924]) -Greedy action tensor([-0.5912, 0.2580, 1.2244, -0.7368]) tensor([0.0966, 0.2259, 0.5939, 0.0835]) -Greedy action tensor([ 0.2723, -1.6449, -0.4233, 0.5389]) tensor([0.3388, 0.0498, 0.1690, 0.4424]) -Greedy action tensor([ 1.4391, -1.5815, 1.7895, 0.5157]) tensor([0.3490, 0.0170, 0.4954, 0.1386]) -Greedy action tensor([ 0.5695, 0.8369, -0.1529, 0.5211]) tensor([0.2670, 0.3489, 0.1297, 0.2544]) -Greedy action tensor([ 0.6648, -0.1779, 0.7579, 1.0359]) tensor([0.2514, 0.1082, 0.2759, 0.3644]) -Greedy action tensor([ 0.2900, -0.3867, 0.2371, -0.7146]) tensor([0.3542, 0.1801, 0.3360, 0.1297]) -Greedy action tensor([-0.0878, -1.0979, -0.3975, 0.0089]) tensor([0.3126, 0.1138, 0.2293, 0.3443]) -Greedy action tensor([ 0.8702, -1.2911, 0.8628, -0.1443]) tensor([0.4048, 0.0466, 0.4018, 0.1468]) -Greedy action tensor([ 1.4215, -1.0509, 0.1684, 1.4739]) tensor([0.4126, 0.0348, 0.1178, 0.4348]) -Greedy action tensor([ 0.2600, -0.1663, -0.3358, -0.1296]) tensor([0.3470, 0.2266, 0.1913, 0.2351]) -Greedy action tensor([-0.5847, -3.2524, 0.0084, 0.3622]) tensor([0.1833, 0.0127, 0.3316, 0.4724]) -Greedy action tensor([ 0.9801, -0.4704, 1.3058, 0.6645]) tensor([0.2986, 0.0700, 0.4136, 0.2178]) -Greedy action tensor([ 0.9292, -0.1785, 1.2017, 0.6144]) tensor([0.2964, 0.0979, 0.3893, 0.2164]) -Greedy action tensor([1.8440, 1.6164, 0.9176, 1.0548]) tensor([0.3778, 0.3009, 0.1496, 0.1716]) -Greedy action tensor([ 0.5050, -0.2290, -0.6577, 0.2104]) tensor([0.3941, 0.1892, 0.1232, 0.2935]) -Greedy action tensor([ 0.3546, -0.2175, 1.0888, 0.4741]) tensor([0.2094, 0.1182, 0.4364, 0.2360]) -Greedy action tensor([ 0.6931, -0.1673, -0.3079, -0.3290]) tensor([0.4650, 0.1967, 0.1709, 0.1673]) -Greedy action tensor([-1.2596, -0.0627, -0.2620, 0.1071]) tensor([0.0914, 0.3024, 0.2478, 0.3584]) -Greedy action tensor([ 1.8010, -0.9405, 0.5747, 1.3926]) tensor([0.4944, 0.0319, 0.1450, 0.3286]) -Greedy action tensor([-0.0035, -0.4587, -0.0980, 0.0099]) tensor([0.2811, 0.1783, 0.2557, 0.2849]) -Greedy action tensor([ 0.5035, -1.0419, 0.8937, 1.2041]) tensor([0.2125, 0.0453, 0.3139, 0.4282]) -Greedy action tensor([-0.2499, 0.8644, 0.1318, 0.7199]) tensor([0.1227, 0.3739, 0.1797, 0.3236]) -Greedy action tensor([ 0.7701, -1.3056, 1.7410, 0.6561]) tensor([0.2147, 0.0269, 0.5668, 0.1915]) -Greedy action tensor([ 0.2896, -1.1461, -0.7747, 1.1929]) tensor([0.2469, 0.0587, 0.0852, 0.6092]) -Greedy action tensor([ 0.1906, 0.1810, -0.3367, 1.2338]) tensor([0.1845, 0.1828, 0.1089, 0.5238]) -Greedy action tensor([ 0.6241, 1.4226, -0.7708, 0.5475]) tensor([0.2275, 0.5055, 0.0564, 0.2107]) -Greedy action tensor([ 1.9415, 0.1191, 1.4438, -0.2097]) tensor([0.5302, 0.0857, 0.3224, 0.0617]) -Greedy action tensor([ 0.8903, 0.3975, -0.2590, -0.0413]) tensor([0.4307, 0.2631, 0.1365, 0.1697]) -Greedy action tensor([-0.5998, 0.2182, 0.2852, -0.3045]) tensor([0.1422, 0.3222, 0.3445, 0.1910]) -Greedy action tensor([ 0.6649, -0.0232, 0.8006, 0.8509]) tensor([0.2596, 0.1305, 0.2973, 0.3127]) -Greedy action tensor([-0.2943, -1.9790, -0.2487, 0.8434]) tensor([0.1869, 0.0347, 0.1956, 0.5829]) -Greedy action tensor([-0.0012, 0.2669, 1.2098, 0.7971]) tensor([0.1268, 0.1658, 0.4257, 0.2817]) -Greedy action tensor([ 0.9515, -0.4116, 0.4271, 0.1610]) tensor([0.4345, 0.1112, 0.2572, 0.1971]) -Greedy action tensor([-0.3008, 0.4340, 0.3097, 0.6232]) tensor([0.1343, 0.2800, 0.2473, 0.3383]) -Greedy action tensor([ 1.9306, -0.8367, -0.3945, 0.2017]) tensor([0.7473, 0.0470, 0.0731, 0.1326]) -Greedy action tensor([ 0.4337, -0.1057, -0.2542, -0.6616]) tensor([0.4132, 0.2409, 0.2077, 0.1382]) -Greedy action tensor([ 0.6258, -0.1161, -0.9723, 0.7101]) tensor([0.3615, 0.1721, 0.0731, 0.3933]) -Greedy action tensor([ 0.6464, -0.7043, 0.6698, 2.3012]) tensor([0.1331, 0.0345, 0.1362, 0.6962]) -Greedy action tensor([ 1.0192, -0.0143, 1.4304, 0.2975]) tensor([0.2985, 0.1062, 0.4503, 0.1450]) -Greedy action tensor([ 0.4263, 0.3018, -0.6031, 1.6105]) tensor([0.1815, 0.1603, 0.0649, 0.5933]) -Greedy action tensor([ 0.2489, -0.6412, 0.5163, 0.9037]) tensor([0.2154, 0.0885, 0.2815, 0.4147]) -Greedy action tensor([-0.0492, 0.1604, -0.0532, 0.3223]) tensor([0.2137, 0.2635, 0.2129, 0.3099]) -Greedy action tensor([ 1.2922, 0.2920, -0.3894, 0.6641]) tensor([0.4790, 0.1762, 0.0891, 0.2556]) -Greedy action tensor([ 0.2935, -0.1284, -0.6344, 1.0779]) tensor([0.2357, 0.1546, 0.0932, 0.5165]) -Greedy action tensor([ 1.0335, -0.6952, 0.4498, 0.2722]) tensor([0.4540, 0.0806, 0.2533, 0.2121]) -Greedy action tensor([-0.7032, 0.2358, 0.3409, -0.0169]) tensor([0.1193, 0.3050, 0.3388, 0.2369]) -Greedy action tensor([ 0.2744, -1.6865, 0.6430, 0.2727]) tensor([0.2790, 0.0393, 0.4033, 0.2785]) -Greedy action tensor([ 0.4525, -0.0300, 0.0037, -0.1238]) tensor([0.3549, 0.2191, 0.2266, 0.1994]) -Greedy action tensor([ 0.8899, -0.4507, 0.1023, -0.4771]) tensor([0.5072, 0.1327, 0.2307, 0.1293]) -Greedy action tensor([ 0.7381, -0.4532, -0.1022, -0.3356]) tensor([0.4814, 0.1463, 0.2078, 0.1645]) -Greedy action tensor([ 1.0389, -0.6429, -0.1655, -0.4580]) tensor([0.5849, 0.1088, 0.1754, 0.1309]) -Greedy action tensor([ 0.2699, -0.0268, -0.1130, -0.5089]) tensor([0.3467, 0.2577, 0.2364, 0.1591]) -Greedy action tensor([ 0.4036, 0.1089, -0.0410, -0.2628]) tensor([0.3449, 0.2569, 0.2211, 0.1771]) -Greedy action tensor([ 0.8608, -0.1261, -0.0064, -0.6163]) tensor([0.4948, 0.1844, 0.2079, 0.1130]) -Greedy action tensor([ 0.4759, -0.2073, 0.1066, 0.0076]) tensor([0.3543, 0.1789, 0.2449, 0.2218]) -Greedy action tensor([ 4.9613e-01, -2.2193e-01, -4.2045e-04, -2.3159e-01]) tensor([0.3877, 0.1891, 0.2360, 0.1873]) -Greedy action tensor([ 0.7243, -0.3544, 0.0136, -0.5796]) tensor([0.4756, 0.1617, 0.2336, 0.1291]) -Greedy action tensor([ 0.5519, -0.1869, -0.0214, -0.3294]) tensor([0.4072, 0.1945, 0.2295, 0.1687]) -Greedy action tensor([ 0.6859, -0.3859, 0.0105, -0.2770]) tensor([0.4478, 0.1533, 0.2279, 0.1710]) -Greedy action tensor([ 0.7331, -0.6841, -0.1197, -0.5837]) tensor([0.5164, 0.1252, 0.2201, 0.1384]) -Greedy action tensor([ 0.6740, -0.2062, 0.2207, -0.4239]) tensor([0.4195, 0.1740, 0.2666, 0.1399]) -Greedy action tensor([ 0.3049, -0.0004, 0.0626, -0.3719]) tensor([0.3300, 0.2432, 0.2590, 0.1677]) -Greedy action tensor([ 0.6024, -0.3743, 0.0372, -0.3846]) tensor([0.4315, 0.1625, 0.2452, 0.1608]) -Greedy action tensor([ 0.7637, -0.5410, -0.1022, -0.6124]) tensor([0.5143, 0.1395, 0.2163, 0.1299]) -Greedy action tensor([ 0.7200, -0.3403, -0.0607, -0.2615]) tensor([0.4589, 0.1589, 0.2102, 0.1720]) -Greedy action tensor([ 0.4458, -0.2522, -0.0352, -0.0924]) tensor([0.3704, 0.1843, 0.2290, 0.2162]) -Greedy action tensor([ 0.4710, 0.0220, 0.0313, -0.2763]) tensor([0.3628, 0.2316, 0.2337, 0.1719]) -Greedy action tensor([ 0.3282, -0.3840, 0.1757, -0.4724]) tensor([0.3574, 0.1753, 0.3068, 0.1605]) -Greedy action tensor([ 0.5656, -0.3212, 0.0288, -0.4784]) tensor([0.4258, 0.1754, 0.2489, 0.1499]) -Greedy action tensor([ 0.6473, -0.3679, -0.0518, -0.4963]) tensor([0.4591, 0.1663, 0.2282, 0.1463]) -Greedy action tensor([ 0.7078, -0.5567, -0.2646, -0.5294]) tensor([0.5126, 0.1447, 0.1939, 0.1488]) -Greedy action tensor([ 0.8747, -0.9545, -0.0517, -0.3451]) tensor([0.5400, 0.0867, 0.2138, 0.1595]) -Greedy action tensor([ 0.4398, -0.0722, -0.1193, -0.3102]) tensor([0.3783, 0.2267, 0.2163, 0.1787]) -Greedy action tensor([ 0.6054, -0.2566, -0.0280, -0.3440]) tensor([0.4273, 0.1805, 0.2268, 0.1654]) -Greedy action tensor([ 0.3993, 0.1053, 0.0253, -0.1833]) tensor([0.3343, 0.2491, 0.2300, 0.1867]) -Greedy action tensor([ 0.8156, -0.1922, 0.0406, -0.2202]) tensor([0.4586, 0.1674, 0.2113, 0.1628]) -Greedy action tensor([ 0.2515, 0.0941, 0.0325, -0.1063]) tensor([0.2979, 0.2545, 0.2393, 0.2083]) -Greedy action tensor([ 0.4067, 0.0248, -0.1026, -0.2439]) tensor([0.3565, 0.2433, 0.2142, 0.1860]) -Greedy action tensor([ 0.6887, -0.3350, 0.0188, -0.4148]) tensor([0.4540, 0.1631, 0.2323, 0.1506]) -Greedy action tensor([ 0.6372, -0.6784, 0.0201, -0.1863]) tensor([0.4451, 0.1194, 0.2401, 0.1954]) -Greedy action tensor([ 0.4673, -0.2141, -0.0874, -0.2823]) tensor([0.3918, 0.1982, 0.2249, 0.1851]) -Greedy action tensor([ 0.7932, -0.4225, -0.0475, -0.4492]) tensor([0.4959, 0.1470, 0.2139, 0.1432]) -Greedy action tensor([ 0.5784, -0.4555, -0.1373, -0.1562]) tensor([0.4303, 0.1530, 0.2103, 0.2064]) -Greedy action tensor([ 0.6646, -0.4582, 0.0748, -0.2857]) tensor([0.4412, 0.1436, 0.2446, 0.1706]) -Greedy action tensor([ 0.4952, -0.2848, 0.2717, -0.5315]) tensor([0.3822, 0.1752, 0.3057, 0.1369]) -Greedy action tensor([ 0.2181, 0.1686, -0.0898, 0.0988]) tensor([0.2798, 0.2663, 0.2056, 0.2483]) -Greedy action tensor([ 0.5069, -0.2700, -0.0609, -0.1775]) tensor([0.3951, 0.1817, 0.2239, 0.1993]) -Greedy action tensor([ 0.3522, -0.1562, 0.0949, -0.3383]) tensor([0.3477, 0.2091, 0.2688, 0.1743]) -Greedy action tensor([ 0.7214, -0.5172, -0.0433, -0.4143]) tensor([0.4816, 0.1396, 0.2242, 0.1547]) -Greedy action tensor([ 0.5669, -0.2396, -0.1127, -0.1755]) tensor([0.4116, 0.1838, 0.2086, 0.1959]) -Greedy action tensor([ 0.4720, -0.0438, 0.0247, -0.2423]) tensor([0.3669, 0.2190, 0.2345, 0.1796]) -Greedy action tensor([ 0.3934, -0.0223, -0.1000, -0.2102]) tensor([0.3549, 0.2342, 0.2167, 0.1941]) -Greedy action tensor([ 0.6567, -0.2406, -0.0228, -0.4280]) tensor([0.4439, 0.1810, 0.2250, 0.1501]) -Greedy action tensor([ 1.3186, -1.0984, -0.1429, -0.7728]) tensor([0.6922, 0.0617, 0.1605, 0.0855]) -Greedy action tensor([ 1.0043, -0.3700, -0.0659, -0.4150]) tensor([0.5441, 0.1377, 0.1866, 0.1316]) -Greedy action tensor([ 0.7476, -0.3507, 0.0350, -0.2313]) tensor([0.4546, 0.1516, 0.2229, 0.1708]) -Greedy action tensor([ 0.5939, -0.2763, 0.0150, -0.2656]) tensor([0.4162, 0.1743, 0.2333, 0.1762]) -Greedy action tensor([ 0.6767, -0.5400, 0.0454, -0.2843]) tensor([0.4524, 0.1340, 0.2406, 0.1730]) -Greedy action tensor([ 1.3103, -0.7810, 0.0016, -0.9943]) tensor([0.6696, 0.0827, 0.1809, 0.0668]) -Greedy action tensor([ 0.5945, -0.2255, 0.0541, -0.0512]) tensor([0.3926, 0.1729, 0.2287, 0.2058]) -Greedy action tensor([ 0.1685, -0.0484, 0.0418, -0.0920]) tensor([0.2893, 0.2329, 0.2549, 0.2230]) -Greedy action tensor([ 0.5129, -0.2927, -0.2781, -0.4325]) tensor([0.4369, 0.1952, 0.1981, 0.1698]) -Greedy action tensor([ 0.5700, -0.0684, -0.0908, -0.2984]) tensor([0.4058, 0.2143, 0.2096, 0.1703]) -Greedy action tensor([ 0.8453, -0.1208, -0.0704, -0.3721]) tensor([0.4815, 0.1832, 0.1927, 0.1425]) -Greedy action tensor([ 0.7967, -0.2955, -0.0660, -0.2901]) tensor([0.4774, 0.1601, 0.2015, 0.1610]) -Greedy action tensor([ 0.7209, -0.3309, -0.0544, -0.1410]) tensor([0.4480, 0.1565, 0.2063, 0.1892]) -Greedy action tensor([ 0.2834, -0.0045, -0.1257, -0.1572]) tensor([0.3270, 0.2452, 0.2172, 0.2105]) -Greedy action tensor([ 0.5237, -0.2535, 0.1245, -0.2624]) tensor([0.3867, 0.1778, 0.2594, 0.1762]) -Greedy action tensor([ 0.4228, -0.0916, -0.0067, -0.1332]) tensor([0.3543, 0.2118, 0.2306, 0.2032]) -Greedy action tensor([ 0.7353, -0.5833, -0.2930, -0.6429]) tensor([0.5327, 0.1425, 0.1905, 0.1343]) -Greedy action tensor([ 0.7785, -0.4314, 0.2489, -0.6592]) tensor([0.4707, 0.1404, 0.2772, 0.1118]) -Greedy action tensor([ 0.4131, -0.1500, -0.0172, -0.2219]) tensor([0.3637, 0.2071, 0.2365, 0.1927]) -Greedy action tensor([ 0.3991, 0.2017, -0.0011, -0.2831]) tensor([0.3337, 0.2739, 0.2237, 0.1687]) -Greedy action tensor([ 0.3840, 0.1161, 0.0051, -0.3097]) tensor([0.3391, 0.2594, 0.2321, 0.1694]) -Greedy action tensor([ 0.5982, -0.4168, -0.1284, -0.4286]) tensor([0.4537, 0.1644, 0.2194, 0.1625]) -Greedy action tensor([ 0.6297, 0.0325, 0.0931, -0.3035]) tensor([0.3955, 0.2177, 0.2313, 0.1556]) -Greedy action tensor([ 0.3030, -0.3232, 0.1358, -0.2654]) tensor([0.3393, 0.1814, 0.2871, 0.1922]) -Greedy action tensor([ 0.6636, -0.1749, -0.1010, -0.3648]) tensor([0.4434, 0.1917, 0.2064, 0.1585]) -Greedy action tensor([ 0.8580, 0.0756, -0.0549, -0.4070]) tensor([0.4671, 0.2136, 0.1875, 0.1318]) -Greedy action tensor([ 0.6567, 0.0871, -0.1123, -0.2472]) tensor([0.4108, 0.2324, 0.1904, 0.1664]) -Greedy action tensor([ 0.4980, -0.3457, 0.0620, -0.3766]) tensor([0.4010, 0.1725, 0.2593, 0.1672]) -Greedy action tensor([ 0.6029, -0.2340, -0.0861, -0.3274]) tensor([0.4293, 0.1859, 0.2155, 0.1693]) -Greedy action tensor([ 0.5996, -0.2777, -0.0054, -0.1123]) tensor([0.4077, 0.1696, 0.2226, 0.2001]) -Greedy action tensor([ 0.4973, -0.3476, -0.0541, -0.4282]) tensor([0.4163, 0.1788, 0.2399, 0.1650]) -Greedy action tensor([ 0.5625, -0.4261, -0.0691, -0.3982]) tensor([0.4374, 0.1627, 0.2326, 0.1673]) -Greedy action tensor([ 0.3693, -0.1062, -0.0669, -0.2054]) tensor([0.3532, 0.2196, 0.2284, 0.1988]) -Greedy action tensor([ 0.8586, -0.6866, -0.1240, -0.3471]) tensor([0.5299, 0.1130, 0.1984, 0.1587]) -Greedy action tensor([ 0.2884, -0.0024, -0.0711, -0.2720]) tensor([0.3315, 0.2478, 0.2314, 0.1893]) -Greedy action tensor([ 0.8430, -0.1133, -0.5144, -0.2914]) tensor([0.5094, 0.1957, 0.1311, 0.1638]) -Greedy action tensor([ 1.0873, -0.5188, -0.2007, 0.1184]) tensor([0.5388, 0.1081, 0.1486, 0.2045]) -Greedy action tensor([ 1.2015, -0.6534, -0.0070, -0.0060]) tensor([0.5701, 0.0892, 0.1703, 0.1704]) -Greedy action tensor([ 1.6442, -0.3287, -0.3171, 0.3153]) tensor([0.6475, 0.0900, 0.0911, 0.1714]) -Greedy action tensor([ 1.5927, -0.5322, -0.3525, 0.3324]) tensor([0.6468, 0.0773, 0.0925, 0.1834]) -Greedy action tensor([ 1.4215, -0.2302, -0.2419, 0.1982]) tensor([0.5969, 0.1144, 0.1131, 0.1756]) -Greedy action tensor([ 1.6885, -1.0098, -0.3533, 0.2676]) tensor([0.6951, 0.0468, 0.0902, 0.1679]) -Greedy action tensor([ 2.2447, -1.0306, -0.3991, 0.8828]) tensor([0.7326, 0.0277, 0.0521, 0.1877]) -Greedy action tensor([ 1.7810, -0.6000, -0.4288, 0.0612]) tensor([0.7240, 0.0669, 0.0794, 0.1297]) -Greedy action tensor([ 1.0311, -0.2489, -0.1982, 0.0670]) tensor([0.5123, 0.1424, 0.1499, 0.1954]) -Greedy action tensor([ 1.3969, -0.6258, -0.0618, 0.3388]) tensor([0.5841, 0.0773, 0.1358, 0.2028]) -Greedy action tensor([ 1.6128, 0.1184, -0.1973, -0.3008]) tensor([0.6512, 0.1461, 0.1066, 0.0961]) -Greedy action tensor([ 0.8898, 0.1320, -0.1912, 0.0833]) tensor([0.4436, 0.2079, 0.1505, 0.1980]) -Greedy action tensor([ 1.1508, -0.5320, -0.1609, 0.2691]) tensor([0.5350, 0.0994, 0.1441, 0.2215]) -Greedy action tensor([ 1.0635, -0.5897, -0.1507, 0.3187]) tensor([0.5094, 0.0975, 0.1513, 0.2419]) -Greedy action tensor([ 1.5299, -0.3481, -0.3628, 0.2306]) tensor([0.6344, 0.0970, 0.0956, 0.1730]) -Greedy action tensor([ 0.9973, -0.5273, -0.0205, -0.0051]) tensor([0.5138, 0.1119, 0.1857, 0.1886]) -Greedy action tensor([ 1.6917, -0.6612, -0.4013, 0.3583]) tensor([0.6748, 0.0642, 0.0832, 0.1778]) -Greedy action tensor([ 1.1313, -0.1709, 0.1164, -0.1380]) tensor([0.5221, 0.1420, 0.1892, 0.1467]) -Greedy action tensor([ 1.2617, -0.4428, -0.1995, -0.0434]) tensor([0.5935, 0.1079, 0.1377, 0.1609]) -Greedy action tensor([ 0.2767, -0.2700, 0.1315, -0.0705]) tensor([0.3174, 0.1837, 0.2745, 0.2243]) -Greedy action tensor([ 0.6102, -0.3654, 0.1796, -0.1621]) tensor([0.4018, 0.1515, 0.2612, 0.1856]) -Greedy action tensor([ 1.3063, 0.0586, -0.1472, -0.0457]) tensor([0.5619, 0.1614, 0.1313, 0.1454]) -Greedy action tensor([ 1.0429, -0.4177, -0.5753, 0.5006]) tensor([0.4971, 0.1154, 0.0986, 0.2890]) -Greedy action tensor([ 1.2489, -0.5339, -0.5531, 0.5609]) tensor([0.5448, 0.0916, 0.0899, 0.2738]) -Greedy action tensor([ 1.5156, -0.5493, -0.4291, 0.8765]) tensor([0.5563, 0.0706, 0.0796, 0.2936]) -Greedy action tensor([ 0.8303, -0.3423, -0.1576, -0.1087]) tensor([0.4824, 0.1493, 0.1796, 0.1886]) -Greedy action tensor([ 0.8807, -0.2163, -0.1398, -0.4408]) tensor([0.5099, 0.1703, 0.1838, 0.1360]) -Greedy action tensor([ 1.6204, -0.3686, -0.2897, 0.1078]) tensor([0.6643, 0.0909, 0.0984, 0.1464]) -Greedy action tensor([ 1.3185, -0.5805, -0.1164, 0.1513]) tensor([0.5886, 0.0881, 0.1402, 0.1832]) -Greedy action tensor([ 1.1361, -0.5502, -0.2366, -0.0640]) tensor([0.5748, 0.1065, 0.1457, 0.1731]) -Greedy action tensor([ 1.0186, -0.7273, -0.1989, 0.2764]) tensor([0.5137, 0.0896, 0.1520, 0.2446]) -Greedy action tensor([ 1.2268, -0.1935, -0.1912, 0.0284]) tensor([0.5601, 0.1353, 0.1356, 0.1690]) -Greedy action tensor([ 1.3086, -0.4037, -0.2964, 0.1908]) tensor([0.5854, 0.1056, 0.1176, 0.1914]) -Greedy action tensor([ 1.0964, -0.3701, -0.2268, 0.2373]) tensor([0.5207, 0.1201, 0.1386, 0.2205]) -Greedy action tensor([ 1.4143, -0.6927, -0.4761, 0.0493]) tensor([0.6545, 0.0796, 0.0988, 0.1671]) -Greedy action tensor([ 0.9757, -0.3598, -0.2933, 0.0605]) tensor([0.5143, 0.1353, 0.1446, 0.2059]) -Greedy action tensor([ 1.2174, -0.5646, -0.2659, 0.3415]) tensor([0.5520, 0.0929, 0.1252, 0.2299]) -Greedy action tensor([ 0.9431, 0.2225, 0.0868, -0.1536]) tensor([0.4454, 0.2167, 0.1892, 0.1487]) -Greedy action tensor([ 1.5153, -0.9471, -0.2535, 0.8550]) tensor([0.5642, 0.0481, 0.0962, 0.2915]) -Greedy action tensor([ 1.4044, -0.5748, -0.1867, 0.0977]) tensor([0.6201, 0.0857, 0.1263, 0.1679]) -Greedy action tensor([ 1.6654, -0.6473, -0.1803, 0.1482]) tensor([0.6774, 0.0671, 0.1070, 0.1486]) -Greedy action tensor([ 1.0519, 0.1309, 0.0441, -0.4152]) tensor([0.5016, 0.1997, 0.1831, 0.1157]) -Greedy action tensor([ 1.2064, -0.0343, -0.4759, -0.1215]) tensor([0.5747, 0.1662, 0.1069, 0.1523]) -Greedy action tensor([ 1.8506, -0.6628, -0.0937, 0.5518]) tensor([0.6680, 0.0541, 0.0956, 0.1823]) -Greedy action tensor([ 2.2812, -0.9090, -0.2087, 0.6459]) tensor([0.7582, 0.0312, 0.0629, 0.1478]) -Greedy action tensor([ 1.2796, -0.9121, -0.5481, 0.5895]) tensor([0.5637, 0.0630, 0.0906, 0.2827]) -Greedy action tensor([ 1.2707, -0.7224, -0.4249, 0.4304]) tensor([0.5710, 0.0778, 0.1048, 0.2464]) -Greedy action tensor([ 1.8159e+00, -6.8748e-01, -3.5260e-01, -8.7127e-04]) tensor([0.7360, 0.0602, 0.0842, 0.1196]) -Greedy action tensor([ 1.0103, -0.5311, 0.0485, -0.1177]) tensor([0.5208, 0.1115, 0.1991, 0.1686]) -Greedy action tensor([ 1.6904, -0.5084, -0.2341, 0.4903]) tensor([0.6418, 0.0712, 0.0937, 0.1933]) -Greedy action tensor([ 1.3830, -0.0839, -0.1875, 0.1671]) tensor([0.5764, 0.1329, 0.1198, 0.1709]) -Greedy action tensor([ 0.8061, -0.3912, -0.2460, 0.3296]) tensor([0.4401, 0.1329, 0.1537, 0.2733]) -Greedy action tensor([ 0.8784, -0.3859, -0.1382, -0.1539]) tensor([0.4999, 0.1412, 0.1809, 0.1781]) -Greedy action tensor([ 0.6817, -0.1915, 0.0598, 0.0144]) tensor([0.4052, 0.1692, 0.2176, 0.2079]) -Greedy action tensor([ 1.2199, -0.2245, -0.2571, 0.0867]) tensor([0.5598, 0.1321, 0.1278, 0.1803]) -Greedy action tensor([ 1.0203, -0.4129, -0.1912, 0.2894]) tensor([0.4956, 0.1182, 0.1476, 0.2386]) -Greedy action tensor([ 1.0831, -0.2636, -0.2902, 0.0462]) tensor([0.5354, 0.1392, 0.1356, 0.1898]) -Greedy action tensor([ 1.1128, 0.2305, -0.2058, -0.2681]) tensor([0.5174, 0.2141, 0.1384, 0.1300]) -Greedy action tensor([ 0.9557, -0.5099, -0.3384, 0.4644]) tensor([0.4724, 0.1091, 0.1295, 0.2890]) -Greedy action tensor([ 1.2001, -0.4287, -0.1386, 0.0466]) tensor([0.5637, 0.1106, 0.1478, 0.1779]) -Greedy action tensor([ 0.4795, -0.0064, -0.2527, -0.2739]) tensor([0.3896, 0.2397, 0.1873, 0.1834]) -Greedy action tensor([ 0.5704, -0.0932, 0.1478, 0.0006]) tensor([0.3655, 0.1882, 0.2395, 0.2067]) -Greedy action tensor([ 1.7639, -0.9586, -0.5647, 0.2803]) tensor([0.7194, 0.0473, 0.0701, 0.1632]) -Greedy action tensor([ 0.9265, -0.6121, -0.2260, 0.5293]) tensor([0.4540, 0.0975, 0.1434, 0.3052]) -Greedy action tensor([ 1.5484, -0.2349, -0.2377, 0.3873]) tensor([0.6065, 0.1019, 0.1017, 0.1899]) -Greedy action tensor([ 0.8668, -0.2084, -0.0433, -0.0222]) tensor([0.4641, 0.1584, 0.1868, 0.1908]) -Greedy action tensor([ 1.1930, -0.5770, -0.2870, -0.0630]) tensor([0.5943, 0.1012, 0.1353, 0.1692]) -Greedy action tensor([ 0.9828, -0.4468, 0.0734, -0.4346]) tensor([0.5306, 0.1270, 0.2137, 0.1286]) -Greedy action tensor([ 1.0319, -0.1964, -0.1650, 0.1381]) tensor([0.4990, 0.1461, 0.1508, 0.2041]) -Greedy action tensor([ 1.0228, -0.4377, 0.0104, 0.0926]) tensor([0.5025, 0.1167, 0.1826, 0.1982]) -Greedy action tensor([ 1.2188, -0.3252, -0.2448, 0.4720]) tensor([0.5212, 0.1113, 0.1206, 0.2470]) -Greedy action tensor([ 1.0584, -0.2580, -0.0524, 0.2607]) tensor([0.4883, 0.1309, 0.1608, 0.2199]) -Greedy action tensor([ 0.7298, -0.2896, 0.2422, -0.1127]) tensor([0.4157, 0.1500, 0.2553, 0.1790]) -Greedy action tensor([ 1.2541, -0.6191, -0.3807, 0.4384]) tensor([0.5584, 0.0858, 0.1089, 0.2470]) -Greedy action tensor([ 1.1053, -0.2639, -0.0607, 0.0673]) tensor([0.5208, 0.1325, 0.1623, 0.1845]) -Greedy action tensor([ 1.1667, -0.1863, -0.1383, 0.1498]) tensor([0.5287, 0.1367, 0.1434, 0.1913]) -Greedy action tensor([ 0.9998, -0.4414, -0.3282, 0.2676]) tensor([0.5044, 0.1194, 0.1337, 0.2426]) -Greedy action tensor([ 1.2081, -0.7891, -0.2005, 0.6470]) tensor([0.5126, 0.0696, 0.1253, 0.2925]) -Greedy action tensor([ 0.9455, -0.4274, -0.4529, 0.2156]) tensor([0.5045, 0.1278, 0.1246, 0.2431]) -Greedy action tensor([ 0.6161, -0.4478, -0.4594, 0.4752]) tensor([0.3914, 0.1351, 0.1335, 0.3400]) -Greedy action tensor([-1.1702, -0.4118, 0.8649, 1.0414]) tensor([0.0502, 0.1072, 0.3842, 0.4584]) -Greedy action tensor([-1.5657, -0.4860, 0.6836, 0.4914]) tensor([0.0471, 0.1385, 0.4462, 0.3682]) -Greedy action tensor([-1.0042, -0.5713, 0.3194, 0.1505]) tensor([0.1056, 0.1628, 0.3967, 0.3350]) -Greedy action tensor([-1.6912, -0.5126, 0.1636, -0.2724]) tensor([0.0677, 0.2200, 0.4326, 0.2797]) -Greedy action tensor([-1.6219, -0.3998, 0.5011, -0.0236]) tensor([0.0565, 0.1918, 0.4722, 0.2794]) -Greedy action tensor([-0.2005, 0.0204, 0.9927, 1.4695]) tensor([0.0921, 0.1149, 0.3037, 0.4893]) -Greedy action tensor([-1.7382, -0.5040, 0.5646, -0.0282]) tensor([0.0501, 0.1721, 0.5010, 0.2769]) -Greedy action tensor([-1.9629, -0.4968, 1.1577, 0.4996]) tensor([0.0252, 0.1091, 0.5704, 0.2954]) -Greedy action tensor([-1.4923, -0.5148, 0.4153, 0.1879]) tensor([0.0634, 0.1686, 0.4274, 0.3405]) -Greedy action tensor([-1.7796, -0.5349, 1.0229, 0.5716]) tensor([0.0318, 0.1104, 0.5241, 0.3337]) -Greedy action tensor([-1.8352, -0.4764, 0.6244, -0.1091]) tensor([0.0450, 0.1752, 0.5268, 0.2530]) -Greedy action tensor([-1.8659, -0.4756, 0.6242, -0.1371]) tensor([0.0440, 0.1768, 0.5311, 0.2480]) -Greedy action tensor([-1.7489e+00, -9.8597e-01, -1.2824e-03, -7.8432e-01]) tensor([0.0869, 0.1863, 0.4988, 0.2280]) -Greedy action tensor([-0.6737, 0.1129, 0.2175, -0.3216]) tensor([0.1417, 0.3112, 0.3455, 0.2015]) -Greedy action tensor([-1.8258, -0.6671, 0.1336, -0.3006]) tensor([0.0630, 0.2007, 0.4469, 0.2895]) -Greedy action tensor([-1.0785, -0.1126, 0.9979, 1.1799]) tensor([0.0472, 0.1241, 0.3767, 0.4520]) -Greedy action tensor([-1.8068, -0.3358, 0.5233, -0.0988]) tensor([0.0473, 0.2058, 0.4860, 0.2609]) -Greedy action tensor([-1.9125, -0.4527, 0.6512, -0.1614]) tensor([0.0416, 0.1790, 0.5399, 0.2395]) -Greedy action tensor([-1.7389, -0.4334, 0.4269, -0.4108]) tensor([0.0582, 0.2147, 0.5075, 0.2196]) -Greedy action tensor([-1.6763, -0.5049, 0.6568, 0.1731]) tensor([0.0479, 0.1544, 0.4935, 0.3042]) -Greedy action tensor([-0.5983, -0.1508, 0.8004, 1.5559]) tensor([0.0656, 0.1027, 0.2658, 0.5659]) -Greedy action tensor([-1.7377, -1.0433, -0.0528, -0.7369]) tensor([0.0900, 0.1802, 0.4851, 0.2448]) -Greedy action tensor([-1.3958, -0.4713, 0.4587, 0.3427]) tensor([0.0641, 0.1616, 0.4096, 0.3647]) -Greedy action tensor([-1.1073, -0.4877, 0.5662, 1.0522]) tensor([0.0593, 0.1102, 0.3163, 0.5142]) -Greedy action tensor([-1.5148, -0.0280, 0.5741, 0.5396]) tensor([0.0469, 0.2076, 0.3792, 0.3663]) -Greedy action tensor([-1.4383, -0.5679, 0.4357, 0.2666]) tensor([0.0649, 0.1550, 0.4229, 0.3571]) -Greedy action tensor([-1.3751, -0.2731, 1.2174, 1.1326]) tensor([0.0337, 0.1015, 0.4507, 0.4141]) -Greedy action tensor([-1.8585, -0.4715, 0.6377, -0.1216]) tensor([0.0438, 0.1754, 0.5319, 0.2489]) -Greedy action tensor([-1.2608, -0.1075, 0.7845, 0.9849]) tensor([0.0468, 0.1484, 0.3622, 0.4425]) -Greedy action tensor([-1.7722, -0.4956, 0.6547, 0.0544]) tensor([0.0452, 0.1620, 0.5119, 0.2809]) -Greedy action tensor([-0.9429, -0.5919, 0.7207, 1.2312]) tensor([0.0606, 0.0861, 0.3200, 0.5332]) -Greedy action tensor([-1.8448, -0.4617, 0.6821, 0.0123]) tensor([0.0418, 0.1668, 0.5235, 0.2679]) -Greedy action tensor([-0.5150, -0.1465, 0.9538, 1.5840]) tensor([0.0669, 0.0967, 0.2906, 0.5458]) -Greedy action tensor([-0.6895, -0.5751, 0.1711, 0.3036]) tensor([0.1392, 0.1560, 0.3291, 0.3757]) -Greedy action tensor([-1.9431, -0.5133, 0.6518, -0.1611]) tensor([0.0408, 0.1704, 0.5464, 0.2424]) -Greedy action tensor([-1.1427, -0.5836, 0.2356, 0.2993]) tensor([0.0914, 0.1598, 0.3625, 0.3863]) -Greedy action tensor([-2.0461, -0.8049, 0.8938, 0.1711]) tensor([0.0307, 0.1063, 0.5810, 0.2820]) -Greedy action tensor([-0.1737, -0.3814, 0.1451, 0.1040]) tensor([0.2218, 0.1802, 0.3051, 0.2928]) -Greedy action tensor([-1.6751, -0.4295, 0.6838, 0.3234]) tensor([0.0446, 0.1549, 0.4716, 0.3289]) -Greedy action tensor([-1.7396, -0.4833, 0.5769, -0.0236]) tensor([0.0495, 0.1738, 0.5016, 0.2751]) -Greedy action tensor([-1.3174, -0.5121, 0.3790, 0.3790]) tensor([0.0707, 0.1582, 0.3856, 0.3856]) -Greedy action tensor([-0.5006, -0.4033, 0.1599, 0.1248]) tensor([0.1693, 0.1866, 0.3277, 0.3164]) -Greedy action tensor([-1.7668, -0.6653, 0.5380, 0.2701]) tensor([0.0461, 0.1387, 0.4619, 0.3534]) -Greedy action tensor([-1.8621, -0.4519, 0.6337, -0.1068]) tensor([0.0435, 0.1780, 0.5272, 0.2514]) -Greedy action tensor([-1.6943, -0.5223, 0.9102, 0.7928]) tensor([0.0336, 0.1084, 0.4542, 0.4038]) -Greedy action tensor([-1.7026e+00, -4.9504e-01, 5.6538e-01, -1.3412e-03]) tensor([0.0513, 0.1717, 0.4957, 0.2813]) -Greedy action tensor([-1.7623, -0.5133, 0.6728, 0.0652]) tensor([0.0452, 0.1576, 0.5161, 0.2811]) -Greedy action tensor([-1.9021, -0.4458, 0.6446, -0.1586]) tensor([0.0421, 0.1805, 0.5370, 0.2405]) -Greedy action tensor([-0.9449, -0.5061, 0.9764, 1.4203]) tensor([0.0499, 0.0774, 0.3410, 0.5316]) -Greedy action tensor([-1.1234, -0.4825, 0.3836, 0.6221]) tensor([0.0761, 0.1445, 0.3435, 0.4360]) -Greedy action tensor([-0.4040, -0.4760, 0.1769, 0.2454]) tensor([0.1775, 0.1652, 0.3174, 0.3399]) -Greedy action tensor([-1.5150, -0.5264, 0.4442, 0.1576]) tensor([0.0621, 0.1669, 0.4404, 0.3307]) -Greedy action tensor([-1.8116, -0.2824, 0.5831, -0.0829]) tensor([0.0450, 0.2077, 0.4936, 0.2536]) -Greedy action tensor([-1.7552, -0.3791, 0.7214, 0.2800]) tensor([0.0408, 0.1615, 0.4855, 0.3122]) -Greedy action tensor([-1.7038, -0.4065, 0.7131, 0.2382]) tensor([0.0438, 0.1602, 0.4908, 0.3052]) -Greedy action tensor([-0.7976, -0.5285, 0.3402, 0.4785]) tensor([0.1110, 0.1452, 0.3462, 0.3976]) -Greedy action tensor([-1.3741, -0.2083, 0.5471, 0.5507]) tensor([0.0559, 0.1793, 0.3817, 0.3831]) -Greedy action tensor([-1.1304, -0.5724, 0.2733, 0.2529]) tensor([0.0925, 0.1617, 0.3767, 0.3691]) -Greedy action tensor([-1.8218, -0.3359, 0.5855, -0.1231]) tensor([0.0455, 0.2010, 0.5050, 0.2486]) -Greedy action tensor([-1.3122, -0.4379, 0.4635, 0.5341]) tensor([0.0639, 0.1533, 0.3776, 0.4052]) -Greedy action tensor([-1.6398, -0.5384, 0.5141, 0.0077]) tensor([0.0561, 0.1688, 0.4836, 0.2915]) -Greedy action tensor([-1.0720, -0.3266, 1.1545, 1.3434]) tensor([0.0424, 0.0894, 0.3932, 0.4750]) -Greedy action tensor([-0.8829, -0.7376, 0.1729, -0.1382]) tensor([0.1401, 0.1620, 0.4028, 0.2951]) -Greedy action tensor([-1.4560, -0.0215, -0.2347, -0.3630]) tensor([0.0864, 0.3627, 0.2931, 0.2578]) -Greedy action tensor([-0.6917, -0.5874, -0.4624, -0.2932]) tensor([0.2059, 0.2285, 0.2589, 0.3067]) -Greedy action tensor([-1.2696, -0.6324, 0.4013, 0.3054]) tensor([0.0767, 0.1450, 0.4078, 0.3705]) -Greedy action tensor([-1.9058, -0.1563, 0.5998, -0.1573]) tensor([0.0404, 0.2324, 0.4950, 0.2322]) -Greedy action tensor([-2.0111, -0.9885, 0.6467, -0.0961]) tensor([0.0403, 0.1120, 0.5744, 0.2733]) -Greedy action tensor([-0.9800, 0.9831, 0.2005, 0.1800]) tensor([0.0687, 0.4889, 0.2235, 0.2190]) -Greedy action tensor([-1.1675, -0.6350, 0.2750, 0.2154]) tensor([0.0916, 0.1559, 0.3875, 0.3650]) -Greedy action tensor([-1.7046, -0.5017, 0.5306, -0.0383]) tensor([0.0527, 0.1755, 0.4928, 0.2790]) -Greedy action tensor([-0.9868, -0.5787, 0.4224, -0.0832]) tensor([0.1103, 0.1659, 0.4515, 0.2723]) -Greedy action tensor([-0.5489, -0.5362, 0.1685, 0.2770]) tensor([0.1576, 0.1596, 0.3229, 0.3599]) -Greedy action tensor([-1.7935, -0.5659, 0.8359, 0.1095]) tensor([0.0400, 0.1366, 0.5550, 0.2684]) -Greedy action tensor([-1.8178, -0.3714, 0.5890, -0.1065]) tensor([0.0457, 0.1941, 0.5072, 0.2530]) -Greedy action tensor([-1.5494, -0.4902, 1.0437, 0.9440]) tensor([0.0341, 0.0982, 0.4555, 0.4122]) -Greedy action tensor([-1.2570, 0.0658, 0.1371, 0.3246]) tensor([0.0733, 0.2751, 0.2954, 0.3563]) -Greedy action tensor([-1.7279, -0.6787, 0.4901, -0.0536]) tensor([0.0544, 0.1554, 0.5000, 0.2903]) -Greedy action tensor([-1.4446, -0.5440, 0.5153, -0.1672]) tensor([0.0707, 0.1740, 0.5018, 0.2536]) -Greedy action tensor([-1.1095, -0.3541, 0.8174, 1.2685]) tensor([0.0481, 0.1024, 0.3305, 0.5189]) -Greedy action tensor([-0.1054, 0.0357, 0.9961, 1.4466]) tensor([0.1012, 0.1165, 0.3045, 0.4778]) -Greedy action tensor([ 0.3595, 0.1708, 0.0851, -0.1248]) tensor([0.3121, 0.2584, 0.2372, 0.1923]) -Greedy action tensor([ 0.3410, 0.1171, 0.0947, -0.3025]) tensor([0.3219, 0.2573, 0.2516, 0.1692]) -Greedy action tensor([ 1.0529, -0.9953, 0.0130, -0.4375]) tensor([0.5856, 0.0755, 0.2070, 0.1319]) -Greedy action tensor([ 0.5002, 0.1817, -0.0370, -0.4261]) tensor([0.3693, 0.2686, 0.2158, 0.1463]) -Greedy action tensor([ 0.5092, -0.2048, -0.0980, -0.1997]) tensor([0.3958, 0.1938, 0.2156, 0.1948]) -Greedy action tensor([ 0.5540, -0.4555, -0.1265, -0.5074]) tensor([0.4511, 0.1644, 0.2284, 0.1561]) -Greedy action tensor([ 0.6146, -0.1227, 0.2556, -0.5555]) tensor([0.4021, 0.1924, 0.2808, 0.1248]) -Greedy action tensor([ 0.8656, -0.6016, -0.0759, -0.4433]) tensor([0.5289, 0.1220, 0.2063, 0.1429]) -Greedy action tensor([ 0.2497, 0.1317, -0.0964, -0.1747]) tensor([0.3077, 0.2734, 0.2177, 0.2013]) -Greedy action tensor([ 0.7316, -0.6011, -0.0877, -0.5788]) tensor([0.5065, 0.1336, 0.2232, 0.1366]) -Greedy action tensor([ 1.0856, -0.6563, -0.0585, -0.6182]) tensor([0.5968, 0.1045, 0.1901, 0.1086]) -Greedy action tensor([ 0.2712, -0.0014, -0.0848, -0.0623]) tensor([0.3146, 0.2396, 0.2204, 0.2254]) -Greedy action tensor([ 0.4646, -0.2013, 0.2687, -0.3969]) tensor([0.3625, 0.1863, 0.2980, 0.1532]) -Greedy action tensor([ 0.3024, 0.1185, -0.0606, -0.1152]) tensor([0.3139, 0.2611, 0.2183, 0.2067]) -Greedy action tensor([ 0.4885, -0.1564, -0.0363, -0.4039]) tensor([0.3959, 0.2077, 0.2342, 0.1622]) -Greedy action tensor([ 0.4242, -0.1219, -0.2288, -0.6451]) tensor([0.4094, 0.2371, 0.2130, 0.1405]) -Greedy action tensor([ 0.6129, -0.1414, -0.0329, -0.1915]) tensor([0.4095, 0.1926, 0.2147, 0.1832]) -Greedy action tensor([ 0.6106, -0.4528, -0.0354, -0.3183]) tensor([0.4416, 0.1525, 0.2315, 0.1744]) -Greedy action tensor([ 0.3986, -0.2560, -0.0346, -0.5429]) tensor([0.3909, 0.2031, 0.2535, 0.1525]) -Greedy action tensor([ 0.4780, -0.1608, 0.0171, -0.3718]) tensor([0.3867, 0.2041, 0.2439, 0.1653]) -Greedy action tensor([ 0.3095, -0.1029, -0.1876, -0.1276]) tensor([0.3429, 0.2270, 0.2086, 0.2215]) -Greedy action tensor([ 0.8297, -0.6854, 0.0750, -0.5586]) tensor([0.5156, 0.1133, 0.2424, 0.1286]) -Greedy action tensor([ 0.4960, 0.0921, -0.0856, -0.1679]) tensor([0.3648, 0.2436, 0.2039, 0.1878]) -Greedy action tensor([ 0.4619, -0.3892, 0.1775, -0.4086]) tensor([0.3849, 0.1643, 0.2896, 0.1612]) -Greedy action tensor([ 0.3519, 0.2725, 0.0019, -0.3397]) tensor([0.3196, 0.2952, 0.2252, 0.1600]) -Greedy action tensor([ 0.6369, -0.1181, -0.1071, -0.2670]) tensor([0.4255, 0.2000, 0.2022, 0.1723]) -Greedy action tensor([ 0.3514, 0.0029, 0.0071, -0.1611]) tensor([0.3318, 0.2342, 0.2352, 0.1988]) -Greedy action tensor([ 0.6478, -0.5184, -0.0312, -0.4322]) tensor([0.4633, 0.1443, 0.2350, 0.1573]) -Greedy action tensor([ 0.7833, -0.6158, -0.0705, -0.5163]) tensor([0.5141, 0.1269, 0.2189, 0.1402]) -Greedy action tensor([ 0.5137, -0.1563, -0.0959, -0.2468]) tensor([0.3964, 0.2028, 0.2155, 0.1853]) -Greedy action tensor([ 0.5308, -0.3443, -0.0264, -0.3322]) tensor([0.4147, 0.1729, 0.2375, 0.1750]) -Greedy action tensor([ 0.7582, -0.2874, -0.0843, -0.0852]) tensor([0.4520, 0.1589, 0.1947, 0.1945]) -Greedy action tensor([ 0.2619, -0.1513, -0.0292, -0.2191]) tensor([0.3303, 0.2185, 0.2469, 0.2042]) -Greedy action tensor([ 1.2751, -1.4462, -0.0396, -0.4844]) tensor([0.6638, 0.0437, 0.1783, 0.1143]) -Greedy action tensor([ 0.6294, 0.0537, -0.0896, -0.1565]) tensor([0.3992, 0.2245, 0.1945, 0.1819]) -Greedy action tensor([ 0.7496, -0.6381, -0.1667, -0.3968]) tensor([0.5083, 0.1269, 0.2033, 0.1615]) -Greedy action tensor([ 0.4911, -0.2996, 0.0802, -0.4773]) tensor([0.4006, 0.1817, 0.2656, 0.1521]) -Greedy action tensor([ 0.3588, -0.0694, -0.0691, -0.1946]) tensor([0.3474, 0.2264, 0.2265, 0.1997]) -Greedy action tensor([ 0.6112, -0.1989, -0.0025, -0.2689]) tensor([0.4165, 0.1853, 0.2255, 0.1727]) -Greedy action tensor([ 0.2979, -0.1714, 0.0356, -0.2405]) tensor([0.3358, 0.2100, 0.2583, 0.1960]) -Greedy action tensor([ 0.3947, -0.1915, -0.0428, -0.2657]) tensor([0.3678, 0.2047, 0.2375, 0.1900]) -Greedy action tensor([ 0.8349, -0.5959, -0.1216, -0.3571]) tensor([0.5189, 0.1241, 0.1994, 0.1576]) -Greedy action tensor([ 0.6440, 0.0837, -0.1611, -0.2728]) tensor([0.4136, 0.2362, 0.1849, 0.1653]) -Greedy action tensor([ 0.4007, -0.1258, -0.1050, -0.3055]) tensor([0.3721, 0.2198, 0.2244, 0.1837]) -Greedy action tensor([ 0.4029, -0.1397, 0.0142, -0.2690]) tensor([0.3610, 0.2098, 0.2448, 0.1844]) -Greedy action tensor([ 0.6348, -0.0679, -0.0424, -0.1055]) tensor([0.4032, 0.1997, 0.2048, 0.1923]) -Greedy action tensor([ 0.8027, -0.8324, -0.0462, -0.3759]) tensor([0.5180, 0.1010, 0.2216, 0.1594]) -Greedy action tensor([ 0.6465, 0.1618, -0.0042, -0.3103]) tensor([0.3966, 0.2442, 0.2069, 0.1523]) -Greedy action tensor([ 0.8110, -0.4303, -0.0797, -0.3351]) tensor([0.4957, 0.1433, 0.2034, 0.1576]) -Greedy action tensor([ 0.5287, -0.0288, -0.1529, -0.1880]) tensor([0.3896, 0.2231, 0.1970, 0.1903]) -Greedy action tensor([ 0.6055, -0.1193, -0.1009, -0.3493]) tensor([0.4232, 0.2050, 0.2088, 0.1629]) -Greedy action tensor([ 0.7220, -0.6109, -0.2722, -0.6806]) tensor([0.5320, 0.1403, 0.1969, 0.1308]) -Greedy action tensor([ 0.5483, -0.3089, 0.1217, -0.5482]) tensor([0.4147, 0.1760, 0.2707, 0.1385]) -Greedy action tensor([ 0.6655, -0.5476, 0.3330, -0.6466]) tensor([0.4379, 0.1302, 0.3140, 0.1179]) -Greedy action tensor([ 0.4506, -0.4605, -0.0533, -0.0747]) tensor([0.3850, 0.1548, 0.2326, 0.2277]) -Greedy action tensor([ 0.3655, 0.0091, -0.2621, -0.0674]) tensor([0.3469, 0.2429, 0.1852, 0.2250]) -Greedy action tensor([ 0.6840, -0.2639, -0.0408, -0.2827]) tensor([0.4440, 0.1721, 0.2151, 0.1689]) -Greedy action tensor([ 0.7649, -0.4121, -0.0743, -0.4770]) tensor([0.4928, 0.1519, 0.2129, 0.1423]) -Greedy action tensor([ 0.4268, -0.2274, -0.0219, -0.5589]) tensor([0.3950, 0.2054, 0.2522, 0.1474]) -Greedy action tensor([ 0.3164, -0.1314, -0.1145, -0.1765]) tensor([0.3449, 0.2204, 0.2241, 0.2107]) -Greedy action tensor([ 0.6679, -0.3713, 0.0352, -0.6719]) tensor([0.4658, 0.1648, 0.2474, 0.1220]) -Greedy action tensor([ 0.8229, -0.4197, 0.0084, -0.1942]) tensor([0.4777, 0.1379, 0.2116, 0.1728]) -Greedy action tensor([ 0.4971, -0.3881, -0.0125, -0.5301]) tensor([0.4217, 0.1740, 0.2533, 0.1510]) -Greedy action tensor([ 0.5673, -0.2258, 0.0095, -0.2201]) tensor([0.4032, 0.1824, 0.2308, 0.1835]) -Greedy action tensor([ 0.7146, -0.4865, -0.0545, -0.5516]) tensor([0.4887, 0.1470, 0.2265, 0.1378]) -Greedy action tensor([ 0.2876, -0.2371, -0.2401, -0.3106]) tensor([0.3661, 0.2166, 0.2160, 0.2013]) -Greedy action tensor([ 0.6822, -0.2081, 0.0329, -0.3068]) tensor([0.4339, 0.1781, 0.2267, 0.1614]) -Greedy action tensor([ 0.3805, -0.0151, 0.0481, -0.2303]) tensor([0.3409, 0.2295, 0.2445, 0.1851]) -Greedy action tensor([ 0.8453, -0.4691, -0.0162, -0.3968]) tensor([0.5051, 0.1357, 0.2134, 0.1459]) -Greedy action tensor([ 0.9761, -0.6947, -0.0423, -0.6394]) tensor([0.5721, 0.1076, 0.2066, 0.1137]) -Greedy action tensor([ 0.5221, 0.0800, -0.1359, -0.1103]) tensor([0.3715, 0.2387, 0.1924, 0.1974]) -Greedy action tensor([ 0.8837, -0.4138, -0.1646, -0.2223]) tensor([0.5116, 0.1398, 0.1793, 0.1693]) -Greedy action tensor([ 0.1058, -0.1085, 0.0059, -0.1889]) tensor([0.2893, 0.2335, 0.2618, 0.2155]) -Greedy action tensor([ 0.3255, 0.0583, -0.2333, -0.0898]) tensor([0.3336, 0.2554, 0.1908, 0.2202]) -Greedy action tensor([ 0.8325, -0.5628, 0.0178, -0.4440]) tensor([0.5077, 0.1258, 0.2248, 0.1417]) -Greedy action tensor([ 0.7759, -0.4963, 0.0123, -0.3927]) tensor([0.4861, 0.1362, 0.2265, 0.1511]) -Greedy action tensor([ 0.7658, -0.5745, -0.1149, -0.6139]) tensor([0.5187, 0.1358, 0.2150, 0.1305]) -Greedy action tensor([ 0.7897, -0.5756, 0.1260, -0.4616]) tensor([0.4863, 0.1241, 0.2504, 0.1391]) -Greedy action tensor([ 0.5328, -0.0599, -0.0306, -0.3416]) tensor([0.3938, 0.2177, 0.2242, 0.1643]) -Greedy action tensor([ 0.7355, -0.3695, -0.0680, -0.3425]) tensor([0.4719, 0.1563, 0.2113, 0.1606]) -Greedy action tensor([ 0.8766, -0.5832, -0.0457, -0.7265]) tensor([0.5461, 0.1268, 0.2171, 0.1099]) -Greedy action tensor([-0.0912, 0.0051, -0.3446, -0.0662]) tensor([0.2562, 0.2821, 0.1989, 0.2627]) -Greedy action tensor([1.3639, 0.0564, 1.3743, 0.3633]) tensor([0.3776, 0.1021, 0.3815, 0.1388]) -Greedy action tensor([ 0.4128, -0.4222, 1.4053, 0.3303]) tensor([0.1979, 0.0859, 0.5340, 0.1822]) -Greedy action tensor([-0.3871, -0.3202, -1.6869, 0.7935]) tensor([0.1786, 0.1910, 0.0487, 0.5817]) -Greedy action tensor([ 0.2839, -0.5078, 0.9231, 0.1583]) tensor([0.2364, 0.1071, 0.4480, 0.2085]) -Greedy action tensor([-0.3061, 1.3322, 1.4736, -0.0938]) tensor([0.0751, 0.3866, 0.4453, 0.0929]) -Greedy action tensor([0.3907, 0.1947, 0.0208, 0.2982]) tensor([0.2920, 0.2400, 0.2017, 0.2662]) -Greedy action tensor([-0.2368, -0.2602, 1.3926, -0.1671]) tensor([0.1227, 0.1199, 0.6259, 0.1316]) -Greedy action tensor([ 0.4321, 0.1047, -0.3432, 0.8081]) tensor([0.2749, 0.1981, 0.1266, 0.4004]) -Greedy action tensor([ 0.6119, -0.0793, -0.0499, 1.3522]) tensor([0.2431, 0.1218, 0.1254, 0.5097]) -Greedy action tensor([ 0.2390, -1.7212, -0.1975, 1.2356]) tensor([0.2224, 0.0313, 0.1437, 0.6025]) -Greedy action tensor([ 0.1327, -0.1497, 0.4684, 0.7161]) tensor([0.2022, 0.1525, 0.2829, 0.3624]) -Greedy action tensor([-0.3045, -0.1296, 1.2221, -0.6162]) tensor([0.1329, 0.1583, 0.6116, 0.0973]) -Greedy action tensor([ 0.4226, -0.8500, 0.7911, -0.4917]) tensor([0.3199, 0.0896, 0.4624, 0.1282]) -Greedy action tensor([ 1.8670, -0.2058, -0.3222, 0.6727]) tensor([0.6490, 0.0817, 0.0727, 0.1966]) -Greedy action tensor([-0.7379, 0.2530, -0.1768, 0.3553]) tensor([0.1186, 0.3195, 0.2079, 0.3539]) -Greedy action tensor([ 0.8467, -1.1641, 0.7233, 1.1590]) tensor([0.2955, 0.0396, 0.2612, 0.4038]) -Greedy action tensor([ 0.3032, -0.2979, -0.5550, -0.2990]) tensor([0.3969, 0.2176, 0.1682, 0.2173]) -Greedy action tensor([ 0.7066, -0.0747, -0.9560, 0.5435]) tensor([0.4005, 0.1833, 0.0759, 0.3402]) -Greedy action tensor([ 0.2742, -0.8141, 1.6515, 0.0781]) tensor([0.1633, 0.0550, 0.6474, 0.1342]) -Greedy action tensor([-0.2226, 0.6330, 0.5127, 0.0187]) tensor([0.1490, 0.3505, 0.3108, 0.1897]) -Greedy action tensor([ 0.7290, -1.0270, 0.8398, 0.6237]) tensor([0.3135, 0.0541, 0.3502, 0.2821]) -Greedy action tensor([ 0.6130, -1.2526, -0.0622, 1.1666]) tensor([0.2938, 0.0455, 0.1496, 0.5111]) -Greedy action tensor([0.0560, 0.3873, 0.7006, 1.4305]) tensor([0.1212, 0.1688, 0.2309, 0.4791]) -Greedy action tensor([-0.3040, -0.2427, 0.4139, -0.0964]) tensor([0.1871, 0.1990, 0.3836, 0.2303]) -Greedy action tensor([0.0925, 0.9628, 0.1283, 0.7792]) tensor([0.1560, 0.3724, 0.1617, 0.3099]) -Greedy action tensor([ 0.2154, -1.2695, 0.1220, -0.5578]) tensor([0.3848, 0.0872, 0.3505, 0.1776]) -Greedy action tensor([-0.0990, -0.3664, -0.3476, 0.4013]) tensor([0.2384, 0.1825, 0.1859, 0.3932]) -Greedy action tensor([-0.0914, -1.6356, -0.6145, 1.4916]) tensor([0.1498, 0.0320, 0.0888, 0.7295]) -Greedy action tensor([-0.2166, -0.4397, 0.6815, 0.5033]) tensor([0.1585, 0.1268, 0.3891, 0.3256]) -Greedy action tensor([ 0.7484, 0.0360, -0.4930, 0.9540]) tensor([0.3325, 0.1631, 0.0961, 0.4084]) -Greedy action tensor([-0.5362, -0.2761, 0.7943, 1.1474]) tensor([0.0872, 0.1131, 0.3300, 0.4697]) -Greedy action tensor([-0.1916, -2.1817, -0.3116, 0.2156]) tensor([0.2836, 0.0388, 0.2515, 0.4261]) -Greedy action tensor([ 0.3365, -0.9395, 0.1926, 1.4021]) tensor([0.1981, 0.0553, 0.1716, 0.5750]) -Greedy action tensor([ 0.6671, -0.1501, 1.3237, 0.0550]) tensor([0.2556, 0.1129, 0.4929, 0.1386]) -Greedy action tensor([ 1.1930, -1.5098, -0.5411, 0.9886]) tensor([0.4857, 0.0326, 0.0858, 0.3959]) -Greedy action tensor([-0.0698, -2.0191, 0.4942, 0.6317]) tensor([0.2034, 0.0290, 0.3575, 0.4102]) -Greedy action tensor([ 0.3281, -0.1808, 1.0876, 1.1983]) tensor([0.1632, 0.0981, 0.3489, 0.3897]) -Greedy action tensor([ 0.4978, -0.2035, 0.0771, 1.0456]) tensor([0.2576, 0.1278, 0.1691, 0.4455]) -Greedy action tensor([ 1.7091, -0.6090, 0.9140, 1.8545]) tensor([0.3695, 0.0364, 0.1668, 0.4273]) -Greedy action tensor([ 1.2999, -0.8222, -0.2289, 0.4327]) tensor([0.5692, 0.0682, 0.1234, 0.2392]) -Greedy action tensor([ 0.9339, 0.1586, -0.2623, -0.8681]) tensor([0.5187, 0.2389, 0.1568, 0.0856]) -Greedy action tensor([ 0.6959, -0.3661, -0.2277, 0.7663]) tensor([0.3551, 0.1228, 0.1410, 0.3810]) -Greedy action tensor([-0.3422, -0.0358, 0.6874, 0.4120]) tensor([0.1373, 0.1865, 0.3844, 0.2918]) -Greedy action tensor([-0.4947, 0.4408, 1.5264, -0.3369]) tensor([0.0815, 0.2078, 0.6152, 0.0955]) -Greedy action tensor([ 1.8401, -0.2983, 0.4250, 0.8019]) tensor([0.5831, 0.0687, 0.1416, 0.2065]) -Greedy action tensor([ 0.6927, 0.3522, 2.1497, -0.3092]) tensor([0.1569, 0.1117, 0.6738, 0.0576]) -Greedy action tensor([ 0.1937, -1.5012, -0.0350, 0.6276]) tensor([0.2839, 0.0521, 0.2258, 0.4381]) -Greedy action tensor([ 2.0553, -0.1936, 1.0531, 1.5663]) tensor([0.4794, 0.0506, 0.1760, 0.2940]) -Greedy action tensor([ 0.4219, -0.8091, -0.2361, 1.3100]) tensor([0.2358, 0.0689, 0.1221, 0.5732]) -Greedy action tensor([-0.3631, 0.5767, -0.0640, 0.6212]) tensor([0.1319, 0.3375, 0.1778, 0.3528]) -Greedy action tensor([-1.0822, -1.6284, -0.7670, 1.4926]) tensor([0.0622, 0.0360, 0.0852, 0.8165]) -Greedy action tensor([ 1.4633, -0.3744, 1.2898, 0.3181]) tensor([0.4314, 0.0687, 0.3627, 0.1373]) -Greedy action tensor([ 1.5167, -0.9338, 0.4904, 0.3775]) tensor([0.5667, 0.0489, 0.2031, 0.1814]) -Greedy action tensor([ 1.1556, -0.6556, -0.3378, 0.7702]) tensor([0.4835, 0.0790, 0.1086, 0.3289]) -Greedy action tensor([-0.2686, 0.2541, -0.2146, 0.1999]) tensor([0.1873, 0.3159, 0.1977, 0.2992]) -Greedy action tensor([ 0.4134, -1.3038, 0.9818, 0.4562]) tensor([0.2507, 0.0450, 0.4426, 0.2617]) -Greedy action tensor([ 1.8479, -0.4837, 0.2494, 1.3738]) tensor([0.5204, 0.0505, 0.1052, 0.3239]) -Greedy action tensor([-1.3014, -1.1347, -1.0731, 0.6584]) tensor([0.0949, 0.1121, 0.1193, 0.6737]) -Greedy action tensor([ 1.1452, 0.3509, -0.3164, 0.2439]) tensor([0.4785, 0.2162, 0.1110, 0.1943]) -Greedy action tensor([ 0.7393, -0.8922, -0.0292, 0.7035]) tensor([0.3811, 0.0745, 0.1767, 0.3677]) -Greedy action tensor([ 0.7902, -0.2460, -0.3704, 1.8236]) tensor([0.2233, 0.0792, 0.0700, 0.6275]) -Greedy action tensor([ 0.6606, -1.1782, -0.4199, 1.7479]) tensor([0.2240, 0.0356, 0.0760, 0.6644]) -Greedy action tensor([0.7102, 0.0678, 1.1120, 2.1606]) tensor([0.1373, 0.0722, 0.2051, 0.5854]) -Greedy action tensor([ 2.3641, -0.8496, 0.9623, 1.7103]) tensor([0.5536, 0.0223, 0.1363, 0.2879]) -Greedy action tensor([1.1449, 0.0266, 1.1937, 1.6458]) tensor([0.2483, 0.0812, 0.2607, 0.4098]) -Greedy action tensor([ 1.2821, -1.1411, 1.8122, 0.5235]) tensor([0.3071, 0.0272, 0.5218, 0.1438]) -Greedy action tensor([ 0.2581, -2.2703, 0.0966, 0.5642]) tensor([0.3041, 0.0243, 0.2587, 0.4130]) -Greedy action tensor([ 0.2699, -1.0551, 1.4071, -0.2841]) tensor([0.2017, 0.0536, 0.6288, 0.1159]) -Greedy action tensor([ 1.0090, -0.2151, 1.2559, 0.9166]) tensor([0.2869, 0.0844, 0.3672, 0.2616]) -Greedy action tensor([ 0.0656, 0.4637, 0.6369, -0.5935]) tensor([0.2093, 0.3117, 0.3707, 0.1083]) -Greedy action tensor([ 1.1686, -0.1101, -0.8126, 1.4535]) tensor([0.3642, 0.1014, 0.0502, 0.4842]) -Greedy action tensor([-0.0238, -0.9064, 0.3783, 0.7350]) tensor([0.1982, 0.0820, 0.2964, 0.4234]) -Greedy action tensor([ 0.5219, -1.2263, 1.1194, -0.4461]) tensor([0.2966, 0.0516, 0.5391, 0.1127]) -Greedy action tensor([0.2135, 0.8135, 0.2769, 0.3436]) tensor([0.1989, 0.3625, 0.2120, 0.2266]) -Greedy action tensor([ 0.5907, -1.2438, -0.3588, 1.7022]) tensor([0.2181, 0.0348, 0.0844, 0.6627]) -Greedy action tensor([ 0.7651, -2.0760, 0.2262, -0.0524]) tensor([0.4800, 0.0280, 0.2800, 0.2119]) -Greedy action tensor([ 0.6325, -0.0357, -0.2967, 1.5602]) tensor([0.2254, 0.1156, 0.0890, 0.5700]) -Greedy action tensor([ 1.9517, -1.0429, 0.9781, 1.6504]) tensor([0.4613, 0.0231, 0.1743, 0.3413]) -Greedy action tensor([ 0.7929, -0.9408, 1.4727, -0.4245]) tensor([0.2902, 0.0513, 0.5727, 0.0859]) -Greedy action tensor([ 0.4084, 0.3542, -0.0065, 1.2052]) tensor([0.2072, 0.1963, 0.1368, 0.4597]) -Greedy action tensor([ 0.8598, 0.3452, -0.4272, 0.9551]) tensor([0.3363, 0.2010, 0.0928, 0.3699]) -Greedy action tensor([ 1.6744, -0.4182, -0.4097, 0.1613]) tensor([0.6812, 0.0840, 0.0848, 0.1500]) -Greedy action tensor([ 0.8427, -0.3392, -0.4012, 0.5084]) tensor([0.4328, 0.1327, 0.1247, 0.3098]) -Greedy action tensor([ 0.8129, -0.4454, -0.2385, -0.1221]) tensor([0.4935, 0.1402, 0.1725, 0.1938]) -Greedy action tensor([ 0.6655, -0.5119, -0.0172, 0.0976]) tensor([0.4202, 0.1294, 0.2123, 0.2381]) -Greedy action tensor([ 1.0388, -0.2663, -0.2563, -0.3203]) tensor([0.5550, 0.1505, 0.1520, 0.1426]) -Greedy action tensor([ 1.4604, -0.5844, -0.2943, 0.0862]) tensor([0.6429, 0.0832, 0.1112, 0.1627]) -Greedy action tensor([ 1.0751, -0.2381, -0.3808, -0.3840]) tensor([0.5765, 0.1551, 0.1344, 0.1340]) -Greedy action tensor([ 1.0900, -0.3089, -0.1736, 0.2784]) tensor([0.5067, 0.1251, 0.1432, 0.2250]) -Greedy action tensor([ 1.3189, -0.7604, -0.3139, 0.4277]) tensor([0.5779, 0.0722, 0.1129, 0.2370]) -Greedy action tensor([ 1.5170, -0.5695, -0.1020, 0.3152]) tensor([0.6162, 0.0765, 0.1221, 0.1853]) -Greedy action tensor([ 1.4455, -0.3190, -0.0154, 0.0374]) tensor([0.6068, 0.1039, 0.1408, 0.1484]) -Greedy action tensor([ 0.9999, -0.4713, -0.2610, 0.0507]) tensor([0.5263, 0.1209, 0.1491, 0.2037]) -Greedy action tensor([ 0.9005, 0.2048, -0.4266, -0.3073]) tensor([0.4848, 0.2418, 0.1286, 0.1449]) -Greedy action tensor([ 1.0393, -0.2368, -0.1125, 0.0233]) tensor([0.5109, 0.1426, 0.1615, 0.1850]) -Greedy action tensor([ 1.3230, -0.4705, -0.3680, -0.0625]) tensor([0.6247, 0.1039, 0.1151, 0.1563]) -Greedy action tensor([ 0.8865, -0.0849, -0.5854, -0.1794]) tensor([0.5122, 0.1939, 0.1175, 0.1764]) -Greedy action tensor([ 1.5780, -0.4747, -0.2470, 0.1124]) tensor([0.6577, 0.0844, 0.1060, 0.1519]) -Greedy action tensor([ 1.1075, -0.4342, -0.4290, -0.0765]) tensor([0.5763, 0.1233, 0.1240, 0.1764]) -Greedy action tensor([ 0.8148, -0.0124, -0.6746, -0.0766]) tensor([0.4824, 0.2110, 0.1088, 0.1978]) -Greedy action tensor([ 1.1516, -0.6018, -0.3667, 0.3970]) tensor([0.5369, 0.0930, 0.1176, 0.2525]) -Greedy action tensor([ 0.7347, -0.5281, -0.2969, 0.3420]) tensor([0.4320, 0.1222, 0.1540, 0.2917]) -Greedy action tensor([ 1.6326, 0.0937, -0.0638, -0.1276]) tensor([0.6370, 0.1367, 0.1168, 0.1096]) -Greedy action tensor([ 0.9688, -0.2470, 0.0370, 0.2394]) tensor([0.4603, 0.1365, 0.1813, 0.2219]) -Greedy action tensor([ 0.9407, -0.0304, -0.3291, 0.2485]) tensor([0.4630, 0.1753, 0.1300, 0.2317]) -Greedy action tensor([ 1.3739, -0.3227, -0.1917, 0.1224]) tensor([0.5958, 0.1092, 0.1245, 0.1705]) -Greedy action tensor([ 1.0310, -0.6226, -0.0364, 0.0218]) tensor([0.5264, 0.1007, 0.1810, 0.1919]) -Greedy action tensor([ 1.7140, -0.3113, -0.3829, 0.0829]) tensor([0.6894, 0.0910, 0.0847, 0.1349]) -Greedy action tensor([ 1.1159, -0.2159, -0.0882, 0.0579]) tensor([0.5232, 0.1381, 0.1570, 0.1817]) -Greedy action tensor([ 0.6957, 0.0468, 0.2117, -0.3345]) tensor([0.4007, 0.2094, 0.2469, 0.1430]) -Greedy action tensor([ 0.2997, -0.2809, -0.1802, 0.1722]) tensor([0.3269, 0.1829, 0.2023, 0.2878]) -Greedy action tensor([ 1.0880, -0.0051, -0.2736, -0.3243]) tensor([0.5450, 0.1827, 0.1396, 0.1327]) -Greedy action tensor([ 1.2586, -0.4280, -0.0162, 0.0292]) tensor([0.5691, 0.1054, 0.1591, 0.1665]) -Greedy action tensor([ 0.7786, -0.2392, -0.2578, -0.1527]) tensor([0.4739, 0.1713, 0.1681, 0.1867]) -Greedy action tensor([ 1.0022, -0.3886, -0.4014, -0.1123]) tensor([0.5487, 0.1365, 0.1348, 0.1800]) -Greedy action tensor([ 1.0264, -0.3513, -0.1266, 0.0727]) tensor([0.5120, 0.1291, 0.1616, 0.1973]) -Greedy action tensor([ 1.1932, -0.5944, -0.4793, 0.4611]) tensor([0.5447, 0.0912, 0.1023, 0.2619]) -Greedy action tensor([ 0.7697, -0.2129, -0.1129, 0.0672]) tensor([0.4379, 0.1639, 0.1812, 0.2169]) -Greedy action tensor([ 1.0570, -0.3879, -0.1475, 0.0075]) tensor([0.5303, 0.1250, 0.1590, 0.1857]) -Greedy action tensor([ 1.2537, -0.2409, -0.1908, 0.4681]) tensor([0.5219, 0.1171, 0.1231, 0.2379]) -Greedy action tensor([ 1.1471, -0.2071, -0.1775, 0.0773]) tensor([0.5356, 0.1383, 0.1424, 0.1837]) -Greedy action tensor([ 1.1733, 0.0394, -0.1000, -0.2522]) tensor([0.5429, 0.1747, 0.1519, 0.1305]) -Greedy action tensor([ 1.2995, -0.5817, -0.3194, 0.2191]) tensor([0.5917, 0.0902, 0.1172, 0.2009]) -Greedy action tensor([ 1.0692, -0.1032, -0.0517, 0.1613]) tensor([0.4905, 0.1518, 0.1599, 0.1978]) -Greedy action tensor([ 1.4548, -0.5394, -0.3463, 0.4620]) tensor([0.5982, 0.0814, 0.0988, 0.2217]) -Greedy action tensor([ 1.0720, -0.3180, 0.0074, 0.1682]) tensor([0.5003, 0.1246, 0.1725, 0.2026]) -Greedy action tensor([ 0.7320, 0.0494, -0.1760, 0.0542]) tensor([0.4138, 0.2091, 0.1669, 0.2101]) -Greedy action tensor([ 0.7186, -0.5691, -0.8049, -0.2309]) tensor([0.5317, 0.1467, 0.1159, 0.2057]) -Greedy action tensor([ 1.3409, -0.5164, -0.3029, 0.3601]) tensor([0.5799, 0.0905, 0.1121, 0.2175]) -Greedy action tensor([ 1.0160, -0.2018, -0.2607, 0.2202]) tensor([0.4936, 0.1460, 0.1377, 0.2227]) -Greedy action tensor([ 1.1349, -0.6471, -0.4234, 0.7322]) tensor([0.4884, 0.0822, 0.1028, 0.3266]) -Greedy action tensor([ 0.8753, -0.4787, 0.0101, -0.3952]) tensor([0.5102, 0.1317, 0.2148, 0.1432]) -Greedy action tensor([ 1.0697, -0.2661, -0.3118, 0.0539]) tensor([0.5330, 0.1401, 0.1339, 0.1930]) -Greedy action tensor([ 1.1301, -0.3783, -0.0277, 0.0433]) tensor([0.5340, 0.1182, 0.1678, 0.1801]) -Greedy action tensor([ 0.6096, -0.3935, -0.4026, 0.5703]) tensor([0.3715, 0.1363, 0.1350, 0.3572]) -Greedy action tensor([ 0.7190, -0.3849, 0.1759, -0.1899]) tensor([0.4319, 0.1432, 0.2509, 0.1740]) -Greedy action tensor([ 0.8696, -0.0980, -0.0019, -0.1106]) tensor([0.4601, 0.1748, 0.1925, 0.1726]) -Greedy action tensor([ 1.0284, -0.5988, -0.1290, 0.4494]) tensor([0.4828, 0.0949, 0.1517, 0.2706]) -Greedy action tensor([ 1.1559, -0.6161, -0.2815, 0.2396]) tensor([0.5532, 0.0940, 0.1314, 0.2213]) -Greedy action tensor([ 0.6692, -0.1340, 0.0653, -0.1010]) tensor([0.4069, 0.1823, 0.2225, 0.1884]) -Greedy action tensor([ 1.0796, -0.4452, -0.0902, 0.2683]) tensor([0.5070, 0.1104, 0.1574, 0.2252]) -Greedy action tensor([ 1.0368, -0.3059, -0.2289, 0.1494]) tensor([0.5115, 0.1336, 0.1443, 0.2106]) -Greedy action tensor([ 1.5585, -0.2375, -0.1995, 0.1464]) tensor([0.6321, 0.1049, 0.1090, 0.1540]) -Greedy action tensor([ 1.4110, -1.0630, -0.6926, 0.8110]) tensor([0.5698, 0.0480, 0.0695, 0.3127]) -Greedy action tensor([ 1.4008, -0.8324, -0.0051, 0.3029]) tensor([0.5932, 0.0636, 0.1454, 0.1979]) -Greedy action tensor([ 1.9141, -0.8626, -0.2658, 0.0135]) tensor([0.7548, 0.0470, 0.0853, 0.1128]) -Greedy action tensor([ 0.6387, -0.6643, -0.1497, 0.1790]) tensor([0.4241, 0.1152, 0.1928, 0.2678]) -Greedy action tensor([ 0.4980, -0.4253, -0.2789, 0.3893]) tensor([0.3631, 0.1442, 0.1670, 0.3257]) -Greedy action tensor([ 1.2458, -0.6034, -0.4417, 0.0738]) tensor([0.6053, 0.0953, 0.1120, 0.1875]) -Greedy action tensor([ 1.1547, 0.0069, -0.2175, 0.1830]) tensor([0.5130, 0.1628, 0.1301, 0.1941]) -Greedy action tensor([ 0.8952, -0.5034, -0.1561, 0.1705]) tensor([0.4806, 0.1187, 0.1679, 0.2328]) -Greedy action tensor([ 1.4463, -0.5736, -0.5369, -0.1880]) tensor([0.6824, 0.0905, 0.0939, 0.1331]) -Greedy action tensor([ 1.4934, -0.6343, -0.4114, 0.2773]) tensor([0.6392, 0.0761, 0.0952, 0.1895]) -Greedy action tensor([ 1.3010, -0.3157, -0.2671, 0.4802]) tensor([0.5414, 0.1075, 0.1128, 0.2383]) -Greedy action tensor([ 1.0030, 0.0292, -0.3198, 0.0248]) tensor([0.4950, 0.1869, 0.1319, 0.1861]) -Greedy action tensor([ 1.2786, -0.5200, -0.1286, 0.1768]) tensor([0.5739, 0.0950, 0.1405, 0.1907]) -Greedy action tensor([ 1.3696, -0.3715, -0.5558, 0.4759]) tensor([0.5779, 0.1013, 0.0843, 0.2365]) -Greedy action tensor([ 0.7046, -0.0457, -0.2411, 0.0820]) tensor([0.4172, 0.1970, 0.1620, 0.2238]) -Greedy action tensor([ 0.8390, -0.4543, -0.4300, 0.5552]) tensor([0.4332, 0.1189, 0.1218, 0.3262]) -Greedy action tensor([ 1.9520, -1.0177, 0.1079, 0.0402]) tensor([0.7368, 0.0378, 0.1165, 0.1089]) -Greedy action tensor([ 1.3424, -0.5708, -0.1807, 0.2503]) tensor([0.5878, 0.0868, 0.1282, 0.1972]) -Greedy action tensor([ 0.8720, -0.4419, -0.4725, 0.6072]) tensor([0.4354, 0.1170, 0.1135, 0.3341]) -Greedy action tensor([-1.8642, -0.4248, 0.6264, -0.1268]) tensor([0.0435, 0.1836, 0.5254, 0.2474]) -Greedy action tensor([-1.5696, -0.4236, 0.4792, -0.0112]) tensor([0.0600, 0.1889, 0.4658, 0.2853]) -Greedy action tensor([-0.6909, 0.8628, 0.0494, 0.1948]) tensor([0.0976, 0.4614, 0.2045, 0.2366]) -Greedy action tensor([-1.1160, -0.5381, 1.1561, 1.4097]) tensor([0.0400, 0.0713, 0.3883, 0.5004]) -Greedy action tensor([-1.4305, -0.0487, 0.5082, 0.4228]) tensor([0.0546, 0.2174, 0.3795, 0.3484]) -Greedy action tensor([-0.5053, -0.2111, 0.1148, 0.0237]) tensor([0.1695, 0.2275, 0.3152, 0.2877]) -Greedy action tensor([-0.9931, -0.5634, 0.3292, 0.2915]) tensor([0.1010, 0.1552, 0.3789, 0.3649]) -Greedy action tensor([-1.5732, -0.6224, 0.7695, 0.5141]) tensor([0.0453, 0.1173, 0.4719, 0.3655]) -Greedy action tensor([-1.8803, -0.4824, 0.6422, -0.1449]) tensor([0.0431, 0.1746, 0.5376, 0.2447]) -Greedy action tensor([-1.6408, -0.3695, 0.4902, 0.0291]) tensor([0.0546, 0.1948, 0.4603, 0.2903]) -Greedy action tensor([-0.6768, -0.5784, 0.2354, 0.2402]) tensor([0.1409, 0.1555, 0.3509, 0.3526]) -Greedy action tensor([-1.0252, -0.5990, 0.2129, 0.3123]) tensor([0.1021, 0.1564, 0.3523, 0.3891]) -Greedy action tensor([-0.1814, -0.1568, 0.1958, 0.2805]) tensor([0.1972, 0.2021, 0.2876, 0.3130]) -Greedy action tensor([-1.7012, -0.4385, 0.2545, -0.3966]) tensor([0.0654, 0.2312, 0.4623, 0.2411]) -Greedy action tensor([-0.8673, -0.5785, 0.1981, 0.2902]) tensor([0.1188, 0.1586, 0.3447, 0.3780]) -Greedy action tensor([-1.3727, -0.5306, 0.4577, -0.1060]) tensor([0.0763, 0.1771, 0.4758, 0.2708]) -Greedy action tensor([-0.6499, -0.6162, 0.2164, -0.0077]) tensor([0.1584, 0.1638, 0.3767, 0.3011]) -Greedy action tensor([-1.7720, -0.5131, 0.5837, -0.0829]) tensor([0.0488, 0.1719, 0.5149, 0.2644]) -Greedy action tensor([-1.1961, 0.2566, 0.3097, -0.0841]) tensor([0.0780, 0.3334, 0.3515, 0.2371]) -Greedy action tensor([-1.9404, -0.4526, 0.6623, -0.1793]) tensor([0.0404, 0.1789, 0.5455, 0.2351]) -Greedy action tensor([-0.3123, -0.3047, 0.1935, 0.2443]) tensor([0.1848, 0.1862, 0.3065, 0.3225]) -Greedy action tensor([-1.8388, -0.5108, 0.6458, -0.1099]) tensor([0.0446, 0.1684, 0.5355, 0.2515]) -Greedy action tensor([-1.6657, -0.2756, 0.7863, 0.2721]) tensor([0.0424, 0.1704, 0.4926, 0.2946]) -Greedy action tensor([-0.7595, -0.1254, 0.2148, -0.0274]) tensor([0.1313, 0.2476, 0.3480, 0.2731]) -Greedy action tensor([-1.1000, -0.4162, 0.5227, 1.0868]) tensor([0.0590, 0.1169, 0.2988, 0.5253]) -Greedy action tensor([-1.8023, -0.4758, 0.5970, -0.0795]) tensor([0.0468, 0.1762, 0.5151, 0.2619]) -Greedy action tensor([-1.2368, -0.5633, 0.3009, 0.2011]) tensor([0.0846, 0.1658, 0.3935, 0.3561]) -Greedy action tensor([-1.4418, -0.5571, 0.6217, 0.6601]) tensor([0.0513, 0.1244, 0.4042, 0.4201]) -Greedy action tensor([-1.9212, -0.4466, 0.6599, -0.1640]) tensor([0.0410, 0.1792, 0.5420, 0.2378]) -Greedy action tensor([-1.5509, -0.5936, 0.5227, 0.0902]) tensor([0.0598, 0.1558, 0.4757, 0.3087]) -Greedy action tensor([-1.6129, 0.2875, 0.8417, 0.7329]) tensor([0.0336, 0.2247, 0.3910, 0.3507]) -Greedy action tensor([-1.8938, -0.4812, 0.7202, -0.1024]) tensor([0.0404, 0.1659, 0.5515, 0.2423]) -Greedy action tensor([-0.5969, -0.2727, -0.6493, -0.1332]) tensor([0.2032, 0.2810, 0.1928, 0.3230]) -Greedy action tensor([-1.8252, -0.4815, 0.6153, -0.1218]) tensor([0.0459, 0.1758, 0.5264, 0.2519]) -Greedy action tensor([-1.7951, -0.4860, 0.7361, 0.1199]) tensor([0.0416, 0.1539, 0.5224, 0.2821]) -Greedy action tensor([-1.8856, -0.4024, 1.1014, 0.7020]) tensor([0.0260, 0.1144, 0.5146, 0.3451]) -Greedy action tensor([-1.7907, -0.7204, 0.8323, 0.3877]) tensor([0.0377, 0.1099, 0.5194, 0.3330]) -Greedy action tensor([-1.3736, -0.5569, 0.3429, 0.2722]) tensor([0.0714, 0.1615, 0.3971, 0.3700]) -Greedy action tensor([-1.3620, -0.8847, 1.2740, 1.3099]) tensor([0.0322, 0.0519, 0.4497, 0.4661]) -Greedy action tensor([-1.9251, -0.8656, 0.6427, -0.0023]) tensor([0.0421, 0.1214, 0.5487, 0.2879]) -Greedy action tensor([-1.7225, -0.4929, 0.5421, -0.0049]) tensor([0.0510, 0.1743, 0.4907, 0.2840]) -Greedy action tensor([-1.6507, -0.5167, 0.5060, 0.0044]) tensor([0.0556, 0.1728, 0.4806, 0.2910]) -Greedy action tensor([-1.6799, -0.5083, 0.5185, -0.0021]) tensor([0.0538, 0.1736, 0.4847, 0.2880]) -Greedy action tensor([-1.5646, -0.3477, 0.4351, 0.0821]) tensor([0.0590, 0.1992, 0.4357, 0.3061]) -Greedy action tensor([-1.0116, -0.4831, 0.2917, 0.8125]) tensor([0.0795, 0.1349, 0.2927, 0.4928]) -Greedy action tensor([-1.5417, -0.5400, 0.6480, 0.0888]) tensor([0.0563, 0.1533, 0.5029, 0.2875]) -Greedy action tensor([-1.5339, -0.5317, 0.4359, 0.1452]) tensor([0.0615, 0.1676, 0.4411, 0.3298]) -Greedy action tensor([-1.7252, -0.3927, 0.5397, -0.0345]) tensor([0.0504, 0.1910, 0.4853, 0.2733]) -Greedy action tensor([-1.5546, -0.0595, 0.5587, 0.4243]) tensor([0.0477, 0.2127, 0.3946, 0.3450]) -Greedy action tensor([-1.8196e+00, -4.5017e-01, 6.8015e-01, 3.5787e-04]) tensor([0.0429, 0.1689, 0.5231, 0.2651]) -Greedy action tensor([-1.6610, -0.7414, 0.1277, -0.3684]) tensor([0.0761, 0.1910, 0.4555, 0.2774]) -Greedy action tensor([-0.3066, -0.3556, 1.0567, 1.6747]) tensor([0.0763, 0.0726, 0.2981, 0.5530]) -Greedy action tensor([-0.8093, -0.5317, 0.1932, 0.3829]) tensor([0.1199, 0.1583, 0.3268, 0.3950]) -Greedy action tensor([-1.7938, -0.4872, 0.5872, -0.0651]) tensor([0.0473, 0.1747, 0.5116, 0.2665]) -Greedy action tensor([-1.7054, -0.5264, 0.5509, -0.0167]) tensor([0.0521, 0.1692, 0.4970, 0.2817]) -Greedy action tensor([-0.6799, -0.3091, 0.4067, 0.7958]) tensor([0.1022, 0.1480, 0.3029, 0.4469]) -Greedy action tensor([-0.6146, 0.0108, -0.3210, 0.1200]) tensor([0.1589, 0.2969, 0.2131, 0.3312]) -Greedy action tensor([-1.7647, -0.4741, 0.6128, 0.0187]) tensor([0.0468, 0.1702, 0.5045, 0.2785]) -Greedy action tensor([-1.3452, -0.5840, 1.2863, 1.2548]) tensor([0.0328, 0.0702, 0.4556, 0.4415]) -Greedy action tensor([-1.7716, -0.4987, 0.6010, -0.0086]) tensor([0.0473, 0.1691, 0.5077, 0.2759]) -Greedy action tensor([-1.7448, -0.4496, 0.8774, 0.5013]) tensor([0.0359, 0.1310, 0.4940, 0.3391]) -Greedy action tensor([-1.2778, -0.5645, 0.3018, 0.3004]) tensor([0.0785, 0.1602, 0.3809, 0.3804]) -Greedy action tensor([-0.9459, -0.5553, 0.7342, 1.3727]) tensor([0.0555, 0.0821, 0.2980, 0.5644]) -Greedy action tensor([-1.6893, -0.4126, 0.1848, -0.2486]) tensor([0.0653, 0.2339, 0.4252, 0.2756]) -Greedy action tensor([-0.3847, -0.0536, 0.9450, 1.5721]) tensor([0.0755, 0.1051, 0.2853, 0.5341]) -Greedy action tensor([-1.3255, -0.4098, -0.2789, -0.4791]) tensor([0.1152, 0.2879, 0.3282, 0.2686]) -Greedy action tensor([-1.3685, -0.5315, 1.3087, 1.1949]) tensor([0.0324, 0.0749, 0.4717, 0.4210]) -Greedy action tensor([-0.3982, -0.4045, 0.2124, 0.2237]) tensor([0.1755, 0.1744, 0.3232, 0.3269]) -Greedy action tensor([-1.2329, -0.5299, 0.6575, 0.9797]) tensor([0.0532, 0.1075, 0.3526, 0.4866]) -Greedy action tensor([-0.9559, -0.5227, 0.6443, 1.2868]) tensor([0.0591, 0.0912, 0.2929, 0.5568]) -Greedy action tensor([-0.9035, -0.5881, 0.2081, 0.3254]) tensor([0.1133, 0.1553, 0.3443, 0.3871]) -Greedy action tensor([-0.7218, -0.5352, 0.1650, 0.2056]) tensor([0.1397, 0.1683, 0.3390, 0.3530]) -Greedy action tensor([-2.0287, -0.6713, 0.9929, 0.3809]) tensor([0.0274, 0.1064, 0.5617, 0.3046]) -Greedy action tensor([-1.1607, -0.3658, 0.6728, -0.6437]) tensor([0.0897, 0.1986, 0.5612, 0.1504]) -Greedy action tensor([-1.3164, -0.3770, -0.1525, -0.3538]) tensor([0.1066, 0.2728, 0.3414, 0.2792]) -Greedy action tensor([-1.8269, -0.6355, 0.3519, -0.3080]) tensor([0.0565, 0.1860, 0.4993, 0.2581]) -Greedy action tensor([-1.2867, -0.5260, 0.5090, 0.5662]) tensor([0.0643, 0.1377, 0.3876, 0.4104]) -Greedy action tensor([-1.6875, -0.5309, 0.5424, -0.0378]) tensor([0.0535, 0.1702, 0.4977, 0.2786]) -Greedy action tensor([-1.5605, -0.4281, 0.4873, 0.0601]) tensor([0.0591, 0.1835, 0.4584, 0.2990]) -Greedy action tensor([-1.2248, -0.0806, 0.6431, -0.6746]) tensor([0.0810, 0.2543, 0.5243, 0.1404]) -Greedy action tensor([-1.0849, -0.2448, 0.1465, 1.1453]) tensor([0.0623, 0.1444, 0.2135, 0.5797]) -Greedy action tensor([ 0.3949, -0.0558, 0.0559, -0.3350]) tensor([0.3532, 0.2250, 0.2516, 0.1702]) -Greedy action tensor([ 0.8790, -0.1918, -0.0116, -0.3601]) tensor([0.4895, 0.1678, 0.2009, 0.1418]) -Greedy action tensor([ 0.2783, -0.0629, -0.0544, -0.1213]) tensor([0.3227, 0.2294, 0.2314, 0.2164]) -Greedy action tensor([ 0.3574, -0.1705, -0.0696, -0.2661]) tensor([0.3599, 0.2123, 0.2348, 0.1929]) -Greedy action tensor([ 0.1829, 0.2688, -0.1735, -0.2691]) tensor([0.2919, 0.3181, 0.2044, 0.1857]) -Greedy action tensor([ 0.3654, 0.2020, -0.0023, -0.1449]) tensor([0.3183, 0.2703, 0.2203, 0.1911]) -Greedy action tensor([ 0.2986, -0.0761, -0.0607, -0.4260]) tensor([0.3484, 0.2395, 0.2432, 0.1688]) -Greedy action tensor([ 0.6781, -0.4375, -0.0644, -0.3724]) tensor([0.4644, 0.1522, 0.2210, 0.1624]) -Greedy action tensor([ 0.6855, -0.5817, -0.1317, -0.6700]) tensor([0.5048, 0.1422, 0.2229, 0.1301]) -Greedy action tensor([ 0.5328, -0.1631, -0.1193, -0.4717]) tensor([0.4191, 0.2090, 0.2184, 0.1535]) -Greedy action tensor([ 0.4930, -0.0681, -0.0373, -0.0856]) tensor([0.3677, 0.2098, 0.2164, 0.2062]) -Greedy action tensor([ 0.5814, -0.1091, 0.0655, -0.2786]) tensor([0.3966, 0.1988, 0.2368, 0.1678]) -Greedy action tensor([ 0.2911, 0.1279, -0.1166, -0.0735]) tensor([0.3116, 0.2647, 0.2073, 0.2164]) -Greedy action tensor([ 0.4692, -0.1672, 0.1478, -0.3675]) tensor([0.3721, 0.1969, 0.2698, 0.1612]) -Greedy action tensor([ 0.6289, -0.1058, -0.0753, -0.2688]) tensor([0.4199, 0.2014, 0.2076, 0.1711]) -Greedy action tensor([ 0.4496, -0.2065, -0.0583, -0.3569]) tensor([0.3896, 0.2021, 0.2344, 0.1739]) -Greedy action tensor([ 0.8061, -0.1294, -0.0927, -0.1550]) tensor([0.4583, 0.1798, 0.1866, 0.1753]) -Greedy action tensor([ 0.5314, -0.3945, -0.1435, -0.5078]) tensor([0.4426, 0.1754, 0.2254, 0.1566]) -Greedy action tensor([ 0.3945, -0.2113, -0.0639, -0.2019]) tensor([0.3665, 0.2000, 0.2317, 0.2018]) -Greedy action tensor([ 0.7053, -0.4664, -0.0775, -0.3763]) tensor([0.4748, 0.1471, 0.2171, 0.1610]) -Greedy action tensor([ 0.4808, 0.0524, 0.1397, -0.2074]) tensor([0.3490, 0.2274, 0.2482, 0.1754]) -Greedy action tensor([ 0.3998, -0.1061, 0.0822, -0.2272]) tensor([0.3490, 0.2105, 0.2541, 0.1865]) -Greedy action tensor([ 0.4807, -0.0954, 0.0320, -0.1071]) tensor([0.3628, 0.2039, 0.2317, 0.2016]) -Greedy action tensor([ 0.7180, -0.2152, 0.1186, -0.3611]) tensor([0.4381, 0.1723, 0.2406, 0.1489]) -Greedy action tensor([ 0.4069, -0.2390, -0.0154, -0.2736]) tensor([0.3723, 0.1952, 0.2441, 0.1885]) -Greedy action tensor([ 1.2449, -1.0296, -0.0400, -0.6337]) tensor([0.6526, 0.0671, 0.1806, 0.0997]) -Greedy action tensor([ 0.7616, -0.3145, -0.0214, -0.4923]) tensor([0.4800, 0.1636, 0.2194, 0.1370]) -Greedy action tensor([ 0.7823, -0.5701, -0.0109, -0.6176]) tensor([0.5108, 0.1321, 0.2311, 0.1260]) -Greedy action tensor([ 0.9766, -0.5399, -0.1167, -0.7257]) tensor([0.5757, 0.1264, 0.1929, 0.1049]) -Greedy action tensor([ 0.6351, -0.4520, -0.1839, -0.5583]) tensor([0.4805, 0.1620, 0.2118, 0.1457]) -Greedy action tensor([ 0.4495, -0.3516, 0.1915, -0.5222]) tensor([0.3846, 0.1726, 0.2972, 0.1456]) -Greedy action tensor([ 0.4173, -0.2432, 0.2351, -0.3960]) tensor([0.3580, 0.1849, 0.2984, 0.1587]) -Greedy action tensor([ 0.5468, -0.2472, -0.1459, -0.3714]) tensor([0.4253, 0.1922, 0.2127, 0.1698]) -Greedy action tensor([ 0.5085, -0.3362, 0.0054, -0.3359]) tensor([0.4058, 0.1744, 0.2454, 0.1744]) -Greedy action tensor([ 0.3955, 0.0023, -0.0634, -0.2273]) tensor([0.3517, 0.2374, 0.2223, 0.1887]) -Greedy action tensor([ 0.8739, -0.6682, -0.0810, -0.6272]) tensor([0.5489, 0.1174, 0.2113, 0.1224]) -Greedy action tensor([ 0.9726, -0.7568, -0.0282, -0.4931]) tensor([0.5631, 0.0999, 0.2070, 0.1300]) -Greedy action tensor([ 0.4197, -0.0611, 0.0338, -0.2027]) tensor([0.3528, 0.2181, 0.2398, 0.1893]) -Greedy action tensor([ 0.5912, -0.3590, -0.2276, -0.5770]) tensor([0.4676, 0.1808, 0.2062, 0.1454]) -Greedy action tensor([ 0.4330, 0.0204, -0.0579, -0.0052]) tensor([0.3426, 0.2268, 0.2097, 0.2210]) -Greedy action tensor([ 0.3551, -0.0430, -0.0487, -0.2593]) tensor([0.3472, 0.2332, 0.2318, 0.1878]) -Greedy action tensor([ 0.7862, -0.3563, 0.1651, -0.4059]) tensor([0.4630, 0.1477, 0.2488, 0.1405]) -Greedy action tensor([ 0.6004, -0.3092, -0.0547, -0.3803]) tensor([0.4353, 0.1753, 0.2261, 0.1633]) -Greedy action tensor([ 1.0262, -0.7739, -0.0229, -0.3878]) tensor([0.5686, 0.0940, 0.1992, 0.1383]) -Greedy action tensor([ 0.2868, 0.0027, 0.0202, -0.1358]) tensor([0.3151, 0.2371, 0.2413, 0.2065]) -Greedy action tensor([ 0.6389, -0.3721, -0.0905, -0.3909]) tensor([0.4539, 0.1652, 0.2189, 0.1621]) -Greedy action tensor([ 0.7697, -0.3989, 0.0713, -0.4134]) tensor([0.4729, 0.1470, 0.2352, 0.1449]) -Greedy action tensor([ 0.5477, 0.0507, 0.1072, -0.2695]) tensor([0.3712, 0.2258, 0.2390, 0.1640]) -Greedy action tensor([ 0.4805, -0.0246, -0.1456, -0.1060]) tensor([0.3711, 0.2240, 0.1984, 0.2065]) -Greedy action tensor([ 0.8165, -0.3163, 0.0306, -0.6247]) tensor([0.4964, 0.1599, 0.2262, 0.1175]) -Greedy action tensor([ 0.5779, -0.4489, -0.1024, -0.3781]) tensor([0.4446, 0.1592, 0.2252, 0.1709]) -Greedy action tensor([ 0.7902, -0.3267, -0.0351, -0.1884]) tensor([0.4670, 0.1528, 0.2046, 0.1755]) -Greedy action tensor([ 0.6364, -0.4714, -0.0055, -0.3711]) tensor([0.4501, 0.1487, 0.2369, 0.1643]) -Greedy action tensor([ 0.7671, -0.5922, -0.1675, -0.7259]) tensor([0.5335, 0.1370, 0.2095, 0.1199]) -Greedy action tensor([ 1.0829, -0.8376, 0.1266, -0.3359]) tensor([0.5641, 0.0827, 0.2168, 0.1365]) -Greedy action tensor([ 0.5920, -0.2668, -0.0660, -0.4451]) tensor([0.4355, 0.1845, 0.2256, 0.1544]) -Greedy action tensor([ 0.4841, -0.1805, 0.0848, -0.2882]) tensor([0.3778, 0.1943, 0.2534, 0.1745]) -Greedy action tensor([ 0.4678, 0.0042, -0.0349, -0.2780]) tensor([0.3692, 0.2323, 0.2233, 0.1751]) -Greedy action tensor([ 0.5007, -0.1142, 0.0186, -0.2152]) tensor([0.3778, 0.2043, 0.2333, 0.1846]) -Greedy action tensor([ 0.8312, -0.4403, 0.0389, -0.4604]) tensor([0.4980, 0.1396, 0.2255, 0.1369]) -Greedy action tensor([ 0.5028, 0.1291, -0.0334, -0.2430]) tensor([0.3640, 0.2505, 0.2129, 0.1726]) -Greedy action tensor([ 0.6916, -0.0687, -0.1068, -0.4711]) tensor([0.4484, 0.2096, 0.2018, 0.1402]) -Greedy action tensor([ 0.2814, -0.0538, -0.0563, -0.1212]) tensor([0.3229, 0.2309, 0.2303, 0.2159]) -Greedy action tensor([ 0.5976, -0.4284, -0.1055, -0.3233]) tensor([0.4441, 0.1592, 0.2199, 0.1768]) -Greedy action tensor([ 0.5127, -0.2656, -0.0288, -0.1569]) tensor([0.3917, 0.1799, 0.2279, 0.2005]) -Greedy action tensor([ 0.7090, -0.2354, 0.0870, -0.4967]) tensor([0.4494, 0.1748, 0.2413, 0.1346]) -Greedy action tensor([ 0.5943, -0.0804, -0.0533, -0.2792]) tensor([0.4082, 0.2079, 0.2136, 0.1704]) -Greedy action tensor([ 0.5588, -0.3163, 0.0041, -0.3509]) tensor([0.4178, 0.1741, 0.2399, 0.1682]) -Greedy action tensor([ 0.4868, -0.3648, 0.1171, -0.5677]) tensor([0.4055, 0.1730, 0.2802, 0.1413]) -Greedy action tensor([ 0.5881, -0.2930, 0.1522, -0.4992]) tensor([0.4170, 0.1728, 0.2697, 0.1406]) -Greedy action tensor([ 0.3334, -0.0617, -0.1184, 0.0489]) tensor([0.3265, 0.2200, 0.2078, 0.2457]) -Greedy action tensor([ 0.4120, -0.1392, -0.2069, -0.3588]) tensor([0.3880, 0.2236, 0.2089, 0.1795]) -Greedy action tensor([ 0.6841, -0.2787, -0.0742, -0.0883]) tensor([0.4325, 0.1651, 0.2026, 0.1998]) -Greedy action tensor([ 0.3695, -0.0915, -0.0814, -0.2285]) tensor([0.3549, 0.2238, 0.2261, 0.1952]) -Greedy action tensor([ 0.3792, -0.1250, -0.0929, -0.2839]) tensor([0.3646, 0.2202, 0.2274, 0.1878]) -Greedy action tensor([ 0.7430, -0.3569, -0.0008, -0.3349]) tensor([0.4654, 0.1549, 0.2212, 0.1584]) -Greedy action tensor([ 0.3075, -0.1892, 0.0161, -0.4422]) tensor([0.3536, 0.2152, 0.2642, 0.1671]) -Greedy action tensor([ 0.2916, 0.0566, -0.0303, -0.1393]) tensor([0.3159, 0.2498, 0.2290, 0.2053]) -Greedy action tensor([ 0.6106, -0.3060, -0.0633, -0.3803]) tensor([0.4384, 0.1753, 0.2235, 0.1628]) -Greedy action tensor([ 0.3648, -0.1036, 0.0606, -0.2501]) tensor([0.3443, 0.2155, 0.2540, 0.1862]) -Greedy action tensor([ 0.4834, 0.0422, -0.1131, -0.1476]) tensor([0.3668, 0.2360, 0.2020, 0.1952]) -Greedy action tensor([-0.1656, 0.2392, 1.0093, 0.7231]) tensor([0.1224, 0.1835, 0.3964, 0.2977]) -Greedy action tensor([-0.1260, -0.4393, 0.6607, -0.4469]) tensor([0.2149, 0.1571, 0.4720, 0.1559]) -Greedy action tensor([1.0715, 0.6514, 1.5604, 0.2486]) tensor([0.2683, 0.1763, 0.4375, 0.1178]) -Greedy action tensor([-0.0060, -0.7046, 0.3490, 0.4779]) tensor([0.2200, 0.1094, 0.3137, 0.3569]) -Greedy action tensor([ 0.8276, -0.3206, 0.4065, 1.3422]) tensor([0.2742, 0.0870, 0.1800, 0.4588]) -Greedy action tensor([ 0.9324, 0.6984, -0.4749, 1.3400]) tensor([0.2825, 0.2236, 0.0692, 0.4247]) -Greedy action tensor([ 7.2496e-01, -2.5773e-04, 5.6946e-01, 6.2700e-01]) tensor([0.3080, 0.1491, 0.2636, 0.2792]) -Greedy action tensor([0.3152, 0.8491, 0.3930, 0.9494]) tensor([0.1763, 0.3007, 0.1906, 0.3324]) -Greedy action tensor([-0.1516, -0.9566, 1.6587, 0.5022]) tensor([0.1055, 0.0472, 0.6446, 0.2028]) -Greedy action tensor([-0.2507, -2.3974, -0.4920, -0.6639]) tensor([0.3900, 0.0456, 0.3064, 0.2580]) -Greedy action tensor([-0.2695, -1.2451, -0.4574, 1.5107]) tensor([0.1229, 0.0463, 0.1018, 0.7289]) -Greedy action tensor([-1.0806, -0.0629, 0.0301, -1.1955]) tensor([0.1300, 0.3596, 0.3946, 0.1159]) -Greedy action tensor([ 1.7640, -1.3350, 1.2493, 1.4908]) tensor([0.4160, 0.0188, 0.2486, 0.3166]) -Greedy action tensor([ 1.1643, -1.6335, 0.0191, 0.3744]) tensor([0.5455, 0.0332, 0.1736, 0.2476]) -Greedy action tensor([ 0.5464, 0.0106, -0.5604, -0.1242]) tensor([0.4120, 0.2411, 0.1362, 0.2107]) -Greedy action tensor([ 0.6870, -0.4890, -0.6091, 1.2595]) tensor([0.2981, 0.0920, 0.0816, 0.5284]) -Greedy action tensor([0.5572, 0.5130, 1.8405, 0.3606]) tensor([0.1566, 0.1498, 0.5650, 0.1286]) -Greedy action tensor([ 1.0273, -0.5220, 1.1815, 0.8751]) tensor([0.3088, 0.0656, 0.3603, 0.2652]) -Greedy action tensor([ 0.8799, -0.7095, 0.1005, 0.8028]) tensor([0.3863, 0.0788, 0.1772, 0.3577]) -Greedy action tensor([ 1.4797, -0.3465, 1.6143, 0.9995]) tensor([0.3420, 0.0551, 0.3913, 0.2116]) -Greedy action tensor([ 0.9651, 1.2992, -0.0783, -0.2095]) tensor([0.3270, 0.4567, 0.1152, 0.1010]) -Greedy action tensor([ 0.9298, -0.5297, -0.4394, 0.4533]) tensor([0.4745, 0.1102, 0.1207, 0.2946]) -Greedy action tensor([ 0.6894, -1.7496, 0.8713, 0.9629]) tensor([0.2777, 0.0242, 0.3331, 0.3650]) -Greedy action tensor([ 0.3852, -0.6731, 0.0311, 0.8523]) tensor([0.2744, 0.0952, 0.1926, 0.4378]) -Greedy action tensor([ 0.0653, -1.0741, 1.5043, -0.0488]) tensor([0.1556, 0.0498, 0.6559, 0.1388]) -Greedy action tensor([ 0.0695, -2.9181, 0.4013, 0.5867]) tensor([0.2426, 0.0122, 0.3381, 0.4070]) -Greedy action tensor([-0.5984, -0.1119, -0.2720, 0.0630]) tensor([0.1681, 0.2734, 0.2329, 0.3256]) -Greedy action tensor([ 0.3766, 0.4248, -0.1334, 0.6549]) tensor([0.2518, 0.2643, 0.1512, 0.3327]) -Greedy action tensor([0.3167, 0.2697, 0.2885, 0.0220]) tensor([0.2724, 0.2599, 0.2648, 0.2029]) -Greedy action tensor([-0.1197, -1.1749, 0.8599, -0.6525]) tensor([0.2175, 0.0757, 0.5792, 0.1276]) -Greedy action tensor([ 0.8969, -0.0179, 1.7551, 1.1374]) tensor([0.1988, 0.0796, 0.4688, 0.2528]) -Greedy action tensor([-0.8848, -0.7142, -0.2412, 0.5568]) tensor([0.1202, 0.1426, 0.2289, 0.5083]) -Greedy action tensor([-0.5336, 0.1587, 0.7698, -0.5491]) tensor([0.1305, 0.2607, 0.4804, 0.1285]) -Greedy action tensor([ 0.2140, -0.7260, 0.4128, 0.9408]) tensor([0.2137, 0.0835, 0.2607, 0.4421]) -Greedy action tensor([-0.3030, 0.3544, -0.0859, 1.2534]) tensor([0.1122, 0.2165, 0.1394, 0.5320]) -Greedy action tensor([ 1.1615, -1.5881, 0.6632, 0.3338]) tensor([0.4743, 0.0303, 0.2881, 0.2073]) -Greedy action tensor([-0.2410, -0.9685, 0.5217, 0.5440]) tensor([0.1718, 0.0830, 0.3684, 0.3767]) -Greedy action tensor([ 1.1858, -0.1981, 1.1526, 0.4108]) tensor([0.3733, 0.0936, 0.3611, 0.1720]) -Greedy action tensor([-0.8665, -1.1312, 1.0907, 0.1687]) tensor([0.0857, 0.0658, 0.6070, 0.2414]) -Greedy action tensor([-0.3478, 0.4061, -0.1328, 0.3284]) tensor([0.1579, 0.3357, 0.1958, 0.3106]) -Greedy action tensor([ 0.4275, 0.8533, -0.3273, 0.6366]) tensor([0.2362, 0.3616, 0.1110, 0.2911]) -Greedy action tensor([ 0.9299, -0.4618, 1.0389, 0.8407]) tensor([0.3050, 0.0758, 0.3401, 0.2790]) -Greedy action tensor([ 0.1079, -0.4200, -0.3193, 1.5538]) tensor([0.1541, 0.0909, 0.1005, 0.6544]) -Greedy action tensor([ 1.1417, -0.9304, 0.8284, 1.1113]) tensor([0.3537, 0.0445, 0.2586, 0.3431]) -Greedy action tensor([ 0.5809, -0.4746, 0.8737, 0.0325]) tensor([0.3062, 0.1066, 0.4103, 0.1769]) -Greedy action tensor([-0.0511, -0.9527, 0.3857, -0.4886]) tensor([0.2778, 0.1128, 0.4300, 0.1794]) -Greedy action tensor([-1.2562, -0.3443, -0.5609, 0.2954]) tensor([0.0979, 0.2437, 0.1963, 0.4621]) -Greedy action tensor([-0.4268, 1.3495, 0.8633, -0.4838]) tensor([0.0871, 0.5144, 0.3163, 0.0822]) -Greedy action tensor([-0.3486, -0.2962, 0.6177, -0.2645]) tensor([0.1733, 0.1826, 0.4555, 0.1885]) -Greedy action tensor([-0.4574, -0.0954, -0.4632, 1.0456]) tensor([0.1262, 0.1812, 0.1254, 0.5672]) -Greedy action tensor([-0.6037, -0.9161, -0.8151, 1.2153]) tensor([0.1149, 0.0840, 0.0930, 0.7081]) -Greedy action tensor([0.4459, 0.0465, 0.9743, 0.5914]) tensor([0.2211, 0.1483, 0.3750, 0.2557]) -Greedy action tensor([ 0.4263, -1.3831, 0.9233, -0.7415]) tensor([0.3207, 0.0525, 0.5271, 0.0997]) -Greedy action tensor([ 1.1144, -0.6198, 0.1184, 1.1054]) tensor([0.3942, 0.0696, 0.1456, 0.3906]) -Greedy action tensor([ 0.5199, -2.4837, -0.2295, 1.2195]) tensor([0.2829, 0.0140, 0.1337, 0.5694]) -Greedy action tensor([ 0.6599, 0.8853, -0.9330, 0.7547]) tensor([0.2812, 0.3524, 0.0572, 0.3092]) -Greedy action tensor([ 1.2373, -0.4765, 1.2577, 1.6725]) tensor([0.2669, 0.0481, 0.2724, 0.4125]) -Greedy action tensor([ 5.7614e-01, 4.4934e-02, -2.0686e-04, -1.9408e-01]) tensor([0.3827, 0.2250, 0.2151, 0.1772]) -Greedy action tensor([ 0.2746, -0.4392, 0.2551, -0.7395]) tensor([0.3530, 0.1729, 0.3461, 0.1280]) -Greedy action tensor([ 1.8537, -0.3295, 1.6751, 1.4402]) tensor([0.3831, 0.0432, 0.3204, 0.2533]) -Greedy action tensor([-0.1247, -0.3520, -0.7292, 1.3244]) tensor([0.1515, 0.1207, 0.0828, 0.6451]) -Greedy action tensor([ 0.9072, -0.3050, -0.5305, 1.1402]) tensor([0.3575, 0.1064, 0.0849, 0.4513]) -Greedy action tensor([ 1.8899, -0.4910, 1.4777, 1.6650]) tensor([0.3916, 0.0362, 0.2594, 0.3128]) -Greedy action tensor([ 0.5897, -0.2289, 0.6733, 0.8487]) tensor([0.2615, 0.1153, 0.2843, 0.3388]) -Greedy action tensor([ 0.4211, -1.4612, 0.8869, 0.3431]) tensor([0.2724, 0.0415, 0.4341, 0.2520]) -Greedy action tensor([-0.2623, 0.6223, 0.8752, 0.1940]) tensor([0.1232, 0.2983, 0.3842, 0.1944]) -Greedy action tensor([-0.3929, -0.2608, 0.3627, 0.1348]) tensor([0.1676, 0.1913, 0.3569, 0.2842]) -Greedy action tensor([-1.1807, -1.1953, -0.3421, 0.3638]) tensor([0.1113, 0.1097, 0.2575, 0.5215]) -Greedy action tensor([-0.1823, 0.5693, 0.5704, 0.0553]) tensor([0.1536, 0.3257, 0.3260, 0.1948]) -Greedy action tensor([ 0.8536, -0.2848, 0.8927, 0.4956]) tensor([0.3269, 0.1047, 0.3399, 0.2285]) -Greedy action tensor([-0.3866, -0.7578, -0.5000, 0.3346]) tensor([0.2155, 0.1487, 0.1924, 0.4433]) -Greedy action tensor([0.3496, 0.0345, 0.1277, 1.1239]) tensor([0.2128, 0.1553, 0.1704, 0.4615]) -Greedy action tensor([ 1.7502, -0.5515, 0.2290, -0.3727]) tensor([0.6953, 0.0696, 0.1519, 0.0832]) -Greedy action tensor([ 0.7861, -0.3187, 1.2812, 1.5948]) tensor([0.1917, 0.0635, 0.3145, 0.4303]) -Greedy action tensor([ 0.9741, 1.1740, -0.6982, 0.4768]) tensor([0.3314, 0.4048, 0.0622, 0.2016]) -Greedy action tensor([0.3574, 0.4852, 0.7055, 0.0781]) tensor([0.2321, 0.2637, 0.3287, 0.1755]) -Greedy action tensor([ 0.3734, 1.1177, -0.6828, 0.6308]) tensor([0.2107, 0.4435, 0.0733, 0.2726]) -Greedy action tensor([ 0.7844, -0.5920, 2.0202, 0.5207]) tensor([0.1831, 0.0462, 0.6300, 0.1406]) -Greedy action tensor([0.4104, 0.1928, 0.4464, 0.0672]) tensor([0.2816, 0.2266, 0.2920, 0.1998]) -Greedy action tensor([1.7782, 0.8631, 0.0801, 1.1865]) tensor([0.4680, 0.1874, 0.0857, 0.2590]) -Greedy action tensor([ 0.2631, -0.7651, -0.0326, 1.0818]) tensor([0.2289, 0.0819, 0.1703, 0.5190]) -Greedy action tensor([ 1.0876, -0.3271, -0.4721, 0.2401]) tensor([0.5315, 0.1291, 0.1117, 0.2277]) -Greedy action tensor([ 2.0512, -0.9399, -0.5961, 0.6220]) tensor([0.7350, 0.0369, 0.0521, 0.1760]) -Greedy action tensor([ 1.2566, -0.3320, -0.0562, 0.3956]) tensor([0.5274, 0.1077, 0.1419, 0.2230]) -Greedy action tensor([ 1.5317, -0.4359, -0.1739, 0.7133]) tensor([0.5673, 0.0793, 0.1031, 0.2503]) -Greedy action tensor([ 0.4544, -0.3144, -0.2450, 0.1497]) tensor([0.3707, 0.1718, 0.1842, 0.2733]) -Greedy action tensor([ 0.9057, -0.4138, -0.1068, 0.1299]) tensor([0.4783, 0.1278, 0.1738, 0.2202]) -Greedy action tensor([ 1.4496, -0.4701, -0.5830, 0.0703]) tensor([0.6538, 0.0959, 0.0857, 0.1646]) -Greedy action tensor([ 1.0490, -0.5321, -0.1143, 0.1423]) tensor([0.5203, 0.1070, 0.1626, 0.2101]) -Greedy action tensor([ 1.2504, 0.0358, -0.1640, -0.1204]) tensor([0.5575, 0.1655, 0.1355, 0.1415]) -Greedy action tensor([ 0.5298, -0.0813, -0.2759, 0.1258]) tensor([0.3763, 0.2043, 0.1681, 0.2513]) -Greedy action tensor([ 1.0475, -0.1566, -0.4998, 0.2390]) tensor([0.5106, 0.1532, 0.1087, 0.2275]) -Greedy action tensor([ 0.7088, -0.4298, -0.4337, 0.0344]) tensor([0.4654, 0.1490, 0.1485, 0.2371]) -Greedy action tensor([ 1.2745, -0.5591, -0.3568, 0.1870]) tensor([0.5908, 0.0944, 0.1156, 0.1991]) -Greedy action tensor([ 0.9867, -0.4664, 0.0346, 0.1857]) tensor([0.4834, 0.1130, 0.1866, 0.2170]) -Greedy action tensor([ 0.3802, -0.1073, 0.0083, -0.3268]) tensor([0.3576, 0.2196, 0.2465, 0.1763]) -Greedy action tensor([ 1.1791, -0.4448, -0.4882, -0.1493]) tensor([0.6058, 0.1194, 0.1143, 0.1605]) -Greedy action tensor([ 0.6501, -0.3511, -0.4034, 0.6359]) tensor([0.3701, 0.1360, 0.1291, 0.3649]) -Greedy action tensor([ 0.3587, -0.2765, -0.0429, -0.0380]) tensor([0.3482, 0.1845, 0.2331, 0.2342]) -Greedy action tensor([ 1.3603, -0.3863, -0.1231, 0.0180]) tensor([0.6015, 0.1049, 0.1365, 0.1571]) -Greedy action tensor([ 1.0006, 0.0303, 0.1185, -0.2847]) tensor([0.4832, 0.1831, 0.2000, 0.1336]) -Greedy action tensor([ 1.0153, -0.1449, -0.1235, 0.4166]) tensor([0.4581, 0.1436, 0.1467, 0.2517]) -Greedy action tensor([ 1.2199, -0.3781, -0.3406, 0.5611]) tensor([0.5182, 0.1048, 0.1088, 0.2681]) -Greedy action tensor([ 0.6297, -0.6666, -0.0442, -0.3144]) tensor([0.4604, 0.1259, 0.2346, 0.1791]) -Greedy action tensor([ 1.8091, -0.9059, -0.1799, 0.6385]) tensor([0.6609, 0.0437, 0.0904, 0.2050]) -Greedy action tensor([ 0.9320, 0.2038, -0.4180, -0.4554]) tensor([0.5021, 0.2424, 0.1302, 0.1254]) -Greedy action tensor([ 1.0046, -0.4618, -0.1603, 0.1424]) tensor([0.5089, 0.1174, 0.1588, 0.2149]) -Greedy action tensor([ 1.1007, -0.6394, -0.1541, 0.1308]) tensor([0.5435, 0.0954, 0.1550, 0.2061]) -Greedy action tensor([1.4025, 0.1831, 0.0350, 0.0365]) tensor([0.5539, 0.1636, 0.1411, 0.1413]) -Greedy action tensor([ 0.7775, -0.5712, -0.0456, -0.0170]) tensor([0.4650, 0.1207, 0.2042, 0.2101]) -Greedy action tensor([ 1.0901, -0.9031, -0.1029, 0.3682]) tensor([0.5194, 0.0708, 0.1575, 0.2523]) -Greedy action tensor([ 1.4923, -0.5061, -0.0749, 0.2102]) tensor([0.6167, 0.0836, 0.1286, 0.1711]) -Greedy action tensor([ 0.8238, -0.5358, -0.0714, 0.0569]) tensor([0.4696, 0.1206, 0.1918, 0.2181]) -Greedy action tensor([ 1.8241, -0.6543, -0.2048, 0.4462]) tensor([0.6814, 0.0572, 0.0896, 0.1718]) -Greedy action tensor([ 1.4449, -0.3695, -0.5146, 0.3571]) tensor([0.6095, 0.0993, 0.0859, 0.2054]) -Greedy action tensor([ 0.9523, -0.3437, 0.1545, -0.0629]) tensor([0.4793, 0.1312, 0.2159, 0.1737]) -Greedy action tensor([ 1.0469, -0.3470, -0.0416, 0.1224]) tensor([0.5046, 0.1252, 0.1699, 0.2002]) -Greedy action tensor([ 1.2108, -0.5824, -0.2799, 0.2176]) tensor([0.5675, 0.0944, 0.1278, 0.2102]) -Greedy action tensor([ 1.2111, -0.1454, 0.0155, -0.3717]) tensor([0.5664, 0.1459, 0.1714, 0.1163]) -Greedy action tensor([ 1.3707, -0.6590, -0.2993, 0.0923]) tensor([0.6257, 0.0822, 0.1178, 0.1743]) -Greedy action tensor([ 0.9623, -0.6120, -0.3994, 0.6404]) tensor([0.4570, 0.0947, 0.1171, 0.3312]) -Greedy action tensor([ 1.2669, -0.4933, -0.2973, 0.1558]) tensor([0.5846, 0.1006, 0.1223, 0.1925]) -Greedy action tensor([ 1.8896, -0.8747, -0.3133, 0.4038]) tensor([0.7144, 0.0450, 0.0789, 0.1617]) -Greedy action tensor([ 1.2897, -0.5313, -0.5915, -0.1173]) tensor([0.6414, 0.1038, 0.0977, 0.1571]) -Greedy action tensor([ 0.8497, -0.3669, -0.2840, 0.2142]) tensor([0.4656, 0.1379, 0.1499, 0.2466]) -Greedy action tensor([ 1.1582, -0.7311, -0.7039, 0.8328]) tensor([0.4929, 0.0745, 0.0766, 0.3560]) -Greedy action tensor([ 1.2796, -0.0525, 0.1849, -0.0797]) tensor([0.5390, 0.1422, 0.1804, 0.1384]) -Greedy action tensor([ 1.2757, -0.3520, -0.1177, 0.3046]) tensor([0.5485, 0.1077, 0.1361, 0.2077]) -Greedy action tensor([ 1.1496, -0.2135, -0.5797, -0.1337]) tensor([0.5846, 0.1496, 0.1037, 0.1620]) -Greedy action tensor([ 1.1702, -0.2705, -0.3175, 0.1135]) tensor([0.5524, 0.1308, 0.1248, 0.1920]) -Greedy action tensor([ 0.9532, -0.3192, -0.1216, 0.0415]) tensor([0.4942, 0.1385, 0.1687, 0.1986]) -Greedy action tensor([ 1.1194, -0.7013, -0.4364, 0.6517]) tensor([0.5002, 0.0810, 0.1055, 0.3133]) -Greedy action tensor([ 0.9335, -0.1108, 0.2176, -0.3471]) tensor([0.4720, 0.1661, 0.2307, 0.1312]) -Greedy action tensor([ 1.9694, -1.0602, -0.2855, 0.1584]) tensor([0.7595, 0.0367, 0.0797, 0.1242]) -Greedy action tensor([ 1.5407, -0.9004, -0.5587, 1.1548]) tensor([0.5293, 0.0461, 0.0649, 0.3598]) -Greedy action tensor([ 0.7928, -0.0334, -0.1718, -0.3187]) tensor([0.4656, 0.2038, 0.1774, 0.1532]) -Greedy action tensor([ 1.3715, -0.6318, -0.2736, 0.3310]) tensor([0.5948, 0.0802, 0.1148, 0.2101]) -Greedy action tensor([ 1.2749, -0.5449, -0.2929, 0.3092]) tensor([0.5710, 0.0925, 0.1191, 0.2174]) -Greedy action tensor([ 1.4612, -0.1668, -0.4082, 0.2058]) tensor([0.6114, 0.1200, 0.0943, 0.1742]) -Greedy action tensor([ 0.9498, -0.3001, -0.2413, 0.3190]) tensor([0.4711, 0.1350, 0.1432, 0.2507]) -Greedy action tensor([ 1.3907, -0.6951, -0.3752, -0.0423]) tensor([0.6520, 0.0810, 0.1115, 0.1556]) -Greedy action tensor([ 0.8822, -0.2598, -0.1684, -0.3081]) tensor([0.5068, 0.1618, 0.1772, 0.1541]) -Greedy action tensor([ 0.5987, -0.2661, -0.4164, -0.0709]) tensor([0.4356, 0.1835, 0.1579, 0.2230]) -Greedy action tensor([ 0.5357, -0.4116, -0.4904, 0.6955]) tensor([0.3425, 0.1328, 0.1228, 0.4019]) -Greedy action tensor([ 0.9818, -0.3799, 0.0946, 0.0515]) tensor([0.4849, 0.1242, 0.1997, 0.1913]) -Greedy action tensor([ 1.8806, -0.7002, -0.2607, 0.2481]) tensor([0.7201, 0.0545, 0.0846, 0.1407]) -Greedy action tensor([ 1.0815, -0.2848, -0.2017, -0.2385]) tensor([0.5558, 0.1418, 0.1540, 0.1485]) -Greedy action tensor([ 0.9720, -0.2003, -0.2315, 0.1120]) tensor([0.4919, 0.1523, 0.1476, 0.2081]) -Greedy action tensor([ 0.6598, -0.1207, -0.0868, -0.3468]) tensor([0.4352, 0.1994, 0.2063, 0.1591]) -Greedy action tensor([ 0.9981, -0.2609, 0.0079, 0.0720]) tensor([0.4874, 0.1384, 0.1811, 0.1931]) -Greedy action tensor([ 1.4015, -0.5035, -0.3831, 0.1591]) tensor([0.6229, 0.0927, 0.1046, 0.1798]) -Greedy action tensor([ 0.9387, -0.3789, -0.3838, 0.5429]) tensor([0.4530, 0.1213, 0.1207, 0.3050]) -Greedy action tensor([ 1.5241, -0.5681, -0.0631, -0.0350]) tensor([0.6501, 0.0802, 0.1329, 0.1367]) -Greedy action tensor([ 0.9427, -0.5257, -0.2859, 0.5212]) tensor([0.4589, 0.1057, 0.1343, 0.3011]) -Greedy action tensor([ 1.3322, -0.6047, -0.4085, 0.3898]) tensor([0.5851, 0.0843, 0.1026, 0.2280]) -Greedy action tensor([ 1.1710, -0.3760, -0.2128, 0.1913]) tensor([0.5438, 0.1158, 0.1363, 0.2042]) -Greedy action tensor([ 1.3163, -0.5742, -0.3343, 0.1331]) tensor([0.6063, 0.0916, 0.1164, 0.1857]) -Greedy action tensor([ 1.2413, -0.6044, -0.1297, 0.1374]) tensor([0.5736, 0.0906, 0.1456, 0.1902]) -Greedy action tensor([ 1.2795, -0.6255, -0.3282, 0.6135]) tensor([0.5368, 0.0799, 0.1075, 0.2758]) -Greedy action tensor([ 1.5181, -0.7017, -0.1018, 0.1094]) tensor([0.6447, 0.0700, 0.1276, 0.1576]) -Greedy action tensor([ 0.6956, -0.4464, -0.0095, -0.0400]) tensor([0.4362, 0.1392, 0.2155, 0.2090]) -Greedy action tensor([ 1.2420, -0.7575, -0.1778, 0.3198]) tensor([0.5634, 0.0763, 0.1362, 0.2241]) -Greedy action tensor([-1.8376, -0.3770, 0.6021, -0.1203]) tensor([0.0447, 0.1928, 0.5132, 0.2492]) -Greedy action tensor([-1.0404, -0.3070, 0.0148, 0.0255]) tensor([0.1129, 0.2351, 0.3243, 0.3278]) -Greedy action tensor([-1.8305, -0.4714, 0.7001, -0.0123]) tensor([0.0423, 0.1648, 0.5319, 0.2609]) -Greedy action tensor([-0.7796, -0.4516, 0.2066, 0.1127]) tensor([0.1331, 0.1848, 0.3570, 0.3250]) -Greedy action tensor([-1.7207, -0.1301, 0.5033, -0.0879]) tensor([0.0493, 0.2421, 0.4561, 0.2525]) -Greedy action tensor([-0.4812, -0.4070, 0.1496, 0.0902]) tensor([0.1746, 0.1881, 0.3281, 0.3092]) -Greedy action tensor([-1.6837, -0.7797, 0.1300, -0.4606]) tensor([0.0769, 0.1899, 0.4718, 0.2614]) -Greedy action tensor([-1.9136, -0.6014, 0.4515, -0.1913]) tensor([0.0477, 0.1772, 0.5079, 0.2671]) -Greedy action tensor([-0.7016, -0.5458, 1.1222, 1.6187]) tensor([0.0539, 0.0630, 0.3341, 0.5489]) -Greedy action tensor([-1.8202, -0.4837, 0.6286, -0.0471]) tensor([0.0449, 0.1709, 0.5197, 0.2645]) -Greedy action tensor([-0.4382, -0.1563, 0.9429, 1.5403]) tensor([0.0739, 0.0979, 0.2939, 0.5343]) -Greedy action tensor([-1.2096, -0.5961, 0.2948, 0.4386]) tensor([0.0797, 0.1472, 0.3588, 0.4143]) -Greedy action tensor([-1.9616, -0.9412, 0.0941, -0.3549]) tensor([0.0603, 0.1674, 0.4714, 0.3009]) -Greedy action tensor([-0.6839, -0.3576, 0.7263, 1.3679]) tensor([0.0701, 0.0972, 0.2872, 0.5455]) -Greedy action tensor([-0.8003, -0.4035, 0.4081, 0.0196]) tensor([0.1234, 0.1835, 0.4131, 0.2801]) -Greedy action tensor([-1.3012, -0.5217, 0.4772, 0.7143]) tensor([0.0602, 0.1313, 0.3565, 0.4519]) -Greedy action tensor([-1.8168, -0.4921, 0.7756, 0.1200]) tensor([0.0399, 0.1501, 0.5332, 0.2768]) -Greedy action tensor([-1.1466, -0.5541, 0.4235, 0.5274]) tensor([0.0772, 0.1397, 0.3712, 0.4119]) -Greedy action tensor([-1.8318, -0.4797, 0.6233, -0.0708]) tensor([0.0448, 0.1731, 0.5216, 0.2605]) -Greedy action tensor([-1.4347, -0.4757, 0.5051, 0.3118]) tensor([0.0613, 0.1601, 0.4268, 0.3518]) -Greedy action tensor([-1.9888, -0.6050, 0.9426, 0.2925]) tensor([0.0298, 0.1190, 0.5593, 0.2919]) -Greedy action tensor([-1.6870, -0.3721, 0.9923, 0.7778]) tensor([0.0322, 0.1199, 0.4692, 0.3786]) -Greedy action tensor([-1.2997, -0.3012, 0.7516, 1.0036]) tensor([0.0465, 0.1262, 0.3618, 0.4655]) -Greedy action tensor([-1.0152, -0.3450, 0.7928, 1.1152]) tensor([0.0572, 0.1119, 0.3490, 0.4818]) -Greedy action tensor([-0.8589, -0.5642, 0.2439, 0.3342]) tensor([0.1156, 0.1552, 0.3482, 0.3811]) -Greedy action tensor([-1.8044, -0.3239, 0.6245, -0.0200]) tensor([0.0441, 0.1936, 0.4999, 0.2624]) -Greedy action tensor([-0.7934, -0.2425, 0.6521, 1.3300]) tensor([0.0652, 0.1131, 0.2767, 0.5450]) -Greedy action tensor([-5.4687e-01, 4.1023e-01, 1.0577e-01, -3.7912e-04]) tensor([0.1379, 0.3591, 0.2648, 0.2382]) -Greedy action tensor([-1.6426, -0.6751, 1.1502, 0.7097]) tensor([0.0328, 0.0864, 0.5359, 0.3449]) -Greedy action tensor([-1.7110, -0.5174, 1.0532, 0.6225]) tensor([0.0328, 0.1082, 0.5206, 0.3384]) -Greedy action tensor([-1.7384, -0.4364, 0.5710, -0.0515]) tensor([0.0496, 0.1825, 0.4997, 0.2682]) -Greedy action tensor([-1.7766, 0.4312, 0.4621, 0.0198]) tensor([0.0392, 0.3566, 0.3678, 0.2363]) -Greedy action tensor([-0.7880, -0.6621, 0.5836, -0.4080]) tensor([0.1327, 0.1505, 0.5229, 0.1940]) -Greedy action tensor([-1.2529, -0.4623, 0.4287, 0.6023]) tensor([0.0668, 0.1473, 0.3590, 0.4270]) -Greedy action tensor([-1.4859, -0.4421, 0.3919, 0.0886]) tensor([0.0658, 0.1867, 0.4300, 0.3175]) -Greedy action tensor([-0.7265, -0.4348, 0.2528, 0.7549]) tensor([0.1064, 0.1424, 0.2832, 0.4680]) -Greedy action tensor([-1.6156, -0.6477, 0.1181, -0.4033]) tensor([0.0790, 0.2080, 0.4474, 0.2656]) -Greedy action tensor([-0.9504, -0.4965, 0.5024, -0.3366]) tensor([0.1150, 0.1810, 0.4915, 0.2124]) -Greedy action tensor([-0.4705, -0.4097, 0.1855, 0.1735]) tensor([0.1697, 0.1803, 0.3270, 0.3231]) -Greedy action tensor([-1.7579, -0.3613, 0.5709, -0.0739]) tensor([0.0483, 0.1953, 0.4961, 0.2603]) -Greedy action tensor([-1.2751, -0.5456, 0.3442, 0.3767]) tensor([0.0750, 0.1555, 0.3785, 0.3910]) -Greedy action tensor([-1.7823, -0.5155, 0.6351, -0.0191]) tensor([0.0463, 0.1644, 0.5194, 0.2700]) -Greedy action tensor([-1.8603, -0.4058, 0.6203, -0.1208]) tensor([0.0436, 0.1868, 0.5212, 0.2484]) -Greedy action tensor([-1.6121, -0.5328, 0.4863, 0.0231]) tensor([0.0580, 0.1708, 0.4733, 0.2978]) -Greedy action tensor([-1.7630, -0.5328, 0.7597, 0.1499]) tensor([0.0423, 0.1447, 0.5268, 0.2863]) -Greedy action tensor([-1.0457, -0.5604, 0.2702, 0.5257]) tensor([0.0896, 0.1455, 0.3339, 0.4311]) -Greedy action tensor([-1.9072, -0.4051, 0.6387, -0.1592]) tensor([0.0417, 0.1872, 0.5317, 0.2394]) -Greedy action tensor([-1.3965, -0.5092, 0.7105, -0.4449]) tensor([0.0702, 0.1705, 0.5774, 0.1818]) -Greedy action tensor([-1.4745, -0.2564, 0.4533, 0.3182]) tensor([0.0579, 0.1959, 0.3983, 0.3479]) -Greedy action tensor([-1.3368, -0.4958, 0.4698, 0.4868]) tensor([0.0641, 0.1486, 0.3903, 0.3970]) -Greedy action tensor([-1.7957, -0.7649, 0.3007, -0.3035]) tensor([0.0610, 0.1711, 0.4965, 0.2714]) -Greedy action tensor([-1.9414, -0.4540, 0.6654, -0.1789]) tensor([0.0403, 0.1784, 0.5464, 0.2349]) -Greedy action tensor([-1.2752, -0.1914, 0.4211, -0.1612]) tensor([0.0803, 0.2373, 0.4378, 0.2446]) -Greedy action tensor([-0.8830, -0.1903, 0.4968, 1.2900]) tensor([0.0635, 0.1269, 0.2522, 0.5575]) -Greedy action tensor([-1.3119, -0.4806, 0.6143, -0.4647]) tensor([0.0800, 0.1838, 0.5494, 0.1867]) -Greedy action tensor([-1.4892, -0.8253, 1.2754, 1.1691]) tensor([0.0302, 0.0587, 0.4797, 0.4313]) -Greedy action tensor([-1.4546, -0.5330, 0.4575, 0.2221]) tensor([0.0640, 0.1608, 0.4330, 0.3422]) -Greedy action tensor([-1.7050, -0.6746, -0.0523, -0.4513]) tensor([0.0798, 0.2237, 0.4168, 0.2797]) -Greedy action tensor([-0.8693, -0.5644, 0.2968, 0.1453]) tensor([0.1201, 0.1630, 0.3856, 0.3314]) -Greedy action tensor([-1.6442, -0.4958, 0.8416, 0.4478]) tensor([0.0412, 0.1300, 0.4950, 0.3339]) -Greedy action tensor([-0.7832, 0.8998, 0.0708, -0.2823]) tensor([0.0963, 0.5184, 0.2263, 0.1590]) -Greedy action tensor([-1.1669, -0.5796, 0.3656, 0.1169]) tensor([0.0906, 0.1630, 0.4194, 0.3270]) -Greedy action tensor([-1.5470, -1.0622, 0.0729, -0.7758]) tensor([0.1016, 0.1650, 0.5135, 0.2198]) -Greedy action tensor([-0.4344, -0.0180, 0.6973, 1.4556]) tensor([0.0817, 0.1239, 0.2534, 0.5409]) -Greedy action tensor([-1.7476, -0.8317, 0.2065, -0.5025]) tensor([0.0713, 0.1781, 0.5030, 0.2476]) -Greedy action tensor([-1.3610, -0.4843, 0.5955, 0.6863]) tensor([0.0549, 0.1319, 0.3882, 0.4251]) -Greedy action tensor([-1.7747, -0.3139, 0.5774, -0.1432]) tensor([0.0478, 0.2059, 0.5021, 0.2442]) -Greedy action tensor([-1.9319, -0.4197, 0.6543, -0.2019]) tensor([0.0409, 0.1855, 0.5430, 0.2306]) -Greedy action tensor([-1.5185, -0.5515, 0.4320, 0.0808]) tensor([0.0641, 0.1685, 0.4504, 0.3170]) -Greedy action tensor([-1.0322, -0.3264, 0.7254, 1.2302]) tensor([0.0543, 0.1099, 0.3146, 0.5212]) -Greedy action tensor([-1.8149, -0.4576, 0.5991, -0.1269]) tensor([0.0466, 0.1810, 0.5206, 0.2519]) -Greedy action tensor([-1.5426, -0.4553, 1.0384, 0.9124]) tensor([0.0347, 0.1029, 0.4583, 0.4041]) -Greedy action tensor([-1.8099, -0.6384, 0.6143, 0.0300]) tensor([0.0458, 0.1479, 0.5177, 0.2886]) -Greedy action tensor([-1.2244, -0.5824, 0.2861, 0.3837]) tensor([0.0805, 0.1530, 0.3646, 0.4020]) -Greedy action tensor([-1.1662, -0.4355, 0.7024, 0.8777]) tensor([0.0579, 0.1202, 0.3750, 0.4469]) -Greedy action tensor([-1.4348, -0.5767, 0.8984, 0.9020]) tensor([0.0416, 0.0982, 0.4293, 0.4308]) -Greedy action tensor([-0.8776, 0.3375, 0.2930, -0.2274]) tensor([0.1051, 0.3544, 0.3390, 0.2015]) -Greedy action tensor([-0.9212, -0.5862, 0.1483, 0.4396]) tensor([0.1086, 0.1518, 0.3164, 0.4233]) -Greedy action tensor([-0.8875, -0.7969, 0.5838, 0.7281]) tensor([0.0871, 0.0954, 0.3793, 0.4382]) -Greedy action tensor([-1.8902, -0.4226, 0.6443, -0.1317]) tensor([0.0421, 0.1827, 0.5309, 0.2443]) -Greedy action tensor([-1.3594, -0.2650, 0.4575, 0.5854]) tensor([0.0584, 0.1744, 0.3591, 0.4081]) -Greedy action tensor([ 0.6510, -0.0340, -0.0696, -0.2325]) tensor([0.4160, 0.2097, 0.2024, 0.1719]) -Greedy action tensor([ 0.7388, -0.6263, -0.2509, -0.6210]) tensor([0.5308, 0.1356, 0.1973, 0.1363]) -Greedy action tensor([ 0.7275, -0.2619, -0.0219, -0.4915]) tensor([0.4673, 0.1737, 0.2209, 0.1381]) -Greedy action tensor([ 0.8467, -0.4954, 0.0193, -0.5393]) tensor([0.5132, 0.1341, 0.2244, 0.1283]) -Greedy action tensor([ 0.6387, -0.3824, 0.0898, -0.7218]) tensor([0.4557, 0.1642, 0.2632, 0.1169]) -Greedy action tensor([ 0.7122, -0.4351, -0.1709, -0.4148]) tensor([0.4866, 0.1545, 0.2012, 0.1577]) -Greedy action tensor([ 0.4697, -0.3088, -0.0850, -0.2257]) tensor([0.3949, 0.1813, 0.2268, 0.1970]) -Greedy action tensor([ 0.2707, 0.2012, -0.1439, -0.0878]) tensor([0.3038, 0.2833, 0.2007, 0.2122]) -Greedy action tensor([ 0.4741, -0.3204, 0.0218, -0.1270]) tensor([0.3793, 0.1714, 0.2413, 0.2080]) -Greedy action tensor([ 0.4332, 0.1058, -0.1059, -0.1282]) tensor([0.3479, 0.2508, 0.2029, 0.1984]) -Greedy action tensor([ 0.5206, -0.0924, 0.1104, -0.2761]) tensor([0.3765, 0.2040, 0.2498, 0.1697]) -Greedy action tensor([ 0.7234, -0.5951, 0.1488, -0.7040]) tensor([0.4830, 0.1292, 0.2719, 0.1159]) -Greedy action tensor([ 0.5051, -0.0866, 0.0124, -0.1611]) tensor([0.3734, 0.2066, 0.2281, 0.1918]) -Greedy action tensor([ 0.8693, -0.7163, 0.1240, -0.6997]) tensor([0.5297, 0.1085, 0.2514, 0.1103]) -Greedy action tensor([ 0.9119, -0.7882, 0.0230, -0.6092]) tensor([0.5518, 0.1008, 0.2268, 0.1206]) -Greedy action tensor([ 0.4915, 0.1173, -0.0712, -0.1878]) tensor([0.3617, 0.2488, 0.2061, 0.1834]) -Greedy action tensor([ 0.5072, -0.4280, 0.0429, -0.6912]) tensor([0.4305, 0.1690, 0.2706, 0.1299]) -Greedy action tensor([ 0.4951, -0.3036, -0.1476, -0.6661]) tensor([0.4369, 0.1966, 0.2298, 0.1368]) -Greedy action tensor([ 0.5178, -0.3002, -0.0053, -0.2707]) tensor([0.4019, 0.1773, 0.2382, 0.1826]) -Greedy action tensor([ 0.8036, -0.5876, -0.0936, -0.4434]) tensor([0.5145, 0.1280, 0.2097, 0.1478]) -Greedy action tensor([ 0.6369, -0.3163, -0.0732, -0.2726]) tensor([0.4386, 0.1691, 0.2156, 0.1766]) -Greedy action tensor([ 0.6931, -0.6084, -0.0742, -0.3821]) tensor([0.4813, 0.1310, 0.2235, 0.1642]) -Greedy action tensor([ 0.3260, -0.0921, -0.1098, -0.1233]) tensor([0.3398, 0.2237, 0.2197, 0.2168]) -Greedy action tensor([ 0.6139, -0.5110, 0.0963, -0.5439]) tensor([0.4475, 0.1453, 0.2667, 0.1406]) -Greedy action tensor([ 0.5773, -0.2769, -0.0626, -0.2970]) tensor([0.4219, 0.1796, 0.2225, 0.1760]) -Greedy action tensor([ 1.0103, -0.2938, -0.2652, -0.2809]) tensor([0.5478, 0.1487, 0.1530, 0.1506]) -Greedy action tensor([ 0.3996, -0.0360, 0.1988, -0.3992]) tensor([0.3431, 0.2219, 0.2807, 0.1543]) -Greedy action tensor([ 0.6863, -0.1581, -0.0974, -0.3597]) tensor([0.4468, 0.1921, 0.2041, 0.1570]) -Greedy action tensor([ 0.4017, -0.3267, -0.0717, -0.2131]) tensor([0.3779, 0.1824, 0.2354, 0.2043]) -Greedy action tensor([ 0.4606, 0.0582, 0.0684, -0.0912]) tensor([0.3424, 0.2290, 0.2313, 0.1972]) -Greedy action tensor([ 0.6683, -0.2799, 0.0108, -0.3090]) tensor([0.4382, 0.1698, 0.2271, 0.1649]) -Greedy action tensor([ 0.2423, 0.1453, -0.1671, -0.0454]) tensor([0.3011, 0.2732, 0.1999, 0.2258]) -Greedy action tensor([ 0.8110, -0.4410, -0.0336, -0.3169]) tensor([0.4903, 0.1402, 0.2107, 0.1587]) -Greedy action tensor([ 0.4107, -0.1587, -0.0641, -0.2056]) tensor([0.3666, 0.2074, 0.2280, 0.1979]) -Greedy action tensor([ 0.8199, -0.5917, -0.0962, -0.5328]) tensor([0.5257, 0.1281, 0.2103, 0.1359]) -Greedy action tensor([ 1.0268, -1.1108, 0.0646, -0.4899]) tensor([0.5816, 0.0686, 0.2222, 0.1276]) -Greedy action tensor([ 0.6056, -0.3904, -0.0347, -0.3194]) tensor([0.4361, 0.1611, 0.2299, 0.1729]) -Greedy action tensor([ 0.5781, -0.2990, -0.1368, -0.2655]) tensor([0.4282, 0.1781, 0.2095, 0.1842]) -Greedy action tensor([ 0.5696, 0.1680, 0.1480, -0.2632]) tensor([0.3623, 0.2425, 0.2377, 0.1575]) -Greedy action tensor([ 0.6643, -0.5125, 0.1410, -0.5970]) tensor([0.4579, 0.1411, 0.2713, 0.1297]) -Greedy action tensor([ 0.5103, -0.2539, -0.1675, -0.2956]) tensor([0.4132, 0.1924, 0.2098, 0.1846]) -Greedy action tensor([ 0.6843, -0.2329, 0.2584, -0.3988]) tensor([0.4182, 0.1671, 0.2731, 0.1416]) -Greedy action tensor([ 0.3534, -0.1877, -0.0585, -0.3725]) tensor([0.3665, 0.2134, 0.2428, 0.1774]) -Greedy action tensor([ 0.8133, -0.8030, -0.0557, -0.3968]) tensor([0.5219, 0.1037, 0.2189, 0.1556]) -Greedy action tensor([ 0.8592, -0.4387, 0.2146, -0.4135]) tensor([0.4812, 0.1314, 0.2526, 0.1348]) -Greedy action tensor([ 0.3236, 0.0085, -0.0349, -0.2845]) tensor([0.3364, 0.2455, 0.2350, 0.1831]) -Greedy action tensor([ 1.0523, -0.7267, -0.1046, -0.4469]) tensor([0.5860, 0.0989, 0.1843, 0.1309]) -Greedy action tensor([ 0.5928, -0.3479, -0.0239, -0.3963]) tensor([0.4344, 0.1696, 0.2345, 0.1616]) -Greedy action tensor([ 0.8582, -0.4007, 0.0311, -0.5522]) tensor([0.5088, 0.1445, 0.2225, 0.1242]) -Greedy action tensor([ 0.4998, 0.1411, -0.1633, -0.0561]) tensor([0.3588, 0.2506, 0.1849, 0.2058]) -Greedy action tensor([ 0.5160, -0.2213, -0.0450, -0.5079]) tensor([0.4152, 0.1987, 0.2370, 0.1492]) -Greedy action tensor([ 1.0007, -1.1077, -0.0929, -0.4026]) tensor([0.5875, 0.0713, 0.1968, 0.1444]) -Greedy action tensor([ 4.3078e-01, -6.8009e-05, 5.3549e-02, -2.4359e-01]) tensor([0.3515, 0.2284, 0.2410, 0.1791]) -Greedy action tensor([ 0.5482, 0.0962, -0.0019, -0.0853]) tensor([0.3644, 0.2319, 0.2102, 0.1934]) -Greedy action tensor([ 1.1675, -0.6168, -0.1135, -0.4511]) tensor([0.6083, 0.1021, 0.1690, 0.1206]) -Greedy action tensor([ 0.7787, -0.2815, -0.0593, -0.4057]) tensor([0.4796, 0.1661, 0.2075, 0.1467]) -Greedy action tensor([ 8.9163e-01, -4.7188e-01, -7.0840e-05, -5.1162e-01]) tensor([0.5231, 0.1338, 0.2145, 0.1286]) -Greedy action tensor([ 0.9415, -0.5952, 0.0733, -0.8762]) tensor([0.5564, 0.1197, 0.2335, 0.0904]) -Greedy action tensor([ 0.6050, -0.0933, -0.1131, -0.2349]) tensor([0.4138, 0.2058, 0.2018, 0.1786]) -Greedy action tensor([ 0.2348, 0.0846, 0.2268, -0.2981]) tensor([0.2907, 0.2502, 0.2884, 0.1706]) -Greedy action tensor([ 0.2887, -0.0245, 0.0056, -0.1085]) tensor([0.3168, 0.2316, 0.2387, 0.2129]) -Greedy action tensor([ 0.2929, -0.0405, 0.1122, -0.0566]) tensor([0.3071, 0.2200, 0.2563, 0.2165]) -Greedy action tensor([ 0.7288, -0.6059, 0.0218, -0.8004]) tensor([0.5068, 0.1334, 0.2499, 0.1098]) -Greedy action tensor([ 1.0861, -0.8384, -0.0450, -0.9919]) tensor([0.6274, 0.0916, 0.2025, 0.0785]) -Greedy action tensor([ 0.5888, -0.4557, 0.1129, -0.4759]) tensor([0.4314, 0.1518, 0.2680, 0.1488]) -Greedy action tensor([ 0.0932, -0.0621, 0.0734, -0.3431]) tensor([0.2871, 0.2458, 0.2815, 0.1856]) -Greedy action tensor([ 0.6879, -0.1598, 0.0904, -0.2458]) tensor([0.4216, 0.1806, 0.2320, 0.1657]) -Greedy action tensor([ 0.9413, -0.2827, -0.0270, -0.7224]) tensor([0.5367, 0.1578, 0.2038, 0.1017]) -Greedy action tensor([ 0.8450, -0.2104, -0.0857, -0.4203]) tensor([0.4939, 0.1719, 0.1948, 0.1394]) -Greedy action tensor([ 1.0301, -0.7106, -0.0621, -0.3154]) tensor([0.5646, 0.0990, 0.1894, 0.1470]) -Greedy action tensor([ 0.4098, 0.0882, -0.0041, -0.1921]) tensor([0.3408, 0.2471, 0.2253, 0.1867]) -Greedy action tensor([ 0.6229, 0.2404, -0.2259, -0.0507]) tensor([0.3817, 0.2604, 0.1633, 0.1946]) -Greedy action tensor([ 0.8098, -0.7132, 0.0073, -0.4390]) tensor([0.5120, 0.1116, 0.2295, 0.1469]) -Greedy action tensor([ 0.4366, -0.0450, -0.1616, -0.2715]) tensor([0.3759, 0.2322, 0.2067, 0.1852]) -Greedy action tensor([ 0.5381, -0.5979, -0.1508, -0.1409]) tensor([0.4291, 0.1378, 0.2155, 0.2176]) -Greedy action tensor([ 0.5864, -0.4308, -0.0133, -0.3207]) tensor([0.4321, 0.1563, 0.2372, 0.1744]) -Greedy action tensor([ 1.0584, -0.9045, -0.0023, -0.4890]) tensor([0.5884, 0.0826, 0.2037, 0.1252]) -Greedy action tensor([ 0.4734, -0.1691, -0.0796, -0.1615]) tensor([0.3801, 0.1999, 0.2186, 0.2014]) -Greedy action tensor([ 0.6686, -0.5482, 0.1346, -0.6504]) tensor([0.4651, 0.1378, 0.2727, 0.1244]) -Greedy action tensor([ 0.4899, -0.0816, -0.0602, -0.1754]) tensor([0.3766, 0.2126, 0.2172, 0.1936]) -Greedy action tensor([ 0.6710, -0.3207, -0.0446, -0.5051]) tensor([0.4612, 0.1711, 0.2255, 0.1423]) -Greedy action tensor([ 1.7571, -0.4781, -0.4686, 0.4620]) tensor([0.6717, 0.0719, 0.0725, 0.1839]) -Greedy action tensor([ 1.1779, -0.0299, -0.5325, -0.2524]) tensor([0.5818, 0.1739, 0.1052, 0.1392]) -Greedy action tensor([ 1.6686, -0.8274, -0.4757, 1.0453]) tensor([0.5761, 0.0475, 0.0675, 0.3089]) -Greedy action tensor([ 0.9393, -0.1240, -0.0235, -0.0464]) tensor([0.4761, 0.1644, 0.1818, 0.1777]) -Greedy action tensor([ 0.9479, -0.5725, -0.2693, 0.1860]) tensor([0.5047, 0.1103, 0.1494, 0.2356]) -Greedy action tensor([ 0.9632, -0.3200, -0.1308, 0.2473]) tensor([0.4760, 0.1319, 0.1594, 0.2327]) -Greedy action tensor([ 1.7809, -0.7495, -0.5167, 0.8389]) tensor([0.6370, 0.0507, 0.0640, 0.2483]) -Greedy action tensor([ 0.9972, -0.2468, -0.1050, 0.1730]) tensor([0.4857, 0.1400, 0.1613, 0.2130]) -Greedy action tensor([ 0.7340, -0.3202, -0.2178, 0.2135]) tensor([0.4294, 0.1496, 0.1658, 0.2552]) -Greedy action tensor([ 0.8363, -0.3238, -0.5286, 0.4268]) tensor([0.4479, 0.1404, 0.1144, 0.2974]) -Greedy action tensor([ 1.4540, -0.5651, -0.1010, 0.1406]) tensor([0.6200, 0.0823, 0.1309, 0.1667]) -Greedy action tensor([ 1.4757, 0.2847, -0.2859, -0.0642]) tensor([0.5917, 0.1798, 0.1016, 0.1269]) -Greedy action tensor([ 1.2303, -0.2862, -0.2667, 0.1812]) tensor([0.5576, 0.1224, 0.1248, 0.1953]) -Greedy action tensor([ 1.1022, -0.4821, -0.1678, -0.0493]) tensor([0.5549, 0.1138, 0.1558, 0.1754]) -Greedy action tensor([ 1.0909, -0.0759, -0.1887, 0.1600]) tensor([0.5041, 0.1570, 0.1402, 0.1987]) -Greedy action tensor([ 1.2924, -0.6798, -0.5634, 0.0478]) tensor([0.6315, 0.0879, 0.0987, 0.1819]) -Greedy action tensor([ 1.3987, -0.5189, -0.2604, -0.0114]) tensor([0.6324, 0.0929, 0.1203, 0.1544]) -Greedy action tensor([ 0.7595, -0.5124, -0.1619, 0.1100]) tensor([0.4544, 0.1274, 0.1808, 0.2374]) -Greedy action tensor([ 0.7347, -0.1212, -0.0920, 0.3511]) tensor([0.3931, 0.1670, 0.1720, 0.2679]) -Greedy action tensor([ 1.4860, -0.2225, -0.3070, 0.5193]) tensor([0.5787, 0.1048, 0.0963, 0.2201]) -Greedy action tensor([ 1.1643, -0.5749, -0.4215, 0.5922]) tensor([0.5142, 0.0903, 0.1053, 0.2902]) -Greedy action tensor([ 1.1637, -0.4156, 0.2321, 0.1498]) tensor([0.5095, 0.1050, 0.2007, 0.1848]) -Greedy action tensor([ 0.9412, -0.6745, -0.2495, 0.5326]) tensor([0.4614, 0.0917, 0.1403, 0.3066]) -Greedy action tensor([ 0.2778, 0.0722, -0.6943, -0.1420]) tensor([0.3509, 0.2857, 0.1328, 0.2306]) -Greedy action tensor([ 0.9019, -0.5290, -0.2988, 0.5846]) tensor([0.4409, 0.1054, 0.1327, 0.3210]) -Greedy action tensor([ 0.5207, -0.3862, -0.2983, 0.2082]) tensor([0.3882, 0.1567, 0.1711, 0.2840]) -Greedy action tensor([ 0.6631, -0.1640, -0.1487, -0.0423]) tensor([0.4210, 0.1841, 0.1869, 0.2079]) -Greedy action tensor([ 1.0161e+00, -1.5756e-01, 2.7436e-04, 8.5968e-03]) tensor([0.4911, 0.1518, 0.1778, 0.1793]) -Greedy action tensor([ 1.2407, -0.7016, -0.4303, 0.1277]) tensor([0.6024, 0.0864, 0.1133, 0.1979]) -Greedy action tensor([ 1.1360, -0.7760, -0.4363, 0.7758]) tensor([0.4871, 0.0720, 0.1011, 0.3398]) -Greedy action tensor([ 0.5205, 0.0318, -0.1180, -0.0463]) tensor([0.3692, 0.2265, 0.1950, 0.2094]) -Greedy action tensor([ 1.1577, -0.3626, -0.1823, 0.1213]) tensor([0.5449, 0.1191, 0.1427, 0.1933]) -Greedy action tensor([ 1.5270, -0.0173, -0.2825, 0.5229]) tensor([0.5735, 0.1224, 0.0939, 0.2101]) -Greedy action tensor([ 1.7921, 0.0731, -0.1377, 0.0733]) tensor([0.6650, 0.1192, 0.0965, 0.1192]) -Greedy action tensor([ 1.0676, -0.4562, -0.1212, 0.1516]) tensor([0.5201, 0.1133, 0.1584, 0.2081]) -Greedy action tensor([ 1.0611, 0.0386, -0.1882, -0.0459]) tensor([0.5058, 0.1819, 0.1450, 0.1672]) -Greedy action tensor([ 1.2432, -0.4750, -0.1604, 0.3684]) tensor([0.5429, 0.0974, 0.1334, 0.2264]) -Greedy action tensor([ 0.7308, -0.4334, -0.1127, 0.0229]) tensor([0.4474, 0.1397, 0.1925, 0.2204]) -Greedy action tensor([ 1.0533, -0.4155, -0.2563, 0.2993]) tensor([0.5075, 0.1168, 0.1370, 0.2388]) -Greedy action tensor([ 1.1983, -0.2711, 0.1439, 0.0139]) tensor([0.5307, 0.1221, 0.1849, 0.1624]) -Greedy action tensor([ 1.0047, -0.5693, -0.4911, 0.9959]) tensor([0.4128, 0.0855, 0.0925, 0.4092]) -Greedy action tensor([ 0.5712, -0.3804, -0.7489, 0.8378]) tensor([0.3380, 0.1305, 0.0903, 0.4412]) -Greedy action tensor([ 1.7770, -0.9027, -0.2109, 0.4745]) tensor([0.6769, 0.0464, 0.0927, 0.1840]) -Greedy action tensor([ 0.6087, -0.4446, -0.0944, 0.2127]) tensor([0.3973, 0.1386, 0.1967, 0.2674]) -Greedy action tensor([ 1.6371, 0.0401, -0.2235, 0.4822]) tensor([0.5977, 0.1210, 0.0930, 0.1883]) -Greedy action tensor([ 1.3791, -0.2884, -0.3764, 0.3806]) tensor([0.5780, 0.1091, 0.0999, 0.2130]) -Greedy action tensor([ 1.1516, -0.2695, -0.2376, 0.0168]) tensor([0.5518, 0.1332, 0.1375, 0.1774]) -Greedy action tensor([ 0.8173, -0.0978, 0.0620, 0.0344]) tensor([0.4297, 0.1721, 0.2019, 0.1964]) -Greedy action tensor([ 1.5040, -0.1428, -0.3421, 0.0868]) tensor([0.6278, 0.1210, 0.0991, 0.1522]) -Greedy action tensor([ 1.2422, -0.2675, -0.3522, 0.2175]) tensor([0.5609, 0.1239, 0.1139, 0.2013]) -Greedy action tensor([ 0.7008, -0.0743, -0.4972, 0.4213]) tensor([0.3970, 0.1829, 0.1198, 0.3002]) -Greedy action tensor([ 0.9853, -0.4984, -0.0534, 0.1422]) tensor([0.4972, 0.1128, 0.1760, 0.2140]) -Greedy action tensor([ 1.3928, -0.3829, 0.0714, 0.4812]) tensor([0.5441, 0.0922, 0.1451, 0.2187]) -Greedy action tensor([ 1.3213, -0.6227, 0.0745, 0.2051]) tensor([0.5688, 0.0814, 0.1635, 0.1863]) -Greedy action tensor([ 1.1135, -0.5446, -0.3732, -0.0170]) tensor([0.5749, 0.1095, 0.1300, 0.1856]) -Greedy action tensor([ 0.7462, -0.4758, -0.1024, 0.1003]) tensor([0.4451, 0.1311, 0.1905, 0.2333]) -Greedy action tensor([ 1.2336, -0.6145, -0.1077, 0.1565]) tensor([0.5683, 0.0895, 0.1486, 0.1936]) -Greedy action tensor([ 1.3662, -0.7169, -0.3927, 0.3858]) tensor([0.5981, 0.0745, 0.1030, 0.2244]) -Greedy action tensor([ 1.0805, -0.4350, -0.0912, 0.1368]) tensor([0.5212, 0.1145, 0.1615, 0.2028]) -Greedy action tensor([ 1.1435, -0.5154, -0.4062, 0.1534]) tensor([0.5636, 0.1073, 0.1197, 0.2094]) -Greedy action tensor([ 0.2893, -0.2243, -0.2573, 0.2714]) tensor([0.3165, 0.1894, 0.1832, 0.3109]) -Greedy action tensor([ 1.4325, -0.7696, -0.0738, 0.3010]) tensor([0.6043, 0.0668, 0.1340, 0.1949]) -Greedy action tensor([ 0.9621, -0.1189, -0.0768, -0.0161]) tensor([0.4833, 0.1640, 0.1710, 0.1817]) -Greedy action tensor([ 0.9606, -0.0808, -0.0835, 0.1967]) tensor([0.4606, 0.1626, 0.1622, 0.2146]) -Greedy action tensor([ 1.5010, -0.2483, -0.3090, 0.3240]) tensor([0.6076, 0.1057, 0.0994, 0.1873]) -Greedy action tensor([ 0.9187, -0.4165, -0.4640, 0.6540]) tensor([0.4383, 0.1153, 0.1100, 0.3364]) -Greedy action tensor([ 0.8258, -0.0644, -0.0481, -0.3174]) tensor([0.4658, 0.1913, 0.1944, 0.1485]) -Greedy action tensor([ 1.7500, -0.6666, -0.3370, 0.3252]) tensor([0.6878, 0.0614, 0.0853, 0.1655]) -Greedy action tensor([ 1.4919, -0.5084, -0.4194, 0.3324]) tensor([0.6262, 0.0847, 0.0926, 0.1964]) -Greedy action tensor([ 1.1623, -0.4042, -0.2223, -0.0855]) tensor([0.5726, 0.1195, 0.1434, 0.1644]) -Greedy action tensor([ 1.3073, -0.1025, 0.0338, 0.1173]) tensor([0.5470, 0.1336, 0.1531, 0.1664]) -Greedy action tensor([ 0.4941, -0.3597, -0.0500, 0.1159]) tensor([0.3716, 0.1582, 0.2156, 0.2546]) -Greedy action tensor([ 0.5090, -0.6614, -0.0827, 0.2271]) tensor([0.3820, 0.1185, 0.2114, 0.2881]) -Greedy action tensor([ 1.5476, -1.1078, -0.2264, 0.5015]) tensor([0.6284, 0.0442, 0.1066, 0.2208]) -Greedy action tensor([ 1.2538, -0.3447, -0.3336, -0.0980]) tensor([0.6004, 0.1214, 0.1228, 0.1554]) -Greedy action tensor([ 1.5135, -0.6639, -0.5680, 0.3673]) tensor([0.6427, 0.0728, 0.0802, 0.2043]) -Greedy action tensor([ 1.4853, -0.1958, -0.5396, 0.3346]) tensor([0.6118, 0.1139, 0.0808, 0.1936]) -Greedy action tensor([ 0.9947, -0.3077, -0.4872, 0.5551]) tensor([0.4666, 0.1268, 0.1060, 0.3006]) -Greedy action tensor([ 0.9603, -0.1771, -0.1819, 0.0607]) tensor([0.4886, 0.1567, 0.1559, 0.1987]) -Greedy action tensor([ 0.9366, -0.7279, -0.1189, 0.0594]) tensor([0.5120, 0.0969, 0.1782, 0.2130]) -Greedy action tensor([ 1.4885, -0.3224, -0.3423, 0.2921]) tensor([0.6150, 0.1006, 0.0986, 0.1859]) -Greedy action tensor([ 0.4631, 0.2752, -0.2760, 0.7404]) tensor([0.2758, 0.2286, 0.1317, 0.3639]) -Greedy action tensor([ 0.3775, -2.3011, 0.3714, 1.0328]) tensor([0.2507, 0.0172, 0.2492, 0.4829]) -Greedy action tensor([ 0.8822, 0.4075, -0.5883, 1.1341]) tensor([0.3187, 0.1982, 0.0732, 0.4099]) -Greedy action tensor([-0.0721, -0.4292, -0.4240, 2.0844]) tensor([0.0905, 0.0634, 0.0637, 0.7824]) -Greedy action tensor([ 0.9674, 0.7177, -0.3523, 0.8575]) tensor([0.3399, 0.2648, 0.0908, 0.3045]) -Greedy action tensor([ 1.2475, 0.0918, -0.7302, 0.3326]) tensor([0.5394, 0.1698, 0.0747, 0.2161]) -Greedy action tensor([ 0.4554, -1.1509, -0.1255, 1.0904]) tensor([0.2742, 0.0550, 0.1534, 0.5174]) -Greedy action tensor([ 1.2024, -0.1288, 0.9463, -0.1217]) tensor([0.4340, 0.1146, 0.3359, 0.1154]) -Greedy action tensor([ 0.8887, -1.3805, 2.0696, -0.1481]) tensor([0.2121, 0.0219, 0.6908, 0.0752]) -Greedy action tensor([-0.8124, -1.1087, 0.9369, -0.3482]) tensor([0.1101, 0.0818, 0.6330, 0.1751]) -Greedy action tensor([ 0.6672, -0.9222, 0.3636, 1.0680]) tensor([0.2911, 0.0594, 0.2149, 0.4346]) -Greedy action tensor([ 1.1825, 0.1422, -0.3624, 1.3620]) tensor([0.3619, 0.1279, 0.0772, 0.4330]) -Greedy action tensor([-0.3775, -0.1983, -0.4549, -0.1572]) tensor([0.2289, 0.2739, 0.2119, 0.2853]) -Greedy action tensor([ 0.5564, -0.6520, 0.6768, 0.2728]) tensor([0.3145, 0.0939, 0.3547, 0.2368]) -Greedy action tensor([1.0877, 0.6186, 0.2131, 1.6793]) tensor([0.2598, 0.1625, 0.1083, 0.4694]) -Greedy action tensor([ 0.5595, -1.1907, 1.0465, 0.2523]) tensor([0.2827, 0.0491, 0.4602, 0.2080]) -Greedy action tensor([ 0.2325, -0.6996, 0.2787, 0.8448]) tensor([0.2333, 0.0919, 0.2444, 0.4304]) -Greedy action tensor([-0.9318, -0.1837, -0.1173, 1.9201]) tensor([0.0441, 0.0931, 0.0995, 0.7633]) -Greedy action tensor([ 0.8349, -0.2036, 1.2680, 1.3673]) tensor([0.2174, 0.0770, 0.3353, 0.3703]) -Greedy action tensor([1.8046, 0.8415, 0.3951, 0.6512]) tensor([0.5150, 0.1966, 0.1258, 0.1625]) -Greedy action tensor([ 0.7602, 0.3258, -0.5590, 2.2433]) tensor([0.1582, 0.1025, 0.0423, 0.6971]) -Greedy action tensor([1.3207, 0.4403, 0.5773, 0.1142]) tensor([0.4568, 0.1894, 0.2172, 0.1367]) -Greedy action tensor([ 1.3375, -0.6580, -0.0780, 0.5716]) tensor([0.5424, 0.0737, 0.1317, 0.2522]) -Greedy action tensor([-0.2343, -0.5564, 0.4250, -0.1718]) tensor([0.2117, 0.1534, 0.4094, 0.2254]) -Greedy action tensor([0.7445, 0.6699, 0.7691, 0.7146]) tensor([0.2549, 0.2365, 0.2612, 0.2474]) -Greedy action tensor([ 1.2935, -1.3109, 0.8659, 0.2574]) tensor([0.4806, 0.0355, 0.3134, 0.1705]) -Greedy action tensor([ 0.3391, -0.2873, 0.2676, 0.5102]) tensor([0.2738, 0.1464, 0.2549, 0.3249]) -Greedy action tensor([-0.3794, -1.0654, -0.5734, 1.0225]) tensor([0.1565, 0.0788, 0.1289, 0.6358]) -Greedy action tensor([ 0.5159, -1.7435, -0.4362, 0.9488]) tensor([0.3298, 0.0344, 0.1273, 0.5085]) -Greedy action tensor([-0.1455, -0.7051, 0.5984, 0.2911]) tensor([0.1915, 0.1094, 0.4029, 0.2963]) -Greedy action tensor([ 0.7926, 0.1466, -0.4272, -0.5370]) tensor([0.4799, 0.2515, 0.1417, 0.1270]) -Greedy action tensor([ 0.8040, -0.6135, -0.3801, 1.2524]) tensor([0.3211, 0.0778, 0.0983, 0.5028]) -Greedy action tensor([ 1.1087, -0.7137, 0.2044, 0.8498]) tensor([0.4276, 0.0691, 0.1731, 0.3301]) -Greedy action tensor([0.5265, 0.2407, 0.2223, 1.4190]) tensor([0.2028, 0.1524, 0.1496, 0.4951]) -Greedy action tensor([ 0.7070, 0.5574, -0.1038, 0.7658]) tensor([0.2971, 0.2558, 0.1321, 0.3151]) -Greedy action tensor([ 0.8363, -1.8073, 1.2820, 0.0506]) tensor([0.3238, 0.0230, 0.5056, 0.1476]) -Greedy action tensor([ 1.4225, -0.2191, 0.1782, 1.6322]) tensor([0.3683, 0.0713, 0.1061, 0.4542]) -Greedy action tensor([ 0.4094, -0.6086, 0.3624, 0.2948]) tensor([0.3118, 0.1127, 0.2975, 0.2780]) -Greedy action tensor([ 0.1856, 0.5905, 0.4717, -0.9795]) tensor([0.2414, 0.3619, 0.3214, 0.0753]) -Greedy action tensor([ 1.7001, -0.2000, -0.4746, 0.7371]) tensor([0.6079, 0.0909, 0.0691, 0.2321]) -Greedy action tensor([ 0.0663, -0.1574, 1.2986, 1.5505]) tensor([0.1037, 0.0829, 0.3557, 0.4576]) -Greedy action tensor([ 0.5319, 0.0019, 0.5597, -0.0643]) tensor([0.3157, 0.1858, 0.3246, 0.1739]) -Greedy action tensor([0.6436, 0.2104, 0.5658, 0.9110]) tensor([0.2577, 0.1671, 0.2384, 0.3367]) -Greedy action tensor([0.8783, 0.7123, 0.8948, 0.5513]) tensor([0.2789, 0.2363, 0.2836, 0.2011]) -Greedy action tensor([ 1.2634, -0.2072, 0.3176, 0.8633]) tensor([0.4370, 0.1004, 0.1697, 0.2929]) -Greedy action tensor([-4.1945e-02, -6.5841e-05, -2.5099e-01, 8.3287e-01]) tensor([0.1904, 0.1985, 0.1545, 0.4566]) -Greedy action tensor([ 0.1205, -1.2218, 0.0087, 0.9386]) tensor([0.2262, 0.0591, 0.2022, 0.5125]) -Greedy action tensor([ 0.4578, -1.3821, 0.4073, 1.0390]) tensor([0.2566, 0.0407, 0.2439, 0.4588]) -Greedy action tensor([ 1.0273, -0.2761, 0.7036, 2.0889]) tensor([0.2047, 0.0556, 0.1481, 0.5917]) -Greedy action tensor([-0.0231, 0.2101, 1.0147, 0.1818]) tensor([0.1584, 0.2000, 0.4472, 0.1944]) -Greedy action tensor([-0.0193, 0.7982, 1.2912, -1.1129]) tensor([0.1368, 0.3099, 0.5074, 0.0458]) -Greedy action tensor([ 0.8503, -0.1350, 1.0425, 1.1290]) tensor([0.2560, 0.0956, 0.3102, 0.3382]) -Greedy action tensor([0.5949, 1.2527, 0.3588, 0.3015]) tensor([0.2239, 0.4323, 0.1768, 0.1670]) -Greedy action tensor([ 0.1867, -0.5901, 0.5779, -0.7144]) tensor([0.2990, 0.1375, 0.4421, 0.1214]) -Greedy action tensor([ 0.8229, 0.0781, 1.3148, -0.2350]) tensor([0.2892, 0.1373, 0.4730, 0.1004]) -Greedy action tensor([ 0.3417, -0.1679, 0.5157, 1.0472]) tensor([0.2077, 0.1247, 0.2471, 0.4205]) -Greedy action tensor([ 0.5995, -0.3055, 1.0823, 1.1485]) tensor([0.2102, 0.0850, 0.3407, 0.3640]) -Greedy action tensor([-0.3256, 0.3765, 0.2521, 0.4264]) tensor([0.1445, 0.2916, 0.2575, 0.3065]) -Greedy action tensor([-0.0352, -0.6161, 0.4720, 0.2910]) tensor([0.2171, 0.1215, 0.3606, 0.3009]) -Greedy action tensor([ 0.8366, 0.4865, -0.0609, 0.6048]) tensor([0.3442, 0.2425, 0.1403, 0.2730]) -Greedy action tensor([ 1.1605, -0.4778, 0.8639, 0.4208]) tensor([0.4141, 0.0805, 0.3078, 0.1976]) -Greedy action tensor([ 1.5904, -0.0365, -0.1022, 1.1544]) tensor([0.4933, 0.0969, 0.0908, 0.3190]) -Greedy action tensor([0.7830, 1.2309, 0.2753, 0.2479]) tensor([0.2665, 0.4171, 0.1604, 0.1561]) -Greedy action tensor([-1.1734, -0.3419, -0.0508, -0.7278]) tensor([0.1261, 0.2896, 0.3874, 0.1969]) -Greedy action tensor([-0.1647, -1.3431, -0.0682, 1.1361]) tensor([0.1644, 0.0506, 0.1811, 0.6039]) -Greedy action tensor([ 0.6317, -0.7017, 0.5810, 0.2222]) tensor([0.3475, 0.0916, 0.3303, 0.2307]) -Greedy action tensor([0.8439, 0.9302, 0.8193, 0.3935]) tensor([0.2700, 0.2944, 0.2635, 0.1721]) -Greedy action tensor([ 0.8979, 0.7045, -1.0166, 0.4917]) tensor([0.3791, 0.3124, 0.0559, 0.2526]) -Greedy action tensor([1.9727, 0.6112, 1.4201, 0.0478]) tensor([0.5057, 0.1296, 0.2910, 0.0738]) -Greedy action tensor([ 1.4636, -0.6268, 1.4580, 1.1438]) tensor([0.3516, 0.0435, 0.3496, 0.2553]) -Greedy action tensor([ 0.0276, -0.0853, -0.8035, -1.5771]) tensor([0.3953, 0.3531, 0.1722, 0.0794]) -Greedy action tensor([ 0.0129, -1.4181, -0.2117, 0.4323]) tensor([0.2810, 0.0672, 0.2244, 0.4274]) -Greedy action tensor([ 0.3110, 0.5399, -0.6793, 0.0125]) tensor([0.2967, 0.3730, 0.1102, 0.2201]) -Greedy action tensor([-0.1295, 0.2717, 1.9196, -0.0862]) tensor([0.0885, 0.1322, 0.6869, 0.0924]) -Greedy action tensor([1.1191, 0.6855, 0.8430, 0.3129]) tensor([0.3504, 0.2271, 0.2659, 0.1565]) -Greedy action tensor([ 1.2770, -0.5539, 1.4804, 0.5856]) tensor([0.3464, 0.0555, 0.4246, 0.1735]) -Greedy action tensor([ 0.7949, -0.0225, -0.4286, 1.0425]) tensor([0.3315, 0.1464, 0.0975, 0.4246]) -Greedy action tensor([ 1.6406, -1.0211, 1.0297, 0.4942]) tensor([0.5180, 0.0362, 0.2812, 0.1646]) -Greedy action tensor([ 0.5597, -0.1319, 1.1939, 0.4069]) tensor([0.2356, 0.1180, 0.4442, 0.2022]) -Greedy action tensor([ 0.5542, 0.3956, -0.0062, -0.2796]) tensor([0.3498, 0.2985, 0.1997, 0.1519]) -Greedy action tensor([ 1.1853, 0.7378, -1.0218, -0.1412]) tensor([0.4964, 0.3173, 0.0546, 0.1317]) -Greedy action tensor([-0.5266, -1.6554, 2.1209, -0.0411]) tensor([0.0586, 0.0190, 0.8272, 0.0952]) -Greedy action tensor([-1.6163, -0.5348, 0.5698, 0.1636]) tensor([0.0532, 0.1570, 0.4740, 0.3157]) -Greedy action tensor([-1.7392, -0.5043, 0.5524, -0.0500]) tensor([0.0506, 0.1741, 0.5009, 0.2743]) -Greedy action tensor([-1.2039, -0.6330, 0.5237, 0.4763]) tensor([0.0727, 0.1286, 0.4088, 0.3899]) -Greedy action tensor([-1.3438, -0.5304, 0.3380, 0.2310]) tensor([0.0743, 0.1676, 0.3993, 0.3588]) -Greedy action tensor([-1.8286, -0.4818, 0.6106, -0.0964]) tensor([0.0455, 0.1751, 0.5220, 0.2574]) -Greedy action tensor([-0.8217, -0.0962, 1.2006, 1.3709]) tensor([0.0511, 0.1055, 0.3859, 0.4575]) -Greedy action tensor([-1.9396, -0.4480, 0.6619, -0.1779]) tensor([0.0404, 0.1796, 0.5448, 0.2352]) -Greedy action tensor([-1.1087, -0.6882, 0.2503, 0.2537]) tensor([0.0969, 0.1475, 0.3771, 0.3784]) -Greedy action tensor([-1.3307, -0.6476, 0.7739, 0.6917]) tensor([0.0534, 0.1057, 0.4378, 0.4032]) -Greedy action tensor([-1.2051, -0.5391, 0.4409, 0.6915]) tensor([0.0676, 0.1316, 0.3505, 0.4503]) -Greedy action tensor([-1.2904, -0.4199, 0.5126, 0.5298]) tensor([0.0640, 0.1528, 0.3882, 0.3950]) -Greedy action tensor([-1.3726, -0.5762, 0.7198, -0.1797]) tensor([0.0684, 0.1517, 0.5544, 0.2255]) -Greedy action tensor([-0.6751, -0.5461, 0.1815, 0.2853]) tensor([0.1407, 0.1601, 0.3315, 0.3677]) -Greedy action tensor([-1.7925, -0.5434, 0.7235, 0.0448]) tensor([0.0432, 0.1507, 0.5348, 0.2713]) -Greedy action tensor([-1.3473, -0.5792, 0.3239, 0.2524]) tensor([0.0745, 0.1606, 0.3961, 0.3688]) -Greedy action tensor([-1.8201, -0.4922, 0.6294, -0.0658]) tensor([0.0452, 0.1705, 0.5233, 0.2611]) -Greedy action tensor([-1.6521, -0.6266, 0.8336, 0.4364]) tensor([0.0419, 0.1168, 0.5031, 0.3382]) -Greedy action tensor([-0.9842, -0.5159, 1.2551, 1.4830]) tensor([0.0421, 0.0672, 0.3949, 0.4959]) -Greedy action tensor([-1.1120, -0.5822, 0.2390, 0.3301]) tensor([0.0927, 0.1574, 0.3579, 0.3920]) -Greedy action tensor([-1.4606, -0.3023, 0.5953, -0.3409]) tensor([0.0664, 0.2114, 0.5188, 0.2034]) -Greedy action tensor([-2.0315, -0.9335, 0.5290, -0.1794]) tensor([0.0429, 0.1286, 0.5551, 0.2734]) -Greedy action tensor([-1.8827, -0.4560, 0.6359, -0.1477]) tensor([0.0430, 0.1792, 0.5339, 0.2439]) -Greedy action tensor([-1.8365, -0.4901, 0.8668, 0.3257]) tensor([0.0351, 0.1350, 0.5245, 0.3053]) -Greedy action tensor([-1.9059, -0.8554, 0.1755, -0.3136]) tensor([0.0596, 0.1703, 0.4774, 0.2927]) -Greedy action tensor([-0.8723, -0.4260, 0.5259, 1.1920]) tensor([0.0690, 0.1078, 0.2794, 0.5438]) -Greedy action tensor([-1.5462, -0.4586, 1.1273, 1.0442]) tensor([0.0315, 0.0933, 0.4558, 0.4194]) -Greedy action tensor([-1.8846, -0.4487, 0.6831, -0.0969]) tensor([0.0413, 0.1736, 0.5383, 0.2468]) -Greedy action tensor([-0.4886, -0.5038, 0.1850, 0.2540]) tensor([0.1653, 0.1629, 0.3243, 0.3475]) -Greedy action tensor([-1.6446, -0.5741, 1.0919, 0.6739]) tensor([0.0339, 0.0988, 0.5230, 0.3443]) -Greedy action tensor([-1.8111, -0.4769, 0.6900, 0.0335]) tensor([0.0429, 0.1628, 0.5230, 0.2713]) -Greedy action tensor([-1.2827, -0.5566, 1.2885, 1.3084]) tensor([0.0339, 0.0701, 0.4436, 0.4524]) -Greedy action tensor([-1.8747, -0.4178, 0.6274, -0.1292]) tensor([0.0430, 0.1848, 0.5255, 0.2466]) -Greedy action tensor([-1.7924, -0.0262, 0.5506, 0.0101]) tensor([0.0429, 0.2507, 0.4464, 0.2600]) -Greedy action tensor([-0.0364, -0.0516, 1.0284, 1.6053]) tensor([0.0995, 0.0980, 0.2886, 0.5139]) -Greedy action tensor([-1.2227, -0.4157, 0.6204, -0.5216]) tensor([0.0864, 0.1937, 0.5457, 0.1742]) -Greedy action tensor([-1.8264, -0.3021, 0.5894, -0.0938]) tensor([0.0446, 0.2046, 0.4989, 0.2520]) -Greedy action tensor([-1.1233, -0.5946, 0.3531, 0.1016]) tensor([0.0954, 0.1619, 0.4178, 0.3249]) -Greedy action tensor([-1.0243, -0.4558, 0.4796, -0.3413]) tensor([0.1082, 0.1910, 0.4867, 0.2142]) -Greedy action tensor([-0.6020, 0.6066, -0.1330, 0.0866]) tensor([0.1260, 0.4219, 0.2014, 0.2508]) -Greedy action tensor([-1.8842, -0.4618, 0.6780, -0.0715]) tensor([0.0413, 0.1711, 0.5349, 0.2528]) -Greedy action tensor([-1.4380, -0.6775, 0.4179, 0.0341]) tensor([0.0720, 0.1540, 0.4604, 0.3137]) -Greedy action tensor([-0.4799, 0.2652, 0.0921, -0.0193]) tensor([0.1547, 0.3259, 0.2741, 0.2452]) -Greedy action tensor([-1.7415, -0.4936, 0.6028, 0.0518]) tensor([0.0478, 0.1665, 0.4984, 0.2873]) -Greedy action tensor([-0.8917, -0.5279, 0.2691, 0.1184]) tensor([0.1194, 0.1717, 0.3811, 0.3278]) -Greedy action tensor([-1.6896, -0.2803, 0.5492, -0.1396]) tensor([0.0521, 0.2133, 0.4890, 0.2456]) -Greedy action tensor([-1.7567, -0.4738, 0.5864, -0.0318]) tensor([0.0485, 0.1748, 0.5047, 0.2720]) -Greedy action tensor([-1.8560, -0.4670, 0.6764, -0.0852]) tensor([0.0426, 0.1709, 0.5361, 0.2504]) -Greedy action tensor([-1.7443, -0.4947, 0.5708, -0.0089]) tensor([0.0493, 0.1720, 0.4992, 0.2795]) -Greedy action tensor([-1.8084, -0.4871, 0.6038, -0.1085]) tensor([0.0468, 0.1753, 0.5219, 0.2560]) -Greedy action tensor([-1.1661, -0.5920, 0.2650, 0.2522]) tensor([0.0902, 0.1601, 0.3773, 0.3724]) -Greedy action tensor([-1.3060, -0.8636, 0.6013, -0.4928]) tensor([0.0866, 0.1348, 0.5833, 0.1953]) -Greedy action tensor([-1.6633, -0.4730, 0.5424, 0.0195]) tensor([0.0533, 0.1754, 0.4842, 0.2870]) -Greedy action tensor([-1.6865, -0.7353, 0.3351, -0.3867]) tensor([0.0675, 0.1748, 0.5099, 0.2477]) -Greedy action tensor([-0.4849, -0.3446, 1.0721, 1.5412]) tensor([0.0691, 0.0795, 0.3277, 0.5238]) -Greedy action tensor([-1.8801, -0.4240, 0.6252, -0.1534]) tensor([0.0432, 0.1852, 0.5288, 0.2428]) -Greedy action tensor([-1.7930, -0.5076, 0.7207, 0.1247]) tensor([0.0421, 0.1521, 0.5195, 0.2863]) -Greedy action tensor([-1.5641, -0.3886, 0.6762, 0.5335]) tensor([0.0459, 0.1487, 0.4314, 0.3740]) -Greedy action tensor([-0.5702, -0.2987, 0.8699, 1.4913]) tensor([0.0695, 0.0912, 0.2933, 0.5460]) -Greedy action tensor([-1.8622, -0.4874, 0.6479, -0.1225]) tensor([0.0436, 0.1722, 0.5361, 0.2481]) -Greedy action tensor([-1.4484, -0.1431, 0.4478, 0.0588]) tensor([0.0630, 0.2325, 0.4199, 0.2846]) -Greedy action tensor([-1.8209, -0.5880, 0.6860, -0.0409]) tensor([0.0442, 0.1516, 0.5421, 0.2621]) -Greedy action tensor([-0.9783, 0.0672, 0.4716, -0.4921]) tensor([0.1027, 0.2923, 0.4379, 0.1671]) -Greedy action tensor([-1.2479, -0.5914, 0.3607, 0.1657]) tensor([0.0831, 0.1602, 0.4151, 0.3416]) -Greedy action tensor([-1.7334, -0.5197, 0.6281, 0.0943]) tensor([0.0472, 0.1588, 0.5005, 0.2935]) -Greedy action tensor([-1.1061, 1.0301, 0.4705, 0.1065]) tensor([0.0566, 0.4792, 0.2739, 0.1903]) -Greedy action tensor([-0.3434, 0.0020, 1.0422, 1.6510]) tensor([0.0727, 0.1027, 0.2905, 0.5341]) -Greedy action tensor([-1.8273, -0.3855, 0.6030, -0.0979]) tensor([0.0450, 0.1902, 0.5112, 0.2536]) -Greedy action tensor([-1.8320, -0.6990, 0.6570, -0.2170]) tensor([0.0472, 0.1466, 0.5688, 0.2374]) -Greedy action tensor([-1.9331, -0.4569, 0.6603, -0.1724]) tensor([0.0407, 0.1781, 0.5444, 0.2368]) -Greedy action tensor([-1.9720, -0.8107, 0.1947, -0.2196]) tensor([0.0535, 0.1709, 0.4670, 0.3086]) -Greedy action tensor([-1.7215, -0.5027, 0.5535, -0.0049]) tensor([0.0508, 0.1719, 0.4944, 0.2829]) -Greedy action tensor([-1.5278, -0.2592, 0.4121, 0.1257]) tensor([0.0597, 0.2124, 0.4157, 0.3122]) -Greedy action tensor([-1.8750, -0.4753, 0.6422, -0.1220]) tensor([0.0431, 0.1746, 0.5338, 0.2486]) -Greedy action tensor([-0.8949, -0.6022, 0.1835, 0.3432]) tensor([0.1146, 0.1535, 0.3368, 0.3951]) -Greedy action tensor([-1.8483, -0.3683, 0.6087, -0.1505]) tensor([0.0444, 0.1950, 0.5181, 0.2425]) -Greedy action tensor([-1.1446, -0.1232, 1.2100, 1.1697]) tensor([0.0409, 0.1137, 0.4312, 0.4142]) -Greedy action tensor([-1.2473, -0.4696, 0.5572, 0.7572]) tensor([0.0600, 0.1305, 0.3644, 0.4451]) -Greedy action tensor([-0.6768, -0.3210, 1.0990, 1.5672]) tensor([0.0563, 0.0804, 0.3324, 0.5309]) -Greedy action tensor([-1.9057, -0.5761, 0.9818, 0.1041]) tensor([0.0331, 0.1252, 0.5945, 0.2472]) -Greedy action tensor([-1.8837, -0.3667, 0.6286, -0.1315]) tensor([0.0423, 0.1927, 0.5213, 0.2438]) -Greedy action tensor([-1.8625, -0.4796, 0.6598, -0.0892]) tensor([0.0429, 0.1708, 0.5339, 0.2524]) -Greedy action tensor([-0.0003, -0.0404, 0.2029, -0.1274]) tensor([0.2459, 0.2362, 0.3013, 0.2166]) -Greedy action tensor([ 0.6430, -0.1606, -0.0981, -0.2183]) tensor([0.4261, 0.1908, 0.2031, 0.1801]) -Greedy action tensor([ 0.6062, 0.1206, -0.1522, -0.4057]) tensor([0.4086, 0.2514, 0.1914, 0.1485]) -Greedy action tensor([ 0.6658, -0.1983, 0.0281, -0.1067]) tensor([0.4146, 0.1747, 0.2191, 0.1915]) -Greedy action tensor([ 0.6114, -0.3474, 0.0278, -0.4229]) tensor([0.4354, 0.1669, 0.2429, 0.1548]) -Greedy action tensor([ 0.4457, -0.0438, -0.0595, -0.4067]) tensor([0.3784, 0.2319, 0.2283, 0.1613]) -Greedy action tensor([ 0.6484, -0.4981, 0.0621, -0.4831]) tensor([0.4552, 0.1447, 0.2533, 0.1468]) -Greedy action tensor([ 0.8127, -0.9349, -0.0532, -0.3444]) tensor([0.5238, 0.0912, 0.2203, 0.1647]) -Greedy action tensor([ 0.5334, -0.1616, -0.1045, -0.1725]) tensor([0.3967, 0.1979, 0.2096, 0.1958]) -Greedy action tensor([ 1.2485, -0.7771, 0.0384, -0.8446]) tensor([0.6437, 0.0849, 0.1920, 0.0794]) -Greedy action tensor([ 0.5962, -0.0597, -0.0918, -0.2272]) tensor([0.4064, 0.2109, 0.2043, 0.1784]) -Greedy action tensor([ 0.8435, -0.4927, -0.0181, -0.4837]) tensor([0.5127, 0.1348, 0.2166, 0.1360]) -Greedy action tensor([ 0.5755, -0.3363, -0.0152, -0.4637]) tensor([0.4330, 0.1740, 0.2399, 0.1532]) -Greedy action tensor([ 0.2696, -0.1444, 0.2219, -0.3768]) tensor([0.3186, 0.2106, 0.3038, 0.1669]) -Greedy action tensor([ 0.3943, -0.3661, -0.3223, -0.3732]) tensor([0.4132, 0.1932, 0.2018, 0.1918]) -Greedy action tensor([ 0.5460, -0.1400, -0.1135, -0.2423]) tensor([0.4040, 0.2034, 0.2089, 0.1837]) -Greedy action tensor([ 0.3442, 0.1273, -0.0084, -0.3416]) tensor([0.3321, 0.2673, 0.2334, 0.1672]) -Greedy action tensor([ 0.5078, -0.3429, -0.0250, -0.1690]) tensor([0.3965, 0.1693, 0.2327, 0.2015]) -Greedy action tensor([ 0.6717, -0.3252, 0.0057, -0.5213]) tensor([0.4574, 0.1688, 0.2350, 0.1387]) -Greedy action tensor([ 0.8599, -0.8028, -0.1000, -0.3159]) tensor([0.5316, 0.1008, 0.2036, 0.1640]) -Greedy action tensor([ 0.4702, 0.0327, -0.0524, -0.2390]) tensor([0.3662, 0.2365, 0.2172, 0.1802]) -Greedy action tensor([ 0.6401, -0.2937, 0.0604, -0.3040]) tensor([0.4270, 0.1678, 0.2391, 0.1661]) -Greedy action tensor([ 0.8647, -0.6413, 0.2313, -0.9180]) tensor([0.5206, 0.1155, 0.2763, 0.0876]) -Greedy action tensor([ 0.5080, -0.2352, 0.0163, -0.3728]) tensor([0.3997, 0.1901, 0.2445, 0.1657]) -Greedy action tensor([ 0.6094, -0.4080, -0.1357, -0.4124]) tensor([0.4553, 0.1646, 0.2161, 0.1639]) -Greedy action tensor([ 0.5019, -0.3898, 0.2187, -0.5283]) tensor([0.3968, 0.1627, 0.2989, 0.1416]) -Greedy action tensor([ 0.4294, -0.1065, -0.0411, -0.3285]) tensor([0.3733, 0.2185, 0.2332, 0.1750]) -Greedy action tensor([ 0.4197, 0.1015, -0.0336, -0.2210]) tensor([0.3460, 0.2517, 0.2199, 0.1823]) -Greedy action tensor([ 0.3226, 0.1918, 0.0366, -0.3488]) tensor([0.3185, 0.2795, 0.2393, 0.1627]) -Greedy action tensor([ 0.3619, -0.2453, 0.0434, -0.3702]) tensor([0.3632, 0.1979, 0.2642, 0.1747]) -Greedy action tensor([ 0.5419, -0.1331, -0.1856, -0.3927]) tensor([0.4193, 0.2135, 0.2026, 0.1647]) -Greedy action tensor([ 0.4249, -0.2017, 0.0327, -0.2018]) tensor([0.3644, 0.1947, 0.2462, 0.1947]) -Greedy action tensor([ 0.5122, -0.2840, 0.0253, -0.3982]) tensor([0.4052, 0.1828, 0.2490, 0.1630]) -Greedy action tensor([ 0.6332, -0.4748, -0.1066, -0.3945]) tensor([0.4618, 0.1525, 0.2204, 0.1653]) -Greedy action tensor([ 0.6771, 0.1967, -0.0974, -0.2970]) tensor([0.4070, 0.2517, 0.1876, 0.1537]) -Greedy action tensor([ 0.5069, 0.0458, -0.0911, -0.1835]) tensor([0.3729, 0.2351, 0.2050, 0.1870]) -Greedy action tensor([ 0.5381, -0.0712, -0.1772, -0.1845]) tensor([0.3971, 0.2159, 0.1942, 0.1928]) -Greedy action tensor([ 0.4040, -0.0569, 0.0358, -0.4991]) tensor([0.3666, 0.2312, 0.2537, 0.1486]) -Greedy action tensor([ 0.3336, -0.1204, -0.0088, -0.2184]) tensor([0.3424, 0.2174, 0.2431, 0.1971]) -Greedy action tensor([ 0.6282, -0.3213, -0.0692, -0.4722]) tensor([0.4510, 0.1745, 0.2245, 0.1500]) -Greedy action tensor([ 1.1359, -1.0491, 0.0787, -0.4758]) tensor([0.6026, 0.0678, 0.2094, 0.1202]) -Greedy action tensor([ 0.4717, -0.1233, 0.0885, -0.2008]) tensor([0.3645, 0.2010, 0.2485, 0.1860]) -Greedy action tensor([0.2205, 0.0902, 0.0589, 0.1177]) tensor([0.2754, 0.2418, 0.2343, 0.2485]) -Greedy action tensor([ 0.2089, 0.1502, -0.1481, 0.0937]) tensor([0.2830, 0.2668, 0.1980, 0.2522]) -Greedy action tensor([ 0.8749, -0.5010, -0.0166, -0.3098]) tensor([0.5080, 0.1283, 0.2083, 0.1554]) -Greedy action tensor([ 1.0469, -0.4647, 0.0477, -0.7535]) tensor([0.5702, 0.1257, 0.2099, 0.0942]) -Greedy action tensor([ 0.5000, -0.2365, 0.0148, -0.3075]) tensor([0.3937, 0.1885, 0.2423, 0.1756]) -Greedy action tensor([ 0.4492, 0.2010, -0.1310, -0.1694]) tensor([0.3474, 0.2710, 0.1945, 0.1871]) -Greedy action tensor([ 0.5989, -0.3530, -0.1817, -0.3217]) tensor([0.4459, 0.1721, 0.2043, 0.1776]) -Greedy action tensor([ 0.5496, -0.3827, -0.1184, -0.2231]) tensor([0.4223, 0.1662, 0.2165, 0.1950]) -Greedy action tensor([ 0.3930, 0.0629, -0.0728, -0.4264]) tensor([0.3588, 0.2579, 0.2252, 0.1581]) -Greedy action tensor([ 0.2023, -0.0234, -0.0887, -0.5222]) tensor([0.3300, 0.2634, 0.2467, 0.1599]) -Greedy action tensor([ 0.5749, -0.4759, -0.1431, -0.5981]) tensor([0.4658, 0.1629, 0.2272, 0.1441]) -Greedy action tensor([ 0.9553, -0.7029, -0.0376, -0.8224]) tensor([0.5780, 0.1101, 0.2142, 0.0977]) -Greedy action tensor([ 0.5194, -0.1715, -0.1120, -0.5761]) tensor([0.4224, 0.2117, 0.2247, 0.1412]) -Greedy action tensor([ 0.9537, -0.8650, -0.2035, -0.3845]) tensor([0.5751, 0.0933, 0.1808, 0.1508]) -Greedy action tensor([ 0.8966, -0.8316, 0.0580, -0.4334]) tensor([0.5335, 0.0948, 0.2306, 0.1411]) -Greedy action tensor([ 0.5919, -0.2373, -0.0494, -0.3285]) tensor([0.4235, 0.1848, 0.2230, 0.1687]) -Greedy action tensor([ 0.5917, -0.3939, -0.0707, -0.2873]) tensor([0.4340, 0.1620, 0.2238, 0.1802]) -Greedy action tensor([ 0.0037, 0.0983, 0.0199, -0.1418]) tensor([0.2512, 0.2762, 0.2554, 0.2172]) -Greedy action tensor([ 0.6273, -0.1425, 0.1829, -0.3797]) tensor([0.4049, 0.1875, 0.2596, 0.1479]) -Greedy action tensor([ 1.0194, -0.9800, -0.0509, -0.3650]) tensor([0.5784, 0.0783, 0.1984, 0.1449]) -Greedy action tensor([ 0.6799, -0.3505, -0.1013, -0.3898]) tensor([0.4634, 0.1654, 0.2122, 0.1590]) -Greedy action tensor([ 0.2917, 0.1730, 0.0760, -0.0346]) tensor([0.2928, 0.2600, 0.2360, 0.2113]) -Greedy action tensor([ 0.2897, 0.1574, -0.0117, -0.3725]) tensor([0.3193, 0.2798, 0.2362, 0.1647]) -Greedy action tensor([ 0.7711, -0.5466, 0.0173, -0.5561]) tensor([0.4991, 0.1336, 0.2349, 0.1324]) -Greedy action tensor([ 0.7716, -0.4471, -0.0438, -0.2008]) tensor([0.4725, 0.1397, 0.2091, 0.1787]) -Greedy action tensor([ 0.5878, -0.3940, -0.1316, -0.3194]) tensor([0.4414, 0.1654, 0.2150, 0.1782]) -Greedy action tensor([ 0.6265, -0.3548, 0.0324, -0.4150]) tensor([0.4386, 0.1644, 0.2421, 0.1548]) -Greedy action tensor([ 0.3836, -0.2019, -0.0093, -0.2758]) tensor([0.3638, 0.2026, 0.2456, 0.1881]) -Greedy action tensor([ 0.3651, 0.0134, -0.1131, -0.1895]) tensor([0.3451, 0.2428, 0.2139, 0.1982]) -Greedy action tensor([ 0.6423, -0.4305, -0.0138, -0.3811]) tensor([0.4504, 0.1541, 0.2337, 0.1619]) -Greedy action tensor([ 0.6176, -0.2444, -0.0234, -0.1993]) tensor([0.4183, 0.1766, 0.2203, 0.1848]) -Greedy action tensor([ 0.7129, -0.3104, -0.0332, -0.4528]) tensor([0.4661, 0.1675, 0.2210, 0.1453]) -Greedy action tensor([ 0.7033, -0.4032, 0.0664, -0.3405]) tensor([0.4521, 0.1495, 0.2391, 0.1592]) -Greedy action tensor([ 0.7143, -0.2976, -0.0716, -0.3473]) tensor([0.4619, 0.1679, 0.2105, 0.1598]) -Greedy action tensor([ 1.2264, -1.2800, 0.0716, -0.5244]) tensor([0.6368, 0.0519, 0.2007, 0.1106]) -Greedy action tensor([ 0.5892, 0.2182, -0.1902, -0.0022]) tensor([0.3701, 0.2554, 0.1697, 0.2048]) -Greedy action tensor([ 0.3706, 0.1074, -0.1155, -0.2095]) tensor([0.3397, 0.2611, 0.2089, 0.1902]) -Greedy action tensor([ 0.5985, -0.2701, -0.0165, -0.2535]) tensor([0.4190, 0.1758, 0.2265, 0.1787]) -Greedy action tensor([ 0.5330, 0.0387, -0.0835, -0.2467]) tensor([0.3834, 0.2339, 0.2070, 0.1758]) -Greedy action tensor([ 1.0545, -0.6609, -0.0497, 0.3040]) tensor([0.5041, 0.0907, 0.1671, 0.2380]) -Greedy action tensor([ 1.3203, -0.5593, 0.0264, -0.2730]) tensor([0.6135, 0.0936, 0.1682, 0.1247]) -Greedy action tensor([ 1.7582, -0.5187, -0.4632, 0.4149]) tensor([0.6793, 0.0697, 0.0737, 0.1773]) -Greedy action tensor([ 1.0569, -0.8490, -0.1876, 0.5242]) tensor([0.4941, 0.0735, 0.1423, 0.2901]) -Greedy action tensor([ 0.9189, -0.3052, -0.1819, 0.0624]) tensor([0.4875, 0.1433, 0.1621, 0.2070]) -Greedy action tensor([ 1.0377, -0.2212, -0.1303, 0.0793]) tensor([0.5054, 0.1435, 0.1572, 0.1938]) -Greedy action tensor([ 0.7849, -0.6543, -0.4641, 0.5188]) tensor([0.4366, 0.1035, 0.1252, 0.3346]) -Greedy action tensor([ 1.1829, -0.0611, -0.1224, 0.1418]) tensor([0.5229, 0.1507, 0.1418, 0.1846]) -Greedy action tensor([ 1.3821, -0.3970, -0.3239, -0.0157]) tensor([0.6260, 0.1057, 0.1137, 0.1547]) -Greedy action tensor([ 0.8221, -0.0664, -0.3422, 0.1655]) tensor([0.4460, 0.1834, 0.1392, 0.2313]) -Greedy action tensor([ 1.2221, -0.4353, -0.1875, 0.2607]) tensor([0.5503, 0.1049, 0.1344, 0.2104]) -Greedy action tensor([ 1.7059, -0.6988, -0.0955, 0.3508]) tensor([0.6608, 0.0597, 0.1091, 0.1704]) -Greedy action tensor([ 0.6928, -0.3087, -0.1588, 0.2404]) tensor([0.4115, 0.1511, 0.1756, 0.2618]) -Greedy action tensor([ 0.6577, -0.3313, -0.0319, 0.1108]) tensor([0.4078, 0.1517, 0.2046, 0.2360]) -Greedy action tensor([ 0.8055, -0.3692, -0.4013, 0.7837]) tensor([0.3866, 0.1194, 0.1157, 0.3783]) -Greedy action tensor([ 0.7430, -0.2503, -0.5564, 0.4660]) tensor([0.4165, 0.1542, 0.1136, 0.3157]) -Greedy action tensor([ 1.0514, -0.2777, -0.6306, 0.5772]) tensor([0.4824, 0.1277, 0.0897, 0.3002]) -Greedy action tensor([ 1.1366, -0.4775, -0.2876, 0.3937]) tensor([0.5221, 0.1039, 0.1257, 0.2484]) -Greedy action tensor([ 0.5945, -0.2080, -0.0222, -0.1598]) tensor([0.4068, 0.1823, 0.2196, 0.1913]) -Greedy action tensor([ 0.6539, -0.3325, -0.1635, 0.0430]) tensor([0.4242, 0.1582, 0.1873, 0.2303]) -Greedy action tensor([ 1.1660e+00, -3.7769e-01, -2.1568e-01, -1.0280e-03]) tensor([0.5630, 0.1203, 0.1414, 0.1753]) -Greedy action tensor([ 0.8638, -0.1785, -0.2889, -0.2326]) tensor([0.4994, 0.1761, 0.1577, 0.1668]) -Greedy action tensor([ 1.3431, -0.3468, -0.4307, -0.2093]) tensor([0.6386, 0.1178, 0.1084, 0.1352]) -Greedy action tensor([ 0.6073, -0.5096, -0.3552, 0.3988]) tensor([0.3967, 0.1298, 0.1515, 0.3220]) -Greedy action tensor([ 0.9697, -0.5643, -0.1279, 0.3571]) tensor([0.4782, 0.1031, 0.1596, 0.2591]) -Greedy action tensor([ 1.5332, -0.1859, -0.4333, 0.2731]) tensor([0.6239, 0.1118, 0.0873, 0.1770]) -Greedy action tensor([ 1.0886, -0.4976, -0.0802, 0.4397]) tensor([0.4907, 0.1004, 0.1525, 0.2564]) -Greedy action tensor([ 1.1779, -0.6830, -0.5908, 0.7321]) tensor([0.5085, 0.0791, 0.0867, 0.3256]) -Greedy action tensor([ 1.4759, -0.5230, -0.3474, 0.1281]) tensor([0.6423, 0.0870, 0.1037, 0.1669]) -Greedy action tensor([ 1.7188, -0.8657, -0.1143, 0.3552]) tensor([0.6706, 0.0506, 0.1072, 0.1715]) -Greedy action tensor([ 1.1161, -0.2365, -0.1133, 0.0505]) tensor([0.5275, 0.1364, 0.1543, 0.1818]) -Greedy action tensor([ 1.1568, -0.2850, -0.1947, -0.0049]) tensor([0.5530, 0.1308, 0.1431, 0.1731]) -Greedy action tensor([ 1.1387, -0.2338, -0.1140, 0.0781]) tensor([0.5304, 0.1344, 0.1515, 0.1836]) -Greedy action tensor([ 0.8761, -0.2741, -0.1669, -0.0953]) tensor([0.4884, 0.1546, 0.1721, 0.1849]) -Greedy action tensor([ 1.0472, -0.3168, -0.1541, 0.0494]) tensor([0.5194, 0.1328, 0.1563, 0.1915]) -Greedy action tensor([ 1.0603, -0.3157, -0.2032, -0.1133]) tensor([0.5422, 0.1369, 0.1532, 0.1677]) -Greedy action tensor([ 0.8114, -0.5398, 0.0714, -0.1290]) tensor([0.4702, 0.1218, 0.2244, 0.1836]) -Greedy action tensor([ 1.6425, -0.9326, -0.3505, 0.3633]) tensor([0.6708, 0.0511, 0.0914, 0.1867]) -Greedy action tensor([ 1.1236, -0.6860, -0.3049, 0.1629]) tensor([0.5599, 0.0917, 0.1342, 0.2142]) -Greedy action tensor([ 1.6351, -0.8186, -0.2089, 0.2885]) tensor([0.6648, 0.0572, 0.1052, 0.1729]) -Greedy action tensor([ 0.6509, -0.3349, 0.0304, -0.1313]) tensor([0.4223, 0.1576, 0.2270, 0.1931]) -Greedy action tensor([ 1.1974, -1.0803, -0.6190, 0.5554]) tensor([0.5582, 0.0572, 0.0908, 0.2938]) -Greedy action tensor([ 0.1745, -0.0563, 0.0606, 0.2426]) tensor([0.2662, 0.2113, 0.2375, 0.2849]) -Greedy action tensor([ 0.4561, -0.2534, -0.1187, 0.1465]) tensor([0.3586, 0.1764, 0.2018, 0.2631]) -Greedy action tensor([ 1.4336, -0.9813, -0.2132, 0.2419]) tensor([0.6306, 0.0564, 0.1215, 0.1915]) -Greedy action tensor([ 1.2771, -0.5686, -0.1444, 0.2085]) tensor([0.5738, 0.0906, 0.1385, 0.1971]) -Greedy action tensor([ 1.0352, -0.2032, -0.5020, 0.5566]) tensor([0.4707, 0.1364, 0.1012, 0.2917]) -Greedy action tensor([ 1.4953, -0.5041, -0.0367, 0.4475]) tensor([0.5875, 0.0796, 0.1269, 0.2060]) -Greedy action tensor([ 1.3557, -0.5125, 0.0681, -0.0417]) tensor([0.5961, 0.0920, 0.1645, 0.1474]) -Greedy action tensor([ 0.9393, -0.3160, 0.0026, -0.0496]) tensor([0.4881, 0.1391, 0.1913, 0.1816]) -Greedy action tensor([ 1.0529, -0.6534, -0.0905, 0.1585]) tensor([0.5238, 0.0951, 0.1670, 0.2141]) -Greedy action tensor([ 0.8601, -0.4983, 0.1347, -0.0336]) tensor([0.4650, 0.1196, 0.2251, 0.1903]) -Greedy action tensor([ 0.8532, -0.3842, 0.0010, 0.0788]) tensor([0.4592, 0.1332, 0.1958, 0.2117]) -Greedy action tensor([ 2.1597, -1.0723, -0.4148, 0.6058]) tensor([0.7535, 0.0297, 0.0574, 0.1593]) -Greedy action tensor([ 1.7759, -0.7126, -0.4563, 0.2159]) tensor([0.7141, 0.0593, 0.0766, 0.1500]) -Greedy action tensor([ 0.9034, -0.4875, -0.2941, 0.2978]) tensor([0.4770, 0.1187, 0.1440, 0.2603]) -Greedy action tensor([ 1.3666, -0.3617, -0.2385, 0.3104]) tensor([0.5793, 0.1029, 0.1164, 0.2015]) -Greedy action tensor([ 1.5267, -0.3054, -0.6763, 0.2777]) tensor([0.6421, 0.1028, 0.0709, 0.1841]) -Greedy action tensor([ 1.4705, -0.4442, -0.3553, 0.2809]) tensor([0.6200, 0.0914, 0.0999, 0.1887]) -Greedy action tensor([ 0.9912, -0.3901, -0.0825, -0.0244]) tensor([0.5115, 0.1285, 0.1748, 0.1852]) -Greedy action tensor([ 1.1096, -0.5529, -0.5732, 0.5317]) tensor([0.5164, 0.0979, 0.0960, 0.2897]) -Greedy action tensor([ 1.1176, -0.3858, -0.2783, 0.3557]) tensor([0.5163, 0.1148, 0.1279, 0.2410]) -Greedy action tensor([ 1.4313, -0.7002, -0.2130, 0.3087]) tensor([0.6108, 0.0725, 0.1180, 0.1988]) -Greedy action tensor([ 0.3623, -0.2616, -0.5230, 0.5765]) tensor([0.3138, 0.1681, 0.1294, 0.3887]) -Greedy action tensor([ 1.0986, -0.6686, -0.0705, 0.1807]) tensor([0.5317, 0.0908, 0.1652, 0.2123]) -Greedy action tensor([ 0.5700, -0.2519, -0.2193, -0.1185]) tensor([0.4173, 0.1835, 0.1895, 0.2096]) -Greedy action tensor([ 1.1239, -0.4539, -0.1553, 0.3738]) tensor([0.5110, 0.1055, 0.1422, 0.2413]) -Greedy action tensor([ 1.5491, -0.2935, -0.3237, 0.3196]) tensor([0.6232, 0.0987, 0.0958, 0.1823]) -Greedy action tensor([ 0.8722, -0.0090, 0.2184, -0.2354]) tensor([0.4416, 0.1829, 0.2296, 0.1459]) -Greedy action tensor([ 0.9143, -0.3770, -0.2057, -0.0498]) tensor([0.5044, 0.1387, 0.1646, 0.1923]) -Greedy action tensor([ 1.6052, -0.5162, -0.2385, -0.0520]) tensor([0.6808, 0.0816, 0.1077, 0.1298]) -Greedy action tensor([ 1.2517e+00, -6.6167e-02, 1.1673e-03, 5.6713e-02]) tensor([0.5386, 0.1442, 0.1542, 0.1630]) -Greedy action tensor([ 1.3746, -0.3742, -0.1643, 0.3697]) tensor([0.5699, 0.0992, 0.1223, 0.2086]) -Greedy action tensor([ 0.9808, -0.1245, -0.4908, 0.0185]) tensor([0.5148, 0.1704, 0.1182, 0.1966]) -Greedy action tensor([ 1.1887, -0.2062, -0.4057, -0.1460]) tensor([0.5834, 0.1446, 0.1184, 0.1536]) -Greedy action tensor([ 1.1356, -0.0108, -0.0783, 0.0100]) tensor([0.5156, 0.1639, 0.1532, 0.1673]) -Greedy action tensor([ 1.2227, -0.5364, -0.0045, -0.1488]) tensor([0.5817, 0.1002, 0.1705, 0.1476]) -Greedy action tensor([ 0.8691, -0.3348, -0.1795, 0.2165]) tensor([0.4606, 0.1382, 0.1614, 0.2398]) -Greedy action tensor([ 0.7316, -0.2436, -0.0688, 0.1640]) tensor([0.4179, 0.1576, 0.1877, 0.2369]) -Greedy action tensor([ 1.0911, -0.5102, -0.2165, 0.0641]) tensor([0.5464, 0.1102, 0.1478, 0.1957]) -Greedy action tensor([ 0.8898, -0.4930, -0.0861, 0.0128]) tensor([0.4893, 0.1227, 0.1844, 0.2036]) -Greedy action tensor([-0.4559, -0.4533, 1.4288, -0.0475]) tensor([0.0991, 0.0993, 0.6525, 0.1491]) -Greedy action tensor([ 0.5250, 0.7173, -0.2535, 0.0403]) tensor([0.3042, 0.3687, 0.1397, 0.1874]) -Greedy action tensor([ 1.4171, -1.0597, -0.1687, 1.0319]) tensor([0.5078, 0.0427, 0.1040, 0.3455]) -Greedy action tensor([-0.2103, 0.4739, -0.5632, 0.9709]) tensor([0.1440, 0.2855, 0.1012, 0.4693]) -Greedy action tensor([-0.6264, -0.7826, 1.3763, 0.1745]) tensor([0.0870, 0.0744, 0.6447, 0.1938]) -Greedy action tensor([1.7392, 0.1810, 0.5079, 1.8162]) tensor([0.3872, 0.0815, 0.1130, 0.4182]) -Greedy action tensor([ 0.2596, -0.6509, -0.7059, 0.6486]) tensor([0.3069, 0.1235, 0.1169, 0.4528]) -Greedy action tensor([-0.7610, 0.0380, 1.4338, -0.3140]) tensor([0.0726, 0.1615, 0.6522, 0.1136]) -Greedy action tensor([ 0.2866, -0.8100, -0.2631, -0.3143]) tensor([0.4066, 0.1358, 0.2347, 0.2230]) -Greedy action tensor([ 2.5599, -0.8929, 0.9253, 1.1965]) tensor([0.6746, 0.0214, 0.1316, 0.1725]) -Greedy action tensor([-1.1881, -0.9060, 1.4048, -0.4240]) tensor([0.0560, 0.0743, 0.7493, 0.1203]) -Greedy action tensor([ 1.3628, -0.0639, 0.1395, 1.9163]) tensor([0.3055, 0.0733, 0.0899, 0.5313]) -Greedy action tensor([1.3960, 1.6566, 0.1429, 0.3507]) tensor([0.3407, 0.4422, 0.0973, 0.1198]) -Greedy action tensor([ 1.3880, -0.7281, 0.6757, 1.7097]) tensor([0.3344, 0.0403, 0.1640, 0.4613]) -Greedy action tensor([ 1.3702, -0.8231, -0.4791, 0.8429]) tensor([0.5379, 0.0600, 0.0846, 0.3175]) -Greedy action tensor([-1.0938, 0.0041, -0.6237, -0.1575]) tensor([0.1227, 0.3679, 0.1964, 0.3130]) -Greedy action tensor([ 0.3763, -0.7991, 0.7295, 0.0882]) tensor([0.2872, 0.0887, 0.4088, 0.2153]) -Greedy action tensor([-0.1597, 0.7063, 1.5734, -0.3857]) tensor([0.1017, 0.2418, 0.5754, 0.0811]) -Greedy action tensor([ 1.3875, -0.8297, 2.5105, 0.4267]) tensor([0.2190, 0.0239, 0.6733, 0.0838]) -Greedy action tensor([ 0.4096, -0.8883, 1.2109, -0.2683]) tensor([0.2494, 0.0681, 0.5558, 0.1266]) -Greedy action tensor([ 1.0955, -1.5750, -0.8839, 1.3471]) tensor([0.4011, 0.0278, 0.0554, 0.5158]) -Greedy action tensor([-0.3720, 0.4609, -0.0178, -0.6108]) tensor([0.1814, 0.4172, 0.2585, 0.1429]) -Greedy action tensor([-0.9231, -0.9447, -0.8873, -0.1986]) tensor([0.1969, 0.1927, 0.2041, 0.4063]) -Greedy action tensor([ 0.4292, -1.7255, 1.1647, 0.3295]) tensor([0.2434, 0.0282, 0.5080, 0.2204]) -Greedy action tensor([ 0.5011, -0.1512, 1.9162, 0.8947]) tensor([0.1404, 0.0732, 0.5782, 0.2082]) -Greedy action tensor([ 1.7724, -1.4225, 0.7195, 0.8930]) tensor([0.5540, 0.0227, 0.1933, 0.2300]) -Greedy action tensor([ 1.3402, -1.3190, 1.1822, 1.0689]) tensor([0.3723, 0.0261, 0.3179, 0.2838]) -Greedy action tensor([ 1.5964, -1.1056, 0.9043, 1.3674]) tensor([0.4232, 0.0284, 0.2118, 0.3366]) -Greedy action tensor([ 0.5049, 0.1619, -0.6172, 0.1980]) tensor([0.3609, 0.2561, 0.1175, 0.2655]) -Greedy action tensor([-0.4618, -1.8720, 0.6835, -0.0549]) tensor([0.1698, 0.0414, 0.5337, 0.2550]) -Greedy action tensor([-0.1269, -0.8860, 0.0646, -0.0921]) tensor([0.2692, 0.1260, 0.3260, 0.2787]) -Greedy action tensor([ 0.3397, 0.6677, 0.0980, -0.7709]) tensor([0.2855, 0.3963, 0.2242, 0.0940]) -Greedy action tensor([ 0.2522, -2.0254, -0.3866, 0.7666]) tensor([0.3027, 0.0310, 0.1598, 0.5064]) -Greedy action tensor([0.9764, 0.3241, 1.0062, 0.3437]) tensor([0.3244, 0.1690, 0.3343, 0.1723]) -Greedy action tensor([-0.0203, -1.4557, 1.0102, -0.7095]) tensor([0.2201, 0.0524, 0.6169, 0.1105]) -Greedy action tensor([ 0.1466, -0.0644, 0.7113, 0.6614]) tensor([0.1908, 0.1545, 0.3355, 0.3192]) -Greedy action tensor([0.4648, 0.3691, 0.5983, 1.1858]) tensor([0.1958, 0.1779, 0.2237, 0.4026]) -Greedy action tensor([ 1.4066, -1.1530, -0.2089, 0.9354]) tensor([0.5262, 0.0407, 0.1046, 0.3285]) -Greedy action tensor([-0.8111, 0.9575, 0.8908, -0.5096]) tensor([0.0730, 0.4280, 0.4004, 0.0987]) -Greedy action tensor([-0.3583, 0.2477, -0.2121, -0.3570]) tensor([0.2003, 0.3672, 0.2319, 0.2006]) -Greedy action tensor([ 0.0947, 0.8244, 0.0130, -0.5273]) tensor([0.2206, 0.4576, 0.2033, 0.1184]) -Greedy action tensor([ 0.3174, -0.6910, -0.0958, 0.1089]) tensor([0.3523, 0.1285, 0.2331, 0.2860]) -Greedy action tensor([0.1552, 0.1705, 0.0549, 0.7655]) tensor([0.2100, 0.2133, 0.1900, 0.3867]) -Greedy action tensor([-1.6273, -0.2982, -0.1624, 0.4489]) tensor([0.0586, 0.2212, 0.2534, 0.4669]) -Greedy action tensor([ 0.6499, 0.3427, -0.3126, 0.4275]) tensor([0.3427, 0.2521, 0.1309, 0.2744]) -Greedy action tensor([ 0.8583, -1.7535, -0.4508, 0.3543]) tensor([0.5135, 0.0377, 0.1387, 0.3102]) -Greedy action tensor([-0.5126, -0.7089, 0.0568, 0.2265]) tensor([0.1760, 0.1446, 0.3110, 0.3685]) -Greedy action tensor([ 1.3261, -1.9440, -0.0665, 0.0706]) tensor([0.6364, 0.0242, 0.1581, 0.1813]) -Greedy action tensor([ 0.3541, -0.3240, 1.3671, 1.5404]) tensor([0.1327, 0.0674, 0.3654, 0.4345]) -Greedy action tensor([-0.2250, -1.8004, 0.1861, 1.0643]) tensor([0.1576, 0.0326, 0.2377, 0.5721]) -Greedy action tensor([-0.3959, 0.6310, 1.1520, -0.9818]) tensor([0.1105, 0.3085, 0.5195, 0.0615]) -Greedy action tensor([0.5443, 0.0454, 0.1120, 0.5362]) tensor([0.3079, 0.1869, 0.1998, 0.3054]) -Greedy action tensor([-0.7901, 0.5826, -0.1963, -0.2225]) tensor([0.1174, 0.4631, 0.2125, 0.2070]) -Greedy action tensor([ 1.2555, -1.0784, 1.5765, 1.0188]) tensor([0.3063, 0.0297, 0.4223, 0.2418]) -Greedy action tensor([1.3186, 0.4349, 0.1822, 1.8464]) tensor([0.2916, 0.1205, 0.0936, 0.4943]) -Greedy action tensor([ 0.1605, -1.1934, 1.5622, -0.6945]) tensor([0.1740, 0.0449, 0.7070, 0.0740]) -Greedy action tensor([ 1.6459, -0.4431, 2.0418, -0.5568]) tensor([0.3676, 0.0455, 0.5462, 0.0406]) -Greedy action tensor([ 0.0241, 0.0914, -1.1496, 0.7687]) tensor([0.2230, 0.2385, 0.0690, 0.4695]) -Greedy action tensor([ 0.5316, -0.3374, 0.7878, 0.6415]) tensor([0.2613, 0.1096, 0.3376, 0.2916]) -Greedy action tensor([0.1778, 0.5626, 0.7253, 0.7802]) tensor([0.1660, 0.2439, 0.2870, 0.3032]) -Greedy action tensor([ 0.8818, -1.6798, 0.3559, -0.5323]) tensor([0.5232, 0.0404, 0.3092, 0.1272]) -Greedy action tensor([ 1.0674, -0.3301, 0.9211, 1.3299]) tensor([0.2931, 0.0725, 0.2532, 0.3811]) -Greedy action tensor([-0.0308, -0.8363, 0.7120, 1.1734]) tensor([0.1453, 0.0649, 0.3054, 0.4844]) -Greedy action tensor([ 0.4657, 0.3755, -0.2358, 0.2883]) tensor([0.3080, 0.2814, 0.1527, 0.2579]) -Greedy action tensor([ 0.2764, -1.1556, 0.2798, 0.4302]) tensor([0.2934, 0.0701, 0.2944, 0.3422]) -Greedy action tensor([ 0.8998, -0.3104, 0.3029, 1.7113]) tensor([0.2439, 0.0727, 0.1343, 0.5491]) -Greedy action tensor([ 0.8227, -0.2747, 0.7756, 0.3873]) tensor([0.3408, 0.1137, 0.3251, 0.2205]) -Greedy action tensor([ 0.7479, -0.4442, 0.7722, 0.3577]) tensor([0.3328, 0.1010, 0.3409, 0.2252]) -Greedy action tensor([ 1.1547e+00, -5.8436e-01, 1.6864e+00, -1.3404e-03]) tensor([0.3133, 0.0550, 0.5331, 0.0986]) -Greedy action tensor([ 0.7858, 0.0964, -0.1800, 0.1016]) tensor([0.4189, 0.2103, 0.1595, 0.2113]) -Greedy action tensor([ 1.6024, -1.1867, 1.3107, 0.7330]) tensor([0.4489, 0.0276, 0.3353, 0.1882]) -Greedy action tensor([-1.1154, -1.7405, 1.0813, 0.0653]) tensor([0.0725, 0.0388, 0.6524, 0.2362]) -Greedy action tensor([-0.9454, -1.8691, -0.8893, 0.3565]) tensor([0.1631, 0.0648, 0.1725, 0.5996]) -Greedy action tensor([ 0.4856, -0.9952, -0.3718, 1.0568]) tensor([0.2922, 0.0665, 0.1240, 0.5173]) -Greedy action tensor([-0.5170, -2.2062, 1.1707, 0.2256]) tensor([0.1150, 0.0212, 0.6220, 0.2417]) -Greedy action tensor([1.1590, 0.7928, 0.6343, 0.7015]) tensor([0.3427, 0.2376, 0.2028, 0.2169]) -Greedy action tensor([ 1.3250, -0.1106, 0.2417, 1.5505]) tensor([0.3534, 0.0841, 0.1196, 0.4428]) -Greedy action tensor([ 0.8790, -0.8654, 0.8939, 1.0639]) tensor([0.2947, 0.0515, 0.2992, 0.3546]) -Greedy action tensor([0.4117, 0.5649, 0.7724, 0.7304]) tensor([0.2010, 0.2343, 0.2883, 0.2764]) -Greedy action tensor([-0.5111, -0.6544, -0.5485, 0.9108]) tensor([0.1434, 0.1242, 0.1381, 0.5943]) -Greedy action tensor([0.3722, 1.1744, 1.2087, 0.1503]) tensor([0.1577, 0.3518, 0.3641, 0.1263]) -Greedy action tensor([-0.0245, -1.9594, -0.2962, 0.5792]) tensor([0.2677, 0.0387, 0.2040, 0.4896]) -Greedy action tensor([-0.7419, -0.6624, 0.9423, 1.2812]) tensor([0.0665, 0.0720, 0.3584, 0.5030]) -Greedy action tensor([-0.9606, 0.7141, 0.1086, -0.0682]) tensor([0.0855, 0.4565, 0.2492, 0.2088]) -Greedy action tensor([-1.0696, -0.4388, 0.5483, 0.9593]) tensor([0.0644, 0.1210, 0.3247, 0.4898]) -Greedy action tensor([-0.8140, -0.5139, 0.5684, 1.0707]) tensor([0.0774, 0.1045, 0.3084, 0.5097]) -Greedy action tensor([-1.1169, -0.5909, 0.2428, 0.4280]) tensor([0.0887, 0.1501, 0.3455, 0.4157]) -Greedy action tensor([-1.6239, -0.4981, 0.4965, 0.0657]) tensor([0.0561, 0.1729, 0.4673, 0.3037]) -Greedy action tensor([-1.8589, -0.5833, 0.4418, -0.0628]) tensor([0.0486, 0.1739, 0.4848, 0.2927]) -Greedy action tensor([-1.9132, -0.7609, 1.4561, 0.8051]) tensor([0.0207, 0.0654, 0.6007, 0.3132]) -Greedy action tensor([-1.0449, -0.5314, 0.3084, 0.1912]) tensor([0.1002, 0.1674, 0.3877, 0.3448]) -Greedy action tensor([-1.9518, -0.4661, 1.1235, 0.5444]) tensor([0.0255, 0.1127, 0.5523, 0.3095]) -Greedy action tensor([-1.4202, -0.5117, 0.4176, 0.0494]) tensor([0.0709, 0.1758, 0.4452, 0.3081]) -Greedy action tensor([-1.7745, -0.4061, 0.6230, -0.0026]) tensor([0.0459, 0.1802, 0.5042, 0.2697]) -Greedy action tensor([-0.9408, 0.6143, 0.1722, 0.0044]) tensor([0.0881, 0.4171, 0.2681, 0.2267]) -Greedy action tensor([-1.8998, -0.5146, 1.2777, 0.6928]) tensor([0.0236, 0.0944, 0.5664, 0.3156]) -Greedy action tensor([-1.1751, 0.3740, -0.0222, 0.3893]) tensor([0.0732, 0.3447, 0.2320, 0.3501]) -Greedy action tensor([-0.9802, -0.6125, 0.2029, 0.2861]) tensor([0.1080, 0.1560, 0.3527, 0.3833]) -Greedy action tensor([-1.3324, -0.5850, 0.3987, 0.3038]) tensor([0.0720, 0.1520, 0.4064, 0.3696]) -Greedy action tensor([-0.5323, -0.1382, -0.0467, -0.0112]) tensor([0.1726, 0.2560, 0.2806, 0.2907]) -Greedy action tensor([-0.6380, -0.5518, 0.1580, 0.3095]) tensor([0.1452, 0.1583, 0.3219, 0.3746]) -Greedy action tensor([-1.9508, -0.4666, 0.8453, 0.1348]) tensor([0.0335, 0.1478, 0.5489, 0.2697]) -Greedy action tensor([-0.5707, -0.5841, 0.3088, 0.1572]) tensor([0.1546, 0.1526, 0.3726, 0.3202]) -Greedy action tensor([-1.7881, -0.1480, 0.5408, -0.0512]) tensor([0.0452, 0.2333, 0.4645, 0.2570]) -Greedy action tensor([-1.5680, -0.1629, 0.4157, 0.0948]) tensor([0.0568, 0.2313, 0.4126, 0.2993]) -Greedy action tensor([-1.8584, -0.3359, 0.6133, -0.1127]) tensor([0.0432, 0.1979, 0.5114, 0.2475]) -Greedy action tensor([-0.5694, 1.0886, 0.3651, 0.7026]) tensor([0.0809, 0.4246, 0.2059, 0.2886]) -Greedy action tensor([-1.3602, -0.4996, 0.4438, 0.4195]) tensor([0.0651, 0.1539, 0.3953, 0.3858]) -Greedy action tensor([-1.2601, -0.4284, 0.5079, -0.2869]) tensor([0.0847, 0.1946, 0.4964, 0.2242]) -Greedy action tensor([-0.7496, -0.1197, 0.3458, -0.4119]) tensor([0.1376, 0.2583, 0.4114, 0.1928]) -Greedy action tensor([-0.6675, -0.6023, 0.1907, 0.1938]) tensor([0.1472, 0.1571, 0.3473, 0.3484]) -Greedy action tensor([-1.5378, -0.5163, 0.5129, -0.1319]) tensor([0.0640, 0.1777, 0.4973, 0.2610]) -Greedy action tensor([-0.4630, -0.3706, 1.1663, 1.6645]) tensor([0.0641, 0.0703, 0.3271, 0.5384]) -Greedy action tensor([-1.9420, -0.6748, 0.7250, 0.1276]) tensor([0.0372, 0.1322, 0.5358, 0.2948]) -Greedy action tensor([-1.1184, -0.4300, 0.4414, 0.8947]) tensor([0.0656, 0.1307, 0.3123, 0.4914]) -Greedy action tensor([-1.9786, -0.4978, 1.0032, 0.3196]) tensor([0.0285, 0.1253, 0.5623, 0.2838]) -Greedy action tensor([-1.8964, -0.2530, 0.6058, -0.1484]) tensor([0.0414, 0.2144, 0.5061, 0.2381]) -Greedy action tensor([-1.0803, -0.3598, 0.4876, 0.7115]) tensor([0.0722, 0.1484, 0.3463, 0.4331]) -Greedy action tensor([-1.6291, -0.6938, 0.6147, 0.0102]) tensor([0.0552, 0.1406, 0.5201, 0.2842]) -Greedy action tensor([-1.9374, -0.4512, 0.6606, -0.1759]) tensor([0.0405, 0.1791, 0.5445, 0.2359]) -Greedy action tensor([-1.8772, -0.4647, 0.6264, -0.1341]) tensor([0.0434, 0.1782, 0.5305, 0.2480]) -Greedy action tensor([-1.6258, -0.2490, 0.5711, 0.2650]) tensor([0.0486, 0.1925, 0.4371, 0.3218]) -Greedy action tensor([-1.8024, -0.3529, 0.5796, -0.1054]) tensor([0.0464, 0.1978, 0.5025, 0.2533]) -Greedy action tensor([-1.6390, -0.6008, 0.5122, 0.0674]) tensor([0.0558, 0.1575, 0.4794, 0.3073]) -Greedy action tensor([-1.9759, -0.7981, 0.9397, 0.2620]) tensor([0.0312, 0.1012, 0.5754, 0.2922]) -Greedy action tensor([-0.8152, 0.1803, 0.4266, 0.9719]) tensor([0.0761, 0.2059, 0.2635, 0.4545]) -Greedy action tensor([-1.4504, -0.5985, 0.4121, 0.1400]) tensor([0.0681, 0.1596, 0.4384, 0.3340]) -Greedy action tensor([-1.0751, -0.6197, 0.2320, 0.3093]) tensor([0.0974, 0.1536, 0.3600, 0.3889]) -Greedy action tensor([-1.5259, -0.3552, 0.6496, 0.4269]) tensor([0.0498, 0.1606, 0.4386, 0.3510]) -Greedy action tensor([-1.1280, -0.5730, 0.3989, 0.6196]) tensor([0.0764, 0.1331, 0.3518, 0.4387]) -Greedy action tensor([0.1203, 0.2160, 0.9267, 1.7137]) tensor([0.1080, 0.1188, 0.2419, 0.5313]) -Greedy action tensor([-1.1386, -0.1884, 1.2572, 1.2158]) tensor([0.0398, 0.1031, 0.4374, 0.4197]) -Greedy action tensor([-1.3837, -0.4200, 1.0824, 1.1163]) tensor([0.0363, 0.0950, 0.4270, 0.4417]) -Greedy action tensor([-1.2346, 0.0666, 0.4718, -0.5544]) tensor([0.0823, 0.3022, 0.4532, 0.1624]) -Greedy action tensor([-1.5857, -0.5105, 0.5236, 0.1483]) tensor([0.0561, 0.1643, 0.4621, 0.3175]) -Greedy action tensor([-1.6957, -0.5201, 0.5454, -0.0702]) tensor([0.0534, 0.1730, 0.5022, 0.2714]) -Greedy action tensor([-0.5131, -0.2170, 1.1474, 1.5356]) tensor([0.0651, 0.0875, 0.3425, 0.5049]) -Greedy action tensor([-1.0613, -0.5561, 0.2259, 0.5264]) tensor([0.0895, 0.1483, 0.3242, 0.4379]) -Greedy action tensor([-1.7806, -0.4623, 0.5874, -0.0798]) tensor([0.0479, 0.1789, 0.5110, 0.2622]) -Greedy action tensor([-1.7320, -0.5014, 0.6472, 0.0623]) tensor([0.0471, 0.1612, 0.5084, 0.2833]) -Greedy action tensor([-1.9141, -0.4683, 0.6558, -0.1623]) tensor([0.0415, 0.1763, 0.5427, 0.2395]) -Greedy action tensor([-1.6316, -0.5248, 0.5707, 0.0818]) tensor([0.0537, 0.1624, 0.4859, 0.2980]) -Greedy action tensor([-1.7637, -0.5162, 0.5635, -0.0866]) tensor([0.0498, 0.1734, 0.5104, 0.2664]) -Greedy action tensor([-0.5144, -0.4804, 0.0631, -0.0650]) tensor([0.1858, 0.1922, 0.3309, 0.2911]) -Greedy action tensor([-1.5120, -0.5691, 0.6893, 0.1790]) tensor([0.0555, 0.1424, 0.5012, 0.3009]) -Greedy action tensor([-1.7999, -0.6284, 1.4381, 0.9151]) tensor([0.0223, 0.0720, 0.5686, 0.3371]) -Greedy action tensor([-1.8616, -0.2941, 0.6015, -0.1245]) tensor([0.0431, 0.2065, 0.5057, 0.2447]) -Greedy action tensor([-1.4021, -0.4773, 1.0058, 1.1494]) tensor([0.0364, 0.0918, 0.4046, 0.4671]) -Greedy action tensor([-1.6938, 0.1639, 0.4438, 0.0294]) tensor([0.0465, 0.2982, 0.3946, 0.2607]) -Greedy action tensor([-1.4603, -0.5341, 0.4171, 0.1240]) tensor([0.0669, 0.1690, 0.4376, 0.3264]) -Greedy action tensor([-1.4162, 0.1810, 0.4477, 0.5246]) tensor([0.0517, 0.2552, 0.3332, 0.3599]) -Greedy action tensor([-1.8692, -0.4286, 0.6279, -0.1219]) tensor([0.0433, 0.1827, 0.5256, 0.2483]) -Greedy action tensor([-1.5182, -0.5875, 0.4959, 0.1072]) tensor([0.0621, 0.1574, 0.4652, 0.3154]) -Greedy action tensor([-1.1087, -0.5689, 0.2339, 0.4283]) tensor([0.0893, 0.1532, 0.3420, 0.4154]) -Greedy action tensor([-1.1300, -0.5863, 0.2416, 0.2982]) tensor([0.0923, 0.1590, 0.3638, 0.3850]) -Greedy action tensor([-1.3029, -0.5816, 0.3063, 0.2765]) tensor([0.0775, 0.1594, 0.3873, 0.3759]) -Greedy action tensor([-1.8322, -0.4725, 0.6488, -0.0357]) tensor([0.0437, 0.1703, 0.5225, 0.2635]) -Greedy action tensor([-1.1905, -0.5759, 0.4886, -0.1674]) tensor([0.0910, 0.1682, 0.4877, 0.2531]) -Greedy action tensor([-1.4554, -0.5025, 0.3893, 0.1376]) tensor([0.0674, 0.1748, 0.4263, 0.3315]) -Greedy action tensor([-1.7403, -0.5276, 0.5819, -0.0563]) tensor([0.0501, 0.1686, 0.5112, 0.2701]) -Greedy action tensor([-0.3803, -0.2482, 1.1016, 1.6389]) tensor([0.0710, 0.0811, 0.3127, 0.5352]) -Greedy action tensor([-1.6481e+00, -5.3956e-01, 5.4222e-01, 1.2159e-03]) tensor([0.0550, 0.1667, 0.4919, 0.2864]) -Greedy action tensor([-0.4053, -0.2448, 0.1536, 0.1590]) tensor([0.1760, 0.2067, 0.3078, 0.3095]) -Greedy action tensor([ 0.6825, -0.2574, -0.0107, -0.3899]) tensor([0.4479, 0.1750, 0.2239, 0.1533]) -Greedy action tensor([ 0.9523, -0.2495, -0.1041, -0.4860]) tensor([0.5303, 0.1594, 0.1844, 0.1259]) -Greedy action tensor([ 0.8632, -0.8517, -0.1129, -0.4545]) tensor([0.5481, 0.0987, 0.2065, 0.1468]) -Greedy action tensor([ 0.5078, -0.2217, 0.0376, -0.2910]) tensor([0.3911, 0.1886, 0.2444, 0.1759]) -Greedy action tensor([ 0.6991, -0.4741, -0.0402, -0.5207]) tensor([0.4803, 0.1486, 0.2293, 0.1418]) -Greedy action tensor([ 0.8135, -0.2827, 0.1512, -0.4976]) tensor([0.4719, 0.1577, 0.2433, 0.1272]) -Greedy action tensor([ 1.0679, -0.6525, -0.0792, -0.2962]) tensor([0.5707, 0.1022, 0.1812, 0.1459]) -Greedy action tensor([ 0.6752, 0.0657, 0.0810, -0.3690]) tensor([0.4086, 0.2221, 0.2255, 0.1438]) -Greedy action tensor([ 0.6142, -0.5239, -0.0394, -0.3143]) tensor([0.4473, 0.1433, 0.2327, 0.1767]) -Greedy action tensor([ 0.8290, -0.4046, 0.0104, -0.4473]) tensor([0.4972, 0.1448, 0.2193, 0.1387]) -Greedy action tensor([ 0.4257, -0.1586, 0.0235, -0.2160]) tensor([0.3633, 0.2025, 0.2430, 0.1912]) -Greedy action tensor([ 0.5545, -0.3535, -0.1320, -0.4225]) tensor([0.4380, 0.1767, 0.2205, 0.1649]) -Greedy action tensor([ 0.9250, -0.4156, 0.1169, -0.1795]) tensor([0.4905, 0.1284, 0.2186, 0.1625]) -Greedy action tensor([ 0.4972, 0.1366, -0.0825, -0.3499]) tensor([0.3723, 0.2596, 0.2085, 0.1596]) -Greedy action tensor([ 0.6979, -0.3070, 0.2732, -0.2793]) tensor([0.4173, 0.1528, 0.2729, 0.1570]) -Greedy action tensor([ 0.9086, -0.3281, -0.0910, -0.1190]) tensor([0.4960, 0.1440, 0.1825, 0.1775]) -Greedy action tensor([ 0.4429, -0.0735, -0.1067, -0.2721]) tensor([0.3755, 0.2240, 0.2167, 0.1837]) -Greedy action tensor([ 0.6179, -0.1155, -0.0080, -0.1429]) tensor([0.4028, 0.1935, 0.2154, 0.1882]) -Greedy action tensor([ 1.3987, -1.3730, 0.0402, -0.6002]) tensor([0.6872, 0.0430, 0.1767, 0.0931]) -Greedy action tensor([ 0.5835, -0.3216, -0.0701, -0.2447]) tensor([0.4235, 0.1713, 0.2203, 0.1850]) -Greedy action tensor([ 0.5830, -0.3974, -0.1031, -0.3821]) tensor([0.4426, 0.1660, 0.2228, 0.1686]) -Greedy action tensor([ 0.1189, 0.0171, 0.1510, -0.1116]) tensor([0.2681, 0.2422, 0.2768, 0.2129]) -Greedy action tensor([ 0.2772, -0.2644, 0.0887, -0.4088]) tensor([0.3432, 0.1997, 0.2843, 0.1728]) -Greedy action tensor([ 0.4614, 0.0252, -0.0449, -0.1353]) tensor([0.3571, 0.2309, 0.2153, 0.1967]) -Greedy action tensor([ 0.7516, -0.5449, 0.0241, -0.5695]) tensor([0.4942, 0.1352, 0.2387, 0.1319]) -Greedy action tensor([ 0.7041, -0.6039, -0.0688, -0.1868]) tensor([0.4668, 0.1262, 0.2155, 0.1915]) -Greedy action tensor([ 0.9502, -0.5244, -0.0707, -0.7779]) tensor([0.5660, 0.1295, 0.2039, 0.1005]) -Greedy action tensor([ 0.5208, -0.2992, -0.0177, -0.1547]) tensor([0.3948, 0.1739, 0.2304, 0.2009]) -Greedy action tensor([ 0.7051, -0.0991, 0.0968, -0.2142]) tensor([0.4183, 0.1872, 0.2277, 0.1668]) -Greedy action tensor([ 0.7518, -0.6471, 0.1740, -0.7832]) tensor([0.4942, 0.1220, 0.2773, 0.1065]) -Greedy action tensor([ 0.8808, -0.4988, -0.0190, -0.4015]) tensor([0.5166, 0.1300, 0.2101, 0.1433]) -Greedy action tensor([ 0.9285, -0.7612, -0.2465, -0.7023]) tensor([0.5920, 0.1093, 0.1828, 0.1159]) -Greedy action tensor([ 0.6123, -0.1705, -0.0826, -0.2134]) tensor([0.4177, 0.1909, 0.2085, 0.1829]) -Greedy action tensor([ 0.9811, -0.6706, 0.1611, -0.5716]) tensor([0.5424, 0.1040, 0.2389, 0.1148]) -Greedy action tensor([ 0.3292, -0.1119, -0.0148, -0.2378]) tensor([0.3425, 0.2203, 0.2428, 0.1943]) -Greedy action tensor([ 0.7471, -0.4845, -0.1024, -0.4402]) tensor([0.4939, 0.1442, 0.2112, 0.1507]) -Greedy action tensor([ 0.1544, 0.2300, -0.0718, -0.1606]) tensor([0.2773, 0.2991, 0.2212, 0.2024]) -Greedy action tensor([ 0.5002, -0.1372, -0.0362, -0.0303]) tensor([0.3701, 0.1957, 0.2165, 0.2177]) -Greedy action tensor([ 0.7079, -0.6576, -0.0248, -0.3387]) tensor([0.4791, 0.1223, 0.2303, 0.1682]) -Greedy action tensor([ 0.2559, 0.1158, -0.0440, -0.0999]) tensor([0.3020, 0.2626, 0.2238, 0.2116]) -Greedy action tensor([ 0.8837, -0.5507, 0.1593, -0.4018]) tensor([0.5002, 0.1192, 0.2424, 0.1383]) -Greedy action tensor([ 0.6206, -0.4969, 0.1235, -0.6402]) tensor([0.4507, 0.1474, 0.2741, 0.1277]) -Greedy action tensor([ 0.6846, -0.2206, 0.0408, -0.5657]) tensor([0.4512, 0.1825, 0.2370, 0.1292]) -Greedy action tensor([ 0.4637, -0.2310, -0.0518, -0.2472]) tensor([0.3864, 0.1929, 0.2308, 0.1898]) -Greedy action tensor([ 0.9832, -0.8854, -0.0663, -0.2827]) tensor([0.5598, 0.0864, 0.1960, 0.1578]) -Greedy action tensor([ 0.3473, -0.0008, -0.1015, -0.2595]) tensor([0.3461, 0.2443, 0.2209, 0.1886]) -Greedy action tensor([ 0.7631, -0.3955, 0.0632, -0.4940]) tensor([0.4773, 0.1498, 0.2371, 0.1358]) -Greedy action tensor([ 0.3774, 0.0593, -0.0907, -0.1423]) tensor([0.3392, 0.2467, 0.2124, 0.2017]) -Greedy action tensor([ 0.5256, 0.0991, -0.1049, -0.1457]) tensor([0.3709, 0.2421, 0.1974, 0.1895]) -Greedy action tensor([ 0.9202, -0.8778, -0.0280, -0.4651]) tensor([0.5545, 0.0918, 0.2148, 0.1388]) -Greedy action tensor([ 0.6068, -0.1823, 0.0028, -0.1199]) tensor([0.4025, 0.1828, 0.2200, 0.1946]) -Greedy action tensor([ 0.7061, -0.4960, 0.1477, -0.4848]) tensor([0.4594, 0.1381, 0.2628, 0.1396]) -Greedy action tensor([ 0.4291, 0.1556, 0.0732, -0.2225]) tensor([0.3353, 0.2551, 0.2349, 0.1747]) -Greedy action tensor([ 0.2513, -0.0424, -0.0098, -0.3200]) tensor([0.3246, 0.2420, 0.2500, 0.1833]) -Greedy action tensor([ 0.4298, -0.0049, 0.0088, -0.0866]) tensor([0.3448, 0.2232, 0.2263, 0.2057]) -Greedy action tensor([ 0.3696, -0.1429, -0.0145, -0.3079]) tensor([0.3587, 0.2149, 0.2443, 0.1822]) -Greedy action tensor([ 0.7499, -0.3884, -0.0347, -0.4181]) tensor([0.4790, 0.1535, 0.2186, 0.1490]) -Greedy action tensor([ 1.2704, -1.0626, 0.0967, -0.7879]) tensor([0.6519, 0.0632, 0.2016, 0.0832]) -Greedy action tensor([ 0.6441, 0.0153, -0.0388, -0.4003]) tensor([0.4184, 0.2231, 0.2113, 0.1472]) -Greedy action tensor([ 0.5024, 0.0246, -0.0700, -0.2780]) tensor([0.3784, 0.2347, 0.2135, 0.1734]) -Greedy action tensor([ 0.6197, 0.2315, -0.1812, -0.0681]) tensor([0.3802, 0.2579, 0.1707, 0.1911]) -Greedy action tensor([ 0.6040, -0.0219, -0.0882, -0.3082]) tensor([0.4104, 0.2195, 0.2054, 0.1648]) -Greedy action tensor([ 0.5634, -0.1907, -0.0608, -0.3912]) tensor([0.4182, 0.1967, 0.2240, 0.1610]) -Greedy action tensor([ 0.4862, -0.1674, 0.0122, -0.2538]) tensor([0.3817, 0.1985, 0.2376, 0.1821]) -Greedy action tensor([ 0.4499, 0.0757, -0.0500, -0.4144]) tensor([0.3682, 0.2533, 0.2234, 0.1551]) -Greedy action tensor([ 0.6049, -0.4977, 0.0881, -0.4735]) tensor([0.4408, 0.1464, 0.2629, 0.1499]) -Greedy action tensor([ 0.3434, -0.2066, -0.1328, -0.4326]) tensor([0.3762, 0.2170, 0.2337, 0.1731]) -Greedy action tensor([ 0.7866, -0.4485, 0.0045, -0.3856]) tensor([0.4859, 0.1413, 0.2223, 0.1505]) -Greedy action tensor([ 0.6173, -0.3557, -0.0261, -0.4780]) tensor([0.4469, 0.1689, 0.2348, 0.1494]) -Greedy action tensor([ 0.6020, -0.2924, -0.0321, -0.1755]) tensor([0.4169, 0.1704, 0.2211, 0.1916]) -Greedy action tensor([ 0.5438, -0.3223, -0.0475, -0.2766]) tensor([0.4142, 0.1742, 0.2293, 0.1823]) -Greedy action tensor([ 0.7267, -0.3654, -0.0962, -0.1519]) tensor([0.4566, 0.1532, 0.2005, 0.1897]) -Greedy action tensor([ 0.6446, -0.4821, 0.3322, -1.1477]) tensor([0.4500, 0.1458, 0.3292, 0.0750]) -Greedy action tensor([ 0.6688, -0.5019, -0.0842, -0.3973]) tensor([0.4705, 0.1459, 0.2216, 0.1620]) -Greedy action tensor([ 0.5505, -0.4141, 0.1800, -0.4279]) tensor([0.4086, 0.1557, 0.2821, 0.1536]) -Greedy action tensor([ 0.3325, 0.1419, 0.0488, -0.2370]) tensor([0.3179, 0.2628, 0.2394, 0.1799]) -Greedy action tensor([ 0.4720, -0.2277, 0.0051, -0.3939]) tensor([0.3930, 0.1952, 0.2464, 0.1653]) -Greedy action tensor([ 0.9667, -0.8120, 0.0828, -0.4576]) tensor([0.5486, 0.0926, 0.2267, 0.1320]) -Greedy action tensor([ 0.8056, -0.5703, -0.0648, -0.3396]) tensor([0.5026, 0.1270, 0.2105, 0.1599]) -Greedy action tensor([ 0.6844, -0.0234, -0.0927, -0.1871]) tensor([0.4218, 0.2078, 0.1939, 0.1764]) -Greedy action tensor([ 0.8856, -0.5619, -0.0343, -0.6365]) tensor([0.5400, 0.1270, 0.2152, 0.1178]) -Greedy action tensor([ 1.4565, -0.5451, -0.3875, 0.0948]) tensor([0.6454, 0.0872, 0.1021, 0.1654]) -Greedy action tensor([ 0.4490, -0.2455, -0.1025, -0.0021]) tensor([0.3687, 0.1841, 0.2124, 0.2348]) -Greedy action tensor([ 0.7313, -0.3753, -0.0482, -0.2205]) tensor([0.4597, 0.1520, 0.2108, 0.1775]) -Greedy action tensor([ 0.7821, -0.4266, 0.0196, 0.2837]) tensor([0.4215, 0.1258, 0.1966, 0.2560]) -Greedy action tensor([ 1.1980, -0.7442, -0.4479, -0.2224]) tensor([0.6338, 0.0909, 0.1222, 0.1531]) -Greedy action tensor([ 0.6332, -0.3759, 0.0619, 0.1414]) tensor([0.3936, 0.1435, 0.2223, 0.2407]) -Greedy action tensor([ 1.2564, -0.6412, -0.2019, 0.5016]) tensor([0.5398, 0.0809, 0.1256, 0.2538]) -Greedy action tensor([ 0.6721, -0.1438, 0.0113, -0.0383]) tensor([0.4082, 0.1805, 0.2108, 0.2006]) -Greedy action tensor([ 1.6437, -0.1344, -0.5490, 0.2238]) tensor([0.6569, 0.1110, 0.0733, 0.1588]) -Greedy action tensor([ 1.2657, -0.1683, -0.1584, 0.0524]) tensor([0.5630, 0.1342, 0.1355, 0.1673]) -Greedy action tensor([ 1.2311, -0.3953, -0.4170, 0.1057]) tensor([0.5836, 0.1147, 0.1123, 0.1894]) -Greedy action tensor([ 0.9865, -0.2298, -0.0614, 0.1902]) tensor([0.4766, 0.1412, 0.1671, 0.2150]) -Greedy action tensor([ 1.6857, -0.7827, 0.0205, 0.3997]) tensor([0.6450, 0.0547, 0.1220, 0.1783]) -Greedy action tensor([ 1.1020, -0.5441, -0.0451, -0.1601]) tensor([0.5576, 0.1075, 0.1771, 0.1578]) -Greedy action tensor([ 1.3638, -0.7283, -0.6329, 0.3287]) tensor([0.6194, 0.0765, 0.0841, 0.2200]) -Greedy action tensor([ 0.8553, -0.0949, -0.1977, -0.0829]) tensor([0.4702, 0.1818, 0.1640, 0.1840]) -Greedy action tensor([ 1.2991, -0.5838, 0.0045, 0.3257]) tensor([0.5543, 0.0843, 0.1519, 0.2094]) -Greedy action tensor([ 0.8254, -0.4055, -0.3647, 0.1450]) tensor([0.4756, 0.1389, 0.1447, 0.2409]) -Greedy action tensor([ 0.6390, -0.3573, 0.1127, -0.2709]) tensor([0.4233, 0.1563, 0.2501, 0.1704]) -Greedy action tensor([ 1.1192, -0.2341, -0.2632, 0.3705]) tensor([0.5045, 0.1303, 0.1266, 0.2386]) -Greedy action tensor([ 1.2236, 0.0877, -0.0893, 0.1024]) tensor([0.5219, 0.1676, 0.1404, 0.1701]) -Greedy action tensor([ 0.6960, -0.1042, -0.0555, -0.0630]) tensor([0.4186, 0.1880, 0.1974, 0.1959]) -Greedy action tensor([ 1.2097, -0.2949, -0.2483, 0.3246]) tensor([0.5355, 0.1189, 0.1246, 0.2210]) -Greedy action tensor([ 2.2633, -0.9451, -0.3129, 0.1926]) tensor([0.8048, 0.0325, 0.0612, 0.1015]) -Greedy action tensor([ 0.5903, -0.6067, -0.3985, 0.6503]) tensor([0.3655, 0.1104, 0.1360, 0.3881]) -Greedy action tensor([ 1.0039, -0.3753, -0.2854, 0.0494]) tensor([0.5229, 0.1317, 0.1441, 0.2013]) -Greedy action tensor([ 1.3449, -0.5045, -0.6638, 0.3996]) tensor([0.5952, 0.0936, 0.0799, 0.2313]) -Greedy action tensor([ 1.5178, -0.2013, -0.4167, 0.0565]) tensor([0.6428, 0.1152, 0.0929, 0.1491]) -Greedy action tensor([ 1.2054, -0.6161, -0.3848, 0.8937]) tensor([0.4767, 0.0771, 0.0972, 0.3490]) -Greedy action tensor([ 0.5983, -0.5347, -0.2685, 0.2464]) tensor([0.4089, 0.1317, 0.1719, 0.2876]) -Greedy action tensor([ 1.7192, -0.6133, -0.4299, 0.4025]) tensor([0.6749, 0.0655, 0.0787, 0.1809]) -Greedy action tensor([ 0.6568, -0.1675, -0.3358, -0.2228]) tensor([0.4496, 0.1972, 0.1666, 0.1866]) -Greedy action tensor([ 1.1850, -0.2712, -0.1283, 0.0866]) tensor([0.5448, 0.1270, 0.1465, 0.1816]) -Greedy action tensor([ 1.1493, -0.3000, 0.0474, -0.0233]) tensor([0.5329, 0.1251, 0.1771, 0.1650]) -Greedy action tensor([ 1.1771, -0.2008, -0.4536, 0.1851]) tensor([0.5498, 0.1386, 0.1076, 0.2039]) -Greedy action tensor([ 1.4406, -0.7793, -0.3052, 0.3950]) tensor([0.6118, 0.0665, 0.1068, 0.2150]) -Greedy action tensor([ 0.6401, -0.3783, 0.2324, -0.1434]) tensor([0.4027, 0.1454, 0.2679, 0.1840]) -Greedy action tensor([ 1.1049, -0.4511, -0.2573, 0.2645]) tensor([0.5267, 0.1111, 0.1349, 0.2273]) -Greedy action tensor([ 1.6463, -0.5144, -0.1001, 0.2897]) tensor([0.6463, 0.0745, 0.1127, 0.1665]) -Greedy action tensor([ 1.2956, -0.2771, -0.2391, 0.4395]) tensor([0.5412, 0.1123, 0.1166, 0.2299]) -Greedy action tensor([ 0.8760, -0.4507, -0.1041, 0.1311]) tensor([0.4727, 0.1254, 0.1774, 0.2244]) -Greedy action tensor([ 1.3423, -0.2482, -0.2976, 0.2748]) tensor([0.5742, 0.1170, 0.1114, 0.1974]) -Greedy action tensor([ 1.1250, -0.6055, -0.2427, -0.2677]) tensor([0.5951, 0.1055, 0.1516, 0.1478]) -Greedy action tensor([ 0.7471, -0.7309, -0.2250, 0.1704]) tensor([0.4612, 0.1052, 0.1745, 0.2591]) -Greedy action tensor([ 1.2437, -0.1834, -0.3952, 0.1740]) tensor([0.5627, 0.1350, 0.1093, 0.1930]) -Greedy action tensor([ 1.1154, -0.5816, -0.2615, -0.1443]) tensor([0.5816, 0.1066, 0.1468, 0.1650]) -Greedy action tensor([ 1.2199, 0.1558, -0.1686, -0.4206]) tensor([0.5592, 0.1929, 0.1395, 0.1084]) -Greedy action tensor([ 1.1519, -0.6187, -0.4285, 0.5925]) tensor([0.5134, 0.0874, 0.1057, 0.2935]) -Greedy action tensor([ 1.3011, -0.1318, -0.2135, 0.2172]) tensor([0.5566, 0.1328, 0.1224, 0.1883]) -Greedy action tensor([ 1.3307, -0.8856, -0.2211, 0.3741]) tensor([0.5865, 0.0639, 0.1243, 0.2253]) -Greedy action tensor([ 1.0760, -0.4209, 0.0407, -0.0785]) tensor([0.5279, 0.1182, 0.1875, 0.1664]) -Greedy action tensor([ 1.4347, -0.4537, -0.1909, 0.2777]) tensor([0.6015, 0.0910, 0.1184, 0.1891]) -Greedy action tensor([ 1.3850, -0.5914, -0.2222, 0.1213]) tensor([0.6167, 0.0854, 0.1236, 0.1743]) -Greedy action tensor([ 0.9792, 0.0190, -0.1745, 0.0725]) tensor([0.4757, 0.1821, 0.1501, 0.1921]) -Greedy action tensor([ 0.7762, -0.0373, 0.2836, -0.1177]) tensor([0.4059, 0.1800, 0.2481, 0.1661]) -Greedy action tensor([ 1.3705, -0.4708, -0.1172, 0.1664]) tensor([0.5937, 0.0942, 0.1341, 0.1781]) -Greedy action tensor([ 1.1490, -0.3122, -0.3715, 0.3299]) tensor([0.5287, 0.1226, 0.1156, 0.2331]) -Greedy action tensor([ 1.0909, -0.4042, -0.4971, 0.4620]) tensor([0.5098, 0.1143, 0.1042, 0.2718]) -Greedy action tensor([ 1.4398, -0.6105, -0.1623, 0.3551]) tensor([0.5995, 0.0771, 0.1208, 0.2026]) -Greedy action tensor([ 1.8886, -1.0359, -0.2471, 0.4140]) tensor([0.7139, 0.0383, 0.0844, 0.1634]) -Greedy action tensor([ 1.4199, -0.1207, -0.3523, -0.0801]) tensor([0.6221, 0.1333, 0.1057, 0.1388]) -Greedy action tensor([ 0.9104, -0.3025, -0.6248, -0.4580]) tensor([0.5658, 0.1682, 0.1219, 0.1440]) -Greedy action tensor([ 2.1260, -0.8091, -0.2946, 0.5174]) tensor([0.7451, 0.0396, 0.0662, 0.1491]) -Greedy action tensor([ 1.6465, -0.3436, -0.2214, 0.3015]) tensor([0.6445, 0.0881, 0.0995, 0.1679]) -Greedy action tensor([ 0.9084, -0.4841, -0.4288, 0.4880]) tensor([0.4613, 0.1146, 0.1211, 0.3030]) -Greedy action tensor([ 0.7804, -0.3224, -0.1638, 0.2365]) tensor([0.4345, 0.1442, 0.1690, 0.2522]) -Greedy action tensor([ 1.4433, -0.5423, -0.3955, -0.1419]) tensor([0.6661, 0.0915, 0.1059, 0.1365]) -Greedy action tensor([ 1.6189, -1.0621, -0.3413, 0.2274]) tensor([0.6859, 0.0470, 0.0966, 0.1706]) -Greedy action tensor([ 0.6331, -0.1491, -0.0802, -0.0798]) tensor([0.4102, 0.1876, 0.2010, 0.2011]) -Greedy action tensor([ 1.4281, -0.5666, -0.2271, 0.7409]) tensor([0.5464, 0.0743, 0.1044, 0.2748]) -Greedy action tensor([ 1.4139, -0.3982, -0.1731, 0.1805]) tensor([0.6027, 0.0984, 0.1233, 0.1756]) -Greedy action tensor([ 1.1494, -0.4014, -0.0242, -0.0072]) tensor([0.5447, 0.1155, 0.1684, 0.1713]) -Greedy action tensor([ 1.4245, -0.6449, -0.1837, 0.2943]) tensor([0.6062, 0.0765, 0.1214, 0.1958]) -Greedy action tensor([ 1.3204, -0.1126, 0.0015, -0.0079]) tensor([0.5647, 0.1347, 0.1510, 0.1496]) -Greedy action tensor([ 1.2956, -0.1584, -0.2248, -0.0154]) tensor([0.5808, 0.1357, 0.1270, 0.1565]) -Greedy action tensor([ 1.7348, -0.3939, -0.1007, 0.4799]) tensor([0.6395, 0.0761, 0.1020, 0.1823]) -Greedy action tensor([ 1.0599, -0.3874, -0.2699, 0.2098]) tensor([0.5189, 0.1221, 0.1373, 0.2218]) -Greedy action tensor([ 1.5691, -1.2824, -0.3120, 0.5005]) tensor([0.6436, 0.0372, 0.0981, 0.2211]) -Greedy action tensor([ 1.1763, -0.5431, -0.2506, 0.0431]) tensor([0.5743, 0.1029, 0.1379, 0.1849]) -Greedy action tensor([ 0.8936, -0.3982, -0.2567, 0.3067]) tensor([0.4657, 0.1280, 0.1474, 0.2589]) -Greedy action tensor([ 1.2176, -0.0032, -0.1455, -0.2202]) tensor([0.5592, 0.1649, 0.1431, 0.1328]) -Greedy action tensor([ 0.6422, -0.2876, 1.0128, 0.4887]) tensor([0.2702, 0.1066, 0.3914, 0.2317]) -Greedy action tensor([ 0.3337, 0.7455, 0.5991, -0.3127]) tensor([0.2306, 0.3480, 0.3006, 0.1208]) -Greedy action tensor([-0.0705, -1.2804, 2.0293, -0.2174]) tensor([0.0968, 0.0289, 0.7907, 0.0836]) -Greedy action tensor([-0.1571, 1.2453, 0.2674, 0.5375]) tensor([0.1163, 0.4728, 0.1778, 0.2330]) -Greedy action tensor([ 1.2388, -0.3552, 0.0509, 0.6043]) tensor([0.4907, 0.0997, 0.1496, 0.2601]) -Greedy action tensor([-0.0610, -1.5395, -0.8653, 0.4575]) tensor([0.2981, 0.0679, 0.1334, 0.5006]) -Greedy action tensor([ 0.8848, -0.6939, -0.5051, 0.3456]) tensor([0.4905, 0.1012, 0.1222, 0.2861]) -Greedy action tensor([ 0.6464, 0.0314, 1.5378, -0.0415]) tensor([0.2231, 0.1206, 0.5441, 0.1121]) -Greedy action tensor([0.3582, 0.1048, 1.1028, 0.4374]) tensor([0.2014, 0.1563, 0.4242, 0.2181]) -Greedy action tensor([ 1.0586, -0.4150, -0.3140, 0.7260]) tensor([0.4546, 0.1042, 0.1152, 0.3260]) -Greedy action tensor([ 0.0781, 0.2809, 0.8075, -1.3698]) tensor([0.2206, 0.2702, 0.4574, 0.0518]) -Greedy action tensor([-0.2712, 0.6412, 0.0345, 1.4742]) tensor([0.0946, 0.2355, 0.1284, 0.5416]) -Greedy action tensor([1.1827, 0.4210, 0.1526, 0.7419]) tensor([0.4053, 0.1892, 0.1447, 0.2608]) -Greedy action tensor([-0.0775, 0.0102, 0.4427, 0.3883]) tensor([0.1863, 0.2034, 0.3134, 0.2969]) -Greedy action tensor([-0.0097, -1.3406, -0.5652, 0.8656]) tensor([0.2360, 0.0624, 0.1354, 0.5663]) -Greedy action tensor([ 1.3018, 0.0797, -1.0920, 1.7518]) tensor([0.3385, 0.0997, 0.0309, 0.5309]) -Greedy action tensor([ 0.8511, -0.2578, 1.5361, 0.2266]) tensor([0.2598, 0.0857, 0.5154, 0.1391]) -Greedy action tensor([-1.0475, 0.0599, 0.1782, -1.2318]) tensor([0.1210, 0.3662, 0.4122, 0.1006]) -Greedy action tensor([-0.3323, -2.3313, 1.4312, -0.5879]) tensor([0.1292, 0.0175, 0.7533, 0.1000]) -Greedy action tensor([ 1.7677, -0.2517, 0.4714, 0.2958]) tensor([0.6113, 0.0811, 0.1672, 0.1403]) -Greedy action tensor([ 1.1982, -0.3903, 1.1412, 0.4955]) tensor([0.3782, 0.0772, 0.3572, 0.1873]) -Greedy action tensor([-1.5188, -0.0179, 0.3160, -1.0377]) tensor([0.0748, 0.3356, 0.4686, 0.1210]) -Greedy action tensor([-0.3311, -0.1220, 0.2020, 1.0528]) tensor([0.1262, 0.1555, 0.2150, 0.5034]) -Greedy action tensor([ 0.9870, -0.6534, 1.3264, -0.3402]) tensor([0.3492, 0.0677, 0.4904, 0.0926]) -Greedy action tensor([ 1.0928, -0.8037, 1.4311, 1.1437]) tensor([0.2774, 0.0416, 0.3891, 0.2919]) -Greedy action tensor([0.0854, 0.1780, 0.7363, 0.4847]) tensor([0.1817, 0.1993, 0.3483, 0.2708]) -Greedy action tensor([ 0.4941, 0.5766, 0.5119, -0.4013]) tensor([0.2847, 0.3092, 0.2898, 0.1163]) -Greedy action tensor([0.1945, 0.2774, 0.1045, 0.2601]) tensor([0.2458, 0.2671, 0.2247, 0.2625]) -Greedy action tensor([ 1.7899, -1.3844, 1.1954, 1.0360]) tensor([0.4845, 0.0203, 0.2673, 0.2279]) -Greedy action tensor([-0.0669, -0.1190, 0.0092, 0.0053]) tensor([0.2437, 0.2313, 0.2630, 0.2620]) -Greedy action tensor([-0.0195, 0.7474, 0.2886, 0.4844]) tensor([0.1621, 0.3490, 0.2206, 0.2683]) -Greedy action tensor([ 1.6285, -1.0054, 0.1058, 0.1826]) tensor([0.6555, 0.0471, 0.1430, 0.1544]) -Greedy action tensor([ 0.4099, -0.2132, 0.6560, 0.1878]) tensor([0.2765, 0.1483, 0.3537, 0.2215]) -Greedy action tensor([ 1.5444, -0.6201, 1.0879, 0.5864]) tensor([0.4691, 0.0539, 0.2971, 0.1800]) -Greedy action tensor([ 0.9004, -0.4620, 0.4786, 0.6057]) tensor([0.3764, 0.0964, 0.2469, 0.2803]) -Greedy action tensor([ 1.4760, -0.0154, -0.9035, 0.1797]) tensor([0.6285, 0.1414, 0.0582, 0.1719]) -Greedy action tensor([0.7817, 0.3296, 0.2382, 0.0869]) tensor([0.3682, 0.2343, 0.2138, 0.1838]) -Greedy action tensor([ 1.5251, -1.0426, 1.4206, 0.7834]) tensor([0.4075, 0.0313, 0.3671, 0.1941]) -Greedy action tensor([ 1.1190, -0.2742, -0.2072, 1.2101]) tensor([0.3833, 0.0952, 0.1018, 0.4198]) -Greedy action tensor([1.1144, 0.2028, 1.0373, 0.9242]) tensor([0.3170, 0.1274, 0.2935, 0.2621]) -Greedy action tensor([-0.4335, -0.8310, 0.4067, 0.0094]) tensor([0.1803, 0.1212, 0.4177, 0.2808]) -Greedy action tensor([ 1.3283, 0.1931, -0.2672, 0.5036]) tensor([0.5095, 0.1637, 0.1033, 0.2234]) -Greedy action tensor([-0.8028, -0.5707, 1.8263, 0.4238]) tensor([0.0512, 0.0646, 0.7097, 0.1746]) -Greedy action tensor([0.4755, 0.2392, 1.6236, 0.0464]) tensor([0.1788, 0.1412, 0.5636, 0.1164]) -Greedy action tensor([ 1.2896, -1.7113, 1.4746, 0.6782]) tensor([0.3577, 0.0178, 0.4304, 0.1941]) -Greedy action tensor([-0.0444, 0.0981, 0.2385, -0.2823]) tensor([0.2343, 0.2702, 0.3109, 0.1847]) -Greedy action tensor([-1.1371, 0.0028, -0.7568, -1.2343]) tensor([0.1539, 0.4813, 0.2252, 0.1397]) -Greedy action tensor([ 0.6490, 0.8428, -0.3092, -0.2656]) tensor([0.3335, 0.4049, 0.1279, 0.1336]) -Greedy action tensor([ 0.0664, -1.9541, -0.2603, 1.1452]) tensor([0.2085, 0.0277, 0.1504, 0.6134]) -Greedy action tensor([ 0.3578, -2.1675, 0.4244, -0.2519]) tensor([0.3714, 0.0297, 0.3970, 0.2019]) -Greedy action tensor([-0.4262, 1.2854, 0.7706, 0.0394]) tensor([0.0874, 0.4841, 0.2893, 0.1392]) -Greedy action tensor([0.2643, 0.4536, 0.2362, 0.1197]) tensor([0.2471, 0.2987, 0.2403, 0.2139]) -Greedy action tensor([ 1.7523, -0.9015, -0.9300, 1.5406]) tensor([0.5133, 0.0361, 0.0351, 0.4154]) -Greedy action tensor([0.9182, 1.3048, 0.0680, 0.1448]) tensor([0.2975, 0.4380, 0.1271, 0.1373]) -Greedy action tensor([-1.2948, -0.9106, -0.8288, -0.6424]) tensor([0.1672, 0.2455, 0.2664, 0.3210]) -Greedy action tensor([ 0.3118, 1.0100, -0.2863, 0.8021]) tensor([0.1926, 0.3871, 0.1059, 0.3144]) -Greedy action tensor([-1.4299, -0.9120, -0.7384, 0.3576]) tensor([0.0939, 0.1576, 0.1875, 0.5610]) -Greedy action tensor([ 0.2557, -0.8092, -0.7727, 0.5495]) tensor([0.3285, 0.1133, 0.1175, 0.4407]) -Greedy action tensor([ 0.5088, -0.4514, -0.2868, 1.1119]) tensor([0.2731, 0.1045, 0.1232, 0.4991]) -Greedy action tensor([ 1.3664, -1.3806, 0.8849, 0.2423]) tensor([0.4983, 0.0320, 0.3079, 0.1619]) -Greedy action tensor([ 0.1091, -1.6738, 1.2489, 0.9764]) tensor([0.1498, 0.0252, 0.4684, 0.3566]) -Greedy action tensor([ 0.3217, 0.1462, -0.0912, 0.2306]) tensor([0.2930, 0.2458, 0.1938, 0.2674]) -Greedy action tensor([ 0.4868, -1.0763, 0.1850, 1.1582]) tensor([0.2560, 0.0536, 0.1893, 0.5010]) -Greedy action tensor([ 1.5390, -0.5608, 1.5379, -0.0081]) tensor([0.4284, 0.0525, 0.4279, 0.0912]) -Greedy action tensor([ 0.4709, -2.2589, -0.4024, 0.8022]) tensor([0.3478, 0.0227, 0.1452, 0.4843]) -Greedy action tensor([ 0.4710, -0.6748, 0.3734, -0.4795]) tensor([0.3829, 0.1218, 0.3473, 0.1480]) -Greedy action tensor([-0.7529, -1.1196, 0.3185, 0.2615]) tensor([0.1357, 0.0940, 0.3961, 0.3742]) -Greedy action tensor([ 1.3756, 0.4743, -0.2599, 0.6782]) tensor([0.4765, 0.1935, 0.0928, 0.2372]) -Greedy action tensor([-0.7639, -1.2365, 1.5665, -0.5608]) tensor([0.0762, 0.0475, 0.7831, 0.0933]) -Greedy action tensor([ 0.8499, -1.0208, -0.7524, 1.2588]) tensor([0.3496, 0.0538, 0.0704, 0.5262]) -Greedy action tensor([-0.2238, -2.0410, -0.2508, 0.5864]) tensor([0.2281, 0.0371, 0.2220, 0.5128]) -Greedy action tensor([-0.5792, -0.9136, 0.5277, 1.1365]) tensor([0.0971, 0.0695, 0.2936, 0.5398]) -Greedy action tensor([-0.1370, 0.3935, 0.7809, -0.0382]) tensor([0.1585, 0.2695, 0.3970, 0.1750]) -Greedy action tensor([ 0.8666, -0.9878, -0.5312, 1.0664]) tensor([0.3810, 0.0596, 0.0942, 0.4652]) -Greedy action tensor([ 0.8062, -0.9965, 0.0257, 1.3294]) tensor([0.3021, 0.0498, 0.1384, 0.5097]) -Greedy action tensor([ 0.0392, 0.5189, 0.5785, -0.2682]) tensor([0.1974, 0.3189, 0.3385, 0.1452]) -Greedy action tensor([-0.2920, -0.0028, 1.4568, -0.9814]) tensor([0.1165, 0.1555, 0.6695, 0.0585]) -Greedy action tensor([-0.4465, 0.1460, -0.7910, 0.4792]) tensor([0.1655, 0.2994, 0.1173, 0.4178]) -Greedy action tensor([ 0.3328, -0.1998, 0.1017, 0.7138]) tensor([0.2601, 0.1527, 0.2065, 0.3807]) -Greedy action tensor([ 1.1145, 0.5167, -0.5365, 0.9578]) tensor([0.3851, 0.2118, 0.0739, 0.3292]) -Greedy action tensor([ 0.5354, -0.5998, 0.1685, 0.4976]) tensor([0.3359, 0.1079, 0.2327, 0.3234]) -Greedy action tensor([-1.1443, -1.1586, -0.5632, 0.6493]) tensor([0.1022, 0.1007, 0.1827, 0.6143]) -Greedy action tensor([ 0.1673, -0.0580, 0.1894, -0.2486]) tensor([0.2873, 0.2294, 0.2937, 0.1896]) -Greedy action tensor([ 0.9987, -0.3128, 0.0364, -0.4312]) tensor([0.5289, 0.1425, 0.2020, 0.1266]) -Greedy action tensor([ 0.4002, -0.0772, 0.0947, -0.2423]) tensor([0.3469, 0.2152, 0.2555, 0.1824]) -Greedy action tensor([ 0.6509, -0.1579, -0.0146, -0.4795]) tensor([0.4381, 0.1952, 0.2252, 0.1415]) -Greedy action tensor([ 0.5461, -0.2955, -0.0175, -0.2582]) tensor([0.4086, 0.1761, 0.2325, 0.1828]) -Greedy action tensor([ 0.8282, -0.4945, -0.1803, -0.4414]) tensor([0.5230, 0.1393, 0.1908, 0.1469]) -Greedy action tensor([ 0.7639, -0.3149, 0.1942, -0.3885]) tensor([0.4501, 0.1530, 0.2546, 0.1422]) -Greedy action tensor([ 0.7649, -0.3489, -0.0306, -0.2816]) tensor([0.4693, 0.1541, 0.2118, 0.1648]) -Greedy action tensor([ 0.5018, -0.1989, 0.0326, -0.1244]) tensor([0.3765, 0.1868, 0.2355, 0.2013]) -Greedy action tensor([ 0.3166, -0.1633, 0.1345, -0.2700]) tensor([0.3324, 0.2057, 0.2770, 0.1849]) -Greedy action tensor([ 0.7531, 0.1588, -0.1389, -0.1905]) tensor([0.4253, 0.2348, 0.1743, 0.1656]) -Greedy action tensor([ 1.2204, -1.1434, 0.1085, -0.4911]) tensor([0.6236, 0.0587, 0.2051, 0.1126]) -Greedy action tensor([ 0.5174, -0.2158, -0.0223, -0.2287]) tensor([0.3941, 0.1893, 0.2297, 0.1869]) -Greedy action tensor([ 0.6745, -0.3788, 0.0551, -0.5589]) tensor([0.4591, 0.1601, 0.2471, 0.1337]) -Greedy action tensor([ 0.7645, -0.4589, -0.0804, -0.3300]) tensor([0.4858, 0.1429, 0.2087, 0.1626]) -Greedy action tensor([ 0.6392, 0.0095, -0.0673, -0.1800]) tensor([0.4054, 0.2160, 0.2000, 0.1787]) -Greedy action tensor([ 0.7453, -0.6095, -0.2863, -0.7535]) tensor([0.5441, 0.1404, 0.1939, 0.1216]) -Greedy action tensor([ 1.0326, -0.5540, 0.1289, -0.5838]) tensor([0.5530, 0.1132, 0.2240, 0.1098]) -Greedy action tensor([ 0.4618, -0.1347, -0.0282, -0.2484]) tensor([0.3767, 0.2074, 0.2308, 0.1851]) -Greedy action tensor([ 0.5594, -0.4157, 0.0860, -0.3551]) tensor([0.4165, 0.1571, 0.2595, 0.1669]) -Greedy action tensor([ 0.7705, -0.7121, 0.0321, -0.6439]) tensor([0.5133, 0.1166, 0.2453, 0.1248]) -Greedy action tensor([ 0.7029, -0.3559, -0.1005, -0.5378]) tensor([0.4799, 0.1665, 0.2149, 0.1388]) -Greedy action tensor([ 0.6309, -0.4490, -0.0302, -0.5631]) tensor([0.4632, 0.1573, 0.2391, 0.1403]) -Greedy action tensor([ 0.8344, -0.6292, -0.1238, -0.6418]) tensor([0.5425, 0.1255, 0.2081, 0.1240]) -Greedy action tensor([ 0.6769, -0.2829, -0.0076, -0.2153]) tensor([0.4353, 0.1667, 0.2196, 0.1784]) -Greedy action tensor([ 0.6819, -0.5339, 0.0548, -0.5475]) tensor([0.4710, 0.1396, 0.2516, 0.1378]) -Greedy action tensor([ 0.8159, -0.3868, 0.0511, -0.1219]) tensor([0.4635, 0.1392, 0.2158, 0.1815]) -Greedy action tensor([ 0.3162, 0.0854, 0.0260, -0.2057]) tensor([0.3189, 0.2532, 0.2386, 0.1893]) -Greedy action tensor([ 0.6257, -0.4878, -0.0485, -0.4492]) tensor([0.4589, 0.1507, 0.2338, 0.1566]) -Greedy action tensor([ 0.4308, -0.2046, 0.1509, -0.2417]) tensor([0.3577, 0.1895, 0.2703, 0.1826]) -Greedy action tensor([ 0.6623, -0.5393, -0.0407, -0.4240]) tensor([0.4688, 0.1410, 0.2321, 0.1582]) -Greedy action tensor([ 0.7813, -0.6079, -0.0275, -0.6090]) tensor([0.5145, 0.1283, 0.2291, 0.1281]) -Greedy action tensor([ 0.9403, -0.5775, -0.0012, -0.4053]) tensor([0.5349, 0.1172, 0.2086, 0.1393]) -Greedy action tensor([ 0.5751, -0.1872, 0.0466, -0.3035]) tensor([0.4046, 0.1888, 0.2385, 0.1681]) -Greedy action tensor([ 0.6834, -0.6066, 0.1665, -0.7871]) tensor([0.4759, 0.1310, 0.2838, 0.1094]) -Greedy action tensor([ 0.7887, -0.3572, 0.0512, -0.4142]) tensor([0.4770, 0.1517, 0.2281, 0.1433]) -Greedy action tensor([ 0.8294, -0.3328, -0.1080, -0.2894]) tensor([0.4924, 0.1540, 0.1928, 0.1608]) -Greedy action tensor([ 0.3536, -0.2107, -0.0712, -0.4454]) tensor([0.3742, 0.2128, 0.2447, 0.1683]) -Greedy action tensor([ 1.1167e+00, -5.5570e-01, -3.1269e-04, -5.0107e-01]) tensor([0.5836, 0.1096, 0.1910, 0.1158]) -Greedy action tensor([ 0.8103, -0.5805, -0.0083, -0.4335]) tensor([0.5055, 0.1258, 0.2230, 0.1457]) -Greedy action tensor([ 0.6655, -0.5158, -0.0284, -0.2852]) tensor([0.4560, 0.1399, 0.2278, 0.1762]) -Greedy action tensor([ 0.6744, -0.1795, 0.1257, -0.4639]) tensor([0.4303, 0.1832, 0.2486, 0.1379]) -Greedy action tensor([ 0.5577, -0.0294, -0.0539, -0.2055]) tensor([0.3899, 0.2168, 0.2115, 0.1818]) -Greedy action tensor([ 0.1313, 0.1879, -0.0139, -0.1869]) tensor([0.2739, 0.2899, 0.2369, 0.1993]) -Greedy action tensor([ 0.8861, -0.4129, -0.0554, -0.3678]) tensor([0.5133, 0.1400, 0.2002, 0.1465]) -Greedy action tensor([ 0.6692, -0.5988, -0.0675, -0.5647]) tensor([0.4875, 0.1372, 0.2334, 0.1419]) -Greedy action tensor([ 1.2741, -1.5515, -0.0647, -0.6486]) tensor([0.6814, 0.0404, 0.1786, 0.0996]) -Greedy action tensor([ 0.4114, 0.0691, 0.0501, -0.1527]) tensor([0.3361, 0.2386, 0.2341, 0.1912]) -Greedy action tensor([ 0.5312, 0.0237, 0.0047, -0.0762]) tensor([0.3653, 0.2199, 0.2158, 0.1990]) -Greedy action tensor([ 0.3880, -0.1012, -0.1609, -0.2862]) tensor([0.3703, 0.2271, 0.2139, 0.1887]) -Greedy action tensor([ 0.5897, -0.0461, -0.0807, -0.4823]) tensor([0.4196, 0.2222, 0.2146, 0.1436]) -Greedy action tensor([ 0.9648, -0.4831, 0.0793, -0.6695]) tensor([0.5427, 0.1276, 0.2239, 0.1059]) -Greedy action tensor([ 0.3467, -0.1739, -0.0695, -0.4168]) tensor([0.3677, 0.2185, 0.2425, 0.1714]) -Greedy action tensor([ 0.8391, -0.5470, -0.0217, -0.1876]) tensor([0.4924, 0.1231, 0.2082, 0.1763]) -Greedy action tensor([ 0.5417, -0.0915, 0.0024, -0.2838]) tensor([0.3918, 0.2080, 0.2285, 0.1716]) -Greedy action tensor([ 0.8864, -0.5439, 0.0111, -0.4449]) tensor([0.5208, 0.1246, 0.2170, 0.1376]) -Greedy action tensor([ 0.8456, -0.6689, -0.0221, -0.4898]) tensor([0.5255, 0.1156, 0.2207, 0.1382]) -Greedy action tensor([ 0.0822, -0.0315, -0.1393, -0.4287]) tensor([0.3036, 0.2710, 0.2433, 0.1821]) -Greedy action tensor([ 0.4377, -0.0540, -0.0487, -0.0849]) tensor([0.3547, 0.2169, 0.2181, 0.2103]) -Greedy action tensor([ 0.7977, -0.6152, -0.2330, -0.7074]) tensor([0.5488, 0.1336, 0.1958, 0.1218]) -Greedy action tensor([ 0.9279, -0.6920, -0.0196, -0.7429]) tensor([0.5638, 0.1116, 0.2186, 0.1060]) -Greedy action tensor([ 0.3407, -0.0067, -0.1639, -0.3342]) tensor([0.3547, 0.2506, 0.2141, 0.1806]) -Greedy action tensor([ 0.5780, -0.3524, -0.1177, -0.2791]) tensor([0.4315, 0.1702, 0.2152, 0.1831]) -Greedy action tensor([ 0.8948, -0.4880, -0.1156, -0.5600]) tensor([0.5410, 0.1357, 0.1970, 0.1263]) -Greedy action tensor([ 0.5494, -0.3392, 0.0263, -0.3229]) tensor([0.4129, 0.1698, 0.2447, 0.1726]) -Greedy action tensor([ 0.3890, 0.1113, -0.0300, -0.1952]) tensor([0.3364, 0.2548, 0.2212, 0.1875]) -Greedy action tensor([ 0.6972, -0.4117, 0.1212, -0.6852]) tensor([0.4666, 0.1540, 0.2623, 0.1171]) -Greedy action tensor([ 0.7722, -0.7567, -0.0853, -0.3651]) tensor([0.5098, 0.1105, 0.2163, 0.1635]) -Greedy action tensor([ 0.3585, -0.1535, 0.0244, -0.3844]) tensor([0.3583, 0.2147, 0.2565, 0.1705]) -Greedy action tensor([ 0.4010, -0.0907, -0.1102, -0.0730]) tensor([0.3529, 0.2158, 0.2117, 0.2197]) -Greedy action tensor([ 0.6096, -0.3017, -0.0743, -0.2915]) tensor([0.4324, 0.1738, 0.2182, 0.1756]) -Greedy action tensor([ 0.6284, -0.3531, -0.0072, -0.3036]) tensor([0.4351, 0.1631, 0.2305, 0.1713]) -Greedy action tensor([ 0.6961, -0.4705, 0.0295, -0.4373]) tensor([0.4658, 0.1451, 0.2392, 0.1500]) -Greedy action tensor([ 0.9641, -0.9440, -0.0129, -0.4453]) tensor([0.5653, 0.0839, 0.2128, 0.1381]) -Greedy action tensor([ 1.0186, -0.5154, -0.0877, -0.6320]) tensor([0.5752, 0.1241, 0.1903, 0.1104]) -Greedy action tensor([ 0.7262, -0.2926, 0.0982, -0.1774]) tensor([0.4348, 0.1570, 0.2320, 0.1761]) -Greedy action tensor([ 0.5424, 0.0322, -0.1319, -0.2537]) tensor([0.3905, 0.2344, 0.1990, 0.1761]) -Greedy action tensor([ 5.4460e-01, -7.5649e-02, -3.8326e-04, -3.2931e-01]) tensor([0.3945, 0.2122, 0.2287, 0.1646]) -Greedy action tensor([ 0.8059, -0.2711, 0.1228, -0.0664]) tensor([0.4418, 0.1505, 0.2231, 0.1847]) -Greedy action tensor([ 1.2658, -0.8787, -0.0059, -0.5037]) tensor([0.6378, 0.0747, 0.1788, 0.1087]) -Greedy action tensor([ 0.4416, -0.0779, -0.1601, -0.1559]) tensor([0.3714, 0.2209, 0.2035, 0.2043]) -Greedy action tensor([-0.3326, -0.2967, 1.1345, 1.6618]) tensor([0.0729, 0.0755, 0.3160, 0.5355]) -Greedy action tensor([-1.1715, -0.5384, 0.9443, 1.1951]) tensor([0.0458, 0.0862, 0.3798, 0.4881]) -Greedy action tensor([-1.2890, -0.5402, 1.1537, 1.1528]) tensor([0.0383, 0.0810, 0.4406, 0.4402]) -Greedy action tensor([-1.0815, -0.4255, 0.3593, 0.5872]) tensor([0.0803, 0.1547, 0.3391, 0.4259]) -Greedy action tensor([-1.4329, -0.5328, 0.7344, 0.7955]) tensor([0.0466, 0.1145, 0.4067, 0.4323]) -Greedy action tensor([-1.1119, -0.6172, 0.2711, 0.4271]) tensor([0.0886, 0.1453, 0.3532, 0.4129]) -Greedy action tensor([-1.7217, -0.4029, 0.5494, -0.0534]) tensor([0.0507, 0.1895, 0.4911, 0.2687]) -Greedy action tensor([-1.4624, 0.3904, 0.8309, 0.8138]) tensor([0.0370, 0.2360, 0.3666, 0.3604]) -Greedy action tensor([-1.1322, -0.6984, 0.2383, 0.1011]) tensor([0.1009, 0.1557, 0.3972, 0.3463]) -Greedy action tensor([-1.5672, -0.5714, 0.5416, 0.2299]) tensor([0.0556, 0.1506, 0.4583, 0.3355]) -Greedy action tensor([-1.4198, -0.5948, 0.3912, 0.1541]) tensor([0.0703, 0.1604, 0.4300, 0.3392]) -Greedy action tensor([-1.5573, -0.4777, 1.2073, 0.9251]) tensor([0.0315, 0.0926, 0.4994, 0.3766]) -Greedy action tensor([-1.4124, -0.1523, 0.3203, 0.2099]) tensor([0.0656, 0.2313, 0.3710, 0.3322]) -Greedy action tensor([-1.7247, -0.4296, 0.6190, 0.1793]) tensor([0.0459, 0.1676, 0.4783, 0.3082]) -Greedy action tensor([-1.6527, -0.1432, 0.4387, 0.0268]) tensor([0.0527, 0.2383, 0.4265, 0.2825]) -Greedy action tensor([-2.0032, -0.6386, 1.3988, 0.6402]) tensor([0.0204, 0.0799, 0.6127, 0.2870]) -Greedy action tensor([-1.3764, -0.6437, 0.4297, 0.3818]) tensor([0.0668, 0.1390, 0.4066, 0.3876]) -Greedy action tensor([-0.5079, 0.8515, -1.0020, -0.3859]) tensor([0.1508, 0.5870, 0.0920, 0.1703]) -Greedy action tensor([-1.3810, -0.5546, 0.4390, 0.2593]) tensor([0.0684, 0.1564, 0.4223, 0.3529]) -Greedy action tensor([-1.1290, -0.5882, 0.3644, 0.1028]) tensor([0.0944, 0.1621, 0.4201, 0.3234]) -Greedy action tensor([-0.7274, -0.5076, 1.0791, 1.6282]) tensor([0.0530, 0.0660, 0.3225, 0.5585]) -Greedy action tensor([-1.9070, -0.4453, 0.6443, -0.1589]) tensor([0.0419, 0.1806, 0.5370, 0.2405]) -Greedy action tensor([-1.3403, -0.5289, 0.4720, 0.6020]) tensor([0.0612, 0.1377, 0.3746, 0.4266]) -Greedy action tensor([-1.6708, -0.3403, 0.7333, 0.2783]) tensor([0.0437, 0.1654, 0.4839, 0.3070]) -Greedy action tensor([-1.4743, -0.4775, 0.4249, 0.0958]) tensor([0.0658, 0.1783, 0.4396, 0.3163]) -Greedy action tensor([-1.8190, -0.4407, 0.6401, -0.0414]) tensor([0.0443, 0.1757, 0.5179, 0.2620]) -Greedy action tensor([-0.5548, -0.5770, 0.2004, 0.1264]) tensor([0.1644, 0.1608, 0.3499, 0.3249]) -Greedy action tensor([-1.8865, -0.3612, 0.6300, -0.1379]) tensor([0.0421, 0.1937, 0.5220, 0.2422]) -Greedy action tensor([-1.8222, -0.4387, 0.6163, -0.0789]) tensor([0.0451, 0.1800, 0.5169, 0.2579]) -Greedy action tensor([-1.6913, -0.4182, 0.7020, 0.4444]) tensor([0.0417, 0.1489, 0.4565, 0.3528]) -Greedy action tensor([0.1411, 0.3110, 0.8903, 1.7024]) tensor([0.1103, 0.1307, 0.2333, 0.5256]) -Greedy action tensor([-1.9129, -0.2010, 0.6932, 0.0103]) tensor([0.0371, 0.2057, 0.5030, 0.2541]) -Greedy action tensor([-1.5005, -0.7618, 0.7653, 0.6214]) tensor([0.0474, 0.0993, 0.4573, 0.3960]) -Greedy action tensor([-1.3978, -0.6090, 0.3895, 0.1169]) tensor([0.0729, 0.1604, 0.4353, 0.3314]) -Greedy action tensor([-1.3204, -0.4601, 0.4980, 0.6864]) tensor([0.0589, 0.1393, 0.3632, 0.4385]) -Greedy action tensor([-1.3593, -0.6534, 0.4030, 0.1561]) tensor([0.0746, 0.1511, 0.4347, 0.3396]) -Greedy action tensor([-1.5016, -0.5888, 0.4608, 0.0816]) tensor([0.0646, 0.1610, 0.4598, 0.3147]) -Greedy action tensor([-1.1598, -0.5532, 0.2542, 0.2758]) tensor([0.0897, 0.1645, 0.3689, 0.3769]) -Greedy action tensor([-0.9727, -0.5267, 0.4927, -0.3216]) tensor([0.1135, 0.1773, 0.4915, 0.2177]) -Greedy action tensor([-0.6624, -0.2789, -0.7596, -0.1255]) tensor([0.1966, 0.2885, 0.1784, 0.3364]) -Greedy action tensor([-1.5830, -0.5150, 0.4679, 0.0838]) tensor([0.0589, 0.1714, 0.4579, 0.3119]) -Greedy action tensor([-1.0090, -0.6602, 0.4660, 0.3231]) tensor([0.0945, 0.1340, 0.4132, 0.3582]) -Greedy action tensor([-1.8005, -0.3498, 0.5702, -0.0802]) tensor([0.0464, 0.1979, 0.4966, 0.2591]) -Greedy action tensor([-1.3609, 0.4709, 0.2011, 0.2419]) tensor([0.0589, 0.3678, 0.2808, 0.2925]) -Greedy action tensor([-1.8806, -0.4204, 0.6322, -0.1387]) tensor([0.0428, 0.1844, 0.5284, 0.2444]) -Greedy action tensor([-1.3865, 0.2088, -0.1924, -0.1749]) tensor([0.0794, 0.3916, 0.2622, 0.2668]) -Greedy action tensor([-0.2906, -0.1135, 0.9495, 1.7264]) tensor([0.0760, 0.0907, 0.2625, 0.5709]) -Greedy action tensor([-1.4609, -0.5588, 0.4737, 0.3033]) tensor([0.0616, 0.1519, 0.4266, 0.3598]) -Greedy action tensor([-1.7600, -0.4977, 0.5793, -0.0429]) tensor([0.0488, 0.1726, 0.5066, 0.2720]) -Greedy action tensor([-0.9661, -0.6101, 0.5390, -0.3537]) tensor([0.1139, 0.1627, 0.5132, 0.2102]) -Greedy action tensor([-1.4296, -0.1128, 0.4282, -0.1119]) tensor([0.0672, 0.2508, 0.4309, 0.2511]) -Greedy action tensor([-1.7423, -0.4740, 0.5471, -0.0672]) tensor([0.0506, 0.1799, 0.4993, 0.2702]) -Greedy action tensor([-1.8016, -0.4970, 0.6542, -0.0256]) tensor([0.0449, 0.1657, 0.5239, 0.2655]) -Greedy action tensor([-1.6874, -0.5352, 0.5362, -0.0355]) tensor([0.0537, 0.1700, 0.4962, 0.2801]) -Greedy action tensor([-1.0559, -0.5339, 0.4306, 1.0127]) tensor([0.0666, 0.1122, 0.2944, 0.5269]) -Greedy action tensor([-1.8107, -0.7034, 0.1762, -0.2891]) tensor([0.0629, 0.1903, 0.4587, 0.2881]) -Greedy action tensor([-1.0632, -0.4570, -0.2966, -0.0860]) tensor([0.1308, 0.2399, 0.2816, 0.3476]) -Greedy action tensor([-1.4335, -0.5781, 0.6108, 0.5078]) tensor([0.0554, 0.1304, 0.4281, 0.3861]) -Greedy action tensor([-0.5641, -0.5625, 0.1783, 0.0848]) tensor([0.1662, 0.1665, 0.3492, 0.3181]) -Greedy action tensor([-1.2559, -0.5636, 0.2807, 0.2919]) tensor([0.0810, 0.1618, 0.3765, 0.3807]) -Greedy action tensor([-1.5190, -0.4589, 0.5484, 0.1971]) tensor([0.0576, 0.1663, 0.4555, 0.3205]) -Greedy action tensor([-0.6797, -0.5511, 0.2298, 0.2583]) tensor([0.1394, 0.1585, 0.3461, 0.3561]) -Greedy action tensor([-0.9800, -0.6025, 0.2156, 0.3357]) tensor([0.1054, 0.1537, 0.3483, 0.3927]) -Greedy action tensor([-1.7677, -0.4673, 0.5937, -0.0344]) tensor([0.0478, 0.1753, 0.5066, 0.2703]) -Greedy action tensor([-0.7999, -0.1404, 1.2249, 1.4540]) tensor([0.0499, 0.0965, 0.3781, 0.4754]) -Greedy action tensor([-2.0151, -0.8842, 0.4193, -0.1872]) tensor([0.0460, 0.1426, 0.5251, 0.2863]) -Greedy action tensor([-1.3753, -0.4147, 0.6159, 0.7563]) tensor([0.0516, 0.1349, 0.3782, 0.4352]) -Greedy action tensor([-1.8626, -0.4629, 0.6320, -0.1212]) tensor([0.0437, 0.1772, 0.5297, 0.2494]) -Greedy action tensor([-1.5964, -0.6702, 0.5098, 0.0550]) tensor([0.0590, 0.1489, 0.4846, 0.3075]) -Greedy action tensor([-1.3405, -0.4236, 0.1244, -0.7296]) tensor([0.1034, 0.2587, 0.4474, 0.1905]) -Greedy action tensor([-0.9555, -0.5355, 0.1650, -0.4983]) tensor([0.1395, 0.2123, 0.4278, 0.2204]) -Greedy action tensor([-1.2239, -1.0795, 0.5594, -0.5150]) tensor([0.0987, 0.1140, 0.5869, 0.2004]) -Greedy action tensor([-1.8726, -0.4659, 0.6870, -0.0641]) tensor([0.0415, 0.1693, 0.5362, 0.2530]) -Greedy action tensor([-1.7962, -0.4271, 0.6625, 0.0086]) tensor([0.0441, 0.1732, 0.5150, 0.2678]) -Greedy action tensor([-1.7341, -0.3290, 0.6077, 0.0894]) tensor([0.0461, 0.1881, 0.4799, 0.2858]) -Greedy action tensor([-1.1407, -0.5315, 0.3500, 0.7324]) tensor([0.0725, 0.1334, 0.3220, 0.4721]) -Greedy action tensor([-0.8533, -0.6933, 0.5066, 0.7151]) tensor([0.0920, 0.1080, 0.3584, 0.4416]) -Greedy action tensor([-1.3666, 0.4593, 0.6402, -0.6952]) tensor([0.0602, 0.3739, 0.4480, 0.1179]) -Greedy action tensor([-1.8634, -0.4395, 0.6357, -0.1136]) tensor([0.0433, 0.1800, 0.5274, 0.2493]) -Greedy action tensor([-1.0017, -0.6130, 0.5704, 1.2437]) tensor([0.0597, 0.0881, 0.2878, 0.5643]) -Greedy action tensor([-1.6109, -0.4278, 0.4860, 0.0192]) tensor([0.0571, 0.1864, 0.4649, 0.2915]) -Greedy action tensor([ 1.1686, -0.3065, -0.2069, -0.0743]) tensor([0.5650, 0.1292, 0.1428, 0.1630]) -Greedy action tensor([ 1.0748, -0.5242, -0.0231, 0.0963]) tensor([0.5231, 0.1057, 0.1745, 0.1966]) -Greedy action tensor([ 1.3297, -0.7053, -0.2803, 0.2409]) tensor([0.5998, 0.0784, 0.1199, 0.2019]) -Greedy action tensor([ 1.1102, -0.8425, -0.5220, 0.8567]) tensor([0.4732, 0.0671, 0.0925, 0.3672]) -Greedy action tensor([ 0.9113, -0.3714, 0.0050, 0.5274]) tensor([0.4233, 0.1174, 0.1710, 0.2883]) -Greedy action tensor([ 1.3316, -0.7339, -0.3028, 0.4738]) tensor([0.5728, 0.0726, 0.1117, 0.2429]) -Greedy action tensor([ 2.0221, -0.7353, -0.1438, 0.2790]) tensor([0.7390, 0.0469, 0.0847, 0.1293]) -Greedy action tensor([ 1.0354, 0.1906, 0.3410, -0.3531]) tensor([0.4590, 0.1972, 0.2292, 0.1145]) -Greedy action tensor([ 1.4533, -0.0849, -0.5855, 0.1954]) tensor([0.6138, 0.1318, 0.0799, 0.1745]) -Greedy action tensor([ 2.0944, -0.5385, -0.5364, 0.1499]) tensor([0.7770, 0.0558, 0.0560, 0.1112]) -Greedy action tensor([ 0.0486, -0.1217, -0.0666, 0.0306]) tensor([0.2691, 0.2269, 0.2398, 0.2643]) -Greedy action tensor([ 1.4630, -0.2658, -0.3895, -0.0806]) tensor([0.6460, 0.1147, 0.1013, 0.1380]) -Greedy action tensor([ 0.7115, -0.4768, -0.1849, 0.4189]) tensor([0.4067, 0.1239, 0.1659, 0.3035]) -Greedy action tensor([ 0.8485, -0.3177, -0.0848, -0.0616]) tensor([0.4745, 0.1478, 0.1866, 0.1910]) -Greedy action tensor([ 0.7581, -0.5288, -0.2514, 0.5319]) tensor([0.4102, 0.1133, 0.1495, 0.3271]) -Greedy action tensor([ 0.9560, -0.4714, -0.6338, -0.1398]) tensor([0.5624, 0.1349, 0.1147, 0.1880]) -Greedy action tensor([ 1.0164, -0.4961, -0.1642, 0.2641]) tensor([0.5003, 0.1102, 0.1536, 0.2358]) -Greedy action tensor([ 1.0895, -0.5792, -0.3001, 0.7115]) tensor([0.4711, 0.0888, 0.1174, 0.3228]) -Greedy action tensor([ 1.8199, -1.0095, -0.1887, 0.0787]) tensor([0.7307, 0.0431, 0.0980, 0.1281]) -Greedy action tensor([ 1.2423, -0.6628, 0.0932, 0.1083]) tensor([0.5594, 0.0833, 0.1773, 0.1800]) -Greedy action tensor([ 1.5615, -0.0896, -0.2814, 0.2973]) tensor([0.6125, 0.1175, 0.0970, 0.1730]) -Greedy action tensor([ 1.7023, -0.2192, -0.3374, -0.0825]) tensor([0.6924, 0.1014, 0.0901, 0.1162]) -Greedy action tensor([ 0.9863, -0.2406, -0.0508, 0.2100]) tensor([0.4744, 0.1391, 0.1682, 0.2183]) -Greedy action tensor([ 1.6419, -0.7929, -0.2838, 0.6044]) tensor([0.6298, 0.0552, 0.0918, 0.2232]) -Greedy action tensor([ 0.7896, -0.4279, 0.1215, 0.0609]) tensor([0.4365, 0.1292, 0.2238, 0.2106]) -Greedy action tensor([ 1.1411, -0.2016, -0.3097, 0.0615]) tensor([0.5449, 0.1423, 0.1277, 0.1851]) -Greedy action tensor([ 1.5831, -0.8896, -0.4569, 0.7876]) tensor([0.6003, 0.0506, 0.0781, 0.2710]) -Greedy action tensor([ 1.2486, -0.7504, -0.2744, 0.5742]) tensor([0.5368, 0.0727, 0.1170, 0.2735]) -Greedy action tensor([ 1.3750, -0.8780, -0.2582, 0.2752]) tensor([0.6123, 0.0643, 0.1196, 0.2038]) -Greedy action tensor([ 1.0826, -0.1981, -0.4297, -0.2063]) tensor([0.5638, 0.1566, 0.1242, 0.1554]) -Greedy action tensor([ 0.7458, -0.3738, -0.0650, -0.1663]) tensor([0.4603, 0.1502, 0.2046, 0.1849]) -Greedy action tensor([ 1.1416, -0.6152, -0.2401, 0.0827]) tensor([0.5648, 0.0975, 0.1418, 0.1959]) -Greedy action tensor([ 1.2369, -0.3019, -0.1022, 0.6223]) tensor([0.4956, 0.1064, 0.1299, 0.2681]) -Greedy action tensor([ 0.9560, -0.7681, -0.2270, 0.1720]) tensor([0.5151, 0.0919, 0.1578, 0.2352]) -Greedy action tensor([ 1.2163, -0.7613, -0.1714, 0.3497]) tensor([0.5530, 0.0765, 0.1380, 0.2324]) -Greedy action tensor([ 0.7111, -0.1968, 0.2531, 0.1023]) tensor([0.3876, 0.1563, 0.2452, 0.2109]) -Greedy action tensor([ 0.9907, -0.2804, -0.1832, 0.0945]) tensor([0.5006, 0.1404, 0.1547, 0.2043]) -Greedy action tensor([ 1.4504, -0.2562, -0.3285, 0.2011]) tensor([0.6109, 0.1109, 0.1031, 0.1751]) -Greedy action tensor([ 0.7735, -0.3184, 0.1008, -0.0356]) tensor([0.4365, 0.1465, 0.2227, 0.1943]) -Greedy action tensor([ 0.8223, -0.1753, 0.1256, -0.1282]) tensor([0.4438, 0.1636, 0.2211, 0.1715]) -Greedy action tensor([ 1.1293, -0.2775, -0.0050, 0.0071]) tensor([0.5285, 0.1294, 0.1700, 0.1721]) -Greedy action tensor([ 0.9860, -0.5046, -0.3277, 0.4480]) tensor([0.4812, 0.1084, 0.1294, 0.2810]) -Greedy action tensor([ 0.9742, -0.1767, -0.3291, 0.2062]) tensor([0.4874, 0.1542, 0.1324, 0.2261]) -Greedy action tensor([ 0.9578, -0.5317, -0.4682, 0.4131]) tensor([0.4888, 0.1102, 0.1174, 0.2835]) -Greedy action tensor([ 1.3065, -0.1086, -0.4708, -0.0856]) tensor([0.6022, 0.1463, 0.1018, 0.1497]) -Greedy action tensor([ 1.5067, -0.0156, -0.2187, 0.2307]) tensor([0.5969, 0.1302, 0.1063, 0.1666]) -Greedy action tensor([ 1.2485, -0.4139, -0.1778, 0.4159]) tensor([0.5362, 0.1017, 0.1288, 0.2332]) -Greedy action tensor([ 1.3333, -0.4141, -0.5310, 0.1607]) tensor([0.6102, 0.1063, 0.0946, 0.1889]) -Greedy action tensor([ 1.2606, -0.6108, -0.5626, 0.5991]) tensor([0.5460, 0.0840, 0.0882, 0.2818]) -Greedy action tensor([ 0.4237, -0.1696, -0.0550, -0.0408]) tensor([0.3571, 0.1973, 0.2212, 0.2244]) -Greedy action tensor([ 1.1315, -0.5581, -0.2503, 0.2929]) tensor([0.5353, 0.0988, 0.1344, 0.2314]) -Greedy action tensor([ 1.0510, -0.3753, -0.1044, 0.2322]) tensor([0.5010, 0.1203, 0.1578, 0.2209]) -Greedy action tensor([ 0.9630, -0.1816, 0.1387, -0.0857]) tensor([0.4746, 0.1511, 0.2081, 0.1663]) -Greedy action tensor([ 1.4720, -0.7893, -0.3719, 0.6870]) tensor([0.5819, 0.0606, 0.0921, 0.2654]) -Greedy action tensor([ 1.3374, -0.4974, -0.1446, 0.0926]) tensor([0.5971, 0.0953, 0.1356, 0.1720]) -Greedy action tensor([ 0.6604, -0.5387, -0.1488, 0.1809]) tensor([0.4227, 0.1274, 0.1882, 0.2617]) -Greedy action tensor([ 1.1884, -0.0659, -0.2477, 0.1694]) tensor([0.5308, 0.1514, 0.1262, 0.1916]) -Greedy action tensor([ 1.1124, -0.2786, -0.2633, 0.2066]) tensor([0.5247, 0.1306, 0.1326, 0.2121]) -Greedy action tensor([ 1.0943, 0.2067, -0.1279, -0.1793]) tensor([0.5035, 0.2073, 0.1483, 0.1409]) -Greedy action tensor([ 1.2038, -0.2347, -0.2131, 0.2217]) tensor([0.5393, 0.1280, 0.1308, 0.2020]) -Greedy action tensor([ 1.4084, -0.3839, -0.3461, -0.0122]) tensor([0.6325, 0.1053, 0.1094, 0.1528]) -Greedy action tensor([ 1.1467, -0.7867, 0.0520, -0.4037]) tensor([0.5912, 0.0855, 0.1978, 0.1254]) -Greedy action tensor([ 0.9337, -0.5791, -0.2079, 0.2062]) tensor([0.4944, 0.1089, 0.1579, 0.2389]) -Greedy action tensor([ 1.3822, -0.2662, -0.2324, 0.3410]) tensor([0.5733, 0.1103, 0.1141, 0.2024]) -Greedy action tensor([ 0.6623, -0.4518, -0.1979, 0.4455]) tensor([0.3912, 0.1284, 0.1655, 0.3149]) -Greedy action tensor([ 0.9897, -0.4768, -0.3807, 0.4331]) tensor([0.4859, 0.1121, 0.1234, 0.2785]) -Greedy action tensor([ 0.4583, -0.3155, -0.0425, 0.2031]) tensor([0.3518, 0.1623, 0.2132, 0.2726]) -Greedy action tensor([ 1.2556, -0.6710, -0.2672, 0.4329]) tensor([0.5546, 0.0808, 0.1210, 0.2436]) -Greedy action tensor([ 0.8514, -0.5862, -0.1452, 0.2788]) tensor([0.4607, 0.1094, 0.1701, 0.2599]) -Greedy action tensor([ 1.0689, -0.2244, -0.3010, 0.1245]) tensor([0.5215, 0.1431, 0.1325, 0.2028]) -Greedy action tensor([ 0.9983, -0.4184, -0.1138, 0.1545]) tensor([0.4996, 0.1212, 0.1643, 0.2149]) -Greedy action tensor([ 0.6646, -0.2867, -0.0777, -0.2104]) tensor([0.4388, 0.1695, 0.2089, 0.1829]) -Greedy action tensor([ 1.2720, -0.4279, -0.1830, 0.2190]) tensor([0.5666, 0.1035, 0.1322, 0.1977]) -Greedy action tensor([ 0.7104, -0.1943, -0.1592, -0.0027]) tensor([0.4322, 0.1749, 0.1811, 0.2118]) -Greedy action tensor([ 1.5807, -0.3263, -0.3936, 0.1215]) tensor([0.6580, 0.0977, 0.0914, 0.1529]) -Greedy action tensor([ 1.1728, -0.3045, -0.0151, -0.0160]) tensor([0.5442, 0.1242, 0.1659, 0.1657]) -Greedy action tensor([ 1.6687, -0.2715, -0.4592, 0.1630]) tensor([0.6736, 0.0968, 0.0802, 0.1494]) -Greedy action tensor([ 1.3820, -0.4725, -0.1081, -0.0262]) tensor([0.6148, 0.0962, 0.1386, 0.1504]) -Greedy action tensor([ 0.3088, -0.1712, -0.0595, -0.1232]) tensor([0.3378, 0.2091, 0.2338, 0.2193]) -Greedy action tensor([ 1.0245, -0.2486, 0.0200, 0.0319]) tensor([0.4958, 0.1388, 0.1816, 0.1838]) -Greedy action tensor([ 0.8549, -0.4816, -0.2298, 0.3766]) tensor([0.4503, 0.1183, 0.1522, 0.2791]) -Greedy action tensor([ 0.7023, 0.1181, -0.6733, -0.7920]) tensor([0.4915, 0.2740, 0.1242, 0.1103]) -Greedy action tensor([1.1106, 0.5294, 0.0131, 0.6316]) tensor([0.3980, 0.2226, 0.1328, 0.2465]) -Greedy action tensor([-0.6442, -1.6317, 0.7350, -0.3742]) tensor([0.1503, 0.0560, 0.5969, 0.1969]) -Greedy action tensor([ 1.6060, -0.2186, 0.5113, 0.7833]) tensor([0.5168, 0.0833, 0.1729, 0.2270]) -Greedy action tensor([-0.0145, -0.6165, 1.2588, 1.3048]) tensor([0.1129, 0.0618, 0.4032, 0.4222]) -Greedy action tensor([ 1.7380, 0.9888, 0.5467, -0.2252]) tensor([0.5217, 0.2466, 0.1585, 0.0732]) -Greedy action tensor([ 0.1150, 0.3976, -0.1125, -0.1436]) tensor([0.2567, 0.3406, 0.2045, 0.1982]) -Greedy action tensor([ 0.5311, -0.8290, 0.9223, 0.1269]) tensor([0.2939, 0.0754, 0.4346, 0.1962]) -Greedy action tensor([0.7596, 1.0300, 0.4432, 0.4256]) tensor([0.2663, 0.3490, 0.1941, 0.1907]) -Greedy action tensor([-0.8528, 0.4646, 1.9872, -0.9668]) tensor([0.0440, 0.1642, 0.7526, 0.0392]) -Greedy action tensor([1.4881, 0.3658, 1.7886, 1.3861]) tensor([0.2794, 0.0910, 0.3774, 0.2523]) -Greedy action tensor([ 0.8498, -0.1677, 1.5469, -0.0540]) tensor([0.2649, 0.0958, 0.5320, 0.1073]) -Greedy action tensor([ 0.7871, 0.4459, -1.2805, 0.9410]) tensor([0.3329, 0.2367, 0.0421, 0.3883]) -Greedy action tensor([-0.0585, -0.9864, -0.4803, 0.3013]) tensor([0.2870, 0.1135, 0.1882, 0.4113]) -Greedy action tensor([ 1.3014, -0.5193, 0.3626, 2.1325]) tensor([0.2598, 0.0421, 0.1016, 0.5965]) -Greedy action tensor([ 0.5960, -1.1976, -0.0461, 0.9284]) tensor([0.3239, 0.0539, 0.1705, 0.4517]) -Greedy action tensor([ 1.1111, 0.7079, -0.2382, 0.2212]) tensor([0.4277, 0.2858, 0.1109, 0.1756]) -Greedy action tensor([ 0.7917, -0.7511, 0.4284, 1.3431]) tensor([0.2744, 0.0587, 0.1908, 0.4762]) -Greedy action tensor([ 2.4246, -1.3382, 1.6245, 1.1523]) tensor([0.5705, 0.0132, 0.2563, 0.1599]) -Greedy action tensor([ 0.7478, 0.1653, 0.7205, -0.2068]) tensor([0.3429, 0.1915, 0.3336, 0.1320]) -Greedy action tensor([ 1.4981, 0.3457, -0.6395, 1.0448]) tensor([0.4832, 0.1526, 0.0570, 0.3071]) -Greedy action tensor([ 0.9156, -0.0517, 0.3011, 0.5728]) tensor([0.3801, 0.1445, 0.2056, 0.2698]) -Greedy action tensor([0.8594, 0.6468, 1.0975, 0.0892]) tensor([0.2825, 0.2284, 0.3584, 0.1308]) -Greedy action tensor([-0.2026, -0.9393, 0.4078, 1.0128]) tensor([0.1494, 0.0715, 0.2751, 0.5039]) -Greedy action tensor([ 1.3138, -1.6363, 0.7353, 1.2061]) tensor([0.3983, 0.0208, 0.2233, 0.3576]) -Greedy action tensor([-0.3911, -0.7239, 0.5076, 0.2233]) tensor([0.1661, 0.1190, 0.4079, 0.3070]) -Greedy action tensor([-0.2966, -1.0146, -0.0153, -0.0452]) tensor([0.2440, 0.1190, 0.3233, 0.3137]) -Greedy action tensor([-0.1096, 0.6049, 0.7135, -0.5324]) tensor([0.1673, 0.3419, 0.3811, 0.1096]) -Greedy action tensor([ 0.2113, -0.5398, 1.2404, 0.2151]) tensor([0.1896, 0.0895, 0.5306, 0.1903]) -Greedy action tensor([ 0.8442, 0.3282, -0.7344, 0.8897]) tensor([0.3509, 0.2095, 0.0724, 0.3672]) -Greedy action tensor([-0.7675, -2.0822, -0.2257, -0.4840]) tensor([0.2317, 0.0622, 0.3984, 0.3077]) -Greedy action tensor([ 0.6917, -1.1189, 0.1497, 0.0852]) tensor([0.4366, 0.0714, 0.2539, 0.2381]) -Greedy action tensor([1.4808, 0.0527, 1.7997, 0.8272]) tensor([0.3189, 0.0765, 0.4387, 0.1659]) -Greedy action tensor([-0.0680, 0.6888, -0.9061, 1.2853]) tensor([0.1345, 0.2867, 0.0582, 0.5206]) -Greedy action tensor([-0.3428, -0.7090, -0.1240, 1.0770]) tensor([0.1414, 0.0980, 0.1759, 0.5847]) -Greedy action tensor([0.3949, 0.6331, 0.4519, 1.1224]) tensor([0.1853, 0.2351, 0.1961, 0.3835]) -Greedy action tensor([-0.7928, -0.0964, -0.3228, -0.5919]) tensor([0.1716, 0.3442, 0.2745, 0.2097]) -Greedy action tensor([ 0.7481, -0.4289, 0.6319, 1.1958]) tensor([0.2657, 0.0819, 0.2366, 0.4158]) -Greedy action tensor([ 1.5305, -1.1017, -0.1570, 1.3511]) tensor([0.4779, 0.0344, 0.0884, 0.3994]) -Greedy action tensor([ 0.8652, -0.5750, 0.3532, 0.5109]) tensor([0.3940, 0.0933, 0.2361, 0.2765]) -Greedy action tensor([ 0.3864, -0.9981, 0.6108, 0.2242]) tensor([0.2983, 0.0747, 0.3734, 0.2536]) -Greedy action tensor([ 0.1052, 0.8107, 0.2251, -0.2957]) tensor([0.2074, 0.4199, 0.2338, 0.1389]) -Greedy action tensor([ 1.0219, -1.7618, 1.8279, 0.7421]) tensor([0.2465, 0.0152, 0.5519, 0.1863]) -Greedy action tensor([ 1.5150, 0.3212, -0.2797, 1.0732]) tensor([0.4735, 0.1435, 0.0787, 0.3044]) -Greedy action tensor([1.4014, 0.5136, 0.9973, 0.5323]) tensor([0.4002, 0.1647, 0.2672, 0.1678]) -Greedy action tensor([ 0.8603, 0.4745, -0.3291, -0.2680]) tensor([0.4333, 0.2946, 0.1319, 0.1402]) -Greedy action tensor([-0.0609, -0.4965, 0.2159, -0.4246]) tensor([0.2732, 0.1767, 0.3603, 0.1899]) -Greedy action tensor([ 0.6153, 0.2539, 0.2574, -0.9537]) tensor([0.3840, 0.2675, 0.2685, 0.0800]) -Greedy action tensor([ 1.7866, -1.9279, 0.3307, 0.2965]) tensor([0.6744, 0.0164, 0.1573, 0.1520]) -Greedy action tensor([-0.3664, -0.5063, 0.4067, -0.0991]) tensor([0.1872, 0.1627, 0.4055, 0.2445]) -Greedy action tensor([-0.4026, 0.3520, 0.5563, 1.0222]) tensor([0.1011, 0.2150, 0.2637, 0.4202]) -Greedy action tensor([1.1201, 0.5004, 1.1458, 0.5505]) tensor([0.3195, 0.1719, 0.3278, 0.1808]) -Greedy action tensor([ 0.9205, -0.1621, 0.8157, 0.2148]) tensor([0.3659, 0.1239, 0.3295, 0.1807]) -Greedy action tensor([ 0.9244, -1.5186, -0.1647, 1.2511]) tensor([0.3559, 0.0309, 0.1198, 0.4934]) -Greedy action tensor([ 0.4245, -0.5571, -0.1669, -0.3118]) tensor([0.4154, 0.1557, 0.2300, 0.1989]) -Greedy action tensor([ 1.2850, 0.5456, -0.3137, 0.7487]) tensor([0.4416, 0.2108, 0.0893, 0.2583]) -Greedy action tensor([ 0.2183, 0.0457, -0.2809, 0.3541]) tensor([0.2782, 0.2341, 0.1689, 0.3187]) -Greedy action tensor([ 1.1073, -0.4623, 0.6545, 1.6001]) tensor([0.2873, 0.0598, 0.1827, 0.4702]) -Greedy action tensor([ 0.5887, -1.6857, -0.2773, 0.8680]) tensor([0.3514, 0.0361, 0.1478, 0.4646]) -Greedy action tensor([ 0.9086, -1.4334, 2.7368, 0.3888]) tensor([0.1264, 0.0121, 0.7863, 0.0751]) -Greedy action tensor([ 1.2328, -0.9309, 0.3242, 1.1616]) tensor([0.4083, 0.0469, 0.1646, 0.3802]) -Greedy action tensor([-1.1878, -1.4610, 0.3726, -0.8695]) tensor([0.1266, 0.0964, 0.6029, 0.1741]) -Greedy action tensor([ 0.9244, 0.6074, -0.5253, 0.2203]) tensor([0.4069, 0.2964, 0.0955, 0.2012]) -Greedy action tensor([1.4589, 0.1482, 0.1919, 1.7686]) tensor([0.3431, 0.0925, 0.0966, 0.4677]) -Greedy action tensor([-0.0031, -1.7328, 0.7766, 0.6139]) tensor([0.1919, 0.0340, 0.4185, 0.3556]) -Greedy action tensor([ 0.8464, -0.7724, 1.9761, 0.6333]) tensor([0.1960, 0.0388, 0.6067, 0.1584]) -Greedy action tensor([ 0.1629, -0.0910, -0.4118, 0.3637]) tensor([0.2808, 0.2178, 0.1581, 0.3433]) -Greedy action tensor([ 0.7432, -1.9513, 0.9558, 1.0539]) tensor([0.2726, 0.0184, 0.3371, 0.3719]) -Greedy action tensor([ 1.4348, -0.6304, 1.0346, 0.1936]) tensor([0.4794, 0.0608, 0.3213, 0.1386]) -Greedy action tensor([ 2.2201, -0.3035, 0.7446, 1.2161]) tensor([0.5969, 0.0479, 0.1365, 0.2187]) -Greedy action tensor([ 0.7055, -0.9369, -0.2359, 0.1381]) tensor([0.4650, 0.0900, 0.1814, 0.2637]) -Greedy action tensor([-0.0998, 0.1706, 0.0204, 0.6904]) tensor([0.1772, 0.2323, 0.1999, 0.3906]) -Greedy action tensor([ 1.3076, -0.4824, 1.5984, 1.4327]) tensor([0.2749, 0.0459, 0.3677, 0.3115]) -Greedy action tensor([ 0.4818, -0.0345, 0.8476, 0.1386]) tensor([0.2668, 0.1592, 0.3847, 0.1893]) -Greedy action tensor([-0.0716, -0.4360, -0.3225, 0.4342]) tensor([0.2421, 0.1681, 0.1884, 0.4014]) -Greedy action tensor([ 0.9396, -1.3302, 1.1859, -0.6091]) tensor([0.3853, 0.0398, 0.4930, 0.0819]) -Greedy action tensor([-0.6441, 0.0715, 0.8872, 0.2916]) tensor([0.0979, 0.2002, 0.4525, 0.2494]) -Greedy action tensor([1.5227, 0.0490, 0.4293, 0.3629]) tensor([0.5326, 0.1220, 0.1785, 0.1670]) -Greedy action tensor([ 1.1866, -0.6530, 0.1248, 0.7564]) tensor([0.4640, 0.0737, 0.1605, 0.3018]) -Greedy action tensor([1.1438, 0.9409, 0.2363, 0.5278]) tensor([0.3623, 0.2958, 0.1462, 0.1957]) -Greedy action tensor([ 0.3938, -0.7817, -0.0455, 1.6336]) tensor([0.1849, 0.0571, 0.1192, 0.6388]) -Greedy action tensor([ 1.1529, -0.4173, 1.1379, 0.0710]) tensor([0.3949, 0.0821, 0.3891, 0.1339]) -Greedy action tensor([ 0.3855, -0.1322, -0.0023, -0.0687]) tensor([0.3437, 0.2048, 0.2332, 0.2182]) -Greedy action tensor([ 0.9227, -0.6329, -0.0070, -0.4835]) tensor([0.5403, 0.1140, 0.2133, 0.1324]) -Greedy action tensor([ 0.3616, 0.0874, -0.0416, -0.1954]) tensor([0.3332, 0.2533, 0.2226, 0.1909]) -Greedy action tensor([ 0.5573, -0.1796, -0.0033, -0.1684]) tensor([0.3947, 0.1889, 0.2253, 0.1910]) -Greedy action tensor([ 7.5907e-01, -3.3738e-01, -3.6821e-04, -4.3402e-01]) tensor([0.4750, 0.1587, 0.2223, 0.1441]) -Greedy action tensor([ 0.6157, -0.1382, -0.0289, -0.3348]) tensor([0.4198, 0.1975, 0.2204, 0.1623]) -Greedy action tensor([ 0.7987, -0.4797, 0.0087, -0.2722]) tensor([0.4819, 0.1342, 0.2187, 0.1652]) -Greedy action tensor([ 0.1697, 0.4137, -0.1434, -0.1214]) tensor([0.2663, 0.3399, 0.1947, 0.1991]) -Greedy action tensor([ 0.9677, -0.0445, 0.0791, -0.6106]) tensor([0.5048, 0.1835, 0.2076, 0.1042]) -Greedy action tensor([ 0.5769, -0.2589, -0.0599, -0.3762]) tensor([0.4259, 0.1846, 0.2253, 0.1642]) -Greedy action tensor([ 0.5279, -0.2478, -0.0738, -0.6593]) tensor([0.4323, 0.1990, 0.2368, 0.1319]) -Greedy action tensor([ 0.3789, -0.1554, -0.0041, -0.5192]) tensor([0.3738, 0.2191, 0.2549, 0.1523]) -Greedy action tensor([ 0.3134, -0.2555, -0.2983, -0.4484]) tensor([0.3883, 0.2198, 0.2106, 0.1813]) -Greedy action tensor([ 1.2082, -1.0204, -0.0946, -0.8365]) tensor([0.6627, 0.0714, 0.1801, 0.0858]) -Greedy action tensor([ 0.8388, -0.6531, 0.0252, -0.4520]) tensor([0.5146, 0.1158, 0.2281, 0.1415]) -Greedy action tensor([ 0.9179, -0.1403, 0.1221, -0.9636]) tensor([0.5126, 0.1779, 0.2313, 0.0781]) -Greedy action tensor([ 1.0095, -0.5295, -0.1135, -0.3980]) tensor([0.5603, 0.1202, 0.1823, 0.1371]) -Greedy action tensor([ 0.9878, -0.5507, -0.0248, -0.3510]) tensor([0.5434, 0.1167, 0.1974, 0.1425]) -Greedy action tensor([ 0.5328, -0.3097, -0.0448, -0.4046]) tensor([0.4195, 0.1807, 0.2355, 0.1643]) -Greedy action tensor([ 0.1700, 0.1850, -0.1153, -0.0562]) tensor([0.2805, 0.2848, 0.2109, 0.2238]) -Greedy action tensor([ 0.6217, -0.1244, 0.2174, -0.3280]) tensor([0.3955, 0.1875, 0.2640, 0.1530]) -Greedy action tensor([ 0.7159, -0.5475, 0.1103, -0.4744]) tensor([0.4689, 0.1326, 0.2559, 0.1426]) -Greedy action tensor([ 0.6266, -0.6086, 0.1180, -0.4863]) tensor([0.4503, 0.1309, 0.2708, 0.1480]) -Greedy action tensor([ 0.6304, -0.3126, -0.0255, -0.4533]) tensor([0.4451, 0.1733, 0.2310, 0.1506]) -Greedy action tensor([ 0.8049, -0.6704, -0.0822, -0.2687]) tensor([0.5045, 0.1154, 0.2078, 0.1724]) -Greedy action tensor([ 0.8529, -0.1317, -0.0287, -0.4583]) tensor([0.4861, 0.1816, 0.2013, 0.1310]) -Greedy action tensor([ 0.6039, -0.2485, -0.0655, -0.2493]) tensor([0.4229, 0.1803, 0.2165, 0.1802]) -Greedy action tensor([ 1.2563, -0.7532, -0.0811, -0.5908]) tensor([0.6434, 0.0862, 0.1689, 0.1015]) -Greedy action tensor([ 0.6820, -0.5545, -0.2060, -0.4013]) tensor([0.4901, 0.1423, 0.2017, 0.1659]) -Greedy action tensor([ 0.6487, 0.1204, -0.1624, -0.2944]) tensor([0.4126, 0.2433, 0.1834, 0.1607]) -Greedy action tensor([ 0.9042, -0.4684, 0.0118, -0.3478]) tensor([0.5131, 0.1300, 0.2102, 0.1467]) -Greedy action tensor([ 0.9934, -0.9538, -0.0246, -0.4332]) tensor([0.5734, 0.0818, 0.2072, 0.1377]) -Greedy action tensor([ 7.0386e-01, -2.4491e-01, 6.1852e-04, -3.2630e-01]) tensor([0.4466, 0.1729, 0.2211, 0.1594]) -Greedy action tensor([ 0.5967, -0.4628, 0.1678, -0.5288]) tensor([0.4306, 0.1493, 0.2804, 0.1397]) -Greedy action tensor([ 0.4802, -0.0558, -0.1268, -0.0397]) tensor([0.3670, 0.2147, 0.2000, 0.2182]) -Greedy action tensor([ 0.8491, -0.6381, 0.0432, -0.5499]) tensor([0.5210, 0.1177, 0.2327, 0.1286]) -Greedy action tensor([ 0.4758, 0.0897, 0.0011, -0.3023]) tensor([0.3622, 0.2462, 0.2253, 0.1663]) -Greedy action tensor([ 0.4790, 0.1326, 0.0538, -0.1453]) tensor([0.3452, 0.2442, 0.2257, 0.1849]) -Greedy action tensor([ 0.3745, -0.2940, -0.0853, -0.4965]) tensor([0.3903, 0.2000, 0.2464, 0.1633]) -Greedy action tensor([ 0.5558, -0.2719, 0.1062, -0.4492]) tensor([0.4097, 0.1790, 0.2613, 0.1500]) -Greedy action tensor([ 0.3477, 0.2280, -0.1636, -0.1051]) tensor([0.3202, 0.2841, 0.1920, 0.2036]) -Greedy action tensor([ 0.6844, -0.1307, 0.1113, -0.3402]) tensor([0.4228, 0.1871, 0.2383, 0.1518]) -Greedy action tensor([ 0.7782, -0.6434, -0.1211, -0.4109]) tensor([0.5121, 0.1236, 0.2084, 0.1559]) -Greedy action tensor([ 0.3606, -0.0019, -0.0197, -0.1210]) tensor([0.3336, 0.2322, 0.2281, 0.2061]) -Greedy action tensor([ 0.3920, -0.0585, -0.0793, -0.0326]) tensor([0.3430, 0.2186, 0.2141, 0.2243]) -Greedy action tensor([ 0.1978, 0.0450, 0.0420, -0.4422]) tensor([0.3085, 0.2648, 0.2640, 0.1627]) -Greedy action tensor([ 0.3732, 0.2402, -0.1140, -0.3665]) tensor([0.3370, 0.2951, 0.2070, 0.1608]) -Greedy action tensor([ 0.4442, 0.1073, -0.1218, -0.0367]) tensor([0.3448, 0.2462, 0.1958, 0.2132]) -Greedy action tensor([ 0.5104, -0.2644, -0.1091, -0.2127]) tensor([0.4025, 0.1855, 0.2167, 0.1953]) -Greedy action tensor([ 0.3778, -0.1781, -0.0040, -0.1074]) tensor([0.3482, 0.1997, 0.2377, 0.2144]) -Greedy action tensor([ 0.6037, -0.6144, 0.0171, -0.6947]) tensor([0.4706, 0.1392, 0.2618, 0.1285]) -Greedy action tensor([ 0.7558, -0.5182, -0.0065, -0.3944]) tensor([0.4847, 0.1356, 0.2262, 0.1535]) -Greedy action tensor([ 1.0865, -0.9729, 0.1332, -0.6084]) tensor([0.5894, 0.0752, 0.2272, 0.1082]) -Greedy action tensor([ 0.4105, 0.2290, 0.0273, -0.2586]) tensor([0.3303, 0.2754, 0.2251, 0.1691]) -Greedy action tensor([ 1.2450, -0.6881, 0.0598, -0.3778]) tensor([0.6069, 0.0878, 0.1855, 0.1198]) -Greedy action tensor([ 0.2907, 0.1805, -0.0664, -0.0907]) tensor([0.3050, 0.2732, 0.2134, 0.2083]) -Greedy action tensor([ 0.5408, 0.0982, -0.1316, -0.5977]) tensor([0.4043, 0.2597, 0.2064, 0.1295]) -Greedy action tensor([ 0.4979, 0.0500, -0.0383, -0.4145]) tensor([0.3809, 0.2434, 0.2228, 0.1530]) -Greedy action tensor([ 0.5385, -0.2992, -0.3088, -0.2621]) tensor([0.4328, 0.1873, 0.1855, 0.1944]) -Greedy action tensor([ 0.8661, -0.8478, -0.1210, -0.7538]) tensor([0.5712, 0.1029, 0.2129, 0.1131]) -Greedy action tensor([ 0.4849, 0.0219, -0.0180, -0.1127]) tensor([0.3592, 0.2260, 0.2172, 0.1976]) -Greedy action tensor([ 0.3697, 0.0291, 0.0486, -0.0955]) tensor([0.3263, 0.2321, 0.2367, 0.2049]) -Greedy action tensor([ 0.4528, -0.1640, -0.0412, -0.1842]) tensor([0.3733, 0.2015, 0.2278, 0.1974]) -Greedy action tensor([ 0.4996, -0.3090, -0.2036, -0.2811]) tensor([0.4169, 0.1857, 0.2064, 0.1910]) -Greedy action tensor([ 1.0299, -0.9595, 0.0590, -0.6301]) tensor([0.5863, 0.0802, 0.2221, 0.1115]) -Greedy action tensor([ 0.5377, -0.3542, -0.1090, -0.4643]) tensor([0.4346, 0.1782, 0.2276, 0.1596]) -Greedy action tensor([ 0.6379, -0.4495, -0.1410, -0.3759]) tensor([0.4632, 0.1561, 0.2126, 0.1681]) -Greedy action tensor([ 0.6604, -0.3353, 0.0421, -0.2802]) tensor([0.4350, 0.1607, 0.2344, 0.1698]) -Greedy action tensor([ 0.6476, -0.1601, -0.0134, -0.3896]) tensor([0.4317, 0.1925, 0.2229, 0.1530]) -Greedy action tensor([ 0.5163, 0.1099, -0.0656, -0.3008]) tensor([0.3750, 0.2498, 0.2096, 0.1656]) -Greedy action tensor([ 0.8180, -0.4888, -0.1161, -0.3547]) tensor([0.5068, 0.1372, 0.1991, 0.1569]) -Greedy action tensor([ 0.7133, -0.5440, 0.0509, -0.3752]) tensor([0.4680, 0.1331, 0.2413, 0.1576]) -Greedy action tensor([ 0.5637, 0.2797, -0.0152, -0.1852]) tensor([0.3589, 0.2702, 0.2012, 0.1697]) -Greedy action tensor([ 0.5021, 0.0627, 0.0137, -0.0995]) tensor([0.3564, 0.2297, 0.2187, 0.1953]) -Greedy action tensor([ 0.3751, -0.0453, -0.1744, -0.2107]) tensor([0.3583, 0.2354, 0.2068, 0.1995]) -Greedy action tensor([ 0.2848, 0.2910, -0.1023, -0.0342]) tensor([0.2931, 0.2949, 0.1990, 0.2130]) -Greedy action tensor([ 0.4797, 0.0428, 0.0202, -0.0634]) tensor([0.3498, 0.2260, 0.2209, 0.2032]) -Greedy action tensor([ 0.8138, -0.6000, -0.1089, -0.4451]) tensor([0.5196, 0.1264, 0.2065, 0.1475]) -Greedy action tensor([ 0.6846, -0.5200, 0.0317, -0.5461]) tensor([0.4734, 0.1419, 0.2464, 0.1383]) -Greedy action tensor([ 1.0209, -0.8427, -0.1372, -0.6629]) tensor([0.6043, 0.0937, 0.1898, 0.1122]) -Greedy action tensor([ 5.3660e-01, -2.6613e-04, -4.0736e-02, 2.0508e-02]) tensor([0.3646, 0.2131, 0.2047, 0.2176]) -Greedy action tensor([-0.6588, -0.5961, 0.2558, -0.0826]) tensor([0.1577, 0.1679, 0.3937, 0.2806]) -Greedy action tensor([-0.9385, -0.6227, 0.3608, 0.0516]) tensor([0.1146, 0.1571, 0.4200, 0.3083]) -Greedy action tensor([-1.8740, -0.4687, 0.6515, -0.1120]) tensor([0.0427, 0.1742, 0.5341, 0.2489]) -Greedy action tensor([-0.7603, -0.6091, 0.2572, -0.0142]) tensor([0.1421, 0.1653, 0.3930, 0.2996]) -Greedy action tensor([-1.0923, -0.7102, 0.5208, 0.8880]) tensor([0.0679, 0.0995, 0.3407, 0.4919]) -Greedy action tensor([-1.5765, -0.5735, 0.6758, -0.0658]) tensor([0.0563, 0.1535, 0.5353, 0.2550]) -Greedy action tensor([-1.3475, -0.6002, 0.5382, 0.5095]) tensor([0.0621, 0.1311, 0.4092, 0.3976]) -Greedy action tensor([-1.1981, -0.4281, 0.6374, 0.8134]) tensor([0.0592, 0.1278, 0.3708, 0.4422]) -Greedy action tensor([-1.7103, -0.5652, 0.7038, 0.0907]) tensor([0.0468, 0.1470, 0.5230, 0.2833]) -Greedy action tensor([-1.6817, -0.9545, -0.4770, -0.8719]) tensor([0.1156, 0.2392, 0.3855, 0.2598]) -Greedy action tensor([-1.4921, -0.6065, 1.1002, 1.0828]) tensor([0.0334, 0.0810, 0.4466, 0.4389]) -Greedy action tensor([-1.8921, -0.5121, 1.0870, 0.5322]) tensor([0.0278, 0.1106, 0.5473, 0.3143]) -Greedy action tensor([-0.7456, 0.9263, 0.0502, 0.3506]) tensor([0.0867, 0.4615, 0.1922, 0.2595]) -Greedy action tensor([-1.7195, -0.5089, 0.5853, 0.0370]) tensor([0.0496, 0.1664, 0.4969, 0.2872]) -Greedy action tensor([-1.8603, -0.4679, 0.6226, -0.1235]) tensor([0.0441, 0.1775, 0.5280, 0.2504]) -Greedy action tensor([-1.6626, -0.3279, 0.6944, 0.3364]) tensor([0.0440, 0.1671, 0.4643, 0.3246]) -Greedy action tensor([-1.1980, 0.1575, 0.2987, -0.6537]) tensor([0.0903, 0.3504, 0.4036, 0.1557]) -Greedy action tensor([-1.7169, -0.6314, 0.2468, -0.1960]) tensor([0.0638, 0.1890, 0.4549, 0.2922]) -Greedy action tensor([-1.9208, -0.4195, 0.6514, -0.1638]) tensor([0.0410, 0.1841, 0.5372, 0.2377]) -Greedy action tensor([-0.9977, -0.6356, 0.3034, 0.2013]) tensor([0.1061, 0.1524, 0.3897, 0.3519]) -Greedy action tensor([-1.6895, -0.4817, 0.6486, 0.1847]) tensor([0.0471, 0.1577, 0.4882, 0.3070]) -Greedy action tensor([-0.8966, 0.1129, 0.4124, -0.4243]) tensor([0.1105, 0.3032, 0.4091, 0.1772]) -Greedy action tensor([-1.9008, -0.5708, 0.8908, -0.0468]) tensor([0.0364, 0.1376, 0.5936, 0.2324]) -Greedy action tensor([-1.7697, -0.3701, 0.5661, -0.0729]) tensor([0.0480, 0.1944, 0.4959, 0.2617]) -Greedy action tensor([-1.3165, -0.2629, 0.6761, 0.9229]) tensor([0.0486, 0.1393, 0.3562, 0.4559]) -Greedy action tensor([-1.9073, -0.4532, 0.6469, -0.1503]) tensor([0.0418, 0.1788, 0.5373, 0.2421]) -Greedy action tensor([-0.6273, -0.4696, 0.3400, -0.1664]) tensor([0.1566, 0.1833, 0.4119, 0.2482]) -Greedy action tensor([-1.1894, -0.5990, 0.5989, -0.1411]) tensor([0.0859, 0.1551, 0.5138, 0.2452]) -Greedy action tensor([-0.9714, -0.6181, 1.2232, 1.5098]) tensor([0.0428, 0.0610, 0.3843, 0.5119]) -Greedy action tensor([-0.8923, -0.8880, -0.1738, -0.1927]) tensor([0.1648, 0.1655, 0.3380, 0.3317]) -Greedy action tensor([-1.2770, -0.5916, 0.3514, 0.1437]) tensor([0.0818, 0.1624, 0.4170, 0.3388]) -Greedy action tensor([-0.2542, -0.1844, 0.1616, 0.1659]) tensor([0.1957, 0.2098, 0.2966, 0.2979]) -Greedy action tensor([-1.7325, -0.4922, 0.5521, -0.0279]) tensor([0.0506, 0.1748, 0.4966, 0.2781]) -Greedy action tensor([-1.8245, -0.4488, 0.6052, -0.1138]) tensor([0.0458, 0.1812, 0.5198, 0.2533]) -Greedy action tensor([-1.9390, -0.5720, 1.3120, 0.5530]) tensor([0.0234, 0.0916, 0.6028, 0.2822]) -Greedy action tensor([-0.7798, -0.6147, 0.2629, -0.1034]) tensor([0.1432, 0.1689, 0.4062, 0.2816]) -Greedy action tensor([-1.0925, -0.5764, 0.2322, 0.5038]) tensor([0.0879, 0.1473, 0.3308, 0.4340]) -Greedy action tensor([-1.5477, -0.5301, 0.4514, 0.1074]) tensor([0.0610, 0.1689, 0.4506, 0.3195]) -Greedy action tensor([-1.7526, -0.2979, 0.5352, -0.0400]) tensor([0.0484, 0.2071, 0.4765, 0.2681]) -Greedy action tensor([-0.5347, -0.5679, 0.1731, 0.1025]) tensor([0.1698, 0.1643, 0.3447, 0.3212]) -Greedy action tensor([-1.5156, -0.4927, 0.7508, 0.7206]) tensor([0.0439, 0.1221, 0.4233, 0.4107]) -Greedy action tensor([-0.4512, -0.1770, 0.3562, 0.6062]) tensor([0.1345, 0.1769, 0.3015, 0.3871]) -Greedy action tensor([-1.0828, -0.5190, 0.5756, 1.1407]) tensor([0.0580, 0.1019, 0.3045, 0.5357]) -Greedy action tensor([-1.6721, -0.5294, 0.5434, 0.0292]) tensor([0.0532, 0.1669, 0.4880, 0.2918]) -Greedy action tensor([-1.7927, -0.4958, 0.5968, -0.0607]) tensor([0.0471, 0.1724, 0.5141, 0.2664]) -Greedy action tensor([-0.2747, -0.2627, 1.1290, 1.6184]) tensor([0.0786, 0.0796, 0.3199, 0.5219]) -Greedy action tensor([-0.9195, 0.4998, 0.3482, -0.4446]) tensor([0.0971, 0.4016, 0.3451, 0.1562]) -Greedy action tensor([-1.8518, -0.4217, 0.6138, -0.1246]) tensor([0.0443, 0.1851, 0.5214, 0.2492]) -Greedy action tensor([-0.9118, -0.2793, 0.4494, -0.4094]) tensor([0.1185, 0.2231, 0.4624, 0.1959]) -Greedy action tensor([-1.8950, -0.4493, 0.6422, -0.1468]) tensor([0.0423, 0.1796, 0.5350, 0.2431]) -Greedy action tensor([-1.0751, -0.6057, 1.1584, 1.4167]) tensor([0.0416, 0.0666, 0.3886, 0.5032]) -Greedy action tensor([-1.5780, -0.3356, 0.6517, 0.5181]) tensor([0.0457, 0.1582, 0.4246, 0.3715]) -Greedy action tensor([-1.5867, -0.5379, 0.5027, 0.0626]) tensor([0.0584, 0.1666, 0.4715, 0.3036]) -Greedy action tensor([-1.8121, -0.4763, 0.6011, -0.1064]) tensor([0.0466, 0.1771, 0.5201, 0.2563]) -Greedy action tensor([-0.5160, -0.3848, 1.1545, 1.6433]) tensor([0.0620, 0.0707, 0.3297, 0.5375]) -Greedy action tensor([ 0.0116, -0.0484, 0.9076, 1.7107]) tensor([0.1014, 0.0955, 0.2484, 0.5546]) -Greedy action tensor([-0.4959, -0.4920, 0.1320, 0.0403]) tensor([0.1790, 0.1797, 0.3354, 0.3060]) -Greedy action tensor([-0.7979, -0.3468, 1.1276, 1.4593]) tensor([0.0527, 0.0827, 0.3613, 0.5034]) -Greedy action tensor([-0.9876, -0.2623, 0.9358, 1.2778]) tensor([0.0512, 0.1057, 0.3502, 0.4930]) -Greedy action tensor([-1.9872, -0.6856, 0.8341, -0.0405]) tensor([0.0351, 0.1291, 0.5899, 0.2460]) -Greedy action tensor([-1.9672, -0.4515, 1.0024, 0.3643]) tensor([0.0283, 0.1289, 0.5515, 0.2914]) -Greedy action tensor([-0.7661, -0.5470, 0.1772, 0.3211]) tensor([0.1286, 0.1600, 0.3302, 0.3813]) -Greedy action tensor([-1.1345, -0.5390, 0.4620, 0.7664]) tensor([0.0692, 0.1256, 0.3418, 0.4634]) -Greedy action tensor([-0.4767, -0.4992, 0.1876, 0.3223]) tensor([0.1627, 0.1591, 0.3163, 0.3619]) -Greedy action tensor([-0.2210, 0.0234, 0.2109, 0.3453]) tensor([0.1792, 0.2289, 0.2761, 0.3158]) -Greedy action tensor([-1.9275, -0.4551, 0.6570, -0.1712]) tensor([0.0410, 0.1786, 0.5431, 0.2373]) -Greedy action tensor([-0.9654, -0.5776, 0.4437, -0.2375]) tensor([0.1158, 0.1706, 0.4738, 0.2397]) -Greedy action tensor([-1.6675, -0.5086, 1.0087, 0.6228]) tensor([0.0350, 0.1114, 0.5081, 0.3455]) -Greedy action tensor([-1.8423, -0.4695, 0.6108, -0.1249]) tensor([0.0452, 0.1783, 0.5250, 0.2516]) -Greedy action tensor([-1.8032, -0.8260, 1.2177, 0.8527]) tensor([0.0260, 0.0692, 0.5340, 0.3707]) -Greedy action tensor([-1.9069, -0.4624, 0.6878, -0.1025]) tensor([0.0405, 0.1716, 0.5420, 0.2459]) -Greedy action tensor([-1.2358, -0.8481, 0.3532, -0.4995]) tensor([0.1057, 0.1558, 0.5178, 0.2207]) -Greedy action tensor([-1.6713, -0.3409, 0.5888, 0.0971]) tensor([0.0494, 0.1870, 0.4738, 0.2898]) -Greedy action tensor([-1.0133, 0.0041, 0.5385, 1.0510]) tensor([0.0611, 0.1690, 0.2884, 0.4815]) -Greedy action tensor([-1.8686, -0.3876, 0.6209, -0.1315]) tensor([0.0432, 0.1901, 0.5211, 0.2456]) -Greedy action tensor([-1.0565, -0.6632, 0.2624, 0.2468]) tensor([0.1010, 0.1497, 0.3776, 0.3718]) -Greedy action tensor([-1.0039, -0.6013, 0.2193, 0.3134]) tensor([0.1039, 0.1554, 0.3530, 0.3878]) -Greedy action tensor([-2.0436, -0.9237, 1.1545, 0.5602]) tensor([0.0238, 0.0729, 0.5821, 0.3213]) -Greedy action tensor([-1.6770, -0.2430, 0.4870, 0.0143]) tensor([0.0517, 0.2171, 0.4504, 0.2807]) -Greedy action tensor([-2.0389, -0.8965, 1.0377, 0.3629]) tensor([0.0271, 0.0850, 0.5883, 0.2996]) -Greedy action tensor([-0.8883, -0.2239, 0.7994, 1.0680]) tensor([0.0648, 0.1260, 0.3506, 0.4586]) -Greedy action tensor([ 0.6576, -0.5328, -0.7194, 0.0945]) tensor([0.4704, 0.1430, 0.1187, 0.2678]) -Greedy action tensor([ 0.8395, -0.5658, 0.2012, 0.1883]) tensor([0.4357, 0.1069, 0.2302, 0.2272]) -Greedy action tensor([ 1.3589, -0.6162, -0.1707, 0.7984]) tensor([0.5191, 0.0720, 0.1125, 0.2964]) -Greedy action tensor([ 1.2487, -0.3947, -0.2162, 0.3503]) tensor([0.5460, 0.1055, 0.1262, 0.2223]) -Greedy action tensor([1.0740, 0.0692, 0.0488, 0.0490]) tensor([0.4799, 0.1757, 0.1722, 0.1722]) -Greedy action tensor([ 1.3722, -0.7086, -0.1190, 0.3976]) tensor([0.5790, 0.0723, 0.1303, 0.2185]) -Greedy action tensor([ 1.0291, -0.2069, -0.0331, -0.0504]) tensor([0.5061, 0.1470, 0.1749, 0.1719]) -Greedy action tensor([ 1.0375, -0.2128, -0.2477, 0.1205]) tensor([0.5095, 0.1459, 0.1409, 0.2037]) -Greedy action tensor([ 1.3512, -0.8112, -0.2512, 0.6344]) tensor([0.5541, 0.0637, 0.1116, 0.2706]) -Greedy action tensor([ 1.8439, -0.5968, -0.4280, 0.5989]) tensor([0.6765, 0.0589, 0.0698, 0.1948]) -Greedy action tensor([ 1.1836, -0.7113, -0.2062, 0.6285]) tensor([0.5067, 0.0762, 0.1262, 0.2909]) -Greedy action tensor([ 1.4543, -0.4383, -0.2572, 0.2167]) tensor([0.6168, 0.0929, 0.1114, 0.1789]) -Greedy action tensor([ 1.0114, -0.6610, -0.1366, 0.2347]) tensor([0.5089, 0.0956, 0.1615, 0.2341]) -Greedy action tensor([ 0.8661, -0.2512, -0.1688, 0.0829]) tensor([0.4674, 0.1529, 0.1661, 0.2136]) -Greedy action tensor([ 0.7893, -0.5430, -0.4480, 0.7681]) tensor([0.3948, 0.1042, 0.1146, 0.3865]) -Greedy action tensor([ 2.0056, -0.3922, -0.4271, 0.3729]) tensor([0.7277, 0.0662, 0.0639, 0.1422]) -Greedy action tensor([ 1.7912, -0.3347, -0.5174, 0.3576]) tensor([0.6863, 0.0819, 0.0682, 0.1636]) -Greedy action tensor([ 0.9316, 0.1139, 0.0331, -0.1444]) tensor([0.4567, 0.2016, 0.1860, 0.1557]) -Greedy action tensor([ 0.5879, -0.6256, -0.3307, 0.1666]) tensor([0.4251, 0.1263, 0.1696, 0.2790]) -Greedy action tensor([ 1.2830, -0.6583, -0.2638, 0.2370]) tensor([0.5856, 0.0840, 0.1247, 0.2057]) -Greedy action tensor([ 0.5233, -0.4478, -0.7275, 0.3417]) tensor([0.4002, 0.1515, 0.1146, 0.3337]) -Greedy action tensor([ 1.2836, -0.5949, -0.2875, 0.4885]) tensor([0.5518, 0.0843, 0.1147, 0.2492]) -Greedy action tensor([ 1.1162, -0.4824, -0.1436, 0.2445]) tensor([0.5252, 0.1062, 0.1490, 0.2197]) -Greedy action tensor([ 1.1152, -0.6851, -0.1030, 0.0923]) tensor([0.5493, 0.0908, 0.1625, 0.1975]) -Greedy action tensor([ 1.5127, -0.7122, -0.1585, 0.4149]) tensor([0.6136, 0.0663, 0.1154, 0.2047]) -Greedy action tensor([ 1.3452, -0.3106, -0.2536, 0.1203]) tensor([0.5928, 0.1132, 0.1198, 0.1742]) -Greedy action tensor([ 1.2005, -0.5340, -0.6836, 0.4799]) tensor([0.5510, 0.0972, 0.0837, 0.2680]) -Greedy action tensor([ 1.5424, -0.8650, -0.2810, 0.5338]) tensor([0.6187, 0.0557, 0.0999, 0.2257]) -Greedy action tensor([ 1.2107, -0.2051, -0.1414, 0.3852]) tensor([0.5156, 0.1252, 0.1334, 0.2258]) -Greedy action tensor([ 1.6784, -0.3265, -0.6445, 0.1527]) tensor([0.6896, 0.0929, 0.0676, 0.1500]) -Greedy action tensor([ 0.4046, -0.1099, -0.6025, 0.4065]) tensor([0.3373, 0.2016, 0.1232, 0.3379]) -Greedy action tensor([ 0.6980, -0.1466, 0.0688, -0.0024]) tensor([0.4067, 0.1747, 0.2167, 0.2019]) -Greedy action tensor([ 0.8029, -0.2817, 0.2636, 0.0347]) tensor([0.4193, 0.1417, 0.2445, 0.1945]) -Greedy action tensor([ 0.6320, -0.4202, -0.0429, 0.0759]) tensor([0.4112, 0.1436, 0.2094, 0.2358]) -Greedy action tensor([ 1.0545, -0.3897, 0.0610, -0.0531]) tensor([0.5164, 0.1218, 0.1912, 0.1706]) -Greedy action tensor([ 2.1682, -1.0213, -0.1834, 0.6147]) tensor([0.7419, 0.0306, 0.0706, 0.1569]) -Greedy action tensor([ 1.2375, -0.5249, -0.4358, 0.5072]) tensor([0.5432, 0.0932, 0.1019, 0.2617]) -Greedy action tensor([ 1.4661, -0.9592, -0.0689, 0.4956]) tensor([0.5942, 0.0526, 0.1280, 0.2252]) -Greedy action tensor([ 1.4508, -0.6065, -0.4075, 0.0075]) tensor([0.6580, 0.0841, 0.1026, 0.1554]) -Greedy action tensor([ 1.5597, -0.4760, -0.3272, 0.4243]) tensor([0.6237, 0.0814, 0.0945, 0.2004]) -Greedy action tensor([ 0.9955, -0.4360, -0.2532, 0.0377]) tensor([0.5237, 0.1251, 0.1502, 0.2010]) -Greedy action tensor([ 1.3629, -0.7330, -0.6521, 0.7091]) tensor([0.5630, 0.0692, 0.0751, 0.2928]) -Greedy action tensor([ 1.1513, -0.4014, -0.2980, -0.0799]) tensor([0.5753, 0.1218, 0.1350, 0.1679]) -Greedy action tensor([ 0.7725, -0.5436, -0.4101, 0.3722]) tensor([0.4455, 0.1195, 0.1365, 0.2985]) -Greedy action tensor([ 1.0727, -0.2590, -0.2317, -0.0922]) tensor([0.5413, 0.1429, 0.1469, 0.1689]) -Greedy action tensor([ 1.0753, -0.4791, -0.0611, 0.3657]) tensor([0.4940, 0.1044, 0.1586, 0.2430]) -Greedy action tensor([ 0.8449, 0.0154, -0.5131, -0.3749]) tensor([0.5028, 0.2194, 0.1293, 0.1485]) -Greedy action tensor([ 1.0137, -0.5698, -0.1400, 0.2316]) tensor([0.5055, 0.1038, 0.1595, 0.2313]) -Greedy action tensor([ 1.5089, -0.4477, -0.3385, 0.3107]) tensor([0.6247, 0.0883, 0.0985, 0.1885]) -Greedy action tensor([ 1.5300, -0.5858, -0.3946, 0.3671]) tensor([0.6333, 0.0763, 0.0924, 0.1979]) -Greedy action tensor([ 0.7041, -0.1743, -0.3378, 0.3421]) tensor([0.4058, 0.1686, 0.1431, 0.2825]) -Greedy action tensor([ 1.2492, -0.2601, -0.1939, 0.0474]) tensor([0.5689, 0.1257, 0.1344, 0.1710]) -Greedy action tensor([ 1.5180, -0.6754, -0.1860, 0.2920]) tensor([0.6301, 0.0703, 0.1147, 0.1849]) -Greedy action tensor([ 1.5927, -0.6764, -0.4617, 0.3020]) tensor([0.6637, 0.0686, 0.0851, 0.1826]) -Greedy action tensor([ 1.2269, -0.7367, -0.0042, 0.1796]) tensor([0.5608, 0.0787, 0.1637, 0.1968]) -Greedy action tensor([ 0.9561, -0.4905, 0.0436, -0.1411]) tensor([0.5074, 0.1194, 0.2037, 0.1694]) -Greedy action tensor([ 1.6373, -0.7541, 0.0356, 0.2262]) tensor([0.6506, 0.0595, 0.1311, 0.1587]) -Greedy action tensor([ 1.2542, -0.4967, -0.3025, 0.1885]) tensor([0.5784, 0.1004, 0.1219, 0.1992]) -Greedy action tensor([ 1.2385, 0.0402, -0.1616, 0.1018]) tensor([0.5350, 0.1614, 0.1319, 0.1717]) -Greedy action tensor([ 1.2093, -0.5471, -0.2219, 0.3106]) tensor([0.5498, 0.0949, 0.1314, 0.2238]) -Greedy action tensor([ 1.7404, -0.2813, -0.2545, 0.3649]) tensor([0.6574, 0.0871, 0.0894, 0.1661]) -Greedy action tensor([ 0.7695, -0.0939, 0.0977, 0.0037]) tensor([0.4171, 0.1759, 0.2130, 0.1939]) -Greedy action tensor([ 1.0738, -0.6458, -0.1767, 0.3145]) tensor([0.5172, 0.0927, 0.1481, 0.2420]) -Greedy action tensor([ 0.7249, -0.4886, -0.2549, 0.5949]) tensor([0.3921, 0.1165, 0.1472, 0.3443]) -Greedy action tensor([ 0.4725, -0.4839, 0.0938, -0.0815]) tensor([0.3783, 0.1454, 0.2590, 0.2174]) -Greedy action tensor([ 1.4294, -0.5733, -0.3736, 0.1633]) tensor([0.6322, 0.0853, 0.1042, 0.1782]) -Greedy action tensor([ 1.0108, 0.2363, 0.1570, -0.1322]) tensor([0.4534, 0.2090, 0.1931, 0.1446]) -Greedy action tensor([ 0.8325, -0.3344, -0.2152, 0.1030]) tensor([0.4664, 0.1452, 0.1636, 0.2249]) -Greedy action tensor([ 0.9524, -0.5579, -0.2883, -0.2630]) tensor([0.5535, 0.1222, 0.1601, 0.1642]) -Greedy action tensor([ 2.1247, -0.7977, -0.2811, 0.4976]) tensor([0.7460, 0.0401, 0.0673, 0.1466]) -Greedy action tensor([ 1.0303, -0.4806, 0.0389, 0.2347]) tensor([0.4895, 0.1080, 0.1816, 0.2209]) -Greedy action tensor([ 5.2129e-01, -3.1065e-01, -2.4626e-03, 4.7596e-04]) tensor([0.3815, 0.1660, 0.2259, 0.2266]) -Greedy action tensor([ 0.8714, -0.3627, -0.3311, 0.4092]) tensor([0.4502, 0.1310, 0.1353, 0.2836]) -Greedy action tensor([ 1.2172, -0.6287, -0.1868, 0.0456]) tensor([0.5837, 0.0921, 0.1434, 0.1808]) -Greedy action tensor([ 0.5538, -0.2359, -0.1180, 0.0923]) tensor([0.3853, 0.1749, 0.1968, 0.2429]) -Greedy action tensor([ 1.4955, -0.3768, -0.4398, 0.4315]) tensor([0.6086, 0.0936, 0.0879, 0.2100]) -Greedy action tensor([ 1.2346, -0.6542, -0.4529, 0.7090]) tensor([0.5188, 0.0785, 0.0960, 0.3067]) -Greedy action tensor([ 1.7254, -1.1360, -0.2255, 0.5512]) tensor([0.6629, 0.0379, 0.0942, 0.2049]) -Greedy action tensor([ 1.4675, -0.5897, -0.2824, 0.4129]) tensor([0.6061, 0.0775, 0.1053, 0.2111]) -Greedy action tensor([ 1.1899, -0.5157, -0.2878, 0.3461]) tensor([0.5435, 0.0987, 0.1240, 0.2338]) -Greedy action tensor([ 1.1784, -0.5417, 0.1412, 0.1496]) tensor([0.5288, 0.0947, 0.1874, 0.1890]) -Greedy action tensor([ 1.3873, 0.6251, -0.2516, 0.2691]) tensor([0.5031, 0.2348, 0.0977, 0.1644]) -Greedy action tensor([1.3854, 0.4622, 0.8973, 0.5735]) tensor([0.4073, 0.1618, 0.2500, 0.1808]) -Greedy action tensor([ 0.7391, -1.6655, -0.0244, -0.0178]) tensor([0.4937, 0.0446, 0.2301, 0.2316]) -Greedy action tensor([ 0.3179, 0.3884, -0.1620, 1.1865]) tensor([0.1970, 0.2114, 0.1219, 0.4696]) -Greedy action tensor([-0.3845, -2.4453, -0.0760, 0.3919]) tensor([0.2145, 0.0273, 0.2920, 0.4662]) -Greedy action tensor([ 1.1290, 0.2823, -0.6363, -0.1107]) tensor([0.5293, 0.2270, 0.0906, 0.1532]) -Greedy action tensor([ 0.5093, -0.4139, 1.7150, 1.0306]) tensor([0.1557, 0.0619, 0.5201, 0.2623]) -Greedy action tensor([ 0.3415, -0.2044, 0.1259, -0.2379]) tensor([0.3395, 0.1967, 0.2736, 0.1902]) -Greedy action tensor([ 0.7670, -0.1348, 0.6338, 0.9086]) tensor([0.2913, 0.1182, 0.2549, 0.3356]) -Greedy action tensor([ 0.7423, -0.0554, 0.9517, 1.1011]) tensor([0.2430, 0.1094, 0.2996, 0.3479]) -Greedy action tensor([ 1.8851, -1.0610, 2.2048, 1.4012]) tensor([0.3283, 0.0173, 0.4520, 0.2024]) -Greedy action tensor([ 0.2432, -1.3718, -0.3990, 0.6825]) tensor([0.3052, 0.0607, 0.1606, 0.4735]) -Greedy action tensor([1.1004, 0.0533, 1.6851, 0.2737]) tensor([0.2791, 0.0980, 0.5008, 0.1221]) -Greedy action tensor([ 0.3665, 0.1230, -0.7685, 0.7345]) tensor([0.2817, 0.2208, 0.0905, 0.4070]) -Greedy action tensor([-1.9443, 0.5531, 0.8737, -0.3790]) tensor([0.0288, 0.3504, 0.4828, 0.1380]) -Greedy action tensor([ 0.9020, 0.2491, -1.3043, -0.5542]) tensor([0.5365, 0.2793, 0.0591, 0.1251]) -Greedy action tensor([-0.5754, -0.7541, 1.6811, -0.5607]) tensor([0.0806, 0.0674, 0.7701, 0.0818]) -Greedy action tensor([ 0.5650, -1.7506, -0.4426, -0.1286]) tensor([0.5093, 0.0503, 0.1859, 0.2545]) -Greedy action tensor([ 0.8123, -0.2806, -0.2931, 1.3449]) tensor([0.2968, 0.0995, 0.0983, 0.5055]) -Greedy action tensor([-0.0709, -1.3166, -0.0748, -0.0187]) tensor([0.2996, 0.0862, 0.2985, 0.3157]) -Greedy action tensor([-1.6534, -1.4848, 0.2094, 0.4841]) tensor([0.0585, 0.0692, 0.3766, 0.4957]) -Greedy action tensor([-1.4290, -0.0232, -0.1524, -1.0161]) tensor([0.0983, 0.4009, 0.3523, 0.1485]) -Greedy action tensor([ 1.3621, -1.5934, -0.2308, 0.8873]) tensor([0.5327, 0.0277, 0.1083, 0.3313]) -Greedy action tensor([-0.6521, 0.6778, 1.4309, 0.4579]) tensor([0.0631, 0.2386, 0.5067, 0.1915]) -Greedy action tensor([0.0396, 0.1794, 0.5213, 0.1032]) tensor([0.2068, 0.2379, 0.3348, 0.2204]) -Greedy action tensor([ 1.2597, -0.7104, 0.6116, 0.7524]) tensor([0.4416, 0.0616, 0.2310, 0.2659]) -Greedy action tensor([ 1.3349, -0.2971, 0.4143, 0.0456]) tensor([0.5350, 0.1046, 0.2131, 0.1474]) -Greedy action tensor([0.8676, 0.4028, 0.2535, 1.1880]) tensor([0.2819, 0.1771, 0.1526, 0.3884]) -Greedy action tensor([ 0.9168, 1.1328, 0.9916, -0.2449]) tensor([0.2753, 0.3418, 0.2967, 0.0862]) -Greedy action tensor([0.2165, 0.8718, 0.5545, 0.0641]) tensor([0.1928, 0.3713, 0.2703, 0.1656]) -Greedy action tensor([-0.0023, -1.4589, -0.1205, 0.2678]) tensor([0.2914, 0.0679, 0.2589, 0.3818]) -Greedy action tensor([-1.4020, -2.0185, -0.7029, -0.6663]) tensor([0.1773, 0.0957, 0.3568, 0.3701]) -Greedy action tensor([ 0.6267, -0.3349, 0.4167, -0.2821]) tensor([0.3852, 0.1473, 0.3123, 0.1552]) -Greedy action tensor([ 0.5167, -0.3180, 0.0686, 1.6511]) tensor([0.1930, 0.0838, 0.1233, 0.6000]) -Greedy action tensor([ 1.8974, -0.6327, 1.5949, 0.2719]) tensor([0.4962, 0.0395, 0.3667, 0.0977]) -Greedy action tensor([0.4453, 1.2573, 0.0106, 0.2619]) tensor([0.2113, 0.4760, 0.1368, 0.1759]) -Greedy action tensor([ 0.0304, -0.4796, 0.2349, -0.4698]) tensor([0.2912, 0.1749, 0.3573, 0.1766]) -Greedy action tensor([ 1.2955, -0.7498, 0.3590, 1.3590]) tensor([0.3866, 0.0500, 0.1515, 0.4119]) -Greedy action tensor([1.1470, 0.6542, 0.2291, 0.3495]) tensor([0.4064, 0.2483, 0.1623, 0.1831]) -Greedy action tensor([-0.3915, 0.1234, -0.0484, -0.7707]) tensor([0.2098, 0.3510, 0.2956, 0.1436]) -Greedy action tensor([ 1.8198, -0.9123, 0.6373, 1.1879]) tensor([0.5254, 0.0342, 0.1610, 0.2793]) -Greedy action tensor([ 0.8786, -0.8048, 0.9419, 2.1540]) tensor([0.1715, 0.0319, 0.1827, 0.6140]) -Greedy action tensor([-0.3254, 0.3562, -0.5309, -0.5327]) tensor([0.2172, 0.4294, 0.1769, 0.1765]) -Greedy action tensor([-1.0693, -1.8118, -0.8002, 1.0332]) tensor([0.0911, 0.0434, 0.1193, 0.7462]) -Greedy action tensor([0.1318, 1.6975, 0.2246, 0.3890]) tensor([0.1223, 0.5853, 0.1342, 0.1582]) -Greedy action tensor([ 0.2807, -0.2166, 0.5320, -0.0541]) tensor([0.2770, 0.1685, 0.3562, 0.1982]) -Greedy action tensor([-0.0185, -1.0336, 0.1615, 2.4782]) tensor([0.0680, 0.0246, 0.0814, 0.8259]) -Greedy action tensor([0.7201, 0.1094, 0.6340, 0.4587]) tensor([0.3095, 0.1681, 0.2840, 0.2384]) -Greedy action tensor([ 0.3040, 0.6680, -0.5672, 1.2267]) tensor([0.1861, 0.2678, 0.0779, 0.4682]) -Greedy action tensor([ 0.8570, -0.2630, 0.9162, 1.0889]) tensor([0.2741, 0.0894, 0.2908, 0.3457]) -Greedy action tensor([-0.1185, 0.4920, 0.4837, -0.3721]) tensor([0.1837, 0.3383, 0.3355, 0.1426]) -Greedy action tensor([ 1.7334, -0.3225, 1.0931, 0.6661]) tensor([0.5002, 0.0640, 0.2637, 0.1720]) -Greedy action tensor([ 0.6563, -1.1095, 0.0825, 1.3136]) tensor([0.2729, 0.0467, 0.1538, 0.5266]) -Greedy action tensor([-0.3177, -1.0546, -0.5196, -0.2938]) tensor([0.3012, 0.1442, 0.2461, 0.3085]) -Greedy action tensor([ 2.2450, -1.2420, 0.1544, 1.2261]) tensor([0.6600, 0.0202, 0.0816, 0.2383]) -Greedy action tensor([ 0.2812, 0.0684, 0.1352, -0.1789]) tensor([0.3027, 0.2447, 0.2616, 0.1911]) -Greedy action tensor([ 1.1952, 0.3151, -0.3277, 1.2816]) tensor([0.3672, 0.1523, 0.0801, 0.4004]) -Greedy action tensor([1.8737, 0.3360, 1.0811, 1.2822]) tensor([0.4502, 0.0967, 0.2038, 0.2492]) -Greedy action tensor([ 0.4213, -1.2608, 1.1051, 0.9782]) tensor([0.2036, 0.0379, 0.4033, 0.3552]) -Greedy action tensor([ 0.3759, 0.5050, -0.9160, 0.8823]) tensor([0.2456, 0.2794, 0.0675, 0.4075]) -Greedy action tensor([-0.5695, -1.2963, 0.0056, 0.6618]) tensor([0.1496, 0.0723, 0.2658, 0.5123]) -Greedy action tensor([ 0.0829, -1.7230, -0.0342, 0.8929]) tensor([0.2325, 0.0382, 0.2068, 0.5225]) -Greedy action tensor([-0.3166, -0.8263, -1.2154, 0.2462]) tensor([0.2657, 0.1596, 0.1082, 0.4665]) -Greedy action tensor([ 0.6126, -0.1844, 1.1234, -0.6031]) tensor([0.2929, 0.1320, 0.4882, 0.0869]) -Greedy action tensor([-0.2882, -0.3840, -0.7491, 1.9770]) tensor([0.0822, 0.0746, 0.0518, 0.7914]) -Greedy action tensor([1.0322, 0.3844, 0.9610, 0.1615]) tensor([0.3480, 0.1821, 0.3241, 0.1457]) -Greedy action tensor([ 1.4258, -1.1681, -0.3144, 0.7355]) tensor([0.5709, 0.0427, 0.1002, 0.2863]) -Greedy action tensor([-0.6491, -1.6522, 1.3265, -0.6177]) tensor([0.1041, 0.0382, 0.7504, 0.1074]) -Greedy action tensor([ 0.9535, -1.8175, 0.2752, 0.8196]) tensor([0.4090, 0.0256, 0.2076, 0.3578]) -Greedy action tensor([ 0.0212, -1.3934, -0.8361, 0.4226]) tensor([0.3163, 0.0769, 0.1342, 0.4726]) -Greedy action tensor([ 0.5232, -0.4887, -0.6586, 0.0701]) tensor([0.4337, 0.1576, 0.1330, 0.2757]) -Greedy action tensor([ 0.5789, -0.2202, -0.1927, 0.7819]) tensor([0.3188, 0.1434, 0.1474, 0.3905]) -Greedy action tensor([0.6426, 0.6885, 0.6256, 0.5378]) tensor([0.2544, 0.2664, 0.2501, 0.2291]) -Greedy action tensor([ 0.6673, -0.4966, 0.0025, 1.9509]) tensor([0.1839, 0.0574, 0.0946, 0.6640]) -Greedy action tensor([1.0521, 0.5062, 1.1844, 0.1841]) tensor([0.3184, 0.1845, 0.3635, 0.1337]) -Greedy action tensor([0.1987, 0.6584, 0.3215, 0.4217]) tensor([0.2014, 0.3190, 0.2278, 0.2518]) -Greedy action tensor([ 0.5609, -0.5565, -1.1781, 1.5727]) tensor([0.2351, 0.0769, 0.0413, 0.6467]) -Greedy action tensor([ 0.7557, -1.7649, -0.0522, 1.5691]) tensor([0.2644, 0.0213, 0.1179, 0.5964]) -Greedy action tensor([-0.2888, -2.5393, 0.0745, 0.4118]) tensor([0.2194, 0.0231, 0.3155, 0.4420]) -Greedy action tensor([ 0.7861, -0.1927, -0.2344, -0.1986]) tensor([0.4740, 0.1781, 0.1708, 0.1771]) -Greedy action tensor([ 0.6179, -0.6651, -0.3649, 0.9296]) tensor([0.3314, 0.0919, 0.1240, 0.4526]) -Greedy action tensor([ 1.1869, -0.1724, 0.8498, 1.8086]) tensor([0.2609, 0.0670, 0.1862, 0.4858]) -Greedy action tensor([ 0.3644, -0.0394, 0.0177, -0.1124]) tensor([0.3338, 0.2229, 0.2360, 0.2072]) -Greedy action tensor([ 0.3310, -0.0669, -0.0268, -0.3424]) tensor([0.3471, 0.2332, 0.2427, 0.1770]) -Greedy action tensor([ 0.6956, -0.6142, -0.0571, -0.1663]) tensor([0.4622, 0.1248, 0.2178, 0.1952]) -Greedy action tensor([ 0.4794, -0.0787, -0.0421, -0.2105]) tensor([0.3749, 0.2145, 0.2225, 0.1880]) -Greedy action tensor([ 0.7513, -0.2694, -0.0279, -0.2269]) tensor([0.4556, 0.1642, 0.2090, 0.1713]) -Greedy action tensor([ 0.7298, 0.0465, -0.0702, -0.1474]) tensor([0.4219, 0.2130, 0.1896, 0.1755]) -Greedy action tensor([ 0.3662, -0.1404, -0.1256, -0.2996]) tensor([0.3666, 0.2209, 0.2242, 0.1884]) -Greedy action tensor([ 0.5377, -0.3919, 0.0991, -0.4285]) tensor([0.4132, 0.1631, 0.2665, 0.1572]) -Greedy action tensor([ 0.2759, 0.0198, -0.0075, -0.4768]) tensor([0.3335, 0.2582, 0.2512, 0.1571]) -Greedy action tensor([ 0.3872, 0.0160, -0.0638, -0.2010]) tensor([0.3470, 0.2394, 0.2210, 0.1927]) -Greedy action tensor([ 0.8898, -0.3421, -0.0755, -0.3486]) tensor([0.5096, 0.1487, 0.1941, 0.1477]) -Greedy action tensor([ 0.4445, -0.4692, -0.1771, -0.1079]) tensor([0.3978, 0.1595, 0.2137, 0.2290]) -Greedy action tensor([ 0.6144, -0.2297, -0.0085, -0.4246]) tensor([0.4310, 0.1853, 0.2312, 0.1525]) -Greedy action tensor([ 0.4298, -0.3339, -0.0627, -0.1819]) tensor([0.3818, 0.1779, 0.2333, 0.2071]) -Greedy action tensor([ 0.6773, -0.4531, -0.1407, -0.3298]) tensor([0.4696, 0.1516, 0.2072, 0.1715]) -Greedy action tensor([ 0.7617, -0.5664, -0.1787, -0.5603]) tensor([0.5203, 0.1379, 0.2031, 0.1387]) -Greedy action tensor([ 0.6726, -0.4348, -0.0737, -0.2823]) tensor([0.4568, 0.1509, 0.2166, 0.1758]) -Greedy action tensor([ 0.6710, -0.1528, 0.1190, -0.1022]) tensor([0.4039, 0.1772, 0.2325, 0.1864]) -Greedy action tensor([ 0.7743, -0.6470, -0.0782, -0.2926]) tensor([0.4970, 0.1200, 0.2119, 0.1710]) -Greedy action tensor([ 0.3105, -0.0872, -0.1492, -0.2087]) tensor([0.3450, 0.2318, 0.2179, 0.2053]) -Greedy action tensor([ 0.6156, -0.5365, 0.0034, -0.2379]) tensor([0.4378, 0.1383, 0.2374, 0.1865]) -Greedy action tensor([ 0.7350, -0.7683, 0.0187, -0.3451]) tensor([0.4877, 0.1085, 0.2383, 0.1656]) -Greedy action tensor([ 0.3300, -0.0601, -0.0616, -0.2299]) tensor([0.3420, 0.2315, 0.2312, 0.1954]) -Greedy action tensor([ 0.5806, -0.5046, -0.0035, -0.2085]) tensor([0.4256, 0.1438, 0.2373, 0.1933]) -Greedy action tensor([ 0.9519, -0.6369, 0.0313, -0.3710]) tensor([0.5351, 0.1093, 0.2131, 0.1425]) -Greedy action tensor([ 0.9425, -0.9588, -0.0454, -0.4590]) tensor([0.5656, 0.0845, 0.2106, 0.1393]) -Greedy action tensor([ 0.7154, -0.3346, -0.1936, -0.3545]) tensor([0.4771, 0.1670, 0.1922, 0.1637]) -Greedy action tensor([ 0.4661, -0.2265, 0.2149, -0.3037]) tensor([0.3648, 0.1825, 0.2838, 0.1689]) -Greedy action tensor([ 0.5588, -0.0031, -0.0576, -0.2453]) tensor([0.3910, 0.2229, 0.2111, 0.1750]) -Greedy action tensor([ 0.6764, 0.0289, -0.1148, 0.0470]) tensor([0.3985, 0.2085, 0.1806, 0.2124]) -Greedy action tensor([ 0.4905, -0.1976, -0.0562, -0.1374]) tensor([0.3824, 0.1922, 0.2214, 0.2041]) -Greedy action tensor([ 0.4397, -0.1946, 0.0491, -0.3591]) tensor([0.3764, 0.1996, 0.2547, 0.1693]) -Greedy action tensor([ 0.7410, -0.3351, 0.0015, -0.3280]) tensor([0.4626, 0.1577, 0.2208, 0.1588]) -Greedy action tensor([ 0.4440, -0.1728, -0.0618, -0.3727]) tensor([0.3869, 0.2088, 0.2333, 0.1710]) -Greedy action tensor([ 0.7203, 0.1178, -0.0184, -0.3344]) tensor([0.4213, 0.2306, 0.2013, 0.1467]) -Greedy action tensor([ 0.8163, -0.2008, 0.1741, -0.4100]) tensor([0.4585, 0.1658, 0.2412, 0.1345]) -Greedy action tensor([ 0.4684, -0.3139, 0.1326, -0.2179]) tensor([0.3738, 0.1709, 0.2671, 0.1882]) -Greedy action tensor([ 0.4221, -0.1366, -0.0228, -0.1749]) tensor([0.3619, 0.2070, 0.2319, 0.1992]) -Greedy action tensor([ 0.4220, 0.0820, 0.1195, -0.3313]) tensor([0.3423, 0.2436, 0.2529, 0.1611]) -Greedy action tensor([ 0.8222, -0.4083, -0.0089, -0.6077]) tensor([0.5084, 0.1485, 0.2214, 0.1217]) -Greedy action tensor([ 0.6032, -0.0324, 0.0559, -0.1217]) tensor([0.3857, 0.2043, 0.2231, 0.1868]) -Greedy action tensor([ 0.2091, 0.0737, -0.2094, 0.0066]) tensor([0.2987, 0.2608, 0.1965, 0.2439]) -Greedy action tensor([ 0.8062, -0.3558, -0.0678, -0.3480]) tensor([0.4889, 0.1530, 0.2040, 0.1541]) -Greedy action tensor([ 0.6526, 0.0867, -0.0887, -0.3434]) tensor([0.4143, 0.2353, 0.1974, 0.1530]) -Greedy action tensor([ 0.7475, -0.1688, -0.0156, 0.0256]) tensor([0.4252, 0.1701, 0.1982, 0.2066]) -Greedy action tensor([ 0.8784, -0.6908, 0.1213, -0.4142]) tensor([0.5123, 0.1067, 0.2403, 0.1407]) -Greedy action tensor([ 0.9477, -0.5871, -0.0687, -0.5572]) tensor([0.5557, 0.1198, 0.2011, 0.1234]) -Greedy action tensor([ 0.4682, -0.1351, -0.1096, -0.1251]) tensor([0.3758, 0.2056, 0.2109, 0.2077]) -Greedy action tensor([ 0.2951, -0.0712, -0.0738, -0.0782]) tensor([0.3254, 0.2256, 0.2250, 0.2240]) -Greedy action tensor([ 0.6844, -0.4424, -0.0138, -0.2810]) tensor([0.4541, 0.1471, 0.2259, 0.1729]) -Greedy action tensor([ 0.7889, -0.3736, -0.0934, -0.3915]) tensor([0.4917, 0.1538, 0.2035, 0.1510]) -Greedy action tensor([ 1.0317, -0.4420, 0.1168, -0.4071]) tensor([0.5356, 0.1227, 0.2146, 0.1271]) -Greedy action tensor([-0.1048, -0.0281, 0.0439, -0.1518]) tensor([0.2384, 0.2574, 0.2766, 0.2275]) -Greedy action tensor([ 0.6330, -0.3917, -0.0088, -0.2204]) tensor([0.4327, 0.1553, 0.2277, 0.1843]) -Greedy action tensor([ 0.6259, -0.4835, -0.1003, -0.3133]) tensor([0.4536, 0.1496, 0.2194, 0.1773]) -Greedy action tensor([ 0.7816, -0.4630, -0.0112, -0.5253]) tensor([0.4972, 0.1432, 0.2250, 0.1346]) -Greedy action tensor([ 0.3966, 0.0436, -0.1782, -0.2947]) tensor([0.3615, 0.2540, 0.2035, 0.1811]) -Greedy action tensor([ 0.7267, -0.2658, 0.0300, 0.0151]) tensor([0.4238, 0.1571, 0.2111, 0.2080]) -Greedy action tensor([ 0.2986, 0.2837, -0.0599, 0.0271]) tensor([0.2902, 0.2859, 0.2028, 0.2212]) -Greedy action tensor([ 0.4544, -0.2008, -0.1764, -0.3132]) tensor([0.3975, 0.2064, 0.2115, 0.1845]) -Greedy action tensor([ 0.4686, -0.2343, -0.1424, -0.4388]) tensor([0.4096, 0.2028, 0.2223, 0.1653]) -Greedy action tensor([ 0.4171, 0.1728, -0.0481, -0.2994]) tensor([0.3449, 0.2701, 0.2166, 0.1685]) -Greedy action tensor([ 0.6195, -0.3877, 0.0636, -0.4025]) tensor([0.4350, 0.1589, 0.2495, 0.1566]) -Greedy action tensor([ 0.4092, 0.1150, 0.0492, -0.2275]) tensor([0.3365, 0.2507, 0.2348, 0.1780]) -Greedy action tensor([ 0.7033, -0.3071, 0.0158, -0.4911]) tensor([0.4609, 0.1678, 0.2317, 0.1396]) -Greedy action tensor([ 0.7332, -0.4791, -0.2565, -0.3920]) tensor([0.5016, 0.1492, 0.1864, 0.1628]) -Greedy action tensor([ 0.8208, -0.5131, -0.1099, -0.5893]) tensor([0.5258, 0.1385, 0.2073, 0.1284]) -Greedy action tensor([ 0.9794, -0.6668, 0.0125, -0.5526]) tensor([0.5589, 0.1078, 0.2125, 0.1208]) -Greedy action tensor([ 0.5418, -0.2930, -0.0282, -0.1908]) tensor([0.4032, 0.1750, 0.2280, 0.1938]) -Greedy action tensor([ 0.5972, -0.5159, -0.1613, -0.3350]) tensor([0.4565, 0.1500, 0.2138, 0.1797]) -Greedy action tensor([ 0.4722, 0.0525, 0.0579, -0.3085]) tensor([0.3602, 0.2367, 0.2380, 0.1650]) -Greedy action tensor([ 0.3674, 0.0371, 0.1023, -0.2444]) tensor([0.3302, 0.2373, 0.2533, 0.1791]) -Greedy action tensor([ 0.4699, -0.2992, 0.2564, -0.4574]) tensor([0.3750, 0.1738, 0.3029, 0.1483]) -Greedy action tensor([ 0.5442, -0.4274, -0.3540, -0.4606]) tensor([0.4647, 0.1759, 0.1893, 0.1701]) -Greedy action tensor([ 0.3171, -0.0255, -0.0587, -0.2624]) tensor([0.3382, 0.2401, 0.2323, 0.1895]) -Greedy action tensor([ 0.6023, -0.3528, -0.0320, -0.3053]) tensor([0.4313, 0.1660, 0.2287, 0.1740]) -Greedy action tensor([ 0.6663, -0.4551, 0.0215, -0.3236]) tensor([0.4500, 0.1466, 0.2361, 0.1672]) -Greedy action tensor([ 0.4175, 0.0451, -0.0692, -0.2714]) tensor([0.3564, 0.2456, 0.2191, 0.1790]) -Greedy action tensor([ 0.7930, -0.3895, -0.0852, -0.5247]) tensor([0.5026, 0.1540, 0.2088, 0.1346]) -Greedy action tensor([ 0.7864, -0.4238, -0.0249, -0.4248]) tensor([0.4901, 0.1461, 0.2178, 0.1460]) -Greedy action tensor([ 0.6454, -0.4635, 0.0206, -0.4870]) tensor([0.4571, 0.1508, 0.2447, 0.1473]) -Greedy action tensor([-1.3103, -0.5016, 0.5819, 0.7443]) tensor([0.0566, 0.1270, 0.3752, 0.4413]) -Greedy action tensor([-1.8742, -0.4638, 0.6412, -0.1298]) tensor([0.0431, 0.1767, 0.5335, 0.2467]) -Greedy action tensor([-1.0426, -0.5851, 0.1513, 0.6737]) tensor([0.0874, 0.1381, 0.2884, 0.4862]) -Greedy action tensor([-1.6872, -0.5359, 1.0481, 0.6255]) tensor([0.0337, 0.1066, 0.5194, 0.3404]) -Greedy action tensor([-1.8784, -0.4677, 0.6433, -0.1297]) tensor([0.0429, 0.1760, 0.5344, 0.2467]) -Greedy action tensor([-1.6104, -0.4551, 0.6165, 0.2188]) tensor([0.0508, 0.1614, 0.4712, 0.3166]) -Greedy action tensor([-1.9311, -0.4085, 0.6490, -0.1704]) tensor([0.0407, 0.1864, 0.5365, 0.2364]) -Greedy action tensor([-1.4265, -0.4603, -0.1696, -0.3364]) tensor([0.0988, 0.2598, 0.3474, 0.2940]) -Greedy action tensor([-1.9269, -0.4459, 0.6544, -0.1709]) tensor([0.0410, 0.1802, 0.5415, 0.2373]) -Greedy action tensor([-1.4874, -0.4040, 0.4658, 0.1198]) tensor([0.0625, 0.1847, 0.4409, 0.3119]) -Greedy action tensor([-0.8125, -0.0513, 0.6153, 1.2302]) tensor([0.0666, 0.1425, 0.2776, 0.5133]) -Greedy action tensor([-1.7388, -0.4541, 0.5648, -0.0326]) tensor([0.0497, 0.1795, 0.4972, 0.2736]) -Greedy action tensor([-1.7748, -0.1592, 0.5454, -0.1200]) tensor([0.0466, 0.2346, 0.4747, 0.2440]) -Greedy action tensor([-1.1447, -0.7031, 0.9044, 1.1671]) tensor([0.0490, 0.0762, 0.3803, 0.4945]) -Greedy action tensor([-1.9128, -0.4606, 0.6551, -0.1588]) tensor([0.0415, 0.1774, 0.5413, 0.2398]) -Greedy action tensor([-1.4597, 0.0425, 0.4121, 0.3411]) tensor([0.0554, 0.2489, 0.3602, 0.3355]) -Greedy action tensor([-1.8687, -0.4522, 0.6338, -0.1290]) tensor([0.0434, 0.1790, 0.5303, 0.2473]) -Greedy action tensor([-1.9255, -0.4391, 0.6568, -0.1672]) tensor([0.0409, 0.1808, 0.5410, 0.2373]) -Greedy action tensor([-1.6053, -0.5903, 0.1851, -0.8132]) tensor([0.0836, 0.2307, 0.5010, 0.1846]) -Greedy action tensor([-1.8914, -0.9809, 0.1965, -0.4081]) tensor([0.0626, 0.1557, 0.5055, 0.2761]) -Greedy action tensor([-1.4196, -0.5623, 0.4144, 0.0182]) tensor([0.0723, 0.1704, 0.4527, 0.3046]) -Greedy action tensor([-0.4564, -0.1447, 1.0133, 1.5164]) tensor([0.0719, 0.0982, 0.3127, 0.5172]) -Greedy action tensor([-1.5754, -0.5536, -0.0847, -0.4653]) tensor([0.0889, 0.2469, 0.3946, 0.2697]) -Greedy action tensor([-1.7121, -0.3580, 0.5272, -0.0153]) tensor([0.0507, 0.1964, 0.4761, 0.2767]) -Greedy action tensor([-0.7989, -0.5409, 0.2991, -0.1027]) tensor([0.1370, 0.1773, 0.4108, 0.2749]) -Greedy action tensor([-1.4497, -0.9662, 0.8787, 0.7927]) tensor([0.0448, 0.0727, 0.4602, 0.4222]) -Greedy action tensor([-1.8685, -0.4225, 0.6285, -0.1235]) tensor([0.0433, 0.1837, 0.5254, 0.2477]) -Greedy action tensor([-1.9027, -0.3602, 0.6370, -0.1429]) tensor([0.0414, 0.1935, 0.5246, 0.2405]) -Greedy action tensor([-1.5859, -0.4999, 0.5741, 0.3306]) tensor([0.0515, 0.1525, 0.4463, 0.3498]) -Greedy action tensor([-1.5018, -0.5441, 0.4158, 0.1099]) tensor([0.0648, 0.1690, 0.4413, 0.3249]) -Greedy action tensor([-1.2305, -0.5644, 0.2731, 0.3502]) tensor([0.0813, 0.1582, 0.3656, 0.3949]) -Greedy action tensor([-1.3350, -0.4581, 0.7528, -0.6161]) tensor([0.0740, 0.1777, 0.5965, 0.1518]) -Greedy action tensor([-1.8687, -0.4784, 0.6298, -0.1328]) tensor([0.0438, 0.1757, 0.5322, 0.2483]) -Greedy action tensor([-1.9997, -0.6094, 1.1001, 0.3801]) tensor([0.0263, 0.1057, 0.5838, 0.2842]) -Greedy action tensor([-1.8062, -0.4832, 0.6099, -0.0559]) tensor([0.0461, 0.1729, 0.5159, 0.2651]) -Greedy action tensor([-1.5952, -0.5442, 0.2012, -0.2987]) tensor([0.0738, 0.2112, 0.4450, 0.2700]) -Greedy action tensor([-1.2249, -0.6738, 0.4568, 0.1699]) tensor([0.0823, 0.1429, 0.4426, 0.3322]) -Greedy action tensor([-1.4780, -0.5475, 0.4085, 0.1031]) tensor([0.0667, 0.1691, 0.4400, 0.3242]) -Greedy action tensor([-1.8696, -0.4833, 0.6607, -0.1061]) tensor([0.0428, 0.1710, 0.5369, 0.2494]) -Greedy action tensor([-1.6828, -0.5499, 0.5537, -0.0147]) tensor([0.0533, 0.1654, 0.4988, 0.2825]) -Greedy action tensor([-1.8487, -0.4600, 0.6186, -0.1144]) tensor([0.0445, 0.1785, 0.5248, 0.2522]) -Greedy action tensor([-0.9879, -0.0476, 0.2211, 0.9303]) tensor([0.0729, 0.1867, 0.2442, 0.4963]) -Greedy action tensor([-1.4801, -0.3789, 0.6979, 0.7429]) tensor([0.0453, 0.1363, 0.4000, 0.4184]) -Greedy action tensor([-1.0688, -0.6389, 1.1342, 1.4423]) tensor([0.0418, 0.0643, 0.3786, 0.5153]) -Greedy action tensor([-1.8518, -0.4766, 0.6484, -0.0865]) tensor([0.0435, 0.1721, 0.5302, 0.2542]) -Greedy action tensor([-1.3028, 0.4671, 0.1911, -0.0157]) tensor([0.0669, 0.3927, 0.2980, 0.2423]) -Greedy action tensor([-1.8749, -0.4456, 0.6340, -0.1336]) tensor([0.0432, 0.1802, 0.5304, 0.2462]) -Greedy action tensor([-1.8454, -0.5297, 1.2533, 0.7369]) tensor([0.0249, 0.0929, 0.5525, 0.3297]) -Greedy action tensor([-1.6643, -0.5168, 0.5371, 0.0312]) tensor([0.0537, 0.1690, 0.4849, 0.2924]) -Greedy action tensor([0.0514, 0.1573, 0.9059, 1.7612]) tensor([0.1001, 0.1113, 0.2353, 0.5533]) -Greedy action tensor([-0.9849, -0.4067, 0.3796, 0.7235]) tensor([0.0819, 0.1459, 0.3204, 0.4518]) -Greedy action tensor([-1.6507, -0.3600, 0.7179, 0.5206]) tensor([0.0415, 0.1509, 0.4435, 0.3641]) -Greedy action tensor([-1.3454, -0.7979, 1.2597, 1.2799]) tensor([0.0333, 0.0575, 0.4500, 0.4592]) -Greedy action tensor([-0.8341, -0.1020, 0.3632, -0.4373]) tensor([0.1269, 0.2640, 0.4203, 0.1888]) -Greedy action tensor([-1.8241, -0.3573, 0.6050, -0.0904]) tensor([0.0448, 0.1940, 0.5079, 0.2534]) -Greedy action tensor([-1.7133, -0.4981, 0.7787, 0.2596]) tensor([0.0423, 0.1425, 0.5111, 0.3041]) -Greedy action tensor([-1.8604, -0.4648, 0.6355, -0.1137]) tensor([0.0437, 0.1763, 0.5297, 0.2504]) -Greedy action tensor([-1.5826, -0.5202, 0.4788, 0.0922]) tensor([0.0585, 0.1693, 0.4598, 0.3124]) -Greedy action tensor([-1.6284, -0.4858, 0.6984, 0.3155]) tensor([0.0468, 0.1467, 0.4795, 0.3270]) -Greedy action tensor([-0.3609, -0.0717, 0.1799, 0.1974]) tensor([0.1724, 0.2302, 0.2961, 0.3013]) -Greedy action tensor([-1.1034, 0.7992, 0.2672, 0.6763]) tensor([0.0569, 0.3816, 0.2241, 0.3374]) -Greedy action tensor([-1.8042, -0.2818, 0.5690, -0.0933]) tensor([0.0458, 0.2098, 0.4912, 0.2533]) -Greedy action tensor([-0.9743, -0.6245, 0.2685, 0.1250]) tensor([0.1125, 0.1597, 0.3900, 0.3378]) -Greedy action tensor([-1.8488, -0.3701, 0.6502, -0.0800]) tensor([0.0427, 0.1873, 0.5196, 0.2504]) -Greedy action tensor([-1.0868, -0.5345, 0.3751, 0.1101]) tensor([0.0965, 0.1677, 0.4164, 0.3194]) -Greedy action tensor([-1.8750, -0.4419, 0.6251, -0.1452]) tensor([0.0435, 0.1821, 0.5294, 0.2450]) -Greedy action tensor([-1.5597, -0.3840, 0.9668, 0.6899]) tensor([0.0381, 0.1235, 0.4769, 0.3615]) -Greedy action tensor([-1.3272, -0.5803, 0.3447, 0.1972]) tensor([0.0768, 0.1620, 0.4086, 0.3526]) -Greedy action tensor([-1.9641, -0.5437, 1.1853, 0.5160]) tensor([0.0248, 0.1024, 0.5772, 0.2956]) -Greedy action tensor([-1.7121, -0.5147, 0.9604, 0.5281]) tensor([0.0355, 0.1175, 0.5136, 0.3334]) -Greedy action tensor([-1.5661, 0.9682, 0.5120, -0.5069]) tensor([0.0408, 0.5150, 0.3264, 0.1178]) -Greedy action tensor([-1.4032, -0.0119, 0.2891, 0.2394]) tensor([0.0640, 0.2573, 0.3477, 0.3309]) -Greedy action tensor([-0.6144, -0.2254, 0.8394, 1.3645]) tensor([0.0715, 0.1055, 0.3059, 0.5172]) -Greedy action tensor([-1.6321, -0.4047, 0.5002, -0.0322]) tensor([0.0562, 0.1917, 0.4739, 0.2782]) -Greedy action tensor([-1.9161, -0.4408, 0.6505, -0.1624]) tensor([0.0414, 0.1809, 0.5388, 0.2390]) -Greedy action tensor([-0.7383, -0.3522, 0.0720, 0.0206]) tensor([0.1459, 0.2146, 0.3280, 0.3116]) -Greedy action tensor([-1.9298, -0.4551, 0.6584, -0.1719]) tensor([0.0409, 0.1785, 0.5436, 0.2370]) -Greedy action tensor([-1.1520, -0.6381, 0.6776, 1.0918]) tensor([0.0545, 0.0912, 0.3399, 0.5143]) -Greedy action tensor([-1.2949, -0.3540, 0.3862, 0.2184]) tensor([0.0742, 0.1902, 0.3986, 0.3370]) -Greedy action tensor([-1.7519, -0.1911, 0.5366, 0.0028]) tensor([0.0467, 0.2225, 0.4607, 0.2701]) -Greedy action tensor([-1.9046, -0.4568, 0.7179, -0.0371]) tensor([0.0392, 0.1668, 0.5401, 0.2539]) -Greedy action tensor([ 0.7040, -0.2498, 0.0445, 0.1728]) tensor([0.4016, 0.1547, 0.2076, 0.2361]) -Greedy action tensor([ 1.4881, -0.3871, -0.5493, 0.3556]) tensor([0.6227, 0.0955, 0.0812, 0.2006]) -Greedy action tensor([ 1.7254, -0.4503, -0.2149, 0.3567]) tensor([0.6615, 0.0751, 0.0950, 0.1683]) -Greedy action tensor([ 1.0956, -0.4364, -0.2555, 0.0691]) tensor([0.5455, 0.1179, 0.1413, 0.1954]) -Greedy action tensor([ 1.4607, -0.7722, -0.4478, 0.2257]) tensor([0.6467, 0.0693, 0.0959, 0.1881]) -Greedy action tensor([ 1.7291, -0.7686, -0.3225, 0.3206]) tensor([0.6871, 0.0565, 0.0883, 0.1680]) -Greedy action tensor([ 1.7211, -0.3487, -0.2309, 0.2091]) tensor([0.6717, 0.0848, 0.0954, 0.1481]) -Greedy action tensor([ 1.2395, -0.5377, -0.1340, 0.3074]) tensor([0.5506, 0.0931, 0.1394, 0.2168]) -Greedy action tensor([ 0.6690, -0.0757, 0.1995, -0.1737]) tensor([0.3951, 0.1876, 0.2471, 0.1701]) -Greedy action tensor([ 0.7718, -0.1018, -0.1836, 0.2252]) tensor([0.4200, 0.1753, 0.1616, 0.2431]) -Greedy action tensor([ 0.9233, -0.5263, -0.2039, 0.2272]) tensor([0.4861, 0.1141, 0.1575, 0.2423]) -Greedy action tensor([ 1.0727, -0.7312, -0.4078, 0.4636]) tensor([0.5165, 0.0850, 0.1175, 0.2809]) -Greedy action tensor([ 1.8394, -1.1186, -0.2554, 0.9249]) tensor([0.6346, 0.0330, 0.0781, 0.2543]) -Greedy action tensor([ 1.6551, -0.5071, -0.2646, 0.1267]) tensor([0.6763, 0.0778, 0.0992, 0.1467]) -Greedy action tensor([ 0.9539, -0.3346, 0.0180, -0.0072]) tensor([0.4877, 0.1345, 0.1913, 0.1865]) -Greedy action tensor([ 0.0939, -0.1201, -0.2114, 0.2808]) tensor([0.2667, 0.2153, 0.1965, 0.3215]) -Greedy action tensor([ 1.1023, -0.3732, -0.2133, 0.1181]) tensor([0.5346, 0.1222, 0.1434, 0.1998]) -Greedy action tensor([ 1.1461, -0.6282, -0.5964, 0.6550]) tensor([0.5111, 0.0867, 0.0895, 0.3128]) -Greedy action tensor([ 1.3289, -0.5997, -0.2302, 0.2277]) tensor([0.5924, 0.0861, 0.1246, 0.1969]) -Greedy action tensor([ 1.0169, -0.3884, -0.1510, 0.2170]) tensor([0.4986, 0.1223, 0.1551, 0.2240]) -Greedy action tensor([ 1.6557, -0.5890, -0.4050, 0.3052]) tensor([0.6700, 0.0710, 0.0853, 0.1736]) -Greedy action tensor([ 1.1399, -0.1187, -0.2362, -0.3715]) tensor([0.5691, 0.1616, 0.1437, 0.1255]) -Greedy action tensor([ 1.1052, -0.1920, -0.1828, 0.0090]) tensor([0.5310, 0.1451, 0.1465, 0.1774]) -Greedy action tensor([ 1.1656, -0.3399, -0.1323, 0.4622]) tensor([0.5025, 0.1115, 0.1372, 0.2487]) -Greedy action tensor([ 1.0875, -0.4204, -0.2033, 0.1855]) tensor([0.5257, 0.1164, 0.1446, 0.2133]) -Greedy action tensor([ 1.1011, -0.1437, 0.1349, 0.0408]) tensor([0.4963, 0.1429, 0.1889, 0.1719]) -Greedy action tensor([ 1.2149, -0.4460, 0.1274, -0.1906]) tensor([0.5643, 0.1072, 0.1902, 0.1384]) -Greedy action tensor([ 0.9779, -0.3771, -0.1832, 0.2931]) tensor([0.4819, 0.1243, 0.1509, 0.2430]) -Greedy action tensor([ 0.9664, -0.1289, -0.2418, -0.2622]) tensor([0.5192, 0.1737, 0.1551, 0.1520]) -Greedy action tensor([ 0.5621, -0.0154, -0.0848, -0.1566]) tensor([0.3887, 0.2182, 0.2036, 0.1895]) -Greedy action tensor([ 0.7763, -0.5973, -0.2106, 0.4179]) tensor([0.4302, 0.1089, 0.1603, 0.3006]) -Greedy action tensor([ 1.0700, -0.3834, -0.2056, 0.5050]) tensor([0.4804, 0.1123, 0.1342, 0.2731]) -Greedy action tensor([ 0.3045, -0.0658, -0.1346, -0.0608]) tensor([0.3301, 0.2280, 0.2128, 0.2291]) -Greedy action tensor([ 1.0060, -0.5295, 0.0587, 0.0700]) tensor([0.5012, 0.1079, 0.1943, 0.1966]) -Greedy action tensor([ 1.9592, -1.2885, -0.2071, 0.8128]) tensor([0.6797, 0.0264, 0.0779, 0.2160]) -Greedy action tensor([ 1.2931, -0.5395, -0.5402, 0.3151]) tensor([0.5896, 0.0943, 0.0943, 0.2218]) -Greedy action tensor([ 1.1908, -0.8330, -0.9505, 1.0411]) tensor([0.4738, 0.0626, 0.0557, 0.4079]) -Greedy action tensor([ 1.3630, -0.7287, -0.2872, 0.5383]) tensor([0.5702, 0.0704, 0.1095, 0.2499]) -Greedy action tensor([ 0.8879, -0.3685, -0.3800, 0.3949]) tensor([0.4594, 0.1308, 0.1293, 0.2806]) -Greedy action tensor([ 1.3827, -0.1544, -0.2515, 0.1936]) tensor([0.5832, 0.1254, 0.1138, 0.1776]) -Greedy action tensor([ 1.2590, 0.0491, -0.3501, 0.3440]) tensor([0.5266, 0.1571, 0.1054, 0.2109]) -Greedy action tensor([ 0.9890, -0.3305, -0.2158, 0.1340]) tensor([0.5019, 0.1342, 0.1505, 0.2135]) -Greedy action tensor([ 1.2805, -0.5633, -0.3891, 0.1561]) tensor([0.5983, 0.0947, 0.1127, 0.1944]) -Greedy action tensor([ 1.3889, -0.4221, -0.2494, 0.3243]) tensor([0.5873, 0.0960, 0.1141, 0.2025]) -Greedy action tensor([ 1.3218, -0.5234, -0.2804, 0.1387]) tensor([0.6003, 0.0948, 0.1209, 0.1839]) -Greedy action tensor([ 0.7366, -0.5565, -0.1469, 0.2164]) tensor([0.4382, 0.1202, 0.1811, 0.2605]) -Greedy action tensor([ 1.0286, 0.0506, -0.0249, 0.1310]) tensor([0.4690, 0.1764, 0.1635, 0.1911]) -Greedy action tensor([ 0.6368, -0.3107, -0.3136, 0.1277]) tensor([0.4210, 0.1632, 0.1628, 0.2530]) -Greedy action tensor([ 1.3822, -0.4720, -0.1662, 0.0937]) tensor([0.6080, 0.0952, 0.1292, 0.1676]) -Greedy action tensor([ 1.3869, -0.6036, -0.4280, 0.3187]) tensor([0.6086, 0.0831, 0.0991, 0.2091]) -Greedy action tensor([ 0.9477, -0.2819, -0.2757, 0.1769]) tensor([0.4880, 0.1427, 0.1436, 0.2258]) -Greedy action tensor([ 1.2876, -0.3223, -0.3796, -0.0946]) tensor([0.6099, 0.1219, 0.1151, 0.1531]) -Greedy action tensor([ 1.3692, -0.2346, -0.3257, 0.2248]) tensor([0.5871, 0.1181, 0.1078, 0.1870]) -Greedy action tensor([ 0.6645, -0.2269, 0.1686, -0.1021]) tensor([0.4026, 0.1651, 0.2452, 0.1871]) -Greedy action tensor([ 1.4415, -0.4340, -0.4036, 0.2260]) tensor([0.6219, 0.0953, 0.0983, 0.1845]) -Greedy action tensor([ 1.3168, -0.1390, -0.1070, 0.2316]) tensor([0.5519, 0.1287, 0.1329, 0.1864]) -Greedy action tensor([ 1.1253, -0.3442, -0.3930, 0.5076]) tensor([0.5029, 0.1157, 0.1102, 0.2712]) -Greedy action tensor([ 1.5798, -0.3544, -0.3007, -0.0286]) tensor([0.6679, 0.0965, 0.1019, 0.1337]) -Greedy action tensor([ 1.3972, -0.1044, -0.3521, 0.2822]) tensor([0.5798, 0.1292, 0.1008, 0.1901]) -Greedy action tensor([ 0.8784, -0.1347, -0.1685, -0.2602]) tensor([0.4916, 0.1785, 0.1725, 0.1574]) -Greedy action tensor([ 1.0414, -0.5089, -0.2411, 0.1322]) tensor([0.5284, 0.1121, 0.1466, 0.2129]) -Greedy action tensor([ 1.7938, -1.0016, -0.2065, 0.6017]) tensor([0.6667, 0.0407, 0.0902, 0.2024]) -Greedy action tensor([ 0.9348, -0.3323, -0.5323, 0.7957]) tensor([0.4197, 0.1182, 0.0968, 0.3652]) -Greedy action tensor([ 0.5369, -0.4568, -0.1022, -0.0402]) tensor([0.4066, 0.1505, 0.2146, 0.2283]) -Greedy action tensor([ 0.4215, -0.5555, -0.0046, -0.0285]) tensor([0.3749, 0.1411, 0.2448, 0.2391]) -Greedy action tensor([ 1.1244, -0.1897, -0.1799, -0.1458]) tensor([0.5492, 0.1476, 0.1490, 0.1542]) -Greedy action tensor([ 1.6207, -0.5438, -0.7834, 0.4499]) tensor([0.6599, 0.0758, 0.0596, 0.2047]) -Greedy action tensor([ 0.9652, -0.2389, -0.1836, 0.0206]) tensor([0.4986, 0.1495, 0.1580, 0.1939]) -Greedy action tensor([ 1.3842, -0.3059, -0.4527, 0.3649]) tensor([0.5866, 0.1082, 0.0934, 0.2117]) -Greedy action tensor([ 0.9549, -0.5822, -0.6134, 0.7049]) tensor([0.4541, 0.0976, 0.0946, 0.3536]) -Greedy action tensor([ 1.1925, -0.4960, -0.0832, -0.0176]) tensor([0.5675, 0.1049, 0.1585, 0.1692]) -Greedy action tensor([ 0.7735, -0.2044, 0.2259, -0.2980]) tensor([0.4354, 0.1637, 0.2518, 0.1491]) -Greedy action tensor([ 1.5080, -0.6189, -0.2864, 0.3087]) tensor([0.6302, 0.0751, 0.1048, 0.1899]) -Greedy action tensor([ 0.7400, -0.5596, 0.1033, -0.3494]) tensor([0.4677, 0.1275, 0.2474, 0.1573]) -Greedy action tensor([ 0.6155, -0.4569, 0.0479, -0.0780]) tensor([0.4151, 0.1421, 0.2353, 0.2075]) -Greedy action tensor([ 0.7740, 0.1009, 0.0143, -0.0430]) tensor([0.4133, 0.2108, 0.1933, 0.1826]) -Greedy action tensor([ 0.9012, -0.5819, -0.6965, -0.0958]) tensor([0.5561, 0.1262, 0.1125, 0.2052]) -Greedy action tensor([ 1.4145, -0.5136, -0.4679, 0.2899]) tensor([0.6164, 0.0896, 0.0938, 0.2002]) -Greedy action tensor([ 1.2469, -0.5625, -0.3119, 0.5965]) tensor([0.5274, 0.0864, 0.1110, 0.2752]) -Greedy action tensor([ 1.0608, -0.2170, -0.2535, 0.4336]) tensor([0.4805, 0.1339, 0.1291, 0.2566]) -Greedy action tensor([ 1.0058, 0.1413, -0.2292, 0.1392]) tensor([0.4689, 0.1975, 0.1364, 0.1971]) -Greedy action tensor([-0.2425, -0.2214, -0.0548, 0.6006]) tensor([0.1801, 0.1840, 0.2173, 0.4186]) -Greedy action tensor([ 0.5519, -1.2499, 0.3538, 0.6068]) tensor([0.3288, 0.0542, 0.2697, 0.3473]) -Greedy action tensor([ 1.5834, -0.9298, 1.4336, 1.3318]) tensor([0.3677, 0.0298, 0.3166, 0.2859]) -Greedy action tensor([-0.3478, -1.0798, -0.3253, -0.3096]) tensor([0.2823, 0.1358, 0.2887, 0.2933]) -Greedy action tensor([-0.9950, 0.4721, 0.7870, 0.1691]) tensor([0.0691, 0.2995, 0.4103, 0.2212]) -Greedy action tensor([ 0.8934, -0.2658, 0.4671, 1.2353]) tensor([0.2963, 0.0930, 0.1935, 0.4172]) -Greedy action tensor([ 0.8923, -1.0235, 1.7372, 0.2429]) tensor([0.2502, 0.0368, 0.5823, 0.1307]) -Greedy action tensor([ 0.7345, -2.0812, -0.4211, 0.6987]) tensor([0.4274, 0.0256, 0.1346, 0.4124]) -Greedy action tensor([ 0.3103, -0.3848, 0.1261, 0.3752]) tensor([0.2943, 0.1469, 0.2448, 0.3140]) -Greedy action tensor([ 0.2075, -0.5712, 0.9847, -0.1850]) tensor([0.2320, 0.1065, 0.5048, 0.1567]) -Greedy action tensor([ 0.0592, -0.9933, -0.0810, 0.0132]) tensor([0.3151, 0.1100, 0.2739, 0.3010]) -Greedy action tensor([0.2344, 0.4277, 0.1030, 0.7859]) tensor([0.2072, 0.2514, 0.1817, 0.3597]) -Greedy action tensor([-0.2132, -0.8351, 1.0744, 0.1339]) tensor([0.1521, 0.0816, 0.5511, 0.2152]) -Greedy action tensor([ 0.8811, 0.1725, -0.0978, 1.1945]) tensor([0.3090, 0.1521, 0.1161, 0.4228]) -Greedy action tensor([-0.1070, -0.0234, -1.1517, 1.4442]) tensor([0.1397, 0.1519, 0.0492, 0.6592]) -Greedy action tensor([ 1.3335, -0.3259, 0.8972, -0.1096]) tensor([0.4824, 0.0918, 0.3118, 0.1139]) -Greedy action tensor([0.0807, 0.5334, 0.6208, 0.9282]) tensor([0.1510, 0.2374, 0.2591, 0.3524]) -Greedy action tensor([ 0.9030, -0.1234, 0.0139, 1.1281]) tensor([0.3309, 0.1186, 0.1360, 0.4145]) -Greedy action tensor([ 0.8963, -0.2671, 0.4543, 1.4237]) tensor([0.2740, 0.0856, 0.1761, 0.4643]) -Greedy action tensor([ 0.7973, -0.1635, -0.2624, 1.1425]) tensor([0.3183, 0.1218, 0.1103, 0.4496]) -Greedy action tensor([ 0.8970, 0.2632, -0.3086, 0.2600]) tensor([0.4239, 0.2249, 0.1270, 0.2242]) -Greedy action tensor([-0.9325, -1.0166, -1.2101, 0.6132]) tensor([0.1357, 0.1248, 0.1028, 0.6367]) -Greedy action tensor([ 1.3768, -0.4390, 0.7823, 1.0257]) tensor([0.4135, 0.0673, 0.2282, 0.2911]) -Greedy action tensor([ 1.7696, -2.2271, -0.1861, 1.0590]) tensor([0.6056, 0.0111, 0.0857, 0.2976]) -Greedy action tensor([-0.2197, -0.9381, 0.3712, 0.5971]) tensor([0.1800, 0.0877, 0.3250, 0.4073]) -Greedy action tensor([ 0.2315, -1.4291, 0.8287, 0.0972]) tensor([0.2576, 0.0490, 0.4681, 0.2253]) -Greedy action tensor([-0.3130, 0.4996, 0.3030, 0.1390]) tensor([0.1498, 0.3376, 0.2773, 0.2354]) -Greedy action tensor([ 0.8568, 1.2052, -0.8018, -0.0890]) tensor([0.3338, 0.4730, 0.0636, 0.1296]) -Greedy action tensor([0.7020, 0.1338, 0.3083, 1.3033]) tensor([0.2460, 0.1393, 0.1659, 0.4488]) -Greedy action tensor([ 1.1016, -0.4449, 1.9917, 0.3256]) tensor([0.2434, 0.0518, 0.5927, 0.1120]) -Greedy action tensor([ 0.8606, 0.2853, 0.6004, -0.1316]) tensor([0.3698, 0.2080, 0.2851, 0.1371]) -Greedy action tensor([ 1.0617, -1.1074, 0.8068, 0.5759]) tensor([0.3993, 0.0456, 0.3094, 0.2456]) -Greedy action tensor([ 0.0612, -0.5939, 0.3356, 2.0014]) tensor([0.1021, 0.0530, 0.1343, 0.7106]) -Greedy action tensor([-0.5294, 2.1760, 0.4231, -0.4955]) tensor([0.0511, 0.7638, 0.1323, 0.0528]) -Greedy action tensor([-0.2044, -1.1515, -0.1919, 1.5046]) tensor([0.1262, 0.0489, 0.1278, 0.6971]) -Greedy action tensor([-1.3710, -1.3107, -0.3592, 0.3840]) tensor([0.0944, 0.1002, 0.2596, 0.5458]) -Greedy action tensor([-0.1907, -2.0052, -0.0107, 0.5410]) tensor([0.2253, 0.0367, 0.2697, 0.4683]) -Greedy action tensor([-0.5873, 0.3115, 0.2807, -0.0041]) tensor([0.1311, 0.3219, 0.3122, 0.2348]) -Greedy action tensor([ 1.3680, -0.6553, 1.0972, 1.4697]) tensor([0.3331, 0.0440, 0.2541, 0.3688]) -Greedy action tensor([ 0.9808, -1.0181, 0.9024, 1.3353]) tensor([0.2869, 0.0389, 0.2653, 0.4090]) -Greedy action tensor([ 0.9203, -0.2125, 0.5937, 0.1971]) tensor([0.3955, 0.1274, 0.2853, 0.1919]) -Greedy action tensor([ 0.6812, 0.4261, -0.1422, 0.9109]) tensor([0.2880, 0.2232, 0.1264, 0.3624]) -Greedy action tensor([-1.2985, 0.0358, -0.1733, 1.0781]) tensor([0.0536, 0.2037, 0.1652, 0.5775]) -Greedy action tensor([ 0.4324, -0.1291, -0.2282, 0.6502]) tensor([0.3003, 0.1713, 0.1551, 0.3733]) -Greedy action tensor([-0.1582, 0.2190, -0.0215, -0.5791]) tensor([0.2347, 0.3422, 0.2691, 0.1541]) -Greedy action tensor([0.0774, 0.7658, 1.0630, 0.5217]) tensor([0.1383, 0.2753, 0.3706, 0.2157]) -Greedy action tensor([ 1.0607, -0.5715, -0.1142, 0.3712]) tensor([0.4985, 0.0975, 0.1539, 0.2501]) -Greedy action tensor([ 1.5833, -0.8737, 0.8766, 1.0247]) tensor([0.4649, 0.0398, 0.2293, 0.2659]) -Greedy action tensor([ 1.6020, -0.1259, 1.0873, 0.6405]) tensor([0.4635, 0.0823, 0.2770, 0.1772]) -Greedy action tensor([ 0.2839, -0.7527, 1.5677, 0.7056]) tensor([0.1541, 0.0546, 0.5563, 0.2349]) -Greedy action tensor([ 0.0412, -1.5553, 0.1891, 0.3597]) tensor([0.2676, 0.0542, 0.3102, 0.3680]) -Greedy action tensor([ 1.5259, 0.5186, -0.2892, 0.5299]) tensor([0.5270, 0.1925, 0.0858, 0.1947]) -Greedy action tensor([1.1516, 0.0471, 0.8756, 1.0281]) tensor([0.3362, 0.1114, 0.2552, 0.2972]) -Greedy action tensor([ 1.2917, -0.6850, 1.5646, -0.0112]) tensor([0.3671, 0.0509, 0.4823, 0.0998]) -Greedy action tensor([ 0.4150, -0.8103, 0.3535, 0.4818]) tensor([0.3027, 0.0889, 0.2847, 0.3237]) -Greedy action tensor([-0.7813, -1.3676, 1.2375, 1.0560]) tensor([0.0651, 0.0362, 0.4900, 0.4087]) -Greedy action tensor([ 0.4082, 0.2523, -0.3564, 0.3497]) tensor([0.3063, 0.2621, 0.1426, 0.2889]) -Greedy action tensor([ 0.5365, -0.4487, 0.9712, 0.1673]) tensor([0.2771, 0.1034, 0.4279, 0.1915]) -Greedy action tensor([-1.0296, 2.2802, 0.6896, -0.5735]) tensor([0.0281, 0.7705, 0.1570, 0.0444]) -Greedy action tensor([ 0.4787, -0.8490, -0.1732, 0.0561]) tensor([0.4096, 0.1086, 0.2134, 0.2684]) -Greedy action tensor([-0.3106, -1.6538, 0.5633, 0.9828]) tensor([0.1369, 0.0357, 0.3281, 0.4992]) -Greedy action tensor([ 0.7634, 0.7631, 0.6250, -0.3256]) tensor([0.3118, 0.3117, 0.2715, 0.1049]) -Greedy action tensor([ 0.3644, 0.4530, -0.2352, 0.4548]) tensor([0.2676, 0.2925, 0.1469, 0.2930]) -Greedy action tensor([ 0.8384, -0.4686, 1.0189, 0.4766]) tensor([0.3160, 0.0855, 0.3785, 0.2201]) -Greedy action tensor([ 0.6128, 0.4041, 0.8789, -0.6597]) tensor([0.2944, 0.2389, 0.3842, 0.0825]) -Greedy action tensor([-0.4851, -0.5976, 0.1077, 1.0147]) tensor([0.1222, 0.1092, 0.2211, 0.5475]) -Greedy action tensor([ 0.6235, -0.7318, 1.0729, 0.1627]) tensor([0.2894, 0.0746, 0.4535, 0.1825]) -Greedy action tensor([ 0.1592, -0.1748, 0.4025, 0.1990]) tensor([0.2480, 0.1776, 0.3163, 0.2581]) -Greedy action tensor([ 1.2576, -1.4452, 1.4036, -0.1349]) tensor([0.4044, 0.0271, 0.4680, 0.1005]) -Greedy action tensor([ 0.5346, 0.3833, -0.6984, -0.3027]) tensor([0.3870, 0.3327, 0.1128, 0.1675]) -Greedy action tensor([ 1.1032, -1.1673, 1.1655, 0.7687]) tensor([0.3468, 0.0358, 0.3691, 0.2482]) -Greedy action tensor([ 0.0593, -0.1316, 0.7342, -0.2611]) tensor([0.2214, 0.1830, 0.4349, 0.1607]) -Greedy action tensor([ 0.4057, -1.3842, -0.6668, 1.6409]) tensor([0.2021, 0.0337, 0.0691, 0.6950]) -Greedy action tensor([ 0.4478, -0.7744, 0.3230, 0.7651]) tensor([0.2816, 0.0830, 0.2486, 0.3868]) -Greedy action tensor([ 0.5576, -0.4179, 0.2933, 0.4572]) tensor([0.3280, 0.1236, 0.2518, 0.2966]) -Greedy action tensor([ 0.0513, -0.4698, 0.5840, 2.0645]) tensor([0.0927, 0.0551, 0.1580, 0.6942]) -Greedy action tensor([ 1.0595, -0.6693, 0.9385, 0.7798]) tensor([0.3547, 0.0630, 0.3142, 0.2681]) -Greedy action tensor([-0.7124, -1.0302, -0.0868, 1.3117]) tensor([0.0896, 0.0652, 0.1674, 0.6779]) -Greedy action tensor([-0.0471, -0.4278, 1.6593, -0.0458]) tensor([0.1220, 0.0834, 0.6724, 0.1222]) -Greedy action tensor([ 0.5628, 0.6991, -0.3864, -0.2972]) tensor([0.3383, 0.3877, 0.1309, 0.1431]) -Greedy action tensor([-0.0871, -0.2383, -0.0247, 0.3624]) tensor([0.2226, 0.1914, 0.2370, 0.3490]) -Greedy action tensor([ 0.7600, -0.0268, -0.7243, 1.8755]) tensor([0.2113, 0.0962, 0.0479, 0.6446]) -Greedy action tensor([ 0.6839, -0.1091, 0.0757, -0.2173]) tensor([0.4161, 0.1883, 0.2265, 0.1690]) -Greedy action tensor([ 0.5413, -0.2050, -0.0925, -0.1356]) tensor([0.3980, 0.1887, 0.2111, 0.2022]) -Greedy action tensor([ 0.7594, -0.4386, -0.0631, -0.2293]) tensor([0.4732, 0.1428, 0.2079, 0.1761]) -Greedy action tensor([ 0.7626, -0.8438, -0.1182, -0.3052]) tensor([0.5105, 0.1024, 0.2116, 0.1755]) -Greedy action tensor([ 0.4850, -0.3025, -0.0896, -0.2880]) tensor([0.4033, 0.1835, 0.2270, 0.1862]) -Greedy action tensor([ 0.5382, -0.3196, 0.0306, -0.3480]) tensor([0.4101, 0.1739, 0.2469, 0.1691]) -Greedy action tensor([ 0.2227, 0.0228, 0.1821, -0.2929]) tensor([0.2962, 0.2425, 0.2844, 0.1769]) -Greedy action tensor([ 0.5562, 0.1321, 0.0352, -0.0991]) tensor([0.3613, 0.2364, 0.2146, 0.1876]) -Greedy action tensor([ 0.6348, -0.2310, 0.2645, -0.5293]) tensor([0.4126, 0.1736, 0.2849, 0.1288]) -Greedy action tensor([ 0.5115, -0.3392, -0.1174, -0.2788]) tensor([0.4143, 0.1769, 0.2209, 0.1880]) -Greedy action tensor([ 0.7881, -0.5105, 0.0634, -0.3885]) tensor([0.4841, 0.1321, 0.2345, 0.1493]) -Greedy action tensor([ 0.7749, -0.4881, -0.0372, -0.3433]) tensor([0.4870, 0.1377, 0.2162, 0.1592]) -Greedy action tensor([ 0.4627, -0.2504, 0.0173, -0.3793]) tensor([0.3904, 0.1913, 0.2501, 0.1682]) -Greedy action tensor([ 0.2065, 0.0120, -0.0799, -0.1964]) tensor([0.3084, 0.2539, 0.2316, 0.2061]) -Greedy action tensor([ 0.5905, -0.5127, 0.1979, -0.4239]) tensor([0.4220, 0.1400, 0.2850, 0.1530]) -Greedy action tensor([ 0.4092, -0.2494, -0.1204, -0.3404]) tensor([0.3878, 0.2007, 0.2283, 0.1832]) -Greedy action tensor([ 0.8444, -0.3330, 0.0641, -0.5816]) tensor([0.4983, 0.1535, 0.2284, 0.1197]) -Greedy action tensor([ 0.7054, -0.4926, -0.0677, -0.3890]) tensor([0.4766, 0.1438, 0.2200, 0.1595]) -Greedy action tensor([ 0.7122, -0.3012, -0.0643, -0.4090]) tensor([0.4654, 0.1689, 0.2141, 0.1517]) -Greedy action tensor([ 0.3731, 0.0017, -0.0731, -0.1858]) tensor([0.3446, 0.2377, 0.2206, 0.1971]) -Greedy action tensor([ 0.3635, -0.1453, -0.1183, -0.1030]) tensor([0.3514, 0.2112, 0.2170, 0.2204]) -Greedy action tensor([ 0.5990, -0.4606, 0.0680, -0.5671]) tensor([0.4452, 0.1543, 0.2618, 0.1387]) -Greedy action tensor([ 0.4634, -0.0086, -0.0521, -0.0129]) tensor([0.3519, 0.2195, 0.2101, 0.2185]) -Greedy action tensor([ 0.5925, -0.1866, 0.0458, -0.1671]) tensor([0.3991, 0.1831, 0.2310, 0.1867]) -Greedy action tensor([ 0.9540, -0.7051, 0.2111, -0.7217]) tensor([0.5396, 0.1027, 0.2567, 0.1010]) -Greedy action tensor([ 0.6772, 0.0609, -0.0566, -0.4423]) tensor([0.4262, 0.2301, 0.2046, 0.1391]) -Greedy action tensor([ 0.6720, -0.3115, -0.0516, -0.3968]) tensor([0.4540, 0.1698, 0.2202, 0.1559]) -Greedy action tensor([ 0.7580, -0.4955, 0.2502, -0.5658]) tensor([0.4644, 0.1326, 0.2795, 0.1236]) -Greedy action tensor([ 0.4100, -0.1440, -0.1486, -0.1285]) tensor([0.3663, 0.2105, 0.2095, 0.2138]) -Greedy action tensor([ 0.3027, 0.1247, 0.0394, -0.1794]) tensor([0.3103, 0.2597, 0.2385, 0.1916]) -Greedy action tensor([ 0.9046, -0.6154, 0.0209, -0.5297]) tensor([0.5347, 0.1169, 0.2210, 0.1274]) -Greedy action tensor([ 0.3149, -0.1401, 0.0130, -0.3590]) tensor([0.3468, 0.2200, 0.2564, 0.1768]) -Greedy action tensor([ 0.5488, -0.2875, -0.0654, -0.1388]) tensor([0.4037, 0.1749, 0.2184, 0.2030]) -Greedy action tensor([ 0.9040, -0.8866, 0.0279, -0.5043]) tensor([0.5471, 0.0913, 0.2278, 0.1338]) -Greedy action tensor([ 0.9556, -0.3522, -0.0557, -0.4567]) tensor([0.5325, 0.1440, 0.1937, 0.1297]) -Greedy action tensor([ 0.4863, -0.2741, -0.0742, -0.2826]) tensor([0.3997, 0.1868, 0.2282, 0.1853]) -Greedy action tensor([ 0.7225, -0.2710, -0.0130, -0.2429]) tensor([0.4484, 0.1660, 0.2149, 0.1707]) -Greedy action tensor([ 8.6984e-01, -1.1047e+00, -5.9387e-04, -9.4371e-01]) tensor([0.5812, 0.0807, 0.2434, 0.0948]) -Greedy action tensor([ 0.4240, 0.1563, 0.1479, -0.1025]) tensor([0.3211, 0.2457, 0.2436, 0.1896]) -Greedy action tensor([ 0.3974, -0.0716, -0.1204, 0.0268]) tensor([0.3434, 0.2149, 0.2046, 0.2371]) -Greedy action tensor([ 0.8885, -0.6848, -0.1098, -0.5503]) tensor([0.5515, 0.1144, 0.2033, 0.1308]) -Greedy action tensor([ 0.8744, -0.5415, 0.0634, -0.5970]) tensor([0.5217, 0.1266, 0.2319, 0.1198]) -Greedy action tensor([ 0.6881, -0.3003, 0.0732, -0.3256]) tensor([0.4394, 0.1635, 0.2376, 0.1595]) -Greedy action tensor([ 0.7331, -0.1937, -0.1645, -0.3870]) tensor([0.4696, 0.1859, 0.1914, 0.1532]) -Greedy action tensor([ 0.6838, -0.1837, -0.1096, -0.0810]) tensor([0.4278, 0.1797, 0.1935, 0.1991]) -Greedy action tensor([ 0.6986, -0.6218, 0.1233, -0.6425]) tensor([0.4782, 0.1277, 0.2690, 0.1251]) -Greedy action tensor([ 0.3856, -0.0264, -0.0642, -0.1075]) tensor([0.3435, 0.2275, 0.2191, 0.2098]) -Greedy action tensor([ 0.3349, -0.0395, -0.0406, -0.3507]) tensor([0.3474, 0.2389, 0.2387, 0.1750]) -Greedy action tensor([ 1.0819, -0.5759, -0.0114, -0.6683]) tensor([0.5884, 0.1121, 0.1972, 0.1022]) -Greedy action tensor([ 0.9542, 0.2379, -0.0728, -0.3584]) tensor([0.4726, 0.2309, 0.1692, 0.1272]) -Greedy action tensor([ 0.6616, -0.3175, 0.0228, -0.3949]) tensor([0.4442, 0.1669, 0.2345, 0.1544]) -Greedy action tensor([ 0.6266, -0.3440, 0.0892, -0.5267]) tensor([0.4388, 0.1663, 0.2564, 0.1385]) -Greedy action tensor([ 0.7050, -0.3711, -0.1078, -0.2464]) tensor([0.4607, 0.1571, 0.2044, 0.1779]) -Greedy action tensor([ 0.5379, -0.2952, -0.1176, -0.2969]) tensor([0.4188, 0.1820, 0.2174, 0.1817]) -Greedy action tensor([ 0.5933, -0.2935, -0.1261, -0.3266]) tensor([0.4352, 0.1793, 0.2120, 0.1735]) -Greedy action tensor([ 0.8270, -0.6016, -0.1694, -0.7283]) tensor([0.5495, 0.1317, 0.2028, 0.1160]) -Greedy action tensor([ 0.5810, -0.4130, 0.0421, -0.3817]) tensor([0.4282, 0.1585, 0.2498, 0.1635]) -Greedy action tensor([ 0.4096, -0.1059, -0.1218, -0.1026]) tensor([0.3592, 0.2145, 0.2111, 0.2152]) -Greedy action tensor([ 0.7258, -0.2135, 0.0598, -0.0874]) tensor([0.4259, 0.1665, 0.2188, 0.1888]) -Greedy action tensor([ 0.5343, -0.1893, -0.0789, -0.2574]) tensor([0.4033, 0.1956, 0.2184, 0.1827]) -Greedy action tensor([ 0.4404, -0.1050, -0.3482, -0.2761]) tensor([0.3964, 0.2298, 0.1802, 0.1936]) -Greedy action tensor([ 0.2545, 0.3084, 0.1674, -0.3595]) tensor([0.2846, 0.3004, 0.2609, 0.1540]) -Greedy action tensor([ 0.3667, 0.0006, -0.2401, -0.3987]) tensor([0.3699, 0.2565, 0.2016, 0.1720]) -Greedy action tensor([ 0.6236, -0.0726, 0.0038, -0.2494]) tensor([0.4075, 0.2031, 0.2192, 0.1702]) -Greedy action tensor([ 0.7437, -0.3285, -0.0499, -0.2537]) tensor([0.4623, 0.1582, 0.2090, 0.1705]) -Greedy action tensor([ 0.5859, -0.1125, -0.2399, -0.3595]) tensor([0.4303, 0.2140, 0.1884, 0.1672]) -Greedy action tensor([ 0.5191, 0.0990, -0.0899, -0.0956]) tensor([0.3647, 0.2396, 0.1984, 0.1973]) -Greedy action tensor([ 0.0864, 0.2030, -0.1290, -0.4637]) tensor([0.2852, 0.3204, 0.2299, 0.1645]) -Greedy action tensor([ 0.4599, -0.3969, 0.1160, -0.1537]) tensor([0.3739, 0.1587, 0.2650, 0.2024]) -Greedy action tensor([ 0.6700, -0.2339, 0.0170, -0.3580]) tensor([0.4380, 0.1774, 0.2280, 0.1567]) -Greedy action tensor([ 0.5240, -0.2595, -0.0859, -0.4027]) tensor([0.4174, 0.1906, 0.2268, 0.1652]) -Greedy action tensor([ 0.3469, -0.2424, -0.2702, -0.3210]) tensor([0.3836, 0.2128, 0.2069, 0.1967]) -Greedy action tensor([ 0.7058, 0.0429, -0.0824, -0.3685]) tensor([0.4326, 0.2229, 0.1967, 0.1478]) -Greedy action tensor([ 0.5942, -0.2961, -0.1532, -0.1939]) tensor([0.4276, 0.1755, 0.2025, 0.1944]) -Greedy action tensor([ 0.5974, -0.1864, 0.0459, -0.4947]) tensor([0.4223, 0.1928, 0.2433, 0.1417]) -Greedy action tensor([ 0.3769, -0.0068, 0.0916, -0.2879]) tensor([0.3393, 0.2312, 0.2550, 0.1745]) -Greedy action tensor([ 0.3360, -0.0560, -0.0107, -0.1323]) tensor([0.3324, 0.2246, 0.2350, 0.2081]) -Greedy action tensor([ 0.6926, -0.3085, -0.0385, -0.4814]) tensor([0.4634, 0.1703, 0.2231, 0.1432]) -Greedy action tensor([ 0.8377, -0.6078, -0.0207, -0.4767]) tensor([0.5187, 0.1222, 0.2198, 0.1393]) -Greedy action tensor([ 0.7189, -0.2065, -0.1439, -0.2484]) tensor([0.4549, 0.1803, 0.1919, 0.1729]) -Greedy action tensor([ 0.5275, -0.4961, 0.2427, -0.6091]) tensor([0.4111, 0.1477, 0.3092, 0.1319]) -Greedy action tensor([-0.4986, -0.4109, 0.1870, 0.0477]) tensor([0.1723, 0.1881, 0.3420, 0.2976]) -Greedy action tensor([-1.9404, -0.4510, 0.6625, -0.1786]) tensor([0.0404, 0.1791, 0.5454, 0.2352]) -Greedy action tensor([-1.8994, -0.4316, 0.6437, -0.1478]) tensor([0.0420, 0.1822, 0.5339, 0.2420]) -Greedy action tensor([-1.6771, -0.5644, 0.6898, 0.2166]) tensor([0.0468, 0.1425, 0.4995, 0.3112]) -Greedy action tensor([-1.7196, 0.0903, 0.4668, -0.2923]) tensor([0.0496, 0.3027, 0.4412, 0.2065]) -Greedy action tensor([-1.5984, 0.2132, 0.4381, 0.2543]) tensor([0.0473, 0.2892, 0.3622, 0.3013]) -Greedy action tensor([-1.7721, -0.4494, 0.5753, -0.0829]) tensor([0.0485, 0.1820, 0.5070, 0.2625]) -Greedy action tensor([-1.6727, -0.4942, 0.5376, -0.0743]) tensor([0.0546, 0.1774, 0.4979, 0.2700]) -Greedy action tensor([-1.8556, -0.4300, 0.6191, -0.1386]) tensor([0.0442, 0.1840, 0.5254, 0.2463]) -Greedy action tensor([-1.5157, -0.4561, 0.6773, 0.4937]) tensor([0.0492, 0.1421, 0.4413, 0.3673]) -Greedy action tensor([-0.5574, -0.5831, 0.1858, 0.0983]) tensor([0.1666, 0.1623, 0.3502, 0.3209]) -Greedy action tensor([-1.3941, -0.5486, 1.0918, 1.0592]) tensor([0.0371, 0.0864, 0.4454, 0.4311]) -Greedy action tensor([-1.4236, -0.5553, 0.3764, 0.1836]) tensor([0.0693, 0.1652, 0.4195, 0.3459]) -Greedy action tensor([-0.9082, -0.6171, 0.3219, -0.1000]) tensor([0.1249, 0.1672, 0.4275, 0.2804]) -Greedy action tensor([-1.8582, -0.4871, 0.6745, -0.0864]) tensor([0.0427, 0.1683, 0.5377, 0.2513]) -Greedy action tensor([-1.8444, -0.4701, 0.6105, -0.1145]) tensor([0.0450, 0.1777, 0.5237, 0.2536]) -Greedy action tensor([-1.5059, -0.3994, 0.7563, -0.6245]) tensor([0.0623, 0.1885, 0.5987, 0.1505]) -Greedy action tensor([-1.8830, -0.4342, 0.6256, -0.1354]) tensor([0.0429, 0.1829, 0.5277, 0.2465]) -Greedy action tensor([-1.9201, -0.4512, 0.6733, -0.1582]) tensor([0.0407, 0.1770, 0.5450, 0.2373]) -Greedy action tensor([-1.5683, -0.5429, 0.5322, -0.1366]) tensor([0.0619, 0.1727, 0.5061, 0.2593]) -Greedy action tensor([-1.7487, -0.4213, 0.6162, 0.0286]) tensor([0.0469, 0.1768, 0.4990, 0.2773]) -Greedy action tensor([-1.3894, -0.5615, 0.3920, 0.2219]) tensor([0.0702, 0.1608, 0.4171, 0.3519]) -Greedy action tensor([-1.4741, -0.3398, 0.8032, -0.7090]) tensor([0.0625, 0.1942, 0.6091, 0.1343]) -Greedy action tensor([-1.8421, -0.4755, 0.6160, -0.0921]) tensor([0.0447, 0.1754, 0.5225, 0.2574]) -Greedy action tensor([-1.3039, -0.3853, 0.6974, 1.0093]) tensor([0.0476, 0.1193, 0.3521, 0.4810]) -Greedy action tensor([-1.4725, -0.5507, 0.4438, 0.2506]) tensor([0.0628, 0.1580, 0.4271, 0.3521]) -Greedy action tensor([-1.2427, -0.6018, 0.3338, 0.1529]) tensor([0.0849, 0.1612, 0.4109, 0.3429]) -Greedy action tensor([-1.7406, -0.4676, 0.8743, 0.4610]) tensor([0.0367, 0.1309, 0.5010, 0.3314]) -Greedy action tensor([-1.2064, -0.6001, 0.2933, 0.2495]) tensor([0.0862, 0.1580, 0.3862, 0.3696]) -Greedy action tensor([-1.9166, -0.4590, 0.6597, -0.1625]) tensor([0.0413, 0.1773, 0.5428, 0.2385]) -Greedy action tensor([-1.4098, 0.5611, 0.2993, 0.3446]) tensor([0.0513, 0.3684, 0.2836, 0.2967]) -Greedy action tensor([-0.0747, -0.0819, 1.0916, 1.6690]) tensor([0.0916, 0.0909, 0.2939, 0.5236]) -Greedy action tensor([-1.6207, -0.4976, 0.4914, 0.0558]) tensor([0.0565, 0.1738, 0.4673, 0.3023]) -Greedy action tensor([-0.5622, 0.0255, 0.9850, 1.5319]) tensor([0.0640, 0.1153, 0.3008, 0.5199]) -Greedy action tensor([-1.3502, -0.5430, 0.6916, 0.8013]) tensor([0.0512, 0.1147, 0.3942, 0.4399]) -Greedy action tensor([0.0107, 0.1190, 0.8963, 1.7803]) tensor([0.0961, 0.1071, 0.2330, 0.5639]) -Greedy action tensor([-1.8177, -0.4257, 0.6009, -0.0985]) tensor([0.0458, 0.1843, 0.5144, 0.2556]) -Greedy action tensor([-1.8317, -0.4759, 0.6192, -0.0720]) tensor([0.0449, 0.1741, 0.5204, 0.2607]) -Greedy action tensor([-0.4159, 0.0892, 1.0013, 1.5569]) tensor([0.0716, 0.1186, 0.2952, 0.5146]) -Greedy action tensor([-1.2132, -0.4962, 0.3703, 0.5339]) tensor([0.0732, 0.1500, 0.3567, 0.4201]) -Greedy action tensor([-0.6372, -0.1631, 0.5012, 1.0673]) tensor([0.0891, 0.1431, 0.2781, 0.4898]) -Greedy action tensor([-1.3420, -0.4188, 0.6815, -0.5566]) tensor([0.0753, 0.1896, 0.5698, 0.1652]) -Greedy action tensor([-1.8386, -0.4766, 0.7064, -0.0314]) tensor([0.0421, 0.1644, 0.5368, 0.2567]) -Greedy action tensor([-1.1513, -0.5589, 0.4388, 0.6635]) tensor([0.0722, 0.1305, 0.3540, 0.4432]) -Greedy action tensor([-1.5460, -0.6541, 0.6274, 0.3251]) tensor([0.0534, 0.1303, 0.4694, 0.3469]) -Greedy action tensor([-1.8951, -0.6694, 0.4808, -0.1702]) tensor([0.0481, 0.1639, 0.5179, 0.2701]) -Greedy action tensor([-0.9152, -0.0292, 0.3051, 1.1458]) tensor([0.0682, 0.1654, 0.2310, 0.5355]) -Greedy action tensor([-1.9613, -0.5861, 1.3422, 0.6801]) tensor([0.0216, 0.0856, 0.5890, 0.3038]) -Greedy action tensor([-1.0946, -0.6089, 0.8896, 1.3117]) tensor([0.0476, 0.0774, 0.3465, 0.5284]) -Greedy action tensor([-1.6896, -0.4750, 0.5184, -0.0117]) tensor([0.0531, 0.1790, 0.4834, 0.2845]) -Greedy action tensor([-0.7415, 0.1754, 0.5896, 1.1000]) tensor([0.0736, 0.1840, 0.2785, 0.4639]) -Greedy action tensor([-1.8713, -0.2002, 0.5746, -0.1268]) tensor([0.0424, 0.2255, 0.4894, 0.2427]) -Greedy action tensor([-1.8883, -0.4714, 0.6912, -0.0725]) tensor([0.0409, 0.1686, 0.5393, 0.2513]) -Greedy action tensor([-1.7576, -0.4603, 0.5757, -0.0644]) tensor([0.0490, 0.1793, 0.5053, 0.2664]) -Greedy action tensor([-1.0179, 0.2336, 0.3700, -0.3806]) tensor([0.0962, 0.3363, 0.3855, 0.1820]) -Greedy action tensor([-0.7647, -0.5832, 0.2092, 0.3150]) tensor([0.1284, 0.1539, 0.3399, 0.3778]) -Greedy action tensor([-1.3001, -0.6201, 0.8945, 0.8799]) tensor([0.0481, 0.0949, 0.4316, 0.4254]) -Greedy action tensor([-0.9877, -0.5746, 0.7363, 1.1960]) tensor([0.0588, 0.0889, 0.3299, 0.5224]) -Greedy action tensor([-1.4817, -0.4889, 0.8532, 0.7110]) tensor([0.0435, 0.1174, 0.4493, 0.3898]) -Greedy action tensor([-1.8718, -0.3638, 0.6252, -0.1136]) tensor([0.0426, 0.1925, 0.5176, 0.2472]) -Greedy action tensor([-1.3571, -0.4329, -0.1061, -0.4445]) tensor([0.1052, 0.2651, 0.3676, 0.2621]) -Greedy action tensor([-1.3735, -0.6851, 0.4198, 0.1713]) tensor([0.0731, 0.1454, 0.4391, 0.3424]) -Greedy action tensor([-1.1240, -0.9913, -0.7413, -0.5762]) tensor([0.1873, 0.2139, 0.2747, 0.3240]) -Greedy action tensor([-1.2031, -0.6472, 1.0005, 1.2657]) tensor([0.0424, 0.0739, 0.3837, 0.5001]) -Greedy action tensor([-1.7915, -0.4836, 0.6650, 0.0339]) tensor([0.0443, 0.1639, 0.5168, 0.2750]) -Greedy action tensor([-0.0994, 0.1876, 0.8330, 1.6485]) tensor([0.0942, 0.1255, 0.2393, 0.5409]) -Greedy action tensor([-0.7298, 0.2752, -0.2316, -0.0596]) tensor([0.1364, 0.3726, 0.2244, 0.2666]) -Greedy action tensor([-1.7373, -1.0628, 0.0070, -0.7082]) tensor([0.0871, 0.1709, 0.4983, 0.2437]) -Greedy action tensor([-1.6687, -0.4513, 0.5666, 0.0811]) tensor([0.0513, 0.1734, 0.4799, 0.2953]) -Greedy action tensor([-1.8247, -0.4798, 0.6239, -0.0866]) tensor([0.0453, 0.1737, 0.5237, 0.2573]) -Greedy action tensor([-1.7602, -0.4935, 0.7088, 0.1298]) tensor([0.0435, 0.1544, 0.5140, 0.2881]) -Greedy action tensor([-1.7765, -0.5003, 0.7264, 0.1965]) tensor([0.0417, 0.1493, 0.5092, 0.2998]) -Greedy action tensor([-1.5910, -0.5781, 0.6100, -0.0051]) tensor([0.0566, 0.1558, 0.5112, 0.2764]) -Greedy action tensor([-1.9582, -0.5507, 0.6722, -0.1648]) tensor([0.0400, 0.1636, 0.5557, 0.2406]) -Greedy action tensor([-0.6342, -0.5776, 0.1933, 0.2475]) tensor([0.1479, 0.1565, 0.3384, 0.3572]) -Greedy action tensor([-1.2999, -0.5029, 1.2552, 1.1912]) tensor([0.0355, 0.0788, 0.4570, 0.4287]) -Greedy action tensor([-0.8866, 0.4668, 0.0715, -0.4397]) tensor([0.1106, 0.4281, 0.2883, 0.1729]) -Greedy action tensor([-1.5656, -0.5502, 0.4639, 0.0502]) tensor([0.0610, 0.1683, 0.4640, 0.3068]) -Greedy action tensor([-1.2182, -0.5312, 0.2842, 0.2237]) tensor([0.0854, 0.1698, 0.3837, 0.3612]) -Greedy action tensor([-1.9817, -0.5363, 1.2770, 0.5734]) tensor([0.0227, 0.0962, 0.5895, 0.2917]) -Greedy action tensor([-1.7842, -0.2298, 0.5406, -0.0977]) tensor([0.0468, 0.2216, 0.4788, 0.2529]) -Greedy action tensor([ 1.0530, 0.3204, 0.2027, -0.4431]) tensor([0.4691, 0.2255, 0.2004, 0.1051]) -Greedy action tensor([ 1.3939, -0.2939, -0.3040, 0.1155]) tensor([0.6074, 0.1123, 0.1112, 0.1691]) -Greedy action tensor([ 1.0932, 0.0687, -0.1247, 0.1104]) tensor([0.4928, 0.1769, 0.1458, 0.1844]) -Greedy action tensor([ 1.0525, -0.6119, -0.6730, 0.8241]) tensor([0.4623, 0.0875, 0.0823, 0.3679]) -Greedy action tensor([ 1.5388, -0.0136, -0.3146, 0.3365]) tensor([0.5992, 0.1269, 0.0939, 0.1801]) -Greedy action tensor([ 1.1121, -0.2181, -0.0921, -0.0523]) tensor([0.5329, 0.1409, 0.1598, 0.1663]) -Greedy action tensor([ 1.0308, -0.0128, -0.2044, -0.2716]) tensor([0.5222, 0.1839, 0.1519, 0.1420]) -Greedy action tensor([ 1.6582, -0.2771, -0.3783, 0.3730]) tensor([0.6446, 0.0931, 0.0841, 0.1783]) -Greedy action tensor([ 0.8329, -0.3936, -0.1519, 0.2548]) tensor([0.4489, 0.1317, 0.1677, 0.2518]) -Greedy action tensor([ 1.0947, -0.5798, -0.1620, 0.0978]) tensor([0.5432, 0.1018, 0.1546, 0.2004]) -Greedy action tensor([ 1.0180, -0.4372, -0.2036, 0.0563]) tensor([0.5235, 0.1222, 0.1543, 0.2001]) -Greedy action tensor([ 1.5005, -0.6702, -0.2692, -0.0790]) tensor([0.6709, 0.0765, 0.1143, 0.1383]) -Greedy action tensor([ 1.5544, -0.2747, -0.3876, 0.0855]) tensor([0.6518, 0.1047, 0.0935, 0.1500]) -Greedy action tensor([ 1.6275, -0.5117, -0.4774, 0.4302]) tensor([0.6487, 0.0764, 0.0790, 0.1959]) -Greedy action tensor([ 1.1772, -0.6758, 0.0235, 0.3392]) tensor([0.5250, 0.0823, 0.1656, 0.2271]) -Greedy action tensor([ 1.3363, 0.0132, -0.2637, 0.1200]) tensor([0.5667, 0.1509, 0.1144, 0.1679]) -Greedy action tensor([ 1.1477, -0.7045, -0.3594, 0.9351]) tensor([0.4573, 0.0717, 0.1013, 0.3697]) -Greedy action tensor([ 0.9315, -0.2375, -0.4869, -0.2760]) tensor([0.5400, 0.1678, 0.1307, 0.1614]) -Greedy action tensor([ 1.6718, -1.1478, -0.4682, 0.4921]) tensor([0.6735, 0.0402, 0.0793, 0.2070]) -Greedy action tensor([ 1.5658, -0.6187, -0.2464, 0.4568]) tensor([0.6228, 0.0701, 0.1017, 0.2055]) -Greedy action tensor([ 1.2052, -0.7764, -0.3566, 0.0954]) tensor([0.5962, 0.0822, 0.1251, 0.1965]) -Greedy action tensor([ 1.2468, -0.3165, -0.2444, 0.1210]) tensor([0.5685, 0.1191, 0.1280, 0.1844]) -Greedy action tensor([ 0.6321, -0.2612, -0.6094, 0.7418]) tensor([0.3553, 0.1454, 0.1027, 0.3965]) -Greedy action tensor([ 1.5983, -0.7749, -0.1895, 0.3423]) tensor([0.6471, 0.0603, 0.1083, 0.1843]) -Greedy action tensor([ 1.1886, -0.3607, -0.2511, -0.1272]) tensor([0.5822, 0.1237, 0.1380, 0.1562]) -Greedy action tensor([ 1.0362, -0.5174, -0.3246, 0.4038]) tensor([0.5002, 0.1058, 0.1283, 0.2658]) -Greedy action tensor([ 1.7081, -0.4264, -0.6752, 0.4681]) tensor([0.6667, 0.0789, 0.0615, 0.1929]) -Greedy action tensor([ 1.9108, -0.5602, -0.8935, 0.1862]) tensor([0.7557, 0.0639, 0.0458, 0.1347]) -Greedy action tensor([ 0.7770, -0.3715, 0.2004, -0.0305]) tensor([0.4301, 0.1364, 0.2416, 0.1918]) -Greedy action tensor([ 1.4057, -0.5972, -0.2136, 0.2070]) tensor([0.6118, 0.0826, 0.1212, 0.1845]) -Greedy action tensor([ 0.5483, -0.4249, -0.0857, -0.0990]) tensor([0.4112, 0.1554, 0.2181, 0.2153]) -Greedy action tensor([ 1.0192, -0.1942, 0.0380, 0.0635]) tensor([0.4862, 0.1445, 0.1823, 0.1870]) -Greedy action tensor([ 1.3554, -0.3920, -0.0269, 0.0533]) tensor([0.5892, 0.1027, 0.1479, 0.1602]) -Greedy action tensor([ 0.2816, -0.3458, 0.1720, -0.1001]) tensor([0.3213, 0.1715, 0.2879, 0.2193]) -Greedy action tensor([ 0.5360, -0.2495, 0.2187, -0.0891]) tensor([0.3677, 0.1677, 0.2678, 0.1968]) -Greedy action tensor([ 0.8014, -0.3225, 0.3154, 0.0722]) tensor([0.4128, 0.1342, 0.2539, 0.1991]) -Greedy action tensor([ 1.2224, -0.5177, -0.3131, 0.2683]) tensor([0.5631, 0.0988, 0.1213, 0.2169]) -Greedy action tensor([ 0.9480, -0.4419, -0.3182, 0.0433]) tensor([0.5166, 0.1287, 0.1456, 0.2091]) -Greedy action tensor([ 0.8203, -0.2158, -0.4388, -0.0267]) tensor([0.4837, 0.1716, 0.1373, 0.2074]) -Greedy action tensor([ 1.2615, -0.1888, -0.3767, 0.2641]) tensor([0.5563, 0.1305, 0.1081, 0.2052]) -Greedy action tensor([ 1.9027e+00, -1.5556e-03, -4.0985e-01, 2.2441e-01]) tensor([0.6970, 0.1038, 0.0690, 0.1301]) -Greedy action tensor([ 1.0791, -0.0182, 0.2420, 0.0159]) tensor([0.4735, 0.1580, 0.2050, 0.1635]) -Greedy action tensor([ 1.6621, -1.0118, -0.5536, 0.3544]) tensor([0.6904, 0.0476, 0.0753, 0.1867]) -Greedy action tensor([ 1.1381, -0.7710, -0.0368, 0.0506]) tensor([0.5574, 0.0826, 0.1722, 0.1879]) -Greedy action tensor([ 1.3242, -0.0157, 0.0092, 0.1615]) tensor([0.5426, 0.1421, 0.1457, 0.1696]) -Greedy action tensor([ 1.2948, -0.1478, -0.2543, 0.2923]) tensor([0.5508, 0.1301, 0.1170, 0.2021]) -Greedy action tensor([ 1.2406, -0.3281, -0.2497, 0.1368]) tensor([0.5665, 0.1180, 0.1276, 0.1878]) -Greedy action tensor([ 1.3370, -0.4814, -0.1913, 0.2176]) tensor([0.5863, 0.0951, 0.1272, 0.1914]) -Greedy action tensor([ 1.7961, -0.6068, -0.3630, 0.3119]) tensor([0.6980, 0.0631, 0.0806, 0.1582]) -Greedy action tensor([ 1.8955, -0.4025, -0.2315, 0.1156]) tensor([0.7203, 0.0724, 0.0859, 0.1215]) -Greedy action tensor([ 0.9741, -0.5812, -0.2641, 0.3597]) tensor([0.4897, 0.1034, 0.1420, 0.2649]) -Greedy action tensor([ 0.8639, -0.4228, -0.2877, 0.4526]) tensor([0.4434, 0.1225, 0.1402, 0.2939]) -Greedy action tensor([ 0.4545, -0.2802, -0.7478, 0.5438]) tensor([0.3480, 0.1669, 0.1046, 0.3805]) -Greedy action tensor([ 0.4294, -0.1199, 0.1807, 0.0163]) tensor([0.3313, 0.1913, 0.2583, 0.2192]) -Greedy action tensor([ 1.2132, -0.4945, -0.3482, 0.5083]) tensor([0.5304, 0.0962, 0.1113, 0.2621]) -Greedy action tensor([ 1.2370, -0.3680, -0.0027, 0.0526]) tensor([0.5567, 0.1118, 0.1611, 0.1703]) -Greedy action tensor([ 0.6697, -0.0765, -0.1485, 0.2989]) tensor([0.3838, 0.1820, 0.1693, 0.2649]) -Greedy action tensor([ 1.3744, -0.3254, -0.5253, 0.3045]) tensor([0.5969, 0.1091, 0.0893, 0.2048]) -Greedy action tensor([ 0.7601, 0.0541, -0.1526, -0.2110]) tensor([0.4398, 0.2171, 0.1766, 0.1665]) -Greedy action tensor([ 1.2811, -0.9006, -0.1546, 0.4416]) tensor([0.5609, 0.0633, 0.1335, 0.2423]) -Greedy action tensor([ 2.2004, -0.5638, -0.1060, 0.3915]) tensor([0.7539, 0.0475, 0.0751, 0.1235]) -Greedy action tensor([ 0.8067, -0.2157, -0.2175, -0.0316]) tensor([0.4648, 0.1672, 0.1669, 0.2010]) -Greedy action tensor([ 1.1799, -0.5601, -0.3919, 0.3977]) tensor([0.5433, 0.0954, 0.1128, 0.2485]) -Greedy action tensor([ 0.5973, -0.2968, -0.2516, -0.2748]) tensor([0.4435, 0.1814, 0.1898, 0.1854]) -Greedy action tensor([ 0.8265, -0.2839, -0.1157, 0.2113]) tensor([0.4425, 0.1458, 0.1725, 0.2392]) -Greedy action tensor([ 0.9333, -0.4135, -0.5359, 0.7082]) tensor([0.4369, 0.1136, 0.1005, 0.3489]) -Greedy action tensor([ 0.6787, -0.6263, -0.2748, 0.1916]) tensor([0.4403, 0.1194, 0.1697, 0.2705]) -Greedy action tensor([ 1.1261, -0.3716, -0.1972, 0.2551]) tensor([0.5240, 0.1172, 0.1395, 0.2193]) -Greedy action tensor([ 1.8078, -0.5104, -0.4281, 0.5333]) tensor([0.6735, 0.0663, 0.0720, 0.1883]) -Greedy action tensor([ 0.5107, -0.3895, -0.2189, 0.0352]) tensor([0.3984, 0.1619, 0.1921, 0.2476]) -Greedy action tensor([ 0.7793, -0.2259, -0.2825, 0.2295]) tensor([0.4369, 0.1599, 0.1511, 0.2521]) -Greedy action tensor([ 1.0539, -0.5531, -0.5204, 0.8062]) tensor([0.4570, 0.0916, 0.0947, 0.3567]) -Greedy action tensor([ 0.8092, -0.2285, -0.3342, 0.3061]) tensor([0.4390, 0.1555, 0.1399, 0.2655]) -Greedy action tensor([ 1.0772, -0.0242, -0.2674, 0.3741]) tensor([0.4789, 0.1592, 0.1248, 0.2371]) -Greedy action tensor([ 1.4979, -0.6679, 0.1290, 0.4028]) tensor([0.5870, 0.0673, 0.1493, 0.1964]) -Greedy action tensor([ 1.4532, -0.6875, -0.4492, 0.3405]) tensor([0.6268, 0.0737, 0.0935, 0.2060]) -Greedy action tensor([ 1.5723, -0.8631, -0.5381, 0.8852]) tensor([0.5842, 0.0512, 0.0708, 0.2939]) -Greedy action tensor([ 1.5708, -0.9016, -0.3904, 0.5201]) tensor([0.6350, 0.0536, 0.0893, 0.2221]) -Greedy action tensor([ 1.6270, -0.4171, -0.0810, 0.3529]) tensor([0.6288, 0.0814, 0.1140, 0.1759]) -Greedy action tensor([ 2.0501, -0.6389, -0.1899, 0.2081]) tensor([0.7502, 0.0510, 0.0799, 0.1189]) -Greedy action tensor([ 2.0728, -0.9037, -0.2336, 0.1353]) tensor([0.7724, 0.0394, 0.0769, 0.1113]) -Greedy action tensor([ 0.3275, 0.3355, 0.0563, -1.0268]) tensor([0.3302, 0.3328, 0.2518, 0.0852]) -Greedy action tensor([ 0.5012, -1.1983, -0.0739, 1.3323]) tensor([0.2474, 0.0452, 0.1392, 0.5681]) -Greedy action tensor([ 0.4209, -1.0022, 0.9495, 0.3984]) tensor([0.2554, 0.0615, 0.4333, 0.2497]) -Greedy action tensor([ 0.8800, -0.0016, -0.3343, 1.5269]) tensor([0.2762, 0.1144, 0.0820, 0.5274]) -Greedy action tensor([ 1.3666, -0.8142, -0.2491, 1.6222]) tensor([0.3842, 0.0434, 0.0764, 0.4961]) -Greedy action tensor([1.1793, 0.6386, 0.9220, 1.0347]) tensor([0.3105, 0.1808, 0.2400, 0.2687]) -Greedy action tensor([ 1.2887, -1.4229, -1.1285, 0.9781]) tensor([0.5295, 0.0352, 0.0472, 0.3881]) -Greedy action tensor([ 0.6986, -0.2276, 0.3564, 1.4555]) tensor([0.2360, 0.0935, 0.1676, 0.5030]) -Greedy action tensor([1.3244, 1.0136, 1.1227, 1.5355]) tensor([0.2642, 0.1936, 0.2159, 0.3263]) -Greedy action tensor([ 0.3270, 0.6609, -1.0120, 0.9999]) tensor([0.2165, 0.3024, 0.0568, 0.4244]) -Greedy action tensor([-0.3821, -1.4902, 0.5774, 2.4269]) tensor([0.0487, 0.0161, 0.1271, 0.8081]) -Greedy action tensor([ 1.0989, 0.3279, -0.1509, 0.0748]) tensor([0.4743, 0.2194, 0.1359, 0.1703]) -Greedy action tensor([-0.0182, -0.8682, 0.2450, 0.3296]) tensor([0.2413, 0.1031, 0.3139, 0.3417]) -Greedy action tensor([1.2442, 0.2825, 0.5107, 0.9846]) tensor([0.3797, 0.1451, 0.1823, 0.2929]) -Greedy action tensor([ 0.9640, 0.3139, -0.0219, 0.1462]) tensor([0.4280, 0.2234, 0.1597, 0.1889]) -Greedy action tensor([ 0.6038, -0.1389, 0.0460, 0.3165]) tensor([0.3573, 0.1700, 0.2046, 0.2681]) -Greedy action tensor([ 0.8338, -0.4865, 0.5495, 0.3462]) tensor([0.3797, 0.1014, 0.2857, 0.2332]) -Greedy action tensor([ 0.9128, 0.7935, -0.1210, 0.1301]) tensor([0.3703, 0.3287, 0.1317, 0.1693]) -Greedy action tensor([0.4677, 0.3449, 0.4738, 0.2343]) tensor([0.2716, 0.2402, 0.2732, 0.2150]) -Greedy action tensor([-1.7616, 0.5681, 0.1115, 0.3972]) tensor([0.0378, 0.3886, 0.2461, 0.3275]) -Greedy action tensor([ 1.3533, -1.2216, -0.2009, 1.5329]) tensor([0.4025, 0.0307, 0.0851, 0.4817]) -Greedy action tensor([ 1.3980, -0.8490, 1.4607, 0.6349]) tensor([0.3793, 0.0401, 0.4038, 0.1768]) -Greedy action tensor([ 1.7212, -0.4337, 1.5550, 1.2010]) tensor([0.3911, 0.0453, 0.3312, 0.2324]) -Greedy action tensor([ 1.9005, 0.4667, -0.2964, 0.8625]) tensor([0.5870, 0.1399, 0.0652, 0.2079]) -Greedy action tensor([ 1.3347, -0.4298, 1.8178, 0.0845]) tensor([0.3248, 0.0556, 0.5266, 0.0930]) -Greedy action tensor([ 1.6799, 0.0335, -0.6295, 0.8311]) tensor([0.5814, 0.1121, 0.0577, 0.2488]) -Greedy action tensor([ 0.9446, -0.7511, 0.2396, -0.3877]) tensor([0.5151, 0.0945, 0.2545, 0.1359]) -Greedy action tensor([ 1.3651, -0.8415, 1.1316, 1.3724]) tensor([0.3437, 0.0378, 0.2722, 0.3463]) -Greedy action tensor([ 0.8876, -0.6117, 0.0048, 0.9608]) tensor([0.3686, 0.0823, 0.1525, 0.3966]) -Greedy action tensor([ 0.5947, 0.4725, 0.6137, -0.5115]) tensor([0.3091, 0.2736, 0.3151, 0.1023]) -Greedy action tensor([ 0.9836, -0.4364, -0.9953, 1.8547]) tensor([0.2653, 0.0641, 0.0367, 0.6339]) -Greedy action tensor([ 0.8362, 0.8635, -0.1396, 0.1790]) tensor([0.3421, 0.3516, 0.1289, 0.1773]) -Greedy action tensor([ 0.6462, -0.2185, 0.2317, -0.5444]) tensor([0.4191, 0.1765, 0.2769, 0.1274]) -Greedy action tensor([ 1.2092, -0.2181, 0.8682, 0.6312]) tensor([0.3981, 0.0955, 0.2831, 0.2233]) -Greedy action tensor([ 0.1360, -0.7378, -0.1468, -0.7912]) tensor([0.3896, 0.1626, 0.2936, 0.1542]) -Greedy action tensor([ 1.1681, 0.4730, -0.9394, 0.1393]) tensor([0.5056, 0.2523, 0.0614, 0.1807]) -Greedy action tensor([0.2448, 0.3895, 0.3315, 0.1703]) tensor([0.2396, 0.2768, 0.2612, 0.2224]) -Greedy action tensor([ 1.1169, -0.1043, 0.6043, 0.7810]) tensor([0.3834, 0.1130, 0.2296, 0.2740]) -Greedy action tensor([0.3364, 1.0617, 0.1947, 0.3725]) tensor([0.2012, 0.4156, 0.1746, 0.2086]) -Greedy action tensor([-0.3423, -1.6736, -0.5479, 0.9948]) tensor([0.1699, 0.0449, 0.1383, 0.6469]) -Greedy action tensor([ 0.4291, 0.0935, -0.1251, 0.7077]) tensor([0.2770, 0.1980, 0.1591, 0.3659]) -Greedy action tensor([-1.0538, -0.4494, -0.2385, -0.2312]) tensor([0.1357, 0.2484, 0.3068, 0.3090]) -Greedy action tensor([ 0.6194, -0.0729, -0.3354, 1.3314]) tensor([0.2549, 0.1275, 0.0981, 0.5195]) -Greedy action tensor([-0.4027, 0.2735, -0.8136, 0.6282]) tensor([0.1554, 0.3057, 0.1031, 0.4358]) -Greedy action tensor([ 0.0558, -1.4371, 1.6057, -0.2705]) tensor([0.1502, 0.0338, 0.7077, 0.1084]) -Greedy action tensor([-0.0677, 0.7336, 0.9789, 0.0122]) tensor([0.1397, 0.3113, 0.3978, 0.1513]) -Greedy action tensor([-0.3026, 0.5729, 0.0286, -1.2127]) tensor([0.1925, 0.4620, 0.2681, 0.0775]) -Greedy action tensor([ 0.9303, 0.8551, -0.4584, 1.0621]) tensor([0.3014, 0.2796, 0.0752, 0.3439]) -Greedy action tensor([ 0.5453, -0.6938, -0.0261, 0.7963]) tensor([0.3185, 0.0922, 0.1799, 0.4094]) -Greedy action tensor([ 0.3665, 0.5719, -0.2840, -0.6028]) tensor([0.3196, 0.3924, 0.1668, 0.1212]) -Greedy action tensor([ 0.8649, -0.3387, 0.2648, 1.2481]) tensor([0.3016, 0.0905, 0.1655, 0.4424]) -Greedy action tensor([ 1.1175, -1.2553, 1.5271, 0.6435]) tensor([0.3104, 0.0289, 0.4675, 0.1932]) -Greedy action tensor([ 0.3482, -0.6608, 0.8963, 1.3204]) tensor([0.1743, 0.0635, 0.3015, 0.4607]) -Greedy action tensor([ 1.0198, -1.4181, 1.7021, 0.6123]) tensor([0.2680, 0.0234, 0.5303, 0.1783]) -Greedy action tensor([ 2.1127, -1.0940, -0.5228, 0.9470]) tensor([0.7023, 0.0284, 0.0503, 0.2189]) -Greedy action tensor([ 0.4436, -0.8069, 0.4042, 1.9017]) tensor([0.1528, 0.0437, 0.1469, 0.6566]) -Greedy action tensor([ 0.0707, 0.0558, -0.8592, 1.2746]) tensor([0.1750, 0.1725, 0.0691, 0.5834]) -Greedy action tensor([ 0.3674, -0.2040, -1.3710, 0.7235]) tensor([0.3156, 0.1783, 0.0555, 0.4506]) -Greedy action tensor([ 0.9083, -0.6649, 0.2539, 0.8009]) tensor([0.3809, 0.0790, 0.1980, 0.3421]) -Greedy action tensor([ 0.2174, -0.8554, 1.0161, -0.3832]) tensor([0.2431, 0.0832, 0.5404, 0.1333]) -Greedy action tensor([-0.0426, 0.4730, 0.1267, -0.2480]) tensor([0.2140, 0.3583, 0.2534, 0.1742]) -Greedy action tensor([-1.2341, -0.5538, -0.4579, -0.5804]) tensor([0.1414, 0.2793, 0.3074, 0.2719]) -Greedy action tensor([ 0.7471, -1.3659, 0.1763, 1.2789]) tensor([0.2952, 0.0357, 0.1668, 0.5024]) -Greedy action tensor([ 0.1462, -1.2737, 0.2739, 1.7700]) tensor([0.1342, 0.0324, 0.1525, 0.6808]) -Greedy action tensor([ 1.2548, -1.0492, 0.8801, -0.1034]) tensor([0.4891, 0.0488, 0.3363, 0.1258]) -Greedy action tensor([1.0092, 0.4482, 0.0853, 1.3020]) tensor([0.3023, 0.1725, 0.1200, 0.4052]) -Greedy action tensor([-0.5801, -0.2720, 1.1324, -0.8247]) tensor([0.1151, 0.1567, 0.6381, 0.0901]) -Greedy action tensor([ 0.2927, 0.3884, -0.9353, 2.4581]) tensor([0.0900, 0.0990, 0.0264, 0.7846]) -Greedy action tensor([0.1878, 0.5497, 0.9409, 0.1150]) tensor([0.1822, 0.2616, 0.3869, 0.1694]) -Greedy action tensor([ 0.7628, -1.2909, 0.3202, 0.1122]) tensor([0.4362, 0.0560, 0.2802, 0.2276]) -Greedy action tensor([-0.9903, -0.6514, -0.1712, 0.7138]) tensor([0.0983, 0.1380, 0.2231, 0.5405]) -Greedy action tensor([ 1.2429, -0.5941, -0.1253, 0.7573]) tensor([0.4928, 0.0785, 0.1254, 0.3032]) -Greedy action tensor([ 0.9541, 1.1118, 0.6740, -0.3507]) tensor([0.3127, 0.3661, 0.2363, 0.0848]) -Greedy action tensor([ 1.4126, -0.4635, -0.9733, 1.1544]) tensor([0.4956, 0.0759, 0.0456, 0.3828]) -Greedy action tensor([ 0.6425, -0.4230, 0.9177, -0.6654]) tensor([0.3411, 0.1175, 0.4492, 0.0922]) -Greedy action tensor([ 1.0591, -0.3009, 1.1848, 1.8692]) tensor([0.2156, 0.0553, 0.2444, 0.4846]) -Greedy action tensor([ 0.3763, 0.1009, 0.0473, -1.0466]) tensor([0.3677, 0.2792, 0.2646, 0.0886]) -Greedy action tensor([ 0.3317, 0.4318, 0.9328, -1.4735]) tensor([0.2443, 0.2700, 0.4456, 0.0402]) -Greedy action tensor([0.8324, 0.0159, 0.9747, 1.2936]) tensor([0.2392, 0.1057, 0.2758, 0.3793]) -Greedy action tensor([ 0.3418, -0.1891, 0.4852, -1.3292]) tensor([0.3413, 0.2007, 0.3939, 0.0642]) -Greedy action tensor([ 0.1186, -0.7164, 0.7146, 0.9083]) tensor([0.1834, 0.0796, 0.3329, 0.4041]) -Greedy action tensor([-0.8019, 0.0478, -0.4208, 0.5706]) tensor([0.1143, 0.2674, 0.1673, 0.4510]) -Greedy action tensor([ 0.7690, -0.1631, 0.0931, -0.3337]) tensor([0.4476, 0.1762, 0.2277, 0.1486]) -Greedy action tensor([ 0.5800, -0.2521, 0.0057, -0.6287]) tensor([0.4354, 0.1895, 0.2452, 0.1300]) -Greedy action tensor([ 0.5011, 0.0353, -0.1075, -0.2966]) tensor([0.3814, 0.2394, 0.2075, 0.1718]) -Greedy action tensor([ 0.7218, -0.3162, -0.3663, -0.4497]) tensor([0.4998, 0.1770, 0.1684, 0.1549]) -Greedy action tensor([ 0.5882, -0.3580, -0.0604, -0.3735]) tensor([0.4361, 0.1693, 0.2280, 0.1667]) -Greedy action tensor([ 0.6375, -0.2272, -0.2242, -0.3821]) tensor([0.4536, 0.1911, 0.1916, 0.1637]) -Greedy action tensor([ 0.5175, -0.0559, -0.1036, -0.0605]) tensor([0.3756, 0.2117, 0.2019, 0.2108]) -Greedy action tensor([ 0.2809, -0.1057, -0.0685, -0.2559]) tensor([0.3368, 0.2288, 0.2375, 0.1969]) -Greedy action tensor([ 0.6521, -0.1190, 0.0848, -0.2887]) tensor([0.4132, 0.1911, 0.2343, 0.1613]) -Greedy action tensor([ 0.8844, -0.7098, -0.0385, -0.5135]) tensor([0.5413, 0.1099, 0.2151, 0.1338]) -Greedy action tensor([ 0.5239, -0.1929, -0.0222, -0.2039]) tensor([0.3921, 0.1915, 0.2271, 0.1894]) -Greedy action tensor([ 0.4839, -0.3842, 0.1089, -0.5307]) tensor([0.4049, 0.1700, 0.2783, 0.1468]) -Greedy action tensor([ 0.6516, -0.0521, 0.0985, -0.1564]) tensor([0.3975, 0.1967, 0.2286, 0.1772]) -Greedy action tensor([ 0.9355, -0.7338, -0.0845, -0.2716]) tensor([0.5411, 0.1019, 0.1951, 0.1618]) -Greedy action tensor([ 0.3405, -0.0650, 0.0084, -0.1442]) tensor([0.3333, 0.2222, 0.2391, 0.2053]) -Greedy action tensor([ 0.5970, -0.3730, -0.0240, -0.6747]) tensor([0.4552, 0.1726, 0.2446, 0.1276]) -Greedy action tensor([ 0.5497, -0.4004, 0.1825, -0.8676]) tensor([0.4307, 0.1666, 0.2983, 0.1044]) -Greedy action tensor([ 0.1589, 0.0332, -0.0790, -0.3223]) tensor([0.3041, 0.2682, 0.2397, 0.1880]) -Greedy action tensor([ 0.7649, -0.5011, -0.0581, -0.3004]) tensor([0.4841, 0.1365, 0.2126, 0.1668]) -Greedy action tensor([ 0.6581, -0.3593, 0.1899, -0.3650]) tensor([0.4260, 0.1540, 0.2668, 0.1532]) -Greedy action tensor([ 0.5381, -0.1354, -0.1187, -0.1768]) tensor([0.3972, 0.2025, 0.2060, 0.1943]) -Greedy action tensor([ 0.4816, -0.2597, 0.2480, -0.4545]) tensor([0.3759, 0.1791, 0.2976, 0.1474]) -Greedy action tensor([ 0.8297, -0.4964, 0.0802, -0.2096]) tensor([0.4780, 0.1269, 0.2259, 0.1691]) -Greedy action tensor([ 0.4157, -0.2349, -0.0810, -0.1626]) tensor([0.3716, 0.1939, 0.2261, 0.2084]) -Greedy action tensor([ 0.4295, -0.1776, -0.2289, -0.3606]) tensor([0.3974, 0.2166, 0.2057, 0.1803]) -Greedy action tensor([ 1.0032, -0.6055, -0.1667, -0.6662]) tensor([0.5886, 0.1178, 0.1827, 0.1109]) -Greedy action tensor([ 0.4584, -0.3875, 0.1624, -0.5170]) tensor([0.3922, 0.1683, 0.2917, 0.1479]) -Greedy action tensor([ 0.6534, -0.8859, 0.0805, -0.5747]) tensor([0.4828, 0.1036, 0.2722, 0.1414]) -Greedy action tensor([ 0.8973, -0.4614, 0.1218, -0.5249]) tensor([0.5106, 0.1312, 0.2351, 0.1231]) -Greedy action tensor([ 0.6184, 0.2824, -0.2223, -0.1323]) tensor([0.3820, 0.2730, 0.1648, 0.1803]) -Greedy action tensor([ 0.7724, -0.7143, 0.0798, -0.5982]) tensor([0.5050, 0.1142, 0.2526, 0.1282]) -Greedy action tensor([ 0.3036, 0.0261, -0.1115, -0.1677]) tensor([0.3287, 0.2491, 0.2170, 0.2052]) -Greedy action tensor([ 1.0444, -0.9829, -0.0446, -0.4323]) tensor([0.5894, 0.0776, 0.1984, 0.1346]) -Greedy action tensor([ 0.5686, -0.4154, -0.0100, -0.5171]) tensor([0.4401, 0.1645, 0.2468, 0.1486]) -Greedy action tensor([ 0.9308, -0.7934, -0.0944, -0.4721]) tensor([0.5609, 0.1000, 0.2012, 0.1379]) -Greedy action tensor([ 0.7570, -0.5034, -0.0148, -0.3705]) tensor([0.4832, 0.1370, 0.2233, 0.1565]) -Greedy action tensor([ 0.6221, -0.0941, -0.0150, -0.5373]) tensor([0.4290, 0.2096, 0.2268, 0.1346]) -Greedy action tensor([ 0.7727, -0.6135, -0.0936, -0.6226]) tensor([0.5213, 0.1303, 0.2192, 0.1292]) -Greedy action tensor([ 0.5293, -0.4721, -0.1027, -0.3927]) tensor([0.4354, 0.1600, 0.2314, 0.1732]) -Greedy action tensor([ 0.5134, -0.3650, -0.0340, -0.3299]) tensor([0.4125, 0.1714, 0.2386, 0.1775]) -Greedy action tensor([ 0.4446, -0.1320, -0.0672, -0.4300]) tensor([0.3879, 0.2179, 0.2325, 0.1617]) -Greedy action tensor([ 0.3874, -0.2383, -0.0510, -0.3045]) tensor([0.3731, 0.1995, 0.2406, 0.1868]) -Greedy action tensor([ 0.6491, -0.3131, 0.0965, -0.7224]) tensor([0.4522, 0.1728, 0.2603, 0.1147]) -Greedy action tensor([ 1.0536, -0.2180, -0.0340, -0.5419]) tensor([0.5494, 0.1540, 0.1852, 0.1114]) -Greedy action tensor([ 0.3949, -0.1069, -0.1421, -0.2964]) tensor([0.3716, 0.2250, 0.2172, 0.1862]) -Greedy action tensor([ 0.8398, -0.2371, -0.0276, -0.4805]) tensor([0.4932, 0.1680, 0.2071, 0.1317]) -Greedy action tensor([ 0.4352, -0.1804, 0.0034, -0.1261]) tensor([0.3623, 0.1958, 0.2353, 0.2067]) -Greedy action tensor([ 0.4181, -0.0235, -0.0206, -0.1824]) tensor([0.3526, 0.2267, 0.2274, 0.1934]) -Greedy action tensor([ 0.7907, -0.7483, -0.1347, -0.2798]) tensor([0.5118, 0.1098, 0.2029, 0.1755]) -Greedy action tensor([ 0.4212, -0.0282, -0.1094, -0.0639]) tensor([0.3519, 0.2245, 0.2070, 0.2166]) -Greedy action tensor([ 0.3128, -0.0493, -0.1726, -0.2204]) tensor([0.3450, 0.2402, 0.2123, 0.2024]) -Greedy action tensor([ 0.4534, -0.1149, -0.0570, -0.2508]) tensor([0.3758, 0.2129, 0.2256, 0.1858]) -Greedy action tensor([ 0.2394, 0.3080, -0.0828, -0.1141]) tensor([0.2859, 0.3062, 0.2072, 0.2008]) -Greedy action tensor([ 0.6423, -0.5742, -0.1311, -0.4317]) tensor([0.4763, 0.1411, 0.2198, 0.1627]) -Greedy action tensor([ 0.6427, -0.1508, -0.1261, -0.2427]) tensor([0.4295, 0.1942, 0.1991, 0.1772]) -Greedy action tensor([ 0.5061, -0.0926, -0.0708, -0.1404]) tensor([0.3795, 0.2085, 0.2131, 0.1988]) -Greedy action tensor([ 0.5755, -0.4053, -0.1349, -0.1537]) tensor([0.4258, 0.1597, 0.2092, 0.2053]) -Greedy action tensor([ 0.5075, -0.2423, 0.0143, -0.0716]) tensor([0.3783, 0.1787, 0.2310, 0.2120]) -Greedy action tensor([ 0.3188, 0.0571, -0.1153, -0.0551]) tensor([0.3220, 0.2479, 0.2086, 0.2216]) -Greedy action tensor([ 0.3577, -0.0787, 0.0364, -0.3635]) tensor([0.3499, 0.2262, 0.2538, 0.1701]) -Greedy action tensor([ 0.5910, -0.2245, 0.0628, -0.3463]) tensor([0.4126, 0.1825, 0.2433, 0.1616]) -Greedy action tensor([ 0.8210, -0.6282, 0.0189, -0.6218]) tensor([0.5210, 0.1223, 0.2336, 0.1231]) -Greedy action tensor([ 0.8969, -0.4698, -0.0458, -0.3968]) tensor([0.5211, 0.1329, 0.2030, 0.1429]) -Greedy action tensor([ 0.6220, -0.3063, -0.0639, -0.2207]) tensor([0.4293, 0.1697, 0.2162, 0.1848]) -Greedy action tensor([ 0.6302, -0.4569, -0.1385, -0.4219]) tensor([0.4651, 0.1568, 0.2156, 0.1624]) -Greedy action tensor([ 0.3598, -0.0450, 0.0208, -0.1462]) tensor([0.3353, 0.2237, 0.2389, 0.2022]) -Greedy action tensor([ 0.9016, -0.4501, -0.1371, -0.5184]) tensor([0.5393, 0.1396, 0.1908, 0.1303]) -Greedy action tensor([ 0.4883, 0.0725, -0.1022, -0.2288]) tensor([0.3701, 0.2442, 0.2051, 0.1807]) -Greedy action tensor([ 0.4529, -0.2037, 0.0261, -0.4119]) tensor([0.3858, 0.2000, 0.2517, 0.1625]) -Greedy action tensor([ 0.8415, -0.5515, -0.0518, -0.2857]) tensor([0.5047, 0.1253, 0.2066, 0.1635]) -Greedy action tensor([ 0.6479, -0.2578, -0.0844, -0.1819]) tensor([0.4308, 0.1742, 0.2071, 0.1879]) -Greedy action tensor([ 0.9023, -0.5196, -0.0830, -0.3956]) tensor([0.5298, 0.1278, 0.1978, 0.1447]) -Greedy action tensor([ 0.4439, -0.2281, -0.1620, -0.6745]) tensor([0.4196, 0.2143, 0.2289, 0.1371]) -Greedy action tensor([ 0.9017, -0.1276, -0.0143, -0.5131]) tensor([0.4999, 0.1786, 0.2000, 0.1215]) -Greedy action tensor([ 0.3534, -0.0502, -0.2109, -0.1310]) tensor([0.3506, 0.2341, 0.1994, 0.2159]) -Greedy action tensor([ 0.6617, -0.3594, -0.0250, -0.4615]) tensor([0.4569, 0.1646, 0.2299, 0.1486]) -Greedy action tensor([ 0.4866, -0.2780, -0.1723, -0.5830]) tensor([0.4299, 0.2001, 0.2224, 0.1475]) -Greedy action tensor([ 0.9845, -0.6557, -0.1591, -0.6219]) tensor([0.5837, 0.1132, 0.1860, 0.1171]) -Greedy action tensor([ 0.4341, 0.0444, -0.1061, -0.3527]) tensor([0.3683, 0.2494, 0.2146, 0.1677]) -Greedy action tensor([ 0.5466, -0.2573, -0.1887, -0.2003]) tensor([0.4165, 0.1864, 0.1997, 0.1974]) -Greedy action tensor([ 0.5656, -0.4755, -0.1458, -0.2469]) tensor([0.4371, 0.1543, 0.2146, 0.1940]) -Greedy action tensor([ 0.7255, -0.6923, -0.2749, 0.2941]) tensor([0.4426, 0.1072, 0.1627, 0.2875]) -Greedy action tensor([ 0.9246, 0.1512, -0.6013, -0.2162]) tensor([0.5004, 0.2309, 0.1088, 0.1599]) -Greedy action tensor([ 1.3375, -0.6832, -0.2438, 0.3179]) tensor([0.5886, 0.0780, 0.1211, 0.2123]) -Greedy action tensor([ 0.9139, -0.3481, -0.3771, -0.1740]) tensor([0.5277, 0.1494, 0.1451, 0.1778]) -Greedy action tensor([ 1.2542, -0.5116, -0.5526, 0.4916]) tensor([0.5550, 0.0949, 0.0911, 0.2589]) -Greedy action tensor([ 0.7250, -0.4946, -0.3949, 0.2002]) tensor([0.4518, 0.1334, 0.1474, 0.2673]) -Greedy action tensor([ 1.4654, -0.4621, -0.1910, 0.3748]) tensor([0.5980, 0.0870, 0.1141, 0.2009]) -Greedy action tensor([ 1.6031, -0.6274, -0.1402, 0.2739]) tensor([0.6464, 0.0695, 0.1131, 0.1711]) -Greedy action tensor([ 1.1003, -0.1975, -0.1456, -0.1716]) tensor([0.5432, 0.1484, 0.1563, 0.1522]) -Greedy action tensor([ 0.9119, -0.6261, -0.6158, 0.9112]) tensor([0.4113, 0.0884, 0.0893, 0.4110]) -Greedy action tensor([ 0.9016, -0.1066, -0.0168, 0.1523]) tensor([0.4471, 0.1631, 0.1784, 0.2113]) -Greedy action tensor([ 1.2069, -0.1508, -0.3571, -0.1697]) tensor([0.5817, 0.1497, 0.1218, 0.1469]) -Greedy action tensor([ 0.9781, -0.3767, -0.2913, 0.2615]) tensor([0.4932, 0.1273, 0.1386, 0.2409]) -Greedy action tensor([ 1.1944, -0.3201, -0.2596, 0.1909]) tensor([0.5494, 0.1208, 0.1284, 0.2014]) -Greedy action tensor([ 1.4341, -0.0933, -0.5908, 0.0926]) tensor([0.6209, 0.1348, 0.0820, 0.1623]) -Greedy action tensor([ 1.3925, -0.5715, -0.3260, 0.5872]) tensor([0.5661, 0.0794, 0.1015, 0.2530]) -Greedy action tensor([ 1.4464, -0.2826, -0.0125, 0.0944]) tensor([0.5993, 0.1063, 0.1393, 0.1550]) -Greedy action tensor([ 1.2251, -0.6065, -0.3459, 0.5520]) tensor([0.5324, 0.0853, 0.1107, 0.2716]) -Greedy action tensor([ 1.0686, -0.5596, -0.4152, 0.3331]) tensor([0.5257, 0.1032, 0.1192, 0.2519]) -Greedy action tensor([ 1.0900, -0.3722, -0.2193, 0.2744]) tensor([0.5144, 0.1192, 0.1389, 0.2275]) -Greedy action tensor([ 0.6676, -0.4246, 0.0223, 0.0447]) tensor([0.4173, 0.1400, 0.2189, 0.2238]) -Greedy action tensor([ 1.6277, -0.3896, -0.1946, 0.5545]) tensor([0.6110, 0.0813, 0.0988, 0.2089]) -Greedy action tensor([ 1.6152, -0.4736, -0.3343, 0.1936]) tensor([0.6634, 0.0821, 0.0944, 0.1601]) -Greedy action tensor([ 1.7585, -0.4174, -0.1240, 0.2254]) tensor([0.6750, 0.0766, 0.1027, 0.1457]) -Greedy action tensor([ 1.0096, -0.2526, -0.0577, 0.1558]) tensor([0.4871, 0.1379, 0.1676, 0.2074]) -Greedy action tensor([ 1.0567, -0.1387, -0.2276, -0.0181]) tensor([0.5206, 0.1575, 0.1441, 0.1777]) -Greedy action tensor([ 0.9734, -0.5114, -0.2334, 0.3251]) tensor([0.4881, 0.1106, 0.1460, 0.2553]) -Greedy action tensor([ 1.5118, -0.6335, -0.2087, 0.2592]) tensor([0.6322, 0.0740, 0.1131, 0.1807]) -Greedy action tensor([ 0.7486, 0.0699, 0.0586, -0.1062]) tensor([0.4108, 0.2084, 0.2061, 0.1747]) -Greedy action tensor([ 1.2891, -0.9936, -0.0679, 0.5463]) tensor([0.5449, 0.0556, 0.1403, 0.2592]) -Greedy action tensor([ 0.9388, -0.1081, -0.2688, 0.1654]) tensor([0.4736, 0.1663, 0.1416, 0.2185]) -Greedy action tensor([ 1.4137, -0.3819, -0.1502, 0.2563]) tensor([0.5918, 0.0983, 0.1239, 0.1860]) -Greedy action tensor([ 1.3353, -0.5757, -0.2821, 0.3539]) tensor([0.5810, 0.0859, 0.1153, 0.2177]) -Greedy action tensor([ 1.1113, -0.4509, -0.0950, 0.3544]) tensor([0.5055, 0.1060, 0.1513, 0.2371]) -Greedy action tensor([ 1.6178, 0.0680, -0.5483, 0.1205]) tensor([0.6449, 0.1369, 0.0739, 0.1443]) -Greedy action tensor([ 1.0250, -0.7198, -0.5747, -0.0286]) tensor([0.5796, 0.1012, 0.1171, 0.2021]) -Greedy action tensor([ 0.6768, -0.2420, -0.3157, 0.1764]) tensor([0.4209, 0.1679, 0.1560, 0.2552]) -Greedy action tensor([ 1.1350, -0.0737, -0.1635, 0.3597]) tensor([0.4921, 0.1469, 0.1343, 0.2266]) -Greedy action tensor([ 1.3818, -0.7568, -0.1928, 0.8832]) tensor([0.5175, 0.0610, 0.1072, 0.3143]) -Greedy action tensor([ 1.4675, -0.9990, -0.0581, 0.5773]) tensor([0.5838, 0.0496, 0.1270, 0.2397]) -Greedy action tensor([ 1.3990, -0.2308, -0.3985, 0.0676]) tensor([0.6151, 0.1205, 0.1019, 0.1625]) -Greedy action tensor([ 1.0280, -0.4054, -0.1977, 0.0899]) tensor([0.5199, 0.1240, 0.1526, 0.2035]) -Greedy action tensor([ 1.0345, -0.3897, -0.0204, 0.1922]) tensor([0.4951, 0.1192, 0.1724, 0.2133]) -Greedy action tensor([ 1.1796, -0.3835, -0.1199, 0.0352]) tensor([0.5554, 0.1163, 0.1514, 0.1768]) -Greedy action tensor([ 0.6337, -0.2050, -0.6551, 0.0326]) tensor([0.4433, 0.1916, 0.1222, 0.2430]) -Greedy action tensor([ 1.0280, -0.5725, -0.2640, 0.1437]) tensor([0.5292, 0.1068, 0.1454, 0.2186]) -Greedy action tensor([ 0.5181, -0.3956, -0.2498, 0.2959]) tensor([0.3751, 0.1504, 0.1741, 0.3004]) -Greedy action tensor([ 1.7933, -0.8204, -0.1749, 0.2900]) tensor([0.6967, 0.0510, 0.0973, 0.1549]) -Greedy action tensor([ 1.0775, -0.2471, -0.4177, -0.0834]) tensor([0.5545, 0.1475, 0.1243, 0.1737]) -Greedy action tensor([ 1.0735, -0.7923, -0.5064, 0.6178]) tensor([0.5013, 0.0776, 0.1033, 0.3178]) -Greedy action tensor([ 1.4751, -0.6726, -0.1659, 0.2212]) tensor([0.6266, 0.0732, 0.1214, 0.1788]) -Greedy action tensor([ 0.9456, -0.0886, -0.2486, 0.7320]) tensor([0.4055, 0.1442, 0.1228, 0.3275]) -Greedy action tensor([ 1.4361, -0.5914, -0.2732, 0.2525]) tensor([0.6177, 0.0813, 0.1118, 0.1891]) -Greedy action tensor([ 1.4919, -0.4241, -0.4027, 0.0395]) tensor([0.6529, 0.0961, 0.0982, 0.1528]) -Greedy action tensor([ 1.0199, -0.6114, -0.1545, 0.2260]) tensor([0.5111, 0.1000, 0.1579, 0.2310]) -Greedy action tensor([ 1.0957, -0.4101, -0.3211, -0.0998]) tensor([0.5660, 0.1256, 0.1372, 0.1712]) -Greedy action tensor([ 0.9515, -0.1920, -0.4246, 0.0462]) tensor([0.5062, 0.1613, 0.1278, 0.2047]) -Greedy action tensor([ 0.9361, -0.3940, -0.1651, 0.2018]) tensor([0.4815, 0.1273, 0.1601, 0.2311]) -Greedy action tensor([ 1.1023, -0.4095, -0.6779, 0.0434]) tensor([0.5761, 0.1270, 0.0971, 0.1998]) -Greedy action tensor([ 0.8574, -0.3476, -0.2906, 0.0262]) tensor([0.4872, 0.1460, 0.1546, 0.2122]) -Greedy action tensor([ 1.0980, -0.5982, -0.6660, 0.9414]) tensor([0.4525, 0.0830, 0.0775, 0.3869]) -Greedy action tensor([ 1.3952, -0.7310, -0.2920, 0.2309]) tensor([0.6186, 0.0738, 0.1145, 0.1931]) -Greedy action tensor([ 0.8559, -0.3893, -0.2220, 0.1467]) tensor([0.4717, 0.1358, 0.1605, 0.2321]) -Greedy action tensor([ 2.1349, -1.2194, -0.2590, 0.4429]) tensor([0.7632, 0.0267, 0.0697, 0.1405]) -Greedy action tensor([ 1.5448, -0.2526, -0.4706, 0.4027]) tensor([0.6180, 0.1024, 0.0824, 0.1972]) -Greedy action tensor([ 1.1452, -0.2498, -0.0510, 0.1540]) tensor([0.5205, 0.1290, 0.1574, 0.1932]) -Greedy action tensor([ 0.9543, -0.3376, -0.2817, 0.2246]) tensor([0.4884, 0.1342, 0.1419, 0.2354]) -Greedy action tensor([ 1.0931, -0.5831, -0.1543, 0.3707]) tensor([0.5102, 0.0955, 0.1466, 0.2478]) -Greedy action tensor([ 0.7903, -0.0830, -0.2847, 0.3671]) tensor([0.4143, 0.1730, 0.1414, 0.2713]) -Greedy action tensor([ 1.1976, -0.5581, -0.2431, 0.1243]) tensor([0.5710, 0.0987, 0.1352, 0.1952]) -Greedy action tensor([ 1.4433, -0.3324, -0.1831, -0.0933]) tensor([0.6325, 0.1071, 0.1244, 0.1361]) -Greedy action tensor([ 1.3563, -0.6325, -0.4679, 0.9234]) tensor([0.5137, 0.0703, 0.0829, 0.3332]) -Greedy action tensor([ 0.9157, -0.3198, -0.3540, 0.5815]) tensor([0.4372, 0.1271, 0.1228, 0.3130]) -Greedy action tensor([ 1.0217, -0.3779, -0.2591, 0.3601]) tensor([0.4901, 0.1209, 0.1361, 0.2529]) -Greedy action tensor([ 1.3233, -0.2390, -0.6424, 0.2095]) tensor([0.5959, 0.1249, 0.0835, 0.1956]) -Greedy action tensor([ 1.2932, -0.7521, -0.5711, 0.8070]) tensor([0.5265, 0.0681, 0.0816, 0.3238]) -Greedy action tensor([ 1.2397, -0.2838, -0.2754, -0.1958]) tensor([0.5967, 0.1301, 0.1312, 0.1420]) -Greedy action tensor([ 2.3240, -0.8169, -0.3241, 0.6518]) tensor([0.7681, 0.0332, 0.0544, 0.1443]) -Greedy action tensor([ 0.6219, -0.2891, -0.1597, 0.1005]) tensor([0.4076, 0.1639, 0.1865, 0.2420]) -Greedy action tensor([ 1.0290, -0.3392, -0.1656, 0.3172]) tensor([0.4882, 0.1243, 0.1479, 0.2396]) -Greedy action tensor([ 0.2943, 0.2085, -0.0991, -0.1742]) tensor([0.3107, 0.2852, 0.2097, 0.1945]) -Greedy action tensor([-1.6747, -0.5428, 0.6646, 0.1946]) tensor([0.0477, 0.1480, 0.4950, 0.3093]) -Greedy action tensor([-1.8757, -0.3558, 0.6200, -0.1329]) tensor([0.0427, 0.1952, 0.5181, 0.2440]) -Greedy action tensor([-1.1422, -0.5806, 0.2552, 0.4833]) tensor([0.0842, 0.1476, 0.3405, 0.4277]) -Greedy action tensor([-0.7746, 0.3034, 0.4047, 1.2236]) tensor([0.0687, 0.2017, 0.2233, 0.5063]) -Greedy action tensor([-1.4752, 0.2093, 0.3470, 0.2243]) tensor([0.0554, 0.2987, 0.3428, 0.3032]) -Greedy action tensor([-2.0692, -0.5543, 1.2954, 0.6727]) tensor([0.0200, 0.0910, 0.5786, 0.3104]) -Greedy action tensor([-1.4505, -0.6675, 0.5581, 0.3379]) tensor([0.0602, 0.1316, 0.4484, 0.3598]) -Greedy action tensor([-0.7465, -0.5111, 0.3945, 0.2941]) tensor([0.1216, 0.1538, 0.3805, 0.3441]) -Greedy action tensor([-1.6962, -0.5248, 0.5344, -0.0324]) tensor([0.0532, 0.1715, 0.4947, 0.2807]) -Greedy action tensor([-1.5790, -0.4991, 0.4275, -0.4271]) tensor([0.0687, 0.2024, 0.5113, 0.2175]) -Greedy action tensor([-1.8218, -0.4603, 0.6044, -0.0489]) tensor([0.0452, 0.1765, 0.5119, 0.2664]) -Greedy action tensor([-1.8449, -0.4675, 0.6099, -0.1174]) tensor([0.0450, 0.1783, 0.5237, 0.2531]) -Greedy action tensor([-1.3083, 0.3396, 0.2873, 0.5308]) tensor([0.0574, 0.2983, 0.2831, 0.3612]) -Greedy action tensor([-1.3818, -0.6016, 0.5708, 0.4064]) tensor([0.0617, 0.1346, 0.4348, 0.3689]) -Greedy action tensor([-1.9749, -0.6441, 1.4611, 0.7662]) tensor([0.0195, 0.0737, 0.6049, 0.3019]) -Greedy action tensor([-1.8162, -0.4523, 0.6545, -0.0420]) tensor([0.0442, 0.1728, 0.5226, 0.2604]) -Greedy action tensor([-0.8124, -0.4091, 0.4629, -0.3874]) tensor([0.1315, 0.1968, 0.4706, 0.2011]) -Greedy action tensor([-1.6193, -0.5417, 0.5224, 0.0786]) tensor([0.0558, 0.1640, 0.4753, 0.3049]) -Greedy action tensor([-1.0510, -0.5890, 0.3358, 0.1062]) tensor([0.1023, 0.1625, 0.4096, 0.3256]) -Greedy action tensor([-1.8791, -0.4392, 0.6332, -0.1389]) tensor([0.0430, 0.1815, 0.5304, 0.2451]) -Greedy action tensor([-1.3811, -0.4922, 0.4654, 0.3753]) tensor([0.0643, 0.1563, 0.4073, 0.3722]) -Greedy action tensor([-1.7658, -0.2126, 0.5432, -0.0352]) tensor([0.0467, 0.2205, 0.4695, 0.2633]) -Greedy action tensor([-1.9262, -0.4512, 0.6551, -0.1704]) tensor([0.0410, 0.1793, 0.5422, 0.2375]) -Greedy action tensor([-1.3012, -0.5575, 0.4175, 0.5171]) tensor([0.0674, 0.1417, 0.3757, 0.4151]) -Greedy action tensor([-1.7420, -1.0381, -0.0197, -0.7473]) tensor([0.0883, 0.1785, 0.4943, 0.2388]) -Greedy action tensor([-1.7011, -0.4962, 0.5520, 0.0028]) tensor([0.0517, 0.1724, 0.4919, 0.2840]) -Greedy action tensor([-1.5957, -0.5605, 0.4900, 0.0169]) tensor([0.0592, 0.1668, 0.4769, 0.2971]) -Greedy action tensor([-0.9350, 0.4950, 0.3380, 0.9529]) tensor([0.0651, 0.2721, 0.2326, 0.4302]) -Greedy action tensor([-1.2864, -0.5736, 1.2212, 1.2267]) tensor([0.0362, 0.0737, 0.4438, 0.4463]) -Greedy action tensor([-1.9026, -0.3805, 0.6331, -0.1489]) tensor([0.0417, 0.1911, 0.5264, 0.2408]) -Greedy action tensor([-1.8837, -0.4128, 0.6295, -0.1438]) tensor([0.0427, 0.1861, 0.5277, 0.2435]) -Greedy action tensor([-1.8300, -0.4363, 0.6035, -0.1196]) tensor([0.0455, 0.1835, 0.5191, 0.2519]) -Greedy action tensor([-1.9202, -0.4291, 0.6484, -0.1705]) tensor([0.0412, 0.1832, 0.5382, 0.2373]) -Greedy action tensor([-1.0469, -0.6682, 0.2009, 0.2968]) tensor([0.1023, 0.1494, 0.3562, 0.3921]) -Greedy action tensor([-1.6321, -0.4878, 0.5387, 0.1611]) tensor([0.0529, 0.1660, 0.4634, 0.3177]) -Greedy action tensor([-1.7457, -0.7562, 0.0614, -0.4044]) tensor([0.0735, 0.1977, 0.4478, 0.2811]) -Greedy action tensor([-1.5851, -0.4905, 0.5408, 0.1366]) tensor([0.0557, 0.1664, 0.4666, 0.3114]) -Greedy action tensor([-0.3852, -0.1682, 0.9910, 1.5549]) tensor([0.0760, 0.0944, 0.3009, 0.5288]) -Greedy action tensor([-1.2082, -0.4452, 0.6173, 0.7448]) tensor([0.0610, 0.1308, 0.3784, 0.4299]) -Greedy action tensor([-1.7474, -0.4547, 0.5615, -0.0693]) tensor([0.0498, 0.1816, 0.5016, 0.2669]) -Greedy action tensor([-1.3961, -0.1270, 0.6948, -0.6244]) tensor([0.0675, 0.2402, 0.5463, 0.1460]) -Greedy action tensor([-1.7912, -0.5009, 0.8699, 0.2750]) tensor([0.0373, 0.1354, 0.5332, 0.2941]) -Greedy action tensor([-1.2413, -0.5549, 0.3291, 0.1934]) tensor([0.0834, 0.1656, 0.4009, 0.3500]) -Greedy action tensor([-1.5109, -0.5953, 0.1781, -0.3476]) tensor([0.0826, 0.2062, 0.4470, 0.2642]) -Greedy action tensor([-1.7232, -0.4570, 0.5783, 0.0183]) tensor([0.0494, 0.1752, 0.4935, 0.2819]) -Greedy action tensor([-1.8880, -0.5486, 1.3745, 0.8053]) tensor([0.0219, 0.0835, 0.5713, 0.3234]) -Greedy action tensor([-0.8025, -0.5226, 0.2164, 0.1135]) tensor([0.1317, 0.1742, 0.3649, 0.3292]) -Greedy action tensor([-1.8400, -0.4762, 0.6525, -0.0697]) tensor([0.0437, 0.1710, 0.5286, 0.2567]) -Greedy action tensor([-1.1227, -0.4893, 0.3863, 0.6966]) tensor([0.0737, 0.1388, 0.3332, 0.4544]) -Greedy action tensor([-1.9549, -0.8179, 0.4530, 0.0173]) tensor([0.0446, 0.1391, 0.4957, 0.3206]) -Greedy action tensor([-1.7064, -0.4588, 0.9637, 0.7043]) tensor([0.0333, 0.1158, 0.4803, 0.3706]) -Greedy action tensor([-1.8519, -0.4942, 0.6307, -0.1242]) tensor([0.0445, 0.1729, 0.5324, 0.2503]) -Greedy action tensor([-1.8786, -0.4410, 0.6340, -0.1525]) tensor([0.0432, 0.1817, 0.5325, 0.2425]) -Greedy action tensor([-1.8870, -0.3367, 0.6610, -0.0864]) tensor([0.0407, 0.1920, 0.5207, 0.2466]) -Greedy action tensor([-1.8557, -0.7605, 0.1521, -0.1955]) tensor([0.0599, 0.1791, 0.4460, 0.3150]) -Greedy action tensor([-1.4496, -0.5687, 0.4016, 0.1140]) tensor([0.0687, 0.1658, 0.4374, 0.3281]) -Greedy action tensor([-1.5162, -0.5280, 0.4215, 0.1073]) tensor([0.0637, 0.1711, 0.4422, 0.3230]) -Greedy action tensor([-0.6016, -0.4222, 1.0834, 1.6506]) tensor([0.0585, 0.0700, 0.3154, 0.5561]) -Greedy action tensor([-1.6735, 0.0503, 0.4066, 0.0034]) tensor([0.0501, 0.2809, 0.4011, 0.2680]) -Greedy action tensor([-1.8389, -0.1916, 0.5850, -0.1057]) tensor([0.0432, 0.2244, 0.4879, 0.2445]) -Greedy action tensor([-1.5471, -0.0351, 0.5502, -0.2860]) tensor([0.0581, 0.2636, 0.4733, 0.2051]) -Greedy action tensor([-1.8502, -0.4521, 0.6575, -0.0794]) tensor([0.0431, 0.1745, 0.5292, 0.2533]) -Greedy action tensor([-1.6074, 0.5205, 0.5801, 0.5141]) tensor([0.0375, 0.3150, 0.3344, 0.3130]) -Greedy action tensor([-1.2919, -0.9921, 0.1132, -0.8773]) tensor([0.1260, 0.1700, 0.5134, 0.1907]) -Greedy action tensor([-1.7180, -0.5130, 0.5749, -0.0848]) tensor([0.0517, 0.1723, 0.5115, 0.2645]) -Greedy action tensor([-1.8723, -0.4730, 0.7002, -0.0695]) tensor([0.0413, 0.1673, 0.5409, 0.2505]) -Greedy action tensor([-1.5339, -0.3945, 0.5974, -0.0651]) tensor([0.0592, 0.1850, 0.4987, 0.2571]) -Greedy action tensor([-1.0894, -0.7337, 0.9667, 1.3265]) tensor([0.0466, 0.0666, 0.3645, 0.5223]) -Greedy action tensor([-1.9081, -0.4686, 0.6863, -0.1221]) tensor([0.0407, 0.1717, 0.5449, 0.2428]) -Greedy action tensor([-1.8828, -0.4672, 0.6473, -0.1339]) tensor([0.0427, 0.1759, 0.5360, 0.2454]) -Greedy action tensor([-1.5978, -0.5142, 0.5414, 0.1351]) tensor([0.0552, 0.1632, 0.4691, 0.3125]) -Greedy action tensor([-1.3017, -0.5594, 0.3352, 0.2197]) tensor([0.0780, 0.1639, 0.4009, 0.3572]) -Greedy action tensor([-0.9319, -0.5536, 1.2147, 1.4991]) tensor([0.0447, 0.0652, 0.3822, 0.5079]) -Greedy action tensor([-1.6036, -0.0180, 0.4289, 0.0079]) tensor([0.0540, 0.2635, 0.4120, 0.2705]) -Greedy action tensor([-1.3359, -0.5876, 0.3377, 0.1817]) tensor([0.0769, 0.1625, 0.4099, 0.3507]) -Greedy action tensor([-0.7046, -0.5285, 0.1475, 0.4159]) tensor([0.1315, 0.1568, 0.3084, 0.4033]) -Greedy action tensor([-1.2295, -0.5851, 0.2940, 0.2548]) tensor([0.0840, 0.1600, 0.3854, 0.3706]) -Greedy action tensor([-1.8424, -0.3302, 0.6072, -0.0815]) tensor([0.0436, 0.1978, 0.5050, 0.2536]) -Greedy action tensor([-1.7378, -0.6620, 0.2775, -0.4045]) tensor([0.0657, 0.1925, 0.4927, 0.2491]) -Greedy action tensor([-0.4063, -0.2046, 1.0531, 1.6019]) tensor([0.0716, 0.0875, 0.3079, 0.5330]) -Greedy action tensor([-7.7737e-01, 1.8427e-04, 2.1593e-01, -9.3311e-02]) tensor([0.1273, 0.2769, 0.3436, 0.2522]) -Greedy action tensor([ 2.2215, -0.6268, 1.2067, 1.5373]) tensor([0.5195, 0.0301, 0.1883, 0.2621]) -Greedy action tensor([ 0.1363, -0.4848, -0.7978, 0.6602]) tensor([0.2763, 0.1485, 0.1086, 0.4666]) -Greedy action tensor([-0.7492, -0.3382, -0.6582, -0.8625]) tensor([0.2224, 0.3354, 0.2436, 0.1986]) -Greedy action tensor([ 0.6729, -0.0965, -0.5933, 1.3687]) tensor([0.2666, 0.1235, 0.0752, 0.5347]) -Greedy action tensor([ 0.3302, -0.5296, 0.5492, 0.7920]) tensor([0.2350, 0.0995, 0.2925, 0.3730]) -Greedy action tensor([ 0.7458, -0.3379, 0.7284, 0.3727]) tensor([0.3323, 0.1124, 0.3265, 0.2288]) -Greedy action tensor([0.6366, 1.0254, 0.0104, 0.4253]) tensor([0.2618, 0.3862, 0.1400, 0.2119]) -Greedy action tensor([ 1.4897, -0.1450, 0.7339, 0.8891]) tensor([0.4518, 0.0881, 0.2122, 0.2478]) -Greedy action tensor([ 0.9018, -1.3444, 0.3996, 0.4266]) tensor([0.4287, 0.0454, 0.2594, 0.2665]) -Greedy action tensor([ 1.0262, 0.5044, 0.1805, -0.3711]) tensor([0.4405, 0.2614, 0.1891, 0.1089]) -Greedy action tensor([ 0.2499, 0.7096, 0.3824, -0.8369]) tensor([0.2462, 0.3898, 0.2810, 0.0830]) -Greedy action tensor([ 0.6670, -1.7443, 1.9827, -0.4698]) tensor([0.1946, 0.0175, 0.7255, 0.0624]) -Greedy action tensor([-0.5529, 0.5158, -0.3201, 1.3837]) tensor([0.0826, 0.2405, 0.1042, 0.5727]) -Greedy action tensor([ 0.0905, -1.5090, 0.5795, 1.1191]) tensor([0.1776, 0.0359, 0.2896, 0.4969]) -Greedy action tensor([ 0.9422, -1.9373, 0.6118, 0.6640]) tensor([0.3949, 0.0222, 0.2838, 0.2990]) -Greedy action tensor([ 0.7371, 0.1082, -0.9291, 0.5657]) tensor([0.3899, 0.2079, 0.0737, 0.3285]) -Greedy action tensor([ 1.1672, -1.2290, 1.0031, -0.0139]) tensor([0.4451, 0.0405, 0.3777, 0.1366]) -Greedy action tensor([ 1.5302, -0.2300, 0.5003, 0.7836]) tensor([0.4992, 0.0859, 0.1783, 0.2366]) -Greedy action tensor([-0.3453, 0.2897, 0.7942, -0.3924]) tensor([0.1435, 0.2709, 0.4486, 0.1369]) -Greedy action tensor([ 0.6032, -0.3293, -1.0675, 1.2499]) tensor([0.2864, 0.1127, 0.0539, 0.5469]) -Greedy action tensor([ 0.8547, 0.2592, -0.8717, 0.1917]) tensor([0.4455, 0.2456, 0.0793, 0.2296]) -Greedy action tensor([ 0.1762, -0.3734, -1.0174, 0.7414]) tensor([0.2747, 0.1586, 0.0833, 0.4834]) -Greedy action tensor([-0.2522, -1.1531, -0.7033, 1.5199]) tensor([0.1262, 0.0512, 0.0804, 0.7422]) -Greedy action tensor([ 0.5314, -0.4049, 1.0826, 1.4515]) tensor([0.1774, 0.0696, 0.3078, 0.4452]) -Greedy action tensor([ 1.0838, -1.0250, 0.9171, 0.9867]) tensor([0.3478, 0.0422, 0.2944, 0.3156]) -Greedy action tensor([ 0.7637, -1.8461, 0.3973, 0.3610]) tensor([0.4106, 0.0302, 0.2846, 0.2745]) -Greedy action tensor([ 0.1027, -0.7458, 0.2922, 0.7460]) tensor([0.2203, 0.0943, 0.2663, 0.4192]) -Greedy action tensor([-0.1967, -0.0106, -0.2918, 0.5414]) tensor([0.1921, 0.2314, 0.1747, 0.4018]) -Greedy action tensor([0.8762, 0.0617, 0.2004, 2.5372]) tensor([0.1386, 0.0614, 0.0705, 0.7296]) -Greedy action tensor([ 0.1553, -0.3369, 0.1078, -0.1036]) tensor([0.2997, 0.1832, 0.2858, 0.2313]) -Greedy action tensor([ 0.3046, -0.4022, -0.1366, 0.7427]) tensor([0.2713, 0.1338, 0.1745, 0.4204]) -Greedy action tensor([ 1.3944, -0.4636, -0.1096, 0.7707]) tensor([0.5224, 0.0815, 0.1161, 0.2800]) -Greedy action tensor([-0.1862, -2.0047, 0.6081, 0.2492]) tensor([0.2032, 0.0330, 0.4497, 0.3141]) -Greedy action tensor([-0.1677, -0.2450, -0.0885, -0.3441]) tensor([0.2600, 0.2407, 0.2814, 0.2179]) -Greedy action tensor([ 0.6133, 0.0731, -0.1958, -0.2129]) tensor([0.4056, 0.2363, 0.1806, 0.1775]) -Greedy action tensor([ 1.2104, -0.4283, -0.4662, 1.8185]) tensor([0.3107, 0.0604, 0.0581, 0.5708]) -Greedy action tensor([-0.7680, -1.2508, -0.7211, -0.5971]) tensor([0.2596, 0.1602, 0.2721, 0.3080]) -Greedy action tensor([ 1.6133, -0.9763, 0.4223, 0.4634]) tensor([0.5897, 0.0443, 0.1792, 0.1868]) -Greedy action tensor([-0.4226, -0.5484, -0.5473, 0.2167]) tensor([0.2146, 0.1892, 0.1894, 0.4067]) -Greedy action tensor([ 1.7206, -0.7687, 0.7706, 0.0401]) tensor([0.6039, 0.0501, 0.2335, 0.1125]) -Greedy action tensor([ 0.3137, -0.0166, -0.2888, -1.0646]) tensor([0.3971, 0.2854, 0.2174, 0.1001]) -Greedy action tensor([ 0.2722, -2.0337, -0.1398, 1.0234]) tensor([0.2576, 0.0257, 0.1706, 0.5461]) -Greedy action tensor([ 0.9855, -1.1368, 1.3381, 0.6218]) tensor([0.3089, 0.0370, 0.4394, 0.2147]) -Greedy action tensor([-1.0916, 1.4942, 0.3737, -0.2778]) tensor([0.0479, 0.6364, 0.2075, 0.1082]) -Greedy action tensor([ 1.6414, -0.2184, 0.6510, 1.5230]) tensor([0.4140, 0.0645, 0.1538, 0.3678]) -Greedy action tensor([ 0.8712, 0.3270, 0.5412, -0.2528]) tensor([0.3811, 0.2211, 0.2740, 0.1238]) -Greedy action tensor([ 0.2606, -0.3515, 0.3180, 0.6032]) tensor([0.2494, 0.1352, 0.2641, 0.3513]) -Greedy action tensor([-0.7294, -0.8694, -0.1245, 1.7733]) tensor([0.0628, 0.0546, 0.1150, 0.7675]) -Greedy action tensor([ 1.3314, -0.2883, 0.9102, 0.7907]) tensor([0.4104, 0.0812, 0.2693, 0.2390]) -Greedy action tensor([-0.0743, 0.1746, 1.3429, -0.1080]) tensor([0.1356, 0.1739, 0.5594, 0.1311]) -Greedy action tensor([ 1.0766, -1.4999, -0.2058, 1.5068]) tensor([0.3459, 0.0263, 0.0959, 0.5319]) -Greedy action tensor([ 0.4451, 0.1495, 2.3755, -0.0172]) tensor([0.1079, 0.0803, 0.7438, 0.0680]) -Greedy action tensor([-0.0996, -0.6007, 2.3223, 0.3543]) tensor([0.0692, 0.0419, 0.7799, 0.1090]) -Greedy action tensor([-0.3953, -1.1930, 0.8516, -0.5192]) tensor([0.1720, 0.0775, 0.5985, 0.1520]) -Greedy action tensor([0.3667, 0.3578, 0.2532, 0.2094]) tensor([0.2675, 0.2651, 0.2388, 0.2286]) -Greedy action tensor([ 0.8002, -1.3629, 0.2769, -0.5538]) tensor([0.5087, 0.0585, 0.3014, 0.1314]) -Greedy action tensor([ 1.8219, -0.4342, 0.7147, 0.6955]) tensor([0.5684, 0.0595, 0.1878, 0.1843]) -Greedy action tensor([ 0.0797, -0.3434, 0.9118, 1.9162]) tensor([0.0978, 0.0640, 0.2247, 0.6135]) -Greedy action tensor([ 0.0279, -0.1589, -0.1182, 0.6356]) tensor([0.2208, 0.1831, 0.1907, 0.4053]) -Greedy action tensor([ 0.9568, -0.1559, 1.3714, 0.8818]) tensor([0.2653, 0.0872, 0.4015, 0.2461]) -Greedy action tensor([ 0.7203, -0.1602, 0.1648, 1.1858]) tensor([0.2792, 0.1158, 0.1602, 0.4448]) -Greedy action tensor([ 0.2665, 0.6274, 0.0153, -0.1259]) tensor([0.2572, 0.3690, 0.2001, 0.1737]) -Greedy action tensor([ 0.8767, -0.2717, -0.2489, 0.8698]) tensor([0.3796, 0.1204, 0.1231, 0.3769]) -Greedy action tensor([ 0.7446, 0.4942, -0.1030, 1.0732]) tensor([0.2781, 0.2165, 0.1191, 0.3863]) -Greedy action tensor([-0.4689, 0.2224, 0.0789, 0.2923]) tensor([0.1456, 0.2907, 0.2519, 0.3118]) -Greedy action tensor([-0.4532, -1.3034, 1.1159, 0.3800]) tensor([0.1172, 0.0501, 0.5630, 0.2697]) -Greedy action tensor([ 0.5309, -1.8967, 0.7454, 0.6473]) tensor([0.2898, 0.0256, 0.3591, 0.3256]) -Greedy action tensor([-1.0631, -0.4667, 0.2798, 0.6599]) tensor([0.0817, 0.1483, 0.3127, 0.4574]) -Greedy action tensor([-0.1020, 0.4044, 0.4785, 1.8701]) tensor([0.0860, 0.1427, 0.1536, 0.6177]) -Greedy action tensor([ 0.1374, -1.0255, 0.0776, -0.1170]) tensor([0.3300, 0.1032, 0.3109, 0.2559]) -Greedy action tensor([ 1.9331, -1.2820, 0.5168, 1.3241]) tensor([0.5474, 0.0220, 0.1328, 0.2978]) -Greedy action tensor([ 0.3674, -0.4896, 0.3506, 1.0534]) tensor([0.2276, 0.0966, 0.2238, 0.4520]) -Greedy action tensor([ 0.1992, -0.9821, 0.8270, 0.2957]) tensor([0.2336, 0.0717, 0.4376, 0.2572]) -Greedy action tensor([ 1.7011, -1.4025, 1.5440, 0.8045]) tensor([0.4334, 0.0195, 0.3704, 0.1768]) -Greedy action tensor([ 0.2672, 0.0019, -0.1586, 0.9027]) tensor([0.2321, 0.1780, 0.1516, 0.4382]) -Greedy action tensor([ 1.3886, -0.4125, 0.1579, 1.6818]) tensor([0.3574, 0.0590, 0.1044, 0.4792]) -Greedy action tensor([-0.6255, -2.3429, 0.1737, 0.5656]) tensor([0.1494, 0.0268, 0.3322, 0.4916]) -Greedy action tensor([ 0.0079, -0.1116, 0.9393, -0.9298]) tensor([0.2076, 0.1842, 0.5269, 0.0813]) -Greedy action tensor([1.3388, 1.3169, 0.1012, 0.4695]) tensor([0.3721, 0.3640, 0.1079, 0.1560]) -Greedy action tensor([ 0.5680, -0.8420, 0.3869, 1.6897]) tensor([0.1942, 0.0474, 0.1620, 0.5963]) -Greedy action tensor([ 0.7835, -0.5398, -0.7013, 0.8929]) tensor([0.3834, 0.1021, 0.0869, 0.4277]) -Greedy action tensor([-0.2172, -1.2650, -0.1916, -0.1609]) tensor([0.2912, 0.1021, 0.2987, 0.3080]) -Greedy action tensor([ 0.8921, -0.8256, 0.0221, -0.3926]) tensor([0.5333, 0.0957, 0.2234, 0.1476]) -Greedy action tensor([ 0.2683, -0.3677, -0.0601, -0.3354]) tensor([0.3576, 0.1893, 0.2575, 0.1955]) -Greedy action tensor([ 0.2791, 0.0966, -0.0500, -0.1721]) tensor([0.3135, 0.2612, 0.2256, 0.1997]) -Greedy action tensor([ 0.9644, -0.5349, -0.0290, -0.5211]) tensor([0.5495, 0.1227, 0.2035, 0.1244]) -Greedy action tensor([ 0.4164, 0.1455, 0.1121, -0.2771]) tensor([0.3333, 0.2542, 0.2459, 0.1666]) -Greedy action tensor([ 0.4048, -0.1189, -0.0643, -0.1763]) tensor([0.3601, 0.2133, 0.2252, 0.2014]) -Greedy action tensor([ 0.7800, -0.3459, 0.1816, -0.5114]) tensor([0.4653, 0.1509, 0.2558, 0.1279]) -Greedy action tensor([ 0.9045, -0.5655, -0.0944, -0.4899]) tensor([0.5416, 0.1245, 0.1995, 0.1343]) -Greedy action tensor([ 0.8024, -0.5157, -0.1238, -0.6151]) tensor([0.5247, 0.1404, 0.2078, 0.1271]) -Greedy action tensor([ 0.9187, -0.5841, -0.0055, -0.6322]) tensor([0.5460, 0.1215, 0.2167, 0.1158]) -Greedy action tensor([ 0.3258, 0.2017, -0.0940, -0.0621]) tensor([0.3106, 0.2744, 0.2042, 0.2108]) -Greedy action tensor([ 1.0074, -0.7078, 0.0554, -0.4591]) tensor([0.5566, 0.1002, 0.2148, 0.1284]) -Greedy action tensor([ 0.7373, -0.5397, -0.1368, -0.3639]) tensor([0.4930, 0.1375, 0.2057, 0.1639]) -Greedy action tensor([ 0.5862, -0.2194, -0.0223, -0.2984]) tensor([0.4160, 0.1859, 0.2264, 0.1718]) -Greedy action tensor([ 0.5223, 0.0733, -0.1936, -0.0856]) tensor([0.3743, 0.2389, 0.1829, 0.2038]) -Greedy action tensor([ 0.3909, 0.2083, 0.1939, -0.1991]) tensor([0.3117, 0.2596, 0.2559, 0.1728]) -Greedy action tensor([ 0.9462, -0.9594, 0.0986, -0.4402]) tensor([0.5473, 0.0814, 0.2345, 0.1368]) -Greedy action tensor([ 1.0043, -0.4236, 0.1324, -0.3705]) tensor([0.5233, 0.1255, 0.2188, 0.1323]) -Greedy action tensor([ 0.6028, -0.2362, -0.0731, -0.1762]) tensor([0.4167, 0.1801, 0.2120, 0.1912]) -Greedy action tensor([ 0.4386, -0.3149, -0.0803, -0.2878]) tensor([0.3922, 0.1846, 0.2335, 0.1897]) -Greedy action tensor([ 0.5521, -0.0406, -0.0322, -0.2216]) tensor([0.3888, 0.2150, 0.2168, 0.1794]) -Greedy action tensor([ 0.7970, -0.5564, 0.0969, -0.7629]) tensor([0.5089, 0.1315, 0.2527, 0.1069]) -Greedy action tensor([ 0.7606, -0.6471, 0.0949, -0.6249]) tensor([0.4978, 0.1218, 0.2558, 0.1245]) -Greedy action tensor([ 0.4574, -0.2328, -0.0618, -0.3637]) tensor([0.3943, 0.1977, 0.2346, 0.1734]) -Greedy action tensor([ 0.5346, -0.4075, 0.0803, -0.6091]) tensor([0.4267, 0.1663, 0.2709, 0.1360]) -Greedy action tensor([ 0.5301, -0.2678, -0.0419, -0.2569]) tensor([0.4049, 0.1823, 0.2285, 0.1843]) -Greedy action tensor([ 0.6129, -0.4230, -0.0764, -0.4037]) tensor([0.4507, 0.1600, 0.2262, 0.1631]) -Greedy action tensor([ 0.7406, -0.4025, -0.1686, -0.3610]) tensor([0.4868, 0.1552, 0.1961, 0.1618]) -Greedy action tensor([ 0.6828, -0.1837, -0.1747, -0.2562]) tensor([0.4473, 0.1880, 0.1898, 0.1749]) -Greedy action tensor([ 0.3924, -0.0868, 0.0194, -0.1674]) tensor([0.3473, 0.2151, 0.2392, 0.1984]) -Greedy action tensor([ 0.8211, -0.7030, -0.0090, -0.6756]) tensor([0.5326, 0.1160, 0.2322, 0.1192]) -Greedy action tensor([ 0.3861, -0.2510, -0.0954, -0.2837]) tensor([0.3762, 0.1989, 0.2324, 0.1925]) -Greedy action tensor([ 0.9503, -0.5011, 0.2155, -0.4027]) tensor([0.5070, 0.1188, 0.2432, 0.1310]) -Greedy action tensor([ 0.4268, 0.0480, -0.0122, -0.1523]) tensor([0.3460, 0.2369, 0.2231, 0.1939]) -Greedy action tensor([ 0.4353, -0.3495, -0.0694, -0.1983]) tensor([0.3860, 0.1761, 0.2330, 0.2048]) -Greedy action tensor([ 0.2288, -0.0844, -0.0666, -0.2713]) tensor([0.3245, 0.2372, 0.2415, 0.1968]) -Greedy action tensor([ 0.7428, -0.0484, 0.0507, -0.1829]) tensor([0.4255, 0.1929, 0.2130, 0.1686]) -Greedy action tensor([ 0.4631, -0.1153, 0.1307, -0.2530]) tensor([0.3614, 0.2027, 0.2592, 0.1766]) -Greedy action tensor([ 0.8422, -0.5765, -0.0379, -0.4145]) tensor([0.5151, 0.1247, 0.2136, 0.1466]) -Greedy action tensor([ 0.5566, -0.2753, -0.1319, -0.3103]) tensor([0.4241, 0.1846, 0.2130, 0.1782]) -Greedy action tensor([ 0.4277, -0.2162, -0.0606, -0.0646]) tensor([0.3636, 0.1910, 0.2231, 0.2222]) -Greedy action tensor([ 0.5641, -0.2048, -0.0304, -0.1110]) tensor([0.3961, 0.1836, 0.2186, 0.2017]) -Greedy action tensor([ 0.4701, -0.1380, -0.1044, -0.3839]) tensor([0.3948, 0.2149, 0.2223, 0.1681]) -Greedy action tensor([ 0.3520, -0.1842, 0.0380, -0.0528]) tensor([0.3353, 0.1961, 0.2449, 0.2237]) -Greedy action tensor([ 0.8864, -0.2496, -0.1625, -0.6319]) tensor([0.5290, 0.1699, 0.1853, 0.1159]) -Greedy action tensor([ 0.3988, -0.3682, -0.1224, -0.3466]) tensor([0.3948, 0.1834, 0.2345, 0.1874]) -Greedy action tensor([ 0.5545, -0.1955, 0.1435, -0.2952]) tensor([0.3902, 0.1843, 0.2587, 0.1668]) -Greedy action tensor([ 0.7076, -0.6611, -0.0511, -0.4921]) tensor([0.4941, 0.1257, 0.2314, 0.1489]) -Greedy action tensor([ 0.5010, -0.0797, -0.1089, -0.1140]) tensor([0.3783, 0.2117, 0.2056, 0.2045]) -Greedy action tensor([ 0.7042, -0.3313, -0.1846, -0.3250]) tensor([0.4709, 0.1672, 0.1936, 0.1683]) -Greedy action tensor([ 0.4635, -0.1230, 0.0527, -0.4786]) tensor([0.3833, 0.2132, 0.2541, 0.1494]) -Greedy action tensor([ 0.6250, -0.2823, -0.0607, -0.1939]) tensor([0.4258, 0.1719, 0.2145, 0.1878]) -Greedy action tensor([ 0.6034, -0.2319, 0.0945, -0.5263]) tensor([0.4241, 0.1839, 0.2550, 0.1370]) -Greedy action tensor([ 0.8308, -0.5680, -0.0444, -0.2887]) tensor([0.5025, 0.1241, 0.2094, 0.1640]) -Greedy action tensor([ 0.5374, -0.4159, -0.0860, -0.4515]) tensor([0.4360, 0.1681, 0.2337, 0.1622]) -Greedy action tensor([ 0.1473, 0.1796, -0.0477, -0.3148]) tensor([0.2869, 0.2963, 0.2361, 0.1807]) -Greedy action tensor([ 0.4457, -0.1961, -0.1437, -0.2548]) tensor([0.3880, 0.2042, 0.2152, 0.1926]) -Greedy action tensor([ 1.0171, -0.8908, 0.0269, -0.5130]) tensor([0.5759, 0.0855, 0.2139, 0.1247]) -Greedy action tensor([ 0.5561, -0.1729, -0.0766, -0.3348]) tensor([0.4126, 0.1990, 0.2191, 0.1693]) -Greedy action tensor([ 0.4462, -0.2605, -0.0202, -0.2372]) tensor([0.3809, 0.1879, 0.2389, 0.1923]) -Greedy action tensor([ 0.7082, -0.6312, -0.0315, -0.2459]) tensor([0.4707, 0.1233, 0.2247, 0.1813]) -Greedy action tensor([ 0.4441, -0.1343, -0.0490, -0.2097]) tensor([0.3715, 0.2083, 0.2269, 0.1932]) -Greedy action tensor([ 0.5354, -0.0527, 0.0577, -0.1728]) tensor([0.3748, 0.2082, 0.2325, 0.1846]) -Greedy action tensor([ 0.5242, 0.2983, 0.1177, -0.3686]) tensor([0.3480, 0.2777, 0.2318, 0.1425]) -Greedy action tensor([ 0.4687, -0.1385, -0.1318, -0.2879]) tensor([0.3902, 0.2126, 0.2140, 0.1831]) -Greedy action tensor([ 0.8583, -0.5836, -0.1551, -0.4333]) tensor([0.5335, 0.1262, 0.1937, 0.1466]) -Greedy action tensor([ 0.6171, -0.2336, -0.0643, -0.1907]) tensor([0.4204, 0.1795, 0.2127, 0.1874]) -Greedy action tensor([ 0.5874, -0.3471, -0.0365, -0.1085]) tensor([0.4120, 0.1618, 0.2208, 0.2054]) -Greedy action tensor([ 0.6885, -0.3006, -0.0285, -0.1552]) tensor([0.4366, 0.1624, 0.2132, 0.1878]) -Greedy action tensor([ 1.0410, -0.4364, -0.0040, -0.5398]) tensor([0.5600, 0.1278, 0.1969, 0.1152]) -Greedy action tensor([ 0.5828, -0.0099, -0.0878, -0.0336]) tensor([0.3840, 0.2123, 0.1964, 0.2073]) -Greedy action tensor([ 0.7603, -0.5218, -0.0263, -0.2906]) tensor([0.4802, 0.1332, 0.2187, 0.1679]) -Greedy action tensor([ 0.4782, -0.2383, -0.0895, -0.3215]) tensor([0.3992, 0.1950, 0.2263, 0.1795]) -Greedy action tensor([ 0.5867, -0.2437, 0.0642, -0.3540]) tensor([0.4133, 0.1802, 0.2451, 0.1613]) -Greedy action tensor([ 0.5011, -0.2025, -0.0614, -0.2118]) tensor([0.3914, 0.1937, 0.2230, 0.1919]) -Greedy action tensor([ 0.9699, -0.6526, -0.0578, -0.5732]) tensor([0.5653, 0.1116, 0.2023, 0.1208]) -Greedy action tensor([ 0.6881, -0.2199, -0.0657, -0.1233]) tensor([0.4314, 0.1740, 0.2030, 0.1916]) -Greedy action tensor([ 0.7093, -0.4270, -0.0652, -0.3068]) tensor([0.4664, 0.1497, 0.2150, 0.1688]) -Greedy action tensor([ 0.7185, -0.6337, 0.2136, -0.8152]) tensor([0.4812, 0.1245, 0.2905, 0.1038]) -Greedy action tensor([ 0.4863, -0.0644, 0.2176, -0.3306]) tensor([0.3594, 0.2072, 0.2747, 0.1588]) -Greedy action tensor([ 0.6122, -0.5468, -0.1217, -0.4193]) tensor([0.4650, 0.1459, 0.2232, 0.1658]) -Greedy action tensor([-1.4869, -0.6775, 1.1211, 1.0112]) tensor([0.0345, 0.0775, 0.4684, 0.4196]) -Greedy action tensor([-1.2678, 0.3609, 0.1260, 0.2669]) tensor([0.0677, 0.3452, 0.2729, 0.3142]) -Greedy action tensor([-1.2217, -0.8051, 1.2026, 1.4219]) tensor([0.0359, 0.0544, 0.4052, 0.5045]) -Greedy action tensor([-1.8284, -0.3987, 0.6397, -0.0750]) tensor([0.0440, 0.1836, 0.5186, 0.2538]) -Greedy action tensor([-1.7302, -0.4988, 0.5509, -0.0143]) tensor([0.0506, 0.1733, 0.4949, 0.2813]) -Greedy action tensor([-1.8161, -0.4873, 0.6513, -0.0258]) tensor([0.0443, 0.1674, 0.5227, 0.2656]) -Greedy action tensor([-1.0591, -0.5612, 0.3920, 0.7887]) tensor([0.0754, 0.1241, 0.3219, 0.4786]) -Greedy action tensor([-1.7435, -0.4615, 0.6093, 0.0157]) tensor([0.0478, 0.1722, 0.5025, 0.2775]) -Greedy action tensor([-1.9020, -0.3950, 0.6463, -0.1474]) tensor([0.0415, 0.1874, 0.5310, 0.2401]) -Greedy action tensor([-1.1903, 0.0815, 0.2795, 0.4470]) tensor([0.0711, 0.2538, 0.3093, 0.3658]) -Greedy action tensor([-0.9566, -0.3248, 0.5544, -0.5133]) tensor([0.1115, 0.2097, 0.5051, 0.1737]) -Greedy action tensor([-1.8913, -0.4608, 0.6488, -0.1404]) tensor([0.0423, 0.1770, 0.5369, 0.2438]) -Greedy action tensor([-1.2417, 0.6482, 0.1683, 0.2807]) tensor([0.0614, 0.4061, 0.2513, 0.2812]) -Greedy action tensor([-1.5562, -0.5218, 0.6095, 0.4011]) tensor([0.0510, 0.1434, 0.4446, 0.3610]) -Greedy action tensor([-1.5756, -0.5676, 0.4817, 0.0793]) tensor([0.0595, 0.1631, 0.4658, 0.3115]) -Greedy action tensor([-1.1399, -0.4825, 1.1551, 1.3502]) tensor([0.0401, 0.0775, 0.3983, 0.4841]) -Greedy action tensor([-1.8487, -0.4836, 0.6160, -0.1170]) tensor([0.0448, 0.1754, 0.5267, 0.2531]) -Greedy action tensor([-1.2326, -0.1449, 0.2749, 0.1674]) tensor([0.0798, 0.2367, 0.3601, 0.3234]) -Greedy action tensor([-2.0409, -0.8362, 0.6304, 0.0301]) tensor([0.0374, 0.1248, 0.5410, 0.2968]) -Greedy action tensor([-1.2943, -0.5625, 0.4266, 0.2177]) tensor([0.0757, 0.1574, 0.4233, 0.3435]) -Greedy action tensor([-1.6602, -0.5357, 0.5283, 0.0195]) tensor([0.0545, 0.1676, 0.4858, 0.2921]) -Greedy action tensor([-1.8996, -0.5793, 0.8528, -0.0668]) tensor([0.0375, 0.1404, 0.5878, 0.2343]) -Greedy action tensor([-1.3085, -0.5344, 0.7358, 0.8699]) tensor([0.0507, 0.1099, 0.3916, 0.4478]) -Greedy action tensor([-1.7479, -0.5213, 0.5907, -0.0382]) tensor([0.0493, 0.1679, 0.5106, 0.2722]) -Greedy action tensor([-1.7496e+00, -2.8225e-01, 5.9843e-01, 7.9393e-04]) tensor([0.0464, 0.2012, 0.4854, 0.2670]) -Greedy action tensor([-1.2438, -0.5559, 0.2959, 0.4027]) tensor([0.0779, 0.1549, 0.3631, 0.4041]) -Greedy action tensor([-1.7195, -0.4731, 0.6512, 0.2032]) tensor([0.0454, 0.1579, 0.4861, 0.3106]) -Greedy action tensor([-1.2814, -0.5951, 0.3970, 0.2623]) tensor([0.0768, 0.1525, 0.4113, 0.3594]) -Greedy action tensor([-0.5244, 0.2879, 0.4625, 1.4093]) tensor([0.0778, 0.1753, 0.2088, 0.5381]) -Greedy action tensor([-1.3794, 0.6086, 0.2116, 0.1597]) tensor([0.0560, 0.4086, 0.2747, 0.2608]) -Greedy action tensor([-0.2965, -0.2067, 1.1035, 1.5773]) tensor([0.0790, 0.0864, 0.3203, 0.5144]) -Greedy action tensor([-1.7931, -0.3716, 0.5975, -0.0647]) tensor([0.0461, 0.1910, 0.5033, 0.2596]) -Greedy action tensor([-1.8191, -0.4476, 0.6214, -0.0785]) tensor([0.0452, 0.1782, 0.5189, 0.2577]) -Greedy action tensor([-1.7394, -0.7824, -0.0206, -0.4819]) tensor([0.0787, 0.2051, 0.4392, 0.2770]) -Greedy action tensor([-1.1684, -0.5215, 0.8129, 1.0291]) tensor([0.0522, 0.0996, 0.3784, 0.4697]) -Greedy action tensor([-0.5242, -0.6472, 0.5373, 0.0286]) tensor([0.1535, 0.1358, 0.4438, 0.2669]) -Greedy action tensor([-1.8806, -0.4362, 0.6297, -0.1460]) tensor([0.0431, 0.1826, 0.5302, 0.2441]) -Greedy action tensor([-1.1757, -0.6242, 0.2701, 0.2821]) tensor([0.0887, 0.1539, 0.3764, 0.3810]) -Greedy action tensor([-1.9048, -0.4606, 0.6620, -0.1279]) tensor([0.0414, 0.1753, 0.5388, 0.2445]) -Greedy action tensor([-1.8917, -0.4186, 0.6353, -0.1745]) tensor([0.0426, 0.1861, 0.5338, 0.2375]) -Greedy action tensor([-1.8757, -0.3544, 0.6182, -0.1375]) tensor([0.0428, 0.1959, 0.5180, 0.2433]) -Greedy action tensor([-1.8099, -0.3084, 0.5809, -0.0728]) tensor([0.0453, 0.2032, 0.4944, 0.2571]) -Greedy action tensor([-1.8389, -0.4890, 0.6171, -0.1116]) tensor([0.0452, 0.1742, 0.5265, 0.2541]) -Greedy action tensor([-0.9997, 0.6738, -0.1233, -0.6512]) tensor([0.0985, 0.5252, 0.2367, 0.1396]) -Greedy action tensor([-0.7888, -0.5512, 0.4481, 0.2223]) tensor([0.1182, 0.1499, 0.4071, 0.3248]) -Greedy action tensor([-1.7042, -0.5798, 1.4494, 1.0523]) tensor([0.0231, 0.0712, 0.5416, 0.3641]) -Greedy action tensor([-1.3801, -0.5538, 0.3601, 0.1890]) tensor([0.0725, 0.1657, 0.4134, 0.3484]) -Greedy action tensor([-1.5811, -0.5003, 0.4719, 0.1593]) tensor([0.0573, 0.1690, 0.4468, 0.3269]) -Greedy action tensor([-0.9240, -0.4977, 0.4693, -0.3322]) tensor([0.1195, 0.1831, 0.4814, 0.2160]) -Greedy action tensor([-1.6930, -0.5278, 0.5374, -0.0197]) tensor([0.0531, 0.1702, 0.4938, 0.2829]) -Greedy action tensor([-1.4936, -0.0212, 0.3354, 0.1484]) tensor([0.0597, 0.2602, 0.3717, 0.3083]) -Greedy action tensor([-1.3186, -0.5565, 1.2821, 1.2930]) tensor([0.0331, 0.0709, 0.4456, 0.4505]) -Greedy action tensor([-1.1123, -0.5928, 0.3352, -0.0113]) tensor([0.1006, 0.1691, 0.4278, 0.3025]) -Greedy action tensor([-0.2578, -0.2312, 0.1757, 0.1712]) tensor([0.1959, 0.2011, 0.3022, 0.3008]) -Greedy action tensor([-1.7164, -0.4545, 1.3282, 0.9220]) tensor([0.0253, 0.0894, 0.5313, 0.3540]) -Greedy action tensor([-1.1676, 0.0108, -0.0012, -0.6367]) tensor([0.1092, 0.3547, 0.3505, 0.1856]) -Greedy action tensor([-1.8601, -0.4719, 0.6254, -0.1293]) tensor([0.0441, 0.1769, 0.5299, 0.2491]) -Greedy action tensor([-1.2024, -0.2500, 0.6079, -0.5605]) tensor([0.0862, 0.2234, 0.5267, 0.1637]) -Greedy action tensor([-1.4467, -0.5386, 0.5252, 0.3622]) tensor([0.0596, 0.1479, 0.4285, 0.3640]) -Greedy action tensor([-1.6879, -0.3548, 0.5674, 0.0363]) tensor([0.0502, 0.1902, 0.4784, 0.2813]) -Greedy action tensor([-1.4587, -0.7540, -0.2476, -0.5039]) tensor([0.1114, 0.2253, 0.3739, 0.2894]) -Greedy action tensor([-1.1430, -0.5677, 0.9029, 1.1121]) tensor([0.0499, 0.0887, 0.3858, 0.4756]) -Greedy action tensor([-1.4974, -0.3546, 0.7176, 0.5354]) tensor([0.0478, 0.1498, 0.4377, 0.3648]) -Greedy action tensor([-1.8691, -0.4710, 0.6409, -0.1040]) tensor([0.0431, 0.1745, 0.5305, 0.2519]) -Greedy action tensor([-1.3766, -0.6009, 0.3812, 0.1531]) tensor([0.0736, 0.1598, 0.4268, 0.3398]) -Greedy action tensor([-1.5933, -0.3190, 0.4733, 0.0989]) tensor([0.0558, 0.1997, 0.4411, 0.3034]) -Greedy action tensor([-1.7994, -0.4242, 0.5971, -0.0653]) tensor([0.0463, 0.1831, 0.5085, 0.2621]) -Greedy action tensor([-1.1188, -0.9894, 0.7467, 1.3597]) tensor([0.0487, 0.0555, 0.3148, 0.5810]) -Greedy action tensor([-1.7861, -0.4136, 1.2836, 0.8458]) tensor([0.0248, 0.0977, 0.5333, 0.3442]) -Greedy action tensor([-1.8135, -0.4664, 0.6125, -0.0852]) tensor([0.0459, 0.1765, 0.5192, 0.2584]) -Greedy action tensor([-1.8748, -0.5002, 1.0399, 0.3305]) tensor([0.0308, 0.1218, 0.5680, 0.2794]) -Greedy action tensor([-2.0502, -0.7801, 0.8084, 0.1312]) tensor([0.0324, 0.1154, 0.5651, 0.2871]) -Greedy action tensor([-1.8003, -0.4559, 0.7070, 0.0479]) tensor([0.0426, 0.1635, 0.5232, 0.2706]) -Greedy action tensor([-1.7003, -0.4784, 0.6235, 0.1468]) tensor([0.0477, 0.1620, 0.4876, 0.3027]) -Greedy action tensor([-1.6967, -0.4785, 0.6748, 0.1767]) tensor([0.0463, 0.1565, 0.4959, 0.3013]) -Greedy action tensor([-1.7328, -0.6677, 0.3677, -0.1692]) tensor([0.0594, 0.1722, 0.4850, 0.2835]) -Greedy action tensor([-1.6648, -0.4998, 0.5959, 0.0514]) tensor([0.0517, 0.1656, 0.4954, 0.2874]) -Greedy action tensor([-1.2388, -0.5967, 0.3222, 0.2249]) tensor([0.0834, 0.1586, 0.3974, 0.3606]) -Greedy action tensor([-0.6680, -0.5721, 0.2058, 0.3848]) tensor([0.1358, 0.1495, 0.3254, 0.3892]) -Greedy action tensor([-1.7809, -0.6583, 0.0351, -0.3281]) tensor([0.0690, 0.2120, 0.4241, 0.2949]) -Greedy action tensor([-1.6552, -0.5148, 0.5292, -0.0108]) tensor([0.0550, 0.1719, 0.4885, 0.2846]) -Greedy action tensor([ 1.2878, -0.7323, -0.0991, -0.0778]) tensor([0.6106, 0.0810, 0.1526, 0.1558]) -Greedy action tensor([ 1.5721, -0.8230, -0.3182, 0.5025]) tensor([0.6308, 0.0575, 0.0953, 0.2164]) -Greedy action tensor([ 1.0166, -0.3850, -0.3468, 0.4175]) tensor([0.4875, 0.1200, 0.1247, 0.2678]) -Greedy action tensor([ 0.5617, -0.2212, 0.1193, -0.0364]) tensor([0.3775, 0.1725, 0.2425, 0.2075]) -Greedy action tensor([ 1.1144, -0.2679, -0.5508, 0.2625]) tensor([0.5357, 0.1345, 0.1013, 0.2285]) -Greedy action tensor([ 0.3818, -0.3850, -0.1446, 0.3082]) tensor([0.3351, 0.1556, 0.1979, 0.3113]) -Greedy action tensor([ 1.1747, -0.4282, -0.3780, -0.0083]) tensor([0.5816, 0.1171, 0.1231, 0.1782]) -Greedy action tensor([ 1.0697, -0.3384, -0.1502, 0.2814]) tensor([0.5014, 0.1226, 0.1480, 0.2279]) -Greedy action tensor([ 1.0533, -0.5020, 0.0312, -0.1841]) tensor([0.5373, 0.1134, 0.1933, 0.1559]) -Greedy action tensor([ 1.0755, -0.7509, -0.1639, 0.2310]) tensor([0.5318, 0.0856, 0.1540, 0.2286]) -Greedy action tensor([ 2.0790, -0.7466, -0.3768, 0.6507]) tensor([0.7221, 0.0428, 0.0620, 0.1731]) -Greedy action tensor([ 1.1117, -0.1275, -0.2584, 0.3796]) tensor([0.4939, 0.1430, 0.1255, 0.2375]) -Greedy action tensor([ 1.3009, -0.6729, -0.2247, 0.0504]) tensor([0.6087, 0.0846, 0.1324, 0.1743]) -Greedy action tensor([ 1.3734, -0.2520, -0.5167, 0.2159]) tensor([0.6016, 0.1184, 0.0909, 0.1891]) -Greedy action tensor([ 1.7980, -0.4683, -0.2750, 0.5077]) tensor([0.6646, 0.0689, 0.0836, 0.1829]) -Greedy action tensor([ 1.1556, -0.2946, -0.1081, 0.1482]) tensor([0.5313, 0.1246, 0.1501, 0.1940]) -Greedy action tensor([ 1.4480, -0.4490, 0.0695, 0.1311]) tensor([0.5988, 0.0898, 0.1509, 0.1605]) -Greedy action tensor([ 0.8728, -0.1888, -0.1454, 0.3406]) tensor([0.4358, 0.1508, 0.1575, 0.2560]) -Greedy action tensor([ 1.6647, -0.7979, 0.0501, 0.4845]) tensor([0.6284, 0.0535, 0.1250, 0.1930]) -Greedy action tensor([ 1.2170, -0.6477, -0.5096, 0.2674]) tensor([0.5815, 0.0901, 0.1034, 0.2250]) -Greedy action tensor([ 1.4630, -0.4435, -0.3196, 0.3220]) tensor([0.6111, 0.0908, 0.1028, 0.1953]) -Greedy action tensor([ 1.8502, -1.0214, -0.4571, 0.6220]) tensor([0.6901, 0.0391, 0.0687, 0.2021]) -Greedy action tensor([ 0.8708, -0.3704, -0.0209, 0.0064]) tensor([0.4716, 0.1363, 0.1933, 0.1987]) -Greedy action tensor([ 0.9704, -0.4361, -0.2321, 0.2810]) tensor([0.4885, 0.1197, 0.1468, 0.2451]) -Greedy action tensor([ 1.0883, -0.6931, -0.0920, 0.2847]) tensor([0.5199, 0.0876, 0.1597, 0.2328]) -Greedy action tensor([ 0.7979, -0.2292, -0.3816, 0.3899]) tensor([0.4291, 0.1536, 0.1319, 0.2853]) -Greedy action tensor([ 0.6322, -0.0130, -0.0033, -0.0251]) tensor([0.3887, 0.2039, 0.2059, 0.2015]) -Greedy action tensor([ 1.4637e+00, -2.9999e-01, 5.0222e-02, -4.9263e-05]) tensor([0.6075, 0.1041, 0.1478, 0.1406]) -Greedy action tensor([ 0.6093, -0.0357, -0.0820, 0.0151]) tensor([0.3880, 0.2035, 0.1943, 0.2142]) -Greedy action tensor([ 1.4327, -0.3359, -0.2409, 0.2560]) tensor([0.6001, 0.1024, 0.1126, 0.1850]) -Greedy action tensor([ 1.3415, -0.7017, -0.1913, 0.4948]) tensor([0.5636, 0.0730, 0.1217, 0.2417]) -Greedy action tensor([ 1.4096, -0.9057, -0.2492, 0.6139]) tensor([0.5746, 0.0567, 0.1094, 0.2593]) -Greedy action tensor([ 0.3759, -0.1615, 0.0350, 0.0451]) tensor([0.3318, 0.1939, 0.2360, 0.2384]) -Greedy action tensor([ 1.4019, -0.2684, -0.5381, 0.1517]) tensor([0.6179, 0.1163, 0.0888, 0.1770]) -Greedy action tensor([ 0.9218, -0.0974, -0.1151, 0.2450]) tensor([0.4497, 0.1623, 0.1594, 0.2285]) -Greedy action tensor([ 0.7982, -0.6246, -0.2607, 0.3720]) tensor([0.4463, 0.1076, 0.1548, 0.2914]) -Greedy action tensor([ 0.9581, -0.5148, -0.1147, 0.2224]) tensor([0.4877, 0.1118, 0.1668, 0.2337]) -Greedy action tensor([ 1.5120, -0.0389, -0.1771, 0.4115]) tensor([0.5782, 0.1226, 0.1068, 0.1924]) -Greedy action tensor([ 0.9812, -0.2888, -0.4168, 0.3591]) tensor([0.4843, 0.1360, 0.1197, 0.2600]) -Greedy action tensor([ 1.7922, -0.6125, -0.4273, 0.1360]) tensor([0.7195, 0.0650, 0.0782, 0.1373]) -Greedy action tensor([ 0.8392, -0.6173, -0.2402, 0.4620]) tensor([0.4427, 0.1032, 0.1504, 0.3036]) -Greedy action tensor([ 1.0579, -0.5378, -0.1800, 0.1492]) tensor([0.5275, 0.1070, 0.1530, 0.2126]) -Greedy action tensor([ 1.4232, -0.5820, -0.4849, 0.2023]) tensor([0.6337, 0.0853, 0.0940, 0.1869]) -Greedy action tensor([ 0.7292, -0.2954, -0.2392, 0.2474]) tensor([0.4244, 0.1523, 0.1611, 0.2621]) -Greedy action tensor([ 1.2033, -0.3286, -0.0312, 0.0816]) tensor([0.5456, 0.1179, 0.1588, 0.1777]) -Greedy action tensor([ 1.2317, -0.6041, -0.2547, -0.0073]) tensor([0.5969, 0.0952, 0.1350, 0.1729]) -Greedy action tensor([ 0.9625, -0.2208, -0.0649, -0.1442]) tensor([0.5013, 0.1535, 0.1794, 0.1657]) -Greedy action tensor([ 1.1257, -0.0358, -0.0972, 0.2340]) tensor([0.4957, 0.1552, 0.1459, 0.2032]) -Greedy action tensor([ 1.7039, -0.7209, -0.3683, 0.3271]) tensor([0.6818, 0.0603, 0.0858, 0.1721]) -Greedy action tensor([ 1.4674, -0.8698, -0.1474, 0.5855]) tensor([0.5849, 0.0565, 0.1164, 0.2422]) -Greedy action tensor([ 1.4910, -0.0490, -0.2659, 0.1216]) tensor([0.6093, 0.1306, 0.1052, 0.1549]) -Greedy action tensor([ 1.2112, -0.5360, -0.3383, 0.4564]) tensor([0.5386, 0.0939, 0.1144, 0.2532]) -Greedy action tensor([ 1.1935, -0.0887, -0.4876, -0.1958]) tensor([0.5838, 0.1620, 0.1087, 0.1455]) -Greedy action tensor([ 1.1081, -0.0976, 0.0822, 0.0541]) tensor([0.4984, 0.1492, 0.1787, 0.1737]) -Greedy action tensor([ 1.7608, -0.7324, -0.3108, 0.6145]) tensor([0.6551, 0.0541, 0.0825, 0.2082]) -Greedy action tensor([ 0.9481, -0.6623, -0.0602, 0.1899]) tensor([0.4919, 0.0983, 0.1794, 0.2304]) -Greedy action tensor([ 0.6038, -0.4400, 0.1140, -0.1221]) tensor([0.4084, 0.1438, 0.2502, 0.1976]) -Greedy action tensor([ 1.5958, -0.5120, -0.0772, 0.2161]) tensor([0.6407, 0.0778, 0.1202, 0.1612]) -Greedy action tensor([ 1.4944, -0.5272, -0.2983, 0.1700]) tensor([0.6390, 0.0846, 0.1064, 0.1700]) -Greedy action tensor([ 0.9309, -0.2911, -0.4806, 0.1325]) tensor([0.5029, 0.1482, 0.1226, 0.2263]) -Greedy action tensor([ 0.9892, -0.5335, -0.1885, 0.3189]) tensor([0.4908, 0.1070, 0.1511, 0.2510]) -Greedy action tensor([ 0.9106, -0.0317, -0.2711, 0.2569]) tensor([0.4511, 0.1758, 0.1384, 0.2346]) -Greedy action tensor([ 1.6550, -0.6266, -0.4294, 0.7137]) tensor([0.6186, 0.0632, 0.0769, 0.2413]) -Greedy action tensor([ 0.6064, -0.3602, -0.2459, -0.2798]) tensor([0.4506, 0.1714, 0.1922, 0.1858]) -Greedy action tensor([ 0.7125, -0.4954, -0.1622, -0.0244]) tensor([0.4557, 0.1362, 0.1900, 0.2181]) -Greedy action tensor([ 1.2253, -0.4582, 0.2170, 0.0381]) tensor([0.5389, 0.1001, 0.1966, 0.1644]) -Greedy action tensor([ 0.8330, -0.4036, -0.3445, 0.0015]) tensor([0.4917, 0.1428, 0.1515, 0.2141]) -Greedy action tensor([ 1.1595, -0.3754, -0.1738, 0.5540]) tensor([0.4939, 0.1064, 0.1302, 0.2695]) -Greedy action tensor([ 1.1535, -0.3451, -0.4999, 0.1618]) tensor([0.5600, 0.1251, 0.1072, 0.2077]) -Greedy action tensor([ 1.5938, -0.4068, -0.2635, 0.0905]) tensor([0.6606, 0.0893, 0.1031, 0.1469]) -Greedy action tensor([ 0.2796, -0.2629, 0.0096, -0.0477]) tensor([0.3262, 0.1896, 0.2490, 0.2351]) -Greedy action tensor([ 1.1104, -0.3611, -0.3661, 0.4210]) tensor([0.5102, 0.1171, 0.1165, 0.2561]) -Greedy action tensor([ 1.5838, -0.4300, -0.0723, 0.1101]) tensor([0.6437, 0.0859, 0.1229, 0.1475]) -Greedy action tensor([ 0.8217, -0.3022, -0.0760, -0.0525]) tensor([0.4652, 0.1512, 0.1896, 0.1941]) -Greedy action tensor([ 1.8340, -0.6539, -0.3652, 0.3751]) tensor([0.7010, 0.0582, 0.0777, 0.1630]) -Greedy action tensor([ 1.3673, -0.2669, -0.3862, 0.0921]) tensor([0.6069, 0.1184, 0.1051, 0.1696]) -Greedy action tensor([ 1.0714, -0.4911, -0.3252, 0.4785]) tensor([0.4976, 0.1043, 0.1231, 0.2750]) -Greedy action tensor([ 0.8382, -0.1738, -0.0583, 0.3601]) tensor([0.4182, 0.1520, 0.1706, 0.2592]) -Greedy action tensor([ 1.7789, -1.0047, -0.4764, 0.3542]) tensor([0.7106, 0.0439, 0.0745, 0.1710]) -Greedy action tensor([ 0.8890, -0.1399, -0.1719, 0.3525]) tensor([0.4370, 0.1562, 0.1513, 0.2555]) -Greedy action tensor([ 1.0099, -0.5152, -0.1031, 0.2679]) tensor([0.4945, 0.1076, 0.1625, 0.2355]) -Greedy action tensor([ 1.7524, -0.3029, 1.3976, -0.4932]) tensor([0.5167, 0.0662, 0.3624, 0.0547]) -Greedy action tensor([0.6842, 0.1211, 0.0634, 0.5700]) tensor([0.3334, 0.1899, 0.1792, 0.2975]) -Greedy action tensor([1.6291, 0.2766, 0.0804, 0.5065]) tensor([0.5566, 0.1439, 0.1183, 0.1811]) -Greedy action tensor([-0.6993, -0.6015, -0.0276, 1.4605]) tensor([0.0786, 0.0866, 0.1538, 0.6810]) -Greedy action tensor([-1.5423, 1.0646, 0.0943, 0.0820]) tensor([0.0404, 0.5473, 0.2074, 0.2049]) -Greedy action tensor([ 1.6647, 0.9150, -0.4811, 0.9254]) tensor([0.4838, 0.2286, 0.0566, 0.2310]) -Greedy action tensor([1.1508, 0.1143, 1.5282, 0.1630]) tensor([0.3139, 0.1113, 0.4578, 0.1169]) -Greedy action tensor([ 0.5007, -1.7153, 0.8991, -0.6444]) tensor([0.3429, 0.0374, 0.5107, 0.1091]) -Greedy action tensor([ 0.1528, -0.3880, 0.6238, -0.4353]) tensor([0.2674, 0.1557, 0.4283, 0.1485]) -Greedy action tensor([-0.4654, -0.1186, -0.1955, 0.0389]) tensor([0.1859, 0.2629, 0.2434, 0.3078]) -Greedy action tensor([0.6310, 0.0915, 0.4470, 0.1229]) tensor([0.3315, 0.1933, 0.2758, 0.1994]) -Greedy action tensor([-0.2055, -2.2064, 0.2965, 1.0954]) tensor([0.1548, 0.0209, 0.2557, 0.5685]) -Greedy action tensor([ 0.0144, 0.1711, -0.3721, 1.8289]) tensor([0.1113, 0.1302, 0.0756, 0.6830]) -Greedy action tensor([ 0.4757, 0.4487, -0.1339, 0.4862]) tensor([0.2835, 0.2759, 0.1541, 0.2865]) -Greedy action tensor([ 0.1751, 0.1813, -0.3336, -0.0067]) tensor([0.2906, 0.2924, 0.1747, 0.2423]) -Greedy action tensor([ 0.4770, -0.5165, -0.7266, 0.4356]) tensor([0.3803, 0.1408, 0.1141, 0.3648]) -Greedy action tensor([ 0.4319, 0.3929, 0.1416, -0.0947]) tensor([0.3030, 0.2914, 0.2267, 0.1789]) -Greedy action tensor([0.3657, 0.0227, 0.1124, 0.6216]) tensor([0.2647, 0.1879, 0.2055, 0.3419]) -Greedy action tensor([ 1.0135, 0.2077, 1.2116, -0.4783]) tensor([0.3459, 0.1545, 0.4217, 0.0778]) -Greedy action tensor([ 0.1252, -0.0942, 1.3968, 0.4540]) tensor([0.1480, 0.1188, 0.5277, 0.2056]) -Greedy action tensor([ 0.5087, -0.6797, 1.1476, 0.8384]) tensor([0.2179, 0.0664, 0.4128, 0.3030]) -Greedy action tensor([-0.4591, -0.0050, 1.6693, 0.3157]) tensor([0.0761, 0.1198, 0.6391, 0.1651]) -Greedy action tensor([-0.0883, 0.6032, 0.7885, -0.1586]) tensor([0.1579, 0.3153, 0.3795, 0.1472]) -Greedy action tensor([-0.6103, 0.7396, 0.7953, 1.4655]) tensor([0.0592, 0.2281, 0.2412, 0.4715]) -Greedy action tensor([ 0.6685, -0.4725, 0.0326, -0.0999]) tensor([0.4324, 0.1381, 0.2289, 0.2005]) -Greedy action tensor([ 0.0383, 0.0521, 1.0442, -0.1461]) tensor([0.1792, 0.1817, 0.4900, 0.1490]) -Greedy action tensor([0.2864, 0.1983, 0.2978, 1.3554]) tensor([0.1712, 0.1568, 0.1732, 0.4987]) -Greedy action tensor([ 0.5300, -0.1245, 1.2661, 0.4973]) tensor([0.2186, 0.1136, 0.4563, 0.2115]) -Greedy action tensor([ 1.2214, -1.0829, 0.3037, 1.4688]) tensor([0.3597, 0.0359, 0.1437, 0.4607]) -Greedy action tensor([0.2836, 0.9444, 0.0431, 1.0806]) tensor([0.1683, 0.3259, 0.1323, 0.3735]) -Greedy action tensor([-0.5306, -0.9091, 0.0142, -0.5468]) tensor([0.2276, 0.1559, 0.3925, 0.2240]) -Greedy action tensor([-1.2292, 0.2098, -0.0683, 0.0550]) tensor([0.0832, 0.3508, 0.2656, 0.3005]) -Greedy action tensor([ 1.5991, -0.1457, -0.4776, 0.2832]) tensor([0.6376, 0.1114, 0.0799, 0.1710]) -Greedy action tensor([ 0.0173, -1.2167, 1.0539, -0.2678]) tensor([0.2056, 0.0599, 0.5798, 0.1546]) -Greedy action tensor([-0.2142, -0.0418, 0.3797, -0.2256]) tensor([0.2005, 0.2382, 0.3631, 0.1982]) -Greedy action tensor([ 0.7272, -0.0170, -0.1697, 0.6368]) tensor([0.3576, 0.1699, 0.1458, 0.3267]) -Greedy action tensor([-0.8950, 0.5857, 0.1253, -0.8137]) tensor([0.1080, 0.4750, 0.2997, 0.1172]) -Greedy action tensor([ 0.5083, -0.2429, 1.2732, -0.1439]) tensor([0.2415, 0.1139, 0.5188, 0.1258]) -Greedy action tensor([ 0.5877, -1.3693, 0.6741, 0.4663]) tensor([0.3208, 0.0453, 0.3498, 0.2841]) -Greedy action tensor([ 1.1625, -0.2016, -0.3421, 1.0954]) tensor([0.4145, 0.1059, 0.0920, 0.3875]) -Greedy action tensor([ 1.1858, -1.6211, 0.2620, 0.1997]) tensor([0.5463, 0.0330, 0.2169, 0.2038]) -Greedy action tensor([-1.1031, -2.7442, -0.2614, 0.3393]) tensor([0.1291, 0.0250, 0.2996, 0.5463]) -Greedy action tensor([-0.2893, 0.3891, 1.4273, -0.7853]) tensor([0.1093, 0.2155, 0.6086, 0.0666]) -Greedy action tensor([-0.4201, -1.2673, -0.5383, 1.8354]) tensor([0.0843, 0.0361, 0.0749, 0.8046]) -Greedy action tensor([ 0.4266, -1.0379, -0.3118, 0.4667]) tensor([0.3636, 0.0841, 0.1738, 0.3785]) -Greedy action tensor([ 0.5636, -1.1499, 0.1687, 1.6023]) tensor([0.2137, 0.0385, 0.1440, 0.6038]) -Greedy action tensor([-0.3785, 0.2007, -0.4417, -0.2158]) tensor([0.2041, 0.3642, 0.1916, 0.2401]) -Greedy action tensor([-1.4368, 0.0731, -0.9199, -0.7035]) tensor([0.1077, 0.4875, 0.1806, 0.2242]) -Greedy action tensor([-0.7663, -1.2653, -0.0328, 1.9101]) tensor([0.0549, 0.0333, 0.1143, 0.7975]) -Greedy action tensor([ 0.4738, -2.0491, -0.7194, -0.2708]) tensor([0.5381, 0.0432, 0.1632, 0.2556]) -Greedy action tensor([ 0.3755, 0.1432, -0.3108, 1.8297]) tensor([0.1520, 0.1205, 0.0765, 0.6509]) -Greedy action tensor([-0.5402, -1.3218, 1.5797, -1.2171]) tensor([0.0971, 0.0445, 0.8091, 0.0494]) -Greedy action tensor([-0.0854, -1.9524, -0.0907, 1.0671]) tensor([0.1881, 0.0291, 0.1871, 0.5957]) -Greedy action tensor([-0.2783, 0.6989, -0.3589, 0.7579]) tensor([0.1352, 0.3591, 0.1247, 0.3810]) -Greedy action tensor([ 1.0540, 0.2104, 0.4355, -0.5213]) tensor([0.4596, 0.1977, 0.2476, 0.0951]) -Greedy action tensor([ 1.5310, -1.2643, 0.0792, 0.7589]) tensor([0.5691, 0.0348, 0.1332, 0.2629]) -Greedy action tensor([ 0.4514, -1.0283, 0.1632, 0.5225]) tensor([0.3278, 0.0746, 0.2457, 0.3519]) -Greedy action tensor([ 0.5187, -1.1081, -0.4842, 0.6612]) tensor([0.3681, 0.0724, 0.1350, 0.4245]) -Greedy action tensor([1.3529, 0.0323, 1.2006, 0.3811]) tensor([0.3994, 0.1066, 0.3429, 0.1511]) -Greedy action tensor([ 0.9672, -0.9356, 0.3064, 1.8393]) tensor([0.2465, 0.0368, 0.1273, 0.5895]) -Greedy action tensor([-0.1438, 0.3037, -0.1812, 0.2808]) tensor([0.1978, 0.3094, 0.1905, 0.3024]) -Greedy action tensor([ 1.6213, -0.4609, 1.3645, 0.6120]) tensor([0.4420, 0.0551, 0.3419, 0.1611]) -Greedy action tensor([ 0.4280, -0.1951, -0.2957, 0.5861]) tensor([0.3132, 0.1680, 0.1519, 0.3669]) -Greedy action tensor([ 1.2294, -0.8526, 0.4621, 1.2449]) tensor([0.3839, 0.0479, 0.1783, 0.3899]) -Greedy action tensor([ 0.4895, 0.0246, -0.0967, -0.1270]) tensor([0.3671, 0.2306, 0.2042, 0.1981]) -Greedy action tensor([-0.0649, -0.7515, 1.0724, 0.4390]) tensor([0.1593, 0.0802, 0.4968, 0.2637]) -Greedy action tensor([ 0.2119, -1.2796, 0.5311, 0.6115]) tensor([0.2444, 0.0550, 0.3362, 0.3644]) -Greedy action tensor([ 1.3052, -0.8681, -0.2143, 0.1165]) tensor([0.6108, 0.0695, 0.1336, 0.1860]) -Greedy action tensor([ 0.8544, -1.6856, 1.3864, 0.0333]) tensor([0.3105, 0.0245, 0.5285, 0.1366]) -Greedy action tensor([-0.1021, -0.7039, 1.9659, 1.1814]) tensor([0.0765, 0.0419, 0.6053, 0.2762]) -Greedy action tensor([ 0.0807, -0.7404, 0.8422, -0.0495]) tensor([0.2242, 0.0987, 0.4802, 0.1969]) -Greedy action tensor([-1.1477, -1.3000, -0.9707, 0.3997]) tensor([0.1290, 0.1108, 0.1540, 0.6062]) -Greedy action tensor([1.0183, 1.0452, 0.8111, 0.8070]) tensor([0.2740, 0.2815, 0.2227, 0.2218]) -Greedy action tensor([-0.7187, -2.3466, 0.2662, 0.3039]) tensor([0.1503, 0.0295, 0.4024, 0.4178]) -Greedy action tensor([0.3312, 0.1047, 0.0011, 0.8431]) tensor([0.2390, 0.1905, 0.1718, 0.3987]) -Greedy action tensor([ 0.8171, -0.0980, 1.7804, 0.9939]) tensor([0.1918, 0.0768, 0.5025, 0.2289]) -Greedy action tensor([ 0.9102, -0.2155, -0.1424, 1.3168]) tensor([0.3149, 0.1022, 0.1099, 0.4730]) -Greedy action tensor([ 0.4957, -1.4263, 1.1483, 0.7549]) tensor([0.2292, 0.0335, 0.4402, 0.2970]) -Greedy action tensor([ 0.2560, -0.7094, 1.5014, 2.0180]) tensor([0.0936, 0.0357, 0.3253, 0.5454]) -Greedy action tensor([ 1.0898, -1.4893, 0.5072, 0.3911]) tensor([0.4692, 0.0356, 0.2620, 0.2333]) -Greedy action tensor([ 1.1200, -0.3622, 0.0341, 1.1075]) tensor([0.3918, 0.0890, 0.1323, 0.3869]) -Greedy action tensor([-0.2519, 0.5702, 1.9615, -0.8252]) tensor([0.0770, 0.1752, 0.7044, 0.0434]) -Greedy action tensor([ 0.5840, -0.0453, 0.1614, -0.1966]) tensor([0.3779, 0.2014, 0.2476, 0.1731]) -Greedy action tensor([ 0.2456, 0.1523, -0.0699, -0.2398]) tensor([0.3071, 0.2798, 0.2240, 0.1890]) -Greedy action tensor([ 0.2755, -0.1010, -0.0240, -0.1900]) tensor([0.3273, 0.2246, 0.2426, 0.2055]) -Greedy action tensor([ 0.3880, 0.1109, -0.0904, -0.1873]) tensor([0.3401, 0.2578, 0.2108, 0.1913]) -Greedy action tensor([ 0.1785, 0.1744, 0.1002, -0.2408]) tensor([0.2795, 0.2783, 0.2584, 0.1838]) -Greedy action tensor([ 1.0159, -0.7644, -0.0240, -0.5545]) tensor([0.5780, 0.0974, 0.2043, 0.1202]) -Greedy action tensor([ 0.2650, 0.3071, -0.0404, 0.1650]) tensor([0.2714, 0.2831, 0.2000, 0.2456]) -Greedy action tensor([ 1.2948, -1.3553, -0.0327, -0.7200]) tensor([0.6807, 0.0481, 0.1805, 0.0908]) -Greedy action tensor([ 0.3910, 0.0036, -0.0790, -0.4376]) tensor([0.3649, 0.2477, 0.2281, 0.1593]) -Greedy action tensor([ 0.7100, -0.3154, -0.0127, -0.3991]) tensor([0.4600, 0.1650, 0.2233, 0.1517]) -Greedy action tensor([ 0.4407, -0.2358, -0.0111, -0.3237]) tensor([0.3831, 0.1948, 0.2438, 0.1784]) -Greedy action tensor([ 0.5835, -0.1282, 0.0267, -0.4656]) tensor([0.4142, 0.2033, 0.2374, 0.1451]) -Greedy action tensor([ 0.3681, -0.3996, 0.1277, -0.4549]) tensor([0.3718, 0.1726, 0.2924, 0.1633]) -Greedy action tensor([ 1.0482, -0.3859, -0.0408, -0.4055]) tensor([0.5529, 0.1318, 0.1861, 0.1292]) -Greedy action tensor([ 0.8849, -0.3829, -0.1335, -0.2454]) tensor([0.5088, 0.1432, 0.1837, 0.1643]) -Greedy action tensor([ 8.0094e-01, -4.4187e-01, 2.4223e-04, -3.9992e-01]) tensor([0.4906, 0.1416, 0.2203, 0.1476]) -Greedy action tensor([ 0.6935, -0.4498, -0.0635, -0.4693]) tensor([0.4761, 0.1518, 0.2233, 0.1488]) -Greedy action tensor([ 0.5246, 0.0829, -0.0099, -0.2047]) tensor([0.3688, 0.2371, 0.2161, 0.1779]) -Greedy action tensor([ 0.6301, -0.0863, 0.1212, -0.3156]) tensor([0.4035, 0.1971, 0.2426, 0.1567]) -Greedy action tensor([ 0.9444, -0.8232, 0.0054, -0.4642]) tensor([0.5536, 0.0945, 0.2165, 0.1354]) -Greedy action tensor([ 0.8025, -0.3464, -0.1307, -0.4212]) tensor([0.4989, 0.1581, 0.1962, 0.1468]) -Greedy action tensor([ 0.7643, -0.5453, 0.0968, -0.3966]) tensor([0.4771, 0.1288, 0.2447, 0.1494]) -Greedy action tensor([ 0.6020, -0.0231, 0.0139, -0.4310]) tensor([0.4087, 0.2188, 0.2270, 0.1455]) -Greedy action tensor([ 0.6929, -0.5229, 0.0468, -0.5097]) tensor([0.4715, 0.1398, 0.2471, 0.1416]) -Greedy action tensor([ 0.5352, -0.4518, -0.2658, -0.5425]) tensor([0.4625, 0.1724, 0.2076, 0.1574]) -Greedy action tensor([ 0.6665, -0.2348, 0.1400, -0.3092]) tensor([0.4213, 0.1711, 0.2488, 0.1588]) -Greedy action tensor([ 0.5910, -0.3911, -0.0205, -0.4092]) tensor([0.4377, 0.1639, 0.2375, 0.1610]) -Greedy action tensor([ 0.3511, -0.0166, -0.0987, -0.1036]) tensor([0.3373, 0.2335, 0.2151, 0.2141]) -Greedy action tensor([ 0.6769, -0.5050, 0.1925, -0.6608]) tensor([0.4576, 0.1404, 0.2819, 0.1201]) -Greedy action tensor([ 0.6705, -0.3157, -0.1049, -0.3026]) tensor([0.4522, 0.1687, 0.2082, 0.1709]) -Greedy action tensor([ 0.8331, -0.4092, -0.1014, -0.5952]) tensor([0.5205, 0.1503, 0.2044, 0.1248]) -Greedy action tensor([ 0.7408, -0.4730, 0.1657, -0.5902]) tensor([0.4708, 0.1399, 0.2649, 0.1244]) -Greedy action tensor([ 1.4442, -1.3808, -0.0421, -0.6851]) tensor([0.7120, 0.0422, 0.1611, 0.0847]) -Greedy action tensor([ 0.9158, -0.1087, 0.0738, -0.4229]) tensor([0.4873, 0.1749, 0.2100, 0.1278]) -Greedy action tensor([-0.0145, 0.1399, -0.1968, -0.2296]) tensor([0.2627, 0.3066, 0.2189, 0.2118]) -Greedy action tensor([ 0.9999, -0.4900, -0.0676, -0.4419]) tensor([0.5538, 0.1248, 0.1904, 0.1310]) -Greedy action tensor([ 1.1613, -1.2629, 0.0068, -0.5762]) tensor([0.6330, 0.0561, 0.1995, 0.1114]) -Greedy action tensor([ 0.6351, -0.2831, -0.1537, -0.2940]) tensor([0.4448, 0.1776, 0.2021, 0.1756]) -Greedy action tensor([ 0.6571, -0.2931, -0.0413, -0.3617]) tensor([0.4454, 0.1722, 0.2215, 0.1608]) -Greedy action tensor([ 0.9355, -0.5300, -0.0298, -0.6150]) tensor([0.5483, 0.1266, 0.2088, 0.1163]) -Greedy action tensor([ 0.5243, -0.3578, -0.0982, -0.4905]) tensor([0.4323, 0.1790, 0.2320, 0.1567]) -Greedy action tensor([ 0.5669, 0.0439, -0.0758, -0.1631]) tensor([0.3845, 0.2279, 0.2022, 0.1853]) -Greedy action tensor([ 0.5526, 0.1084, 0.0656, -0.2885]) tensor([0.3722, 0.2387, 0.2287, 0.1605]) -Greedy action tensor([ 1.3518, -1.3670, 0.0485, -0.6080]) tensor([0.6764, 0.0446, 0.1837, 0.0953]) -Greedy action tensor([ 0.6718, -0.0571, 0.0451, -0.6447]) tensor([0.4377, 0.2112, 0.2339, 0.1173]) -Greedy action tensor([ 0.6235, -0.3854, -0.0052, -0.3448]) tensor([0.4391, 0.1601, 0.2341, 0.1667]) -Greedy action tensor([ 0.7483, -0.1087, -0.0702, -0.2586]) tensor([0.4482, 0.1903, 0.1977, 0.1638]) -Greedy action tensor([ 1.0287, -0.6933, -0.1469, -0.4467]) tensor([0.5828, 0.1041, 0.1798, 0.1333]) -Greedy action tensor([ 0.8721, -0.4464, -0.0650, -0.4410]) tensor([0.5186, 0.1387, 0.2032, 0.1395]) -Greedy action tensor([ 0.6501, -0.3478, -0.1675, -0.2833]) tensor([0.4539, 0.1673, 0.2004, 0.1785]) -Greedy action tensor([ 0.3363, 0.1835, -0.0720, 0.0783]) tensor([0.3034, 0.2604, 0.2017, 0.2344]) -Greedy action tensor([ 0.2691, -0.1383, -0.1071, -0.1716]) tensor([0.3338, 0.2221, 0.2292, 0.2149]) -Greedy action tensor([ 0.7929, -0.4660, 0.0326, -0.2939]) tensor([0.4788, 0.1359, 0.2238, 0.1615]) -Greedy action tensor([ 0.6198, -0.2816, 0.0960, -0.3120]) tensor([0.4180, 0.1697, 0.2476, 0.1646]) -Greedy action tensor([ 0.5237, -0.0473, -0.0922, -0.2427]) tensor([0.3891, 0.2198, 0.2102, 0.1808]) -Greedy action tensor([ 0.6108, -0.0738, 0.1224, -0.4507]) tensor([0.4059, 0.2047, 0.2490, 0.1404]) -Greedy action tensor([ 0.6061, -0.4671, -0.0687, -0.4347]) tensor([0.4537, 0.1551, 0.2310, 0.1602]) -Greedy action tensor([ 0.4455, -0.1464, -0.0765, -0.2806]) tensor([0.3802, 0.2103, 0.2256, 0.1839]) -Greedy action tensor([ 0.6131, -0.3894, -0.0282, -0.2238]) tensor([0.4298, 0.1577, 0.2263, 0.1861]) -Greedy action tensor([ 0.5882, -0.0566, 0.0039, -0.3248]) tensor([0.4027, 0.2113, 0.2245, 0.1616]) -Greedy action tensor([ 0.6452, -0.4409, -0.1437, -0.4166]) tensor([0.4678, 0.1579, 0.2125, 0.1618]) -Greedy action tensor([ 0.9687, -1.0105, -0.0816, -0.2953]) tensor([0.5648, 0.0780, 0.1976, 0.1596]) -Greedy action tensor([ 0.6488, -0.3214, -0.0726, -0.2417]) tensor([0.4395, 0.1666, 0.2136, 0.1804]) -Greedy action tensor([ 0.8058, -0.2191, -0.0222, -0.3099]) tensor([0.4709, 0.1690, 0.2058, 0.1543]) -Greedy action tensor([ 0.3200, 0.3053, -0.1669, 0.0499]) tensor([0.2973, 0.2930, 0.1827, 0.2270]) -Greedy action tensor([ 0.4199, -0.2303, -0.1091, -0.3251]) tensor([0.3867, 0.2018, 0.2278, 0.1836]) -Greedy action tensor([ 0.3697, 0.2210, -0.0797, 0.0449]) tensor([0.3103, 0.2674, 0.1980, 0.2243]) -Greedy action tensor([ 0.6370, -0.4056, -0.3233, -0.2936]) tensor([0.4696, 0.1655, 0.1797, 0.1852]) -Greedy action tensor([ 0.8628, -0.7148, -0.0130, -0.4537]) tensor([0.5288, 0.1092, 0.2203, 0.1418]) -Greedy action tensor([ 0.4622, -0.0526, -0.1564, -0.0939]) tensor([0.3690, 0.2205, 0.1988, 0.2116]) -Greedy action tensor([ 0.3172, -0.1117, -0.2043, -0.4597]) tensor([0.3697, 0.2408, 0.2195, 0.1700]) -Greedy action tensor([ 0.6075, -0.4470, 0.0660, -0.4842]) tensor([0.4413, 0.1537, 0.2568, 0.1481]) -Greedy action tensor([ 0.3585, 0.0874, -0.0117, -0.2392]) tensor([0.3330, 0.2539, 0.2300, 0.1832]) -Greedy action tensor([ 0.4859, 0.2292, -0.1465, 0.1203]) tensor([0.3335, 0.2580, 0.1772, 0.2314]) -Greedy action tensor([ 0.6592, -0.7121, -0.1372, -0.4252]) tensor([0.4895, 0.1242, 0.2207, 0.1655]) -Greedy action tensor([ 0.6245, -0.0857, -0.1006, -0.2819]) tensor([0.4202, 0.2065, 0.2035, 0.1697]) -Greedy action tensor([ 4.4612e-01, -1.1263e-01, 3.2480e-04, -2.4536e-01]) tensor([0.3686, 0.2108, 0.2360, 0.1846]) -Greedy action tensor([ 1.1637, -1.3337, -0.0272, -0.7231]) tensor([0.6503, 0.0535, 0.1977, 0.0985]) -Greedy action tensor([ 0.4656, -0.2414, 0.0028, -0.3723]) tensor([0.3913, 0.1930, 0.2464, 0.1693]) -Greedy action tensor([ 0.4708, -0.2632, -0.1653, -0.2823]) tensor([0.4032, 0.1935, 0.2134, 0.1899]) -Greedy action tensor([ 0.4349, -0.2971, -0.1920, -0.4024]) tensor([0.4085, 0.1965, 0.2182, 0.1768]) -Greedy action tensor([ 1.5669, -0.4407, -0.0660, 0.3854]) tensor([0.6110, 0.0821, 0.1194, 0.1875]) -Greedy action tensor([ 1.3128, -0.5318, -0.2343, 0.2544]) tensor([0.5821, 0.0920, 0.1239, 0.2020]) -Greedy action tensor([ 0.9915, -0.0923, -0.2610, 0.1752]) tensor([0.4840, 0.1637, 0.1383, 0.2140]) -Greedy action tensor([ 1.7276, -0.0104, -0.1545, 0.6044]) tensor([0.6048, 0.1064, 0.0921, 0.1967]) -Greedy action tensor([ 0.9489, -0.5436, -0.1335, 0.3717]) tensor([0.4706, 0.1058, 0.1594, 0.2642]) -Greedy action tensor([ 1.0406, 0.0259, -0.0185, -0.0874]) tensor([0.4919, 0.1783, 0.1706, 0.1592]) -Greedy action tensor([ 1.2211, -0.5663, -0.1680, 0.3867]) tensor([0.5403, 0.0904, 0.1347, 0.2346]) -Greedy action tensor([ 0.5638, -0.4285, -0.1199, -0.1467]) tensor([0.4225, 0.1566, 0.2133, 0.2076]) -Greedy action tensor([ 1.6128, -0.1297, -0.4054, 0.2521]) tensor([0.6392, 0.1119, 0.0850, 0.1639]) -Greedy action tensor([ 1.1424, -0.6013, -0.0595, 0.1633]) tensor([0.5402, 0.0945, 0.1624, 0.2029]) -Greedy action tensor([ 1.1366, -0.5399, -0.1993, 0.3062]) tensor([0.5303, 0.0992, 0.1394, 0.2311]) -Greedy action tensor([ 0.8992, -0.3776, -0.2583, 0.2361]) tensor([0.4743, 0.1323, 0.1491, 0.2444]) -Greedy action tensor([ 1.0954, -0.3578, -0.2928, 0.1162]) tensor([0.5379, 0.1258, 0.1342, 0.2021]) -Greedy action tensor([ 0.9909, -0.6049, -0.0230, 0.0222]) tensor([0.5141, 0.1042, 0.1865, 0.1951]) -Greedy action tensor([ 1.2593, -0.7694, -0.3330, 0.3384]) tensor([0.5770, 0.0759, 0.1174, 0.2297]) -Greedy action tensor([ 1.1334, -0.6016, -0.0700, 0.1637]) tensor([0.5389, 0.0951, 0.1618, 0.2043]) -Greedy action tensor([ 1.6108, -0.6392, -0.3426, 0.3793]) tensor([0.6498, 0.0685, 0.0921, 0.1896]) -Greedy action tensor([ 1.5102, -0.8787, -0.2928, 0.0848]) tensor([0.6680, 0.0613, 0.1101, 0.1606]) -Greedy action tensor([ 0.7645, -0.3213, 0.0333, -0.0241]) tensor([0.4399, 0.1485, 0.2117, 0.1999]) -Greedy action tensor([ 1.2787, -0.4749, -0.2822, 0.2666]) tensor([0.5726, 0.0991, 0.1202, 0.2081]) -Greedy action tensor([ 0.5474, 0.0529, -0.4235, -0.3793]) tensor([0.4194, 0.2558, 0.1588, 0.1660]) -Greedy action tensor([ 1.2813, -0.5900, -0.1657, 0.5118]) tensor([0.5398, 0.0831, 0.1270, 0.2501]) -Greedy action tensor([ 1.0442, 0.0961, -0.0954, 0.0977]) tensor([0.4772, 0.1849, 0.1527, 0.1852]) -Greedy action tensor([ 1.9974, -1.1214, -0.3153, 0.3576]) tensor([0.7478, 0.0331, 0.0740, 0.1451]) -Greedy action tensor([ 1.0199, -0.4813, -0.0441, -0.1778]) tensor([0.5348, 0.1192, 0.1846, 0.1615]) -Greedy action tensor([ 0.9281, -0.1989, -0.1789, 0.0537]) tensor([0.4827, 0.1564, 0.1596, 0.2013]) -Greedy action tensor([1.5047, 0.1364, 0.0744, 0.0926]) tensor([0.5756, 0.1465, 0.1377, 0.1402]) -Greedy action tensor([ 1.6536, -0.7269, -0.2776, 0.3393]) tensor([0.6640, 0.0614, 0.0963, 0.1784]) -Greedy action tensor([ 1.6242, -0.8649, -0.3463, 0.2993]) tensor([0.6720, 0.0558, 0.0937, 0.1786]) -Greedy action tensor([ 1.3520, -0.2123, -0.4202, 0.1490]) tensor([0.5954, 0.1246, 0.1012, 0.1788]) -Greedy action tensor([ 1.4893, -0.4815, -0.0508, 0.4834]) tensor([0.5816, 0.0810, 0.1247, 0.2127]) -Greedy action tensor([ 1.7015, 0.0825, -0.0857, 0.3573]) tensor([0.6149, 0.1218, 0.1030, 0.1603]) -Greedy action tensor([ 1.2041, -0.7174, -0.2533, 0.0624]) tensor([0.5887, 0.0862, 0.1371, 0.1880]) -Greedy action tensor([ 1.9874, -0.6466, -0.4286, 0.5947]) tensor([0.7095, 0.0509, 0.0633, 0.1762]) -Greedy action tensor([ 1.3926, -0.3990, -0.4191, 0.1682]) tensor([0.6158, 0.1026, 0.1006, 0.1810]) -Greedy action tensor([ 1.0098, -0.3293, -0.2881, -0.0619]) tensor([0.5326, 0.1396, 0.1455, 0.1824]) -Greedy action tensor([ 0.7975, -0.5262, 0.0732, 0.0322]) tensor([0.4513, 0.1201, 0.2187, 0.2099]) -Greedy action tensor([ 2.0515, -1.0713, -0.2188, 0.6177]) tensor([0.7217, 0.0318, 0.0745, 0.1720]) -Greedy action tensor([ 1.7841, -0.3454, -0.2343, 0.4361]) tensor([0.6616, 0.0787, 0.0879, 0.1719]) -Greedy action tensor([ 0.8471, -0.1589, -0.2726, 0.1338]) tensor([0.4583, 0.1676, 0.1496, 0.2246]) -Greedy action tensor([ 0.9041, -0.3962, 0.1307, -0.0252]) tensor([0.4698, 0.1280, 0.2168, 0.1855]) -Greedy action tensor([ 1.0534, -0.6349, -0.2579, 0.1167]) tensor([0.5417, 0.1001, 0.1460, 0.2123]) -Greedy action tensor([ 1.3272, -0.2883, -0.0410, 0.0470]) tensor([0.5776, 0.1148, 0.1470, 0.1606]) -Greedy action tensor([ 0.7752, -0.2820, -0.1369, 0.1179]) tensor([0.4410, 0.1532, 0.1772, 0.2286]) -Greedy action tensor([ 0.8467, -0.3092, -0.2479, 0.1508]) tensor([0.4655, 0.1465, 0.1558, 0.2321]) -Greedy action tensor([ 0.8838, -0.6398, -0.5228, 0.5764]) tensor([0.4549, 0.0991, 0.1114, 0.3345]) -Greedy action tensor([ 1.6731, 0.0408, -0.4369, 0.1925]) tensor([0.6476, 0.1266, 0.0785, 0.1473]) -Greedy action tensor([ 1.5642, -0.9244, -0.3514, 0.4779]) tensor([0.6379, 0.0530, 0.0939, 0.2152]) -Greedy action tensor([ 0.8729, -0.1659, -0.1162, 0.1752]) tensor([0.4497, 0.1592, 0.1673, 0.2238]) -Greedy action tensor([ 1.6460, -0.5319, -0.1804, 0.3570]) tensor([0.6452, 0.0731, 0.1039, 0.1778]) -Greedy action tensor([ 1.8490, -0.0068, -0.1088, 0.4300]) tensor([0.6496, 0.1015, 0.0917, 0.1572]) -Greedy action tensor([ 1.2487, -0.0203, -0.3327, -0.0021]) tensor([0.5640, 0.1585, 0.1160, 0.1615]) -Greedy action tensor([ 0.6356, -0.1831, -0.4215, 0.2369]) tensor([0.4065, 0.1793, 0.1413, 0.2729]) -Greedy action tensor([ 1.1948, -0.2699, -0.3980, 0.1562]) tensor([0.5591, 0.1292, 0.1137, 0.1979]) -Greedy action tensor([ 0.8268, -0.4010, 0.0093, -0.0926]) tensor([0.4688, 0.1373, 0.2070, 0.1869]) -Greedy action tensor([ 2.0748, -0.8337, -0.5081, 0.3547]) tensor([0.7638, 0.0417, 0.0577, 0.1368]) -Greedy action tensor([ 1.3908, -0.5601, -0.2497, -0.1430]) tensor([0.6444, 0.0916, 0.1249, 0.1390]) -Greedy action tensor([ 0.9392, -0.2614, -0.1097, 0.1500]) tensor([0.4749, 0.1430, 0.1664, 0.2157]) -Greedy action tensor([ 1.4213, -0.2762, -0.1902, 0.7676]) tensor([0.5255, 0.0962, 0.1049, 0.2733]) -Greedy action tensor([ 0.9560, 0.0691, -0.2328, -0.2874]) tensor([0.4988, 0.2055, 0.1519, 0.1438]) -Greedy action tensor([ 0.7961, -0.4312, -0.3354, 0.2374]) tensor([0.4571, 0.1340, 0.1474, 0.2615]) -Greedy action tensor([ 0.6120, -0.4069, -0.1471, 0.1628]) tensor([0.4053, 0.1463, 0.1897, 0.2586]) -Greedy action tensor([ 1.0431, -0.7015, 0.0301, 0.1323]) tensor([0.5155, 0.0901, 0.1872, 0.2073]) -Greedy action tensor([ 2.0011, -0.6514, -0.4233, 0.8457]) tensor([0.6785, 0.0478, 0.0601, 0.2137]) -Greedy action tensor([ 1.7414, -0.2237, -0.4048, 0.3134]) tensor([0.6681, 0.0936, 0.0781, 0.1602]) -Greedy action tensor([ 1.0108, -0.1153, -0.4089, 0.1518]) tensor([0.5026, 0.1630, 0.1215, 0.2129]) -Greedy action tensor([ 1.5109, -0.3862, -0.3059, 0.0832]) tensor([0.6442, 0.0966, 0.1047, 0.1545]) -Greedy action tensor([ 1.3416, -0.3213, -0.1468, 0.6440]) tensor([0.5227, 0.0991, 0.1180, 0.2602]) -Greedy action tensor([ 1.3562, -0.8885, -0.3165, 0.4629]) tensor([0.5872, 0.0622, 0.1102, 0.2403]) -Greedy action tensor([ 1.1276, -0.4922, -0.2083, 0.1259]) tensor([0.5470, 0.1083, 0.1438, 0.2009]) -Greedy action tensor([ 1.1885, -0.5748, -0.0226, 0.2575]) tensor([0.5366, 0.0920, 0.1598, 0.2115]) -Greedy action tensor([ 1.3084, -0.2938, -0.3863, -0.1316]) tensor([0.6165, 0.1242, 0.1132, 0.1461]) -Greedy action tensor([ 1.2991, -0.4053, -0.2758, 0.1628]) tensor([0.5848, 0.1064, 0.1211, 0.1877]) -Greedy action tensor([ 0.8060, -0.3127, -0.3696, 0.4694]) tensor([0.4256, 0.1391, 0.1314, 0.3040]) -Greedy action tensor([ 1.8401, -0.4556, -0.3112, 0.1436]) tensor([0.7141, 0.0719, 0.0831, 0.1309]) -Greedy action tensor([ 0.7735, -0.3614, -0.1633, 0.2376]) tensor([0.4351, 0.1399, 0.1705, 0.2546]) -Greedy action tensor([ 1.3084, -0.2638, -0.4054, 0.3564]) tensor([0.5638, 0.1170, 0.1016, 0.2176]) -Greedy action tensor([ 1.4849, -0.6911, -0.1297, 0.1763]) tensor([0.6318, 0.0717, 0.1257, 0.1707]) -Greedy action tensor([ 1.2524, -0.6918, -0.2976, 0.2612]) tensor([0.5792, 0.0829, 0.1229, 0.2150]) -Greedy action tensor([ 1.3260, -0.6552, -0.0529, 0.3051]) tensor([0.5714, 0.0788, 0.1439, 0.2059]) -Greedy action tensor([ 0.8418, -0.0701, 0.1601, 0.0352]) tensor([0.4248, 0.1707, 0.2149, 0.1896]) -Greedy action tensor([-1.2545, -0.4276, 0.5520, 0.7227]) tensor([0.0602, 0.1377, 0.3669, 0.4351]) -Greedy action tensor([-1.5829, -0.5602, 0.9137, 0.6205]) tensor([0.0400, 0.1113, 0.4861, 0.3626]) -Greedy action tensor([-0.9720, -0.6719, 0.4464, 0.2891]) tensor([0.0999, 0.1349, 0.4126, 0.3526]) -Greedy action tensor([-0.9628, -0.3266, 0.7138, 1.1304]) tensor([0.0612, 0.1156, 0.3271, 0.4962]) -Greedy action tensor([-1.8345, -0.4310, 0.6073, -0.1099]) tensor([0.0451, 0.1835, 0.5184, 0.2530]) -Greedy action tensor([-1.8084, -0.6719, 0.2286, -0.1473]) tensor([0.0587, 0.1828, 0.4498, 0.3088]) -Greedy action tensor([-1.5934, -0.9060, 0.3146, -0.6067]) tensor([0.0806, 0.1602, 0.5430, 0.2161]) -Greedy action tensor([-1.4532, -0.6067, 0.4147, 0.0636]) tensor([0.0696, 0.1623, 0.4508, 0.3173]) -Greedy action tensor([-1.3831, -0.3148, 0.4746, -0.1623]) tensor([0.0729, 0.2123, 0.4675, 0.2473]) -Greedy action tensor([-1.5792, -0.7408, -0.0340, -0.6166]) tensor([0.0942, 0.2178, 0.4415, 0.2466]) -Greedy action tensor([-1.8305, -0.4754, 0.6049, -0.1119]) tensor([0.0457, 0.1772, 0.5221, 0.2549]) -Greedy action tensor([-1.7520, -0.4811, 0.5868, -0.0285]) tensor([0.0487, 0.1735, 0.5049, 0.2729]) -Greedy action tensor([-1.7154, -0.5006, 0.5416, -0.0105]) tensor([0.0515, 0.1735, 0.4919, 0.2832]) -Greedy action tensor([-1.7168, -0.4271, 0.6475, 0.1320]) tensor([0.0463, 0.1680, 0.4920, 0.2938]) -Greedy action tensor([-1.7402, -0.1225, 0.4508, -0.3782]) tensor([0.0529, 0.2669, 0.4735, 0.2067]) -Greedy action tensor([-1.2336, -0.5025, 0.7702, 1.0866]) tensor([0.0484, 0.1005, 0.3588, 0.4923]) -Greedy action tensor([-1.6492, -0.5149, 0.5033, 0.0523]) tensor([0.0550, 0.1709, 0.4729, 0.3013]) -Greedy action tensor([-0.4589, -0.4913, 0.2151, 0.1334]) tensor([0.1743, 0.1687, 0.3419, 0.3151]) -Greedy action tensor([-1.7201, -0.6649, 0.3040, -0.3046]) tensor([0.0643, 0.1846, 0.4864, 0.2647]) -Greedy action tensor([-0.9952, -0.5998, 0.2042, 0.3643]) tensor([0.1031, 0.1531, 0.3422, 0.4016]) -Greedy action tensor([-1.7423, -0.1332, 0.5291, -0.1038]) tensor([0.0480, 0.2399, 0.4651, 0.2470]) -Greedy action tensor([-1.5555, -0.4315, 0.4737, 0.0926]) tensor([0.0592, 0.1823, 0.4506, 0.3078]) -Greedy action tensor([-0.8497, -0.4514, 0.9575, 1.3992]) tensor([0.0554, 0.0825, 0.3374, 0.5248]) -Greedy action tensor([-1.7411, -0.4652, 0.5860, -0.1615]) tensor([0.0508, 0.1820, 0.5207, 0.2466]) -Greedy action tensor([-1.8857, -0.4591, 0.6331, -0.1431]) tensor([0.0429, 0.1788, 0.5330, 0.2453]) -Greedy action tensor([-1.9140, -0.7202, 0.0358, -0.3271]) tensor([0.0617, 0.2035, 0.4334, 0.3015]) -Greedy action tensor([-1.7915, -0.4003, 0.5787, -0.0716]) tensor([0.0469, 0.1887, 0.5023, 0.2621]) -Greedy action tensor([-1.6355, -0.7433, -0.1414, -0.4136]) tensor([0.0886, 0.2162, 0.3946, 0.3006]) -Greedy action tensor([-1.9305, -0.4508, 0.6577, -0.1729]) tensor([0.0408, 0.1793, 0.5432, 0.2367]) -Greedy action tensor([-1.3730, -0.5596, 0.5629, 0.5260]) tensor([0.0593, 0.1337, 0.4109, 0.3960]) -Greedy action tensor([-1.1799, -0.0400, 0.2017, 0.2410]) tensor([0.0816, 0.2552, 0.3250, 0.3381]) -Greedy action tensor([-1.7007, -0.4472, 0.6164, 0.0859]) tensor([0.0485, 0.1699, 0.4921, 0.2895]) -Greedy action tensor([-0.6022, -0.5865, 0.1819, 0.1523]) tensor([0.1579, 0.1604, 0.3459, 0.3358]) -Greedy action tensor([-1.6116, -0.5427, 0.5111, -0.0649]) tensor([0.0590, 0.1717, 0.4925, 0.2768]) -Greedy action tensor([-0.9980, 0.3016, 0.3165, -0.1834]) tensor([0.0939, 0.3444, 0.3496, 0.2121]) -Greedy action tensor([-1.8843, -0.4508, 0.6402, -0.1333]) tensor([0.0427, 0.1789, 0.5327, 0.2458]) -Greedy action tensor([-1.8800, -0.4754, 0.6897, -0.1003]) tensor([0.0416, 0.1693, 0.5428, 0.2463]) -Greedy action tensor([-2.0193, -0.1187, 0.5030, 0.1104]) tensor([0.0350, 0.2342, 0.4362, 0.2946]) -Greedy action tensor([-1.1595, -0.7027, 0.3719, 0.3834]) tensor([0.0842, 0.1329, 0.3892, 0.3937]) -Greedy action tensor([-1.7335, -0.4861, 0.6956, 0.1423]) tensor([0.0447, 0.1557, 0.5076, 0.2919]) -Greedy action tensor([-1.7630, -0.5053, 0.5841, -0.0425]) tensor([0.0486, 0.1711, 0.5085, 0.2718]) -Greedy action tensor([-1.3167, -0.1955, 0.3317, 0.2799]) tensor([0.0704, 0.2160, 0.3660, 0.3476]) -Greedy action tensor([-0.4692, -0.1324, 1.0103, 1.4878]) tensor([0.0721, 0.1010, 0.3166, 0.5103]) -Greedy action tensor([-1.2168, -0.4919, 0.3953, 0.1118]) tensor([0.0844, 0.1742, 0.4229, 0.3185]) -Greedy action tensor([-1.9047, -0.4499, 0.6454, -0.1530]) tensor([0.0419, 0.1796, 0.5369, 0.2416]) -Greedy action tensor([-0.7027, -0.4555, 0.1726, 0.5420]) tensor([0.1227, 0.1571, 0.2944, 0.4259]) -Greedy action tensor([-1.4387, -0.6107, 0.4024, 0.1087]) tensor([0.0700, 0.1602, 0.4411, 0.3288]) -Greedy action tensor([-1.8555, -0.4823, 0.7135, -0.0068]) tensor([0.0411, 0.1621, 0.5360, 0.2608]) -Greedy action tensor([-0.2338, -0.4593, 0.8450, 1.6157]) tensor([0.0901, 0.0719, 0.2651, 0.5729]) -Greedy action tensor([-0.7458, -0.4709, 0.4212, 0.2874]) tensor([0.1199, 0.1579, 0.3852, 0.3370]) -Greedy action tensor([-1.8916, -0.4353, 0.6381, -0.1555]) tensor([0.0425, 0.1824, 0.5337, 0.2414]) -Greedy action tensor([-0.8910, -0.1652, 0.2345, 0.2310]) tensor([0.1085, 0.2241, 0.3343, 0.3331]) -Greedy action tensor([-1.0929, -0.2088, 0.4066, -0.2504]) tensor([0.0978, 0.2368, 0.4382, 0.2272]) -Greedy action tensor([-1.8408, -0.4786, 0.6536, -0.0688]) tensor([0.0437, 0.1705, 0.5290, 0.2569]) -Greedy action tensor([-1.5182, -0.4380, 0.7222, 0.5749]) tensor([0.0466, 0.1373, 0.4380, 0.3780]) -Greedy action tensor([-1.3778, -1.1338, 0.6208, 1.0395]) tensor([0.0479, 0.0612, 0.3535, 0.5374]) -Greedy action tensor([-1.7581, -0.6583, 1.2857, 0.8709]) tensor([0.0257, 0.0773, 0.5402, 0.3568]) -Greedy action tensor([-1.9320, -0.4548, 0.6578, -0.1716]) tensor([0.0408, 0.1786, 0.5435, 0.2371]) -Greedy action tensor([-1.0797, -0.6374, 0.2613, 0.2907]) tensor([0.0969, 0.1509, 0.3706, 0.3816]) -Greedy action tensor([-1.8451, -0.6394, 0.3767, -0.1416]) tensor([0.0525, 0.1752, 0.4840, 0.2883]) -Greedy action tensor([-1.8780, -0.4693, 0.6318, -0.1335]) tensor([0.0433, 0.1770, 0.5322, 0.2476]) -Greedy action tensor([-1.5991, -0.8530, 0.3337, -0.4816]) tensor([0.0765, 0.1613, 0.5284, 0.2338]) -Greedy action tensor([-0.9484, -0.6372, 0.2454, 0.2133]) tensor([0.1129, 0.1541, 0.3724, 0.3607]) -Greedy action tensor([-1.4274, -0.5666, 0.4502, 0.0697]) tensor([0.0696, 0.1646, 0.4549, 0.3109]) -Greedy action tensor([-1.5552, -0.4616, 1.2905, 1.0827]) tensor([0.0284, 0.0848, 0.4893, 0.3975]) -Greedy action tensor([-1.3358, -0.3952, 0.1017, -0.4631]) tensor([0.0984, 0.2520, 0.4142, 0.2355]) -Greedy action tensor([-1.6734, -0.3930, 0.6002, 0.2483]) tensor([0.0473, 0.1702, 0.4594, 0.3231]) -Greedy action tensor([-1.4619, -0.6168, 0.4469, 0.1144]) tensor([0.0671, 0.1561, 0.4524, 0.3244]) -Greedy action tensor([-1.2032, -0.5534, 0.3225, 0.3674]) tensor([0.0811, 0.1554, 0.3731, 0.3903]) -Greedy action tensor([-1.6218, -0.5250, 0.5147, 0.0714]) tensor([0.0559, 0.1673, 0.4731, 0.3037]) -Greedy action tensor([-1.4405, -0.5493, 0.4142, 0.2453]) tensor([0.0657, 0.1601, 0.4197, 0.3545]) -Greedy action tensor([-0.9823, -0.5433, 0.2484, 0.2215]) tensor([0.1074, 0.1666, 0.3678, 0.3581]) -Greedy action tensor([-1.4699, -0.6082, 0.4530, 0.1256]) tensor([0.0661, 0.1564, 0.4519, 0.3257]) -Greedy action tensor([-1.7015, -0.0052, 0.4681, -0.0583]) tensor([0.0491, 0.2676, 0.4296, 0.2538]) -Greedy action tensor([-1.3615, -0.5652, 0.3420, 0.2088]) tensor([0.0740, 0.1640, 0.4063, 0.3557]) -Greedy action tensor([-1.8427, -0.4805, 0.6587, -0.0606]) tensor([0.0434, 0.1694, 0.5293, 0.2579]) -Greedy action tensor([-1.8926, -0.4469, 0.6447, -0.1447]) tensor([0.0423, 0.1796, 0.5351, 0.2430]) -Greedy action tensor([-1.0616, -0.0323, 0.9359, 1.0019]) tensor([0.0525, 0.1470, 0.3870, 0.4135]) -Greedy action tensor([-1.8741, -0.4861, 0.6765, -0.1089]) tensor([0.0423, 0.1693, 0.5415, 0.2469]) -Greedy action tensor([-1.6134, -0.4659, 0.4986, 0.0213]) tensor([0.0570, 0.1796, 0.4711, 0.2923]) -Greedy action tensor([-0.6745, 0.4076, -0.0112, 0.1497]) tensor([0.1224, 0.3611, 0.2375, 0.2790]) -Greedy action tensor([ 0.6424, -0.4553, -0.1363, -0.3981]) tensor([0.4660, 0.1555, 0.2139, 0.1646]) -Greedy action tensor([ 0.4648, -0.1645, -0.0505, -0.2268]) tensor([0.3801, 0.2026, 0.2270, 0.1903]) -Greedy action tensor([ 0.4096, 0.0532, -0.1036, -0.3651]) tensor([0.3624, 0.2537, 0.2169, 0.1670]) -Greedy action tensor([ 0.8790, -0.3704, -0.1698, -0.6360]) tensor([0.5386, 0.1544, 0.1887, 0.1184]) -Greedy action tensor([ 0.6685, -0.4136, -0.0435, -0.1768]) tensor([0.4427, 0.1500, 0.2172, 0.1901]) -Greedy action tensor([ 0.7208, -0.5287, -0.0425, -0.4844]) tensor([0.4872, 0.1397, 0.2271, 0.1460]) -Greedy action tensor([ 0.4565, -0.1683, -0.0350, -0.1573]) tensor([0.3720, 0.1991, 0.2275, 0.2013]) -Greedy action tensor([ 0.3359, -0.3857, -0.1150, -0.1043]) tensor([0.3614, 0.1756, 0.2302, 0.2327]) -Greedy action tensor([ 1.2868, -0.6934, 0.0938, -0.3826]) tensor([0.6136, 0.0847, 0.1861, 0.1156]) -Greedy action tensor([ 0.5898, -0.2425, -0.0290, -0.2450]) tensor([0.4154, 0.1807, 0.2237, 0.1803]) -Greedy action tensor([ 0.7104, -0.4307, -0.0259, -0.5184]) tensor([0.4782, 0.1528, 0.2290, 0.1400]) -Greedy action tensor([ 0.8581, -0.7266, -0.1773, -0.6628]) tensor([0.5622, 0.1153, 0.1996, 0.1229]) -Greedy action tensor([ 0.8195, -0.5522, -0.0402, -0.4472]) tensor([0.5105, 0.1295, 0.2161, 0.1439]) -Greedy action tensor([ 0.5649, -0.1043, -0.0391, -0.2387]) tensor([0.3990, 0.2043, 0.2181, 0.1786]) -Greedy action tensor([ 0.4440, -0.3986, -0.1466, -0.2760]) tensor([0.4046, 0.1742, 0.2242, 0.1969]) -Greedy action tensor([ 0.6511, 0.3652, -0.1604, -0.1477]) tensor([0.3780, 0.2840, 0.1679, 0.1700]) -Greedy action tensor([ 0.6249, 0.0811, -0.1246, -0.0567]) tensor([0.3908, 0.2269, 0.1847, 0.1977]) -Greedy action tensor([ 0.8252, -0.2640, -0.0131, -0.4257]) tensor([0.4866, 0.1637, 0.2104, 0.1393]) -Greedy action tensor([ 0.7262, -0.8768, -0.0531, -0.3813]) tensor([0.5024, 0.1011, 0.2305, 0.1660]) -Greedy action tensor([ 0.8542, -0.7227, -0.2092, -0.8360]) tensor([0.5759, 0.1190, 0.1989, 0.1062]) -Greedy action tensor([ 0.4149, 0.0014, -0.1079, -0.2575]) tensor([0.3617, 0.2392, 0.2144, 0.1846]) -Greedy action tensor([ 0.4954, -0.0022, -0.0310, -0.1409]) tensor([0.3666, 0.2229, 0.2165, 0.1940]) -Greedy action tensor([ 0.0857, 0.1179, -0.0483, -0.2131]) tensor([0.2741, 0.2830, 0.2397, 0.2033]) -Greedy action tensor([ 0.4872, -0.1315, -0.1954, -0.3900]) tensor([0.4065, 0.2190, 0.2054, 0.1691]) -Greedy action tensor([ 0.5257, -0.0147, -0.2805, -0.2036]) tensor([0.3982, 0.2320, 0.1778, 0.1920]) -Greedy action tensor([ 0.8922, -0.5930, 0.0846, -0.5404]) tensor([0.5233, 0.1185, 0.2333, 0.1249]) -Greedy action tensor([ 0.5964, -0.2953, 0.0536, -0.3367]) tensor([0.4194, 0.1719, 0.2437, 0.1650]) -Greedy action tensor([ 0.3911, 0.0843, -0.0178, -0.0891]) tensor([0.3312, 0.2437, 0.2201, 0.2049]) -Greedy action tensor([ 0.8280, -0.3186, -0.1317, -0.2325]) tensor([0.4885, 0.1552, 0.1871, 0.1692]) -Greedy action tensor([ 0.7010, -0.4023, 0.0898, -0.4040]) tensor([0.4534, 0.1504, 0.2460, 0.1502]) -Greedy action tensor([ 0.6677, -0.1148, -0.1449, -0.5833]) tensor([0.4572, 0.2091, 0.2029, 0.1309]) -Greedy action tensor([ 0.2719, 0.1878, -0.0014, -0.3599]) tensor([0.3113, 0.2862, 0.2369, 0.1655]) -Greedy action tensor([ 0.2205, 0.4137, -0.1500, -0.0613]) tensor([0.2734, 0.3316, 0.1887, 0.2062]) -Greedy action tensor([ 0.7250, -0.4367, -0.1315, -0.3705]) tensor([0.4826, 0.1510, 0.2050, 0.1614]) -Greedy action tensor([ 0.4437, -0.2503, 0.1545, -0.2198]) tensor([0.3619, 0.1808, 0.2710, 0.1864]) -Greedy action tensor([ 0.9506, -0.7693, 0.0141, -0.4299]) tensor([0.5487, 0.0983, 0.2151, 0.1380]) -Greedy action tensor([ 0.9793, -0.5489, 0.0998, -0.6630]) tensor([0.5478, 0.1188, 0.2273, 0.1060]) -Greedy action tensor([ 0.6271, -0.4746, -0.0156, -0.4685]) tensor([0.4561, 0.1516, 0.2399, 0.1525]) -Greedy action tensor([ 0.5059, -0.2093, -0.1625, -0.3498]) tensor([0.4121, 0.2016, 0.2112, 0.1751]) -Greedy action tensor([ 0.1827, 0.3163, -0.1334, -0.0731]) tensor([0.2743, 0.3135, 0.1999, 0.2124]) -Greedy action tensor([ 0.4600, -0.0444, -0.0200, -0.1220]) tensor([0.3595, 0.2171, 0.2225, 0.2009]) -Greedy action tensor([ 0.8014, -0.4739, -0.1323, -0.6824]) tensor([0.5265, 0.1471, 0.2070, 0.1194]) -Greedy action tensor([ 0.8498, -0.5853, -0.1420, -0.5303]) tensor([0.5375, 0.1280, 0.1994, 0.1352]) -Greedy action tensor([ 0.5951, -0.4975, -0.0196, -0.2680]) tensor([0.4352, 0.1459, 0.2353, 0.1836]) -Greedy action tensor([ 0.7430, -0.4720, -0.1877, -0.4376]) tensor([0.5005, 0.1485, 0.1973, 0.1537]) -Greedy action tensor([ 0.4485, -0.3798, 0.0311, -0.4044]) tensor([0.3966, 0.1732, 0.2612, 0.1690]) -Greedy action tensor([ 0.9874, -0.5908, -0.1195, -0.6232]) tensor([0.5758, 0.1188, 0.1903, 0.1150]) -Greedy action tensor([ 0.9712, -0.5774, 0.1145, -0.4625]) tensor([0.5332, 0.1133, 0.2264, 0.1271]) -Greedy action tensor([ 0.4645, 0.1318, -0.1502, -0.0428]) tensor([0.3497, 0.2507, 0.1891, 0.2105]) -Greedy action tensor([ 0.9409, 0.0772, -0.0440, -0.4831]) tensor([0.4912, 0.2071, 0.1835, 0.1183]) -Greedy action tensor([ 0.6202, -0.7679, 0.0020, -0.2809]) tensor([0.4557, 0.1137, 0.2456, 0.1851]) -Greedy action tensor([ 0.5291, -0.1890, 0.1425, -0.3111]) tensor([0.3848, 0.1877, 0.2614, 0.1661]) -Greedy action tensor([ 0.2251, -0.0073, -0.0332, -0.0456]) tensor([0.3005, 0.2382, 0.2321, 0.2292]) -Greedy action tensor([ 0.4283, -0.0358, -0.1357, -0.1230]) tensor([0.3605, 0.2266, 0.2051, 0.2077]) -Greedy action tensor([ 0.4408, -0.2373, -0.0445, -0.1051]) tensor([0.3700, 0.1878, 0.2278, 0.2144]) -Greedy action tensor([ 0.6939, -0.1465, 0.1057, -0.3644]) tensor([0.4285, 0.1849, 0.2379, 0.1487]) -Greedy action tensor([ 1.0250, -1.2141, -0.0405, -0.4928]) tensor([0.5987, 0.0638, 0.2063, 0.1312]) -Greedy action tensor([ 0.6390, -0.4161, -0.1112, -0.3487]) tensor([0.4560, 0.1588, 0.2154, 0.1698]) -Greedy action tensor([ 0.3802, -0.0713, -0.0594, -0.1199]) tensor([0.3463, 0.2205, 0.2231, 0.2100]) -Greedy action tensor([ 0.5158, -0.2664, 0.0150, -0.2222]) tensor([0.3935, 0.1800, 0.2385, 0.1881]) -Greedy action tensor([ 0.3816, -0.2084, -0.1677, -0.3353]) tensor([0.3817, 0.2116, 0.2204, 0.1864]) -Greedy action tensor([ 0.6629, 0.5337, -0.2943, -0.3302]) tensor([0.3798, 0.3337, 0.1458, 0.1407]) -Greedy action tensor([ 0.5890, -0.5348, -0.0852, -0.1361]) tensor([0.4313, 0.1402, 0.2197, 0.2088]) -Greedy action tensor([ 0.5660, -0.3000, -0.0327, -0.3043]) tensor([0.4186, 0.1761, 0.2300, 0.1753]) -Greedy action tensor([ 0.8167, -0.5346, 0.0756, -0.4376]) tensor([0.4949, 0.1281, 0.2358, 0.1412]) -Greedy action tensor([ 0.8389, -0.6849, 0.0216, -0.6173]) tensor([0.5284, 0.1151, 0.2333, 0.1232]) -Greedy action tensor([ 0.6980, -0.5651, -0.0578, -0.3390]) tensor([0.4746, 0.1342, 0.2229, 0.1683]) -Greedy action tensor([ 1.0989, -0.4791, -0.0497, -0.5446]) tensor([0.5825, 0.1202, 0.1847, 0.1126]) -Greedy action tensor([ 0.4366, 0.0592, 0.0308, -0.0572]) tensor([0.3376, 0.2314, 0.2250, 0.2060]) -Greedy action tensor([ 0.5793, -0.4156, -0.0442, -0.1114]) tensor([0.4154, 0.1536, 0.2227, 0.2082]) -Greedy action tensor([ 0.6791, -0.3617, 0.0440, -0.1966]) tensor([0.4348, 0.1536, 0.2304, 0.1812]) -Greedy action tensor([ 0.4453, -0.1142, 0.1641, -0.3757]) tensor([0.3615, 0.2066, 0.2729, 0.1591]) -Greedy action tensor([ 0.6167, -0.1308, -0.0894, -0.4882]) tensor([0.4351, 0.2060, 0.2147, 0.1441]) -Greedy action tensor([ 0.4954, -0.2263, 0.0518, -0.2682]) tensor([0.3856, 0.1874, 0.2474, 0.1797]) -Greedy action tensor([ 0.5674, -0.1638, -0.1311, -0.4379]) tensor([0.4265, 0.2053, 0.2121, 0.1561]) -Greedy action tensor([ 0.7237, -0.5044, -0.1592, -0.4591]) tensor([0.4968, 0.1455, 0.2055, 0.1522]) -Greedy action tensor([ 0.7305, -0.3316, -0.1127, -0.3809]) tensor([0.4750, 0.1642, 0.2044, 0.1563]) -Greedy action tensor([ 0.6469, -0.5254, -0.0654, -0.6433]) tensor([0.4818, 0.1492, 0.2363, 0.1326]) -Greedy action tensor([ 0.4115, 0.0452, 0.1267, -0.1529]) tensor([0.3318, 0.2300, 0.2495, 0.1887]) -Greedy action tensor([ 0.4687, -0.1464, 0.1188, -0.3096]) tensor([0.3698, 0.1999, 0.2606, 0.1698]) -Greedy action tensor([ 1.0552, -0.9941, -0.0211, -0.7401]) tensor([0.6113, 0.0788, 0.2084, 0.1015]) -Greedy action tensor([0.5665, 0.4636, 0.0894, 0.5474]) tensor([0.2854, 0.2575, 0.1771, 0.2800]) -Greedy action tensor([ 1.3553, -0.8694, 0.8358, -0.0115]) tensor([0.5108, 0.0552, 0.3038, 0.1302]) -Greedy action tensor([ 0.6371, -0.3242, 0.3331, 0.3571]) tensor([0.3477, 0.1330, 0.2566, 0.2628]) -Greedy action tensor([-0.2715, -0.5011, 0.2868, 0.7664]) tensor([0.1571, 0.1249, 0.2745, 0.4435]) -Greedy action tensor([-0.3155, -1.9691, -0.3136, 1.3479]) tensor([0.1339, 0.0256, 0.1341, 0.7064]) -Greedy action tensor([ 0.9145, -0.2376, -0.5161, 1.1292]) tensor([0.3578, 0.1131, 0.0856, 0.4435]) -Greedy action tensor([ 0.8266, -1.5431, 0.9444, 0.8368]) tensor([0.3097, 0.0290, 0.3484, 0.3129]) -Greedy action tensor([ 1.2317, -0.0847, 1.1948, 0.6303]) tensor([0.3597, 0.0964, 0.3467, 0.1971]) -Greedy action tensor([ 0.7127, -1.2791, 1.2289, 0.2185]) tensor([0.2922, 0.0399, 0.4897, 0.1783]) -Greedy action tensor([-0.5971, -0.0102, 0.7948, -0.5221]) tensor([0.1266, 0.2277, 0.5092, 0.1365]) -Greedy action tensor([-0.3712, -1.2277, 0.7426, 0.4932]) tensor([0.1461, 0.0620, 0.4450, 0.3468]) -Greedy action tensor([-0.0224, -0.3737, 0.8651, 0.1205]) tensor([0.1892, 0.1331, 0.4595, 0.2182]) -Greedy action tensor([ 1.4204, -1.6517, -0.1445, 0.2467]) tensor([0.6391, 0.0296, 0.1336, 0.1976]) -Greedy action tensor([ 1.0075, -0.2413, 1.7057, 0.5867]) tensor([0.2529, 0.0726, 0.5084, 0.1661]) -Greedy action tensor([-0.6916, -1.0315, -0.1514, 1.5662]) tensor([0.0770, 0.0548, 0.1321, 0.7361]) -Greedy action tensor([ 0.5380, -1.7403, -0.7682, 0.7802]) tensor([0.3777, 0.0387, 0.1023, 0.4813]) -Greedy action tensor([-0.7503, 0.4563, -0.1759, 1.3013]) tensor([0.0720, 0.2405, 0.1278, 0.5598]) -Greedy action tensor([ 1.8457, -0.0677, 1.2469, 1.5717]) tensor([0.4069, 0.0601, 0.2236, 0.3094]) -Greedy action tensor([ 0.8660, -1.0599, 1.6640, 1.0641]) tensor([0.2181, 0.0318, 0.4843, 0.2658]) -Greedy action tensor([ 0.3495, -0.9945, 0.7877, -0.1797]) tensor([0.2941, 0.0767, 0.4559, 0.1733]) -Greedy action tensor([ 1.2207, -1.5188, -0.5793, 0.6189]) tensor([0.5625, 0.0363, 0.0930, 0.3082]) -Greedy action tensor([-1.0620, -0.5337, -0.7452, -0.4944]) tensor([0.1714, 0.2908, 0.2354, 0.3024]) -Greedy action tensor([ 0.1740, -2.2062, -0.2175, -0.7948]) tensor([0.4655, 0.0431, 0.3147, 0.1767]) -Greedy action tensor([ 0.2839, -0.0359, 0.7414, 1.0798]) tensor([0.1811, 0.1315, 0.2861, 0.4013]) -Greedy action tensor([-0.5688, -0.1377, -0.2410, -0.2017]) tensor([0.1862, 0.2866, 0.2584, 0.2688]) -Greedy action tensor([ 1.0715, -1.6023, 0.4645, 1.2531]) tensor([0.3555, 0.0245, 0.1937, 0.4263]) -Greedy action tensor([-1.3473, -0.5743, 0.4077, 0.4651]) tensor([0.0663, 0.1437, 0.3836, 0.4063]) -Greedy action tensor([0.9790, 0.7643, 0.2932, 0.6674]) tensor([0.3287, 0.2652, 0.1655, 0.2407]) -Greedy action tensor([ 1.1472, -0.3297, 1.7632, 0.5688]) tensor([0.2747, 0.0627, 0.5086, 0.1540]) -Greedy action tensor([-0.3268, -2.1923, 0.4402, 0.5744]) tensor([0.1733, 0.0268, 0.3732, 0.4267]) -Greedy action tensor([ 1.1984, -0.1853, 0.3419, -0.3973]) tensor([0.5325, 0.1335, 0.2261, 0.1080]) -Greedy action tensor([-1.1269, 0.7843, 1.0219, -0.4695]) tensor([0.0548, 0.3702, 0.4694, 0.1057]) -Greedy action tensor([-0.4168, 0.9599, 1.0795, -0.8882]) tensor([0.0995, 0.3942, 0.4443, 0.0621]) -Greedy action tensor([1.0174, 0.1770, 0.7663, 1.0375]) tensor([0.3096, 0.1336, 0.2409, 0.3159]) -Greedy action tensor([ 0.3522, -0.9216, -0.5298, 0.9022]) tensor([0.2918, 0.0816, 0.1208, 0.5058]) -Greedy action tensor([ 0.5755, -1.0206, 0.2240, 0.7465]) tensor([0.3233, 0.0655, 0.2275, 0.3836]) -Greedy action tensor([ 1.0071, -1.5420, 1.0121, 0.7002]) tensor([0.3547, 0.0277, 0.3565, 0.2610]) -Greedy action tensor([ 0.1776, -1.4719, 0.9238, 1.2619]) tensor([0.1598, 0.0307, 0.3370, 0.4725]) -Greedy action tensor([ 1.3994, 1.0320, -0.8449, 0.3729]) tensor([0.4636, 0.3211, 0.0491, 0.1661]) -Greedy action tensor([ 0.9746, -0.8706, 1.4301, 1.0038]) tensor([0.2656, 0.0420, 0.4189, 0.2735]) -Greedy action tensor([-0.1357, -0.3442, 0.9621, -0.2869]) tensor([0.1764, 0.1432, 0.5288, 0.1516]) -Greedy action tensor([ 0.8726, -1.2117, 0.3722, 2.2648]) tensor([0.1738, 0.0216, 0.1054, 0.6992]) -Greedy action tensor([ 0.3770, -0.2720, 0.5299, -0.3401]) tensor([0.3149, 0.1645, 0.3669, 0.1537]) -Greedy action tensor([ 0.8537, -0.7779, 0.8799, -0.1735]) tensor([0.3876, 0.0758, 0.3979, 0.1388]) -Greedy action tensor([-0.1771, -0.2845, 0.0122, 0.4450]) tensor([0.2012, 0.1807, 0.2432, 0.3749]) -Greedy action tensor([ 0.7302, -0.1312, 0.5053, -0.1393]) tensor([0.3787, 0.1600, 0.3025, 0.1588]) -Greedy action tensor([ 0.7160, -0.0701, 0.7403, 0.1103]) tensor([0.3305, 0.1506, 0.3386, 0.1803]) -Greedy action tensor([1.1149, 0.8337, 0.3897, 0.9810]) tensor([0.3211, 0.2424, 0.1555, 0.2809]) -Greedy action tensor([ 1.4756, -0.7706, 0.8443, 0.3295]) tensor([0.5114, 0.0541, 0.2720, 0.1625]) -Greedy action tensor([-0.1812, 0.4362, 0.8441, -0.3326]) tensor([0.1538, 0.2852, 0.4288, 0.1322]) -Greedy action tensor([ 0.5733, 0.1433, -0.4491, 0.8920]) tensor([0.2954, 0.1921, 0.1063, 0.4062]) -Greedy action tensor([ 0.4107, -1.0800, -0.2459, 0.2229]) tensor([0.3887, 0.0875, 0.2016, 0.3222]) -Greedy action tensor([ 0.2200, -0.7815, -0.8393, -0.3913]) tensor([0.4431, 0.1628, 0.1536, 0.2405]) -Greedy action tensor([-0.2242, 0.8210, 0.4300, -0.6575]) tensor([0.1559, 0.4433, 0.2998, 0.1011]) -Greedy action tensor([ 1.2554, -1.0115, -0.0816, 0.4869]) tensor([0.5465, 0.0566, 0.1435, 0.2534]) -Greedy action tensor([ 0.6803, -1.3918, -0.1304, 1.3269]) tensor([0.2874, 0.0362, 0.1278, 0.5486]) -Greedy action tensor([ 0.6820, -0.6298, 0.8149, 1.3915]) tensor([0.2250, 0.0606, 0.2570, 0.4574]) -Greedy action tensor([-0.3836, -0.8281, 1.9047, -0.4524]) tensor([0.0804, 0.0516, 0.7929, 0.0751]) -Greedy action tensor([ 0.1305, -0.6597, 1.4367, -0.3357]) tensor([0.1732, 0.0786, 0.6395, 0.1087]) -Greedy action tensor([1.2325, 0.0482, 0.3899, 1.4187]) tensor([0.3400, 0.1040, 0.1464, 0.4096]) -Greedy action tensor([ 0.5315, -1.3652, 0.0881, 0.7975]) tensor([0.3229, 0.0485, 0.2073, 0.4213]) -Greedy action tensor([0.9126, 0.6022, 0.4477, 0.2648]) tensor([0.3467, 0.2542, 0.2178, 0.1814]) -Greedy action tensor([ 0.1694, 0.1747, -0.1819, -0.7991]) tensor([0.3237, 0.3255, 0.2279, 0.1229]) -Greedy action tensor([ 1.4946, -0.3702, 1.7552, -0.2067]) tensor([0.3795, 0.0588, 0.4925, 0.0692]) -Greedy action tensor([ 0.2599, -1.3518, 1.3272, 0.9714]) tensor([0.1628, 0.0325, 0.4732, 0.3315]) -Greedy action tensor([0.3086, 0.6565, 0.7794, 0.2425]) tensor([0.2019, 0.2859, 0.3233, 0.1890]) -Greedy action tensor([ 1.2295, 0.9750, 0.1432, -0.0618]) tensor([0.4188, 0.3247, 0.1413, 0.1151]) -Greedy action tensor([0.1003, 0.3849, 1.2563, 0.0140]) tensor([0.1557, 0.2069, 0.4946, 0.1428]) -Greedy action tensor([ 0.5802, -0.3238, 1.4250, 1.1096]) tensor([0.1842, 0.0746, 0.4286, 0.3127]) -Greedy action tensor([-0.4045, 0.3263, -0.7386, 1.1603]) tensor([0.1166, 0.2422, 0.0835, 0.5577]) -Greedy action tensor([ 0.0499, -0.9881, 1.2641, 0.3660]) tensor([0.1641, 0.0581, 0.5527, 0.2251]) -Greedy action tensor([-0.4688, 0.0668, -1.1431, 0.7037]) tensor([0.1551, 0.2650, 0.0790, 0.5009]) -Greedy action tensor([ 0.9325, -0.3936, 0.3446, -0.2304]) tensor([0.4687, 0.1244, 0.2604, 0.1465]) -Greedy action tensor([ 0.5916, -0.6144, 1.3933, 1.6748]) tensor([0.1542, 0.0462, 0.3439, 0.4557]) -Greedy action tensor([ 0.0755, -1.2100, 1.8630, 0.2128]) tensor([0.1191, 0.0329, 0.7114, 0.1366]) -Greedy action tensor([ 0.4677, -0.8624, 0.6537, 0.3004]) tensor([0.3017, 0.0798, 0.3633, 0.2552]) -Greedy action tensor([0.2188, 0.4900, 0.5766, 1.1075]) tensor([0.1620, 0.2124, 0.2317, 0.3939]) -Greedy action tensor([ 0.0220, -1.0113, -0.5783, 0.7693]) tensor([0.2490, 0.0886, 0.1366, 0.5258]) -Greedy action tensor([ 0.7845, -2.0476, 0.0147, 1.0401]) tensor([0.3555, 0.0209, 0.1646, 0.4590]) -Greedy action tensor([-0.5323, -0.5246, -0.1309, 0.9880]) tensor([0.1238, 0.1248, 0.1850, 0.5664]) -Greedy action tensor([-0.2455, -2.1855, 0.7213, -0.3395]) tensor([0.2135, 0.0307, 0.5614, 0.1944]) -Greedy action tensor([ 1.1183, -0.6451, 0.5830, -0.0611]) tensor([0.4844, 0.0831, 0.2836, 0.1489]) -Greedy action tensor([ 0.6566, -0.3440, -0.0387, 0.1138]) tensor([0.4085, 0.1502, 0.2038, 0.2374]) -Greedy action tensor([ 1.0329, -0.1534, -0.3377, 0.0872]) tensor([0.5134, 0.1568, 0.1304, 0.1994]) -Greedy action tensor([ 1.6404, -0.7185, -0.3355, 0.4328]) tensor([0.6527, 0.0617, 0.0905, 0.1951]) -Greedy action tensor([ 0.6125, -0.3626, -0.1709, 0.1705]) tensor([0.4038, 0.1523, 0.1845, 0.2595]) -Greedy action tensor([ 1.3762, -0.4139, -0.4471, 0.1312]) tensor([0.6187, 0.1033, 0.0999, 0.1781]) -Greedy action tensor([ 1.5388, -0.2880, -0.4371, 0.0552]) tensor([0.6551, 0.1054, 0.0908, 0.1486]) -Greedy action tensor([ 0.7269, -0.5412, -0.4873, 0.4850]) tensor([0.4231, 0.1191, 0.1256, 0.3322]) -Greedy action tensor([ 1.0576, -0.5329, -0.2033, 0.5071]) tensor([0.4845, 0.0988, 0.1373, 0.2794]) -Greedy action tensor([ 0.8991, -0.2519, 0.0984, 0.1931]) tensor([0.4427, 0.1400, 0.1988, 0.2185]) -Greedy action tensor([ 1.5100, -0.3977, -0.1334, 0.4372]) tensor([0.5939, 0.0881, 0.1148, 0.2031]) -Greedy action tensor([ 1.5389, -0.7523, -0.2010, -0.1811]) tensor([0.6869, 0.0695, 0.1206, 0.1230]) -Greedy action tensor([ 1.5489, -0.6544, -0.4928, 0.1511]) tensor([0.6723, 0.0742, 0.0873, 0.1661]) -Greedy action tensor([ 1.4978, -0.4114, -0.5039, -0.1136]) tensor([0.6744, 0.0999, 0.0911, 0.1346]) -Greedy action tensor([ 1.1969, -0.3261, -0.2771, 0.6102]) tensor([0.4992, 0.1089, 0.1143, 0.2776]) -Greedy action tensor([ 1.8248, -1.2091, 0.0747, 0.2973]) tensor([0.6950, 0.0334, 0.1208, 0.1509]) -Greedy action tensor([ 1.6914, -0.5997, -0.3118, 0.5324]) tensor([0.6452, 0.0653, 0.0870, 0.2025]) -Greedy action tensor([ 1.3393, -0.8544, -0.0314, 0.1453]) tensor([0.5994, 0.0668, 0.1522, 0.1816]) -Greedy action tensor([ 0.9014, -0.4532, -0.0248, 0.1663]) tensor([0.4687, 0.1210, 0.1856, 0.2247]) -Greedy action tensor([ 0.8721, -0.5047, -0.2560, 0.2226]) tensor([0.4766, 0.1203, 0.1542, 0.2489]) -Greedy action tensor([ 0.3858, -0.4245, 0.0854, -0.1063]) tensor([0.3576, 0.1590, 0.2648, 0.2186]) -Greedy action tensor([ 1.2080, -0.2788, -0.3007, 0.2885]) tensor([0.5417, 0.1225, 0.1198, 0.2160]) -Greedy action tensor([ 1.1149, -0.3129, -0.1661, 0.4077]) tensor([0.4974, 0.1193, 0.1381, 0.2452]) -Greedy action tensor([ 1.1832, -0.0954, -0.2111, 0.0242]) tensor([0.5434, 0.1513, 0.1348, 0.1705]) -Greedy action tensor([ 0.6457, -0.7358, -0.2766, 0.2468]) tensor([0.4311, 0.1083, 0.1714, 0.2893]) -Greedy action tensor([ 1.4033, -0.7118, -0.2935, 0.4414]) tensor([0.5931, 0.0715, 0.1087, 0.2267]) -Greedy action tensor([ 1.1282, -0.3983, -0.2548, -0.1627]) tensor([0.5737, 0.1247, 0.1439, 0.1578]) -Greedy action tensor([ 1.7373, -0.6061, -0.3998, 0.0784]) tensor([0.7121, 0.0684, 0.0840, 0.1355]) -Greedy action tensor([ 1.0542, -0.6136, -0.1597, -0.0971]) tensor([0.5550, 0.1047, 0.1648, 0.1755]) -Greedy action tensor([ 1.6488, -0.7427, -0.1068, 0.3955]) tensor([0.6452, 0.0590, 0.1115, 0.1842]) -Greedy action tensor([ 0.8797, -0.6152, -0.0659, 0.0936]) tensor([0.4835, 0.1084, 0.1878, 0.2203]) -Greedy action tensor([ 1.4937, -0.2883, -0.2928, -0.0423]) tensor([0.6447, 0.1085, 0.1080, 0.1388]) -Greedy action tensor([ 0.8713, -0.5024, -0.1384, 0.3758]) tensor([0.4491, 0.1137, 0.1636, 0.2736]) -Greedy action tensor([ 0.3881, -0.3502, -0.1622, 0.0504]) tensor([0.3613, 0.1727, 0.2084, 0.2577]) -Greedy action tensor([ 0.8353, -0.2956, -0.0263, 0.1468]) tensor([0.4449, 0.1436, 0.1880, 0.2235]) -Greedy action tensor([ 1.0935, -0.6511, -0.0355, 0.1971]) tensor([0.5246, 0.0917, 0.1696, 0.2141]) -Greedy action tensor([ 1.0893, -0.2136, -0.3687, 0.2846]) tensor([0.5124, 0.1392, 0.1192, 0.2291]) -Greedy action tensor([ 1.4797, -0.5960, -0.5812, 0.4993]) tensor([0.6143, 0.0771, 0.0782, 0.2305]) -Greedy action tensor([ 1.5004, -0.1976, -0.2938, -0.1336]) tensor([0.6475, 0.1185, 0.1076, 0.1264]) -Greedy action tensor([ 1.5423, -0.0516, -0.2778, 0.0538]) tensor([0.6286, 0.1277, 0.1018, 0.1419]) -Greedy action tensor([ 1.2408, -0.3084, -0.4943, 0.4880]) tensor([0.5377, 0.1142, 0.0948, 0.2533]) -Greedy action tensor([ 1.3006, -0.0386, -0.4576, 0.2330]) tensor([0.5624, 0.1474, 0.0969, 0.1934]) -Greedy action tensor([ 1.2208, -0.5001, -0.4636, 0.4333]) tensor([0.5496, 0.0983, 0.1020, 0.2501]) -Greedy action tensor([ 1.2817, -0.6962, 0.0143, -0.1933]) tensor([0.6065, 0.0839, 0.1708, 0.1388]) -Greedy action tensor([ 1.1627, -0.4520, -0.1335, 0.3943]) tensor([0.5165, 0.1027, 0.1413, 0.2395]) -Greedy action tensor([ 2.0107, -0.6926, -0.3484, 0.2675]) tensor([0.7483, 0.0501, 0.0707, 0.1309]) -Greedy action tensor([ 0.6135, -0.1102, -0.0530, 0.1053]) tensor([0.3846, 0.1865, 0.1975, 0.2314]) -Greedy action tensor([ 1.5631, -0.6501, 0.1191, 0.3798]) tensor([0.6055, 0.0662, 0.1429, 0.1854]) -Greedy action tensor([ 1.4479, -0.6200, 0.0458, 0.3340]) tensor([0.5879, 0.0744, 0.1447, 0.1930]) -Greedy action tensor([ 1.5399, -0.5734, -0.2098, 0.1883]) tensor([0.6437, 0.0778, 0.1119, 0.1666]) -Greedy action tensor([ 0.8262, -0.3588, -0.0464, -0.2638]) tensor([0.4855, 0.1484, 0.2029, 0.1632]) -Greedy action tensor([ 1.3232, -0.4700, -0.0531, 0.0946]) tensor([0.5843, 0.0972, 0.1475, 0.1710]) -Greedy action tensor([ 0.5440, -0.2223, -0.0705, -0.0244]) tensor([0.3888, 0.1807, 0.2103, 0.2202]) -Greedy action tensor([ 1.2996, -0.5199, 0.5762, 0.1115]) tensor([0.5123, 0.0831, 0.2485, 0.1561]) -Greedy action tensor([ 0.9373, -0.2652, -0.2373, 0.3151]) tensor([0.4660, 0.1400, 0.1440, 0.2501]) -Greedy action tensor([ 1.2567, -0.4930, -0.4160, 0.1636]) tensor([0.5894, 0.1025, 0.1106, 0.1975]) -Greedy action tensor([ 1.7043, -0.2016, -0.3476, 0.2987]) tensor([0.6569, 0.0977, 0.0844, 0.1611]) -Greedy action tensor([ 0.6285, -0.2118, -0.0288, 0.0406]) tensor([0.3992, 0.1723, 0.2069, 0.2217]) -Greedy action tensor([ 0.9252, -0.4894, -0.4142, 0.1165]) tensor([0.5127, 0.1246, 0.1343, 0.2284]) -Greedy action tensor([ 1.2589, -0.8301, -0.3260, 0.0679]) tensor([0.6125, 0.0758, 0.1255, 0.1861]) -Greedy action tensor([ 1.9115, -0.3968, -0.3494, 0.1533]) tensor([0.7267, 0.0723, 0.0758, 0.1253]) -Greedy action tensor([ 0.6826, -0.4042, -0.1179, 0.2899]) tensor([0.4062, 0.1370, 0.1824, 0.2743]) -Greedy action tensor([ 1.0699, -0.6578, -0.3464, 0.4010]) tensor([0.5174, 0.0919, 0.1255, 0.2651]) -Greedy action tensor([ 1.4046, -0.6313, -0.2775, 0.0987]) tensor([0.6299, 0.0822, 0.1172, 0.1707]) -Greedy action tensor([ 1.7523, -0.5007, -0.3058, 0.3165]) tensor([0.6800, 0.0714, 0.0868, 0.1618]) -Greedy action tensor([ 0.8970, -0.5665, 0.0148, 0.1361]) tensor([0.4734, 0.1095, 0.1959, 0.2212]) -Greedy action tensor([ 1.4820, -0.6037, -0.3502, 0.5073]) tensor([0.6018, 0.0748, 0.0963, 0.2271]) -Greedy action tensor([ 1.0735, -0.6905, -0.2706, 0.6259]) tensor([0.4828, 0.0827, 0.1259, 0.3086]) -Greedy action tensor([ 0.5990, -0.1323, 0.3959, -0.1254]) tensor([0.3594, 0.1730, 0.2934, 0.1742]) -Greedy action tensor([ 1.1613, -0.5019, -0.2476, 0.3029]) tensor([0.5383, 0.1020, 0.1316, 0.2282]) -Greedy action tensor([ 1.4163, 0.0611, 0.1487, -0.1702]) tensor([0.5734, 0.1479, 0.1614, 0.1173]) -Greedy action tensor([ 2.0974, -1.0859, -0.2831, 0.4032]) tensor([0.7589, 0.0315, 0.0702, 0.1394]) -Greedy action tensor([ 0.6603, -0.3679, -0.0118, -0.2911]) tensor([0.4436, 0.1586, 0.2265, 0.1713]) -Greedy action tensor([ 0.6871, -0.2848, -0.5340, 0.7121]) tensor([0.3706, 0.1402, 0.1093, 0.3799]) -Greedy action tensor([ 2.1404, -1.1293, -0.4105, 0.4911]) tensor([0.7644, 0.0291, 0.0596, 0.1469]) -Greedy action tensor([ 1.2188, -0.5374, -0.4398, 0.3702]) tensor([0.5583, 0.0964, 0.1063, 0.2390]) -Greedy action tensor([ 0.3824, -0.2075, 0.0866, 0.1734]) tensor([0.3216, 0.1783, 0.2392, 0.2609]) -Greedy action tensor([ 1.3420, -0.6108, -0.5767, -0.1796]) tensor([0.6635, 0.0941, 0.0974, 0.1449]) -Greedy action tensor([ 1.6474, -0.3765, -0.3990, 0.3771]) tensor([0.6485, 0.0857, 0.0838, 0.1821]) -Greedy action tensor([ 2.2120, -1.3297, -0.3815, 0.7578]) tensor([0.7478, 0.0217, 0.0559, 0.1747]) -Greedy action tensor([ 1.6657, -0.6627, -0.0303, 0.1176]) tensor([0.6696, 0.0652, 0.1228, 0.1424]) -Greedy action tensor([ 1.7426, -0.4038, -0.4151, 0.3560]) tensor([0.6746, 0.0789, 0.0780, 0.1686]) -Greedy action tensor([-0.2613, -0.4184, 0.1776, 0.2512]) tensor([0.1970, 0.1684, 0.3056, 0.3290]) -Greedy action tensor([-1.5402, -0.4972, 0.6505, -0.3854]) tensor([0.0627, 0.1779, 0.5605, 0.1989]) -Greedy action tensor([-1.0034, -0.4342, 1.0244, 1.3917]) tensor([0.0469, 0.0828, 0.3561, 0.5142]) -Greedy action tensor([-1.1179, -0.5828, 0.2537, 0.2707]) tensor([0.0938, 0.1602, 0.3698, 0.3761]) -Greedy action tensor([-0.9496, -0.5410, 0.2108, 0.2908]) tensor([0.1093, 0.1644, 0.3486, 0.3777]) -Greedy action tensor([-1.4637, -0.7009, 0.4814, 0.1505]) tensor([0.0660, 0.1414, 0.4613, 0.3313]) -Greedy action tensor([-1.9025, -0.4403, 0.6363, -0.1519]) tensor([0.0421, 0.1818, 0.5335, 0.2426]) -Greedy action tensor([-0.6356, -0.2401, 0.2396, 0.4734]) tensor([0.1263, 0.1876, 0.3031, 0.3829]) -Greedy action tensor([-1.2413, -0.8974, 0.8899, 1.0969]) tensor([0.0472, 0.0665, 0.3974, 0.4889]) -Greedy action tensor([-1.4751, -0.5179, 0.4844, 0.3981]) tensor([0.0581, 0.1513, 0.4123, 0.3782]) -Greedy action tensor([-1.7841, -0.4205, 0.6306, -0.0253]) tensor([0.0457, 0.1785, 0.5108, 0.2651]) -Greedy action tensor([-1.3676, -0.4559, 0.4586, 0.4668]) tensor([0.0627, 0.1559, 0.3891, 0.3923]) -Greedy action tensor([-1.7672, -0.5170, 0.5582, -0.0737]) tensor([0.0496, 0.1732, 0.5075, 0.2698]) -Greedy action tensor([-1.7257, -0.4953, 0.5397, -0.0221]) tensor([0.0511, 0.1750, 0.4928, 0.2810]) -Greedy action tensor([-1.9007, 0.0462, 0.6225, -0.0193]) tensor([0.0370, 0.2591, 0.4611, 0.2427]) -Greedy action tensor([-1.4882, -0.3809, 1.0622, 1.0454]) tensor([0.0340, 0.1028, 0.4352, 0.4280]) -Greedy action tensor([-1.4083, -0.4430, 0.4726, 0.3283]) tensor([0.0630, 0.1655, 0.4135, 0.3579]) -Greedy action tensor([-1.2468, -0.5631, 0.4132, -0.0417]) tensor([0.0864, 0.1711, 0.4543, 0.2882]) -Greedy action tensor([-1.9311, -0.4583, 0.6623, -0.1678]) tensor([0.0407, 0.1775, 0.5444, 0.2374]) -Greedy action tensor([-1.9207, -0.4503, 0.6524, -0.1685]) tensor([0.0413, 0.1796, 0.5410, 0.2381]) -Greedy action tensor([-1.6582, -0.2572, 0.5040, 0.0923]) tensor([0.0513, 0.2081, 0.4455, 0.2951]) -Greedy action tensor([-1.8492, -0.3889, 0.6052, -0.1166]) tensor([0.0442, 0.1906, 0.5150, 0.2502]) -Greedy action tensor([-0.7162, -0.4426, 0.5313, 0.6653]) tensor([0.1023, 0.1345, 0.3561, 0.4072]) -Greedy action tensor([-1.1239, -0.4040, 0.2333, 0.5283]) tensor([0.0823, 0.1690, 0.3196, 0.4292]) -Greedy action tensor([-0.8423, -0.4156, 0.5206, 1.2872]) tensor([0.0673, 0.1032, 0.2631, 0.5664]) -Greedy action tensor([-0.8531, -0.3254, 1.1871, 1.4687]) tensor([0.0486, 0.0824, 0.3737, 0.4953]) -Greedy action tensor([-1.2375, -0.5858, 0.3581, 0.3435]) tensor([0.0787, 0.1510, 0.3880, 0.3824]) -Greedy action tensor([-1.8371, -0.3485, 0.6730, -0.2921]) tensor([0.0446, 0.1976, 0.5488, 0.2090]) -Greedy action tensor([0.5652, 1.2996, 0.0500, 0.6198]) tensor([0.2111, 0.4399, 0.1261, 0.2229]) -Greedy action tensor([-1.3888, -0.3967, 0.5446, 0.4325]) tensor([0.0596, 0.1606, 0.4117, 0.3681]) -Greedy action tensor([-1.8037, -0.4823, 0.5992, -0.0589]) tensor([0.0464, 0.1741, 0.5135, 0.2659]) -Greedy action tensor([-1.9139, -0.4560, 0.6484, -0.1640]) tensor([0.0416, 0.1789, 0.5399, 0.2396]) -Greedy action tensor([-1.2711, -0.5378, 0.3003, 0.2927]) tensor([0.0789, 0.1643, 0.3798, 0.3770]) -Greedy action tensor([-1.4683, -0.5600, 1.1317, 1.0546]) tensor([0.0340, 0.0843, 0.4578, 0.4238]) -Greedy action tensor([-1.8053, -0.4799, 0.8448, 0.2295]) tensor([0.0376, 0.1417, 0.5328, 0.2879]) -Greedy action tensor([-1.0266, -0.4479, 0.7092, 1.0390]) tensor([0.0612, 0.1091, 0.3471, 0.4827]) -Greedy action tensor([-1.4938, 1.0827, 0.6126, 0.3007]) tensor([0.0352, 0.4633, 0.2895, 0.2120]) -Greedy action tensor([-1.8848, -0.4265, 0.6674, -0.1280]) tensor([0.0418, 0.1797, 0.5364, 0.2421]) -Greedy action tensor([-0.8845, -0.5805, 0.4062, 0.9268]) tensor([0.0826, 0.1119, 0.3002, 0.5053]) -Greedy action tensor([-1.8149, -0.4801, 0.6689, -0.0294]) tensor([0.0440, 0.1670, 0.5269, 0.2621]) -Greedy action tensor([-1.1904, -0.6079, 0.2618, 0.2783]) tensor([0.0877, 0.1570, 0.3746, 0.3808]) -Greedy action tensor([-0.9128, -0.4237, 1.1482, 1.3962]) tensor([0.0487, 0.0794, 0.3822, 0.4898]) -Greedy action tensor([-1.5557, -0.5377, 0.7585, 0.3894]) tensor([0.0479, 0.1326, 0.4846, 0.3350]) -Greedy action tensor([-1.8569, -0.4520, 0.6130, -0.1319]) tensor([0.0444, 0.1811, 0.5252, 0.2493]) -Greedy action tensor([-1.9157, -0.4407, 0.6492, -0.1620]) tensor([0.0414, 0.1810, 0.5384, 0.2392]) -Greedy action tensor([-0.1996, -0.5867, 0.4266, 0.2840]) tensor([0.1934, 0.1313, 0.3617, 0.3136]) -Greedy action tensor([-1.6594, -0.6501, 0.1394, -0.3503]) tensor([0.0741, 0.2034, 0.4479, 0.2745]) -Greedy action tensor([-1.6414, -0.4719, 1.2920, 1.0134]) tensor([0.0269, 0.0865, 0.5047, 0.3820]) -Greedy action tensor([-1.7212, -0.4690, 0.5558, -0.0623]) tensor([0.0513, 0.1794, 0.4999, 0.2694]) -Greedy action tensor([-1.7621, -0.5323, 0.7178, 0.1767]) tensor([0.0429, 0.1467, 0.5122, 0.2982]) -Greedy action tensor([-1.2736, 0.5024, 0.3571, -0.6183]) tensor([0.0717, 0.4237, 0.3664, 0.1382]) -Greedy action tensor([-1.5945, -0.5587, 0.5687, 0.0639]) tensor([0.0563, 0.1586, 0.4896, 0.2955]) -Greedy action tensor([-1.6111, -0.5216, 0.4941, -0.0391]) tensor([0.0588, 0.1749, 0.4829, 0.2833]) -Greedy action tensor([-1.8883, -0.3951, 0.6305, -0.1469]) tensor([0.0424, 0.1889, 0.5267, 0.2421]) -Greedy action tensor([-0.6344, -0.5412, 0.1865, 0.2081]) tensor([0.1494, 0.1640, 0.3396, 0.3470]) -Greedy action tensor([-1.7569, -0.3999, 0.5718, -0.0284]) tensor([0.0481, 0.1869, 0.4939, 0.2710]) -Greedy action tensor([-0.9392, -0.5621, 0.2206, 0.5602]) tensor([0.0988, 0.1440, 0.3149, 0.4423]) -Greedy action tensor([-1.8387, -0.8975, -0.0886, -0.5154]) tensor([0.0765, 0.1960, 0.4402, 0.2873]) -Greedy action tensor([-0.7404, -0.4921, 1.1739, 1.5864]) tensor([0.0518, 0.0664, 0.3512, 0.5306]) -Greedy action tensor([-1.9052, -0.6794, 0.5948, -0.0085]) tensor([0.0430, 0.1465, 0.5239, 0.2866]) -Greedy action tensor([-1.6978, -0.5267, 0.5917, 0.0127]) tensor([0.0509, 0.1643, 0.5029, 0.2818]) -Greedy action tensor([-1.2158, -0.7072, 1.3055, 1.3946]) tensor([0.0348, 0.0579, 0.4334, 0.4738]) -Greedy action tensor([-1.7602, -0.3159, 0.6670, -0.3220]) tensor([0.0481, 0.2040, 0.5451, 0.2028]) -Greedy action tensor([-1.8230, -0.2584, 0.5773, -0.0965]) tensor([0.0446, 0.2132, 0.4916, 0.2506]) -Greedy action tensor([-1.3938, -0.4762, 0.5132, -0.1747]) tensor([0.0734, 0.1838, 0.4943, 0.2485]) -Greedy action tensor([-1.4717, -0.5499, 0.3973, 0.1925]) tensor([0.0655, 0.1646, 0.4243, 0.3457]) -Greedy action tensor([-1.5082, -0.5828, 0.4531, 0.0399]) tensor([0.0652, 0.1645, 0.4636, 0.3067]) -Greedy action tensor([-1.8516, -0.4659, 0.6233, -0.1291]) tensor([0.0445, 0.1779, 0.5286, 0.2491]) -Greedy action tensor([-1.5417, -0.4778, 0.7660, -0.6127]) tensor([0.0607, 0.1758, 0.6099, 0.1536]) -Greedy action tensor([-1.1477, -0.6239, 0.2780, 0.4670]) tensor([0.0842, 0.1422, 0.3504, 0.4233]) -Greedy action tensor([-1.2197, -0.3838, 0.6749, 0.8300]) tensor([0.0564, 0.1302, 0.3752, 0.4382]) -Greedy action tensor([-0.3379, -0.1373, 1.0823, 1.5250]) tensor([0.0781, 0.0955, 0.3232, 0.5032]) -Greedy action tensor([-1.7027, -0.4913, 0.5583, 0.0343]) tensor([0.0509, 0.1711, 0.4886, 0.2894]) -Greedy action tensor([-1.9614, -0.6970, 0.6884, -0.1741]) tensor([0.0405, 0.1436, 0.5737, 0.2422]) -Greedy action tensor([-0.8666, -0.5769, 0.1507, 0.3334]) tensor([0.1187, 0.1586, 0.3284, 0.3942]) -Greedy action tensor([-1.6158, -0.0653, 0.5275, 0.1410]) tensor([0.0499, 0.2353, 0.4256, 0.2892]) -Greedy action tensor([-1.6320, -0.2173, 0.5174, 0.0868]) tensor([0.0519, 0.2135, 0.4452, 0.2894]) -Greedy action tensor([-1.8742, -0.4591, 0.6286, -0.1290]) tensor([0.0434, 0.1785, 0.5298, 0.2484]) -Greedy action tensor([-1.3415, -0.5035, 0.3580, 0.1159]) tensor([0.0765, 0.1768, 0.4184, 0.3284]) -Greedy action tensor([-1.8104, -0.4865, 0.6093, -0.1218]) tensor([0.0467, 0.1755, 0.5251, 0.2527]) -Greedy action tensor([-1.0297, -0.4932, 0.4201, 0.9673]) tensor([0.0697, 0.1192, 0.2972, 0.5138]) -Greedy action tensor([ 0.6509, -0.3444, -0.0892, -0.2530]) tensor([0.4441, 0.1642, 0.2119, 0.1799]) -Greedy action tensor([ 0.3792, -0.1868, -0.1538, -0.4101]) tensor([0.3833, 0.2176, 0.2249, 0.1741]) -Greedy action tensor([ 0.3474, 0.1057, 0.0555, -0.4257]) tensor([0.3340, 0.2623, 0.2495, 0.1542]) -Greedy action tensor([ 6.5386e-01, -3.0781e-01, 1.0459e-04, -2.8321e-01]) tensor([0.4359, 0.1666, 0.2267, 0.1708]) -Greedy action tensor([ 0.5699, -0.2543, -0.0293, -0.3425]) tensor([0.4185, 0.1835, 0.2299, 0.1681]) -Greedy action tensor([ 0.3003, -0.0575, 0.0485, -0.2292]) tensor([0.3262, 0.2281, 0.2536, 0.1921]) -Greedy action tensor([ 0.8288, -0.5319, -0.0400, -0.3003]) tensor([0.5002, 0.1283, 0.2098, 0.1617]) -Greedy action tensor([ 0.6704, -0.8744, 0.0411, -0.4725]) tensor([0.4842, 0.1033, 0.2581, 0.1544]) -Greedy action tensor([ 0.6899, -0.4387, -0.0988, -0.4025]) tensor([0.4732, 0.1531, 0.2150, 0.1587]) -Greedy action tensor([ 0.4760, 0.1032, -0.0497, -0.3070]) tensor([0.3654, 0.2517, 0.2160, 0.1670]) -Greedy action tensor([ 1.0923, -0.8459, -0.1473, -0.6160]) tensor([0.6193, 0.0892, 0.1793, 0.1122]) -Greedy action tensor([ 0.9199, -0.7170, 0.0974, -0.5513]) tensor([0.5366, 0.1044, 0.2358, 0.1232]) -Greedy action tensor([ 0.7766, -0.3606, -0.0656, -0.4906]) tensor([0.4919, 0.1577, 0.2119, 0.1385]) -Greedy action tensor([ 0.5832, -0.0340, 0.0510, -0.1994]) tensor([0.3870, 0.2088, 0.2273, 0.1769]) -Greedy action tensor([ 0.5576, -0.3399, 0.0946, -0.3020]) tensor([0.4065, 0.1657, 0.2558, 0.1721]) -Greedy action tensor([ 0.4776, -0.0361, -0.0795, -0.0286]) tensor([0.3605, 0.2157, 0.2065, 0.2173]) -Greedy action tensor([ 0.5223, -0.1695, 0.0488, -0.2303]) tensor([0.3854, 0.1930, 0.2400, 0.1816]) -Greedy action tensor([ 0.8038, -0.8037, 0.1672, -0.3724]) tensor([0.4907, 0.0983, 0.2596, 0.1513]) -Greedy action tensor([ 0.2377, 0.0343, -0.0160, 0.0245]) tensor([0.2941, 0.2400, 0.2282, 0.2376]) -Greedy action tensor([ 0.7247, -0.5121, 0.0165, -0.4390]) tensor([0.4773, 0.1386, 0.2351, 0.1491]) -Greedy action tensor([ 0.1814, -0.2277, -0.1021, -0.2041]) tensor([0.3229, 0.2144, 0.2431, 0.2196]) -Greedy action tensor([ 0.3518, -0.0666, -0.2255, -0.4292]) tensor([0.3735, 0.2458, 0.2097, 0.1710]) -Greedy action tensor([ 0.5405, -0.6234, -0.0243, -0.2488]) tensor([0.4283, 0.1337, 0.2435, 0.1945]) -Greedy action tensor([ 0.6843, -0.3577, 0.0354, -0.6090]) tensor([0.4652, 0.1641, 0.2431, 0.1276]) -Greedy action tensor([ 0.5774, -0.0746, -0.0764, -0.1561]) tensor([0.3966, 0.2066, 0.2063, 0.1905]) -Greedy action tensor([ 0.6070, -0.1399, -0.0743, -0.2254]) tensor([0.4141, 0.1962, 0.2095, 0.1801]) -Greedy action tensor([ 0.5953, 0.0410, -0.0881, -0.2445]) tensor([0.3982, 0.2288, 0.2011, 0.1719]) -Greedy action tensor([ 0.9558, -0.3747, 0.1411, -0.3278]) tensor([0.5040, 0.1332, 0.2232, 0.1396]) -Greedy action tensor([ 0.5970, 0.1287, 0.0924, -0.0851]) tensor([0.3656, 0.2289, 0.2207, 0.1848]) -Greedy action tensor([ 0.4258, -0.0786, -0.1558, -0.1440]) tensor([0.3665, 0.2213, 0.2049, 0.2073]) -Greedy action tensor([ 0.3627, 0.0186, -0.0230, -0.1289]) tensor([0.3333, 0.2362, 0.2266, 0.2039]) -Greedy action tensor([ 0.1526, -0.0305, -0.0330, -0.3982]) tensor([0.3087, 0.2570, 0.2564, 0.1779]) -Greedy action tensor([ 0.9924, -0.9292, 0.0313, -0.5650]) tensor([0.5749, 0.0841, 0.2199, 0.1211]) -Greedy action tensor([ 0.3819, -0.1059, -0.0656, -0.0868]) tensor([0.3473, 0.2133, 0.2220, 0.2174]) -Greedy action tensor([ 0.9874, -0.5157, -0.1286, -0.3694]) tensor([0.5532, 0.1231, 0.1812, 0.1425]) -Greedy action tensor([ 0.9574, -0.3181, -0.1189, -0.3427]) tensor([0.5284, 0.1476, 0.1801, 0.1440]) -Greedy action tensor([ 0.7156, -0.0553, 0.1010, -0.5781]) tensor([0.4390, 0.2031, 0.2375, 0.1204]) -Greedy action tensor([ 1.2932, -1.0805, 0.1684, -0.6277]) tensor([0.6393, 0.0595, 0.2076, 0.0936]) -Greedy action tensor([ 0.2699, 0.0565, -0.1068, -0.1002]) tensor([0.3140, 0.2537, 0.2155, 0.2169]) -Greedy action tensor([ 0.8544, -0.0777, -0.0516, 0.1304]) tensor([0.4381, 0.1725, 0.1770, 0.2124]) -Greedy action tensor([ 0.8699, -0.9201, -0.0444, -0.4450]) tensor([0.5446, 0.0909, 0.2183, 0.1462]) -Greedy action tensor([ 0.5720, -0.3132, -0.1470, -0.3515]) tensor([0.4354, 0.1796, 0.2121, 0.1729]) -Greedy action tensor([ 0.4226, 0.0889, 0.0322, -0.3668]) tensor([0.3512, 0.2516, 0.2377, 0.1595]) -Greedy action tensor([ 0.5097, -0.4185, -0.0478, -0.4636]) tensor([0.4263, 0.1685, 0.2441, 0.1611]) -Greedy action tensor([ 0.9224, -0.6324, -0.3080, -0.6299]) tensor([0.5830, 0.1232, 0.1704, 0.1235]) -Greedy action tensor([ 0.6162, -0.3581, 0.1036, -0.4626]) tensor([0.4317, 0.1629, 0.2586, 0.1468]) -Greedy action tensor([ 1.0708, -0.3545, 0.0050, -0.2974]) tensor([0.5436, 0.1307, 0.1873, 0.1384]) -Greedy action tensor([ 0.8622, -0.3225, -0.0845, -0.1827]) tensor([0.4889, 0.1495, 0.1897, 0.1720]) -Greedy action tensor([ 0.6324, -0.0974, 0.0197, -0.1975]) tensor([0.4065, 0.1959, 0.2203, 0.1773]) -Greedy action tensor([ 0.6869, -0.3905, 0.0468, -0.2755]) tensor([0.4445, 0.1513, 0.2344, 0.1698]) -Greedy action tensor([ 0.8335, -0.5217, -0.0927, -0.5772]) tensor([0.5269, 0.1359, 0.2087, 0.1285]) -Greedy action tensor([ 0.3775, 0.0874, 0.0542, -0.2163]) tensor([0.3307, 0.2474, 0.2393, 0.1826]) -Greedy action tensor([ 0.3910, -0.0524, 0.1035, -0.2134]) tensor([0.3403, 0.2184, 0.2553, 0.1859]) -Greedy action tensor([ 0.5348, -0.5875, -0.0739, -0.1181]) tensor([0.4184, 0.1362, 0.2276, 0.2178]) -Greedy action tensor([ 0.9027, -0.2323, -0.1623, -0.0760]) tensor([0.4897, 0.1574, 0.1688, 0.1840]) -Greedy action tensor([ 0.4894, -0.5025, 0.0041, -0.6319]) tensor([0.4325, 0.1604, 0.2662, 0.1409]) -Greedy action tensor([ 0.8889, -0.5725, -0.1444, -0.5639]) tensor([0.5490, 0.1273, 0.1953, 0.1284]) -Greedy action tensor([ 0.3966, -0.2121, -0.0723, -0.1167]) tensor([0.3612, 0.1965, 0.2260, 0.2162]) -Greedy action tensor([ 0.7486, -0.3950, 0.0847, -0.3699]) tensor([0.4629, 0.1475, 0.2383, 0.1513]) -Greedy action tensor([ 0.1169, -0.0395, -0.1039, -0.4188]) tensor([0.3084, 0.2638, 0.2473, 0.1805]) -Greedy action tensor([ 0.7092, -0.4340, -0.2918, -0.3653]) tensor([0.4931, 0.1572, 0.1812, 0.1684]) -Greedy action tensor([ 0.6651, -0.3371, -0.0092, -0.3431]) tensor([0.4461, 0.1638, 0.2273, 0.1628]) -Greedy action tensor([ 1.0476, -0.6748, -0.2691, -0.5834]) tensor([0.6089, 0.1088, 0.1632, 0.1192]) -Greedy action tensor([ 1.3145, -0.3034, 0.0514, -0.5443]) tensor([0.6109, 0.1212, 0.1727, 0.0952]) -Greedy action tensor([ 0.6073, -0.2321, 0.0301, -0.3871]) tensor([0.4231, 0.1828, 0.2376, 0.1565]) -Greedy action tensor([ 0.8252, -0.6805, 0.0960, -0.5657]) tensor([0.5120, 0.1136, 0.2469, 0.1274]) -Greedy action tensor([ 0.4229, 0.1269, -0.0884, -0.2280]) tensor([0.3490, 0.2596, 0.2093, 0.1820]) -Greedy action tensor([ 0.8321, -0.8042, 0.1413, -0.5750]) tensor([0.5153, 0.1003, 0.2582, 0.1262]) -Greedy action tensor([ 0.4934, -0.0576, -0.1810, -0.4609]) tensor([0.4047, 0.2333, 0.2062, 0.1558]) -Greedy action tensor([ 0.4419, 0.0894, -0.0350, -0.2366]) tensor([0.3533, 0.2483, 0.2192, 0.1792]) -Greedy action tensor([ 0.5003, -0.0498, 0.0754, -0.1970]) tensor([0.3665, 0.2114, 0.2396, 0.1825]) -Greedy action tensor([ 0.7709, -0.5617, -0.2565, -0.4433]) tensor([0.5212, 0.1375, 0.1865, 0.1548]) -Greedy action tensor([ 0.5747, -0.2335, -0.2032, -0.2373]) tensor([0.4257, 0.1897, 0.1955, 0.1890]) -Greedy action tensor([ 0.3256, -0.0161, -0.0298, -0.3021]) tensor([0.3395, 0.2412, 0.2380, 0.1812]) -Greedy action tensor([ 0.5827, -0.4291, -0.0692, -0.4987]) tensor([0.4497, 0.1635, 0.2343, 0.1525]) -Greedy action tensor([ 0.3789, -0.0168, -0.0908, -0.0673]) tensor([0.3403, 0.2291, 0.2128, 0.2178]) -Greedy action tensor([ 0.7477, -0.3739, -0.0372, -0.4791]) tensor([0.4819, 0.1570, 0.2198, 0.1413]) -Greedy action tensor([ 0.7596, -0.4145, -0.0783, -0.5069]) tensor([0.4942, 0.1528, 0.2138, 0.1393]) -Greedy action tensor([ 0.3220, -0.1038, 0.0272, -0.5695]) tensor([0.3561, 0.2326, 0.2652, 0.1460]) -Greedy action tensor([ 0.5475, 0.0231, -0.0925, -0.1654]) tensor([0.3832, 0.2268, 0.2021, 0.1879]) -Greedy action tensor([ 0.6299, -0.1518, -0.1169, -0.1356]) tensor([0.4173, 0.1909, 0.1977, 0.1941]) -Greedy action tensor([ 0.7173, -0.8747, 0.1109, 1.2763]) tensor([0.2859, 0.0582, 0.1559, 0.5000]) -Greedy action tensor([-0.1061, -0.2057, 0.3638, 1.6655]) tensor([0.1066, 0.0964, 0.1705, 0.6265]) -Greedy action tensor([-0.0642, 0.1166, -1.2944, 1.1563]) tensor([0.1701, 0.2038, 0.0497, 0.5764]) -Greedy action tensor([ 0.0654, -0.8231, 0.4630, 0.8128]) tensor([0.1996, 0.0821, 0.2970, 0.4214]) -Greedy action tensor([-0.4315, -0.1551, -0.9926, 0.6133]) tensor([0.1745, 0.2300, 0.0995, 0.4960]) -Greedy action tensor([-0.5642, -1.3785, 1.0161, 0.3987]) tensor([0.1121, 0.0497, 0.5445, 0.2937]) -Greedy action tensor([-1.0624, -1.9635, -0.1745, 1.0331]) tensor([0.0836, 0.0339, 0.2031, 0.6794]) -Greedy action tensor([1.6580, 0.9602, 0.1830, 1.9590]) tensor([0.3249, 0.1617, 0.0743, 0.4391]) -Greedy action tensor([ 2.2030, -2.1998, 0.0190, 1.3695]) tensor([0.6413, 0.0079, 0.0722, 0.2787]) -Greedy action tensor([ 0.1427, -1.9160, 0.1039, 0.2332]) tensor([0.3141, 0.0401, 0.3021, 0.3438]) -Greedy action tensor([ 4.3642e-01, -1.1964e+00, 8.1159e-01, 8.7322e-04]) tensor([0.3033, 0.0593, 0.4413, 0.1962]) -Greedy action tensor([ 0.3982, -0.5589, -0.8088, 2.2462]) tensor([0.1245, 0.0478, 0.0372, 0.7904]) -Greedy action tensor([ 0.5542, -0.4752, -0.4438, 1.5443]) tensor([0.2264, 0.0809, 0.0834, 0.6093]) -Greedy action tensor([ 1.5419, -0.8840, 0.2085, 0.7165]) tensor([0.5587, 0.0494, 0.1472, 0.2447]) -Greedy action tensor([-0.4047, 0.8924, 1.1419, 0.4498]) tensor([0.0854, 0.3126, 0.4012, 0.2008]) -Greedy action tensor([ 0.5439, -1.7682, 1.7357, -0.6485]) tensor([0.2130, 0.0211, 0.7013, 0.0646]) -Greedy action tensor([ 0.0708, 0.2942, -0.1193, 1.3248]) tensor([0.1519, 0.1900, 0.1256, 0.5325]) -Greedy action tensor([1.2235, 0.2402, 0.6999, 0.7501]) tensor([0.3862, 0.1445, 0.2288, 0.2406]) -Greedy action tensor([ 0.0626, -1.7031, 0.2784, 0.9609]) tensor([0.2054, 0.0351, 0.2549, 0.5045]) -Greedy action tensor([-0.2440, -0.6516, -0.2870, 1.5189]) tensor([0.1183, 0.0787, 0.1133, 0.6896]) -Greedy action tensor([ 0.8606, -0.6057, 0.0220, 1.3726]) tensor([0.3001, 0.0693, 0.1298, 0.5008]) -Greedy action tensor([-0.1780, 0.5332, -0.7177, 0.3588]) tensor([0.1876, 0.3821, 0.1094, 0.3209]) -Greedy action tensor([1.2342, 0.6607, 0.4356, 0.7678]) tensor([0.3787, 0.2134, 0.1704, 0.2375]) -Greedy action tensor([ 0.4733, -1.8372, -0.5025, -0.8176]) tensor([0.5711, 0.0567, 0.2152, 0.1571]) -Greedy action tensor([ 1.6457, -0.5851, 0.8858, 1.9665]) tensor([0.3386, 0.0364, 0.1584, 0.4667]) -Greedy action tensor([ 0.9473, -0.2473, 0.4995, 0.2721]) tensor([0.4080, 0.1236, 0.2607, 0.2077]) -Greedy action tensor([ 1.3362, -1.7099, -0.7000, 0.4399]) tensor([0.6305, 0.0300, 0.0823, 0.2573]) -Greedy action tensor([ 0.0397, -0.3546, -0.4563, 0.8497]) tensor([0.2207, 0.1488, 0.1344, 0.4961]) -Greedy action tensor([ 0.5654, -0.5025, 0.6361, 0.4289]) tensor([0.3040, 0.1045, 0.3263, 0.2652]) -Greedy action tensor([ 0.8113, -0.0431, 0.6623, 0.5431]) tensor([0.3277, 0.1394, 0.2823, 0.2506]) -Greedy action tensor([ 0.5414, -1.3652, -0.2185, 0.2105]) tensor([0.4283, 0.0636, 0.2003, 0.3077]) -Greedy action tensor([-0.0610, 1.7221, 1.6260, -0.5681]) tensor([0.0772, 0.4592, 0.4171, 0.0465]) -Greedy action tensor([ 0.2574, -1.2624, 0.1847, 0.4198]) tensor([0.3008, 0.0658, 0.2797, 0.3538]) -Greedy action tensor([-0.2407, 0.8037, 0.6285, 0.0875]) tensor([0.1313, 0.3732, 0.3132, 0.1823]) -Greedy action tensor([ 0.5859, 0.1285, -0.2315, 1.9886]) tensor([0.1628, 0.1031, 0.0719, 0.6622]) -Greedy action tensor([ 0.5585, -1.2689, 1.2649, -0.7951]) tensor([0.2902, 0.0467, 0.5882, 0.0750]) -Greedy action tensor([-1.3527, 0.6919, 1.5085, -1.1081]) tensor([0.0364, 0.2811, 0.6361, 0.0465]) -Greedy action tensor([ 0.7817, -0.0285, 0.7768, 0.6915]) tensor([0.2982, 0.1326, 0.2967, 0.2725]) -Greedy action tensor([-0.4302, -0.2326, -0.1627, -0.9921]) tensor([0.2442, 0.2975, 0.3191, 0.1392]) -Greedy action tensor([-1.2705, -0.5091, -0.7715, -0.5282]) tensor([0.1452, 0.3108, 0.2391, 0.3049]) -Greedy action tensor([ 0.9255, -1.0516, 0.9064, 0.2524]) tensor([0.3803, 0.0527, 0.3731, 0.1940]) -Greedy action tensor([ 0.1647, -0.1160, 0.7050, -1.3948]) tensor([0.2716, 0.2051, 0.4662, 0.0571]) -Greedy action tensor([ 0.9057, -0.3805, 0.7688, 1.5645]) tensor([0.2450, 0.0677, 0.2137, 0.4735]) -Greedy action tensor([-0.1109, -1.6615, -0.0204, 0.7272]) tensor([0.2165, 0.0459, 0.2370, 0.5005]) -Greedy action tensor([ 1.3399, -0.7955, -0.9845, 1.7916]) tensor([0.3588, 0.0424, 0.0351, 0.5637]) -Greedy action tensor([-0.5274, 0.6809, 1.5266, -0.8031]) tensor([0.0775, 0.2594, 0.6043, 0.0588]) -Greedy action tensor([-0.3295, -0.7199, -0.4653, 1.9892]) tensor([0.0787, 0.0532, 0.0687, 0.7994]) -Greedy action tensor([-0.7113, -0.2803, -0.8189, 1.9072]) tensor([0.0583, 0.0897, 0.0524, 0.7996]) -Greedy action tensor([ 0.9524, -1.0464, 0.3457, 1.0095]) tensor([0.3650, 0.0495, 0.1990, 0.3865]) -Greedy action tensor([ 1.6363, -0.7262, 1.9029, 0.0028]) tensor([0.3854, 0.0363, 0.5031, 0.0752]) -Greedy action tensor([-0.7778, -1.4551, -0.8665, 0.0268]) tensor([0.2146, 0.1090, 0.1964, 0.4799]) -Greedy action tensor([ 0.4515, 0.2105, 0.2746, -0.0824]) tensor([0.3115, 0.2448, 0.2610, 0.1826]) -Greedy action tensor([ 1.5907, -1.0275, 1.2499, -0.0181]) tensor([0.5040, 0.0368, 0.3584, 0.1009]) -Greedy action tensor([-0.3502, -1.1868, -1.3743, 0.6769]) tensor([0.2181, 0.0945, 0.0783, 0.6091]) -Greedy action tensor([ 0.0669, -1.4344, 0.7463, 0.3066]) tensor([0.2239, 0.0499, 0.4417, 0.2845]) -Greedy action tensor([ 0.3742, 0.7337, 0.5466, -0.1737]) tensor([0.2382, 0.3412, 0.2830, 0.1377]) -Greedy action tensor([ 1.2180, -0.9586, 1.4196, 0.6200]) tensor([0.3464, 0.0393, 0.4238, 0.1905]) -Greedy action tensor([-0.2689, 0.7161, 0.3197, 1.1507]) tensor([0.1040, 0.2785, 0.1874, 0.4301]) -Greedy action tensor([ 0.4079, -2.0312, -0.0197, 0.1924]) tensor([0.3929, 0.0343, 0.2562, 0.3167]) -Greedy action tensor([-0.8302, -0.5712, -0.3523, 0.7956]) tensor([0.1112, 0.1441, 0.1794, 0.5653]) -Greedy action tensor([-1.0884, -1.1778, 0.0455, 1.5078]) tensor([0.0542, 0.0496, 0.1686, 0.7276]) -Greedy action tensor([ 0.3035, 0.6854, 1.4522, -0.5231]) tensor([0.1651, 0.2419, 0.5208, 0.0722]) -Greedy action tensor([ 0.3013, 0.0137, 0.1079, -0.0129]) tensor([0.3026, 0.2270, 0.2494, 0.2210]) -Greedy action tensor([-1.1122, -1.4633, 0.5277, -0.1288]) tensor([0.1049, 0.0738, 0.5408, 0.2805]) -Greedy action tensor([ 1.7506, -1.3907, 0.9015, 0.9955]) tensor([0.5152, 0.0223, 0.2204, 0.2421]) -Greedy action tensor([ 0.7080, -0.1686, 0.0900, 0.6239]) tensor([0.3479, 0.1448, 0.1875, 0.3198]) -Greedy action tensor([ 0.7191, 0.0464, -1.5238, 1.5866]) tensor([0.2502, 0.1277, 0.0266, 0.5956]) -Greedy action tensor([-1.6640, -1.5816, -0.2576, -0.1115]) tensor([0.0918, 0.0997, 0.3748, 0.4337]) -Greedy action tensor([ 0.2399, -0.3283, 1.6616, -0.0984]) tensor([0.1557, 0.0882, 0.6451, 0.1110]) -Greedy action tensor([-0.1883, 0.2942, 0.4691, 0.2509]) tensor([0.1639, 0.2655, 0.3163, 0.2543]) -Greedy action tensor([-0.0885, 0.8580, 0.3659, 1.2588]) tensor([0.1111, 0.2863, 0.1750, 0.4275]) -Greedy action tensor([ 1.4538, -0.0300, 0.2662, 1.2867]) tensor([0.4205, 0.0954, 0.1282, 0.3558]) -Greedy action tensor([ 1.0770, -0.5128, 0.4057, 1.1906]) tensor([0.3527, 0.0719, 0.1802, 0.3951]) -Greedy action tensor([-0.9164, -0.3894, -1.2365, 0.4291]) tensor([0.1377, 0.2333, 0.1000, 0.5289]) -Greedy action tensor([ 0.0233, -1.1830, 0.5716, 0.6219]) tensor([0.2062, 0.0617, 0.3568, 0.3752]) -Greedy action tensor([ 1.0938, -0.1042, 1.5148, 0.7476]) tensor([0.2831, 0.0854, 0.4313, 0.2002]) -Greedy action tensor([ 0.2219, 0.6609, -0.1946, -0.0442]) tensor([0.2514, 0.3901, 0.1658, 0.1927]) -Greedy action tensor([ 0.8573, 1.0126, -0.3844, 0.6619]) tensor([0.3049, 0.3562, 0.0881, 0.2508]) -Greedy action tensor([ 0.3963, 0.7383, -0.5196, 0.3216]) tensor([0.2677, 0.3768, 0.1071, 0.2484]) -Greedy action tensor([-0.0094, 1.0872, -0.2962, 0.5792]) tensor([0.1528, 0.4574, 0.1147, 0.2752]) -Greedy action tensor([ 0.3491, -1.1996, 1.5583, -0.1628]) tensor([0.1937, 0.0412, 0.6491, 0.1161]) -Greedy action tensor([-0.8978, -0.5528, 0.6915, 1.2869]) tensor([0.0617, 0.0872, 0.3025, 0.5486]) -Greedy action tensor([-1.9379, -0.4521, 0.6626, -0.1763]) tensor([0.0405, 0.1788, 0.5451, 0.2356]) -Greedy action tensor([-1.5686, -0.5446, 0.4653, 0.0717]) tensor([0.0603, 0.1679, 0.4609, 0.3109]) -Greedy action tensor([-0.8974, -0.5772, 1.1974, 1.5359]) tensor([0.0457, 0.0629, 0.3710, 0.5205]) -Greedy action tensor([-1.4661, -0.1364, 0.5073, 0.2594]) tensor([0.0568, 0.2149, 0.4090, 0.3192]) -Greedy action tensor([-0.5298, -0.5651, 0.9666, 1.1581]) tensor([0.0845, 0.0815, 0.3772, 0.4568]) -Greedy action tensor([-0.9709, -0.6622, 0.8467, 1.3016]) tensor([0.0549, 0.0747, 0.3379, 0.5325]) -Greedy action tensor([-1.4629, -0.5603, 0.3954, 0.1276]) tensor([0.0676, 0.1668, 0.4337, 0.3318]) -Greedy action tensor([-1.8433, -0.4686, 0.6080, -0.1242]) tensor([0.0452, 0.1786, 0.5242, 0.2520]) -Greedy action tensor([-1.9537, -0.8353, 0.3549, -0.2518]) tensor([0.0510, 0.1561, 0.5132, 0.2797]) -Greedy action tensor([-1.6195, -0.3000, -0.1263, -0.4049]) tensor([0.0796, 0.2978, 0.3544, 0.2682]) -Greedy action tensor([-1.5473, -0.5563, 1.0242, 0.8717]) tensor([0.0357, 0.0962, 0.4671, 0.4010]) -Greedy action tensor([-1.7008, -0.5107, 0.5341, -0.0269]) tensor([0.0527, 0.1733, 0.4928, 0.2812]) -Greedy action tensor([-1.5946, 0.0899, 0.4132, 0.0256]) tensor([0.0529, 0.2853, 0.3942, 0.2676]) -Greedy action tensor([-0.8516, 0.4236, 0.3063, -0.2639]) tensor([0.1046, 0.3743, 0.3329, 0.1882]) -Greedy action tensor([-1.5346, -0.5345, 0.4780, 0.2600]) tensor([0.0581, 0.1579, 0.4346, 0.3495]) -Greedy action tensor([-1.7722, -0.4860, 0.5985, -0.1578]) tensor([0.0491, 0.1779, 0.5261, 0.2469]) -Greedy action tensor([-1.7097, -0.2774, 0.5214, -0.0586]) tensor([0.0507, 0.2125, 0.4723, 0.2645]) -Greedy action tensor([-1.7121, -0.4625, 1.2337, 0.9324]) tensor([0.0266, 0.0928, 0.5061, 0.3745]) -Greedy action tensor([-1.3960, -0.5490, 0.6603, -0.1042]) tensor([0.0676, 0.1577, 0.5286, 0.2461]) -Greedy action tensor([-0.5680, -0.1888, 0.8568, 1.4709]) tensor([0.0699, 0.1022, 0.2907, 0.5372]) -Greedy action tensor([-1.7482, -0.5011, 0.7183, 0.1877]) tensor([0.0431, 0.1501, 0.5080, 0.2988]) -Greedy action tensor([-1.8226, -0.6943, 1.0256, 0.5594]) tensor([0.0311, 0.0960, 0.5364, 0.3365]) -Greedy action tensor([-1.7169, 0.7864, 0.6571, 0.1870]) tensor([0.0326, 0.3985, 0.3501, 0.2188]) -Greedy action tensor([-0.7000, -0.5669, 0.1703, 0.2865]) tensor([0.1387, 0.1584, 0.3311, 0.3719]) -Greedy action tensor([-1.9220, -0.4714, 0.6892, -0.1521]) tensor([0.0404, 0.1723, 0.5501, 0.2372]) -Greedy action tensor([-1.8064, -0.3490, 0.5953, -0.0696]) tensor([0.0454, 0.1951, 0.5015, 0.2580]) -Greedy action tensor([-0.8071, -0.4239, 0.4601, -0.4402]) tensor([0.1340, 0.1966, 0.4759, 0.1934]) -Greedy action tensor([-1.9005, -0.4247, 0.6470, -0.1426]) tensor([0.0418, 0.1826, 0.5334, 0.2422]) -Greedy action tensor([-1.3726, 0.4854, 0.5291, -0.4561]) tensor([0.0602, 0.3860, 0.4032, 0.1506]) -Greedy action tensor([-1.0306, -0.6289, 0.7702, 1.3202]) tensor([0.0525, 0.0785, 0.3179, 0.5511]) -Greedy action tensor([-1.8819, -0.4483, 0.7223, -0.0355]) tensor([0.0399, 0.1674, 0.5397, 0.2530]) -Greedy action tensor([-1.9065, -0.5763, 1.1196, 0.5952]) tensor([0.0266, 0.1006, 0.5483, 0.3245]) -Greedy action tensor([-1.6999, -0.4702, 0.6118, 0.0712]) tensor([0.0490, 0.1677, 0.4949, 0.2883]) -Greedy action tensor([-1.8240, -0.5848, 0.1888, -0.3237]) tensor([0.0609, 0.2103, 0.4558, 0.2730]) -Greedy action tensor([-1.8018, -0.2909, 0.5837, -0.0623]) tensor([0.0453, 0.2051, 0.4918, 0.2578]) -Greedy action tensor([-1.8082, -0.4834, 0.6017, -0.0996]) tensor([0.0467, 0.1756, 0.5198, 0.2578]) -Greedy action tensor([-1.7614, -0.4722, 0.5904, -0.0720]) tensor([0.0487, 0.1766, 0.5112, 0.2636]) -Greedy action tensor([-1.7780, -0.3974, 0.5896, -0.0664]) tensor([0.0472, 0.1877, 0.5037, 0.2614]) -Greedy action tensor([-1.7613, -0.5490, 0.5846, -0.0850]) tensor([0.0496, 0.1668, 0.5183, 0.2653]) -Greedy action tensor([-0.7630, -0.0738, 0.6396, 1.3932]) tensor([0.0637, 0.1269, 0.2590, 0.5503]) -Greedy action tensor([-1.5309, -0.5048, 0.4134, 0.1210]) tensor([0.0625, 0.1744, 0.4369, 0.3261]) -Greedy action tensor([-1.8019, -0.3645, 0.5849, -0.1263]) tensor([0.0467, 0.1964, 0.5076, 0.2493]) -Greedy action tensor([-0.0428, -0.0187, 0.2360, 1.0973]) tensor([0.1545, 0.1582, 0.2042, 0.4831]) -Greedy action tensor([-1.1926, -0.6112, 0.4415, 0.2019]) tensor([0.0837, 0.1497, 0.4290, 0.3376]) -Greedy action tensor([-1.5236, -0.4924, 0.7399, 0.6314]) tensor([0.0454, 0.1272, 0.4362, 0.3913]) -Greedy action tensor([-1.8217, -0.4728, 0.6151, -0.0615]) tensor([0.0452, 0.1743, 0.5174, 0.2630]) -Greedy action tensor([-1.2074, -0.5516, 0.2717, 0.3378]) tensor([0.0833, 0.1605, 0.3656, 0.3906]) -Greedy action tensor([-1.7184, -0.3379, 0.5599, -0.0120]) tensor([0.0494, 0.1964, 0.4821, 0.2721]) -Greedy action tensor([-1.8811, -0.4241, 0.6310, -0.1482]) tensor([0.0430, 0.1844, 0.5297, 0.2430]) -Greedy action tensor([-1.2866, -0.3856, 0.5079, 0.6563]) tensor([0.0608, 0.1496, 0.3656, 0.4240]) -Greedy action tensor([-1.9296, -0.4378, 0.6623, -0.1656]) tensor([0.0406, 0.1804, 0.5421, 0.2369]) -Greedy action tensor([-0.7787, -0.3797, 0.5165, 1.2327]) tensor([0.0734, 0.1095, 0.2682, 0.5489]) -Greedy action tensor([-1.9532, -0.7015, 0.2745, -0.2560]) tensor([0.0520, 0.1818, 0.4824, 0.2838]) -Greedy action tensor([-1.6687, 0.0502, 0.1244, -0.3211]) tensor([0.0608, 0.3394, 0.3656, 0.2342]) -Greedy action tensor([-1.7684, -0.2799, 0.1415, -0.3228]) tensor([0.0609, 0.2697, 0.4111, 0.2584]) -Greedy action tensor([-1.8293, -0.4555, 0.6061, -0.1268]) tensor([0.0457, 0.1807, 0.5225, 0.2511]) -Greedy action tensor([-1.5918, -0.5301, 0.4636, 0.0702]) tensor([0.0589, 0.1704, 0.4602, 0.3105]) -Greedy action tensor([-0.3561, -0.2927, 1.1322, 1.6767]) tensor([0.0708, 0.0754, 0.3135, 0.5403]) -Greedy action tensor([-1.9599, -0.7175, 0.8048, 0.0382]) tensor([0.0361, 0.1250, 0.5728, 0.2661]) -Greedy action tensor([-1.8377, -0.3294, 0.5981, -0.0954]) tensor([0.0441, 0.1995, 0.5043, 0.2521]) -Greedy action tensor([-0.9992, -0.4490, 0.4134, 0.9569]) tensor([0.0719, 0.1246, 0.2952, 0.5083]) -Greedy action tensor([-0.2756, -0.2435, 0.8132, 1.6996]) tensor([0.0819, 0.0846, 0.2433, 0.5903]) -Greedy action tensor([-1.6425, -0.3372, 0.6403, 0.1935]) tensor([0.0482, 0.1777, 0.4721, 0.3020]) -Greedy action tensor([-1.2927, -0.2211, 0.5561, 0.6271]) tensor([0.0585, 0.1708, 0.3716, 0.3990]) -Greedy action tensor([-1.4361, 0.2540, 0.5713, -0.4585]) tensor([0.0605, 0.3280, 0.4506, 0.1609]) -Greedy action tensor([-1.7307, -0.5269, 0.9087, 0.5420]) tensor([0.0357, 0.1188, 0.4994, 0.3461]) -Greedy action tensor([-1.5296, -0.8480, -0.1515, -0.6527]) tensor([0.1070, 0.2115, 0.4244, 0.2571]) -Greedy action tensor([-1.7253, -0.4475, 0.6279, 0.0517]) tensor([0.0476, 0.1707, 0.5004, 0.2813]) -Greedy action tensor([-1.9746, -0.4961, 1.2133, 0.6182]) tensor([0.0233, 0.1020, 0.5638, 0.3109]) -Greedy action tensor([-1.8929, -0.4936, 0.7222, -0.0947]) tensor([0.0404, 0.1637, 0.5520, 0.2439]) -Greedy action tensor([-1.8437, -0.4698, 0.6188, -0.0902]) tensor([0.0445, 0.1759, 0.5225, 0.2571]) -Greedy action tensor([-1.3636, -0.2920, 0.3722, 0.0558]) tensor([0.0728, 0.2127, 0.4133, 0.3012]) -Greedy action tensor([-1.4475, -0.5121, 0.8146, 0.7103]) tensor([0.0459, 0.1169, 0.4404, 0.3968]) -Greedy action tensor([-0.8632, -0.5807, 0.2285, 0.4068]) tensor([0.1128, 0.1496, 0.3360, 0.4016]) -Greedy action tensor([-0.6729, -0.1857, 0.1384, -0.2163]) tensor([0.1549, 0.2521, 0.3486, 0.2445]) -Greedy action tensor([-1.3855, -0.5887, 0.3835, 0.0943]) tensor([0.0742, 0.1646, 0.4352, 0.3259]) -Greedy action tensor([-1.2563, -0.5234, 0.3468, 0.4785]) tensor([0.0729, 0.1517, 0.3622, 0.4132]) -Greedy action tensor([-0.9767, 0.0625, -0.0110, 1.0326]) tensor([0.0719, 0.2032, 0.1888, 0.5361]) -Greedy action tensor([-1.8886, -0.3768, 0.6293, -0.1410]) tensor([0.0422, 0.1915, 0.5238, 0.2424]) -Greedy action tensor([-1.8465, -0.3955, 0.6216, -0.0821]) tensor([0.0437, 0.1863, 0.5152, 0.2549]) -Greedy action tensor([ 1.6381, -0.3958, -0.5552, 0.3229]) tensor([0.6619, 0.0866, 0.0738, 0.1777]) -Greedy action tensor([ 1.0782, -0.4939, -0.4722, 0.4233]) tensor([0.5157, 0.1071, 0.1094, 0.2679]) -Greedy action tensor([ 1.3906, -0.5169, -0.5213, 0.4389]) tensor([0.5944, 0.0882, 0.0879, 0.2295]) -Greedy action tensor([ 1.1568, 0.1110, 0.2902, -0.0752]) tensor([0.4846, 0.1703, 0.2037, 0.1414]) -Greedy action tensor([ 0.7397, -0.3035, -0.1123, 0.1024]) tensor([0.4333, 0.1527, 0.1849, 0.2291]) -Greedy action tensor([ 0.8887, -0.4115, -0.5876, -0.0439]) tensor([0.5278, 0.1438, 0.1206, 0.2077]) -Greedy action tensor([ 1.4608, -0.1763, -0.1212, -0.3333]) tensor([0.6384, 0.1242, 0.1312, 0.1062]) -Greedy action tensor([ 0.9695, -0.0578, -0.1191, 0.0289]) tensor([0.4796, 0.1717, 0.1615, 0.1872]) -Greedy action tensor([ 0.9926, -0.3657, -0.2102, 0.1829]) tensor([0.4994, 0.1284, 0.1500, 0.2222]) -Greedy action tensor([ 1.0076, -0.5515, -0.3594, 0.2498]) tensor([0.5171, 0.1088, 0.1318, 0.2424]) -Greedy action tensor([ 0.7712, -0.0450, -0.0589, -0.3556]) tensor([0.4541, 0.2008, 0.1980, 0.1472]) -Greedy action tensor([ 1.0417, -0.1477, -0.3027, -0.0859]) tensor([0.5294, 0.1611, 0.1380, 0.1714]) -Greedy action tensor([ 2.1380, -1.1626, -0.3331, 0.6674]) tensor([0.7401, 0.0273, 0.0625, 0.1701]) -Greedy action tensor([ 1.9247, -0.6948, -0.3412, 0.3212]) tensor([0.7258, 0.0529, 0.0753, 0.1460]) -Greedy action tensor([ 1.3456, -0.3363, -0.3851, 0.3378]) tensor([0.5786, 0.1076, 0.1025, 0.2112]) -Greedy action tensor([ 0.2854, -0.3579, 0.0120, -0.1450]) tensor([0.3405, 0.1790, 0.2591, 0.2214]) -Greedy action tensor([ 1.2398, -0.3861, -0.0281, 0.2376]) tensor([0.5419, 0.1066, 0.1525, 0.1989]) -Greedy action tensor([ 0.9281, -0.4834, -0.1543, 0.4510]) tensor([0.4539, 0.1106, 0.1538, 0.2817]) -Greedy action tensor([ 1.3674, -0.5607, -0.0973, -0.0595]) tensor([0.6186, 0.0900, 0.1430, 0.1485]) -Greedy action tensor([ 1.3124, 0.0745, -0.2401, 0.4089]) tensor([0.5244, 0.1521, 0.1110, 0.2125]) -Greedy action tensor([ 0.9307, -0.6762, -0.3928, 0.2002]) tensor([0.5132, 0.1029, 0.1366, 0.2472]) -Greedy action tensor([ 0.8139, -0.4062, -0.0564, -0.1610]) tensor([0.4782, 0.1412, 0.2003, 0.1804]) -Greedy action tensor([ 1.1194, -0.5143, -0.4165, 0.0992]) tensor([0.5647, 0.1102, 0.1215, 0.2036]) -Greedy action tensor([ 1.3206, 0.0487, 0.0158, -0.1822]) tensor([0.5637, 0.1580, 0.1529, 0.1254]) -Greedy action tensor([ 1.4890, -0.1914, -0.3043, 0.1310]) tensor([0.6212, 0.1157, 0.1034, 0.1598]) -Greedy action tensor([ 1.0020, -0.4375, -0.1579, 0.1046]) tensor([0.5107, 0.1211, 0.1601, 0.2082]) -Greedy action tensor([ 1.6438, -0.3804, -0.4846, 0.1767]) tensor([0.6749, 0.0892, 0.0803, 0.1556]) -Greedy action tensor([ 1.2760, -0.3930, -0.5876, 0.8387]) tensor([0.5027, 0.0947, 0.0780, 0.3246]) -Greedy action tensor([ 1.5165, -0.8351, -0.2713, 0.3360]) tensor([0.6371, 0.0607, 0.1066, 0.1957]) -Greedy action tensor([ 2.7514, 0.3835, -0.8663, 0.7063]) tensor([0.8001, 0.0749, 0.0215, 0.1035]) -Greedy action tensor([ 1.3209, -0.6883, -0.3583, 0.1928]) tensor([0.6082, 0.0816, 0.1134, 0.1968]) -Greedy action tensor([ 0.7036, -0.1583, -0.3745, 0.5890]) tensor([0.3767, 0.1591, 0.1282, 0.3359]) -Greedy action tensor([ 1.1974, -0.5692, -0.0641, 0.3274]) tensor([0.5339, 0.0912, 0.1512, 0.2237]) -Greedy action tensor([ 1.6519, -0.7890, -0.3455, 0.5301]) tensor([0.6458, 0.0562, 0.0876, 0.2103]) -Greedy action tensor([ 1.2264, -0.3863, -0.2712, -0.0351]) tensor([0.5861, 0.1168, 0.1311, 0.1660]) -Greedy action tensor([ 0.1521, -0.0673, 0.1659, 0.1617]) tensor([0.2613, 0.2099, 0.2650, 0.2639]) -Greedy action tensor([ 1.5422, -0.3403, -0.2201, 0.2979]) tensor([0.6204, 0.0944, 0.1065, 0.1787]) -Greedy action tensor([ 1.7164, -0.9296, 0.0529, 0.3092]) tensor([0.6644, 0.0471, 0.1259, 0.1626]) -Greedy action tensor([ 1.4444, -0.8041, -0.2397, 0.6160]) tensor([0.5787, 0.0611, 0.1074, 0.2528]) -Greedy action tensor([ 1.2404, -0.8172, -0.0541, 0.1663]) tensor([0.5736, 0.0733, 0.1572, 0.1959]) -Greedy action tensor([ 1.0052, -0.0432, -0.2669, -0.1211]) tensor([0.5115, 0.1793, 0.1433, 0.1659]) -Greedy action tensor([ 1.5301, 0.1979, -0.3561, -0.1074]) tensor([0.6211, 0.1639, 0.0942, 0.1208]) -Greedy action tensor([ 1.7928, 0.0662, -0.2263, 0.1217]) tensor([0.6672, 0.1187, 0.0886, 0.1255]) -Greedy action tensor([ 1.0926, -0.5048, -0.4673, 0.4388]) tensor([0.5174, 0.1047, 0.1087, 0.2691]) -Greedy action tensor([ 1.0909, -0.4809, -0.3480, 0.2604]) tensor([0.5317, 0.1104, 0.1261, 0.2317]) -Greedy action tensor([ 0.9463, -0.4019, -0.1694, 0.1329]) tensor([0.4924, 0.1279, 0.1614, 0.2183]) -Greedy action tensor([ 1.4011, -0.1730, -0.2972, 0.0924]) tensor([0.6023, 0.1248, 0.1102, 0.1627]) -Greedy action tensor([ 0.9253, -0.3998, -0.2178, -0.1805]) tensor([0.5221, 0.1387, 0.1664, 0.1728]) -Greedy action tensor([ 1.0646, -0.4671, -0.2153, 0.2489]) tensor([0.5164, 0.1116, 0.1436, 0.2284]) -Greedy action tensor([ 1.2339, -0.5304, 0.0471, 0.1577]) tensor([0.5502, 0.0943, 0.1679, 0.1876]) -Greedy action tensor([ 1.2545, -0.6151, -0.3605, 0.2962]) tensor([0.5758, 0.0888, 0.1145, 0.2208]) -Greedy action tensor([ 0.9282, -0.4121, -0.0030, 0.0485]) tensor([0.4829, 0.1264, 0.1903, 0.2004]) -Greedy action tensor([ 0.5027, -0.2201, -0.2385, -0.0877]) tensor([0.3974, 0.1929, 0.1894, 0.2202]) -Greedy action tensor([ 1.8168, -0.3454, -0.6136, 0.2914]) tensor([0.7039, 0.0810, 0.0619, 0.1531]) -Greedy action tensor([ 1.5595, -0.9333, -0.1871, 0.4995]) tensor([0.6237, 0.0516, 0.1087, 0.2160]) -Greedy action tensor([ 0.5652, -0.5672, -0.4974, 0.5243]) tensor([0.3806, 0.1226, 0.1315, 0.3653]) -Greedy action tensor([ 0.8595, -0.3353, -0.3338, 0.2376]) tensor([0.4667, 0.1413, 0.1415, 0.2506]) -Greedy action tensor([ 0.7925, -0.5003, -0.2607, 0.1653]) tensor([0.4635, 0.1272, 0.1617, 0.2476]) -Greedy action tensor([ 1.0423, -0.4640, -0.3976, 0.0639]) tensor([0.5451, 0.1209, 0.1292, 0.2049]) -Greedy action tensor([ 1.2725, -0.3906, -0.2620, 0.2255]) tensor([0.5694, 0.1079, 0.1228, 0.1999]) -Greedy action tensor([ 1.4117, -0.5030, -0.3536, 0.4065]) tensor([0.5937, 0.0875, 0.1016, 0.2173]) -Greedy action tensor([ 1.3114, -0.1361, -0.0743, 0.1315]) tensor([0.5579, 0.1312, 0.1395, 0.1714]) -Greedy action tensor([ 2.2339, -1.1421, -0.2177, 0.2196]) tensor([0.7976, 0.0273, 0.0687, 0.1064]) -Greedy action tensor([ 0.7966, 0.0935, -0.2662, 0.1263]) tensor([0.4251, 0.2105, 0.1469, 0.2175]) -Greedy action tensor([ 1.1503, -0.5956, -0.2963, 0.2360]) tensor([0.5523, 0.0964, 0.1300, 0.2214]) -Greedy action tensor([ 0.9954, -0.6750, -0.2184, 0.3352]) tensor([0.4995, 0.0940, 0.1484, 0.2581]) -Greedy action tensor([ 1.0858, -0.5077, -0.1407, 0.1083]) tensor([0.5340, 0.1085, 0.1566, 0.2009]) -Greedy action tensor([ 0.7655, -0.1837, -0.1275, 0.0495]) tensor([0.4376, 0.1694, 0.1792, 0.2139]) -Greedy action tensor([ 0.2698, -0.3461, 0.0403, -0.1707]) tensor([0.3357, 0.1813, 0.2669, 0.2161]) -Greedy action tensor([ 1.5144, 0.0305, -0.3456, 0.0373]) tensor([0.6208, 0.1408, 0.0966, 0.1417]) -Greedy action tensor([ 1.0806, -0.5127, -0.7451, 0.8528]) tensor([0.4628, 0.0941, 0.0746, 0.3685]) -Greedy action tensor([ 1.4893, -0.4947, -0.0549, 0.3445]) tensor([0.5991, 0.0824, 0.1279, 0.1907]) -Greedy action tensor([ 1.9039, -0.4666, -0.3688, 0.5362]) tensor([0.6891, 0.0644, 0.0710, 0.1755]) -Greedy action tensor([ 0.8402, -0.4111, -0.3828, 0.5668]) tensor([0.4271, 0.1222, 0.1257, 0.3249]) -Greedy action tensor([ 0.8794, -0.4428, -0.4488, 0.5279]) tensor([0.4474, 0.1193, 0.1185, 0.3148]) -Greedy action tensor([ 1.2734, -0.3856, -0.4484, 0.7801]) tensor([0.5051, 0.0961, 0.0903, 0.3084]) -Greedy action tensor([ 2.2431, -0.7647, -0.2225, 0.2758]) tensor([0.7848, 0.0388, 0.0667, 0.1097]) -Greedy action tensor([ 1.3492, -0.4150, -0.2280, 0.1096]) tensor([0.5997, 0.1027, 0.1239, 0.1736]) -Greedy action tensor([ 0.5585, -0.4382, 0.0449, 0.0515]) tensor([0.3891, 0.1436, 0.2328, 0.2344]) -Greedy action tensor([ 1.3326, -0.2446, -0.4724, 0.2034]) tensor([0.5902, 0.1219, 0.0971, 0.1908]) -Greedy action tensor([ 0.5399, -0.1870, -0.0875, 0.1614]) tensor([0.3701, 0.1789, 0.1976, 0.2535]) -Greedy action tensor([ 0.3618, -0.3729, -0.1922, -0.4852]) tensor([0.4027, 0.1932, 0.2314, 0.1727]) -Greedy action tensor([ 0.5721, -0.0495, -0.1350, -0.1452]) tensor([0.3971, 0.2133, 0.1958, 0.1938]) -Greedy action tensor([ 0.5833, -0.3511, -0.0910, -0.3901]) tensor([0.4386, 0.1723, 0.2235, 0.1657]) -Greedy action tensor([ 0.8422, -0.9071, 0.1499, -0.6391]) tensor([0.5258, 0.0915, 0.2631, 0.1196]) -Greedy action tensor([ 0.4188, -0.0026, -0.0670, -0.3503]) tensor([0.3657, 0.2399, 0.2250, 0.1695]) -Greedy action tensor([ 0.7856, -0.4701, -0.0235, -0.3691]) tensor([0.4889, 0.1393, 0.2177, 0.1541]) -Greedy action tensor([ 0.4600, -0.0671, 0.0251, -0.1227]) tensor([0.3576, 0.2111, 0.2315, 0.1997]) -Greedy action tensor([ 0.7808, -0.4729, -0.0867, -0.4798]) tensor([0.5028, 0.1435, 0.2112, 0.1425]) -Greedy action tensor([ 0.6323, -0.2711, 0.0027, -0.4134]) tensor([0.4368, 0.1770, 0.2327, 0.1535]) -Greedy action tensor([ 0.5048, -0.1040, -0.1262, -0.2165]) tensor([0.3903, 0.2123, 0.2077, 0.1897]) -Greedy action tensor([ 0.5981, -0.2666, 0.1989, -0.5703]) tensor([0.4162, 0.1753, 0.2792, 0.1294]) -Greedy action tensor([ 0.6680, -0.4237, 0.0516, -0.3282]) tensor([0.4455, 0.1495, 0.2405, 0.1645]) -Greedy action tensor([ 0.8946, -0.3622, -0.1269, -0.2713]) tensor([0.5112, 0.1455, 0.1841, 0.1593]) -Greedy action tensor([ 0.8612, -0.8936, -0.0829, -0.3939]) tensor([0.5414, 0.0936, 0.2106, 0.1543]) -Greedy action tensor([ 0.3248, -0.1210, -0.0959, -0.2043]) tensor([0.3465, 0.2219, 0.2275, 0.2041]) -Greedy action tensor([ 0.8661, -0.4741, 0.0347, -0.3280]) tensor([0.4999, 0.1309, 0.2177, 0.1515]) -Greedy action tensor([ 0.9191, -0.5191, -0.0292, -0.6866]) tensor([0.5478, 0.1300, 0.2122, 0.1100]) -Greedy action tensor([ 8.2154e-01, -4.9167e-01, -3.9391e-04, -1.4957e-01]) tensor([0.4791, 0.1289, 0.2106, 0.1814]) -Greedy action tensor([ 0.4767, 0.0377, -0.0996, -0.1489]) tensor([0.3647, 0.2352, 0.2050, 0.1951]) -Greedy action tensor([ 0.4162, -0.1862, -0.0950, -0.2804]) tensor([0.3780, 0.2069, 0.2267, 0.1884]) -Greedy action tensor([ 0.4870, -0.4631, -0.1232, -0.1547]) tensor([0.4071, 0.1574, 0.2212, 0.2143]) -Greedy action tensor([ 0.5170, -0.4147, -0.0698, -0.5776]) tensor([0.4377, 0.1724, 0.2434, 0.1465]) -Greedy action tensor([ 0.5916, -0.1574, -0.0057, -0.2305]) tensor([0.4061, 0.1920, 0.2235, 0.1785]) -Greedy action tensor([ 0.8749, -0.8059, -0.2387, -0.6964]) tensor([0.5806, 0.1081, 0.1907, 0.1206]) -Greedy action tensor([ 0.0070, 0.1064, 0.0233, -0.1724]) tensor([0.2527, 0.2792, 0.2569, 0.2112]) -Greedy action tensor([ 0.9911, -0.7874, 0.0736, -0.4013]) tensor([0.5504, 0.0930, 0.2199, 0.1368]) -Greedy action tensor([ 0.0593, 0.1801, 0.1324, -0.1952]) tensor([0.2513, 0.2836, 0.2703, 0.1948]) -Greedy action tensor([ 0.3378, 0.0891, -0.0685, -0.2867]) tensor([0.3354, 0.2616, 0.2234, 0.1796]) -Greedy action tensor([ 0.1419, 0.0335, -0.0476, -0.1616]) tensor([0.2888, 0.2591, 0.2389, 0.2132]) -Greedy action tensor([ 0.5328, -0.4053, 0.0303, -0.2521]) tensor([0.4077, 0.1596, 0.2467, 0.1860]) -Greedy action tensor([ 0.4406, 0.1085, 0.0777, -0.1256]) tensor([0.3355, 0.2407, 0.2334, 0.1905]) -Greedy action tensor([ 0.4294, 0.0295, 0.0023, -0.1524]) tensor([0.3470, 0.2326, 0.2264, 0.1940]) -Greedy action tensor([ 0.7757, -0.4063, -0.0217, -0.3558]) tensor([0.4808, 0.1475, 0.2166, 0.1551]) -Greedy action tensor([ 0.4443, -0.1768, -0.0884, -0.3356]) tensor([0.3872, 0.2080, 0.2273, 0.1775]) -Greedy action tensor([ 0.3288, -0.0947, 0.0582, -0.0384]) tensor([0.3215, 0.2105, 0.2453, 0.2227]) -Greedy action tensor([ 0.2897, -0.1395, -0.0408, -0.1523]) tensor([0.3320, 0.2161, 0.2385, 0.2134]) -Greedy action tensor([ 0.9454, -1.1518, -0.0297, -0.7167]) tensor([0.5918, 0.0727, 0.2232, 0.1123]) -Greedy action tensor([ 0.7736, -0.4945, -0.1270, -0.2940]) tensor([0.4923, 0.1385, 0.2000, 0.1692]) -Greedy action tensor([ 0.7244, -0.6433, -0.0828, -0.4161]) tensor([0.4949, 0.1261, 0.2208, 0.1582]) -Greedy action tensor([ 0.3729, -0.1213, -0.0549, -0.1441]) tensor([0.3499, 0.2134, 0.2281, 0.2086]) -Greedy action tensor([ 0.5841, -0.2602, 0.0374, -0.4927]) tensor([0.4256, 0.1830, 0.2464, 0.1450]) -Greedy action tensor([ 0.5435, -0.2242, -0.1023, -0.4310]) tensor([0.4227, 0.1962, 0.2216, 0.1595]) -Greedy action tensor([ 0.7520, -0.3243, 0.0302, -0.4381]) tensor([0.4693, 0.1600, 0.2280, 0.1427]) -Greedy action tensor([ 0.1811, 0.1118, -0.1790, -0.3059]) tensor([0.3082, 0.2875, 0.2150, 0.1893]) -Greedy action tensor([ 0.2905, 0.1062, -0.1140, -0.1239]) tensor([0.3165, 0.2632, 0.2112, 0.2091]) -Greedy action tensor([ 0.3313, -0.2931, -0.1339, -0.4323]) tensor([0.3803, 0.2037, 0.2388, 0.1772]) -Greedy action tensor([ 0.8970, -0.2666, -0.0958, -0.3289]) tensor([0.5060, 0.1581, 0.1875, 0.1485]) -Greedy action tensor([ 0.5939, -0.3989, -0.0966, -0.6060]) tensor([0.4602, 0.1705, 0.2307, 0.1386]) -Greedy action tensor([ 0.6884, -0.3996, 0.0538, -0.2551]) tensor([0.4432, 0.1493, 0.2350, 0.1725]) -Greedy action tensor([ 0.3661, -0.1217, -0.2336, -0.3731]) tensor([0.3787, 0.2325, 0.2079, 0.1808]) -Greedy action tensor([ 0.3606, 0.2163, -0.1986, -0.0133]) tensor([0.3200, 0.2770, 0.1829, 0.2202]) -Greedy action tensor([ 0.7087, -0.3691, 0.2265, -0.5240]) tensor([0.4446, 0.1513, 0.2745, 0.1296]) -Greedy action tensor([ 0.5878, -0.0368, -0.0813, -0.1798]) tensor([0.3981, 0.2132, 0.2039, 0.1848]) -Greedy action tensor([ 0.9863, -0.5282, 0.0055, -0.4096]) tensor([0.5427, 0.1194, 0.2035, 0.1344]) -Greedy action tensor([ 0.4048, 0.1453, 0.0075, -0.4304]) tensor([0.3475, 0.2681, 0.2336, 0.1508]) -Greedy action tensor([ 0.5156, -0.2090, 0.1665, -0.4479]) tensor([0.3889, 0.1884, 0.2743, 0.1484]) -Greedy action tensor([ 0.1493, 0.0002, -0.1487, -0.1441]) tensor([0.2986, 0.2572, 0.2216, 0.2226]) -Greedy action tensor([ 0.8449, -0.6364, -0.0525, -0.4661]) tensor([0.5251, 0.1194, 0.2140, 0.1415]) -Greedy action tensor([ 0.8188, -0.3557, 0.1021, -0.3919]) tensor([0.4772, 0.1475, 0.2331, 0.1422]) -Greedy action tensor([ 0.0923, 0.2525, -0.0399, -0.2058]) tensor([0.2637, 0.3095, 0.2310, 0.1957]) -Greedy action tensor([ 0.4895, -0.1070, -0.1220, -0.2131]) tensor([0.3863, 0.2128, 0.2096, 0.1913]) -Greedy action tensor([ 0.5125, -0.2368, -0.0255, -0.5550]) tensor([0.4166, 0.1969, 0.2433, 0.1433]) -Greedy action tensor([ 0.5221, -0.0415, -0.0863, -0.0463]) tensor([0.3732, 0.2124, 0.2031, 0.2114]) -Greedy action tensor([ 0.6802, -0.3129, -0.0812, -0.2076]) tensor([0.4446, 0.1647, 0.2077, 0.1830]) -Greedy action tensor([ 0.1484, 0.0807, -0.1669, -0.4375]) tensor([0.3105, 0.2902, 0.2265, 0.1728]) -Greedy action tensor([ 0.2996, -0.1072, -0.0571, -0.3565]) tensor([0.3467, 0.2308, 0.2427, 0.1799]) -Greedy action tensor([ 0.4594, 0.3106, -0.1336, 0.0996]) tensor([0.3213, 0.2769, 0.1776, 0.2242]) -Greedy action tensor([ 0.5997, -0.2005, 0.1081, -0.6325]) tensor([0.4251, 0.1910, 0.2600, 0.1240]) -Greedy action tensor([ 0.3134, -0.0096, -0.1827, -0.2669]) tensor([0.3457, 0.2503, 0.2105, 0.1935]) -Greedy action tensor([ 0.1975, 0.1354, -0.0317, 0.1052]) tensor([0.2742, 0.2577, 0.2180, 0.2500]) -Greedy action tensor([ 0.4220, 0.2456, -0.0648, -0.3998]) tensor([0.3457, 0.2898, 0.2125, 0.1520]) -Greedy action tensor([ 0.6150, -0.4173, -0.1970, -0.4254]) tensor([0.4644, 0.1654, 0.2062, 0.1641]) -Greedy action tensor([ 0.4239, -0.1002, -0.0458, -0.2548]) tensor([0.3670, 0.2173, 0.2295, 0.1862]) -Greedy action tensor([ 0.9009, -0.4938, 0.0271, -0.8422]) tensor([0.5434, 0.1347, 0.2268, 0.0951]) -Greedy action tensor([ 0.9086, -0.9593, 0.0640, -0.4760]) tensor([0.5451, 0.0842, 0.2342, 0.1365]) -Greedy action tensor([ 0.4893, -0.0193, 0.3023, -0.5473]) tensor([0.3590, 0.2159, 0.2978, 0.1273]) -Greedy action tensor([ 5.8332e-01, 1.6251e-04, -1.7899e-01, -2.0545e-01]) tensor([0.4034, 0.2251, 0.1882, 0.1833]) -Greedy action tensor([ 0.4747, -0.3020, 0.0088, -0.4069]) tensor([0.3998, 0.1838, 0.2509, 0.1655]) -Greedy action tensor([ 0.4387, -0.4024, -0.1157, -0.4614]) tensor([0.4146, 0.1788, 0.2381, 0.1685]) -Greedy action tensor([ 0.6957, -0.5424, -0.0436, -0.3213]) tensor([0.4697, 0.1362, 0.2242, 0.1699]) -Greedy action tensor([ 0.4424, -0.2434, -0.0280, -0.2005]) tensor([0.3768, 0.1898, 0.2354, 0.1981]) -Greedy action tensor([ 0.9219, -1.8980, -0.7526, 0.8723]) tensor([0.4548, 0.0271, 0.0852, 0.4328]) -Greedy action tensor([ 0.9821, -0.0696, 1.5278, 0.7435]) tensor([0.2589, 0.0904, 0.4468, 0.2039]) -Greedy action tensor([-0.9605, 0.6905, 1.1483, 0.5961]) tensor([0.0521, 0.2716, 0.4292, 0.2471]) -Greedy action tensor([ 0.8422, -1.9166, -0.4463, 0.8489]) tensor([0.4263, 0.0270, 0.1175, 0.4292]) -Greedy action tensor([0.5365, 0.2854, 0.4463, 0.3256]) tensor([0.2856, 0.2222, 0.2609, 0.2313]) -Greedy action tensor([ 0.6676, -0.0661, 0.2256, 0.7746]) tensor([0.3090, 0.1484, 0.1986, 0.3440]) -Greedy action tensor([-0.3427, -0.7355, 1.7986, 0.2716]) tensor([0.0831, 0.0561, 0.7072, 0.1536]) -Greedy action tensor([ 0.8194, -1.5505, 1.4418, -0.1863]) tensor([0.3010, 0.0281, 0.5608, 0.1101]) -Greedy action tensor([ 0.7500, -0.5404, 0.6591, 0.0510]) tensor([0.3724, 0.1025, 0.3400, 0.1851]) -Greedy action tensor([-1.0793, -0.1774, 0.7105, -1.1106]) tensor([0.0960, 0.2365, 0.5746, 0.0930]) -Greedy action tensor([ 0.2771, 0.6871, -0.8564, 0.4684]) tensor([0.2475, 0.3730, 0.0797, 0.2998]) -Greedy action tensor([0.4432, 0.0212, 0.2555, 1.2755]) tensor([0.2091, 0.1371, 0.1733, 0.4806]) -Greedy action tensor([-0.9088, 1.3336, 0.5933, 0.1623]) tensor([0.0561, 0.5282, 0.2519, 0.1637]) -Greedy action tensor([-0.2747, -0.0095, -0.1230, 0.6161]) tensor([0.1694, 0.2208, 0.1971, 0.4128]) -Greedy action tensor([-0.2951, -0.0317, 0.5883, -0.0257]) tensor([0.1659, 0.2158, 0.4012, 0.2171]) -Greedy action tensor([ 1.5738, -0.8336, 0.3570, 1.7413]) tensor([0.3893, 0.0351, 0.1153, 0.4603]) -Greedy action tensor([ 1.2949, 1.2155, -0.1337, 0.1862]) tensor([0.4011, 0.3705, 0.0961, 0.1324]) -Greedy action tensor([-0.3226, -1.2965, 0.0781, -0.1934]) tensor([0.2495, 0.0942, 0.3724, 0.2839]) -Greedy action tensor([1.6429, 0.7506, 0.6138, 0.3684]) tensor([0.4886, 0.2002, 0.1746, 0.1366]) -Greedy action tensor([-0.6645, 0.9318, 0.3826, -0.0434]) tensor([0.0939, 0.4636, 0.2677, 0.1748]) -Greedy action tensor([ 1.0939, -0.4606, 1.2034, 1.0119]) tensor([0.3079, 0.0650, 0.3435, 0.2836]) -Greedy action tensor([ 0.9977, -0.1466, -0.4530, 1.4127]) tensor([0.3260, 0.1038, 0.0764, 0.4937]) -Greedy action tensor([ 0.4121, 0.3172, 1.2774, -1.1634]) tensor([0.2226, 0.2025, 0.5289, 0.0461]) -Greedy action tensor([-0.0576, -2.0665, -0.4620, -0.8134]) tensor([0.4403, 0.0591, 0.2939, 0.2068]) -Greedy action tensor([ 0.9160, -2.1177, -0.2050, 1.2803]) tensor([0.3554, 0.0171, 0.1158, 0.5116]) -Greedy action tensor([ 0.6440, -0.0876, 0.3639, -0.0507]) tensor([0.3655, 0.1759, 0.2762, 0.1825]) -Greedy action tensor([-0.0868, -1.0971, 2.4009, -0.3792]) tensor([0.0707, 0.0257, 0.8508, 0.0528]) -Greedy action tensor([-0.8333, -0.3772, 1.3433, -0.4252]) tensor([0.0775, 0.1223, 0.6835, 0.1166]) -Greedy action tensor([ 1.1107, -0.5633, 0.7235, 1.1078]) tensor([0.3492, 0.0655, 0.2371, 0.3482]) -Greedy action tensor([ 0.5131, 0.2325, -1.1151, 0.8274]) tensor([0.3011, 0.2275, 0.0591, 0.4123]) -Greedy action tensor([ 0.8674, -0.4673, 1.0297, 0.8147]) tensor([0.2951, 0.0777, 0.3472, 0.2800]) -Greedy action tensor([-0.3771, -0.2051, 1.9565, -0.3389]) tensor([0.0739, 0.0877, 0.7617, 0.0767]) -Greedy action tensor([-0.1693, -0.5912, 0.2323, -0.4595]) tensor([0.2565, 0.1682, 0.3833, 0.1919]) -Greedy action tensor([ 0.0944, -0.0754, -0.2920, 0.6107]) tensor([0.2381, 0.2010, 0.1618, 0.3991]) -Greedy action tensor([ 1.0495, -2.0279, 0.6326, 1.1994]) tensor([0.3488, 0.0161, 0.2299, 0.4052]) -Greedy action tensor([ 1.1994, -0.6196, 1.4939, 0.3379]) tensor([0.3416, 0.0554, 0.4586, 0.1444]) -Greedy action tensor([ 0.2528, -1.2785, 0.6336, -0.9789]) tensor([0.3365, 0.0728, 0.4925, 0.0982]) -Greedy action tensor([-0.1996, 1.1756, -0.5822, 0.6721]) tensor([0.1246, 0.4927, 0.0850, 0.2978]) -Greedy action tensor([-0.5328, -0.7229, -1.1481, 0.7672]) tensor([0.1657, 0.1370, 0.0895, 0.6078]) -Greedy action tensor([ 0.7317, -0.8336, 1.7798, 1.1306]) tensor([0.1801, 0.0377, 0.5138, 0.2684]) -Greedy action tensor([ 0.6090, 1.1294, 1.5875, -0.6182]) tensor([0.1774, 0.2986, 0.4720, 0.0520]) -Greedy action tensor([ 0.5101, -0.5961, 0.8834, 0.6914]) tensor([0.2511, 0.0831, 0.3648, 0.3010]) -Greedy action tensor([0.1230, 0.5543, 0.9299, 1.0685]) tensor([0.1360, 0.2093, 0.3047, 0.3500]) -Greedy action tensor([-0.7422, 0.5448, -0.3523, -0.0870]) tensor([0.1246, 0.4514, 0.1841, 0.2400]) -Greedy action tensor([-0.6745, 0.2996, 0.5696, -1.4261]) tensor([0.1317, 0.3490, 0.4571, 0.0621]) -Greedy action tensor([0.8089, 0.5175, 0.3817, 0.2496]) tensor([0.3366, 0.2515, 0.2195, 0.1924]) -Greedy action tensor([-0.9031, -1.6566, -0.4063, 0.2672]) tensor([0.1578, 0.0743, 0.2593, 0.5086]) -Greedy action tensor([ 0.3661, 1.6329, 0.5843, -1.2260]) tensor([0.1667, 0.5919, 0.2074, 0.0339]) -Greedy action tensor([ 1.2354, -0.3992, -0.5690, 1.3186]) tensor([0.4088, 0.0797, 0.0673, 0.4442]) -Greedy action tensor([ 0.3675, -0.2282, 1.2281, -0.5981]) tensor([0.2327, 0.1283, 0.5504, 0.0886]) -Greedy action tensor([-1.1264, -0.5865, 0.4026, 0.4902]) tensor([0.0809, 0.1388, 0.3731, 0.4073]) -Greedy action tensor([2.0420, 0.5039, 1.3287, 1.2382]) tensor([0.4646, 0.0998, 0.2277, 0.2080]) -Greedy action tensor([-0.1926, -0.8803, -0.1332, 1.1179]) tensor([0.1594, 0.0802, 0.1692, 0.5912]) -Greedy action tensor([ 1.0007, 0.0897, -0.8426, 1.3020]) tensor([0.3434, 0.1381, 0.0544, 0.4641]) -Greedy action tensor([-0.5040, -0.0734, 0.9272, 1.3267]) tensor([0.0772, 0.1187, 0.3228, 0.4814]) -Greedy action tensor([-0.2907, -2.2625, -0.5622, -0.0261]) tensor([0.3121, 0.0434, 0.2379, 0.4066]) -Greedy action tensor([0.3314, 0.4055, 0.0453, 0.1849]) tensor([0.2709, 0.2917, 0.2035, 0.2340]) -Greedy action tensor([ 0.2563, -2.3144, -0.0607, 1.5102]) tensor([0.1884, 0.0144, 0.1372, 0.6600]) -Greedy action tensor([ 0.7250, -0.5629, 1.1554, -0.2400]) tensor([0.3130, 0.0863, 0.4814, 0.1193]) -Greedy action tensor([ 0.2962, -2.0578, 0.4848, -0.4593]) tensor([0.3607, 0.0343, 0.4356, 0.1695]) -Greedy action tensor([ 0.7090, 0.5290, -0.2008, 0.3918]) tensor([0.3371, 0.2816, 0.1357, 0.2455]) -Greedy action tensor([ 0.1088, -0.7015, 0.1499, 1.8348]) tensor([0.1234, 0.0549, 0.1286, 0.6932]) -Greedy action tensor([-0.1530, -0.4640, 0.2050, 0.2784]) tensor([0.2127, 0.1558, 0.3042, 0.3274]) -Greedy action tensor([ 0.0238, -2.4424, -0.0164, 0.6506]) tensor([0.2553, 0.0217, 0.2452, 0.4778]) -Greedy action tensor([ 0.5138, -0.7816, 2.0493, 0.1453]) tensor([0.1513, 0.0414, 0.7026, 0.1047]) -Greedy action tensor([ 1.2820, -1.7816, 0.4543, 0.7574]) tensor([0.4818, 0.0225, 0.2106, 0.2851]) -Greedy action tensor([-0.6556, -0.2295, -0.1535, -0.0649]) tensor([0.1670, 0.2557, 0.2759, 0.3014]) -Greedy action tensor([ 1.2320, -0.0313, 0.7878, -0.4568]) tensor([0.4742, 0.1341, 0.3041, 0.0876]) -Greedy action tensor([-0.2514, -0.1830, 1.0928, 0.2740]) tensor([0.1316, 0.1410, 0.5048, 0.2226]) -Greedy action tensor([-0.8850, -1.4653, -0.6545, -0.2933]) tensor([0.2162, 0.1210, 0.2722, 0.3906]) -Greedy action tensor([1.0061, 0.1292, 0.4624, 0.0624]) tensor([0.4191, 0.1744, 0.2433, 0.1631]) -Greedy action tensor([ 0.6645, -0.9196, -0.6589, 1.6343]) tensor([0.2434, 0.0499, 0.0648, 0.6419]) -Greedy action tensor([ 0.6556, -0.0844, 1.1680, 1.2416]) tensor([0.2023, 0.0965, 0.3377, 0.3635]) -Greedy action tensor([-0.6213, 0.0592, -0.4322, -0.3624]) tensor([0.1825, 0.3605, 0.2205, 0.2365]) -Greedy action tensor([-0.3026, 0.1469, 1.2613, 1.5634]) tensor([0.0724, 0.1135, 0.3460, 0.4680]) -Greedy action tensor([ 0.2110, -0.7783, 0.4646, 0.6486]) tensor([0.2376, 0.0883, 0.3061, 0.3680]) -Greedy action tensor([-0.0402, -0.2684, 0.9943, 0.4730]) tensor([0.1592, 0.1267, 0.4480, 0.2660]) -Greedy action tensor([ 0.9588, -0.3713, 1.2484, 1.1211]) tensor([0.2648, 0.0700, 0.3537, 0.3115]) -Greedy action tensor([ 0.8586, -1.3250, 0.6625, 0.7642]) tensor([0.3516, 0.0396, 0.2890, 0.3199]) -Greedy action tensor([ 0.3806, -0.0675, 0.6714, -0.3110]) tensor([0.2876, 0.1837, 0.3847, 0.1440]) -Greedy action tensor([ 0.0610, -0.9750, -0.1317, 0.5674]) tensor([0.2605, 0.0924, 0.2148, 0.4322]) -Greedy action tensor([ 0.3781, -1.0795, -0.1116, 0.9408]) tensor([0.2777, 0.0646, 0.1702, 0.4875]) -Greedy action tensor([-1.9451, -0.4513, 0.6685, -0.1801]) tensor([0.0401, 0.1786, 0.5471, 0.2342]) -Greedy action tensor([-1.3289, -0.3352, 0.6690, -0.5785]) tensor([0.0758, 0.2047, 0.5589, 0.1605]) -Greedy action tensor([-0.4557, -0.0904, 0.1522, 0.1524]) tensor([0.1635, 0.2357, 0.3004, 0.3004]) -Greedy action tensor([-1.3748, -0.5482, 0.4482, 0.0071]) tensor([0.0743, 0.1698, 0.4600, 0.2959]) -Greedy action tensor([-1.3271, -0.5706, 0.4051, 0.3683]) tensor([0.0703, 0.1497, 0.3972, 0.3828]) -Greedy action tensor([-1.3564, -0.5680, 0.3536, 0.1746]) tensor([0.0749, 0.1648, 0.4141, 0.3462]) -Greedy action tensor([-1.6881, -0.4661, 0.5523, 0.0670]) tensor([0.0511, 0.1734, 0.4801, 0.2955]) -Greedy action tensor([-1.0585, -0.6572, 0.3793, 0.2015]) tensor([0.0977, 0.1460, 0.4116, 0.3446]) -Greedy action tensor([-1.5141, -0.5377, 0.5028, 0.2009]) tensor([0.0598, 0.1587, 0.4493, 0.3322]) -Greedy action tensor([-1.0131, -0.0599, 0.2090, 0.7028]) tensor([0.0797, 0.2067, 0.2705, 0.4432]) -Greedy action tensor([-1.3116, -0.5726, 0.6418, 0.6743]) tensor([0.0574, 0.1201, 0.4046, 0.4179]) -Greedy action tensor([-1.5999, -0.4162, 0.6655, 0.3068]) tensor([0.0485, 0.1583, 0.4670, 0.3262]) -Greedy action tensor([-0.1259, -0.0021, 1.0943, 1.6792]) tensor([0.0862, 0.0976, 0.2921, 0.5242]) -Greedy action tensor([-1.1100, -0.4426, 0.8700, 1.2818]) tensor([0.0473, 0.0923, 0.3429, 0.5175]) -Greedy action tensor([-1.1722, -0.0427, 0.5596, -0.6212]) tensor([0.0871, 0.2695, 0.4922, 0.1511]) -Greedy action tensor([-1.1546, -0.5263, 0.3468, 0.0200]) tensor([0.0943, 0.1769, 0.4234, 0.3054]) -Greedy action tensor([-1.1551, -0.5801, 0.2770, 0.3103]) tensor([0.0885, 0.1573, 0.3708, 0.3833]) -Greedy action tensor([-0.9997, -0.6850, 1.2074, 1.5080]) tensor([0.0421, 0.0577, 0.3829, 0.5172]) -Greedy action tensor([-1.9218, -0.4520, 0.6562, -0.1737]) tensor([0.0412, 0.1792, 0.5428, 0.2367]) -Greedy action tensor([-1.7383, -0.5098, 0.5624, -0.0265]) tensor([0.0502, 0.1713, 0.5007, 0.2778]) -Greedy action tensor([-1.8949, -0.3658, 0.6339, -0.1463]) tensor([0.0418, 0.1931, 0.5246, 0.2405]) -Greedy action tensor([-1.4173, -0.5776, 0.3810, 0.1655]) tensor([0.0703, 0.1628, 0.4246, 0.3423]) -Greedy action tensor([-1.8777, -0.4426, 0.6571, -0.1194]) tensor([0.0423, 0.1778, 0.5341, 0.2457]) -Greedy action tensor([-1.7859, -0.4258, 0.5820, -0.0920]) tensor([0.0476, 0.1854, 0.5080, 0.2589]) -Greedy action tensor([-1.1824, -0.5304, 0.4777, 0.6858]) tensor([0.0682, 0.1310, 0.3589, 0.4419]) -Greedy action tensor([-1.8439, -0.1198, 0.5717, -0.0590]) tensor([0.0421, 0.2360, 0.4712, 0.2508]) -Greedy action tensor([-0.7914, -0.1569, 0.2886, 0.4113]) tensor([0.1092, 0.2059, 0.3215, 0.3634]) -Greedy action tensor([-0.9988, -0.1361, 0.3639, -0.3456]) tensor([0.1087, 0.2576, 0.4247, 0.2089]) -Greedy action tensor([-1.5576, -0.4984, 0.4446, 0.0728]) tensor([0.0610, 0.1759, 0.4517, 0.3114]) -Greedy action tensor([-1.5431, -0.5091, 0.6642, -0.2344]) tensor([0.0602, 0.1694, 0.5475, 0.2229]) -Greedy action tensor([-1.1167, -0.6027, 0.2695, 0.2885]) tensor([0.0930, 0.1556, 0.3721, 0.3793]) -Greedy action tensor([-1.2684, -0.3732, 0.6896, 0.7411]) tensor([0.0556, 0.1361, 0.3938, 0.4146]) -Greedy action tensor([-0.9482, 0.0273, 0.6672, 0.9333]) tensor([0.0656, 0.1740, 0.3299, 0.4305]) -Greedy action tensor([-1.0358, -0.2787, 0.6081, -0.5772]) tensor([0.1011, 0.2156, 0.5233, 0.1600]) -Greedy action tensor([-1.8294, -0.4544, 0.6098, -0.1063]) tensor([0.0454, 0.1796, 0.5206, 0.2544]) -Greedy action tensor([-0.7264, -0.1427, 0.3137, -0.2694]) tensor([0.1389, 0.2489, 0.3929, 0.2193]) -Greedy action tensor([-1.6790, 0.1084, 0.5008, 0.0648]) tensor([0.0464, 0.2774, 0.4106, 0.2656]) -Greedy action tensor([-1.3310, -0.4474, 0.7745, 0.9878]) tensor([0.0459, 0.1110, 0.3767, 0.4663]) -Greedy action tensor([-0.8200, -0.6969, 1.1001, 1.5604]) tensor([0.0506, 0.0572, 0.3452, 0.5470]) -Greedy action tensor([-1.5630, -0.3437, 0.6321, 0.2626]) tensor([0.0511, 0.1729, 0.4589, 0.3171]) -Greedy action tensor([-1.9283, -0.4241, 0.8829, 0.3391]) tensor([0.0315, 0.1416, 0.5232, 0.3037]) -Greedy action tensor([-1.4770, -0.5212, 0.7454, -0.3710]) tensor([0.0631, 0.1641, 0.5822, 0.1907]) -Greedy action tensor([-1.7951, -0.4906, 0.5955, -0.0650]) tensor([0.0471, 0.1735, 0.5140, 0.2655]) -Greedy action tensor([-0.7901, -0.4537, 0.4161, 0.0273]) tensor([0.1249, 0.1749, 0.4173, 0.2829]) -Greedy action tensor([-0.6833, -0.0258, -0.0073, 0.2375]) tensor([0.1350, 0.2605, 0.2654, 0.3390]) -Greedy action tensor([-1.6030, -0.5402, 0.5093, -0.0663]) tensor([0.0595, 0.1722, 0.4918, 0.2766]) -Greedy action tensor([-1.5724, -0.3404, 0.6739, 0.5498]) tensor([0.0450, 0.1542, 0.4252, 0.3756]) -Greedy action tensor([-1.7727, -0.4266, 0.9156, 0.4870]) tensor([0.0343, 0.1319, 0.5049, 0.3289]) -Greedy action tensor([-1.9387, -0.4561, 0.6630, -0.1775]) tensor([0.0405, 0.1782, 0.5458, 0.2355]) -Greedy action tensor([-0.6930, -0.5402, 0.2563, 0.4868]) tensor([0.1250, 0.1456, 0.3229, 0.4066]) -Greedy action tensor([-1.8847, -0.3981, 0.6334, -0.1348]) tensor([0.0424, 0.1875, 0.5261, 0.2440]) -Greedy action tensor([-1.9241, -0.4407, 0.6577, -0.1655]) tensor([0.0409, 0.1804, 0.5411, 0.2376]) -Greedy action tensor([-1.1053, 0.0509, 0.7641, 0.8236]) tensor([0.0570, 0.1811, 0.3696, 0.3923]) -Greedy action tensor([-1.9810, -0.7206, 0.6146, -0.1581]) tensor([0.0415, 0.1462, 0.5557, 0.2566]) -Greedy action tensor([-1.3101, -0.1466, 0.6421, 0.6586]) tensor([0.0543, 0.1739, 0.3827, 0.3891]) -Greedy action tensor([-0.6527, 0.1597, 0.1545, 0.0300]) tensor([0.1338, 0.3015, 0.2999, 0.2648]) -Greedy action tensor([-0.8596, 0.7923, 0.0598, 0.2231]) tensor([0.0856, 0.4467, 0.2148, 0.2529]) -Greedy action tensor([-0.6508, -0.3946, 0.8713, 1.5630]) tensor([0.0624, 0.0806, 0.2859, 0.5710]) -Greedy action tensor([-1.5633, -0.4998, 0.5986, 0.2754]) tensor([0.0530, 0.1535, 0.4603, 0.3332]) -Greedy action tensor([-1.9063, -0.4341, 0.6434, -0.1586]) tensor([0.0418, 0.1823, 0.5356, 0.2402]) -Greedy action tensor([-1.4915, -0.4868, 0.8694, 0.7704]) tensor([0.0418, 0.1141, 0.4429, 0.4012]) -Greedy action tensor([-1.0222, -0.4689, 0.2272, 0.2935]) tensor([0.1005, 0.1747, 0.3504, 0.3744]) -Greedy action tensor([-1.6841, -0.4970, 0.5388, -0.0452]) tensor([0.0536, 0.1756, 0.4948, 0.2760]) -Greedy action tensor([-1.8320, -0.3498, 0.6020, -0.1228]) tensor([0.0448, 0.1972, 0.5107, 0.2474]) -Greedy action tensor([-0.5414, 0.0284, 1.1243, 1.5213]) tensor([0.0628, 0.1110, 0.3322, 0.4940]) -Greedy action tensor([-1.4604, -0.4907, 0.4607, 0.2984]) tensor([0.0615, 0.1621, 0.4196, 0.3568]) -Greedy action tensor([-0.9038, -0.4594, 0.4033, 1.0807]) tensor([0.0739, 0.1153, 0.2731, 0.5377]) -Greedy action tensor([-1.6408, -0.3121, 0.6687, 0.1885]) tensor([0.0474, 0.1792, 0.4778, 0.2956]) -Greedy action tensor([-1.6952, -0.4631, 0.5728, 0.0266]) tensor([0.0508, 0.1742, 0.4908, 0.2842]) -Greedy action tensor([-1.8532, -0.3994, 0.6202, -0.1130]) tensor([0.0438, 0.1874, 0.5194, 0.2495]) -Greedy action tensor([-1.7931, -0.4003, 0.5833, -0.0661]) tensor([0.0467, 0.1880, 0.5027, 0.2626]) -Greedy action tensor([-1.8895, -0.4604, 0.6546, -0.1328]) tensor([0.0422, 0.1762, 0.5372, 0.2444]) -Greedy action tensor([-1.2883, -0.1793, 0.2925, 0.3119]) tensor([0.0722, 0.2190, 0.3510, 0.3578]) -Greedy action tensor([-1.4290, -0.5539, 0.3815, 0.2088]) tensor([0.0682, 0.1637, 0.4171, 0.3510]) -Greedy action tensor([-0.5354, 0.0100, 0.5856, 1.4014]) tensor([0.0786, 0.1355, 0.2410, 0.5449]) -Greedy action tensor([-1.6661, -0.5118, 0.8133, 0.5914]) tensor([0.0390, 0.1236, 0.4650, 0.3725]) -Greedy action tensor([-1.3419, -0.1593, 0.6751, -0.6043]) tensor([0.0721, 0.2353, 0.5419, 0.1508]) -Greedy action tensor([-1.9347, -0.4534, 0.6593, -0.1753]) tensor([0.0407, 0.1789, 0.5442, 0.2362]) -Greedy action tensor([-1.6418, -0.4961, 0.5555, 0.0968]) tensor([0.0531, 0.1670, 0.4779, 0.3021]) -Greedy action tensor([-1.1120, -0.5292, 0.2982, 0.4985]) tensor([0.0841, 0.1506, 0.3445, 0.4209]) -Greedy action tensor([-1.5924, -0.8205, 1.1358, 0.8396]) tensor([0.0335, 0.0725, 0.5127, 0.3813]) -Greedy action tensor([ 0.8098, -0.3333, -0.2871, 0.3994]) tensor([0.4318, 0.1377, 0.1442, 0.2864]) -Greedy action tensor([ 1.3580, -0.5741, 0.0507, 0.1372]) tensor([0.5847, 0.0847, 0.1582, 0.1725]) -Greedy action tensor([ 1.4673, -0.4366, -0.3458, 0.5770]) tensor([0.5805, 0.0865, 0.0947, 0.2383]) -Greedy action tensor([ 1.8111, -0.6528, -0.2780, 0.5333]) tensor([0.6722, 0.0572, 0.0832, 0.1873]) -Greedy action tensor([ 1.3600, -0.0797, -0.4795, 0.1016]) tensor([0.5952, 0.1411, 0.0946, 0.1691]) -Greedy action tensor([ 0.9810, 0.0651, -0.2487, 0.1993]) tensor([0.4651, 0.1861, 0.1360, 0.2128]) -Greedy action tensor([ 1.0205, -0.5840, -0.1311, 0.2768]) tensor([0.5019, 0.1009, 0.1587, 0.2386]) -Greedy action tensor([ 0.2206, -0.3149, -0.4159, -0.0958]) tensor([0.3517, 0.2059, 0.1861, 0.2563]) -Greedy action tensor([ 1.6848, -1.1027, -0.0906, 0.4479]) tensor([0.6573, 0.0405, 0.1114, 0.1908]) -Greedy action tensor([ 1.5764, -0.0289, -0.3686, 0.1428]) tensor([0.6320, 0.1269, 0.0904, 0.1507]) -Greedy action tensor([ 1.0400, -0.3809, -0.1137, 0.1142]) tensor([0.5120, 0.1236, 0.1615, 0.2029]) -Greedy action tensor([ 0.9258, -0.3758, -0.4296, 0.5241]) tensor([0.4547, 0.1237, 0.1172, 0.3043]) -Greedy action tensor([ 1.0825, -0.1147, -0.1837, 0.2485]) tensor([0.4955, 0.1497, 0.1397, 0.2152]) -Greedy action tensor([ 1.4331, -0.4968, -0.5159, 0.1804]) tensor([0.6356, 0.0923, 0.0905, 0.1816]) -Greedy action tensor([ 1.1663, -0.2886, -0.3502, 0.2087]) tensor([0.5445, 0.1271, 0.1195, 0.2090]) -Greedy action tensor([ 1.1633, -0.6712, 0.1792, 0.1693]) tensor([0.5253, 0.0839, 0.1964, 0.1944]) -Greedy action tensor([ 1.3586, -0.9058, -0.5430, 0.8776]) tensor([0.5344, 0.0555, 0.0798, 0.3303]) -Greedy action tensor([ 1.3540, -0.0673, -0.4358, 0.4635]) tensor([0.5498, 0.1327, 0.0918, 0.2257]) -Greedy action tensor([ 1.0386, -0.3218, -0.3600, 0.3041]) tensor([0.5042, 0.1294, 0.1245, 0.2419]) -Greedy action tensor([ 1.0950, -0.3688, -0.1422, -0.1177]) tensor([0.5498, 0.1272, 0.1595, 0.1635]) -Greedy action tensor([ 0.7334, -0.3005, -0.0967, -0.0075]) tensor([0.4409, 0.1568, 0.1922, 0.2101]) -Greedy action tensor([ 0.9522, -0.6652, -0.2233, 0.5200]) tensor([0.4638, 0.0920, 0.1432, 0.3010]) -Greedy action tensor([ 0.7686, -0.3396, -0.4974, 0.4663]) tensor([0.4253, 0.1404, 0.1199, 0.3144]) -Greedy action tensor([ 1.3321, -0.2801, -0.2631, 0.6549]) tensor([0.5235, 0.1044, 0.1062, 0.2660]) -Greedy action tensor([ 1.1534, -0.5340, -0.3600, 0.4330]) tensor([0.5286, 0.0978, 0.1164, 0.2572]) -Greedy action tensor([ 1.6011, 0.2770, -0.3743, -0.0457]) tensor([0.6260, 0.1666, 0.0868, 0.1206]) -Greedy action tensor([ 1.1694, -0.3143, -0.5325, 0.1961]) tensor([0.5596, 0.1269, 0.1020, 0.2114]) -Greedy action tensor([ 1.9750, -0.4865, -1.3191, 0.1575]) tensor([0.7783, 0.0664, 0.0289, 0.1264]) -Greedy action tensor([ 1.1981, -0.0334, -0.0484, 0.0617]) tensor([0.5262, 0.1536, 0.1513, 0.1689]) -Greedy action tensor([ 1.5251, -0.9580, -0.0741, 0.2646]) tensor([0.6373, 0.0532, 0.1288, 0.1807]) -Greedy action tensor([ 1.0722, -0.4564, -0.0992, 0.3815]) tensor([0.4931, 0.1069, 0.1528, 0.2472]) -Greedy action tensor([ 0.9751, -0.2768, -0.2644, 0.2436]) tensor([0.4862, 0.1390, 0.1408, 0.2340]) -Greedy action tensor([ 1.4525, -0.0103, 0.0627, 0.0576]) tensor([0.5785, 0.1340, 0.1441, 0.1434]) -Greedy action tensor([ 0.8559, 0.1093, -0.3548, -0.2693]) tensor([0.4770, 0.2261, 0.1421, 0.1548]) -Greedy action tensor([ 1.5381, -0.4051, -0.2881, -0.0106]) tensor([0.6593, 0.0944, 0.1062, 0.1401]) -Greedy action tensor([ 0.9517, -0.5754, -0.3233, 0.4072]) tensor([0.4815, 0.1046, 0.1346, 0.2794]) -Greedy action tensor([ 0.6059, -0.2552, -0.6454, 0.8782]) tensor([0.3309, 0.1399, 0.0947, 0.4345]) -Greedy action tensor([ 1.3612, -0.3660, -0.5887, 0.2848]) tensor([0.6021, 0.1070, 0.0857, 0.2052]) -Greedy action tensor([ 1.0199, -0.8072, -0.2299, 0.6109]) tensor([0.4735, 0.0762, 0.1357, 0.3146]) -Greedy action tensor([ 0.9212, -0.1344, -0.0825, -0.0775]) tensor([0.4801, 0.1671, 0.1760, 0.1768]) -Greedy action tensor([ 1.2703, -0.5220, -0.2049, 0.2737]) tensor([0.5667, 0.0944, 0.1296, 0.2092]) -Greedy action tensor([ 0.8527, -0.3719, -0.4893, 0.8093]) tensor([0.3980, 0.1170, 0.1040, 0.3811]) -Greedy action tensor([ 1.1048, -0.3452, -0.4489, -0.2218]) tensor([0.5843, 0.1371, 0.1236, 0.1551]) -Greedy action tensor([ 1.1082, -0.1551, 0.1059, -0.0068]) tensor([0.5057, 0.1430, 0.1856, 0.1658]) -Greedy action tensor([ 1.0243, -0.1643, -0.1032, 0.0213]) tensor([0.5012, 0.1527, 0.1623, 0.1838]) -Greedy action tensor([ 1.2396, -0.8515, 0.2074, 0.2014]) tensor([0.5453, 0.0674, 0.1942, 0.1931]) -Greedy action tensor([ 1.4703, -0.4293, -0.0710, 0.1561]) tensor([0.6126, 0.0917, 0.1312, 0.1646]) -Greedy action tensor([ 1.2008, -0.0307, -0.1382, -0.0479]) tensor([0.5432, 0.1585, 0.1424, 0.1558]) -Greedy action tensor([ 1.3827, -0.5710, 0.2701, 0.0869]) tensor([0.5733, 0.0813, 0.1885, 0.1569]) -Greedy action tensor([ 0.6071, -0.0126, 0.0066, -0.1098]) tensor([0.3884, 0.2090, 0.2130, 0.1896]) -Greedy action tensor([ 1.8172, -0.8175, -0.4073, 0.3641]) tensor([0.7074, 0.0507, 0.0765, 0.1654]) -Greedy action tensor([ 0.7251, -0.1001, -0.1363, -0.1025]) tensor([0.4352, 0.1907, 0.1839, 0.1902]) -Greedy action tensor([ 0.8828, -0.3564, -0.0049, 0.1074]) tensor([0.4626, 0.1340, 0.1904, 0.2130]) -Greedy action tensor([ 0.9780, -0.1335, -0.3372, 0.0769]) tensor([0.4991, 0.1642, 0.1340, 0.2027]) -Greedy action tensor([ 2.0147, -0.5493, -0.4603, 0.5795]) tensor([0.7147, 0.0550, 0.0602, 0.1701]) -Greedy action tensor([ 1.0622, -0.1751, 0.0605, 0.3786]) tensor([0.4625, 0.1342, 0.1699, 0.2335]) -Greedy action tensor([ 1.4463, -0.5035, -0.2760, 0.1369]) tensor([0.6286, 0.0894, 0.1123, 0.1697]) -Greedy action tensor([ 1.2040, 0.0235, -0.3138, 0.0813]) tensor([0.5400, 0.1659, 0.1184, 0.1757]) -Greedy action tensor([ 1.8199, -0.0325, -0.2441, 0.6235]) tensor([0.6305, 0.0989, 0.0800, 0.1906]) -Greedy action tensor([ 0.5507, -0.2701, 0.0475, 0.1031]) tensor([0.3726, 0.1640, 0.2253, 0.2381]) -Greedy action tensor([ 1.2433, -0.4150, -0.4016, 0.2803]) tensor([0.5665, 0.1079, 0.1094, 0.2163]) -Greedy action tensor([ 0.8177, -0.2645, -0.3688, 0.4439]) tensor([0.4288, 0.1453, 0.1309, 0.2951]) -Greedy action tensor([ 1.1410, -0.7055, -0.3269, 0.7728]) tensor([0.4807, 0.0759, 0.1108, 0.3327]) -Greedy action tensor([ 1.5400, -0.3942, -0.2389, 0.1375]) tensor([0.6413, 0.0927, 0.1083, 0.1577]) -Greedy action tensor([ 1.2795, -0.3182, -0.5906, 0.4447]) tensor([0.5585, 0.1130, 0.0861, 0.2424]) -Greedy action tensor([ 1.0221, -0.7169, -0.0756, 0.3591]) tensor([0.4939, 0.0868, 0.1648, 0.2545]) -Greedy action tensor([ 0.9306, -0.3413, -0.4204, 0.3249]) tensor([0.4796, 0.1344, 0.1242, 0.2617]) -Greedy action tensor([ 1.6007, -0.6573, -0.1120, 0.0857]) tensor([0.6646, 0.0695, 0.1199, 0.1461]) -Greedy action tensor([ 0.8246, -0.3971, -0.2406, 0.0416]) tensor([0.4770, 0.1406, 0.1644, 0.2180]) -Greedy action tensor([ 1.5105, -0.5518, -0.3798, 0.5016]) tensor([0.6087, 0.0774, 0.0919, 0.2219]) -Greedy action tensor([ 1.1481, -0.3525, -0.3608, 0.4187]) tensor([0.5191, 0.1158, 0.1148, 0.2503]) -Greedy action tensor([ 0.5150, -0.3458, -0.0242, -0.1028]) tensor([0.3929, 0.1661, 0.2292, 0.2118]) -Greedy action tensor([ 1.4421, -0.4818, -0.3918, 0.2728]) tensor([0.6187, 0.0903, 0.0989, 0.1921]) -Greedy action tensor([ 1.3600, -0.4296, -0.6654, -0.2427]) tensor([0.6665, 0.1113, 0.0879, 0.1342]) -Greedy action tensor([ 1.6129, -0.2376, -0.5358, 0.4477]) tensor([0.6306, 0.0991, 0.0736, 0.1967]) -Greedy action tensor([ 0.7230, -0.3665, -0.2492, 0.0300]) tensor([0.4515, 0.1519, 0.1708, 0.2258]) -Greedy action tensor([ 0.7826, -0.6960, -0.1310, 0.1150]) tensor([0.4669, 0.1064, 0.1872, 0.2395]) -Greedy action tensor([ 1.7636, -0.7979, -0.3408, 0.2429]) tensor([0.7054, 0.0544, 0.0860, 0.1542]) -Greedy action tensor([ 1.3889, -0.1498, -0.0859, 0.2290]) tensor([0.5691, 0.1222, 0.1302, 0.1784]) -Greedy action tensor([ 1.3490, -0.4440, -0.1888, 0.0175]) tensor([0.6078, 0.1012, 0.1306, 0.1605]) -Greedy action tensor([ 1.1508, -0.2491, -0.3654, -0.2148]) tensor([0.5809, 0.1433, 0.1275, 0.1483]) -Greedy action tensor([ 0.7918, -0.1364, -0.0053, -0.2654]) tensor([0.4559, 0.1802, 0.2054, 0.1584]) -Greedy action tensor([ 0.8359, -0.4748, -0.0535, -0.3265]) tensor([0.5017, 0.1353, 0.2061, 0.1569]) -Greedy action tensor([ 0.5364, -0.2863, -0.1223, -0.2947]) tensor([0.4180, 0.1836, 0.2163, 0.1821]) -Greedy action tensor([ 0.4561, -0.2006, -0.0573, -0.1367]) tensor([0.3746, 0.1942, 0.2242, 0.2070]) -Greedy action tensor([ 1.2025, -0.6417, 0.0528, -0.7428]) tensor([0.6181, 0.0978, 0.1958, 0.0884]) -Greedy action tensor([ 0.5854, -0.2727, 0.0730, -0.3408]) tensor([0.4134, 0.1753, 0.2476, 0.1637]) -Greedy action tensor([ 0.5601, 0.0533, -0.0077, -0.0728]) tensor([0.3703, 0.2231, 0.2099, 0.1967]) -Greedy action tensor([ 0.7857, -0.5574, 0.1122, -0.6147]) tensor([0.4957, 0.1294, 0.2528, 0.1222]) -Greedy action tensor([ 0.4344, 0.1562, -0.1396, -0.1657]) tensor([0.3485, 0.2639, 0.1963, 0.1913]) -Greedy action tensor([ 0.4746, -0.1065, -0.0574, -0.3157]) tensor([0.3846, 0.2151, 0.2259, 0.1745]) -Greedy action tensor([ 0.8009, -0.8492, 0.0010, -0.4622]) tensor([0.5197, 0.0998, 0.2335, 0.1470]) -Greedy action tensor([ 0.8689, -0.3946, 0.0435, -0.5115]) tensor([0.5071, 0.1433, 0.2221, 0.1275]) -Greedy action tensor([ 0.4642, 0.2372, -0.1381, -0.1438]) tensor([0.3462, 0.2759, 0.1895, 0.1885]) -Greedy action tensor([ 1.0402, -0.6983, -0.0308, -0.6584]) tensor([0.5878, 0.1033, 0.2014, 0.1075]) -Greedy action tensor([ 0.3849, -0.2928, -0.0623, -0.4395]) tensor([0.3867, 0.1964, 0.2473, 0.1696]) -Greedy action tensor([ 0.7382, -0.5442, 0.0643, -0.6336]) tensor([0.4900, 0.1359, 0.2498, 0.1243]) -Greedy action tensor([ 0.2930, -0.3003, 0.2124, -0.2777]) tensor([0.3289, 0.1817, 0.3035, 0.1859]) -Greedy action tensor([ 0.6504, -0.3542, 0.0178, -0.3943]) tensor([0.4446, 0.1628, 0.2362, 0.1564]) -Greedy action tensor([ 0.6115, 0.2626, 0.1333, -0.4573]) tensor([0.3747, 0.2643, 0.2323, 0.1287]) -Greedy action tensor([ 0.6809, -0.5132, -0.0830, -0.2680]) tensor([0.4638, 0.1405, 0.2161, 0.1796]) -Greedy action tensor([ 0.5152, -0.2219, 0.1359, -0.3386]) tensor([0.3863, 0.1848, 0.2644, 0.1645]) -Greedy action tensor([ 0.5032, -0.2514, 0.1308, -0.3372]) tensor([0.3860, 0.1815, 0.2660, 0.1666]) -Greedy action tensor([ 0.1295, 0.0455, 0.0160, -0.1772]) tensor([0.2819, 0.2591, 0.2516, 0.2074]) -Greedy action tensor([ 0.7364, -0.1712, -0.0915, -0.4320]) tensor([0.4648, 0.1876, 0.2031, 0.1445]) -Greedy action tensor([ 0.2934, -0.1761, 0.1025, -0.2743]) tensor([0.3313, 0.2072, 0.2737, 0.1878]) -Greedy action tensor([ 1.1962, -1.0751, -0.0110, -0.7507]) tensor([0.6473, 0.0668, 0.1936, 0.0924]) -Greedy action tensor([ 0.6318, -0.3238, -0.0659, -0.2618]) tensor([0.4364, 0.1678, 0.2172, 0.1786]) -Greedy action tensor([ 0.7894, -0.7309, -0.0115, -0.4031]) tensor([0.5074, 0.1109, 0.2278, 0.1540]) -Greedy action tensor([ 0.6790, 0.1307, -0.1204, -0.2696]) tensor([0.4141, 0.2393, 0.1862, 0.1604]) -Greedy action tensor([ 0.5928, -0.1011, 0.1829, -0.1725]) tensor([0.3804, 0.1901, 0.2525, 0.1770]) -Greedy action tensor([ 0.6259, -0.5165, -0.0614, -0.4407]) tensor([0.4616, 0.1473, 0.2322, 0.1589]) -Greedy action tensor([ 0.5108, -0.2093, 0.0024, -0.1089]) tensor([0.3808, 0.1853, 0.2290, 0.2049]) -Greedy action tensor([ 0.7811, -0.3493, 0.0484, -0.4509]) tensor([0.4773, 0.1541, 0.2294, 0.1392]) -Greedy action tensor([ 0.5370, 0.1828, 0.0009, -0.2666]) tensor([0.3657, 0.2566, 0.2139, 0.1637]) -Greedy action tensor([ 0.7218, -0.3151, -0.0386, -0.1463]) tensor([0.4461, 0.1582, 0.2085, 0.1872]) -Greedy action tensor([ 0.4219, -0.2123, -0.0573, -0.3918]) tensor([0.3857, 0.2045, 0.2388, 0.1709]) -Greedy action tensor([ 0.5522, -0.6126, -0.0559, -0.1940]) tensor([0.4291, 0.1339, 0.2336, 0.2035]) -Greedy action tensor([ 0.3575, -0.0531, 0.0224, -0.1731]) tensor([0.3371, 0.2236, 0.2411, 0.1983]) -Greedy action tensor([ 0.3364, -0.0694, -0.1204, -0.1280]) tensor([0.3415, 0.2276, 0.2163, 0.2146]) -Greedy action tensor([ 0.5332, -0.2486, -0.1138, -0.3553]) tensor([0.4180, 0.1913, 0.2189, 0.1719]) -Greedy action tensor([ 0.1968, 0.2200, -0.0725, -0.1135]) tensor([0.2840, 0.2907, 0.2170, 0.2083]) -Greedy action tensor([ 0.5657, -0.4653, -0.1348, -0.2272]) tensor([0.4337, 0.1547, 0.2153, 0.1963]) -Greedy action tensor([ 0.4928, -0.2587, -0.0511, -0.2784]) tensor([0.3977, 0.1876, 0.2308, 0.1839]) -Greedy action tensor([ 0.5390, -0.1756, -0.0195, -0.3356]) tensor([0.4035, 0.1975, 0.2308, 0.1683]) -Greedy action tensor([ 0.5399, -0.3402, -0.0114, -0.1772]) tensor([0.4034, 0.1673, 0.2324, 0.1969]) -Greedy action tensor([ 0.6790, -0.4078, -0.0589, -0.3234]) tensor([0.4582, 0.1545, 0.2191, 0.1682]) -Greedy action tensor([ 0.3428, -0.3578, -0.0379, -0.0755]) tensor([0.3524, 0.1749, 0.2408, 0.2319]) -Greedy action tensor([ 0.4226, -0.1096, 0.1678, -0.4034]) tensor([0.3571, 0.2097, 0.2768, 0.1563]) -Greedy action tensor([ 0.6534, -0.1277, -0.0093, -0.5158]) tensor([0.4378, 0.2005, 0.2257, 0.1360]) -Greedy action tensor([ 0.7636, -0.6147, -0.1193, -0.5740]) tensor([0.5186, 0.1307, 0.2145, 0.1361]) -Greedy action tensor([ 0.5208, -0.2590, -0.0084, -0.2246]) tensor([0.3965, 0.1818, 0.2336, 0.1881]) -Greedy action tensor([ 0.6216, -0.4268, -0.0261, -0.3893]) tensor([0.4469, 0.1566, 0.2339, 0.1626]) -Greedy action tensor([ 0.7263, -0.2730, -0.0474, -0.2893]) tensor([0.4563, 0.1680, 0.2105, 0.1653]) -Greedy action tensor([ 0.4602, -0.1770, -0.0818, -0.2590]) tensor([0.3850, 0.2036, 0.2239, 0.1875]) -Greedy action tensor([ 0.9482, -0.9227, 0.0247, -0.4955]) tensor([0.5595, 0.0862, 0.2222, 0.1321]) -Greedy action tensor([ 0.2108, -0.1721, -0.0846, -0.3537]) tensor([0.3339, 0.2277, 0.2485, 0.1899]) -Greedy action tensor([ 0.5971, -0.2404, 0.2331, -0.2074]) tensor([0.3884, 0.1681, 0.2699, 0.1737]) -Greedy action tensor([ 0.5677, -0.3474, -0.0620, -0.2506]) tensor([0.4212, 0.1687, 0.2244, 0.1858]) -Greedy action tensor([ 0.5358, -0.0300, -0.0529, -0.0705]) tensor([0.3748, 0.2128, 0.2080, 0.2044]) -Greedy action tensor([ 1.0927, -1.0646, 0.1288, -0.3767]) tensor([0.5790, 0.0670, 0.2208, 0.1332]) -Greedy action tensor([ 0.2062, 0.0375, -0.0889, -0.4054]) tensor([0.3193, 0.2698, 0.2377, 0.1732]) -Greedy action tensor([ 0.5640, -0.4778, -0.1201, -0.0837]) tensor([0.4200, 0.1482, 0.2119, 0.2198]) -Greedy action tensor([ 0.6239, -0.3480, 0.0139, -0.3690]) tensor([0.4363, 0.1651, 0.2370, 0.1616]) -Greedy action tensor([ 0.3093, -0.3164, -0.0202, -0.3876]) tensor([0.3633, 0.1943, 0.2614, 0.1810]) -Greedy action tensor([ 0.7355, -0.4045, 0.0869, -0.5704]) tensor([0.4732, 0.1513, 0.2473, 0.1282]) -Greedy action tensor([ 0.7464, -0.4072, 0.0170, -0.3385]) tensor([0.4683, 0.1477, 0.2258, 0.1582]) -Greedy action tensor([ 0.9008, -0.7579, -0.0025, -0.4167]) tensor([0.5367, 0.1022, 0.2175, 0.1437]) -Greedy action tensor([ 0.5226, -0.5928, -0.0086, -0.4327]) tensor([0.4347, 0.1425, 0.2556, 0.1672]) -Greedy action tensor([ 0.6547, -0.3281, -0.0744, -0.4845]) tensor([0.4594, 0.1719, 0.2216, 0.1470]) -Greedy action tensor([ 0.5649, -0.1353, 0.0723, -0.1352]) tensor([0.3840, 0.1907, 0.2346, 0.1907]) -Greedy action tensor([ 0.6688, -0.3100, -0.1050, -0.3661]) tensor([0.4561, 0.1714, 0.2104, 0.1621]) -Greedy action tensor([ 1.0636, -0.4550, -0.2356, -0.4687]) tensor([0.5855, 0.1282, 0.1597, 0.1265]) -Greedy action tensor([ 0.9368, -0.8447, 0.0691, -0.4924]) tensor([0.5471, 0.0921, 0.2297, 0.1310]) -Greedy action tensor([ 0.9252, -0.9506, 0.0893, -0.4000]) tensor([0.5398, 0.0827, 0.2340, 0.1435]) -Greedy action tensor([ 0.5782, -0.1233, -0.1015, -0.0776]) tensor([0.3966, 0.1966, 0.2010, 0.2058]) -Greedy action tensor([ 0.6298, -0.4569, 0.0799, -0.2804]) tensor([0.4316, 0.1456, 0.2491, 0.1737]) -Greedy action tensor([ 0.4154, -0.0481, -0.1675, -0.1557]) tensor([0.3633, 0.2286, 0.2028, 0.2052]) -Greedy action tensor([ 0.5344, 0.0891, -0.0679, -0.1512]) tensor([0.3715, 0.2380, 0.2034, 0.1871]) -Greedy action tensor([ 0.7050, -0.6722, -0.0551, -0.3309]) tensor([0.4820, 0.1216, 0.2254, 0.1711]) -Greedy action tensor([ 0.6111, 0.1622, -0.1419, -0.1218]) tensor([0.3861, 0.2465, 0.1818, 0.1855]) -Greedy action tensor([ 0.6928, -0.5021, 0.2284, -1.0545]) tensor([0.4749, 0.1438, 0.2985, 0.0828]) -Greedy action tensor([ 0.4887, -1.0916, 0.1002, 0.6691]) tensor([0.3245, 0.0668, 0.2200, 0.3887]) -Greedy action tensor([1.3121, 0.0155, 0.1284, 1.0804]) tensor([0.4214, 0.1152, 0.1290, 0.3343]) -Greedy action tensor([-0.2298, 0.7827, -0.0118, 0.1681]) tensor([0.1542, 0.4244, 0.1918, 0.2296]) -Greedy action tensor([ 1.9133, -0.2977, 1.1189, 0.8927]) tensor([0.5203, 0.0570, 0.2351, 0.1875]) -Greedy action tensor([-0.1701, 0.5978, 0.8259, -1.1561]) tensor([0.1604, 0.3456, 0.4342, 0.0598]) -Greedy action tensor([ 0.1764, 0.2213, -0.3923, -0.4846]) tensor([0.3196, 0.3343, 0.1810, 0.1650]) -Greedy action tensor([ 0.2877, 0.5695, -0.0337, 0.5501]) tensor([0.2298, 0.3047, 0.1667, 0.2988]) -Greedy action tensor([ 0.0506, -0.7946, 0.1253, 0.4112]) tensor([0.2537, 0.1090, 0.2734, 0.3639]) -Greedy action tensor([1.6114, 0.0676, 1.1827, 1.2539]) tensor([0.3900, 0.0833, 0.2540, 0.2728]) -Greedy action tensor([ 0.8369, 0.8060, -0.3099, 1.6765]) tensor([0.2173, 0.2107, 0.0690, 0.5031]) -Greedy action tensor([-0.1002, -1.4251, -0.5516, 0.8763]) tensor([0.2194, 0.0583, 0.1397, 0.5826]) -Greedy action tensor([ 1.0861, -1.8681, 1.5212, 1.3763]) tensor([0.2542, 0.0132, 0.3928, 0.3398]) -Greedy action tensor([1.0932, 0.6597, 0.2896, 1.1764]) tensor([0.3142, 0.2037, 0.1407, 0.3415]) -Greedy action tensor([ 1.0244, 0.4144, -0.7061, 1.2622]) tensor([0.3346, 0.1818, 0.0593, 0.4244]) -Greedy action tensor([ 0.9409, -0.1594, 0.1030, 0.5363]) tensor([0.4111, 0.1368, 0.1778, 0.2743]) -Greedy action tensor([-0.1208, -2.1922, -0.3645, 1.1131]) tensor([0.1871, 0.0236, 0.1467, 0.6427]) -Greedy action tensor([0.0841, 1.2605, 1.7789, 0.3757]) tensor([0.0907, 0.2941, 0.4938, 0.1214]) -Greedy action tensor([ 0.5831, -0.1827, -0.5056, -0.1392]) tensor([0.4372, 0.2033, 0.1472, 0.2123]) -Greedy action tensor([ 0.2131, 0.9107, -0.2242, 1.4024]) tensor([0.1441, 0.2895, 0.0931, 0.4734]) -Greedy action tensor([ 0.3067, -0.7623, 0.9554, 0.0233]) tensor([0.2494, 0.0856, 0.4771, 0.1879]) -Greedy action tensor([ 0.9262, -1.2036, 1.4644, 0.6935]) tensor([0.2759, 0.0328, 0.4726, 0.2186]) -Greedy action tensor([ 0.6585, -0.4853, 0.1820, 0.9578]) tensor([0.3041, 0.0969, 0.1888, 0.4102]) -Greedy action tensor([1.1221, 0.0386, 0.4163, 1.4032]) tensor([0.3168, 0.1072, 0.1564, 0.4196]) -Greedy action tensor([-1.1516, -1.2678, -0.7421, 0.4210]) tensor([0.1217, 0.1084, 0.1833, 0.5866]) -Greedy action tensor([ 0.5039, -1.9819, 0.1081, 1.2761]) tensor([0.2550, 0.0212, 0.1717, 0.5520]) -Greedy action tensor([-0.4385, -0.5612, -0.0887, 0.4234]) tensor([0.1763, 0.1560, 0.2502, 0.4175]) -Greedy action tensor([-0.0884, 0.0027, -0.2744, 0.1688]) tensor([0.2370, 0.2596, 0.1968, 0.3065]) -Greedy action tensor([-0.1400, 1.6628, 0.6273, 0.6842]) tensor([0.0870, 0.5275, 0.1873, 0.1983]) -Greedy action tensor([-0.1214, -0.0503, 1.5591, -0.2517]) tensor([0.1202, 0.1291, 0.6452, 0.1055]) -Greedy action tensor([ 1.8278, -0.1378, 0.3728, 1.5451]) tensor([0.4701, 0.0658, 0.1097, 0.3543]) -Greedy action tensor([-0.3565, 0.4824, -0.1711, 0.1330]) tensor([0.1626, 0.3763, 0.1958, 0.2653]) -Greedy action tensor([ 0.0298, -0.3308, 0.9412, 1.1370]) tensor([0.1387, 0.0967, 0.3450, 0.4196]) -Greedy action tensor([ 1.3173, 0.4307, -0.6379, 0.5101]) tensor([0.5001, 0.2061, 0.0708, 0.2231]) -Greedy action tensor([ 1.6657, -1.0260, 0.5629, 0.3263]) tensor([0.6018, 0.0408, 0.1998, 0.1577]) -Greedy action tensor([ 0.0906, 1.5966, -0.1543, -0.0191]) tensor([0.1391, 0.6273, 0.1089, 0.1247]) -Greedy action tensor([0.7917, 0.0699, 0.7914, 0.9366]) tensor([0.2746, 0.1334, 0.2745, 0.3174]) -Greedy action tensor([ 0.0510, -0.8042, 0.1276, 1.3916]) tensor([0.1581, 0.0672, 0.1707, 0.6041]) -Greedy action tensor([-0.7970, -0.2814, 2.3482, -0.5669]) tensor([0.0368, 0.0617, 0.8552, 0.0463]) -Greedy action tensor([-0.0538, -0.6469, -0.9679, 1.3011]) tensor([0.1715, 0.0948, 0.0688, 0.6649]) -Greedy action tensor([ 0.9373, 0.7084, -1.1589, 0.4601]) tensor([0.3939, 0.3133, 0.0484, 0.2444]) -Greedy action tensor([ 0.7449, -1.0472, 0.9764, 1.3154]) tensor([0.2383, 0.0397, 0.3004, 0.4216]) -Greedy action tensor([-1.2491, -0.1603, 0.7906, -1.0723]) tensor([0.0778, 0.2311, 0.5982, 0.0928]) -Greedy action tensor([ 1.0511, -0.8497, 1.8708, 0.9814]) tensor([0.2298, 0.0343, 0.5216, 0.2143]) -Greedy action tensor([ 0.3744, 1.3764, -0.0120, -0.0459]) tensor([0.1976, 0.5383, 0.1343, 0.1298]) -Greedy action tensor([ 0.4197, -1.5406, -0.5038, 1.6788]) tensor([0.1976, 0.0278, 0.0785, 0.6961]) -Greedy action tensor([ 0.2959, 0.4621, -0.3307, 0.8814]) tensor([0.2217, 0.2618, 0.1185, 0.3981]) -Greedy action tensor([-0.2352, 0.8802, 0.8192, -0.1407]) tensor([0.1247, 0.3804, 0.3579, 0.1370]) -Greedy action tensor([ 0.1122, -0.7721, 0.3793, 1.0083]) tensor([0.1935, 0.0799, 0.2527, 0.4740]) -Greedy action tensor([0.7795, 0.1170, 1.3269, 0.7045]) tensor([0.2397, 0.1236, 0.4144, 0.2224]) -Greedy action tensor([ 0.9903, -1.2995, 2.4205, -0.0519]) tensor([0.1775, 0.0180, 0.7419, 0.0626]) -Greedy action tensor([ 1.1424, -0.3741, 0.3434, 0.9866]) tensor([0.3960, 0.0869, 0.1781, 0.3389]) -Greedy action tensor([ 0.4520, -0.5794, 0.9904, 0.3366]) tensor([0.2525, 0.0900, 0.4325, 0.2250]) -Greedy action tensor([1.3789, 0.5359, 0.4926, 0.9237]) tensor([0.4037, 0.1738, 0.1664, 0.2561]) -Greedy action tensor([0.7694, 0.2495, 1.4592, 0.1926]) tensor([0.2410, 0.1433, 0.4804, 0.1354]) -Greedy action tensor([-0.1029, -0.5319, 0.4427, 0.1054]) tensor([0.2170, 0.1413, 0.3744, 0.2673]) -Greedy action tensor([ 0.4138, -1.1226, 0.8897, 0.3507]) tensor([0.2657, 0.0572, 0.4277, 0.2495]) -Greedy action tensor([ 0.3558, -0.8855, 0.0625, -0.2821]) tensor([0.3901, 0.1128, 0.2910, 0.2061]) -Greedy action tensor([-0.8235, -1.7336, 0.7540, 0.2997]) tensor([0.1073, 0.0432, 0.5196, 0.3299]) -Greedy action tensor([-0.6915, -0.9806, 0.7976, 0.6183]) tensor([0.1011, 0.0757, 0.4484, 0.3748]) -Greedy action tensor([-0.5874, -0.2121, -0.0224, 0.1389]) tensor([0.1592, 0.2317, 0.2801, 0.3291]) -Greedy action tensor([0.4264, 0.0753, 0.6513, 0.1615]) tensor([0.2686, 0.1890, 0.3363, 0.2061]) -Greedy action tensor([ 1.5460, -0.4161, 0.4120, -0.4315]) tensor([0.6247, 0.0878, 0.2010, 0.0865]) -Greedy action tensor([0.5215, 0.5533, 0.0533, 0.9629]) tensor([0.2373, 0.2450, 0.1486, 0.3690]) -Greedy action tensor([ 0.6159, -0.0779, -0.9728, 0.2289]) tensor([0.4197, 0.2097, 0.0857, 0.2850]) -Greedy action tensor([ 1.0432, -1.1895, 1.5496, 0.6794]) tensor([0.2889, 0.0310, 0.4793, 0.2008]) -Greedy action tensor([ 0.1927, 0.1825, -1.1028, 1.1335]) tensor([0.2072, 0.2051, 0.0567, 0.5309]) -Greedy action tensor([ 0.2222, -1.3205, 0.3240, 0.8232]) tensor([0.2413, 0.0516, 0.2671, 0.4400]) -Greedy action tensor([ 0.9734, -1.6624, -0.4592, 0.1381]) tensor([0.5734, 0.0411, 0.1369, 0.2487]) -Greedy action tensor([ 0.6266, -1.3995, 1.7874, -0.3287]) tensor([0.2123, 0.0280, 0.6780, 0.0817]) -Greedy action tensor([ 0.3162, -0.0112, -0.0290, -0.8659]) tensor([0.3656, 0.2635, 0.2588, 0.1121]) -Greedy action tensor([ 0.6501, -1.6441, -0.8026, 0.9866]) tensor([0.3656, 0.0369, 0.0855, 0.5119]) -Greedy action tensor([ 1.0699, -1.1401, 1.2388, 0.8775]) tensor([0.3207, 0.0352, 0.3797, 0.2645]) -Greedy action tensor([ 0.4735, -1.3895, 1.6718, 0.3446]) tensor([0.1870, 0.0290, 0.6197, 0.1644]) -Greedy action tensor([-0.3221, -2.7343, -0.0871, 0.6675]) tensor([0.1982, 0.0178, 0.2507, 0.5333]) -Greedy action tensor([-0.0541, -0.7161, 0.6802, -1.0639]) tensor([0.2523, 0.1301, 0.5257, 0.0919]) -Greedy action tensor([ 1.2614, -0.7145, 0.7829, 2.0789]) tensor([0.2486, 0.0345, 0.1540, 0.5630]) -Greedy action tensor([ 0.1150, -0.5501, 0.9406, -0.0420]) tensor([0.2150, 0.1105, 0.4908, 0.1837]) -Greedy action tensor([-0.5793, -1.4379, 0.3323, -0.0439]) tensor([0.1779, 0.0754, 0.4427, 0.3039]) -Greedy action tensor([-0.8564, -1.1720, -0.7261, 1.0143]) tensor([0.1068, 0.0779, 0.1217, 0.6936]) -Greedy action tensor([0.3213, 0.1737, 0.5643, 0.8078]) tensor([0.2099, 0.1811, 0.2676, 0.3414]) -Greedy action tensor([ 0.2734, 1.2506, 0.9529, -0.8745]) tensor([0.1681, 0.4468, 0.3317, 0.0534]) -Greedy action tensor([ 0.5355, -0.1343, -0.3353, 0.0461]) tensor([0.3932, 0.2012, 0.1646, 0.2410]) -Greedy action tensor([-1.8899, -0.4930, 0.6787, -0.1153]) tensor([0.0417, 0.1685, 0.5439, 0.2459]) -Greedy action tensor([-1.5114, -0.5371, 0.4534, 0.0027]) tensor([0.0652, 0.1728, 0.4654, 0.2965]) -Greedy action tensor([-1.0520, -0.5799, 0.2420, 0.3749]) tensor([0.0960, 0.1539, 0.3502, 0.3999]) -Greedy action tensor([-1.9787, -0.6884, 1.2400, 0.5811]) tensor([0.0235, 0.0854, 0.5873, 0.3039]) -Greedy action tensor([-0.7593, -0.6825, 0.0572, -0.1503]) tensor([0.1618, 0.1747, 0.3660, 0.2975]) -Greedy action tensor([-1.9110, -0.4687, 0.6912, -0.1280]) tensor([0.0405, 0.1715, 0.5469, 0.2411]) -Greedy action tensor([-1.7376, -0.4713, 0.5596, -0.0564]) tensor([0.0503, 0.1786, 0.5007, 0.2704]) -Greedy action tensor([-1.8124, -0.3415, 0.5812, -0.0847]) tensor([0.0456, 0.1985, 0.4994, 0.2566]) -Greedy action tensor([-1.1419, -0.0694, 0.3986, -0.7457]) tensor([0.0993, 0.2901, 0.4632, 0.1475]) -Greedy action tensor([-0.2747, 0.0569, 1.0271, 1.4895]) tensor([0.0840, 0.1170, 0.3087, 0.4903]) -Greedy action tensor([-1.4689, -0.4128, 0.5204, 0.4404]) tensor([0.0558, 0.1603, 0.4076, 0.3763]) -Greedy action tensor([-1.0870, -0.5559, 0.2487, 0.3936]) tensor([0.0918, 0.1561, 0.3489, 0.4033]) -Greedy action tensor([-1.0613, -0.6284, 0.3230, 0.0129]) tensor([0.1057, 0.1630, 0.4219, 0.3094]) -Greedy action tensor([-1.4699, -0.5219, 0.4452, 0.1902]) tensor([0.0640, 0.1651, 0.4343, 0.3366]) -Greedy action tensor([-1.8518, -0.4288, 0.6648, -0.0818]) tensor([0.0427, 0.1773, 0.5292, 0.2508]) -Greedy action tensor([-1.6801, -0.5324, 0.6041, 0.1065]) tensor([0.0502, 0.1580, 0.4924, 0.2994]) -Greedy action tensor([-1.5602, -0.5907, 0.4887, 0.0339]) tensor([0.0613, 0.1616, 0.4755, 0.3017]) -Greedy action tensor([-1.3664, -0.6359, 0.4213, 0.0556]) tensor([0.0758, 0.1573, 0.4528, 0.3141]) -Greedy action tensor([-1.6722, 0.0339, 0.4754, -0.1074]) tensor([0.0504, 0.2774, 0.4314, 0.2409]) -Greedy action tensor([-1.8312, -0.4485, 0.6100, -0.1215]) tensor([0.0455, 0.1812, 0.5221, 0.2512]) -Greedy action tensor([-1.8796, -0.4708, 0.6370, -0.1351]) tensor([0.0431, 0.1763, 0.5339, 0.2467]) -Greedy action tensor([-1.5879, -0.6107, 1.0917, 0.7482]) tensor([0.0350, 0.0930, 0.5102, 0.3619]) -Greedy action tensor([-1.9094, -0.5233, 1.0883, 0.4702]) tensor([0.0279, 0.1116, 0.5592, 0.3013]) -Greedy action tensor([-0.6002, -0.5739, 0.1922, 0.1722]) tensor([0.1562, 0.1604, 0.3451, 0.3383]) -Greedy action tensor([-1.7285, -0.3107, 0.5601, 0.0098]) tensor([0.0484, 0.1996, 0.4769, 0.2751]) -Greedy action tensor([-1.8638, -0.4439, 0.6687, -0.0939]) tensor([0.0424, 0.1754, 0.5334, 0.2488]) -Greedy action tensor([-1.2684, 0.8823, 0.1907, 0.0813]) tensor([0.0563, 0.4840, 0.2424, 0.2173]) -Greedy action tensor([-1.4755, -0.5365, 1.2095, 1.1004]) tensor([0.0319, 0.0816, 0.4674, 0.4191]) -Greedy action tensor([-0.8052, -0.3555, 0.5081, 1.0257]) tensor([0.0798, 0.1252, 0.2969, 0.4981]) -Greedy action tensor([-1.4723, -0.5002, 0.5770, 0.3652]) tensor([0.0565, 0.1495, 0.4389, 0.3551]) -Greedy action tensor([-1.8713, -0.3378, 0.6100, -0.1410]) tensor([0.0430, 0.1995, 0.5146, 0.2428]) -Greedy action tensor([-1.5750, -0.5649, 0.5290, 0.0938]) tensor([0.0580, 0.1592, 0.4753, 0.3076]) -Greedy action tensor([-1.8877, -0.4459, 0.6354, -0.1604]) tensor([0.0429, 0.1813, 0.5346, 0.2412]) -Greedy action tensor([-1.9073, -0.4627, 0.6457, -0.1583]) tensor([0.0420, 0.1779, 0.5389, 0.2412]) -Greedy action tensor([-1.6657, -0.4688, 0.5742, -0.1610]) tensor([0.0549, 0.1818, 0.5159, 0.2473]) -Greedy action tensor([-1.1333, -0.5904, 0.2736, 0.3344]) tensor([0.0897, 0.1544, 0.3664, 0.3894]) -Greedy action tensor([-1.5149, -0.5581, 0.4630, 0.0705]) tensor([0.0636, 0.1657, 0.4600, 0.3107]) -Greedy action tensor([-1.5315, -0.5354, 0.5464, 0.2724]) tensor([0.0563, 0.1524, 0.4495, 0.3418]) -Greedy action tensor([-1.8710, -0.4511, 0.6292, -0.1397]) tensor([0.0435, 0.1801, 0.5305, 0.2459]) -Greedy action tensor([-0.7895, -0.4010, 0.4118, 1.2465]) tensor([0.0743, 0.1096, 0.2470, 0.5691]) -Greedy action tensor([-1.7770, -0.4947, 0.5996, -0.0569]) tensor([0.0477, 0.1720, 0.5138, 0.2665]) -Greedy action tensor([-1.0010, -0.6229, 1.0254, 1.4612]) tensor([0.0459, 0.0670, 0.3484, 0.5387]) -Greedy action tensor([-1.8419, -0.6555, 0.2271, -0.2499]) tensor([0.0585, 0.1915, 0.4628, 0.2872]) -Greedy action tensor([-1.7665, -0.4518, 0.5874, -0.1133]) tensor([0.0488, 0.1819, 0.5141, 0.2551]) -Greedy action tensor([-1.2519, -0.4998, 0.7582, 0.9880]) tensor([0.0501, 0.1062, 0.3736, 0.4701]) -Greedy action tensor([-1.1032, -0.6747, 1.0682, 1.3047]) tensor([0.0446, 0.0685, 0.3913, 0.4957]) -Greedy action tensor([-0.9133, -0.5614, 0.2353, 0.2237]) tensor([0.1150, 0.1636, 0.3628, 0.3586]) -Greedy action tensor([-1.7402, -0.3804, 0.8492, 0.5570]) tensor([0.0355, 0.1383, 0.4730, 0.3532]) -Greedy action tensor([-1.7683, -0.4484, 0.5797, -0.1024]) tensor([0.0488, 0.1826, 0.5105, 0.2581]) -Greedy action tensor([-1.8671, -0.4794, 0.6501, -0.1148]) tensor([0.0432, 0.1729, 0.5350, 0.2490]) -Greedy action tensor([-1.6447, -0.3426, 0.4921, -0.0453]) tensor([0.0553, 0.2032, 0.4681, 0.2735]) -Greedy action tensor([-1.4970, 0.8829, 0.3484, 0.2265]) tensor([0.0421, 0.4551, 0.2667, 0.2361]) -Greedy action tensor([-1.3676, -0.6034, -0.1871, -0.6098]) tensor([0.1171, 0.2515, 0.3814, 0.2499]) -Greedy action tensor([-1.9639, -0.5628, 1.4111, 0.7343]) tensor([0.0204, 0.0826, 0.5947, 0.3023]) -Greedy action tensor([-1.9313, -0.4463, 0.6625, -0.1692]) tensor([0.0406, 0.1793, 0.5435, 0.2366]) -Greedy action tensor([-1.8527, -0.4737, 0.6199, -0.1383]) tensor([0.0447, 0.1775, 0.5297, 0.2482]) -Greedy action tensor([-1.9002, -0.3786, 0.6439, -0.1346]) tensor([0.0414, 0.1896, 0.5271, 0.2420]) -Greedy action tensor([-0.9527, 0.4097, 0.3999, -0.4439]) tensor([0.0958, 0.3742, 0.3706, 0.1594]) -Greedy action tensor([-1.5670, -0.5910, 0.4771, 0.0333]) tensor([0.0612, 0.1625, 0.4729, 0.3034]) -Greedy action tensor([-1.7897, -0.3931, 0.6079, -0.0389]) tensor([0.0459, 0.1854, 0.5045, 0.2642]) -Greedy action tensor([-1.9409, -0.4532, 0.6643, -0.1792]) tensor([0.0404, 0.1786, 0.5461, 0.2349]) -Greedy action tensor([-0.7166, -0.6277, 0.2331, 0.0788]) tensor([0.1451, 0.1586, 0.3750, 0.3214]) -Greedy action tensor([-1.8695, -0.4589, 0.6389, -0.1217]) tensor([0.0432, 0.1772, 0.5312, 0.2483]) -Greedy action tensor([-1.9050, -0.4638, 0.6652, -0.1261]) tensor([0.0413, 0.1745, 0.5396, 0.2446]) -Greedy action tensor([-1.7183, -0.5009, 0.5585, -0.0141]) tensor([0.0510, 0.1722, 0.4967, 0.2802]) -Greedy action tensor([-1.1066, -0.5635, 0.2987, 0.6029]) tensor([0.0811, 0.1397, 0.3308, 0.4484]) -Greedy action tensor([-1.9824, -0.9718, 0.4659, -0.1766]) tensor([0.0467, 0.1284, 0.5406, 0.2843]) -Greedy action tensor([-0.6337, -0.5794, 0.2040, 0.2496]) tensor([0.1474, 0.1556, 0.3406, 0.3565]) -Greedy action tensor([-0.7681, -0.5361, 0.2085, 0.0925]) tensor([0.1373, 0.1732, 0.3647, 0.3248]) -Greedy action tensor([-1.4251, -0.2724, 0.3961, 0.2458]) tensor([0.0638, 0.2022, 0.3945, 0.3395]) -Greedy action tensor([-0.9907, -0.7961, 0.2219, 1.1259]) tensor([0.0720, 0.0875, 0.2422, 0.5982]) -Greedy action tensor([-1.3509, -0.5509, 0.3768, 0.2609]) tensor([0.0721, 0.1605, 0.4059, 0.3615]) -Greedy action tensor([-1.2597, -0.5649, 0.3105, 0.2412]) tensor([0.0813, 0.1629, 0.3910, 0.3648]) -Greedy action tensor([-1.9350, -0.4524, 0.6613, -0.1752]) tensor([0.0406, 0.1788, 0.5446, 0.2360]) -Greedy action tensor([-1.2830, -0.5318, 0.3021, 0.2271]) tensor([0.0798, 0.1692, 0.3896, 0.3614]) -Greedy action tensor([-1.0924, -0.6044, 0.3571, 0.2018]) tensor([0.0949, 0.1546, 0.4043, 0.3462]) -Greedy action tensor([-1.8780, -0.6839, 0.2007, -0.3552]) tensor([0.0592, 0.1955, 0.4736, 0.2716]) -Greedy action tensor([-1.1690, -0.6303, 0.4708, 0.4323]) tensor([0.0780, 0.1336, 0.4018, 0.3866]) -Greedy action tensor([-1.8266, -0.3952, 0.6135, -0.0723]) tensor([0.0446, 0.1865, 0.5114, 0.2576]) -Greedy action tensor([-1.0240, -0.6482, 0.2650, 0.1622]) tensor([0.1068, 0.1556, 0.3877, 0.3499]) -Greedy action tensor([-0.9815, -0.7014, -0.3266, -0.5416]) tensor([0.1724, 0.2281, 0.3318, 0.2676]) -Greedy action tensor([ 0.7660, -0.1692, -0.6054, 0.6393]) tensor([0.3957, 0.1553, 0.1004, 0.3486]) -Greedy action tensor([ 1.3278, -0.5063, -0.2707, 0.4379]) tensor([0.5641, 0.0901, 0.1141, 0.2317]) -Greedy action tensor([ 1.1580, -0.5259, -0.0731, -0.0319]) tensor([0.5612, 0.1042, 0.1639, 0.1707]) -Greedy action tensor([ 1.9131, -1.2290, -0.4630, 0.6871]) tensor([0.6995, 0.0302, 0.0650, 0.2053]) -Greedy action tensor([ 0.9167, -0.3931, 0.1836, 0.1791]) tensor([0.4487, 0.1211, 0.2156, 0.2146]) -Greedy action tensor([ 1.0021, -0.1058, -0.3409, -0.2218]) tensor([0.5304, 0.1752, 0.1385, 0.1560]) -Greedy action tensor([ 1.7155, -0.2193, -0.8029, 0.5927]) tensor([0.6450, 0.0932, 0.0520, 0.2099]) -Greedy action tensor([ 1.1098, -0.3977, -0.0830, 0.0205]) tensor([0.5373, 0.1190, 0.1630, 0.1808]) -Greedy action tensor([ 1.4696, -0.5475, -0.5415, 0.6451]) tensor([0.5864, 0.0780, 0.0785, 0.2571]) -Greedy action tensor([ 1.2270, -0.4184, 0.0294, 0.0471]) tensor([0.5549, 0.1071, 0.1675, 0.1705]) -Greedy action tensor([ 1.3399, 0.1277, -0.2647, 0.4240]) tensor([0.5267, 0.1567, 0.1058, 0.2108]) -Greedy action tensor([ 1.5288, -0.4197, -0.1998, 0.1376]) tensor([0.6374, 0.0908, 0.1132, 0.1586]) -Greedy action tensor([ 1.5179, -0.4728, -0.6003, 0.4378]) tensor([0.6264, 0.0856, 0.0753, 0.2127]) -Greedy action tensor([ 1.0553, -0.8744, -0.5903, 0.8136]) tensor([0.4710, 0.0684, 0.0908, 0.3698]) -Greedy action tensor([ 1.1553, -0.2275, -0.8035, 0.1940]) tensor([0.5636, 0.1414, 0.0795, 0.2155]) -Greedy action tensor([ 0.3976, -0.2579, 0.2274, -0.1922]) tensor([0.3428, 0.1780, 0.2892, 0.1901]) -Greedy action tensor([ 1.0364, -0.8627, -0.6280, 0.7403]) tensor([0.4802, 0.0719, 0.0909, 0.3571]) -Greedy action tensor([ 0.8959, -0.5210, -0.0576, -0.0228]) tensor([0.4934, 0.1196, 0.1901, 0.1969]) -Greedy action tensor([ 1.4462, -0.8124, -0.5807, 0.5536]) tensor([0.6076, 0.0635, 0.0800, 0.2489]) -Greedy action tensor([ 0.8843, -0.1854, -0.2302, 0.1851]) tensor([0.4612, 0.1582, 0.1513, 0.2292]) -Greedy action tensor([ 0.8542, -0.7265, -0.2502, 0.5949]) tensor([0.4331, 0.0891, 0.1435, 0.3342]) -Greedy action tensor([ 1.8413, -0.8228, -0.5941, 0.7826]) tensor([0.6648, 0.0463, 0.0582, 0.2306]) -Greedy action tensor([ 1.4253, -0.6655, -0.2443, 0.3271]) tensor([0.6078, 0.0751, 0.1145, 0.2027]) -Greedy action tensor([ 1.4366, -0.1721, -0.0022, 0.1222]) tensor([0.5862, 0.1173, 0.1391, 0.1575]) -Greedy action tensor([ 1.0620, -0.4637, -0.2075, 0.0891]) tensor([0.5329, 0.1159, 0.1497, 0.2014]) -Greedy action tensor([ 1.2003, 0.1008, -0.5881, 0.2666]) tensor([0.5282, 0.1759, 0.0883, 0.2076]) -Greedy action tensor([ 1.6178, -0.3774, -0.2456, 0.1907]) tensor([0.6531, 0.0888, 0.1013, 0.1567]) -Greedy action tensor([ 1.4038, -0.2221, -0.6559, 0.5578]) tensor([0.5703, 0.1122, 0.0727, 0.2447]) -Greedy action tensor([ 0.5388, -0.5129, -0.1719, 0.2247]) tensor([0.3889, 0.1359, 0.1911, 0.2841]) -Greedy action tensor([ 2.2358, -1.0955, -0.0836, 0.5537]) tensor([0.7575, 0.0271, 0.0745, 0.1409]) -Greedy action tensor([ 1.1320, -0.3083, -0.1985, 0.2895]) tensor([0.5177, 0.1226, 0.1368, 0.2229]) -Greedy action tensor([ 1.6267, -0.6565, -0.3445, 0.7787]) tensor([0.5990, 0.0611, 0.0834, 0.2565]) -Greedy action tensor([ 0.3493, -0.1093, 0.0866, -0.1602]) tensor([0.3331, 0.2106, 0.2562, 0.2001]) -Greedy action tensor([ 1.8524, -0.6793, -0.5900, 0.2060]) tensor([0.7357, 0.0585, 0.0640, 0.1418]) -Greedy action tensor([ 1.3413, -0.3963, -0.2511, 0.4034]) tensor([0.5647, 0.0994, 0.1149, 0.2211]) -Greedy action tensor([ 1.4837, -0.3222, -0.1934, 0.3432]) tensor([0.5985, 0.0983, 0.1119, 0.1913]) -Greedy action tensor([ 1.3646, -0.4412, -0.1468, -0.0754]) tensor([0.6166, 0.1013, 0.1360, 0.1461]) -Greedy action tensor([ 1.3967, 0.1824, -0.1270, 0.2531]) tensor([0.5454, 0.1619, 0.1188, 0.1738]) -Greedy action tensor([ 1.1254, -0.4911, -0.2736, 0.0904]) tensor([0.5553, 0.1103, 0.1371, 0.1973]) -Greedy action tensor([ 1.4482, -0.4270, -0.2920, 0.1094]) tensor([0.6285, 0.0964, 0.1103, 0.1648]) -Greedy action tensor([ 1.8958, -0.7997, -0.3512, 0.4237]) tensor([0.7129, 0.0481, 0.0754, 0.1636]) -Greedy action tensor([ 1.0621, -0.3729, -0.3662, 0.1786]) tensor([0.5288, 0.1259, 0.1268, 0.2186]) -Greedy action tensor([ 1.4054, -0.3011, -0.3785, 0.0089]) tensor([0.6262, 0.1136, 0.1052, 0.1550]) -Greedy action tensor([ 0.6790, -0.0967, -0.0336, -0.1808]) tensor([0.4212, 0.1939, 0.2066, 0.1783]) -Greedy action tensor([ 0.4820, -0.2660, -0.0097, 0.0494]) tensor([0.3658, 0.1731, 0.2237, 0.2373]) -Greedy action tensor([ 1.4098, -0.1641, -0.5594, 0.3032]) tensor([0.5961, 0.1235, 0.0832, 0.1971]) -Greedy action tensor([ 0.7173, -0.2775, -0.1143, -0.0576]) tensor([0.4413, 0.1632, 0.1921, 0.2033]) -Greedy action tensor([ 1.2235, -0.2645, -0.1017, 0.0196]) tensor([0.5582, 0.1260, 0.1483, 0.1675]) -Greedy action tensor([ 1.3037, -0.7937, -0.3752, 0.3969]) tensor([0.5837, 0.0717, 0.1089, 0.2357]) -Greedy action tensor([ 1.5387, -0.0533, -0.2825, 0.0442]) tensor([0.6290, 0.1280, 0.1018, 0.1411]) -Greedy action tensor([ 1.5404, -0.0589, -0.1543, 0.3324]) tensor([0.5937, 0.1199, 0.1090, 0.1774]) -Greedy action tensor([ 1.2708, -0.4457, -0.1958, 0.3644]) tensor([0.5512, 0.0990, 0.1272, 0.2227]) -Greedy action tensor([ 1.2824, 0.0300, -0.5685, 0.2799]) tensor([0.5525, 0.1579, 0.0868, 0.2027]) -Greedy action tensor([ 1.5029, -0.3094, -0.0757, 0.2483]) tensor([0.6043, 0.0987, 0.1246, 0.1723]) -Greedy action tensor([ 1.1778, -0.3656, -0.0905, 0.0456]) tensor([0.5503, 0.1176, 0.1548, 0.1774]) -Greedy action tensor([ 1.3516, -0.6919, -0.2441, 0.2769]) tensor([0.5975, 0.0774, 0.1211, 0.2040]) -Greedy action tensor([ 1.0239, -0.0528, -0.6957, 0.4225]) tensor([0.4836, 0.1648, 0.0866, 0.2650]) -Greedy action tensor([ 1.1014, -0.3408, -0.4582, 0.2610]) tensor([0.5324, 0.1259, 0.1119, 0.2298]) -Greedy action tensor([ 0.6725, 0.1144, -0.5914, -0.1130]) tensor([0.4328, 0.2477, 0.1223, 0.1973]) -Greedy action tensor([ 1.2464, -0.4494, -0.0524, 0.2503]) tensor([0.5477, 0.1005, 0.1495, 0.2023]) -Greedy action tensor([ 0.7320, -0.2377, -0.1618, 0.1359]) tensor([0.4275, 0.1621, 0.1749, 0.2355]) -Greedy action tensor([ 1.0041, -0.7740, -0.5348, 0.6663]) tensor([0.4769, 0.0806, 0.1023, 0.3402]) -Greedy action tensor([ 1.3223, -0.0235, 0.0943, -0.3288]) tensor([0.5730, 0.1492, 0.1678, 0.1099]) -Greedy action tensor([ 1.1187, -0.6895, -0.1186, 0.2012]) tensor([0.5395, 0.0884, 0.1565, 0.2155]) -Greedy action tensor([ 1.1545, -0.5013, -0.0070, 0.0691]) tensor([0.5430, 0.1037, 0.1700, 0.1834]) -Greedy action tensor([ 1.0421, -0.4997, 0.1004, -0.0382]) tensor([0.5146, 0.1101, 0.2007, 0.1747]) -Greedy action tensor([ 1.5877, -1.0164, -0.1757, 0.4828]) tensor([0.6342, 0.0469, 0.1087, 0.2101]) -Greedy action tensor([ 1.4675, -0.5933, -0.0856, 0.1150]) tensor([0.6260, 0.0797, 0.1324, 0.1619]) -Greedy action tensor([ 0.9882, -0.3263, -0.2625, 0.1330]) tensor([0.5050, 0.1357, 0.1446, 0.2147]) -Greedy action tensor([ 1.4477, -0.4349, -0.2590, 0.4167]) tensor([0.5916, 0.0900, 0.1074, 0.2110]) -Greedy action tensor([ 1.2537, -0.4909, -0.2527, 0.0850]) tensor([0.5857, 0.1023, 0.1299, 0.1820]) -Greedy action tensor([ 1.3854, -0.7077, -0.4034, 0.5576]) tensor([0.5789, 0.0714, 0.0968, 0.2530]) -Greedy action tensor([ 1.6655, -1.0874, -0.3041, 0.5644]) tensor([0.6512, 0.0415, 0.0908, 0.2165]) -Greedy action tensor([ 0.8636, -0.2231, -0.1707, 0.2653]) tensor([0.4459, 0.1504, 0.1585, 0.2451]) -Greedy action tensor([ 1.5181, -0.5443, -0.2153, 0.4960]) tensor([0.6011, 0.0764, 0.1062, 0.2163]) -Greedy action tensor([ 1.1825, -0.3442, -0.3708, 0.0846]) tensor([0.5674, 0.1233, 0.1200, 0.1893]) -Greedy action tensor([ 0.8822, -0.4505, -0.0995, 0.2373]) tensor([0.4623, 0.1219, 0.1732, 0.2426]) -Greedy action tensor([ 0.9976, 0.0771, -0.6991, -0.6066]) tensor([0.5610, 0.2234, 0.1028, 0.1128]) -Greedy action tensor([ 1.0341, -0.1684, -0.1065, -0.0589]) tensor([0.5114, 0.1537, 0.1635, 0.1714]) -Greedy action tensor([ 1.0707, 0.2807, -0.3359, 0.1291]) tensor([0.4787, 0.2173, 0.1173, 0.1867]) -Greedy action tensor([ 1.6257, -0.9234, -0.3703, 1.0405]) tensor([0.5646, 0.0441, 0.0767, 0.3145]) -Greedy action tensor([ 0.3350, 0.4257, -0.1197, -0.1087]) tensor([0.2966, 0.3248, 0.1883, 0.1903]) -Greedy action tensor([ 0.3467, -0.0264, 0.0299, -0.1576]) tensor([0.3310, 0.2279, 0.2411, 0.1999]) -Greedy action tensor([ 0.5885, -0.4214, -0.1597, -0.2976]) tensor([0.4445, 0.1619, 0.2103, 0.1832]) -Greedy action tensor([-0.0193, -0.0850, 0.1559, -0.1311]) tensor([0.2486, 0.2328, 0.2962, 0.2223]) -Greedy action tensor([ 0.7827, -0.2237, 0.1359, -0.5496]) tensor([0.4644, 0.1698, 0.2432, 0.1225]) -Greedy action tensor([0.4752, 0.1252, 0.0128, 0.0629]) tensor([0.3337, 0.2352, 0.2102, 0.2210]) -Greedy action tensor([ 0.5970, -0.3739, 0.0358, -0.2763]) tensor([0.4225, 0.1600, 0.2410, 0.1764]) -Greedy action tensor([ 0.4446, -0.0488, -0.0427, -0.2202]) tensor([0.3651, 0.2229, 0.2243, 0.1878]) -Greedy action tensor([ 0.3856, 0.0394, -0.0903, -0.1398]) tensor([0.3425, 0.2422, 0.2128, 0.2025]) -Greedy action tensor([ 0.5020, -0.2285, 0.0755, -0.2637]) tensor([0.3847, 0.1853, 0.2511, 0.1789]) -Greedy action tensor([ 0.3715, -0.0226, 0.0924, -0.2891]) tensor([0.3393, 0.2288, 0.2567, 0.1753]) -Greedy action tensor([ 0.6137, -0.3529, -0.0532, -0.3604]) tensor([0.4403, 0.1675, 0.2260, 0.1662]) -Greedy action tensor([ 0.5422, 0.0477, -0.0595, 0.0283]) tensor([0.3628, 0.2213, 0.1988, 0.2170]) -Greedy action tensor([ 0.5982, -0.4499, -0.1525, -0.3405]) tensor([0.4517, 0.1584, 0.2132, 0.1767]) -Greedy action tensor([ 0.5546, -0.1840, -0.0789, -0.1679]) tensor([0.4010, 0.1916, 0.2128, 0.1947]) -Greedy action tensor([ 0.7194, -0.4964, -0.2932, -0.3666]) tensor([0.5007, 0.1484, 0.1819, 0.1690]) -Greedy action tensor([ 0.5621, -0.5558, -0.1118, -0.2756]) tensor([0.4407, 0.1441, 0.2246, 0.1907]) -Greedy action tensor([ 0.5899, -0.0665, -0.0700, -0.2893]) tensor([0.4081, 0.2117, 0.2109, 0.1694]) -Greedy action tensor([ 0.7311, -0.1716, -0.1589, -0.2300]) tensor([0.4548, 0.1844, 0.1868, 0.1740]) -Greedy action tensor([ 0.1714, 0.1143, 0.0082, -0.2994]) tensor([0.2925, 0.2763, 0.2485, 0.1827]) -Greedy action tensor([ 0.6664, -0.1826, -0.0455, -0.2484]) tensor([0.4312, 0.1845, 0.2116, 0.1727]) -Greedy action tensor([ 1.3394, -1.1276, -0.1179, -0.6850]) tensor([0.6898, 0.0585, 0.1606, 0.0911]) -Greedy action tensor([ 0.5988, -0.5692, -0.1437, -0.2951]) tensor([0.4554, 0.1416, 0.2167, 0.1863]) -Greedy action tensor([ 0.6472, -0.4928, 0.2590, -0.5653]) tensor([0.4356, 0.1393, 0.2955, 0.1296]) -Greedy action tensor([ 0.7692, 0.4156, -0.0432, -0.4584]) tensor([0.4100, 0.2879, 0.1820, 0.1201]) -Greedy action tensor([ 0.5603, -0.4004, -0.1239, -0.2973]) tensor([0.4327, 0.1655, 0.2183, 0.1835]) -Greedy action tensor([ 0.2304, 0.0423, -0.1050, -0.1412]) tensor([0.3093, 0.2563, 0.2212, 0.2133]) -Greedy action tensor([ 0.8190, -0.6027, -0.0075, -0.2779]) tensor([0.4968, 0.1199, 0.2174, 0.1659]) -Greedy action tensor([ 0.5693, -0.0316, -0.0057, -0.1335]) tensor([0.3837, 0.2104, 0.2159, 0.1900]) -Greedy action tensor([ 0.7199, -0.5933, -0.0634, -0.3403]) tensor([0.4826, 0.1298, 0.2205, 0.1671]) -Greedy action tensor([ 0.7032, -0.4373, -0.0731, -0.4742]) tensor([0.4789, 0.1531, 0.2204, 0.1476]) -Greedy action tensor([ 0.6191, -0.4140, -0.0262, -0.3045]) tensor([0.4391, 0.1563, 0.2303, 0.1743]) -Greedy action tensor([ 0.6039, -0.3078, -0.0046, -0.3742]) tensor([0.4307, 0.1730, 0.2343, 0.1619]) -Greedy action tensor([ 0.2518, -0.5005, -0.2157, -0.0557]) tensor([0.3530, 0.1664, 0.2212, 0.2595]) -Greedy action tensor([ 0.5406, -0.0751, -0.0737, -0.2971]) tensor([0.3978, 0.2149, 0.2152, 0.1721]) -Greedy action tensor([ 0.8012, -0.6897, -0.1549, -0.3660]) tensor([0.5206, 0.1172, 0.2001, 0.1620]) -Greedy action tensor([ 0.8152, -0.5800, -0.1099, -0.4170]) tensor([0.5166, 0.1280, 0.2048, 0.1506]) -Greedy action tensor([ 0.5214, -0.1688, -0.0852, -0.2676]) tensor([0.3999, 0.2005, 0.2180, 0.1816]) -Greedy action tensor([ 0.6509, -0.3330, -0.0345, -0.2278]) tensor([0.4361, 0.1630, 0.2197, 0.1811]) -Greedy action tensor([ 0.3471, 0.0264, -0.1941, -0.1063]) tensor([0.3398, 0.2465, 0.1978, 0.2159]) -Greedy action tensor([ 0.6458, -0.6007, -0.1173, -0.5922]) tensor([0.4893, 0.1407, 0.2281, 0.1419]) -Greedy action tensor([ 0.8990, -0.3105, 0.0177, -0.5684]) tensor([0.5146, 0.1535, 0.2132, 0.1186]) -Greedy action tensor([ 1.0049, -0.4943, 0.0802, -0.2908]) tensor([0.5281, 0.1179, 0.2095, 0.1445]) -Greedy action tensor([ 0.3446, 0.0759, -0.0715, -0.2837]) tensor([0.3381, 0.2585, 0.2230, 0.1804]) -Greedy action tensor([ 0.5767, -0.1235, -0.0817, -0.0623]) tensor([0.3934, 0.1953, 0.2037, 0.2076]) -Greedy action tensor([ 0.8471, -0.1923, 0.0558, -0.4188]) tensor([0.4787, 0.1693, 0.2170, 0.1350]) -Greedy action tensor([ 0.7077, -0.3519, -0.0309, -0.3502]) tensor([0.4605, 0.1596, 0.2200, 0.1599]) -Greedy action tensor([ 0.2800, 0.2809, -0.1149, -0.0554]) tensor([0.2950, 0.2953, 0.1988, 0.2109]) -Greedy action tensor([ 0.7847, -0.3451, 0.0116, -0.3930]) tensor([0.4779, 0.1544, 0.2206, 0.1472]) -Greedy action tensor([ 0.4693, -0.0394, 0.0940, -0.2282]) tensor([0.3589, 0.2158, 0.2466, 0.1787]) -Greedy action tensor([ 0.6382, -0.2867, 0.0538, -0.3518]) tensor([0.4300, 0.1705, 0.2397, 0.1598]) -Greedy action tensor([ 0.4395, 0.0724, 0.0237, -0.1952]) tensor([0.3469, 0.2403, 0.2289, 0.1839]) -Greedy action tensor([ 0.3225, 0.5637, -0.0997, 0.0552]) tensor([0.2707, 0.3446, 0.1775, 0.2072]) -Greedy action tensor([ 0.4159, -0.2573, -0.1107, -0.4969]) tensor([0.3997, 0.2039, 0.2360, 0.1604]) -Greedy action tensor([ 0.8298, -0.1908, -0.1777, -0.4681]) tensor([0.5003, 0.1803, 0.1827, 0.1366]) -Greedy action tensor([ 0.6411, -0.0805, -0.0800, -0.3094]) tensor([0.4239, 0.2060, 0.2061, 0.1639]) -Greedy action tensor([ 0.4389, 0.1926, -0.1052, -0.1954]) tensor([0.3457, 0.2703, 0.2007, 0.1834]) -Greedy action tensor([ 0.7083, -0.4456, 0.0243, -0.4884]) tensor([0.4712, 0.1486, 0.2378, 0.1424]) -Greedy action tensor([ 0.9056, -0.6852, -0.1565, -0.5992]) tensor([0.5645, 0.1150, 0.1952, 0.1253]) -Greedy action tensor([ 0.6833, -0.5152, -0.1830, -0.3001]) tensor([0.4771, 0.1439, 0.2006, 0.1784]) -Greedy action tensor([ 0.7676, -0.7281, -0.0927, -0.6463]) tensor([0.5290, 0.1185, 0.2238, 0.1287]) -Greedy action tensor([ 0.7225, -0.4263, -0.1716, -0.4166]) tensor([0.4887, 0.1549, 0.1999, 0.1565]) -Greedy action tensor([ 0.3600, -0.2398, -0.0616, -0.3242]) tensor([0.3691, 0.2026, 0.2421, 0.1862]) -Greedy action tensor([ 1.2756, -1.4875, -0.1623, -0.7490]) tensor([0.6980, 0.0440, 0.1657, 0.0922]) -Greedy action tensor([ 0.7561, -0.5336, -0.1850, -0.4832]) tensor([0.5115, 0.1408, 0.1996, 0.1481]) -Greedy action tensor([ 0.5255, -0.0879, -0.1261, -0.1585]) tensor([0.3895, 0.2109, 0.2030, 0.1966]) -Greedy action tensor([ 0.4527, 0.0997, -0.0329, -0.0220]) tensor([0.3401, 0.2390, 0.2093, 0.2116]) -Greedy action tensor([ 0.1831, -0.1922, 0.0763, -0.2032]) tensor([0.3063, 0.2104, 0.2752, 0.2081]) -Greedy action tensor([ 0.6784, -0.6961, -0.0078, -0.3797]) tensor([0.4754, 0.1203, 0.2393, 0.1650]) -Greedy action tensor([ 0.5195, -0.0916, -0.0434, 0.0205]) tensor([0.3677, 0.1996, 0.2094, 0.2233]) -Greedy action tensor([ 0.5048, -0.1879, -0.0579, -0.3751]) tensor([0.4025, 0.2013, 0.2293, 0.1669]) -Greedy action tensor([ 0.7626, -0.5537, -0.0925, -0.5863]) tensor([0.5121, 0.1373, 0.2177, 0.1329]) -Greedy action tensor([ 0.4031, -0.0035, -0.0793, -0.1949]) tensor([0.3530, 0.2351, 0.2179, 0.1941]) -Greedy action tensor([ 0.1262, -0.0270, 0.0337, -0.2195]) tensor([0.2876, 0.2467, 0.2622, 0.2035]) -Greedy action tensor([ 0.6457, -0.3779, 0.0477, -0.4243]) tensor([0.4440, 0.1595, 0.2442, 0.1523]) -Greedy action tensor([ 0.9186, -0.5390, 0.0886, -0.2375]) tensor([0.5041, 0.1174, 0.2198, 0.1587]) -Greedy action tensor([ 0.7868, -0.2580, -0.0216, -0.4607]) tensor([0.4797, 0.1688, 0.2137, 0.1378]) -Greedy action tensor([ 0.9337, -0.4404, -0.0828, -0.4516]) tensor([0.5361, 0.1357, 0.1940, 0.1342]) -Greedy action tensor([ 0.7470, -0.3149, -0.0795, -0.3456]) tensor([0.4720, 0.1632, 0.2065, 0.1583]) -Greedy action tensor([ 1.1480, -0.5871, 0.0081, -0.3673]) tensor([0.5828, 0.1028, 0.1864, 0.1281]) -Greedy action tensor([ 0.4759, 0.1172, 0.0217, -0.1290]) tensor([0.3473, 0.2426, 0.2205, 0.1896]) -Greedy action tensor([-0.3399, -0.2718, 0.5418, -0.7614]) tensor([0.1945, 0.2082, 0.4697, 0.1276]) -Greedy action tensor([-0.2764, 0.9374, 0.3545, -0.1150]) tensor([0.1348, 0.4536, 0.2532, 0.1584]) -Greedy action tensor([ 0.3583, -0.2223, 0.8528, 0.1166]) tensor([0.2510, 0.1404, 0.4115, 0.1971]) -Greedy action tensor([ 0.4949, -1.7281, -0.7533, 0.5634]) tensor([0.4055, 0.0439, 0.1164, 0.4342]) -Greedy action tensor([0.3341, 0.0756, 0.2032, 1.1425]) tensor([0.2043, 0.1578, 0.1793, 0.4586]) -Greedy action tensor([-0.5682, -0.8208, 0.0242, 1.3558]) tensor([0.0958, 0.0744, 0.1733, 0.6564]) -Greedy action tensor([-0.1284, 0.4401, -0.1039, 0.0068]) tensor([0.2026, 0.3578, 0.2076, 0.2320]) -Greedy action tensor([1.2097, 0.3724, 0.7737, 0.3305]) tensor([0.4009, 0.1735, 0.2592, 0.1664]) -Greedy action tensor([ 0.5975, -0.2674, 1.8074, 0.4045]) tensor([0.1786, 0.0752, 0.5989, 0.1473]) -Greedy action tensor([ 1.3983, -0.4060, 0.9443, 0.0468]) tensor([0.4858, 0.0800, 0.3085, 0.1257]) -Greedy action tensor([ 1.0704, 0.0530, -0.1003, 0.0858]) tensor([0.4889, 0.1768, 0.1517, 0.1827]) -Greedy action tensor([ 0.5059, 0.4406, 0.3064, -0.2428]) tensor([0.3097, 0.2901, 0.2537, 0.1465]) -Greedy action tensor([ 0.7345, 0.3857, -0.4820, 0.3204]) tensor([0.3755, 0.2650, 0.1113, 0.2482]) -Greedy action tensor([ 0.8611, -1.7011, 0.5849, -0.2792]) tensor([0.4639, 0.0358, 0.3520, 0.1483]) -Greedy action tensor([ 1.7894, -0.7769, 0.9622, 1.7534]) tensor([0.4034, 0.0310, 0.1764, 0.3892]) -Greedy action tensor([0.3521, 0.2726, 0.3987, 0.3379]) tensor([0.2527, 0.2334, 0.2648, 0.2491]) -Greedy action tensor([-0.3533, -0.8415, 0.4113, 0.1056]) tensor([0.1871, 0.1148, 0.4020, 0.2961]) -Greedy action tensor([ 1.4566, -1.7614, 0.2241, 1.6310]) tensor([0.3965, 0.0159, 0.1156, 0.4720]) -Greedy action tensor([-0.0518, -1.0105, -0.6344, 0.3045]) tensor([0.2968, 0.1138, 0.1657, 0.4238]) -Greedy action tensor([ 1.1641, -0.1460, -0.1564, 0.6767]) tensor([0.4649, 0.1254, 0.1241, 0.2856]) -Greedy action tensor([ 0.1627, 0.0594, -0.5022, -0.7901]) tensor([0.3569, 0.3219, 0.1836, 0.1376]) -Greedy action tensor([1.6308, 0.1161, 1.0905, 0.7205]) tensor([0.4536, 0.0997, 0.2642, 0.1825]) -Greedy action tensor([ 0.5178, -0.4628, -0.2014, 1.1103]) tensor([0.2724, 0.1022, 0.1327, 0.4927]) -Greedy action tensor([ 0.3604, -1.0191, 1.7086, 0.0049]) tensor([0.1723, 0.0434, 0.6635, 0.1208]) -Greedy action tensor([-0.6886, -0.4563, -0.1918, 1.0273]) tensor([0.1056, 0.1333, 0.1736, 0.5875]) -Greedy action tensor([2.0633, 0.4638, 0.6968, 0.5393]) tensor([0.5971, 0.1206, 0.1522, 0.1301]) -Greedy action tensor([-0.1308, 0.7582, -0.1587, 0.5834]) tensor([0.1551, 0.3773, 0.1508, 0.3168]) -Greedy action tensor([ 1.2697, -0.1636, 1.0081, 1.3199]) tensor([0.3268, 0.0780, 0.2516, 0.3436]) -Greedy action tensor([-0.0440, -1.1935, 0.7306, 1.0705]) tensor([0.1530, 0.0485, 0.3320, 0.4664]) -Greedy action tensor([ 0.3869, 0.8454, 0.9670, -0.2051]) tensor([0.2032, 0.3214, 0.3630, 0.1124]) -Greedy action tensor([ 1.0981, -0.4637, -0.2652, 1.0025]) tensor([0.4212, 0.0883, 0.1077, 0.3828]) -Greedy action tensor([ 0.1250, 0.3662, -0.8977, 1.1303]) tensor([0.1864, 0.2372, 0.0670, 0.5094]) -Greedy action tensor([ 0.9872, 0.0406, -0.9401, 0.8820]) tensor([0.4109, 0.1594, 0.0598, 0.3699]) -Greedy action tensor([-0.0503, 0.2792, -1.0071, -0.3755]) tensor([0.2860, 0.3976, 0.1099, 0.2066]) -Greedy action tensor([1.3313, 0.0428, 0.3075, 0.2302]) tensor([0.5083, 0.1401, 0.1826, 0.1690]) -Greedy action tensor([ 0.3851, -0.1844, 0.6576, 0.7697]) tensor([0.2300, 0.1301, 0.3020, 0.3379]) -Greedy action tensor([-0.4871, 0.7422, 1.4473, -0.2509]) tensor([0.0793, 0.2712, 0.5490, 0.1005]) -Greedy action tensor([ 0.3031, -1.2604, 0.4014, -0.4121]) tensor([0.3569, 0.0747, 0.3938, 0.1746]) -Greedy action tensor([-0.5773, -0.1208, -0.0388, -0.0934]) tensor([0.1691, 0.2669, 0.2897, 0.2743]) -Greedy action tensor([-0.2261, 0.9275, -0.5542, 0.8366]) tensor([0.1285, 0.4072, 0.0925, 0.3718]) -Greedy action tensor([-0.2847, -0.8934, 0.9678, -0.9253]) tensor([0.1795, 0.0977, 0.6282, 0.0946]) -Greedy action tensor([-0.3873, -0.6367, 0.4357, 1.2355]) tensor([0.1096, 0.0854, 0.2496, 0.5554]) -Greedy action tensor([ 1.9036, -0.0777, 0.3418, 0.8301]) tensor([0.5919, 0.0816, 0.1242, 0.2023]) -Greedy action tensor([-0.3695, -0.7614, 0.0468, 0.9481]) tensor([0.1444, 0.0976, 0.2189, 0.5392]) -Greedy action tensor([-0.0868, -0.1926, 0.1548, 0.0989]) tensor([0.2285, 0.2055, 0.2909, 0.2751]) -Greedy action tensor([ 0.5627, -0.6363, -0.1259, 0.7501]) tensor([0.3322, 0.1002, 0.1669, 0.4007]) -Greedy action tensor([ 0.5090, -0.4262, 1.7856, -0.0097]) tensor([0.1795, 0.0704, 0.6433, 0.1068]) -Greedy action tensor([0.7708, 0.4719, 0.0895, 0.4936]) tensor([0.3327, 0.2468, 0.1683, 0.2522]) -Greedy action tensor([-0.2431, -0.6637, 0.2324, -0.3391]) tensor([0.2396, 0.1573, 0.3854, 0.2177]) -Greedy action tensor([-0.6227, 1.0554, 0.3049, -0.1411]) tensor([0.0952, 0.5099, 0.2407, 0.1541]) -Greedy action tensor([-0.0506, 0.6174, 0.3805, 1.1863]) tensor([0.1260, 0.2458, 0.1940, 0.4342]) -Greedy action tensor([ 0.2466, -0.8204, 0.2756, 0.3580]) tensor([0.2864, 0.0985, 0.2948, 0.3202]) -Greedy action tensor([-1.3306, -0.3443, 0.1238, 0.5485]) tensor([0.0689, 0.1848, 0.2951, 0.4512]) -Greedy action tensor([ 0.2320, -0.5480, 0.4545, -0.0261]) tensor([0.2874, 0.1317, 0.3589, 0.2220]) -Greedy action tensor([ 0.5409, -0.6555, -0.1018, 0.7184]) tensor([0.3309, 0.1000, 0.1740, 0.3951]) -Greedy action tensor([ 0.0624, -1.0091, -0.3449, 0.4559]) tensor([0.2865, 0.0981, 0.1907, 0.4247]) -Greedy action tensor([-0.6619, -2.3106, -0.2411, 0.8816]) tensor([0.1352, 0.0260, 0.2059, 0.6329]) -Greedy action tensor([ 0.5057, -0.4947, -0.0600, 0.5141]) tensor([0.3397, 0.1249, 0.1929, 0.3425]) -Greedy action tensor([0.1582, 0.4638, 0.4156, 2.0854]) tensor([0.0951, 0.1290, 0.1230, 0.6530]) -Greedy action tensor([ 1.4567, -0.5056, 2.0795, -0.1424]) tensor([0.3118, 0.0438, 0.5813, 0.0630]) -Greedy action tensor([0.6755, 0.2740, 0.3415, 0.0274]) tensor([0.3438, 0.2301, 0.2462, 0.1798]) -Greedy action tensor([ 1.5238, -0.9961, 1.1377, 1.4442]) tensor([0.3726, 0.0300, 0.2533, 0.3441]) -Greedy action tensor([ 0.7475, -1.1777, -0.7739, 1.4315]) tensor([0.2989, 0.0436, 0.0653, 0.5923]) -Greedy action tensor([ 0.7808, -0.8302, -0.5152, 0.8866]) tensor([0.3869, 0.0773, 0.1059, 0.4300]) -Greedy action tensor([-0.1349, -1.9559, 0.9327, -0.5130]) tensor([0.2103, 0.0340, 0.6116, 0.1441]) -Greedy action tensor([ 0.4456, -0.4835, 1.3118, -0.0019]) tensor([0.2267, 0.0895, 0.5390, 0.1449]) -Greedy action tensor([ 1.5586, -1.2737, 0.3150, 0.7227]) tensor([0.5616, 0.0331, 0.1619, 0.2434]) -Greedy action tensor([ 0.5915, -0.2127, -0.1022, 0.3045]) tensor([0.3707, 0.1659, 0.1852, 0.2782]) -Greedy action tensor([-0.1204, -0.0378, -0.3653, 1.6115]) tensor([0.1174, 0.1275, 0.0919, 0.6633]) -Greedy action tensor([-1.3089, 0.9178, 0.8353, -1.6757]) tensor([0.0513, 0.4754, 0.4378, 0.0355]) -Greedy action tensor([1.3121, 0.8598, 0.3387, 0.8306]) tensor([0.3800, 0.2417, 0.1435, 0.2348]) -Greedy action tensor([-0.9057, -0.4174, -0.4945, 0.1026]) tensor([0.1454, 0.2369, 0.2193, 0.3984]) -Greedy action tensor([-0.7283, -0.6077, -1.0256, -0.2809]) tensor([0.2255, 0.2544, 0.1675, 0.3527]) -Greedy action tensor([-0.3019, 0.0742, -0.1547, 0.0680]) tensor([0.1975, 0.2877, 0.2289, 0.2859]) -Greedy action tensor([ 0.6819, 0.5190, -0.0076, 0.3818]) tensor([0.3234, 0.2748, 0.1623, 0.2395]) -Greedy action tensor([0.3651, 0.9748, 0.4743, 0.3161]) tensor([0.2038, 0.3749, 0.2273, 0.1940]) -Greedy action tensor([ 0.2112, 0.1557, -1.0514, 1.1908]) tensor([0.2044, 0.1934, 0.0578, 0.5444]) -Greedy action tensor([ 0.4896, 0.5592, -0.0532, 0.8318]) tensor([0.2462, 0.2640, 0.1431, 0.3467]) -Greedy action tensor([ 0.3992, -0.1520, -0.1488, 1.1667]) tensor([0.2321, 0.1337, 0.1342, 0.5000]) -Greedy action tensor([ 0.7122, -0.1601, -0.4465, 1.7635]) tensor([0.2177, 0.0910, 0.0683, 0.6229]) -Greedy action tensor([-0.5603, -1.2624, -0.5743, 0.1725]) tensor([0.2192, 0.1086, 0.2161, 0.4561]) -Greedy action tensor([-1.2940, -1.0946, 1.0251, -0.5854]) tensor([0.0694, 0.0847, 0.7051, 0.1409]) -Greedy action tensor([ 1.3395, -0.2345, -0.2636, 0.1771]) tensor([0.5810, 0.1204, 0.1169, 0.1817]) -Greedy action tensor([ 1.1043, -0.5000, -0.0817, 0.4053]) tensor([0.4991, 0.1003, 0.1525, 0.2481]) -Greedy action tensor([ 1.4076, 0.2514, -0.0266, 0.1044]) tensor([0.5480, 0.1725, 0.1306, 0.1489]) -Greedy action tensor([ 0.4166, -0.3277, -0.3611, 0.2850]) tensor([0.3557, 0.1690, 0.1634, 0.3119]) -Greedy action tensor([ 0.6765, -0.1834, -0.0088, 0.0634]) tensor([0.4051, 0.1714, 0.2041, 0.2194]) -Greedy action tensor([ 0.9863, -0.4575, -0.5566, 0.0041]) tensor([0.5482, 0.1294, 0.1172, 0.2053]) -Greedy action tensor([ 1.0359, -0.8322, -0.3634, 0.7118]) tensor([0.4707, 0.0727, 0.1162, 0.3404]) -Greedy action tensor([ 1.4371, -0.6970, -0.0250, 0.2341]) tensor([0.6059, 0.0717, 0.1404, 0.1819]) -Greedy action tensor([ 1.1774, -0.0961, -0.1310, 0.0157]) tensor([0.5367, 0.1502, 0.1451, 0.1680]) -Greedy action tensor([ 1.4637, 0.1337, -0.0551, 0.1990]) tensor([0.5663, 0.1498, 0.1240, 0.1599]) -Greedy action tensor([ 0.7191, -0.2939, -0.2359, 0.2874]) tensor([0.4171, 0.1515, 0.1605, 0.2709]) -Greedy action tensor([ 1.9798, -0.8133, -0.2357, 0.8763]) tensor([0.6658, 0.0408, 0.0726, 0.2208]) -Greedy action tensor([ 1.3393, -0.6991, -0.2653, 0.0550]) tensor([0.6219, 0.0810, 0.1250, 0.1722]) -Greedy action tensor([ 0.8928, -0.5633, -0.3661, 0.2232]) tensor([0.4929, 0.1149, 0.1399, 0.2523]) -Greedy action tensor([ 0.5905, -0.0703, -0.1569, 0.1477]) tensor([0.3799, 0.1962, 0.1799, 0.2440]) -Greedy action tensor([ 1.1643, 0.1424, -0.1595, 0.0825]) tensor([0.5089, 0.1831, 0.1354, 0.1725]) -Greedy action tensor([ 1.3723, -0.0801, -0.1898, 0.2655]) tensor([0.5636, 0.1319, 0.1182, 0.1863]) -Greedy action tensor([ 0.9049, -0.0518, -0.1972, 0.3401]) tensor([0.4377, 0.1681, 0.1454, 0.2488]) -Greedy action tensor([ 1.8507, -0.7798, -0.2491, 0.1226]) tensor([0.7288, 0.0525, 0.0893, 0.1294]) -Greedy action tensor([ 1.2434, -0.5977, -0.4488, 0.5811]) tensor([0.5381, 0.0854, 0.0991, 0.2775]) -Greedy action tensor([ 0.8087, -0.4716, 0.0458, 0.0248]) tensor([0.4544, 0.1263, 0.2119, 0.2075]) -Greedy action tensor([ 0.7136, -0.2630, 0.0831, 0.0815]) tensor([0.4098, 0.1543, 0.2181, 0.2178]) -Greedy action tensor([ 1.0721, -0.2000, -0.5068, 0.3640]) tensor([0.5053, 0.1416, 0.1042, 0.2489]) -Greedy action tensor([ 0.4818, -0.2573, -0.2139, 0.0630]) tensor([0.3796, 0.1813, 0.1893, 0.2497]) -Greedy action tensor([ 1.4975, -0.1265, -0.6706, -0.1408]) tensor([0.6641, 0.1309, 0.0760, 0.1290]) -Greedy action tensor([ 1.6950, -0.4589, -0.0175, 0.0060]) tensor([0.6751, 0.0783, 0.1218, 0.1247]) -Greedy action tensor([ 1.4324, -0.5835, 0.0154, 0.1964]) tensor([0.6002, 0.0799, 0.1455, 0.1744]) -Greedy action tensor([ 1.6616, -0.8567, -0.1549, 0.3726]) tensor([0.6584, 0.0531, 0.1071, 0.1814]) -Greedy action tensor([ 1.0101, -0.1997, -0.5823, 0.4542]) tensor([0.4819, 0.1437, 0.0980, 0.2764]) -Greedy action tensor([ 0.4140, -0.2686, -0.0019, -0.0197]) tensor([0.3555, 0.1796, 0.2345, 0.2304]) -Greedy action tensor([ 0.9962, -0.2030, -0.1994, 0.2691]) tensor([0.4791, 0.1444, 0.1449, 0.2316]) -Greedy action tensor([ 0.8521, -0.2797, -0.0947, 0.1344]) tensor([0.4549, 0.1467, 0.1765, 0.2219]) -Greedy action tensor([ 0.9477, -0.1865, -0.4312, 0.2683]) tensor([0.4807, 0.1546, 0.1211, 0.2437]) -Greedy action tensor([ 0.7595, -0.3157, 0.1249, -0.0488]) tensor([0.4316, 0.1473, 0.2288, 0.1923]) -Greedy action tensor([ 0.3937, -0.1879, -0.2876, 0.1780]) tensor([0.3483, 0.1947, 0.1762, 0.2807]) -Greedy action tensor([ 0.1196, -0.0607, -0.5246, 0.3167]) tensor([0.2795, 0.2334, 0.1468, 0.3404]) -Greedy action tensor([ 2.1053, -0.8681, -0.5323, 0.0493]) tensor([0.7996, 0.0409, 0.0572, 0.1023]) -Greedy action tensor([ 0.5473, -0.0464, -0.3812, 0.0088]) tensor([0.3951, 0.2182, 0.1561, 0.2306]) -Greedy action tensor([ 0.9458, -0.4307, -0.0571, -0.0604]) tensor([0.5038, 0.1272, 0.1848, 0.1842]) -Greedy action tensor([ 1.4428, -0.6477, -0.2807, 0.2222]) tensor([0.6261, 0.0774, 0.1117, 0.1847]) -Greedy action tensor([ 0.6992, -0.5926, -0.1286, 0.2233]) tensor([0.4286, 0.1178, 0.1873, 0.2663]) -Greedy action tensor([ 1.1500, -0.2261, -0.2869, 0.1284]) tensor([0.5405, 0.1365, 0.1284, 0.1946]) -Greedy action tensor([ 1.2760, -0.8382, -0.0642, 0.4525]) tensor([0.5490, 0.0663, 0.1437, 0.2410]) -Greedy action tensor([ 0.9917, -0.2077, -0.2951, 0.0469]) tensor([0.5086, 0.1533, 0.1404, 0.1977]) -Greedy action tensor([ 1.1166, -0.7243, -0.2305, 0.4700]) tensor([0.5148, 0.0817, 0.1339, 0.2697]) -Greedy action tensor([ 1.3432, -0.8826, -0.4450, 0.7340]) tensor([0.5497, 0.0594, 0.0920, 0.2989]) -Greedy action tensor([ 1.4986, -0.3292, -0.1379, -0.0335]) tensor([0.6363, 0.1023, 0.1239, 0.1375]) -Greedy action tensor([ 1.1873, -0.7308, -0.2871, 0.7012]) tensor([0.5023, 0.0738, 0.1150, 0.3089]) -Greedy action tensor([ 0.6877, -0.0935, 0.0026, -0.0076]) tensor([0.4064, 0.1861, 0.2048, 0.2028]) -Greedy action tensor([ 0.7737, -0.3419, -0.3428, 0.4402]) tensor([0.4217, 0.1382, 0.1381, 0.3021]) -Greedy action tensor([ 1.5377, -0.1671, -0.3272, -0.1811]) tensor([0.6596, 0.1199, 0.1022, 0.1183]) -Greedy action tensor([1.0184, 0.0573, 0.1312, 0.0282]) tensor([0.4617, 0.1766, 0.1901, 0.1715]) -Greedy action tensor([ 1.5126, -0.0180, -0.4804, 0.0516]) tensor([0.6310, 0.1366, 0.0860, 0.1464]) -Greedy action tensor([ 1.0620, -0.5668, -0.2599, 0.1940]) tensor([0.5312, 0.1042, 0.1416, 0.2230]) -Greedy action tensor([ 0.9622, -0.2519, -0.3675, -0.0260]) tensor([0.5171, 0.1536, 0.1368, 0.1925]) -Greedy action tensor([ 1.4013, -0.3666, 0.0922, 0.1007]) tensor([0.5837, 0.0996, 0.1576, 0.1590]) -Greedy action tensor([ 1.3101, -0.4837, -0.6044, 0.3462]) tensor([0.5899, 0.0981, 0.0870, 0.2250]) -Greedy action tensor([ 1.7248, -0.4300, -0.4042, 0.4699]) tensor([0.6579, 0.0763, 0.0783, 0.1876]) -Greedy action tensor([ 1.4678, -0.6461, -0.3403, 0.1121]) tensor([0.6483, 0.0783, 0.1063, 0.1671]) -Greedy action tensor([ 1.5482, -0.5424, -0.0585, 0.6292]) tensor([0.5804, 0.0717, 0.1164, 0.2315]) -Greedy action tensor([ 1.4081, -0.4013, -0.3487, 0.0373]) tensor([0.6288, 0.1030, 0.1085, 0.1597]) -Greedy action tensor([ 1.1901, -0.8413, -0.6341, 0.6737]) tensor([0.5293, 0.0694, 0.0854, 0.3158]) -Greedy action tensor([ 1.4410, -0.6756, -0.4114, 0.0082]) tensor([0.6597, 0.0795, 0.1035, 0.1574]) -Greedy action tensor([ 0.9896, -0.1520, -0.3544, 0.2828]) tensor([0.4823, 0.1540, 0.1258, 0.2379]) -Greedy action tensor([ 1.0222, -0.1542, -0.4584, -0.2491]) tensor([0.5505, 0.1698, 0.1253, 0.1544]) -Greedy action tensor([ 1.2319, -0.4485, -0.1550, 0.3254]) tensor([0.5434, 0.1012, 0.1358, 0.2195]) -Greedy action tensor([ 0.7731, -0.5120, 0.0702, 0.0972]) tensor([0.4385, 0.1213, 0.2171, 0.2231]) -Greedy action tensor([ 0.9978, -0.1680, -0.0265, -0.2807]) tensor([0.5130, 0.1599, 0.1842, 0.1429]) -Greedy action tensor([ 1.4216, -0.4784, -0.2795, 0.4130]) tensor([0.5893, 0.0882, 0.1075, 0.2150]) -Greedy action tensor([ 0.9662, -0.8332, -0.3565, 0.3339]) tensor([0.5094, 0.0843, 0.1357, 0.2707]) -Greedy action tensor([ 0.6001, -0.2983, -0.0035, -0.0150]) tensor([0.4009, 0.1632, 0.2192, 0.2167]) -Greedy action tensor([ 1.0564, -0.2063, -0.3873, 0.2714]) tensor([0.5063, 0.1432, 0.1195, 0.2309]) -Greedy action tensor([ 0.6583, -0.2117, -0.1841, -0.2763]) tensor([0.4460, 0.1868, 0.1921, 0.1752]) -Greedy action tensor([ 0.9950, -0.4112, -0.1592, 0.0559]) tensor([0.5125, 0.1256, 0.1616, 0.2004]) -Greedy action tensor([ 0.9381, -0.8947, -0.2356, -0.0190]) tensor([0.5396, 0.0863, 0.1669, 0.2072]) -Greedy action tensor([ 1.3311, -0.1610, -0.3345, 0.5492]) tensor([0.5343, 0.1202, 0.1010, 0.2445]) -Greedy action tensor([ 1.0053, -0.2893, 0.0169, 0.0196]) tensor([0.4952, 0.1357, 0.1843, 0.1848]) -Greedy action tensor([ 1.0540, -0.4003, 0.0603, 0.2543]) tensor([0.4870, 0.1138, 0.1803, 0.2189]) -Greedy action tensor([ 0.8392, -0.3647, 0.0479, 0.0764]) tensor([0.4505, 0.1352, 0.2042, 0.2101]) -Greedy action tensor([ 1.5394, -0.6179, -0.3872, 0.5814]) tensor([0.6079, 0.0703, 0.0885, 0.2332]) -Greedy action tensor([ 1.1762, -0.4683, -0.2279, 0.3106]) tensor([0.5378, 0.1038, 0.1321, 0.2263]) -Greedy action tensor([-1.7714, -0.7337, 0.1099, -0.4321]) tensor([0.0704, 0.1988, 0.4621, 0.2687]) -Greedy action tensor([-1.8955, -0.4575, 0.6427, -0.1381]) tensor([0.0423, 0.1780, 0.5348, 0.2450]) -Greedy action tensor([-0.9152, -0.5550, 0.2036, 0.2100]) tensor([0.1166, 0.1672, 0.3570, 0.3593]) -Greedy action tensor([-1.3453, -0.6903, 0.4432, 0.1655]) tensor([0.0744, 0.1433, 0.4451, 0.3372]) -Greedy action tensor([-1.2680, 0.8199, 0.1882, 0.4538]) tensor([0.0528, 0.4257, 0.2263, 0.2952]) -Greedy action tensor([-1.7624, -0.4622, 0.6668, 0.1219]) tensor([0.0442, 0.1624, 0.5022, 0.2912]) -Greedy action tensor([-0.8401, -0.5076, 0.2509, 0.4905]) tensor([0.1092, 0.1523, 0.3252, 0.4133]) -Greedy action tensor([-1.8217, -0.4695, 0.6465, 0.0154]) tensor([0.0436, 0.1685, 0.5143, 0.2736]) -Greedy action tensor([-1.6899, -0.5121, 0.5455, 0.0045]) tensor([0.0525, 0.1705, 0.4911, 0.2859]) -Greedy action tensor([-1.7210, -0.4985, 0.5340, -0.0764]) tensor([0.0523, 0.1777, 0.4990, 0.2710]) -Greedy action tensor([-1.8071, -0.5926, 1.5286, 0.9549]) tensor([0.0207, 0.0697, 0.5818, 0.3278]) -Greedy action tensor([-1.8504, -0.4831, 0.7064, -0.0193]) tensor([0.0416, 0.1631, 0.5359, 0.2594]) -Greedy action tensor([-0.2787, 0.1151, 0.2052, 0.3791]) tensor([0.1657, 0.2456, 0.2688, 0.3199]) -Greedy action tensor([-1.2005, -0.5128, 1.0804, 1.2473]) tensor([0.0411, 0.0817, 0.4021, 0.4751]) -Greedy action tensor([-1.6520, -0.5281, 0.5055, -0.0165]) tensor([0.0560, 0.1723, 0.4843, 0.2874]) -Greedy action tensor([-0.7909, -0.5408, 0.2069, 0.2093]) tensor([0.1296, 0.1664, 0.3516, 0.3524]) -Greedy action tensor([-1.0058, -0.2461, 0.4588, -0.3774]) tensor([0.1071, 0.2289, 0.4632, 0.2008]) -Greedy action tensor([-1.2955, -0.5782, 0.3202, 0.2312]) tensor([0.0788, 0.1615, 0.3967, 0.3629]) -Greedy action tensor([-1.8502, -0.4557, 0.6892, -0.0655]) tensor([0.0423, 0.1704, 0.5355, 0.2518]) -Greedy action tensor([-1.8417, -0.4641, 0.6846, -0.0471]) tensor([0.0426, 0.1688, 0.5325, 0.2562]) -Greedy action tensor([-1.6788, -0.3643, 0.5221, -0.0619]) tensor([0.0532, 0.1981, 0.4806, 0.2680]) -Greedy action tensor([-1.4736, -0.5540, 0.5065, -0.1438]) tensor([0.0688, 0.1726, 0.4984, 0.2601]) -Greedy action tensor([-1.7348, -0.6224, 0.1617, -0.3097]) tensor([0.0673, 0.2046, 0.4483, 0.2798]) -Greedy action tensor([-1.5240, -0.3158, 0.6287, -0.4389]) tensor([0.0628, 0.2103, 0.5409, 0.1860]) -Greedy action tensor([-1.9172, -0.4348, 0.6542, -0.1601]) tensor([0.0412, 0.1813, 0.5388, 0.2387]) -Greedy action tensor([-1.3183, -0.3152, 1.3976, 1.1227]) tensor([0.0330, 0.0899, 0.4984, 0.3787]) -Greedy action tensor([-1.2013, -0.5655, 0.2935, 0.4680]) tensor([0.0790, 0.1492, 0.3523, 0.4195]) -Greedy action tensor([-1.0565, -0.5343, 0.5751, -0.3061]) tensor([0.1008, 0.1700, 0.5156, 0.2136]) -Greedy action tensor([-1.9419, -0.4559, 0.6729, -0.1750]) tensor([0.0401, 0.1772, 0.5480, 0.2347]) -Greedy action tensor([-0.3900, -0.4038, 0.1960, 0.2026]) tensor([0.1788, 0.1764, 0.3213, 0.3235]) -Greedy action tensor([-0.9646, -0.3242, -0.2427, -0.3873]) tensor([0.1484, 0.2816, 0.3055, 0.2644]) -Greedy action tensor([ 0.5536, -0.2036, 0.2312, 1.1041]) tensor([0.2546, 0.1194, 0.1844, 0.4415]) -Greedy action tensor([-1.9278, -0.4292, 0.6575, -0.1676]) tensor([0.0407, 0.1822, 0.5403, 0.2367]) -Greedy action tensor([-1.7518, -0.5056, 0.5901, -0.0401]) tensor([0.0490, 0.1703, 0.5094, 0.2713]) -Greedy action tensor([-0.4906, -0.5741, 0.2190, 0.0693]) tensor([0.1753, 0.1613, 0.3565, 0.3069]) -Greedy action tensor([-1.5073, -0.7058, -0.1930, -0.4890]) tensor([0.1029, 0.2293, 0.3830, 0.2848]) -Greedy action tensor([-1.3153, -0.5898, 0.3324, 0.2177]) tensor([0.0776, 0.1602, 0.4029, 0.3593]) -Greedy action tensor([-1.9800, -0.6162, 1.3733, 0.6506]) tensor([0.0211, 0.0825, 0.6034, 0.2929]) -Greedy action tensor([-1.9009, -0.4653, 0.6471, -0.1542]) tensor([0.0422, 0.1772, 0.5389, 0.2418]) -Greedy action tensor([-1.5813, -0.5934, 0.8576, 0.6441]) tensor([0.0410, 0.1100, 0.4696, 0.3793]) -Greedy action tensor([-1.7390, -0.6180, 0.4698, -0.1152]) tensor([0.0548, 0.1681, 0.4990, 0.2780]) -Greedy action tensor([-1.2501, -0.5676, 0.3294, 0.1899]) tensor([0.0830, 0.1642, 0.4026, 0.3502]) -Greedy action tensor([-0.6621, 0.1502, 0.4514, 1.3425]) tensor([0.0729, 0.1642, 0.2219, 0.5410]) -Greedy action tensor([-0.8860, -0.5410, 0.3218, -0.1041]) tensor([0.1259, 0.1777, 0.4212, 0.2751]) -Greedy action tensor([-1.5408, -0.4110, 0.4684, 0.0790]) tensor([0.0602, 0.1864, 0.4491, 0.3043]) -Greedy action tensor([-1.2761, -0.6606, 0.3159, 0.2921]) tensor([0.0796, 0.1473, 0.3911, 0.3820]) -Greedy action tensor([-1.4942, -0.4806, 1.0001, 1.0132]) tensor([0.0355, 0.0979, 0.4305, 0.4361]) -Greedy action tensor([-0.9525, -0.3944, 1.3497, 1.4457]) tensor([0.0421, 0.0736, 0.4210, 0.4634]) -Greedy action tensor([-0.7149, -0.1695, 1.0067, 1.4794]) tensor([0.0578, 0.0998, 0.3235, 0.5189]) -Greedy action tensor([-0.4166, -0.2267, 0.9664, 1.6374]) tensor([0.0715, 0.0864, 0.2849, 0.5573]) -Greedy action tensor([-1.8774, -0.4678, 0.6368, -0.1189]) tensor([0.0430, 0.1761, 0.5314, 0.2496]) -Greedy action tensor([-1.6016, -0.5028, 0.4787, -0.0092]) tensor([0.0591, 0.1773, 0.4731, 0.2905]) -Greedy action tensor([-1.5834, -0.5075, 0.4855, 0.0512]) tensor([0.0589, 0.1728, 0.4663, 0.3020]) -Greedy action tensor([-1.9289, -0.4645, 0.7537, -0.0486]) tensor([0.0377, 0.1632, 0.5517, 0.2473]) -Greedy action tensor([-1.1035, -0.5904, 0.4023, 0.0920]) tensor([0.0954, 0.1593, 0.4300, 0.3153]) -Greedy action tensor([-1.6752, -0.3807, 0.4647, -0.0138]) tensor([0.0543, 0.1982, 0.4615, 0.2860]) -Greedy action tensor([-1.8508, -0.5355, 0.5752, -0.1766]) tensor([0.0468, 0.1743, 0.5293, 0.2496]) -Greedy action tensor([-1.1380, -0.3135, 0.5945, -0.5780]) tensor([0.0936, 0.2134, 0.5292, 0.1638]) -Greedy action tensor([-1.9164, -0.4492, 0.6701, -0.1417]) tensor([0.0408, 0.1769, 0.5418, 0.2406]) -Greedy action tensor([-1.7670, -0.4482, 0.5821, -0.0612]) tensor([0.0483, 0.1804, 0.5056, 0.2657]) -Greedy action tensor([-1.6761, -0.4509, 0.6066, 0.1060]) tensor([0.0496, 0.1690, 0.4865, 0.2949]) -Greedy action tensor([-1.7064, -0.4778, 0.6100, 0.0837]) tensor([0.0487, 0.1663, 0.4935, 0.2916]) -Greedy action tensor([-1.8700, -0.4493, 0.6264, -0.1403]) tensor([0.0436, 0.1807, 0.5297, 0.2461]) -Greedy action tensor([-1.2455, 0.4829, 0.2841, -0.0545]) tensor([0.0688, 0.3874, 0.3175, 0.2263]) -Greedy action tensor([-1.8492, -0.4730, 0.6145, -0.1293]) tensor([0.0449, 0.1776, 0.5270, 0.2505]) -Greedy action tensor([-0.8875, -0.5769, 0.1954, 0.2903]) tensor([0.1168, 0.1593, 0.3448, 0.3791]) -Greedy action tensor([-0.7911, -0.5974, 0.8361, 1.4902]) tensor([0.0585, 0.0710, 0.2978, 0.5727]) -Greedy action tensor([-1.1334, -0.4081, 0.5600, 0.9037]) tensor([0.0618, 0.1277, 0.3363, 0.4742]) -Greedy action tensor([-1.1443, -0.7427, 0.5888, 0.2074]) tensor([0.0832, 0.1243, 0.4709, 0.3215]) -Greedy action tensor([-0.8693, -0.3882, 0.5887, 1.3068]) tensor([0.0636, 0.1029, 0.2733, 0.5603]) -Greedy action tensor([-1.1334, -0.5663, 0.7724, 1.2372]) tensor([0.0495, 0.0873, 0.3330, 0.5301]) -Greedy action tensor([-0.3241, 0.2514, 0.8033, 1.5772]) tensor([0.0796, 0.1416, 0.2458, 0.5330]) -Greedy action tensor([-0.9676, -0.3692, 0.7697, 1.1309]) tensor([0.0600, 0.1092, 0.3412, 0.4896]) -Greedy action tensor([-1.6156, -0.4080, 0.5433, 0.1399]) tensor([0.0532, 0.1780, 0.4609, 0.3079]) -Greedy action tensor([-1.7942, -0.3739, 0.6021, -0.0369]) tensor([0.0456, 0.1888, 0.5011, 0.2645]) -Greedy action tensor([-1.8582, -0.4404, 0.6249, -0.1268]) tensor([0.0439, 0.1814, 0.5264, 0.2482]) -Greedy action tensor([-1.6936, -0.5098, 0.5329, 0.0180]) tensor([0.0524, 0.1713, 0.4859, 0.2904]) -Greedy action tensor([-1.9805, -0.6454, 0.9005, -0.0057]) tensor([0.0335, 0.1274, 0.5976, 0.2415]) -Greedy action tensor([-1.5190, -0.5701, 0.4454, 0.0874]) tensor([0.0637, 0.1645, 0.4542, 0.3175]) -Greedy action tensor([-1.6288, -0.5038, 0.7357, -0.3781]) tensor([0.0549, 0.1691, 0.5842, 0.1918]) -Greedy action tensor([-1.8201, -0.4331, 0.6180, -0.0736]) tensor([0.0451, 0.1804, 0.5161, 0.2584]) -Greedy action tensor([ 0.5004, -0.2471, -0.1149, -0.2133]) tensor([0.3994, 0.1891, 0.2159, 0.1956]) -Greedy action tensor([ 0.4441, 0.1203, -0.0714, -0.3133]) tensor([0.3585, 0.2593, 0.2141, 0.1681]) -Greedy action tensor([ 0.4183, 0.1660, -0.1144, -0.3157]) tensor([0.3516, 0.2732, 0.2064, 0.1688]) -Greedy action tensor([ 0.8051, -0.6524, 0.0431, -0.3618]) tensor([0.4973, 0.1158, 0.2321, 0.1548]) -Greedy action tensor([ 0.6380, -0.4018, 0.0353, -0.2490]) tensor([0.4324, 0.1529, 0.2367, 0.1781]) -Greedy action tensor([ 1.2084, -0.9489, 0.1422, -0.5235]) tensor([0.6109, 0.0706, 0.2104, 0.1081]) -Greedy action tensor([ 0.5835, -0.4161, -0.1753, -0.4064]) tensor([0.4529, 0.1667, 0.2121, 0.1683]) -Greedy action tensor([ 0.4175, -0.0093, 0.1242, -0.1342]) tensor([0.3362, 0.2194, 0.2507, 0.1936]) -Greedy action tensor([ 1.0742, -0.6049, -0.0326, -0.6247]) tensor([0.5882, 0.1097, 0.1945, 0.1076]) -Greedy action tensor([ 0.6777, -0.2997, 0.0085, -0.1896]) tensor([0.4332, 0.1630, 0.2218, 0.1820]) -Greedy action tensor([ 0.5973, -0.3285, 0.0568, -0.2774]) tensor([0.4174, 0.1654, 0.2431, 0.1741]) -Greedy action tensor([ 0.3531, -0.2727, -0.1422, -0.3470]) tensor([0.3787, 0.2025, 0.2308, 0.1880]) -Greedy action tensor([ 0.8959, -0.5085, -0.0689, -0.3266]) tensor([0.5205, 0.1278, 0.1984, 0.1533]) -Greedy action tensor([ 0.9294, -0.6459, -0.2145, -0.6283]) tensor([0.5760, 0.1192, 0.1835, 0.1213]) -Greedy action tensor([ 0.5386, -0.1877, -0.0767, -0.1810]) tensor([0.3982, 0.1926, 0.2152, 0.1939]) -Greedy action tensor([ 0.2822, -0.2616, 0.0514, -0.4953]) tensor([0.3529, 0.2049, 0.2801, 0.1622]) -Greedy action tensor([ 0.8543, -0.8096, -0.0373, -0.7160]) tensor([0.5533, 0.1048, 0.2269, 0.1151]) -Greedy action tensor([ 0.9452, -0.4883, -0.1159, -0.4490]) tensor([0.5457, 0.1301, 0.1888, 0.1353]) -Greedy action tensor([ 0.4945, -0.0868, -0.0081, -0.4072]) tensor([0.3891, 0.2176, 0.2354, 0.1579]) -Greedy action tensor([ 0.6980, -0.4560, -0.1287, -0.2614]) tensor([0.4682, 0.1476, 0.2048, 0.1794]) -Greedy action tensor([ 0.4802, -0.1542, -0.0675, -0.2736]) tensor([0.3877, 0.2056, 0.2242, 0.1825]) -Greedy action tensor([ 0.7095, -0.4482, 0.0056, -0.5959]) tensor([0.4808, 0.1511, 0.2378, 0.1303]) -Greedy action tensor([ 0.8256, -0.4979, 0.0390, -0.6084]) tensor([0.5102, 0.1358, 0.2323, 0.1216]) -Greedy action tensor([ 0.6971, -0.4566, 0.0379, -0.6302]) tensor([0.4767, 0.1504, 0.2466, 0.1264]) -Greedy action tensor([ 0.5220, -0.2900, -0.0229, -0.4373]) tensor([0.4155, 0.1844, 0.2409, 0.1592]) -Greedy action tensor([ 0.6222, 0.0054, -0.1065, -0.2651]) tensor([0.4109, 0.2217, 0.1983, 0.1692]) -Greedy action tensor([ 0.6240, -0.2662, -0.0423, -0.4909]) tensor([0.4440, 0.1823, 0.2281, 0.1456]) -Greedy action tensor([ 0.7069, -0.1639, -0.0085, -0.6279]) tensor([0.4606, 0.1928, 0.2253, 0.1213]) -Greedy action tensor([ 0.8099, -0.5036, -0.0943, -0.7025]) tensor([0.5280, 0.1420, 0.2137, 0.1164]) -Greedy action tensor([ 0.6384, -0.0360, -0.2268, -0.3426]) tensor([0.4338, 0.2210, 0.1826, 0.1626]) -Greedy action tensor([ 0.4931, -0.4658, 0.1501, -0.6126]) tensor([0.4126, 0.1581, 0.2928, 0.1365]) -Greedy action tensor([ 0.8361, -0.3406, -0.1361, -0.4431]) tensor([0.5090, 0.1569, 0.1925, 0.1416]) -Greedy action tensor([ 8.4537e-01, -4.6410e-01, -1.9822e-04, -3.1357e-01]) tensor([0.4967, 0.1341, 0.2133, 0.1559]) -Greedy action tensor([ 0.4921, -0.3524, -0.0446, -0.2200]) tensor([0.3992, 0.1716, 0.2334, 0.1959]) -Greedy action tensor([ 0.4035, -0.1485, -0.0548, -0.2295]) tensor([0.3651, 0.2102, 0.2308, 0.1939]) -Greedy action tensor([ 0.3905, -0.1939, -0.3240, -0.5373]) tensor([0.4094, 0.2282, 0.2004, 0.1619]) -Greedy action tensor([ 0.5481, 0.1685, -0.0613, -0.0368]) tensor([0.3591, 0.2457, 0.1952, 0.2001]) -Greedy action tensor([ 0.5613, -0.2897, 0.1876, -0.3400]) tensor([0.3966, 0.1694, 0.2730, 0.1610]) -Greedy action tensor([ 0.5511, -0.2621, 0.1961, -0.2994]) tensor([0.3888, 0.1724, 0.2726, 0.1661]) -Greedy action tensor([ 1.0816, -0.3967, 0.0531, -0.5596]) tensor([0.5620, 0.1282, 0.2009, 0.1089]) -Greedy action tensor([ 0.5766, -0.3468, 0.1594, -0.5564]) tensor([0.4205, 0.1670, 0.2771, 0.1354]) -Greedy action tensor([ 0.2694, -0.2104, -0.0478, -0.2530]) tensor([0.3401, 0.2105, 0.2477, 0.2017]) -Greedy action tensor([ 0.8542, -0.3189, 0.1213, -0.3712]) tensor([0.4800, 0.1485, 0.2306, 0.1409]) -Greedy action tensor([ 0.6687, -0.3900, 0.0178, -0.4266]) tensor([0.4539, 0.1575, 0.2368, 0.1518]) -Greedy action tensor([ 0.6810, -0.1278, -0.0667, -0.3290]) tensor([0.4380, 0.1951, 0.2074, 0.1595]) -Greedy action tensor([ 0.8770, -0.5378, -0.0545, -0.5084]) tensor([0.5299, 0.1288, 0.2088, 0.1326]) -Greedy action tensor([ 0.5702, -0.2057, -0.1937, -0.2957]) tensor([0.4261, 0.1961, 0.1985, 0.1793]) -Greedy action tensor([ 0.3604, 0.0103, -0.0896, -0.1427]) tensor([0.3393, 0.2391, 0.2164, 0.2052]) -Greedy action tensor([ 0.8655, -0.3395, -0.0281, -0.4106]) tensor([0.5030, 0.1507, 0.2058, 0.1404]) -Greedy action tensor([ 0.6493, -0.4878, 0.0488, -0.1374]) tensor([0.4302, 0.1380, 0.2360, 0.1959]) -Greedy action tensor([ 0.7688, -0.1867, 0.1261, -0.0693]) tensor([0.4268, 0.1642, 0.2244, 0.1846]) -Greedy action tensor([ 0.8333, -0.5063, -0.0551, -0.3277]) tensor([0.5034, 0.1319, 0.2071, 0.1577]) -Greedy action tensor([ 0.4705, -0.0715, -0.0687, -0.1410]) tensor([0.3694, 0.2148, 0.2154, 0.2004]) -Greedy action tensor([ 0.5237, -0.3423, 0.0435, -0.2964]) tensor([0.4033, 0.1696, 0.2495, 0.1776]) -Greedy action tensor([ 0.5603, -0.1037, -0.0990, -0.0852]) tensor([0.3912, 0.2014, 0.2023, 0.2051]) -Greedy action tensor([ 0.3556, -0.3390, -0.1886, -0.5041]) tensor([0.3995, 0.1995, 0.2318, 0.1691]) -Greedy action tensor([ 0.7905, -0.7085, 0.0532, -0.6603]) tensor([0.5165, 0.1154, 0.2471, 0.1211]) -Greedy action tensor([ 0.5842, -0.4884, -0.1133, -0.2987]) tensor([0.4437, 0.1518, 0.2209, 0.1835]) -Greedy action tensor([ 0.7484, -0.5792, -0.0750, -0.3376]) tensor([0.4898, 0.1299, 0.2150, 0.1653]) -Greedy action tensor([ 0.7676, -0.7380, -0.0845, -0.4633]) tensor([0.5153, 0.1143, 0.2198, 0.1505]) -Greedy action tensor([ 0.8030, -0.6246, -0.1872, -0.6775]) tensor([0.5438, 0.1304, 0.2020, 0.1237]) -Greedy action tensor([ 0.5181, -0.1014, -0.1505, -0.3816]) tensor([0.4069, 0.2190, 0.2085, 0.1655]) -Greedy action tensor([ 0.9376, -0.5294, 0.1039, -0.6801]) tensor([0.5367, 0.1238, 0.2331, 0.1064]) -Greedy action tensor([ 0.4210, -0.1843, 0.1268, -0.2822]) tensor([0.3589, 0.1959, 0.2675, 0.1777]) -Greedy action tensor([ 0.5621, -0.0217, 0.1959, -0.1726]) tensor([0.3662, 0.2043, 0.2539, 0.1756]) -Greedy action tensor([ 0.5879, -0.2198, -0.0773, -0.1482]) tensor([0.4100, 0.1828, 0.2108, 0.1964]) -Greedy action tensor([ 0.3595, 0.1154, -0.0049, -0.4343]) tensor([0.3413, 0.2674, 0.2371, 0.1543]) -Greedy action tensor([ 0.8229, -0.6721, -0.0367, -0.4281]) tensor([0.5171, 0.1160, 0.2189, 0.1480]) -Greedy action tensor([ 0.5157, -0.3197, -0.0049, -0.3509]) tensor([0.4084, 0.1772, 0.2427, 0.1717]) -Greedy action tensor([ 0.7706, -0.2389, -0.0249, -0.4785]) tensor([0.4756, 0.1733, 0.2147, 0.1364]) -Greedy action tensor([ 0.7318, -0.2801, -0.0910, -0.3569]) tensor([0.4674, 0.1699, 0.2053, 0.1574]) -Greedy action tensor([ 0.5394, -0.4252, 0.2117, -0.3114]) tensor([0.3954, 0.1507, 0.2849, 0.1689]) -Greedy action tensor([ 0.6355, -0.4745, 0.1213, -0.3974]) tensor([0.4379, 0.1443, 0.2619, 0.1559]) -Greedy action tensor([ 0.6445, -0.3222, -0.1428, -0.4318]) tensor([0.4595, 0.1748, 0.2091, 0.1566]) -Greedy action tensor([ 0.8384, -0.8794, 0.1219, -0.4532]) tensor([0.5147, 0.0924, 0.2514, 0.1415]) -Greedy action tensor([ 0.5785, -0.2783, 0.0317, -0.1780]) tensor([0.4044, 0.1717, 0.2341, 0.1898]) -Greedy action tensor([ 0.7532, 0.0207, -0.1745, -0.4010]) tensor([0.4563, 0.2193, 0.1805, 0.1439]) -Greedy action tensor([ 0.4948, 0.1774, -0.1503, -0.2394]) tensor([0.3660, 0.2664, 0.1920, 0.1756]) -Greedy action tensor([ 0.4845, -0.0146, 0.0160, -0.4255]) tensor([0.3794, 0.2303, 0.2375, 0.1527]) -Greedy action tensor([ 0.7007, -0.6279, -0.1451, -0.5524]) tensor([0.5051, 0.1338, 0.2168, 0.1443]) -Greedy action tensor([ 0.8425, -0.5956, -0.0693, -0.4924]) tensor([0.5257, 0.1248, 0.2112, 0.1383]) -Greedy action tensor([ 0.3418, -0.4474, -0.1837, 0.1131]) tensor([0.3520, 0.1599, 0.2081, 0.2800]) -Greedy action tensor([ 1.2070, -0.3328, 0.9110, 1.5903]) tensor([0.2919, 0.0626, 0.2171, 0.4283]) -Greedy action tensor([ 0.9452, -0.2872, 0.0777, 1.3060]) tensor([0.3179, 0.0927, 0.1335, 0.4559]) -Greedy action tensor([ 1.5857, -0.8461, 1.4017, 1.0752]) tensor([0.3968, 0.0349, 0.3301, 0.2382]) -Greedy action tensor([ 1.0305, -0.0282, 2.5829, -0.1938]) tensor([0.1571, 0.0545, 0.7422, 0.0462]) -Greedy action tensor([0.7925, 0.2758, 0.2290, 0.2966]) tensor([0.3604, 0.2150, 0.2051, 0.2195]) -Greedy action tensor([ 0.6093, -1.0659, 0.7094, 0.1563]) tensor([0.3415, 0.0640, 0.3775, 0.2171]) -Greedy action tensor([-0.5232, -1.6658, 0.2858, -0.0231]) tensor([0.1918, 0.0612, 0.4307, 0.3163]) -Greedy action tensor([1.0428, 0.2626, 1.0648, 0.3911]) tensor([0.3331, 0.1527, 0.3406, 0.1736]) -Greedy action tensor([-0.4069, -0.3436, 1.1892, 0.1863]) tensor([0.1135, 0.1209, 0.5601, 0.2054]) -Greedy action tensor([-0.0614, 0.6996, 0.5628, -0.9096]) tensor([0.1840, 0.3938, 0.3434, 0.0788]) -Greedy action tensor([ 1.1212, -1.3295, 0.7861, 0.0213]) tensor([0.4685, 0.0404, 0.3351, 0.1560]) -Greedy action tensor([ 0.6886, -0.3309, 0.8499, 0.3196]) tensor([0.3099, 0.1118, 0.3641, 0.2142]) -Greedy action tensor([ 0.8306, -1.4340, 0.1170, 1.5086]) tensor([0.2806, 0.0291, 0.1375, 0.5528]) -Greedy action tensor([-0.4281, -1.1528, 0.5961, 0.9013]) tensor([0.1243, 0.0602, 0.3460, 0.4695]) -Greedy action tensor([-0.1374, 0.0259, 0.4534, 1.3552]) tensor([0.1186, 0.1396, 0.2141, 0.5276]) -Greedy action tensor([-0.9979, -1.5285, -0.2700, 0.5640]) tensor([0.1187, 0.0698, 0.2457, 0.5658]) -Greedy action tensor([ 0.9782, -0.1559, 0.6597, 0.1508]) tensor([0.4022, 0.1294, 0.2925, 0.1758]) -Greedy action tensor([-0.8153, -1.0337, 2.3401, 0.7010]) tensor([0.0335, 0.0270, 0.7868, 0.1527]) -Greedy action tensor([ 1.4213, -0.1232, 0.4609, 0.8377]) tensor([0.4642, 0.0991, 0.1777, 0.2590]) -Greedy action tensor([0.6082, 0.1502, 2.1379, 0.9783]) tensor([0.1299, 0.0822, 0.5998, 0.1881]) -Greedy action tensor([ 0.2452, -0.4259, 1.2351, 0.8060]) tensor([0.1679, 0.0858, 0.4520, 0.2943]) -Greedy action tensor([ 0.9544, -0.0196, -0.0414, 0.6318]) tensor([0.4047, 0.1528, 0.1495, 0.2931]) -Greedy action tensor([ 0.4793, -1.4490, -0.0862, 0.3979]) tensor([0.3795, 0.0552, 0.2156, 0.3498]) -Greedy action tensor([ 0.3296, -0.9849, 1.4778, 0.3351]) tensor([0.1843, 0.0495, 0.5809, 0.1853]) -Greedy action tensor([-0.3628, -1.8585, -0.1810, -0.2385]) tensor([0.2812, 0.0630, 0.3373, 0.3185]) -Greedy action tensor([-0.7896, -0.6306, -0.4536, 0.6034]) tensor([0.1316, 0.1543, 0.1842, 0.5299]) -Greedy action tensor([ 1.3132, -2.2868, 0.2951, 0.7457]) tensor([0.5114, 0.0140, 0.1848, 0.2899]) -Greedy action tensor([ 0.8052, 0.0477, 0.4369, -0.1870]) tensor([0.3950, 0.1852, 0.2733, 0.1464]) -Greedy action tensor([-0.8848, -0.3101, -0.5816, 1.0608]) tensor([0.0899, 0.1596, 0.1217, 0.6288]) -Greedy action tensor([ 0.0251, -0.9769, -0.0308, 0.7433]) tensor([0.2292, 0.0841, 0.2167, 0.4700]) -Greedy action tensor([ 0.8307, -1.2992, 1.8185, 1.4895]) tensor([0.1743, 0.0207, 0.4681, 0.3369]) -Greedy action tensor([ 0.2419, -0.0485, 0.0225, 1.2985]) tensor([0.1842, 0.1378, 0.1479, 0.5300]) -Greedy action tensor([ 1.3098, -0.1351, -0.2580, 1.0257]) tensor([0.4552, 0.1073, 0.0949, 0.3426]) -Greedy action tensor([0.1639, 1.0617, 0.5061, 1.2634]) tensor([0.1271, 0.3120, 0.1790, 0.3818]) -Greedy action tensor([ 0.2139, -0.5586, 0.2088, 1.2022]) tensor([0.1944, 0.0898, 0.1934, 0.5224]) -Greedy action tensor([ 1.0058, -0.1066, 0.3715, 0.9385]) tensor([0.3579, 0.1177, 0.1898, 0.3346]) -Greedy action tensor([0.9569, 0.0729, 1.5070, 0.0399]) tensor([0.2820, 0.1165, 0.4888, 0.1127]) -Greedy action tensor([ 0.6062, -0.9481, 0.9032, -1.0990]) tensor([0.3651, 0.0772, 0.4914, 0.0664]) -Greedy action tensor([ 1.4988, -0.6028, -0.2295, 1.0735]) tensor([0.5119, 0.0626, 0.0909, 0.3346]) -Greedy action tensor([ 0.5406, 0.6707, -0.1936, -0.1410]) tensor([0.3201, 0.3645, 0.1536, 0.1619]) -Greedy action tensor([-0.0827, -0.2343, 0.3819, -1.1871]) tensor([0.2644, 0.2272, 0.4208, 0.0876]) -Greedy action tensor([ 1.9039, -1.4151, 1.0415, 1.1107]) tensor([0.5234, 0.0189, 0.2209, 0.2368]) -Greedy action tensor([ 1.1331, -0.8160, 1.2571, 1.3323]) tensor([0.2861, 0.0407, 0.3239, 0.3492]) -Greedy action tensor([ 1.4412, -0.2717, 1.1473, 0.7919]) tensor([0.4085, 0.0737, 0.3044, 0.2134]) -Greedy action tensor([-0.3807, -0.4122, 0.9755, 0.2241]) tensor([0.1302, 0.1261, 0.5053, 0.2384]) -Greedy action tensor([ 0.8948, 0.9534, -0.4178, -0.3638]) tensor([0.3826, 0.4057, 0.1030, 0.1087]) -Greedy action tensor([ 0.4883, -0.8058, 0.1117, 0.0850]) tensor([0.3804, 0.1043, 0.2611, 0.2542]) -Greedy action tensor([-0.6308, -0.7564, -1.3615, 1.4211]) tensor([0.0986, 0.0869, 0.0475, 0.7671]) -Greedy action tensor([-0.4209, -1.2231, -1.0452, 1.6958]) tensor([0.0972, 0.0436, 0.0521, 0.8072]) -Greedy action tensor([ 1.1464, -0.5323, 2.1860, 1.1823]) tensor([0.1980, 0.0369, 0.5599, 0.2052]) -Greedy action tensor([ 0.9566, -1.5293, 1.4536, 0.1423]) tensor([0.3155, 0.0263, 0.5185, 0.1397]) -Greedy action tensor([-0.6123, -1.0755, -0.0781, 0.7582]) tensor([0.1375, 0.0865, 0.2346, 0.5414]) -Greedy action tensor([-7.9381e-04, 2.5607e-01, 7.6422e-01, 4.7338e-04]) tensor([0.1837, 0.2375, 0.3948, 0.1839]) -Greedy action tensor([-0.5391, -1.1878, 0.9134, 0.3746]) tensor([0.1206, 0.0631, 0.5155, 0.3008]) -Greedy action tensor([ 0.5888, -0.8970, 0.6239, 0.8361]) tensor([0.2823, 0.0639, 0.2924, 0.3615]) -Greedy action tensor([ 1.6084, -0.2383, -0.8109, 1.4735]) tensor([0.4716, 0.0744, 0.0420, 0.4121]) -Greedy action tensor([-0.3991, 0.0365, -0.2713, 0.0257]) tensor([0.1919, 0.2966, 0.2180, 0.2935]) -Greedy action tensor([ 1.5320, -0.8292, 1.6703, 0.6240]) tensor([0.3779, 0.0356, 0.4340, 0.1524]) -Greedy action tensor([-0.0728, -2.0514, 0.2024, 0.6766]) tensor([0.2188, 0.0302, 0.2881, 0.4629]) -Greedy action tensor([-0.0474, 1.0666, 0.4411, -0.2691]) tensor([0.1544, 0.4703, 0.2516, 0.1237]) -Greedy action tensor([ 0.5856, -0.0378, -0.0026, 0.4440]) tensor([0.3379, 0.1812, 0.1876, 0.2933]) -Greedy action tensor([ 1.0715, -0.1690, -0.7616, 0.5608]) tensor([0.4880, 0.1411, 0.0780, 0.2928]) -Greedy action tensor([0.9474, 0.7016, 0.3269, 0.9875]) tensor([0.2975, 0.2327, 0.1600, 0.3097]) -Greedy action tensor([ 2.1443, -0.0825, 0.0577, 1.2661]) tensor([0.6070, 0.0655, 0.0753, 0.2522]) -Greedy action tensor([ 1.3414, -0.8726, 0.4640, 0.5318]) tensor([0.5076, 0.0555, 0.2111, 0.2259]) -Greedy action tensor([-0.1076, 0.5643, 0.6309, 0.2328]) tensor([0.1549, 0.3033, 0.3241, 0.2177]) -Greedy action tensor([-1.2055, -1.0217, -0.0591, -0.8042]) tensor([0.1461, 0.1756, 0.4599, 0.2183]) -Greedy action tensor([ 0.9963, -0.1810, 0.0487, 0.3112]) tensor([0.4546, 0.1401, 0.1762, 0.2291]) -Greedy action tensor([ 1.0193, -0.7437, 0.2194, 0.9589]) tensor([0.3903, 0.0669, 0.1754, 0.3674]) -Greedy action tensor([-0.1591, 0.1703, 0.3913, 0.6592]) tensor([0.1565, 0.2175, 0.2713, 0.3547]) -Greedy action tensor([1.2324, 0.3337, 0.7582, 0.6669]) tensor([0.3850, 0.1567, 0.2396, 0.2187]) -Greedy action tensor([ 0.7066, -0.1688, 2.0942, -0.1658]) tensor([0.1712, 0.0714, 0.6858, 0.0716]) -Greedy action tensor([ 0.7968, -0.6346, 0.0549, 1.0754]) tensor([0.3293, 0.0787, 0.1568, 0.4351]) -Greedy action tensor([ 0.3578, -0.9324, 0.6887, -0.0952]) tensor([0.3027, 0.0833, 0.4215, 0.1925]) -Greedy action tensor([ 0.1909, -1.0721, 0.7802, 0.1991]) tensor([0.2443, 0.0691, 0.4404, 0.2463]) -Greedy action tensor([ 1.9141, -0.7563, 2.0326, 0.5160]) tensor([0.4095, 0.0283, 0.4610, 0.1012]) -Greedy action tensor([ 0.3967, 0.2411, -0.1863, 0.2291]) tensor([0.3068, 0.2626, 0.1712, 0.2594]) -Greedy action tensor([ 0.7768, -0.9812, -0.0648, 0.4632]) tensor([0.4284, 0.0739, 0.1846, 0.3131]) -Greedy action tensor([-1.3877, 0.2496, -0.6232, 0.0456]) tensor([0.0801, 0.4119, 0.1721, 0.3359]) -Greedy action tensor([ 1.2213, 0.2798, 1.3636, -0.0552]) tensor([0.3544, 0.1382, 0.4086, 0.0989]) -Greedy action tensor([ 0.2366, -0.2357, 1.0335, 1.2065]) tensor([0.1543, 0.0962, 0.3424, 0.4071]) -Greedy action tensor([ 1.5273, -0.5619, -0.1365, -0.0193]) tensor([0.6552, 0.0811, 0.1241, 0.1395]) -Greedy action tensor([ 1.2211, -0.4712, -0.0083, 0.2026]) tensor([0.5442, 0.1002, 0.1591, 0.1965]) -Greedy action tensor([ 1.6933, -0.1786, -0.1996, 0.1496]) tensor([0.6587, 0.1013, 0.0992, 0.1407]) -Greedy action tensor([ 2.1680, -1.5093, -0.1610, 0.5302]) tensor([0.7592, 0.0192, 0.0739, 0.1476]) -Greedy action tensor([ 1.1889, -0.2096, -0.3235, 0.3296]) tensor([0.5289, 0.1306, 0.1166, 0.2240]) -Greedy action tensor([ 1.2183, -0.2837, -0.3567, 0.2200]) tensor([0.5561, 0.1238, 0.1151, 0.2049]) -Greedy action tensor([ 1.6855, -0.5435, -0.3371, 0.6049]) tensor([0.6332, 0.0682, 0.0838, 0.2149]) -Greedy action tensor([ 1.2023, -0.2924, -0.2675, 0.3218]) tensor([0.5351, 0.1200, 0.1231, 0.2218]) -Greedy action tensor([ 0.9300, -0.1163, -0.1083, 0.2522]) tensor([0.4519, 0.1587, 0.1600, 0.2294]) -Greedy action tensor([ 1.1263, -0.5866, -0.1551, 0.0263]) tensor([0.5584, 0.1007, 0.1550, 0.1859]) -Greedy action tensor([ 1.2015, -0.3041, -0.2935, 0.3120]) tensor([0.5385, 0.1195, 0.1208, 0.2213]) -Greedy action tensor([ 1.0991, -0.4163, -0.2668, -0.2131]) tensor([0.5734, 0.1260, 0.1463, 0.1544]) -Greedy action tensor([ 0.8835, -0.6469, -0.2399, 0.7595]) tensor([0.4124, 0.0893, 0.1341, 0.3643]) -Greedy action tensor([ 1.5559, -0.5038, -0.2930, 0.3083]) tensor([0.6361, 0.0811, 0.1001, 0.1827]) -Greedy action tensor([ 0.7570, -0.1521, -0.0706, 0.0869]) tensor([0.4252, 0.1713, 0.1859, 0.2176]) -Greedy action tensor([ 0.7267, -0.3156, -0.1721, 0.0629]) tensor([0.4396, 0.1550, 0.1790, 0.2264]) -Greedy action tensor([ 0.8357, -0.5244, -0.3660, 0.5070]) tensor([0.4391, 0.1127, 0.1320, 0.3161]) -Greedy action tensor([ 1.1035, -0.4758, -0.1869, -0.2130]) tensor([0.5716, 0.1178, 0.1573, 0.1532]) -Greedy action tensor([ 1.0429, -0.1504, -0.1893, -0.0903]) tensor([0.5217, 0.1582, 0.1521, 0.1680]) -Greedy action tensor([ 0.6919, -0.3310, -0.1773, 0.2934]) tensor([0.4082, 0.1467, 0.1711, 0.2740]) -Greedy action tensor([ 1.9988, -0.2554, -0.3790, 0.3474]) tensor([0.7197, 0.0755, 0.0668, 0.1380]) -Greedy action tensor([ 0.6707, -0.1066, -0.2055, -0.1050]) tensor([0.4280, 0.1967, 0.1782, 0.1970]) -Greedy action tensor([ 1.0269, -0.2471, -0.2743, 0.1803]) tensor([0.5048, 0.1412, 0.1374, 0.2165]) -Greedy action tensor([ 1.7691, -0.8204, -0.1102, 0.1210]) tensor([0.7041, 0.0529, 0.1075, 0.1355]) -Greedy action tensor([ 0.9381, -0.1005, -0.5556, 0.3670]) tensor([0.4666, 0.1651, 0.1048, 0.2636]) -Greedy action tensor([ 1.2027, 0.0690, -0.1057, 0.0616]) tensor([0.5231, 0.1684, 0.1414, 0.1671]) -Greedy action tensor([ 1.0954, -0.1290, -0.5554, 0.1429]) tensor([0.5343, 0.1571, 0.1025, 0.2061]) -Greedy action tensor([ 2.0690, -0.6526, -0.2944, 0.1149]) tensor([0.7683, 0.0505, 0.0723, 0.1089]) -Greedy action tensor([ 1.5909, -0.4495, -0.4978, 0.3843]) tensor([0.6439, 0.0837, 0.0797, 0.1927]) -Greedy action tensor([ 0.8178, -0.4290, -0.2419, 0.2790]) tensor([0.4510, 0.1296, 0.1563, 0.2631]) -Greedy action tensor([ 1.3608, -0.1333, -0.2976, 0.2729]) tensor([0.5708, 0.1281, 0.1087, 0.1923]) -Greedy action tensor([ 0.8833, 0.0649, -0.2478, 0.1858]) tensor([0.4422, 0.1950, 0.1427, 0.2201]) -Greedy action tensor([ 0.9626, 0.0450, 0.1111, -0.2208]) tensor([0.4689, 0.1873, 0.2001, 0.1436]) -Greedy action tensor([ 2.1686, -1.0422, -0.1454, 0.4187]) tensor([0.7616, 0.0307, 0.0753, 0.1324]) -Greedy action tensor([ 1.1047, -0.5342, -0.1361, -0.1038]) tensor([0.5612, 0.1090, 0.1623, 0.1676]) -Greedy action tensor([ 0.7282, 0.1292, -0.3441, -0.2297]) tensor([0.4395, 0.2414, 0.1504, 0.1686]) -Greedy action tensor([ 1.9257, -0.7490, -0.2173, 0.6152]) tensor([0.6869, 0.0473, 0.0806, 0.1852]) -Greedy action tensor([ 1.3404, -0.7572, -0.1982, 0.3989]) tensor([0.5789, 0.0711, 0.1243, 0.2258]) -Greedy action tensor([ 1.0916e+00, -4.0482e-01, -9.3730e-04, 5.6856e-02]) tensor([0.5223, 0.1170, 0.1752, 0.1856]) -Greedy action tensor([ 0.6932, -0.0531, -0.3401, -0.1340]) tensor([0.4411, 0.2091, 0.1569, 0.1929]) -Greedy action tensor([ 1.6875, -0.3472, -0.1341, 0.4026]) tensor([0.6373, 0.0833, 0.1031, 0.1763]) -Greedy action tensor([ 1.4859, -0.5616, -0.3058, 0.3858]) tensor([0.6140, 0.0792, 0.1023, 0.2044]) -Greedy action tensor([ 1.4026, -0.5737, -0.5321, 0.4890]) tensor([0.5938, 0.0823, 0.0858, 0.2382]) -Greedy action tensor([ 0.8638, -0.4772, -0.2546, 0.3799]) tensor([0.4536, 0.1186, 0.1482, 0.2796]) -Greedy action tensor([ 1.7108, -1.1934, -0.4655, 0.5187]) tensor([0.6794, 0.0372, 0.0771, 0.2063]) -Greedy action tensor([ 1.2537, -0.2236, -0.3965, 0.3259]) tensor([0.5507, 0.1257, 0.1058, 0.2178]) -Greedy action tensor([ 0.6385, -0.2755, -0.1969, 0.2935]) tensor([0.3933, 0.1577, 0.1706, 0.2785]) -Greedy action tensor([ 1.3433, -0.6950, -0.1553, 0.2680]) tensor([0.5900, 0.0768, 0.1318, 0.2013]) -Greedy action tensor([ 1.0273, -0.6691, -0.4709, 0.1939]) tensor([0.5430, 0.0996, 0.1214, 0.2360]) -Greedy action tensor([ 1.1022, -0.5671, -0.0624, 0.3948]) tensor([0.5017, 0.0945, 0.1565, 0.2473]) -Greedy action tensor([ 0.7693, -0.3421, -0.3120, 0.1658]) tensor([0.4514, 0.1486, 0.1531, 0.2469]) -Greedy action tensor([ 0.8370, -0.1829, -0.5510, -0.0915]) tensor([0.4986, 0.1798, 0.1245, 0.1971]) -Greedy action tensor([ 0.9498, -0.2893, -0.2005, 0.1320]) tensor([0.4884, 0.1415, 0.1546, 0.2156]) -Greedy action tensor([ 1.0413, -0.2461, 0.2702, 0.0786]) tensor([0.4716, 0.1302, 0.2181, 0.1801]) -Greedy action tensor([ 1.4086, -0.6494, -0.1900, 0.3592]) tensor([0.5952, 0.0760, 0.1203, 0.2084]) -Greedy action tensor([ 1.3218, -0.6385, -0.1298, 0.3086]) tensor([0.5754, 0.0810, 0.1347, 0.2089]) -Greedy action tensor([ 1.5910, -0.5757, -0.3288, 0.2314]) tensor([0.6588, 0.0755, 0.0966, 0.1692]) -Greedy action tensor([ 0.7064, -0.2872, -0.6141, 0.4223]) tensor([0.4184, 0.1549, 0.1117, 0.3149]) -Greedy action tensor([ 1.9331, -0.9017, -0.3521, 0.4242]) tensor([0.7238, 0.0425, 0.0736, 0.1601]) -Greedy action tensor([ 0.9799, -0.3918, -0.0305, -0.1954]) tensor([0.5191, 0.1317, 0.1890, 0.1603]) -Greedy action tensor([ 0.6345, -0.5468, -0.2069, 0.3823]) tensor([0.3976, 0.1220, 0.1714, 0.3090]) -Greedy action tensor([ 0.3715, -0.3164, -0.1766, 0.1658]) tensor([0.3455, 0.1736, 0.1997, 0.2812]) -Greedy action tensor([ 1.5343, -0.5025, -0.4682, 0.5101]) tensor([0.6156, 0.0803, 0.0831, 0.2210]) -Greedy action tensor([ 1.1567, -0.4964, -0.2661, 0.0474]) tensor([0.5675, 0.1086, 0.1368, 0.1871]) -Greedy action tensor([ 1.1683, -0.0174, -0.1178, 0.0302]) tensor([0.5257, 0.1606, 0.1453, 0.1684]) -Greedy action tensor([ 1.5930, -0.3816, -0.3566, 0.1835]) tensor([0.6556, 0.0910, 0.0933, 0.1601]) -Greedy action tensor([ 1.1302, -0.2443, -0.0690, 0.1155]) tensor([0.5217, 0.1320, 0.1573, 0.1891]) -Greedy action tensor([ 0.9518, -0.2524, -0.1904, 0.0788]) tensor([0.4910, 0.1473, 0.1567, 0.2051]) -Greedy action tensor([ 0.8476, -0.4089, -0.5942, 0.2468]) tensor([0.4832, 0.1375, 0.1143, 0.2650]) -Greedy action tensor([ 0.9937, -0.2582, -0.5941, 0.3326]) tensor([0.4983, 0.1425, 0.1019, 0.2573]) -Greedy action tensor([ 1.9511, -0.7696, -0.4023, 0.5962]) tensor([0.7048, 0.0464, 0.0670, 0.1818]) -Greedy action tensor([ 1.3772, -0.5135, -0.3412, 0.4158]) tensor([0.5839, 0.0881, 0.1047, 0.2233]) -Greedy action tensor([ 0.4721, -0.2757, 0.0148, -0.0237]) tensor([0.3682, 0.1743, 0.2331, 0.2243]) -Greedy action tensor([ 1.1823, -0.0765, -0.3591, -0.0599]) tensor([0.5597, 0.1589, 0.1198, 0.1616]) -Greedy action tensor([ 0.9491, -0.4476, -0.3834, 0.5606]) tensor([0.4568, 0.1130, 0.1205, 0.3097]) -Greedy action tensor([1.4227, 0.2062, 0.0251, 0.3591]) tensor([0.5295, 0.1569, 0.1309, 0.1828]) -Greedy action tensor([ 0.6728, -0.1474, 0.0227, -0.1200]) tensor([0.4141, 0.1823, 0.2162, 0.1874]) -Greedy action tensor([ 1.0358, -0.0850, -0.0763, 0.3322]) tensor([0.4652, 0.1517, 0.1530, 0.2302]) -Greedy action tensor([ 1.9291, -0.6999, -0.1669, 0.3284]) tensor([0.7159, 0.0517, 0.0880, 0.1444]) -Greedy action tensor([ 1.1213, -0.8045, -0.3924, 0.2964]) tensor([0.5543, 0.0808, 0.1220, 0.2429]) -Greedy action tensor([ 2.1116, -0.6857, -0.3234, 0.5312]) tensor([0.7383, 0.0450, 0.0647, 0.1520]) -Greedy action tensor([-1.1198, -0.2444, 0.1522, 0.3123]) tensor([0.0896, 0.2151, 0.3199, 0.3754]) -Greedy action tensor([-0.9132, -0.7369, 0.7947, 1.4399]) tensor([0.0549, 0.0654, 0.3027, 0.5770]) -Greedy action tensor([-1.7886, -0.4761, 0.7392, 0.1430]) tensor([0.0414, 0.1539, 0.5188, 0.2858]) -Greedy action tensor([-1.8606, -0.3937, 0.6412, -0.0746]) tensor([0.0425, 0.1845, 0.5192, 0.2538]) -Greedy action tensor([-1.5300, -0.5568, 0.5092, 0.2288]) tensor([0.0584, 0.1544, 0.4484, 0.3388]) -Greedy action tensor([-1.8888, -0.2639, 0.6163, -0.1369]) tensor([0.0415, 0.2108, 0.5083, 0.2393]) -Greedy action tensor([-1.8806, -0.4430, 0.6700, -0.1174]) tensor([0.0419, 0.1765, 0.5372, 0.2444]) -Greedy action tensor([-1.8999, -0.4314, 0.6564, -0.1367]) tensor([0.0416, 0.1805, 0.5356, 0.2423]) -Greedy action tensor([-1.7711, -0.5128, 0.6675, 0.0554]) tensor([0.0451, 0.1586, 0.5163, 0.2800]) -Greedy action tensor([-0.2976, 0.2548, 0.7829, 1.6019]) tensor([0.0809, 0.1405, 0.2383, 0.5404]) -Greedy action tensor([-0.7881, -0.5917, 0.1776, 0.3906]) tensor([0.1235, 0.1504, 0.3245, 0.4016]) -Greedy action tensor([-2.0399, -0.6087, 0.8755, 0.4283]) tensor([0.0282, 0.1180, 0.5208, 0.3330]) -Greedy action tensor([-1.1380, -0.1270, 0.3773, 0.6121]) tensor([0.0712, 0.1956, 0.3238, 0.4095]) -Greedy action tensor([-1.4810, -0.5384, 0.6693, 0.5410]) tensor([0.0507, 0.1302, 0.4357, 0.3833]) -Greedy action tensor([-0.6873, 1.1173, 0.0566, 0.7053]) tensor([0.0757, 0.4602, 0.1593, 0.3048]) -Greedy action tensor([-1.0469, -0.8909, 0.7578, 1.4650]) tensor([0.0486, 0.0568, 0.2954, 0.5992]) -Greedy action tensor([-1.6647, -0.4552, 0.7127, 0.2323]) tensor([0.0459, 0.1538, 0.4945, 0.3058]) -Greedy action tensor([-1.5309, 0.1381, 0.3574, 0.0061]) tensor([0.0569, 0.3021, 0.3762, 0.2648]) -Greedy action tensor([-0.3771, -0.8011, 0.9674, 0.4788]) tensor([0.1275, 0.0834, 0.4891, 0.3000]) -Greedy action tensor([-1.8123, -0.3896, 0.5869, -0.0889]) tensor([0.0459, 0.1906, 0.5060, 0.2574]) -Greedy action tensor([-1.7598, -0.5290, 0.6732, 0.0265]) tensor([0.0459, 0.1572, 0.5230, 0.2739]) -Greedy action tensor([-1.8927, -0.4580, 0.6763, -0.0890]) tensor([0.0411, 0.1726, 0.5366, 0.2496]) -Greedy action tensor([-1.1873, -0.7016, 1.1867, 1.4023]) tensor([0.0375, 0.0609, 0.4024, 0.4992]) -Greedy action tensor([-1.8162, -0.4736, 0.6007, -0.0820]) tensor([0.0461, 0.1764, 0.5165, 0.2610]) -Greedy action tensor([-1.5088, -0.4120, 0.7899, 0.7306]) tensor([0.0428, 0.1283, 0.4267, 0.4022]) -Greedy action tensor([-1.8247, -0.4239, 0.6617, 0.0314]) tensor([0.0426, 0.1729, 0.5119, 0.2726]) -Greedy action tensor([-1.5203, -0.3969, 1.0273, 0.9447]) tensor([0.0349, 0.1075, 0.4465, 0.4111]) -Greedy action tensor([-1.7858, -0.4477, 0.5827, -0.0954]) tensor([0.0478, 0.1823, 0.5107, 0.2592]) -Greedy action tensor([-1.8537, -0.4501, 0.6375, -0.1047]) tensor([0.0437, 0.1778, 0.5275, 0.2511]) -Greedy action tensor([-1.8498, -0.4114, 0.6097, -0.1171]) tensor([0.0443, 0.1867, 0.5184, 0.2506]) -Greedy action tensor([-1.7280, -0.1354, 0.5173, -0.1009]) tensor([0.0489, 0.2404, 0.4618, 0.2489]) -Greedy action tensor([-1.8479, -0.5181, 0.6467, -0.0985]) tensor([0.0442, 0.1669, 0.5350, 0.2539]) -Greedy action tensor([-0.7108, 0.7435, 0.2280, 0.7210]) tensor([0.0832, 0.3561, 0.2126, 0.3481]) -Greedy action tensor([-1.6737, -0.5130, 0.5372, 0.0133]) tensor([0.0534, 0.1705, 0.4874, 0.2886]) -Greedy action tensor([-0.9639, -0.2292, 0.4807, 1.1236]) tensor([0.0650, 0.1355, 0.2755, 0.5240]) -Greedy action tensor([-0.8162, -0.5529, 0.3503, 0.7111]) tensor([0.0988, 0.1286, 0.3173, 0.4552]) -Greedy action tensor([-0.9560, -0.4900, 0.3483, 0.0359]) tensor([0.1114, 0.1776, 0.4106, 0.3004]) -Greedy action tensor([-1.7034, -0.7210, 0.7640, -0.1535]) tensor([0.0496, 0.1324, 0.5845, 0.2335]) -Greedy action tensor([-1.6249, -0.5636, 0.5295, 0.0047]) tensor([0.0568, 0.1641, 0.4895, 0.2896]) -Greedy action tensor([-0.9637, -0.5982, 0.1779, 0.3394]) tensor([0.1081, 0.1557, 0.3384, 0.3978]) -Greedy action tensor([-1.5818, -0.5644, 0.4782, -0.0074]) tensor([0.0608, 0.1682, 0.4773, 0.2937]) -Greedy action tensor([-1.6676, -0.5306, 0.5368, 0.0159]) tensor([0.0539, 0.1679, 0.4882, 0.2900]) -Greedy action tensor([-1.0635, -0.5623, 0.4538, 0.7009]) tensor([0.0766, 0.1265, 0.3495, 0.4474]) -Greedy action tensor([-1.8333, -0.4838, 0.6072, -0.1088]) tensor([0.0456, 0.1757, 0.5231, 0.2556]) -Greedy action tensor([-0.6114, -0.6054, 0.1962, 0.0561]) tensor([0.1613, 0.1623, 0.3618, 0.3145]) -Greedy action tensor([-1.1238, -0.5161, 1.3082, 1.3655]) tensor([0.0381, 0.0699, 0.4332, 0.4588]) -Greedy action tensor([-0.2879, 0.0916, 0.8006, 1.4977]) tensor([0.0878, 0.1283, 0.2606, 0.5233]) -Greedy action tensor([-0.7556, -0.0941, 0.2313, 0.2698]) tensor([0.1189, 0.2304, 0.3191, 0.3316]) -Greedy action tensor([-0.7226, -0.2655, 0.6975, 1.1321]) tensor([0.0763, 0.1205, 0.3157, 0.4875]) -Greedy action tensor([-1.1583, -0.1484, 0.5588, -0.4891]) tensor([0.0888, 0.2437, 0.4942, 0.1733]) -Greedy action tensor([-1.7285, -0.4940, 0.5458, -0.0288]) tensor([0.0509, 0.1751, 0.4952, 0.2788]) -Greedy action tensor([-1.7430, -0.4976, 0.5570, -0.0433]) tensor([0.0502, 0.1744, 0.5007, 0.2747]) -Greedy action tensor([-1.1318, -0.5068, 0.4367, 0.4587]) tensor([0.0795, 0.1486, 0.3817, 0.3902]) -Greedy action tensor([-1.9362, -0.5970, 1.4860, 0.8024]) tensor([0.0196, 0.0749, 0.6017, 0.3037]) -Greedy action tensor([-1.4769, -0.2259, 0.3731, 0.1374]) tensor([0.0630, 0.2200, 0.4005, 0.3164]) -Greedy action tensor([-0.5760, -0.3002, 1.1035, 1.5615]) tensor([0.0619, 0.0815, 0.3319, 0.5247]) -Greedy action tensor([-1.7756, -0.4837, 0.7354, 0.1470]) tensor([0.0420, 0.1530, 0.5176, 0.2874]) -Greedy action tensor([-1.7993, -0.4628, 0.6042, -0.0809]) tensor([0.0466, 0.1775, 0.5159, 0.2600]) -Greedy action tensor([-1.1616, -0.5530, 0.2822, 0.4849]) tensor([0.0815, 0.1499, 0.3455, 0.4231]) -Greedy action tensor([-1.2908, -0.9743, 1.1016, 1.3005]) tensor([0.0375, 0.0515, 0.4103, 0.5007]) -Greedy action tensor([-1.0808, -0.5492, 0.2748, 0.1369]) tensor([0.1004, 0.1708, 0.3895, 0.3393]) -Greedy action tensor([-1.1047, 0.0278, 0.3613, -0.1827]) tensor([0.0913, 0.2834, 0.3956, 0.2296]) -Greedy action tensor([-1.8278, -0.6477, 0.1158, -0.3619]) tensor([0.0642, 0.2090, 0.4485, 0.2782]) -Greedy action tensor([-0.7408, -0.4547, 0.4385, -0.2830]) tensor([0.1396, 0.1858, 0.4540, 0.2206]) -Greedy action tensor([-1.1199, -0.4677, 0.4592, -0.5154]) tensor([0.1042, 0.2000, 0.5052, 0.1906]) -Greedy action tensor([-1.6486, -0.5236, 0.5147, 0.0263]) tensor([0.0552, 0.1700, 0.4802, 0.2946]) -Greedy action tensor([-1.9035, -0.2772, 0.6180, -0.1356]) tensor([0.0410, 0.2085, 0.5103, 0.2402]) -Greedy action tensor([-1.7335, -0.5934, 1.3411, 0.9319]) tensor([0.0249, 0.0779, 0.5391, 0.3581]) -Greedy action tensor([-1.7984, -0.3506, 0.5762, -0.0619]) tensor([0.0461, 0.1962, 0.4957, 0.2619]) -Greedy action tensor([-1.8009, -0.4751, 0.5927, -0.0644]) tensor([0.0467, 0.1760, 0.5119, 0.2653]) -Greedy action tensor([-1.4115, -0.5927, 0.3814, 0.1409]) tensor([0.0714, 0.1620, 0.4291, 0.3374]) -Greedy action tensor([-1.7142e+00, -2.1844e-01, 4.9273e-01, -6.5458e-04]) tensor([0.0498, 0.2220, 0.4521, 0.2761]) -Greedy action tensor([-0.6879, -0.1882, -0.9541, -0.1771]) tensor([0.1968, 0.3244, 0.1508, 0.3280]) -Greedy action tensor([-1.2613, -0.3061, 0.2308, 0.2624]) tensor([0.0791, 0.2057, 0.3519, 0.3632]) -Greedy action tensor([-1.0999, 0.5301, 0.3398, -0.6393]) tensor([0.0840, 0.4286, 0.3543, 0.1331]) -Greedy action tensor([-1.8806, -0.4508, 0.6346, -0.1367]) tensor([0.0430, 0.1796, 0.5316, 0.2458]) -Greedy action tensor([-1.7851, -0.4983, 0.5910, -0.0746]) tensor([0.0478, 0.1731, 0.5146, 0.2645]) -Greedy action tensor([0.0828, 1.0368, 0.4704, 1.1899]) tensor([0.1235, 0.3207, 0.1820, 0.3738]) -Greedy action tensor([-1.1414, -0.5602, 1.2272, 1.3877]) tensor([0.0384, 0.0687, 0.4107, 0.4822]) -Greedy action tensor([-1.0362, -0.6079, 0.2119, 0.6299]) tensor([0.0884, 0.1357, 0.3080, 0.4679]) -Greedy action tensor([-1.0801, -0.6875, 1.0619, 1.4567]) tensor([0.0423, 0.0627, 0.3603, 0.5347]) -Greedy action tensor([ 0.5532, -0.6304, -0.1042, -0.3653]) tensor([0.4498, 0.1377, 0.2331, 0.1795]) -Greedy action tensor([ 0.8019, -0.5828, 0.0403, -0.3982]) tensor([0.4954, 0.1240, 0.2313, 0.1492]) -Greedy action tensor([ 0.5868, -0.2260, -0.0287, -0.4536]) tensor([0.4279, 0.1898, 0.2312, 0.1512]) -Greedy action tensor([ 0.4704, -0.2663, 0.0030, -0.6439]) tensor([0.4109, 0.1967, 0.2575, 0.1348]) -Greedy action tensor([ 0.7810, -0.4619, 0.0702, -0.7142]) tensor([0.4990, 0.1440, 0.2451, 0.1119]) -Greedy action tensor([ 0.8348, -0.1897, 0.0353, -0.3625]) tensor([0.4738, 0.1701, 0.2130, 0.1431]) -Greedy action tensor([ 0.7972, -0.9098, -0.1218, -0.3179]) tensor([0.5241, 0.0951, 0.2090, 0.1718]) -Greedy action tensor([ 0.6001, -0.2767, -0.2987, -0.2051]) tensor([0.4405, 0.1833, 0.1793, 0.1969]) -Greedy action tensor([ 0.5519, -0.2832, -0.0442, -0.1641]) tensor([0.4043, 0.1754, 0.2227, 0.1976]) -Greedy action tensor([ 0.5251, -0.3073, -0.0547, -0.3511]) tensor([0.4147, 0.1804, 0.2322, 0.1727]) -Greedy action tensor([ 0.8744, -0.3994, 0.0071, -0.5510]) tensor([0.5154, 0.1442, 0.2165, 0.1239]) -Greedy action tensor([ 0.5910, -0.2729, -0.0901, -0.3819]) tensor([0.4337, 0.1828, 0.2195, 0.1640]) -Greedy action tensor([ 0.6162, -0.5558, -0.0808, -0.2552]) tensor([0.4492, 0.1391, 0.2237, 0.1879]) -Greedy action tensor([ 0.2788, -0.1492, -0.1477, -0.2204]) tensor([0.3434, 0.2239, 0.2242, 0.2085]) -Greedy action tensor([ 0.6027, -0.4176, -0.0511, -0.3677]) tensor([0.4426, 0.1596, 0.2302, 0.1677]) -Greedy action tensor([ 0.8135, -0.4839, -0.0898, -0.2977]) tensor([0.4981, 0.1361, 0.2019, 0.1640]) -Greedy action tensor([ 0.8973, -0.3392, -0.0298, -0.4222]) tensor([0.5119, 0.1487, 0.2026, 0.1368]) -Greedy action tensor([ 0.6868, -0.3063, 0.0982, -0.4843]) tensor([0.4473, 0.1657, 0.2483, 0.1387]) -Greedy action tensor([ 0.3528, 0.2911, -0.0294, -0.4208]) tensor([0.3243, 0.3049, 0.2213, 0.1496]) -Greedy action tensor([ 0.4666, -0.3488, -0.0747, -0.3434]) tensor([0.4050, 0.1792, 0.2357, 0.1802]) -Greedy action tensor([ 0.3953, -0.2173, 0.0680, -0.4929]) tensor([0.3739, 0.2027, 0.2696, 0.1538]) -Greedy action tensor([ 0.7608, -0.5662, -0.0748, -0.4413]) tensor([0.5001, 0.1327, 0.2169, 0.1503]) -Greedy action tensor([ 0.7292, -0.5306, 0.1273, -0.6302]) tensor([0.4789, 0.1359, 0.2623, 0.1230]) -Greedy action tensor([ 0.7085, -0.2606, -0.0189, -0.4493]) tensor([0.4594, 0.1743, 0.2220, 0.1443]) -Greedy action tensor([ 0.7411, -0.5275, -0.0699, -0.3037]) tensor([0.4814, 0.1354, 0.2139, 0.1693]) -Greedy action tensor([ 0.2226, -0.1878, 0.0848, -0.3457]) tensor([0.3225, 0.2139, 0.2810, 0.1827]) -Greedy action tensor([ 0.4866, -0.3722, 0.1814, -0.5063]) tensor([0.3951, 0.1674, 0.2912, 0.1464]) -Greedy action tensor([ 0.4681, 0.0448, 0.0091, -0.0559]) tensor([0.3473, 0.2275, 0.2195, 0.2057]) -Greedy action tensor([ 0.6822, -0.3674, 0.1003, -0.4904]) tensor([0.4508, 0.1578, 0.2519, 0.1395]) -Greedy action tensor([ 0.7877, -0.4971, -0.0396, -0.4375]) tensor([0.4981, 0.1378, 0.2178, 0.1463]) -Greedy action tensor([ 0.3377, -0.1799, -0.0075, -0.3662]) tensor([0.3573, 0.2129, 0.2530, 0.1767]) -Greedy action tensor([ 0.5191, -0.4881, -0.0556, -0.1665]) tensor([0.4112, 0.1502, 0.2315, 0.2072]) -Greedy action tensor([ 0.5827, -0.4583, -0.0225, -0.3805]) tensor([0.4385, 0.1548, 0.2394, 0.1673]) -Greedy action tensor([ 0.7775, -0.2720, -0.1048, -0.3752]) tensor([0.4808, 0.1683, 0.1990, 0.1518]) -Greedy action tensor([ 0.7167, 0.0449, 0.0453, -0.2103]) tensor([0.4137, 0.2113, 0.2114, 0.1637]) -Greedy action tensor([ 0.4914, -0.2763, -0.0259, -0.1969]) tensor([0.3902, 0.1811, 0.2326, 0.1960]) -Greedy action tensor([ 0.2651, 0.1889, -0.0450, -0.1538]) tensor([0.3014, 0.2793, 0.2210, 0.1983]) -Greedy action tensor([ 0.6567, -0.3818, -0.0926, -0.6507]) tensor([0.4768, 0.1688, 0.2254, 0.1290]) -Greedy action tensor([ 1.3058, -0.4772, 0.1960, -0.6704]) tensor([0.6111, 0.1028, 0.2014, 0.0847]) -Greedy action tensor([ 0.9606, -0.9742, 0.0437, -0.3437]) tensor([0.5508, 0.0796, 0.2202, 0.1495]) -Greedy action tensor([ 0.3647, -0.1369, -0.0451, -0.3355]) tensor([0.3616, 0.2189, 0.2400, 0.1795]) -Greedy action tensor([ 0.5593, -0.1507, -0.1356, -0.1949]) tensor([0.4063, 0.1998, 0.2028, 0.1911]) -Greedy action tensor([ 9.8543e-01, -9.2666e-01, -5.0601e-04, -6.0410e-01]) tensor([0.5798, 0.0857, 0.2163, 0.1183]) -Greedy action tensor([ 0.8685, -0.7012, -0.0251, -0.5384]) tensor([0.5370, 0.1118, 0.2197, 0.1315]) -Greedy action tensor([ 0.9171, -1.1181, 0.0341, -0.4922]) tensor([0.5591, 0.0730, 0.2312, 0.1366]) -Greedy action tensor([ 0.7153, -0.4603, 0.0262, -0.2983]) tensor([0.4601, 0.1420, 0.2310, 0.1670]) -Greedy action tensor([ 0.6178, -0.2361, 0.0259, -0.4361]) tensor([0.4296, 0.1829, 0.2377, 0.1498]) -Greedy action tensor([ 0.5190, -0.2575, -0.0072, -0.2187]) tensor([0.3954, 0.1819, 0.2336, 0.1891]) -Greedy action tensor([ 0.3739, -0.0454, 0.0358, -0.3862]) tensor([0.3523, 0.2317, 0.2513, 0.1648]) -Greedy action tensor([ 0.6203, -0.4415, 0.0245, -0.5666]) tensor([0.4541, 0.1570, 0.2503, 0.1386]) -Greedy action tensor([ 0.4523, 0.0717, -0.1876, 0.0266]) tensor([0.3491, 0.2386, 0.1841, 0.2281]) -Greedy action tensor([ 0.9215, -0.5673, -0.0374, -0.6002]) tensor([0.5473, 0.1235, 0.2098, 0.1195]) -Greedy action tensor([ 0.8591, -0.4231, -0.0599, -0.4471]) tensor([0.5136, 0.1425, 0.2049, 0.1391]) -Greedy action tensor([ 0.5338, -0.1510, 0.0497, -0.4165]) tensor([0.3989, 0.2011, 0.2458, 0.1542]) -Greedy action tensor([ 0.5060, -0.3886, -0.0608, -0.0932]) tensor([0.3960, 0.1619, 0.2247, 0.2175]) -Greedy action tensor([ 0.7855, -0.3447, 0.0840, -0.1010]) tensor([0.4483, 0.1448, 0.2223, 0.1847]) -Greedy action tensor([ 0.1455, -0.0702, -0.0459, -0.3371]) tensor([0.3078, 0.2481, 0.2542, 0.1900]) -Greedy action tensor([ 0.6713, -0.3890, -0.0562, -0.5279]) tensor([0.4693, 0.1625, 0.2267, 0.1415]) -Greedy action tensor([ 0.4245, 0.0168, -0.0258, -0.3258]) tensor([0.3604, 0.2397, 0.2297, 0.1702]) -Greedy action tensor([ 0.5272, 0.0352, 0.1347, -0.1018]) tensor([0.3546, 0.2168, 0.2395, 0.1891]) -Greedy action tensor([ 0.7052, -0.2299, 0.1370, -0.8127]) tensor([0.4591, 0.1802, 0.2601, 0.1006]) -Greedy action tensor([ 0.2479, -0.0446, -0.0610, -0.2019]) tensor([0.3207, 0.2394, 0.2355, 0.2045]) -Greedy action tensor([ 0.1980, -0.0183, 0.0046, -0.1692]) tensor([0.3010, 0.2424, 0.2481, 0.2085]) -Greedy action tensor([ 0.7411, -0.4079, -0.1044, -0.1981]) tensor([0.4679, 0.1483, 0.2009, 0.1829]) -Greedy action tensor([ 0.7430, -0.5329, 0.0147, -0.6809]) tensor([0.4993, 0.1394, 0.2410, 0.1202]) -Greedy action tensor([ 0.3899, 0.0374, -0.0323, -0.0335]) tensor([0.3319, 0.2333, 0.2176, 0.2173]) -Greedy action tensor([ 0.6224, -0.3007, -0.0346, -0.3905]) tensor([0.4388, 0.1743, 0.2275, 0.1594]) -Greedy action tensor([ 0.4602, -0.0015, -0.0835, -0.3639]) tensor([0.3774, 0.2379, 0.2191, 0.1656]) -Greedy action tensor([ 0.4891, -0.1840, -0.0095, -0.3411]) tensor([0.3916, 0.1998, 0.2379, 0.1707]) -Greedy action tensor([ 0.8893, -0.5181, 0.0582, -0.5238]) tensor([0.5198, 0.1272, 0.2264, 0.1265]) -Greedy action tensor([ 1.0318, -0.2679, -0.0621, -0.5584]) tensor([0.5521, 0.1505, 0.1849, 0.1126]) -Greedy action tensor([ 0.8504, -0.6134, -0.1405, -0.4959]) tensor([0.5368, 0.1242, 0.1993, 0.1397]) -Greedy action tensor([ 0.8049, -0.7833, -0.0677, -0.3321]) tensor([0.5147, 0.1051, 0.2151, 0.1651]) -Greedy action tensor([ 0.9780, -0.7709, -0.0828, -0.4747]) tensor([0.5701, 0.0992, 0.1974, 0.1334]) -Greedy action tensor([ 0.6366, -0.2719, -0.0250, -0.2318]) tensor([0.4276, 0.1724, 0.2206, 0.1794]) -Greedy action tensor([ 0.2719, -0.2221, -0.0433, -0.4228]) tensor([0.3522, 0.2149, 0.2570, 0.1758]) -Greedy action tensor([ 0.5198, -0.4601, 0.1402, -0.6024]) tensor([0.4193, 0.1574, 0.2868, 0.1365]) -Greedy action tensor([ 0.5507, -0.3977, -0.2095, -0.3884]) tensor([0.4453, 0.1725, 0.2082, 0.1741]) -Greedy action tensor([ 0.6970, -0.4485, -0.0989, -0.4093]) tensor([0.4762, 0.1515, 0.2148, 0.1575]) -Greedy action tensor([ 0.2624, 0.3254, -0.0461, 0.0245]) tensor([0.2787, 0.2968, 0.2047, 0.2197]) -Greedy action tensor([ 0.9192, -0.5699, -0.0855, -0.5388]) tensor([0.5481, 0.1236, 0.2007, 0.1275]) -Greedy action tensor([-1.0213e-03, 3.5499e-01, 1.2769e+00, -6.0488e-01]) tensor([0.1524, 0.2175, 0.5468, 0.0833]) -Greedy action tensor([-0.3172, -1.0173, 0.3659, -0.9445]) tensor([0.2493, 0.1238, 0.4937, 0.1332]) -Greedy action tensor([ 0.7952, -0.1034, -0.1433, 0.5417]) tensor([0.3884, 0.1581, 0.1520, 0.3015]) -Greedy action tensor([-1.0894, -1.2540, -0.3796, 0.3277]) tensor([0.1249, 0.1059, 0.2540, 0.5152]) -Greedy action tensor([0.7104, 0.1731, 0.2975, 0.5927]) tensor([0.3190, 0.1864, 0.2111, 0.2836]) -Greedy action tensor([ 0.8349, -0.7383, 0.7825, 1.7380]) tensor([0.2163, 0.0449, 0.2052, 0.5336]) -Greedy action tensor([ 0.1174, -0.7408, 0.4291, 0.4912]) tensor([0.2357, 0.0999, 0.3219, 0.3425]) -Greedy action tensor([-1.0701, -2.4828, -0.2898, 0.3689]) tensor([0.1309, 0.0319, 0.2856, 0.5517]) -Greedy action tensor([ 0.0641, 0.5700, -0.5102, 0.3548]) tensor([0.2193, 0.3638, 0.1235, 0.2933]) -Greedy action tensor([ 0.5296, -1.4231, -0.5857, 1.3983]) tensor([0.2595, 0.0368, 0.0851, 0.6186]) -Greedy action tensor([-0.6123, -0.1146, 0.1053, 0.2364]) tensor([0.1422, 0.2340, 0.2915, 0.3323]) -Greedy action tensor([-0.3409, -0.2395, -0.1739, 1.6346]) tensor([0.0952, 0.1054, 0.1126, 0.6868]) -Greedy action tensor([ 0.2121, 0.7536, -1.1187, 0.1921]) tensor([0.2523, 0.4337, 0.0667, 0.2473]) -Greedy action tensor([0.6876, 0.0680, 2.0218, 0.8152]) tensor([0.1545, 0.0832, 0.5867, 0.1756]) -Greedy action tensor([-0.7267, -1.5399, -1.0737, 0.7780]) tensor([0.1503, 0.0667, 0.1062, 0.6768]) -Greedy action tensor([1.1769, 0.2282, 1.1903, 0.7142]) tensor([0.3300, 0.1278, 0.3344, 0.2078]) -Greedy action tensor([ 0.5067, -0.4367, -0.1817, 0.3820]) tensor([0.3604, 0.1403, 0.1811, 0.3182]) -Greedy action tensor([ 0.1601, 0.3921, -0.2816, 0.1943]) tensor([0.2539, 0.3202, 0.1632, 0.2627]) -Greedy action tensor([-0.6798, -1.2333, -0.0150, 0.1939]) tensor([0.1691, 0.0972, 0.3287, 0.4051]) -Greedy action tensor([ 1.7007, 0.5060, -0.6970, 1.0423]) tensor([0.5232, 0.1584, 0.0476, 0.2708]) -Greedy action tensor([ 0.9308, -0.7159, 0.8440, 1.4190]) tensor([0.2675, 0.0515, 0.2452, 0.4358]) -Greedy action tensor([ 0.1314, -1.4239, 2.1206, 0.3256]) tensor([0.1027, 0.0217, 0.7509, 0.1247]) -Greedy action tensor([ 0.8295, -0.4100, 1.3687, -0.5575]) tensor([0.3073, 0.0890, 0.5270, 0.0768]) -Greedy action tensor([ 0.6179, -1.2760, -0.0646, -0.2116]) tensor([0.4780, 0.0719, 0.2416, 0.2085]) -Greedy action tensor([ 1.8932, -1.4736, 0.6933, 1.1329]) tensor([0.5545, 0.0191, 0.1670, 0.2593]) -Greedy action tensor([ 1.6570, -0.5580, 0.9077, 1.8279]) tensor([0.3613, 0.0394, 0.1708, 0.4286]) -Greedy action tensor([ 2.2640, -1.5333, 0.6322, 1.2571]) tensor([0.6316, 0.0142, 0.1235, 0.2307]) -Greedy action tensor([ 0.3512, -1.4359, -0.2364, 1.2320]) tensor([0.2418, 0.0405, 0.1343, 0.5834]) -Greedy action tensor([ 1.0891, 0.1832, -0.2619, 0.6971]) tensor([0.4276, 0.1728, 0.1107, 0.2889]) -Greedy action tensor([ 0.2768, 1.1139, -0.3031, 0.6653]) tensor([0.1871, 0.4322, 0.1048, 0.2759]) -Greedy action tensor([ 0.8508, -0.9216, -0.3679, 0.4860]) tensor([0.4630, 0.0787, 0.1369, 0.3215]) -Greedy action tensor([ 0.6121, -1.7149, -0.2545, 0.7905]) tensor([0.3686, 0.0360, 0.1549, 0.4405]) -Greedy action tensor([ 0.7173, 0.3943, -0.2509, 0.1124]) tensor([0.3774, 0.2732, 0.1433, 0.2061]) -Greedy action tensor([ 0.8498, 0.2903, -0.2664, 0.7878]) tensor([0.3522, 0.2013, 0.1154, 0.3311]) -Greedy action tensor([ 0.4510, -1.1737, 0.3234, 0.4243]) tensor([0.3278, 0.0646, 0.2885, 0.3191]) -Greedy action tensor([ 1.5919, -0.7786, 0.7511, 0.0818]) tensor([0.5728, 0.0535, 0.2471, 0.1265]) -Greedy action tensor([ 0.7894, -0.2045, -1.0419, 1.0758]) tensor([0.3494, 0.1293, 0.0560, 0.4653]) -Greedy action tensor([-0.4326, 0.0638, -0.5452, -0.1296]) tensor([0.2045, 0.3359, 0.1827, 0.2769]) -Greedy action tensor([ 0.7723, 0.1040, 0.5462, -0.6906]) tensor([0.3934, 0.2017, 0.3138, 0.0911]) -Greedy action tensor([ 0.0436, -1.2431, 0.2608, 0.8234]) tensor([0.2128, 0.0588, 0.2644, 0.4641]) -Greedy action tensor([ 0.9072, -0.2919, -0.0446, 2.0267]) tensor([0.2105, 0.0635, 0.0813, 0.6448]) -Greedy action tensor([ 0.5953, -1.7318, -0.1566, 0.6724]) tensor([0.3775, 0.0368, 0.1780, 0.4077]) -Greedy action tensor([ 1.0038, -0.2834, 0.6876, -0.1441]) tensor([0.4306, 0.1189, 0.3139, 0.1366]) -Greedy action tensor([ 0.1318, -1.2813, 1.7864, 0.2312]) tensor([0.1319, 0.0321, 0.6902, 0.1457]) -Greedy action tensor([ 0.1577, -0.3031, 1.5057, 0.8705]) tensor([0.1330, 0.0839, 0.5119, 0.2712]) -Greedy action tensor([ 0.9200, 0.8637, 1.3654, -0.1841]) tensor([0.2606, 0.2463, 0.4068, 0.0864]) -Greedy action tensor([-1.4828, -0.8834, -0.5288, -0.8301]) tensor([0.1363, 0.2482, 0.3538, 0.2618]) -Greedy action tensor([ 0.2238, 0.6396, 0.3644, -0.3812]) tensor([0.2374, 0.3598, 0.2732, 0.1296]) -Greedy action tensor([-0.2256, -2.8650, -0.3290, 1.0352]) tensor([0.1818, 0.0130, 0.1639, 0.6413]) -Greedy action tensor([ 0.5454, 0.1804, -0.1644, 0.3912]) tensor([0.3286, 0.2281, 0.1616, 0.2817]) -Greedy action tensor([ 0.7287, 0.2855, -0.2329, -0.8085]) tensor([0.4466, 0.2867, 0.1707, 0.0960]) -Greedy action tensor([0.5154, 1.1103, 0.7125, 0.3083]) tensor([0.2065, 0.3743, 0.2514, 0.1678]) -Greedy action tensor([ 0.3136, 0.2194, -0.0172, -0.6614]) tensor([0.3327, 0.3028, 0.2390, 0.1255]) -Greedy action tensor([-0.3969, -0.4843, 0.0506, -0.4349]) tensor([0.2250, 0.2062, 0.3521, 0.2167]) -Greedy action tensor([ 0.0585, -1.1758, 0.2110, 0.2977]) tensor([0.2684, 0.0781, 0.3126, 0.3409]) -Greedy action tensor([-1.0854, 0.6494, 1.1428, 1.1860]) tensor([0.0390, 0.2210, 0.3620, 0.3780]) -Greedy action tensor([0.3979, 0.6136, 0.2469, 0.3171]) tensor([0.2486, 0.3084, 0.2137, 0.2293]) -Greedy action tensor([ 0.2202, 0.2768, -0.9750, 0.5256]) tensor([0.2690, 0.2846, 0.0814, 0.3650]) -Greedy action tensor([-0.5686, 0.2691, -0.0165, 0.3819]) tensor([0.1310, 0.3027, 0.2275, 0.3389]) -Greedy action tensor([ 0.3571, -0.7140, 0.6078, 0.8697]) tensor([0.2327, 0.0797, 0.2990, 0.3885]) -Greedy action tensor([-0.4661, 0.7158, 1.0979, 0.4224]) tensor([0.0872, 0.2843, 0.4166, 0.2120]) -Greedy action tensor([-0.3634, 0.4753, -0.8105, 0.9941]) tensor([0.1276, 0.2951, 0.0816, 0.4958]) -Greedy action tensor([-1.2747, -0.9376, -0.4299, 0.5121]) tensor([0.0935, 0.1309, 0.2175, 0.5580]) -Greedy action tensor([ 0.9313, -0.7973, 1.0436, 0.8292]) tensor([0.3126, 0.0555, 0.3497, 0.2822]) -Greedy action tensor([ 1.0670, -0.5324, 0.9603, -0.1098]) tensor([0.4151, 0.0839, 0.3731, 0.1280]) -Greedy action tensor([ 1.1698, -0.6728, 1.2346, 1.2113]) tensor([0.3060, 0.0485, 0.3265, 0.3190]) -Greedy action tensor([ 0.9285, -2.1240, 0.6927, 0.5842]) tensor([0.3928, 0.0186, 0.3103, 0.2784]) -Greedy action tensor([ 1.1301, 0.5324, 0.4574, -0.1153]) tensor([0.4258, 0.2342, 0.2173, 0.1226]) -Greedy action tensor([-0.3374, -1.4294, -0.0736, -0.1366]) tensor([0.2591, 0.0869, 0.3373, 0.3167]) -Greedy action tensor([ 0.4822, -0.2006, -0.4849, -0.3546]) tensor([0.4313, 0.2179, 0.1640, 0.1868]) -Greedy action tensor([ 0.9348, -0.1333, 1.0619, 1.1389]) tensor([0.2699, 0.0927, 0.3064, 0.3310]) -Greedy action tensor([0.8167, 0.2486, 0.0426, 0.9217]) tensor([0.3186, 0.1805, 0.1469, 0.3539]) -Greedy action tensor([0.6565, 0.1627, 0.8503, 0.2223]) tensor([0.2880, 0.1758, 0.3496, 0.1866]) -Greedy action tensor([ 0.9595, 0.6972, -0.0830, 1.3969]) tensor([0.2724, 0.2096, 0.0961, 0.4219]) -Greedy action tensor([-0.0841, -2.0271, -0.2776, 0.6297]) tensor([0.2494, 0.0357, 0.2056, 0.5093]) -Greedy action tensor([-0.3699, 1.1239, 0.3090, 0.0050]) tensor([0.1126, 0.5015, 0.2220, 0.1638]) -Greedy action tensor([ 1.5342, 0.9443, -0.1564, 0.3495]) tensor([0.4891, 0.2711, 0.0902, 0.1496]) -Greedy action tensor([ 0.8846, -0.7990, 0.9205, 1.8651]) tensor([0.2046, 0.0380, 0.2121, 0.5454]) -Greedy action tensor([-0.1470, -1.0519, 0.0739, -0.2575]) tensor([0.2819, 0.1141, 0.3516, 0.2524]) -Greedy action tensor([ 0.3864, -0.8852, 0.0600, 0.0517]) tensor([0.3680, 0.1032, 0.2655, 0.2633]) -Greedy action tensor([ 0.7913, 0.4035, 0.5979, -0.4287]) tensor([0.3574, 0.2425, 0.2945, 0.1055]) -Greedy action tensor([ 1.0268, 0.5598, 1.1902, -0.7910]) tensor([0.3371, 0.2113, 0.3969, 0.0547]) -Greedy action tensor([ 1.0944, -0.6938, -0.3152, 0.4399]) tensor([0.5178, 0.0866, 0.1265, 0.2691]) -Greedy action tensor([ 1.3747, -0.1831, -0.3454, 0.3576]) tensor([0.5710, 0.1203, 0.1022, 0.2065]) -Greedy action tensor([ 0.3883, -0.1401, -0.0558, -0.2706]) tensor([0.3639, 0.2145, 0.2334, 0.1883]) -Greedy action tensor([ 1.6177, -0.8443, -0.5277, 0.6678]) tensor([0.6293, 0.0537, 0.0736, 0.2434]) -Greedy action tensor([ 1.0979, -0.5810, -0.3058, -0.0845]) tensor([0.5751, 0.1073, 0.1413, 0.1763]) -Greedy action tensor([ 1.5392, -0.3036, -0.4410, 0.1235]) tensor([0.6497, 0.1029, 0.0897, 0.1577]) -Greedy action tensor([ 2.3723, -1.1499, -0.3939, 0.6187]) tensor([0.7902, 0.0233, 0.0497, 0.1368]) -Greedy action tensor([ 1.0637, -0.1959, 0.0100, 0.0936]) tensor([0.4971, 0.1411, 0.1733, 0.1884]) -Greedy action tensor([ 1.5837, -0.3573, -0.4910, 0.3454]) tensor([0.6414, 0.0921, 0.0806, 0.1859]) -Greedy action tensor([ 0.8429, 0.0567, -0.2054, -0.1444]) tensor([0.4590, 0.2091, 0.1609, 0.1710]) -Greedy action tensor([ 1.4026, 0.0102, -0.0924, -0.0458]) tensor([0.5856, 0.1455, 0.1313, 0.1376]) -Greedy action tensor([ 2.1651, -0.5950, -0.6095, 0.7024]) tensor([0.7368, 0.0466, 0.0460, 0.1706]) -Greedy action tensor([ 0.9439, -0.2468, -0.4390, 0.0360]) tensor([0.5107, 0.1553, 0.1281, 0.2060]) -Greedy action tensor([ 1.0073, -0.3484, -0.4146, 0.9557]) tensor([0.4084, 0.1053, 0.0985, 0.3878]) -Greedy action tensor([ 0.4763, -0.0118, -0.0539, -0.1480]) tensor([0.3653, 0.2242, 0.2149, 0.1956]) -Greedy action tensor([ 1.3242, 0.0033, -0.1487, 0.0658]) tensor([0.5617, 0.1499, 0.1288, 0.1596]) -Greedy action tensor([ 1.4842, -0.3436, -0.5099, 0.1709]) tensor([0.6386, 0.1027, 0.0869, 0.1718]) -Greedy action tensor([ 1.2029, -0.7804, -0.4817, 0.8277]) tensor([0.4974, 0.0685, 0.0923, 0.3418]) -Greedy action tensor([ 1.8231, -0.0917, 0.0135, 0.1496]) tensor([0.6673, 0.0983, 0.1092, 0.1252]) -Greedy action tensor([ 0.5187, -0.1634, 0.1734, 0.1193]) tensor([0.3467, 0.1753, 0.2455, 0.2325]) -Greedy action tensor([ 0.9127, -0.5853, -0.3366, 0.4628]) tensor([0.4656, 0.1041, 0.1335, 0.2969]) -Greedy action tensor([ 1.6315, -0.4102, -0.4073, 0.2823]) tensor([0.6581, 0.0854, 0.0857, 0.1708]) -Greedy action tensor([ 1.1351, -0.0610, -0.2591, -0.0149]) tensor([0.5356, 0.1620, 0.1328, 0.1696]) -Greedy action tensor([ 1.5165, -0.3858, -0.2927, 0.3775]) tensor([0.6123, 0.0914, 0.1003, 0.1960]) -Greedy action tensor([ 0.8822, -0.1633, -0.4500, 0.3212]) tensor([0.4575, 0.1608, 0.1207, 0.2610]) -Greedy action tensor([ 1.4438, -0.2634, -0.4854, 0.1025]) tensor([0.6297, 0.1142, 0.0915, 0.1647]) -Greedy action tensor([ 1.0348, -0.5165, -0.1961, 0.5514]) tensor([0.4716, 0.1000, 0.1377, 0.2908]) -Greedy action tensor([ 0.7201, -0.4013, 0.0415, 0.1153]) tensor([0.4203, 0.1369, 0.2132, 0.2296]) -Greedy action tensor([ 0.9566, 0.0444, -0.0959, -0.0541]) tensor([0.4729, 0.1899, 0.1651, 0.1721]) -Greedy action tensor([ 0.6204, -0.0526, -0.1998, -0.1685]) tensor([0.4158, 0.2122, 0.1831, 0.1889]) -Greedy action tensor([ 1.1355, -0.4307, -0.3243, 0.3102]) tensor([0.5321, 0.1111, 0.1236, 0.2331]) -Greedy action tensor([ 0.7110, -0.1875, 0.0325, 0.0263]) tensor([0.4134, 0.1683, 0.2098, 0.2085]) -Greedy action tensor([ 1.6399, -0.5480, -0.4248, 0.2457]) tensor([0.6725, 0.0754, 0.0853, 0.1668]) -Greedy action tensor([ 1.3387, -0.6521, -0.2542, 0.5092]) tensor([0.5630, 0.0769, 0.1145, 0.2456]) -Greedy action tensor([ 0.5873, -0.1836, -0.0581, -0.1515]) tensor([0.4057, 0.1877, 0.2128, 0.1938]) -Greedy action tensor([ 1.0154, -0.5282, -0.0365, 0.2075]) tensor([0.4978, 0.1063, 0.1739, 0.2219]) -Greedy action tensor([ 1.1785, -0.6799, -0.5154, 0.7703]) tensor([0.4989, 0.0778, 0.0917, 0.3317]) -Greedy action tensor([ 1.5170, -0.0877, -0.1169, 0.2748]) tensor([0.5935, 0.1193, 0.1158, 0.1714]) -Greedy action tensor([ 1.5014, -0.1147, 0.0219, 0.1384]) tensor([0.5944, 0.1181, 0.1354, 0.1521]) -Greedy action tensor([ 1.1436, -0.3249, -0.0175, 0.2613]) tensor([0.5109, 0.1177, 0.1600, 0.2114]) -Greedy action tensor([ 1.0045, -0.3610, -0.7494, 0.2966]) tensor([0.5206, 0.1329, 0.0901, 0.2565]) -Greedy action tensor([ 1.4226, -0.3839, -0.3092, 0.2639]) tensor([0.6042, 0.0992, 0.1069, 0.1897]) -Greedy action tensor([ 0.7923, -0.2174, -0.0225, -0.0327]) tensor([0.4454, 0.1623, 0.1972, 0.1952]) -Greedy action tensor([ 0.5383, -0.2064, 0.0449, 0.0713]) tensor([0.3687, 0.1751, 0.2251, 0.2311]) -Greedy action tensor([ 1.7604, -0.8802, -0.4403, 0.4216]) tensor([0.6924, 0.0494, 0.0767, 0.1815]) -Greedy action tensor([ 1.7575, -0.4431, -0.6006, 0.1414]) tensor([0.7122, 0.0789, 0.0674, 0.1415]) -Greedy action tensor([ 0.9542, 0.1162, -0.1648, -0.3346]) tensor([0.4915, 0.2126, 0.1605, 0.1354]) -Greedy action tensor([ 0.9719, -0.5674, -0.0870, -0.0174]) tensor([0.5173, 0.1110, 0.1794, 0.1924]) -Greedy action tensor([ 0.3908, -0.4415, 0.0173, 0.0752]) tensor([0.3505, 0.1525, 0.2413, 0.2557]) -Greedy action tensor([ 1.0696, -0.2045, -0.1333, 0.0560]) tensor([0.5147, 0.1440, 0.1546, 0.1868]) -Greedy action tensor([ 1.4941, -0.8240, -0.2940, 0.2696]) tensor([0.6412, 0.0631, 0.1073, 0.1884]) -Greedy action tensor([ 2.0514, -0.7830, -0.1674, 0.2867]) tensor([0.7470, 0.0439, 0.0812, 0.1279]) -Greedy action tensor([ 0.7735, -0.4299, -0.1137, -0.3387]) tensor([0.4900, 0.1471, 0.2018, 0.1611]) -Greedy action tensor([ 1.8021, 0.0425, -0.3790, 0.1957]) tensor([0.6731, 0.1158, 0.0760, 0.1350]) -Greedy action tensor([ 1.7542, -0.5462, -0.2346, 0.3882]) tensor([0.6702, 0.0672, 0.0917, 0.1710]) -Greedy action tensor([1.2544, 0.0736, 0.0420, 0.0496]) tensor([0.5251, 0.1612, 0.1562, 0.1574]) -Greedy action tensor([ 1.3734, -0.4193, -0.0435, 0.3701]) tensor([0.5632, 0.0938, 0.1366, 0.2065]) -Greedy action tensor([ 1.2478, -0.8118, -0.3807, 0.5229]) tensor([0.5531, 0.0705, 0.1085, 0.2679]) -Greedy action tensor([ 0.2928, -0.2998, -0.4159, 0.3453]) tensor([0.3227, 0.1784, 0.1589, 0.3401]) -Greedy action tensor([ 0.7492, -0.2794, -0.1975, 0.2420]) tensor([0.4259, 0.1523, 0.1653, 0.2565]) -Greedy action tensor([ 0.9851, -0.5039, 0.0664, -0.1682]) tensor([0.5154, 0.1163, 0.2057, 0.1627]) -Greedy action tensor([ 1.3393, -0.4998, -0.1914, -0.1523]) tensor([0.6249, 0.0993, 0.1352, 0.1406]) -Greedy action tensor([ 1.1471, -0.0465, -0.4574, 0.2656]) tensor([0.5213, 0.1580, 0.1048, 0.2159]) -Greedy action tensor([ 1.0310, -0.0514, -0.2243, 0.2593]) tensor([0.4794, 0.1624, 0.1366, 0.2216]) -Greedy action tensor([ 1.0967, -0.4530, 0.0755, -0.0495]) tensor([0.5290, 0.1123, 0.1905, 0.1681]) -Greedy action tensor([ 0.9777, -0.6663, -0.3363, 0.1602]) tensor([0.5254, 0.1015, 0.1412, 0.2320]) -Greedy action tensor([ 0.7787, -0.2324, -0.2461, 0.3281]) tensor([0.4237, 0.1542, 0.1521, 0.2700]) -Greedy action tensor([ 1.4632, -0.1899, -0.0063, 0.0400]) tensor([0.6015, 0.1152, 0.1384, 0.1449]) -Greedy action tensor([ 0.8703, -0.4861, -0.3966, 0.2931]) tensor([0.4760, 0.1226, 0.1341, 0.2673]) -Greedy action tensor([ 1.2284, -0.3630, -0.4575, 0.2634]) tensor([0.5650, 0.1151, 0.1047, 0.2153]) -Greedy action tensor([ 1.4373, -0.6523, -0.0705, 0.4224]) tensor([0.5856, 0.0725, 0.1297, 0.2123]) -Greedy action tensor([ 1.0035, -0.5423, -0.3257, 0.6590]) tensor([0.4574, 0.0975, 0.1211, 0.3241]) -Greedy action tensor([ 1.3648, -0.4518, -0.3324, 0.2350]) tensor([0.5992, 0.0974, 0.1098, 0.1936]) -Greedy action tensor([ 1.8426, -0.9684, -0.2332, 0.2257]) tensor([0.7225, 0.0435, 0.0906, 0.1434]) -Greedy action tensor([ 1.1166, -0.4344, -0.2452, 0.2863]) tensor([0.5252, 0.1114, 0.1346, 0.2289]) -Greedy action tensor([ 1.4310, -0.5181, -0.3730, 0.1389]) tensor([0.6322, 0.0900, 0.1041, 0.1737]) -Greedy action tensor([ 1.5816, -0.3322, 0.3587, 0.2856]) tensor([0.5829, 0.0860, 0.1716, 0.1595]) -Greedy action tensor([ 0.7342, -0.5318, -0.0218, 0.1719]) tensor([0.4308, 0.1215, 0.2023, 0.2455]) -Greedy action tensor([ 0.7667, -0.4174, -0.3434, 0.0768]) tensor([0.4679, 0.1432, 0.1542, 0.2347]) -Greedy action tensor([ 1.5346, -0.6160, -0.2296, 0.5769]) tensor([0.5983, 0.0696, 0.1025, 0.2296]) -Greedy action tensor([ 1.7524, -0.9451, -0.4747, 0.6091]) tensor([0.6694, 0.0451, 0.0722, 0.2134]) -Greedy action tensor([-1.8830, -0.3413, 0.6646, -0.1060]) tensor([0.0410, 0.1918, 0.5245, 0.2427]) -Greedy action tensor([-1.6861, -0.4759, 0.7097, 0.2253]) tensor([0.0453, 0.1518, 0.4968, 0.3061]) -Greedy action tensor([-1.2504, -0.2532, 0.3156, 0.3251]) tensor([0.0750, 0.2033, 0.3591, 0.3625]) -Greedy action tensor([-1.3276, -0.5804, 0.3275, 0.2234]) tensor([0.0766, 0.1616, 0.4007, 0.3611]) -Greedy action tensor([-1.6131, 0.0146, 0.3979, 0.0666]) tensor([0.0528, 0.2690, 0.3947, 0.2834]) -Greedy action tensor([-1.9108, -0.4401, 0.6460, -0.1625]) tensor([0.0417, 0.1814, 0.5375, 0.2395]) -Greedy action tensor([-0.6352, -0.3507, 1.1537, 1.5448]) tensor([0.0583, 0.0775, 0.3487, 0.5156]) -Greedy action tensor([-0.8335, -0.5956, 0.6613, 1.1040]) tensor([0.0732, 0.0928, 0.3262, 0.5078]) -Greedy action tensor([-0.7313, -0.5219, -0.4950, -0.2981]) tensor([0.1983, 0.2446, 0.2512, 0.3059]) -Greedy action tensor([-0.3279, -0.7011, 0.5929, 0.5889]) tensor([0.1492, 0.1028, 0.3748, 0.3733]) -Greedy action tensor([-1.6783, -0.4509, 0.5240, -0.0409]) tensor([0.0538, 0.1835, 0.4863, 0.2764]) -Greedy action tensor([-1.8635, -0.4645, 0.6645, -0.0785]) tensor([0.0425, 0.1721, 0.5322, 0.2532]) -Greedy action tensor([-1.9222, -0.3877, 0.6462, -0.1654]) tensor([0.0409, 0.1895, 0.5329, 0.2367]) -Greedy action tensor([-1.7444, -0.5110, 0.5572, -0.0719]) tensor([0.0506, 0.1738, 0.5059, 0.2697]) -Greedy action tensor([-1.4070, -0.4029, 0.6549, -0.4833]) tensor([0.0709, 0.1935, 0.5572, 0.1785]) -Greedy action tensor([-1.9217, -0.4470, 0.6559, -0.1661]) tensor([0.0411, 0.1797, 0.5413, 0.2379]) -Greedy action tensor([-1.8415, -0.2796, 0.6024, -0.1023]) tensor([0.0435, 0.2075, 0.5012, 0.2477]) -Greedy action tensor([-1.0143, -0.5766, 0.2196, 0.3153]) tensor([0.1024, 0.1587, 0.3518, 0.3871]) -Greedy action tensor([-1.4836, -0.1734, 0.6064, 0.5705]) tensor([0.0486, 0.1800, 0.3926, 0.3788]) -Greedy action tensor([-1.1228, -0.5145, 0.4068, 0.8223]) tensor([0.0692, 0.1272, 0.3195, 0.4841]) -Greedy action tensor([-1.6976, -0.4808, 0.5592, 0.0177]) tensor([0.0513, 0.1733, 0.4902, 0.2852]) -Greedy action tensor([-1.2252, -0.5699, 0.2871, 0.2333]) tensor([0.0850, 0.1637, 0.3857, 0.3655]) -Greedy action tensor([-0.9082, -0.4411, 1.0154, 1.3512]) tensor([0.0526, 0.0839, 0.3599, 0.5036]) -Greedy action tensor([-0.7610, -0.5845, 0.2435, 0.1666]) tensor([0.1342, 0.1601, 0.3664, 0.3393]) -Greedy action tensor([-1.5937, -0.4272, 0.6997, 0.3391]) tensor([0.0476, 0.1527, 0.4712, 0.3285]) -Greedy action tensor([-1.8810, -0.4423, 0.6343, -0.1352]) tensor([0.0429, 0.1808, 0.5306, 0.2458]) -Greedy action tensor([-1.8059, -0.4546, 0.5970, -0.1056]) tensor([0.0467, 0.1805, 0.5168, 0.2559]) -Greedy action tensor([-1.8920, -0.4853, 0.6605, -0.1449]) tensor([0.0423, 0.1725, 0.5427, 0.2425]) -Greedy action tensor([-1.2008, -0.5697, 0.4375, -0.0892]) tensor([0.0904, 0.1699, 0.4651, 0.2747]) -Greedy action tensor([-1.4719, -0.2574, 0.1170, -0.7668]) tensor([0.0886, 0.2983, 0.4338, 0.1793]) -Greedy action tensor([-1.0783, -0.0647, 0.5663, -0.6640]) tensor([0.0957, 0.2638, 0.4957, 0.1448]) -Greedy action tensor([-1.4299, 0.2054, 0.5118, -0.3563]) tensor([0.0624, 0.3201, 0.4349, 0.1825]) -Greedy action tensor([-1.6327, -0.3395, 0.4971, -0.0081]) tensor([0.0551, 0.2010, 0.4639, 0.2799]) -Greedy action tensor([-1.3303, -0.5340, 0.5912, -0.3486]) tensor([0.0786, 0.1743, 0.5372, 0.2099]) -Greedy action tensor([-1.6297, -0.5468, 0.5277, 0.0799]) tensor([0.0552, 0.1629, 0.4771, 0.3049]) -Greedy action tensor([-1.5475, 0.4424, 0.3437, -0.0409]) tensor([0.0514, 0.3760, 0.3407, 0.2319]) -Greedy action tensor([-1.9232, -0.4727, 0.7633, -0.0898]) tensor([0.0382, 0.1628, 0.5603, 0.2387]) -Greedy action tensor([-1.2926, -0.2939, 0.6659, -0.6013]) tensor([0.0781, 0.2121, 0.5538, 0.1560]) -Greedy action tensor([-1.6806, -0.4602, 0.6130, 0.1759]) tensor([0.0483, 0.1637, 0.4788, 0.3092]) -Greedy action tensor([-1.1971, -0.4481, 0.7708, 1.0910]) tensor([0.0497, 0.1051, 0.3555, 0.4897]) -Greedy action tensor([-1.4850e+00, -9.3585e-04, 4.0839e-02, -4.4638e-01]) tensor([0.0779, 0.3437, 0.3583, 0.2201]) -Greedy action tensor([-0.9977, 0.0620, 0.2342, 0.5445]) tensor([0.0834, 0.2407, 0.2859, 0.3900]) -Greedy action tensor([-1.6589, -0.6279, 0.4813, -0.1969]) tensor([0.0602, 0.1687, 0.5115, 0.2596]) -Greedy action tensor([-0.9350, -0.3095, 0.5024, -0.4843]) tensor([0.1156, 0.2161, 0.4868, 0.1815]) -Greedy action tensor([-1.8134, -0.2243, 0.5733, -0.1142]) tensor([0.0449, 0.2202, 0.4890, 0.2459]) -Greedy action tensor([-0.2561, -0.1793, 1.0641, 1.5574]) tensor([0.0836, 0.0903, 0.3132, 0.5129]) -Greedy action tensor([-1.7377, -0.9409, -0.3684, -0.6743]) tensor([0.0995, 0.2208, 0.3914, 0.2883]) -Greedy action tensor([-1.0442, -0.2774, 0.5571, -0.6334]) tensor([0.1040, 0.2238, 0.5155, 0.1568]) -Greedy action tensor([-1.8654, -0.4316, 0.7185, 0.0852]) tensor([0.0393, 0.1646, 0.5201, 0.2760]) -Greedy action tensor([-1.6634, -0.5218, 0.5339, -0.0429]) tensor([0.0550, 0.1722, 0.4949, 0.2780]) -Greedy action tensor([-0.6928, -0.6042, 0.6730, 1.0478]) tensor([0.0854, 0.0933, 0.3346, 0.4867]) -Greedy action tensor([-1.7099, -0.5916, 0.5667, 0.0181]) tensor([0.0515, 0.1574, 0.5014, 0.2897]) -Greedy action tensor([-1.7131, -0.3638, 0.7415, 0.2629]) tensor([0.0422, 0.1626, 0.4910, 0.3042]) -Greedy action tensor([-1.1207, -0.4776, 0.6093, 1.0472]) tensor([0.0579, 0.1101, 0.3264, 0.5057]) -Greedy action tensor([-1.2401, -0.5600, 0.4924, 0.4767]) tensor([0.0704, 0.1391, 0.3983, 0.3922]) -Greedy action tensor([-1.8695, -0.4796, 0.6706, -0.0916]) tensor([0.0423, 0.1700, 0.5370, 0.2506]) -Greedy action tensor([-1.0558, -0.4578, 0.3159, 0.7638]) tensor([0.0773, 0.1406, 0.3049, 0.4771]) -Greedy action tensor([-1.6178, -0.4134, 0.8867, 0.7617]) tensor([0.0365, 0.1218, 0.4471, 0.3946]) -Greedy action tensor([-1.8856, -0.3403, 0.6227, -0.1381]) tensor([0.0422, 0.1978, 0.5180, 0.2421]) -Greedy action tensor([-0.7186, -0.1506, 0.3241, -0.2462]) tensor([0.1388, 0.2449, 0.3937, 0.2226]) -Greedy action tensor([-1.6266, -0.4984, 0.5144, -0.0107]) tensor([0.0567, 0.1753, 0.4826, 0.2854]) -Greedy action tensor([-1.2403, -0.3628, 0.6561, -0.6310]) tensor([0.0840, 0.2020, 0.5596, 0.1545]) -Greedy action tensor([-1.7590, -0.4800, 0.5784, -0.0622]) tensor([0.0490, 0.1761, 0.5075, 0.2674]) -Greedy action tensor([-0.8820, -0.4290, 0.4780, -0.4389]) tensor([0.1246, 0.1960, 0.4854, 0.1940]) -Greedy action tensor([-1.4716, -0.4416, 0.5649, 0.4113]) tensor([0.0554, 0.1553, 0.4249, 0.3644]) -Greedy action tensor([-1.4797, -0.5246, 0.4645, 0.2119]) tensor([0.0624, 0.1623, 0.4363, 0.3389]) -Greedy action tensor([-1.8670, -0.4477, 0.6357, -0.1231]) tensor([0.0433, 0.1792, 0.5295, 0.2479]) -Greedy action tensor([-1.0281, -0.6511, 0.4076, 0.7486]) tensor([0.0795, 0.1160, 0.3343, 0.4702]) -Greedy action tensor([-1.3753, -1.1453, 0.5573, 0.8861]) tensor([0.0533, 0.0671, 0.3681, 0.5115]) -Greedy action tensor([-1.2542, -0.5658, 0.2920, 0.2366]) tensor([0.0825, 0.1642, 0.3871, 0.3662]) -Greedy action tensor([-1.2575, -0.3637, 0.5034, 0.7163]) tensor([0.0608, 0.1485, 0.3534, 0.4373]) -Greedy action tensor([-1.8760, -0.4413, 0.6296, -0.1339]) tensor([0.0432, 0.1813, 0.5290, 0.2465]) -Greedy action tensor([-1.3312, -0.7173, 0.3962, 0.4224]) tensor([0.0702, 0.1297, 0.3948, 0.4053]) -Greedy action tensor([-1.9693, -0.9495, 0.0426, -0.4224]) tensor([0.0627, 0.1739, 0.4689, 0.2945]) -Greedy action tensor([-1.8980, -0.4253, 0.6487, -0.1429]) tensor([0.0418, 0.1824, 0.5339, 0.2419]) -Greedy action tensor([-1.8594, -0.4640, 0.6210, -0.1575]) tensor([0.0445, 0.1797, 0.5317, 0.2441]) -Greedy action tensor([-1.1794, -0.6470, 0.5316, 0.6511]) tensor([0.0691, 0.1177, 0.3824, 0.4309]) -Greedy action tensor([-1.5533, -0.4263, 0.4664, 0.0537]) tensor([0.0602, 0.1858, 0.4537, 0.3003]) -Greedy action tensor([-1.6657, -0.4828, 0.5311, 0.0049]) tensor([0.0538, 0.1757, 0.4843, 0.2862]) -Greedy action tensor([-1.5050, -0.5182, 0.4570, 0.0879]) tensor([0.0636, 0.1707, 0.4527, 0.3130]) -Greedy action tensor([-0.9669, -0.4448, 0.3060, 0.6951]) tensor([0.0868, 0.1462, 0.3098, 0.4572]) -Greedy action tensor([ 0.8344, -0.5340, -0.1374, -0.2499]) tensor([0.5073, 0.1291, 0.1920, 0.1716]) -Greedy action tensor([ 0.9103, -0.1166, -0.0085, -0.4499]) tensor([0.4966, 0.1778, 0.1981, 0.1274]) -Greedy action tensor([ 0.7413, -0.2636, -0.2935, -0.3693]) tensor([0.4876, 0.1785, 0.1733, 0.1606]) -Greedy action tensor([ 0.9129, -0.5458, 0.1138, -0.4318]) tensor([0.5147, 0.1197, 0.2315, 0.1341]) -Greedy action tensor([ 0.8577, -0.6130, 0.1288, -0.3877]) tensor([0.5000, 0.1149, 0.2412, 0.1439]) -Greedy action tensor([ 0.7194, -0.2412, 0.1577, -0.3417]) tensor([0.4350, 0.1664, 0.2480, 0.1505]) -Greedy action tensor([ 0.4396, -0.2881, 0.2615, -0.3742]) tensor([0.3619, 0.1748, 0.3029, 0.1604]) -Greedy action tensor([ 0.9770, -0.4718, -0.0804, -0.2928]) tensor([0.5367, 0.1261, 0.1864, 0.1508]) -Greedy action tensor([ 0.8528, -0.6773, 0.0398, -0.4318]) tensor([0.5163, 0.1118, 0.2290, 0.1429]) -Greedy action tensor([ 0.7513, -0.4500, -0.1334, -0.2779]) tensor([0.4829, 0.1452, 0.1993, 0.1725]) -Greedy action tensor([ 0.5052, -0.1758, 0.0501, -0.2285]) tensor([0.3816, 0.1931, 0.2421, 0.1832]) -Greedy action tensor([ 0.4878, 0.0173, -0.1052, -0.2722]) tensor([0.3781, 0.2362, 0.2089, 0.1768]) -Greedy action tensor([ 0.8706, -0.7087, -0.0053, -0.2853]) tensor([0.5162, 0.1064, 0.2150, 0.1625]) -Greedy action tensor([ 0.5562, 0.0822, -0.0700, -0.1451]) tensor([0.3769, 0.2346, 0.2015, 0.1869]) -Greedy action tensor([ 0.5501, -0.0941, 0.1569, -0.3587]) tensor([0.3842, 0.2017, 0.2593, 0.1548]) -Greedy action tensor([ 0.2965, 0.0037, -0.0875, -0.1937]) tensor([0.3290, 0.2455, 0.2241, 0.2015]) -Greedy action tensor([ 0.5531, -0.2503, -0.0069, -0.1508]) tensor([0.3978, 0.1781, 0.2272, 0.1968]) -Greedy action tensor([ 0.4906, -0.1525, 0.0520, -0.1198]) tensor([0.3685, 0.1937, 0.2377, 0.2001]) -Greedy action tensor([ 0.6929, -0.2070, -0.0845, -0.6286]) tensor([0.4688, 0.1906, 0.2155, 0.1251]) -Greedy action tensor([ 0.5362, 0.0346, -0.0605, -0.0783]) tensor([0.3708, 0.2245, 0.2042, 0.2006]) -Greedy action tensor([ 0.2540, 0.0946, 0.0784, -0.2349]) tensor([0.3026, 0.2580, 0.2539, 0.1856]) -Greedy action tensor([ 0.6879, -0.0868, 0.1767, -0.2947]) tensor([0.4107, 0.1893, 0.2463, 0.1537]) -Greedy action tensor([ 0.4518, -0.1572, 0.0437, -0.4435]) tensor([0.3821, 0.2078, 0.2540, 0.1561]) -Greedy action tensor([ 0.5462, -0.1025, 0.1528, -0.4463]) tensor([0.3894, 0.2036, 0.2628, 0.1443]) -Greedy action tensor([ 0.9373, -0.6441, -0.1351, -0.4447]) tensor([0.5559, 0.1143, 0.1902, 0.1396]) -Greedy action tensor([ 0.6509, -0.4119, 0.0170, -0.2718]) tensor([0.4399, 0.1520, 0.2334, 0.1748]) -Greedy action tensor([ 0.2670, -0.1700, -0.1310, -0.4518]) tensor([0.3565, 0.2303, 0.2395, 0.1737]) -Greedy action tensor([ 0.4703, -0.1260, -0.0290, -0.3349]) tensor([0.3839, 0.2115, 0.2330, 0.1716]) -Greedy action tensor([ 0.3428, 0.0209, -0.0749, -0.1086]) tensor([0.3311, 0.2400, 0.2181, 0.2108]) -Greedy action tensor([ 0.9013, -0.6146, 0.0829, -0.3860]) tensor([0.5163, 0.1134, 0.2278, 0.1425]) -Greedy action tensor([ 0.2462, 0.2116, 0.0415, -0.2577]) tensor([0.2954, 0.2854, 0.2407, 0.1785]) -Greedy action tensor([ 0.5276, -0.0890, 0.0764, -0.1741]) tensor([0.3742, 0.2020, 0.2383, 0.1855]) -Greedy action tensor([ 0.1386, -0.2159, -0.0908, -0.1020]) tensor([0.3046, 0.2137, 0.2422, 0.2395]) -Greedy action tensor([ 0.6570, -0.3714, -0.0419, -0.4290]) tensor([0.4562, 0.1631, 0.2268, 0.1540]) -Greedy action tensor([ 0.4492, -0.1399, 0.0089, -0.2466]) tensor([0.3707, 0.2057, 0.2387, 0.1849]) -Greedy action tensor([ 6.4189e-01, 5.0068e-06, -1.1913e-01, -9.6189e-03]) tensor([0.3977, 0.2093, 0.1858, 0.2073]) -Greedy action tensor([ 0.5323, -0.1096, -0.0675, -0.1757]) tensor([0.3894, 0.2050, 0.2138, 0.1918]) -Greedy action tensor([ 0.4426, -0.0937, -0.1743, -0.1556]) tensor([0.3739, 0.2187, 0.2018, 0.2056]) -Greedy action tensor([ 0.4836, 0.1132, 0.1540, -0.4815]) tensor([0.3584, 0.2474, 0.2577, 0.1365]) -Greedy action tensor([ 0.7771, -0.4500, 0.0288, -0.4106]) tensor([0.4828, 0.1415, 0.2284, 0.1472]) -Greedy action tensor([ 0.2059, -0.0239, -0.1089, 0.0069]) tensor([0.2990, 0.2376, 0.2183, 0.2451]) -Greedy action tensor([ 0.3934, -0.2298, -0.0307, -0.2940]) tensor([0.3713, 0.1991, 0.2429, 0.1867]) -Greedy action tensor([ 0.4849, -0.0353, -0.1794, -0.0474]) tensor([0.3709, 0.2205, 0.1909, 0.2178]) -Greedy action tensor([ 0.7702, -0.5547, -0.0025, -0.6133]) tensor([0.5055, 0.1344, 0.2334, 0.1267]) -Greedy action tensor([ 0.7481, -0.5218, -0.0724, -0.5926]) tensor([0.5044, 0.1417, 0.2220, 0.1320]) -Greedy action tensor([ 0.9261, -0.3039, 0.0027, -0.3426]) tensor([0.5074, 0.1483, 0.2015, 0.1427]) -Greedy action tensor([ 0.5738, -0.3549, 0.0652, -0.3375]) tensor([0.4170, 0.1647, 0.2507, 0.1676]) -Greedy action tensor([ 0.6706, -0.3519, 0.0135, -0.3717]) tensor([0.4483, 0.1612, 0.2324, 0.1581]) -Greedy action tensor([ 0.8466, -0.5341, 0.0172, -0.3709]) tensor([0.5041, 0.1267, 0.2200, 0.1492]) -Greedy action tensor([ 0.6127, -0.2931, -0.0596, -0.1403]) tensor([0.4192, 0.1694, 0.2140, 0.1974]) -Greedy action tensor([ 0.9913, -0.6640, -0.0362, -0.4473]) tensor([0.5598, 0.1069, 0.2004, 0.1328]) -Greedy action tensor([ 0.7235, -0.7368, -0.1242, -0.2529]) tensor([0.4909, 0.1140, 0.2103, 0.1849]) -Greedy action tensor([ 0.5635, -0.3614, -0.1358, -0.4112]) tensor([0.4404, 0.1746, 0.2188, 0.1662]) -Greedy action tensor([ 0.3184, -0.2238, -0.0765, -0.3996]) tensor([0.3646, 0.2120, 0.2456, 0.1778]) -Greedy action tensor([ 0.3712, 0.0706, -0.1456, 0.0640]) tensor([0.3255, 0.2410, 0.1941, 0.2394]) -Greedy action tensor([ 0.8523, -0.5915, -0.0938, -0.4057]) tensor([0.5240, 0.1237, 0.2034, 0.1489]) -Greedy action tensor([ 0.6351, -0.6246, -0.0421, -0.5902]) tensor([0.4795, 0.1361, 0.2436, 0.1408]) -Greedy action tensor([ 0.6648, -0.3049, -0.0426, -0.3316]) tensor([0.4462, 0.1692, 0.2199, 0.1647]) -Greedy action tensor([ 0.8120, -0.3113, -0.0441, -0.1927]) tensor([0.4725, 0.1537, 0.2008, 0.1730]) -Greedy action tensor([ 0.5240, -0.1243, 0.0346, -0.2213]) tensor([0.3831, 0.2003, 0.2348, 0.1818]) -Greedy action tensor([ 0.4528, -0.0747, 0.0232, -0.1526]) tensor([0.3589, 0.2117, 0.2335, 0.1959]) -Greedy action tensor([ 0.5146, 0.1162, -0.1208, -0.1108]) tensor([0.3655, 0.2454, 0.1936, 0.1955]) -Greedy action tensor([ 0.4560, -0.0396, -0.0311, -0.0027]) tensor([0.3502, 0.2133, 0.2152, 0.2213]) -Greedy action tensor([ 0.8045, -0.5404, 0.0648, -0.4974]) tensor([0.4975, 0.1296, 0.2375, 0.1353]) -Greedy action tensor([ 0.7315, -0.5936, -0.0576, -0.3906]) tensor([0.4889, 0.1299, 0.2221, 0.1592]) -Greedy action tensor([ 0.8595, -0.7513, -0.2102, -0.9116]) tensor([0.5838, 0.1166, 0.2003, 0.0993]) -Greedy action tensor([ 0.3636, -0.0707, -0.0483, -0.2173]) tensor([0.3485, 0.2257, 0.2308, 0.1949]) -Greedy action tensor([ 0.5077, -0.0103, -0.0320, -0.2185]) tensor([0.3756, 0.2237, 0.2189, 0.1817]) -Greedy action tensor([ 0.5099, -0.1778, 0.0574, -0.2310]) tensor([0.3823, 0.1922, 0.2432, 0.1823]) -Greedy action tensor([ 0.7095, -0.4900, -0.2106, -0.3545]) tensor([0.4890, 0.1474, 0.1949, 0.1687]) -Greedy action tensor([ 0.4563, 0.0433, -0.1558, -0.1810]) tensor([0.3660, 0.2421, 0.1984, 0.1935]) -Greedy action tensor([ 0.1805, -0.0260, -0.0653, -0.1723]) tensor([0.3032, 0.2466, 0.2371, 0.2131]) -Greedy action tensor([ 0.6359, -0.5936, 0.1500, -0.7024]) tensor([0.4609, 0.1348, 0.2835, 0.1209]) -Greedy action tensor([ 0.4187, -0.3772, -0.1068, -0.1754]) tensor([0.3854, 0.1739, 0.2279, 0.2128]) -Greedy action tensor([ 0.5647, -0.3152, 0.1394, -0.3145]) tensor([0.4027, 0.1670, 0.2632, 0.1671]) -Greedy action tensor([ 0.6503, -0.2097, -0.1079, -0.1202]) tensor([0.4247, 0.1797, 0.1990, 0.1966]) -Greedy action tensor([ 0.3791, -0.0764, -0.0500, -0.2301]) tensor([0.3535, 0.2241, 0.2301, 0.1922]) -Greedy action tensor([ 0.3119, -0.0262, 0.0464, -0.6162]) tensor([0.3478, 0.2480, 0.2667, 0.1375]) -Greedy action tensor([ 5.4217e-01, -1.0424e-01, 1.3366e-04, -1.7076e-01]) tensor([0.3853, 0.2018, 0.2240, 0.1889]) -Greedy action tensor([ 0.4291, -0.1744, -0.2267, -0.2435]) tensor([0.3882, 0.2123, 0.2015, 0.1981]) -Greedy action tensor([ 0.5722, -0.3576, -0.0553, -0.4102]) tensor([0.4342, 0.1714, 0.2318, 0.1626]) -Greedy action tensor([-0.4421, -0.8881, -0.8886, 0.2090]) tensor([0.2382, 0.1525, 0.1524, 0.4568]) -Greedy action tensor([ 0.6137, -0.3466, 1.4796, 0.8711]) tensor([0.1979, 0.0757, 0.4704, 0.2560]) -Greedy action tensor([ 0.0757, 0.5014, -0.3165, -0.2875]) tensor([0.2563, 0.3923, 0.1731, 0.1782]) -Greedy action tensor([ 0.1945, -0.4799, 1.7886, 0.5056]) tensor([0.1282, 0.0653, 0.6314, 0.1750]) -Greedy action tensor([-0.0837, 0.2425, -0.2737, 0.6492]) tensor([0.1889, 0.2618, 0.1562, 0.3931]) -Greedy action tensor([-1.3284, -0.0730, 0.1867, -0.5184]) tensor([0.0884, 0.3104, 0.4024, 0.1988]) -Greedy action tensor([ 0.7322, 0.3775, -0.8902, 0.5562]) tensor([0.3653, 0.2562, 0.0721, 0.3063]) -Greedy action tensor([ 0.2480, 0.1706, 0.2175, -0.5505]) tensor([0.2989, 0.2766, 0.2899, 0.1345]) -Greedy action tensor([ 1.1279, -0.2493, 0.5915, 0.6084]) tensor([0.4112, 0.1037, 0.2405, 0.2446]) -Greedy action tensor([ 0.2815, -1.2876, 1.3901, -0.2605]) tensor([0.2075, 0.0432, 0.6287, 0.1207]) -Greedy action tensor([ 1.5461, -0.4746, 1.0968, 1.5420]) tensor([0.3615, 0.0479, 0.2306, 0.3600]) -Greedy action tensor([ 0.5841, -0.0275, 0.2652, 1.1436]) tensor([0.2488, 0.1350, 0.1809, 0.4354]) -Greedy action tensor([ 0.9109, -0.2322, -0.2847, 0.9058]) tensor([0.3822, 0.1219, 0.1156, 0.3803]) -Greedy action tensor([-0.5748, -0.5592, -0.8957, 2.4320]) tensor([0.0435, 0.0442, 0.0316, 0.8806]) -Greedy action tensor([-0.8130, -0.8446, 0.2076, 0.2372]) tensor([0.1315, 0.1275, 0.3650, 0.3760]) -Greedy action tensor([ 0.9215, -1.0566, 0.4547, -0.2274]) tensor([0.4802, 0.0664, 0.3011, 0.1522]) -Greedy action tensor([-0.1588, 1.7316, 1.0363, -0.1205]) tensor([0.0836, 0.5534, 0.2761, 0.0868]) -Greedy action tensor([ 0.0209, 0.4632, 1.3880, -0.6909]) tensor([0.1434, 0.2233, 0.5629, 0.0704]) -Greedy action tensor([-0.6616, -2.9033, -0.0180, 0.3882]) tensor([0.1705, 0.0181, 0.3244, 0.4870]) -Greedy action tensor([ 1.0353, 0.7728, -0.0884, 0.3013]) tensor([0.3885, 0.2988, 0.1263, 0.1865]) -Greedy action tensor([ 1.0609, -1.8575, -0.0901, 1.0220]) tensor([0.4288, 0.0232, 0.1356, 0.4124]) -Greedy action tensor([-0.0908, -0.1988, -0.3485, 2.1494]) tensor([0.0829, 0.0744, 0.0641, 0.7787]) -Greedy action tensor([-0.3884, -0.5076, 1.4678, -0.6001]) tensor([0.1099, 0.0976, 0.7035, 0.0890]) -Greedy action tensor([-0.2454, 0.2597, 0.5036, 0.3571]) tensor([0.1515, 0.2511, 0.3205, 0.2768]) -Greedy action tensor([-1.4011, 0.1233, -0.8145, -1.0816]) tensor([0.1141, 0.5238, 0.2051, 0.1570]) -Greedy action tensor([0.6496, 0.7719, 0.2881, 0.1838]) tensor([0.2895, 0.3271, 0.2017, 0.1817]) -Greedy action tensor([-0.0136, 0.4345, 0.3231, 0.1811]) tensor([0.1930, 0.3022, 0.2703, 0.2345]) -Greedy action tensor([-0.4974, -1.5778, 0.6062, 0.0575]) tensor([0.1640, 0.0557, 0.4946, 0.2857]) -Greedy action tensor([ 0.3052, -1.2346, 1.1897, 0.1973]) tensor([0.2206, 0.0473, 0.5341, 0.1980]) -Greedy action tensor([ 0.6754, 0.9606, -0.9050, 0.3344]) tensor([0.3080, 0.4096, 0.0634, 0.2190]) -Greedy action tensor([ 0.3900, -0.2997, -0.3963, 0.8298]) tensor([0.2849, 0.1430, 0.1298, 0.4423]) -Greedy action tensor([-0.5234, -0.1974, 1.3488, 0.4179]) tensor([0.0873, 0.1210, 0.5678, 0.2238]) -Greedy action tensor([ 1.6215, -0.4324, 0.7266, 1.0363]) tensor([0.4776, 0.0612, 0.1952, 0.2660]) -Greedy action tensor([ 0.1090, -1.2348, -0.0086, 0.0464]) tensor([0.3237, 0.0844, 0.2878, 0.3041]) -Greedy action tensor([ 0.4556, -0.0923, 2.4896, 0.5227]) tensor([0.0972, 0.0562, 0.7428, 0.1039]) -Greedy action tensor([ 0.7152, -1.0128, 0.6850, -0.1837]) tensor([0.3914, 0.0695, 0.3798, 0.1593]) -Greedy action tensor([ 0.5568, 1.1524, -0.0473, -0.1976]) tensor([0.2610, 0.4735, 0.1427, 0.1228]) -Greedy action tensor([0.1154, 0.5072, 0.9075, 0.3374]) tensor([0.1685, 0.2493, 0.3720, 0.2103]) -Greedy action tensor([ 0.4769, -1.6463, -0.4365, -0.1907]) tensor([0.4917, 0.0588, 0.1973, 0.2522]) -Greedy action tensor([ 0.4635, 0.8445, 0.3843, -0.4213]) tensor([0.2631, 0.3851, 0.2431, 0.1086]) -Greedy action tensor([-0.2463, -1.3991, -1.0804, 0.9835]) tensor([0.1934, 0.0611, 0.0840, 0.6615]) -Greedy action tensor([-0.1245, -2.1549, -0.1573, 1.0161]) tensor([0.1913, 0.0251, 0.1851, 0.5985]) -Greedy action tensor([ 1.1232, 0.2038, -0.1404, -0.0760]) tensor([0.5043, 0.2011, 0.1425, 0.1520]) -Greedy action tensor([ 0.6390, 0.7004, -0.1360, 1.1716]) tensor([0.2365, 0.2515, 0.1090, 0.4029]) -Greedy action tensor([ 1.0178, -1.1351, 2.7570, -0.3205]) tensor([0.1414, 0.0164, 0.8050, 0.0371]) -Greedy action tensor([ 1.5113, -1.4129, 1.1500, 0.7787]) tensor([0.4482, 0.0241, 0.3123, 0.2154]) -Greedy action tensor([ 0.8510, -1.0042, 1.4563, 0.6149]) tensor([0.2647, 0.0414, 0.4849, 0.2090]) -Greedy action tensor([ 1.1868, 0.8522, -0.8374, 0.6185]) tensor([0.4142, 0.2964, 0.0547, 0.2346]) -Greedy action tensor([-0.5404, 1.1181, 0.3450, 0.4158]) tensor([0.0887, 0.4657, 0.2149, 0.2307]) -Greedy action tensor([0.5941, 0.3531, 0.8933, 0.9475]) tensor([0.2194, 0.1724, 0.2959, 0.3124]) -Greedy action tensor([-0.1558, -0.4057, -1.0357, 0.6277]) tensor([0.2282, 0.1777, 0.0946, 0.4995]) -Greedy action tensor([-0.7781, -0.2936, 1.0391, -0.1108]) tensor([0.0932, 0.1513, 0.5738, 0.1817]) -Greedy action tensor([-1.1112, -1.2351, 0.2310, 0.3455]) tensor([0.1000, 0.0883, 0.3826, 0.4291]) -Greedy action tensor([ 0.5627, 0.9308, -0.6134, 0.2648]) tensor([0.2861, 0.4134, 0.0882, 0.2124]) -Greedy action tensor([ 0.5549, -0.5684, 0.6978, 0.4192]) tensor([0.2984, 0.0970, 0.3442, 0.2605]) -Greedy action tensor([ 0.4017, 0.6286, -0.1900, 0.4104]) tensor([0.2620, 0.3287, 0.1450, 0.2643]) -Greedy action tensor([ 0.1099, -2.5274, -0.3141, 0.3876]) tensor([0.3283, 0.0235, 0.2148, 0.4334]) -Greedy action tensor([1.4419, 0.2244, 0.6426, 0.1300]) tensor([0.4963, 0.1469, 0.2232, 0.1337]) -Greedy action tensor([-0.1098, -0.8481, -0.7788, 0.8199]) tensor([0.2210, 0.1056, 0.1132, 0.5601]) -Greedy action tensor([ 0.4735, -0.4065, 1.2767, -0.6232]) tensor([0.2512, 0.1042, 0.5608, 0.0839]) -Greedy action tensor([ 1.4971, -0.1896, -0.2275, 0.5547]) tensor([0.5704, 0.1056, 0.1017, 0.2223]) -Greedy action tensor([ 0.2980, -0.0469, 0.4198, 1.7217]) tensor([0.1431, 0.1013, 0.1616, 0.5940]) -Greedy action tensor([1.5834, 0.3823, 0.0724, 1.4055]) tensor([0.4240, 0.1276, 0.0936, 0.3549]) -Greedy action tensor([1.5708, 0.0848, 0.0787, 0.4887]) tensor([0.5586, 0.1264, 0.1256, 0.1893]) -Greedy action tensor([-0.5935, -1.4012, -0.1617, -0.0714]) tensor([0.2141, 0.0954, 0.3297, 0.3608]) -Greedy action tensor([ 0.7743, 0.9698, -0.5132, 0.7630]) tensor([0.2873, 0.3493, 0.0793, 0.2841]) -Greedy action tensor([ 0.6203, -0.3200, 1.2288, 1.1499]) tensor([0.2030, 0.0793, 0.3730, 0.3447]) -Greedy action tensor([ 0.3750, -1.7047, -0.2174, 0.4455]) tensor([0.3635, 0.0454, 0.2010, 0.3900]) -Greedy action tensor([-0.9783, -1.4992, 0.5671, 0.3809]) tensor([0.0983, 0.0584, 0.4608, 0.3825]) -Greedy action tensor([-0.5275, -1.0499, -0.2034, -0.8124]) tensor([0.2682, 0.1591, 0.3709, 0.2017]) -Greedy action tensor([ 0.9668, -1.4059, 1.3660, 0.1887]) tensor([0.3286, 0.0306, 0.4898, 0.1509]) -Greedy action tensor([ 0.4919, -0.1537, 0.4761, 0.9317]) tensor([0.2462, 0.1291, 0.2424, 0.3823]) -Greedy action tensor([ 0.8618, -1.3469, -0.6408, 1.0467]) tensor([0.3944, 0.0433, 0.0878, 0.4745]) -Greedy action tensor([-0.5161, -1.0955, -0.0504, 0.7045]) tensor([0.1528, 0.0856, 0.2435, 0.5180]) -Greedy action tensor([ 1.4377, -0.9914, -0.6290, 0.3125]) tensor([0.6496, 0.0572, 0.0822, 0.2109]) -Greedy action tensor([ 1.2851, -1.9817, -0.3431, 0.4181]) tensor([0.6044, 0.0230, 0.1186, 0.2540]) -Greedy action tensor([0.6667, 0.5192, 0.6704, 0.8370]) tensor([0.2468, 0.2129, 0.2477, 0.2926]) -Greedy action tensor([-0.5049, -1.2297, 0.2083, 2.2018]) tensor([0.0540, 0.0262, 0.1103, 0.8095]) -Greedy action tensor([ 0.7390, 0.1984, -0.1610, 0.9160]) tensor([0.3142, 0.1830, 0.1278, 0.3750]) -Greedy action tensor([-0.9473, -0.4854, 0.2831, 0.0224]) tensor([0.1156, 0.1835, 0.3958, 0.3050]) -Greedy action tensor([ 1.2340e+00, -3.5652e-02, -6.9344e-01, -2.9790e-04]) tensor([0.5822, 0.1636, 0.0847, 0.1695]) -Greedy action tensor([ 0.0556, -1.2575, -1.0385, 1.5640]) tensor([0.1633, 0.0439, 0.0547, 0.7381]) -Greedy action tensor([ 0.9176, -0.6813, -0.2063, -0.2974]) tensor([0.5483, 0.1108, 0.1782, 0.1627]) -Greedy action tensor([ 1.1035, -0.2218, -0.7636, 0.2763]) tensor([0.5383, 0.1431, 0.0832, 0.2354]) -Greedy action tensor([ 1.2806, -0.4511, -0.3028, -0.0017]) tensor([0.6025, 0.1066, 0.1237, 0.1671]) -Greedy action tensor([ 1.5195, 0.0861, -0.2078, 0.0339]) tensor([0.6088, 0.1452, 0.1082, 0.1378]) -Greedy action tensor([ 1.6923, -0.7885, -0.4496, 0.9548]) tensor([0.5955, 0.0498, 0.0699, 0.2848]) -Greedy action tensor([ 0.8653, -0.6338, -0.5625, 0.5853]) tensor([0.4506, 0.1006, 0.1081, 0.3406]) -Greedy action tensor([ 1.1288, -0.5564, -0.1059, 0.1980]) tensor([0.5346, 0.0991, 0.1555, 0.2108]) -Greedy action tensor([ 0.9078, -0.3289, -0.1923, 0.1270]) tensor([0.4805, 0.1395, 0.1599, 0.2201]) -Greedy action tensor([ 1.3320, -0.3372, 0.1061, 0.1715]) tensor([0.5570, 0.1049, 0.1635, 0.1745]) -Greedy action tensor([ 1.1508, -0.4768, -0.1392, 0.3514]) tensor([0.5205, 0.1022, 0.1433, 0.2340]) -Greedy action tensor([ 1.2847, -0.6502, -0.3284, 0.2026]) tensor([0.5943, 0.0858, 0.1184, 0.2014]) -Greedy action tensor([ 1.3521, -0.3487, -0.3061, 0.2426]) tensor([0.5873, 0.1072, 0.1119, 0.1936]) -Greedy action tensor([ 1.6691, 0.0167, -0.1787, 0.1777]) tensor([0.6352, 0.1217, 0.1001, 0.1430]) -Greedy action tensor([ 1.9692, -0.7539, -0.0625, 0.1868]) tensor([0.7326, 0.0481, 0.0961, 0.1232]) -Greedy action tensor([ 1.2765, -0.2639, -0.4084, 0.1797]) tensor([0.5768, 0.1236, 0.1070, 0.1926]) -Greedy action tensor([ 1.3160, -0.7418, -0.1452, 0.2788]) tensor([0.5834, 0.0745, 0.1353, 0.2068]) -Greedy action tensor([ 1.6275, -0.6982, -0.0055, 0.0845]) tensor([0.6637, 0.0648, 0.1296, 0.1418]) -Greedy action tensor([ 0.6106, -0.2250, -0.4578, 0.4454]) tensor([0.3810, 0.1652, 0.1309, 0.3230]) -Greedy action tensor([ 0.8015, -0.3973, -0.2157, 0.2571]) tensor([0.4458, 0.1344, 0.1612, 0.2586]) -Greedy action tensor([ 0.9024, -0.4305, -0.1631, 0.3074]) tensor([0.4630, 0.1221, 0.1595, 0.2554]) -Greedy action tensor([ 1.0459, -0.1358, -0.1701, 0.0647]) tensor([0.5055, 0.1551, 0.1499, 0.1895]) -Greedy action tensor([ 1.3828, -0.5882, -0.1427, 0.2421]) tensor([0.5965, 0.0831, 0.1297, 0.1906]) -Greedy action tensor([ 0.8254, -0.6569, -0.1155, 0.1032]) tensor([0.4755, 0.1080, 0.1856, 0.2309]) -Greedy action tensor([ 1.2727, -0.6840, -0.3025, 0.0915]) tensor([0.6041, 0.0854, 0.1250, 0.1854]) -Greedy action tensor([ 1.2088, -0.5340, 0.0296, 0.1136]) tensor([0.5504, 0.0963, 0.1692, 0.1841]) -Greedy action tensor([ 0.7031, -0.3637, -0.0710, 0.0613]) tensor([0.4289, 0.1476, 0.1978, 0.2258]) -Greedy action tensor([ 0.9965, -0.3618, -0.2220, 0.1969]) tensor([0.4994, 0.1284, 0.1477, 0.2245]) -Greedy action tensor([ 0.7422, -0.0312, -0.1277, 0.0642]) tensor([0.4188, 0.1932, 0.1755, 0.2126]) -Greedy action tensor([ 1.2315, -0.8307, -0.3985, 1.0923]) tensor([0.4560, 0.0580, 0.0893, 0.3967]) -Greedy action tensor([ 1.3283, -0.7189, -0.2430, 0.4101]) tensor([0.5760, 0.0744, 0.1197, 0.2300]) -Greedy action tensor([ 1.7588, -0.8900, -0.2104, 0.3575]) tensor([0.6865, 0.0486, 0.0958, 0.1691]) -Greedy action tensor([ 1.4250, -0.7045, -0.3528, 0.4459]) tensor([0.6011, 0.0715, 0.1016, 0.2258]) -Greedy action tensor([ 1.3217, -0.6680, -0.3427, 0.6891]) tensor([0.5384, 0.0736, 0.1019, 0.2860]) -Greedy action tensor([ 1.7517, -0.9008, -0.1289, 0.3159]) tensor([0.6845, 0.0482, 0.1044, 0.1629]) -Greedy action tensor([ 1.0215, -0.4291, -0.3167, -0.0142]) tensor([0.5401, 0.1266, 0.1417, 0.1917]) -Greedy action tensor([ 1.8679, -0.8322, -0.3412, 0.5230]) tensor([0.6956, 0.0467, 0.0764, 0.1813]) -Greedy action tensor([ 1.4385, -0.4237, -0.2242, 0.2599]) tensor([0.6051, 0.0940, 0.1147, 0.1862]) -Greedy action tensor([ 1.7040, -0.7344, -0.0664, 0.5721]) tensor([0.6329, 0.0553, 0.1078, 0.2041]) -Greedy action tensor([ 1.2323, -0.6712, -0.2928, 0.6249]) tensor([0.5232, 0.0780, 0.1138, 0.2850]) -Greedy action tensor([ 1.1781, -0.1166, -0.3526, 0.3453]) tensor([0.5194, 0.1423, 0.1124, 0.2259]) -Greedy action tensor([ 0.8510, -0.3030, 0.2612, -0.0744]) tensor([0.4413, 0.1392, 0.2447, 0.1749]) -Greedy action tensor([ 1.0872, -0.6141, -0.2326, 0.2917]) tensor([0.5261, 0.0960, 0.1406, 0.2374]) -Greedy action tensor([ 0.8165, -0.3553, -0.0913, 0.1482]) tensor([0.4493, 0.1392, 0.1812, 0.2303]) -Greedy action tensor([ 1.1707, -0.0018, -0.2075, -0.0768]) tensor([0.5409, 0.1675, 0.1363, 0.1554]) -Greedy action tensor([ 1.1380, -0.5337, -0.6007, 0.7901]) tensor([0.4831, 0.0908, 0.0849, 0.3412]) -Greedy action tensor([ 1.5684, -0.6562, -0.3244, 0.3451]) tensor([0.6439, 0.0696, 0.0970, 0.1895]) -Greedy action tensor([ 1.2985, -0.7652, -0.2350, 0.1315]) tensor([0.6046, 0.0768, 0.1304, 0.1882]) -Greedy action tensor([ 1.0050, -0.7521, -0.3327, 0.3448]) tensor([0.5124, 0.0884, 0.1345, 0.2648]) -Greedy action tensor([ 1.1121, -0.6134, -0.0913, 0.3703]) tensor([0.5116, 0.0911, 0.1536, 0.2437]) -Greedy action tensor([ 1.2057, -0.1998, -0.1681, 0.0377]) tensor([0.5527, 0.1355, 0.1399, 0.1719]) -Greedy action tensor([ 1.7307, 0.1937, -0.1587, 0.0963]) tensor([0.6405, 0.1377, 0.0968, 0.1249]) -Greedy action tensor([ 1.9603, -1.3589, -0.3721, 0.9044]) tensor([0.6752, 0.0244, 0.0655, 0.2349]) -Greedy action tensor([ 0.5029, -0.1502, -0.0622, 0.1722]) tensor([0.3562, 0.1854, 0.2025, 0.2559]) -Greedy action tensor([ 1.5903, -0.6178, -0.2565, 0.3465]) tensor([0.6427, 0.0706, 0.1014, 0.1853]) -Greedy action tensor([ 0.7757, -0.5384, -0.1407, 0.2905]) tensor([0.4378, 0.1176, 0.1751, 0.2695]) -Greedy action tensor([ 0.8658, -0.3697, -0.0847, -0.2260]) tensor([0.4968, 0.1444, 0.1920, 0.1667]) -Greedy action tensor([ 1.4033, -0.2925, -0.0792, 0.1066]) tensor([0.5938, 0.1089, 0.1348, 0.1624]) -Greedy action tensor([ 1.0279, -0.6750, -0.2814, 0.2855]) tensor([0.5186, 0.0945, 0.1400, 0.2468]) -Greedy action tensor([ 1.8852, -0.8844, -0.3293, 0.3946]) tensor([0.7158, 0.0449, 0.0782, 0.1612]) -Greedy action tensor([ 1.9783, -0.6745, -0.3097, 0.5953]) tensor([0.7029, 0.0495, 0.0713, 0.1763]) -Greedy action tensor([ 1.7038, -0.2300, -0.3932, 0.4141]) tensor([0.6482, 0.0937, 0.0796, 0.1785]) -Greedy action tensor([ 1.7730, -0.5664, -0.4069, 0.3348]) tensor([0.6912, 0.0666, 0.0781, 0.1641]) -Greedy action tensor([ 1.6649, -0.7577, -0.6290, 0.3192]) tensor([0.6897, 0.0612, 0.0696, 0.1796]) -Greedy action tensor([ 1.0852, -0.6101, -0.2130, 0.2311]) tensor([0.5313, 0.0975, 0.1451, 0.2262]) -Greedy action tensor([ 1.3427, -0.0149, 0.0414, 0.2683]) tensor([0.5345, 0.1375, 0.1455, 0.1825]) -Greedy action tensor([ 1.3163, -0.4174, -0.2456, 0.5643]) tensor([0.5383, 0.0951, 0.1129, 0.2538]) -Greedy action tensor([ 0.8517, -0.0133, 0.0520, -0.1976]) tensor([0.4503, 0.1896, 0.2024, 0.1577]) -Greedy action tensor([ 1.7164, -1.0120, -0.2326, 0.3647]) tensor([0.6819, 0.0445, 0.0971, 0.1765]) -Greedy action tensor([ 0.9049, -0.6160, -0.0387, 0.2355]) tensor([0.4718, 0.1031, 0.1836, 0.2415]) -Greedy action tensor([ 2.0008, -0.8404, -0.1596, 0.5238]) tensor([0.7133, 0.0416, 0.0822, 0.1628]) -Greedy action tensor([ 1.3739, -0.2400, -0.2083, 0.2123]) tensor([0.5822, 0.1159, 0.1197, 0.1822]) -Greedy action tensor([ 1.9619, -1.1608, -0.3334, 0.5176]) tensor([0.7243, 0.0319, 0.0730, 0.1709]) -Greedy action tensor([ 1.7633, -0.5683, -0.4003, 0.7672]) tensor([0.6324, 0.0614, 0.0727, 0.2335]) -Greedy action tensor([ 0.8984, -0.6827, -0.3386, 0.7532]) tensor([0.4236, 0.0872, 0.1229, 0.3663]) -Greedy action tensor([ 1.6875, -0.5983, -0.1897, 0.4403]) tensor([0.6485, 0.0659, 0.0992, 0.1863]) -Greedy action tensor([ 1.6160, -0.5419, -0.5980, 0.4564]) tensor([0.6500, 0.0751, 0.0710, 0.2039]) -Greedy action tensor([ 0.7868, -0.2691, -0.4015, 0.4054]) tensor([0.4282, 0.1490, 0.1305, 0.2924]) -Greedy action tensor([ 1.0889, -0.5264, -0.4259, 0.6199]) tensor([0.4892, 0.0973, 0.1075, 0.3060]) -Greedy action tensor([ 1.0332, -0.0514, -0.1486, 0.0912]) tensor([0.4915, 0.1661, 0.1508, 0.1916]) -Greedy action tensor([ 0.7912, -0.2881, -0.5348, 0.6131]) tensor([0.4095, 0.1391, 0.1087, 0.3427]) -Greedy action tensor([ 1.3957, -0.4093, -0.0699, 0.3090]) tensor([0.5771, 0.0949, 0.1333, 0.1947]) -Greedy action tensor([-1.8813, -0.4080, 0.6275, -0.1386]) tensor([0.0428, 0.1867, 0.5260, 0.2445]) -Greedy action tensor([-1.7438, -0.0187, 0.5144, -0.0030]) tensor([0.0457, 0.2565, 0.4372, 0.2606]) -Greedy action tensor([-1.7442, -0.4420, 0.5716, -0.0757]) tensor([0.0497, 0.1828, 0.5038, 0.2637]) -Greedy action tensor([-0.5626, 0.4985, 0.5366, 1.0261]) tensor([0.0848, 0.2451, 0.2546, 0.4154]) -Greedy action tensor([-1.6713, -0.0309, 0.4641, -0.0264]) tensor([0.0505, 0.2605, 0.4273, 0.2617]) -Greedy action tensor([-0.7148, -0.2271, 0.4783, 0.5345]) tensor([0.1062, 0.1730, 0.3503, 0.3705]) -Greedy action tensor([-1.7906, -0.4377, 0.5797, -0.0862]) tensor([0.0475, 0.1836, 0.5079, 0.2610]) -Greedy action tensor([-1.7755, -0.4687, 0.7899, 0.2740]) tensor([0.0393, 0.1451, 0.5108, 0.3049]) -Greedy action tensor([-0.5210, -0.5115, 0.1870, 0.2504]) tensor([0.1612, 0.1628, 0.3273, 0.3487]) -Greedy action tensor([-1.8989, -0.4433, 0.6414, -0.1578]) tensor([0.0422, 0.1811, 0.5357, 0.2409]) -Greedy action tensor([-1.3941, -0.7563, 0.8292, 0.7543]) tensor([0.0483, 0.0914, 0.4462, 0.4140]) -Greedy action tensor([-1.8060, -0.4818, 0.5657, -0.1085]) tensor([0.0478, 0.1796, 0.5118, 0.2608]) -Greedy action tensor([-1.9147, -0.4687, 0.6564, -0.1564]) tensor([0.0414, 0.1760, 0.5421, 0.2405]) -Greedy action tensor([-1.8723, -0.4445, 0.6363, -0.1256]) tensor([0.0431, 0.1798, 0.5298, 0.2473]) -Greedy action tensor([-1.0349, -0.6892, 0.5388, 0.7269]) tensor([0.0766, 0.1082, 0.3694, 0.4459]) -Greedy action tensor([-1.5840, -0.2780, 0.6624, 0.4179]) tensor([0.0464, 0.1713, 0.4387, 0.3436]) -Greedy action tensor([-0.4307, 0.2306, 0.7689, 1.4130]) tensor([0.0795, 0.1540, 0.2639, 0.5025]) -Greedy action tensor([-1.0565, -0.5829, 0.9834, 1.3595]) tensor([0.0465, 0.0747, 0.3577, 0.5211]) -Greedy action tensor([-1.6271, -0.4984, 0.5035, 0.0792]) tensor([0.0555, 0.1716, 0.4672, 0.3057]) -Greedy action tensor([-1.3187, -0.6625, 1.3126, 1.3502]) tensor([0.0320, 0.0617, 0.4446, 0.4617]) -Greedy action tensor([-1.8820, -0.4051, 0.6459, -0.1227]) tensor([0.0422, 0.1847, 0.5282, 0.2449]) -Greedy action tensor([-1.9000, -0.3846, 0.6548, -0.1276]) tensor([0.0411, 0.1873, 0.5295, 0.2421]) -Greedy action tensor([-1.7395, -0.4781, 0.8121, 0.2798]) tensor([0.0402, 0.1418, 0.5153, 0.3026]) -Greedy action tensor([-1.6094, -0.5307, 0.4839, 0.0243]) tensor([0.0582, 0.1712, 0.4723, 0.2983]) -Greedy action tensor([-1.8827, -0.4250, 0.6448, -0.1233]) tensor([0.0423, 0.1818, 0.5300, 0.2459]) -Greedy action tensor([-1.7994, -1.0397, 0.0642, -0.5586]) tensor([0.0767, 0.1639, 0.4943, 0.2651]) -Greedy action tensor([-0.7691, 0.9011, 0.0675, 0.4042]) tensor([0.0844, 0.4482, 0.1947, 0.2727]) -Greedy action tensor([-0.2822, 0.6524, 0.7173, 1.2987]) tensor([0.0899, 0.2289, 0.2443, 0.4369]) -Greedy action tensor([-0.9450, -0.4228, 0.4330, 0.9228]) tensor([0.0762, 0.1284, 0.3022, 0.4932]) -Greedy action tensor([-1.8494, -0.8152, 0.4704, -0.0946]) tensor([0.0506, 0.1423, 0.5146, 0.2925]) -Greedy action tensor([-1.3981, -0.4098, 0.5670, 0.5043]) tensor([0.0571, 0.1533, 0.4072, 0.3824]) -Greedy action tensor([-1.2853, -0.6449, 0.3321, 0.1529]) tensor([0.0823, 0.1561, 0.4148, 0.3467]) -Greedy action tensor([-1.0018, -0.4320, 0.8318, 1.3270]) tensor([0.0518, 0.0917, 0.3243, 0.5322]) -Greedy action tensor([-1.8264, -0.4774, 0.5976, -0.0931]) tensor([0.0459, 0.1767, 0.5178, 0.2596]) -Greedy action tensor([-1.9789, -0.7976, 0.5376, -0.1496]) tensor([0.0437, 0.1425, 0.5415, 0.2724]) -Greedy action tensor([-0.3906, -0.4954, 0.6395, 1.2506]) tensor([0.1014, 0.0913, 0.2840, 0.5233]) -Greedy action tensor([-1.9023, -0.3874, 0.6395, -0.1422]) tensor([0.0416, 0.1890, 0.5278, 0.2416]) -Greedy action tensor([-1.8547, -0.3500, 0.6090, -0.1141]) tensor([0.0436, 0.1962, 0.5119, 0.2484]) -Greedy action tensor([-1.9039, -0.4305, 0.6460, -0.1526]) tensor([0.0418, 0.1824, 0.5351, 0.2408]) -Greedy action tensor([-0.8246, 0.3068, 0.2053, -0.1039]) tensor([0.1117, 0.3461, 0.3127, 0.2295]) -Greedy action tensor([-1.9343, -0.8887, 0.2961, -0.2029]) tensor([0.0532, 0.1514, 0.4949, 0.3005]) -Greedy action tensor([-1.0187, -0.6010, 0.2036, 0.3455]) tensor([0.1018, 0.1545, 0.3455, 0.3982]) -Greedy action tensor([-1.6939, -0.4844, 0.5448, -0.0336]) tensor([0.0527, 0.1765, 0.4939, 0.2770]) -Greedy action tensor([-1.8230, -0.4750, 0.6021, -0.0941]) tensor([0.0459, 0.1767, 0.5188, 0.2586]) -Greedy action tensor([-1.9130, -0.3648, 0.6322, -0.1548]) tensor([0.0412, 0.1939, 0.5256, 0.2393]) -Greedy action tensor([-1.1866, -0.5988, 0.2556, 0.3291]) tensor([0.0863, 0.1554, 0.3652, 0.3931]) -Greedy action tensor([-0.8397, -0.3231, 0.3496, 0.9604]) tensor([0.0833, 0.1396, 0.2735, 0.5037]) -Greedy action tensor([-1.3949, -0.5046, 1.2343, 1.2102]) tensor([0.0324, 0.0790, 0.4496, 0.4389]) -Greedy action tensor([-1.8443, -0.0120, 0.5518, -0.0843]) tensor([0.0416, 0.2599, 0.4567, 0.2418]) -Greedy action tensor([-0.9967, -0.5057, 0.5649, 1.1998]) tensor([0.0610, 0.0997, 0.2908, 0.5486]) -Greedy action tensor([-1.8341, -0.5646, 0.9802, 0.3106]) tensor([0.0336, 0.1195, 0.5601, 0.2868]) -Greedy action tensor([-1.8012, -0.4072, 0.6435, -0.2661]) tensor([0.0472, 0.1901, 0.5437, 0.2190]) -Greedy action tensor([-1.6966, -0.5365, 0.6063, 0.0432]) tensor([0.0503, 0.1604, 0.5029, 0.2864]) -Greedy action tensor([-1.0925, -0.6392, 0.2215, 0.3741]) tensor([0.0941, 0.1480, 0.3501, 0.4078]) -Greedy action tensor([-1.8224, -0.5064, 1.3292, 0.8592]) tensor([0.0234, 0.0873, 0.5473, 0.3420]) -Greedy action tensor([-1.0966, -0.7258, 0.3984, 0.7377]) tensor([0.0759, 0.1100, 0.3386, 0.4754]) -Greedy action tensor([-0.7520, -0.3165, 0.7351, 1.4608]) tensor([0.0621, 0.0959, 0.2746, 0.5674]) -Greedy action tensor([-1.8629, -0.7194, 0.1891, -0.2652]) tensor([0.0593, 0.1861, 0.4616, 0.2930]) -Greedy action tensor([-1.8751, -0.4280, 0.6324, -0.1223]) tensor([0.0429, 0.1825, 0.5269, 0.2477]) -Greedy action tensor([-1.8341, -0.4727, 0.6300, -0.0522]) tensor([0.0443, 0.1727, 0.5201, 0.2629]) -Greedy action tensor([-1.8937, -0.4670, 0.6462, -0.1374]) tensor([0.0423, 0.1762, 0.5364, 0.2450]) -Greedy action tensor([-1.0834, -0.2828, 0.6121, -0.6346]) tensor([0.0976, 0.2174, 0.5320, 0.1529]) -Greedy action tensor([-0.7566, 0.0991, -0.2491, 0.9730]) tensor([0.0939, 0.2209, 0.1559, 0.5293]) -Greedy action tensor([-1.0650, 0.7250, 0.1022, 0.2264]) tensor([0.0722, 0.4328, 0.2321, 0.2628]) -Greedy action tensor([-1.0822, -0.5888, 0.3068, 0.4036]) tensor([0.0904, 0.1480, 0.3624, 0.3992]) -Greedy action tensor([-1.4272, -0.1606, 0.1906, -0.2228]) tensor([0.0774, 0.2746, 0.3901, 0.2580]) -Greedy action tensor([-1.9112, -0.4697, 0.6781, -0.1305]) tensor([0.0408, 0.1727, 0.5441, 0.2424]) -Greedy action tensor([-0.6100, -0.6378, 0.1564, 0.0692]) tensor([0.1640, 0.1595, 0.3530, 0.3235]) -Greedy action tensor([-1.0839, -0.6190, 0.6679, 1.1634]) tensor([0.0561, 0.0893, 0.3235, 0.5310]) -Greedy action tensor([-0.8449, -0.2467, 0.3385, 0.7356]) tensor([0.0914, 0.1662, 0.2984, 0.4439]) -Greedy action tensor([-1.7784, -0.4909, 0.5723, -0.0596]) tensor([0.0483, 0.1751, 0.5070, 0.2695]) -Greedy action tensor([-1.4970, -0.6224, 1.2183, 1.0379]) tensor([0.0321, 0.0770, 0.4855, 0.4053]) -Greedy action tensor([-1.2713, -0.4627, 0.8712, 1.0907]) tensor([0.0447, 0.1003, 0.3808, 0.4742]) -Greedy action tensor([-1.8476, -0.4682, 1.2721, 0.8418]) tensor([0.0236, 0.0938, 0.5348, 0.3478]) -Greedy action tensor([-0.9253, -0.6535, 0.2643, 0.2104]) tensor([0.1148, 0.1506, 0.3772, 0.3574]) -Greedy action tensor([-1.2940, -0.5588, 0.3228, 0.1950]) tensor([0.0796, 0.1661, 0.4012, 0.3530]) -Greedy action tensor([-1.2751, -0.5684, 0.4961, 0.7417]) tensor([0.0609, 0.1235, 0.3580, 0.4577]) -Greedy action tensor([-1.7987, -0.2669, 0.5715, -0.1199]) tensor([0.0461, 0.2134, 0.4934, 0.2471]) -Greedy action tensor([-1.9029, -0.3901, 0.6548, -0.1442]) tensor([0.0412, 0.1872, 0.5322, 0.2394]) -Greedy action tensor([-1.7236, -0.4119, 0.6488, 0.0602]) tensor([0.0468, 0.1736, 0.5014, 0.2783]) -Greedy action tensor([-0.3128, 0.1843, 0.6897, 1.6141]) tensor([0.0817, 0.1343, 0.2227, 0.5612]) -Greedy action tensor([ 0.2274, -0.1915, -0.0472, -0.3602]) tensor([0.3363, 0.2212, 0.2556, 0.1869]) -Greedy action tensor([ 0.4970, -0.2660, -0.1025, -0.2356]) tensor([0.4006, 0.1868, 0.2200, 0.1926]) -Greedy action tensor([ 0.6845, -0.4487, 0.0905, -0.3557]) tensor([0.4489, 0.1446, 0.2479, 0.1587]) -Greedy action tensor([ 0.4141, -0.2289, 0.0081, -0.4854]) tensor([0.3848, 0.2023, 0.2564, 0.1565]) -Greedy action tensor([ 1.0294, -0.7903, 0.0198, -0.5056]) tensor([0.5741, 0.0930, 0.2092, 0.1237]) -Greedy action tensor([ 0.6213, -0.3886, -0.0480, -0.4078]) tensor([0.4477, 0.1631, 0.2292, 0.1600]) -Greedy action tensor([ 0.4761, -0.4021, -0.1144, -0.3208]) tensor([0.4132, 0.1717, 0.2289, 0.1862]) -Greedy action tensor([ 0.3477, 0.0635, 0.0139, -0.2910]) tensor([0.3337, 0.2511, 0.2390, 0.1762]) -Greedy action tensor([ 0.8370, -0.4600, -0.1351, -0.3482]) tensor([0.5109, 0.1397, 0.1933, 0.1562]) -Greedy action tensor([ 0.5899, -0.4177, -0.0537, -0.4722]) tensor([0.4472, 0.1633, 0.2349, 0.1546]) -Greedy action tensor([ 0.8547, -0.7372, -0.0171, -0.5976]) tensor([0.5388, 0.1097, 0.2254, 0.1261]) -Greedy action tensor([ 0.6858, -0.4724, -0.2814, -0.3746]) tensor([0.4901, 0.1539, 0.1863, 0.1697]) -Greedy action tensor([ 0.2364, 0.0280, -0.0157, -0.2962]) tensor([0.3149, 0.2556, 0.2447, 0.1848]) -Greedy action tensor([ 1.1260, -0.6524, -0.1087, -0.3712]) tensor([0.5940, 0.1003, 0.1728, 0.1329]) -Greedy action tensor([ 0.5014, -0.6294, -0.1826, -0.1425]) tensor([0.4251, 0.1372, 0.2145, 0.2233]) -Greedy action tensor([ 0.3969, -0.0711, -0.0067, -0.2799]) tensor([0.3568, 0.2235, 0.2383, 0.1814]) -Greedy action tensor([ 0.6069, -0.3262, 0.0516, -0.4150]) tensor([0.4297, 0.1690, 0.2466, 0.1547]) -Greedy action tensor([ 0.6435, -0.5084, 0.2237, -0.6243]) tensor([0.4435, 0.1402, 0.2915, 0.1248]) -Greedy action tensor([ 0.8256, -0.6277, -0.1758, -0.4746]) tensor([0.5337, 0.1248, 0.1961, 0.1454]) -Greedy action tensor([ 0.6581, 0.2932, -0.1786, -0.2918]) tensor([0.3977, 0.2761, 0.1723, 0.1538]) -Greedy action tensor([ 0.4445, -0.0231, -0.1354, -0.3446]) tensor([0.3787, 0.2372, 0.2120, 0.1720]) -Greedy action tensor([ 0.5255, -0.1829, -0.0231, -0.1683]) tensor([0.3891, 0.1916, 0.2248, 0.1944]) -Greedy action tensor([ 0.5011, -0.3079, 0.0014, -0.4078]) tensor([0.4073, 0.1814, 0.2471, 0.1641]) -Greedy action tensor([ 0.7328, 0.0030, -0.0459, -0.2716]) tensor([0.4334, 0.2089, 0.1989, 0.1587]) -Greedy action tensor([ 0.6718, -0.0492, -0.0721, -0.2682]) tensor([0.4251, 0.2067, 0.2021, 0.1661]) -Greedy action tensor([ 0.5518, -0.2955, -0.0531, -0.3834]) tensor([0.4224, 0.1810, 0.2307, 0.1658]) -Greedy action tensor([ 0.4754, -0.3919, -0.0254, -0.4160]) tensor([0.4105, 0.1724, 0.2488, 0.1683]) -Greedy action tensor([ 1.0074, -0.3097, -0.0360, -0.4586]) tensor([0.5402, 0.1447, 0.1903, 0.1247]) -Greedy action tensor([ 0.4546, -0.2356, -0.0588, -0.4127]) tensor([0.3968, 0.1990, 0.2375, 0.1667]) -Greedy action tensor([ 0.2346, 0.0158, -0.0946, -0.3130]) tensor([0.3224, 0.2591, 0.2320, 0.1865]) -Greedy action tensor([ 0.6980, -0.3846, -0.0248, -0.5274]) tensor([0.4722, 0.1599, 0.2292, 0.1387]) -Greedy action tensor([ 0.5428, -0.0830, -0.0815, -0.1998]) tensor([0.3927, 0.2100, 0.2103, 0.1869]) -Greedy action tensor([ 0.2702, -0.0143, -0.1118, -0.4016]) tensor([0.3395, 0.2554, 0.2317, 0.1734]) -Greedy action tensor([ 0.4766, 0.2425, -0.1051, 0.0162]) tensor([0.3354, 0.2654, 0.1875, 0.2117]) -Greedy action tensor([ 0.9501, -0.2490, 0.1134, -0.6754]) tensor([0.5178, 0.1561, 0.2243, 0.1019]) -Greedy action tensor([ 0.2475, 0.0524, -0.0954, -0.0573]) tensor([0.3058, 0.2516, 0.2171, 0.2255]) -Greedy action tensor([ 0.4660, -0.2387, 0.0788, -0.4370]) tensor([0.3878, 0.1917, 0.2633, 0.1572]) -Greedy action tensor([ 0.7499, -0.3998, -0.0714, -0.5303]) tensor([0.4915, 0.1557, 0.2162, 0.1366]) -Greedy action tensor([ 0.6590, -0.4657, -0.0201, -0.5424]) tensor([0.4689, 0.1523, 0.2378, 0.1410]) -Greedy action tensor([ 0.4487, -0.0433, -0.1104, -0.3275]) tensor([0.3783, 0.2313, 0.2163, 0.1741]) -Greedy action tensor([ 0.8186, -0.5433, -0.1014, -0.2220]) tensor([0.4980, 0.1276, 0.1985, 0.1759]) -Greedy action tensor([ 0.8638, -0.2758, -0.1108, -0.3719]) tensor([0.5030, 0.1609, 0.1898, 0.1462]) -Greedy action tensor([ 0.6141, -0.6759, -0.0910, -0.3541]) tensor([0.4653, 0.1281, 0.2299, 0.1767]) -Greedy action tensor([ 0.5528, -0.1391, -0.0643, -0.2414]) tensor([0.4013, 0.2009, 0.2165, 0.1814]) -Greedy action tensor([ 0.6456, -0.2271, 0.0789, -0.2424]) tensor([0.4172, 0.1743, 0.2367, 0.1717]) -Greedy action tensor([ 0.5296, 0.0911, -0.1515, -0.3037]) tensor([0.3868, 0.2494, 0.1957, 0.1681]) -Greedy action tensor([ 0.6306, -0.0155, -0.1309, -0.1962]) tensor([0.4118, 0.2158, 0.1923, 0.1801]) -Greedy action tensor([ 0.7859, -0.6578, -0.1041, -0.3680]) tensor([0.5097, 0.1203, 0.2093, 0.1607]) -Greedy action tensor([ 0.8157, -0.6576, -0.0012, -0.3404]) tensor([0.5036, 0.1154, 0.2225, 0.1585]) -Greedy action tensor([ 0.7347, -0.4129, -0.0716, -0.6195]) tensor([0.4945, 0.1570, 0.2208, 0.1277]) -Greedy action tensor([ 0.3552, 0.1888, 0.0593, -0.0842]) tensor([0.3091, 0.2617, 0.2299, 0.1992]) -Greedy action tensor([ 0.4340, -0.0632, -0.1333, -0.1371]) tensor([0.3649, 0.2220, 0.2069, 0.2061]) -Greedy action tensor([ 0.6067, -0.3333, 0.0472, -0.5307]) tensor([0.4381, 0.1711, 0.2503, 0.1405]) -Greedy action tensor([ 0.8237, -0.5472, -0.0591, -0.4365]) tensor([0.5125, 0.1301, 0.2120, 0.1454]) -Greedy action tensor([ 0.3848, 0.4108, -0.0713, -0.2890]) tensor([0.3155, 0.3238, 0.1999, 0.1608]) -Greedy action tensor([ 0.6354, -0.4372, 0.0016, -0.3765]) tensor([0.4472, 0.1530, 0.2373, 0.1626]) -Greedy action tensor([ 0.3552, 0.0509, -0.0283, -0.1079]) tensor([0.3280, 0.2420, 0.2235, 0.2064]) -Greedy action tensor([ 0.6494, -0.3094, 0.0338, -0.3557]) tensor([0.4367, 0.1674, 0.2360, 0.1599]) -Greedy action tensor([ 1.0294, -0.6993, -0.0689, -0.3970]) tensor([0.5711, 0.1014, 0.1904, 0.1372]) -Greedy action tensor([ 0.3639, 0.1467, -0.1935, 0.0344]) tensor([0.3229, 0.2599, 0.1849, 0.2323]) -Greedy action tensor([ 0.7397, -0.2603, -0.0946, -0.2004]) tensor([0.4561, 0.1678, 0.1980, 0.1781]) -Greedy action tensor([ 0.3020, 0.0854, -0.1636, -0.1150]) tensor([0.3234, 0.2604, 0.2030, 0.2131]) -Greedy action tensor([ 0.9831, -0.6439, 0.0073, -0.4622]) tensor([0.5528, 0.1086, 0.2083, 0.1303]) -Greedy action tensor([ 1.1255, -0.7180, 0.0714, -0.3003]) tensor([0.5724, 0.0906, 0.1995, 0.1375]) -Greedy action tensor([ 0.9211, -0.5515, 0.0063, -0.4316]) tensor([0.5295, 0.1214, 0.2121, 0.1369]) -Greedy action tensor([ 0.6136, -0.1299, -0.0770, -0.3159]) tensor([0.4217, 0.2005, 0.2114, 0.1664]) -Greedy action tensor([ 0.6714, -0.5154, -0.0198, -0.4342]) tensor([0.4679, 0.1428, 0.2344, 0.1549]) -Greedy action tensor([ 0.3165, -0.1705, 0.0246, -0.1664]) tensor([0.3357, 0.2063, 0.2508, 0.2072]) -Greedy action tensor([ 0.6730, -0.4285, -0.0328, -0.3638]) tensor([0.4586, 0.1524, 0.2264, 0.1626]) -Greedy action tensor([ 0.6369, -0.2526, -0.0801, -0.2854]) tensor([0.4354, 0.1789, 0.2126, 0.1731]) -Greedy action tensor([ 0.4786, -0.2855, -0.0109, -0.3212]) tensor([0.3956, 0.1842, 0.2424, 0.1778]) -Greedy action tensor([ 0.7844, -0.5356, 0.0389, -0.4931]) tensor([0.4950, 0.1322, 0.2349, 0.1380]) -Greedy action tensor([ 0.3238, -0.1291, -0.0614, -0.2447]) tensor([0.3469, 0.2206, 0.2360, 0.1965]) -Greedy action tensor([ 0.6600, -0.0956, -0.0268, -0.1726]) tensor([0.4153, 0.1951, 0.2090, 0.1806]) -Greedy action tensor([ 0.6594, -0.2710, -0.0537, -0.2023]) tensor([0.4335, 0.1710, 0.2125, 0.1831]) -Greedy action tensor([ 0.6399, -0.7089, 0.0277, -0.5496]) tensor([0.4748, 0.1232, 0.2574, 0.1445]) -Greedy action tensor([ 0.3265, -0.0162, -0.0471, -0.1917]) tensor([0.3340, 0.2371, 0.2299, 0.1990]) -Greedy action tensor([ 0.6776, -0.3567, -0.1177, -0.3224]) tensor([0.4598, 0.1635, 0.2076, 0.1692]) -Greedy action tensor([ 0.6117, -0.0265, -0.0329, -0.3188]) tensor([0.4086, 0.2158, 0.2145, 0.1611]) -Greedy action tensor([ 0.4469, -0.3107, -0.0902, -0.1063]) tensor([0.3805, 0.1784, 0.2224, 0.2188]) -Greedy action tensor([ 0.5210, -0.0681, -0.0325, -0.3319]) tensor([0.3912, 0.2171, 0.2249, 0.1667]) -Greedy action tensor([ 0.1369, -1.2905, 0.6033, 0.6916]) tensor([0.2186, 0.0524, 0.3484, 0.3806]) -Greedy action tensor([ 0.6210, -1.2381, 0.1153, 1.3016]) tensor([0.2678, 0.0417, 0.1615, 0.5289]) -Greedy action tensor([ 0.2713, 0.5051, -0.3662, -0.2826]) tensor([0.2970, 0.3753, 0.1570, 0.1707]) -Greedy action tensor([-1.1255, -0.0310, -1.0658, 0.8599]) tensor([0.0811, 0.2423, 0.0861, 0.5905]) -Greedy action tensor([ 0.8358, 0.0931, -0.5062, 0.8531]) tensor([0.3630, 0.1727, 0.0949, 0.3694]) -Greedy action tensor([ 1.2244, -0.6237, 1.0120, 0.0730]) tensor([0.4381, 0.0690, 0.3543, 0.1385]) -Greedy action tensor([ 0.0782, -1.8325, -0.3616, 0.8401]) tensor([0.2542, 0.0376, 0.1637, 0.5445]) -Greedy action tensor([0.6033, 0.5359, 0.6434, 0.0219]) tensor([0.2829, 0.2645, 0.2945, 0.1582]) -Greedy action tensor([0.7200, 0.6160, 0.6088, 0.3879]) tensor([0.2846, 0.2565, 0.2547, 0.2042]) -Greedy action tensor([ 1.4407, -0.9890, 0.4662, 0.8108]) tensor([0.5005, 0.0441, 0.1889, 0.2666]) -Greedy action tensor([0.4249, 0.6944, 0.3451, 0.2416]) tensor([0.2460, 0.3221, 0.2271, 0.2048]) -Greedy action tensor([ 1.1299, -1.3254, 0.8021, -0.2572]) tensor([0.4864, 0.0417, 0.3504, 0.1215]) -Greedy action tensor([ 0.1081, -0.1165, -0.4661, 0.8396]) tensor([0.2252, 0.1799, 0.1268, 0.4681]) -Greedy action tensor([1.1330, 0.2545, 1.2073, 0.4806]) tensor([0.3318, 0.1379, 0.3575, 0.1728]) -Greedy action tensor([ 0.1384, 0.6611, -0.2983, 0.6623]) tensor([0.1991, 0.3359, 0.1287, 0.3363]) -Greedy action tensor([-0.3359, -0.6374, -0.3558, 1.9452]) tensor([0.0800, 0.0591, 0.0784, 0.7825]) -Greedy action tensor([-0.6874, 0.0551, -0.9505, 0.4073]) tensor([0.1458, 0.3064, 0.1121, 0.4357]) -Greedy action tensor([ 0.1769, -0.6977, 0.3418, 0.3607]) tensor([0.2633, 0.1098, 0.3105, 0.3164]) -Greedy action tensor([ 0.6336, -0.4138, -0.1836, 1.9365]) tensor([0.1827, 0.0641, 0.0807, 0.6725]) -Greedy action tensor([-0.2716, 1.9059, 0.2361, -0.7646]) tensor([0.0827, 0.7295, 0.1374, 0.0505]) -Greedy action tensor([ 0.9666, -0.3181, -0.6020, 1.1956]) tensor([0.3646, 0.1009, 0.0760, 0.4585]) -Greedy action tensor([ 1.3119, -0.8376, 1.3919, 0.7181]) tensor([0.3634, 0.0423, 0.3936, 0.2007]) -Greedy action tensor([-0.3492, 0.3816, 0.1602, -0.2155]) tensor([0.1700, 0.3529, 0.2828, 0.1943]) -Greedy action tensor([ 0.9976, -0.7560, 0.1067, 1.5084]) tensor([0.3077, 0.0533, 0.1262, 0.5128]) -Greedy action tensor([ 1.4868, 0.2522, 1.1698, -0.1648]) tensor([0.4523, 0.1316, 0.3294, 0.0867]) -Greedy action tensor([ 0.6987, -0.4348, 0.3992, 0.9422]) tensor([0.2995, 0.0964, 0.2220, 0.3821]) -Greedy action tensor([ 0.7939, -0.8257, 1.3410, 0.2084]) tensor([0.2871, 0.0568, 0.4962, 0.1599]) -Greedy action tensor([ 1.4143, -0.3837, 0.5291, 0.6757]) tensor([0.4864, 0.0806, 0.2007, 0.2324]) -Greedy action tensor([-0.0016, -0.1325, 0.3211, 0.3620]) tensor([0.2129, 0.1868, 0.2940, 0.3063]) -Greedy action tensor([ 0.1716, -1.9240, -0.8558, 1.2703]) tensor([0.2231, 0.0274, 0.0799, 0.6695]) -Greedy action tensor([-0.8526, -0.5832, -0.5659, 0.0143]) tensor([0.1661, 0.2175, 0.2212, 0.3952]) -Greedy action tensor([-5.0037e-01, -1.3248e-01, 9.6584e-01, -3.5173e-04]) tensor([0.1187, 0.1715, 0.5142, 0.1957]) -Greedy action tensor([-0.0619, 0.6724, 0.1538, 0.3952]) tensor([0.1694, 0.3530, 0.2101, 0.2675]) -Greedy action tensor([ 0.5220, -0.8112, 1.2201, -0.4429]) tensor([0.2736, 0.0721, 0.5500, 0.1043]) -Greedy action tensor([ 0.3332, -1.2352, 0.2010, 0.2958]) tensor([0.3281, 0.0684, 0.2875, 0.3160]) -Greedy action tensor([ 1.0337, -0.9170, 1.1553, 0.0289]) tensor([0.3791, 0.0539, 0.4282, 0.1388]) -Greedy action tensor([-0.0525, 1.0633, 0.2300, 1.2771]) tensor([0.1092, 0.3333, 0.1448, 0.4127]) -Greedy action tensor([ 0.3866, -1.6468, 0.7359, -0.0316]) tensor([0.3118, 0.0408, 0.4422, 0.2052]) -Greedy action tensor([-0.3536, -0.3252, -0.9563, 0.6053]) tensor([0.1929, 0.1984, 0.1056, 0.5031]) -Greedy action tensor([-0.7510, -0.7620, 0.4910, -1.0727]) tensor([0.1619, 0.1601, 0.5606, 0.1174]) -Greedy action tensor([ 1.1779, 0.4617, -0.1708, 0.7221]) tensor([0.4198, 0.2051, 0.1090, 0.2661]) -Greedy action tensor([ 0.4871, -0.7604, -0.0202, 0.7684]) tensor([0.3111, 0.0894, 0.1873, 0.4122]) -Greedy action tensor([-0.2903, -0.3813, -0.2233, -1.0246]) tensor([0.2888, 0.2637, 0.3088, 0.1386]) -Greedy action tensor([ 0.0482, 0.0175, 0.0316, -0.2046]) tensor([0.2681, 0.2600, 0.2637, 0.2082]) -Greedy action tensor([ 0.0964, 0.5044, -0.5296, -0.2042]) tensor([0.2646, 0.3979, 0.1415, 0.1959]) -Greedy action tensor([ 0.5722, 0.2807, 0.1624, -0.1169]) tensor([0.3433, 0.2565, 0.2279, 0.1723]) -Greedy action tensor([ 0.2546, 0.3617, -0.4947, 0.9953]) tensor([0.2135, 0.2377, 0.1009, 0.4479]) -Greedy action tensor([ 1.2474, -0.4237, -0.4189, 1.4839]) tensor([0.3782, 0.0711, 0.0715, 0.4792]) -Greedy action tensor([0.8069, 0.3582, 0.2873, 0.9372]) tensor([0.2965, 0.1893, 0.1764, 0.3378]) -Greedy action tensor([ 0.2110, -0.7151, 1.3390, -0.3774]) tensor([0.1984, 0.0786, 0.6129, 0.1101]) -Greedy action tensor([ 1.0388, -2.3372, -0.1958, 1.1166]) tensor([0.4156, 0.0142, 0.1209, 0.4492]) -Greedy action tensor([ 0.2712, -1.2171, 1.0296, 0.3053]) tensor([0.2275, 0.0514, 0.4857, 0.2354]) -Greedy action tensor([ 0.8999, -0.0459, 0.6158, 0.0087]) tensor([0.3920, 0.1522, 0.2950, 0.1608]) -Greedy action tensor([ 0.6773, -0.1401, 1.2295, 0.0186]) tensor([0.2705, 0.1195, 0.4700, 0.1400]) -Greedy action tensor([-0.1601, -1.4939, -0.2326, -0.1496]) tensor([0.3121, 0.0822, 0.2903, 0.3154]) -Greedy action tensor([ 1.1563, -0.1524, -0.9474, 0.2333]) tensor([0.5588, 0.1510, 0.0682, 0.2220]) -Greedy action tensor([-0.2403, -0.2853, -0.8415, -0.3848]) tensor([0.2968, 0.2837, 0.1627, 0.2569]) -Greedy action tensor([ 0.9785, -0.4052, 0.1085, 0.7056]) tensor([0.4114, 0.1031, 0.1724, 0.3131]) -Greedy action tensor([ 1.0346, -0.1975, -0.6793, 1.1737]) tensor([0.3815, 0.1113, 0.0687, 0.4384]) -Greedy action tensor([-0.9706, -1.4443, 0.9759, 0.6162]) tensor([0.0740, 0.0461, 0.5183, 0.3617]) -Greedy action tensor([ 1.4487, -1.2509, 0.1457, 1.2311]) tensor([0.4666, 0.0314, 0.1268, 0.3753]) -Greedy action tensor([ 1.0782, -0.8532, -0.1383, 1.6193]) tensor([0.3166, 0.0459, 0.0938, 0.5438]) -Greedy action tensor([ 0.1413, -0.0780, 0.3373, -0.7863]) tensor([0.2928, 0.2352, 0.3562, 0.1158]) -Greedy action tensor([ 0.4025, -0.4207, -0.2896, 1.2650]) tensor([0.2321, 0.1019, 0.1162, 0.5498]) -Greedy action tensor([ 0.0910, 0.2128, -0.3994, 0.3322]) tensor([0.2491, 0.2813, 0.1525, 0.3170]) -Greedy action tensor([ 0.5569, -0.1759, 0.9501, 0.8862]) tensor([0.2298, 0.1104, 0.3404, 0.3194]) -Greedy action tensor([-2.8265e-01, 4.1246e-01, 3.6168e-04, 2.5830e-01]) tensor([0.1653, 0.3313, 0.2194, 0.2840]) -Greedy action tensor([ 1.3278, 0.7070, 0.4610, -0.2174]) tensor([0.4606, 0.2476, 0.1936, 0.0982]) -Greedy action tensor([ 1.8218, -0.5349, 1.9234, 0.5405]) tensor([0.4033, 0.0382, 0.4465, 0.1120]) -Greedy action tensor([-0.6964, 1.3026, 0.8175, -1.0510]) tensor([0.0734, 0.5417, 0.3335, 0.0515]) -Greedy action tensor([-0.8031, -0.0753, 0.1502, 0.4795]) tensor([0.1079, 0.2233, 0.2798, 0.3890]) -Greedy action tensor([ 0.6817, -0.3219, 1.8176, 0.9843]) tensor([0.1714, 0.0628, 0.5338, 0.2320]) -Greedy action tensor([-0.9102, -1.2790, -0.7606, 0.5513]) tensor([0.1396, 0.0965, 0.1621, 0.6018]) -Greedy action tensor([ 0.7132, -0.3834, 0.5995, 0.2573]) tensor([0.3496, 0.1168, 0.3120, 0.2216]) -Greedy action tensor([ 1.4918, 0.5946, -0.9871, 1.2411]) tensor([0.4405, 0.1796, 0.0369, 0.3429]) -Greedy action tensor([ 0.6918, -0.5547, 0.6774, 0.9980]) tensor([0.2754, 0.0792, 0.2714, 0.3740]) -Greedy action tensor([ 0.6214, -0.8697, -0.3053, 2.0351]) tensor([0.1745, 0.0393, 0.0691, 0.7172]) -Greedy action tensor([1.5585, 0.2588, 0.9471, 0.5230]) tensor([0.4608, 0.1256, 0.2500, 0.1636]) -Greedy action tensor([ 0.4655, -0.7472, -0.6249, 0.7875]) tensor([0.3319, 0.0987, 0.1115, 0.4579]) -Greedy action tensor([-0.3259, 0.1758, -0.2182, 0.9874]) tensor([0.1336, 0.2207, 0.1488, 0.4969]) -Greedy action tensor([-0.1278, -0.9838, -0.6562, 1.1329]) tensor([0.1804, 0.0767, 0.1064, 0.6365]) -Greedy action tensor([ 1.2971, 0.0357, -0.0409, -0.0046]) tensor([0.5502, 0.1558, 0.1443, 0.1497]) -Greedy action tensor([ 0.8290, -0.1785, -0.1991, -0.0057]) tensor([0.4637, 0.1693, 0.1658, 0.2012]) -Greedy action tensor([ 1.2148, -0.2607, -0.6894, 0.1131]) tensor([0.5848, 0.1337, 0.0871, 0.1943]) -Greedy action tensor([ 1.0352, -0.5032, -0.1993, 0.0349]) tensor([0.5338, 0.1146, 0.1553, 0.1963]) -Greedy action tensor([ 0.6582, -0.2382, 0.2228, -0.0461]) tensor([0.3922, 0.1600, 0.2538, 0.1939]) -Greedy action tensor([ 1.2313, -0.1435, -0.2612, 0.1440]) tensor([0.5510, 0.1393, 0.1239, 0.1858]) -Greedy action tensor([ 0.8668, -0.7611, 0.0472, 0.2059]) tensor([0.4644, 0.0912, 0.2046, 0.2398]) -Greedy action tensor([ 0.4661, -0.1106, -0.1854, -0.0995]) tensor([0.3772, 0.2119, 0.1966, 0.2143]) -Greedy action tensor([ 1.4316, -0.9738, -0.3877, 0.5081]) tensor([0.6063, 0.0547, 0.0983, 0.2407]) -Greedy action tensor([ 1.3308, -0.8696, -0.3383, 0.4333]) tensor([0.5859, 0.0649, 0.1104, 0.2388]) -Greedy action tensor([ 0.8034, -0.1163, -0.2755, 0.0308]) tensor([0.4545, 0.1812, 0.1545, 0.2099]) -Greedy action tensor([ 1.3481, -0.1893, -0.3473, 0.1707]) tensor([0.5860, 0.1259, 0.1075, 0.1805]) -Greedy action tensor([ 0.6414, -0.1739, -0.2173, -0.1555]) tensor([0.4316, 0.1910, 0.1829, 0.1945]) -Greedy action tensor([ 0.6238, -0.0754, -0.0657, 0.0113]) tensor([0.3936, 0.1956, 0.1975, 0.2133]) -Greedy action tensor([ 1.4339, -0.3564, -0.3445, 0.0864]) tensor([0.6267, 0.1046, 0.1059, 0.1629]) -Greedy action tensor([ 1.6907, -0.3279, -0.2430, 0.4033]) tensor([0.6437, 0.0855, 0.0931, 0.1777]) -Greedy action tensor([ 0.4991, -0.3145, -0.7155, 0.5707]) tensor([0.3553, 0.1575, 0.1055, 0.3817]) -Greedy action tensor([ 0.9609, -0.2973, 0.1557, -0.0725]) tensor([0.4792, 0.1362, 0.2142, 0.1705]) -Greedy action tensor([ 0.9115, -0.7746, -0.0793, 0.2179]) tensor([0.4863, 0.0901, 0.1806, 0.2430]) -Greedy action tensor([ 2.4450, -0.9591, -0.5822, 0.9895]) tensor([0.7605, 0.0253, 0.0368, 0.1774]) -Greedy action tensor([ 1.1931, -0.2777, -0.1519, 0.1527]) tensor([0.5424, 0.1246, 0.1413, 0.1916]) -Greedy action tensor([ 1.7309, -0.4459, -0.2826, 0.4995]) tensor([0.6499, 0.0737, 0.0868, 0.1897]) -Greedy action tensor([ 1.0888, -0.4372, -0.4933, 0.3423]) tensor([0.5272, 0.1146, 0.1083, 0.2499]) -Greedy action tensor([ 0.8834, -0.2521, -0.2333, 0.2446]) tensor([0.4594, 0.1476, 0.1504, 0.2426]) -Greedy action tensor([ 1.3850, -0.5081, -0.0975, 0.1892]) tensor([0.5952, 0.0896, 0.1352, 0.1800]) -Greedy action tensor([ 1.7584, 0.0709, -0.2957, 0.1742]) tensor([0.6586, 0.1218, 0.0844, 0.1351]) -Greedy action tensor([ 1.1058, -0.2559, -0.9705, 0.5725]) tensor([0.5081, 0.1302, 0.0637, 0.2981]) -Greedy action tensor([ 1.3539, -0.0535, -0.2240, 0.2649]) tensor([0.5594, 0.1369, 0.1155, 0.1883]) -Greedy action tensor([ 1.2277, -0.7291, -0.1431, 0.3057]) tensor([0.5578, 0.0788, 0.1416, 0.2218]) -Greedy action tensor([ 1.2250, -0.5757, -0.4404, 0.4147]) tensor([0.5559, 0.0918, 0.1051, 0.2472]) -Greedy action tensor([ 1.5833, -0.7012, 0.0893, 0.0595]) tensor([0.6476, 0.0659, 0.1454, 0.1411]) -Greedy action tensor([ 0.9001, 0.0094, -0.1040, -0.3751]) tensor([0.4863, 0.1996, 0.1782, 0.1359]) -Greedy action tensor([ 1.0862, -0.3883, -0.0543, 0.4286]) tensor([0.4839, 0.1108, 0.1547, 0.2507]) -Greedy action tensor([ 1.1775, -0.1643, -0.2841, 0.0542]) tensor([0.5499, 0.1437, 0.1275, 0.1788]) -Greedy action tensor([ 1.6220, -0.2800, -0.4265, 0.4861]) tensor([0.6253, 0.0933, 0.0806, 0.2008]) -Greedy action tensor([ 1.0594, -0.5376, -0.4021, 0.4748]) tensor([0.5021, 0.1017, 0.1164, 0.2798]) -Greedy action tensor([ 2.0803, -0.9612, -0.3317, 0.5065]) tensor([0.7437, 0.0355, 0.0667, 0.1541]) -Greedy action tensor([ 1.3243, -0.5258, -0.7715, 0.5201]) tensor([0.5788, 0.0910, 0.0712, 0.2590]) -Greedy action tensor([ 0.9688, -0.3174, -0.3959, 0.3843]) tensor([0.4787, 0.1323, 0.1223, 0.2668]) -Greedy action tensor([ 1.1696, -0.1873, -0.4193, -0.0097]) tensor([0.5652, 0.1455, 0.1154, 0.1738]) -Greedy action tensor([ 1.0777, -0.2893, -0.2442, 0.0632]) tensor([0.5308, 0.1353, 0.1415, 0.1924]) -Greedy action tensor([ 1.1330, -0.1849, 0.0109, 0.0435]) tensor([0.5182, 0.1387, 0.1687, 0.1743]) -Greedy action tensor([ 0.8318, -0.0026, -0.1829, -0.2147]) tensor([0.4656, 0.2021, 0.1688, 0.1635]) -Greedy action tensor([ 1.3880, -0.7836, -0.5977, 0.9040]) tensor([0.5355, 0.0610, 0.0735, 0.3300]) -Greedy action tensor([ 1.0012, -0.1906, -0.2811, 0.1707]) tensor([0.4958, 0.1506, 0.1375, 0.2161]) -Greedy action tensor([ 1.8409, -0.1845, -0.7934, 0.1757]) tensor([0.7179, 0.0947, 0.0515, 0.1358]) -Greedy action tensor([ 1.2007, 0.4202, 0.3553, -0.2004]) tensor([0.4686, 0.2147, 0.2012, 0.1154]) -Greedy action tensor([ 1.3646, -0.4046, -0.2327, 0.3549]) tensor([0.5756, 0.0981, 0.1165, 0.2097]) -Greedy action tensor([ 0.6381, -0.3981, -0.0452, 0.0629]) tensor([0.4128, 0.1465, 0.2084, 0.2323]) -Greedy action tensor([ 1.3544, -0.5489, -0.4011, 0.2686]) tensor([0.6026, 0.0898, 0.1041, 0.2035]) -Greedy action tensor([ 1.1047, -0.3000, -0.1467, 0.0829]) tensor([0.5287, 0.1298, 0.1513, 0.1903]) -Greedy action tensor([ 1.3201, -0.5854, -0.3790, 0.1264]) tensor([0.6117, 0.0910, 0.1119, 0.1854]) -Greedy action tensor([ 1.2662, -0.3380, -0.4450, -0.2103]) tensor([0.6211, 0.1249, 0.1122, 0.1419]) -Greedy action tensor([ 1.4598, -0.8497, -0.3698, 0.4090]) tensor([0.6213, 0.0617, 0.0997, 0.2173]) -Greedy action tensor([ 0.9464, -0.5663, -0.3031, 0.3089]) tensor([0.4913, 0.1082, 0.1408, 0.2597]) -Greedy action tensor([ 1.8331, -0.7491, -0.2597, 0.6592]) tensor([0.6631, 0.0501, 0.0818, 0.2050]) -Greedy action tensor([ 1.7335, -0.7507, -0.4544, 0.1517]) tensor([0.7137, 0.0595, 0.0800, 0.1467]) -Greedy action tensor([ 0.8777, -0.2989, 0.1348, -0.1062]) tensor([0.4634, 0.1429, 0.2205, 0.1732]) -Greedy action tensor([ 0.6538, -0.2870, -0.3678, 0.0997]) tensor([0.4301, 0.1679, 0.1549, 0.2471]) -Greedy action tensor([ 0.7103, -0.1742, -0.3915, 0.2053]) tensor([0.4258, 0.1758, 0.1415, 0.2570]) -Greedy action tensor([ 1.2014, -0.2713, -0.4516, 0.2280]) tensor([0.5560, 0.1275, 0.1065, 0.2101]) -Greedy action tensor([ 0.2135, -0.3205, -0.2566, 0.2807]) tensor([0.3048, 0.1787, 0.1905, 0.3260]) -Greedy action tensor([ 1.2079, -0.5520, -0.1842, 0.2542]) tensor([0.5537, 0.0953, 0.1376, 0.2134]) -Greedy action tensor([ 0.8680, -0.3354, -0.1562, 0.2779]) tensor([0.4518, 0.1356, 0.1622, 0.2504]) -Greedy action tensor([ 1.0395, -0.3850, -0.0024, 0.0766]) tensor([0.5063, 0.1218, 0.1786, 0.1933]) -Greedy action tensor([ 1.2443, -0.2766, -0.0924, 0.0918]) tensor([0.5565, 0.1216, 0.1462, 0.1757]) -Greedy action tensor([ 0.3485, -0.1146, -0.6060, 0.3944]) tensor([0.3267, 0.2056, 0.1258, 0.3420]) -Greedy action tensor([ 1.5149, -0.5670, -0.2086, 0.5208]) tensor([0.5977, 0.0745, 0.1066, 0.2212]) -Greedy action tensor([ 1.4107, -0.1897, -0.4532, 0.4230]) tensor([0.5783, 0.1167, 0.0897, 0.2154]) -Greedy action tensor([ 1.0754, -0.3851, -0.3297, 0.2207]) tensor([0.5255, 0.1220, 0.1289, 0.2235]) -Greedy action tensor([ 0.7672, -0.4162, -0.2008, 0.1403]) tensor([0.4504, 0.1379, 0.1711, 0.2406]) -Greedy action tensor([ 1.0116, -0.1421, -0.3338, -0.2342]) tensor([0.5366, 0.1693, 0.1397, 0.1544]) -Greedy action tensor([ 1.4674, -0.9444, -0.3680, 0.6839]) tensor([0.5862, 0.0526, 0.0935, 0.2678]) -Greedy action tensor([ 1.1340, 0.0761, -0.1180, -0.2169]) tensor([0.5285, 0.1835, 0.1511, 0.1369]) -Greedy action tensor([ 1.8967, -0.8427, -0.3523, 0.5425]) tensor([0.7001, 0.0452, 0.0739, 0.1807]) -Greedy action tensor([ 1.4898, -0.3253, -0.3558, -0.0500]) tensor([0.6514, 0.1061, 0.1029, 0.1397]) -Greedy action tensor([ 0.5627, -0.1119, 0.2243, 0.0821]) tensor([0.3520, 0.1793, 0.2510, 0.2177]) -Greedy action tensor([ 1.5055, -0.7215, -0.1712, 0.4474]) tensor([0.6090, 0.0657, 0.1139, 0.2114]) -Greedy action tensor([ 1.3355, -0.5387, -0.3819, 0.5074]) tensor([0.5650, 0.0867, 0.1014, 0.2468]) -Greedy action tensor([ 1.2301, -0.5563, -0.2133, 0.0578]) tensor([0.5836, 0.0978, 0.1378, 0.1807]) -Greedy action tensor([ 1.1907, -0.1805, -0.3444, 0.0939]) tensor([0.5546, 0.1408, 0.1195, 0.1852]) -Greedy action tensor([-1.0249, -0.5719, 0.2375, 0.4827]) tensor([0.0941, 0.1481, 0.3327, 0.4251]) -Greedy action tensor([-1.9131, -0.7024, 1.0814, 0.1936]) tensor([0.0307, 0.1031, 0.6136, 0.2525]) -Greedy action tensor([-1.0783, -0.6430, 1.2594, 1.4129]) tensor([0.0400, 0.0619, 0.4147, 0.4834]) -Greedy action tensor([-1.9581, -0.5418, 1.2921, 0.6347]) tensor([0.0226, 0.0931, 0.5825, 0.3019]) -Greedy action tensor([-1.3254, -0.6548, 0.8373, 0.8294]) tensor([0.0493, 0.0964, 0.4288, 0.4254]) -Greedy action tensor([-1.6901, -0.5145, 0.5565, -0.0122]) tensor([0.0525, 0.1701, 0.4963, 0.2811]) -Greedy action tensor([-1.8528, -0.4565, 0.7001, -0.0210]) tensor([0.0414, 0.1674, 0.5323, 0.2588]) -Greedy action tensor([-1.8143e+00, -4.5744e-01, 7.3717e-01, -1.3200e-03]) tensor([0.0419, 0.1629, 0.5380, 0.2571]) -Greedy action tensor([-1.8555, -0.4321, 0.6234, -0.1151]) tensor([0.0439, 0.1822, 0.5236, 0.2502]) -Greedy action tensor([-1.6879, -0.5054, 0.7757, -0.4166]) tensor([0.0511, 0.1667, 0.6001, 0.1821]) -Greedy action tensor([-1.7360, 0.1155, 0.4891, -0.1258]) tensor([0.0462, 0.2945, 0.4279, 0.2314]) -Greedy action tensor([-0.7453, -0.6104, 0.2685, -0.0581]) tensor([0.1452, 0.1661, 0.4001, 0.2886]) -Greedy action tensor([-1.7290, -0.2939, 0.6529, -0.3161]) tensor([0.0497, 0.2086, 0.5377, 0.2040]) -Greedy action tensor([-0.6570, -0.4978, 1.1590, 1.6229]) tensor([0.0553, 0.0648, 0.3397, 0.5402]) -Greedy action tensor([-0.7922, 0.3220, 0.0047, -0.3872]) tensor([0.1288, 0.3924, 0.2857, 0.1931]) -Greedy action tensor([-1.2393, -0.6637, 0.4860, -0.3576]) tensor([0.0925, 0.1645, 0.5195, 0.2235]) -Greedy action tensor([-0.9402, -0.5235, 0.3602, 0.8894]) tensor([0.0805, 0.1221, 0.2956, 0.5018]) -Greedy action tensor([-1.7506, -0.4633, 0.5708, -0.0475]) tensor([0.0493, 0.1784, 0.5019, 0.2704]) -Greedy action tensor([-1.5262, -0.3888, 0.5979, 0.5325]) tensor([0.0492, 0.1535, 0.4117, 0.3856]) -Greedy action tensor([-1.2617, -0.5787, 0.3024, 0.2051]) tensor([0.0827, 0.1637, 0.3951, 0.3585]) -Greedy action tensor([-0.8921, -0.5625, 0.1808, 0.2800]) tensor([0.1171, 0.1628, 0.3422, 0.3779]) -Greedy action tensor([-0.6579, -0.5506, 0.1609, 0.3067]) tensor([0.1428, 0.1589, 0.3238, 0.3746]) -Greedy action tensor([-0.8672, -0.3196, 1.2172, 1.4511]) tensor([0.0478, 0.0826, 0.3842, 0.4854]) -Greedy action tensor([-1.8873, -0.5753, 0.4301, -0.2031]) tensor([0.0494, 0.1834, 0.5012, 0.2661]) -Greedy action tensor([-1.8343, -0.4710, 0.6707, -0.0090]) tensor([0.0428, 0.1674, 0.5242, 0.2656]) -Greedy action tensor([-1.8413, -0.4517, 0.6148, -0.1228]) tensor([0.0449, 0.1804, 0.5240, 0.2506]) -Greedy action tensor([-1.8349, -0.4819, 0.6514, -0.0626]) tensor([0.0439, 0.1699, 0.5277, 0.2584]) -Greedy action tensor([-0.2740, -0.3581, 1.0438, 1.6404]) tensor([0.0804, 0.0739, 0.3003, 0.5454]) -Greedy action tensor([-1.6897, -0.7031, -0.0094, -0.3530]) tensor([0.0778, 0.2086, 0.4175, 0.2961]) -Greedy action tensor([-1.7703, -0.4351, 0.5967, -0.0688]) tensor([0.0477, 0.1814, 0.5091, 0.2617]) -Greedy action tensor([-1.9398, -0.4509, 0.6650, -0.1789]) tensor([0.0404, 0.1789, 0.5460, 0.2348]) -Greedy action tensor([-0.6458, 0.1254, 0.1414, -0.2139]) tensor([0.1449, 0.3134, 0.3185, 0.2232]) -Greedy action tensor([-0.7101, -0.2982, 0.3684, 0.7687]) tensor([0.1016, 0.1535, 0.2989, 0.4460]) -Greedy action tensor([-1.9586, -1.0150, 0.3767, -0.2386]) tensor([0.0513, 0.1318, 0.5302, 0.2866]) -Greedy action tensor([-1.8825, -0.4646, 0.7009, -0.0719]) tensor([0.0408, 0.1686, 0.5408, 0.2497]) -Greedy action tensor([-2.0458, -0.4449, 0.5207, 0.2024]) tensor([0.0352, 0.1743, 0.4577, 0.3329]) -Greedy action tensor([-1.4055, -0.4407, 0.5618, 0.6784]) tensor([0.0532, 0.1395, 0.3802, 0.4272]) -Greedy action tensor([-0.9876, -0.5967, 0.2889, 0.3317]) tensor([0.1020, 0.1508, 0.3656, 0.3816]) -Greedy action tensor([-0.4076, -0.0482, 0.1052, 0.0301]) tensor([0.1769, 0.2535, 0.2955, 0.2741]) -Greedy action tensor([-1.4168, -0.3918, 0.1855, 0.3869]) tensor([0.0675, 0.1880, 0.3349, 0.4096]) -Greedy action tensor([-1.9092, -0.6513, 0.7606, 0.0022]) tensor([0.0389, 0.1368, 0.5614, 0.2630]) -Greedy action tensor([-1.3170, -0.0089, 0.4504, 0.5945]) tensor([0.0577, 0.2136, 0.3381, 0.3905]) -Greedy action tensor([-1.7083, -0.4680, 0.5745, 0.0299]) tensor([0.0501, 0.1733, 0.4915, 0.2851]) -Greedy action tensor([-1.5761, -0.7040, 1.1287, 0.6072]) tensor([0.0367, 0.0879, 0.5493, 0.3261]) -Greedy action tensor([-1.7257, -0.5173, 0.6287, 0.0629]) tensor([0.0479, 0.1605, 0.5049, 0.2867]) -Greedy action tensor([-1.5295, -0.4943, 0.5362, 0.2321]) tensor([0.0570, 0.1606, 0.4502, 0.3321]) -Greedy action tensor([-1.3981, -0.5587, 0.4605, 0.3200]) tensor([0.0653, 0.1513, 0.4192, 0.3642]) -Greedy action tensor([-1.0828, -0.1158, 0.5489, -0.6444]) tensor([0.0972, 0.2555, 0.4967, 0.1506]) -Greedy action tensor([-1.9099, -0.4546, 0.6488, -0.1590]) tensor([0.0417, 0.1788, 0.5391, 0.2403]) -Greedy action tensor([-0.1658, -0.0034, 0.2047, 0.3253]) tensor([0.1902, 0.2237, 0.2754, 0.3107]) -Greedy action tensor([-0.9635, -0.4936, 0.4459, 0.7070]) tensor([0.0833, 0.1332, 0.3409, 0.4426]) -Greedy action tensor([-0.9755, -0.6717, 0.2380, 0.1970]) tensor([0.1117, 0.1514, 0.3760, 0.3609]) -Greedy action tensor([-1.5634, -0.2968, 0.4454, 0.0839]) tensor([0.0582, 0.2064, 0.4335, 0.3020]) -Greedy action tensor([-1.2144, -0.6109, 0.3004, 0.2129]) tensor([0.0866, 0.1584, 0.3940, 0.3610]) -Greedy action tensor([-1.5719, -0.4968, 0.5444, -0.2228]) tensor([0.0622, 0.1822, 0.5160, 0.2396]) -Greedy action tensor([-0.3397, -0.3757, 0.7015, 1.5479]) tensor([0.0877, 0.0846, 0.2485, 0.5792]) -Greedy action tensor([-1.7702, -0.5324, 1.0052, 0.4431]) tensor([0.0337, 0.1163, 0.5413, 0.3086]) -Greedy action tensor([-1.9047, -0.4608, 0.6643, -0.1277]) tensor([0.0413, 0.1751, 0.5393, 0.2443]) -Greedy action tensor([-1.2691, -0.5639, 0.4256, 0.3572]) tensor([0.0738, 0.1493, 0.4017, 0.3752]) -Greedy action tensor([-1.8533, -0.3694, 0.6206, -0.0991]) tensor([0.0434, 0.1913, 0.5147, 0.2506]) -Greedy action tensor([-1.1926, -0.5378, 0.2737, 0.3250]) tensor([0.0846, 0.1629, 0.3666, 0.3859]) -Greedy action tensor([-1.8241, 0.4726, 0.7249, 0.0849]) tensor([0.0328, 0.3261, 0.4197, 0.2213]) -Greedy action tensor([-1.3371, -0.8764, 0.8430, 1.0730]) tensor([0.0443, 0.0702, 0.3920, 0.4934]) -Greedy action tensor([-0.4872, -0.5315, 0.2014, 0.1637]) tensor([0.1705, 0.1631, 0.3395, 0.3269]) -Greedy action tensor([-1.7822, -0.1592, 0.5500, -0.0934]) tensor([0.0459, 0.2327, 0.4729, 0.2485]) -Greedy action tensor([-1.7630, -0.4808, 0.6107, -0.0147]) tensor([0.0474, 0.1709, 0.5092, 0.2724]) -Greedy action tensor([-1.4034, -0.5450, 0.7766, 0.7365]) tensor([0.0483, 0.1140, 0.4273, 0.4105]) -Greedy action tensor([-1.5792, -0.5226, 0.4767, 0.1016]) tensor([0.0586, 0.1686, 0.4580, 0.3147]) -Greedy action tensor([-1.1543, 0.1979, 0.6130, -0.6720]) tensor([0.0810, 0.3133, 0.4745, 0.1313]) -Greedy action tensor([-1.9058, -0.4828, 0.7421, -0.0552]) tensor([0.0390, 0.1618, 0.5509, 0.2482]) -Greedy action tensor([-1.7716, -0.4004, 0.7264, 0.3539]) tensor([0.0393, 0.1547, 0.4773, 0.3288]) -Greedy action tensor([-1.1765, -0.5706, 0.2540, 0.3026]) tensor([0.0877, 0.1607, 0.3666, 0.3849]) -Greedy action tensor([-1.4432, -0.5419, 0.4566, 0.3986]) tensor([0.0608, 0.1497, 0.4062, 0.3833]) -Greedy action tensor([-1.8215, -0.5171, 0.6389, -0.0538]) tensor([0.0449, 0.1656, 0.5262, 0.2632]) -Greedy action tensor([-1.8700, -0.4450, 0.6322, -0.1294]) tensor([0.0433, 0.1802, 0.5293, 0.2471]) -Greedy action tensor([-1.3903, -0.5736, 0.5318, 0.2639]) tensor([0.0652, 0.1476, 0.4460, 0.3412]) -Greedy action tensor([-1.5535, -0.4848, 0.6103, 0.3045]) tensor([0.0526, 0.1530, 0.4575, 0.3369]) -Greedy action tensor([-1.7690, -0.5256, 0.6202, -0.0496]) tensor([0.0477, 0.1655, 0.5204, 0.2664]) -Greedy action tensor([-1.6684, -0.2937, 0.7096, 0.2025]) tensor([0.0450, 0.1779, 0.4850, 0.2921]) -Greedy action tensor([-1.5150, -0.5422, 0.8595, 0.7149]) tensor([0.0422, 0.1117, 0.4536, 0.3925]) -Greedy action tensor([-1.2142, -0.5940, 0.3266, 0.4010]) tensor([0.0796, 0.1481, 0.3718, 0.4005]) -Greedy action tensor([ 0.9574, -0.4000, 0.0241, -0.4853]) tensor([0.5300, 0.1364, 0.2084, 0.1252]) -Greedy action tensor([ 1.1162, -0.6085, -0.1787, -0.6046]) tensor([0.6131, 0.1093, 0.1679, 0.1097]) -Greedy action tensor([ 0.3510, -0.1328, -0.1225, -0.1761]) tensor([0.3534, 0.2179, 0.2201, 0.2086]) -Greedy action tensor([ 0.6995, -0.6156, -0.0998, -0.2556]) tensor([0.4755, 0.1277, 0.2138, 0.1830]) -Greedy action tensor([ 0.7262, -0.7530, -0.0900, -0.3382]) tensor([0.4963, 0.1131, 0.2194, 0.1712]) -Greedy action tensor([ 0.7747, -0.2881, -0.1422, -0.5156]) tensor([0.4950, 0.1710, 0.1979, 0.1362]) -Greedy action tensor([ 0.6103, -0.4616, -0.0358, -0.4849]) tensor([0.4544, 0.1556, 0.2381, 0.1520]) -Greedy action tensor([ 0.9615, -1.2126, -0.0157, -0.5906]) tensor([0.5876, 0.0668, 0.2212, 0.1245]) -Greedy action tensor([ 0.8000, -0.4175, -0.0052, -0.3944]) tensor([0.4888, 0.1447, 0.2185, 0.1480]) -Greedy action tensor([ 0.4451, -0.1123, -0.0745, -0.2328]) tensor([0.3738, 0.2141, 0.2223, 0.1898]) -Greedy action tensor([ 0.4087, -0.1058, -0.0602, -0.3593]) tensor([0.3721, 0.2224, 0.2328, 0.1726]) -Greedy action tensor([ 0.9295, -0.8125, 0.0772, -0.5771]) tensor([0.5485, 0.0961, 0.2339, 0.1216]) -Greedy action tensor([ 0.9071, -0.5945, -0.3288, -0.5790]) tensor([0.5749, 0.1281, 0.1670, 0.1301]) -Greedy action tensor([ 0.5235, -0.0201, 0.0359, -0.1982]) tensor([0.3730, 0.2166, 0.2291, 0.1813]) -Greedy action tensor([ 0.5899, -0.2067, -0.0245, -0.2636]) tensor([0.4136, 0.1865, 0.2238, 0.1762]) -Greedy action tensor([ 0.7313, -0.0329, -0.0490, -0.4862]) tensor([0.4505, 0.2098, 0.2064, 0.1333]) -Greedy action tensor([ 0.3233, -0.1689, -0.0976, -0.3919]) tensor([0.3627, 0.2217, 0.2381, 0.1774]) -Greedy action tensor([ 0.7385, -0.9145, 0.1097, -0.5336]) tensor([0.4988, 0.0955, 0.2660, 0.1398]) -Greedy action tensor([ 0.4782, -0.1568, 0.0349, -0.2649]) tensor([0.3777, 0.2002, 0.2425, 0.1797]) -Greedy action tensor([ 0.4946, -0.3315, 0.0164, -0.5585]) tensor([0.4155, 0.1819, 0.2576, 0.1450]) -Greedy action tensor([ 0.7321, -0.6777, -0.2009, -0.3596]) tensor([0.5068, 0.1238, 0.1993, 0.1701]) -Greedy action tensor([ 0.3547, -0.0148, -0.0588, -0.2788]) tensor([0.3468, 0.2397, 0.2294, 0.1841]) -Greedy action tensor([ 0.4752, -0.0669, -0.1199, -0.3608]) tensor([0.3896, 0.2266, 0.2149, 0.1689]) -Greedy action tensor([ 0.3218, -0.0770, -0.0290, -0.2523]) tensor([0.3403, 0.2284, 0.2396, 0.1917]) -Greedy action tensor([ 0.7349, -0.5461, -0.0533, -0.5565]) tensor([0.4982, 0.1384, 0.2265, 0.1369]) -Greedy action tensor([ 0.9671, -0.4728, 0.0179, -0.4555]) tensor([0.5362, 0.1270, 0.2075, 0.1293]) -Greedy action tensor([ 0.7154, -0.2795, -0.0684, -0.2666]) tensor([0.4543, 0.1680, 0.2075, 0.1702]) -Greedy action tensor([ 0.3370, -0.2479, 0.0199, -0.2144]) tensor([0.3495, 0.1947, 0.2545, 0.2013]) -Greedy action tensor([ 0.1887, -0.1973, -0.0240, -0.2936]) tensor([0.3220, 0.2189, 0.2603, 0.1988]) -Greedy action tensor([ 0.2162, 0.2534, 0.2165, -0.2587]) tensor([0.2732, 0.2836, 0.2733, 0.1699]) -Greedy action tensor([ 0.5863, -0.3679, -0.1647, -0.2956]) tensor([0.4403, 0.1696, 0.2078, 0.1823]) -Greedy action tensor([ 0.4019, -0.2412, -0.0169, -0.3682]) tensor([0.3779, 0.1986, 0.2486, 0.1749]) -Greedy action tensor([ 0.4973, 0.0105, -0.0834, -0.0972]) tensor([0.3669, 0.2255, 0.2053, 0.2024]) -Greedy action tensor([ 0.4332, 0.1040, -0.0422, -0.1341]) tensor([0.3439, 0.2474, 0.2138, 0.1950]) -Greedy action tensor([ 0.5531, -0.2582, -0.0602, -0.1966]) tensor([0.4068, 0.1807, 0.2203, 0.1922]) -Greedy action tensor([ 0.4471, 0.4601, -0.1295, -0.4620]) tensor([0.3358, 0.3402, 0.1887, 0.1353]) -Greedy action tensor([ 0.6573, -0.5771, -0.0981, -0.5651]) tensor([0.4865, 0.1416, 0.2286, 0.1433]) -Greedy action tensor([ 0.8552, -0.3955, -0.0398, -0.5342]) tensor([0.5144, 0.1473, 0.2102, 0.1282]) -Greedy action tensor([ 0.8842, -0.7905, -0.0288, -0.3660]) tensor([0.5333, 0.0999, 0.2140, 0.1528]) -Greedy action tensor([ 0.8616, -0.4365, 0.0295, -0.6898]) tensor([0.5208, 0.1422, 0.2266, 0.1104]) -Greedy action tensor([ 0.5066, -0.2722, 0.1864, -0.4421]) tensor([0.3888, 0.1784, 0.2822, 0.1505]) -Greedy action tensor([ 0.9919, -0.5995, -0.1269, -0.5167]) tensor([0.5709, 0.1163, 0.1865, 0.1263]) -Greedy action tensor([ 0.8021, -0.8770, 0.0857, -0.6294]) tensor([0.5225, 0.0975, 0.2552, 0.1248]) -Greedy action tensor([ 0.6297, -0.4277, 0.0821, -0.3401]) tensor([0.4339, 0.1507, 0.2509, 0.1645]) -Greedy action tensor([ 0.7540, -0.5688, -0.1021, -0.3840]) tensor([0.4971, 0.1324, 0.2112, 0.1593]) -Greedy action tensor([ 0.7680, -0.2618, -0.2771, -0.3136]) tensor([0.4883, 0.1744, 0.1717, 0.1656]) -Greedy action tensor([ 0.9224, -0.4110, -0.0634, -0.3893]) tensor([0.5246, 0.1383, 0.1958, 0.1413]) -Greedy action tensor([ 0.9118, -0.6219, -0.0476, -0.5824]) tensor([0.5484, 0.1183, 0.2101, 0.1231]) -Greedy action tensor([ 0.6610, -0.2940, 0.0575, -0.3932]) tensor([0.4386, 0.1688, 0.2398, 0.1528]) -Greedy action tensor([ 0.6335, -0.0702, 0.1118, -0.2069]) tensor([0.3968, 0.1963, 0.2355, 0.1713]) -Greedy action tensor([ 0.5303, -0.3593, -0.0890, -0.4650]) tensor([0.4313, 0.1772, 0.2322, 0.1594]) -Greedy action tensor([ 0.6484, -0.3657, -0.0952, -0.4598]) tensor([0.4612, 0.1673, 0.2193, 0.1523]) -Greedy action tensor([ 0.4707, -0.2975, -0.0838, -0.2271]) tensor([0.3943, 0.1829, 0.2265, 0.1963]) -Greedy action tensor([ 1.0983, -0.5673, 0.1476, -0.7405]) tensor([0.5765, 0.1090, 0.2228, 0.0917]) -Greedy action tensor([ 0.9316, -0.1650, -0.0395, -0.5368]) tensor([0.5147, 0.1719, 0.1949, 0.1185]) -Greedy action tensor([ 0.5095, -0.0140, -0.0973, -0.1876]) tensor([0.3794, 0.2248, 0.2068, 0.1890]) -Greedy action tensor([ 0.8221, 0.1803, -0.2147, -0.4075]) tensor([0.4601, 0.2422, 0.1632, 0.1345]) -Greedy action tensor([ 0.5773, -0.0290, 0.0067, -0.2790]) tensor([0.3944, 0.2151, 0.2229, 0.1675]) -Greedy action tensor([ 0.8064, -0.4839, 0.0105, -0.5354]) tensor([0.5031, 0.1384, 0.2270, 0.1315]) -Greedy action tensor([ 0.1960, -0.1454, -0.0842, -0.1704]) tensor([0.3165, 0.2250, 0.2391, 0.2194]) -Greedy action tensor([ 0.5225, -0.3537, -0.2065, -0.3269]) tensor([0.4298, 0.1790, 0.2074, 0.1838]) -Greedy action tensor([ 0.4461, -0.2011, -0.0660, -0.3912]) tensor([0.3913, 0.2048, 0.2345, 0.1694]) -Greedy action tensor([ 0.7678, -0.8118, 0.0471, -0.7877]) tensor([0.5253, 0.1083, 0.2555, 0.1109]) -Greedy action tensor([ 0.4811, -0.1916, -0.0633, -0.4390]) tensor([0.4018, 0.2050, 0.2331, 0.1601]) -Greedy action tensor([ 0.7759, -0.8996, -0.1042, -0.3349]) tensor([0.5178, 0.0969, 0.2147, 0.1705]) -Greedy action tensor([ 0.6569, -0.4770, 0.0095, -0.5448]) tensor([0.4660, 0.1500, 0.2439, 0.1401]) -Greedy action tensor([ 0.3756, -0.3719, -0.1190, -0.6043]) tensor([0.4067, 0.1926, 0.2480, 0.1527]) -Greedy action tensor([ 0.3325, 0.0763, -0.1297, -0.1996]) tensor([0.3343, 0.2587, 0.2106, 0.1964]) -Greedy action tensor([ 0.4608, -0.1919, -0.0696, -0.2110]) tensor([0.3817, 0.1987, 0.2246, 0.1950]) -Greedy action tensor([ 0.6923, -0.5517, 0.0376, -0.4197]) tensor([0.4680, 0.1349, 0.2432, 0.1539]) -Greedy action tensor([ 0.4612, -0.0876, -0.0945, -0.1110]) tensor([0.3682, 0.2127, 0.2113, 0.2078]) -Greedy action tensor([ 0.4124, -0.0920, -0.0312, -0.3092]) tensor([0.3661, 0.2211, 0.2349, 0.1779]) -Greedy action tensor([ 0.3680, -0.0019, -0.1215, -0.1894]) tensor([0.3477, 0.2402, 0.2131, 0.1991]) -Greedy action tensor([ 0.5922, -0.1871, 0.0557, -0.4222]) tensor([0.4156, 0.1906, 0.2430, 0.1507]) -Greedy action tensor([ 0.4944, 0.0177, 0.0112, -0.0921]) tensor([0.3579, 0.2222, 0.2208, 0.1991]) -Greedy action tensor([ 0.3195, 0.0781, 0.0043, -0.1930]) tensor([0.3211, 0.2522, 0.2343, 0.1923]) -Greedy action tensor([ 0.7084, -0.5940, -0.0654, -0.3927]) tensor([0.4841, 0.1316, 0.2233, 0.1610]) -Greedy action tensor([ 0.3892, -0.5179, -0.1655, -0.2411]) tensor([0.3984, 0.1608, 0.2287, 0.2121]) -Greedy action tensor([ 0.6268, -0.4221, -0.0587, -0.3247]) tensor([0.4464, 0.1564, 0.2249, 0.1724]) -Greedy action tensor([ 0.5086, 0.0531, -0.0100, -0.2281]) tensor([0.3692, 0.2342, 0.2198, 0.1768]) -Greedy action tensor([ 0.5763, -0.1049, 0.0364, -0.2422]) tensor([0.3953, 0.2000, 0.2304, 0.1744]) -Greedy action tensor([ 0.1930, -1.2446, -0.2934, 0.2099]) tensor([0.3485, 0.0828, 0.2143, 0.3544]) -Greedy action tensor([1.1018, 0.2710, 0.6284, 0.2542]) tensor([0.4021, 0.1752, 0.2504, 0.1723]) -Greedy action tensor([ 0.9348, -1.2730, -0.2561, 0.7123]) tensor([0.4516, 0.0496, 0.1373, 0.3615]) -Greedy action tensor([ 0.3584, -2.0051, 0.7125, 1.2344]) tensor([0.2032, 0.0191, 0.2896, 0.4880]) -Greedy action tensor([-0.2762, 0.9550, -0.7652, 1.3963]) tensor([0.0965, 0.3305, 0.0592, 0.5138]) -Greedy action tensor([0.0763, 0.0442, 0.7704, 0.0206]) tensor([0.2034, 0.1970, 0.4072, 0.1924]) -Greedy action tensor([ 0.2149, 0.1515, -0.4247, 1.6952]) tensor([0.1458, 0.1368, 0.0769, 0.6405]) -Greedy action tensor([-0.4579, 0.2704, 0.0814, 1.2400]) tensor([0.0976, 0.2021, 0.1673, 0.5330]) -Greedy action tensor([ 1.1508, -0.7679, 1.1586, 0.5993]) tensor([0.3662, 0.0538, 0.3691, 0.2110]) -Greedy action tensor([0.9576, 0.1167, 0.7463, 1.4462]) tensor([0.2583, 0.1114, 0.2091, 0.4211]) -Greedy action tensor([ 2.1542, -0.9500, 1.0663, 1.6863]) tensor([0.4980, 0.0223, 0.1678, 0.3119]) -Greedy action tensor([ 1.9866, -0.6677, 0.9191, 1.1439]) tensor([0.5421, 0.0381, 0.1864, 0.2334]) -Greedy action tensor([1.2985, 0.2548, 1.4927, 0.0623]) tensor([0.3500, 0.1233, 0.4251, 0.1017]) -Greedy action tensor([-1.0243, -1.0016, 1.7740, -1.2698]) tensor([0.0520, 0.0532, 0.8541, 0.0407]) -Greedy action tensor([ 0.1196, -0.1197, -1.2606, 1.1811]) tensor([0.2029, 0.1597, 0.0510, 0.5864]) -Greedy action tensor([ 1.8825, -0.8000, 1.8877, 1.1767]) tensor([0.3895, 0.0266, 0.3915, 0.1923]) -Greedy action tensor([ 0.4513, 0.3979, -0.3681, 0.1122]) tensor([0.3225, 0.3057, 0.1421, 0.2297]) -Greedy action tensor([ 2.1240, -0.9404, 1.0319, 0.0454]) tensor([0.6634, 0.0310, 0.2226, 0.0830]) -Greedy action tensor([0.6756, 0.8189, 0.1510, 1.1996]) tensor([0.2255, 0.2602, 0.1334, 0.3808]) -Greedy action tensor([ 0.2001, -0.7936, -0.9028, 0.2655]) tensor([0.3611, 0.1337, 0.1198, 0.3854]) -Greedy action tensor([-0.7023, -0.2040, 0.1373, 0.5944]) tensor([0.1160, 0.1910, 0.2687, 0.4243]) -Greedy action tensor([ 1.2290, 0.0484, 1.8932, -0.6103]) tensor([0.2934, 0.0901, 0.5699, 0.0466]) -Greedy action tensor([1.6898, 0.4877, 0.6798, 1.0629]) tensor([0.4548, 0.1367, 0.1656, 0.2429]) -Greedy action tensor([ 1.1383, -2.4963, 0.2492, 0.3998]) tensor([0.5221, 0.0138, 0.2146, 0.2495]) -Greedy action tensor([ 1.0266, -0.4661, -0.1439, 0.2870]) tensor([0.4969, 0.1117, 0.1542, 0.2372]) -Greedy action tensor([ 1.4215, -1.7716, 0.5603, 0.8025]) tensor([0.4995, 0.0205, 0.2111, 0.2690]) -Greedy action tensor([ 1.5936, -0.3379, -0.3947, 0.7597]) tensor([0.5827, 0.0844, 0.0798, 0.2531]) -Greedy action tensor([ 1.6216, -0.6205, -0.4312, 1.1557]) tensor([0.5370, 0.0570, 0.0689, 0.3370]) -Greedy action tensor([ 0.3996, -0.0334, 0.7466, 1.0948]) tensor([0.1973, 0.1280, 0.2792, 0.3955]) -Greedy action tensor([ 0.1303, -1.9410, 0.3195, -0.0256]) tensor([0.3135, 0.0395, 0.3788, 0.2682]) -Greedy action tensor([-0.8590, -0.1196, 0.9505, -0.3183]) tensor([0.0916, 0.1918, 0.5593, 0.1573]) -Greedy action tensor([-0.1406, 0.6825, 0.5500, 0.1467]) tensor([0.1514, 0.3448, 0.3020, 0.2018]) -Greedy action tensor([ 1.1450, -0.0160, 0.5102, 1.1831]) tensor([0.3470, 0.1087, 0.1839, 0.3604]) -Greedy action tensor([ 1.2198, -0.2806, 1.7378, 0.9707]) tensor([0.2717, 0.0606, 0.4560, 0.2117]) -Greedy action tensor([ 1.1331, -1.2101, -0.4970, 1.3223]) tensor([0.4000, 0.0384, 0.0784, 0.4833]) -Greedy action tensor([0.6539, 0.1342, 0.2100, 1.1931]) tensor([0.2531, 0.1505, 0.1624, 0.4340]) -Greedy action tensor([ 0.4107, -0.3317, 1.6083, 1.0520]) tensor([0.1495, 0.0712, 0.4953, 0.2840]) -Greedy action tensor([ 0.1695, 0.7340, -0.4345, -0.5039]) tensor([0.2621, 0.4609, 0.1433, 0.1337]) -Greedy action tensor([-0.3531, -0.2087, 0.5774, -0.0253]) tensor([0.1645, 0.1900, 0.4171, 0.2283]) -Greedy action tensor([ 0.4725, -1.7636, -0.1918, 0.3617]) tensor([0.3973, 0.0425, 0.2045, 0.3557]) -Greedy action tensor([0.0123, 0.7674, 0.0372, 0.0614]) tensor([0.1922, 0.4089, 0.1970, 0.2019]) -Greedy action tensor([-0.2320, 0.2125, -0.6075, -0.2319]) tensor([0.2355, 0.3673, 0.1618, 0.2355]) -Greedy action tensor([-0.1260, -0.0754, 1.5667, 0.1228]) tensor([0.1140, 0.1200, 0.6197, 0.1463]) -Greedy action tensor([ 1.4411, -0.4423, 1.6510, 1.3032]) tensor([0.3070, 0.0467, 0.3788, 0.2675]) -Greedy action tensor([ 1.2583, -0.2784, 0.6256, 1.9381]) tensor([0.2688, 0.0578, 0.1428, 0.5306]) -Greedy action tensor([ 1.2094, -0.5806, 1.4807, 0.6409]) tensor([0.3284, 0.0548, 0.4308, 0.1860]) -Greedy action tensor([-0.2861, -1.8413, -0.3551, 0.1803]) tensor([0.2675, 0.0565, 0.2496, 0.4264]) -Greedy action tensor([ 0.7750, 0.0437, -0.3442, 1.1125]) tensor([0.3116, 0.1500, 0.1017, 0.4367]) -Greedy action tensor([-0.1653, 0.7986, 1.2141, 0.3603]) tensor([0.1077, 0.2824, 0.4278, 0.1822]) -Greedy action tensor([-0.0792, 0.0701, -0.2830, -0.5765]) tensor([0.2790, 0.3239, 0.2275, 0.1697]) -Greedy action tensor([-0.1035, -0.2245, 0.4462, 0.6709]) tensor([0.1728, 0.1531, 0.2993, 0.3748]) -Greedy action tensor([-0.3680, 0.7193, 0.0550, -1.0239]) tensor([0.1663, 0.4934, 0.2539, 0.0863]) -Greedy action tensor([0.4360, 0.2984, 0.2038, 0.1530]) tensor([0.2926, 0.2550, 0.2320, 0.2205]) -Greedy action tensor([ 0.5242, -0.7340, 0.8943, 0.5562]) tensor([0.2656, 0.0755, 0.3846, 0.2743]) -Greedy action tensor([-0.5362, -0.7863, -0.1182, 1.2671]) tensor([0.1068, 0.0831, 0.1622, 0.6480]) -Greedy action tensor([ 1.2881, -0.3006, 0.4219, 0.7022]) tensor([0.4584, 0.0936, 0.1928, 0.2552]) -Greedy action tensor([ 0.7492, -0.0170, 1.7127, 0.5569]) tensor([0.2036, 0.0946, 0.5337, 0.1680]) -Greedy action tensor([-0.1651, 0.5367, 0.4290, -0.1245]) tensor([0.1704, 0.3437, 0.3086, 0.1774]) -Greedy action tensor([ 0.2017, -0.3303, 1.2567, 0.0193]) tensor([0.1889, 0.1110, 0.5426, 0.1574]) -Greedy action tensor([-0.4255, -1.5382, -0.4946, 0.4709]) tensor([0.2122, 0.0697, 0.1980, 0.5200]) -Greedy action tensor([ 1.5752, -0.9564, 0.7464, 1.1775]) tensor([0.4570, 0.0363, 0.1995, 0.3071]) -Greedy action tensor([-1.3566, -0.2948, -0.5683, -0.4534]) tensor([0.1168, 0.3378, 0.2570, 0.2883]) -Greedy action tensor([-0.1957, -0.1101, -0.8103, 2.1127]) tensor([0.0788, 0.0859, 0.0426, 0.7927]) -Greedy action tensor([0.2527, 0.0409, 0.8920, 0.1208]) tensor([0.2183, 0.1766, 0.4137, 0.1913]) -Greedy action tensor([0.7215, 0.2016, 1.1553, 1.3678]) tensor([0.1982, 0.1178, 0.3058, 0.3782]) -Greedy action tensor([-0.6999, -1.7663, 0.0643, 1.5054]) tensor([0.0796, 0.0274, 0.1709, 0.7221]) -Greedy action tensor([1.1209, 0.7041, 0.4788, 0.0571]) tensor([0.3952, 0.2605, 0.2079, 0.1364]) -Greedy action tensor([ 1.2564, -0.0019, -0.2366, 1.0126]) tensor([0.4362, 0.1239, 0.0980, 0.3418]) -Greedy action tensor([0.1498, 0.1505, 0.1604, 0.0892]) tensor([0.2530, 0.2532, 0.2557, 0.2381]) -Greedy action tensor([ 0.3394, -0.2639, 0.6751, 1.3273]) tensor([0.1776, 0.0971, 0.2484, 0.4769]) -Greedy action tensor([-0.5720, -0.2976, -0.8090, 0.8275]) tensor([0.1397, 0.1838, 0.1102, 0.5663]) -Greedy action tensor([-0.1213, -0.4012, 0.0665, -0.6122]) tensor([0.2798, 0.2115, 0.3376, 0.1712]) -Greedy action tensor([-0.5400, -2.1165, 0.5219, 0.4153]) tensor([0.1493, 0.0309, 0.4318, 0.3881]) -Greedy action tensor([ 0.7099, -0.6608, -0.5241, 0.4094]) tensor([0.4375, 0.1111, 0.1274, 0.3240]) -Greedy action tensor([1.6173, 0.1065, 0.4847, 1.1135]) tensor([0.4657, 0.1028, 0.1501, 0.2814]) -Greedy action tensor([1.2294, 0.3590, 0.2952, 0.6729]) tensor([0.4193, 0.1756, 0.1647, 0.2403]) -Greedy action tensor([ 0.4045, -0.1991, 0.6542, -0.6418]) tensor([0.3143, 0.1719, 0.4034, 0.1104]) -Greedy action tensor([ 2.0199, -0.1581, 0.7238, 1.3431]) tensor([0.5277, 0.0598, 0.1444, 0.2682]) -Greedy action tensor([ 1.8568, -0.2861, 0.5286, 1.3766]) tensor([0.4998, 0.0586, 0.1324, 0.3092]) -Greedy action tensor([ 1.6215, -0.4669, 0.7487, 0.3718]) tensor([0.5470, 0.0678, 0.2285, 0.1568]) -Greedy action tensor([-0.4648, -0.4479, 0.8638, -0.3045]) tensor([0.1435, 0.1460, 0.5420, 0.1685]) -Greedy action tensor([-1.3286, -1.2488, 1.3734, -0.4295]) tensor([0.0514, 0.0557, 0.7666, 0.1263]) -Greedy action tensor([ 1.2805e+00, -8.4812e-01, 1.5181e-04, 1.5593e-01]) tensor([0.5808, 0.0691, 0.1614, 0.1886]) -Greedy action tensor([ 1.0676, -0.6377, -0.4066, 0.5319]) tensor([0.5010, 0.0910, 0.1147, 0.2932]) -Greedy action tensor([ 1.0112, -0.2979, -0.2623, 0.2611]) tensor([0.4945, 0.1335, 0.1384, 0.2336]) -Greedy action tensor([ 1.3189, -0.7575, -0.1537, 0.0296]) tensor([0.6134, 0.0769, 0.1407, 0.1690]) -Greedy action tensor([ 1.3308, -0.4558, -0.3518, 0.1803]) tensor([0.5988, 0.1003, 0.1113, 0.1895]) -Greedy action tensor([ 1.9319, -0.7413, -0.4262, 0.2017]) tensor([0.7458, 0.0515, 0.0706, 0.1322]) -Greedy action tensor([ 1.4941, -0.1605, -0.2451, 0.2789]) tensor([0.6011, 0.1149, 0.1056, 0.1783]) -Greedy action tensor([ 1.0155, -0.1013, 0.3389, -0.1961]) tensor([0.4687, 0.1534, 0.2383, 0.1396]) -Greedy action tensor([ 0.6832, -0.3738, 0.1637, -0.1478]) tensor([0.4205, 0.1461, 0.2501, 0.1832]) -Greedy action tensor([ 1.7245, -0.5063, -0.2945, 0.5584]) tensor([0.6444, 0.0692, 0.0856, 0.2008]) -Greedy action tensor([ 1.3611, -0.7637, -0.2738, 0.3537]) tensor([0.5954, 0.0711, 0.1161, 0.2174]) -Greedy action tensor([ 1.4073, -0.5079, -0.3421, 0.2869]) tensor([0.6070, 0.0894, 0.1056, 0.1980]) -Greedy action tensor([ 0.4580, -0.1559, -0.2324, -0.0488]) tensor([0.3781, 0.2046, 0.1895, 0.2277]) -Greedy action tensor([ 0.9508, -0.5162, -0.1574, 0.1949]) tensor([0.4925, 0.1136, 0.1626, 0.2313]) -Greedy action tensor([ 1.1255, -0.7888, -0.2345, 0.4565]) tensor([0.5218, 0.0769, 0.1339, 0.2673]) -Greedy action tensor([ 1.2885, -0.5224, 0.0176, -0.1675]) tensor([0.5962, 0.0975, 0.1673, 0.1390]) -Greedy action tensor([ 0.8180, -0.6845, -0.1402, 0.1471]) tensor([0.4723, 0.1051, 0.1811, 0.2415]) -Greedy action tensor([ 1.0181, -0.2055, -0.2658, 0.2597]) tensor([0.4903, 0.1442, 0.1358, 0.2297]) -Greedy action tensor([ 1.9113, -1.0333, -0.3076, 0.6866]) tensor([0.6872, 0.0362, 0.0747, 0.2019]) -Greedy action tensor([ 0.9139, -0.3910, -0.1577, -0.1455]) tensor([0.5101, 0.1383, 0.1747, 0.1768]) -Greedy action tensor([ 0.6096, -0.1308, -0.8046, -0.3128]) tensor([0.4722, 0.2252, 0.1148, 0.1877]) -Greedy action tensor([ 0.7461, -0.2861, -0.1185, -0.2058]) tensor([0.4622, 0.1647, 0.1947, 0.1784]) -Greedy action tensor([ 1.3186, -0.8070, 0.0213, 0.5687]) tensor([0.5362, 0.0640, 0.1465, 0.2533]) -Greedy action tensor([ 1.1519, -0.3785, -0.1525, 0.1666]) tensor([0.5373, 0.1163, 0.1458, 0.2006]) -Greedy action tensor([ 1.4115, -0.3224, -0.4503, 0.5296]) tensor([0.5727, 0.1011, 0.0890, 0.2371]) -Greedy action tensor([ 0.3995, 0.0314, 0.1849, -0.0330]) tensor([0.3177, 0.2198, 0.2563, 0.2061]) -Greedy action tensor([ 1.5507, 0.2580, -0.3812, -0.4133]) tensor([0.6412, 0.1760, 0.0929, 0.0900]) -Greedy action tensor([ 1.0107, -0.0377, -0.3136, 0.1617]) tensor([0.4892, 0.1714, 0.1301, 0.2093]) -Greedy action tensor([ 2.0063, -0.3702, -0.4260, 0.5880]) tensor([0.7028, 0.0653, 0.0617, 0.1702]) -Greedy action tensor([ 1.3987, -0.2154, -0.5028, 0.4398]) tensor([0.5775, 0.1150, 0.0862, 0.2213]) -Greedy action tensor([ 1.0219, -0.2822, -0.1495, -0.0123]) tensor([0.5163, 0.1401, 0.1600, 0.1836]) -Greedy action tensor([ 1.0933, -0.3085, -0.2420, 0.2672]) tensor([0.5136, 0.1264, 0.1351, 0.2248]) -Greedy action tensor([ 0.7042, -0.3589, -0.1691, 0.1432]) tensor([0.4285, 0.1480, 0.1789, 0.2445]) -Greedy action tensor([ 2.1716, -0.7695, -0.1914, 0.3176]) tensor([0.7671, 0.0405, 0.0722, 0.1201]) -Greedy action tensor([ 1.3365, -0.2651, -0.1429, 0.5759]) tensor([0.5272, 0.1063, 0.1201, 0.2464]) -Greedy action tensor([ 1.0345, -0.6047, -0.5379, 0.6396]) tensor([0.4818, 0.0935, 0.1000, 0.3246]) -Greedy action tensor([ 1.0113, -0.3988, -0.2148, 0.2645]) tensor([0.4972, 0.1214, 0.1459, 0.2356]) -Greedy action tensor([ 0.6469, 0.0714, 0.0086, -0.0823]) tensor([0.3887, 0.2186, 0.2053, 0.1874]) -Greedy action tensor([ 0.8772, -0.6058, -0.0812, -0.0946]) tensor([0.5028, 0.1141, 0.1928, 0.1903]) -Greedy action tensor([ 1.5534, -0.0582, 0.2566, 0.1687]) tensor([0.5803, 0.1158, 0.1586, 0.1453]) -Greedy action tensor([ 0.3120, 0.1131, 0.1089, -0.1041]) tensor([0.3035, 0.2487, 0.2477, 0.2002]) -Greedy action tensor([ 0.9689, -0.5344, -0.0628, 0.1117]) tensor([0.4992, 0.1110, 0.1779, 0.2119]) -Greedy action tensor([ 1.3280, -0.6974, -0.1702, 0.5823]) tensor([0.5465, 0.0721, 0.1222, 0.2592]) -Greedy action tensor([ 1.4051, -0.5878, -0.1787, 0.4950]) tensor([0.5734, 0.0782, 0.1177, 0.2308]) -Greedy action tensor([ 1.2256, -0.1634, -0.0496, 0.0472]) tensor([0.5445, 0.1358, 0.1521, 0.1676]) -Greedy action tensor([ 0.8592, -0.4315, -0.3673, 0.0610]) tensor([0.4954, 0.1363, 0.1453, 0.2230]) -Greedy action tensor([ 0.9139, -0.9701, -0.3985, 0.5579]) tensor([0.4713, 0.0716, 0.1269, 0.3302]) -Greedy action tensor([ 1.6417, -0.3824, -0.2197, 0.2606]) tensor([0.6498, 0.0859, 0.1010, 0.1633]) -Greedy action tensor([ 1.0606, -0.5142, -0.2852, 0.4462]) tensor([0.4979, 0.1031, 0.1296, 0.2694]) -Greedy action tensor([ 0.8874, -0.4348, -0.0906, 0.0710]) tensor([0.4797, 0.1279, 0.1804, 0.2120]) -Greedy action tensor([ 1.3849, -0.3800, -0.5600, -0.2849]) tensor([0.6656, 0.1139, 0.0952, 0.1253]) -Greedy action tensor([ 0.5019, -0.0933, 0.0039, -0.1167]) tensor([0.3707, 0.2044, 0.2253, 0.1997]) -Greedy action tensor([ 1.2016, -0.3835, -0.3396, 0.4913]) tensor([0.5234, 0.1073, 0.1121, 0.2573]) -Greedy action tensor([ 1.0265, -0.2858, -0.0534, 0.4659]) tensor([0.4588, 0.1235, 0.1558, 0.2619]) -Greedy action tensor([ 0.8878, -0.5592, -0.0840, 0.5697]) tensor([0.4271, 0.1005, 0.1616, 0.3107]) -Greedy action tensor([ 1.8153, -0.6789, -0.5598, 0.2071]) tensor([0.7268, 0.0600, 0.0676, 0.1455]) -Greedy action tensor([ 1.3544, -0.1864, -0.3684, 0.4453]) tensor([0.5569, 0.1193, 0.0994, 0.2244]) -Greedy action tensor([ 1.2188, 0.0417, -0.2825, 0.3528]) tensor([0.5124, 0.1579, 0.1142, 0.2155]) -Greedy action tensor([ 1.7093, -0.6030, -0.0649, 0.2720]) tensor([0.6639, 0.0658, 0.1126, 0.1577]) -Greedy action tensor([ 1.2498, -0.5355, -0.2550, 0.4905]) tensor([0.5383, 0.0903, 0.1195, 0.2519]) -Greedy action tensor([1.3285, 0.0749, 0.1139, 0.0500]) tensor([0.5374, 0.1534, 0.1595, 0.1496]) -Greedy action tensor([ 1.6143, -0.2878, -0.2188, 0.2960]) tensor([0.6342, 0.0947, 0.1014, 0.1697]) -Greedy action tensor([ 2.0541, -0.9488, -0.4476, 0.2989]) tensor([0.7666, 0.0381, 0.0628, 0.1325]) -Greedy action tensor([ 1.1632, -0.2276, -0.4596, 0.2544]) tensor([0.5408, 0.1346, 0.1067, 0.2179]) -Greedy action tensor([ 0.8273, -0.0569, -0.0747, -0.3392]) tensor([0.4694, 0.1939, 0.1905, 0.1462]) -Greedy action tensor([ 1.1406, -0.3105, -0.2037, 0.0389]) tensor([0.5473, 0.1282, 0.1427, 0.1818]) -Greedy action tensor([ 1.2016, 0.0121, 0.0580, -0.2991]) tensor([0.5417, 0.1649, 0.1726, 0.1208]) -Greedy action tensor([ 1.6860, -0.8546, -0.4059, 0.4732]) tensor([0.6668, 0.0526, 0.0823, 0.1983]) -Greedy action tensor([ 0.8561, -0.2677, -0.0202, -0.0688]) tensor([0.4677, 0.1520, 0.1947, 0.1855]) -Greedy action tensor([ 2.1990, -1.2482, -0.1221, 1.0587]) tensor([0.6898, 0.0220, 0.0677, 0.2205]) -Greedy action tensor([ 1.2861, 0.0580, -0.3131, 0.1222]) tensor([0.5534, 0.1620, 0.1118, 0.1728]) -Greedy action tensor([ 1.3889, -0.2580, -0.3460, 0.1856]) tensor([0.5991, 0.1154, 0.1057, 0.1798]) -Greedy action tensor([ 1.3499, -0.4947, -0.1597, 0.2093]) tensor([0.5887, 0.0931, 0.1301, 0.1882]) -Greedy action tensor([ 1.0387, -0.7799, -0.2404, 0.3306]) tensor([0.5173, 0.0839, 0.1440, 0.2548]) -Greedy action tensor([ 1.5092, 0.0046, -0.5219, 0.1649]) tensor([0.6196, 0.1376, 0.0813, 0.1615]) -Greedy action tensor([ 1.6546, -0.2801, -0.2044, 0.1158]) tensor([0.6601, 0.0954, 0.1029, 0.1417]) -Greedy action tensor([ 1.3354, -1.0049, -0.2402, 0.3577]) tensor([0.5955, 0.0573, 0.1232, 0.2240]) -Greedy action tensor([ 2.0447, -0.2181, -0.1065, 0.6520]) tensor([0.6808, 0.0708, 0.0792, 0.1691]) -Greedy action tensor([ 1.4569, -0.4815, -0.3753, 0.0035]) tensor([0.6503, 0.0936, 0.1041, 0.1520]) -Greedy action tensor([ 0.9929, -0.4890, -0.2908, 0.5771]) tensor([0.4621, 0.1050, 0.1280, 0.3049]) -Greedy action tensor([ 0.7325, -0.6782, -0.2125, 0.3113]) tensor([0.4369, 0.1066, 0.1698, 0.2867]) -Greedy action tensor([-1.8756, -0.4955, 0.6820, -0.1089]) tensor([0.0421, 0.1675, 0.5438, 0.2466]) -Greedy action tensor([-0.4709, 0.0167, 1.0461, 1.6399]) tensor([0.0648, 0.1055, 0.2952, 0.5346]) -Greedy action tensor([-1.9238, -0.4170, 0.6522, -0.1646]) tensor([0.0409, 0.1844, 0.5373, 0.2374]) -Greedy action tensor([-1.7434, -0.4435, 0.6135, 0.0269]) tensor([0.0474, 0.1739, 0.5004, 0.2783]) -Greedy action tensor([-1.8529, -0.4528, 0.6221, -0.1291]) tensor([0.0444, 0.1799, 0.5271, 0.2487]) -Greedy action tensor([-1.8972, -0.3896, 0.6482, -0.1363]) tensor([0.0415, 0.1875, 0.5294, 0.2416]) -Greedy action tensor([-1.3545, -0.7582, 0.2857, -0.1304]) tensor([0.0879, 0.1596, 0.4534, 0.2991]) -Greedy action tensor([-1.4608, -0.5726, 0.5200, 0.2456]) tensor([0.0618, 0.1502, 0.4478, 0.3403]) -Greedy action tensor([-1.3856, -0.3236, 0.9817, 1.0257]) tensor([0.0389, 0.1125, 0.4150, 0.4336]) -Greedy action tensor([-1.6291, -0.4804, 0.4982, -0.0094]) tensor([0.0568, 0.1792, 0.4769, 0.2871]) -Greedy action tensor([-1.6732, -0.5118, 0.5672, 0.0734]) tensor([0.0517, 0.1653, 0.4862, 0.2967]) -Greedy action tensor([-1.9190, -0.1632, 0.6018, -0.1548]) tensor([0.0399, 0.2309, 0.4963, 0.2329]) -Greedy action tensor([-1.8172, -0.4795, 0.6007, -0.0901]) tensor([0.0462, 0.1759, 0.5182, 0.2597]) -Greedy action tensor([-1.7685, -0.5282, 0.6520, -0.0261]) tensor([0.0467, 0.1614, 0.5253, 0.2666]) -Greedy action tensor([-1.7778, -0.2074, 0.5610, -0.0754]) tensor([0.0462, 0.2219, 0.4786, 0.2533]) -Greedy action tensor([-1.6905, -0.3179, 0.5816, 0.0824]) tensor([0.0487, 0.1922, 0.4724, 0.2868]) -Greedy action tensor([-0.5795, 0.0228, 1.2426, 1.4459]) tensor([0.0603, 0.1101, 0.3728, 0.4568]) -Greedy action tensor([-0.2245, 0.0259, 0.9829, 1.5414]) tensor([0.0871, 0.1119, 0.2915, 0.5095]) -Greedy action tensor([-1.7536, -0.5011, 0.5583, -0.0524]) tensor([0.0498, 0.1743, 0.5028, 0.2730]) -Greedy action tensor([-1.8411, -0.2640, 0.5881, -0.1501]) tensor([0.0442, 0.2140, 0.5019, 0.2399]) -Greedy action tensor([-1.8978, -0.4628, 0.6977, -0.1225]) tensor([0.0408, 0.1714, 0.5470, 0.2409]) -Greedy action tensor([-0.4039, -0.1435, 0.1755, 0.2631]) tensor([0.1658, 0.2151, 0.2960, 0.3231]) -Greedy action tensor([-1.7014, -0.4640, 0.5505, -0.0352]) tensor([0.0520, 0.1791, 0.4939, 0.2750]) -Greedy action tensor([-0.2498, -0.3542, 0.1960, 0.2384]) tensor([0.1964, 0.1769, 0.3067, 0.3200]) -Greedy action tensor([-1.8555, -0.2297, 0.6271, -0.2147]) tensor([0.0431, 0.2189, 0.5157, 0.2223]) -Greedy action tensor([-1.8310, -0.3788, 0.6014, -0.0891]) tensor([0.0447, 0.1910, 0.5091, 0.2552]) -Greedy action tensor([-0.2419, -0.1798, 1.0983, 1.5846]) tensor([0.0827, 0.0880, 0.3158, 0.5136]) -Greedy action tensor([-1.8663, -0.4730, 0.6768, -0.0738]) tensor([0.0421, 0.1696, 0.5355, 0.2528]) -Greedy action tensor([-0.8266, -0.8750, 0.8379, 1.4490]) tensor([0.0589, 0.0561, 0.3113, 0.5736]) -Greedy action tensor([-1.4314, -0.4835, 0.6626, 0.5029]) tensor([0.0537, 0.1386, 0.4360, 0.3717]) -Greedy action tensor([-1.2404, -0.6473, 0.4958, 0.3304]) tensor([0.0752, 0.1361, 0.4269, 0.3618]) -Greedy action tensor([-0.4330, -0.1751, 0.1836, 0.2315]) tensor([0.1642, 0.2125, 0.3042, 0.3191]) -Greedy action tensor([-1.7390, -0.4535, 0.5911, -0.1055]) tensor([0.0500, 0.1807, 0.5135, 0.2559]) -Greedy action tensor([-1.9187, -0.4416, 0.6535, -0.1673]) tensor([0.0413, 0.1807, 0.5403, 0.2378]) -Greedy action tensor([-1.1913, -0.6119, 0.3582, 0.0298]) tensor([0.0919, 0.1640, 0.4326, 0.3115]) -Greedy action tensor([-1.6942, -0.5163, 0.5359, -0.0331]) tensor([0.0532, 0.1726, 0.4944, 0.2799]) -Greedy action tensor([-1.8822, -0.4746, 0.6474, -0.1239]) tensor([0.0427, 0.1743, 0.5354, 0.2476]) -Greedy action tensor([-1.6431, -0.4623, 0.6795, 0.2881]) tensor([0.0468, 0.1525, 0.4777, 0.3230]) -Greedy action tensor([-1.1710, -0.6721, 0.3231, 0.0982]) tensor([0.0938, 0.1545, 0.4179, 0.3338]) -Greedy action tensor([-1.6032, 0.1162, 0.4020, 0.0581]) tensor([0.0519, 0.2896, 0.3854, 0.2732]) -Greedy action tensor([-1.7740, -0.2621, 0.5691, -0.0043]) tensor([0.0458, 0.2079, 0.4773, 0.2690]) -Greedy action tensor([-1.7672, -0.6717, -0.3415, -0.6470]) tensor([0.0891, 0.2666, 0.3709, 0.2733]) -Greedy action tensor([-1.3499, -0.5032, 0.4967, 0.4484]) tensor([0.0637, 0.1484, 0.4035, 0.3844]) -Greedy action tensor([-1.0981, -0.5083, 0.4112, 0.8131]) tensor([0.0710, 0.1280, 0.3211, 0.4799]) -Greedy action tensor([-1.9337, -0.4507, 0.6592, -0.1754]) tensor([0.0407, 0.1793, 0.5439, 0.2361]) -Greedy action tensor([-0.6681, -0.6461, 0.9797, 1.5359]) tensor([0.0614, 0.0628, 0.3192, 0.5566]) -Greedy action tensor([-0.9005, -0.5476, 0.3127, 0.2514]) tensor([0.1117, 0.1590, 0.3758, 0.3535]) -Greedy action tensor([-1.0911, -0.5687, 0.2411, 0.4189]) tensor([0.0909, 0.1532, 0.3444, 0.4114]) -Greedy action tensor([-2.0606, -0.9187, 0.5176, -0.0611]) tensor([0.0405, 0.1269, 0.5335, 0.2991]) -Greedy action tensor([-1.4927, -0.5253, 0.4328, 0.1306]) tensor([0.0643, 0.1691, 0.4408, 0.3258]) -Greedy action tensor([-1.6587, -0.5756, 0.0325, -0.3947]) tensor([0.0774, 0.2286, 0.4200, 0.2740]) -Greedy action tensor([-1.2020, -0.5516, 0.3058, 0.3911]) tensor([0.0810, 0.1551, 0.3657, 0.3982]) -Greedy action tensor([-1.8289, -0.4555, 0.7589, 0.1565]) tensor([0.0392, 0.1547, 0.5210, 0.2852]) -Greedy action tensor([-1.0109, -0.1656, 0.3821, -0.4397]) tensor([0.1096, 0.2552, 0.4412, 0.1940]) -Greedy action tensor([-1.8532, -0.4835, 0.6481, -0.0936]) tensor([0.0436, 0.1715, 0.5317, 0.2532]) -Greedy action tensor([-1.6379, -0.5278, 0.4970, 0.0181]) tensor([0.0564, 0.1712, 0.4770, 0.2955]) -Greedy action tensor([-1.5928, 0.1247, 0.4115, 0.1171]) tensor([0.0512, 0.2854, 0.3802, 0.2832]) -Greedy action tensor([-1.3573, 0.0749, 0.4934, 0.6875]) tensor([0.0519, 0.2172, 0.3301, 0.4008]) -Greedy action tensor([-1.4814, -0.5384, 0.4072, 0.1199]) tensor([0.0661, 0.1696, 0.4367, 0.3276]) -Greedy action tensor([-1.2539, -0.3349, 0.5091, 0.7690]) tensor([0.0592, 0.1484, 0.3450, 0.4474]) -Greedy action tensor([-1.7989, -0.4773, 0.6567, -0.0519]) tensor([0.0452, 0.1694, 0.5264, 0.2591]) -Greedy action tensor([-1.7898, -0.3919, 0.5684, -0.0822]) tensor([0.0473, 0.1915, 0.5002, 0.2610]) -Greedy action tensor([-1.7872, -0.4179, 0.5826, -0.0954]) tensor([0.0475, 0.1868, 0.5079, 0.2578]) -Greedy action tensor([-1.5526, -0.4213, 0.9024, 0.7349]) tensor([0.0391, 0.1211, 0.4550, 0.3848]) -Greedy action tensor([-0.3778, 0.1208, 0.9154, 1.6537]) tensor([0.0719, 0.1183, 0.2619, 0.5479]) -Greedy action tensor([-1.3678, -0.5695, 0.4649, 0.2119]) tensor([0.0698, 0.1551, 0.4363, 0.3388]) -Greedy action tensor([-1.9118, -0.4642, 0.6495, -0.1647]) tensor([0.0418, 0.1776, 0.5410, 0.2397]) -Greedy action tensor([-1.9227, -0.4245, 0.6524, -0.1706]) tensor([0.0410, 0.1836, 0.5388, 0.2366]) -Greedy action tensor([-1.3942, -0.4780, 0.4028, 0.2039]) tensor([0.0691, 0.1727, 0.4167, 0.3415]) -Greedy action tensor([-1.8359, -0.4377, 0.6417, -0.0746]) tensor([0.0439, 0.1777, 0.5229, 0.2555]) -Greedy action tensor([-1.2566, -0.5418, 0.2886, 0.3696]) tensor([0.0780, 0.1594, 0.3658, 0.3967]) -Greedy action tensor([-1.8992, -0.4416, 0.6448, -0.1540]) tensor([0.0421, 0.1809, 0.5359, 0.2411]) -Greedy action tensor([-1.7935, -0.4404, 0.5894, -0.0889]) tensor([0.0472, 0.1825, 0.5110, 0.2593]) -Greedy action tensor([-1.9427, -0.4800, 0.8939, 0.1018]) tensor([0.0332, 0.1434, 0.5667, 0.2566]) -Greedy action tensor([-1.7994, -0.4522, 0.6970, 0.0616]) tensor([0.0427, 0.1643, 0.5184, 0.2746]) -Greedy action tensor([-1.2253, -0.5914, 1.1139, 1.3281]) tensor([0.0383, 0.0722, 0.3973, 0.4922]) -Greedy action tensor([-1.7913, 0.1891, 0.4718, -0.0320]) tensor([0.0423, 0.3061, 0.4062, 0.2454]) -Greedy action tensor([-1.3107, -0.6489, 1.3208, 1.3568]) tensor([0.0320, 0.0621, 0.4448, 0.4611]) -Greedy action tensor([-1.6090, -0.4244, 0.6668, 0.3585]) tensor([0.0473, 0.1545, 0.4601, 0.3381]) -Greedy action tensor([-1.2289, -0.3991, 0.5292, 0.8688]) tensor([0.0580, 0.1330, 0.3365, 0.4725]) -Greedy action tensor([-1.8263, -0.4893, 0.6053, -0.1105]) tensor([0.0460, 0.1751, 0.5232, 0.2557]) -Greedy action tensor([ 1.0687, -0.7441, -0.0137, -0.5345]) tensor([0.5871, 0.0958, 0.1989, 0.1182]) -Greedy action tensor([ 0.4039, -0.0340, 0.0059, -0.1983]) tensor([0.3491, 0.2253, 0.2345, 0.1912]) -Greedy action tensor([ 0.7793, -0.6556, -0.1833, -0.7703]) tensor([0.5457, 0.1300, 0.2084, 0.1159]) -Greedy action tensor([ 0.5676, -0.4713, 0.0202, -0.4275]) tensor([0.4344, 0.1537, 0.2513, 0.1606]) -Greedy action tensor([ 0.6961, -0.2254, -0.0075, -0.4477]) tensor([0.4522, 0.1799, 0.2238, 0.1441]) -Greedy action tensor([ 0.4569, -0.2811, 0.0261, -0.3497]) tensor([0.3884, 0.1857, 0.2525, 0.1734]) -Greedy action tensor([ 0.8661, -0.8004, -0.0064, -0.5150]) tensor([0.5382, 0.1017, 0.2249, 0.1352]) -Greedy action tensor([ 1.0435, -0.7999, -0.2984, -0.6690]) tensor([0.6250, 0.0989, 0.1633, 0.1128]) -Greedy action tensor([ 0.2464, 0.0185, -0.0812, -0.0446]) tensor([0.3063, 0.2439, 0.2208, 0.2290]) -Greedy action tensor([ 0.8342, -0.5789, -0.1124, -0.5379]) tensor([0.5305, 0.1291, 0.2059, 0.1345]) -Greedy action tensor([ 0.6631, -0.0989, 0.0666, -0.3409]) tensor([0.4195, 0.1958, 0.2310, 0.1537]) -Greedy action tensor([ 0.4405, 0.0211, -0.1088, -0.0759]) tensor([0.3532, 0.2322, 0.2039, 0.2107]) -Greedy action tensor([ 0.5288, -0.3135, -0.0740, -0.2075]) tensor([0.4070, 0.1753, 0.2228, 0.1949]) -Greedy action tensor([ 0.4563, -0.2101, -0.0651, -0.0982]) tensor([0.3729, 0.1915, 0.2214, 0.2142]) -Greedy action tensor([ 0.4993, -0.3882, -0.1247, -0.4401]) tensor([0.4276, 0.1761, 0.2291, 0.1671]) -Greedy action tensor([ 0.6935, -0.3365, -0.1017, -0.2752]) tensor([0.4570, 0.1632, 0.2063, 0.1735]) -Greedy action tensor([ 0.6239, -0.1971, -0.0419, -0.1633]) tensor([0.4151, 0.1827, 0.2133, 0.1889]) -Greedy action tensor([ 0.4357, 0.0034, -0.1513, -0.0093]) tensor([0.3514, 0.2281, 0.1954, 0.2252]) -Greedy action tensor([ 0.2110, 0.0304, 0.0782, -0.4148]) tensor([0.3081, 0.2572, 0.2698, 0.1648]) -Greedy action tensor([ 0.7256, -0.1895, 0.0860, -0.0616]) tensor([0.4196, 0.1680, 0.2214, 0.1910]) -Greedy action tensor([ 0.6360, -0.2620, 0.0217, -0.3396]) tensor([0.4300, 0.1752, 0.2327, 0.1621]) -Greedy action tensor([ 0.2191, 0.0374, -0.0495, -0.1897]) tensor([0.3065, 0.2556, 0.2343, 0.2036]) -Greedy action tensor([ 0.9631, -1.1835, 0.0386, -0.5257]) tensor([0.5750, 0.0672, 0.2281, 0.1297]) -Greedy action tensor([ 0.5241, -0.0802, -0.0246, -0.3675]) tensor([0.3946, 0.2156, 0.2280, 0.1618]) -Greedy action tensor([ 0.6465, -0.2619, 0.0263, -0.1922]) tensor([0.4214, 0.1699, 0.2266, 0.1821]) -Greedy action tensor([ 0.4257, -0.1256, -0.1220, -0.1340]) tensor([0.3668, 0.2114, 0.2121, 0.2096]) -Greedy action tensor([ 0.5930, -0.2245, -0.0552, -0.2947]) tensor([0.4209, 0.1858, 0.2201, 0.1732]) -Greedy action tensor([ 0.6910, -0.2668, 0.0648, -0.1484]) tensor([0.4255, 0.1633, 0.2275, 0.1838]) -Greedy action tensor([ 0.7022, -0.4989, -0.1307, -0.5704]) tensor([0.4961, 0.1493, 0.2157, 0.1390]) -Greedy action tensor([ 0.6165, -0.2877, -0.1078, -0.2880]) tensor([0.4359, 0.1765, 0.2112, 0.1764]) -Greedy action tensor([ 1.0624, -0.7780, 0.0552, -0.5440]) tensor([0.5798, 0.0921, 0.2118, 0.1163]) -Greedy action tensor([ 0.4770, 0.1082, -0.0225, -0.1282]) tensor([0.3516, 0.2431, 0.2133, 0.1919]) -Greedy action tensor([ 0.6567, -0.2718, -0.0481, -0.1673]) tensor([0.4295, 0.1697, 0.2123, 0.1884]) -Greedy action tensor([ 0.4076, -0.0165, -0.0134, -0.2641]) tensor([0.3544, 0.2319, 0.2326, 0.1810]) -Greedy action tensor([ 0.6731, -0.5186, -0.0831, -0.3689]) tensor([0.4704, 0.1429, 0.2208, 0.1659]) -Greedy action tensor([ 0.8255, -0.4313, -0.0766, -0.3785]) tensor([0.5025, 0.1430, 0.2038, 0.1507]) -Greedy action tensor([ 0.4766, -0.1016, 0.1740, -0.1607]) tensor([0.3535, 0.1983, 0.2612, 0.1869]) -Greedy action tensor([ 0.1501, 0.0077, 0.0246, -0.4165]) tensor([0.3015, 0.2615, 0.2659, 0.1711]) -Greedy action tensor([ 0.3519, -0.3249, -0.0273, -0.3377]) tensor([0.3711, 0.1886, 0.2540, 0.1862]) -Greedy action tensor([ 0.3771, 0.0055, -0.0695, -0.0323]) tensor([0.3341, 0.2304, 0.2137, 0.2218]) -Greedy action tensor([ 0.5854, -0.4286, 0.0564, -0.2959]) tensor([0.4226, 0.1533, 0.2490, 0.1751]) -Greedy action tensor([ 0.1131, 0.2788, 0.0122, -0.3224]) tensor([0.2680, 0.3163, 0.2423, 0.1734]) -Greedy action tensor([ 1.1188, -1.3284, -0.1327, -0.5536]) tensor([0.6409, 0.0555, 0.1833, 0.1204]) -Greedy action tensor([ 0.4279, -0.4076, 0.0007, -0.4080]) tensor([0.3969, 0.1721, 0.2589, 0.1721]) -Greedy action tensor([ 0.2871, -0.3209, -0.2329, -0.1033]) tensor([0.3552, 0.1934, 0.2111, 0.2404]) -Greedy action tensor([ 0.4545, -0.3151, 0.0220, -0.3159]) tensor([0.3884, 0.1799, 0.2520, 0.1797]) -Greedy action tensor([ 0.0275, 0.0017, -0.0161, -0.2441]) tensor([0.2707, 0.2638, 0.2592, 0.2063]) -Greedy action tensor([ 0.5725, -0.1371, -0.1136, -0.2813]) tensor([0.4130, 0.2031, 0.2080, 0.1759]) -Greedy action tensor([ 1.0119, -1.3170, 0.0117, -0.5751]) tensor([0.5989, 0.0583, 0.2203, 0.1225]) -Greedy action tensor([ 0.5746, -0.4773, 0.3015, -0.5897]) tensor([0.4128, 0.1442, 0.3142, 0.1289]) -Greedy action tensor([ 0.4906, 0.1734, 0.1151, -0.3427]) tensor([0.3509, 0.2555, 0.2411, 0.1525]) -Greedy action tensor([ 0.9914, -0.6613, 0.2399, -0.5867]) tensor([0.5349, 0.1024, 0.2523, 0.1104]) -Greedy action tensor([ 0.9515, -0.8887, 0.0734, -0.5966]) tensor([0.5596, 0.0889, 0.2326, 0.1190]) -Greedy action tensor([ 0.7822, -0.6332, -0.0953, -0.6122]) tensor([0.5245, 0.1274, 0.2181, 0.1301]) -Greedy action tensor([ 0.6321, -0.1617, -0.0101, -0.0928]) tensor([0.4061, 0.1836, 0.2136, 0.1967]) -Greedy action tensor([ 0.6026, -0.2480, -0.0340, -0.3279]) tensor([0.4254, 0.1817, 0.2251, 0.1678]) -Greedy action tensor([ 0.7401, 0.0210, -0.1123, -0.3380]) tensor([0.4437, 0.2162, 0.1892, 0.1510]) -Greedy action tensor([ 0.7538, -0.6938, 0.0595, -0.2832]) tensor([0.4787, 0.1126, 0.2391, 0.1697]) -Greedy action tensor([ 0.5434, -0.1319, -0.1146, -0.3423]) tensor([0.4100, 0.2087, 0.2123, 0.1691]) -Greedy action tensor([ 0.7619, -0.1975, -0.1195, -0.6373]) tensor([0.4892, 0.1874, 0.2026, 0.1207]) -Greedy action tensor([ 0.7495, -0.5214, 0.0878, -0.5440]) tensor([0.4829, 0.1355, 0.2492, 0.1325]) -Greedy action tensor([ 0.7817, -0.5845, 0.1392, -0.5508]) tensor([0.4890, 0.1247, 0.2572, 0.1290]) -Greedy action tensor([ 0.6502, -0.2658, -0.0367, -0.1360]) tensor([0.4239, 0.1696, 0.2133, 0.1931]) -Greedy action tensor([ 0.9495, -0.7144, -0.0892, -0.6367]) tensor([0.5721, 0.1084, 0.2025, 0.1171]) -Greedy action tensor([ 0.6253, -0.4069, 0.0011, -0.2751]) tensor([0.4351, 0.1550, 0.2331, 0.1768]) -Greedy action tensor([ 0.5734, 0.0157, 0.0797, -0.3142]) tensor([0.3854, 0.2207, 0.2353, 0.1587]) -Greedy action tensor([ 0.7807, -0.4709, 0.1233, -0.4695]) tensor([0.4783, 0.1368, 0.2479, 0.1370]) -Greedy action tensor([ 0.6191, 0.3274, -0.1633, -0.2412]) tensor([0.3806, 0.2843, 0.1741, 0.1610]) -Greedy action tensor([ 0.4591, -0.0094, -0.0200, -0.1575]) tensor([0.3591, 0.2247, 0.2224, 0.1938]) -Greedy action tensor([ 0.7779, -0.3011, 0.0592, -0.4154]) tensor([0.4694, 0.1596, 0.2288, 0.1423]) -Greedy action tensor([ 0.8145, -0.8007, 0.0064, -0.5087]) tensor([0.5233, 0.1041, 0.2333, 0.1394]) -Greedy action tensor([ 0.6874, -0.4589, 0.0213, -0.3482]) tensor([0.4573, 0.1454, 0.2349, 0.1624]) -Greedy action tensor([ 0.4672, -0.1003, -0.0600, -0.3279]) tensor([0.3833, 0.2173, 0.2263, 0.1731]) -Greedy action tensor([ 0.9017, -0.5157, 0.0162, -0.5557]) tensor([0.5298, 0.1284, 0.2185, 0.1233]) -Greedy action tensor([ 0.5353, -0.1491, -0.0957, -0.1504]) tensor([0.3937, 0.1986, 0.2094, 0.1983]) -Greedy action tensor([ 0.6687, -0.4533, -0.0360, -0.1538]) tensor([0.4426, 0.1441, 0.2188, 0.1945]) -Greedy action tensor([ 0.6939, -0.5642, -0.0837, -0.7361]) tensor([0.5043, 0.1433, 0.2317, 0.1207]) -Greedy action tensor([ 0.4520, 0.0169, 0.0943, -0.1863]) tensor([0.3479, 0.2251, 0.2433, 0.1837]) -Greedy action tensor([ 0.8177, -0.1802, 0.0904, -0.5268]) tensor([0.4734, 0.1745, 0.2287, 0.1234]) -Greedy action tensor([ 0.6580, -0.5165, -0.0464, -0.4094]) tensor([0.4657, 0.1439, 0.2302, 0.1602]) -Greedy action tensor([ 0.5608, -0.2635, -0.0469, -0.1488]) tensor([0.4040, 0.1772, 0.2201, 0.1987]) -Greedy action tensor([ 1.5255, -0.5862, -0.0870, 0.2648]) tensor([0.6235, 0.0755, 0.1243, 0.1767]) -Greedy action tensor([ 0.9069, -0.4935, -0.4378, 0.6439]) tensor([0.4394, 0.1083, 0.1145, 0.3378]) -Greedy action tensor([ 1.4054, -0.8286, -0.2178, 0.3005]) tensor([0.6114, 0.0655, 0.1206, 0.2025]) -Greedy action tensor([ 0.8236, -0.3762, -0.4132, 0.4668]) tensor([0.4364, 0.1315, 0.1267, 0.3055]) -Greedy action tensor([ 1.2819, -0.5501, -0.3572, 0.1354]) tensor([0.5981, 0.0957, 0.1161, 0.1900]) -Greedy action tensor([ 1.5841, 0.2139, -0.5756, 0.2551]) tensor([0.6119, 0.1555, 0.0706, 0.1620]) -Greedy action tensor([ 1.5599, -0.7133, -0.5097, 0.6250]) tensor([0.6166, 0.0635, 0.0778, 0.2421]) -Greedy action tensor([ 1.6710, 0.0983, -0.2524, 0.4989]) tensor([0.6012, 0.1247, 0.0878, 0.1862]) -Greedy action tensor([ 0.8040, -0.4818, -0.4360, 0.4772]) tensor([0.4372, 0.1209, 0.1265, 0.3153]) -Greedy action tensor([ 1.3730, -0.4367, -0.1037, 0.1400]) tensor([0.5940, 0.0972, 0.1357, 0.1731]) -Greedy action tensor([ 2.0081, -1.2162, -0.4996, 0.4862]) tensor([0.7465, 0.0297, 0.0608, 0.1630]) -Greedy action tensor([ 0.9639, -0.2209, 0.0240, 0.2341]) tensor([0.4590, 0.1404, 0.1793, 0.2213]) -Greedy action tensor([ 1.4556, -0.6747, -0.4234, 0.8311]) tensor([0.5534, 0.0657, 0.0845, 0.2963]) -Greedy action tensor([ 1.5730, -0.6840, -0.4356, 0.6354]) tensor([0.6134, 0.0642, 0.0823, 0.2402]) -Greedy action tensor([ 0.8335, -0.3906, -0.0540, 0.1227]) tensor([0.4552, 0.1338, 0.1874, 0.2236]) -Greedy action tensor([ 1.4023, -0.4080, -0.4296, -0.0792]) tensor([0.6447, 0.1055, 0.1032, 0.1465]) -Greedy action tensor([ 0.3659, -0.1016, -0.6998, -0.4883]) tensor([0.4173, 0.2614, 0.1437, 0.1776]) -Greedy action tensor([ 1.2927, -0.5110, -0.2723, 0.4556]) tensor([0.5535, 0.0912, 0.1157, 0.2396]) -Greedy action tensor([ 1.6795, -0.6312, 0.0574, 0.3086]) tensor([0.6449, 0.0640, 0.1274, 0.1637]) -Greedy action tensor([ 2.0539, -1.0241, -0.2128, 0.1895]) tensor([0.7665, 0.0353, 0.0794, 0.1188]) -Greedy action tensor([ 2.0393, -0.4854, -0.2862, 0.1887]) tensor([0.7491, 0.0600, 0.0732, 0.1177]) -Greedy action tensor([ 1.3310, 0.0546, -0.6031, 0.3574]) tensor([0.5551, 0.1549, 0.0803, 0.2097]) -Greedy action tensor([ 0.9906, -0.7586, -0.1116, 0.2672]) tensor([0.5022, 0.0873, 0.1668, 0.2436]) -Greedy action tensor([ 1.3312, 0.1313, -0.1636, 0.2803]) tensor([0.5333, 0.1606, 0.1196, 0.1865]) -Greedy action tensor([ 1.4510, 0.1215, -0.4142, -0.1017]) tensor([0.6131, 0.1622, 0.0949, 0.1298]) -Greedy action tensor([ 1.2865, -0.4198, 0.0171, 0.4630]) tensor([0.5259, 0.0955, 0.1478, 0.2308]) -Greedy action tensor([ 1.2566, -0.6029, -0.3353, 0.5100]) tensor([0.5455, 0.0850, 0.1110, 0.2585]) -Greedy action tensor([ 1.1141, -0.4986, -0.4466, 0.0964]) tensor([0.5647, 0.1126, 0.1186, 0.2041]) -Greedy action tensor([ 1.7042, -0.4409, -0.3132, 0.3078]) tensor([0.6678, 0.0782, 0.0888, 0.1653]) -Greedy action tensor([ 1.1842, -0.2743, -0.4131, 0.3696]) tensor([0.5325, 0.1239, 0.1078, 0.2358]) -Greedy action tensor([ 0.8241, -0.3275, -0.2995, 0.0286]) tensor([0.4779, 0.1511, 0.1554, 0.2157]) -Greedy action tensor([ 1.2658, -0.5160, -0.3220, 0.3870]) tensor([0.5593, 0.0941, 0.1143, 0.2323]) -Greedy action tensor([ 0.5077, -0.1317, -0.0662, -0.0833]) tensor([0.3781, 0.1995, 0.2130, 0.2094]) -Greedy action tensor([ 0.7999, -0.0037, -0.6766, 0.0810]) tensor([0.4622, 0.2069, 0.1056, 0.2252]) -Greedy action tensor([ 1.4573, -0.1134, -0.3164, -0.0619]) tensor([0.6264, 0.1302, 0.1063, 0.1371]) -Greedy action tensor([ 1.4576, -0.4267, -0.2985, 0.0999]) tensor([0.6322, 0.0960, 0.1092, 0.1626]) -Greedy action tensor([ 1.2710, -0.6605, -0.4439, 0.2490]) tensor([0.5936, 0.0860, 0.1068, 0.2136]) -Greedy action tensor([ 1.1036, -0.2743, 0.0278, -0.0633]) tensor([0.5251, 0.1324, 0.1791, 0.1635]) -Greedy action tensor([ 0.3665, -0.0874, 0.0131, -0.0735]) tensor([0.3354, 0.2130, 0.2356, 0.2160]) -Greedy action tensor([ 0.8925, -0.4043, -0.0908, -0.1989]) tensor([0.5042, 0.1379, 0.1886, 0.1693]) -Greedy action tensor([ 1.4295, -0.3135, -0.3993, -0.0853]) tensor([0.6429, 0.1125, 0.1033, 0.1413]) -Greedy action tensor([ 0.7444, -0.4703, 0.0448, -0.0885]) tensor([0.4487, 0.1332, 0.2229, 0.1951]) -Greedy action tensor([ 2.0802, -1.0561, -0.4472, 0.8340]) tensor([0.7088, 0.0308, 0.0566, 0.2038]) -Greedy action tensor([ 1.2713, -0.5412, -0.0028, 0.3530]) tensor([0.5429, 0.0886, 0.1518, 0.2167]) -Greedy action tensor([ 1.1938, -0.7722, -0.2168, 0.1755]) tensor([0.5730, 0.0802, 0.1398, 0.2070]) -Greedy action tensor([ 1.2444, -0.5585, -0.7782, 0.4319]) tensor([0.5744, 0.0947, 0.0760, 0.2549]) -Greedy action tensor([ 0.9198, -0.5878, -0.0881, 0.4198]) tensor([0.4560, 0.1010, 0.1664, 0.2766]) -Greedy action tensor([ 1.6597, -0.7259, 0.0263, 0.5373]) tensor([0.6200, 0.0571, 0.1211, 0.2018]) -Greedy action tensor([ 0.1933, -0.2128, -0.2883, 0.4182]) tensor([0.2828, 0.1884, 0.1747, 0.3541]) -Greedy action tensor([ 0.7414, -0.3797, -0.2028, 0.1699]) tensor([0.4387, 0.1430, 0.1706, 0.2477]) -Greedy action tensor([ 1.8835, -0.4552, -0.3464, 0.3312]) tensor([0.7063, 0.0681, 0.0760, 0.1496]) -Greedy action tensor([ 1.5000, 0.0018, -0.2556, 0.0404]) tensor([0.6140, 0.1373, 0.1061, 0.1426]) -Greedy action tensor([ 1.3084, -0.4848, -0.2878, 0.3136]) tensor([0.5751, 0.0957, 0.1166, 0.2127]) -Greedy action tensor([ 1.0550, -0.4246, -0.3265, 0.0066]) tensor([0.5466, 0.1245, 0.1373, 0.1916]) -Greedy action tensor([ 0.9557, -0.4426, -0.4706, -0.2112]) tensor([0.5560, 0.1373, 0.1335, 0.1731]) -Greedy action tensor([ 0.8104, -0.2229, 0.1627, 0.0440]) tensor([0.4267, 0.1518, 0.2233, 0.1983]) -Greedy action tensor([ 1.8653, -0.8381, -0.2547, 0.7565]) tensor([0.6592, 0.0442, 0.0791, 0.2175]) -Greedy action tensor([ 1.6622, -0.0990, -0.1489, 0.4153]) tensor([0.6162, 0.1059, 0.1007, 0.1771]) -Greedy action tensor([ 1.2970, -0.5219, -0.2560, 0.2672]) tensor([0.5777, 0.0937, 0.1223, 0.2063]) -Greedy action tensor([ 1.7074, -0.6378, -0.3254, 0.4640]) tensor([0.6600, 0.0632, 0.0864, 0.1903]) -Greedy action tensor([ 1.2706, -0.5010, -0.1733, 0.3176]) tensor([0.5581, 0.0949, 0.1317, 0.2152]) -Greedy action tensor([ 1.1956, -0.4682, -0.2684, 0.5835]) tensor([0.5094, 0.0965, 0.1178, 0.2762]) -Greedy action tensor([ 0.6955, -0.2614, -0.1300, 0.0525]) tensor([0.4259, 0.1636, 0.1866, 0.2239]) -Greedy action tensor([ 1.5929, -0.7823, -0.4114, 0.4866]) tensor([0.6416, 0.0597, 0.0865, 0.2122]) -Greedy action tensor([ 2.4160, -0.6908, -0.3232, 0.4192]) tensor([0.8031, 0.0359, 0.0519, 0.1090]) -Greedy action tensor([ 1.2008, -0.5484, -0.6182, 0.1659]) tensor([0.5912, 0.1028, 0.0959, 0.2100]) -Greedy action tensor([ 1.2983, -0.3191, -0.4657, 0.4492]) tensor([0.5563, 0.1104, 0.0953, 0.2380]) -Greedy action tensor([ 1.6127, -0.3315, -0.4117, 0.3419]) tensor([0.6428, 0.0920, 0.0849, 0.1804]) -Greedy action tensor([ 0.8592, -0.3591, -0.2988, 0.3516]) tensor([0.4521, 0.1337, 0.1420, 0.2721]) -Greedy action tensor([ 1.1466, -0.5523, -0.3552, -0.0045]) tensor([0.5808, 0.1062, 0.1293, 0.1837]) -Greedy action tensor([ 1.4717, -0.5446, -0.1152, 0.2040]) tensor([0.6176, 0.0822, 0.1263, 0.1738]) -Greedy action tensor([ 1.0238, -0.2530, -0.3933, 0.2555]) tensor([0.5037, 0.1405, 0.1221, 0.2336]) -Greedy action tensor([ 1.1856, -0.0336, 0.0923, 0.2393]) tensor([0.4954, 0.1464, 0.1660, 0.1923]) -Greedy action tensor([ 1.4711, -0.4090, 0.0060, 0.7306]) tensor([0.5375, 0.0820, 0.1242, 0.2563]) -Greedy action tensor([ 0.4086, -0.2470, -0.1537, 0.0375]) tensor([0.3599, 0.1868, 0.2051, 0.2483]) -Greedy action tensor([1.0990, 0.1932, 0.0675, 0.1195]) tensor([0.4681, 0.1892, 0.1669, 0.1758]) -Greedy action tensor([ 1.6704, -0.6267, -0.4632, 0.3538]) tensor([0.6725, 0.0676, 0.0796, 0.1803]) -Greedy action tensor([ 0.9258, -0.1422, -0.2763, 0.1093]) tensor([0.4793, 0.1647, 0.1441, 0.2118]) -Greedy action tensor([ 1.3454, -0.4827, -0.5422, 0.4380]) tensor([0.5828, 0.0937, 0.0883, 0.2352]) -Greedy action tensor([ 0.9570, 0.0821, -0.1905, -0.2743]) tensor([0.4935, 0.2058, 0.1567, 0.1441]) -Greedy action tensor([ 1.0870, 0.0751, -0.4111, 0.0763]) tensor([0.5125, 0.1863, 0.1146, 0.1865]) -Greedy action tensor([-0.1829, 0.6298, -0.6594, 1.2799]) tensor([0.1221, 0.2751, 0.0758, 0.5271]) -Greedy action tensor([-0.6582, -1.9815, 0.7243, -0.0056]) tensor([0.1394, 0.0371, 0.5556, 0.2678]) -Greedy action tensor([-0.1250, -0.1391, -0.2023, -0.2214]) tensor([0.2618, 0.2581, 0.2423, 0.2377]) -Greedy action tensor([0.8768, 0.8323, 0.2285, 0.0398]) tensor([0.3434, 0.3284, 0.1796, 0.1487]) -Greedy action tensor([-0.4918, -0.5093, -0.0305, -1.0498]) tensor([0.2415, 0.2373, 0.3830, 0.1382]) -Greedy action tensor([ 1.5236, -1.4871, 0.3132, 1.3959]) tensor([0.4489, 0.0221, 0.1338, 0.3951]) -Greedy action tensor([ 1.5568, -0.4021, 1.5437, 0.3512]) tensor([0.4119, 0.0581, 0.4066, 0.1234]) -Greedy action tensor([1.2728, 0.2847, 0.6967, 0.6848]) tensor([0.4016, 0.1495, 0.2258, 0.2231]) -Greedy action tensor([ 1.3799, -0.1547, -0.4249, 1.3685]) tensor([0.4222, 0.0910, 0.0694, 0.4174]) -Greedy action tensor([ 7.7579e-01, -5.4844e-01, 6.4991e-01, -7.7558e-04]) tensor([0.3835, 0.1020, 0.3381, 0.1764]) -Greedy action tensor([0.7136, 0.3655, 1.0124, 0.8967]) tensor([0.2350, 0.1659, 0.3168, 0.2822]) -Greedy action tensor([ 1.1302, -0.6451, 0.7379, 1.4185]) tensor([0.3146, 0.0533, 0.2125, 0.4197]) -Greedy action tensor([ 1.1798, -0.6364, 1.3103, 0.4061]) tensor([0.3619, 0.0589, 0.4123, 0.1669]) -Greedy action tensor([-0.5617, 0.0374, 0.9649, -1.2605]) tensor([0.1263, 0.2299, 0.5811, 0.0628]) -Greedy action tensor([ 1.4196, -1.1993, 0.5377, 1.8852]) tensor([0.3247, 0.0237, 0.1344, 0.5172]) -Greedy action tensor([-0.5382, 0.9406, 1.3247, -0.2590]) tensor([0.0760, 0.3336, 0.4898, 0.1005]) -Greedy action tensor([ 0.4557, -2.5444, 0.0194, -0.3461]) tensor([0.4663, 0.0232, 0.3014, 0.2091]) -Greedy action tensor([-0.0125, 0.0863, 1.0338, -0.9567]) tensor([0.1873, 0.2067, 0.5332, 0.0729]) -Greedy action tensor([-0.8486, -0.3583, 0.0321, 0.2168]) tensor([0.1258, 0.2054, 0.3036, 0.3651]) -Greedy action tensor([-0.3045, -1.3109, -0.4594, -0.2899]) tensor([0.3089, 0.1129, 0.2646, 0.3135]) -Greedy action tensor([ 1.4741, -1.0244, 1.8791, -0.6523]) tensor([0.3703, 0.0304, 0.5551, 0.0442]) -Greedy action tensor([ 1.2454, -1.4907, 1.5029, 1.4986]) tensor([0.2742, 0.0178, 0.3548, 0.3532]) -Greedy action tensor([ 0.9482, -0.1242, 1.0606, 0.8703]) tensor([0.2953, 0.1011, 0.3305, 0.2732]) -Greedy action tensor([ 0.4962, -1.6640, 0.1616, -0.0507]) tensor([0.4150, 0.0479, 0.2970, 0.2402]) -Greedy action tensor([1.3962, 0.4399, 0.1186, 0.3706]) tensor([0.4947, 0.1901, 0.1379, 0.1774]) -Greedy action tensor([ 1.1401, 0.7854, -0.7484, 0.8290]) tensor([0.3868, 0.2713, 0.0585, 0.2834]) -Greedy action tensor([-1.0690, -0.5990, -0.4465, -1.5196]) tensor([0.1960, 0.3137, 0.3653, 0.1249]) -Greedy action tensor([0.2561, 0.1144, 1.1562, 0.0664]) tensor([0.1940, 0.1684, 0.4772, 0.1605]) -Greedy action tensor([-0.9480, -0.7709, -0.1359, 0.1585]) tensor([0.1339, 0.1598, 0.3016, 0.4048]) -Greedy action tensor([ 0.3749, -1.7021, -0.0507, 0.7859]) tensor([0.3042, 0.0381, 0.1988, 0.4589]) -Greedy action tensor([ 0.5529, -2.7532, -0.0823, 0.0602]) tensor([0.4593, 0.0168, 0.2433, 0.2806]) -Greedy action tensor([ 0.5884, -0.2619, 0.1236, 0.7154]) tensor([0.3134, 0.1339, 0.1969, 0.3558]) -Greedy action tensor([ 0.8816, 0.6213, -1.1000, -0.6380]) tensor([0.4701, 0.3623, 0.0648, 0.1028]) -Greedy action tensor([-0.0918, 0.2410, -0.7726, 0.0399]) tensor([0.2474, 0.3451, 0.1252, 0.2822]) -Greedy action tensor([ 1.3199, -0.8694, 0.6315, 1.3132]) tensor([0.3835, 0.0429, 0.1927, 0.3809]) -Greedy action tensor([-0.2592, -0.9943, 0.2324, 1.1330]) tensor([0.1401, 0.0672, 0.2290, 0.5637]) -Greedy action tensor([ 1.2977, -0.3931, -0.4214, 0.2234]) tensor([0.5865, 0.1081, 0.1051, 0.2003]) -Greedy action tensor([ 1.2844, 0.1804, 0.2245, -0.3111]) tensor([0.5317, 0.1763, 0.1842, 0.1078]) -Greedy action tensor([0.4544, 0.1526, 0.0180, 0.9723]) tensor([0.2460, 0.1819, 0.1590, 0.4130]) -Greedy action tensor([ 0.2512, -1.7326, -0.1040, 1.2020]) tensor([0.2259, 0.0311, 0.1584, 0.5846]) -Greedy action tensor([ 1.1150, -0.1529, 1.3608, 0.3431]) tensor([0.3309, 0.0931, 0.4231, 0.1529]) -Greedy action tensor([-0.9846, 0.6644, 0.5251, 0.1362]) tensor([0.0725, 0.3771, 0.3280, 0.2224]) -Greedy action tensor([ 0.3572, -1.6617, 0.1044, 0.0540]) tensor([0.3777, 0.0502, 0.2933, 0.2789]) -Greedy action tensor([-0.9439, 0.3141, 1.1498, -0.3113]) tensor([0.0689, 0.2424, 0.5590, 0.1297]) -Greedy action tensor([ 0.0029, -0.6209, -0.0967, 0.2526]) tensor([0.2685, 0.1439, 0.2430, 0.3446]) -Greedy action tensor([ 1.3255, -0.6226, 0.3654, 0.3711]) tensor([0.5234, 0.0746, 0.2004, 0.2016]) -Greedy action tensor([-0.5221, 0.5399, 1.1232, -0.4808]) tensor([0.0988, 0.2859, 0.5123, 0.1030]) -Greedy action tensor([ 0.9928, -0.0606, 0.1256, 1.2639]) tensor([0.3247, 0.1132, 0.1364, 0.4257]) -Greedy action tensor([ 0.1389, 0.9387, -0.4315, 0.0966]) tensor([0.2106, 0.4685, 0.1190, 0.2019]) -Greedy action tensor([ 1.2307, 1.1898, -0.5374, 0.6499]) tensor([0.3717, 0.3569, 0.0634, 0.2080]) -Greedy action tensor([ 0.4339, 0.3602, 1.9043, -0.2282]) tensor([0.1472, 0.1367, 0.6402, 0.0759]) -Greedy action tensor([ 0.6330, -1.2792, -0.6763, -0.0100]) tensor([0.5145, 0.0760, 0.1389, 0.2705]) -Greedy action tensor([ 1.7049, -0.2458, 0.7968, 1.0984]) tensor([0.4783, 0.0680, 0.1929, 0.2608]) -Greedy action tensor([-0.9709, -0.7967, -1.2313, 0.4210]) tensor([0.1432, 0.1704, 0.1104, 0.5760]) -Greedy action tensor([-0.4151, -1.3154, -0.5415, 0.6591]) tensor([0.1917, 0.0779, 0.1690, 0.5614]) -Greedy action tensor([0.2130, 0.4871, 0.7052, 0.4860]) tensor([0.1899, 0.2498, 0.3107, 0.2495]) -Greedy action tensor([ 0.1362, -1.0441, 0.1711, 0.8672]) tensor([0.2263, 0.0695, 0.2343, 0.4699]) -Greedy action tensor([ 0.2710, -2.0398, -0.4810, 0.3875]) tensor([0.3712, 0.0368, 0.1750, 0.4170]) -Greedy action tensor([ 1.2517, -1.6758, 0.0390, 0.9169]) tensor([0.4839, 0.0259, 0.1439, 0.3462]) -Greedy action tensor([ 0.3008, -0.5312, 1.1510, 1.3717]) tensor([0.1494, 0.0650, 0.3496, 0.4360]) -Greedy action tensor([-0.1251, -0.1516, 1.2331, -0.1834]) tensor([0.1469, 0.1431, 0.5714, 0.1386]) -Greedy action tensor([ 2.8500e-01, -2.7424e+00, -2.2490e-03, 7.6115e-01]) tensor([0.2934, 0.0142, 0.2201, 0.4723]) -Greedy action tensor([-0.5451, -1.3169, -0.5652, 0.1481]) tensor([0.2251, 0.1040, 0.2206, 0.4502]) -Greedy action tensor([ 1.3794, -0.3134, 0.1606, -0.2788]) tensor([0.5988, 0.1102, 0.1770, 0.1141]) -Greedy action tensor([1.2845, 0.0252, 0.7350, 1.2671]) tensor([0.3516, 0.0998, 0.2030, 0.3456]) -Greedy action tensor([-0.3805, -1.0480, -0.2846, 0.8820]) tensor([0.1627, 0.0834, 0.1790, 0.5749]) -Greedy action tensor([ 0.7174, -0.3226, 0.1595, 0.7929]) tensor([0.3329, 0.1177, 0.1905, 0.3590]) -Greedy action tensor([ 0.5821, -1.6665, 1.1403, 0.3416]) tensor([0.2748, 0.0290, 0.4802, 0.2160]) -Greedy action tensor([-0.6355, -0.2193, -1.0642, 0.5253]) tensor([0.1572, 0.2384, 0.1024, 0.5020]) -Greedy action tensor([ 0.6760, -1.0782, 0.0404, 0.9097]) tensor([0.3372, 0.0583, 0.1786, 0.4259]) -Greedy action tensor([ 1.0338, -1.1502, 0.9559, 1.3989]) tensor([0.2875, 0.0324, 0.2660, 0.4142]) -Greedy action tensor([ 0.1167, 0.8651, -0.6015, 0.2944]) tensor([0.2085, 0.4407, 0.1017, 0.2491]) -Greedy action tensor([ 0.1473, 0.0449, -0.7294, 0.5615]) tensor([0.2610, 0.2356, 0.1086, 0.3949]) -Greedy action tensor([ 0.7458, 0.6049, -0.6045, 0.2457]) tensor([0.3657, 0.3177, 0.0948, 0.2218]) -Greedy action tensor([-0.5331, 0.4580, 1.7212, 0.3130]) tensor([0.0643, 0.1732, 0.6126, 0.1499]) -Greedy action tensor([0.7475, 0.4113, 1.3329, 0.3349]) tensor([0.2397, 0.1713, 0.4304, 0.1587]) -Greedy action tensor([-0.5235, -0.1882, -0.0021, 0.6273]) tensor([0.1381, 0.1930, 0.2326, 0.4363]) -Greedy action tensor([-0.9089, -0.6312, 0.8483, 0.5972]) tensor([0.0792, 0.1046, 0.4591, 0.3572]) -Greedy action tensor([ 0.2898, -1.5481, 1.0601, 0.4633]) tensor([0.2218, 0.0353, 0.4791, 0.2638]) -Greedy action tensor([ 1.8220e-03, 5.9267e-01, 1.8798e+00, -8.5321e-01]) tensor([0.1023, 0.1848, 0.6693, 0.0435]) -Greedy action tensor([-0.7554, -0.8964, -0.3144, 1.0533]) tensor([0.1050, 0.0912, 0.1632, 0.6407]) -Greedy action tensor([-1.6301, -0.4326, 0.9773, 0.7843]) tensor([0.0344, 0.1140, 0.4668, 0.3848]) -Greedy action tensor([-1.9269, -0.4490, 0.6571, -0.1688]) tensor([0.0409, 0.1794, 0.5423, 0.2374]) -Greedy action tensor([-1.1407, -0.6165, 0.8970, 1.2420]) tensor([0.0472, 0.0797, 0.3620, 0.5111]) -Greedy action tensor([-1.5727, -0.5398, 0.4571, 0.0560]) tensor([0.0605, 0.1701, 0.4608, 0.3086]) -Greedy action tensor([-0.8525, -0.6387, 0.3092, -0.0816]) tensor([0.1317, 0.1630, 0.4207, 0.2846]) -Greedy action tensor([-1.8396, -0.4673, 0.6340, -0.0672]) tensor([0.0441, 0.1738, 0.5228, 0.2593]) -Greedy action tensor([-1.8837, -0.4693, 0.6375, -0.1327]) tensor([0.0429, 0.1764, 0.5337, 0.2470]) -Greedy action tensor([-1.3378, 0.4871, 0.2821, 0.3682]) tensor([0.0563, 0.3492, 0.2845, 0.3100]) -Greedy action tensor([-0.5981, -0.4503, 1.1490, 1.6230]) tensor([0.0584, 0.0677, 0.3353, 0.5386]) -Greedy action tensor([-0.6178, -0.5389, 0.1754, 0.2545]) tensor([0.1496, 0.1619, 0.3307, 0.3579]) -Greedy action tensor([-1.6551, -0.5399, 1.0603, 0.7090]) tensor([0.0336, 0.1024, 0.5071, 0.3569]) -Greedy action tensor([-0.6593, -0.6514, 0.0227, 0.0153]) tensor([0.1681, 0.1694, 0.3324, 0.3300]) -Greedy action tensor([-1.3687, 0.0926, 0.2848, 0.1275]) tensor([0.0667, 0.2874, 0.3483, 0.2976]) -Greedy action tensor([-1.1479, -0.2950, 1.0087, 1.2602]) tensor([0.0433, 0.1016, 0.3741, 0.4811]) -Greedy action tensor([-1.3331, -0.5693, 0.3802, 0.2227]) tensor([0.0744, 0.1598, 0.4130, 0.3528]) -Greedy action tensor([-0.9486, -0.6626, 0.2955, 0.0576]) tensor([0.1171, 0.1559, 0.4065, 0.3204]) -Greedy action tensor([-2.0397, -0.8226, 1.0144, 0.4293]) tensor([0.0267, 0.0903, 0.5671, 0.3159]) -Greedy action tensor([-0.8545, -0.4767, 0.9247, 1.2386]) tensor([0.0606, 0.0885, 0.3592, 0.4917]) -Greedy action tensor([-1.1221, -0.5651, 0.2418, 0.3200]) tensor([0.0919, 0.1603, 0.3593, 0.3885]) -Greedy action tensor([-0.5444, 0.0548, 0.7432, 1.2733]) tensor([0.0793, 0.1445, 0.2876, 0.4886]) -Greedy action tensor([-1.6483, -0.4023, 0.8711, 0.5220]) tensor([0.0390, 0.1355, 0.4841, 0.3415]) -Greedy action tensor([-2.0140, -0.9273, 0.3270, -0.2137]) tensor([0.0490, 0.1453, 0.5092, 0.2965]) -Greedy action tensor([-1.2651, -0.9245, 1.0252, 0.0523]) tensor([0.0624, 0.0878, 0.6167, 0.2331]) -Greedy action tensor([-0.7150, 0.0929, 0.1716, 0.2453]) tensor([0.1207, 0.2708, 0.2930, 0.3154]) -Greedy action tensor([-1.1649, -0.5945, 0.2595, 0.2799]) tensor([0.0896, 0.1584, 0.3722, 0.3798]) -Greedy action tensor([-1.8685, -0.9590, 0.1323, -0.4592]) tensor([0.0668, 0.1659, 0.4940, 0.2734]) -Greedy action tensor([-1.3656, -0.4548, 0.7746, -0.6629]) tensor([0.0714, 0.1775, 0.6069, 0.1442]) -Greedy action tensor([-1.0540, -0.5492, 0.3981, 0.0729]) tensor([0.0999, 0.1654, 0.4266, 0.3081]) -Greedy action tensor([-1.6045, -0.5277, 0.5095, 0.0813]) tensor([0.0568, 0.1667, 0.4702, 0.3064]) -Greedy action tensor([-1.1263, 0.6940, 0.3764, 0.5646]) tensor([0.0585, 0.3612, 0.2629, 0.3174]) -Greedy action tensor([-1.6727, -0.5230, 0.5256, 0.0327]) tensor([0.0536, 0.1691, 0.4826, 0.2948]) -Greedy action tensor([-1.9455, -0.4540, 0.6665, -0.1819]) tensor([0.0402, 0.1784, 0.5472, 0.2342]) -Greedy action tensor([-1.8411, -0.4119, 0.5975, -0.0969]) tensor([0.0447, 0.1868, 0.5125, 0.2559]) -Greedy action tensor([-1.7981, -0.3696, 0.6412, -0.0318]) tensor([0.0445, 0.1856, 0.5099, 0.2601]) -Greedy action tensor([ 0.0143, -0.1896, 0.6994, 1.6077]) tensor([0.1147, 0.0935, 0.2275, 0.5643]) -Greedy action tensor([-1.9469, -0.9088, 0.4576, -0.0680]) tensor([0.0466, 0.1317, 0.5164, 0.3053]) -Greedy action tensor([-1.0039, -0.6668, 1.0429, 1.3106]) tensor([0.0493, 0.0691, 0.3821, 0.4994]) -Greedy action tensor([-1.8587, -0.4579, 0.6235, -0.1325]) tensor([0.0442, 0.1792, 0.5285, 0.2482]) -Greedy action tensor([-1.5206, -0.5260, 0.5923, 0.3896]) tensor([0.0534, 0.1443, 0.4417, 0.3606]) -Greedy action tensor([-1.1447, -0.6140, 0.2737, 0.3805]) tensor([0.0875, 0.1488, 0.3615, 0.4022]) -Greedy action tensor([-1.9101, -0.4580, 0.6544, -0.1603]) tensor([0.0416, 0.1779, 0.5410, 0.2395]) -Greedy action tensor([-1.7065, -0.4151, 0.5402, -0.0536]) tensor([0.0518, 0.1883, 0.4895, 0.2703]) -Greedy action tensor([-0.5760, -0.5759, 0.2151, 0.1477]) tensor([0.1595, 0.1596, 0.3519, 0.3290]) -Greedy action tensor([-1.4229, -0.5238, 0.4652, -0.0881]) tensor([0.0721, 0.1773, 0.4766, 0.2740]) -Greedy action tensor([-1.4423, -0.5026, 0.7834, -0.6399]) tensor([0.0664, 0.1701, 0.6153, 0.1482]) -Greedy action tensor([-1.7992, -0.5317, 0.8597, 0.1814]) tensor([0.0383, 0.1362, 0.5476, 0.2779]) -Greedy action tensor([-0.8522, -0.5823, 0.1804, 0.3197]) tensor([0.1198, 0.1569, 0.3365, 0.3868]) -Greedy action tensor([-0.5554, -0.4211, 0.2241, 0.0089]) tensor([0.1644, 0.1880, 0.3585, 0.2891]) -Greedy action tensor([-1.9325, -0.4559, 0.6752, -0.1692]) tensor([0.0404, 0.1767, 0.5476, 0.2354]) -Greedy action tensor([-0.8907, -0.3546, 0.0861, -0.5032]) tensor([0.1462, 0.2500, 0.3884, 0.2154]) -Greedy action tensor([-1.0052, 0.1736, 0.2319, 0.0186]) tensor([0.0954, 0.3102, 0.3288, 0.2656]) -Greedy action tensor([-1.4850, -0.5362, 1.3416, 1.1564]) tensor([0.0290, 0.0749, 0.4894, 0.4067]) -Greedy action tensor([ 0.2575, 0.9908, -0.0222, 0.4055]) tensor([0.2001, 0.4166, 0.1513, 0.2320]) -Greedy action tensor([-1.8355, -0.4681, 0.6771, 0.0122]) tensor([0.0424, 0.1663, 0.5226, 0.2688]) -Greedy action tensor([-0.0733, -0.0758, 1.0576, 1.6302]) tensor([0.0944, 0.0942, 0.2926, 0.5188]) -Greedy action tensor([-1.7602, -0.4918, 0.5936, -0.0220]) tensor([0.0482, 0.1712, 0.5068, 0.2738]) -Greedy action tensor([-1.9219, -0.4192, 1.0172, 0.5171]) tensor([0.0279, 0.1253, 0.5271, 0.3197]) -Greedy action tensor([-0.6033, -0.5128, 0.1871, 0.3884]) tensor([0.1430, 0.1565, 0.3151, 0.3854]) -Greedy action tensor([-1.7696, -0.4386, 0.6463, 0.0271]) tensor([0.0454, 0.1719, 0.5088, 0.2739]) -Greedy action tensor([-0.7400, -0.4236, 0.3567, -0.2572]) tensor([0.1431, 0.1964, 0.4286, 0.2319]) -Greedy action tensor([-1.3660, -0.5917, 0.4175, -0.0407]) tensor([0.0776, 0.1684, 0.4619, 0.2921]) -Greedy action tensor([-2.0592, -0.6390, 0.9836, 0.4405]) tensor([0.0261, 0.1081, 0.5476, 0.3182]) -Greedy action tensor([-1.2787, -0.5553, 0.2869, 0.3135]) tensor([0.0784, 0.1615, 0.3750, 0.3851]) -Greedy action tensor([-1.8185, -0.2841, 0.5839, -0.0728]) tensor([0.0446, 0.2069, 0.4929, 0.2556]) -Greedy action tensor([-1.6456, -0.5290, 0.5748, 0.0774]) tensor([0.0530, 0.1619, 0.4882, 0.2969]) -Greedy action tensor([-2.0161, -0.7060, 0.7497, 0.0316]) tensor([0.0353, 0.1308, 0.5606, 0.2734]) -Greedy action tensor([-1.9433, -0.4493, 0.6635, -0.1799]) tensor([0.0403, 0.1793, 0.5457, 0.2348]) -Greedy action tensor([-0.2008, 1.2226, 0.0427, 0.7827]) tensor([0.1099, 0.4561, 0.1402, 0.2938]) -Greedy action tensor([-1.6153, -0.2839, -0.0056, -0.3711]) tensor([0.0754, 0.2856, 0.3772, 0.2617]) -Greedy action tensor([-0.2620, -0.3698, 0.2031, 0.2359]) tensor([0.1947, 0.1748, 0.3100, 0.3204]) -Greedy action tensor([-1.5040, -0.4664, 0.9843, 0.9976]) tensor([0.0356, 0.1006, 0.4290, 0.4348]) -Greedy action tensor([-1.7921, -0.5056, 0.5944, -0.0941]) tensor([0.0477, 0.1727, 0.5189, 0.2607]) -Greedy action tensor([-0.9495, -0.4768, 0.8281, 1.4314]) tensor([0.0517, 0.0830, 0.3060, 0.5593]) -Greedy action tensor([-0.6323, 0.4034, 0.1171, 0.0665]) tensor([0.1259, 0.3546, 0.2663, 0.2532]) -Greedy action tensor([-1.8236, -0.1095, 0.5577, -0.0956]) tensor([0.0435, 0.2414, 0.4704, 0.2448]) -Greedy action tensor([-1.9194, -0.4616, 0.6513, -0.1666]) tensor([0.0414, 0.1780, 0.5416, 0.2390]) -Greedy action tensor([-1.6135, -0.5138, 0.5218, 0.1482]) tensor([0.0547, 0.1642, 0.4626, 0.3184]) -Greedy action tensor([-1.7645, -0.4527, 0.5819, -0.0423]) tensor([0.0482, 0.1789, 0.5033, 0.2696]) -Greedy action tensor([-1.3898, -0.5255, 1.1186, 1.1767]) tensor([0.0349, 0.0828, 0.4284, 0.4540]) -Greedy action tensor([-1.4987, 0.2273, 0.2902, 0.0710]) tensor([0.0574, 0.3228, 0.3437, 0.2761]) -Greedy action tensor([-1.3117, -0.4707, 1.3382, 1.2672]) tensor([0.0326, 0.0756, 0.4617, 0.4301]) -Greedy action tensor([ 0.9425, -0.6623, -0.0892, -0.6941]) tensor([0.5708, 0.1147, 0.2034, 0.1111]) -Greedy action tensor([ 0.2404, 0.0283, 0.1749, -0.3437]) tensor([0.3027, 0.2449, 0.2835, 0.1688]) -Greedy action tensor([ 0.5224, -0.2303, -0.0124, -0.5200]) tensor([0.4150, 0.1955, 0.2431, 0.1463]) -Greedy action tensor([ 0.6629, -0.6266, 0.0406, -0.7115]) tensor([0.4842, 0.1334, 0.2599, 0.1225]) -Greedy action tensor([ 0.7635, -0.4567, -0.1003, -0.4293]) tensor([0.4950, 0.1461, 0.2087, 0.1502]) -Greedy action tensor([ 0.5642, -0.3090, -0.0097, -0.5029]) tensor([0.4301, 0.1796, 0.2423, 0.1480]) -Greedy action tensor([ 0.9309, -0.6250, -0.0155, -0.2628]) tensor([0.5257, 0.1109, 0.2040, 0.1593]) -Greedy action tensor([ 0.9480, -0.4623, 0.0855, -0.5391]) tensor([0.5285, 0.1290, 0.2231, 0.1195]) -Greedy action tensor([ 0.4588, -0.1899, -0.2007, -0.1558]) tensor([0.3875, 0.2026, 0.2004, 0.2096]) -Greedy action tensor([ 0.1731, 0.0208, -0.1456, -0.2604]) tensor([0.3092, 0.2655, 0.2248, 0.2004]) -Greedy action tensor([ 0.7088, 0.0077, 0.0462, -0.4590]) tensor([0.4306, 0.2136, 0.2220, 0.1339]) -Greedy action tensor([ 1.2475, -0.2487, 0.1749, -0.7425]) tensor([0.5873, 0.1315, 0.2009, 0.0803]) -Greedy action tensor([ 0.3813, 0.0427, 0.0022, -0.2336]) tensor([0.3404, 0.2426, 0.2330, 0.1840]) -Greedy action tensor([ 0.4243, -0.2131, -0.0438, -0.2782]) tensor([0.3773, 0.1995, 0.2363, 0.1869]) -Greedy action tensor([ 0.8984, -0.6678, 0.1359, -0.6473]) tensor([0.5295, 0.1106, 0.2470, 0.1129]) -Greedy action tensor([ 0.7430, -0.3316, -0.0133, -0.3105]) tensor([0.4631, 0.1581, 0.2174, 0.1615]) -Greedy action tensor([ 0.6607, -0.1740, -0.0378, -0.3939]) tensor([0.4387, 0.1904, 0.2182, 0.1528]) -Greedy action tensor([ 0.8425, -0.8626, -0.0978, -0.4388]) tensor([0.5406, 0.0982, 0.2111, 0.1501]) -Greedy action tensor([ 0.4270, -0.4314, -0.0069, -0.0149]) tensor([0.3684, 0.1561, 0.2387, 0.2368]) -Greedy action tensor([ 0.2718, -0.0493, -0.0076, -0.2138]) tensor([0.3229, 0.2342, 0.2442, 0.1987]) -Greedy action tensor([ 0.5372, -0.5274, 0.2303, -0.5208]) tensor([0.4119, 0.1421, 0.3031, 0.1430]) -Greedy action tensor([ 1.0579, -0.4971, 0.0417, -0.5574]) tensor([0.5643, 0.1192, 0.2043, 0.1122]) -Greedy action tensor([ 0.6133, 0.1354, -0.0893, -0.0706]) tensor([0.3817, 0.2367, 0.1890, 0.1926]) -Greedy action tensor([ 0.4603, -0.0385, 0.0016, -0.1595]) tensor([0.3600, 0.2187, 0.2276, 0.1937]) -Greedy action tensor([ 0.6481, -0.4000, -0.0455, -0.4214]) tensor([0.4559, 0.1598, 0.2278, 0.1564]) -Greedy action tensor([ 0.6291, -0.3011, -0.1150, -0.2873]) tensor([0.4406, 0.1738, 0.2094, 0.1762]) -Greedy action tensor([ 0.3840, -0.3556, -0.0624, -0.5104]) tensor([0.3959, 0.1890, 0.2533, 0.1619]) -Greedy action tensor([ 0.4432, -0.0916, 0.0900, -0.1017]) tensor([0.3487, 0.2042, 0.2449, 0.2022]) -Greedy action tensor([ 0.7300, -0.3968, -0.0348, -0.0951]) tensor([0.4489, 0.1455, 0.2089, 0.1967]) -Greedy action tensor([ 0.7028, -0.2827, 0.0010, -0.3590]) tensor([0.4515, 0.1685, 0.2238, 0.1562]) -Greedy action tensor([ 0.5486, -0.2978, 0.0561, -0.2875]) tensor([0.4043, 0.1734, 0.2471, 0.1752]) -Greedy action tensor([ 0.7479, -0.3562, -0.0200, -0.4222]) tensor([0.4749, 0.1574, 0.2203, 0.1474]) -Greedy action tensor([ 0.3050, -0.1226, 0.0308, -0.3036]) tensor([0.3383, 0.2206, 0.2571, 0.1840]) -Greedy action tensor([ 0.4186, 0.0404, -0.0591, -0.1734]) tensor([0.3498, 0.2397, 0.2170, 0.1935]) -Greedy action tensor([ 0.7460, -0.7590, 0.0479, -0.3324]) tensor([0.4855, 0.1078, 0.2416, 0.1651]) -Greedy action tensor([ 0.4978, -0.0321, 0.0221, -0.1152]) tensor([0.3634, 0.2139, 0.2258, 0.1969]) -Greedy action tensor([ 1.1934, -0.9752, -0.0194, -0.6142]) tensor([0.6346, 0.0726, 0.1887, 0.1041]) -Greedy action tensor([ 0.3365, -0.4292, -0.2846, -0.1496]) tensor([0.3821, 0.1777, 0.2053, 0.2350]) -Greedy action tensor([ 0.9950, -0.5534, 0.1373, -0.5895]) tensor([0.5430, 0.1154, 0.2303, 0.1113]) -Greedy action tensor([ 0.1855, -0.0534, -0.0559, -0.3899]) tensor([0.3189, 0.2512, 0.2505, 0.1794]) -Greedy action tensor([ 0.2158, 0.1351, -0.0855, -0.2410]) tensor([0.3034, 0.2799, 0.2245, 0.1922]) -Greedy action tensor([ 0.6534, -0.5493, -0.0692, -0.2954]) tensor([0.4602, 0.1382, 0.2234, 0.1782]) -Greedy action tensor([ 0.5397, -0.3280, -0.0082, -0.1938]) tensor([0.4035, 0.1694, 0.2333, 0.1938]) -Greedy action tensor([ 0.5364, -0.3262, 0.0087, -0.0825]) tensor([0.3921, 0.1655, 0.2313, 0.2111]) -Greedy action tensor([ 0.7567, -0.6136, -0.1328, -0.4509]) tensor([0.5092, 0.1294, 0.2092, 0.1522]) -Greedy action tensor([ 0.2943, -0.0641, -0.0895, -0.2104]) tensor([0.3351, 0.2342, 0.2283, 0.2023]) -Greedy action tensor([ 0.7095, -0.1927, 0.0878, -0.5083]) tensor([0.4467, 0.1812, 0.2399, 0.1322]) -Greedy action tensor([ 0.7887, -0.3403, -0.0278, -0.4825]) tensor([0.4888, 0.1581, 0.2160, 0.1371]) -Greedy action tensor([ 0.4688, -0.1564, 0.1566, -0.2547]) tensor([0.3634, 0.1945, 0.2659, 0.1762]) -Greedy action tensor([ 0.3227, -0.0133, -0.0892, -0.1310]) tensor([0.3320, 0.2372, 0.2199, 0.2109]) -Greedy action tensor([ 0.6580, 0.0025, -0.1202, -0.5006]) tensor([0.4362, 0.2265, 0.2003, 0.1369]) -Greedy action tensor([ 0.4987, 0.0695, -0.0734, -0.3233]) tensor([0.3767, 0.2452, 0.2126, 0.1656]) -Greedy action tensor([ 0.2310, -0.1290, -0.1277, -0.4227]) tensor([0.3429, 0.2392, 0.2395, 0.1783]) -Greedy action tensor([ 0.3733, 0.0940, -0.2240, -0.0128]) tensor([0.3349, 0.2533, 0.1843, 0.2276]) -Greedy action tensor([ 0.6174, -0.3076, -0.0567, -0.3210]) tensor([0.4353, 0.1726, 0.2218, 0.1703]) -Greedy action tensor([ 0.6136, -0.3256, -0.0491, -0.4601]) tensor([0.4448, 0.1739, 0.2293, 0.1520]) -Greedy action tensor([ 0.4712, -0.3392, -0.0407, -0.5209]) tensor([0.4141, 0.1841, 0.2482, 0.1535]) -Greedy action tensor([ 0.6964, -0.5249, -0.2457, -0.4623]) tensor([0.5004, 0.1475, 0.1950, 0.1571]) -Greedy action tensor([ 0.5059, -0.1725, -0.0097, -0.1016]) tensor([0.3775, 0.1915, 0.2254, 0.2056]) -Greedy action tensor([ 0.3469, -0.5017, -0.0285, -0.0202]) tensor([0.3562, 0.1524, 0.2447, 0.2467]) -Greedy action tensor([ 0.7625, -0.3711, 0.0184, -0.2875]) tensor([0.4658, 0.1499, 0.2213, 0.1630]) -Greedy action tensor([ 0.7561, -0.8284, -0.0577, -0.2410]) tensor([0.4957, 0.1017, 0.2197, 0.1829]) -Greedy action tensor([ 0.4902, -0.0103, -0.1739, -0.1710]) tensor([0.3792, 0.2299, 0.1952, 0.1957]) -Greedy action tensor([ 0.8539, -0.6115, -0.2651, -0.5192]) tensor([0.5522, 0.1276, 0.1804, 0.1399]) -Greedy action tensor([ 0.8329, -0.3071, -0.0174, -0.4458]) tensor([0.4937, 0.1579, 0.2110, 0.1375]) -Greedy action tensor([ 0.4318, -0.2589, -0.1731, -0.2689]) tensor([0.3931, 0.1971, 0.2147, 0.1951]) -Greedy action tensor([ 0.1886, -0.0534, 0.1533, -0.2144]) tensor([0.2925, 0.2296, 0.2824, 0.1955]) -Greedy action tensor([ 0.6005, -0.2948, -0.0497, -0.1870]) tensor([0.4192, 0.1712, 0.2188, 0.1907]) -Greedy action tensor([ 0.3437, -0.2044, -0.0781, -0.2273]) tensor([0.3573, 0.2065, 0.2343, 0.2019]) -Greedy action tensor([ 0.6213, -0.4850, 0.0326, -0.0434]) tensor([0.4166, 0.1378, 0.2312, 0.2143]) -Greedy action tensor([ 0.2833, -0.0042, -0.0619, -0.4440]) tensor([0.3400, 0.2550, 0.2407, 0.1643]) -Greedy action tensor([ 0.6451, -0.2001, -0.1234, -0.1511]) tensor([0.4266, 0.1832, 0.1978, 0.1924]) -Greedy action tensor([ 0.7015, -0.5472, -0.0375, -0.2941]) tensor([0.4686, 0.1344, 0.2238, 0.1732]) -Greedy action tensor([ 0.6405, -0.1786, -0.2175, -0.4438]) tensor([0.4539, 0.2001, 0.1925, 0.1535]) -Greedy action tensor([ 0.5834, -0.5075, -0.1478, -0.5712]) tensor([0.4690, 0.1575, 0.2257, 0.1478]) -Greedy action tensor([ 0.7399, -0.3313, -0.0843, -0.1845]) tensor([0.4591, 0.1573, 0.2014, 0.1822]) -Greedy action tensor([ 0.7879, -0.5220, -0.1436, -0.4516]) tensor([0.5119, 0.1382, 0.2017, 0.1482]) -Greedy action tensor([ 0.5204, -0.3287, -0.0446, -0.4855]) tensor([0.4234, 0.1811, 0.2406, 0.1548]) -Greedy action tensor([ 1.1432, -1.3501, 0.0478, -0.5228]) tensor([0.6227, 0.0515, 0.2082, 0.1177]) -Greedy action tensor([ 0.7520, -0.0519, -0.0303, -0.3519]) tensor([0.4471, 0.2001, 0.2045, 0.1483]) -Greedy action tensor([ 0.5842, -0.1248, 0.0354, -0.4453]) tensor([0.4121, 0.2028, 0.2380, 0.1472]) -Greedy action tensor([ 1.9814, -0.8411, -0.6183, 0.4960]) tensor([0.7352, 0.0437, 0.0546, 0.1665]) -Greedy action tensor([ 1.3942, -0.7027, -0.0290, 0.2438]) tensor([0.5951, 0.0731, 0.1434, 0.1884]) -Greedy action tensor([ 1.0084, 0.0268, -0.4008, -0.2341]) tensor([0.5242, 0.1964, 0.1281, 0.1513]) -Greedy action tensor([ 1.3905, -0.3736, -0.6113, -0.2270]) tensor([0.6645, 0.1139, 0.0898, 0.1318]) -Greedy action tensor([ 0.5738, -0.3751, 0.1660, -0.0280]) tensor([0.3846, 0.1489, 0.2558, 0.2107]) -Greedy action tensor([ 0.6464, -0.1314, -0.1332, -0.2779]) tensor([0.4320, 0.1985, 0.1981, 0.1714]) -Greedy action tensor([ 1.2184, -0.5880, -0.1807, 0.4958]) tensor([0.5273, 0.0866, 0.1301, 0.2560]) -Greedy action tensor([ 1.6049, -0.3418, -0.2849, 0.7154]) tensor([0.5866, 0.0837, 0.0886, 0.2410]) -Greedy action tensor([ 1.1983e+00, 6.2096e-04, -6.2687e-01, -4.6432e-02]) tensor([0.5711, 0.1724, 0.0920, 0.1645]) -Greedy action tensor([ 1.5799, -0.6845, 0.0417, 0.1932]) tensor([0.6375, 0.0662, 0.1369, 0.1593]) -Greedy action tensor([ 1.8877, -0.6857, -0.1831, 0.4967]) tensor([0.6891, 0.0526, 0.0869, 0.1715]) -Greedy action tensor([ 0.6263, -0.2257, -0.0286, 0.1024]) tensor([0.3940, 0.1680, 0.2047, 0.2333]) -Greedy action tensor([ 1.0280, -0.3604, -0.0726, 0.0176]) tensor([0.5138, 0.1282, 0.1709, 0.1871]) -Greedy action tensor([ 1.2473, -0.2982, -0.1331, 0.1312]) tensor([0.5580, 0.1190, 0.1403, 0.1828]) -Greedy action tensor([ 1.6877, -0.8376, -0.2994, 0.6848]) tensor([0.6313, 0.0505, 0.0866, 0.2316]) -Greedy action tensor([ 1.5046, -0.6048, -0.0501, 0.4802]) tensor([0.5912, 0.0717, 0.1249, 0.2122]) -Greedy action tensor([ 0.4059, -0.4828, 0.0810, -0.1473]) tensor([0.3692, 0.1518, 0.2668, 0.2123]) -Greedy action tensor([ 1.6260, -0.5308, -0.5288, 0.3804]) tensor([0.6582, 0.0761, 0.0763, 0.1894]) -Greedy action tensor([ 1.1097, 0.0046, -0.1814, 0.0258]) tensor([0.5143, 0.1703, 0.1414, 0.1740]) -Greedy action tensor([ 0.1854, -0.0634, -0.7136, 0.5337]) tensor([0.2775, 0.2164, 0.1129, 0.3932]) -Greedy action tensor([ 0.9163, -0.5959, -0.0127, 0.0960]) tensor([0.4865, 0.1072, 0.1921, 0.2142]) -Greedy action tensor([ 2.4800, -0.9269, -0.1586, 0.3894]) tensor([0.8142, 0.0270, 0.0582, 0.1007]) -Greedy action tensor([ 1.7640, -0.7716, -0.2185, 0.5997]) tensor([0.6540, 0.0518, 0.0901, 0.2041]) -Greedy action tensor([ 1.7653, 0.4078, -0.1502, -0.3066]) tensor([0.6534, 0.1681, 0.0962, 0.0823]) -Greedy action tensor([ 0.6136, -0.0810, -0.0109, 0.1088]) tensor([0.3790, 0.1892, 0.2030, 0.2288]) -Greedy action tensor([ 1.6269, -0.3682, -0.3866, 0.4167]) tensor([0.6379, 0.0867, 0.0852, 0.1902]) -Greedy action tensor([ 1.7339, -0.0460, -0.0766, 0.3203]) tensor([0.6347, 0.1070, 0.1038, 0.1544]) -Greedy action tensor([ 1.3310, -0.5710, -0.3456, 0.1626]) tensor([0.6071, 0.0906, 0.1135, 0.1887]) -Greedy action tensor([ 1.5198, -0.3780, -0.1834, 0.2794]) tensor([0.6168, 0.0925, 0.1123, 0.1784]) -Greedy action tensor([ 1.5211, -0.8528, -0.1796, 0.3577]) tensor([0.6297, 0.0586, 0.1149, 0.1967]) -Greedy action tensor([ 1.7215, -0.6912, -0.3250, 0.2240]) tensor([0.6933, 0.0621, 0.0896, 0.1551]) -Greedy action tensor([ 1.4898, -0.3387, -0.5139, 0.2349]) tensor([0.6327, 0.1016, 0.0853, 0.1804]) -Greedy action tensor([ 2.8308, -1.4221, -0.5801, 0.9813]) tensor([0.8302, 0.0118, 0.0274, 0.1306]) -Greedy action tensor([ 1.4647, -0.3152, -0.7533, 0.2055]) tensor([0.6405, 0.1080, 0.0697, 0.1818]) -Greedy action tensor([ 1.6643, -0.1863, -0.2914, 0.0554]) tensor([0.6672, 0.1048, 0.0944, 0.1335]) -Greedy action tensor([ 1.2659, -0.1899, -0.3067, 0.2899]) tensor([0.5502, 0.1283, 0.1142, 0.2073]) -Greedy action tensor([ 1.6538, -0.2987, -0.2560, 0.7274]) tensor([0.5931, 0.0842, 0.0878, 0.2349]) -Greedy action tensor([ 1.2900, 0.3168, 0.0723, -0.3156]) tensor([0.5335, 0.2016, 0.1578, 0.1071]) -Greedy action tensor([ 0.9003, -0.6713, -0.1767, 0.4356]) tensor([0.4594, 0.0954, 0.1565, 0.2887]) -Greedy action tensor([ 1.1985, -0.3561, -0.0239, 0.1103]) tensor([0.5427, 0.1147, 0.1598, 0.1828]) -Greedy action tensor([ 1.5388, -0.3908, -0.2539, 0.4503]) tensor([0.6066, 0.0881, 0.1010, 0.2043]) -Greedy action tensor([ 1.4434, -0.4865, -0.8193, 0.4658]) tensor([0.6152, 0.0893, 0.0640, 0.2315]) -Greedy action tensor([ 2.1761, -0.7838, -0.2761, 0.4492]) tensor([0.7600, 0.0394, 0.0654, 0.1352]) -Greedy action tensor([ 1.8002, -0.1416, -0.5069, 0.1508]) tensor([0.6968, 0.1000, 0.0694, 0.1339]) -Greedy action tensor([ 1.4221, -0.3823, -0.3855, 0.1034]) tensor([0.6265, 0.1031, 0.1028, 0.1676]) -Greedy action tensor([ 1.1331, -0.4645, -0.2497, 0.4049]) tensor([0.5165, 0.1045, 0.1296, 0.2494]) -Greedy action tensor([ 0.9508, -0.4699, -0.3217, -0.0410]) tensor([0.5284, 0.1276, 0.1480, 0.1960]) -Greedy action tensor([ 0.6526, -0.3587, 0.1217, -0.0951]) tensor([0.4123, 0.1500, 0.2425, 0.1952]) -Greedy action tensor([ 1.2885, -0.1127, -0.2737, 0.5176]) tensor([0.5212, 0.1284, 0.1093, 0.2411]) -Greedy action tensor([ 0.7793, -0.4908, 0.0993, -0.2546]) tensor([0.4666, 0.1310, 0.2364, 0.1659]) -Greedy action tensor([ 1.2787, -0.1192, -0.3447, 0.4506]) tensor([0.5316, 0.1314, 0.1048, 0.2322]) -Greedy action tensor([ 0.9747, -0.5118, -0.3744, 0.4630]) tensor([0.4796, 0.1085, 0.1244, 0.2875]) -Greedy action tensor([ 1.4986, -0.2537, -0.3950, 0.5328]) tensor([0.5866, 0.1017, 0.0883, 0.2233]) -Greedy action tensor([ 0.4485, -0.0918, 0.1279, -0.1253]) tensor([0.3482, 0.2029, 0.2527, 0.1962]) -Greedy action tensor([ 1.5226, -0.9067, -0.2503, 0.5718]) tensor([0.6081, 0.0536, 0.1033, 0.2350]) -Greedy action tensor([ 1.2472, -0.5047, -0.2550, 0.0265]) tensor([0.5913, 0.1026, 0.1317, 0.1745]) -Greedy action tensor([ 1.3642, -0.6960, 0.0915, 0.3865]) tensor([0.5606, 0.0714, 0.1570, 0.2109]) -Greedy action tensor([ 1.3943, -0.5684, -0.2429, 0.3725]) tensor([0.5900, 0.0829, 0.1148, 0.2124]) -Greedy action tensor([ 1.2055, 0.1152, -0.2936, 0.0504]) tensor([0.5335, 0.1793, 0.1191, 0.1681]) -Greedy action tensor([ 1.2616, -0.8668, 0.0411, 0.5168]) tensor([0.5294, 0.0630, 0.1562, 0.2514]) -Greedy action tensor([ 1.8498, -0.3547, -0.3271, -0.0052]) tensor([0.7246, 0.0799, 0.0822, 0.1134]) -Greedy action tensor([ 0.8920, -0.2554, -0.3063, 0.4064]) tensor([0.4475, 0.1421, 0.1350, 0.2754]) -Greedy action tensor([ 0.8549, -0.5643, 0.0684, -0.0849]) tensor([0.4789, 0.1159, 0.2181, 0.1871]) -Greedy action tensor([ 1.0100, -0.6289, -0.4532, 0.6392]) tensor([0.4726, 0.0918, 0.1094, 0.3262]) -Greedy action tensor([ 1.8339, -0.8029, -0.2103, 0.2476]) tensor([0.7114, 0.0509, 0.0921, 0.1456]) -Greedy action tensor([ 1.2837, -0.7499, -0.3694, 0.2854]) tensor([0.5914, 0.0774, 0.1132, 0.2180]) -Greedy action tensor([ 0.6012, -0.5570, -0.0655, 0.0501]) tensor([0.4160, 0.1306, 0.2136, 0.2397]) -Greedy action tensor([ 0.7257, 0.1213, 0.0928, -0.2232]) tensor([0.4058, 0.2217, 0.2155, 0.1571]) -Greedy action tensor([ 0.9184, -0.3019, -0.3606, 0.6949]) tensor([0.4214, 0.1244, 0.1173, 0.3370]) -Greedy action tensor([ 1.5206, -0.3344, -0.1790, 0.7041]) tensor([0.5614, 0.0878, 0.1026, 0.2481]) -Greedy action tensor([ 1.3153, -0.3775, -0.2593, 0.2275]) tensor([0.5787, 0.1065, 0.1198, 0.1950]) -Greedy action tensor([ 1.0600, -0.3725, -0.2940, 0.0786]) tensor([0.5343, 0.1275, 0.1379, 0.2002]) -Greedy action tensor([ 1.6769, -0.0975, 0.0639, 0.0808]) tensor([0.6363, 0.1079, 0.1268, 0.1290]) -Greedy action tensor([ 1.3330, -0.4631, -0.3566, 0.2786]) tensor([0.5886, 0.0977, 0.1087, 0.2051]) -Greedy action tensor([ 1.1538, -0.1375, -0.0805, 0.0987]) tensor([0.5224, 0.1436, 0.1521, 0.1819]) -Greedy action tensor([ 1.4237, -0.7625, -0.1949, 0.4876]) tensor([0.5873, 0.0660, 0.1164, 0.2303]) -Greedy action tensor([ 1.0389, -0.0565, -0.6018, -0.2564]) tensor([0.5549, 0.1856, 0.1076, 0.1519]) -Greedy action tensor([ 0.8498, -0.3014, -0.2789, 0.1516]) tensor([0.4679, 0.1480, 0.1513, 0.2328]) -Greedy action tensor([ 1.5969, -0.0028, -0.2230, 0.2971]) tensor([0.6110, 0.1234, 0.0990, 0.1666]) -Greedy action tensor([ 1.0923, -0.6004, -0.1085, 0.4038]) tensor([0.5032, 0.0926, 0.1514, 0.2528]) -Greedy action tensor([ 1.7020, -0.0284, 0.3585, -0.6296]) tensor([0.6513, 0.1154, 0.1700, 0.0633]) -Greedy action tensor([0.6112, 0.2798, 1.3010, 0.3207]) tensor([0.2243, 0.1610, 0.4470, 0.1677]) -Greedy action tensor([ 0.6876, -0.3555, 1.2882, 1.0023]) tensor([0.2200, 0.0775, 0.4011, 0.3014]) -Greedy action tensor([-0.3945, -0.4433, -0.6855, -0.6915]) tensor([0.2904, 0.2766, 0.2171, 0.2158]) -Greedy action tensor([ 1.0537, -0.0133, -0.0532, 0.6644]) tensor([0.4251, 0.1463, 0.1405, 0.2881]) -Greedy action tensor([-0.4365, -1.4955, 0.6114, 0.7356]) tensor([0.1346, 0.0467, 0.3839, 0.4347]) -Greedy action tensor([ 0.6130, -0.5496, 1.7213, 0.8945]) tensor([0.1765, 0.0552, 0.5345, 0.2338]) -Greedy action tensor([-0.0765, -0.8571, 1.1885, -0.4998]) tensor([0.1768, 0.0810, 0.6264, 0.1158]) -Greedy action tensor([ 0.0428, -0.8673, 0.4333, 0.1144]) tensor([0.2529, 0.1018, 0.3737, 0.2717]) -Greedy action tensor([ 1.0451, -0.3789, -0.5741, 0.2681]) tensor([0.5267, 0.1268, 0.1043, 0.2422]) -Greedy action tensor([-0.7825, -1.7291, 2.1711, 0.5397]) tensor([0.0411, 0.0160, 0.7886, 0.1543]) -Greedy action tensor([ 1.0787, 0.4019, 0.2865, -0.7902]) tensor([0.4727, 0.2403, 0.2141, 0.0729]) -Greedy action tensor([ 0.4097, -0.1250, -0.3985, 0.0702]) tensor([0.3645, 0.2135, 0.1624, 0.2595]) -Greedy action tensor([-0.5177, -0.5038, -0.7902, 0.4690]) tensor([0.1832, 0.1858, 0.1395, 0.4915]) -Greedy action tensor([-0.2895, -0.7270, 1.4981, 0.2543]) tensor([0.1070, 0.0691, 0.6395, 0.1844]) -Greedy action tensor([ 0.5803, 0.1638, -1.0470, 0.7529]) tensor([0.3285, 0.2166, 0.0645, 0.3904]) -Greedy action tensor([ 0.4498, -0.3934, 1.6234, 0.5711]) tensor([0.1726, 0.0743, 0.5582, 0.1949]) -Greedy action tensor([-0.1992, -0.3426, -0.2522, 1.3375]) tensor([0.1340, 0.1161, 0.1271, 0.6229]) -Greedy action tensor([-0.8143, -1.8515, -0.4263, 0.0831]) tensor([0.1893, 0.0671, 0.2791, 0.4645]) -Greedy action tensor([-0.9593, -1.5376, -0.1782, -0.4865]) tensor([0.1869, 0.1049, 0.4083, 0.2999]) -Greedy action tensor([ 0.8200, -0.9181, 0.8643, 2.2718]) tensor([0.1540, 0.0271, 0.1610, 0.6578]) -Greedy action tensor([ 0.4961, -0.8204, 0.4728, 0.1733]) tensor([0.3368, 0.0903, 0.3290, 0.2439]) -Greedy action tensor([-1.4542, -0.2472, 1.0735, -0.7725]) tensor([0.0531, 0.1774, 0.6646, 0.1049]) -Greedy action tensor([ 0.5485, 0.3899, -0.3046, 1.1641]) tensor([0.2421, 0.2066, 0.1032, 0.4481]) -Greedy action tensor([ 0.6372, -0.4122, -0.3466, 0.8041]) tensor([0.3442, 0.1205, 0.1287, 0.4067]) -Greedy action tensor([-0.1984, 0.5670, 0.0661, 0.6592]) tensor([0.1468, 0.3157, 0.1913, 0.3462]) -Greedy action tensor([0.6424, 0.3966, 0.2606, 0.5079]) tensor([0.2995, 0.2342, 0.2045, 0.2618]) -Greedy action tensor([ 1.0577, -0.7626, 1.4321, 0.0685]) tensor([0.3347, 0.0542, 0.4867, 0.1245]) -Greedy action tensor([-1.0625, -1.3100, 0.0830, -0.4126]) tensor([0.1462, 0.1141, 0.4596, 0.2800]) -Greedy action tensor([ 0.4872, 0.0546, -0.0578, 0.5730]) tensor([0.3014, 0.1955, 0.1748, 0.3284]) -Greedy action tensor([ 0.9427, -0.3920, 0.3078, 0.3804]) tensor([0.4232, 0.1114, 0.2243, 0.2412]) -Greedy action tensor([ 0.4434, -0.1374, -0.8046, 1.7190]) tensor([0.1843, 0.1031, 0.0529, 0.6598]) -Greedy action tensor([-0.4185, -0.3406, 1.5330, -0.1115]) tensor([0.0954, 0.1032, 0.6717, 0.1297]) -Greedy action tensor([ 0.5023, -1.1883, 0.2350, 1.0712]) tensor([0.2691, 0.0496, 0.2060, 0.4753]) -Greedy action tensor([ 0.3159, -1.6776, -0.2316, 1.1239]) tensor([0.2527, 0.0344, 0.1461, 0.5668]) -Greedy action tensor([ 0.7763, -0.4398, 0.9202, 0.4718]) tensor([0.3136, 0.0930, 0.3622, 0.2313]) -Greedy action tensor([ 1.4532, -0.0877, 0.7438, -0.3669]) tensor([0.5353, 0.1147, 0.2633, 0.0867]) -Greedy action tensor([ 1.0958, -1.4538, 1.1565, 0.7705]) tensor([0.3493, 0.0273, 0.3711, 0.2523]) -Greedy action tensor([ 0.3102, -0.4452, 0.2704, 1.8287]) tensor([0.1429, 0.0672, 0.1374, 0.6526]) -Greedy action tensor([ 0.9234, -0.1109, 0.6244, 0.7527]) tensor([0.3401, 0.1209, 0.2522, 0.2867]) -Greedy action tensor([ 0.2884, 0.6665, 0.3139, -0.2000]) tensor([0.2440, 0.3561, 0.2503, 0.1497]) -Greedy action tensor([ 1.5169, -1.3039, -0.0909, 0.3403]) tensor([0.6377, 0.0380, 0.1277, 0.1966]) -Greedy action tensor([-0.7403, -0.9480, -0.9481, 0.0403]) tensor([0.2080, 0.1690, 0.1690, 0.4540]) -Greedy action tensor([ 1.8170, -0.2466, 0.7418, 1.3590]) tensor([0.4760, 0.0605, 0.1624, 0.3011]) -Greedy action tensor([ 1.1045, -0.3900, 0.5152, 0.8120]) tensor([0.3960, 0.0888, 0.2197, 0.2955]) -Greedy action tensor([ 1.1590, -1.3972, -0.2185, 0.5301]) tensor([0.5368, 0.0417, 0.1354, 0.2862]) -Greedy action tensor([-0.1174, -2.0126, -0.4024, 0.6237]) tensor([0.2500, 0.0376, 0.1880, 0.5245]) -Greedy action tensor([-0.9970, -0.7367, -1.1501, 0.7792]) tensor([0.1103, 0.1431, 0.0947, 0.6518]) -Greedy action tensor([-0.4635, -0.1808, 0.7270, -1.0631]) tensor([0.1622, 0.2152, 0.5335, 0.0891]) -Greedy action tensor([ 0.4169, 0.4414, -0.3881, -0.6678]) tensor([0.3559, 0.3647, 0.1591, 0.1203]) -Greedy action tensor([ 0.0786, 0.6082, 1.1175, -0.7918]) tensor([0.1683, 0.2857, 0.4755, 0.0705]) -Greedy action tensor([0.4691, 0.1192, 0.6853, 0.2146]) tensor([0.2687, 0.1894, 0.3336, 0.2083]) -Greedy action tensor([0.6038, 0.7191, 1.3370, 0.2609]) tensor([0.2035, 0.2284, 0.4237, 0.1444]) -Greedy action tensor([ 0.3893, -2.3417, -0.1385, 0.1562]) tensor([0.4087, 0.0266, 0.2411, 0.3237]) -Greedy action tensor([ 0.4325, -0.5390, 1.6485, -0.0416]) tensor([0.1861, 0.0704, 0.6277, 0.1158]) -Greedy action tensor([ 1.2692, -0.6811, 0.2686, 1.4701]) tensor([0.3660, 0.0521, 0.1346, 0.4474]) -Greedy action tensor([-0.3783, -0.3607, 0.0702, 1.0844]) tensor([0.1266, 0.1288, 0.1982, 0.5464]) -Greedy action tensor([-1.6425, -1.5314, -0.0631, 0.5985]) tensor([0.0611, 0.0683, 0.2964, 0.5743]) -Greedy action tensor([ 0.4433, -1.4972, -0.1371, 1.3680]) tensor([0.2367, 0.0340, 0.1325, 0.5968]) -Greedy action tensor([-0.3829, -0.2361, -0.1205, 0.7822]) tensor([0.1501, 0.1738, 0.1951, 0.4811]) -Greedy action tensor([0.0369, 0.0171, 0.2958, 0.5293]) tensor([0.2036, 0.1996, 0.2637, 0.3331]) -Greedy action tensor([ 0.3451, -0.2312, 0.8009, 0.3649]) tensor([0.2404, 0.1351, 0.3793, 0.2452]) -Greedy action tensor([ 1.4839, -1.2307, 0.5893, 1.0728]) tensor([0.4678, 0.0310, 0.1912, 0.3101]) -Greedy action tensor([ 0.5878, -1.0412, 1.4351, -0.2510]) tensor([0.2524, 0.0495, 0.5890, 0.1091]) -Greedy action tensor([ 1.5742, -2.0543, 0.6520, 0.2334]) tensor([0.5932, 0.0158, 0.2359, 0.1552]) -Greedy action tensor([ 0.9641, 0.1034, -0.4182, 0.4095]) tensor([0.4448, 0.1881, 0.1116, 0.2554]) -Greedy action tensor([ 1.0092, -0.7905, 1.0521, 0.2977]) tensor([0.3704, 0.0612, 0.3866, 0.1818]) -Greedy action tensor([ 1.9987, -1.0746, -0.0235, 1.2391]) tensor([0.6074, 0.0281, 0.0804, 0.2842]) -Greedy action tensor([ 1.9281, -0.0896, 1.2618, 0.4570]) tensor([0.5330, 0.0709, 0.2737, 0.1224]) -Greedy action tensor([ 0.0601, -1.4256, 1.1791, 0.4210]) tensor([0.1747, 0.0396, 0.5350, 0.2507]) -Greedy action tensor([-0.7734, -0.9116, -0.2013, 0.6123]) tensor([0.1309, 0.1140, 0.2319, 0.5232]) -Greedy action tensor([ 0.4418, -2.2534, 0.4267, -0.0178]) tensor([0.3726, 0.0252, 0.3670, 0.2353]) -Greedy action tensor([-1.1296, -1.0886, 0.3642, 0.0625]) tensor([0.1021, 0.1064, 0.4550, 0.3365]) -Greedy action tensor([ 0.7023, -1.9252, -0.2745, 0.7288]) tensor([0.4039, 0.0292, 0.1521, 0.4148]) -Greedy action tensor([ 0.5799, -1.3097, 0.0318, 1.7776]) tensor([0.1984, 0.0300, 0.1147, 0.6570]) -Greedy action tensor([1.0350, 0.2488, 1.5724, 0.1267]) tensor([0.2801, 0.1276, 0.4794, 0.1129]) -Greedy action tensor([ 0.1666, -2.9194, 0.0291, 0.6037]) tensor([0.2886, 0.0132, 0.2515, 0.4468]) -Greedy action tensor([-0.8625, 0.0723, 0.6697, -0.6419]) tensor([0.1061, 0.2703, 0.4912, 0.1323]) -Greedy action tensor([-1.2198, -0.3777, 0.8731, 1.1550]) tensor([0.0451, 0.1047, 0.3656, 0.4846]) -Greedy action tensor([-0.5909, -0.6366, 0.2103, -0.4218]) tensor([0.1863, 0.1780, 0.4151, 0.2206]) -Greedy action tensor([ 0.8327, -0.5660, 0.4621, 1.0935]) tensor([0.3091, 0.0763, 0.2134, 0.4012]) -Greedy action tensor([ 0.7723, -0.3779, 0.7486, 0.7789]) tensor([0.3030, 0.0959, 0.2960, 0.3051]) -Greedy action tensor([-0.5729, 0.7857, -0.1479, -1.7007]) tensor([0.1483, 0.5769, 0.2268, 0.0480]) -Greedy action tensor([ 1.0075, -0.7806, -0.0313, -0.6793]) tensor([0.5861, 0.0980, 0.2074, 0.1085]) -Greedy action tensor([ 0.4385, -0.1375, -0.0060, -0.1573]) tensor([0.3631, 0.2041, 0.2328, 0.2001]) -Greedy action tensor([ 0.4577, -0.3411, -0.0118, -0.1963]) tensor([0.3853, 0.1734, 0.2410, 0.2004]) -Greedy action tensor([ 0.7022, -0.4718, 0.0198, -0.3622]) tensor([0.4631, 0.1432, 0.2340, 0.1597]) -Greedy action tensor([ 0.7636, -0.5814, 0.0213, -0.1614]) tensor([0.4688, 0.1221, 0.2231, 0.1859]) -Greedy action tensor([ 0.5445, -0.4201, 0.1764, -0.4297]) tensor([0.4081, 0.1555, 0.2824, 0.1540]) -Greedy action tensor([ 0.4584, -0.1603, -0.0203, -0.3036]) tensor([0.3810, 0.2052, 0.2360, 0.1778]) -Greedy action tensor([ 1.3043, -1.2275, 0.1168, -0.6392]) tensor([0.6546, 0.0520, 0.1996, 0.0937]) -Greedy action tensor([ 0.5518, -0.3453, -0.1487, -0.2854]) tensor([0.4279, 0.1745, 0.2124, 0.1852]) -Greedy action tensor([ 0.5759, -0.3854, 0.0948, -0.3384]) tensor([0.4165, 0.1592, 0.2574, 0.1669]) -Greedy action tensor([ 0.9413, -0.5105, -0.1997, -0.5130]) tensor([0.5595, 0.1310, 0.1788, 0.1307]) -Greedy action tensor([ 1.0264, -0.7008, 0.1987, -0.7837]) tensor([0.5623, 0.1000, 0.2457, 0.0920]) -Greedy action tensor([ 0.7280, -0.4832, -0.1184, -0.4157]) tensor([0.4889, 0.1456, 0.2097, 0.1558]) -Greedy action tensor([ 0.7698, -0.5705, -0.0356, -0.3202]) tensor([0.4890, 0.1280, 0.2186, 0.1644]) -Greedy action tensor([ 0.6670, -0.3080, -0.0694, -0.1857]) tensor([0.4381, 0.1653, 0.2098, 0.1868]) -Greedy action tensor([ 0.6246, -0.4771, -0.1151, -0.4643]) tensor([0.4660, 0.1548, 0.2224, 0.1568]) -Greedy action tensor([ 0.5959, -0.0210, -0.0446, -0.1173]) tensor([0.3911, 0.2111, 0.2061, 0.1917]) -Greedy action tensor([ 0.4427, -0.2203, -0.1121, -0.1976]) tensor([0.3822, 0.1969, 0.2194, 0.2014]) -Greedy action tensor([ 0.6369, -0.0224, -0.1208, -0.1691]) tensor([0.4111, 0.2126, 0.1927, 0.1836]) -Greedy action tensor([ 0.8991, -0.9070, -0.0596, -0.3474]) tensor([0.5449, 0.0895, 0.2089, 0.1567]) -Greedy action tensor([ 0.8382, -0.4229, 0.0053, -0.5212]) tensor([0.5064, 0.1435, 0.2201, 0.1300]) -Greedy action tensor([ 0.4177, 0.2295, -0.0898, -0.0104]) tensor([0.3244, 0.2688, 0.1953, 0.2115]) -Greedy action tensor([ 1.2284, -1.0451, 0.1182, -0.5250]) tensor([0.6228, 0.0641, 0.2052, 0.1079]) -Greedy action tensor([ 4.7055e-01, -3.9822e-04, -6.6523e-02, -1.5660e-01]) tensor([0.3646, 0.2276, 0.2131, 0.1947]) -Greedy action tensor([ 0.5464, -0.2627, -0.0370, -0.1213]) tensor([0.3974, 0.1770, 0.2218, 0.2038]) -Greedy action tensor([ 0.6763, -0.2118, 0.0475, -0.3062]) tensor([0.4312, 0.1774, 0.2299, 0.1614]) -Greedy action tensor([ 0.8966, -0.2067, -0.0209, -0.1430]) tensor([0.4796, 0.1591, 0.1916, 0.1696]) -Greedy action tensor([ 0.9133, -0.6950, -0.0272, -0.5933]) tensor([0.5518, 0.1105, 0.2154, 0.1223]) -Greedy action tensor([ 0.6251, -0.4462, -0.0548, -0.3343]) tensor([0.4479, 0.1535, 0.2270, 0.1716]) -Greedy action tensor([ 0.4581, -0.5123, -0.1501, -0.1910]) tensor([0.4089, 0.1549, 0.2226, 0.2136]) -Greedy action tensor([ 0.9982, -1.2058, 0.0447, -0.7786]) tensor([0.6006, 0.0663, 0.2315, 0.1016]) -Greedy action tensor([ 0.4879, 0.0029, 0.1403, -0.3101]) tensor([0.3607, 0.2221, 0.2548, 0.1624]) -Greedy action tensor([ 0.2491, -0.0591, 0.0474, -0.3959]) tensor([0.3250, 0.2388, 0.2657, 0.1705]) -Greedy action tensor([ 0.9134, -0.4521, 0.0160, -0.6636]) tensor([0.5349, 0.1365, 0.2181, 0.1105]) -Greedy action tensor([ 1.3350, -1.6172, 0.0161, -0.6040]) tensor([0.6833, 0.0357, 0.1827, 0.0983]) -Greedy action tensor([ 0.7007, -0.3794, 0.0061, -0.5499]) tensor([0.4705, 0.1598, 0.2349, 0.1347]) -Greedy action tensor([ 0.8194, -0.5922, -0.0320, -0.7013]) tensor([0.5293, 0.1290, 0.2259, 0.1157]) -Greedy action tensor([ 0.5711, -0.0195, -0.0673, -0.5043]) tensor([0.4127, 0.2286, 0.2179, 0.1408]) -Greedy action tensor([ 0.5196, -0.3444, -0.1329, -0.3348]) tensor([0.4223, 0.1780, 0.2199, 0.1797]) -Greedy action tensor([ 0.6840, -0.4443, 0.0525, -0.4030]) tensor([0.4561, 0.1476, 0.2425, 0.1538]) -Greedy action tensor([ 0.7170, -0.5284, 0.0444, -0.4607]) tensor([0.4748, 0.1367, 0.2423, 0.1462]) -Greedy action tensor([ 0.3442, -0.1893, 0.4076, -0.4976]) tensor([0.3244, 0.1903, 0.3456, 0.1398]) -Greedy action tensor([ 0.5415, -0.1164, -0.0346, -0.2820]) tensor([0.3970, 0.2056, 0.2231, 0.1742]) -Greedy action tensor([ 0.7037, -0.2886, -0.0987, -0.1763]) tensor([0.4477, 0.1660, 0.2007, 0.1857]) -Greedy action tensor([ 1.0835, -1.0413, -0.0416, -0.4652]) tensor([0.6036, 0.0721, 0.1960, 0.1283]) -Greedy action tensor([ 0.4487, -0.0575, -0.1239, -0.1547]) tensor([0.3685, 0.2221, 0.2079, 0.2016]) -Greedy action tensor([ 0.2498, -0.0576, 0.1187, -0.3778]) tensor([0.3178, 0.2337, 0.2788, 0.1697]) -Greedy action tensor([ 0.8126, -0.6560, -0.1047, -0.5508]) tensor([0.5303, 0.1221, 0.2119, 0.1356]) -Greedy action tensor([ 0.8796, -0.4425, -0.1623, -0.2550]) tensor([0.5152, 0.1374, 0.1818, 0.1657]) -Greedy action tensor([ 0.9889, -0.7869, -0.1202, -0.5443]) tensor([0.5831, 0.0987, 0.1923, 0.1259]) -Greedy action tensor([ 0.7533, -0.2545, -0.1677, -0.2235]) tensor([0.4674, 0.1706, 0.1861, 0.1760]) -Greedy action tensor([ 0.3319, -0.2947, -0.1051, -0.2922]) tensor([0.3682, 0.1968, 0.2378, 0.1972]) -Greedy action tensor([ 0.8373, -0.8021, 0.0527, -0.8938]) tensor([0.5472, 0.1062, 0.2497, 0.0969]) -Greedy action tensor([ 0.1889, -0.0940, 0.0641, -0.1263]) tensor([0.2971, 0.2239, 0.2622, 0.2168]) -Greedy action tensor([ 0.5795, -0.3014, 0.0113, -0.3569]) tensor([0.4214, 0.1746, 0.2387, 0.1652]) -Greedy action tensor([ 0.4817, -0.4234, -0.0518, -0.2431]) tensor([0.4040, 0.1634, 0.2369, 0.1957]) -Greedy action tensor([ 1.0375, -1.0572, -0.0508, -0.6090]) tensor([0.6051, 0.0745, 0.2038, 0.1166]) -Greedy action tensor([ 0.5454, -0.3477, -0.0990, -0.3297]) tensor([0.4253, 0.1741, 0.2233, 0.1773]) -Greedy action tensor([ 0.7932, -0.3529, -0.0167, -0.4044]) tensor([0.4843, 0.1540, 0.2155, 0.1462]) -Greedy action tensor([ 0.6409, -0.0934, -0.2757, -0.1757]) tensor([0.4307, 0.2067, 0.1722, 0.1904]) -Greedy action tensor([ 0.6019, -0.2979, 0.0076, -0.1088]) tensor([0.4082, 0.1660, 0.2253, 0.2005]) -Greedy action tensor([ 0.9878, -0.2930, 0.0999, -0.2111]) tensor([0.5023, 0.1395, 0.2067, 0.1515]) -Greedy action tensor([ 0.9766, -0.5780, -0.0936, -0.5339]) tensor([0.5634, 0.1190, 0.1932, 0.1244]) -Greedy action tensor([ 0.5920, -0.4358, -0.1070, -0.1779]) tensor([0.4314, 0.1544, 0.2144, 0.1998]) -Greedy action tensor([ 0.3926, -0.0865, -0.0439, -0.2285]) tensor([0.3568, 0.2210, 0.2306, 0.1917]) -Greedy action tensor([ 0.4240, -0.2443, 0.0327, -0.1663]) tensor([0.3646, 0.1869, 0.2465, 0.2020]) -Greedy action tensor([ 0.6145, -0.3657, -0.0643, -0.3194]) tensor([0.4395, 0.1649, 0.2229, 0.1727]) -Greedy action tensor([ 0.7612, -0.6685, 0.0293, -0.5341]) tensor([0.5015, 0.1200, 0.2412, 0.1373]) -Greedy action tensor([ 0.8672, -0.6452, -0.0545, -0.4613]) tensor([0.5310, 0.1170, 0.2113, 0.1407]) -Greedy action tensor([ 1.0591, -0.7830, 0.0370, -0.5061]) tensor([0.5789, 0.0917, 0.2083, 0.1210]) -Greedy action tensor([ 0.3880, 0.1097, -0.0722, -0.2177]) tensor([0.3408, 0.2580, 0.2151, 0.1860]) -Greedy action tensor([ 0.4569, -0.0386, -0.1056, -0.1955]) tensor([0.3704, 0.2257, 0.2110, 0.1929]) -Greedy action tensor([ 0.6252, -0.2636, -0.1391, -0.4822]) tensor([0.4531, 0.1863, 0.2110, 0.1497]) -Greedy action tensor([ 1.2149, -1.1082, 0.0681, -0.6231]) tensor([0.6350, 0.0622, 0.2017, 0.1010]) -Greedy action tensor([ 0.6384, -0.6149, -0.0395, -0.2446]) tensor([0.4532, 0.1294, 0.2300, 0.1874]) -Greedy action tensor([ 0.7111, -0.4900, 0.0472, -0.2780]) tensor([0.4571, 0.1375, 0.2353, 0.1700]) -Greedy action tensor([ 0.9335, -0.4969, 0.0578, -0.4570]) tensor([0.5250, 0.1256, 0.2187, 0.1307]) -Greedy action tensor([ 0.5850, -0.2303, 0.0531, -0.2994]) tensor([0.4093, 0.1811, 0.2405, 0.1690]) -Greedy action tensor([ 0.9544, 0.2505, -0.0403, -0.3573]) tensor([0.4686, 0.2318, 0.1733, 0.1262]) -Greedy action tensor([ 0.9239, -0.3437, -0.1053, -0.2730]) tensor([0.5152, 0.1450, 0.1841, 0.1557]) -Greedy action tensor([ 0.8115, -0.6593, 0.1482, -0.5943]) tensor([0.5025, 0.1154, 0.2589, 0.1232]) -Greedy action tensor([-1.3552, -0.3377, 0.5746, 0.5915]) tensor([0.0566, 0.1566, 0.3901, 0.3967]) -Greedy action tensor([-0.4510, -0.4946, 0.2094, 0.1475]) tensor([0.1751, 0.1676, 0.3388, 0.3185]) -Greedy action tensor([-0.9453, -0.5975, 0.2650, 0.1290]) tensor([0.1150, 0.1628, 0.3856, 0.3366]) -Greedy action tensor([-1.0330, -0.4800, 0.5118, -0.4333]) tensor([0.1081, 0.1880, 0.5069, 0.1970]) -Greedy action tensor([-1.9240, -0.4050, 0.6508, -0.1653]) tensor([0.0408, 0.1864, 0.5358, 0.2369]) -Greedy action tensor([-1.3540, -0.4576, 0.5538, 0.5027]) tensor([0.0603, 0.1477, 0.4061, 0.3859]) -Greedy action tensor([-1.2306, -0.5428, 0.5295, -0.0127]) tensor([0.0821, 0.1633, 0.4772, 0.2775]) -Greedy action tensor([-1.4264, -0.7204, 0.9306, 0.5500]) tensor([0.0481, 0.0974, 0.5076, 0.3469]) -Greedy action tensor([-1.4507, -0.5325, 0.3983, 0.1343]) tensor([0.0679, 0.1700, 0.4311, 0.3311]) -Greedy action tensor([-1.3728, -0.0994, 0.4543, 0.3212]) tensor([0.0616, 0.2201, 0.3830, 0.3352]) -Greedy action tensor([-1.0564, -0.4108, 0.9393, 1.1742]) tensor([0.0511, 0.0975, 0.3760, 0.4755]) -Greedy action tensor([-1.8230, -0.4413, 0.5985, -0.1087]) tensor([0.0459, 0.1827, 0.5167, 0.2548]) -Greedy action tensor([-0.3947, 0.0533, 1.2359, 1.5649]) tensor([0.0677, 0.1060, 0.3458, 0.4805]) -Greedy action tensor([-0.7699, -0.5954, 0.1885, 0.2432]) tensor([0.1324, 0.1577, 0.3453, 0.3647]) -Greedy action tensor([-1.2880, -0.5327, 0.3321, 0.2329]) tensor([0.0784, 0.1668, 0.3961, 0.3587]) -Greedy action tensor([-1.7612, -0.4642, 0.5901, -0.0162]) tensor([0.0479, 0.1752, 0.5027, 0.2742]) -Greedy action tensor([-1.9398, -0.4713, 0.9143, 0.1555]) tensor([0.0324, 0.1409, 0.5631, 0.2636]) -Greedy action tensor([-1.2675, -0.6253, 0.3719, 0.2962]) tensor([0.0780, 0.1481, 0.4016, 0.3723]) -Greedy action tensor([-1.1971, -0.7079, 0.8119, -0.5056]) tensor([0.0828, 0.1350, 0.6170, 0.1652]) -Greedy action tensor([-1.6482, -0.5119, 0.5335, 0.0833]) tensor([0.0537, 0.1673, 0.4757, 0.3033]) -Greedy action tensor([-1.4618, -0.5412, 0.4115, 0.1120]) tensor([0.0674, 0.1691, 0.4385, 0.3250]) -Greedy action tensor([-1.9386, -0.4522, 0.6791, -0.1660]) tensor([0.0400, 0.1768, 0.5479, 0.2353]) -Greedy action tensor([-1.7146, -0.4607, 0.6241, 0.0780]) tensor([0.0479, 0.1678, 0.4966, 0.2876]) -Greedy action tensor([-1.8499, -0.4814, 0.6200, -0.1098]) tensor([0.0445, 0.1750, 0.5266, 0.2538]) -Greedy action tensor([-1.7129, -0.5169, 0.5773, -0.0397]) tensor([0.0513, 0.1695, 0.5062, 0.2731]) -Greedy action tensor([-1.2719, -0.6243, 0.5368, 0.7543]) tensor([0.0602, 0.1151, 0.3677, 0.4570]) -Greedy action tensor([-0.8397, -0.5627, 0.1441, 0.3906]) tensor([0.1188, 0.1567, 0.3178, 0.4066]) -Greedy action tensor([-1.6741, -0.5149, 0.5222, -0.0181]) tensor([0.0543, 0.1731, 0.4882, 0.2844]) -Greedy action tensor([-1.7830, -0.4638, 0.6061, -0.0523]) tensor([0.0470, 0.1757, 0.5122, 0.2652]) -Greedy action tensor([-1.6636, -0.5062, 0.5594, 0.0598]) tensor([0.0526, 0.1673, 0.4855, 0.2946]) -Greedy action tensor([-1.6606, -0.4933, 0.5320, -0.0790]) tensor([0.0555, 0.1782, 0.4967, 0.2696]) -Greedy action tensor([-1.8851, -0.4345, 0.6330, -0.1527]) tensor([0.0429, 0.1829, 0.5319, 0.2424]) -Greedy action tensor([-1.1602, -0.7879, 1.1845, 1.3725]) tensor([0.0393, 0.0570, 0.4095, 0.4942]) -Greedy action tensor([-1.4328, -0.4492, 0.5146, 0.3348]) tensor([0.0605, 0.1617, 0.4238, 0.3541]) -Greedy action tensor([-1.0438, -0.6892, 0.2894, -0.0322]) tensor([0.1115, 0.1590, 0.4229, 0.3066]) -Greedy action tensor([-1.7657, -0.2988, 0.5475, -0.0442]) tensor([0.0475, 0.2061, 0.4805, 0.2659]) -Greedy action tensor([-1.9309, -0.4432, 0.6598, -0.1722]) tensor([0.0407, 0.1802, 0.5429, 0.2362]) -Greedy action tensor([-1.1004, -0.5378, 0.3418, 0.2995]) tensor([0.0906, 0.1590, 0.3831, 0.3673]) -Greedy action tensor([-1.3332, 0.0028, 0.3000, -0.0943]) tensor([0.0748, 0.2844, 0.3828, 0.2581]) -Greedy action tensor([-1.3303, -0.6304, 0.5160, 0.4939]) tensor([0.0643, 0.1295, 0.4075, 0.3986]) -Greedy action tensor([-1.0986, -0.3877, 0.6114, 0.8137]) tensor([0.0652, 0.1328, 0.3606, 0.4414]) -Greedy action tensor([-0.9722, -0.5882, 0.2029, 0.3228]) tensor([0.1069, 0.1569, 0.3461, 0.3902]) -Greedy action tensor([-1.0014, -0.4805, 0.5672, -0.6187]) tensor([0.1117, 0.1881, 0.5363, 0.1638]) -Greedy action tensor([-1.4195, -0.4542, 0.7530, 0.6640]) tensor([0.0489, 0.1285, 0.4296, 0.3930]) -Greedy action tensor([-1.4358, -0.5397, 0.5294, -0.0574]) tensor([0.0687, 0.1683, 0.4903, 0.2727]) -Greedy action tensor([-1.9236, -0.4433, 0.6511, -0.1678]) tensor([0.0411, 0.1808, 0.5400, 0.2381]) -Greedy action tensor([-1.8492, -0.4157, 0.6175, -0.1159]) tensor([0.0442, 0.1852, 0.5206, 0.2500]) -Greedy action tensor([-1.2365, -0.7560, 0.2813, -0.1953]) tensor([0.0999, 0.1615, 0.4557, 0.2829]) -Greedy action tensor([-0.4083, -0.3812, 0.1881, 0.2089]) tensor([0.1755, 0.1804, 0.3187, 0.3254]) -Greedy action tensor([-1.6776, -0.4638, 0.5330, 0.0119]) tensor([0.0529, 0.1781, 0.4825, 0.2865]) -Greedy action tensor([-1.5241, -0.6490, 0.4911, -0.0923]) tensor([0.0663, 0.1590, 0.4972, 0.2775]) -Greedy action tensor([-1.6471, -0.5350, 0.4440, 0.0037]) tensor([0.0577, 0.1753, 0.4666, 0.3004]) -Greedy action tensor([-1.7983, -0.4396, 0.5749, -0.0997]) tensor([0.0474, 0.1845, 0.5089, 0.2592]) -Greedy action tensor([-1.6463, -0.4707, 0.5338, 0.0451]) tensor([0.0540, 0.1750, 0.4779, 0.2931]) -Greedy action tensor([-1.3767, -0.4554, 0.6703, 0.8062]) tensor([0.0497, 0.1248, 0.3848, 0.4407]) -Greedy action tensor([-1.4265, -0.5788, 1.2896, 1.1497]) tensor([0.0316, 0.0739, 0.4785, 0.4160]) -Greedy action tensor([-1.5328, -0.5422, 0.5356, 0.2751]) tensor([0.0565, 0.1521, 0.4470, 0.3444]) -Greedy action tensor([-1.2643, -0.3789, 0.4421, -0.2102]) tensor([0.0847, 0.2054, 0.4668, 0.2431]) -Greedy action tensor([-1.1620, -0.7998, 0.2976, 0.5578]) tensor([0.0811, 0.1166, 0.3492, 0.4531]) -Greedy action tensor([-1.7910, -0.4508, 0.5845, -0.1051]) tensor([0.0477, 0.1821, 0.5129, 0.2573]) -Greedy action tensor([-1.3587, -0.3389, 0.5796, -0.4211]) tensor([0.0753, 0.2089, 0.5234, 0.1924]) -Greedy action tensor([-1.4470, -0.3126, 0.5239, 0.3333]) tensor([0.0581, 0.1806, 0.4168, 0.3445]) -Greedy action tensor([-1.2461, -0.0967, 0.3974, -0.1014]) tensor([0.0802, 0.2531, 0.4148, 0.2519]) -Greedy action tensor([-1.4360, -0.5285, 0.4590, 0.2602]) tensor([0.0642, 0.1590, 0.4269, 0.3499]) -Greedy action tensor([-1.3724, -0.5962, 0.3510, 0.1664]) tensor([0.0744, 0.1618, 0.4171, 0.3467]) -Greedy action tensor([-0.7818, -0.5866, 0.2353, 0.2837]) tensor([0.1268, 0.1542, 0.3508, 0.3682]) -Greedy action tensor([-1.6530, -0.4197, 0.5160, -0.0932]) tensor([0.0557, 0.1913, 0.4877, 0.2652]) -Greedy action tensor([-1.3322, -0.5350, 0.4711, 0.6120]) tensor([0.0614, 0.1363, 0.3729, 0.4293]) -Greedy action tensor([-1.7359, -0.3045, 0.5585, -0.0197]) tensor([0.0484, 0.2025, 0.4799, 0.2692]) -Greedy action tensor([-1.8540, -0.3122, 0.6008, -0.1201]) tensor([0.0435, 0.2033, 0.5067, 0.2464]) -Greedy action tensor([-1.8979, -0.4524, 0.6408, -0.1636]) tensor([0.0424, 0.1801, 0.5372, 0.2403]) -Greedy action tensor([-0.9477, -0.2603, 0.8980, 1.3237]) tensor([0.0526, 0.1046, 0.3330, 0.5098]) -Greedy action tensor([-1.9007, -0.4520, 0.6413, -0.1550]) tensor([0.0422, 0.1797, 0.5363, 0.2418]) -Greedy action tensor([-0.4225, 0.2924, 0.2149, 1.1335]) tensor([0.1034, 0.2113, 0.1955, 0.4899]) -Greedy action tensor([-1.2733, -0.5699, 0.5223, 0.2953]) tensor([0.0722, 0.1460, 0.4351, 0.3467]) -Greedy action tensor([-0.7807, -0.5230, 0.3030, 0.0941]) tensor([0.1308, 0.1692, 0.3865, 0.3136]) -Greedy action tensor([-1.7905, -0.4586, 0.8318, 0.2897]) tensor([0.0376, 0.1426, 0.5183, 0.3014]) -Greedy action tensor([-1.8313, -0.4592, 0.6040, -0.1193]) tensor([0.0457, 0.1800, 0.5214, 0.2529]) -Greedy action tensor([-1.4449, -0.5163, 0.4066, 0.0721]) tensor([0.0692, 0.1750, 0.4405, 0.3153]) -Greedy action tensor([-1.7077, -0.5102, 0.6269, 0.0710]) tensor([0.0486, 0.1611, 0.5022, 0.2881]) -Greedy action tensor([-1.0466, -0.4770, 1.0271, 1.2499]) tensor([0.0484, 0.0856, 0.3850, 0.4811]) -Greedy action tensor([ 1.2288, -0.3649, -0.2856, 0.1737]) tensor([0.5646, 0.1147, 0.1242, 0.1966]) -Greedy action tensor([ 1.3604, -0.3897, -0.3192, 0.2122]) tensor([0.5962, 0.1036, 0.1112, 0.1891]) -Greedy action tensor([ 0.9395, -0.2707, -0.1713, -0.0618]) tensor([0.5013, 0.1495, 0.1651, 0.1842]) -Greedy action tensor([ 1.7528, -1.0559, -0.2733, 0.4291]) tensor([0.6857, 0.0413, 0.0904, 0.1825]) -Greedy action tensor([ 0.5166, -0.3682, -0.0779, -0.1025]) tensor([0.3995, 0.1649, 0.2205, 0.2151]) -Greedy action tensor([ 0.7438, -0.1195, -0.0989, 0.1749]) tensor([0.4135, 0.1744, 0.1780, 0.2341]) -Greedy action tensor([ 0.6778, -0.4238, 0.0997, -0.1790]) tensor([0.4314, 0.1434, 0.2420, 0.1832]) -Greedy action tensor([ 1.0252, -0.4972, -0.0917, -0.0231]) tensor([0.5274, 0.1151, 0.1726, 0.1849]) -Greedy action tensor([ 1.3699, -0.9871, -0.0816, 0.1699]) tensor([0.6135, 0.0581, 0.1437, 0.1848]) -Greedy action tensor([ 1.2471, -0.5218, -0.2397, 0.1445]) tensor([0.5785, 0.0986, 0.1308, 0.1921]) -Greedy action tensor([ 1.1496, -0.4152, -0.0540, 0.4042]) tensor([0.5041, 0.1054, 0.1513, 0.2392]) -Greedy action tensor([ 0.8705, -0.6701, -0.3712, 0.3162]) tensor([0.4813, 0.1031, 0.1390, 0.2765]) -Greedy action tensor([ 0.7456, -0.0685, 0.0186, -0.0506]) tensor([0.4206, 0.1864, 0.2033, 0.1897]) -Greedy action tensor([ 1.2454, -0.1533, -0.1756, 0.1328]) tensor([0.5503, 0.1359, 0.1329, 0.1809]) -Greedy action tensor([ 0.7731, -0.6131, -0.2486, 0.5990]) tensor([0.4081, 0.1020, 0.1469, 0.3429]) -Greedy action tensor([ 0.8605, -0.1077, -0.4791, -0.4792]) tensor([0.5253, 0.1995, 0.1376, 0.1376]) -Greedy action tensor([ 0.7533, -0.2555, -0.1110, -0.1495]) tensor([0.4563, 0.1664, 0.1923, 0.1850]) -Greedy action tensor([ 1.7011, -0.9573, -0.5228, 0.7621]) tensor([0.6372, 0.0446, 0.0689, 0.2492]) -Greedy action tensor([ 1.0610, -0.2155, -0.2325, 0.2850]) tensor([0.4966, 0.1386, 0.1362, 0.2286]) -Greedy action tensor([ 1.1198, -0.5206, 0.0174, 0.0434]) tensor([0.5357, 0.1039, 0.1779, 0.1826]) -Greedy action tensor([ 2.6907, -0.3698, -0.4708, 0.6811]) tensor([0.8175, 0.0383, 0.0346, 0.1096]) -Greedy action tensor([ 1.3058, -0.1693, -0.3474, 0.2485]) tensor([0.5657, 0.1294, 0.1083, 0.1965]) -Greedy action tensor([ 0.6214, -0.3395, -0.1222, 0.1201]) tensor([0.4059, 0.1553, 0.1930, 0.2459]) -Greedy action tensor([ 0.7849, -0.4837, 0.3481, -0.0559]) tensor([0.4240, 0.1192, 0.2739, 0.1829]) -Greedy action tensor([ 1.0026, -0.1036, -0.7835, 1.0467]) tensor([0.3931, 0.1301, 0.0659, 0.4109]) -Greedy action tensor([ 0.4942, -0.1838, -0.0422, 0.4503]) tensor([0.3279, 0.1665, 0.1918, 0.3138]) -Greedy action tensor([ 1.7635, -0.2110, -0.8980, 0.1185]) tensor([0.7134, 0.0990, 0.0498, 0.1377]) -Greedy action tensor([ 1.1555, 0.0088, -0.7847, -0.0034]) tensor([0.5633, 0.1790, 0.0809, 0.1768]) -Greedy action tensor([ 1.0432, -0.2360, 0.0290, 0.2563]) tensor([0.4771, 0.1327, 0.1730, 0.2172]) -Greedy action tensor([ 1.4345, -0.3843, -0.1673, 0.2981]) tensor([0.5936, 0.0963, 0.1196, 0.1905]) -Greedy action tensor([ 1.0219, -0.3131, -0.4248, 0.6629]) tensor([0.4552, 0.1198, 0.1071, 0.3179]) -Greedy action tensor([ 0.8165, -0.2807, -0.0901, -0.0777]) tensor([0.4659, 0.1555, 0.1882, 0.1905]) -Greedy action tensor([ 0.7998, -0.5855, 0.1322, 0.0010]) tensor([0.4519, 0.1131, 0.2318, 0.2033]) -Greedy action tensor([ 0.9563, -0.1635, -0.1086, 0.3206]) tensor([0.4544, 0.1483, 0.1567, 0.2406]) -Greedy action tensor([ 1.3398, -0.3791, -0.4550, 0.4635]) tensor([0.5676, 0.1018, 0.0943, 0.2363]) -Greedy action tensor([ 1.3495, -0.6700, -0.1381, 0.1031]) tensor([0.6075, 0.0806, 0.1372, 0.1747]) -Greedy action tensor([ 0.9326, -0.2666, 0.0268, -0.1858]) tensor([0.4920, 0.1483, 0.1989, 0.1608]) -Greedy action tensor([ 0.9389, -0.6016, -0.4061, 0.6280]) tensor([0.4530, 0.0971, 0.1180, 0.3319]) -Greedy action tensor([ 1.6464, -0.5399, -0.2539, 0.3218]) tensor([0.6546, 0.0735, 0.0979, 0.1741]) -Greedy action tensor([ 1.3451, -0.4552, -0.2837, 0.1381]) tensor([0.6022, 0.0995, 0.1181, 0.1801]) -Greedy action tensor([ 1.3794, -0.5749, -0.3799, 0.2470]) tensor([0.6112, 0.0866, 0.1052, 0.1970]) -Greedy action tensor([ 1.4817, -0.5503, -0.2132, 0.4685]) tensor([0.5960, 0.0781, 0.1094, 0.2164]) -Greedy action tensor([ 0.5033, -0.2725, -0.3914, 0.4521]) tensor([0.3547, 0.1633, 0.1450, 0.3370]) -Greedy action tensor([ 1.0726, -0.6742, -0.3163, 0.5757]) tensor([0.4921, 0.0858, 0.1227, 0.2994]) -Greedy action tensor([ 1.0328, -0.2581, -0.1082, 0.1517]) tensor([0.4978, 0.1369, 0.1591, 0.2063]) -Greedy action tensor([ 1.4767, -0.5160, -0.4260, 0.2567]) tensor([0.6326, 0.0862, 0.0944, 0.1868]) -Greedy action tensor([ 2.0015, -0.6616, -0.3011, 0.1633]) tensor([0.7525, 0.0525, 0.0752, 0.1197]) -Greedy action tensor([ 1.1326, -0.3751, -0.0664, 0.1431]) tensor([0.5278, 0.1169, 0.1591, 0.1962]) -Greedy action tensor([ 1.2159, 0.0905, -0.1706, -0.3576]) tensor([0.5612, 0.1821, 0.1403, 0.1163]) -Greedy action tensor([ 1.4286, -0.7326, -0.1768, 0.1171]) tensor([0.6307, 0.0727, 0.1267, 0.1699]) -Greedy action tensor([ 1.7831, -0.5601, -0.5334, 0.6576]) tensor([0.6583, 0.0632, 0.0649, 0.2136]) -Greedy action tensor([ 1.0008, -0.2786, -0.0997, 0.3184]) tensor([0.4725, 0.1315, 0.1572, 0.2388]) -Greedy action tensor([ 1.1457, -0.6020, 0.0728, -0.1956]) tensor([0.5625, 0.0980, 0.1924, 0.1471]) -Greedy action tensor([ 0.6056, -0.3333, -0.3602, 0.2029]) tensor([0.4098, 0.1602, 0.1560, 0.2739]) -Greedy action tensor([ 0.8530, -0.7439, -0.5072, 0.5618]) tensor([0.4532, 0.0918, 0.1163, 0.3387]) -Greedy action tensor([ 1.6660, -0.6699, -0.5734, 0.4267]) tensor([0.6699, 0.0648, 0.0714, 0.1940]) -Greedy action tensor([ 1.3124, -0.1766, -0.0491, 0.1620]) tensor([0.5560, 0.1255, 0.1425, 0.1760]) -Greedy action tensor([ 0.7238, -0.2943, -0.5114, 0.4712]) tensor([0.4117, 0.1487, 0.1197, 0.3198]) -Greedy action tensor([ 1.1798, -0.4837, -0.2206, 0.1294]) tensor([0.5600, 0.1061, 0.1380, 0.1959]) -Greedy action tensor([ 1.3280, -0.5657, -0.2844, 0.5996]) tensor([0.5457, 0.0821, 0.1088, 0.2634]) -Greedy action tensor([ 1.1714, -0.3891, -0.4345, 0.2844]) tensor([0.5487, 0.1152, 0.1101, 0.2260]) -Greedy action tensor([ 1.3179, -0.2710, -0.4198, 0.2762]) tensor([0.5771, 0.1178, 0.1015, 0.2036]) -Greedy action tensor([ 0.9603, -0.6790, -0.4584, 0.4426]) tensor([0.4921, 0.0955, 0.1191, 0.2933]) -Greedy action tensor([ 1.1758, -0.1454, -0.0934, 0.2973]) tensor([0.5093, 0.1359, 0.1432, 0.2116]) -Greedy action tensor([ 1.7424, -0.4656, -0.3273, 0.0918]) tensor([0.7002, 0.0770, 0.0884, 0.1344]) -Greedy action tensor([ 1.3396, -0.4347, -0.1546, 0.7779]) tensor([0.5091, 0.0863, 0.1143, 0.2903]) -Greedy action tensor([ 1.4108, -0.6068, -0.6176, 0.6185]) tensor([0.5823, 0.0774, 0.0766, 0.2637]) -Greedy action tensor([ 0.8607, -0.0277, -0.0628, 0.1096]) tensor([0.4385, 0.1804, 0.1742, 0.2069]) -Greedy action tensor([ 1.3077, -0.0346, -0.1090, 0.0158]) tensor([0.5623, 0.1469, 0.1364, 0.1545]) -Greedy action tensor([ 1.3977, -0.2388, -0.3433, 0.2672]) tensor([0.5907, 0.1150, 0.1036, 0.1907]) -Greedy action tensor([ 1.4192, -0.5113, -0.2740, 0.4291]) tensor([0.5880, 0.0853, 0.1082, 0.2185]) -Greedy action tensor([ 0.9018, -0.4318, -0.2270, 0.0771]) tensor([0.4938, 0.1301, 0.1597, 0.2164]) -Greedy action tensor([ 1.8184, -0.7491, -0.2856, 0.5177]) tensor([0.6798, 0.0522, 0.0829, 0.1851]) -Greedy action tensor([ 1.3506, -0.8021, -0.1115, 0.5127]) tensor([0.5616, 0.0652, 0.1302, 0.2430]) -Greedy action tensor([ 1.6414, -0.3439, -0.6238, 0.5167]) tensor([0.6386, 0.0877, 0.0663, 0.2074]) -Greedy action tensor([ 0.7731, -0.1972, -0.1015, 0.1084]) tensor([0.4328, 0.1640, 0.1805, 0.2227]) -Greedy action tensor([ 1.6209, -0.2087, -0.3889, 0.1846]) tensor([0.6526, 0.1047, 0.0875, 0.1552]) -Greedy action tensor([ 1.4603, -0.8336, -0.3279, 0.7910]) tensor([0.5617, 0.0567, 0.0940, 0.2876]) -Greedy action tensor([ 1.3197, -0.5825, -0.2181, 0.3790]) tensor([0.5700, 0.0851, 0.1225, 0.2225]) -Greedy action tensor([ 0.9457, -0.4824, -0.4973, 0.6190]) tensor([0.4551, 0.1091, 0.1075, 0.3283]) -Greedy action tensor([ 1.8799, -0.9174, -0.4721, 0.6460]) tensor([0.6909, 0.0421, 0.0658, 0.2012]) -Greedy action tensor([-0.1117, 0.1834, 2.3784, -0.9102]) tensor([0.0673, 0.0904, 0.8120, 0.0303]) -Greedy action tensor([-0.0560, -1.7209, -1.0482, -0.4044]) tensor([0.4413, 0.0835, 0.1636, 0.3115]) -Greedy action tensor([-0.2555, -1.6982, -0.2057, 1.0497]) tensor([0.1673, 0.0395, 0.1759, 0.6172]) -Greedy action tensor([-1.2358, -1.3855, 0.6262, -0.8668]) tensor([0.1026, 0.0884, 0.6606, 0.1484]) -Greedy action tensor([-0.3931, -0.9696, -0.1471, -0.0180]) tensor([0.2328, 0.1308, 0.2977, 0.3387]) -Greedy action tensor([ 0.4680, 0.6693, -0.7917, 0.1483]) tensor([0.3093, 0.3783, 0.0878, 0.2247]) -Greedy action tensor([ 0.0651, -0.7529, -0.1395, 2.0942]) tensor([0.1014, 0.0447, 0.0826, 0.7712]) -Greedy action tensor([-0.0557, 0.9625, -0.1989, -0.1298]) tensor([0.1797, 0.4976, 0.1558, 0.1669]) -Greedy action tensor([-0.8894, -0.1677, -0.5701, 0.1295]) tensor([0.1388, 0.2857, 0.1910, 0.3845]) -Greedy action tensor([ 0.1637, 0.4153, 0.5217, -0.4681]) tensor([0.2354, 0.3027, 0.3367, 0.1251]) -Greedy action tensor([-0.1892, -1.2537, 0.7137, 0.7864]) tensor([0.1547, 0.0534, 0.3816, 0.4103]) -Greedy action tensor([-0.0865, 0.0091, 0.1224, 0.8228]) tensor([0.1720, 0.1892, 0.2119, 0.4269]) -Greedy action tensor([ 0.8597, -0.6274, 0.7963, 1.4081]) tensor([0.2567, 0.0580, 0.2410, 0.4443]) -Greedy action tensor([ 0.3202, -0.1936, 0.6556, 0.9906]) tensor([0.2020, 0.1208, 0.2824, 0.3948]) -Greedy action tensor([ 0.3457, -0.7929, -0.0435, -0.5062]) tensor([0.4125, 0.1321, 0.2795, 0.1760]) -Greedy action tensor([0.8501, 0.6605, 0.0341, 0.2184]) tensor([0.3570, 0.2953, 0.1579, 0.1898]) -Greedy action tensor([ 1.2156, -0.9543, -0.0594, 0.5321]) tensor([0.5267, 0.0601, 0.1472, 0.2659]) -Greedy action tensor([-0.1985, 0.7838, 0.7126, -0.7954]) tensor([0.1491, 0.3981, 0.3708, 0.0821]) -Greedy action tensor([-0.1237, -1.4429, 0.8088, 0.1522]) tensor([0.1951, 0.0522, 0.4957, 0.2571]) -Greedy action tensor([ 0.7529, -1.3177, 0.9075, -0.6034]) tensor([0.3920, 0.0494, 0.4575, 0.1010]) -Greedy action tensor([-0.4041, -0.1886, -0.0814, 0.6225]) tensor([0.1559, 0.1934, 0.2153, 0.4353]) -Greedy action tensor([-0.4006, -0.0625, -0.5321, 0.2335]) tensor([0.1936, 0.2715, 0.1698, 0.3651]) -Greedy action tensor([ 1.8498, -0.4483, 0.9725, 1.4483]) tensor([0.4575, 0.0460, 0.1903, 0.3062]) -Greedy action tensor([0.8561, 0.3845, 0.2182, 0.0196]) tensor([0.3867, 0.2413, 0.2044, 0.1676]) -Greedy action tensor([ 0.3684, -0.3632, -0.6953, 0.4428]) tensor([0.3444, 0.1657, 0.1189, 0.3710]) -Greedy action tensor([ 0.4017, -2.0320, -0.0613, 1.0783]) tensor([0.2714, 0.0238, 0.1708, 0.5339]) -Greedy action tensor([ 1.3883, -0.7340, 0.5748, 0.5315]) tensor([0.5031, 0.0603, 0.2230, 0.2136]) -Greedy action tensor([ 1.0546, 0.2661, -0.0725, -0.4363]) tensor([0.4991, 0.2268, 0.1617, 0.1124]) -Greedy action tensor([-0.5186, -0.2576, -0.3088, 0.2357]) tensor([0.1768, 0.2294, 0.2180, 0.3758]) -Greedy action tensor([-0.5925, -0.3325, -0.4922, 0.8908]) tensor([0.1280, 0.1661, 0.1416, 0.5643]) -Greedy action tensor([ 0.0339, -1.0447, -0.5759, 2.3982]) tensor([0.0799, 0.0272, 0.0434, 0.8496]) -Greedy action tensor([-1.4193, -1.1793, -0.4574, -0.3644]) tensor([0.1289, 0.1638, 0.3372, 0.3701]) -Greedy action tensor([-0.2145, -0.7614, 1.1065, 0.3512]) tensor([0.1411, 0.0817, 0.5288, 0.2485]) -Greedy action tensor([ 0.7364, -1.7774, 0.4333, 0.7109]) tensor([0.3579, 0.0290, 0.2643, 0.3489]) -Greedy action tensor([ 0.9257, 0.5834, -0.1260, 0.6250]) tensor([0.3572, 0.2536, 0.1248, 0.2644]) -Greedy action tensor([ 0.9383, -0.5629, -0.4910, 1.3243]) tensor([0.3409, 0.0760, 0.0816, 0.5015]) -Greedy action tensor([ 1.2725, -1.2831, 0.5344, 1.5136]) tensor([0.3536, 0.0275, 0.1690, 0.4500]) -Greedy action tensor([ 0.0265, -0.2069, 0.0271, 1.6763]) tensor([0.1250, 0.0990, 0.1251, 0.6509]) -Greedy action tensor([-0.5858, -1.0602, -0.2601, -0.6109]) tensor([0.2511, 0.1562, 0.3478, 0.2449]) -Greedy action tensor([-0.4454, -0.6224, 1.4371, -0.4682]) tensor([0.1065, 0.0893, 0.7000, 0.1041]) -Greedy action tensor([ 1.3263, -0.4025, 1.5492, 0.4868]) tensor([0.3497, 0.0621, 0.4371, 0.1511]) -Greedy action tensor([ 1.4278, -0.5865, 0.2140, 0.8431]) tensor([0.5031, 0.0671, 0.1494, 0.2804]) -Greedy action tensor([ 1.7515, -1.2757, 0.9031, 1.8272]) tensor([0.3914, 0.0190, 0.1675, 0.4221]) -Greedy action tensor([0.5806, 0.7036, 0.1378, 0.1236]) tensor([0.2936, 0.3320, 0.1885, 0.1859]) -Greedy action tensor([0.9824, 0.1985, 1.3193, 0.8811]) tensor([0.2659, 0.1214, 0.3724, 0.2403]) -Greedy action tensor([ 0.1295, -0.9332, 0.0588, 1.0286]) tensor([0.2112, 0.0730, 0.1968, 0.5190]) -Greedy action tensor([ 1.7668, -0.5104, 0.2289, 1.5283]) tensor([0.4750, 0.0487, 0.1021, 0.3742]) -Greedy action tensor([-0.1265, 1.0558, 0.1631, 0.0257]) tensor([0.1479, 0.4824, 0.1976, 0.1722]) -Greedy action tensor([ 0.7162, 0.4651, 0.1322, -0.8425]) tensor([0.3928, 0.3055, 0.2190, 0.0826]) -Greedy action tensor([-0.8347, -1.8651, 0.3679, 0.3059]) tensor([0.1280, 0.0457, 0.4260, 0.4004]) -Greedy action tensor([ 0.5648, -0.7720, 0.2823, 0.9631]) tensor([0.2852, 0.0749, 0.2150, 0.4248]) -Greedy action tensor([-0.0049, 0.1600, 0.2707, -0.1303]) tensor([0.2284, 0.2693, 0.3008, 0.2015]) -Greedy action tensor([ 0.3518, 0.0650, -0.3102, 1.2415]) tensor([0.2127, 0.1597, 0.1097, 0.5179]) -Greedy action tensor([ 0.1796, 0.2064, -0.2682, 0.5323]) tensor([0.2446, 0.2512, 0.1563, 0.3480]) -Greedy action tensor([-0.2775, -1.3642, -0.2363, -0.3859]) tensor([0.3052, 0.1030, 0.3180, 0.2738]) -Greedy action tensor([-0.4631, -0.4405, 0.1829, -0.7100]) tensor([0.2122, 0.2171, 0.4049, 0.1658]) -Greedy action tensor([1.5181, 0.4816, 0.5883, 0.8017]) tensor([0.4469, 0.1585, 0.1763, 0.2183]) -Greedy action tensor([ 0.5381, 0.1210, 1.7452, -0.6295]) tensor([0.1882, 0.1240, 0.6293, 0.0585]) -Greedy action tensor([ 0.9466, 0.7728, -0.7774, 0.2969]) tensor([0.3935, 0.3308, 0.0702, 0.2055]) -Greedy action tensor([-0.1090, -1.1022, -0.4500, 2.2891]) tensor([0.0764, 0.0283, 0.0543, 0.8409]) -Greedy action tensor([-0.5355, -0.4859, -0.3802, 0.4721]) tensor([0.1678, 0.1764, 0.1961, 0.4597]) -Greedy action tensor([ 0.8527, -1.4847, 0.1861, 0.5409]) tensor([0.4270, 0.0412, 0.2192, 0.3126]) -Greedy action tensor([ 0.2139, 0.7657, 0.3533, -0.0494]) tensor([0.2148, 0.3730, 0.2470, 0.1651]) -Greedy action tensor([ 0.8947, -1.2721, 0.7630, 0.9164]) tensor([0.3319, 0.0380, 0.2909, 0.3392]) -Greedy action tensor([-0.2671, 1.4028, 1.1761, -0.7911]) tensor([0.0898, 0.4769, 0.3801, 0.0532]) -Greedy action tensor([0.5854, 0.0525, 1.1346, 2.0840]) tensor([0.1283, 0.0753, 0.2222, 0.5742]) -Greedy action tensor([1.4651, 0.5329, 1.4066, 0.0561]) tensor([0.3874, 0.1525, 0.3654, 0.0947]) -Greedy action tensor([0.4787, 0.0709, 1.1923, 1.3789]) tensor([0.1622, 0.1079, 0.3310, 0.3989]) -Greedy action tensor([ 0.7888, 0.2421, -0.1647, -0.2091]) tensor([0.4286, 0.2481, 0.1652, 0.1580]) -Greedy action tensor([-0.1353, -2.6861, 0.1318, 0.6619]) tensor([0.2172, 0.0169, 0.2837, 0.4821]) -Greedy action tensor([ 0.9988, -0.6340, 0.7267, 0.7318]) tensor([0.3673, 0.0718, 0.2798, 0.2812]) -Greedy action tensor([ 1.0132, 0.6587, -1.0724, 0.4836]) tensor([0.4141, 0.2905, 0.0515, 0.2439]) -Greedy action tensor([ 0.2054, -1.2576, -0.2351, -0.2238]) tensor([0.3958, 0.0917, 0.2548, 0.2577]) -Greedy action tensor([ 0.2021, -0.3282, 0.1672, 1.0424]) tensor([0.2053, 0.1208, 0.1982, 0.4757]) -Greedy action tensor([ 1.5439, -1.5397, 1.7947, 0.4801]) tensor([0.3737, 0.0171, 0.4802, 0.1290]) -Greedy action tensor([-0.7511, -0.6482, 0.0582, 0.9536]) tensor([0.1015, 0.1125, 0.2280, 0.5581]) -Greedy action tensor([ 0.7567, -2.4290, 0.0359, 0.7708]) tensor([0.3934, 0.0163, 0.1913, 0.3990]) -Greedy action tensor([ 0.8932, -0.2895, 0.1916, 1.0670]) tensor([0.3342, 0.1024, 0.1657, 0.3976]) -Greedy action tensor([-0.2405, -1.2397, 0.0531, -0.0895]) tensor([0.2582, 0.0951, 0.3464, 0.3003]) -Greedy action tensor([ 0.3823, 0.3570, 0.8288, -0.1275]) tensor([0.2416, 0.2356, 0.3776, 0.1451]) -Greedy action tensor([ 0.1427, -0.4817, 0.5833, -0.3070]) tensor([0.2683, 0.1437, 0.4168, 0.1711]) -Greedy action tensor([-1.6110, -1.4541, 0.3199, -0.4728]) tensor([0.0821, 0.0960, 0.5658, 0.2561]) -Greedy action tensor([ 0.5910, -0.5297, -0.0952, -0.2253]) tensor([0.4402, 0.1435, 0.2216, 0.1946]) -Greedy action tensor([ 0.7027, -0.2719, 0.2106, -0.4920]) tensor([0.4364, 0.1647, 0.2668, 0.1321]) -Greedy action tensor([ 0.5659, -0.2966, 0.0294, -0.2913]) tensor([0.4113, 0.1736, 0.2405, 0.1745]) -Greedy action tensor([ 0.8559, -0.6073, -0.0818, -0.6336]) tensor([0.5410, 0.1252, 0.2118, 0.1220]) -Greedy action tensor([ 0.5818, -0.3528, -0.0368, -0.4353]) tensor([0.4361, 0.1713, 0.2349, 0.1577]) -Greedy action tensor([ 0.7701, -0.5665, -0.1210, -0.3471]) tensor([0.5000, 0.1314, 0.2051, 0.1636]) -Greedy action tensor([ 0.6872, -0.4605, 0.0494, -0.4494]) tensor([0.4615, 0.1465, 0.2439, 0.1481]) -Greedy action tensor([ 0.2483, 0.0628, -0.1879, -0.2169]) tensor([0.3220, 0.2675, 0.2082, 0.2023]) -Greedy action tensor([ 0.4642, 0.1146, 0.1128, -0.3609]) tensor([0.3513, 0.2476, 0.2472, 0.1539]) -Greedy action tensor([ 0.0536, -0.1195, -0.1538, -0.4180]) tensor([0.3051, 0.2566, 0.2480, 0.1904]) -Greedy action tensor([ 0.7560, -0.5260, -0.0260, -0.4475]) tensor([0.4914, 0.1363, 0.2248, 0.1475]) -Greedy action tensor([0.2396, 0.0417, 0.0870, 0.0442]) tensor([0.2856, 0.2343, 0.2452, 0.2349]) -Greedy action tensor([ 0.3949, 0.2353, -0.1810, -0.2046]) tensor([0.3374, 0.2876, 0.1897, 0.1853]) -Greedy action tensor([ 0.8378, -0.6615, -0.1133, -0.4260]) tensor([0.5285, 0.1180, 0.2042, 0.1493]) -Greedy action tensor([ 0.7332, 0.0553, -0.1508, -0.2732]) tensor([0.4374, 0.2220, 0.1807, 0.1599]) -Greedy action tensor([ 0.8053, -0.5072, -0.1120, -0.4579]) tensor([0.5124, 0.1379, 0.2048, 0.1449]) -Greedy action tensor([ 0.8695, -0.5241, -0.1258, -0.5071]) tensor([0.5347, 0.1327, 0.1976, 0.1350]) -Greedy action tensor([ 0.3952, 0.1516, 0.0140, -0.2504]) tensor([0.3343, 0.2620, 0.2283, 0.1753]) -Greedy action tensor([ 0.7723, -0.3667, 0.0553, -0.0434]) tensor([0.4443, 0.1422, 0.2169, 0.1965]) -Greedy action tensor([ 0.5042, -0.4370, -0.0915, -0.4181]) tensor([0.4275, 0.1668, 0.2356, 0.1700]) -Greedy action tensor([ 0.2353, -0.3065, -0.2228, -0.4759]) tensor([0.3697, 0.2150, 0.2338, 0.1815]) -Greedy action tensor([ 0.6113, -0.2984, 0.0560, -0.1434]) tensor([0.4087, 0.1646, 0.2346, 0.1922]) -Greedy action tensor([ 0.4159, -0.3804, 0.1406, -0.4630]) tensor([0.3809, 0.1718, 0.2892, 0.1582]) -Greedy action tensor([ 0.4983, 0.1051, 0.2208, -0.1075]) tensor([0.3358, 0.2266, 0.2544, 0.1832]) -Greedy action tensor([ 1.0792, -1.0381, 0.2287, -0.7195]) tensor([0.5838, 0.0703, 0.2494, 0.0966]) -Greedy action tensor([ 0.7688, -0.5232, -0.0491, -0.4054]) tensor([0.4938, 0.1357, 0.2179, 0.1526]) -Greedy action tensor([ 0.2981, -0.0126, 0.0403, -0.2123]) tensor([0.3219, 0.2360, 0.2488, 0.1933]) -Greedy action tensor([ 0.4058, 0.2938, 0.0296, -0.4117]) tensor([0.3309, 0.2958, 0.2271, 0.1461]) -Greedy action tensor([ 0.7065, -0.6050, 0.0246, -0.3917]) tensor([0.4743, 0.1278, 0.2398, 0.1582]) -Greedy action tensor([ 0.8326, -0.7678, -0.1109, -0.7092]) tensor([0.5540, 0.1118, 0.2157, 0.1186]) -Greedy action tensor([ 0.5621, -0.3668, 0.0023, -0.1424]) tensor([0.4064, 0.1605, 0.2322, 0.2009]) -Greedy action tensor([ 0.2691, -0.3093, -0.1469, -0.4903]) tensor([0.3720, 0.2086, 0.2454, 0.1741]) -Greedy action tensor([ 0.6122, -0.4181, -0.0674, -0.3531]) tensor([0.4455, 0.1590, 0.2258, 0.1697]) -Greedy action tensor([ 1.2875, -1.6096, -0.1353, -0.8791]) tensor([0.7088, 0.0391, 0.1708, 0.0812]) -Greedy action tensor([ 0.6664, -0.6242, -0.0294, -0.4215]) tensor([0.4738, 0.1303, 0.2363, 0.1596]) -Greedy action tensor([ 0.9010, -0.7489, -0.0594, -0.8205]) tensor([0.5703, 0.1095, 0.2183, 0.1020]) -Greedy action tensor([ 0.7199, -0.3728, -0.1953, -0.3236]) tensor([0.4789, 0.1606, 0.1918, 0.1687]) -Greedy action tensor([ 0.3516, -0.2055, -0.0828, -0.2980]) tensor([0.3646, 0.2089, 0.2361, 0.1904]) -Greedy action tensor([ 0.9732, -1.2261, 0.0234, -0.5466]) tensor([0.5826, 0.0646, 0.2254, 0.1274]) -Greedy action tensor([ 0.4341, -0.3438, 0.0462, -0.4351]) tensor([0.3911, 0.1796, 0.2653, 0.1640]) -Greedy action tensor([ 0.4213, -0.1154, 0.0710, -0.0870]) tensor([0.3460, 0.2023, 0.2437, 0.2081]) -Greedy action tensor([ 0.2833, 0.0003, -0.0695, -0.0340]) tensor([0.3140, 0.2366, 0.2207, 0.2287]) -Greedy action tensor([ 0.7730, -0.3392, 0.0790, -0.3064]) tensor([0.4612, 0.1517, 0.2304, 0.1567]) -Greedy action tensor([ 0.1963, -0.0095, -0.0908, -0.2577]) tensor([0.3125, 0.2544, 0.2345, 0.1985]) -Greedy action tensor([ 0.6313, -0.4914, -0.0525, -0.2744]) tensor([0.4476, 0.1456, 0.2259, 0.1809]) -Greedy action tensor([ 0.6369, -0.5117, -0.0465, -0.2269]) tensor([0.4457, 0.1413, 0.2250, 0.1879]) -Greedy action tensor([ 0.6507, -0.5479, -0.1092, -0.4304]) tensor([0.4743, 0.1430, 0.2218, 0.1609]) -Greedy action tensor([ 0.4139, 0.0959, 0.0088, -0.2464]) tensor([0.3435, 0.2499, 0.2291, 0.1775]) -Greedy action tensor([ 0.4116, 0.0458, -0.0043, -0.3640]) tensor([0.3554, 0.2465, 0.2345, 0.1636]) -Greedy action tensor([ 0.5027, -0.0690, -0.0794, -0.0648]) tensor([0.3717, 0.2098, 0.2077, 0.2107]) -Greedy action tensor([ 0.4603, 0.1434, -0.0946, -0.2594]) tensor([0.3585, 0.2611, 0.2058, 0.1746]) -Greedy action tensor([ 0.5049, -0.3146, -0.1447, -0.4109]) tensor([0.4232, 0.1865, 0.2210, 0.1694]) -Greedy action tensor([ 0.5750, -0.3057, 0.0069, -0.4240]) tensor([0.4257, 0.1764, 0.2412, 0.1567]) -Greedy action tensor([ 0.6206, 0.0172, -0.1676, -0.2232]) tensor([0.4112, 0.2249, 0.1870, 0.1769]) -Greedy action tensor([ 0.5323, -0.6155, -0.0792, -0.0733]) tensor([0.4157, 0.1319, 0.2255, 0.2269]) -Greedy action tensor([ 1.1482, -1.1798, 0.0091, -0.8274]) tensor([0.6426, 0.0626, 0.2057, 0.0891]) -Greedy action tensor([ 0.6077, -0.2453, -0.0715, -0.1445]) tensor([0.4159, 0.1772, 0.2109, 0.1960]) -Greedy action tensor([ 0.3567, -0.0820, -0.1490, -0.1908]) tensor([0.3538, 0.2282, 0.2134, 0.2046]) -Greedy action tensor([ 0.5371, 0.2048, -0.1101, -0.0193]) tensor([0.3554, 0.2549, 0.1860, 0.2037]) -Greedy action tensor([ 0.8528, -0.3617, -0.0739, -0.4995]) tensor([0.5125, 0.1521, 0.2029, 0.1325]) -Greedy action tensor([ 0.2966, 0.1064, -0.0673, -0.1482]) tensor([0.3162, 0.2614, 0.2197, 0.2027]) -Greedy action tensor([ 0.8183, -0.3924, -0.0612, -0.2985]) tensor([0.4901, 0.1460, 0.2034, 0.1604]) -Greedy action tensor([ 0.5054, -0.0801, -0.1034, -0.3212]) tensor([0.3940, 0.2194, 0.2143, 0.1724]) -Greedy action tensor([ 0.3748, -0.2820, -0.1583, -0.5549]) tensor([0.4000, 0.2074, 0.2347, 0.1579]) -Greedy action tensor([ 0.8201, -0.5002, -0.0636, -0.5757]) tensor([0.5187, 0.1385, 0.2144, 0.1284]) -Greedy action tensor([ 0.1399, -0.1970, -0.0855, -0.4163]) tensor([0.3241, 0.2314, 0.2587, 0.1858]) -Greedy action tensor([ 0.7457, -0.2772, -0.0708, -0.2094]) tensor([0.4574, 0.1645, 0.2022, 0.1760]) -Greedy action tensor([ 0.4145, -0.0797, -0.0775, -0.2313]) tensor([0.3642, 0.2222, 0.2227, 0.1909]) -Greedy action tensor([ 0.7984, -0.5116, -0.0014, -0.3207]) tensor([0.4888, 0.1319, 0.2197, 0.1596]) -Greedy action tensor([ 0.6809, -0.5787, 0.1341, -0.6637]) tensor([0.4710, 0.1336, 0.2726, 0.1228]) -Greedy action tensor([ 0.3594, 0.1314, -0.2141, -0.3030]) tensor([0.3478, 0.2769, 0.1960, 0.1793]) -Greedy action tensor([ 0.3250, -0.2272, -0.0618, -0.2690]) tensor([0.3563, 0.2051, 0.2420, 0.1967]) -Greedy action tensor([ 0.6289, -0.0310, -0.0484, -0.2995]) tensor([0.4132, 0.2136, 0.2099, 0.1633]) -Greedy action tensor([ 0.5739, -0.1104, -0.0208, -0.5301]) tensor([0.4188, 0.2113, 0.2311, 0.1389]) -Greedy action tensor([ 0.4640, -0.2334, -0.1245, -0.2418]) tensor([0.3927, 0.1955, 0.2180, 0.1939]) -Greedy action tensor([ 1.0174, -0.9331, 0.0720, -0.5824]) tensor([0.5771, 0.0821, 0.2242, 0.1165]) -Greedy action tensor([ 0.4607, -0.0308, -0.0309, -0.0441]) tensor([0.3537, 0.2164, 0.2164, 0.2135]) -Greedy action tensor([ 0.5618, 0.0230, -0.0665, -0.1044]) tensor([0.3801, 0.2218, 0.2028, 0.1953]) -Greedy action tensor([ 0.6633, -0.3294, -0.0020, -0.5297]) tensor([0.4570, 0.1694, 0.2350, 0.1386]) -Greedy action tensor([ 0.3086, 0.1205, -0.2256, -0.3348]) tensor([0.3401, 0.2818, 0.1994, 0.1787]) -Greedy action tensor([ 0.5570, -0.2942, 0.0230, -0.2971]) tensor([0.4100, 0.1751, 0.2404, 0.1745]) -Greedy action tensor([-1.9088, -0.4543, 0.6670, -0.1294]) tensor([0.0411, 0.1759, 0.5397, 0.2434]) -Greedy action tensor([-1.9061, -0.4548, 0.6447, -0.1603]) tensor([0.0420, 0.1792, 0.5382, 0.2406]) -Greedy action tensor([-1.1272, -0.4065, 1.2853, 1.3409]) tensor([0.0384, 0.0790, 0.4290, 0.4535]) -Greedy action tensor([-1.8661, -0.3994, 0.6191, -0.1549]) tensor([0.0437, 0.1895, 0.5248, 0.2420]) -Greedy action tensor([-1.3591, -0.4863, 1.1163, 0.9524]) tensor([0.0394, 0.0944, 0.4685, 0.3977]) -Greedy action tensor([-1.7680, -0.2594, 0.5516, -0.0326]) tensor([0.0468, 0.2116, 0.4761, 0.2655]) -Greedy action tensor([-1.4666, -0.5989, 0.5870, 0.2418]) tensor([0.0599, 0.1426, 0.4669, 0.3306]) -Greedy action tensor([-1.8490, -0.4622, 0.6514, -0.0903]) tensor([0.0435, 0.1740, 0.5300, 0.2524]) -Greedy action tensor([-1.9113, -0.4124, 0.6477, -0.1547]) tensor([0.0413, 0.1851, 0.5342, 0.2394]) -Greedy action tensor([-1.5191, -0.4646, 0.5814, -0.2654]) tensor([0.0643, 0.1847, 0.5256, 0.2254]) -Greedy action tensor([-0.7497, -0.4828, 0.2025, 0.0800]) tensor([0.1391, 0.1816, 0.3604, 0.3189]) -Greedy action tensor([-1.4978, -0.4563, 0.4715, 0.0811]) tensor([0.0631, 0.1788, 0.4521, 0.3060]) -Greedy action tensor([-1.2047, -0.4055, -0.0132, 1.3211]) tensor([0.0526, 0.1169, 0.1731, 0.6574]) -Greedy action tensor([-0.9614, -0.5770, 0.1897, 0.2770]) tensor([0.1101, 0.1617, 0.3482, 0.3800]) -Greedy action tensor([-1.5862, -0.5294, 0.5518, 0.2064]) tensor([0.0545, 0.1567, 0.4619, 0.3270]) -Greedy action tensor([-1.9291, -0.4575, 0.6830, -0.1488]) tensor([0.0401, 0.1748, 0.5470, 0.2381]) -Greedy action tensor([-1.7641, -0.1430, 0.5308, -0.0375]) tensor([0.0463, 0.2341, 0.4593, 0.2602]) -Greedy action tensor([-0.6116, -0.1354, 0.2214, -0.1472]) tensor([0.1538, 0.2476, 0.3538, 0.2447]) -Greedy action tensor([-0.9898, -0.5418, 0.2224, 0.3848]) tensor([0.1012, 0.1584, 0.3402, 0.4002]) -Greedy action tensor([-1.6261, -0.4792, 1.0951, 0.7603]) tensor([0.0331, 0.1042, 0.5029, 0.3598]) -Greedy action tensor([-1.0999, -0.5894, 0.2722, 0.1407]) tensor([0.0993, 0.1655, 0.3917, 0.3435]) -Greedy action tensor([-1.3155, -0.6236, 0.3453, 0.2181]) tensor([0.0775, 0.1549, 0.4081, 0.3594]) -Greedy action tensor([-1.6680, 0.0674, 0.4434, -0.0290]) tensor([0.0498, 0.2824, 0.4113, 0.2565]) -Greedy action tensor([-0.7831, -0.5496, 0.2677, 0.2291]) tensor([0.1270, 0.1604, 0.3632, 0.3494]) -Greedy action tensor([-1.8433, -0.4308, 0.9138, 0.4712]) tensor([0.0323, 0.1325, 0.5085, 0.3267]) -Greedy action tensor([-1.9074, -0.4455, 0.6678, -0.1441]) tensor([0.0412, 0.1777, 0.5409, 0.2402]) -Greedy action tensor([-0.9750, -0.2185, 0.2661, 0.0369]) tensor([0.1071, 0.2281, 0.3704, 0.2945]) -Greedy action tensor([-1.4715, -0.4341, 0.4845, 0.2233]) tensor([0.0612, 0.1727, 0.4328, 0.3333]) -Greedy action tensor([-0.4459, -0.4449, 0.1494, 0.1412]) tensor([0.1781, 0.1783, 0.3231, 0.3205]) -Greedy action tensor([-1.5736, -0.5367, 0.5060, 0.1839]) tensor([0.0568, 0.1601, 0.4541, 0.3291]) -Greedy action tensor([-1.8797, -0.2811, 0.6058, -0.1318]) tensor([0.0422, 0.2087, 0.5067, 0.2423]) -Greedy action tensor([-1.7264, -0.4521, 0.5997, -0.2154]) tensor([0.0517, 0.1849, 0.5292, 0.2342]) -Greedy action tensor([-1.0415, -0.5725, 0.3767, 0.4580]) tensor([0.0892, 0.1426, 0.3685, 0.3997]) -Greedy action tensor([-1.8858, -0.4982, 0.6185, -0.1607]) tensor([0.0438, 0.1753, 0.5354, 0.2456]) -Greedy action tensor([-1.2365, -0.7270, 0.3751, 0.4935]) tensor([0.0751, 0.1250, 0.3763, 0.4236]) -Greedy action tensor([-0.6015, -0.5443, 0.1710, 0.3216]) tensor([0.1483, 0.1571, 0.3212, 0.3734]) -Greedy action tensor([-0.8112, -0.5998, 0.2170, 0.3424]) tensor([0.1219, 0.1506, 0.3409, 0.3865]) -Greedy action tensor([-1.3826, -0.1202, 0.2868, 0.2386]) tensor([0.0671, 0.2371, 0.3563, 0.3395]) -Greedy action tensor([-1.7928, -0.4939, 0.6352, -0.0129]) tensor([0.0456, 0.1671, 0.5169, 0.2704]) -Greedy action tensor([-1.6661, -0.3216, 0.8192, 0.6664]) tensor([0.0368, 0.1413, 0.4423, 0.3796]) -Greedy action tensor([-1.2544, -0.3460, 0.6114, 0.8554]) tensor([0.0550, 0.1364, 0.3552, 0.4534]) -Greedy action tensor([-1.4347, -0.5678, 0.3799, 0.1492]) tensor([0.0695, 0.1653, 0.4265, 0.3386]) -Greedy action tensor([-1.0416, -0.2344, 0.7853, 1.2450]) tensor([0.0518, 0.1162, 0.3221, 0.5100]) -Greedy action tensor([-1.6224, -0.5173, 0.5161, 0.0644]) tensor([0.0558, 0.1686, 0.4739, 0.3017]) -Greedy action tensor([-0.9829, -0.5478, 0.5318, -0.2707]) tensor([0.1095, 0.1692, 0.4981, 0.2232]) -Greedy action tensor([-1.5546, -0.4902, -0.0215, -0.4234]) tensor([0.0860, 0.2493, 0.3983, 0.2665]) -Greedy action tensor([-1.0659, -0.5672, 0.3322, 0.6403]) tensor([0.0820, 0.1349, 0.3317, 0.4514]) -Greedy action tensor([-1.7692, -0.4921, 0.5861, -0.0562]) tensor([0.0484, 0.1735, 0.5099, 0.2682]) -Greedy action tensor([-1.6155, -0.4566, 0.4770, 0.0398]) tensor([0.0571, 0.1818, 0.4625, 0.2987]) -Greedy action tensor([-1.7800, -0.5006, 0.5832, -0.0976]) tensor([0.0485, 0.1745, 0.5158, 0.2611]) -Greedy action tensor([-1.1072, -0.5304, 0.3497, 0.4951]) tensor([0.0831, 0.1479, 0.3566, 0.4124]) -Greedy action tensor([-1.6735, -0.2644, 0.6852, 0.1738]) tensor([0.0454, 0.1859, 0.4805, 0.2882]) -Greedy action tensor([-1.4577, -0.5433, 0.4120, 0.0672]) tensor([0.0686, 0.1712, 0.4450, 0.3152]) -Greedy action tensor([-0.7984, 0.2987, 0.1402, -0.0703]) tensor([0.1160, 0.3474, 0.2965, 0.2402]) -Greedy action tensor([-1.8592, -0.4867, 0.6715, -0.0783]) tensor([0.0427, 0.1683, 0.5359, 0.2532]) -Greedy action tensor([-1.3182, -0.5436, 0.4492, 0.1902]) tensor([0.0738, 0.1602, 0.4323, 0.3337]) -Greedy action tensor([-1.8740, -0.4311, 0.6544, -0.1171]) tensor([0.0424, 0.1797, 0.5320, 0.2459]) -Greedy action tensor([-1.7153, -0.4194, 0.7330, 0.2490]) tensor([0.0428, 0.1565, 0.4954, 0.3053]) -Greedy action tensor([-1.1939, -0.5836, 0.3287, 0.3445]) tensor([0.0828, 0.1524, 0.3794, 0.3854]) -Greedy action tensor([-1.8396, -0.3526, 0.5928, -0.1034]) tensor([0.0445, 0.1967, 0.5064, 0.2524]) -Greedy action tensor([-1.2109, -0.5377, 0.4961, 0.7469]) tensor([0.0643, 0.1260, 0.3543, 0.4554]) -Greedy action tensor([-0.7939, -0.5127, 0.3798, -0.0872]) tensor([0.1318, 0.1746, 0.4263, 0.2673]) -Greedy action tensor([-1.2590, -0.5636, 0.8840, 1.0793]) tensor([0.0457, 0.0916, 0.3894, 0.4734]) -Greedy action tensor([-0.9429, -0.8034, 1.0406, 0.5677]) tensor([0.0717, 0.0824, 0.5211, 0.3248]) -Greedy action tensor([-1.6092, 0.2190, 0.4692, 0.0732]) tensor([0.0486, 0.3022, 0.3881, 0.2612]) -Greedy action tensor([-1.3752, -0.3692, 0.9482, 1.0167]) tensor([0.0402, 0.1099, 0.4104, 0.4395]) -Greedy action tensor([-1.0639, -0.6993, 0.2875, 0.6779]) tensor([0.0833, 0.1199, 0.3216, 0.4752]) -Greedy action tensor([-1.8011, -0.4082, 0.6316, 0.0099]) tensor([0.0444, 0.1787, 0.5055, 0.2714]) -Greedy action tensor([-1.8056, -0.4842, 0.6041, -0.0613]) tensor([0.0463, 0.1735, 0.5153, 0.2649]) -Greedy action tensor([-1.7122, -0.2956, 0.4863, -0.0230]) tensor([0.0511, 0.2109, 0.4609, 0.2770]) -Greedy action tensor([-1.7612, -0.3457, 0.5853, -0.1662]) tensor([0.0488, 0.2009, 0.5098, 0.2404]) -Greedy action tensor([-1.3563, -0.5822, 1.4870, 1.2314]) tensor([0.0297, 0.0645, 0.5105, 0.3953]) -Greedy action tensor([-1.0858, -0.4314, 1.2628, 1.3594]) tensor([0.0401, 0.0772, 0.4201, 0.4626]) -Greedy action tensor([-1.1584, -0.6077, 1.2775, 1.4275]) tensor([0.0364, 0.0632, 0.4165, 0.4839]) -Greedy action tensor([-1.6661, -0.6114, 0.4825, -0.2207]) tensor([0.0599, 0.1720, 0.5137, 0.2543]) -Greedy action tensor([-0.9453, -0.5600, 0.2279, 0.2080]) tensor([0.1127, 0.1657, 0.3644, 0.3572]) -Greedy action tensor([-1.8141, -0.4260, 0.6158, -0.0606]) tensor([0.0452, 0.1810, 0.5130, 0.2608]) -Greedy action tensor([-1.8791, -0.4605, 0.6353, -0.1312]) tensor([0.0430, 0.1778, 0.5320, 0.2472]) -Greedy action tensor([-1.3332, -0.4262, 0.5480, 0.7762]) tensor([0.0547, 0.1355, 0.3589, 0.4509]) -Greedy action tensor([-1.8243, -0.4773, 0.6035, -0.0911]) tensor([0.0458, 0.1761, 0.5190, 0.2591]) -Greedy action tensor([-1.0829, -0.6245, 0.2506, 0.3158]) tensor([0.0959, 0.1517, 0.3639, 0.3884]) -Greedy action tensor([ 1.8894, -0.4688, -0.3120, 0.3224]) tensor([0.7073, 0.0669, 0.0783, 0.1476]) -Greedy action tensor([ 1.6547, -0.5146, -0.2833, 0.4546]) tensor([0.6413, 0.0733, 0.0923, 0.1931]) -Greedy action tensor([ 1.3394, -0.4935, -0.2711, 0.2045]) tensor([0.5948, 0.0951, 0.1188, 0.1912]) -Greedy action tensor([ 1.3091, -0.1687, -0.1205, 0.2859]) tensor([0.5474, 0.1249, 0.1310, 0.1967]) -Greedy action tensor([ 1.0503, -0.5194, -0.1828, 0.2249]) tensor([0.5161, 0.1074, 0.1504, 0.2261]) -Greedy action tensor([ 1.0297, 0.0133, -0.4854, 0.2005]) tensor([0.4955, 0.1793, 0.1089, 0.2162]) -Greedy action tensor([ 0.8241, -0.5462, -0.1638, 0.4785]) tensor([0.4284, 0.1088, 0.1595, 0.3032]) -Greedy action tensor([ 1.2250, -0.6874, -0.4286, 0.4126]) tensor([0.5609, 0.0829, 0.1073, 0.2489]) -Greedy action tensor([ 2.4072, -1.4742, -0.4429, 0.8159]) tensor([0.7800, 0.0161, 0.0451, 0.1589]) -Greedy action tensor([ 0.8298, -0.3475, 0.1596, -0.0370]) tensor([0.4464, 0.1375, 0.2284, 0.1876]) -Greedy action tensor([ 0.5126, -0.2790, -0.3039, 0.0153]) tensor([0.3995, 0.1810, 0.1766, 0.2430]) -Greedy action tensor([ 2.8210, -1.3866, -0.2839, 0.8760]) tensor([0.8315, 0.0124, 0.0373, 0.1189]) -Greedy action tensor([ 1.1451, 0.0451, 0.1018, -0.3461]) tensor([0.5235, 0.1743, 0.1844, 0.1178]) -Greedy action tensor([ 1.1370, -0.4060, -0.3907, 0.3558]) tensor([0.5295, 0.1132, 0.1149, 0.2424]) -Greedy action tensor([ 0.7637, -0.4399, -0.1846, 0.0422]) tensor([0.4601, 0.1381, 0.1782, 0.2236]) -Greedy action tensor([ 1.2640, -0.3360, -0.1206, 0.3101]) tensor([0.5442, 0.1099, 0.1363, 0.2096]) -Greedy action tensor([ 1.7801, -0.9734, -0.2383, 0.4361]) tensor([0.6862, 0.0437, 0.0912, 0.1789]) -Greedy action tensor([ 1.5405, -0.3625, -0.3952, 0.3268]) tensor([0.6287, 0.0938, 0.0907, 0.1868]) -Greedy action tensor([ 1.6770, -0.5488, -0.3568, 0.3563]) tensor([0.6641, 0.0717, 0.0869, 0.1773]) -Greedy action tensor([ 1.2213, -0.2177, 0.3798, 0.0147]) tensor([0.5083, 0.1206, 0.2191, 0.1521]) -Greedy action tensor([ 1.4740, -0.3771, -0.3999, 0.2644]) tensor([0.6215, 0.0976, 0.0954, 0.1854]) -Greedy action tensor([ 1.1588, -0.7632, -0.4438, 0.8650]) tensor([0.4778, 0.0699, 0.0962, 0.3561]) -Greedy action tensor([ 1.4616, -0.5584, -0.5071, 0.3035]) tensor([0.6304, 0.0836, 0.0880, 0.1980]) -Greedy action tensor([ 0.8265, -0.3123, -0.0135, 0.2580]) tensor([0.4313, 0.1381, 0.1862, 0.2443]) -Greedy action tensor([ 0.9551, -0.5692, -0.2063, 0.2214]) tensor([0.4973, 0.1083, 0.1557, 0.2387]) -Greedy action tensor([ 1.5020, -0.7645, 0.0690, 0.4628]) tensor([0.5896, 0.0611, 0.1407, 0.2086]) -Greedy action tensor([ 1.8316, -0.9633, -0.5999, 0.4955]) tensor([0.7083, 0.0433, 0.0623, 0.1862]) -Greedy action tensor([ 1.2686, -0.2615, -0.5254, 0.0648]) tensor([0.5942, 0.1287, 0.0988, 0.1783]) -Greedy action tensor([ 1.1294, -0.7806, -0.2157, 0.2909]) tensor([0.5432, 0.0804, 0.1415, 0.2349]) -Greedy action tensor([ 0.6725, -0.4486, -0.2605, 0.0181]) tensor([0.4466, 0.1456, 0.1757, 0.2321]) -Greedy action tensor([ 1.0678, -0.5067, -0.1849, -0.0842]) tensor([0.5528, 0.1145, 0.1580, 0.1747]) -Greedy action tensor([ 1.2055, -0.2887, -0.4584, 0.5582]) tensor([0.5162, 0.1159, 0.0978, 0.2702]) -Greedy action tensor([ 2.1914, -1.1220, -0.0764, 0.4186]) tensor([0.7635, 0.0278, 0.0791, 0.1297]) -Greedy action tensor([ 1.5286, -0.4143, -0.1249, 0.4312]) tensor([0.5994, 0.0859, 0.1147, 0.2000]) -Greedy action tensor([ 1.4504, -0.7961, -0.4974, 0.4648]) tensor([0.6167, 0.0652, 0.0879, 0.2301]) -Greedy action tensor([ 0.8202, -0.3638, -0.0196, 0.0640]) tensor([0.4531, 0.1387, 0.1956, 0.2127]) -Greedy action tensor([ 1.8698, -0.7549, 0.1063, 0.2696]) tensor([0.6917, 0.0501, 0.1186, 0.1396]) -Greedy action tensor([ 0.7083, -0.4588, -0.2291, 0.3636]) tensor([0.4147, 0.1291, 0.1624, 0.2938]) -Greedy action tensor([ 1.4740, -0.5597, -0.3956, 0.5043]) tensor([0.6009, 0.0786, 0.0926, 0.2278]) -Greedy action tensor([ 1.4949e+00, -3.9393e-01, -2.2162e-01, -9.1776e-04]) tensor([0.6431, 0.0973, 0.1156, 0.1441]) -Greedy action tensor([ 1.0087, -0.2434, -0.2830, 0.1999]) tensor([0.4985, 0.1425, 0.1370, 0.2220]) -Greedy action tensor([ 1.7480, 0.0081, -0.6552, 0.5073]) tensor([0.6430, 0.1129, 0.0581, 0.1859]) -Greedy action tensor([ 0.9998, -0.4200, -0.2378, 0.2230]) tensor([0.5021, 0.1214, 0.1456, 0.2309]) -Greedy action tensor([ 1.5658, -0.4804, -0.0976, 0.2135]) tensor([0.6340, 0.0819, 0.1201, 0.1640]) -Greedy action tensor([ 1.9622, -0.7295, -0.1250, 0.3754]) tensor([0.7161, 0.0485, 0.0888, 0.1465]) -Greedy action tensor([ 1.5624, -0.6937, -0.1985, 0.3426]) tensor([0.6362, 0.0666, 0.1093, 0.1879]) -Greedy action tensor([ 1.0371, -0.3590, -0.0958, 0.1253]) tensor([0.5072, 0.1256, 0.1634, 0.2038]) -Greedy action tensor([ 1.1327, -0.4102, -0.4365, 0.5853]) tensor([0.4999, 0.1069, 0.1041, 0.2892]) -Greedy action tensor([ 1.0549, -0.3298, -0.4537, 0.2123]) tensor([0.5257, 0.1316, 0.1163, 0.2264]) -Greedy action tensor([ 1.3282, -0.5524, -0.5054, -0.1708]) tensor([0.6512, 0.0993, 0.1041, 0.1454]) -Greedy action tensor([ 1.1529, -1.0543, -0.3054, 0.5292]) tensor([0.5323, 0.0586, 0.1238, 0.2853]) -Greedy action tensor([ 1.3850, -0.2246, -0.4736, 0.5129]) tensor([0.5637, 0.1127, 0.0879, 0.2357]) -Greedy action tensor([ 1.0484, -0.3495, -0.0371, 0.1337]) tensor([0.5037, 0.1245, 0.1701, 0.2018]) -Greedy action tensor([ 1.1729, -0.0182, -0.0777, 0.1783]) tensor([0.5102, 0.1550, 0.1461, 0.1887]) -Greedy action tensor([ 1.0657, -0.1875, -0.6332, 0.5260]) tensor([0.4875, 0.1392, 0.0892, 0.2841]) -Greedy action tensor([ 2.2160, -0.8527, -0.4898, 0.5116]) tensor([0.7721, 0.0359, 0.0516, 0.1404]) -Greedy action tensor([ 1.1770, -0.8144, -0.3234, 0.7454]) tensor([0.4978, 0.0679, 0.1110, 0.3233]) -Greedy action tensor([ 1.4608, -0.4780, -0.4042, 0.0732]) tensor([0.6458, 0.0929, 0.1000, 0.1612]) -Greedy action tensor([ 2.4572, -1.0352, -0.3268, 0.8112]) tensor([0.7782, 0.0237, 0.0481, 0.1501]) -Greedy action tensor([ 1.0288, -0.2347, -0.0888, 0.0947]) tensor([0.4993, 0.1411, 0.1633, 0.1962]) -Greedy action tensor([ 1.0782, -0.4276, -0.3386, 0.1054]) tensor([0.5428, 0.1204, 0.1316, 0.2052]) -Greedy action tensor([ 1.6457, -0.7612, -0.3975, 0.4065]) tensor([0.6626, 0.0597, 0.0859, 0.1919]) -Greedy action tensor([ 1.5064, -0.4478, -0.3455, 0.5971]) tensor([0.5878, 0.0833, 0.0922, 0.2367]) -Greedy action tensor([ 1.8006, -0.8467, -0.3324, 0.6035]) tensor([0.6705, 0.0475, 0.0794, 0.2025]) -Greedy action tensor([ 1.1502, 0.2805, -0.2289, -0.3060]) tensor([0.5252, 0.2201, 0.1323, 0.1224]) -Greedy action tensor([ 1.7847, -0.5737, -0.3261, 0.2281]) tensor([0.7010, 0.0663, 0.0849, 0.1478]) -Greedy action tensor([ 0.6858, -0.0604, -0.1029, 0.0043]) tensor([0.4108, 0.1948, 0.1867, 0.2078]) -Greedy action tensor([ 1.4497, -0.6945, -0.6562, -0.0496]) tensor([0.6839, 0.0801, 0.0833, 0.1527]) -Greedy action tensor([ 1.1391, -0.3771, 0.0685, 0.1890]) tensor([0.5131, 0.1126, 0.1759, 0.1984]) -Greedy action tensor([ 1.3254, -0.4914, -0.2179, 0.2931]) tensor([0.5772, 0.0938, 0.1233, 0.2056]) -Greedy action tensor([ 1.4151e+00, -5.2828e-01, -3.5808e-01, 3.5688e-05]) tensor([0.6427, 0.0920, 0.1091, 0.1561]) -Greedy action tensor([ 1.2743, -0.3914, -0.3781, -0.2271]) tensor([0.6236, 0.1179, 0.1195, 0.1390]) -Greedy action tensor([ 1.2674, -0.3990, -0.3348, 0.2067]) tensor([0.5758, 0.1088, 0.1160, 0.1994]) -Greedy action tensor([ 1.7219, -0.5275, -0.2184, 0.2333]) tensor([0.6781, 0.0715, 0.0974, 0.1530]) -Greedy action tensor([ 0.7219, -0.2064, -0.1731, 0.3234]) tensor([0.4040, 0.1597, 0.1651, 0.2712]) -Greedy action tensor([ 1.4559, -0.5779, -0.2316, 0.0632]) tensor([0.6393, 0.0836, 0.1183, 0.1588]) -Greedy action tensor([ 2.0863, -0.6873, -0.3050, 0.9603]) tensor([0.6765, 0.0422, 0.0619, 0.2194]) -Greedy action tensor([ 0.8692, -0.4774, -0.1287, 0.0099]) tensor([0.4873, 0.1268, 0.1796, 0.2063]) -Greedy action tensor([ 1.2713, -0.5098, -0.5453, 0.8090]) tensor([0.5100, 0.0859, 0.0829, 0.3212]) -Greedy action tensor([ 0.7516, -0.3107, -0.2843, 0.3929]) tensor([0.4168, 0.1441, 0.1479, 0.2912]) -Greedy action tensor([ 0.9541, -0.4129, -0.1964, 0.3486]) tensor([0.4723, 0.1204, 0.1495, 0.2578]) -Greedy action tensor([ 0.7749, -0.6431, 0.5746, 0.8269]) tensor([0.3211, 0.0778, 0.2628, 0.3383]) -Greedy action tensor([ 0.1463, 0.7053, 1.5553, -0.6457]) tensor([0.1371, 0.2398, 0.5610, 0.0621]) -Greedy action tensor([0.7095, 0.9023, 0.8214, 0.7972]) tensor([0.2261, 0.2742, 0.2529, 0.2468]) -Greedy action tensor([ 0.0502, 0.0040, -0.2676, 0.8499]) tensor([0.2038, 0.1946, 0.1483, 0.4534]) -Greedy action tensor([-0.3790, 0.2007, -0.3837, 0.8722]) tensor([0.1374, 0.2454, 0.1368, 0.4803]) -Greedy action tensor([-1.0621, 0.1444, 0.0689, -0.2794]) tensor([0.1039, 0.3471, 0.3219, 0.2272]) -Greedy action tensor([ 0.9695, -1.3618, -0.4893, 1.1285]) tensor([0.3997, 0.0388, 0.0929, 0.4686]) -Greedy action tensor([1.6973, 0.0722, 0.1198, 1.2395]) tensor([0.4912, 0.0967, 0.1014, 0.3107]) -Greedy action tensor([-0.2216, -1.2111, 0.4014, 0.6822]) tensor([0.1753, 0.0652, 0.3268, 0.4328]) -Greedy action tensor([-0.2228, -2.0385, 0.1338, 0.8736]) tensor([0.1791, 0.0291, 0.2558, 0.5360]) -Greedy action tensor([ 0.6263, 1.6407, 0.4069, -0.3861]) tensor([0.2031, 0.5601, 0.1631, 0.0738]) -Greedy action tensor([-1.3651, 0.0157, 0.0770, -0.9952]) tensor([0.0938, 0.3734, 0.3969, 0.1359]) -Greedy action tensor([1.3158, 0.0448, 0.4127, 0.2597]) tensor([0.4917, 0.1380, 0.1993, 0.1710]) -Greedy action tensor([-0.5598, -1.3192, 0.0081, 0.6702]) tensor([0.1503, 0.0703, 0.2652, 0.5142]) -Greedy action tensor([ 0.1661, -0.9805, 0.8613, 0.3766]) tensor([0.2195, 0.0697, 0.4399, 0.2709]) -Greedy action tensor([ 0.8864, -0.6657, 0.4119, 0.3797]) tensor([0.4104, 0.0869, 0.2554, 0.2473]) -Greedy action tensor([ 0.7624, -0.0793, 0.5373, -0.5246]) tensor([0.3991, 0.1720, 0.3187, 0.1102]) -Greedy action tensor([-1.0769, -0.2722, 0.6044, 1.4000]) tensor([0.0488, 0.1090, 0.2619, 0.5803]) -Greedy action tensor([-0.1015, -0.1620, 0.2366, 1.1615]) tensor([0.1454, 0.1368, 0.2038, 0.5140]) -Greedy action tensor([-0.3463, -1.2830, -0.0089, 0.1748]) tensor([0.2234, 0.0875, 0.3130, 0.3761]) -Greedy action tensor([-0.0915, -1.1163, 0.8037, -0.0473]) tensor([0.2061, 0.0740, 0.5045, 0.2154]) -Greedy action tensor([ 1.9756, -0.3244, 1.4185, 0.7655]) tensor([0.5073, 0.0509, 0.2906, 0.1513]) -Greedy action tensor([-0.0312, -0.3180, 0.6457, 1.1891]) tensor([0.1407, 0.1056, 0.2769, 0.4768]) -Greedy action tensor([ 0.7403, 0.4569, 0.5618, -0.0536]) tensor([0.3287, 0.2476, 0.2750, 0.1486]) -Greedy action tensor([ 0.4132, -2.3776, -0.4952, 1.1802]) tensor([0.2764, 0.0170, 0.1114, 0.5952]) -Greedy action tensor([ 0.4099, -1.0025, 0.3057, 0.8909]) tensor([0.2658, 0.0647, 0.2395, 0.4300]) -Greedy action tensor([ 0.6804, -1.1801, -0.2655, 1.5916]) tensor([0.2481, 0.0386, 0.0963, 0.6170]) -Greedy action tensor([-0.3470, 0.2267, -0.0060, -0.5227]) tensor([0.1992, 0.3536, 0.2801, 0.1671]) -Greedy action tensor([ 0.6138, -2.3142, 0.0244, 1.0373]) tensor([0.3189, 0.0171, 0.1769, 0.4871]) -Greedy action tensor([ 0.1695, -0.8755, 1.5452, -0.3132]) tensor([0.1687, 0.0593, 0.6678, 0.1041]) -Greedy action tensor([ 0.3976, -0.8141, -0.3117, 1.9060]) tensor([0.1585, 0.0472, 0.0780, 0.7163]) -Greedy action tensor([-1.1346, -1.1089, 0.1898, 0.6086]) tensor([0.0869, 0.0892, 0.3269, 0.4970]) -Greedy action tensor([ 1.2720, -1.0790, 1.6421, 1.1444]) tensor([0.2921, 0.0278, 0.4230, 0.2571]) -Greedy action tensor([ 0.4365, -0.7944, 0.0649, 1.0024]) tensor([0.2672, 0.0780, 0.1843, 0.4705]) -Greedy action tensor([-0.7616, -0.6014, -0.5349, -0.0842]) tensor([0.1853, 0.2175, 0.2324, 0.3648]) -Greedy action tensor([ 0.8586, 0.0393, 1.4843, -0.6496]) tensor([0.2832, 0.1248, 0.5294, 0.0627]) -Greedy action tensor([ 1.1095, -1.6248, 1.4731, 0.9970]) tensor([0.2944, 0.0191, 0.4235, 0.2630]) -Greedy action tensor([ 0.4853, -1.7034, -0.3784, 0.6639]) tensor([0.3664, 0.0411, 0.1545, 0.4381]) -Greedy action tensor([1.0058, 0.2043, 1.2330, 0.6411]) tensor([0.2943, 0.1320, 0.3693, 0.2044]) -Greedy action tensor([ 1.5601, -0.6560, 1.7243, 0.3465]) tensor([0.3869, 0.0422, 0.4559, 0.1150]) -Greedy action tensor([-0.6612, -2.1215, -0.3102, 0.8650]) tensor([0.1379, 0.0320, 0.1958, 0.6343]) -Greedy action tensor([ 0.4396, 0.9185, -0.1785, 0.4202]) tensor([0.2419, 0.3905, 0.1304, 0.2372]) -Greedy action tensor([-1.0824, -0.5839, 1.7879, -0.6076]) tensor([0.0457, 0.0752, 0.8057, 0.0734]) -Greedy action tensor([-0.8180, -0.9190, 1.7340, -0.6439]) tensor([0.0628, 0.0568, 0.8057, 0.0747]) -Greedy action tensor([-0.3416, -0.4737, 0.6733, 0.2485]) tensor([0.1553, 0.1361, 0.4285, 0.2802]) -Greedy action tensor([ 0.3450, -1.5460, -0.2548, 0.8531]) tensor([0.2974, 0.0449, 0.1633, 0.4944]) -Greedy action tensor([ 1.1390, -1.7652, 0.2944, 0.7530]) tensor([0.4621, 0.0253, 0.1986, 0.3141]) -Greedy action tensor([ 0.7457, 0.1660, 1.4001, -0.3387]) tensor([0.2616, 0.1465, 0.5034, 0.0885]) -Greedy action tensor([ 0.4514, -2.2585, -0.6416, 0.7248]) tensor([0.3682, 0.0245, 0.1234, 0.4839]) -Greedy action tensor([ 0.8600, -0.4322, 0.2190, 0.8754]) tensor([0.3550, 0.0975, 0.1870, 0.3605]) -Greedy action tensor([ 1.6286, -0.1637, 1.7357, 0.8537]) tensor([0.3649, 0.0608, 0.4062, 0.1681]) -Greedy action tensor([ 0.7282, -0.5959, -0.0521, 0.4121]) tensor([0.4076, 0.1084, 0.1868, 0.2971]) -Greedy action tensor([-0.9534, -0.0858, -0.4952, -0.4597]) tensor([0.1515, 0.3607, 0.2395, 0.2482]) -Greedy action tensor([ 0.2482, -0.3433, 0.0215, 0.5722]) tensor([0.2679, 0.1483, 0.2135, 0.3703]) -Greedy action tensor([-0.1454, -1.7441, -0.2515, 0.3785]) tensor([0.2638, 0.0533, 0.2373, 0.4455]) -Greedy action tensor([ 0.2099, -0.9161, 1.3795, -0.4460]) tensor([0.1975, 0.0640, 0.6360, 0.1025]) -Greedy action tensor([ 1.3856, -0.8972, -0.1881, 1.0142]) tensor([0.5002, 0.0510, 0.1037, 0.3450]) -Greedy action tensor([1.3319, 0.1692, 1.4489, 1.7426]) tensor([0.2535, 0.0793, 0.2850, 0.3823]) -Greedy action tensor([-0.6767, -0.5033, 0.4752, 0.4415]) tensor([0.1189, 0.1414, 0.3761, 0.3636]) -Greedy action tensor([ 0.7856, -2.2768, -0.2307, -0.1195]) tensor([0.5515, 0.0258, 0.1996, 0.2231]) -Greedy action tensor([-0.0593, -1.6292, -0.5785, 1.6047]) tensor([0.1412, 0.0294, 0.0840, 0.7454]) -Greedy action tensor([-1.1670, 0.3011, 0.6056, -1.4350]) tensor([0.0834, 0.3620, 0.4908, 0.0638]) -Greedy action tensor([-0.0432, 0.1759, 0.5060, 0.3765]) tensor([0.1819, 0.2264, 0.3150, 0.2767]) -Greedy action tensor([ 0.8205, 0.0102, -0.2434, -0.0657]) tensor([0.4541, 0.2020, 0.1567, 0.1872]) -Greedy action tensor([ 1.1922, -2.4968, -0.1299, 0.6935]) tensor([0.5266, 0.0132, 0.1404, 0.3198]) -Greedy action tensor([ 0.2071, -1.6172, -0.0540, 0.1953]) tensor([0.3425, 0.0553, 0.2638, 0.3385]) -Greedy action tensor([-0.3182, -0.7359, -1.1732, -0.0567]) tensor([0.2956, 0.1947, 0.1257, 0.3840]) -Greedy action tensor([ 0.4761, -0.2506, 1.1955, -0.4414]) tensor([0.2541, 0.1228, 0.5216, 0.1015]) -Greedy action tensor([-0.2877, -0.2967, 0.9770, 1.2407]) tensor([0.0986, 0.0977, 0.3492, 0.4545]) -Greedy action tensor([1.0410, 0.3498, 1.2531, 0.8566]) tensor([0.2802, 0.1404, 0.3464, 0.2330]) -Greedy action tensor([ 0.8579, 0.1432, 0.1757, -0.2000]) tensor([0.4270, 0.2089, 0.2158, 0.1482]) -Greedy action tensor([ 1.5706, -1.3748, 1.1259, 1.2994]) tensor([0.4072, 0.0214, 0.2610, 0.3104]) -Greedy action tensor([-0.8270, -0.0159, -0.5461, 0.5856]) tensor([0.1152, 0.2592, 0.1526, 0.4730]) -Greedy action tensor([ 0.4208, -0.1179, -0.0219, 1.3668]) tensor([0.2083, 0.1215, 0.1338, 0.5364]) -Greedy action tensor([0.7886, 0.0615, 0.5500, 0.6095]) tensor([0.3218, 0.1556, 0.2535, 0.2691]) -Greedy action tensor([0.1954, 0.0295, 0.2198, 1.0190]) tensor([0.1942, 0.1645, 0.1990, 0.4424]) -Greedy action tensor([ 0.8966, -1.1982, 0.2188, 1.2910]) tensor([0.3211, 0.0395, 0.1630, 0.4763]) -Greedy action tensor([-0.1262, 0.2275, 0.0958, 0.1363]) tensor([0.2011, 0.2864, 0.2511, 0.2614]) -Greedy action tensor([-1.1966, -0.5255, 0.5034, -0.0295]) tensor([0.0859, 0.1680, 0.4701, 0.2759]) -Greedy action tensor([1.0427, 1.2806, 1.2246, 0.2826]) tensor([0.2541, 0.3223, 0.3048, 0.1188]) -Greedy action tensor([-0.4390, -0.7148, 0.3213, 0.7957]) tensor([0.1363, 0.1035, 0.2916, 0.4686]) -Greedy action tensor([ 0.6713, 0.5779, -0.0789, -0.5507]) tensor([0.3734, 0.3402, 0.1764, 0.1100]) -Greedy action tensor([ 0.8674, -0.6072, -0.0728, -0.2267]) tensor([0.5117, 0.1171, 0.1998, 0.1713]) -Greedy action tensor([ 0.7489, -0.5048, -0.1826, -0.5343]) tensor([0.5111, 0.1459, 0.2014, 0.1417]) -Greedy action tensor([ 1.0844, -1.0999, -0.0248, -0.5750]) tensor([0.6125, 0.0689, 0.2020, 0.1165]) -Greedy action tensor([ 0.3183, 0.1868, -0.0536, -0.0654]) tensor([0.3079, 0.2700, 0.2123, 0.2098]) -Greedy action tensor([ 0.9367, -0.4040, -0.1783, -0.4442]) tensor([0.5432, 0.1421, 0.1781, 0.1365]) -Greedy action tensor([ 1.0534, -0.7215, -0.1173, -0.5462]) tensor([0.5947, 0.1008, 0.1844, 0.1201]) -Greedy action tensor([ 0.6701, -0.5561, -0.1093, -0.4228]) tensor([0.4791, 0.1406, 0.2197, 0.1606]) -Greedy action tensor([ 0.6966, -0.5942, -0.1585, -0.5846]) tensor([0.5056, 0.1391, 0.2150, 0.1404]) -Greedy action tensor([ 0.5352, -0.3354, -0.1564, -0.3942]) tensor([0.4321, 0.1809, 0.2164, 0.1706]) -Greedy action tensor([ 0.8866, -0.4396, 0.0195, -0.8605]) tensor([0.5377, 0.1427, 0.2259, 0.0937]) -Greedy action tensor([ 1.0254, -0.6336, -0.0290, -0.4326]) tensor([0.5645, 0.1074, 0.1967, 0.1314]) -Greedy action tensor([ 0.4664, -0.0351, 0.0967, -0.2284]) tensor([0.3577, 0.2166, 0.2471, 0.1785]) -Greedy action tensor([ 0.7247, -0.6229, -0.1428, -0.4477]) tensor([0.5026, 0.1306, 0.2111, 0.1556]) -Greedy action tensor([ 0.8624, -0.3136, 0.0682, -0.4027]) tensor([0.4896, 0.1510, 0.2213, 0.1382]) -Greedy action tensor([ 0.8352, -0.6208, 0.0371, -0.5792]) tensor([0.5191, 0.1210, 0.2337, 0.1262]) -Greedy action tensor([ 0.4632, -0.3490, -0.1303, -0.3516]) tensor([0.4100, 0.1820, 0.2265, 0.1815]) -Greedy action tensor([ 0.8106, -0.9432, 0.1335, -0.5142]) tensor([0.5136, 0.0889, 0.2610, 0.1365]) -Greedy action tensor([ 0.4106, 0.2286, -0.0320, -0.1519]) tensor([0.3283, 0.2737, 0.2109, 0.1871]) -Greedy action tensor([ 0.6589, -0.2176, -0.0200, -0.2038]) tensor([0.4264, 0.1775, 0.2162, 0.1799]) -Greedy action tensor([ 0.5371, -0.2774, -0.0282, -0.3622]) tensor([0.4136, 0.1832, 0.2350, 0.1683]) -Greedy action tensor([ 0.7390, -0.7313, -0.1305, -0.2682]) tensor([0.4965, 0.1141, 0.2081, 0.1813]) -Greedy action tensor([ 0.4791, -0.1578, -0.0133, -0.0655]) tensor([0.3676, 0.1944, 0.2247, 0.2132]) -Greedy action tensor([ 0.5921, -0.2536, -0.0913, -0.2579]) tensor([0.4234, 0.1818, 0.2138, 0.1810]) -Greedy action tensor([ 0.6877, -0.4662, 0.0188, -0.4155]) tensor([0.4631, 0.1461, 0.2372, 0.1537]) -Greedy action tensor([ 0.6020, -0.3859, -0.0935, -0.2104]) tensor([0.4320, 0.1609, 0.2155, 0.1917]) -Greedy action tensor([ 0.6692, -0.2422, 0.0582, -0.3161]) tensor([0.4314, 0.1734, 0.2342, 0.1610]) -Greedy action tensor([ 0.6498, -0.2494, -0.1010, -0.5194]) tensor([0.4567, 0.1858, 0.2156, 0.1419]) -Greedy action tensor([ 0.6599, -0.1936, 0.0963, -0.1206]) tensor([0.4076, 0.1736, 0.2320, 0.1868]) -Greedy action tensor([ 0.7342, -0.4923, 0.1135, -0.7693]) tensor([0.4870, 0.1429, 0.2618, 0.1083]) -Greedy action tensor([ 0.6308, -0.2613, 0.1471, -0.6135]) tensor([0.4321, 0.1771, 0.2664, 0.1245]) -Greedy action tensor([ 0.2117, 0.0629, -0.0577, -0.2599]) tensor([0.3077, 0.2652, 0.2351, 0.1920]) -Greedy action tensor([ 0.4177, -0.3332, -0.0650, -0.3172]) tensor([0.3893, 0.1837, 0.2403, 0.1867]) -Greedy action tensor([ 0.7478, -0.2559, -0.0108, -0.3863]) tensor([0.4637, 0.1700, 0.2172, 0.1492]) -Greedy action tensor([ 0.6637, -0.5172, -0.1735, -0.7406]) tensor([0.5037, 0.1546, 0.2180, 0.1237]) -Greedy action tensor([ 0.4141, -0.1529, 0.1829, -0.3961]) tensor([0.3564, 0.2022, 0.2829, 0.1585]) -Greedy action tensor([ 0.8896, -0.5838, 0.0305, -0.3966]) tensor([0.5184, 0.1188, 0.2196, 0.1433]) -Greedy action tensor([ 0.2208, -0.1198, -0.0614, -0.2468]) tensor([0.3234, 0.2301, 0.2439, 0.2026]) -Greedy action tensor([ 1.0447, -0.9641, -0.1054, -0.5751]) tensor([0.6065, 0.0814, 0.1920, 0.1201]) -Greedy action tensor([ 0.4355, 0.0024, -0.0721, -0.5208]) tensor([0.3795, 0.2461, 0.2285, 0.1459]) -Greedy action tensor([ 0.4694, -0.0875, -0.0449, -0.2795]) tensor([0.3782, 0.2167, 0.2262, 0.1789]) -Greedy action tensor([ 0.5351, 0.0580, 0.0443, -0.1654]) tensor([0.3664, 0.2274, 0.2243, 0.1819]) -Greedy action tensor([ 0.4571, -0.0824, -0.2248, -0.0488]) tensor([0.3715, 0.2166, 0.1879, 0.2240]) -Greedy action tensor([ 0.6435, -0.4554, 0.2856, -0.6032]) tensor([0.4311, 0.1436, 0.3014, 0.1239]) -Greedy action tensor([ 0.5705, -0.5393, -0.1769, -0.1445]) tensor([0.4362, 0.1438, 0.2066, 0.2134]) -Greedy action tensor([ 0.8844, -0.6229, -0.1213, -0.7178]) tensor([0.5591, 0.1238, 0.2045, 0.1126]) -Greedy action tensor([ 0.8121, -0.3740, -0.0060, -0.4491]) tensor([0.4926, 0.1505, 0.2174, 0.1396]) -Greedy action tensor([ 0.8338, -0.1482, -0.2222, -0.3820]) tensor([0.4953, 0.1855, 0.1723, 0.1468]) -Greedy action tensor([ 0.6435, -0.3405, -0.2162, -0.3040]) tensor([0.4577, 0.1711, 0.1937, 0.1775]) -Greedy action tensor([ 0.5015, -0.1673, 0.1288, -0.4236]) tensor([0.3849, 0.1972, 0.2652, 0.1526]) -Greedy action tensor([ 0.4010, 0.1577, -0.0446, -0.2175]) tensor([0.3375, 0.2646, 0.2161, 0.1818]) -Greedy action tensor([ 0.6233, -0.0249, -0.1183, -0.2067]) tensor([0.4106, 0.2147, 0.1956, 0.1790]) -Greedy action tensor([ 0.4781, -0.0822, 0.0358, -0.1184]) tensor([0.3618, 0.2066, 0.2324, 0.1992]) -Greedy action tensor([ 0.7735, -0.5531, -0.0800, -0.5421]) tensor([0.5103, 0.1354, 0.2173, 0.1369]) -Greedy action tensor([ 0.6745, -0.1715, -0.0597, -0.3512]) tensor([0.4410, 0.1892, 0.2116, 0.1581]) -Greedy action tensor([ 0.7571, -0.4009, -0.0205, -0.0941]) tensor([0.4544, 0.1427, 0.2088, 0.1940]) -Greedy action tensor([ 0.9914, -0.4188, 0.0100, -0.3843]) tensor([0.5343, 0.1304, 0.2003, 0.1350]) -Greedy action tensor([ 0.7097, -0.2821, 0.1951, -0.6187]) tensor([0.4477, 0.1661, 0.2676, 0.1186]) -Greedy action tensor([ 0.6740, -0.4551, 0.0489, -0.5426]) tensor([0.4641, 0.1501, 0.2484, 0.1375]) -Greedy action tensor([ 0.7144, -0.3825, 0.0225, -0.3408]) tensor([0.4582, 0.1530, 0.2294, 0.1595]) -Greedy action tensor([ 0.4585, -0.4364, 0.1755, -0.3102]) tensor([0.3808, 0.1556, 0.2870, 0.1766]) -Greedy action tensor([ 0.9781, -0.5397, -0.0498, -0.2778]) tensor([0.5371, 0.1177, 0.1922, 0.1530]) -Greedy action tensor([ 0.2674, 0.0481, 0.0105, -0.1584]) tensor([0.3096, 0.2487, 0.2395, 0.2023]) -Greedy action tensor([ 0.3266, -0.1775, -0.0734, -0.1735]) tensor([0.3471, 0.2097, 0.2327, 0.2105]) -Greedy action tensor([ 0.3103, -0.0587, 0.0035, -0.0340]) tensor([0.3189, 0.2205, 0.2346, 0.2260]) -Greedy action tensor([ 0.4487, -0.2122, -0.0439, -0.1750]) tensor([0.3755, 0.1939, 0.2294, 0.2012]) -Greedy action tensor([ 0.6233, -0.4293, 0.0727, -0.3493]) tensor([0.4341, 0.1515, 0.2503, 0.1641]) -Greedy action tensor([ 0.3745, 0.0029, -0.0156, -0.1722]) tensor([0.3395, 0.2341, 0.2298, 0.1965]) -Greedy action tensor([ 0.3597, 0.0091, 0.1367, -0.2560]) tensor([0.3284, 0.2313, 0.2628, 0.1774]) -Greedy action tensor([ 0.8491, -0.7578, -0.0504, -0.5446]) tensor([0.5389, 0.1081, 0.2192, 0.1338]) -Greedy action tensor([ 0.1363, 0.1864, -0.0962, -0.3116]) tensor([0.2871, 0.3019, 0.2276, 0.1835]) -Greedy action tensor([ 0.6197, -0.0858, -0.0070, -0.2928]) tensor([0.4116, 0.2033, 0.2199, 0.1653]) -Greedy action tensor([ 0.9935, -0.2889, 0.1126, -0.3126]) tensor([0.5095, 0.1413, 0.2111, 0.1380]) -Greedy action tensor([ 0.7858, -0.2016, -0.1503, -0.5143]) tensor([0.4909, 0.1829, 0.1925, 0.1338]) -Greedy action tensor([ 0.3937, 0.2555, -0.2185, -0.2628]) tensor([0.3411, 0.2971, 0.1849, 0.1769]) -Greedy action tensor([ 0.3958, -0.0554, -0.0679, -0.2546]) tensor([0.3587, 0.2285, 0.2256, 0.1872]) -Greedy action tensor([ 0.8934, -0.5464, -0.0804, -0.4438]) tensor([0.5327, 0.1262, 0.2012, 0.1399]) -Greedy action tensor([ 0.7091, -0.4357, -0.0474, -0.1509]) tensor([0.4523, 0.1440, 0.2123, 0.1914]) -Greedy action tensor([ 0.4913, -0.2473, 0.0118, -0.3265]) tensor([0.3940, 0.1882, 0.2439, 0.1739]) -Greedy action tensor([ 0.2956, 0.0252, -0.0784, -0.0621]) tensor([0.3174, 0.2422, 0.2184, 0.2220]) -Greedy action tensor([ 0.9324, -0.6216, 0.0597, -0.4768]) tensor([0.5337, 0.1128, 0.2230, 0.1304]) -Greedy action tensor([-0.0322, -0.0162, 0.0360, -0.0537]) tensor([0.2460, 0.2499, 0.2633, 0.2407]) -Greedy action tensor([-0.8007, -0.5001, 1.2123, 1.5118]) tensor([0.0502, 0.0677, 0.3755, 0.5066]) -Greedy action tensor([-1.1528, -0.5710, 0.2707, 0.3426]) tensor([0.0877, 0.1569, 0.3641, 0.3912]) -Greedy action tensor([-1.7245, -0.2249, 0.6038, -0.2439]) tensor([0.0497, 0.2225, 0.5096, 0.2183]) -Greedy action tensor([-0.0764, 0.6130, -0.6201, 0.3101]) tensor([0.1982, 0.3950, 0.1151, 0.2917]) -Greedy action tensor([-1.8523, -0.8062, 0.3142, -0.1459]) tensor([0.0553, 0.1574, 0.4826, 0.3047]) -Greedy action tensor([-1.4136, -0.4890, 1.2243, 1.1635]) tensor([0.0326, 0.0822, 0.4560, 0.4291]) -Greedy action tensor([-1.8899, -0.3606, 0.6279, -0.1388]) tensor([0.0421, 0.1941, 0.5216, 0.2423]) -Greedy action tensor([-1.8438, -0.2605, 0.5782, -0.1313]) tensor([0.0441, 0.2148, 0.4968, 0.2444]) -Greedy action tensor([-1.1009, -0.5510, 0.2094, 0.4356]) tensor([0.0902, 0.1563, 0.3343, 0.4192]) -Greedy action tensor([-1.4501, -0.6153, 0.8088, 0.6337]) tensor([0.0478, 0.1102, 0.4578, 0.3842]) -Greedy action tensor([-1.8014, -0.3838, 0.5840, -0.1209]) tensor([0.0468, 0.1932, 0.5086, 0.2513]) -Greedy action tensor([-1.3836, -0.4517, 0.3851, 0.0753]) tensor([0.0730, 0.1853, 0.4278, 0.3139]) -Greedy action tensor([-1.1560, -0.5771, 0.3844, 0.3746]) tensor([0.0828, 0.1478, 0.3866, 0.3828]) -Greedy action tensor([-1.0924, -0.2392, 0.2925, -0.1353]) tensor([0.1005, 0.2360, 0.4016, 0.2618]) -Greedy action tensor([-1.7634, -0.5533, 0.5969, -0.1196]) tensor([0.0497, 0.1667, 0.5265, 0.2572]) -Greedy action tensor([-0.2199, 0.4102, 0.2041, 0.4544]) tensor([0.1570, 0.2949, 0.2399, 0.3082]) -Greedy action tensor([-1.4492, -0.5893, 0.4096, 0.1175]) tensor([0.0686, 0.1622, 0.4404, 0.3288]) -Greedy action tensor([-1.7350, -0.4955, 0.5611, -0.0965]) tensor([0.0512, 0.1768, 0.5085, 0.2635]) -Greedy action tensor([-1.3208, -0.5665, 0.4970, 0.4158]) tensor([0.0668, 0.1421, 0.4116, 0.3795]) -Greedy action tensor([-1.8030, -0.4426, 0.6113, -0.0776]) tensor([0.0461, 0.1797, 0.5154, 0.2588]) -Greedy action tensor([-1.7821, 0.0181, 0.2340, -0.1854]) tensor([0.0513, 0.3104, 0.3851, 0.2532]) -Greedy action tensor([-1.5440, -0.2043, 0.6322, 0.1658]) tensor([0.0522, 0.1993, 0.4600, 0.2885]) -Greedy action tensor([-1.8773, -0.4487, 0.6401, -0.1597]) tensor([0.0432, 0.1803, 0.5357, 0.2408]) -Greedy action tensor([-0.8800, -0.5859, 0.1602, 0.3489]) tensor([0.1164, 0.1562, 0.3295, 0.3979]) -Greedy action tensor([-1.6062, -0.6094, 0.5606, 0.0464]) tensor([0.0566, 0.1534, 0.4944, 0.2956]) -Greedy action tensor([-1.1137, -0.5858, 0.2542, 0.2809]) tensor([0.0938, 0.1591, 0.3685, 0.3785]) -Greedy action tensor([-1.1385, -0.5656, 0.2470, 0.2923]) tensor([0.0913, 0.1619, 0.3649, 0.3819]) -Greedy action tensor([-1.7186, -0.5818, 0.9942, 0.5568]) tensor([0.0346, 0.1078, 0.5212, 0.3365]) -Greedy action tensor([-1.8976, -0.4749, 0.6864, -0.1161]) tensor([0.0411, 0.1705, 0.5444, 0.2440]) -Greedy action tensor([ 0.0889, -0.3667, 1.3286, 1.5745]) tensor([0.1052, 0.0667, 0.3634, 0.4647]) -Greedy action tensor([-1.4583, 0.0445, 0.4884, 0.2624]) tensor([0.0553, 0.2485, 0.3873, 0.3090]) -Greedy action tensor([-1.8150, -0.4676, 0.5990, -0.0902]) tensor([0.0462, 0.1778, 0.5166, 0.2593]) -Greedy action tensor([-1.0488, -0.5555, 0.5443, -0.0961]) tensor([0.0985, 0.1614, 0.4847, 0.2555]) -Greedy action tensor([-1.8610, -0.4271, 0.6217, -0.1302]) tensor([0.0438, 0.1839, 0.5249, 0.2474]) -Greedy action tensor([-0.8778, 0.2231, 0.2885, -0.3934]) tensor([0.1131, 0.3401, 0.3631, 0.1836]) -Greedy action tensor([-1.8237, -0.4118, 0.6084, -0.0800]) tensor([0.0450, 0.1848, 0.5126, 0.2575]) -Greedy action tensor([-1.3723, -0.5454, 1.2559, 1.1771]) tensor([0.0334, 0.0764, 0.4626, 0.4276]) -Greedy action tensor([-1.1367, -0.6414, 0.3116, 0.3982]) tensor([0.0867, 0.1422, 0.3689, 0.4022]) -Greedy action tensor([-1.3197, -0.5359, 0.3477, 0.2330]) tensor([0.0757, 0.1657, 0.4010, 0.3575]) -Greedy action tensor([-0.7879, -0.4893, 0.4052, -0.4039]) tensor([0.1406, 0.1895, 0.4635, 0.2064]) -Greedy action tensor([-1.6673, -0.4655, 0.5302, -0.0093]) tensor([0.0538, 0.1790, 0.4846, 0.2825]) -Greedy action tensor([-1.9765, -0.5087, 1.1821, 0.4911]) tensor([0.0246, 0.1067, 0.5787, 0.2900]) -Greedy action tensor([-0.8734, -0.2230, 1.0938, 1.3599]) tensor([0.0516, 0.0988, 0.3686, 0.4810]) -Greedy action tensor([-1.4815, 0.2339, 0.4693, 0.3206]) tensor([0.0509, 0.2828, 0.3579, 0.3084]) -Greedy action tensor([-1.3866, -0.4266, 0.7766, -0.6908]) tensor([0.0699, 0.1824, 0.6076, 0.1401]) -Greedy action tensor([-0.4036, 0.0398, 0.9851, 1.5713]) tensor([0.0726, 0.1131, 0.2911, 0.5232]) -Greedy action tensor([-0.8739, -0.3943, 1.1129, 1.4352]) tensor([0.0501, 0.0809, 0.3651, 0.5040]) -Greedy action tensor([-1.8901, -0.6175, 0.6839, -0.1320]) tensor([0.0426, 0.1520, 0.5585, 0.2470]) -Greedy action tensor([-1.8481, -0.3073, 0.6091, -0.0937]) tensor([0.0433, 0.2019, 0.5048, 0.2500]) -Greedy action tensor([-1.8001, -0.5725, 0.7814, -0.0117]) tensor([0.0424, 0.1446, 0.5598, 0.2533]) -Greedy action tensor([-1.7260, -0.5276, 0.5635, -0.0183]) tensor([0.0508, 0.1683, 0.5010, 0.2800]) -Greedy action tensor([-1.0185, -0.3006, 0.7540, 1.2127]) tensor([0.0548, 0.1124, 0.3226, 0.5103]) -Greedy action tensor([-1.5387, -0.3475, 0.4537, -0.0217]) tensor([0.0618, 0.2034, 0.4531, 0.2817]) -Greedy action tensor([-1.1456, -0.7020, 1.2607, 1.4484]) tensor([0.0370, 0.0576, 0.4103, 0.4950]) -Greedy action tensor([-0.1924, 0.1880, 1.0230, 1.6684]) tensor([0.0815, 0.1193, 0.2749, 0.5242]) -Greedy action tensor([-1.4446, -0.5818, 0.7865, 0.7433]) tensor([0.0463, 0.1097, 0.4311, 0.4129]) -Greedy action tensor([-1.7747, -0.4698, 0.6358, 0.0408]) tensor([0.0455, 0.1678, 0.5070, 0.2796]) -Greedy action tensor([-1.0036, -0.6195, 0.3003, 0.4662]) tensor([0.0952, 0.1398, 0.3508, 0.4141]) -Greedy action tensor([-1.8290, -0.4996, 0.6601, -0.0414]) tensor([0.0439, 0.1657, 0.5284, 0.2620]) -Greedy action tensor([-0.6615, -0.3136, 0.7764, 1.4353]) tensor([0.0677, 0.0959, 0.2852, 0.5512]) -Greedy action tensor([-1.9160, -0.4284, 0.6479, -0.1647]) tensor([0.0414, 0.1831, 0.5372, 0.2384]) -Greedy action tensor([-1.7795, -0.6485, 0.8802, 0.2878]) tensor([0.0380, 0.1178, 0.5435, 0.3006]) -Greedy action tensor([-1.2863, -0.4681, 0.5440, 0.6013]) tensor([0.0621, 0.1407, 0.3872, 0.4100]) -Greedy action tensor([-1.2133, -0.4877, 0.6621, 0.0755]) tensor([0.0757, 0.1563, 0.4935, 0.2745]) -Greedy action tensor([-1.5720, -0.5343, 0.4615, 0.0881]) tensor([0.0598, 0.1688, 0.4569, 0.3145]) -Greedy action tensor([-1.9261, -0.4032, 0.6487, -0.1696]) tensor([0.0408, 0.1871, 0.5357, 0.2363]) -Greedy action tensor([-1.4827, 0.1621, 0.6254, -0.5156]) tensor([0.0587, 0.3039, 0.4831, 0.1543]) -Greedy action tensor([-1.9051, -0.4026, 0.6425, -0.1539]) tensor([0.0416, 0.1870, 0.5317, 0.2398]) -Greedy action tensor([-0.7114, -0.5573, 0.2435, 0.4052]) tensor([0.1279, 0.1492, 0.3323, 0.3906]) -Greedy action tensor([-2.0266, -0.6052, 1.1348, 0.4285]) tensor([0.0248, 0.1026, 0.5843, 0.2883]) -Greedy action tensor([-1.8488, -0.4745, 0.6830, -0.0585]) tensor([0.0425, 0.1680, 0.5347, 0.2547]) -Greedy action tensor([-1.7440, -0.4474, 0.7670, 0.2172]) tensor([0.0415, 0.1518, 0.5115, 0.2952]) -Greedy action tensor([-1.0443, -0.4209, 0.5994, -0.5885]) tensor([0.1040, 0.1940, 0.5380, 0.1640]) -Greedy action tensor([-1.4174, -0.5487, 0.3982, 0.1469]) tensor([0.0699, 0.1666, 0.4295, 0.3340]) -Greedy action tensor([-1.6847, -0.4497, 0.5487, 0.0280]) tensor([0.0518, 0.1780, 0.4832, 0.2870]) -Greedy action tensor([-1.1999, -0.5903, 0.2792, 0.2739]) tensor([0.0862, 0.1587, 0.3786, 0.3765]) -Greedy action tensor([-1.4998, -0.6592, 0.6089, 0.4252]) tensor([0.0543, 0.1259, 0.4474, 0.3724]) -Greedy action tensor([-1.3092, -0.5701, 0.3435, 0.2169]) tensor([0.0774, 0.1621, 0.4043, 0.3562]) -Greedy action tensor([-1.8504, -0.5501, 1.2039, 0.5817]) tensor([0.0268, 0.0985, 0.5691, 0.3055]) -Greedy action tensor([-1.2402, -0.6209, 0.2860, 0.2440]) tensor([0.0843, 0.1565, 0.3876, 0.3716]) -Greedy action tensor([-1.9666, -0.4948, 0.8617, 0.1411]) tensor([0.0328, 0.1428, 0.5546, 0.2698]) -Greedy action tensor([ 1.0276, 0.0149, -0.3571, -0.1771]) tensor([0.5226, 0.1898, 0.1309, 0.1567]) -Greedy action tensor([ 1.1878, -0.4259, -0.1804, 0.3600]) tensor([0.5289, 0.1053, 0.1346, 0.2311]) -Greedy action tensor([ 1.9288, -0.3143, -0.5630, -0.1389]) tensor([0.7602, 0.0807, 0.0629, 0.0961]) -Greedy action tensor([ 1.1481, -0.4436, -0.3160, 0.4279]) tensor([0.5204, 0.1059, 0.1204, 0.2533]) -Greedy action tensor([ 0.6940, -0.6445, -0.1741, -0.2567]) tensor([0.4834, 0.1268, 0.2029, 0.1868]) -Greedy action tensor([ 1.2061, 0.0680, 0.1566, -0.2847]) tensor([0.5275, 0.1690, 0.1847, 0.1188]) -Greedy action tensor([ 0.7405, -0.5058, -0.4005, 0.6914]) tensor([0.3907, 0.1124, 0.1249, 0.3720]) -Greedy action tensor([ 1.1906, -0.7793, -0.2163, 0.4160]) tensor([0.5419, 0.0756, 0.1327, 0.2498]) -Greedy action tensor([ 2.2026, -1.0043, -0.2897, 0.6684]) tensor([0.7469, 0.0302, 0.0618, 0.1611]) -Greedy action tensor([ 1.4179, -0.4185, -1.0210, 0.3295]) tensor([0.6315, 0.1007, 0.0551, 0.2127]) -Greedy action tensor([ 1.4619, -0.5439, -0.2994, 0.1687]) tensor([0.6326, 0.0851, 0.1087, 0.1736]) -Greedy action tensor([ 2.2439, -1.0454, -0.3491, 0.6001]) tensor([0.7661, 0.0286, 0.0573, 0.1480]) -Greedy action tensor([ 1.7005, 0.1079, -0.6472, 0.3660]) tensor([0.6401, 0.1302, 0.0612, 0.1685]) -Greedy action tensor([ 1.6241, -0.6119, 0.1651, 0.0716]) tensor([0.6447, 0.0689, 0.1499, 0.1365]) -Greedy action tensor([ 0.8368, -0.2550, 0.0673, 0.2139]) tensor([0.4282, 0.1437, 0.1984, 0.2297]) -Greedy action tensor([ 1.0153, -0.4016, -0.4470, 0.5096]) tensor([0.4814, 0.1167, 0.1115, 0.2903]) -Greedy action tensor([ 1.6544, -0.8332, -0.4429, 0.2530]) tensor([0.6886, 0.0572, 0.0846, 0.1696]) -Greedy action tensor([ 1.4764, -0.5453, -0.3139, 0.1881]) tensor([0.6349, 0.0841, 0.1060, 0.1751]) -Greedy action tensor([ 1.1751, -0.2972, -0.1796, -0.0648]) tensor([0.5628, 0.1291, 0.1452, 0.1629]) -Greedy action tensor([ 0.7342, -0.5141, -0.1561, 0.2678]) tensor([0.4301, 0.1235, 0.1766, 0.2698]) -Greedy action tensor([ 0.5213, -0.0137, -0.1070, 0.0237]) tensor([0.3667, 0.2148, 0.1956, 0.2229]) -Greedy action tensor([ 1.1572, -0.3206, -0.3791, 0.1562]) tensor([0.5522, 0.1260, 0.1188, 0.2030]) -Greedy action tensor([ 1.2916, -0.6040, -0.2615, 0.3154]) tensor([0.5752, 0.0864, 0.1217, 0.2167]) -Greedy action tensor([ 1.3778, -0.3262, -0.4346, 0.2386]) tensor([0.6005, 0.1093, 0.0980, 0.1922]) -Greedy action tensor([ 1.0149, -0.2573, -0.2748, 0.3290]) tensor([0.4856, 0.1361, 0.1337, 0.2446]) -Greedy action tensor([ 0.9008, -0.4168, 0.0568, 0.0274]) tensor([0.4728, 0.1266, 0.2033, 0.1974]) -Greedy action tensor([ 1.0656, -0.1754, 0.0763, -0.1868]) tensor([0.5137, 0.1485, 0.1910, 0.1468]) -Greedy action tensor([ 1.2416, -0.2746, -0.2851, 0.1119]) tensor([0.5682, 0.1247, 0.1234, 0.1836]) -Greedy action tensor([ 0.7860, -0.0341, 0.0170, -0.5673]) tensor([0.4625, 0.2037, 0.2144, 0.1195]) -Greedy action tensor([ 0.9062, -0.2401, -0.6097, 0.4611]) tensor([0.4591, 0.1459, 0.1008, 0.2942]) -Greedy action tensor([ 0.3841, -0.3781, -0.0082, -0.0989]) tensor([0.3624, 0.1691, 0.2448, 0.2236]) -Greedy action tensor([ 0.7762, -0.3447, 0.0311, -0.0764]) tensor([0.4490, 0.1464, 0.2132, 0.1914]) -Greedy action tensor([ 0.8380, -0.5064, -0.2216, 0.3885]) tensor([0.4454, 0.1161, 0.1544, 0.2841]) -Greedy action tensor([ 0.5996, -0.5182, -0.1399, 0.1521]) tensor([0.4092, 0.1338, 0.1953, 0.2616]) -Greedy action tensor([ 0.5112, -0.4254, -0.2000, 0.2056]) tensor([0.3817, 0.1496, 0.1874, 0.2812]) -Greedy action tensor([ 0.8859, -0.0259, -0.2184, -0.1637]) tensor([0.4800, 0.1929, 0.1591, 0.1680]) -Greedy action tensor([ 2.3638, -0.9772, -0.5122, 0.6310]) tensor([0.7883, 0.0279, 0.0444, 0.1394]) -Greedy action tensor([ 0.8628, -0.5783, -0.1041, 0.2355]) tensor([0.4649, 0.1100, 0.1768, 0.2483]) -Greedy action tensor([ 2.1328, -0.2347, -0.6866, 1.0793]) tensor([0.6658, 0.0624, 0.0397, 0.2321]) -Greedy action tensor([ 1.0609, -0.1642, -0.2985, -0.2115]) tensor([0.5462, 0.1605, 0.1403, 0.1530]) -Greedy action tensor([-0.1014, -0.1940, -0.2230, 0.2627]) tensor([0.2360, 0.2152, 0.2090, 0.3397]) -Greedy action tensor([ 0.9682, -0.1134, 0.1853, -0.0852]) tensor([0.4662, 0.1581, 0.2131, 0.1626]) -Greedy action tensor([ 1.0410, -0.4294, -0.3845, 0.6868]) tensor([0.4604, 0.1058, 0.1107, 0.3231]) -Greedy action tensor([ 1.4212, -0.3152, -0.4724, 0.4164]) tensor([0.5907, 0.1041, 0.0889, 0.2163]) -Greedy action tensor([ 1.8038, -0.4115, -0.1264, 0.3073]) tensor([0.6765, 0.0738, 0.0982, 0.1515]) -Greedy action tensor([ 1.5606, -0.3574, -0.3201, 0.2166]) tensor([0.6410, 0.0942, 0.0977, 0.1672]) -Greedy action tensor([ 1.3688, -0.4554, -0.2472, 0.0867]) tensor([0.6107, 0.0985, 0.1213, 0.1694]) -Greedy action tensor([ 0.9696, -0.3082, 0.1938, -0.2264]) tensor([0.4899, 0.1365, 0.2255, 0.1481]) -Greedy action tensor([ 1.6759, -0.7918, -0.0484, 0.4845]) tensor([0.6382, 0.0541, 0.1138, 0.1939]) -Greedy action tensor([ 1.0611, -0.5220, -0.1698, 0.0735]) tensor([0.5348, 0.1098, 0.1562, 0.1992]) -Greedy action tensor([ 1.2871, -0.6294, -0.2341, 0.5218]) tensor([0.5462, 0.0804, 0.1193, 0.2541]) -Greedy action tensor([ 1.9357, 0.1069, -0.0709, 0.1738]) tensor([0.6818, 0.1095, 0.0917, 0.1171]) -Greedy action tensor([ 0.8916, -0.5995, -0.2038, 0.4523]) tensor([0.4537, 0.1021, 0.1517, 0.2924]) -Greedy action tensor([ 1.1067, 0.0375, -0.0980, 0.3685]) tensor([0.4715, 0.1619, 0.1413, 0.2254]) -Greedy action tensor([ 1.4389, -0.3612, -0.0459, 0.5576]) tensor([0.5537, 0.0915, 0.1254, 0.2294]) -Greedy action tensor([ 1.3579, -0.7783, -0.0751, 0.2627]) tensor([0.5913, 0.0698, 0.1411, 0.1978]) -Greedy action tensor([ 0.7950, -0.4793, 0.1762, -0.0523]) tensor([0.4451, 0.1245, 0.2397, 0.1907]) -Greedy action tensor([ 1.1918, -0.4365, -0.2238, 0.2754]) tensor([0.5438, 0.1067, 0.1320, 0.2175]) -Greedy action tensor([ 1.1561, -0.0023, -0.5243, -0.1625]) tensor([0.5657, 0.1776, 0.1054, 0.1513]) -Greedy action tensor([ 1.6661, -0.7388, -0.4187, 0.4553]) tensor([0.6611, 0.0597, 0.0822, 0.1970]) -Greedy action tensor([ 1.6546, -0.2603, -0.4352, 0.1080]) tensor([0.6738, 0.0993, 0.0834, 0.1435]) -Greedy action tensor([ 2.1441, -1.0348, -0.2140, 0.6662]) tensor([0.7329, 0.0305, 0.0693, 0.1672]) -Greedy action tensor([ 1.2081, -0.6444, -0.4332, 0.2555]) tensor([0.5759, 0.0903, 0.1116, 0.2222]) -Greedy action tensor([ 1.5702, -0.4670, -0.4495, 0.6793]) tensor([0.5976, 0.0779, 0.0793, 0.2452]) -Greedy action tensor([ 2.1192, -0.6880, -0.2170, 0.4542]) tensor([0.7428, 0.0448, 0.0718, 0.1405]) -Greedy action tensor([ 0.7956, -0.3372, -0.1254, 0.1745]) tensor([0.4429, 0.1427, 0.1764, 0.2380]) -Greedy action tensor([ 1.3335, -0.4655, -0.3804, 0.3150]) tensor([0.5859, 0.0969, 0.1056, 0.2116]) -Greedy action tensor([ 1.2718, -0.2181, -0.3381, 0.3710]) tensor([0.5460, 0.1231, 0.1092, 0.2218]) -Greedy action tensor([ 1.3684, -0.7517, -0.3168, 0.3886]) tensor([0.5950, 0.0714, 0.1103, 0.2233]) -Greedy action tensor([ 1.6863, 0.1377, -0.3278, 0.1641]) tensor([0.6393, 0.1359, 0.0853, 0.1395]) -Greedy action tensor([ 1.1011, -0.3114, -0.2390, -0.1058]) tensor([0.5542, 0.1350, 0.1451, 0.1658]) -Greedy action tensor([ 0.7336, -0.4351, -0.8549, 0.4417]) tensor([0.4421, 0.1374, 0.0903, 0.3302]) -Greedy action tensor([ 0.8950, -0.0889, -0.0836, -0.2644]) tensor([0.4846, 0.1812, 0.1822, 0.1520]) -Greedy action tensor([ 1.3917, -0.2584, -0.2953, 0.1535]) tensor([0.5999, 0.1152, 0.1110, 0.1739]) -Greedy action tensor([ 2.0029, -0.6538, -0.1812, 0.6884]) tensor([0.6890, 0.0484, 0.0776, 0.1851]) -Greedy action tensor([ 0.8833, -0.1598, -0.0728, -0.0786]) tensor([0.4720, 0.1663, 0.1814, 0.1804]) -Greedy action tensor([ 1.7065, -0.0979, 0.0065, 0.3602]) tensor([0.6221, 0.1024, 0.1136, 0.1619]) -Greedy action tensor([ 1.3895, -0.4047, -0.1608, 0.1997]) tensor([0.5943, 0.0988, 0.1261, 0.1808]) -Greedy action tensor([ 1.5036, -0.5573, -0.2896, 0.2070]) tensor([0.6381, 0.0813, 0.1062, 0.1745]) -Greedy action tensor([ 1.4916, -0.8404, -0.5109, 0.5900]) tensor([0.6105, 0.0593, 0.0824, 0.2478]) -Greedy action tensor([ 1.3705, -0.6155, -0.1018, 0.1092]) tensor([0.6061, 0.0832, 0.1390, 0.1717]) -Greedy action tensor([ 1.1731, -0.1046, -0.3005, 1.4461]) tensor([0.3544, 0.0988, 0.0812, 0.4656]) -Greedy action tensor([0.2022, 0.8207, 0.2390, 0.2167]) tensor([0.2037, 0.3782, 0.2114, 0.2067]) -Greedy action tensor([ 1.2720, -0.5304, 0.9603, 0.6966]) tensor([0.4066, 0.0670, 0.2977, 0.2287]) -Greedy action tensor([-0.0293, -1.5162, 0.0076, 0.4172]) tensor([0.2613, 0.0591, 0.2712, 0.4084]) -Greedy action tensor([ 0.7930, -0.8674, 0.8466, 0.7820]) tensor([0.3092, 0.0588, 0.3262, 0.3058]) -Greedy action tensor([-0.3510, -0.4387, 1.0377, -1.0291]) tensor([0.1554, 0.1424, 0.6233, 0.0789]) -Greedy action tensor([-0.2505, -0.1825, 1.0576, 0.4391]) tensor([0.1288, 0.1379, 0.4766, 0.2567]) -Greedy action tensor([ 0.7710, -1.7043, 0.0279, 0.7689]) tensor([0.3910, 0.0329, 0.1860, 0.3902]) -Greedy action tensor([ 0.6054, -1.8796, 0.6003, 1.0555]) tensor([0.2742, 0.0228, 0.2728, 0.4301]) -Greedy action tensor([-0.0098, -1.3133, -0.6093, 0.4662]) tensor([0.2915, 0.0792, 0.1601, 0.4692]) -Greedy action tensor([ 1.7628, -1.1752, 0.8510, 1.1322]) tensor([0.5033, 0.0267, 0.2022, 0.2679]) -Greedy action tensor([ 1.4700, 0.2365, -0.4383, 1.1076]) tensor([0.4683, 0.1364, 0.0695, 0.3259]) -Greedy action tensor([ 0.3770, -1.1722, -0.7393, 0.6764]) tensor([0.3461, 0.0735, 0.1134, 0.4670]) -Greedy action tensor([-0.3878, 0.2205, 0.0310, 0.9420]) tensor([0.1229, 0.2258, 0.1868, 0.4645]) -Greedy action tensor([1.2374, 0.7765, 0.2046, 0.5412]) tensor([0.4024, 0.2538, 0.1433, 0.2006]) -Greedy action tensor([ 1.0932, -1.0745, -0.7064, 1.4292]) tensor([0.3733, 0.0427, 0.0617, 0.5223]) -Greedy action tensor([0.6228, 0.3041, 1.1034, 0.8022]) tensor([0.2202, 0.1601, 0.3561, 0.2635]) -Greedy action tensor([-0.9225, 0.2393, -0.7121, -0.3607]) tensor([0.1392, 0.4449, 0.1718, 0.2441]) -Greedy action tensor([-0.5075, -1.6527, -0.6800, -0.2753]) tensor([0.2923, 0.0930, 0.2460, 0.3687]) -Greedy action tensor([ 0.6927, -0.1182, -0.3429, 0.3232]) tensor([0.4015, 0.1785, 0.1425, 0.2775]) -Greedy action tensor([ 0.6101, -0.4424, 0.5075, 0.5229]) tensor([0.3156, 0.1102, 0.2849, 0.2893]) -Greedy action tensor([ 1.9833, -0.4895, 1.1392, 1.4827]) tensor([0.4716, 0.0398, 0.2027, 0.2859]) -Greedy action tensor([ 0.6234, 0.3975, -0.1672, 1.3853]) tensor([0.2276, 0.1816, 0.1032, 0.4876]) -Greedy action tensor([ 1.0257, -0.8893, 1.9392, 0.8855]) tensor([0.2218, 0.0327, 0.5528, 0.1927]) -Greedy action tensor([0.9491, 0.1129, 1.3062, 0.1029]) tensor([0.3038, 0.1317, 0.4342, 0.1304]) -Greedy action tensor([ 0.7040, 0.9180, 1.1644, -0.2940]) tensor([0.2385, 0.2955, 0.3780, 0.0879]) -Greedy action tensor([-0.0920, -1.2800, 0.0906, 0.8073]) tensor([0.2015, 0.0614, 0.2418, 0.4953]) -Greedy action tensor([-0.0994, -1.5972, 0.4863, -0.0146]) tensor([0.2434, 0.0544, 0.4372, 0.2650]) -Greedy action tensor([ 1.0945, -1.0567, -0.2690, 1.5100]) tensor([0.3464, 0.0403, 0.0886, 0.5248]) -Greedy action tensor([ 0.3512, 0.3752, -0.1576, 0.9791]) tensor([0.2223, 0.2277, 0.1336, 0.4164]) -Greedy action tensor([-1.2116, -0.5016, -0.4829, -0.0387]) tensor([0.1199, 0.2439, 0.2486, 0.3876]) -Greedy action tensor([-0.4579, -1.4437, -0.9852, -0.0577]) tensor([0.2894, 0.1080, 0.1708, 0.4318]) -Greedy action tensor([ 0.0269, -1.6474, 0.0846, 0.9253]) tensor([0.2126, 0.0399, 0.2253, 0.5222]) -Greedy action tensor([ 1.0734, -0.2634, 0.1269, 0.1362]) tensor([0.4896, 0.1286, 0.1900, 0.1918]) -Greedy action tensor([ 1.0396, -1.2653, -0.4691, 0.4925]) tensor([0.5264, 0.0525, 0.1164, 0.3046]) -Greedy action tensor([ 0.5715, -1.4243, 0.8026, -0.1324]) tensor([0.3460, 0.0470, 0.4359, 0.1711]) -Greedy action tensor([ 0.6266, -0.2473, 1.5180, -0.0120]) tensor([0.2281, 0.0952, 0.5563, 0.1204]) -Greedy action tensor([ 0.5071, 0.1098, -0.1592, 0.4565]) tensor([0.3188, 0.2143, 0.1638, 0.3031]) -Greedy action tensor([ 1.2423, -0.9913, 0.2890, 0.2398]) tensor([0.5378, 0.0576, 0.2073, 0.1973]) -Greedy action tensor([ 1.0760, 0.3813, -0.3223, 1.2319]) tensor([0.3431, 0.1713, 0.0847, 0.4009]) -Greedy action tensor([ 0.1057, 0.9054, -0.1863, 0.6344]) tensor([0.1764, 0.3925, 0.1317, 0.2993]) -Greedy action tensor([ 0.7734, -0.6609, 2.3914, 0.0831]) tensor([0.1474, 0.0351, 0.7435, 0.0739]) -Greedy action tensor([ 1.6083, -0.3785, 0.4999, 1.7499]) tensor([0.3818, 0.0524, 0.1260, 0.4398]) -Greedy action tensor([-0.9287, -1.8529, 0.4752, -0.6864]) tensor([0.1483, 0.0589, 0.6038, 0.1890]) -Greedy action tensor([-0.6489, -0.7697, 0.0201, 0.2054]) tensor([0.1616, 0.1432, 0.3155, 0.3797]) -Greedy action tensor([ 1.1099, -0.0394, -0.0718, 1.0938]) tensor([0.3835, 0.1215, 0.1176, 0.3774]) -Greedy action tensor([-1.2253, -0.5873, 0.1171, -0.0217]) tensor([0.0995, 0.1883, 0.3808, 0.3315]) -Greedy action tensor([-0.1188, 0.8346, -0.3516, 0.1598]) tensor([0.1752, 0.4545, 0.1388, 0.2315]) -Greedy action tensor([1.0204, 0.0809, 0.3285, 0.4836]) tensor([0.4039, 0.1578, 0.2022, 0.2361]) -Greedy action tensor([1.3897, 0.4351, 0.5880, 0.6509]) tensor([0.4327, 0.1666, 0.1941, 0.2067]) -Greedy action tensor([ 0.1306, -1.1367, 1.4330, -0.1652]) tensor([0.1753, 0.0494, 0.6449, 0.1304]) -Greedy action tensor([2.3244, 0.0306, 0.9295, 1.9268]) tensor([0.4949, 0.0499, 0.1227, 0.3325]) -Greedy action tensor([ 0.7613, -1.4644, 0.4351, 0.6351]) tensor([0.3689, 0.0398, 0.2662, 0.3251]) -Greedy action tensor([-0.4529, -0.4241, -0.7141, 0.3064]) tensor([0.2026, 0.2085, 0.1560, 0.4329]) -Greedy action tensor([ 0.3830, -0.6524, 0.9221, -0.1081]) tensor([0.2716, 0.0964, 0.4657, 0.1662]) -Greedy action tensor([ 0.5483, -0.5929, -0.3811, -0.2510]) tensor([0.4621, 0.1476, 0.1825, 0.2078]) -Greedy action tensor([ 0.7702, 0.2382, 0.2394, -0.3429]) tensor([0.3993, 0.2346, 0.2349, 0.1312]) -Greedy action tensor([ 0.9507, 0.9299, -0.2107, 0.4204]) tensor([0.3471, 0.3400, 0.1087, 0.2043]) -Greedy action tensor([ 0.0222, -0.4433, 1.3156, -0.8325]) tensor([0.1755, 0.1102, 0.6397, 0.0747]) -Greedy action tensor([-0.2318, -0.9215, -0.3061, 0.8752]) tensor([0.1833, 0.0920, 0.1702, 0.5545]) -Greedy action tensor([ 1.2193, -1.3733, 0.2170, 1.1783]) tensor([0.4164, 0.0312, 0.1528, 0.3996]) -Greedy action tensor([ 1.2549, -1.6021, 0.8450, 0.2709]) tensor([0.4773, 0.0274, 0.3168, 0.1784]) -Greedy action tensor([ 0.6915, -1.4472, 0.4455, 0.6005]) tensor([0.3555, 0.0419, 0.2780, 0.3246]) -Greedy action tensor([ 0.4885, -1.2311, 1.8841, 1.4103]) tensor([0.1294, 0.0232, 0.5223, 0.3252]) -Greedy action tensor([0.6536, 1.0332, 0.1834, 0.5670]) tensor([0.2498, 0.3651, 0.1561, 0.2291]) -Greedy action tensor([ 0.4588, -1.2884, 0.6184, 0.7489]) tensor([0.2714, 0.0473, 0.3184, 0.3628]) -Greedy action tensor([-0.0418, 0.8080, 0.7644, 0.0124]) tensor([0.1507, 0.3526, 0.3376, 0.1591]) -Greedy action tensor([ 0.8694, -0.7097, 1.2923, 0.0171]) tensor([0.3165, 0.0653, 0.4832, 0.1350]) -Greedy action tensor([ 0.0897, -2.6723, -0.1571, 0.7844]) tensor([0.2599, 0.0164, 0.2031, 0.5206]) -Greedy action tensor([ 0.1998, -1.1742, 1.4528, -0.5070]) tensor([0.1906, 0.0482, 0.6672, 0.0940]) -Greedy action tensor([ 0.8836, -0.1375, 0.3113, 1.7031]) tensor([0.2384, 0.0859, 0.1345, 0.5411]) -Greedy action tensor([-0.8873, -0.2575, 0.3725, 0.3820]) tensor([0.1004, 0.1885, 0.3539, 0.3573]) -Greedy action tensor([-0.7702, -2.0467, 1.6779, -0.5968]) tensor([0.0713, 0.0199, 0.8241, 0.0847]) -Greedy action tensor([-0.3809, -1.9162, 0.4300, 1.4486]) tensor([0.1031, 0.0222, 0.2321, 0.6426]) -Greedy action tensor([ 1.2810, -0.4211, -0.2236, 1.6575]) tensor([0.3495, 0.0637, 0.0776, 0.5092]) -Greedy action tensor([-0.1091, 0.0129, 0.8854, 0.3150]) tensor([0.1572, 0.1776, 0.4250, 0.2402]) -Greedy action tensor([ 0.1308, -1.6214, 0.0238, 0.7396]) tensor([0.2557, 0.0443, 0.2298, 0.4701]) -Greedy action tensor([-0.5885, 0.4495, -0.5025, 0.2389]) tensor([0.1389, 0.3921, 0.1514, 0.3176]) -Greedy action tensor([ 0.3791, -0.8491, 0.3610, -0.3867]) tensor([0.3650, 0.1069, 0.3584, 0.1697]) -Greedy action tensor([ 0.3932, -0.1793, -0.6861, 1.5447]) tensor([0.1974, 0.1113, 0.0671, 0.6242]) -Greedy action tensor([-0.6100, -0.6622, -0.1495, -0.0512]) tensor([0.1893, 0.1797, 0.3000, 0.3310]) -Greedy action tensor([ 0.0567, 0.3931, 0.2201, -0.0694]) tensor([0.2243, 0.3140, 0.2641, 0.1977]) -Greedy action tensor([ 0.8418, -0.6950, -0.1930, -0.6496]) tensor([0.5570, 0.1198, 0.1979, 0.1253]) -Greedy action tensor([ 0.4577, -0.0650, 0.0254, -0.1532]) tensor([0.3591, 0.2129, 0.2331, 0.1949]) -Greedy action tensor([ 0.7526, -0.7077, -0.0123, -0.2618]) tensor([0.4854, 0.1127, 0.2259, 0.1760]) -Greedy action tensor([ 0.4634, -0.3951, 0.0211, -0.0943]) tensor([0.3789, 0.1606, 0.2435, 0.2170]) -Greedy action tensor([ 0.5244, -0.5597, -0.1388, -0.5416]) tensor([0.4550, 0.1539, 0.2344, 0.1567]) -Greedy action tensor([ 0.6676, -0.1320, -0.0996, -0.6650]) tensor([0.4592, 0.2064, 0.2132, 0.1211]) -Greedy action tensor([ 0.5770, -0.1924, -0.0322, -0.5266]) tensor([0.4276, 0.1981, 0.2325, 0.1418]) -Greedy action tensor([ 0.6416, -0.1967, -0.1971, -0.0764]) tensor([0.4251, 0.1838, 0.1838, 0.2073]) -Greedy action tensor([ 1.0559, -0.1444, -0.0718, -0.4502]) tensor([0.5415, 0.1631, 0.1753, 0.1201]) -Greedy action tensor([ 0.5909, -0.3744, -0.0660, -0.2929]) tensor([0.4324, 0.1647, 0.2242, 0.1787]) -Greedy action tensor([ 0.7141, -0.6017, -0.0994, -0.3766]) tensor([0.4884, 0.1310, 0.2165, 0.1641]) -Greedy action tensor([ 0.6947, -0.4251, 0.1800, -0.3800]) tensor([0.4414, 0.1440, 0.2638, 0.1507]) -Greedy action tensor([ 0.6460, -0.5125, 0.0157, -0.4759]) tensor([0.4604, 0.1445, 0.2451, 0.1499]) -Greedy action tensor([ 0.3473, -0.1898, -0.1148, -0.1411]) tensor([0.3536, 0.2067, 0.2228, 0.2170]) -Greedy action tensor([ 0.7129, -0.1477, 0.0629, -0.2982]) tensor([0.4331, 0.1832, 0.2261, 0.1576]) -Greedy action tensor([ 0.6492, -0.1462, -0.0597, -0.1749]) tensor([0.4198, 0.1895, 0.2066, 0.1841]) -Greedy action tensor([ 0.2989, 0.1314, -0.1963, -0.1214]) tensor([0.3213, 0.2718, 0.1958, 0.2111]) -Greedy action tensor([ 0.6084, -0.3065, -0.0391, -0.2650]) tensor([0.4271, 0.1711, 0.2235, 0.1783]) -Greedy action tensor([ 0.7994, -0.5054, -0.1600, -0.4141]) tensor([0.5124, 0.1390, 0.1963, 0.1523]) -Greedy action tensor([ 0.6259, -0.3558, 0.0669, -0.3347]) tensor([0.4293, 0.1609, 0.2455, 0.1643]) -Greedy action tensor([ 0.2938, 0.0495, -0.0175, -0.1627]) tensor([0.3175, 0.2487, 0.2326, 0.2012]) -Greedy action tensor([ 0.5389, -0.5798, -0.2063, -0.2614]) tensor([0.4443, 0.1452, 0.2109, 0.1996]) -Greedy action tensor([ 0.6006, -0.1236, -0.2608, -0.4896]) tensor([0.4458, 0.2161, 0.1884, 0.1498]) -Greedy action tensor([ 0.3147, -0.1606, -0.0789, -0.1304]) tensor([0.3405, 0.2117, 0.2297, 0.2182]) -Greedy action tensor([ 0.7436, -0.3645, -0.1307, -0.3669]) tensor([0.4815, 0.1590, 0.2009, 0.1586]) -Greedy action tensor([ 0.8019, -0.6269, 0.1772, -0.9025]) tensor([0.5110, 0.1224, 0.2736, 0.0929]) -Greedy action tensor([ 0.6097, -0.4066, -0.0910, -0.4116]) tensor([0.4508, 0.1632, 0.2237, 0.1623]) -Greedy action tensor([ 1.0907, -0.3985, 0.0023, -0.5941]) tensor([0.5721, 0.1291, 0.1927, 0.1061]) -Greedy action tensor([ 0.3344, 0.0319, -0.0251, -0.3240]) tensor([0.3384, 0.2501, 0.2363, 0.1752]) -Greedy action tensor([ 0.2928, 0.0289, -0.1087, -0.2108]) tensor([0.3288, 0.2525, 0.2201, 0.1987]) -Greedy action tensor([ 0.6200, -0.0313, -0.0955, -0.5989]) tensor([0.4337, 0.2261, 0.2120, 0.1282]) -Greedy action tensor([ 0.4598, 0.1110, -0.1435, -0.0580]) tensor([0.3511, 0.2477, 0.1920, 0.2092]) -Greedy action tensor([ 0.9963, -0.5877, -0.0798, -0.4243]) tensor([0.5594, 0.1148, 0.1907, 0.1351]) -Greedy action tensor([ 0.8543, -0.7036, -0.0541, -0.3767]) tensor([0.5247, 0.1105, 0.2116, 0.1532]) -Greedy action tensor([ 1.2188, -0.8747, -0.1707, -0.8371]) tensor([0.6665, 0.0821, 0.1661, 0.0853]) -Greedy action tensor([ 0.7020, -0.2238, -0.0380, -0.4354]) tensor([0.4558, 0.1806, 0.2175, 0.1461]) -Greedy action tensor([ 0.4424, -0.2400, -0.0858, -0.4593]) tensor([0.3999, 0.2021, 0.2358, 0.1623]) -Greedy action tensor([ 7.5158e-01, -8.7312e-01, 2.6798e-04, -2.5545e-01]) tensor([0.4916, 0.0968, 0.2319, 0.1796]) -Greedy action tensor([ 0.7350, -0.5828, 0.0550, -0.5760]) tensor([0.4893, 0.1310, 0.2479, 0.1319]) -Greedy action tensor([ 0.3825, -0.3924, -0.0849, -0.2107]) tensor([0.3788, 0.1745, 0.2374, 0.2093]) -Greedy action tensor([ 0.8080, -0.5359, -0.1103, -0.5154]) tensor([0.5191, 0.1354, 0.2073, 0.1382]) -Greedy action tensor([ 0.3246, -0.2005, 0.0185, -0.3565]) tensor([0.3529, 0.2087, 0.2598, 0.1786]) -Greedy action tensor([ 0.8503, -0.3903, -0.0898, -0.2732]) tensor([0.4988, 0.1442, 0.1948, 0.1622]) -Greedy action tensor([ 0.6433, -0.3443, 0.0169, -0.1542]) tensor([0.4242, 0.1580, 0.2267, 0.1911]) -Greedy action tensor([ 0.4759, -0.0996, -0.0070, -0.3687]) tensor([0.3833, 0.2156, 0.2365, 0.1647]) -Greedy action tensor([ 0.6721, -0.6705, -0.0890, -0.1629]) tensor([0.4625, 0.1208, 0.2161, 0.2007]) -Greedy action tensor([ 0.9479, -0.1322, 0.1083, -0.2632]) tensor([0.4833, 0.1641, 0.2087, 0.1439]) -Greedy action tensor([ 0.6157, -0.5098, 0.2767, -0.6866]) tensor([0.4331, 0.1405, 0.3086, 0.1178]) -Greedy action tensor([ 0.2981, 0.1063, -0.0537, -0.3330]) tensor([0.3267, 0.2697, 0.2298, 0.1738]) -Greedy action tensor([ 0.8133, -0.6899, -0.1149, -0.3952]) tensor([0.5218, 0.1161, 0.2063, 0.1558]) -Greedy action tensor([ 0.6528, -0.6214, -0.0712, -0.1951]) tensor([0.4560, 0.1275, 0.2211, 0.1953]) -Greedy action tensor([ 0.2863, -0.0150, -0.1299, -0.2821]) tensor([0.3372, 0.2495, 0.2224, 0.1910]) -Greedy action tensor([ 0.4788, -0.1625, -0.0480, -0.1520]) tensor([0.3775, 0.1988, 0.2229, 0.2009]) -Greedy action tensor([ 0.9288, -0.9893, 0.1014, -0.6672]) tensor([0.5597, 0.0822, 0.2447, 0.1135]) -Greedy action tensor([ 0.5165, -0.3329, -0.1216, -0.5440]) tensor([0.4343, 0.1858, 0.2295, 0.1504]) -Greedy action tensor([ 0.5859, -0.3519, 0.0423, -0.3414]) tensor([0.4223, 0.1653, 0.2452, 0.1671]) -Greedy action tensor([ 0.6038, -0.5150, 0.0159, -0.2167]) tensor([0.4306, 0.1407, 0.2392, 0.1895]) -Greedy action tensor([ 0.8600, 0.1762, -0.1291, -0.1937]) tensor([0.4494, 0.2268, 0.1671, 0.1567]) -Greedy action tensor([ 0.5314, -0.3348, -0.0857, -0.2105]) tensor([0.4105, 0.1726, 0.2214, 0.1955]) -Greedy action tensor([ 0.4955, -0.1714, -0.1023, -0.2725]) tensor([0.3957, 0.2031, 0.2176, 0.1836]) -Greedy action tensor([ 0.5483, -0.2894, -0.0484, -0.2609]) tensor([0.4118, 0.1782, 0.2267, 0.1833]) -Greedy action tensor([ 0.4042, -0.1420, -0.0384, -0.1446]) tensor([0.3572, 0.2069, 0.2295, 0.2064]) -Greedy action tensor([ 0.8866, -0.6808, 0.1920, -0.4402]) tensor([0.5068, 0.1057, 0.2530, 0.1345]) -Greedy action tensor([ 0.8229, -0.4771, -0.0439, -0.3216]) tensor([0.4972, 0.1355, 0.2090, 0.1583]) -Greedy action tensor([ 0.5333, -0.3237, -0.0198, -0.3700]) tensor([0.4158, 0.1765, 0.2392, 0.1685]) -Greedy action tensor([ 0.8159, -0.4038, -0.0208, -0.4287]) tensor([0.4959, 0.1464, 0.2148, 0.1428]) -Greedy action tensor([ 1.0585, -0.7576, -0.0275, -0.3835]) tensor([0.5758, 0.0937, 0.1944, 0.1362]) -Greedy action tensor([ 0.7342, -0.4145, -0.0685, -0.4607]) tensor([0.4836, 0.1533, 0.2167, 0.1464]) -Greedy action tensor([ 0.7530, -0.5814, -0.0863, -0.3878]) tensor([0.4963, 0.1307, 0.2144, 0.1586]) -Greedy action tensor([ 0.3645, 0.0505, -0.0632, -0.0144]) tensor([0.3260, 0.2382, 0.2126, 0.2232]) -Greedy action tensor([ 0.6983, -0.2508, 0.0297, -0.3805]) tensor([0.4465, 0.1728, 0.2288, 0.1518]) -Greedy action tensor([ 0.9239, -0.5753, 0.1750, -0.5451]) tensor([0.5191, 0.1159, 0.2455, 0.1195]) -Greedy action tensor([ 0.7880, -0.5512, 0.0655, -0.4277]) tensor([0.4892, 0.1282, 0.2375, 0.1450]) -Greedy action tensor([ 0.5721, -0.4595, 0.1048, -0.4636]) tensor([0.4277, 0.1524, 0.2680, 0.1518]) -Greedy action tensor([ 0.8195, -0.6589, -0.1528, -0.3741]) tensor([0.5238, 0.1194, 0.1981, 0.1588]) -Greedy action tensor([ 0.4968, -0.4418, -0.1588, -0.7812]) tensor([0.4569, 0.1787, 0.2372, 0.1273]) -Greedy action tensor([ 0.4715, -0.3110, -0.0524, -0.1851]) tensor([0.3894, 0.1781, 0.2306, 0.2019]) -Greedy action tensor([ 0.4252, -0.0995, 0.0151, -0.2380]) tensor([0.3609, 0.2136, 0.2395, 0.1860]) -Greedy action tensor([ 0.3024, 0.0387, -0.0295, -0.1818]) tensor([0.3224, 0.2477, 0.2313, 0.1986]) -Greedy action tensor([ 0.2273, 0.1833, -0.0213, -0.3435]) tensor([0.3028, 0.2898, 0.2362, 0.1711]) -Greedy action tensor([ 0.6359, -0.0833, -0.0167, -0.2056]) tensor([0.4100, 0.1997, 0.2135, 0.1767]) -Greedy action tensor([ 0.9801, -0.2914, -0.2344, 0.2970]) tensor([0.4802, 0.1347, 0.1426, 0.2425]) -Greedy action tensor([ 1.1434, -0.3750, -0.0311, 0.1117]) tensor([0.5307, 0.1163, 0.1640, 0.1891]) -Greedy action tensor([ 1.8539, -0.5776, -0.3794, 0.3271]) tensor([0.7081, 0.0622, 0.0759, 0.1538]) -Greedy action tensor([ 1.3601, -0.5282, -0.4654, 0.7630]) tensor([0.5368, 0.0812, 0.0865, 0.2955]) -Greedy action tensor([ 0.8638, -0.2578, -0.3091, 0.4665]) tensor([0.4334, 0.1412, 0.1341, 0.2913]) -Greedy action tensor([ 0.7455, -0.1977, -0.2729, 0.0322]) tensor([0.4463, 0.1738, 0.1612, 0.2187]) -Greedy action tensor([ 1.7907, -0.5758, -0.3468, 0.4409]) tensor([0.6798, 0.0638, 0.0802, 0.1763]) -Greedy action tensor([ 1.4254, -0.5552, -0.3950, 0.1881]) tensor([0.6289, 0.0868, 0.1019, 0.1825]) -Greedy action tensor([ 1.2986, -0.0707, 0.0183, -0.0782]) tensor([0.5604, 0.1425, 0.1557, 0.1414]) -Greedy action tensor([ 1.1510, -0.6594, -0.1844, 0.4290]) tensor([0.5229, 0.0855, 0.1375, 0.2540]) -Greedy action tensor([ 0.8743, -0.4998, -0.0431, 0.0501]) tensor([0.4782, 0.1210, 0.1911, 0.2097]) -Greedy action tensor([ 1.3644, -0.4420, -0.6010, -0.2414]) tensor([0.6644, 0.1091, 0.0931, 0.1334]) -Greedy action tensor([ 1.5686, -0.4852, -0.2347, 0.0599]) tensor([0.6604, 0.0847, 0.1088, 0.1461]) -Greedy action tensor([ 1.0370, -0.0940, -0.1861, 0.2527]) tensor([0.4823, 0.1556, 0.1419, 0.2201]) -Greedy action tensor([ 1.1882, -0.7010, -0.2644, 0.0954]) tensor([0.5813, 0.0879, 0.1360, 0.1949]) -Greedy action tensor([ 1.0580, -0.3049, -0.3002, 0.1506]) tensor([0.5218, 0.1335, 0.1342, 0.2106]) -Greedy action tensor([ 0.5475, -0.0636, 0.0554, -0.1038]) tensor([0.3738, 0.2029, 0.2285, 0.1949]) -Greedy action tensor([ 1.4377, -0.4984, -0.4431, 0.3485]) tensor([0.6123, 0.0883, 0.0934, 0.2060]) -Greedy action tensor([ 0.4687, 0.1451, -0.1843, 0.0672]) tensor([0.3433, 0.2483, 0.1787, 0.2297]) -Greedy action tensor([ 1.1834, -0.6124, -0.1296, 0.3936]) tensor([0.5294, 0.0879, 0.1424, 0.2403]) -Greedy action tensor([ 1.5117, -0.4649, -0.0224, 0.2365]) tensor([0.6122, 0.0848, 0.1320, 0.1710]) -Greedy action tensor([ 0.8474, -0.4763, 0.0271, -0.1981]) tensor([0.4859, 0.1293, 0.2139, 0.1708]) -Greedy action tensor([ 2.0717, -0.2436, 0.0439, 0.2249]) tensor([0.7204, 0.0711, 0.0948, 0.1136]) -Greedy action tensor([ 1.2120, -0.1176, 0.0786, 0.2563]) tensor([0.5073, 0.1342, 0.1633, 0.1951]) -Greedy action tensor([ 0.6943, -0.0256, -0.0015, -0.1387]) tensor([0.4132, 0.2011, 0.2060, 0.1796]) -Greedy action tensor([ 1.0980, -0.6908, -0.5120, 0.7661]) tensor([0.4797, 0.0802, 0.0959, 0.3442]) -Greedy action tensor([ 0.9412, -0.2384, -0.3930, 0.4542]) tensor([0.4576, 0.1407, 0.1205, 0.2812]) -Greedy action tensor([ 1.4626, -0.3348, -0.6802, -0.1460]) tensor([0.6742, 0.1117, 0.0791, 0.1350]) -Greedy action tensor([ 1.2879, -0.4315, -0.2781, 0.3609]) tensor([0.5606, 0.1005, 0.1171, 0.2219]) -Greedy action tensor([ 1.0748, -0.4569, -0.1413, 0.2863]) tensor([0.5084, 0.1099, 0.1507, 0.2311]) -Greedy action tensor([ 1.5011, -0.3186, -0.4080, 0.1320]) tensor([0.6391, 0.1036, 0.0947, 0.1626]) -Greedy action tensor([ 1.7955, -0.5078, -0.1971, 0.3375]) tensor([0.6807, 0.0680, 0.0928, 0.1584]) -Greedy action tensor([ 1.4789, -0.9207, -0.0747, 0.4232]) tensor([0.6060, 0.0550, 0.1282, 0.2109]) -Greedy action tensor([ 1.4794, -0.4538, -0.4199, 0.4627]) tensor([0.6038, 0.0874, 0.0904, 0.2185]) -Greedy action tensor([ 1.1829, -0.5012, -0.2848, 0.4457]) tensor([0.5278, 0.0980, 0.1216, 0.2526]) -Greedy action tensor([ 1.3971, -0.3996, -0.6146, 0.2012]) tensor([0.6242, 0.1035, 0.0835, 0.1888]) -Greedy action tensor([ 1.2419, -0.5943, -0.0093, -0.0262]) tensor([0.5791, 0.0923, 0.1657, 0.1629]) -Greedy action tensor([ 1.4909, -0.4247, 0.0090, 0.0734]) tensor([0.6185, 0.0911, 0.1405, 0.1499]) -Greedy action tensor([ 1.2976, -0.5198, -0.0270, 0.2039]) tensor([0.5671, 0.0921, 0.1508, 0.1900]) -Greedy action tensor([ 0.6074, -0.5061, -0.2054, 0.2987]) tensor([0.3990, 0.1310, 0.1770, 0.2930]) -Greedy action tensor([ 0.8745, -0.3331, -0.3532, 0.4304]) tensor([0.4478, 0.1338, 0.1312, 0.2872]) -Greedy action tensor([ 0.5272, -0.4707, -0.4937, 0.4342]) tensor([0.3788, 0.1396, 0.1365, 0.3451]) -Greedy action tensor([ 1.7595, -0.8287, -0.2577, 0.4166]) tensor([0.6806, 0.0511, 0.0905, 0.1777]) -Greedy action tensor([ 1.1445, 0.2967, -0.4184, -0.0211]) tensor([0.5129, 0.2197, 0.1075, 0.1599]) -Greedy action tensor([ 0.9270, -0.2829, -0.1281, -0.2017]) tensor([0.5076, 0.1514, 0.1767, 0.1642]) -Greedy action tensor([ 0.4976, -0.3076, 0.0919, 0.0379]) tensor([0.3643, 0.1628, 0.2428, 0.2301]) -Greedy action tensor([ 1.0834, -0.4063, -0.0192, -0.1513]) tensor([0.5410, 0.1220, 0.1796, 0.1574]) -Greedy action tensor([ 1.2052, -0.1781, -0.2286, -0.0941]) tensor([0.5676, 0.1423, 0.1353, 0.1548]) -Greedy action tensor([ 1.3167, -0.2101, -0.5381, -0.2040]) tensor([0.6280, 0.1364, 0.0983, 0.1373]) -Greedy action tensor([ 0.7193, -0.1841, -0.1858, 0.0662]) tensor([0.4292, 0.1739, 0.1736, 0.2233]) -Greedy action tensor([ 0.8633, -0.2667, -0.1049, 0.2147]) tensor([0.4493, 0.1451, 0.1706, 0.2349]) -Greedy action tensor([ 1.5538, 0.1207, -0.3165, 0.2587]) tensor([0.6001, 0.1432, 0.0925, 0.1643]) -Greedy action tensor([ 0.9173, -0.3295, -0.2281, -0.0080]) tensor([0.4995, 0.1436, 0.1589, 0.1980]) -Greedy action tensor([ 1.4536, -0.4476, -0.4732, 0.5675]) tensor([0.5857, 0.0875, 0.0853, 0.2415]) -Greedy action tensor([ 1.5194e+00, -5.2595e-04, -1.9927e-01, -2.8244e-02]) tensor([0.6208, 0.1358, 0.1113, 0.1321]) -Greedy action tensor([ 1.9779, -0.7684, -0.1376, 0.3674]) tensor([0.7223, 0.0463, 0.0871, 0.1443]) -Greedy action tensor([ 1.1905, -0.7648, -0.2584, 0.4161]) tensor([0.5443, 0.0770, 0.1278, 0.2509]) -Greedy action tensor([ 1.0764, -0.3810, -0.4922, 0.5528]) tensor([0.4917, 0.1145, 0.1025, 0.2913]) -Greedy action tensor([ 1.5509, -0.2913, -0.6945, 0.5747]) tensor([0.6093, 0.0966, 0.0645, 0.2296]) -Greedy action tensor([ 1.2491, -0.2959, -0.2019, 0.0164]) tensor([0.5750, 0.1227, 0.1347, 0.1676]) -Greedy action tensor([ 1.7871, -0.6139, -0.4373, 0.2717]) tensor([0.7050, 0.0639, 0.0762, 0.1549]) -Greedy action tensor([ 0.7088, -0.1475, -0.2248, 0.1759]) tensor([0.4158, 0.1766, 0.1635, 0.2441]) -Greedy action tensor([ 1.5802, -0.8350, -0.2934, 0.5341]) tensor([0.6273, 0.0560, 0.0963, 0.2203]) -Greedy action tensor([ 1.7993, -0.6691, -0.1684, -0.0896]) tensor([0.7269, 0.0616, 0.1016, 0.1099]) -Greedy action tensor([ 1.2356, -0.4485, -0.1367, 0.3011]) tensor([0.5459, 0.1013, 0.1384, 0.2144]) -Greedy action tensor([ 1.3209, -0.3834, -0.5388, 0.2433]) tensor([0.5959, 0.1084, 0.0928, 0.2029]) -Greedy action tensor([ 1.0422, 0.2528, -0.0255, 0.0430]) tensor([0.4617, 0.2097, 0.1587, 0.1700]) -Greedy action tensor([ 1.2897, -0.4336, 0.0540, 0.3880]) tensor([0.5333, 0.0952, 0.1550, 0.2165]) -Greedy action tensor([ 1.6105, -0.8683, -0.1409, 0.1995]) tensor([0.6661, 0.0558, 0.1156, 0.1625]) -Greedy action tensor([ 1.3364, 0.1203, -0.1587, 0.3893]) tensor([0.5240, 0.1553, 0.1175, 0.2032]) -Greedy action tensor([ 0.6455, 0.1837, 0.2789, -0.0754]) tensor([0.3559, 0.2243, 0.2467, 0.1731]) -Greedy action tensor([ 1.5076, -0.5723, -0.3495, 0.3157]) tensor([0.6310, 0.0788, 0.0985, 0.1916]) -Greedy action tensor([ 1.7821, -0.6868, -0.2255, 0.1684]) tensor([0.7051, 0.0597, 0.0947, 0.1404]) -Greedy action tensor([ 1.7077, -1.0307, -0.0515, 0.3668]) tensor([0.6673, 0.0432, 0.1149, 0.1746]) -Greedy action tensor([ 3.6695e-01, -3.1814e-04, -1.6376e-01, -8.3222e-02]) tensor([0.3427, 0.2373, 0.2015, 0.2185]) -Greedy action tensor([ 1.1619, -0.0065, -0.4349, 0.0141]) tensor([0.5462, 0.1698, 0.1106, 0.1733]) -Greedy action tensor([ 1.6849, -0.6737, -0.2251, 0.4613]) tensor([0.6507, 0.0615, 0.0964, 0.1914]) -Greedy action tensor([ 0.9053, -0.0630, -0.0722, -0.3643]) tensor([0.4909, 0.1864, 0.1847, 0.1379]) -Greedy action tensor([ 1.1653, -0.5886, -0.1561, 0.3242]) tensor([0.5345, 0.0925, 0.1426, 0.2305]) -Greedy action tensor([ 1.5244, -0.2868, -0.3350, 0.3158]) tensor([0.6181, 0.1010, 0.0963, 0.1846]) -Greedy action tensor([ 0.8867, -0.2881, -0.1020, 0.2983]) tensor([0.4472, 0.1381, 0.1664, 0.2483]) -Greedy action tensor([-1.0530, -0.2510, 0.1622, 0.3776]) tensor([0.0927, 0.2068, 0.3126, 0.3878]) -Greedy action tensor([-1.2772, -0.4367, 0.7314, 0.8075]) tensor([0.0532, 0.1232, 0.3962, 0.4275]) -Greedy action tensor([-1.5742, -0.5768, 0.4890, 0.0635]) tensor([0.0598, 0.1621, 0.4706, 0.3075]) -Greedy action tensor([-1.4353, -0.5342, 0.4017, 0.2311]) tensor([0.0665, 0.1638, 0.4176, 0.3521]) -Greedy action tensor([-1.8161, -0.4851, 0.6023, -0.1313]) tensor([0.0467, 0.1768, 0.5246, 0.2519]) -Greedy action tensor([-0.7638, -0.1801, 0.1595, 1.0686]) tensor([0.0865, 0.1551, 0.2178, 0.5406]) -Greedy action tensor([-1.6964, -0.5252, 0.5413, -0.0699]) tensor([0.0535, 0.1727, 0.5016, 0.2722]) -Greedy action tensor([-1.6814, -0.4679, 0.7058, 0.2229]) tensor([0.0455, 0.1532, 0.4955, 0.3057]) -Greedy action tensor([-1.5215, -0.5537, 0.4303, 0.0904]) tensor([0.0638, 0.1678, 0.4489, 0.3196]) -Greedy action tensor([-1.3575, -0.3854, 0.4956, 0.3447]) tensor([0.0645, 0.1705, 0.4113, 0.3537]) -Greedy action tensor([-1.8764, -0.4834, 0.7427, -0.0053]) tensor([0.0396, 0.1595, 0.5436, 0.2573]) -Greedy action tensor([0.0794, 0.7581, 0.8665, 1.6127]) tensor([0.1020, 0.2011, 0.2241, 0.4727]) -Greedy action tensor([-1.7359, -0.4723, 0.5609, -0.0663]) tensor([0.0505, 0.1788, 0.5024, 0.2683]) -Greedy action tensor([-1.5558, -0.5875, 0.4695, -0.0489]) tensor([0.0636, 0.1675, 0.4820, 0.2870]) -Greedy action tensor([-0.8217, -0.4764, 1.1329, 1.4995]) tensor([0.0509, 0.0718, 0.3591, 0.5182]) -Greedy action tensor([0.0725, 0.5250, 0.5736, 1.4542]) tensor([0.1219, 0.1916, 0.2012, 0.4853]) -Greedy action tensor([-1.8341, -0.3428, 0.6196, -0.0468]) tensor([0.0434, 0.1928, 0.5047, 0.2592]) -Greedy action tensor([-1.2957, -0.5414, 0.3660, 0.1271]) tensor([0.0797, 0.1695, 0.4200, 0.3308]) -Greedy action tensor([-1.4532, -0.5546, 0.4472, 0.3071]) tensor([0.0627, 0.1539, 0.4191, 0.3643]) -Greedy action tensor([-1.9540, -0.9101, 0.6486, 0.1876]) tensor([0.0387, 0.1099, 0.5222, 0.3293]) -Greedy action tensor([-1.4098, -0.6793, 0.2988, -0.0289]) tensor([0.0795, 0.1651, 0.4390, 0.3163]) -Greedy action tensor([-1.0398, -0.6319, 0.2905, 0.4346]) tensor([0.0939, 0.1411, 0.3550, 0.4100]) -Greedy action tensor([-0.2873, -0.1946, 0.1667, 0.1807]) tensor([0.1898, 0.2082, 0.2989, 0.3031]) -Greedy action tensor([-1.8019, -0.4122, 0.6250, -0.0349]) tensor([0.0451, 0.1809, 0.5103, 0.2638]) -Greedy action tensor([-1.6781, -0.5348, 1.3986, 1.0767]) tensor([0.0241, 0.0755, 0.5220, 0.3784]) -Greedy action tensor([-1.7784, -0.3914, 0.5243, -0.0962]) tensor([0.0491, 0.1964, 0.4907, 0.2638]) -Greedy action tensor([-0.2026, 0.0037, 1.1731, 1.6841]) tensor([0.0782, 0.0961, 0.3096, 0.5161]) -Greedy action tensor([-1.3748, -0.5658, 0.3521, 0.1605]) tensor([0.0740, 0.1662, 0.4162, 0.3436]) -Greedy action tensor([-1.3784, -0.1333, 0.8936, 0.9326]) tensor([0.0412, 0.1432, 0.3998, 0.4157]) -Greedy action tensor([-1.5675, -0.5531, 0.4690, 0.0958]) tensor([0.0599, 0.1651, 0.4590, 0.3160]) -Greedy action tensor([-1.9281, -0.4565, 0.6649, -0.1636]) tensor([0.0407, 0.1773, 0.5443, 0.2377]) -Greedy action tensor([-1.0095, -0.5763, 0.2014, 0.3974]) tensor([0.1002, 0.1545, 0.3363, 0.4091]) -Greedy action tensor([-1.8954, -0.4050, 0.6414, -0.1413]) tensor([0.0419, 0.1861, 0.5298, 0.2422]) -Greedy action tensor([-1.2279, -0.4192, 0.7338, 0.9935]) tensor([0.0511, 0.1147, 0.3633, 0.4710]) -Greedy action tensor([-1.1125, 0.5800, 0.4771, 0.7624]) tensor([0.0560, 0.3043, 0.2745, 0.3652]) -Greedy action tensor([-1.8328, -0.4958, 0.6110, -0.1189]) tensor([0.0457, 0.1741, 0.5265, 0.2537]) -Greedy action tensor([-1.7360, -0.1376, 0.5207, -0.0308]) tensor([0.0476, 0.2355, 0.4549, 0.2620]) -Greedy action tensor([-1.1795, -0.6095, 0.4081, -0.0961]) tensor([0.0942, 0.1666, 0.4609, 0.2783]) -Greedy action tensor([-1.8163, -0.3963, 0.6473, -0.0083]) tensor([0.0435, 0.1800, 0.5111, 0.2653]) -Greedy action tensor([-0.2816, 0.0749, 0.2127, 0.3678]) tensor([0.1672, 0.2388, 0.2741, 0.3200]) -Greedy action tensor([-0.8628, -0.5618, 0.2714, 0.0536]) tensor([0.1256, 0.1697, 0.3905, 0.3141]) -Greedy action tensor([-1.8704, -0.4636, 0.6436, -0.1200]) tensor([0.0431, 0.1760, 0.5326, 0.2482]) -Greedy action tensor([-1.9228, -0.4427, 0.6534, -0.1670]) tensor([0.0411, 0.1806, 0.5404, 0.2379]) -Greedy action tensor([-0.9906, 0.2835, 0.4368, 1.1332]) tensor([0.0585, 0.2090, 0.2436, 0.4889]) -Greedy action tensor([-1.8493, -0.3593, 0.6341, -0.0990]) tensor([0.0431, 0.1915, 0.5170, 0.2484]) -Greedy action tensor([-1.4905, -0.5940, 1.3912, 1.2135]) tensor([0.0276, 0.0676, 0.4925, 0.4123]) -Greedy action tensor([-0.8475, -0.5304, 0.1272, -0.1178]) tensor([0.1409, 0.1935, 0.3734, 0.2923]) -Greedy action tensor([-0.9572, -0.5689, 0.2161, 0.1867]) tensor([0.1130, 0.1667, 0.3654, 0.3548]) -Greedy action tensor([-1.8418, -0.4481, 0.6859, -0.0752]) tensor([0.0427, 0.1722, 0.5351, 0.2500]) -Greedy action tensor([-1.9001, -0.4569, 0.6443, -0.1514]) tensor([0.0422, 0.1785, 0.5370, 0.2423]) -Greedy action tensor([-1.8771, -0.3341, 0.6201, -0.1349]) tensor([0.0425, 0.1988, 0.5162, 0.2426]) -Greedy action tensor([-1.4375, -0.0859, 0.6469, -0.6416]) tensor([0.0661, 0.2555, 0.5317, 0.1466]) -Greedy action tensor([-1.8643, -0.7640, 0.5225, 0.0695]) tensor([0.0459, 0.1379, 0.4990, 0.3173]) -Greedy action tensor([-1.0665, -0.7137, 0.7102, 1.1955]) tensor([0.0558, 0.0793, 0.3295, 0.5354]) -Greedy action tensor([-1.9081, -0.3965, 0.6448, -0.1791]) tensor([0.0416, 0.1888, 0.5349, 0.2347]) -Greedy action tensor([-1.6866, -0.6291, 0.9625, 0.6880]) tensor([0.0348, 0.1001, 0.4916, 0.3736]) -Greedy action tensor([-1.7773, -0.4614, 0.5848, -0.1164]) tensor([0.0485, 0.1809, 0.5151, 0.2555]) -Greedy action tensor([-1.8507, -0.4346, 0.6304, -0.1082]) tensor([0.0439, 0.1808, 0.5246, 0.2507]) -Greedy action tensor([-0.9910, -0.4076, 0.3985, -0.3887]) tensor([0.1159, 0.2076, 0.4649, 0.2116]) -Greedy action tensor([-1.7343, -0.4943, 0.7077, 0.1175]) tensor([0.0448, 0.1548, 0.5150, 0.2854]) -Greedy action tensor([-1.8919, -0.4287, 0.6390, -0.1492]) tensor([0.0424, 0.1831, 0.5325, 0.2421]) -Greedy action tensor([-1.6400, -0.5356, 0.5710, 0.1626]) tensor([0.0521, 0.1571, 0.4751, 0.3158]) -Greedy action tensor([-1.2799, -0.4925, 0.3573, 0.2840]) tensor([0.0762, 0.1676, 0.3920, 0.3642]) -Greedy action tensor([-1.1498, -0.6254, 0.9009, 1.2400]) tensor([0.0468, 0.0790, 0.3637, 0.5105]) -Greedy action tensor([-1.0302, -0.6409, 0.2838, 0.1760]) tensor([0.1048, 0.1547, 0.3901, 0.3503]) -Greedy action tensor([-0.8106, -0.5817, 0.2328, 0.1232]) tensor([0.1309, 0.1646, 0.3716, 0.3330]) -Greedy action tensor([-1.1976, -0.4131, 0.6601, -0.6479]) tensor([0.0882, 0.1934, 0.5655, 0.1529]) -Greedy action tensor([-1.1328, -0.7018, 1.2823, 1.4545]) tensor([0.0370, 0.0569, 0.4141, 0.4919]) -Greedy action tensor([-0.8635, -0.5221, 0.4432, 0.9800]) tensor([0.0805, 0.1133, 0.2974, 0.5088]) -Greedy action tensor([-1.5939, 0.3155, 0.3646, 0.0519]) tensor([0.0499, 0.3371, 0.3540, 0.2590]) -Greedy action tensor([-1.8412, -0.4908, 0.7016, -0.0432]) tensor([0.0424, 0.1634, 0.5385, 0.2557]) -Greedy action tensor([-1.2854, -0.0860, 0.6425, -0.7088]) tensor([0.0771, 0.2558, 0.5300, 0.1372]) -Greedy action tensor([-1.8290, -0.4221, 0.6132, -0.0898]) tensor([0.0449, 0.1833, 0.5162, 0.2556]) -Greedy action tensor([-1.7414, -0.4116, 0.5567, -0.0643]) tensor([0.0498, 0.1882, 0.4956, 0.2663]) -Greedy action tensor([-1.7361, -0.5067, 0.5693, -0.0295]) tensor([0.0501, 0.1713, 0.5025, 0.2761]) -Greedy action tensor([-1.5104, -0.2776, 0.6477, 0.3016]) tensor([0.0521, 0.1786, 0.4506, 0.3188]) -Greedy action tensor([-1.9369, -0.4582, 0.6680, -0.1724]) tensor([0.0404, 0.1772, 0.5465, 0.2359]) -Greedy action tensor([-0.8779, -0.6147, 0.1636, 0.3430]) tensor([0.1173, 0.1526, 0.3324, 0.3977]) -Greedy action tensor([-1.3643, -0.5197, 0.3986, 0.1670]) tensor([0.0726, 0.1689, 0.4230, 0.3356]) -Greedy action tensor([-1.2181, -0.6223, 0.3257, 0.4262]) tensor([0.0789, 0.1432, 0.3694, 0.4085]) -Greedy action tensor([-0.7321, -0.2697, 1.2072, 1.5001]) tensor([0.0530, 0.0842, 0.3687, 0.4941]) -Greedy action tensor([-0.5232, -0.1230, 1.0280, 1.4865]) tensor([0.0682, 0.1017, 0.3215, 0.5086]) -Greedy action tensor([0.6106, 0.6077, 1.3808, 0.9002]) tensor([0.1820, 0.1815, 0.3933, 0.2432]) -Greedy action tensor([ 0.1767, -0.0346, -1.1268, -0.1680]) tensor([0.3585, 0.2902, 0.0974, 0.2540]) -Greedy action tensor([ 0.1607, 0.7274, 1.5943, -0.2029]) tensor([0.1307, 0.2303, 0.5481, 0.0909]) -Greedy action tensor([-0.1005, -0.1653, 0.0659, 0.9133]) tensor([0.1702, 0.1596, 0.2011, 0.4692]) -Greedy action tensor([-0.6938, -2.1579, 0.5443, 0.0619]) tensor([0.1469, 0.0340, 0.5065, 0.3127]) -Greedy action tensor([-0.2302, -1.5108, 1.1589, 0.7494]) tensor([0.1258, 0.0349, 0.5044, 0.3349]) -Greedy action tensor([ 1.5746, -0.5522, 0.4739, 1.0120]) tensor([0.4947, 0.0590, 0.1645, 0.2818]) -Greedy action tensor([0.8583, 0.1870, 0.3349, 1.0142]) tensor([0.3056, 0.1562, 0.1811, 0.3572]) -Greedy action tensor([1.3823, 0.0415, 0.3159, 2.1263]) tensor([0.2695, 0.0705, 0.0928, 0.5672]) -Greedy action tensor([-0.0225, -1.0917, 1.2136, -0.1296]) tensor([0.1759, 0.0604, 0.6056, 0.1581]) -Greedy action tensor([-0.0597, -2.0592, -0.0695, -0.4513]) tensor([0.3569, 0.0483, 0.3535, 0.2413]) -Greedy action tensor([ 0.8262, -0.6493, 0.1765, -0.6786]) tensor([0.5069, 0.1159, 0.2647, 0.1126]) -Greedy action tensor([ 1.2995, -0.3643, -0.0074, 1.1259]) tensor([0.4346, 0.0823, 0.1176, 0.3654]) -Greedy action tensor([-0.4345, -0.8811, -0.3960, 0.8890]) tensor([0.1554, 0.0994, 0.1615, 0.5837]) -Greedy action tensor([ 0.4837, -0.5011, 0.1207, -0.2015]) tensor([0.3886, 0.1452, 0.2703, 0.1959]) -Greedy action tensor([ 0.3114, -1.7854, -0.0950, -0.0317]) tensor([0.4002, 0.0492, 0.2666, 0.2840]) -Greedy action tensor([ 0.0530, -0.4560, 0.0819, 0.4731]) tensor([0.2408, 0.1448, 0.2479, 0.3665]) -Greedy action tensor([-0.7975, -2.0880, -0.1183, 0.6441]) tensor([0.1338, 0.0368, 0.2639, 0.5655]) -Greedy action tensor([ 0.2185, -0.5508, -0.2091, 1.7545]) tensor([0.1479, 0.0685, 0.0964, 0.6871]) -Greedy action tensor([-0.7929, -1.6975, -0.1097, -0.2425]) tensor([0.1954, 0.0791, 0.3869, 0.3387]) -Greedy action tensor([ 0.7840, -0.6341, 0.8985, 0.9705]) tensor([0.2802, 0.0679, 0.3142, 0.3377]) -Greedy action tensor([-0.0048, -1.3815, 1.5755, -0.5152]) tensor([0.1490, 0.0376, 0.7239, 0.0895]) -Greedy action tensor([-1.2561, -0.4704, 1.0475, 0.2896]) tensor([0.0559, 0.1226, 0.5594, 0.2622]) -Greedy action tensor([1.1451, 0.7843, 1.2592, 0.3389]) tensor([0.3063, 0.2135, 0.3433, 0.1368]) -Greedy action tensor([-0.9296, 0.3390, 0.0948, -1.2055]) tensor([0.1234, 0.4390, 0.3439, 0.0937]) -Greedy action tensor([-0.7918, -0.1875, -0.2323, 0.2651]) tensor([0.1341, 0.2454, 0.2346, 0.3859]) -Greedy action tensor([ 0.0246, -1.1119, 1.8667, 1.4446]) tensor([0.0850, 0.0273, 0.5362, 0.3516]) -Greedy action tensor([0.4088, 0.2451, 0.6155, 1.3116]) tensor([0.1803, 0.1531, 0.2217, 0.4448]) -Greedy action tensor([0.0717, 0.2470, 0.5115, 0.2908]) tensor([0.2004, 0.2389, 0.3112, 0.2495]) -Greedy action tensor([0.4007, 1.3153, 0.6157, 0.0342]) tensor([0.1842, 0.4597, 0.2284, 0.1277]) -Greedy action tensor([ 0.7678, -0.0214, -0.8830, 0.3593]) tensor([0.4328, 0.1966, 0.0830, 0.2876]) -Greedy action tensor([0.2441, 0.2383, 0.3679, 1.2622]) tensor([0.1697, 0.1687, 0.1920, 0.4696]) -Greedy action tensor([ 1.4383, -0.7824, 1.5374, 0.8619]) tensor([0.3604, 0.0391, 0.3979, 0.2025]) -Greedy action tensor([ 0.3630, 0.1806, -0.1091, 0.7577]) tensor([0.2538, 0.2114, 0.1583, 0.3766]) -Greedy action tensor([-0.3242, -0.3599, -0.0219, 2.3752]) tensor([0.0550, 0.0531, 0.0744, 0.8176]) -Greedy action tensor([ 1.8452, -2.1172, 0.2508, 1.1073]) tensor([0.5882, 0.0112, 0.1194, 0.2812]) -Greedy action tensor([-0.4481, -0.4337, -0.0617, -1.2565]) tensor([0.2543, 0.2580, 0.3743, 0.1133]) -Greedy action tensor([-0.6997, -2.2686, 0.1731, 0.4306]) tensor([0.1493, 0.0311, 0.3573, 0.4623]) -Greedy action tensor([ 0.6950, -0.4893, 0.8576, 0.7268]) tensor([0.2845, 0.0870, 0.3347, 0.2937]) -Greedy action tensor([ 0.3084, 0.8793, 0.4139, -0.2085]) tensor([0.2233, 0.3953, 0.2482, 0.1332]) -Greedy action tensor([-0.8711, 0.0717, -0.2012, -0.3517]) tensor([0.1389, 0.3564, 0.2713, 0.2334]) -Greedy action tensor([ 0.5470, -0.2555, 0.1886, 1.6755]) tensor([0.1909, 0.0856, 0.1334, 0.5901]) -Greedy action tensor([1.1115, 0.3940, 0.0578, 0.7370]) tensor([0.3962, 0.1933, 0.1381, 0.2724]) -Greedy action tensor([ 0.2337, 0.5543, 0.0838, -0.1450]) tensor([0.2549, 0.3512, 0.2194, 0.1745]) -Greedy action tensor([ 0.6381, -1.1715, 0.5706, 0.4643]) tensor([0.3403, 0.0557, 0.3180, 0.2860]) -Greedy action tensor([ 0.2214, -0.1270, -0.3417, -0.0148]) tensor([0.3263, 0.2303, 0.1858, 0.2576]) -Greedy action tensor([ 0.1620, -1.1202, 0.5764, 0.3258]) tensor([0.2520, 0.0699, 0.3813, 0.2968]) -Greedy action tensor([-0.8117, -0.9225, -0.0050, -0.8536]) tensor([0.1963, 0.1757, 0.4398, 0.1882]) -Greedy action tensor([0.5065, 0.4078, 0.4731, 0.3336]) tensor([0.2692, 0.2439, 0.2604, 0.2265]) -Greedy action tensor([-0.2499, 0.3547, -0.1662, -0.6209]) tensor([0.2170, 0.3973, 0.2360, 0.1498]) -Greedy action tensor([ 1.7781, -0.3182, 0.7104, -0.0150]) tensor([0.6123, 0.0753, 0.2105, 0.1019]) -Greedy action tensor([ 1.1411, -0.5477, -0.3607, 1.1209]) tensor([0.4189, 0.0774, 0.0933, 0.4105]) -Greedy action tensor([-0.4659, -0.4279, -0.1714, 0.2583]) tensor([0.1837, 0.1908, 0.2466, 0.3789]) -Greedy action tensor([-0.7520, -1.3451, -0.4740, -0.7357]) tensor([0.2571, 0.1421, 0.3395, 0.2613]) -Greedy action tensor([ 0.1030, -0.0985, 0.0102, 1.2694]) tensor([0.1684, 0.1377, 0.1534, 0.5405]) -Greedy action tensor([ 1.1864, 0.0554, -0.7782, 0.1746]) tensor([0.5475, 0.1767, 0.0768, 0.1991]) -Greedy action tensor([-0.8746, -1.6061, 0.3283, 0.5024]) tensor([0.1140, 0.0548, 0.3795, 0.4517]) -Greedy action tensor([ 0.1758, -1.0428, -0.5200, -0.9180]) tensor([0.4697, 0.1388, 0.2342, 0.1573]) -Greedy action tensor([-1.0834, 0.1602, 0.8835, -0.3951]) tensor([0.0735, 0.2549, 0.5254, 0.1463]) -Greedy action tensor([ 1.0326, 0.5722, 0.5441, -0.4419]) tensor([0.4043, 0.2551, 0.2481, 0.0925]) -Greedy action tensor([ 0.1070, -0.7907, 0.6318, -1.4172]) tensor([0.3016, 0.1229, 0.5098, 0.0657]) -Greedy action tensor([ 0.3454, 1.0312, -0.5394, 0.7506]) tensor([0.2042, 0.4053, 0.0843, 0.3062]) -Greedy action tensor([ 1.0326, -1.7750, -0.0131, -0.1318]) tensor([0.5801, 0.0350, 0.2039, 0.1811]) -Greedy action tensor([-0.9360, 1.1300, 1.6483, -1.6708]) tensor([0.0442, 0.3488, 0.5858, 0.0212]) -Greedy action tensor([ 0.5214, -1.3320, -0.5792, 0.6593]) tensor([0.3792, 0.0594, 0.1261, 0.4352]) -Greedy action tensor([-0.3488, 0.8547, 1.8841, -0.8500]) tensor([0.0701, 0.2336, 0.6538, 0.0425]) -Greedy action tensor([-0.4722, -0.5160, 1.3295, -0.7147]) tensor([0.1136, 0.1087, 0.6885, 0.0891]) -Greedy action tensor([ 0.1361, -0.2393, 0.0795, -1.0634]) tensor([0.3409, 0.2342, 0.3222, 0.1027]) -Greedy action tensor([-0.0019, 0.2680, 1.3628, -0.6228]) tensor([0.1479, 0.1937, 0.5789, 0.0795]) -Greedy action tensor([-0.7277, -0.1710, -0.4065, 0.6365]) tensor([0.1244, 0.2171, 0.1716, 0.4869]) -Greedy action tensor([ 0.8694, -1.6192, 0.5852, 0.6081]) tensor([0.3838, 0.0319, 0.2888, 0.2955]) -Greedy action tensor([0.0033, 0.1773, 1.4333, 0.6069]) tensor([0.1220, 0.1452, 0.5098, 0.2231]) -Greedy action tensor([ 1.6535, 0.3299, 0.5894, -0.2678]) tensor([0.5689, 0.1514, 0.1963, 0.0833]) -Greedy action tensor([-0.0783, -1.1882, 2.1798, -0.2872]) tensor([0.0854, 0.0282, 0.8171, 0.0693]) -Greedy action tensor([-0.0179, -0.3614, 1.1597, -0.5176]) tensor([0.1798, 0.1275, 0.5837, 0.1091]) -Greedy action tensor([ 0.5028, -0.4866, 0.7302, 0.0865]) tensor([0.3043, 0.1131, 0.3819, 0.2007]) -Greedy action tensor([ 0.9536, -0.1577, 0.1406, 1.3562]) tensor([0.3060, 0.1007, 0.1357, 0.4576]) -Greedy action tensor([-0.0369, 0.5561, -0.7517, 0.7823]) tensor([0.1796, 0.3250, 0.0879, 0.4075]) -Greedy action tensor([ 0.9211, 0.0128, -0.1618, 0.9257]) tensor([0.3641, 0.1468, 0.1233, 0.3658]) -Greedy action tensor([ 1.1271, -1.2682, -0.0976, 0.5036]) tensor([0.5205, 0.0474, 0.1530, 0.2791]) -Greedy action tensor([0.0267, 0.1686, 0.9271, 0.1960]) tensor([0.1725, 0.1988, 0.4244, 0.2043]) -Greedy action tensor([ 0.7931, -0.1368, -0.8557, 1.2812]) tensor([0.3109, 0.1227, 0.0598, 0.5066]) -Greedy action tensor([ 0.4704, -0.4553, -0.0652, -0.1113]) tensor([0.3936, 0.1560, 0.2304, 0.2200]) -Greedy action tensor([ 0.5496, -0.1154, -0.0465, -0.3503]) tensor([0.4046, 0.2081, 0.2229, 0.1645]) -Greedy action tensor([ 0.5119, -0.2239, -0.0317, -0.2100]) tensor([0.3928, 0.1882, 0.2281, 0.1909]) -Greedy action tensor([ 0.3413, 0.1110, -0.1369, -0.0020]) tensor([0.3201, 0.2543, 0.1985, 0.2271]) -Greedy action tensor([ 0.6980, -0.5174, -0.0756, -0.2134]) tensor([0.4630, 0.1373, 0.2136, 0.1861]) -Greedy action tensor([ 0.4637, -0.1550, -0.0798, -0.3585]) tensor([0.3908, 0.2105, 0.2269, 0.1717]) -Greedy action tensor([ 0.1912, 0.0362, 0.0927, -0.2027]) tensor([0.2909, 0.2492, 0.2637, 0.1962]) -Greedy action tensor([ 0.7907, -0.5233, -0.2159, -0.4681]) tensor([0.5213, 0.1401, 0.1905, 0.1481]) -Greedy action tensor([ 0.3344, -0.0497, -0.2235, -0.3717]) tensor([0.3640, 0.2479, 0.2084, 0.1797]) -Greedy action tensor([ 0.3813, -0.2492, -0.0167, -0.1756]) tensor([0.3601, 0.1917, 0.2419, 0.2063]) -Greedy action tensor([ 0.0483, 0.1110, 0.1458, -0.1034]) tensor([0.2484, 0.2644, 0.2738, 0.2134]) -Greedy action tensor([ 0.3819, 0.0624, -0.1305, 0.0564]) tensor([0.3281, 0.2384, 0.1966, 0.2369]) -Greedy action tensor([ 0.4940, -0.3839, -0.0876, -0.3947]) tensor([0.4191, 0.1742, 0.2343, 0.1724]) -Greedy action tensor([ 1.0349, -0.4401, -0.1101, -0.3361]) tensor([0.5553, 0.1270, 0.1767, 0.1410]) -Greedy action tensor([ 0.3768, -0.2362, 0.0750, -0.0101]) tensor([0.3378, 0.1830, 0.2498, 0.2294]) -Greedy action tensor([ 0.5791, 0.0256, -0.1113, -0.1420]) tensor([0.3903, 0.2244, 0.1957, 0.1897]) -Greedy action tensor([ 0.2282, 0.0198, -0.0172, -0.3660]) tensor([0.3178, 0.2580, 0.2487, 0.1754]) -Greedy action tensor([ 0.9080, -0.5292, -0.0685, -0.4159]) tensor([0.5318, 0.1264, 0.2003, 0.1415]) -Greedy action tensor([ 0.6369, -0.0409, 0.0249, -0.3917]) tensor([0.4154, 0.2109, 0.2252, 0.1485]) -Greedy action tensor([ 0.8832, -0.7750, -0.0873, -0.4181]) tensor([0.5430, 0.1034, 0.2058, 0.1478]) -Greedy action tensor([ 0.9670, -0.4200, 0.0724, -0.8143]) tensor([0.5473, 0.1367, 0.2237, 0.0922]) -Greedy action tensor([ 0.6275, -0.4387, 0.0044, -0.4855]) tensor([0.4527, 0.1559, 0.2428, 0.1487]) -Greedy action tensor([ 0.3236, -0.1342, -0.0360, -0.1013]) tensor([0.3351, 0.2120, 0.2339, 0.2191]) -Greedy action tensor([ 0.7445, -0.9218, 0.0849, -0.5338]) tensor([0.5039, 0.0952, 0.2606, 0.1403]) -Greedy action tensor([ 0.4654, -0.3294, 0.0638, -0.2993]) tensor([0.3866, 0.1746, 0.2588, 0.1800]) -Greedy action tensor([ 0.4098, -0.1281, -0.0334, -0.3349]) tensor([0.3703, 0.2162, 0.2377, 0.1758]) -Greedy action tensor([ 0.6143, -0.4420, -0.0859, -0.2833]) tensor([0.4441, 0.1544, 0.2205, 0.1810]) -Greedy action tensor([ 0.3501, 0.0104, -0.2125, -0.1497]) tensor([0.3462, 0.2465, 0.1973, 0.2100]) -Greedy action tensor([ 0.4474, -0.1658, -0.0803, -0.3791]) tensor([0.3892, 0.2108, 0.2296, 0.1703]) -Greedy action tensor([-0.0503, -0.0219, -0.0470, -0.1010]) tensor([0.2511, 0.2583, 0.2519, 0.2387]) -Greedy action tensor([ 0.7002, -0.2768, -0.1079, -0.4706]) tensor([0.4690, 0.1765, 0.2090, 0.1454]) -Greedy action tensor([ 0.3694, 0.0138, -0.1179, -0.1203]) tensor([0.3415, 0.2393, 0.2098, 0.2093]) -Greedy action tensor([ 0.8299, -0.5835, -0.0966, -0.4975]) tensor([0.5251, 0.1278, 0.2079, 0.1392]) -Greedy action tensor([ 0.7010, -0.1979, -0.1008, -0.3042]) tensor([0.4501, 0.1832, 0.2019, 0.1647]) -Greedy action tensor([ 0.3185, -0.2026, -0.0935, -0.3518]) tensor([0.3613, 0.2146, 0.2393, 0.1848]) -Greedy action tensor([ 0.3532, -0.2340, 0.1911, -0.1939]) tensor([0.3350, 0.1862, 0.2849, 0.1939]) -Greedy action tensor([ 0.8458, -0.5692, 0.0274, -0.3224]) tensor([0.5013, 0.1218, 0.2211, 0.1559]) -Greedy action tensor([ 0.4806, -0.2769, -0.1277, -0.3333]) tensor([0.4071, 0.1909, 0.2216, 0.1804]) -Greedy action tensor([ 0.8471, -0.7460, -0.3309, -0.7166]) tensor([0.5812, 0.1182, 0.1790, 0.1217]) -Greedy action tensor([ 0.8276, -0.6325, -0.0569, -0.2927]) tensor([0.5073, 0.1178, 0.2095, 0.1655]) -Greedy action tensor([ 0.3512, 0.0733, -0.0067, -0.2298]) tensor([0.3316, 0.2511, 0.2318, 0.1855]) -Greedy action tensor([ 0.6087, 0.5251, -0.2611, 0.2578]) tensor([0.3286, 0.3023, 0.1377, 0.2314]) -Greedy action tensor([ 0.7233, -0.6075, -0.0493, -0.4807]) tensor([0.4936, 0.1304, 0.2279, 0.1481]) -Greedy action tensor([ 0.3625, -0.1181, -0.0121, -0.3021]) tensor([0.3546, 0.2193, 0.2438, 0.1824]) -Greedy action tensor([ 0.4501, 0.0102, -0.0399, -0.3044]) tensor([0.3667, 0.2362, 0.2246, 0.1724]) -Greedy action tensor([ 0.7641, -0.4671, 0.1311, -0.5418]) tensor([0.4776, 0.1394, 0.2536, 0.1294]) -Greedy action tensor([ 0.1789, -0.0397, -0.0883, -0.1820]) tensor([0.3062, 0.2460, 0.2344, 0.2134]) -Greedy action tensor([ 0.4558, -0.1933, -0.0264, -0.5913]) tensor([0.4015, 0.2098, 0.2479, 0.1409]) -Greedy action tensor([ 0.9826, -0.5030, -0.0976, -0.3364]) tensor([0.5455, 0.1235, 0.1852, 0.1459]) -Greedy action tensor([ 0.5679, -0.5192, 0.0624, -0.5230]) tensor([0.4393, 0.1481, 0.2650, 0.1476]) -Greedy action tensor([ 0.8482, -0.3647, 0.0037, -0.5390]) tensor([0.5059, 0.1504, 0.2174, 0.1263]) -Greedy action tensor([ 0.5945, -0.4422, -0.1409, -0.3186]) tensor([0.4474, 0.1587, 0.2144, 0.1795]) -Greedy action tensor([ 0.6143, -0.6621, 0.0310, -0.4395]) tensor([0.4575, 0.1277, 0.2553, 0.1595]) -Greedy action tensor([ 0.7489, -0.5478, 0.0554, -0.3329]) tensor([0.4734, 0.1295, 0.2366, 0.1605]) -Greedy action tensor([ 0.2208, 0.1366, -0.0706, -0.3724]) tensor([0.3107, 0.2856, 0.2321, 0.1716]) -Greedy action tensor([ 0.8598, -0.4433, -0.0872, -0.4886]) tensor([0.5210, 0.1416, 0.2021, 0.1353]) -Greedy action tensor([ 0.4898, 0.1302, -0.1710, -0.1781]) tensor([0.3667, 0.2559, 0.1894, 0.1880]) -Greedy action tensor([ 1.3460, -1.0696, 0.0245, -0.8506]) tensor([0.6816, 0.0609, 0.1818, 0.0758]) -Greedy action tensor([ 0.6818, -0.5227, -0.1275, -0.5049]) tensor([0.4877, 0.1462, 0.2171, 0.1489]) -Greedy action tensor([ 0.6354, -0.7691, -0.0677, -0.4790]) tensor([0.4834, 0.1187, 0.2393, 0.1586]) -Greedy action tensor([ 0.5063, -0.2455, -0.1100, -0.7216]) tensor([0.4340, 0.2046, 0.2343, 0.1271]) -Greedy action tensor([ 0.6615, -0.3110, -0.0466, -0.3862]) tensor([0.4502, 0.1702, 0.2217, 0.1579]) -Greedy action tensor([ 0.4816, -0.2629, -0.0793, -0.4272]) tensor([0.4084, 0.1940, 0.2331, 0.1646]) -Greedy action tensor([ 0.5790, -0.3338, -0.0996, -0.5718]) tensor([0.4494, 0.1804, 0.2280, 0.1422]) -Greedy action tensor([ 0.8494, -1.0371, 0.1473, -0.6529]) tensor([0.5348, 0.0811, 0.2650, 0.1191]) -Greedy action tensor([ 0.6708, -0.1242, -0.0891, -0.4641]) tensor([0.4463, 0.2015, 0.2087, 0.1435]) -Greedy action tensor([ 0.5355, -0.5601, -0.1200, -0.0934]) tensor([0.4190, 0.1401, 0.2175, 0.2234]) -Greedy action tensor([ 0.6003, -0.2636, -0.2024, -0.1849]) tensor([0.4300, 0.1812, 0.1927, 0.1961]) -Greedy action tensor([ 0.4281, -0.3104, 0.0579, -0.3385]) tensor([0.3798, 0.1815, 0.2623, 0.1764]) -Greedy action tensor([ 0.5434, -0.1414, -0.0213, -0.3644]) tensor([0.4039, 0.2036, 0.2296, 0.1629]) -Greedy action tensor([ 1.0930, -0.2703, 0.0282, -0.3257]) tensor([0.5427, 0.1388, 0.1871, 0.1314]) -Greedy action tensor([ 0.5522, 0.2708, -0.0629, -0.2875]) tensor([0.3667, 0.2768, 0.1982, 0.1583]) -Greedy action tensor([ 0.7161, -0.4316, -0.1875, -0.3374]) tensor([0.4828, 0.1532, 0.1956, 0.1684]) -Greedy action tensor([ 0.3781, -0.0854, -0.2009, -0.2841]) tensor([0.3696, 0.2325, 0.2072, 0.1906]) -Greedy action tensor([ 0.7191, -0.5506, -0.0496, -0.3492]) tensor([0.4789, 0.1345, 0.2220, 0.1645]) -Greedy action tensor([ 0.3664, -0.0789, -0.0888, -0.1859]) tensor([0.3508, 0.2247, 0.2225, 0.2019]) -Greedy action tensor([ 0.5395, -0.2421, -0.0642, -0.2027]) tensor([0.4031, 0.1845, 0.2204, 0.1919]) -Greedy action tensor([ 0.5779, -0.2156, -0.0890, -0.3128]) tensor([0.4209, 0.1904, 0.2160, 0.1727]) -Greedy action tensor([ 0.2376, 0.1196, -0.1289, -0.0381]) tensor([0.2993, 0.2660, 0.2075, 0.2272]) -Greedy action tensor([ 0.6782, -0.4517, -0.0013, -0.4165]) tensor([0.4620, 0.1492, 0.2342, 0.1546]) -Greedy action tensor([ 0.5776, -0.3348, -0.1319, -0.4381]) tensor([0.4433, 0.1780, 0.2181, 0.1606]) -Greedy action tensor([ 0.8657, -0.5806, -0.4562, 0.5013]) tensor([0.4552, 0.1072, 0.1214, 0.3162]) -Greedy action tensor([ 1.9846, -0.5991, -0.3418, 0.1744]) tensor([0.7481, 0.0565, 0.0730, 0.1224]) -Greedy action tensor([ 1.1145, 0.0335, -0.2735, 0.1024]) tensor([0.5122, 0.1738, 0.1278, 0.1862]) -Greedy action tensor([ 1.8955, 0.1538, -0.1962, -0.0747]) tensor([0.6953, 0.1218, 0.0859, 0.0969]) -Greedy action tensor([ 0.8342, -0.3015, -0.1005, 0.1501]) tensor([0.4508, 0.1448, 0.1770, 0.2274]) -Greedy action tensor([ 1.4371, -0.2869, -0.2264, 0.3144]) tensor([0.5906, 0.1053, 0.1119, 0.1922]) -Greedy action tensor([ 1.2704, -0.4382, -0.1227, 0.0187]) tensor([0.5829, 0.1056, 0.1447, 0.1667]) -Greedy action tensor([ 1.4802, -0.9136, -0.5438, 0.2239]) tensor([0.6631, 0.0605, 0.0876, 0.1888]) -Greedy action tensor([ 1.1187, -0.1756, -0.0970, 0.1411]) tensor([0.5137, 0.1408, 0.1523, 0.1932]) -Greedy action tensor([ 0.9403, -0.4005, -0.2183, 0.3827]) tensor([0.4655, 0.1218, 0.1461, 0.2665]) -Greedy action tensor([ 1.7282, -1.2784, -0.0824, 0.4524]) tensor([0.6701, 0.0331, 0.1096, 0.1871]) -Greedy action tensor([ 1.8393, -0.2954, -0.2719, 0.3633]) tensor([0.6812, 0.0806, 0.0825, 0.1557]) -Greedy action tensor([ 1.0681, -0.6233, -0.1990, 0.7504]) tensor([0.4558, 0.0840, 0.1284, 0.3318]) -Greedy action tensor([ 0.8534, -0.3197, -0.3511, 0.5331]) tensor([0.4282, 0.1325, 0.1284, 0.3109]) -Greedy action tensor([ 2.2294, -1.0138, -0.5031, 0.7593]) tensor([0.7496, 0.0293, 0.0488, 0.1723]) -Greedy action tensor([ 1.5059, -0.6168, -0.2674, 0.7320]) tensor([0.5712, 0.0684, 0.0970, 0.2635]) -Greedy action tensor([ 0.8534, -0.1009, -0.0512, -0.0443]) tensor([0.4551, 0.1752, 0.1842, 0.1855]) -Greedy action tensor([ 1.8715, -0.4226, -0.6050, 0.5151]) tensor([0.6933, 0.0699, 0.0583, 0.1786]) -Greedy action tensor([ 1.6317, -0.3131, -0.6574, -0.2281]) tensor([0.7143, 0.1021, 0.0724, 0.1112]) -Greedy action tensor([ 1.8507, -0.9351, -0.0370, 0.3759]) tensor([0.6935, 0.0428, 0.1050, 0.1587]) -Greedy action tensor([ 0.9569, -0.3572, -0.1124, 0.3320]) tensor([0.4657, 0.1251, 0.1599, 0.2493]) -Greedy action tensor([ 1.1434, -0.2043, -0.1715, 0.0540]) tensor([0.5363, 0.1393, 0.1440, 0.1804]) -Greedy action tensor([ 1.1024, -0.2276, -0.1466, 0.2648]) tensor([0.5040, 0.1333, 0.1446, 0.2181]) -Greedy action tensor([ 1.4381, -0.5076, -0.5120, 0.3072]) tensor([0.6219, 0.0889, 0.0885, 0.2007]) -Greedy action tensor([ 1.7312, -0.5280, -0.0693, 0.5663]) tensor([0.6323, 0.0660, 0.1045, 0.1972]) -Greedy action tensor([ 1.3192, -0.8337, -0.1551, 0.6443]) tensor([0.5393, 0.0626, 0.1235, 0.2746]) -Greedy action tensor([ 0.5914, 0.1164, 0.0869, -0.1691]) tensor([0.3713, 0.2309, 0.2242, 0.1736]) -Greedy action tensor([ 1.8978, -1.2625, 0.0815, 0.6602]) tensor([0.6688, 0.0284, 0.1088, 0.1940]) -Greedy action tensor([ 0.6968, -0.1854, -0.3033, 0.3259]) tensor([0.4046, 0.1674, 0.1488, 0.2792]) -Greedy action tensor([ 1.5712, -0.8957, -0.1480, 0.6215]) tensor([0.6057, 0.0514, 0.1086, 0.2343]) -Greedy action tensor([ 0.5832, -0.2912, -0.3846, 0.1145]) tensor([0.4127, 0.1722, 0.1568, 0.2583]) -Greedy action tensor([ 0.4901, -0.3669, -0.5679, 0.2972]) tensor([0.3852, 0.1635, 0.1337, 0.3176]) -Greedy action tensor([ 1.1136, -0.5362, -0.1502, 0.5311]) tensor([0.4919, 0.0945, 0.1390, 0.2747]) -Greedy action tensor([ 0.8198, -0.4619, 0.0607, -0.0663]) tensor([0.4634, 0.1286, 0.2169, 0.1910]) -Greedy action tensor([ 1.3000, 0.0119, -0.1309, 0.3145]) tensor([0.5296, 0.1461, 0.1266, 0.1977]) -Greedy action tensor([ 1.9096, -0.0122, -0.1799, 0.0770]) tensor([0.6993, 0.1023, 0.0865, 0.1119]) -Greedy action tensor([ 0.8787, 0.0106, -0.0630, -0.2138]) tensor([0.4662, 0.1957, 0.1818, 0.1563]) -Greedy action tensor([ 1.4269, -0.4647, -0.4130, 0.2638]) tensor([0.6165, 0.0930, 0.0979, 0.1926]) -Greedy action tensor([ 1.4804, 0.3865, -0.0192, -0.5777]) tensor([0.5932, 0.1987, 0.1324, 0.0757]) -Greedy action tensor([ 0.8763, -0.1403, -0.0331, 0.0709]) tensor([0.4522, 0.1636, 0.1821, 0.2021]) -Greedy action tensor([ 1.4286, -0.7227, -0.2683, 0.5096]) tensor([0.5888, 0.0685, 0.1079, 0.2349]) -Greedy action tensor([ 1.4435, -0.7444, -0.4442, 0.6772]) tensor([0.5786, 0.0649, 0.0876, 0.2689]) -Greedy action tensor([ 1.4075, -0.4284, -0.1406, 0.0505]) tensor([0.6137, 0.0979, 0.1305, 0.1580]) -Greedy action tensor([ 0.9717, 0.0668, -0.2747, -0.2049]) tensor([0.4999, 0.2022, 0.1437, 0.1541]) -Greedy action tensor([ 1.3927, -0.3145, -0.3689, -0.2481]) tensor([0.6464, 0.1172, 0.1110, 0.1253]) -Greedy action tensor([ 0.5985, -0.1173, -0.1668, -0.1992]) tensor([0.4159, 0.2033, 0.1935, 0.1873]) -Greedy action tensor([ 1.0232, -0.5651, -0.2677, 0.6018]) tensor([0.4683, 0.0957, 0.1288, 0.3072]) -Greedy action tensor([ 1.4808, -0.5984, -0.2951, 0.6401]) tensor([0.5795, 0.0724, 0.0981, 0.2500]) -Greedy action tensor([ 1.4408, -0.3700, -0.1716, 0.3994]) tensor([0.5828, 0.0953, 0.1162, 0.2057]) -Greedy action tensor([ 1.8580, 0.2609, -0.3491, 0.2750]) tensor([0.6588, 0.1334, 0.0725, 0.1353]) -Greedy action tensor([ 1.4798, -0.1054, -0.2970, 0.3289]) tensor([0.5916, 0.1212, 0.1001, 0.1871]) -Greedy action tensor([ 1.3758, -0.3026, -0.2980, 0.1698]) tensor([0.5975, 0.1115, 0.1121, 0.1789]) -Greedy action tensor([ 1.1420, -0.5309, -0.4154, 0.1119]) tensor([0.5697, 0.1069, 0.1200, 0.2034]) -Greedy action tensor([1.8881, 0.1741, 0.4182, 0.1303]) tensor([0.6319, 0.1138, 0.1453, 0.1090]) -Greedy action tensor([ 1.1851, -0.2942, -0.2545, 0.6889]) tensor([0.4822, 0.1098, 0.1143, 0.2936]) -Greedy action tensor([ 1.4740, -0.7838, -0.1009, 0.3277]) tensor([0.6137, 0.0642, 0.1271, 0.1950]) -Greedy action tensor([ 0.7800, -0.0138, 0.1896, -0.1552]) tensor([0.4169, 0.1885, 0.2310, 0.1636]) -Greedy action tensor([ 1.7378, -0.3749, -0.5533, 0.2955]) tensor([0.6857, 0.0829, 0.0694, 0.1621]) -Greedy action tensor([ 2.4650, -0.6118, -0.3922, 0.7467]) tensor([0.7795, 0.0359, 0.0448, 0.1398]) -Greedy action tensor([ 1.2278, -0.5539, -0.2871, 0.1526]) tensor([0.5782, 0.0974, 0.1271, 0.1973]) -Greedy action tensor([ 0.8823, -0.1879, -0.2780, 0.4447]) tensor([0.4344, 0.1490, 0.1361, 0.2805]) -Greedy action tensor([ 1.3655, -0.2120, -0.3533, 0.3300]) tensor([0.5744, 0.1186, 0.1030, 0.2040]) -Greedy action tensor([ 1.0735, -0.9583, 0.0658, 0.1380]) tensor([0.5295, 0.0694, 0.1933, 0.2078]) -Greedy action tensor([ 1.2167, -0.5254, -0.3770, 0.3894]) tensor([0.5508, 0.0965, 0.1119, 0.2408]) -Greedy action tensor([ 1.1154, -0.4048, -0.2704, -0.3672]) tensor([0.5897, 0.1289, 0.1475, 0.1339]) -Greedy action tensor([ 1.1104, 0.3227, -0.3298, 0.2459]) tensor([0.4733, 0.2153, 0.1121, 0.1994]) -Greedy action tensor([ 1.3890, -0.1561, -0.2846, 0.2650]) tensor([0.5794, 0.1236, 0.1087, 0.1883]) -Greedy action tensor([ 1.0881, -0.3167, -0.3252, 0.1554]) tensor([0.5313, 0.1304, 0.1293, 0.2091]) -Greedy action tensor([ 1.3352, -0.8536, -0.0937, 0.4073]) tensor([0.5724, 0.0641, 0.1371, 0.2263]) -Greedy action tensor([ 0.3355, -0.2087, -0.1196, 0.3805]) tensor([0.3067, 0.1780, 0.1946, 0.3208]) -Greedy action tensor([ 1.2243, -0.6661, -0.0348, 0.5783]) tensor([0.5105, 0.0771, 0.1449, 0.2675]) -Greedy action tensor([ 1.3429, -0.0375, -0.3550, 0.2145]) tensor([0.5688, 0.1430, 0.1041, 0.1840]) -Greedy action tensor([ 0.9337, -0.3018, 0.0184, -0.2335]) tensor([0.4994, 0.1452, 0.2000, 0.1554]) -Greedy action tensor([ 1.6199, -0.8064, -0.3195, 0.0844]) tensor([0.6908, 0.0610, 0.0993, 0.1488]) -Greedy action tensor([ 1.3427, -0.5035, 0.0467, 0.2387]) tensor([0.5672, 0.0895, 0.1552, 0.1881]) -Greedy action tensor([ 1.6111, -0.3879, -0.2174, -0.1113]) tensor([0.6781, 0.0919, 0.1089, 0.1211]) -Greedy action tensor([ 1.1624, -0.0806, -0.2652, 0.0292]) tensor([0.5404, 0.1559, 0.1296, 0.1740]) -Greedy action tensor([ 1.2206, -0.3954, -0.2125, 0.3916]) tensor([0.5337, 0.1060, 0.1273, 0.2329]) -Greedy action tensor([ 1.0803, 0.2420, -0.0312, -0.2618]) tensor([0.4944, 0.2138, 0.1627, 0.1292]) -Greedy action tensor([ 1.3129, -0.3199, -0.1577, 0.1188]) tensor([0.5787, 0.1131, 0.1330, 0.1753]) -Greedy action tensor([ 1.1135, -0.4206, -0.1929, -0.1085]) tensor([0.5615, 0.1211, 0.1520, 0.1654]) -Greedy action tensor([-1.9138, -0.4197, 0.6491, -0.1643]) tensor([0.0414, 0.1843, 0.5365, 0.2379]) -Greedy action tensor([-1.0827, -0.6002, 0.4993, -0.1577]) tensor([0.0999, 0.1619, 0.4862, 0.2520]) -Greedy action tensor([-1.6769, -0.4829, 0.6837, 0.3512]) tensor([0.0444, 0.1467, 0.4710, 0.3378]) -Greedy action tensor([-1.8450, -0.4069, 0.6038, -0.1163]) tensor([0.0446, 0.1879, 0.5162, 0.2513]) -Greedy action tensor([-1.1983, -0.4510, 0.6764, 0.8780]) tensor([0.0568, 0.1199, 0.3703, 0.4530]) -Greedy action tensor([-1.4836, -0.5912, 0.4308, 0.0851]) tensor([0.0666, 0.1625, 0.4515, 0.3195]) -Greedy action tensor([-1.7506, -0.2532, 0.6150, 0.0060]) tensor([0.0456, 0.2040, 0.4860, 0.2643]) -Greedy action tensor([-0.4783, 0.4514, 0.5302, 1.3163]) tensor([0.0814, 0.2061, 0.2230, 0.4895]) -Greedy action tensor([-1.9817, -0.7066, 1.4056, 0.8022]) tensor([0.0199, 0.0711, 0.5877, 0.3214]) -Greedy action tensor([-1.4244, -0.3123, 0.5426, 0.2822]) tensor([0.0599, 0.1821, 0.4281, 0.3300]) -Greedy action tensor([-1.8577, -0.3599, 0.6116, -0.1061]) tensor([0.0434, 0.1940, 0.5125, 0.2501]) -Greedy action tensor([-0.0123, -0.1845, 0.8920, 1.6991]) tensor([0.1015, 0.0855, 0.2508, 0.5622]) -Greedy action tensor([-1.1090, -0.6205, 0.2253, 0.3154]) tensor([0.0945, 0.1540, 0.3588, 0.3927]) -Greedy action tensor([-1.8936, -0.4198, 0.6357, -0.1456]) tensor([0.0423, 0.1846, 0.5304, 0.2428]) -Greedy action tensor([-1.8991, -0.5240, 0.7278, -0.0189]) tensor([0.0395, 0.1561, 0.5458, 0.2587]) -Greedy action tensor([-1.9372, -0.4489, 0.6667, -0.1738]) tensor([0.0404, 0.1788, 0.5455, 0.2354]) -Greedy action tensor([-1.7525, -0.4036, 0.6381, -0.2786]) tensor([0.0497, 0.1913, 0.5422, 0.2168]) -Greedy action tensor([-1.0825, -0.5913, 0.4209, -0.1965]) tensor([0.1046, 0.1710, 0.4706, 0.2538]) -Greedy action tensor([-1.0578, -0.6586, 0.2655, 0.2584]) tensor([0.1002, 0.1494, 0.3765, 0.3738]) -Greedy action tensor([-1.8886, -0.2538, 0.5997, -0.1297]) tensor([0.0417, 0.2139, 0.5022, 0.2422]) -Greedy action tensor([-1.2627, -0.5550, 0.3327, 0.2571]) tensor([0.0798, 0.1619, 0.3934, 0.3648]) -Greedy action tensor([0.0683, 0.3053, 0.9700, 1.6393]) tensor([0.1048, 0.1328, 0.2582, 0.5042]) -Greedy action tensor([-1.9427, -0.4505, 0.6642, -0.1797]) tensor([0.0403, 0.1791, 0.5459, 0.2348]) -Greedy action tensor([-1.5011, -0.5388, 0.4475, 0.1131]) tensor([0.0639, 0.1672, 0.4482, 0.3208]) -Greedy action tensor([-0.9668, -0.6067, 0.2179, 0.3245]) tensor([0.1071, 0.1535, 0.3500, 0.3894]) -Greedy action tensor([-1.1532, -0.6509, 0.3800, 0.1946]) tensor([0.0898, 0.1484, 0.4161, 0.3457]) -Greedy action tensor([-1.6833, -0.4588, 1.0344, 0.8571]) tensor([0.0310, 0.1056, 0.4699, 0.3935]) -Greedy action tensor([-0.3285, -0.0964, 0.2273, 0.3596]) tensor([0.1668, 0.2104, 0.2908, 0.3320]) -Greedy action tensor([-1.4047, -0.5641, 0.3676, 0.1827]) tensor([0.0710, 0.1645, 0.4175, 0.3470]) -Greedy action tensor([-1.8741, -0.4624, 0.6286, -0.1416]) tensor([0.0435, 0.1786, 0.5317, 0.2462]) -Greedy action tensor([-1.9329, -0.4225, 0.6560, -0.1730]) tensor([0.0406, 0.1837, 0.5400, 0.2357]) -Greedy action tensor([-1.4337, -0.3819, 0.3990, 0.1354]) tensor([0.0670, 0.1919, 0.4191, 0.3220]) -Greedy action tensor([-0.7745, -0.5829, 0.3484, -0.2712]) tensor([0.1441, 0.1745, 0.4430, 0.2384]) -Greedy action tensor([-0.0622, -0.3362, 0.1612, 0.1069]) tensor([0.2384, 0.1812, 0.2981, 0.2823]) -Greedy action tensor([-1.5638, -0.4487, 0.5146, 0.1049]) tensor([0.0576, 0.1758, 0.4607, 0.3058]) -Greedy action tensor([-1.8159, -0.3693, 0.6039, -0.0819]) tensor([0.0451, 0.1918, 0.5075, 0.2556]) -Greedy action tensor([-1.8388, -0.4101, 0.6083, -0.1179]) tensor([0.0448, 0.1870, 0.5177, 0.2504]) -Greedy action tensor([-1.9224, -0.3876, 0.6492, -0.1606]) tensor([0.0407, 0.1890, 0.5331, 0.2372]) -Greedy action tensor([-1.4338, -0.4733, 1.0706, 1.0336]) tensor([0.0362, 0.0945, 0.4427, 0.4266]) -Greedy action tensor([-1.8657, -0.4599, 0.6655, -0.0860]) tensor([0.0424, 0.1730, 0.5331, 0.2514]) -Greedy action tensor([-1.5135, -0.6449, 0.6454, 0.0725]) tensor([0.0591, 0.1408, 0.5116, 0.2885]) -Greedy action tensor([-1.8255, -0.4579, 0.6432, -0.0525]) tensor([0.0442, 0.1735, 0.5219, 0.2603]) -Greedy action tensor([-1.7740, -0.3101, 0.5329, -0.0695]) tensor([0.0479, 0.2072, 0.4813, 0.2635]) -Greedy action tensor([-1.8326, -0.3759, 0.5893, -0.1108]) tensor([0.0451, 0.1937, 0.5086, 0.2525]) -Greedy action tensor([-1.7140, -0.4981, 0.6278, 0.0810]) tensor([0.0481, 0.1622, 0.5002, 0.2895]) -Greedy action tensor([-0.8075, -0.4252, 0.2622, -0.0922]) tensor([0.1347, 0.1974, 0.3925, 0.2754]) -Greedy action tensor([-1.9210, -0.4098, 0.6522, -0.1619]) tensor([0.0409, 0.1854, 0.5362, 0.2375]) -Greedy action tensor([-1.2023, -0.6068, 0.2725, 0.3050]) tensor([0.0855, 0.1551, 0.3736, 0.3859]) -Greedy action tensor([-1.4999, -0.4258, 1.4597, 1.0397]) tensor([0.0279, 0.0816, 0.5374, 0.3531]) -Greedy action tensor([-1.4615, 0.2311, 0.4738, 0.2142]) tensor([0.0535, 0.2905, 0.3703, 0.2857]) -Greedy action tensor([-1.8938, -0.3973, 0.6276, -0.1453]) tensor([0.0423, 0.1888, 0.5261, 0.2429]) -Greedy action tensor([-1.1255, -0.7960, 0.3617, 0.1869]) tensor([0.0950, 0.1320, 0.4202, 0.3528]) -Greedy action tensor([-1.2881, -0.6329, 0.5956, -0.1848]) tensor([0.0799, 0.1538, 0.5255, 0.2408]) -Greedy action tensor([-1.8560, -0.8231, 0.0675, -0.2770]) tensor([0.0645, 0.1812, 0.4415, 0.3128]) -Greedy action tensor([-1.9117, -0.4389, 0.6545, -0.1486]) tensor([0.0413, 0.1802, 0.5377, 0.2409]) -Greedy action tensor([-1.4683, -0.5482, 0.4076, 0.1179]) tensor([0.0670, 0.1682, 0.4374, 0.3274]) -Greedy action tensor([-0.3275, -0.1593, 0.7350, 1.6287]) tensor([0.0823, 0.0974, 0.2382, 0.5821]) -Greedy action tensor([-1.8683, -0.4315, 0.6275, -0.1379]) tensor([0.0435, 0.1831, 0.5279, 0.2455]) -Greedy action tensor([-1.5990, -0.3225, 0.4620, 0.0852]) tensor([0.0561, 0.2010, 0.4406, 0.3023]) -Greedy action tensor([-1.8734, -0.4912, 0.7273, -0.0664]) tensor([0.0407, 0.1623, 0.5488, 0.2482]) -Greedy action tensor([-0.9729, -0.4488, 0.4405, 0.9687]) tensor([0.0726, 0.1227, 0.2985, 0.5062]) -Greedy action tensor([-0.7066, -0.5739, 0.1308, 0.4579]) tensor([0.1306, 0.1491, 0.3017, 0.4185]) -Greedy action tensor([-0.9243, -0.6031, 0.3036, 0.9877]) tensor([0.0796, 0.1098, 0.2718, 0.5388]) -Greedy action tensor([-1.6418, -0.5276, 0.5034, -0.0258]) tensor([0.0567, 0.1729, 0.4848, 0.2856]) -Greedy action tensor([-1.7548, -0.4987, 0.5622, -0.0576]) tensor([0.0497, 0.1746, 0.5043, 0.2714]) -Greedy action tensor([-1.4246, -0.5863, 0.9999, 0.6798]) tensor([0.0438, 0.1014, 0.4952, 0.3596]) -Greedy action tensor([-1.8900, -0.3605, 0.6198, -0.1461]) tensor([0.0423, 0.1953, 0.5204, 0.2420]) -Greedy action tensor([-1.8867, -0.3286, 0.6244, -0.1373]) tensor([0.0420, 0.1994, 0.5171, 0.2414]) -Greedy action tensor([-1.7909, -0.4250, 0.5869, -0.1162]) tensor([0.0475, 0.1863, 0.5125, 0.2537]) -Greedy action tensor([-1.5658, -0.5137, 0.4898, -0.0770]) tensor([0.0621, 0.1778, 0.4850, 0.2752]) -Greedy action tensor([-0.7126, -0.5321, 0.1651, 0.2787]) tensor([0.1370, 0.1641, 0.3296, 0.3693]) -Greedy action tensor([-1.6516, -0.5135, 0.4855, -0.0416]) tensor([0.0568, 0.1773, 0.4816, 0.2843]) -Greedy action tensor([-1.4646, -0.5127, 0.4992, 0.2891]) tensor([0.0606, 0.1571, 0.4321, 0.3502]) -Greedy action tensor([-1.1179, -0.8544, -0.2762, -0.2043]) tensor([0.1406, 0.1829, 0.3261, 0.3504]) -Greedy action tensor([-0.7168, -0.5660, 0.2911, 0.1857]) tensor([0.1357, 0.1578, 0.3718, 0.3346]) -Greedy action tensor([-1.9031, -0.4963, 0.9522, 0.3755]) tensor([0.0310, 0.1267, 0.5393, 0.3030]) -Greedy action tensor([-1.9378, -0.4426, 0.6625, -0.1761]) tensor([0.0404, 0.1802, 0.5441, 0.2352]) -Greedy action tensor([-1.7413, -0.1533, 0.5221, -0.1025]) tensor([0.0484, 0.2369, 0.4655, 0.2492]) -Greedy action tensor([-1.8624, -0.4196, 0.6248, -0.1242]) tensor([0.0436, 0.1844, 0.5241, 0.2478]) -Greedy action tensor([-1.2709, -0.6644, 0.3459, -0.0632]) tensor([0.0892, 0.1635, 0.4490, 0.2983]) -Greedy action tensor([-0.8997, -0.5829, 0.1212, 0.6724]) tensor([0.1004, 0.1378, 0.2785, 0.4834]) -Greedy action tensor([-0.3853, -1.1397, -0.0601, 0.7409]) tensor([0.1684, 0.0792, 0.2331, 0.5193]) -Greedy action tensor([-0.3626, -1.0942, 0.6166, -0.7673]) tensor([0.2079, 0.1000, 0.5534, 0.1387]) -Greedy action tensor([-0.7637, -1.0052, -0.4424, 2.1646]) tensor([0.0457, 0.0359, 0.0631, 0.8552]) -Greedy action tensor([ 0.9252, -1.6261, 1.1310, -0.1801]) tensor([0.3791, 0.0296, 0.4658, 0.1255]) -Greedy action tensor([ 0.9894, -1.0429, 1.2440, 1.1535]) tensor([0.2778, 0.0364, 0.3584, 0.3274]) -Greedy action tensor([ 0.2429, -0.7392, 0.5710, 0.4546]) tensor([0.2501, 0.0937, 0.3472, 0.3091]) -Greedy action tensor([ 0.9102, -0.1107, -0.7111, 0.4151]) tensor([0.4614, 0.1662, 0.0912, 0.2812]) -Greedy action tensor([ 0.6203, -0.4319, 0.0057, 0.8159]) tensor([0.3220, 0.1124, 0.1741, 0.3915]) -Greedy action tensor([0.3310, 0.3394, 0.4620, 1.1615]) tensor([0.1837, 0.1853, 0.2094, 0.4216]) -Greedy action tensor([ 1.4760, -0.6959, 1.3080, 0.9439]) tensor([0.3927, 0.0448, 0.3320, 0.2306]) -Greedy action tensor([ 0.7166, -0.3542, -0.8745, 1.8003]) tensor([0.2221, 0.0761, 0.0452, 0.6565]) -Greedy action tensor([ 0.1435, -0.1298, -0.0663, 0.5801]) tensor([0.2428, 0.1847, 0.1968, 0.3757]) -Greedy action tensor([ 1.4953, -0.4260, 0.8925, 0.8792]) tensor([0.4477, 0.0655, 0.2450, 0.2418]) -Greedy action tensor([-0.6249, -0.2798, 0.7713, -0.4067]) tensor([0.1299, 0.1835, 0.5250, 0.1616]) -Greedy action tensor([ 0.9717, -0.7397, -0.7293, 0.4372]) tensor([0.5131, 0.0927, 0.0936, 0.3006]) -Greedy action tensor([ 1.0000, 0.5085, 0.4448, -0.2248]) tensor([0.4033, 0.2467, 0.2315, 0.1185]) -Greedy action tensor([ 1.4751, -1.1471, 0.6596, 0.4278]) tensor([0.5359, 0.0389, 0.2371, 0.1881]) -Greedy action tensor([0.2992, 0.1331, 0.0497, 1.6297]) tensor([0.1560, 0.1321, 0.1216, 0.5903]) -Greedy action tensor([ 1.0650, -1.5617, 0.7896, 0.0633]) tensor([0.4548, 0.0329, 0.3453, 0.1670]) -Greedy action tensor([ 0.3412, -0.4850, 0.2597, -0.1135]) tensor([0.3340, 0.1462, 0.3078, 0.2120]) -Greedy action tensor([ 0.3223, 0.8502, 0.5890, -0.5173]) tensor([0.2256, 0.3824, 0.2945, 0.0974]) -Greedy action tensor([ 0.7459, -0.8468, 0.8481, 1.1342]) tensor([0.2642, 0.0537, 0.2926, 0.3895]) -Greedy action tensor([ 1.2663, -1.1361, 0.9278, 1.1213]) tensor([0.3748, 0.0339, 0.2671, 0.3242]) -Greedy action tensor([-0.6574, -0.9589, 0.2482, 0.3225]) tensor([0.1454, 0.1076, 0.3596, 0.3874]) -Greedy action tensor([-0.0532, -0.2081, -0.3176, -0.3266]) tensor([0.2954, 0.2530, 0.2268, 0.2248]) -Greedy action tensor([-0.3033, -0.6614, 0.2735, 1.1987]) tensor([0.1255, 0.0877, 0.2234, 0.5634]) -Greedy action tensor([-0.1517, -0.8048, 0.9334, 0.1531]) tensor([0.1713, 0.0892, 0.5071, 0.2324]) -Greedy action tensor([ 0.4076, -0.2471, 1.1512, -0.0214]) tensor([0.2339, 0.1216, 0.4921, 0.1523]) -Greedy action tensor([ 1.3668, -0.0438, -0.9155, 1.1783]) tensor([0.4599, 0.1122, 0.0469, 0.3809]) -Greedy action tensor([ 0.5378, -1.0338, 0.4284, -0.0413]) tensor([0.3753, 0.0780, 0.3364, 0.2103]) -Greedy action tensor([ 1.0839, -0.9416, -0.0631, 0.5631]) tensor([0.4893, 0.0646, 0.1554, 0.2907]) -Greedy action tensor([-0.6762, 0.0012, -1.0317, -0.0091]) tensor([0.1780, 0.3504, 0.1247, 0.3468]) -Greedy action tensor([-0.7866, -1.0020, 0.5998, 0.3544]) tensor([0.1119, 0.0902, 0.4477, 0.3502]) -Greedy action tensor([ 0.5893, -1.4438, -0.3756, 0.0324]) tensor([0.4796, 0.0628, 0.1827, 0.2748]) -Greedy action tensor([ 0.5971, -1.2986, 0.2891, 0.2886]) tensor([0.3817, 0.0573, 0.2805, 0.2804]) -Greedy action tensor([ 0.4097, 0.3978, -0.0171, 0.8760]) tensor([0.2361, 0.2333, 0.1541, 0.3764]) -Greedy action tensor([ 0.6248, -1.3274, 0.4444, 0.5129]) tensor([0.3483, 0.0494, 0.2908, 0.3114]) -Greedy action tensor([ 0.9207, -0.0613, 0.8395, 1.0163]) tensor([0.2944, 0.1103, 0.2714, 0.3239]) -Greedy action tensor([ 1.0483, -0.5405, 0.9570, -0.5081]) tensor([0.4296, 0.0877, 0.3921, 0.0906]) -Greedy action tensor([ 0.0579, -1.8014, -0.0239, 0.1851]) tensor([0.3113, 0.0485, 0.2868, 0.3535]) -Greedy action tensor([-1.4423, -2.1327, 0.9666, 0.1786]) tensor([0.0566, 0.0284, 0.6290, 0.2861]) -Greedy action tensor([-0.5705, -1.3031, -0.1838, -0.2619]) tensor([0.2318, 0.1114, 0.3412, 0.3156]) -Greedy action tensor([1.6812, 0.3584, 0.7779, 0.6832]) tensor([0.4902, 0.1306, 0.1986, 0.1807]) -Greedy action tensor([ 0.6010, -0.5970, 0.9292, 0.6445]) tensor([0.2678, 0.0808, 0.3718, 0.2797]) -Greedy action tensor([1.3521, 0.4258, 0.7836, 0.7252]) tensor([0.4005, 0.1586, 0.2269, 0.2140]) -Greedy action tensor([ 0.4038, -0.0659, -0.6337, 1.4667]) tensor([0.2051, 0.1283, 0.0727, 0.5939]) -Greedy action tensor([-0.9200, -0.6520, 0.1694, -0.4844]) tensor([0.1465, 0.1915, 0.4355, 0.2265]) -Greedy action tensor([ 2.3725, -0.3327, 0.2857, 1.4602]) tensor([0.6279, 0.0420, 0.0779, 0.2522]) -Greedy action tensor([1.3732, 0.5521, 0.3954, 1.4992]) tensor([0.3389, 0.1491, 0.1275, 0.3845]) -Greedy action tensor([ 1.1201, -1.0519, -0.1537, 0.7734]) tensor([0.4760, 0.0542, 0.1332, 0.3366]) -Greedy action tensor([ 1.1965, -0.7609, -0.0505, 0.6675]) tensor([0.4956, 0.0700, 0.1424, 0.2920]) -Greedy action tensor([-0.1751, -0.8253, 1.8563, 0.0593]) tensor([0.0960, 0.0501, 0.7324, 0.1214]) -Greedy action tensor([ 1.0281, -0.2409, -0.1669, 2.2322]) tensor([0.2033, 0.0572, 0.0616, 0.6779]) -Greedy action tensor([-0.8531, -1.4098, -0.6646, 1.2635]) tensor([0.0902, 0.0517, 0.1089, 0.7491]) -Greedy action tensor([-0.0308, -1.1846, 1.2721, -1.1391]) tensor([0.1878, 0.0592, 0.6910, 0.0620]) -Greedy action tensor([-1.1217, -0.5854, -1.0536, -0.2916]) tensor([0.1646, 0.2815, 0.1762, 0.3776]) -Greedy action tensor([ 0.5127, -0.9976, 1.2998, -0.0256]) tensor([0.2499, 0.0552, 0.5490, 0.1459]) -Greedy action tensor([ 2.0239, -0.5782, 1.2599, 0.8705]) tensor([0.5389, 0.0399, 0.2511, 0.1701]) -Greedy action tensor([ 0.5209, -1.8088, 0.7720, 0.0611]) tensor([0.3318, 0.0323, 0.4265, 0.2095]) -Greedy action tensor([-0.4173, 0.3843, 0.8761, -0.0223]) tensor([0.1196, 0.2667, 0.4361, 0.1776]) -Greedy action tensor([ 0.4758, -2.6304, -0.0454, 1.1576]) tensor([0.2765, 0.0124, 0.1642, 0.5468]) -Greedy action tensor([ 1.1886, -1.5784, -0.0226, 0.8456]) tensor([0.4830, 0.0304, 0.1439, 0.3428]) -Greedy action tensor([ 0.1393, 0.3079, 0.9752, -0.2536]) tensor([0.1936, 0.2291, 0.4466, 0.1307]) -Greedy action tensor([-0.7133, -1.2019, 0.3859, 0.3965]) tensor([0.1307, 0.0802, 0.3924, 0.3966]) -Greedy action tensor([-0.2551, -0.5453, 1.2968, 0.9783]) tensor([0.1010, 0.0756, 0.4767, 0.3467]) -Greedy action tensor([ 9.4023e-04, -1.2853e+00, -2.7919e-01, 7.7471e-01]) tensor([0.2381, 0.0658, 0.1799, 0.5162]) -Greedy action tensor([-0.0499, -0.6525, 0.9146, 0.2690]) tensor([0.1803, 0.0987, 0.4730, 0.2480]) -Greedy action tensor([-0.0669, -1.6489, 0.0109, 0.8346]) tensor([0.2105, 0.0433, 0.2276, 0.5186]) -Greedy action tensor([ 0.9351, -0.3238, 0.2007, 1.3122]) tensor([0.3104, 0.0881, 0.1489, 0.4526]) -Greedy action tensor([ 1.2885, -0.1535, -0.1475, 0.5913]) tensor([0.5070, 0.1199, 0.1206, 0.2525]) -Greedy action tensor([ 0.8345, -1.0233, 0.3212, 0.2042]) tensor([0.4373, 0.0682, 0.2617, 0.2328]) -Greedy action tensor([-0.4829, -0.6205, 0.3641, 0.2670]) tensor([0.1582, 0.1379, 0.3690, 0.3349]) -Greedy action tensor([ 0.0942, -1.2490, 0.1675, -1.0014]) tensor([0.3743, 0.0977, 0.4028, 0.1252]) -Greedy action tensor([ 0.8881, -2.3298, -0.3239, 0.9351]) tensor([0.4192, 0.0168, 0.1247, 0.4393]) -Greedy action tensor([ 1.5658, -1.0993, 0.9700, 0.3804]) tensor([0.5191, 0.0361, 0.2861, 0.1587]) -Greedy action tensor([-0.2837, 0.1494, 0.3940, -1.4163]) tensor([0.2069, 0.3190, 0.4074, 0.0667]) -Greedy action tensor([ 0.8082, -0.4707, -0.3220, 1.8810]) tensor([0.2210, 0.0615, 0.0714, 0.6461]) -Greedy action tensor([ 0.6230, -1.7705, 0.4056, 0.8839]) tensor([0.3131, 0.0286, 0.2519, 0.4064]) -Greedy action tensor([ 0.7132, -1.4270, -1.0925, 0.3463]) tensor([0.5064, 0.0596, 0.0832, 0.3508]) -Greedy action tensor([-0.3045, -0.3307, 2.0678, -0.3579]) tensor([0.0733, 0.0714, 0.7858, 0.0695]) -Greedy action tensor([ 1.0144, -0.1368, 0.8772, 1.6471]) tensor([0.2457, 0.0777, 0.2142, 0.4625]) -Greedy action tensor([ 0.7704, -0.2289, 0.0499, -0.2956]) tensor([0.4547, 0.1674, 0.2212, 0.1566]) -Greedy action tensor([ 0.2009, 0.0245, 0.0038, -0.0391]) tensor([0.2902, 0.2433, 0.2383, 0.2283]) -Greedy action tensor([ 0.9059, -0.5149, -0.0554, -0.5799]) tensor([0.5405, 0.1305, 0.2067, 0.1223]) -Greedy action tensor([ 0.8162, -0.5223, -0.0022, -0.3962]) tensor([0.4998, 0.1311, 0.2205, 0.1487]) -Greedy action tensor([ 0.5920, -0.2416, -0.0769, -0.2467]) tensor([0.4204, 0.1826, 0.2153, 0.1817]) -Greedy action tensor([ 1.1263, -0.6848, -0.0223, -0.4565]) tensor([0.5931, 0.0970, 0.1881, 0.1218]) -Greedy action tensor([ 0.5770, -0.1852, -0.2026, -0.2848]) tensor([0.4260, 0.1988, 0.1953, 0.1799]) -Greedy action tensor([ 0.3402, 0.0780, 0.0939, -0.3492]) tensor([0.3276, 0.2520, 0.2560, 0.1644]) -Greedy action tensor([ 0.7322, -0.1654, -0.0089, -0.3790]) tensor([0.4518, 0.1841, 0.2153, 0.1487]) -Greedy action tensor([ 1.0363, -0.7901, -0.1466, -0.8064]) tensor([0.6151, 0.0990, 0.1885, 0.0974]) -Greedy action tensor([ 0.4533, -0.0447, 0.0479, -0.1619]) tensor([0.3552, 0.2159, 0.2368, 0.1920]) -Greedy action tensor([ 0.5954, -0.2043, -0.0306, -0.2377]) tensor([0.4134, 0.1858, 0.2211, 0.1797]) -Greedy action tensor([ 0.4597, -0.0930, -0.1097, -0.1814]) tensor([0.3748, 0.2157, 0.2121, 0.1974]) -Greedy action tensor([ 0.7820, -0.5254, -0.0485, -0.4821]) tensor([0.5028, 0.1360, 0.2191, 0.1420]) -Greedy action tensor([ 0.6975, -0.2410, 0.0081, -0.3103]) tensor([0.4429, 0.1732, 0.2222, 0.1616]) -Greedy action tensor([ 0.7547, -0.3183, -0.0228, -0.4916]) tensor([0.4787, 0.1637, 0.2200, 0.1376]) -Greedy action tensor([ 0.6972, -0.3909, -0.0884, -0.2226]) tensor([0.4563, 0.1537, 0.2080, 0.1819]) -Greedy action tensor([ 0.7901, -0.5286, -0.0363, -0.4776]) tensor([0.5034, 0.1346, 0.2203, 0.1417]) -Greedy action tensor([ 0.6507, -0.4309, -0.1956, -0.3523]) tensor([0.4684, 0.1588, 0.2010, 0.1718]) -Greedy action tensor([ 0.6453, -0.2183, -0.1610, -0.2294]) tensor([0.4376, 0.1845, 0.1954, 0.1825]) -Greedy action tensor([ 0.6214, -0.1489, -0.0723, -0.0535]) tensor([0.4046, 0.1873, 0.2022, 0.2060]) -Greedy action tensor([ 0.8086, -0.5538, -0.0165, -0.3803]) tensor([0.5003, 0.1281, 0.2192, 0.1524]) -Greedy action tensor([ 0.6576, -0.3415, -0.0320, -0.1759]) tensor([0.4339, 0.1598, 0.2177, 0.1886]) -Greedy action tensor([ 0.8246, -0.7641, -0.0363, -0.3196]) tensor([0.5140, 0.1050, 0.2173, 0.1637]) -Greedy action tensor([ 0.9644, -0.5906, 0.0492, -0.3804]) tensor([0.5341, 0.1128, 0.2139, 0.1392]) -Greedy action tensor([ 1.1197, -1.3318, -0.0180, -0.6941]) tensor([0.6370, 0.0549, 0.2042, 0.1039]) -Greedy action tensor([ 0.7675, -0.7757, -0.0111, -0.3770]) tensor([0.5022, 0.1073, 0.2306, 0.1599]) -Greedy action tensor([ 0.7907, -0.4683, -0.1110, -0.4682]) tensor([0.5066, 0.1438, 0.2056, 0.1439]) -Greedy action tensor([ 0.5897, -0.0894, -0.0141, -0.4872]) tensor([0.4176, 0.2118, 0.2283, 0.1423]) -Greedy action tensor([ 1.0366, -0.9250, 0.0412, -0.4369]) tensor([0.5749, 0.0809, 0.2125, 0.1317]) -Greedy action tensor([ 0.5624, -0.4009, -0.0351, -0.3940]) tensor([0.4318, 0.1648, 0.2375, 0.1659]) -Greedy action tensor([ 0.6098, -0.3236, 0.0025, -0.4285]) tensor([0.4363, 0.1716, 0.2377, 0.1545]) -Greedy action tensor([ 0.5663, -0.0303, 0.1055, -0.2442]) tensor([0.3808, 0.2097, 0.2402, 0.1693]) -Greedy action tensor([ 0.2309, 0.0853, 0.0765, -0.1691]) tensor([0.2948, 0.2549, 0.2526, 0.1976]) -Greedy action tensor([ 0.3322, -0.1300, -0.0843, -0.4370]) tensor([0.3633, 0.2288, 0.2395, 0.1683]) -Greedy action tensor([ 0.6336, -0.4668, -0.0708, -0.2600]) tensor([0.4472, 0.1488, 0.2211, 0.1830]) -Greedy action tensor([ 0.7158, -0.5065, 0.1014, -0.3674]) tensor([0.4600, 0.1355, 0.2488, 0.1557]) -Greedy action tensor([ 0.7311, -0.3048, 0.0474, -0.4934]) tensor([0.4643, 0.1648, 0.2344, 0.1365]) -Greedy action tensor([ 0.5790, 0.0590, 0.0471, -0.1213]) tensor([0.3734, 0.2220, 0.2193, 0.1853]) -Greedy action tensor([ 0.7060, -0.4420, 0.1221, -0.4073]) tensor([0.4538, 0.1440, 0.2531, 0.1491]) -Greedy action tensor([ 0.5877, -0.0773, 0.0940, -0.4139]) tensor([0.4013, 0.2064, 0.2449, 0.1474]) -Greedy action tensor([ 0.7724, -0.3841, 0.0759, -0.3947]) tensor([0.4708, 0.1481, 0.2346, 0.1465]) -Greedy action tensor([ 0.5294, -0.3274, 0.1837, -0.3695]) tensor([0.3938, 0.1672, 0.2787, 0.1603]) -Greedy action tensor([ 1.0616, -0.8549, -0.0908, -0.4597]) tensor([0.5947, 0.0875, 0.1879, 0.1299]) -Greedy action tensor([ 0.7482, -0.4501, -0.0160, -0.3578]) tensor([0.4766, 0.1438, 0.2219, 0.1577]) -Greedy action tensor([ 0.8806, -0.3400, 0.0355, -0.4405]) tensor([0.5021, 0.1482, 0.2157, 0.1340]) -Greedy action tensor([ 0.6925, -0.6215, -0.0813, -0.4210]) tensor([0.4858, 0.1306, 0.2241, 0.1595]) -Greedy action tensor([ 0.5539, -0.2808, -0.0705, -0.3637]) tensor([0.4221, 0.1832, 0.2261, 0.1686]) -Greedy action tensor([ 0.0290, 0.0655, -0.0376, -0.0887]) tensor([0.2590, 0.2686, 0.2423, 0.2302]) -Greedy action tensor([ 0.7305, -0.4652, -0.0413, -0.5907]) tensor([0.4922, 0.1489, 0.2275, 0.1313]) -Greedy action tensor([ 0.6131, -0.3879, 0.0586, -0.2762]) tensor([0.4250, 0.1562, 0.2441, 0.1747]) -Greedy action tensor([ 0.5280, -0.4900, -0.1212, -0.2239]) tensor([0.4246, 0.1534, 0.2218, 0.2002]) -Greedy action tensor([ 0.5696, -0.3039, -0.0645, -0.3610]) tensor([0.4269, 0.1782, 0.2265, 0.1684]) -Greedy action tensor([ 1.0306, -0.4586, -0.0503, -0.4741]) tensor([0.5596, 0.1262, 0.1899, 0.1243]) -Greedy action tensor([ 0.6064, -0.4067, -0.1029, -0.3728]) tensor([0.4483, 0.1628, 0.2205, 0.1684]) -Greedy action tensor([ 0.3855, -0.1263, -0.1465, -0.2571]) tensor([0.3686, 0.2210, 0.2165, 0.1939]) -Greedy action tensor([ 0.6679, -0.4755, -0.1862, -0.3606]) tensor([0.4758, 0.1516, 0.2025, 0.1701]) -Greedy action tensor([ 0.4602, -0.2809, -0.0214, -0.4005]) tensor([0.3973, 0.1893, 0.2454, 0.1680]) -Greedy action tensor([ 0.8222, -0.6974, 0.0360, -0.5250]) tensor([0.5170, 0.1131, 0.2355, 0.1344]) -Greedy action tensor([ 0.4584, -0.1408, 0.1005, -0.1729]) tensor([0.3597, 0.1975, 0.2515, 0.1913]) -Greedy action tensor([ 0.6631, -0.5900, -0.1104, -0.4112]) tensor([0.4788, 0.1367, 0.2209, 0.1635]) -Greedy action tensor([ 0.7169, -0.4440, 0.1416, -0.3190]) tensor([0.4483, 0.1404, 0.2522, 0.1591]) -Greedy action tensor([ 0.8461, -0.6630, -0.1934, -0.5762]) tensor([0.5507, 0.1218, 0.1947, 0.1328]) -Greedy action tensor([ 0.6142, -0.4606, -0.0035, -0.3003]) tensor([0.4384, 0.1496, 0.2364, 0.1757]) -Greedy action tensor([ 0.6253, -0.3128, 0.0435, -0.1659]) tensor([0.4160, 0.1628, 0.2325, 0.1886]) -Greedy action tensor([ 0.5805, 0.0181, -0.0314, -0.0629]) tensor([0.3791, 0.2160, 0.2056, 0.1992]) -Greedy action tensor([ 0.7766, -0.7374, -0.1188, -0.1994]) tensor([0.4987, 0.1097, 0.2037, 0.1879]) -Greedy action tensor([ 0.7060, -0.5552, 0.0086, -0.4405]) tensor([0.4764, 0.1350, 0.2372, 0.1514]) -Greedy action tensor([ 0.3720, -0.3265, 0.1140, -0.3173]) tensor([0.3608, 0.1794, 0.2787, 0.1811]) -Greedy action tensor([ 0.7586, -0.5242, -0.0563, -0.3682]) tensor([0.4892, 0.1356, 0.2166, 0.1585]) -Greedy action tensor([ 0.7853, -0.4557, -0.1072, -0.3700]) tensor([0.4966, 0.1436, 0.2034, 0.1564]) -Greedy action tensor([ 0.5431, -0.1357, -0.0389, -0.1936]) tensor([0.3930, 0.1993, 0.2196, 0.1881]) -Greedy action tensor([ 0.9437, -0.6009, -0.0088, -0.4868]) tensor([0.5440, 0.1161, 0.2098, 0.1301]) -Greedy action tensor([ 0.7527, -0.3667, 0.1276, -0.3758]) tensor([0.4576, 0.1494, 0.2449, 0.1480]) -Greedy action tensor([ 0.6387, -0.4501, 0.1000, -0.3869]) tensor([0.4389, 0.1477, 0.2561, 0.1574]) -Greedy action tensor([ 0.2327, -0.2225, -0.1834, -0.2936]) tensor([0.3466, 0.2199, 0.2286, 0.2048]) -Greedy action tensor([ 0.4575, 0.0760, -0.0529, -0.1523]) tensor([0.3538, 0.2416, 0.2124, 0.1923]) -Greedy action tensor([ 0.6722, -0.5200, -0.0955, -0.3757]) tensor([0.4721, 0.1433, 0.2191, 0.1656]) -Greedy action tensor([ 0.5304, -0.4896, 0.0645, -0.7347]) tensor([0.4405, 0.1588, 0.2764, 0.1243]) -Greedy action tensor([ 0.5021, -0.3639, -0.0573, -0.3554]) tensor([0.4138, 0.1741, 0.2365, 0.1756]) -Greedy action tensor([ 0.5785, -0.1679, -0.1055, -0.4274]) tensor([0.4265, 0.2022, 0.2152, 0.1560]) -Greedy action tensor([ 1.1163, -0.1533, -0.6486, 0.4442]) tensor([0.5095, 0.1431, 0.0872, 0.2602]) -Greedy action tensor([ 1.3184, -0.3836, -0.3802, 0.2280]) tensor([0.5878, 0.1072, 0.1075, 0.1975]) -Greedy action tensor([ 1.6740, -0.2086, -0.3470, 0.1311]) tensor([0.6673, 0.1016, 0.0884, 0.1427]) -Greedy action tensor([ 1.2798, -0.3294, -0.6550, 0.4301]) tensor([0.5643, 0.1129, 0.0815, 0.2413]) -Greedy action tensor([ 1.4583, -0.1061, -0.2539, 0.4117]) tensor([0.5744, 0.1202, 0.1037, 0.2017]) -Greedy action tensor([ 0.7770, -0.2986, -0.2426, 0.1962]) tensor([0.4422, 0.1508, 0.1595, 0.2474]) -Greedy action tensor([ 1.4594, -0.3843, -0.4782, 0.0344]) tensor([0.6482, 0.1026, 0.0934, 0.1559]) -Greedy action tensor([ 1.1306, -0.2419, -0.1789, 0.1679]) tensor([0.5249, 0.1330, 0.1417, 0.2004]) -Greedy action tensor([ 1.6751, -0.2251, -0.7008, 0.6492]) tensor([0.6246, 0.0934, 0.0580, 0.2239]) -Greedy action tensor([ 0.9951, -0.0819, -0.4582, 0.0108]) tensor([0.5133, 0.1748, 0.1200, 0.1918]) -Greedy action tensor([ 1.2754, -0.5469, -0.1031, 0.2243]) tensor([0.5672, 0.0917, 0.1429, 0.1982]) -Greedy action tensor([ 0.7122, -0.2994, -0.1958, -0.0362]) tensor([0.4464, 0.1623, 0.1801, 0.2112]) -Greedy action tensor([ 0.8786, -0.6138, -0.2339, 0.1163]) tensor([0.4950, 0.1113, 0.1627, 0.2310]) -Greedy action tensor([ 0.3218, -0.3386, 0.0271, -0.1256]) tensor([0.3448, 0.1781, 0.2568, 0.2204]) -Greedy action tensor([ 0.5992, -0.2298, -0.0084, 0.0746]) tensor([0.3887, 0.1696, 0.2117, 0.2300]) -Greedy action tensor([ 1.2544, -0.5271, -0.1232, 0.5488]) tensor([0.5224, 0.0880, 0.1317, 0.2579]) -Greedy action tensor([ 0.8667, -0.4976, -0.5293, 0.5882]) tensor([0.4425, 0.1131, 0.1096, 0.3349]) -Greedy action tensor([ 1.7359, -0.0418, -0.5298, 0.0890]) tensor([0.6824, 0.1153, 0.0708, 0.1315]) -Greedy action tensor([ 1.2619, -0.4183, -0.2020, 0.3214]) tensor([0.5531, 0.1031, 0.1279, 0.2159]) -Greedy action tensor([ 1.0991, -0.4314, -0.2672, 0.1843]) tensor([0.5342, 0.1156, 0.1362, 0.2140]) -Greedy action tensor([ 1.4176, -0.6829, -0.2125, 0.1999]) tensor([0.6195, 0.0758, 0.1214, 0.1833]) -Greedy action tensor([ 1.5964, -0.3886, -0.3357, 0.1931]) tensor([0.6545, 0.0899, 0.0948, 0.1609]) -Greedy action tensor([ 1.2832, -0.5003, -0.1043, 0.2975]) tensor([0.5584, 0.0938, 0.1394, 0.2084]) -Greedy action tensor([ 0.5832, -0.1138, -0.0342, 0.0913]) tensor([0.3775, 0.1880, 0.2036, 0.2308]) -Greedy action tensor([ 1.8519, -0.4068, -0.4069, 0.5673]) tensor([0.6731, 0.0703, 0.0703, 0.1863]) -Greedy action tensor([ 0.7059, -0.3330, -0.2366, -0.1280]) tensor([0.4592, 0.1625, 0.1789, 0.1994]) -Greedy action tensor([ 0.7701, -0.3458, 0.1442, -0.0634]) tensor([0.4354, 0.1426, 0.2328, 0.1892]) -Greedy action tensor([ 0.5360, -0.4946, -0.1824, 0.0188]) tensor([0.4097, 0.1462, 0.1998, 0.2443]) -Greedy action tensor([ 1.1497, -0.3092, -0.2335, -0.0259]) tensor([0.5581, 0.1297, 0.1400, 0.1722]) -Greedy action tensor([ 1.6675, -0.3990, -0.2900, 0.0185]) tensor([0.6849, 0.0867, 0.0967, 0.1317]) -Greedy action tensor([ 0.8059, -0.0160, 0.3580, -0.2111]) tensor([0.4098, 0.1801, 0.2618, 0.1482]) -Greedy action tensor([ 0.8980, -0.0233, -0.1461, 0.1284]) tensor([0.4518, 0.1798, 0.1590, 0.2093]) -Greedy action tensor([ 1.1080, -0.1702, -0.1756, 0.3691]) tensor([0.4918, 0.1370, 0.1363, 0.2349]) -Greedy action tensor([ 1.1809, -0.1875, 0.0500, -0.0513]) tensor([0.5351, 0.1362, 0.1727, 0.1560]) -Greedy action tensor([ 1.1314, -0.4543, -0.2324, 0.2747]) tensor([0.5305, 0.1086, 0.1356, 0.2252]) -Greedy action tensor([ 1.4060, -0.3959, -0.1858, 0.3614]) tensor([0.5813, 0.0959, 0.1183, 0.2045]) -Greedy action tensor([ 2.0248, -0.2234, 0.1204, 0.2190]) tensor([0.7048, 0.0744, 0.1050, 0.1158]) -Greedy action tensor([ 0.7770, -0.1678, -0.1577, -0.1768]) tensor([0.4615, 0.1794, 0.1812, 0.1778]) -Greedy action tensor([ 0.9586, -0.2609, -0.0227, -0.1994]) tensor([0.5040, 0.1489, 0.1889, 0.1583]) -Greedy action tensor([ 1.0908, -0.5366, -0.6051, 0.4529]) tensor([0.5240, 0.1029, 0.0961, 0.2769]) -Greedy action tensor([ 1.1753, -0.7087, -0.2253, 0.1556]) tensor([0.5685, 0.0864, 0.1401, 0.2051]) -Greedy action tensor([ 0.9686, -0.4255, -0.6857, 0.2181]) tensor([0.5232, 0.1298, 0.1000, 0.2470]) -Greedy action tensor([ 1.1505, -0.0559, -0.2211, 0.1258]) tensor([0.5230, 0.1565, 0.1327, 0.1877]) -Greedy action tensor([ 1.6694, -0.7572, -0.0568, 0.1328]) tensor([0.6750, 0.0596, 0.1201, 0.1452]) -Greedy action tensor([ 1.7752, -0.0044, -0.4100, 0.2272]) tensor([0.6694, 0.1129, 0.0753, 0.1424]) -Greedy action tensor([ 1.0124, -0.2695, -0.0366, 0.0430]) tensor([0.4982, 0.1383, 0.1745, 0.1890]) -Greedy action tensor([ 1.5122, -0.5592, -0.4364, 0.5551]) tensor([0.6051, 0.0763, 0.0862, 0.2324]) -Greedy action tensor([ 1.2294, -0.5203, -0.2979, 0.1536]) tensor([0.5774, 0.1004, 0.1254, 0.1969]) -Greedy action tensor([ 1.0549, -0.4277, -0.1695, 0.1502]) tensor([0.5193, 0.1179, 0.1526, 0.2102]) -Greedy action tensor([ 0.4811, -0.5101, 0.0562, 0.1549]) tensor([0.3641, 0.1351, 0.2380, 0.2628]) -Greedy action tensor([ 1.3677e+00, 1.9062e-04, -6.6034e-01, -2.3363e-01]) tensor([0.6297, 0.1604, 0.0829, 0.1270]) -Greedy action tensor([ 1.9348, -0.0175, 0.0733, 0.2435]) tensor([0.6749, 0.0958, 0.1049, 0.1244]) -Greedy action tensor([ 1.9008, -0.4268, -0.4403, 0.3636]) tensor([0.7099, 0.0692, 0.0683, 0.1526]) -Greedy action tensor([ 1.4162, -0.0214, -0.2685, 0.0310]) tensor([0.5976, 0.1419, 0.1109, 0.1496]) -Greedy action tensor([ 0.9911, -0.3933, 0.1245, 0.0253]) tensor([0.4874, 0.1221, 0.2049, 0.1856]) -Greedy action tensor([ 1.2377, -0.7031, -0.2457, 0.7713]) tensor([0.5006, 0.0719, 0.1136, 0.3140]) -Greedy action tensor([ 1.3156, -0.3015, -0.5306, 0.2903]) tensor([0.5831, 0.1157, 0.0920, 0.2091]) -Greedy action tensor([ 1.1867, -0.2089, -0.4113, 0.2042]) tensor([0.5481, 0.1358, 0.1109, 0.2052]) -Greedy action tensor([ 2.0392, -0.8341, 0.1190, 0.4520]) tensor([0.7104, 0.0401, 0.1041, 0.1453]) -Greedy action tensor([ 1.6304, -0.7988, -0.0641, 0.1195]) tensor([0.6700, 0.0590, 0.1231, 0.1479]) -Greedy action tensor([1.4178, 0.0142, 0.0901, 0.0856]) tensor([0.5635, 0.1385, 0.1494, 0.1487]) -Greedy action tensor([ 1.3640, -0.5795, -0.1847, 0.1455]) tensor([0.6055, 0.0867, 0.1287, 0.1790]) -Greedy action tensor([ 0.6397, -0.2917, -0.3436, -0.1547]) tensor([0.4505, 0.1775, 0.1685, 0.2035]) -Greedy action tensor([ 1.2646, -0.4664, -0.0747, 0.3273]) tensor([0.5462, 0.0967, 0.1431, 0.2139]) -Greedy action tensor([ 1.5003, -0.5488, -0.2558, 0.1698]) tensor([0.6386, 0.0823, 0.1103, 0.1688]) -Greedy action tensor([ 1.0646, -0.3531, -0.3331, -0.5589]) tensor([0.5929, 0.1436, 0.1465, 0.1169]) -Greedy action tensor([ 0.7080, -0.4238, -0.1218, 0.0407]) tensor([0.4402, 0.1419, 0.1920, 0.2259]) -Greedy action tensor([ 0.9400, -0.2156, -0.1352, 0.6537]) tensor([0.4154, 0.1308, 0.1418, 0.3120]) -Greedy action tensor([ 0.9021, -0.4256, -0.2045, 0.1956]) tensor([0.4787, 0.1269, 0.1583, 0.2362]) -Greedy action tensor([ 1.1909, -0.3648, -0.5385, 0.3764]) tensor([0.5461, 0.1152, 0.0969, 0.2418]) -Greedy action tensor([ 0.8140, -0.4568, -0.1015, 0.3305]) tensor([0.4353, 0.1221, 0.1742, 0.2684]) -Greedy action tensor([ 1.5086, -0.5229, -0.5671, 0.0359]) tensor([0.6730, 0.0883, 0.0844, 0.1543]) -Greedy action tensor([ 1.0118, -0.1044, 0.0472, 0.1625]) tensor([0.4681, 0.1533, 0.1784, 0.2002]) -Greedy action tensor([ 1.5779, -0.2904, -0.1026, 0.7477]) tensor([0.5629, 0.0869, 0.1049, 0.2454]) -Greedy action tensor([ 1.4029, -0.1086, -0.3870, -0.1036]) tensor([0.6214, 0.1371, 0.1038, 0.1378]) -Greedy action tensor([ 1.9161, -0.4856, 0.0406, 0.4640]) tensor([0.6766, 0.0613, 0.1037, 0.1584]) -Greedy action tensor([ 0.8389, -0.1352, -0.0133, -0.1931]) tensor([0.4629, 0.1748, 0.1974, 0.1649]) -Greedy action tensor([ 1.6583, -0.2369, -0.5623, 0.2215]) tensor([0.6682, 0.1004, 0.0725, 0.1588]) -Greedy action tensor([ 0.7009, -0.5403, -0.2650, 0.2040]) tensor([0.4390, 0.1269, 0.1671, 0.2671]) -Greedy action tensor([ 1.7876, -0.3790, -0.1804, 0.1565]) tensor([0.6896, 0.0790, 0.0964, 0.1350]) -Greedy action tensor([ 0.5358, -0.0998, -0.0065, 0.0473]) tensor([0.3670, 0.1944, 0.2134, 0.2252]) -Greedy action tensor([-0.9105, -0.4328, 0.5041, 0.9494]) tensor([0.0760, 0.1226, 0.3129, 0.4884]) -Greedy action tensor([-1.9320, -0.4354, 0.6659, -0.1659]) tensor([0.0404, 0.1805, 0.5429, 0.2363]) -Greedy action tensor([-1.7770, -0.2754, 0.5504, -0.0548]) tensor([0.0469, 0.2104, 0.4804, 0.2623]) -Greedy action tensor([-1.8900, -0.4208, 0.6353, -0.1486]) tensor([0.0425, 0.1846, 0.5307, 0.2423]) -Greedy action tensor([-1.8875, -0.3713, 0.6266, -0.1339]) tensor([0.0422, 0.1923, 0.5216, 0.2438]) -Greedy action tensor([-1.0291, -0.6310, 0.2371, 0.1803]) tensor([0.1065, 0.1586, 0.3779, 0.3570]) -Greedy action tensor([-1.6208, -0.5124, 0.4920, 0.0555]) tensor([0.0567, 0.1717, 0.4687, 0.3029]) -Greedy action tensor([-1.9103, -0.4161, 0.6428, -0.1530]) tensor([0.0415, 0.1849, 0.5331, 0.2405]) -Greedy action tensor([-1.2118, -0.7095, 0.2910, 0.0287]) tensor([0.0943, 0.1558, 0.4238, 0.3260]) -Greedy action tensor([-1.8858, -0.4779, 0.6512, -0.1429]) tensor([0.0427, 0.1744, 0.5392, 0.2437]) -Greedy action tensor([-0.7163, -0.1730, 1.1954, 1.5020]) tensor([0.0535, 0.0922, 0.3622, 0.4921]) -Greedy action tensor([-1.7403, -0.4886, 0.5604, -0.0558]) tensor([0.0503, 0.1760, 0.5024, 0.2713]) -Greedy action tensor([-1.8379, -0.4409, 0.6182, -0.1045]) tensor([0.0447, 0.1808, 0.5214, 0.2531]) -Greedy action tensor([-1.7517, -0.4920, 0.5580, -0.0566]) tensor([0.0499, 0.1758, 0.5025, 0.2718]) -Greedy action tensor([-1.7930, -0.5112, 0.5832, -0.0957]) tensor([0.0480, 0.1730, 0.5168, 0.2621]) -Greedy action tensor([-1.4824, -0.4123, 0.7371, -0.6206]) tensor([0.0646, 0.1883, 0.5943, 0.1529]) -Greedy action tensor([-1.8447, -0.2722, 0.5971, -0.1089]) tensor([0.0435, 0.2096, 0.5000, 0.2468]) -Greedy action tensor([-0.9531, -0.4780, 1.0245, 1.4561]) tensor([0.0477, 0.0767, 0.3447, 0.5308]) -Greedy action tensor([-1.7582, -0.4118, 0.5622, -0.0839]) tensor([0.0491, 0.1888, 0.5000, 0.2620]) -Greedy action tensor([-1.9641, -0.7752, 1.0236, 0.3456]) tensor([0.0292, 0.0960, 0.5802, 0.2945]) -Greedy action tensor([-1.4207, -0.5714, 0.4047, 0.0593]) tensor([0.0718, 0.1678, 0.4453, 0.3152]) -Greedy action tensor([-1.3723, -0.4115, 1.1374, 1.1029]) tensor([0.0360, 0.0940, 0.4425, 0.4275]) -Greedy action tensor([-0.9401, -0.6311, 0.1663, 0.7451]) tensor([0.0928, 0.1264, 0.2805, 0.5004]) -Greedy action tensor([-1.6386, -0.4404, 0.6075, 0.1394]) tensor([0.0508, 0.1684, 0.4802, 0.3007]) -Greedy action tensor([-1.9156, -0.4284, 0.6518, -0.1586]) tensor([0.0412, 0.1825, 0.5374, 0.2390]) -Greedy action tensor([-1.8337, -0.4092, 0.5974, -0.1166]) tensor([0.0453, 0.1881, 0.5147, 0.2520]) -Greedy action tensor([-1.6772, -0.8536, 0.0452, -0.5935]) tensor([0.0845, 0.1926, 0.4731, 0.2498]) -Greedy action tensor([-1.8262, -0.4345, 0.6475, -0.0411]) tensor([0.0438, 0.1760, 0.5194, 0.2609]) -Greedy action tensor([-1.4608, -0.6725, 0.3954, -0.2774]) tensor([0.0777, 0.1710, 0.4975, 0.2538]) -Greedy action tensor([-1.8143, -0.4875, 0.6107, -0.0691]) tensor([0.0459, 0.1729, 0.5185, 0.2627]) -Greedy action tensor([-1.8033, -0.4246, 0.5914, -0.1020]) tensor([0.0467, 0.1854, 0.5120, 0.2559]) -Greedy action tensor([-1.5925, -0.4913, 0.4835, 0.1009]) tensor([0.0574, 0.1727, 0.4577, 0.3122]) -Greedy action tensor([-1.9024, -0.3764, 0.6523, -0.1417]) tensor([0.0412, 0.1894, 0.5299, 0.2395]) -Greedy action tensor([-1.2771, -0.6032, 0.4579, 0.5520]) tensor([0.0673, 0.1320, 0.3815, 0.4192]) -Greedy action tensor([-1.4393, 0.1614, 0.6082, -0.5197]) tensor([0.0617, 0.3057, 0.4779, 0.1547]) -Greedy action tensor([-0.5634, -0.2671, 0.9011, 1.5846]) tensor([0.0656, 0.0883, 0.2839, 0.5623]) -Greedy action tensor([-1.3274, -0.5420, 0.4292, 0.3195]) tensor([0.0705, 0.1547, 0.4086, 0.3662]) -Greedy action tensor([-1.8030, -0.2830, 0.5781, -0.1155]) tensor([0.0459, 0.2098, 0.4963, 0.2480]) -Greedy action tensor([-0.3854, 0.8411, 0.2871, 1.0778]) tensor([0.0936, 0.3190, 0.1833, 0.4041]) -Greedy action tensor([-0.9144, -0.4316, 1.0292, 1.3133]) tensor([0.0530, 0.0858, 0.3698, 0.4914]) -Greedy action tensor([-1.4810, -0.4902, 0.5225, 0.3181]) tensor([0.0583, 0.1570, 0.4323, 0.3524]) -Greedy action tensor([-1.4238, -0.4431, 0.5312, 0.3663]) tensor([0.0598, 0.1595, 0.4225, 0.3582]) -Greedy action tensor([-1.6829, -0.4913, 1.3168, 0.9742]) tensor([0.0259, 0.0852, 0.5198, 0.3690]) -Greedy action tensor([-1.9395, -0.4527, 0.6629, -0.1792]) tensor([0.0404, 0.1788, 0.5457, 0.2351]) -Greedy action tensor([-1.7753, -0.4450, 0.6921, 0.0518]) tensor([0.0439, 0.1660, 0.5174, 0.2727]) -Greedy action tensor([-1.6736, -0.4718, 0.6652, 0.2055]) tensor([0.0471, 0.1566, 0.4881, 0.3082]) -Greedy action tensor([-1.5708, 0.0863, 0.4037, -0.0153]) tensor([0.0550, 0.2884, 0.3961, 0.2605]) -Greedy action tensor([-1.0519, -0.6480, 0.3849, 0.5097]) tensor([0.0872, 0.1306, 0.3668, 0.4155]) -Greedy action tensor([-0.4108, -0.4118, 0.2008, 0.1477]) tensor([0.1789, 0.1787, 0.3297, 0.3127]) -Greedy action tensor([-1.2847, -0.1129, 0.3982, 0.4997]) tensor([0.0642, 0.2074, 0.3457, 0.3827]) -Greedy action tensor([-1.1719, 0.7967, 0.2441, 0.6715]) tensor([0.0538, 0.3850, 0.2216, 0.3397]) -Greedy action tensor([-1.4053, -0.6321, 0.4437, 0.1051]) tensor([0.0712, 0.1542, 0.4522, 0.3223]) -Greedy action tensor([-1.9026, -0.4387, 0.6533, -0.1438]) tensor([0.0416, 0.1800, 0.5365, 0.2418]) -Greedy action tensor([-1.9690, -0.7477, 0.3728, -0.1810]) tensor([0.0481, 0.1633, 0.5007, 0.2878]) -Greedy action tensor([-1.4922, -0.6208, 0.6135, -0.1384]) tensor([0.0646, 0.1545, 0.5307, 0.2502]) -Greedy action tensor([-1.8902, -0.4277, 0.6335, -0.1594]) tensor([0.0427, 0.1842, 0.5323, 0.2409]) -Greedy action tensor([-0.2396, -0.1549, 0.4036, 0.4332]) tensor([0.1681, 0.1829, 0.3197, 0.3293]) -Greedy action tensor([-0.5812, -0.2881, 0.9222, 1.5459]) tensor([0.0657, 0.0880, 0.2953, 0.5510]) -Greedy action tensor([-1.2989, -0.2236, 0.3202, 0.3646]) tensor([0.0701, 0.2056, 0.3541, 0.3702]) -Greedy action tensor([-1.9050, -0.4668, 0.6660, -0.1423]) tensor([0.0415, 0.1747, 0.5422, 0.2416]) -Greedy action tensor([-1.9108, -0.4333, 0.6547, -0.1500]) tensor([0.0413, 0.1810, 0.5374, 0.2403]) -Greedy action tensor([-0.8515, -0.5807, 0.1835, 0.3561]) tensor([0.1180, 0.1547, 0.3323, 0.3949]) -Greedy action tensor([-0.5277, -0.4998, 0.1826, 0.3849]) tensor([0.1526, 0.1569, 0.3104, 0.3801]) -Greedy action tensor([-1.9296, -0.7075, 0.4299, -0.2129]) tensor([0.0487, 0.1652, 0.5152, 0.2709]) -Greedy action tensor([-1.8752, -0.4419, 0.6262, -0.1395]) tensor([0.0434, 0.1818, 0.5289, 0.2459]) -Greedy action tensor([-1.9529, -0.5406, 0.6982, -0.1584]) tensor([0.0395, 0.1623, 0.5603, 0.2379]) -Greedy action tensor([-1.6920, -0.4819, 0.5370, -0.0960]) tensor([0.0538, 0.1805, 0.5001, 0.2656]) -Greedy action tensor([-1.3558, -0.5534, 0.6809, -0.2588]) tensor([0.0720, 0.1606, 0.5518, 0.2156]) -Greedy action tensor([-1.9258, -0.4345, 0.6566, -0.1671]) tensor([0.0409, 0.1815, 0.5405, 0.2372]) -Greedy action tensor([-1.8543, -0.4667, 0.6285, -0.1163]) tensor([0.0441, 0.1767, 0.5283, 0.2509]) -Greedy action tensor([-1.8925, -0.4531, 0.7192, -0.0086]) tensor([0.0393, 0.1659, 0.5359, 0.2588]) -Greedy action tensor([-1.9318, -0.4460, 0.6661, -0.1689]) tensor([0.0405, 0.1790, 0.5443, 0.2362]) -Greedy action tensor([-1.7938, -0.2133, 0.5551, -0.0857]) tensor([0.0458, 0.2223, 0.4794, 0.2526]) -Greedy action tensor([-1.2940, 0.4229, 0.3582, -0.1872]) tensor([0.0675, 0.3759, 0.3523, 0.2042]) -Greedy action tensor([-1.9270, -0.4068, 0.6533, -0.1691]) tensor([0.0407, 0.1861, 0.5372, 0.2360]) -Greedy action tensor([-4.3906e-01, 1.3911e-03, 1.0202e+00, 1.5896e+00]) tensor([0.0692, 0.1074, 0.2976, 0.5259]) -Greedy action tensor([-0.0913, 0.4110, 0.9196, 1.5520]) tensor([0.0946, 0.1563, 0.2599, 0.4892]) -Greedy action tensor([-1.7814, -0.4660, 0.5870, -0.1061]) tensor([0.0482, 0.1796, 0.5148, 0.2574]) -Greedy action tensor([-1.3486, -0.5573, 0.3407, 0.1915]) tensor([0.0753, 0.1660, 0.4076, 0.3511]) -Greedy action tensor([-1.6999, -0.5086, 0.5339, -0.0184]) tensor([0.0526, 0.1732, 0.4913, 0.2828]) -Greedy action tensor([-1.9001, -0.4727, 0.6475, -0.1485]) tensor([0.0422, 0.1758, 0.5389, 0.2431]) -Greedy action tensor([ 0.6517, -0.3766, 0.0623, -0.2423]) tensor([0.4308, 0.1541, 0.2390, 0.1762]) -Greedy action tensor([ 0.4965, 0.0269, -0.0411, -0.0447]) tensor([0.3582, 0.2240, 0.2093, 0.2085]) -Greedy action tensor([ 0.5611, -0.1362, -0.0947, -0.3079]) tensor([0.4104, 0.2044, 0.2130, 0.1721]) -Greedy action tensor([ 0.6025, 0.0579, 0.1253, -0.1844]) tensor([0.3765, 0.2184, 0.2336, 0.1714]) -Greedy action tensor([ 1.1355, -0.5820, -0.1699, -0.4673]) tensor([0.6054, 0.1087, 0.1641, 0.1219]) -Greedy action tensor([ 0.2221, 0.1211, -0.0402, -0.3346]) tensor([0.3080, 0.2785, 0.2370, 0.1765]) -Greedy action tensor([ 0.7700, -0.5391, -0.0084, -0.4377]) tensor([0.4931, 0.1332, 0.2264, 0.1474]) -Greedy action tensor([ 0.5442, -0.3704, 0.0416, -0.3430]) tensor([0.4137, 0.1657, 0.2503, 0.1703]) -Greedy action tensor([ 0.3789, -0.0493, -0.0399, -0.1307]) tensor([0.3436, 0.2239, 0.2260, 0.2064]) -Greedy action tensor([ 0.7837, -0.7778, 0.0389, -0.4128]) tensor([0.5033, 0.1056, 0.2390, 0.1521]) -Greedy action tensor([ 0.5137, -0.1578, -0.0073, -0.1825]) tensor([0.3841, 0.1963, 0.2281, 0.1915]) -Greedy action tensor([ 0.6870, -0.3550, 0.0101, -0.1967]) tensor([0.4397, 0.1551, 0.2235, 0.1817]) -Greedy action tensor([ 0.5686, -0.5097, 0.1748, -0.4572]) tensor([0.4214, 0.1433, 0.2842, 0.1511]) -Greedy action tensor([ 0.4461, 0.0198, -0.0899, -0.2186]) tensor([0.3633, 0.2372, 0.2126, 0.1869]) -Greedy action tensor([ 0.7203, -0.4391, 0.1667, -0.6429]) tensor([0.4663, 0.1463, 0.2681, 0.1193]) -Greedy action tensor([ 0.9057, -0.4946, 0.0313, -0.6061]) tensor([0.5307, 0.1308, 0.2214, 0.1170]) -Greedy action tensor([ 0.3036, -0.1058, -0.0776, -0.3487]) tensor([0.3487, 0.2315, 0.2382, 0.1816]) -Greedy action tensor([ 0.8950, -0.3766, -0.0182, -0.4125]) tensor([0.5123, 0.1436, 0.2056, 0.1386]) -Greedy action tensor([ 0.3016, 0.2144, -0.1101, -0.1688]) tensor([0.3121, 0.2861, 0.2068, 0.1950]) -Greedy action tensor([ 0.3207, -0.1210, 0.1752, -0.4307]) tensor([0.3357, 0.2158, 0.2902, 0.1583]) -Greedy action tensor([ 0.8237, -0.6745, 0.0621, -0.4499]) tensor([0.5076, 0.1135, 0.2370, 0.1420]) -Greedy action tensor([ 0.7520, -0.7670, -0.0794, -0.5106]) tensor([0.5162, 0.1130, 0.2248, 0.1460]) -Greedy action tensor([ 0.6180, -0.1708, 0.0381, -0.2913]) tensor([0.4137, 0.1880, 0.2317, 0.1666]) -Greedy action tensor([ 0.6367, -0.4292, -0.0523, -0.4869]) tensor([0.4605, 0.1586, 0.2312, 0.1497]) -Greedy action tensor([ 0.3998, 0.0086, -0.0974, -0.2366]) tensor([0.3554, 0.2403, 0.2162, 0.1881]) -Greedy action tensor([ 0.7155, -0.5382, 0.0318, -0.5428]) tensor([0.4821, 0.1376, 0.2433, 0.1370]) -Greedy action tensor([ 0.6036, -0.0626, -0.0690, -0.5728]) tensor([0.4287, 0.2202, 0.2188, 0.1322]) -Greedy action tensor([ 0.5743, -0.2065, 0.0027, -0.1072]) tensor([0.3955, 0.1812, 0.2233, 0.2001]) -Greedy action tensor([ 0.8075, -0.7755, 0.1457, -0.6292]) tensor([0.5105, 0.1048, 0.2634, 0.1213]) -Greedy action tensor([ 0.6202, -0.2195, 0.1529, -0.3092]) tensor([0.4076, 0.1760, 0.2555, 0.1609]) -Greedy action tensor([ 0.8194, -0.4754, -0.0766, -0.3282]) tensor([0.5001, 0.1370, 0.2041, 0.1587]) -Greedy action tensor([ 0.8336, -0.2564, 0.0852, -0.2475]) tensor([0.4654, 0.1565, 0.2202, 0.1579]) -Greedy action tensor([ 0.3754, -0.1679, -0.0748, -0.2348]) tensor([0.3621, 0.2103, 0.2308, 0.1967]) -Greedy action tensor([ 1.0675, -1.2188, -0.0751, -0.4593]) tensor([0.6105, 0.0621, 0.1948, 0.1326]) -Greedy action tensor([ 0.6439, -0.3276, -0.1519, -0.3067]) tensor([0.4512, 0.1708, 0.2036, 0.1744]) -Greedy action tensor([ 0.5631, 0.2265, -0.0660, -0.5588]) tensor([0.3887, 0.2776, 0.2072, 0.1266]) -Greedy action tensor([ 0.5109, -0.2415, -0.1486, -0.2142]) tensor([0.4044, 0.1906, 0.2091, 0.1959]) -Greedy action tensor([ 0.7118, -0.4963, -0.1654, -0.3300]) tensor([0.4837, 0.1445, 0.2012, 0.1706]) -Greedy action tensor([ 0.7424, -0.4762, -0.0399, -0.4266]) tensor([0.4846, 0.1433, 0.2216, 0.1505]) -Greedy action tensor([ 0.4343, -0.4563, -0.1702, -0.4871]) tensor([0.4247, 0.1743, 0.2320, 0.1690]) -Greedy action tensor([ 0.3730, -0.0467, 0.0540, -0.1254]) tensor([0.3343, 0.2197, 0.2430, 0.2031]) -Greedy action tensor([ 1.0415, -0.4694, -0.0791, -0.7447]) tensor([0.5833, 0.1287, 0.1902, 0.0978]) -Greedy action tensor([ 0.9053, -0.4838, -0.1191, -0.3995]) tensor([0.5321, 0.1326, 0.1910, 0.1443]) -Greedy action tensor([ 0.7985, -0.6402, -0.0483, -0.5978]) tensor([0.5226, 0.1240, 0.2241, 0.1293]) -Greedy action tensor([ 0.5156, -0.2452, -0.0270, -0.1961]) tensor([0.3938, 0.1840, 0.2289, 0.1933]) -Greedy action tensor([ 1.0600, -0.8905, -0.0729, -0.6439]) tensor([0.6074, 0.0864, 0.1956, 0.1105]) -Greedy action tensor([ 0.3746, -0.0013, -0.0875, -0.3984]) tensor([0.3599, 0.2472, 0.2267, 0.1662]) -Greedy action tensor([ 0.5156, -0.1938, -0.0317, -0.1772]) tensor([0.3890, 0.1914, 0.2250, 0.1946]) -Greedy action tensor([ 0.8656, -0.6671, 0.0063, -0.4921]) tensor([0.5272, 0.1139, 0.2233, 0.1356]) -Greedy action tensor([ 1.4327, -1.2141, 0.0451, -0.8314]) tensor([0.7020, 0.0498, 0.1753, 0.0730]) -Greedy action tensor([ 0.9338, -0.3738, -0.2020, -0.4401]) tensor([0.5421, 0.1466, 0.1741, 0.1372]) -Greedy action tensor([ 0.5242, -0.1456, 0.0121, -0.1795]) tensor([0.3837, 0.1964, 0.2300, 0.1899]) -Greedy action tensor([ 0.6554, -0.7388, -0.1331, -0.5238]) tensor([0.4975, 0.1234, 0.2261, 0.1530]) -Greedy action tensor([ 0.8425, -0.8970, 0.0067, -0.6743]) tensor([0.5469, 0.0960, 0.2371, 0.1200]) -Greedy action tensor([ 0.7300, -0.5733, -0.0520, -0.7715]) tensor([0.5123, 0.1392, 0.2344, 0.1141]) -Greedy action tensor([ 0.6618, -0.3965, -0.0802, -0.2031]) tensor([0.4456, 0.1546, 0.2122, 0.1876]) -Greedy action tensor([ 0.7656, -0.4212, -0.0370, -0.2890]) tensor([0.4758, 0.1452, 0.2132, 0.1657]) -Greedy action tensor([ 0.8650, -0.6205, 0.0362, -0.4367]) tensor([0.5168, 0.1170, 0.2256, 0.1406]) -Greedy action tensor([ 0.4911, -0.1321, -0.0712, -0.3600]) tensor([0.3948, 0.2117, 0.2250, 0.1685]) -Greedy action tensor([ 0.3474, -0.1816, -0.1188, -0.4583]) tensor([0.3755, 0.2212, 0.2356, 0.1677]) -Greedy action tensor([ 1.0107, -0.8761, -0.0195, -0.5337]) tensor([0.5807, 0.0880, 0.2073, 0.1240]) -Greedy action tensor([ 0.8922, -0.7521, 0.1005, -0.4450]) tensor([0.5239, 0.1012, 0.2374, 0.1376]) -Greedy action tensor([ 0.2909, 0.1052, -0.1719, -0.1591]) tensor([0.3228, 0.2681, 0.2032, 0.2058]) -Greedy action tensor([ 0.1696, 0.0124, -0.0387, -0.5322]) tensor([0.3162, 0.2702, 0.2568, 0.1568]) -Greedy action tensor([ 0.6513, -0.3208, 0.1255, -0.2390]) tensor([0.4202, 0.1590, 0.2484, 0.1725]) -Greedy action tensor([ 0.6200, -0.4353, -0.0217, -0.2372]) tensor([0.4350, 0.1514, 0.2290, 0.1846]) -Greedy action tensor([ 0.6515, -0.4756, -0.0826, -0.3393]) tensor([0.4597, 0.1489, 0.2206, 0.1707]) -Greedy action tensor([ 0.4484, 0.0276, -0.2014, -0.2471]) tensor([0.3735, 0.2452, 0.1950, 0.1863]) -Greedy action tensor([ 0.5126, 0.0320, -0.1072, -0.1025]) tensor([0.3708, 0.2293, 0.1995, 0.2004]) -Greedy action tensor([ 0.5038, 0.1317, -0.0031, -0.0661]) tensor([0.3500, 0.2412, 0.2108, 0.1979]) -Greedy action tensor([ 0.6205, -0.5085, 0.1583, -0.4004]) tensor([0.4322, 0.1398, 0.2723, 0.1557]) -Greedy action tensor([ 0.6518, -0.2981, -0.0209, -0.3455]) tensor([0.4413, 0.1707, 0.2252, 0.1628]) -Greedy action tensor([ 0.5241, -0.2067, 0.0268, -0.2640]) tensor([0.3930, 0.1892, 0.2390, 0.1787]) -Greedy action tensor([ 0.6371, -0.5560, -0.0634, -0.5481]) tensor([0.4750, 0.1441, 0.2357, 0.1452]) -Greedy action tensor([ 0.8960, -0.5627, -0.0340, -0.4854]) tensor([0.5324, 0.1238, 0.2101, 0.1338]) -Greedy action tensor([ 0.5510, -0.5160, -0.0947, -0.2160]) tensor([0.4287, 0.1475, 0.2247, 0.1991]) -Greedy action tensor([ 0.8084, -0.3058, -0.1225, -0.3052]) tensor([0.4876, 0.1600, 0.1922, 0.1601]) -Greedy action tensor([ 0.6060, -0.2737, 0.0016, -0.2055]) tensor([0.4157, 0.1725, 0.2272, 0.1847]) -Greedy action tensor([ 0.8224, -0.3279, -0.0978, -0.2403]) tensor([0.4853, 0.1536, 0.1934, 0.1677]) -Greedy action tensor([ 0.3599, 0.0829, -0.0850, -0.0786]) tensor([0.3285, 0.2490, 0.2105, 0.2119]) -Greedy action tensor([ 0.5505, -0.5531, 0.1328, -0.7002]) tensor([0.4393, 0.1457, 0.2893, 0.1258]) -Greedy action tensor([ 0.0690, -0.1970, -0.0834, 0.3942]) tensor([0.2494, 0.1912, 0.2142, 0.3453]) -Greedy action tensor([-0.6131, -1.6171, -0.3350, 1.5021]) tensor([0.0911, 0.0334, 0.1203, 0.7552]) -Greedy action tensor([-0.5828, -0.2925, 0.4058, -0.0836]) tensor([0.1499, 0.2004, 0.4028, 0.2469]) -Greedy action tensor([-1.5360, -0.6003, -0.1480, -0.3244]) tensor([0.0916, 0.2335, 0.3671, 0.3077]) -Greedy action tensor([ 0.5402, -0.1028, -0.0935, 0.4229]) tensor([0.3395, 0.1785, 0.1801, 0.3019]) -Greedy action tensor([ 1.2786, -0.5810, 0.0847, -0.0579]) tensor([0.5809, 0.0905, 0.1760, 0.1526]) -Greedy action tensor([-0.2187, -1.5831, -0.7060, -0.1980]) tensor([0.3459, 0.0884, 0.2125, 0.3532]) -Greedy action tensor([ 1.9648, -0.6456, 1.3542, -0.3511]) tensor([0.5830, 0.0429, 0.3166, 0.0575]) -Greedy action tensor([-0.3478, 0.7566, -0.3244, 0.9112]) tensor([0.1168, 0.3524, 0.1195, 0.4113]) -Greedy action tensor([0.9334, 0.0707, 0.9799, 0.0456]) tensor([0.3471, 0.1465, 0.3636, 0.1428]) -Greedy action tensor([-0.5611, -1.1322, -0.4943, -0.4635]) tensor([0.2676, 0.1512, 0.2861, 0.2951]) -Greedy action tensor([ 1.0887, -1.5768, 1.9323, -0.1787]) tensor([0.2720, 0.0189, 0.6324, 0.0766]) -Greedy action tensor([-0.0360, 0.1830, 1.7114, 0.4257]) tensor([0.1045, 0.1301, 0.5997, 0.1658]) -Greedy action tensor([ 0.4009, -1.5778, 0.0996, 0.1311]) tensor([0.3786, 0.0523, 0.2801, 0.2890]) -Greedy action tensor([-0.6195, 0.0769, -1.1028, -0.6205]) tensor([0.2163, 0.4341, 0.1334, 0.2161]) -Greedy action tensor([-0.0886, -1.1160, 0.5608, -0.5585]) tensor([0.2566, 0.0918, 0.4912, 0.1604]) -Greedy action tensor([ 2.1958, -1.4966, 1.0831, 0.9645]) tensor([0.6077, 0.0151, 0.1997, 0.1774]) -Greedy action tensor([ 1.1875, -1.0794, 0.2949, 0.2486]) tensor([0.5251, 0.0544, 0.2151, 0.2054]) -Greedy action tensor([ 0.4164, -0.7666, 0.4014, 1.9336]) tensor([0.1460, 0.0447, 0.1438, 0.6655]) -Greedy action tensor([-0.4284, 1.5778, 1.2491, 0.2034]) tensor([0.0638, 0.4745, 0.3416, 0.1201]) -Greedy action tensor([-1.2400, -1.9662, 0.2711, -0.0980]) tensor([0.1093, 0.0529, 0.4953, 0.3425]) -Greedy action tensor([ 0.0460, -2.1489, -0.4798, -0.1346]) tensor([0.3941, 0.0439, 0.2330, 0.3290]) -Greedy action tensor([ 1.9342, -1.0246, 0.4367, 0.8840]) tensor([0.6152, 0.0319, 0.1376, 0.2153]) -Greedy action tensor([ 0.3482, -1.4180, -0.6710, 0.0256]) tensor([0.4433, 0.0758, 0.1600, 0.3210]) -Greedy action tensor([ 0.1907, -1.6210, -0.2130, -0.2361]) tensor([0.4026, 0.0658, 0.2689, 0.2627]) -Greedy action tensor([-0.5125, 0.7608, 0.1599, 0.4719]) tensor([0.1086, 0.3880, 0.2127, 0.2906]) -Greedy action tensor([ 0.9310, -0.6700, 1.1063, 0.5938]) tensor([0.3218, 0.0649, 0.3835, 0.2297]) -Greedy action tensor([-1.5798, -0.2374, 0.9102, -0.0568]) tensor([0.0466, 0.1783, 0.5616, 0.2135]) -Greedy action tensor([ 1.1841, 0.3944, -0.3507, 0.7146]) tensor([0.4358, 0.1978, 0.0939, 0.2725]) -Greedy action tensor([ 1.4326, -1.1033, 1.6586, 0.1496]) tensor([0.3831, 0.0303, 0.4803, 0.1062]) -Greedy action tensor([ 0.6646, -0.4251, 0.3035, 1.7164]) tensor([0.2042, 0.0687, 0.1423, 0.5847]) -Greedy action tensor([-1.2456, 0.2284, -0.7260, 1.4148]) tensor([0.0468, 0.2045, 0.0788, 0.6699]) -Greedy action tensor([-0.6526, 0.2148, 0.4627, 0.6725]) tensor([0.0981, 0.2335, 0.2993, 0.3691]) -Greedy action tensor([ 1.1958, 0.7989, -0.1923, 0.5697]) tensor([0.4071, 0.2737, 0.1016, 0.2176]) -Greedy action tensor([ 0.4836, -1.1320, 1.1909, 0.8053]) tensor([0.2171, 0.0432, 0.4403, 0.2994]) -Greedy action tensor([ 1.1596, -1.0807, 0.9473, 0.8483]) tensor([0.3777, 0.0402, 0.3054, 0.2767]) -Greedy action tensor([-0.6873, -1.6098, -0.3215, 1.7349]) tensor([0.0709, 0.0282, 0.1022, 0.7988]) -Greedy action tensor([ 1.7885, -0.2429, 0.2502, 1.2534]) tensor([0.5177, 0.0679, 0.1112, 0.3032]) -Greedy action tensor([-0.8416, -1.4514, -0.6621, -0.1025]) tensor([0.2069, 0.1124, 0.2475, 0.4332]) -Greedy action tensor([-0.0367, 0.3409, 0.9652, 1.0097]) tensor([0.1245, 0.1817, 0.3392, 0.3546]) -Greedy action tensor([-0.1187, -1.0886, 0.5095, -0.1795]) tensor([0.2384, 0.0904, 0.4468, 0.2244]) -Greedy action tensor([ 0.9951, -0.0883, 2.1245, -0.6851]) tensor([0.2165, 0.0733, 0.6699, 0.0403]) -Greedy action tensor([ 1.1981, -0.0262, -0.0108, 0.9706]) tensor([0.4186, 0.1231, 0.1250, 0.3334]) -Greedy action tensor([-0.5595, 0.1647, 0.7030, -0.4446]) tensor([0.1295, 0.2673, 0.4579, 0.1453]) -Greedy action tensor([-0.7618, -1.7214, 0.9311, -1.1285]) tensor([0.1331, 0.0510, 0.7236, 0.0923]) -Greedy action tensor([ 0.7737, 0.4232, -0.4629, -0.2049]) tensor([0.4218, 0.2971, 0.1225, 0.1586]) -Greedy action tensor([-0.4764, 0.8392, 0.2953, -0.5406]) tensor([0.1277, 0.4761, 0.2764, 0.1198]) -Greedy action tensor([-0.1183, 0.0845, 0.0535, -0.3161]) tensor([0.2362, 0.2894, 0.2805, 0.1938]) -Greedy action tensor([ 0.1241, -0.7005, 1.0512, -0.8451]) tensor([0.2302, 0.1009, 0.5816, 0.0873]) -Greedy action tensor([ 0.1384, -0.2579, 1.1270, -0.5223]) tensor([0.2051, 0.1380, 0.5511, 0.1059]) -Greedy action tensor([ 0.3670, -2.0911, 0.3550, 0.2635]) tensor([0.3361, 0.0288, 0.3321, 0.3031]) -Greedy action tensor([-0.8683, -1.2722, 0.7214, 0.2946]) tensor([0.1024, 0.0683, 0.5018, 0.3275]) -Greedy action tensor([ 0.0390, -1.7015, 1.2933, 0.1425]) tensor([0.1727, 0.0303, 0.6054, 0.1916]) -Greedy action tensor([ 0.3371, -1.1869, -0.0184, 2.1465]) tensor([0.1246, 0.0271, 0.0873, 0.7609]) -Greedy action tensor([ 0.0605, -0.0816, -0.0372, -0.8884]) tensor([0.3163, 0.2744, 0.2869, 0.1225]) -Greedy action tensor([-0.1833, -0.6594, 0.1756, -0.2647]) tensor([0.2516, 0.1563, 0.3602, 0.2319]) -Greedy action tensor([ 1.9447, -1.2862, 1.7238, 0.1714]) tensor([0.4973, 0.0197, 0.3987, 0.0844]) -Greedy action tensor([ 0.7774, 0.4768, -0.5485, 0.6095]) tensor([0.3507, 0.2597, 0.0931, 0.2965]) -Greedy action tensor([1.5722, 0.4674, 0.9625, 0.4934]) tensor([0.4515, 0.1496, 0.2454, 0.1535]) -Greedy action tensor([ 0.4142, 0.6162, 0.3494, -0.3891]) tensor([0.2771, 0.3391, 0.2597, 0.1241]) -Greedy action tensor([-0.2422, -0.6451, -0.6644, -0.1739]) tensor([0.2946, 0.1969, 0.1931, 0.3154]) -Greedy action tensor([1.3469, 0.7173, 1.4911, 0.0215]) tensor([0.3386, 0.1804, 0.3911, 0.0900]) -Greedy action tensor([ 0.8422, -0.4733, 1.6677, 0.1457]) tensor([0.2469, 0.0663, 0.5638, 0.1231]) -Greedy action tensor([ 1.0082, -1.8997, 0.3411, 1.1492]) tensor([0.3678, 0.0201, 0.1887, 0.4234]) -Greedy action tensor([-1.1012, 0.0589, 0.0400, -0.7725]) tensor([0.1148, 0.3663, 0.3594, 0.1595]) -Greedy action tensor([ 1.3106, -0.6698, 0.5986, -0.6268]) tensor([0.5641, 0.0779, 0.2768, 0.0813]) -Greedy action tensor([ 1.3007, 0.4965, -0.2212, 0.5055]) tensor([0.4723, 0.2113, 0.1031, 0.2132]) -Greedy action tensor([-0.0055, 0.9262, -1.0220, 0.2893]) tensor([0.1907, 0.4842, 0.0690, 0.2561]) -Greedy action tensor([ 0.1450, -1.5271, -0.1589, 0.2817]) tensor([0.3255, 0.0611, 0.2402, 0.3732]) -Greedy action tensor([ 0.0314, -0.9304, -0.7175, -0.2553]) tensor([0.3838, 0.1467, 0.1815, 0.2881]) -Greedy action tensor([ 0.3381, -0.2429, 0.4765, 0.9662]) tensor([0.2183, 0.1221, 0.2507, 0.4090]) -Greedy action tensor([-1.9643, -0.0290, 0.4328, -0.2416]) tensor([0.0408, 0.2825, 0.4483, 0.2284]) -Greedy action tensor([ 0.1976, -0.3210, 0.5891, 1.1075]) tensor([0.1799, 0.1071, 0.2661, 0.4469]) -Greedy action tensor([ 1.3734, 1.5532, 0.1835, -0.0470]) tensor([0.3646, 0.4364, 0.1109, 0.0881]) -Greedy action tensor([ 1.6091, -1.4405, 1.0764, 0.3376]) tensor([0.5223, 0.0247, 0.3066, 0.1464]) -Greedy action tensor([ 1.5201, -0.2356, 1.5625, 1.0439]) tensor([0.3525, 0.0609, 0.3677, 0.2189]) -Greedy action tensor([ 0.7694, -0.7818, 1.5608, -0.1021]) tensor([0.2606, 0.0553, 0.5751, 0.1090]) -Greedy action tensor([ 0.9513, -0.8454, -0.4538, 1.0137]) tensor([0.4039, 0.0670, 0.0991, 0.4300]) -Greedy action tensor([ 0.5118, -0.3693, 1.5617, -0.0817]) tensor([0.2073, 0.0859, 0.5923, 0.1145]) -Greedy action tensor([-0.1759, -0.3558, -0.0122, 0.6422]) tensor([0.1894, 0.1582, 0.2231, 0.4293]) -Greedy action tensor([-0.5801, 0.2890, -0.1412, 1.0823]) tensor([0.0980, 0.2336, 0.1519, 0.5165]) -Greedy action tensor([ 0.5716, -0.0078, -0.3202, 0.0725]) tensor([0.3880, 0.2174, 0.1591, 0.2356]) -Greedy action tensor([ 1.6378, -0.3231, -0.2897, 0.4499]) tensor([0.6285, 0.0885, 0.0915, 0.1916]) -Greedy action tensor([ 0.1265, -0.1864, 0.0526, 0.1321]) tensor([0.2728, 0.1995, 0.2534, 0.2743]) -Greedy action tensor([ 1.0845, -0.6561, -0.6781, 1.2516]) tensor([0.3954, 0.0694, 0.0679, 0.4674]) -Greedy action tensor([ 1.9255, -0.1659, -0.6047, 0.2419]) tensor([0.7200, 0.0889, 0.0573, 0.1337]) -Greedy action tensor([ 0.6419, -0.5828, -0.0283, 0.3479]) tensor([0.3920, 0.1152, 0.2006, 0.2922]) -Greedy action tensor([0.7365, 0.1946, 0.0619, 0.0269]) tensor([0.3872, 0.2252, 0.1972, 0.1904]) -Greedy action tensor([ 1.5897, -0.5922, -0.4613, 0.5796]) tensor([0.6228, 0.0703, 0.0801, 0.2268]) -Greedy action tensor([ 1.6103, -0.7113, -0.1401, 0.2739]) tensor([0.6516, 0.0639, 0.1132, 0.1712]) -Greedy action tensor([ 0.7969, -0.2866, -0.2174, 0.3922]) tensor([0.4223, 0.1429, 0.1531, 0.2817]) -Greedy action tensor([ 1.3889, -0.7425, -0.5315, 0.2479]) tensor([0.6310, 0.0749, 0.0925, 0.2016]) -Greedy action tensor([ 1.5824, -0.5334, -0.1462, 0.1495]) tensor([0.6507, 0.0784, 0.1155, 0.1553]) -Greedy action tensor([ 1.9179, -0.9427, -0.1593, 0.6004]) tensor([0.6895, 0.0395, 0.0864, 0.1847]) -Greedy action tensor([ 0.9069, -0.0943, -0.3510, 0.1136]) tensor([0.4753, 0.1746, 0.1351, 0.2150]) -Greedy action tensor([ 1.2411, 0.0895, -0.4706, -0.2660]) tensor([0.5820, 0.1840, 0.1051, 0.1289]) -Greedy action tensor([ 1.7147, -0.3157, -0.1578, 0.0773]) tensor([0.6759, 0.0887, 0.1039, 0.1315]) -Greedy action tensor([ 1.2330, -0.8172, -0.3710, 0.4705]) tensor([0.5567, 0.0717, 0.1119, 0.2597]) -Greedy action tensor([ 1.3973, -0.1743, -0.7008, 0.0156]) tensor([0.6323, 0.1313, 0.0776, 0.1588]) -Greedy action tensor([ 1.3405, -0.5300, -0.4213, 0.2061]) tensor([0.6070, 0.0935, 0.1043, 0.1952]) -Greedy action tensor([ 1.4182, -0.4988, -0.3074, 0.1260]) tensor([0.6251, 0.0919, 0.1113, 0.1717]) -Greedy action tensor([ 1.3752, -0.3689, -0.3024, 0.5827]) tensor([0.5512, 0.0963, 0.1030, 0.2495]) -Greedy action tensor([ 1.6755, -0.3486, -0.5083, 0.3052]) tensor([0.6672, 0.0881, 0.0751, 0.1695]) -Greedy action tensor([ 1.5156, -0.7457, -0.3852, 0.3330]) tensor([0.6410, 0.0668, 0.0958, 0.1965]) -Greedy action tensor([ 1.2023, -0.5727, -0.3541, 0.3174]) tensor([0.5577, 0.0945, 0.1176, 0.2302]) -Greedy action tensor([ 1.3115, -0.1814, -0.4122, 0.5344]) tensor([0.5368, 0.1206, 0.0958, 0.2468]) -Greedy action tensor([ 2.2222, -0.5154, -0.7130, 0.0400]) tensor([0.8126, 0.0526, 0.0432, 0.0917]) -Greedy action tensor([ 1.3614, -0.3549, -0.2671, 0.4558]) tensor([0.5617, 0.1010, 0.1102, 0.2271]) -Greedy action tensor([ 1.2138, -0.2423, -0.4648, -0.1308]) tensor([0.5951, 0.1387, 0.1111, 0.1551]) -Greedy action tensor([ 1.2439, -0.0737, 0.0386, 0.0819]) tensor([0.5319, 0.1424, 0.1593, 0.1664]) -Greedy action tensor([ 0.9264, -0.5266, -0.0773, 0.1727]) tensor([0.4829, 0.1129, 0.1770, 0.2272]) -Greedy action tensor([ 1.5041, 0.3691, -0.4224, 0.2123]) tensor([0.5741, 0.1845, 0.0836, 0.1578]) -Greedy action tensor([ 1.0915, -0.2372, -0.3262, -0.3174]) tensor([0.5709, 0.1512, 0.1383, 0.1395]) -Greedy action tensor([ 1.2781, -0.4827, -0.3095, -0.1598]) tensor([0.6197, 0.1065, 0.1267, 0.1471]) -Greedy action tensor([ 1.2035, -0.2423, -0.1633, 0.0275]) tensor([0.5559, 0.1309, 0.1417, 0.1715]) -Greedy action tensor([ 1.1805, -0.3139, -0.1870, 0.3923]) tensor([0.5171, 0.1160, 0.1317, 0.2351]) -Greedy action tensor([ 2.1750, -0.3872, -0.8452, 0.5745]) tensor([0.7532, 0.0581, 0.0367, 0.1520]) -Greedy action tensor([ 1.3619, -0.5660, -0.4658, 0.2872]) tensor([0.6069, 0.0883, 0.0976, 0.2072]) -Greedy action tensor([ 0.4052, -0.5693, -0.7455, 0.2027]) tensor([0.3983, 0.1503, 0.1260, 0.3253]) -Greedy action tensor([ 1.3755, -0.2290, -0.4257, 0.5786]) tensor([0.5504, 0.1106, 0.0909, 0.2481]) -Greedy action tensor([ 1.6196, 0.1178, -0.8620, 0.3038]) tensor([0.6351, 0.1415, 0.0531, 0.1704]) -Greedy action tensor([ 1.2519, -0.1425, -0.6580, 0.5217]) tensor([0.5325, 0.1321, 0.0789, 0.2566]) -Greedy action tensor([ 1.4656, -0.1711, -0.3331, 0.3525]) tensor([0.5922, 0.1153, 0.0980, 0.1945]) -Greedy action tensor([ 2.1260, -1.3346, -0.4677, 0.5548]) tensor([0.7611, 0.0239, 0.0569, 0.1581]) -Greedy action tensor([ 1.4397, -0.8530, -0.3893, 0.8915]) tensor([0.5436, 0.0549, 0.0873, 0.3142]) -Greedy action tensor([ 1.4912, -0.2474, -0.4955, 0.2165]) tensor([0.6280, 0.1104, 0.0861, 0.1755]) -Greedy action tensor([ 1.5240, -1.0130, -0.1267, 0.5906]) tensor([0.6009, 0.0475, 0.1153, 0.2363]) -Greedy action tensor([ 1.2248, -0.4920, -0.2474, 0.3724]) tensor([0.5448, 0.0979, 0.1250, 0.2323]) -Greedy action tensor([ 1.1612, -0.7282, -0.1869, 0.0702]) tensor([0.5725, 0.0865, 0.1487, 0.1923]) -Greedy action tensor([ 0.9732, -0.1842, -0.5941, 0.3744]) tensor([0.4825, 0.1517, 0.1007, 0.2651]) -Greedy action tensor([ 0.7915, -0.4924, -0.4032, 0.3007]) tensor([0.4562, 0.1263, 0.1381, 0.2793]) -Greedy action tensor([ 1.3854, -0.1900, -0.4272, 0.3759]) tensor([0.5765, 0.1193, 0.0941, 0.2101]) -Greedy action tensor([ 0.3973, -0.5016, 0.0518, 0.0401]) tensor([0.3553, 0.1446, 0.2515, 0.2486]) -Greedy action tensor([ 1.5522, -1.1207, -0.0100, 0.3674]) tensor([0.6311, 0.0436, 0.1323, 0.1930]) -Greedy action tensor([ 1.3179, -0.3120, -0.4960, 0.3744]) tensor([0.5720, 0.1121, 0.0932, 0.2227]) -Greedy action tensor([ 1.0034, -0.6604, -0.3260, 0.4367]) tensor([0.4947, 0.0937, 0.1309, 0.2807]) -Greedy action tensor([ 1.1013, -0.4165, -0.2862, 0.2830]) tensor([0.5235, 0.1148, 0.1307, 0.2310]) -Greedy action tensor([ 1.4184, -0.5858, -0.3650, 0.4030]) tensor([0.6006, 0.0809, 0.1009, 0.2176]) -Greedy action tensor([ 1.0276, -0.1768, -0.3976, 0.2103]) tensor([0.5046, 0.1513, 0.1213, 0.2228]) -Greedy action tensor([ 1.6761, -0.5587, -0.4335, 0.6101]) tensor([0.6359, 0.0680, 0.0771, 0.2190]) -Greedy action tensor([ 1.0455, -0.5443, -0.2085, 0.1002]) tensor([0.5325, 0.1086, 0.1520, 0.2069]) -Greedy action tensor([ 1.4515, -0.1899, -0.4035, 0.3557]) tensor([0.5937, 0.1150, 0.0929, 0.1984]) -Greedy action tensor([ 1.0163, -0.2627, -0.2596, 0.0643]) tensor([0.5146, 0.1432, 0.1436, 0.1986]) -Greedy action tensor([ 0.7631, -0.0915, -0.0446, 0.0466]) tensor([0.4238, 0.1803, 0.1890, 0.2070]) -Greedy action tensor([ 1.6308, -0.7931, -0.3254, 0.5889]) tensor([0.6318, 0.0560, 0.0893, 0.2229]) -Greedy action tensor([ 1.8572, -1.0760, -0.3022, 0.5959]) tensor([0.6887, 0.0367, 0.0795, 0.1951]) -Greedy action tensor([ 0.7781, -0.5293, -0.3117, 0.4300]) tensor([0.4324, 0.1170, 0.1454, 0.3053]) -Greedy action tensor([ 0.3984, -0.2132, -0.0422, -0.0113]) tensor([0.3509, 0.1903, 0.2259, 0.2329]) -Greedy action tensor([ 0.8796, -0.1071, -0.0899, 0.0432]) tensor([0.4576, 0.1706, 0.1736, 0.1983]) -Greedy action tensor([ 0.9026, -0.0013, 0.1162, -0.0163]) tensor([0.4426, 0.1792, 0.2016, 0.1766]) -Greedy action tensor([ 1.9885, -0.2753, -0.5193, 0.0460]) tensor([0.7526, 0.0782, 0.0613, 0.1079]) -Greedy action tensor([ 1.1158, -0.3500, -0.4386, -0.2084]) tensor([0.5854, 0.1352, 0.1237, 0.1557]) -Greedy action tensor([ 1.1837, -0.8208, -0.4243, 0.3987]) tensor([0.5583, 0.0752, 0.1118, 0.2546]) -Greedy action tensor([ 0.9912, -0.3596, -0.2113, 0.1884]) tensor([0.4981, 0.1290, 0.1497, 0.2232]) -Greedy action tensor([ 0.6815, -0.5263, 0.0019, 0.0770]) tensor([0.4252, 0.1271, 0.2155, 0.2323]) -Greedy action tensor([ 1.6079, -0.6574, -0.0062, 0.1720]) tensor([0.6490, 0.0674, 0.1292, 0.1544]) -Greedy action tensor([ 1.3591, -0.3183, -0.3948, 0.1157]) tensor([0.6067, 0.1134, 0.1050, 0.1750]) -Greedy action tensor([ 1.3160, -0.0687, -0.2873, 0.2229]) tensor([0.5597, 0.1401, 0.1126, 0.1876]) -Greedy action tensor([ 1.1956, -0.4292, -0.3590, 0.5034]) tensor([0.5239, 0.1032, 0.1107, 0.2622]) -Greedy action tensor([ 1.3211, -0.3275, -0.0895, 0.1303]) tensor([0.5746, 0.1105, 0.1402, 0.1747]) -Greedy action tensor([ 2.0998, -0.6019, -0.4223, 0.5622]) tensor([0.7341, 0.0492, 0.0589, 0.1577]) -Greedy action tensor([ 0.8638, -0.2579, -0.1684, 0.2254]) tensor([0.4525, 0.1474, 0.1612, 0.2390]) -Greedy action tensor([ 0.5779, 0.0127, -0.4746, -0.0959]) tensor([0.4120, 0.2341, 0.1438, 0.2100]) -Greedy action tensor([-1.9320, -0.5484, 0.5166, -0.1891]) tensor([0.0449, 0.1791, 0.5195, 0.2565]) -Greedy action tensor([-1.8126, -0.4827, 0.6034, -0.1359]) tensor([0.0469, 0.1772, 0.5251, 0.2507]) -Greedy action tensor([-0.4172, -0.4805, 0.2253, 0.2311]) tensor([0.1739, 0.1632, 0.3305, 0.3324]) -Greedy action tensor([-1.8735, -0.3416, 0.6181, -0.1280]) tensor([0.0427, 0.1974, 0.5155, 0.2445]) -Greedy action tensor([-1.5233, -0.4668, 0.4529, 0.0191]) tensor([0.0634, 0.1824, 0.4576, 0.2965]) -Greedy action tensor([-0.9941, -0.7099, -0.2552, -0.0910]) tensor([0.1451, 0.1929, 0.3039, 0.3581]) -Greedy action tensor([-0.6618, -0.5470, 0.1442, 0.5107]) tensor([0.1317, 0.1478, 0.2950, 0.4255]) -Greedy action tensor([-1.3767, -0.5323, 0.3481, 0.2028]) tensor([0.0725, 0.1687, 0.4069, 0.3519]) -Greedy action tensor([-1.4220, -0.3387, 0.7056, 0.8070]) tensor([0.0462, 0.1365, 0.3879, 0.4293]) -Greedy action tensor([-1.5777, -0.5617, 0.4711, 0.0367]) tensor([0.0604, 0.1669, 0.4689, 0.3037]) -Greedy action tensor([-1.4958, -0.4608, 0.4291, 0.0742]) tensor([0.0646, 0.1819, 0.4429, 0.3106]) -Greedy action tensor([-1.7497, -0.3559, 0.5488, -0.0696]) tensor([0.0491, 0.1980, 0.4893, 0.2636]) -Greedy action tensor([-1.4988, -0.2616, 0.3935, 0.1358]) tensor([0.0617, 0.2126, 0.4093, 0.3163]) -Greedy action tensor([-0.7926, -0.4887, 0.2429, 0.2409]) tensor([0.1253, 0.1698, 0.3528, 0.3521]) -Greedy action tensor([-0.9551, -0.3913, 0.3266, 0.9825]) tensor([0.0752, 0.1321, 0.2708, 0.5219]) -Greedy action tensor([-1.6506, -0.3886, 0.5832, 0.0668]) tensor([0.0514, 0.1817, 0.4803, 0.2866]) -Greedy action tensor([-1.7244, -0.5203, 0.5551, -0.0304]) tensor([0.0512, 0.1706, 0.4999, 0.2784]) -Greedy action tensor([-0.6321, -0.0684, 0.7374, 1.3838]) tensor([0.0704, 0.1238, 0.2770, 0.5288]) -Greedy action tensor([-1.4092, -0.1867, 0.6624, -0.6055]) tensor([0.0686, 0.2331, 0.5449, 0.1533]) -Greedy action tensor([-1.8861, -0.4860, 0.6510, -0.1458]) tensor([0.0427, 0.1733, 0.5404, 0.2436]) -Greedy action tensor([-1.8337, -0.3566, 0.6002, -0.1042]) tensor([0.0446, 0.1954, 0.5086, 0.2515]) -Greedy action tensor([-1.2519, -0.4731, 0.4566, -0.4009]) tensor([0.0906, 0.1973, 0.5000, 0.2121]) -Greedy action tensor([-0.9182, -0.4184, 0.5025, -0.3807]) tensor([0.1176, 0.1939, 0.4870, 0.2014]) -Greedy action tensor([-1.5714, -0.2832, 0.6561, 0.4017]) tensor([0.0474, 0.1719, 0.4397, 0.3410]) -Greedy action tensor([-1.7294, -0.2755, 0.5081, -0.0376]) tensor([0.0498, 0.2132, 0.4666, 0.2704]) -Greedy action tensor([-0.8418, -0.6272, 0.2579, 0.0486]) tensor([0.1302, 0.1614, 0.3911, 0.3172]) -Greedy action tensor([-1.1969, -0.6119, 0.2867, 0.3106]) tensor([0.0853, 0.1532, 0.3762, 0.3853]) -Greedy action tensor([-1.9074, -0.4466, 0.6511, -0.1556]) tensor([0.0417, 0.1796, 0.5384, 0.2403]) -Greedy action tensor([-1.7545, -0.6797, 0.2653, -0.4420]) tensor([0.0659, 0.1930, 0.4964, 0.2447]) -Greedy action tensor([-1.9008, -0.4482, 0.6428, -0.1581]) tensor([0.0422, 0.1802, 0.5367, 0.2409]) -Greedy action tensor([-1.1598, -0.6721, 0.3014, 0.1611]) tensor([0.0936, 0.1524, 0.4034, 0.3506]) -Greedy action tensor([-1.7113, 0.3633, 0.4443, -0.1230]) tensor([0.0445, 0.3540, 0.3839, 0.2177]) -Greedy action tensor([-1.8570, -0.4906, 0.6212, -0.1330]) tensor([0.0445, 0.1747, 0.5310, 0.2498]) -Greedy action tensor([-1.9221, -0.4493, 0.6523, -0.1672]) tensor([0.0412, 0.1797, 0.5408, 0.2383]) -Greedy action tensor([-1.7801, -0.4495, 0.5933, -0.0806]) tensor([0.0476, 0.1803, 0.5114, 0.2607]) -Greedy action tensor([-1.5819, -0.4962, 0.5482, 0.1584]) tensor([0.0553, 0.1638, 0.4656, 0.3153]) -Greedy action tensor([-1.3671, -0.5415, 1.3592, 1.2564]) tensor([0.0309, 0.0706, 0.4723, 0.4262]) -Greedy action tensor([-1.1644, -0.5281, 0.3930, 0.6358]) tensor([0.0731, 0.1381, 0.3468, 0.4421]) -Greedy action tensor([-1.3006, 0.8283, 0.3938, -0.1021]) tensor([0.0551, 0.4628, 0.2997, 0.1825]) -Greedy action tensor([-1.3220, -0.1948, 0.3796, 0.5541]) tensor([0.0621, 0.1918, 0.3406, 0.4055]) -Greedy action tensor([-1.5363, 0.1512, 0.3824, 0.0022]) tensor([0.0559, 0.3024, 0.3811, 0.2606]) -Greedy action tensor([-0.2489, -0.0600, 1.1486, 1.6900]) tensor([0.0757, 0.0915, 0.3064, 0.5264]) -Greedy action tensor([-1.6702, -0.5114, 0.5466, 0.0189]) tensor([0.0532, 0.1697, 0.4887, 0.2883]) -Greedy action tensor([-1.5290, -0.2504, 0.5369, -0.2811]) tensor([0.0626, 0.2249, 0.4943, 0.2181]) -Greedy action tensor([-1.7601, -0.4687, 0.6420, 0.0341]) tensor([0.0461, 0.1677, 0.5091, 0.2772]) -Greedy action tensor([-1.0947, -0.5547, 0.2758, 0.1645]) tensor([0.0983, 0.1686, 0.3869, 0.3462]) -Greedy action tensor([-0.9894, -0.4808, 0.3886, 0.5601]) tensor([0.0882, 0.1467, 0.3499, 0.4153]) -Greedy action tensor([-1.8073, -0.4062, 0.6334, -0.0475]) tensor([0.0447, 0.1816, 0.5137, 0.2600]) -Greedy action tensor([-1.2916, -0.5114, 0.8304, 0.8207]) tensor([0.0505, 0.1102, 0.4217, 0.4176]) -Greedy action tensor([-1.8562, -0.3735, 0.6203, -0.1161]) tensor([0.0435, 0.1915, 0.5173, 0.2477]) -Greedy action tensor([-1.4778, -0.1778, 0.6483, 0.2622]) tensor([0.0533, 0.1957, 0.4471, 0.3039]) -Greedy action tensor([-1.8922, -0.4342, 0.6377, -0.1683]) tensor([0.0426, 0.1832, 0.5351, 0.2390]) -Greedy action tensor([-1.2155, -0.5840, 0.2774, 0.2570]) tensor([0.0855, 0.1608, 0.3806, 0.3730]) -Greedy action tensor([-1.4241, -0.5458, 0.3744, 0.1521]) tensor([0.0700, 0.1685, 0.4229, 0.3386]) -Greedy action tensor([-1.0076, 0.1085, 0.4187, -0.3369]) tensor([0.0983, 0.3001, 0.4093, 0.1923]) -Greedy action tensor([-1.9297, -0.4463, 0.6585, -0.1730]) tensor([0.0408, 0.1799, 0.5429, 0.2364]) -Greedy action tensor([-0.2669, -0.3302, 0.1736, 0.1915]) tensor([0.1971, 0.1850, 0.3062, 0.3117]) -Greedy action tensor([-1.1218, -0.6025, 0.2543, 0.2542]) tensor([0.0943, 0.1586, 0.3736, 0.3735]) -Greedy action tensor([-1.0005, -0.6122, 0.4895, 0.8571]) tensor([0.0751, 0.1107, 0.3331, 0.4811]) -Greedy action tensor([-1.8459, -0.3499, 0.6060, -0.1124]) tensor([0.0440, 0.1963, 0.5107, 0.2490]) -Greedy action tensor([-1.7935, -0.1807, 0.5476, -0.0858]) tensor([0.0456, 0.2288, 0.4740, 0.2516]) -Greedy action tensor([-1.7618, -0.4719, 0.5900, -0.1464]) tensor([0.0496, 0.1801, 0.5209, 0.2494]) -Greedy action tensor([-1.4298, 0.4702, 0.2422, 0.2073]) tensor([0.0551, 0.3684, 0.2933, 0.2832]) -Greedy action tensor([-1.2246, -0.5290, 0.3185, 0.1276]) tensor([0.0866, 0.1736, 0.4051, 0.3347]) -Greedy action tensor([-1.7208, -0.4690, 0.5675, 0.0067]) tensor([0.0500, 0.1750, 0.4934, 0.2816]) -Greedy action tensor([-1.7816, -0.3466, 0.5730, -0.0839]) tensor([0.0472, 0.1981, 0.4970, 0.2577]) -Greedy action tensor([-1.8933, -0.3709, 0.6319, -0.1544]) tensor([0.0421, 0.1928, 0.5256, 0.2395]) -Greedy action tensor([-1.6134, -0.4980, 0.5477, 0.0756]) tensor([0.0551, 0.1681, 0.4784, 0.2984]) -Greedy action tensor([-1.4965, -0.4710, 0.4500, 0.3086]) tensor([0.0593, 0.1653, 0.4151, 0.3604]) -Greedy action tensor([-1.7321, -0.2804, 0.5411, -0.0244]) tensor([0.0488, 0.2083, 0.4738, 0.2691]) -Greedy action tensor([-1.1464, -0.5619, 0.3399, 0.5106]) tensor([0.0803, 0.1440, 0.3548, 0.4209]) -Greedy action tensor([-1.3185, -0.3392, 0.4588, -0.2529]) tensor([0.0801, 0.2134, 0.4739, 0.2326]) -Greedy action tensor([-1.7362, -0.5042, 0.6264, 0.0479]) tensor([0.0476, 0.1632, 0.5056, 0.2835]) -Greedy action tensor([-1.0096, 0.2887, 0.1999, 0.1555]) tensor([0.0891, 0.3264, 0.2987, 0.2857]) -Greedy action tensor([-1.8734, -0.4256, 0.6726, -0.1079]) tensor([0.0419, 0.1783, 0.5347, 0.2450]) -Greedy action tensor([-0.5222, -0.0591, 0.7191, 1.4116]) tensor([0.0771, 0.1226, 0.2669, 0.5334]) -Greedy action tensor([-1.2398, -0.5736, 0.2756, 0.2705]) tensor([0.0831, 0.1619, 0.3785, 0.3765]) -Greedy action tensor([-0.2950, -0.3790, 0.2388, 0.3498]) tensor([0.1808, 0.1662, 0.3084, 0.3446]) -Greedy action tensor([-1.2084, -0.5870, 0.2675, 0.2648]) tensor([0.0862, 0.1605, 0.3771, 0.3762]) -Greedy action tensor([-1.2607, -0.6095, 0.3315, 0.1196]) tensor([0.0847, 0.1624, 0.4162, 0.3367]) -Greedy action tensor([-1.1933, -0.3743, 0.6729, -0.7207]) tensor([0.0882, 0.2001, 0.5702, 0.1415]) -Greedy action tensor([ 0.5241, -0.2835, 0.0285, -0.5139]) tensor([0.4151, 0.1851, 0.2528, 0.1470]) -Greedy action tensor([ 0.7057, -0.2905, -0.0751, -0.2857]) tensor([0.4549, 0.1680, 0.2084, 0.1688]) -Greedy action tensor([ 0.3261, 0.2105, 0.0191, -0.2061]) tensor([0.3112, 0.2772, 0.2289, 0.1827]) -Greedy action tensor([ 0.4575, 0.0358, -0.0284, -0.0617]) tensor([0.3489, 0.2289, 0.2146, 0.2076]) -Greedy action tensor([ 0.4616, -0.5659, -0.1759, -0.1659]) tensor([0.4132, 0.1479, 0.2184, 0.2206]) -Greedy action tensor([ 0.3176, 0.6984, -0.2802, 0.0158]) tensor([0.2664, 0.3900, 0.1466, 0.1970]) -Greedy action tensor([ 0.3710, -0.0419, -0.1196, -0.1111]) tensor([0.3459, 0.2289, 0.2117, 0.2135]) -Greedy action tensor([ 0.7560, -0.0203, 0.1439, -0.3445]) tensor([0.4283, 0.1970, 0.2322, 0.1425]) -Greedy action tensor([ 0.8436, -0.5355, 0.0920, -0.5852]) tensor([0.5094, 0.1283, 0.2402, 0.1221]) -Greedy action tensor([ 0.9228, -0.9217, 0.0694, -0.3753]) tensor([0.5385, 0.0851, 0.2294, 0.1470]) -Greedy action tensor([ 0.5184, -0.1338, -0.1297, -0.2876]) tensor([0.4015, 0.2091, 0.2100, 0.1793]) -Greedy action tensor([ 0.7540, 0.2635, -0.1343, -0.2942]) tensor([0.4212, 0.2579, 0.1733, 0.1477]) -Greedy action tensor([ 0.3288, 0.1610, 0.0103, -0.1046]) tensor([0.3105, 0.2625, 0.2258, 0.2013]) -Greedy action tensor([ 0.7772, -0.4660, -0.0561, -0.5237]) tensor([0.5012, 0.1446, 0.2178, 0.1365]) -Greedy action tensor([ 0.2488, -0.0972, 0.0042, -0.2399]) tensor([0.3222, 0.2279, 0.2523, 0.1976]) -Greedy action tensor([ 0.3211, -0.2535, -0.0781, -0.3224]) tensor([0.3624, 0.2040, 0.2431, 0.1904]) -Greedy action tensor([ 0.4336, -0.4575, 0.2114, -0.3728]) tensor([0.3763, 0.1544, 0.3013, 0.1680]) -Greedy action tensor([ 0.5828, -0.1572, 0.0777, -0.2260]) tensor([0.3959, 0.1889, 0.2389, 0.1763]) -Greedy action tensor([ 0.8099, -0.4295, 0.1147, -0.6545]) tensor([0.4951, 0.1434, 0.2470, 0.1145]) -Greedy action tensor([ 0.9482, -0.8447, 0.0462, -0.8060]) tensor([0.5730, 0.0954, 0.2325, 0.0992]) -Greedy action tensor([ 0.6247, 0.0772, -0.0832, -0.3860]) tensor([0.4107, 0.2375, 0.2023, 0.1495]) -Greedy action tensor([ 0.8014, -0.4697, -0.1255, -0.4179]) tensor([0.5072, 0.1423, 0.2007, 0.1498]) -Greedy action tensor([ 0.8539, -0.2926, -0.0170, -0.6452]) tensor([0.5103, 0.1621, 0.2136, 0.1140]) -Greedy action tensor([ 0.4123, -0.3021, 0.0778, -0.3330]) tensor([0.3732, 0.1827, 0.2671, 0.1771]) -Greedy action tensor([ 0.8212, -0.2926, 0.1400, -0.5979]) tensor([0.4816, 0.1581, 0.2437, 0.1165]) -Greedy action tensor([ 0.8990, -0.5550, -0.0225, -0.2399]) tensor([0.5124, 0.1197, 0.2039, 0.1641]) -Greedy action tensor([ 0.4721, -0.1385, 0.2417, -0.3956]) tensor([0.3627, 0.1969, 0.2881, 0.1523]) -Greedy action tensor([ 0.8877, -0.5641, 0.0681, -0.3838]) tensor([0.5115, 0.1198, 0.2254, 0.1434]) -Greedy action tensor([ 0.5628, -0.4219, 0.0691, -0.5387]) tensor([0.4317, 0.1613, 0.2635, 0.1435]) -Greedy action tensor([ 0.3098, 0.0220, -0.1850, -0.2249]) tensor([0.3395, 0.2546, 0.2070, 0.1989]) -Greedy action tensor([ 0.6902, -0.3712, 0.0330, -0.2876]) tensor([0.4464, 0.1544, 0.2314, 0.1679]) -Greedy action tensor([ 0.8007, -0.1145, 0.0567, -0.4272]) tensor([0.4611, 0.1847, 0.2191, 0.1351]) -Greedy action tensor([ 0.5203, -0.1125, 0.0135, -0.2161]) tensor([0.3828, 0.2033, 0.2306, 0.1833]) -Greedy action tensor([ 0.5541, -0.3399, -0.0772, -0.2711]) tensor([0.4203, 0.1719, 0.2236, 0.1842]) -Greedy action tensor([ 0.4522, -0.0740, 0.0470, -0.2779]) tensor([0.3650, 0.2157, 0.2434, 0.1759]) -Greedy action tensor([ 0.7217, -0.5538, -0.1693, -0.4475]) tensor([0.5000, 0.1396, 0.2051, 0.1553]) -Greedy action tensor([ 0.7128, -0.2350, 0.0820, -0.4484]) tensor([0.4479, 0.1736, 0.2383, 0.1402]) -Greedy action tensor([ 0.4576, 0.1811, -0.0844, -0.0162]) tensor([0.3375, 0.2560, 0.1963, 0.2102]) -Greedy action tensor([ 0.5225, -0.3101, 0.0192, -0.2618]) tensor([0.4006, 0.1743, 0.2422, 0.1829]) -Greedy action tensor([ 0.5159, 0.0747, -0.1431, -0.1278]) tensor([0.3723, 0.2395, 0.1926, 0.1956]) -Greedy action tensor([ 0.8068, -0.4847, -0.0135, -0.5144]) tensor([0.5045, 0.1387, 0.2222, 0.1346]) -Greedy action tensor([ 0.8368, -0.4134, -0.0131, -0.3400]) tensor([0.4945, 0.1416, 0.2114, 0.1524]) -Greedy action tensor([-0.1171, 0.1731, -0.1243, 0.0364]) tensor([0.2225, 0.2973, 0.2208, 0.2594]) -Greedy action tensor([ 0.4729, 0.1215, -0.0259, -0.1551]) tensor([0.3515, 0.2474, 0.2135, 0.1876]) -Greedy action tensor([ 0.6470, -0.7697, -0.1177, -0.2512]) tensor([0.4727, 0.1147, 0.2201, 0.1925]) -Greedy action tensor([ 0.5102, -0.0026, 0.0284, -0.1610]) tensor([0.3666, 0.2195, 0.2265, 0.1874]) -Greedy action tensor([ 0.4876, -0.1241, -0.0046, -0.0900]) tensor([0.3683, 0.1998, 0.2252, 0.2067]) -Greedy action tensor([ 0.8133, -0.4430, -0.1911, -0.2337]) tensor([0.4995, 0.1422, 0.1829, 0.1753]) -Greedy action tensor([ 0.8261, -0.3786, -0.0215, -0.3669]) tensor([0.4922, 0.1476, 0.2109, 0.1493]) -Greedy action tensor([ 0.8704, -0.6702, -0.1668, -0.5018]) tensor([0.5488, 0.1176, 0.1945, 0.1391]) -Greedy action tensor([ 0.6247, -0.4789, 0.0230, -0.2749]) tensor([0.4374, 0.1451, 0.2396, 0.1779]) -Greedy action tensor([ 0.3086, -0.0343, -0.0884, -0.1634]) tensor([0.3327, 0.2361, 0.2237, 0.2075]) -Greedy action tensor([ 0.2857, 0.1495, -0.0370, -0.1733]) tensor([0.3097, 0.2703, 0.2243, 0.1957]) -Greedy action tensor([ 0.8435, -0.3596, -0.0457, -0.2229]) tensor([0.4865, 0.1461, 0.1999, 0.1675]) -Greedy action tensor([ 0.4614, -0.0837, -0.0844, -0.1353]) tensor([0.3690, 0.2140, 0.2138, 0.2032]) -Greedy action tensor([ 0.4358, -0.0021, -0.0567, -0.0436]) tensor([0.3477, 0.2244, 0.2125, 0.2153]) -Greedy action tensor([ 0.2877, 0.1375, -0.0465, -0.2326]) tensor([0.3154, 0.2714, 0.2258, 0.1874]) -Greedy action tensor([ 0.2611, -0.2360, -0.0975, -0.1432]) tensor([0.3362, 0.2045, 0.2349, 0.2244]) -Greedy action tensor([ 0.3629, -0.1177, -0.0201, -0.2224]) tensor([0.3500, 0.2164, 0.2386, 0.1949]) -Greedy action tensor([ 0.9996, -0.6841, -0.0525, -0.5878]) tensor([0.5749, 0.1068, 0.2008, 0.1175]) -Greedy action tensor([ 0.5383, -0.2527, -0.1431, -0.1042]) tensor([0.4024, 0.1824, 0.2036, 0.2116]) -Greedy action tensor([ 0.7102, -0.3684, -0.0378, -0.2135]) tensor([0.4524, 0.1538, 0.2141, 0.1796]) -Greedy action tensor([ 0.5914, -0.1504, -0.0664, -0.0519]) tensor([0.3968, 0.1890, 0.2056, 0.2086]) -Greedy action tensor([ 0.4429, -0.2581, -0.1035, -0.3405]) tensor([0.3949, 0.1959, 0.2287, 0.1804]) -Greedy action tensor([ 0.3725, -0.0906, -0.0492, -0.1504]) tensor([0.3475, 0.2187, 0.2279, 0.2060]) -Greedy action tensor([ 1.1046, -0.5734, -0.2082, -0.2979]) tensor([0.5876, 0.1097, 0.1581, 0.1445]) -Greedy action tensor([ 0.3701, -0.1012, -0.1408, -0.1331]) tensor([0.3535, 0.2207, 0.2121, 0.2137]) -Greedy action tensor([ 0.7480, -0.2262, -0.0252, -0.2912]) tensor([0.4560, 0.1722, 0.2105, 0.1613]) -Greedy action tensor([ 0.3744, -0.0763, 0.0719, -0.2744]) tensor([0.3450, 0.2198, 0.2549, 0.1803]) -Greedy action tensor([ 0.4907, -0.0768, -0.0317, -0.1253]) tensor([0.3703, 0.2100, 0.2197, 0.2000]) -Greedy action tensor([ 1.2585, -1.2602, 0.1045, -0.9315]) tensor([0.6632, 0.0534, 0.2092, 0.0742]) -Greedy action tensor([ 0.8283, -0.3320, -0.1485, -0.1300]) tensor([0.4823, 0.1511, 0.1816, 0.1850]) -Greedy action tensor([ 0.7231, -0.6478, -0.1395, -0.5363]) tensor([0.5103, 0.1295, 0.2154, 0.1448]) -Greedy action tensor([ 0.5609, -0.1254, 0.0412, -0.2076]) tensor([0.3903, 0.1965, 0.2321, 0.1810]) -Greedy action tensor([ 0.5099, -0.2664, -0.0631, -0.2511]) tensor([0.4014, 0.1847, 0.2263, 0.1875]) -Greedy action tensor([ 0.3592, -0.0773, -0.0441, -0.2092]) tensor([0.3471, 0.2243, 0.2319, 0.1966]) -Greedy action tensor([ 0.1986, 0.1715, -0.2265, 0.0409]) tensor([0.2873, 0.2796, 0.1878, 0.2454]) -Greedy action tensor([ 0.6101, -0.4329, 0.0142, -0.2480]) tensor([0.4297, 0.1514, 0.2368, 0.1822]) -Greedy action tensor([ 0.5568, -0.2630, 0.0190, -0.1719]) tensor([0.3989, 0.1757, 0.2329, 0.1925]) -Greedy action tensor([ 0.3321, -0.2668, -0.0306, -0.2737]) tensor([0.3583, 0.1969, 0.2493, 0.1955]) -Greedy action tensor([ 0.5724, -0.2377, 0.0880, -0.6425]) tensor([0.4242, 0.1887, 0.2613, 0.1259]) -Greedy action tensor([ 0.0118, -2.3993, -0.4186, 0.8309]) tensor([0.2495, 0.0224, 0.1622, 0.5659]) -Greedy action tensor([1.3965, 0.2662, 0.8298, 1.1823]) tensor([0.3707, 0.1197, 0.2103, 0.2992]) -Greedy action tensor([ 0.6478, 0.3181, 1.4947, -0.0801]) tensor([0.2205, 0.1586, 0.5144, 0.1065]) -Greedy action tensor([ 0.2006, -0.6110, -0.6681, 0.3276]) tensor([0.3334, 0.1481, 0.1399, 0.3786]) -Greedy action tensor([ 0.2968, -1.0908, -0.2628, -1.2992]) tensor([0.4941, 0.1234, 0.2824, 0.1002]) -Greedy action tensor([ 0.4455, -0.1518, -0.7206, 1.7211]) tensor([0.1837, 0.1011, 0.0572, 0.6579]) -Greedy action tensor([ 1.8464, -0.6122, -0.7517, 1.2007]) tensor([0.5937, 0.0508, 0.0442, 0.3113]) -Greedy action tensor([-0.2349, -1.2405, 1.4897, -0.9732]) tensor([0.1342, 0.0491, 0.7526, 0.0641]) -Greedy action tensor([ 0.4211, 0.0168, 1.4500, -0.1941]) tensor([0.1998, 0.1333, 0.5589, 0.1080]) -Greedy action tensor([ 0.4700, -0.1822, -0.2198, 1.5867]) tensor([0.1970, 0.1026, 0.0988, 0.6016]) -Greedy action tensor([ 0.5002, -2.3130, 0.8286, 0.7032]) tensor([0.2722, 0.0163, 0.3780, 0.3335]) -Greedy action tensor([ 0.5358, -0.2514, -0.0282, 0.0265]) tensor([0.3810, 0.1734, 0.2167, 0.2289]) -Greedy action tensor([ 0.6026, -1.9721, 0.5507, 0.4203]) tensor([0.3498, 0.0266, 0.3321, 0.2915]) -Greedy action tensor([-0.7864, -0.2310, -0.8496, 1.2086]) tensor([0.0906, 0.1579, 0.0851, 0.6664]) -Greedy action tensor([ 0.3415, -0.4818, -0.4044, 0.8335]) tensor([0.2818, 0.1237, 0.1336, 0.4609]) -Greedy action tensor([ 1.4887, -0.2998, 0.5677, 0.7402]) tensor([0.4906, 0.0820, 0.1953, 0.2321]) -Greedy action tensor([ 0.0713, -1.4982, -0.2598, 0.4563]) tensor([0.2945, 0.0613, 0.2115, 0.4328]) -Greedy action tensor([0.8166, 0.0923, 0.4544, 0.0946]) tensor([0.3750, 0.1818, 0.2611, 0.1822]) -Greedy action tensor([-0.3283, -0.7576, -1.3621, -0.0190]) tensor([0.2968, 0.1932, 0.1056, 0.4044]) -Greedy action tensor([-0.7859, 0.6653, 0.8758, -0.4076]) tensor([0.0834, 0.3558, 0.4392, 0.1217]) -Greedy action tensor([-0.1149, -0.6802, 0.9075, 0.2397]) tensor([0.1732, 0.0984, 0.4815, 0.2469]) -Greedy action tensor([ 1.1954, -0.4189, -0.1203, 0.7222]) tensor([0.4784, 0.0952, 0.1283, 0.2981]) -Greedy action tensor([ 1.1856, -1.4481, 0.5800, 0.1014]) tensor([0.5113, 0.0367, 0.2790, 0.1729]) -Greedy action tensor([ 0.5979, -0.0914, 1.0293, 0.2765]) tensor([0.2655, 0.1333, 0.4087, 0.1925]) -Greedy action tensor([-0.2592, -0.7678, 0.2803, 0.4948]) tensor([0.1838, 0.1105, 0.3152, 0.3906]) -Greedy action tensor([ 1.2065, 0.7448, -0.0787, 1.0662]) tensor([0.3602, 0.2270, 0.0996, 0.3131]) -Greedy action tensor([ 0.1759, -1.8375, -0.0828, 0.4031]) tensor([0.3164, 0.0422, 0.2443, 0.3971]) -Greedy action tensor([ 0.7297, -1.3344, 1.4552, -0.1939]) tensor([0.2786, 0.0354, 0.5755, 0.1106]) -Greedy action tensor([ 1.0737, -0.5795, 1.5283, 0.9979]) tensor([0.2707, 0.0518, 0.4265, 0.2509]) -Greedy action tensor([-0.0324, 0.1144, 0.0755, -0.6368]) tensor([0.2619, 0.3033, 0.2917, 0.1431]) -Greedy action tensor([ 1.8984, -0.5877, 1.2395, 0.1651]) tensor([0.5626, 0.0468, 0.2911, 0.0994]) -Greedy action tensor([ 0.2752, -1.1913, -0.2407, 1.0848]) tensor([0.2454, 0.0566, 0.1465, 0.5515]) -Greedy action tensor([-1.3582, -0.8293, 0.1132, 1.1302]) tensor([0.0524, 0.0889, 0.2281, 0.6307]) -Greedy action tensor([ 1.1817, -0.3274, -0.5890, 1.1351]) tensor([0.4263, 0.0943, 0.0726, 0.4069]) -Greedy action tensor([ 1.4727, -0.0813, -1.0505, 0.1516]) tensor([0.6417, 0.1356, 0.0515, 0.1712]) -Greedy action tensor([-0.1852, -0.8274, -0.3067, 0.9505]) tensor([0.1810, 0.0952, 0.1603, 0.5635]) -Greedy action tensor([ 1.0219, 1.6722, 2.1107, -0.7573]) tensor([0.1651, 0.3164, 0.4906, 0.0279]) -Greedy action tensor([ 0.5621, -0.7607, 0.4330, 0.6347]) tensor([0.3105, 0.0827, 0.2729, 0.3339]) -Greedy action tensor([ 0.1117, -2.3399, -0.0252, 0.5028]) tensor([0.2910, 0.0251, 0.2537, 0.4302]) -Greedy action tensor([-0.1482, 0.2288, 1.7886, -1.2063]) tensor([0.1027, 0.1497, 0.7120, 0.0356]) -Greedy action tensor([-0.6621, -1.2311, -0.2717, -0.3282]) tensor([0.2252, 0.1275, 0.3328, 0.3145]) -Greedy action tensor([ 0.9079, -1.0883, -0.3425, 0.7210]) tensor([0.4441, 0.0603, 0.1272, 0.3684]) -Greedy action tensor([ 0.8934, -1.0172, 0.6389, 0.5482]) tensor([0.3800, 0.0562, 0.2946, 0.2691]) -Greedy action tensor([ 0.1985, -1.1200, 0.3252, -0.2603]) tensor([0.3295, 0.0882, 0.3740, 0.2083]) -Greedy action tensor([ 0.1572, -0.6741, 0.8844, 0.2225]) tensor([0.2187, 0.0952, 0.4526, 0.2335]) -Greedy action tensor([-0.0705, 0.7808, -0.8735, 1.0950]) tensor([0.1429, 0.3348, 0.0640, 0.4583]) -Greedy action tensor([ 0.1609, -1.2604, 0.1292, 1.0457]) tensor([0.2159, 0.0521, 0.2091, 0.5229]) -Greedy action tensor([ 0.6635, -1.3437, -0.3700, 0.0433]) tensor([0.4931, 0.0663, 0.1754, 0.2652]) -Greedy action tensor([ 1.1313, -0.5929, -0.5912, 0.6115]) tensor([0.5124, 0.0914, 0.0915, 0.3047]) -Greedy action tensor([ 1.5152, -0.9316, 0.7786, 0.4520]) tensor([0.5234, 0.0453, 0.2506, 0.1807]) -Greedy action tensor([-0.2479, -1.0359, 0.2821, -0.5383]) tensor([0.2563, 0.1166, 0.4354, 0.1917]) -Greedy action tensor([ 0.4605, -0.6712, -0.3457, 0.4188]) tensor([0.3665, 0.1182, 0.1637, 0.3516]) -Greedy action tensor([ 0.5168, 0.2353, 0.4429, -0.5671]) tensor([0.3309, 0.2497, 0.3074, 0.1119]) -Greedy action tensor([ 0.5503, -0.2066, 0.4214, 0.6998]) tensor([0.2849, 0.1337, 0.2505, 0.3309]) -Greedy action tensor([-0.9321, -0.7715, 0.1844, 0.5677]) tensor([0.1030, 0.1209, 0.3146, 0.4615]) -Greedy action tensor([-1.1481, -0.2059, 0.1126, -0.5063]) tensor([0.1112, 0.2853, 0.3923, 0.2113]) -Greedy action tensor([ 0.9234, 0.3082, 0.1060, -0.3214]) tensor([0.4405, 0.2381, 0.1945, 0.1269]) -Greedy action tensor([ 0.5972, -0.9554, 0.7606, 0.6468]) tensor([0.2907, 0.0615, 0.3423, 0.3055]) -Greedy action tensor([ 1.4679, -0.1450, 0.3533, 1.0275]) tensor([0.4606, 0.0918, 0.1511, 0.2965]) -Greedy action tensor([-0.7844, -0.8569, -0.6924, 1.0195]) tensor([0.1099, 0.1022, 0.1205, 0.6674]) -Greedy action tensor([-0.6361, -0.4998, -0.6512, -0.7184]) tensor([0.2468, 0.2828, 0.2431, 0.2273]) -Greedy action tensor([-0.0686, -0.1239, 0.0323, 0.7161]) tensor([0.1907, 0.1804, 0.2109, 0.4179]) -Greedy action tensor([ 0.4940, -0.6215, 1.1090, -1.1289]) tensor([0.2963, 0.0971, 0.5481, 0.0585]) -Greedy action tensor([0.7608, 0.2335, 1.6932, 1.1641]) tensor([0.1777, 0.1049, 0.4515, 0.2660]) -Greedy action tensor([ 1.8852, -1.4521, 0.9027, -0.2045]) tensor([0.6521, 0.0232, 0.2441, 0.0807]) -Greedy action tensor([ 1.7379, -2.1530, 0.6629, 0.5555]) tensor([0.5994, 0.0122, 0.2046, 0.1837]) -Greedy action tensor([ 1.3747, -0.7682, 0.5745, 0.6482]) tensor([0.4878, 0.0572, 0.2191, 0.2359]) -Greedy action tensor([-0.0350, -1.2288, 1.3463, 0.2555]) tensor([0.1511, 0.0458, 0.6012, 0.2020]) -Greedy action tensor([ 0.4152, 0.5194, 0.1415, -0.1168]) tensor([0.2892, 0.3210, 0.2199, 0.1699]) -Greedy action tensor([ 0.2537, 0.7448, -0.2071, 0.6880]) tensor([0.2080, 0.3398, 0.1312, 0.3211]) -Greedy action tensor([-0.8370, -0.2863, -0.0854, 0.0464]) tensor([0.1375, 0.2385, 0.2915, 0.3326]) -Greedy action tensor([ 1.7299, -0.9202, 2.0382, 0.3664]) tensor([0.3721, 0.0263, 0.5064, 0.0952]) -Greedy action tensor([0.7088, 0.0035, 0.1478, 0.7643]) tensor([0.3203, 0.1582, 0.1828, 0.3386]) -Greedy action tensor([-0.0643, -0.6780, 0.2270, -0.7720]) tensor([0.2965, 0.1605, 0.3968, 0.1461]) -Greedy action tensor([0.5101, 0.1140, 0.1182, 1.9118]) tensor([0.1560, 0.1050, 0.1054, 0.6336]) -Greedy action tensor([ 1.4168, -0.1390, 0.2899, 1.9669]) tensor([0.3059, 0.0646, 0.0991, 0.5304]) -Greedy action tensor([-0.4690, 0.5411, -0.6270, -0.6462]) tensor([0.1839, 0.5050, 0.1570, 0.1540]) -Greedy action tensor([ 0.3567, -1.5560, -0.7931, -0.0475]) tensor([0.4691, 0.0693, 0.1486, 0.3131]) -Greedy action tensor([ 0.7442, 0.9122, -0.2340, 0.3914]) tensor([0.3066, 0.3627, 0.1153, 0.2155]) -Greedy action tensor([ 1.6102, -0.3938, 1.1753, 1.3125]) tensor([0.3961, 0.0534, 0.2564, 0.2941]) -Greedy action tensor([-1.6357, -1.2568, 1.1452, -0.3584]) tensor([0.0451, 0.0659, 0.7274, 0.1617]) -Greedy action tensor([-0.8232, 0.2221, 0.6641, -1.5234]) tensor([0.1141, 0.3245, 0.5048, 0.0566]) -Greedy action tensor([ 1.2150, -0.4095, -0.1501, 0.1775]) tensor([0.5535, 0.1090, 0.1413, 0.1961]) -Greedy action tensor([ 0.8032, -0.5800, -0.0078, 0.3049]) tensor([0.4343, 0.1089, 0.1930, 0.2638]) -Greedy action tensor([ 0.9063, -0.2335, 0.1001, 0.2083]) tensor([0.4417, 0.1413, 0.1972, 0.2198]) -Greedy action tensor([ 1.6332, -0.3855, -0.5308, 0.5982]) tensor([0.6239, 0.0829, 0.0717, 0.2216]) -Greedy action tensor([ 1.0596, -0.2680, -0.1298, 0.1381]) tensor([0.5083, 0.1347, 0.1547, 0.2022]) -Greedy action tensor([ 1.0243, -0.2582, 0.0239, -0.0833]) tensor([0.5062, 0.1404, 0.1861, 0.1672]) -Greedy action tensor([ 1.2387, 0.1334, -0.6899, 0.4146]) tensor([0.5222, 0.1729, 0.0759, 0.2290]) -Greedy action tensor([ 1.2199, -0.4817, -0.1147, -0.0049]) tensor([0.5749, 0.1049, 0.1513, 0.1689]) -Greedy action tensor([ 1.8019, -0.1219, -0.3172, 0.4044]) tensor([0.6608, 0.0965, 0.0794, 0.1634]) -Greedy action tensor([ 2.2372, 0.1415, -0.0388, -0.0905]) tensor([0.7558, 0.0929, 0.0776, 0.0737]) -Greedy action tensor([ 0.9882, 0.1937, -0.1778, -0.0391]) tensor([0.4714, 0.2130, 0.1469, 0.1687]) -Greedy action tensor([ 0.7216, -0.0210, 0.0160, 0.0940]) tensor([0.3994, 0.1901, 0.1972, 0.2132]) -Greedy action tensor([ 1.0306, -0.3008, -0.4216, 0.2165]) tensor([0.5151, 0.1361, 0.1206, 0.2282]) -Greedy action tensor([ 0.9925, -0.3532, -0.3731, 0.3161]) tensor([0.4941, 0.1286, 0.1261, 0.2512]) -Greedy action tensor([ 1.7185, -0.0886, -0.4965, -0.1882]) tensor([0.7033, 0.1154, 0.0768, 0.1045]) -Greedy action tensor([ 1.2291, -0.0766, -0.4079, 0.2831]) tensor([0.5394, 0.1462, 0.1049, 0.2095]) -Greedy action tensor([ 1.9795, -0.4277, -0.1716, 0.6276]) tensor([0.6825, 0.0615, 0.0794, 0.1766]) -Greedy action tensor([ 0.9782, -0.0362, -0.1809, -0.1614]) tensor([0.5009, 0.1816, 0.1572, 0.1603]) -Greedy action tensor([ 1.1562, -0.7105, -0.2426, -0.5030]) tensor([0.6282, 0.0971, 0.1551, 0.1195]) -Greedy action tensor([ 0.7637, -0.1726, 0.1115, -0.1823]) tensor([0.4345, 0.1704, 0.2264, 0.1687]) -Greedy action tensor([ 0.7436, -0.0455, -0.1851, 0.2426]) tensor([0.4073, 0.1850, 0.1609, 0.2468]) -Greedy action tensor([ 1.6317, -0.4155, -0.4610, 0.3807]) tensor([0.6499, 0.0839, 0.0802, 0.1860]) -Greedy action tensor([ 0.8661, -0.2224, -0.2309, 0.2289]) tensor([0.4547, 0.1531, 0.1518, 0.2404]) -Greedy action tensor([ 1.5307, -0.7503, -0.1733, 0.2177]) tensor([0.6439, 0.0658, 0.1172, 0.1732]) -Greedy action tensor([ 0.7707, 0.1277, 0.0955, -0.6406]) tensor([0.4389, 0.2307, 0.2234, 0.1070]) -Greedy action tensor([ 1.0573, -0.2634, -0.0602, 0.1004]) tensor([0.5055, 0.1349, 0.1653, 0.1942]) -Greedy action tensor([ 1.6376, -0.9871, -0.3191, 0.3122]) tensor([0.6759, 0.0490, 0.0955, 0.1796]) -Greedy action tensor([ 1.2329, -0.5943, -0.5125, 0.4335]) tensor([0.5602, 0.0901, 0.0978, 0.2519]) -Greedy action tensor([ 1.2568, -0.4637, -0.2626, 0.2427]) tensor([0.5680, 0.1017, 0.1243, 0.2060]) -Greedy action tensor([ 2.1798, -1.2351, -0.1160, 0.5052]) tensor([0.7570, 0.0249, 0.0762, 0.1419]) -Greedy action tensor([ 1.7898, -1.2434, -0.2956, 0.5903]) tensor([0.6785, 0.0327, 0.0843, 0.2045]) -Greedy action tensor([ 0.5534, -0.2311, -0.5194, 0.3940]) tensor([0.3772, 0.1721, 0.1290, 0.3216]) -Greedy action tensor([ 1.1179, -0.1123, -0.2618, 0.3228]) tensor([0.5012, 0.1464, 0.1261, 0.2263]) -Greedy action tensor([ 0.8445, -0.3074, -0.0370, 0.2208]) tensor([0.4413, 0.1395, 0.1828, 0.2365]) -Greedy action tensor([ 2.1564, -0.7908, -0.3716, 0.3982]) tensor([0.7665, 0.0402, 0.0612, 0.1321]) -Greedy action tensor([ 1.3418, -0.2884, -0.3466, 0.1458]) tensor([0.5941, 0.1164, 0.1098, 0.1797]) -Greedy action tensor([ 1.4075, -0.5767, -0.2869, 0.3395]) tensor([0.6006, 0.0826, 0.1103, 0.2064]) -Greedy action tensor([ 0.6228, -0.0186, -0.1528, -0.0480]) tensor([0.4003, 0.2108, 0.1843, 0.2047]) -Greedy action tensor([ 1.2683, 0.0996, -0.6426, 0.2375]) tensor([0.5508, 0.1712, 0.0815, 0.1965]) -Greedy action tensor([ 0.7411, -0.0678, -0.0746, -0.0244]) tensor([0.4250, 0.1893, 0.1880, 0.1977]) -Greedy action tensor([ 1.5659, -0.0394, -0.7299, 0.2840]) tensor([0.6333, 0.1272, 0.0638, 0.1757]) -Greedy action tensor([ 1.6837, -0.5141, -0.4316, 0.3462]) tensor([0.6693, 0.0743, 0.0807, 0.1757]) -Greedy action tensor([ 1.6120, 0.1007, -0.7185, 0.1851]) tensor([0.6419, 0.1416, 0.0624, 0.1541]) -Greedy action tensor([ 1.0367, -0.5547, -0.6182, 0.6908]) tensor([0.4757, 0.0969, 0.0909, 0.3366]) -Greedy action tensor([ 1.2233, -0.6560, -0.3702, 0.6465]) tensor([0.5215, 0.0796, 0.1060, 0.2929]) -Greedy action tensor([ 0.5769, -0.3702, -0.1996, 0.2330]) tensor([0.3911, 0.1517, 0.1799, 0.2773]) -Greedy action tensor([ 1.1799, -0.4530, -0.2882, 0.3733]) tensor([0.5342, 0.1044, 0.1230, 0.2384]) -Greedy action tensor([ 1.3140, -0.5808, -0.3027, -0.3794]) tensor([0.6524, 0.0981, 0.1295, 0.1200]) -Greedy action tensor([ 1.5246, -0.4549, -0.4159, 0.6191]) tensor([0.5931, 0.0819, 0.0852, 0.2398]) -Greedy action tensor([ 1.4093, -0.1557, -0.4044, 0.4778]) tensor([0.5662, 0.1184, 0.0923, 0.2231]) -Greedy action tensor([ 1.4836, -0.3836, -0.3927, 0.1497]) tensor([0.6365, 0.0984, 0.0975, 0.1677]) -Greedy action tensor([ 1.4235, -0.7591, -0.1322, 0.4353]) tensor([0.5896, 0.0665, 0.1244, 0.2195]) -Greedy action tensor([ 0.9937, -0.3397, -0.6186, 0.9811]) tensor([0.4081, 0.1076, 0.0814, 0.4030]) -Greedy action tensor([ 1.2281, -0.4456, -0.0745, 0.0471]) tensor([0.5661, 0.1062, 0.1539, 0.1738]) -Greedy action tensor([ 1.8246, -1.0237, -0.1160, 0.3907]) tensor([0.6945, 0.0402, 0.0997, 0.1655]) -Greedy action tensor([ 0.7467, -0.0890, 0.1137, 0.0052]) tensor([0.4097, 0.1776, 0.2175, 0.1952]) -Greedy action tensor([ 1.0054, -0.4776, -0.1649, 0.2907]) tensor([0.4934, 0.1120, 0.1531, 0.2415]) -Greedy action tensor([ 0.7212, -0.2403, -0.3042, 0.0714]) tensor([0.4419, 0.1689, 0.1585, 0.2307]) -Greedy action tensor([ 1.0668, -0.3269, -0.3683, -0.0809]) tensor([0.5544, 0.1376, 0.1320, 0.1760]) -Greedy action tensor([ 1.9602, -1.1501, -0.6657, 1.2240]) tensor([0.6266, 0.0279, 0.0454, 0.3001]) -Greedy action tensor([ 0.4706, -0.4792, 0.1902, -0.1287]) tensor([0.3715, 0.1437, 0.2807, 0.2041]) -Greedy action tensor([ 1.5456, -0.0605, -0.2837, 0.4975]) tensor([0.5842, 0.1172, 0.0938, 0.2048]) -Greedy action tensor([ 0.7539, -0.2321, -0.1890, 0.1858]) tensor([0.4293, 0.1602, 0.1672, 0.2433]) -Greedy action tensor([ 1.1083, -0.3450, -0.0528, 0.4260]) tensor([0.4872, 0.1139, 0.1526, 0.2463]) -Greedy action tensor([ 1.6276, -0.6952, -0.2128, 0.5760]) tensor([0.6226, 0.0610, 0.0988, 0.2175]) -Greedy action tensor([ 2.3444, -1.4670, -0.5611, 1.0762]) tensor([0.7363, 0.0163, 0.0403, 0.2072]) -Greedy action tensor([ 0.7528, -0.0230, -0.2352, 0.2248]) tensor([0.4128, 0.1900, 0.1537, 0.2435]) -Greedy action tensor([ 1.9661, -0.7002, -0.1261, 0.5895]) tensor([0.6919, 0.0481, 0.0854, 0.1747]) -Greedy action tensor([ 1.4820, -0.6522, -0.6704, 0.2795]) tensor([0.6515, 0.0771, 0.0757, 0.1957]) -Greedy action tensor([ 0.8842, -0.4012, -0.1781, 0.1484]) tensor([0.4759, 0.1316, 0.1645, 0.2280]) -Greedy action tensor([ 1.2278, -0.5114, -0.0536, 0.3400]) tensor([0.5362, 0.0942, 0.1489, 0.2207]) -Greedy action tensor([ 2.3001, -0.5929, -0.3415, 0.5019]) tensor([0.7738, 0.0429, 0.0551, 0.1281]) -Greedy action tensor([ 1.7224, -0.9888, -0.5452, 0.5426]) tensor([0.6769, 0.0450, 0.0701, 0.2080]) -Greedy action tensor([ 0.8217, -0.2683, -0.2495, 0.2355]) tensor([0.4474, 0.1504, 0.1533, 0.2489]) -Greedy action tensor([ 1.3793, -0.9420, 0.1082, 0.1877]) tensor([0.5944, 0.0583, 0.1667, 0.1805]) -Greedy action tensor([ 1.8824, -0.3553, -0.2278, 0.2507]) tensor([0.7025, 0.0750, 0.0851, 0.1374]) -Greedy action tensor([ 0.7609, -0.5157, -0.1209, -0.0362]) tensor([0.4665, 0.1301, 0.1931, 0.2102]) -Greedy action tensor([ 1.0107, -0.0674, -0.6805, -0.5907]) tensor([0.5793, 0.1971, 0.1068, 0.1168]) -Greedy action tensor([ 1.2857, -0.2588, -0.3162, 0.2536]) tensor([0.5646, 0.1205, 0.1138, 0.2011]) -Greedy action tensor([ 0.4698, -0.1489, -0.1573, 0.2063]) tensor([0.3520, 0.1896, 0.1880, 0.2704]) -Greedy action tensor([ 1.4511, -0.5974, -0.5078, 0.4308]) tensor([0.6133, 0.0791, 0.0865, 0.2211]) -Greedy action tensor([-0.2614, -0.2337, 0.8987, 1.7438]) tensor([0.0791, 0.0813, 0.2523, 0.5873]) -Greedy action tensor([-1.8319, -0.3621, 0.6596, -0.2649]) tensor([0.0450, 0.1957, 0.5436, 0.2157]) -Greedy action tensor([-1.4659, -0.5733, 0.5552, 0.2297]) tensor([0.0608, 0.1485, 0.4591, 0.3315]) -Greedy action tensor([-0.6454, -0.5805, 0.2959, 0.0856]) tensor([0.1491, 0.1591, 0.3822, 0.3097]) -Greedy action tensor([-0.8728, -0.6137, 0.2329, 0.4333]) tensor([0.1110, 0.1438, 0.3354, 0.4098]) -Greedy action tensor([-1.8527, -0.4721, 0.6144, -0.1112]) tensor([0.0445, 0.1770, 0.5246, 0.2539]) -Greedy action tensor([-0.9617, -0.6073, 0.1680, 0.6098]) tensor([0.0968, 0.1379, 0.2995, 0.4658]) -Greedy action tensor([-0.2843, -0.1064, 0.2030, 0.2944]) tensor([0.1784, 0.2131, 0.2904, 0.3182]) -Greedy action tensor([-1.7363, -0.5001, 0.5585, -0.0528]) tensor([0.0506, 0.1743, 0.5024, 0.2726]) -Greedy action tensor([0.0143, 0.0601, 0.9785, 1.7517]) tensor([0.0966, 0.1011, 0.2534, 0.5489]) -Greedy action tensor([-1.7618, -0.5216, 0.5802, -0.0716]) tensor([0.0493, 0.1704, 0.5129, 0.2673]) -Greedy action tensor([-1.2172, -0.5417, 0.3316, 0.0866]) tensor([0.0881, 0.1731, 0.4145, 0.3244]) -Greedy action tensor([-0.7159, -0.6287, 0.2201, 0.1262]) tensor([0.1436, 0.1567, 0.3662, 0.3334]) -Greedy action tensor([-1.8682, -0.4890, 0.6332, -0.1415]) tensor([0.0439, 0.1742, 0.5352, 0.2467]) -Greedy action tensor([-1.8041, -0.5066, 0.6005, -0.1034]) tensor([0.0471, 0.1725, 0.5221, 0.2582]) -Greedy action tensor([-1.0212, -0.7876, 0.2202, 0.0620]) tensor([0.1152, 0.1456, 0.3988, 0.3404]) -Greedy action tensor([-1.3771, -0.3968, 0.7719, 0.6627]) tensor([0.0502, 0.1337, 0.4303, 0.3858]) -Greedy action tensor([-1.7617, -0.4485, 0.5717, -0.0985]) tensor([0.0492, 0.1831, 0.5078, 0.2598]) -Greedy action tensor([-1.7261, -0.4741, 0.5666, -0.0297]) tensor([0.0504, 0.1762, 0.4987, 0.2747]) -Greedy action tensor([-1.9065, -0.3716, 0.6386, -0.1533]) tensor([0.0414, 0.1921, 0.5275, 0.2390]) -Greedy action tensor([-1.8706, -0.7276, 0.2666, -0.2421]) tensor([0.0565, 0.1771, 0.4786, 0.2878]) -Greedy action tensor([-1.1223, -0.1148, 0.5112, -0.6368]) tensor([0.0954, 0.2612, 0.4885, 0.1550]) -Greedy action tensor([-1.6564, -0.5295, 0.5129, -0.0088]) tensor([0.0555, 0.1711, 0.4853, 0.2881]) -Greedy action tensor([-1.3697, 0.4340, 0.3818, 0.3278]) tensor([0.0547, 0.3319, 0.3150, 0.2984]) -Greedy action tensor([-1.8262, -0.4298, 0.6612, -0.0777]) tensor([0.0438, 0.1771, 0.5272, 0.2518]) -Greedy action tensor([-1.8666, -0.3791, 0.6249, -0.1284]) tensor([0.0431, 0.1908, 0.5208, 0.2452]) -Greedy action tensor([-1.9351, -0.4365, 0.6573, -0.1764]) tensor([0.0406, 0.1816, 0.5422, 0.2356]) -Greedy action tensor([-0.9560, -0.4247, 0.3940, 1.0783]) tensor([0.0704, 0.1197, 0.2715, 0.5383]) -Greedy action tensor([-0.8789, -0.5436, 0.1911, 0.5535]) tensor([0.1052, 0.1472, 0.3068, 0.4408]) -Greedy action tensor([-1.7623, -0.4847, 0.5554, -0.0413]) tensor([0.0492, 0.1765, 0.4994, 0.2750]) -Greedy action tensor([-1.9070, -0.3842, 0.6368, -0.1564]) tensor([0.0415, 0.1905, 0.5288, 0.2392]) -Greedy action tensor([-1.9416, -0.4546, 0.6639, -0.1795]) tensor([0.0403, 0.1785, 0.5462, 0.2350]) -Greedy action tensor([-1.7584, -0.5144, 0.5762, -0.0743]) tensor([0.0495, 0.1719, 0.5116, 0.2669]) -Greedy action tensor([-0.5341, -0.4600, 0.2343, 0.0418]) tensor([0.1663, 0.1791, 0.3587, 0.2959]) -Greedy action tensor([-1.2113, -0.3614, 0.3097, 0.3271]) tensor([0.0795, 0.1861, 0.3640, 0.3704]) -Greedy action tensor([-1.2381, -0.4964, 0.5480, 0.6346]) tensor([0.0642, 0.1348, 0.3831, 0.4178]) -Greedy action tensor([-1.8718, -0.4669, 0.6333, -0.1292]) tensor([0.0434, 0.1769, 0.5316, 0.2480]) -Greedy action tensor([-1.9196, -0.4615, 0.6599, -0.1582]) tensor([0.0411, 0.1768, 0.5426, 0.2394]) -Greedy action tensor([-1.6839, -0.5210, 0.5403, 0.0049]) tensor([0.0530, 0.1696, 0.4903, 0.2870]) -Greedy action tensor([-1.9044, -0.4586, 0.6430, -0.1538]) tensor([0.0421, 0.1785, 0.5372, 0.2422]) -Greedy action tensor([-1.1638, -0.6447, 0.3697, -0.0412]) tensor([0.0963, 0.1618, 0.4461, 0.2958]) -Greedy action tensor([-1.0752, -0.5675, 0.3175, 0.4005]) tensor([0.0904, 0.1502, 0.3639, 0.3955]) -Greedy action tensor([-1.5145, 0.6471, 0.3509, 0.2324]) tensor([0.0457, 0.3969, 0.2952, 0.2622]) -Greedy action tensor([-2.0363, -0.9079, 0.6569, -0.0938]) tensor([0.0387, 0.1196, 0.5718, 0.2699]) -Greedy action tensor([-0.7128, -0.0045, 0.1338, 0.1027]) tensor([0.1312, 0.2664, 0.3059, 0.2965]) -Greedy action tensor([-0.5392, -0.4490, 0.2496, 0.5944]) tensor([0.1351, 0.1479, 0.2973, 0.4197]) -Greedy action tensor([-1.9403, -0.4572, 0.6654, -0.1778]) tensor([0.0404, 0.1779, 0.5466, 0.2352]) -Greedy action tensor([-1.1356, -0.4389, 0.5905, 1.0994]) tensor([0.0556, 0.1117, 0.3126, 0.5201]) -Greedy action tensor([-1.9278, -0.4704, 0.6686, -0.1632]) tensor([0.0407, 0.1749, 0.5465, 0.2379]) -Greedy action tensor([-1.8430, -0.4658, 0.6148, -0.1485]) tensor([0.0453, 0.1795, 0.5288, 0.2465]) -Greedy action tensor([-1.9078, -0.4552, 0.6554, -0.1506]) tensor([0.0416, 0.1777, 0.5396, 0.2410]) -Greedy action tensor([-1.8199, -0.4785, 0.6103, -0.1104]) tensor([0.0461, 0.1761, 0.5233, 0.2545]) -Greedy action tensor([-0.8869, -0.4574, 0.2391, 0.9098]) tensor([0.0858, 0.1319, 0.2647, 0.5176]) -Greedy action tensor([0.1035, 0.1952, 0.9688, 1.6501]) tensor([0.1091, 0.1196, 0.2592, 0.5122]) -Greedy action tensor([-1.7638, -0.4860, 0.6119, -0.0160]) tensor([0.0474, 0.1702, 0.5101, 0.2723]) -Greedy action tensor([-1.8832, -0.4692, 0.6426, -0.1300]) tensor([0.0428, 0.1758, 0.5345, 0.2469]) -Greedy action tensor([-0.7738, -0.5945, 0.3911, -0.0233]) tensor([0.1330, 0.1591, 0.4263, 0.2817]) -Greedy action tensor([-0.7044, -0.4210, 0.6383, 1.1968]) tensor([0.0778, 0.1033, 0.2980, 0.5209]) -Greedy action tensor([-1.4777, -0.5072, 0.5197, 0.2456]) tensor([0.0602, 0.1589, 0.4436, 0.3373]) -Greedy action tensor([-1.8442, -0.4007, 0.6212, -0.0881]) tensor([0.0439, 0.1858, 0.5163, 0.2540]) -Greedy action tensor([-1.8414, -0.4206, 0.6517, -0.0713]) tensor([0.0433, 0.1792, 0.5235, 0.2540]) -Greedy action tensor([-1.8492, -0.2938, 0.5979, -0.1469]) tensor([0.0439, 0.2080, 0.5073, 0.2409]) -Greedy action tensor([-0.7660, -0.3420, 0.5430, 1.2659]) tensor([0.0722, 0.1103, 0.2671, 0.5504]) -Greedy action tensor([-0.9335, -0.4795, 0.2428, 0.8561]) tensor([0.0847, 0.1334, 0.2747, 0.5072]) -Greedy action tensor([-1.8848, -0.4576, 0.6474, -0.1216]) tensor([0.0424, 0.1767, 0.5336, 0.2473]) -Greedy action tensor([-1.2048, -0.6075, 0.2818, 0.2384]) tensor([0.0872, 0.1584, 0.3854, 0.3691]) -Greedy action tensor([-1.7249, -0.4934, 0.5589, -0.0188]) tensor([0.0506, 0.1735, 0.4970, 0.2789]) -Greedy action tensor([-0.3143, -0.4601, 0.1535, 0.1414]) tensor([0.1985, 0.1716, 0.3169, 0.3131]) -Greedy action tensor([-2.0096, -0.9075, 0.3802, -0.4211]) tensor([0.0505, 0.1519, 0.5506, 0.2471]) -Greedy action tensor([-1.7438, -0.5207, 0.5669, -0.0354]) tensor([0.0500, 0.1699, 0.5041, 0.2760]) -Greedy action tensor([-1.5314, -0.4309, 0.6401, 0.3526]) tensor([0.0517, 0.1553, 0.4531, 0.3399]) -Greedy action tensor([-1.9718, -0.6296, 0.7806, -0.1143]) tensor([0.0372, 0.1422, 0.5826, 0.2381]) -Greedy action tensor([-1.2699, -0.6706, 0.7499, 0.8005]) tensor([0.0547, 0.0996, 0.4122, 0.4336]) -Greedy action tensor([-1.9054, -0.4592, 0.7815, 0.0484]) tensor([0.0371, 0.1574, 0.5442, 0.2614]) -Greedy action tensor([-0.9672, -0.4306, 0.3502, -0.0024]) tensor([0.1103, 0.1886, 0.4117, 0.2894]) -Greedy action tensor([-2.0038, -0.9832, 0.4347, -0.2555]) tensor([0.0477, 0.1323, 0.5462, 0.2739]) -Greedy action tensor([-0.3957, -0.2081, 0.7323, 1.4444]) tensor([0.0863, 0.1041, 0.2665, 0.5432]) -Greedy action tensor([-1.8505, -0.4308, 0.6193, -0.1287]) tensor([0.0443, 0.1834, 0.5242, 0.2481]) -Greedy action tensor([-1.5274, -0.5271, 0.4826, 0.1580]) tensor([0.0603, 0.1640, 0.4502, 0.3254]) -Greedy action tensor([-1.7782, -0.2872, 0.5594, -0.0836]) tensor([0.0471, 0.2091, 0.4875, 0.2563]) -Greedy action tensor([-1.7238, -0.3011, 0.5334, -0.0569]) tensor([0.0500, 0.2074, 0.4778, 0.2648]) -Greedy action tensor([-1.7544, -0.4717, 0.6497, 0.1774]) tensor([0.0443, 0.1597, 0.4902, 0.3057]) -Greedy action tensor([ 1.2952, -0.7994, -0.0398, -0.8343]) tensor([0.6644, 0.0818, 0.1748, 0.0790]) -Greedy action tensor([ 1.4258, -1.5768, -0.0040, -0.7142]) tensor([0.7109, 0.0353, 0.1702, 0.0836]) -Greedy action tensor([ 0.9594, -0.4124, 0.0635, -0.2717]) tensor([0.5118, 0.1298, 0.2089, 0.1494]) -Greedy action tensor([ 0.6965, -0.7321, 0.0465, -0.2859]) tensor([0.4681, 0.1122, 0.2444, 0.1753]) -Greedy action tensor([ 0.6471, -0.5534, -0.0899, -0.2589]) tensor([0.4579, 0.1379, 0.2191, 0.1851]) -Greedy action tensor([ 1.0038, -0.5782, 0.0079, -0.4035]) tensor([0.5495, 0.1130, 0.2030, 0.1345]) -Greedy action tensor([ 1.2093, -1.0019, 0.0566, -0.8525]) tensor([0.6441, 0.0706, 0.2034, 0.0819]) -Greedy action tensor([ 0.4522, -0.0727, -0.2160, -0.2607]) tensor([0.3854, 0.2280, 0.1976, 0.1889]) -Greedy action tensor([ 0.2316, 0.0050, -0.0636, -0.2724]) tensor([0.3179, 0.2534, 0.2366, 0.1921]) -Greedy action tensor([ 0.1678, -0.1990, -0.1555, -0.4761]) tensor([0.3399, 0.2355, 0.2460, 0.1785]) -Greedy action tensor([ 0.8070, -0.6335, -0.1267, -0.5137]) tensor([0.5272, 0.1248, 0.2072, 0.1407]) -Greedy action tensor([ 0.3238, -0.0370, 0.0275, -0.1249]) tensor([0.3248, 0.2264, 0.2415, 0.2073]) -Greedy action tensor([ 0.8146, -0.4036, -0.1314, -0.3673]) tensor([0.5023, 0.1486, 0.1951, 0.1541]) -Greedy action tensor([ 0.5141, 0.1076, -0.0537, -0.1125]) tensor([0.3614, 0.2407, 0.2048, 0.1931]) -Greedy action tensor([ 1.2374, -0.8217, -0.0718, -0.3333]) tensor([0.6229, 0.0795, 0.1682, 0.1295]) -Greedy action tensor([ 0.7164, -0.3436, 0.0446, -0.2672]) tensor([0.4482, 0.1553, 0.2289, 0.1676]) -Greedy action tensor([ 0.1315, -0.0398, 0.0169, -0.0819]) tensor([0.2823, 0.2379, 0.2518, 0.2281]) -Greedy action tensor([ 0.8640, -0.4617, 0.0571, -0.2741]) tensor([0.4920, 0.1307, 0.2196, 0.1577]) -Greedy action tensor([ 0.6458, -0.4741, -0.0142, -0.4109]) tensor([0.4565, 0.1489, 0.2359, 0.1587]) -Greedy action tensor([ 0.7724, -0.5428, -0.0302, -0.3912]) tensor([0.4929, 0.1323, 0.2209, 0.1539]) -Greedy action tensor([ 0.3831, -0.5264, -0.0346, -0.3057]) tensor([0.3901, 0.1571, 0.2569, 0.1959]) -Greedy action tensor([ 0.4180, -0.5774, -0.1174, -0.3982]) tensor([0.4172, 0.1542, 0.2442, 0.1844]) -Greedy action tensor([ 0.3879, -0.0216, -0.2511, -0.3095]) tensor([0.3718, 0.2469, 0.1962, 0.1851]) -Greedy action tensor([ 0.1381, 0.1489, -0.1190, -0.3195]) tensor([0.2926, 0.2958, 0.2263, 0.1852]) -Greedy action tensor([ 0.7362, -0.5322, 0.0557, -0.4495]) tensor([0.4778, 0.1344, 0.2419, 0.1460]) -Greedy action tensor([ 1.0046, -0.4521, -0.1187, -0.4955]) tensor([0.5614, 0.1308, 0.1826, 0.1253]) -Greedy action tensor([ 0.8010, -0.3484, -0.0614, -0.3544]) tensor([0.4869, 0.1543, 0.2055, 0.1533]) -Greedy action tensor([ 0.7143, -0.4758, -0.0311, -0.3691]) tensor([0.4723, 0.1437, 0.2241, 0.1599]) -Greedy action tensor([ 0.9033, -0.5617, -0.0041, -0.8853]) tensor([0.5550, 0.1282, 0.2240, 0.0928]) -Greedy action tensor([ 0.2213, -0.1070, -0.0130, -0.3317]) tensor([0.3240, 0.2333, 0.2563, 0.1864]) -Greedy action tensor([ 1.2735e+00, -7.3912e-01, -7.4029e-04, -5.0779e-01]) tensor([0.6322, 0.0845, 0.1768, 0.1065]) -Greedy action tensor([ 0.4664, 0.0432, 0.0839, -0.1565]) tensor([0.3480, 0.2279, 0.2374, 0.1867]) -Greedy action tensor([ 0.4486, -0.3490, 0.0226, -0.2752]) tensor([0.3863, 0.1740, 0.2523, 0.1873]) -Greedy action tensor([ 0.3776, -0.0853, -0.0823, -0.2255]) tensor([0.3561, 0.2242, 0.2249, 0.1948]) -Greedy action tensor([ 0.7852, -0.3924, -0.0371, -0.2648]) tensor([0.4768, 0.1468, 0.2095, 0.1668]) -Greedy action tensor([ 0.5091, -0.4295, -0.0871, -0.3042]) tensor([0.4192, 0.1640, 0.2309, 0.1859]) -Greedy action tensor([ 0.5744, -0.1875, -0.0027, -0.3850]) tensor([0.4147, 0.1936, 0.2329, 0.1589]) -Greedy action tensor([ 0.8843, -0.4917, -0.0394, -0.3257]) tensor([0.5134, 0.1297, 0.2038, 0.1531]) -Greedy action tensor([ 0.5005, -0.3357, -0.1159, -0.1385]) tensor([0.3998, 0.1733, 0.2159, 0.2110]) -Greedy action tensor([ 0.8277, -0.6499, -0.0500, -0.6991]) tensor([0.5373, 0.1226, 0.2234, 0.1167]) -Greedy action tensor([ 0.5989, 0.2644, -0.1161, -0.3560]) tensor([0.3861, 0.2764, 0.1889, 0.1486]) -Greedy action tensor([ 0.6350, -0.4262, -0.0242, -0.2587]) tensor([0.4401, 0.1523, 0.2276, 0.1801]) -Greedy action tensor([ 0.4293, -0.3260, -0.1077, -0.6714]) tensor([0.4189, 0.1968, 0.2449, 0.1394]) -Greedy action tensor([ 0.6694, -0.3692, 0.1648, -0.4875]) tensor([0.4401, 0.1558, 0.2657, 0.1384]) -Greedy action tensor([ 0.4616, 0.0487, 0.1820, -0.3674]) tensor([0.3503, 0.2318, 0.2649, 0.1529]) -Greedy action tensor([ 0.3697, 0.0792, -0.0353, -0.4086]) tensor([0.3479, 0.2602, 0.2321, 0.1598]) -Greedy action tensor([ 0.6774, -0.4358, 0.0751, -0.3253]) tensor([0.4458, 0.1465, 0.2441, 0.1636]) -Greedy action tensor([ 0.5842, -0.3704, -0.0864, -0.1659]) tensor([0.4222, 0.1625, 0.2159, 0.1994]) -Greedy action tensor([ 0.6474, 0.3878, 0.1706, -0.4163]) tensor([0.3653, 0.2818, 0.2268, 0.1261]) -Greedy action tensor([ 0.6988, -0.1859, 0.0078, -0.2744]) tensor([0.4363, 0.1801, 0.2186, 0.1649]) -Greedy action tensor([ 0.8450, -0.3599, -0.0746, -0.2568]) tensor([0.4925, 0.1476, 0.1963, 0.1636]) -Greedy action tensor([ 0.5217, -0.1790, -0.1052, -0.2616]) tensor([0.4020, 0.1995, 0.2148, 0.1837]) -Greedy action tensor([ 1.1676, -1.2016, -0.1626, -0.4721]) tensor([0.6443, 0.0603, 0.1704, 0.1250]) -Greedy action tensor([ 0.6812, -0.4275, -0.0057, -0.2538]) tensor([0.4493, 0.1483, 0.2261, 0.1764]) -Greedy action tensor([ 0.6330, 0.0548, 0.0494, -0.2412]) tensor([0.3943, 0.2212, 0.2200, 0.1645]) -Greedy action tensor([ 0.3759, -0.1374, -0.0673, 0.0148]) tensor([0.3404, 0.2038, 0.2185, 0.2373]) -Greedy action tensor([ 0.5310, -0.1392, 0.0815, -0.0761]) tensor([0.3711, 0.1899, 0.2368, 0.2022]) -Greedy action tensor([ 0.4721, -0.3172, -0.0778, -0.3549]) tensor([0.4051, 0.1840, 0.2338, 0.1772]) -Greedy action tensor([ 0.5339, -0.1341, -0.0706, -0.2787]) tensor([0.3996, 0.2049, 0.2183, 0.1773]) -Greedy action tensor([ 0.4818, -0.2625, -0.0505, -0.2570]) tensor([0.3937, 0.1870, 0.2312, 0.1881]) -Greedy action tensor([ 1.0255, -1.1875, -0.0276, -0.4960]) tensor([0.5964, 0.0652, 0.2081, 0.1303]) -Greedy action tensor([ 0.8444, -0.5654, -0.0802, -0.3933]) tensor([0.5179, 0.1265, 0.2054, 0.1502]) -Greedy action tensor([ 0.7756, -0.2782, -0.0234, -0.3371]) tensor([0.4701, 0.1639, 0.2115, 0.1545]) -Greedy action tensor([ 0.6077, -0.1639, -0.0446, -0.1797]) tensor([0.4101, 0.1896, 0.2136, 0.1866]) -Greedy action tensor([ 0.0926, 0.0853, -0.0677, -0.4725]) tensor([0.2930, 0.2909, 0.2496, 0.1665]) -Greedy action tensor([ 0.8982, -0.5176, -0.0271, -0.4361]) tensor([0.5256, 0.1276, 0.2084, 0.1384]) -Greedy action tensor([ 0.5556, -0.0738, 0.0449, -0.1246]) tensor([0.3789, 0.2019, 0.2273, 0.1919]) -Greedy action tensor([ 0.8856, -0.4049, -0.0090, -0.2566]) tensor([0.4992, 0.1374, 0.2041, 0.1593]) -Greedy action tensor([ 0.2948, 0.0048, -0.0262, -0.2183]) tensor([0.3255, 0.2436, 0.2361, 0.1948]) -Greedy action tensor([ 0.3297, -0.1902, -0.0848, -0.2893]) tensor([0.3579, 0.2128, 0.2365, 0.1927]) -Greedy action tensor([ 0.4721, -0.1195, -0.0006, -0.3550]) tensor([0.3826, 0.2117, 0.2384, 0.1673]) -Greedy action tensor([ 0.6262, -0.5388, -0.1363, -0.5076]) tensor([0.4761, 0.1485, 0.2221, 0.1532]) -Greedy action tensor([ 0.6345, -0.4756, 0.0430, -0.4100]) tensor([0.4475, 0.1475, 0.2476, 0.1574]) -Greedy action tensor([ 0.4604, -0.2184, -0.0900, -0.2353]) tensor([0.3872, 0.1964, 0.2233, 0.1931]) -Greedy action tensor([ 0.5717, -0.1806, -0.0545, -0.3278]) tensor([0.4145, 0.1953, 0.2216, 0.1686]) -Greedy action tensor([ 0.6476, 0.3205, 0.1122, -0.3328]) tensor([0.3729, 0.2689, 0.2183, 0.1399]) -Greedy action tensor([ 0.3745, -0.3557, -0.1529, -0.4014]) tensor([0.3949, 0.1903, 0.2330, 0.1818]) -Greedy action tensor([ 0.6670, -0.3582, -0.0692, -0.4684]) tensor([0.4632, 0.1661, 0.2218, 0.1488]) -Greedy action tensor([ 0.3956, 0.0481, -0.0655, -0.2831]) tensor([0.3516, 0.2484, 0.2217, 0.1784]) -Greedy action tensor([ 0.6431, -0.4573, -0.0968, -0.2836]) tensor([0.4533, 0.1508, 0.2163, 0.1795]) -Greedy action tensor([ 0.8128, -0.7073, -0.0468, -0.4835]) tensor([0.5220, 0.1142, 0.2210, 0.1428]) -Greedy action tensor([1.4904, 0.4008, 0.1004, 0.5355]) tensor([0.5075, 0.1707, 0.1264, 0.1953]) -Greedy action tensor([-0.4828, -0.6110, -0.3148, 1.3411]) tensor([0.1080, 0.0950, 0.1278, 0.6692]) -Greedy action tensor([ 0.2927, 0.1877, -0.0549, 0.2746]) tensor([0.2787, 0.2509, 0.1968, 0.2736]) -Greedy action tensor([ 0.5634, -0.7713, 1.3107, 0.7181]) tensor([0.2202, 0.0580, 0.4648, 0.2570]) -Greedy action tensor([-0.3723, -0.3104, 0.3888, 0.0190]) tensor([0.1760, 0.1872, 0.3766, 0.2602]) -Greedy action tensor([ 0.1224, -0.3769, 1.2998, -0.3795]) tensor([0.1832, 0.1112, 0.5947, 0.1109]) -Greedy action tensor([-0.0814, -1.3908, 0.2607, 0.2804]) tensor([0.2431, 0.0656, 0.3422, 0.3491]) -Greedy action tensor([0.6473, 0.4350, 0.6419, 0.8508]) tensor([0.2482, 0.2007, 0.2469, 0.3042]) -Greedy action tensor([-0.2374, -1.4331, -0.9879, 0.0757]) tensor([0.3183, 0.0963, 0.1502, 0.4352]) -Greedy action tensor([ 0.7083, -0.4374, 1.2581, -0.0658]) tensor([0.2847, 0.0905, 0.4934, 0.1313]) -Greedy action tensor([-0.7278, -1.6331, 0.3807, 0.4001]) tensor([0.1329, 0.0538, 0.4027, 0.4106]) -Greedy action tensor([ 0.7999, -2.0706, -0.3774, 0.9525]) tensor([0.3953, 0.0224, 0.1218, 0.4605]) -Greedy action tensor([ 0.8391, 0.7219, -0.9191, 1.5436]) tensor([0.2448, 0.2177, 0.0422, 0.4952]) -Greedy action tensor([0.7011, 0.0042, 0.3282, 1.4201]) tensor([0.2359, 0.1175, 0.1625, 0.4841]) -Greedy action tensor([-0.6871, -0.6994, -0.3693, -0.3066]) tensor([0.2073, 0.2047, 0.2848, 0.3032]) -Greedy action tensor([-0.6586, -1.7678, -0.4975, 0.1843]) tensor([0.2071, 0.0683, 0.2434, 0.4812]) -Greedy action tensor([-0.3362, -2.0294, 0.2618, 0.4452]) tensor([0.1928, 0.0355, 0.3506, 0.4212]) -Greedy action tensor([ 0.0189, 0.8508, -0.3554, 0.2832]) tensor([0.1891, 0.4345, 0.1301, 0.2463]) -Greedy action tensor([0.3225, 0.3678, 0.0200, 0.2110]) tensor([0.2718, 0.2843, 0.2008, 0.2431]) -Greedy action tensor([2.1362, 1.1840, 0.1172, 1.0548]) tensor([0.5383, 0.2077, 0.0715, 0.1825]) -Greedy action tensor([-0.7449, -2.5258, 0.4527, 0.0605]) tensor([0.1488, 0.0251, 0.4930, 0.3331]) -Greedy action tensor([-0.8374, -0.2939, 0.9013, -1.7382]) tensor([0.1134, 0.1953, 0.6452, 0.0461]) -Greedy action tensor([ 0.4690, -0.8557, 2.2912, 1.1769]) tensor([0.1055, 0.0280, 0.6524, 0.2141]) -Greedy action tensor([ 1.0559, -1.7056, 0.0468, -0.0788]) tensor([0.5717, 0.0361, 0.2084, 0.1838]) -Greedy action tensor([ 0.4207, -0.7600, -0.1597, -0.1019]) tensor([0.4066, 0.1248, 0.2275, 0.2411]) -Greedy action tensor([-0.4309, -0.1028, 0.7977, 0.1080]) tensor([0.1330, 0.1846, 0.4544, 0.2280]) -Greedy action tensor([ 1.0601, -1.1041, 0.6407, 0.1760]) tensor([0.4576, 0.0526, 0.3008, 0.1890]) -Greedy action tensor([-0.3150, 0.1156, 1.3841, -0.2379]) tensor([0.1100, 0.1693, 0.6018, 0.1189]) -Greedy action tensor([ 0.0704, -0.7688, 1.1968, -1.1023]) tensor([0.2072, 0.0895, 0.6391, 0.0641]) -Greedy action tensor([ 0.7304, 0.3785, -0.6911, -0.9641]) tensor([0.4698, 0.3305, 0.1134, 0.0863]) -Greedy action tensor([-0.7197, -0.2757, 0.0453, -0.5157]) tensor([0.1685, 0.2627, 0.3621, 0.2067]) -Greedy action tensor([ 0.7554, -0.6420, 0.6917, -0.3763]) tensor([0.3987, 0.0986, 0.3741, 0.1286]) -Greedy action tensor([ 0.4692, 0.0942, -0.0028, 0.7996]) tensor([0.2701, 0.1856, 0.1685, 0.3758]) -Greedy action tensor([0.0852, 0.9959, 0.4758, 1.1838]) tensor([0.1256, 0.3122, 0.1856, 0.3767]) -Greedy action tensor([ 0.1904, -0.9489, -0.6531, 0.6968]) tensor([0.2933, 0.0939, 0.1262, 0.4867]) -Greedy action tensor([ 1.0028, -0.3141, 1.5018, 0.0223]) tensor([0.3039, 0.0814, 0.5006, 0.1140]) -Greedy action tensor([ 1.0576, -0.9777, 1.3316, -0.1079]) tensor([0.3626, 0.0474, 0.4769, 0.1131]) -Greedy action tensor([ 0.5211, -1.7331, -0.4494, 0.0214]) tensor([0.4783, 0.0502, 0.1812, 0.2902]) -Greedy action tensor([ 0.1674, -1.0630, -0.3540, 2.1223]) tensor([0.1117, 0.0326, 0.0663, 0.7893]) -Greedy action tensor([ 1.0927, -0.4836, -0.1563, 0.2086]) tensor([0.5245, 0.1084, 0.1504, 0.2167]) -Greedy action tensor([-0.1708, 0.9887, 0.9941, -0.9007]) tensor([0.1270, 0.4048, 0.4070, 0.0612]) -Greedy action tensor([-0.1860, -1.2920, 0.4681, 1.2065]) tensor([0.1374, 0.0455, 0.2642, 0.5529]) -Greedy action tensor([1.0111, 0.2731, 0.6292, 1.4461]) tensor([0.2699, 0.1290, 0.1842, 0.4169]) -Greedy action tensor([-0.1307, -1.0278, -0.3894, -0.0700]) tensor([0.3084, 0.1258, 0.2381, 0.3277]) -Greedy action tensor([ 0.5567, -2.3871, 0.0533, 0.3206]) tensor([0.4087, 0.0215, 0.2470, 0.3227]) -Greedy action tensor([-1.2682, 0.0983, 0.7609, -0.0384]) tensor([0.0627, 0.2459, 0.4770, 0.2145]) -Greedy action tensor([ 2.0498, -0.8207, 1.2481, 0.5945]) tensor([0.5752, 0.0326, 0.2580, 0.1342]) -Greedy action tensor([-0.1854, 1.0501, 0.2077, -0.5665]) tensor([0.1514, 0.5208, 0.2243, 0.1034]) -Greedy action tensor([ 0.4326, -0.2826, -0.4564, -0.1925]) tensor([0.4106, 0.2008, 0.1688, 0.2198]) -Greedy action tensor([ 1.2834, -0.6474, 0.3749, 1.2581]) tensor([0.3963, 0.0575, 0.1598, 0.3864]) -Greedy action tensor([-1.2871, 0.0345, 0.0511, -0.9776]) tensor([0.1008, 0.3778, 0.3841, 0.1373]) -Greedy action tensor([ 0.4254, -1.5918, -0.2298, 0.5011]) tensor([0.3662, 0.0487, 0.1902, 0.3950]) -Greedy action tensor([-0.1710, 0.0961, 0.3388, 0.4191]) tensor([0.1732, 0.2262, 0.2883, 0.3124]) -Greedy action tensor([-0.3997, -0.5627, -1.1717, 0.9197]) tensor([0.1652, 0.1404, 0.0763, 0.6181]) -Greedy action tensor([ 0.9554, -0.3712, -1.3723, 0.1211]) tensor([0.5565, 0.1477, 0.0543, 0.2416]) -Greedy action tensor([ 1.2392, -0.3544, 1.0186, 0.6953]) tensor([0.3868, 0.0786, 0.3102, 0.2245]) -Greedy action tensor([ 0.8291, 0.7221, -0.3445, 0.8023]) tensor([0.3143, 0.2824, 0.0972, 0.3060]) -Greedy action tensor([-0.3693, 0.2983, 1.7497, -0.4898]) tensor([0.0822, 0.1603, 0.6845, 0.0729]) -Greedy action tensor([ 1.3082, 0.3940, -0.7788, 0.5489]) tensor([0.5018, 0.2011, 0.0623, 0.2348]) -Greedy action tensor([0.7919, 0.2552, 0.2047, 0.1068]) tensor([0.3781, 0.2211, 0.2102, 0.1906]) -Greedy action tensor([ 1.2878, -1.3911, -0.3583, 1.8410]) tensor([0.3333, 0.0229, 0.0643, 0.5795]) -Greedy action tensor([-1.1752, 0.3987, -0.6270, -0.1054]) tensor([0.0955, 0.4609, 0.1652, 0.2784]) -Greedy action tensor([ 0.6100, -1.9814, -0.6227, 0.2592]) tensor([0.4830, 0.0362, 0.1408, 0.3401]) -Greedy action tensor([ 1.1818, 0.2440, -0.0061, 0.0240]) tensor([0.4974, 0.1947, 0.1516, 0.1563]) -Greedy action tensor([ 1.1890, -0.5528, 0.0687, 0.9284]) tensor([0.4401, 0.0771, 0.1436, 0.3392]) -Greedy action tensor([ 0.0850, -0.4391, 0.0860, 0.2744]) tensor([0.2630, 0.1557, 0.2633, 0.3179]) -Greedy action tensor([0.7865, 0.4165, 0.7558, 0.5481]) tensor([0.2900, 0.2003, 0.2812, 0.2285]) -Greedy action tensor([ 0.7106, 0.0103, 0.1477, -0.8282]) tensor([0.4385, 0.2177, 0.2497, 0.0941]) -Greedy action tensor([ 0.7723, 0.4724, 0.6496, -1.2009]) tensor([0.3617, 0.2680, 0.3200, 0.0503]) -Greedy action tensor([ 1.1599, -0.0661, 0.4192, 0.1713]) tensor([0.4668, 0.1370, 0.2226, 0.1737]) -Greedy action tensor([ 1.2354, -0.6968, 1.1777, 0.3000]) tensor([0.4030, 0.0584, 0.3804, 0.1582]) -Greedy action tensor([ 0.7686, -0.8194, 0.9529, 1.1629]) tensor([0.2571, 0.0525, 0.3091, 0.3813]) -Greedy action tensor([ 0.2325, 0.0474, 1.0593, -0.5318]) tensor([0.2182, 0.1813, 0.4988, 0.1016]) -Greedy action tensor([ 0.5959, 1.1654, -0.3612, -0.1899]) tensor([0.2772, 0.4900, 0.1065, 0.1263]) -Greedy action tensor([ 1.6669, -1.0809, 0.6370, 1.3058]) tensor([0.4721, 0.0302, 0.1686, 0.3290]) -Greedy action tensor([1.4599, 0.1828, 1.6934, 1.0527]) tensor([0.3118, 0.0869, 0.3938, 0.2075]) -Greedy action tensor([ 0.6107, -1.2265, 0.1827, -0.6818]) tensor([0.4795, 0.0764, 0.3125, 0.1317]) -Greedy action tensor([-0.2470, -0.2537, 0.4130, -0.7053]) tensor([0.2193, 0.2178, 0.4243, 0.1387]) -Greedy action tensor([ 0.6585, -0.6776, 0.9409, 0.1697]) tensor([0.3123, 0.0821, 0.4141, 0.1915]) -Greedy action tensor([ 1.3735, -0.4181, 1.4906, 0.4750]) tensor([0.3706, 0.0618, 0.4167, 0.1509]) -Greedy action tensor([-0.5429, -0.2101, -0.4700, 0.7327]) tensor([0.1418, 0.1978, 0.1525, 0.5078]) -Greedy action tensor([0.2136, 0.0475, 0.1121, 0.5058]) tensor([0.2445, 0.2071, 0.2209, 0.3275]) -Greedy action tensor([ 2.0785, -0.8397, -0.0183, 0.3516]) tensor([0.7382, 0.0399, 0.0907, 0.1313]) -Greedy action tensor([ 0.5081, -0.2296, -0.0678, 0.0578]) tensor([0.3734, 0.1786, 0.2099, 0.2380]) -Greedy action tensor([ 1.1672, -0.6468, -0.3220, 0.3358]) tensor([0.5482, 0.0894, 0.1237, 0.2387]) -Greedy action tensor([ 1.0213, -0.1618, 0.0554, -0.5653]) tensor([0.5287, 0.1619, 0.2012, 0.1082]) -Greedy action tensor([ 0.5166, -0.1734, -0.0536, -0.1405]) tensor([0.3868, 0.1940, 0.2187, 0.2005]) -Greedy action tensor([ 0.5894, -0.4744, -0.3025, 0.2793]) tensor([0.4019, 0.1387, 0.1647, 0.2947]) -Greedy action tensor([ 1.7454, -0.6391, -0.1439, 0.5178]) tensor([0.6509, 0.0600, 0.0984, 0.1907]) -Greedy action tensor([ 0.8960, -0.5712, -0.2312, 0.3336]) tensor([0.4707, 0.1085, 0.1525, 0.2682]) -Greedy action tensor([ 1.4421, -0.5472, -0.3155, 0.2323]) tensor([0.6221, 0.0851, 0.1073, 0.1855]) -Greedy action tensor([ 2.2851, -0.8283, -0.5578, 1.0055]) tensor([0.7242, 0.0322, 0.0422, 0.2014]) -Greedy action tensor([ 1.1907, -0.2142, -0.1808, 0.0473]) tensor([0.5501, 0.1350, 0.1396, 0.1753]) -Greedy action tensor([ 2.1048, -0.8082, -0.3325, 1.0231]) tensor([0.6754, 0.0367, 0.0590, 0.2289]) -Greedy action tensor([ 0.6523, -0.0341, 0.0916, 0.3364]) tensor([0.3567, 0.1796, 0.2036, 0.2601]) -Greedy action tensor([ 1.1966, -0.4896, -0.4407, 0.7282]) tensor([0.4986, 0.0923, 0.0970, 0.3121]) -Greedy action tensor([ 0.7367, -0.4735, 0.0917, 0.0973]) tensor([0.4255, 0.1268, 0.2232, 0.2245]) -Greedy action tensor([ 0.9919, -0.0678, -0.4690, -0.2484]) tensor([0.5354, 0.1855, 0.1242, 0.1549]) -Greedy action tensor([ 0.7868, -0.1921, -0.2564, 0.2639]) tensor([0.4309, 0.1619, 0.1518, 0.2554]) -Greedy action tensor([ 1.4517, -0.8768, -0.1363, 0.1278]) tensor([0.6378, 0.0621, 0.1303, 0.1697]) -Greedy action tensor([ 0.2585, -0.2029, 0.0138, 0.1520]) tensor([0.3019, 0.1903, 0.2364, 0.2714]) -Greedy action tensor([ 0.8908, -0.2221, -0.7114, 0.3579]) tensor([0.4724, 0.1552, 0.0952, 0.2772]) -Greedy action tensor([ 1.5963, -0.4541, -0.3725, 0.4712]) tensor([0.6278, 0.0808, 0.0877, 0.2038]) -Greedy action tensor([ 0.9806, -0.5619, -0.3226, 0.0342]) tensor([0.5337, 0.1141, 0.1450, 0.2072]) -Greedy action tensor([ 0.5700, -0.3582, -0.2508, 0.3034]) tensor([0.3844, 0.1519, 0.1692, 0.2945]) -Greedy action tensor([ 1.8187, -0.0094, -0.5721, 0.1151]) tensor([0.6972, 0.1121, 0.0638, 0.1269]) -Greedy action tensor([ 1.6948, -0.9615, -0.2052, 0.4900]) tensor([0.6581, 0.0462, 0.0984, 0.1973]) -Greedy action tensor([ 1.0503, -0.2644, -0.3173, -0.1283]) tensor([0.5462, 0.1467, 0.1391, 0.1681]) -Greedy action tensor([ 1.1559, -0.2303, -0.1167, 0.3641]) tensor([0.5042, 0.1261, 0.1412, 0.2284]) -Greedy action tensor([ 1.1925, -0.1784, -0.3757, -0.1478]) tensor([0.5800, 0.1473, 0.1209, 0.1518]) -Greedy action tensor([ 1.4742, -0.2022, -0.2836, 0.4012]) tensor([0.5877, 0.1099, 0.1013, 0.2010]) -Greedy action tensor([ 1.5554, -0.1217, -0.5023, 0.2085]) tensor([0.6350, 0.1187, 0.0811, 0.1651]) -Greedy action tensor([ 0.8363, -0.8549, -0.2657, 0.2847]) tensor([0.4779, 0.0881, 0.1588, 0.2753]) -Greedy action tensor([ 1.2038, -0.6492, -0.1895, 0.3798]) tensor([0.5424, 0.0850, 0.1347, 0.2379]) -Greedy action tensor([ 0.4213, -0.3178, 0.0515, -0.0947]) tensor([0.3616, 0.1727, 0.2498, 0.2158]) -Greedy action tensor([ 0.5635, -0.0642, -0.1635, 0.0775]) tensor([0.3799, 0.2028, 0.1836, 0.2337]) -Greedy action tensor([ 0.9030, -0.1726, 0.1491, 0.0965]) tensor([0.4429, 0.1511, 0.2084, 0.1977]) -Greedy action tensor([ 1.0914, -0.7901, -0.2588, 0.3225]) tensor([0.5333, 0.0813, 0.1382, 0.2472]) -Greedy action tensor([ 1.0845, 0.0202, -0.3863, -0.2511]) tensor([0.5442, 0.1877, 0.1250, 0.1431]) -Greedy action tensor([ 1.3721, -0.3393, -0.3361, 0.2326]) tensor([0.5946, 0.1074, 0.1077, 0.1903]) -Greedy action tensor([ 1.1895, -0.1294, 0.0167, 0.1472]) tensor([0.5182, 0.1386, 0.1604, 0.1828]) -Greedy action tensor([ 1.4031, -0.0337, -0.3426, 0.3167]) tensor([0.5715, 0.1359, 0.0998, 0.1929]) -Greedy action tensor([ 1.2328, -0.5170, -0.5373, 0.3421]) tensor([0.5700, 0.0991, 0.0971, 0.2339]) -Greedy action tensor([ 1.1014, -0.5483, -0.6112, 1.1420]) tensor([0.4143, 0.0796, 0.0747, 0.4314]) -Greedy action tensor([ 0.8637, -0.2591, -0.3557, 0.0424]) tensor([0.4853, 0.1579, 0.1434, 0.2135]) -Greedy action tensor([ 1.0068, -0.0888, -0.1875, 0.1213]) tensor([0.4878, 0.1631, 0.1478, 0.2013]) -Greedy action tensor([ 1.1772, -0.2895, -0.0725, -0.1630]) tensor([0.5621, 0.1297, 0.1611, 0.1471]) -Greedy action tensor([ 1.1889, -0.3170, -0.1538, 0.3992]) tensor([0.5163, 0.1145, 0.1348, 0.2344]) -Greedy action tensor([ 1.2510, -0.5265, -0.4620, 0.0411]) tensor([0.6069, 0.1026, 0.1094, 0.1810]) -Greedy action tensor([ 1.0607, -0.1900, -0.6729, -0.1123]) tensor([0.5642, 0.1615, 0.0997, 0.1746]) -Greedy action tensor([ 0.6131, -0.3545, -0.0140, 0.0144]) tensor([0.4059, 0.1542, 0.2168, 0.2230]) -Greedy action tensor([ 1.0116, -0.4336, -0.4549, 0.8109]) tensor([0.4377, 0.1032, 0.1010, 0.3581]) -Greedy action tensor([ 1.3571, -0.4875, -0.3375, 0.4567]) tensor([0.5720, 0.0904, 0.1051, 0.2325]) -Greedy action tensor([ 1.3761, -0.3583, -0.1344, 0.3559]) tensor([0.5689, 0.1004, 0.1256, 0.2051]) -Greedy action tensor([ 1.7336, -0.3115, -0.1949, 0.4238]) tensor([0.6474, 0.0838, 0.0941, 0.1747]) -Greedy action tensor([ 0.9338, 0.0754, 0.1360, -0.3307]) tensor([0.4637, 0.1965, 0.2088, 0.1309]) -Greedy action tensor([ 1.8721, -0.6755, -0.2140, 0.2143]) tensor([0.7179, 0.0562, 0.0891, 0.1368]) -Greedy action tensor([ 0.8763, -0.4064, -0.4654, 0.8080]) tensor([0.4044, 0.1121, 0.1057, 0.3777]) -Greedy action tensor([ 1.4987, -0.7398, -0.2855, 0.2237]) tensor([0.6435, 0.0686, 0.1081, 0.1798]) -Greedy action tensor([ 1.3058, 0.0497, -0.1786, 0.2202]) tensor([0.5408, 0.1540, 0.1226, 0.1826]) -Greedy action tensor([ 1.2302, -0.4125, -0.2796, 0.7456]) tensor([0.4925, 0.0953, 0.1088, 0.3034]) -Greedy action tensor([ 1.2455, -0.8435, -0.2175, 0.2444]) tensor([0.5804, 0.0719, 0.1344, 0.2133]) -Greedy action tensor([ 0.6973, -0.2887, 0.0522, -0.1290]) tensor([0.4282, 0.1597, 0.2246, 0.1874]) -Greedy action tensor([ 0.9925, -0.2899, -0.3977, 0.4823]) tensor([0.4702, 0.1304, 0.1171, 0.2823]) -Greedy action tensor([ 1.1711, -0.1939, -0.2874, 0.1505]) tensor([0.5410, 0.1382, 0.1258, 0.1950]) -Greedy action tensor([ 1.5031, 0.0784, -0.1714, 0.2218]) tensor([0.5863, 0.1411, 0.1099, 0.1628]) -Greedy action tensor([ 1.6909, -0.7344, -0.3689, 0.2904]) tensor([0.6838, 0.0605, 0.0872, 0.1685]) -Greedy action tensor([ 1.5591, -0.7941, -0.1860, 0.5739]) tensor([0.6086, 0.0579, 0.1063, 0.2272]) -Greedy action tensor([ 0.9670, -0.4094, -0.3315, 0.5570]) tensor([0.4568, 0.1153, 0.1247, 0.3032]) -Greedy action tensor([ 0.9819, -0.2751, 0.0551, -0.1257]) tensor([0.4974, 0.1415, 0.1968, 0.1643]) -Greedy action tensor([ 1.6210, -0.5935, -0.2043, 0.2732]) tensor([0.6535, 0.0714, 0.1053, 0.1698]) -Greedy action tensor([ 1.2723, -0.3705, 0.0623, -0.1446]) tensor([0.5767, 0.1116, 0.1720, 0.1398]) -Greedy action tensor([ 2.3396, -0.2254, -0.6446, 0.1098]) tensor([0.8097, 0.0623, 0.0410, 0.0871]) -Greedy action tensor([ 1.3702, -0.3804, -0.2676, 0.2666]) tensor([0.5883, 0.1022, 0.1144, 0.1951]) -Greedy action tensor([ 1.9381, -0.9994, -0.5448, 0.4654]) tensor([0.7322, 0.0388, 0.0611, 0.1679]) -Greedy action tensor([ 1.5836, -0.5539, -0.2675, 0.4965]) tensor([0.6203, 0.0732, 0.0974, 0.2092]) -Greedy action tensor([ 1.5001, -0.4439, -0.3383, -0.2594]) tensor([0.6783, 0.0971, 0.1079, 0.1168]) -Greedy action tensor([ 1.0057, -0.5944, -0.1446, 0.0756]) tensor([0.5227, 0.1055, 0.1655, 0.2062]) -Greedy action tensor([ 1.0382, -0.1443, -0.4908, 0.1339]) tensor([0.5187, 0.1590, 0.1124, 0.2100]) -Greedy action tensor([ 1.7716, -0.4581, -0.4843, 0.4047]) tensor([0.6816, 0.0733, 0.0714, 0.1737]) -Greedy action tensor([ 1.1141, -0.1438, -0.3355, -0.1125]) tensor([0.5518, 0.1569, 0.1295, 0.1618]) -Greedy action tensor([ 0.9011, -0.2147, -0.2025, 0.2270]) tensor([0.4611, 0.1511, 0.1529, 0.2350]) -Greedy action tensor([ 1.8317, -0.6855, -0.3479, 0.7333]) tensor([0.6548, 0.0528, 0.0740, 0.2183]) -Greedy action tensor([-1.8048, -0.3934, 0.5888, -0.0941]) tensor([0.0463, 0.1900, 0.5074, 0.2563]) -Greedy action tensor([-1.6903, -0.5949, 0.5369, -0.0660]) tensor([0.0545, 0.1631, 0.5057, 0.2767]) -Greedy action tensor([-1.8483, -0.4472, 0.6180, -0.1146]) tensor([0.0444, 0.1804, 0.5235, 0.2516]) -Greedy action tensor([-1.7972, -0.8406, -0.3045, -0.5638]) tensor([0.0871, 0.2266, 0.3874, 0.2989]) -Greedy action tensor([-0.9090, -0.5397, 0.4099, -0.2852]) tensor([0.1242, 0.1797, 0.4644, 0.2317]) -Greedy action tensor([-1.6192, -0.5169, 0.4850, 0.0400]) tensor([0.0573, 0.1724, 0.4695, 0.3009]) -Greedy action tensor([-1.4923, -0.2530, 0.5294, 0.4397]) tensor([0.0529, 0.1826, 0.3994, 0.3651]) -Greedy action tensor([-1.8422, -0.6574, 1.3608, 0.7773]) tensor([0.0235, 0.0768, 0.5776, 0.3222]) -Greedy action tensor([-1.7903, -0.0124, 0.5352, -0.1307]) tensor([0.0446, 0.2641, 0.4566, 0.2346]) -Greedy action tensor([-0.8903, -0.0086, 0.3761, -0.4472]) tensor([0.1174, 0.2834, 0.4164, 0.1828]) -Greedy action tensor([-1.8349, -0.4281, 0.6044, -0.1219]) tensor([0.0453, 0.1848, 0.5189, 0.2510]) -Greedy action tensor([-1.1600, -0.5515, 0.8302, 1.0971]) tensor([0.0507, 0.0932, 0.3712, 0.4848]) -Greedy action tensor([-1.1414, -0.5910, 0.3551, -0.0382]) tensor([0.0979, 0.1698, 0.4372, 0.2951]) -Greedy action tensor([-1.6987, -0.2036, 0.5016, -0.0400]) tensor([0.0507, 0.2259, 0.4573, 0.2661]) -Greedy action tensor([-1.4635, -0.5479, 0.8789, 0.6653]) tensor([0.0448, 0.1120, 0.4664, 0.3767]) -Greedy action tensor([-1.8917, -0.3144, 0.6241, -0.1434]) tensor([0.0417, 0.2020, 0.5165, 0.2397]) -Greedy action tensor([-1.5925, 0.4984, 0.2446, -0.0538]) tensor([0.0499, 0.4040, 0.3135, 0.2326]) -Greedy action tensor([-1.6934, -0.5103, 0.5461, 0.0250]) tensor([0.0520, 0.1698, 0.4883, 0.2899]) -Greedy action tensor([-1.3390, -0.6134, 0.6648, 0.7512]) tensor([0.0538, 0.1113, 0.3994, 0.4355]) -Greedy action tensor([-1.1213, -0.6307, 0.2648, 0.3266]) tensor([0.0919, 0.1500, 0.3674, 0.3908]) -Greedy action tensor([-1.7856, -0.3712, 1.0744, 0.7816]) tensor([0.0281, 0.1155, 0.4904, 0.3659]) -Greedy action tensor([-0.9124, -0.5095, 0.4499, 0.7434]) tensor([0.0859, 0.1286, 0.3355, 0.4500]) -Greedy action tensor([-1.0412, -0.5202, 0.3335, 0.5851]) tensor([0.0853, 0.1436, 0.3373, 0.4338]) -Greedy action tensor([-1.2063, 0.1084, 0.6211, -0.6938]) tensor([0.0793, 0.2953, 0.4931, 0.1324]) -Greedy action tensor([-1.3171, -0.6611, 1.3439, 1.3135]) tensor([0.0321, 0.0619, 0.4599, 0.4461]) -Greedy action tensor([-1.8783, -0.3297, 0.6120, -0.1437]) tensor([0.0427, 0.2008, 0.5148, 0.2418]) -Greedy action tensor([-2.0556, -0.8980, 0.9561, 0.1246]) tensor([0.0300, 0.0954, 0.6093, 0.2653]) -Greedy action tensor([-1.2923, -0.5390, 0.5265, -0.1319]) tensor([0.0801, 0.1702, 0.4940, 0.2557]) -Greedy action tensor([-1.8425, -0.4695, 0.6091, -0.1581]) tensor([0.0456, 0.1799, 0.5289, 0.2456]) -Greedy action tensor([-1.7991, -0.4848, 0.5902, -0.0592]) tensor([0.0469, 0.1745, 0.5114, 0.2671]) -Greedy action tensor([-0.4786, -0.5327, 0.1872, 0.1706]) tensor([0.1722, 0.1631, 0.3351, 0.3296]) -Greedy action tensor([-1.4845, -0.5829, 0.4196, 0.0929]) tensor([0.0666, 0.1640, 0.4470, 0.3224]) -Greedy action tensor([-1.9151, -0.3862, 0.6386, -0.1679]) tensor([0.0413, 0.1906, 0.5311, 0.2371]) -Greedy action tensor([-1.7007, -0.7077, -0.0443, -0.5413]) tensor([0.0825, 0.2226, 0.4321, 0.2629]) -Greedy action tensor([-1.5095, -0.5015, 0.4122, 0.1262]) tensor([0.0637, 0.1745, 0.4350, 0.3268]) -Greedy action tensor([-1.5218, -0.5147, 0.9154, 0.5494]) tensor([0.0433, 0.1184, 0.4950, 0.3433]) -Greedy action tensor([-1.1825, -0.5759, 0.5584, 0.7823]) tensor([0.0638, 0.1171, 0.3639, 0.4552]) -Greedy action tensor([-1.7860, -0.3986, 0.5736, -0.0915]) tensor([0.0475, 0.1904, 0.5033, 0.2588]) -Greedy action tensor([-1.1748, -0.4886, 0.3139, 0.6296]) tensor([0.0741, 0.1472, 0.3284, 0.4503]) -Greedy action tensor([-1.9167, -0.4369, 0.6506, -0.1689]) tensor([0.0414, 0.1818, 0.5392, 0.2376]) -Greedy action tensor([-0.7403, -0.5823, 0.1306, 0.3754]) tensor([0.1314, 0.1539, 0.3138, 0.4009]) -Greedy action tensor([-1.2284, -0.3983, 0.5397, 0.9241]) tensor([0.0563, 0.1291, 0.3300, 0.4846]) -Greedy action tensor([-1.8152, -0.4525, 0.6049, -0.1450]) tensor([0.0466, 0.1820, 0.5239, 0.2475]) -Greedy action tensor([-1.7399, -0.4393, 0.6202, 0.1288]) tensor([0.0460, 0.1688, 0.4872, 0.2980]) -Greedy action tensor([-0.5031, -0.4678, 0.1839, 0.0986]) tensor([0.1710, 0.1771, 0.3399, 0.3121]) -Greedy action tensor([-1.8400, -0.4459, 0.6340, -0.0954]) tensor([0.0442, 0.1782, 0.5247, 0.2530]) -Greedy action tensor([-0.5900, -0.3863, 1.1795, 1.5703]) tensor([0.0596, 0.0731, 0.3499, 0.5173]) -Greedy action tensor([-0.9084, -0.6241, 0.4946, -0.2401]) tensor([0.1198, 0.1592, 0.4873, 0.2337]) -Greedy action tensor([-1.8700, -0.4470, 0.6357, -0.1232]) tensor([0.0432, 0.1793, 0.5295, 0.2479]) -Greedy action tensor([-1.9258, -0.6695, 0.2664, -0.2622]) tensor([0.0533, 0.1874, 0.4777, 0.2816]) -Greedy action tensor([-1.4770, -0.5659, 0.5173, 0.2352]) tensor([0.0611, 0.1519, 0.4487, 0.3384]) -Greedy action tensor([-1.1749, -0.2974, 0.5346, 0.7849]) tensor([0.0624, 0.1500, 0.3448, 0.4428]) -Greedy action tensor([-1.5299, -0.5769, 0.4458, 0.0689]) tensor([0.0635, 0.1646, 0.4578, 0.3141]) -Greedy action tensor([-1.8696, -0.4413, 0.6277, -0.1429]) tensor([0.0436, 0.1818, 0.5295, 0.2451]) -Greedy action tensor([-1.8716, -0.4431, 0.6306, -0.1290]) tensor([0.0433, 0.1807, 0.5287, 0.2474]) -Greedy action tensor([-0.6611, 0.5783, 0.3755, 1.3049]) tensor([0.0694, 0.2396, 0.1956, 0.4955]) -Greedy action tensor([-1.8744, -0.4393, 0.6443, -0.1237]) tensor([0.0428, 0.1797, 0.5311, 0.2464]) -Greedy action tensor([-1.6868, -0.5066, 0.5303, -0.0075]) tensor([0.0532, 0.1732, 0.4884, 0.2852]) -Greedy action tensor([-1.1100, -0.6837, 0.4987, 0.3772]) tensor([0.0837, 0.1281, 0.4180, 0.3702]) -Greedy action tensor([-1.9235, -0.5696, 1.1547, 0.2056]) tensor([0.0286, 0.1106, 0.6206, 0.2402]) -Greedy action tensor([-1.5769, -0.1762, 0.7669, 0.4553]) tensor([0.0433, 0.1756, 0.4509, 0.3302]) -Greedy action tensor([-1.7470, -0.0313, 0.5050, -0.0430]) tensor([0.0464, 0.2579, 0.4409, 0.2549]) -Greedy action tensor([-0.8746, -0.7945, 0.7495, 0.3624]) tensor([0.0943, 0.1022, 0.4786, 0.3249]) -Greedy action tensor([-0.2047, 0.3269, 0.9084, 1.6700]) tensor([0.0815, 0.1387, 0.2482, 0.5315]) -Greedy action tensor([-1.8469, -0.3925, 0.6263, -0.0843]) tensor([0.0435, 0.1864, 0.5163, 0.2537]) -Greedy action tensor([-0.5927, 0.9151, -0.0152, 0.2332]) tensor([0.1044, 0.4714, 0.1859, 0.2383]) -Greedy action tensor([-0.7540, -0.1012, 0.8316, 1.4213]) tensor([0.0602, 0.1157, 0.2940, 0.5302]) -Greedy action tensor([-1.6146, -0.6043, 0.5325, -0.0179]) tensor([0.0580, 0.1593, 0.4964, 0.2863]) -Greedy action tensor([-1.7902, -0.4735, 0.5898, -0.0762]) tensor([0.0474, 0.1769, 0.5124, 0.2632]) -Greedy action tensor([-0.0741, 0.3297, 0.9291, 1.3895]) tensor([0.1048, 0.1569, 0.2857, 0.4527]) -Greedy action tensor([-2.0239, -0.8509, 0.4463, -0.0626]) tensor([0.0432, 0.1395, 0.5104, 0.3069]) -Greedy action tensor([-1.8855, -0.4530, 0.6359, -0.1431]) tensor([0.0428, 0.1794, 0.5331, 0.2446]) -Greedy action tensor([-1.8636, -0.4199, 0.6827, -0.1080]) tensor([0.0420, 0.1781, 0.5365, 0.2433]) -Greedy action tensor([-1.7841, -0.4208, 0.6884, 0.0264]) tensor([0.0437, 0.1709, 0.5181, 0.2673]) -Greedy action tensor([-0.7307, -0.5662, 0.1928, 0.2919]) tensor([0.1337, 0.1577, 0.3368, 0.3718]) -Greedy action tensor([-1.3783, -0.3968, 0.7666, -0.7035]) tensor([0.0706, 0.1883, 0.6026, 0.1385]) -Greedy action tensor([-1.9311, -0.4465, 0.6585, -0.1745]) tensor([0.0408, 0.1799, 0.5432, 0.2361]) -Greedy action tensor([-0.5332, -0.4972, 1.0730, 1.6423]) tensor([0.0632, 0.0655, 0.3149, 0.5564]) -Greedy action tensor([-1.7666, -0.0584, 0.5196, -0.0877]) tensor([0.0460, 0.2542, 0.4530, 0.2468]) -Greedy action tensor([-1.7012, -0.7581, 0.4995, -0.1491]) tensor([0.0577, 0.1483, 0.5214, 0.2726]) -Greedy action tensor([-1.7578, -0.2889, 0.6780, 0.1392]) tensor([0.0427, 0.1854, 0.4875, 0.2844]) -Greedy action tensor([ 0.6868, -0.1429, -0.0338, -0.1017]) tensor([0.4207, 0.1835, 0.2046, 0.1912]) -Greedy action tensor([ 0.7652, -0.3035, -0.0178, -0.2401]) tensor([0.4616, 0.1585, 0.2110, 0.1689]) -Greedy action tensor([ 0.2894, -0.2338, -0.0912, -0.3094]) tensor([0.3539, 0.2097, 0.2419, 0.1945]) -Greedy action tensor([ 0.5268, 0.1088, -0.1765, -0.3308]) tensor([0.3880, 0.2554, 0.1920, 0.1646]) -Greedy action tensor([ 0.2526, 0.0521, -0.0417, -0.0970]) tensor([0.3060, 0.2504, 0.2280, 0.2157]) -Greedy action tensor([ 0.4700, -0.2407, -0.0501, -0.2273]) tensor([0.3870, 0.1902, 0.2301, 0.1927]) -Greedy action tensor([ 0.5928, 0.2765, -0.2167, -0.1053]) tensor([0.3743, 0.2728, 0.1666, 0.1862]) -Greedy action tensor([ 0.7366, -0.3708, -0.0952, -0.4576]) tensor([0.4834, 0.1597, 0.2104, 0.1464]) -Greedy action tensor([ 0.8914, -0.7537, -0.0658, -0.4662]) tensor([0.5452, 0.1052, 0.2093, 0.1403]) -Greedy action tensor([ 0.7045, -0.5668, -0.1614, -0.4233]) tensor([0.4938, 0.1385, 0.2078, 0.1599]) -Greedy action tensor([ 0.8473, -0.6213, -0.0135, -0.5767]) tensor([0.5280, 0.1216, 0.2233, 0.1271]) -Greedy action tensor([ 0.5150, -0.3641, 0.0458, -0.3857]) tensor([0.4087, 0.1697, 0.2556, 0.1660]) -Greedy action tensor([ 1.1768, -1.0162, -0.1565, -0.7978]) tensor([0.6605, 0.0737, 0.1741, 0.0917]) -Greedy action tensor([ 0.7492, -0.4302, 0.0584, -0.3402]) tensor([0.4662, 0.1433, 0.2336, 0.1568]) -Greedy action tensor([ 0.6416, -0.2883, -0.0319, -0.3164]) tensor([0.4370, 0.1725, 0.2228, 0.1677]) -Greedy action tensor([ 0.4936, -0.5344, 0.0499, -0.6232]) tensor([0.4298, 0.1537, 0.2758, 0.1407]) -Greedy action tensor([ 0.9068, -0.5603, -0.1266, -0.3561]) tensor([0.5350, 0.1234, 0.1903, 0.1513]) -Greedy action tensor([ 0.9312, -0.6207, -0.0292, -0.5094]) tensor([0.5460, 0.1157, 0.2090, 0.1293]) -Greedy action tensor([ 0.2558, 0.0587, -0.0210, -0.0687]) tensor([0.3028, 0.2486, 0.2296, 0.2189]) -Greedy action tensor([ 0.4937, -0.4217, -0.0805, -0.4273]) tensor([0.4234, 0.1695, 0.2385, 0.1686]) -Greedy action tensor([ 0.5948, 0.0903, -0.1461, -0.2707]) tensor([0.3998, 0.2414, 0.1906, 0.1683]) -Greedy action tensor([ 1.1345, -0.6727, 0.0084, -0.9985]) tensor([0.6223, 0.1021, 0.2018, 0.0737]) -Greedy action tensor([ 0.3630, 0.3112, -0.0146, -0.0549]) tensor([0.3037, 0.2883, 0.2081, 0.1999]) -Greedy action tensor([ 0.9809, -0.2811, -0.1591, -0.2684]) tensor([0.5292, 0.1498, 0.1692, 0.1517]) -Greedy action tensor([ 0.7096, -0.4054, -0.0068, -0.4742]) tensor([0.4711, 0.1545, 0.2301, 0.1442]) -Greedy action tensor([ 0.4421, -0.1403, -0.0763, -0.1679]) tensor([0.3707, 0.2071, 0.2208, 0.2014]) -Greedy action tensor([ 0.2932, 0.1806, 0.0474, -0.2054]) tensor([0.3046, 0.2722, 0.2382, 0.1850]) -Greedy action tensor([ 0.8519, -0.7163, -0.3166, -0.9031]) tensor([0.5910, 0.1232, 0.1837, 0.1022]) -Greedy action tensor([ 0.6529, -0.2753, -0.0450, -0.2054]) tensor([0.4316, 0.1706, 0.2148, 0.1830]) -Greedy action tensor([ 0.2656, -0.0830, 0.1202, -0.2828]) tensor([0.3176, 0.2242, 0.2746, 0.1836]) -Greedy action tensor([ 0.7138, -0.3792, -0.0067, -0.4559]) tensor([0.4690, 0.1572, 0.2282, 0.1456]) -Greedy action tensor([ 0.8562, -0.4381, -0.0710, -0.2950]) tensor([0.5035, 0.1380, 0.1992, 0.1592]) -Greedy action tensor([ 0.6413, -0.4791, -0.1580, -0.4698]) tensor([0.4751, 0.1549, 0.2136, 0.1564]) -Greedy action tensor([ 0.8384, -0.5318, -0.0428, -0.3601]) tensor([0.5076, 0.1290, 0.2103, 0.1531]) -Greedy action tensor([ 0.6891, -0.8922, -0.1056, -0.2568]) tensor([0.4888, 0.1006, 0.2208, 0.1898]) -Greedy action tensor([ 0.7607, -0.4789, -0.0401, -0.3168]) tensor([0.4810, 0.1393, 0.2160, 0.1638]) -Greedy action tensor([ 0.8989, -0.5688, 0.0653, -0.2245]) tensor([0.5025, 0.1158, 0.2183, 0.1634]) -Greedy action tensor([ 0.7606, -0.4963, -0.0009, -0.4213]) tensor([0.4859, 0.1382, 0.2269, 0.1490]) -Greedy action tensor([ 0.4944, 0.0097, -0.1657, -0.2210]) tensor([0.3814, 0.2349, 0.1971, 0.1865]) -Greedy action tensor([ 0.2704, -0.4052, -0.1037, -0.1000]) tensor([0.3463, 0.1762, 0.2383, 0.2391]) -Greedy action tensor([ 0.5514, -0.1413, -0.0243, -0.4621]) tensor([0.4123, 0.2062, 0.2318, 0.1496]) -Greedy action tensor([ 0.4495, 0.0808, 0.0305, -0.1851]) tensor([0.3473, 0.2402, 0.2284, 0.1841]) -Greedy action tensor([ 0.2644, 0.0585, -0.0799, -0.1703]) tensor([0.3155, 0.2567, 0.2236, 0.2042]) -Greedy action tensor([ 0.8285, -0.3418, 0.0981, -0.5272]) tensor([0.4879, 0.1514, 0.2350, 0.1258]) -Greedy action tensor([ 1.0554, -1.1396, 0.1062, -0.5870]) tensor([0.5910, 0.0658, 0.2288, 0.1144]) -Greedy action tensor([ 0.2566, 0.3602, -0.1049, -0.2500]) tensor([0.2934, 0.3254, 0.2044, 0.1768]) -Greedy action tensor([ 0.8284, -0.5206, 0.0472, -0.3660]) tensor([0.4950, 0.1284, 0.2266, 0.1499]) -Greedy action tensor([ 0.8665, -0.3851, 0.1301, -0.1119]) tensor([0.4671, 0.1336, 0.2237, 0.1756]) -Greedy action tensor([ 0.6287, 0.0233, 0.1323, -0.4306]) tensor([0.3998, 0.2182, 0.2434, 0.1386]) -Greedy action tensor([ 0.4157, -0.3577, -0.0463, -0.3556]) tensor([0.3916, 0.1807, 0.2467, 0.1811]) -Greedy action tensor([ 0.3314, -0.0530, 0.0082, -0.2015]) tensor([0.3343, 0.2276, 0.2420, 0.1962]) -Greedy action tensor([ 0.7358, -0.7235, -0.0232, -0.7265]) tensor([0.5175, 0.1203, 0.2423, 0.1199]) -Greedy action tensor([ 0.4516, -0.0178, 0.0421, -0.2369]) tensor([0.3582, 0.2240, 0.2378, 0.1799]) -Greedy action tensor([ 0.4713, -0.2390, -0.0711, -0.2252]) tensor([0.3889, 0.1912, 0.2261, 0.1938]) -Greedy action tensor([ 0.8619, -0.9640, 0.0496, -0.4207]) tensor([0.5313, 0.0856, 0.2358, 0.1473]) -Greedy action tensor([ 0.7822, -0.2657, -0.0224, -0.0575]) tensor([0.4485, 0.1573, 0.2006, 0.1937]) -Greedy action tensor([ 0.9005, -0.4645, -0.1142, -0.6219]) tensor([0.5446, 0.1391, 0.1974, 0.1188]) -Greedy action tensor([ 0.4408, -0.1892, -0.0969, -0.2541]) tensor([0.3823, 0.2036, 0.2233, 0.1908]) -Greedy action tensor([ 0.3876, -0.1849, -0.0593, -0.1832]) tensor([0.3612, 0.2037, 0.2310, 0.2041]) -Greedy action tensor([ 0.7213, -0.4119, -0.1041, -0.2501]) tensor([0.4676, 0.1506, 0.2048, 0.1770]) -Greedy action tensor([ 0.8392, -0.3608, -0.1025, -0.3552]) tensor([0.5015, 0.1511, 0.1956, 0.1519]) -Greedy action tensor([ 1.0139, -0.5889, -0.1310, -0.4831]) tensor([0.5736, 0.1155, 0.1826, 0.1284]) -Greedy action tensor([ 0.5761, 0.3341, -0.2440, -0.0752]) tensor([0.3641, 0.2858, 0.1603, 0.1898]) -Greedy action tensor([ 0.5693, -0.2841, 0.0242, -0.3801]) tensor([0.4179, 0.1780, 0.2423, 0.1617]) -Greedy action tensor([ 1.0276, -0.8006, 0.0214, -0.3868]) tensor([0.5652, 0.0908, 0.2066, 0.1374]) -Greedy action tensor([ 0.9360, -0.4348, 0.0419, -0.2319]) tensor([0.5066, 0.1286, 0.2072, 0.1576]) -Greedy action tensor([ 0.5717, -0.2518, 0.0526, -0.2830]) tensor([0.4066, 0.1784, 0.2420, 0.1730]) -Greedy action tensor([ 0.8586, -0.2151, 0.0767, -0.4528]) tensor([0.4834, 0.1652, 0.2212, 0.1302]) -Greedy action tensor([ 1.0722, -1.3800, 0.0451, -0.7262]) tensor([0.6212, 0.0535, 0.2224, 0.1029]) -Greedy action tensor([ 1.0159, -0.9041, -0.0671, -0.3436]) tensor([0.5741, 0.0842, 0.1944, 0.1474]) -Greedy action tensor([ 0.8589, -0.6338, -0.1740, -0.4932]) tensor([0.5436, 0.1222, 0.1935, 0.1406]) -Greedy action tensor([ 0.9089, -0.3023, 0.0612, -0.2478]) tensor([0.4900, 0.1460, 0.2099, 0.1541]) -Greedy action tensor([ 0.8716, -0.7704, -0.1658, -0.5786]) tensor([0.5610, 0.1086, 0.1988, 0.1316]) -Greedy action tensor([ 0.8452, -0.5095, -0.0442, -0.3575]) tensor([0.5078, 0.1310, 0.2087, 0.1525]) -Greedy action tensor([ 0.3018, -0.1183, 0.0019, -0.3115]) tensor([0.3402, 0.2235, 0.2521, 0.1842]) -Greedy action tensor([ 0.4708, -0.1632, -0.0941, -0.1798]) tensor([0.3816, 0.2024, 0.2169, 0.1991]) -Greedy action tensor([ 0.4707, -0.5653, -0.0673, -0.1648]) tensor([0.4051, 0.1438, 0.2366, 0.2146]) -Greedy action tensor([ 0.8816, -0.6585, -0.0973, -0.5620]) tensor([0.5476, 0.1174, 0.2057, 0.1293]) -Greedy action tensor([ 0.8961, -0.5252, -0.0828, -0.4069]) tensor([0.5294, 0.1278, 0.1989, 0.1438]) -Greedy action tensor([-0.0418, -0.0456, 0.0537, -0.1506]) tensor([0.2504, 0.2495, 0.2755, 0.2246]) -Greedy action tensor([ 0.4553, -0.4289, -0.0219, -0.6175]) tensor([0.4210, 0.1739, 0.2612, 0.1440]) -Greedy action tensor([ 1.1015, -0.7771, 0.8643, 0.6798]) tensor([0.3850, 0.0588, 0.3037, 0.2525]) -Greedy action tensor([-0.9215, -0.5542, -0.8921, 0.4004]) tensor([0.1384, 0.1999, 0.1426, 0.5192]) -Greedy action tensor([ 0.5668, -1.2531, 0.6351, 0.4182]) tensor([0.3231, 0.0524, 0.3460, 0.2785]) -Greedy action tensor([1.6552, 0.1218, 0.6021, 0.5844]) tensor([0.5243, 0.1131, 0.1829, 0.1797]) -Greedy action tensor([ 1.1976, -0.1541, -0.0081, 0.8930]) tensor([0.4356, 0.1127, 0.1305, 0.3212]) -Greedy action tensor([0.8285, 0.8920, 0.5244, 0.0090]) tensor([0.3083, 0.3285, 0.2274, 0.1358]) -Greedy action tensor([-0.0154, 0.8853, 1.0833, -0.1049]) tensor([0.1356, 0.3337, 0.4068, 0.1240]) -Greedy action tensor([-0.7398, -0.8684, -0.9067, 1.2446]) tensor([0.1000, 0.0879, 0.0846, 0.7275]) -Greedy action tensor([-0.1438, -0.6482, -0.3290, 0.5442]) tensor([0.2260, 0.1365, 0.1878, 0.4497]) -Greedy action tensor([-0.2170, 0.7555, 1.2894, 0.0191]) tensor([0.1061, 0.2807, 0.4788, 0.1344]) -Greedy action tensor([ 0.7952, 0.2547, -0.2998, -0.1384]) tensor([0.4329, 0.2521, 0.1448, 0.1702]) -Greedy action tensor([ 1.3112, -1.6492, 1.3276, 0.5883]) tensor([0.3916, 0.0203, 0.3981, 0.1901]) -Greedy action tensor([-0.2357, -1.4376, -1.1386, 0.2598]) tensor([0.2987, 0.0898, 0.1211, 0.4903]) -Greedy action tensor([ 0.6399, -1.1470, 0.8705, 0.4701]) tensor([0.3057, 0.0512, 0.3851, 0.2580]) -Greedy action tensor([-0.3810, -0.6197, -1.1788, 0.7427]) tensor([0.1882, 0.1482, 0.0847, 0.5789]) -Greedy action tensor([ 0.8785, -1.7766, 0.5167, 1.6487]) tensor([0.2547, 0.0179, 0.1774, 0.5501]) -Greedy action tensor([ 1.5715, -0.0399, 0.8672, 1.2265]) tensor([0.4163, 0.0831, 0.2058, 0.2948]) -Greedy action tensor([ 1.0502, -0.5512, 0.0444, 0.4980]) tensor([0.4666, 0.0941, 0.1707, 0.2686]) -Greedy action tensor([ 0.1867, -0.4108, 1.2195, 1.5669]) tensor([0.1200, 0.0660, 0.3370, 0.4770]) -Greedy action tensor([ 1.4744, -0.4643, 0.9807, 1.5236]) tensor([0.3565, 0.0513, 0.2176, 0.3745]) -Greedy action tensor([-0.7508, -0.8700, -0.8747, 0.8237]) tensor([0.1316, 0.1168, 0.1163, 0.6354]) -Greedy action tensor([ 1.9317, -1.3682, -0.3732, 0.7309]) tensor([0.6956, 0.0257, 0.0694, 0.2093]) -Greedy action tensor([ 0.3791, -2.1486, -0.1974, 0.7119]) tensor([0.3293, 0.0263, 0.1850, 0.4594]) -Greedy action tensor([ 0.9190, -0.7844, -0.9706, 0.2282]) tensor([0.5451, 0.0992, 0.0824, 0.2732]) -Greedy action tensor([-0.3205, -1.3719, 0.6434, 0.0272]) tensor([0.1856, 0.0649, 0.4867, 0.2628]) -Greedy action tensor([-0.6182, 0.3714, -0.5340, -0.7716]) tensor([0.1774, 0.4773, 0.1930, 0.1522]) -Greedy action tensor([-0.6321, -0.2493, 0.2881, 1.1654]) tensor([0.0908, 0.1332, 0.2279, 0.5481]) -Greedy action tensor([ 0.2212, 0.0058, 0.5617, -0.1013]) tensor([0.2540, 0.2048, 0.3571, 0.1840]) -Greedy action tensor([-0.1053, -0.1120, 0.7803, -0.1168]) tensor([0.1850, 0.1837, 0.4484, 0.1829]) -Greedy action tensor([0.0362, 0.3457, 0.6677, 0.2120]) tensor([0.1840, 0.2507, 0.3460, 0.2193]) -Greedy action tensor([ 0.0783, -0.2315, 0.3721, 0.0533]) tensor([0.2469, 0.1811, 0.3312, 0.2408]) -Greedy action tensor([-1.1891, -1.1900, -0.7540, -0.3983]) tensor([0.1739, 0.1738, 0.2687, 0.3835]) -Greedy action tensor([ 0.8147, -0.6584, 0.1131, 0.6373]) tensor([0.3903, 0.0894, 0.1935, 0.3268]) -Greedy action tensor([ 1.4350, -1.6729, 0.4438, 0.2326]) tensor([0.5827, 0.0260, 0.2162, 0.1751]) -Greedy action tensor([ 0.8970, -0.7306, 1.3900, 1.0099]) tensor([0.2530, 0.0497, 0.4142, 0.2832]) -Greedy action tensor([-1.2031, -1.0239, 2.5064, -0.3396]) tensor([0.0220, 0.0263, 0.8994, 0.0522]) -Greedy action tensor([1.2482, 0.6258, 1.5744, 0.2861]) tensor([0.3026, 0.1624, 0.4193, 0.1156]) -Greedy action tensor([ 1.3176, -0.9920, -0.7876, -0.2453]) tensor([0.6990, 0.0694, 0.0852, 0.1465]) -Greedy action tensor([-0.4978, 0.0305, 0.4163, -0.4762]) tensor([0.1610, 0.2730, 0.4015, 0.1645]) -Greedy action tensor([ 0.5106, -0.2203, 0.3184, -0.5163]) tensor([0.3753, 0.1807, 0.3097, 0.1344]) -Greedy action tensor([-0.0450, -0.4730, -0.4524, 0.5655]) tensor([0.2405, 0.1567, 0.1600, 0.4428]) -Greedy action tensor([ 1.0190, -0.7758, 0.4246, 0.1319]) tensor([0.4695, 0.0780, 0.2591, 0.1934]) -Greedy action tensor([-1.2203, -1.0378, -0.1735, -0.6103]) tensor([0.1452, 0.1742, 0.4135, 0.2671]) -Greedy action tensor([-0.0350, 0.4214, 0.6886, 0.6569]) tensor([0.1506, 0.2378, 0.3106, 0.3009]) -Greedy action tensor([ 1.2923, -0.3896, 0.9262, 0.5402]) tensor([0.4254, 0.0791, 0.2950, 0.2005]) -Greedy action tensor([ 0.0039, 0.3610, -0.3153, -0.4169]) tensor([0.2623, 0.3749, 0.1906, 0.1722]) -Greedy action tensor([-0.4833, 0.6505, -0.1690, 0.2819]) tensor([0.1311, 0.4075, 0.1796, 0.2818]) -Greedy action tensor([ 0.6852, -0.6572, -0.7532, 0.6730]) tensor([0.4022, 0.1051, 0.0954, 0.3973]) -Greedy action tensor([ 0.9167, -1.0971, 3.0484, -0.1856]) tensor([0.1011, 0.0135, 0.8519, 0.0336]) -Greedy action tensor([-0.3306, -1.8297, -0.6482, 0.4988]) tensor([0.2357, 0.0526, 0.1715, 0.5402]) -Greedy action tensor([ 0.0059, -1.0677, -0.1565, 0.8467]) tensor([0.2217, 0.0758, 0.1885, 0.5140]) -Greedy action tensor([-0.3773, -1.3999, 0.1449, -0.1999]) tensor([0.2359, 0.0848, 0.3976, 0.2817]) -Greedy action tensor([ 0.8631, -0.0993, 0.8121, -0.6680]) tensor([0.3924, 0.1499, 0.3729, 0.0849]) -Greedy action tensor([-0.0959, -1.0664, -0.1720, 0.8558]) tensor([0.2043, 0.0774, 0.1893, 0.5290]) -Greedy action tensor([ 0.1165, -1.5918, -0.0607, 0.7526]) tensor([0.2559, 0.0464, 0.2143, 0.4834]) -Greedy action tensor([-0.0443, -2.0220, 0.7565, 0.5015]) tensor([0.1964, 0.0272, 0.4375, 0.3390]) -Greedy action tensor([-1.2816, -2.2993, -0.2775, 0.3173]) tensor([0.1106, 0.0400, 0.3020, 0.5474]) -Greedy action tensor([-0.2701, -0.8108, -0.5032, -0.1171]) tensor([0.2825, 0.1645, 0.2238, 0.3292]) -Greedy action tensor([ 0.2212, -0.6203, -0.1317, 1.0602]) tensor([0.2248, 0.0969, 0.1580, 0.5203]) -Greedy action tensor([0.5789, 0.1416, 0.2557, 0.9444]) tensor([0.2624, 0.1695, 0.1899, 0.3782]) -Greedy action tensor([0.9087, 0.1175, 0.7602, 0.3097]) tensor([0.3491, 0.1582, 0.3009, 0.1918]) -Greedy action tensor([ 0.2148, 0.4277, -0.5917, -1.1841]) tensor([0.3412, 0.4222, 0.1523, 0.0842]) -Greedy action tensor([ 0.0586, -0.0258, -0.2451, 1.4605]) tensor([0.1488, 0.1368, 0.1098, 0.6046]) -Greedy action tensor([ 0.5250, 0.3775, 0.1191, -0.2630]) tensor([0.3351, 0.2892, 0.2233, 0.1524]) -Greedy action tensor([ 0.7620, -0.4825, -0.0485, 1.0359]) tensor([0.3281, 0.0945, 0.1459, 0.4315]) -Greedy action tensor([0.8032, 0.7724, 0.4534, 0.3512]) tensor([0.3021, 0.2929, 0.2129, 0.1922]) -Greedy action tensor([ 0.9368, -0.8016, 1.2288, 0.8202]) tensor([0.2937, 0.0516, 0.3933, 0.2614]) -Greedy action tensor([ 0.7524, -1.4599, -0.1373, -0.1212]) tensor([0.5161, 0.0565, 0.2120, 0.2154]) -Greedy action tensor([ 8.0250e-01, -1.7381e+00, 1.4879e+00, -7.9274e-04]) tensor([0.2848, 0.0224, 0.5652, 0.1275]) -Greedy action tensor([ 0.2729, -0.8896, 0.1190, -0.2376]) tensor([0.3610, 0.1129, 0.3095, 0.2167]) -Greedy action tensor([ 0.1505, 0.5518, -0.0099, 0.4794]) tensor([0.2112, 0.3155, 0.1799, 0.2934]) -Greedy action tensor([ 1.5762, -0.5889, 1.6623, 0.4224]) tensor([0.3968, 0.0455, 0.4325, 0.1252]) -Greedy action tensor([ 1.4805, 0.5609, -0.8615, 0.3102]) tensor([0.5540, 0.2209, 0.0533, 0.1719]) -Greedy action tensor([ 0.6122, -0.9403, 0.4716, 0.8158]) tensor([0.3024, 0.0640, 0.2628, 0.3707]) -Greedy action tensor([0.7166, 0.6084, 0.0522, 0.0581]) tensor([0.3413, 0.3063, 0.1757, 0.1767]) -Greedy action tensor([-0.7482, 0.6575, -0.3274, 0.8987]) tensor([0.0848, 0.3458, 0.1292, 0.4402]) -Greedy action tensor([ 1.0696, -0.0930, 1.1476, 0.2186]) tensor([0.3545, 0.1108, 0.3833, 0.1514]) -Greedy action tensor([-0.2194, -0.9033, 0.0389, 0.0049]) tensor([0.2469, 0.1246, 0.3196, 0.3089]) -Greedy action tensor([ 1.0482, 0.1812, 0.2886, -0.0560]) tensor([0.4506, 0.1893, 0.2108, 0.1493]) -Greedy action tensor([-0.0356, -1.1989, 0.8096, -0.5960]) tensor([0.2374, 0.0742, 0.5528, 0.1356]) -Greedy action tensor([ 1.2578, -1.2477, 0.6878, 0.8234]) tensor([0.4358, 0.0356, 0.2464, 0.2822]) -Greedy action tensor([-0.4152, 0.5273, -0.5634, 1.2948]) tensor([0.1004, 0.2577, 0.0866, 0.5552]) -Greedy action tensor([ 0.7189, -0.0502, -0.1083, -0.1291]) tensor([0.4294, 0.1990, 0.1877, 0.1839]) -Greedy action tensor([ 1.2174, -0.2056, -0.3132, -0.0538]) tensor([0.5754, 0.1387, 0.1245, 0.1614]) -Greedy action tensor([ 1.4888, -0.4849, -0.0648, 0.5969]) tensor([0.5681, 0.0789, 0.1201, 0.2328]) -Greedy action tensor([ 0.9304, -0.3953, -0.4246, 0.6687]) tensor([0.4361, 0.1158, 0.1125, 0.3356]) -Greedy action tensor([ 2.0730, -1.2778, -0.2594, 0.3279]) tensor([0.7653, 0.0268, 0.0743, 0.1336]) -Greedy action tensor([ 1.1879, -0.5710, -0.1164, 0.1112]) tensor([0.5604, 0.0965, 0.1521, 0.1910]) -Greedy action tensor([ 1.3037, -0.3816, -0.2895, 0.2934]) tensor([0.5705, 0.1058, 0.1160, 0.2077]) -Greedy action tensor([ 1.1655, 0.0218, -0.4859, 0.2546]) tensor([0.5229, 0.1666, 0.1003, 0.2103]) -Greedy action tensor([ 1.9491, -0.7275, -0.2503, 0.3845]) tensor([0.7200, 0.0495, 0.0798, 0.1506]) -Greedy action tensor([ 1.5269, -0.3777, -0.3588, 0.3684]) tensor([0.6194, 0.0922, 0.0940, 0.1944]) -Greedy action tensor([ 1.4533, -0.1624, -0.6220, 0.7287]) tensor([0.5529, 0.1099, 0.0694, 0.2679]) -Greedy action tensor([ 1.9096, -0.7653, -0.0677, 0.1869]) tensor([0.7215, 0.0497, 0.0999, 0.1289]) -Greedy action tensor([ 1.3791, -0.5980, -0.1866, 0.6443]) tensor([0.5473, 0.0758, 0.1144, 0.2625]) -Greedy action tensor([ 2.1209, -0.8227, -0.4020, 0.4046]) tensor([0.7618, 0.0401, 0.0611, 0.1369]) -Greedy action tensor([ 0.5793, -0.3879, -0.3818, 0.3407]) tensor([0.3921, 0.1491, 0.1500, 0.3089]) -Greedy action tensor([ 0.7065, -0.1874, 0.2498, 0.1128]) tensor([0.3854, 0.1576, 0.2441, 0.2128]) -Greedy action tensor([ 1.2994, -0.2419, -0.5200, 0.1546]) tensor([0.5901, 0.1263, 0.0957, 0.1878]) -Greedy action tensor([ 1.4724, -0.3978, -0.1781, 0.3982]) tensor([0.5925, 0.0913, 0.1137, 0.2024]) -Greedy action tensor([ 0.5664, -0.0399, -0.1341, -0.4281]) tensor([0.4147, 0.2261, 0.2058, 0.1534]) -Greedy action tensor([ 1.3423, -0.5501, -0.4079, 0.0326]) tensor([0.6272, 0.0945, 0.1090, 0.1693]) -Greedy action tensor([ 1.0488, -0.3822, -0.3321, 0.3105]) tensor([0.5080, 0.1215, 0.1277, 0.2428]) -Greedy action tensor([ 0.8261, -0.4082, -0.1141, 0.1918]) tensor([0.4521, 0.1316, 0.1766, 0.2398]) -Greedy action tensor([ 1.4398, -0.7849, -0.2509, 0.1877]) tensor([0.6335, 0.0685, 0.1168, 0.1811]) -Greedy action tensor([ 1.3138, 0.0626, -0.9580, 0.5206]) tensor([0.5430, 0.1554, 0.0560, 0.2456]) -Greedy action tensor([ 1.0520, -0.2685, -0.2322, 0.1057]) tensor([0.5176, 0.1382, 0.1433, 0.2009]) -Greedy action tensor([ 1.2888, -0.7380, -0.6610, 0.9258]) tensor([0.5077, 0.0669, 0.0722, 0.3532]) -Greedy action tensor([ 1.0536, -0.1406, -0.2483, 0.1371]) tensor([0.5064, 0.1534, 0.1377, 0.2025]) -Greedy action tensor([ 1.2186, -0.7449, -0.4294, 0.8129]) tensor([0.5002, 0.0702, 0.0963, 0.3334]) -Greedy action tensor([ 1.6228, -0.3630, -0.2968, 0.2876]) tensor([0.6464, 0.0887, 0.0948, 0.1701]) -Greedy action tensor([ 1.1150, -0.3184, -0.1489, 0.2020]) tensor([0.5202, 0.1241, 0.1470, 0.2088]) -Greedy action tensor([ 0.9538, -0.3052, -0.0907, 0.3246]) tensor([0.4611, 0.1309, 0.1622, 0.2458]) -Greedy action tensor([ 0.8967, -0.4225, -0.0227, -0.2570]) tensor([0.5047, 0.1349, 0.2012, 0.1592]) -Greedy action tensor([ 0.8123, -0.3113, -0.0818, 0.2126]) tensor([0.4380, 0.1424, 0.1791, 0.2404]) -Greedy action tensor([ 1.7550, -0.5413, -0.3362, -0.0306]) tensor([0.7185, 0.0723, 0.0888, 0.1205]) -Greedy action tensor([ 0.9437, -0.2181, -0.6011, 0.1011]) tensor([0.5110, 0.1599, 0.1090, 0.2200]) -Greedy action tensor([ 1.3268, -0.5202, -0.5008, 0.7531]) tensor([0.5314, 0.0838, 0.0854, 0.2994]) -Greedy action tensor([ 2.1295, -0.8832, -0.3401, 0.7559]) tensor([0.7210, 0.0354, 0.0610, 0.1826]) -Greedy action tensor([ 1.4480, -0.8057, -0.0840, 0.0785]) tensor([0.6348, 0.0667, 0.1372, 0.1614]) -Greedy action tensor([ 1.0385, 0.0845, -0.1088, -0.2313]) tensor([0.5041, 0.1942, 0.1601, 0.1416]) -Greedy action tensor([ 1.1001, -0.3760, -0.0461, 0.1669]) tensor([0.5155, 0.1178, 0.1639, 0.2028]) -Greedy action tensor([ 1.7196, -0.6946, -0.3965, 0.4133]) tensor([0.6753, 0.0604, 0.0814, 0.1829]) -Greedy action tensor([ 1.7485, -0.5701, -0.0507, 0.3220]) tensor([0.6649, 0.0654, 0.1100, 0.1597]) -Greedy action tensor([ 1.4824, -0.5825, -0.5889, 0.4372]) tensor([0.6233, 0.0790, 0.0785, 0.2192]) -Greedy action tensor([ 1.1529, -0.5322, -0.2996, -0.0639]) tensor([0.5829, 0.1081, 0.1364, 0.1726]) -Greedy action tensor([ 0.8605, -0.4595, -0.1793, 0.1996]) tensor([0.4679, 0.1250, 0.1654, 0.2416]) -Greedy action tensor([ 1.5861, -0.2132, -0.0672, 0.5211]) tensor([0.5877, 0.0972, 0.1125, 0.2026]) -Greedy action tensor([ 1.2401, -0.1408, -0.3027, 0.4158]) tensor([0.5253, 0.1320, 0.1123, 0.2304]) -Greedy action tensor([ 1.4537, -0.7215, 0.1871, 0.5511]) tensor([0.5553, 0.0631, 0.1565, 0.2252]) -Greedy action tensor([ 1.6380, -0.4536, -0.4940, 0.3928]) tensor([0.6536, 0.0807, 0.0775, 0.1882]) -Greedy action tensor([ 1.2348, -0.5174, -0.1485, 0.2038]) tensor([0.5615, 0.0974, 0.1408, 0.2003]) -Greedy action tensor([ 1.2841, -0.3363, -0.2450, 0.4715]) tensor([0.5381, 0.1065, 0.1166, 0.2388]) -Greedy action tensor([ 1.0530, -0.1303, -0.2676, 0.0403]) tensor([0.5164, 0.1582, 0.1379, 0.1876]) -Greedy action tensor([ 0.6732, -0.4241, -0.3245, 0.5508]) tensor([0.3865, 0.1290, 0.1425, 0.3420]) -Greedy action tensor([ 1.3640, -0.3639, -0.5323, -0.3007]) tensor([0.6592, 0.1171, 0.0990, 0.1247]) -Greedy action tensor([ 1.0187, -0.7986, -0.2924, 0.5225]) tensor([0.4900, 0.0796, 0.1321, 0.2983]) -Greedy action tensor([ 1.3533, -0.7676, -0.4821, 0.7833]) tensor([0.5420, 0.0650, 0.0865, 0.3065]) -Greedy action tensor([ 1.5581, -0.6382, -0.5639, 0.3428]) tensor([0.6546, 0.0728, 0.0784, 0.1942]) -Greedy action tensor([ 1.9703, -0.8217, -0.5693, 0.1717]) tensor([0.7658, 0.0469, 0.0604, 0.1268]) -Greedy action tensor([ 0.6643, -0.1300, 0.0940, -0.0432]) tensor([0.3984, 0.1800, 0.2252, 0.1964]) -Greedy action tensor([ 1.5551, -0.6490, -0.2751, 0.3225]) tensor([0.6401, 0.0706, 0.1027, 0.1866]) -Greedy action tensor([ 1.0703, -0.1557, -0.1557, 0.1421]) tensor([0.5045, 0.1481, 0.1481, 0.1994]) -Greedy action tensor([ 1.1735, -0.3302, -0.3860, 0.1635]) tensor([0.5566, 0.1237, 0.1170, 0.2027]) -Greedy action tensor([ 0.7993, -0.3312, -0.0991, 0.2085]) tensor([0.4378, 0.1414, 0.1783, 0.2425]) -Greedy action tensor([ 0.8856, -0.2329, -0.1598, 0.1342]) tensor([0.4651, 0.1520, 0.1635, 0.2194]) -Greedy action tensor([ 1.0792, -0.6381, -0.2674, 0.4002]) tensor([0.5136, 0.0922, 0.1336, 0.2605]) -Greedy action tensor([ 1.6969, -0.7493, -0.1112, 0.0393]) tensor([0.6939, 0.0601, 0.1138, 0.1322]) -Greedy action tensor([ 1.3223, -0.4069, -0.1525, 0.2748]) tensor([0.5691, 0.1010, 0.1302, 0.1997]) -Greedy action tensor([ 0.8388, -0.2924, 0.3765, -0.0366]) tensor([0.4221, 0.1362, 0.2658, 0.1759]) -Greedy action tensor([ 0.0349, -0.3370, -0.2476, 0.1833]) tensor([0.2775, 0.1913, 0.2092, 0.3219]) -Greedy action tensor([ 1.1339, -0.2493, -0.7059, 0.2901]) tensor([0.5436, 0.1363, 0.0863, 0.2338]) -Greedy action tensor([ 1.7263, -0.4485, -0.3566, 0.6928]) tensor([0.6274, 0.0713, 0.0782, 0.2232]) -Greedy action tensor([ 0.8648, -0.2498, -0.0295, -0.0170]) tensor([0.4649, 0.1525, 0.1901, 0.1925]) -Greedy action tensor([ 1.1382, -0.2373, -0.3253, 0.2535]) tensor([0.5272, 0.1332, 0.1220, 0.2176]) -Greedy action tensor([ 1.0359, -0.5625, -0.5293, 0.6170]) tensor([0.4833, 0.0977, 0.1010, 0.3179]) -Greedy action tensor([ 1.4808, -0.5362, -0.5451, 0.2507]) tensor([0.6422, 0.0854, 0.0847, 0.1877]) -Greedy action tensor([ 1.7418, -0.4118, -0.3629, 0.2184]) tensor([0.6869, 0.0797, 0.0837, 0.1497]) -Greedy action tensor([ 0.9151, -0.6116, -0.2030, 0.4058]) tensor([0.4662, 0.1013, 0.1524, 0.2801]) -Greedy action tensor([ 1.4746, -0.5857, -0.5077, 0.5059]) tensor([0.6080, 0.0775, 0.0838, 0.2308]) -Greedy action tensor([ 1.4549, -0.5063, -0.2944, 0.3170]) tensor([0.6116, 0.0860, 0.1064, 0.1960]) -Greedy action tensor([ 1.3633, 0.0521, -1.1657, 0.0579]) tensor([0.6172, 0.1663, 0.0492, 0.1673]) -Greedy action tensor([ 1.8652, -0.6686, -0.3887, 0.4100]) tensor([0.7054, 0.0560, 0.0741, 0.1646]) -Greedy action tensor([ 1.0819, -0.5651, -0.0339, -0.7438]) tensor([0.5948, 0.1146, 0.1949, 0.0958]) -Greedy action tensor([ 0.3900, -0.1290, -0.0496, -0.0304]) tensor([0.3453, 0.2055, 0.2225, 0.2268]) -Greedy action tensor([ 0.8418, -0.7486, -0.0177, -0.4457]) tensor([0.5254, 0.1071, 0.2225, 0.1450]) -Greedy action tensor([ 0.2831, 0.2078, 0.1005, -0.2212]) tensor([0.2972, 0.2757, 0.2476, 0.1795]) -Greedy action tensor([ 0.4804, 0.0653, -0.1345, -0.2349]) tensor([0.3717, 0.2455, 0.2010, 0.1818]) -Greedy action tensor([ 0.6317, 0.0975, -0.1316, -0.2815]) tensor([0.4076, 0.2389, 0.1900, 0.1635]) -Greedy action tensor([ 0.5219, 0.1315, 0.0960, -0.2419]) tensor([0.3577, 0.2421, 0.2336, 0.1666]) -Greedy action tensor([ 0.6671, -0.4830, -0.1314, -0.2457]) tensor([0.4613, 0.1460, 0.2076, 0.1851]) -Greedy action tensor([ 0.6732, -0.2906, 0.0580, -0.4101]) tensor([0.4424, 0.1688, 0.2391, 0.1497]) -Greedy action tensor([ 0.5647, -0.2086, -0.0724, -0.0986]) tensor([0.3991, 0.1842, 0.2111, 0.2056]) -Greedy action tensor([ 0.4938, -0.3016, -0.0680, -0.3561]) tensor([0.4083, 0.1843, 0.2328, 0.1745]) -Greedy action tensor([ 0.4289, -0.0026, -0.1529, -0.4401]) tensor([0.3805, 0.2472, 0.2127, 0.1596]) -Greedy action tensor([ 0.4915, 0.0410, 0.0591, -0.2105]) tensor([0.3595, 0.2291, 0.2333, 0.1781]) -Greedy action tensor([ 0.4788, 0.0011, -0.0781, -0.3250]) tensor([0.3787, 0.2349, 0.2170, 0.1695]) -Greedy action tensor([ 0.5932, -0.5858, -0.0383, -0.5282]) tensor([0.4618, 0.1421, 0.2456, 0.1505]) -Greedy action tensor([ 0.8522, -0.6598, -0.1660, -0.3383]) tensor([0.5303, 0.1169, 0.1916, 0.1612]) -Greedy action tensor([ 0.5462, -0.3941, 0.0072, -0.0991]) tensor([0.4003, 0.1563, 0.2335, 0.2099]) -Greedy action tensor([ 0.0585, 0.0379, 0.0692, -0.0850]) tensor([0.2593, 0.2540, 0.2621, 0.2246]) -Greedy action tensor([ 0.5504, -0.4240, -0.1645, -0.4302]) tensor([0.4461, 0.1684, 0.2182, 0.1673]) -Greedy action tensor([ 0.2752, -0.1501, -0.0390, -0.2026]) tensor([0.3329, 0.2176, 0.2431, 0.2064]) -Greedy action tensor([ 0.8485, -0.3621, -0.1561, -0.6189]) tensor([0.5278, 0.1573, 0.1933, 0.1217]) -Greedy action tensor([ 0.4270, -0.1919, -0.1202, -0.4993]) tensor([0.3979, 0.2143, 0.2302, 0.1576]) -Greedy action tensor([ 0.5045, -0.1545, -0.0520, -0.0942]) tensor([0.3788, 0.1960, 0.2171, 0.2081]) -Greedy action tensor([ 0.5192, -0.1608, 0.0054, -0.2374]) tensor([0.3885, 0.1968, 0.2324, 0.1823]) -Greedy action tensor([ 1.0072, -0.7456, 0.0629, -0.3741]) tensor([0.5514, 0.0956, 0.2145, 0.1386]) -Greedy action tensor([ 0.2280, -0.2049, -0.2348, -0.4424]) tensor([0.3585, 0.2325, 0.2257, 0.1834]) -Greedy action tensor([ 0.8102, -0.5729, -0.1082, -0.5638]) tensor([0.5255, 0.1318, 0.2098, 0.1330]) -Greedy action tensor([ 0.4982, 0.0087, 0.0193, -0.3612]) tensor([0.3765, 0.2308, 0.2333, 0.1594]) -Greedy action tensor([ 0.6779, -0.4322, 0.0755, -0.6043]) tensor([0.4641, 0.1530, 0.2541, 0.1288]) -Greedy action tensor([ 0.2595, -0.1823, -0.0385, -0.3823]) tensor([0.3435, 0.2208, 0.2549, 0.1808]) -Greedy action tensor([ 0.8193, -0.3705, 0.0121, -0.2244]) tensor([0.4756, 0.1447, 0.2122, 0.1675]) -Greedy action tensor([ 0.9037, -0.4502, 0.0716, -0.3989]) tensor([0.5089, 0.1314, 0.2214, 0.1383]) -Greedy action tensor([ 0.7729, -0.5710, -0.0716, -0.4799]) tensor([0.5060, 0.1320, 0.2175, 0.1446]) -Greedy action tensor([ 0.8064, -0.3352, -0.0391, -0.4904]) tensor([0.4945, 0.1579, 0.2123, 0.1352]) -Greedy action tensor([ 0.5178, -0.1122, 0.0264, -0.3294]) tensor([0.3886, 0.2070, 0.2378, 0.1666]) -Greedy action tensor([ 0.8086, -0.4496, -0.0394, -0.4031]) tensor([0.4975, 0.1414, 0.2131, 0.1481]) -Greedy action tensor([ 0.6862, -0.4879, -0.0664, -0.4200]) tensor([0.4737, 0.1464, 0.2232, 0.1567]) -Greedy action tensor([ 0.8105, -0.6754, 0.2182, -0.6909]) tensor([0.4995, 0.1130, 0.2762, 0.1113]) -Greedy action tensor([ 0.4557, -0.1922, -0.0767, -0.3338]) tensor([0.3900, 0.2040, 0.2290, 0.1771]) -Greedy action tensor([ 0.3812, -0.1912, -0.0189, -0.0771]) tensor([0.3488, 0.1968, 0.2338, 0.2206]) -Greedy action tensor([ 0.7098, -0.5366, 0.0153, -0.3280]) tensor([0.4670, 0.1343, 0.2332, 0.1655]) -Greedy action tensor([ 0.8585, -0.8715, -0.0372, -0.3701]) tensor([0.5324, 0.0944, 0.2174, 0.1558]) -Greedy action tensor([ 0.3101, 0.0804, 0.0093, -0.1834]) tensor([0.3179, 0.2527, 0.2353, 0.1941]) -Greedy action tensor([ 0.6280, -0.4142, 0.0343, -0.3474]) tensor([0.4382, 0.1545, 0.2420, 0.1652]) -Greedy action tensor([ 1.0952, -0.7942, 0.0778, -0.7181]) tensor([0.5967, 0.0902, 0.2157, 0.0973]) -Greedy action tensor([ 0.6155, -0.5276, 0.2416, -0.7760]) tensor([0.4434, 0.1413, 0.3050, 0.1103]) -Greedy action tensor([ 0.5938, -0.4419, -0.1065, -0.3493]) tensor([0.4462, 0.1584, 0.2216, 0.1738]) -Greedy action tensor([ 0.5853, -0.3277, -0.0117, -0.2196]) tensor([0.4169, 0.1673, 0.2295, 0.1864]) -Greedy action tensor([ 0.5791, -0.4875, -0.0278, -0.5893]) tensor([0.4545, 0.1564, 0.2477, 0.1413]) -Greedy action tensor([ 0.8258, -0.3530, -0.0596, -0.1167]) tensor([0.4740, 0.1458, 0.1955, 0.1847]) -Greedy action tensor([ 0.5165, -0.0526, 0.1492, -0.2598]) tensor([0.3678, 0.2082, 0.2547, 0.1692]) -Greedy action tensor([ 0.7443, -0.2833, -0.1478, -0.2935]) tensor([0.4713, 0.1686, 0.1931, 0.1669]) -Greedy action tensor([ 0.3529, 0.0126, 0.0464, -0.3510]) tensor([0.3399, 0.2419, 0.2502, 0.1681]) -Greedy action tensor([ 0.6425, -0.2901, -0.2096, -0.4160]) tensor([0.4615, 0.1816, 0.1968, 0.1601]) -Greedy action tensor([ 0.5053, -0.1075, 0.0650, -0.1786]) tensor([0.3717, 0.2014, 0.2393, 0.1876]) -Greedy action tensor([ 0.7980, -0.5950, -0.0366, -0.4663]) tensor([0.5090, 0.1264, 0.2209, 0.1438]) -Greedy action tensor([ 0.3335, -0.0398, -0.1804, -0.1022]) tensor([0.3409, 0.2347, 0.2039, 0.2205]) -Greedy action tensor([ 0.7665, -0.3187, -0.0580, -0.2384]) tensor([0.4668, 0.1577, 0.2047, 0.1709]) -Greedy action tensor([ 0.5306, -0.3526, 0.0049, -0.1773]) tensor([0.4004, 0.1656, 0.2367, 0.1973]) -Greedy action tensor([ 0.3575, -0.4332, -0.1407, -0.2716]) tensor([0.3855, 0.1748, 0.2342, 0.2055]) -Greedy action tensor([ 0.6316, -0.6080, 0.0213, -0.6775]) tensor([0.4756, 0.1377, 0.2583, 0.1284]) -Greedy action tensor([ 0.4469, -0.5418, -0.0393, -0.0900]) tensor([0.3889, 0.1447, 0.2391, 0.2273]) -Greedy action tensor([ 0.7807, -0.6087, -0.0570, -0.3690]) tensor([0.5003, 0.1247, 0.2165, 0.1585]) -Greedy action tensor([ 0.9392, -0.5910, -0.0888, -0.5934]) tensor([0.5586, 0.1209, 0.1998, 0.1206]) -Greedy action tensor([ 0.1752, 0.2273, -0.0479, -0.2386]) tensor([0.2845, 0.2997, 0.2276, 0.1881]) -Greedy action tensor([ 1.1666, -0.4446, 0.2753, -0.6504]) tensor([0.5642, 0.1126, 0.2314, 0.0917]) -Greedy action tensor([ 0.5032, -0.1617, 0.1472, -0.3576]) tensor([0.3791, 0.1950, 0.2656, 0.1603]) -Greedy action tensor([ 0.5489, -0.1399, -0.1070, -0.0963]) tensor([0.3928, 0.1973, 0.2039, 0.2061]) -Greedy action tensor([ 0.2936, -0.1368, -0.0932, -0.1427]) tensor([0.3360, 0.2185, 0.2282, 0.2172]) -Greedy action tensor([ 0.5468, 0.0762, -0.2157, 0.0147]) tensor([0.3733, 0.2332, 0.1742, 0.2193]) -Greedy action tensor([ 0.7883, -0.6452, -0.0136, -0.5077]) tensor([0.5101, 0.1216, 0.2287, 0.1396]) -Greedy action tensor([ 2.6171e-01, -1.0530e-01, -2.4818e-04, -1.1991e-01]) tensor([0.3180, 0.2203, 0.2447, 0.2171]) -Greedy action tensor([ 1.1230, -0.7704, -0.0205, -0.6743]) tensor([0.6116, 0.0921, 0.1949, 0.1014]) -Greedy action tensor([ 1.1224, -0.3161, -0.0251, -0.4887]) tensor([0.5700, 0.1353, 0.1809, 0.1138]) -Greedy action tensor([ 0.3977, -0.3534, -0.0189, -0.3362]) tensor([0.3830, 0.1807, 0.2525, 0.1838]) -Greedy action tensor([ 0.4107, 0.1347, 0.1401, -0.1308]) tensor([0.3222, 0.2445, 0.2458, 0.1875]) -Greedy action tensor([ 0.6755, -0.1525, -0.0854, 0.0334]) tensor([0.4115, 0.1798, 0.1923, 0.2165]) -Greedy action tensor([ 0.8231, -0.8727, -0.1379, -0.3719]) tensor([0.5351, 0.0982, 0.2047, 0.1620]) -Greedy action tensor([ 0.7091, -0.5601, -0.0991, -0.4005]) tensor([0.4863, 0.1367, 0.2167, 0.1603]) -Greedy action tensor([ 0.5730, -0.6237, -0.1639, -0.5133]) tensor([0.4721, 0.1427, 0.2259, 0.1593]) -Greedy action tensor([ 0.3789, 0.2740, -0.1427, 0.0518]) tensor([0.3110, 0.2801, 0.1846, 0.2243]) -Greedy action tensor([-1.5263, -0.5323, 0.4583, 0.2185]) tensor([0.0599, 0.1618, 0.4356, 0.3427]) -Greedy action tensor([-0.8165, -0.5628, 0.2511, 0.2728]) tensor([0.1224, 0.1578, 0.3560, 0.3638]) -Greedy action tensor([-1.8515, -0.4524, 0.6124, -0.1260]) tensor([0.0446, 0.1807, 0.5242, 0.2505]) -Greedy action tensor([-1.0987, -0.7950, 0.0829, -0.0209]) tensor([0.1169, 0.1584, 0.3811, 0.3436]) -Greedy action tensor([-0.5052, -0.1760, 0.6708, 1.3890]) tensor([0.0814, 0.1132, 0.2640, 0.5414]) -Greedy action tensor([-1.6455, 0.3496, 0.4308, 0.2137]) tensor([0.0440, 0.3232, 0.3506, 0.2822]) -Greedy action tensor([-1.8369, -0.2524, 0.5845, -0.1019]) tensor([0.0438, 0.2138, 0.4938, 0.2486]) -Greedy action tensor([-1.9221, -0.4609, 0.6568, -0.1643]) tensor([0.0412, 0.1775, 0.5426, 0.2387]) -Greedy action tensor([-1.6137, -0.6177, 1.5321, 1.0797]) tensor([0.0240, 0.0649, 0.5569, 0.3543]) -Greedy action tensor([-1.6478, -0.5034, 0.5344, 0.0592]) tensor([0.0540, 0.1696, 0.4787, 0.2977]) -Greedy action tensor([-1.7978, -0.3738, 0.5799, -0.1049]) tensor([0.0468, 0.1944, 0.5045, 0.2543]) -Greedy action tensor([-1.5866, -0.5249, 0.4633, 0.0545]) tensor([0.0595, 0.1719, 0.4618, 0.3068]) -Greedy action tensor([-1.9089, -0.3420, 0.6310, -0.1666]) tensor([0.0414, 0.1982, 0.5243, 0.2362]) -Greedy action tensor([-1.8626, -0.4647, 0.6269, -0.1381]) tensor([0.0440, 0.1782, 0.5308, 0.2470]) -Greedy action tensor([-1.9201, -0.4520, 0.6521, -0.1614]) tensor([0.0413, 0.1791, 0.5402, 0.2395]) -Greedy action tensor([-1.6588, -0.2981, 0.5640, 0.1853]) tensor([0.0489, 0.1906, 0.4514, 0.3091]) -Greedy action tensor([-1.0829, -0.6175, 0.7223, 1.1828]) tensor([0.0546, 0.0870, 0.3321, 0.5263]) -Greedy action tensor([-1.8493, -0.3880, 0.6104, -0.1243]) tensor([0.0442, 0.1906, 0.5172, 0.2481]) -Greedy action tensor([-1.8899, -0.3369, 0.6232, -0.1418]) tensor([0.0420, 0.1985, 0.5183, 0.2412]) -Greedy action tensor([-1.7338, -0.5265, 0.5636, -0.0586]) tensor([0.0509, 0.1703, 0.5067, 0.2720]) -Greedy action tensor([-0.9758, -0.4258, 0.2782, 0.4924]) tensor([0.0945, 0.1638, 0.3313, 0.4104]) -Greedy action tensor([-1.8273, -0.2782, 0.6215, -0.0529]) tensor([0.0431, 0.2031, 0.4994, 0.2544]) -Greedy action tensor([-1.6432, 0.3505, 0.4048, 0.0758]) tensor([0.0461, 0.3388, 0.3577, 0.2574]) -Greedy action tensor([-1.2045, -0.4979, 0.3840, 0.4533]) tensor([0.0759, 0.1539, 0.3718, 0.3984]) -Greedy action tensor([-0.9525, -0.5394, 0.2384, 0.6725]) tensor([0.0919, 0.1389, 0.3024, 0.4668]) -Greedy action tensor([-1.7928, -0.3920, 0.5870, -0.1256]) tensor([0.0473, 0.1918, 0.5106, 0.2504]) -Greedy action tensor([-1.7664, -0.4712, 1.2757, 0.9109]) tensor([0.0249, 0.0910, 0.5218, 0.3623]) -Greedy action tensor([-0.7459, -0.6932, 1.0331, 1.5575]) tensor([0.0556, 0.0586, 0.3294, 0.5564]) -Greedy action tensor([-1.2829, -0.5513, 0.4774, 0.6183]) tensor([0.0642, 0.1333, 0.3730, 0.4295]) -Greedy action tensor([-1.8506, -0.4777, 0.7939, 0.1100]) tensor([0.0383, 0.1511, 0.5388, 0.2719]) -Greedy action tensor([-1.1050, -0.5674, 0.2308, 0.3426]) tensor([0.0929, 0.1590, 0.3532, 0.3950]) -Greedy action tensor([-1.8560, -0.4074, 0.6165, -0.1261]) tensor([0.0440, 0.1871, 0.5210, 0.2479]) -Greedy action tensor([-1.5752, -0.5007, 0.5149, 0.1686]) tensor([0.0564, 0.1651, 0.4560, 0.3225]) -Greedy action tensor([-1.8433, -0.4531, 0.6383, -0.1297]) tensor([0.0444, 0.1783, 0.5310, 0.2464]) -Greedy action tensor([-1.8501, -0.4579, 0.6165, -0.1435]) tensor([0.0448, 0.1803, 0.5280, 0.2469]) -Greedy action tensor([-1.8550, -0.3955, 0.6291, -0.1016]) tensor([0.0433, 0.1866, 0.5198, 0.2503]) -Greedy action tensor([-1.7533, -0.4790, 0.8001, 0.2682]) tensor([0.0400, 0.1432, 0.5145, 0.3023]) -Greedy action tensor([-0.8198, -0.5827, 0.2774, 0.1918]) tensor([0.1248, 0.1582, 0.3738, 0.3432]) -Greedy action tensor([-0.7013, -0.5462, 0.2608, -0.1976]) tensor([0.1553, 0.1813, 0.4064, 0.2570]) -Greedy action tensor([-1.6400, -0.4840, 0.5210, 0.0615]) tensor([0.0545, 0.1732, 0.4733, 0.2989]) -Greedy action tensor([-1.8509, -0.5039, 0.6534, -0.1164]) tensor([0.0440, 0.1691, 0.5379, 0.2491]) -Greedy action tensor([-1.4035, -0.5277, 0.5679, 0.6022]) tensor([0.0555, 0.1333, 0.3986, 0.4126]) -Greedy action tensor([-1.6783, -0.5050, 0.5146, 0.0066]) tensor([0.0538, 0.1739, 0.4822, 0.2901]) -Greedy action tensor([-1.4732, -0.6144, 0.5601, 0.1274]) tensor([0.0627, 0.1479, 0.4788, 0.3106]) -Greedy action tensor([-1.6012, -0.6011, 0.5454, 0.0732]) tensor([0.0568, 0.1544, 0.4858, 0.3030]) -Greedy action tensor([-1.8300, -0.2754, 0.5779, -0.1057]) tensor([0.0445, 0.2108, 0.4949, 0.2498]) -Greedy action tensor([-1.7310, -0.3952, 0.5505, -0.0195]) tensor([0.0497, 0.1889, 0.4864, 0.2750]) -Greedy action tensor([-1.6319, -0.3600, 0.6693, -0.4126]) tensor([0.0557, 0.1989, 0.5567, 0.1887]) -Greedy action tensor([-0.8400, -0.5722, 0.1788, 0.2771]) tensor([0.1230, 0.1607, 0.3406, 0.3758]) -Greedy action tensor([-1.8503, -0.3921, 0.6142, -0.1253]) tensor([0.0441, 0.1896, 0.5187, 0.2476]) -Greedy action tensor([-0.3218, 0.1693, 0.8821, 1.5799]) tensor([0.0790, 0.1290, 0.2632, 0.5288]) -Greedy action tensor([-1.9506, -0.6385, 0.6000, -0.1677]) tensor([0.0426, 0.1582, 0.5459, 0.2533]) -Greedy action tensor([-0.7638, -0.5956, 0.1663, 0.4084]) tensor([0.1258, 0.1489, 0.3190, 0.4063]) -Greedy action tensor([-1.5787, -0.4804, 0.5578, 0.1779]) tensor([0.0548, 0.1642, 0.4638, 0.3172]) -Greedy action tensor([-1.8071, -0.4806, 0.6042, -0.0499]) tensor([0.0461, 0.1735, 0.5135, 0.2669]) -Greedy action tensor([-0.2399, -0.1908, 0.6957, 1.4985]) tensor([0.0972, 0.1021, 0.2478, 0.5529]) -Greedy action tensor([-1.2504, -0.7075, 1.2037, 1.3293]) tensor([0.0363, 0.0625, 0.4223, 0.4789]) -Greedy action tensor([-1.2812, -0.5840, 0.3108, 0.2783]) tensor([0.0789, 0.1584, 0.3876, 0.3752]) -Greedy action tensor([-0.8463, -0.5908, 0.3084, 0.2999]) tensor([0.1161, 0.1499, 0.3685, 0.3654]) -Greedy action tensor([-1.6659, -0.6749, 0.9589, 0.0588]) tensor([0.0433, 0.1166, 0.5973, 0.2428]) -Greedy action tensor([-0.7929, -0.5462, 0.2540, -0.0018]) tensor([0.1363, 0.1745, 0.3884, 0.3008]) -Greedy action tensor([-1.9842, -0.7577, 0.3884, -0.1711]) tensor([0.0470, 0.1603, 0.5044, 0.2883]) -Greedy action tensor([-0.9210, -0.2045, 0.3988, 0.8393]) tensor([0.0793, 0.1624, 0.2969, 0.4613]) -Greedy action tensor([-1.8063, -0.4287, 0.6219, -0.0567]) tensor([0.0453, 0.1798, 0.5141, 0.2608]) -Greedy action tensor([-0.6864, -0.2957, 0.3304, -0.3127]) tensor([0.1494, 0.2207, 0.4129, 0.2170]) -Greedy action tensor([-0.6452, -0.1232, 1.2042, 1.4289]) tensor([0.0588, 0.0992, 0.3739, 0.4681]) -Greedy action tensor([-0.8033, -0.3705, 0.1915, 0.1380]) tensor([0.1281, 0.1974, 0.3463, 0.3283]) -Greedy action tensor([-1.7002, -0.4154, 0.5448, -0.0584]) tensor([0.0520, 0.1880, 0.4912, 0.2687]) -Greedy action tensor([-1.9159, -0.4317, 0.6499, -0.1608]) tensor([0.0413, 0.1822, 0.5375, 0.2389]) -Greedy action tensor([-1.9165, -0.4311, 0.6503, -0.1621]) tensor([0.0413, 0.1824, 0.5377, 0.2386]) -Greedy action tensor([-1.9389, -0.4167, 0.6557, -0.1758]) tensor([0.0403, 0.1847, 0.5399, 0.2351]) -Greedy action tensor([-1.0964, -0.0839, 0.5245, 0.3192]) tensor([0.0773, 0.2129, 0.3912, 0.3186]) -Greedy action tensor([-1.8026, -0.4745, 0.5843, -0.0847]) tensor([0.0471, 0.1778, 0.5126, 0.2625]) -Greedy action tensor([-0.6419, -0.4166, 0.5559, 1.0334]) tensor([0.0917, 0.1149, 0.3038, 0.4897]) -Greedy action tensor([-1.8729, -0.4053, 0.6276, -0.1228]) tensor([0.0430, 0.1864, 0.5235, 0.2472]) -Greedy action tensor([-1.2335, -0.5962, 0.3797, 0.2655]) tensor([0.0807, 0.1527, 0.4051, 0.3614]) -Greedy action tensor([-1.9147, -0.4640, 0.6503, -0.1660]) tensor([0.0416, 0.1777, 0.5414, 0.2393]) -Greedy action tensor([-0.7434, -0.5713, 0.1736, 0.3389]) tensor([0.1309, 0.1555, 0.3274, 0.3863]) -Greedy action tensor([-1.6809, -0.1623, 0.1229, -0.6057]) tensor([0.0686, 0.3134, 0.4168, 0.2011]) -Greedy action tensor([-1.0776, -0.5946, 0.2265, 0.2962]) tensor([0.0975, 0.1581, 0.3592, 0.3852]) -Greedy action tensor([-1.6003, 0.3871, 0.3619, 0.0879]) tensor([0.0480, 0.3504, 0.3417, 0.2598]) -Greedy action tensor([ 1.2371, -0.6170, 0.8209, 0.2706]) tensor([0.4553, 0.0713, 0.3003, 0.1732]) -Greedy action tensor([ 0.1912, -0.6182, -1.0591, 0.6351]) tensor([0.3039, 0.1353, 0.0870, 0.4738]) -Greedy action tensor([ 0.5881, -1.0858, 1.6937, -0.5797]) tensor([0.2213, 0.0415, 0.6684, 0.0688]) -Greedy action tensor([-0.0330, -2.2745, 0.1203, 0.0780]) tensor([0.2950, 0.0314, 0.3439, 0.3297]) -Greedy action tensor([ 1.1443, -0.0352, 1.4249, 0.5560]) tensor([0.3138, 0.0965, 0.4155, 0.1743]) -Greedy action tensor([-0.5184, -0.0915, -0.7202, -0.0990]) tensor([0.2053, 0.3146, 0.1678, 0.3123]) -Greedy action tensor([ 1.0957, -0.0021, 0.0485, 1.1705]) tensor([0.3620, 0.1208, 0.1270, 0.3901]) -Greedy action tensor([-0.6953, -0.8342, -0.7608, 0.8699]) tensor([0.1317, 0.1147, 0.1234, 0.6302]) -Greedy action tensor([ 0.4161, -1.0689, -0.2388, 0.4854]) tensor([0.3549, 0.0804, 0.1844, 0.3803]) -Greedy action tensor([ 1.0156, -0.8539, 0.4460, 0.7603]) tensor([0.4009, 0.0618, 0.2268, 0.3105]) -Greedy action tensor([0.7142, 0.5072, 0.1142, 0.3781]) tensor([0.3251, 0.2643, 0.1784, 0.2323]) -Greedy action tensor([ 0.8710, -0.7460, 0.9406, 1.2442]) tensor([0.2686, 0.0533, 0.2880, 0.3901]) -Greedy action tensor([ 0.2239, 0.2026, 1.9737, -0.1306]) tensor([0.1186, 0.1161, 0.6822, 0.0832]) -Greedy action tensor([ 0.5518, -1.8613, 0.5824, 0.0019]) tensor([0.3707, 0.0332, 0.3822, 0.2139]) -Greedy action tensor([ 0.4064, 0.1912, -0.8312, 1.0443]) tensor([0.2507, 0.2022, 0.0727, 0.4744]) -Greedy action tensor([ 0.3143, -1.0651, 0.0074, 1.3562]) tensor([0.2074, 0.0522, 0.1526, 0.5879]) -Greedy action tensor([-0.9795, 0.3386, 0.0016, -0.3347]) tensor([0.1074, 0.4014, 0.2865, 0.2047]) -Greedy action tensor([ 0.2700, -1.4149, 0.4225, 0.4091]) tensor([0.2858, 0.0530, 0.3328, 0.3284]) -Greedy action tensor([-0.1878, -0.8624, 1.6008, -0.5786]) tensor([0.1224, 0.0624, 0.7324, 0.0828]) -Greedy action tensor([-0.4903, -0.8934, -0.2989, 0.5372]) tensor([0.1763, 0.1178, 0.2135, 0.4925]) -Greedy action tensor([ 0.4009, -0.8173, 0.3602, -0.3941]) tensor([0.3693, 0.1092, 0.3546, 0.1668]) -Greedy action tensor([0.4520, 0.4269, 0.4214, 0.8201]) tensor([0.2278, 0.2221, 0.2209, 0.3291]) -Greedy action tensor([ 0.3875, -1.2183, 0.6619, -0.2373]) tensor([0.3277, 0.0658, 0.4311, 0.1754]) -Greedy action tensor([ 0.3344, 1.2964, 0.9186, -0.0428]) tensor([0.1640, 0.4293, 0.2942, 0.1125]) -Greedy action tensor([-0.6711, -1.6522, 0.0997, 0.5910]) tensor([0.1415, 0.0530, 0.3058, 0.4997]) -Greedy action tensor([ 0.7475, -0.2406, 1.8268, 0.1193]) tensor([0.2063, 0.0768, 0.6069, 0.1100]) -Greedy action tensor([-0.3451, -0.7103, -0.6569, 0.4316]) tensor([0.2174, 0.1509, 0.1591, 0.4726]) -Greedy action tensor([ 1.1010, -0.5109, 0.1239, 0.4318]) tensor([0.4789, 0.0956, 0.1803, 0.2453]) -Greedy action tensor([-1.0435, -0.3929, -1.4769, 0.2985]) tensor([0.1353, 0.2593, 0.0877, 0.5177]) -Greedy action tensor([ 0.5314, 0.0743, 0.3747, -0.2580]) tensor([0.3399, 0.2152, 0.2906, 0.1543]) -Greedy action tensor([ 1.5222, -0.2997, 0.2016, 0.8616]) tensor([0.5141, 0.0831, 0.1372, 0.2655]) -Greedy action tensor([ 0.8341, 0.3701, 1.1701, -0.1107]) tensor([0.2927, 0.1840, 0.4095, 0.1138]) -Greedy action tensor([ 0.5971, -1.3073, 0.5566, -0.3477]) tensor([0.4003, 0.0596, 0.3844, 0.1556]) -Greedy action tensor([0.1502, 0.4458, 0.3697, 0.4157]) tensor([0.2043, 0.2746, 0.2545, 0.2665]) -Greedy action tensor([-0.1573, -0.5810, -0.8295, 0.0580]) tensor([0.2936, 0.1922, 0.1499, 0.3642]) -Greedy action tensor([ 0.3824, -0.9216, 0.7561, 1.0812]) tensor([0.2111, 0.0573, 0.3068, 0.4247]) -Greedy action tensor([ 1.5515, -0.6963, 0.8093, 1.6157]) tensor([0.3777, 0.0399, 0.1798, 0.4027]) -Greedy action tensor([ 0.1513, -0.8428, -0.4548, 0.1729]) tensor([0.3404, 0.1260, 0.1857, 0.3479]) -Greedy action tensor([ 0.6129, -1.3893, 1.5220, -0.2715]) tensor([0.2481, 0.0335, 0.6159, 0.1025]) -Greedy action tensor([ 1.6249, -0.5035, 0.2382, 0.2890]) tensor([0.6128, 0.0729, 0.1531, 0.1611]) -Greedy action tensor([-0.1908, -0.8286, 1.3049, 0.4953]) tensor([0.1254, 0.0663, 0.5594, 0.2490]) -Greedy action tensor([ 0.1753, -0.1773, -0.3828, 1.0712]) tensor([0.2116, 0.1488, 0.1211, 0.5185]) -Greedy action tensor([ 1.1956, -1.5193, 0.9403, 1.7630]) tensor([0.2774, 0.0184, 0.2149, 0.4893]) -Greedy action tensor([-1.9936, -0.4566, 0.6001, 0.3467]) tensor([0.0340, 0.1581, 0.4548, 0.3530]) -Greedy action tensor([ 1.4676, -0.5962, 1.5207, -0.2233]) tensor([0.4227, 0.0537, 0.4457, 0.0779]) -Greedy action tensor([ 0.3530, 0.0960, 0.2090, -0.3057]) tensor([0.3168, 0.2450, 0.2743, 0.1639]) -Greedy action tensor([ 0.9427, -0.7814, 0.3365, 0.6045]) tensor([0.4104, 0.0732, 0.2238, 0.2926]) -Greedy action tensor([1.0183, 0.3869, 0.6228, 0.7049]) tensor([0.3406, 0.1811, 0.2293, 0.2490]) -Greedy action tensor([-1.0896, 0.6482, -0.3228, -0.3826]) tensor([0.0920, 0.5232, 0.1981, 0.1866]) -Greedy action tensor([0.9009, 0.3177, 0.2002, 0.2270]) tensor([0.3900, 0.2177, 0.1935, 0.1988]) -Greedy action tensor([ 0.1485, -1.9014, 1.1008, 0.4365]) tensor([0.1978, 0.0255, 0.5128, 0.2639]) -Greedy action tensor([-0.3070, 0.0249, 0.4277, 0.9691]) tensor([0.1241, 0.1729, 0.2586, 0.4444]) -Greedy action tensor([-0.5779, 0.0754, 0.5778, -0.0852]) tensor([0.1293, 0.2485, 0.4107, 0.2116]) -Greedy action tensor([0.5227, 0.1673, 0.1001, 0.6696]) tensor([0.2845, 0.1994, 0.1865, 0.3296]) -Greedy action tensor([-0.9049, -1.3721, 0.4670, -0.6378]) tensor([0.1454, 0.0912, 0.5734, 0.1900]) -Greedy action tensor([ 0.1676, -0.4498, 0.4344, 0.1518]) tensor([0.2611, 0.1408, 0.3410, 0.2570]) -Greedy action tensor([ 0.0148, -0.1166, -0.6583, 1.5439]) tensor([0.1428, 0.1253, 0.0729, 0.6591]) -Greedy action tensor([ 0.5952, -1.1644, 0.5264, 0.8980]) tensor([0.2891, 0.0498, 0.2699, 0.3913]) -Greedy action tensor([ 0.5882, -1.2568, 0.8694, -0.6105]) tensor([0.3591, 0.0568, 0.4758, 0.1083]) -Greedy action tensor([ 0.3671, -0.4924, 1.7016, -0.6151]) tensor([0.1787, 0.0757, 0.6787, 0.0669]) -Greedy action tensor([ 1.4083, -0.2487, 0.8822, 1.2810]) tensor([0.3756, 0.0716, 0.2220, 0.3307]) -Greedy action tensor([-0.9100, 1.1472, -0.0226, -0.4124]) tensor([0.0775, 0.6066, 0.1883, 0.1275]) -Greedy action tensor([ 1.2488, -1.0075, 0.8762, 1.7084]) tensor([0.2961, 0.0310, 0.2040, 0.4689]) -Greedy action tensor([ 0.2021, -2.7324, 0.0347, -0.1756]) tensor([0.3869, 0.0206, 0.3273, 0.2652]) -Greedy action tensor([ 0.1473, -1.8643, 0.3243, 0.7537]) tensor([0.2403, 0.0321, 0.2868, 0.4407]) -Greedy action tensor([ 0.8935, -1.5862, 0.4008, -0.1791]) tensor([0.4910, 0.0411, 0.2999, 0.1680]) -Greedy action tensor([-0.7002, -1.2521, 0.8965, 0.1732]) tensor([0.1123, 0.0646, 0.5542, 0.2689]) -Greedy action tensor([-0.1082, 0.4217, -0.1599, -0.4546]) tensor([0.2296, 0.3900, 0.2180, 0.1624]) -Greedy action tensor([ 0.3306, -0.8836, 1.4059, -0.1154]) tensor([0.2054, 0.0610, 0.6021, 0.1315]) -Greedy action tensor([-0.0407, -1.1404, -0.8881, -0.8547]) tensor([0.4536, 0.1510, 0.1944, 0.2010]) -Greedy action tensor([ 0.8300, 1.0422, 1.0721, -0.1371]) tensor([0.2570, 0.3178, 0.3274, 0.0977]) -Greedy action tensor([ 0.6625, -1.7079, -0.1192, 0.8676]) tensor([0.3599, 0.0336, 0.1647, 0.4418]) -Greedy action tensor([ 0.9540, -0.0500, 0.6429, 0.0893]) tensor([0.3968, 0.1454, 0.2907, 0.1671]) -Greedy action tensor([ 0.0462, -1.3002, -0.6379, 1.2369]) tensor([0.1979, 0.0515, 0.0998, 0.6508]) -Greedy action tensor([ 1.3315, -1.3182, 0.0467, 0.8656]) tensor([0.5063, 0.0358, 0.1401, 0.3178]) -Greedy action tensor([ 1.4383, -0.0433, 0.8748, 0.3501]) tensor([0.4687, 0.1065, 0.2668, 0.1579]) -Greedy action tensor([ 0.5728, -0.9720, 2.2257, 0.1096]) tensor([0.1415, 0.0302, 0.7392, 0.0891]) -Greedy action tensor([ 0.0570, -0.4308, 0.4853, 0.7865]) tensor([0.1915, 0.1176, 0.2938, 0.3971]) -Greedy action tensor([ 1.2464, -0.2658, -0.1545, 0.3710]) tensor([0.5309, 0.1170, 0.1308, 0.2212]) -Greedy action tensor([-0.4888, -0.5550, -0.3205, 0.6732]) tensor([0.1583, 0.1482, 0.1874, 0.5061]) -Greedy action tensor([1.3331, 1.3567, 0.2950, 0.5262]) tensor([0.3541, 0.3625, 0.1254, 0.1580]) -Greedy action tensor([ 0.7333, -0.0569, 1.3871, 0.5976]) tensor([0.2353, 0.1068, 0.4525, 0.2055]) -Greedy action tensor([ 1.2167, -0.1545, -0.1693, 0.1518]) tensor([0.5409, 0.1373, 0.1353, 0.1865]) -Greedy action tensor([ 0.7986, 0.1930, 0.0223, -0.1791]) tensor([0.4198, 0.2291, 0.1932, 0.1579]) -Greedy action tensor([ 0.9681, -0.1403, -0.1409, -0.1357]) tensor([0.5021, 0.1657, 0.1656, 0.1665]) -Greedy action tensor([ 0.9019, -0.7643, -0.0263, 0.0703]) tensor([0.4951, 0.0936, 0.1957, 0.2156]) -Greedy action tensor([ 1.3766, -0.4296, -0.5860, 0.4356]) tensor([0.5900, 0.0969, 0.0829, 0.2302]) -Greedy action tensor([ 1.3591, -0.6791, -0.3096, 0.5286]) tensor([0.5699, 0.0742, 0.1074, 0.2484]) -Greedy action tensor([ 1.5908, -0.5979, -0.5530, 0.5828]) tensor([0.6273, 0.0703, 0.0735, 0.2289]) -Greedy action tensor([ 0.7293, -0.5580, -0.3907, 0.5830]) tensor([0.4055, 0.1119, 0.1323, 0.3503]) -Greedy action tensor([ 0.9609, -0.4697, -0.4991, 0.8648]) tensor([0.4202, 0.1005, 0.0976, 0.3817]) -Greedy action tensor([ 1.3131, -0.5782, -0.3082, 0.1881]) tensor([0.5977, 0.0902, 0.1181, 0.1940]) -Greedy action tensor([ 1.4568, -0.1354, -0.3203, 0.0826]) tensor([0.6151, 0.1252, 0.1040, 0.1557]) -Greedy action tensor([ 0.7983, -0.2748, -0.0877, 0.1543]) tensor([0.4387, 0.1500, 0.1809, 0.2304]) -Greedy action tensor([ 1.0159, -0.4418, -0.0369, 0.1980]) tensor([0.4943, 0.1151, 0.1725, 0.2181]) -Greedy action tensor([ 0.9839, -0.4742, 0.0208, 0.1387]) tensor([0.4893, 0.1138, 0.1867, 0.2101]) -Greedy action tensor([ 1.2344, -0.2706, -0.4229, 0.3073]) tensor([0.5530, 0.1228, 0.1054, 0.2188]) -Greedy action tensor([ 1.4610, -0.8850, -0.2411, 0.0121]) tensor([0.6610, 0.0633, 0.1205, 0.1552]) -Greedy action tensor([ 1.6358, -0.7158, -0.1618, 0.2699]) tensor([0.6596, 0.0628, 0.1093, 0.1683]) -Greedy action tensor([ 1.1986, -0.1798, -0.1135, -0.1856]) tensor([0.5644, 0.1422, 0.1520, 0.1414]) -Greedy action tensor([ 1.3316, -0.4663, -0.7014, 0.5348]) tensor([0.5723, 0.0948, 0.0749, 0.2580]) -Greedy action tensor([ 1.3977, -0.3380, -0.3889, 0.2747]) tensor([0.5991, 0.1056, 0.1004, 0.1949]) -Greedy action tensor([ 1.3657, -0.3376, -0.4465, 0.3101]) tensor([0.5905, 0.1075, 0.0964, 0.2055]) -Greedy action tensor([ 1.5255, -0.4461, -0.4312, 0.1595]) tensor([0.6512, 0.0907, 0.0920, 0.1661]) -Greedy action tensor([ 1.1152, -0.4194, 0.0979, -0.0449]) tensor([0.5290, 0.1140, 0.1912, 0.1658]) -Greedy action tensor([ 1.7916, -0.5951, -0.6619, 0.5265]) tensor([0.6849, 0.0630, 0.0589, 0.1933]) -Greedy action tensor([ 0.8704, -0.1590, -0.1722, 0.0182]) tensor([0.4681, 0.1672, 0.1650, 0.1996]) -Greedy action tensor([ 0.6453, 0.1272, -0.2467, -0.2605]) tensor([0.4150, 0.2472, 0.1701, 0.1678]) -Greedy action tensor([ 2.1725, -1.1279, -0.2081, 0.6759]) tensor([0.7390, 0.0272, 0.0684, 0.1654]) -Greedy action tensor([ 1.5722, -0.6262, -0.3732, 0.5593]) tensor([0.6184, 0.0686, 0.0884, 0.2246]) -Greedy action tensor([ 0.8554, -0.1530, 0.3856, -0.0148]) tensor([0.4152, 0.1514, 0.2595, 0.1739]) -Greedy action tensor([ 1.1018, -0.3534, -0.1751, 0.0462]) tensor([0.5376, 0.1254, 0.1499, 0.1871]) -Greedy action tensor([ 1.8983, -0.7764, -0.0310, 0.3407]) tensor([0.7018, 0.0484, 0.1019, 0.1478]) -Greedy action tensor([ 1.4175, -0.1679, -0.2922, 0.3457]) tensor([0.5787, 0.1185, 0.1047, 0.1981]) -Greedy action tensor([ 2.2499, -0.9461, -0.1779, 0.3593]) tensor([0.7812, 0.0320, 0.0689, 0.1179]) -Greedy action tensor([ 1.2106, -0.4990, -0.2842, 0.2868]) tensor([0.5549, 0.1004, 0.1244, 0.2203]) -Greedy action tensor([ 1.7750, -0.5671, -0.0744, 0.4713]) tensor([0.6557, 0.0630, 0.1032, 0.1781]) -Greedy action tensor([ 1.5160, -0.1609, -0.0682, 0.0027]) tensor([0.6202, 0.1160, 0.1272, 0.1366]) -Greedy action tensor([ 1.6451, -0.7318, -0.5535, 0.9606]) tensor([0.5854, 0.0544, 0.0650, 0.2953]) -Greedy action tensor([ 0.8950, -0.2935, 0.1226, 0.5056]) tensor([0.4092, 0.1247, 0.1890, 0.2772]) -Greedy action tensor([ 1.5289, -0.4200, -0.5247, 0.4211]) tensor([0.6246, 0.0890, 0.0801, 0.2063]) -Greedy action tensor([ 0.7638, -0.5698, 0.1300, -0.0773]) tensor([0.4494, 0.1184, 0.2384, 0.1938]) -Greedy action tensor([ 1.4155, -0.6790, -0.0855, 0.1890]) tensor([0.6100, 0.0751, 0.1360, 0.1789]) -Greedy action tensor([ 0.6060, -0.2043, 0.0351, -0.0304]) tensor([0.3939, 0.1752, 0.2225, 0.2084]) -Greedy action tensor([ 1.2656, -0.2671, -0.2478, 0.0768]) tensor([0.5745, 0.1241, 0.1265, 0.1750]) -Greedy action tensor([ 1.7049, -0.3490, -0.3674, 0.2098]) tensor([0.6764, 0.0867, 0.0852, 0.1517]) -Greedy action tensor([ 1.4312, -0.0284, -0.2872, 0.2569]) tensor([0.5811, 0.1350, 0.1042, 0.1796]) -Greedy action tensor([ 1.2347, -0.3894, -0.0343, 0.0848]) tensor([0.5572, 0.1098, 0.1566, 0.1764]) -Greedy action tensor([ 0.8938, -0.1304, -0.5117, 0.2837]) tensor([0.4656, 0.1672, 0.1142, 0.2530]) -Greedy action tensor([ 1.3645, -0.0745, -0.1439, -0.2837]) tensor([0.6058, 0.1437, 0.1340, 0.1165]) -Greedy action tensor([ 1.4135, -0.5219, -0.3866, 0.0965]) tensor([0.6339, 0.0915, 0.1048, 0.1698]) -Greedy action tensor([ 1.7236, -0.3776, -0.5032, 0.1515]) tensor([0.6955, 0.0851, 0.0750, 0.1444]) -Greedy action tensor([ 1.1515, -0.1918, -0.1322, 0.2832]) tensor([0.5108, 0.1333, 0.1415, 0.2144]) -Greedy action tensor([ 0.9885, -0.2935, -0.1431, 0.1190]) tensor([0.4953, 0.1374, 0.1597, 0.2076]) -Greedy action tensor([ 1.0906, -0.4203, 0.0611, -0.0811]) tensor([0.5297, 0.1169, 0.1892, 0.1641]) -Greedy action tensor([ 1.4949, -0.1686, -0.2816, 0.1738]) tensor([0.6152, 0.1166, 0.1041, 0.1641]) -Greedy action tensor([ 0.5879, -0.2393, -0.2530, -0.0199]) tensor([0.4144, 0.1812, 0.1787, 0.2257]) -Greedy action tensor([ 2.0784, -0.1321, -1.4836, 0.4910]) tensor([0.7449, 0.0817, 0.0211, 0.1523]) -Greedy action tensor([ 0.6699, -0.1932, 0.0275, -0.1086]) tensor([0.4155, 0.1753, 0.2185, 0.1907]) -Greedy action tensor([ 1.4829, -0.3674, -0.3837, 0.6216]) tensor([0.5766, 0.0906, 0.0892, 0.2436]) -Greedy action tensor([ 1.3619, -0.5839, -0.1914, 0.4063]) tensor([0.5750, 0.0822, 0.1217, 0.2211]) -Greedy action tensor([ 1.1464, -0.0386, -0.3081, 0.2991]) tensor([0.5082, 0.1554, 0.1187, 0.2178]) -Greedy action tensor([ 0.8490, 0.1857, -0.2316, 0.1076]) tensor([0.4290, 0.2210, 0.1456, 0.2044]) -Greedy action tensor([ 0.6179, -0.2648, -0.2647, 0.1972]) tensor([0.4026, 0.1665, 0.1665, 0.2643]) -Greedy action tensor([ 1.2229, -0.5983, -0.0228, 0.0502]) tensor([0.5685, 0.0920, 0.1636, 0.1760]) -Greedy action tensor([ 1.3375, -0.6236, -0.4678, 0.0997]) tensor([0.6269, 0.0882, 0.1031, 0.1818]) -Greedy action tensor([ 1.3100, -0.5850, 0.0491, 0.1328]) tensor([0.5741, 0.0863, 0.1627, 0.1769]) -Greedy action tensor([ 0.7889, -0.2881, -0.0982, 0.3618]) tensor([0.4158, 0.1416, 0.1713, 0.2713]) -Greedy action tensor([ 0.6579, -0.6771, -0.0634, 0.0971]) tensor([0.4310, 0.1134, 0.2095, 0.2460]) -Greedy action tensor([ 1.7625, -0.4695, -0.4767, 0.3098]) tensor([0.6907, 0.0741, 0.0736, 0.1616]) -Greedy action tensor([ 1.2528, -0.4310, -0.5043, 0.5050]) tensor([0.5460, 0.1014, 0.0942, 0.2585]) -Greedy action tensor([ 2.5503, -0.8838, -0.1938, 0.6500]) tensor([0.8025, 0.0259, 0.0516, 0.1200]) -Greedy action tensor([ 0.9459, -0.1280, -0.2382, -0.4340]) tensor([0.5265, 0.1799, 0.1611, 0.1325]) -Greedy action tensor([ 0.8629, -0.4820, -0.4729, 0.9679]) tensor([0.3796, 0.0989, 0.0998, 0.4216]) -Greedy action tensor([ 1.5463, -0.4712, -0.3173, 0.5004]) tensor([0.6099, 0.0811, 0.0946, 0.2143]) -Greedy action tensor([ 1.0206, 0.0922, -0.3185, 0.2507]) tensor([0.4716, 0.1864, 0.1236, 0.2184]) -Greedy action tensor([ 1.1950, -0.7405, -0.1636, 0.3444]) tensor([0.5469, 0.0789, 0.1406, 0.2336]) -Greedy action tensor([ 1.5466, -0.4569, -0.5806, 0.4201]) tensor([0.6336, 0.0855, 0.0755, 0.2054]) -Greedy action tensor([ 0.5427, -0.2362, -0.2495, 0.0028]) tensor([0.4009, 0.1840, 0.1815, 0.2336]) -Greedy action tensor([ 1.3224, -0.3720, -0.3110, 0.6529]) tensor([0.5288, 0.0972, 0.1033, 0.2708]) -Greedy action tensor([ 0.8866, -0.4568, -0.4409, 0.3705]) tensor([0.4710, 0.1229, 0.1249, 0.2811]) -Greedy action tensor([ 1.1952, -0.3295, -0.1683, -0.0266]) tensor([0.5656, 0.1231, 0.1447, 0.1667]) -Greedy action tensor([ 1.3422, -0.2499, -0.1311, 0.1704]) tensor([0.5739, 0.1168, 0.1315, 0.1778]) -Greedy action tensor([ 0.4367, -0.4664, -0.0685, -0.5650]) tensor([0.4209, 0.1706, 0.2539, 0.1546]) -Greedy action tensor([ 0.6892, -0.6227, -0.1280, -0.2424]) tensor([0.4751, 0.1279, 0.2098, 0.1871]) -Greedy action tensor([ 0.7495, -0.4420, -0.0289, -0.5023]) tensor([0.4881, 0.1483, 0.2241, 0.1396]) -Greedy action tensor([ 0.7808, -0.9798, -0.0947, -0.3739]) tensor([0.5253, 0.0903, 0.2189, 0.1655]) -Greedy action tensor([ 0.7725, -0.2093, 0.1193, -0.5034]) tensor([0.4599, 0.1723, 0.2393, 0.1284]) -Greedy action tensor([ 0.3018, -0.0532, 0.0539, -0.0691]) tensor([0.3153, 0.2211, 0.2461, 0.2176]) -Greedy action tensor([ 0.5734, 0.1450, -0.2842, 0.0616]) tensor([0.3738, 0.2436, 0.1586, 0.2241]) -Greedy action tensor([ 0.3568, -0.2686, -0.0874, -0.7531]) tensor([0.3990, 0.2135, 0.2559, 0.1315]) -Greedy action tensor([ 0.5157, -0.1415, -0.1117, -0.3451]) tensor([0.4040, 0.2094, 0.2157, 0.1708]) -Greedy action tensor([ 0.7566, -0.4524, -0.1711, -0.2986]) tensor([0.4897, 0.1462, 0.1937, 0.1705]) -Greedy action tensor([ 0.7229, 0.1347, -0.1021, -0.1923]) tensor([0.4177, 0.2320, 0.1831, 0.1673]) -Greedy action tensor([ 1.1772, -0.7998, 0.0428, -0.7095]) tensor([0.6205, 0.0859, 0.1996, 0.0940]) -Greedy action tensor([ 0.7303, -0.4385, -0.0791, -0.2015]) tensor([0.4652, 0.1445, 0.2071, 0.1832]) -Greedy action tensor([ 0.7308, -0.5752, -0.0439, -0.3894]) tensor([0.4859, 0.1316, 0.2239, 0.1585]) -Greedy action tensor([ 0.7404, -0.2235, 0.1128, -0.4590]) tensor([0.4511, 0.1721, 0.2409, 0.1360]) -Greedy action tensor([ 0.7206, -0.5036, -0.0942, -0.3892]) tensor([0.4840, 0.1423, 0.2142, 0.1595]) -Greedy action tensor([ 0.7574, -0.0134, -0.0183, -0.4544]) tensor([0.4503, 0.2083, 0.2073, 0.1340]) -Greedy action tensor([ 0.7201, -0.3976, -0.1235, -0.6139]) tensor([0.4949, 0.1618, 0.2129, 0.1304]) -Greedy action tensor([ 0.8486, -0.8189, 0.1153, -0.5475]) tensor([0.5218, 0.0985, 0.2506, 0.1292]) -Greedy action tensor([-0.0437, -0.1825, 0.1482, -0.1705]) tensor([0.2523, 0.2196, 0.3057, 0.2223]) -Greedy action tensor([ 0.5206, -0.1152, 0.1617, -0.3166]) tensor([0.3758, 0.1990, 0.2625, 0.1627]) -Greedy action tensor([ 0.4936, -0.0571, 0.0743, -0.1937]) tensor([0.3654, 0.2106, 0.2402, 0.1838]) -Greedy action tensor([ 0.2732, -0.2394, 0.0660, -0.4640]) tensor([0.3460, 0.2072, 0.2812, 0.1655]) -Greedy action tensor([ 0.5900, -0.2220, -0.0554, -0.1725]) tensor([0.4107, 0.1823, 0.2154, 0.1916]) -Greedy action tensor([ 0.3254, -0.0386, -0.0501, -0.2266]) tensor([0.3381, 0.2349, 0.2323, 0.1947]) -Greedy action tensor([ 0.6665, -0.6122, -0.0200, -0.2554]) tensor([0.4588, 0.1277, 0.2309, 0.1825]) -Greedy action tensor([ 0.8794, -0.4598, -0.0613, -0.5062]) tensor([0.5256, 0.1377, 0.2052, 0.1315]) -Greedy action tensor([ 0.8299, -0.5876, -0.0278, -0.5811]) tensor([0.5235, 0.1268, 0.2220, 0.1277]) -Greedy action tensor([ 0.8953, -0.8148, 0.1922, -0.5602]) tensor([0.5238, 0.0947, 0.2593, 0.1222]) -Greedy action tensor([ 0.6462, -0.0501, -0.1044, -0.4423]) tensor([0.4334, 0.2160, 0.2046, 0.1459]) -Greedy action tensor([ 0.8402, -0.6639, -0.1235, -0.3149]) tensor([0.5212, 0.1158, 0.1988, 0.1642]) -Greedy action tensor([ 0.5596, -0.4226, -0.1245, -0.3361]) tensor([0.4372, 0.1637, 0.2206, 0.1785]) -Greedy action tensor([ 0.5760, -0.0759, 0.0044, -0.2431]) tensor([0.3958, 0.2062, 0.2235, 0.1745]) -Greedy action tensor([ 0.6029, -0.4580, -0.1158, -0.2735]) tensor([0.4445, 0.1539, 0.2166, 0.1850]) -Greedy action tensor([ 0.3740, 0.0136, -0.0126, -0.2384]) tensor([0.3426, 0.2389, 0.2328, 0.1857]) -Greedy action tensor([ 0.5856, -0.2702, 0.0046, -0.3700]) tensor([0.4221, 0.1794, 0.2361, 0.1623]) -Greedy action tensor([ 0.5000, -0.1676, -0.0218, -0.4022]) tensor([0.3981, 0.2042, 0.2362, 0.1615]) -Greedy action tensor([ 0.8787, -0.5275, 0.2378, -0.7921]) tensor([0.5102, 0.1250, 0.2688, 0.0960]) -Greedy action tensor([ 0.6921, -0.2445, -0.1345, -0.2678]) tensor([0.4520, 0.1772, 0.1978, 0.1731]) -Greedy action tensor([ 0.7397, -0.5735, -0.1816, -0.4385]) tensor([0.5064, 0.1362, 0.2015, 0.1559]) -Greedy action tensor([ 0.7196, -0.4089, -0.2287, -0.4965]) tensor([0.4982, 0.1612, 0.1930, 0.1477]) -Greedy action tensor([ 0.4230, -0.0865, -0.0289, -0.1752]) tensor([0.3588, 0.2156, 0.2284, 0.1973]) -Greedy action tensor([ 0.3206, -0.0618, -0.0862, -0.2058]) tensor([0.3403, 0.2321, 0.2266, 0.2010]) -Greedy action tensor([ 0.5890, -0.1584, -0.1249, -0.2681]) tensor([0.4188, 0.1983, 0.2051, 0.1777]) -Greedy action tensor([ 0.4998, -0.0463, -0.0498, -0.0656]) tensor([0.3670, 0.2126, 0.2119, 0.2085]) -Greedy action tensor([ 0.3183, 0.1541, 0.1090, -0.1269]) tensor([0.3030, 0.2571, 0.2458, 0.1941]) -Greedy action tensor([ 1.0465, -0.8088, 0.1759, -0.5474]) tensor([0.5623, 0.0880, 0.2355, 0.1142]) -Greedy action tensor([ 0.4316, -0.3249, -0.0493, -0.2736]) tensor([0.3874, 0.1818, 0.2395, 0.1914]) -Greedy action tensor([ 9.9561e-01, 2.4816e-01, -6.6440e-04, -2.0202e-03]) tensor([0.4522, 0.2141, 0.1670, 0.1667]) -Greedy action tensor([ 0.3006, 0.0122, -0.1305, 0.0536]) tensor([0.3144, 0.2356, 0.2043, 0.2456]) -Greedy action tensor([ 0.7510, -0.2021, 0.1285, -0.7069]) tensor([0.4641, 0.1789, 0.2490, 0.1080]) -Greedy action tensor([ 0.4679, 0.1419, 0.0042, -0.2351]) tensor([0.3514, 0.2536, 0.2210, 0.1740]) -Greedy action tensor([ 0.6552, 0.0210, -0.0413, -0.1923]) tensor([0.4070, 0.2158, 0.2028, 0.1744]) -Greedy action tensor([ 0.8042, -0.2815, 0.0529, -0.2351]) tensor([0.4623, 0.1561, 0.2181, 0.1635]) -Greedy action tensor([ 0.7965, -0.5221, -0.0553, -0.3177]) tensor([0.4945, 0.1323, 0.2110, 0.1623]) -Greedy action tensor([ 0.8132, -0.5012, -0.1814, -0.4642]) tensor([0.5216, 0.1401, 0.1929, 0.1454]) -Greedy action tensor([ 0.3636, -0.3122, -0.1546, -0.1971]) tensor([0.3738, 0.1902, 0.2226, 0.2134]) -Greedy action tensor([ 0.6412, -0.2263, -0.0966, -0.1338]) tensor([0.4239, 0.1781, 0.2027, 0.1953]) -Greedy action tensor([ 0.4017, -0.0384, -0.1539, -0.4385]) tensor([0.3774, 0.2431, 0.2166, 0.1629]) -Greedy action tensor([ 0.4949, -0.4118, -0.1247, -0.2554]) tensor([0.4142, 0.1673, 0.2229, 0.1956]) -Greedy action tensor([ 0.4755, -0.2746, -0.0123, -0.2382]) tensor([0.3882, 0.1833, 0.2383, 0.1901]) -Greedy action tensor([ 0.6640, -0.7005, -0.0334, -0.2788]) tensor([0.4667, 0.1192, 0.2323, 0.1818]) -Greedy action tensor([ 0.6490, -0.3374, -0.0316, -0.4150]) tensor([0.4496, 0.1677, 0.2276, 0.1551]) -Greedy action tensor([ 0.1924, 0.1804, -0.0480, -0.4435]) tensor([0.3027, 0.2991, 0.2380, 0.1603]) -Greedy action tensor([ 0.4597, -0.5367, 0.1697, -0.7173]) tensor([0.4123, 0.1522, 0.3085, 0.1271]) -Greedy action tensor([ 1.2193, -1.2766, -0.0148, -0.4505]) tensor([0.6403, 0.0528, 0.1864, 0.1206]) -Greedy action tensor([ 0.7459, -0.4246, -0.0334, -0.3495]) tensor([0.4754, 0.1475, 0.2181, 0.1590]) -Greedy action tensor([ 0.1035, 0.1025, 0.0785, -0.0908]) tensor([0.2633, 0.2631, 0.2568, 0.2168]) -Greedy action tensor([ 0.4077, -0.0052, -0.0300, -0.3539]) tensor([0.3605, 0.2385, 0.2327, 0.1683]) -Greedy action tensor([ 0.6279, -0.3445, -0.2036, -0.3391]) tensor([0.4558, 0.1724, 0.1985, 0.1733]) -Greedy action tensor([ 0.7303, -0.3842, -0.1501, -0.3023]) tensor([0.4765, 0.1563, 0.1976, 0.1697]) -Greedy action tensor([ 0.5643, -0.0947, -0.2065, -0.2790]) tensor([0.4149, 0.2146, 0.1919, 0.1785]) -Greedy action tensor([ 0.7398, -0.5514, 0.0957, -0.6845]) tensor([0.4900, 0.1347, 0.2573, 0.1179]) -Greedy action tensor([ 1.0307, -0.4169, 0.0854, -0.5413]) tensor([0.5460, 0.1284, 0.2122, 0.1134]) -Greedy action tensor([ 0.8141, -0.3662, 0.0095, -0.2315]) tensor([0.4749, 0.1459, 0.2124, 0.1669]) -Greedy action tensor([ 1.0301, -1.0444, 0.1787, -0.6269]) tensor([0.5737, 0.0721, 0.2449, 0.1094]) -Greedy action tensor([ 0.5999, -0.4174, -0.0822, -0.4225]) tensor([0.4491, 0.1624, 0.2270, 0.1615]) -Greedy action tensor([ 0.6372, -0.3295, -0.1001, -0.1887]) tensor([0.4354, 0.1656, 0.2083, 0.1907]) -Greedy action tensor([ 0.7156, -0.2650, 0.0092, -0.2604]) tensor([0.4454, 0.1670, 0.2198, 0.1678]) -Greedy action tensor([ 0.6759, -0.5279, 0.0704, -0.3473]) tensor([0.4535, 0.1361, 0.2475, 0.1630]) -Greedy action tensor([ 0.6867, -0.5208, -0.1915, -0.6056]) tensor([0.5027, 0.1503, 0.2089, 0.1381]) -Greedy action tensor([-1.8448, -0.4387, 0.6206, -0.0994]) tensor([0.0443, 0.1807, 0.5213, 0.2537]) -Greedy action tensor([-1.8562, -0.5022, 0.6526, -0.1128]) tensor([0.0437, 0.1693, 0.5372, 0.2499]) -Greedy action tensor([-1.6992, -0.5473, 0.5631, -0.0811]) tensor([0.0532, 0.1682, 0.5106, 0.2681]) -Greedy action tensor([-0.1435, -0.1230, 0.5680, 1.4841]) tensor([0.1093, 0.1116, 0.2227, 0.5565]) -Greedy action tensor([-1.4016, -0.5389, 0.4901, 0.1705]) tensor([0.0675, 0.1599, 0.4475, 0.3251]) -Greedy action tensor([-1.6690, -0.4808, 0.5262, -0.0369]) tensor([0.0544, 0.1785, 0.4887, 0.2783]) -Greedy action tensor([-1.8263, -0.4943, 0.6255, -0.0855]) tensor([0.0452, 0.1714, 0.5253, 0.2580]) -Greedy action tensor([-0.4833, -0.5846, 0.6596, 0.5309]) tensor([0.1283, 0.1159, 0.4022, 0.3536]) -Greedy action tensor([-0.2481, -0.4320, 0.1999, 0.1614]) tensor([0.2039, 0.1697, 0.3192, 0.3072]) -Greedy action tensor([-1.8533, -0.5781, 0.8492, 0.3324]) tensor([0.0352, 0.1261, 0.5254, 0.3134]) -Greedy action tensor([-1.6946, -0.8280, -0.2066, -0.6475]) tensor([0.0938, 0.2232, 0.4155, 0.2674]) -Greedy action tensor([-0.6768, -0.1172, 0.8401, 1.4895]) tensor([0.0624, 0.1091, 0.2843, 0.5442]) -Greedy action tensor([-1.8756, -0.3650, 0.6256, -0.1654]) tensor([0.0430, 0.1948, 0.5244, 0.2378]) -Greedy action tensor([-1.5837, -0.4813, 0.5432, -0.1468]) tensor([0.0602, 0.1813, 0.5051, 0.2533]) -Greedy action tensor([-1.7321, -0.4949, 0.5518, -0.0743]) tensor([0.0513, 0.1766, 0.5031, 0.2690]) -Greedy action tensor([-1.8851, -0.2640, 0.6063, -0.1359]) tensor([0.0419, 0.2118, 0.5056, 0.2407]) -Greedy action tensor([-1.6089, -0.4691, 0.5583, 0.0756]) tensor([0.0548, 0.1713, 0.4786, 0.2953]) -Greedy action tensor([-0.8561, 0.2032, -0.0138, -0.1986]) tensor([0.1229, 0.3545, 0.2854, 0.2372]) -Greedy action tensor([-1.0657, 0.4012, -0.4852, -0.0033]) tensor([0.0998, 0.4329, 0.1784, 0.2889]) -Greedy action tensor([-1.7961, -0.4382, 0.5910, -0.0955]) tensor([0.0471, 0.1830, 0.5122, 0.2578]) -Greedy action tensor([-1.8764, -0.4735, 0.6384, -0.1292]) tensor([0.0432, 0.1755, 0.5336, 0.2477]) -Greedy action tensor([-0.6816, -0.3434, 1.1880, 1.5592]) tensor([0.0547, 0.0767, 0.3546, 0.5140]) -Greedy action tensor([-1.9076, -0.3558, 0.6306, -0.1612]) tensor([0.0415, 0.1958, 0.5250, 0.2378]) -Greedy action tensor([-1.8953, -0.4184, 0.6411, -0.1441]) tensor([0.0421, 0.1842, 0.5314, 0.2423]) -Greedy action tensor([-1.3098, -0.5352, 0.5308, 0.6181]) tensor([0.0612, 0.1328, 0.3855, 0.4206]) -Greedy action tensor([-1.7708, -0.4065, 0.5700, -0.0768]) tensor([0.0482, 0.1886, 0.5009, 0.2623]) -Greedy action tensor([-1.5429, -0.5333, 0.6649, 0.3756]) tensor([0.0509, 0.1397, 0.4629, 0.3466]) -Greedy action tensor([-1.8440, -0.4793, 0.6177, -0.1327]) tensor([0.0451, 0.1765, 0.5287, 0.2497]) -Greedy action tensor([-1.8977, -0.4233, 0.6405, -0.1531]) tensor([0.0421, 0.1839, 0.5329, 0.2410]) -Greedy action tensor([-0.9070, -0.5461, 1.1743, 1.5147]) tensor([0.0461, 0.0661, 0.3691, 0.5188]) -Greedy action tensor([-1.8075, -0.4813, 0.6451, -0.0564]) tensor([0.0452, 0.1701, 0.5246, 0.2601]) -Greedy action tensor([-0.9111, -0.6241, 0.2304, 0.2049]) tensor([0.1174, 0.1565, 0.3677, 0.3584]) -Greedy action tensor([-1.7737, -0.4292, 0.6421, 0.0242]) tensor([0.0453, 0.1738, 0.5074, 0.2735]) -Greedy action tensor([-1.2798, 0.0978, 0.1612, 0.1458]) tensor([0.0749, 0.2970, 0.3164, 0.3116]) -Greedy action tensor([-1.1965, -0.6268, 0.3294, 0.3626]) tensor([0.0825, 0.1458, 0.3794, 0.3922]) -Greedy action tensor([-1.3090, -0.5960, 0.3211, 0.1919]) tensor([0.0792, 0.1615, 0.4041, 0.3552]) -Greedy action tensor([-1.8027, -0.3561, 0.6878, 0.0890]) tensor([0.0418, 0.1774, 0.5039, 0.2769]) -Greedy action tensor([-1.1930, -0.5544, 0.2842, 0.2496]) tensor([0.0869, 0.1646, 0.3807, 0.3678]) -Greedy action tensor([-0.9368, -0.1067, 1.0435, 1.1284]) tensor([0.0543, 0.1245, 0.3932, 0.4280]) -Greedy action tensor([-0.3580, -0.1760, 0.1957, 0.2594]) tensor([0.1726, 0.2071, 0.3003, 0.3200]) -Greedy action tensor([-1.2026, 0.2078, 0.2430, 0.5780]) tensor([0.0655, 0.2682, 0.2779, 0.3884]) -Greedy action tensor([-1.2675, -0.7466, 0.2415, -0.1210]) tensor([0.0966, 0.1626, 0.4368, 0.3040]) -Greedy action tensor([-1.0044, -0.6574, 0.2956, 0.2342]) tensor([0.1049, 0.1484, 0.3848, 0.3619]) -Greedy action tensor([-1.6430, -0.4388, 0.5705, 0.1891]) tensor([0.0507, 0.1690, 0.4637, 0.3167]) -Greedy action tensor([-1.8823, -0.4993, 0.7397, -0.0682]) tensor([0.0402, 0.1602, 0.5531, 0.2466]) -Greedy action tensor([-1.5235, -0.5503, 0.4876, 0.2041]) tensor([0.0597, 0.1580, 0.4462, 0.3360]) -Greedy action tensor([-1.7080, -0.0975, 0.4999, -0.0616]) tensor([0.0493, 0.2467, 0.4483, 0.2557]) -Greedy action tensor([-1.4049, -0.5166, 0.6194, 0.5478]) tensor([0.0554, 0.1347, 0.4194, 0.3905]) -Greedy action tensor([-1.4653, -0.3174, 0.7304, -0.6843]) tensor([0.0653, 0.2057, 0.5865, 0.1425]) -Greedy action tensor([-1.4912, 0.0513, 0.5274, 0.2169]) tensor([0.0534, 0.2498, 0.4021, 0.2948]) -Greedy action tensor([-1.7445, -0.4344, 0.5758, -0.0671]) tensor([0.0494, 0.1832, 0.5030, 0.2645]) -Greedy action tensor([-1.6940, -0.4998, 0.5509, -0.0312]) tensor([0.0526, 0.1736, 0.4964, 0.2774]) -Greedy action tensor([-1.8843, -0.4844, 0.7030, -0.0878]) tensor([0.0410, 0.1663, 0.5453, 0.2473]) -Greedy action tensor([-1.5192e+00, 2.4512e-04, 4.6655e-01, 3.7437e-01]) tensor([0.0513, 0.2344, 0.3736, 0.3407]) -Greedy action tensor([-1.2667, 0.5413, 0.4825, 0.7251]) tensor([0.0496, 0.3022, 0.2850, 0.3632]) -Greedy action tensor([-0.5954, -0.1693, -0.1530, -0.0191]) tensor([0.1704, 0.2610, 0.2653, 0.3033]) -Greedy action tensor([-1.9177, -0.4434, 0.6549, -0.1598]) tensor([0.0412, 0.1800, 0.5398, 0.2390]) -Greedy action tensor([-1.6792, -0.4805, 0.5361, -0.0328]) tensor([0.0536, 0.1776, 0.4909, 0.2779]) -Greedy action tensor([-1.9521, -0.4894, 0.9771, 0.1753]) tensor([0.0308, 0.1332, 0.5771, 0.2589]) -Greedy action tensor([-1.5879, -0.3342, 0.6158, 0.0383]) tensor([0.0536, 0.1879, 0.4858, 0.2727]) -Greedy action tensor([-1.7907, -0.4673, 0.5705, -0.0661]) tensor([0.0477, 0.1791, 0.5057, 0.2675]) -Greedy action tensor([-1.9143, -0.4651, 0.6496, -0.1646]) tensor([0.0417, 0.1775, 0.5411, 0.2397]) -Greedy action tensor([-1.9121, -0.4589, 0.6716, -0.1427]) tensor([0.0410, 0.1753, 0.5431, 0.2406]) -Greedy action tensor([-1.4237, -0.2459, 0.6544, -0.5181]) tensor([0.0680, 0.2207, 0.5431, 0.1682]) -Greedy action tensor([0.3407, 0.2255, 0.9176, 1.7819]) tensor([0.1266, 0.1128, 0.2255, 0.5351]) -Greedy action tensor([-1.1698, -0.3886, 0.6197, -0.5812]) tensor([0.0911, 0.1991, 0.5456, 0.1642]) -Greedy action tensor([-0.8281, -0.6145, 0.1445, -0.2184]) tensor([0.1488, 0.1842, 0.3934, 0.2737]) -Greedy action tensor([-1.6194, -0.5659, 0.1437, -0.6243]) tensor([0.0806, 0.2312, 0.4701, 0.2181]) -Greedy action tensor([-1.8090, -0.5162, 0.7416, 0.0279]) tensor([0.0421, 0.1535, 0.5399, 0.2645]) -Greedy action tensor([-1.8013, -0.4214, 0.5851, -0.1035]) tensor([0.0469, 0.1865, 0.5103, 0.2563]) -Greedy action tensor([-1.6530, -0.3779, 0.4660, -0.3109]) tensor([0.0598, 0.2139, 0.4975, 0.2288]) -Greedy action tensor([-1.1699, -0.6162, 0.5082, 0.6969]) tensor([0.0687, 0.1195, 0.3677, 0.4441]) -Greedy action tensor([ 0.0497, -0.0152, 0.9888, 1.7166]) tensor([0.1021, 0.0957, 0.2612, 0.5409]) -Greedy action tensor([-1.9118, -0.4534, 0.6496, -0.1577]) tensor([0.0416, 0.1789, 0.5390, 0.2404]) -Greedy action tensor([-1.7035, -0.4461, 0.6649, 0.1174]) tensor([0.0468, 0.1645, 0.4997, 0.2890]) -Greedy action tensor([-1.6220, -0.5853, 0.5725, 0.0665]) tensor([0.0549, 0.1549, 0.4930, 0.2972]) -Greedy action tensor([-1.4891, -0.5630, 1.1758, 1.0203]) tensor([0.0331, 0.0836, 0.4759, 0.4073]) -Greedy action tensor([-1.3577, -0.5883, 0.4636, 0.3033]) tensor([0.0685, 0.1478, 0.4232, 0.3605]) -Greedy action tensor([-0.3804, 0.2740, 0.5676, 1.5820]) tensor([0.0792, 0.1524, 0.2045, 0.5639]) -Greedy action tensor([-1.4634, -0.4521, 0.4499, 0.1426]) tensor([0.0645, 0.1773, 0.4369, 0.3213]) -Greedy action tensor([-1.2368, -0.5257, 0.5499, 0.8231]) tensor([0.0593, 0.1208, 0.3543, 0.4656]) -Greedy action tensor([1.3576, 0.0261, 0.0967, 1.2993]) tensor([0.4015, 0.1060, 0.1138, 0.3787]) -Greedy action tensor([-0.3939, -0.7826, 1.2188, -0.6315]) tensor([0.1336, 0.0906, 0.6704, 0.1054]) -Greedy action tensor([ 1.9642, 0.4133, -0.7428, 0.8974]) tensor([0.6162, 0.1307, 0.0411, 0.2120]) -Greedy action tensor([ 1.3083, -0.3710, 1.3493, 1.1887]) tensor([0.3210, 0.0599, 0.3344, 0.2848]) -Greedy action tensor([-0.2780, -0.4146, -0.2652, -0.0869]) tensor([0.2442, 0.2130, 0.2473, 0.2956]) -Greedy action tensor([ 1.9152, -1.0927, 0.7993, 1.0061]) tensor([0.5618, 0.0278, 0.1841, 0.2263]) -Greedy action tensor([ 0.0443, 0.7566, 1.1563, -0.3817]) tensor([0.1485, 0.3028, 0.4516, 0.0970]) -Greedy action tensor([ 0.8931, 0.6531, -0.0809, 0.4200]) tensor([0.3588, 0.2822, 0.1355, 0.2235]) -Greedy action tensor([ 0.3834, -0.8100, -0.6815, 0.9600]) tensor([0.2917, 0.0884, 0.1006, 0.5192]) -Greedy action tensor([0.8027, 0.3929, 0.0323, 0.1715]) tensor([0.3761, 0.2497, 0.1741, 0.2001]) -Greedy action tensor([ 1.3907, -0.7043, 2.2694, -0.3146]) tensor([0.2694, 0.0331, 0.6485, 0.0489]) -Greedy action tensor([-0.2037, -0.2878, -0.5023, 0.6406]) tensor([0.2005, 0.1843, 0.1487, 0.4664]) -Greedy action tensor([-0.3458, -0.9398, -0.6839, -0.3863]) tensor([0.3100, 0.1712, 0.2211, 0.2977]) -Greedy action tensor([-0.0389, -0.8258, -0.7891, 0.4137]) tensor([0.2857, 0.1301, 0.1349, 0.4493]) -Greedy action tensor([ 0.2081, -1.1520, 0.5343, 0.1811]) tensor([0.2766, 0.0710, 0.3832, 0.2692]) -Greedy action tensor([-1.6356, -0.8485, -0.5532, -0.9855]) tensor([0.1240, 0.2724, 0.3660, 0.2376]) -Greedy action tensor([1.3291e+00, 1.2018e-01, 5.5850e-05, 7.2923e-02]) tensor([0.5411, 0.1615, 0.1433, 0.1541]) -Greedy action tensor([ 0.6754, -1.2636, -0.1511, -0.0519]) tensor([0.4843, 0.0697, 0.2119, 0.2340]) -Greedy action tensor([1.3266, 0.4496, 0.1595, 1.0891]) tensor([0.3975, 0.1654, 0.1237, 0.3135]) -Greedy action tensor([ 1.7678, -1.2177, 0.6737, 1.1378]) tensor([0.5214, 0.0263, 0.1746, 0.2777]) -Greedy action tensor([ 0.1737, -1.0437, 0.5632, 0.1650]) tensor([0.2657, 0.0786, 0.3922, 0.2634]) -Greedy action tensor([ 0.4292, -1.5708, 0.9479, 0.4822]) tensor([0.2584, 0.0350, 0.4341, 0.2725]) -Greedy action tensor([-0.0525, -1.1304, 1.1354, 0.8133]) tensor([0.1429, 0.0486, 0.4688, 0.3397]) -Greedy action tensor([-1.0138, -0.5722, 0.1934, -0.4791]) tensor([0.1315, 0.2045, 0.4396, 0.2244]) -Greedy action tensor([ 1.5229, -1.3161, 1.4382, 0.9861]) tensor([0.3903, 0.0228, 0.3586, 0.2282]) -Greedy action tensor([-0.6346, -0.8407, -0.0990, -0.5055]) tensor([0.2146, 0.1746, 0.3666, 0.2442]) -Greedy action tensor([ 0.5518, -0.0065, 0.4690, -0.7979]) tensor([0.3634, 0.2079, 0.3345, 0.0942]) -Greedy action tensor([ 1.6433, -0.3902, -0.5430, 0.8692]) tensor([0.5867, 0.0768, 0.0659, 0.2706]) -Greedy action tensor([ 1.2319, 0.4632, -0.6126, -0.2080]) tensor([0.5380, 0.2494, 0.0851, 0.1275]) -Greedy action tensor([ 0.2061, -2.2078, -0.0029, 1.4027]) tensor([0.1920, 0.0172, 0.1557, 0.6351]) -Greedy action tensor([ 0.6400, -0.9622, -0.2244, 0.3782]) tensor([0.4180, 0.0842, 0.1761, 0.3217]) -Greedy action tensor([ 0.4619, 0.8050, -0.8629, 0.9770]) tensor([0.2299, 0.3241, 0.0611, 0.3849]) -Greedy action tensor([ 0.0530, -0.5379, 0.7116, 1.2516]) tensor([0.1470, 0.0814, 0.2841, 0.4875]) -Greedy action tensor([ 1.0359, -1.9426, 1.2215, 0.4395]) tensor([0.3564, 0.0181, 0.4291, 0.1963]) -Greedy action tensor([ 0.3635, -2.0529, -0.0551, 0.6238]) tensor([0.3285, 0.0293, 0.2161, 0.4261]) -Greedy action tensor([ 0.0679, 0.0121, 1.4667, -0.5050]) tensor([0.1524, 0.1442, 0.6174, 0.0860]) -Greedy action tensor([0.4668, 0.1759, 0.3746, 0.3068]) tensor([0.2848, 0.2129, 0.2597, 0.2427]) -Greedy action tensor([ 0.9595, -1.5196, 0.2721, -0.2724]) tensor([0.5323, 0.0446, 0.2677, 0.1553]) -Greedy action tensor([-0.6763, 0.0315, -0.8830, 1.1848]) tensor([0.0973, 0.1976, 0.0792, 0.6260]) -Greedy action tensor([-0.5898, -0.5767, -0.1528, 0.6737]) tensor([0.1409, 0.1427, 0.2181, 0.4983]) -Greedy action tensor([ 0.2108, -1.0687, 0.6008, -0.1943]) tensor([0.2922, 0.0813, 0.4316, 0.1949]) -Greedy action tensor([ 1.6936, 0.3369, -0.3896, 0.4103]) tensor([0.6027, 0.1552, 0.0751, 0.1670]) -Greedy action tensor([-0.5709, -2.8373, -0.2352, 0.1726]) tensor([0.2171, 0.0225, 0.3037, 0.4566]) -Greedy action tensor([-0.4323, -0.6788, -0.3799, 1.0947]) tensor([0.1344, 0.1051, 0.1416, 0.6189]) -Greedy action tensor([ 0.4217, 0.1105, 1.1381, -0.1486]) tensor([0.2302, 0.1686, 0.4711, 0.1301]) -Greedy action tensor([-0.2544, -1.3963, 0.3241, 0.5976]) tensor([0.1836, 0.0586, 0.3274, 0.4304]) -Greedy action tensor([-0.3949, -0.2009, -0.1336, -0.0395]) tensor([0.2024, 0.2458, 0.2629, 0.2889]) -Greedy action tensor([ 0.4331, 0.8714, -0.7085, 0.7595]) tensor([0.2350, 0.3643, 0.0750, 0.3257]) -Greedy action tensor([ 0.8279, -0.5498, -0.3492, 1.1879]) tensor([0.3340, 0.0842, 0.1029, 0.4788]) -Greedy action tensor([ 0.8783, 0.9804, 0.8585, -0.3176]) tensor([0.2950, 0.3267, 0.2892, 0.0892]) -Greedy action tensor([-0.0183, -1.7387, 1.1470, -0.1623]) tensor([0.1904, 0.0341, 0.6106, 0.1649]) -Greedy action tensor([ 1.8023, -1.4287, 0.4979, 1.8828]) tensor([0.4176, 0.0165, 0.1133, 0.4526]) -Greedy action tensor([-0.6268, -1.2484, 0.1464, 0.6851]) tensor([0.1348, 0.0724, 0.2921, 0.5006]) -Greedy action tensor([0.1695, 0.0101, 1.2076, 0.6442]) tensor([0.1591, 0.1357, 0.4494, 0.2558]) -Greedy action tensor([-0.3279, -2.0844, 0.0397, 2.1267]) tensor([0.0701, 0.0121, 0.1013, 0.8165]) -Greedy action tensor([1.3351, 0.1069, 0.5591, 0.5294]) tensor([0.4546, 0.1331, 0.2092, 0.2031]) -Greedy action tensor([ 0.6144, -1.6571, 0.2170, 0.5835]) tensor([0.3643, 0.0376, 0.2448, 0.3532]) -Greedy action tensor([ 1.7538, 0.2971, -0.2150, 0.5495]) tensor([0.5979, 0.1393, 0.0835, 0.1793]) -Greedy action tensor([ 0.9487, 0.3996, 1.6739, -0.5095]) tensor([0.2580, 0.1490, 0.5329, 0.0600]) -Greedy action tensor([-0.4822, -1.5255, 0.3117, 0.1406]) tensor([0.1842, 0.0649, 0.4075, 0.3434]) -Greedy action tensor([ 1.6957, -1.0414, 1.1071, 0.2656]) tensor([0.5379, 0.0348, 0.2986, 0.1287]) -Greedy action tensor([-0.8201, -0.0815, -0.8472, 0.4318]) tensor([0.1322, 0.2767, 0.1287, 0.4624]) -Greedy action tensor([ 0.4355, -0.7108, -0.6007, 1.2771]) tensor([0.2505, 0.0796, 0.0889, 0.5811]) -Greedy action tensor([-0.8409, -0.5263, -0.0225, 0.5207]) tensor([0.1171, 0.1604, 0.2655, 0.4570]) -Greedy action tensor([ 0.2112, -0.1410, 0.3803, 0.4152]) tensor([0.2431, 0.1709, 0.2879, 0.2981]) -Greedy action tensor([ 0.7836, -1.6866, 0.6033, 1.2855]) tensor([0.2800, 0.0237, 0.2338, 0.4625]) -Greedy action tensor([ 1.0650, -0.3793, 1.2676, 0.3608]) tensor([0.3384, 0.0798, 0.4144, 0.1674]) -Greedy action tensor([ 1.1465, -0.6929, 2.1679, 0.2261]) tensor([0.2307, 0.0367, 0.6407, 0.0919]) -Greedy action tensor([ 1.2851, -0.5445, 1.7983, -0.2850]) tensor([0.3290, 0.0528, 0.5497, 0.0684]) -Greedy action tensor([ 0.2708, -0.6900, -0.2335, 0.0852]) tensor([0.3550, 0.1358, 0.2144, 0.2948]) -Greedy action tensor([ 1.4917, -1.4717, 0.3172, 0.7611]) tensor([0.5428, 0.0280, 0.1677, 0.2614]) -Greedy action tensor([-0.9610, -0.2459, 1.5526, -0.6659]) tensor([0.0597, 0.1221, 0.7378, 0.0803]) -Greedy action tensor([ 0.6821, 1.0603, -0.4617, 0.8943]) tensor([0.2491, 0.3636, 0.0794, 0.3080]) -Greedy action tensor([ 0.8837, -0.9820, 1.3278, 0.1944]) tensor([0.3110, 0.0481, 0.4848, 0.1561]) -Greedy action tensor([-0.3824, -0.2340, -0.0230, -0.1233]) tensor([0.2046, 0.2373, 0.2930, 0.2651]) -Greedy action tensor([-0.7886, -1.6611, 0.6521, 0.5717]) tensor([0.1048, 0.0438, 0.4428, 0.4086]) -Greedy action tensor([ 0.4914, -0.2405, -0.2455, 1.5954]) tensor([0.2010, 0.0967, 0.0962, 0.6062]) -Greedy action tensor([ 0.8190, 0.2453, 0.8497, -0.2375]) tensor([0.3399, 0.1915, 0.3505, 0.1182]) -Greedy action tensor([-0.1734, 0.9733, 1.3032, -0.0610]) tensor([0.1037, 0.3264, 0.4539, 0.1160]) -Greedy action tensor([ 1.3684, -0.3370, 1.3412, 0.5908]) tensor([0.3825, 0.0695, 0.3722, 0.1758]) -Greedy action tensor([ 0.8964, -1.6599, 0.8988, 0.2715]) tensor([0.3824, 0.0297, 0.3833, 0.2047]) -Greedy action tensor([-0.2123, 0.4193, 0.7247, -0.3162]) tensor([0.1579, 0.2969, 0.4029, 0.1423]) -Greedy action tensor([ 1.7555, 0.0202, -0.1013, 0.0539]) tensor([0.6601, 0.1164, 0.1031, 0.1204]) -Greedy action tensor([ 0.7781, -0.3794, -0.0879, 0.1859]) tensor([0.4371, 0.1374, 0.1838, 0.2417]) -Greedy action tensor([ 1.0222, -0.3242, -0.2659, 0.0306]) tensor([0.5244, 0.1364, 0.1446, 0.1945]) -Greedy action tensor([ 1.6782, -0.4732, -0.3861, 0.6278]) tensor([0.6277, 0.0730, 0.0797, 0.2196]) -Greedy action tensor([ 1.9183, -0.4610, -0.8073, 0.2688]) tensor([0.7406, 0.0686, 0.0485, 0.1423]) -Greedy action tensor([ 0.9418, -0.3075, -0.4806, 0.2958]) tensor([0.4873, 0.1397, 0.1175, 0.2554]) -Greedy action tensor([ 1.1181, -0.1079, -0.4198, 0.5677]) tensor([0.4796, 0.1407, 0.1030, 0.2766]) -Greedy action tensor([ 1.1205, -0.5659, -0.4915, 0.6639]) tensor([0.4955, 0.0918, 0.0989, 0.3139]) -Greedy action tensor([ 1.5288, -0.1393, -0.5381, 0.3770]) tensor([0.6130, 0.1156, 0.0776, 0.1938]) -Greedy action tensor([ 1.6685, -0.7752, -0.2316, 0.4548]) tensor([0.6521, 0.0566, 0.0975, 0.1937]) -Greedy action tensor([ 1.4139, -0.1088, -0.2958, 0.1309]) tensor([0.5966, 0.1301, 0.1079, 0.1654]) -Greedy action tensor([ 1.0561, -0.5657, -0.6599, 1.0531]) tensor([0.4212, 0.0832, 0.0757, 0.4199]) -Greedy action tensor([ 1.2777, -0.2984, -0.1970, 0.1716]) tensor([0.5661, 0.1171, 0.1295, 0.1873]) -Greedy action tensor([ 2.0865, -0.6975, -0.4165, 0.5450]) tensor([0.7365, 0.0455, 0.0603, 0.1577]) -Greedy action tensor([ 1.5022, -0.6759, -0.3924, 0.4069]) tensor([0.6258, 0.0709, 0.0941, 0.2093]) -Greedy action tensor([ 1.9461, -0.7134, -0.2052, 0.4956]) tensor([0.7038, 0.0493, 0.0819, 0.1650]) -Greedy action tensor([ 1.0320, -0.0890, -0.0829, 0.2542]) tensor([0.4732, 0.1542, 0.1552, 0.2174]) -Greedy action tensor([ 1.0500, -0.5570, -0.1025, 0.1473]) tensor([0.5203, 0.1043, 0.1644, 0.2110]) -Greedy action tensor([ 1.6414, -0.5182, -0.5148, 0.3766]) tensor([0.6607, 0.0762, 0.0765, 0.1865]) -Greedy action tensor([ 2.2622, -0.9067, -0.4626, 0.6049]) tensor([0.7703, 0.0324, 0.0505, 0.1469]) -Greedy action tensor([ 1.6542, -0.6470, -0.3111, 0.3201]) tensor([0.6651, 0.0666, 0.0932, 0.1752]) -Greedy action tensor([ 0.9039, -0.2562, -0.4936, 0.9057]) tensor([0.3902, 0.1223, 0.0965, 0.3910]) -Greedy action tensor([ 0.9995, -0.2441, -0.3969, 0.3386]) tensor([0.4873, 0.1405, 0.1206, 0.2516]) -Greedy action tensor([ 1.0293, -0.5751, -0.1685, 0.1658]) tensor([0.5196, 0.1044, 0.1568, 0.2191]) -Greedy action tensor([ 1.3506, -0.4566, -0.2900, 0.2899]) tensor([0.5868, 0.0963, 0.1138, 0.2032]) -Greedy action tensor([ 0.9178, -0.2815, -0.0676, 0.1461]) tensor([0.4680, 0.1411, 0.1747, 0.2163]) -Greedy action tensor([ 0.8455, -0.4281, -0.4981, 0.8945]) tensor([0.3860, 0.1080, 0.1007, 0.4053]) -Greedy action tensor([ 1.5981, -0.6731, -0.2490, 0.1968]) tensor([0.6635, 0.0685, 0.1046, 0.1634]) -Greedy action tensor([ 1.2206, -0.2681, -0.3128, 0.3701]) tensor([0.5352, 0.1208, 0.1155, 0.2286]) -Greedy action tensor([ 1.1060, -0.1225, -0.1452, 0.4108]) tensor([0.4813, 0.1409, 0.1377, 0.2401]) -Greedy action tensor([ 1.2348, -0.2580, -0.5222, 0.3601]) tensor([0.5512, 0.1239, 0.0951, 0.2298]) -Greedy action tensor([ 1.3822, -0.5014, -0.4550, 0.3186]) tensor([0.6037, 0.0918, 0.0961, 0.2084]) -Greedy action tensor([ 1.2550, -0.0610, 0.0989, 0.3471]) tensor([0.5035, 0.1350, 0.1584, 0.2031]) -Greedy action tensor([ 1.2196, -0.4038, -0.4516, 0.2660]) tensor([0.5648, 0.1114, 0.1062, 0.2176]) -Greedy action tensor([ 1.6725, -0.7864, -0.4076, 0.1063]) tensor([0.7046, 0.0603, 0.0880, 0.1471]) -Greedy action tensor([ 1.8302, -0.4347, -0.3053, 0.4230]) tensor([0.6817, 0.0708, 0.0806, 0.1669]) -Greedy action tensor([ 1.1084, 0.2742, -0.3477, -0.4391]) tensor([0.5319, 0.2309, 0.1240, 0.1132]) -Greedy action tensor([ 1.4762, -0.1858, -0.3978, 0.1650]) tensor([0.6201, 0.1177, 0.0952, 0.1671]) -Greedy action tensor([ 1.0771, -0.5955, -0.3015, 0.2861]) tensor([0.5282, 0.0992, 0.1331, 0.2395]) -Greedy action tensor([ 1.3322, -0.2721, -0.2783, 0.3482]) tensor([0.5635, 0.1133, 0.1126, 0.2106]) -Greedy action tensor([ 1.2523, -0.0741, -0.4189, 0.1633]) tensor([0.5587, 0.1483, 0.1050, 0.1880]) -Greedy action tensor([ 1.3088, -0.2811, -0.3814, 0.3577]) tensor([0.5635, 0.1149, 0.1039, 0.2177]) -Greedy action tensor([ 1.4361, -0.8784, -0.1739, 0.2230]) tensor([0.6266, 0.0619, 0.1253, 0.1863]) -Greedy action tensor([ 1.5016, -0.3937, -0.3718, 0.4046]) tensor([0.6106, 0.0918, 0.0938, 0.2039]) -Greedy action tensor([ 0.8485, -0.3295, -0.0809, -0.0709]) tensor([0.4759, 0.1465, 0.1879, 0.1898]) -Greedy action tensor([ 1.0860, -0.4460, -0.4848, 0.5339]) tensor([0.5001, 0.1081, 0.1040, 0.2879]) -Greedy action tensor([ 0.8459, -0.4641, -0.2787, 0.3755]) tensor([0.4506, 0.1216, 0.1463, 0.2815]) -Greedy action tensor([ 1.0811, 0.0943, 0.1249, -0.1493]) tensor([0.4880, 0.1819, 0.1875, 0.1426]) -Greedy action tensor([ 1.0882, -0.4045, -0.4159, 0.2876]) tensor([0.5274, 0.1185, 0.1172, 0.2368]) -Greedy action tensor([ 1.0248, -0.5967, -0.0463, 0.4493]) tensor([0.4756, 0.0940, 0.1630, 0.2675]) -Greedy action tensor([ 0.7610, -0.5649, -0.2390, 0.5005]) tensor([0.4159, 0.1105, 0.1530, 0.3206]) -Greedy action tensor([ 0.9974, -0.4063, -0.1494, 0.1496]) tensor([0.5021, 0.1234, 0.1595, 0.2151]) -Greedy action tensor([ 1.6711, -0.4845, -0.4009, 0.2665]) tensor([0.6724, 0.0779, 0.0847, 0.1651]) -Greedy action tensor([ 1.2682, -0.6331, -0.3208, 0.1906]) tensor([0.5904, 0.0882, 0.1205, 0.2010]) -Greedy action tensor([ 1.1330, -0.2229, -0.3774, 0.3255]) tensor([0.5196, 0.1339, 0.1147, 0.2317]) -Greedy action tensor([ 1.2444, -0.1218, -0.2775, -0.1167]) tensor([0.5781, 0.1475, 0.1262, 0.1482]) -Greedy action tensor([ 1.3326, -0.1892, -0.4355, 0.0360]) tensor([0.6015, 0.1313, 0.1027, 0.1645]) -Greedy action tensor([ 1.3149e+00, -3.8919e-04, -1.8310e-01, 1.4742e-01]) tensor([0.5546, 0.1489, 0.1240, 0.1726]) -Greedy action tensor([ 1.0909, -0.3872, -0.7136, 0.3339]) tensor([0.5371, 0.1225, 0.0884, 0.2520]) -Greedy action tensor([ 1.7691, -0.9175, 0.2193, 0.1136]) tensor([0.6796, 0.0463, 0.1443, 0.1298]) -Greedy action tensor([ 1.2578, -0.4157, -0.4275, 0.4994]) tensor([0.5431, 0.1019, 0.1007, 0.2544]) -Greedy action tensor([ 0.8751, -0.4571, -0.3154, 0.7683]) tensor([0.4054, 0.1070, 0.1233, 0.3643]) -Greedy action tensor([ 1.4652, -0.6959, -0.1876, 0.2011]) tensor([0.6292, 0.0725, 0.1205, 0.1778]) -Greedy action tensor([ 0.9544, -0.0014, -0.5022, -0.1367]) tensor([0.5119, 0.1968, 0.1193, 0.1719]) -Greedy action tensor([ 0.7461, -0.3260, -0.1353, 0.1615]) tensor([0.4322, 0.1479, 0.1790, 0.2409]) -Greedy action tensor([ 1.2698, -0.2211, -0.4878, -0.0399]) tensor([0.5997, 0.1350, 0.1034, 0.1619]) -Greedy action tensor([ 1.0498, -0.4038, -0.1154, 0.2275]) tensor([0.5038, 0.1178, 0.1571, 0.2214]) -Greedy action tensor([ 0.4758, -0.1094, 0.2229, -0.0911]) tensor([0.3447, 0.1920, 0.2677, 0.1956]) -Greedy action tensor([ 1.0064, -0.2409, -0.0368, 0.0753]) tensor([0.4917, 0.1413, 0.1732, 0.1938]) -Greedy action tensor([ 1.2818, -0.5381, -0.2913, 0.2020]) tensor([0.5851, 0.0948, 0.1214, 0.1987]) -Greedy action tensor([ 1.0503, -0.2614, -0.5138, 0.0620]) tensor([0.5403, 0.1455, 0.1131, 0.2011]) -Greedy action tensor([ 1.8212, -0.6245, -0.4132, 0.3844]) tensor([0.6986, 0.0605, 0.0748, 0.1660]) -Greedy action tensor([ 2.0286, -0.7821, -0.4295, 0.3883]) tensor([0.7465, 0.0449, 0.0639, 0.1447]) -Greedy action tensor([ 0.8243, -0.1384, -0.3465, 0.3353]) tensor([0.4338, 0.1657, 0.1345, 0.2660]) -Greedy action tensor([ 2.1616, -0.3493, -0.4304, 0.3597]) tensor([0.7570, 0.0615, 0.0567, 0.1249]) -Greedy action tensor([ 1.4861, -0.4360, -0.3262, 0.1805]) tensor([0.6327, 0.0926, 0.1033, 0.1715]) -Greedy action tensor([ 0.9386, -0.4339, -0.6607, 0.7174]) tensor([0.4431, 0.1123, 0.0895, 0.3551]) -Greedy action tensor([ 2.0296, -0.1521, -0.4150, 0.0331]) tensor([0.7488, 0.0845, 0.0650, 0.1017]) -Greedy action tensor([ 1.2649, -0.5526, -0.3597, 0.0331]) tensor([0.6056, 0.0984, 0.1193, 0.1767]) -Greedy action tensor([ 0.9566, 0.0174, -0.1936, 0.4678]) tensor([0.4309, 0.1684, 0.1364, 0.2643]) -Greedy action tensor([ 1.2808, -0.3344, -0.2286, 0.1006]) tensor([0.5790, 0.1151, 0.1280, 0.1779]) -Greedy action tensor([ 0.5378, -0.5317, -0.1984, -0.1916]) tensor([0.4340, 0.1489, 0.2078, 0.2093]) -Greedy action tensor([ 0.5187, -0.0494, 0.0445, -0.1410]) tensor([0.3696, 0.2094, 0.2300, 0.1911]) -Greedy action tensor([ 0.9092, -0.5352, -0.0218, -0.4769]) tensor([0.5319, 0.1255, 0.2097, 0.1330]) -Greedy action tensor([ 0.6968, -0.4832, -0.0273, -0.5003]) tensor([0.4775, 0.1467, 0.2315, 0.1442]) -Greedy action tensor([ 0.6936, -0.5826, -0.0742, -0.2218]) tensor([0.4665, 0.1302, 0.2165, 0.1868]) -Greedy action tensor([ 0.8941, -0.4098, 0.1889, -0.2478]) tensor([0.4797, 0.1302, 0.2370, 0.1531]) -Greedy action tensor([ 0.6260, -0.1670, -0.0833, -0.4195]) tensor([0.4355, 0.1971, 0.2143, 0.1531]) -Greedy action tensor([ 0.3787, 0.0346, 0.0942, -0.1597]) tensor([0.3284, 0.2328, 0.2471, 0.1917]) -Greedy action tensor([ 0.9008, -0.3949, -0.0545, -0.4431]) tensor([0.5210, 0.1426, 0.2004, 0.1359]) -Greedy action tensor([ 0.3913, 0.0687, 0.0831, -0.1714]) tensor([0.3302, 0.2391, 0.2426, 0.1881]) -Greedy action tensor([ 0.6035, -0.4209, 0.1750, -0.3552]) tensor([0.4177, 0.1500, 0.2721, 0.1602]) -Greedy action tensor([ 0.4086, -0.4049, 0.0921, -0.3530]) tensor([0.3789, 0.1680, 0.2761, 0.1769]) -Greedy action tensor([ 0.3595, -0.0392, -0.0632, -0.1083]) tensor([0.3387, 0.2273, 0.2219, 0.2121]) -Greedy action tensor([ 0.8421, -0.4156, 0.0188, -0.2928]) tensor([0.4891, 0.1390, 0.2147, 0.1572]) -Greedy action tensor([ 0.6908, -0.5026, -0.0980, -0.4601]) tensor([0.4822, 0.1462, 0.2191, 0.1525]) -Greedy action tensor([ 0.4874, -0.4124, 0.1192, -0.7754]) tensor([0.4199, 0.1708, 0.2906, 0.1188]) -Greedy action tensor([ 0.5749, -0.4418, -0.1068, -0.4379]) tensor([0.4483, 0.1622, 0.2267, 0.1628]) -Greedy action tensor([ 0.5713, -0.3609, -0.0745, -0.0518]) tensor([0.4075, 0.1604, 0.2136, 0.2185]) -Greedy action tensor([ 0.9199, -0.6685, -0.0992, -0.4096]) tensor([0.5465, 0.1116, 0.1973, 0.1446]) -Greedy action tensor([ 0.7581, -0.5171, -0.0802, -0.4239]) tensor([0.4954, 0.1384, 0.2142, 0.1519]) -Greedy action tensor([ 0.2763, -0.0410, -0.0366, -0.1042]) tensor([0.3182, 0.2317, 0.2327, 0.2175]) -Greedy action tensor([ 0.7468, -0.6618, -0.2353, -0.6672]) tensor([0.5370, 0.1313, 0.2011, 0.1306]) -Greedy action tensor([ 0.7073, -0.0580, 0.0441, -0.6255]) tensor([0.4456, 0.2073, 0.2296, 0.1175]) -Greedy action tensor([ 0.5284, -0.4469, -0.2001, -0.4719]) tensor([0.4490, 0.1693, 0.2167, 0.1651]) -Greedy action tensor([ 1.0708, -1.2276, 0.0529, -0.6643]) tensor([0.6104, 0.0613, 0.2206, 0.1077]) -Greedy action tensor([ 0.5100, -0.0008, -0.0020, -0.2000]) tensor([0.3716, 0.2230, 0.2227, 0.1827]) -Greedy action tensor([ 0.7455, -0.4568, -0.0466, -0.7775]) tensor([0.5072, 0.1524, 0.2297, 0.1106]) -Greedy action tensor([ 0.6395, -0.5201, -0.1752, -0.3071]) tensor([0.4663, 0.1462, 0.2065, 0.1810]) -Greedy action tensor([ 0.3299, 0.0264, -0.0260, -0.1995]) tensor([0.3303, 0.2438, 0.2314, 0.1945]) -Greedy action tensor([ 0.8078, -0.6757, -0.1011, -0.3691]) tensor([0.5160, 0.1170, 0.2079, 0.1590]) -Greedy action tensor([ 0.4150, -0.3528, -0.1159, -0.0964]) tensor([0.3771, 0.1750, 0.2218, 0.2261]) -Greedy action tensor([ 0.6087, -0.4064, -0.1437, -0.2386]) tensor([0.4421, 0.1602, 0.2083, 0.1894]) -Greedy action tensor([ 0.4150, -0.3092, -0.1460, -0.2709]) tensor([0.3908, 0.1894, 0.2230, 0.1968]) -Greedy action tensor([ 0.5777, -0.1531, -0.0867, -0.1723]) tensor([0.4051, 0.1951, 0.2085, 0.1914]) -Greedy action tensor([ 0.6174, -0.2129, 0.0208, -0.5345]) tensor([0.4343, 0.1893, 0.2391, 0.1373]) -Greedy action tensor([ 0.5524, -0.3203, 0.1131, -0.3705]) tensor([0.4066, 0.1699, 0.2620, 0.1615]) -Greedy action tensor([ 0.3810, 0.1035, -0.0209, -0.2631]) tensor([0.3388, 0.2567, 0.2267, 0.1779]) -Greedy action tensor([ 0.8441, -0.5805, 0.1315, -0.6147]) tensor([0.5093, 0.1225, 0.2498, 0.1184]) -Greedy action tensor([ 0.4970, -0.4040, -0.0422, -0.3379]) tensor([0.4127, 0.1676, 0.2407, 0.1791]) -Greedy action tensor([ 0.4491, -0.0428, -0.0612, -0.1446]) tensor([0.3618, 0.2212, 0.2172, 0.1998]) -Greedy action tensor([ 0.5998, -0.2654, -0.0268, -0.1854]) tensor([0.4147, 0.1746, 0.2216, 0.1891]) -Greedy action tensor([ 0.5766, -0.3515, 0.0908, -0.3877]) tensor([0.4181, 0.1653, 0.2572, 0.1594]) -Greedy action tensor([ 0.9231, -0.4510, 0.0192, -0.5773]) tensor([0.5316, 0.1345, 0.2153, 0.1186]) -Greedy action tensor([ 0.6602, -0.3389, 0.0263, -0.3444]) tensor([0.4415, 0.1626, 0.2342, 0.1617]) -Greedy action tensor([ 0.8266, -0.7749, 0.0771, -0.4097]) tensor([0.5090, 0.1026, 0.2405, 0.1478]) -Greedy action tensor([ 0.8777, -0.6393, 0.0226, -0.5821]) tensor([0.5328, 0.1169, 0.2266, 0.1238]) -Greedy action tensor([ 0.5256, -0.3525, 0.0749, -0.3855]) tensor([0.4074, 0.1693, 0.2596, 0.1638]) -Greedy action tensor([ 0.9902, -1.0704, 0.0340, -0.7934]) tensor([0.5953, 0.0758, 0.2288, 0.1000]) -Greedy action tensor([ 0.4322, -0.0717, -0.0326, -0.2053]) tensor([0.3622, 0.2188, 0.2275, 0.1915]) -Greedy action tensor([ 0.7061, -0.6913, -0.0079, -0.3471]) tensor([0.4794, 0.1185, 0.2348, 0.1672]) -Greedy action tensor([ 0.4667, -0.0373, -0.1218, 0.0276]) tensor([0.3566, 0.2155, 0.1980, 0.2299]) -Greedy action tensor([ 0.9213, -0.5386, -0.1390, -0.4258]) tensor([0.5439, 0.1263, 0.1884, 0.1414]) -Greedy action tensor([ 0.7636, -0.3604, -0.1275, -0.3829]) tensor([0.4871, 0.1583, 0.1998, 0.1548]) -Greedy action tensor([ 0.4043, -0.3696, 0.0585, -0.3028]) tensor([0.3757, 0.1733, 0.2658, 0.1852]) -Greedy action tensor([ 0.8618, -0.6517, -0.0913, -0.4909]) tensor([0.5364, 0.1181, 0.2068, 0.1387]) -Greedy action tensor([ 0.9400, -0.4357, -0.0553, -0.3718]) tensor([0.5286, 0.1336, 0.1954, 0.1424]) -Greedy action tensor([ 0.6639, -0.3859, 0.0563, -0.5366]) tensor([0.4554, 0.1594, 0.2481, 0.1371]) -Greedy action tensor([ 0.5019, -0.2212, -0.0739, -0.2814]) tensor([0.3993, 0.1938, 0.2245, 0.1824]) -Greedy action tensor([ 0.7253, -0.3392, 0.0163, -0.6460]) tensor([0.4783, 0.1650, 0.2354, 0.1214]) -Greedy action tensor([ 0.4120, -0.1471, -0.1014, -0.1514]) tensor([0.3650, 0.2087, 0.2184, 0.2078]) -Greedy action tensor([ 0.9475, -0.4958, -0.1469, -0.2951]) tensor([0.5378, 0.1270, 0.1800, 0.1552]) -Greedy action tensor([ 0.4774, 0.2055, 0.0154, -0.0684]) tensor([0.3366, 0.2564, 0.2120, 0.1950]) -Greedy action tensor([ 0.4250, -0.5095, -0.1723, -0.1843]) tensor([0.4021, 0.1580, 0.2213, 0.2187]) -Greedy action tensor([ 0.8691, -0.3147, -0.0223, -0.4515]) tensor([0.5042, 0.1544, 0.2068, 0.1346]) -Greedy action tensor([ 0.3751, -0.0622, -0.1183, -0.1865]) tensor([0.3538, 0.2285, 0.2160, 0.2018]) -Greedy action tensor([ 0.8306, -0.5400, -0.1159, -0.4905]) tensor([0.5239, 0.1330, 0.2033, 0.1398]) -Greedy action tensor([ 0.7286, -0.4090, -0.0652, -0.2688]) tensor([0.4669, 0.1497, 0.2111, 0.1722]) -Greedy action tensor([ 0.1464, 0.0064, 0.0067, -0.3833]) tensor([0.3005, 0.2612, 0.2613, 0.1769]) -Greedy action tensor([ 0.6187, -0.6206, -0.0987, -0.2400]) tensor([0.4543, 0.1316, 0.2217, 0.1925]) -Greedy action tensor([ 0.5031, 0.0772, -0.2623, -0.0852]) tensor([0.3740, 0.2443, 0.1740, 0.2077]) -Greedy action tensor([ 0.6312, -0.0623, -0.0030, -0.4280]) tensor([0.4207, 0.2103, 0.2231, 0.1459]) -Greedy action tensor([ 0.7231, -0.3181, -0.1207, -0.5736]) tensor([0.4863, 0.1717, 0.2091, 0.1330]) -Greedy action tensor([ 0.5692, -0.0413, -0.0974, -0.3229]) tensor([0.4054, 0.2202, 0.2082, 0.1662]) -Greedy action tensor([ 0.5470, -0.4400, -0.0688, -0.1774]) tensor([0.4171, 0.1555, 0.2253, 0.2021]) -Greedy action tensor([ 0.3795, 0.2541, -0.1030, -0.3399]) tensor([0.3349, 0.2954, 0.2067, 0.1631]) -Greedy action tensor([ 0.3048, 0.3032, -0.0847, -0.1407]) tensor([0.3015, 0.3011, 0.2043, 0.1931]) -Greedy action tensor([ 0.4962, -0.4937, -0.1921, -0.1257]) tensor([0.4148, 0.1541, 0.2084, 0.2227]) -Greedy action tensor([ 0.6873, -0.3722, 0.1008, -0.3619]) tensor([0.4438, 0.1538, 0.2469, 0.1554]) -Greedy action tensor([ 0.4876, -0.0267, 0.0411, -0.1432]) tensor([0.3610, 0.2159, 0.2310, 0.1921]) -Greedy action tensor([ 1.1423, -1.4572, -0.0390, -0.6573]) tensor([0.6466, 0.0480, 0.1984, 0.1069]) -Greedy action tensor([ 0.5301, 0.0761, 0.0188, -0.3339]) tensor([0.3765, 0.2391, 0.2258, 0.1587]) -Greedy action tensor([-1.8929, -0.3451, 0.6254, -0.1470]) tensor([0.0419, 0.1972, 0.5205, 0.2404]) -Greedy action tensor([-1.9285, -0.4316, 0.6579, -0.1708]) tensor([0.0407, 0.1820, 0.5410, 0.2362]) -Greedy action tensor([-0.2908, 0.1852, 0.8031, 1.6802]) tensor([0.0783, 0.1260, 0.2338, 0.5619]) -Greedy action tensor([-1.8355, -0.4634, 0.6223, -0.0979]) tensor([0.0448, 0.1768, 0.5236, 0.2548]) -Greedy action tensor([-1.3518, -0.5818, 0.3390, 0.2024]) tensor([0.0751, 0.1622, 0.4073, 0.3553]) -Greedy action tensor([-1.0865, -0.5467, 0.3130, 0.2598]) tensor([0.0942, 0.1617, 0.3819, 0.3621]) -Greedy action tensor([-1.1310, -0.7487, 0.9833, 1.0744]) tensor([0.0504, 0.0739, 0.4179, 0.4577]) -Greedy action tensor([-1.8863, -0.5971, 0.8384, -0.0772]) tensor([0.0385, 0.1397, 0.5869, 0.2349]) -Greedy action tensor([-1.5886, -0.5229, 0.5169, 0.0644]) tensor([0.0577, 0.1674, 0.4736, 0.3012]) -Greedy action tensor([-1.8213, -0.3876, 0.5968, -0.1018]) tensor([0.0455, 0.1906, 0.5102, 0.2537]) -Greedy action tensor([-1.3125, -0.6010, 0.3358, 0.1581]) tensor([0.0794, 0.1618, 0.4130, 0.3457]) -Greedy action tensor([-1.9139, -0.4423, 0.6523, -0.1594]) tensor([0.0414, 0.1804, 0.5389, 0.2393]) -Greedy action tensor([-0.7608, 0.7794, 0.2795, 0.7530]) tensor([0.0767, 0.3578, 0.2170, 0.3485]) -Greedy action tensor([-0.0126, 0.8985, 0.7206, 1.5084]) tensor([0.0986, 0.2451, 0.2052, 0.4511]) -Greedy action tensor([-1.6328, -0.3957, 0.4979, -0.0198]) tensor([0.0559, 0.1927, 0.4709, 0.2806]) -Greedy action tensor([-1.5337, -0.6348, 0.7707, 0.1178]) tensor([0.0535, 0.1315, 0.5360, 0.2790]) -Greedy action tensor([-0.9176, -0.1721, 0.7073, 1.0908]) tensor([0.0640, 0.1348, 0.3247, 0.4765]) -Greedy action tensor([-1.9150, -0.3993, 0.6483, -0.1574]) tensor([0.0411, 0.1871, 0.5334, 0.2383]) -Greedy action tensor([-1.8973, -0.4407, 0.6404, -0.1496]) tensor([0.0422, 0.1812, 0.5342, 0.2424]) -Greedy action tensor([-1.5915, -0.5189, 0.5831, -0.2226]) tensor([0.0600, 0.1755, 0.5284, 0.2361]) -Greedy action tensor([-1.2750, -0.5452, 0.3128, 0.4129]) tensor([0.0748, 0.1551, 0.3658, 0.4043]) -Greedy action tensor([-1.8205, -0.4467, 0.6501, -0.2323]) tensor([0.0461, 0.1823, 0.5458, 0.2258]) -Greedy action tensor([-1.8052, -0.4767, 0.5907, -0.1422]) tensor([0.0476, 0.1795, 0.5220, 0.2509]) -Greedy action tensor([-1.6065, 0.0457, 0.4594, -0.1094]) tensor([0.0538, 0.2809, 0.4248, 0.2405]) -Greedy action tensor([-0.1336, -0.2105, 1.0602, 1.6988]) tensor([0.0872, 0.0807, 0.2876, 0.5446]) -Greedy action tensor([-1.8606, -0.3624, 0.6143, -0.1219]) tensor([0.0434, 0.1941, 0.5155, 0.2469]) -Greedy action tensor([-1.7393, -0.4904, 0.6615, 0.0195]) tensor([0.0469, 0.1635, 0.5173, 0.2723]) -Greedy action tensor([-1.8037, -0.2209, 0.5747, -0.0909]) tensor([0.0450, 0.2193, 0.4859, 0.2498]) -Greedy action tensor([-1.1071, -0.6037, 0.3264, -0.0357]) tensor([0.1024, 0.1694, 0.4293, 0.2989]) -Greedy action tensor([-1.1322, -0.6264, 0.2980, 0.1445]) tensor([0.0959, 0.1591, 0.4010, 0.3439]) -Greedy action tensor([-1.9091, -0.4472, 0.6472, -0.1630]) tensor([0.0418, 0.1802, 0.5385, 0.2395]) -Greedy action tensor([-1.8319, -0.4778, 0.6295, -0.0651]) tensor([0.0445, 0.1726, 0.5222, 0.2607]) -Greedy action tensor([-1.3845, -0.4103, 0.5043, 0.5638]) tensor([0.0579, 0.1533, 0.3827, 0.4061]) -Greedy action tensor([-1.4826, -0.6110, 0.6416, -0.0249]) tensor([0.0623, 0.1489, 0.5212, 0.2676]) -Greedy action tensor([-1.2427, -0.6075, 0.3724, 0.2503]) tensor([0.0809, 0.1526, 0.4066, 0.3599]) -Greedy action tensor([-1.7068, -0.4915, 0.5446, -0.0072]) tensor([0.0517, 0.1743, 0.4911, 0.2829]) -Greedy action tensor([-1.8621, -0.4172, 0.6218, -0.1289]) tensor([0.0437, 0.1853, 0.5238, 0.2472]) -Greedy action tensor([-1.9075, -0.4653, 0.6522, -0.1505]) tensor([0.0417, 0.1766, 0.5398, 0.2419]) -Greedy action tensor([-1.4141, -0.3407, 0.5218, 0.3326]) tensor([0.0603, 0.1763, 0.4177, 0.3457]) -Greedy action tensor([-1.6178, -1.0057, -0.2146, -0.8304]) tensor([0.1098, 0.2024, 0.4466, 0.2412]) -Greedy action tensor([-1.9075, -0.3605, 0.6369, -0.1806]) tensor([0.0416, 0.1953, 0.5294, 0.2338]) -Greedy action tensor([-1.8765, -0.4794, 0.6351, -0.1312]) tensor([0.0433, 0.1751, 0.5336, 0.2480]) -Greedy action tensor([-1.2256, -0.3322, 0.5597, 0.6224]) tensor([0.0635, 0.1551, 0.3785, 0.4029]) -Greedy action tensor([-1.5944, -0.4893, 0.5038, -0.0016]) tensor([0.0585, 0.1767, 0.4770, 0.2878]) -Greedy action tensor([-1.8987, -0.4208, 0.6480, -0.1362]) tensor([0.0417, 0.1828, 0.5324, 0.2430]) -Greedy action tensor([-1.4145, -0.4025, 0.7567, -0.6924]) tensor([0.0686, 0.1887, 0.6015, 0.1412]) -Greedy action tensor([-1.1549, -0.5768, 0.2544, 0.2793]) tensor([0.0903, 0.1610, 0.3697, 0.3790]) -Greedy action tensor([-1.8642, -0.4726, 0.6665, -0.0986]) tensor([0.0427, 0.1716, 0.5362, 0.2495]) -Greedy action tensor([-1.7987, -0.2813, 0.5737, -0.0866]) tensor([0.0458, 0.2090, 0.4913, 0.2539]) -Greedy action tensor([-1.9085, -0.4428, 0.6460, -0.1598]) tensor([0.0418, 0.1809, 0.5373, 0.2400]) -Greedy action tensor([-1.0332, -0.6450, 0.2872, 0.2859]) tensor([0.1004, 0.1480, 0.3760, 0.3755]) -Greedy action tensor([-1.7255, -0.4792, 0.5499, -0.0395]) tensor([0.0510, 0.1773, 0.4964, 0.2753]) -Greedy action tensor([-1.4982, -0.0973, 0.7573, 0.4269]) tensor([0.0466, 0.1892, 0.4447, 0.3195]) -Greedy action tensor([-1.0196, 0.0185, 0.1884, 0.3376]) tensor([0.0904, 0.2554, 0.3027, 0.3514]) -Greedy action tensor([-1.5612, -0.0346, 0.6262, 0.3686]) tensor([0.0467, 0.2150, 0.4164, 0.3218]) -Greedy action tensor([-1.8013, -0.4245, 0.6679, -0.2973]) tensor([0.0470, 0.1862, 0.5553, 0.2115]) -Greedy action tensor([-1.7321, -0.5389, 0.8528, 0.3068]) tensor([0.0396, 0.1306, 0.5254, 0.3044]) -Greedy action tensor([-1.8957, -0.3676, 0.2460, -0.2675]) tensor([0.0520, 0.2398, 0.4430, 0.2651]) -Greedy action tensor([-1.6628, -0.5067, 0.5623, 0.0492]) tensor([0.0527, 0.1675, 0.4878, 0.2920]) -Greedy action tensor([-1.7921, 0.0151, 0.5165, -0.1049]) tensor([0.0443, 0.2701, 0.4460, 0.2396]) -Greedy action tensor([-1.0094, 0.1005, 0.6103, 0.8005]) tensor([0.0658, 0.1997, 0.3324, 0.4021]) -Greedy action tensor([-1.5151, -0.4925, 1.1599, 1.0815]) tensor([0.0315, 0.0877, 0.4577, 0.4231]) -Greedy action tensor([-1.3734, -0.5693, 0.7746, 0.7347]) tensor([0.0499, 0.1115, 0.4276, 0.4109]) -Greedy action tensor([-1.8975, -0.4720, 0.6418, -0.1519]) tensor([0.0424, 0.1766, 0.5378, 0.2432]) -Greedy action tensor([-1.9450, -0.4534, 0.6701, -0.1794]) tensor([0.0401, 0.1781, 0.5477, 0.2342]) -Greedy action tensor([-1.6757, -0.5313, 0.5367, -0.0156]) tensor([0.0539, 0.1694, 0.4929, 0.2837]) -Greedy action tensor([-0.8579, -0.2537, 0.9125, 1.2968]) tensor([0.0577, 0.1056, 0.3389, 0.4978]) -Greedy action tensor([-1.5950, -0.4665, 0.4996, 0.0421]) tensor([0.0576, 0.1781, 0.4680, 0.2962]) -Greedy action tensor([-1.7047, -0.4990, 0.5537, 0.0483]) tensor([0.0508, 0.1697, 0.4862, 0.2933]) -Greedy action tensor([-0.0912, -0.0431, 1.0957, 1.7433]) tensor([0.0863, 0.0905, 0.2828, 0.5404]) -Greedy action tensor([-1.7328, -0.0551, 0.5249, 0.0769]) tensor([0.0454, 0.2431, 0.4342, 0.2774]) -Greedy action tensor([-1.8864, -0.3752, 0.6321, -0.1391]) tensor([0.0422, 0.1914, 0.5240, 0.2424]) -Greedy action tensor([-1.6875, -0.4338, 0.6325, 0.1619]) tensor([0.0475, 0.1665, 0.4838, 0.3022]) -Greedy action tensor([-1.8668, -0.4724, 0.6367, -0.1071]) tensor([0.0433, 0.1748, 0.5299, 0.2519]) -Greedy action tensor([-1.8176, -0.6951, 0.2361, -0.2200]) tensor([0.0595, 0.1828, 0.4638, 0.2939]) -Greedy action tensor([-1.4661, 0.1760, 0.4548, 0.3852]) tensor([0.0517, 0.2668, 0.3526, 0.3289]) -Greedy action tensor([-1.8363, -0.3290, 0.6088, -0.1031]) tensor([0.0440, 0.1988, 0.5079, 0.2492]) -Greedy action tensor([-1.7966, -0.4198, 0.5840, -0.1017]) tensor([0.0471, 0.1867, 0.5095, 0.2567]) -Greedy action tensor([-0.3967, -0.1127, 1.1577, 1.6550]) tensor([0.0674, 0.0895, 0.3189, 0.5243]) -Greedy action tensor([-1.8200, -0.1364, 0.5563, -0.0669]) tensor([0.0436, 0.2349, 0.4696, 0.2518]) -Greedy action tensor([-0.7557, 0.9962, 0.1673, 0.0312]) tensor([0.0871, 0.5023, 0.2193, 0.1914]) -Greedy action tensor([ 0.7266, -1.1343, 1.1929, 0.2141]) tensor([0.2986, 0.0464, 0.4761, 0.1789]) -Greedy action tensor([-0.5921, -0.9305, 2.4017, 0.6254]) tensor([0.0399, 0.0285, 0.7968, 0.1349]) -Greedy action tensor([0.4239, 0.6743, 0.2871, 0.4033]) tensor([0.2418, 0.3105, 0.2108, 0.2368]) -Greedy action tensor([ 0.9193, -0.2851, 0.2198, 0.9880]) tensor([0.3487, 0.1046, 0.1733, 0.3735]) -Greedy action tensor([ 0.0578, 0.4837, -0.1087, -0.9608]) tensor([0.2675, 0.4095, 0.2264, 0.0966]) -Greedy action tensor([0.8567, 0.1662, 0.0251, 0.3091]) tensor([0.3976, 0.1993, 0.1731, 0.2300]) -Greedy action tensor([-0.5319, -0.1062, 0.9975, 0.2527]) tensor([0.1071, 0.1639, 0.4943, 0.2347]) -Greedy action tensor([1.0520, 1.2267, 0.2197, 0.5354]) tensor([0.3103, 0.3696, 0.1350, 0.1851]) -Greedy action tensor([ 0.6252, -0.9809, -0.0524, 1.9405]) tensor([0.1840, 0.0369, 0.0935, 0.6856]) -Greedy action tensor([ 0.8296, -0.3317, 0.7816, -0.2758]) tensor([0.3850, 0.1205, 0.3670, 0.1275]) -Greedy action tensor([-1.2976, -0.4499, -0.7450, -0.7210]) tensor([0.1459, 0.3407, 0.2536, 0.2598]) -Greedy action tensor([ 0.0099, -0.8969, -0.4803, 0.5498]) tensor([0.2679, 0.1082, 0.1641, 0.4597]) -Greedy action tensor([-0.8440, -0.9642, -0.6055, -0.8587]) tensor([0.2414, 0.2141, 0.3065, 0.2379]) -Greedy action tensor([-0.5276, -1.6572, -0.9415, 0.1297]) tensor([0.2555, 0.0826, 0.1689, 0.4930]) -Greedy action tensor([ 0.7649, -1.1622, 0.7804, 0.1544]) tensor([0.3698, 0.0538, 0.3756, 0.2008]) -Greedy action tensor([ 0.6807, 0.5945, -1.1528, 0.9113]) tensor([0.2997, 0.2749, 0.0479, 0.3775]) -Greedy action tensor([ 1.7504, -0.5457, -0.1098, 0.9385]) tensor([0.5881, 0.0592, 0.0915, 0.2611]) -Greedy action tensor([ 0.9679, -0.5195, -0.4143, 0.6169]) tensor([0.4585, 0.1036, 0.1151, 0.3228]) -Greedy action tensor([ 1.1482, -0.2108, 0.1285, 1.8738]) tensor([0.2715, 0.0697, 0.0979, 0.5609]) -Greedy action tensor([ 0.1790, -0.8182, 0.2770, 0.6434]) tensor([0.2461, 0.0908, 0.2715, 0.3916]) -Greedy action tensor([ 0.8528, -2.0477, 0.7879, 0.0793]) tensor([0.4076, 0.0224, 0.3820, 0.1881]) -Greedy action tensor([-0.9011, 0.4089, 1.6048, -0.4467]) tensor([0.0539, 0.1999, 0.6611, 0.0850]) -Greedy action tensor([ 0.4593, -0.1365, 0.7441, 0.4577]) tensor([0.2578, 0.1421, 0.3427, 0.2574]) -Greedy action tensor([ 0.0711, 0.0503, -0.7818, -1.4550]) tensor([0.3813, 0.3734, 0.1625, 0.0829]) -Greedy action tensor([ 0.5269, 0.3328, 0.2170, -0.0079]) tensor([0.3182, 0.2621, 0.2334, 0.1864]) -Greedy action tensor([ 0.8504, -1.6477, 0.8734, 1.3539]) tensor([0.2660, 0.0219, 0.2721, 0.4400]) -Greedy action tensor([ 1.8963, -1.1846, -0.3631, 0.7652]) tensor([0.6789, 0.0312, 0.0709, 0.2191]) -Greedy action tensor([ 0.4621, -0.4345, 0.4312, 1.6746]) tensor([0.1742, 0.0711, 0.1689, 0.5857]) -Greedy action tensor([ 0.1808, -0.4042, 0.6936, 0.6554]) tensor([0.2068, 0.1152, 0.3454, 0.3325]) -Greedy action tensor([ 0.8754, -0.5157, -0.4552, 0.7526]) tensor([0.4171, 0.1038, 0.1102, 0.3689]) -Greedy action tensor([ 1.6027, -0.2518, 0.5346, 1.3017]) tensor([0.4464, 0.0699, 0.1534, 0.3304]) -Greedy action tensor([ 0.4631, -0.6251, -0.1794, 0.7725]) tensor([0.3100, 0.1044, 0.1631, 0.4225]) -Greedy action tensor([-0.3569, -0.3046, -0.1358, -0.7519]) tensor([0.2516, 0.2651, 0.3138, 0.1695]) -Greedy action tensor([ 1.2721, -0.2710, 1.4938, 0.1660]) tensor([0.3581, 0.0765, 0.4469, 0.1185]) -Greedy action tensor([-0.5805, -0.7963, 1.5045, -0.5150]) tensor([0.0916, 0.0738, 0.7368, 0.0978]) -Greedy action tensor([ 0.6820, -0.6239, -0.0658, 0.7979]) tensor([0.3488, 0.0945, 0.1651, 0.3916]) -Greedy action tensor([ 1.3612, -1.6413, 0.3800, 1.1856]) tensor([0.4418, 0.0219, 0.1656, 0.3706]) -Greedy action tensor([ 1.8051, -0.0446, 0.8300, 0.5777]) tensor([0.5472, 0.0861, 0.2064, 0.1604]) -Greedy action tensor([ 1.0478, 1.1265, -0.0524, -0.4442]) tensor([0.3788, 0.4099, 0.1261, 0.0852]) -Greedy action tensor([0.3734, 0.5532, 0.1250, 1.1224]) tensor([0.1964, 0.2351, 0.1532, 0.4153]) -Greedy action tensor([ 1.3047, 0.4128, 0.8396, -0.1603]) tensor([0.4407, 0.1806, 0.2768, 0.1018]) -Greedy action tensor([ 1.2536, -0.2650, 0.6455, 0.2266]) tensor([0.4714, 0.1032, 0.2566, 0.1688]) -Greedy action tensor([ 0.0892, -1.9474, -0.2538, 0.8577]) tensor([0.2502, 0.0326, 0.1776, 0.5396]) -Greedy action tensor([-0.7135, 0.5143, 0.0494, -1.3929]) tensor([0.1415, 0.4832, 0.3035, 0.0718]) -Greedy action tensor([ 0.6879, -1.7229, 0.4721, 1.2228]) tensor([0.2776, 0.0249, 0.2237, 0.4738]) -Greedy action tensor([ 0.7325, -1.4986, 1.1197, 0.4418]) tensor([0.3005, 0.0323, 0.4426, 0.2247]) -Greedy action tensor([-0.1469, -1.0595, -0.1840, -0.0417]) tensor([0.2877, 0.1155, 0.2772, 0.3196]) -Greedy action tensor([ 0.9758, -0.8685, -0.3109, 0.5013]) tensor([0.4863, 0.0769, 0.1343, 0.3025]) -Greedy action tensor([ 0.1766, -0.7803, -0.2118, 0.1902]) tensor([0.3251, 0.1249, 0.2205, 0.3296]) -Greedy action tensor([ 0.2448, -0.5197, -0.1536, 2.2400]) tensor([0.1054, 0.0491, 0.0707, 0.7748]) -Greedy action tensor([-0.7949, -1.8801, -0.1401, -0.5843]) tensor([0.2224, 0.0751, 0.4280, 0.2745]) -Greedy action tensor([-0.4438, -1.2118, -0.2367, 0.8315]) tensor([0.1594, 0.0739, 0.1961, 0.5706]) -Greedy action tensor([-0.5976, 0.0306, 1.0224, -1.3204]) tensor([0.1189, 0.2228, 0.6006, 0.0577]) -Greedy action tensor([-0.0817, -1.7846, -0.2502, 0.6341]) tensor([0.2455, 0.0447, 0.2075, 0.5023]) -Greedy action tensor([-0.3324, -1.8101, -0.9199, 0.8073]) tensor([0.2037, 0.0465, 0.1132, 0.6367]) -Greedy action tensor([-0.6206, -1.3582, -0.2435, -0.4845]) tensor([0.2450, 0.1172, 0.3572, 0.2807]) -Greedy action tensor([ 1.0010, -0.3064, 0.5514, 0.9088]) tensor([0.3546, 0.0959, 0.2262, 0.3233]) -Greedy action tensor([ 1.1492, 0.5232, -0.0546, 0.4460]) tensor([0.4292, 0.2295, 0.1288, 0.2125]) -Greedy action tensor([ 1.4109, -0.5843, 1.9645, 0.2344]) tensor([0.3141, 0.0427, 0.5464, 0.0968]) -Greedy action tensor([ 0.7046, -0.7779, -0.0362, 0.8756]) tensor([0.3460, 0.0786, 0.1649, 0.4105]) -Greedy action tensor([ 2.4894, -0.4830, 0.6281, 1.8109]) tensor([0.5834, 0.0299, 0.0907, 0.2960]) -Greedy action tensor([ 0.1750, -1.2044, -0.0164, 1.7846]) tensor([0.1413, 0.0356, 0.1167, 0.7065]) -Greedy action tensor([ 0.1809, -0.3205, 1.0843, 0.9650]) tensor([0.1596, 0.0967, 0.3940, 0.3497]) -Greedy action tensor([1.2557, 0.3796, 0.9889, 0.0601]) tensor([0.4025, 0.1676, 0.3082, 0.1218]) -Greedy action tensor([-0.1890, 0.0399, -0.1965, -0.5583]) tensor([0.2537, 0.3190, 0.2518, 0.1754]) -Greedy action tensor([1.3889, 0.3725, 0.1288, 0.7382]) tensor([0.4614, 0.1670, 0.1309, 0.2407]) -Greedy action tensor([ 1.2576, -1.1818, 0.8629, 0.9731]) tensor([0.3979, 0.0347, 0.2681, 0.2993]) -Greedy action tensor([1.7086, 0.2345, 0.7980, 1.4302]) tensor([0.4187, 0.0959, 0.1684, 0.3170]) -Greedy action tensor([ 0.3524, -0.9211, -0.2376, 1.4046]) tensor([0.2129, 0.0596, 0.1180, 0.6096]) -Greedy action tensor([ 1.0305, -0.2119, 1.2389, 0.9242]) tensor([0.2924, 0.0844, 0.3602, 0.2629]) -Greedy action tensor([ 0.3902, -0.8238, -0.3800, 1.0132]) tensor([0.2759, 0.0819, 0.1277, 0.5144]) -Greedy action tensor([ 0.0801, 0.9274, 0.4955, -0.1863]) tensor([0.1781, 0.4156, 0.2698, 0.1365]) -Greedy action tensor([ 1.3375, -0.8720, 0.9550, 0.5920]) tensor([0.4412, 0.0484, 0.3010, 0.2094]) -Greedy action tensor([-1.5117, -0.8914, 1.8223, -0.9506]) tensor([0.0306, 0.0569, 0.8588, 0.0537]) -Greedy action tensor([ 0.7304, -0.3517, -0.2531, 1.1490]) tensor([0.3093, 0.1048, 0.1157, 0.4701]) -Greedy action tensor([ 0.0676, 0.4060, 0.2142, -0.6440]) tensor([0.2468, 0.3462, 0.2858, 0.1211]) -Greedy action tensor([-0.3256, -1.5257, 1.1147, 0.9817]) tensor([0.1085, 0.0327, 0.4580, 0.4009]) -Greedy action tensor([ 1.1539, -0.4979, -0.2964, 0.5631]) tensor([0.5050, 0.0968, 0.1184, 0.2797]) -Greedy action tensor([ 0.5102, -0.6300, 0.0027, -0.4724]) tensor([0.4355, 0.1393, 0.2622, 0.1630]) -Greedy action tensor([-0.3873, -1.0661, -0.0795, 0.1756]) tensor([0.2163, 0.1097, 0.2943, 0.3797]) -Greedy action tensor([ 0.3170, -1.0143, -0.0435, 0.2357]) tensor([0.3468, 0.0916, 0.2419, 0.3197]) -Greedy action tensor([-0.2281, -1.0786, -0.2980, 0.6539]) tensor([0.2094, 0.0895, 0.1953, 0.5059]) -Greedy action tensor([ 0.5848, -0.4156, -0.0374, -0.0193]) tensor([0.4080, 0.1500, 0.2190, 0.2230]) -Greedy action tensor([ 1.7828, -0.5124, -0.5638, 0.5674]) tensor([0.6698, 0.0675, 0.0641, 0.1987]) -Greedy action tensor([ 2.0838, -1.0630, -0.0825, 0.4595]) tensor([0.7382, 0.0317, 0.0846, 0.1455]) -Greedy action tensor([ 0.5968, 0.0713, -0.0751, -0.2663]) tensor([0.3962, 0.2343, 0.2024, 0.1671]) -Greedy action tensor([ 1.4083, -0.3564, -0.2252, 0.0637]) tensor([0.6146, 0.1052, 0.1200, 0.1602]) -Greedy action tensor([ 0.9021, -0.4746, -0.4688, 0.9077]) tensor([0.3981, 0.1005, 0.1011, 0.4003]) -Greedy action tensor([ 1.3006, -0.2712, -0.1744, 0.0152]) tensor([0.5838, 0.1212, 0.1336, 0.1614]) -Greedy action tensor([ 1.8260, -0.5843, -0.3395, 0.3864]) tensor([0.6937, 0.0623, 0.0796, 0.1644]) -Greedy action tensor([ 1.6770, 0.4400, -0.4516, 0.3076]) tensor([0.6011, 0.1745, 0.0715, 0.1528]) -Greedy action tensor([ 1.7047, -0.2830, -0.4702, 0.2976]) tensor([0.6687, 0.0916, 0.0760, 0.1637]) -Greedy action tensor([ 0.8725, -0.3305, -0.3840, 0.5404]) tensor([0.4343, 0.1304, 0.1236, 0.3116]) -Greedy action tensor([ 0.8083, -0.4283, -0.0749, 0.1058]) tensor([0.4547, 0.1320, 0.1880, 0.2252]) -Greedy action tensor([ 1.9573, -0.7981, -0.2701, 0.5881]) tensor([0.7014, 0.0446, 0.0756, 0.1784]) -Greedy action tensor([ 0.7434, -0.4490, -0.2052, 0.5931]) tensor([0.3920, 0.1190, 0.1518, 0.3373]) -Greedy action tensor([ 0.9418, -0.5014, -0.2525, 0.3672]) tensor([0.4757, 0.1124, 0.1441, 0.2678]) -Greedy action tensor([ 1.4436, -0.0347, -0.7425, 0.1062]) tensor([0.6239, 0.1423, 0.0701, 0.1638]) -Greedy action tensor([ 1.6693, -0.7130, -0.2000, 0.5321]) tensor([0.6380, 0.0589, 0.0984, 0.2046]) -Greedy action tensor([ 1.5693, -0.9390, -0.2983, 0.8003]) tensor([0.5884, 0.0479, 0.0909, 0.2727]) -Greedy action tensor([ 1.3248, -0.7033, -0.4104, 0.5145]) tensor([0.5706, 0.0751, 0.1006, 0.2537]) -Greedy action tensor([ 0.7579, -0.2852, -0.3109, 0.1937]) tensor([0.4416, 0.1556, 0.1516, 0.2512]) -Greedy action tensor([ 1.4826, -0.2863, -0.3656, 0.2853]) tensor([0.6135, 0.1046, 0.0966, 0.1853]) -Greedy action tensor([ 2.0841, -1.0916, -0.1659, 0.3056]) tensor([0.7599, 0.0317, 0.0801, 0.1283]) -Greedy action tensor([ 1.2950, 0.2384, -0.5840, 0.3284]) tensor([0.5317, 0.1848, 0.0812, 0.2022]) -Greedy action tensor([ 1.0026, -0.2902, 0.1208, 0.1790]) tensor([0.4701, 0.1290, 0.1946, 0.2063]) -Greedy action tensor([ 0.8159, -0.0549, -0.1384, 0.0830]) tensor([0.4378, 0.1833, 0.1686, 0.2104]) -Greedy action tensor([ 0.9368, -0.0874, -0.4817, -0.0264]) tensor([0.5043, 0.1811, 0.1221, 0.1925]) -Greedy action tensor([ 1.2043, -0.0166, 0.3281, -0.4202]) tensor([0.5240, 0.1546, 0.2182, 0.1032]) -Greedy action tensor([ 1.0258, -0.5856, -0.1224, 0.3773]) tensor([0.4903, 0.0979, 0.1555, 0.2563]) -Greedy action tensor([ 1.1542, -0.2002, -0.5468, 0.2427]) tensor([0.5427, 0.1401, 0.0990, 0.2181]) -Greedy action tensor([ 0.9321, -0.5793, -0.0671, 0.3372]) tensor([0.4672, 0.1031, 0.1720, 0.2577]) -Greedy action tensor([ 1.6205, -0.4153, -0.1005, 0.0289]) tensor([0.6609, 0.0863, 0.1182, 0.1346]) -Greedy action tensor([ 1.7547, -0.6696, -0.4136, 0.5493]) tensor([0.6656, 0.0589, 0.0761, 0.1994]) -Greedy action tensor([ 1.4768, -0.8398, -0.6564, 0.8311]) tensor([0.5743, 0.0566, 0.0680, 0.3011]) -Greedy action tensor([ 1.2359, -0.6162, -0.1569, 0.0300]) tensor([0.5866, 0.0920, 0.1457, 0.1756]) -Greedy action tensor([ 2.3071, -0.5960, -0.0726, 0.0759]) tensor([0.7969, 0.0437, 0.0738, 0.0856]) -Greedy action tensor([ 1.0454, -0.1296, -0.3137, 0.4190]) tensor([0.4761, 0.1470, 0.1223, 0.2545]) -Greedy action tensor([ 2.5038, -1.1329, -0.5707, 0.9368]) tensor([0.7805, 0.0206, 0.0361, 0.1629]) -Greedy action tensor([ 1.2753, -0.6422, -0.1662, 0.2724]) tensor([0.5713, 0.0840, 0.1352, 0.2096]) -Greedy action tensor([ 1.1612, -0.4142, -0.2424, 0.1667]) tensor([0.5487, 0.1135, 0.1348, 0.2030]) -Greedy action tensor([ 1.0810, -0.2957, -0.1048, 0.1640]) tensor([0.5108, 0.1289, 0.1561, 0.2042]) -Greedy action tensor([ 1.7493, -0.5196, -0.4552, 0.6127]) tensor([0.6516, 0.0674, 0.0719, 0.2091]) -Greedy action tensor([ 0.9943, -0.6841, -0.2426, 0.2481]) tensor([0.5125, 0.0957, 0.1488, 0.2430]) -Greedy action tensor([ 2.0032, -0.3114, -0.4579, 0.9222]) tensor([0.6564, 0.0649, 0.0560, 0.2227]) -Greedy action tensor([ 1.8198, -0.6213, -0.0453, 0.0649]) tensor([0.7068, 0.0615, 0.1095, 0.1222]) -Greedy action tensor([ 1.2044, -0.4635, -0.6403, 0.8643]) tensor([0.4858, 0.0916, 0.0768, 0.3457]) -Greedy action tensor([ 1.1134, -0.2219, -0.4139, 0.7051]) tensor([0.4662, 0.1227, 0.1012, 0.3099]) -Greedy action tensor([ 1.7844, -0.6162, -0.2849, 0.4168]) tensor([0.6795, 0.0616, 0.0858, 0.1731]) -Greedy action tensor([ 0.4065, -0.2473, 0.0895, -0.0836]) tensor([0.3495, 0.1818, 0.2546, 0.2141]) -Greedy action tensor([ 1.7895, -0.2021, -0.5779, 0.0666]) tensor([0.7099, 0.0969, 0.0665, 0.1267]) -Greedy action tensor([ 1.6916, -0.4289, -0.3066, 0.6740]) tensor([0.6184, 0.0742, 0.0838, 0.2235]) -Greedy action tensor([ 1.3341, -0.2496, -0.2724, 0.0823]) tensor([0.5911, 0.1213, 0.1186, 0.1690]) -Greedy action tensor([ 1.2179, -0.7203, -0.2852, 0.2793]) tensor([0.5690, 0.0819, 0.1266, 0.2226]) -Greedy action tensor([ 0.9236, -0.2459, -0.1954, 0.0670]) tensor([0.4850, 0.1506, 0.1584, 0.2059]) -Greedy action tensor([ 0.9072, -0.4399, -0.2084, 0.1592]) tensor([0.4852, 0.1261, 0.1590, 0.2297]) -Greedy action tensor([ 1.8111, -1.1128, -0.2137, 0.2657]) tensor([0.7148, 0.0384, 0.0944, 0.1524]) -Greedy action tensor([ 1.6592, -0.5880, -0.3973, 0.3374]) tensor([0.6666, 0.0705, 0.0853, 0.1777]) -Greedy action tensor([ 1.7544, -0.6653, -0.1468, 0.5445]) tensor([0.6508, 0.0579, 0.0972, 0.1941]) -Greedy action tensor([ 0.5978, -0.2063, -0.8912, 1.0262]) tensor([0.3117, 0.1395, 0.0703, 0.4784]) -Greedy action tensor([ 1.4598, -0.3442, -0.3802, 0.3065]) tensor([0.6101, 0.1004, 0.0969, 0.1925]) -Greedy action tensor([ 1.1398, -0.3052, 0.0406, -0.0116]) tensor([0.5305, 0.1251, 0.1767, 0.1677]) -Greedy action tensor([ 1.2886, 0.0024, -0.3707, 0.1609]) tensor([0.5585, 0.1543, 0.1063, 0.1808]) -Greedy action tensor([ 0.6539, -0.2845, -0.0962, 0.1219]) tensor([0.4080, 0.1596, 0.1927, 0.2397]) -Greedy action tensor([ 0.9290, -0.3162, -0.2227, 0.1360]) tensor([0.4863, 0.1400, 0.1537, 0.2200]) -Greedy action tensor([ 0.9841, -0.3012, -0.3260, 0.6314]) tensor([0.4446, 0.1230, 0.1200, 0.3125]) -Greedy action tensor([ 0.4127, -0.2196, -0.4979, 0.4624]) tensor([0.3351, 0.1780, 0.1348, 0.3521]) -Greedy action tensor([ 0.3323, -0.5441, -0.1404, 0.3720]) tensor([0.3247, 0.1352, 0.2024, 0.3378]) -Greedy action tensor([ 1.2514, -0.0135, -0.1602, 0.2240]) tensor([0.5308, 0.1498, 0.1294, 0.1900]) -Greedy action tensor([ 0.1773, -0.0181, -0.1064, 0.1763]) tensor([0.2798, 0.2301, 0.2107, 0.2795]) -Greedy action tensor([ 1.5367, -0.5335, -0.6917, 0.4457]) tensor([0.6371, 0.0804, 0.0686, 0.2140]) -Greedy action tensor([ 0.8144, 0.0617, -0.3939, -0.2118]) tensor([0.4699, 0.2214, 0.1404, 0.1684]) -Greedy action tensor([ 1.1842, -0.5203, 0.0214, 0.0978]) tensor([0.5459, 0.0993, 0.1706, 0.1842]) -Greedy action tensor([ 1.3303, -0.3536, -0.4154, 0.6784]) tensor([0.5316, 0.0987, 0.0928, 0.2770]) -Greedy action tensor([ 0.6645, -0.3299, -0.4034, 0.7456]) tensor([0.3574, 0.1322, 0.1228, 0.3876]) -Greedy action tensor([ 1.7985, -0.5124, -0.4284, 0.8091]) tensor([0.6334, 0.0628, 0.0683, 0.2355]) -Greedy action tensor([ 1.0506, -0.1724, -0.4711, 0.4382]) tensor([0.4867, 0.1433, 0.1063, 0.2638]) -Greedy action tensor([ 1.7769, -0.4259, -0.6782, 0.6036]) tensor([0.6641, 0.0734, 0.0570, 0.2054]) -Greedy action tensor([ 1.5560, -0.4580, -0.8182, 0.7056]) tensor([0.6047, 0.0807, 0.0563, 0.2583]) -Greedy action tensor([ 0.8361, -0.2922, 0.0484, 0.0453]) tensor([0.4480, 0.1450, 0.2038, 0.2032]) -Greedy action tensor([ 0.9955, -0.4225, -0.4061, 0.4571]) tensor([0.4826, 0.1169, 0.1188, 0.2817]) -Greedy action tensor([ 1.0408, -0.3117, -0.3173, 0.2363]) tensor([0.5094, 0.1317, 0.1310, 0.2279]) -Greedy action tensor([ 1.2596, -0.4817, -0.2663, 0.0310]) tensor([0.5933, 0.1040, 0.1290, 0.1737]) -Greedy action tensor([ 1.1644, -1.2812, 0.0130, -0.5241]) tensor([0.6299, 0.0546, 0.1992, 0.1164]) -Greedy action tensor([ 0.6939, -0.5893, -0.0746, -0.5524]) tensor([0.4930, 0.1366, 0.2286, 0.1418]) -Greedy action tensor([ 0.7502, -0.5741, -0.0090, -0.4846]) tensor([0.4938, 0.1314, 0.2311, 0.1437]) -Greedy action tensor([ 0.6292, -0.5344, -0.0343, -0.4796]) tensor([0.4635, 0.1448, 0.2388, 0.1529]) -Greedy action tensor([ 0.8078, -0.4186, -0.0683, -0.6209]) tensor([0.5130, 0.1505, 0.2136, 0.1229]) -Greedy action tensor([ 0.2208, 0.0706, -0.0574, -0.2640]) tensor([0.3093, 0.2661, 0.2342, 0.1904]) -Greedy action tensor([ 0.4371, -0.2449, -0.0076, -0.3015]) tensor([0.3810, 0.1927, 0.2442, 0.1821]) -Greedy action tensor([ 0.6425, -0.1181, -0.1326, -0.3868]) tensor([0.4376, 0.2045, 0.2016, 0.1563]) -Greedy action tensor([ 0.7366, -0.3688, 0.0438, -0.4961]) tensor([0.4711, 0.1560, 0.2356, 0.1373]) -Greedy action tensor([ 0.8399, -0.3280, -0.1745, -0.7739]) tensor([0.5340, 0.1661, 0.1936, 0.1063]) -Greedy action tensor([ 0.8160, -0.3586, -0.0340, -0.3235]) tensor([0.4863, 0.1502, 0.2079, 0.1556]) -Greedy action tensor([ 0.7639, -0.8558, -0.1059, -0.4106]) tensor([0.5192, 0.1028, 0.2176, 0.1604]) -Greedy action tensor([ 0.8108, -0.5259, -0.0792, -0.3987]) tensor([0.5072, 0.1332, 0.2083, 0.1513]) -Greedy action tensor([ 0.6445, -0.4969, -0.1842, -0.4745]) tensor([0.4802, 0.1533, 0.2097, 0.1568]) -Greedy action tensor([ 0.8331, -0.5262, 0.1601, -0.6059]) tensor([0.4989, 0.1282, 0.2546, 0.1183]) -Greedy action tensor([ 0.7752, -0.7106, 0.1536, -0.4710]) tensor([0.4876, 0.1103, 0.2619, 0.1402]) -Greedy action tensor([ 0.6107, -0.2950, -0.1182, -0.3793]) tensor([0.4428, 0.1790, 0.2136, 0.1645]) -Greedy action tensor([ 0.7199, -0.4560, 0.0535, -0.4211]) tensor([0.4669, 0.1441, 0.2398, 0.1492]) -Greedy action tensor([ 0.2119, -0.2405, 0.1123, -0.3823]) tensor([0.3233, 0.2056, 0.2926, 0.1785]) -Greedy action tensor([ 0.6364, -0.2171, -0.0237, -0.2318]) tensor([0.4233, 0.1803, 0.2188, 0.1777]) -Greedy action tensor([ 0.4281, -0.3727, -0.0358, -0.2759]) tensor([0.3888, 0.1745, 0.2444, 0.1923]) -Greedy action tensor([ 0.8212, -0.8022, 0.0130, -0.4373]) tensor([0.5189, 0.1024, 0.2313, 0.1474]) -Greedy action tensor([ 0.7047, -0.6069, -0.0410, -0.3970]) tensor([0.4817, 0.1298, 0.2285, 0.1601]) -Greedy action tensor([ 0.7033, -1.1791, -0.0765, -0.5141]) tensor([0.5245, 0.0798, 0.2405, 0.1552]) -Greedy action tensor([ 0.9144, -0.4138, -0.1176, -0.2995]) tensor([0.5213, 0.1381, 0.1857, 0.1548]) -Greedy action tensor([ 0.3864, -0.1817, -0.0034, -0.2873]) tensor([0.3632, 0.2058, 0.2459, 0.1851]) -Greedy action tensor([ 0.6210, -0.4340, -0.0419, -0.3368]) tensor([0.4450, 0.1549, 0.2293, 0.1708]) -Greedy action tensor([ 0.9343, -0.0037, -0.1356, -0.5758]) tensor([0.5114, 0.2002, 0.1754, 0.1130]) -Greedy action tensor([ 7.4585e-01, -9.4867e-01, 7.1740e-04, -3.1146e-01]) tensor([0.4986, 0.0916, 0.2367, 0.1732]) -Greedy action tensor([ 0.5025, -0.8633, 0.0823, -0.7197]) tensor([0.4532, 0.1156, 0.2977, 0.1335]) -Greedy action tensor([ 0.6435, 0.2054, -0.3278, -0.0169]) tensor([0.3936, 0.2540, 0.1490, 0.2034]) -Greedy action tensor([ 0.4602, -0.0112, -0.1405, -0.0632]) tensor([0.3616, 0.2257, 0.1983, 0.2143]) -Greedy action tensor([ 0.3653, -0.1504, -0.1017, -0.5455]) tensor([0.3808, 0.2274, 0.2387, 0.1531]) -Greedy action tensor([ 1.2667, -0.3746, 0.2879, -0.4939]) tensor([0.5742, 0.1112, 0.2158, 0.0987]) -Greedy action tensor([ 0.4554, 0.0880, -0.0049, -0.2918]) tensor([0.3575, 0.2476, 0.2256, 0.1693]) -Greedy action tensor([ 0.5492, -0.0211, -0.0797, -0.4326]) tensor([0.4043, 0.2286, 0.2156, 0.1515]) -Greedy action tensor([ 0.8758, 0.2548, 0.1929, -0.3927]) tensor([0.4303, 0.2313, 0.2174, 0.1210]) -Greedy action tensor([ 0.6337, -0.5724, -0.1487, -0.5691]) tensor([0.4862, 0.1455, 0.2223, 0.1460]) -Greedy action tensor([ 0.4420, -0.1096, 0.0347, -0.3573]) tensor([0.3716, 0.2141, 0.2473, 0.1671]) -Greedy action tensor([ 0.5703, 0.0730, -0.1839, -0.1283]) tensor([0.3882, 0.2361, 0.1826, 0.1930]) -Greedy action tensor([ 0.6699, -0.3795, -0.0465, -0.2467]) tensor([0.4467, 0.1564, 0.2182, 0.1786]) -Greedy action tensor([ 0.3778, 0.0159, -0.0737, 0.0565]) tensor([0.3270, 0.2277, 0.2082, 0.2371]) -Greedy action tensor([ 0.6668, -0.5589, -0.0655, -0.2154]) tensor([0.4570, 0.1342, 0.2197, 0.1891]) -Greedy action tensor([ 0.6715, -0.2522, -0.0374, -0.3539]) tensor([0.4449, 0.1766, 0.2189, 0.1595]) -Greedy action tensor([ 0.4548, -0.1336, -0.0060, -0.0139]) tensor([0.3556, 0.1975, 0.2243, 0.2226]) -Greedy action tensor([ 0.5360, -0.3798, -0.0734, -0.4033]) tensor([0.4283, 0.1714, 0.2329, 0.1674]) -Greedy action tensor([ 0.8567, -0.8745, 0.1409, -0.7144]) tensor([0.5337, 0.0945, 0.2609, 0.1109]) -Greedy action tensor([ 0.5486, -0.1082, 0.0529, -0.0569]) tensor([0.3741, 0.1939, 0.2278, 0.2042]) -Greedy action tensor([ 0.4563, -0.2650, -0.0529, -0.4052]) tensor([0.3985, 0.1937, 0.2395, 0.1684]) -Greedy action tensor([ 0.7870, -0.2064, -0.0749, -0.4774]) tensor([0.4819, 0.1785, 0.2035, 0.1361]) -Greedy action tensor([ 0.9424, -0.7764, 0.1200, -0.2609]) tensor([0.5211, 0.0934, 0.2290, 0.1565]) -Greedy action tensor([ 0.6147, -0.1502, 0.0279, -0.2658]) tensor([0.4105, 0.1910, 0.2283, 0.1702]) -Greedy action tensor([ 0.6151, -0.6283, 0.0335, -0.1829]) tensor([0.4352, 0.1255, 0.2433, 0.1960]) -Greedy action tensor([ 0.7913, -0.6159, -0.0825, -0.4765]) tensor([0.5145, 0.1260, 0.2147, 0.1448]) -Greedy action tensor([ 0.9654, -1.3668, -0.1631, -0.8056]) tensor([0.6286, 0.0610, 0.2034, 0.1070]) -Greedy action tensor([ 0.6684, -0.5143, -0.1751, -0.5328]) tensor([0.4908, 0.1504, 0.2111, 0.1476]) -Greedy action tensor([ 1.1156, -0.9028, -0.0875, -0.7248]) tensor([0.6282, 0.0835, 0.1886, 0.0997]) -Greedy action tensor([ 0.5119, -0.3281, 0.0434, -0.5360]) tensor([0.4152, 0.1793, 0.2599, 0.1456]) -Greedy action tensor([ 0.5552, 0.0354, -0.0359, -0.3790]) tensor([0.3935, 0.2340, 0.2179, 0.1546]) -Greedy action tensor([ 0.8727, -0.3647, -0.0545, -0.5839]) tensor([0.5212, 0.1512, 0.2062, 0.1214]) -Greedy action tensor([ 0.7296, -0.2798, 0.0776, -0.4462]) tensor([0.4558, 0.1661, 0.2375, 0.1406]) -Greedy action tensor([ 0.7201, -0.5603, 0.0564, -0.4727]) tensor([0.4771, 0.1326, 0.2456, 0.1447]) -Greedy action tensor([ 0.6650, -0.5039, 0.0248, -0.5977]) tensor([0.4715, 0.1465, 0.2486, 0.1334]) -Greedy action tensor([ 0.6130, -0.3498, 0.1458, -0.3996]) tensor([0.4216, 0.1610, 0.2642, 0.1532]) -Greedy action tensor([ 0.5000, -0.2219, 0.1859, -0.4713]) tensor([0.3854, 0.1872, 0.2815, 0.1459]) -Greedy action tensor([ 0.4207, 0.1064, -0.1706, 0.0373]) tensor([0.3372, 0.2463, 0.1867, 0.2298]) -Greedy action tensor([ 0.6872, -0.1809, 0.1005, -0.3830]) tensor([0.4313, 0.1810, 0.2398, 0.1479]) -Greedy action tensor([ 0.7733, -0.1251, -0.0570, -0.0254]) tensor([0.4361, 0.1776, 0.1901, 0.1962]) -Greedy action tensor([ 0.6198, -0.4028, -0.1106, -0.3349]) tensor([0.4492, 0.1616, 0.2164, 0.1729]) -Greedy action tensor([ 0.0152, -0.0256, -0.1246, -0.2691]) tensor([0.2792, 0.2680, 0.2427, 0.2101]) -Greedy action tensor([ 0.8626, -0.4053, 0.0191, -0.3893]) tensor([0.5006, 0.1409, 0.2154, 0.1432]) -Greedy action tensor([ 1.0209, -1.1245, 0.0665, -0.6264]) tensor([0.5901, 0.0691, 0.2272, 0.1136]) -Greedy action tensor([ 0.7497, -0.5169, -0.1364, -0.4069]) tensor([0.4979, 0.1403, 0.2052, 0.1566]) -Greedy action tensor([ 0.4827, -0.1248, 0.0714, -0.2857]) tensor([0.3744, 0.2039, 0.2481, 0.1736]) -Greedy action tensor([ 0.1933, 0.1528, -0.0875, -0.4235]) tensor([0.3072, 0.2950, 0.2320, 0.1658]) -Greedy action tensor([ 0.3212, 0.2228, -0.2176, -0.1304]) tensor([0.3199, 0.2899, 0.1866, 0.2036]) -Greedy action tensor([ 0.2640, -0.1761, 0.1529, 0.1219]) tensor([0.2936, 0.1890, 0.2627, 0.2547]) -Greedy action tensor([ 0.6271, -0.3450, -0.0438, -0.3681]) tensor([0.4426, 0.1675, 0.2263, 0.1636]) -Greedy action tensor([ 0.7473, -0.5549, -0.0301, -0.4163]) tensor([0.4893, 0.1330, 0.2249, 0.1528]) -Greedy action tensor([ 0.3821, 0.1565, -0.0818, -0.1914]) tensor([0.3344, 0.2669, 0.2103, 0.1885]) -Greedy action tensor([ 0.4099, -0.1170, -0.0312, -0.2196]) tensor([0.3614, 0.2134, 0.2325, 0.1926]) -Greedy action tensor([-0.6282, -0.3927, 1.0824, 1.5416]) tensor([0.0604, 0.0764, 0.3342, 0.5290]) -Greedy action tensor([-1.2258, -0.5389, 0.3966, 0.5303]) tensor([0.0722, 0.1436, 0.3659, 0.4182]) -Greedy action tensor([-1.5427, -0.5555, 0.5804, 0.0934]) tensor([0.0582, 0.1562, 0.4866, 0.2990]) -Greedy action tensor([-1.6664, -0.3119, 0.5060, -0.0577]) tensor([0.0536, 0.2078, 0.4707, 0.2679]) -Greedy action tensor([-1.8167, -0.2298, 0.5703, -0.0989]) tensor([0.0448, 0.2188, 0.4870, 0.2494]) -Greedy action tensor([-1.7919, -0.4674, 0.6512, -0.0337]) tensor([0.0453, 0.1704, 0.5214, 0.2629]) -Greedy action tensor([-1.6700, -0.2816, 0.4972, -0.0303]) tensor([0.0529, 0.2121, 0.4622, 0.2727]) -Greedy action tensor([-1.8177, -0.4834, 0.6467, -0.0738]) tensor([0.0449, 0.1705, 0.5278, 0.2568]) -Greedy action tensor([-1.8177, -0.5185, 0.7213, 0.0568]) tensor([0.0419, 0.1537, 0.5311, 0.2732]) -Greedy action tensor([-1.8136, -0.4502, 0.5949, -0.1150]) tensor([0.0465, 0.1819, 0.5172, 0.2543]) -Greedy action tensor([-1.9075, -0.4174, 0.6445, -0.1539]) tensor([0.0416, 0.1845, 0.5337, 0.2402]) -Greedy action tensor([-1.3998, -0.2917, 0.6555, 0.5546]) tensor([0.0529, 0.1603, 0.4132, 0.3736]) -Greedy action tensor([-0.9173, -0.5877, 0.1909, 0.3374]) tensor([0.1120, 0.1558, 0.3393, 0.3929]) -Greedy action tensor([-1.8437, -0.4307, 0.6140, -0.1147]) tensor([0.0446, 0.1832, 0.5209, 0.2513]) -Greedy action tensor([-0.7076, -0.1909, -0.5655, -0.0528]) tensor([0.1738, 0.2914, 0.2003, 0.3345]) -Greedy action tensor([-1.8436, -0.4783, 0.7651, 0.0931]) tensor([0.0393, 0.1540, 0.5340, 0.2727]) -Greedy action tensor([-1.8615, -0.8336, 0.9799, 0.6845]) tensor([0.0297, 0.0830, 0.5087, 0.3786]) -Greedy action tensor([-1.2581, -0.5085, 0.4006, 0.5792]) tensor([0.0683, 0.1445, 0.3586, 0.4287]) -Greedy action tensor([-1.8227, -0.4698, 0.6901, -0.0032]) tensor([0.0428, 0.1655, 0.5278, 0.2639]) -Greedy action tensor([-1.6886, -0.2276, 0.6791, 0.1251]) tensor([0.0452, 0.1949, 0.4826, 0.2773]) -Greedy action tensor([-0.9461, -0.5081, 0.4452, -0.3352]) tensor([0.1189, 0.1842, 0.4779, 0.2190]) -Greedy action tensor([-1.1369, -0.5564, 0.2288, 0.3934]) tensor([0.0883, 0.1578, 0.3460, 0.4079]) -Greedy action tensor([-1.8116, -0.4062, 0.6347, -0.2124]) tensor([0.0464, 0.1890, 0.5352, 0.2294]) -Greedy action tensor([-1.1628, -0.5251, 0.4860, 0.7616]) tensor([0.0669, 0.1266, 0.3480, 0.4584]) -Greedy action tensor([-1.7931, -0.4629, 0.8172, 0.2450]) tensor([0.0384, 0.1451, 0.5220, 0.2945]) -Greedy action tensor([-1.6266, -0.5198, 0.5759, 0.2205]) tensor([0.0515, 0.1558, 0.4660, 0.3267]) -Greedy action tensor([-1.1317, -0.5394, 0.4799, 0.9295]) tensor([0.0638, 0.1154, 0.3197, 0.5012]) -Greedy action tensor([-1.9018, -0.4698, 0.6850, -0.1312]) tensor([0.0411, 0.1720, 0.5457, 0.2413]) -Greedy action tensor([-1.5469, -0.4794, 0.4771, 0.0250]) tensor([0.0614, 0.1785, 0.4645, 0.2956]) -Greedy action tensor([-1.8868, -0.4548, 0.6340, -0.1494]) tensor([0.0429, 0.1796, 0.5336, 0.2438]) -Greedy action tensor([-2.0261, -0.7488, 1.4810, 0.7316]) tensor([0.0186, 0.0668, 0.6210, 0.2935]) -Greedy action tensor([-0.9860, -0.6371, 0.2147, 0.2291]) tensor([0.1098, 0.1556, 0.3647, 0.3700]) -Greedy action tensor([-1.8719, -0.4068, 0.6251, -0.1363]) tensor([0.0432, 0.1870, 0.5247, 0.2451]) -Greedy action tensor([-1.9406, -0.4558, 0.6640, -0.1784]) tensor([0.0404, 0.1782, 0.5462, 0.2352]) -Greedy action tensor([-1.8678, -0.4601, 0.6293, -0.1289]) tensor([0.0436, 0.1783, 0.5299, 0.2482]) -Greedy action tensor([-1.6975, -0.4698, 0.7533, 0.3156]) tensor([0.0426, 0.1453, 0.4936, 0.3186]) -Greedy action tensor([-1.8787, -0.4441, 0.6378, -0.1317]) tensor([0.0429, 0.1800, 0.5311, 0.2460]) -Greedy action tensor([-1.5645, -0.5142, 0.4811, 0.1622]) tensor([0.0581, 0.1661, 0.4493, 0.3266]) -Greedy action tensor([-1.5995, -0.5163, 0.4751, 0.0347]) tensor([0.0587, 0.1734, 0.4672, 0.3008]) -Greedy action tensor([-1.1530, -0.5055, 0.7785, 1.0107]) tensor([0.0540, 0.1032, 0.3727, 0.4701]) -Greedy action tensor([-1.3734, -0.5699, 0.3546, 0.1640]) tensor([0.0740, 0.1652, 0.4165, 0.3442]) -Greedy action tensor([-1.3037, -0.5639, 0.3110, 0.3027]) tensor([0.0763, 0.1599, 0.3835, 0.3803]) -Greedy action tensor([-1.5813, -0.6002, 0.5568, -0.1637]) tensor([0.0614, 0.1639, 0.5212, 0.2535]) -Greedy action tensor([-1.2725, -0.6380, 0.3468, 0.2161]) tensor([0.0809, 0.1525, 0.4083, 0.3583]) -Greedy action tensor([-1.8847, -0.5224, 0.5789, -0.1864]) tensor([0.0452, 0.1766, 0.5311, 0.2471]) -Greedy action tensor([-0.2929, 0.3529, 0.6177, 1.5811]) tensor([0.0840, 0.1602, 0.2088, 0.5471]) -Greedy action tensor([-1.1251, -0.0421, 0.2714, 0.0138]) tensor([0.0899, 0.2657, 0.3635, 0.2809]) -Greedy action tensor([-1.3157, -0.5336, 0.3514, 0.3445]) tensor([0.0728, 0.1591, 0.3854, 0.3827]) -Greedy action tensor([ 0.0939, -0.0565, 0.8956, 1.7713]) tensor([0.1059, 0.0911, 0.2361, 0.5668]) -Greedy action tensor([-1.8586, -0.4172, 0.6614, -0.1006]) tensor([0.0426, 0.1802, 0.5299, 0.2473]) -Greedy action tensor([-1.8150, -0.4555, 0.5956, -0.0933]) tensor([0.0462, 0.1801, 0.5151, 0.2586]) -Greedy action tensor([-0.6532, -0.6109, 0.8684, 1.3483]) tensor([0.0713, 0.0744, 0.3266, 0.5277]) -Greedy action tensor([-1.2129, -0.0440, 0.7976, 1.0376]) tensor([0.0472, 0.1520, 0.3526, 0.4482]) -Greedy action tensor([-1.9882, -0.5148, 1.0288, 0.3576]) tensor([0.0276, 0.1204, 0.5638, 0.2882]) -Greedy action tensor([-1.8516, -0.5988, 1.0014, 0.1886]) tensor([0.0339, 0.1185, 0.5872, 0.2605]) -Greedy action tensor([-0.8043, -0.8324, 0.3614, 0.3250]) tensor([0.1209, 0.1175, 0.3877, 0.3739]) -Greedy action tensor([-1.3173, -0.8029, 0.1107, -0.0110]) tensor([0.0949, 0.1588, 0.3959, 0.3505]) -Greedy action tensor([-1.6170, -0.5300, 0.5421, 0.0523]) tensor([0.0558, 0.1653, 0.4830, 0.2959]) -Greedy action tensor([-1.0898, -0.6283, 0.9045, 1.3071]) tensor([0.0478, 0.0758, 0.3512, 0.5252]) -Greedy action tensor([-1.8347, -0.4090, 1.0091, 0.6213]) tensor([0.0294, 0.1224, 0.5053, 0.3429]) -Greedy action tensor([-1.9383, -0.4516, 0.6618, -0.1770]) tensor([0.0405, 0.1790, 0.5450, 0.2356]) -Greedy action tensor([-1.7726, -0.4869, 0.6483, 0.0542]) tensor([0.0453, 0.1638, 0.5096, 0.2813]) -Greedy action tensor([-1.9167, -0.4434, 0.6505, -0.1622]) tensor([0.0414, 0.1805, 0.5390, 0.2391]) -Greedy action tensor([-1.5885, -0.3875, 0.8852, 0.6093]) tensor([0.0397, 0.1319, 0.4710, 0.3574]) -Greedy action tensor([-1.9113, -0.3941, 0.6426, -0.1463]) tensor([0.0412, 0.1880, 0.5300, 0.2408]) -Greedy action tensor([-1.8458, -0.4051, 0.6114, -0.1201]) tensor([0.0444, 0.1876, 0.5185, 0.2495]) -Greedy action tensor([-1.7347, -0.4814, 0.5652, -0.1118]) tensor([0.0512, 0.1792, 0.5103, 0.2593]) -Greedy action tensor([-1.5463, -0.5253, 0.4632, 0.0901]) tensor([0.0611, 0.1695, 0.4556, 0.3137]) -Greedy action tensor([-1.9520, -0.5979, 0.8183, -0.0719]) tensor([0.0365, 0.1414, 0.5828, 0.2393]) -Greedy action tensor([-1.8998, -0.2719, 0.6150, -0.1676]) tensor([0.0415, 0.2113, 0.5128, 0.2345]) -Greedy action tensor([-1.8049, -0.3473, 0.6715, 0.0422]) tensor([0.0425, 0.1825, 0.5056, 0.2694]) -Greedy action tensor([-1.4390, -0.5116, 0.4367, 0.2769]) tensor([0.0640, 0.1619, 0.4179, 0.3562]) -Greedy action tensor([-1.6963, -0.3632, 0.6119, 0.1499]) tensor([0.0472, 0.1790, 0.4747, 0.2991]) -Greedy action tensor([-1.5186, -0.4676, 0.5178, 0.2171]) tensor([0.0582, 0.1664, 0.4456, 0.3299]) -Greedy action tensor([-1.3731, -0.4697, 0.4411, 0.3515]) tensor([0.0657, 0.1622, 0.4033, 0.3688]) -Greedy action tensor([-1.9676, -0.1967, 0.6410, -0.0731]) tensor([0.0369, 0.2168, 0.5010, 0.2453]) -Greedy action tensor([-1.7256, -0.5086, 0.8891, 0.4048]) tensor([0.0378, 0.1276, 0.5164, 0.3182]) -Greedy action tensor([-1.8085, -0.4808, 0.6853, 0.0072]) tensor([0.0434, 0.1638, 0.5258, 0.2669]) -Greedy action tensor([-1.9026, -0.4332, 0.6412, -0.1536]) tensor([0.0420, 0.1825, 0.5343, 0.2413]) -Greedy action tensor([-1.8149, -0.4515, 0.6141, -0.0772]) tensor([0.0456, 0.1782, 0.5172, 0.2591]) -Greedy action tensor([-1.7198, -0.4758, 0.8886, 0.5098]) tensor([0.0366, 0.1269, 0.4965, 0.3400]) -Greedy action tensor([ 1.6134, -0.7511, -0.2422, 0.2854]) tensor([0.6599, 0.0620, 0.1032, 0.1749]) -Greedy action tensor([ 1.9385, -0.4764, -0.2513, 0.3331]) tensor([0.7132, 0.0637, 0.0798, 0.1432]) -Greedy action tensor([ 1.7255, -0.0780, -0.4289, 0.4384]) tensor([0.6424, 0.1058, 0.0745, 0.1773]) -Greedy action tensor([ 1.1113, -0.0743, -0.9250, 0.3880]) tensor([0.5205, 0.1590, 0.0679, 0.2525]) -Greedy action tensor([ 1.2776, 0.0912, -0.3724, -0.2545]) tensor([0.5836, 0.1782, 0.1121, 0.1261]) -Greedy action tensor([ 1.3797, -0.9872, -0.0784, 0.3614]) tensor([0.5925, 0.0556, 0.1379, 0.2140]) -Greedy action tensor([ 1.4022, -0.7412, -0.0939, 0.3959]) tensor([0.5859, 0.0687, 0.1312, 0.2142]) -Greedy action tensor([ 1.6218, -0.0596, -0.3815, 0.0449]) tensor([0.6546, 0.1218, 0.0883, 0.1352]) -Greedy action tensor([ 1.0280, -0.3521, -0.4404, 0.0696]) tensor([0.5361, 0.1348, 0.1235, 0.2056]) -Greedy action tensor([ 1.3916, 0.1777, -0.3192, 0.3697]) tensor([0.5442, 0.1616, 0.0983, 0.1959]) -Greedy action tensor([ 1.2555, 0.0304, -0.3368, 0.5456]) tensor([0.5028, 0.1477, 0.1023, 0.2472]) -Greedy action tensor([ 1.9298, -0.3775, -0.4573, 0.5273]) tensor([0.6957, 0.0692, 0.0639, 0.1711]) -Greedy action tensor([ 1.2349, -0.2856, -0.8517, 0.5608]) tensor([0.5399, 0.1180, 0.0670, 0.2751]) -Greedy action tensor([ 1.6172, -0.6907, -0.2117, 0.5246]) tensor([0.6268, 0.0623, 0.1007, 0.2102]) -Greedy action tensor([ 1.3863, -0.3163, -0.6133, 0.1863]) tensor([0.6177, 0.1126, 0.0836, 0.1861]) -Greedy action tensor([ 1.0693, -0.2379, 0.0927, -0.1352]) tensor([0.5136, 0.1390, 0.1934, 0.1540]) -Greedy action tensor([ 1.6401, -1.0229, -0.1655, 0.1981]) tensor([0.6800, 0.0474, 0.1118, 0.1608]) -Greedy action tensor([ 0.8652, -0.4824, -0.3766, 0.2246]) tensor([0.4818, 0.1252, 0.1392, 0.2539]) -Greedy action tensor([ 0.6597, -0.2311, -0.0224, -0.0705]) tensor([0.4171, 0.1711, 0.2109, 0.2010]) -Greedy action tensor([ 1.2292, -0.8765, 0.0620, 0.2883]) tensor([0.5485, 0.0668, 0.1707, 0.2140]) -Greedy action tensor([ 0.7004, -0.3635, -0.1155, 0.1406]) tensor([0.4240, 0.1463, 0.1875, 0.2422]) -Greedy action tensor([ 1.2890, -0.3126, -0.1485, 0.1912]) tensor([0.5641, 0.1137, 0.1340, 0.1882]) -Greedy action tensor([ 1.5632, -0.6855, -0.1246, 0.5497]) tensor([0.6048, 0.0638, 0.1118, 0.2195]) -Greedy action tensor([ 1.5043, -0.8408, -0.1620, 0.1593]) tensor([0.6471, 0.0620, 0.1223, 0.1686]) -Greedy action tensor([ 1.3606, -0.3877, -0.4744, 0.0709]) tensor([0.6215, 0.1082, 0.0992, 0.1711]) -Greedy action tensor([ 0.8862, -0.2276, -0.0736, 0.1584]) tensor([0.4557, 0.1496, 0.1745, 0.2201]) -Greedy action tensor([ 0.9875, -0.5816, -0.3214, 0.1845]) tensor([0.5191, 0.1081, 0.1402, 0.2326]) -Greedy action tensor([ 1.5203, -0.5073, -0.2432, 0.5375]) tensor([0.5962, 0.0785, 0.1022, 0.2231]) -Greedy action tensor([ 1.4622, -0.2429, -0.3430, -0.2897]) tensor([0.6581, 0.1196, 0.1082, 0.1141]) -Greedy action tensor([ 1.4500, -0.3097, -0.9313, 0.4984]) tensor([0.6058, 0.1043, 0.0560, 0.2339]) -Greedy action tensor([ 1.6169, -0.7047, -0.1356, 0.2189]) tensor([0.6585, 0.0646, 0.1142, 0.1627]) -Greedy action tensor([ 1.9457, -0.6707, -0.2329, 0.5630]) tensor([0.6958, 0.0508, 0.0788, 0.1746]) -Greedy action tensor([ 1.9157, -0.8636, -0.1365, 0.4983]) tensor([0.6979, 0.0433, 0.0896, 0.1691]) -Greedy action tensor([ 1.3543, -0.2153, -0.3585, 0.2659]) tensor([0.5796, 0.1206, 0.1045, 0.1952]) -Greedy action tensor([ 1.5723, -0.4161, -0.4613, 0.2787]) tensor([0.6485, 0.0888, 0.0849, 0.1779]) -Greedy action tensor([ 1.1702, -0.1614, -0.1183, 0.0411]) tensor([0.5367, 0.1417, 0.1480, 0.1735]) -Greedy action tensor([ 1.6812, -0.3343, -0.5573, 0.1190]) tensor([0.6899, 0.0919, 0.0735, 0.1446]) -Greedy action tensor([ 1.5717, -0.4626, -0.2142, 0.6243]) tensor([0.5931, 0.0776, 0.0994, 0.2300]) -Greedy action tensor([ 1.1697, -0.3958, -0.5136, 0.3447]) tensor([0.5455, 0.1140, 0.1013, 0.2391]) -Greedy action tensor([ 0.7047, -0.2467, 0.1312, -0.0271]) tensor([0.4114, 0.1589, 0.2318, 0.1979]) -Greedy action tensor([ 1.6645, -0.2952, -0.2005, 0.6768]) tensor([0.5994, 0.0845, 0.0928, 0.2233]) -Greedy action tensor([ 1.3862, -0.7062, -0.1372, 0.3272]) tensor([0.5924, 0.0731, 0.1291, 0.2054]) -Greedy action tensor([ 1.7006, -0.7981, -0.2766, 0.3126]) tensor([0.6802, 0.0559, 0.0942, 0.1697]) -Greedy action tensor([ 1.2957, -0.1963, -0.6145, 0.1400]) tensor([0.5925, 0.1333, 0.0877, 0.1865]) -Greedy action tensor([ 1.2810, -0.1533, -0.3950, 0.3692]) tensor([0.5473, 0.1304, 0.1024, 0.2199]) -Greedy action tensor([ 1.3594, -0.2850, -0.2585, 0.3630]) tensor([0.5680, 0.1097, 0.1126, 0.2097]) -Greedy action tensor([ 1.4207, -0.5662, -0.5344, 0.4543]) tensor([0.6027, 0.0826, 0.0853, 0.2293]) -Greedy action tensor([ 1.4580, -0.2200, -0.0900, 0.2418]) tensor([0.5897, 0.1101, 0.1254, 0.1748]) -Greedy action tensor([ 0.8083, -0.3223, -0.3110, 0.2711]) tensor([0.4477, 0.1445, 0.1462, 0.2616]) -Greedy action tensor([ 0.6900, -0.0950, -0.0679, 0.0851]) tensor([0.4047, 0.1846, 0.1897, 0.2210]) -Greedy action tensor([ 1.2673, -0.4945, -0.2995, 0.5421]) tensor([0.5363, 0.0921, 0.1119, 0.2597]) -Greedy action tensor([ 0.6097, -0.2792, -0.2266, 0.2522]) tensor([0.3931, 0.1616, 0.1703, 0.2749]) -Greedy action tensor([ 2.2890, -0.9034, -0.2737, 0.7369]) tensor([0.7519, 0.0309, 0.0580, 0.1592]) -Greedy action tensor([ 0.9943, -0.7371, -0.3447, 0.8201]) tensor([0.4387, 0.0777, 0.1150, 0.3686]) -Greedy action tensor([ 0.9389, -0.4030, -0.2109, 0.4583]) tensor([0.4553, 0.1190, 0.1442, 0.2816]) -Greedy action tensor([ 1.7716, -0.7423, -0.4839, 0.2836]) tensor([0.7084, 0.0573, 0.0743, 0.1600]) -Greedy action tensor([ 1.3657, -0.2465, -0.3728, 0.3526]) tensor([0.5753, 0.1147, 0.1011, 0.2089]) -Greedy action tensor([ 1.6281, -0.5854, -0.3321, 0.5806]) tensor([0.6246, 0.0683, 0.0880, 0.2191]) -Greedy action tensor([ 1.4696, -0.6528, -0.3474, 0.4603]) tensor([0.6073, 0.0727, 0.0987, 0.2213]) -Greedy action tensor([ 0.9103, -0.2579, -0.0864, 0.4824]) tensor([0.4288, 0.1333, 0.1583, 0.2795]) -Greedy action tensor([ 1.5166, -0.3555, 0.0022, 0.3223]) tensor([0.5964, 0.0917, 0.1312, 0.1807]) -Greedy action tensor([ 1.6480, -0.9278, -0.2275, 0.4754]) tensor([0.6498, 0.0494, 0.0996, 0.2012]) -Greedy action tensor([ 1.5969, -0.5436, -0.3570, 0.4774]) tensor([0.6306, 0.0742, 0.0894, 0.2059]) -Greedy action tensor([ 1.4584, -0.7785, -0.5484, 1.0708]) tensor([0.5209, 0.0556, 0.0700, 0.3535]) -Greedy action tensor([ 2.2005, -1.1695, -0.6596, 0.9713]) tensor([0.7224, 0.0248, 0.0414, 0.2113]) -Greedy action tensor([ 0.9507, -0.5242, -0.3129, 0.1463]) tensor([0.5105, 0.1168, 0.1443, 0.2284]) -Greedy action tensor([ 1.0713, -0.6343, -0.1926, 0.2733]) tensor([0.5224, 0.0949, 0.1476, 0.2352]) -Greedy action tensor([ 0.9861, -0.4568, -0.2410, 0.1615]) tensor([0.5082, 0.1201, 0.1490, 0.2228]) -Greedy action tensor([ 1.2074, 0.2526, 0.1598, -0.2230]) tensor([0.5064, 0.1949, 0.1776, 0.1211]) -Greedy action tensor([ 0.7745, -0.2852, -0.3059, 0.7351]) tensor([0.3777, 0.1309, 0.1282, 0.3632]) -Greedy action tensor([ 1.7469, -0.8464, -0.3838, 0.3591]) tensor([0.6929, 0.0518, 0.0823, 0.1730]) -Greedy action tensor([ 0.7868, -0.2360, -0.0736, 0.2761]) tensor([0.4197, 0.1509, 0.1775, 0.2518]) -Greedy action tensor([ 0.9511, -0.5150, -0.1632, -0.0205]) tensor([0.5161, 0.1191, 0.1694, 0.1953]) -Greedy action tensor([ 2.5089, -1.4467, -0.2490, 0.7269]) tensor([0.7994, 0.0153, 0.0507, 0.1345]) -Greedy action tensor([ 1.4151, -0.3400, -0.5494, 0.1532]) tensor([0.6265, 0.1083, 0.0878, 0.1774]) -Greedy action tensor([ 0.8767, -0.9803, 0.1950, 0.0372]) tensor([0.4776, 0.0746, 0.2416, 0.2063]) -Greedy action tensor([ 1.6189, -0.4406, -0.2480, 0.4630]) tensor([0.6262, 0.0798, 0.0968, 0.1971]) -Greedy action tensor([ 1.2391, -0.3138, -0.0918, 0.2057]) tensor([0.5459, 0.1155, 0.1443, 0.1943]) -Greedy action tensor([ 1.6880, -0.1091, -0.5770, 0.3320]) tensor([0.6547, 0.1085, 0.0680, 0.1687]) -Greedy action tensor([-0.1072, -0.0252, -0.2989, 0.0973]) tensor([0.2417, 0.2623, 0.1995, 0.2965]) -Greedy action tensor([ 1.1383, -0.8406, -0.7126, 1.1289]) tensor([0.4375, 0.0605, 0.0687, 0.4334]) -Greedy action tensor([ 0.0689, -1.0951, -0.6193, 0.5123]) tensor([0.2965, 0.0926, 0.1490, 0.4619]) -Greedy action tensor([ 2.1604, -2.0262, 0.8880, 1.0137]) tensor([0.6200, 0.0094, 0.1737, 0.1969]) -Greedy action tensor([-0.2656, -0.4728, 0.0376, 0.9426]) tensor([0.1535, 0.1248, 0.2079, 0.5138]) -Greedy action tensor([-0.6582, -0.6622, -0.0917, 0.6504]) tensor([0.1341, 0.1335, 0.2362, 0.4962]) -Greedy action tensor([ 1.0778, -2.2896, -0.0213, 0.6841]) tensor([0.4897, 0.0169, 0.1631, 0.3303]) -Greedy action tensor([0.0023, 0.6646, 0.5548, 0.0922]) tensor([0.1733, 0.3360, 0.3011, 0.1896]) -Greedy action tensor([-0.0950, -0.7190, 0.6898, 0.4779]) tensor([0.1818, 0.0974, 0.3984, 0.3224]) -Greedy action tensor([ 1.4329, -0.8506, 1.4187, 0.9940]) tensor([0.3660, 0.0373, 0.3608, 0.2360]) -Greedy action tensor([ 1.3978, -0.5395, -0.1554, 0.3664]) tensor([0.5841, 0.0842, 0.1236, 0.2082]) -Greedy action tensor([-0.7184, -0.3552, 1.1553, -1.4288]) tensor([0.1059, 0.1523, 0.6897, 0.0521]) -Greedy action tensor([ 0.5655, 0.0821, 0.4833, -0.6356]) tensor([0.3523, 0.2173, 0.3245, 0.1060]) -Greedy action tensor([ 0.3351, -1.6538, -0.4645, 0.8877]) tensor([0.3008, 0.0412, 0.1352, 0.5228]) -Greedy action tensor([-0.5337, -0.0971, -0.0580, -0.5011]) tensor([0.1927, 0.2982, 0.3101, 0.1991]) -Greedy action tensor([ 0.5142, -0.0686, -0.4469, 0.0298]) tensor([0.3911, 0.2184, 0.1496, 0.2409]) -Greedy action tensor([ 0.4842, -1.0308, 1.0259, 0.7306]) tensor([0.2371, 0.0521, 0.4075, 0.3033]) -Greedy action tensor([-0.8362, -1.9653, 1.0319, -0.0852]) tensor([0.1008, 0.0326, 0.6529, 0.2137]) -Greedy action tensor([-0.5091, -0.3807, -0.1832, 1.1256]) tensor([0.1156, 0.1314, 0.1602, 0.5928]) -Greedy action tensor([ 0.3623, -1.0513, 0.0020, 1.1736]) tensor([0.2386, 0.0580, 0.1664, 0.5370]) -Greedy action tensor([ 0.5340, -0.1861, 0.6093, -0.2067]) tensor([0.3288, 0.1600, 0.3545, 0.1567]) -Greedy action tensor([-0.0134, -2.0999, -0.1358, 1.6446]) tensor([0.1378, 0.0171, 0.1219, 0.7232]) -Greedy action tensor([ 0.8942, 0.0508, -0.3487, 0.6012]) tensor([0.4057, 0.1746, 0.1171, 0.3027]) -Greedy action tensor([-0.8188, -0.7677, -0.0527, 0.5858]) tensor([0.1208, 0.1271, 0.2599, 0.4922]) -Greedy action tensor([-0.1148, 0.1586, -0.2375, 1.5882]) tensor([0.1151, 0.1513, 0.1018, 0.6319]) -Greedy action tensor([ 1.8871, -0.1584, 0.9897, 0.7430]) tensor([0.5390, 0.0697, 0.2197, 0.1716]) -Greedy action tensor([-0.5773, 1.2669, 0.8365, -0.5181]) tensor([0.0800, 0.5060, 0.3290, 0.0849]) -Greedy action tensor([-1.0583, -0.4315, 0.0207, 0.6196]) tensor([0.0895, 0.1676, 0.2634, 0.4795]) -Greedy action tensor([-0.0957, -0.6475, -0.0174, -1.1069]) tensor([0.3310, 0.1906, 0.3580, 0.1204]) -Greedy action tensor([-0.9189, -0.7037, 0.6402, 0.1933]) tensor([0.0996, 0.1236, 0.4738, 0.3030]) -Greedy action tensor([ 1.1017, -1.3070, -0.6897, 0.9973]) tensor([0.4635, 0.0417, 0.0773, 0.4175]) -Greedy action tensor([ 0.1375, 0.2331, -0.1486, 0.3693]) tensor([0.2432, 0.2676, 0.1827, 0.3066]) -Greedy action tensor([2.0302, 1.2507, 0.1283, 1.2075]) tensor([0.4885, 0.2240, 0.0729, 0.2146]) -Greedy action tensor([-0.5356, -1.1538, 0.1639, 0.6278]) tensor([0.1481, 0.0798, 0.2981, 0.4740]) -Greedy action tensor([ 0.2938, 1.0357, -0.5178, 1.6948]) tensor([0.1315, 0.2762, 0.0584, 0.5339]) -Greedy action tensor([-2.0720, -0.8186, -1.1258, -0.7282]) tensor([0.0916, 0.3210, 0.2361, 0.3513]) -Greedy action tensor([ 0.9529, -0.3405, 1.6252, -0.4630]) tensor([0.2877, 0.0789, 0.5635, 0.0698]) -Greedy action tensor([ 1.7543, -0.0437, -0.1742, 0.1429]) tensor([0.6620, 0.1096, 0.0962, 0.1321]) -Greedy action tensor([-0.7301, -0.0701, -0.3286, 0.5657]) tensor([0.1237, 0.2394, 0.1848, 0.4521]) -Greedy action tensor([ 0.3426, -1.8547, 0.7826, -0.8760]) tensor([0.3379, 0.0375, 0.5246, 0.0999]) -Greedy action tensor([ 1.4911, -1.0790, 0.1068, 1.2420]) tensor([0.4747, 0.0363, 0.1189, 0.3700]) -Greedy action tensor([ 0.1189, -2.6070, -0.2755, -0.1664]) tensor([0.4014, 0.0263, 0.2706, 0.3017]) -Greedy action tensor([ 0.7688, -1.3527, 0.3828, 0.6636]) tensor([0.3704, 0.0444, 0.2518, 0.3334]) -Greedy action tensor([ 0.2242, 0.4138, -0.2063, -0.3415]) tensor([0.2918, 0.3527, 0.1897, 0.1657]) -Greedy action tensor([ 0.5062, -0.0440, 0.8847, -0.5866]) tensor([0.2966, 0.1711, 0.4330, 0.0994]) -Greedy action tensor([ 0.2316, -0.5482, -0.0173, -0.5623]) tensor([0.3717, 0.1704, 0.2898, 0.1680]) -Greedy action tensor([ 1.0969, -0.1817, -0.7493, 1.2848]) tensor([0.3784, 0.1054, 0.0597, 0.4566]) -Greedy action tensor([-0.3150, -0.4910, -0.2009, -0.5177]) tensor([0.2648, 0.2221, 0.2968, 0.2162]) -Greedy action tensor([ 0.7416, -1.1070, 0.1506, 1.2083]) tensor([0.3025, 0.0476, 0.1675, 0.4824]) -Greedy action tensor([-0.1906, -0.1930, -0.1081, 0.4151]) tensor([0.2034, 0.2029, 0.2209, 0.3728]) -Greedy action tensor([ 0.7062, -0.1086, 0.2767, 1.3702]) tensor([0.2478, 0.1097, 0.1613, 0.4813]) -Greedy action tensor([-0.9242, -1.0504, -0.7800, 0.9450]) tensor([0.1050, 0.0926, 0.1213, 0.6810]) -Greedy action tensor([-0.4703, -0.4722, -1.0262, 1.2023]) tensor([0.1266, 0.1264, 0.0726, 0.6744]) -Greedy action tensor([ 1.3555, -0.5959, 0.0249, 0.9793]) tensor([0.4778, 0.0679, 0.1263, 0.3280]) -Greedy action tensor([ 1.1378, -0.6262, -0.1355, 0.6729]) tensor([0.4809, 0.0824, 0.1346, 0.3021]) -Greedy action tensor([ 0.1301, 0.0731, -0.3333, 0.5690]) tensor([0.2424, 0.2290, 0.1525, 0.3760]) -Greedy action tensor([1.4329, 0.0234, 0.1168, 0.7156]) tensor([0.4999, 0.1221, 0.1341, 0.2440]) -Greedy action tensor([-0.4381, -0.6854, 0.3600, 0.2232]) tensor([0.1684, 0.1315, 0.3740, 0.3262]) -Greedy action tensor([-0.2476, -1.6866, -0.0545, 0.9954]) tensor([0.1690, 0.0401, 0.2050, 0.5859]) -Greedy action tensor([-0.1622, 1.3562, 1.3021, -0.3331]) tensor([0.0932, 0.4253, 0.4030, 0.0785]) -Greedy action tensor([-0.6009, 0.3336, 0.2861, 0.2997]) tensor([0.1186, 0.3018, 0.2878, 0.2918]) -Greedy action tensor([ 0.6899, 0.0830, -0.0149, 0.4898]) tensor([0.3499, 0.1907, 0.1729, 0.2865]) -Greedy action tensor([-0.1302, -1.3975, 1.6592, -0.0156]) tensor([0.1192, 0.0336, 0.7136, 0.1337]) -Greedy action tensor([ 0.3700, -0.9720, 0.6521, -1.0953]) tensor([0.3548, 0.0927, 0.4705, 0.0820]) -Greedy action tensor([-1.2822, -0.9325, -0.6138, 1.3005]) tensor([0.0568, 0.0806, 0.1108, 0.7518]) -Greedy action tensor([ 0.5021, -1.5973, -0.5445, -0.3145]) tensor([0.5220, 0.0640, 0.1833, 0.2307]) -Greedy action tensor([-1.5150, -1.1068, 0.5894, 0.9644]) tensor([0.0442, 0.0664, 0.3623, 0.5271]) -Greedy action tensor([-0.5736, -1.7477, -0.1227, 0.0887]) tensor([0.2075, 0.0642, 0.3258, 0.4025]) -Greedy action tensor([-0.9995, -0.5963, -1.1336, -0.1409]) tensor([0.1745, 0.2611, 0.1526, 0.4118]) -Greedy action tensor([ 0.2049, -1.4852, -0.5698, 0.3366]) tensor([0.3589, 0.0662, 0.1654, 0.4095]) -Greedy action tensor([-0.0940, -1.2309, 0.6975, -0.6893]) tensor([0.2452, 0.0787, 0.5410, 0.1352]) -Greedy action tensor([ 0.5148, -0.5950, 1.8706, 0.0496]) tensor([0.1713, 0.0565, 0.6646, 0.1076]) -Greedy action tensor([ 0.0804, -1.9442, 0.6378, 0.7033]) tensor([0.2108, 0.0278, 0.3682, 0.3931]) -Greedy action tensor([-0.3494, -0.1877, -0.1206, -0.6710]) tensor([0.2405, 0.2827, 0.3024, 0.1744]) -Greedy action tensor([ 0.9121, 1.0786, -0.2590, 0.2310]) tensor([0.3336, 0.3941, 0.1034, 0.1688]) -Greedy action tensor([ 0.0895, -0.4270, 0.0021, 0.3831]) tensor([0.2595, 0.1548, 0.2377, 0.3480]) -Greedy action tensor([0.2180, 0.8634, 0.6774, 0.0084]) tensor([0.1886, 0.3597, 0.2987, 0.1530]) -Greedy action tensor([ 0.4091, -0.8962, 0.3511, -0.3963]) tensor([0.3757, 0.1019, 0.3545, 0.1679]) -Greedy action tensor([-0.9343, -1.2955, -0.6514, -0.7288]) tensor([0.2352, 0.1639, 0.3121, 0.2889]) -Greedy action tensor([-0.9387, 0.2674, 0.9099, -0.4360]) tensor([0.0810, 0.2706, 0.5145, 0.1339]) -Greedy action tensor([ 0.6354, 0.9733, 0.8220, -0.2337]) tensor([0.2484, 0.3482, 0.2993, 0.1041]) -Greedy action tensor([-0.5652, -0.5640, -1.0276, -0.7346]) tensor([0.2878, 0.2881, 0.1812, 0.2429]) -Greedy action tensor([-0.1857, -0.0723, 0.4008, 0.1859]) tensor([0.1863, 0.2087, 0.3349, 0.2701]) -Greedy action tensor([-0.2236, 0.4994, 0.0355, 0.1637]) tensor([0.1715, 0.3535, 0.2223, 0.2527]) -Greedy action tensor([ 0.6185, -0.5089, -0.0739, -0.0417]) tensor([0.4272, 0.1384, 0.2137, 0.2207]) -Greedy action tensor([ 0.7577, -0.7188, 0.0846, -0.4090]) tensor([0.4878, 0.1114, 0.2488, 0.1519]) -Greedy action tensor([ 0.4603, -0.0309, -0.0940, -0.2631]) tensor([0.3743, 0.2290, 0.2150, 0.1816]) -Greedy action tensor([ 0.4020, 0.0031, -0.0320, -0.2937]) tensor([0.3549, 0.2382, 0.2299, 0.1770]) -Greedy action tensor([ 1.0562, -0.6489, 0.1508, -0.4877]) tensor([0.5556, 0.1010, 0.2247, 0.1187]) -Greedy action tensor([ 0.6859, -0.5592, -0.1208, -0.3780]) tensor([0.4809, 0.1385, 0.2147, 0.1660]) -Greedy action tensor([ 0.6591, -0.5930, -0.1448, -0.4028]) tensor([0.4809, 0.1375, 0.2153, 0.1663]) -Greedy action tensor([ 0.7689, -0.4392, -0.1683, -0.3514]) tensor([0.4959, 0.1481, 0.1943, 0.1618]) -Greedy action tensor([ 0.6110, -0.3206, -0.0599, -0.1496]) tensor([0.4215, 0.1660, 0.2155, 0.1970]) -Greedy action tensor([ 0.7888, -0.3698, -0.0453, -0.2301]) tensor([0.4741, 0.1488, 0.2059, 0.1712]) -Greedy action tensor([ 0.7342, -0.1969, 0.0318, -0.4215]) tensor([0.4537, 0.1788, 0.2247, 0.1428]) -Greedy action tensor([ 0.3864, -0.0839, -0.1436, -0.2703]) tensor([0.3660, 0.2287, 0.2154, 0.1898]) -Greedy action tensor([ 0.4670, -0.0659, 0.1219, -0.4497]) tensor([0.3711, 0.2178, 0.2628, 0.1484]) -Greedy action tensor([ 0.8841, -0.4003, -0.1424, -0.4957]) tensor([0.5300, 0.1467, 0.1899, 0.1334]) -Greedy action tensor([ 0.7031, -0.3485, -0.0857, -0.2065]) tensor([0.4532, 0.1583, 0.2059, 0.1825]) -Greedy action tensor([ 0.6560, -0.7138, -0.0807, -0.3937]) tensor([0.4801, 0.1220, 0.2298, 0.1681]) -Greedy action tensor([ 0.1324, 0.1329, -0.1513, -0.4075]) tensor([0.2997, 0.2999, 0.2257, 0.1747]) -Greedy action tensor([ 0.5008, 0.0384, -0.1014, -0.1689]) tensor([0.3718, 0.2342, 0.2036, 0.1903]) -Greedy action tensor([ 0.6591, -0.1874, -0.0702, -0.4917]) tensor([0.4489, 0.1925, 0.2165, 0.1420]) -Greedy action tensor([ 0.4132, -0.1927, -0.0741, -0.0611]) tensor([0.3594, 0.1961, 0.2208, 0.2237]) -Greedy action tensor([ 0.6194, -0.3422, 0.0238, -0.3303]) tensor([0.4310, 0.1647, 0.2376, 0.1667]) -Greedy action tensor([ 0.2565, -0.0750, -0.1098, -0.1992]) tensor([0.3284, 0.2357, 0.2277, 0.2082]) -Greedy action tensor([ 0.8413, -0.5187, -0.0575, -0.2948]) tensor([0.5038, 0.1293, 0.2051, 0.1618]) -Greedy action tensor([ 0.4949, -0.2659, -0.2282, -0.2582]) tensor([0.4126, 0.1928, 0.2002, 0.1943]) -Greedy action tensor([ 1.0671, -0.5412, 0.1634, -0.6475]) tensor([0.5601, 0.1121, 0.2269, 0.1008]) -Greedy action tensor([ 0.5443, -0.3673, -0.1437, -0.1957]) tensor([0.4199, 0.1687, 0.2110, 0.2003]) -Greedy action tensor([ 0.3432, 0.1112, 0.1154, -0.1829]) tensor([0.3144, 0.2493, 0.2504, 0.1858]) -Greedy action tensor([ 0.8494, -0.7494, 0.0567, -0.6582]) tensor([0.5330, 0.1077, 0.2412, 0.1180]) -Greedy action tensor([ 0.6816, -0.1888, -0.0656, -0.0985]) tensor([0.4254, 0.1781, 0.2015, 0.1950]) -Greedy action tensor([ 0.5481, -0.1222, -0.0012, -0.1819]) tensor([0.3890, 0.1990, 0.2246, 0.1875]) -Greedy action tensor([ 0.6097, -0.3905, -0.0999, -0.3608]) tensor([0.4467, 0.1643, 0.2197, 0.1693]) -Greedy action tensor([ 0.9459, -0.5978, -0.0453, -0.4979]) tensor([0.5492, 0.1173, 0.2038, 0.1296]) -Greedy action tensor([ 0.2143, 0.0006, -0.0324, -0.0140]) tensor([0.2954, 0.2386, 0.2308, 0.2351]) -Greedy action tensor([ 0.8426, -0.1294, 0.3292, -0.3012]) tensor([0.4356, 0.1648, 0.2607, 0.1388]) -Greedy action tensor([ 0.3937, 0.2433, -0.1326, -0.0267]) tensor([0.3217, 0.2768, 0.1901, 0.2113]) -Greedy action tensor([ 0.7866, -0.8759, 0.0428, -0.4923]) tensor([0.5146, 0.0976, 0.2446, 0.1432]) -Greedy action tensor([ 0.2740, 0.0247, -0.0821, -0.1914]) tensor([0.3218, 0.2508, 0.2254, 0.2020]) -Greedy action tensor([ 0.3392, -0.2652, -0.1192, -0.4347]) tensor([0.3788, 0.2070, 0.2395, 0.1747]) -Greedy action tensor([ 0.8373, -0.3930, -0.0557, -0.5087]) tensor([0.5097, 0.1489, 0.2087, 0.1327]) -Greedy action tensor([ 0.5170, 0.1053, -0.1811, 0.0625]) tensor([0.3578, 0.2371, 0.1780, 0.2271]) -Greedy action tensor([ 0.3098, -0.1206, -0.0346, -0.2104]) tensor([0.3386, 0.2202, 0.2400, 0.2013]) -Greedy action tensor([ 0.5964, -0.4948, 0.0255, -0.6771]) tensor([0.4586, 0.1540, 0.2591, 0.1283]) -Greedy action tensor([ 0.4909, 0.0931, -0.0146, -0.1331]) tensor([0.3558, 0.2390, 0.2146, 0.1906]) -Greedy action tensor([ 0.7075, -0.1746, 0.0210, -0.4300]) tensor([0.4469, 0.1849, 0.2249, 0.1433]) -Greedy action tensor([ 0.7772, -0.4837, -0.0244, -0.4542]) tensor([0.4941, 0.1400, 0.2217, 0.1442]) -Greedy action tensor([ 0.6430, -0.0091, -0.1223, -0.1802]) tensor([0.4123, 0.2148, 0.1918, 0.1810]) -Greedy action tensor([ 0.1868, -0.0526, 0.0856, -0.2640]) tensor([0.3005, 0.2365, 0.2716, 0.1914]) -Greedy action tensor([ 0.3964, -0.0966, 0.0267, -0.2285]) tensor([0.3525, 0.2153, 0.2435, 0.1887]) -Greedy action tensor([ 0.6176, -0.2533, -0.1163, -0.1306]) tensor([0.4216, 0.1765, 0.2024, 0.1995]) -Greedy action tensor([ 0.7585, -0.3552, 0.0674, -0.3387]) tensor([0.4623, 0.1518, 0.2316, 0.1543]) -Greedy action tensor([ 1.1194, -1.0954, -0.0033, -0.6607]) tensor([0.6237, 0.0681, 0.2030, 0.1052]) -Greedy action tensor([ 0.9260, -0.2171, 0.1964, -0.4203]) tensor([0.4852, 0.1547, 0.2339, 0.1262]) -Greedy action tensor([ 0.6368, -0.3562, 0.0265, -0.6012]) tensor([0.4538, 0.1681, 0.2465, 0.1316]) -Greedy action tensor([ 0.6679, -0.1865, -0.0542, -0.2359]) tensor([0.4317, 0.1837, 0.2097, 0.1749]) -Greedy action tensor([ 0.8661, -0.7275, -0.0413, -0.3484]) tensor([0.5253, 0.1067, 0.2120, 0.1559]) -Greedy action tensor([ 0.4697, -0.4044, -0.1015, -0.5094]) tensor([0.4241, 0.1770, 0.2396, 0.1593]) -Greedy action tensor([ 0.7532, -0.9119, 0.0173, -0.4167]) tensor([0.5054, 0.0956, 0.2421, 0.1569]) -Greedy action tensor([ 0.6055, -0.3971, 0.0815, -0.3232]) tensor([0.4248, 0.1559, 0.2515, 0.1678]) -Greedy action tensor([ 0.3571, 0.1242, 0.0052, -0.3811]) tensor([0.3363, 0.2664, 0.2365, 0.1607]) -Greedy action tensor([ 0.7736, -0.7011, 0.1023, -0.7173]) tensor([0.5089, 0.1165, 0.2601, 0.1146]) -Greedy action tensor([ 0.9356, -0.7395, -0.0945, -0.5534]) tensor([0.5650, 0.1058, 0.2017, 0.1275]) -Greedy action tensor([ 0.7241, -0.7448, -0.0989, -0.2988]) tensor([0.4929, 0.1135, 0.2164, 0.1772]) -Greedy action tensor([ 0.6702, -0.3857, 0.0233, -0.3462]) tensor([0.4477, 0.1558, 0.2345, 0.1620]) -Greedy action tensor([ 0.4613, -0.0133, 0.0015, -0.1268]) tensor([0.3560, 0.2215, 0.2248, 0.1977]) -Greedy action tensor([ 0.2760, 0.0498, -0.0959, -0.1251]) tensor([0.3168, 0.2527, 0.2184, 0.2121]) -Greedy action tensor([ 0.2901, 0.2153, -0.1486, -0.2376]) tensor([0.3162, 0.2934, 0.2039, 0.1865]) -Greedy action tensor([ 0.8526, -0.6180, -0.0741, -0.6619]) tensor([0.5418, 0.1245, 0.2145, 0.1192]) -Greedy action tensor([ 0.5636, -0.1641, -0.0372, -0.2762]) tensor([0.4060, 0.1961, 0.2226, 0.1753]) -Greedy action tensor([ 0.3424, 0.0536, -0.0465, -0.0881]) tensor([0.3250, 0.2435, 0.2203, 0.2113]) -Greedy action tensor([ 0.6861, -0.4270, -0.0520, -0.2881]) tensor([0.4579, 0.1504, 0.2189, 0.1728]) -Greedy action tensor([ 0.3300, -0.1448, -0.0836, -0.2381]) tensor([0.3509, 0.2183, 0.2320, 0.1988]) -Greedy action tensor([ 0.8536, -0.4527, -0.1110, -0.1613]) tensor([0.4964, 0.1344, 0.1892, 0.1799]) -Greedy action tensor([ 0.4898, -0.1117, -0.1359, -0.1717]) tensor([0.3848, 0.2109, 0.2058, 0.1986]) -Greedy action tensor([ 0.8479, -0.3960, -0.1465, -0.3584]) tensor([0.5108, 0.1473, 0.1890, 0.1529]) -Greedy action tensor([ 0.5713, -0.1532, 0.0134, -0.2057]) tensor([0.3973, 0.1925, 0.2274, 0.1827]) -Greedy action tensor([ 0.6053, -0.3945, -0.1728, -0.0814]) tensor([0.4291, 0.1579, 0.1971, 0.2159]) -Greedy action tensor([ 0.8686, -0.6388, -0.0517, -0.7291]) tensor([0.5488, 0.1216, 0.2186, 0.1111]) -Greedy action tensor([ 0.5516, -0.0138, 0.0353, -0.0792]) tensor([0.3708, 0.2106, 0.2213, 0.1973]) -Greedy action tensor([ 0.4362, -0.0872, -0.0189, -0.4343]) tensor([0.3780, 0.2239, 0.2398, 0.1583]) -Greedy action tensor([ 0.5445, -0.4192, 0.1983, -0.6245]) tensor([0.4167, 0.1590, 0.2948, 0.1295]) -Greedy action tensor([ 0.4314, -0.0160, -0.1154, -0.0678]) tensor([0.3540, 0.2263, 0.2049, 0.2149]) -Greedy action tensor([-1.8043, -0.4411, 0.5948, -0.1048]) tensor([0.0467, 0.1827, 0.5148, 0.2557]) -Greedy action tensor([-1.8553, -0.4779, 0.6203, -0.1268]) tensor([0.0445, 0.1763, 0.5287, 0.2505]) -Greedy action tensor([-1.6487, -0.5607, 0.4994, -0.0256]) tensor([0.0568, 0.1686, 0.4867, 0.2879]) -Greedy action tensor([-0.9822, 0.9545, 0.0847, 0.5076]) tensor([0.0655, 0.4540, 0.1902, 0.2904]) -Greedy action tensor([-1.7553, -0.0927, 0.5541, 0.0495]) tensor([0.0446, 0.2352, 0.4491, 0.2711]) -Greedy action tensor([-1.4267, -0.5374, 0.3750, 0.2201]) tensor([0.0681, 0.1657, 0.4127, 0.3535]) -Greedy action tensor([-1.5510, -0.4796, 0.5427, 0.1597]) tensor([0.0569, 0.1662, 0.4619, 0.3150]) -Greedy action tensor([-1.9981, -0.9406, 0.4369, -0.1486]) tensor([0.0462, 0.1330, 0.5272, 0.2936]) -Greedy action tensor([-1.7541, -0.3818, 0.6684, 0.0113]) tensor([0.0453, 0.1788, 0.5110, 0.2649]) -Greedy action tensor([-1.8587, -0.4098, 0.6186, -0.1277]) tensor([0.0438, 0.1867, 0.5220, 0.2475]) -Greedy action tensor([-1.5153, -0.5423, 0.5564, 0.2126]) tensor([0.0581, 0.1537, 0.4612, 0.3270]) -Greedy action tensor([-1.3858, -0.3791, 0.3667, 0.1612]) tensor([0.0704, 0.1927, 0.4062, 0.3307]) -Greedy action tensor([-1.8131, -0.2835, 0.5914, -0.0906]) tensor([0.0449, 0.2071, 0.4968, 0.2512]) -Greedy action tensor([-0.8630, -0.6039, 0.1829, 0.2869]) tensor([0.1205, 0.1561, 0.3429, 0.3805]) -Greedy action tensor([-1.9117, -0.4707, 0.6537, -0.1599]) tensor([0.0417, 0.1761, 0.5420, 0.2402]) -Greedy action tensor([-1.8661, -0.4446, 0.6309, -0.1376]) tensor([0.0436, 0.1808, 0.5299, 0.2457]) -Greedy action tensor([-1.8638, -0.4635, 0.6210, -0.1227]) tensor([0.0439, 0.1782, 0.5272, 0.2506]) -Greedy action tensor([-1.3909, -0.5541, 0.3541, 0.2816]) tensor([0.0696, 0.1608, 0.3987, 0.3709]) -Greedy action tensor([-1.9016, -0.4042, 0.6401, -0.1470]) tensor([0.0418, 0.1866, 0.5303, 0.2414]) -Greedy action tensor([-0.6918, 0.5227, -0.0233, -0.1301]) tensor([0.1239, 0.4172, 0.2417, 0.2172]) -Greedy action tensor([-1.9154, -0.4507, 0.6532, -0.1603]) tensor([0.0414, 0.1791, 0.5401, 0.2394]) -Greedy action tensor([-0.8630, -0.6669, 0.3324, 0.1703]) tensor([0.1200, 0.1460, 0.3967, 0.3373]) -Greedy action tensor([-1.6054, -0.5702, 0.7055, -0.0199]) tensor([0.0532, 0.1499, 0.5369, 0.2599]) -Greedy action tensor([-0.8362, -0.5399, 0.3712, -0.0414]) tensor([0.1265, 0.1702, 0.4232, 0.2801]) -Greedy action tensor([-1.6517, -0.5245, 0.9054, 0.3806]) tensor([0.0406, 0.1254, 0.5240, 0.3100]) -Greedy action tensor([-1.6075, -0.4759, 0.5034, 0.1745]) tensor([0.0546, 0.1694, 0.4512, 0.3247]) -Greedy action tensor([-0.8606, -0.0793, 0.9193, 1.2577]) tensor([0.0574, 0.1253, 0.3402, 0.4772]) -Greedy action tensor([-1.9084, -0.4540, 0.6523, -0.1520]) tensor([0.0416, 0.1783, 0.5390, 0.2411]) -Greedy action tensor([-1.8251, -0.3338, 0.5858, -0.1262]) tensor([0.0453, 0.2015, 0.5053, 0.2479]) -Greedy action tensor([-1.8113, -0.4592, 0.6114, -0.1568]) tensor([0.0468, 0.1809, 0.5276, 0.2447]) -Greedy action tensor([-1.5836, -0.5582, 0.5477, 0.1756]) tensor([0.0555, 0.1547, 0.4675, 0.3223]) -Greedy action tensor([-0.6002, 0.0352, 0.8658, 1.5204]) tensor([0.0643, 0.1214, 0.2785, 0.5359]) -Greedy action tensor([-0.7386, -0.5136, 0.2270, 0.1812]) tensor([0.1354, 0.1695, 0.3555, 0.3396]) -Greedy action tensor([-0.7431, -0.4175, 0.3957, 0.6396]) tensor([0.1053, 0.1459, 0.3290, 0.4198]) -Greedy action tensor([-1.4725, -0.1260, 0.4045, -0.0192]) tensor([0.0639, 0.2455, 0.4174, 0.2732]) -Greedy action tensor([-1.5902, -0.5232, 0.5320, 0.1070]) tensor([0.0565, 0.1641, 0.4713, 0.3081]) -Greedy action tensor([-1.5304, -0.5505, 0.4485, 0.0797]) tensor([0.0629, 0.1675, 0.4549, 0.3146]) -Greedy action tensor([-1.5424, -0.3608, 0.6119, 0.3522]) tensor([0.0512, 0.1669, 0.4414, 0.3405]) -Greedy action tensor([-1.2228, 0.8217, 0.2210, 0.5286]) tensor([0.0534, 0.4126, 0.2263, 0.3078]) -Greedy action tensor([-1.6728, -0.4353, 0.9026, 0.5719]) tensor([0.0370, 0.1276, 0.4862, 0.3493]) -Greedy action tensor([-1.4878, -0.5549, 0.4149, 0.1741]) tensor([0.0645, 0.1638, 0.4321, 0.3396]) -Greedy action tensor([-1.6960, -0.4762, 0.5504, -0.0398]) tensor([0.0524, 0.1775, 0.4955, 0.2746]) -Greedy action tensor([-1.8384, -0.5548, 1.3737, 0.8290]) tensor([0.0228, 0.0823, 0.5664, 0.3285]) -Greedy action tensor([-1.9228, -0.5557, 0.6098, -0.1545]) tensor([0.0428, 0.1679, 0.5385, 0.2508]) -Greedy action tensor([-0.1304, -0.4085, 0.2367, 0.5542]) tensor([0.1929, 0.1461, 0.2785, 0.3825]) -Greedy action tensor([-1.7032, -0.5066, 0.8111, 0.2760]) tensor([0.0418, 0.1384, 0.5170, 0.3028]) -Greedy action tensor([-1.1696, -0.5890, 0.3099, 0.0812]) tensor([0.0937, 0.1675, 0.4115, 0.3273]) -Greedy action tensor([-1.8488, -0.6626, 0.3829, -0.4198]) tensor([0.0563, 0.1843, 0.5244, 0.2350]) -Greedy action tensor([-1.3231, -0.6199, 0.0969, -0.0180]) tensor([0.0922, 0.1863, 0.3815, 0.3401]) -Greedy action tensor([-2.0285, -0.8237, 0.5503, -0.1733]) tensor([0.0418, 0.1395, 0.5513, 0.2674]) -Greedy action tensor([-1.3657, 0.9266, 0.4063, -0.3243]) tensor([0.0510, 0.5046, 0.2999, 0.1444]) -Greedy action tensor([-1.7712, -0.4647, 0.7463, 0.1287]) tensor([0.0421, 0.1553, 0.5214, 0.2812]) -Greedy action tensor([-1.6015, -0.5429, 0.4827, 0.0523]) tensor([0.0583, 0.1681, 0.4688, 0.3048]) -Greedy action tensor([-1.7468, -0.4882, 0.6585, 0.1260]) tensor([0.0452, 0.1592, 0.5012, 0.2943]) -Greedy action tensor([-1.0124, -0.4954, 0.5018, 1.1106]) tensor([0.0642, 0.1076, 0.2918, 0.5364]) -Greedy action tensor([-1.6084, -0.3704, 0.5141, 0.1282]) tensor([0.0541, 0.1866, 0.4520, 0.3073]) -Greedy action tensor([0.1871, 0.2774, 0.8449, 1.7249]) tensor([0.1152, 0.1261, 0.2224, 0.5362]) -Greedy action tensor([-1.8139, -0.3754, 0.5889, -0.1010]) tensor([0.0458, 0.1932, 0.5068, 0.2542]) -Greedy action tensor([-1.8977, -0.4687, 0.6780, -0.1221]) tensor([0.0413, 0.1724, 0.5426, 0.2438]) -Greedy action tensor([-1.8898, -0.4132, 0.6326, -0.1539]) tensor([0.0425, 0.1862, 0.5299, 0.2413]) -Greedy action tensor([-1.1326, -0.4939, 0.4205, -0.3973]) tensor([0.1030, 0.1951, 0.4869, 0.2149]) -Greedy action tensor([-1.8136, -0.4549, 0.6022, -0.0995]) tensor([0.0462, 0.1798, 0.5175, 0.2565]) -Greedy action tensor([-1.8960, -0.4594, 0.6424, -0.1564]) tensor([0.0424, 0.1785, 0.5373, 0.2417]) -Greedy action tensor([-1.9146, -0.4559, 0.6471, -0.1630]) tensor([0.0416, 0.1790, 0.5394, 0.2399]) -Greedy action tensor([-1.8419, -0.4494, 0.6127, -0.1272]) tensor([0.0450, 0.1811, 0.5239, 0.2500]) -Greedy action tensor([-1.8343, -0.4627, 0.6999, 0.0196]) tensor([0.0418, 0.1647, 0.5267, 0.2668]) -Greedy action tensor([-1.7220, -0.4878, 0.5444, -0.0310]) tensor([0.0513, 0.1761, 0.4945, 0.2781]) -Greedy action tensor([-1.5662, -0.3788, 0.5274, 0.0573]) tensor([0.0573, 0.1877, 0.4646, 0.2904]) -Greedy action tensor([-1.4808, -0.5104, 0.7551, -0.4172]) tensor([0.0629, 0.1661, 0.5887, 0.1823]) -Greedy action tensor([-0.7864, -0.4654, 0.3434, -0.1292]) tensor([0.1351, 0.1862, 0.4181, 0.2606]) -Greedy action tensor([-1.8987, -0.4140, 0.6415, -0.1465]) tensor([0.0419, 0.1849, 0.5315, 0.2417]) -Greedy action tensor([-1.7387, -0.0164, 0.5003, -0.0376]) tensor([0.0466, 0.2608, 0.4373, 0.2553]) -Greedy action tensor([-1.9148, -0.4123, 0.6518, -0.1554]) tensor([0.0411, 0.1847, 0.5354, 0.2388]) -Greedy action tensor([-1.8559, -0.3163, 0.6036, -0.1251]) tensor([0.0435, 0.2027, 0.5085, 0.2454]) -Greedy action tensor([-1.6749, -0.6241, 1.4727, 1.0239]) tensor([0.0238, 0.0681, 0.5543, 0.3538]) -Greedy action tensor([-1.8603, -0.3755, 0.6111, -0.1526]) tensor([0.0439, 0.1939, 0.5199, 0.2423]) -Greedy action tensor([-1.7779, -0.4911, 0.5834, -0.0857]) tensor([0.0484, 0.1753, 0.5134, 0.2629]) -Greedy action tensor([-1.9334, -0.4432, 0.6586, -0.1761]) tensor([0.0407, 0.1805, 0.5431, 0.2357]) -Greedy action tensor([-1.9104, -0.4531, 0.7050, -0.0773]) tensor([0.0397, 0.1703, 0.5421, 0.2479]) -Greedy action tensor([-1.9299, -0.4468, 0.6557, -0.1756]) tensor([0.0409, 0.1802, 0.5426, 0.2363]) -Greedy action tensor([-0.5320, -0.0641, 0.7543, 1.4294]) tensor([0.0750, 0.1198, 0.2716, 0.5335]) -Greedy action tensor([ 1.5610, -0.1927, -0.2825, 0.1890]) tensor([0.6309, 0.1092, 0.0999, 0.1600]) -Greedy action tensor([ 1.3693, -0.1450, -0.1037, 0.1391]) tensor([0.5742, 0.1263, 0.1316, 0.1678]) -Greedy action tensor([ 0.9922, -0.5762, -0.3387, 0.3263]) tensor([0.5034, 0.1049, 0.1330, 0.2587]) -Greedy action tensor([ 1.7163, -0.2833, -0.7984, 0.3475]) tensor([0.6800, 0.0921, 0.0550, 0.1730]) -Greedy action tensor([ 0.9834, -0.1824, -0.4510, 0.1390]) tensor([0.5051, 0.1574, 0.1204, 0.2171]) -Greedy action tensor([ 0.8969, -0.0998, -0.0543, -0.4001]) tensor([0.4929, 0.1819, 0.1904, 0.1347]) -Greedy action tensor([ 1.7509, -0.1134, -0.2065, 0.1442]) tensor([0.6681, 0.1036, 0.0944, 0.1340]) -Greedy action tensor([ 1.4572, 0.0496, -0.2907, 0.4138]) tensor([0.5646, 0.1382, 0.0983, 0.1989]) -Greedy action tensor([ 1.2068, -0.2410, -0.3524, 0.3905]) tensor([0.5298, 0.1245, 0.1114, 0.2342]) -Greedy action tensor([ 1.4441, -0.5598, -0.5128, 0.5058]) tensor([0.5997, 0.0808, 0.0847, 0.2347]) -Greedy action tensor([ 1.3853, -0.3581, -0.3759, 0.1584]) tensor([0.6098, 0.1067, 0.1048, 0.1788]) -Greedy action tensor([ 1.3233, -0.6586, -0.2746, 0.6849]) tensor([0.5353, 0.0738, 0.1083, 0.2827]) -Greedy action tensor([ 2.3906, -1.2460, -0.3188, 0.7409]) tensor([0.7782, 0.0205, 0.0518, 0.1495]) -Greedy action tensor([ 1.2462, -0.3899, -0.2302, 0.6954]) tensor([0.5001, 0.0974, 0.1142, 0.2883]) -Greedy action tensor([ 1.5504, -0.5534, -0.2581, 0.5679]) tensor([0.6023, 0.0735, 0.0987, 0.2255]) -Greedy action tensor([ 1.0567, -0.6711, -0.5414, 0.5761]) tensor([0.5004, 0.0889, 0.1012, 0.3095]) -Greedy action tensor([ 1.3656, -0.6823, 0.0103, -0.0045]) tensor([0.6094, 0.0786, 0.1571, 0.1548]) -Greedy action tensor([ 1.1475, -0.1993, -0.3912, 0.5050]) tensor([0.4998, 0.1300, 0.1073, 0.2629]) -Greedy action tensor([ 0.6870, -0.4389, -0.3947, 0.3335]) tensor([0.4227, 0.1371, 0.1433, 0.2969]) -Greedy action tensor([ 1.0540, -0.2417, -0.1221, 0.0391]) tensor([0.5142, 0.1408, 0.1586, 0.1864]) -Greedy action tensor([ 1.2808, -0.5957, -0.2976, 0.2076]) tensor([0.5878, 0.0900, 0.1213, 0.2010]) -Greedy action tensor([ 1.3413, -0.2215, -0.3015, 0.3067]) tensor([0.5687, 0.1192, 0.1100, 0.2021]) -Greedy action tensor([ 0.7938, -0.5368, -0.0917, 0.0834]) tensor([0.4612, 0.1219, 0.1902, 0.2266]) -Greedy action tensor([ 1.4779, -0.2600, -0.6099, 0.6217]) tensor([0.5798, 0.1020, 0.0719, 0.2463]) -Greedy action tensor([ 0.9286, -0.3830, -0.2798, 0.0747]) tensor([0.5016, 0.1351, 0.1498, 0.2135]) -Greedy action tensor([ 0.7332, -0.4136, -0.3353, 0.6569]) tensor([0.3864, 0.1228, 0.1328, 0.3581]) -Greedy action tensor([ 1.3551, -0.0584, -0.3258, 0.5067]) tensor([0.5383, 0.1310, 0.1002, 0.2305]) -Greedy action tensor([ 1.4650, -1.1011, -0.3434, 0.3107]) tensor([0.6427, 0.0494, 0.1053, 0.2026]) -Greedy action tensor([ 1.0980, -0.8255, -0.1767, 0.5050]) tensor([0.5055, 0.0739, 0.1413, 0.2794]) -Greedy action tensor([ 1.6907, -0.7101, -0.2245, 0.5254]) tensor([0.6453, 0.0585, 0.0950, 0.2012]) -Greedy action tensor([ 1.0371, -0.5382, -0.0966, -0.0719]) tensor([0.5380, 0.1113, 0.1731, 0.1775]) -Greedy action tensor([ 1.1357, -0.0932, -0.1411, 0.0748]) tensor([0.5215, 0.1526, 0.1455, 0.1805]) -Greedy action tensor([ 1.4699, -0.6628, -0.5732, 0.4758]) tensor([0.6180, 0.0732, 0.0801, 0.2287]) -Greedy action tensor([ 0.9581, -0.4506, -0.0803, -0.0621]) tensor([0.5105, 0.1248, 0.1807, 0.1840]) -Greedy action tensor([ 1.0923, -0.3261, 0.0619, 0.3432]) tensor([0.4827, 0.1169, 0.1723, 0.2282]) -Greedy action tensor([ 1.0285, -0.3167, -0.1913, -0.1200]) tensor([0.5339, 0.1391, 0.1577, 0.1693]) -Greedy action tensor([ 1.0147, -0.0856, -0.4301, 0.3359]) tensor([0.4817, 0.1603, 0.1136, 0.2444]) -Greedy action tensor([ 1.0499, -0.1970, -0.0772, 0.3770]) tensor([0.4713, 0.1355, 0.1527, 0.2405]) -Greedy action tensor([ 1.7328, -0.0621, -0.2951, 0.0383]) tensor([0.6750, 0.1122, 0.0888, 0.1240]) -Greedy action tensor([ 1.1173, -0.4213, -0.4066, 0.2816]) tensor([0.5359, 0.1150, 0.1168, 0.2323]) -Greedy action tensor([ 1.3245, -0.8324, -0.1599, 0.5167]) tensor([0.5592, 0.0647, 0.1268, 0.2493]) -Greedy action tensor([ 1.2196, -0.3450, -0.2219, 0.5161]) tensor([0.5153, 0.1078, 0.1219, 0.2550]) -Greedy action tensor([ 0.7036, -0.2185, -0.1766, -0.0809]) tensor([0.4408, 0.1753, 0.1828, 0.2011]) -Greedy action tensor([ 1.5373, -0.6289, -0.4910, 0.5985]) tensor([0.6108, 0.0700, 0.0804, 0.2389]) -Greedy action tensor([ 0.5663, -0.2861, 0.0082, 0.0423]) tensor([0.3860, 0.1646, 0.2209, 0.2286]) -Greedy action tensor([ 1.2337, -0.2923, -0.3427, 0.3883]) tensor([0.5395, 0.1173, 0.1115, 0.2317]) -Greedy action tensor([ 1.0615, -0.6424, -0.0187, 0.4026]) tensor([0.4905, 0.0893, 0.1665, 0.2538]) -Greedy action tensor([ 1.5087, -0.3738, -0.3962, 0.4430]) tensor([0.6077, 0.0925, 0.0904, 0.2093]) -Greedy action tensor([ 1.5004, -0.7071, -0.0954, 0.2872]) tensor([0.6211, 0.0683, 0.1259, 0.1846]) -Greedy action tensor([ 1.7277, -0.8726, -0.2147, 0.4547]) tensor([0.6677, 0.0496, 0.0957, 0.1869]) -Greedy action tensor([ 1.0579, -0.2738, -0.3656, 0.1525]) tensor([0.5238, 0.1383, 0.1262, 0.2118]) -Greedy action tensor([ 0.7754, -0.4989, -0.1332, 0.5295]) tensor([0.4057, 0.1134, 0.1635, 0.3173]) -Greedy action tensor([ 1.5350, -0.5438, -0.1890, 0.6279]) tensor([0.5858, 0.0733, 0.1045, 0.2365]) -Greedy action tensor([ 1.9318, -0.5718, -0.3637, 0.7588]) tensor([0.6703, 0.0548, 0.0675, 0.2074]) -Greedy action tensor([ 1.2495, -0.5343, -0.0641, 0.1887]) tensor([0.5608, 0.0942, 0.1508, 0.1941]) -Greedy action tensor([ 0.7303, -0.3116, -0.1680, 0.0304]) tensor([0.4431, 0.1563, 0.1805, 0.2201]) -Greedy action tensor([ 0.9476, -0.2124, -0.1084, 0.0346]) tensor([0.4848, 0.1520, 0.1686, 0.1946]) -Greedy action tensor([ 0.7253, -0.0360, -0.2631, 0.2534]) tensor([0.4060, 0.1896, 0.1511, 0.2533]) -Greedy action tensor([ 1.8573, -0.6522, -0.0719, -0.0244]) tensor([0.7252, 0.0590, 0.1054, 0.1105]) -Greedy action tensor([ 1.4024, -0.6586, -0.3681, -0.1582]) tensor([0.6633, 0.0845, 0.1129, 0.1393]) -Greedy action tensor([ 1.5158, -0.1459, -0.0775, 0.3516]) tensor([0.5864, 0.1113, 0.1192, 0.1831]) -Greedy action tensor([ 0.6161, -0.1784, -0.0783, 0.0073]) tensor([0.4008, 0.1811, 0.2001, 0.2180]) -Greedy action tensor([ 0.7911, -0.3080, -0.5560, 0.7052]) tensor([0.3983, 0.1327, 0.1036, 0.3655]) -Greedy action tensor([ 1.5429, -0.2161, -0.3257, 0.4229]) tensor([0.6050, 0.1042, 0.0934, 0.1974]) -Greedy action tensor([ 0.8578, -0.0295, -0.0383, -0.0949]) tensor([0.4534, 0.1867, 0.1851, 0.1749]) -Greedy action tensor([ 1.0677, -0.0877, -0.1423, 0.2166]) tensor([0.4902, 0.1544, 0.1462, 0.2093]) -Greedy action tensor([ 1.1902, -0.5104, -0.3008, 0.1440]) tensor([0.5685, 0.1038, 0.1280, 0.1997]) -Greedy action tensor([ 1.1393, -0.2774, -0.4273, -0.0128]) tensor([0.5659, 0.1372, 0.1181, 0.1788]) -Greedy action tensor([ 0.8426, -0.5601, 0.1141, 0.0026]) tensor([0.4629, 0.1138, 0.2234, 0.1998]) -Greedy action tensor([ 0.8957, -0.3767, -0.2356, 0.1952]) tensor([0.4764, 0.1335, 0.1537, 0.2365]) -Greedy action tensor([ 1.5891, -0.1449, -0.2378, 0.2169]) tensor([0.6285, 0.1110, 0.1011, 0.1594]) -Greedy action tensor([ 1.1947, 0.0922, -0.1139, 0.2368]) tensor([0.5035, 0.1672, 0.1361, 0.1932]) -Greedy action tensor([ 1.3477, -0.4395, -0.0830, 0.3930]) tensor([0.5582, 0.0935, 0.1335, 0.2149]) -Greedy action tensor([ 1.4930, -0.4294, -0.5911, 0.1690]) tensor([0.6507, 0.0952, 0.0810, 0.1731]) -Greedy action tensor([ 1.3740, -0.6780, -0.3778, 0.4668]) tensor([0.5863, 0.0753, 0.1017, 0.2367]) -Greedy action tensor([ 1.7368, -0.5333, -0.1911, 0.4721]) tensor([0.6531, 0.0675, 0.0950, 0.1844]) -Greedy action tensor([ 1.2094, -0.4741, -0.2507, 0.1064]) tensor([0.5715, 0.1061, 0.1327, 0.1897]) -Greedy action tensor([ 1.2722, 0.4268, -0.5009, 0.3416]) tensor([0.5016, 0.2154, 0.0852, 0.1978]) -Greedy action tensor([ 1.2942, -0.1196, -0.4550, -0.1030]) tensor([0.6008, 0.1461, 0.1045, 0.1486]) -Greedy action tensor([ 1.0965, -0.3357, -0.2265, 0.4778]) tensor([0.4893, 0.1168, 0.1303, 0.2635]) -Greedy action tensor([0.9521, 0.3052, 0.1163, 0.2872]) tensor([0.4046, 0.2119, 0.1754, 0.2081]) -Greedy action tensor([-0.2476, -1.2552, 1.1666, -0.2092]) tensor([0.1534, 0.0560, 0.6311, 0.1594]) -Greedy action tensor([ 1.5939, -0.5997, 0.6880, 0.1372]) tensor([0.5719, 0.0638, 0.2311, 0.1332]) -Greedy action tensor([ 0.0062, -2.1908, 0.4029, 0.8933]) tensor([0.1990, 0.0221, 0.2958, 0.4831]) -Greedy action tensor([-0.5480, -0.9778, -0.4630, 2.0831]) tensor([0.0601, 0.0391, 0.0655, 0.8353]) -Greedy action tensor([ 0.1297, -0.6694, -0.1801, -0.1704]) tensor([0.3420, 0.1538, 0.2509, 0.2533]) -Greedy action tensor([-1.2683, -0.4638, 0.6249, -0.4459]) tensor([0.0823, 0.1840, 0.5464, 0.1873]) -Greedy action tensor([-0.2568, -0.2360, 1.1819, -0.5178]) tensor([0.1427, 0.1457, 0.6016, 0.1099]) -Greedy action tensor([ 1.5734, -0.1047, 1.1793, 0.5485]) tensor([0.4505, 0.0841, 0.3038, 0.1616]) -Greedy action tensor([-0.7238, -0.3803, -1.0889, 0.3157]) tensor([0.1686, 0.2377, 0.1170, 0.4767]) -Greedy action tensor([-0.8194, -1.5031, -1.3410, -0.2381]) tensor([0.2573, 0.1299, 0.1527, 0.4601]) -Greedy action tensor([ 1.1290, -0.4709, -0.1751, 0.5392]) tensor([0.4932, 0.0996, 0.1338, 0.2734]) -Greedy action tensor([-0.6948, -1.2806, -0.1427, 0.4069]) tensor([0.1587, 0.0883, 0.2756, 0.4774]) -Greedy action tensor([ 0.8805, -1.5744, 0.8650, -0.4501]) tensor([0.4283, 0.0368, 0.4217, 0.1132]) -Greedy action tensor([ 0.2240, -0.7178, 0.4850, -0.0825]) tensor([0.2920, 0.1139, 0.3791, 0.2149]) -Greedy action tensor([ 0.9086, -0.2571, 0.3408, -0.0618]) tensor([0.4430, 0.1381, 0.2511, 0.1679]) -Greedy action tensor([0.2866, 0.1360, 1.1743, 0.3578]) tensor([0.1864, 0.1604, 0.4530, 0.2002]) -Greedy action tensor([ 1.7313, -0.9730, 1.2775, 0.6135]) tensor([0.4928, 0.0330, 0.3131, 0.1611]) -Greedy action tensor([-0.8479, 0.1474, 1.2856, -0.6390]) tensor([0.0747, 0.2022, 0.6310, 0.0921]) -Greedy action tensor([-0.0130, -1.9193, -0.5637, 0.2745]) tensor([0.3270, 0.0486, 0.1885, 0.4359]) -Greedy action tensor([ 0.2858, -2.2637, 0.2535, 0.9278]) tensor([0.2534, 0.0198, 0.2453, 0.4815]) -Greedy action tensor([ 1.0020, -0.1784, 0.4773, 0.8937]) tensor([0.3576, 0.1098, 0.2116, 0.3209]) -Greedy action tensor([ 1.3550, -1.3889, 0.2897, 0.7604]) tensor([0.5100, 0.0328, 0.1758, 0.2814]) -Greedy action tensor([ 0.8317, 0.6304, -0.6852, 1.2656]) tensor([0.2793, 0.2284, 0.0613, 0.4310]) -Greedy action tensor([ 1.1144, -0.6179, 1.6652, 0.3883]) tensor([0.2945, 0.0521, 0.5109, 0.1425]) -Greedy action tensor([1.2759, 0.3920, 0.4960, 1.9807]) tensor([0.2567, 0.1061, 0.1177, 0.5195]) -Greedy action tensor([ 0.2319, -1.3037, -0.3738, 0.9087]) tensor([0.2682, 0.0578, 0.1464, 0.5277]) -Greedy action tensor([ 0.8494, -0.9921, 1.4032, 0.0662]) tensor([0.2980, 0.0473, 0.5185, 0.1362]) -Greedy action tensor([ 0.3812, -0.3802, 0.8627, -0.0144]) tensor([0.2660, 0.1242, 0.4306, 0.1791]) -Greedy action tensor([ 1.8334, -0.5369, 0.6374, 1.4992]) tensor([0.4735, 0.0443, 0.1432, 0.3390]) -Greedy action tensor([ 0.7182, 0.1761, -0.2030, 0.1968]) tensor([0.3886, 0.2260, 0.1547, 0.2307]) -Greedy action tensor([ 0.1440, 0.3104, 0.5326, -0.4872]) tensor([0.2388, 0.2820, 0.3522, 0.1270]) -Greedy action tensor([1.0549, 0.0089, 0.8184, 0.6579]) tensor([0.3555, 0.1249, 0.2806, 0.2390]) -Greedy action tensor([-0.2907, 0.2391, -0.4200, -0.4928]) tensor([0.2276, 0.3865, 0.2000, 0.1859]) -Greedy action tensor([-1.2297, -1.4690, 0.5753, -0.5084]) tensor([0.1008, 0.0793, 0.6126, 0.2073]) -Greedy action tensor([ 0.3722, -0.9396, -0.4848, 0.6009]) tensor([0.3389, 0.0913, 0.1438, 0.4260]) -Greedy action tensor([ 1.2433, -1.2123, 0.8510, 1.3223]) tensor([0.3517, 0.0302, 0.2376, 0.3806]) -Greedy action tensor([ 0.8895, 0.0524, 1.1067, -0.9302]) tensor([0.3524, 0.1526, 0.4379, 0.0571]) -Greedy action tensor([ 0.5932, 0.2618, 1.7454, -0.0148]) tensor([0.1842, 0.1323, 0.5832, 0.1003]) -Greedy action tensor([-0.2687, -0.9935, 0.4187, 1.4304]) tensor([0.1118, 0.0542, 0.2224, 0.6116]) -Greedy action tensor([ 0.1998, -0.1242, 1.0672, -0.0032]) tensor([0.2032, 0.1470, 0.4839, 0.1659]) -Greedy action tensor([0.4975, 0.3081, 0.1166, 0.3032]) tensor([0.2999, 0.2482, 0.2049, 0.2470]) -Greedy action tensor([ 1.9204, -0.1874, 2.3048, -0.0231]) tensor([0.3658, 0.0445, 0.5373, 0.0524]) -Greedy action tensor([1.4441, 0.4361, 0.5789, 0.5277]) tensor([0.4575, 0.1670, 0.1926, 0.1830]) -Greedy action tensor([ 0.8655, 0.1273, -0.7223, 0.7571]) tensor([0.3877, 0.1853, 0.0792, 0.3478]) -Greedy action tensor([-0.8951, -1.0907, -0.1424, -0.3052]) tensor([0.1739, 0.1430, 0.3692, 0.3138]) -Greedy action tensor([ 0.0364, -0.3485, -0.5198, -0.6922]) tensor([0.3654, 0.2487, 0.2095, 0.1764]) -Greedy action tensor([ 0.8810, -0.6713, 0.8093, 0.1169]) tensor([0.3834, 0.0812, 0.3569, 0.1786]) -Greedy action tensor([-0.3281, -0.6989, -0.8633, 0.0701]) tensor([0.2656, 0.1833, 0.1555, 0.3955]) -Greedy action tensor([-0.4946, -0.6625, -1.1162, 1.4679]) tensor([0.1053, 0.0890, 0.0565, 0.7492]) -Greedy action tensor([ 0.4088, -2.4398, 0.2604, 0.1542]) tensor([0.3710, 0.0215, 0.3199, 0.2876]) -Greedy action tensor([ 1.0448, 0.4914, -0.1560, 0.1727]) tensor([0.4359, 0.2506, 0.1312, 0.1822]) -Greedy action tensor([ 0.0331, -1.0241, -0.7078, 1.4212]) tensor([0.1715, 0.0596, 0.0817, 0.6872]) -Greedy action tensor([ 0.0863, -0.9832, 0.2742, 1.3182]) tensor([0.1673, 0.0574, 0.2019, 0.5734]) -Greedy action tensor([1.5514, 0.3418, 1.1784, 0.9968]) tensor([0.3904, 0.1165, 0.2689, 0.2242]) -Greedy action tensor([ 0.6420, 0.4589, -0.2944, -0.7659]) tensor([0.4050, 0.3372, 0.1588, 0.0991]) -Greedy action tensor([ 0.2016, -0.2784, 0.9950, -0.3301]) tensor([0.2264, 0.1401, 0.5005, 0.1330]) -Greedy action tensor([1.6232, 0.4720, 0.5848, 0.4911]) tensor([0.5018, 0.1587, 0.1777, 0.1618]) -Greedy action tensor([ 0.8830, -0.5614, 0.6722, 0.0842]) tensor([0.4007, 0.0945, 0.3245, 0.1803]) -Greedy action tensor([ 0.9831, -0.1951, 1.1938, -0.8038]) tensor([0.3690, 0.1136, 0.4556, 0.0618]) -Greedy action tensor([-0.7183, -1.5487, -0.0520, -0.7332]) tensor([0.2289, 0.0998, 0.4457, 0.2255]) -Greedy action tensor([ 1.2716, -0.2500, 2.1233, 1.1073]) tensor([0.2267, 0.0495, 0.5314, 0.1924]) -Greedy action tensor([ 1.4594, -0.9751, 1.5522, -0.1841]) tensor([0.4205, 0.0369, 0.4614, 0.0813]) -Greedy action tensor([ 1.5679, -1.3653, 0.6308, 1.9795]) tensor([0.3385, 0.0180, 0.1326, 0.5109]) -Greedy action tensor([ 1.4339, -1.3020, 0.7555, 1.7346]) tensor([0.3421, 0.0222, 0.1736, 0.4621]) -Greedy action tensor([ 1.2474, -0.6635, 0.1709, 1.7192]) tensor([0.3234, 0.0479, 0.1102, 0.5185]) -Greedy action tensor([-0.0117, -0.5931, -1.1926, 0.0764]) tensor([0.3381, 0.1890, 0.1038, 0.3692]) -Greedy action tensor([ 0.3497, -1.0489, 0.9378, 0.1919]) tensor([0.2563, 0.0633, 0.4615, 0.2189]) -Greedy action tensor([-0.7885, -0.6858, 1.4073, -0.0201]) tensor([0.0755, 0.0836, 0.6782, 0.1627]) -Greedy action tensor([-0.2433, -1.3171, 0.5807, 0.2296]) tensor([0.1914, 0.0654, 0.4362, 0.3071]) -Greedy action tensor([-0.2974, 0.4618, 0.5331, 0.3546]) tensor([0.1360, 0.2907, 0.3122, 0.2611]) -Greedy action tensor([ 1.7212, 0.5838, 0.9104, -0.0114]) tensor([0.5149, 0.1651, 0.2289, 0.0911]) -Greedy action tensor([ 0.5994, 0.1710, -0.1205, 1.3192]) tensor([0.2385, 0.1554, 0.1161, 0.4900]) -Greedy action tensor([-0.0349, -1.4112, 0.6024, 0.2057]) tensor([0.2265, 0.0572, 0.4283, 0.2881]) -Greedy action tensor([ 1.5002, -0.2890, -0.1083, 1.2845]) tensor([0.4601, 0.0769, 0.0921, 0.3709]) -Greedy action tensor([-0.5013, -1.1059, -0.3298, 1.1519]) tensor([0.1257, 0.0687, 0.1492, 0.6565]) -Greedy action tensor([0.6538, 0.7214, 0.8109, 0.3574]) tensor([0.2510, 0.2686, 0.2937, 0.1866]) -Greedy action tensor([-0.6711, -0.3318, 0.4902, 0.6971]) tensor([0.1050, 0.1474, 0.3353, 0.4124]) -Greedy action tensor([-0.1128, -0.5271, 2.2459, 1.5188]) tensor([0.0576, 0.0381, 0.6096, 0.2946]) -Greedy action tensor([-0.2239, -0.0375, 0.0026, 1.0684]) tensor([0.1408, 0.1697, 0.1766, 0.5128]) -Greedy action tensor([ 1.0527, -0.5628, 1.2062, -0.4160]) tensor([0.3854, 0.0766, 0.4493, 0.0887]) -Greedy action tensor([-0.3034, 1.1787, -0.5686, 0.8433]) tensor([0.1073, 0.4725, 0.0823, 0.3379]) -Greedy action tensor([0.1596, 0.1000, 0.3475, 0.8364]) tensor([0.1954, 0.1841, 0.2359, 0.3846]) -Greedy action tensor([ 0.8206, -0.5891, 0.0135, -0.4817]) tensor([0.5096, 0.1244, 0.2274, 0.1386]) -Greedy action tensor([ 0.5250, -0.0971, -0.0358, -0.1666]) tensor([0.3834, 0.2058, 0.2188, 0.1920]) -Greedy action tensor([ 0.6852, -0.3844, -0.1426, -0.1261]) tensor([0.4496, 0.1543, 0.1964, 0.1997]) -Greedy action tensor([ 0.1095, 0.0627, 0.1988, -0.2703]) tensor([0.2680, 0.2557, 0.2930, 0.1833]) -Greedy action tensor([ 0.3109, -0.1087, 0.0221, -0.2746]) tensor([0.3375, 0.2218, 0.2528, 0.1879]) -Greedy action tensor([ 0.3855, -0.2771, -0.2244, -0.4604]) tensor([0.4019, 0.2072, 0.2184, 0.1725]) -Greedy action tensor([ 0.3522, -0.3309, -0.1083, -0.2051]) tensor([0.3692, 0.1865, 0.2329, 0.2114]) -Greedy action tensor([ 0.3108, -0.1206, -0.0253, -0.2747]) tensor([0.3423, 0.2224, 0.2446, 0.1906]) -Greedy action tensor([ 0.6922, -0.1010, 0.0112, -0.3774]) tensor([0.4345, 0.1966, 0.2199, 0.1491]) -Greedy action tensor([ 0.3380, -0.0203, -0.0625, -0.2335]) tensor([0.3409, 0.2382, 0.2284, 0.1925]) -Greedy action tensor([ 0.5157, -0.4102, 0.2063, -0.5077]) tensor([0.4017, 0.1591, 0.2948, 0.1444]) -Greedy action tensor([ 0.8870, -0.1974, 0.0867, -0.2774]) tensor([0.4763, 0.1611, 0.2140, 0.1487]) -Greedy action tensor([ 0.5650, -0.2674, 0.0707, -0.2723]) tensor([0.4036, 0.1756, 0.2462, 0.1747]) -Greedy action tensor([ 0.9034, -0.5144, 0.0237, -0.4558]) tensor([0.5225, 0.1266, 0.2168, 0.1342]) -Greedy action tensor([ 0.9690, -0.7406, -0.1736, -0.5613]) tensor([0.5826, 0.1054, 0.1858, 0.1261]) -Greedy action tensor([ 0.7788, -0.5871, 0.0251, -0.5839]) tensor([0.5046, 0.1287, 0.2375, 0.1292]) -Greedy action tensor([ 1.1912, -0.9976, 0.0884, -0.7094]) tensor([0.6276, 0.0703, 0.2083, 0.0938]) -Greedy action tensor([ 0.2252, -0.0035, -0.0391, -0.1839]) tensor([0.3098, 0.2465, 0.2379, 0.2058]) -Greedy action tensor([ 0.8549, -0.5928, -0.0802, -0.4399]) tensor([0.5259, 0.1236, 0.2064, 0.1441]) -Greedy action tensor([ 0.4457, -0.1842, 0.0191, -0.3020]) tensor([0.3761, 0.2003, 0.2455, 0.1781]) -Greedy action tensor([ 0.3584, 0.1835, 0.0093, -0.1629]) tensor([0.3186, 0.2675, 0.2247, 0.1892]) -Greedy action tensor([ 0.6795, -0.1898, -0.2571, -0.2485]) tensor([0.4532, 0.1900, 0.1776, 0.1792]) -Greedy action tensor([ 0.5032, -0.1347, 0.1078, -0.6107]) tensor([0.3952, 0.2089, 0.2662, 0.1297]) -Greedy action tensor([ 0.3379, -0.0816, -0.1621, -0.2374]) tensor([0.3538, 0.2326, 0.2146, 0.1990]) -Greedy action tensor([ 0.8275, -0.1187, -0.1660, -0.4265]) tensor([0.4893, 0.1899, 0.1812, 0.1396]) -Greedy action tensor([ 0.6122, -0.3441, -0.0193, -0.0987]) tensor([0.4154, 0.1596, 0.2209, 0.2040]) -Greedy action tensor([-0.0765, -0.0291, -0.1434, -0.2622]) tensor([0.2622, 0.2749, 0.2452, 0.2177]) -Greedy action tensor([ 0.5222, -0.4892, -0.2030, -0.1976]) tensor([0.4283, 0.1558, 0.2074, 0.2085]) -Greedy action tensor([ 0.6378, -0.7917, 0.0591, -0.3257]) tensor([0.4584, 0.1098, 0.2570, 0.1749]) -Greedy action tensor([ 0.9734, -0.8931, -0.1005, -0.3778]) tensor([0.5697, 0.0881, 0.1947, 0.1475]) -Greedy action tensor([ 0.8776, -0.5679, -0.0551, -0.4089]) tensor([0.5248, 0.1237, 0.2065, 0.1450]) -Greedy action tensor([ 0.2397, 0.1348, -0.0858, -0.3971]) tensor([0.3173, 0.2857, 0.2291, 0.1679]) -Greedy action tensor([ 0.8899, -0.5328, 0.0588, -0.3511]) tensor([0.5087, 0.1226, 0.2216, 0.1471]) -Greedy action tensor([ 0.8732, -0.6699, -0.0804, -0.3754]) tensor([0.5302, 0.1133, 0.2043, 0.1521]) -Greedy action tensor([ 1.3392, -1.5656, -0.0452, -0.4602]) tensor([0.6800, 0.0372, 0.1703, 0.1125]) -Greedy action tensor([ 0.7169, -0.0297, -0.0921, -0.4100]) tensor([0.4458, 0.2113, 0.1985, 0.1444]) -Greedy action tensor([ 0.4642, -0.1184, -0.0354, -0.1349]) tensor([0.3684, 0.2057, 0.2235, 0.2024]) -Greedy action tensor([ 0.5506, -0.3201, 0.0855, -0.2940]) tensor([0.4038, 0.1691, 0.2536, 0.1735]) -Greedy action tensor([ 0.4269, -0.3450, -0.1005, -0.2254]) tensor([0.3886, 0.1796, 0.2294, 0.2024]) -Greedy action tensor([ 1.1047, -0.6539, -0.1528, -0.5737]) tensor([0.6085, 0.1048, 0.1730, 0.1136]) -Greedy action tensor([ 0.6232, -0.3453, -0.0918, -0.4191]) tensor([0.4502, 0.1709, 0.2202, 0.1587]) -Greedy action tensor([ 1.3096, -0.9392, 0.0497, -0.5787]) tensor([0.6491, 0.0685, 0.1841, 0.0982]) -Greedy action tensor([ 0.5906, 0.1484, -0.1254, 0.0812]) tensor([0.3660, 0.2352, 0.1789, 0.2199]) -Greedy action tensor([ 0.7597, -0.4968, -0.0568, -0.4943]) tensor([0.4970, 0.1415, 0.2197, 0.1418]) -Greedy action tensor([ 0.6949, -0.3047, 0.0453, -0.0812]) tensor([0.4254, 0.1566, 0.2222, 0.1958]) -Greedy action tensor([ 0.7011, 0.1009, -0.1271, -0.3389]) tensor([0.4275, 0.2346, 0.1868, 0.1511]) -Greedy action tensor([ 0.7524, -0.4420, -0.0806, -0.2246]) tensor([0.4730, 0.1433, 0.2056, 0.1781]) -Greedy action tensor([ 0.8448, -0.9591, 0.0686, -0.6419]) tensor([0.5403, 0.0890, 0.2486, 0.1222]) -Greedy action tensor([ 1.1258, -0.4226, 0.1316, -0.6500]) tensor([0.5708, 0.1213, 0.2112, 0.0967]) -Greedy action tensor([ 0.8323, -1.1190, -0.1177, -0.7314]) tensor([0.5753, 0.0817, 0.2225, 0.1204]) -Greedy action tensor([ 0.7508, -0.3268, -0.0671, -0.0680]) tensor([0.4499, 0.1532, 0.1986, 0.1984]) -Greedy action tensor([ 0.2412, -0.1061, 0.1520, -0.1722]) tensor([0.3046, 0.2152, 0.2786, 0.2015]) -Greedy action tensor([ 0.3793, -0.1035, -0.0105, -0.4497]) tensor([0.3662, 0.2260, 0.2480, 0.1598]) -Greedy action tensor([ 0.6005, -0.4452, -0.1544, -0.5026]) tensor([0.4644, 0.1632, 0.2183, 0.1541]) -Greedy action tensor([ 1.0044, -0.5660, -0.1714, -0.7051]) tensor([0.5891, 0.1225, 0.1818, 0.1066]) -Greedy action tensor([ 1.0969, -0.8791, -0.0966, -0.6910]) tensor([0.6215, 0.0861, 0.1884, 0.1040]) -Greedy action tensor([ 0.7550, -0.3631, -0.1355, -0.2227]) tensor([0.4731, 0.1547, 0.1942, 0.1780]) -Greedy action tensor([ 0.3261, -0.0043, -0.0665, -0.5591]) tensor([0.3563, 0.2561, 0.2406, 0.1470]) -Greedy action tensor([ 0.4050, -0.0833, 0.0014, -0.0665]) tensor([0.3441, 0.2112, 0.2299, 0.2148]) -Greedy action tensor([ 0.4783, -0.0925, -0.0134, -0.2813]) tensor([0.3781, 0.2137, 0.2313, 0.1769]) -Greedy action tensor([ 0.8031, -0.3968, -0.1545, -0.4022]) tensor([0.5039, 0.1518, 0.1934, 0.1510]) -Greedy action tensor([ 0.7473, -0.1825, -0.0744, -0.0569]) tensor([0.4382, 0.1730, 0.1927, 0.1961]) -Greedy action tensor([ 1.0222, -0.9469, -0.0322, -0.5168]) tensor([0.5873, 0.0820, 0.2046, 0.1260]) -Greedy action tensor([ 0.2501, -0.0161, -0.0675, -0.0489]) tensor([0.3091, 0.2368, 0.2249, 0.2292]) -Greedy action tensor([ 0.3527, -0.0819, 0.0790, -0.1628]) tensor([0.3327, 0.2155, 0.2531, 0.1987]) -Greedy action tensor([ 0.4491, -0.2667, -0.0613, -0.3071]) tensor([0.3908, 0.1910, 0.2346, 0.1835]) -Greedy action tensor([ 0.4955, 0.0835, -0.0127, -0.2392]) tensor([0.3645, 0.2414, 0.2193, 0.1748]) -Greedy action tensor([ 0.7914, -0.6771, 0.0442, -0.2144]) tensor([0.4831, 0.1113, 0.2289, 0.1767]) -Greedy action tensor([ 1.0463, -0.9743, 0.0236, -0.6513]) tensor([0.5969, 0.0791, 0.2147, 0.1093]) -Greedy action tensor([ 0.7199, -0.1974, 0.1206, -0.1701]) tensor([0.4238, 0.1694, 0.2328, 0.1741]) -Greedy action tensor([ 0.8081, -0.5366, -0.1317, -0.4321]) tensor([0.5153, 0.1343, 0.2013, 0.1491]) -Greedy action tensor([ 0.5517, -0.3262, -0.0839, -0.0548]) tensor([0.4015, 0.1669, 0.2127, 0.2189]) -Greedy action tensor([ 0.9207, -0.5842, 0.1544, -0.6473]) tensor([0.5276, 0.1172, 0.2452, 0.1100]) -Greedy action tensor([ 0.8075, -0.2278, -0.0095, -0.2749]) tensor([0.4682, 0.1663, 0.2069, 0.1586]) -Greedy action tensor([ 0.9212, -0.0657, -0.3006, -0.0714]) tensor([0.4907, 0.1829, 0.1446, 0.1818]) -Greedy action tensor([ 0.5096, -0.1525, -0.1391, -0.4200]) tensor([0.4110, 0.2120, 0.2148, 0.1622]) -Greedy action tensor([ 0.6101, -0.2348, 0.1987, -0.4814]) tensor([0.4118, 0.1769, 0.2729, 0.1383]) -Greedy action tensor([ 0.5274, -0.2927, 0.0451, -0.2559]) tensor([0.3977, 0.1751, 0.2455, 0.1817]) -Greedy action tensor([ 0.2232, -0.1207, -0.1415, -0.4153]) tensor([0.3411, 0.2418, 0.2369, 0.1801]) -Greedy action tensor([ 1.2040, -1.3817, -0.0114, -0.5743]) tensor([0.6490, 0.0489, 0.1925, 0.1096]) -Greedy action tensor([ 0.9688, -0.4579, 0.0064, -0.5577]) tensor([0.5437, 0.1305, 0.2077, 0.1181]) -Greedy action tensor([-1.6618, -0.4763, 0.5646, 0.0377]) tensor([0.0526, 0.1721, 0.4875, 0.2878]) -Greedy action tensor([-0.9397, 0.9700, 0.1327, 0.8072]) tensor([0.0609, 0.4114, 0.1781, 0.3496]) -Greedy action tensor([-1.8840, -0.4431, 0.6314, -0.1489]) tensor([0.0430, 0.1816, 0.5317, 0.2437]) -Greedy action tensor([-1.5028, -0.3122, 0.4192, 0.1786]) tensor([0.0606, 0.1994, 0.4143, 0.3257]) -Greedy action tensor([-1.8255, -0.4827, 0.2355, -0.4588]) tensor([0.0602, 0.2306, 0.4730, 0.2362]) -Greedy action tensor([-1.8036, 0.1647, 0.4984, -0.1109]) tensor([0.0424, 0.3035, 0.4237, 0.2304]) -Greedy action tensor([-1.7859, -0.4284, 0.5791, -0.0920]) tensor([0.0477, 0.1853, 0.5076, 0.2594]) -Greedy action tensor([-1.8480, -0.4143, 0.6608, -0.0850]) tensor([0.0429, 0.1799, 0.5272, 0.2500]) -Greedy action tensor([-1.1893, -0.5027, 0.1920, 0.3090]) tensor([0.0874, 0.1737, 0.3479, 0.3911]) -Greedy action tensor([-1.6326, -0.1318, 0.5586, 0.0393]) tensor([0.0506, 0.2271, 0.4529, 0.2694]) -Greedy action tensor([-1.8922, -0.4509, 0.6416, -0.1442]) tensor([0.0424, 0.1793, 0.5346, 0.2436]) -Greedy action tensor([-1.7945, -0.4523, 0.5973, -0.1584]) tensor([0.0479, 0.1832, 0.5232, 0.2458]) -Greedy action tensor([-1.0365, -0.3412, 0.7772, 1.0547]) tensor([0.0580, 0.1163, 0.3559, 0.4697]) -Greedy action tensor([-1.7679, -0.4894, 0.5907, -0.0148]) tensor([0.0478, 0.1715, 0.5051, 0.2757]) -Greedy action tensor([-1.8449, -0.2914, 0.5755, -0.1027]) tensor([0.0441, 0.2084, 0.4959, 0.2517]) -Greedy action tensor([-1.9367, -0.4535, 0.6642, -0.1766]) tensor([0.0405, 0.1785, 0.5457, 0.2354]) -Greedy action tensor([-1.7941, 0.9270, 0.0834, -0.2811]) tensor([0.0367, 0.5572, 0.2397, 0.1665]) -Greedy action tensor([-1.0451, -0.5566, 0.2401, 0.3680]) tensor([0.0966, 0.1574, 0.3492, 0.3968]) -Greedy action tensor([-1.0582, -0.4932, 0.4688, -0.3771]) tensor([0.1071, 0.1884, 0.4930, 0.2116]) -Greedy action tensor([-1.8929, -0.3331, 0.6363, -0.1335]) tensor([0.0415, 0.1973, 0.5202, 0.2409]) -Greedy action tensor([-1.6059, -0.1096, 0.4560, -0.0414]) tensor([0.0552, 0.2466, 0.4341, 0.2640]) -Greedy action tensor([-1.6525, -1.0098, 0.0478, -0.6735]) tensor([0.0906, 0.1723, 0.4960, 0.2411]) -Greedy action tensor([-0.2620, 0.0657, 0.7960, 1.5475]) tensor([0.0879, 0.1220, 0.2532, 0.5369]) -Greedy action tensor([-1.9248, -0.3807, 0.6428, -0.1715]) tensor([0.0408, 0.1912, 0.5322, 0.2357]) -Greedy action tensor([-1.0604, 0.8267, 0.1508, 0.4423]) tensor([0.0647, 0.4272, 0.2173, 0.2908]) -Greedy action tensor([-1.1237, -0.6269, 0.2722, 0.2886]) tensor([0.0927, 0.1523, 0.3744, 0.3806]) -Greedy action tensor([-0.8836, 0.0096, 0.7912, 1.2319]) tensor([0.0586, 0.1431, 0.3126, 0.4857]) -Greedy action tensor([-1.2891, -0.6032, 0.3494, 0.2421]) tensor([0.0784, 0.1557, 0.4035, 0.3625]) -Greedy action tensor([-1.8810, -0.4579, 0.6741, -0.0995]) tensor([0.0417, 0.1732, 0.5372, 0.2478]) -Greedy action tensor([-1.4124, -0.5557, 0.4045, 0.0464]) tensor([0.0724, 0.1706, 0.4456, 0.3115]) -Greedy action tensor([-1.9088, -0.3663, 0.6304, -0.1563]) tensor([0.0415, 0.1939, 0.5254, 0.2392]) -Greedy action tensor([-1.6082, -0.6416, 0.1081, -0.6477]) tensor([0.0847, 0.2227, 0.4713, 0.2213]) -Greedy action tensor([-1.9299, -0.4461, 0.6579, -0.1749]) tensor([0.0408, 0.1800, 0.5430, 0.2361]) -Greedy action tensor([-1.9362, -0.4484, 0.6688, -0.1701]) tensor([0.0403, 0.1785, 0.5455, 0.2357]) -Greedy action tensor([-1.9185, -0.4051, 0.6473, -0.1625]) tensor([0.0411, 0.1866, 0.5345, 0.2378]) -Greedy action tensor([-0.7431, 0.1019, -0.0432, 0.5582]) tensor([0.1109, 0.2582, 0.2233, 0.4075]) -Greedy action tensor([-1.9156, -0.3804, 0.6510, -0.1559]) tensor([0.0409, 0.1897, 0.5320, 0.2374]) -Greedy action tensor([-1.6585, -0.3840, 0.5127, -0.0239]) tensor([0.0541, 0.1936, 0.4747, 0.2775]) -Greedy action tensor([-1.5898, -0.5290, 0.4881, 0.0863]) tensor([0.0581, 0.1677, 0.4638, 0.3103]) -Greedy action tensor([-1.7004, -0.7067, 0.0741, -0.3508]) tensor([0.0743, 0.2008, 0.4383, 0.2866]) -Greedy action tensor([-1.4490, -0.0543, 0.6498, 0.2721]) tensor([0.0532, 0.2148, 0.4343, 0.2977]) -Greedy action tensor([-1.3724, -0.5379, 0.6143, 0.7670]) tensor([0.0524, 0.1207, 0.3820, 0.4450]) -Greedy action tensor([-1.4160, -0.5452, 0.3489, 0.1346]) tensor([0.0717, 0.1713, 0.4189, 0.3381]) -Greedy action tensor([-1.5459, -0.3930, 0.6817, -0.4801]) tensor([0.0612, 0.1937, 0.5675, 0.1776]) -Greedy action tensor([-1.1789, 0.2615, 0.2850, -0.0202]) tensor([0.0785, 0.3317, 0.3395, 0.2502]) -Greedy action tensor([-1.6111, -0.5206, 0.4830, 0.0150]) tensor([0.0582, 0.1732, 0.4726, 0.2960]) -Greedy action tensor([-1.4154, -0.6034, 0.3827, 0.1319]) tensor([0.0715, 0.1610, 0.4316, 0.3359]) -Greedy action tensor([-1.9279, -0.7792, 0.0209, -0.3585]) tensor([0.0626, 0.1974, 0.4394, 0.3006]) -Greedy action tensor([-0.9425, -0.3336, 0.2656, -0.3505]) tensor([0.1251, 0.2300, 0.4188, 0.2261]) -Greedy action tensor([-1.3805, -0.5836, 0.4032, -0.0166]) tensor([0.0764, 0.1696, 0.4550, 0.2990]) -Greedy action tensor([-1.6084, -0.1871, 0.5798, 0.3023]) tensor([0.0480, 0.1990, 0.4284, 0.3246]) -Greedy action tensor([-1.8985, -0.4490, 0.6415, -0.1565]) tensor([0.0423, 0.1802, 0.5362, 0.2414]) -Greedy action tensor([-0.8180, 0.8007, 0.1936, -0.1518]) tensor([0.0931, 0.4697, 0.2560, 0.1812]) -Greedy action tensor([-1.7263, -0.3815, 0.5604, -0.1284]) tensor([0.0510, 0.1956, 0.5016, 0.2519]) -Greedy action tensor([-1.5515, -0.1048, 0.5289, 0.3953]) tensor([0.0493, 0.2097, 0.3952, 0.3458]) -Greedy action tensor([-1.9275, -0.4470, 0.6559, -0.1765]) tensor([0.0410, 0.1801, 0.5428, 0.2361]) -Greedy action tensor([-1.8144, -0.4343, 0.6049, -0.0910]) tensor([0.0458, 0.1822, 0.5151, 0.2569]) -Greedy action tensor([-1.8029, -0.5043, 0.6547, -0.0122]) tensor([0.0448, 0.1641, 0.5228, 0.2683]) -Greedy action tensor([-1.6300, 0.3213, 0.4128, -0.0541]) tensor([0.0486, 0.3419, 0.3746, 0.2349]) -Greedy action tensor([-1.1559, -0.3387, 0.3817, -0.1680]) tensor([0.0943, 0.2135, 0.4389, 0.2533]) -Greedy action tensor([-1.8836, -0.4344, 0.6340, -0.1431]) tensor([0.0428, 0.1824, 0.5308, 0.2440]) -Greedy action tensor([-1.8211, -0.4807, 0.6998, 0.0279]) tensor([0.0424, 0.1618, 0.5268, 0.2690]) -Greedy action tensor([-1.8996, -0.7969, 0.2389, -0.2611]) tensor([0.0567, 0.1707, 0.4809, 0.2917]) -Greedy action tensor([-1.0651, -0.6720, 1.0949, 1.4289]) tensor([0.0430, 0.0637, 0.3728, 0.5206]) -Greedy action tensor([-1.3265, -0.6499, 0.5266, 0.1814]) tensor([0.0721, 0.1419, 0.4601, 0.3258]) -Greedy action tensor([-1.1457, -0.8832, 0.3557, 1.2509]) tensor([0.0563, 0.0731, 0.2525, 0.6181]) -Greedy action tensor([-1.8374, -0.3252, 0.5773, -0.1009]) tensor([0.0446, 0.2025, 0.4994, 0.2534]) -Greedy action tensor([-1.0972, -0.5592, 0.4432, 0.9467]) tensor([0.0662, 0.1134, 0.3090, 0.5113]) -Greedy action tensor([-1.7646, -0.5184, 0.7364, 0.1395]) tensor([0.0428, 0.1487, 0.5215, 0.2871]) -Greedy action tensor([-1.4432, -0.5300, 0.4476, 0.2867]) tensor([0.0635, 0.1582, 0.4204, 0.3579]) -Greedy action tensor([-1.8153, -0.4856, 0.6905, 0.0247]) tensor([0.0429, 0.1620, 0.5252, 0.2699]) -Greedy action tensor([-1.8880, 0.0080, 0.2572, -0.2885]) tensor([0.0473, 0.3148, 0.4039, 0.2340]) -Greedy action tensor([-1.4975, -0.5281, 0.4284, 0.0977]) tensor([0.0648, 0.1709, 0.4448, 0.3195]) -Greedy action tensor([-1.6315, -0.4741, 0.6290, 0.2039]) tensor([0.0499, 0.1588, 0.4785, 0.3128]) -Greedy action tensor([-1.7755, -0.5354, 0.8098, 0.1752]) tensor([0.0404, 0.1396, 0.5359, 0.2841]) -Greedy action tensor([-1.4472, -0.5602, 0.4018, 0.1283]) tensor([0.0684, 0.1661, 0.4347, 0.3307]) -Greedy action tensor([-1.8298, -0.2594, 0.5865, -0.1335]) tensor([0.0445, 0.2140, 0.4987, 0.2427]) -Greedy action tensor([-1.6271, -0.0715, 0.4538, -0.0233]) tensor([0.0534, 0.2531, 0.4279, 0.2656]) -Greedy action tensor([-1.8048, -0.5112, 0.6815, -0.0721]) tensor([0.0448, 0.1633, 0.5384, 0.2534]) -Greedy action tensor([-1.3742, -0.2755, 0.4843, -0.6917]) tensor([0.0807, 0.2421, 0.5175, 0.1597]) -Greedy action tensor([-1.1655, -0.2910, 1.0833, 1.2494]) tensor([0.0416, 0.0996, 0.3938, 0.4650]) -Greedy action tensor([ 0.3023, -0.2214, 0.1107, -0.1144]) tensor([0.3250, 0.1925, 0.2683, 0.2142]) -Greedy action tensor([ 0.4024, 0.0197, -0.4214, 0.0920]) tensor([0.3504, 0.2390, 0.1537, 0.2569]) -Greedy action tensor([ 0.8635, -0.7409, -0.1900, 0.4947]) tensor([0.4462, 0.0897, 0.1556, 0.3086]) -Greedy action tensor([ 0.5538, -0.0036, 0.0988, -0.1786]) tensor([0.3721, 0.2131, 0.2360, 0.1789]) -Greedy action tensor([ 0.5512, -0.4647, -0.1186, 0.3599]) tensor([0.3704, 0.1341, 0.1896, 0.3059]) -Greedy action tensor([ 1.1810, -0.4835, -0.1853, 0.2084]) tensor([0.5487, 0.1039, 0.1400, 0.2075]) -Greedy action tensor([ 1.2209, -0.4735, -0.2519, 0.2236]) tensor([0.5612, 0.1031, 0.1287, 0.2070]) -Greedy action tensor([ 0.9941, 0.0496, -0.6809, 0.3819]) tensor([0.4721, 0.1836, 0.0884, 0.2559]) -Greedy action tensor([ 1.3715, -0.2922, -0.3786, 0.2857]) tensor([0.5880, 0.1114, 0.1022, 0.1985]) -Greedy action tensor([ 0.8800, -0.2972, -0.0197, 0.0761]) tensor([0.4624, 0.1425, 0.1881, 0.2070]) -Greedy action tensor([ 0.8539, 0.2397, 0.3314, -0.3183]) tensor([0.4092, 0.2214, 0.2427, 0.1267]) -Greedy action tensor([ 1.5734, -0.7387, -0.3009, 0.5849]) tensor([0.6155, 0.0610, 0.0945, 0.2291]) -Greedy action tensor([ 1.2239, -0.8784, -0.2974, 0.2038]) tensor([0.5878, 0.0718, 0.1284, 0.2119]) -Greedy action tensor([ 0.9096, -0.1122, -0.0975, 0.2730]) tensor([0.4436, 0.1597, 0.1620, 0.2347]) -Greedy action tensor([ 0.9129, 0.0510, -0.0346, 0.0847]) tensor([0.4451, 0.1880, 0.1726, 0.1944]) -Greedy action tensor([ 1.4223, -0.2247, -0.3044, 0.7191]) tensor([0.5361, 0.1033, 0.0953, 0.2653]) -Greedy action tensor([ 1.4401, -0.7379, -0.4369, 0.1131]) tensor([0.6529, 0.0740, 0.0999, 0.1732]) -Greedy action tensor([ 1.8037, -1.0515, -0.2429, 0.3298]) tensor([0.7063, 0.0406, 0.0912, 0.1618]) -Greedy action tensor([ 1.1601, 0.1289, -0.1419, 0.3655]) tensor([0.4807, 0.1714, 0.1307, 0.2172]) -Greedy action tensor([ 1.7364, -0.4112, -0.7161, 0.6265]) tensor([0.6526, 0.0762, 0.0562, 0.2151]) -Greedy action tensor([ 0.8036, -0.1087, -0.0512, -0.2167]) tensor([0.4571, 0.1836, 0.1945, 0.1648]) -Greedy action tensor([ 1.0213e+00, -3.5761e-01, -1.2177e-01, 2.6079e-04]) tensor([0.5179, 0.1304, 0.1651, 0.1866]) -Greedy action tensor([ 0.8971, -0.3801, -0.5426, 0.5409]) tensor([0.4512, 0.1258, 0.1069, 0.3160]) -Greedy action tensor([ 1.1322, -0.6101, -0.1018, 0.5060]) tensor([0.4998, 0.0875, 0.1455, 0.2672]) -Greedy action tensor([ 0.7230, -0.3665, -0.1375, -0.1202]) tensor([0.4567, 0.1536, 0.1932, 0.1965]) -Greedy action tensor([ 1.3330, -0.6968, -0.1257, 0.2616]) tensor([0.5860, 0.0770, 0.1363, 0.2007]) -Greedy action tensor([ 1.3811, -0.5650, -0.2975, 0.4250]) tensor([0.5835, 0.0833, 0.1089, 0.2243]) -Greedy action tensor([ 0.9430, 0.0563, -0.0042, -0.2934]) tensor([0.4784, 0.1971, 0.1855, 0.1389]) -Greedy action tensor([ 0.9625, -0.4865, -0.5371, 0.7920]) tensor([0.4345, 0.1020, 0.0970, 0.3664]) -Greedy action tensor([ 0.8525, -0.3999, -0.5983, 0.5914]) tensor([0.4366, 0.1248, 0.1023, 0.3363]) -Greedy action tensor([ 1.7644, -0.7333, -0.1918, 0.2842]) tensor([0.6890, 0.0567, 0.0974, 0.1568]) -Greedy action tensor([ 1.9992, -0.7326, -0.6385, 0.1419]) tensor([0.7736, 0.0504, 0.0553, 0.1208]) -Greedy action tensor([ 1.6010, -0.4590, -0.3660, 0.1808]) tensor([0.6627, 0.0845, 0.0927, 0.1602]) -Greedy action tensor([ 2.4018, -0.2081, -0.6039, 0.2568]) tensor([0.8064, 0.0593, 0.0399, 0.0944]) -Greedy action tensor([ 0.2634, 0.0167, 0.1442, -0.0846]) tensor([0.2963, 0.2315, 0.2630, 0.2092]) -Greedy action tensor([ 0.9673, -0.4514, -0.1387, 0.1457]) tensor([0.4969, 0.1203, 0.1644, 0.2185]) -Greedy action tensor([ 0.8837, -0.1372, -0.1475, -0.0284]) tensor([0.4720, 0.1701, 0.1683, 0.1896]) -Greedy action tensor([ 1.6962, -0.1400, -0.4389, 0.5036]) tensor([0.6325, 0.1008, 0.0748, 0.1919]) -Greedy action tensor([ 1.3410, -0.3222, -0.1174, 0.1851]) tensor([0.5757, 0.1091, 0.1339, 0.1812]) -Greedy action tensor([ 1.8496, -0.5865, 0.1117, 0.4474]) tensor([0.6625, 0.0580, 0.1165, 0.1630]) -Greedy action tensor([ 1.2147, -0.4560, -0.2357, 0.6073]) tensor([0.5083, 0.0956, 0.1192, 0.2769]) -Greedy action tensor([ 1.2607, -0.2498, -0.3585, 0.0400]) tensor([0.5835, 0.1288, 0.1156, 0.1721]) -Greedy action tensor([ 1.4082, -0.3164, -0.2912, 0.5863]) tensor([0.5553, 0.0990, 0.1015, 0.2441]) -Greedy action tensor([ 1.6097, -0.0572, -0.4051, 0.0649]) tensor([0.6512, 0.1230, 0.0868, 0.1389]) -Greedy action tensor([ 1.4748, -0.5122, 0.0719, 0.0513]) tensor([0.6158, 0.0844, 0.1514, 0.1483]) -Greedy action tensor([ 1.3118, 0.1348, -0.7247, 0.4207]) tensor([0.5409, 0.1667, 0.0706, 0.2219]) -Greedy action tensor([ 0.7772, -0.5063, -0.3022, 0.1253]) tensor([0.4677, 0.1296, 0.1589, 0.2437]) -Greedy action tensor([ 1.0950, -0.4293, -0.4694, 0.3465]) tensor([0.5263, 0.1146, 0.1101, 0.2490]) -Greedy action tensor([ 0.8781, -0.3518, -0.1129, 0.1390]) tensor([0.4671, 0.1365, 0.1734, 0.2230]) -Greedy action tensor([ 0.7712, 0.1994, -0.1459, 0.1762]) tensor([0.3975, 0.2244, 0.1589, 0.2193]) -Greedy action tensor([ 1.2083, -0.1356, -0.6260, 0.1319]) tensor([0.5677, 0.1481, 0.0907, 0.1935]) -Greedy action tensor([ 1.1278, -0.6576, -0.4861, 0.9077]) tensor([0.4610, 0.0773, 0.0918, 0.3699]) -Greedy action tensor([ 2.2606, -0.8581, -0.0803, 1.1224]) tensor([0.6845, 0.0303, 0.0659, 0.2193]) -Greedy action tensor([ 0.7374, -0.4614, -0.0895, -0.1004]) tensor([0.4605, 0.1389, 0.2014, 0.1992]) -Greedy action tensor([ 1.9611, -0.1662, -0.3029, 0.3937]) tensor([0.6985, 0.0832, 0.0726, 0.1457]) -Greedy action tensor([1.2718, 0.1368, 0.0440, 0.3521]) tensor([0.4968, 0.1597, 0.1455, 0.1980]) -Greedy action tensor([ 1.2041, -0.6905, -0.1992, 0.2606]) tensor([0.5601, 0.0842, 0.1377, 0.2180]) -Greedy action tensor([ 0.6212, -0.0563, -0.1993, 0.1115]) tensor([0.3924, 0.1993, 0.1727, 0.2357]) -Greedy action tensor([ 1.0852, -0.3205, -0.0387, 0.1184]) tensor([0.5127, 0.1257, 0.1666, 0.1950]) -Greedy action tensor([ 1.5451, -0.5529, -0.1017, 0.2112]) tensor([0.6334, 0.0777, 0.1220, 0.1669]) -Greedy action tensor([ 0.8498, -0.3232, -0.1264, 0.4276]) tensor([0.4270, 0.1321, 0.1609, 0.2800]) -Greedy action tensor([ 1.7927, -0.7781, -0.3651, 0.5789]) tensor([0.6715, 0.0514, 0.0776, 0.1995]) -Greedy action tensor([ 1.1008, -0.2357, -0.1277, 0.1022]) tensor([0.5198, 0.1366, 0.1521, 0.1915]) -Greedy action tensor([ 0.5498, -0.0107, 0.1719, -0.3774]) tensor([0.3771, 0.2153, 0.2584, 0.1492]) -Greedy action tensor([ 1.4590, -0.2391, -0.1488, 0.5028]) tensor([0.5657, 0.1035, 0.1133, 0.2174]) -Greedy action tensor([ 1.6823, -0.8131, -0.1706, 0.6692]) tensor([0.6241, 0.0515, 0.0978, 0.2266]) -Greedy action tensor([ 1.3855, -0.4485, -0.5360, 0.3547]) tensor([0.6014, 0.0961, 0.0880, 0.2145]) -Greedy action tensor([ 0.7521, -0.3861, -0.3459, 0.6272]) tensor([0.3942, 0.1263, 0.1315, 0.3480]) -Greedy action tensor([ 1.9419, -0.4649, -0.3934, 0.2931]) tensor([0.7251, 0.0653, 0.0702, 0.1394]) -Greedy action tensor([ 1.1940, -0.2756, -0.0250, 0.3254]) tensor([0.5141, 0.1183, 0.1519, 0.2157]) -Greedy action tensor([ 1.0939, -0.3888, 0.0368, 0.1994]) tensor([0.5042, 0.1145, 0.1752, 0.2061]) -Greedy action tensor([ 0.9573, -0.5245, -0.3323, 0.4394]) tensor([0.4765, 0.1083, 0.1312, 0.2839]) -Greedy action tensor([ 0.8867, -0.4680, -0.0828, 0.1947]) tensor([0.4678, 0.1207, 0.1774, 0.2341]) -Greedy action tensor([ 1.6015, -0.9606, 0.2411, 0.1667]) tensor([0.6362, 0.0491, 0.1632, 0.1515]) -Greedy action tensor([ 1.0033, -0.2647, -0.6403, 0.5687]) tensor([0.4712, 0.1326, 0.0911, 0.3051]) -Greedy action tensor([ 1.1801, -0.2505, -0.0336, 0.1405]) tensor([0.5291, 0.1266, 0.1572, 0.1871]) -Greedy action tensor([ 1.8788, -1.3388, -0.1688, 0.1612]) tensor([0.7415, 0.0297, 0.0957, 0.1331]) -Greedy action tensor([ 2.1146, -0.3816, 0.0949, 0.1410]) tensor([0.7385, 0.0609, 0.0980, 0.1026]) -Greedy action tensor([ 0.6934, -0.3510, -0.0874, -0.1442]) tensor([0.4459, 0.1569, 0.2042, 0.1930]) -Greedy action tensor([ 1.9127, -0.3419, -0.6179, 0.3332]) tensor([0.7191, 0.0754, 0.0573, 0.1482]) -Greedy action tensor([ 1.1332, -0.5261, -0.6517, 0.4662]) tensor([0.5344, 0.1017, 0.0897, 0.2743]) -Greedy action tensor([-0.0868, -0.2961, -0.4128, -0.1088]) tensor([0.2848, 0.2310, 0.2056, 0.2786]) -Greedy action tensor([ 0.3037, -1.5365, -0.0521, 1.6341]) tensor([0.1772, 0.0281, 0.1242, 0.6704]) -Greedy action tensor([ 0.3415, -1.4197, 1.2248, 2.1738]) tensor([0.1016, 0.0175, 0.2458, 0.6350]) -Greedy action tensor([ 1.3080, -0.4426, -0.0114, 1.4316]) tensor([0.3887, 0.0675, 0.1039, 0.4399]) -Greedy action tensor([ 0.5810, -0.4232, 0.8137, -0.1276]) tensor([0.3204, 0.1174, 0.4044, 0.1578]) -Greedy action tensor([ 1.2346, -1.0057, 0.7982, 0.9865]) tensor([0.3948, 0.0420, 0.2552, 0.3080]) -Greedy action tensor([-0.8687, 0.1220, 0.3608, -0.2786]) tensor([0.1122, 0.3020, 0.3835, 0.2023]) -Greedy action tensor([ 0.2178, -2.0772, -0.7652, 0.3234]) tensor([0.3866, 0.0390, 0.1447, 0.4297]) -Greedy action tensor([-1.0540, -0.9276, 1.6698, -0.5305]) tensor([0.0525, 0.0595, 0.7995, 0.0886]) -Greedy action tensor([-0.2618, -0.1933, -0.2024, 0.0302]) tensor([0.2237, 0.2395, 0.2373, 0.2995]) -Greedy action tensor([ 0.9218, -0.5745, 0.2373, 0.6417]) tensor([0.4026, 0.0902, 0.2030, 0.3042]) -Greedy action tensor([ 0.5486, 0.8671, -0.3757, 1.4225]) tensor([0.1935, 0.2661, 0.0768, 0.4637]) -Greedy action tensor([-0.4148, -0.0379, -0.7856, 0.4337]) tensor([0.1823, 0.2658, 0.1259, 0.4260]) -Greedy action tensor([1.2831, 0.6784, 0.7205, 0.1447]) tensor([0.4105, 0.2242, 0.2338, 0.1315]) -Greedy action tensor([ 0.6649, 0.4765, -0.6598, 1.0744]) tensor([0.2778, 0.2301, 0.0739, 0.4183]) -Greedy action tensor([ 0.5799, -1.5016, -0.0694, 1.1698]) tensor([0.2898, 0.0361, 0.1514, 0.5227]) -Greedy action tensor([-0.6568, -0.2999, -0.6020, 1.2130]) tensor([0.1003, 0.1433, 0.1059, 0.6505]) -Greedy action tensor([ 1.4822, -0.9225, 1.0656, 1.0941]) tensor([0.4119, 0.0372, 0.2715, 0.2794]) -Greedy action tensor([ 1.0262, -0.1609, 1.3790, 0.6937]) tensor([0.2903, 0.0886, 0.4131, 0.2081]) -Greedy action tensor([ 0.9357, -0.2818, -0.2414, 0.3233]) tensor([0.4659, 0.1379, 0.1436, 0.2526]) -Greedy action tensor([-0.6002, -1.2611, 2.1683, -0.1063]) tensor([0.0524, 0.0271, 0.8347, 0.0858]) -Greedy action tensor([ 0.3497, -0.6659, 0.5654, -0.0393]) tensor([0.3048, 0.1104, 0.3782, 0.2066]) -Greedy action tensor([-0.4917, 0.0769, 0.7939, 1.0875]) tensor([0.0890, 0.1572, 0.3220, 0.4318]) -Greedy action tensor([ 0.6673, -1.1010, 0.9869, 0.1610]) tensor([0.3175, 0.0542, 0.4370, 0.1913]) -Greedy action tensor([ 0.9348, -0.2530, 0.3093, -0.6244]) tensor([0.4878, 0.1487, 0.2609, 0.1026]) -Greedy action tensor([-0.6548, -1.8947, -0.1754, 0.9472]) tensor([0.1271, 0.0368, 0.2053, 0.6308]) -Greedy action tensor([ 1.9913, 0.6165, -0.4270, 0.4729]) tensor([0.6406, 0.1620, 0.0571, 0.1403]) -Greedy action tensor([-0.7237, 0.3140, 0.2033, -0.6693]) tensor([0.1350, 0.3812, 0.3412, 0.1426]) -Greedy action tensor([ 2.0190, -0.3494, 0.8766, 1.7721]) tensor([0.4558, 0.0427, 0.1454, 0.3561]) -Greedy action tensor([-0.0874, 0.0365, -0.1700, -0.5865]) tensor([0.2733, 0.3093, 0.2516, 0.1659]) -Greedy action tensor([ 0.6187, -0.5385, -0.6181, 1.2412]) tensor([0.2883, 0.0906, 0.0837, 0.5373]) -Greedy action tensor([-0.6998, -0.1806, -0.5256, 1.2110]) tensor([0.0941, 0.1581, 0.1120, 0.6358]) -Greedy action tensor([ 1.8412, -1.6175, 0.8826, 0.4704]) tensor([0.5992, 0.0189, 0.2298, 0.1521]) -Greedy action tensor([ 0.1404, -0.7763, 0.1269, 0.0891]) tensor([0.2997, 0.1198, 0.2957, 0.2847]) -Greedy action tensor([ 0.5400, -1.3949, 0.0887, -0.2837]) tensor([0.4504, 0.0651, 0.2868, 0.1977]) -Greedy action tensor([ 1.6245, -0.5870, 1.1157, 0.9652]) tensor([0.4488, 0.0492, 0.2698, 0.2322]) -Greedy action tensor([ 1.3496, -1.1262, 0.0192, 1.5076]) tensor([0.3969, 0.0334, 0.1049, 0.4648]) -Greedy action tensor([ 0.0923, -1.3271, -0.3167, 0.2178]) tensor([0.3290, 0.0796, 0.2185, 0.3729]) -Greedy action tensor([ 1.3044, -1.2243, 0.4929, 0.7073]) tensor([0.4821, 0.0385, 0.2141, 0.2653]) -Greedy action tensor([ 0.7263, -0.3616, -0.0079, 0.6242]) tensor([0.3677, 0.1239, 0.1765, 0.3320]) -Greedy action tensor([ 0.5138, -0.0772, 0.0498, -0.1656]) tensor([0.3718, 0.2059, 0.2338, 0.1885]) -Greedy action tensor([-1.2763, 0.2418, 0.4821, -1.0318]) tensor([0.0791, 0.3609, 0.4590, 0.1010]) -Greedy action tensor([ 0.6455, -0.4184, -0.4461, 1.4300]) tensor([0.2583, 0.0891, 0.0867, 0.5659]) -Greedy action tensor([ 0.9874, 0.3368, 0.6275, -0.1074]) tensor([0.3915, 0.2043, 0.2732, 0.1310]) -Greedy action tensor([ 1.9025, -0.6554, 0.1743, 0.7651]) tensor([0.6346, 0.0492, 0.1127, 0.2035]) -Greedy action tensor([-0.4635, 0.8469, 1.2007, 0.0663]) tensor([0.0856, 0.3172, 0.4519, 0.1453]) -Greedy action tensor([-0.6956, -0.5790, 0.6252, -0.0862]) tensor([0.1297, 0.1458, 0.4860, 0.2386]) -Greedy action tensor([-0.4002, -0.3979, -0.2383, 0.6756]) tensor([0.1637, 0.1640, 0.1924, 0.4799]) -Greedy action tensor([ 1.3098, -1.5779, 1.4778, 0.3215]) tensor([0.3830, 0.0213, 0.4531, 0.1426]) -Greedy action tensor([-0.5660, -0.9981, -0.0269, 0.1126]) tensor([0.1874, 0.1217, 0.3214, 0.3695]) -Greedy action tensor([ 1.0762, 0.8563, -0.1895, -0.3275]) tensor([0.4291, 0.3444, 0.1210, 0.1054]) -Greedy action tensor([ 0.2950, -0.3903, 0.2346, 0.9147]) tensor([0.2324, 0.1171, 0.2187, 0.4318]) -Greedy action tensor([-0.5167, -0.9156, -0.1584, 0.6381]) tensor([0.1593, 0.1069, 0.2280, 0.5057]) -Greedy action tensor([ 2.5130, -0.3890, 1.0506, 0.4359]) tensor([0.7083, 0.0389, 0.1641, 0.0887]) -Greedy action tensor([-0.7235, -0.6864, 0.2935, -1.3329]) tensor([0.1870, 0.1941, 0.5171, 0.1017]) -Greedy action tensor([-0.4578, 0.7787, -0.4628, 0.9095]) tensor([0.1068, 0.3678, 0.1063, 0.4192]) -Greedy action tensor([-0.3252, 0.8196, 1.1684, -0.5327]) tensor([0.1063, 0.3340, 0.4733, 0.0864]) -Greedy action tensor([ 0.3970, -0.4627, -0.2292, 1.1564]) tensor([0.2442, 0.1034, 0.1306, 0.5219]) -Greedy action tensor([ 2.0719, -0.1166, 0.2590, -0.7788]) tensor([0.7501, 0.0841, 0.1224, 0.0434]) -Greedy action tensor([ 0.3151, -1.0880, 2.1937, -0.1969]) tensor([0.1192, 0.0293, 0.7801, 0.0714]) -Greedy action tensor([ 0.5206, -0.0098, 0.3367, 1.0324]) tensor([0.2446, 0.1439, 0.2035, 0.4080]) -Greedy action tensor([ 0.4381, -0.5203, -0.0172, 2.0834]) tensor([0.1389, 0.0533, 0.0881, 0.7198]) -Greedy action tensor([-0.4969, -1.9285, -0.0422, 0.3685]) tensor([0.1927, 0.0460, 0.3036, 0.4577]) -Greedy action tensor([0.6340, 0.4481, 0.6169, 0.0989]) tensor([0.2942, 0.2443, 0.2892, 0.1723]) -Greedy action tensor([ 1.5630, -0.5826, 0.4516, 0.5726]) tensor([0.5502, 0.0644, 0.1811, 0.2044]) -Greedy action tensor([ 1.2444, -0.8734, -0.1907, 0.8997]) tensor([0.4838, 0.0582, 0.1152, 0.3428]) -Greedy action tensor([ 1.3069, 0.0638, -0.4272, 0.9955]) tensor([0.4551, 0.1313, 0.0803, 0.3333]) -Greedy action tensor([-0.1811, -1.9191, 0.9009, -0.1324]) tensor([0.1932, 0.0340, 0.5700, 0.2028]) -Greedy action tensor([-0.3433, -0.9219, 1.3287, -0.3284]) tensor([0.1266, 0.0710, 0.6739, 0.1285]) -Greedy action tensor([-0.1197, -0.5374, 0.1557, 1.5840]) tensor([0.1181, 0.0778, 0.1555, 0.6487]) -Greedy action tensor([ 1.6510, 1.7322, -0.3216, 0.8950]) tensor([0.3713, 0.4027, 0.0516, 0.1743]) -Greedy action tensor([ 0.2192, -1.1082, -0.4963, 0.2166]) tensor([0.3635, 0.0964, 0.1777, 0.3625]) -Greedy action tensor([ 2.0682, -0.2551, 0.8704, 2.2309]) tensor([0.3881, 0.0380, 0.1172, 0.4567]) -Greedy action tensor([-0.2855, -1.0612, 0.5779, 1.2819]) tensor([0.1159, 0.0534, 0.2749, 0.5558]) -Greedy action tensor([ 1.3108, -1.3146, -0.0823, 1.2598]) tensor([0.4403, 0.0319, 0.1093, 0.4184]) -Greedy action tensor([-0.4425, -0.4684, -0.0645, -1.0145]) tensor([0.2501, 0.2437, 0.3650, 0.1412]) -Greedy action tensor([-1.3614, -0.9846, -0.3534, -0.8091]) tensor([0.1442, 0.2102, 0.3951, 0.2505]) -Greedy action tensor([ 0.5621, 0.4425, -1.1930, 2.1288]) tensor([0.1460, 0.1295, 0.0252, 0.6993]) -Greedy action tensor([0.3020, 0.0655, 1.2373, 0.1695]) tensor([0.1918, 0.1514, 0.4887, 0.1680]) -Greedy action tensor([ 1.1269, -0.9395, 1.3513, -0.2115]) tensor([0.3787, 0.0480, 0.4740, 0.0993]) -Greedy action tensor([-1.4660, -0.1825, 0.7717, -0.7181]) tensor([0.0621, 0.2243, 0.5823, 0.1313]) -Greedy action tensor([ 0.8670, 0.9211, -0.1396, -0.8069]) tensor([0.3834, 0.4047, 0.1401, 0.0719]) -Greedy action tensor([ 0.2816, -0.0986, -0.0348, -0.2133]) tensor([0.3309, 0.2262, 0.2412, 0.2017]) -Greedy action tensor([ 0.7025, -0.2958, -0.1895, -0.3313]) tensor([0.4686, 0.1727, 0.1921, 0.1667]) -Greedy action tensor([ 0.5089, 0.0977, -0.0918, -0.1962]) tensor([0.3696, 0.2450, 0.2027, 0.1826]) -Greedy action tensor([ 0.7161, -0.5002, -0.1417, -0.5383]) tensor([0.4986, 0.1477, 0.2115, 0.1422]) -Greedy action tensor([ 0.3576, -0.2943, 0.1078, -0.2967]) tensor([0.3546, 0.1848, 0.2762, 0.1843]) -Greedy action tensor([ 0.4360, -0.0961, 0.0336, -0.2801]) tensor([0.3643, 0.2140, 0.2436, 0.1780]) -Greedy action tensor([ 0.3495, 0.0471, 0.0306, -0.3379]) tensor([0.3368, 0.2489, 0.2449, 0.1694]) -Greedy action tensor([ 1.0887, -0.6617, -0.1450, -0.5461]) tensor([0.6024, 0.1046, 0.1754, 0.1175]) -Greedy action tensor([0.3457, 0.0995, 0.0841, 0.1441]) tensor([0.2968, 0.2320, 0.2285, 0.2426]) -Greedy action tensor([ 0.6015, -0.1100, -0.0462, -0.3492]) tensor([0.4165, 0.2045, 0.2180, 0.1610]) -Greedy action tensor([ 0.6454, -0.0261, -0.1393, -0.2517]) tensor([0.4211, 0.2151, 0.1921, 0.1717]) -Greedy action tensor([ 0.4042, -0.0810, -0.1095, -0.0710]) tensor([0.3527, 0.2171, 0.2110, 0.2193]) -Greedy action tensor([ 0.3603, -0.0571, -0.0239, -0.2981]) tensor([0.3500, 0.2305, 0.2383, 0.1812]) -Greedy action tensor([ 0.8180, -0.7842, -0.1380, -0.2296]) tensor([0.5163, 0.1040, 0.1985, 0.1811]) -Greedy action tensor([ 0.3932, -0.0604, 0.0837, -0.2844]) tensor([0.3476, 0.2208, 0.2551, 0.1765]) -Greedy action tensor([ 0.7322, -0.6076, 0.0065, -0.2125]) tensor([0.4685, 0.1227, 0.2267, 0.1821]) -Greedy action tensor([ 0.9016, -0.4788, -0.1103, -0.2938]) tensor([0.5215, 0.1311, 0.1896, 0.1578]) -Greedy action tensor([ 0.6728, -0.5104, 0.0142, -0.3236]) tensor([0.4560, 0.1397, 0.2360, 0.1684]) -Greedy action tensor([ 0.6560, -0.5524, -0.0742, -0.3524]) tensor([0.4662, 0.1392, 0.2246, 0.1700]) -Greedy action tensor([ 0.7702, -0.4203, -0.0673, -0.3954]) tensor([0.4881, 0.1484, 0.2113, 0.1522]) -Greedy action tensor([ 0.4146, -0.1277, 0.1061, -0.2464]) tensor([0.3531, 0.2053, 0.2594, 0.1823]) -Greedy action tensor([ 0.7537, -0.4467, -0.0681, -0.2758]) tensor([0.4767, 0.1435, 0.2096, 0.1703]) -Greedy action tensor([ 0.5314, -0.5105, -0.1198, -0.3507]) tensor([0.4370, 0.1542, 0.2279, 0.1809]) -Greedy action tensor([ 0.5373, -0.3219, 0.0763, -0.2632]) tensor([0.3995, 0.1692, 0.2519, 0.1794]) -Greedy action tensor([ 0.5650, 0.3138, -0.1769, 0.1372]) tensor([0.3441, 0.2677, 0.1639, 0.2243]) -Greedy action tensor([ 0.5635, -0.1912, 0.0830, -0.4329]) tensor([0.4069, 0.1913, 0.2516, 0.1502]) -Greedy action tensor([ 0.4667, 0.0306, -0.0483, -0.0686]) tensor([0.3534, 0.2285, 0.2112, 0.2069]) -Greedy action tensor([ 0.3764, -0.4107, -0.1315, -0.3799]) tensor([0.3958, 0.1802, 0.2382, 0.1858]) -Greedy action tensor([ 0.2542, -0.0096, -0.2834, -0.1547]) tensor([0.3315, 0.2546, 0.1936, 0.2202]) -Greedy action tensor([ 0.5810, -0.3201, 0.1848, -0.2551]) tensor([0.3980, 0.1617, 0.2678, 0.1725]) -Greedy action tensor([ 0.5489, -0.1899, 0.0105, -0.3429]) tensor([0.4046, 0.1933, 0.2362, 0.1659]) -Greedy action tensor([ 0.6131, -0.4960, -0.1157, -0.5086]) tensor([0.4677, 0.1543, 0.2257, 0.1523]) -Greedy action tensor([ 0.4880, -0.3890, -0.1874, -0.5112]) tensor([0.4361, 0.1814, 0.2220, 0.1606]) -Greedy action tensor([ 0.5954, -0.4337, 0.0904, -0.1600]) tensor([0.4114, 0.1470, 0.2483, 0.1933]) -Greedy action tensor([ 0.6268, -0.4372, -0.1172, -0.5272]) tensor([0.4682, 0.1616, 0.2225, 0.1477]) -Greedy action tensor([ 0.3526, -0.1371, -0.0904, -0.1848]) tensor([0.3522, 0.2158, 0.2262, 0.2058]) -Greedy action tensor([ 0.8287, -0.5385, -0.1732, -0.3981]) tensor([0.5221, 0.1330, 0.1917, 0.1531]) -Greedy action tensor([ 0.3446, 0.0401, -0.0849, -0.1263]) tensor([0.3319, 0.2448, 0.2160, 0.2073]) -Greedy action tensor([ 0.7516, -0.1195, -0.0743, -0.1575]) tensor([0.4426, 0.1852, 0.1938, 0.1783]) -Greedy action tensor([ 0.4800, -0.2501, -0.0302, -0.3171]) tensor([0.3948, 0.1902, 0.2370, 0.1779]) -Greedy action tensor([ 0.4983, -0.5077, -0.1345, -0.3279]) tensor([0.4284, 0.1566, 0.2275, 0.1875]) -Greedy action tensor([ 1.2485, -0.6380, 0.1475, -0.4050]) tensor([0.5968, 0.0905, 0.1985, 0.1142]) -Greedy action tensor([ 0.6929, -0.3880, -0.0234, -0.3457]) tensor([0.4583, 0.1555, 0.2239, 0.1622]) -Greedy action tensor([ 0.8575, -0.9001, -0.3036, -0.7556]) tensor([0.5935, 0.1024, 0.1859, 0.1183]) -Greedy action tensor([ 0.5538, -0.2767, 0.0243, -0.4481]) tensor([0.4181, 0.1822, 0.2462, 0.1535]) -Greedy action tensor([ 0.7896, -0.2053, -0.0342, -0.0527]) tensor([0.4466, 0.1651, 0.1959, 0.1924]) -Greedy action tensor([ 0.4183, 0.0489, -0.0235, -0.1267]) tensor([0.3432, 0.2372, 0.2206, 0.1990]) -Greedy action tensor([ 0.4424, -0.2108, 0.0502, -0.2027]) tensor([0.3676, 0.1913, 0.2483, 0.1928]) -Greedy action tensor([ 0.5818, -0.2336, 0.0548, -0.2759]) tensor([0.4070, 0.1801, 0.2403, 0.1726]) -Greedy action tensor([ 0.3351, -0.0981, -0.1428, -0.2742]) tensor([0.3556, 0.2306, 0.2205, 0.1933]) -Greedy action tensor([ 0.4670, 0.0148, -0.0788, -0.1636]) tensor([0.3639, 0.2315, 0.2108, 0.1937]) -Greedy action tensor([ 1.1152, -0.6453, -0.0235, -0.7646]) tensor([0.6080, 0.1046, 0.1947, 0.0928]) -Greedy action tensor([ 0.7582, -0.4985, -0.0980, -0.2761]) tensor([0.4843, 0.1378, 0.2057, 0.1722]) -Greedy action tensor([ 0.9419, -0.3628, -0.0085, -0.4062]) tensor([0.5215, 0.1415, 0.2016, 0.1354]) -Greedy action tensor([ 0.5196, -0.1241, -0.0462, -0.2769]) tensor([0.3931, 0.2065, 0.2232, 0.1772]) -Greedy action tensor([ 0.5218, -0.1809, -0.0340, -0.2002]) tensor([0.3915, 0.1939, 0.2245, 0.1901]) -Greedy action tensor([ 0.4500, -0.4834, -0.1202, -0.5496]) tensor([0.4298, 0.1690, 0.2430, 0.1582]) -Greedy action tensor([ 1.0964, -1.0715, 0.0076, -0.6460]) tensor([0.6150, 0.0704, 0.2070, 0.1077]) -Greedy action tensor([ 0.3327, 0.3935, -0.2158, -0.1150]) tensor([0.3049, 0.3240, 0.1762, 0.1949]) -Greedy action tensor([ 0.4710, 0.1908, -0.1185, -0.0990]) tensor([0.3477, 0.2628, 0.1929, 0.1967]) -Greedy action tensor([ 0.3327, -0.0968, 0.0593, -0.1126]) tensor([0.3276, 0.2132, 0.2493, 0.2099]) -Greedy action tensor([ 0.7391, -0.5434, -0.2159, -0.3993]) tensor([0.5044, 0.1399, 0.1941, 0.1616]) -Greedy action tensor([ 0.7511, -0.4276, -0.0527, -0.2042]) tensor([0.4673, 0.1438, 0.2092, 0.1798]) -Greedy action tensor([ 0.7103, -0.3801, 0.0713, -0.4352]) tensor([0.4583, 0.1540, 0.2419, 0.1458]) -Greedy action tensor([ 0.3752, -0.0888, -0.1093, -0.1143]) tensor([0.3499, 0.2200, 0.2156, 0.2145]) -Greedy action tensor([ 0.7090, -0.6966, -0.0939, -0.5563]) tensor([0.5062, 0.1241, 0.2268, 0.1428]) -Greedy action tensor([ 0.7420, -0.2938, -0.0305, -0.4922]) tensor([0.4744, 0.1684, 0.2191, 0.1381]) -Greedy action tensor([ 0.5927, -0.0650, -0.0143, -0.0339]) tensor([0.3850, 0.1994, 0.2098, 0.2057]) -Greedy action tensor([ 0.7960, -0.1417, -0.0964, -0.2749]) tensor([0.4664, 0.1826, 0.1911, 0.1598]) -Greedy action tensor([ 0.4712, 0.0211, -0.1450, -0.0496]) tensor([0.3608, 0.2300, 0.1948, 0.2143]) -Greedy action tensor([ 0.5454, -0.4235, -0.0875, -0.5134]) tensor([0.4430, 0.1681, 0.2352, 0.1537]) -Greedy action tensor([ 0.6920, -0.3643, -0.0925, -0.2729]) tensor([0.4576, 0.1591, 0.2088, 0.1744]) -Greedy action tensor([ 0.9140, -0.5442, -0.2397, -0.3882]) tensor([0.5494, 0.1278, 0.1733, 0.1494]) -Greedy action tensor([ 0.2856, 0.0896, -0.1097, 0.0086]) tensor([0.3074, 0.2527, 0.2070, 0.2330]) -Greedy action tensor([ 0.3880, -0.1678, -0.0148, -0.3850]) tensor([0.3699, 0.2122, 0.2472, 0.1707]) -Greedy action tensor([ 0.1959, -0.0834, 0.1929, -0.2447]) tensor([0.2944, 0.2226, 0.2935, 0.1895]) -Greedy action tensor([ 0.4272, -0.1787, -0.0426, -0.2126]) tensor([0.3706, 0.2022, 0.2317, 0.1955]) -Greedy action tensor([ 0.9032, -0.4357, -0.0482, -0.6294]) tensor([0.5364, 0.1406, 0.2071, 0.1158]) -Greedy action tensor([ 0.5742, -0.1168, -0.0733, -0.3884]) tensor([0.4156, 0.2082, 0.2175, 0.1587]) -Greedy action tensor([ 0.6913, -0.2318, -0.0438, -0.3568]) tensor([0.4490, 0.1784, 0.2153, 0.1574]) -Greedy action tensor([ 0.7788, -0.7922, -0.0222, -0.3479]) tensor([0.5048, 0.1049, 0.2266, 0.1636]) -Greedy action tensor([ 1.2555, -0.3084, -0.5307, 0.4080]) tensor([0.5539, 0.1159, 0.0928, 0.2373]) -Greedy action tensor([ 1.5425, -0.5196, -0.2759, 0.3191]) tensor([0.6314, 0.0803, 0.1025, 0.1858]) -Greedy action tensor([ 0.3436, 0.0980, 0.1281, -0.1405]) tensor([0.3120, 0.2441, 0.2516, 0.1923]) -Greedy action tensor([ 1.9989, -0.8581, -0.1032, 0.2149]) tensor([0.7421, 0.0426, 0.0907, 0.1246]) -Greedy action tensor([ 0.6903, -0.3196, -0.1683, 0.4001]) tensor([0.3943, 0.1436, 0.1671, 0.2950]) -Greedy action tensor([ 0.7135, -0.4347, -0.4072, 0.0450]) tensor([0.4639, 0.1472, 0.1513, 0.2377]) -Greedy action tensor([ 0.2513, 0.0653, 0.1833, -0.0509]) tensor([0.2854, 0.2370, 0.2667, 0.2110]) -Greedy action tensor([ 1.0269, -0.6277, -0.6992, 0.6719]) tensor([0.4830, 0.0923, 0.0860, 0.3387]) -Greedy action tensor([ 0.1679, -0.2811, -0.1614, 0.3884]) tensor([0.2774, 0.1771, 0.1996, 0.3459]) -Greedy action tensor([ 2.7378, -1.7410, -0.4303, 1.0033]) tensor([0.8131, 0.0092, 0.0342, 0.1435]) -Greedy action tensor([ 0.9848, -0.0556, -0.3119, 0.1644]) tensor([0.4838, 0.1709, 0.1323, 0.2130]) -Greedy action tensor([ 2.8905, -1.2094, -0.3120, 0.4962]) tensor([0.8707, 0.0144, 0.0354, 0.0794]) -Greedy action tensor([ 0.9291, -0.2199, -0.0244, 0.0217]) tensor([0.4749, 0.1505, 0.1830, 0.1916]) -Greedy action tensor([ 1.0832, -0.2944, -0.4281, 0.6195]) tensor([0.4758, 0.1200, 0.1050, 0.2992]) -Greedy action tensor([ 1.1717, -0.9164, -0.3051, 0.6975]) tensor([0.5064, 0.0628, 0.1156, 0.3152]) -Greedy action tensor([ 1.1188, -0.3147, -0.4610, 0.4583]) tensor([0.5099, 0.1216, 0.1051, 0.2634]) -Greedy action tensor([ 1.1052, -0.5516, -0.1258, 0.2381]) tensor([0.5255, 0.1002, 0.1535, 0.2208]) -Greedy action tensor([ 1.5666, -0.4226, -0.4402, 0.1176]) tensor([0.6640, 0.0908, 0.0893, 0.1559]) -Greedy action tensor([ 1.4467, -0.6668, -0.1204, 0.1285]) tensor([0.6261, 0.0757, 0.1306, 0.1676]) -Greedy action tensor([ 1.6646, -0.1958, -0.6281, 0.3453]) tensor([0.6562, 0.1021, 0.0663, 0.1754]) -Greedy action tensor([ 1.9597, -0.7366, -0.2278, 0.4521]) tensor([0.7137, 0.0481, 0.0801, 0.1580]) -Greedy action tensor([ 1.1237, -0.3279, -0.3056, 0.5568]) tensor([0.4900, 0.1147, 0.1173, 0.2779]) -Greedy action tensor([ 1.1759, -0.2711, -0.3719, 0.2545]) tensor([0.5417, 0.1275, 0.1152, 0.2156]) -Greedy action tensor([ 1.0526, -0.0504, -0.3265, -0.2568]) tensor([0.5395, 0.1790, 0.1358, 0.1456]) -Greedy action tensor([ 1.2562, -0.4859, -0.2860, 0.1770]) tensor([0.5784, 0.1013, 0.1237, 0.1966]) -Greedy action tensor([ 1.5625, -0.3199, -0.0320, 0.5749]) tensor([0.5788, 0.0881, 0.1175, 0.2156]) -Greedy action tensor([ 1.1567, -0.1324, -0.3990, 0.2625]) tensor([0.5275, 0.1454, 0.1113, 0.2157]) -Greedy action tensor([ 1.7902, -0.6786, -0.0716, 0.4864]) tensor([0.6616, 0.0560, 0.1028, 0.1796]) -Greedy action tensor([ 0.9497, -0.3714, -0.3348, -0.1502]) tensor([0.5329, 0.1422, 0.1475, 0.1774]) -Greedy action tensor([ 1.4698, 0.1325, -0.1568, -0.0404]) tensor([0.5952, 0.1563, 0.1170, 0.1315]) -Greedy action tensor([ 1.8310, -1.2968, -0.1200, 0.7451]) tensor([0.6564, 0.0288, 0.0933, 0.2216]) -Greedy action tensor([ 1.5456, -0.2768, 0.0104, -0.1707]) tensor([0.6424, 0.1038, 0.1384, 0.1154]) -Greedy action tensor([ 1.3155, -0.1092, -0.1503, -0.0587]) tensor([0.5799, 0.1395, 0.1339, 0.1467]) -Greedy action tensor([ 1.8232, -0.8602, -0.1785, 0.5155]) tensor([0.6785, 0.0464, 0.0917, 0.1835]) -Greedy action tensor([ 2.1749, -0.8005, -0.2190, 0.3485]) tensor([0.7673, 0.0392, 0.0700, 0.1235]) -Greedy action tensor([ 1.1664, 0.2186, -0.2800, 0.2162]) tensor([0.4976, 0.1929, 0.1171, 0.1924]) -Greedy action tensor([ 1.3642, -0.7761, 0.0314, 0.1729]) tensor([0.5934, 0.0698, 0.1565, 0.1803]) -Greedy action tensor([ 1.0056, -0.5070, -0.6796, 1.1407]) tensor([0.3921, 0.0864, 0.0727, 0.4488]) -Greedy action tensor([ 1.1362, -0.6364, -0.1656, 0.3101]) tensor([0.5320, 0.0904, 0.1447, 0.2329]) -Greedy action tensor([ 0.9986, -0.2113, -0.2606, 0.2087]) tensor([0.4912, 0.1465, 0.1394, 0.2229]) -Greedy action tensor([ 1.5039, 0.1942, -0.5557, 0.2732]) tensor([0.5919, 0.1598, 0.0755, 0.1729]) -Greedy action tensor([ 0.7637, -0.1689, -0.3070, 0.0763]) tensor([0.4466, 0.1757, 0.1531, 0.2246]) -Greedy action tensor([ 1.1913, -0.3729, -0.1998, 0.0428]) tensor([0.5633, 0.1179, 0.1402, 0.1786]) -Greedy action tensor([ 0.7384, -0.2254, -0.2009, 0.2704]) tensor([0.4169, 0.1590, 0.1630, 0.2611]) -Greedy action tensor([ 1.7580, -1.0209, -0.2364, 0.7895]) tensor([0.6338, 0.0394, 0.0863, 0.2406]) -Greedy action tensor([ 1.8128, -0.2186, -0.0850, -0.1588]) tensor([0.7041, 0.0923, 0.1055, 0.0980]) -Greedy action tensor([ 1.9300, -1.3050, -0.2488, 0.5446]) tensor([0.7129, 0.0281, 0.0807, 0.1784]) -Greedy action tensor([ 1.5246, -0.1621, -0.0917, 0.8178]) tensor([0.5328, 0.0986, 0.1058, 0.2628]) -Greedy action tensor([ 0.9554, -0.0131, 0.1533, -0.4267]) tensor([0.4810, 0.1826, 0.2157, 0.1208]) -Greedy action tensor([ 1.2266, -0.1345, -0.4587, 0.3524]) tensor([0.5379, 0.1379, 0.0997, 0.2244]) -Greedy action tensor([ 0.8052, -0.1785, -0.6746, 0.2818]) tensor([0.4558, 0.1704, 0.1038, 0.2701]) -Greedy action tensor([ 1.3772, -0.7069, -0.2053, 0.4179]) tensor([0.5838, 0.0726, 0.1199, 0.2237]) -Greedy action tensor([ 0.6381, -0.2562, -0.0517, 0.0464]) tensor([0.4058, 0.1660, 0.2036, 0.2246]) -Greedy action tensor([ 2.0412, -0.7992, -0.3664, 0.4930]) tensor([0.7347, 0.0429, 0.0661, 0.1562]) -Greedy action tensor([ 1.1653, -0.3819, -0.3626, 0.4500]) tensor([0.5211, 0.1109, 0.1131, 0.2549]) -Greedy action tensor([ 1.6409, -0.5797, -0.4187, 0.3409]) tensor([0.6629, 0.0720, 0.0845, 0.1806]) -Greedy action tensor([ 1.5023, -0.4597, -0.3133, 0.4007]) tensor([0.6114, 0.0859, 0.0995, 0.2032]) -Greedy action tensor([ 0.9730, -0.4338, -0.2501, 0.1494]) tensor([0.5056, 0.1238, 0.1488, 0.2218]) -Greedy action tensor([ 1.5731, -0.9084, -0.0361, -0.0399]) tensor([0.6743, 0.0564, 0.1349, 0.1344]) -Greedy action tensor([ 0.9869, -0.2894, -0.2930, 0.0023]) tensor([0.5179, 0.1445, 0.1440, 0.1935]) -Greedy action tensor([ 0.9931, -0.4449, -0.1295, 0.6564]) tensor([0.4392, 0.1043, 0.1429, 0.3136]) -Greedy action tensor([ 1.5875, -0.8997, -0.1494, 0.4411]) tensor([0.6341, 0.0527, 0.1116, 0.2015]) -Greedy action tensor([ 1.2970, -0.2969, -0.5211, -0.0135]) tensor([0.6116, 0.1242, 0.0993, 0.1649]) -Greedy action tensor([ 0.5798, -0.2502, -0.1487, 0.2649]) tensor([0.3776, 0.1646, 0.1822, 0.2756]) -Greedy action tensor([ 0.5579, -0.1790, 0.0213, 0.2264]) tensor([0.3596, 0.1721, 0.2102, 0.2581]) -Greedy action tensor([ 0.7661, -0.1123, 0.0701, -0.0740]) tensor([0.4263, 0.1771, 0.2126, 0.1840]) -Greedy action tensor([ 1.8302, 0.0040, -0.9689, 0.6645]) tensor([0.6521, 0.1050, 0.0397, 0.2032]) -Greedy action tensor([ 0.9464, -0.7874, -0.2944, 0.3044]) tensor([0.5020, 0.0887, 0.1452, 0.2642]) -Greedy action tensor([ 0.9595, -0.3601, -0.5267, 0.6549]) tensor([0.4482, 0.1198, 0.1014, 0.3306]) -Greedy action tensor([ 1.2436, -0.9658, -0.4700, 0.7090]) tensor([0.5331, 0.0585, 0.0961, 0.3123]) -Greedy action tensor([ 1.5897, -0.7887, -0.1847, 0.9044]) tensor([0.5662, 0.0525, 0.0960, 0.2853]) -Greedy action tensor([ 0.9661, -0.3558, -0.0867, 0.2292]) tensor([0.4775, 0.1273, 0.1666, 0.2285]) -Greedy action tensor([ 1.2085, -0.1374, -0.6400, 0.3880]) tensor([0.5382, 0.1401, 0.0848, 0.2369]) -Greedy action tensor([ 1.8506, -0.2667, -0.6454, -0.1306]) tensor([0.7459, 0.0898, 0.0615, 0.1029]) -Greedy action tensor([ 1.5087, -0.8163, -0.0834, 0.4744]) tensor([0.6036, 0.0590, 0.1228, 0.2146]) -Greedy action tensor([ 1.4811, -0.5127, -0.5451, 0.8627]) tensor([0.5535, 0.0754, 0.0730, 0.2982]) -Greedy action tensor([ 1.3294, -0.5285, -0.8434, 0.8122]) tensor([0.5359, 0.0836, 0.0610, 0.3195]) -Greedy action tensor([ 1.3987, -0.3311, -0.1074, -0.2912]) tensor([0.6315, 0.1120, 0.1400, 0.1165]) -Greedy action tensor([ 1.0451, -0.0906, -0.2021, -0.0646]) tensor([0.5159, 0.1657, 0.1482, 0.1701]) -Greedy action tensor([ 2.0324, -0.7706, -0.7199, 0.3942]) tensor([0.7583, 0.0460, 0.0484, 0.1474]) -Greedy action tensor([ 1.4183, -0.8013, -0.4292, 0.3345]) tensor([0.6232, 0.0677, 0.0982, 0.2108]) -Greedy action tensor([-1.8803, -0.4424, 0.6362, -0.1455]) tensor([0.0430, 0.1810, 0.5324, 0.2436]) -Greedy action tensor([-0.2910, 0.2329, 0.1536, 0.2884]) tensor([0.1658, 0.2799, 0.2585, 0.2958]) -Greedy action tensor([-1.9136, -0.4573, 0.6720, -0.1536]) tensor([0.0410, 0.1760, 0.5445, 0.2385]) -Greedy action tensor([-1.0500, -0.5497, 0.3781, 0.0469]) tensor([0.1019, 0.1680, 0.4249, 0.3051]) -Greedy action tensor([-1.5534, -0.5565, 0.4674, 0.0108]) tensor([0.0624, 0.1690, 0.4705, 0.2981]) -Greedy action tensor([-1.5645, -0.5770, 0.4882, -0.0337]) tensor([0.0621, 0.1668, 0.4839, 0.2872]) -Greedy action tensor([-1.8211, -0.4753, 0.6242, -0.1933]) tensor([0.0466, 0.1789, 0.5373, 0.2372]) -Greedy action tensor([-1.0347, -0.6182, 0.4209, 0.5857]) tensor([0.0843, 0.1279, 0.3615, 0.4263]) -Greedy action tensor([-0.7829, -0.1659, -0.7824, -0.4021]) tensor([0.1881, 0.3486, 0.1882, 0.2752]) -Greedy action tensor([-1.8627, -0.3704, 0.6348, -0.0981]) tensor([0.0427, 0.1897, 0.5185, 0.2491]) -Greedy action tensor([-1.3539, -0.4319, 0.6874, 0.7444]) tensor([0.0516, 0.1298, 0.3976, 0.4209]) -Greedy action tensor([-1.6784, -0.2936, 0.5403, 0.0201]) tensor([0.0509, 0.2032, 0.4678, 0.2781]) -Greedy action tensor([-0.7370, 0.0114, 0.6845, 1.4900]) tensor([0.0605, 0.1279, 0.2507, 0.5610]) -Greedy action tensor([-1.2831, -0.7559, 0.5797, 0.2092]) tensor([0.0736, 0.1247, 0.4743, 0.3274]) -Greedy action tensor([-1.8848, -0.6292, 0.3187, -0.2322]) tensor([0.0532, 0.1868, 0.4821, 0.2779]) -Greedy action tensor([-1.8967, -0.4419, 0.6448, -0.1552]) tensor([0.0422, 0.1808, 0.5361, 0.2409]) -Greedy action tensor([-1.6138, -0.4111, 0.5079, 0.0436]) tensor([0.0558, 0.1858, 0.4657, 0.2927]) -Greedy action tensor([-1.8593, -0.4178, 0.6212, -0.1271]) tensor([0.0438, 0.1852, 0.5234, 0.2476]) -Greedy action tensor([-1.5738, -0.3884, 0.6388, 0.3445]) tensor([0.0494, 0.1618, 0.4520, 0.3368]) -Greedy action tensor([-1.9123, -0.4207, 0.6467, -0.1607]) tensor([0.0414, 0.1842, 0.5355, 0.2389]) -Greedy action tensor([-1.9220, -0.4306, 0.6617, -0.1587]) tensor([0.0408, 0.1812, 0.5402, 0.2378]) -Greedy action tensor([-1.8243, -0.4831, 0.6938, 0.0037]) tensor([0.0426, 0.1631, 0.5290, 0.2653]) -Greedy action tensor([-1.4581, 0.0680, 0.3361, 0.0048]) tensor([0.0628, 0.2887, 0.3775, 0.2710]) -Greedy action tensor([-1.4514, -0.4321, 0.7392, 0.5483]) tensor([0.0498, 0.1379, 0.4448, 0.3675]) -Greedy action tensor([-1.9287, -0.4114, 0.6550, -0.1704]) tensor([0.0406, 0.1853, 0.5383, 0.2358]) -Greedy action tensor([-0.4790, -0.3849, 0.1504, 0.0810]) tensor([0.1746, 0.1919, 0.3277, 0.3057]) -Greedy action tensor([-1.8759, -0.3846, 0.6491, -0.1217]) tensor([0.0422, 0.1874, 0.5268, 0.2437]) -Greedy action tensor([-0.5246, 0.0169, 0.0443, -0.3081]) tensor([0.1746, 0.3001, 0.3084, 0.2168]) -Greedy action tensor([-1.3269, -0.6002, 0.3254, 0.3123]) tensor([0.0744, 0.1539, 0.3884, 0.3833]) -Greedy action tensor([-1.6353, -0.0267, 0.4524, -0.0520]) tensor([0.0528, 0.2639, 0.4260, 0.2573]) -Greedy action tensor([-1.5019, -0.5221, 0.5062, 0.1916]) tensor([0.0604, 0.1610, 0.4501, 0.3286]) -Greedy action tensor([-0.5859, -0.3232, 1.0751, 1.4826]) tensor([0.0646, 0.0840, 0.3401, 0.5112]) -Greedy action tensor([-0.7418, -0.3235, 0.5230, 1.2825]) tensor([0.0734, 0.1114, 0.2599, 0.5553]) -Greedy action tensor([-1.8833, -0.4569, 0.6315, -0.1467]) tensor([0.0431, 0.1794, 0.5328, 0.2447]) -Greedy action tensor([-1.7085, -0.4420, 0.6566, 0.1282]) tensor([0.0466, 0.1653, 0.4958, 0.2923]) -Greedy action tensor([-1.4079, -0.5255, 0.4650, 0.4521]) tensor([0.0612, 0.1478, 0.3980, 0.3929]) -Greedy action tensor([-1.7937, -0.4749, 0.6031, -0.0874]) tensor([0.0471, 0.1761, 0.5174, 0.2594]) -Greedy action tensor([-1.8733, -0.4438, 0.6283, -0.1516]) tensor([0.0435, 0.1818, 0.5312, 0.2435]) -Greedy action tensor([-1.5656, -0.5384, 0.4590, 0.0322]) tensor([0.0613, 0.1713, 0.4644, 0.3030]) -Greedy action tensor([-1.8241, -0.3910, 0.6045, -0.0831]) tensor([0.0450, 0.1885, 0.5101, 0.2565]) -Greedy action tensor([-1.7398, -0.4972, 0.6011, -0.0135]) tensor([0.0488, 0.1692, 0.5075, 0.2745]) -Greedy action tensor([-1.8394, -0.3077, 0.5928, -0.0968]) tensor([0.0440, 0.2036, 0.5010, 0.2514]) -Greedy action tensor([-0.4766, -0.3390, 0.9218, 1.4230]) tensor([0.0776, 0.0891, 0.3144, 0.5189]) -Greedy action tensor([-1.9317, -0.4514, 0.6655, -0.1680]) tensor([0.0406, 0.1782, 0.5446, 0.2366]) -Greedy action tensor([-1.7957, -0.3837, 0.6695, -0.0348]) tensor([0.0441, 0.1809, 0.5186, 0.2564]) -Greedy action tensor([-1.7691, -0.4486, 0.5698, -0.0848]) tensor([0.0488, 0.1827, 0.5058, 0.2628]) -Greedy action tensor([-1.4943, -0.5319, 0.5470, 0.3743]) tensor([0.0562, 0.1471, 0.4327, 0.3640]) -Greedy action tensor([-1.7415, -0.0146, 0.4999, -0.0645]) tensor([0.0468, 0.2630, 0.4400, 0.2502]) -Greedy action tensor([-0.7092, -0.3515, -0.8616, -0.1928]) tensor([0.2014, 0.2880, 0.1730, 0.3376]) -Greedy action tensor([-1.8698, -0.3401, 0.6015, -0.1251]) tensor([0.0431, 0.1992, 0.5107, 0.2470]) -Greedy action tensor([-1.8915, -0.4014, 0.6339, -0.1482]) tensor([0.0423, 0.1876, 0.5284, 0.2417]) -Greedy action tensor([-1.8597, -0.3599, 0.6192, -0.1222]) tensor([0.0433, 0.1940, 0.5166, 0.2461]) -Greedy action tensor([-1.8840, -0.4108, 0.6282, -0.1407]) tensor([0.0427, 0.1864, 0.5267, 0.2442]) -Greedy action tensor([-0.8328, -0.5799, 0.1629, 0.3457]) tensor([0.1213, 0.1562, 0.3283, 0.3942]) -Greedy action tensor([-0.8996, -0.5754, 0.1554, 0.3774]) tensor([0.1131, 0.1564, 0.3249, 0.4056]) -Greedy action tensor([-1.4996, -0.5048, 0.4670, 0.1246]) tensor([0.0628, 0.1698, 0.4487, 0.3186]) -Greedy action tensor([-1.8961, -0.4711, 0.6476, -0.1422]) tensor([0.0423, 0.1757, 0.5379, 0.2442]) -Greedy action tensor([-0.9199, -0.3067, 0.8506, 1.2795]) tensor([0.0564, 0.1041, 0.3311, 0.5084]) -Greedy action tensor([-1.1144, -0.3762, 0.9662, 1.2172]) tensor([0.0467, 0.0978, 0.3743, 0.4811]) -Greedy action tensor([-1.7156, -0.1084, 0.6765, -0.5046]) tensor([0.0493, 0.2460, 0.5392, 0.1655]) -Greedy action tensor([-1.6945, -0.5411, 0.5867, 0.0251]) tensor([0.0512, 0.1622, 0.5009, 0.2857]) -Greedy action tensor([-1.7095, -0.4714, 0.6071, 0.0572]) tensor([0.0489, 0.1687, 0.4961, 0.2862]) -Greedy action tensor([-1.8690, -0.5120, 0.7049, -0.0942]) tensor([0.0418, 0.1625, 0.5488, 0.2468]) -Greedy action tensor([-1.8270, -0.2231, 0.5837, -0.0931]) tensor([0.0439, 0.2183, 0.4892, 0.2486]) -Greedy action tensor([-1.5565, -0.5324, 0.6604, 0.3702]) tensor([0.0504, 0.1404, 0.4629, 0.3463]) -Greedy action tensor([-1.0738, -0.2856, 0.5532, -0.5119]) tensor([0.0996, 0.2190, 0.5067, 0.1747]) -Greedy action tensor([-1.1409, -0.5474, 0.2760, 0.2537]) tensor([0.0912, 0.1651, 0.3760, 0.3677]) -Greedy action tensor([-1.7350, -0.2382, 0.5764, 0.0855]) tensor([0.0460, 0.2056, 0.4643, 0.2841]) -Greedy action tensor([-1.9125, -0.4578, 0.6474, -0.1673]) tensor([0.0418, 0.1789, 0.5402, 0.2392]) -Greedy action tensor([-1.8500, -0.3946, 0.6188, -0.1038]) tensor([0.0438, 0.1878, 0.5173, 0.2511]) -Greedy action tensor([-1.5197, -0.2880, 0.4361, 0.1833]) tensor([0.0589, 0.2018, 0.4162, 0.3232]) -Greedy action tensor([-1.1519, 0.1374, -0.0156, 0.2907]) tensor([0.0835, 0.3031, 0.2601, 0.3533]) -Greedy action tensor([-1.3866, 0.5819, 0.2512, 0.1259]) tensor([0.0560, 0.4013, 0.2883, 0.2543]) -Greedy action tensor([-1.6250, 0.4178, 0.3922, 0.0850]) tensor([0.0460, 0.3545, 0.3455, 0.2541]) -Greedy action tensor([-1.5785, -0.5278, 0.5012, 0.1200]) tensor([0.0577, 0.1650, 0.4618, 0.3154]) -Greedy action tensor([-1.9203, -0.3905, 0.6515, -0.1574]) tensor([0.0408, 0.1882, 0.5335, 0.2376]) -Greedy action tensor([-1.8487, -0.3161, 0.5988, -0.1178]) tensor([0.0438, 0.2028, 0.5062, 0.2472]) -Greedy action tensor([-1.4595, -0.3987, 0.4164, 0.0454]) tensor([0.0670, 0.1936, 0.4375, 0.3019]) -Greedy action tensor([-1.1370, -0.9030, -0.5581, -0.5891]) tensor([0.1731, 0.2187, 0.3088, 0.2994]) -Greedy action tensor([-1.6626, 0.0124, 0.4923, -0.1333]) tensor([0.0511, 0.2727, 0.4406, 0.2357]) -Greedy action tensor([-1.8796, -0.4350, 0.9143, 0.4034]) tensor([0.0319, 0.1351, 0.5207, 0.3124]) -Greedy action tensor([ 0.0026, 0.2934, 0.1762, -0.2055]) tensor([0.2305, 0.3082, 0.2741, 0.1872]) -Greedy action tensor([ 0.1715, -1.2307, 1.5200, -0.5579]) tensor([0.1792, 0.0441, 0.6903, 0.0864]) -Greedy action tensor([ 1.8542, -0.1959, 1.1515, 0.1313]) tensor([0.5548, 0.0714, 0.2748, 0.0991]) -Greedy action tensor([ 0.8595, -0.6822, 1.7322, -0.5966]) tensor([0.2604, 0.0557, 0.6232, 0.0607]) -Greedy action tensor([-0.1802, -2.4735, 0.6835, 0.5879]) tensor([0.1777, 0.0179, 0.4214, 0.3830]) -Greedy action tensor([-0.9015, -0.9826, -1.2895, -0.4159]) tensor([0.2366, 0.2182, 0.1606, 0.3846]) -Greedy action tensor([ 0.3160, 0.0837, 1.9507, -0.5682]) tensor([0.1364, 0.1081, 0.6992, 0.0563]) -Greedy action tensor([-0.9290, -1.0483, -0.9862, -0.0821]) tensor([0.1936, 0.1719, 0.1829, 0.4516]) -Greedy action tensor([ 1.1006, -0.3236, 0.4523, 0.3231]) tensor([0.4498, 0.1083, 0.2352, 0.2067]) -Greedy action tensor([-0.0324, -2.0185, -0.1685, 1.1064]) tensor([0.1948, 0.0267, 0.1700, 0.6084]) -Greedy action tensor([ 0.4282, -1.0029, -0.5586, 0.1839]) tensor([0.4175, 0.0998, 0.1556, 0.3270]) -Greedy action tensor([ 0.4948, -1.2201, 0.8275, -0.1221]) tensor([0.3211, 0.0578, 0.4479, 0.1733]) -Greedy action tensor([ 0.2482, -2.2240, 0.1850, 0.8433]) tensor([0.2607, 0.0220, 0.2447, 0.4726]) -Greedy action tensor([ 0.8797, 0.5142, -0.4805, 0.9679]) tensor([0.3287, 0.2280, 0.0843, 0.3590]) -Greedy action tensor([-0.8814, 1.0754, 0.8347, -0.6729]) tensor([0.0672, 0.4759, 0.3741, 0.0828]) -Greedy action tensor([0.3318, 0.4503, 0.1213, 0.9255]) tensor([0.2107, 0.2372, 0.1707, 0.3815]) -Greedy action tensor([ 0.1956, -1.7450, 0.3755, -0.2461]) tensor([0.3352, 0.0481, 0.4012, 0.2155]) -Greedy action tensor([0.6220, 0.8755, 0.2271, 0.3710]) tensor([0.2674, 0.3445, 0.1801, 0.2080]) -Greedy action tensor([ 0.4420, -0.3819, -0.2081, -0.9568]) tensor([0.4530, 0.1987, 0.2364, 0.1118]) -Greedy action tensor([-0.3840, -0.0510, -0.3352, -0.2669]) tensor([0.2188, 0.3053, 0.2298, 0.2460]) -Greedy action tensor([ 0.4635, -0.2771, -0.5766, 0.1613]) tensor([0.3892, 0.1856, 0.1375, 0.2877]) -Greedy action tensor([ 0.4452, -1.9892, 0.1299, -0.1674]) tensor([0.4239, 0.0372, 0.3093, 0.2297]) -Greedy action tensor([-0.1705, -0.1902, 2.1383, -0.1060]) tensor([0.0763, 0.0748, 0.7676, 0.0814]) -Greedy action tensor([1.3880, 0.1137, 0.4805, 0.3129]) tensor([0.4940, 0.1381, 0.1993, 0.1686]) -Greedy action tensor([ 1.4571, -0.9650, 0.4420, 1.8396]) tensor([0.3428, 0.0304, 0.1242, 0.5025]) -Greedy action tensor([ 0.1263, -0.4501, 0.7299, -0.5428]) tensor([0.2562, 0.1440, 0.4686, 0.1312]) -Greedy action tensor([-0.4054, -0.5930, 0.8328, 0.1983]) tensor([0.1407, 0.1166, 0.4853, 0.2573]) -Greedy action tensor([-0.4902, -1.2520, 0.1972, 0.4957]) tensor([0.1630, 0.0761, 0.3241, 0.4368]) -Greedy action tensor([ 0.0174, -1.1842, 0.7240, -0.6296]) tensor([0.2596, 0.0781, 0.5263, 0.1360]) -Greedy action tensor([ 0.5368, -0.3150, 0.7583, -0.0641]) tensor([0.3103, 0.1324, 0.3872, 0.1701]) -Greedy action tensor([ 1.1350, -0.4191, 1.0199, 0.3841]) tensor([0.3884, 0.0821, 0.3462, 0.1833]) -Greedy action tensor([ 0.7240, -0.2854, 0.9329, 1.2773]) tensor([0.2306, 0.0841, 0.2842, 0.4011]) -Greedy action tensor([ 0.5170, 0.2692, -0.0194, 1.9844]) tensor([0.1492, 0.1164, 0.0872, 0.6471]) -Greedy action tensor([ 0.6133, -0.3906, -1.1405, -0.6776]) tensor([0.5511, 0.2019, 0.0954, 0.1516]) -Greedy action tensor([ 1.2079, -0.4602, 1.4688, 0.6016]) tensor([0.3298, 0.0622, 0.4281, 0.1799]) -Greedy action tensor([ 0.4696, -1.5778, 1.9782, 1.0305]) tensor([0.1351, 0.0174, 0.6107, 0.2367]) -Greedy action tensor([-0.5316, -0.9945, 1.4445, -0.2407]) tensor([0.0982, 0.0618, 0.7086, 0.1314]) -Greedy action tensor([-0.1302, -1.0195, -1.0205, -0.3971]) tensor([0.3865, 0.1588, 0.1587, 0.2960]) -Greedy action tensor([-6.2236e-01, -1.2847e+00, 9.0123e-01, -9.5326e-04]) tensor([0.1255, 0.0647, 0.5760, 0.2337]) -Greedy action tensor([ 0.9969, -0.3162, 0.7482, -0.3229]) tensor([0.4318, 0.1161, 0.3367, 0.1154]) -Greedy action tensor([-0.9897, -0.2121, 0.1135, -0.2199]) tensor([0.1198, 0.2607, 0.3610, 0.2586]) -Greedy action tensor([-0.0951, 0.5242, 0.6020, -0.9151]) tensor([0.1885, 0.3501, 0.3784, 0.0830]) -Greedy action tensor([ 1.7382, -0.1180, 1.3836, 0.1422]) tensor([0.4853, 0.0758, 0.3404, 0.0984]) -Greedy action tensor([ 1.1081, -1.3608, 1.0032, 0.3952]) tensor([0.4040, 0.0342, 0.3638, 0.1980]) -Greedy action tensor([-0.2216, -0.8353, -1.1698, 0.5031]) tensor([0.2504, 0.1356, 0.0970, 0.5170]) -Greedy action tensor([ 1.1391, 0.1557, -0.2733, -0.3741]) tensor([0.5441, 0.2035, 0.1325, 0.1198]) -Greedy action tensor([1.0303, 0.2493, 0.7977, 0.4051]) tensor([0.3590, 0.1644, 0.2845, 0.1921]) -Greedy action tensor([-1.0684, 0.1273, -0.7910, 0.5761]) tensor([0.0926, 0.3060, 0.1222, 0.4793]) -Greedy action tensor([ 1.0655, -0.6284, 1.5148, -0.3448]) tensor([0.3339, 0.0614, 0.5233, 0.0815]) -Greedy action tensor([-0.7872, -0.5800, -0.9721, -0.2546]) tensor([0.2099, 0.2582, 0.1744, 0.3575]) -Greedy action tensor([-0.3015, -0.5514, 0.2242, -0.5248]) tensor([0.2342, 0.1824, 0.3961, 0.1873]) -Greedy action tensor([-0.5955, 0.0929, -0.0197, 0.1707]) tensor([0.1445, 0.2876, 0.2570, 0.3109]) -Greedy action tensor([-0.2540, -1.3665, -0.4584, -0.3320]) tensor([0.3258, 0.1071, 0.2656, 0.3014]) -Greedy action tensor([ 0.5246, -0.6037, 0.9585, -1.5122]) tensor([0.3336, 0.1080, 0.5149, 0.0435]) -Greedy action tensor([ 1.4799, 0.5016, 0.8408, -0.1297]) tensor([0.4753, 0.1787, 0.2509, 0.0951]) -Greedy action tensor([-0.2790, -2.6211, 0.2925, 0.1486]) tensor([0.2272, 0.0218, 0.4024, 0.3485]) -Greedy action tensor([ 0.3550, -0.6566, -0.4461, 0.9051]) tensor([0.2820, 0.1026, 0.1266, 0.4888]) -Greedy action tensor([-0.5701, -1.4627, -0.9810, -0.0990]) tensor([0.2721, 0.1115, 0.1805, 0.4359]) -Greedy action tensor([ 0.3103, -2.1805, -0.0868, 0.4091]) tensor([0.3498, 0.0290, 0.2351, 0.3861]) -Greedy action tensor([ 0.0149, -1.5506, -0.0375, 0.6585]) tensor([0.2462, 0.0515, 0.2336, 0.4687]) -Greedy action tensor([ 0.0287, -0.8809, 0.2426, 1.2322]) tensor([0.1674, 0.0674, 0.2074, 0.5578]) -Greedy action tensor([-1.0352, -0.7517, 0.1761, 0.9476]) tensor([0.0772, 0.1025, 0.2593, 0.5609]) -Greedy action tensor([1.1988, 0.5775, 0.0687, 0.5960]) tensor([0.4154, 0.2232, 0.1342, 0.2273]) -Greedy action tensor([ 1.7495, -0.3123, 1.5193, 0.2237]) tensor([0.4675, 0.0595, 0.3714, 0.1016]) -Greedy action tensor([ 0.3620, -1.8824, 0.7469, 2.1941]) tensor([0.1133, 0.0120, 0.1666, 0.7081]) -Greedy action tensor([ 1.4612, 0.7587, 0.0272, -0.6503]) tensor([0.5392, 0.2671, 0.1285, 0.0653]) -Greedy action tensor([-0.0201, -0.2073, -0.3649, -0.2077]) tensor([0.2970, 0.2463, 0.2104, 0.2462]) -Greedy action tensor([ 0.9756, -0.4502, 0.7899, -0.1832]) tensor([0.4193, 0.1008, 0.3483, 0.1316]) -Greedy action tensor([ 0.0246, -0.9584, 0.6761, -0.3929]) tensor([0.2531, 0.0947, 0.4855, 0.1667]) -Greedy action tensor([ 0.0721, -0.0892, -0.4183, -0.0806]) tensor([0.3010, 0.2562, 0.1844, 0.2584]) -Greedy action tensor([ 0.3914, -0.7751, 1.9059, -0.7490]) tensor([0.1619, 0.0504, 0.7360, 0.0517]) -Greedy action tensor([ 0.8745, 0.5133, 2.9060, -0.5115]) tensor([0.1045, 0.0728, 0.7966, 0.0261]) -Greedy action tensor([-0.3093, -0.1346, 1.0334, -0.1113]) tensor([0.1381, 0.1645, 0.5290, 0.1684]) -Greedy action tensor([-0.2537, -0.3862, -0.7305, 0.2464]) tensor([0.2412, 0.2113, 0.1497, 0.3977]) -Greedy action tensor([-0.2850, -0.3008, -0.1096, 0.6175]) tensor([0.1773, 0.1745, 0.2112, 0.4370]) -Greedy action tensor([-1.0417, -0.8647, -0.7734, -0.8106]) tensor([0.2100, 0.2507, 0.2747, 0.2646]) -Greedy action tensor([ 0.1061, -2.2608, -0.0071, 1.2008]) tensor([0.2010, 0.0188, 0.1795, 0.6006]) -Greedy action tensor([ 1.3080, -0.7116, 0.9582, 1.1736]) tensor([0.3688, 0.0489, 0.2599, 0.3224]) -Greedy action tensor([ 1.7481, -0.3694, -0.6086, 1.2173]) tensor([0.5546, 0.0667, 0.0525, 0.3262]) -Greedy action tensor([-0.9073, -1.2970, -0.0737, -1.2947]) tensor([0.2147, 0.1454, 0.4942, 0.1457]) -Greedy action tensor([-0.3378, -0.6858, 0.9567, 0.9032]) tensor([0.1134, 0.0801, 0.4140, 0.3924]) -Greedy action tensor([ 0.7975, -0.5767, 0.0574, -0.4390]) tensor([0.4949, 0.1252, 0.2361, 0.1437]) -Greedy action tensor([ 0.2999, -0.2867, -0.1220, -0.4757]) tensor([0.3742, 0.2081, 0.2454, 0.1723]) -Greedy action tensor([ 0.0473, 0.2165, -0.0254, -0.0945]) tensor([0.2511, 0.2974, 0.2335, 0.2179]) -Greedy action tensor([ 0.6675, -0.6573, -0.1548, -0.4847]) tensor([0.4948, 0.1315, 0.2174, 0.1563]) -Greedy action tensor([ 0.5783, -0.3106, 0.0045, -0.4232]) tensor([0.4270, 0.1755, 0.2406, 0.1569]) -Greedy action tensor([ 0.6928, -0.4319, -0.0417, -0.3788]) tensor([0.4658, 0.1513, 0.2235, 0.1595]) -Greedy action tensor([ 0.5551, -0.3276, 0.0765, -0.3136]) tensor([0.4077, 0.1687, 0.2526, 0.1710]) -Greedy action tensor([ 0.6244, -0.1372, 0.0329, -0.0770]) tensor([0.3974, 0.1856, 0.2200, 0.1971]) -Greedy action tensor([ 0.7103, -0.4507, -0.0695, -0.3201]) tensor([0.4698, 0.1471, 0.2154, 0.1677]) -Greedy action tensor([ 0.6901, -0.4545, -0.1199, -0.5182]) tensor([0.4850, 0.1544, 0.2158, 0.1449]) -Greedy action tensor([ 0.8014, -0.4543, -0.0011, -0.4362]) tensor([0.4943, 0.1408, 0.2215, 0.1434]) -Greedy action tensor([ 0.8897, -0.4976, -0.0374, -0.4198]) tensor([0.5221, 0.1304, 0.2066, 0.1409]) -Greedy action tensor([ 0.4550, -0.1467, 0.1285, -0.3771]) tensor([0.3698, 0.2026, 0.2668, 0.1609]) -Greedy action tensor([ 0.5760, -0.3253, 0.0927, -0.4671]) tensor([0.4210, 0.1709, 0.2597, 0.1484]) -Greedy action tensor([ 0.4283, -0.2405, 0.0962, -0.0409]) tensor([0.3502, 0.1794, 0.2513, 0.2191]) -Greedy action tensor([ 0.7788, -0.4949, -0.0661, -0.4409]) tensor([0.4988, 0.1396, 0.2143, 0.1473]) -Greedy action tensor([ 1.3916, -1.4900, -0.0953, -0.7347]) tensor([0.7136, 0.0400, 0.1613, 0.0851]) -Greedy action tensor([ 0.7021, -0.3789, 0.0125, -0.2736]) tensor([0.4509, 0.1530, 0.2262, 0.1699]) -Greedy action tensor([ 0.7803, -0.2719, 0.1411, -0.4441]) tensor([0.4606, 0.1608, 0.2431, 0.1354]) -Greedy action tensor([ 0.6052, -0.6290, -0.1230, -0.1438]) tensor([0.4451, 0.1296, 0.2149, 0.2105]) -Greedy action tensor([ 0.6826, -0.3880, -0.0334, -0.2990]) tensor([0.4533, 0.1554, 0.2215, 0.1698]) -Greedy action tensor([ 0.1914, -0.2707, -0.1595, -0.1423]) tensor([0.3278, 0.2065, 0.2308, 0.2348]) -Greedy action tensor([ 0.5030, -0.3038, 0.0049, -0.1403]) tensor([0.3877, 0.1730, 0.2356, 0.2037]) -Greedy action tensor([ 0.8372, -0.2885, 0.0247, -0.4116]) tensor([0.4866, 0.1579, 0.2159, 0.1396]) -Greedy action tensor([ 1.4586, -1.0918, -0.0102, -0.8090]) tensor([0.7083, 0.0553, 0.1630, 0.0734]) -Greedy action tensor([ 0.6023, -0.5799, -0.0212, -0.3553]) tensor([0.4491, 0.1377, 0.2408, 0.1724]) -Greedy action tensor([ 0.2240, -0.0946, 0.2025, -0.3623]) tensor([0.3065, 0.2229, 0.3000, 0.1705]) -Greedy action tensor([ 0.4279, -0.1879, -0.0781, -0.1555]) tensor([0.3702, 0.2000, 0.2232, 0.2066]) -Greedy action tensor([ 0.3280, -0.0190, 0.0073, -0.2717]) tensor([0.3354, 0.2371, 0.2434, 0.1841]) -Greedy action tensor([ 0.5165, -0.3958, 0.0658, -0.4660]) tensor([0.4144, 0.1664, 0.2641, 0.1551]) -Greedy action tensor([ 0.6005, -0.3209, -0.0177, -0.4277]) tensor([0.4358, 0.1734, 0.2349, 0.1559]) -Greedy action tensor([ 0.9064, -0.3509, -0.0692, -0.3440]) tensor([0.5134, 0.1460, 0.1935, 0.1470]) -Greedy action tensor([ 1.1876, -1.4493, 0.0394, -0.7668]) tensor([0.6534, 0.0468, 0.2073, 0.0926]) -Greedy action tensor([ 0.5975, -0.2438, -0.1594, -0.2944]) tensor([0.4329, 0.1866, 0.2031, 0.1774]) -Greedy action tensor([ 1.0345, -0.7466, 0.0732, -0.4573]) tensor([0.5631, 0.0949, 0.2153, 0.1267]) -Greedy action tensor([ 1.0225, -0.9398, 0.1187, -0.5284]) tensor([0.5690, 0.0800, 0.2304, 0.1206]) -Greedy action tensor([ 0.6998, -0.9898, -0.0098, -0.2300]) tensor([0.4828, 0.0891, 0.2375, 0.1905]) -Greedy action tensor([ 0.7741, -0.4650, 0.0392, -0.2303]) tensor([0.4683, 0.1356, 0.2246, 0.1715]) -Greedy action tensor([ 0.5049, -0.2404, -0.0039, -0.2823]) tensor([0.3951, 0.1875, 0.2376, 0.1798]) -Greedy action tensor([ 0.6573, -0.3851, -0.1019, -0.2747]) tensor([0.4516, 0.1592, 0.2114, 0.1778]) -Greedy action tensor([ 0.1592, 0.1082, 0.0080, -0.0825]) tensor([0.2781, 0.2643, 0.2391, 0.2184]) -Greedy action tensor([ 0.3515, -0.3626, 0.2134, -0.3051]) tensor([0.3473, 0.1701, 0.3025, 0.1801]) -Greedy action tensor([ 0.8779, -0.4605, -0.2005, -0.2693]) tensor([0.5209, 0.1366, 0.1772, 0.1654]) -Greedy action tensor([ 0.7792, -0.4279, -0.0211, -0.3736]) tensor([0.4845, 0.1449, 0.2176, 0.1530]) -Greedy action tensor([ 0.6256, -0.7338, 0.1559, -0.8305]) tensor([0.4728, 0.1214, 0.2956, 0.1102]) -Greedy action tensor([ 0.4498, -0.2319, -0.1330, -0.2839]) tensor([0.3930, 0.1988, 0.2194, 0.1887]) -Greedy action tensor([ 0.6706, -0.4898, -0.0307, -0.3150]) tensor([0.4582, 0.1436, 0.2272, 0.1710]) -Greedy action tensor([ 0.5671, 0.0103, -0.1395, -0.1135]) tensor([0.3887, 0.2227, 0.1917, 0.1968]) -Greedy action tensor([ 0.8635, -0.4425, -0.1964, -0.6383]) tensor([0.5434, 0.1472, 0.1883, 0.1210]) -Greedy action tensor([ 0.5884, -0.4212, -0.0688, -0.2022]) tensor([0.4280, 0.1560, 0.2219, 0.1941]) -Greedy action tensor([ 0.4310, -0.1773, -0.0337, -0.2022]) tensor([0.3699, 0.2013, 0.2324, 0.1964]) -Greedy action tensor([ 1.0388, -0.6404, -0.0937, -0.4430]) tensor([0.5760, 0.1074, 0.1856, 0.1309]) -Greedy action tensor([ 0.1952, 0.0533, -0.0926, -0.3993]) tensor([0.3155, 0.2738, 0.2366, 0.1741]) -Greedy action tensor([ 0.1550, -0.1353, -0.0240, -0.2691]) tensor([0.3088, 0.2310, 0.2582, 0.2021]) -Greedy action tensor([ 0.9894, -0.4084, -0.0926, -0.4678]) tensor([0.5498, 0.1359, 0.1863, 0.1280]) -Greedy action tensor([ 0.5266, -0.3685, -0.0230, -0.2689]) tensor([0.4103, 0.1676, 0.2368, 0.1852]) -Greedy action tensor([ 0.5110, -0.4010, -0.1401, -0.1645]) tensor([0.4112, 0.1652, 0.2144, 0.2092]) -Greedy action tensor([ 1.0696, -0.1725, -0.0380, -0.1513]) tensor([0.5224, 0.1509, 0.1726, 0.1541]) -Greedy action tensor([ 0.6320, -0.3847, 0.0541, -0.1766]) tensor([0.4222, 0.1528, 0.2369, 0.1881]) -Greedy action tensor([ 0.3997, -0.2467, -0.1835, -0.5003]) tensor([0.4018, 0.2105, 0.2243, 0.1634]) -Greedy action tensor([ 0.5386, 0.1049, -0.1274, -0.2798]) tensor([0.3842, 0.2490, 0.1974, 0.1695]) -Greedy action tensor([ 0.4290, -0.2171, -0.1285, -0.3693]) tensor([0.3926, 0.2058, 0.2248, 0.1767]) -Greedy action tensor([ 0.4932, -0.1625, -0.0088, -0.1008]) tensor([0.3736, 0.1939, 0.2262, 0.2063]) -Greedy action tensor([ 0.8701, -0.5320, 0.0200, -0.6956]) tensor([0.5312, 0.1307, 0.2270, 0.1110]) -Greedy action tensor([ 0.8345, -0.7116, -0.2439, -0.5978]) tensor([0.5580, 0.1189, 0.1898, 0.1332]) -Greedy action tensor([ 0.8267, -0.4514, 0.0839, -0.5032]) tensor([0.4953, 0.1380, 0.2357, 0.1310]) -Greedy action tensor([ 0.8895, -0.3610, -0.0781, -0.4368]) tensor([0.5177, 0.1482, 0.1967, 0.1374]) -Greedy action tensor([ 0.4912, -0.2769, 0.2054, -0.3241]) tensor([0.3762, 0.1745, 0.2827, 0.1665]) -Greedy action tensor([ 0.5759, -0.0535, -0.1012, -0.1715]) tensor([0.3977, 0.2119, 0.2021, 0.1883]) -Greedy action tensor([ 0.7440, -0.6010, -0.1485, -0.6434]) tensor([0.5209, 0.1357, 0.2134, 0.1301]) -Greedy action tensor([ 0.6289, -0.6287, -0.0837, -0.2582]) tensor([0.4573, 0.1300, 0.2243, 0.1883]) -Greedy action tensor([ 0.7914, -0.4844, 0.1158, -0.4082]) tensor([0.4786, 0.1336, 0.2435, 0.1442]) -Greedy action tensor([ 0.7994, -0.8521, -0.1376, -0.4097]) tensor([0.5313, 0.1019, 0.2082, 0.1586]) -Greedy action tensor([ 1.2747, -0.9176, 0.0600, -0.8421]) tensor([0.6541, 0.0730, 0.1941, 0.0788]) -Greedy action tensor([ 0.3130, -0.0297, -0.0428, -0.2704]) tensor([0.3369, 0.2391, 0.2360, 0.1880]) -Greedy action tensor([0.4728, 0.2993, 0.0032, 0.0526]) tensor([0.3202, 0.2692, 0.2002, 0.2104]) -Greedy action tensor([ 0.3838, -0.0351, -0.1862, -0.3599]) tensor([0.3706, 0.2437, 0.2096, 0.1761]) -Greedy action tensor([ 0.4495, -0.1962, -0.0575, -0.1667]) tensor([0.3750, 0.1966, 0.2259, 0.2025]) -Greedy action tensor([ 0.4914, -0.0423, 0.1567, -0.3782]) tensor([0.3675, 0.2155, 0.2630, 0.1540]) -Greedy action tensor([ 0.6090, 0.2282, -0.0848, -0.2247]) tensor([0.3821, 0.2611, 0.1909, 0.1660]) -Greedy action tensor([ 0.8277, -0.5047, -0.0166, -0.4846]) tensor([0.5095, 0.1344, 0.2190, 0.1371]) -Greedy action tensor([ 1.1595, -0.4160, -0.5438, 0.4642]) tensor([0.5297, 0.1096, 0.0964, 0.2643]) -Greedy action tensor([ 0.8760, -0.7246, 0.1224, 0.1657]) tensor([0.4621, 0.0932, 0.2175, 0.2271]) -Greedy action tensor([ 1.5724, -0.9691, -0.5623, 0.4618]) tensor([0.6551, 0.0516, 0.0775, 0.2158]) -Greedy action tensor([ 2.0836, -1.2600, -0.2984, 0.6936]) tensor([0.7264, 0.0256, 0.0671, 0.1809]) -Greedy action tensor([ 1.1665, -0.6603, -0.2029, 0.4265]) tensor([0.5285, 0.0850, 0.1344, 0.2521]) -Greedy action tensor([ 1.4545, -0.2863, -0.2867, 0.7331]) tensor([0.5444, 0.0955, 0.0954, 0.2646]) -Greedy action tensor([ 0.7481, -0.3859, -0.2218, 0.0668]) tensor([0.4531, 0.1458, 0.1718, 0.2293]) -Greedy action tensor([ 1.5850, -0.6315, -0.4626, 0.6216]) tensor([0.6174, 0.0673, 0.0797, 0.2356]) -Greedy action tensor([ 1.8812, -0.6205, -0.3220, 0.4577]) tensor([0.6977, 0.0572, 0.0771, 0.1681]) -Greedy action tensor([ 1.2394, -0.2482, -0.4572, 0.2380]) tensor([0.5629, 0.1272, 0.1032, 0.2068]) -Greedy action tensor([ 1.6112, 0.1050, -0.1445, 0.1237]) tensor([0.6171, 0.1368, 0.1066, 0.1394]) -Greedy action tensor([ 1.5742, -0.3823, 0.0200, 0.4052]) tensor([0.6012, 0.0850, 0.1271, 0.1868]) -Greedy action tensor([ 1.4299, -0.3233, -0.5584, 0.2046]) tensor([0.6235, 0.1080, 0.0854, 0.1831]) -Greedy action tensor([ 1.2258, 0.1547, -0.7103, 0.2269]) tensor([0.5390, 0.1847, 0.0778, 0.1985]) -Greedy action tensor([ 2.3001, -0.5427, -0.0667, 0.6837]) tensor([0.7404, 0.0431, 0.0694, 0.1470]) -Greedy action tensor([ 1.5084, -0.4763, -0.1902, 0.3093]) tensor([0.6166, 0.0847, 0.1128, 0.1859]) -Greedy action tensor([ 1.2244, -0.3798, -0.3959, 0.4447]) tensor([0.5384, 0.1082, 0.1065, 0.2469]) -Greedy action tensor([ 0.3464, -0.3497, -0.5132, 0.3262]) tensor([0.3446, 0.1718, 0.1459, 0.3377]) -Greedy action tensor([ 1.4348, -0.1521, -0.2430, 0.7257]) tensor([0.5309, 0.1086, 0.0992, 0.2613]) -Greedy action tensor([ 1.6230, -0.6644, -0.2393, 0.3194]) tensor([0.6543, 0.0664, 0.1016, 0.1777]) -Greedy action tensor([ 0.6936, -0.3081, 0.1267, -0.0328]) tensor([0.4135, 0.1519, 0.2346, 0.2000]) -Greedy action tensor([ 0.8101, -0.5334, -0.0546, -0.0133]) tensor([0.4715, 0.1230, 0.1986, 0.2070]) -Greedy action tensor([ 1.2049, -0.6510, -0.0235, 0.0300]) tensor([0.5689, 0.0889, 0.1665, 0.1757]) -Greedy action tensor([ 1.1339, -0.4301, -0.2406, 0.5495]) tensor([0.4951, 0.1036, 0.1252, 0.2760]) -Greedy action tensor([ 1.2050, -0.2553, -0.1971, 0.4780]) tensor([0.5098, 0.1184, 0.1255, 0.2464]) -Greedy action tensor([ 1.5855, -0.2188, -0.5151, 0.0799]) tensor([0.6628, 0.1091, 0.0811, 0.1471]) -Greedy action tensor([ 0.8672, -0.0887, -0.1491, 0.3946]) tensor([0.4220, 0.1622, 0.1527, 0.2630]) -Greedy action tensor([ 1.2229, -0.3140, -0.1448, 0.2810]) tensor([0.5377, 0.1156, 0.1370, 0.2097]) -Greedy action tensor([ 0.9101, -0.2687, -0.0795, 0.1410]) tensor([0.4667, 0.1436, 0.1735, 0.2163]) -Greedy action tensor([ 1.9733, -0.7923, -0.5364, 0.6368]) tensor([0.7107, 0.0447, 0.0578, 0.1868]) -Greedy action tensor([ 1.1074, 0.1387, -0.2132, 0.1943]) tensor([0.4883, 0.1854, 0.1304, 0.1960]) -Greedy action tensor([ 0.6519, 0.0855, -0.0558, -0.2512]) tensor([0.4056, 0.2302, 0.1999, 0.1644]) -Greedy action tensor([ 1.2453, -0.1183, -0.3005, 0.5584]) tensor([0.5071, 0.1297, 0.1081, 0.2551]) -Greedy action tensor([ 0.8908, -0.1310, -0.1714, 0.0973]) tensor([0.4634, 0.1668, 0.1602, 0.2096]) -Greedy action tensor([ 1.2783, -0.1868, -0.3567, 0.6508]) tensor([0.5102, 0.1179, 0.0995, 0.2724]) -Greedy action tensor([ 1.9169, -0.8443, -0.3794, 0.7955]) tensor([0.6713, 0.0424, 0.0676, 0.2187]) -Greedy action tensor([ 1.2039, -0.2970, -0.2537, 0.2668]) tensor([0.5413, 0.1207, 0.1260, 0.2120]) -Greedy action tensor([ 1.1581, -0.6303, -0.3867, 0.4689]) tensor([0.5312, 0.0888, 0.1133, 0.2666]) -Greedy action tensor([ 0.5382, -0.0418, -0.1468, -0.1555]) tensor([0.3901, 0.2184, 0.1966, 0.1949]) -Greedy action tensor([ 1.6866, -1.0594, -0.3949, 0.6434]) tensor([0.6488, 0.0416, 0.0809, 0.2286]) -Greedy action tensor([ 1.5396, -0.3389, 0.1119, 0.0749]) tensor([0.6158, 0.0941, 0.1477, 0.1423]) -Greedy action tensor([ 0.4694, -0.1887, -0.0450, 0.3308]) tensor([0.3349, 0.1734, 0.2002, 0.2915]) -Greedy action tensor([ 1.3480, -0.2845, -0.2403, -0.0027]) tensor([0.6029, 0.1178, 0.1231, 0.1562]) -Greedy action tensor([ 1.0827, -0.3202, -0.3876, 0.4178]) tensor([0.5025, 0.1236, 0.1155, 0.2584]) -Greedy action tensor([ 1.4711, -0.1775, 0.1269, 0.0213]) tensor([0.5925, 0.1140, 0.1545, 0.1390]) -Greedy action tensor([ 1.0095, -0.1032, 0.0179, 0.0743]) tensor([0.4780, 0.1571, 0.1773, 0.1876]) -Greedy action tensor([ 0.8113, -0.1216, -0.2665, 0.3943]) tensor([0.4179, 0.1644, 0.1422, 0.2754]) -Greedy action tensor([ 1.5044, -0.4678, -0.3087, 0.6666]) tensor([0.5764, 0.0802, 0.0940, 0.2494]) -Greedy action tensor([ 1.9516, -0.5204, -0.5148, 0.1637]) tensor([0.7482, 0.0632, 0.0635, 0.1252]) -Greedy action tensor([ 0.9485, -0.2628, -0.2146, 0.2199]) tensor([0.4778, 0.1423, 0.1493, 0.2306]) -Greedy action tensor([ 0.4648, -0.3886, -0.5274, 0.4942]) tensor([0.3538, 0.1507, 0.1312, 0.3644]) -Greedy action tensor([ 0.6820, -0.3042, -0.3264, 0.5286]) tensor([0.3853, 0.1437, 0.1405, 0.3305]) -Greedy action tensor([ 0.3901, -0.2084, -0.0113, 0.1371]) tensor([0.3338, 0.1835, 0.2235, 0.2592]) -Greedy action tensor([ 1.2460, -0.6616, -0.0988, 0.1825]) tensor([0.5700, 0.0846, 0.1485, 0.1968]) -Greedy action tensor([ 1.5404, -0.4860, 0.1613, 0.4000]) tensor([0.5871, 0.0774, 0.1478, 0.1877]) -Greedy action tensor([ 1.0830, -0.6757, -0.0841, -0.3265]) tensor([0.5788, 0.0997, 0.1802, 0.1414]) -Greedy action tensor([ 1.8286, -0.8494, -0.1385, 0.4835]) tensor([0.6807, 0.0468, 0.0952, 0.1773]) -Greedy action tensor([ 1.4828, -0.4302, -0.4152, 0.4182]) tensor([0.6089, 0.0899, 0.0913, 0.2100]) -Greedy action tensor([ 1.1268, -0.2605, -0.1552, 0.1252]) tensor([0.5278, 0.1318, 0.1465, 0.1939]) -Greedy action tensor([ 1.0814, -0.6793, -0.3073, 0.1341]) tensor([0.5528, 0.0950, 0.1379, 0.2143]) -Greedy action tensor([ 1.1184, -0.6066, 0.3161, 0.2576]) tensor([0.4880, 0.0869, 0.2188, 0.2063]) -Greedy action tensor([ 0.7861, -0.1717, -0.0967, -0.0151]) tensor([0.4452, 0.1708, 0.1842, 0.1998]) -Greedy action tensor([ 1.0449, -0.4790, 0.4140, 0.1771]) tensor([0.4609, 0.1004, 0.2452, 0.1935]) -Greedy action tensor([ 1.6696, -0.1116, -0.4966, 0.2233]) tensor([0.6586, 0.1109, 0.0755, 0.1550]) -Greedy action tensor([ 0.7241, -0.4320, -0.3790, 0.3067]) tensor([0.4338, 0.1365, 0.1439, 0.2858]) -Greedy action tensor([ 0.8226, -0.4790, -0.4013, 0.3289]) tensor([0.4594, 0.1250, 0.1351, 0.2804]) -Greedy action tensor([ 2.3774, -1.1946, -0.3583, 0.3420]) tensor([0.8173, 0.0230, 0.0530, 0.1068]) -Greedy action tensor([ 1.6054, -0.5712, -0.6647, 0.8751]) tensor([0.5887, 0.0668, 0.0608, 0.2837]) -Greedy action tensor([ 1.7470, 0.1525, -0.3841, 0.2370]) tensor([0.6483, 0.1316, 0.0769, 0.1432]) -Greedy action tensor([ 1.3180, -0.6130, -0.0895, 0.2023]) tensor([0.5822, 0.0844, 0.1425, 0.1908]) -Greedy action tensor([ 0.9238, -0.1186, -0.0799, -0.0470]) tensor([0.4767, 0.1681, 0.1747, 0.1805]) -Greedy action tensor([ 1.1192, -0.2725, -0.3466, 0.3488]) tensor([0.5148, 0.1280, 0.1189, 0.2383]) -Greedy action tensor([ 1.5517, -0.6009, -0.3281, 0.5558]) tensor([0.6104, 0.0709, 0.0932, 0.2255]) -Greedy action tensor([ 2.0264, -0.4012, -0.5134, 0.6188]) tensor([0.7083, 0.0625, 0.0559, 0.1733]) -Greedy action tensor([ 0.8142, -0.2181, -0.4046, 0.7813]) tensor([0.3818, 0.1360, 0.1128, 0.3694]) -Greedy action tensor([ 2.1634, -0.8391, -0.4830, 0.4117]) tensor([0.7728, 0.0384, 0.0548, 0.1341]) -Greedy action tensor([ 0.7043, -0.3482, -0.2398, 0.0352]) tensor([0.4444, 0.1551, 0.1729, 0.2276]) -Greedy action tensor([0.9252, 0.0644, 0.1956, 0.2340]) tensor([0.4157, 0.1757, 0.2004, 0.2082]) -Greedy action tensor([ 1.5210, -0.4024, -0.6372, 0.4021]) tensor([0.6296, 0.0920, 0.0727, 0.2057]) -Greedy action tensor([ 1.8096, -0.8154, -0.1707, 0.7855]) tensor([0.6371, 0.0461, 0.0879, 0.2288]) -Greedy action tensor([ 1.0078, -0.3046, -0.3708, 0.1448]) tensor([0.5147, 0.1385, 0.1297, 0.2171]) -Greedy action tensor([ 1.3880, -0.3913, 1.8578, 0.0210]) tensor([0.3308, 0.0558, 0.5291, 0.0843]) -Greedy action tensor([ 1.4070, -0.0215, -0.2862, 1.4598]) tensor([0.4036, 0.0967, 0.0742, 0.4254]) -Greedy action tensor([ 0.8491, -0.6743, 1.9150, 0.4128]) tensor([0.2097, 0.0457, 0.6090, 0.1356]) -Greedy action tensor([-0.2589, -1.3035, 0.2611, 1.4145]) tensor([0.1196, 0.0421, 0.2011, 0.6373]) -Greedy action tensor([ 0.5981, -1.5663, -0.4884, 0.3113]) tensor([0.4539, 0.0521, 0.1532, 0.3408]) -Greedy action tensor([-1.0283, -2.2503, 0.0608, 0.1232]) tensor([0.1346, 0.0397, 0.4000, 0.4257]) -Greedy action tensor([ 1.0688, -0.9823, 0.3212, -0.0963]) tensor([0.5225, 0.0672, 0.2474, 0.1629]) -Greedy action tensor([ 0.6711, -0.1366, 0.4245, 0.1845]) tensor([0.3519, 0.1569, 0.2750, 0.2163]) -Greedy action tensor([ 0.4602, 0.3457, -0.7606, 0.4784]) tensor([0.3120, 0.2782, 0.0920, 0.3177]) -Greedy action tensor([ 0.9593, 0.6169, -0.5902, 0.4400]) tensor([0.3972, 0.2821, 0.0844, 0.2363]) -Greedy action tensor([ 0.3414, -1.1841, 0.1578, 0.0953]) tensor([0.3532, 0.0768, 0.2939, 0.2761]) -Greedy action tensor([-0.5427, -1.2830, 0.1772, 0.9037]) tensor([0.1286, 0.0613, 0.2641, 0.5461]) -Greedy action tensor([ 0.2950, -1.0986, -0.8462, 0.1305]) tensor([0.4139, 0.1027, 0.1322, 0.3511]) -Greedy action tensor([-0.4748, -0.0224, -0.5498, 0.4833]) tensor([0.1638, 0.2574, 0.1519, 0.4269]) -Greedy action tensor([-0.3024, 0.4471, 0.4482, 0.9425]) tensor([0.1149, 0.2430, 0.2433, 0.3988]) -Greedy action tensor([-0.8471, -1.4429, 0.8007, -0.1318]) tensor([0.1137, 0.0627, 0.5910, 0.2326]) -Greedy action tensor([-0.4505, 0.2576, 0.1782, -1.2079]) tensor([0.1861, 0.3778, 0.3489, 0.0872]) -Greedy action tensor([ 0.8855, -0.6865, -0.5925, 0.6675]) tensor([0.4465, 0.0927, 0.1018, 0.3590]) -Greedy action tensor([-1.7350, -0.0990, -1.1659, -0.5348]) tensor([0.0891, 0.4575, 0.1574, 0.2959]) -Greedy action tensor([-0.5423, -0.8456, -0.8415, 0.4160]) tensor([0.1966, 0.1452, 0.1457, 0.5125]) -Greedy action tensor([-0.3601, -0.3252, -0.4824, -0.6343]) tensor([0.2717, 0.2813, 0.2404, 0.2065]) -Greedy action tensor([-0.1611, -2.2192, -0.2569, 0.2353]) tensor([0.2839, 0.0362, 0.2579, 0.4219]) -Greedy action tensor([ 0.9776, -1.1176, 0.4769, 0.2680]) tensor([0.4502, 0.0554, 0.2729, 0.2215]) -Greedy action tensor([ 1.4635, -1.9312, 0.0648, 0.7957]) tensor([0.5576, 0.0187, 0.1377, 0.2860]) -Greedy action tensor([ 0.1847, 0.5458, 0.1148, -0.6632]) tensor([0.2635, 0.3780, 0.2457, 0.1128]) -Greedy action tensor([ 0.7919, 0.0726, 1.1210, -0.0188]) tensor([0.3011, 0.1467, 0.4184, 0.1338]) -Greedy action tensor([ 0.4281, -1.8878, -1.1293, -0.0949]) tensor([0.5257, 0.0519, 0.1108, 0.3116]) -Greedy action tensor([-0.5787, 0.3974, 2.1796, -0.9804]) tensor([0.0498, 0.1321, 0.7849, 0.0333]) -Greedy action tensor([ 1.5232, -1.0162, 1.8724, 0.7539]) tensor([0.3378, 0.0267, 0.4790, 0.1565]) -Greedy action tensor([ 1.2422, -1.4439, 1.0926, 1.7526]) tensor([0.2782, 0.0190, 0.2395, 0.4634]) -Greedy action tensor([-0.0182, 0.6220, 0.9615, -0.0076]) tensor([0.1522, 0.2887, 0.4053, 0.1538]) -Greedy action tensor([ 0.0207, 0.0318, -0.4864, -0.7069]) tensor([0.3230, 0.3265, 0.1945, 0.1560]) -Greedy action tensor([ 0.0481, -2.7116, -0.1598, 1.5703]) tensor([0.1549, 0.0098, 0.1258, 0.7096]) -Greedy action tensor([-0.9342, -0.9953, 2.0779, -0.4183]) tensor([0.0418, 0.0393, 0.8490, 0.0700]) -Greedy action tensor([ 0.2246, -1.3607, 0.5248, 1.5477]) tensor([0.1585, 0.0325, 0.2140, 0.5951]) -Greedy action tensor([ 1.4448, -0.7224, 1.7318, 0.9355]) tensor([0.3281, 0.0376, 0.4372, 0.1972]) -Greedy action tensor([ 0.2172, -0.5966, 0.7931, -0.0737]) tensor([0.2519, 0.1116, 0.4481, 0.1883]) -Greedy action tensor([ 0.1077, -0.5816, -0.5362, -1.2842]) tensor([0.4394, 0.2206, 0.2308, 0.1092]) -Greedy action tensor([ 0.2144, -1.5465, -0.3340, -0.2056]) tensor([0.4155, 0.0714, 0.2401, 0.2730]) -Greedy action tensor([-0.1497, -0.7911, 1.2561, 1.7193]) tensor([0.0827, 0.0436, 0.3375, 0.5362]) -Greedy action tensor([ 1.5497, -0.6535, 1.3783, 0.1583]) tensor([0.4542, 0.0502, 0.3827, 0.1130]) -Greedy action tensor([-0.5065, -1.4754, -0.1655, 0.9991]) tensor([0.1371, 0.0520, 0.1928, 0.6180]) -Greedy action tensor([ 1.1750, 0.2619, -0.0403, 0.5264]) tensor([0.4503, 0.1807, 0.1336, 0.2354]) -Greedy action tensor([ 0.7809, -0.2915, 1.2478, 0.9534]) tensor([0.2424, 0.0829, 0.3866, 0.2880]) -Greedy action tensor([ 0.7351, -1.8667, 0.4002, 0.5092]) tensor([0.3865, 0.0287, 0.2765, 0.3083]) -Greedy action tensor([-0.6138, -1.1219, -0.9300, -0.2298]) tensor([0.2632, 0.1584, 0.1919, 0.3865]) -Greedy action tensor([-0.8319, -0.8009, -0.6782, -0.0544]) tensor([0.1861, 0.1920, 0.2170, 0.4049]) -Greedy action tensor([-0.4194, -1.1266, 0.5926, 0.3742]) tensor([0.1549, 0.0764, 0.4262, 0.3426]) -Greedy action tensor([ 0.2973, -0.0872, -0.8634, -0.4433]) tensor([0.4047, 0.2755, 0.1268, 0.1930]) -Greedy action tensor([ 0.6669, -1.2511, 0.9794, 0.6651]) tensor([0.2847, 0.0418, 0.3892, 0.2842]) -Greedy action tensor([-0.3257, -0.4898, 1.2750, -0.8520]) tensor([0.1352, 0.1147, 0.6702, 0.0799]) -Greedy action tensor([ 1.0002, -0.6529, 0.9698, 0.3039]) tensor([0.3760, 0.0720, 0.3647, 0.1874]) -Greedy action tensor([ 0.0731, 1.0030, -0.0051, -0.3676]) tensor([0.1960, 0.4966, 0.1812, 0.1261]) -Greedy action tensor([0.9344, 0.5607, 0.6434, 1.4361]) tensor([0.2447, 0.1684, 0.1829, 0.4041]) -Greedy action tensor([ 1.7569, -0.3058, 0.4095, -0.4119]) tensor([0.6661, 0.0847, 0.1731, 0.0761]) -Greedy action tensor([-0.0341, -0.2683, -1.2177, 0.6713]) tensor([0.2426, 0.1919, 0.0743, 0.4912]) -Greedy action tensor([ 1.2144, -0.8466, 0.1455, 0.5189]) tensor([0.5077, 0.0646, 0.1744, 0.2533]) -Greedy action tensor([ 0.6268, -0.2699, -0.1557, 0.1073]) tensor([0.4065, 0.1658, 0.1859, 0.2418]) -Greedy action tensor([0.3110, 0.1875, 0.6421, 0.0981]) tensor([0.2448, 0.2164, 0.3409, 0.1979]) -Greedy action tensor([-1.2621, -0.5629, -0.5070, -1.5151]) tensor([0.1690, 0.3401, 0.3597, 0.1312]) -Greedy action tensor([ 0.7591, -0.4648, 0.2764, 0.1516]) tensor([0.4072, 0.1197, 0.2513, 0.2218]) -Greedy action tensor([-0.3805, 0.0329, 0.9680, -0.9369]) tensor([0.1442, 0.2180, 0.5552, 0.0826]) -Greedy action tensor([-0.4542, 0.5684, 1.4735, -1.4251]) tensor([0.0906, 0.2520, 0.6230, 0.0343]) -Greedy action tensor([ 1.2480, -0.8810, 0.2574, 0.7987]) tensor([0.4698, 0.0559, 0.1745, 0.2998]) -Greedy action tensor([ 1.5317, -2.2348, -0.9961, 0.0251]) tensor([0.7549, 0.0175, 0.0603, 0.1673]) -Greedy action tensor([ 0.5259, -0.7925, 1.4892, 0.0477]) tensor([0.2219, 0.0594, 0.5813, 0.1375]) -Greedy action tensor([ 0.3240, 0.3335, -0.0483, -0.8682]) tensor([0.3331, 0.3363, 0.2296, 0.1011]) -Greedy action tensor([-0.7533, -1.3954, 1.3979, 0.5031]) tensor([0.0733, 0.0386, 0.6304, 0.2576]) -Greedy action tensor([ 1.1976, -0.3333, 0.6168, -0.3016]) tensor([0.5002, 0.1082, 0.2799, 0.1117]) -Greedy action tensor([ 1.1504, 0.1975, -0.2699, 0.4701]) tensor([0.4687, 0.1807, 0.1132, 0.2374]) -Greedy action tensor([-0.0555, 0.0245, -0.1575, -0.3785]) tensor([0.2695, 0.2920, 0.2434, 0.1951]) -Greedy action tensor([-0.7558, -1.2488, 0.6000, -0.3284]) tensor([0.1424, 0.0870, 0.5524, 0.2183]) -Greedy action tensor([ 1.1876, -0.7811, 0.9128, -0.2587]) tensor([0.4684, 0.0654, 0.3559, 0.1103]) -Greedy action tensor([ 0.4651, 0.6561, 0.4643, -0.3150]) tensor([0.2726, 0.3300, 0.2724, 0.1250]) -Greedy action tensor([ 0.1451, -0.6957, 0.8044, 0.6684]) tensor([0.1979, 0.0854, 0.3827, 0.3340]) -Greedy action tensor([ 0.9222, -0.9249, 1.3154, -0.3362]) tensor([0.3421, 0.0539, 0.5068, 0.0972]) -Greedy action tensor([-0.5750, -1.4241, 0.1117, 1.2200]) tensor([0.1060, 0.0453, 0.2106, 0.6380]) -Greedy action tensor([ 0.3826, -0.1408, 0.9850, 1.2831]) tensor([0.1701, 0.1008, 0.3106, 0.4185]) -Greedy action tensor([ 0.6025, -1.0717, -0.2536, 0.1228]) tensor([0.4482, 0.0840, 0.1904, 0.2774]) -Greedy action tensor([ 1.5139, -1.2498, 0.4530, 1.6089]) tensor([0.3986, 0.0251, 0.1380, 0.4383]) -Greedy action tensor([-0.2823, -1.9944, -0.4067, 0.3586]) tensor([0.2524, 0.0456, 0.2229, 0.4791]) -Greedy action tensor([ 0.3657, -1.1216, 0.2182, 0.1371]) tensor([0.3467, 0.0783, 0.2991, 0.2758]) -Greedy action tensor([-1.8038, -0.2462, 0.5980, -0.0721]) tensor([0.0446, 0.2115, 0.4921, 0.2518]) -Greedy action tensor([-1.9684, -0.5652, 0.8516, 0.0378]) tensor([0.0342, 0.1389, 0.5730, 0.2539]) -Greedy action tensor([-1.2093, -0.6601, 0.3574, 0.3136]) tensor([0.0826, 0.1430, 0.3957, 0.3787]) -Greedy action tensor([-1.2406, -0.5465, 0.2802, 0.3295]) tensor([0.0807, 0.1616, 0.3695, 0.3881]) -Greedy action tensor([-1.6965, -0.4381, 0.5404, -0.0397]) tensor([0.0523, 0.1840, 0.4896, 0.2741]) -Greedy action tensor([-1.3271, -0.6185, 0.3849, 0.3046]) tensor([0.0731, 0.1484, 0.4049, 0.3736]) -Greedy action tensor([-0.7418, 0.6164, 0.1471, 0.0481]) tensor([0.1050, 0.4083, 0.2554, 0.2313]) -Greedy action tensor([-1.3821, -0.5135, 0.8462, 0.7771]) tensor([0.0469, 0.1117, 0.4352, 0.4062]) -Greedy action tensor([-1.0257, -0.5943, 0.0782, 0.5699]) tensor([0.0954, 0.1468, 0.2876, 0.4702]) -Greedy action tensor([-1.3220, -0.5728, 1.2452, 1.2350]) tensor([0.0344, 0.0728, 0.4486, 0.4441]) -Greedy action tensor([-1.9074, -0.3514, 0.6458, -0.1459]) tensor([0.0410, 0.1942, 0.5264, 0.2385]) -Greedy action tensor([-1.6789, -0.5205, 0.5245, -0.0162]) tensor([0.0540, 0.1720, 0.4891, 0.2848]) -Greedy action tensor([-1.8981, -0.4540, 0.6449, -0.1607]) tensor([0.0423, 0.1793, 0.5380, 0.2404]) -Greedy action tensor([-1.8106, -0.3218, 0.5699, -0.1118]) tensor([0.0461, 0.2041, 0.4980, 0.2518]) -Greedy action tensor([-1.8120, -0.4497, 0.8402, 0.2152]) tensor([0.0375, 0.1464, 0.5316, 0.2845]) -Greedy action tensor([-1.8846, -0.4347, 0.6336, -0.1330]) tensor([0.0427, 0.1819, 0.5294, 0.2460]) -Greedy action tensor([-1.5622, 0.2534, 0.3773, 0.1835]) tensor([0.0504, 0.3099, 0.3507, 0.2890]) -Greedy action tensor([-1.4242, -0.3090, 0.6841, 0.5428]) tensor([0.0515, 0.1569, 0.4237, 0.3679]) -Greedy action tensor([-1.8162, -0.4713, 0.5991, -0.1029]) tensor([0.0463, 0.1778, 0.5187, 0.2571]) -Greedy action tensor([-1.8531, -0.4133, 0.6133, -0.1206]) tensor([0.0441, 0.1863, 0.5200, 0.2496]) -Greedy action tensor([-0.8534, -0.1777, 0.2248, 0.4467]) tensor([0.1044, 0.2053, 0.3070, 0.3833]) -Greedy action tensor([-1.6906, -0.5223, 0.5293, -0.0086]) tensor([0.0532, 0.1711, 0.4897, 0.2860]) -Greedy action tensor([-1.7123, -0.4787, 0.7104, 0.2636]) tensor([0.0436, 0.1498, 0.4919, 0.3147]) -Greedy action tensor([-0.7164, -0.3507, -1.0099, -0.5235]) tensor([0.2273, 0.3276, 0.1695, 0.2756]) -Greedy action tensor([-1.6058, -0.5038, 0.4968, 0.0353]) tensor([0.0576, 0.1734, 0.4717, 0.2973]) -Greedy action tensor([-1.5094, -0.5574, 0.4279, 0.0853]) tensor([0.0647, 0.1676, 0.4490, 0.3187]) -Greedy action tensor([-1.8131, -0.4976, 0.6276, -0.0783]) tensor([0.0457, 0.1704, 0.5248, 0.2591]) -Greedy action tensor([-0.9406, -0.1536, 0.2677, -0.0843]) tensor([0.1124, 0.2468, 0.3762, 0.2646]) -Greedy action tensor([-1.6650, -0.4355, 0.5253, -0.0224]) tensor([0.0540, 0.1846, 0.4824, 0.2790]) -Greedy action tensor([-1.8412, -0.4228, 0.6228, -0.1017]) tensor([0.0443, 0.1829, 0.5205, 0.2522]) -Greedy action tensor([-0.8645, -0.6075, 0.4080, 0.4101]) tensor([0.1059, 0.1370, 0.3782, 0.3790]) -Greedy action tensor([-1.7734, -0.2735, 0.5668, -0.0531]) tensor([0.0466, 0.2089, 0.4841, 0.2604]) -Greedy action tensor([-1.8524, -0.4880, 0.6185, -0.1347]) tensor([0.0448, 0.1753, 0.5302, 0.2497]) -Greedy action tensor([-0.8975, -0.5520, 0.3775, 0.9323]) tensor([0.0818, 0.1156, 0.2928, 0.5099]) -Greedy action tensor([-1.6991, -0.1600, 0.5884, 0.1734]) tensor([0.0454, 0.2117, 0.4474, 0.2955]) -Greedy action tensor([-1.8594, -0.4017, 0.6164, -0.1460]) tensor([0.0440, 0.1890, 0.5230, 0.2440]) -Greedy action tensor([-1.8083, -0.4157, 0.5937, -0.1240]) tensor([0.0466, 0.1876, 0.5147, 0.2511]) -Greedy action tensor([-1.7834, -0.5001, 0.6228, -0.0324]) tensor([0.0466, 0.1681, 0.5168, 0.2684]) -Greedy action tensor([-1.8946, -0.4250, 0.6394, -0.1570]) tensor([0.0423, 0.1840, 0.5333, 0.2405]) -Greedy action tensor([-1.3159, -0.5688, 0.3484, 0.1262]) tensor([0.0792, 0.1672, 0.4184, 0.3351]) -Greedy action tensor([-1.7554, -0.7053, 0.2491, -0.4136]) tensor([0.0662, 0.1892, 0.4913, 0.2533]) -Greedy action tensor([-1.9413, -0.4502, 0.6664, -0.1777]) tensor([0.0403, 0.1788, 0.5461, 0.2348]) -Greedy action tensor([-1.8095, -0.4686, 0.6572, -0.0363]) tensor([0.0445, 0.1699, 0.5238, 0.2618]) -Greedy action tensor([-1.8547, -0.4376, 0.6170, -0.1319]) tensor([0.0443, 0.1828, 0.5247, 0.2482]) -Greedy action tensor([-1.2653, -0.5926, 0.3057, 0.2598]) tensor([0.0809, 0.1584, 0.3891, 0.3716]) -Greedy action tensor([-1.6920, -0.5059, 0.5689, 0.0404]) tensor([0.0512, 0.1677, 0.4914, 0.2897]) -Greedy action tensor([-1.0545, -0.5839, 0.1932, 0.3902]) tensor([0.0969, 0.1551, 0.3373, 0.4108]) -Greedy action tensor([-1.9085, -0.4422, 0.6473, -0.1570]) tensor([0.0417, 0.1807, 0.5372, 0.2404]) -Greedy action tensor([-1.5632, -0.2692, 0.4564, 0.0593]) tensor([0.0580, 0.2115, 0.4369, 0.2937]) -Greedy action tensor([-1.0699, -0.7578, 1.0627, 1.2147]) tensor([0.0485, 0.0662, 0.4091, 0.4762]) -Greedy action tensor([-0.9413, 0.3579, 0.4329, 0.9754]) tensor([0.0649, 0.2378, 0.2563, 0.4410]) -Greedy action tensor([-1.9245, -0.4474, 0.6552, -0.1722]) tensor([0.0411, 0.1799, 0.5420, 0.2370]) -Greedy action tensor([-1.6325, -0.1127, 0.4534, 0.0023]) tensor([0.0533, 0.2438, 0.4294, 0.2735]) -Greedy action tensor([-0.2007, -0.6445, 0.5846, 0.6541]) tensor([0.1617, 0.1037, 0.3545, 0.3801]) -Greedy action tensor([-1.6363, 0.2337, 0.4268, -0.0809]) tensor([0.0498, 0.3229, 0.3916, 0.2357]) -Greedy action tensor([-1.9181, -0.4420, 0.6549, -0.1666]) tensor([0.0412, 0.1805, 0.5405, 0.2377]) -Greedy action tensor([-1.8635, -0.4009, 0.6192, -0.1462]) tensor([0.0437, 0.1889, 0.5238, 0.2436]) -Greedy action tensor([-1.5944, -0.4339, 0.5398, -0.2049]) tensor([0.0600, 0.1916, 0.5074, 0.2409]) -Greedy action tensor([-0.4286, -0.5897, 0.3635, 0.4336]) tensor([0.1556, 0.1324, 0.3435, 0.3685]) -Greedy action tensor([-1.5936, -0.4316, 0.4704, -0.0025]) tensor([0.0589, 0.1882, 0.4639, 0.2891]) -Greedy action tensor([-1.8598, -0.5242, 0.7017, 0.0321]) tensor([0.0410, 0.1559, 0.5312, 0.2719]) -Greedy action tensor([-1.9055, -0.3654, 0.6541, -0.1431]) tensor([0.0409, 0.1910, 0.5294, 0.2386]) -Greedy action tensor([-1.7548, -0.4610, 0.6404, 0.1326]) tensor([0.0450, 0.1641, 0.4937, 0.2971]) -Greedy action tensor([-1.8716, -0.4749, 0.6413, -0.1160]) tensor([0.0432, 0.1745, 0.5326, 0.2498]) -Greedy action tensor([-1.5260, -0.5640, 0.4390, 0.1390]) tensor([0.0624, 0.1632, 0.4449, 0.3296]) -Greedy action tensor([-1.8276, -0.4145, 0.6007, -0.0880]) tensor([0.0452, 0.1856, 0.5121, 0.2572]) -Greedy action tensor([-0.6889, -0.2125, 0.7187, 1.1978]) tensor([0.0752, 0.1211, 0.3074, 0.4963]) -Greedy action tensor([-0.6598, 0.1113, 0.1515, 0.0171]) tensor([0.1355, 0.2929, 0.3050, 0.2666]) -Greedy action tensor([-1.9016, -0.4406, 0.6419, -0.1596]) tensor([0.0421, 0.1815, 0.5359, 0.2404]) -Greedy action tensor([-0.3825, 0.9083, -0.7903, -0.1495]) tensor([0.1524, 0.5540, 0.1013, 0.1923]) -Greedy action tensor([-1.7063, -0.4467, 0.5233, -0.0444]) tensor([0.0524, 0.1846, 0.4870, 0.2760]) -Greedy action tensor([-1.7315, -0.5019, 0.5562, -0.0125]) tensor([0.0504, 0.1723, 0.4963, 0.2810]) -Greedy action tensor([-1.9193, -0.4454, 0.6523, -0.1617]) tensor([0.0412, 0.1800, 0.5396, 0.2391]) -Greedy action tensor([-1.8847, -0.4483, 0.6738, -0.1101]) tensor([0.0416, 0.1751, 0.5377, 0.2455]) -Greedy action tensor([-1.2879, -0.4988, 0.4221, 0.4645]) tensor([0.0690, 0.1518, 0.3813, 0.3979]) -Greedy action tensor([-1.6056, -0.4924, 0.5687, 0.1473]) tensor([0.0537, 0.1636, 0.4726, 0.3101]) -Greedy action tensor([-1.8526, -0.2668, 0.6027, -0.1048]) tensor([0.0430, 0.2098, 0.5005, 0.2467]) -Greedy action tensor([-1.4585, -0.0610, 0.4296, -0.0861]) tensor([0.0641, 0.2594, 0.4236, 0.2529]) -Greedy action tensor([-0.9128, -0.5896, 0.2760, 0.0442]) tensor([0.1209, 0.1671, 0.3971, 0.3149]) -Greedy action tensor([-1.4865, -0.2133, 0.5969, 0.6913]) tensor([0.0467, 0.1667, 0.3748, 0.4119]) -Greedy action tensor([-1.6732, -0.4619, 0.5171, -0.0440]) tensor([0.0544, 0.1825, 0.4859, 0.2772]) -Greedy action tensor([ 0.3952, -0.2443, 0.0076, -0.3230]) tensor([0.3712, 0.1958, 0.2519, 0.1810]) -Greedy action tensor([ 0.6848, -0.8066, 0.0184, -0.2239]) tensor([0.4669, 0.1051, 0.2398, 0.1882]) -Greedy action tensor([ 0.4848, -0.4383, -0.1253, -0.4861]) tensor([0.4312, 0.1713, 0.2343, 0.1633]) -Greedy action tensor([ 0.5565, -0.4860, -0.0400, -0.2997]) tensor([0.4296, 0.1514, 0.2366, 0.1825]) -Greedy action tensor([ 1.0787, -0.6339, -0.0360, -0.4104]) tensor([0.5767, 0.1040, 0.1892, 0.1301]) -Greedy action tensor([ 0.4830, -0.0151, -0.1664, -0.0489]) tensor([0.3680, 0.2236, 0.1922, 0.2162]) -Greedy action tensor([ 0.7537, -0.2453, -0.0464, -0.4914]) tensor([0.4750, 0.1749, 0.2134, 0.1367]) -Greedy action tensor([ 0.6168, 0.0158, -0.2081, -0.2629]) tensor([0.4164, 0.2283, 0.1825, 0.1728]) -Greedy action tensor([ 0.7110, -0.2860, -0.0095, -0.3462]) tensor([0.4540, 0.1675, 0.2209, 0.1577]) -Greedy action tensor([ 1.2062, -0.4590, 0.1287, -0.5449]) tensor([0.5871, 0.1111, 0.1999, 0.1019]) -Greedy action tensor([ 0.4383, 0.0068, -0.0301, -0.3208]) tensor([0.3645, 0.2367, 0.2282, 0.1706]) -Greedy action tensor([ 0.3308, -0.0110, -0.0832, -0.5055]) tensor([0.3565, 0.2533, 0.2357, 0.1545]) -Greedy action tensor([ 0.6979, -0.4475, -0.0517, -0.3724]) tensor([0.4687, 0.1491, 0.2215, 0.1607]) -Greedy action tensor([ 0.6089, -0.5720, 0.0612, -0.5454]) tensor([0.4544, 0.1395, 0.2628, 0.1433]) -Greedy action tensor([ 0.8042, -0.1514, -0.2350, -0.4754]) tensor([0.4959, 0.1907, 0.1754, 0.1379]) -Greedy action tensor([ 1.0589, -0.7722, -0.0993, -0.6910]) tensor([0.6068, 0.0972, 0.1906, 0.1055]) -Greedy action tensor([ 0.8157, -0.6059, -0.0113, -0.5109]) tensor([0.5144, 0.1241, 0.2250, 0.1365]) -Greedy action tensor([0.2425, 0.0082, 0.0687, 0.0946]) tensor([0.2862, 0.2264, 0.2405, 0.2469]) -Greedy action tensor([ 0.7664, -0.4015, -0.1317, -0.1902]) tensor([0.4756, 0.1479, 0.1937, 0.1827]) -Greedy action tensor([ 0.4628, 0.1131, -0.1385, -0.2875]) tensor([0.3670, 0.2587, 0.2011, 0.1733]) -Greedy action tensor([ 0.7210, -0.5383, 0.2543, -0.5827]) tensor([0.4582, 0.1301, 0.2873, 0.1244]) -Greedy action tensor([ 0.5161, -0.1281, -0.0224, -0.2427]) tensor([0.3880, 0.2038, 0.2265, 0.1817]) -Greedy action tensor([ 0.3820, -0.4666, -0.0913, -0.2175]) tensor([0.3846, 0.1646, 0.2396, 0.2112]) -Greedy action tensor([ 0.6235, 0.2582, -0.2410, -0.1384]) tensor([0.3873, 0.2688, 0.1632, 0.1808]) -Greedy action tensor([ 0.6742, -0.0872, 0.0973, -0.5123]) tensor([0.4285, 0.2001, 0.2406, 0.1308]) -Greedy action tensor([ 0.6444, -0.3541, -0.1159, -0.3829]) tensor([0.4558, 0.1679, 0.2131, 0.1632]) -Greedy action tensor([ 0.6040, -0.2439, -0.0101, -0.3294]) tensor([0.4232, 0.1813, 0.2290, 0.1664]) -Greedy action tensor([ 0.8512, -0.6326, -0.1291, -0.2614]) tensor([0.5179, 0.1175, 0.1943, 0.1703]) -Greedy action tensor([ 0.4807, -0.1668, 0.1181, -0.5566]) tensor([0.3886, 0.2034, 0.2704, 0.1377]) -Greedy action tensor([ 0.8442, -0.4721, 0.0757, -0.4822]) tensor([0.5007, 0.1342, 0.2322, 0.1329]) -Greedy action tensor([ 0.4269, -0.1855, -0.0999, -0.2343]) tensor([0.3775, 0.2046, 0.2229, 0.1949]) -Greedy action tensor([ 1.1765, -0.9538, 0.1231, -0.8348]) tensor([0.6245, 0.0742, 0.2178, 0.0836]) -Greedy action tensor([ 1.1140, -0.3444, -0.0858, -0.4281]) tensor([0.5721, 0.1331, 0.1724, 0.1224]) -Greedy action tensor([ 0.4020, 0.1224, -0.1122, -0.1748]) tensor([0.3430, 0.2593, 0.2051, 0.1926]) -Greedy action tensor([ 0.3194, 0.0869, -0.1433, -0.1604]) tensor([0.3288, 0.2606, 0.2070, 0.2035]) -Greedy action tensor([ 0.2442, 0.0049, -0.1398, -0.1110]) tensor([0.3155, 0.2484, 0.2149, 0.2212]) -Greedy action tensor([ 0.8861, -0.4843, -0.0609, -0.3962]) tensor([0.5210, 0.1323, 0.2021, 0.1445]) -Greedy action tensor([ 0.3651, 0.3558, -0.0895, -0.0914]) tensor([0.3068, 0.3040, 0.1948, 0.1944]) -Greedy action tensor([ 0.7443, -0.2061, -0.0761, -0.4534]) tensor([0.4698, 0.1816, 0.2068, 0.1418]) -Greedy action tensor([ 0.6639, -0.3620, 0.1145, -0.5465]) tensor([0.4477, 0.1605, 0.2584, 0.1334]) -Greedy action tensor([ 1.4260, -0.9230, 0.1272, -0.6081]) tensor([0.6671, 0.0637, 0.1820, 0.0872]) -Greedy action tensor([ 0.3862, 0.1977, -0.1854, -0.1584]) tensor([0.3364, 0.2786, 0.1899, 0.1951]) -Greedy action tensor([ 0.3845, -0.0460, -0.2716, -0.6270]) tensor([0.3948, 0.2567, 0.2049, 0.1436]) -Greedy action tensor([ 0.3699, -0.0432, 0.0461, -0.2016]) tensor([0.3390, 0.2243, 0.2452, 0.1914]) -Greedy action tensor([ 1.0587e+00, -9.5344e-04, -4.2092e-02, -4.4576e-01]) tensor([0.5259, 0.1823, 0.1749, 0.1168]) -Greedy action tensor([ 0.8072, -0.7255, 0.1023, -0.4485]) tensor([0.5012, 0.1083, 0.2477, 0.1428]) -Greedy action tensor([ 0.6997, -0.6067, 0.0718, -0.3334]) tensor([0.4629, 0.1253, 0.2470, 0.1647]) -Greedy action tensor([ 0.3255, 0.1129, -0.1810, 0.0442]) tensor([0.3159, 0.2554, 0.1903, 0.2384]) -Greedy action tensor([ 0.6152, 0.0716, -0.1296, -0.2186]) tensor([0.4016, 0.2332, 0.1907, 0.1745]) -Greedy action tensor([ 0.3617, -0.0983, 0.0559, -0.3763]) tensor([0.3514, 0.2218, 0.2588, 0.1680]) -Greedy action tensor([ 0.4989, -0.2550, -0.1566, -0.3359]) tensor([0.4126, 0.1941, 0.2142, 0.1790]) -Greedy action tensor([ 0.5362, 0.0619, 0.0267, -0.3953]) tensor([0.3821, 0.2378, 0.2296, 0.1505]) -Greedy action tensor([ 0.5808, 0.1340, -0.1307, -0.4326]) tensor([0.4010, 0.2565, 0.1969, 0.1456]) -Greedy action tensor([ 0.5704, -0.2815, 0.0029, -0.1629]) tensor([0.4042, 0.1724, 0.2292, 0.1941]) -Greedy action tensor([ 0.5840, -0.2004, -0.0285, -0.1070]) tensor([0.4001, 0.1826, 0.2169, 0.2005]) -Greedy action tensor([ 0.9094, -0.6466, -0.1282, -0.4946]) tensor([0.5522, 0.1165, 0.1956, 0.1356]) -Greedy action tensor([ 0.8727, -1.1402, -0.0466, -0.4456]) tensor([0.5555, 0.0742, 0.2216, 0.1487]) -Greedy action tensor([ 0.5060, -0.1234, -0.0061, -0.1949]) tensor([0.3805, 0.2028, 0.2280, 0.1888]) -Greedy action tensor([ 0.7746, -0.5410, 0.0821, -0.3124]) tensor([0.4749, 0.1274, 0.2376, 0.1601]) -Greedy action tensor([ 0.1191, 0.2291, -0.0836, -0.3722]) tensor([0.2821, 0.3149, 0.2304, 0.1726]) -Greedy action tensor([ 0.2477, -0.0134, -0.0461, -0.0033]) tensor([0.3036, 0.2338, 0.2263, 0.2362]) -Greedy action tensor([ 0.5713, -0.3348, -0.0947, -0.4779]) tensor([0.4409, 0.1782, 0.2265, 0.1544]) -Greedy action tensor([ 0.6144, -0.3341, -0.1129, -0.2821]) tensor([0.4389, 0.1700, 0.2121, 0.1791]) -Greedy action tensor([ 0.4947, -0.3026, 0.0226, -0.1173]) tensor([0.3822, 0.1722, 0.2384, 0.2073]) -Greedy action tensor([ 0.3233, 0.3201, -0.0282, -0.2006]) tensor([0.3037, 0.3027, 0.2137, 0.1798]) -Greedy action tensor([ 0.2068, 0.0373, -0.0870, -0.0496]) tensor([0.2973, 0.2510, 0.2216, 0.2301]) -Greedy action tensor([ 0.6720, -0.5536, -0.1195, -0.4617]) tensor([0.4834, 0.1419, 0.2191, 0.1556]) -Greedy action tensor([ 0.6599, -0.2979, -0.0516, -0.1271]) tensor([0.4292, 0.1647, 0.2107, 0.1954]) -Greedy action tensor([ 0.8412, -0.8867, 0.0644, -0.5120]) tensor([0.5274, 0.0937, 0.2426, 0.1363]) -Greedy action tensor([ 0.6106, -0.2837, 0.1970, -0.6716]) tensor([0.4260, 0.1742, 0.2817, 0.1182]) -Greedy action tensor([ 0.2879, 0.0545, -0.0883, -0.2947]) tensor([0.3293, 0.2608, 0.2261, 0.1839]) -Greedy action tensor([ 0.8842, -0.5759, 0.0201, -0.3615]) tensor([0.5151, 0.1196, 0.2171, 0.1482]) -Greedy action tensor([ 1.1133, -1.0214, 0.0551, -0.7532]) tensor([0.6173, 0.0730, 0.2143, 0.0955]) -Greedy action tensor([ 0.3207, -0.0095, -0.0111, -0.2485]) tensor([0.3331, 0.2394, 0.2390, 0.1885]) -Greedy action tensor([ 0.4420, 0.1036, -0.0578, -0.2394]) tensor([0.3539, 0.2523, 0.2147, 0.1791]) -Greedy action tensor([ 0.2723, -0.2052, -0.1162, -0.2586]) tensor([0.3465, 0.2149, 0.2349, 0.2037]) -Greedy action tensor([ 0.6710, -0.4505, 0.0566, -0.3716]) tensor([0.4506, 0.1468, 0.2438, 0.1589]) -Greedy action tensor([ 0.5809, -0.3427, 0.0109, -0.2140]) tensor([0.4142, 0.1645, 0.2343, 0.1871]) -Greedy action tensor([ 0.5227, -0.1231, -0.0558, -0.2239]) tensor([0.3908, 0.2049, 0.2191, 0.1852]) -Greedy action tensor([ 0.7759, -0.5019, -0.0923, -0.4052]) tensor([0.4987, 0.1390, 0.2093, 0.1531]) -Greedy action tensor([ 0.5642, 0.0116, -0.0756, -0.4217]) tensor([0.4039, 0.2324, 0.2130, 0.1507]) -Greedy action tensor([ 1.3675, -0.0192, -0.2933, 0.2467]) tensor([0.5663, 0.1415, 0.1076, 0.1846]) -Greedy action tensor([ 1.8704, -0.2927, -0.3694, 0.3207]) tensor([0.6975, 0.0802, 0.0743, 0.1481]) -Greedy action tensor([ 1.0383, -0.1028, -0.2925, 0.4794]) tensor([0.4639, 0.1482, 0.1226, 0.2653]) -Greedy action tensor([ 0.8406, -0.4853, -0.1347, 0.3428]) tensor([0.4443, 0.1180, 0.1676, 0.2701]) -Greedy action tensor([ 0.9537, -0.2858, -0.4301, 0.1620]) tensor([0.5017, 0.1453, 0.1257, 0.2273]) -Greedy action tensor([ 0.9723, -0.3258, -0.0879, 0.1284]) tensor([0.4879, 0.1332, 0.1690, 0.2098]) -Greedy action tensor([ 1.1583, -0.7025, -0.1374, 0.1525]) tensor([0.5571, 0.0867, 0.1525, 0.2038]) -Greedy action tensor([ 1.4122, -0.6884, 0.0243, 0.1037]) tensor([0.6089, 0.0745, 0.1520, 0.1646]) -Greedy action tensor([ 1.0559, -0.5808, -0.3291, 0.3778]) tensor([0.5122, 0.0997, 0.1282, 0.2599]) -Greedy action tensor([ 1.1037, -0.1814, 0.2983, -0.1412]) tensor([0.4971, 0.1375, 0.2222, 0.1432]) -Greedy action tensor([ 1.1127, -0.5647, -0.1918, 0.4257]) tensor([0.5099, 0.0953, 0.1383, 0.2565]) -Greedy action tensor([ 0.3964, -0.1839, 0.0123, -0.1398]) tensor([0.3539, 0.1981, 0.2410, 0.2070]) -Greedy action tensor([ 1.3950, -0.2860, -0.7782, 0.3395]) tensor([0.6068, 0.1130, 0.0691, 0.2112]) -Greedy action tensor([ 1.5406, -0.8254, -0.3508, 0.1301]) tensor([0.6717, 0.0630, 0.1013, 0.1639]) -Greedy action tensor([ 1.5669, -0.5750, -0.1577, 0.7402]) tensor([0.5770, 0.0678, 0.1028, 0.2524]) -Greedy action tensor([ 1.1775, -0.2396, 0.0996, 0.0441]) tensor([0.5250, 0.1273, 0.1787, 0.1690]) -Greedy action tensor([ 1.1703, -0.4006, -0.4599, 0.3282]) tensor([0.5451, 0.1133, 0.1068, 0.2348]) -Greedy action tensor([ 1.3064, -0.4749, -0.7120, 0.2068]) tensor([0.6119, 0.1031, 0.0813, 0.2038]) -Greedy action tensor([ 1.4777, -0.2216, -0.8835, 0.3440]) tensor([0.6254, 0.1143, 0.0590, 0.2013]) -Greedy action tensor([ 1.4508, -0.6608, -0.4921, 0.3329]) tensor([0.6284, 0.0761, 0.0900, 0.2055]) -Greedy action tensor([ 0.9746, -0.0896, -0.7046, 0.2389]) tensor([0.4973, 0.1716, 0.0928, 0.2383]) -Greedy action tensor([ 1.1905, -0.5594, -0.5276, 0.6105]) tensor([0.5227, 0.0908, 0.0938, 0.2927]) -Greedy action tensor([ 1.0718, -0.1436, 0.1636, -0.4284]) tensor([0.5200, 0.1542, 0.2097, 0.1160]) -Greedy action tensor([ 1.5291, -0.5320, -0.1242, 0.1436]) tensor([0.6374, 0.0811, 0.1220, 0.1595]) -Greedy action tensor([ 2.0385, -1.2634, -0.2676, 0.7767]) tensor([0.7044, 0.0259, 0.0702, 0.1994]) -Greedy action tensor([ 0.9042, -0.4946, 0.0652, 0.0244]) tensor([0.4776, 0.1179, 0.2064, 0.1981]) -Greedy action tensor([ 1.3262, -0.4216, -0.3626, 0.1761]) tensor([0.5968, 0.1039, 0.1103, 0.1890]) -Greedy action tensor([ 1.2283, -0.1847, -0.4510, 0.0103]) tensor([0.5795, 0.1410, 0.1081, 0.1714]) -Greedy action tensor([ 1.4067, -0.7613, -0.0163, 0.4647]) tensor([0.5730, 0.0656, 0.1381, 0.2234]) -Greedy action tensor([ 1.3486, -0.1746, -0.2276, -0.0856]) tensor([0.6013, 0.1311, 0.1243, 0.1433]) -Greedy action tensor([ 1.4999, -0.5327, -0.3191, 0.5484]) tensor([0.5955, 0.0780, 0.0966, 0.2300]) -Greedy action tensor([ 1.2149, -0.4556, -0.0921, -0.0804]) tensor([0.5772, 0.1086, 0.1562, 0.1580]) -Greedy action tensor([ 0.6905, -0.2120, -0.0805, 0.1499]) tensor([0.4081, 0.1655, 0.1888, 0.2377]) -Greedy action tensor([ 1.8060, -0.7805, -0.4218, 0.5105]) tensor([0.6864, 0.0517, 0.0740, 0.1879]) -Greedy action tensor([ 1.4660, -0.0483, -0.4791, 0.2482]) tensor([0.6028, 0.1326, 0.0862, 0.1784]) -Greedy action tensor([ 1.4420, -0.2498, -0.1073, 0.3154]) tensor([0.5812, 0.1070, 0.1234, 0.1884]) -Greedy action tensor([ 0.8941, -0.4336, -0.6401, 0.7272]) tensor([0.4297, 0.1139, 0.0927, 0.3637]) -Greedy action tensor([ 0.9485, -0.6801, -0.4467, 0.3239]) tensor([0.5052, 0.0991, 0.1252, 0.2705]) -Greedy action tensor([ 1.4584, -0.6939, -0.2509, 0.5033]) tensor([0.5945, 0.0691, 0.1076, 0.2288]) -Greedy action tensor([ 0.8950, -0.1835, -0.0572, 0.1544]) tensor([0.4540, 0.1544, 0.1752, 0.2165]) -Greedy action tensor([ 0.7884, -0.0097, -0.6156, 0.0285]) tensor([0.4622, 0.2081, 0.1135, 0.2162]) -Greedy action tensor([ 1.4177, -0.5537, -0.3178, 0.4761]) tensor([0.5863, 0.0816, 0.1034, 0.2287]) -Greedy action tensor([ 0.9275, -0.2887, -0.0676, -0.0217]) tensor([0.4871, 0.1443, 0.1801, 0.1885]) -Greedy action tensor([ 1.5976, -0.5181, -0.4381, 0.4767]) tensor([0.6341, 0.0764, 0.0828, 0.2067]) -Greedy action tensor([ 1.5148, -0.5205, -0.3242, 0.5537]) tensor([0.5981, 0.0781, 0.0951, 0.2287]) -Greedy action tensor([ 1.1657, -0.4068, -0.0536, 0.3317]) tensor([0.5162, 0.1071, 0.1525, 0.2242]) -Greedy action tensor([ 0.8838, -0.5063, -0.2751, 0.0074]) tensor([0.5053, 0.1258, 0.1586, 0.2103]) -Greedy action tensor([ 1.9046, -1.1129, -0.1234, 0.3214]) tensor([0.7216, 0.0353, 0.0950, 0.1482]) -Greedy action tensor([ 1.6967, 0.0600, -0.5034, 0.3315]) tensor([0.6407, 0.1247, 0.0710, 0.1636]) -Greedy action tensor([ 0.5110, -0.5447, -0.3667, -0.1024]) tensor([0.4338, 0.1509, 0.1803, 0.2349]) -Greedy action tensor([ 1.2140, -0.5847, -0.3580, 0.3893]) tensor([0.5520, 0.0914, 0.1146, 0.2420]) -Greedy action tensor([ 0.6321, -0.3158, -0.7591, 0.8670]) tensor([0.3447, 0.1336, 0.0858, 0.4360]) -Greedy action tensor([ 1.1609, -0.4134, -0.3862, 0.3173]) tensor([0.5405, 0.1120, 0.1151, 0.2325]) -Greedy action tensor([ 1.1083, -0.1877, -0.4029, 0.1822]) tensor([0.5290, 0.1447, 0.1167, 0.2095]) -Greedy action tensor([ 1.0761, -0.2132, -0.2352, 0.2795]) tensor([0.5011, 0.1380, 0.1350, 0.2259]) -Greedy action tensor([ 0.5644, -0.4032, -0.3677, 0.4885]) tensor([0.3703, 0.1407, 0.1458, 0.3432]) -Greedy action tensor([ 1.2122, -0.5247, -0.0613, -0.0285]) tensor([0.5730, 0.1009, 0.1604, 0.1657]) -Greedy action tensor([ 1.0414, -0.2250, -0.3281, 0.3110]) tensor([0.4956, 0.1397, 0.1260, 0.2387]) -Greedy action tensor([ 1.4783, -0.2216, -0.3957, 0.3423]) tensor([0.6034, 0.1102, 0.0926, 0.1938]) -Greedy action tensor([ 0.9109, -0.3951, -0.0522, 0.1086]) tensor([0.4760, 0.1289, 0.1817, 0.2134]) -Greedy action tensor([ 1.6598, -0.4176, -0.4717, 0.3660]) tensor([0.6587, 0.0825, 0.0782, 0.1806]) -Greedy action tensor([ 1.3339, -0.3086, -0.3601, 0.4171]) tensor([0.5627, 0.1089, 0.1034, 0.2250]) -Greedy action tensor([ 1.1686, -0.3001, -0.5201, 0.4417]) tensor([0.5268, 0.1213, 0.0973, 0.2546]) -Greedy action tensor([ 1.3852, -0.2652, -0.4171, 0.2183]) tensor([0.5994, 0.1151, 0.0989, 0.1866]) -Greedy action tensor([ 1.8742, -0.5429, -0.2610, -0.0080]) tensor([0.7355, 0.0656, 0.0870, 0.1120]) -Greedy action tensor([ 1.1570, -0.6509, -0.1510, 0.0372]) tensor([0.5680, 0.0931, 0.1536, 0.1853]) -Greedy action tensor([ 0.9272, -0.5131, -0.1425, 0.1238]) tensor([0.4931, 0.1168, 0.1692, 0.2208]) -Greedy action tensor([ 1.8386, -0.6177, -0.4540, 0.1947]) tensor([0.7246, 0.0621, 0.0732, 0.1400]) -Greedy action tensor([ 1.2451, -0.2942, -0.3981, 0.1414]) tensor([0.5749, 0.1233, 0.1112, 0.1907]) -Greedy action tensor([ 0.9099, -0.1775, -0.0205, 0.2329]) tensor([0.4465, 0.1505, 0.1761, 0.2269]) -Greedy action tensor([ 1.1582, -0.4873, -0.3061, 0.5215]) tensor([0.5120, 0.0988, 0.1184, 0.2709]) -Greedy action tensor([ 1.6136, -0.5943, -0.4046, 0.6433]) tensor([0.6166, 0.0678, 0.0819, 0.2337]) -Greedy action tensor([ 1.0291, 0.0316, -0.7901, 0.2979]) tensor([0.4969, 0.1833, 0.0806, 0.2392]) -Greedy action tensor([ 1.9691, -0.8176, -0.1248, 0.2750]) tensor([0.7307, 0.0450, 0.0900, 0.1343]) -Greedy action tensor([ 1.6947, -0.0950, -0.0619, 0.2926]) tensor([0.6306, 0.1053, 0.1089, 0.1552]) -Greedy action tensor([ 1.2285, -0.4433, -0.3471, 0.5431]) tensor([0.5267, 0.0990, 0.1090, 0.2654]) -Greedy action tensor([ 1.4421, -0.2384, -0.3299, -0.0156]) tensor([0.6293, 0.1172, 0.1070, 0.1465]) -Greedy action tensor([ 1.7343, -1.0953, -0.3210, 0.6181]) tensor([0.6602, 0.0390, 0.0845, 0.2163]) -Greedy action tensor([ 0.9890, -0.2084, -0.0835, 0.0202]) tensor([0.4942, 0.1492, 0.1691, 0.1875]) -Greedy action tensor([ 1.5124, -0.2927, -0.4036, 0.1894]) tensor([0.6337, 0.1042, 0.0933, 0.1688]) -Greedy action tensor([ 1.0788, -0.3951, -0.1204, 0.2332]) tensor([0.5103, 0.1169, 0.1538, 0.2191]) -Greedy action tensor([-0.6709, 0.6175, -0.9157, -0.1799]) tensor([0.1420, 0.5149, 0.1111, 0.2320]) -Greedy action tensor([ 1.3380, -0.7108, 1.3106, 0.5377]) tensor([0.3920, 0.0505, 0.3814, 0.1761]) -Greedy action tensor([ 1.8628, -0.7905, 0.4616, 0.4938]) tensor([0.6365, 0.0448, 0.1568, 0.1619]) -Greedy action tensor([ 1.3853, -0.4315, 1.1443, -1.0203]) tensor([0.4905, 0.0797, 0.3855, 0.0443]) -Greedy action tensor([ 0.4722, -1.8885, -0.2699, 0.7888]) tensor([0.3398, 0.0321, 0.1618, 0.4664]) -Greedy action tensor([ 0.8553, -1.6321, 1.5311, 0.3580]) tensor([0.2735, 0.0227, 0.5375, 0.1663]) -Greedy action tensor([ 0.6078, -0.1884, -0.1837, 1.5442]) tensor([0.2245, 0.1012, 0.1017, 0.5726]) -Greedy action tensor([-0.3295, -0.2010, -0.9163, 0.4620]) tensor([0.2041, 0.2321, 0.1135, 0.4504]) -Greedy action tensor([ 0.0366, 0.8981, 0.5343, -0.3508]) tensor([0.1757, 0.4159, 0.2891, 0.1193]) -Greedy action tensor([-0.0531, 0.6549, 1.0246, -0.2246]) tensor([0.1468, 0.2981, 0.4314, 0.1237]) -Greedy action tensor([-0.1392, -0.8662, -0.1234, 1.5984]) tensor([0.1222, 0.0591, 0.1241, 0.6946]) -Greedy action tensor([ 0.0299, -2.9122, -0.1962, 0.5069]) tensor([0.2889, 0.0152, 0.2304, 0.4655]) -Greedy action tensor([ 0.1179, -1.0716, -0.2267, -0.0637]) tensor([0.3513, 0.1069, 0.2489, 0.2929]) -Greedy action tensor([-1.0491, -0.1586, 0.2300, -1.4718]) tensor([0.1301, 0.3170, 0.4676, 0.0853]) -Greedy action tensor([ 0.7743, -0.9461, -0.9425, 1.0232]) tensor([0.3786, 0.0678, 0.0680, 0.4856]) -Greedy action tensor([-0.3688, -0.2336, -1.0042, 0.1989]) tensor([0.2253, 0.2579, 0.1193, 0.3975]) -Greedy action tensor([ 0.1558, -0.3940, 0.1017, 0.3500]) tensor([0.2675, 0.1544, 0.2534, 0.3248]) -Greedy action tensor([-0.2585, -1.7003, 0.1263, 0.0367]) tensor([0.2470, 0.0584, 0.3629, 0.3318]) -Greedy action tensor([0.9190, 0.0700, 1.1248, 0.3068]) tensor([0.3126, 0.1338, 0.3841, 0.1695]) -Greedy action tensor([ 1.2559, -0.5167, -0.1527, 0.5431]) tensor([0.5250, 0.0892, 0.1284, 0.2574]) -Greedy action tensor([ 1.4559, -0.3218, 0.0285, 1.6446]) tensor([0.3822, 0.0646, 0.0917, 0.4615]) -Greedy action tensor([ 0.5399, 0.3264, -0.0125, 1.2280]) tensor([0.2287, 0.1847, 0.1316, 0.4550]) -Greedy action tensor([-0.0210, -0.8922, 0.6693, 0.6524]) tensor([0.1861, 0.0779, 0.3711, 0.3649]) -Greedy action tensor([ 1.8638, -0.9074, 1.4178, -0.1970]) tensor([0.5464, 0.0342, 0.3498, 0.0696]) -Greedy action tensor([ 0.3377, -1.6835, 0.0523, 1.1487]) tensor([0.2419, 0.0320, 0.1818, 0.5443]) -Greedy action tensor([-0.7383, -1.2172, 0.5557, -0.2231]) tensor([0.1441, 0.0893, 0.5255, 0.2412]) -Greedy action tensor([ 0.7899, -1.0772, -0.4722, -0.0239]) tensor([0.5317, 0.0822, 0.1505, 0.2356]) -Greedy action tensor([ 1.2601, 0.2627, -0.3581, 0.6593]) tensor([0.4727, 0.1744, 0.0937, 0.2592]) -Greedy action tensor([-0.5883, -0.5031, -0.7642, 0.8800]) tensor([0.1376, 0.1498, 0.1154, 0.5973]) -Greedy action tensor([ 0.1593, -0.8343, 0.0403, 1.0077]) tensor([0.2177, 0.0806, 0.1933, 0.5085]) -Greedy action tensor([0.1006, 0.4178, 0.2578, 0.0631]) tensor([0.2219, 0.3047, 0.2597, 0.2137]) -Greedy action tensor([-0.7982, -0.7957, -0.5480, 0.3153]) tensor([0.1579, 0.1583, 0.2028, 0.4809]) -Greedy action tensor([ 0.2254, 0.2328, 0.7053, -0.1042]) tensor([0.2303, 0.2320, 0.3721, 0.1656]) -Greedy action tensor([-1.3608, -0.7229, 1.9085, -0.0340]) tensor([0.0303, 0.0574, 0.7979, 0.1144]) -Greedy action tensor([-1.8483, 0.5708, -0.2086, -0.5323]) tensor([0.0474, 0.5320, 0.2440, 0.1766]) -Greedy action tensor([-0.2396, 0.1600, 0.1157, -0.2962]) tensor([0.2057, 0.3067, 0.2934, 0.1943]) -Greedy action tensor([ 0.4243, -0.4687, 0.2648, 0.2428]) tensor([0.3230, 0.1322, 0.2754, 0.2694]) -Greedy action tensor([-0.0216, -1.1093, -0.2030, -0.0224]) tensor([0.3154, 0.1063, 0.2631, 0.3152]) -Greedy action tensor([-0.4610, -1.6165, 0.7277, -0.2651]) tensor([0.1720, 0.0542, 0.5646, 0.2092]) -Greedy action tensor([ 0.7484, -0.3470, -0.2308, 0.3827]) tensor([0.4160, 0.1391, 0.1563, 0.2886]) -Greedy action tensor([-0.2729, -0.7284, 0.4766, 0.6399]) tensor([0.1602, 0.1016, 0.3390, 0.3992]) -Greedy action tensor([ 0.7940, -0.2117, 0.5240, 0.3291]) tensor([0.3627, 0.1327, 0.2769, 0.2278]) -Greedy action tensor([-0.0984, 0.9751, 0.4937, -0.0055]) tensor([0.1464, 0.4283, 0.2647, 0.1606]) -Greedy action tensor([ 0.5745, -0.1771, 0.7733, 0.4971]) tensor([0.2765, 0.1304, 0.3373, 0.2559]) -Greedy action tensor([0.1551, 0.1501, 0.3937, 0.3798]) tensor([0.2214, 0.2203, 0.2811, 0.2772]) -Greedy action tensor([ 1.5792, -1.4672, 0.5632, 1.4785]) tensor([0.4322, 0.0205, 0.1565, 0.3908]) -Greedy action tensor([ 0.0082, -0.1084, 0.8003, -0.9613]) tensor([0.2233, 0.1988, 0.4932, 0.0847]) -Greedy action tensor([ 0.8543, 0.5103, -0.3647, 0.1767]) tensor([0.3980, 0.2822, 0.1176, 0.2021]) -Greedy action tensor([-0.9212, -1.3310, 0.2738, 0.6768]) tensor([0.1009, 0.0670, 0.3334, 0.4988]) -Greedy action tensor([ 1.5934, -0.4797, 0.5316, 0.9964]) tensor([0.4945, 0.0622, 0.1710, 0.2722]) -Greedy action tensor([-1.3243, -0.4053, 0.9368, -0.8828]) tensor([0.0682, 0.1710, 0.6546, 0.1061]) -Greedy action tensor([-0.0733, -0.5946, -1.1035, -0.7304]) tensor([0.4050, 0.2405, 0.1446, 0.2099]) -Greedy action tensor([-0.2899, -0.1474, 0.5680, -0.0618]) tensor([0.1734, 0.1999, 0.4089, 0.2178]) -Greedy action tensor([-1.2418, 0.0645, 0.6846, -0.2991]) tensor([0.0708, 0.2614, 0.4860, 0.1817]) -Greedy action tensor([ 0.8723, 0.3388, 0.8993, -0.5223]) tensor([0.3494, 0.2049, 0.3590, 0.0866]) -Greedy action tensor([-1.1186, -0.9539, 0.8588, -1.2254]) tensor([0.0971, 0.1144, 0.7012, 0.0872]) -Greedy action tensor([ 1.2546, -0.9400, -0.4427, 1.1424]) tensor([0.4569, 0.0509, 0.0837, 0.4085]) -Greedy action tensor([ 1.4510, -0.8506, 1.1840, 0.9282]) tensor([0.4067, 0.0407, 0.3114, 0.2411]) -Greedy action tensor([ 0.5980, 0.5954, 0.7222, -0.2430]) tensor([0.2808, 0.2801, 0.3180, 0.1211]) -Greedy action tensor([ 0.8432, -0.9478, 0.2104, 0.6511]) tensor([0.3963, 0.0661, 0.2105, 0.3271]) -Greedy action tensor([ 1.6978, -0.5353, 0.2262, 0.9588]) tensor([0.5511, 0.0591, 0.1265, 0.2632]) -Greedy action tensor([ 0.9594, -0.0263, -1.2702, 0.2950]) tensor([0.5012, 0.1870, 0.0539, 0.2579]) -Greedy action tensor([-1.2589, 0.5782, 1.1580, -0.7720]) tensor([0.0497, 0.3121, 0.5573, 0.0809]) -Greedy action tensor([0.7804, 0.7544, 0.5538, 0.8368]) tensor([0.2611, 0.2544, 0.2082, 0.2763]) -Greedy action tensor([1.3295, 0.6388, 1.1126, 0.7197]) tensor([0.3509, 0.1759, 0.2825, 0.1907]) -Greedy action tensor([ 1.4240, -0.8525, 1.1832, 0.9274]) tensor([0.4005, 0.0411, 0.3148, 0.2437]) -Greedy action tensor([ 0.7335, -1.2493, 0.0162, 0.1844]) tensor([0.4539, 0.0625, 0.2215, 0.2621]) -Greedy action tensor([1.0040, 0.0282, 0.2104, 0.2473]) tensor([0.4351, 0.1640, 0.1967, 0.2041]) -Greedy action tensor([ 0.2895, -0.7126, -0.3357, 1.2464]) tensor([0.2219, 0.0815, 0.1188, 0.5778]) -Greedy action tensor([0.1507, 0.4976, 0.2372, 1.2673]) tensor([0.1525, 0.2157, 0.1662, 0.4657]) -Greedy action tensor([0.9894, 0.6483, 0.7471, 0.4194]) tensor([0.3267, 0.2323, 0.2564, 0.1847]) -Greedy action tensor([-0.9405, -0.2682, -0.8504, 0.1737]) tensor([0.1408, 0.2759, 0.1541, 0.4292]) -Greedy action tensor([1.4352, 1.6591, 0.0686, 0.3182]) tensor([0.3530, 0.4415, 0.0900, 0.1155]) -Greedy action tensor([-0.1078, 0.3334, 0.1918, -1.0786]) tensor([0.2335, 0.3630, 0.3151, 0.0884]) -Greedy action tensor([ 1.4681, -0.4830, 0.4407, 0.7622]) tensor([0.5016, 0.0713, 0.1795, 0.2476]) -Greedy action tensor([0.3495, 0.3113, 0.5257, 1.3748]) tensor([0.1683, 0.1620, 0.2007, 0.4691]) -Greedy action tensor([ 1.8745, -0.8853, 1.1806, -0.1349]) tensor([0.5893, 0.0373, 0.2944, 0.0790]) -Greedy action tensor([ 0.7008, -0.1470, 1.0277, 0.0382]) tensor([0.3003, 0.1286, 0.4164, 0.1548]) -Greedy action tensor([ 0.0920, -2.5102, 0.5286, 0.0678]) tensor([0.2780, 0.0206, 0.4301, 0.2713]) -Greedy action tensor([1.4700, 0.1576, 0.0239, 0.8297]) tensor([0.4922, 0.1325, 0.1159, 0.2594]) -Greedy action tensor([ 0.3176, -1.4942, -0.1894, 0.2458]) tensor([0.3709, 0.0606, 0.2234, 0.3452]) -Greedy action tensor([-0.9688, -1.6443, 0.3757, 0.9655]) tensor([0.0815, 0.0415, 0.3128, 0.5642]) -Greedy action tensor([-1.4795, 0.8754, 0.2876, 0.1896]) tensor([0.0441, 0.4642, 0.2579, 0.2338]) -Greedy action tensor([-1.2042, -0.3048, 0.4655, -0.3564]) tensor([0.0901, 0.2214, 0.4783, 0.2102]) -Greedy action tensor([-1.6533, -0.3747, 0.6074, 0.0731]) tensor([0.0505, 0.1814, 0.4843, 0.2838]) -Greedy action tensor([-1.3580, -0.4799, 0.7240, -0.5491]) tensor([0.0731, 0.1760, 0.5866, 0.1642]) -Greedy action tensor([-0.7166, -0.7429, 0.5827, 0.4445]) tensor([0.1132, 0.1103, 0.4151, 0.3615]) -Greedy action tensor([-1.4528, 0.3443, 0.3272, 0.0834]) tensor([0.0568, 0.3426, 0.3368, 0.2639]) -Greedy action tensor([-1.0389, -0.6249, 0.2221, 0.3637]) tensor([0.0989, 0.1497, 0.3491, 0.4022]) -Greedy action tensor([-1.7314, -0.4987, 0.5702, 0.0069]) tensor([0.0497, 0.1706, 0.4968, 0.2828]) -Greedy action tensor([-1.5200, -0.5664, 0.4445, 0.1391]) tensor([0.0626, 0.1624, 0.4462, 0.3288]) -Greedy action tensor([-1.4751, -0.3443, 0.8387, 0.7531]) tensor([0.0426, 0.1319, 0.4304, 0.3951]) -Greedy action tensor([-1.1185, -0.6297, 0.2528, 0.2450]) tensor([0.0954, 0.1556, 0.3760, 0.3731]) -Greedy action tensor([-0.5707, 0.8774, 0.1023, 1.0312]) tensor([0.0821, 0.3494, 0.1610, 0.4075]) -Greedy action tensor([-1.3110, -0.5890, 0.3258, 0.2202]) tensor([0.0780, 0.1606, 0.4008, 0.3606]) -Greedy action tensor([-1.0225, -0.3911, 1.2181, 1.3885]) tensor([0.0427, 0.0803, 0.4013, 0.4758]) -Greedy action tensor([-1.7780, -0.4342, 0.6704, -0.0439]) tensor([0.0453, 0.1737, 0.5243, 0.2567]) -Greedy action tensor([-1.4403, -0.5388, 0.4562, 0.2538]) tensor([0.0642, 0.1582, 0.4280, 0.3496]) -Greedy action tensor([-1.9335, -0.4519, 0.6581, -0.1760]) tensor([0.0407, 0.1792, 0.5439, 0.2362]) -Greedy action tensor([-1.8451, -0.4271, 0.6173, -0.1081]) tensor([0.0444, 0.1832, 0.5205, 0.2520]) -Greedy action tensor([-0.2806, -0.1988, 0.1975, 0.2445]) tensor([0.1856, 0.2014, 0.2993, 0.3137]) -Greedy action tensor([-0.6378, -0.5264, 0.2651, 0.3276]) tensor([0.1387, 0.1550, 0.3421, 0.3642]) -Greedy action tensor([-1.6765, -0.2339, 0.5190, 0.1081]) tensor([0.0496, 0.2098, 0.4454, 0.2953]) -Greedy action tensor([-1.3873, -0.6071, 0.3856, 0.1159]) tensor([0.0737, 0.1608, 0.4340, 0.3314]) -Greedy action tensor([-1.7192, -0.2354, 0.5156, -0.0040]) tensor([0.0492, 0.2171, 0.4600, 0.2736]) -Greedy action tensor([-1.8658, -0.2437, 0.5917, -0.1241]) tensor([0.0427, 0.2160, 0.4980, 0.2434]) -Greedy action tensor([-1.3837, 0.3083, 0.3483, -0.0932]) tensor([0.0636, 0.3455, 0.3596, 0.2313]) -Greedy action tensor([-1.9056, -0.4566, 0.6442, -0.1539]) tensor([0.0420, 0.1787, 0.5374, 0.2419]) -Greedy action tensor([-1.8848, -0.4435, 0.6315, -0.1534]) tensor([0.0430, 0.1817, 0.5324, 0.2429]) -Greedy action tensor([-1.3381, -0.2561, 0.7489, 0.7495]) tensor([0.0498, 0.1470, 0.4015, 0.4017]) -Greedy action tensor([-1.8279, -0.3890, 0.5876, -0.1127]) tensor([0.0455, 0.1919, 0.5096, 0.2530]) -Greedy action tensor([-1.8757, -0.4472, 0.6301, -0.1442]) tensor([0.0433, 0.1808, 0.5310, 0.2448]) -Greedy action tensor([-1.8377, -0.4987, 0.6270, -0.1164]) tensor([0.0451, 0.1721, 0.5305, 0.2523]) -Greedy action tensor([-1.5661, -0.3581, 0.7265, 0.3829]) tensor([0.0470, 0.1574, 0.4655, 0.3301]) -Greedy action tensor([-1.9096, -0.4089, 0.6503, -0.1578]) tensor([0.0414, 0.1854, 0.5348, 0.2384]) -Greedy action tensor([-0.5032, -0.5490, 0.1685, 0.2358]) tensor([0.1665, 0.1590, 0.3259, 0.3486]) -Greedy action tensor([-1.8021, -0.4101, 0.5883, -0.1353]) tensor([0.0471, 0.1894, 0.5141, 0.2493]) -Greedy action tensor([-1.8474, -0.4230, 0.6125, -0.1283]) tensor([0.0446, 0.1852, 0.5216, 0.2487]) -Greedy action tensor([-0.4256, -0.5181, 0.1934, 0.0710]) tensor([0.1848, 0.1685, 0.3432, 0.3036]) -Greedy action tensor([-1.8949, -0.4525, 0.6486, -0.1443]) tensor([0.0422, 0.1784, 0.5366, 0.2428]) -Greedy action tensor([-1.9041, -0.4698, 0.6776, -0.1308]) tensor([0.0411, 0.1727, 0.5439, 0.2423]) -Greedy action tensor([-1.7224, -0.6008, 0.6413, 0.1071]) tensor([0.0478, 0.1467, 0.5079, 0.2977]) -Greedy action tensor([-1.9542, -0.5420, 1.4042, 0.7460]) tensor([0.0205, 0.0842, 0.5898, 0.3054]) -Greedy action tensor([-0.9100, 0.2205, -0.1998, -0.5752]) tensor([0.1328, 0.4114, 0.2702, 0.1856]) -Greedy action tensor([-1.9134, -0.3795, 0.6461, -0.1490]) tensor([0.0410, 0.1900, 0.5298, 0.2392]) -Greedy action tensor([-1.6348, -0.4977, 0.4936, 0.0206]) tensor([0.0563, 0.1756, 0.4732, 0.2949]) -Greedy action tensor([-1.4349, -0.5554, 0.4861, 0.1933]) tensor([0.0652, 0.1572, 0.4453, 0.3323]) -Greedy action tensor([-1.9315, -0.4624, 0.6719, -0.1672]) tensor([0.0405, 0.1760, 0.5471, 0.2364]) -Greedy action tensor([-1.8709, -0.1647, 0.5855, -0.1133]) tensor([0.0417, 0.2298, 0.4866, 0.2419]) -Greedy action tensor([-0.9102, -0.3775, 0.4480, 1.1171]) tensor([0.0705, 0.1201, 0.2742, 0.5353]) -Greedy action tensor([-1.8844, -0.4605, 0.6689, -0.0813]) tensor([0.0415, 0.1725, 0.5338, 0.2521]) -Greedy action tensor([-1.7526, -0.4701, 0.5624, -0.0711]) tensor([0.0497, 0.1794, 0.5036, 0.2673]) -Greedy action tensor([-1.0186, -0.5882, 0.1976, 0.3547]) tensor([0.1014, 0.1560, 0.3422, 0.4004]) -Greedy action tensor([-1.7342, -0.4924, 0.5885, -0.1116]) tensor([0.0507, 0.1755, 0.5171, 0.2568]) -Greedy action tensor([-1.4038, 0.5850, 0.2876, 0.2137]) tensor([0.0533, 0.3892, 0.2891, 0.2685]) -Greedy action tensor([-1.2710, -0.7801, 0.8450, -0.2324]) tensor([0.0727, 0.1188, 0.6032, 0.2054]) -Greedy action tensor([-1.4383, -0.3557, 0.5908, -0.0357]) tensor([0.0640, 0.1890, 0.4868, 0.2602]) -Greedy action tensor([-0.8742, 0.2068, 0.9289, 1.0691]) tensor([0.0588, 0.1734, 0.3570, 0.4108]) -Greedy action tensor([-1.4393, -0.5035, 0.4460, 0.2294]) tensor([0.0648, 0.1651, 0.4266, 0.3435]) -Greedy action tensor([-1.7690, -0.2186, 0.5489, -0.0825]) tensor([0.0470, 0.2216, 0.4774, 0.2539]) -Greedy action tensor([-1.3135, 0.4912, 0.3008, 0.2835]) tensor([0.0587, 0.3567, 0.2949, 0.2898]) -Greedy action tensor([-1.8351, -0.3166, 0.5991, -0.1190]) tensor([0.0444, 0.2026, 0.5062, 0.2469]) -Greedy action tensor([-1.9012, -0.4535, 0.6700, -0.1346]) tensor([0.0413, 0.1759, 0.5409, 0.2419]) -Greedy action tensor([-1.7323, -0.2489, 0.6191, -0.0300]) tensor([0.0467, 0.2060, 0.4908, 0.2564]) -Greedy action tensor([-1.8948, -0.4464, 0.6412, -0.1551]) tensor([0.0424, 0.1805, 0.5356, 0.2415]) -Greedy action tensor([-1.9059, -0.4726, 0.6544, -0.1606]) tensor([0.0419, 0.1757, 0.5423, 0.2401]) -Greedy action tensor([-1.8131, -0.2581, 0.5818, -0.0546]) tensor([0.0444, 0.2104, 0.4873, 0.2579]) -Greedy action tensor([-1.7665, -0.4989, 0.5717, -0.1302]) tensor([0.0499, 0.1772, 0.5168, 0.2562]) -Greedy action tensor([-1.5060, -0.2915, 0.4134, -0.0034]) tensor([0.0638, 0.2148, 0.4348, 0.2866]) -Greedy action tensor([-1.8570, -0.4299, 0.6394, -0.0898]) tensor([0.0432, 0.1799, 0.5241, 0.2528]) -Greedy action tensor([-1.7008, -0.5028, 0.5583, -0.0223]) tensor([0.0520, 0.1722, 0.4975, 0.2784]) -Greedy action tensor([-1.9079, -0.3977, 0.6421, -0.1598]) tensor([0.0415, 0.1880, 0.5319, 0.2385]) -Greedy action tensor([-1.2749, -0.6547, 0.3426, 0.1408]) tensor([0.0832, 0.1547, 0.4194, 0.3427]) -Greedy action tensor([-1.5571, -0.4999, 0.7977, -0.5583]) tensor([0.0584, 0.1680, 0.6151, 0.1585]) -Greedy action tensor([-1.6610, -0.3739, 0.5046, -0.0206]) tensor([0.0541, 0.1958, 0.4713, 0.2788]) -Greedy action tensor([-1.5852, -0.5255, 0.4773, 0.0836]) tensor([0.0586, 0.1692, 0.4611, 0.3111]) -Greedy action tensor([-1.1501, -0.6161, 0.5136, -0.0303]) tensor([0.0905, 0.1544, 0.4778, 0.2773]) -Greedy action tensor([-0.7921, -0.2387, 0.6461, 1.1783]) tensor([0.0708, 0.1231, 0.2983, 0.5078]) -Greedy action tensor([-1.0589, -0.3871, 0.8147, 1.1256]) tensor([0.0545, 0.1067, 0.3547, 0.4841]) -Greedy action tensor([-0.6127, -0.5708, 0.2192, 0.0209]) tensor([0.1607, 0.1675, 0.3691, 0.3027]) -Greedy action tensor([-0.9211, -0.8028, 0.7364, 0.3429]) tensor([0.0916, 0.1032, 0.4808, 0.3244]) -Greedy action tensor([-0.2931, -0.4222, 1.1998, 1.5783]) tensor([0.0780, 0.0685, 0.3469, 0.5066]) -Greedy action tensor([-1.5549, -0.5335, 0.5112, 0.1292]) tensor([0.0586, 0.1628, 0.4628, 0.3158]) -Greedy action tensor([ 0.2528, 0.0674, -0.0073, -0.1961]) tensor([0.3086, 0.2564, 0.2380, 0.1970]) -Greedy action tensor([ 0.8886, -1.0532, 0.1153, -0.5051]) tensor([0.5396, 0.0774, 0.2490, 0.1339]) -Greedy action tensor([ 0.8345, -0.6825, -0.0112, -0.3499]) tensor([0.5116, 0.1122, 0.2196, 0.1565]) -Greedy action tensor([ 0.3353, 0.0213, -0.1493, -0.4346]) tensor([0.3559, 0.2600, 0.2192, 0.1648]) -Greedy action tensor([ 0.6321, -0.2054, -0.2082, -0.4167]) tensor([0.4515, 0.1954, 0.1949, 0.1582]) -Greedy action tensor([ 0.2932, 0.5536, -0.2805, 0.1299]) tensor([0.2695, 0.3497, 0.1519, 0.2289]) -Greedy action tensor([ 0.5584, -0.4397, -0.0504, -0.3212]) tensor([0.4297, 0.1584, 0.2337, 0.1783]) -Greedy action tensor([ 0.6201, -0.4168, -0.0799, -0.3805]) tensor([0.4507, 0.1598, 0.2238, 0.1657]) -Greedy action tensor([ 0.8579, -0.7697, 0.2509, -0.5964]) tensor([0.5063, 0.0994, 0.2760, 0.1183]) -Greedy action tensor([ 0.5162, -0.2304, -0.0608, -0.1987]) tensor([0.3961, 0.1877, 0.2224, 0.1938]) -Greedy action tensor([ 0.6624, -0.6212, -0.1061, -0.1995]) tensor([0.4623, 0.1281, 0.2144, 0.1953]) -Greedy action tensor([ 0.3956, -0.0993, -0.1013, -0.0726]) tensor([0.3516, 0.2143, 0.2139, 0.2202]) -Greedy action tensor([ 0.5986, -0.7163, -0.0883, -0.1110]) tensor([0.4418, 0.1186, 0.2223, 0.2173]) -Greedy action tensor([ 0.8356, -0.5006, -0.1722, -0.5416]) tensor([0.5319, 0.1398, 0.1942, 0.1342]) -Greedy action tensor([ 0.5176, -0.3977, 0.0664, -0.3107]) tensor([0.4042, 0.1618, 0.2574, 0.1765]) -Greedy action tensor([ 0.9610, -0.4920, 0.0105, -0.3672]) tensor([0.5304, 0.1240, 0.2050, 0.1405]) -Greedy action tensor([ 0.6922, -0.6076, 0.1507, -0.2671]) tensor([0.4469, 0.1218, 0.2600, 0.1712]) -Greedy action tensor([ 0.5644, -0.4042, -0.0302, -0.5190]) tensor([0.4406, 0.1672, 0.2431, 0.1491]) -Greedy action tensor([ 0.2615, 0.2242, -0.1953, -0.2651]) tensor([0.3137, 0.3022, 0.1987, 0.1853]) -Greedy action tensor([ 0.6645, -0.4488, -0.0913, -0.3736]) tensor([0.4646, 0.1526, 0.2182, 0.1645]) -Greedy action tensor([ 0.5638, -0.5399, 0.0545, -0.6905]) tensor([0.4509, 0.1495, 0.2709, 0.1286]) -Greedy action tensor([ 0.8956, -0.7765, 0.0840, -0.4208]) tensor([0.5263, 0.0989, 0.2338, 0.1411]) -Greedy action tensor([ 0.6671, -0.6281, -0.0915, -0.3778]) tensor([0.4776, 0.1308, 0.2237, 0.1680]) -Greedy action tensor([ 0.4909, -0.4214, 0.0594, -0.4709]) tensor([0.4110, 0.1650, 0.2669, 0.1571]) -Greedy action tensor([ 0.5987, -0.3330, -0.0647, -0.5960]) tensor([0.4521, 0.1781, 0.2329, 0.1369]) -Greedy action tensor([ 0.5874, -0.3813, -0.0161, -0.3884]) tensor([0.4342, 0.1648, 0.2374, 0.1636]) -Greedy action tensor([ 0.7691, -0.4790, -0.0959, -0.5433]) tensor([0.5057, 0.1452, 0.2129, 0.1361]) -Greedy action tensor([ 0.6051, -0.1398, -0.1036, -0.2464]) tensor([0.4178, 0.1983, 0.2056, 0.1783]) -Greedy action tensor([ 0.3468, 0.0468, -0.0668, -0.5168]) tensor([0.3541, 0.2623, 0.2342, 0.1493]) -Greedy action tensor([ 0.4432, -0.2858, 0.0074, -0.2889]) tensor([0.3831, 0.1848, 0.2478, 0.1843]) -Greedy action tensor([ 0.6372, -0.1911, -0.1989, -0.4225]) tensor([0.4511, 0.1970, 0.1955, 0.1563]) -Greedy action tensor([ 0.7772, -0.5734, -0.1076, -0.5760]) tensor([0.5181, 0.1342, 0.2138, 0.1339]) -Greedy action tensor([ 0.6328, -0.0721, -0.1316, -0.1455]) tensor([0.4134, 0.2043, 0.1925, 0.1898]) -Greedy action tensor([ 0.1584, -0.1486, 0.0474, -0.1950]) tensor([0.3000, 0.2207, 0.2685, 0.2107]) -Greedy action tensor([ 0.8161, -0.7746, 0.2532, -0.5124]) tensor([0.4906, 0.1000, 0.2794, 0.1300]) -Greedy action tensor([ 0.3090, -0.0575, -0.0610, -0.2595]) tensor([0.3389, 0.2350, 0.2341, 0.1920]) -Greedy action tensor([ 0.8380, -0.6558, -0.1111, -0.8524]) tensor([0.5568, 0.1250, 0.2155, 0.1027]) -Greedy action tensor([ 0.5685, -0.6885, -0.2242, -0.4427]) tensor([0.4760, 0.1354, 0.2154, 0.1732]) -Greedy action tensor([ 0.7497, -0.0754, 0.0315, -0.7738]) tensor([0.4665, 0.2044, 0.2275, 0.1017]) -Greedy action tensor([ 0.5924, -0.4517, -0.1755, -0.3662]) tensor([0.4547, 0.1600, 0.2110, 0.1743]) -Greedy action tensor([ 0.7583, -0.7853, -0.0643, -0.1567]) tensor([0.4870, 0.1040, 0.2139, 0.1951]) -Greedy action tensor([ 0.4691, -0.2297, 0.0331, -0.1610]) tensor([0.3737, 0.1858, 0.2416, 0.1990]) -Greedy action tensor([ 0.5626, -0.4843, 0.0198, -0.7363]) tensor([0.4535, 0.1592, 0.2635, 0.1237]) -Greedy action tensor([ 0.6334, -0.3130, -0.1936, -0.0313]) tensor([0.4274, 0.1659, 0.1869, 0.2198]) -Greedy action tensor([ 0.8484, -0.3153, -0.0741, -0.4192]) tensor([0.5022, 0.1568, 0.1996, 0.1414]) -Greedy action tensor([ 0.6225, -0.4189, -0.1710, -0.4565]) tensor([0.4662, 0.1645, 0.2108, 0.1585]) -Greedy action tensor([ 0.2881, -0.1445, -0.1605, -0.4765]) tensor([0.3633, 0.2357, 0.2320, 0.1691]) -Greedy action tensor([ 0.4044, -0.0673, -0.1877, -0.3207]) tensor([0.3757, 0.2344, 0.2079, 0.1820]) -Greedy action tensor([ 0.7316, -0.6250, -0.0036, -0.1706]) tensor([0.4667, 0.1202, 0.2238, 0.1893]) -Greedy action tensor([ 0.4569, -0.0487, 0.0272, -0.2939]) tensor([0.3669, 0.2213, 0.2387, 0.1731]) -Greedy action tensor([ 0.1754, -0.0636, -0.0637, -0.4570]) tensor([0.3220, 0.2535, 0.2535, 0.1711]) -Greedy action tensor([ 0.2784, -0.0396, -0.0293, -0.1544]) tensor([0.3214, 0.2338, 0.2363, 0.2085]) -Greedy action tensor([ 1.0192, -0.6064, 0.2066, -0.6161]) tensor([0.5448, 0.1072, 0.2417, 0.1062]) -Greedy action tensor([ 0.8406, -0.3654, -0.1295, -0.2490]) tensor([0.4963, 0.1486, 0.1881, 0.1669]) -Greedy action tensor([ 0.4941, -0.3610, -0.1581, -0.3107]) tensor([0.4178, 0.1777, 0.2176, 0.1868]) -Greedy action tensor([ 0.8183, -0.5705, 0.0067, -0.3920]) tensor([0.5021, 0.1252, 0.2230, 0.1497]) -Greedy action tensor([ 0.2989, 0.0594, -0.0303, -0.2333]) tensor([0.3232, 0.2544, 0.2326, 0.1898]) -Greedy action tensor([ 0.5415, -0.2087, -0.0671, -0.1385]) tensor([0.3964, 0.1872, 0.2157, 0.2008]) -Greedy action tensor([ 0.6348, -0.5560, -0.1374, -0.2767]) tensor([0.4613, 0.1402, 0.2131, 0.1854]) -Greedy action tensor([ 0.4084, -0.2421, -0.0275, -0.1001]) tensor([0.3610, 0.1884, 0.2335, 0.2171]) -Greedy action tensor([ 0.8373, -0.4136, -0.0603, -0.3400]) tensor([0.4995, 0.1430, 0.2036, 0.1539]) -Greedy action tensor([ 0.3952, 0.1109, -0.0985, -0.0364]) tensor([0.3320, 0.2498, 0.2026, 0.2156]) -Greedy action tensor([ 0.6965, -0.3056, 0.0158, -0.3193]) tensor([0.4473, 0.1642, 0.2265, 0.1620]) -Greedy action tensor([ 0.7374, -0.5077, 0.0254, -0.5416]) tensor([0.4862, 0.1400, 0.2385, 0.1353]) -Greedy action tensor([ 0.9137, -0.7967, -0.0133, -0.5525]) tensor([0.5533, 0.1000, 0.2190, 0.1277]) -Greedy action tensor([ 0.7997, -0.4158, 0.0935, -0.3719]) tensor([0.4762, 0.1412, 0.2350, 0.1476]) -Greedy action tensor([ 0.8794, -0.4770, 0.0136, -0.0882]) tensor([0.4858, 0.1251, 0.2044, 0.1846]) -Greedy action tensor([ 0.3235, 0.1329, -0.1663, -0.2435]) tensor([0.3326, 0.2749, 0.2038, 0.1887]) -Greedy action tensor([ 0.2693, -0.0361, -0.0053, -0.2262]) tensor([0.3220, 0.2372, 0.2447, 0.1962]) -Greedy action tensor([ 0.7461, -0.5695, 0.0125, -0.4874]) tensor([0.4903, 0.1315, 0.2354, 0.1428]) -Greedy action tensor([ 0.6321, -0.3578, -0.0794, -0.3497]) tensor([0.4470, 0.1661, 0.2194, 0.1675]) -Greedy action tensor([ 0.8387, 0.2990, -0.1169, -0.2999]) tensor([0.4371, 0.2548, 0.1681, 0.1400]) -Greedy action tensor([ 0.4772, 0.0094, -0.1038, -0.1440]) tensor([0.3672, 0.2300, 0.2054, 0.1973]) -Greedy action tensor([ 0.7241, -0.6647, -0.0994, -0.3373]) tensor([0.4916, 0.1226, 0.2158, 0.1701]) -Greedy action tensor([ 0.4980, -0.3696, -0.0755, -0.2147]) tensor([0.4042, 0.1698, 0.2278, 0.1982]) -Greedy action tensor([ 0.7867, -0.4302, -0.1518, -0.3979]) tensor([0.5017, 0.1486, 0.1963, 0.1535]) -Greedy action tensor([ 0.4800, -0.2146, -0.0727, -0.2807]) tensor([0.3934, 0.1964, 0.2264, 0.1838]) -Greedy action tensor([ 0.4771, 0.0851, -0.0729, 0.0311]) tensor([0.3457, 0.2336, 0.1994, 0.2213]) -Greedy action tensor([ 0.4989, -0.3059, -0.2030, -0.2244]) tensor([0.4119, 0.1842, 0.2041, 0.1998]) -Greedy action tensor([ 0.5144, -0.1021, -0.0251, -0.0913]) tensor([0.3747, 0.2023, 0.2185, 0.2045]) -Greedy action tensor([ 0.8226, 0.1586, -0.2490, -0.4614]) tensor([0.4686, 0.2412, 0.1605, 0.1298]) -Greedy action tensor([ 1.9015, -0.5293, -0.3175, 0.9173]) tensor([0.6368, 0.0560, 0.0692, 0.2380]) -Greedy action tensor([ 1.5429, -0.4254, -0.3773, 0.5695]) tensor([0.6009, 0.0839, 0.0881, 0.2270]) -Greedy action tensor([ 0.9778, -0.3511, -0.3567, 0.3536]) tensor([0.4846, 0.1283, 0.1276, 0.2596]) -Greedy action tensor([ 1.2132, -0.4362, -0.2536, -0.1415]) tensor([0.5949, 0.1143, 0.1372, 0.1535]) -Greedy action tensor([ 1.3533, -0.2863, -0.2569, -0.0230]) tensor([0.6074, 0.1179, 0.1214, 0.1534]) -Greedy action tensor([ 1.6596, -0.7633, -0.3234, 0.6509]) tensor([0.6285, 0.0557, 0.0865, 0.2292]) -Greedy action tensor([ 1.5647, -0.6182, 0.1086, 0.2335]) tensor([0.6211, 0.0700, 0.1448, 0.1641]) -Greedy action tensor([ 1.1488, -0.5007, 0.0896, 0.3009]) tensor([0.5083, 0.0977, 0.1763, 0.2177]) -Greedy action tensor([ 1.1335, -0.5924, -0.6092, 0.8738]) tensor([0.4707, 0.0838, 0.0824, 0.3631]) -Greedy action tensor([ 1.4865, -0.4392, -0.2416, 0.4795]) tensor([0.5922, 0.0863, 0.1052, 0.2163]) -Greedy action tensor([ 0.5404, -0.2196, 0.0226, 0.0608]) tensor([0.3728, 0.1743, 0.2221, 0.2308]) -Greedy action tensor([ 0.6963, 0.1477, 0.3410, -0.2218]) tensor([0.3734, 0.2158, 0.2617, 0.1491]) -Greedy action tensor([ 1.9760, -0.7596, -0.2668, 0.6052]) tensor([0.7018, 0.0455, 0.0745, 0.1782]) -Greedy action tensor([ 1.0112, -0.4907, -0.1041, 0.2088]) tensor([0.5003, 0.1114, 0.1640, 0.2243]) -Greedy action tensor([ 1.5046, -0.2655, -0.5442, 0.3740]) tensor([0.6165, 0.1050, 0.0795, 0.1990]) -Greedy action tensor([ 1.2267, -0.3447, -0.1598, 0.0883]) tensor([0.5624, 0.1168, 0.1406, 0.1802]) -Greedy action tensor([ 1.7601, -0.5460, -0.2388, 0.3126]) tensor([0.6801, 0.0678, 0.0921, 0.1599]) -Greedy action tensor([ 1.7707, -0.0993, -0.5118, 0.3537]) tensor([0.6673, 0.1028, 0.0681, 0.1618]) -Greedy action tensor([ 0.7292, -0.2793, -0.1128, 0.5214]) tensor([0.3835, 0.1399, 0.1652, 0.3115]) -Greedy action tensor([ 1.1911, -0.1406, -0.2144, 0.2382]) tensor([0.5277, 0.1393, 0.1294, 0.2035]) -Greedy action tensor([ 1.4685, -0.3501, -0.2913, -0.1098]) tensor([0.6491, 0.1053, 0.1117, 0.1339]) -Greedy action tensor([ 1.4295, -0.4174, -0.0534, 0.3150]) tensor([0.5838, 0.0921, 0.1325, 0.1916]) -Greedy action tensor([ 1.4633, -0.6220, -0.1696, 0.7268]) tensor([0.5560, 0.0691, 0.1086, 0.2662]) -Greedy action tensor([ 1.4267, -0.3092, -0.0833, 0.1126]) tensor([0.6003, 0.1058, 0.1326, 0.1613]) -Greedy action tensor([ 1.1683, -0.0979, -0.1574, 0.1749]) tensor([0.5214, 0.1470, 0.1385, 0.1931]) -Greedy action tensor([ 0.7378, -0.3571, -0.2790, 0.1562]) tensor([0.4434, 0.1484, 0.1604, 0.2479]) -Greedy action tensor([ 1.4543, -0.9061, 0.0120, 0.3159]) tensor([0.6056, 0.0572, 0.1432, 0.1940]) -Greedy action tensor([ 1.1794, -0.6209, -0.2497, 0.3635]) tensor([0.5414, 0.0895, 0.1297, 0.2394]) -Greedy action tensor([1.3241, 0.0227, 0.0629, 0.6778]) tensor([0.4809, 0.1309, 0.1362, 0.2520]) -Greedy action tensor([ 2.0237, -0.2376, -0.4212, 0.2661]) tensor([0.7335, 0.0764, 0.0636, 0.1265]) -Greedy action tensor([ 1.2402, -0.1527, -0.5020, 0.4183]) tensor([0.5367, 0.1333, 0.0940, 0.2360]) -Greedy action tensor([ 1.3521, -0.5411, -0.1089, 0.1354]) tensor([0.5957, 0.0897, 0.1382, 0.1764]) -Greedy action tensor([ 1.6441, -0.0263, -0.4536, -0.0778]) tensor([0.6713, 0.1263, 0.0824, 0.1200]) -Greedy action tensor([ 1.8478, -1.0833, -0.1980, 0.2154]) tensor([0.7257, 0.0387, 0.0938, 0.1418]) -Greedy action tensor([ 1.4622, -0.4485, -0.6666, 0.0605]) tensor([0.6609, 0.0978, 0.0786, 0.1627]) -Greedy action tensor([ 1.3468, -0.3969, -0.2737, 0.5671]) tensor([0.5461, 0.0955, 0.1080, 0.2504]) -Greedy action tensor([ 0.8930, -0.0896, -0.0586, 0.2028]) tensor([0.4421, 0.1655, 0.1707, 0.2217]) -Greedy action tensor([ 0.7584, -0.5591, -0.2114, 0.1761]) tensor([0.4534, 0.1214, 0.1719, 0.2533]) -Greedy action tensor([ 0.9020, -0.1201, -0.1438, 0.3227]) tensor([0.4402, 0.1584, 0.1547, 0.2466]) -Greedy action tensor([ 1.5733, -0.3709, -0.3624, 0.7042]) tensor([0.5859, 0.0838, 0.0846, 0.2457]) -Greedy action tensor([ 1.4318, -0.5213, -0.6690, 0.2706]) tensor([0.6340, 0.0899, 0.0776, 0.1985]) -Greedy action tensor([ 1.6529, -0.4676, -0.2971, 0.2091]) tensor([0.6674, 0.0801, 0.0950, 0.1575]) -Greedy action tensor([ 1.5483, -0.7238, -0.2058, 0.6293]) tensor([0.5970, 0.0615, 0.1033, 0.2381]) -Greedy action tensor([ 1.0324, 0.0156, -0.2684, -0.0458]) tensor([0.5065, 0.1832, 0.1379, 0.1723]) -Greedy action tensor([ 0.3212, -0.4200, 0.0402, -0.0159]) tensor([0.3395, 0.1618, 0.2563, 0.2424]) -Greedy action tensor([ 0.6951, -0.2846, -0.3212, 0.6154]) tensor([0.3758, 0.1411, 0.1360, 0.3471]) -Greedy action tensor([ 1.4691, -0.8356, -0.3598, 0.7348]) tensor([0.5746, 0.0573, 0.0923, 0.2757]) -Greedy action tensor([ 1.2882, -0.6361, -0.3149, 0.1164]) tensor([0.6035, 0.0881, 0.1215, 0.1870]) -Greedy action tensor([ 1.5563e+00, -1.0130e+00, -2.0763e-01, -1.3901e-03]) tensor([0.6856, 0.0525, 0.1175, 0.1444]) -Greedy action tensor([ 1.3090, -0.4825, -0.2475, -0.0255]) tensor([0.6094, 0.1016, 0.1285, 0.1605]) -Greedy action tensor([ 0.8540, -0.4668, -0.3402, 0.4835]) tensor([0.4424, 0.1181, 0.1340, 0.3055]) -Greedy action tensor([ 1.1182, 0.0606, -0.1628, 0.3866]) tensor([0.4748, 0.1649, 0.1319, 0.2284]) -Greedy action tensor([ 1.3178, -0.7658, -0.4741, 0.3155]) tensor([0.6031, 0.0751, 0.1005, 0.2213]) -Greedy action tensor([ 1.0281, -0.2122, 0.0802, 0.2233]) tensor([0.4708, 0.1362, 0.1825, 0.2105]) -Greedy action tensor([ 1.6183, -0.3763, -0.3204, 0.1860]) tensor([0.6584, 0.0896, 0.0947, 0.1572]) -Greedy action tensor([ 1.1442, -0.5768, 0.0638, 0.2010]) tensor([0.5242, 0.0938, 0.1779, 0.2041]) -Greedy action tensor([ 1.9267, -0.5544, -0.2556, 0.3887]) tensor([0.7086, 0.0593, 0.0799, 0.1522]) -Greedy action tensor([ 1.1540, -0.3415, -0.2954, 0.6338]) tensor([0.4870, 0.1092, 0.1143, 0.2895]) -Greedy action tensor([ 1.5287, -0.9003, -0.0669, 0.0894]) tensor([0.6545, 0.0577, 0.1327, 0.1552]) -Greedy action tensor([ 1.9514, -0.6903, -0.5936, 0.2025]) tensor([0.7555, 0.0538, 0.0593, 0.1314]) -Greedy action tensor([ 1.2778, -0.8110, -0.1073, 0.3885]) tensor([0.5602, 0.0694, 0.1402, 0.2302]) -Greedy action tensor([ 1.3145, -0.1788, -0.4739, 0.5366]) tensor([0.5402, 0.1213, 0.0903, 0.2481]) -Greedy action tensor([ 1.3577, -0.4006, -0.4935, 0.4251]) tensor([0.5804, 0.1000, 0.0912, 0.2284]) -Greedy action tensor([ 0.2180, -0.1739, -0.3275, 0.5393]) tensor([0.2752, 0.1859, 0.1595, 0.3794]) -Greedy action tensor([ 1.4943, -0.6101, -0.3963, 0.6324]) tensor([0.5899, 0.0719, 0.0891, 0.2491]) -Greedy action tensor([ 1.8335, -0.3689, -0.5532, 0.4033]) tensor([0.6936, 0.0767, 0.0638, 0.1660]) -Greedy action tensor([ 1.4032, -0.4960, -0.2103, 0.1979]) tensor([0.6066, 0.0908, 0.1208, 0.1817]) -Greedy action tensor([ 1.5049, -0.2280, -0.6733, 0.3066]) tensor([0.6282, 0.1111, 0.0711, 0.1895]) -Greedy action tensor([ 0.9280, -0.0368, -0.0869, 0.2921]) tensor([0.4400, 0.1677, 0.1594, 0.2329]) -Greedy action tensor([ 1.9316, -0.6513, -0.5190, 0.5612]) tensor([0.7063, 0.0534, 0.0609, 0.1794]) -Greedy action tensor([ 2.1555, -0.9291, -0.3743, 0.9754]) tensor([0.6980, 0.0319, 0.0556, 0.2145]) -Greedy action tensor([ 1.0678, -0.5083, -0.3596, 0.4274]) tensor([0.5067, 0.1048, 0.1216, 0.2670]) -Greedy action tensor([ 1.5129, -0.3077, -0.9682, 0.4921]) tensor([0.6227, 0.1008, 0.0521, 0.2244]) -Greedy action tensor([ 2.1073, 0.0410, -0.8853, 0.7338]) tensor([0.6993, 0.0886, 0.0351, 0.1771]) -Greedy action tensor([ 1.0487, -0.6392, -0.3012, 0.7214]) tensor([0.4619, 0.0854, 0.1198, 0.3329]) -Greedy action tensor([ 1.2619, 0.1743, -0.4770, 0.0695]) tensor([0.5506, 0.1856, 0.0967, 0.1671]) -Greedy action tensor([ 1.8578, -0.4347, 0.1082, 0.2339]) tensor([0.6793, 0.0686, 0.1181, 0.1339]) -Greedy action tensor([ 1.4282, -0.3175, -0.2945, 0.3435]) tensor([0.5913, 0.1032, 0.1056, 0.1999]) -Greedy action tensor([ 1.3942, -0.5815, -0.1145, 0.2436]) tensor([0.5966, 0.0827, 0.1320, 0.1888]) -Greedy action tensor([ 1.4273, -0.8316, -0.3758, 0.7575]) tensor([0.5615, 0.0587, 0.0925, 0.2874]) -Greedy action tensor([ 1.0162, -0.2858, 0.1227, 0.0457]) tensor([0.4854, 0.1320, 0.1986, 0.1839]) -Greedy action tensor([2.0946, 0.7037, 0.3585, 1.4313]) tensor([0.5154, 0.1283, 0.0908, 0.2655]) -Greedy action tensor([ 0.6909, -0.5151, -0.6783, 0.2149]) tensor([0.4598, 0.1377, 0.1169, 0.2856]) -Greedy action tensor([ 0.1770, -0.2077, 0.5274, 0.5952]) tensor([0.2165, 0.1473, 0.3073, 0.3289]) -Greedy action tensor([ 0.5390, -1.3073, 0.2882, 1.2941]) tensor([0.2461, 0.0388, 0.1915, 0.5236]) -Greedy action tensor([ 0.8944, 0.4596, -0.1847, -0.5512]) tensor([0.4498, 0.2912, 0.1529, 0.1060]) -Greedy action tensor([-0.8444, -0.5584, -0.6783, 0.1260]) tensor([0.1626, 0.2164, 0.1920, 0.4290]) -Greedy action tensor([ 0.7241, -0.0875, -0.2924, 0.2025]) tensor([0.4167, 0.1851, 0.1508, 0.2474]) -Greedy action tensor([ 0.6357, -0.3125, -0.4940, 1.6126]) tensor([0.2290, 0.0887, 0.0740, 0.6083]) -Greedy action tensor([-0.2019, -1.2762, 0.1378, 0.8896]) tensor([0.1747, 0.0597, 0.2453, 0.5203]) -Greedy action tensor([ 1.3183, -1.0823, -0.0398, 0.8321]) tensor([0.5095, 0.0462, 0.1310, 0.3133]) -Greedy action tensor([ 1.6048, -0.7814, 0.4125, 0.8361]) tensor([0.5379, 0.0495, 0.1633, 0.2494]) -Greedy action tensor([ 0.4600, 0.9855, 0.1132, -0.8710]) tensor([0.2730, 0.4618, 0.1930, 0.0721]) -Greedy action tensor([ 0.3086, 0.3154, 0.4394, -0.4931]) tensor([0.2782, 0.2801, 0.3170, 0.1248]) -Greedy action tensor([ 0.8875, -1.9946, 0.5187, 0.6492]) tensor([0.3944, 0.0221, 0.2728, 0.3108]) -Greedy action tensor([ 0.7256, 0.7893, -0.4640, 0.5510]) tensor([0.3115, 0.3320, 0.0948, 0.2616]) -Greedy action tensor([-0.0643, -0.0436, 0.8510, 1.3340]) tensor([0.1167, 0.1192, 0.2915, 0.4726]) -Greedy action tensor([-0.5621, -0.3875, -0.5540, 0.7498]) tensor([0.1447, 0.1723, 0.1459, 0.5372]) -Greedy action tensor([ 1.6125, -0.8397, 1.8606, 0.4712]) tensor([0.3721, 0.0320, 0.4769, 0.1189]) -Greedy action tensor([1.0217, 0.2074, 0.0766, 1.2705]) tensor([0.3211, 0.1422, 0.1248, 0.4118]) -Greedy action tensor([-1.2364, -0.4380, 0.5264, -0.2433]) tensor([0.0851, 0.1891, 0.4961, 0.2297]) -Greedy action tensor([ 0.7684, 0.7096, 1.2409, -0.4709]) tensor([0.2607, 0.2458, 0.4181, 0.0755]) -Greedy action tensor([ 0.3622, -0.0391, 0.2532, -0.4395]) tensor([0.3317, 0.2221, 0.2975, 0.1488]) -Greedy action tensor([-0.6916, 0.0034, -0.4075, -1.0924]) tensor([0.1999, 0.4006, 0.2656, 0.1339]) -Greedy action tensor([ 0.6702, -2.1024, -0.0765, 0.0471]) tensor([0.4824, 0.0302, 0.2287, 0.2587]) -Greedy action tensor([-0.3162, -0.0662, -0.5583, 0.7276]) tensor([0.1692, 0.2173, 0.1328, 0.4806]) -Greedy action tensor([-0.6729, -0.5238, -0.3517, -0.5545]) tensor([0.2144, 0.2488, 0.2955, 0.2413]) -Greedy action tensor([-0.4164, -1.8534, -0.1473, -0.3775]) tensor([0.2789, 0.0663, 0.3650, 0.2899]) -Greedy action tensor([-0.3897, -0.6021, 0.1708, 0.3122]) tensor([0.1793, 0.1450, 0.3140, 0.3617]) -Greedy action tensor([ 0.8774, -0.8963, 0.0562, 0.5439]) tensor([0.4299, 0.0730, 0.1891, 0.3080]) -Greedy action tensor([ 0.1045, -0.8365, -0.4614, 1.2189]) tensor([0.1998, 0.0780, 0.1134, 0.6088]) -Greedy action tensor([ 0.4260, -2.8340, -0.4712, 0.3786]) tensor([0.4167, 0.0160, 0.1699, 0.3974]) -Greedy action tensor([ 0.3600, -0.1657, 0.4305, 1.4300]) tensor([0.1792, 0.1060, 0.1923, 0.5225]) -Greedy action tensor([-1.0164, -1.0839, 0.2358, 0.0632]) tensor([0.1194, 0.1116, 0.4176, 0.3514]) -Greedy action tensor([ 0.1029, 0.6131, -0.7016, 0.5863]) tensor([0.2112, 0.3518, 0.0945, 0.3425]) -Greedy action tensor([ 1.9142, -0.1235, 0.1103, 0.3944]) tensor([0.6606, 0.0861, 0.1088, 0.1445]) -Greedy action tensor([-1.6190, -0.5877, -0.6318, 0.5791]) tensor([0.0645, 0.1810, 0.1732, 0.5813]) -Greedy action tensor([-0.2168, -0.3848, 0.6259, -0.6658]) tensor([0.2081, 0.1759, 0.4832, 0.1328]) -Greedy action tensor([0.6131, 0.3344, 0.4359, 0.4132]) tensor([0.2930, 0.2217, 0.2454, 0.2399]) -Greedy action tensor([-0.3892, -1.7079, 0.1040, 0.3125]) tensor([0.2032, 0.0543, 0.3327, 0.4098]) -Greedy action tensor([ 0.7648, -1.1044, 0.9685, -0.4250]) tensor([0.3725, 0.0575, 0.4567, 0.1134]) -Greedy action tensor([ 0.0063, -0.9926, -0.0199, 1.1823]) tensor([0.1791, 0.0660, 0.1745, 0.5805]) -Greedy action tensor([0.7127, 0.5338, 0.6850, 0.4160]) tensor([0.2815, 0.2354, 0.2738, 0.2092]) -Greedy action tensor([-0.9125, -1.0627, 0.9287, -1.5351]) tensor([0.1149, 0.0989, 0.7245, 0.0617]) -Greedy action tensor([ 0.6384, 0.2022, -0.0975, 0.2655]) tensor([0.3553, 0.2297, 0.1702, 0.2447]) -Greedy action tensor([ 1.4146, -0.6815, 1.0888, 1.0275]) tensor([0.3962, 0.0487, 0.2861, 0.2690]) -Greedy action tensor([ 0.5980, -0.2686, 1.4923, 0.5846]) tensor([0.2061, 0.0866, 0.5040, 0.2033]) -Greedy action tensor([-0.1462, -1.5787, -0.4353, -0.0243]) tensor([0.3208, 0.0766, 0.2402, 0.3624]) -Greedy action tensor([ 0.0212, 0.5005, -0.1159, 0.0325]) tensor([0.2223, 0.3590, 0.1938, 0.2248]) -Greedy action tensor([-0.1878, -0.9473, 0.6299, 0.1223]) tensor([0.1962, 0.0918, 0.4445, 0.2675]) -Greedy action tensor([ 0.1894, -0.0415, 0.5563, 0.1202]) tensor([0.2398, 0.1904, 0.3461, 0.2238]) -Greedy action tensor([ 0.5934, 0.5431, -0.8695, -1.1357]) tensor([0.4237, 0.4029, 0.0981, 0.0752]) -Greedy action tensor([-0.4091, 0.5739, 1.8623, -0.7208]) tensor([0.0709, 0.1896, 0.6876, 0.0519]) -Greedy action tensor([ 0.1573, -1.0982, 0.3474, 0.0465]) tensor([0.2950, 0.0841, 0.3568, 0.2641]) -Greedy action tensor([-0.7787, 0.0929, 0.5872, -0.6763]) tensor([0.1188, 0.2840, 0.4656, 0.1316]) -Greedy action tensor([0.9601, 0.7031, 0.7999, 0.6279]) tensor([0.2991, 0.2314, 0.2549, 0.2146]) -Greedy action tensor([ 0.7515, 0.6234, 0.0502, -1.0040]) tensor([0.3924, 0.3452, 0.1946, 0.0678]) -Greedy action tensor([ 0.1023, -1.1478, -0.5620, 0.4755]) tensor([0.3074, 0.0881, 0.1582, 0.4464]) -Greedy action tensor([0.4709, 1.1520, 0.4075, 0.0886]) tensor([0.2175, 0.4299, 0.2042, 0.1484]) -Greedy action tensor([ 1.5027, -1.7130, 1.1667, -0.0083]) tensor([0.5062, 0.0203, 0.3618, 0.1117]) -Greedy action tensor([ 0.1685, -2.1770, -0.0483, 0.8950]) tensor([0.2520, 0.0241, 0.2029, 0.5210]) -Greedy action tensor([ 1.3626, -1.1731, -0.5656, 0.6078]) tensor([0.5901, 0.0467, 0.0858, 0.2774]) -Greedy action tensor([ 0.7849, 0.4733, 1.3803, -0.7169]) tensor([0.2653, 0.1943, 0.4812, 0.0591]) -Greedy action tensor([ 1.2167, -0.9002, 1.2110, 0.9762]) tensor([0.3447, 0.0415, 0.3427, 0.2710]) -Greedy action tensor([-0.8492, -0.9084, -0.5410, 0.3153]) tensor([0.1537, 0.1448, 0.2091, 0.4924]) -Greedy action tensor([ 1.0538, 0.4201, -0.7349, 0.9898]) tensor([0.3794, 0.2013, 0.0634, 0.3559]) -Greedy action tensor([0.1726, 0.3756, 0.3065, 0.3275]) tensor([0.2205, 0.2701, 0.2521, 0.2574]) -Greedy action tensor([-0.7471, -0.0130, 0.4457, -0.4399]) tensor([0.1292, 0.2692, 0.4259, 0.1757]) -Greedy action tensor([-0.8736, -0.7775, -0.6497, -0.0425]) tensor([0.1771, 0.1949, 0.2215, 0.4065]) -Greedy action tensor([-0.5508, -1.7016, 0.2459, 1.2155]) tensor([0.1066, 0.0337, 0.2364, 0.6233]) -Greedy action tensor([-0.8811, -0.7704, -0.7206, 0.3255]) tensor([0.1508, 0.1684, 0.1770, 0.5038]) -Greedy action tensor([-2.6372e-03, -2.6639e+00, 1.2460e+00, 2.2328e-01]) tensor([0.1721, 0.0120, 0.6000, 0.2158]) -Greedy action tensor([ 0.0127, -2.1384, 0.5505, -0.5489]) tensor([0.2942, 0.0342, 0.5038, 0.1678]) -Greedy action tensor([ 1.2320, -0.0767, 0.5910, 0.0421]) tensor([0.4759, 0.1286, 0.2507, 0.1448]) -Greedy action tensor([ 1.0208, -1.2506, -0.2295, -0.0968]) tensor([0.5825, 0.0601, 0.1668, 0.1905]) -Greedy action tensor([ 0.4120, 0.4093, -1.8578, 0.2114]) tensor([0.3426, 0.3417, 0.0354, 0.2803]) -Greedy action tensor([ 0.9787, -1.4389, 0.9369, 0.0367]) tensor([0.4102, 0.0366, 0.3934, 0.1599]) -Greedy action tensor([ 0.6815, -0.2576, -0.5033, -0.6537]) tensor([0.5102, 0.1995, 0.1560, 0.1342]) -Greedy action tensor([-0.1055, -2.3757, 0.5948, 0.3731]) tensor([0.2114, 0.0218, 0.4257, 0.3411]) -Greedy action tensor([0.2597, 0.1441, 0.2598, 0.2884]) tensor([0.2551, 0.2273, 0.2551, 0.2625]) -Greedy action tensor([ 0.5515, -1.0411, 1.1948, -0.5557]) tensor([0.2910, 0.0592, 0.5537, 0.0962]) -Greedy action tensor([-0.8281, -2.9472, -0.0916, 0.2419]) tensor([0.1633, 0.0196, 0.3410, 0.4761]) -Greedy action tensor([ 0.8837, -0.3415, 0.6216, 1.3106]) tensor([0.2781, 0.0817, 0.2140, 0.4262]) -Greedy action tensor([-1.8703, -0.4723, 0.6501, -0.0971]) tensor([0.0428, 0.1732, 0.5320, 0.2520]) -Greedy action tensor([-1.9251, -0.3986, 0.6512, -0.1664]) tensor([0.0407, 0.1874, 0.5355, 0.2364]) -Greedy action tensor([-1.1633, -0.9115, 0.3839, 1.2237]) tensor([0.0560, 0.0720, 0.2630, 0.6090]) -Greedy action tensor([-1.9294, -0.4582, 0.6571, -0.1711]) tensor([0.0409, 0.1782, 0.5435, 0.2374]) -Greedy action tensor([-1.8010, -0.5276, 0.7225, 0.0316]) tensor([0.0429, 0.1534, 0.5354, 0.2683]) -Greedy action tensor([-1.9076, -0.3600, 0.6422, -0.1396]) tensor([0.0410, 0.1929, 0.5256, 0.2405]) -Greedy action tensor([-1.3194, -0.4436, 0.7103, 0.8164]) tensor([0.0513, 0.1233, 0.3908, 0.4346]) -Greedy action tensor([-1.8809, -0.4246, 0.6375, -0.1352]) tensor([0.0427, 0.1831, 0.5296, 0.2446]) -Greedy action tensor([-1.8333, -0.4543, 0.6199, -0.1817]) tensor([0.0458, 0.1820, 0.5330, 0.2391]) -Greedy action tensor([-1.8511, -0.4791, 0.6659, -0.0786]) tensor([0.0431, 0.1698, 0.5337, 0.2535]) -Greedy action tensor([-1.2541, 0.1172, 0.7550, 0.6455]) tensor([0.0524, 0.2065, 0.3908, 0.3503]) -Greedy action tensor([-0.7064, -0.4548, 0.2307, 0.1723]) tensor([0.1380, 0.1775, 0.3523, 0.3323]) -Greedy action tensor([-1.4637, -0.5354, 0.6244, 0.5062]) tensor([0.0533, 0.1348, 0.4299, 0.3820]) -Greedy action tensor([-1.8777, -0.4682, 0.6407, -0.1344]) tensor([0.0431, 0.1763, 0.5344, 0.2462]) -Greedy action tensor([-1.9191, -0.3810, 0.6456, -0.1676]) tensor([0.0410, 0.1907, 0.5323, 0.2361]) -Greedy action tensor([-0.9430, 0.2191, 0.2918, -0.2111]) tensor([0.1030, 0.3291, 0.3539, 0.2140]) -Greedy action tensor([-1.4622, -0.4647, 0.5776, 0.4722]) tensor([0.0546, 0.1480, 0.4197, 0.3777]) -Greedy action tensor([-1.7122, -0.3261, 0.5144, -0.0237]) tensor([0.0508, 0.2032, 0.4710, 0.2750]) -Greedy action tensor([-0.8178, -0.5955, 0.5387, 0.4636]) tensor([0.1027, 0.1283, 0.3989, 0.3700]) -Greedy action tensor([-1.1493, -0.4984, 0.2488, -0.0025]) tensor([0.0989, 0.1896, 0.4002, 0.3113]) -Greedy action tensor([-1.5641, -0.4580, 1.0004, 0.7581]) tensor([0.0367, 0.1111, 0.4775, 0.3747]) -Greedy action tensor([-1.7978, -0.3337, 0.6538, -0.0777]) tensor([0.0444, 0.1920, 0.5155, 0.2481]) -Greedy action tensor([-1.8469, -0.5392, 0.9289, 0.1841]) tensor([0.0352, 0.1303, 0.5658, 0.2686]) -Greedy action tensor([-0.9907, -0.6412, 0.1615, 0.5165]) tensor([0.0990, 0.1405, 0.3135, 0.4470]) -Greedy action tensor([-1.7617, -0.3327, 0.5546, -0.0533]) tensor([0.0480, 0.2004, 0.4866, 0.2650]) -Greedy action tensor([-0.6069, -0.2277, 0.1938, -0.0656]) tensor([0.1561, 0.2281, 0.3476, 0.2682]) -Greedy action tensor([-1.8953, -0.8226, 0.2233, -0.4378]) tensor([0.0605, 0.1768, 0.5031, 0.2597]) -Greedy action tensor([-1.7048, -0.0631, 0.3871, -0.4166]) tensor([0.0559, 0.2886, 0.4528, 0.2027]) -Greedy action tensor([-1.9315, -0.4165, 0.6581, -0.1716]) tensor([0.0405, 0.1843, 0.5398, 0.2354]) -Greedy action tensor([-1.7430, -0.4460, 0.5768, -0.0238]) tensor([0.0490, 0.1792, 0.4984, 0.2734]) -Greedy action tensor([-1.3909, -0.6104, 0.3515, 0.1077]) tensor([0.0748, 0.1633, 0.4272, 0.3348]) -Greedy action tensor([-1.6343, -0.4994, 0.5006, -0.0105]) tensor([0.0567, 0.1764, 0.4794, 0.2876]) -Greedy action tensor([-1.2398, 0.6637, 0.1950, 0.1707]) tensor([0.0625, 0.4192, 0.2623, 0.2560]) -Greedy action tensor([-1.8563, -0.4151, 0.6104, -0.1265]) tensor([0.0442, 0.1866, 0.5203, 0.2490]) -Greedy action tensor([-0.9756, -0.5516, -0.3636, -0.1358]) tensor([0.1495, 0.2285, 0.2757, 0.3463]) -Greedy action tensor([-1.0874, -0.5506, 0.2438, 0.4262]) tensor([0.0906, 0.1549, 0.3429, 0.4115]) -Greedy action tensor([-1.1869, -0.4141, 0.7637, 1.0264]) tensor([0.0517, 0.1120, 0.3636, 0.4728]) -Greedy action tensor([-1.1984, -0.4576, 0.3936, 0.5781]) tensor([0.0718, 0.1507, 0.3530, 0.4245]) -Greedy action tensor([-1.6731, -0.5216, 0.5264, 0.0059]) tensor([0.0539, 0.1706, 0.4865, 0.2891]) -Greedy action tensor([-1.9546, -0.7924, 0.3254, -0.2036]) tensor([0.0507, 0.1620, 0.4954, 0.2919]) -Greedy action tensor([-1.5234, 0.3103, 0.3660, -0.0402]) tensor([0.0547, 0.3423, 0.3619, 0.2411]) -Greedy action tensor([-0.0405, 1.1761, 0.0368, 0.4797]) tensor([0.1401, 0.4729, 0.1513, 0.2357]) -Greedy action tensor([-1.6692, -0.3933, 0.6656, 0.2321]) tensor([0.0463, 0.1658, 0.4780, 0.3099]) -Greedy action tensor([-1.9062, -0.1986, 0.5960, -0.1404]) tensor([0.0407, 0.2245, 0.4969, 0.2379]) -Greedy action tensor([-1.9748, -0.7727, 0.2392, -0.2233]) tensor([0.0520, 0.1729, 0.4756, 0.2995]) -Greedy action tensor([-0.7247, -0.3168, 0.3469, -0.2259]) tensor([0.1414, 0.2127, 0.4130, 0.2329]) -Greedy action tensor([-1.9145, -0.4336, 0.6488, -0.1668]) tensor([0.0415, 0.1823, 0.5381, 0.2381]) -Greedy action tensor([-1.7645, -0.1872, 0.5986, -0.0386]) tensor([0.0453, 0.2193, 0.4811, 0.2544]) -Greedy action tensor([-1.1411, -0.5810, 0.2689, 0.2459]) tensor([0.0922, 0.1614, 0.3775, 0.3689]) -Greedy action tensor([-1.8152, -0.4511, 0.6299, -0.0645]) tensor([0.0450, 0.1762, 0.5194, 0.2594]) -Greedy action tensor([-0.7580, 0.1320, 0.1305, 0.9647]) tensor([0.0872, 0.2124, 0.2120, 0.4884]) -Greedy action tensor([-1.9148, -0.4725, 0.6771, -0.1363]) tensor([0.0408, 0.1726, 0.5450, 0.2416]) -Greedy action tensor([-1.8859, -0.3896, 0.6292, -0.1441]) tensor([0.0425, 0.1897, 0.5254, 0.2425]) -Greedy action tensor([-0.7746, -0.1182, 1.0756, 1.4403]) tensor([0.0542, 0.1045, 0.3448, 0.4965]) -Greedy action tensor([-1.6491, -0.4542, 0.5720, 0.1584]) tensor([0.0510, 0.1684, 0.4699, 0.3107]) -Greedy action tensor([-1.2243, -0.3720, 0.4279, 0.5864]) tensor([0.0681, 0.1598, 0.3555, 0.4166]) -Greedy action tensor([-0.5471, -0.1779, 1.1187, 1.5631]) tensor([0.0626, 0.0905, 0.3309, 0.5161]) -Greedy action tensor([-1.4896, -0.6137, 0.4500, 0.1027]) tensor([0.0655, 0.1572, 0.4555, 0.3218]) -Greedy action tensor([-1.5242, -0.2252, 0.5055, 0.2167]) tensor([0.0556, 0.2039, 0.4233, 0.3172]) -Greedy action tensor([-1.7643, -0.4979, 0.5762, -0.0706]) tensor([0.0491, 0.1742, 0.5098, 0.2670]) -Greedy action tensor([-1.7369, -0.6188, 0.9560, 0.0685]) tensor([0.0401, 0.1228, 0.5930, 0.2441]) -Greedy action tensor([-0.4492, 0.0222, 0.9022, 1.4652]) tensor([0.0755, 0.1209, 0.2916, 0.5120]) -Greedy action tensor([-1.5553, -0.5902, 0.4699, 0.0300]) tensor([0.0622, 0.1632, 0.4711, 0.3035]) -Greedy action tensor([-1.5206, -0.5438, 0.4560, 0.1754]) tensor([0.0613, 0.1627, 0.4421, 0.3339]) -Greedy action tensor([-0.3895, -0.2644, 0.1439, 0.1303]) tensor([0.1812, 0.2053, 0.3088, 0.3047]) -Greedy action tensor([-1.6732, -0.4729, 0.5255, -0.0117]) tensor([0.0538, 0.1785, 0.4845, 0.2832]) -Greedy action tensor([-1.8362, -0.4985, 0.6078, -0.1233]) tensor([0.0457, 0.1742, 0.5266, 0.2535]) -Greedy action tensor([-1.9382, -0.4483, 0.6603, -0.1765]) tensor([0.0405, 0.1796, 0.5442, 0.2357]) -Greedy action tensor([-1.9311, -0.4317, 0.6549, -0.1732]) tensor([0.0407, 0.1824, 0.5407, 0.2362]) -Greedy action tensor([-1.5278, 0.2212, 0.3684, -0.0375]) tensor([0.0560, 0.3221, 0.3732, 0.2487]) -Greedy action tensor([-1.4637, -0.4898, 0.4329, 0.2460]) tensor([0.0631, 0.1672, 0.4207, 0.3490]) -Greedy action tensor([-1.8173, -0.4151, 0.6035, -0.0826]) tensor([0.0455, 0.1848, 0.5119, 0.2578]) -Greedy action tensor([-1.7882, -0.4903, 0.5874, -0.1012]) tensor([0.0480, 0.1759, 0.5166, 0.2595]) -Greedy action tensor([-1.6163, -0.3666, 0.5916, 0.3503]) tensor([0.0482, 0.1683, 0.4388, 0.3447]) -Greedy action tensor([-0.9904, -0.5184, 0.5300, -0.3901]) tensor([0.1111, 0.1781, 0.5082, 0.2025]) -Greedy action tensor([-1.8452, -0.3340, 0.5904, -0.1065]) tensor([0.0442, 0.2001, 0.5044, 0.2513]) -Greedy action tensor([-1.9213, -0.4281, 0.6568, -0.1591]) tensor([0.0409, 0.1821, 0.5388, 0.2383]) -Greedy action tensor([-1.7073, -0.4373, 0.5561, -0.0501]) tensor([0.0515, 0.1834, 0.4951, 0.2700]) -Greedy action tensor([-1.6671, -0.5178, 0.5131, 0.0018]) tensor([0.0546, 0.1723, 0.4832, 0.2898]) -Greedy action tensor([-0.9805, -0.7029, 0.2538, 0.0705]) tensor([0.1161, 0.1532, 0.3988, 0.3320]) -Greedy action tensor([-1.4345, -0.5528, 0.3864, 0.2361]) tensor([0.0671, 0.1620, 0.4144, 0.3565]) -Greedy action tensor([ 0.8972, -0.2790, 0.0283, -0.4298]) tensor([0.5017, 0.1548, 0.2104, 0.1331]) -Greedy action tensor([ 0.8434, -0.4780, -0.2119, -0.3097]) tensor([0.5180, 0.1382, 0.1803, 0.1635]) -Greedy action tensor([ 0.6677, -0.2359, -0.0338, -0.3304]) tensor([0.4406, 0.1785, 0.2185, 0.1624]) -Greedy action tensor([ 1.2155, -0.8993, 0.0038, -0.6969]) tensor([0.6385, 0.0770, 0.1901, 0.0943]) -Greedy action tensor([ 0.5129, 0.0077, -0.0083, -0.1211]) tensor([0.3666, 0.2212, 0.2177, 0.1945]) -Greedy action tensor([ 0.6121, -0.3788, -0.0475, -0.1887]) tensor([0.4278, 0.1588, 0.2212, 0.1921]) -Greedy action tensor([ 1.0572, -0.3995, 0.1182, -0.4600]) tensor([0.5425, 0.1264, 0.2121, 0.1190]) -Greedy action tensor([ 0.3018, 0.1469, -0.1241, -0.1631]) tensor([0.3187, 0.2730, 0.2082, 0.2002]) -Greedy action tensor([ 0.7912, -0.5377, -0.1131, -0.4450]) tensor([0.5102, 0.1351, 0.2065, 0.1482]) -Greedy action tensor([ 0.3955, -0.4892, -0.0167, -0.3022]) tensor([0.3887, 0.1605, 0.2574, 0.1935]) -Greedy action tensor([ 0.6247, -0.3263, -0.0745, -0.4004]) tensor([0.4460, 0.1723, 0.2217, 0.1600]) -Greedy action tensor([ 0.9056, -0.9760, 0.1596, -0.5364]) tensor([0.5367, 0.0818, 0.2546, 0.1269]) -Greedy action tensor([ 0.7958, -0.6777, -0.1312, -0.5142]) tensor([0.5278, 0.1209, 0.2089, 0.1424]) -Greedy action tensor([ 0.6345, -0.3245, -0.0376, -0.3426]) tensor([0.4405, 0.1688, 0.2249, 0.1658]) -Greedy action tensor([ 0.7343, -0.2697, -0.1359, -0.4252]) tensor([0.4764, 0.1746, 0.1996, 0.1494]) -Greedy action tensor([ 0.8490, -0.6671, -0.0055, -0.5308]) tensor([0.5272, 0.1158, 0.2243, 0.1327]) -Greedy action tensor([ 0.7145, -0.4525, -0.1945, -0.4247]) tensor([0.4916, 0.1530, 0.1981, 0.1574]) -Greedy action tensor([ 0.5149, -0.4218, 0.1140, -0.6210]) tensor([0.4197, 0.1645, 0.2811, 0.1348]) -Greedy action tensor([ 0.8554, -0.5137, -0.0864, -0.4475]) tensor([0.5219, 0.1327, 0.2035, 0.1418]) -Greedy action tensor([ 0.4988, -0.2427, 0.0254, -0.3457]) tensor([0.3954, 0.1884, 0.2463, 0.1699]) -Greedy action tensor([ 0.6269, -0.2698, -0.1500, -0.3552]) tensor([0.4460, 0.1819, 0.2051, 0.1670]) -Greedy action tensor([ 1.1476, -0.5093, -0.1876, -0.1612]) tensor([0.5801, 0.1106, 0.1526, 0.1567]) -Greedy action tensor([ 0.5186, -0.4488, -0.0268, -0.2635]) tensor([0.4137, 0.1572, 0.2398, 0.1892]) -Greedy action tensor([ 0.8865, -0.4903, 0.1923, -0.5606]) tensor([0.5033, 0.1270, 0.2513, 0.1184]) -Greedy action tensor([ 0.7281, -0.4732, -0.0523, -0.4142]) tensor([0.4812, 0.1448, 0.2205, 0.1536]) -Greedy action tensor([ 0.7670, -0.6229, 0.0273, -0.4203]) tensor([0.4923, 0.1226, 0.2349, 0.1502]) -Greedy action tensor([ 0.6781, -0.4977, -0.0404, -0.5255]) tensor([0.4771, 0.1472, 0.2326, 0.1432]) -Greedy action tensor([ 0.0703, 0.2870, -0.0984, -0.2611]) tensor([0.2628, 0.3264, 0.2220, 0.1887]) -Greedy action tensor([ 0.3791, -0.1773, 0.0014, -0.3863]) tensor([0.3671, 0.2105, 0.2516, 0.1708]) -Greedy action tensor([ 0.6674, -0.3951, -0.0646, -0.2455]) tensor([0.4488, 0.1551, 0.2159, 0.1802]) -Greedy action tensor([ 0.4062, -0.1318, -0.0180, -0.1863]) tensor([0.3583, 0.2092, 0.2344, 0.1981]) -Greedy action tensor([ 0.6813, -0.4019, 0.0364, -0.3156]) tensor([0.4480, 0.1516, 0.2351, 0.1653]) -Greedy action tensor([ 0.4983, -0.3647, 0.0677, -0.3303]) tensor([0.3986, 0.1682, 0.2592, 0.1741]) -Greedy action tensor([ 0.9938, -0.9033, 0.1682, -0.5616]) tensor([0.5558, 0.0834, 0.2434, 0.1173]) -Greedy action tensor([ 0.5168, -0.0919, -0.0289, -0.1489]) tensor([0.3791, 0.2063, 0.2197, 0.1949]) -Greedy action tensor([ 0.8574, -0.4893, 0.0513, -0.3862]) tensor([0.5012, 0.1304, 0.2239, 0.1445]) -Greedy action tensor([ 0.6363, -0.3366, 0.0538, -0.3155]) tensor([0.4306, 0.1628, 0.2405, 0.1662]) -Greedy action tensor([ 0.3708, -0.2547, -0.0958, -0.1951]) tensor([0.3663, 0.1960, 0.2297, 0.2080]) -Greedy action tensor([ 0.6381, -0.2337, -0.0752, -0.2300]) tensor([0.4296, 0.1796, 0.2105, 0.1803]) -Greedy action tensor([ 0.7725, -0.3656, -0.0030, -0.3083]) tensor([0.4717, 0.1511, 0.2172, 0.1600]) -Greedy action tensor([ 0.6491, -0.3899, 0.0623, -0.3090]) tensor([0.4360, 0.1543, 0.2425, 0.1673]) -Greedy action tensor([ 0.6360, 0.0976, -0.0903, -0.2005]) tensor([0.3999, 0.2334, 0.1934, 0.1733]) -Greedy action tensor([ 0.4080, -0.1229, -0.1680, -0.1901]) tensor([0.3704, 0.2178, 0.2082, 0.2036]) -Greedy action tensor([ 0.8834, -0.6435, -0.0596, -0.3808]) tensor([0.5293, 0.1150, 0.2062, 0.1495]) -Greedy action tensor([ 0.6359, -0.2730, -0.0875, -0.2921]) tensor([0.4379, 0.1765, 0.2125, 0.1731]) -Greedy action tensor([ 0.8001, -0.5255, 0.0199, -0.3214]) tensor([0.4879, 0.1296, 0.2236, 0.1589]) -Greedy action tensor([ 0.5854, -0.0745, 0.1161, -0.4504]) tensor([0.4004, 0.2070, 0.2504, 0.1421]) -Greedy action tensor([ 0.9149, -0.6160, 0.0763, -0.4699]) tensor([0.5266, 0.1139, 0.2277, 0.1318]) -Greedy action tensor([ 0.4200, -0.0045, -0.0187, -0.1527]) tensor([0.3493, 0.2285, 0.2252, 0.1970]) -Greedy action tensor([ 0.7175, -0.8133, -0.2114, -0.2380]) tensor([0.5010, 0.1084, 0.1979, 0.1927]) -Greedy action tensor([ 0.4158, 0.0578, -0.0258, -0.2566]) tensor([0.3506, 0.2451, 0.2254, 0.1790]) -Greedy action tensor([ 1.0215e+00, -3.7694e-01, -9.5826e-04, -7.4592e-01]) tensor([0.5626, 0.1390, 0.2024, 0.0961]) -Greedy action tensor([ 0.8362, -0.4449, -0.0021, -0.2478]) tensor([0.4882, 0.1356, 0.2111, 0.1651]) -Greedy action tensor([ 0.5039, -0.1169, -0.0275, -0.1089]) tensor([0.3749, 0.2015, 0.2204, 0.2031]) -Greedy action tensor([ 0.5798, -0.4417, 0.0018, -0.3371]) tensor([0.4309, 0.1551, 0.2417, 0.1723]) -Greedy action tensor([ 0.4739, 0.0760, -0.0705, -0.3263]) tensor([0.3702, 0.2487, 0.2148, 0.1663]) -Greedy action tensor([ 0.7331, -0.5108, 0.1177, -0.4087]) tensor([0.4656, 0.1342, 0.2516, 0.1486]) -Greedy action tensor([ 0.6058, -0.5406, -0.1172, -0.3526]) tensor([0.4573, 0.1453, 0.2219, 0.1754]) -Greedy action tensor([ 0.4270, -0.2812, -0.0348, -0.2127]) tensor([0.3773, 0.1858, 0.2378, 0.1990]) -Greedy action tensor([ 8.1298e-01, -5.4002e-01, -3.0109e-04, -3.5151e-01]) tensor([0.4965, 0.1283, 0.2202, 0.1550]) -Greedy action tensor([ 1.0815, -0.9728, 0.1655, -0.5216]) tensor([0.5782, 0.0741, 0.2313, 0.1164]) -Greedy action tensor([ 0.5590, -0.6137, -0.1838, -0.1713]) tensor([0.4411, 0.1365, 0.2099, 0.2125]) -Greedy action tensor([ 0.5129, -0.0715, -0.0366, -0.4008]) tensor([0.3944, 0.2198, 0.2276, 0.1582]) -Greedy action tensor([ 0.7891, -0.4812, -0.0790, -0.3488]) tensor([0.4948, 0.1389, 0.2077, 0.1586]) -Greedy action tensor([ 0.5143, -0.3495, -0.0481, -0.3245]) tensor([0.4126, 0.1739, 0.2351, 0.1783]) -Greedy action tensor([ 1.0172, -0.3906, -0.1084, -0.2459]) tensor([0.5400, 0.1321, 0.1752, 0.1527]) -Greedy action tensor([ 0.7693, -0.5222, -0.1234, -0.2600]) tensor([0.4898, 0.1346, 0.2006, 0.1750]) -Greedy action tensor([ 0.8876, -0.7370, -0.0071, -0.4580]) tensor([0.5359, 0.1056, 0.2190, 0.1395]) -Greedy action tensor([ 0.2935, -0.2144, -0.1271, -0.3122]) tensor([0.3566, 0.2146, 0.2342, 0.1946]) -Greedy action tensor([ 0.8937, -0.1197, -0.0078, -0.5042]) tensor([0.4960, 0.1800, 0.2014, 0.1226]) -Greedy action tensor([ 0.6671, -0.4663, -0.1056, -0.3985]) tensor([0.4699, 0.1513, 0.2170, 0.1619]) -Greedy action tensor([ 0.7968, -1.0469, 0.1797, -0.6840]) tensor([0.5194, 0.0822, 0.2802, 0.1181]) -Greedy action tensor([ 0.6390, -0.1663, 0.0958, -0.3586]) tensor([0.4173, 0.1865, 0.2424, 0.1539]) -Greedy action tensor([ 0.6897, -0.0987, 0.1305, -0.0439]) tensor([0.3990, 0.1814, 0.2281, 0.1916]) -Greedy action tensor([ 0.4514, -0.1386, 0.0571, -0.1636]) tensor([0.3611, 0.2002, 0.2435, 0.1952]) -Greedy action tensor([ 0.6312, -0.4952, -0.1063, 0.0027]) tensor([0.4281, 0.1388, 0.2048, 0.2283]) -Greedy action tensor([ 0.4023, -0.0776, -0.0707, -0.2329]) tensor([0.3608, 0.2233, 0.2248, 0.1912]) -Greedy action tensor([ 0.9735, -0.9071, -0.0752, -0.6745]) tensor([0.5898, 0.0900, 0.2067, 0.1135]) -Greedy action tensor([ 0.7380, -0.2866, -0.0224, -0.4745]) tensor([0.4709, 0.1690, 0.2201, 0.1400]) -Greedy action tensor([ 0.8977, -0.6791, 0.0225, -0.6237]) tensor([0.5429, 0.1122, 0.2263, 0.1186]) -Greedy action tensor([ 0.8113, -0.3683, -0.0912, -0.2869]) tensor([0.4887, 0.1502, 0.1982, 0.1630]) -Greedy action tensor([ 0.9747, -0.5171, 0.1317, 0.1849]) tensor([0.4741, 0.1067, 0.2041, 0.2152]) -Greedy action tensor([ 2.0903, -0.8982, -0.2580, 0.4856]) tensor([0.7425, 0.0374, 0.0709, 0.1492]) -Greedy action tensor([ 0.9462, 0.0098, -0.3508, 0.2498]) tensor([0.4622, 0.1812, 0.1263, 0.2303]) -Greedy action tensor([ 1.6505, -0.4574, -0.2364, 0.1141]) tensor([0.6720, 0.0816, 0.1018, 0.1446]) -Greedy action tensor([ 0.9303, -0.3295, 0.0278, -0.1455]) tensor([0.4925, 0.1397, 0.1998, 0.1680]) -Greedy action tensor([ 0.5201, -0.0559, -0.2240, 0.0144]) tensor([0.3787, 0.2129, 0.1800, 0.2284]) -Greedy action tensor([ 1.3978, -0.0687, -0.3904, 0.1946]) tensor([0.5888, 0.1359, 0.0985, 0.1768]) -Greedy action tensor([ 1.4945, -0.7211, -0.3513, 0.3831]) tensor([0.6265, 0.0683, 0.0989, 0.2062]) -Greedy action tensor([ 1.0746, -0.1627, -0.6110, 0.3317]) tensor([0.5125, 0.1487, 0.0950, 0.2438]) -Greedy action tensor([ 1.0864, -0.5411, -0.0123, 0.1054]) tensor([0.5250, 0.1031, 0.1750, 0.1968]) -Greedy action tensor([ 0.9141, -0.1361, -0.3408, 0.2652]) tensor([0.4635, 0.1621, 0.1321, 0.2422]) -Greedy action tensor([ 1.0014, -0.4894, 0.2347, 0.0513]) tensor([0.4816, 0.1085, 0.2237, 0.1862]) -Greedy action tensor([ 1.1819, 0.0912, -0.3957, 0.3310]) tensor([0.5077, 0.1706, 0.1048, 0.2168]) -Greedy action tensor([ 1.7179, -0.5554, -0.1413, 0.1659]) tensor([0.6800, 0.0700, 0.1059, 0.1440]) -Greedy action tensor([ 1.2552, -0.6662, -0.4774, 0.6827]) tensor([0.5299, 0.0776, 0.0937, 0.2989]) -Greedy action tensor([ 2.1079, -0.7854, -0.3470, 0.6681]) tensor([0.7256, 0.0402, 0.0623, 0.1719]) -Greedy action tensor([ 0.9541, -0.5613, -0.1670, 0.5252]) tensor([0.4552, 0.1000, 0.1484, 0.2964]) -Greedy action tensor([ 1.6203, -0.1408, -0.1951, 0.6781]) tensor([0.5799, 0.0997, 0.0944, 0.2260]) -Greedy action tensor([ 0.0664, 0.0641, -0.4413, 0.1188]) tensor([0.2737, 0.2731, 0.1647, 0.2885]) -Greedy action tensor([ 1.3186, -0.4569, -0.3469, 0.1618]) tensor([0.5977, 0.1013, 0.1130, 0.1880]) -Greedy action tensor([ 1.3083, -0.4568, -0.0845, 0.1340]) tensor([0.5785, 0.0990, 0.1437, 0.1788]) -Greedy action tensor([ 1.8876, 0.0404, -0.8263, 0.9238]) tensor([0.6229, 0.0982, 0.0413, 0.2376]) -Greedy action tensor([ 1.4927, 0.0214, -0.2267, 0.5867]) tensor([0.5516, 0.1267, 0.0988, 0.2229]) -Greedy action tensor([ 1.2730, -0.9062, 0.0221, 0.2123]) tensor([0.5729, 0.0648, 0.1640, 0.1983]) -Greedy action tensor([ 1.2850, -0.1892, -0.3609, 0.1331]) tensor([0.5754, 0.1318, 0.1110, 0.1819]) -Greedy action tensor([ 1.6649, -0.7544, -0.5353, 0.4672]) tensor([0.6659, 0.0593, 0.0738, 0.2010]) -Greedy action tensor([ 1.3279, -0.5304, -0.5657, 0.6356]) tensor([0.5534, 0.0863, 0.0833, 0.2769]) -Greedy action tensor([ 0.9789, -0.5730, 0.3182, 0.1762]) tensor([0.4595, 0.0973, 0.2373, 0.2059]) -Greedy action tensor([ 1.6612, -0.3375, -0.5933, 0.6609]) tensor([0.6218, 0.0843, 0.0652, 0.2287]) -Greedy action tensor([ 1.7236, -0.2284, -0.4323, 0.2962]) tensor([0.6677, 0.0948, 0.0773, 0.1602]) -Greedy action tensor([ 0.5812, -0.3357, -0.1007, 0.2135]) tensor([0.3849, 0.1539, 0.1947, 0.2665]) -Greedy action tensor([ 1.4530, -0.1516, -0.8750, 0.3055]) tensor([0.6189, 0.1244, 0.0603, 0.1964]) -Greedy action tensor([ 1.5543, -0.6741, -0.5490, 0.3924]) tensor([0.6482, 0.0698, 0.0791, 0.2028]) -Greedy action tensor([ 0.8389, -0.2293, -0.0778, 0.0275]) tensor([0.4571, 0.1571, 0.1828, 0.2031]) -Greedy action tensor([ 0.7854, -0.3527, -0.2774, 0.1277]) tensor([0.4579, 0.1467, 0.1582, 0.2372]) -Greedy action tensor([ 2.0228, -0.6891, -0.1793, 0.5564]) tensor([0.7104, 0.0472, 0.0785, 0.1639]) -Greedy action tensor([ 1.6020, -0.5375, -0.3864, 0.3485]) tensor([0.6493, 0.0764, 0.0889, 0.1854]) -Greedy action tensor([ 1.0772, -0.4263, -0.2889, 0.3862]) tensor([0.5054, 0.1124, 0.1289, 0.2533]) -Greedy action tensor([ 0.8626, -0.2201, -0.1248, 0.0570]) tensor([0.4634, 0.1569, 0.1726, 0.2071]) -Greedy action tensor([ 0.5639, -0.3051, -0.2820, 0.4120]) tensor([0.3693, 0.1549, 0.1585, 0.3173]) -Greedy action tensor([ 1.3234, -0.7726, -0.4968, 0.3687]) tensor([0.5988, 0.0736, 0.0970, 0.2305]) -Greedy action tensor([ 1.5698, -0.2505, -0.4141, 0.3777]) tensor([0.6238, 0.1010, 0.0858, 0.1894]) -Greedy action tensor([ 1.5775, -0.6762, -0.3914, 0.5558]) tensor([0.6232, 0.0654, 0.0870, 0.2243]) -Greedy action tensor([ 0.7994, -0.3767, 0.0462, -0.1837]) tensor([0.4644, 0.1432, 0.2186, 0.1738]) -Greedy action tensor([ 1.4164, -0.5426, -0.1521, 0.2244]) tensor([0.6050, 0.0853, 0.1261, 0.1837]) -Greedy action tensor([ 1.2167, -0.0386, -0.8333, 0.2687]) tensor([0.5552, 0.1582, 0.0715, 0.2151]) -Greedy action tensor([ 0.9007, -0.4681, -0.1628, 0.1469]) tensor([0.4830, 0.1229, 0.1668, 0.2273]) -Greedy action tensor([ 0.7706, 0.0517, -0.0814, 0.0035]) tensor([0.4205, 0.2049, 0.1794, 0.1953]) -Greedy action tensor([ 1.3942, -0.3761, -0.2817, 0.4824]) tensor([0.5684, 0.0968, 0.1064, 0.2284]) -Greedy action tensor([ 1.2621, -0.1279, -0.4953, 0.0991]) tensor([0.5767, 0.1436, 0.0995, 0.1802]) -Greedy action tensor([ 1.0196, -0.2429, 0.0113, -0.3864]) tensor([0.5283, 0.1495, 0.1927, 0.1295]) -Greedy action tensor([ 1.7984, -0.6605, -0.4124, 0.0520]) tensor([0.7302, 0.0624, 0.0800, 0.1273]) -Greedy action tensor([ 1.0810, -0.3177, -0.1461, 0.5526]) tensor([0.4696, 0.1159, 0.1377, 0.2768]) -Greedy action tensor([ 1.0995, -0.5510, -0.2226, 0.1270]) tensor([0.5445, 0.1045, 0.1451, 0.2059]) -Greedy action tensor([ 0.7328, -0.4400, -0.2195, 0.0536]) tensor([0.4541, 0.1405, 0.1752, 0.2302]) -Greedy action tensor([ 1.3509, -0.1751, -0.3720, -0.2518]) tensor([0.6261, 0.1361, 0.1118, 0.1261]) -Greedy action tensor([ 1.5997, -0.2319, 0.0809, -0.1569]) tensor([0.6444, 0.1032, 0.1411, 0.1113]) -Greedy action tensor([ 1.8012, -0.8514, -0.4455, 0.6233]) tensor([0.6738, 0.0475, 0.0713, 0.2075]) -Greedy action tensor([ 0.4718, 0.1812, -0.2933, -0.3421]) tensor([0.3765, 0.2815, 0.1752, 0.1668]) -Greedy action tensor([ 1.2184, -0.1550, -0.5239, 0.0583]) tensor([0.5741, 0.1454, 0.1005, 0.1800]) -Greedy action tensor([ 1.4617, -0.6558, -0.2428, 0.4296]) tensor([0.6030, 0.0726, 0.1097, 0.2148]) -Greedy action tensor([ 0.8242, -0.1116, -0.3807, 0.0778]) tensor([0.4617, 0.1811, 0.1384, 0.2189]) -Greedy action tensor([ 1.6379, -0.9891, -0.2750, 0.7485]) tensor([0.6132, 0.0443, 0.0905, 0.2520]) -Greedy action tensor([ 1.5825, -0.4812, -0.5243, 0.8589]) tensor([0.5768, 0.0732, 0.0702, 0.2798]) -Greedy action tensor([ 2.0245, -0.5912, -0.0783, 0.7278]) tensor([0.6809, 0.0498, 0.0832, 0.1862]) -Greedy action tensor([ 1.6818, -0.5334, -0.5093, 0.8355]) tensor([0.6061, 0.0661, 0.0678, 0.2600]) -Greedy action tensor([ 1.0788, -0.1483, -0.3438, 0.2476]) tensor([0.5077, 0.1488, 0.1224, 0.2211]) -Greedy action tensor([ 1.4071, -0.6221, -0.2925, 0.0315]) tensor([0.6382, 0.0839, 0.1166, 0.1613]) -Greedy action tensor([ 0.8946, -0.1759, -0.1381, 0.2050]) tensor([0.4544, 0.1558, 0.1618, 0.2280]) -Greedy action tensor([ 0.7800, 0.0948, -0.1125, -0.1399]) tensor([0.4325, 0.2180, 0.1772, 0.1724]) -Greedy action tensor([ 1.1133, -0.6690, -0.2153, 0.0881]) tensor([0.5581, 0.0939, 0.1478, 0.2002]) -Greedy action tensor([ 1.3397, -0.4282, -0.2168, 0.2185]) tensor([0.5857, 0.1000, 0.1235, 0.1909]) -Greedy action tensor([ 1.4922, -0.2391, -0.2460, -0.0898]) tensor([0.6417, 0.1136, 0.1128, 0.1319]) -Greedy action tensor([ 1.2426, -0.5746, -0.2341, 0.0669]) tensor([0.5884, 0.0956, 0.1344, 0.1816]) -Greedy action tensor([ 1.5111, -0.3415, -0.1819, 0.1472]) tensor([0.6264, 0.0982, 0.1152, 0.1601]) -Greedy action tensor([ 1.1836, -0.0209, -0.3469, 0.3293]) tensor([0.5150, 0.1544, 0.1115, 0.2192]) -Greedy action tensor([ 1.1171, 0.1064, 0.0388, -0.0299]) tensor([0.4946, 0.1800, 0.1683, 0.1571]) -Greedy action tensor([ 1.6954, -0.2634, -0.3995, -0.0386]) tensor([0.6941, 0.0979, 0.0854, 0.1226]) -Greedy action tensor([ 1.2627, -0.5312, -0.3969, 0.2742]) tensor([0.5785, 0.0962, 0.1100, 0.2153]) -Greedy action tensor([ 1.3819, -0.5864, -0.3581, -0.0521]) tensor([0.6437, 0.0899, 0.1130, 0.1534]) -Greedy action tensor([ 1.6162, -0.1251, -0.5726, 0.2669]) tensor([0.6465, 0.1133, 0.0724, 0.1677]) -Greedy action tensor([-0.1575, -0.2250, 0.5641, -0.4427]) tensor([0.2108, 0.1970, 0.4337, 0.1585]) -Greedy action tensor([ 0.7084, -0.2925, 0.9666, -0.0997]) tensor([0.3218, 0.1183, 0.4165, 0.1434]) -Greedy action tensor([-0.2010, -1.6268, 0.5781, 0.1910]) tensor([0.2041, 0.0490, 0.4448, 0.3020]) -Greedy action tensor([-0.5184, -0.0946, 0.4007, -1.6550]) tensor([0.1867, 0.2853, 0.4681, 0.0599]) -Greedy action tensor([-0.4253, -1.0743, 0.3808, -0.0872]) tensor([0.1936, 0.1012, 0.4336, 0.2715]) -Greedy action tensor([-0.6465, -0.6311, -0.0788, -0.2354]) tensor([0.1891, 0.1920, 0.3336, 0.2853]) -Greedy action tensor([ 0.3478, -1.2258, -0.1085, 0.2849]) tensor([0.3597, 0.0746, 0.2279, 0.3378]) -Greedy action tensor([ 1.0036, -2.0753, 0.1907, 0.0368]) tensor([0.5348, 0.0246, 0.2372, 0.2034]) -Greedy action tensor([0.0208, 0.7007, 0.7251, 0.4011]) tensor([0.1548, 0.3056, 0.3131, 0.2265]) -Greedy action tensor([ 0.1657, -1.5401, -0.0708, 0.3009]) tensor([0.3209, 0.0583, 0.2534, 0.3674]) -Greedy action tensor([1.1783, 0.1386, 0.1390, 0.7272]) tensor([0.4266, 0.1508, 0.1509, 0.2717]) -Greedy action tensor([ 0.4374, -1.7141, -0.0137, 0.2743]) tensor([0.3842, 0.0447, 0.2447, 0.3264]) -Greedy action tensor([ 0.5767, -0.7075, 0.3517, -0.1651]) tensor([0.3919, 0.1085, 0.3129, 0.1867]) -Greedy action tensor([-0.0528, 0.1982, 0.4663, 0.4850]) tensor([0.1761, 0.2264, 0.2960, 0.3016]) -Greedy action tensor([ 0.3531, -1.2689, 0.1478, 0.1873]) tensor([0.3498, 0.0691, 0.2848, 0.2963]) -Greedy action tensor([-0.1498, -0.8239, -0.1060, -0.4228]) tensor([0.3016, 0.1537, 0.3151, 0.2296]) -Greedy action tensor([-0.8575, -0.3514, -0.2668, -0.9538]) tensor([0.1861, 0.3088, 0.3360, 0.1691]) -Greedy action tensor([ 0.7158, -1.6373, -0.3120, 0.3576]) tensor([0.4647, 0.0442, 0.1663, 0.3248]) -Greedy action tensor([-0.8230, -0.7991, 0.9855, -0.5556]) tensor([0.1060, 0.1086, 0.6469, 0.1385]) -Greedy action tensor([ 0.6936, -0.7022, 1.7702, -0.1467]) tensor([0.2167, 0.0537, 0.6361, 0.0935]) -Greedy action tensor([0.5647, 0.1947, 0.4697, 0.0934]) tensor([0.3102, 0.2142, 0.2820, 0.1936]) -Greedy action tensor([-0.4161, 0.3872, 0.6867, -0.1858]) tensor([0.1333, 0.2975, 0.4014, 0.1678]) -Greedy action tensor([-0.1115, -2.7508, 0.0886, 0.7815]) tensor([0.2112, 0.0151, 0.2580, 0.5158]) -Greedy action tensor([-1.5794, 0.3722, 0.1106, -0.5181]) tensor([0.0612, 0.4306, 0.3315, 0.1768]) -Greedy action tensor([-0.8788, -0.7109, 1.6179, -0.7951]) tensor([0.0649, 0.0767, 0.7878, 0.0705]) -Greedy action tensor([ 0.9180, -1.7531, 1.3996, -1.1847]) tensor([0.3559, 0.0246, 0.5760, 0.0435]) -Greedy action tensor([-1.0986, -1.4369, -0.6195, -0.1936]) tensor([0.1724, 0.1229, 0.2784, 0.4262]) -Greedy action tensor([-0.6171, 1.4235, 0.5831, -1.4741]) tensor([0.0804, 0.6186, 0.2669, 0.0341]) -Greedy action tensor([ 0.1451, 0.5246, 1.4030, -0.9657]) tensor([0.1585, 0.2317, 0.5576, 0.0522]) -Greedy action tensor([-0.0865, -1.7685, -0.9073, 0.6604]) tensor([0.2676, 0.0498, 0.1178, 0.5648]) -Greedy action tensor([-0.3806, -0.6446, 1.4008, -1.1881]) tensor([0.1227, 0.0942, 0.7284, 0.0547]) -Greedy action tensor([-0.0559, -0.7514, 0.8626, 1.1114]) tensor([0.1385, 0.0691, 0.3471, 0.4452]) -Greedy action tensor([0.7738, 1.0848, 0.9867, 1.3479]) tensor([0.1860, 0.2538, 0.2301, 0.3302]) -Greedy action tensor([ 0.3906, -1.5339, -0.7583, 0.7919]) tensor([0.3382, 0.0494, 0.1072, 0.5052]) -Greedy action tensor([-0.2712, -0.7994, -0.1457, 1.0382]) tensor([0.1556, 0.0917, 0.1764, 0.5763]) -Greedy action tensor([ 1.3856, -1.0575, 1.4359, 1.1114]) tensor([0.3450, 0.0300, 0.3628, 0.2622]) -Greedy action tensor([ 0.6704, -1.0480, 1.2485, 0.0869]) tensor([0.2841, 0.0510, 0.5064, 0.1585]) -Greedy action tensor([ 0.3353, -1.3314, 0.7056, 0.1896]) tensor([0.2856, 0.0539, 0.4136, 0.2469]) -Greedy action tensor([-0.5390, -0.3016, -0.4011, -0.7327]) tensor([0.2359, 0.2991, 0.2707, 0.1943]) -Greedy action tensor([-0.3091, -1.1393, 0.4873, 0.0571]) tensor([0.1962, 0.0856, 0.4352, 0.2830]) -Greedy action tensor([ 1.5870, -1.0673, 0.7678, 1.5502]) tensor([0.4040, 0.0284, 0.1781, 0.3895]) -Greedy action tensor([-0.1018, -0.4053, -1.3247, 1.8703]) tensor([0.1085, 0.0801, 0.0319, 0.7795]) -Greedy action tensor([-0.6328, -0.5636, 0.7315, -0.8488]) tensor([0.1473, 0.1578, 0.5763, 0.1187]) -Greedy action tensor([ 0.9303, 0.6077, 0.1024, -0.9028]) tensor([0.4308, 0.3120, 0.1882, 0.0689]) -Greedy action tensor([ 1.2847, -1.0066, -0.2689, 0.3047]) tensor([0.5924, 0.0599, 0.1253, 0.2224]) -Greedy action tensor([ 0.1246, -1.3661, 0.7177, -0.1912]) tensor([0.2657, 0.0598, 0.4808, 0.1937]) -Greedy action tensor([ 0.6723, 0.3344, 0.3448, -0.5758]) tensor([0.3675, 0.2621, 0.2649, 0.1055]) -Greedy action tensor([ 0.5584, -1.5288, -0.5412, 0.3127]) tensor([0.4466, 0.0554, 0.1487, 0.3493]) -Greedy action tensor([-0.3369, 0.3538, 0.1747, 0.3380]) tensor([0.1509, 0.3011, 0.2517, 0.2963]) -Greedy action tensor([-0.2986, -1.5448, 0.1333, 0.3639]) tensor([0.2098, 0.0603, 0.3231, 0.4069]) -Greedy action tensor([ 0.5576, 0.4234, 0.0054, -0.5374]) tensor([0.3591, 0.3140, 0.2067, 0.1201]) -Greedy action tensor([ 0.5826, -1.0775, 1.7382, -0.2700]) tensor([0.2087, 0.0397, 0.6627, 0.0890]) -Greedy action tensor([-1.3926, -0.3753, -0.2248, 0.1803]) tensor([0.0847, 0.2344, 0.2724, 0.4085]) -Greedy action tensor([-1.2316, -1.2653, -0.4908, -0.4153]) tensor([0.1581, 0.1528, 0.3316, 0.3575]) -Greedy action tensor([ 0.6945, 0.1216, -0.7265, -0.4928]) tensor([0.4738, 0.2672, 0.1144, 0.1445]) -Greedy action tensor([-0.1669, -1.3216, -0.1349, 1.3766]) tensor([0.1423, 0.0448, 0.1469, 0.6660]) -Greedy action tensor([ 0.9492, -1.3374, 0.8820, -0.2897]) tensor([0.4299, 0.0437, 0.4019, 0.1245]) -Greedy action tensor([ 0.1984, -0.4622, -0.9289, 0.9202]) tensor([0.2565, 0.1325, 0.0831, 0.5279]) -Greedy action tensor([0.3287, 0.6913, 0.7754, 0.9871]) tensor([0.1686, 0.2423, 0.2635, 0.3256]) -Greedy action tensor([-0.7287, -1.5998, -0.0100, 1.1125]) tensor([0.1023, 0.0428, 0.2099, 0.6450]) -Greedy action tensor([ 1.3927, -0.4697, 0.5242, 0.2081]) tensor([0.5317, 0.0826, 0.2231, 0.1626]) -Greedy action tensor([ 1.3391, 0.2747, 1.0078, -0.9647]) tensor([0.4623, 0.1595, 0.3320, 0.0462]) -Greedy action tensor([ 0.3078, -0.4436, 0.2535, 2.1214]) tensor([0.1169, 0.0552, 0.1108, 0.7171]) -Greedy action tensor([ 1.2824, -1.9568, 1.0059, 1.7007]) tensor([0.3015, 0.0118, 0.2286, 0.4581]) -Greedy action tensor([-0.5910, -1.1148, 0.4593, 1.3650]) tensor([0.0868, 0.0514, 0.2481, 0.6137]) -Greedy action tensor([-0.2785, -1.0957, 0.1085, -0.1944]) tensor([0.2499, 0.1104, 0.3680, 0.2718]) -Greedy action tensor([-0.2813, -0.7350, 0.7938, 0.1167]) tensor([0.1652, 0.1049, 0.4840, 0.2459]) -Greedy action tensor([0.5646, 0.6471, 0.6291, 0.0862]) tensor([0.2651, 0.2879, 0.2827, 0.1643]) -Greedy action tensor([ 1.1221, -1.8110, -0.4084, 0.7233]) tensor([0.5153, 0.0274, 0.1115, 0.3458]) -Greedy action tensor([-0.8910, -0.7292, 0.7878, -0.2629]) tensor([0.1063, 0.1250, 0.5696, 0.1992]) -Greedy action tensor([-0.3851, -1.6603, 1.0912, 0.7981]) tensor([0.1121, 0.0313, 0.4906, 0.3660]) -Greedy action tensor([ 0.4409, 0.1712, -0.3896, -0.4217]) tensor([0.3815, 0.2913, 0.1663, 0.1610]) -Greedy action tensor([-0.1710, -0.7157, 0.6345, 0.5177]) tensor([0.1721, 0.0999, 0.3852, 0.3428]) -Greedy action tensor([ 0.8435, 0.2785, 2.7298, -0.2070]) tensor([0.1175, 0.0668, 0.7747, 0.0411]) -Greedy action tensor([-0.9684, -1.1933, -1.1351, -0.4585]) tensor([0.2320, 0.1853, 0.1964, 0.3863]) -Greedy action tensor([ 0.5266, 0.1573, -0.2432, -0.2357]) tensor([0.3815, 0.2637, 0.1767, 0.1780]) -Greedy action tensor([ 0.0267, -1.3893, 0.5429, 0.0311]) tensor([0.2549, 0.0619, 0.4272, 0.2561]) -Greedy action tensor([ 0.3185, -0.1843, -0.5069, 1.9074]) tensor([0.1441, 0.0871, 0.0631, 0.7057]) -Greedy action tensor([-0.6597, -0.2747, -1.4341, 1.0289]) tensor([0.1199, 0.1762, 0.0553, 0.6487]) -Greedy action tensor([ 0.6860, -1.5600, -0.1519, 1.4681]) tensor([0.2685, 0.0284, 0.1162, 0.5870]) -Greedy action tensor([ 0.9712, -1.3499, -0.3563, 1.1588]) tensor([0.3891, 0.0382, 0.1032, 0.4695]) -Greedy action tensor([-0.7932, -0.1048, 0.8812, -0.5974]) tensor([0.1048, 0.2086, 0.5592, 0.1275]) -Greedy action tensor([-1.9251, -0.4169, 0.6548, -0.1641]) tensor([0.0408, 0.1842, 0.5379, 0.2372]) -Greedy action tensor([-1.9148, -0.4517, 0.6537, -0.1591]) tensor([0.0414, 0.1788, 0.5402, 0.2396]) -Greedy action tensor([-1.7879, -0.5030, 0.6410, -0.0260]) tensor([0.0459, 0.1659, 0.5209, 0.2673]) -Greedy action tensor([-1.8252, -0.4646, 0.6156, -0.0821]) tensor([0.0453, 0.1764, 0.5197, 0.2587]) -Greedy action tensor([-1.5191, -0.5668, 0.5302, 0.1488]) tensor([0.0600, 0.1556, 0.4661, 0.3183]) -Greedy action tensor([-1.3751, -0.4982, 0.7991, 0.7730]) tensor([0.0482, 0.1157, 0.4235, 0.4126]) -Greedy action tensor([-1.8417, -0.3222, 0.5958, -0.1052]) tensor([0.0441, 0.2014, 0.5043, 0.2502]) -Greedy action tensor([-1.8768, -0.4745, 0.6741, -0.0912]) tensor([0.0419, 0.1704, 0.5376, 0.2501]) -Greedy action tensor([-1.8003, -0.3633, 0.6600, -0.0482]) tensor([0.0441, 0.1855, 0.5162, 0.2542]) -Greedy action tensor([-1.6815, -0.4488, 0.5505, 0.0199]) tensor([0.0520, 0.1784, 0.4846, 0.2851]) -Greedy action tensor([-1.6238, -0.2752, 0.4671, 0.0146]) tensor([0.0553, 0.2129, 0.4473, 0.2845]) -Greedy action tensor([-1.5430, -0.5438, 0.4915, 0.1536]) tensor([0.0595, 0.1615, 0.4547, 0.3243]) -Greedy action tensor([-1.9329, -0.4490, 0.6591, -0.1756]) tensor([0.0407, 0.1795, 0.5438, 0.2360]) -Greedy action tensor([-1.8880, -0.4654, 0.6385, -0.1582]) tensor([0.0429, 0.1780, 0.5370, 0.2421]) -Greedy action tensor([-1.8701, -0.4494, 0.6292, -0.1414]) tensor([0.0436, 0.1804, 0.5305, 0.2455]) -Greedy action tensor([-1.8730, -0.4552, 0.7125, -0.0022]) tensor([0.0402, 0.1658, 0.5331, 0.2609]) -Greedy action tensor([-1.6466, -0.5640, 0.5213, 0.0147]) tensor([0.0557, 0.1644, 0.4867, 0.2932]) -Greedy action tensor([-1.6007, 0.4258, -0.1764, -0.5905]) tensor([0.0646, 0.4899, 0.2683, 0.1773]) -Greedy action tensor([-1.3475, 0.0588, 0.1734, 0.8444]) tensor([0.0537, 0.2193, 0.2459, 0.4811]) -Greedy action tensor([-1.9390, -0.4521, 0.6632, -0.1790]) tensor([0.0404, 0.1789, 0.5456, 0.2350]) -Greedy action tensor([-1.7936, -0.5274, 0.6320, -0.0945]) tensor([0.0469, 0.1663, 0.5303, 0.2564]) -Greedy action tensor([-1.7805, -0.4353, 0.5814, -0.0849]) tensor([0.0478, 0.1837, 0.5077, 0.2608]) -Greedy action tensor([-1.3953, -0.2625, 0.3134, 0.2146]) tensor([0.0684, 0.2122, 0.3775, 0.3420]) -Greedy action tensor([-1.6662, -0.5268, 0.5291, 0.0274]) tensor([0.0539, 0.1685, 0.4843, 0.2933]) -Greedy action tensor([-1.6921, -0.5167, 0.5546, 0.0403]) tensor([0.0517, 0.1674, 0.4887, 0.2922]) -Greedy action tensor([-1.9285, -0.4397, 0.6576, -0.1693]) tensor([0.0408, 0.1808, 0.5416, 0.2369]) -Greedy action tensor([-1.2560, -0.4370, 1.1597, 1.2501]) tensor([0.0374, 0.0849, 0.4190, 0.4587]) -Greedy action tensor([-0.5892, -0.4986, 0.4128, 0.0636]) tensor([0.1484, 0.1625, 0.4041, 0.2850]) -Greedy action tensor([-1.0995, -0.5367, 0.7494, 1.1825]) tensor([0.0529, 0.0929, 0.3360, 0.5182]) -Greedy action tensor([-0.8760, -0.1417, 0.2060, 0.1451]) tensor([0.1135, 0.2365, 0.3349, 0.3151]) -Greedy action tensor([-1.3333, -0.6265, 0.3650, 0.2481]) tensor([0.0749, 0.1518, 0.4092, 0.3641]) -Greedy action tensor([-1.8417, -0.4535, 0.6115, -0.1276]) tensor([0.0451, 0.1806, 0.5240, 0.2502]) -Greedy action tensor([-1.2855, 0.0192, 0.5430, 0.6990]) tensor([0.0550, 0.2027, 0.3423, 0.4000]) -Greedy action tensor([-1.5096, -0.5093, 0.4717, 0.0491]) tensor([0.0636, 0.1729, 0.4612, 0.3023]) -Greedy action tensor([-1.8906, -0.4454, 0.7687, 0.0287]) tensor([0.0380, 0.1610, 0.5423, 0.2587]) -Greedy action tensor([-1.6832, -0.4774, 0.7223, 0.3853]) tensor([0.0429, 0.1431, 0.4750, 0.3391]) -Greedy action tensor([-1.5172, -0.4858, 0.7205, 0.4739]) tensor([0.0488, 0.1368, 0.4571, 0.3572]) -Greedy action tensor([-1.2897, -0.5857, 0.4331, 0.0262]) tensor([0.0810, 0.1637, 0.4534, 0.3019]) -Greedy action tensor([-0.7483, -0.1291, 0.2127, -0.0935]) tensor([0.1352, 0.2511, 0.3534, 0.2602]) -Greedy action tensor([-0.6804, -0.1631, -0.5287, 0.0121]) tensor([0.1712, 0.2872, 0.1993, 0.3422]) -Greedy action tensor([-1.5358, -0.5546, 0.4547, 0.0722]) tensor([0.0626, 0.1669, 0.4580, 0.3125]) -Greedy action tensor([-1.6193, -0.5031, 0.5287, 0.0916]) tensor([0.0551, 0.1682, 0.4719, 0.3048]) -Greedy action tensor([-0.6720, -0.5793, 0.1412, 0.2335]) tensor([0.1465, 0.1607, 0.3304, 0.3623]) -Greedy action tensor([-1.8580, -0.4200, 0.6280, -0.1257]) tensor([0.0437, 0.1841, 0.5251, 0.2471]) -Greedy action tensor([-1.7855, -0.3645, 0.6297, -0.0324]) tensor([0.0452, 0.1873, 0.5063, 0.2611]) -Greedy action tensor([-1.6250, -0.5358, 0.5346, 0.1114]) tensor([0.0546, 0.1623, 0.4732, 0.3099]) -Greedy action tensor([-1.8627, -0.4570, 0.6541, -0.1051]) tensor([0.0430, 0.1753, 0.5325, 0.2492]) -Greedy action tensor([-1.7027, 0.1064, 0.4551, -0.0431]) tensor([0.0476, 0.2905, 0.4117, 0.2502]) -Greedy action tensor([-1.9944, -0.9622, 0.4101, -0.3205]) tensor([0.0495, 0.1389, 0.5478, 0.2638]) -Greedy action tensor([-1.9169, -0.4615, 0.6690, -0.1358]) tensor([0.0408, 0.1750, 0.5419, 0.2423]) -Greedy action tensor([-1.2725, -0.5741, 0.3088, 0.2060]) tensor([0.0816, 0.1640, 0.3966, 0.3578]) -Greedy action tensor([-1.8849, -0.3323, 0.6202, -0.1376]) tensor([0.0422, 0.1993, 0.5165, 0.2421]) -Greedy action tensor([-0.1727, -0.1446, 0.3399, 0.8882]) tensor([0.1518, 0.1561, 0.2535, 0.4386]) -Greedy action tensor([-1.8520, -0.4722, 0.6251, -0.1210]) tensor([0.0444, 0.1764, 0.5286, 0.2506]) -Greedy action tensor([-0.1528, 0.0735, 0.8586, 1.6263]) tensor([0.0915, 0.1147, 0.2516, 0.5422]) -Greedy action tensor([-1.7218, -0.6378, 1.2577, 0.6453]) tensor([0.0292, 0.0862, 0.5737, 0.3110]) -Greedy action tensor([-1.7596, -0.5148, 0.5853, -0.0934]) tensor([0.0495, 0.1719, 0.5165, 0.2620]) -Greedy action tensor([-1.7866, -0.5062, 0.5877, -0.0818]) tensor([0.0480, 0.1726, 0.5155, 0.2639]) -Greedy action tensor([-1.8267, -0.4082, 0.6101, -0.1145]) tensor([0.0452, 0.1869, 0.5173, 0.2506]) -Greedy action tensor([-0.5635, -0.3196, 0.9981, 1.6068]) tensor([0.0633, 0.0808, 0.3016, 0.5543]) -Greedy action tensor([-1.8552, -0.4260, 0.6147, -0.1180]) tensor([0.0441, 0.1841, 0.5213, 0.2505]) -Greedy action tensor([-1.8733, -0.4485, 0.6341, -0.1498]) tensor([0.0434, 0.1805, 0.5328, 0.2433]) -Greedy action tensor([-1.7653, -0.4807, 0.5745, -0.1085]) tensor([0.0494, 0.1786, 0.5129, 0.2591]) -Greedy action tensor([-1.7810, -0.4773, 0.5921, -0.0741]) tensor([0.0478, 0.1760, 0.5128, 0.2634]) -Greedy action tensor([-0.0486, 0.9923, 0.2399, 0.7887]) tensor([0.1338, 0.3788, 0.1785, 0.3090]) -Greedy action tensor([-1.3119, -0.4425, 0.5783, 0.7035]) tensor([0.0571, 0.1362, 0.3781, 0.4286]) -Greedy action tensor([-1.8497, -0.4704, 0.6842, -0.0479]) tensor([0.0423, 0.1681, 0.5332, 0.2564]) -Greedy action tensor([-1.5670, -0.2977, 0.5653, -0.2873]) tensor([0.0603, 0.2145, 0.5084, 0.2168]) -Greedy action tensor([-1.8770, -0.4411, 0.6358, -0.1427]) tensor([0.0431, 0.1811, 0.5317, 0.2441]) -Greedy action tensor([-0.9837, -0.6444, 0.2107, 0.6005]) tensor([0.0945, 0.1327, 0.3120, 0.4608]) -Greedy action tensor([-1.8010, -0.4287, 0.5902, -0.1050]) tensor([0.0469, 0.1850, 0.5124, 0.2557]) -Greedy action tensor([-1.7243, -0.4598, 0.5600, -0.0627]) tensor([0.0509, 0.1804, 0.5003, 0.2684]) -Greedy action tensor([-1.8601, -0.3382, 0.6101, -0.1487]) tensor([0.0436, 0.1997, 0.5154, 0.2413]) -Greedy action tensor([-1.6997, -0.4241, 0.6051, 0.1341]) tensor([0.0479, 0.1717, 0.4804, 0.3000]) -Greedy action tensor([-0.0187, -0.1077, 1.0352, 1.7190]) tensor([0.0955, 0.0874, 0.2741, 0.5430]) -Greedy action tensor([-1.5867, -0.2519, 0.6168, 0.3159]) tensor([0.0486, 0.1848, 0.4405, 0.3260]) -Greedy action tensor([-0.6038, -0.4672, 0.3538, -0.1216]) tensor([0.1569, 0.1799, 0.4089, 0.2542]) -Greedy action tensor([-1.7355, -0.4909, 0.5542, -0.0575]) tensor([0.0508, 0.1762, 0.5011, 0.2719]) -Greedy action tensor([-1.4590, -0.0595, -0.1755, -0.4307]) tensor([0.0873, 0.3537, 0.3150, 0.2440]) -Greedy action tensor([-1.7361, -0.2149, 0.5308, -0.0495]) tensor([0.0485, 0.2219, 0.4678, 0.2618]) -Greedy action tensor([-1.7595, -1.0810, 0.0376, -0.7062]) tensor([0.0842, 0.1660, 0.5082, 0.2415]) -Greedy action tensor([ 0.6304, -0.1882, -0.0446, -0.0936]) tensor([0.4107, 0.1811, 0.2091, 0.1991]) -Greedy action tensor([ 1.3112, -0.7029, -0.1218, -0.5319]) tensor([0.6534, 0.0872, 0.1559, 0.1035]) -Greedy action tensor([ 0.4843, -0.0278, -0.0441, -0.2318]) tensor([0.3735, 0.2238, 0.2202, 0.1825]) -Greedy action tensor([ 0.5004, -0.0592, -0.1666, -0.0170]) tensor([0.3730, 0.2132, 0.1914, 0.2223]) -Greedy action tensor([ 0.7555, -0.4316, 0.2140, -0.2826]) tensor([0.4462, 0.1361, 0.2596, 0.1580]) -Greedy action tensor([ 0.5820, -0.1775, 0.0770, -0.8266]) tensor([0.4318, 0.2020, 0.2606, 0.1056]) -Greedy action tensor([ 0.3536, -0.0632, -0.0309, -0.2011]) tensor([0.3432, 0.2262, 0.2336, 0.1970]) -Greedy action tensor([ 0.5385, -0.4321, -0.0754, -0.1750]) tensor([0.4149, 0.1572, 0.2246, 0.2033]) -Greedy action tensor([ 0.1055, 0.0857, 0.0091, -0.3380]) tensor([0.2833, 0.2777, 0.2572, 0.1818]) -Greedy action tensor([ 0.4515, 0.0599, -0.1274, -0.0217]) tensor([0.3497, 0.2364, 0.1960, 0.2179]) -Greedy action tensor([ 0.8999, -0.6067, 0.0252, -0.4512]) tensor([0.5270, 0.1168, 0.2197, 0.1365]) -Greedy action tensor([ 0.4644, -0.2421, 0.0314, -0.2799]) tensor([0.3821, 0.1885, 0.2478, 0.1815]) -Greedy action tensor([ 0.4745, -0.1926, -0.0990, -0.1807]) tensor([0.3852, 0.1977, 0.2171, 0.2000]) -Greedy action tensor([ 0.6280, -0.3942, 0.0292, -0.5099]) tensor([0.4485, 0.1614, 0.2464, 0.1437]) -Greedy action tensor([ 0.2924, 0.2500, -0.0712, -0.2828]) tensor([0.3109, 0.2980, 0.2161, 0.1749]) -Greedy action tensor([ 0.7072, -0.4942, 0.0901, -0.4473]) tensor([0.4639, 0.1395, 0.2503, 0.1462]) -Greedy action tensor([ 0.7301, -0.5582, 0.0021, -0.6246]) tensor([0.4959, 0.1367, 0.2394, 0.1280]) -Greedy action tensor([ 0.5532, -0.3114, -0.0163, -0.2400]) tensor([0.4099, 0.1727, 0.2319, 0.1855]) -Greedy action tensor([ 0.7043, -0.4536, 0.0150, -0.3524]) tensor([0.4622, 0.1452, 0.2320, 0.1607]) -Greedy action tensor([ 0.6709, -0.4260, -0.0932, -0.6652]) tensor([0.4848, 0.1619, 0.2258, 0.1274]) -Greedy action tensor([ 0.8010, -1.1495, -0.0086, -0.6127]) tensor([0.5463, 0.0777, 0.2431, 0.1329]) -Greedy action tensor([ 0.5895, -0.3456, 0.2499, -0.3246]) tensor([0.3991, 0.1567, 0.2842, 0.1600]) -Greedy action tensor([ 0.3566, 0.0562, -0.1453, 0.0843]) tensor([0.3218, 0.2383, 0.1948, 0.2451]) -Greedy action tensor([ 0.6187, -0.3453, 0.0449, -0.2702]) tensor([0.4245, 0.1619, 0.2391, 0.1745]) -Greedy action tensor([ 0.5050, -0.0622, -0.1000, -0.0165]) tensor([0.3694, 0.2095, 0.2017, 0.2193]) -Greedy action tensor([ 0.6334, -0.1150, -0.0023, -0.4267]) tensor([0.4257, 0.2014, 0.2254, 0.1475]) -Greedy action tensor([-0.0273, -0.1017, -0.0684, -0.1159]) tensor([0.2629, 0.2441, 0.2523, 0.2406]) -Greedy action tensor([ 0.8163, -0.5078, -0.1300, -0.4327]) tensor([0.5152, 0.1371, 0.2000, 0.1478]) -Greedy action tensor([ 0.8018, -0.5512, -0.1190, -0.5384]) tensor([0.5212, 0.1347, 0.2076, 0.1365]) -Greedy action tensor([ 0.8692, -0.6146, 0.0422, -0.4255]) tensor([0.5160, 0.1170, 0.2257, 0.1414]) -Greedy action tensor([ 0.5369, -0.3411, 0.0387, -0.4865]) tensor([0.4197, 0.1744, 0.2550, 0.1508]) -Greedy action tensor([ 0.6661, -0.1406, -0.1642, -0.1983]) tensor([0.4341, 0.1938, 0.1892, 0.1829]) -Greedy action tensor([ 0.4453, -0.0535, -0.2242, -0.1765]) tensor([0.3765, 0.2286, 0.1928, 0.2022]) -Greedy action tensor([ 0.8072, -0.3182, -0.1302, -0.2829]) tensor([0.4872, 0.1581, 0.1908, 0.1638]) -Greedy action tensor([ 0.4864, -0.5689, -0.0394, -0.1892]) tensor([0.4085, 0.1422, 0.2415, 0.2079]) -Greedy action tensor([ 0.9448, -1.1029, 0.1660, -1.0482]) tensor([0.5800, 0.0748, 0.2662, 0.0790]) -Greedy action tensor([ 0.2406, -0.3065, -0.0977, -0.1645]) tensor([0.3380, 0.1956, 0.2410, 0.2254]) -Greedy action tensor([ 0.8797, -0.6502, -0.1276, -0.3999]) tensor([0.5377, 0.1164, 0.1964, 0.1496]) -Greedy action tensor([ 0.7436, -0.4089, 0.0857, -0.4854]) tensor([0.4703, 0.1485, 0.2436, 0.1376]) -Greedy action tensor([ 0.5237, -0.5142, -0.1049, -0.3551]) tensor([0.4343, 0.1538, 0.2316, 0.1803]) -Greedy action tensor([ 0.7029, -1.0076, 0.0322, -0.5503]) tensor([0.5056, 0.0914, 0.2586, 0.1444]) -Greedy action tensor([ 0.8651, -0.4167, -0.0669, -0.1632]) tensor([0.4929, 0.1368, 0.1941, 0.1763]) -Greedy action tensor([ 0.6498, -0.1394, -0.3245, -0.4745]) tensor([0.4637, 0.2106, 0.1750, 0.1506]) -Greedy action tensor([ 0.6221, -0.1125, 0.0212, -0.2412]) tensor([0.4082, 0.1958, 0.2238, 0.1722]) -Greedy action tensor([ 0.5216, -0.4778, -0.0891, -0.3019]) tensor([0.4255, 0.1566, 0.2311, 0.1868]) -Greedy action tensor([ 0.7791, -0.5059, -0.1692, -0.3918]) tensor([0.5065, 0.1401, 0.1962, 0.1571]) -Greedy action tensor([ 0.6823, -0.5351, -0.1068, -0.6260]) tensor([0.4949, 0.1465, 0.2248, 0.1338]) -Greedy action tensor([ 0.9539, -0.9699, 0.1549, -0.5639]) tensor([0.5510, 0.0805, 0.2478, 0.1208]) -Greedy action tensor([ 0.4561, -0.2619, 0.0737, -0.2820]) tensor([0.3777, 0.1842, 0.2576, 0.1805]) -Greedy action tensor([ 0.8463, -0.8438, -0.0567, -0.3254]) tensor([0.5264, 0.0971, 0.2134, 0.1631]) -Greedy action tensor([ 1.1064, -0.8554, -0.0778, -0.7113]) tensor([0.6215, 0.0874, 0.1902, 0.1009]) -Greedy action tensor([ 0.4685, -0.3026, -0.0520, -0.4295]) tensor([0.4058, 0.1877, 0.2411, 0.1653]) -Greedy action tensor([ 0.3582, -0.2158, -0.0787, -0.3737]) tensor([0.3717, 0.2094, 0.2401, 0.1788]) -Greedy action tensor([ 0.3920, -0.2979, 0.0142, -0.3346]) tensor([0.3745, 0.1878, 0.2566, 0.1811]) -Greedy action tensor([ 0.7281, -0.2032, -0.0913, -0.1835]) tensor([0.4471, 0.1762, 0.1970, 0.1797]) -Greedy action tensor([ 0.5331, -0.1325, -0.2310, -0.3272]) tensor([0.4162, 0.2139, 0.1938, 0.1761]) -Greedy action tensor([ 0.9983, -0.6025, -0.1258, -0.6847]) tensor([0.5839, 0.1178, 0.1897, 0.1085]) -Greedy action tensor([ 0.5041, -0.1754, 0.1869, -0.3181]) tensor([0.3739, 0.1895, 0.2723, 0.1643]) -Greedy action tensor([ 0.6395, -0.2752, -0.0519, -0.1974]) tensor([0.4283, 0.1716, 0.2145, 0.1855]) -Greedy action tensor([ 0.6412, -0.3884, -0.0580, -0.2790]) tensor([0.4439, 0.1586, 0.2206, 0.1769]) -Greedy action tensor([ 0.5424, 0.0332, -0.0234, -0.0505]) tensor([0.3674, 0.2208, 0.2087, 0.2031]) -Greedy action tensor([ 0.2860, -0.1448, 0.1454, -0.1899]) tensor([0.3185, 0.2070, 0.2767, 0.1979]) -Greedy action tensor([ 0.4622, -0.2495, -0.1986, -0.5037]) tensor([0.4188, 0.2055, 0.2163, 0.1594]) -Greedy action tensor([ 0.8251, -0.5523, -0.1826, -0.5360]) tensor([0.5337, 0.1346, 0.1948, 0.1368]) -Greedy action tensor([ 0.6809, -0.5198, -0.1835, -0.4803]) tensor([0.4913, 0.1479, 0.2070, 0.1538]) -Greedy action tensor([ 0.4465, 0.2276, -0.2310, -0.0154]) tensor([0.3400, 0.2731, 0.1727, 0.2142]) -Greedy action tensor([ 0.7869, -0.5036, 0.1078, -0.5169]) tensor([0.4869, 0.1340, 0.2469, 0.1322]) -Greedy action tensor([ 0.4876, -0.4240, 0.3133, -0.5477]) tensor([0.3851, 0.1547, 0.3234, 0.1367]) -Greedy action tensor([ 1.0052, -0.7247, -0.1372, -0.6754]) tensor([0.5943, 0.1054, 0.1896, 0.1107]) -Greedy action tensor([ 0.5967, -0.6383, -0.0453, -0.2826]) tensor([0.4480, 0.1303, 0.2358, 0.1860]) -Greedy action tensor([ 0.6478, -0.2949, -0.0422, -0.2086]) tensor([0.4318, 0.1682, 0.2166, 0.1834]) -Greedy action tensor([ 0.3962, -0.3165, 0.0412, -0.4569]) tensor([0.3820, 0.1873, 0.2679, 0.1628]) -Greedy action tensor([ 0.4209, -0.1989, -0.1296, -0.1374]) tensor([0.3722, 0.2003, 0.2146, 0.2130]) -Greedy action tensor([ 0.5950, -0.6090, -0.1267, -0.2879]) tensor([0.4547, 0.1364, 0.2209, 0.1880]) -Greedy action tensor([ 0.9516, -0.5736, -0.1130, -0.3860]) tensor([0.5480, 0.1192, 0.1890, 0.1438]) -Greedy action tensor([ 0.7996, -0.2267, 0.0849, -0.3725]) tensor([0.4635, 0.1661, 0.2268, 0.1436]) -Greedy action tensor([ 0.4744, -0.6478, -0.0046, -0.2672]) tensor([0.4130, 0.1345, 0.2558, 0.1967]) -Greedy action tensor([ 0.4824, -0.0509, -0.1413, -0.2272]) tensor([0.3825, 0.2244, 0.2050, 0.1881]) -Greedy action tensor([ 0.6939, -0.4777, -0.0172, -0.4986]) tensor([0.4752, 0.1473, 0.2334, 0.1442]) -Greedy action tensor([ 0.6058, -0.3464, -0.0760, -0.3047]) tensor([0.4359, 0.1682, 0.2204, 0.1754]) -Greedy action tensor([ 0.8082, -0.5291, -0.1023, -0.5228]) tensor([0.5184, 0.1361, 0.2086, 0.1370]) -Greedy action tensor([ 1.0793, -0.2788, -0.3636, 0.0593]) tensor([0.5394, 0.1387, 0.1274, 0.1945]) -Greedy action tensor([ 1.0547, 0.1022, -0.1697, 0.3182]) tensor([0.4633, 0.1787, 0.1362, 0.2218]) -Greedy action tensor([ 1.2979, 0.0254, -0.3998, 0.0659]) tensor([0.5698, 0.1596, 0.1043, 0.1662]) -Greedy action tensor([ 1.1244, -0.5005, -0.2054, 0.2351]) tensor([0.5341, 0.1052, 0.1413, 0.2195]) -Greedy action tensor([ 1.2964, -0.6806, -0.0900, 0.3223]) tensor([0.5663, 0.0784, 0.1415, 0.2138]) -Greedy action tensor([ 1.2530, -0.4667, -0.2149, -0.0499]) tensor([0.5948, 0.1065, 0.1370, 0.1616]) -Greedy action tensor([ 0.8171, -0.2871, -0.5632, 0.4220]) tensor([0.4432, 0.1469, 0.1114, 0.2985]) -Greedy action tensor([ 0.5927, -0.3219, 0.1568, -0.0521]) tensor([0.3888, 0.1558, 0.2514, 0.2040]) -Greedy action tensor([ 1.5108, -0.2644, -0.4265, 0.3236]) tensor([0.6178, 0.1047, 0.0890, 0.1885]) -Greedy action tensor([ 1.8858, -0.7757, -0.0870, 0.5970]) tensor([0.6736, 0.0470, 0.0937, 0.1857]) -Greedy action tensor([ 2.2309, -0.8958, -0.3917, 0.4824]) tensor([0.7749, 0.0340, 0.0563, 0.1349]) -Greedy action tensor([ 1.6109, -0.5828, -0.4230, 0.2300]) tensor([0.6695, 0.0746, 0.0876, 0.1683]) -Greedy action tensor([ 0.7212, -0.1727, -0.1447, -0.2286]) tensor([0.4512, 0.1845, 0.1898, 0.1745]) -Greedy action tensor([ 1.9899, -0.8898, -0.0243, 0.4519]) tensor([0.7121, 0.0400, 0.0950, 0.1530]) -Greedy action tensor([ 1.4701, -0.3366, -0.2230, 0.1293]) tensor([0.6212, 0.1020, 0.1143, 0.1625]) -Greedy action tensor([1.3209, 0.0929, 0.0838, 0.0456]) tensor([0.5369, 0.1573, 0.1558, 0.1500]) -Greedy action tensor([ 1.1168, 0.1023, -0.3604, 0.2796]) tensor([0.4941, 0.1792, 0.1128, 0.2139]) -Greedy action tensor([ 2.2004, -1.2485, 0.0143, 0.8247]) tensor([0.7159, 0.0228, 0.0804, 0.1809]) -Greedy action tensor([ 1.2866, -0.6023, -0.6056, 0.5182]) tensor([0.5663, 0.0856, 0.0854, 0.2626]) -Greedy action tensor([ 0.5237, -0.0799, 0.0762, -0.1139]) tensor([0.3684, 0.2014, 0.2355, 0.1947]) -Greedy action tensor([ 1.4796, -0.2466, -0.1486, 0.5051]) tensor([0.5709, 0.1016, 0.1121, 0.2154]) -Greedy action tensor([ 1.3718, -0.2485, -0.6766, 0.0131]) tensor([0.6314, 0.1249, 0.0814, 0.1623]) -Greedy action tensor([ 1.2372, -0.3554, -0.3773, 0.3688]) tensor([0.5488, 0.1116, 0.1092, 0.2303]) -Greedy action tensor([ 2.7557, -1.2756, -0.6118, 1.0428]) tensor([0.8113, 0.0144, 0.0280, 0.1463]) -Greedy action tensor([ 1.2306, -0.1672, 0.0065, -0.2432]) tensor([0.5649, 0.1396, 0.1661, 0.1294]) -Greedy action tensor([ 1.0438, -0.4726, 0.1119, 0.1369]) tensor([0.4958, 0.1088, 0.1952, 0.2002]) -Greedy action tensor([ 1.3833, -0.9297, -0.0292, 0.2759]) tensor([0.5978, 0.0592, 0.1456, 0.1975]) -Greedy action tensor([ 1.3605, -0.5731, -0.3556, 0.5033]) tensor([0.5718, 0.0827, 0.1028, 0.2427]) -Greedy action tensor([ 1.1638, -0.6795, -0.3005, 0.1908]) tensor([0.5658, 0.0896, 0.1308, 0.2138]) -Greedy action tensor([ 1.6056, -0.1009, -0.6636, 0.4627]) tensor([0.6235, 0.1132, 0.0645, 0.1988]) -Greedy action tensor([ 1.5673, -0.4327, -0.7576, 0.8457]) tensor([0.5817, 0.0787, 0.0569, 0.2827]) -Greedy action tensor([ 1.4590, -0.5833, 0.0406, 0.0937]) tensor([0.6146, 0.0797, 0.1488, 0.1569]) -Greedy action tensor([ 1.5284, -0.5811, -0.3852, 0.3757]) tensor([0.6311, 0.0765, 0.0931, 0.1993]) -Greedy action tensor([ 1.2649, -0.5373, -0.5325, 0.6284]) tensor([0.5377, 0.0887, 0.0891, 0.2845]) -Greedy action tensor([ 1.5447, 0.0376, -0.3394, 0.6356]) tensor([0.5629, 0.1247, 0.0856, 0.2268]) -Greedy action tensor([ 1.3868, -0.1024, -0.4722, 0.4193]) tensor([0.5677, 0.1281, 0.0885, 0.2158]) -Greedy action tensor([ 1.0282, -0.4921, 0.1538, -0.0616]) tensor([0.5071, 0.1109, 0.2115, 0.1705]) -Greedy action tensor([ 0.7881, -0.2597, -0.2158, 0.2738]) tensor([0.4319, 0.1515, 0.1583, 0.2583]) -Greedy action tensor([ 1.1495, -0.2684, -0.2734, 0.1220]) tensor([0.5431, 0.1316, 0.1309, 0.1944]) -Greedy action tensor([ 1.3116, -0.4612, -0.1721, 0.2136]) tensor([0.5780, 0.0982, 0.1311, 0.1928]) -Greedy action tensor([ 1.8964, -1.3124, -0.4765, 0.3154]) tensor([0.7466, 0.0302, 0.0696, 0.1536]) -Greedy action tensor([ 1.4195, -0.6925, -0.2421, 0.8537]) tensor([0.5323, 0.0644, 0.1010, 0.3023]) -Greedy action tensor([ 1.3538, -0.3336, -0.1821, -0.0990]) tensor([0.6119, 0.1132, 0.1317, 0.1431]) -Greedy action tensor([ 1.2893, -0.4876, -0.2457, 0.1182]) tensor([0.5901, 0.0998, 0.1271, 0.1829]) -Greedy action tensor([ 0.8319, -0.3774, 0.0551, 0.1972]) tensor([0.4370, 0.1304, 0.2010, 0.2316]) -Greedy action tensor([ 1.0564, -0.4558, -0.6436, 0.6747]) tensor([0.4795, 0.1057, 0.0876, 0.3273]) -Greedy action tensor([ 0.6028, -0.3721, -0.1125, 0.2853]) tensor([0.3855, 0.1454, 0.1885, 0.2806]) -Greedy action tensor([ 1.8715, -0.2164, 0.0395, 0.6716]) tensor([0.6308, 0.0782, 0.1010, 0.1900]) -Greedy action tensor([ 0.8241, -0.2314, -0.6078, 0.6986]) tensor([0.4050, 0.1410, 0.0967, 0.3573]) -Greedy action tensor([ 0.5144, -0.0685, -0.0387, -0.0008]) tensor([0.3662, 0.2044, 0.2106, 0.2188]) -Greedy action tensor([ 1.5813, -0.8122, -0.0723, 0.2722]) tensor([0.6440, 0.0588, 0.1232, 0.1739]) -Greedy action tensor([ 1.3996, -0.7137, -0.3069, 0.1319]) tensor([0.6314, 0.0763, 0.1146, 0.1777]) -Greedy action tensor([ 1.4767, -0.2965, -0.5734, 0.5185]) tensor([0.5945, 0.1009, 0.0765, 0.2280]) -Greedy action tensor([ 1.4571, -0.0627, -0.2154, 0.2352]) tensor([0.5878, 0.1286, 0.1104, 0.1732]) -Greedy action tensor([ 1.3573, -0.0070, -0.1485, 0.3026]) tensor([0.5477, 0.1400, 0.1215, 0.1908]) -Greedy action tensor([ 1.9131, -0.2464, -0.3606, 0.3145]) tensor([0.7040, 0.0812, 0.0725, 0.1423]) -Greedy action tensor([ 2.0486, -1.0921, -0.4423, 0.7370]) tensor([0.7166, 0.0310, 0.0594, 0.1931]) -Greedy action tensor([ 1.5111, -0.4718, -0.4265, 0.5981]) tensor([0.5942, 0.0818, 0.0856, 0.2385]) -Greedy action tensor([ 1.2944, -0.7104, -0.3731, -0.0155]) tensor([0.6276, 0.0845, 0.1185, 0.1694]) -Greedy action tensor([ 1.0492, -0.2868, 0.0799, 0.3645]) tensor([0.4659, 0.1225, 0.1767, 0.2349]) -Greedy action tensor([ 1.4564, -0.3355, -0.2954, 0.5786]) tensor([0.5695, 0.0949, 0.0988, 0.2368]) -Greedy action tensor([ 1.0858, -0.5571, -0.6844, 0.4567]) tensor([0.5272, 0.1020, 0.0898, 0.2810]) -Greedy action tensor([ 0.5996, -0.2505, 0.0175, 0.5333]) tensor([0.3422, 0.1463, 0.1912, 0.3203]) -Greedy action tensor([ 0.6201, -0.2791, -0.2151, 0.2857]) tensor([0.3912, 0.1592, 0.1697, 0.2800]) -Greedy action tensor([ 1.4527, -0.4390, -0.1114, 0.2979]) tensor([0.5970, 0.0900, 0.1249, 0.1881]) -Greedy action tensor([ 1.4229, -0.9733, -0.3248, 0.6327]) tensor([0.5817, 0.0530, 0.1013, 0.2640]) -Greedy action tensor([ 1.2929, 0.0748, 0.0405, -0.1638]) tensor([0.5511, 0.1630, 0.1575, 0.1284]) -Greedy action tensor([ 0.8026, -0.2517, -0.2261, 0.2197]) tensor([0.4417, 0.1539, 0.1579, 0.2466]) -Greedy action tensor([ 1.3740, -0.3104, -0.5013, -0.1019]) tensor([0.6380, 0.1184, 0.0978, 0.1458]) -Greedy action tensor([ 1.2270, -0.5706, -0.3923, 0.6086]) tensor([0.5256, 0.0871, 0.1041, 0.2832]) -Greedy action tensor([ 1.1789, -0.2138, -0.5976, 0.4917]) tensor([0.5207, 0.1293, 0.0881, 0.2619]) -Greedy action tensor([ 1.1481, -0.3228, -0.5219, 0.4591]) tensor([0.5208, 0.1196, 0.0980, 0.2615]) -Greedy action tensor([ 1.5993, -0.3053, -0.3763, 0.2048]) tensor([0.6512, 0.0970, 0.0903, 0.1615]) -Greedy action tensor([ 1.9280, -0.4803, -0.1806, 0.4279]) tensor([0.6971, 0.0627, 0.0846, 0.1555]) -Greedy action tensor([ 1.0097, -0.2078, -0.1156, 0.2909]) tensor([0.4744, 0.1404, 0.1540, 0.2312]) -Greedy action tensor([ 1.2950, -0.4005, -0.5043, 0.6198]) tensor([0.5382, 0.0988, 0.0890, 0.2740]) -Greedy action tensor([ 1.8088, -0.4155, -0.1762, 0.4904]) tensor([0.6609, 0.0715, 0.0908, 0.1768]) -Greedy action tensor([ 1.6792, -0.3276, -0.3910, 0.3410]) tensor([0.6566, 0.0883, 0.0828, 0.1722]) -Greedy action tensor([ 1.5223, -0.7824, -0.4989, 0.8192]) tensor([0.5789, 0.0578, 0.0767, 0.2866]) -Greedy action tensor([ 1.1475, -0.6502, -0.1670, 0.3591]) tensor([0.5294, 0.0877, 0.1422, 0.2407]) -Greedy action tensor([ 1.7250, -1.0513, -0.1576, 0.1016]) tensor([0.7084, 0.0441, 0.1078, 0.1397]) -Greedy action tensor([ 0.7260, -0.4529, -0.0401, -0.2339]) tensor([0.4639, 0.1427, 0.2157, 0.1777]) -Greedy action tensor([ 0.9412, -1.2242, 0.0119, -0.8247]) tensor([0.5950, 0.0683, 0.2349, 0.1018]) -Greedy action tensor([ 0.4986, -0.1557, -0.0288, -0.1098]) tensor([0.3768, 0.1958, 0.2224, 0.2050]) -Greedy action tensor([ 0.5359, -0.3475, -0.0918, -0.3010]) tensor([0.4201, 0.1737, 0.2243, 0.1819]) -Greedy action tensor([ 0.3405, 0.2705, -0.1501, -0.3473]) tensor([0.3282, 0.3060, 0.2009, 0.1649]) -Greedy action tensor([ 1.0720, -0.7797, 0.0419, -0.3077]) tensor([0.5664, 0.0889, 0.2022, 0.1425]) -Greedy action tensor([ 0.4418, 0.0756, -0.1132, -0.1865]) tensor([0.3570, 0.2475, 0.2050, 0.1905]) -Greedy action tensor([ 0.7270, -0.3528, -0.1308, -0.4775]) tensor([0.4846, 0.1646, 0.2055, 0.1453]) -Greedy action tensor([ 0.2762, -0.1352, -0.0259, 0.0047]) tensor([0.3160, 0.2094, 0.2336, 0.2409]) -Greedy action tensor([ 0.6909, -0.1625, -0.0077, -0.2134]) tensor([0.4295, 0.1830, 0.2136, 0.1739]) -Greedy action tensor([ 0.6944, -0.3229, 0.1475, -0.4594]) tensor([0.4433, 0.1603, 0.2566, 0.1398]) -Greedy action tensor([ 0.6135, 0.2406, -0.1230, -0.1305]) tensor([0.3784, 0.2606, 0.1812, 0.1798]) -Greedy action tensor([ 0.6937, -0.3510, -0.0744, -0.3601]) tensor([0.4621, 0.1625, 0.2143, 0.1611]) -Greedy action tensor([ 0.5277, -0.1613, -0.0753, -0.1448]) tensor([0.3907, 0.1962, 0.2138, 0.1994]) -Greedy action tensor([ 0.9835, -1.3005, 0.0591, -0.6066]) tensor([0.5874, 0.0598, 0.2330, 0.1198]) -Greedy action tensor([ 0.9895, -1.0062, 0.0950, -0.5096]) tensor([0.5656, 0.0769, 0.2312, 0.1263]) -Greedy action tensor([ 0.4537, -0.2398, -0.0933, -0.2132]) tensor([0.3858, 0.1929, 0.2233, 0.1980]) -Greedy action tensor([ 0.7409, -0.9136, 0.0676, -0.1642]) tensor([0.4749, 0.0908, 0.2422, 0.1921]) -Greedy action tensor([ 0.8455, -0.5068, -0.0584, -0.2678]) tensor([0.5020, 0.1298, 0.2033, 0.1649]) -Greedy action tensor([ 0.3481, -0.0943, 0.0340, -0.0580]) tensor([0.3290, 0.2114, 0.2403, 0.2192]) -Greedy action tensor([ 0.5493, -0.3818, -0.0690, -0.4565]) tensor([0.4350, 0.1714, 0.2344, 0.1591]) -Greedy action tensor([ 0.7411, -0.4981, 0.0370, -0.3624]) tensor([0.4726, 0.1369, 0.2337, 0.1568]) -Greedy action tensor([ 0.9059, -0.6131, 0.0401, -0.3706]) tensor([0.5212, 0.1141, 0.2193, 0.1454]) -Greedy action tensor([ 0.5948, -0.1338, 0.3251, -0.5751]) tensor([0.3911, 0.1888, 0.2987, 0.1214]) -Greedy action tensor([ 0.5553, -0.2432, 0.0027, -0.3283]) tensor([0.4100, 0.1845, 0.2360, 0.1695]) -Greedy action tensor([ 0.5914, -0.7312, 0.0428, -0.6171]) tensor([0.4667, 0.1243, 0.2696, 0.1394]) -Greedy action tensor([ 0.6314, -0.6332, -0.0485, -0.2750]) tensor([0.4560, 0.1288, 0.2310, 0.1842]) -Greedy action tensor([ 0.3844, 0.0497, -0.0615, -0.1480]) tensor([0.3398, 0.2431, 0.2176, 0.1995]) -Greedy action tensor([ 0.8427, -0.3104, -0.0054, -0.3677]) tensor([0.4897, 0.1546, 0.2097, 0.1460]) -Greedy action tensor([ 0.5825, -0.4836, -0.0901, -0.3310]) tensor([0.4433, 0.1526, 0.2263, 0.1778]) -Greedy action tensor([ 0.5465, -0.3008, 0.0813, -0.4597]) tensor([0.4129, 0.1769, 0.2593, 0.1509]) -Greedy action tensor([ 0.7220, -0.1613, 0.0038, -0.1198]) tensor([0.4288, 0.1773, 0.2091, 0.1848]) -Greedy action tensor([ 1.1025, -0.7736, 0.0910, -0.7239]) tensor([0.5960, 0.0913, 0.2168, 0.0959]) -Greedy action tensor([ 0.6413, -0.8474, -0.1519, -0.2315]) tensor([0.4772, 0.1077, 0.2158, 0.1993]) -Greedy action tensor([ 0.7235, -0.6716, -0.0332, -0.3183]) tensor([0.4831, 0.1197, 0.2267, 0.1705]) -Greedy action tensor([ 0.6005, -0.3856, 0.0405, -0.3841]) tensor([0.4314, 0.1609, 0.2464, 0.1612]) -Greedy action tensor([ 0.6954, -0.7541, -0.1385, -0.8251]) tensor([0.5298, 0.1243, 0.2301, 0.1158]) -Greedy action tensor([ 0.7422, -0.6878, -0.0484, -0.4704]) tensor([0.5024, 0.1202, 0.2279, 0.1494]) -Greedy action tensor([ 0.4004, 0.2355, -0.2019, -0.0563]) tensor([0.3301, 0.2800, 0.1808, 0.2091]) -Greedy action tensor([ 0.4097, 0.2319, -0.1417, -0.4281]) tensor([0.3514, 0.2941, 0.2024, 0.1520]) -Greedy action tensor([ 1.1660, -0.5857, 0.0780, -0.3556]) tensor([0.5785, 0.1004, 0.1949, 0.1263]) -Greedy action tensor([ 0.7353, -0.4993, 0.1448, -0.6515]) tensor([0.4774, 0.1389, 0.2645, 0.1193]) -Greedy action tensor([ 0.4110, -0.1391, 0.0744, -0.0832]) tensor([0.3447, 0.1989, 0.2462, 0.2103]) -Greedy action tensor([ 0.8378, -0.4846, -0.1303, -0.2836]) tensor([0.5071, 0.1351, 0.1926, 0.1652]) -Greedy action tensor([ 0.6752, -0.3079, -0.0215, -0.2909]) tensor([0.4439, 0.1661, 0.2211, 0.1689]) -Greedy action tensor([ 0.5282, -0.3268, 0.0170, -0.3509]) tensor([0.4098, 0.1743, 0.2458, 0.1701]) -Greedy action tensor([ 0.3875, -0.3788, -0.1169, -0.2680]) tensor([0.3864, 0.1796, 0.2333, 0.2006]) -Greedy action tensor([ 0.6865, -0.2258, -0.0788, -0.1366]) tensor([0.4337, 0.1742, 0.2017, 0.1904]) -Greedy action tensor([ 0.6958, -0.6774, -0.1473, -0.3673]) tensor([0.4928, 0.1248, 0.2121, 0.1702]) -Greedy action tensor([ 0.9433, -0.5151, -0.0454, -0.4396]) tensor([0.5389, 0.1254, 0.2005, 0.1352]) -Greedy action tensor([ 0.6821, -0.3472, 0.1213, -0.4141]) tensor([0.4421, 0.1579, 0.2523, 0.1477]) -Greedy action tensor([ 0.6254, -0.5037, -0.1550, -0.3676]) tensor([0.4647, 0.1502, 0.2129, 0.1721]) -Greedy action tensor([ 0.3301, 0.0686, 0.0559, -0.1371]) tensor([0.3168, 0.2439, 0.2408, 0.1985]) -Greedy action tensor([ 0.7892, -1.1247, 0.0595, -0.9223]) tensor([0.5524, 0.0815, 0.2663, 0.0998]) -Greedy action tensor([ 0.4978, 0.3021, -0.1815, -0.3473]) tensor([0.3625, 0.2981, 0.1838, 0.1557]) -Greedy action tensor([ 0.6091, -0.2687, 0.0194, -0.1786]) tensor([0.4124, 0.1714, 0.2286, 0.1876]) -Greedy action tensor([ 0.5409, -0.5978, -0.0742, -0.2327]) tensor([0.4306, 0.1379, 0.2328, 0.1987]) -Greedy action tensor([ 0.8018, -0.6849, -0.1071, -0.4027]) tensor([0.5184, 0.1172, 0.2089, 0.1554]) -Greedy action tensor([ 0.4377, -0.1868, -0.0357, -0.3517]) tensor([0.3828, 0.2050, 0.2384, 0.1738]) -Greedy action tensor([ 0.6426, -0.2232, -0.0281, -0.4594]) tensor([0.4416, 0.1858, 0.2258, 0.1467]) -Greedy action tensor([ 0.7821, -0.5181, -0.1838, -0.3844]) tensor([0.5090, 0.1387, 0.1938, 0.1585]) -Greedy action tensor([ 0.7974, -0.3949, 0.2185, -0.3697]) tensor([0.4597, 0.1395, 0.2577, 0.1431]) -Greedy action tensor([ 0.8915, -0.5995, 0.0248, -0.4834]) tensor([0.5268, 0.1186, 0.2214, 0.1332]) -Greedy action tensor([ 0.7775, -0.2280, -0.1002, -0.1816]) tensor([0.4619, 0.1690, 0.1920, 0.1770]) -Greedy action tensor([ 0.2253, -0.0654, 0.0895, -0.2242]) tensor([0.3069, 0.2295, 0.2679, 0.1958]) -Greedy action tensor([ 0.8838, -0.4163, -0.0791, -0.1812]) tensor([0.5002, 0.1363, 0.1910, 0.1724]) -Greedy action tensor([ 0.6570, -0.5388, -0.0790, -0.2948]) tensor([0.4613, 0.1395, 0.2210, 0.1781]) -Greedy action tensor([ 0.4598, -0.1685, 0.0441, -0.7230]) tensor([0.4000, 0.2134, 0.2640, 0.1226]) -Greedy action tensor([ 0.6159, -0.3233, -0.0342, -0.3660]) tensor([0.4372, 0.1709, 0.2282, 0.1638]) -Greedy action tensor([ 0.4878, -0.0839, 0.0466, -0.1551]) tensor([0.3658, 0.2065, 0.2353, 0.1923]) -Greedy action tensor([ 0.7087, -0.6534, 0.0574, -0.3675]) tensor([0.4721, 0.1209, 0.2461, 0.1609]) -Greedy action tensor([ 1.0194, -0.5228, -0.0147, -0.6463]) tensor([0.5687, 0.1216, 0.2022, 0.1075]) -Greedy action tensor([0.2729, 0.2435, 0.0410, 0.0680]) tensor([0.2794, 0.2713, 0.2216, 0.2277]) -Greedy action tensor([ 0.8842, -0.6022, 0.0875, -0.4965]) tensor([0.5186, 0.1173, 0.2338, 0.1304]) -Greedy action tensor([ 0.4420, 0.0266, 0.0114, -0.1867]) tensor([0.3517, 0.2321, 0.2286, 0.1875]) -Greedy action tensor([ 0.7520, -0.3083, -0.1318, -0.1968]) tensor([0.4658, 0.1613, 0.1925, 0.1804]) -Greedy action tensor([ 0.3855, 0.0735, -0.1090, -0.4529]) tensor([0.3605, 0.2639, 0.2198, 0.1559]) -Greedy action tensor([ 0.7299, 0.0727, -0.1251, -0.5566]) tensor([0.4505, 0.2335, 0.1916, 0.1244]) -Greedy action tensor([ 0.6710, -0.3369, -0.0763, -0.2296]) tensor([0.4454, 0.1626, 0.2110, 0.1810]) -Greedy action tensor([ 0.9556, -0.6116, -0.1136, -0.5893]) tensor([0.5665, 0.1182, 0.1945, 0.1209]) -Greedy action tensor([ 0.7688, -0.2321, -0.0049, -0.1197]) tensor([0.4464, 0.1641, 0.2059, 0.1836]) -Greedy action tensor([-0.3733, -1.1746, -0.6173, -0.9016]) tensor([0.3544, 0.1590, 0.2776, 0.2089]) -Greedy action tensor([-0.2695, -0.8716, 0.5487, -0.1524]) tensor([0.2025, 0.1109, 0.4590, 0.2277]) -Greedy action tensor([ 0.9425, -0.2272, 0.8496, 0.0815]) tensor([0.3782, 0.1174, 0.3446, 0.1599]) -Greedy action tensor([ 0.3916, -0.9172, 0.9107, 0.0135]) tensor([0.2750, 0.0743, 0.4622, 0.1884]) -Greedy action tensor([ 0.5373, -0.7340, 0.8894, 1.2043]) tensor([0.2150, 0.0603, 0.3058, 0.4189]) -Greedy action tensor([-0.4053, -0.8075, 0.9170, -0.4181]) tensor([0.1561, 0.1044, 0.5855, 0.1541]) -Greedy action tensor([-0.8398, 0.0232, -0.4321, 1.5239]) tensor([0.0645, 0.1529, 0.0970, 0.6856]) -Greedy action tensor([-0.2731, -0.3625, 0.0225, 0.4073]) tensor([0.1911, 0.1747, 0.2568, 0.3774]) -Greedy action tensor([-0.1736, -1.7817, -0.5669, 1.7481]) tensor([0.1148, 0.0230, 0.0775, 0.7847]) -Greedy action tensor([ 0.2160, -0.8794, -0.9372, 1.0658]) tensor([0.2507, 0.0838, 0.0791, 0.5864]) -Greedy action tensor([ 0.9174, -0.2520, 1.6137, -0.3933]) tensor([0.2788, 0.0866, 0.5594, 0.0752]) -Greedy action tensor([ 0.8041, -1.5620, 1.5010, 1.1583]) tensor([0.2209, 0.0207, 0.4435, 0.3148]) -Greedy action tensor([-1.4076, -0.2981, -0.3135, -0.2818]) tensor([0.0990, 0.3002, 0.2956, 0.3052]) -Greedy action tensor([ 0.4119, -0.5786, 0.7398, 0.9760]) tensor([0.2214, 0.0822, 0.3073, 0.3891]) -Greedy action tensor([ 0.6978, 0.9633, -0.4440, 0.3943]) tensor([0.2975, 0.3879, 0.0950, 0.2196]) -Greedy action tensor([-0.3629, -0.1805, 0.6801, -0.2925]) tensor([0.1636, 0.1964, 0.4644, 0.1756]) -Greedy action tensor([ 1.9776, -1.9430, 1.0203, 1.3718]) tensor([0.5130, 0.0102, 0.1969, 0.2799]) -Greedy action tensor([-0.0056, 1.0974, 1.7917, 0.6150]) tensor([0.0840, 0.2531, 0.5067, 0.1562]) -Greedy action tensor([ 0.1206, -0.9391, 0.0371, 0.5820]) tensor([0.2596, 0.0900, 0.2388, 0.4117]) -Greedy action tensor([-0.1234, 0.3454, -0.7719, 1.5715]) tensor([0.1167, 0.1865, 0.0610, 0.6357]) -Greedy action tensor([ 0.2666, -0.2528, 0.6076, 0.3567]) tensor([0.2442, 0.1452, 0.3434, 0.2672]) -Greedy action tensor([ 1.1459, -0.5573, 1.4896, 1.1226]) tensor([0.2802, 0.0510, 0.3951, 0.2737]) -Greedy action tensor([-0.5176, -0.8675, 0.5571, 0.1391]) tensor([0.1524, 0.1074, 0.4463, 0.2939]) -Greedy action tensor([ 0.5693, -0.0383, 0.4119, 0.7143]) tensor([0.2813, 0.1532, 0.2403, 0.3252]) -Greedy action tensor([ 0.5652, -2.2000, 0.6414, 0.1139]) tensor([0.3599, 0.0227, 0.3883, 0.2292]) -Greedy action tensor([ 0.4579, 0.3095, -0.4257, 0.7330]) tensor([0.2784, 0.2400, 0.1151, 0.3665]) -Greedy action tensor([-1.0011, -2.6677, 0.8286, 0.2127]) tensor([0.0927, 0.0175, 0.5777, 0.3121]) -Greedy action tensor([ 0.8399, -1.4678, 0.3060, 0.9009]) tensor([0.3638, 0.0362, 0.2133, 0.3867]) -Greedy action tensor([-0.9155, -0.4727, 1.3765, -1.2796]) tensor([0.0761, 0.1184, 0.7527, 0.0528]) -Greedy action tensor([1.1744, 0.1467, 0.4266, 0.6909]) tensor([0.4085, 0.1462, 0.1934, 0.2519]) -Greedy action tensor([-1.5193, -1.0052, -0.0527, -0.2834]) tensor([0.0957, 0.1600, 0.4149, 0.3294]) -Greedy action tensor([ 1.3707, -0.3340, 1.3541, 0.8347]) tensor([0.3636, 0.0661, 0.3576, 0.2127]) -Greedy action tensor([ 2.1423, -0.3853, 0.4090, 1.6368]) tensor([0.5377, 0.0429, 0.0950, 0.3243]) -Greedy action tensor([-0.1516, -0.3005, -0.9640, 0.7520]) tensor([0.2095, 0.1805, 0.0930, 0.5171]) -Greedy action tensor([ 0.2959, 0.0464, -0.4688, 0.8764]) tensor([0.2480, 0.1933, 0.1155, 0.4432]) -Greedy action tensor([ 0.4188, 1.0003, 2.0831, -0.4230]) tensor([0.1176, 0.2104, 0.6213, 0.0507]) -Greedy action tensor([-0.3124, -1.1617, -0.9536, 0.8461]) tensor([0.1946, 0.0832, 0.1025, 0.6197]) -Greedy action tensor([-0.9281, -0.1914, -0.2242, -0.3124]) tensor([0.1436, 0.3001, 0.2904, 0.2659]) -Greedy action tensor([ 0.1474, 0.6748, 0.2998, -0.5084]) tensor([0.2284, 0.3870, 0.2660, 0.1185]) -Greedy action tensor([ 0.8345, 0.2406, -0.2433, 0.2952]) tensor([0.4039, 0.2230, 0.1375, 0.2355]) -Greedy action tensor([ 4.3404e-04, 3.7343e-01, 9.9043e-01, -1.3309e+00]) tensor([0.1849, 0.2685, 0.4977, 0.0488]) -Greedy action tensor([-0.1797, -1.8837, 0.1061, -0.0592]) tensor([0.2747, 0.0500, 0.3655, 0.3098]) -Greedy action tensor([-0.4166, 0.0801, -0.4091, -1.2416]) tensor([0.2446, 0.4019, 0.2464, 0.1072]) -Greedy action tensor([ 0.5193, -0.9846, -0.5638, 0.9957]) tensor([0.3154, 0.0701, 0.1068, 0.5078]) -Greedy action tensor([-8.7517e-04, 2.9524e-01, 1.5405e+00, -4.7893e-01]) tensor([0.1310, 0.1761, 0.6117, 0.0812]) -Greedy action tensor([ 0.5614, -1.5232, -0.0442, -0.1943]) tensor([0.4673, 0.0581, 0.2550, 0.2195]) -Greedy action tensor([ 1.3287, -2.5943, -0.4054, 0.8841]) tensor([0.5442, 0.0108, 0.0961, 0.3489]) -Greedy action tensor([-0.1375, 0.0160, 0.8218, 1.5665]) tensor([0.0974, 0.1135, 0.2541, 0.5351]) -Greedy action tensor([ 0.1362, -1.1999, 1.4682, 0.0350]) tensor([0.1679, 0.0441, 0.6362, 0.1518]) -Greedy action tensor([ 0.9533, -0.2217, 0.5068, 0.5826]) tensor([0.3789, 0.1170, 0.2425, 0.2616]) -Greedy action tensor([ 0.1272, -0.9830, 2.0062, 0.0135]) tensor([0.1140, 0.0376, 0.7466, 0.1018]) -Greedy action tensor([1.3193, 0.0829, 1.1008, 0.3626]) tensor([0.4035, 0.1172, 0.3243, 0.1550]) -Greedy action tensor([ 0.8707, 0.4393, -0.7063, 1.6984]) tensor([0.2413, 0.1567, 0.0499, 0.5521]) -Greedy action tensor([1.2245, 0.7212, 1.1367, 0.2019]) tensor([0.3472, 0.2099, 0.3180, 0.1249]) -Greedy action tensor([-0.0675, -1.9936, 0.1766, 0.5055]) tensor([0.2383, 0.0347, 0.3042, 0.4227]) -Greedy action tensor([ 0.6376, -1.6285, -0.1344, 1.6770]) tensor([0.2276, 0.0236, 0.1052, 0.6436]) -Greedy action tensor([ 0.2456, -0.1126, -0.7757, 1.1084]) tensor([0.2258, 0.1578, 0.0813, 0.5351]) -Greedy action tensor([-1.0913, -0.9010, -0.0352, 1.4162]) tensor([0.0576, 0.0697, 0.1656, 0.7071]) -Greedy action tensor([ 0.4529, 0.0250, 0.0830, -0.0525]) tensor([0.3394, 0.2213, 0.2345, 0.2048]) -Greedy action tensor([-0.1339, -0.1191, 1.1510, 1.2953]) tensor([0.1020, 0.1035, 0.3686, 0.4258]) -Greedy action tensor([ 0.3194, -0.0618, -0.2099, 1.7771]) tensor([0.1522, 0.1040, 0.0897, 0.6541]) -Greedy action tensor([-0.1686, -1.7582, -0.4180, 1.1233]) tensor([0.1778, 0.0363, 0.1386, 0.6473]) -Greedy action tensor([ 0.2370, -0.8732, -0.2438, -0.0738]) tensor([0.3731, 0.1229, 0.2307, 0.2734]) -Greedy action tensor([-0.2795, -0.7835, -0.0112, -0.0571]) tensor([0.2403, 0.1452, 0.3143, 0.3002]) -Greedy action tensor([ 0.0507, -1.8152, -0.0649, 0.8540]) tensor([0.2337, 0.0362, 0.2082, 0.5219]) -Greedy action tensor([ 0.8246, -0.6217, 0.0436, -0.1194]) tensor([0.4802, 0.1131, 0.2199, 0.1868]) -Greedy action tensor([ 0.1580, 0.3498, -0.1874, -0.9630]) tensor([0.3081, 0.3733, 0.2181, 0.1004]) -Greedy action tensor([-0.6452, -0.6138, -0.0420, 0.2404]) tensor([0.1591, 0.1642, 0.2909, 0.3858]) -Greedy action tensor([ 0.3066, 0.2336, 0.2814, -0.3565]) tensor([0.2924, 0.2718, 0.2851, 0.1507]) -Greedy action tensor([1.3877, 0.1723, 0.6516, 1.6925]) tensor([0.3193, 0.0947, 0.1529, 0.4331]) -Greedy action tensor([ 0.1395, -1.8922, 0.0789, 0.7246]) tensor([0.2586, 0.0339, 0.2434, 0.4642]) -Greedy action tensor([ 0.2892, -1.3823, 0.9801, -0.1845]) tensor([0.2627, 0.0494, 0.5243, 0.1636]) -Greedy action tensor([ 0.7332, -0.7484, 0.0371, 1.5390]) tensor([0.2523, 0.0573, 0.1258, 0.5647]) -Greedy action tensor([-0.8933, -0.0372, 0.5883, -1.4370]) tensor([0.1200, 0.2824, 0.5279, 0.0697]) -Greedy action tensor([ 0.4860, -0.7203, 0.7514, 0.5421]) tensor([0.2731, 0.0818, 0.3562, 0.2889]) -Greedy action tensor([-0.7019, -1.3133, 0.9936, 0.0880]) tensor([0.1088, 0.0590, 0.5926, 0.2396]) -Greedy action tensor([ 1.0447, 0.3440, -0.1323, 0.3533]) tensor([0.4338, 0.2153, 0.1337, 0.2173]) -Greedy action tensor([-0.1051, -0.1531, -0.0319, -0.5124]) tensor([0.2707, 0.2580, 0.2912, 0.1801]) -Greedy action tensor([-0.4401, -0.0058, -0.3720, -0.0867]) tensor([0.1985, 0.3064, 0.2125, 0.2826]) -Greedy action tensor([ 0.3202, -1.8436, 0.6157, -0.5360]) tensor([0.3468, 0.0398, 0.4660, 0.1473]) -Greedy action tensor([ 0.6714, -0.5029, 0.1485, 1.1130]) tensor([0.2893, 0.0894, 0.1715, 0.4499]) -Greedy action tensor([-1.8460, -0.3987, 0.6003, -0.1161]) tensor([0.0446, 0.1895, 0.5146, 0.2514]) -Greedy action tensor([-1.8007, -0.0218, 0.5492, -0.0478]) tensor([0.0431, 0.2555, 0.4523, 0.2490]) -Greedy action tensor([-1.7652, -0.4848, 0.5827, -0.0411]) tensor([0.0484, 0.1741, 0.5062, 0.2713]) -Greedy action tensor([-1.5550, -0.4983, 0.5888, 0.3348]) tensor([0.0526, 0.1512, 0.4484, 0.3478]) -Greedy action tensor([-1.8494, -0.4644, 0.6156, -0.1321]) tensor([0.0448, 0.1789, 0.5269, 0.2494]) -Greedy action tensor([-0.4193, -0.2744, 0.1501, 0.0168]) tensor([0.1828, 0.2113, 0.3231, 0.2828]) -Greedy action tensor([-1.3836, -0.4709, 1.4696, 1.1764]) tensor([0.0296, 0.0738, 0.5136, 0.3831]) -Greedy action tensor([-1.8070, -0.3663, 0.5837, -0.1327]) tensor([0.0466, 0.1966, 0.5084, 0.2484]) -Greedy action tensor([-1.8962, -0.4307, 0.6403, -0.1487]) tensor([0.0422, 0.1827, 0.5330, 0.2421]) -Greedy action tensor([-1.8365, -0.4233, 0.6047, -0.1229]) tensor([0.0452, 0.1856, 0.5187, 0.2506]) -Greedy action tensor([-1.7649, -0.5097, 0.5785, -0.1212]) tensor([0.0498, 0.1746, 0.5182, 0.2574]) -Greedy action tensor([-1.0451, -0.5988, 0.2207, 0.3900]) tensor([0.0970, 0.1516, 0.3440, 0.4074]) -Greedy action tensor([-1.5389, -0.5642, 0.5587, 0.1941]) tensor([0.0573, 0.1518, 0.4667, 0.3241]) -Greedy action tensor([-1.4175, -0.5363, 0.4187, 0.1442]) tensor([0.0692, 0.1670, 0.4340, 0.3298]) -Greedy action tensor([-1.9132, -0.4215, 0.5288, -0.1850]) tensor([0.0443, 0.1969, 0.5093, 0.2494]) -Greedy action tensor([-1.7510, -0.5319, 1.4271, 0.9395]) tensor([0.0232, 0.0785, 0.5566, 0.3418]) -Greedy action tensor([-1.8214, -0.5094, 0.6652, -0.0763]) tensor([0.0445, 0.1653, 0.5352, 0.2550]) -Greedy action tensor([-2.0454, -0.8614, 0.8353, 0.1394]) tensor([0.0323, 0.1055, 0.5754, 0.2869]) -Greedy action tensor([-1.4967, -0.4355, 0.4251, 0.0850]) tensor([0.0642, 0.1854, 0.4384, 0.3120]) -Greedy action tensor([-1.9476, -0.4544, 0.6713, -0.1818]) tensor([0.0400, 0.1779, 0.5484, 0.2337]) -Greedy action tensor([-1.8363, -0.3926, 0.6057, -0.1109]) tensor([0.0447, 0.1896, 0.5144, 0.2512]) -Greedy action tensor([-1.2663, 0.7040, -0.6127, -0.9923]) tensor([0.0876, 0.6286, 0.1685, 0.1153]) -Greedy action tensor([-1.1623, -0.5617, 0.4153, -0.1467]) tensor([0.0959, 0.1748, 0.4645, 0.2648]) -Greedy action tensor([-1.9611, -0.9278, 0.2714, -0.3782]) tensor([0.0556, 0.1561, 0.5179, 0.2705]) -Greedy action tensor([-1.8456, -0.4644, 0.6214, -0.1124]) tensor([0.0446, 0.1775, 0.5256, 0.2523]) -Greedy action tensor([-1.0355, -0.5634, 0.2105, 0.5459]) tensor([0.0914, 0.1465, 0.3177, 0.4444]) -Greedy action tensor([-1.9163, -0.4308, 0.6494, -0.1627]) tensor([0.0413, 0.1825, 0.5375, 0.2386]) -Greedy action tensor([-1.4590, -0.5321, 0.4271, 0.2501]) tensor([0.0639, 0.1615, 0.4215, 0.3531]) -Greedy action tensor([-1.8733, -0.7336, -0.1297, -0.4305]) tensor([0.0710, 0.2221, 0.4062, 0.3007]) -Greedy action tensor([-1.6977, -0.2251, 0.5452, -0.1468]) tensor([0.0513, 0.2237, 0.4832, 0.2419]) -Greedy action tensor([-1.8690, -0.4270, 0.6198, -0.1334]) tensor([0.0436, 0.1843, 0.5250, 0.2472]) -Greedy action tensor([-2.0300, -1.1434, 0.3272, 0.3384]) tensor([0.0405, 0.0984, 0.4281, 0.4329]) -Greedy action tensor([-1.9190, -0.4748, 0.6927, -0.1473]) tensor([0.0404, 0.1713, 0.5506, 0.2377]) -Greedy action tensor([-1.6207, -0.4078, 0.6564, 0.4311]) tensor([0.0457, 0.1536, 0.4453, 0.3554]) -Greedy action tensor([-1.0214, 0.5043, 0.4355, 0.7292]) tensor([0.0639, 0.2938, 0.2743, 0.3680]) -Greedy action tensor([-1.5197, -0.3884, 0.6133, 0.5032]) tensor([0.0497, 0.1542, 0.4199, 0.3761]) -Greedy action tensor([-1.8122, -0.5063, 0.5889, -0.1134]) tensor([0.0472, 0.1741, 0.5207, 0.2580]) -Greedy action tensor([-1.3497, -0.4518, 0.4469, 0.2726]) tensor([0.0687, 0.1687, 0.4144, 0.3481]) -Greedy action tensor([-1.8965, -0.4736, 0.6813, -0.1136]) tensor([0.0412, 0.1710, 0.5427, 0.2451]) -Greedy action tensor([-0.3889, -0.4388, 0.1898, 0.1775]) tensor([0.1819, 0.1731, 0.3245, 0.3205]) -Greedy action tensor([-0.6196, -0.5436, 0.4164, -0.0481]) tensor([0.1500, 0.1618, 0.4226, 0.2656]) -Greedy action tensor([-1.8049, -0.4934, 0.6249, -0.0697]) tensor([0.0460, 0.1707, 0.5224, 0.2608]) -Greedy action tensor([-0.9273, -0.4329, 0.9135, 1.2944]) tensor([0.0551, 0.0903, 0.3469, 0.5078]) -Greedy action tensor([-1.6444, -0.4862, 0.6170, 0.1686]) tensor([0.0502, 0.1599, 0.4820, 0.3078]) -Greedy action tensor([-1.4490, -0.5259, 0.4417, 0.3133]) tensor([0.0626, 0.1576, 0.4148, 0.3649]) -Greedy action tensor([-1.6172, -0.1930, 0.6311, 0.1358]) tensor([0.0490, 0.2037, 0.4643, 0.2830]) -Greedy action tensor([-1.9319, -0.4535, 0.6624, -0.1680]) tensor([0.0406, 0.1782, 0.5440, 0.2371]) -Greedy action tensor([-1.7364, -0.5132, 0.3738, -0.3322]) tensor([0.0598, 0.2032, 0.4934, 0.2436]) -Greedy action tensor([-1.1229, -0.4263, 0.4499, 0.6712]) tensor([0.0722, 0.1450, 0.3483, 0.4345]) -Greedy action tensor([-1.8258, -0.4448, 0.6782, -0.0578]) tensor([0.0433, 0.1725, 0.5302, 0.2540]) -Greedy action tensor([-0.2700, 0.2375, 0.2097, 0.4204]) tensor([0.1595, 0.2649, 0.2576, 0.3180]) -Greedy action tensor([ 1.1160, 0.3911, -0.0031, 0.6579]) tensor([0.4093, 0.1982, 0.1337, 0.2588]) -Greedy action tensor([-1.8985, -0.4504, 0.6396, -0.1582]) tensor([0.0424, 0.1802, 0.5360, 0.2414]) -Greedy action tensor([-1.7913, -0.4000, 0.6434, -0.0570]) tensor([0.0453, 0.1819, 0.5165, 0.2564]) -Greedy action tensor([-1.1915, 0.5357, 0.1676, 0.0793]) tensor([0.0710, 0.3995, 0.2764, 0.2531]) -Greedy action tensor([-1.4874, -0.4859, 1.0650, 0.8910]) tensor([0.0366, 0.0995, 0.4694, 0.3944]) -Greedy action tensor([-1.2982, -0.5881, 0.4421, -0.0953]) tensor([0.0829, 0.1686, 0.4724, 0.2760]) -Greedy action tensor([-1.8808, -0.4361, 0.6397, -0.1430]) tensor([0.0428, 0.1815, 0.5323, 0.2434]) -Greedy action tensor([-0.8175, 0.2226, 0.3570, 0.8078]) tensor([0.0823, 0.2329, 0.2665, 0.4182]) -Greedy action tensor([-1.0274, -0.5300, 0.3842, 0.8095]) tensor([0.0768, 0.1263, 0.3150, 0.4820]) -Greedy action tensor([-1.7141, -0.0274, 0.4634, -0.2185]) tensor([0.0508, 0.2744, 0.4482, 0.2266]) -Greedy action tensor([-1.8187, -0.5176, 0.5302, -0.1372]) tensor([0.0487, 0.1790, 0.5104, 0.2619]) -Greedy action tensor([-1.9435, -0.4506, 0.6642, -0.1806]) tensor([0.0402, 0.1791, 0.5460, 0.2346]) -Greedy action tensor([-1.2578, -0.4668, 0.4490, 0.6143]) tensor([0.0657, 0.1449, 0.3621, 0.4272]) -Greedy action tensor([-1.5508, -0.3593, 0.7678, -0.6896]) tensor([0.0595, 0.1957, 0.6042, 0.1407]) -Greedy action tensor([-1.6810, -0.5025, 0.5392, 0.0573]) tensor([0.0522, 0.1697, 0.4810, 0.2971]) -Greedy action tensor([-1.8636, -0.4450, 0.6297, -0.1267]) tensor([0.0436, 0.1803, 0.5282, 0.2479]) -Greedy action tensor([-1.3250, -0.6217, 0.3311, 0.2189]) tensor([0.0773, 0.1561, 0.4048, 0.3618]) -Greedy action tensor([-1.9035, -0.4353, 0.6464, -0.1533]) tensor([0.0418, 0.1816, 0.5357, 0.2408]) -Greedy action tensor([-1.7702, -0.4902, 0.5810, -0.0642]) tensor([0.0485, 0.1746, 0.5096, 0.2673]) -Greedy action tensor([-0.7424, -0.5973, 0.4594, 0.4741]) tensor([0.1129, 0.1305, 0.3755, 0.3811]) -Greedy action tensor([-1.4079, -0.7758, 1.2268, 0.4184]) tensor([0.0434, 0.0817, 0.6052, 0.2697]) -Greedy action tensor([-0.6813, -0.3551, 0.5537, 1.2044]) tensor([0.0806, 0.1116, 0.2769, 0.5309]) -Greedy action tensor([-1.8201, -0.4745, 0.6420, -0.0277]) tensor([0.0443, 0.1701, 0.5196, 0.2660]) -Greedy action tensor([-1.5176, -0.1950, 0.6397, 0.4834]) tensor([0.0481, 0.1805, 0.4158, 0.3556]) -Greedy action tensor([-1.4272, -0.5846, 0.4185, 0.0269]) tensor([0.0718, 0.1667, 0.4544, 0.3072]) -Greedy action tensor([-0.9990, -0.1721, 0.2476, 0.6515]) tensor([0.0835, 0.1909, 0.2905, 0.4350]) -Greedy action tensor([-1.9099, -0.3487, 0.6296, -0.1589]) tensor([0.0413, 0.1969, 0.5237, 0.2381]) -Greedy action tensor([-1.8988, -0.3871, 0.6415, -0.1436]) tensor([0.0417, 0.1889, 0.5284, 0.2410]) -Greedy action tensor([-1.8782, -0.3843, 0.6231, -0.1488]) tensor([0.0429, 0.1913, 0.5238, 0.2421]) -Greedy action tensor([-1.7907, -0.4407, 0.5945, -0.1537]) tensor([0.0479, 0.1849, 0.5207, 0.2464]) -Greedy action tensor([ 0.9207, 0.2111, 0.3233, -0.0834]) tensor([0.4152, 0.2042, 0.2285, 0.1521]) -Greedy action tensor([ 1.1211, -0.6002, -0.3917, 0.5980]) tensor([0.5021, 0.0898, 0.1106, 0.2976]) -Greedy action tensor([ 0.1976, -0.0175, 0.0682, 0.2239]) tensor([0.2694, 0.2173, 0.2367, 0.2766]) -Greedy action tensor([ 1.7670, -0.5837, -0.2382, 0.5334]) tensor([0.6574, 0.0627, 0.0885, 0.1915]) -Greedy action tensor([ 0.6789, -0.1413, -0.1726, -0.2485]) tensor([0.4420, 0.1946, 0.1886, 0.1748]) -Greedy action tensor([ 1.8983, -0.7181, -0.1239, 0.7012]) tensor([0.6634, 0.0485, 0.0878, 0.2004]) -Greedy action tensor([ 1.9964, -0.5308, -0.5706, 0.5078]) tensor([0.7234, 0.0578, 0.0555, 0.1633]) -Greedy action tensor([ 0.8701, -0.4756, -0.2218, 0.4086]) tensor([0.4492, 0.1169, 0.1507, 0.2831]) -Greedy action tensor([ 0.5980, -0.4519, 0.1464, 0.1306]) tensor([0.3827, 0.1339, 0.2436, 0.2398]) -Greedy action tensor([ 1.1950, -0.7167, -0.4015, 0.3754]) tensor([0.5583, 0.0825, 0.1131, 0.2460]) -Greedy action tensor([ 1.3998, -0.3428, -0.2553, 0.3275]) tensor([0.5854, 0.1025, 0.1119, 0.2003]) -Greedy action tensor([ 1.4231, -0.3259, -0.0811, -0.0598]) tensor([0.6161, 0.1072, 0.1369, 0.1398]) -Greedy action tensor([ 0.8481, -0.1341, -0.1477, 0.0848]) tensor([0.4525, 0.1694, 0.1672, 0.2109]) -Greedy action tensor([ 1.5085, -0.1828, -0.4872, 0.6085]) tensor([0.5791, 0.1067, 0.0787, 0.2354]) -Greedy action tensor([ 1.5118, -0.3012, -0.6344, 0.2654]) tensor([0.6379, 0.1041, 0.0746, 0.1834]) -Greedy action tensor([ 1.2901, -0.4732, -0.5502, 0.1668]) tensor([0.6041, 0.1036, 0.0959, 0.1964]) -Greedy action tensor([ 0.9768, -0.2702, -0.1626, 0.2509]) tensor([0.4782, 0.1374, 0.1530, 0.2314]) -Greedy action tensor([ 1.0748, -0.4550, -0.2626, 0.1775]) tensor([0.5300, 0.1148, 0.1391, 0.2161]) -Greedy action tensor([ 1.8338, -0.5953, -0.6517, 1.1675]) tensor([0.5935, 0.0523, 0.0494, 0.3048]) -Greedy action tensor([ 1.2477, -0.1206, -0.4846, 0.6448]) tensor([0.5054, 0.1286, 0.0894, 0.2766]) -Greedy action tensor([ 1.0421, -0.3827, -0.1190, 0.1829]) tensor([0.5057, 0.1217, 0.1584, 0.2142]) -Greedy action tensor([ 1.4799, -0.6458, -0.5005, 0.4930]) tensor([0.6135, 0.0732, 0.0847, 0.2287]) -Greedy action tensor([ 1.4303, -0.3853, -0.7820, 0.6157]) tensor([0.5831, 0.0949, 0.0638, 0.2582]) -Greedy action tensor([ 2.2390, -0.8426, -0.1447, 0.7625]) tensor([0.7318, 0.0336, 0.0675, 0.1672]) -Greedy action tensor([ 1.4016, -0.5368, -0.1682, -0.0578]) tensor([0.6312, 0.0908, 0.1313, 0.1467]) -Greedy action tensor([ 1.7707, -0.5173, -0.4076, 0.3937]) tensor([0.6816, 0.0692, 0.0772, 0.1720]) -Greedy action tensor([ 2.0317, 0.0828, -0.8238, 0.4286]) tensor([0.7137, 0.1016, 0.0411, 0.1436]) -Greedy action tensor([0.9325, 0.2639, 0.0378, 0.2754]) tensor([0.4099, 0.2101, 0.1676, 0.2125]) -Greedy action tensor([ 0.9059, -0.3391, -0.4811, 0.6119]) tensor([0.4380, 0.1261, 0.1094, 0.3264]) -Greedy action tensor([ 1.5918, -0.2768, -0.1551, 0.3636]) tensor([0.6167, 0.0952, 0.1075, 0.1806]) -Greedy action tensor([ 1.0813, -0.1523, -0.5919, 0.3649]) tensor([0.5083, 0.1480, 0.0954, 0.2483]) -Greedy action tensor([ 0.8809, -0.0040, -0.3094, 0.3357]) tensor([0.4354, 0.1797, 0.1324, 0.2524]) -Greedy action tensor([ 1.9302, -0.3976, -0.4276, 0.3448]) tensor([0.7158, 0.0698, 0.0677, 0.1466]) -Greedy action tensor([ 1.9568, -0.4000, -0.6135, -0.0204]) tensor([0.7635, 0.0723, 0.0584, 0.1057]) -Greedy action tensor([ 2.2335, -0.8540, -0.0851, 0.7362]) tensor([0.7311, 0.0334, 0.0719, 0.1636]) -Greedy action tensor([ 0.9077, 0.0859, -0.2745, 0.2119]) tensor([0.4454, 0.1958, 0.1366, 0.2221]) -Greedy action tensor([ 2.3241, -0.8443, -0.6164, 0.7378]) tensor([0.7695, 0.0324, 0.0407, 0.1575]) -Greedy action tensor([ 1.4632, 0.1145, -0.1780, -0.2008]) tensor([0.6087, 0.1580, 0.1179, 0.1153]) -Greedy action tensor([ 0.2068, -0.1798, -0.1763, 0.4218]) tensor([0.2777, 0.1887, 0.1893, 0.3443]) -Greedy action tensor([ 0.9320, -0.5484, -0.2993, 0.3655]) tensor([0.4792, 0.1090, 0.1399, 0.2719]) -Greedy action tensor([ 0.7091, -0.1555, 0.2644, -0.0162]) tensor([0.3927, 0.1654, 0.2517, 0.1901]) -Greedy action tensor([ 1.9624, 0.0052, -0.2590, 0.1561]) tensor([0.7072, 0.0999, 0.0767, 0.1162]) -Greedy action tensor([ 0.9751, -0.5070, -0.1029, 0.1550]) tensor([0.4980, 0.1131, 0.1695, 0.2193]) -Greedy action tensor([ 1.7143, -0.6397, -0.2884, 0.3289]) tensor([0.6756, 0.0642, 0.0912, 0.1690]) -Greedy action tensor([ 1.9377, -0.5237, -0.5236, 0.4327]) tensor([0.7180, 0.0613, 0.0613, 0.1594]) -Greedy action tensor([ 0.7112, -0.0656, -0.3252, 0.5649]) tensor([0.3733, 0.1717, 0.1324, 0.3225]) -Greedy action tensor([ 1.6596, -0.6903, -0.2781, 0.4254]) tensor([0.6534, 0.0623, 0.0941, 0.1902]) -Greedy action tensor([ 2.1817, -0.6026, -0.3670, 0.2302]) tensor([0.7800, 0.0482, 0.0610, 0.1108]) -Greedy action tensor([ 1.7679, -0.8879, -0.3760, 1.1886]) tensor([0.5722, 0.0402, 0.0671, 0.3206]) -Greedy action tensor([ 1.4935, -0.8714, -0.2976, 0.4677]) tensor([0.6176, 0.0580, 0.1030, 0.2214]) -Greedy action tensor([ 1.9043, -0.5806, -0.5406, 0.6120]) tensor([0.6922, 0.0577, 0.0600, 0.1901]) -Greedy action tensor([ 1.4681, -0.3767, -0.4206, 0.0936]) tensor([0.6401, 0.1012, 0.0968, 0.1619]) -Greedy action tensor([ 1.2356, -0.2620, -0.3884, 0.2876]) tensor([0.5530, 0.1237, 0.1090, 0.2143]) -Greedy action tensor([ 1.0104, -0.4183, -0.1594, 0.2806]) tensor([0.4921, 0.1179, 0.1528, 0.2372]) -Greedy action tensor([ 2.0022, -0.4392, -0.4312, 0.5843]) tensor([0.7057, 0.0614, 0.0619, 0.1709]) -Greedy action tensor([ 1.9509, -0.2010, -0.2871, 0.5760]) tensor([0.6776, 0.0788, 0.0723, 0.1713]) -Greedy action tensor([ 0.6807, -0.0907, -0.1028, 0.1587]) tensor([0.3980, 0.1840, 0.1818, 0.2362]) -Greedy action tensor([ 0.7801, -0.3563, -0.3543, 0.5833]) tensor([0.4058, 0.1303, 0.1305, 0.3334]) -Greedy action tensor([ 1.1520, -0.2323, -0.3315, -0.0071]) tensor([0.5583, 0.1399, 0.1267, 0.1752]) -Greedy action tensor([ 1.5515, -0.0806, -0.0302, 0.1842]) tensor([0.6039, 0.1181, 0.1242, 0.1539]) -Greedy action tensor([ 1.7010, -0.5435, -0.4094, 0.2159]) tensor([0.6879, 0.0729, 0.0834, 0.1558]) -Greedy action tensor([ 1.5337, -0.8882, -0.0667, 0.2998]) tensor([0.6322, 0.0561, 0.1276, 0.1841]) -Greedy action tensor([ 1.7578, -1.0890, -0.3894, 0.4855]) tensor([0.6873, 0.0399, 0.0803, 0.1926]) -Greedy action tensor([ 1.4712, -0.4603, -0.1014, 0.3142]) tensor([0.5999, 0.0869, 0.1245, 0.1886]) -Greedy action tensor([ 2.2374, -1.3989, -0.2390, 0.5499]) tensor([0.7720, 0.0203, 0.0649, 0.1428]) -Greedy action tensor([ 1.5801, -0.4963, -0.4593, 0.2432]) tensor([0.6587, 0.0826, 0.0857, 0.1730]) -Greedy action tensor([ 1.0248, -0.4068, -0.1892, -0.0115]) tensor([0.5289, 0.1264, 0.1571, 0.1876]) -Greedy action tensor([ 1.7823, -0.6327, -0.2539, 0.7138]) tensor([0.6396, 0.0572, 0.0835, 0.2197]) -Greedy action tensor([ 1.1615, -0.3548, 0.0648, -0.3902]) tensor([0.5665, 0.1243, 0.1892, 0.1200]) -Greedy action tensor([ 1.0266, -0.5338, 0.0137, 0.1629]) tensor([0.5013, 0.1053, 0.1821, 0.2114]) -Greedy action tensor([ 0.6323, -0.4728, -0.0077, -0.0927]) tensor([0.4268, 0.1414, 0.2251, 0.2067]) -Greedy action tensor([ 1.0474, -0.1813, -0.2628, 0.2015]) tensor([0.5021, 0.1470, 0.1355, 0.2155]) -Greedy action tensor([ 1.3689, 0.0447, -0.3574, 0.2172]) tensor([0.5682, 0.1511, 0.1011, 0.1796]) -Greedy action tensor([ 0.9676, -0.2489, -0.4981, 0.2886]) tensor([0.4916, 0.1456, 0.1135, 0.2493]) -Greedy action tensor([ 1.0635, -0.2448, -0.4920, 0.2614]) tensor([0.5182, 0.1401, 0.1094, 0.2324]) -Greedy action tensor([ 1.4314, -0.8464, -0.0738, 0.6311]) tensor([0.5638, 0.0578, 0.1251, 0.2533]) -Greedy action tensor([ 1.5286, -0.1516, -0.3754, 0.2533]) tensor([0.6193, 0.1154, 0.0923, 0.1730]) -Greedy action tensor([ 1.4456, -0.3161, 0.1994, -0.0598]) tensor([0.5948, 0.1022, 0.1711, 0.1320]) -Greedy action tensor([ 1.3212, -0.4358, -0.1158, 0.1531]) tensor([0.5810, 0.1003, 0.1381, 0.1807]) -Greedy action tensor([ 1.3910, -0.7087, -0.2306, 0.3523]) tensor([0.5974, 0.0732, 0.1180, 0.2114]) -Greedy action tensor([ 1.3373, -0.2340, -0.2098, 0.2228]) tensor([0.5718, 0.1188, 0.1217, 0.1876]) -Greedy action tensor([ 0.5080, 0.1423, -0.0854, -0.1980]) tensor([0.3650, 0.2532, 0.2016, 0.1802]) -Greedy action tensor([ 0.7388, -0.1381, -0.1546, 0.0130]) tensor([0.4330, 0.1802, 0.1772, 0.2096]) -Greedy action tensor([ 0.9663, -0.7422, -0.0064, -0.4260]) tensor([0.5532, 0.1002, 0.2091, 0.1375]) -Greedy action tensor([ 0.5713, -0.2282, -0.0609, -0.0995]) tensor([0.4012, 0.1804, 0.2132, 0.2052]) -Greedy action tensor([ 0.7400, -0.4501, -0.0905, -0.4505]) tensor([0.4892, 0.1488, 0.2132, 0.1488]) -Greedy action tensor([ 0.5924, -0.4393, -0.0470, -0.2634]) tensor([0.4331, 0.1544, 0.2285, 0.1841]) -Greedy action tensor([ 0.5026, -0.2262, -0.0866, -0.2367]) tensor([0.3976, 0.1919, 0.2206, 0.1899]) -Greedy action tensor([ 0.4358, 0.2146, -0.1160, -0.3586]) tensor([0.3534, 0.2833, 0.2036, 0.1597]) -Greedy action tensor([ 0.5002, -0.0782, -0.2364, -0.0842]) tensor([0.3851, 0.2159, 0.1843, 0.2147]) -Greedy action tensor([ 1.0312, -1.2633, -0.0535, -0.5439]) tensor([0.6076, 0.0613, 0.2054, 0.1258]) -Greedy action tensor([ 0.8638, -0.5025, 0.0600, -0.3451]) tensor([0.4997, 0.1274, 0.2237, 0.1492]) -Greedy action tensor([ 0.6480, -0.3969, -0.0065, -0.3558]) tensor([0.4468, 0.1572, 0.2322, 0.1638]) -Greedy action tensor([ 0.4602, -0.1885, -0.1260, -0.3454]) tensor([0.3959, 0.2069, 0.2203, 0.1769]) -Greedy action tensor([ 0.6478, -0.3451, -0.1074, -0.2995]) tensor([0.4488, 0.1663, 0.2109, 0.1740]) -Greedy action tensor([ 0.8500, -0.3782, 0.0009, -0.4856]) tensor([0.5041, 0.1476, 0.2157, 0.1326]) -Greedy action tensor([ 1.4138, -1.1829, 0.0754, -0.8138]) tensor([0.6922, 0.0516, 0.1816, 0.0746]) -Greedy action tensor([ 0.3501, -0.1059, -0.0892, -0.0831]) tensor([0.3417, 0.2166, 0.2202, 0.2216]) -Greedy action tensor([ 0.1361, -0.0860, -0.1491, -0.4346]) tensor([0.3207, 0.2568, 0.2411, 0.1813]) -Greedy action tensor([ 0.6423, -0.1753, 0.0274, -0.3986]) tensor([0.4282, 0.1890, 0.2315, 0.1512]) -Greedy action tensor([ 0.7190, -0.3825, -0.0790, 0.0377]) tensor([0.4370, 0.1452, 0.1967, 0.2211]) -Greedy action tensor([ 0.8262, -0.6770, 0.0703, -0.6813]) tensor([0.5226, 0.1162, 0.2454, 0.1157]) -Greedy action tensor([ 0.8992, -0.6304, 0.0567, -0.6978]) tensor([0.5406, 0.1171, 0.2328, 0.1095]) -Greedy action tensor([ 0.5832, -0.0729, -0.0635, -0.9713]) tensor([0.4437, 0.2302, 0.2324, 0.0938]) -Greedy action tensor([ 0.9240, -0.6608, -0.0934, -0.4040]) tensor([0.5460, 0.1119, 0.1974, 0.1447]) -Greedy action tensor([ 0.9231, -0.6072, 0.1188, -0.3675]) tensor([0.5157, 0.1116, 0.2307, 0.1419]) -Greedy action tensor([ 0.4743, -0.0490, 0.0053, -0.4541]) tensor([0.3826, 0.2267, 0.2394, 0.1512]) -Greedy action tensor([ 0.4687, 0.0129, -0.0448, -0.0578]) tensor([0.3542, 0.2246, 0.2120, 0.2092]) -Greedy action tensor([ 0.2619, -0.1398, -0.1291, -0.1658]) tensor([0.3336, 0.2232, 0.2256, 0.2175]) -Greedy action tensor([ 0.4485, 0.0079, -0.0297, -0.1863]) tensor([0.3580, 0.2304, 0.2219, 0.1897]) -Greedy action tensor([ 0.5477, -0.4421, 0.1739, -0.4127]) tensor([0.4094, 0.1522, 0.2817, 0.1567]) -Greedy action tensor([ 0.4350, 0.0970, -0.0640, -0.1279]) tensor([0.3460, 0.2468, 0.2101, 0.1971]) -Greedy action tensor([ 0.5564, -0.4941, 0.0313, -0.4264]) tensor([0.4319, 0.1511, 0.2554, 0.1616]) -Greedy action tensor([ 0.3021, -0.0214, -0.0182, -0.5399]) tensor([0.3472, 0.2512, 0.2520, 0.1496]) -Greedy action tensor([ 0.7758, -0.5438, -0.0151, -0.1291]) tensor([0.4705, 0.1257, 0.2133, 0.1904]) -Greedy action tensor([ 0.2888, -0.3958, -0.1188, -0.1721]) tensor([0.3571, 0.1801, 0.2376, 0.2252]) -Greedy action tensor([ 1.0201, -0.5220, -0.0062, -0.3151]) tensor([0.5448, 0.1166, 0.1952, 0.1434]) -Greedy action tensor([ 0.6498, -0.3613, 0.0519, -0.4192]) tensor([0.4430, 0.1612, 0.2437, 0.1521]) -Greedy action tensor([ 0.5494, -0.0798, -0.0342, -0.1091]) tensor([0.3833, 0.2043, 0.2139, 0.1984]) -Greedy action tensor([ 0.4985, -0.4269, -0.0114, -0.2882]) tensor([0.4078, 0.1616, 0.2449, 0.1857]) -Greedy action tensor([ 0.9315, -0.9051, 0.1180, -0.7842]) tensor([0.5610, 0.0894, 0.2487, 0.1009]) -Greedy action tensor([ 0.4514, -0.2038, 0.1705, -0.5144]) tensor([0.3766, 0.1956, 0.2844, 0.1434]) -Greedy action tensor([ 1.1673, -1.2911, 0.0402, -0.4834]) tensor([0.6244, 0.0534, 0.2023, 0.1198]) -Greedy action tensor([ 0.4992, 0.4160, -0.2591, -0.3122]) tensor([0.3530, 0.3248, 0.1654, 0.1568]) -Greedy action tensor([ 0.5130, -0.4307, -0.0947, -0.3925]) tensor([0.4277, 0.1665, 0.2329, 0.1729]) -Greedy action tensor([ 0.2977, 0.0306, 0.0286, -0.1952]) tensor([0.3184, 0.2438, 0.2433, 0.1945]) -Greedy action tensor([ 0.9192, -0.7768, 0.0377, -0.3193]) tensor([0.5298, 0.0972, 0.2194, 0.1536]) -Greedy action tensor([ 0.5067, -0.3221, 0.0146, -0.4406]) tensor([0.4106, 0.1792, 0.2510, 0.1592]) -Greedy action tensor([ 0.7651, -0.5431, -0.0302, -0.4734]) tensor([0.4971, 0.1344, 0.2244, 0.1441]) -Greedy action tensor([ 0.6899, -0.4835, -0.0106, -0.3973]) tensor([0.4667, 0.1443, 0.2316, 0.1574]) -Greedy action tensor([ 0.5573, -0.4458, 0.0188, -0.5039]) tensor([0.4355, 0.1597, 0.2541, 0.1507]) -Greedy action tensor([ 0.4011, 0.0432, -0.0592, -0.3019]) tensor([0.3539, 0.2474, 0.2234, 0.1752]) -Greedy action tensor([ 0.5167, -0.4691, 0.0589, -0.2396]) tensor([0.4040, 0.1508, 0.2556, 0.1896]) -Greedy action tensor([ 0.4801, -0.4249, 0.2615, -0.1152]) tensor([0.3624, 0.1466, 0.2912, 0.1998]) -Greedy action tensor([ 0.5202, -0.1038, -0.0248, -0.3345]) tensor([0.3935, 0.2109, 0.2282, 0.1674]) -Greedy action tensor([ 4.7348e-01, -2.2503e-01, -4.2680e-04, -1.8767e-01]) tensor([0.3793, 0.1887, 0.2362, 0.1958]) -Greedy action tensor([ 0.5309, -0.3330, -0.0763, -0.2805]) tensor([0.4148, 0.1749, 0.2260, 0.1843]) -Greedy action tensor([ 0.5000, -0.4893, 0.1315, -0.6072]) tensor([0.4177, 0.1553, 0.2889, 0.1380]) -Greedy action tensor([ 0.3540, 0.2025, -0.0623, -0.2586]) tensor([0.3267, 0.2808, 0.2155, 0.1771]) -Greedy action tensor([ 0.6164, -0.3234, -0.0587, -0.2711]) tensor([0.4326, 0.1690, 0.2202, 0.1781]) -Greedy action tensor([ 0.6771, -0.5853, 0.0228, -0.3560]) tensor([0.4633, 0.1311, 0.2408, 0.1649]) -Greedy action tensor([ 1.1094, -0.7834, 0.1001, -0.3183]) tensor([0.5698, 0.0858, 0.2077, 0.1367]) -Greedy action tensor([ 0.6498, -0.5656, -0.0407, -0.5199]) tensor([0.4743, 0.1407, 0.2378, 0.1472]) -Greedy action tensor([ 0.8029, -0.6216, -0.1394, -0.8228]) tensor([0.5473, 0.1317, 0.2133, 0.1077]) -Greedy action tensor([ 0.6419, -0.1718, -0.3295, -0.4256]) tensor([0.4618, 0.2047, 0.1748, 0.1588]) -Greedy action tensor([ 0.2278, 0.0954, -0.0776, -0.1113]) tensor([0.3007, 0.2634, 0.2216, 0.2142]) -Greedy action tensor([ 0.4006, -0.3147, 0.0472, -0.3929]) tensor([0.3783, 0.1850, 0.2657, 0.1711]) -Greedy action tensor([ 0.4628, 0.0466, -0.0373, -0.0345]) tensor([0.3479, 0.2295, 0.2110, 0.2116]) -Greedy action tensor([ 0.6447, -0.3320, -0.0130, -0.2822]) tensor([0.4366, 0.1644, 0.2262, 0.1728]) -Greedy action tensor([ 0.4882, 0.1986, -0.1077, -0.2454]) tensor([0.3597, 0.2693, 0.1982, 0.1727]) -Greedy action tensor([ 0.3597, 0.1195, -0.1292, -0.3725]) tensor([0.3471, 0.2730, 0.2129, 0.1669]) -Greedy action tensor([ 0.9816, 0.2642, -0.0415, -0.3248]) tensor([0.4721, 0.2304, 0.1697, 0.1278]) -Greedy action tensor([ 0.4761, -0.2872, 0.0095, -0.2014]) tensor([0.3844, 0.1792, 0.2411, 0.1952]) -Greedy action tensor([ 0.6432, -0.2487, -0.0721, -0.1717]) tensor([0.4270, 0.1750, 0.2089, 0.1891]) -Greedy action tensor([ 0.6954, -0.4655, -0.0033, -0.3186]) tensor([0.4602, 0.1441, 0.2288, 0.1669]) -Greedy action tensor([ 0.7438, -0.5478, -0.1971, -0.1786]) tensor([0.4848, 0.1332, 0.1892, 0.1927]) -Greedy action tensor([ 0.5024, -0.3329, 0.1669, -0.4751]) tensor([0.3961, 0.1718, 0.2832, 0.1490]) -Greedy action tensor([ 0.5935, -0.3176, 0.0720, -0.3885]) tensor([0.4219, 0.1696, 0.2504, 0.1580]) -Greedy action tensor([ 0.5020, 0.4094, -0.0540, -0.2564]) tensor([0.3386, 0.3086, 0.1942, 0.1586]) -Greedy action tensor([ 0.8128, -0.3769, 0.0048, -0.3161]) tensor([0.4823, 0.1468, 0.2150, 0.1560]) -Greedy action tensor([ 0.6424, -0.5659, 0.1760, -0.6582]) tensor([0.4549, 0.1359, 0.2853, 0.1239]) -Greedy action tensor([ 0.6788, -0.3340, -0.0381, -0.3379]) tensor([0.4518, 0.1641, 0.2206, 0.1635]) -Greedy action tensor([ 1.6626, -0.2397, 0.7202, 0.7868]) tensor([0.5114, 0.0763, 0.1993, 0.2130]) -Greedy action tensor([-1.5072, -0.5367, -0.9426, -0.1693]) tensor([0.1086, 0.2866, 0.1910, 0.4138]) -Greedy action tensor([-1.0063, -0.5205, 0.9729, 0.3312]) tensor([0.0731, 0.1189, 0.5293, 0.2786]) -Greedy action tensor([0.9142, 0.1165, 0.6402, 1.0590]) tensor([0.2970, 0.1338, 0.2259, 0.3433]) -Greedy action tensor([ 1.8648, -0.4150, -0.3265, 0.8940]) tensor([0.6278, 0.0642, 0.0702, 0.2378]) -Greedy action tensor([0.7382, 0.4404, 0.4019, 1.0034]) tensor([0.2659, 0.1974, 0.1900, 0.3467]) -Greedy action tensor([ 0.2822, 0.8871, -1.0738, 0.6219]) tensor([0.2226, 0.4075, 0.0574, 0.3126]) -Greedy action tensor([-0.1337, 0.2474, -0.2325, 0.0808]) tensor([0.2170, 0.3176, 0.1966, 0.2689]) -Greedy action tensor([1.0718, 0.9817, 0.0040, 0.0917]) tensor([0.3798, 0.3471, 0.1306, 0.1425]) -Greedy action tensor([-0.4054, -1.0430, 1.0864, 0.3345]) tensor([0.1239, 0.0655, 0.5509, 0.2597]) -Greedy action tensor([0.5063, 0.1290, 0.4616, 0.8570]) tensor([0.2462, 0.1688, 0.2354, 0.3496]) -Greedy action tensor([ 1.6393, -0.8092, 0.5485, 0.4542]) tensor([0.5787, 0.0500, 0.1944, 0.1769]) -Greedy action tensor([-0.1849, -1.3384, 0.7950, -0.5024]) tensor([0.2124, 0.0670, 0.5659, 0.1546]) -Greedy action tensor([ 0.9346, -1.4911, -0.3311, 0.1848]) tensor([0.5426, 0.0480, 0.1530, 0.2564]) -Greedy action tensor([ 0.7398, -1.6803, 0.8194, -0.1960]) tensor([0.3900, 0.0347, 0.4223, 0.1530]) -Greedy action tensor([ 0.4508, -2.6536, -0.2961, 0.5946]) tensor([0.3741, 0.0168, 0.1772, 0.4319]) -Greedy action tensor([-1.1338, -1.9655, -0.5200, 1.1720]) tensor([0.0751, 0.0327, 0.1387, 0.7535]) -Greedy action tensor([-0.4411, -0.4305, -0.2550, 0.0882]) tensor([0.2035, 0.2057, 0.2452, 0.3456]) -Greedy action tensor([0.3858, 0.1176, 0.6995, 0.4025]) tensor([0.2410, 0.1843, 0.3298, 0.2450]) -Greedy action tensor([-0.3091, -0.4932, -1.4071, 0.1499]) tensor([0.2668, 0.2220, 0.0890, 0.4222]) -Greedy action tensor([ 1.4106, -0.1583, 1.1103, 0.8502]) tensor([0.3968, 0.0827, 0.2939, 0.2266]) -Greedy action tensor([ 0.3660, -1.9213, -0.3460, -0.1518]) tensor([0.4570, 0.0464, 0.2242, 0.2723]) -Greedy action tensor([-0.8143, -1.7188, -0.8238, 0.7989]) tensor([0.1349, 0.0546, 0.1336, 0.6769]) -Greedy action tensor([ 1.1309, 0.5341, -0.3259, -0.0144]) tensor([0.4758, 0.2620, 0.1109, 0.1514]) -Greedy action tensor([1.5345, 0.2509, 0.5600, 0.8080]) tensor([0.4677, 0.1296, 0.1765, 0.2262]) -Greedy action tensor([-1.3234, -0.2457, -0.7683, -0.8551]) tensor([0.1374, 0.4037, 0.2394, 0.2195]) -Greedy action tensor([ 0.6962, -0.9674, 0.7892, 0.6720]) tensor([0.3065, 0.0581, 0.3363, 0.2991]) -Greedy action tensor([ 0.2653, -0.0258, 2.3723, -0.7621]) tensor([0.0968, 0.0724, 0.7962, 0.0347]) -Greedy action tensor([1.4382, 0.5150, 1.3766, 0.7031]) tensor([0.3550, 0.1410, 0.3338, 0.1702]) -Greedy action tensor([0.7049, 0.6357, 0.1095, 0.2852]) tensor([0.3183, 0.2970, 0.1755, 0.2092]) -Greedy action tensor([-0.3616, -2.1926, 0.2558, 0.2324]) tensor([0.2072, 0.0332, 0.3842, 0.3753]) -Greedy action tensor([ 1.3657, -0.6838, 1.2950, 0.2593]) tensor([0.4182, 0.0539, 0.3896, 0.1383]) -Greedy action tensor([ 0.2249, -0.3152, -0.6579, 1.5639]) tensor([0.1721, 0.1003, 0.0712, 0.6565]) -Greedy action tensor([1.0949, 0.4076, 0.5978, 1.6948]) tensor([0.2542, 0.1279, 0.1547, 0.4632]) -Greedy action tensor([ 1.0606, -1.5043, 0.7515, 0.8826]) tensor([0.3777, 0.0291, 0.2772, 0.3161]) -Greedy action tensor([ 0.7361, -0.3357, 1.2356, 0.4465]) tensor([0.2675, 0.0916, 0.4407, 0.2002]) -Greedy action tensor([-0.6667, -1.0987, -0.3119, 1.9418]) tensor([0.0600, 0.0390, 0.0856, 0.8153]) -Greedy action tensor([-1.0528, -2.4903, 0.8086, 0.2534]) tensor([0.0880, 0.0209, 0.5661, 0.3250]) -Greedy action tensor([-0.7717, 0.0501, 0.0904, 0.0917]) tensor([0.1248, 0.2838, 0.2955, 0.2959]) -Greedy action tensor([ 0.9682, -0.0783, 0.4389, -0.4097]) tensor([0.4561, 0.1602, 0.2687, 0.1150]) -Greedy action tensor([-0.8635, -0.7293, -0.7822, -0.7757]) tensor([0.2315, 0.2647, 0.2511, 0.2527]) -Greedy action tensor([-1.7352, -0.4127, -0.5698, -0.5689]) tensor([0.0895, 0.3360, 0.2871, 0.2874]) -Greedy action tensor([ 0.4977, -1.9380, 0.0063, -0.1398]) tensor([0.4488, 0.0393, 0.2746, 0.2373]) -Greedy action tensor([ 1.5536, -0.7991, 0.7093, 1.5920]) tensor([0.3900, 0.0371, 0.1677, 0.4053]) -Greedy action tensor([ 0.4382, -0.2570, 0.3898, 0.8709]) tensor([0.2504, 0.1250, 0.2386, 0.3860]) -Greedy action tensor([-0.0744, 0.5386, 0.0147, 0.1420]) tensor([0.1930, 0.3563, 0.2110, 0.2397]) -Greedy action tensor([ 1.3740, 0.5877, -0.0269, 0.5020]) tensor([0.4717, 0.2149, 0.1162, 0.1972]) -Greedy action tensor([ 0.8819, -1.0043, 1.5060, 0.5742]) tensor([0.2664, 0.0404, 0.4973, 0.1959]) -Greedy action tensor([-1.4221, -0.2340, 0.6467, -1.2105]) tensor([0.0745, 0.2443, 0.5893, 0.0920]) -Greedy action tensor([0.4508, 0.0043, 1.6223, 0.1887]) tensor([0.1774, 0.1135, 0.5725, 0.1365]) -Greedy action tensor([-0.0991, -1.0262, -0.2543, 1.0766]) tensor([0.1821, 0.0720, 0.1559, 0.5900]) -Greedy action tensor([-0.7456, -0.5556, 0.0919, -0.3154]) tensor([0.1651, 0.1996, 0.3815, 0.2538]) -Greedy action tensor([-0.3725, -1.3876, -0.4187, -0.1778]) tensor([0.2831, 0.1026, 0.2703, 0.3440]) -Greedy action tensor([ 1.9822, -0.2223, 0.5558, 0.3195]) tensor([0.6493, 0.0716, 0.1559, 0.1231]) -Greedy action tensor([ 0.6836, -1.4095, -0.3695, 1.4477]) tensor([0.2763, 0.0341, 0.0964, 0.5932]) -Greedy action tensor([-0.4106, -1.5958, -0.1745, 0.5213]) tensor([0.1956, 0.0598, 0.2478, 0.4968]) -Greedy action tensor([ 0.8921, 0.4382, 0.0060, -0.5838]) tensor([0.4394, 0.2791, 0.1811, 0.1004]) -Greedy action tensor([-0.4638, -0.9513, -0.0880, -0.3986]) tensor([0.2417, 0.1484, 0.3519, 0.2580]) -Greedy action tensor([ 0.7854, -1.0565, 0.2195, 0.4137]) tensor([0.4139, 0.0656, 0.2350, 0.2854]) -Greedy action tensor([ 0.3560, -1.4903, 0.8385, -1.1619]) tensor([0.3337, 0.0527, 0.5406, 0.0731]) -Greedy action tensor([ 0.6252, -0.7933, 0.0236, -0.1958]) tensor([0.4484, 0.1086, 0.2457, 0.1973]) -Greedy action tensor([-1.1323, -1.8694, 1.9642, -1.0826]) tensor([0.0406, 0.0194, 0.8974, 0.0426]) -Greedy action tensor([-0.1309, -1.9429, -0.6081, 0.8381]) tensor([0.2263, 0.0370, 0.1404, 0.5963]) -Greedy action tensor([ 0.1146, -0.8641, -0.8853, -0.1168]) tensor([0.3941, 0.1481, 0.1450, 0.3127]) -Greedy action tensor([ 1.2344, 0.4903, 0.2959, -0.1005]) tensor([0.4696, 0.2231, 0.1837, 0.1236]) -Greedy action tensor([ 1.1431, -0.3340, 1.2312, 0.5899]) tensor([0.3454, 0.0788, 0.3772, 0.1986]) -Greedy action tensor([-0.4404, 0.0854, -0.0790, -0.0483]) tensor([0.1783, 0.3017, 0.2560, 0.2640]) -Greedy action tensor([ 0.0218, -0.7049, 1.0646, -0.1736]) tensor([0.1944, 0.0940, 0.5516, 0.1599]) -Greedy action tensor([ 0.2096, -1.4072, 0.4377, 0.4907]) tensor([0.2646, 0.0525, 0.3324, 0.3505]) -Greedy action tensor([ 0.0840, -1.0042, 0.2646, -0.3145]) tensor([0.3119, 0.1051, 0.3736, 0.2094]) -Greedy action tensor([-0.0910, -0.9394, -0.1006, 1.6786]) tensor([0.1207, 0.0517, 0.1195, 0.7082]) -Greedy action tensor([ 1.1919, 0.4119, 0.7436, -0.4525]) tensor([0.4366, 0.2002, 0.2789, 0.0843]) -Greedy action tensor([ 1.6667, -1.0706, 1.2016, 2.0062]) tensor([0.3229, 0.0209, 0.2028, 0.4534]) -Greedy action tensor([-0.0951, 0.5770, 0.3626, -0.2837]) tensor([0.1863, 0.3649, 0.2945, 0.1543]) -Greedy action tensor([ 0.9977, -0.8070, 1.4357, -0.3378]) tensor([0.3359, 0.0553, 0.5205, 0.0883]) -Greedy action tensor([ 1.0254, -0.5707, 1.5860, 0.4374]) tensor([0.2849, 0.0577, 0.4991, 0.1583]) -Greedy action tensor([ 0.5993, -0.4585, -0.5039, 1.2614]) tensor([0.2764, 0.0960, 0.0917, 0.5359]) -Greedy action tensor([-0.1422, -1.6222, 0.3486, 1.5201]) tensor([0.1230, 0.0280, 0.2009, 0.6482]) -Greedy action tensor([-0.3689, 0.0205, -0.3202, -1.4673]) tensor([0.2591, 0.3825, 0.2720, 0.0864]) -Greedy action tensor([ 1.3722, -0.1194, 0.0532, 0.5860]) tensor([0.5133, 0.1155, 0.1373, 0.2339]) -Greedy action tensor([ 1.2270, 0.5901, -0.3355, 0.8352]) tensor([0.4142, 0.2191, 0.0868, 0.2799]) -Greedy action tensor([-0.4216, -0.5577, 0.6076, -1.1796]) tensor([0.1945, 0.1698, 0.5445, 0.0912]) -Greedy action tensor([ 1.5411, -0.2445, -0.4474, 0.3375]) tensor([0.6232, 0.1045, 0.0853, 0.1870]) -Greedy action tensor([ 0.8994, -0.1564, -0.0871, 0.5640]) tensor([0.4105, 0.1428, 0.1531, 0.2935]) -Greedy action tensor([ 1.2861, -0.3057, -0.0758, 0.0964]) tensor([0.5669, 0.1154, 0.1452, 0.1725]) -Greedy action tensor([ 1.2193, -0.2195, -0.3495, 0.6019]) tensor([0.5038, 0.1195, 0.1049, 0.2717]) -Greedy action tensor([ 0.7199, -0.1155, 0.0415, -0.0812]) tensor([0.4184, 0.1815, 0.2123, 0.1878]) -Greedy action tensor([ 2.0027, -1.2150, -0.2174, 0.1916]) tensor([0.7621, 0.0305, 0.0828, 0.1246]) -Greedy action tensor([ 1.5891, -0.6983, -0.2170, 0.3654]) tensor([0.6410, 0.0651, 0.1053, 0.1886]) -Greedy action tensor([ 1.0277, -0.3452, -0.2766, 0.0313]) tensor([0.5280, 0.1338, 0.1433, 0.1949]) -Greedy action tensor([ 0.7054, -0.3498, 0.1290, -0.2375]) tensor([0.4349, 0.1514, 0.2444, 0.1694]) -Greedy action tensor([ 1.6954, -0.6771, -0.4551, 0.5330]) tensor([0.6568, 0.0613, 0.0765, 0.2054]) -Greedy action tensor([ 1.3108, -0.7337, -0.0559, 0.0323]) tensor([0.6014, 0.0778, 0.1533, 0.1675]) -Greedy action tensor([ 1.9081, -0.6887, -0.6532, 0.7638]) tensor([0.6802, 0.0507, 0.0525, 0.2166]) -Greedy action tensor([ 1.2066, -0.2145, -0.6646, 0.3123]) tensor([0.5542, 0.1338, 0.0853, 0.2266]) -Greedy action tensor([ 1.0132, -0.4232, -0.3622, 0.5375]) tensor([0.4735, 0.1126, 0.1197, 0.2942]) -Greedy action tensor([ 1.2634, -0.1507, -0.0380, 0.0502]) tensor([0.5517, 0.1342, 0.1502, 0.1640]) -Greedy action tensor([ 1.2856, -0.4185, -0.5124, 0.1127]) tensor([0.6035, 0.1098, 0.1000, 0.1868]) -Greedy action tensor([ 1.0608, -0.6229, -0.0802, 0.2713]) tensor([0.5104, 0.0948, 0.1631, 0.2318]) -Greedy action tensor([ 1.1500, -0.5783, -0.1054, 0.3243]) tensor([0.5262, 0.0934, 0.1499, 0.2304]) -Greedy action tensor([ 0.6744, -0.4154, -0.6535, 0.5929]) tensor([0.3963, 0.1333, 0.1050, 0.3653]) -Greedy action tensor([ 1.8310, -0.4574, -0.0141, 0.2656]) tensor([0.6810, 0.0691, 0.1076, 0.1423]) -Greedy action tensor([ 1.4217, -0.3007, -0.2035, -0.0108]) tensor([0.6195, 0.1107, 0.1220, 0.1479]) -Greedy action tensor([ 1.4185, -0.7492, -0.3928, 0.7971]) tensor([0.5509, 0.0630, 0.0900, 0.2960]) -Greedy action tensor([ 0.8903, -0.6989, 0.2887, -0.4580]) tensor([0.4971, 0.1015, 0.2724, 0.1291]) -Greedy action tensor([ 1.5572, -0.6698, -0.1883, 0.2918]) tensor([0.6392, 0.0689, 0.1116, 0.1803]) -Greedy action tensor([ 1.2905, -0.5109, -0.3743, 0.3708]) tensor([0.5705, 0.0942, 0.1080, 0.2274]) -Greedy action tensor([ 0.9877, -0.3236, 0.1659, -0.0517]) tensor([0.4848, 0.1306, 0.2131, 0.1714]) -Greedy action tensor([ 1.4022, -0.5311, -0.2869, 0.3015]) tensor([0.6017, 0.0870, 0.1111, 0.2001]) -Greedy action tensor([ 0.0756, -0.1958, -0.1230, 0.5310]) tensor([0.2404, 0.1833, 0.1971, 0.3791]) -Greedy action tensor([ 0.9743, -0.4774, -0.3392, 0.5514]) tensor([0.4633, 0.1085, 0.1246, 0.3036]) -Greedy action tensor([ 0.8812, -0.4552, -0.4681, 0.3359]) tensor([0.4758, 0.1250, 0.1234, 0.2758]) -Greedy action tensor([ 1.8800, -1.0043, -0.2015, 0.4495]) tensor([0.7043, 0.0394, 0.0879, 0.1685]) -Greedy action tensor([ 1.2578, -0.3674, -0.4937, 0.0716]) tensor([0.5967, 0.1175, 0.1035, 0.1822]) -Greedy action tensor([ 1.9291, -0.4756, 0.1580, 0.8028]) tensor([0.6310, 0.0570, 0.1074, 0.2046]) -Greedy action tensor([ 1.4623, -0.7899, -0.1578, 0.2243]) tensor([0.6277, 0.0660, 0.1242, 0.1820]) -Greedy action tensor([ 1.3344, -0.4030, -0.1538, 0.3702]) tensor([0.5608, 0.0987, 0.1266, 0.2138]) -Greedy action tensor([ 2.4823, -0.8745, -0.4950, 0.9195]) tensor([0.7720, 0.0269, 0.0393, 0.1618]) -Greedy action tensor([ 1.3437, -0.0489, -0.8877, 0.5943]) tensor([0.5469, 0.1359, 0.0587, 0.2585]) -Greedy action tensor([ 1.1502, -0.0256, -0.3429, 0.6736]) tensor([0.4642, 0.1432, 0.1043, 0.2882]) -Greedy action tensor([ 1.1087, -0.5404, -0.0529, -0.2365]) tensor([0.5663, 0.1089, 0.1773, 0.1475]) -Greedy action tensor([ 1.4229, -0.4008, -0.6682, 0.0883]) tensor([0.6459, 0.1043, 0.0798, 0.1700]) -Greedy action tensor([ 1.5116, -0.7160, 0.1769, 0.1271]) tensor([0.6167, 0.0665, 0.1623, 0.1544]) -Greedy action tensor([ 2.0863, -1.0969, -0.3187, 0.6876]) tensor([0.7254, 0.0301, 0.0655, 0.1791]) -Greedy action tensor([ 1.1250, -0.4142, -0.4423, 0.2326]) tensor([0.5456, 0.1171, 0.1138, 0.2235]) -Greedy action tensor([1.1594, 0.0970, 0.1015, 0.0535]) tensor([0.4941, 0.1708, 0.1716, 0.1635]) -Greedy action tensor([ 1.1290, -0.2297, 0.0208, 0.1759]) tensor([0.5069, 0.1303, 0.1674, 0.1954]) -Greedy action tensor([ 0.8282, -0.2396, -0.6672, 0.3639]) tensor([0.4553, 0.1565, 0.1021, 0.2862]) -Greedy action tensor([ 1.2804, -0.2296, -0.4198, 0.2858]) tensor([0.5639, 0.1246, 0.1030, 0.2086]) -Greedy action tensor([ 0.8223, -0.4629, 0.0403, -0.2277]) tensor([0.4799, 0.1327, 0.2195, 0.1679]) -Greedy action tensor([ 1.3775, -0.7126, -0.0837, -0.1165]) tensor([0.6329, 0.0783, 0.1468, 0.1421]) -Greedy action tensor([ 1.9924, -0.7631, -0.2360, 0.3705]) tensor([0.7306, 0.0464, 0.0787, 0.1443]) -Greedy action tensor([ 1.1215, -0.1160, -0.3117, 0.4105]) tensor([0.4951, 0.1436, 0.1181, 0.2432]) -Greedy action tensor([ 1.2998, -0.5901, -0.2213, 0.1239]) tensor([0.5959, 0.0900, 0.1302, 0.1839]) -Greedy action tensor([ 1.2055, -0.4315, -0.1725, 0.2519]) tensor([0.5458, 0.1062, 0.1376, 0.2104]) -Greedy action tensor([ 1.5603, -0.2525, -0.0387, 0.2143]) tensor([0.6152, 0.1004, 0.1243, 0.1601]) -Greedy action tensor([ 1.2034, -0.4088, -0.1632, -0.2478]) tensor([0.5922, 0.1181, 0.1510, 0.1387]) -Greedy action tensor([ 1.0598, -0.2198, -0.4223, 0.4028]) tensor([0.4941, 0.1374, 0.1122, 0.2562]) -Greedy action tensor([ 1.4739, -0.8125, -0.5555, 0.4746]) tensor([0.6245, 0.0635, 0.0821, 0.2299]) -Greedy action tensor([ 1.5342, -0.5433, -0.0695, 0.9296]) tensor([0.5340, 0.0669, 0.1074, 0.2917]) -Greedy action tensor([ 1.1670, 0.3789, -0.5148, 0.1032]) tensor([0.5036, 0.2290, 0.0937, 0.1738]) -Greedy action tensor([ 2.3715, -0.8958, -0.2082, 0.8647]) tensor([0.7488, 0.0285, 0.0568, 0.1659]) -Greedy action tensor([ 1.7536, -0.6800, -0.0336, 0.3556]) tensor([0.6657, 0.0584, 0.1115, 0.1645]) -Greedy action tensor([ 1.1142, -0.5123, -0.3907, 0.2885]) tensor([0.5386, 0.1059, 0.1196, 0.2359]) -Greedy action tensor([ 1.3307, -0.0550, -0.4591, 0.1217]) tensor([0.5829, 0.1458, 0.0973, 0.1740]) -Greedy action tensor([ 1.0237, -0.2909, -0.2869, 0.0902]) tensor([0.5178, 0.1391, 0.1396, 0.2036]) -Greedy action tensor([ 1.0162, -0.1756, -0.0487, 0.3276]) tensor([0.4650, 0.1412, 0.1603, 0.2335]) -Greedy action tensor([ 1.0010, -0.6471, -0.7325, 0.8771]) tensor([0.4439, 0.0854, 0.0784, 0.3922]) -Greedy action tensor([ 1.4603, -0.3144, -0.4135, 0.3320]) tensor([0.6073, 0.1030, 0.0932, 0.1965]) -Greedy action tensor([ 1.0290, -0.3193, 0.0305, 0.2725]) tensor([0.4768, 0.1238, 0.1757, 0.2237]) -Greedy action tensor([ 1.1486, -0.3420, -0.5845, 0.4026]) tensor([0.5330, 0.1200, 0.0942, 0.2528]) -Greedy action tensor([ 1.5014, -0.2450, -0.6156, 0.7723]) tensor([0.5627, 0.0981, 0.0677, 0.2714]) -Greedy action tensor([ 0.7384, 0.2047, 0.1701, -0.3861]) tensor([0.4036, 0.2367, 0.2286, 0.1311]) -Greedy action tensor([ 1.2722, -0.5091, -0.4945, 0.2659]) tensor([0.5866, 0.0988, 0.1002, 0.2144]) -Greedy action tensor([ 0.4483, -0.2864, -0.1876, 0.2270]) tensor([0.3558, 0.1707, 0.1884, 0.2852]) -Greedy action tensor([ 1.1529, -0.4499, -0.7252, 0.8204]) tensor([0.4828, 0.0972, 0.0738, 0.3462]) -Greedy action tensor([ 1.8211, -0.5005, -0.4042, 0.7780]) tensor([0.6416, 0.0630, 0.0693, 0.2261]) -Greedy action tensor([ 0.7860, -0.2983, -0.5050, 0.5342]) tensor([0.4183, 0.1414, 0.1150, 0.3252]) -Greedy action tensor([ 1.7801, -0.2060, -0.2502, 0.3522]) tensor([0.6630, 0.0910, 0.0870, 0.1590]) -Greedy action tensor([ 0.3920, -0.3071, -0.1927, 0.2491]) tensor([0.3423, 0.1701, 0.1908, 0.2968]) -Greedy action tensor([ 0.8999, -0.0671, -0.1398, 0.0656]) tensor([0.4613, 0.1754, 0.1631, 0.2003]) -Greedy action tensor([ 1.0081, -0.0136, 0.0708, 0.0499]) tensor([0.4683, 0.1686, 0.1834, 0.1796]) -Greedy action tensor([ 0.9855, -0.3095, -0.4062, 0.2884]) tensor([0.4949, 0.1355, 0.1231, 0.2465]) -Greedy action tensor([-1.3964, -0.5701, 0.4488, 0.3064]) tensor([0.0662, 0.1513, 0.4191, 0.3634]) -Greedy action tensor([-1.8144, -0.4008, 0.6098, -0.0995]) tensor([0.0455, 0.1872, 0.5143, 0.2530]) -Greedy action tensor([-0.2863, -0.2066, 1.0085, 1.6992]) tensor([0.0768, 0.0832, 0.2804, 0.5595]) -Greedy action tensor([-1.5520, -0.4986, 0.6824, -0.4359]) tensor([0.0615, 0.1763, 0.5744, 0.1877]) -Greedy action tensor([-1.5053, -0.4991, 0.4607, 0.1422]) tensor([0.0622, 0.1702, 0.4444, 0.3232]) -Greedy action tensor([-1.1255, -0.6077, 0.2640, 0.3742]) tensor([0.0895, 0.1502, 0.3592, 0.4010]) -Greedy action tensor([-1.7102, -0.4540, 0.6003, 0.1647]) tensor([0.0474, 0.1664, 0.4774, 0.3088]) -Greedy action tensor([-0.7014, -0.4067, 0.1920, 0.1169]) tensor([0.1418, 0.1904, 0.3465, 0.3214]) -Greedy action tensor([-1.9012, -0.4508, 0.6491, -0.1573]) tensor([0.0420, 0.1792, 0.5384, 0.2404]) -Greedy action tensor([-0.9725, -0.6324, 0.3328, 0.3204]) tensor([0.1027, 0.1443, 0.3788, 0.3742]) -Greedy action tensor([-1.8004, -0.3371, 0.6275, -0.0501]) tensor([0.0446, 0.1928, 0.5057, 0.2569]) -Greedy action tensor([-1.2921, -0.6010, 0.3286, 0.1898]) tensor([0.0803, 0.1603, 0.4060, 0.3534]) -Greedy action tensor([-1.1999, -0.4314, 0.4127, 0.0158]) tensor([0.0866, 0.1868, 0.4344, 0.2921]) -Greedy action tensor([-1.6899, -0.2762, 0.5475, 0.1067]) tensor([0.0488, 0.2004, 0.4568, 0.2940]) -Greedy action tensor([-1.7070, -0.1840, 0.5965, -0.2572]) tensor([0.0504, 0.2309, 0.5041, 0.2146]) -Greedy action tensor([-1.7987, -0.5123, 0.9215, 0.3105]) tensor([0.0357, 0.1291, 0.5414, 0.2939]) -Greedy action tensor([-1.1407, -0.5925, 0.2682, 0.3061]) tensor([0.0903, 0.1563, 0.3696, 0.3838]) -Greedy action tensor([-1.8851, -0.4398, 0.7298, 0.0365]) tensor([0.0389, 0.1648, 0.5309, 0.2654]) -Greedy action tensor([-1.9421, -0.4596, 0.6763, -0.1768]) tensor([0.0401, 0.1764, 0.5494, 0.2341]) -Greedy action tensor([-1.5117, -0.1830, 0.7184, 0.3305]) tensor([0.0490, 0.1852, 0.4562, 0.3095]) -Greedy action tensor([-1.8618, -0.5037, 0.2376, -0.1875]) tensor([0.0544, 0.2115, 0.4439, 0.2902]) -Greedy action tensor([-1.4471, -0.6473, 0.6409, 0.0576]) tensor([0.0633, 0.1409, 0.5108, 0.2850]) -Greedy action tensor([-1.5909, -0.5890, 0.4853, 0.0122]) tensor([0.0600, 0.1634, 0.4785, 0.2981]) -Greedy action tensor([-1.9393, -0.4521, 0.6659, -0.1752]) tensor([0.0403, 0.1784, 0.5458, 0.2354]) -Greedy action tensor([-0.7355, -0.2478, 0.1544, -0.0587]) tensor([0.1422, 0.2316, 0.3463, 0.2798]) -Greedy action tensor([-1.6364, 0.1266, 0.4449, -0.0860]) tensor([0.0511, 0.2981, 0.4098, 0.2410]) -Greedy action tensor([-1.5386, -0.5898, 0.9854, 0.7725]) tensor([0.0382, 0.0988, 0.4773, 0.3857]) -Greedy action tensor([-1.5886, -0.4744, 0.6710, 0.3279]) tensor([0.0490, 0.1492, 0.4690, 0.3328]) -Greedy action tensor([-1.4985, -0.4207, 0.4519, -0.0654]) tensor([0.0660, 0.1938, 0.4638, 0.2765]) -Greedy action tensor([-1.8456, -0.3906, 0.6123, -0.0975]) tensor([0.0440, 0.1887, 0.5144, 0.2529]) -Greedy action tensor([-1.2583, -0.5896, 0.3044, 0.2219]) tensor([0.0825, 0.1611, 0.3938, 0.3626]) -Greedy action tensor([-1.8274, -0.3236, 0.5934, -0.0999]) tensor([0.0447, 0.2010, 0.5029, 0.2514]) -Greedy action tensor([-1.8956, -0.4658, 0.6407, -0.1446]) tensor([0.0424, 0.1772, 0.5360, 0.2444]) -Greedy action tensor([-0.9784, -0.5592, 0.2208, 0.3909]) tensor([0.1023, 0.1556, 0.3395, 0.4025]) -Greedy action tensor([-1.8998, -0.4420, 0.6412, -0.1551]) tensor([0.0422, 0.1812, 0.5353, 0.2414]) -Greedy action tensor([-1.4082, -0.3950, 0.3621, 0.0592]) tensor([0.0716, 0.1972, 0.4205, 0.3106]) -Greedy action tensor([-1.8603, -0.4719, 0.6292, -0.1423]) tensor([0.0442, 0.1771, 0.5325, 0.2462]) -Greedy action tensor([-1.7933, -0.1695, 0.5488, -0.0538]) tensor([0.0451, 0.2288, 0.4692, 0.2569]) -Greedy action tensor([-1.9058, -0.4219, 0.6575, -0.1464]) tensor([0.0413, 0.1823, 0.5364, 0.2401]) -Greedy action tensor([-1.1780, -0.5865, 0.2593, 0.2745]) tensor([0.0886, 0.1600, 0.3728, 0.3785]) -Greedy action tensor([-1.6842, -0.5024, 0.5118, 0.0043]) tensor([0.0536, 0.1747, 0.4817, 0.2900]) -Greedy action tensor([-1.7006, -0.5113, 0.5499, 0.0290]) tensor([0.0515, 0.1692, 0.4889, 0.2904]) -Greedy action tensor([-1.7585, -0.4735, 0.6876, 0.0983]) tensor([0.0443, 0.1602, 0.5117, 0.2838]) -Greedy action tensor([-1.9117, -0.4155, 0.6637, -0.1294]) tensor([0.0407, 0.1819, 0.5352, 0.2421]) -Greedy action tensor([-1.8213, -0.4623, 0.5973, -0.0941]) tensor([0.0460, 0.1790, 0.5164, 0.2587]) -Greedy action tensor([-1.6054, -0.4480, 0.6521, 0.3054]) tensor([0.0488, 0.1552, 0.4663, 0.3297]) -Greedy action tensor([-1.9152, -0.4512, 0.6487, -0.1643]) tensor([0.0415, 0.1796, 0.5395, 0.2393]) -Greedy action tensor([-1.9225, -0.3895, 0.6500, -0.1593]) tensor([0.0407, 0.1886, 0.5333, 0.2374]) -Greedy action tensor([-1.1896, -0.3194, 0.4391, 0.6662]) tensor([0.0672, 0.1604, 0.3425, 0.4299]) -Greedy action tensor([-1.5863, 0.2195, 0.4906, -0.2239]) tensor([0.0527, 0.3208, 0.4206, 0.2059]) -Greedy action tensor([-0.1002, 0.4864, -0.0083, 0.6735]) tensor([0.1650, 0.2966, 0.1808, 0.3576]) -Greedy action tensor([-1.2645, 0.7794, 0.2181, 0.1896]) tensor([0.0575, 0.4436, 0.2530, 0.2459]) -Greedy action tensor([-1.8643, -0.6676, 0.3052, -0.2423]) tensor([0.0552, 0.1826, 0.4829, 0.2793]) -Greedy action tensor([-1.7028, -0.4192, 0.5457, -0.0275]) tensor([0.0515, 0.1858, 0.4877, 0.2749]) -Greedy action tensor([-1.3396, -0.5360, 0.3775, 0.0918]) tensor([0.0770, 0.1720, 0.4288, 0.3222]) -Greedy action tensor([-0.6178, -0.5400, 0.1683, 0.3222]) tensor([0.1463, 0.1581, 0.3211, 0.3745]) -Greedy action tensor([-1.5596, -0.5439, 0.6272, 0.3099]) tensor([0.0522, 0.1442, 0.4650, 0.3386]) -Greedy action tensor([-1.6515, -0.4859, 0.6300, 0.2058]) tensor([0.0490, 0.1572, 0.4798, 0.3139]) -Greedy action tensor([-1.8386, -0.4670, 0.6217, -0.0780]) tensor([0.0445, 0.1755, 0.5212, 0.2589]) -Greedy action tensor([-1.8861, -0.3513, 0.6244, -0.1409]) tensor([0.0422, 0.1960, 0.5199, 0.2419]) -Greedy action tensor([-1.8377, -0.3176, 0.5959, -0.1409]) tensor([0.0446, 0.2039, 0.5083, 0.2433]) -Greedy action tensor([-1.5352, -0.4766, 0.4680, 0.0744]) tensor([0.0614, 0.1769, 0.4549, 0.3069]) -Greedy action tensor([-0.9829, -0.1274, 0.1409, -0.0335]) tensor([0.1109, 0.2610, 0.3413, 0.2867]) -Greedy action tensor([-1.8870, -0.7223, 1.2997, 0.5643]) tensor([0.0250, 0.0801, 0.6050, 0.2899]) -Greedy action tensor([-1.8062, -0.4844, 0.9578, 0.3973]) tensor([0.0337, 0.1264, 0.5347, 0.3053]) -Greedy action tensor([-1.8405, -0.4631, 0.6216, -0.1251]) tensor([0.0449, 0.1782, 0.5271, 0.2498]) -Greedy action tensor([-1.6631, -0.5273, 0.5154, 0.0278]) tensor([0.0544, 0.1695, 0.4808, 0.2953]) -Greedy action tensor([-1.6993, -0.4360, 0.6033, 0.0309]) tensor([0.0496, 0.1753, 0.4956, 0.2796]) -Greedy action tensor([-1.8895, -0.4718, 0.6764, -0.0994]) tensor([0.0414, 0.1711, 0.5393, 0.2482]) -Greedy action tensor([-1.8528, -0.4484, 0.6182, -0.1247]) tensor([0.0444, 0.1807, 0.5251, 0.2498]) -Greedy action tensor([-1.2623, -0.6253, 0.3029, 0.2180]) tensor([0.0829, 0.1567, 0.3963, 0.3641]) -Greedy action tensor([-1.8598, -0.3275, 0.6201, -0.1228]) tensor([0.0430, 0.1991, 0.5136, 0.2443]) -Greedy action tensor([-1.4271, -0.5625, 0.4331, 0.1122]) tensor([0.0692, 0.1642, 0.4443, 0.3223]) -Greedy action tensor([-1.9098, -0.4492, 0.6533, -0.1620]) tensor([0.0416, 0.1793, 0.5401, 0.2390]) -Greedy action tensor([-1.8139, -0.3590, 0.6367, -0.0478]) tensor([0.0440, 0.1885, 0.5102, 0.2573]) -Greedy action tensor([-0.9611, -0.4541, 0.5475, -0.1151]) tensor([0.1051, 0.1746, 0.4753, 0.2450]) -Greedy action tensor([-1.4549, -0.5083, 0.3866, 0.0743]) tensor([0.0690, 0.1778, 0.4350, 0.3183]) -Greedy action tensor([-1.7885, -0.3032, 0.5962, -0.0542]) tensor([0.0456, 0.2013, 0.4949, 0.2582]) -Greedy action tensor([-1.1239, 0.0190, 0.4672, 0.8675]) tensor([0.0611, 0.1915, 0.2999, 0.4475]) -Greedy action tensor([-1.0904, -0.1387, 0.5910, 0.8503]) tensor([0.0628, 0.1626, 0.3374, 0.4372]) -Greedy action tensor([-1.8888, -0.6143, 1.3389, 0.6877]) tensor([0.0233, 0.0833, 0.5872, 0.3062]) -Greedy action tensor([ 0.5246, -0.2978, 0.0358, -0.2597]) tensor([0.3985, 0.1751, 0.2444, 0.1819]) -Greedy action tensor([ 0.6211, -0.4479, 0.0504, -0.4637]) tensor([0.4452, 0.1528, 0.2516, 0.1504]) -Greedy action tensor([ 0.8461, -0.6529, 0.0979, -0.6215]) tensor([0.5189, 0.1159, 0.2456, 0.1196]) -Greedy action tensor([ 0.8404, -0.6101, 0.1091, -0.2347]) tensor([0.4862, 0.1140, 0.2340, 0.1659]) -Greedy action tensor([ 0.5049, -0.2640, -0.1090, -0.4081]) tensor([0.4156, 0.1927, 0.2249, 0.1668]) -Greedy action tensor([ 0.5680, -0.1979, -0.1262, -0.2368]) tensor([0.4147, 0.1928, 0.2071, 0.1854]) -Greedy action tensor([ 0.9320, -0.7289, 0.0862, -0.5337]) tensor([0.5405, 0.1027, 0.2320, 0.1248]) -Greedy action tensor([ 0.9546, -0.5458, -0.0174, -0.3360]) tensor([0.5329, 0.1189, 0.2016, 0.1466]) -Greedy action tensor([ 1.0257, -0.5819, 0.0997, -0.6638]) tensor([0.5614, 0.1125, 0.2224, 0.1036]) -Greedy action tensor([ 0.7589, -0.3908, -0.0857, -0.1941]) tensor([0.4690, 0.1486, 0.2016, 0.1809]) -Greedy action tensor([ 0.5969, -0.4920, -0.0384, -0.2375]) tensor([0.4347, 0.1463, 0.2303, 0.1887]) -Greedy action tensor([ 0.5559, -0.1276, -0.1544, -0.0283]) tensor([0.3916, 0.1977, 0.1925, 0.2183]) -Greedy action tensor([ 0.7889, -0.4235, -0.0671, -0.2072]) tensor([0.4781, 0.1422, 0.2031, 0.1766]) -Greedy action tensor([ 0.5914, 0.0675, -0.1137, -0.1144]) tensor([0.3876, 0.2295, 0.1915, 0.1914]) -Greedy action tensor([ 0.7175, 0.1542, -0.1386, -0.4747]) tensor([0.4352, 0.2478, 0.1849, 0.1321]) -Greedy action tensor([ 0.5908, -0.1814, -0.0180, -0.2556]) tensor([0.4107, 0.1897, 0.2234, 0.1762]) -Greedy action tensor([ 0.7860, -0.6468, 0.0292, -0.2632]) tensor([0.4859, 0.1160, 0.2280, 0.1702]) -Greedy action tensor([ 0.4098, -0.4699, -0.0460, -0.1417]) tensor([0.3809, 0.1581, 0.2415, 0.2195]) -Greedy action tensor([ 0.3218, -0.3171, -0.2760, -0.4163]) tensor([0.3913, 0.2065, 0.2152, 0.1870]) -Greedy action tensor([ 1.0831, -0.4271, -0.1900, -0.3740]) tensor([0.5768, 0.1274, 0.1615, 0.1343]) -Greedy action tensor([-0.0357, -0.0497, 0.0244, -0.1716]) tensor([0.2550, 0.2515, 0.2708, 0.2226]) -Greedy action tensor([ 0.4668, -0.3880, 0.0504, -0.2937]) tensor([0.3918, 0.1667, 0.2584, 0.1831]) -Greedy action tensor([ 0.6943, -0.8878, -0.0885, -0.4185]) tensor([0.5022, 0.1032, 0.2296, 0.1650]) -Greedy action tensor([ 0.6254, -0.0968, 0.0355, -0.4640]) tensor([0.4208, 0.2044, 0.2333, 0.1416]) -Greedy action tensor([ 0.6101, -0.2517, 0.0156, -0.2199]) tensor([0.4149, 0.1753, 0.2290, 0.1809]) -Greedy action tensor([ 0.7706, -0.6582, -0.1460, -0.5920]) tensor([0.5276, 0.1264, 0.2110, 0.1351]) -Greedy action tensor([ 0.8556, -0.8314, -0.1605, -0.5466]) tensor([0.5577, 0.1032, 0.2019, 0.1372]) -Greedy action tensor([ 0.7921, -0.6852, 0.0125, -0.4155]) tensor([0.5036, 0.1149, 0.2309, 0.1505]) -Greedy action tensor([ 0.5583, -0.0733, -0.0502, -0.2974]) tensor([0.3998, 0.2126, 0.2176, 0.1699]) -Greedy action tensor([ 0.8332, -0.8186, 0.0775, -0.3714]) tensor([0.5099, 0.0977, 0.2395, 0.1529]) -Greedy action tensor([ 0.6193, -0.3883, -0.0778, -0.2638]) tensor([0.4393, 0.1604, 0.2188, 0.1816]) -Greedy action tensor([ 0.7894, -0.8846, -0.1017, -0.3245]) tensor([0.5192, 0.0973, 0.2130, 0.1704]) -Greedy action tensor([ 1.0477, -0.8259, -0.0187, -0.5132]) tensor([0.5856, 0.0899, 0.2016, 0.1229]) -Greedy action tensor([ 0.4536, -0.4559, -0.0637, -0.1821]) tensor([0.3955, 0.1593, 0.2358, 0.2094]) -Greedy action tensor([ 0.7556, -0.4586, 0.0507, -0.1824]) tensor([0.4582, 0.1361, 0.2264, 0.1793]) -Greedy action tensor([ 0.3657, 0.1160, -0.0632, -0.1662]) tensor([0.3314, 0.2582, 0.2158, 0.1947]) -Greedy action tensor([ 0.9350, -0.4444, -0.0164, -0.6569]) tensor([0.5430, 0.1367, 0.2097, 0.1105]) -Greedy action tensor([ 0.7529, -0.2357, 0.0468, -0.3219]) tensor([0.4531, 0.1686, 0.2236, 0.1547]) -Greedy action tensor([ 1.1325, -0.7319, 0.0280, -0.2850]) tensor([0.5785, 0.0897, 0.1917, 0.1402]) -Greedy action tensor([ 0.6494, -0.3297, -0.1258, -0.5127]) tensor([0.4653, 0.1748, 0.2143, 0.1456]) -Greedy action tensor([ 0.5323, 0.0752, -0.1007, -0.2521]) tensor([0.3816, 0.2416, 0.2026, 0.1742]) -Greedy action tensor([ 0.3919, -0.0425, -0.0217, -0.1911]) tensor([0.3488, 0.2259, 0.2306, 0.1947]) -Greedy action tensor([ 0.4472, -0.2129, 0.1230, -0.3292]) tensor([0.3704, 0.1914, 0.2678, 0.1704]) -Greedy action tensor([ 0.4450, -0.3029, -0.0799, -0.2318]) tensor([0.3886, 0.1840, 0.2299, 0.1975]) -Greedy action tensor([ 0.6659, -0.4645, -0.0166, -0.2133]) tensor([0.4457, 0.1439, 0.2253, 0.1850]) -Greedy action tensor([ 0.4791, -0.1791, 0.1491, -0.2869]) tensor([0.3702, 0.1917, 0.2661, 0.1721]) -Greedy action tensor([ 0.8378, -0.3973, -0.0502, -0.3413]) tensor([0.4976, 0.1447, 0.2047, 0.1530]) -Greedy action tensor([ 0.8819, -0.5475, -0.0547, -0.4736]) tensor([0.5293, 0.1267, 0.2075, 0.1365]) -Greedy action tensor([ 0.8199, -0.6157, -0.1526, -0.1605]) tensor([0.5022, 0.1195, 0.1899, 0.1884]) -Greedy action tensor([ 0.5009, -0.0993, 0.1054, -0.2612]) tensor([0.3719, 0.2041, 0.2504, 0.1736]) -Greedy action tensor([ 0.5296, -0.3958, -0.0867, -0.3122]) tensor([0.4224, 0.1674, 0.2281, 0.1820]) -Greedy action tensor([ 0.7772, -0.4625, -0.2301, -0.2697]) tensor([0.4986, 0.1443, 0.1821, 0.1750]) -Greedy action tensor([ 0.8770, -0.3498, -0.0523, -0.3218]) tensor([0.5026, 0.1474, 0.1984, 0.1516]) -Greedy action tensor([ 0.8230, -0.3463, 0.0280, -0.5414]) tensor([0.4956, 0.1539, 0.2238, 0.1266]) -Greedy action tensor([ 0.6634, -0.6431, -0.1887, -0.1727]) tensor([0.4693, 0.1271, 0.2002, 0.2034]) -Greedy action tensor([ 0.3675, -0.1703, -0.0924, -0.1691]) tensor([0.3571, 0.2086, 0.2255, 0.2088]) -Greedy action tensor([ 0.7109, -0.6808, -0.1282, -0.2438]) tensor([0.4841, 0.1204, 0.2092, 0.1863]) -Greedy action tensor([ 0.7736, -0.2168, -0.0882, -0.3448]) tensor([0.4715, 0.1752, 0.1992, 0.1541]) -Greedy action tensor([ 0.6587, -0.7843, 0.0511, -0.1843]) tensor([0.4522, 0.1068, 0.2463, 0.1946]) -Greedy action tensor([ 0.6906, -0.4866, 0.0705, -0.2761]) tensor([0.4492, 0.1384, 0.2416, 0.1708]) -Greedy action tensor([ 0.3135, 0.0589, -0.0092, -0.1981]) tensor([0.3227, 0.2502, 0.2337, 0.1935]) -Greedy action tensor([ 0.6801, -0.4369, -0.0894, -0.4299]) tensor([0.4717, 0.1544, 0.2185, 0.1554]) -Greedy action tensor([ 0.4920, -0.2101, 0.0417, -0.2902]) tensor([0.3860, 0.1913, 0.2461, 0.1766]) -Greedy action tensor([ 0.2932, 0.0333, 0.0384, -0.3689]) tensor([0.3266, 0.2519, 0.2531, 0.1684]) -Greedy action tensor([ 0.6122, -0.3427, -0.0013, -0.1557]) tensor([0.4184, 0.1610, 0.2265, 0.1941]) -Greedy action tensor([ 1.3637, -1.6965, -0.0766, -0.5864]) tensor([0.7013, 0.0329, 0.1661, 0.0998]) -Greedy action tensor([ 0.7276, -0.4757, 0.1380, -0.3676]) tensor([0.4568, 0.1371, 0.2533, 0.1528]) -Greedy action tensor([ 0.4144, -0.0292, 0.1763, -0.2224]) tensor([0.3380, 0.2169, 0.2664, 0.1788]) -Greedy action tensor([ 0.6633, -0.3057, 0.1919, -0.3829]) tensor([0.4247, 0.1611, 0.2650, 0.1492]) -Greedy action tensor([ 0.4761, -0.0863, -0.0611, -0.2039]) tensor([0.3758, 0.2142, 0.2196, 0.1904]) -Greedy action tensor([ 0.9916, -0.8413, 0.1212, -0.4918]) tensor([0.5538, 0.0886, 0.2319, 0.1256]) -Greedy action tensor([ 0.7811, -0.6207, -0.1257, -0.6597]) tensor([0.5300, 0.1305, 0.2140, 0.1255]) -Greedy action tensor([ 0.2268, 0.1275, -0.0457, -0.2012]) tensor([0.3013, 0.2728, 0.2294, 0.1964]) -Greedy action tensor([ 1.0676, -0.8152, 0.0074, -0.6421]) tensor([0.5954, 0.0906, 0.2062, 0.1077]) -Greedy action tensor([ 0.5195, -0.3388, 0.0284, 0.0073]) tensor([0.3795, 0.1609, 0.2322, 0.2274]) -Greedy action tensor([ 1.3359, -0.5366, 0.1375, -0.8059]) tensor([0.6358, 0.0977, 0.1918, 0.0747]) -Greedy action tensor([ 0.6220, -0.4343, -0.0757, -0.4315]) tensor([0.4558, 0.1585, 0.2268, 0.1589]) -Greedy action tensor([ 0.9854, -0.6236, 0.0631, -0.2238]) tensor([0.5274, 0.1055, 0.2097, 0.1574]) -Greedy action tensor([ 0.7590, -0.5295, -0.1947, -0.1823]) tensor([0.4876, 0.1344, 0.1878, 0.1902]) -Greedy action tensor([ 0.7830, -0.4013, -0.1970, -0.1014]) tensor([0.4775, 0.1461, 0.1792, 0.1972]) -Greedy action tensor([ 0.6327, -0.4834, -0.0744, -0.3252]) tensor([0.4536, 0.1486, 0.2237, 0.1741]) -Greedy action tensor([ 2.3656, -0.1828, 0.8239, 0.9466]) tensor([0.6518, 0.0510, 0.1395, 0.1577]) -Greedy action tensor([ 0.4363, -0.8369, 0.3928, 1.8316]) tensor([0.1594, 0.0446, 0.1526, 0.6434]) -Greedy action tensor([1.0332, 0.1118, 1.9409, 0.9308]) tensor([0.2092, 0.0833, 0.5186, 0.1889]) -Greedy action tensor([-0.0230, -0.1190, 0.8510, -0.7305]) tensor([0.2084, 0.1894, 0.4995, 0.1027]) -Greedy action tensor([ 0.4779, -0.0549, 0.0545, 1.1241]) tensor([0.2410, 0.1414, 0.1578, 0.4598]) -Greedy action tensor([ 1.2217, -0.6636, 0.8745, 1.2482]) tensor([0.3466, 0.0526, 0.2449, 0.3559]) -Greedy action tensor([0.9181, 0.4048, 0.4171, 1.1300]) tensor([0.2907, 0.1740, 0.1761, 0.3593]) -Greedy action tensor([ 0.7040, 0.3685, -0.7955, 0.1765]) tensor([0.3955, 0.2828, 0.0883, 0.2334]) -Greedy action tensor([ 0.0276, -0.7731, 0.0287, 0.4211]) tensor([0.2543, 0.1142, 0.2546, 0.3769]) -Greedy action tensor([ 0.3197, -1.3676, -0.4337, 0.2276]) tensor([0.3894, 0.0721, 0.1833, 0.3552]) -Greedy action tensor([ 0.2016, -0.1525, -0.6917, 0.6591]) tensor([0.2709, 0.1901, 0.1109, 0.4281]) -Greedy action tensor([ 0.1617, -0.1831, 0.5627, -1.0195]) tensor([0.2850, 0.2019, 0.4256, 0.0875]) -Greedy action tensor([-1.3632, -0.8707, 0.4376, -1.3451]) tensor([0.1030, 0.1685, 0.6236, 0.1049]) -Greedy action tensor([ 0.8672, -0.1198, 0.3965, -0.0172]) tensor([0.4149, 0.1546, 0.2591, 0.1713]) -Greedy action tensor([ 0.2308, -0.9120, 0.3003, 0.4645]) tensor([0.2737, 0.0873, 0.2934, 0.3457]) -Greedy action tensor([-0.9198, -0.7752, -0.8706, -0.0881]) tensor([0.1817, 0.2100, 0.1909, 0.4174]) -Greedy action tensor([-0.2089, 0.6525, 0.9068, 0.1671]) tensor([0.1270, 0.3005, 0.3875, 0.1850]) -Greedy action tensor([-0.5597, -0.2246, -0.3559, -0.3347]) tensor([0.2051, 0.2867, 0.2514, 0.2568]) -Greedy action tensor([ 0.1631, 0.0977, 1.4745, -0.0249]) tensor([0.1544, 0.1446, 0.5730, 0.1279]) -Greedy action tensor([-1.1063, -0.0795, 0.5816, -0.0225]) tensor([0.0823, 0.2297, 0.4449, 0.2432]) -Greedy action tensor([ 0.3975, -0.6119, 2.4077, 0.3827]) tensor([0.1019, 0.0371, 0.7606, 0.1004]) -Greedy action tensor([-0.3833, -0.0390, -0.9013, -0.6379]) tensor([0.2644, 0.3731, 0.1575, 0.2050]) -Greedy action tensor([ 0.3624, -0.3475, 0.5160, 0.0088]) tensor([0.2976, 0.1463, 0.3470, 0.2090]) -Greedy action tensor([ 2.1705, -0.0660, 0.9273, 2.0806]) tensor([0.4330, 0.0463, 0.1249, 0.3958]) -Greedy action tensor([ 2.1866, -0.4786, 1.3324, 1.0968]) tensor([0.5460, 0.0380, 0.2324, 0.1836]) -Greedy action tensor([ 0.5775, -0.2829, 0.4245, 0.1304]) tensor([0.3424, 0.1448, 0.2938, 0.2189]) -Greedy action tensor([-0.3921, -0.3786, -0.5661, 0.6893]) tensor([0.1723, 0.1747, 0.1448, 0.5082]) -Greedy action tensor([ 0.3307, -0.8937, 0.1361, 0.4427]) tensor([0.3091, 0.0908, 0.2544, 0.3457]) -Greedy action tensor([ 0.8428, -1.0731, 1.4086, 0.8610]) tensor([0.2547, 0.0375, 0.4485, 0.2594]) -Greedy action tensor([-0.7077, -0.7621, -0.0729, 0.1740]) tensor([0.1600, 0.1516, 0.3019, 0.3865]) -Greedy action tensor([0.4128, 1.1771, 0.0104, 0.4058]) tensor([0.2079, 0.4465, 0.1390, 0.2065]) -Greedy action tensor([ 1.0702, -0.2114, -0.4917, 0.3865]) tensor([0.5020, 0.1394, 0.1053, 0.2534]) -Greedy action tensor([ 0.9830, -0.5912, -0.1884, 0.7128]) tensor([0.4385, 0.0909, 0.1359, 0.3347]) -Greedy action tensor([-0.3251, -0.0839, 0.2438, 0.7570]) tensor([0.1431, 0.1821, 0.2527, 0.4222]) -Greedy action tensor([ 1.2349, 0.4786, 0.2721, -0.1270]) tensor([0.4745, 0.2227, 0.1812, 0.1216]) -Greedy action tensor([ 0.3735, -0.5256, -0.0487, 1.4306]) tensor([0.2024, 0.0824, 0.1327, 0.5825]) -Greedy action tensor([ 0.5050, -1.2421, -0.4817, 0.1465]) tensor([0.4453, 0.0776, 0.1660, 0.3111]) -Greedy action tensor([-0.5042, -1.2906, -0.1549, -0.2883]) tensor([0.2430, 0.1107, 0.3447, 0.3016]) -Greedy action tensor([0.0774, 0.4689, 0.5068, 0.6795]) tensor([0.1712, 0.2532, 0.2630, 0.3126]) -Greedy action tensor([ 1.0638, 0.6351, 0.3626, -0.4915]) tensor([0.4240, 0.2762, 0.2103, 0.0895]) -Greedy action tensor([-0.5520, 0.2540, 0.2304, 0.1378]) tensor([0.1348, 0.3018, 0.2947, 0.2687]) -Greedy action tensor([-1.8481, -0.1321, 0.7804, -1.2620]) tensor([0.0450, 0.2504, 0.6237, 0.0809]) -Greedy action tensor([ 0.7886, -0.1092, -0.4576, 0.7818]) tensor([0.3720, 0.1516, 0.1070, 0.3695]) -Greedy action tensor([ 0.2868, -0.7215, 0.9745, -0.2745]) tensor([0.2548, 0.0930, 0.5069, 0.1454]) -Greedy action tensor([ 1.3003, -1.4069, 1.1732, 0.6049]) tensor([0.4088, 0.0273, 0.3600, 0.2039]) -Greedy action tensor([-0.6668, 0.0966, -0.4239, 0.7631]) tensor([0.1163, 0.2495, 0.1483, 0.4859]) -Greedy action tensor([-0.4126, -2.2709, -0.2318, 0.8237]) tensor([0.1725, 0.0269, 0.2067, 0.5939]) -Greedy action tensor([ 0.3550, -0.6072, 0.6681, 2.0050]) tensor([0.1257, 0.0480, 0.1719, 0.6544]) -Greedy action tensor([ 1.7273, -0.1319, 0.6970, -0.1885]) tensor([0.6024, 0.0939, 0.2150, 0.0887]) -Greedy action tensor([ 0.6385, 0.2639, 1.7684, -0.1119]) tensor([0.1903, 0.1308, 0.5890, 0.0898]) -Greedy action tensor([ 1.5062, 0.2073, -0.1686, 0.2448]) tensor([0.5736, 0.1565, 0.1075, 0.1625]) -Greedy action tensor([ 1.3566, -0.5271, 0.5369, 2.0907]) tensor([0.2720, 0.0414, 0.1198, 0.5668]) -Greedy action tensor([ 1.2820, -1.1180, 1.8705, 0.4188]) tensor([0.3018, 0.0274, 0.5436, 0.1273]) -Greedy action tensor([-0.2505, -1.0704, 0.4065, -0.1858]) tensor([0.2254, 0.0993, 0.4348, 0.2405]) -Greedy action tensor([ 0.4377, -0.1468, -0.5981, 2.0369]) tensor([0.1457, 0.0812, 0.0517, 0.7213]) -Greedy action tensor([ 0.3246, 0.4523, -0.1766, -0.1877]) tensor([0.2993, 0.3401, 0.1813, 0.1793]) -Greedy action tensor([ 0.7293, 0.7052, 1.4553, -0.0935]) tensor([0.2231, 0.2178, 0.4611, 0.0980]) -Greedy action tensor([-0.9949, -2.3070, 0.5130, -0.5752]) tensor([0.1368, 0.0368, 0.6181, 0.2082]) -Greedy action tensor([-0.9129, -1.3241, -0.3386, -1.4300]) tensor([0.2478, 0.1643, 0.4401, 0.1478]) -Greedy action tensor([-0.0932, 0.0895, 0.1960, -0.7214]) tensor([0.2457, 0.2950, 0.3281, 0.1311]) -Greedy action tensor([-0.2629, -0.5876, -0.8704, 1.6792]) tensor([0.1082, 0.0782, 0.0589, 0.7546]) -Greedy action tensor([ 0.8611, -0.3754, 0.9076, 1.1880]) tensor([0.2685, 0.0780, 0.2813, 0.3723]) -Greedy action tensor([0.5697, 0.7131, 1.5508, 0.2202]) tensor([0.1809, 0.2088, 0.4826, 0.1276]) -Greedy action tensor([ 1.5214, -1.0638, -0.3527, 0.9984]) tensor([0.5490, 0.0414, 0.0843, 0.3254]) -Greedy action tensor([1.2445, 0.1261, 0.1328, 0.6695]) tensor([0.4507, 0.1473, 0.1483, 0.2537]) -Greedy action tensor([-0.6796, -0.9636, -0.3566, 0.3355]) tensor([0.1697, 0.1277, 0.2344, 0.4682]) -Greedy action tensor([ 0.5403, -0.9372, 0.6850, 1.4750]) tensor([0.2028, 0.0463, 0.2344, 0.5165]) -Greedy action tensor([ 0.9352, 0.9461, -0.5703, 0.2334]) tensor([0.3665, 0.3705, 0.0813, 0.1817]) -Greedy action tensor([1.3736, 0.1577, 0.8070, 1.7109]) tensor([0.3063, 0.0908, 0.1738, 0.4291]) -Greedy action tensor([ 0.0646, -0.6815, -0.1112, 0.8812]) tensor([0.2185, 0.1036, 0.1833, 0.4945]) -Greedy action tensor([ 0.1689, -1.6754, 0.1319, 0.6561]) tensor([0.2667, 0.0422, 0.2570, 0.4341]) -Greedy action tensor([-0.0632, -0.8695, -0.1391, -1.1120]) tensor([0.3671, 0.1639, 0.3403, 0.1286]) -Greedy action tensor([-0.4149, -1.2888, -0.9417, -0.5710]) tensor([0.3492, 0.1457, 0.2062, 0.2988]) -Greedy action tensor([ 0.5226, 0.4397, 0.6015, -0.2747]) tensor([0.2896, 0.2666, 0.3134, 0.1305]) -Greedy action tensor([0.8768, 0.3131, 0.1090, 0.5697]) tensor([0.3612, 0.2055, 0.1676, 0.2657]) -Greedy action tensor([-1.2542, 0.8164, 0.2481, -0.2755]) tensor([0.0622, 0.4931, 0.2793, 0.1655]) -Greedy action tensor([ 0.9486, -1.7344, 0.2419, -0.0048]) tensor([0.5136, 0.0351, 0.2533, 0.1980]) -Greedy action tensor([ 0.6459, -1.7051, 2.2939, -0.6061]) tensor([0.1520, 0.0145, 0.7900, 0.0435]) -Greedy action tensor([0.2252, 1.4837, 0.9097, 0.5826]) tensor([0.1261, 0.4438, 0.2500, 0.1802]) -Greedy action tensor([-1.0962, -1.8612, 0.7171, 0.4346]) tensor([0.0818, 0.0381, 0.5018, 0.3783]) -Greedy action tensor([1.6177, 0.0151, 0.8688, 0.7439]) tensor([0.4781, 0.0963, 0.2261, 0.1995]) -Greedy action tensor([ 1.1667, -0.5803, 1.0185, 0.9225]) tensor([0.3546, 0.0618, 0.3058, 0.2778]) -Greedy action tensor([ 1.2254, -0.6677, -0.3411, 0.2599]) tensor([0.5747, 0.0865, 0.1200, 0.2188]) -Greedy action tensor([ 0.9102, -0.2034, -0.3761, 0.4032]) tensor([0.4531, 0.1488, 0.1252, 0.2729]) -Greedy action tensor([ 0.6342, -0.3145, -0.1630, 0.2092]) tensor([0.4013, 0.1554, 0.1808, 0.2624]) -Greedy action tensor([ 1.2849, -0.5441, -0.3704, 0.2487]) tensor([0.5860, 0.0941, 0.1120, 0.2079]) -Greedy action tensor([ 0.9630, -0.4527, -0.4070, 0.3848]) tensor([0.4860, 0.1180, 0.1235, 0.2726]) -Greedy action tensor([ 1.5575, -0.1291, -0.3390, 0.0583]) tensor([0.6416, 0.1188, 0.0963, 0.1433]) -Greedy action tensor([ 1.5686, 0.0045, 0.3255, -0.3655]) tensor([0.6089, 0.1274, 0.1756, 0.0880]) -Greedy action tensor([ 1.6490, -0.1050, 0.0162, 0.4629]) tensor([0.5974, 0.1034, 0.1167, 0.1825]) -Greedy action tensor([ 1.0289, -0.7513, -0.2128, 0.2995]) tensor([0.5156, 0.0869, 0.1489, 0.2486]) -Greedy action tensor([ 1.6565, -0.4836, -0.6976, 0.0037]) tensor([0.7122, 0.0838, 0.0676, 0.1364]) -Greedy action tensor([ 1.0464, -0.4280, -0.1989, 0.4311]) tensor([0.4861, 0.1113, 0.1399, 0.2627]) -Greedy action tensor([ 0.7675, -0.3001, -0.4331, 0.6727]) tensor([0.3915, 0.1346, 0.1178, 0.3561]) -Greedy action tensor([ 1.4008, -0.3170, -0.2593, 0.1481]) tensor([0.6041, 0.1084, 0.1149, 0.1726]) -Greedy action tensor([ 0.9392, -0.2124, -0.4784, 0.3301]) tensor([0.4757, 0.1504, 0.1153, 0.2587]) -Greedy action tensor([ 0.5988, -0.4734, -0.0749, 0.2784]) tensor([0.3879, 0.1328, 0.1978, 0.2816]) -Greedy action tensor([ 1.0986, -0.3616, -0.0824, 0.2778]) tensor([0.5052, 0.1173, 0.1551, 0.2224]) -Greedy action tensor([ 0.3946, -0.1322, 0.1414, 0.2320]) tensor([0.3109, 0.1836, 0.2413, 0.2642]) -Greedy action tensor([ 1.5384, -0.6058, -0.4303, 0.4408]) tensor([0.6288, 0.0737, 0.0878, 0.2098]) -Greedy action tensor([ 0.9467, 0.1074, -0.1329, 0.1069]) tensor([0.4538, 0.1961, 0.1542, 0.1960]) -Greedy action tensor([ 1.6293, -0.4113, -0.3309, 0.5478]) tensor([0.6212, 0.0807, 0.0875, 0.2106]) -Greedy action tensor([ 1.0268, -0.5024, -0.3256, 0.0455]) tensor([0.5405, 0.1171, 0.1398, 0.2026]) -Greedy action tensor([ 1.2322, -0.4513, -0.0054, 0.0678]) tensor([0.5593, 0.1039, 0.1622, 0.1746]) -Greedy action tensor([ 0.8824, -0.4106, -0.5544, 0.4960]) tensor([0.4563, 0.1252, 0.1085, 0.3100]) -Greedy action tensor([ 0.9137, -0.4741, -0.0454, -0.0111]) tensor([0.4927, 0.1230, 0.1888, 0.1954]) -Greedy action tensor([ 1.0574, -0.4336, -0.2221, 0.2625]) tensor([0.5115, 0.1152, 0.1423, 0.2310]) -Greedy action tensor([ 0.7480, -0.3711, 0.0253, 0.0836]) tensor([0.4298, 0.1404, 0.2086, 0.2212]) -Greedy action tensor([ 1.4594, -0.5942, -0.5373, 0.3852]) tensor([0.6228, 0.0799, 0.0846, 0.2127]) -Greedy action tensor([ 0.9769, 0.0115, 0.2515, -0.2027]) tensor([0.4603, 0.1753, 0.2229, 0.1415]) -Greedy action tensor([ 1.5871, -0.6501, -0.2963, 0.3338]) tensor([0.6475, 0.0691, 0.0985, 0.1849]) -Greedy action tensor([ 1.3905e+00, -1.3472e-03, 2.1803e-01, 2.6273e-01]) tensor([0.5314, 0.1321, 0.1645, 0.1720]) -Greedy action tensor([ 1.5129, -0.4525, -0.2738, 0.6522]) tensor([0.5779, 0.0810, 0.0968, 0.2444]) -Greedy action tensor([ 0.8610, -0.5050, 0.0944, 0.0796]) tensor([0.4593, 0.1172, 0.2134, 0.2102]) -Greedy action tensor([ 1.7910, -0.6574, -0.4285, 0.6761]) tensor([0.6566, 0.0568, 0.0713, 0.2153]) -Greedy action tensor([ 1.1030, -0.5702, -0.1222, 0.1423]) tensor([0.5365, 0.1007, 0.1576, 0.2053]) -Greedy action tensor([ 1.4130, -0.2937, -0.3449, 0.3111]) tensor([0.5931, 0.1076, 0.1023, 0.1971]) -Greedy action tensor([ 1.8612, -0.3579, -0.4186, 0.3855]) tensor([0.6946, 0.0755, 0.0711, 0.1588]) -Greedy action tensor([ 2.2244, -1.4416, -0.3133, 0.7500]) tensor([0.7499, 0.0192, 0.0593, 0.1717]) -Greedy action tensor([ 0.6737, -0.5138, -0.2422, 0.5100]) tensor([0.3915, 0.1194, 0.1567, 0.3324]) -Greedy action tensor([ 2.1214, -1.2408, -0.0923, 0.4614]) tensor([0.7496, 0.0260, 0.0819, 0.1425]) -Greedy action tensor([ 1.4957, -0.3344, -0.1036, 0.5026]) tensor([0.5771, 0.0926, 0.1166, 0.2138]) -Greedy action tensor([ 1.5261, -0.8721, -0.2634, 0.4338]) tensor([0.6276, 0.0570, 0.1048, 0.2105]) -Greedy action tensor([ 1.2472, -0.0642, -0.7476, 0.4053]) tensor([0.5446, 0.1467, 0.0741, 0.2346]) -Greedy action tensor([ 1.2473, -0.6092, -0.1943, 0.6581]) tensor([0.5135, 0.0802, 0.1215, 0.2848]) -Greedy action tensor([ 1.4762, -0.2242, -0.5049, 0.3710]) tensor([0.6054, 0.1106, 0.0835, 0.2005]) -Greedy action tensor([ 1.9299, -1.3305, -0.6834, 0.5608]) tensor([0.7321, 0.0281, 0.0537, 0.1862]) -Greedy action tensor([ 1.4310, -0.4834, -0.4214, 0.3541]) tensor([0.6079, 0.0896, 0.0954, 0.2071]) -Greedy action tensor([ 0.7479, -0.2945, -0.0637, -0.1429]) tensor([0.4531, 0.1598, 0.2012, 0.1859]) -Greedy action tensor([ 0.9013, -0.0649, -0.8245, 0.2481]) tensor([0.4810, 0.1830, 0.0856, 0.2503]) -Greedy action tensor([ 1.1605, -0.3213, -0.1341, -0.1608]) tensor([0.5656, 0.1285, 0.1550, 0.1509]) -Greedy action tensor([ 1.6112, -0.4841, -0.0118, 0.3850]) tensor([0.6197, 0.0762, 0.1223, 0.1818]) -Greedy action tensor([ 2.7327, -1.2179, -0.2895, 1.0994]) tensor([0.7916, 0.0152, 0.0385, 0.1546]) -Greedy action tensor([1.0079, 0.4215, 0.1596, 0.1354]) tensor([0.4163, 0.2316, 0.1782, 0.1740]) -Greedy action tensor([ 1.0538, 0.1551, -0.7590, 0.3396]) tensor([0.4855, 0.1976, 0.0792, 0.2377]) -Greedy action tensor([ 1.9287, -0.4301, -0.3621, 0.5789]) tensor([0.6873, 0.0650, 0.0695, 0.1782]) -Greedy action tensor([ 1.7420, -1.1598, -0.7762, 0.7528]) tensor([0.6634, 0.0364, 0.0535, 0.2467]) -Greedy action tensor([ 1.2672, -0.0293, -0.6371, 0.0921]) tensor([0.5776, 0.1580, 0.0860, 0.1784]) -Greedy action tensor([ 1.3870, -0.2666, -0.6590, 0.3030]) tensor([0.6028, 0.1154, 0.0779, 0.2039]) -Greedy action tensor([ 1.7140, -0.1696, -0.1328, 0.2766]) tensor([0.6463, 0.0983, 0.1019, 0.1535]) -Greedy action tensor([ 0.9946, -0.3229, -0.2744, 0.1570]) tensor([0.5046, 0.1351, 0.1419, 0.2184]) -Greedy action tensor([ 1.3281, -0.3160, -0.4236, 0.3858]) tensor([0.5694, 0.1100, 0.0988, 0.2219]) -Greedy action tensor([ 1.5724, -0.8973, -0.0860, 0.2759]) tensor([0.6458, 0.0546, 0.1230, 0.1766]) -Greedy action tensor([ 0.7350, -0.1898, -0.0166, 0.0096]) tensor([0.4251, 0.1686, 0.2005, 0.2058]) -Greedy action tensor([ 0.4146, -0.3140, -0.1190, -0.0804]) tensor([0.3733, 0.1802, 0.2189, 0.2276]) -Greedy action tensor([ 2.7921, -1.6267, -0.4460, 0.5989]) tensor([0.8600, 0.0104, 0.0337, 0.0959]) -Greedy action tensor([ 0.7637, -0.2729, -0.3647, 0.4664]) tensor([0.4130, 0.1465, 0.1336, 0.3068]) -Greedy action tensor([ 0.3957, -0.2676, 0.0543, 0.0541]) tensor([0.3406, 0.1754, 0.2420, 0.2420]) -Greedy action tensor([ 1.4888, -0.3599, -0.3566, 0.1804]) tensor([0.6306, 0.0993, 0.0996, 0.1704]) -Greedy action tensor([ 0.7896, 0.0787, -0.4427, 0.0215]) tensor([0.4451, 0.2186, 0.1298, 0.2065]) -Greedy action tensor([ 1.8026, -0.5378, -0.2269, 0.2673]) tensor([0.6930, 0.0667, 0.0911, 0.1493]) -Greedy action tensor([ 1.0327, -0.4352, -0.2986, 0.4141]) tensor([0.4918, 0.1133, 0.1299, 0.2649]) -Greedy action tensor([ 1.2306, -0.4164, -0.3783, -0.2094]) tensor([0.6136, 0.1182, 0.1228, 0.1454]) -Greedy action tensor([ 1.9697, -0.5911, -0.6373, 0.3087]) tensor([0.7457, 0.0576, 0.0550, 0.1417]) -Greedy action tensor([ 1.7123, -0.6218, -0.1120, 0.7440]) tensor([0.6105, 0.0592, 0.0985, 0.2318]) -Greedy action tensor([ 1.5412, -0.1382, -0.0438, 0.2232]) tensor([0.6028, 0.1124, 0.1235, 0.1613]) -Greedy action tensor([ 1.3752, -0.7046, -0.0450, 0.0203]) tensor([0.6155, 0.0769, 0.1488, 0.1588]) -Greedy action tensor([ 1.3462, -0.5656, -0.3303, 0.4939]) tensor([0.5678, 0.0839, 0.1062, 0.2421]) -Greedy action tensor([ 1.3658, -0.3161, -0.4781, 0.3656]) tensor([0.5841, 0.1087, 0.0924, 0.2148]) -Greedy action tensor([ 0.9802, -0.2285, -0.0728, -0.1389]) tensor([0.5066, 0.1513, 0.1767, 0.1654]) -Greedy action tensor([ 1.0498, -0.0439, -0.0820, 0.0773]) tensor([0.4913, 0.1646, 0.1584, 0.1858]) -Greedy action tensor([ 0.9069, -0.3258, 0.0988, 0.0921]) tensor([0.4587, 0.1337, 0.2045, 0.2031]) -Greedy action tensor([ 1.0441, -0.5625, -0.0926, 0.3993]) tensor([0.4887, 0.0980, 0.1568, 0.2565]) -Greedy action tensor([-1.7667, -0.2392, 0.5537, -0.0644]) tensor([0.0470, 0.2166, 0.4785, 0.2579]) -Greedy action tensor([-1.4555, -0.6054, 0.4455, 0.1482]) tensor([0.0667, 0.1560, 0.4461, 0.3313]) -Greedy action tensor([-1.6293, -0.3047, 0.5446, 0.0311]) tensor([0.0531, 0.1999, 0.4673, 0.2796]) -Greedy action tensor([-1.8380, -0.3041, 0.5953, -0.1101]) tensor([0.0441, 0.2046, 0.5029, 0.2484]) -Greedy action tensor([-1.9171, -0.4482, 0.6969, -0.1095]) tensor([0.0399, 0.1731, 0.5441, 0.2429]) -Greedy action tensor([-1.3694, -0.5521, 0.6099, 0.6531]) tensor([0.0554, 0.1254, 0.4008, 0.4185]) -Greedy action tensor([-1.8111, -0.3359, 0.5942, -0.0997]) tensor([0.0455, 0.1988, 0.5039, 0.2518]) -Greedy action tensor([-0.5895, -0.4666, 0.1587, 0.1361]) tensor([0.1585, 0.1792, 0.3349, 0.3274]) -Greedy action tensor([-1.8665, -0.4635, 0.7605, 0.0526]) tensor([0.0389, 0.1582, 0.5379, 0.2650]) -Greedy action tensor([-1.8646, -0.3915, 0.6184, -0.1525]) tensor([0.0437, 0.1907, 0.5235, 0.2422]) -Greedy action tensor([-1.6786, -0.4534, 0.6135, 0.1055]) tensor([0.0494, 0.1681, 0.4886, 0.2940]) -Greedy action tensor([-1.3841, -0.5372, 0.3716, 0.1391]) tensor([0.0730, 0.1702, 0.4222, 0.3346]) -Greedy action tensor([-1.2831, -0.5664, 0.3796, 0.4122]) tensor([0.0726, 0.1487, 0.3830, 0.3957]) -Greedy action tensor([-1.7710, -0.5095, 0.5816, -0.0800]) tensor([0.0489, 0.1725, 0.5136, 0.2650]) -Greedy action tensor([-1.0056, -0.5843, 0.2434, 0.3993]) tensor([0.0991, 0.1511, 0.3457, 0.4040]) -Greedy action tensor([-1.1349, -0.6087, 0.3161, 0.3846]) tensor([0.0867, 0.1468, 0.3701, 0.3964]) -Greedy action tensor([-1.9024, -0.3738, 0.6376, -0.1465]) tensor([0.0415, 0.1915, 0.5266, 0.2404]) -Greedy action tensor([-1.8523, -0.4804, 0.6715, -0.0593]) tensor([0.0427, 0.1683, 0.5326, 0.2564]) -Greedy action tensor([-1.9102, -0.4578, 0.6494, -0.1606]) tensor([0.0417, 0.1784, 0.5398, 0.2401]) -Greedy action tensor([-1.9299, -0.4530, 0.6588, -0.1712]) tensor([0.0408, 0.1788, 0.5434, 0.2370]) -Greedy action tensor([-1.8782, -0.4365, 0.6294, -0.1442]) tensor([0.0432, 0.1825, 0.5299, 0.2445]) -Greedy action tensor([-1.1527, -0.6002, 0.2757, 0.2310]) tensor([0.0918, 0.1594, 0.3828, 0.3660]) -Greedy action tensor([-1.9453, -0.4576, 0.6732, -0.1790]) tensor([0.0400, 0.1771, 0.5488, 0.2340]) -Greedy action tensor([-1.8905, -0.2306, 0.6080, -0.1391]) tensor([0.0413, 0.2174, 0.5030, 0.2383]) -Greedy action tensor([-1.6119, -0.5695, 0.5032, 0.0257]) tensor([0.0579, 0.1642, 0.4801, 0.2978]) -Greedy action tensor([-1.8452, -0.3616, 0.6485, -0.0374]) tensor([0.0424, 0.1867, 0.5127, 0.2582]) -Greedy action tensor([ 0.3798, -0.3586, 0.2028, 0.2448]) tensor([0.3135, 0.1498, 0.2627, 0.2740]) -Greedy action tensor([-1.5983, -0.5262, 0.4978, -0.0364]) tensor([0.0594, 0.1737, 0.4835, 0.2834]) -Greedy action tensor([-1.8062, -0.3804, 0.5919, -0.0966]) tensor([0.0461, 0.1918, 0.5072, 0.2548]) -Greedy action tensor([-1.8876, -0.3301, 0.6208, -0.1414]) tensor([0.0421, 0.1997, 0.5170, 0.2412]) -Greedy action tensor([-1.8608, -0.4660, 0.6205, -0.1244]) tensor([0.0441, 0.1780, 0.5275, 0.2504]) -Greedy action tensor([-1.2556, -0.5855, 0.6499, -0.1876]) tensor([0.0795, 0.1553, 0.5341, 0.2312]) -Greedy action tensor([-1.9103, -0.4278, 0.6493, -0.1561]) tensor([0.0415, 0.1826, 0.5362, 0.2397]) -Greedy action tensor([-1.9386, -0.4571, 0.6715, -0.1725]) tensor([0.0402, 0.1771, 0.5473, 0.2354]) -Greedy action tensor([-0.5984, -0.5701, 0.2509, 0.2145]) tensor([0.1510, 0.1554, 0.3531, 0.3405]) -Greedy action tensor([-1.9221, -0.4592, 0.6741, -0.1619]) tensor([0.0407, 0.1759, 0.5464, 0.2369]) -Greedy action tensor([-1.1489, -0.5870, 0.2450, 0.3612]) tensor([0.0884, 0.1551, 0.3563, 0.4002]) -Greedy action tensor([-1.6194, -0.3228, 0.5095, 0.1364]) tensor([0.0530, 0.1940, 0.4459, 0.3071]) -Greedy action tensor([-1.3844, -0.5652, 0.3711, 0.2752]) tensor([0.0699, 0.1585, 0.4043, 0.3673]) -Greedy action tensor([-1.0346, -0.5707, 0.3557, -0.1256]) tensor([0.1100, 0.1750, 0.4419, 0.2731]) -Greedy action tensor([-1.2836, -0.2672, 0.7493, -0.6991]) tensor([0.0758, 0.2094, 0.5788, 0.1360]) -Greedy action tensor([-1.7831, -0.5391, 0.8301, 0.1581]) tensor([0.0399, 0.1383, 0.5440, 0.2778]) -Greedy action tensor([-1.9117, -0.4199, 0.6483, -0.1548]) tensor([0.0414, 0.1839, 0.5351, 0.2397]) -Greedy action tensor([-1.6173, -0.5529, 0.5061, -0.0644]) tensor([0.0589, 0.1707, 0.4922, 0.2782]) -Greedy action tensor([-1.1226, -0.5992, 0.2427, 0.2818]) tensor([0.0937, 0.1581, 0.3668, 0.3815]) -Greedy action tensor([-1.9848, -0.5384, 0.9383, 0.3147]) tensor([0.0296, 0.1256, 0.5500, 0.2948]) -Greedy action tensor([-1.0537, -0.5494, 0.2196, 0.3727]) tensor([0.0962, 0.1593, 0.3438, 0.4006]) -Greedy action tensor([-1.1139, -0.5967, 0.2332, 0.3101]) tensor([0.0937, 0.1571, 0.3602, 0.3890]) -Greedy action tensor([-1.3415, 0.3208, 0.3251, -0.0321]) tensor([0.0655, 0.3452, 0.3467, 0.2426]) -Greedy action tensor([-0.3021, -0.0614, 0.1168, 0.0530]) tensor([0.1916, 0.2438, 0.2913, 0.2733]) -Greedy action tensor([-1.4918, -0.5731, 1.3701, 1.1572]) tensor([0.0285, 0.0713, 0.4978, 0.4024]) -Greedy action tensor([-0.9937, -0.4504, 0.4477, 0.6392]) tensor([0.0829, 0.1427, 0.3503, 0.4242]) -Greedy action tensor([-1.0125, -0.5907, 0.3807, 0.1424]) tensor([0.1028, 0.1568, 0.4141, 0.3263]) -Greedy action tensor([-1.7433, -0.4897, 0.6534, 0.1161]) tensor([0.0456, 0.1599, 0.5015, 0.2930]) -Greedy action tensor([-1.8613, -0.4398, 0.6351, -0.1135]) tensor([0.0434, 0.1800, 0.5272, 0.2494]) -Greedy action tensor([-1.6312, -0.2549, 0.5024, 0.0620]) tensor([0.0531, 0.2102, 0.4482, 0.2885]) -Greedy action tensor([-0.2466, 0.4127, 0.1937, 0.4416]) tensor([0.1544, 0.2985, 0.2398, 0.3073]) -Greedy action tensor([-0.8346, -0.0941, 0.1867, 1.0425]) tensor([0.0806, 0.1690, 0.2238, 0.5266]) -Greedy action tensor([-1.2200, -0.0085, 0.3263, 0.1690]) tensor([0.0766, 0.2571, 0.3593, 0.3070]) -Greedy action tensor([-1.8936, -0.3536, 0.6300, -0.1338]) tensor([0.0418, 0.1948, 0.5208, 0.2426]) -Greedy action tensor([-1.6687, -0.4093, 0.6830, 0.1643]) tensor([0.0470, 0.1656, 0.4936, 0.2938]) -Greedy action tensor([-1.8201, -0.4559, 0.6007, -0.1158]) tensor([0.0462, 0.1806, 0.5195, 0.2537]) -Greedy action tensor([-1.5776, -0.5580, 0.4846, 0.0977]) tensor([0.0589, 0.1633, 0.4632, 0.3146]) -Greedy action tensor([-1.7438, -0.4311, 0.6754, 0.0772]) tensor([0.0452, 0.1679, 0.5077, 0.2792]) -Greedy action tensor([-1.2676, -0.7055, -0.2642, -0.4634]) tensor([0.1296, 0.2273, 0.3535, 0.2896]) -Greedy action tensor([-1.9287, -0.4559, 0.6662, -0.1646]) tensor([0.0407, 0.1773, 0.5447, 0.2373]) -Greedy action tensor([-1.9310, -0.3965, 0.6487, -0.1695]) tensor([0.0406, 0.1882, 0.5352, 0.2361]) -Greedy action tensor([-1.9751, 0.1604, 0.6188, -0.0085]) tensor([0.0333, 0.2821, 0.4462, 0.2383]) -Greedy action tensor([-0.8866, -0.2970, 0.6775, 1.3254]) tensor([0.0598, 0.1079, 0.2859, 0.5465]) -Greedy action tensor([-0.6512, 0.2064, 0.6253, 1.1496]) tensor([0.0769, 0.1814, 0.2758, 0.4659]) -Greedy action tensor([-1.4212, -0.4673, 0.6000, 0.4374]) tensor([0.0570, 0.1478, 0.4299, 0.3653]) -Greedy action tensor([-1.8297, -0.3676, 0.6594, -0.0823]) tensor([0.0433, 0.1868, 0.5215, 0.2484]) -Greedy action tensor([-1.2093, -0.6967, 0.2846, 0.0367]) tensor([0.0943, 0.1575, 0.4202, 0.3280]) -Greedy action tensor([-1.7152, -0.5353, 0.5580, -0.0409]) tensor([0.0518, 0.1686, 0.5032, 0.2764]) -Greedy action tensor([-1.7902, -0.5517, 0.8181, 0.1753]) tensor([0.0397, 0.1371, 0.5395, 0.2837]) -Greedy action tensor([-1.9196, -0.4635, 0.6567, -0.1670]) tensor([0.0413, 0.1772, 0.5432, 0.2383]) -Greedy action tensor([-0.9814, -0.5603, 0.5090, 0.9514]) tensor([0.0721, 0.1098, 0.3200, 0.4981]) -Greedy action tensor([-1.8439, -0.3831, 0.6501, -0.1195]) tensor([0.0434, 0.1871, 0.5259, 0.2436]) -Greedy action tensor([-1.8873, -0.4320, 0.6384, -0.1416]) tensor([0.0425, 0.1823, 0.5315, 0.2437]) -Greedy action tensor([-1.9033, -0.4073, 0.6426, -0.1489]) tensor([0.0417, 0.1860, 0.5315, 0.2408]) -Greedy action tensor([-1.5159, -0.5774, 0.9784, 0.3437]) tensor([0.0453, 0.1157, 0.5483, 0.2907]) -Greedy action tensor([ 1.0381, -0.7708, -0.0280, -0.2907]) tensor([0.5640, 0.0924, 0.1942, 0.1494]) -Greedy action tensor([ 0.3416, -0.2795, 0.0156, -0.5149]) tensor([0.3726, 0.2002, 0.2689, 0.1582]) -Greedy action tensor([ 0.8777, -0.5589, -0.0239, -0.3453]) tensor([0.5160, 0.1227, 0.2095, 0.1519]) -Greedy action tensor([ 0.6205, -0.4081, -0.0307, -0.4135]) tensor([0.4475, 0.1600, 0.2333, 0.1591]) -Greedy action tensor([ 0.1204, 0.0253, -0.0395, -0.3295]) tensor([0.2942, 0.2675, 0.2507, 0.1876]) -Greedy action tensor([ 1.0870, -0.2154, -0.0922, -0.0276]) tensor([0.5243, 0.1425, 0.1612, 0.1720]) -Greedy action tensor([ 0.5755, -0.2102, -0.0945, -0.2759]) tensor([0.4177, 0.1904, 0.2137, 0.1783]) -Greedy action tensor([ 1.1942, -0.7021, 0.0820, -0.2534]) tensor([0.5834, 0.0876, 0.1918, 0.1372]) -Greedy action tensor([ 0.0467, -0.0571, -0.0094, -0.0508]) tensor([0.2664, 0.2401, 0.2518, 0.2416]) -Greedy action tensor([ 0.3199, -0.2632, 0.1170, -0.3065]) tensor([0.3438, 0.1919, 0.2806, 0.1837]) -Greedy action tensor([ 0.5950, -0.3008, 0.0274, -0.2453]) tensor([0.4155, 0.1696, 0.2355, 0.1793]) -Greedy action tensor([ 0.4380, -0.0195, 0.0543, -0.1453]) tensor([0.3482, 0.2203, 0.2372, 0.1943]) -Greedy action tensor([ 0.7302, -0.5189, -0.1011, -0.2063]) tensor([0.4730, 0.1356, 0.2060, 0.1854]) -Greedy action tensor([ 0.7345, -0.4149, -0.1025, -0.3872]) tensor([0.4818, 0.1526, 0.2086, 0.1569]) -Greedy action tensor([ 0.7122, -0.3042, 0.0234, -0.1530]) tensor([0.4376, 0.1584, 0.2198, 0.1842]) -Greedy action tensor([ 0.7078, -0.5929, 0.0043, -0.2957]) tensor([0.4686, 0.1276, 0.2319, 0.1718]) -Greedy action tensor([ 0.6486, -0.5011, -0.1134, -0.3924]) tensor([0.4680, 0.1482, 0.2184, 0.1653]) -Greedy action tensor([ 0.7601, -0.7617, 0.0668, -0.3857]) tensor([0.4911, 0.1072, 0.2455, 0.1562]) -Greedy action tensor([ 0.9974, -0.4981, 0.0896, -0.5684]) tensor([0.5445, 0.1221, 0.2197, 0.1138]) -Greedy action tensor([ 0.5053, 0.0110, 0.1253, -0.1276]) tensor([0.3540, 0.2159, 0.2421, 0.1880]) -Greedy action tensor([ 0.6080, -0.2962, 0.1312, -0.4978]) tensor([0.4243, 0.1718, 0.2634, 0.1404]) -Greedy action tensor([ 0.6403, -0.4017, -0.0946, -0.4635]) tensor([0.4621, 0.1630, 0.2216, 0.1532]) -Greedy action tensor([0.3336, 0.0596, 0.1788, 0.0461]) tensor([0.2970, 0.2258, 0.2544, 0.2228]) -Greedy action tensor([ 0.7832, -0.5134, -0.0600, -0.2704]) tensor([0.4872, 0.1332, 0.2097, 0.1699]) -Greedy action tensor([ 0.3500, -0.0087, 0.0625, -0.2829]) tensor([0.3356, 0.2344, 0.2517, 0.1782]) -Greedy action tensor([ 0.6203, -0.2120, -0.0872, -0.1373]) tensor([0.4173, 0.1815, 0.2056, 0.1956]) -Greedy action tensor([ 0.4798, -0.0955, -0.0747, -0.3506]) tensor([0.3887, 0.2186, 0.2232, 0.1694]) -Greedy action tensor([ 0.9086, -1.0078, 0.0029, -0.4488]) tensor([0.5529, 0.0813, 0.2235, 0.1423]) -Greedy action tensor([ 0.8021, -0.4602, -0.1760, -0.3426]) tensor([0.5057, 0.1431, 0.1902, 0.1610]) -Greedy action tensor([ 0.8209, -0.1503, -0.0511, -0.3922]) tensor([0.4776, 0.1808, 0.1997, 0.1420]) -Greedy action tensor([ 0.7062, -0.5836, -0.1776, -0.4302]) tensor([0.4976, 0.1370, 0.2056, 0.1597]) -Greedy action tensor([ 0.9639, -0.4589, -0.1107, -0.5094]) tensor([0.5520, 0.1331, 0.1885, 0.1265]) -Greedy action tensor([ 0.4295, -0.3075, -0.0603, -0.2457]) tensor([0.3846, 0.1840, 0.2356, 0.1958]) -Greedy action tensor([ 0.7879, -0.4448, -0.0893, -0.3351]) tensor([0.4919, 0.1434, 0.2046, 0.1600]) -Greedy action tensor([ 0.9520, -0.5405, -0.0252, -0.3871]) tensor([0.5367, 0.1207, 0.2020, 0.1407]) -Greedy action tensor([ 0.9997, -0.4365, -0.1767, -0.2814]) tensor([0.5483, 0.1304, 0.1691, 0.1523]) -Greedy action tensor([ 0.7111, -0.6038, 0.0008, -0.2178]) tensor([0.4640, 0.1246, 0.2281, 0.1833]) -Greedy action tensor([ 0.8763, -0.6253, -0.2178, -0.4654]) tensor([0.5498, 0.1225, 0.1841, 0.1437]) -Greedy action tensor([ 0.1601, -0.1290, -0.1722, -0.3340]) tensor([0.3251, 0.2435, 0.2332, 0.1983]) -Greedy action tensor([ 1.1435, -0.6400, -0.1229, -0.3939]) tensor([0.6007, 0.1009, 0.1693, 0.1291]) -Greedy action tensor([ 0.3737, 0.0441, -0.0681, -0.0146]) tensor([0.3289, 0.2366, 0.2114, 0.2231]) -Greedy action tensor([ 0.3223, 0.0879, -0.0919, -0.0976]) tensor([0.3216, 0.2544, 0.2126, 0.2114]) -Greedy action tensor([ 0.6494, -0.2461, -0.0379, -0.2969]) tensor([0.4349, 0.1776, 0.2187, 0.1688]) -Greedy action tensor([ 0.4619, -0.1575, -0.0107, -0.3158]) tensor([0.3815, 0.2054, 0.2378, 0.1753]) -Greedy action tensor([ 0.8240, -0.1904, 0.0258, -0.3102]) tensor([0.4685, 0.1699, 0.2109, 0.1507]) -Greedy action tensor([ 1.0107, -0.8491, 0.1510, -0.6630]) tensor([0.5661, 0.0881, 0.2396, 0.1062]) -Greedy action tensor([ 0.8755, -0.8595, -0.0133, -0.5431]) tensor([0.5466, 0.0964, 0.2247, 0.1323]) -Greedy action tensor([ 0.8518, -0.3968, -0.1409, -0.5463]) tensor([0.5251, 0.1506, 0.1946, 0.1297]) -Greedy action tensor([ 0.5413, -0.6184, -0.3328, -0.5275]) tensor([0.4821, 0.1512, 0.2012, 0.1656]) -Greedy action tensor([ 0.8154, -0.3972, -0.1020, -0.3529]) tensor([0.4980, 0.1481, 0.1990, 0.1548]) -Greedy action tensor([ 1.0364, -0.6791, -0.0768, -0.5403]) tensor([0.5831, 0.1049, 0.1916, 0.1205]) -Greedy action tensor([ 1.5421, -0.8740, -0.1435, -0.9818]) tensor([0.7381, 0.0659, 0.1368, 0.0592]) -Greedy action tensor([ 0.8778, -0.2594, 0.0502, -0.3552]) tensor([0.4880, 0.1565, 0.2133, 0.1422]) -Greedy action tensor([ 1.0424, -0.0454, -0.0429, -0.1263]) tensor([0.5036, 0.1697, 0.1701, 0.1565]) -Greedy action tensor([ 1.0563, -0.7301, 0.1721, -0.4045]) tensor([0.5517, 0.0924, 0.2279, 0.1280]) -Greedy action tensor([ 0.4593, -0.1160, -0.0624, -0.1812]) tensor([0.3727, 0.2097, 0.2212, 0.1964]) -Greedy action tensor([ 0.7401, 0.1535, -0.1195, -0.4713]) tensor([0.4391, 0.2442, 0.1859, 0.1308]) -Greedy action tensor([ 0.1913, -0.2039, -0.2071, -0.5304]) tensor([0.3532, 0.2379, 0.2372, 0.1716]) -Greedy action tensor([ 0.6712, -0.5044, -0.0790, -0.2170]) tensor([0.4562, 0.1408, 0.2154, 0.1877]) -Greedy action tensor([ 0.8885, -0.2700, -0.0456, -0.3297]) tensor([0.4993, 0.1568, 0.1962, 0.1477]) -Greedy action tensor([ 0.6153, -0.4493, -0.0422, -0.4896]) tensor([0.4557, 0.1572, 0.2361, 0.1510]) -Greedy action tensor([ 7.2933e-01, -3.9930e-01, 2.2434e-04, -2.5271e-01]) tensor([0.4586, 0.1484, 0.2212, 0.1718]) -Greedy action tensor([ 0.6576, -0.5928, 0.2530, -0.6696]) tensor([0.4507, 0.1291, 0.3007, 0.1195]) -Greedy action tensor([ 0.3260, 0.0482, -0.0930, -0.1020]) tensor([0.3260, 0.2470, 0.2144, 0.2125]) -Greedy action tensor([ 0.5576, -0.1695, 0.0592, -0.2831]) tensor([0.3965, 0.1916, 0.2409, 0.1710]) -Greedy action tensor([ 9.3036e-01, -3.5525e-01, 1.9684e-04, -5.2396e-01]) tensor([0.5251, 0.1452, 0.2071, 0.1226]) -Greedy action tensor([ 0.6834, -0.4929, 0.0685, -0.3489]) tensor([0.4535, 0.1398, 0.2452, 0.1615]) -Greedy action tensor([ 0.4724, -0.1364, 0.0295, -0.3878]) tensor([0.3832, 0.2085, 0.2461, 0.1621]) -Greedy action tensor([ 0.9061, -0.4423, -0.1155, -0.5344]) tensor([0.5386, 0.1399, 0.1939, 0.1276]) -Greedy action tensor([ 0.7469, -0.5023, -0.0827, -0.3774]) tensor([0.4883, 0.1400, 0.2130, 0.1586]) -Greedy action tensor([ 0.1250, 0.1009, -0.0338, -0.1962]) tensor([0.2813, 0.2746, 0.2400, 0.2040]) -Greedy action tensor([ 0.9358, -0.7248, 0.0732, -0.3937]) tensor([0.5328, 0.1013, 0.2249, 0.1410]) -Greedy action tensor([ 0.2856, -0.0749, -0.1229, -0.1611]) tensor([0.3332, 0.2323, 0.2214, 0.2131]) -Greedy action tensor([ 0.5741, -0.1490, -0.0531, -0.8277]) tensor([0.4414, 0.2142, 0.2358, 0.1087]) -Greedy action tensor([ 0.1941, -0.1670, 0.0658, -0.2306]) tensor([0.3096, 0.2157, 0.2723, 0.2024]) -Greedy action tensor([ 0.9000, -0.4936, 0.0187, -0.4049]) tensor([0.5172, 0.1283, 0.2142, 0.1403]) -Greedy action tensor([ 0.8018, -0.2831, 0.1094, -0.2811]) tensor([0.4594, 0.1552, 0.2298, 0.1556]) -Greedy action tensor([ 0.6856, -0.3512, 0.0983, -0.3317]) tensor([0.4401, 0.1561, 0.2446, 0.1591]) -Greedy action tensor([ 0.7162, -0.4297, 0.0067, -0.3521]) tensor([0.4644, 0.1476, 0.2284, 0.1596]) -Greedy action tensor([ 0.7067, 0.2065, -0.0770, -0.2312]) tensor([0.4074, 0.2470, 0.1861, 0.1595]) -Greedy action tensor([ 0.5519, -0.3757, -0.0766, -0.3411]) tensor([0.4277, 0.1691, 0.2281, 0.1751]) -Greedy action tensor([ 2.3542, 0.3619, -0.0398, 0.4369]) tensor([0.7275, 0.0992, 0.0664, 0.1069]) -Greedy action tensor([0.6123, 0.1151, 0.0442, 0.0377]) tensor([0.3652, 0.2222, 0.2070, 0.2056]) -Greedy action tensor([ 0.9907, -0.2965, -0.4228, 0.6087]) tensor([0.4542, 0.1254, 0.1105, 0.3100]) -Greedy action tensor([ 1.3250, -0.1329, -0.3336, 0.3560]) tensor([0.5548, 0.1291, 0.1056, 0.2105]) -Greedy action tensor([ 1.6990, -0.2842, -0.5929, 0.4105]) tensor([0.6603, 0.0909, 0.0667, 0.1820]) -Greedy action tensor([ 1.7074, -0.2975, -0.3472, 0.5680]) tensor([0.6318, 0.0851, 0.0810, 0.2022]) -Greedy action tensor([ 1.9010, -0.6376, -0.8655, 0.4449]) tensor([0.7273, 0.0574, 0.0457, 0.1696]) -Greedy action tensor([ 1.3623, -0.3564, -1.0248, 0.6562]) tensor([0.5667, 0.1016, 0.0521, 0.2797]) -Greedy action tensor([ 1.1648, -0.1791, -0.6235, 0.2924]) tensor([0.5417, 0.1413, 0.0906, 0.2264]) -Greedy action tensor([ 1.9057, -0.1607, -0.1677, 0.7310]) tensor([0.6405, 0.0811, 0.0805, 0.1979]) -Greedy action tensor([ 1.5778, -0.5082, -0.5061, 0.2497]) tensor([0.6607, 0.0820, 0.0822, 0.1751]) -Greedy action tensor([ 0.9040, -0.3026, -0.1274, 0.0672]) tensor([0.4787, 0.1432, 0.1707, 0.2073]) -Greedy action tensor([ 2.1241, -0.7291, -0.2261, 0.4126]) tensor([0.7498, 0.0432, 0.0715, 0.1354]) -Greedy action tensor([ 0.9994, -0.5270, -0.2802, 0.4609]) tensor([0.4810, 0.1045, 0.1338, 0.2807]) -Greedy action tensor([ 1.0502, -0.1883, -0.0173, -0.1293]) tensor([0.5152, 0.1493, 0.1771, 0.1584]) -Greedy action tensor([ 0.4034, -0.2765, 0.0743, 0.0055]) tensor([0.3451, 0.1748, 0.2483, 0.2318]) -Greedy action tensor([ 1.8275, -0.9403, -0.3046, 0.4557]) tensor([0.6968, 0.0438, 0.0826, 0.1768]) -Greedy action tensor([ 1.7280, -0.5506, -0.4029, 0.4077]) tensor([0.6719, 0.0688, 0.0798, 0.1795]) -Greedy action tensor([ 1.6314, -0.0498, -0.5074, 0.0852]) tensor([0.6592, 0.1227, 0.0777, 0.1404]) -Greedy action tensor([ 1.1368, -0.1946, -0.6340, 0.1911]) tensor([0.5486, 0.1449, 0.0934, 0.2131]) -Greedy action tensor([ 1.6556, -0.5170, -0.4297, 0.3789]) tensor([0.6592, 0.0751, 0.0819, 0.1839]) -Greedy action tensor([ 1.4611, -0.4843, -0.1622, 0.1780]) tensor([0.6183, 0.0884, 0.1220, 0.1714]) -Greedy action tensor([ 0.3743, -0.2856, 0.1089, -0.1464]) tensor([0.3475, 0.1796, 0.2665, 0.2064]) -Greedy action tensor([ 1.2232, -0.2576, -0.2013, 0.2729]) tensor([0.5392, 0.1226, 0.1297, 0.2085]) -Greedy action tensor([ 0.9466, -0.2445, -0.3212, 0.5643]) tensor([0.4410, 0.1340, 0.1241, 0.3009]) -Greedy action tensor([ 1.0801, -0.4789, -0.2546, 0.2293]) tensor([0.5261, 0.1107, 0.1385, 0.2247]) -Greedy action tensor([ 1.1538, -0.6105, -0.2490, 0.2664]) tensor([0.5468, 0.0937, 0.1344, 0.2251]) -Greedy action tensor([ 1.4556, -0.3673, -0.5439, 0.3227]) tensor([0.6176, 0.0998, 0.0836, 0.1989]) -Greedy action tensor([ 2.0663, -0.7618, -0.2419, 0.6001]) tensor([0.7198, 0.0426, 0.0716, 0.1661]) -Greedy action tensor([ 1.4270, 0.0017, -0.6660, 0.3625]) tensor([0.5853, 0.1407, 0.0722, 0.2019]) -Greedy action tensor([ 1.4947, -0.7853, -0.3175, 0.6607]) tensor([0.5883, 0.0602, 0.0961, 0.2555]) -Greedy action tensor([ 0.8837, -0.2409, -0.3456, 0.3861]) tensor([0.4494, 0.1460, 0.1314, 0.2732]) -Greedy action tensor([ 1.7786, -0.9036, -0.3710, 0.4096]) tensor([0.6948, 0.0475, 0.0810, 0.1767]) -Greedy action tensor([ 0.7423, -0.6222, -0.0563, 0.1889]) tensor([0.4385, 0.1120, 0.1973, 0.2521]) -Greedy action tensor([ 1.1776, -0.7647, -0.3215, 0.4182]) tensor([0.5451, 0.0782, 0.1217, 0.2551]) -Greedy action tensor([ 1.2942, -0.2288, -0.1973, 0.6019]) tensor([0.5145, 0.1122, 0.1158, 0.2575]) -Greedy action tensor([ 0.9188, -0.2042, -0.4868, 0.4625]) tensor([0.4537, 0.1476, 0.1113, 0.2875]) -Greedy action tensor([ 1.0801, -0.2532, -0.4445, 0.3144]) tensor([0.5138, 0.1354, 0.1119, 0.2389]) -Greedy action tensor([ 1.3059, -0.5943, -0.1991, 0.5463]) tensor([0.5436, 0.0813, 0.1207, 0.2544]) -Greedy action tensor([ 1.2503, -0.2987, -0.2392, 0.4390]) tensor([0.5313, 0.1129, 0.1198, 0.2360]) -Greedy action tensor([ 2.3392, -0.9556, -0.3923, 0.8467]) tensor([0.7536, 0.0279, 0.0491, 0.1694]) -Greedy action tensor([ 1.2415, -0.1674, -0.3650, 0.1769]) tensor([0.5587, 0.1366, 0.1121, 0.1927]) -Greedy action tensor([ 1.1190, -0.4296, -0.3135, 0.4468]) tensor([0.5097, 0.1083, 0.1217, 0.2603]) -Greedy action tensor([ 1.5717, -0.3626, -0.3237, 0.3502]) tensor([0.6291, 0.0909, 0.0945, 0.1855]) -Greedy action tensor([ 1.6895, -0.5173, -0.2929, 0.2860]) tensor([0.6696, 0.0737, 0.0922, 0.1645]) -Greedy action tensor([ 1.5726, -0.3778, -0.1276, 0.8046]) tensor([0.5590, 0.0795, 0.1021, 0.2594]) -Greedy action tensor([ 1.2310, -0.1514, -0.5399, 0.3252]) tensor([0.5478, 0.1375, 0.0932, 0.2214]) -Greedy action tensor([ 0.9145, -0.3133, -0.1165, -0.0020]) tensor([0.4879, 0.1429, 0.1740, 0.1951]) -Greedy action tensor([ 1.8408, -0.3499, -0.3551, 0.4428]) tensor([0.6802, 0.0761, 0.0757, 0.1681]) -Greedy action tensor([ 1.5944, -0.2896, -0.4599, 0.3422]) tensor([0.6386, 0.0970, 0.0819, 0.1825]) -Greedy action tensor([ 1.0390, -0.4522, -0.3095, 0.5669]) tensor([0.4743, 0.1068, 0.1231, 0.2958]) -Greedy action tensor([ 1.0250, -0.4889, -0.2136, 0.2422]) tensor([0.5084, 0.1119, 0.1473, 0.2324]) -Greedy action tensor([ 1.4361, -0.6711, -0.1336, 0.0293]) tensor([0.6351, 0.0772, 0.1322, 0.1555]) -Greedy action tensor([ 1.7517, -0.9233, -0.1624, 0.1640]) tensor([0.7038, 0.0485, 0.1038, 0.1439]) -Greedy action tensor([ 1.4334, -0.1767, -0.6046, 0.4168]) tensor([0.5910, 0.1181, 0.0770, 0.2138]) -Greedy action tensor([ 1.9504, -0.7452, -0.6383, 1.2068]) tensor([0.6180, 0.0417, 0.0464, 0.2938]) -Greedy action tensor([ 1.8268, -1.1351, -0.3712, 0.6505]) tensor([0.6797, 0.0352, 0.0755, 0.2096]) -Greedy action tensor([ 1.3134, -0.5356, -0.3488, 0.2822]) tensor([0.5870, 0.0924, 0.1114, 0.2093]) -Greedy action tensor([ 0.8565, 0.0312, -0.2456, 0.0917]) tensor([0.4473, 0.1960, 0.1486, 0.2082]) -Greedy action tensor([ 0.9920, -0.5175, -0.7071, 1.1115]) tensor([0.3951, 0.0873, 0.0722, 0.4453]) -Greedy action tensor([ 1.0490, -0.6576, -0.3872, 0.5809]) tensor([0.4889, 0.0887, 0.1163, 0.3061]) -Greedy action tensor([ 0.8879, -0.4319, 0.0051, 0.2307]) tensor([0.4547, 0.1215, 0.1881, 0.2357]) -Greedy action tensor([ 0.8888, -0.2559, 0.2424, 0.0555]) tensor([0.4392, 0.1398, 0.2301, 0.1909]) -Greedy action tensor([ 1.8542, -0.8948, -0.4317, 0.5535]) tensor([0.6954, 0.0445, 0.0707, 0.1894]) -Greedy action tensor([ 1.0872, -0.0749, -0.2682, 0.2500]) tensor([0.4991, 0.1561, 0.1287, 0.2161]) -Greedy action tensor([ 1.7146, -0.6035, -0.3487, 0.6117]) tensor([0.6421, 0.0632, 0.0816, 0.2131]) -Greedy action tensor([ 0.6371, -0.1893, 0.1141, 0.0915]) tensor([0.3832, 0.1677, 0.2271, 0.2220]) -Greedy action tensor([ 1.1736, -0.5002, -0.2600, 0.2488]) tensor([0.5487, 0.1029, 0.1308, 0.2176]) -Greedy action tensor([ 1.1778, -0.5285, -0.1617, 0.0751]) tensor([0.5632, 0.1022, 0.1476, 0.1870]) -Greedy action tensor([ 1.2591, -0.5062, -0.1673, 0.3218]) tensor([0.5546, 0.0949, 0.1332, 0.2172]) -Greedy action tensor([ 1.0271, -0.3586, -0.1751, 0.3048]) tensor([0.4911, 0.1228, 0.1476, 0.2385]) -Greedy action tensor([ 0.9262, -0.2884, -0.1437, 0.0213]) tensor([0.4891, 0.1452, 0.1678, 0.1979]) -Greedy action tensor([ 1.5028, 0.0879, -0.4005, 0.3266]) tensor([0.5881, 0.1429, 0.0877, 0.1814]) -Greedy action tensor([ 1.3152, -0.0624, -0.2653, -0.1315]) tensor([0.5905, 0.1489, 0.1216, 0.1390]) -Greedy action tensor([ 0.9429, 0.0727, 0.1726, -0.1680]) tensor([0.4523, 0.1895, 0.2093, 0.1489]) -Greedy action tensor([ 0.8553, -0.3974, -0.3770, 0.2789]) tensor([0.4674, 0.1336, 0.1363, 0.2627]) -Greedy action tensor([ 1.1184, -0.4529, -0.1375, 0.3875]) tensor([0.5066, 0.1053, 0.1443, 0.2439]) -Greedy action tensor([ 1.8868, -0.1922, -0.2695, 0.4927]) tensor([0.6717, 0.0840, 0.0777, 0.1666]) -Greedy action tensor([ 1.6534, -0.6343, -0.5290, 0.4722]) tensor([0.6574, 0.0667, 0.0741, 0.2018]) -Greedy action tensor([ 2.0896, -0.7908, -0.3666, 0.9911]) tensor([0.6778, 0.0380, 0.0581, 0.2260]) -Greedy action tensor([ 1.3614, -0.4209, -0.3033, 0.3033]) tensor([0.5866, 0.0987, 0.1110, 0.2036]) -Greedy action tensor([ 1.2886, -0.4010, 0.9503, 1.5644]) tensor([0.3110, 0.0574, 0.2218, 0.4098]) -Greedy action tensor([ 0.8563, -0.5031, 0.4365, 0.1956]) tensor([0.4114, 0.1057, 0.2704, 0.2125]) -Greedy action tensor([ 1.2349, -0.3047, 0.1889, -0.0764]) tensor([0.5449, 0.1169, 0.1914, 0.1468]) -Greedy action tensor([0.6568, 0.5717, 0.5657, 0.7947]) tensor([0.2513, 0.2308, 0.2294, 0.2885]) -Greedy action tensor([0.3852, 0.4427, 0.0862, 1.3140]) tensor([0.1875, 0.1986, 0.1391, 0.4747]) -Greedy action tensor([ 1.2447, -0.6726, 0.6313, 0.5722]) tensor([0.4548, 0.0669, 0.2462, 0.2321]) -Greedy action tensor([-0.8678, -1.4338, 0.3162, 0.6112]) tensor([0.1084, 0.0616, 0.3542, 0.4758]) -Greedy action tensor([ 0.8394, -0.9677, 1.3161, 1.2710]) tensor([0.2318, 0.0380, 0.3733, 0.3569]) -Greedy action tensor([-0.0572, -0.0793, 0.8850, 0.1350]) tensor([0.1737, 0.1699, 0.4458, 0.2106]) -Greedy action tensor([-0.3308, -0.4419, -0.3894, 0.3506]) tensor([0.2077, 0.1859, 0.1959, 0.4105]) -Greedy action tensor([-0.2781, -0.8975, 0.0430, 0.0294]) tensor([0.2338, 0.1259, 0.3223, 0.3180]) -Greedy action tensor([-0.3714, -0.6569, -0.3338, 0.3985]) tensor([0.2020, 0.1519, 0.2098, 0.4363]) -Greedy action tensor([-0.4815, -1.9743, -0.1301, -0.4164]) tensor([0.2693, 0.0605, 0.3827, 0.2874]) -Greedy action tensor([ 1.9230, -2.0627, 0.3074, 0.4029]) tensor([0.6964, 0.0129, 0.1384, 0.1523]) -Greedy action tensor([ 1.4360, -0.2861, 0.4757, 0.7148]) tensor([0.4884, 0.0873, 0.1869, 0.2374]) -Greedy action tensor([-0.6912, -0.1097, 0.4813, 0.4143]) tensor([0.1106, 0.1979, 0.3573, 0.3342]) -Greedy action tensor([-0.5751, -0.4575, 0.8031, -0.7307]) tensor([0.1439, 0.1619, 0.5710, 0.1232]) -Greedy action tensor([ 0.5549, -1.2145, 0.7418, 1.4826]) tensor([0.2039, 0.0347, 0.2458, 0.5156]) -Greedy action tensor([-0.1359, 0.4346, -0.1669, -1.0172]) tensor([0.2408, 0.4260, 0.2334, 0.0998]) -Greedy action tensor([-0.0541, 0.5410, 0.2831, -0.1305]) tensor([0.1945, 0.3527, 0.2725, 0.1802]) -Greedy action tensor([ 1.3887, -0.7593, 0.2316, 1.5893]) tensor([0.3769, 0.0440, 0.1185, 0.4606]) -Greedy action tensor([ 1.7468, -0.6690, 0.0059, 1.8650]) tensor([0.4184, 0.0374, 0.0734, 0.4709]) -Greedy action tensor([-0.1776, -1.0110, 1.7804, -0.6135]) tensor([0.1091, 0.0474, 0.7729, 0.0706]) -Greedy action tensor([ 0.4864, -1.3389, -0.4597, 0.1912]) tensor([0.4360, 0.0703, 0.1693, 0.3245]) -Greedy action tensor([ 0.8522, -1.0535, 0.6063, 1.3749]) tensor([0.2764, 0.0411, 0.2162, 0.4663]) -Greedy action tensor([-0.7553, -0.3835, 0.3099, -0.6949]) tensor([0.1559, 0.2261, 0.4524, 0.1656]) -Greedy action tensor([1.4671, 0.4376, 0.3297, 0.7509]) tensor([0.4616, 0.1649, 0.1480, 0.2255]) -Greedy action tensor([ 0.9084, -2.1495, -0.0566, 1.0286]) tensor([0.3913, 0.0184, 0.1491, 0.4413]) -Greedy action tensor([1.2822, 0.8146, 0.5300, 0.0325]) tensor([0.4194, 0.2627, 0.1977, 0.1202]) -Greedy action tensor([-0.6365, -1.3537, 0.1146, -0.4908]) tensor([0.2099, 0.1025, 0.4448, 0.2428]) -Greedy action tensor([0.3569, 0.2902, 0.3873, 0.2349]) tensor([0.2596, 0.2429, 0.2677, 0.2298]) -Greedy action tensor([ 0.4034, 0.7127, -0.1043, -0.6087]) tensor([0.3005, 0.4094, 0.1809, 0.1092]) -Greedy action tensor([ 0.9981, -0.4843, 0.0390, 1.7561]) tensor([0.2671, 0.0606, 0.1024, 0.5699]) -Greedy action tensor([-0.2438, -1.0119, 0.3618, 0.5002]) tensor([0.1852, 0.0859, 0.3393, 0.3896]) -Greedy action tensor([-0.5164, 0.1674, -0.7566, 0.7812]) tensor([0.1346, 0.2667, 0.1059, 0.4928]) -Greedy action tensor([ 1.5614, -1.2167, 1.1992, 0.6616]) tensor([0.4619, 0.0287, 0.3216, 0.1878]) -Greedy action tensor([-0.0719, -0.6317, -1.0058, 1.8917]) tensor([0.1100, 0.0629, 0.0432, 0.7839]) -Greedy action tensor([ 1.2088, 0.7423, 0.1586, -0.3602]) tensor([0.4576, 0.2870, 0.1601, 0.0953]) -Greedy action tensor([ 1.1803, -0.2340, 1.1505, 0.3720]) tensor([0.3760, 0.0914, 0.3650, 0.1676]) -Greedy action tensor([ 0.8200, -0.3739, 0.3739, 0.2079]) tensor([0.4023, 0.1219, 0.2576, 0.2182]) -Greedy action tensor([-1.0035, -2.5188, -0.1936, 0.9562]) tensor([0.0947, 0.0208, 0.2128, 0.6718]) -Greedy action tensor([-0.4002, -1.7883, -0.3865, 0.2008]) tensor([0.2447, 0.0611, 0.2480, 0.4462]) -Greedy action tensor([ 0.1994, 0.2434, -0.4003, -0.0187]) tensor([0.2943, 0.3075, 0.1616, 0.2366]) -Greedy action tensor([-0.5781, 0.0998, 0.7304, 0.7552]) tensor([0.0956, 0.1882, 0.3537, 0.3625]) -Greedy action tensor([-0.7857, -0.3358, 0.2525, -0.4854]) tensor([0.1483, 0.2326, 0.4188, 0.2003]) -Greedy action tensor([ 1.1943, -0.0315, 1.5560, 0.5279]) tensor([0.3084, 0.0905, 0.4428, 0.1584]) -Greedy action tensor([ 1.3685, -0.0506, 0.4802, 0.5466]) tensor([0.4778, 0.1156, 0.1965, 0.2100]) -Greedy action tensor([ 0.8861, -1.0799, 0.9297, 0.9646]) tensor([0.3062, 0.0429, 0.3198, 0.3312]) -Greedy action tensor([-0.3890, -0.1438, 0.9092, -0.4322]) tensor([0.1450, 0.1852, 0.5310, 0.1388]) -Greedy action tensor([ 0.9677, -0.3661, 0.3954, 0.2755]) tensor([0.4295, 0.1132, 0.2423, 0.2150]) -Greedy action tensor([ 0.9907, -0.2358, 0.8474, 1.3088]) tensor([0.2829, 0.0830, 0.2452, 0.3889]) -Greedy action tensor([ 0.9643, 0.3556, -0.3389, 0.4449]) tensor([0.4148, 0.2257, 0.1127, 0.2468]) -Greedy action tensor([ 2.0464e-01, -1.2433e+00, 7.5713e-01, -1.1059e-03]) tensor([0.2641, 0.0621, 0.4589, 0.2150]) -Greedy action tensor([ 0.5431, 0.2998, -0.7473, 0.6186]) tensor([0.3187, 0.2499, 0.0877, 0.3437]) -Greedy action tensor([ 1.0161, -1.3221, 0.6790, 1.1085]) tensor([0.3440, 0.0332, 0.2455, 0.3773]) -Greedy action tensor([-0.6273, 0.5460, 1.6267, -0.8969]) tensor([0.0689, 0.2226, 0.6559, 0.0526]) -Greedy action tensor([-0.5335, -0.5583, -0.8233, -0.4527]) tensor([0.2626, 0.2562, 0.1965, 0.2847]) -Greedy action tensor([-0.5681, 0.4111, -0.2632, 0.6639]) tensor([0.1184, 0.3152, 0.1606, 0.4058]) -Greedy action tensor([ 1.3359, -1.2128, 0.1197, 1.2159]) tensor([0.4422, 0.0346, 0.1310, 0.3922]) -Greedy action tensor([-1.4622, -1.1230, 1.7321, 0.2253]) tensor([0.0311, 0.0436, 0.7575, 0.1679]) -Greedy action tensor([-0.0116, -1.4644, 0.5358, 0.9850]) tensor([0.1763, 0.0412, 0.3048, 0.4776]) -Greedy action tensor([ 0.1277, -1.4834, 0.9152, 0.1710]) tensor([0.2251, 0.0450, 0.4948, 0.2351]) -Greedy action tensor([-0.1077, -2.8400, -0.3869, -0.1635]) tensor([0.3614, 0.0235, 0.2734, 0.3418]) -Greedy action tensor([ 0.6616, -0.2624, 0.6017, 0.6158]) tensor([0.3036, 0.1205, 0.2859, 0.2900]) -Greedy action tensor([ 1.4192, -0.9709, -0.5144, 1.6308]) tensor([0.4046, 0.0371, 0.0585, 0.4999]) -Greedy action tensor([ 0.2221, -0.1490, 0.1216, 0.2414]) tensor([0.2767, 0.1909, 0.2503, 0.2821]) -Greedy action tensor([0.0709, 1.0852, 0.3893, 0.4250]) tensor([0.1525, 0.4205, 0.2097, 0.2173]) -Greedy action tensor([ 1.2702, 0.3131, -0.5306, 1.3709]) tensor([0.3766, 0.1446, 0.0622, 0.4165]) -Greedy action tensor([-0.6096, -1.3892, -0.3422, -0.3714]) tensor([0.2479, 0.1137, 0.3239, 0.3146]) -Greedy action tensor([0.5806, 1.1623, 1.1012, 0.6040]) tensor([0.1820, 0.3255, 0.3062, 0.1863]) -Greedy action tensor([ 0.3574, -0.2578, 1.4213, 0.0212]) tensor([0.1941, 0.1049, 0.5623, 0.1387]) -Greedy action tensor([-0.5561, -0.4114, 1.7204, -0.6989]) tensor([0.0783, 0.0905, 0.7632, 0.0679]) -Greedy action tensor([-0.5715, -0.7503, -0.5481, -0.0630]) tensor([0.2211, 0.1849, 0.2263, 0.3676]) -Greedy action tensor([ 0.0976, -1.1643, 0.2728, -0.5781]) tensor([0.3352, 0.0949, 0.3994, 0.1705]) -Greedy action tensor([ 0.1781, -0.2135, 0.2787, 1.4867]) tensor([0.1543, 0.1043, 0.1706, 0.5709]) -Greedy action tensor([-0.7104, 0.5542, 1.5947, 0.3769]) tensor([0.0570, 0.2020, 0.5718, 0.1692]) -Greedy action tensor([ 1.2021, -1.0518, 1.5002, 0.7537]) tensor([0.3235, 0.0340, 0.4359, 0.2066]) -Greedy action tensor([1.2273, 0.2000, 0.4561, 0.4264]) tensor([0.4406, 0.1577, 0.2038, 0.1978]) -Greedy action tensor([ 0.0720, -0.8566, -0.0597, -0.3087]) tensor([0.3384, 0.1337, 0.2966, 0.2313]) -Greedy action tensor([ 0.1353, -1.5438, 1.6461, -0.5069]) tensor([0.1602, 0.0299, 0.7257, 0.0843]) -Greedy action tensor([ 0.0234, 0.2146, -0.0851, 0.5363]) tensor([0.2093, 0.2534, 0.1878, 0.3495]) -Greedy action tensor([ 0.0984, -2.3594, 0.4820, 0.5053]) tensor([0.2466, 0.0211, 0.3619, 0.3704]) -Greedy action tensor([-1.8887, -0.4338, 0.6369, -0.1412]) tensor([0.0425, 0.1821, 0.5313, 0.2440]) -Greedy action tensor([-1.8778, -0.4480, 0.6275, -0.1490]) tensor([0.0434, 0.1812, 0.5311, 0.2443]) -Greedy action tensor([-1.7900, -0.4201, 0.5971, -0.0586]) tensor([0.0466, 0.1833, 0.5070, 0.2631]) -Greedy action tensor([-1.5234, -0.5848, 0.5702, 0.0893]) tensor([0.0599, 0.1532, 0.4863, 0.3006]) -Greedy action tensor([-0.8201, 0.7059, 0.4744, 1.1465]) tensor([0.0610, 0.2806, 0.2226, 0.4359]) -Greedy action tensor([-9.6462e-01, -5.0715e-04, 2.2226e-01, 9.6116e-01]) tensor([0.0727, 0.1906, 0.2381, 0.4986]) -Greedy action tensor([-1.0988, -0.5872, 0.2399, 0.2888]) tensor([0.0954, 0.1590, 0.3637, 0.3819]) -Greedy action tensor([-1.6503, -0.4644, 1.2620, 0.9927]) tensor([0.0272, 0.0891, 0.5010, 0.3827]) -Greedy action tensor([-0.7347, -0.5110, 0.3412, 1.2222]) tensor([0.0816, 0.1020, 0.2392, 0.5772]) -Greedy action tensor([-1.8618, -0.4659, 0.6226, -0.1431]) tensor([0.0442, 0.1786, 0.5305, 0.2467]) -Greedy action tensor([-1.5716, 0.0837, 0.3950, -0.0229]) tensor([0.0553, 0.2894, 0.3951, 0.2602]) -Greedy action tensor([-1.9178, -0.4374, 0.6566, -0.1570]) tensor([0.0411, 0.1806, 0.5393, 0.2390]) -Greedy action tensor([-0.8921, -0.2988, 0.4508, -0.2500]) tensor([0.1171, 0.2119, 0.4485, 0.2225]) -Greedy action tensor([-1.8919, -0.3313, 0.6391, -0.1265]) tensor([0.0414, 0.1970, 0.5199, 0.2418]) -Greedy action tensor([-1.7424, -0.5032, 0.6062, -0.0153]) tensor([0.0487, 0.1680, 0.5096, 0.2737]) -Greedy action tensor([-1.7423, -0.3657, 0.5488, -0.0638]) tensor([0.0495, 0.1961, 0.4893, 0.2652]) -Greedy action tensor([-1.8568, -0.4776, 0.6363, -0.1214]) tensor([0.0440, 0.1746, 0.5320, 0.2494]) -Greedy action tensor([-0.7008, 0.5305, 0.1607, -0.1258]) tensor([0.1167, 0.3997, 0.2762, 0.2074]) -Greedy action tensor([-1.2635, -0.5946, 0.5467, 0.5495]) tensor([0.0658, 0.1285, 0.4023, 0.4034]) -Greedy action tensor([-1.7854, -0.4537, 0.6150, -0.0369]) tensor([0.0464, 0.1757, 0.5115, 0.2665]) -Greedy action tensor([-1.8403, -0.4858, 1.0183, 0.4222]) tensor([0.0313, 0.1214, 0.5463, 0.3010]) -Greedy action tensor([-1.9082, -0.4336, 0.6450, -0.1658]) tensor([0.0418, 0.1826, 0.5369, 0.2387]) -Greedy action tensor([-1.8121, -0.4465, 0.6003, -0.1016]) tensor([0.0463, 0.1813, 0.5164, 0.2560]) -Greedy action tensor([-0.7793, -0.5713, 0.3953, 0.1074]) tensor([0.1267, 0.1559, 0.4100, 0.3074]) -Greedy action tensor([-1.8393, -0.4790, 0.6440, -0.0682]) tensor([0.0439, 0.1713, 0.5265, 0.2583]) -Greedy action tensor([-1.2094, -0.4518, 0.6217, -0.5520]) tensor([0.0885, 0.1887, 0.5521, 0.1707]) -Greedy action tensor([-1.5023, -0.3691, 0.6625, 0.4464]) tensor([0.0504, 0.1565, 0.4392, 0.3538]) -Greedy action tensor([-1.6669, -0.5690, 0.5178, -0.0338]) tensor([0.0555, 0.1665, 0.4936, 0.2843]) -Greedy action tensor([-0.0805, 0.1722, 0.9316, 1.6925]) tensor([0.0915, 0.1178, 0.2518, 0.5389]) -Greedy action tensor([-0.5110, -0.4553, 0.1680, -0.0288]) tensor([0.1770, 0.1872, 0.3491, 0.2867]) -Greedy action tensor([-0.9551, -0.6192, 0.1479, 0.6073]) tensor([0.0982, 0.1374, 0.2959, 0.4685]) -Greedy action tensor([-0.7662, 0.8484, 0.0675, 0.1647]) tensor([0.0920, 0.4626, 0.2119, 0.2335]) -Greedy action tensor([-0.9649, -0.5914, 0.6549, 0.5348]) tensor([0.0834, 0.1212, 0.4215, 0.3738]) -Greedy action tensor([-1.8428, -0.4695, 0.6196, -0.1624]) tensor([0.0454, 0.1791, 0.5321, 0.2435]) -Greedy action tensor([-1.3222, -0.6006, 1.2368, 1.2292]) tensor([0.0347, 0.0714, 0.4486, 0.4452]) -Greedy action tensor([-1.8703, -0.3812, 0.6510, -0.1109]) tensor([0.0422, 0.1872, 0.5254, 0.2452]) -Greedy action tensor([-0.4002, 0.1850, 0.8708, 1.4248]) tensor([0.0796, 0.1429, 0.2837, 0.4937]) -Greedy action tensor([-1.8338, -0.4301, 0.9891, 0.4823]) tensor([0.0312, 0.1271, 0.5253, 0.3164]) -Greedy action tensor([-1.5109, -0.6082, 0.4466, 0.0369]) tensor([0.0656, 0.1617, 0.4644, 0.3083]) -Greedy action tensor([-1.9523, -0.8310, 0.5399, -0.2442]) tensor([0.0461, 0.1416, 0.5577, 0.2546]) -Greedy action tensor([-1.9242, -0.4457, 0.6522, -0.1713]) tensor([0.0411, 0.1805, 0.5410, 0.2374]) -Greedy action tensor([-1.5325, -0.5647, 0.7515, -0.5314]) tensor([0.0618, 0.1628, 0.6071, 0.1683]) -Greedy action tensor([-1.8825, -0.3856, 0.6267, -0.1358]) tensor([0.0426, 0.1901, 0.5232, 0.2441]) -Greedy action tensor([-1.0322, -0.5482, 0.2754, 0.0589]) tensor([0.1076, 0.1745, 0.3977, 0.3202]) -Greedy action tensor([-1.9042, -0.4569, 0.6521, -0.1527]) tensor([0.0418, 0.1779, 0.5392, 0.2411]) -Greedy action tensor([-0.8974, -0.5470, 0.2559, 0.1131]) tensor([0.1200, 0.1703, 0.3802, 0.3296]) -Greedy action tensor([-1.8822, -0.4611, 0.6385, -0.1313]) tensor([0.0428, 0.1775, 0.5329, 0.2468]) -Greedy action tensor([-1.8655, -0.4168, 0.6191, -0.1301]) tensor([0.0436, 0.1857, 0.5233, 0.2474]) -Greedy action tensor([-0.4447, -0.5234, 0.4037, 0.0431]) tensor([0.1698, 0.1570, 0.3967, 0.2766]) -Greedy action tensor([-1.9242, -0.4227, 0.6485, -0.1663]) tensor([0.0410, 0.1840, 0.5371, 0.2378]) -Greedy action tensor([-1.8680, -0.4731, 0.6544, -0.0949]) tensor([0.0428, 0.1725, 0.5328, 0.2519]) -Greedy action tensor([-1.6486, -0.2043, 0.6985, 0.1330]) tensor([0.0462, 0.1959, 0.4833, 0.2745]) -Greedy action tensor([-0.4214, -0.6783, 0.3315, 0.4883]) tensor([0.1567, 0.1212, 0.3328, 0.3893]) -Greedy action tensor([-0.8793, 0.1379, 0.0606, -0.4499]) tensor([0.1272, 0.3518, 0.3256, 0.1954]) -Greedy action tensor([-1.5783, -0.5392, 0.4831, 0.1195]) tensor([0.0583, 0.1649, 0.4582, 0.3186]) -Greedy action tensor([-1.9337, -0.4476, 0.6600, -0.1759]) tensor([0.0407, 0.1797, 0.5439, 0.2358]) -Greedy action tensor([-1.7452, -0.4038, 0.5571, -0.0337]) tensor([0.0491, 0.1878, 0.4910, 0.2720]) -Greedy action tensor([-1.7268, -0.0238, 0.5108, -0.1310]) tensor([0.0481, 0.2640, 0.4507, 0.2372]) -Greedy action tensor([-1.6910, -0.2003, 0.5031, -0.0635]) tensor([0.0513, 0.2277, 0.4600, 0.2610]) -Greedy action tensor([-1.8400, -0.4347, 0.6097, -0.1325]) tensor([0.0451, 0.1838, 0.5224, 0.2487]) -Greedy action tensor([-1.7041, -0.3735, 0.5247, -0.0199]) tensor([0.0514, 0.1944, 0.4773, 0.2769]) -Greedy action tensor([-1.4089, -0.8510, 0.3757, 0.2941]) tensor([0.0704, 0.1231, 0.4197, 0.3868]) -Greedy action tensor([-1.7157, -0.5061, 0.7861, 0.2840]) tensor([0.0418, 0.1400, 0.5097, 0.3085]) -Greedy action tensor([-1.9127, -0.4253, 0.6604, -0.1407]) tensor([0.0410, 0.1813, 0.5368, 0.2409]) -Greedy action tensor([-1.7529, -0.3505, 0.5757, -0.0476]) tensor([0.0480, 0.1951, 0.4927, 0.2642]) -Greedy action tensor([-1.8181, -0.5833, 1.2442, 0.6258]) tensor([0.0268, 0.0921, 0.5726, 0.3085]) -Greedy action tensor([-1.6613, -0.4862, 0.5288, 0.0107]) tensor([0.0541, 0.1751, 0.4831, 0.2878]) -Greedy action tensor([-1.9472, -0.4522, 0.6687, -0.1816]) tensor([0.0400, 0.1785, 0.5475, 0.2340]) -Greedy action tensor([-1.6489, -0.5113, 0.6035, 0.1350]) tensor([0.0511, 0.1593, 0.4856, 0.3040]) -Greedy action tensor([-1.7826, -0.4927, 0.5993, -0.0624]) tensor([0.0475, 0.1726, 0.5144, 0.2654]) -Greedy action tensor([-2.0097, -0.7766, 0.9607, 0.0206]) tensor([0.0317, 0.1088, 0.6181, 0.2414]) -Greedy action tensor([-1.3688, -0.5476, 0.3489, 0.2426]) tensor([0.0722, 0.1641, 0.4021, 0.3616]) -Greedy action tensor([-1.4700, -0.2998, 0.6332, 0.4718]) tensor([0.0516, 0.1662, 0.4226, 0.3596]) -Greedy action tensor([-1.1283, -0.6340, 0.5720, 0.9168]) tensor([0.0631, 0.1035, 0.3456, 0.4879]) -Greedy action tensor([-0.8993, -0.5981, 0.1767, 0.3348]) tensor([0.1147, 0.1550, 0.3364, 0.3940]) -Greedy action tensor([-1.2122, -0.5828, 0.3191, 0.2223]) tensor([0.0855, 0.1604, 0.3953, 0.3588]) -Greedy action tensor([-1.6577, -0.4669, 0.6552, 0.1737]) tensor([0.0485, 0.1594, 0.4896, 0.3025]) -Greedy action tensor([-1.5596, -0.5338, 0.4604, 0.0542]) tensor([0.0612, 0.1706, 0.4611, 0.3071]) -Greedy action tensor([-1.8288, -0.3506, 0.5842, -0.1189]) tensor([0.0453, 0.1986, 0.5058, 0.2504]) -Greedy action tensor([-1.6083, -0.5123, 0.5206, 0.1168]) tensor([0.0555, 0.1661, 0.4667, 0.3116]) -Greedy action tensor([-1.6305, -0.4738, 0.5964, 0.2935]) tensor([0.0493, 0.1566, 0.4567, 0.3374]) -Greedy action tensor([ 0.7233, -0.6145, 0.0789, -0.2194]) tensor([0.4594, 0.1205, 0.2412, 0.1789]) -Greedy action tensor([ 0.9677, -0.6456, 0.0707, -0.4794]) tensor([0.5428, 0.1081, 0.2213, 0.1277]) -Greedy action tensor([ 0.8129, -0.5386, 0.0901, -0.1547]) tensor([0.4708, 0.1219, 0.2285, 0.1789]) -Greedy action tensor([ 0.7690, -0.3963, -0.0822, -0.2647]) tensor([0.4775, 0.1489, 0.2038, 0.1698]) -Greedy action tensor([ 0.8830, -0.7918, -0.0163, -0.3961]) tensor([0.5340, 0.1001, 0.2173, 0.1486]) -Greedy action tensor([ 0.6999, -0.3173, 0.1441, -0.6653]) tensor([0.4565, 0.1651, 0.2619, 0.1166]) -Greedy action tensor([ 0.2185, -0.1536, 0.1680, -0.3008]) tensor([0.3091, 0.2131, 0.2939, 0.1839]) -Greedy action tensor([ 0.7090, -0.3942, 0.0341, -0.5460]) tensor([0.4703, 0.1561, 0.2395, 0.1341]) -Greedy action tensor([ 0.7515, -0.4234, -0.0877, -0.3513]) tensor([0.4824, 0.1490, 0.2084, 0.1601]) -Greedy action tensor([ 0.5985, -0.6168, 0.1586, -0.2918]) tensor([0.4253, 0.1262, 0.2739, 0.1746]) -Greedy action tensor([ 0.9060, -0.0479, 0.1632, -0.7133]) tensor([0.4857, 0.1871, 0.2311, 0.0962]) -Greedy action tensor([ 0.4443, -0.2613, -0.0473, -0.0679]) tensor([0.3697, 0.1826, 0.2262, 0.2215]) -Greedy action tensor([ 0.3507, 0.2403, -0.0555, -0.2104]) tensor([0.3193, 0.2859, 0.2127, 0.1822]) -Greedy action tensor([ 0.8991, -0.8192, -0.1626, -0.6943]) tensor([0.5786, 0.1038, 0.2001, 0.1176]) -Greedy action tensor([ 0.4327, -0.1402, -0.0080, -0.1276]) tensor([0.3599, 0.2030, 0.2316, 0.2055]) -Greedy action tensor([ 0.5267, -0.1152, 0.0039, -0.9033]) tensor([0.4240, 0.2231, 0.2514, 0.1015]) -Greedy action tensor([ 0.4129, -0.2564, -0.0253, -0.3413]) tensor([0.3806, 0.1949, 0.2455, 0.1790]) -Greedy action tensor([ 0.7335, -0.1866, 0.0239, -0.1225]) tensor([0.4319, 0.1721, 0.2124, 0.1835]) -Greedy action tensor([ 0.5762, -0.0008, 0.0083, -0.5053]) tensor([0.4053, 0.2276, 0.2297, 0.1374]) -Greedy action tensor([ 0.6391, -0.5074, -0.1180, -0.2114]) tensor([0.4517, 0.1435, 0.2118, 0.1930]) -Greedy action tensor([ 0.9626, -0.8009, -0.1475, -0.5477]) tensor([0.5808, 0.0996, 0.1914, 0.1283]) -Greedy action tensor([ 0.8160, -0.6415, -0.0771, -0.3089]) tensor([0.5084, 0.1184, 0.2081, 0.1651]) -Greedy action tensor([ 0.5702, -0.3243, -0.0531, -0.4533]) tensor([0.4340, 0.1774, 0.2327, 0.1559]) -Greedy action tensor([ 0.7055, -0.2715, 0.0032, -0.0224]) tensor([0.4247, 0.1599, 0.2104, 0.2051]) -Greedy action tensor([ 0.4140, -0.2112, -0.0656, -0.1527]) tensor([0.3674, 0.1966, 0.2274, 0.2085]) -Greedy action tensor([ 0.6843, -0.6459, -0.1321, -0.2788]) tensor([0.4789, 0.1266, 0.2117, 0.1828]) -Greedy action tensor([ 0.9727, -0.2802, -0.1048, -0.3324]) tensor([0.5271, 0.1506, 0.1794, 0.1429]) -Greedy action tensor([ 0.3788, -0.0984, -0.0874, -0.1218]) tensor([0.3504, 0.2174, 0.2198, 0.2124]) -Greedy action tensor([ 0.5811, -0.5021, -0.0814, -0.4424]) tensor([0.4518, 0.1529, 0.2329, 0.1624]) -Greedy action tensor([ 0.5663, -0.0575, -0.0815, -0.1115]) tensor([0.3896, 0.2088, 0.2038, 0.1978]) -Greedy action tensor([ 0.8752, -0.6006, -0.1178, -0.4891]) tensor([0.5392, 0.1233, 0.1998, 0.1378]) -Greedy action tensor([ 0.6807, -0.1581, -0.0269, -0.2089]) tensor([0.4281, 0.1850, 0.2110, 0.1759]) -Greedy action tensor([ 0.6279, -0.4449, -0.0055, -0.1489]) tensor([0.4287, 0.1466, 0.2275, 0.1971]) -Greedy action tensor([ 0.6677, -0.4380, -0.0527, -0.2639]) tensor([0.4522, 0.1497, 0.2200, 0.1781]) -Greedy action tensor([ 0.3888, 0.1076, -0.1239, -0.1915]) tensor([0.3432, 0.2591, 0.2056, 0.1921]) -Greedy action tensor([ 0.4692, -0.2049, -0.0631, -0.4514]) tensor([0.4008, 0.2042, 0.2354, 0.1596]) -Greedy action tensor([ 0.5455, -0.3441, -0.2964, -0.3498]) tensor([0.4444, 0.1826, 0.1915, 0.1815]) -Greedy action tensor([ 0.5161, 0.0480, 0.0821, -0.2398]) tensor([0.3645, 0.2282, 0.2361, 0.1712]) -Greedy action tensor([ 0.4730, -0.0418, -0.0984, -0.0536]) tensor([0.3632, 0.2171, 0.2051, 0.2145]) -Greedy action tensor([ 0.6911, -0.2910, -0.1204, -0.5716]) tensor([0.4758, 0.1782, 0.2114, 0.1346]) -Greedy action tensor([ 0.6272, -0.1997, -0.1063, -0.3025]) tensor([0.4325, 0.1892, 0.2077, 0.1707]) -Greedy action tensor([ 0.3454, -0.0050, -0.0224, -0.1369]) tensor([0.3318, 0.2337, 0.2297, 0.2048]) -Greedy action tensor([ 0.4412, -0.3415, 0.0016, -0.5010]) tensor([0.4014, 0.1835, 0.2586, 0.1565]) -Greedy action tensor([ 0.7121, -0.4102, -0.0256, -0.6171]) tensor([0.4834, 0.1574, 0.2312, 0.1280]) -Greedy action tensor([ 0.6899, -0.5993, -0.1061, -0.5150]) tensor([0.4935, 0.1360, 0.2226, 0.1479]) -Greedy action tensor([ 0.6880, -0.0750, -0.0705, -0.2027]) tensor([0.4264, 0.1988, 0.1997, 0.1750]) -Greedy action tensor([ 0.2392, -0.0378, -0.0335, -0.3905]) tensor([0.3276, 0.2484, 0.2494, 0.1746]) -Greedy action tensor([ 0.3499, -0.1344, -0.0698, -0.1036]) tensor([0.3438, 0.2118, 0.2259, 0.2185]) -Greedy action tensor([ 0.5734, -0.7024, -0.0588, -0.1396]) tensor([0.4346, 0.1214, 0.2310, 0.2130]) -Greedy action tensor([ 0.8349, -0.3347, 0.1348, -0.3877]) tensor([0.4758, 0.1477, 0.2363, 0.1401]) -Greedy action tensor([ 0.8290, -0.7130, 0.0495, -0.3747]) tensor([0.5069, 0.1085, 0.2325, 0.1521]) -Greedy action tensor([ 0.6635, -0.5772, 0.0145, -0.5675]) tensor([0.4753, 0.1375, 0.2484, 0.1388]) -Greedy action tensor([ 0.5995, -0.4760, 0.0374, -0.3234]) tensor([0.4332, 0.1478, 0.2469, 0.1721]) -Greedy action tensor([ 0.6949, -0.5394, -0.0656, -0.2978]) tensor([0.4697, 0.1367, 0.2195, 0.1741]) -Greedy action tensor([ 0.7682, -0.5095, -0.0334, -0.2639]) tensor([0.4799, 0.1338, 0.2153, 0.1710]) -Greedy action tensor([ 0.8978, -0.7373, 0.1957, -0.4162]) tensor([0.5104, 0.0995, 0.2529, 0.1372]) -Greedy action tensor([ 0.7862, -0.4833, -0.1224, -0.4244]) tensor([0.5045, 0.1418, 0.2034, 0.1504]) -Greedy action tensor([ 0.2184, -0.0087, -0.1075, -0.3443]) tensor([0.3238, 0.2580, 0.2337, 0.1844]) -Greedy action tensor([ 0.8299, -0.6800, 0.0227, -0.3967]) tensor([0.5101, 0.1127, 0.2276, 0.1496]) -Greedy action tensor([ 0.8336, -0.8215, -0.0322, -0.3571]) tensor([0.5220, 0.0997, 0.2196, 0.1587]) -Greedy action tensor([ 0.2901, 0.0479, 0.1135, -0.0937]) tensor([0.3026, 0.2375, 0.2537, 0.2062]) -Greedy action tensor([ 0.4884, -0.0779, -0.0388, -0.3204]) tensor([0.3841, 0.2180, 0.2267, 0.1711]) -Greedy action tensor([ 0.6629, -0.6555, 0.0065, -0.2910]) tensor([0.4605, 0.1232, 0.2389, 0.1774]) -Greedy action tensor([ 0.4894, -0.1538, -0.1077, -0.3696]) tensor([0.4001, 0.2103, 0.2202, 0.1695]) -Greedy action tensor([ 0.5234, -0.0346, -0.0242, -0.0580]) tensor([0.3690, 0.2112, 0.2134, 0.2063]) -Greedy action tensor([ 0.7238, -0.6725, -0.0904, -0.2154]) tensor([0.4804, 0.1189, 0.2128, 0.1878]) -Greedy action tensor([ 0.2729, -0.0953, 0.1258, -0.1434]) tensor([0.3111, 0.2152, 0.2685, 0.2052]) -Greedy action tensor([ 0.4305, -0.4605, -0.1193, -0.1732]) tensor([0.3946, 0.1619, 0.2277, 0.2158]) -Greedy action tensor([ 0.9036, -0.4814, -0.1249, -0.2610]) tensor([0.5209, 0.1304, 0.1862, 0.1625]) -Greedy action tensor([ 1.0285, -0.7984, -0.0886, -0.5855]) tensor([0.5927, 0.0954, 0.1939, 0.1180]) -Greedy action tensor([ 0.1873, 0.0647, -0.0999, -0.4288]) tensor([0.3150, 0.2786, 0.2363, 0.1701]) -Greedy action tensor([ 0.2314, -0.1219, -0.1160, -0.2605]) tensor([0.3311, 0.2326, 0.2339, 0.2024]) -Greedy action tensor([ 7.1352e-01, -3.6852e-01, 6.6863e-04, -2.8990e-01]) tensor([0.4554, 0.1543, 0.2233, 0.1670]) -Greedy action tensor([ 0.6397, -0.2522, -0.1074, -0.0935]) tensor([0.4230, 0.1734, 0.2004, 0.2032]) -Greedy action tensor([ 0.3473, -0.0821, -0.0642, -0.1676]) tensor([0.3435, 0.2236, 0.2276, 0.2053]) -Greedy action tensor([ 0.4410, -0.4206, -0.1960, -0.2991]) tensor([0.4118, 0.1740, 0.2178, 0.1965]) -Greedy action tensor([ 1.0994, -0.9457, 0.0325, -0.4483]) tensor([0.5930, 0.0767, 0.2041, 0.1262]) -Greedy action tensor([ 0.7149, -0.5807, -0.1112, -0.5084]) tensor([0.4986, 0.1365, 0.2183, 0.1467]) -Greedy action tensor([ 0.7920, -0.5910, -0.1509, -0.5724]) tensor([0.5275, 0.1323, 0.2054, 0.1348]) -Greedy action tensor([ 0.5605, -0.5579, -0.1113, -0.2833]) tensor([0.4410, 0.1441, 0.2253, 0.1897]) -Greedy action tensor([ 0.8482, -0.4399, -0.0857, -0.1907]) tensor([0.4944, 0.1363, 0.1943, 0.1749]) -Greedy action tensor([ 0.3645, -0.3922, -0.0942, 0.0857]) tensor([0.3499, 0.1642, 0.2212, 0.2648]) -Greedy action tensor([ 1.5103, -0.3935, -0.2734, 0.3982]) tensor([0.6076, 0.0905, 0.1021, 0.1998]) -Greedy action tensor([ 1.4102, -0.6350, -0.2814, 0.2324]) tensor([0.6167, 0.0798, 0.1136, 0.1899]) -Greedy action tensor([ 1.2827, -0.6527, -0.4865, 0.3994]) tensor([0.5786, 0.0835, 0.0986, 0.2392]) -Greedy action tensor([ 0.2531, -0.3699, 0.0918, 0.0016]) tensor([0.3159, 0.1695, 0.2689, 0.2457]) -Greedy action tensor([ 1.7261, -0.9140, -0.5612, 0.2136]) tensor([0.7178, 0.0512, 0.0729, 0.1582]) -Greedy action tensor([ 1.1651, -0.3983, -0.3674, 0.3482]) tensor([0.5355, 0.1122, 0.1157, 0.2366]) -Greedy action tensor([ 2.5991, -1.1951, -0.0243, 0.9693]) tensor([0.7746, 0.0174, 0.0562, 0.1518]) -Greedy action tensor([ 0.9168, -0.2738, -0.0821, 0.0393]) tensor([0.4789, 0.1456, 0.1764, 0.1991]) -Greedy action tensor([ 1.2946, -0.1064, 0.1432, -0.2213]) tensor([0.5611, 0.1382, 0.1774, 0.1232]) -Greedy action tensor([ 1.1717, -0.3370, -0.1871, 0.2093]) tensor([0.5376, 0.1189, 0.1381, 0.2054]) -Greedy action tensor([ 1.0138, -0.3586, -0.2320, 0.3153]) tensor([0.4906, 0.1244, 0.1411, 0.2440]) -Greedy action tensor([ 1.7033, -0.9551, -0.2646, 0.2266]) tensor([0.6953, 0.0487, 0.0972, 0.1588]) -Greedy action tensor([ 0.9240, 0.0487, -0.3850, 0.0971]) tensor([0.4708, 0.1962, 0.1271, 0.2059]) -Greedy action tensor([ 1.4241, -0.0780, -0.6533, 0.2745]) tensor([0.6007, 0.1338, 0.0752, 0.1903]) -Greedy action tensor([ 1.4531, -0.5137, -0.1021, 0.5568]) tensor([0.5685, 0.0795, 0.1200, 0.2320]) -Greedy action tensor([ 1.2404, -0.1071, -0.7137, -0.0368]) tensor([0.5951, 0.1547, 0.0843, 0.1659]) -Greedy action tensor([ 0.6603, -0.2401, -0.3036, 0.3451]) tensor([0.3972, 0.1614, 0.1515, 0.2898]) -Greedy action tensor([ 1.7331, 0.0900, -0.3278, 0.4097]) tensor([0.6301, 0.1219, 0.0802, 0.1678]) -Greedy action tensor([ 1.4314, -0.4071, -0.0582, 0.3296]) tensor([0.5825, 0.0926, 0.1313, 0.1935]) -Greedy action tensor([ 1.0706, -0.3512, -0.4811, 0.5177]) tensor([0.4930, 0.1189, 0.1045, 0.2836]) -Greedy action tensor([ 1.8463, -0.6456, -0.1915, 0.3702]) tensor([0.6937, 0.0574, 0.0904, 0.1585]) -Greedy action tensor([ 1.3947, -0.3883, 0.0366, 0.5195]) tensor([0.5429, 0.0913, 0.1396, 0.2262]) -Greedy action tensor([ 1.6253, -0.5064, -0.3115, 0.6116]) tensor([0.6151, 0.0730, 0.0887, 0.2232]) -Greedy action tensor([ 1.6377, -0.7588, -0.3987, 0.6653]) tensor([0.6251, 0.0569, 0.0816, 0.2364]) -Greedy action tensor([ 1.4473, -0.3544, -0.3714, 0.2498]) tensor([0.6138, 0.1013, 0.0996, 0.1853]) -Greedy action tensor([ 1.4326, -0.5637, -0.4344, 0.7970]) tensor([0.5494, 0.0746, 0.0849, 0.2910]) -Greedy action tensor([ 0.9282, -0.7020, 0.1232, -0.0155]) tensor([0.4921, 0.0964, 0.2200, 0.1915]) -Greedy action tensor([ 1.2322, -0.0769, -0.3670, 0.1535]) tensor([0.5518, 0.1490, 0.1115, 0.1876]) -Greedy action tensor([ 1.4084, -0.2775, -0.3510, 0.4949]) tensor([0.5687, 0.1054, 0.0979, 0.2281]) -Greedy action tensor([ 2.0237, -0.8937, -0.4124, 0.6744]) tensor([0.7138, 0.0386, 0.0625, 0.1852]) -Greedy action tensor([ 1.1330, -0.3475, -0.3548, 0.0876]) tensor([0.5540, 0.1261, 0.1251, 0.1948]) -Greedy action tensor([ 0.3585, -0.0958, 0.2448, -0.0279]) tensor([0.3118, 0.1980, 0.2783, 0.2119]) -Greedy action tensor([ 1.1203, -0.2731, -0.3937, 0.6047]) tensor([0.4842, 0.1202, 0.1065, 0.2891]) -Greedy action tensor([ 1.7312, -0.3231, -0.4397, 0.4834]) tensor([0.6539, 0.0838, 0.0746, 0.1877]) -Greedy action tensor([ 1.5422, -0.6907, -0.2836, 0.2636]) tensor([0.6465, 0.0693, 0.1042, 0.1800]) -Greedy action tensor([ 1.0708, -0.2428, 0.0415, 0.3759]) tensor([0.4705, 0.1265, 0.1681, 0.2349]) -Greedy action tensor([ 1.2701, -0.0931, -0.4839, 0.3238]) tensor([0.5503, 0.1408, 0.0952, 0.2136]) -Greedy action tensor([ 1.8220, -1.0184, -0.1763, 0.6073]) tensor([0.6708, 0.0392, 0.0909, 0.1991]) -Greedy action tensor([ 1.3718, -0.6723, -0.3670, 0.7943]) tensor([0.5358, 0.0694, 0.0942, 0.3007]) -Greedy action tensor([ 1.9559, -0.6172, -0.4526, 0.7860]) tensor([0.6772, 0.0517, 0.0609, 0.2102]) -Greedy action tensor([ 1.2448, -0.3124, -0.1138, 0.3262]) tensor([0.5357, 0.1129, 0.1377, 0.2138]) -Greedy action tensor([ 1.3132, -0.0094, -0.8545, 0.3224]) tensor([0.5707, 0.1521, 0.0653, 0.2119]) -Greedy action tensor([ 1.0011, 0.0763, -0.6930, 0.1280]) tensor([0.5005, 0.1985, 0.0920, 0.2090]) -Greedy action tensor([ 1.1206, -0.1142, -0.3733, 0.2781]) tensor([0.5139, 0.1495, 0.1154, 0.2213]) -Greedy action tensor([ 1.1046, -0.6554, -0.3256, 0.3991]) tensor([0.5249, 0.0903, 0.1256, 0.2592]) -Greedy action tensor([ 1.2804, -0.4666, -0.1011, -0.0454]) tensor([0.5913, 0.1031, 0.1485, 0.1571]) -Greedy action tensor([ 1.7668, -0.6126, -0.2992, 0.4160]) tensor([0.6764, 0.0626, 0.0857, 0.1752]) -Greedy action tensor([ 0.4573, -0.1470, 0.0391, 0.0056]) tensor([0.3520, 0.1923, 0.2317, 0.2240]) -Greedy action tensor([ 1.9752, -0.0497, -0.0896, 0.4410]) tensor([0.6782, 0.0895, 0.0860, 0.1462]) -Greedy action tensor([ 1.1230, -0.6479, -0.2339, 0.4361]) tensor([0.5179, 0.0881, 0.1333, 0.2606]) -Greedy action tensor([ 1.9898, -0.8380, -0.6155, 0.4254]) tensor([0.7450, 0.0441, 0.0550, 0.1559]) -Greedy action tensor([ 0.8869, -0.0954, -0.3993, 0.2676]) tensor([0.4568, 0.1711, 0.1262, 0.2459]) -Greedy action tensor([ 1.5674, -0.2773, -0.0622, 0.8303]) tensor([0.5457, 0.0863, 0.1070, 0.2611]) -Greedy action tensor([ 1.2053, -0.3769, -0.3481, 0.6885]) tensor([0.4967, 0.1021, 0.1051, 0.2962]) -Greedy action tensor([ 2.2112, -0.6469, -0.1245, 0.7020]) tensor([0.7272, 0.0417, 0.0703, 0.1608]) -Greedy action tensor([ 0.5626, -0.1169, -0.2408, -0.1983]) tensor([0.4129, 0.2093, 0.1849, 0.1929]) -Greedy action tensor([ 1.0304, -0.2971, -0.2626, 0.3642]) tensor([0.4870, 0.1291, 0.1337, 0.2502]) -Greedy action tensor([ 0.5862, -0.1960, -0.0876, 0.0213]) tensor([0.3944, 0.1804, 0.2011, 0.2242]) -Greedy action tensor([ 1.6616, -0.4545, -0.2448, 0.6383]) tensor([0.6141, 0.0740, 0.0913, 0.2207]) -Greedy action tensor([ 2.1847, -1.1007, -0.2027, 0.7463]) tensor([0.7317, 0.0274, 0.0672, 0.1737]) -Greedy action tensor([ 1.0486, -0.2861, -0.5565, 0.8408]) tensor([0.4393, 0.1156, 0.0882, 0.3569]) -Greedy action tensor([ 1.3824, 0.4820, -0.7765, 0.4401]) tensor([0.5231, 0.2126, 0.0604, 0.2039]) -Greedy action tensor([ 1.2081, -0.5476, -0.4502, 0.4438]) tensor([0.5468, 0.0945, 0.1041, 0.2546]) -Greedy action tensor([ 0.4609, -0.1182, 0.3140, -0.1699]) tensor([0.3383, 0.1896, 0.2921, 0.1800]) -Greedy action tensor([ 1.3876, -0.1330, -0.3531, 0.0673]) tensor([0.6020, 0.1316, 0.1056, 0.1608]) -Greedy action tensor([ 0.9331, -0.5558, -0.1435, 0.3864]) tensor([0.4661, 0.1052, 0.1588, 0.2698]) -Greedy action tensor([ 0.9718, -0.0639, -0.4093, 0.2094]) tensor([0.4824, 0.1713, 0.1212, 0.2251]) -Greedy action tensor([ 1.8569, -0.8102, -0.0138, 0.1570]) tensor([0.7112, 0.0494, 0.1095, 0.1299]) -Greedy action tensor([ 1.5045, -1.0536, -0.0873, 0.4486]) tensor([0.6139, 0.0475, 0.1250, 0.2136]) -Greedy action tensor([0.8681, 0.0430, 0.1025, 0.0397]) tensor([0.4274, 0.1873, 0.1987, 0.1866]) -Greedy action tensor([ 0.9244, -0.0971, -0.4494, 0.2190]) tensor([0.4746, 0.1709, 0.1201, 0.2344]) -Greedy action tensor([ 1.4126, -0.6012, -0.2462, 0.5268]) tensor([0.5760, 0.0769, 0.1096, 0.2375]) -Greedy action tensor([ 0.6322, -0.4721, -0.1143, -0.0101]) tensor([0.4289, 0.1421, 0.2033, 0.2256]) -Greedy action tensor([ 1.4858, -0.2529, -0.5083, 0.6595]) tensor([0.5716, 0.1005, 0.0778, 0.2502]) -Greedy action tensor([ 1.2303, 0.1316, -0.4216, 0.3805]) tensor([0.5122, 0.1707, 0.0982, 0.2190]) -Greedy action tensor([ 1.0959, -0.2267, -0.3174, -0.0677]) tensor([0.5488, 0.1462, 0.1335, 0.1714]) -Greedy action tensor([ 1.3837, -0.2094, -0.5420, 0.2445]) tensor([0.5991, 0.1218, 0.0873, 0.1918]) -Greedy action tensor([ 1.3613, -0.3439, -0.4669, 0.2860]) tensor([0.5940, 0.1079, 0.0954, 0.2027]) -Greedy action tensor([ 1.9265, 0.0798, -0.3915, 0.0098]) tensor([0.7126, 0.1124, 0.0702, 0.1048]) -Greedy action tensor([ 0.9104, -0.5192, 0.0059, 0.2365]) tensor([0.4643, 0.1112, 0.1879, 0.2366]) -Greedy action tensor([ 0.2844, -1.1900, -0.0780, 0.8959]) tensor([0.2654, 0.0608, 0.1847, 0.4892]) -Greedy action tensor([0.6639, 0.0194, 0.3770, 0.4276]) tensor([0.3263, 0.1713, 0.2449, 0.2576]) -Greedy action tensor([ 0.7623, -1.2734, 0.3168, 0.7923]) tensor([0.3570, 0.0466, 0.2286, 0.3678]) -Greedy action tensor([-0.5910, -1.6400, 0.0431, -0.2888]) tensor([0.2179, 0.0763, 0.4109, 0.2948]) -Greedy action tensor([ 0.4762, 0.1076, 0.2557, -0.2140]) tensor([0.3339, 0.2309, 0.2678, 0.1674]) -Greedy action tensor([ 0.7127, -1.5537, 1.4726, 0.4967]) tensor([0.2471, 0.0256, 0.5282, 0.1991]) -Greedy action tensor([ 1.3032, -1.4462, 0.9259, 0.6849]) tensor([0.4370, 0.0280, 0.2996, 0.2355]) -Greedy action tensor([1.7755, 0.5584, 0.4247, 1.2653]) tensor([0.4639, 0.1374, 0.1202, 0.2785]) -Greedy action tensor([-0.3496, 0.7969, 0.2365, -0.7399]) tensor([0.1510, 0.4753, 0.2714, 0.1022]) -Greedy action tensor([ 0.2301, -1.5635, -0.4503, 0.6228]) tensor([0.3171, 0.0528, 0.1606, 0.4696]) -Greedy action tensor([1.1963, 0.3116, 1.0151, 1.2573]) tensor([0.3021, 0.1247, 0.2520, 0.3211]) -Greedy action tensor([-0.2264, -0.8638, 0.2420, 0.9248]) tensor([0.1590, 0.0841, 0.2540, 0.5028]) -Greedy action tensor([ 1.0289, -0.9735, -0.4220, 1.3385]) tensor([0.3660, 0.0494, 0.0858, 0.4988]) -Greedy action tensor([ 0.0997, -2.4961, 0.6712, 0.1113]) tensor([0.2593, 0.0193, 0.4591, 0.2623]) -Greedy action tensor([ 0.3499, -2.1021, 0.2845, 0.7944]) tensor([0.2791, 0.0240, 0.2615, 0.4354]) -Greedy action tensor([-0.4783, -0.8969, -0.4577, 0.7693]) tensor([0.1623, 0.1068, 0.1657, 0.5652]) -Greedy action tensor([ 1.2979, -0.1578, 0.8112, 0.7283]) tensor([0.4143, 0.0966, 0.2547, 0.2344]) -Greedy action tensor([-1.2077, -0.5545, 0.5612, 0.2157]) tensor([0.0773, 0.1485, 0.4533, 0.3209]) -Greedy action tensor([-1.5123, -0.6378, -0.2303, 1.2859]) tensor([0.0427, 0.1024, 0.1539, 0.7010]) -Greedy action tensor([ 0.4895, -1.1771, 0.9491, -0.2358]) tensor([0.3071, 0.0580, 0.4862, 0.1487]) -Greedy action tensor([-0.6103, -0.8666, -1.0740, 0.3872]) tensor([0.1955, 0.1513, 0.1230, 0.5302]) -Greedy action tensor([-0.1654, -1.4433, 0.4112, 0.9841]) tensor([0.1609, 0.0448, 0.2864, 0.5079]) -Greedy action tensor([1.2991, 0.3073, 0.1955, 1.3713]) tensor([0.3600, 0.1335, 0.1194, 0.3870]) -Greedy action tensor([ 1.0551, -1.2496, 0.3545, 1.1304]) tensor([0.3739, 0.0373, 0.1856, 0.4032]) -Greedy action tensor([-0.6477, -2.3338, 0.3554, -0.0108]) tensor([0.1723, 0.0319, 0.4699, 0.3258]) -Greedy action tensor([0.6625, 0.3101, 0.8158, 0.4665]) tensor([0.2709, 0.1905, 0.3158, 0.2227]) -Greedy action tensor([-0.1087, 0.2095, 0.4571, -0.4579]) tensor([0.2066, 0.2840, 0.3638, 0.1457]) -Greedy action tensor([-0.6346, -0.3792, -0.0788, -0.1115]) tensor([0.1748, 0.2256, 0.3047, 0.2949]) -Greedy action tensor([ 0.8016, 1.3298, 0.6686, -0.0481]) tensor([0.2501, 0.4241, 0.2189, 0.1069]) -Greedy action tensor([ 1.5213, -0.4535, 0.4863, 0.9137]) tensor([0.4905, 0.0681, 0.1742, 0.2672]) -Greedy action tensor([-0.5050, -0.5543, 0.5719, 0.8877]) tensor([0.1122, 0.1068, 0.3293, 0.4517]) -Greedy action tensor([-0.6294, 0.2211, -0.2659, 1.3412]) tensor([0.0837, 0.1958, 0.1203, 0.6002]) -Greedy action tensor([ 0.9234, -0.5273, 0.7608, 0.3014]) tensor([0.3815, 0.0894, 0.3242, 0.2048]) -Greedy action tensor([ 1.1414, -1.8098, 0.0952, 0.6508]) tensor([0.4961, 0.0259, 0.1743, 0.3037]) -Greedy action tensor([-0.1977, -0.7055, 0.0448, 0.1967]) tensor([0.2294, 0.1380, 0.2923, 0.3403]) -Greedy action tensor([ 0.8675, 0.6417, -0.0227, -0.8542]) tensor([0.4189, 0.3342, 0.1720, 0.0749]) -Greedy action tensor([-0.3972, 1.4344, 0.3380, -0.4063]) tensor([0.0969, 0.6050, 0.2021, 0.0960]) -Greedy action tensor([ 1.0394, -1.0107, 0.1072, 0.7445]) tensor([0.4411, 0.0568, 0.1737, 0.3284]) -Greedy action tensor([-0.1202, 1.9770, 0.0713, -0.1869]) tensor([0.0886, 0.7213, 0.1073, 0.0829]) -Greedy action tensor([ 0.7892, -0.7259, -0.5968, 0.7551]) tensor([0.4105, 0.0902, 0.1026, 0.3967]) -Greedy action tensor([-0.3786, -1.5128, -0.1622, -0.4489]) tensor([0.2861, 0.0920, 0.3552, 0.2667]) -Greedy action tensor([-1.2381, -1.4371, 1.2329, -0.0807]) tensor([0.0594, 0.0487, 0.7029, 0.1890]) -Greedy action tensor([ 1.0927, -1.0612, 1.0998, 1.2651]) tensor([0.3020, 0.0350, 0.3041, 0.3588]) -Greedy action tensor([-0.6158, 0.3397, 0.1260, -0.2577]) tensor([0.1402, 0.3647, 0.2945, 0.2006]) -Greedy action tensor([ 0.0672, -0.2346, 1.6748, -0.2779]) tensor([0.1344, 0.0994, 0.6709, 0.0952]) -Greedy action tensor([ 0.6261, 0.9941, -0.4685, -0.6891]) tensor([0.3281, 0.4740, 0.1098, 0.0881]) -Greedy action tensor([ 0.5213, 0.0754, -0.5587, 0.0801]) tensor([0.3812, 0.2441, 0.1295, 0.2452]) -Greedy action tensor([ 0.1025, -1.3000, 0.3236, -0.0857]) tensor([0.3010, 0.0740, 0.3755, 0.2494]) -Greedy action tensor([-0.4107, 1.2774, 0.4954, -0.1206]) tensor([0.0978, 0.5293, 0.2421, 0.1308]) -Greedy action tensor([ 2.0875, -1.0776, 0.0757, 1.1276]) tensor([0.6415, 0.0271, 0.0858, 0.2456]) -Greedy action tensor([ 1.8385, -0.9431, 1.2146, 1.3097]) tensor([0.4572, 0.0283, 0.2450, 0.2694]) -Greedy action tensor([0.9796, 0.0741, 1.3029, 0.9185]) tensor([0.2683, 0.1085, 0.3707, 0.2524]) -Greedy action tensor([ 1.0434, -1.4665, -0.3596, 0.2100]) tensor([0.5676, 0.0461, 0.1396, 0.2467]) -Greedy action tensor([-1.3418, 0.5202, 1.7760, -0.3722]) tensor([0.0306, 0.1970, 0.6917, 0.0807]) -Greedy action tensor([-0.6940, -0.3072, -0.3263, 0.7722]) tensor([0.1212, 0.1785, 0.1751, 0.5252]) -Greedy action tensor([-0.4806, -1.3771, -0.4272, 1.3379]) tensor([0.1159, 0.0473, 0.1223, 0.7145]) -Greedy action tensor([-0.7752, -0.3733, 0.6665, -0.5945]) tensor([0.1262, 0.1887, 0.5338, 0.1513]) -Greedy action tensor([ 1.0516, -1.1840, -0.3539, 1.2178]) tensor([0.3948, 0.0422, 0.0968, 0.4662]) -Greedy action tensor([-2.2042, -0.1762, 0.7545, -0.0511]) tensor([0.0274, 0.2083, 0.5283, 0.2360]) -Greedy action tensor([ 0.2606, -0.9885, 0.5443, 1.6202]) tensor([0.1536, 0.0441, 0.2040, 0.5983]) -Greedy action tensor([ 0.7137, -0.0420, 1.3550, -0.2615]) tensor([0.2670, 0.1254, 0.5070, 0.1007]) -Greedy action tensor([ 0.7733, -0.1523, -0.4089, -0.2227]) tensor([0.4826, 0.1912, 0.1480, 0.1782]) -Greedy action tensor([ 0.6132, -0.1378, -0.9542, 1.8506]) tensor([0.1950, 0.0920, 0.0407, 0.6722]) -Greedy action tensor([1.1280, 0.2650, 0.5796, 0.2821]) tensor([0.4117, 0.1737, 0.2379, 0.1767]) -Greedy action tensor([-0.0450, 0.4283, -0.3038, 0.4511]) tensor([0.1992, 0.3198, 0.1538, 0.3272]) -Greedy action tensor([ 0.4016, -0.4668, 0.6664, -0.1217]) tensor([0.3016, 0.1266, 0.3931, 0.1787]) -Greedy action tensor([ 0.1310, -1.3281, 0.5378, -0.2564]) tensor([0.2930, 0.0681, 0.4401, 0.1989]) -Greedy action tensor([1.2919, 0.9751, 0.1778, 0.6541]) tensor([0.3868, 0.2818, 0.1270, 0.2044]) -Greedy action tensor([ 1.1136, -1.0323, 1.3582, 1.0452]) tensor([0.3005, 0.0351, 0.3838, 0.2806]) -Greedy action tensor([ 0.7530, -1.1343, 0.8606, 0.7497]) tensor([0.3066, 0.0464, 0.3414, 0.3056]) -Greedy action tensor([ 1.3604, -3.2132, -0.3278, 0.0472]) tensor([0.6830, 0.0070, 0.1263, 0.1837]) -Greedy action tensor([1.2971, 0.1656, 0.3268, 0.4805]) tensor([0.4665, 0.1505, 0.1768, 0.2062]) -Greedy action tensor([ 0.1753, 0.5269, -0.1399, 0.4756]) tensor([0.2222, 0.3158, 0.1621, 0.3000]) -Greedy action tensor([ 0.5127, -0.2243, -0.1201, 0.9063]) tensor([0.2864, 0.1370, 0.1521, 0.4245]) -Greedy action tensor([-1.3901, -1.2257, 1.4480, -0.8268]) tensor([0.0476, 0.0561, 0.8128, 0.0836]) -Greedy action tensor([-0.7904, -1.8319, 0.2483, -0.2195]) tensor([0.1681, 0.0593, 0.4750, 0.2975]) -Greedy action tensor([-0.6572, 0.3422, -0.0425, -0.1968]) tensor([0.1399, 0.3799, 0.2586, 0.2216]) -Greedy action tensor([ 0.1829, -1.5716, 0.1625, 0.0604]) tensor([0.3292, 0.0570, 0.3226, 0.2913]) -Greedy action tensor([ 1.2342, -2.0018, 1.7215, 0.9661]) tensor([0.2914, 0.0115, 0.4743, 0.2228]) -Greedy action tensor([-0.7688, -0.1157, 1.3882, -0.6728]) tensor([0.0789, 0.1517, 0.6825, 0.0869]) -Greedy action tensor([-0.8395, -1.5115, -1.0123, -0.6535]) tensor([0.2812, 0.1436, 0.2366, 0.3387]) -Greedy action tensor([-0.8525, -1.5065, -0.4567, 0.6358]) tensor([0.1345, 0.0699, 0.1998, 0.5958]) -Greedy action tensor([-1.8465, -0.4222, 0.6050, -0.1159]) tensor([0.0446, 0.1854, 0.5180, 0.2519]) -Greedy action tensor([-0.9588, 0.5068, 0.2298, -0.1713]) tensor([0.0925, 0.4006, 0.3036, 0.2033]) -Greedy action tensor([-1.7439, -0.5573, 0.7556, -0.3899]) tensor([0.0492, 0.1612, 0.5991, 0.1905]) -Greedy action tensor([-1.8151, -0.4512, 0.6571, -0.0312]) tensor([0.0440, 0.1722, 0.5217, 0.2621]) -Greedy action tensor([-1.9256, -0.3994, 0.6527, -0.1635]) tensor([0.0406, 0.1870, 0.5356, 0.2368]) -Greedy action tensor([-1.8597, -0.3213, 0.6040, -0.1201]) tensor([0.0433, 0.2016, 0.5086, 0.2465]) -Greedy action tensor([-1.7403, -0.5091, 0.6283, 0.0301]) tensor([0.0477, 0.1632, 0.5092, 0.2799]) -Greedy action tensor([-1.0103, -0.7295, 0.2207, -0.1936]) tensor([0.1248, 0.1653, 0.4274, 0.2825]) -Greedy action tensor([-1.8892, -0.4521, 0.6334, -0.1563]) tensor([0.0429, 0.1804, 0.5342, 0.2425]) -Greedy action tensor([-1.8324, -0.4877, 1.1291, 0.6310]) tensor([0.0278, 0.1069, 0.5382, 0.3271]) -Greedy action tensor([-0.9381, 0.9077, 0.2032, 0.5990]) tensor([0.0662, 0.4190, 0.2071, 0.3077]) -Greedy action tensor([-1.3745, -0.5999, 0.3560, 0.1755]) tensor([0.0739, 0.1604, 0.4173, 0.3484]) -Greedy action tensor([-1.7835, -0.4833, 0.7074, 0.0730]) tensor([0.0432, 0.1586, 0.5216, 0.2766]) -Greedy action tensor([-1.3479, 0.0561, 0.1093, 0.3360]) tensor([0.0678, 0.2760, 0.2911, 0.3651]) -Greedy action tensor([-0.7045, 0.3066, 0.3869, 1.2993]) tensor([0.0707, 0.1943, 0.2106, 0.5244]) -Greedy action tensor([-0.7490, 0.8656, 0.1114, -0.1965]) tensor([0.0987, 0.4963, 0.2334, 0.1716]) -Greedy action tensor([-1.7312, -0.3515, 0.5631, 0.0257]) tensor([0.0483, 0.1921, 0.4794, 0.2801]) -Greedy action tensor([-1.7281, -0.4565, 0.5642, -0.0480]) tensor([0.0504, 0.1799, 0.4991, 0.2706]) -Greedy action tensor([-1.7236, -0.5641, 0.8731, 0.5465]) tensor([0.0366, 0.1168, 0.4918, 0.3547]) -Greedy action tensor([-1.9387, -0.4466, 0.6680, -0.1744]) tensor([0.0403, 0.1790, 0.5457, 0.2350]) -Greedy action tensor([-1.9243, -0.4417, 0.6590, -0.1629]) tensor([0.0409, 0.1800, 0.5412, 0.2379]) -Greedy action tensor([-1.9384, -0.4566, 0.6677, -0.1749]) tensor([0.0404, 0.1776, 0.5467, 0.2354]) -Greedy action tensor([-1.7799, -0.4138, 0.5796, -0.1233]) tensor([0.0482, 0.1889, 0.5102, 0.2526]) -Greedy action tensor([-0.1017, -0.0820, 0.8725, 1.6186]) tensor([0.0975, 0.0994, 0.2583, 0.5447]) -Greedy action tensor([-1.1674, 0.0436, 0.4764, -0.4486]) tensor([0.0863, 0.2898, 0.4467, 0.1771]) -Greedy action tensor([-1.4011, -0.6107, 1.0249, 1.0352]) tensor([0.0385, 0.0849, 0.4360, 0.4405]) -Greedy action tensor([-1.8161, -0.2622, 0.6226, -0.0830]) tensor([0.0438, 0.2070, 0.5015, 0.2477]) -Greedy action tensor([-1.8879, -0.4533, 0.6508, -0.1448]) tensor([0.0424, 0.1781, 0.5371, 0.2424]) -Greedy action tensor([-1.9227, -0.5179, 0.8044, -0.1043]) tensor([0.0377, 0.1536, 0.5764, 0.2323]) -Greedy action tensor([-1.8559, -0.4235, 0.6236, -0.1164]) tensor([0.0438, 0.1836, 0.5230, 0.2496]) -Greedy action tensor([-1.7979, -0.4399, 0.5951, -0.0911]) tensor([0.0468, 0.1822, 0.5128, 0.2582]) -Greedy action tensor([-1.6883, -0.4602, 0.5820, 0.0426]) tensor([0.0507, 0.1730, 0.4904, 0.2860]) -Greedy action tensor([-1.9273, -0.4333, 0.6558, -0.1693]) tensor([0.0408, 0.1819, 0.5405, 0.2368]) -Greedy action tensor([-1.1425, -0.3282, 0.7050, 0.8828]) tensor([0.0582, 0.1314, 0.3693, 0.4411]) -Greedy action tensor([-1.1173, -0.1338, 0.5943, -0.6433]) tensor([0.0924, 0.2472, 0.5119, 0.1485]) -Greedy action tensor([-1.7590, -0.3754, 0.6090, -0.0511]) tensor([0.0472, 0.1883, 0.5040, 0.2605]) -Greedy action tensor([-1.8906, -0.3951, 0.6343, -0.1571]) tensor([0.0424, 0.1890, 0.5290, 0.2397]) -Greedy action tensor([0.2118, 0.1040, 0.9035, 1.6778]) tensor([0.1216, 0.1091, 0.2428, 0.5266]) -Greedy action tensor([-1.8083, -0.4748, 0.6780, 0.0300]) tensor([0.0433, 0.1643, 0.5203, 0.2722]) -Greedy action tensor([-1.8028, -0.4482, 0.5952, -0.1053]) tensor([0.0469, 0.1816, 0.5156, 0.2559]) -Greedy action tensor([-1.7829, -0.1666, 0.5499, -0.1234]) tensor([0.0463, 0.2331, 0.4772, 0.2434]) -Greedy action tensor([0.0114, 0.0532, 0.6673, 1.5859]) tensor([0.1137, 0.1185, 0.2190, 0.5488]) -Greedy action tensor([-1.6303, -0.3108, 0.5205, 0.0786]) tensor([0.0530, 0.1984, 0.4556, 0.2929]) -Greedy action tensor([-1.3497, -0.6055, 0.3951, 0.0233]) tensor([0.0783, 0.1647, 0.4481, 0.3089]) -Greedy action tensor([-1.8657, -0.4199, 0.8252, 0.2513]) tensor([0.0353, 0.1500, 0.5211, 0.2935]) -Greedy action tensor([-1.6574, -0.0956, 0.7129, -0.5386]) tensor([0.0512, 0.2441, 0.5479, 0.1567]) -Greedy action tensor([-1.9129, -0.4543, 0.6553, -0.1524]) tensor([0.0414, 0.1780, 0.5399, 0.2407]) -Greedy action tensor([-1.5355, -0.3737, 0.6670, 0.3914]) tensor([0.0497, 0.1589, 0.4499, 0.3415]) -Greedy action tensor([-1.3057, -0.5909, 0.3499, 0.1006]) tensor([0.0809, 0.1653, 0.4236, 0.3301]) -Greedy action tensor([-1.7216, 0.1321, 0.4813, -0.0462]) tensor([0.0459, 0.2931, 0.4156, 0.2453]) -Greedy action tensor([-1.3652, -0.3901, 0.4655, -0.1566]) tensor([0.0755, 0.2003, 0.4712, 0.2529]) -Greedy action tensor([-1.8948, -0.4435, 0.6408, -0.1525]) tensor([0.0424, 0.1808, 0.5348, 0.2419]) -Greedy action tensor([-1.7661, -0.4615, 0.5826, -0.0809]) tensor([0.0487, 0.1794, 0.5095, 0.2624]) -Greedy action tensor([-1.6874, -0.4952, 0.5334, -0.0790]) tensor([0.0540, 0.1780, 0.4980, 0.2699]) -Greedy action tensor([-1.0518, -0.7044, 0.9042, 1.4437]) tensor([0.0463, 0.0655, 0.3272, 0.5611]) -Greedy action tensor([-1.3298, -0.5998, 0.6919, 0.5969]) tensor([0.0572, 0.1186, 0.4317, 0.3925]) -Greedy action tensor([-1.8815, -0.4407, 0.8165, 0.1298]) tensor([0.0363, 0.1533, 0.5391, 0.2713]) -Greedy action tensor([ 0.4886, 1.2590, -0.3599, -0.0864]) tensor([0.2409, 0.5205, 0.1031, 0.1355]) -Greedy action tensor([-1.7529, -0.1812, 0.5305, -0.0994]) tensor([0.0480, 0.2309, 0.4705, 0.2506]) -Greedy action tensor([-0.7590, 0.0978, 0.1633, 0.7089]) tensor([0.0979, 0.2307, 0.2463, 0.4250]) -Greedy action tensor([-0.7468, 0.1042, 0.7071, 1.3156]) tensor([0.0646, 0.1512, 0.2764, 0.5079]) -Greedy action tensor([-1.8000, -0.5288, 0.5467, -0.1086]) tensor([0.0489, 0.1744, 0.5112, 0.2655]) -Greedy action tensor([-1.7407, -0.4276, 0.5915, -0.0589]) tensor([0.0490, 0.1823, 0.5051, 0.2636]) -Greedy action tensor([-1.8534, -0.4686, 0.6398, -0.1239]) tensor([0.0440, 0.1757, 0.5323, 0.2480]) -Greedy action tensor([-1.4585, -0.4426, 1.1704, 0.9839]) tensor([0.0343, 0.0948, 0.4759, 0.3949]) -Greedy action tensor([-1.4216, -0.4416, 0.4232, -0.0283]) tensor([0.0713, 0.1901, 0.4513, 0.2873]) -Greedy action tensor([-1.6103, -0.4311, 0.5335, -0.6351]) tensor([0.0648, 0.2107, 0.5528, 0.1718]) -Greedy action tensor([-1.8300, -0.4628, 0.6424, -0.0859]) tensor([0.0445, 0.1744, 0.5268, 0.2543]) -Greedy action tensor([-1.8323, -0.3556, 0.5975, -0.1146]) tensor([0.0448, 0.1963, 0.5091, 0.2498]) -Greedy action tensor([-0.8577, -0.5725, 0.3322, -0.1685]) tensor([0.1314, 0.1748, 0.4320, 0.2618]) -Greedy action tensor([-1.8107, -0.4717, 0.6938, 0.0113]) tensor([0.0430, 0.1642, 0.5267, 0.2661]) -Greedy action tensor([-1.2202, 0.0508, -0.0311, -0.1132]) tensor([0.0920, 0.3278, 0.3020, 0.2782]) -Greedy action tensor([-1.4537, -0.6102, 0.4222, 0.1552]) tensor([0.0673, 0.1565, 0.4396, 0.3365]) -Greedy action tensor([-1.8791, -0.8063, -0.0399, -0.3506]) tensor([0.0674, 0.1972, 0.4244, 0.3110]) -Greedy action tensor([-1.6613, -0.4884, 0.5061, 0.0199]) tensor([0.0545, 0.1762, 0.4763, 0.2929]) -Greedy action tensor([-1.9053, -0.4510, 0.6456, -0.1614]) tensor([0.0420, 0.1797, 0.5382, 0.2401]) -Greedy action tensor([-1.7283, -0.5046, 0.5535, -0.0365]) tensor([0.0510, 0.1733, 0.4991, 0.2767]) -Greedy action tensor([-1.9152, -0.4438, 0.6492, -0.1637]) tensor([0.0415, 0.1806, 0.5389, 0.2390]) -Greedy action tensor([-1.6397, -0.4924, 0.7736, 0.4484]) tensor([0.0428, 0.1347, 0.4776, 0.3450]) -Greedy action tensor([-1.4502, -0.4949, 0.6061, 0.5269]) tensor([0.0537, 0.1395, 0.4194, 0.3875]) -Greedy action tensor([-1.7160, -0.4509, 0.5758, 0.0142]) tensor([0.0498, 0.1765, 0.4927, 0.2810]) -Greedy action tensor([-0.5981, -0.7609, 0.6693, 0.0529]) tensor([0.1366, 0.1161, 0.4853, 0.2620]) -Greedy action tensor([ 0.3385, 0.1270, 0.1348, -0.1123]) tensor([0.3065, 0.2481, 0.2500, 0.1953]) -Greedy action tensor([ 1.3411, -0.1714, -0.4314, 0.3235]) tensor([0.5709, 0.1258, 0.0970, 0.2063]) -Greedy action tensor([ 1.3007, -0.2659, -0.4424, 0.2759]) tensor([0.5739, 0.1198, 0.1004, 0.2059]) -Greedy action tensor([ 1.8689, -0.2563, -0.6883, 0.2213]) tensor([0.7197, 0.0859, 0.0558, 0.1385]) -Greedy action tensor([ 2.5192, -0.8434, -0.4202, 0.3787]) tensor([0.8298, 0.0287, 0.0439, 0.0976]) -Greedy action tensor([ 1.8798, -0.1490, -0.6202, 0.7577]) tensor([0.6497, 0.0854, 0.0533, 0.2115]) -Greedy action tensor([ 0.4835, -0.2576, -0.0759, 0.3817]) tensor([0.3388, 0.1615, 0.1936, 0.3060]) -Greedy action tensor([ 2.2151, -1.0847, -0.5197, 0.9052]) tensor([0.7291, 0.0269, 0.0473, 0.1967]) -Greedy action tensor([ 1.1854, 0.1633, -0.2508, -0.1911]) tensor([0.5405, 0.1945, 0.1285, 0.1365]) -Greedy action tensor([ 1.4023, -0.7688, -0.3314, -0.1247]) tensor([0.6632, 0.0756, 0.1171, 0.1440]) -Greedy action tensor([ 2.1045, -0.3139, -0.5904, 0.4055]) tensor([0.7466, 0.0665, 0.0504, 0.1365]) -Greedy action tensor([ 1.6643, -0.0182, -1.1881, -0.0498]) tensor([0.7024, 0.1306, 0.0405, 0.1265]) -Greedy action tensor([ 2.0934, -0.4170, -0.2771, 0.4572]) tensor([0.7303, 0.0593, 0.0682, 0.1422]) -Greedy action tensor([ 1.1311, -0.1985, -0.1142, 0.2515]) tensor([0.5083, 0.1345, 0.1463, 0.2109]) -Greedy action tensor([ 1.3443, -0.2551, -0.4754, 0.5773]) tensor([0.5469, 0.1105, 0.0886, 0.2540]) -Greedy action tensor([ 1.2540, -0.0382, -0.9221, 0.3766]) tensor([0.5543, 0.1523, 0.0629, 0.2305]) -Greedy action tensor([ 0.1915, 0.1339, -0.1586, -0.1373]) tensor([0.2969, 0.2803, 0.2092, 0.2137]) -Greedy action tensor([ 1.7999, -0.5922, -0.1959, 0.4670]) tensor([0.6707, 0.0613, 0.0911, 0.1769]) -Greedy action tensor([ 1.2257, -0.0371, -0.1297, -0.3375]) tensor([0.5714, 0.1616, 0.1473, 0.1197]) -Greedy action tensor([ 1.1527, -0.0720, -0.5653, 0.1805]) tensor([0.5401, 0.1587, 0.0969, 0.2043]) -Greedy action tensor([ 1.1804, -0.2909, -0.1515, -0.1458]) tensor([0.5685, 0.1305, 0.1501, 0.1509]) -Greedy action tensor([ 0.0486, -0.4337, -0.3608, 0.4928]) tensor([0.2604, 0.1607, 0.1729, 0.4060]) -Greedy action tensor([ 0.9879, -0.4519, -0.2845, 0.3645]) tensor([0.4870, 0.1154, 0.1365, 0.2611]) -Greedy action tensor([ 1.0705, -0.5072, -0.5352, 0.5664]) tensor([0.4972, 0.1026, 0.0998, 0.3003]) -Greedy action tensor([ 1.1838, -0.7438, -0.2075, 0.2667]) tensor([0.5574, 0.0811, 0.1387, 0.2228]) -Greedy action tensor([ 1.0435, -0.5441, -0.1110, 0.3558]) tensor([0.4945, 0.1011, 0.1559, 0.2486]) -Greedy action tensor([ 1.1542, -0.4276, 0.0664, 0.2882]) tensor([0.5094, 0.1047, 0.1716, 0.2143]) -Greedy action tensor([ 1.3031, -0.5022, -0.4011, 0.2662]) tensor([0.5879, 0.0967, 0.1069, 0.2084]) -Greedy action tensor([ 2.0601, -0.7866, -0.0487, 0.5142]) tensor([0.7181, 0.0417, 0.0872, 0.1530]) -Greedy action tensor([ 1.4078, -0.4717, -0.4437, 0.8210]) tensor([0.5360, 0.0818, 0.0842, 0.2981]) -Greedy action tensor([ 1.2553, -0.3558, -0.4444, 0.5044]) tensor([0.5393, 0.1077, 0.0986, 0.2545]) -Greedy action tensor([ 2.0495, -0.6183, -0.2041, 0.8382]) tensor([0.6792, 0.0471, 0.0713, 0.2023]) -Greedy action tensor([ 1.8658, -0.6350, -0.4510, 0.8996]) tensor([0.6406, 0.0525, 0.0632, 0.2437]) -Greedy action tensor([ 1.3543, -0.2463, -0.6265, 0.2770]) tensor([0.5951, 0.1201, 0.0821, 0.2027]) -Greedy action tensor([ 1.3161, 0.0302, -0.6787, -0.1661]) tensor([0.6099, 0.1686, 0.0830, 0.1385]) -Greedy action tensor([ 1.1541, -0.3879, -0.2761, 0.4851]) tensor([0.5088, 0.1089, 0.1217, 0.2606]) -Greedy action tensor([ 1.6166, -0.2379, -0.1344, 0.4297]) tensor([0.6115, 0.0957, 0.1062, 0.1866]) -Greedy action tensor([ 1.3174, -0.0978, -0.6350, 0.5116]) tensor([0.5460, 0.1326, 0.0775, 0.2439]) -Greedy action tensor([ 1.7644, -0.4499, -0.3086, 0.6278]) tensor([0.6427, 0.0702, 0.0809, 0.2063]) -Greedy action tensor([ 1.7932, -0.7234, -0.2021, 0.6304]) tensor([0.6539, 0.0528, 0.0889, 0.2044]) -Greedy action tensor([ 1.5521, -0.1159, -0.8797, 0.6001]) tensor([0.6015, 0.1135, 0.0529, 0.2322]) -Greedy action tensor([ 0.9325, -0.5397, -0.0984, 0.2637]) tensor([0.4765, 0.1093, 0.1700, 0.2441]) -Greedy action tensor([ 0.9220, -0.3908, -0.2357, 0.0767]) tensor([0.4969, 0.1337, 0.1561, 0.2134]) -Greedy action tensor([ 1.3261, 0.0302, -0.5443, -0.0472]) tensor([0.5949, 0.1628, 0.0916, 0.1507]) -Greedy action tensor([ 1.7820, -0.1132, -0.5851, 0.1845]) tensor([0.6914, 0.1039, 0.0648, 0.1399]) -Greedy action tensor([ 1.1451, -0.1282, -0.1019, 0.1061]) tensor([0.5205, 0.1457, 0.1496, 0.1842]) -Greedy action tensor([ 1.0603, -0.1332, -0.2077, 0.4629]) tensor([0.4684, 0.1420, 0.1318, 0.2578]) -Greedy action tensor([ 0.9394, -0.3006, -0.5842, 0.7818]) tensor([0.4235, 0.1225, 0.0923, 0.3617]) -Greedy action tensor([ 1.7426, -0.1693, -0.5447, 0.3890]) tensor([0.6633, 0.0980, 0.0673, 0.1713]) -Greedy action tensor([ 1.4441, -0.4984, -0.1591, 0.1770]) tensor([0.6149, 0.0881, 0.1238, 0.1732]) -Greedy action tensor([ 1.2335, -0.3995, -0.5966, 1.1129]) tensor([0.4460, 0.0871, 0.0715, 0.3953]) -Greedy action tensor([ 2.0396, -0.9610, -0.2198, 0.3933]) tensor([0.7424, 0.0369, 0.0775, 0.1431]) -Greedy action tensor([ 1.3114, -0.4852, -0.4347, 0.3004]) tensor([0.5868, 0.0973, 0.1024, 0.2135]) -Greedy action tensor([ 1.2775, -0.6656, -0.5197, 0.3414]) tensor([0.5878, 0.0842, 0.0974, 0.2305]) -Greedy action tensor([ 1.3330, -0.0529, -0.0624, 0.4656]) tensor([0.5214, 0.1304, 0.1292, 0.2190]) -Greedy action tensor([ 1.7064, 0.1286, -0.3478, 0.2663]) tensor([0.6363, 0.1313, 0.0816, 0.1507]) -Greedy action tensor([ 0.6209, -0.3741, -0.0319, 0.0247]) tensor([0.4096, 0.1515, 0.2132, 0.2257]) -Greedy action tensor([ 1.4191, -0.8067, 0.0107, 0.2656]) tensor([0.5995, 0.0647, 0.1466, 0.1892]) -Greedy action tensor([ 1.4937, -0.8695, -0.3220, 0.6392]) tensor([0.5944, 0.0559, 0.0967, 0.2529]) -Greedy action tensor([ 1.0306, -0.9168, -0.2446, -0.0593]) tensor([0.5687, 0.0811, 0.1589, 0.1912]) -Greedy action tensor([ 1.0665, -0.2751, -0.2447, 0.6028]) tensor([0.4630, 0.1210, 0.1248, 0.2912]) -Greedy action tensor([ 1.1571, 0.0180, -0.0784, 0.0284]) tensor([0.5170, 0.1655, 0.1503, 0.1672]) -Greedy action tensor([ 0.8490, -0.3091, -0.2330, -0.0553]) tensor([0.4859, 0.1526, 0.1647, 0.1967]) -Greedy action tensor([ 1.3002, -0.3312, -0.4900, 0.2533]) tensor([0.5836, 0.1142, 0.0974, 0.2048]) -Greedy action tensor([ 1.7575, -0.7396, -0.1317, 0.5201]) tensor([0.6563, 0.0540, 0.0992, 0.1904]) -Greedy action tensor([ 1.7192, -1.0305, -0.1557, 0.1650]) tensor([0.6999, 0.0448, 0.1074, 0.1479]) -Greedy action tensor([ 1.4180, -0.5644, -0.3905, 0.4024]) tensor([0.6010, 0.0828, 0.0985, 0.2177]) -Greedy action tensor([ 1.8972, -1.0893, -0.1851, 0.6550]) tensor([0.6831, 0.0345, 0.0851, 0.1973]) -Greedy action tensor([ 1.1758, -0.4129, -0.3992, 0.4541]) tensor([0.5271, 0.1076, 0.1091, 0.2561]) -Greedy action tensor([ 1.4816, -0.5341, -0.2188, 0.4567]) tensor([0.5971, 0.0796, 0.1090, 0.2143]) -Greedy action tensor([ 1.8735, -0.2087, -0.3633, 0.0671]) tensor([0.7165, 0.0893, 0.0765, 0.1177]) -Greedy action tensor([ 1.1266, -0.2311, -0.2186, 0.5081]) tensor([0.4863, 0.1251, 0.1267, 0.2620]) -Greedy action tensor([ 1.5463, -0.7067, -0.2529, 0.4133]) tensor([0.6279, 0.0660, 0.1039, 0.2022]) -Greedy action tensor([ 0.4680, -0.2751, -0.0113, 0.1299]) tensor([0.3561, 0.1694, 0.2205, 0.2540]) -Greedy action tensor([ 2.1687, -1.0166, -0.2111, 0.5502]) tensor([0.7507, 0.0311, 0.0695, 0.1488]) -Greedy action tensor([ 1.4368, -0.6109, -0.3079, 0.2772]) tensor([0.6183, 0.0798, 0.1080, 0.1939]) -Greedy action tensor([ 1.4758, 0.0891, -0.3189, 0.2613]) tensor([0.5838, 0.1459, 0.0970, 0.1733]) -Greedy action tensor([ 1.0245, -0.1423, -0.4008, -0.1787]) tensor([0.5399, 0.1681, 0.1298, 0.1621]) -Greedy action tensor([ 1.6795, -0.2707, -0.7828, 0.3212]) tensor([0.6736, 0.0958, 0.0574, 0.1732]) -Greedy action tensor([ 1.3660, -0.5505, -0.6705, 0.1911]) tensor([0.6303, 0.0927, 0.0822, 0.1947]) -Greedy action tensor([ 1.3125, -0.1670, 0.2326, 0.1768]) tensor([0.5295, 0.1206, 0.1798, 0.1701]) -Greedy action tensor([ 0.8656, -0.7717, -0.1258, -0.5597]) tensor([0.5537, 0.1077, 0.2055, 0.1331]) -Greedy action tensor([ 1.0142, -0.7596, -0.0319, -0.4213]) tensor([0.5685, 0.0965, 0.1997, 0.1353]) -Greedy action tensor([ 0.2617, -0.2767, -0.1313, -0.1171]) tensor([0.3398, 0.1983, 0.2293, 0.2326]) -Greedy action tensor([ 1.0644, -0.7528, -0.0834, -0.6123]) tensor([0.5999, 0.0975, 0.1904, 0.1122]) -Greedy action tensor([ 0.5013, -0.0632, -0.1254, -0.1672]) tensor([0.3823, 0.2174, 0.2043, 0.1960]) -Greedy action tensor([ 0.6757, -0.2829, -0.1158, -0.1437]) tensor([0.4391, 0.1684, 0.1990, 0.1935]) -Greedy action tensor([ 0.5123, 0.0030, 0.0074, -0.0785]) tensor([0.3625, 0.2178, 0.2188, 0.2008]) -Greedy action tensor([ 0.1854, -0.1796, -0.3048, -0.2002]) tensor([0.3348, 0.2324, 0.2051, 0.2277]) -Greedy action tensor([ 0.6123, -0.1665, -0.1309, -0.0290]) tensor([0.4063, 0.1865, 0.1932, 0.2140]) -Greedy action tensor([ 0.8895, -0.8753, -0.0081, -0.4113]) tensor([0.5402, 0.0925, 0.2202, 0.1471]) -Greedy action tensor([ 0.4600, -0.1262, 0.0967, -0.4618]) tensor([0.3774, 0.2100, 0.2625, 0.1501]) -Greedy action tensor([ 0.3727, 0.1566, -0.0395, -0.1403]) tensor([0.3261, 0.2627, 0.2159, 0.1952]) -Greedy action tensor([ 0.9734, -0.5661, -0.0131, -0.4458]) tensor([0.5467, 0.1172, 0.2038, 0.1322]) -Greedy action tensor([ 0.5545, -0.1308, 0.0342, -0.5124]) tensor([0.4094, 0.2063, 0.2433, 0.1409]) -Greedy action tensor([ 0.8215, -0.3827, -0.0422, -0.3288]) tensor([0.4907, 0.1472, 0.2069, 0.1553]) -Greedy action tensor([ 0.8739, -0.7319, -0.0280, -0.3139]) tensor([0.5232, 0.1050, 0.2123, 0.1595]) -Greedy action tensor([ 0.3775, -0.4251, -0.1694, -0.0599]) tensor([0.3742, 0.1677, 0.2165, 0.2416]) -Greedy action tensor([ 0.8536, -0.3997, -0.0401, -0.3728]) tensor([0.5030, 0.1436, 0.2058, 0.1476]) -Greedy action tensor([ 0.9780, -0.4173, -0.0418, -0.3044]) tensor([0.5303, 0.1314, 0.1913, 0.1471]) -Greedy action tensor([ 0.3504, -0.0302, -0.1117, -0.1042]) tensor([0.3392, 0.2318, 0.2137, 0.2153]) -Greedy action tensor([ 0.5759, -0.3514, -0.2260, -0.3346]) tensor([0.4452, 0.1761, 0.1996, 0.1791]) -Greedy action tensor([ 0.5481, -0.1142, 0.0882, -0.0407]) tensor([0.3701, 0.1908, 0.2337, 0.2054]) -Greedy action tensor([ 0.4337, 0.0873, -0.0143, -0.2002]) tensor([0.3476, 0.2458, 0.2221, 0.1844]) -Greedy action tensor([ 0.5099, -0.2169, -0.0326, -0.5342]) tensor([0.4138, 0.2000, 0.2405, 0.1457]) -Greedy action tensor([ 0.6990, 0.1845, -0.1767, -0.3980]) tensor([0.4259, 0.2546, 0.1774, 0.1422]) -Greedy action tensor([ 0.8568, -0.6396, 0.2012, -0.4560]) tensor([0.4970, 0.1113, 0.2580, 0.1337]) -Greedy action tensor([ 0.5443, 0.1657, -0.1127, -0.1308]) tensor([0.3687, 0.2525, 0.1911, 0.1877]) -Greedy action tensor([ 0.4173, 0.2629, -0.0783, 0.0333]) tensor([0.3177, 0.2723, 0.1936, 0.2164]) -Greedy action tensor([ 0.5378, -0.1397, 0.1385, -0.2901]) tensor([0.3823, 0.1942, 0.2565, 0.1671]) -Greedy action tensor([ 0.9383, -0.6290, -0.0373, -0.5203]) tensor([0.5500, 0.1147, 0.2073, 0.1279]) -Greedy action tensor([ 1.1436, -1.2089, -0.1323, -0.6652]) tensor([0.6501, 0.0618, 0.1815, 0.1065]) -Greedy action tensor([ 0.6963, -0.3768, -0.1391, -0.1830]) tensor([0.4565, 0.1561, 0.1980, 0.1895]) -Greedy action tensor([ 0.7459, -0.3750, -0.2078, -0.5113]) tensor([0.5011, 0.1633, 0.1931, 0.1425]) -Greedy action tensor([ 0.6836, 0.0763, -0.0300, -0.5239]) tensor([0.4285, 0.2335, 0.2099, 0.1281]) -Greedy action tensor([ 0.4802, -0.2540, -0.2025, -0.3428]) tensor([0.4125, 0.1980, 0.2084, 0.1811]) -Greedy action tensor([ 0.6485, -0.5534, -0.0245, -0.7245]) tensor([0.4845, 0.1456, 0.2472, 0.1227]) -Greedy action tensor([ 0.6001, -0.1269, -0.0791, -0.1016]) tensor([0.4022, 0.1944, 0.2039, 0.1994]) -Greedy action tensor([ 0.9350, -0.4864, -0.1202, -0.5506]) tensor([0.5507, 0.1329, 0.1917, 0.1247]) -Greedy action tensor([ 0.6236, -0.2989, -0.0268, -0.3929]) tensor([0.4384, 0.1743, 0.2287, 0.1586]) -Greedy action tensor([ 0.5934, -0.1989, -0.2259, -0.2866]) tensor([0.4332, 0.1961, 0.1909, 0.1797]) -Greedy action tensor([ 0.4979, -0.4370, -0.1203, -0.2918]) tensor([0.4192, 0.1646, 0.2259, 0.1903]) -Greedy action tensor([ 0.4489, -0.1593, -0.1080, -0.2542]) tensor([0.3828, 0.2084, 0.2193, 0.1895]) -Greedy action tensor([ 0.4040, 0.1451, -0.1518, -0.3650]) tensor([0.3560, 0.2748, 0.2042, 0.1650]) -Greedy action tensor([ 0.7529, -0.4256, -0.0107, -0.2605]) tensor([0.4680, 0.1440, 0.2181, 0.1699]) -Greedy action tensor([ 0.6928, -0.3024, -0.0496, -0.1708]) tensor([0.4411, 0.1630, 0.2099, 0.1860]) -Greedy action tensor([ 0.6003, -0.3344, 0.0553, -0.0712]) tensor([0.4027, 0.1581, 0.2335, 0.2057]) -Greedy action tensor([ 0.3971, -0.0441, -0.0290, -0.1727]) tensor([0.3494, 0.2248, 0.2282, 0.1976]) -Greedy action tensor([ 1.1377, -0.9781, 0.0295, -0.5086]) tensor([0.6085, 0.0733, 0.2009, 0.1173]) -Greedy action tensor([ 0.8323, -0.3518, -0.0394, -0.2305]) tensor([0.4831, 0.1478, 0.2021, 0.1669]) -Greedy action tensor([ 0.6454, -0.2093, 0.2989, -0.4826]) tensor([0.4071, 0.1732, 0.2879, 0.1318]) -Greedy action tensor([ 0.4771, -0.2685, -0.0786, -0.2625]) tensor([0.3960, 0.1879, 0.2272, 0.1890]) -Greedy action tensor([ 0.7952, -0.6066, -0.0482, -0.3521]) tensor([0.5015, 0.1235, 0.2158, 0.1592]) -Greedy action tensor([ 0.7580, -0.5051, -0.1053, -0.3600]) tensor([0.4923, 0.1392, 0.2076, 0.1609]) -Greedy action tensor([ 0.6476, -0.2962, -0.1519, -0.2455]) tensor([0.4448, 0.1731, 0.2000, 0.1821]) -Greedy action tensor([ 0.2688, -0.1993, -0.1655, -0.1532]) tensor([0.3413, 0.2137, 0.2211, 0.2238]) -Greedy action tensor([ 1.0307, -0.6178, 0.0506, -0.2635]) tensor([0.5430, 0.1044, 0.2038, 0.1488]) -Greedy action tensor([ 0.0663, -0.0770, -0.0552, -0.1206]) tensor([0.2792, 0.2419, 0.2473, 0.2316]) -Greedy action tensor([ 0.6911, -0.3743, -0.2438, -0.3624]) tensor([0.4794, 0.1652, 0.1882, 0.1672]) -Greedy action tensor([ 0.4881, -0.4566, 0.2711, -0.4435]) tensor([0.3864, 0.1503, 0.3111, 0.1522]) -Greedy action tensor([ 0.9153, -0.5705, -0.0036, -0.4300]) tensor([0.5303, 0.1200, 0.2116, 0.1381]) -Greedy action tensor([ 0.4022, 0.1754, -0.0746, -0.2769]) tensor([0.3419, 0.2725, 0.2122, 0.1734]) -Greedy action tensor([ 0.6811, -0.6146, 0.0303, -0.2834]) tensor([0.4595, 0.1258, 0.2397, 0.1751]) -Greedy action tensor([ 1.0930, -0.0074, 0.2133, -0.5018]) tensor([0.5127, 0.1706, 0.2127, 0.1040]) -Greedy action tensor([ 0.7107, -0.7981, -0.0899, -0.2539]) tensor([0.4875, 0.1078, 0.2189, 0.1858]) -Greedy action tensor([ 0.7548, -0.7337, 0.0186, -0.4978]) tensor([0.5024, 0.1134, 0.2406, 0.1436]) -Greedy action tensor([ 0.8937, -0.2476, -0.1994, -0.3554]) tensor([0.5151, 0.1645, 0.1726, 0.1477]) -Greedy action tensor([ 0.4597, -0.1835, -0.1234, -0.3538]) tensor([0.3957, 0.2080, 0.2209, 0.1754]) -Greedy action tensor([ 0.6268, -0.4704, -0.0038, -0.2487]) tensor([0.4381, 0.1462, 0.2332, 0.1825]) -Greedy action tensor([ 0.5508, -0.4691, -0.0377, -0.3649]) tensor([0.4318, 0.1557, 0.2397, 0.1728]) -Greedy action tensor([ 0.3133, 0.0757, -0.1000, -0.3424]) tensor([0.3368, 0.2656, 0.2228, 0.1748]) -Greedy action tensor([ 0.4252, -0.2045, -0.0777, -0.2679]) tensor([0.3791, 0.2020, 0.2293, 0.1896]) -Greedy action tensor([ 0.6425, -0.2297, 0.0217, -0.4415]) tensor([0.4360, 0.1822, 0.2343, 0.1475]) -Greedy action tensor([ 0.2654, -0.1427, -0.1027, -0.1832]) tensor([0.3338, 0.2220, 0.2310, 0.2132]) -Greedy action tensor([ 0.8012, -0.6004, -0.1736, -0.3490]) tensor([0.5155, 0.1269, 0.1945, 0.1632]) -Greedy action tensor([ 0.5802, -0.1117, -0.1933, -0.2398]) tensor([0.4163, 0.2084, 0.1920, 0.1833]) -Greedy action tensor([ 0.7004, -0.5686, -0.1243, -0.5272]) tensor([0.4969, 0.1397, 0.2178, 0.1456]) -Greedy action tensor([ 1.0270, -0.8881, 0.0650, -0.7217]) tensor([0.5870, 0.0865, 0.2243, 0.1021]) -Greedy action tensor([ 0.5702, -0.1146, -0.0133, -0.1865]) tensor([0.3950, 0.1992, 0.2204, 0.1854]) -Greedy action tensor([ 0.1476, -0.1001, -0.1476, -0.0605]) tensor([0.2997, 0.2339, 0.2231, 0.2434]) -Greedy action tensor([ 0.7788, -0.4259, -0.1332, -0.3247]) tensor([0.4918, 0.1474, 0.1976, 0.1632]) -Greedy action tensor([ 0.4804, -0.2420, -0.0166, -0.3712]) tensor([0.3967, 0.1926, 0.2413, 0.1693]) -Greedy action tensor([-0.6898, -1.8576, -0.0485, -0.4385]) tensor([0.2224, 0.0692, 0.4224, 0.2860]) -Greedy action tensor([0.6106, 0.2618, 0.0260, 1.0796]) tensor([0.2590, 0.1827, 0.1443, 0.4140]) -Greedy action tensor([ 0.1811, -0.8039, 0.8348, -0.0878]) tensor([0.2463, 0.0920, 0.4735, 0.1882]) -Greedy action tensor([ 0.7432, -0.3840, -0.5165, -0.0683]) tensor([0.4874, 0.1579, 0.1383, 0.2165]) -Greedy action tensor([-0.1657, -0.1371, 1.6615, -0.6089]) tensor([0.1125, 0.1158, 0.6995, 0.0722]) -Greedy action tensor([-1.4844, -0.9085, 1.1804, -1.1423]) tensor([0.0539, 0.0959, 0.7743, 0.0759]) -Greedy action tensor([ 1.5673, -1.0272, 1.6294, 0.8778]) tensor([0.3787, 0.0283, 0.4030, 0.1900]) -Greedy action tensor([-0.0920, -0.3673, -1.4137, -0.3198]) tensor([0.3543, 0.2691, 0.0945, 0.2821]) -Greedy action tensor([ 0.7878, 0.5405, 0.1442, -0.3545]) tensor([0.3809, 0.2974, 0.2001, 0.1215]) -Greedy action tensor([ 0.0512, -0.8189, -0.1767, -0.3772]) tensor([0.3488, 0.1461, 0.2777, 0.2273]) -Greedy action tensor([ 0.6022, -1.8718, 0.1964, 0.3571]) tensor([0.3947, 0.0333, 0.2631, 0.3089]) -Greedy action tensor([-0.2124, -0.5935, 1.4600, -0.8892]) tensor([0.1330, 0.0909, 0.7085, 0.0676]) -Greedy action tensor([ 0.5120, -0.3378, 0.5754, -0.1610]) tensor([0.3330, 0.1423, 0.3548, 0.1699]) -Greedy action tensor([ 1.0524, -0.7040, 0.9001, -0.2001]) tensor([0.4315, 0.0745, 0.3706, 0.1233]) -Greedy action tensor([-0.3396, -0.4583, -0.9534, -0.2789]) tensor([0.2864, 0.2543, 0.1550, 0.3043]) -Greedy action tensor([ 0.1374, -0.3903, -0.0759, -0.2871]) tensor([0.3276, 0.1933, 0.2647, 0.2143]) -Greedy action tensor([ 0.6169, 1.0085, 0.3546, -1.1815]) tensor([0.2929, 0.4333, 0.2253, 0.0485]) -Greedy action tensor([ 1.1756, -1.0095, 1.4435, 0.5520]) tensor([0.3383, 0.0380, 0.4423, 0.1813]) -Greedy action tensor([ 1.1952, -1.6500, -0.5712, 0.5856]) tensor([0.5641, 0.0328, 0.0964, 0.3066]) -Greedy action tensor([ 1.4806, -1.3032, 1.3169, 1.1736]) tensor([0.3779, 0.0234, 0.3208, 0.2780]) -Greedy action tensor([0.6552, 0.3284, 0.0901, 1.7336]) tensor([0.1912, 0.1379, 0.1087, 0.5622]) -Greedy action tensor([ 0.9225, -0.7334, -0.9574, 0.6673]) tensor([0.4721, 0.0901, 0.0720, 0.3658]) -Greedy action tensor([-0.0575, 0.0045, -0.0011, -0.8808]) tensor([0.2808, 0.2988, 0.2971, 0.1233]) -Greedy action tensor([-0.0326, -0.7259, 1.0487, 0.4475]) tensor([0.1649, 0.0824, 0.4862, 0.2665]) -Greedy action tensor([-0.5040, -2.8519, 0.4678, -0.2582]) tensor([0.1993, 0.0190, 0.5268, 0.2549]) -Greedy action tensor([-0.3273, -0.4778, 0.4023, 0.0732]) tensor([0.1843, 0.1585, 0.3822, 0.2750]) -Greedy action tensor([ 0.4338, -1.6165, -0.2181, 0.3834]) tensor([0.3845, 0.0495, 0.2004, 0.3656]) -Greedy action tensor([ 0.6678, 0.2866, -0.0779, 0.7826]) tensor([0.3050, 0.2083, 0.1447, 0.3421]) -Greedy action tensor([-0.8320, 0.2674, -0.1747, 0.2517]) tensor([0.1125, 0.3378, 0.2171, 0.3326]) -Greedy action tensor([-1.2235, -1.1275, -0.0710, 0.7887]) tensor([0.0785, 0.0864, 0.2484, 0.5868]) -Greedy action tensor([1.0358, 0.1501, 0.7456, 0.3516]) tensor([0.3752, 0.1548, 0.2807, 0.1893]) -Greedy action tensor([-0.1334, -0.1367, 2.3102, -0.5240]) tensor([0.0705, 0.0703, 0.8116, 0.0477]) -Greedy action tensor([ 1.1026, -0.5855, 0.3369, 0.1217]) tensor([0.4939, 0.0913, 0.2296, 0.1852]) -Greedy action tensor([-0.9090, -0.9909, 0.1919, 0.8200]) tensor([0.0947, 0.0872, 0.2846, 0.5335]) -Greedy action tensor([-1.4583, -0.6216, 1.7482, -0.4414]) tensor([0.0325, 0.0750, 0.8026, 0.0899]) -Greedy action tensor([ 0.3020, -1.3345, -0.5316, 0.9315]) tensor([0.2852, 0.0555, 0.1239, 0.5353]) -Greedy action tensor([-0.1943, 0.5050, -0.0505, -0.9760]) tensor([0.2162, 0.4351, 0.2497, 0.0990]) -Greedy action tensor([ 0.2199, -1.2376, -0.2210, 0.2349]) tensor([0.3459, 0.0805, 0.2225, 0.3511]) -Greedy action tensor([ 0.1459, 0.9322, -0.3813, 0.9153]) tensor([0.1682, 0.3693, 0.0993, 0.3631]) -Greedy action tensor([0.8137, 0.2401, 0.5073, 1.4470]) tensor([0.2390, 0.1347, 0.1760, 0.4503]) -Greedy action tensor([ 0.1809, 0.8927, -0.2415, 0.3053]) tensor([0.2072, 0.4223, 0.1358, 0.2347]) -Greedy action tensor([-0.4423, -0.7807, -0.0966, 0.8740]) tensor([0.1459, 0.1040, 0.2061, 0.5440]) -Greedy action tensor([-0.8136, -0.5024, 2.2069, -0.4079]) tensor([0.0410, 0.0560, 0.8414, 0.0616]) -Greedy action tensor([ 0.1544, -1.4650, -0.3702, 0.6925]) tensor([0.2855, 0.0565, 0.1690, 0.4890]) -Greedy action tensor([ 0.7951, 0.2417, -0.5048, -0.4319]) tensor([0.4671, 0.2686, 0.1273, 0.1369]) -Greedy action tensor([ 1.0944, -0.1814, 0.4123, -0.0352]) tensor([0.4744, 0.1325, 0.2398, 0.1533]) -Greedy action tensor([ 0.8093, -0.9898, 0.8082, 1.0736]) tensor([0.2885, 0.0477, 0.2881, 0.3757]) -Greedy action tensor([ 0.7390, 0.6776, 0.7684, -0.6046]) tensor([0.3095, 0.2911, 0.3187, 0.0807]) -Greedy action tensor([ 1.0036, -0.5829, 0.9688, 0.5946]) tensor([0.3528, 0.0722, 0.3407, 0.2343]) -Greedy action tensor([-0.9239, -0.0432, 0.7630, 0.5396]) tensor([0.0761, 0.1837, 0.4113, 0.3289]) -Greedy action tensor([ 1.0284, -0.7049, 0.8219, 1.0450]) tensor([0.3326, 0.0588, 0.2705, 0.3381]) -Greedy action tensor([-0.3710, -1.5575, 0.9019, 0.7392]) tensor([0.1264, 0.0386, 0.4514, 0.3836]) -Greedy action tensor([-0.1274, -0.0257, 0.5783, -0.2131]) tensor([0.1980, 0.2192, 0.4010, 0.1817]) -Greedy action tensor([-0.2612, -2.3582, 0.6671, -0.0179]) tensor([0.2029, 0.0249, 0.5134, 0.2588]) -Greedy action tensor([ 0.6621, -0.5343, -0.1439, 0.7249]) tensor([0.3554, 0.1074, 0.1587, 0.3784]) -Greedy action tensor([ 0.4505, -1.1280, -0.3575, 0.5528]) tensor([0.3624, 0.0747, 0.1615, 0.4014]) -Greedy action tensor([ 0.8755, 0.2041, 0.0823, -0.3866]) tensor([0.4452, 0.2275, 0.2014, 0.1260]) -Greedy action tensor([ 0.7425, -0.4733, 0.1694, 0.0926]) tensor([0.4198, 0.1244, 0.2366, 0.2191]) -Greedy action tensor([ 0.1336, 0.1842, 0.1288, -0.6242]) tensor([0.2844, 0.2992, 0.2831, 0.1333]) -Greedy action tensor([-0.3665, -0.4328, 0.4915, -0.5867]) tensor([0.1962, 0.1836, 0.4627, 0.1574]) -Greedy action tensor([-1.2275, -0.9851, 1.3310, -0.6438]) tensor([0.0589, 0.0750, 0.7605, 0.1056]) -Greedy action tensor([ 1.1052, -0.0279, -1.1320, 0.5410]) tensor([0.5006, 0.1612, 0.0534, 0.2848]) -Greedy action tensor([-0.8293, -0.7952, -0.3663, -1.1374]) tensor([0.2295, 0.2374, 0.3645, 0.1686]) -Greedy action tensor([ 1.5986, -0.6653, 0.5922, 0.5414]) tensor([0.5504, 0.0572, 0.2012, 0.1912]) -Greedy action tensor([-0.1823, 0.3787, -0.7981, -0.0087]) tensor([0.2231, 0.3910, 0.1205, 0.2654]) -Greedy action tensor([ 0.8634, -0.8760, -0.2577, 0.1567]) tensor([0.5013, 0.0880, 0.1634, 0.2473]) -Greedy action tensor([1.0902, 0.8302, 0.2243, 0.3269]) tensor([0.3762, 0.2901, 0.1583, 0.1754]) -Greedy action tensor([-0.4836, -0.7642, -1.1741, -0.4188]) tensor([0.3009, 0.2273, 0.1508, 0.3210]) -Greedy action tensor([ 0.9131, -1.0512, 1.2813, -0.3217]) tensor([0.3477, 0.0488, 0.5024, 0.1011]) -Greedy action tensor([ 1.0831, -1.7231, 2.3831, -0.4533]) tensor([0.2022, 0.0122, 0.7420, 0.0435]) -Greedy action tensor([ 0.1042, -1.8184, 0.1692, 0.6376]) tensor([0.2552, 0.0373, 0.2724, 0.4351]) -Greedy action tensor([ 0.7003, -0.1930, 0.7270, 0.4457]) tensor([0.3114, 0.1274, 0.3198, 0.2414]) -Greedy action tensor([ 0.6637, -0.7679, 0.7464, 0.2753]) tensor([0.3330, 0.0796, 0.3617, 0.2258]) -Greedy action tensor([ 1.3733, -0.5451, -0.0363, 0.8684]) tensor([0.5014, 0.0736, 0.1224, 0.3026]) -Greedy action tensor([ 0.3491, 0.7668, 0.5548, -0.2160]) tensor([0.2317, 0.3519, 0.2847, 0.1317]) -Greedy action tensor([-1.5680, -0.4112, -0.0255, -0.7738]) tensor([0.0903, 0.2873, 0.4225, 0.1999]) -Greedy action tensor([-0.0531, -1.6451, 0.5944, 0.6153]) tensor([0.1974, 0.0402, 0.3772, 0.3852]) -Greedy action tensor([ 0.9361, 0.6466, -0.4632, -0.3646]) tensor([0.4410, 0.3301, 0.1088, 0.1201]) -Greedy action tensor([-0.6437, -1.3511, -0.6598, 0.3558]) tensor([0.1925, 0.0949, 0.1895, 0.5231]) -Greedy action tensor([-0.0054, -0.8124, 1.5888, -0.3987]) tensor([0.1419, 0.0633, 0.6989, 0.0958]) -Greedy action tensor([ 0.4125, -1.6330, -0.7742, 0.1039]) tensor([0.4610, 0.0596, 0.1407, 0.3386]) -Greedy action tensor([-0.0561, 0.9059, -0.5301, 0.6914]) tensor([0.1574, 0.4120, 0.0980, 0.3325]) -Greedy action tensor([-0.8093, 0.2702, 0.2034, -0.1135]) tensor([0.1149, 0.3382, 0.3164, 0.2305]) -Greedy action tensor([-0.5961, -0.5505, 0.1334, 0.0718]) tensor([0.1647, 0.1724, 0.3416, 0.3212]) -Greedy action tensor([-1.8982, -0.4605, 0.6501, -0.1547]) tensor([0.0422, 0.1776, 0.5391, 0.2411]) -Greedy action tensor([-1.9042, -0.3995, 0.6489, -0.1422]) tensor([0.0414, 0.1863, 0.5314, 0.2409]) -Greedy action tensor([-1.9241, -0.4468, 0.6598, -0.1627]) tensor([0.0409, 0.1792, 0.5419, 0.2381]) -Greedy action tensor([-0.7131, -0.4250, 0.2094, -0.0120]) tensor([0.1457, 0.1943, 0.3664, 0.2936]) -Greedy action tensor([-1.4617, -0.5923, 0.5948, 0.4410]) tensor([0.0558, 0.1332, 0.4366, 0.3743]) -Greedy action tensor([-1.0364, -0.5629, 0.3228, 0.8086]) tensor([0.0780, 0.1252, 0.3035, 0.4933]) -Greedy action tensor([-1.2066, -0.4776, 0.4629, 0.5691]) tensor([0.0700, 0.1451, 0.3716, 0.4133]) -Greedy action tensor([-1.3002, -0.3941, 0.4066, 0.4899]) tensor([0.0668, 0.1652, 0.3680, 0.4000]) -Greedy action tensor([-1.8490, -0.3430, 0.6212, -0.1868]) tensor([0.0442, 0.1995, 0.5231, 0.2332]) -Greedy action tensor([-1.7704, -0.4874, 0.5923, -0.0857]) tensor([0.0485, 0.1750, 0.5151, 0.2615]) -Greedy action tensor([-1.8661, -0.3721, 0.6457, -0.1180]) tensor([0.0425, 0.1894, 0.5240, 0.2441]) -Greedy action tensor([-1.9304, -0.4444, 0.6634, -0.1674]) tensor([0.0406, 0.1794, 0.5433, 0.2367]) -Greedy action tensor([-1.6250, -0.0569, 0.4692, -0.0823]) tensor([0.0538, 0.2580, 0.4366, 0.2516]) -Greedy action tensor([-1.5402, -0.4347, 0.5105, -0.1167]) tensor([0.0627, 0.1894, 0.4875, 0.2604]) -Greedy action tensor([-1.7405, -0.5174, 0.5810, -0.0402]) tensor([0.0498, 0.1693, 0.5079, 0.2729]) -Greedy action tensor([-1.9193, -0.4237, 0.6501, -0.1701]) tensor([0.0412, 0.1839, 0.5380, 0.2369]) -Greedy action tensor([-1.8789, -0.3317, 0.6381, -0.1218]) tensor([0.0419, 0.1967, 0.5188, 0.2426]) -Greedy action tensor([-1.1815, 0.1407, 0.2628, 0.7039]) tensor([0.0642, 0.2408, 0.2721, 0.4229]) -Greedy action tensor([-1.8114, -0.4142, 0.5901, -0.0950]) tensor([0.0462, 0.1868, 0.5100, 0.2570]) -Greedy action tensor([-1.8994, -0.4329, 0.6369, -0.1520]) tensor([0.0422, 0.1828, 0.5329, 0.2421]) -Greedy action tensor([-1.8354, -0.3271, 0.6028, -0.0823]) tensor([0.0440, 0.1987, 0.5035, 0.2538]) -Greedy action tensor([-1.3480, -0.5356, 0.4249, 0.0945]) tensor([0.0748, 0.1685, 0.4403, 0.3164]) -Greedy action tensor([-1.7647e+00, -4.5297e-01, 5.7967e-01, 4.2617e-04]) tensor([0.0477, 0.1769, 0.4969, 0.2784]) -Greedy action tensor([-1.7957, -0.4980, 0.6073, -0.0523]) tensor([0.0467, 0.1708, 0.5158, 0.2667]) -Greedy action tensor([-1.8375, -0.2951, 0.5908, -0.1158]) tensor([0.0442, 0.2068, 0.5016, 0.2474]) -Greedy action tensor([-1.5262, -0.5231, 0.4906, -0.1412]) tensor([0.0656, 0.1790, 0.4932, 0.2622]) -Greedy action tensor([-1.0738, 0.0556, 0.4236, 0.5165]) tensor([0.0742, 0.2297, 0.3319, 0.3642]) -Greedy action tensor([-1.8221, -0.4203, 0.6497, -0.0182]) tensor([0.0435, 0.1768, 0.5154, 0.2643]) -Greedy action tensor([-1.5572, -0.4315, 0.5332, 0.1011]) tensor([0.0574, 0.1769, 0.4643, 0.3014]) -Greedy action tensor([-1.8996, -0.3830, 0.6359, -0.1401]) tensor([0.0417, 0.1899, 0.5262, 0.2422]) -Greedy action tensor([-1.7997, -0.1856, 0.5708, -0.0603]) tensor([0.0446, 0.2241, 0.4774, 0.2540]) -Greedy action tensor([-1.2775, -0.6285, 0.9997, 1.0864]) tensor([0.0429, 0.0821, 0.4185, 0.4564]) -Greedy action tensor([-1.9030, -0.3344, 0.6401, -0.1428]) tensor([0.0411, 0.1973, 0.5227, 0.2389]) -Greedy action tensor([-1.9438, -0.4585, 0.6754, -0.1775]) tensor([0.0400, 0.1767, 0.5492, 0.2341]) -Greedy action tensor([-1.8011, -0.4092, 1.0564, 0.7560]) tensor([0.0283, 0.1138, 0.4929, 0.3650]) -Greedy action tensor([-1.9189, -0.4559, 0.6618, -0.1544]) tensor([0.0410, 0.1773, 0.5420, 0.2396]) -Greedy action tensor([-1.3851, -0.5906, 0.4391, 0.0420]) tensor([0.0736, 0.1630, 0.4565, 0.3069]) -Greedy action tensor([-1.6294, 0.1110, 0.4374, 0.0914]) tensor([0.0495, 0.2823, 0.3913, 0.2768]) -Greedy action tensor([-1.8889, -0.4495, 0.6383, -0.1518]) tensor([0.0427, 0.1801, 0.5346, 0.2426]) -Greedy action tensor([-1.8407, -0.3622, 0.6002, -0.1386]) tensor([0.0447, 0.1962, 0.5137, 0.2454]) -Greedy action tensor([-1.7655, -0.2553, 0.5450, -0.0730]) tensor([0.0475, 0.2152, 0.4791, 0.2582]) -Greedy action tensor([-1.2026, 0.5029, 0.3750, 0.4024]) tensor([0.0613, 0.3372, 0.2967, 0.3049]) -Greedy action tensor([-0.9072, -0.4628, 1.0583, 1.3590]) tensor([0.0517, 0.0806, 0.3691, 0.4986]) -Greedy action tensor([-1.1795, -0.6915, 1.1530, 1.2833]) tensor([0.0405, 0.0660, 0.4176, 0.4758]) -Greedy action tensor([-1.1598, -0.1933, 0.1933, 0.0356]) tensor([0.0926, 0.2433, 0.3582, 0.3059]) -Greedy action tensor([-1.6297, -0.5085, 0.4945, 0.0038]) tensor([0.0570, 0.1748, 0.4765, 0.2917]) -Greedy action tensor([-1.8352, -0.4645, 0.6637, -0.0670]) tensor([0.0435, 0.1715, 0.5298, 0.2552]) -Greedy action tensor([-0.8636, -0.5341, 0.2791, 0.9796]) tensor([0.0844, 0.1174, 0.2647, 0.5334]) -Greedy action tensor([-1.8744, -0.4288, 0.6282, -0.1437]) tensor([0.0433, 0.1837, 0.5287, 0.2443]) -Greedy action tensor([-1.8804, -0.4150, 0.6425, -0.1343]) tensor([0.0425, 0.1840, 0.5298, 0.2437]) -Greedy action tensor([-1.8710, -0.3846, 0.6337, -0.1320]) tensor([0.0428, 0.1893, 0.5241, 0.2437]) -Greedy action tensor([-1.8003, -0.3320, 0.5596, -0.0937]) tensor([0.0466, 0.2025, 0.4939, 0.2570]) -Greedy action tensor([-0.5095, -0.5289, 0.1753, 0.0817]) tensor([0.1733, 0.1700, 0.3437, 0.3130]) -Greedy action tensor([-1.9157, -0.4229, 0.6450, -0.1655]) tensor([0.0414, 0.1843, 0.5360, 0.2383]) -Greedy action tensor([-1.8932, -0.4497, 0.6421, -0.1547]) tensor([0.0425, 0.1799, 0.5360, 0.2416]) -Greedy action tensor([-1.9094, -0.4690, 0.6554, -0.1527]) tensor([0.0416, 0.1758, 0.5413, 0.2412]) -Greedy action tensor([-1.6214, -0.4071, 0.5281, 0.0557]) tensor([0.0546, 0.1841, 0.4689, 0.2924]) -Greedy action tensor([-1.5401, -0.5141, 0.4276, 0.1056]) tensor([0.0620, 0.1730, 0.4436, 0.3214]) -Greedy action tensor([-1.9015, -0.4382, 0.6368, -0.1529]) tensor([0.0422, 0.1821, 0.5335, 0.2422]) -Greedy action tensor([-1.8076, -0.2504, 0.5992, -0.0865]) tensor([0.0446, 0.2115, 0.4947, 0.2492]) -Greedy action tensor([-1.9167, -0.4618, 0.6514, -0.1636]) tensor([0.0415, 0.1778, 0.5412, 0.2396]) -Greedy action tensor([-1.6998, -0.4964, 0.5233, -0.0166]) tensor([0.0528, 0.1758, 0.4874, 0.2840]) -Greedy action tensor([-1.4503, -0.5371, 0.6058, 0.5925]) tensor([0.0526, 0.1310, 0.4109, 0.4055]) -Greedy action tensor([-1.3527, 0.6295, 0.2321, 0.1318]) tensor([0.0570, 0.4136, 0.2780, 0.2514]) -Greedy action tensor([-1.9209, -0.4449, 0.6515, -0.1667]) tensor([0.0412, 0.1804, 0.5401, 0.2383]) -Greedy action tensor([-0.7804, -0.5666, 0.1832, 0.3218]) tensor([0.1271, 0.1574, 0.3330, 0.3825]) -Greedy action tensor([-1.7445, -0.3818, 0.5623, -0.0613]) tensor([0.0492, 0.1922, 0.4939, 0.2648]) -Greedy action tensor([-1.6667, -0.1967, 0.5144, 0.0058]) tensor([0.0512, 0.2227, 0.4534, 0.2727]) -Greedy action tensor([-1.7210, -0.4810, 0.7085, 0.1622]) tensor([0.0447, 0.1544, 0.5072, 0.2937]) -Greedy action tensor([-1.5354, -0.3155, 0.6339, 0.1405]) tensor([0.0541, 0.1832, 0.4736, 0.2891]) -Greedy action tensor([-1.2711, 0.4331, 0.2593, 0.0928]) tensor([0.0665, 0.3658, 0.3074, 0.2603]) -Greedy action tensor([-1.0314, -0.5984, 0.2866, 0.0518]) tensor([0.1083, 0.1670, 0.4047, 0.3200]) -Greedy action tensor([-1.8193, -0.4538, 0.6052, -0.1167]) tensor([0.0461, 0.1805, 0.5205, 0.2529]) -Greedy action tensor([-1.0151, -0.6093, 0.1985, 0.3171]) tensor([0.1036, 0.1554, 0.3486, 0.3925]) -Greedy action tensor([-0.9383, -0.4062, 0.4791, 0.9523]) tensor([0.0743, 0.1266, 0.3067, 0.4924]) -Greedy action tensor([-1.3414, -0.6011, 0.3965, 0.1885]) tensor([0.0746, 0.1565, 0.4243, 0.3446]) -Greedy action tensor([-1.6373, -0.5286, 0.5027, 0.0253]) tensor([0.0562, 0.1702, 0.4774, 0.2962]) -Greedy action tensor([-1.8555, -0.2774, 0.5961, -0.1476]) tensor([0.0435, 0.2110, 0.5053, 0.2402]) -Greedy action tensor([-1.2224, 0.2369, 0.3198, -0.0730]) tensor([0.0761, 0.3276, 0.3559, 0.2403]) -Greedy action tensor([ 1.0668, -0.5162, -0.1142, 0.3121]) tensor([0.5044, 0.1036, 0.1548, 0.2371]) -Greedy action tensor([ 1.7053, -0.4619, -0.7577, 0.2489]) tensor([0.6980, 0.0799, 0.0594, 0.1627]) -Greedy action tensor([ 0.9314, -0.1358, -0.1929, 0.2105]) tensor([0.4640, 0.1596, 0.1507, 0.2256]) -Greedy action tensor([ 1.2235, -0.7322, -0.3938, 0.7751]) tensor([0.5054, 0.0715, 0.1003, 0.3228]) -Greedy action tensor([ 1.9753, -0.5792, -0.2733, 0.5020]) tensor([0.7080, 0.0550, 0.0747, 0.1622]) -Greedy action tensor([ 1.2045, -0.4834, -0.0439, -0.2292]) tensor([0.5847, 0.1081, 0.1678, 0.1394]) -Greedy action tensor([ 0.8449, -0.2257, -0.4586, 0.3888]) tensor([0.4448, 0.1525, 0.1208, 0.2819]) -Greedy action tensor([ 1.0497, -0.3135, -0.3389, 0.4124]) tensor([0.4916, 0.1258, 0.1226, 0.2599]) -Greedy action tensor([ 0.5571, -0.0775, 0.0453, 0.2420]) tensor([0.3497, 0.1854, 0.2096, 0.2552]) -Greedy action tensor([ 1.2201, -0.3313, -0.3509, 0.1347]) tensor([0.5690, 0.1206, 0.1183, 0.1922]) -Greedy action tensor([ 1.5489, -0.4844, -0.2124, 0.1063]) tensor([0.6498, 0.0851, 0.1116, 0.1535]) -Greedy action tensor([ 1.7922, -0.3525, -0.5826, 0.1102]) tensor([0.7163, 0.0839, 0.0666, 0.1332]) -Greedy action tensor([ 1.2019, -0.6884, -0.2250, 0.1864]) tensor([0.5703, 0.0861, 0.1369, 0.2066]) -Greedy action tensor([ 1.1914, -0.3774, -0.3186, 0.4505]) tensor([0.5247, 0.1093, 0.1159, 0.2501]) -Greedy action tensor([ 1.3664, -0.6675, -0.3821, 0.5723]) tensor([0.5692, 0.0745, 0.0991, 0.2573]) -Greedy action tensor([ 1.2641, -0.4113, -0.2326, 0.4272]) tensor([0.5422, 0.1015, 0.1214, 0.2348]) -Greedy action tensor([ 0.9602, -0.6660, -0.1203, 0.4262]) tensor([0.4712, 0.0927, 0.1599, 0.2762]) -Greedy action tensor([ 0.8966, -0.2554, -0.0598, 0.5219]) tensor([0.4188, 0.1323, 0.1609, 0.2879]) -Greedy action tensor([ 0.9531, -0.3015, -0.0731, 0.1830]) tensor([0.4747, 0.1354, 0.1701, 0.2198]) -Greedy action tensor([ 1.5912, -0.7578, -0.1817, 0.7964]) tensor([0.5824, 0.0556, 0.0989, 0.2631]) -Greedy action tensor([ 1.0941, -0.2137, -0.4977, 0.3664]) tensor([0.5110, 0.1382, 0.1040, 0.2468]) -Greedy action tensor([ 1.6116, -0.2387, -0.5310, 0.3762]) tensor([0.6389, 0.1004, 0.0750, 0.1857]) -Greedy action tensor([ 1.5942, -0.1043, -0.4754, 0.4905]) tensor([0.6094, 0.1115, 0.0769, 0.2021]) -Greedy action tensor([ 1.0064, -0.5450, -0.0684, -0.0242]) tensor([0.5235, 0.1110, 0.1787, 0.1868]) -Greedy action tensor([ 0.1381, -0.0105, -0.5021, -0.0152]) tensor([0.3080, 0.2655, 0.1624, 0.2642]) -Greedy action tensor([ 1.2202, -0.4119, -0.2975, 0.2186]) tensor([0.5612, 0.1097, 0.1230, 0.2061]) -Greedy action tensor([ 2.0333, -0.9933, -0.4091, 0.5465]) tensor([0.7345, 0.0356, 0.0639, 0.1661]) -Greedy action tensor([ 1.7759, -0.6495, -0.3739, 0.3635]) tensor([0.6904, 0.0611, 0.0804, 0.1681]) -Greedy action tensor([ 1.3827, -0.2208, -0.4076, 0.4287]) tensor([0.5704, 0.1148, 0.0952, 0.2197]) -Greedy action tensor([ 1.5459, -0.5831, -0.3745, 0.0809]) tensor([0.6682, 0.0795, 0.0979, 0.1544]) -Greedy action tensor([ 0.8371, -0.6643, -0.0907, 0.3476]) tensor([0.4482, 0.0999, 0.1772, 0.2747]) -Greedy action tensor([ 1.6336, 0.1957, -0.5985, 0.4852]) tensor([0.6017, 0.1429, 0.0646, 0.1908]) -Greedy action tensor([ 0.8627, -0.4402, -0.0298, 0.2108]) tensor([0.4541, 0.1234, 0.1860, 0.2366]) -Greedy action tensor([ 0.6533, -0.0177, -0.2918, -0.2691]) tensor([0.4353, 0.2225, 0.1692, 0.1730]) -Greedy action tensor([ 1.6580, -0.4491, -0.3222, 0.3293]) tensor([0.6560, 0.0798, 0.0906, 0.1737]) -Greedy action tensor([ 1.2889, -0.3597, -0.6830, 0.6417]) tensor([0.5391, 0.1037, 0.0750, 0.2822]) -Greedy action tensor([ 1.2586, -0.5098, -0.4463, 0.2671]) tensor([0.5802, 0.0990, 0.1055, 0.2153]) -Greedy action tensor([ 1.3917, -0.0201, -0.3654, 0.3176]) tensor([0.5689, 0.1386, 0.0982, 0.1943]) -Greedy action tensor([ 0.5553, -0.2368, -0.3458, -0.2563]) tensor([0.4342, 0.1967, 0.1763, 0.1928]) -Greedy action tensor([ 0.8258, -0.3750, -0.0125, -0.0315]) tensor([0.4635, 0.1395, 0.2004, 0.1966]) -Greedy action tensor([ 2.1399, -1.2483, -0.4720, 0.9287]) tensor([0.7117, 0.0240, 0.0522, 0.2120]) -Greedy action tensor([ 1.6312, 0.0320, -0.2560, 0.3177]) tensor([0.6164, 0.1245, 0.0934, 0.1657]) -Greedy action tensor([ 1.5455, -0.5198, -0.1789, 0.0965]) tensor([0.6494, 0.0823, 0.1158, 0.1525]) -Greedy action tensor([ 1.7426, 0.1363, -0.0991, 0.2612]) tensor([0.6303, 0.1265, 0.0999, 0.1433]) -Greedy action tensor([ 1.1381, -0.1184, -0.5903, 0.7966]) tensor([0.4602, 0.1310, 0.0817, 0.3271]) -Greedy action tensor([ 1.6510, -0.3305, -0.1439, 0.5749]) tensor([0.6079, 0.0838, 0.1010, 0.2073]) -Greedy action tensor([ 2.0902, -1.0205, -0.1818, 0.6381]) tensor([0.7237, 0.0323, 0.0746, 0.1694]) -Greedy action tensor([ 1.1382, -0.2581, -0.3118, 0.5268]) tensor([0.4939, 0.1222, 0.1159, 0.2680]) -Greedy action tensor([ 1.5702, -0.2110, -0.4165, -0.1710]) tensor([0.6753, 0.1137, 0.0926, 0.1184]) -Greedy action tensor([ 0.3623, 0.0721, -0.0550, -0.3765]) tensor([0.3467, 0.2594, 0.2284, 0.1656]) -Greedy action tensor([ 2.1404, -0.2157, -0.5496, 0.4470]) tensor([0.7426, 0.0704, 0.0504, 0.1366]) -Greedy action tensor([ 1.2982, -0.8716, 0.1439, 0.3394]) tensor([0.5516, 0.0630, 0.1739, 0.2115]) -Greedy action tensor([ 0.6742, -0.4750, 0.1395, -0.0536]) tensor([0.4192, 0.1328, 0.2456, 0.2024]) -Greedy action tensor([ 0.9439, -0.5361, -0.2013, 0.4721]) tensor([0.4609, 0.1049, 0.1466, 0.2875]) -Greedy action tensor([ 1.4209, -0.4273, -0.3670, 0.4041]) tensor([0.5929, 0.0934, 0.0992, 0.2145]) -Greedy action tensor([ 1.5429, -0.2185, -0.6401, 0.3166]) tensor([0.6338, 0.1089, 0.0714, 0.1859]) -Greedy action tensor([ 1.6656, -0.1455, -0.8153, 0.3347]) tensor([0.6616, 0.1082, 0.0554, 0.1748]) -Greedy action tensor([ 1.2492, -0.2710, -0.1647, 0.3332]) tensor([0.5371, 0.1174, 0.1306, 0.2149]) -Greedy action tensor([ 0.8984, -0.3566, -0.2198, 0.1423]) tensor([0.4804, 0.1370, 0.1570, 0.2256]) -Greedy action tensor([ 1.6242, -0.0732, -0.2835, 0.4335]) tensor([0.6114, 0.1120, 0.0907, 0.1859]) -Greedy action tensor([ 1.1323, 0.0241, -0.3017, 0.3209]) tensor([0.4968, 0.1640, 0.1184, 0.2207]) -Greedy action tensor([ 1.0185, -0.2448, -0.4046, 0.1056]) tensor([0.5195, 0.1469, 0.1252, 0.2085]) -Greedy action tensor([ 1.2024, -0.3112, -0.1920, 0.2982]) tensor([0.5339, 0.1175, 0.1324, 0.2162]) -Greedy action tensor([ 1.4232, -0.3760, -0.2977, 0.4098]) tensor([0.5857, 0.0969, 0.1048, 0.2126]) -Greedy action tensor([ 0.8692, -0.3884, -0.2427, 0.3249]) tensor([0.4559, 0.1296, 0.1499, 0.2645]) -Greedy action tensor([ 1.2246, -0.4245, -0.4601, 0.4185]) tensor([0.5481, 0.1054, 0.1017, 0.2448]) -Greedy action tensor([ 0.9065, -0.5667, -0.3779, 0.7504]) tensor([0.4235, 0.0971, 0.1172, 0.3623]) -Greedy action tensor([ 1.4354, -0.7704, -0.5083, 0.8057]) tensor([0.5599, 0.0617, 0.0802, 0.2983]) -Greedy action tensor([ 1.2158, -0.5151, -0.3450, 0.2201]) tensor([0.5693, 0.1008, 0.1195, 0.2103]) -Greedy action tensor([ 1.6366, -0.8511, -0.1602, 0.5156]) tensor([0.6350, 0.0528, 0.1053, 0.2070]) -Greedy action tensor([ 1.6870, 0.3544, -0.1300, 0.1759]) tensor([0.6072, 0.1602, 0.0987, 0.1340]) -Greedy action tensor([ 1.1686, -0.1505, -0.0868, 0.0999]) tensor([0.5275, 0.1410, 0.1503, 0.1812]) -Greedy action tensor([ 0.7875, -0.0302, -0.0560, 0.0334]) tensor([0.4270, 0.1885, 0.1837, 0.2009]) -Greedy action tensor([ 1.3797, -0.1935, -0.1009, 0.1124]) tensor([0.5826, 0.1208, 0.1325, 0.1640]) -Greedy action tensor([ 1.2288, -0.5591, -0.2653, 0.1683]) tensor([0.5754, 0.0963, 0.1291, 0.1992]) -Greedy action tensor([ 2.6847, -1.5001, -0.2081, 0.5841]) tensor([0.8382, 0.0128, 0.0465, 0.1026]) -Greedy action tensor([ 2.5410, -1.2943, -0.4178, 0.9858]) tensor([0.7784, 0.0168, 0.0404, 0.1644]) -Greedy action tensor([ 2.1576, -0.8708, -0.5253, 0.5170]) tensor([0.7630, 0.0369, 0.0522, 0.1479]) -Greedy action tensor([ 0.9053, -0.3675, -0.0984, 0.1733]) tensor([0.4700, 0.1316, 0.1723, 0.2261]) -Greedy action tensor([ 1.4723, -0.4357, -0.0581, 0.4374]) tensor([0.5814, 0.0863, 0.1258, 0.2065]) -Greedy action tensor([ 1.1657, -0.0928, -0.5677, 0.6100]) tensor([0.4916, 0.1396, 0.0868, 0.2820]) -Greedy action tensor([ 0.8045, -0.4730, -0.0647, -0.5494]) tensor([0.5112, 0.1425, 0.2143, 0.1320]) -Greedy action tensor([ 1.2640, -0.9469, -0.0057, -0.7940]) tensor([0.6587, 0.0722, 0.1850, 0.0841]) -Greedy action tensor([ 0.7632, -0.5218, 0.1548, -0.6551]) tensor([0.4847, 0.1341, 0.2638, 0.1174]) -Greedy action tensor([ 1.0686, -0.8386, 0.0195, -0.6572]) tensor([0.5964, 0.0886, 0.2089, 0.1062]) -Greedy action tensor([ 0.6453, -0.4847, -0.1250, -0.2729]) tensor([0.4576, 0.1478, 0.2118, 0.1827]) -Greedy action tensor([ 0.9865, -0.4347, -0.0937, -0.3345]) tensor([0.5412, 0.1307, 0.1837, 0.1444]) -Greedy action tensor([ 0.7225, -0.6667, 0.0628, -0.4603]) tensor([0.4825, 0.1203, 0.2494, 0.1478]) -Greedy action tensor([ 0.7794, -0.5726, -0.3494, -0.5673]) tensor([0.5428, 0.1404, 0.1756, 0.1412]) -Greedy action tensor([ 7.4309e-01, -3.6909e-01, 3.6509e-04, -4.0575e-01]) tensor([0.4713, 0.1550, 0.2243, 0.1494]) -Greedy action tensor([ 0.7014, -0.4644, -0.0623, -0.2628]) tensor([0.4632, 0.1444, 0.2158, 0.1766]) -Greedy action tensor([ 0.8563, -0.7295, 0.0929, -0.4383]) tensor([0.5142, 0.1053, 0.2396, 0.1409]) -Greedy action tensor([ 0.5315, -0.7111, -0.1518, -0.2268]) tensor([0.4421, 0.1276, 0.2232, 0.2071]) -Greedy action tensor([ 0.2305, -0.1111, -0.1204, -0.1191]) tensor([0.3205, 0.2278, 0.2257, 0.2260]) -Greedy action tensor([ 0.6701, -0.6824, 0.0988, -0.3996]) tensor([0.4616, 0.1194, 0.2607, 0.1584]) -Greedy action tensor([0.2809, 0.4389, 0.1241, 0.3590]) tensor([0.2435, 0.2851, 0.2081, 0.2632]) -Greedy action tensor([ 0.4956, -0.0186, 0.1650, -0.5321]) tensor([0.3739, 0.2236, 0.2687, 0.1338]) -Greedy action tensor([ 0.6432, -0.2407, 0.0694, -0.0566]) tensor([0.4043, 0.1670, 0.2278, 0.2008]) -Greedy action tensor([ 0.5468, -0.1152, 0.0877, -0.2827]) tensor([0.3870, 0.1996, 0.2445, 0.1688]) -Greedy action tensor([ 0.6583, -0.6858, -0.1220, -0.3370]) tensor([0.4788, 0.1249, 0.2194, 0.1770]) -Greedy action tensor([ 0.4570, -0.1819, -0.0643, -0.1898]) tensor([0.3780, 0.1995, 0.2245, 0.1980]) -Greedy action tensor([ 0.6702, -0.5936, -0.1607, -0.3838]) tensor([0.4839, 0.1367, 0.2108, 0.1686]) -Greedy action tensor([ 0.7692, -0.3575, 0.0717, -0.3784]) tensor([0.4674, 0.1515, 0.2327, 0.1484]) -Greedy action tensor([ 0.9738, -0.4911, -0.0844, -0.3919]) tensor([0.5454, 0.1261, 0.1893, 0.1392]) -Greedy action tensor([ 0.2927, 0.1085, -0.0163, -0.2112]) tensor([0.3154, 0.2624, 0.2316, 0.1906]) -Greedy action tensor([ 1.0174, -0.9591, 0.0945, -0.3495]) tensor([0.5584, 0.0774, 0.2219, 0.1423]) -Greedy action tensor([ 0.2883, 0.1651, -0.0544, -0.1792]) tensor([0.3105, 0.2745, 0.2204, 0.1946]) -Greedy action tensor([ 0.8387, -0.8080, 0.0957, -0.6115]) tensor([0.5255, 0.1013, 0.2500, 0.1232]) -Greedy action tensor([ 0.7833, -0.5694, -0.0415, -0.3555]) tensor([0.4958, 0.1282, 0.2173, 0.1588]) -Greedy action tensor([ 0.1642, 0.0587, -0.1437, -0.3363]) tensor([0.3085, 0.2777, 0.2268, 0.1870]) -Greedy action tensor([ 0.6614, -0.4003, -0.1219, -0.4641]) tensor([0.4701, 0.1626, 0.2148, 0.1525]) -Greedy action tensor([ 0.6980, -0.5249, -0.0180, -0.3610]) tensor([0.4695, 0.1382, 0.2295, 0.1628]) -Greedy action tensor([ 0.6342, -0.5510, -0.0209, -0.2178]) tensor([0.4441, 0.1358, 0.2307, 0.1894]) -Greedy action tensor([ 0.7996, -0.8423, 0.0502, -0.3711]) tensor([0.5060, 0.0980, 0.2391, 0.1569]) -Greedy action tensor([ 0.5451, -0.5865, -0.1435, -0.3315]) tensor([0.4462, 0.1439, 0.2241, 0.1857]) -Greedy action tensor([ 0.9810, -0.8805, -0.1281, -0.6871]) tensor([0.5974, 0.0929, 0.1971, 0.1127]) -Greedy action tensor([ 0.7586, -0.5472, 0.0793, -0.4135]) tensor([0.4790, 0.1298, 0.2429, 0.1484]) -Greedy action tensor([ 0.8095, -0.4075, -0.0837, -0.2821]) tensor([0.4899, 0.1451, 0.2006, 0.1645]) -Greedy action tensor([ 0.8398, -0.5634, -0.0098, -0.2876]) tensor([0.5007, 0.1231, 0.2141, 0.1622]) -Greedy action tensor([ 0.5981, -0.4105, 0.0541, -0.5311]) tensor([0.4408, 0.1608, 0.2559, 0.1425]) -Greedy action tensor([ 0.6264, -0.3371, 0.2827, -0.4860]) tensor([0.4133, 0.1577, 0.2931, 0.1359]) -Greedy action tensor([ 0.5738, -0.6469, -0.0523, -0.3336]) tensor([0.4478, 0.1321, 0.2394, 0.1807]) -Greedy action tensor([ 0.3566, 0.1744, -0.1542, 0.0134]) tensor([0.3182, 0.2652, 0.1909, 0.2257]) -Greedy action tensor([ 0.2043, -0.0851, -0.0809, 0.0618]) tensor([0.2969, 0.2223, 0.2233, 0.2575]) -Greedy action tensor([ 0.5182, -0.5892, 0.0766, -0.7600]) tensor([0.4441, 0.1467, 0.2855, 0.1237]) -Greedy action tensor([ 1.0817, -0.7519, -0.0256, -0.7730]) tensor([0.6072, 0.0971, 0.2007, 0.0950]) -Greedy action tensor([ 1.0184, -0.7763, 0.1681, -0.6381]) tensor([0.5605, 0.0931, 0.2395, 0.1069]) -Greedy action tensor([ 0.7491, -0.3968, -0.0845, -0.2365]) tensor([0.4705, 0.1496, 0.2044, 0.1756]) -Greedy action tensor([ 0.4690, -0.2747, 0.1661, -0.3329]) tensor([0.3756, 0.1785, 0.2774, 0.1684]) -Greedy action tensor([ 0.5250, -0.1984, -0.0016, -0.1766]) tensor([0.3889, 0.1886, 0.2297, 0.1928]) -Greedy action tensor([ 0.7027, -0.3124, 0.0645, -0.2392]) tensor([0.4385, 0.1589, 0.2316, 0.1710]) -Greedy action tensor([ 0.5083, -0.3066, -0.0262, -0.3269]) tensor([0.4061, 0.1798, 0.2380, 0.1762]) -Greedy action tensor([ 0.5988, -0.3731, -0.0466, -0.3584]) tensor([0.4373, 0.1655, 0.2293, 0.1679]) -Greedy action tensor([ 0.7846, -0.5820, -0.0998, -0.4931]) tensor([0.5137, 0.1310, 0.2121, 0.1432]) -Greedy action tensor([ 0.3339, 0.0479, -0.1694, -0.0372]) tensor([0.3283, 0.2467, 0.1985, 0.2265]) -Greedy action tensor([ 0.3591, -0.3542, -0.0642, -0.0531]) tensor([0.3563, 0.1746, 0.2333, 0.2359]) -Greedy action tensor([ 1.0634, -0.7924, -0.0207, -0.6652]) tensor([0.5981, 0.0935, 0.2023, 0.1062]) -Greedy action tensor([ 0.6872, -0.3730, -0.0635, -0.3369]) tensor([0.4592, 0.1591, 0.2168, 0.1649]) -Greedy action tensor([ 0.6820, -0.4982, 0.0711, -0.5531]) tensor([0.4671, 0.1435, 0.2536, 0.1358]) -Greedy action tensor([ 0.3030, 0.0179, -0.1334, -0.0229]) tensor([0.3205, 0.2410, 0.2072, 0.2314]) -Greedy action tensor([ 0.7277, -0.4386, -0.0907, -0.2470]) tensor([0.4695, 0.1463, 0.2071, 0.1771]) -Greedy action tensor([ 0.8412, -0.5287, -0.0145, -0.2669]) tensor([0.4977, 0.1265, 0.2115, 0.1643]) -Greedy action tensor([ 0.6585, -0.5553, 0.0254, -0.6903]) tensor([0.4790, 0.1423, 0.2543, 0.1243]) -Greedy action tensor([ 0.7732, -0.1043, 0.0444, -0.4478]) tensor([0.4560, 0.1896, 0.2200, 0.1345]) -Greedy action tensor([ 0.5982, -0.3583, 0.0139, -0.2960]) tensor([0.4254, 0.1635, 0.2372, 0.1740]) -Greedy action tensor([ 0.3688, -0.2227, -0.1428, -0.2534]) tensor([0.3718, 0.2058, 0.2229, 0.1996]) -Greedy action tensor([ 0.8108, -0.5814, -0.0062, -0.4119]) tensor([0.5039, 0.1252, 0.2226, 0.1483]) -Greedy action tensor([ 0.5192, -0.2785, -0.1717, -0.2577]) tensor([0.4147, 0.1868, 0.2078, 0.1907]) -Greedy action tensor([ 0.9666, -1.2109, -0.0290, -0.7629]) tensor([0.6023, 0.0683, 0.2226, 0.1068]) -Greedy action tensor([ 1.2433, -1.1729, 0.1365, -0.7245]) tensor([0.6412, 0.0572, 0.2120, 0.0896]) -Greedy action tensor([ 0.3804, 0.4277, -0.2134, -0.1258]) tensor([0.3122, 0.3273, 0.1724, 0.1882]) -Greedy action tensor([ 0.8518, -1.1384, -0.0827, -0.5563]) tensor([0.5637, 0.0770, 0.2214, 0.1379]) -Greedy action tensor([ 0.8758, -0.4715, 0.0599, -0.5316]) tensor([0.5136, 0.1335, 0.2271, 0.1257]) -Greedy action tensor([ 0.7863, -0.5531, -0.1375, -0.4632]) tensor([0.5140, 0.1347, 0.2041, 0.1473]) -Greedy action tensor([ 0.3240, -0.2370, -0.2040, -0.0533]) tensor([0.3513, 0.2005, 0.2072, 0.2409]) -Greedy action tensor([ 0.9397, -0.3149, 0.0316, -0.4865]) tensor([0.5185, 0.1479, 0.2091, 0.1245]) -Greedy action tensor([ 0.9151, -0.5380, -0.1393, -0.3052]) tensor([0.5327, 0.1246, 0.1856, 0.1572]) -Greedy action tensor([ 0.4350, -0.0349, -0.3137, -0.0365]) tensor([0.3674, 0.2296, 0.1738, 0.2293]) -Greedy action tensor([ 0.8978, -0.7592, 0.1265, -0.4680]) tensor([0.5240, 0.0999, 0.2423, 0.1337]) -Greedy action tensor([ 0.7165, -0.6914, -0.1040, -0.4290]) tensor([0.4993, 0.1221, 0.2198, 0.1588]) -Greedy action tensor([ 0.3781, 0.1034, -0.1994, -0.1412]) tensor([0.3429, 0.2605, 0.1925, 0.2040]) -Greedy action tensor([ 0.3340, -0.0816, -0.0876, -0.1225]) tensor([0.3390, 0.2238, 0.2224, 0.2148]) -Greedy action tensor([-0.1696, -0.9336, -0.9629, 0.2332]) tensor([0.2929, 0.1364, 0.1325, 0.4382]) -Greedy action tensor([ 1.3805, 0.0577, -0.9009, 1.7302]) tensor([0.3588, 0.0956, 0.0366, 0.5090]) -Greedy action tensor([ 1.1091, -0.0725, -0.2286, -0.1115]) tensor([0.5364, 0.1646, 0.1408, 0.1583]) -Greedy action tensor([-0.0722, -0.2649, -0.3735, 0.2552]) tensor([0.2531, 0.2087, 0.1872, 0.3511]) -Greedy action tensor([-0.1642, 0.7539, 1.0876, -0.1132]) tensor([0.1242, 0.3110, 0.4342, 0.1307]) -Greedy action tensor([-0.8715, -0.4865, -0.0169, -0.2220]) tensor([0.1485, 0.2182, 0.3490, 0.2843]) -Greedy action tensor([ 1.0193, -0.7572, -0.2406, 0.4798]) tensor([0.4912, 0.0831, 0.1393, 0.2864]) -Greedy action tensor([-0.3028, 0.4402, 0.0757, 0.5340]) tensor([0.1455, 0.3060, 0.2125, 0.3360]) -Greedy action tensor([ 2.0809, -0.4675, 0.3186, 1.0976]) tensor([0.6158, 0.0482, 0.1057, 0.2303]) -Greedy action tensor([ 0.1889, -0.3105, -0.5276, 0.5748]) tensor([0.2804, 0.1702, 0.1370, 0.4125]) -Greedy action tensor([-0.7219, 0.0561, 1.7523, -1.0012]) tensor([0.0633, 0.1377, 0.7511, 0.0479]) -Greedy action tensor([-0.3234, 0.5596, 1.6753, -1.0714]) tensor([0.0887, 0.2146, 0.6547, 0.0420]) -Greedy action tensor([ 0.6279, -1.3519, -0.0121, 0.0965]) tensor([0.4438, 0.0613, 0.2340, 0.2609]) -Greedy action tensor([ 0.4054, -0.4764, -0.5938, 1.0951]) tensor([0.2649, 0.1097, 0.0975, 0.5279]) -Greedy action tensor([ 0.3800, -0.8872, 2.0037, 0.3486]) tensor([0.1366, 0.0385, 0.6926, 0.1323]) -Greedy action tensor([ 0.5413, 0.8363, -0.0020, 0.2855]) tensor([0.2704, 0.3632, 0.1570, 0.2094]) -Greedy action tensor([ 1.5069, -0.9821, 0.7126, 0.6780]) tensor([0.5072, 0.0421, 0.2292, 0.2214]) -Greedy action tensor([ 0.6694, -1.2250, 0.8301, 0.7735]) tensor([0.2912, 0.0438, 0.3419, 0.3231]) -Greedy action tensor([ 0.6494, 1.3207, 0.2064, -0.0842]) tensor([0.2451, 0.4797, 0.1574, 0.1177]) -Greedy action tensor([0.2462, 0.5303, 0.2264, 0.5402]) tensor([0.2150, 0.2857, 0.2108, 0.2885]) -Greedy action tensor([-0.6070, -0.7220, -0.5835, 0.4143]) tensor([0.1757, 0.1566, 0.1799, 0.4878]) -Greedy action tensor([-0.1676, -0.8964, -0.3182, 1.0475]) tensor([0.1750, 0.0844, 0.1506, 0.5900]) -Greedy action tensor([ 0.4963, -0.1896, -0.0090, 1.6170]) tensor([0.1933, 0.0973, 0.1166, 0.5928]) -Greedy action tensor([ 0.2767, -1.3702, 1.3963, -0.5165]) tensor([0.2124, 0.0409, 0.6506, 0.0961]) -Greedy action tensor([ 0.5549, 0.3641, -0.0391, 1.2854]) tensor([0.2245, 0.1855, 0.1239, 0.4661]) -Greedy action tensor([-0.4814, 0.3477, 0.2007, -0.2184]) tensor([0.1522, 0.3487, 0.3011, 0.1980]) -Greedy action tensor([-0.7475, -2.1606, -0.1817, 1.0225]) tensor([0.1127, 0.0274, 0.1984, 0.6615]) -Greedy action tensor([-1.5929, -0.1413, 0.7495, -1.2841]) tensor([0.0587, 0.2506, 0.6108, 0.0799]) -Greedy action tensor([ 1.3006, -1.1119, 0.8835, 0.7214]) tensor([0.4331, 0.0388, 0.2854, 0.2427]) -Greedy action tensor([ 0.4996, -0.6315, -0.1952, 0.8950]) tensor([0.3024, 0.0976, 0.1510, 0.4491]) -Greedy action tensor([ 0.1333, -1.3431, 0.3949, 0.1115]) tensor([0.2852, 0.0652, 0.3705, 0.2791]) -Greedy action tensor([-0.4706, -0.1177, 1.4837, -0.3045]) tensor([0.0938, 0.1335, 0.6620, 0.1107]) -Greedy action tensor([ 1.1406, 0.1843, -0.5198, 1.3101]) tensor([0.3624, 0.1393, 0.0689, 0.4294]) -Greedy action tensor([ 0.4976, -0.5945, 0.5980, 0.6761]) tensor([0.2750, 0.0923, 0.3040, 0.3287]) -Greedy action tensor([-1.9596, -2.0431, 2.4123, -0.9081]) tensor([0.0119, 0.0110, 0.9431, 0.0341]) -Greedy action tensor([-0.0964, 0.6130, 0.6534, -0.9895]) tensor([0.1799, 0.3657, 0.3808, 0.0736]) -Greedy action tensor([-0.3415, -0.4428, -0.5057, 0.4117]) tensor([0.2051, 0.1853, 0.1740, 0.4356]) -Greedy action tensor([ 1.3216, -1.8173, 1.1530, 0.9977]) tensor([0.3829, 0.0166, 0.3235, 0.2770]) -Greedy action tensor([ 0.8823, -1.4254, 0.3259, 1.6052]) tensor([0.2679, 0.0266, 0.1536, 0.5519]) -Greedy action tensor([ 0.3394, 0.3243, 0.1047, -0.2877]) tensor([0.3021, 0.2976, 0.2389, 0.1614]) -Greedy action tensor([ 0.2891, 0.7194, -0.3712, 0.1949]) tensor([0.2522, 0.3879, 0.1303, 0.2296]) -Greedy action tensor([ 1.6284, -0.2742, 1.4624, 0.4405]) tensor([0.4346, 0.0648, 0.3681, 0.1325]) -Greedy action tensor([ 0.1524, 0.5612, 0.3302, -0.8709]) tensor([0.2464, 0.3708, 0.2943, 0.0885]) -Greedy action tensor([ 0.7455, -0.6892, 1.9539, -0.3568]) tensor([0.2033, 0.0484, 0.6807, 0.0675]) -Greedy action tensor([-0.9806, -2.5244, 0.4763, -0.0537]) tensor([0.1245, 0.0266, 0.5344, 0.3145]) -Greedy action tensor([ 0.6043, -0.6238, -0.4896, 1.5421]) tensor([0.2391, 0.0700, 0.0801, 0.6108]) -Greedy action tensor([-0.4529, -0.9495, -0.7558, 0.2767]) tensor([0.2262, 0.1376, 0.1671, 0.4691]) -Greedy action tensor([ 0.5286, -0.8444, -0.4058, 0.9757]) tensor([0.3115, 0.0789, 0.1224, 0.4872]) -Greedy action tensor([-0.2374, -1.1502, -0.8525, -0.3230]) tensor([0.3497, 0.1404, 0.1890, 0.3210]) -Greedy action tensor([-0.1611, -0.5779, -0.1126, 0.0952]) tensor([0.2499, 0.1647, 0.2624, 0.3230]) -Greedy action tensor([ 0.4927, -0.5354, 0.1903, 0.0301]) tensor([0.3668, 0.1312, 0.2711, 0.2310]) -Greedy action tensor([ 1.1364, -0.6627, -0.4375, 0.6113]) tensor([0.5091, 0.0842, 0.1055, 0.3011]) -Greedy action tensor([ 0.2148, -0.1217, 0.4696, 1.0851]) tensor([0.1855, 0.1325, 0.2393, 0.4428]) -Greedy action tensor([ 0.5137, -0.9589, 0.4686, 1.5906]) tensor([0.1953, 0.0448, 0.1867, 0.5733]) -Greedy action tensor([-1.1670, -0.6343, 1.0466, -1.3338]) tensor([0.0788, 0.1342, 0.7204, 0.0667]) -Greedy action tensor([ 0.4967, -0.1273, 0.8043, -0.0065]) tensor([0.2857, 0.1531, 0.3886, 0.1727]) -Greedy action tensor([ 0.0314, 0.1543, -0.2079, 1.1722]) tensor([0.1654, 0.1870, 0.1302, 0.5175]) -Greedy action tensor([1.0360, 0.3458, 0.1353, 0.3402]) tensor([0.4155, 0.2084, 0.1688, 0.2072]) -Greedy action tensor([-0.0680, -0.4378, -0.8151, 0.5900]) tensor([0.2442, 0.1687, 0.1157, 0.4715]) -Greedy action tensor([-1.0796, 0.2976, 1.0635, -0.3623]) tensor([0.0644, 0.2551, 0.5487, 0.1319]) -Greedy action tensor([ 0.9956, -1.6996, 0.8304, 0.2518]) tensor([0.4183, 0.0282, 0.3546, 0.1988]) -Greedy action tensor([-0.3957, -1.0326, 0.0908, 0.3640]) tensor([0.1889, 0.0999, 0.3073, 0.4039]) -Greedy action tensor([ 1.1498, -0.1421, 0.7419, 0.6293]) tensor([0.3946, 0.1084, 0.2625, 0.2345]) -Greedy action tensor([-0.1054, -1.6297, 2.6006, 0.0684]) tensor([0.0575, 0.0125, 0.8614, 0.0685]) -Greedy action tensor([1.6138, 0.2354, 1.4878, 0.6230]) tensor([0.3992, 0.1006, 0.3520, 0.1482]) -Greedy action tensor([ 1.3484, -1.1057, -0.5205, 0.8518]) tensor([0.5409, 0.0465, 0.0835, 0.3292]) -Greedy action tensor([ 0.4251, -1.9012, -0.6170, 0.1617]) tensor([0.4507, 0.0440, 0.1590, 0.3463]) -Greedy action tensor([2.0749, 0.1413, 0.8423, 0.7050]) tensor([0.5916, 0.0856, 0.1725, 0.1503]) -Greedy action tensor([-1.0441, -0.2289, 0.6071, -0.9347]) tensor([0.1043, 0.2357, 0.5437, 0.1164]) -Greedy action tensor([ 1.6513, -0.5031, 0.7605, -0.1142]) tensor([0.5891, 0.0683, 0.2418, 0.1008]) -Greedy action tensor([ 1.0095, 0.7150, 1.2494, -0.2741]) tensor([0.3037, 0.2262, 0.3860, 0.0841]) -Greedy action tensor([ 0.1997, -0.2422, -0.5221, 1.0119]) tensor([0.2282, 0.1467, 0.1109, 0.5142]) -Greedy action tensor([ 0.3665, -0.7149, 1.0925, 0.3114]) tensor([0.2298, 0.0779, 0.4749, 0.2174]) -Greedy action tensor([-1.0219, 0.4292, -0.1734, 0.3286]) tensor([0.0872, 0.3723, 0.2038, 0.3367]) -Greedy action tensor([ 0.7076, -1.8411, -0.0803, 1.1309]) tensor([0.3268, 0.0255, 0.1486, 0.4990]) -Greedy action tensor([ 0.1443, -0.9469, 0.7753, -0.6566]) tensor([0.2729, 0.0916, 0.5129, 0.1225]) -Greedy action tensor([-0.7640, -1.1509, -0.2354, -0.0225]) tensor([0.1827, 0.1241, 0.3099, 0.3834]) -Greedy action tensor([ 0.1976, -1.2919, -0.5609, -0.3133]) tensor([0.4360, 0.0983, 0.2042, 0.2615]) -Greedy action tensor([ 0.9808, -0.4734, -0.0741, 1.6887]) tensor([0.2769, 0.0647, 0.0964, 0.5620]) -Greedy action tensor([ 0.9454, 0.3965, -0.1879, 0.3580]) tensor([0.4073, 0.2352, 0.1311, 0.2263]) -Greedy action tensor([ 0.7445, 0.0897, 1.7034, -0.7655]) tensor([0.2299, 0.1195, 0.5998, 0.0508]) -Greedy action tensor([ 1.1727, -0.1932, 1.2211, 0.5966]) tensor([0.3488, 0.0890, 0.3661, 0.1961]) -Greedy action tensor([-1.9267, -0.4560, 0.6650, -0.1643]) tensor([0.0408, 0.1774, 0.5443, 0.2375]) -Greedy action tensor([-1.9252, -0.4451, 0.6550, -0.1677]) tensor([0.0410, 0.1801, 0.5412, 0.2377]) -Greedy action tensor([-1.1550, -0.6416, 0.9941, 1.1928]) tensor([0.0461, 0.0770, 0.3951, 0.4819]) -Greedy action tensor([-1.0118, -0.5880, 0.4556, -0.2389]) tensor([0.1107, 0.1691, 0.4803, 0.2398]) -Greedy action tensor([-1.8934, -0.4683, 0.6869, -0.0779]) tensor([0.0408, 0.1697, 0.5387, 0.2507]) -Greedy action tensor([-1.7970, -0.2594, 0.6076, -0.0505]) tensor([0.0445, 0.2072, 0.4930, 0.2553]) -Greedy action tensor([-1.8123, -0.4288, 0.6723, -0.0560]) tensor([0.0439, 0.1751, 0.5267, 0.2543]) -Greedy action tensor([-1.9272, -0.4227, 0.6555, -0.1848]) tensor([0.0409, 0.1842, 0.5413, 0.2336]) -Greedy action tensor([-1.3367, 0.4958, 0.2844, 0.0095]) tensor([0.0619, 0.3869, 0.3132, 0.2379]) -Greedy action tensor([-1.8959, -0.3855, 0.6396, -0.1375]) tensor([0.0417, 0.1890, 0.5269, 0.2423]) -Greedy action tensor([-1.8564, -0.4052, 0.6230, -0.1081]) tensor([0.0436, 0.1860, 0.5201, 0.2504]) -Greedy action tensor([-1.7601, -0.3366, 0.6424, -0.0139]) tensor([0.0456, 0.1893, 0.5038, 0.2613]) -Greedy action tensor([-0.8514, -0.4663, 0.2138, 0.0647]) tensor([0.1270, 0.1867, 0.3686, 0.3176]) -Greedy action tensor([-1.3918, 0.4943, 0.2366, 0.2274]) tensor([0.0564, 0.3717, 0.2873, 0.2846]) -Greedy action tensor([-1.7868, -0.4790, 0.5901, -0.1276]) tensor([0.0483, 0.1784, 0.5198, 0.2536]) -Greedy action tensor([-1.3522, -0.6006, 0.3502, 0.1603]) tensor([0.0761, 0.1613, 0.4174, 0.3452]) -Greedy action tensor([-1.4001, -0.7415, 1.0470, 1.0015]) tensor([0.0392, 0.0757, 0.4526, 0.4325]) -Greedy action tensor([-1.7843, -0.1736, 0.5556, -0.1098]) tensor([0.0460, 0.2305, 0.4778, 0.2457]) -Greedy action tensor([-0.5319, -0.9408, 0.7890, -0.0134]) tensor([0.1410, 0.0937, 0.5284, 0.2369]) -Greedy action tensor([-1.5713, 0.0037, 0.5031, 0.1411]) tensor([0.0517, 0.2499, 0.4117, 0.2867]) -Greedy action tensor([-1.8338, -0.5131, 0.6561, -0.1013]) tensor([0.0445, 0.1668, 0.5369, 0.2518]) -Greedy action tensor([-1.9172, -0.4206, 0.6588, -0.1572]) tensor([0.0409, 0.1829, 0.5382, 0.2380]) -Greedy action tensor([-1.6390, -0.0035, 0.4419, 0.0203]) tensor([0.0515, 0.2646, 0.4130, 0.2709]) -Greedy action tensor([-1.3839, -0.5211, 0.3758, 0.2690]) tensor([0.0694, 0.1645, 0.4035, 0.3626]) -Greedy action tensor([-1.9059, -0.4418, 0.6445, -0.1612]) tensor([0.0419, 0.1812, 0.5370, 0.2399]) -Greedy action tensor([-1.5885, 0.1859, 0.4282, -0.0416]) tensor([0.0523, 0.3086, 0.3932, 0.2458]) -Greedy action tensor([-1.8802, -0.4497, 0.6364, -0.1390]) tensor([0.0430, 0.1797, 0.5323, 0.2451]) -Greedy action tensor([-1.7301, -0.4535, 0.6457, 0.1276]) tensor([0.0460, 0.1648, 0.4946, 0.2946]) -Greedy action tensor([-1.8004, -0.3309, 0.5778, -0.1124]) tensor([0.0464, 0.2018, 0.5007, 0.2511]) -Greedy action tensor([-1.7115, -0.5796, 0.5425, -0.0324]) tensor([0.0527, 0.1633, 0.5017, 0.2823]) -Greedy action tensor([-1.8708, -0.4231, 0.6263, -0.1343]) tensor([0.0433, 0.1843, 0.5264, 0.2460]) -Greedy action tensor([-0.8989, 0.1543, 0.2952, 1.1285]) tensor([0.0677, 0.1942, 0.2236, 0.5145]) -Greedy action tensor([-1.7414, -0.4319, 0.5669, -0.0779]) tensor([0.0499, 0.1849, 0.5019, 0.2634]) -Greedy action tensor([-1.5323, -0.5139, 0.5521, -0.0641]) tensor([0.0619, 0.1714, 0.4978, 0.2688]) -Greedy action tensor([-1.2691, -0.5630, 0.3019, 0.2217]) tensor([0.0814, 0.1650, 0.3919, 0.3617]) -Greedy action tensor([-1.7630, -0.4714, 0.5777, -0.1127]) tensor([0.0494, 0.1798, 0.5134, 0.2574]) -Greedy action tensor([-1.3129, -0.6186, 0.3415, 0.2037]) tensor([0.0782, 0.1566, 0.4089, 0.3563]) -Greedy action tensor([-1.6627, -0.4888, 0.6842, 0.2289]) tensor([0.0469, 0.1517, 0.4903, 0.3110]) -Greedy action tensor([-1.8000, -0.4495, 0.6228, -0.0652]) tensor([0.0459, 0.1770, 0.5172, 0.2599]) -Greedy action tensor([-0.7957, 0.5960, 0.1627, -0.0820]) tensor([0.1034, 0.4159, 0.2696, 0.2111]) -Greedy action tensor([-1.9084, -0.4253, 0.8096, 0.1961]) tensor([0.0348, 0.1532, 0.5268, 0.2852]) -Greedy action tensor([-0.4985, -0.1761, 0.6837, 1.3674]) tensor([0.0826, 0.1141, 0.2695, 0.5339]) -Greedy action tensor([-1.9290, -0.4005, 0.6494, -0.1737]) tensor([0.0407, 0.1877, 0.5362, 0.2354]) -Greedy action tensor([-1.9358, -0.4499, 0.6647, -0.1746]) tensor([0.0405, 0.1788, 0.5452, 0.2355]) -Greedy action tensor([-0.0328, 0.2861, 0.7086, 1.6606]) tensor([0.1009, 0.1388, 0.2117, 0.5486]) -Greedy action tensor([-1.9261, -0.4452, 0.6578, -0.1690]) tensor([0.0409, 0.1799, 0.5421, 0.2371]) -Greedy action tensor([-0.7192, -0.3355, 0.2897, -0.1585]) tensor([0.1436, 0.2108, 0.3939, 0.2516]) -Greedy action tensor([-1.8703, -0.4720, 0.7190, -0.0059]) tensor([0.0403, 0.1631, 0.5367, 0.2599]) -Greedy action tensor([-1.8998, -0.4380, 0.7656, 0.0715]) tensor([0.0372, 0.1606, 0.5350, 0.2672]) -Greedy action tensor([-1.4862, -0.4080, 0.4451, 0.0082]) tensor([0.0654, 0.1922, 0.4510, 0.2914]) -Greedy action tensor([-1.7738, -0.4880, 0.5791, -0.1050]) tensor([0.0489, 0.1770, 0.5145, 0.2596]) -Greedy action tensor([-1.7811, -0.4108, 0.5853, -0.1680]) tensor([0.0485, 0.1910, 0.5171, 0.2434]) -Greedy action tensor([-1.8664, -0.3161, 0.6218, -0.1284]) tensor([0.0427, 0.2011, 0.5137, 0.2426]) -Greedy action tensor([-1.9218, -0.4099, 0.6529, -0.1659]) tensor([0.0409, 0.1855, 0.5369, 0.2368]) -Greedy action tensor([-1.8340, -0.4365, 0.6361, -0.0802]) tensor([0.0442, 0.1786, 0.5221, 0.2551]) -Greedy action tensor([-0.9727, -0.7263, 0.8550, 1.1172]) tensor([0.0603, 0.0772, 0.3751, 0.4875]) -Greedy action tensor([-1.5121, -0.5422, 0.4753, 0.2497]) tensor([0.0597, 0.1574, 0.4354, 0.3475]) -Greedy action tensor([0.1272, 0.3009, 0.7065, 1.6667]) tensor([0.1158, 0.1378, 0.2067, 0.5398]) -Greedy action tensor([-1.7734, -0.4374, 0.5836, -0.0683]) tensor([0.0479, 0.1823, 0.5061, 0.2637]) -Greedy action tensor([-0.8484, -0.2707, 0.6447, 1.0273]) tensor([0.0727, 0.1295, 0.3235, 0.4743]) -Greedy action tensor([-1.5560, -0.0705, 0.3484, -0.4607]) tensor([0.0661, 0.2921, 0.4441, 0.1977]) -Greedy action tensor([-1.6796, -0.0490, 0.4727, 0.0224]) tensor([0.0495, 0.2529, 0.4260, 0.2716]) -Greedy action tensor([-0.4036, -0.3698, 0.1631, 0.1397]) tensor([0.1812, 0.1874, 0.3194, 0.3120]) -Greedy action tensor([-1.8461, -0.4514, 0.6170, -0.1307]) tensor([0.0448, 0.1806, 0.5257, 0.2489]) -Greedy action tensor([-1.4719, -0.3800, 0.4520, 0.2089]) tensor([0.0617, 0.1840, 0.4228, 0.3315]) -Greedy action tensor([-1.9037, -0.4688, 0.6711, -0.1378]) tensor([0.0414, 0.1737, 0.5431, 0.2419]) -Greedy action tensor([-0.5714, -0.4938, 0.2196, 0.1641]) tensor([0.1569, 0.1696, 0.3461, 0.3274]) -Greedy action tensor([-1.9149, -0.3838, 0.6467, -0.1519]) tensor([0.0410, 0.1894, 0.5308, 0.2388]) -Greedy action tensor([-1.1388, -0.5731, 0.2445, 0.2765]) tensor([0.0920, 0.1620, 0.3670, 0.3789]) -Greedy action tensor([-1.9469, -0.4561, 0.6713, -0.1809]) tensor([0.0400, 0.1776, 0.5485, 0.2339]) -Greedy action tensor([-1.7751, -0.4602, 0.8018, 0.1768]) tensor([0.0401, 0.1494, 0.5279, 0.2825]) -Greedy action tensor([-1.5434, -0.5546, 0.4890, -0.1048]) tensor([0.0644, 0.1730, 0.4913, 0.2713]) -Greedy action tensor([-1.8717, -0.4063, 0.6186, -0.1381]) tensor([0.0434, 0.1878, 0.5233, 0.2455]) -Greedy action tensor([-1.9123, -0.4452, 0.6479, -0.1644]) tensor([0.0416, 0.1806, 0.5387, 0.2391]) -Greedy action tensor([-1.4803, -0.4744, 0.3949, 0.1132]) tensor([0.0659, 0.1802, 0.4297, 0.3242]) -Greedy action tensor([-1.5634, -0.2301, 0.4357, -0.0044]) tensor([0.0591, 0.2241, 0.4361, 0.2808]) -Greedy action tensor([-1.7670, -0.4864, 0.5637, -0.0485]) tensor([0.0489, 0.1759, 0.5027, 0.2725]) -Greedy action tensor([-1.8800, -0.1597, 0.5754, -0.1324]) tensor([0.0417, 0.2330, 0.4859, 0.2394]) -Greedy action tensor([-1.4546, -0.3280, 0.4436, -0.0869]) tensor([0.0681, 0.2101, 0.4545, 0.2674]) -Greedy action tensor([-1.7281, -0.4630, 0.6730, 0.1434]) tensor([0.0453, 0.1605, 0.4999, 0.2943]) -Greedy action tensor([-1.3390, 0.6227, 0.1464, 0.1335]) tensor([0.0592, 0.4211, 0.2615, 0.2582]) -Greedy action tensor([ 0.9167, -0.3550, -0.1491, 0.6058]) tensor([0.4242, 0.1189, 0.1461, 0.3108]) -Greedy action tensor([ 1.5792, -0.3710, -0.3487, 0.4864]) tensor([0.6162, 0.0876, 0.0896, 0.2066]) -Greedy action tensor([ 0.7329, 0.0212, -0.3277, 0.3610]) tensor([0.3958, 0.1943, 0.1370, 0.2729]) -Greedy action tensor([ 1.6137, -0.6301, -0.5024, -0.2902]) tensor([0.7270, 0.0771, 0.0876, 0.1083]) -Greedy action tensor([ 1.3694, -0.5282, -0.1468, 0.1491]) tensor([0.6007, 0.0901, 0.1319, 0.1773]) -Greedy action tensor([ 1.4660, -0.4846, -0.0149, 0.1572]) tensor([0.6098, 0.0867, 0.1387, 0.1647]) -Greedy action tensor([ 1.3193, -0.6550, -0.1139, 0.3148]) tensor([0.5735, 0.0796, 0.1368, 0.2100]) -Greedy action tensor([ 1.1836, -0.0065, -0.4523, 0.3714]) tensor([0.5147, 0.1566, 0.1002, 0.2285]) -Greedy action tensor([ 0.9097, -0.5179, -0.8360, 1.2570]) tensor([0.3534, 0.0848, 0.0617, 0.5001]) -Greedy action tensor([ 1.3669, -0.5411, -0.2128, 0.5128]) tensor([0.5618, 0.0834, 0.1157, 0.2391]) -Greedy action tensor([ 0.9751, -0.2118, -0.1642, 0.0768]) tensor([0.4920, 0.1502, 0.1575, 0.2004]) -Greedy action tensor([ 1.4144, -0.4961, -0.1612, 0.5769]) tensor([0.5594, 0.0828, 0.1157, 0.2421]) -Greedy action tensor([ 2.1574, -1.0187, -0.6040, 0.7930]) tensor([0.7350, 0.0307, 0.0465, 0.1878]) -Greedy action tensor([ 0.9900, -0.4222, -0.3508, 0.2491]) tensor([0.5046, 0.1229, 0.1320, 0.2405]) -Greedy action tensor([ 1.9300, -0.4107, -0.0221, 1.0337]) tensor([0.6074, 0.0585, 0.0862, 0.2479]) -Greedy action tensor([ 1.2519, -0.5327, 0.0897, 0.2682]) tensor([0.5392, 0.0905, 0.1687, 0.2016]) -Greedy action tensor([ 1.1149, -0.2263, -0.3046, 0.7040]) tensor([0.4616, 0.1207, 0.1116, 0.3060]) -Greedy action tensor([ 0.6891, -0.2558, -0.1215, 0.1605]) tensor([0.4128, 0.1605, 0.1835, 0.2433]) -Greedy action tensor([ 1.4021, -0.4496, -0.2131, 0.1294]) tensor([0.6113, 0.0960, 0.1216, 0.1712]) -Greedy action tensor([ 1.4050, 0.0415, -0.2326, 0.2191]) tensor([0.5696, 0.1457, 0.1107, 0.1740]) -Greedy action tensor([ 1.2209, -0.5581, -0.2166, 0.2639]) tensor([0.5586, 0.0943, 0.1327, 0.2145]) -Greedy action tensor([ 1.6765, -0.6123, -0.3973, 0.5662]) tensor([0.6424, 0.0651, 0.0808, 0.2117]) -Greedy action tensor([ 0.8131, -0.4473, 0.0844, 0.1495]) tensor([0.4384, 0.1243, 0.2115, 0.2258]) -Greedy action tensor([ 0.7966, -0.4130, -0.2769, 0.4885]) tensor([0.4210, 0.1256, 0.1439, 0.3094]) -Greedy action tensor([ 2.1590, -0.8803, -0.0070, 0.5587]) tensor([0.7330, 0.0351, 0.0840, 0.1479]) -Greedy action tensor([ 1.9312, -0.7607, -0.1501, 0.8072]) tensor([0.6590, 0.0446, 0.0822, 0.2142]) -Greedy action tensor([ 1.5341, -0.5640, -0.5653, 0.2823]) tensor([0.6531, 0.0801, 0.0800, 0.1868]) -Greedy action tensor([ 1.0406, -0.1932, -0.2675, 0.0667]) tensor([0.5157, 0.1502, 0.1394, 0.1947]) -Greedy action tensor([ 1.1289, -0.7099, 0.2983, 0.4372]) tensor([0.4772, 0.0759, 0.2080, 0.2389]) -Greedy action tensor([ 1.4003, -0.7291, 0.1191, 0.1209]) tensor([0.5971, 0.0710, 0.1658, 0.1661]) -Greedy action tensor([ 1.1814, -0.2568, -0.4473, 0.1108]) tensor([0.5630, 0.1336, 0.1104, 0.1930]) -Greedy action tensor([ 1.8071, -0.6837, -0.4077, 0.6336]) tensor([0.6661, 0.0552, 0.0727, 0.2060]) -Greedy action tensor([ 0.2107, 0.0675, -0.3819, 0.3028]) tensor([0.2844, 0.2465, 0.1573, 0.3119]) -Greedy action tensor([ 0.7988, -0.4267, -0.2795, 0.3166]) tensor([0.4442, 0.1304, 0.1511, 0.2743]) -Greedy action tensor([ 1.5762, -0.9168, -0.2986, 0.6677]) tensor([0.6101, 0.0504, 0.0936, 0.2459]) -Greedy action tensor([ 1.7262, -0.1345, -0.5140, 0.4873]) tensor([0.6445, 0.1002, 0.0686, 0.1867]) -Greedy action tensor([ 1.0976, -0.2747, -0.3023, 0.2039]) tensor([0.5238, 0.1328, 0.1292, 0.2143]) -Greedy action tensor([ 1.7455, -0.6327, -0.9606, 0.6911]) tensor([0.6632, 0.0615, 0.0443, 0.2311]) -Greedy action tensor([-0.1377, 0.2256, -0.4675, -0.0995]) tensor([0.2383, 0.3427, 0.1714, 0.2476]) -Greedy action tensor([ 1.5178, -0.4313, 0.0146, 0.2253]) tensor([0.6100, 0.0869, 0.1357, 0.1675]) -Greedy action tensor([ 1.4679, -0.3863, -0.2255, 0.4609]) tensor([0.5862, 0.0918, 0.1078, 0.2142]) -Greedy action tensor([ 1.3435, 0.0552, -0.2902, 0.5461]) tensor([0.5204, 0.1435, 0.1016, 0.2345]) -Greedy action tensor([ 1.0947, -0.2049, -0.8022, 0.2268]) tensor([0.5427, 0.1480, 0.0814, 0.2279]) -Greedy action tensor([ 1.8023, -0.2971, -1.1168, 0.2869]) tensor([0.7162, 0.0878, 0.0387, 0.1574]) -Greedy action tensor([ 1.3219, -0.5226, -0.1625, 0.1401]) tensor([0.5912, 0.0935, 0.1340, 0.1813]) -Greedy action tensor([ 1.1618, -0.1766, -0.1677, 0.2302]) tensor([0.5206, 0.1365, 0.1378, 0.2051]) -Greedy action tensor([ 1.3826, -0.6477, -0.0511, 0.4918]) tensor([0.5618, 0.0738, 0.1339, 0.2305]) -Greedy action tensor([ 1.1485, -0.4436, -0.3575, 0.0143]) tensor([0.5724, 0.1165, 0.1270, 0.1841]) -Greedy action tensor([ 1.1076, -0.7313, -0.0638, 0.1860]) tensor([0.5357, 0.0852, 0.1660, 0.2131]) -Greedy action tensor([ 1.1951, -0.5368, -0.1577, -0.1421]) tensor([0.5889, 0.1042, 0.1522, 0.1546]) -Greedy action tensor([ 1.3357, -0.3521, -0.1453, -0.0017]) tensor([0.5971, 0.1104, 0.1358, 0.1567]) -Greedy action tensor([ 1.3711, -0.5152, -0.5607, 0.3508]) tensor([0.6035, 0.0915, 0.0874, 0.2175]) -Greedy action tensor([ 1.2477, -0.2812, -0.3107, 0.2088]) tensor([0.5615, 0.1217, 0.1182, 0.1987]) -Greedy action tensor([ 1.4542, -0.8129, -0.4083, 0.4318]) tensor([0.6178, 0.0640, 0.0959, 0.2222]) -Greedy action tensor([ 1.3971, -0.0306, -0.0205, 0.6357]) tensor([0.5131, 0.1231, 0.1243, 0.2396]) -Greedy action tensor([ 1.4560, -0.1462, -0.3044, 0.3380]) tensor([0.5881, 0.1185, 0.1011, 0.1923]) -Greedy action tensor([ 1.2084, 0.0904, -0.5310, 0.3086]) tensor([0.5238, 0.1712, 0.0920, 0.2130]) -Greedy action tensor([ 1.4715, -0.2934, -0.6435, 0.2179]) tensor([0.6340, 0.1085, 0.0765, 0.1810]) -Greedy action tensor([ 0.3576, -0.3639, 0.0743, 0.1350]) tensor([0.3290, 0.1599, 0.2478, 0.2633]) -Greedy action tensor([ 0.6723, -0.1092, -0.0895, 0.0904]) tensor([0.4027, 0.1843, 0.1880, 0.2250]) -Greedy action tensor([ 1.1849, -0.3887, -0.1204, 0.3347]) tensor([0.5247, 0.1088, 0.1422, 0.2242]) -Greedy action tensor([ 2.3089, -0.6931, -0.0181, 0.4321]) tensor([0.7690, 0.0382, 0.0750, 0.1177]) -Greedy action tensor([ 1.2901, -0.0217, -0.9045, 0.2842]) tensor([0.5726, 0.1542, 0.0638, 0.2094]) -Greedy action tensor([ 2.2606, -0.8815, -0.2840, 0.4238]) tensor([0.7806, 0.0337, 0.0613, 0.1244]) -Greedy action tensor([ 1.5107, -0.3643, -0.4641, 0.3327]) tensor([0.6250, 0.0958, 0.0867, 0.1924]) -Greedy action tensor([ 1.7249, -0.0454, -0.8569, 0.3293]) tensor([0.6695, 0.1140, 0.0506, 0.1658]) -Greedy action tensor([ 1.4947, -0.5118, -0.1212, 0.3222]) tensor([0.6087, 0.0818, 0.1210, 0.1885]) -Greedy action tensor([ 1.6586, -0.1394, -0.4454, 0.5822]) tensor([0.6141, 0.1017, 0.0749, 0.2093]) -Greedy action tensor([ 2.1461, -0.9781, -0.2867, 0.9513]) tensor([0.6971, 0.0307, 0.0612, 0.2111]) -Greedy action tensor([ 1.1758, 0.1886, -0.2824, 0.1965]) tensor([0.5048, 0.1881, 0.1175, 0.1896]) -Greedy action tensor([ 1.2182, -0.2173, -0.0186, 0.1237]) tensor([0.5368, 0.1277, 0.1558, 0.1797]) -Greedy action tensor([ 2.6765, -1.5347, -0.4323, 1.1650]) tensor([0.7812, 0.0116, 0.0349, 0.1723]) -Greedy action tensor([ 1.1542, -0.2691, -0.1638, 0.3770]) tensor([0.5081, 0.1224, 0.1360, 0.2336]) -Greedy action tensor([ 0.3590, -0.0935, 0.0474, 0.1250]) tensor([0.3165, 0.2013, 0.2317, 0.2505]) -Greedy action tensor([ 0.9597, -0.2839, -0.2198, 0.0463]) tensor([0.5008, 0.1444, 0.1540, 0.2009]) -Greedy action tensor([ 1.3797, -0.0712, -0.5398, 0.3769]) tensor([0.5721, 0.1341, 0.0839, 0.2099]) -Greedy action tensor([ 1.6580, -0.5100, 0.2486, -0.1302]) tensor([0.6553, 0.0750, 0.1601, 0.1096]) -Greedy action tensor([ 1.4516, -0.4539, -1.0412, 0.6545]) tensor([0.5945, 0.0884, 0.0492, 0.2679]) -Greedy action tensor([ 1.6840, -0.4799, -0.3070, 0.2730]) tensor([0.6687, 0.0768, 0.0913, 0.1631]) -Greedy action tensor([ 1.0478, -0.4079, -0.2277, 0.1884]) tensor([0.5165, 0.1205, 0.1443, 0.2187]) -Greedy action tensor([ 0.1871, -0.1735, -0.0853, 0.1670]) tensor([0.2908, 0.2028, 0.2214, 0.2850]) -Greedy action tensor([ 0.7122, -0.1775, 0.0158, -0.2327]) tensor([0.4352, 0.1788, 0.2169, 0.1692]) -Greedy action tensor([ 0.9567, -0.5363, 0.0245, -0.3787]) tensor([0.5315, 0.1194, 0.2092, 0.1398]) -Greedy action tensor([ 0.6784, -0.3717, -0.0888, -0.3945]) tensor([0.4638, 0.1623, 0.2153, 0.1586]) -Greedy action tensor([ 0.9282, -0.5818, -0.0948, -0.3947]) tensor([0.5415, 0.1196, 0.1947, 0.1442]) -Greedy action tensor([ 0.4379, -0.0416, -0.1103, -0.2916]) tensor([0.3732, 0.2311, 0.2157, 0.1800]) -Greedy action tensor([ 0.5790, 0.0089, -0.0496, -0.2635]) tensor([0.3953, 0.2236, 0.2109, 0.1702]) -Greedy action tensor([ 0.3837, -0.0734, -0.2341, -0.2441]) tensor([0.3695, 0.2340, 0.1992, 0.1972]) -Greedy action tensor([ 0.6341, -0.4020, -0.1672, -0.1840]) tensor([0.4455, 0.1581, 0.1999, 0.1966]) -Greedy action tensor([ 1.1385, -0.9271, 0.0596, -0.5543]) tensor([0.6058, 0.0768, 0.2060, 0.1115]) -Greedy action tensor([ 0.2420, -0.0072, -0.0049, -0.2707]) tensor([0.3165, 0.2467, 0.2473, 0.1896]) -Greedy action tensor([ 0.5694, -0.1699, -0.0195, -0.2339]) tensor([0.4032, 0.1925, 0.2237, 0.1806]) -Greedy action tensor([ 0.5532, -0.4384, -0.1593, -0.5218]) tensor([0.4540, 0.1684, 0.2226, 0.1549]) -Greedy action tensor([ 1.0176, -0.8368, 0.1543, -0.4925]) tensor([0.5558, 0.0870, 0.2344, 0.1228]) -Greedy action tensor([ 0.7589, -0.3379, -0.0266, -0.0650]) tensor([0.4487, 0.1498, 0.2046, 0.1969]) -Greedy action tensor([ 0.1074, -0.0268, -0.0556, -0.2614]) tensor([0.2928, 0.2560, 0.2488, 0.2025]) -Greedy action tensor([ 0.8082, -0.6856, -0.1235, -0.8572]) tensor([0.5532, 0.1242, 0.2179, 0.1046]) -Greedy action tensor([ 0.7583, -0.4886, -0.1102, -0.1581]) tensor([0.4746, 0.1364, 0.1991, 0.1898]) -Greedy action tensor([ 0.8647, -0.2125, -0.1266, -0.0700]) tensor([0.4752, 0.1618, 0.1763, 0.1866]) -Greedy action tensor([ 1.1613, -0.6849, -0.0636, -0.5413]) tensor([0.6121, 0.0966, 0.1798, 0.1115]) -Greedy action tensor([ 0.3117, 0.1843, -0.1935, -0.1908]) tensor([0.3237, 0.2850, 0.1953, 0.1959]) -Greedy action tensor([ 0.4978, 0.0312, -0.1405, -0.2508]) tensor([0.3805, 0.2386, 0.2010, 0.1800]) -Greedy action tensor([ 0.3345, -0.2463, -0.0597, -0.3524]) tensor([0.3654, 0.2044, 0.2464, 0.1838]) -Greedy action tensor([ 0.3716, 0.0462, 0.1824, -0.5713]) tensor([0.3402, 0.2457, 0.2816, 0.1325]) -Greedy action tensor([ 0.6739, -0.4189, -0.0761, -0.4254]) tensor([0.4671, 0.1566, 0.2207, 0.1556]) -Greedy action tensor([ 0.5730, -0.3246, -0.0296, -0.2515]) tensor([0.4178, 0.1703, 0.2287, 0.1832]) -Greedy action tensor([ 0.3827, -0.0213, -0.0735, -0.1027]) tensor([0.3428, 0.2289, 0.2173, 0.2110]) -Greedy action tensor([ 1.0317, -0.5823, 0.0709, -0.5115]) tensor([0.5570, 0.1109, 0.2131, 0.1190]) -Greedy action tensor([ 0.9167, -0.8637, 0.1200, -0.4922]) tensor([0.5365, 0.0904, 0.2419, 0.1311]) -Greedy action tensor([ 1.1172, -0.8830, -0.0166, -0.4218]) tensor([0.5982, 0.0809, 0.1925, 0.1284]) -Greedy action tensor([ 0.9553, -0.5281, -0.0987, -0.6857]) tensor([0.5652, 0.1282, 0.1970, 0.1095]) -Greedy action tensor([ 0.5440, -0.3728, -0.0780, -0.1901]) tensor([0.4138, 0.1654, 0.2222, 0.1986]) -Greedy action tensor([ 1.1607, -0.4623, -0.1029, -0.4022]) tensor([0.5919, 0.1168, 0.1673, 0.1240]) -Greedy action tensor([ 0.4680, -0.3119, -0.3079, -0.5875]) tensor([0.4412, 0.2022, 0.2031, 0.1535]) -Greedy action tensor([ 0.4435, -0.1920, -0.0824, -0.2796]) tensor([0.3837, 0.2033, 0.2268, 0.1862]) -Greedy action tensor([ 0.7440, -0.2927, -0.0742, -0.1875]) tensor([0.4567, 0.1619, 0.2015, 0.1799]) -Greedy action tensor([ 0.6772, -0.2745, -0.1523, -0.4096]) tensor([0.4630, 0.1788, 0.2020, 0.1562]) -Greedy action tensor([ 1.0707, -0.6182, -0.1494, -0.4846]) tensor([0.5914, 0.1092, 0.1746, 0.1248]) -Greedy action tensor([ 0.4495, -0.1199, 0.0632, -0.3797]) tensor([0.3729, 0.2110, 0.2534, 0.1627]) -Greedy action tensor([ 1.0335, -0.5712, -0.1666, -0.1986]) tensor([0.5575, 0.1120, 0.1679, 0.1626]) -Greedy action tensor([ 0.7100, -0.1850, 0.1306, -0.4920]) tensor([0.4406, 0.1801, 0.2469, 0.1325]) -Greedy action tensor([ 0.8148, -0.5501, -0.1260, -0.5666]) tensor([0.5272, 0.1346, 0.2058, 0.1324]) -Greedy action tensor([ 0.6040, -0.4053, -0.1408, -0.4596]) tensor([0.4578, 0.1668, 0.2174, 0.1580]) -Greedy action tensor([ 0.7731, -0.4078, -0.0909, -0.2967]) tensor([0.4827, 0.1482, 0.2035, 0.1656]) -Greedy action tensor([ 0.7876, -0.7087, -0.0543, -0.5750]) tensor([0.5233, 0.1172, 0.2255, 0.1340]) -Greedy action tensor([ 0.6939, -0.3153, -0.0630, -0.3732]) tensor([0.4592, 0.1674, 0.2154, 0.1580]) -Greedy action tensor([ 0.3790, -0.0376, -0.0979, -0.1872]) tensor([0.3512, 0.2315, 0.2180, 0.1993]) -Greedy action tensor([ 1.0728, -1.4171, 0.1052, -0.8425]) tensor([0.6210, 0.0515, 0.2360, 0.0915]) -Greedy action tensor([ 0.3909, -0.0169, -0.0793, -0.4080]) tensor([0.3650, 0.2428, 0.2281, 0.1642]) -Greedy action tensor([ 0.6552, -0.7042, -0.0132, -0.4798]) tensor([0.4783, 0.1228, 0.2451, 0.1537]) -Greedy action tensor([ 0.1463, 0.4651, -0.0156, -0.1382]) tensor([0.2514, 0.3457, 0.2138, 0.1891]) -Greedy action tensor([ 0.5103, 0.1829, -0.1114, -0.0546]) tensor([0.3538, 0.2550, 0.1900, 0.2011]) -Greedy action tensor([ 0.4983, 0.0537, -0.1250, -0.3761]) tensor([0.3854, 0.2471, 0.2067, 0.1608]) -Greedy action tensor([ 0.3594, -0.1060, 0.1767, -0.2515]) tensor([0.3329, 0.2090, 0.2773, 0.1807]) -Greedy action tensor([ 1.0493, -0.5943, -0.0301, -0.4646]) tensor([0.5704, 0.1103, 0.1938, 0.1255]) -Greedy action tensor([ 0.4328, -0.1037, -0.0257, -0.0424]) tensor([0.3523, 0.2060, 0.2227, 0.2190]) -Greedy action tensor([ 0.6570, -0.1546, 0.2403, -0.6702]) tensor([0.4222, 0.1875, 0.2783, 0.1120]) -Greedy action tensor([ 0.5845, -0.3023, 0.1048, -0.2115]) tensor([0.4029, 0.1660, 0.2494, 0.1818]) -Greedy action tensor([ 0.3675, -0.0489, 0.0484, -0.2317]) tensor([0.3407, 0.2246, 0.2476, 0.1871]) -Greedy action tensor([ 0.5753, -0.3094, -0.0824, -0.2202]) tensor([0.4198, 0.1733, 0.2175, 0.1895]) -Greedy action tensor([ 0.4152, -0.1635, -0.0034, -0.1435]) tensor([0.3584, 0.2009, 0.2358, 0.2050]) -Greedy action tensor([ 0.3715, -0.1315, -0.2077, -0.3837]) tensor([0.3795, 0.2295, 0.2127, 0.1783]) -Greedy action tensor([ 0.8008, -0.2716, 0.0010, -0.2094]) tensor([0.4639, 0.1587, 0.2085, 0.1689]) -Greedy action tensor([ 0.7389, -0.2704, -0.0695, -0.4091]) tensor([0.4701, 0.1713, 0.2094, 0.1491]) -Greedy action tensor([ 0.3818, -0.1909, -0.0658, -0.0621]) tensor([0.3515, 0.1983, 0.2247, 0.2255]) -Greedy action tensor([ 0.5655, -0.2518, -0.0066, 0.0054]) tensor([0.3880, 0.1714, 0.2190, 0.2216]) -Greedy action tensor([ 0.4071, 0.0757, -0.0168, -0.1323]) tensor([0.3383, 0.2429, 0.2215, 0.1973]) -Greedy action tensor([ 0.3702, -0.8265, -0.2531, -0.2356]) tensor([0.4195, 0.1268, 0.2249, 0.2289]) -Greedy action tensor([ 0.6606, -0.4454, -0.0947, -0.1339]) tensor([0.4439, 0.1469, 0.2086, 0.2006]) -Greedy action tensor([ 0.9652, -0.8323, -0.0205, -0.4521]) tensor([0.5614, 0.0930, 0.2095, 0.1361]) -Greedy action tensor([ 0.5962, -0.0952, -0.0477, -0.1216]) tensor([0.3978, 0.1992, 0.2089, 0.1940]) -Greedy action tensor([ 0.5175, -0.4758, -0.1793, -0.5237]) tensor([0.4501, 0.1667, 0.2242, 0.1589]) -Greedy action tensor([ 0.7819, -0.5634, -0.3152, -0.5557]) tensor([0.5386, 0.1403, 0.1798, 0.1414]) -Greedy action tensor([ 0.3765, -0.2070, -0.1136, -0.0293]) tensor([0.3525, 0.1967, 0.2159, 0.2349]) -Greedy action tensor([ 0.6992, -0.5630, -0.0222, -0.3874]) tensor([0.4747, 0.1344, 0.2308, 0.1601]) -Greedy action tensor([ 0.6519, -0.3500, 0.0557, -0.2754]) tensor([0.4322, 0.1587, 0.2381, 0.1710]) -Greedy action tensor([ 0.2833, 0.2544, -0.0303, -0.3358]) tensor([0.3086, 0.2998, 0.2255, 0.1661]) -Greedy action tensor([ 0.8147, -0.5718, -0.0514, -0.3760]) tensor([0.5065, 0.1266, 0.2130, 0.1540]) -Greedy action tensor([ 0.5663, -0.4502, 0.0049, -0.3541]) tensor([0.4291, 0.1553, 0.2448, 0.1709]) -Greedy action tensor([ 0.4080, -0.1796, -0.1597, -0.3836]) tensor([0.3882, 0.2157, 0.2201, 0.1759]) -Greedy action tensor([ 0.5919, 0.4325, -0.1017, -0.2189]) tensor([0.3575, 0.3049, 0.1787, 0.1589]) -Greedy action tensor([ 0.5467, -0.1859, -0.0458, -0.2575]) tensor([0.4031, 0.1937, 0.2229, 0.1804]) -Greedy action tensor([ 0.6261, -1.0945, 0.2243, 0.1593]) tensor([0.4040, 0.0723, 0.2703, 0.2533]) -Greedy action tensor([-0.8206, 0.9613, 0.3964, -0.0921]) tensor([0.0807, 0.4795, 0.2726, 0.1672]) -Greedy action tensor([ 1.3223, -1.0642, -0.1223, 1.4617]) tensor([0.4037, 0.0371, 0.0952, 0.4640]) -Greedy action tensor([0.4780, 0.5225, 0.0380, 0.7526]) tensor([0.2497, 0.2610, 0.1608, 0.3285]) -Greedy action tensor([ 1.4780, -0.1560, 0.7628, 0.5953]) tensor([0.4767, 0.0930, 0.2331, 0.1972]) -Greedy action tensor([-0.9662, -1.8039, -0.1861, -0.1567]) tensor([0.1706, 0.0738, 0.3722, 0.3833]) -Greedy action tensor([ 1.0208, 0.6794, -0.1575, 0.1338]) tensor([0.4114, 0.2925, 0.1266, 0.1695]) -Greedy action tensor([ 1.3137, 0.6562, 0.2401, -0.2466]) tensor([0.4831, 0.2503, 0.1651, 0.1015]) -Greedy action tensor([ 0.3934, -0.2441, -0.1852, 0.3854]) tensor([0.3245, 0.1716, 0.1820, 0.3219]) -Greedy action tensor([-1.4906e+00, 9.8043e-02, -6.7175e-01, 3.5009e-04]) tensor([0.0793, 0.3885, 0.1799, 0.3523]) -Greedy action tensor([-1.1148, -0.8284, -0.1038, -0.7230]) tensor([0.1524, 0.2030, 0.4190, 0.2256]) -Greedy action tensor([ 0.0775, -0.2106, 1.0363, 0.4187]) tensor([0.1735, 0.1300, 0.4525, 0.2440]) -Greedy action tensor([ 0.8383, 0.4000, 0.2580, -0.1962]) tensor([0.3906, 0.2520, 0.2186, 0.1388]) -Greedy action tensor([ 0.4950, -0.2930, 1.0815, 2.1842]) tensor([0.1154, 0.0525, 0.2074, 0.6248]) -Greedy action tensor([-1.4204, -0.1950, -0.3214, -0.2993]) tensor([0.0955, 0.3251, 0.2865, 0.2929]) -Greedy action tensor([ 0.6298, -0.7975, -0.9550, 0.2743]) tensor([0.4660, 0.1118, 0.0955, 0.3266]) -Greedy action tensor([ 0.2281, 1.1492, -0.8033, 0.8288]) tensor([0.1757, 0.4413, 0.0626, 0.3203]) -Greedy action tensor([-0.3121, -1.0381, -0.2171, -0.6127]) tensor([0.3009, 0.1456, 0.3308, 0.2227]) -Greedy action tensor([-0.4063, -1.0295, -0.4678, -0.0592]) tensor([0.2570, 0.1378, 0.2416, 0.3636]) -Greedy action tensor([ 0.9685, 0.3788, 0.2410, -0.3525]) tensor([0.4340, 0.2406, 0.2096, 0.1158]) -Greedy action tensor([-0.1158, -0.2070, 0.1629, 0.8818]) tensor([0.1682, 0.1535, 0.2222, 0.4560]) -Greedy action tensor([-0.6096, -0.5188, -0.2571, 0.4705]) tensor([0.1547, 0.1694, 0.2201, 0.4557]) -Greedy action tensor([ 0.4295, -0.8277, -0.0567, 0.6458]) tensor([0.3184, 0.0906, 0.1958, 0.3953]) -Greedy action tensor([-0.2413, -0.9994, 0.0583, 0.4817]) tensor([0.2050, 0.0960, 0.2766, 0.4224]) -Greedy action tensor([-0.2628, -1.4075, 0.2437, 0.1915]) tensor([0.2196, 0.0699, 0.3645, 0.3460]) -Greedy action tensor([-0.2595, 0.1827, -0.2689, 2.1217]) tensor([0.0696, 0.1083, 0.0690, 0.7531]) -Greedy action tensor([ 1.3612, -0.8966, 0.7321, -0.2973]) tensor([0.5470, 0.0572, 0.2916, 0.1042]) -Greedy action tensor([ 0.3793, -1.7957, -0.8538, 0.2531]) tensor([0.4374, 0.0497, 0.1274, 0.3855]) -Greedy action tensor([ 0.3347, 0.8052, 0.1365, -0.8383]) tensor([0.2681, 0.4291, 0.2199, 0.0829]) -Greedy action tensor([-5.1677e-04, -2.6871e-01, -5.3683e-01, 7.8054e-02]) tensor([0.2914, 0.2229, 0.1705, 0.3152]) -Greedy action tensor([ 0.8815, 0.7098, -0.2425, 0.5951]) tensor([0.3427, 0.2886, 0.1114, 0.2573]) -Greedy action tensor([-1.2606, -1.1785, 0.2601, 0.1343]) tensor([0.0935, 0.1015, 0.4278, 0.3772]) -Greedy action tensor([ 0.8609, -0.1529, -0.2719, 0.5566]) tensor([0.4128, 0.1498, 0.1330, 0.3045]) -Greedy action tensor([ 1.3629, -0.8900, 1.3109, 0.0914]) tensor([0.4283, 0.0450, 0.4066, 0.1201]) -Greedy action tensor([ 1.1168, -1.3482, 0.8175, 0.7272]) tensor([0.3994, 0.0340, 0.2961, 0.2705]) -Greedy action tensor([-2.1259, -1.3142, 0.8396, 0.4280]) tensor([0.0282, 0.0634, 0.5464, 0.3620]) -Greedy action tensor([ 0.5147, -0.6793, -0.0302, 0.1795]) tensor([0.3849, 0.1166, 0.2232, 0.2753]) -Greedy action tensor([-0.1146, -0.9559, 1.4148, 0.3348]) tensor([0.1313, 0.0566, 0.6062, 0.2059]) -Greedy action tensor([-0.4499, 0.9320, 1.2621, -0.8255]) tensor([0.0892, 0.3553, 0.4942, 0.0613]) -Greedy action tensor([0.8272, 0.2146, 0.7219, 1.1282]) tensor([0.2636, 0.1429, 0.2373, 0.3562]) -Greedy action tensor([-0.3449, 0.1541, 0.9891, 0.0420]) tensor([0.1263, 0.2081, 0.4796, 0.1860]) -Greedy action tensor([ 1.1654, -0.0247, 0.6977, -0.4059]) tensor([0.4676, 0.1423, 0.2929, 0.0972]) -Greedy action tensor([-0.9099, 0.1318, 0.9077, -0.5786]) tensor([0.0878, 0.2490, 0.5409, 0.1223]) -Greedy action tensor([ 0.1304, -0.2315, 1.3032, -0.1585]) tensor([0.1762, 0.1227, 0.5692, 0.1320]) -Greedy action tensor([-0.9091, -0.4224, -0.9497, -0.0824]) tensor([0.1703, 0.2770, 0.1635, 0.3892]) -Greedy action tensor([-0.1657, -0.6450, 0.8105, 0.4233]) tensor([0.1646, 0.1019, 0.4369, 0.2966]) -Greedy action tensor([ 0.4909, -0.9022, 1.6009, 0.1430]) tensor([0.2005, 0.0498, 0.6082, 0.1415]) -Greedy action tensor([ 1.3704, -0.5753, 0.1888, 0.7511]) tensor([0.5030, 0.0719, 0.1543, 0.2708]) -Greedy action tensor([ 1.1834, -0.5337, 0.6023, 1.2361]) tensor([0.3580, 0.0643, 0.2002, 0.3774]) -Greedy action tensor([-0.0492, -1.4735, 1.1151, -0.3713]) tensor([0.1935, 0.0466, 0.6198, 0.1402]) -Greedy action tensor([-0.1429, -0.2337, 0.0049, -1.1855]) tensor([0.2920, 0.2666, 0.3385, 0.1029]) -Greedy action tensor([ 1.5843, -0.3337, 0.0909, 1.0977]) tensor([0.5035, 0.0740, 0.1131, 0.3095]) -Greedy action tensor([-0.7071, -0.5653, -0.5416, -0.5345]) tensor([0.2212, 0.2549, 0.2610, 0.2629]) -Greedy action tensor([ 0.7694, 0.0061, -0.1406, 0.8097]) tensor([0.3437, 0.1602, 0.1383, 0.3578]) -Greedy action tensor([ 0.6885, -1.9891, -0.0046, 0.8152]) tensor([0.3698, 0.0254, 0.1849, 0.4198]) -Greedy action tensor([-0.7673, -0.5391, -0.7859, 1.1249]) tensor([0.1013, 0.1273, 0.0994, 0.6720]) -Greedy action tensor([-0.1276, -0.9244, -0.0648, 0.8000]) tensor([0.1982, 0.0894, 0.2111, 0.5013]) -Greedy action tensor([-0.0437, -1.5515, -0.3559, 0.5626]) tensor([0.2641, 0.0585, 0.1933, 0.4842]) -Greedy action tensor([0.7383, 1.1212, 0.5894, 0.3428]) tensor([0.2499, 0.3665, 0.2153, 0.1683]) -Greedy action tensor([ 0.2817, -0.8323, 0.0945, 0.9535]) tensor([0.2430, 0.0798, 0.2015, 0.4757]) -Greedy action tensor([-0.6114, 0.3457, 0.7817, -0.9924]) tensor([0.1203, 0.3132, 0.4844, 0.0822]) -Greedy action tensor([ 0.8673, -1.0265, -0.2128, 0.6183]) tensor([0.4406, 0.0663, 0.1496, 0.3435]) -Greedy action tensor([-0.6612, -0.2069, 0.6723, 0.0483]) tensor([0.1190, 0.1875, 0.4516, 0.2419]) -Greedy action tensor([-1.2216, -0.4589, -1.2217, -0.0544]) tensor([0.1359, 0.2914, 0.1359, 0.4367]) -Greedy action tensor([-0.6614, -2.4760, -0.0834, 0.9287]) tensor([0.1274, 0.0208, 0.2271, 0.6248]) -Greedy action tensor([ 0.3608, -0.5596, -0.4835, 0.8917]) tensor([0.2834, 0.1129, 0.1218, 0.4819]) -Greedy action tensor([ 0.7541, -1.6665, -0.0164, 0.8502]) tensor([0.3770, 0.0335, 0.1745, 0.4150]) -Greedy action tensor([-0.8099, -0.1612, -0.4611, 0.1088]) tensor([0.1463, 0.2798, 0.2073, 0.3666]) -Greedy action tensor([-0.4661, -0.7752, 0.5745, -0.1729]) tensor([0.1693, 0.1243, 0.4793, 0.2270]) -Greedy action tensor([ 0.5552, -0.6779, 0.2130, 1.0045]) tensor([0.2802, 0.0816, 0.1990, 0.4392]) -Greedy action tensor([ 0.3169, -1.6224, 1.0340, 0.0039]) tensor([0.2549, 0.0367, 0.5221, 0.1864]) -Greedy action tensor([-0.0994, -0.5149, 0.5042, 0.5471]) tensor([0.1853, 0.1223, 0.3388, 0.3537]) -Greedy action tensor([0.5844, 0.0507, 1.2594, 1.5426]) tensor([0.1624, 0.0952, 0.3190, 0.4234]) -Greedy action tensor([ 0.9444, -1.2883, 0.2026, -0.5491]) tensor([0.5531, 0.0593, 0.2634, 0.1242]) -Greedy action tensor([ 0.1398, -0.5771, 0.0800, 0.7655]) tensor([0.2326, 0.1136, 0.2191, 0.4348]) -Greedy action tensor([ 0.3188, -2.8071, -0.1813, 0.3080]) tensor([0.3788, 0.0166, 0.2298, 0.3748]) -Greedy action tensor([-0.8070, 0.1837, 0.8354, -0.4357]) tensor([0.0970, 0.2612, 0.5012, 0.1406]) -Greedy action tensor([ 0.8952, 0.3351, -0.6240, -0.7898]) tensor([0.5062, 0.2891, 0.1108, 0.0939]) -Greedy action tensor([ 0.8088, -0.6228, -0.6996, 0.7121]) tensor([0.4223, 0.1009, 0.0934, 0.3834]) -Greedy action tensor([ 0.1922, -1.5669, 1.0931, -0.0464]) tensor([0.2261, 0.0389, 0.5568, 0.1781]) -Greedy action tensor([ 1.4552, -0.2923, -0.4488, 0.7513]) tensor([0.5501, 0.0958, 0.0820, 0.2721]) -Greedy action tensor([-1.8972, -0.4283, 0.6465, -0.1605]) tensor([0.0421, 0.1829, 0.5359, 0.2391]) -Greedy action tensor([-1.2800, -0.5701, 0.5023, -0.2150]) tensor([0.0842, 0.1712, 0.5004, 0.2442]) -Greedy action tensor([-1.7115, -0.3814, 0.6516, 0.0994]) tensor([0.0465, 0.1757, 0.4936, 0.2842]) -Greedy action tensor([-1.7727, -0.4230, 0.5812, -0.0857]) tensor([0.0481, 0.1855, 0.5064, 0.2599]) -Greedy action tensor([-1.7655, -0.4558, 0.5620, -0.0618]) tensor([0.0489, 0.1812, 0.5013, 0.2686]) -Greedy action tensor([-1.8431, -0.3962, 0.6200, -0.0975]) tensor([0.0440, 0.1871, 0.5168, 0.2522]) -Greedy action tensor([-1.4419, -0.5246, 0.8372, 0.6643]) tensor([0.0465, 0.1165, 0.4546, 0.3824]) -Greedy action tensor([-1.0488, -0.2654, 0.5072, -0.5250]) tensor([0.1040, 0.2276, 0.4928, 0.1756]) -Greedy action tensor([-1.7796, -0.4828, 0.6449, 0.0566]) tensor([0.0450, 0.1646, 0.5083, 0.2822]) -Greedy action tensor([-1.7182, -0.3360, 0.6840, 0.2219]) tensor([0.0435, 0.1733, 0.4805, 0.3027]) -Greedy action tensor([-1.5104, -0.5113, 0.6138, 0.6059]) tensor([0.0491, 0.1332, 0.4105, 0.4072]) -Greedy action tensor([-1.9668, -0.8497, 0.5508, -0.0479]) tensor([0.0430, 0.1313, 0.5328, 0.2928]) -Greedy action tensor([-1.3779, -0.5799, 0.5139, 0.5078]) tensor([0.0608, 0.1351, 0.4033, 0.4008]) -Greedy action tensor([-1.9035, -0.3877, 0.6353, -0.1549]) tensor([0.0417, 0.1900, 0.5285, 0.2398]) -Greedy action tensor([-1.5863, -0.4397, 0.5554, 0.1098]) tensor([0.0552, 0.1738, 0.4700, 0.3010]) -Greedy action tensor([-1.7913, -0.4658, 0.5898, -0.1041]) tensor([0.0477, 0.1794, 0.5154, 0.2575]) -Greedy action tensor([-1.7493, -0.4811, 0.6105, -0.1040]) tensor([0.0492, 0.1749, 0.5210, 0.2550]) -Greedy action tensor([-0.5366, -0.4410, 0.9837, 1.6292]) tensor([0.0650, 0.0715, 0.2971, 0.5665]) -Greedy action tensor([-1.8235, -0.3416, 0.6074, -0.0997]) tensor([0.0447, 0.1967, 0.5081, 0.2505]) -Greedy action tensor([-1.4358, -0.5373, 1.2643, 1.1414]) tensor([0.0317, 0.0780, 0.4725, 0.4178]) -Greedy action tensor([-1.9452, -0.4536, 0.6649, -0.1814]) tensor([0.0402, 0.1786, 0.5467, 0.2345]) -Greedy action tensor([-1.5123, -0.5222, 0.4411, 0.1098]) tensor([0.0633, 0.1703, 0.4461, 0.3203]) -Greedy action tensor([-1.1744, -0.6066, 0.2521, 0.4658]) tensor([0.0828, 0.1460, 0.3446, 0.4267]) -Greedy action tensor([-1.3476, -0.3894, 0.4907, -0.3987]) tensor([0.0802, 0.2090, 0.5038, 0.2070]) -Greedy action tensor([-1.5547, -0.6583, 1.2166, 1.0906]) tensor([0.0298, 0.0731, 0.4767, 0.4203]) -Greedy action tensor([-0.3907, -0.1080, 0.4071, 0.0520]) tensor([0.1638, 0.2173, 0.3638, 0.2551]) -Greedy action tensor([-1.3675, -0.5211, 0.4059, 0.3160]) tensor([0.0685, 0.1596, 0.4033, 0.3686]) -Greedy action tensor([-1.6319, -0.5429, 0.5114, -0.0394]) tensor([0.0574, 0.1706, 0.4897, 0.2823]) -Greedy action tensor([-1.6815, -0.3226, 0.5257, 0.0305]) tensor([0.0512, 0.1994, 0.4656, 0.2838]) -Greedy action tensor([-1.7070, -0.5236, 0.5510, -0.0763]) tensor([0.0528, 0.1724, 0.5051, 0.2697]) -Greedy action tensor([-1.5594, -0.5505, 0.8661, 0.6573]) tensor([0.0413, 0.1132, 0.4667, 0.3788]) -Greedy action tensor([-1.0937, -0.7667, 1.1559, 1.4831]) tensor([0.0400, 0.0554, 0.3790, 0.5257]) -Greedy action tensor([-1.8982, -0.3944, 0.6341, -0.1498]) tensor([0.0420, 0.1888, 0.5281, 0.2411]) -Greedy action tensor([-1.7527, -0.4827, 0.5774, -0.0615]) tensor([0.0493, 0.1757, 0.5072, 0.2677]) -Greedy action tensor([-1.8806, -0.4318, 0.6323, -0.1456]) tensor([0.0430, 0.1830, 0.5304, 0.2436]) -Greedy action tensor([-1.6786, -0.5125, 0.5615, -0.0904]) tensor([0.0541, 0.1735, 0.5078, 0.2646]) -Greedy action tensor([-1.7557, -0.2897, 0.5141, -0.0744]) tensor([0.0491, 0.2125, 0.4748, 0.2636]) -Greedy action tensor([-1.9338, -0.4535, 0.6596, -0.1739]) tensor([0.0407, 0.1788, 0.5441, 0.2364]) -Greedy action tensor([-1.7283, -0.3581, 0.5386, -0.0681]) tensor([0.0504, 0.1983, 0.4862, 0.2651]) -Greedy action tensor([-1.9148, -0.4395, 0.6504, -0.1604]) tensor([0.0414, 0.1810, 0.5383, 0.2393]) -Greedy action tensor([-1.8325, -0.4415, 0.6912, -0.3182]) tensor([0.0454, 0.1823, 0.5660, 0.2063]) -Greedy action tensor([-1.7639, -0.3610, 0.5897, -0.1724]) tensor([0.0488, 0.1984, 0.5133, 0.2395]) -Greedy action tensor([-1.1287, -0.4521, 0.4750, 0.7603]) tensor([0.0687, 0.1352, 0.3416, 0.4544]) -Greedy action tensor([-1.5720, -0.9031, 1.1674, -0.1543]) tensor([0.0443, 0.0865, 0.6862, 0.1830]) -Greedy action tensor([-0.9679, -0.6218, 0.2429, 0.1771]) tensor([0.1122, 0.1586, 0.3766, 0.3526]) -Greedy action tensor([-1.5156, -0.4460, -0.0247, -0.3677]) tensor([0.0869, 0.2532, 0.3860, 0.2739]) -Greedy action tensor([-0.1846, -0.0730, 0.1854, 0.2365]) tensor([0.1965, 0.2197, 0.2845, 0.2994]) -Greedy action tensor([-1.4574, -0.3227, 0.4924, -0.1717]) tensor([0.0678, 0.2108, 0.4763, 0.2451]) -Greedy action tensor([-1.8534, -0.4446, 0.8247, 0.1320]) tensor([0.0371, 0.1519, 0.5406, 0.2704]) -Greedy action tensor([-1.9007, -0.3866, 0.6358, -0.1512]) tensor([0.0418, 0.1899, 0.5280, 0.2403]) -Greedy action tensor([-0.8437, 0.0695, 0.1664, -0.0659]) tensor([0.1188, 0.2962, 0.3263, 0.2587]) -Greedy action tensor([-1.9178, -0.4300, 0.6497, -0.1696]) tensor([0.0413, 0.1829, 0.5384, 0.2373]) -Greedy action tensor([-1.9022, -0.4310, 0.6475, -0.1468]) tensor([0.0418, 0.1819, 0.5347, 0.2416]) -Greedy action tensor([-1.8482, -0.4052, 0.6206, -0.1182]) tensor([0.0441, 0.1866, 0.5206, 0.2487]) -Greedy action tensor([-1.9862, -0.8683, 0.2682, -0.2167]) tensor([0.0514, 0.1572, 0.4898, 0.3016]) -Greedy action tensor([-1.7696, 0.0574, 0.4859, -0.1142]) tensor([0.0455, 0.2826, 0.4338, 0.2381]) -Greedy action tensor([-1.7754, -0.4630, 0.5856, -0.0887]) tensor([0.0483, 0.1793, 0.5117, 0.2607]) -Greedy action tensor([-1.0151, -0.7235, 1.1562, 1.4728]) tensor([0.0432, 0.0578, 0.3789, 0.5200]) -Greedy action tensor([-1.7024, -0.5518, 0.6120, 0.0063]) tensor([0.0505, 0.1596, 0.5110, 0.2789]) -Greedy action tensor([-1.8992, -0.4418, 0.6407, -0.1609]) tensor([0.0423, 0.1815, 0.5358, 0.2404]) -Greedy action tensor([-1.4217, -0.5652, 0.7268, 0.5281]) tensor([0.0528, 0.1242, 0.4523, 0.3707]) -Greedy action tensor([-1.2809, -0.4291, 0.3157, 0.2163]) tensor([0.0784, 0.1838, 0.3872, 0.3505]) -Greedy action tensor([-1.7936, -0.1703, 0.5584, -0.0611]) tensor([0.0450, 0.2280, 0.4726, 0.2544]) -Greedy action tensor([-1.4366, -0.3934, 0.6219, -0.5319]) tensor([0.0707, 0.2007, 0.5539, 0.1747]) -Greedy action tensor([-1.1471, -0.5294, 0.8088, 1.0239]) tensor([0.0535, 0.0992, 0.3783, 0.4690]) -Greedy action tensor([-1.8565, -0.2122, 0.5866, -0.1271]) tensor([0.0429, 0.2220, 0.4934, 0.2417]) -Greedy action tensor([-0.9734, -0.4304, 0.4886, -0.4095]) tensor([0.1137, 0.1957, 0.4907, 0.1999]) -Greedy action tensor([-1.7331, -0.1097, 0.5122, 0.0067]) tensor([0.0471, 0.2390, 0.4452, 0.2686]) -Greedy action tensor([-1.1730, -0.5854, 0.8497, 1.1698]) tensor([0.0481, 0.0866, 0.3639, 0.5013]) -Greedy action tensor([-0.4738, -0.4617, 1.0869, 1.6186]) tensor([0.0672, 0.0680, 0.3201, 0.5447]) -Greedy action tensor([-1.2381, -0.5630, 0.3051, 0.2300]) tensor([0.0834, 0.1639, 0.3905, 0.3622]) -Greedy action tensor([-1.7711, -0.5093, 0.5751, -0.0639]) tensor([0.0488, 0.1724, 0.5098, 0.2691]) -Greedy action tensor([-1.7194, -0.3507, 0.5261, -0.0271]) tensor([0.0505, 0.1984, 0.4769, 0.2742]) -Greedy action tensor([-1.0345, 0.9761, 0.1482, 0.6761]) tensor([0.0579, 0.4326, 0.1890, 0.3205]) -Greedy action tensor([-1.8843, -0.4493, 0.6308, -0.1562]) tensor([0.0431, 0.1810, 0.5332, 0.2427]) -Greedy action tensor([-1.8826, -0.4422, 0.6333, -0.1480]) tensor([0.0430, 0.1815, 0.5320, 0.2436]) -Greedy action tensor([-1.8181, -0.4895, 0.6121, -0.0776]) tensor([0.0458, 0.1729, 0.5203, 0.2610]) -Greedy action tensor([-1.9126, -0.4613, 0.6466, -0.1663]) tensor([0.0418, 0.1784, 0.5402, 0.2396]) -Greedy action tensor([-1.7801, -0.4085, 0.6902, -0.0346]) tensor([0.0445, 0.1752, 0.5257, 0.2546]) -Greedy action tensor([-1.6865, -0.5109, 0.6378, 0.1019]) tensor([0.0489, 0.1585, 0.5000, 0.2926]) -Greedy action tensor([-0.8123, -0.5565, 0.1730, 0.2852]) tensor([0.1255, 0.1621, 0.3362, 0.3761]) -Greedy action tensor([ 1.9192, -1.0121, -0.3039, 0.8984]) tensor([0.6571, 0.0350, 0.0711, 0.2367]) -Greedy action tensor([ 1.2813, -0.1428, -0.0353, 0.1504]) tensor([0.5460, 0.1314, 0.1464, 0.1762]) -Greedy action tensor([ 1.9239, -1.2023, -0.2654, 0.5669]) tensor([0.7076, 0.0311, 0.0792, 0.1822]) -Greedy action tensor([ 0.8377, -0.5228, -0.1115, 0.4272]) tensor([0.4335, 0.1112, 0.1678, 0.2875]) -Greedy action tensor([ 1.4689, -0.2874, -0.3634, 0.2830]) tensor([0.6104, 0.1054, 0.0977, 0.1865]) -Greedy action tensor([ 1.1660, -0.1910, -0.3033, 0.2254]) tensor([0.5325, 0.1371, 0.1225, 0.2079]) -Greedy action tensor([ 1.7460, 0.3253, -0.1520, 0.3660]) tensor([0.6086, 0.1470, 0.0912, 0.1531]) -Greedy action tensor([ 1.6591, -0.2060, 0.3625, 0.3486]) tensor([0.5889, 0.0912, 0.1611, 0.1588]) -Greedy action tensor([ 1.4397, -0.5160, -0.5285, 0.4803]) tensor([0.6009, 0.0850, 0.0839, 0.2302]) -Greedy action tensor([ 1.8738, -0.4560, -0.4433, 0.1938]) tensor([0.7235, 0.0704, 0.0713, 0.1348]) -Greedy action tensor([ 1.2227, -0.2083, -0.3416, 0.1106]) tensor([0.5627, 0.1345, 0.1177, 0.1850]) -Greedy action tensor([ 1.3639, -0.3881, -0.3985, 0.5395]) tensor([0.5607, 0.0972, 0.0962, 0.2459]) -Greedy action tensor([ 2.1306, -1.4143, -0.5593, 0.5252]) tensor([0.7707, 0.0222, 0.0523, 0.1548]) -Greedy action tensor([ 1.9726, -0.0270, -0.5634, 0.2046]) tensor([0.7219, 0.0977, 0.0572, 0.1232]) -Greedy action tensor([ 1.8889, -0.4258, -0.1307, 0.1926]) tensor([0.7068, 0.0698, 0.0938, 0.1296]) -Greedy action tensor([ 1.2060, -0.2126, -0.3386, 0.4427]) tensor([0.5204, 0.1260, 0.1111, 0.2426]) -Greedy action tensor([ 1.0239, -0.2904, -0.4154, 0.0864]) tensor([0.5271, 0.1416, 0.1250, 0.2064]) -Greedy action tensor([ 1.9346, -0.5041, -0.2992, 0.4762]) tensor([0.7008, 0.0612, 0.0751, 0.1630]) -Greedy action tensor([ 1.7057, -0.5638, -0.3241, 0.2896]) tensor([0.6769, 0.0700, 0.0889, 0.1643]) -Greedy action tensor([ 1.6409, -0.6507, -0.2648, 0.8350]) tensor([0.5894, 0.0596, 0.0877, 0.2633]) -Greedy action tensor([ 1.4143, -0.1543, -0.8972, 0.4931]) tensor([0.5863, 0.1222, 0.0581, 0.2334]) -Greedy action tensor([ 2.3376, -1.0781, -0.4748, 0.8854]) tensor([0.7536, 0.0248, 0.0453, 0.1764]) -Greedy action tensor([ 1.0898, -0.9067, 0.1888, 0.1700]) tensor([0.5153, 0.0700, 0.2093, 0.2054]) -Greedy action tensor([ 1.6210, -0.9131, -0.0739, 0.4123]) tensor([0.6404, 0.0508, 0.1176, 0.1912]) -Greedy action tensor([ 1.2225, -0.2941, -0.1381, 0.4113]) tensor([0.5208, 0.1143, 0.1336, 0.2314]) -Greedy action tensor([ 2.1554, -0.6450, -0.4820, 0.4153]) tensor([0.7646, 0.0465, 0.0547, 0.1342]) -Greedy action tensor([ 1.1494, -0.6183, -0.4499, 0.6651]) tensor([0.5028, 0.0858, 0.1016, 0.3098]) -Greedy action tensor([ 1.4526, -0.3634, -0.4612, 0.0302]) tensor([0.6446, 0.1049, 0.0951, 0.1554]) -Greedy action tensor([ 0.7023, -0.2592, 0.2554, 0.1371]) tensor([0.3861, 0.1476, 0.2469, 0.2194]) -Greedy action tensor([ 1.6867, -0.7218, -0.3944, 0.3810]) tensor([0.6731, 0.0605, 0.0840, 0.1824]) -Greedy action tensor([ 1.0549, -0.1901, -0.0372, 0.1777]) tensor([0.4903, 0.1412, 0.1645, 0.2040]) -Greedy action tensor([ 1.4904, -1.0712, -0.1150, 0.2457]) tensor([0.6386, 0.0493, 0.1282, 0.1839]) -Greedy action tensor([ 1.7184, -0.5597, -0.4569, 0.5567]) tensor([0.6540, 0.0670, 0.0743, 0.2047]) -Greedy action tensor([ 1.2051, -0.3890, -0.2376, 0.0358]) tensor([0.5714, 0.1161, 0.1350, 0.1775]) -Greedy action tensor([ 1.6777, -0.4056, 0.0524, 0.2288]) tensor([0.6426, 0.0800, 0.1265, 0.1509]) -Greedy action tensor([ 1.7307, -0.7018, -0.3534, 0.5526]) tensor([0.6578, 0.0578, 0.0818, 0.2025]) -Greedy action tensor([ 1.4984, -0.4025, -0.5066, 0.3583]) tensor([0.6235, 0.0932, 0.0840, 0.1994]) -Greedy action tensor([ 1.5382, -0.5805, -0.3084, 0.3165]) tensor([0.6358, 0.0764, 0.1003, 0.1874]) -Greedy action tensor([ 1.1822, -0.4726, -0.1158, -0.1271]) tensor([0.5766, 0.1102, 0.1575, 0.1557]) -Greedy action tensor([ 1.1225, -0.2585, -0.1683, 0.0574]) tensor([0.5344, 0.1343, 0.1470, 0.1842]) -Greedy action tensor([ 1.4309, -0.0966, -0.1664, 0.7402]) tensor([0.5206, 0.1130, 0.1054, 0.2609]) -Greedy action tensor([ 1.5941, -0.2015, -0.3774, 0.2946]) tensor([0.6337, 0.1052, 0.0882, 0.1728]) -Greedy action tensor([ 0.8898, 0.0906, -0.1041, 0.2585]) tensor([0.4252, 0.1912, 0.1574, 0.2262]) -Greedy action tensor([ 1.0249, -0.5454, 0.3974, 0.0759]) tensor([0.4697, 0.0977, 0.2508, 0.1818]) -Greedy action tensor([ 1.0639, -0.3965, 0.0678, 0.0297]) tensor([0.5110, 0.1186, 0.1887, 0.1817]) -Greedy action tensor([ 0.8754, -0.1201, -0.2536, 0.3673]) tensor([0.4358, 0.1610, 0.1409, 0.2622]) -Greedy action tensor([ 0.9143, -0.3138, -0.3662, -0.0667]) tensor([0.5140, 0.1505, 0.1428, 0.1927]) -Greedy action tensor([ 1.4909, -0.6469, -0.6125, 0.7160]) tensor([0.5880, 0.0693, 0.0718, 0.2709]) -Greedy action tensor([ 0.8807, -0.1201, -0.2985, 0.3763]) tensor([0.4388, 0.1613, 0.1349, 0.2650]) -Greedy action tensor([ 0.9555, 0.0144, -0.2456, 0.1864]) tensor([0.4642, 0.1811, 0.1396, 0.2151]) -Greedy action tensor([ 1.0260, -0.0635, -0.1068, -0.1017]) tensor([0.5045, 0.1697, 0.1625, 0.1633]) -Greedy action tensor([ 0.5498, -0.3541, -0.4913, 0.5051]) tensor([0.3684, 0.1492, 0.1301, 0.3523]) -Greedy action tensor([ 1.4788, -0.5678, -0.7618, 0.1879]) tensor([0.6620, 0.0855, 0.0704, 0.1821]) -Greedy action tensor([ 1.5821, -0.4215, -0.2745, -0.0496]) tensor([0.6727, 0.0907, 0.1051, 0.1316]) -Greedy action tensor([ 1.1241, -0.2534, -0.0407, 0.0296]) tensor([0.5266, 0.1328, 0.1643, 0.1763]) -Greedy action tensor([ 1.3532, -0.7530, -0.6251, 0.8280]) tensor([0.5401, 0.0657, 0.0747, 0.3195]) -Greedy action tensor([ 1.6277, -0.3835, -0.4485, -0.3586]) tensor([0.7161, 0.0958, 0.0898, 0.0982]) -Greedy action tensor([ 1.3793, -0.3286, -0.1265, 0.4949]) tensor([0.5506, 0.0998, 0.1222, 0.2274]) -Greedy action tensor([ 1.7574, -0.3474, -0.2250, 0.2126]) tensor([0.6789, 0.0827, 0.0935, 0.1448]) -Greedy action tensor([ 1.4348, -0.4838, 0.2766, -0.1251]) tensor([0.5984, 0.0879, 0.1879, 0.1258]) -Greedy action tensor([ 1.3460, -0.3791, -0.6513, 0.5571]) tensor([0.5656, 0.1008, 0.0767, 0.2570]) -Greedy action tensor([ 1.1874, -0.5233, -0.2923, 0.1637]) tensor([0.5657, 0.1022, 0.1288, 0.2032]) -Greedy action tensor([ 1.3747, -0.5081, -0.1808, 0.4054]) tensor([0.5738, 0.0873, 0.1211, 0.2177]) -Greedy action tensor([ 0.4976, -0.3422, -0.0649, -0.1486]) tensor([0.3959, 0.1710, 0.2256, 0.2075]) -Greedy action tensor([ 0.5760, -0.4588, -0.0570, -0.0244]) tensor([0.4107, 0.1459, 0.2181, 0.2253]) -Greedy action tensor([ 1.6821, -0.8308, -0.3056, 0.3866]) tensor([0.6703, 0.0543, 0.0918, 0.1835]) -Greedy action tensor([ 2.2447, -0.6537, -0.2819, 0.8985]) tensor([0.7167, 0.0395, 0.0573, 0.1865]) -Greedy action tensor([ 1.4561, -0.0551, -0.6768, 0.4005]) tensor([0.5927, 0.1308, 0.0702, 0.2063]) -Greedy action tensor([ 1.6527, -0.4373, -0.4717, 0.1869]) tensor([0.6784, 0.0839, 0.0811, 0.1566]) -Greedy action tensor([ 1.2581, -0.9362, -0.2457, 0.4072]) tensor([0.5679, 0.0633, 0.1262, 0.2425]) -Greedy action tensor([ 0.9260, -0.0733, -0.1316, 0.1817]) tensor([0.4565, 0.1681, 0.1585, 0.2169]) -Greedy action tensor([ 1.9606, -0.4441, 0.2186, 0.9718]) tensor([0.6107, 0.0551, 0.1070, 0.2272]) -Greedy action tensor([ 1.1528, -0.2376, -0.1361, -0.2531]) tensor([0.5651, 0.1407, 0.1557, 0.1385]) -Greedy action tensor([ 1.5463, -0.3953, -0.3134, 0.3821]) tensor([0.6206, 0.0890, 0.0966, 0.1937]) -Greedy action tensor([ 1.3196, -0.3419, -0.4880, -0.2141]) tensor([0.6371, 0.1210, 0.1045, 0.1374]) -Greedy action tensor([ 0.9723, -0.3741, -0.4000, 0.3743]) tensor([0.4846, 0.1261, 0.1229, 0.2665]) -Greedy action tensor([ 1.2040, -0.1978, 0.3950, -0.3342]) tensor([0.5246, 0.1291, 0.2336, 0.1127]) -Greedy action tensor([ 1.5227, -0.5777, -0.3057, 0.0650]) tensor([0.6597, 0.0808, 0.1060, 0.1536]) -Greedy action tensor([ 0.8673, -0.2329, -0.1536, 0.1919]) tensor([0.4541, 0.1511, 0.1636, 0.2311]) -Greedy action tensor([ 0.8969, -0.0840, -0.5275, -0.3633]) tensor([0.5265, 0.1974, 0.1267, 0.1493]) -Greedy action tensor([ 0.8721, -0.1148, -0.0244, 0.0172]) tensor([0.4533, 0.1690, 0.1849, 0.1928]) -Greedy action tensor([ 1.0715, -0.5873, -0.0323, -0.5523]) tensor([0.5817, 0.1107, 0.1929, 0.1147]) -Greedy action tensor([ 1.0666, -0.8105, 0.0033, -0.5113]) tensor([0.5866, 0.0898, 0.2026, 0.1211]) -Greedy action tensor([ 0.5063, 0.0781, -0.1685, -0.2628]) tensor([0.3810, 0.2483, 0.1940, 0.1766]) -Greedy action tensor([ 0.3016, 0.1644, -0.0752, -0.0683]) tensor([0.3078, 0.2684, 0.2112, 0.2126]) -Greedy action tensor([ 0.3594, 0.0309, -0.1881, -0.2426]) tensor([0.3513, 0.2530, 0.2032, 0.1924]) -Greedy action tensor([ 0.3400, 0.0988, -0.1385, -0.2144]) tensor([0.3356, 0.2637, 0.2080, 0.1928]) -Greedy action tensor([ 0.6267, -0.2746, -0.0093, -0.2544]) tensor([0.4256, 0.1728, 0.2253, 0.1763]) -Greedy action tensor([ 0.2127, 0.1767, -0.0281, -0.3538]) tensor([0.3014, 0.2907, 0.2369, 0.1710]) -Greedy action tensor([ 0.2710, -0.2190, -0.1051, -0.2660]) tensor([0.3468, 0.2124, 0.2381, 0.2027]) -Greedy action tensor([ 1.1711, -0.7731, -0.0412, -0.5350]) tensor([0.6165, 0.0882, 0.1834, 0.1119]) -Greedy action tensor([ 0.4756, -0.0232, -0.1073, -0.2222]) tensor([0.3755, 0.2280, 0.2096, 0.1869]) -Greedy action tensor([ 0.5414, -0.0568, 0.0234, -0.6598]) tensor([0.4088, 0.2247, 0.2435, 0.1230]) -Greedy action tensor([ 0.9013, -0.5045, -0.0589, -0.2436]) tensor([0.5138, 0.1260, 0.1967, 0.1635]) -Greedy action tensor([ 1.2063, -0.5656, -0.0945, -0.3850]) tensor([0.6075, 0.1033, 0.1655, 0.1237]) -Greedy action tensor([ 0.9373, -0.0132, -0.0633, -0.5667]) tensor([0.5059, 0.1956, 0.1860, 0.1124]) -Greedy action tensor([ 0.5788, -0.2480, 0.1580, -0.6516]) tensor([0.4191, 0.1833, 0.2751, 0.1224]) -Greedy action tensor([ 0.3627, -0.1454, 0.0388, -0.2078]) tensor([0.3460, 0.2082, 0.2503, 0.1956]) -Greedy action tensor([ 0.3105, -0.2643, -0.1341, -0.1652]) tensor([0.3539, 0.1992, 0.2269, 0.2200]) -Greedy action tensor([ 0.4283, -0.1213, -0.0339, -0.1794]) tensor([0.3634, 0.2098, 0.2289, 0.1979]) -Greedy action tensor([ 0.4874, 0.0762, -0.1202, -0.0373]) tensor([0.3572, 0.2368, 0.1946, 0.2114]) -Greedy action tensor([ 0.8397, -0.2359, 0.0529, -0.4718]) tensor([0.4841, 0.1651, 0.2204, 0.1304]) -Greedy action tensor([ 0.5463, -0.3074, -0.0608, -0.1931]) tensor([0.4085, 0.1739, 0.2226, 0.1950]) -Greedy action tensor([ 0.5961, 0.3006, -0.2575, -0.0960]) tensor([0.3744, 0.2787, 0.1595, 0.1874]) -Greedy action tensor([ 0.9521, 0.6239, -0.1987, -0.1966]) tensor([0.4249, 0.3060, 0.1344, 0.1347]) -Greedy action tensor([ 0.6001, -0.3210, 0.1021, -0.3557]) tensor([0.4184, 0.1665, 0.2542, 0.1609]) -Greedy action tensor([ 0.6895, -0.2549, 0.0456, -0.4735]) tensor([0.4491, 0.1747, 0.2359, 0.1404]) -Greedy action tensor([ 0.3733, -0.1741, 0.0606, -0.3237]) tensor([0.3561, 0.2060, 0.2605, 0.1774]) -Greedy action tensor([ 0.6244, -0.2711, -0.0303, -0.3233]) tensor([0.4319, 0.1764, 0.2244, 0.1674]) -Greedy action tensor([ 0.4467, -0.2691, 0.0085, -0.3147]) tensor([0.3845, 0.1879, 0.2481, 0.1795]) -Greedy action tensor([ 0.9039, -0.6753, -0.1444, -0.4485]) tensor([0.5509, 0.1136, 0.1931, 0.1425]) -Greedy action tensor([ 0.4795, 0.0745, -0.0990, 0.1422]) tensor([0.3400, 0.2268, 0.1906, 0.2426]) -Greedy action tensor([ 0.9617, -0.5933, -0.0159, -0.5547]) tensor([0.5534, 0.1169, 0.2082, 0.1215]) -Greedy action tensor([ 0.3614, -0.2182, -0.1497, -0.3744]) tensor([0.3789, 0.2122, 0.2273, 0.1815]) -Greedy action tensor([ 0.8541, -0.5520, -0.0624, -0.4049]) tensor([0.5184, 0.1271, 0.2073, 0.1472]) -Greedy action tensor([ 0.3440, -0.1667, -0.1156, -0.2821]) tensor([0.3615, 0.2169, 0.2283, 0.1933]) -Greedy action tensor([ 0.2641, 0.1277, -0.0878, -0.2908]) tensor([0.3175, 0.2770, 0.2233, 0.1823]) -Greedy action tensor([ 0.5461, 0.2421, -0.0929, -0.2262]) tensor([0.3666, 0.2705, 0.1935, 0.1694]) -Greedy action tensor([ 1.0674, -1.0313, 0.0544, -0.6570]) tensor([0.6010, 0.0737, 0.2182, 0.1071]) -Greedy action tensor([ 0.9231, -0.6820, 0.0726, -0.7725]) tensor([0.5520, 0.1109, 0.2358, 0.1013]) -Greedy action tensor([ 0.4327, 0.2732, -0.0136, -0.0694]) tensor([0.3228, 0.2752, 0.2066, 0.1954]) -Greedy action tensor([ 0.7697, 0.1151, -0.0247, -0.1749]) tensor([0.4237, 0.2202, 0.1914, 0.1647]) -Greedy action tensor([ 1.0222, -0.1414, 0.1394, -0.3242]) tensor([0.5035, 0.1573, 0.2083, 0.1310]) -Greedy action tensor([ 0.3468, -0.0295, -0.1144, -0.1543]) tensor([0.3421, 0.2348, 0.2157, 0.2073]) -Greedy action tensor([ 0.8744, -0.6249, 0.0963, -0.3379]) tensor([0.5050, 0.1128, 0.2320, 0.1502]) -Greedy action tensor([ 0.6001, 0.1569, -0.1174, -0.3118]) tensor([0.3950, 0.2536, 0.1927, 0.1587]) -Greedy action tensor([ 0.4994, 0.0958, -0.0422, -0.2231]) tensor([0.3656, 0.2442, 0.2127, 0.1775]) -Greedy action tensor([ 0.4854, -0.5725, -0.1287, -0.3061]) tensor([0.4271, 0.1483, 0.2311, 0.1935]) -Greedy action tensor([ 0.8532, -0.5316, -0.0356, -0.2958]) tensor([0.5054, 0.1265, 0.2078, 0.1602]) -Greedy action tensor([ 0.2540, 0.0691, -0.0413, -0.2123]) tensor([0.3122, 0.2595, 0.2324, 0.1959]) -Greedy action tensor([ 1.0038, -0.6868, -0.0597, -0.5157]) tensor([0.5719, 0.1055, 0.1975, 0.1252]) -Greedy action tensor([ 0.3071, -0.2421, -0.1540, -0.1287]) tensor([0.3503, 0.2023, 0.2209, 0.2266]) -Greedy action tensor([ 0.7621, -0.4804, -0.0304, -0.4461]) tensor([0.4902, 0.1415, 0.2219, 0.1464]) -Greedy action tensor([ 0.6159, -0.0831, -0.0456, -0.2443]) tensor([0.4105, 0.2040, 0.2118, 0.1737]) -Greedy action tensor([ 0.7701, -0.5031, -0.1061, -0.3113]) tensor([0.4913, 0.1375, 0.2046, 0.1666]) -Greedy action tensor([ 0.6536, -0.3512, -0.0856, -0.1076]) tensor([0.4328, 0.1584, 0.2066, 0.2022]) -Greedy action tensor([ 0.8353, -0.7492, 0.0195, -0.5185]) tensor([0.5248, 0.1076, 0.2321, 0.1355]) -Greedy action tensor([ 0.4135, -0.1893, -0.0366, -0.2520]) tensor([0.3705, 0.2028, 0.2362, 0.1905]) -Greedy action tensor([ 0.6456, -0.2134, -0.1263, -0.2925]) tensor([0.4391, 0.1860, 0.2030, 0.1719]) -Greedy action tensor([ 0.8489, -0.5363, -0.1267, -0.3589]) tensor([0.5192, 0.1299, 0.1957, 0.1552]) -Greedy action tensor([ 0.4077, 0.0428, -0.0036, -0.2951]) tensor([0.3506, 0.2434, 0.2324, 0.1736]) -Greedy action tensor([ 0.5874, -0.0939, -0.1712, -0.4212]) tensor([0.4275, 0.2163, 0.2002, 0.1559]) -Greedy action tensor([ 0.7397, -0.4511, -0.0723, -0.2687]) tensor([0.4733, 0.1439, 0.2101, 0.1727]) -Greedy action tensor([ 0.7907, -0.5491, 0.0160, -0.1899]) tensor([0.4767, 0.1248, 0.2197, 0.1788]) -Greedy action tensor([ 0.6537, -0.4994, 0.0258, -0.2049]) tensor([0.4399, 0.1389, 0.2348, 0.1864]) -Greedy action tensor([ 0.7289, -0.3030, -0.0276, -0.3490]) tensor([0.4617, 0.1645, 0.2167, 0.1571]) -Greedy action tensor([ 0.7204, -0.8653, -0.1046, -0.2788]) tensor([0.4972, 0.1018, 0.2179, 0.1831]) -Greedy action tensor([ 0.5505, -0.2851, 0.0633, -0.4394]) tensor([0.4133, 0.1792, 0.2539, 0.1536]) -Greedy action tensor([ 0.5691, -0.3737, -0.0335, -0.2308]) tensor([0.4191, 0.1632, 0.2294, 0.1883]) -Greedy action tensor([ 0.7999, -0.4664, -0.1258, -0.3631]) tensor([0.5023, 0.1416, 0.1991, 0.1570]) -Greedy action tensor([ 0.6998, 0.0952, -0.0145, -0.3758]) tensor([0.4207, 0.2298, 0.2060, 0.1435]) -Greedy action tensor([ 0.4853, -0.5330, -0.1958, -0.4946]) tensor([0.4459, 0.1611, 0.2257, 0.1674]) -Greedy action tensor([ 0.5641, -0.3971, 0.1329, -0.4612]) tensor([0.4183, 0.1599, 0.2718, 0.1500]) -Greedy action tensor([ 0.3642, -0.1400, 0.1703, -0.5109]) tensor([0.3515, 0.2123, 0.2896, 0.1465]) -Greedy action tensor([ 0.3288, -0.0306, -0.0261, 0.0177]) tensor([0.3193, 0.2229, 0.2239, 0.2339]) -Greedy action tensor([ 0.2002, 0.1938, -0.0303, -0.0539]) tensor([0.2806, 0.2788, 0.2229, 0.2177]) -Greedy action tensor([ 0.6013, -0.2830, -0.0824, -0.1785]) tensor([0.4208, 0.1738, 0.2124, 0.1929]) -Greedy action tensor([ 0.3450, 0.0011, -0.1212, -0.2006]) tensor([0.3429, 0.2432, 0.2152, 0.1987]) -Greedy action tensor([ 0.7605, -0.4837, 0.0391, -0.6270]) tensor([0.4941, 0.1424, 0.2402, 0.1234]) -Greedy action tensor([ 0.4414, -0.1986, 0.0579, -0.2681]) tensor([0.3703, 0.1952, 0.2523, 0.1821]) -Greedy action tensor([ 0.5914, -0.3309, -0.2395, -0.4404]) tensor([0.4567, 0.1816, 0.1990, 0.1628]) -Greedy action tensor([ 0.6889, -0.1995, 0.1035, -0.3805]) tensor([0.4326, 0.1780, 0.2409, 0.1485]) -Greedy action tensor([ 1.9192, -0.5254, -0.1453, 1.0273]) tensor([0.6159, 0.0534, 0.0782, 0.2525]) -Greedy action tensor([-0.8438, -1.9767, -0.1489, -0.1587]) tensor([0.1883, 0.0607, 0.3773, 0.3736]) -Greedy action tensor([-1.2220, -1.3637, -0.0555, -0.6570]) tensor([0.1462, 0.1269, 0.4695, 0.2573]) -Greedy action tensor([-0.9058, -1.1596, 0.3225, 0.2074]) tensor([0.1214, 0.0942, 0.4147, 0.3696]) -Greedy action tensor([ 1.1272, 0.8263, 0.1615, -0.8422]) tensor([0.4424, 0.3274, 0.1684, 0.0617]) -Greedy action tensor([ 1.2181, -0.0923, 0.7352, 0.6991]) tensor([0.4029, 0.1087, 0.2486, 0.2398]) -Greedy action tensor([ 1.4053, -1.1014, 1.2373, 0.4674]) tensor([0.4313, 0.0352, 0.3646, 0.1689]) -Greedy action tensor([-0.5318, 1.1391, 0.5917, -0.3498]) tensor([0.0944, 0.5020, 0.2904, 0.1133]) -Greedy action tensor([-1.2161, -0.9446, 0.2325, -0.6937]) tensor([0.1211, 0.1589, 0.5157, 0.2043]) -Greedy action tensor([-5.4459e-01, -1.0742e+00, -1.1991e+00, -3.9047e-04]) tensor([0.2610, 0.1537, 0.1356, 0.4497]) -Greedy action tensor([ 1.0930, -0.3768, 0.6639, 0.3183]) tensor([0.4270, 0.0982, 0.2780, 0.1968]) -Greedy action tensor([ 0.8903, -0.3982, 1.4366, 0.1111]) tensor([0.2889, 0.0796, 0.4989, 0.1325]) -Greedy action tensor([-1.0637, -0.1461, 0.2728, -1.2298]) tensor([0.1226, 0.3069, 0.4666, 0.1038]) -Greedy action tensor([ 0.8196, -0.6420, -0.3193, 0.4404]) tensor([0.4471, 0.1037, 0.1432, 0.3060]) -Greedy action tensor([ 0.0407, -0.0565, 1.0178, 0.0124]) tensor([0.1806, 0.1639, 0.4799, 0.1756]) -Greedy action tensor([-0.4263, -2.5906, 0.3520, 0.3524]) tensor([0.1828, 0.0210, 0.3981, 0.3982]) -Greedy action tensor([-0.1434, -1.6191, 0.6345, 0.4265]) tensor([0.1933, 0.0442, 0.4208, 0.3417]) -Greedy action tensor([-0.0981, -1.5099, -0.1614, -0.7244]) tensor([0.3681, 0.0897, 0.3455, 0.1968]) -Greedy action tensor([ 0.4494, -1.4887, 1.3149, -0.5200]) tensor([0.2564, 0.0369, 0.6094, 0.0973]) -Greedy action tensor([ 1.4238, 0.2097, -0.0811, -0.0690]) tensor([0.5735, 0.1703, 0.1273, 0.1289]) -Greedy action tensor([ 1.6217, -1.3284, 1.4605, 0.6072]) tensor([0.4413, 0.0231, 0.3756, 0.1600]) -Greedy action tensor([ 0.2821, -0.3034, 0.4211, 0.5858]) tensor([0.2462, 0.1371, 0.2830, 0.3336]) -Greedy action tensor([ 0.3294, 0.2063, -0.0400, 0.7380]) tensor([0.2451, 0.2167, 0.1694, 0.3688]) -Greedy action tensor([-0.0945, -1.6506, 1.7413, -0.1497]) tensor([0.1187, 0.0250, 0.7440, 0.1123]) -Greedy action tensor([-0.3320, -0.7450, -0.1518, 1.0457]) tensor([0.1465, 0.0969, 0.1755, 0.5811]) -Greedy action tensor([ 0.4480, -1.3059, 2.3930, 0.4907]) tensor([0.1086, 0.0188, 0.7593, 0.1133]) -Greedy action tensor([ 1.4025, -0.1132, 1.0923, 0.5819]) tensor([0.4179, 0.0918, 0.3064, 0.1839]) -Greedy action tensor([ 0.7105, -1.2206, -0.0256, 0.3265]) tensor([0.4338, 0.0629, 0.2078, 0.2955]) -Greedy action tensor([-0.3923, -0.8955, 0.7692, 1.2014]) tensor([0.1029, 0.0622, 0.3286, 0.5063]) -Greedy action tensor([ 0.3119, 0.4276, 1.9281, -0.3936]) tensor([0.1307, 0.1467, 0.6580, 0.0646]) -Greedy action tensor([ 0.9522, -0.1501, -1.1887, -0.0180]) tensor([0.5468, 0.1816, 0.0643, 0.2073]) -Greedy action tensor([0.4297, 0.4625, 0.0232, 1.7681]) tensor([0.1536, 0.1587, 0.1023, 0.5855]) -Greedy action tensor([ 0.0949, -0.7049, -0.0194, 0.8237]) tensor([0.2265, 0.1018, 0.2021, 0.4696]) -Greedy action tensor([ 0.0425, -1.5276, -0.1136, 1.3610]) tensor([0.1724, 0.0359, 0.1475, 0.6443]) -Greedy action tensor([ 0.9285, -0.4894, 0.2006, 1.6588]) tensor([0.2631, 0.0637, 0.1271, 0.5461]) -Greedy action tensor([-1.0361, -0.6052, 0.3897, -0.1459]) tensor([0.1095, 0.1684, 0.4555, 0.2666]) -Greedy action tensor([ 0.8293, -0.9231, 0.7113, 0.3428]) tensor([0.3736, 0.0648, 0.3320, 0.2297]) -Greedy action tensor([0.4181, 0.6875, 0.9873, 0.0152]) tensor([0.2108, 0.2759, 0.3724, 0.1409]) -Greedy action tensor([ 1.1275, 0.4653, -0.3223, 1.1139]) tensor([0.3654, 0.1884, 0.0857, 0.3604]) -Greedy action tensor([ 0.9076, -1.9585, 0.0689, 0.8724]) tensor([0.4074, 0.0232, 0.1761, 0.3933]) -Greedy action tensor([ 1.5418, -0.9973, 1.4792, 0.3225]) tensor([0.4322, 0.0341, 0.4060, 0.1277]) -Greedy action tensor([ 0.1743, -1.8659, -0.1261, -0.2764]) tensor([0.3988, 0.0518, 0.2953, 0.2541]) -Greedy action tensor([ 0.9693, -0.8874, 0.7576, 0.0263]) tensor([0.4247, 0.0663, 0.3436, 0.1654]) -Greedy action tensor([-0.4601, -1.1898, 0.9715, 0.1999]) tensor([0.1315, 0.0634, 0.5505, 0.2545]) -Greedy action tensor([ 0.1360, -0.7656, -0.0685, 1.5996]) tensor([0.1528, 0.0620, 0.1246, 0.6605]) -Greedy action tensor([-0.5114, -1.5394, 0.3706, -0.6007]) tensor([0.2133, 0.0763, 0.5153, 0.1951]) -Greedy action tensor([ 0.6740, -0.0861, 0.3077, 1.1584]) tensor([0.2643, 0.1236, 0.1832, 0.4289]) -Greedy action tensor([ 1.0743, -1.6615, 2.8861, -0.5704]) tensor([0.1355, 0.0088, 0.8295, 0.0262]) -Greedy action tensor([-0.9976, 0.1145, -1.3306, 0.2619]) tensor([0.1208, 0.3672, 0.0866, 0.4255]) -Greedy action tensor([ 0.2211, 0.3436, 1.3393, -0.0042]) tensor([0.1670, 0.1888, 0.5109, 0.1333]) -Greedy action tensor([ 0.2296, -1.9916, -0.4255, -0.5339]) tensor([0.4776, 0.0518, 0.2480, 0.2226]) -Greedy action tensor([-0.5707, -1.8545, -0.0731, -0.4205]) tensor([0.2449, 0.0678, 0.4028, 0.2846]) -Greedy action tensor([ 0.1136, 0.8373, 1.3010, -1.0486]) tensor([0.1503, 0.3099, 0.4928, 0.0470]) -Greedy action tensor([ 0.6199, 0.4739, 1.2533, -0.5073]) tensor([0.2456, 0.2122, 0.4627, 0.0796]) -Greedy action tensor([ 0.7330, -0.4608, -0.0475, 0.2790]) tensor([0.4173, 0.1265, 0.1912, 0.2650]) -Greedy action tensor([ 0.6682, -0.5372, 0.1888, 1.1422]) tensor([0.2837, 0.0850, 0.1756, 0.4557]) -Greedy action tensor([ 1.1346, -0.3287, 1.4058, -0.2629]) tensor([0.3584, 0.0830, 0.4701, 0.0886]) -Greedy action tensor([-0.7536, 0.2573, 1.7612, -1.5848]) tensor([0.0604, 0.1661, 0.7472, 0.0263]) -Greedy action tensor([-0.0475, -0.8638, -0.3134, 1.7985]) tensor([0.1171, 0.0517, 0.0897, 0.7415]) -Greedy action tensor([ 0.7407, 0.2379, -0.0535, 0.4847]) tensor([0.3532, 0.2137, 0.1596, 0.2735]) -Greedy action tensor([-1.3088, -0.2098, 0.6597, -0.5429]) tensor([0.0751, 0.2254, 0.5379, 0.1616]) -Greedy action tensor([ 1.0005, -1.0104, 0.1409, 0.6072]) tensor([0.4480, 0.0600, 0.1897, 0.3023]) -Greedy action tensor([ 1.3106, -0.5806, 1.2584, 1.4586]) tensor([0.3068, 0.0463, 0.2912, 0.3557]) -Greedy action tensor([ 1.8100, -1.0599, 1.1863, 0.0984]) tensor([0.5639, 0.0320, 0.3022, 0.1018]) -Greedy action tensor([-0.7493, -1.7745, -0.3487, -0.2171]) tensor([0.2196, 0.0788, 0.3278, 0.3739]) -Greedy action tensor([ 0.4727, -0.2314, 0.3505, 0.0922]) tensor([0.3265, 0.1615, 0.2889, 0.2231]) -Greedy action tensor([ 0.1754, -0.4620, -0.0014, 0.4183]) tensor([0.2746, 0.1452, 0.2301, 0.3501]) -Greedy action tensor([ 0.6688, -0.0362, -0.7088, 1.7341]) tensor([0.2151, 0.1063, 0.0543, 0.6243]) -Greedy action tensor([-0.7768, -1.2057, 0.8871, 0.2493]) tensor([0.1029, 0.0670, 0.5431, 0.2870]) -Greedy action tensor([ 0.0912, -0.8402, 2.0109, -0.2130]) tensor([0.1117, 0.0440, 0.7618, 0.0824]) -Greedy action tensor([ 1.0889, -1.1175, 1.9315, -0.2256]) tensor([0.2702, 0.0297, 0.6275, 0.0726]) -Greedy action tensor([ 0.2373, -0.3851, -0.5427, 0.9854]) tensor([0.2434, 0.1306, 0.1116, 0.5143]) -Greedy action tensor([ 1.3881, -1.3656, 0.0968, 1.3103]) tensor([0.4417, 0.0281, 0.1214, 0.4087]) -Greedy action tensor([ 1.1855, 0.0658, 0.5949, -0.3480]) tensor([0.4771, 0.1557, 0.2643, 0.1029]) -Greedy action tensor([ 0.5931, -0.4245, 1.0916, 0.6276]) tensor([0.2474, 0.0894, 0.4072, 0.2560]) -Greedy action tensor([ 1.1902, -1.0185, 1.2842, 0.0484]) tensor([0.3956, 0.0435, 0.4346, 0.1263]) -Greedy action tensor([-0.8175, 0.4549, 0.7206, -1.1943]) tensor([0.1009, 0.3601, 0.4698, 0.0692]) -Greedy action tensor([0.4658, 0.0696, 0.3029, 1.0063]) tensor([0.2359, 0.1587, 0.2004, 0.4050]) -Greedy action tensor([-0.1566, 0.0400, -0.5508, -0.0606]) tensor([0.2505, 0.3049, 0.1689, 0.2757]) -Greedy action tensor([-0.5113, -0.9376, -0.0688, 0.6143]) tensor([0.1589, 0.1038, 0.2474, 0.4899]) -Greedy action tensor([ 1.2251, -0.0672, 0.0207, 0.0264]) tensor([0.5330, 0.1464, 0.1598, 0.1607]) -Greedy action tensor([ 1.9512, -0.2798, -0.4649, 0.8899]) tensor([0.6482, 0.0696, 0.0579, 0.2243]) -Greedy action tensor([ 1.7826, -0.4235, -0.4909, 0.3288]) tensor([0.6912, 0.0761, 0.0712, 0.1615]) -Greedy action tensor([ 1.7005, -0.8414, -0.1537, 0.1032]) tensor([0.6955, 0.0547, 0.1089, 0.1408]) -Greedy action tensor([ 1.2422, -0.2693, -0.3104, 0.3492]) tensor([0.5430, 0.1198, 0.1150, 0.2223]) -Greedy action tensor([ 0.6798, -0.1774, -0.1168, 0.0421]) tensor([0.4160, 0.1765, 0.1876, 0.2199]) -Greedy action tensor([ 0.5388, -0.3501, 0.1183, -0.0622]) tensor([0.3822, 0.1571, 0.2510, 0.2096]) -Greedy action tensor([ 1.4082, 0.0650, -0.4204, 0.0598]) tensor([0.5948, 0.1552, 0.0955, 0.1544]) -Greedy action tensor([ 1.4994, -0.8734, -0.2223, 0.4951]) tensor([0.6104, 0.0569, 0.1091, 0.2236]) -Greedy action tensor([ 1.1965, 0.0152, -0.6489, 0.2912]) tensor([0.5350, 0.1642, 0.0845, 0.2164]) -Greedy action tensor([ 1.8788, 0.2683, -0.2401, 0.3655]) tensor([0.6493, 0.1297, 0.0780, 0.1430]) -Greedy action tensor([ 1.5000, -0.9986, -0.6079, 0.9173]) tensor([0.5675, 0.0467, 0.0689, 0.3169]) -Greedy action tensor([ 0.6494, -0.5940, 0.1735, -0.1272]) tensor([0.4220, 0.1217, 0.2622, 0.1941]) -Greedy action tensor([ 0.9638, -0.4043, -0.4663, 0.2241]) tensor([0.5073, 0.1292, 0.1214, 0.2421]) -Greedy action tensor([ 1.7154, -0.5824, -0.5047, 0.3308]) tensor([0.6852, 0.0688, 0.0744, 0.1716]) -Greedy action tensor([ 0.7583, -0.1508, 0.1155, 0.0954]) tensor([0.4091, 0.1648, 0.2151, 0.2109]) -Greedy action tensor([ 1.5360, -0.3683, -0.1537, 0.3466]) tensor([0.6105, 0.0909, 0.1127, 0.1859]) -Greedy action tensor([ 1.0132, -0.8249, -0.0507, 0.5205]) tensor([0.4728, 0.0752, 0.1632, 0.2888]) -Greedy action tensor([ 1.0579, -0.0687, -0.0539, 0.0348]) tensor([0.4969, 0.1611, 0.1634, 0.1786]) -Greedy action tensor([ 0.9040, 0.0800, -0.2567, -0.0491]) tensor([0.4678, 0.2052, 0.1466, 0.1804]) -Greedy action tensor([ 0.8539, -0.1991, -0.1285, 0.2197]) tensor([0.4437, 0.1548, 0.1661, 0.2353]) -Greedy action tensor([ 2.1935, -0.8812, -0.3711, 0.5711]) tensor([0.7572, 0.0350, 0.0583, 0.1495]) -Greedy action tensor([ 0.4446, -0.3793, -0.1095, -0.1851]) tensor([0.3928, 0.1723, 0.2257, 0.2092]) -Greedy action tensor([ 2.1432, -0.8439, -0.2113, 0.2910]) tensor([0.7679, 0.0387, 0.0729, 0.1205]) -Greedy action tensor([ 1.2880, -0.7427, 0.2734, 0.0714]) tensor([0.5587, 0.0733, 0.2025, 0.1655]) -Greedy action tensor([ 1.7321, -0.5773, 0.2019, 0.0858]) tensor([0.6629, 0.0658, 0.1435, 0.1278]) -Greedy action tensor([ 1.2556, -0.3968, -0.2788, 0.0441]) tensor([0.5865, 0.1124, 0.1265, 0.1746]) -Greedy action tensor([ 0.8277, -0.4704, -0.0868, 0.0704]) tensor([0.4667, 0.1274, 0.1870, 0.2188]) -Greedy action tensor([ 1.9126, -0.5684, -0.2519, 0.6861]) tensor([0.6703, 0.0561, 0.0770, 0.1966]) -Greedy action tensor([ 1.6604, -0.7850, -0.5661, 0.7270]) tensor([0.6298, 0.0546, 0.0680, 0.2476]) -Greedy action tensor([ 0.8349, -0.1935, -0.3438, 0.1165]) tensor([0.4645, 0.1661, 0.1429, 0.2265]) -Greedy action tensor([ 1.8843, -0.6608, -0.3275, 0.2707]) tensor([0.7209, 0.0566, 0.0789, 0.1436]) -Greedy action tensor([ 1.1309, -0.0588, 0.0474, -0.0279]) tensor([0.5111, 0.1555, 0.1730, 0.1604]) -Greedy action tensor([ 1.7787, -1.1656, -0.4207, 0.7121]) tensor([0.6633, 0.0349, 0.0735, 0.2283]) -Greedy action tensor([ 1.3143, -0.0742, -0.0674, 0.2461]) tensor([0.5422, 0.1353, 0.1362, 0.1863]) -Greedy action tensor([ 1.9593, -0.6165, -0.1742, 0.7654]) tensor([0.6678, 0.0508, 0.0791, 0.2023]) -Greedy action tensor([ 1.2779, -0.4508, -0.1723, -0.0415]) tensor([0.5955, 0.1057, 0.1396, 0.1592]) -Greedy action tensor([ 2.7468, -0.9783, 0.0177, 0.8317]) tensor([0.8086, 0.0195, 0.0528, 0.1191]) -Greedy action tensor([ 1.0297, -0.4470, -0.2917, 0.2043]) tensor([0.5173, 0.1181, 0.1380, 0.2266]) -Greedy action tensor([ 1.6163, -1.0692, -0.1070, 0.3027]) tensor([0.6598, 0.0450, 0.1178, 0.1774]) -Greedy action tensor([ 2.2499, -0.6707, -0.2100, 0.6635]) tensor([0.7440, 0.0401, 0.0636, 0.1523]) -Greedy action tensor([ 1.9907, -0.4200, -0.4358, 0.3986]) tensor([0.7238, 0.0650, 0.0639, 0.1473]) -Greedy action tensor([ 1.6721, -0.7732, -0.3291, 0.5414]) tensor([0.6474, 0.0561, 0.0875, 0.2090]) -Greedy action tensor([ 1.1930, -0.2833, -0.1191, 0.2233]) tensor([0.5328, 0.1217, 0.1435, 0.2020]) -Greedy action tensor([ 1.7527, -0.5412, -0.2564, 0.3393]) tensor([0.6765, 0.0682, 0.0907, 0.1646]) -Greedy action tensor([ 0.9141, -0.4114, 0.1115, 0.1654]) tensor([0.4573, 0.1215, 0.2049, 0.2163]) -Greedy action tensor([ 1.6059, -0.2610, -0.3165, 0.2425]) tensor([0.6424, 0.0993, 0.0940, 0.1643]) -Greedy action tensor([ 1.7248, -0.3104, -0.2288, 0.3455]) tensor([0.6561, 0.0857, 0.0930, 0.1652]) -Greedy action tensor([ 1.5674, -0.7405, -0.2720, 0.5401]) tensor([0.6187, 0.0615, 0.0983, 0.2215]) -Greedy action tensor([ 1.3358, -0.3822, -0.4093, 0.5095]) tensor([0.5581, 0.1001, 0.0975, 0.2443]) -Greedy action tensor([ 1.2304, -0.4265, -0.2969, 0.1540]) tensor([0.5719, 0.1091, 0.1242, 0.1949]) -Greedy action tensor([ 1.8558, -0.1073, -1.0563, 0.3582]) tensor([0.7050, 0.0990, 0.0383, 0.1577]) -Greedy action tensor([ 1.5232, -0.5899, -0.3143, 0.4538]) tensor([0.6160, 0.0745, 0.0981, 0.2114]) -Greedy action tensor([ 1.2253, -0.4943, -0.4964, 0.5999]) tensor([0.5283, 0.0946, 0.0944, 0.2827]) -Greedy action tensor([ 1.2126, -0.6929, -0.0028, 0.1456]) tensor([0.5589, 0.0831, 0.1657, 0.1923]) -Greedy action tensor([ 1.8648, -0.2015, -0.2056, 0.1813]) tensor([0.6952, 0.0880, 0.0877, 0.1291]) -Greedy action tensor([ 1.1033, -0.3493, -0.4203, 0.2175]) tensor([0.5364, 0.1255, 0.1169, 0.2212]) -Greedy action tensor([ 0.9340, -0.6248, -0.4609, 0.4198]) tensor([0.4863, 0.1023, 0.1205, 0.2908]) -Greedy action tensor([ 1.2180, -0.2001, -0.2806, 0.1147]) tensor([0.5564, 0.1347, 0.1243, 0.1846]) -Greedy action tensor([ 1.0698, -0.3542, -0.3861, 0.1851]) tensor([0.5300, 0.1276, 0.1236, 0.2188]) -Greedy action tensor([ 1.1933, -0.7393, -0.3215, 0.0304]) tensor([0.5962, 0.0863, 0.1311, 0.1864]) -Greedy action tensor([ 1.4058, -0.4838, -0.0926, 0.1845]) tensor([0.5990, 0.0905, 0.1339, 0.1766]) -Greedy action tensor([ 1.4073, -0.8845, -0.1743, 0.1813]) tensor([0.6249, 0.0632, 0.1285, 0.1834]) -Greedy action tensor([ 1.8393, 0.3571, -0.2021, -0.0415]) tensor([0.6625, 0.1505, 0.0860, 0.1010]) -Greedy action tensor([ 0.8465, -0.0467, -0.1865, 0.1633]) tensor([0.4405, 0.1803, 0.1568, 0.2224]) -Greedy action tensor([ 1.4198, -0.2465, -0.2509, 0.2673]) tensor([0.5907, 0.1116, 0.1111, 0.1866]) -Greedy action tensor([ 0.6415, -0.1175, -0.5050, 0.2355]) tensor([0.4078, 0.1909, 0.1296, 0.2717]) -Greedy action tensor([ 1.2947, -0.2479, -0.7421, 0.1936]) tensor([0.5964, 0.1275, 0.0778, 0.1983]) -Greedy action tensor([ 1.1220, -0.3089, -0.5918, 0.1528]) tensor([0.5560, 0.1329, 0.1002, 0.2109]) -Greedy action tensor([ 1.5710, -0.3924, -0.2135, 0.2863]) tensor([0.6309, 0.0886, 0.1059, 0.1746]) -Greedy action tensor([ 1.5386, -0.5290, -0.2113, -0.0748]) tensor([0.6669, 0.0844, 0.1159, 0.1329]) -Greedy action tensor([ 1.4197, -0.3449, -0.6121, 0.7841]) tensor([0.5459, 0.0935, 0.0716, 0.2891]) -Greedy action tensor([ 1.2707, -0.5813, -0.4777, 0.5355]) tensor([0.5524, 0.0867, 0.0961, 0.2648]) -Greedy action tensor([ 1.3064, -0.1349, -0.2702, -0.2069]) tensor([0.6011, 0.1423, 0.1242, 0.1324]) -Greedy action tensor([ 1.0961, -0.1001, -0.4076, 0.0773]) tensor([0.5303, 0.1603, 0.1179, 0.1915]) -Greedy action tensor([ 0.6979, -0.4911, 0.1459, 0.5560]) tensor([0.3639, 0.1108, 0.2095, 0.3158]) -Greedy action tensor([ 0.8701, -0.4007, -0.1956, 0.5011]) tensor([0.4317, 0.1211, 0.1487, 0.2985]) -Greedy action tensor([ 1.9608, -0.5191, -0.2619, 0.7703]) tensor([0.6684, 0.0560, 0.0724, 0.2032]) -Greedy action tensor([ 1.3979, -0.2803, -0.6719, 0.3281]) tensor([0.6039, 0.1127, 0.0762, 0.2072]) -Greedy action tensor([ 1.9849, -0.9047, -0.1164, 0.7436]) tensor([0.6817, 0.0379, 0.0834, 0.1970]) -Greedy action tensor([ 1.5464, -0.4688, -0.2998, 0.3783]) tensor([0.6242, 0.0832, 0.0985, 0.1941]) -Greedy action tensor([ 1.0337, 0.0992, -0.1495, 0.1757]) tensor([0.4710, 0.1850, 0.1443, 0.1997]) -Greedy action tensor([-0.2234, -0.3511, 0.1949, 0.2652]) tensor([0.1988, 0.1750, 0.3021, 0.3241]) -Greedy action tensor([-1.5956, -0.4866, 0.5036, 0.0392]) tensor([0.0577, 0.1750, 0.4711, 0.2961]) -Greedy action tensor([-0.8887, -0.2867, 0.6856, 1.3160]) tensor([0.0598, 0.1092, 0.2887, 0.5423]) -Greedy action tensor([-1.6933, -0.2278, 0.5803, 0.1175]) tensor([0.0473, 0.2046, 0.4591, 0.2890]) -Greedy action tensor([-1.6565, -0.0913, 0.4844, -0.0881]) tensor([0.0524, 0.2506, 0.4456, 0.2514]) -Greedy action tensor([-1.9321, -0.4284, 0.6599, -0.1715]) tensor([0.0405, 0.1823, 0.5414, 0.2357]) -Greedy action tensor([-0.9749, -0.3121, 0.1668, 0.3911]) tensor([0.1001, 0.1942, 0.3135, 0.3923]) -Greedy action tensor([-0.8976, -0.1315, 0.6683, 1.1094]) tensor([0.0650, 0.1399, 0.3113, 0.4838]) -Greedy action tensor([-1.1955, -0.4179, 0.8583, 1.1207]) tensor([0.0474, 0.1031, 0.3694, 0.4802]) -Greedy action tensor([-1.9273, -0.4575, 0.6931, -0.1239]) tensor([0.0397, 0.1728, 0.5462, 0.2413]) -Greedy action tensor([-1.8875, -0.4356, 0.6819, -0.1012]) tensor([0.0412, 0.1758, 0.5374, 0.2456]) -Greedy action tensor([-1.9392, -0.4560, 0.6688, -0.1750]) tensor([0.0403, 0.1776, 0.5469, 0.2352]) -Greedy action tensor([-1.1920, 0.0629, -0.3755, -0.1515]) tensor([0.1042, 0.3653, 0.2357, 0.2948]) -Greedy action tensor([-1.8629, -0.4537, 0.6223, -0.1302]) tensor([0.0440, 0.1799, 0.5276, 0.2486]) -Greedy action tensor([-1.7434, -0.2869, 0.6075, -0.2346]) tensor([0.0492, 0.2113, 0.5168, 0.2226]) -Greedy action tensor([-1.1531, -0.7359, 0.0943, -0.4272]) tensor([0.1240, 0.1882, 0.4316, 0.2562]) -Greedy action tensor([-1.6136, -0.2072, 0.4671, -0.0188]) tensor([0.0555, 0.2265, 0.4446, 0.2734]) -Greedy action tensor([-1.9469, -0.4531, 0.6658, -0.1821]) tensor([0.0401, 0.1787, 0.5470, 0.2343]) -Greedy action tensor([-0.7294, -0.4012, 1.1868, 1.5597]) tensor([0.0525, 0.0729, 0.3567, 0.5179]) -Greedy action tensor([-1.8136, -0.4490, 0.5861, -0.1009]) tensor([0.0466, 0.1822, 0.5131, 0.2581]) -Greedy action tensor([-1.2692, -0.6466, 0.3370, 0.1766]) tensor([0.0827, 0.1541, 0.4121, 0.3510]) -Greedy action tensor([-1.9292, -0.4522, 0.6679, -0.1669]) tensor([0.0406, 0.1778, 0.5450, 0.2365]) -Greedy action tensor([-1.8747, -0.4150, 0.6369, -0.1211]) tensor([0.0427, 0.1839, 0.5266, 0.2468]) -Greedy action tensor([-2.0487, -0.7525, 1.3336, 0.5734]) tensor([0.0209, 0.0764, 0.6151, 0.2876]) -Greedy action tensor([-1.9255, -0.4397, 0.6589, -0.1698]) tensor([0.0409, 0.1806, 0.5419, 0.2366]) -Greedy action tensor([-1.4279, -0.3752, 0.4380, 0.1425]) tensor([0.0661, 0.1893, 0.4269, 0.3177]) -Greedy action tensor([-1.6550, -0.4452, 0.5687, 0.0371]) tensor([0.0526, 0.1762, 0.4857, 0.2855]) -Greedy action tensor([-1.1807, -0.2196, 0.6828, 0.6231]) tensor([0.0620, 0.1621, 0.3995, 0.3764]) -Greedy action tensor([-1.9323, -0.4584, 0.6686, -0.1632]) tensor([0.0405, 0.1767, 0.5454, 0.2374]) -Greedy action tensor([-1.4789, -0.4166, 0.5794, -0.2962]) tensor([0.0667, 0.1930, 0.5226, 0.2177]) -Greedy action tensor([-0.7627, 0.2779, 0.8481, 1.0649]) tensor([0.0664, 0.1880, 0.3325, 0.4130]) -Greedy action tensor([-1.1298, -0.5889, 0.2700, 0.2589]) tensor([0.0928, 0.1593, 0.3761, 0.3719]) -Greedy action tensor([-1.9258, -0.4599, 0.6743, -0.1596]) tensor([0.0406, 0.1757, 0.5464, 0.2373]) -Greedy action tensor([-1.8125, -0.4394, 0.5948, -0.1173]) tensor([0.0465, 0.1836, 0.5165, 0.2534]) -Greedy action tensor([-0.7509, 0.1587, 0.0321, 0.0604]) tensor([0.1262, 0.3135, 0.2762, 0.2841]) -Greedy action tensor([-1.6135, -0.5615, 0.4962, 0.0237]) tensor([0.0580, 0.1660, 0.4780, 0.2980]) -Greedy action tensor([-1.8548, -0.1567, 0.5698, -0.1097]) tensor([0.0426, 0.2326, 0.4810, 0.2438]) -Greedy action tensor([-1.6829, -0.8262, -0.0658, -0.2736]) tensor([0.0801, 0.1886, 0.4035, 0.3278]) -Greedy action tensor([-1.2792, -0.5493, 1.0496, 1.0089]) tensor([0.0431, 0.0894, 0.4425, 0.4249]) -Greedy action tensor([-1.7782, -0.3557, 0.5833, -0.1482]) tensor([0.0479, 0.1988, 0.5085, 0.2447]) -Greedy action tensor([-1.9328, -0.4382, 0.6600, -0.1712]) tensor([0.0406, 0.1809, 0.5424, 0.2362]) -Greedy action tensor([-1.6807, -0.4774, 0.5700, -0.1700]) tensor([0.0545, 0.1815, 0.5173, 0.2468]) -Greedy action tensor([-1.8441, -0.3982, 0.6107, -0.1351]) tensor([0.0446, 0.1894, 0.5195, 0.2464]) -Greedy action tensor([-1.7313, -0.0812, 0.5575, 0.0203]) tensor([0.0458, 0.2385, 0.4517, 0.2640]) -Greedy action tensor([-1.1735, -0.4977, 0.4369, 0.6163]) tensor([0.0716, 0.1408, 0.3586, 0.4290]) -Greedy action tensor([-1.9132, -0.4585, 0.6524, -0.1604]) tensor([0.0416, 0.1780, 0.5406, 0.2398]) -Greedy action tensor([-1.6905, 0.2432, 0.4641, 0.0264]) tensor([0.0452, 0.3128, 0.3901, 0.2518]) -Greedy action tensor([-1.5525, -0.5377, 0.5034, -0.0671]) tensor([0.0625, 0.1725, 0.4887, 0.2762]) -Greedy action tensor([-1.1867, -0.4429, 0.3515, -0.0517]) tensor([0.0920, 0.1935, 0.4283, 0.2862]) -Greedy action tensor([-1.9287, -0.4359, 0.6578, -0.1694]) tensor([0.0407, 0.1813, 0.5413, 0.2367]) -Greedy action tensor([-1.4106, -0.6146, 1.1305, 1.1158]) tensor([0.0352, 0.0780, 0.4467, 0.4401]) -Greedy action tensor([-0.7979, -0.4313, 1.1052, 1.5871]) tensor([0.0500, 0.0721, 0.3352, 0.5427]) -Greedy action tensor([-1.8235, -0.4974, 0.6832, -0.0152]) tensor([0.0432, 0.1628, 0.5302, 0.2637]) -Greedy action tensor([-1.4206, -0.7981, -0.3590, -0.4882]) tensor([0.1206, 0.2247, 0.3485, 0.3063]) -Greedy action tensor([-1.9678, -0.9265, 0.3215, -0.1344]) tensor([0.0501, 0.1420, 0.4945, 0.3134]) -Greedy action tensor([-1.9320, -0.4372, 0.6621, -0.1743]) tensor([0.0406, 0.1809, 0.5432, 0.2353]) -Greedy action tensor([-1.9452, -0.4536, 0.6665, -0.1811]) tensor([0.0402, 0.1785, 0.5470, 0.2344]) -Greedy action tensor([-1.9307, -0.4514, 0.6594, -0.1743]) tensor([0.0408, 0.1791, 0.5439, 0.2363]) -Greedy action tensor([-1.4359, -0.4920, 0.6844, 0.4936]) tensor([0.0532, 0.1368, 0.4435, 0.3665]) -Greedy action tensor([-0.9924, -0.5392, 0.2124, 0.5518]) tensor([0.0944, 0.1485, 0.3149, 0.4422]) -Greedy action tensor([-1.4502, 0.1378, 0.3901, 0.1640]) tensor([0.0581, 0.2843, 0.3658, 0.2918]) -Greedy action tensor([-1.9049, -0.4589, 0.6812, -0.1305]) tensor([0.0409, 0.1739, 0.5437, 0.2415]) -Greedy action tensor([-1.8810, -0.4481, 0.6395, -0.1445]) tensor([0.0429, 0.1798, 0.5336, 0.2436]) -Greedy action tensor([-1.8969, -0.3796, 0.6484, -0.1428]) tensor([0.0415, 0.1893, 0.5293, 0.2399]) -Greedy action tensor([-1.8780, -0.4586, 0.6350, -0.1333]) tensor([0.0431, 0.1782, 0.5320, 0.2467]) -Greedy action tensor([-1.7796, -0.2286, 0.5682, -0.0267]) tensor([0.0456, 0.2149, 0.4767, 0.2629]) -Greedy action tensor([-1.8041, -0.4176, 0.5936, -0.0844]) tensor([0.0463, 0.1854, 0.5096, 0.2587]) -Greedy action tensor([-1.2896, 0.2739, 0.3992, 0.6074]) tensor([0.0560, 0.2675, 0.3032, 0.3734]) -Greedy action tensor([-1.4757, -0.4722, 0.7831, 0.4824]) tensor([0.0491, 0.1338, 0.4695, 0.3476]) -Greedy action tensor([-1.4667, -0.4278, 0.5014, 0.2062]) tensor([0.0613, 0.1733, 0.4388, 0.3266]) -Greedy action tensor([-1.9688, -0.6045, 1.0373, 0.2435]) tensor([0.0292, 0.1142, 0.5899, 0.2667]) -Greedy action tensor([-1.6243, -0.5123, 0.7877, -0.3417]) tensor([0.0532, 0.1617, 0.5933, 0.1918]) -Greedy action tensor([-1.1101, -0.6153, 0.5981, 0.8316]) tensor([0.0661, 0.1084, 0.3648, 0.4607]) -Greedy action tensor([-1.8241, -0.3328, 0.5871, -0.1181]) tensor([0.0453, 0.2011, 0.5045, 0.2492]) -Greedy action tensor([-1.9435, -0.5445, 1.2359, 0.5274]) tensor([0.0244, 0.0990, 0.5873, 0.2892]) -Greedy action tensor([-1.8688, -0.4681, 0.6853, -0.0796]) tensor([0.0418, 0.1698, 0.5380, 0.2504]) -Greedy action tensor([-1.8111, -0.4566, 0.6211, -0.0805]) tensor([0.0457, 0.1769, 0.5197, 0.2577]) -Greedy action tensor([-1.8108, -0.4583, 0.8371, 0.3563]) tensor([0.0361, 0.1395, 0.5094, 0.3150]) -Greedy action tensor([-1.6448, 0.2512, 0.4524, 0.1568]) tensor([0.0457, 0.3046, 0.3725, 0.2772]) -Greedy action tensor([-1.6001, -0.2851, 0.4407, 0.0519]) tensor([0.0567, 0.2112, 0.4363, 0.2958]) -Greedy action tensor([-1.8882, -0.4515, 0.6421, -0.1476]) tensor([0.0426, 0.1793, 0.5352, 0.2429]) -Greedy action tensor([ 0.4623, -0.3882, -0.1183, -0.2194]) tensor([0.4012, 0.1714, 0.2245, 0.2029]) -Greedy action tensor([ 0.7457, -0.5584, -0.1290, -0.3150]) tensor([0.4915, 0.1334, 0.2049, 0.1702]) -Greedy action tensor([ 1.2628, -1.6143, -0.1030, -0.5882]) tensor([0.6809, 0.0383, 0.1738, 0.1070]) -Greedy action tensor([ 0.3608, -0.2479, -0.0492, -0.1506]) tensor([0.3562, 0.1938, 0.2364, 0.2136]) -Greedy action tensor([ 0.9917, -0.6743, -0.0525, -0.5070]) tensor([0.5668, 0.1071, 0.1995, 0.1266]) -Greedy action tensor([ 1.1951, -1.0400, -0.0292, -0.6515]) tensor([0.6416, 0.0686, 0.1886, 0.1012]) -Greedy action tensor([ 0.9492, -0.5544, 0.0717, -0.4411]) tensor([0.5299, 0.1178, 0.2203, 0.1319]) -Greedy action tensor([ 0.9812, -1.3227, -0.0164, -0.6920]) tensor([0.6038, 0.0603, 0.2226, 0.1133]) -Greedy action tensor([ 0.5004, -0.3776, -0.1382, -0.1223]) tensor([0.4032, 0.1676, 0.2129, 0.2163]) -Greedy action tensor([ 0.4960, -0.0264, -0.1459, -0.0197]) tensor([0.3681, 0.2183, 0.1937, 0.2198]) -Greedy action tensor([ 1.1812, -0.6679, 0.0106, -0.6763]) tensor([0.6159, 0.0969, 0.1910, 0.0961]) -Greedy action tensor([ 1.0197, -0.5446, 0.0149, -0.4368]) tensor([0.5530, 0.1157, 0.2025, 0.1289]) -Greedy action tensor([ 0.8888, -0.3460, -0.1107, -0.5729]) tensor([0.5289, 0.1538, 0.1947, 0.1226]) -Greedy action tensor([ 0.6712, -0.6857, -0.0220, -0.5056]) tensor([0.4841, 0.1246, 0.2420, 0.1492]) -Greedy action tensor([ 1.0281, -0.7931, 0.0201, -0.4620]) tensor([0.5707, 0.0924, 0.2083, 0.1286]) -Greedy action tensor([ 1.1196, -0.7978, -0.0547, -0.7455]) tensor([0.6208, 0.0912, 0.1918, 0.0961]) -Greedy action tensor([ 1.2847, -0.6245, -0.0487, -0.3522]) tensor([0.6225, 0.0923, 0.1641, 0.1211]) -Greedy action tensor([ 0.7859, -0.3626, -0.0932, -0.0690]) tensor([0.4635, 0.1470, 0.1924, 0.1971]) -Greedy action tensor([ 1.0014, -1.2341, 0.0018, -0.7148]) tensor([0.6043, 0.0646, 0.2224, 0.1086]) -Greedy action tensor([ 0.8755, -0.8066, -0.0610, -0.7287]) tensor([0.5621, 0.1045, 0.2203, 0.1130]) -Greedy action tensor([ 0.3602, -0.1271, 0.0898, -0.1930]) tensor([0.3387, 0.2081, 0.2585, 0.1948]) -Greedy action tensor([ 0.6315, -0.2379, 0.1283, -0.3851]) tensor([0.4192, 0.1757, 0.2534, 0.1517]) -Greedy action tensor([ 0.8049, -0.6113, -0.0231, -0.2109]) tensor([0.4898, 0.1188, 0.2140, 0.1774]) -Greedy action tensor([ 0.7588, -0.5393, -0.0124, -0.5829]) tensor([0.5008, 0.1367, 0.2316, 0.1309]) -Greedy action tensor([ 0.3698, -0.1068, -0.0676, -0.1423]) tensor([0.3489, 0.2167, 0.2253, 0.2091]) -Greedy action tensor([ 0.8612, -0.7484, 0.0858, -0.7220]) tensor([0.5360, 0.1072, 0.2468, 0.1100]) -Greedy action tensor([ 0.7929, 0.2847, 0.1364, -0.5620]) tensor([0.4205, 0.2529, 0.2181, 0.1085]) -Greedy action tensor([ 0.2371, -0.0029, -0.0736, -0.3650]) tensor([0.3260, 0.2565, 0.2390, 0.1786]) -Greedy action tensor([ 0.5278, -0.1709, 0.0650, -0.2226]) tensor([0.3848, 0.1913, 0.2422, 0.1817]) -Greedy action tensor([ 0.8407, -0.6987, -0.1127, -0.6681]) tensor([0.5491, 0.1178, 0.2117, 0.1215]) -Greedy action tensor([ 0.9040, -0.4666, -0.1312, -0.2618]) tensor([0.5206, 0.1322, 0.1849, 0.1623]) -Greedy action tensor([ 0.3773, -0.1099, 0.0127, -0.1458]) tensor([0.3446, 0.2117, 0.2394, 0.2043]) -Greedy action tensor([ 0.5062, -0.2566, -0.0469, -0.2733]) tensor([0.4000, 0.1865, 0.2300, 0.1834]) -Greedy action tensor([ 0.3541, -0.2096, -0.1337, -0.1183]) tensor([0.3563, 0.2028, 0.2188, 0.2222]) -Greedy action tensor([ 1.0161, -0.8904, -0.0069, -0.5189]) tensor([0.5802, 0.0862, 0.2086, 0.1250]) -Greedy action tensor([ 0.6602, -0.4068, -0.0948, -0.1976]) tensor([0.4468, 0.1537, 0.2100, 0.1895]) -Greedy action tensor([ 0.5789, -0.1512, 0.0330, -0.1106]) tensor([0.3902, 0.1880, 0.2260, 0.1958]) -Greedy action tensor([ 0.4165, -0.2713, -0.0636, -0.3207]) tensor([0.3846, 0.1934, 0.2380, 0.1840]) -Greedy action tensor([ 0.6733, -0.5724, 0.4189, -0.5715]) tensor([0.4253, 0.1224, 0.3298, 0.1225]) -Greedy action tensor([ 0.2367, 0.1198, 0.0420, -0.2591]) tensor([0.3010, 0.2678, 0.2478, 0.1834]) -Greedy action tensor([ 0.8656, -0.3952, -0.1258, -0.2169]) tensor([0.5017, 0.1422, 0.1862, 0.1700]) -Greedy action tensor([ 0.3357, -0.2891, -0.0575, -0.1508]) tensor([0.3540, 0.1895, 0.2389, 0.2176]) -Greedy action tensor([ 0.6600, -0.6985, -0.1859, -0.5963]) tensor([0.5074, 0.1304, 0.2178, 0.1445]) -Greedy action tensor([ 0.5457, -0.1324, -0.2511, -0.3121]) tensor([0.4197, 0.2130, 0.1892, 0.1780]) -Greedy action tensor([ 0.9190, -1.0984, 0.1160, -0.7382]) tensor([0.5644, 0.0751, 0.2529, 0.1076]) -Greedy action tensor([ 0.8758, 0.4186, -0.1759, -0.2839]) tensor([0.4355, 0.2757, 0.1522, 0.1366]) -Greedy action tensor([ 0.8208, -0.5541, -0.0681, -0.5609]) tensor([0.5222, 0.1320, 0.2147, 0.1311]) -Greedy action tensor([ 0.9488, -0.7365, 0.0010, -0.3783]) tensor([0.5440, 0.1008, 0.2108, 0.1443]) -Greedy action tensor([ 0.2737, 0.0423, -0.1344, -0.1575]) tensor([0.3217, 0.2553, 0.2139, 0.2091]) -Greedy action tensor([ 0.1479, 0.6895, -0.2706, 0.1194]) tensor([0.2300, 0.3952, 0.1513, 0.2235]) -Greedy action tensor([ 0.4296, -0.0622, -0.0595, -0.3800]) tensor([0.3746, 0.2291, 0.2297, 0.1667]) -Greedy action tensor([ 0.7163, -0.3268, -0.0097, -0.1910]) tensor([0.4465, 0.1573, 0.2160, 0.1802]) -Greedy action tensor([ 0.7479, -0.2438, -0.0661, -0.1203]) tensor([0.4477, 0.1661, 0.1984, 0.1879]) -Greedy action tensor([ 0.5853, -0.5821, -0.1185, -0.2769]) tensor([0.4488, 0.1397, 0.2220, 0.1895]) -Greedy action tensor([ 0.6088, -0.5190, -0.1208, -0.3750]) tensor([0.4588, 0.1485, 0.2212, 0.1715]) -Greedy action tensor([ 0.4721, -0.4840, -0.2336, -0.0876]) tensor([0.4082, 0.1569, 0.2016, 0.2333]) -Greedy action tensor([ 0.4007, 0.0565, 0.0100, -0.3299]) tensor([0.3488, 0.2472, 0.2360, 0.1680]) -Greedy action tensor([ 0.4472, -0.1742, -0.1045, -0.2920]) tensor([0.3860, 0.2074, 0.2223, 0.1843]) -Greedy action tensor([ 0.8184, -0.6654, -0.0046, -0.3135]) tensor([0.5029, 0.1141, 0.2209, 0.1622]) -Greedy action tensor([ 0.8016, -0.7062, -0.0525, -0.4383]) tensor([0.5164, 0.1143, 0.2198, 0.1495]) -Greedy action tensor([ 0.9556, 0.2741, -0.1046, -0.5029]) tensor([0.4797, 0.2426, 0.1661, 0.1116]) -Greedy action tensor([ 0.6423, -0.4927, -0.1720, -0.4958]) tensor([0.4797, 0.1542, 0.2125, 0.1537]) -Greedy action tensor([ 0.6691, -0.6670, -0.1588, -0.1910]) tensor([0.4710, 0.1238, 0.2058, 0.1993]) -Greedy action tensor([ 0.7478, -0.4724, 0.0700, -0.2059]) tensor([0.4570, 0.1349, 0.2320, 0.1761]) -Greedy action tensor([ 0.9016, -1.0089, 0.1017, -0.6408]) tensor([0.5521, 0.0817, 0.2481, 0.1181]) -Greedy action tensor([ 0.5325, -0.3716, -0.0759, -0.2135]) tensor([0.4126, 0.1671, 0.2246, 0.1957]) -Greedy action tensor([ 0.4831, -0.0363, -0.1092, -0.3720]) tensor([0.3886, 0.2312, 0.2149, 0.1653]) -Greedy action tensor([ 0.8536, -0.5338, -0.0916, -0.3718]) tensor([0.5176, 0.1293, 0.2011, 0.1520]) -Greedy action tensor([ 0.6894, -0.1090, -0.0538, -0.6217]) tensor([0.4556, 0.2050, 0.2166, 0.1228]) -Greedy action tensor([ 1.0027, -0.5424, 0.0197, -0.6139]) tensor([0.5599, 0.1194, 0.2095, 0.1112]) -Greedy action tensor([ 0.8901, -0.7240, 0.1001, -0.5903]) tensor([0.5318, 0.1059, 0.2413, 0.1210]) -Greedy action tensor([ 0.5255, 0.1205, -0.1537, -0.0298]) tensor([0.3639, 0.2427, 0.1845, 0.2088]) -Greedy action tensor([ 0.5750, -0.3720, -0.0670, -0.3079]) tensor([0.4296, 0.1667, 0.2261, 0.1777]) -Greedy action tensor([ 0.5243, -0.2499, 0.1204, -0.4562]) tensor([0.3994, 0.1841, 0.2667, 0.1498]) -Greedy action tensor([ 0.7306, -0.5717, -0.0042, -0.2935]) tensor([0.4738, 0.1288, 0.2272, 0.1702]) -Greedy action tensor([ 0.3897, -0.0903, -0.0485, -0.1214]) tensor([0.3492, 0.2161, 0.2253, 0.2095]) -Greedy action tensor([ 0.8360, -0.9211, -0.0625, -0.3157]) tensor([0.5275, 0.0910, 0.2148, 0.1667]) -Greedy action tensor([ 0.6788, -0.2180, 0.1471, -0.4020]) tensor([0.4283, 0.1747, 0.2517, 0.1453]) -Greedy action tensor([ 0.5349, -0.1068, -0.0861, -0.1657]) tensor([0.3906, 0.2056, 0.2099, 0.1938]) -Greedy action tensor([ 0.5597, -0.3414, 0.0423, -0.6129]) tensor([0.4326, 0.1757, 0.2578, 0.1339]) -Greedy action tensor([ 0.3000, 0.1014, -0.0452, -0.0275]) tensor([0.3078, 0.2524, 0.2180, 0.2218]) -Greedy action tensor([-0.7592, -1.8495, -0.4298, 0.2542]) tensor([0.1824, 0.0613, 0.2536, 0.5026]) -Greedy action tensor([ 0.0055, -0.8241, -0.1441, 0.2164]) tensor([0.2831, 0.1235, 0.2438, 0.3496]) -Greedy action tensor([ 2.0174, -0.8091, 1.5825, 0.8591]) tensor([0.4949, 0.0293, 0.3204, 0.1554]) -Greedy action tensor([-0.1756, 0.5450, -0.5339, 0.7868]) tensor([0.1569, 0.3226, 0.1097, 0.4108]) -Greedy action tensor([-0.0849, -1.9991, -0.3066, -0.2182]) tensor([0.3541, 0.0522, 0.2837, 0.3099]) -Greedy action tensor([ 1.0501, 0.3717, 0.1069, -0.8886]) tensor([0.4900, 0.2487, 0.1908, 0.0705]) -Greedy action tensor([-0.3986, -1.6075, 0.4154, 0.1768]) tensor([0.1875, 0.0560, 0.4232, 0.3334]) -Greedy action tensor([ 0.0725, 0.4581, 0.1474, -1.1133]) tensor([0.2595, 0.3816, 0.2797, 0.0793]) -Greedy action tensor([ 1.1810, -0.6867, 1.1336, 0.9034]) tensor([0.3490, 0.0539, 0.3328, 0.2643]) -Greedy action tensor([-0.1847, -0.0876, 0.2723, -0.8932]) tensor([0.2396, 0.2640, 0.3784, 0.1180]) -Greedy action tensor([ 0.6706, -0.5255, -0.0665, 1.3684]) tensor([0.2638, 0.0798, 0.1262, 0.5301]) -Greedy action tensor([ 0.9001, -1.4317, 1.4841, -0.0070]) tensor([0.3036, 0.0295, 0.5444, 0.1226]) -Greedy action tensor([-0.6137, 0.2504, -0.7166, 0.4441]) tensor([0.1398, 0.3316, 0.1261, 0.4025]) -Greedy action tensor([1.2880, 0.8805, 1.4687, 0.5676]) tensor([0.2985, 0.1986, 0.3576, 0.1452]) -Greedy action tensor([ 0.5700, -0.6349, 0.1409, 1.5464]) tensor([0.2171, 0.0651, 0.1414, 0.5764]) -Greedy action tensor([ 0.3197, -1.2277, 0.4440, 1.8873]) tensor([0.1401, 0.0298, 0.1586, 0.6715]) -Greedy action tensor([-1.4267, -1.1778, 0.5489, -0.2021]) tensor([0.0775, 0.0995, 0.5592, 0.2639]) -Greedy action tensor([ 0.0747, -2.3781, 0.0978, -0.0297]) tensor([0.3322, 0.0286, 0.3399, 0.2993]) -Greedy action tensor([ 0.9334, -1.1204, -1.0922, 0.6099]) tensor([0.5041, 0.0647, 0.0665, 0.3648]) -Greedy action tensor([ 0.0760, -1.5395, -0.0933, -0.3913]) tensor([0.3746, 0.0745, 0.3162, 0.2347]) -Greedy action tensor([-1.2728, 0.0649, -0.4039, -1.2081]) tensor([0.1210, 0.4612, 0.2886, 0.1291]) -Greedy action tensor([-0.0497, -1.6777, -0.6964, 0.5969]) tensor([0.2755, 0.0541, 0.1443, 0.5260]) -Greedy action tensor([-0.1137, -0.8175, -0.7827, 1.0621]) tensor([0.1906, 0.0943, 0.0976, 0.6176]) -Greedy action tensor([-0.2587, -0.5063, -0.3885, 0.8338]) tensor([0.1773, 0.1384, 0.1557, 0.5286]) -Greedy action tensor([0.9102, 0.1928, 0.2248, 0.2736]) tensor([0.3967, 0.1936, 0.1999, 0.2099]) -Greedy action tensor([ 0.2447, 0.0962, 0.1491, -0.7948]) tensor([0.3201, 0.2759, 0.2909, 0.1132]) -Greedy action tensor([ 0.0090, -0.2044, -0.6422, 1.8326]) tensor([0.1173, 0.0948, 0.0612, 0.7267]) -Greedy action tensor([ 0.7836, -0.9131, 0.2153, -0.1193]) tensor([0.4640, 0.0850, 0.2629, 0.1881]) -Greedy action tensor([ 0.3531, -1.4022, 0.3492, -0.0861]) tensor([0.3554, 0.0614, 0.3540, 0.2291]) -Greedy action tensor([-0.4480, -0.8071, 0.8031, -0.7800]) tensor([0.1692, 0.1182, 0.5912, 0.1214]) -Greedy action tensor([ 1.5614, -0.8325, 0.4591, 0.2501]) tensor([0.5907, 0.0539, 0.1962, 0.1592]) -Greedy action tensor([-0.0702, -0.5292, -0.2096, -0.4825]) tensor([0.3161, 0.1997, 0.2749, 0.2093]) -Greedy action tensor([ 0.3934, 0.6466, -0.2128, 0.2554]) tensor([0.2699, 0.3477, 0.1472, 0.2351]) -Greedy action tensor([-0.9753, -1.7599, -0.8848, -0.0117]) tensor([0.1933, 0.0882, 0.2116, 0.5068]) -Greedy action tensor([-0.3854, -2.7539, 0.3698, 0.2531]) tensor([0.1955, 0.0183, 0.4160, 0.3702]) -Greedy action tensor([ 0.8614, 0.0906, -1.1584, 0.5009]) tensor([0.4362, 0.2018, 0.0579, 0.3041]) -Greedy action tensor([1.5113, 0.2957, 0.3898, 2.4240]) tensor([0.2431, 0.0721, 0.0792, 0.6056]) -Greedy action tensor([0.6082, 0.8911, 0.8587, 0.3518]) tensor([0.2280, 0.3026, 0.2929, 0.1764]) -Greedy action tensor([ 0.3126, -1.4022, -0.1901, 0.7449]) tensor([0.3007, 0.0541, 0.1819, 0.4633]) -Greedy action tensor([-0.6207, -0.7042, 0.0018, 0.1432]) tensor([0.1686, 0.1551, 0.3143, 0.3620]) -Greedy action tensor([-0.6140, 0.3989, 0.2636, -0.6191]) tensor([0.1398, 0.3849, 0.3362, 0.1391]) -Greedy action tensor([ 0.4957, -0.7504, 0.2730, 0.5315]) tensor([0.3201, 0.0921, 0.2562, 0.3317]) -Greedy action tensor([-0.5333, -1.7473, 0.7964, 0.1988]) tensor([0.1397, 0.0415, 0.5282, 0.2906]) -Greedy action tensor([-0.9743, -1.8449, -0.1234, -0.2993]) tensor([0.1747, 0.0731, 0.4091, 0.3431]) -Greedy action tensor([-1.0621, -0.6239, -0.0826, 0.3498]) tensor([0.1073, 0.1664, 0.2859, 0.4404]) -Greedy action tensor([0.2434, 0.1681, 0.9015, 0.9692]) tensor([0.1688, 0.1565, 0.3259, 0.3488]) -Greedy action tensor([ 0.5133, 1.1401, 0.2758, -0.0763]) tensor([0.2373, 0.4441, 0.1871, 0.1316]) -Greedy action tensor([-0.7785, -0.6388, 0.4572, -0.4998]) tensor([0.1447, 0.1664, 0.4978, 0.1912]) -Greedy action tensor([-0.2710, -0.9641, 0.5257, 0.6323]) tensor([0.1617, 0.0808, 0.3586, 0.3989]) -Greedy action tensor([ 0.2757, -0.5470, 0.4446, 0.8319]) tensor([0.2290, 0.1006, 0.2711, 0.3993]) -Greedy action tensor([-0.4190, -0.2140, -0.0920, 0.5372]) tensor([0.1609, 0.1975, 0.2231, 0.4185]) -Greedy action tensor([ 0.5715, -0.4654, -0.5096, -0.7985]) tensor([0.5134, 0.1820, 0.1742, 0.1305]) -Greedy action tensor([ 1.0685, -1.0723, 0.3084, 0.3925]) tensor([0.4776, 0.0561, 0.2233, 0.2429]) -Greedy action tensor([ 1.2325, -1.2494, 2.1265, -0.5837]) tensor([0.2709, 0.0226, 0.6624, 0.0441]) -Greedy action tensor([ 0.1215, 0.0880, -0.2467, -0.0829]) tensor([0.2878, 0.2784, 0.1992, 0.2346]) -Greedy action tensor([ 0.3645, -0.5343, 1.3684, -1.0659]) tensor([0.2286, 0.0930, 0.6237, 0.0547]) -Greedy action tensor([ 1.5907, -0.5420, 0.6022, 0.7789]) tensor([0.5169, 0.0613, 0.1923, 0.2295]) -Greedy action tensor([ 2.0739, -0.4441, 0.8412, 0.5304]) tensor([0.6306, 0.0508, 0.1838, 0.1347]) -Greedy action tensor([-1.2490, -2.0503, -0.4064, 0.2404]) tensor([0.1219, 0.0547, 0.2830, 0.5404]) -Greedy action tensor([ 1.2411, 0.7480, 0.9641, -0.8486]) tensor([0.4012, 0.2450, 0.3041, 0.0496]) -Greedy action tensor([-1.2031, -1.6999, -0.2193, 0.7688]) tensor([0.0872, 0.0531, 0.2332, 0.6265]) -Greedy action tensor([ 0.1639, -0.1877, 1.4517, 0.3374]) tensor([0.1534, 0.1079, 0.5561, 0.1825]) -Greedy action tensor([-0.5432, 0.5227, 0.9491, -0.2715]) tensor([0.1035, 0.3005, 0.4602, 0.1358]) -Greedy action tensor([ 0.9898, -0.4785, 1.0871, -0.7027]) tensor([0.3974, 0.0915, 0.4380, 0.0731]) -Greedy action tensor([ 1.7716, -0.4234, 0.6492, 1.9325]) tensor([0.3829, 0.0426, 0.1246, 0.4498]) -Greedy action tensor([1.3520, 0.8261, 0.3439, 0.8459]) tensor([0.3908, 0.2310, 0.1426, 0.2356]) -Greedy action tensor([ 0.9406, -0.0345, 0.8951, 0.3824]) tensor([0.3442, 0.1298, 0.3289, 0.1970]) -Greedy action tensor([-1.6470, -1.9839, 0.7932, -0.6843]) tensor([0.0633, 0.0452, 0.7259, 0.1657]) -Greedy action tensor([-0.3276, -0.6678, 0.6902, 0.7385]) tensor([0.1355, 0.0964, 0.3748, 0.3933]) -Greedy action tensor([-1.6750, -0.9698, -0.0426, 0.0777]) tensor([0.0719, 0.1455, 0.3678, 0.4148]) -Greedy action tensor([ 0.5689, -1.5765, 0.8269, 0.3786]) tensor([0.3088, 0.0361, 0.3997, 0.2553]) -Greedy action tensor([ 0.1448, 0.2855, -0.4078, 2.0105]) tensor([0.1089, 0.1253, 0.0626, 0.7032]) -Greedy action tensor([-0.4831, -1.3996, -0.2216, 2.1412]) tensor([0.0606, 0.0242, 0.0788, 0.8364]) -Greedy action tensor([-1.0670, -1.2942, 0.1393, -0.8605]) tensor([0.1570, 0.1251, 0.5248, 0.1931]) -Greedy action tensor([-0.2320, -1.4245, 0.4865, 0.5973]) tensor([0.1771, 0.0537, 0.3633, 0.4059]) -Greedy action tensor([ 0.5064, 0.2090, -0.5965, 0.8471]) tensor([0.2873, 0.2134, 0.0954, 0.4039]) -Greedy action tensor([ 0.4950, -0.3706, 0.2488, 1.3638]) tensor([0.2180, 0.0917, 0.1704, 0.5198]) -Greedy action tensor([ 0.9982, -0.2009, 2.3171, 0.5191]) tensor([0.1767, 0.0533, 0.6606, 0.1094]) -Greedy action tensor([ 0.1929, -2.0785, 0.5694, 0.7733]) tensor([0.2300, 0.0237, 0.3352, 0.4110]) -Greedy action tensor([-1.3945, -1.8454, 0.3810, -0.3389]) tensor([0.0960, 0.0612, 0.5669, 0.2759]) -Greedy action tensor([ 0.7619, 1.2144, 0.8755, -0.4042]) tensor([0.2497, 0.3926, 0.2798, 0.0778]) -Greedy action tensor([-0.3040, -0.6920, -0.1120, -0.1395]) tensor([0.2458, 0.1667, 0.2978, 0.2897]) -Greedy action tensor([ 1.6833, -0.4941, -0.4278, 0.3302]) tensor([0.6698, 0.0759, 0.0811, 0.1731]) -Greedy action tensor([ 1.5838, -0.5615, -0.3107, -0.1162]) tensor([0.6896, 0.0807, 0.1037, 0.1260]) -Greedy action tensor([ 1.2311, -0.2471, -0.2729, 0.4487]) tensor([0.5242, 0.1195, 0.1165, 0.2397]) -Greedy action tensor([ 1.0724, -0.1588, -0.5115, 0.3628]) tensor([0.5028, 0.1468, 0.1032, 0.2473]) -Greedy action tensor([ 1.6313, -0.7980, -0.0688, 0.0855]) tensor([0.6739, 0.0594, 0.1231, 0.1436]) -Greedy action tensor([ 1.2674, -0.8123, 0.1282, 0.1577]) tensor([0.5635, 0.0704, 0.1804, 0.1858]) -Greedy action tensor([ 0.6366, -0.1089, 0.2151, -0.0363]) tensor([0.3787, 0.1797, 0.2484, 0.1932]) -Greedy action tensor([ 2.0137, -0.0825, -0.0395, 0.0856]) tensor([0.7160, 0.0880, 0.0919, 0.1041]) -Greedy action tensor([ 2.1334, -0.9940, -0.3316, 0.4507]) tensor([0.7606, 0.0333, 0.0647, 0.1414]) -Greedy action tensor([ 1.3150, -0.3909, -0.9016, 0.0714]) tensor([0.6333, 0.1150, 0.0690, 0.1826]) -Greedy action tensor([ 2.0200, -1.0751, -0.4530, 0.9107]) tensor([0.6852, 0.0310, 0.0578, 0.2260]) -Greedy action tensor([ 1.2927, -0.1683, -0.4870, 0.3618]) tensor([0.5571, 0.1293, 0.0940, 0.2196]) -Greedy action tensor([ 0.5972, -0.0648, -0.6318, 0.1930]) tensor([0.4039, 0.2083, 0.1182, 0.2696]) -Greedy action tensor([ 1.0853, -0.3960, -0.1538, 0.2866]) tensor([0.5084, 0.1156, 0.1473, 0.2287]) -Greedy action tensor([ 1.1611, -0.8364, -0.2972, 0.6180]) tensor([0.5130, 0.0696, 0.1193, 0.2980]) -Greedy action tensor([ 1.4504, 0.1908, -0.0428, 0.2441]) tensor([0.5532, 0.1570, 0.1243, 0.1656]) -Greedy action tensor([ 1.3104, -0.1200, -0.1330, 0.1281]) tensor([0.5612, 0.1342, 0.1325, 0.1720]) -Greedy action tensor([ 1.2926, -0.4549, -0.5314, 0.5320]) tensor([0.5546, 0.0966, 0.0895, 0.2592]) -Greedy action tensor([ 2.0594, -0.6210, -0.3706, 0.2628]) tensor([0.7562, 0.0518, 0.0666, 0.1254]) -Greedy action tensor([ 1.5175, -0.7887, -0.3785, 0.1271]) tensor([0.6672, 0.0665, 0.1002, 0.1661]) -Greedy action tensor([1.4181, 0.1548, 0.3833, 0.0883]) tensor([0.5256, 0.1486, 0.1868, 0.1390]) -Greedy action tensor([ 1.4460, -0.4435, -0.3861, 0.3489]) tensor([0.6079, 0.0919, 0.0973, 0.2029]) -Greedy action tensor([ 1.4007, 0.3363, -0.3669, 0.1361]) tensor([0.5562, 0.1918, 0.0950, 0.1570]) -Greedy action tensor([ 1.9574, -1.1062, -0.1477, 0.2632]) tensor([0.7395, 0.0345, 0.0901, 0.1359]) -Greedy action tensor([ 2.2769, -0.2757, -0.1048, 0.0622]) tensor([0.7816, 0.0609, 0.0722, 0.0853]) -Greedy action tensor([ 2.4808, -0.9193, -0.4434, 0.5266]) tensor([0.8138, 0.0272, 0.0437, 0.1153]) -Greedy action tensor([ 1.8816, -0.4265, -0.5504, 0.5715]) tensor([0.6863, 0.0683, 0.0603, 0.1852]) -Greedy action tensor([ 1.3123, 0.0816, -0.3954, 0.0943]) tensor([0.5652, 0.1651, 0.1025, 0.1672]) -Greedy action tensor([ 3.0296, -1.6005, -0.3258, 1.2539]) tensor([0.8237, 0.0080, 0.0287, 0.1395]) -Greedy action tensor([ 1.7582, -0.7862, -0.1764, 0.4937]) tensor([0.6643, 0.0522, 0.0960, 0.1876]) -Greedy action tensor([ 0.5120, -0.3078, -0.8645, 0.7833]) tensor([0.3328, 0.1466, 0.0840, 0.4366]) -Greedy action tensor([ 1.4292, -0.5386, -0.3769, 0.2344]) tensor([0.6224, 0.0870, 0.1022, 0.1884]) -Greedy action tensor([ 1.2180, 0.0034, -0.2222, -0.0292]) tensor([0.5491, 0.1630, 0.1301, 0.1578]) -Greedy action tensor([ 1.1281, -0.3410, -0.2395, 0.2716]) tensor([0.5237, 0.1205, 0.1334, 0.2224]) -Greedy action tensor([ 1.3681, -0.6685, -0.1961, 0.4624]) tensor([0.5734, 0.0748, 0.1200, 0.2318]) -Greedy action tensor([ 1.3155, -0.5023, -0.4966, 0.3921]) tensor([0.5804, 0.0943, 0.0948, 0.2305]) -Greedy action tensor([ 0.9761, 0.0400, -0.6499, 0.2249]) tensor([0.4853, 0.1903, 0.0955, 0.2290]) -Greedy action tensor([ 1.5034, -1.0548, 0.1184, -0.1469]) tensor([0.6580, 0.0510, 0.1647, 0.1263]) -Greedy action tensor([ 1.1270, -0.3210, -0.4589, 0.5131]) tensor([0.5048, 0.1186, 0.1034, 0.2732]) -Greedy action tensor([ 1.2180, -0.5361, -0.3238, 0.3322]) tensor([0.5557, 0.0962, 0.1189, 0.2292]) -Greedy action tensor([ 1.3167, -0.6499, -0.0028, 0.4927]) tensor([0.5417, 0.0758, 0.1448, 0.2377]) -Greedy action tensor([ 0.8088, -0.4890, -0.6315, 0.5948]) tensor([0.4315, 0.1179, 0.1022, 0.3484]) -Greedy action tensor([ 0.8469, -0.5778, -0.3823, 0.0697]) tensor([0.5018, 0.1207, 0.1468, 0.2307]) -Greedy action tensor([ 0.8618, -0.4262, -0.6984, 0.2709]) tensor([0.4903, 0.1352, 0.1030, 0.2715]) -Greedy action tensor([ 0.3706, -0.2199, 0.2278, 0.0371]) tensor([0.3187, 0.1766, 0.2763, 0.2283]) -Greedy action tensor([ 0.7314, -0.4756, -0.2182, 0.0673]) tensor([0.4544, 0.1359, 0.1758, 0.2339]) -Greedy action tensor([ 2.0132, -1.0307, -0.2348, 0.5927]) tensor([0.7169, 0.0342, 0.0757, 0.1732]) -Greedy action tensor([ 1.6356, -0.1178, -0.6768, 0.6799]) tensor([0.6036, 0.1045, 0.0598, 0.2321]) -Greedy action tensor([ 1.3116, -0.3025, -0.6271, 0.4360]) tensor([0.5683, 0.1131, 0.0818, 0.2368]) -Greedy action tensor([ 1.2589, -0.5960, -0.2071, 0.3824]) tensor([0.5545, 0.0868, 0.1280, 0.2308]) -Greedy action tensor([ 1.2788, -0.0855, -0.7371, 0.0184]) tensor([0.5980, 0.1528, 0.0797, 0.1696]) -Greedy action tensor([ 1.4601, -0.1977, -0.3828, 0.5665]) tensor([0.5688, 0.1084, 0.0901, 0.2327]) -Greedy action tensor([ 1.8625, -0.0755, -0.2769, 0.4307]) tensor([0.6664, 0.0960, 0.0785, 0.1592]) -Greedy action tensor([ 1.1230, -0.0369, -0.5407, 0.1532]) tensor([0.5313, 0.1666, 0.1007, 0.2015]) -Greedy action tensor([ 1.4528, -0.6677, -0.2800, 0.1418]) tensor([0.6384, 0.0766, 0.1129, 0.1721]) -Greedy action tensor([ 1.1074, -0.5223, -0.1482, 0.1829]) tensor([0.5326, 0.1044, 0.1517, 0.2113]) -Greedy action tensor([ 1.2854, -0.4127, -0.1947, 0.1432]) tensor([0.5781, 0.1058, 0.1316, 0.1845]) -Greedy action tensor([ 0.7005, 0.0193, -0.1945, 0.2975]) tensor([0.3872, 0.1959, 0.1582, 0.2587]) -Greedy action tensor([ 1.5497, 0.1661, -0.2664, 0.5926]) tensor([0.5564, 0.1395, 0.0905, 0.2136]) -Greedy action tensor([ 1.1619, -0.0032, 0.1273, 0.0399]) tensor([0.5018, 0.1565, 0.1783, 0.1634]) -Greedy action tensor([ 1.6328, -0.7430, -0.2392, 0.6068]) tensor([0.6230, 0.0579, 0.0958, 0.2233]) -Greedy action tensor([ 1.0820, 0.2068, -0.2998, -0.1355]) tensor([0.5092, 0.2122, 0.1279, 0.1507]) -Greedy action tensor([ 1.5322, -0.2450, -0.4018, 0.3862]) tensor([0.6129, 0.1036, 0.0886, 0.1948]) -Greedy action tensor([ 1.0431, 0.1162, -0.1894, 0.3362]) tensor([0.4586, 0.1815, 0.1337, 0.2262]) -Greedy action tensor([ 1.9077, -0.5501, -0.2580, 0.7543]) tensor([0.6597, 0.0565, 0.0757, 0.2082]) -Greedy action tensor([ 1.3855, 0.2906, -1.0164, 0.3140]) tensor([0.5657, 0.1893, 0.0512, 0.1938]) -Greedy action tensor([ 2.0601, -1.1774, -0.3817, 0.8384]) tensor([0.7037, 0.0276, 0.0612, 0.2074]) -Greedy action tensor([ 1.4736, -0.3285, -0.3481, 0.4103]) tensor([0.5981, 0.0987, 0.0967, 0.2065]) -Greedy action tensor([ 2.0326, -0.4901, 0.0190, 0.5692]) tensor([0.6919, 0.0555, 0.0924, 0.1602]) -Greedy action tensor([ 1.0333, -0.6595, -0.4333, 0.1345]) tensor([0.5489, 0.1010, 0.1266, 0.2234]) -Greedy action tensor([ 1.0725, -0.6492, 0.1391, 0.2453]) tensor([0.4977, 0.0890, 0.1957, 0.2176]) -Greedy action tensor([ 2.5047, -0.7598, -0.5108, 0.6588]) tensor([0.8031, 0.0307, 0.0394, 0.1268]) -Greedy action tensor([ 1.8618, 0.0086, -0.5399, 0.4239]) tensor([0.6735, 0.1056, 0.0610, 0.1599]) -Greedy action tensor([ 1.8470, -0.6989, 0.0714, 0.5696]) tensor([0.6551, 0.0514, 0.1110, 0.1826]) -Greedy action tensor([ 1.6164, 0.0137, -0.3291, 0.8913]) tensor([0.5469, 0.1101, 0.0782, 0.2649]) -Greedy action tensor([ 1.6600, -0.1810, -0.1425, 0.5224]) tensor([0.6082, 0.0965, 0.1003, 0.1950]) -Greedy action tensor([ 0.8782, -0.2741, -0.3551, 0.2019]) tensor([0.4727, 0.1493, 0.1377, 0.2403]) -Greedy action tensor([ 1.2593, -0.5031, -0.5848, 0.5117]) tensor([0.5545, 0.0952, 0.0877, 0.2626]) -Greedy action tensor([ 2.3623, -0.5355, -0.4208, 0.4700]) tensor([0.7888, 0.0435, 0.0488, 0.1189]) -Greedy action tensor([ 0.2028, -0.4143, -0.2808, 0.3002]) tensor([0.3069, 0.1656, 0.1892, 0.3383]) -Greedy action tensor([ 1.7345, -0.6051, -0.4211, 0.5215]) tensor([0.6625, 0.0638, 0.0767, 0.1970]) -Greedy action tensor([-1.8928, -0.5193, -0.1424, -0.5434]) tensor([0.0687, 0.2712, 0.3954, 0.2648]) -Greedy action tensor([-1.8396, -0.3181, 0.5953, -0.1368]) tensor([0.0445, 0.2037, 0.5077, 0.2442]) -Greedy action tensor([-1.8919, -0.3960, 0.6398, -0.1414]) tensor([0.0420, 0.1876, 0.5284, 0.2420]) -Greedy action tensor([-1.9180, -0.4687, 0.6598, -0.1572]) tensor([0.0412, 0.1757, 0.5431, 0.2399]) -Greedy action tensor([-1.9322, -0.4531, 0.6614, -0.1723]) tensor([0.0407, 0.1786, 0.5443, 0.2365]) -Greedy action tensor([-1.6456, -0.5211, 0.5473, -0.1391]) tensor([0.0570, 0.1754, 0.5106, 0.2570]) -Greedy action tensor([-1.6341, -0.4969, 0.5022, -0.1229]) tensor([0.0584, 0.1821, 0.4947, 0.2648]) -Greedy action tensor([ 0.0572, -0.5200, 0.1570, 0.7410]) tensor([0.2152, 0.1208, 0.2377, 0.4263]) -Greedy action tensor([-1.7972, -0.0588, 0.5383, -0.1184]) tensor([0.0447, 0.2541, 0.4617, 0.2394]) -Greedy action tensor([-1.8218, -0.4087, 0.5975, -0.1147]) tensor([0.0457, 0.1880, 0.5141, 0.2522]) -Greedy action tensor([-1.7019, -0.4462, 0.5408, -0.0585]) tensor([0.0524, 0.1838, 0.4931, 0.2708]) -Greedy action tensor([-0.4005, -0.2237, 0.1851, 0.2251]) tensor([0.1707, 0.2037, 0.3066, 0.3191]) -Greedy action tensor([-1.1653, -0.4881, 0.4036, 0.4298]) tensor([0.0787, 0.1550, 0.3781, 0.3881]) -Greedy action tensor([-1.3322, -0.2706, 0.6476, -0.6405]) tensor([0.0762, 0.2202, 0.5515, 0.1521]) -Greedy action tensor([-1.6696, -0.5069, 0.5304, 0.0355]) tensor([0.0534, 0.1708, 0.4820, 0.2938]) -Greedy action tensor([-1.8366, -0.4698, 0.6115, -0.1099]) tensor([0.0452, 0.1774, 0.5231, 0.2543]) -Greedy action tensor([-1.7197, -1.0321, 1.1411, 0.0682]) tensor([0.0378, 0.0752, 0.6609, 0.2260]) -Greedy action tensor([-1.9059, -0.4686, 0.6705, -0.1324]) tensor([0.0412, 0.1736, 0.5422, 0.2429]) -Greedy action tensor([-0.7763, -0.5828, 0.2053, 0.0727]) tensor([0.1385, 0.1681, 0.3697, 0.3238]) -Greedy action tensor([-1.9227, -0.4362, 0.6528, -0.1727]) tensor([0.0411, 0.1819, 0.5403, 0.2367]) -Greedy action tensor([-1.8123, -0.3979, 0.6069, -0.0962]) tensor([0.0456, 0.1877, 0.5128, 0.2538]) -Greedy action tensor([-1.8734, -0.3174, 0.6185, -0.1140]) tensor([0.0423, 0.2006, 0.5113, 0.2458]) -Greedy action tensor([-1.4436, -0.3661, 0.4178, 0.1748]) tensor([0.0649, 0.1906, 0.4173, 0.3273]) -Greedy action tensor([-0.2879, -0.3014, 0.1786, 0.2078]) tensor([0.1915, 0.1889, 0.3053, 0.3143]) -Greedy action tensor([-1.8195, -0.5113, 0.6332, -0.0993]) tensor([0.0457, 0.1689, 0.5305, 0.2550]) -Greedy action tensor([-1.4023, -0.5596, 0.3895, 0.1824]) tensor([0.0704, 0.1636, 0.4225, 0.3435]) -Greedy action tensor([-1.8431, -0.4325, 0.6116, -0.1256]) tensor([0.0448, 0.1837, 0.5218, 0.2497]) -Greedy action tensor([-1.8034, -0.3916, 0.5904, -0.1087]) tensor([0.0465, 0.1908, 0.5094, 0.2532]) -Greedy action tensor([-0.4728, -0.4603, 0.3257, 0.4414]) tensor([0.1486, 0.1505, 0.3302, 0.3707]) -Greedy action tensor([-1.7621, -0.4439, 0.5809, -0.0480]) tensor([0.0483, 0.1805, 0.5030, 0.2682]) -Greedy action tensor([-1.8868, -0.3909, 0.6351, -0.1435]) tensor([0.0423, 0.1889, 0.5269, 0.2419]) -Greedy action tensor([-0.9066, -0.3312, 0.2702, -0.3192]) tensor([0.1279, 0.2273, 0.4148, 0.2301]) -Greedy action tensor([-1.7044, -0.4620, 0.6443, 0.1191]) tensor([0.0473, 0.1639, 0.4956, 0.2931]) -Greedy action tensor([-1.3928, -0.4708, 0.7328, 0.8713]) tensor([0.0465, 0.1169, 0.3894, 0.4472]) -Greedy action tensor([-1.0677, -0.6114, 0.2158, 0.3165]) tensor([0.0982, 0.1550, 0.3546, 0.3921]) -Greedy action tensor([-1.3195, -0.6083, 0.3494, -0.0044]) tensor([0.0829, 0.1688, 0.4397, 0.3087]) -Greedy action tensor([-0.4483, -0.4569, 0.1913, 0.2499]) tensor([0.1696, 0.1681, 0.3215, 0.3409]) -Greedy action tensor([-1.9466, -0.4485, 0.6674, -0.1818]) tensor([0.0401, 0.1792, 0.5469, 0.2339]) -Greedy action tensor([-1.1090, -0.0372, 0.5746, -0.5904]) tensor([0.0910, 0.2659, 0.4902, 0.1529]) -Greedy action tensor([-0.9419, -0.5805, 0.2447, 0.0876]) tensor([0.1175, 0.1687, 0.3849, 0.3289]) -Greedy action tensor([-1.8982, -0.4134, 0.6300, -0.1436]) tensor([0.0421, 0.1860, 0.5281, 0.2437]) -Greedy action tensor([-1.9464, -0.4550, 0.6684, -0.1810]) tensor([0.0401, 0.1781, 0.5476, 0.2342]) -Greedy action tensor([-1.7030, 0.2874, 0.4947, 0.1269]) tensor([0.0425, 0.3107, 0.3822, 0.2646]) -Greedy action tensor([-1.0878, -0.6480, 0.8906, 1.1871]) tensor([0.0513, 0.0796, 0.3706, 0.4985]) -Greedy action tensor([-1.8216, -0.4937, 0.6370, -0.0414]) tensor([0.0447, 0.1685, 0.5220, 0.2649]) -Greedy action tensor([-1.6348, -0.4967, 0.5018, -0.0041]) tensor([0.0565, 0.1763, 0.4786, 0.2886]) -Greedy action tensor([-1.8981, -0.6380, 1.1329, 0.2138]) tensor([0.0298, 0.1052, 0.6183, 0.2466]) -Greedy action tensor([-1.9179, -0.4232, 0.6522, -0.1592]) tensor([0.0411, 0.1832, 0.5371, 0.2386]) -Greedy action tensor([-1.8975, -0.4266, 0.6405, -0.1578]) tensor([0.0422, 0.1836, 0.5339, 0.2403]) -Greedy action tensor([-1.4319, -0.2750, 0.7477, -0.6976]) tensor([0.0662, 0.2105, 0.5853, 0.1380]) -Greedy action tensor([-1.6186, -0.0660, 0.6286, 0.1718]) tensor([0.0472, 0.2231, 0.4468, 0.2829]) -Greedy action tensor([-1.6509, -0.0266, 0.5401, 0.0733]) tensor([0.0485, 0.2460, 0.4336, 0.2719]) -Greedy action tensor([-1.8519, -0.4443, 0.6090, -0.1304]) tensor([0.0447, 0.1825, 0.5232, 0.2497]) -Greedy action tensor([-1.7063, -0.1636, 0.5855, 0.0633]) tensor([0.0466, 0.2182, 0.4614, 0.2737]) -Greedy action tensor([-1.9191, -0.4632, 0.6971, -0.1267]) tensor([0.0400, 0.1717, 0.5479, 0.2404]) -Greedy action tensor([-1.3967, -0.1498, 0.4954, 0.6093]) tensor([0.0539, 0.1876, 0.3577, 0.4008]) -Greedy action tensor([-1.8856, -0.4661, 0.6805, -0.1034]) tensor([0.0415, 0.1716, 0.5402, 0.2467]) -Greedy action tensor([-1.7602, -0.4293, 0.5637, -0.0623]) tensor([0.0489, 0.1849, 0.4992, 0.2670]) -Greedy action tensor([-1.8858, -0.3720, 0.6250, -0.1344]) tensor([0.0423, 0.1924, 0.5213, 0.2440]) -Greedy action tensor([-1.8355, -0.4991, 0.6173, -0.1156]) tensor([0.0454, 0.1729, 0.5280, 0.2537]) -Greedy action tensor([-1.9326, -0.4444, 0.6646, -0.1762]) tensor([0.0406, 0.1797, 0.5447, 0.2350]) -Greedy action tensor([-0.4355, -0.0908, 0.9859, 1.5370]) tensor([0.0728, 0.1027, 0.3015, 0.5231]) -Greedy action tensor([-1.6741, -0.4390, 0.5943, 0.2234]) tensor([0.0481, 0.1655, 0.4652, 0.3211]) -Greedy action tensor([-1.6648, -0.5360, 0.5487, -0.0093]) tensor([0.0541, 0.1674, 0.4951, 0.2834]) -Greedy action tensor([-1.8464, -0.3834, 0.6163, -0.1121]) tensor([0.0440, 0.1901, 0.5166, 0.2493]) -Greedy action tensor([-1.6383, -0.4991, 0.5071, 0.0027]) tensor([0.0561, 0.1752, 0.4793, 0.2894]) -Greedy action tensor([-1.7801, -0.0313, 0.5213, -0.1247]) tensor([0.0455, 0.2616, 0.4546, 0.2383]) -Greedy action tensor([-1.8976, -0.4575, 0.6512, -0.1513]) tensor([0.0421, 0.1778, 0.5387, 0.2414]) -Greedy action tensor([-1.8465, -0.4604, 0.6209, -0.1283]) tensor([0.0447, 0.1788, 0.5272, 0.2492]) -Greedy action tensor([-1.8883, -0.4333, 0.6349, -0.1524]) tensor([0.0427, 0.1829, 0.5322, 0.2422]) -Greedy action tensor([-1.0653, -0.5866, 0.2426, 0.3559]) tensor([0.0957, 0.1544, 0.3538, 0.3962]) -Greedy action tensor([-1.9470, -0.4540, 0.6665, -0.1827]) tensor([0.0401, 0.1785, 0.5473, 0.2341]) -Greedy action tensor([-0.7821, -0.5761, 0.5283, -0.3919]) tensor([0.1349, 0.1657, 0.5001, 0.1993]) -Greedy action tensor([-1.8332, -0.4765, 0.6002, -0.0995]) tensor([0.0456, 0.1770, 0.5194, 0.2580]) -Greedy action tensor([-1.5165, 0.1443, 0.4073, 0.1373]) tensor([0.0545, 0.2870, 0.3734, 0.2850]) -Greedy action tensor([-1.0078, -0.5202, 1.3354, 1.4313]) tensor([0.0408, 0.0665, 0.4250, 0.4678]) -Greedy action tensor([-1.5358, -0.5168, 1.1040, 0.9981]) tensor([0.0329, 0.0912, 0.4611, 0.4148]) -Greedy action tensor([-1.6552, -0.5135, 0.4969, 0.0414]) tensor([0.0550, 0.1722, 0.4729, 0.2999]) -Greedy action tensor([-1.1676, -0.3946, 0.3331, -0.0405]) tensor([0.0931, 0.2017, 0.4177, 0.2874]) -Greedy action tensor([-0.7959, -0.5611, 0.2450, 0.3142]) tensor([0.1230, 0.1555, 0.3483, 0.3732]) -Greedy action tensor([-1.9106, -0.4081, 0.6519, -0.1532]) tensor([0.0412, 0.1852, 0.5346, 0.2390]) -Greedy action tensor([ 0.2964, 0.1763, -0.1580, -0.0156]) tensor([0.3073, 0.2726, 0.1951, 0.2250]) -Greedy action tensor([ 0.4484, -0.2744, -0.0298, -0.2927]) tensor([0.3873, 0.1880, 0.2401, 0.1846]) -Greedy action tensor([ 0.5654, 0.2193, -0.1264, -0.0668]) tensor([0.3650, 0.2582, 0.1828, 0.1940]) -Greedy action tensor([ 0.2991, -0.1153, 0.1182, -0.2659]) tensor([0.3264, 0.2157, 0.2724, 0.1855]) -Greedy action tensor([ 0.9146, -0.3912, -0.0868, -0.5552]) tensor([0.5352, 0.1450, 0.1966, 0.1231]) -Greedy action tensor([ 0.2457, -0.2448, -0.1678, -0.4858]) tensor([0.3630, 0.2223, 0.2401, 0.1747]) -Greedy action tensor([ 0.7186, -0.1100, -0.0973, -0.2585]) tensor([0.4434, 0.1936, 0.1961, 0.1669]) -Greedy action tensor([ 0.8388, -0.5101, -0.0097, -0.4527]) tensor([0.5096, 0.1322, 0.2181, 0.1401]) -Greedy action tensor([ 0.8292, -0.5181, -0.1178, -0.3703]) tensor([0.5130, 0.1334, 0.1990, 0.1546]) -Greedy action tensor([ 0.8058, -0.2895, 0.0177, -0.2962]) tensor([0.4714, 0.1577, 0.2143, 0.1566]) -Greedy action tensor([ 0.9712, -0.9173, 0.0596, -0.5042]) tensor([0.5612, 0.0849, 0.2255, 0.1283]) -Greedy action tensor([ 0.6079, -0.0922, -0.2851, -0.1384]) tensor([0.4202, 0.2086, 0.1720, 0.1992]) -Greedy action tensor([ 0.8770, -0.8134, -0.0306, -0.1140]) tensor([0.5104, 0.0942, 0.2060, 0.1895]) -Greedy action tensor([ 0.4646, -0.2126, -0.0213, -0.2780]) tensor([0.3847, 0.1955, 0.2367, 0.1831]) -Greedy action tensor([ 0.6743, -0.3984, -0.1956, -0.3597]) tensor([0.4724, 0.1616, 0.1980, 0.1680]) -Greedy action tensor([ 0.9236, -0.6408, -0.1420, -0.3823]) tensor([0.5480, 0.1147, 0.1888, 0.1485]) -Greedy action tensor([ 0.5883, -0.3861, -0.0572, -0.2593]) tensor([0.4291, 0.1620, 0.2250, 0.1839]) -Greedy action tensor([ 0.6782, -0.2200, -0.2768, -0.2984]) tensor([0.4611, 0.1878, 0.1774, 0.1736]) -Greedy action tensor([ 0.7870, -0.2480, 0.0421, -0.3541]) tensor([0.4652, 0.1653, 0.2209, 0.1486]) -Greedy action tensor([ 1.2667, -1.0571, 0.0864, -0.7640]) tensor([0.6509, 0.0637, 0.2000, 0.0854]) -Greedy action tensor([ 0.0730, 0.0058, -0.0444, -0.5038]) tensor([0.2953, 0.2761, 0.2626, 0.1659]) -Greedy action tensor([ 0.5548, 0.0028, -0.1602, -0.2155]) tensor([0.3956, 0.2278, 0.1935, 0.1831]) -Greedy action tensor([ 0.8224, -0.2399, -0.1777, -0.5124]) tensor([0.5059, 0.1749, 0.1861, 0.1332]) -Greedy action tensor([ 0.7190, -0.3566, -0.0463, -0.2407]) tensor([0.4568, 0.1558, 0.2125, 0.1749]) -Greedy action tensor([ 1.0198, -0.4137, -0.0063, -0.1751]) tensor([0.5264, 0.1255, 0.1887, 0.1594]) -Greedy action tensor([ 0.7663, -0.5419, 0.0058, -0.3968]) tensor([0.4877, 0.1318, 0.2280, 0.1524]) -Greedy action tensor([ 0.5188, -0.0176, -0.0392, 0.0059]) tensor([0.3628, 0.2122, 0.2077, 0.2173]) -Greedy action tensor([ 0.5334, -0.3543, 0.1721, -0.2697]) tensor([0.3912, 0.1610, 0.2726, 0.1752]) -Greedy action tensor([ 0.6662, -0.4819, -0.0013, -0.2529]) tensor([0.4486, 0.1423, 0.2301, 0.1789]) -Greedy action tensor([ 1.0046, -0.4463, 0.0825, -0.4523]) tensor([0.5362, 0.1257, 0.2132, 0.1249]) -Greedy action tensor([ 0.5631, -0.3033, -0.0776, -0.2434]) tensor([0.4177, 0.1756, 0.2201, 0.1865]) -Greedy action tensor([ 0.2910, 0.0120, 0.1454, -0.2108]) tensor([0.3099, 0.2345, 0.2679, 0.1877]) -Greedy action tensor([ 0.3162, -0.0530, 0.2488, -0.4819]) tensor([0.3251, 0.2247, 0.3039, 0.1463]) -Greedy action tensor([ 1.0841, -0.7060, -0.0276, -0.5892]) tensor([0.5940, 0.0992, 0.1954, 0.1114]) -Greedy action tensor([ 0.8843, -0.6665, -0.1116, -0.2256]) tensor([0.5233, 0.1110, 0.1933, 0.1725]) -Greedy action tensor([ 0.4816, -0.0984, -0.0324, -0.2729]) tensor([0.3805, 0.2130, 0.2276, 0.1789]) -Greedy action tensor([ 0.4476, 0.0477, -0.0664, -0.4340]) tensor([0.3728, 0.2499, 0.2229, 0.1544]) -Greedy action tensor([ 0.2063, 0.2105, -0.1436, -0.1857]) tensor([0.2955, 0.2967, 0.2082, 0.1996]) -Greedy action tensor([ 0.3576, -0.1010, 0.0198, -0.1466]) tensor([0.3390, 0.2143, 0.2419, 0.2048]) -Greedy action tensor([ 0.2033, -0.4231, -0.1242, -0.2034]) tensor([0.3423, 0.1830, 0.2467, 0.2279]) -Greedy action tensor([ 0.4811, -0.0515, -0.0522, -0.3566]) tensor([0.3837, 0.2252, 0.2251, 0.1660]) -Greedy action tensor([ 1.0452, -0.7312, 0.0303, -0.5647]) tensor([0.5775, 0.0977, 0.2093, 0.1155]) -Greedy action tensor([ 0.3827, -0.0875, 0.0056, -0.1867]) tensor([0.3476, 0.2172, 0.2384, 0.1967]) -Greedy action tensor([ 1.0344, 0.6310, -0.1894, -0.3183]) tensor([0.4503, 0.3008, 0.1324, 0.1164]) -Greedy action tensor([ 0.6761, -0.0275, 0.0043, -0.3679]) tensor([0.4241, 0.2099, 0.2167, 0.1493]) -Greedy action tensor([ 0.4893, 0.0020, -0.2586, -0.2670]) tensor([0.3911, 0.2402, 0.1851, 0.1836]) -Greedy action tensor([ 0.6406, -0.4845, 0.1428, -0.4098]) tensor([0.4382, 0.1422, 0.2663, 0.1533]) -Greedy action tensor([ 0.7470, -0.5172, -0.0572, -0.4806]) tensor([0.4943, 0.1396, 0.2212, 0.1448]) -Greedy action tensor([ 0.6787, 0.4650, -0.2291, -0.2837]) tensor([0.3857, 0.3114, 0.1556, 0.1473]) -Greedy action tensor([ 0.6909, 0.1015, -0.0927, -0.1231]) tensor([0.4074, 0.2260, 0.1861, 0.1805]) -Greedy action tensor([ 0.7600, -0.4703, -0.1170, -0.3611]) tensor([0.4916, 0.1436, 0.2045, 0.1602]) -Greedy action tensor([ 0.5536, -0.0648, -0.1610, -0.1626]) tensor([0.3973, 0.2141, 0.1945, 0.1941]) -Greedy action tensor([ 0.4773, -0.1845, -0.0674, -0.0762]) tensor([0.3744, 0.1932, 0.2172, 0.2152]) -Greedy action tensor([ 0.8467, -0.6237, -0.1555, -0.4174]) tensor([0.5321, 0.1223, 0.1953, 0.1503]) -Greedy action tensor([ 0.6730, -0.1760, -0.1616, -0.4199]) tensor([0.4551, 0.1947, 0.1975, 0.1526]) -Greedy action tensor([ 0.7499, -0.3043, -0.0721, -0.3452]) tensor([0.4711, 0.1642, 0.2071, 0.1576]) -Greedy action tensor([ 0.6663, -0.5820, -0.0358, -0.3785]) tensor([0.4685, 0.1345, 0.2322, 0.1648]) -Greedy action tensor([ 0.7232, -0.4970, 0.0504, -0.7076]) tensor([0.4891, 0.1444, 0.2496, 0.1170]) -Greedy action tensor([ 1.4786, -0.4199, 0.1714, -0.2925]) tensor([0.6287, 0.0942, 0.1701, 0.1070]) -Greedy action tensor([ 0.7513, -0.4552, 0.1069, -0.3527]) tensor([0.4639, 0.1388, 0.2435, 0.1538]) -Greedy action tensor([ 0.9447, -0.6018, -0.0563, -0.4136]) tensor([0.5442, 0.1159, 0.2000, 0.1399]) -Greedy action tensor([ 0.8892, -0.5771, 0.1679, -0.3257]) tensor([0.4966, 0.1146, 0.2414, 0.1474]) -Greedy action tensor([ 0.5461, -0.0806, -0.0392, -0.2728]) tensor([0.3949, 0.2110, 0.2199, 0.1741]) -Greedy action tensor([ 1.2071, -0.9456, 0.1364, -0.7059]) tensor([0.6224, 0.0723, 0.2134, 0.0919]) -Greedy action tensor([ 0.2339, -0.1878, 0.0400, -0.2712]) tensor([0.3243, 0.2128, 0.2672, 0.1957]) -Greedy action tensor([ 1.0471, -0.8594, 0.0205, -0.4029]) tensor([0.5743, 0.0853, 0.2057, 0.1347]) -Greedy action tensor([ 0.3263, -0.1631, -0.0211, -0.1868]) tensor([0.3427, 0.2101, 0.2421, 0.2051]) -Greedy action tensor([ 1.2724, -0.6890, 0.1550, -0.3540]) tensor([0.6008, 0.0845, 0.1965, 0.1181]) -Greedy action tensor([ 0.1950, 0.0770, -0.1818, -0.2895]) tensor([0.3134, 0.2785, 0.2150, 0.1930]) -Greedy action tensor([ 0.5723, -0.4149, -0.1006, -0.1864]) tensor([0.4253, 0.1585, 0.2170, 0.1992]) -Greedy action tensor([ 0.6827, -0.5925, -0.0074, -0.7342]) tensor([0.4942, 0.1381, 0.2479, 0.1198]) -Greedy action tensor([ 0.2899, 0.0090, -0.0304, -0.3895]) tensor([0.3347, 0.2527, 0.2430, 0.1697]) -Greedy action tensor([ 0.7306, 0.1844, 0.0328, -0.5816]) tensor([0.4263, 0.2469, 0.2121, 0.1148]) -Greedy action tensor([ 0.7103, -0.4597, -0.0512, -0.4477]) tensor([0.4781, 0.1484, 0.2233, 0.1502]) -Greedy action tensor([ 0.9833, -0.7478, -0.2479, -0.4709]) tensor([0.5873, 0.1040, 0.1715, 0.1372]) -Greedy action tensor([ 0.3852, -0.0895, -0.0868, -0.1333]) tensor([0.3520, 0.2189, 0.2195, 0.2096]) -Greedy action tensor([ 0.5589, -0.3785, 0.0972, -0.5386]) tensor([0.4245, 0.1663, 0.2675, 0.1417]) -Greedy action tensor([ 0.8259, -0.4308, -0.0899, -0.0942]) tensor([0.4800, 0.1366, 0.1921, 0.1913]) -Greedy action tensor([ 0.4037, -0.1301, -0.1025, -0.2911]) tensor([0.3720, 0.2181, 0.2242, 0.1857]) -Greedy action tensor([ 0.5286, -0.4780, 0.1590, -0.4247]) tensor([0.4095, 0.1497, 0.2830, 0.1579]) -Greedy action tensor([ 0.9706, -0.7584, -0.0374, -0.6568]) tensor([0.5751, 0.1021, 0.2099, 0.1130]) -Greedy action tensor([-0.3489, -1.4512, -0.4156, 0.6160]) tensor([0.2044, 0.0679, 0.1912, 0.5365]) -Greedy action tensor([-0.3542, -0.7859, 0.6056, -0.0866]) tensor([0.1796, 0.1166, 0.4690, 0.2347]) -Greedy action tensor([ 1.5860, -0.4210, 1.4720, 0.5611]) tensor([0.4192, 0.0563, 0.3740, 0.1504]) -Greedy action tensor([ 2.0198, -1.5152, -0.6447, 1.3093]) tensor([0.6289, 0.0183, 0.0438, 0.3090]) -Greedy action tensor([-0.4231, -1.6154, 1.2741, 0.2435]) tensor([0.1148, 0.0348, 0.6267, 0.2236]) -Greedy action tensor([ 1.2493, -0.3149, 0.5056, 2.0570]) tensor([0.2546, 0.0533, 0.1210, 0.5711]) -Greedy action tensor([ 0.5025, 0.0279, -0.5538, 0.3617]) tensor([0.3523, 0.2192, 0.1225, 0.3060]) -Greedy action tensor([-0.0334, -1.0473, -0.1577, -0.0369]) tensor([0.3084, 0.1119, 0.2724, 0.3073]) -Greedy action tensor([0.5466, 0.4279, 0.3131, 0.2442]) tensor([0.2925, 0.2598, 0.2316, 0.2162]) -Greedy action tensor([ 0.2185, -1.4591, 0.1829, -0.5663]) tensor([0.3834, 0.0716, 0.3700, 0.1749]) -Greedy action tensor([ 1.0485, -0.3980, 0.2359, 0.8805]) tensor([0.3961, 0.0932, 0.1758, 0.3349]) -Greedy action tensor([ 0.0511, -0.4212, -0.2528, 0.4249]) tensor([0.2621, 0.1635, 0.1934, 0.3810]) -Greedy action tensor([ 0.4717, -0.7119, 0.3455, -0.4846]) tensor([0.3888, 0.1190, 0.3427, 0.1494]) -Greedy action tensor([ 0.9107, 0.1785, 2.0337, -0.1734]) tensor([0.2044, 0.0983, 0.6283, 0.0691]) -Greedy action tensor([ 0.7775, 0.3365, -0.3992, 1.1306]) tensor([0.2963, 0.1906, 0.0913, 0.4218]) -Greedy action tensor([ 0.3503, -0.4079, -0.4747, -0.3108]) tensor([0.4127, 0.1934, 0.1809, 0.2131]) -Greedy action tensor([ 0.0898, -0.8586, 1.5753, -0.3748]) tensor([0.1554, 0.0602, 0.6867, 0.0977]) -Greedy action tensor([ 0.6636, -1.0280, -0.0913, 0.0806]) tensor([0.4520, 0.0833, 0.2124, 0.2523]) -Greedy action tensor([-0.0105, -1.1944, -0.5321, 0.6403]) tensor([0.2620, 0.0802, 0.1555, 0.5023]) -Greedy action tensor([ 0.5407, -1.7653, -0.5316, -0.4758]) tensor([0.5544, 0.0552, 0.1897, 0.2006]) -Greedy action tensor([ 0.6285, -1.0612, 1.4068, 1.4182]) tensor([0.1797, 0.0332, 0.3913, 0.3958]) -Greedy action tensor([ 0.3878, -0.8741, -1.0913, 0.2870]) tensor([0.4141, 0.1172, 0.0943, 0.3744]) -Greedy action tensor([-0.4430, -1.1594, -0.1178, 1.4996]) tensor([0.1015, 0.0496, 0.1405, 0.7083]) -Greedy action tensor([ 0.3403, -0.1579, 1.8043, -0.1677]) tensor([0.1531, 0.0930, 0.6618, 0.0921]) -Greedy action tensor([ 0.0977, -0.7443, 2.3532, -0.3986]) tensor([0.0864, 0.0372, 0.8239, 0.0526]) -Greedy action tensor([0.9271, 0.7722, 1.3709, 0.1499]) tensor([0.2581, 0.2210, 0.4023, 0.1186]) -Greedy action tensor([-0.0308, 1.0738, 1.7556, -0.3029]) tensor([0.0930, 0.2808, 0.5553, 0.0709]) -Greedy action tensor([ 0.0209, -0.0063, -0.1706, 0.3614]) tensor([0.2378, 0.2315, 0.1964, 0.3343]) -Greedy action tensor([-0.0847, -0.0027, -0.5338, -1.4044]) tensor([0.3343, 0.3629, 0.2134, 0.0893]) -Greedy action tensor([ 0.5134, -0.3047, 0.2043, -0.4408]) tensor([0.3905, 0.1723, 0.2867, 0.1504]) -Greedy action tensor([-0.1483, -0.6900, -0.3305, 0.7570]) tensor([0.2046, 0.1190, 0.1705, 0.5059]) -Greedy action tensor([-0.7957, -0.7267, 0.5011, -1.0038]) tensor([0.1529, 0.1638, 0.5592, 0.1242]) -Greedy action tensor([ 1.2530, 0.0136, -0.2890, 0.9725]) tensor([0.4427, 0.1282, 0.0947, 0.3344]) -Greedy action tensor([ 1.4016, 0.6918, -0.0669, 1.5061]) tensor([0.3531, 0.1736, 0.0813, 0.3920]) -Greedy action tensor([ 0.7233, 0.3202, 1.4594, -0.1382]) tensor([0.2393, 0.1599, 0.4997, 0.1011]) -Greedy action tensor([ 0.2615, -0.3126, -0.7750, 0.8728]) tensor([0.2659, 0.1498, 0.0943, 0.4900]) -Greedy action tensor([ 1.6360, -0.2837, 1.8661, -0.2104]) tensor([0.3901, 0.0572, 0.4911, 0.0616]) -Greedy action tensor([-2.0827, -0.1010, 0.9718, -0.6456]) tensor([0.0297, 0.2155, 0.6299, 0.1250]) -Greedy action tensor([ 0.9584, -1.6066, -0.1424, 0.9570]) tensor([0.4153, 0.0319, 0.1381, 0.4147]) -Greedy action tensor([ 1.2083, -0.0841, 0.5880, -0.8875]) tensor([0.5167, 0.1419, 0.2779, 0.0635]) -Greedy action tensor([ 1.2854, -1.6128, -0.0909, 0.7551]) tensor([0.5274, 0.0291, 0.1332, 0.3103]) -Greedy action tensor([ 0.4803, -2.1463, -0.5151, 0.6378]) tensor([0.3828, 0.0277, 0.1415, 0.4481]) -Greedy action tensor([-0.4086, -0.3785, 0.6411, -1.3925]) tensor([0.1901, 0.1959, 0.5430, 0.0711]) -Greedy action tensor([ 1.0297, -0.9714, -0.4604, 0.9696]) tensor([0.4344, 0.0587, 0.0979, 0.4090]) -Greedy action tensor([ 0.3934, -1.8310, 0.7456, 1.1157]) tensor([0.2179, 0.0236, 0.3099, 0.4487]) -Greedy action tensor([ 0.4763, -2.0421, 1.4370, -0.4352]) tensor([0.2441, 0.0197, 0.6381, 0.0981]) -Greedy action tensor([ 0.1831, 1.1632, -0.1087, -0.2883]) tensor([0.1986, 0.5292, 0.1483, 0.1239]) -Greedy action tensor([-1.7082, -0.0472, 0.5129, -0.9180]) tensor([0.0565, 0.2977, 0.5212, 0.1246]) -Greedy action tensor([ 1.7616, 0.2801, 0.5205, -0.2455]) tensor([0.6058, 0.1377, 0.1751, 0.0814]) -Greedy action tensor([-0.8252, -0.2348, 1.9640, -1.0849]) tensor([0.0504, 0.0910, 0.8198, 0.0389]) -Greedy action tensor([-0.8136, -0.0689, -0.7631, 0.7641]) tensor([0.1111, 0.2339, 0.1168, 0.5381]) -Greedy action tensor([ 0.1086, -1.4702, 1.0055, 0.1459]) tensor([0.2129, 0.0439, 0.5221, 0.2210]) -Greedy action tensor([-0.3811, -0.7791, 0.8378, -0.6328]) tensor([0.1715, 0.1152, 0.5801, 0.1333]) -Greedy action tensor([-0.7355, -1.4135, -0.4473, -0.0661]) tensor([0.2086, 0.1059, 0.2782, 0.4074]) -Greedy action tensor([-1.1763, 0.0568, -0.4620, -0.1159]) tensor([0.1068, 0.3666, 0.2182, 0.3084]) -Greedy action tensor([ 0.3516, 0.2481, -0.0578, 0.3794]) tensor([0.2782, 0.2509, 0.1848, 0.2861]) -Greedy action tensor([ 0.8623, -0.1943, 1.1193, 1.7832]) tensor([0.1941, 0.0675, 0.2510, 0.4875]) -Greedy action tensor([-0.2308, -0.5222, -1.3298, 0.3747]) tensor([0.2556, 0.1910, 0.0852, 0.4683]) -Greedy action tensor([ 0.4270, -0.0865, 0.8546, 0.7885]) tensor([0.2189, 0.1310, 0.3358, 0.3143]) -Greedy action tensor([0.4015, 0.2280, 0.1527, 1.5784]) tensor([0.1705, 0.1434, 0.1329, 0.5532]) -Greedy action tensor([ 1.9700, -1.1111, 2.0332, 1.5995]) tensor([0.3569, 0.0164, 0.3802, 0.2464]) -Greedy action tensor([ 0.2782, 0.5744, 1.2058, -0.1354]) tensor([0.1807, 0.2430, 0.4568, 0.1195]) -Greedy action tensor([ 0.4001, -1.3428, 0.6857, -0.0522]) tensor([0.3183, 0.0557, 0.4235, 0.2025]) -Greedy action tensor([ 0.0137, -1.6901, 0.3229, 0.0922]) tensor([0.2758, 0.0502, 0.3757, 0.2983]) -Greedy action tensor([-0.0806, 0.6254, -0.2746, -1.3077]) tensor([0.2414, 0.4890, 0.1988, 0.0708]) -Greedy action tensor([ 1.6147, -0.4105, 0.8780, 0.6961]) tensor([0.4976, 0.0657, 0.2382, 0.1986]) -Greedy action tensor([-1.6783, -0.8765, 0.7257, 0.4510]) tensor([0.0440, 0.0982, 0.4874, 0.3703]) -Greedy action tensor([ 0.5375, -0.4117, 0.6233, 0.8893]) tensor([0.2565, 0.0993, 0.2795, 0.3647]) -Greedy action tensor([ 0.2026, 0.6456, 1.2959, -0.8086]) tensor([0.1693, 0.2637, 0.5053, 0.0616]) -Greedy action tensor([ 0.0594, -0.9841, 1.2442, 0.4700]) tensor([0.1631, 0.0575, 0.5335, 0.2460]) -Greedy action tensor([-0.8458, -1.0347, -0.3406, 0.9603]) tensor([0.1045, 0.0865, 0.1731, 0.6359]) -Greedy action tensor([-0.9655, -0.3809, -0.1852, -0.8926]) tensor([0.1652, 0.2965, 0.3606, 0.1777]) -Greedy action tensor([ 1.9716, -0.8256, 0.8509, 0.9273]) tensor([0.5751, 0.0351, 0.1875, 0.2024]) -Greedy action tensor([ 0.8985, -0.2325, 1.2856, 0.8291]) tensor([0.2682, 0.0865, 0.3950, 0.2502]) -Greedy action tensor([ 0.1596, 0.1410, 0.5723, -0.1818]) tensor([0.2379, 0.2335, 0.3595, 0.1691]) -Greedy action tensor([ 0.6854, -0.4984, 0.0693, 1.0972]) tensor([0.2980, 0.0912, 0.1609, 0.4498]) -Greedy action tensor([ 1.6649, -0.5649, -0.4800, 1.2902]) tensor([0.5230, 0.0562, 0.0612, 0.3595]) -Greedy action tensor([-0.6212, -0.7036, 0.4762, -0.3461]) tensor([0.1604, 0.1477, 0.4806, 0.2112]) -Greedy action tensor([0.1269, 0.4601, 0.4683, 1.3973]) tensor([0.1358, 0.1895, 0.1910, 0.4837]) -Greedy action tensor([-0.0491, -1.9513, 0.5995, 0.0602]) tensor([0.2394, 0.0357, 0.4579, 0.2670]) -Greedy action tensor([-0.6117, -1.4016, -0.5920, 0.2193]) tensor([0.2097, 0.0952, 0.2138, 0.4813]) -Greedy action tensor([ 0.4903, -2.0277, -0.2643, 0.8460]) tensor([0.3358, 0.0271, 0.1579, 0.4792]) -Greedy action tensor([ 1.1387, -0.4356, 0.0796, 0.1621]) tensor([0.5180, 0.1073, 0.1796, 0.1951]) -Greedy action tensor([ 0.6283, 0.0025, -0.2431, 0.3336]) tensor([0.3707, 0.1982, 0.1551, 0.2760]) -Greedy action tensor([ 0.9568, -0.1244, -0.1485, 0.1950]) tensor([0.4679, 0.1587, 0.1549, 0.2184]) -Greedy action tensor([ 0.8295, -0.1805, -0.0625, 0.0594]) tensor([0.4470, 0.1628, 0.1832, 0.2070]) -Greedy action tensor([ 0.6458, -0.4937, -0.5377, 0.4203]) tensor([0.4125, 0.1320, 0.1263, 0.3292]) -Greedy action tensor([ 1.3772, -0.7837, -0.2022, 0.3492]) tensor([0.5956, 0.0686, 0.1227, 0.2131]) -Greedy action tensor([ 1.3749, -0.6393, -0.2477, 0.2840]) tensor([0.6000, 0.0801, 0.1184, 0.2015]) -Greedy action tensor([ 1.3690, -0.3257, -0.5620, -0.0663]) tensor([0.6383, 0.1172, 0.0926, 0.1519]) -Greedy action tensor([ 1.2206, -0.2897, -0.7489, 0.1941]) tensor([0.5819, 0.1285, 0.0812, 0.2085]) -Greedy action tensor([ 0.9091, -0.3201, -0.0787, -0.0277]) tensor([0.4862, 0.1422, 0.1811, 0.1905]) -Greedy action tensor([ 1.6161, -0.6231, -0.1073, 0.2664]) tensor([0.6475, 0.0690, 0.1156, 0.1679]) -Greedy action tensor([ 1.3942, -0.5681, -0.1642, 0.4941]) tensor([0.5690, 0.0800, 0.1197, 0.2313]) -Greedy action tensor([ 1.4554, -0.7843, -0.4160, 0.4870]) tensor([0.6097, 0.0649, 0.0938, 0.2315]) -Greedy action tensor([ 0.8246, -0.4102, -0.1170, 0.1174]) tensor([0.4600, 0.1338, 0.1794, 0.2268]) -Greedy action tensor([ 0.6952, -0.3060, 0.1223, 0.0558]) tensor([0.4067, 0.1494, 0.2293, 0.2146]) -Greedy action tensor([ 1.5095, -0.4659, -0.1605, 0.3888]) tensor([0.6050, 0.0839, 0.1139, 0.1972]) -Greedy action tensor([ 1.3126, -0.6579, -0.0381, 0.3741]) tensor([0.5588, 0.0779, 0.1448, 0.2186]) -Greedy action tensor([ 1.2739, -0.6365, -0.1810, 0.1690]) tensor([0.5839, 0.0864, 0.1363, 0.1934]) -Greedy action tensor([ 1.5869, -0.2718, -0.6650, 0.5559]) tensor([0.6181, 0.0964, 0.0650, 0.2205]) -Greedy action tensor([ 1.4548, -0.2540, -0.3244, 0.5052]) tensor([0.5758, 0.1043, 0.0972, 0.2228]) -Greedy action tensor([ 0.8934, -0.3633, -0.3125, 0.6719]) tensor([0.4192, 0.1193, 0.1255, 0.3359]) -Greedy action tensor([ 1.5842, -0.4680, -0.4982, 0.3861]) tensor([0.6432, 0.0826, 0.0802, 0.1941]) -Greedy action tensor([ 1.5902, -0.0171, 0.4385, -0.0043]) tensor([0.5815, 0.1166, 0.1838, 0.1181]) -Greedy action tensor([ 2.3668, -1.1375, -0.2638, 0.8398]) tensor([0.7580, 0.0228, 0.0546, 0.1646]) -Greedy action tensor([ 1.6084, -0.3959, -0.5487, 0.2466]) tensor([0.6637, 0.0894, 0.0768, 0.1700]) -Greedy action tensor([ 2.3604, -1.0315, 0.0172, 0.4487]) tensor([0.7828, 0.0263, 0.0752, 0.1157]) -Greedy action tensor([ 1.4738, -0.2600, -0.4832, 0.4190]) tensor([0.6002, 0.1060, 0.0848, 0.2090]) -Greedy action tensor([ 1.7424, -0.9234, 0.1585, 0.2949]) tensor([0.6623, 0.0461, 0.1359, 0.1558]) -Greedy action tensor([ 1.0077, 0.1281, -0.4124, 0.5690]) tensor([0.4345, 0.1803, 0.1050, 0.2802]) -Greedy action tensor([ 1.1342, -0.1904, -0.4309, 0.3322]) tensor([0.5199, 0.1382, 0.1087, 0.2332]) -Greedy action tensor([ 1.6417, -0.6212, -0.2581, 0.3486]) tensor([0.6544, 0.0681, 0.0979, 0.1796]) -Greedy action tensor([ 0.9332, -0.5634, 0.0261, 0.1455]) tensor([0.4802, 0.1075, 0.1939, 0.2184]) -Greedy action tensor([ 1.4515, -0.6418, 0.1511, 0.6643]) tensor([0.5403, 0.0666, 0.1472, 0.2459]) -Greedy action tensor([ 1.6776, -0.6733, -0.3998, 0.6685]) tensor([0.6309, 0.0601, 0.0790, 0.2300]) -Greedy action tensor([ 1.9061, -1.4666, 0.0768, 0.7193]) tensor([0.6666, 0.0229, 0.1070, 0.2035]) -Greedy action tensor([ 1.6701, -0.4465, -0.3579, 0.3528]) tensor([0.6579, 0.0792, 0.0866, 0.1762]) -Greedy action tensor([ 1.5072, -0.2108, -0.1421, -0.0180]) tensor([0.6293, 0.1129, 0.1209, 0.1369]) -Greedy action tensor([ 1.4695, -0.9327, -0.3690, 0.0896]) tensor([0.6661, 0.0603, 0.1060, 0.1676]) -Greedy action tensor([ 1.7261, -0.4758, -0.2691, 0.2548]) tensor([0.6774, 0.0749, 0.0921, 0.1556]) -Greedy action tensor([ 0.5843, -0.4581, -0.1401, 0.1988]) tensor([0.3972, 0.1401, 0.1925, 0.2702]) -Greedy action tensor([ 1.2582, -0.2867, -0.1515, 0.0740]) tensor([0.5670, 0.1210, 0.1385, 0.1735]) -Greedy action tensor([ 1.3254, -0.9071, 0.0674, 0.3276]) tensor([0.5681, 0.0609, 0.1615, 0.2095]) -Greedy action tensor([ 1.8091, -1.0685, -0.2510, 0.4673]) tensor([0.6920, 0.0389, 0.0882, 0.1809]) -Greedy action tensor([ 1.0880, 0.0027, -0.5114, 0.4838]) tensor([0.4793, 0.1619, 0.0968, 0.2619]) -Greedy action tensor([ 1.3049, -0.1410, -0.3056, 0.2215]) tensor([0.5638, 0.1328, 0.1126, 0.1908]) -Greedy action tensor([ 1.8051, -0.8641, -0.1340, 0.5660]) tensor([0.6654, 0.0461, 0.0957, 0.1927]) -Greedy action tensor([ 1.7088, -0.3910, -0.4271, 0.2439]) tensor([0.6795, 0.0832, 0.0803, 0.1570]) -Greedy action tensor([ 1.7710, -0.4148, -0.3719, 0.4310]) tensor([0.6704, 0.0754, 0.0787, 0.1756]) -Greedy action tensor([ 0.9858, -0.2068, -0.0917, 0.2906]) tensor([0.4667, 0.1416, 0.1589, 0.2329]) -Greedy action tensor([ 1.2811, -0.3825, -0.0469, 0.2119]) tensor([0.5563, 0.1054, 0.1474, 0.1910]) -Greedy action tensor([ 1.7545, -0.3668, -0.4870, 0.0870]) tensor([0.7068, 0.0847, 0.0751, 0.1334]) -Greedy action tensor([ 1.1788, 0.0627, -0.7423, 0.3228]) tensor([0.5266, 0.1725, 0.0771, 0.2237]) -Greedy action tensor([ 1.3458, 0.0316, -0.2792, 0.3668]) tensor([0.5431, 0.1459, 0.1069, 0.2040]) -Greedy action tensor([ 1.9952, -0.0339, -0.4661, 0.1584]) tensor([0.7267, 0.0955, 0.0620, 0.1158]) -Greedy action tensor([ 0.6242, -0.3178, -0.0241, 0.1730]) tensor([0.3922, 0.1529, 0.2051, 0.2498]) -Greedy action tensor([ 1.6768, -0.7983, -0.1629, 0.4088]) tensor([0.6560, 0.0552, 0.1042, 0.1846]) -Greedy action tensor([ 1.2550, -0.4867, -0.4496, 0.1027]) tensor([0.5977, 0.1047, 0.1087, 0.1888]) -Greedy action tensor([ 1.2289, -0.6215, -0.1267, 0.2443]) tensor([0.5591, 0.0879, 0.1441, 0.2089]) -Greedy action tensor([ 1.2974, -0.5343, -0.3037, 0.5162]) tensor([0.5495, 0.0880, 0.1108, 0.2516]) -Greedy action tensor([1.6163, 0.2816, 0.1261, 0.2966]) tensor([0.5696, 0.1499, 0.1283, 0.1522]) -Greedy action tensor([ 0.9504, -0.3869, -0.2522, 0.2380]) tensor([0.4870, 0.1279, 0.1463, 0.2389]) -Greedy action tensor([ 1.9059, -0.7920, -0.4844, 0.6347]) tensor([0.6947, 0.0468, 0.0636, 0.1949]) -Greedy action tensor([ 1.2576, -0.6083, -0.2967, 0.3320]) tensor([0.5674, 0.0878, 0.1199, 0.2249]) -Greedy action tensor([ 1.4847, -0.1102, -0.7305, 0.2328]) tensor([0.6258, 0.1270, 0.0683, 0.1790]) -Greedy action tensor([ 1.1365, -0.3448, -0.1337, 0.1598]) tensor([0.5306, 0.1206, 0.1490, 0.1998]) -Greedy action tensor([ 1.4302, -0.4426, -0.4667, 0.1975]) tensor([0.6269, 0.0963, 0.0940, 0.1827]) -Greedy action tensor([ 1.5578, 0.1241, -0.0132, 0.0779]) tensor([0.5974, 0.1424, 0.1242, 0.1360]) -Greedy action tensor([ 1.2467, -0.4338, -0.2926, -0.0122]) tensor([0.5935, 0.1106, 0.1273, 0.1686]) -Greedy action tensor([ 0.9944, -0.1653, -0.7112, 0.3026]) tensor([0.5010, 0.1571, 0.0910, 0.2508]) -Greedy action tensor([ 1.3784, -0.6069, -0.1644, 0.2884]) tensor([0.5926, 0.0814, 0.1267, 0.1993]) -Greedy action tensor([ 2.2631, -0.2124, -0.5195, 0.6499]) tensor([0.7434, 0.0625, 0.0460, 0.1481]) -Greedy action tensor([ 1.0800, -0.3456, -0.0344, 0.2458]) tensor([0.4993, 0.1200, 0.1638, 0.2168]) -Greedy action tensor([ 1.0225, -0.4614, -0.2212, 0.3891]) tensor([0.4888, 0.1108, 0.1409, 0.2595]) -Greedy action tensor([ 1.5964, -0.6175, -0.3672, 0.5956]) tensor([0.6184, 0.0676, 0.0868, 0.2273]) -Greedy action tensor([ 1.5869, -0.1974, 0.0752, 0.3606]) tensor([0.5946, 0.0998, 0.1311, 0.1744]) -Greedy action tensor([ 1.3916, -0.4907, -0.1481, -0.0980]) tensor([0.6281, 0.0956, 0.1347, 0.1416]) -Greedy action tensor([ 1.1232, -0.1811, -0.5669, -0.1068]) tensor([0.5720, 0.1552, 0.1055, 0.1672]) -Greedy action tensor([ 1.2976, -0.4508, -0.3975, 0.4490]) tensor([0.5600, 0.0975, 0.1028, 0.2397]) -Greedy action tensor([ 1.0058, -0.2384, -0.6171, 0.2944]) tensor([0.5060, 0.1458, 0.0998, 0.2484]) -Greedy action tensor([ 0.4310, -0.2616, -0.1725, 0.3051]) tensor([0.3414, 0.1708, 0.1867, 0.3010]) -Greedy action tensor([ 0.6945, -0.2440, -0.0569, 0.0498]) tensor([0.4188, 0.1638, 0.1976, 0.2198]) -Greedy action tensor([-1.6189, -0.6725, 1.0933, 0.6335]) tensor([0.0355, 0.0915, 0.5351, 0.3379]) -Greedy action tensor([-1.4552, -0.5164, 0.4412, 0.0099]) tensor([0.0687, 0.1758, 0.4579, 0.2975]) -Greedy action tensor([-1.8868, -0.4584, 0.6345, -0.1420]) tensor([0.0428, 0.1787, 0.5332, 0.2453]) -Greedy action tensor([-1.8405, -0.4869, 0.6196, -0.1583]) tensor([0.0455, 0.1763, 0.5332, 0.2449]) -Greedy action tensor([-0.5840, -0.4878, 0.6674, 0.9621]) tensor([0.0972, 0.1070, 0.3397, 0.4561]) -Greedy action tensor([-1.3041, -0.5502, 0.3352, 0.3931]) tensor([0.0728, 0.1547, 0.3751, 0.3974]) -Greedy action tensor([-1.8496, -0.4149, 0.6120, -0.1333]) tensor([0.0445, 0.1867, 0.5214, 0.2474]) -Greedy action tensor([-1.7824, -0.2217, 0.5737, -0.1545]) tensor([0.0467, 0.2225, 0.4929, 0.2379]) -Greedy action tensor([-1.1940, -0.5853, 0.3143, 0.4571]) tensor([0.0796, 0.1462, 0.3595, 0.4147]) -Greedy action tensor([-0.4425, -0.5143, 0.2220, 0.1520]) tensor([0.1759, 0.1637, 0.3418, 0.3187]) -Greedy action tensor([-1.7322, -0.5244, 0.6607, 0.0670]) tensor([0.0469, 0.1568, 0.5130, 0.2833]) -Greedy action tensor([-1.3951, -0.5242, 0.4245, 0.3841]) tensor([0.0646, 0.1543, 0.3985, 0.3827]) -Greedy action tensor([-1.3664, 0.6660, 0.2353, 0.3800]) tensor([0.0517, 0.3949, 0.2567, 0.2967]) -Greedy action tensor([-1.5881, -0.5435, 0.4787, 0.0816]) tensor([0.0586, 0.1667, 0.4632, 0.3114]) -Greedy action tensor([-1.5662, -0.5887, 0.6221, 0.1764]) tensor([0.0547, 0.1453, 0.4877, 0.3123]) -Greedy action tensor([-1.9110, -0.4093, 0.6487, -0.1612]) tensor([0.0414, 0.1857, 0.5349, 0.2380]) -Greedy action tensor([-1.2090, -0.4332, 0.3959, 0.6020]) tensor([0.0701, 0.1523, 0.3489, 0.4288]) -Greedy action tensor([-1.6297, -0.5218, 0.5021, 0.0143]) tensor([0.0567, 0.1717, 0.4781, 0.2935]) -Greedy action tensor([-1.8824, -0.2824, 0.6084, -0.1516]) tensor([0.0422, 0.2093, 0.5100, 0.2385]) -Greedy action tensor([-1.7673, -0.4963, 0.5481, -0.0816]) tensor([0.0498, 0.1774, 0.5042, 0.2686]) -Greedy action tensor([-1.9366, -0.4505, 0.6626, -0.1790]) tensor([0.0405, 0.1792, 0.5453, 0.2350]) -Greedy action tensor([-1.5387, -0.5155, 0.4475, -0.0258]) tensor([0.0641, 0.1782, 0.4669, 0.2908]) -Greedy action tensor([-1.7773, -0.5433, 1.2494, 0.8403]) tensor([0.0258, 0.0886, 0.5321, 0.3535]) -Greedy action tensor([-1.7280, -0.4798, 0.6646, 0.1348]) tensor([0.0457, 0.1593, 0.5004, 0.2946]) -Greedy action tensor([-1.9037, -0.4415, 0.6444, -0.1521]) tensor([0.0419, 0.1809, 0.5357, 0.2416]) -Greedy action tensor([-1.8264, -0.4326, 0.6006, -0.1166]) tensor([0.0457, 0.1842, 0.5175, 0.2526]) -Greedy action tensor([-1.8994, -0.4241, 0.6484, -0.1403]) tensor([0.0417, 0.1825, 0.5334, 0.2424]) -Greedy action tensor([-0.5242, -0.0375, 0.1420, -0.0050]) tensor([0.1599, 0.2601, 0.3113, 0.2687]) -Greedy action tensor([-1.7415, -0.6498, 0.1865, -0.4160]) tensor([0.0684, 0.2038, 0.4703, 0.2575]) -Greedy action tensor([-1.7372, -0.6560, 0.7073, -0.3548]) tensor([0.0514, 0.1515, 0.5923, 0.2048]) -Greedy action tensor([-0.6977, -0.3298, 0.6200, 1.2937]) tensor([0.0740, 0.1070, 0.2765, 0.5424]) -Greedy action tensor([-1.9134, -0.4258, 0.6476, -0.1615]) tensor([0.0414, 0.1834, 0.5364, 0.2388]) -Greedy action tensor([-1.8048, -0.3043, 0.5767, -0.0881]) tensor([0.0457, 0.2050, 0.4948, 0.2545]) -Greedy action tensor([-1.4885, -0.5424, 0.5576, 0.2827]) tensor([0.0582, 0.1498, 0.4501, 0.3419]) -Greedy action tensor([-1.6188, -0.4615, 0.6263, 0.3327]) tensor([0.0484, 0.1540, 0.4569, 0.3407]) -Greedy action tensor([-1.4533, -0.5229, 0.5700, 0.3832]) tensor([0.0576, 0.1459, 0.4353, 0.3612]) -Greedy action tensor([-1.7985, -0.1893, 0.5794, -0.1724]) tensor([0.0457, 0.2286, 0.4931, 0.2325]) -Greedy action tensor([-0.9672, 0.3025, 0.2863, 0.4689]) tensor([0.0815, 0.2902, 0.2855, 0.3427]) -Greedy action tensor([-1.5541, -0.4639, 0.4786, -0.0487]) tensor([0.0621, 0.1846, 0.4737, 0.2796]) -Greedy action tensor([-1.4130, -0.2290, 0.4068, 0.1289]) tensor([0.0662, 0.2162, 0.4083, 0.3093]) -Greedy action tensor([-1.9203, -0.4440, 0.6553, -0.1633]) tensor([0.0411, 0.1800, 0.5405, 0.2384]) -Greedy action tensor([-1.8572, -0.4661, 0.6093, -0.1267]) tensor([0.0446, 0.1791, 0.5249, 0.2514]) -Greedy action tensor([-1.8807, -0.4247, 0.6348, -0.1299]) tensor([0.0427, 0.1831, 0.5283, 0.2459]) -Greedy action tensor([-1.9250, -0.4227, 0.6547, -0.1689]) tensor([0.0409, 0.1835, 0.5390, 0.2366]) -Greedy action tensor([-1.3755, -0.1841, 0.5376, 0.6087]) tensor([0.0545, 0.1795, 0.3694, 0.3966]) -Greedy action tensor([-1.7975, 0.1366, 0.5305, -0.1110]) tensor([0.0424, 0.2934, 0.4351, 0.2291]) -Greedy action tensor([-0.9372, -0.2559, 0.2762, 0.9287]) tensor([0.0781, 0.1544, 0.2628, 0.5047]) -Greedy action tensor([-1.9038, -0.4523, 0.6459, -0.1546]) tensor([0.0420, 0.1792, 0.5374, 0.2414]) -Greedy action tensor([-1.8713, -0.3064, 0.6100, -0.1256]) tensor([0.0426, 0.2038, 0.5095, 0.2442]) -Greedy action tensor([-0.6951, -0.5441, 0.2245, 0.1816]) tensor([0.1414, 0.1644, 0.3546, 0.3397]) -Greedy action tensor([-1.5190, -0.5212, 0.5030, 0.2404]) tensor([0.0586, 0.1588, 0.4424, 0.3402]) -Greedy action tensor([-1.1109, -0.6184, 0.4355, -0.2935]) tensor([0.1042, 0.1705, 0.4892, 0.2360]) -Greedy action tensor([-1.6040, -0.4970, 0.5067, 0.0783]) tensor([0.0566, 0.1713, 0.4675, 0.3046]) -Greedy action tensor([-1.8812, -0.3561, 0.6187, -0.1415]) tensor([0.0426, 0.1958, 0.5190, 0.2427]) -Greedy action tensor([-1.9083, -0.5298, 0.6614, -0.1354]) tensor([0.0418, 0.1659, 0.5461, 0.2462]) -Greedy action tensor([-1.9040, -0.4580, 0.6496, -0.1444]) tensor([0.0418, 0.1776, 0.5376, 0.2430]) -Greedy action tensor([-0.4703, -0.5999, 0.2166, 0.7351]) tensor([0.1388, 0.1219, 0.2759, 0.4634]) -Greedy action tensor([-1.1820, -0.5230, 0.5703, 0.8325]) tensor([0.0617, 0.1193, 0.3561, 0.4628]) -Greedy action tensor([-1.8513, -0.4757, 0.6169, -0.1177]) tensor([0.0446, 0.1765, 0.5264, 0.2525]) -Greedy action tensor([-1.9004, -0.3548, 0.6331, -0.1477]) tensor([0.0416, 0.1950, 0.5236, 0.2398]) -Greedy action tensor([-1.7829, -0.4946, 0.6256, -0.0182]) tensor([0.0463, 0.1680, 0.5151, 0.2706]) -Greedy action tensor([-1.6865, -0.5674, 1.1456, 0.7030]) tensor([0.0313, 0.0958, 0.5315, 0.3414]) -Greedy action tensor([-0.3591, 0.3906, 0.7476, 1.3586]) tensor([0.0854, 0.1807, 0.2582, 0.4757]) -Greedy action tensor([-0.8423, 0.9535, 0.0877, 0.4942]) tensor([0.0748, 0.4508, 0.1896, 0.2847]) -Greedy action tensor([-1.0409, -0.2263, 0.2897, 0.0068]) tensor([0.1011, 0.2283, 0.3824, 0.2882]) -Greedy action tensor([-0.3733, 0.8017, 0.2415, 0.6592]) tensor([0.1124, 0.3640, 0.2079, 0.3157]) -Greedy action tensor([-1.8616, -0.4499, 0.6163, -0.1100]) tensor([0.0439, 0.1801, 0.5230, 0.2530]) -Greedy action tensor([-0.3427, 0.0177, 0.1179, -0.0208]) tensor([0.1852, 0.2656, 0.2936, 0.2556]) -Greedy action tensor([-1.8151, -0.3184, 0.6122, -0.0940]) tensor([0.0447, 0.1995, 0.5061, 0.2497]) -Greedy action tensor([-1.8810, -0.4393, 0.6460, -0.1212]) tensor([0.0425, 0.1795, 0.5314, 0.2467]) -Greedy action tensor([-1.4297, -0.5232, 0.9695, 0.9681]) tensor([0.0392, 0.0971, 0.4321, 0.4315]) -Greedy action tensor([-1.8991, -0.4173, 0.6356, -0.1534]) tensor([0.0421, 0.1854, 0.5312, 0.2413]) -Greedy action tensor([-0.6693, -0.2287, 0.4537, 0.6096]) tensor([0.1085, 0.1685, 0.3334, 0.3896]) -Greedy action tensor([-1.8050, -0.3333, 0.6176, 0.0052]) tensor([0.0440, 0.1916, 0.4957, 0.2687]) -Greedy action tensor([-1.7683, -0.5172, 0.9114, 0.3541]) tensor([0.0365, 0.1274, 0.5316, 0.3045]) -Greedy action tensor([-1.4303, -0.4189, 0.4406, 0.1659]) tensor([0.0659, 0.1811, 0.4279, 0.3251]) -Greedy action tensor([-1.8601, -0.4639, 0.6966, -0.0070]) tensor([0.0411, 0.1662, 0.5303, 0.2624]) -Greedy action tensor([-1.5849, -0.5582, 0.4741, 0.0715]) tensor([0.0593, 0.1655, 0.4646, 0.3106]) -Greedy action tensor([-1.5741, -0.5263, 0.4773, -0.0834]) tensor([0.0622, 0.1774, 0.4841, 0.2763]) -Greedy action tensor([-1.8174, -0.2088, 0.5761, -0.0956]) tensor([0.0444, 0.2216, 0.4858, 0.2482]) -Greedy action tensor([-1.6544, -0.3740, 0.6649, 0.0272]) tensor([0.0497, 0.1786, 0.5049, 0.2668]) -Greedy action tensor([ 0.4316, 0.1932, -0.0775, -0.2481]) tensor([0.3453, 0.2721, 0.2076, 0.1750]) -Greedy action tensor([ 0.5139, -0.3113, 0.0163, -0.2877]) tensor([0.4008, 0.1756, 0.2437, 0.1798]) -Greedy action tensor([ 0.6192, -0.1382, 0.0142, -0.1998]) tensor([0.4072, 0.1909, 0.2224, 0.1795]) -Greedy action tensor([ 0.5161, -0.1423, 0.0493, -0.3796]) tensor([0.3917, 0.2028, 0.2456, 0.1599]) -Greedy action tensor([ 0.3270, -0.3718, -0.1606, -0.4751]) tensor([0.3907, 0.1942, 0.2399, 0.1752]) -Greedy action tensor([ 0.4667, -0.0149, -0.0345, -0.2546]) tensor([0.3690, 0.2280, 0.2236, 0.1794]) -Greedy action tensor([ 1.0270, -0.5486, -0.1390, -0.2893]) tensor([0.5597, 0.1158, 0.1744, 0.1501]) -Greedy action tensor([ 0.6111, 0.0109, -0.1778, 0.0667]) tensor([0.3871, 0.2124, 0.1759, 0.2246]) -Greedy action tensor([ 1.0704, -1.0179, -0.0477, -0.6296]) tensor([0.6122, 0.0759, 0.2001, 0.1118]) -Greedy action tensor([ 0.7308, -0.1757, -0.0857, -0.3480]) tensor([0.4575, 0.1848, 0.2022, 0.1555]) -Greedy action tensor([ 0.7834, -0.3756, -0.0711, -0.3249]) tensor([0.4832, 0.1516, 0.2056, 0.1595]) -Greedy action tensor([ 0.7225, -0.4732, -0.1645, -0.5045]) tensor([0.4981, 0.1507, 0.2052, 0.1460]) -Greedy action tensor([ 1.1974, -0.7179, 0.1177, -0.4816]) tensor([0.5975, 0.0880, 0.2030, 0.1115]) -Greedy action tensor([ 0.9208, -0.8045, -0.1501, -0.5799]) tensor([0.5735, 0.1021, 0.1965, 0.1279]) -Greedy action tensor([ 0.1979, -0.2872, -0.3209, -0.1944]) tensor([0.3465, 0.2133, 0.2062, 0.2340]) -Greedy action tensor([ 0.5303, 0.0362, -0.0918, -0.4134]) tensor([0.3943, 0.2406, 0.2117, 0.1535]) -Greedy action tensor([ 0.7144, -0.2523, -0.0572, -0.1909]) tensor([0.4450, 0.1693, 0.2057, 0.1800]) -Greedy action tensor([ 0.6983, -0.7114, -0.1119, -0.2176]) tensor([0.4787, 0.1169, 0.2129, 0.1915]) -Greedy action tensor([ 0.5933, -0.2526, -0.1164, -0.3675]) tensor([0.4341, 0.1863, 0.2135, 0.1661]) -Greedy action tensor([ 0.6651, -0.3615, 0.0341, -0.0717]) tensor([0.4221, 0.1512, 0.2246, 0.2021]) -Greedy action tensor([ 0.8336, -0.5114, -0.0975, -0.3904]) tensor([0.5131, 0.1337, 0.2023, 0.1509]) -Greedy action tensor([ 0.6591, -0.7968, -0.1115, -0.3124]) tensor([0.4820, 0.1124, 0.2231, 0.1825]) -Greedy action tensor([ 0.5005, -0.4563, -0.1346, -0.2985]) tensor([0.4231, 0.1625, 0.2242, 0.1903]) -Greedy action tensor([ 0.8082, -0.2998, 0.0473, -0.4380]) tensor([0.4796, 0.1584, 0.2241, 0.1379]) -Greedy action tensor([ 0.6383, -0.3041, -0.0463, -0.3046]) tensor([0.4379, 0.1706, 0.2208, 0.1706]) -Greedy action tensor([ 0.5827, -0.2853, 0.0010, -0.3814]) tensor([0.4237, 0.1779, 0.2368, 0.1616]) -Greedy action tensor([ 0.4505, -0.1140, -0.0369, -0.1762]) tensor([0.3680, 0.2093, 0.2260, 0.1967]) -Greedy action tensor([ 0.5554, -0.3307, 0.2826, -0.5882]) tensor([0.4013, 0.1654, 0.3054, 0.1279]) -Greedy action tensor([ 0.5083, -0.3488, -0.2010, -0.4466]) tensor([0.4346, 0.1844, 0.2138, 0.1672]) -Greedy action tensor([ 0.7917, -0.4235, -0.0622, -0.3210]) tensor([0.4875, 0.1446, 0.2076, 0.1602]) -Greedy action tensor([ 0.5928, -0.2657, 0.0561, 0.0852]) tensor([0.3831, 0.1623, 0.2240, 0.2306]) -Greedy action tensor([ 0.6938, -0.5779, -0.0878, -0.3672]) tensor([0.4798, 0.1345, 0.2196, 0.1661]) -Greedy action tensor([ 0.5665, -0.3930, 0.0249, -0.4026]) tensor([0.4266, 0.1634, 0.2482, 0.1618]) -Greedy action tensor([ 0.2278, -0.0262, 0.0062, -0.2441]) tensor([0.3124, 0.2424, 0.2503, 0.1949]) -Greedy action tensor([ 0.8779, -0.1443, -0.0440, -0.9732]) tensor([0.5223, 0.1879, 0.2078, 0.0820]) -Greedy action tensor([ 0.9915, -1.1346, 0.0947, -0.5069]) tensor([0.5712, 0.0681, 0.2330, 0.1277]) -Greedy action tensor([ 0.4946, -0.2021, -0.0551, -0.4569]) tensor([0.4063, 0.2024, 0.2345, 0.1569]) -Greedy action tensor([ 0.5726, -0.4437, 0.0296, -0.2291]) tensor([0.4181, 0.1513, 0.2429, 0.1876]) -Greedy action tensor([ 0.7542, -0.5579, 0.0965, -0.5348]) tensor([0.4848, 0.1305, 0.2511, 0.1336]) -Greedy action tensor([ 0.4372, -0.1291, -0.0326, -0.1543]) tensor([0.3641, 0.2067, 0.2276, 0.2015]) -Greedy action tensor([ 0.6779, -0.3572, -0.0684, -0.4688]) tensor([0.4657, 0.1654, 0.2208, 0.1480]) -Greedy action tensor([ 0.2523, 0.1136, -0.1428, -0.0917]) tensor([0.3074, 0.2676, 0.2071, 0.2179]) -Greedy action tensor([ 1.2475, -0.9195, 0.0349, -0.8918]) tensor([0.6537, 0.0749, 0.1944, 0.0770]) -Greedy action tensor([ 0.4418, -0.0810, -0.0899, -0.1157]) tensor([0.3632, 0.2153, 0.2134, 0.2080]) -Greedy action tensor([ 0.2944, 0.0771, -0.0028, -0.2668]) tensor([0.3207, 0.2581, 0.2382, 0.1830]) -Greedy action tensor([ 0.5234, -0.0860, -0.0973, -0.1669]) tensor([0.3872, 0.2105, 0.2081, 0.1941]) -Greedy action tensor([ 0.7339, -0.2559, -0.0279, -0.2007]) tensor([0.4482, 0.1666, 0.2092, 0.1760]) -Greedy action tensor([ 0.3879, -0.0098, -0.1791, -0.0377]) tensor([0.3457, 0.2323, 0.1961, 0.2259]) -Greedy action tensor([ 0.7622, -0.7223, 0.2126, -0.2422]) tensor([0.4608, 0.1044, 0.2660, 0.1688]) -Greedy action tensor([ 0.8471, -0.0158, -0.1297, -0.2009]) tensor([0.4653, 0.1963, 0.1752, 0.1632]) -Greedy action tensor([ 0.5560, -0.4963, 0.0057, -0.2913]) tensor([0.4247, 0.1483, 0.2450, 0.1820]) -Greedy action tensor([ 0.5983, -0.3085, -0.0198, -0.2131]) tensor([0.4189, 0.1692, 0.2258, 0.1861]) -Greedy action tensor([ 0.5058, 0.0757, -0.0830, -0.2575]) tensor([0.3743, 0.2435, 0.2077, 0.1745]) -Greedy action tensor([ 0.4232, -0.3067, -0.0403, -0.2826]) tensor([0.3839, 0.1850, 0.2415, 0.1895]) -Greedy action tensor([ 0.5636, -0.4981, -0.1415, -0.2347]) tensor([0.4367, 0.1510, 0.2157, 0.1965]) -Greedy action tensor([ 0.5314, -0.3860, -0.0683, -0.0628]) tensor([0.3999, 0.1598, 0.2195, 0.2208]) -Greedy action tensor([ 0.4356, -0.0952, -0.1203, -0.1523]) tensor([0.3680, 0.2165, 0.2111, 0.2044]) -Greedy action tensor([ 0.5502, -0.4970, -0.0470, -0.2611]) tensor([0.4263, 0.1496, 0.2346, 0.1894]) -Greedy action tensor([ 0.6518, -0.2368, -0.0926, -0.1379]) tensor([0.4273, 0.1757, 0.2030, 0.1940]) -Greedy action tensor([ 0.5980, -0.2558, -0.0912, -0.4199]) tensor([0.4368, 0.1860, 0.2193, 0.1579]) -Greedy action tensor([ 0.7615, -0.3487, -0.0755, -0.4087]) tensor([0.4825, 0.1590, 0.2089, 0.1497]) -Greedy action tensor([ 0.5992, -0.5499, -0.2292, -0.4038]) tensor([0.4716, 0.1495, 0.2060, 0.1730]) -Greedy action tensor([ 0.4862, -0.4131, -0.1285, -0.1912]) tensor([0.4072, 0.1657, 0.2202, 0.2069]) -Greedy action tensor([ 0.8659, -0.1926, 0.0393, -0.4078]) tensor([0.4844, 0.1681, 0.2120, 0.1355]) -Greedy action tensor([ 1.0435, -0.9018, 0.1786, -0.5205]) tensor([0.5639, 0.0806, 0.2375, 0.1180]) -Greedy action tensor([ 0.8772, -0.5678, 0.0576, -0.6222]) tensor([0.5264, 0.1241, 0.2319, 0.1175]) -Greedy action tensor([ 0.2864, 0.0560, -0.0915, -0.0412]) tensor([0.3125, 0.2482, 0.2141, 0.2252]) -Greedy action tensor([ 0.3923, -0.0844, 0.0182, -0.3052]) tensor([0.3563, 0.2212, 0.2451, 0.1774]) -Greedy action tensor([ 0.8463, -0.4415, -0.2434, -0.6085]) tensor([0.5418, 0.1495, 0.1822, 0.1265]) -Greedy action tensor([ 0.7695, -0.7695, -0.1055, -0.3696]) tensor([0.5124, 0.1100, 0.2136, 0.1640]) -Greedy action tensor([ 0.6381, -0.1837, -0.0707, -0.5131]) tensor([0.4448, 0.1956, 0.2190, 0.1407]) -Greedy action tensor([ 0.3952, 0.1592, 0.0332, -0.3535]) tensor([0.3380, 0.2669, 0.2353, 0.1598]) -Greedy action tensor([ 0.3416, 0.0245, -0.0673, -0.0929]) tensor([0.3289, 0.2395, 0.2185, 0.2130]) -Greedy action tensor([ 0.3935, -0.2654, 0.0574, -0.0299]) tensor([0.3464, 0.1792, 0.2475, 0.2268]) -Greedy action tensor([ 0.2203, 0.1901, -0.0865, -0.3548]) tensor([0.3059, 0.2968, 0.2251, 0.1721]) -Greedy action tensor([ 0.6948, 0.1189, -0.1242, -0.0847]) tensor([0.4062, 0.2284, 0.1791, 0.1863]) -Greedy action tensor([ 0.4689, -0.2270, 0.1004, -0.2266]) tensor([0.3719, 0.1854, 0.2572, 0.1855]) -Greedy action tensor([ 0.7426, -0.6392, -0.0350, -0.3726]) tensor([0.4906, 0.1232, 0.2254, 0.1608]) -Greedy action tensor([ 0.2410, 0.0555, -0.0301, -0.0383]) tensor([0.2985, 0.2480, 0.2277, 0.2258]) -Greedy action tensor([ 0.9701, 0.0443, -0.1439, -0.5716]) tensor([0.5159, 0.2044, 0.1693, 0.1104]) -Greedy action tensor([ 1.1299, -0.7311, 0.0515, -0.8191]) tensor([0.6105, 0.0949, 0.2077, 0.0869]) -Greedy action tensor([-0.0423, 0.1036, 0.2178, -1.2048]) tensor([0.2655, 0.3072, 0.3443, 0.0830]) -Greedy action tensor([-0.8510, -0.7506, -0.4594, -0.1145]) tensor([0.1763, 0.1949, 0.2607, 0.3681]) -Greedy action tensor([ 0.1599, -0.8195, -1.0885, 0.8513]) tensor([0.2733, 0.1026, 0.0784, 0.5456]) -Greedy action tensor([-0.0113, -0.6019, 0.2021, 0.0164]) tensor([0.2618, 0.1450, 0.3241, 0.2691]) -Greedy action tensor([-0.3458, -0.6686, 0.4774, 1.0249]) tensor([0.1259, 0.0912, 0.2869, 0.4960]) -Greedy action tensor([-0.0806, -0.7716, 0.6556, 0.0323]) tensor([0.2124, 0.1064, 0.4434, 0.2378]) -Greedy action tensor([ 1.2949, -0.8616, 1.6638, 0.9837]) tensor([0.3035, 0.0351, 0.4390, 0.2224]) -Greedy action tensor([ 1.0967, 0.0504, -0.0451, 0.2482]) tensor([0.4765, 0.1674, 0.1521, 0.2040]) -Greedy action tensor([ 1.3244, -0.5607, 0.8597, 0.3184]) tensor([0.4660, 0.0707, 0.2928, 0.1704]) -Greedy action tensor([ 1.5698, -0.1734, -0.1315, 0.7896]) tensor([0.5507, 0.0964, 0.1005, 0.2524]) -Greedy action tensor([-0.6114, -0.5938, -0.8545, -0.7181]) tensor([0.2702, 0.2750, 0.2119, 0.2429]) -Greedy action tensor([ 1.4472, 0.7201, -0.8715, 0.9044]) tensor([0.4624, 0.2235, 0.0455, 0.2687]) -Greedy action tensor([0.9177, 0.9418, 0.8106, 0.4144]) tensor([0.2835, 0.2904, 0.2547, 0.1714]) -Greedy action tensor([ 0.3868, -1.5911, -0.2433, 0.1502]) tensor([0.4065, 0.0562, 0.2165, 0.3208]) -Greedy action tensor([-1.0899, 0.6439, -0.1554, -0.7944]) tensor([0.0948, 0.5366, 0.2413, 0.1274]) -Greedy action tensor([0.2867, 0.1481, 0.2336, 0.2046]) tensor([0.2674, 0.2328, 0.2535, 0.2463]) -Greedy action tensor([0.6910, 0.2511, 0.8669, 0.7139]) tensor([0.2591, 0.1669, 0.3089, 0.2651]) -Greedy action tensor([ 0.5152, -1.3011, 0.2321, -0.0387]) tensor([0.4015, 0.0653, 0.3025, 0.2307]) -Greedy action tensor([-0.5316, -1.5428, 0.3070, -0.0379]) tensor([0.1881, 0.0684, 0.4352, 0.3082]) -Greedy action tensor([ 0.0621, 0.0495, 0.4485, -0.6076]) tensor([0.2518, 0.2487, 0.3706, 0.1289]) -Greedy action tensor([ 1.8484, 0.1688, -0.2002, 0.1624]) tensor([0.6664, 0.1242, 0.0859, 0.1235]) -Greedy action tensor([-0.3298, -1.1568, 1.6012, 0.1729]) tensor([0.1001, 0.0438, 0.6905, 0.1655]) -Greedy action tensor([ 1.0884, -1.1609, 0.5998, 1.5443]) tensor([0.3034, 0.0320, 0.1861, 0.4786]) -Greedy action tensor([ 0.9221, -0.2747, 1.3423, -0.2037]) tensor([0.3176, 0.0960, 0.4834, 0.1030]) -Greedy action tensor([ 0.9365, -2.0290, 1.4938, 0.6095]) tensor([0.2842, 0.0146, 0.4962, 0.2049]) -Greedy action tensor([-0.2570, -1.8479, -0.4967, 0.2632]) tensor([0.2723, 0.0555, 0.2142, 0.4580]) -Greedy action tensor([-0.1656, -2.3035, 1.1372, -0.3527]) tensor([0.1777, 0.0210, 0.6539, 0.1474]) -Greedy action tensor([-0.1735, -0.3582, 0.3507, -1.2814]) tensor([0.2597, 0.2159, 0.4387, 0.0858]) -Greedy action tensor([ 0.7459, -1.1283, -0.0148, 0.5694]) tensor([0.4067, 0.0624, 0.1900, 0.3409]) -Greedy action tensor([ 0.1651, -0.1949, -0.2309, 1.1380]) tensor([0.1994, 0.1391, 0.1342, 0.5274]) -Greedy action tensor([ 1.0328, -1.8885, 0.3192, 1.3016]) tensor([0.3506, 0.0189, 0.1718, 0.4587]) -Greedy action tensor([-0.6913, -0.1206, -0.3897, 0.5496]) tensor([0.1319, 0.2334, 0.1784, 0.4563]) -Greedy action tensor([ 0.7461, -0.8346, 0.7319, 0.4292]) tensor([0.3424, 0.0705, 0.3376, 0.2494]) -Greedy action tensor([ 0.2494, -1.5006, 0.5549, -0.6945]) tensor([0.3424, 0.0595, 0.4648, 0.1332]) -Greedy action tensor([-0.3615, -1.5663, 0.2584, 1.6936]) tensor([0.0912, 0.0273, 0.1695, 0.7120]) -Greedy action tensor([ 0.2485, 0.4602, -0.1038, 0.2583]) tensor([0.2533, 0.3130, 0.1780, 0.2557]) -Greedy action tensor([ 0.2223, -1.1146, 0.5521, -0.2605]) tensor([0.3058, 0.0803, 0.4252, 0.1887]) -Greedy action tensor([ 0.6796, -1.6626, 0.0160, -0.0933]) tensor([0.4824, 0.0464, 0.2485, 0.2227]) -Greedy action tensor([-0.6297, -1.1804, 1.4903, 0.6636]) tensor([0.0738, 0.0425, 0.6147, 0.2689]) -Greedy action tensor([ 1.7665, -1.0654, 1.0740, -0.1342]) tensor([0.5852, 0.0345, 0.2928, 0.0875]) -Greedy action tensor([ 0.3286, -0.0833, 1.3167, -1.3470]) tensor([0.2205, 0.1460, 0.5922, 0.0413]) -Greedy action tensor([-0.3636, 0.2584, 0.4065, 0.1916]) tensor([0.1478, 0.2753, 0.3193, 0.2575]) -Greedy action tensor([ 2.2554, -0.3320, 0.0799, 1.8492]) tensor([0.5391, 0.0405, 0.0612, 0.3591]) -Greedy action tensor([-0.0683, -0.6317, 0.7987, -0.2493]) tensor([0.2091, 0.1190, 0.4975, 0.1744]) -Greedy action tensor([-0.0641, -0.3095, 0.4195, 1.2066]) tensor([0.1435, 0.1123, 0.2328, 0.5114]) -Greedy action tensor([ 0.1197, -0.5596, 0.2282, 0.5704]) tensor([0.2386, 0.1210, 0.2660, 0.3745]) -Greedy action tensor([-0.1686, -2.3295, 1.3197, 0.2188]) tensor([0.1425, 0.0164, 0.6312, 0.2099]) -Greedy action tensor([ 1.4640, -1.5503, -0.3502, 1.0147]) tensor([0.5405, 0.0265, 0.0881, 0.3449]) -Greedy action tensor([ 0.3296, -1.4507, 0.1141, 0.7342]) tensor([0.2879, 0.0485, 0.2321, 0.4315]) -Greedy action tensor([ 1.0105, -1.6266, 1.9443, 0.8247]) tensor([0.2249, 0.0161, 0.5722, 0.1868]) -Greedy action tensor([-0.1116, 0.3335, 0.0578, -0.8265]) tensor([0.2362, 0.3686, 0.2797, 0.1155]) -Greedy action tensor([ 1.3395, -0.2986, 1.4775, 0.7998]) tensor([0.3418, 0.0664, 0.3924, 0.1993]) -Greedy action tensor([-1.3360, -1.0450, -0.1282, 0.0134]) tensor([0.1048, 0.1402, 0.3508, 0.4041]) -Greedy action tensor([ 0.6555, -0.5249, -0.0395, 1.0259]) tensor([0.3073, 0.0944, 0.1534, 0.4450]) -Greedy action tensor([-1.0206, -0.9846, 2.1856, -0.5881]) tensor([0.0354, 0.0367, 0.8734, 0.0545]) -Greedy action tensor([-0.4287, -0.1420, 0.3910, -0.2579]) tensor([0.1728, 0.2301, 0.3922, 0.2050]) -Greedy action tensor([ 1.0748, -0.4088, -0.1504, 1.2491]) tensor([0.3689, 0.0837, 0.1083, 0.4391]) -Greedy action tensor([ 0.2818, 0.6843, 1.1599, -0.8454]) tensor([0.1914, 0.2862, 0.4605, 0.0620]) -Greedy action tensor([ 0.0232, -0.0433, 0.7824, -0.7325]) tensor([0.2202, 0.2060, 0.4704, 0.1034]) -Greedy action tensor([ 0.5509, -0.5633, -0.1225, 0.1232]) tensor([0.4016, 0.1318, 0.2048, 0.2618]) -Greedy action tensor([ 1.0852, -0.2176, 0.3134, 0.5153]) tensor([0.4349, 0.1182, 0.2010, 0.2460]) -Greedy action tensor([ 1.0468, -0.6007, 0.1891, 1.2185]) tensor([0.3566, 0.0687, 0.1513, 0.4234]) -Greedy action tensor([-0.4555, -1.1848, -0.3775, -0.0452]) tensor([0.2457, 0.1185, 0.2656, 0.3703]) -Greedy action tensor([ 1.4780, -0.5641, 0.4854, -0.0985]) tensor([0.5858, 0.0760, 0.2171, 0.1211]) -Greedy action tensor([ 1.2189, -0.6937, 0.4135, 0.3922]) tensor([0.4921, 0.0727, 0.2199, 0.2153]) -Greedy action tensor([ 0.2545, -1.4792, -0.6437, 0.0167]) tensor([0.4215, 0.0745, 0.1717, 0.3323]) -Greedy action tensor([ 1.5385, -0.8449, 0.1562, 0.9658]) tensor([0.5243, 0.0484, 0.1316, 0.2957]) -Greedy action tensor([ 1.0652, -0.7813, 0.9802, 0.2144]) tensor([0.3995, 0.0630, 0.3669, 0.1706]) -Greedy action tensor([-0.9933, -0.2207, 2.0350, -0.4409]) tensor([0.0391, 0.0847, 0.8082, 0.0680]) -Greedy action tensor([-0.6801, 0.4413, 0.1946, -0.7150]) tensor([0.1345, 0.4129, 0.3226, 0.1299]) -Greedy action tensor([-0.6884, -0.3584, 0.5703, 0.0319]) tensor([0.1255, 0.1746, 0.4419, 0.2580]) -Greedy action tensor([ 0.7000, 0.3385, -0.2237, 0.3907]) tensor([0.3536, 0.2464, 0.1404, 0.2596]) -Greedy action tensor([ 0.4733, -0.2165, -0.3941, -0.1794]) tensor([0.4094, 0.2054, 0.1720, 0.2132]) -Greedy action tensor([0.9930, 0.1043, 0.8494, 0.1719]) tensor([0.3680, 0.1513, 0.3188, 0.1619]) -Greedy action tensor([-0.1592, -1.0173, 0.1109, 0.2581]) tensor([0.2352, 0.0997, 0.3081, 0.3570]) -Greedy action tensor([ 1.2786, -0.0461, 0.6549, -0.0493]) tensor([0.4838, 0.1286, 0.2593, 0.1282]) -Greedy action tensor([ 0.7690, -1.4215, 1.0020, -0.0749]) tensor([0.3566, 0.0399, 0.4501, 0.1534]) -Greedy action tensor([ 1.7629, -0.2160, 0.1992, 1.0192]) tensor([0.5486, 0.0758, 0.1148, 0.2608]) -Greedy action tensor([0.4632, 0.8309, 0.1488, 0.5467]) tensor([0.2346, 0.3389, 0.1713, 0.2551]) -Greedy action tensor([ 0.0979, -1.2169, -1.0801, -0.1935]) tensor([0.4304, 0.1156, 0.1325, 0.3216]) -Greedy action tensor([-1.7442, -0.6167, -0.0360, 0.7724]) tensor([0.0455, 0.1404, 0.2509, 0.5632]) -Greedy action tensor([ 0.8248, 0.1542, -0.7021, 0.7915]) tensor([0.3709, 0.1897, 0.0806, 0.3588]) -Greedy action tensor([ 2.6206, -1.0804, -0.2425, 1.1217]) tensor([0.7662, 0.0189, 0.0437, 0.1712]) -Greedy action tensor([ 1.6345, -0.3916, -0.4670, 0.0785]) tensor([0.6826, 0.0900, 0.0835, 0.1440]) -Greedy action tensor([ 1.3667, -0.2492, -0.3251, 0.2099]) tensor([0.5891, 0.1171, 0.1085, 0.1853]) -Greedy action tensor([ 1.1857, -0.8500, -0.0769, 0.2872]) tensor([0.5492, 0.0717, 0.1554, 0.2236]) -Greedy action tensor([ 1.3593, -0.3910, -0.3540, 0.2372]) tensor([0.5954, 0.1034, 0.1073, 0.1939]) -Greedy action tensor([ 1.4776, -0.2675, -0.4555, 0.1984]) tensor([0.6259, 0.1093, 0.0906, 0.1742]) -Greedy action tensor([ 1.5505, -0.3503, -0.4146, 0.4327]) tensor([0.6186, 0.0924, 0.0867, 0.2023]) -Greedy action tensor([ 1.1961, -0.3268, -0.3891, 0.4013]) tensor([0.5334, 0.1163, 0.1093, 0.2409]) -Greedy action tensor([ 0.8591, -0.4361, -0.5468, 0.6897]) tensor([0.4232, 0.1159, 0.1037, 0.3572]) -Greedy action tensor([ 0.3157, -0.1751, 0.2113, 0.1020]) tensor([0.3011, 0.1843, 0.2713, 0.2432]) -Greedy action tensor([ 1.2444, -0.8496, -0.2876, 0.6740]) tensor([0.5250, 0.0647, 0.1135, 0.2968]) -Greedy action tensor([ 1.3013, -0.2896, -0.3711, 0.4761]) tensor([0.5465, 0.1114, 0.1026, 0.2395]) -Greedy action tensor([ 1.0328, -0.3574, -0.2369, 0.1505]) tensor([0.5145, 0.1281, 0.1445, 0.2129]) -Greedy action tensor([ 1.6154, -0.3468, -0.3267, 0.2505]) tensor([0.6496, 0.0913, 0.0932, 0.1659]) -Greedy action tensor([ 1.1342, -0.3293, -0.7530, 0.4881]) tensor([0.5244, 0.1214, 0.0794, 0.2748]) -Greedy action tensor([ 1.2833, -0.5524, -0.1384, 0.0656]) tensor([0.5894, 0.0940, 0.1422, 0.1744]) -Greedy action tensor([ 1.1747, -0.3938, -0.1349, 0.2635]) tensor([0.5318, 0.1108, 0.1436, 0.2138]) -Greedy action tensor([ 1.0934, -0.4199, -0.0367, 0.1425]) tensor([0.5182, 0.1141, 0.1674, 0.2003]) -Greedy action tensor([ 1.1921, -0.5517, -0.2920, 0.3804]) tensor([0.5418, 0.0947, 0.1228, 0.2406]) -Greedy action tensor([ 1.1535, -0.0833, -0.4133, 0.0777]) tensor([0.5435, 0.1578, 0.1134, 0.1853]) -Greedy action tensor([ 0.8875, -0.5678, -0.3720, 0.3116]) tensor([0.4809, 0.1122, 0.1365, 0.2704]) -Greedy action tensor([ 1.6282, -0.8774, -0.3819, 0.7742]) tensor([0.6093, 0.0497, 0.0816, 0.2594]) -Greedy action tensor([ 1.7612, -0.1001, 0.1230, -0.0280]) tensor([0.6593, 0.1025, 0.1281, 0.1102]) -Greedy action tensor([ 1.0851, -0.0849, -0.2048, 0.1586]) tensor([0.5046, 0.1566, 0.1389, 0.1998]) -Greedy action tensor([ 1.1223, -0.5813, -0.4553, 0.7548]) tensor([0.4805, 0.0875, 0.0992, 0.3328]) -Greedy action tensor([ 2.1319, -0.0205, -0.2849, 0.4714]) tensor([0.7166, 0.0833, 0.0639, 0.1362]) -Greedy action tensor([ 1.0523, -0.4823, -0.1275, 0.3172]) tensor([0.4994, 0.1076, 0.1535, 0.2394]) -Greedy action tensor([ 0.7954, -0.2939, 0.0675, -0.3006]) tensor([0.4643, 0.1562, 0.2243, 0.1552]) -Greedy action tensor([ 1.8284, -0.8458, -0.4981, 0.6761]) tensor([0.6745, 0.0465, 0.0659, 0.2131]) -Greedy action tensor([ 0.4424, -0.1812, -0.1705, 0.5040]) tensor([0.3183, 0.1706, 0.1725, 0.3386]) -Greedy action tensor([ 1.0674, -0.3341, -0.1487, 0.2189]) tensor([0.5074, 0.1250, 0.1504, 0.2172]) -Greedy action tensor([ 1.4377, -0.5387, -0.1654, 0.4038]) tensor([0.5898, 0.0817, 0.1187, 0.2098]) -Greedy action tensor([ 1.3989, -0.7878, -0.1491, 0.6860]) tensor([0.5509, 0.0619, 0.1172, 0.2701]) -Greedy action tensor([ 1.3899, -0.1890, -0.2884, 0.3669]) tensor([0.5706, 0.1177, 0.1065, 0.2052]) -Greedy action tensor([ 1.1771, -0.1181, -0.1030, -0.0826]) tensor([0.5448, 0.1492, 0.1515, 0.1546]) -Greedy action tensor([ 0.6921, -0.4778, -0.0123, -0.0890]) tensor([0.4419, 0.1372, 0.2185, 0.2024]) -Greedy action tensor([ 1.2641, -0.0906, -0.2585, 0.2515]) tensor([0.5437, 0.1403, 0.1186, 0.1975]) -Greedy action tensor([ 1.4838, -0.1223, -0.3382, 0.2921]) tensor([0.6002, 0.1204, 0.0971, 0.1823]) -Greedy action tensor([ 1.3515, -0.1682, -0.3304, 0.6439]) tensor([0.5270, 0.1153, 0.0980, 0.2597]) -Greedy action tensor([ 1.2803, -0.2196, -0.5983, 0.6777]) tensor([0.5199, 0.1160, 0.0794, 0.2846]) -Greedy action tensor([ 0.6426, -0.2661, -0.4147, 0.4760]) tensor([0.3851, 0.1552, 0.1338, 0.3260]) -Greedy action tensor([ 1.9064, -0.6797, -0.3088, 0.5678]) tensor([0.6913, 0.0521, 0.0754, 0.1813]) -Greedy action tensor([ 1.1902, -0.4584, -0.1975, 0.0520]) tensor([0.5674, 0.1091, 0.1416, 0.1818]) -Greedy action tensor([ 0.8458, -0.0760, -0.5364, 0.6419]) tensor([0.4058, 0.1614, 0.1019, 0.3309]) -Greedy action tensor([ 1.0734, -0.1729, -0.0398, 0.1125]) tensor([0.5004, 0.1439, 0.1644, 0.1914]) -Greedy action tensor([ 0.9422, 0.0523, -0.0460, -0.2507]) tensor([0.4793, 0.1969, 0.1784, 0.1454]) -Greedy action tensor([ 1.1782, -0.2155, -0.1111, 0.0588]) tensor([0.5405, 0.1341, 0.1489, 0.1765]) -Greedy action tensor([ 0.0288, 0.3172, -0.0344, -0.1289]) tensor([0.2423, 0.3233, 0.2275, 0.2069]) -Greedy action tensor([ 1.4696, -0.2615, -0.4231, 0.3074]) tensor([0.6096, 0.1079, 0.0918, 0.1907]) -Greedy action tensor([ 2.3244e+00, -1.3888e+00, 1.9067e-03, 5.1861e-01]) tensor([0.7771, 0.0190, 0.0762, 0.1277]) -Greedy action tensor([ 1.9431, -0.4272, -0.3727, 0.5769]) tensor([0.6910, 0.0646, 0.0682, 0.1762]) -Greedy action tensor([ 0.7271, -0.3301, 0.1266, 0.1615]) tensor([0.4059, 0.1410, 0.2226, 0.2305]) -Greedy action tensor([ 1.6391, -0.5115, -0.2118, 0.4063]) tensor([0.6390, 0.0744, 0.1004, 0.1863]) -Greedy action tensor([ 1.4243, -0.9510, -0.1554, 0.4310]) tensor([0.5990, 0.0557, 0.1234, 0.2219]) -Greedy action tensor([ 0.8781, 0.0189, -0.0144, 0.0453]) tensor([0.4409, 0.1867, 0.1806, 0.1917]) -Greedy action tensor([ 1.5339, -0.7103, -0.2296, 0.2448]) tensor([0.6439, 0.0683, 0.1104, 0.1774]) -Greedy action tensor([ 3.0468, -1.5775, -0.3029, 0.9245]) tensor([0.8586, 0.0084, 0.0301, 0.1028]) -Greedy action tensor([ 1.1610, -0.8187, -0.1035, 0.3513]) tensor([0.5361, 0.0740, 0.1514, 0.2385]) -Greedy action tensor([ 0.4182, -0.5261, 0.1141, -0.0502]) tensor([0.3633, 0.1413, 0.2680, 0.2274]) -Greedy action tensor([ 1.3761, -0.4912, -0.2587, -0.0559]) tensor([0.6296, 0.0973, 0.1228, 0.1504]) -Greedy action tensor([ 1.2187, -0.6979, -0.1782, 0.2305]) tensor([0.5660, 0.0833, 0.1400, 0.2107]) -Greedy action tensor([ 1.7139, -0.5814, -0.2729, 0.5859]) tensor([0.6404, 0.0645, 0.0878, 0.2073]) -Greedy action tensor([ 1.3556, -0.9578, -0.1986, 0.0729]) tensor([0.6299, 0.0623, 0.1331, 0.1747]) -Greedy action tensor([ 1.5367, -0.2733, -0.5274, 0.7207]) tensor([0.5771, 0.0944, 0.0733, 0.2552]) -Greedy action tensor([ 1.3664, -0.0796, -0.2846, 0.2090]) tensor([0.5742, 0.1352, 0.1102, 0.1805]) -Greedy action tensor([ 1.1591, 0.0747, -0.6421, 0.4588]) tensor([0.5001, 0.1691, 0.0826, 0.2483]) -Greedy action tensor([ 1.8006, -1.0509, -0.2309, 0.3183]) tensor([0.7062, 0.0408, 0.0926, 0.1604]) -Greedy action tensor([ 1.5646, -0.3672, -0.6052, 0.0033]) tensor([0.6808, 0.0986, 0.0777, 0.1429]) -Greedy action tensor([ 1.1125, -0.3813, -0.2378, 0.1843]) tensor([0.5322, 0.1195, 0.1379, 0.2104]) -Greedy action tensor([ 2.0680, -1.1740, -0.2154, 0.6074]) tensor([0.7283, 0.0285, 0.0742, 0.1690]) -Greedy action tensor([ 1.2944, -0.1025, -0.2553, 0.1941]) tensor([0.5579, 0.1380, 0.1185, 0.1857]) -Greedy action tensor([ 1.6016, -0.3506, -0.7398, 0.6621]) tensor([0.6139, 0.0871, 0.0590, 0.2399]) -Greedy action tensor([ 2.1788, -0.7424, -0.2130, 0.5595]) tensor([0.7444, 0.0401, 0.0681, 0.1474]) -Greedy action tensor([ 1.1002, -0.4995, -0.5420, 0.4136]) tensor([0.5267, 0.1064, 0.1019, 0.2650]) -Greedy action tensor([ 1.2636, -0.4790, 0.0087, -0.0534]) tensor([0.5787, 0.1013, 0.1650, 0.1550]) -Greedy action tensor([ 1.2200, -0.4333, -0.2256, -0.0644]) tensor([0.5869, 0.1123, 0.1383, 0.1625]) -Greedy action tensor([ 1.2291, -0.4812, -0.1925, 0.3020]) tensor([0.5501, 0.0995, 0.1328, 0.2177]) -Greedy action tensor([ 1.4227, -0.3292, -0.1127, 0.2868]) tensor([0.5848, 0.1014, 0.1259, 0.1878]) -Greedy action tensor([ 1.5762, -0.5333, -0.6239, 0.6996]) tensor([0.6067, 0.0736, 0.0672, 0.2525]) -Greedy action tensor([ 0.8688, -0.2993, 0.0487, 0.0813]) tensor([0.4533, 0.1409, 0.1996, 0.2062]) -Greedy action tensor([ 1.5156, -0.9103, -0.2731, 0.8086]) tensor([0.5719, 0.0506, 0.0956, 0.2820]) -Greedy action tensor([-1.2121, 0.0142, 0.3175, -0.0818]) tensor([0.0825, 0.2812, 0.3809, 0.2555]) -Greedy action tensor([-1.2717, -0.6882, 1.0423, 1.1212]) tensor([0.0419, 0.0751, 0.4240, 0.4589]) -Greedy action tensor([-1.9644, -0.8246, 0.2719, -0.2007]) tensor([0.0518, 0.1618, 0.4844, 0.3020]) -Greedy action tensor([-1.4821, -0.5746, 0.4967, 0.1260]) tensor([0.0637, 0.1578, 0.4606, 0.3179]) -Greedy action tensor([-1.5028, -0.5755, 0.6134, 0.3749]) tensor([0.0544, 0.1376, 0.4519, 0.3560]) -Greedy action tensor([-1.8356, -0.3950, 0.5905, -0.1294]) tensor([0.0454, 0.1916, 0.5132, 0.2498]) -Greedy action tensor([-1.9091, -0.4606, 0.6461, -0.1555]) tensor([0.0418, 0.1781, 0.5385, 0.2416]) -Greedy action tensor([-1.3914, -0.4284, 0.4291, -0.0567]) tensor([0.0736, 0.1927, 0.4543, 0.2795]) -Greedy action tensor([-1.7935, -0.1258, 0.5341, -0.0974]) tensor([0.0454, 0.2408, 0.4659, 0.2478]) -Greedy action tensor([-0.3383, -0.4356, 1.1476, 1.2794]) tensor([0.0880, 0.0798, 0.3887, 0.4435]) -Greedy action tensor([-1.7341, -0.3731, 0.5471, -0.0508]) tensor([0.0498, 0.1943, 0.4877, 0.2682]) -Greedy action tensor([-1.5936, -0.2220, 0.4620, -0.0199]) tensor([0.0569, 0.2242, 0.4444, 0.2745]) -Greedy action tensor([-1.8642, -0.4633, 0.6328, -0.1080]) tensor([0.0435, 0.1765, 0.5282, 0.2518]) -Greedy action tensor([-1.8813, -0.4688, 0.6343, -0.1463]) tensor([0.0432, 0.1774, 0.5345, 0.2449]) -Greedy action tensor([-0.9781, -0.3737, 0.6816, 1.2876]) tensor([0.0564, 0.1033, 0.2966, 0.5437]) -Greedy action tensor([-1.9044, -0.4335, 0.6465, -0.1592]) tensor([0.0418, 0.1822, 0.5364, 0.2396]) -Greedy action tensor([-1.9135, -0.4331, 0.6461, -0.1658]) tensor([0.0415, 0.1826, 0.5373, 0.2386]) -Greedy action tensor([-1.3516, -0.6031, 0.3411, 0.1627]) tensor([0.0764, 0.1614, 0.4150, 0.3472]) -Greedy action tensor([-1.9179, -0.4525, 0.6511, -0.1706]) tensor([0.0415, 0.1795, 0.5411, 0.2379]) -Greedy action tensor([-1.8185, -0.4043, 0.6292, -0.0857]) tensor([0.0448, 0.1842, 0.5177, 0.2533]) -Greedy action tensor([-1.7981, -0.4463, 0.6355, -0.0545]) tensor([0.0455, 0.1758, 0.5186, 0.2601]) -Greedy action tensor([-1.1144, -0.4517, 0.2327, 0.4560]) tensor([0.0862, 0.1673, 0.3317, 0.4147]) -Greedy action tensor([-1.7219, -0.0204, 0.5210, 0.0817]) tensor([0.0455, 0.2495, 0.4287, 0.2763]) -Greedy action tensor([-0.9092, -0.6755, 0.6601, 1.3775]) tensor([0.0591, 0.0747, 0.2841, 0.5821]) -Greedy action tensor([-1.7463, -0.2893, 0.5267, -0.0426]) tensor([0.0488, 0.2095, 0.4737, 0.2681]) -Greedy action tensor([-1.9006, -0.4352, 0.6409, -0.1597]) tensor([0.0421, 0.1824, 0.5351, 0.2403]) -Greedy action tensor([-1.9017, -0.3780, 0.6447, -0.1467]) tensor([0.0414, 0.1902, 0.5288, 0.2396]) -Greedy action tensor([-1.9011, -0.3779, 0.6388, -0.1500]) tensor([0.0416, 0.1909, 0.5277, 0.2398]) -Greedy action tensor([-0.1798, -0.0494, 0.0099, 0.6159]) tensor([0.1797, 0.2048, 0.2173, 0.3983]) -Greedy action tensor([-1.7045, -0.4818, 0.6800, 0.1859]) tensor([0.0457, 0.1553, 0.4962, 0.3028]) -Greedy action tensor([-1.8699, -0.3107, 0.6108, -0.1279]) tensor([0.0427, 0.2031, 0.5104, 0.2438]) -Greedy action tensor([-1.8228, -0.2502, 0.5769, -0.0963]) tensor([0.0445, 0.2146, 0.4907, 0.2503]) -Greedy action tensor([-0.7760, 0.3387, 0.1568, -0.0568]) tensor([0.1157, 0.3527, 0.2941, 0.2375]) -Greedy action tensor([-1.8257, -0.4720, 0.7092, 0.0415]) tensor([0.0417, 0.1616, 0.5266, 0.2701]) -Greedy action tensor([-1.8895, -0.4749, 0.6328, -0.1642]) tensor([0.0431, 0.1775, 0.5372, 0.2421]) -Greedy action tensor([-1.8480, -0.4228, 0.6085, -0.1244]) tensor([0.0446, 0.1854, 0.5201, 0.2499]) -Greedy action tensor([-1.7535, -0.4848, 0.5797, -0.0324]) tensor([0.0489, 0.1738, 0.5040, 0.2733]) -Greedy action tensor([-0.7901, 0.9699, 0.2363, 1.0820]) tensor([0.0621, 0.3609, 0.1733, 0.4037]) -Greedy action tensor([-1.3426, -0.5535, 0.4625, 0.3171]) tensor([0.0688, 0.1514, 0.4182, 0.3616]) -Greedy action tensor([-1.7188, -0.5378, 0.5576, -0.0484]) tensor([0.0518, 0.1687, 0.5044, 0.2752]) -Greedy action tensor([-1.8954, -0.4706, 0.6619, -0.1381]) tensor([0.0419, 0.1743, 0.5408, 0.2430]) -Greedy action tensor([-0.6872, -0.2416, 0.2459, 1.0533]) tensor([0.0926, 0.1445, 0.2353, 0.5276]) -Greedy action tensor([-1.7375, -0.4721, 0.5869, -0.0622]) tensor([0.0497, 0.1763, 0.5084, 0.2656]) -Greedy action tensor([-1.8538, -0.4623, 0.6383, -0.1120]) tensor([0.0438, 0.1762, 0.5298, 0.2502]) -Greedy action tensor([-1.2802, -0.4554, 0.9582, 1.1319]) tensor([0.0420, 0.0958, 0.3938, 0.4684]) -Greedy action tensor([-1.5089, -0.1256, 0.7673, 0.3961]) tensor([0.0466, 0.1860, 0.4541, 0.3133]) -Greedy action tensor([-0.9642, -0.5397, 0.4071, 0.7436]) tensor([0.0834, 0.1275, 0.3288, 0.4603]) -Greedy action tensor([-0.4241, -0.2646, 0.1539, 0.1696]) tensor([0.1734, 0.2034, 0.3091, 0.3140]) -Greedy action tensor([-1.4822, 0.1832, 0.4310, 0.3750]) tensor([0.0514, 0.2716, 0.3480, 0.3290]) -Greedy action tensor([-1.8664, -0.2762, 0.5969, -0.1329]) tensor([0.0429, 0.2104, 0.5038, 0.2428]) -Greedy action tensor([-1.9124, -0.4346, 0.6491, -0.1673]) tensor([0.0416, 0.1821, 0.5383, 0.2380]) -Greedy action tensor([-1.9241, -0.4239, 0.6558, -0.1664]) tensor([0.0409, 0.1831, 0.5391, 0.2369]) -Greedy action tensor([-1.6577, -0.5707, 0.5042, -0.0349]) tensor([0.0564, 0.1673, 0.4903, 0.2860]) -Greedy action tensor([-1.8382, -0.4035, 0.6067, -0.1328]) tensor([0.0450, 0.1888, 0.5186, 0.2476]) -Greedy action tensor([-1.7424, -0.4854, 0.6324, 0.0728]) tensor([0.0467, 0.1642, 0.5021, 0.2869]) -Greedy action tensor([-1.5301, -0.5514, 0.5514, 0.3235]) tensor([0.0554, 0.1473, 0.4439, 0.3534]) -Greedy action tensor([-1.9092, -0.3989, 0.6438, -0.1558]) tensor([0.0414, 0.1875, 0.5319, 0.2391]) -Greedy action tensor([-1.8788, -0.3607, 0.6255, -0.1275]) tensor([0.0424, 0.1937, 0.5193, 0.2446]) -Greedy action tensor([-1.9271, -0.4617, 0.6695, -0.1566]) tensor([0.0406, 0.1758, 0.5450, 0.2386]) -Greedy action tensor([-1.3012, -0.5916, 0.3610, 0.1288]) tensor([0.0801, 0.1629, 0.4223, 0.3347]) -Greedy action tensor([-1.8678, -0.4477, 0.6702, -0.0988]) tensor([0.0423, 0.1749, 0.5349, 0.2479]) -Greedy action tensor([-0.8013, -0.3513, 1.0597, 1.4093]) tensor([0.0552, 0.0866, 0.3549, 0.5034]) -Greedy action tensor([-1.8864, -0.4730, 0.6397, -0.1422]) tensor([0.0429, 0.1761, 0.5359, 0.2452]) -Greedy action tensor([-1.8099, -0.3393, 0.6138, -0.0669]) tensor([0.0447, 0.1947, 0.5050, 0.2556]) -Greedy action tensor([-1.9043, -0.3928, 0.6332, -0.1549]) tensor([0.0418, 0.1894, 0.5285, 0.2403]) -Greedy action tensor([-1.8882, -0.4681, 0.6360, -0.1587]) tensor([0.0430, 0.1779, 0.5367, 0.2424]) -Greedy action tensor([-1.0346, -0.6071, 0.2292, 0.2950]) tensor([0.1015, 0.1556, 0.3592, 0.3836]) -Greedy action tensor([-1.2006, -0.5773, 0.6440, 0.8123]) tensor([0.0600, 0.1118, 0.3793, 0.4488]) -Greedy action tensor([-1.2436, 0.1075, 0.5930, 0.9063]) tensor([0.0507, 0.1958, 0.3182, 0.4353]) -Greedy action tensor([-1.6252, -0.4203, 0.5156, 0.0375]) tensor([0.0552, 0.1842, 0.4695, 0.2911]) -Greedy action tensor([-1.7049, -0.4543, 0.5630, -0.1308]) tensor([0.0527, 0.1840, 0.5090, 0.2543]) -Greedy action tensor([-1.8328, -0.4902, 0.6176, -0.1453]) tensor([0.0458, 0.1754, 0.5311, 0.2477]) -Greedy action tensor([-1.6080, -0.2178, 0.4617, 0.0046]) tensor([0.0557, 0.2237, 0.4413, 0.2794]) -Greedy action tensor([-1.9102, -0.4112, 0.6502, -0.1549]) tensor([0.0413, 0.1850, 0.5347, 0.2390]) -Greedy action tensor([-1.8403, -0.2861, 0.5921, -0.1403]) tensor([0.0443, 0.2094, 0.5040, 0.2423]) -Greedy action tensor([-0.9915, 0.0146, 0.5129, 0.4133]) tensor([0.0812, 0.2222, 0.3656, 0.3310]) -Greedy action tensor([-1.8287, -0.4835, 0.6025, -0.1391]) tensor([0.0462, 0.1775, 0.5258, 0.2505]) -Greedy action tensor([-1.8905, -0.4146, 0.6385, -0.1406]) tensor([0.0422, 0.1848, 0.5298, 0.2431]) -Greedy action tensor([-1.9304, -0.4556, 0.6915, -0.1190]) tensor([0.0396, 0.1731, 0.5450, 0.2423]) -Greedy action tensor([-1.9124, -0.4409, 0.6612, -0.1510]) tensor([0.0412, 0.1793, 0.5399, 0.2396]) -Greedy action tensor([-1.4648, -0.3895, 0.4208, 0.1614]) tensor([0.0641, 0.1878, 0.4223, 0.3258]) -Greedy action tensor([ 0.5604, -0.3041, -0.0451, -0.1463]) tensor([0.4065, 0.1712, 0.2218, 0.2005]) -Greedy action tensor([ 0.3011, -0.2582, -0.0906, -0.1250]) tensor([0.3448, 0.1971, 0.2330, 0.2251]) -Greedy action tensor([ 0.4479, 0.1306, -0.1340, -0.0690]) tensor([0.3468, 0.2525, 0.1938, 0.2068]) -Greedy action tensor([ 0.8711, -1.1579, -0.1111, -0.5202]) tensor([0.5699, 0.0749, 0.2134, 0.1418]) -Greedy action tensor([ 0.4615, -0.4717, -0.1513, -0.2642]) tensor([0.4134, 0.1626, 0.2240, 0.2001]) -Greedy action tensor([ 0.4413, -0.1651, 0.0605, -0.1456]) tensor([0.3591, 0.1958, 0.2454, 0.1997]) -Greedy action tensor([ 0.6185, -0.7240, 0.1671, -0.6546]) tensor([0.4592, 0.1199, 0.2924, 0.1285]) -Greedy action tensor([ 0.7384, -0.5837, -0.0714, -0.3942]) tensor([0.4917, 0.1311, 0.2188, 0.1584]) -Greedy action tensor([ 0.7267, -0.6175, -0.1899, -0.0997]) tensor([0.4766, 0.1243, 0.1906, 0.2086]) -Greedy action tensor([ 0.5317, -0.1591, -0.0590, -0.1018]) tensor([0.3867, 0.1938, 0.2142, 0.2052]) -Greedy action tensor([ 1.3033, -0.4683, 0.2275, -0.7124]) tensor([0.6082, 0.1034, 0.2074, 0.0810]) -Greedy action tensor([ 0.6844, -0.2206, -0.0797, -0.4596]) tensor([0.4569, 0.1848, 0.2128, 0.1455]) -Greedy action tensor([ 0.4074, -0.4183, 0.0082, -0.1839]) tensor([0.3756, 0.1645, 0.2520, 0.2079]) -Greedy action tensor([ 0.9303, -0.8458, 0.1326, -0.5161]) tensor([0.5391, 0.0913, 0.2428, 0.1269]) -Greedy action tensor([ 0.7416, -0.4225, -0.0597, -0.3409]) tensor([0.4763, 0.1487, 0.2137, 0.1613]) -Greedy action tensor([ 0.2709, 0.0492, 0.0022, -0.2273]) tensor([0.3151, 0.2525, 0.2409, 0.1915]) -Greedy action tensor([ 0.8199, -0.4070, 0.0600, -0.5506]) tensor([0.4963, 0.1455, 0.2321, 0.1261]) -Greedy action tensor([ 0.5544, -0.2694, 0.0756, -0.6032]) tensor([0.4215, 0.1849, 0.2611, 0.1324]) -Greedy action tensor([ 0.9028, -0.7236, -0.1975, -0.5014]) tensor([0.5634, 0.1108, 0.1875, 0.1383]) -Greedy action tensor([ 0.7879, -0.4635, -0.0724, -0.4490]) tensor([0.5001, 0.1431, 0.2116, 0.1452]) -Greedy action tensor([ 0.4771, -0.1449, -0.1381, -0.3266]) tensor([0.3960, 0.2126, 0.2141, 0.1773]) -Greedy action tensor([ 0.7861, -0.5614, 0.0889, -0.3765]) tensor([0.4830, 0.1255, 0.2405, 0.1510]) -Greedy action tensor([ 0.4092, 0.0966, -0.1146, -0.0447]) tensor([0.3380, 0.2472, 0.2002, 0.2147]) -Greedy action tensor([ 0.9646, -0.4394, -0.1957, -0.4830]) tensor([0.5574, 0.1369, 0.1747, 0.1311]) -Greedy action tensor([ 0.3303, 0.3620, -0.1178, 0.1120]) tensor([0.2878, 0.2970, 0.1838, 0.2313]) -Greedy action tensor([ 0.6236, -0.3512, 0.0863, -0.4368]) tensor([0.4333, 0.1635, 0.2532, 0.1501]) -Greedy action tensor([ 0.8957, -0.3647, -0.0673, -0.5617]) tensor([0.5268, 0.1494, 0.2011, 0.1227]) -Greedy action tensor([ 0.9776, -0.6797, -0.0749, -0.5732]) tensor([0.5708, 0.1088, 0.1993, 0.1211]) -Greedy action tensor([ 0.7786, -0.4017, -0.0810, -0.3008]) tensor([0.4830, 0.1484, 0.2045, 0.1641]) -Greedy action tensor([ 0.3180, -0.0059, -0.1132, -0.4851]) tensor([0.3545, 0.2564, 0.2303, 0.1588]) -Greedy action tensor([ 7.1686e-01, -4.6744e-01, 1.3888e-04, -4.1985e-01]) tensor([0.4728, 0.1446, 0.2309, 0.1517]) -Greedy action tensor([ 0.3536, -0.0919, 0.0420, -0.0989]) tensor([0.3324, 0.2129, 0.2434, 0.2114]) -Greedy action tensor([ 0.4523, -0.2859, -0.0150, -0.1578]) tensor([0.3776, 0.1805, 0.2367, 0.2052]) -Greedy action tensor([ 0.5776, -0.1627, 0.0767, -0.2719]) tensor([0.3983, 0.1900, 0.2414, 0.1703]) -Greedy action tensor([ 0.7183, -0.5043, -0.0938, -0.3798]) tensor([0.4827, 0.1421, 0.2143, 0.1610]) -Greedy action tensor([ 0.7972, -0.3111, -0.0856, -0.4115]) tensor([0.4896, 0.1616, 0.2025, 0.1462]) -Greedy action tensor([ 0.8237, -1.1729, 0.0099, -0.4527]) tensor([0.5382, 0.0731, 0.2385, 0.1502]) -Greedy action tensor([ 0.6627, -0.2384, 0.0651, -0.2534]) tensor([0.4244, 0.1724, 0.2335, 0.1698]) -Greedy action tensor([ 0.7081, -0.1698, 0.1506, -0.1848]) tensor([0.4171, 0.1733, 0.2388, 0.1708]) -Greedy action tensor([ 0.8763, -0.4681, 0.0438, -0.4823]) tensor([0.5121, 0.1335, 0.2228, 0.1316]) -Greedy action tensor([ 0.7829, -0.5676, -0.0850, -0.3608]) tensor([0.5006, 0.1297, 0.2102, 0.1595]) -Greedy action tensor([ 0.8992, -0.1815, -0.1813, -0.3704]) tensor([0.5103, 0.1732, 0.1732, 0.1434]) -Greedy action tensor([ 0.9412, -0.4917, -0.1455, -0.2673]) tensor([0.5334, 0.1273, 0.1799, 0.1593]) -Greedy action tensor([ 1.4161, -0.5504, -0.0814, -0.2682]) tensor([0.6455, 0.0903, 0.1444, 0.1198]) -Greedy action tensor([ 0.6844, -0.4056, 0.0200, -0.1807]) tensor([0.4402, 0.1480, 0.2265, 0.1853]) -Greedy action tensor([ 0.6318, -0.3226, 0.0828, -0.7225]) tensor([0.4503, 0.1734, 0.2601, 0.1162]) -Greedy action tensor([ 0.8910, -0.4489, 0.0439, -0.3788]) tensor([0.5073, 0.1328, 0.2174, 0.1425]) -Greedy action tensor([ 0.5320, -0.4317, 0.0119, -0.2430]) tensor([0.4104, 0.1566, 0.2440, 0.1891]) -Greedy action tensor([ 1.0750, -0.4995, 0.0501, -0.4557]) tensor([0.5611, 0.1162, 0.2013, 0.1214]) -Greedy action tensor([ 0.8933, -0.5416, -0.0084, -0.3435]) tensor([0.5170, 0.1231, 0.2098, 0.1501]) -Greedy action tensor([ 0.7691, -0.6182, -0.0793, -0.5935]) tensor([0.5171, 0.1291, 0.2214, 0.1324]) -Greedy action tensor([ 0.4958, -0.3901, 0.0006, -0.4990]) tensor([0.4181, 0.1724, 0.2548, 0.1546]) -Greedy action tensor([ 0.7979, -0.3513, -0.0968, -0.6454]) tensor([0.5097, 0.1615, 0.2083, 0.1204]) -Greedy action tensor([ 0.8625, -0.5764, 0.0163, -0.5674]) tensor([0.5248, 0.1245, 0.2252, 0.1256]) -Greedy action tensor([ 0.9035, -0.1957, 0.0636, -0.3107]) tensor([0.4850, 0.1616, 0.2094, 0.1440]) -Greedy action tensor([ 0.5725, -0.1310, -0.1545, -0.3516]) tensor([0.4210, 0.2083, 0.2035, 0.1671]) -Greedy action tensor([ 0.7575, -0.4248, -0.0991, -0.2252]) tensor([0.4750, 0.1456, 0.2017, 0.1778]) -Greedy action tensor([ 0.4453, -0.2863, -0.0642, -0.1015]) tensor([0.3758, 0.1808, 0.2258, 0.2175]) -Greedy action tensor([ 0.4341, -0.0256, -0.0742, 0.0268]) tensor([0.3450, 0.2179, 0.2075, 0.2296]) -Greedy action tensor([ 0.8680, -0.5801, -0.0273, -0.3017]) tensor([0.5118, 0.1203, 0.2090, 0.1589]) -Greedy action tensor([ 0.6735, -0.4135, -0.1341, -0.3871]) tensor([0.4696, 0.1584, 0.2094, 0.1626]) -Greedy action tensor([ 0.6471, -0.5317, -0.0282, -0.4263]) tensor([0.4633, 0.1425, 0.2358, 0.1584]) -Greedy action tensor([ 0.6513, -0.6193, 0.1098, -0.7078]) tensor([0.4718, 0.1324, 0.2745, 0.1212]) -Greedy action tensor([ 0.9430, -0.9209, 0.0815, -0.4267]) tensor([0.5459, 0.0847, 0.2307, 0.1388]) -Greedy action tensor([ 0.7797, -0.7746, 0.0692, -0.3663]) tensor([0.4949, 0.1046, 0.2432, 0.1573]) -Greedy action tensor([ 0.9683, -0.4083, -0.1241, -0.7164]) tensor([0.5639, 0.1423, 0.1891, 0.1046]) -Greedy action tensor([ 0.6784, -0.4992, -0.0677, -0.3396]) tensor([0.4665, 0.1437, 0.2212, 0.1686]) -Greedy action tensor([ 1.0096, -0.7612, 0.0561, -0.5463]) tensor([0.5661, 0.0963, 0.2182, 0.1194]) -Greedy action tensor([ 0.5645, 0.0316, 0.0869, -0.1236]) tensor([0.3690, 0.2166, 0.2289, 0.1855]) -Greedy action tensor([ 0.4939, -0.0312, 0.0037, -0.1922]) tensor([0.3693, 0.2185, 0.2262, 0.1860]) -Greedy action tensor([ 0.9267, -0.3017, -0.0454, -0.1427]) tensor([0.4965, 0.1453, 0.1878, 0.1704]) -Greedy action tensor([ 0.3639, 0.3345, -0.1309, 0.0643]) tensor([0.3010, 0.2923, 0.1835, 0.2231]) -Greedy action tensor([ 0.7552, -0.2711, 0.0393, -0.5166]) tensor([0.4701, 0.1684, 0.2297, 0.1318]) -Greedy action tensor([ 0.3829, -0.1177, -0.1671, -0.1482]) tensor([0.3609, 0.2188, 0.2082, 0.2122]) -Greedy action tensor([ 0.2096, 0.0865, -0.1197, -0.2641]) tensor([0.3100, 0.2741, 0.2230, 0.1930]) -Greedy action tensor([ 0.7844, -0.3489, 0.0204, -0.5020]) tensor([0.4845, 0.1560, 0.2257, 0.1338]) -Greedy action tensor([ 1.0818, -0.8262, -0.1457, -0.5303]) tensor([0.6094, 0.0904, 0.1786, 0.1216]) -Greedy action tensor([ 0.8460, -0.2998, 0.0031, -0.5487]) tensor([0.5009, 0.1593, 0.2156, 0.1242]) -Greedy action tensor([ 1.1240, -0.8205, -0.1641, -0.5845]) tensor([0.6250, 0.0894, 0.1724, 0.1132]) -Greedy action tensor([ 0.9081, -0.6074, 0.0783, -0.4175]) tensor([0.5204, 0.1143, 0.2270, 0.1382]) -Greedy action tensor([ 0.7579, -0.4852, 0.1425, -0.6429]) tensor([0.4818, 0.1390, 0.2604, 0.1187]) -Greedy action tensor([ 1.4402, -1.2472, -0.2045, 0.6749]) tensor([0.5793, 0.0394, 0.1118, 0.2695]) -Greedy action tensor([ 2.2098, -0.8326, -0.2604, 0.5796]) tensor([0.7529, 0.0359, 0.0637, 0.1475]) -Greedy action tensor([ 1.3744, -0.6547, -0.4300, 0.1828]) tensor([0.6251, 0.0822, 0.1029, 0.1899]) -Greedy action tensor([ 1.2323, -0.5235, -0.2148, 0.2923]) tensor([0.5560, 0.0961, 0.1308, 0.2172]) -Greedy action tensor([ 1.2980, -0.6392, -0.2488, 0.3826]) tensor([0.5690, 0.0820, 0.1212, 0.2278]) -Greedy action tensor([ 0.7894, -0.4979, -0.0164, -0.0434]) tensor([0.4635, 0.1279, 0.2071, 0.2015]) -Greedy action tensor([ 1.2678, -0.6688, -0.2488, 0.6102]) tensor([0.5314, 0.0766, 0.1166, 0.2753]) -Greedy action tensor([ 0.8359, -0.4230, 0.0470, 0.1727]) tensor([0.4438, 0.1260, 0.2016, 0.2286]) -Greedy action tensor([ 1.5675, -0.4829, -0.3398, 0.4599]) tensor([0.6221, 0.0800, 0.0924, 0.2055]) -Greedy action tensor([ 1.2311, -0.3095, -0.3508, 0.3675]) tensor([0.5430, 0.1163, 0.1116, 0.2290]) -Greedy action tensor([ 1.3918, -0.5483, -0.6188, 0.7163]) tensor([0.5598, 0.0804, 0.0750, 0.2849]) -Greedy action tensor([ 1.4224, -0.1425, -0.6639, 0.2358]) tensor([0.6103, 0.1276, 0.0758, 0.1863]) -Greedy action tensor([ 1.5941, -0.4956, -0.2106, 0.3843]) tensor([0.6303, 0.0780, 0.1037, 0.1880]) -Greedy action tensor([ 1.8650, -0.9897, -0.2619, 0.5676]) tensor([0.6896, 0.0397, 0.0822, 0.1884]) -Greedy action tensor([ 1.9888, -0.6893, -0.3238, 0.9012]) tensor([0.6646, 0.0457, 0.0658, 0.2240]) -Greedy action tensor([ 1.4822, -0.7150, -0.0946, 0.4997]) tensor([0.5910, 0.0657, 0.1221, 0.2212]) -Greedy action tensor([ 1.2192, -0.6794, -0.3841, 0.5472]) tensor([0.5372, 0.0805, 0.1081, 0.2743]) -Greedy action tensor([ 1.3508, -0.6808, -0.0736, 0.7059]) tensor([0.5273, 0.0691, 0.1269, 0.2767]) -Greedy action tensor([ 1.2076, -0.3065, -0.6648, 0.3824]) tensor([0.5519, 0.1214, 0.0849, 0.2418]) -Greedy action tensor([ 1.6350, -0.6996, -0.1381, 0.2579]) tensor([0.6583, 0.0638, 0.1118, 0.1661]) -Greedy action tensor([ 1.5347, -0.4357, -0.6502, 0.2265]) tensor([0.6570, 0.0916, 0.0739, 0.1776]) -Greedy action tensor([ 1.1480, -0.3715, -0.0884, 0.2175]) tensor([0.5253, 0.1149, 0.1526, 0.2072]) -Greedy action tensor([ 1.8099, -0.6178, -0.4897, 1.0302]) tensor([0.6071, 0.0536, 0.0609, 0.2784]) -Greedy action tensor([ 1.4282, 0.1418, -0.8471, 0.3135]) tensor([0.5858, 0.1618, 0.0602, 0.1922]) -Greedy action tensor([ 0.9990, -0.5085, -0.2397, 0.4090]) tensor([0.4841, 0.1072, 0.1403, 0.2684]) -Greedy action tensor([ 1.1770, -0.7026, -0.1318, 0.1078]) tensor([0.5662, 0.0864, 0.1530, 0.1944]) -Greedy action tensor([ 1.2767, -0.3177, 0.0201, 0.0172]) tensor([0.5645, 0.1146, 0.1607, 0.1602]) -Greedy action tensor([ 1.2804, -0.3164, -0.2225, 0.2865]) tensor([0.5571, 0.1128, 0.1239, 0.2062]) -Greedy action tensor([ 1.6358, -0.2619, -0.2042, 0.7650]) tensor([0.5789, 0.0868, 0.0919, 0.2423]) -Greedy action tensor([ 0.9997, -0.3092, -0.4943, 0.2016]) tensor([0.5142, 0.1389, 0.1154, 0.2315]) -Greedy action tensor([ 0.8175, -0.2555, -0.1563, 0.5172]) tensor([0.4065, 0.1390, 0.1535, 0.3010]) -Greedy action tensor([ 1.2983, -0.6395, -0.3325, 0.6055]) tensor([0.5435, 0.0783, 0.1064, 0.2718]) -Greedy action tensor([ 0.7577, -0.5071, -0.0434, -0.3141]) tensor([0.4823, 0.1361, 0.2165, 0.1651]) -Greedy action tensor([ 1.6015, -0.7109, -0.2237, 0.2928]) tensor([0.6534, 0.0647, 0.1053, 0.1765]) -Greedy action tensor([ 1.2290, -0.1242, -0.7826, 0.3797]) tensor([0.5495, 0.1420, 0.0735, 0.2350]) -Greedy action tensor([ 1.3424, -0.6138, -0.0745, 0.2893]) tensor([0.5771, 0.0816, 0.1399, 0.2013]) -Greedy action tensor([ 0.9397, -0.4062, -0.4369, 0.3180]) tensor([0.4879, 0.1270, 0.1232, 0.2620]) -Greedy action tensor([ 1.3946, -0.6047, -0.3481, 0.3772]) tensor([0.5981, 0.0810, 0.1047, 0.2162]) -Greedy action tensor([ 1.2083, -0.4524, -0.1860, 0.1983]) tensor([0.5549, 0.1054, 0.1376, 0.2021]) -Greedy action tensor([ 1.1275, -0.0075, -0.0801, 0.2806]) tensor([0.4880, 0.1569, 0.1459, 0.2092]) -Greedy action tensor([ 1.3900, -0.1332, -0.3002, 0.1790]) tensor([0.5881, 0.1282, 0.1085, 0.1752]) -Greedy action tensor([ 0.8742, -0.2167, -0.1640, 0.2406]) tensor([0.4503, 0.1513, 0.1594, 0.2390]) -Greedy action tensor([ 1.4870, 0.2267, -0.2472, 0.2095]) tensor([0.5751, 0.1631, 0.1015, 0.1603]) -Greedy action tensor([ 0.8860, -0.2420, -0.2428, 0.3534]) tensor([0.4476, 0.1449, 0.1448, 0.2628]) -Greedy action tensor([ 1.0473, -0.1112, -0.1766, 0.3688]) tensor([0.4727, 0.1484, 0.1390, 0.2399]) -Greedy action tensor([ 0.6528, -0.4249, -0.1455, 0.0591]) tensor([0.4268, 0.1453, 0.1921, 0.2357]) -Greedy action tensor([ 1.9464, -1.3472, -0.5452, 0.9258]) tensor([0.6755, 0.0251, 0.0559, 0.2435]) -Greedy action tensor([ 0.8375, -0.3732, -0.4235, 0.3005]) tensor([0.4617, 0.1376, 0.1308, 0.2699]) -Greedy action tensor([ 1.3443, -0.1860, -0.7472, 0.2581]) tensor([0.5961, 0.1290, 0.0736, 0.2012]) -Greedy action tensor([ 1.5650, -0.6619, -0.1441, 0.4302]) tensor([0.6210, 0.0670, 0.1124, 0.1996]) -Greedy action tensor([ 2.7894, -0.7409, -0.0639, 0.5136]) tensor([0.8406, 0.0246, 0.0485, 0.0863]) -Greedy action tensor([ 1.0702, 0.0329, -0.3215, 0.3962]) tensor([0.4733, 0.1678, 0.1177, 0.2412]) -Greedy action tensor([ 1.1952, -0.3542, -0.2047, 0.1095]) tensor([0.5566, 0.1182, 0.1373, 0.1879]) -Greedy action tensor([ 1.5681, -0.5862, -0.1663, 0.1248]) tensor([0.6542, 0.0759, 0.1155, 0.1545]) -Greedy action tensor([ 1.6236, -0.4106, -0.3844, 0.2112]) tensor([0.6629, 0.0867, 0.0890, 0.1615]) -Greedy action tensor([ 0.8888, -0.3657, -0.1222, 0.0974]) tensor([0.4757, 0.1357, 0.1731, 0.2156]) -Greedy action tensor([ 1.0710, -0.0632, -0.2491, 0.4103]) tensor([0.4750, 0.1528, 0.1269, 0.2453]) -Greedy action tensor([ 1.4304, -0.6081, -0.1115, 0.3634]) tensor([0.5923, 0.0771, 0.1267, 0.2038]) -Greedy action tensor([ 1.5759, -0.5090, -0.1911, 0.3167]) tensor([0.6333, 0.0787, 0.1082, 0.1798]) -Greedy action tensor([ 1.5635, -0.2888, -0.1644, 0.3535]) tensor([0.6125, 0.0961, 0.1088, 0.1826]) -Greedy action tensor([ 0.7451, -0.2068, -0.0884, 0.1971]) tensor([0.4169, 0.1609, 0.1812, 0.2410]) -Greedy action tensor([ 1.3728, -0.6738, -0.5287, 0.8551]) tensor([0.5335, 0.0689, 0.0797, 0.3179]) -Greedy action tensor([ 1.3094, -0.3382, -0.1413, 0.7621]) tensor([0.4987, 0.0960, 0.1169, 0.2885]) -Greedy action tensor([ 2.0139, -0.6299, 0.2173, 0.9914]) tensor([0.6263, 0.0445, 0.1039, 0.2253]) -Greedy action tensor([ 1.3295, -0.2406, -0.2652, 0.2619]) tensor([0.5699, 0.1185, 0.1157, 0.1959]) -Greedy action tensor([ 1.1040, -0.3466, -0.1104, 0.1017]) tensor([0.5268, 0.1235, 0.1564, 0.1933]) -Greedy action tensor([ 0.0669, -0.0387, -0.2480, 0.1885]) tensor([0.2660, 0.2394, 0.1942, 0.3004]) -Greedy action tensor([-0.0566, 0.2208, -0.5238, 0.3928]) tensor([0.2215, 0.2924, 0.1389, 0.3472]) -Greedy action tensor([ 1.3447, 0.0573, -0.1213, 0.5848]) tensor([0.5065, 0.1398, 0.1169, 0.2369]) -Greedy action tensor([ 1.3288, -0.1886, -0.2959, 0.1410]) tensor([0.5810, 0.1274, 0.1144, 0.1772]) -Greedy action tensor([ 1.3859, -0.7788, -0.2893, 0.5619]) tensor([0.5745, 0.0659, 0.1076, 0.2520]) -Greedy action tensor([ 0.8877, -0.4233, -0.2962, 0.4025]) tensor([0.4564, 0.1230, 0.1397, 0.2809]) -Greedy action tensor([ 1.0013, -0.0693, 0.0451, -0.0749]) tensor([0.4836, 0.1658, 0.1859, 0.1648]) -Greedy action tensor([ 1.0217, 0.0170, -0.3516, 0.2516]) tensor([0.4802, 0.1758, 0.1216, 0.2223]) -Greedy action tensor([ 1.2606, -0.0579, -0.3088, 0.2701]) tensor([0.5414, 0.1448, 0.1127, 0.2011]) -Greedy action tensor([ 1.2649, -0.1068, -0.6767, 0.3737]) tensor([0.5533, 0.1404, 0.0794, 0.2269]) -Greedy action tensor([ 1.5879, -0.4523, -0.2786, 0.4161]) tensor([0.6272, 0.0815, 0.0970, 0.1943]) -Greedy action tensor([ 1.2933, -0.5266, -0.2051, 0.3551]) tensor([0.5628, 0.0912, 0.1258, 0.2202]) -Greedy action tensor([ 1.1240, -0.2552, -0.4267, 0.2988]) tensor([0.5258, 0.1324, 0.1115, 0.2304]) -Greedy action tensor([ 1.0915, -0.2295, -0.4195, -0.0071]) tensor([0.5492, 0.1466, 0.1212, 0.1831]) -Greedy action tensor([ 1.5307, -0.1942, -0.6220, 0.0581]) tensor([0.6563, 0.1170, 0.0762, 0.1505]) -Greedy action tensor([-1.2082, -1.5523, 1.7439, -1.0612]) tensor([0.0454, 0.0322, 0.8697, 0.0526]) -Greedy action tensor([ 0.5664, 0.0498, -0.1655, 0.2239]) tensor([0.3587, 0.2140, 0.1726, 0.2547]) -Greedy action tensor([ 0.2734, -0.3573, 0.8768, 0.2250]) tensor([0.2318, 0.1234, 0.4239, 0.2209]) -Greedy action tensor([-0.0053, -1.7243, -0.2270, 1.0574]) tensor([0.2051, 0.0368, 0.1643, 0.5937]) -Greedy action tensor([ 0.8027, -1.0762, 0.7127, -0.2395]) tensor([0.4133, 0.0631, 0.3777, 0.1458]) -Greedy action tensor([-0.1680, -1.3353, -0.5328, 0.7874]) tensor([0.2171, 0.0676, 0.1508, 0.5645]) -Greedy action tensor([ 0.8846, -0.4347, 1.5046, 0.3453]) tensor([0.2696, 0.0721, 0.5011, 0.1572]) -Greedy action tensor([-0.2245, 0.0641, -0.2591, 0.2843]) tensor([0.2015, 0.2689, 0.1946, 0.3351]) -Greedy action tensor([-1.2742, -0.9483, -1.2422, 0.1152]) tensor([0.1346, 0.1864, 0.1390, 0.5400]) -Greedy action tensor([-0.8374, -0.7951, 1.0405, -0.2945]) tensor([0.0971, 0.1012, 0.6347, 0.1670]) -Greedy action tensor([-0.7425, -0.5851, -0.2186, -0.1278]) tensor([0.1752, 0.2051, 0.2958, 0.3239]) -Greedy action tensor([0.8784, 0.0654, 0.1230, 1.2205]) tensor([0.3011, 0.1335, 0.1415, 0.4239]) -Greedy action tensor([-0.3910, -0.4393, -0.3184, -0.4795]) tensor([0.2536, 0.2416, 0.2727, 0.2321]) -Greedy action tensor([ 0.8018, -0.4661, 0.4668, -0.0328]) tensor([0.4114, 0.1158, 0.2943, 0.1786]) -Greedy action tensor([0.9170, 1.2549, 0.6180, 0.8308]) tensor([0.2462, 0.3452, 0.1826, 0.2259]) -Greedy action tensor([-1.3108, -1.1290, 1.2759, -1.0466]) tensor([0.0596, 0.0714, 0.7914, 0.0776]) -Greedy action tensor([ 0.2030, -0.1456, 0.4561, 0.3542]) tensor([0.2406, 0.1698, 0.3098, 0.2798]) -Greedy action tensor([ 1.3054, -0.1855, 1.4700, -0.1763]) tensor([0.3800, 0.0856, 0.4480, 0.0864]) -Greedy action tensor([ 0.6969, -1.3732, -0.1796, 0.4205]) tensor([0.4346, 0.0548, 0.1809, 0.3297]) -Greedy action tensor([ 0.5344, -0.2254, -0.1765, -0.7555]) tensor([0.4476, 0.2094, 0.2199, 0.1232]) -Greedy action tensor([ 1.0400, -1.1756, -0.1162, 1.0481]) tensor([0.4112, 0.0449, 0.1294, 0.4145]) -Greedy action tensor([ 0.2784, -2.0690, -0.2601, 1.1505]) tensor([0.2456, 0.0235, 0.1433, 0.5875]) -Greedy action tensor([-0.6676, -1.0509, -0.3596, 0.5795]) tensor([0.1533, 0.1045, 0.2086, 0.5336]) -Greedy action tensor([-0.2091, -0.3841, 0.2905, -0.3169]) tensor([0.2280, 0.1914, 0.3758, 0.2047]) -Greedy action tensor([-0.2908, -1.8297, 1.6071, -0.3531]) tensor([0.1133, 0.0243, 0.7559, 0.1065]) -Greedy action tensor([ 0.1203, -0.5686, -0.2891, -0.3512]) tensor([0.3584, 0.1800, 0.2380, 0.2237]) -Greedy action tensor([-1.3159, 0.9032, 1.6143, -0.9488]) tensor([0.0329, 0.3029, 0.6167, 0.0475]) -Greedy action tensor([ 0.3492, -1.3559, 0.5420, -0.5945]) tensor([0.3592, 0.0653, 0.4356, 0.1398]) -Greedy action tensor([-0.0888, -1.0236, -0.5833, 1.0951]) tensor([0.1898, 0.0745, 0.1157, 0.6200]) -Greedy action tensor([ 0.4539, -1.8938, -0.5747, 0.9306]) tensor([0.3264, 0.0312, 0.1167, 0.5257]) -Greedy action tensor([ 1.5937, 0.4590, -0.3022, 0.4525]) tensor([0.5583, 0.1795, 0.0838, 0.1783]) -Greedy action tensor([1.1915, 0.8048, 0.3792, 0.6898]) tensor([0.3665, 0.2489, 0.1627, 0.2219]) -Greedy action tensor([1.2128, 0.0084, 0.4088, 0.4867]) tensor([0.4482, 0.1344, 0.2006, 0.2168]) -Greedy action tensor([ 1.3533, 0.0448, -0.1138, 0.4971]) tensor([0.5193, 0.1403, 0.1198, 0.2206]) -Greedy action tensor([ 1.0184, -0.2317, 1.3866, 1.3891]) tensor([0.2392, 0.0685, 0.3457, 0.3466]) -Greedy action tensor([-0.0300, 0.3184, 1.1076, -1.0330]) tensor([0.1694, 0.2400, 0.5284, 0.0621]) -Greedy action tensor([0.8169, 0.0899, 0.6804, 0.6464]) tensor([0.3126, 0.1511, 0.2727, 0.2636]) -Greedy action tensor([ 0.7336, -1.2835, 0.3636, 0.7846]) tensor([0.3477, 0.0463, 0.2402, 0.3659]) -Greedy action tensor([ 1.3541, -0.5011, 0.2354, 0.4175]) tensor([0.5333, 0.0834, 0.1742, 0.2090]) -Greedy action tensor([-0.4541, -1.2252, -0.0242, -0.5271]) tensor([0.2545, 0.1177, 0.3912, 0.2366]) -Greedy action tensor([0.8924, 0.3374, 0.0441, 0.6830]) tensor([0.3554, 0.2041, 0.1522, 0.2883]) -Greedy action tensor([ 0.0291, 0.2319, -0.3274, 1.4683]) tensor([0.1400, 0.1715, 0.0980, 0.5905]) -Greedy action tensor([-0.1135, -0.7559, -0.4034, 0.0065]) tensor([0.2939, 0.1546, 0.2200, 0.3314]) -Greedy action tensor([-0.4884, -0.5678, 1.6791, -0.7608]) tensor([0.0876, 0.0809, 0.7649, 0.0667]) -Greedy action tensor([ 1.2056, -0.9796, 1.2727, 0.4193]) tensor([0.3792, 0.0426, 0.4055, 0.1727]) -Greedy action tensor([ 0.1066, 0.7882, 0.6422, -0.6562]) tensor([0.1941, 0.3838, 0.3316, 0.0905]) -Greedy action tensor([ 0.2112, -0.8976, 1.7504, 0.7217]) tensor([0.1306, 0.0431, 0.6087, 0.2176]) -Greedy action tensor([ 0.4169, -1.1480, 0.8571, -0.5170]) tensor([0.3169, 0.0663, 0.4922, 0.1246]) -Greedy action tensor([-0.5647, 0.0780, -0.8766, -0.4519]) tensor([0.2104, 0.4001, 0.1540, 0.2355]) -Greedy action tensor([ 0.8049, -1.0292, 0.8727, 1.4071]) tensor([0.2466, 0.0394, 0.2638, 0.4502]) -Greedy action tensor([ 1.1743, -1.3315, 0.3488, 0.8994]) tensor([0.4387, 0.0358, 0.1922, 0.3333]) -Greedy action tensor([-1.1976, -0.2841, 0.9153, 0.1682]) tensor([0.0638, 0.1590, 0.5274, 0.2499]) -Greedy action tensor([-0.2342, -1.4653, 0.3447, -0.1019]) tensor([0.2371, 0.0692, 0.4230, 0.2707]) -Greedy action tensor([ 1.1545, -1.1203, 0.5027, 1.1514]) tensor([0.3816, 0.0392, 0.1988, 0.3804]) -Greedy action tensor([ 2.0125, -1.3149, -0.0084, 0.8898]) tensor([0.6694, 0.0240, 0.0887, 0.2178]) -Greedy action tensor([1.3504, 0.6712, 0.2638, 0.2766]) tensor([0.4574, 0.2319, 0.1543, 0.1563]) -Greedy action tensor([ 0.3779, -1.3411, 1.0799, 0.3090]) tensor([0.2421, 0.0434, 0.4885, 0.2260]) -Greedy action tensor([0.5390, 0.4149, 1.2898, 1.2592]) tensor([0.1651, 0.1458, 0.3498, 0.3393]) -Greedy action tensor([ 0.6012, -0.7516, 0.3167, 0.4689]) tensor([0.3464, 0.0895, 0.2606, 0.3035]) -Greedy action tensor([0.9120, 0.1223, 0.1044, 0.4416]) tensor([0.3961, 0.1798, 0.1766, 0.2475]) -Greedy action tensor([ 1.2909, -1.7353, -0.2662, 0.7847]) tensor([0.5370, 0.0260, 0.1132, 0.3237]) -Greedy action tensor([-0.8129, -1.0445, -1.0332, 0.1821]) tensor([0.1887, 0.1497, 0.1514, 0.5103]) -Greedy action tensor([ 0.6086, 0.3621, 0.7866, -0.5228]) tensor([0.3031, 0.2369, 0.3622, 0.0978]) -Greedy action tensor([-0.2480, -0.0106, -0.7038, 0.8086]) tensor([0.1731, 0.2194, 0.1097, 0.4978]) -Greedy action tensor([-1.5825, 0.2365, 1.5932, -0.5113]) tensor([0.0294, 0.1812, 0.7036, 0.0858]) -Greedy action tensor([-0.2484, -0.4706, -0.2855, 0.3013]) tensor([0.2224, 0.1781, 0.2143, 0.3853]) -Greedy action tensor([-0.2465, -1.1515, 0.0089, 0.3741]) tensor([0.2195, 0.0888, 0.2834, 0.4083]) -Greedy action tensor([-0.5075, 0.1432, -1.0855, 0.0394]) tensor([0.1921, 0.3682, 0.1078, 0.3319]) -Greedy action tensor([-0.6689, -0.7882, 1.1081, -0.3491]) tensor([0.1090, 0.0967, 0.6443, 0.1500]) -Greedy action tensor([-0.5379, -0.0870, 0.3196, -0.6301]) tensor([0.1713, 0.2688, 0.4037, 0.1562]) -Greedy action tensor([-0.3102, -0.4744, 0.4693, -0.4945]) tensor([0.2057, 0.1746, 0.4486, 0.1711]) -Greedy action tensor([ 1.6529, -1.4357, 0.7625, 1.8895]) tensor([0.3673, 0.0167, 0.1508, 0.4653]) -Greedy action tensor([-0.6432, -1.2481, -0.5270, 1.4553]) tensor([0.0924, 0.0505, 0.1038, 0.7534]) -Greedy action tensor([-0.7740, -0.7646, 1.3983, 0.1687]) tensor([0.0749, 0.0756, 0.6573, 0.1922]) -Greedy action tensor([ 1.2513, 0.2785, -0.0014, 0.5879]) tensor([0.4590, 0.1735, 0.1311, 0.2364]) -Greedy action tensor([ 0.5897, -0.2716, -0.0435, -0.9174]) tensor([0.4598, 0.1943, 0.2441, 0.1019]) -Greedy action tensor([-1.1874, 0.3955, 1.5287, -0.5128]) tensor([0.0436, 0.2121, 0.6588, 0.0855]) -Greedy action tensor([ 0.0329, -0.2680, -0.7113, -0.0427]) tensor([0.3182, 0.2355, 0.1512, 0.2951]) -Greedy action tensor([-0.1748, 0.3471, 0.7378, -0.0075]) tensor([0.1573, 0.2650, 0.3918, 0.1859]) -Greedy action tensor([-0.9727, 0.5127, 1.2784, -1.4148]) tensor([0.0643, 0.2839, 0.6105, 0.0413]) -Greedy action tensor([-0.6189, -0.3571, 0.5781, 0.3267]) tensor([0.1222, 0.1588, 0.4045, 0.3146]) -Greedy action tensor([ 0.9936, -0.0604, 0.8757, -0.4505]) tensor([0.4043, 0.1409, 0.3594, 0.0954]) -Greedy action tensor([-1.9153, -0.4250, 0.6501, -0.1590]) tensor([0.0413, 0.1831, 0.5366, 0.2390]) -Greedy action tensor([-1.7611, -0.2406, 0.5567, -0.0951]) tensor([0.0476, 0.2176, 0.4831, 0.2517]) -Greedy action tensor([-1.9344, -0.4119, 0.6561, -0.1731]) tensor([0.0404, 0.1853, 0.5391, 0.2352]) -Greedy action tensor([-1.7689, -0.2985, 0.5806, -0.0412]) tensor([0.0466, 0.2028, 0.4884, 0.2623]) -Greedy action tensor([ 0.3406, -0.0172, 0.3449, 0.7788]) tensor([0.2351, 0.1644, 0.2361, 0.3644]) -Greedy action tensor([-1.9366, -0.4350, 0.6628, -0.1764]) tensor([0.0404, 0.1813, 0.5435, 0.2348]) -Greedy action tensor([-1.7922, -0.3233, 0.5958, -0.0698]) tensor([0.0458, 0.1990, 0.4988, 0.2564]) -Greedy action tensor([-1.4525, -0.5004, 0.4652, 0.2328]) tensor([0.0633, 0.1641, 0.4310, 0.3416]) -Greedy action tensor([-0.5756, -0.4668, 1.1835, 1.4800]) tensor([0.0636, 0.0709, 0.3691, 0.4965]) -Greedy action tensor([-1.7813, -0.4459, 0.6009, -0.0416]) tensor([0.0469, 0.1783, 0.5078, 0.2671]) -Greedy action tensor([-1.8087, -0.4884, 0.6000, -0.0780]) tensor([0.0465, 0.1741, 0.5170, 0.2624]) -Greedy action tensor([-1.4654, -0.1887, 0.5009, 0.3513]) tensor([0.0559, 0.2005, 0.3996, 0.3440]) -Greedy action tensor([-1.8508, -0.3146, 0.5980, -0.1194]) tensor([0.0437, 0.2032, 0.5061, 0.2470]) -Greedy action tensor([-1.7409, -0.4161, 0.5689, -0.1773]) tensor([0.0510, 0.1918, 0.5136, 0.2435]) -Greedy action tensor([-1.0160, -0.2633, 0.4085, 0.8092]) tensor([0.0742, 0.1574, 0.3082, 0.4602]) -Greedy action tensor([-1.9019, -0.4475, 0.6634, -0.1332]) tensor([0.0414, 0.1773, 0.5385, 0.2428]) -Greedy action tensor([-1.7518, -0.3614, 0.5498, -0.0693]) tensor([0.0491, 0.1970, 0.4901, 0.2639]) -Greedy action tensor([-1.8199, -0.4273, 0.6122, -0.0845]) tensor([0.0453, 0.1823, 0.5155, 0.2569]) -Greedy action tensor([-1.8997, -0.4361, 0.6379, -0.1524]) tensor([0.0422, 0.1823, 0.5335, 0.2421]) -Greedy action tensor([-1.2478, -0.5820, 0.3194, 0.2061]) tensor([0.0832, 0.1619, 0.3988, 0.3561]) -Greedy action tensor([-0.8982, -0.6912, 0.4076, 0.3145]) tensor([0.1077, 0.1325, 0.3976, 0.3622]) -Greedy action tensor([-1.8831, -0.4732, 0.6356, -0.1399]) tensor([0.0431, 0.1764, 0.5345, 0.2461]) -Greedy action tensor([-1.6250, -0.3911, 0.6302, 0.1800]) tensor([0.0499, 0.1713, 0.4756, 0.3032]) -Greedy action tensor([-1.8052, -0.3077, 0.5931, -0.0539]) tensor([0.0450, 0.2010, 0.4949, 0.2591]) -Greedy action tensor([-0.9668, -0.8229, 0.4471, 0.7569]) tensor([0.0842, 0.0973, 0.3464, 0.4721]) -Greedy action tensor([-1.9166, -0.3942, 0.6461, -0.1608]) tensor([0.0411, 0.1883, 0.5329, 0.2378]) -Greedy action tensor([-1.4531, -0.5544, 0.4204, 0.1742]) tensor([0.0664, 0.1631, 0.4324, 0.3380]) -Greedy action tensor([-1.7958, -0.1938, 0.6080, -0.0647]) tensor([0.0441, 0.2189, 0.4880, 0.2490]) -Greedy action tensor([-1.8995, -0.4767, 0.6991, -0.1006]) tensor([0.0406, 0.1684, 0.5457, 0.2453]) -Greedy action tensor([-1.0590, -0.6874, 0.6066, 0.3730]) tensor([0.0838, 0.1216, 0.4435, 0.3511]) -Greedy action tensor([-1.8566, -0.4716, 0.7300, 0.0102]) tensor([0.0404, 0.1614, 0.5368, 0.2613]) -Greedy action tensor([-1.9199, -0.4343, 0.6515, -0.1695]) tensor([0.0412, 0.1821, 0.5394, 0.2373]) -Greedy action tensor([-1.9128, -0.4635, 0.6507, -0.1610]) tensor([0.0417, 0.1774, 0.5408, 0.2401]) -Greedy action tensor([-1.8533, -0.3249, 0.6065, -0.1181]) tensor([0.0435, 0.2006, 0.5092, 0.2467]) -Greedy action tensor([-0.8148, -0.8721, 1.3249, 0.0594]) tensor([0.0779, 0.0736, 0.6618, 0.1867]) -Greedy action tensor([-1.8705, -0.4107, 0.6550, -0.1239]) tensor([0.0425, 0.1829, 0.5309, 0.2437]) -Greedy action tensor([-1.6233, -0.4338, 0.6078, -0.2575]) tensor([0.0571, 0.1876, 0.5316, 0.2237]) -Greedy action tensor([-1.9123, -0.4384, 0.6485, -0.1653]) tensor([0.0416, 0.1815, 0.5383, 0.2386]) -Greedy action tensor([-0.9456, -0.4375, 0.2795, -0.0088]) tensor([0.1160, 0.1929, 0.3950, 0.2961]) -Greedy action tensor([-1.7589, -0.2699, 0.5630, -0.0441]) tensor([0.0472, 0.2093, 0.4813, 0.2623]) -Greedy action tensor([-1.8830, -0.4261, 0.6338, -0.1364]) tensor([0.0427, 0.1833, 0.5291, 0.2449]) -Greedy action tensor([-1.7941, -0.3283, 0.5551, -0.0839]) tensor([0.0469, 0.2030, 0.4910, 0.2592]) -Greedy action tensor([-1.1129, -0.4210, 0.4924, 0.3352]) tensor([0.0818, 0.1633, 0.4071, 0.3479]) -Greedy action tensor([-0.5818, 0.2674, 0.6974, 1.3439]) tensor([0.0725, 0.1695, 0.2606, 0.4974]) -Greedy action tensor([-0.8449, -0.6032, 0.2895, 0.2067]) tensor([0.1213, 0.1544, 0.3771, 0.3471]) -Greedy action tensor([-1.8640, -0.4588, 0.6195, -0.1597]) tensor([0.0443, 0.1807, 0.5312, 0.2437]) -Greedy action tensor([-1.3114, 0.3440, 0.3521, 0.5985]) tensor([0.0547, 0.2866, 0.2889, 0.3697]) -Greedy action tensor([-1.7541, -0.5300, 0.8518, 0.2769]) tensor([0.0391, 0.1330, 0.5298, 0.2981]) -Greedy action tensor([-1.9074, -0.4198, 0.6559, -0.1409]) tensor([0.0412, 0.1825, 0.5351, 0.2412]) -Greedy action tensor([-1.8117, -0.3809, 0.6567, -0.2955]) tensor([0.0464, 0.1942, 0.5480, 0.2114]) -Greedy action tensor([-1.3088, -0.7270, 0.6550, 0.2400]) tensor([0.0684, 0.1224, 0.4874, 0.3219]) -Greedy action tensor([-1.8434, -0.4453, 0.6349, -0.0626]) tensor([0.0437, 0.1767, 0.5205, 0.2591]) -Greedy action tensor([-1.0730, -0.4466, -0.7343, -0.6517]) tensor([0.1725, 0.3227, 0.2420, 0.2629]) -Greedy action tensor([-1.5994, -0.5816, 0.5430, 0.0269]) tensor([0.0576, 0.1593, 0.4904, 0.2927]) -Greedy action tensor([-1.2098, 0.6470, 0.2908, -0.0966]) tensor([0.0670, 0.4288, 0.3003, 0.2039]) -Greedy action tensor([-1.2117, -0.5927, 0.3060, 0.1292]) tensor([0.0890, 0.1652, 0.4058, 0.3400]) -Greedy action tensor([-1.8824, -0.4249, 0.6301, -0.1476]) tensor([0.0429, 0.1844, 0.5294, 0.2433]) -Greedy action tensor([-1.2454, -0.9728, 1.0723, -0.0014]) tensor([0.0628, 0.0824, 0.6371, 0.2177]) -Greedy action tensor([-1.6183, 0.0049, 0.4723, -0.1088]) tensor([0.0535, 0.2713, 0.4330, 0.2422]) -Greedy action tensor([-1.6897, -0.4503, 0.5513, -0.0233]) tensor([0.0522, 0.1803, 0.4910, 0.2764]) -Greedy action tensor([-1.7184, -0.3176, 0.5209, -0.0474]) tensor([0.0506, 0.2054, 0.4750, 0.2691]) -Greedy action tensor([-1.4405, -0.4124, 0.4781, 0.2133]) tensor([0.0632, 0.1766, 0.4302, 0.3301]) -Greedy action tensor([-1.9008, -0.4653, 0.6430, -0.1513]) tensor([0.0422, 0.1774, 0.5375, 0.2429]) -Greedy action tensor([-1.9281, -0.4341, 0.6592, -0.1651]) tensor([0.0407, 0.1812, 0.5409, 0.2372]) -Greedy action tensor([-1.8602, -0.4075, 0.6176, -0.1206]) tensor([0.0437, 0.1868, 0.5207, 0.2489]) -Greedy action tensor([-1.8755, -0.4566, 0.6382, -0.1453]) tensor([0.0432, 0.1787, 0.5341, 0.2440]) -Greedy action tensor([-1.8299, -0.4717, 0.6201, -0.0979]) tensor([0.0452, 0.1757, 0.5237, 0.2554]) -Greedy action tensor([-1.9121, -0.4580, 0.6672, -0.1357]) tensor([0.0410, 0.1756, 0.5410, 0.2424]) -Greedy action tensor([-1.6931, 0.0731, 0.6649, -0.5664]) tensor([0.0488, 0.2853, 0.5155, 0.1505]) -Greedy action tensor([-1.3953, -0.6294, 0.7805, 0.6924]) tensor([0.0499, 0.1074, 0.4399, 0.4028]) -Greedy action tensor([-1.8560, -0.4624, 0.5999, -0.1382]) tensor([0.0449, 0.1810, 0.5237, 0.2504]) -Greedy action tensor([-1.8277, -0.5607, 1.2493, 0.7966]) tensor([0.0250, 0.0887, 0.5418, 0.3446]) -Greedy action tensor([-1.4111, -0.5478, 0.3842, 0.1801]) tensor([0.0699, 0.1658, 0.4210, 0.3433]) -Greedy action tensor([-1.7634, -0.4719, 0.6723, 0.0144]) tensor([0.0455, 0.1655, 0.5198, 0.2692]) -Greedy action tensor([-1.8822, -0.4379, 0.6568, -0.1268]) tensor([0.0422, 0.1789, 0.5347, 0.2442]) -Greedy action tensor([-1.6076, -0.5107, 0.4983, 0.0708]) tensor([0.0569, 0.1705, 0.4676, 0.3050]) -Greedy action tensor([-1.9055, -0.4658, 0.6459, -0.1583]) tensor([0.0420, 0.1774, 0.5393, 0.2413]) -Greedy action tensor([-1.6243, 0.0896, 0.4659, 0.2265]) tensor([0.0476, 0.2643, 0.3850, 0.3031]) -Greedy action tensor([-1.7826, -0.4696, 0.5792, -0.0813]) tensor([0.0481, 0.1786, 0.5099, 0.2634]) -Greedy action tensor([-1.5762, -0.3508, 0.9924, 0.6255]) tensor([0.0377, 0.1285, 0.4925, 0.3412]) -Greedy action tensor([-1.8698, -0.4672, 0.6304, -0.1256]) tensor([0.0435, 0.1770, 0.5304, 0.2490]) -Greedy action tensor([ 0.5132, -0.0157, -0.2637, -0.1083]) tensor([0.3867, 0.2278, 0.1778, 0.2077]) -Greedy action tensor([ 0.7336, -0.3914, 0.0491, -0.4668]) tensor([0.4695, 0.1524, 0.2368, 0.1413]) -Greedy action tensor([ 0.3681, 0.0267, 0.0755, -0.2994]) tensor([0.3367, 0.2393, 0.2513, 0.1727]) -Greedy action tensor([ 0.6907, -0.6136, -0.0634, -0.7450]) tensor([0.5051, 0.1371, 0.2376, 0.1202]) -Greedy action tensor([ 0.6819, -0.1748, 0.0094, -0.2115]) tensor([0.4266, 0.1811, 0.2177, 0.1746]) -Greedy action tensor([ 0.4380, -0.2121, -0.0825, -0.2755]) tensor([0.3837, 0.2003, 0.2280, 0.1880]) -Greedy action tensor([ 0.4585, -0.0408, -0.1581, -0.0690]) tensor([0.3654, 0.2218, 0.1972, 0.2156]) -Greedy action tensor([ 1.1441, -0.7273, 0.0244, -0.5300]) tensor([0.5996, 0.0923, 0.1957, 0.1124]) -Greedy action tensor([ 0.7519, -0.3005, -0.0935, -0.2830]) tensor([0.4687, 0.1636, 0.2012, 0.1665]) -Greedy action tensor([ 0.4145, -0.3050, 0.2005, -0.4576]) tensor([0.3687, 0.1795, 0.2977, 0.1541]) -Greedy action tensor([ 0.8884, -1.0127, 0.0225, -0.3705]) tensor([0.5394, 0.0806, 0.2269, 0.1532]) -Greedy action tensor([ 0.9144, -0.8004, -0.0464, -0.4393]) tensor([0.5492, 0.0988, 0.2101, 0.1418]) -Greedy action tensor([ 0.4612, 0.0683, -0.0497, -0.2416]) tensor([0.3610, 0.2437, 0.2166, 0.1787]) -Greedy action tensor([ 0.9205, -0.6054, -0.0908, -0.3351]) tensor([0.5359, 0.1165, 0.1949, 0.1527]) -Greedy action tensor([ 0.7724, -0.1283, -0.0571, -0.5556]) tensor([0.4745, 0.1928, 0.2070, 0.1257]) -Greedy action tensor([ 0.3963, -0.1007, -0.1162, -0.0912]) tensor([0.3544, 0.2156, 0.2123, 0.2177]) -Greedy action tensor([ 0.8835, -0.3470, 0.0050, -0.2358]) tensor([0.4916, 0.1436, 0.2042, 0.1605]) -Greedy action tensor([ 0.7237, -0.3242, 0.0287, -0.2072]) tensor([0.4456, 0.1563, 0.2224, 0.1757]) -Greedy action tensor([ 0.3919, -0.1739, 0.0383, -0.1012]) tensor([0.3471, 0.1971, 0.2437, 0.2120]) -Greedy action tensor([ 0.8214, -0.7408, -0.0378, -0.4965]) tensor([0.5261, 0.1103, 0.2228, 0.1408]) -Greedy action tensor([ 0.2900, -0.0844, -0.1054, -0.0657]) tensor([0.3266, 0.2246, 0.2199, 0.2288]) -Greedy action tensor([ 0.7813, -0.4726, -0.0824, -0.4011]) tensor([0.4966, 0.1417, 0.2094, 0.1522]) -Greedy action tensor([ 0.2010, -0.1708, -0.0760, -0.1053]) tensor([0.3141, 0.2166, 0.2381, 0.2312]) -Greedy action tensor([ 0.9566, -0.6697, -0.0697, -0.3872]) tensor([0.5507, 0.1083, 0.1973, 0.1437]) -Greedy action tensor([ 0.4290, -0.1377, 0.0115, -0.1915]) tensor([0.3618, 0.2053, 0.2383, 0.1945]) -Greedy action tensor([ 0.2931, 0.0912, -0.2053, 0.0325]) tensor([0.3130, 0.2558, 0.1901, 0.2412]) -Greedy action tensor([ 0.5458, -0.7011, -0.2114, -0.4736]) tensor([0.4723, 0.1357, 0.2215, 0.1704]) -Greedy action tensor([ 0.8864, -0.4863, -0.0767, -0.4367]) tensor([0.5259, 0.1333, 0.2008, 0.1401]) -Greedy action tensor([ 0.5174, -0.0308, -0.1065, -0.2909]) tensor([0.3907, 0.2258, 0.2094, 0.1741]) -Greedy action tensor([ 0.5219, 0.0711, -0.1577, -0.1185]) tensor([0.3744, 0.2385, 0.1897, 0.1973]) -Greedy action tensor([ 0.7406, -0.3205, -0.0790, -0.2094]) tensor([0.4601, 0.1592, 0.2027, 0.1779]) -Greedy action tensor([ 0.5935, -0.0765, 0.0214, -0.1200]) tensor([0.3897, 0.1994, 0.2199, 0.1909]) -Greedy action tensor([ 0.5356, -0.5397, -0.0431, -0.2732]) tensor([0.4260, 0.1454, 0.2388, 0.1898]) -Greedy action tensor([ 0.7347, -0.5711, -0.0764, -0.4170]) tensor([0.4923, 0.1334, 0.2187, 0.1556]) -Greedy action tensor([ 0.0772, -0.0194, -0.0138, -0.1584]) tensor([0.2769, 0.2514, 0.2528, 0.2188]) -Greedy action tensor([ 0.8953, -0.8345, 0.1288, -0.5385]) tensor([0.5318, 0.0943, 0.2471, 0.1268]) -Greedy action tensor([ 0.3798, -0.3114, -0.0634, -0.2639]) tensor([0.3748, 0.1878, 0.2406, 0.1969]) -Greedy action tensor([ 0.8226, -0.4550, -0.0109, -0.5204]) tensor([0.5065, 0.1412, 0.2201, 0.1322]) -Greedy action tensor([ 0.7657, 0.0738, -0.3452, -0.2264]) tensor([0.4544, 0.2275, 0.1496, 0.1685]) -Greedy action tensor([ 0.4323, -0.4489, 0.2185, -0.4188]) tensor([0.3775, 0.1564, 0.3049, 0.1612]) -Greedy action tensor([ 0.4305, 0.0386, -0.1285, -0.3823]) tensor([0.3716, 0.2511, 0.2125, 0.1648]) -Greedy action tensor([ 0.8151, -0.0952, -0.1243, -0.0936]) tensor([0.4553, 0.1832, 0.1780, 0.1835]) -Greedy action tensor([ 0.3400, -0.0450, 0.0091, -0.0752]) tensor([0.3269, 0.2224, 0.2348, 0.2158]) -Greedy action tensor([ 1.1151, -0.3986, -0.0086, -0.4803]) tensor([0.5721, 0.1259, 0.1860, 0.1160]) -Greedy action tensor([ 0.7035, -0.3564, -0.0535, -0.2812]) tensor([0.4568, 0.1583, 0.2143, 0.1706]) -Greedy action tensor([ 0.8126, -0.3711, -0.0663, -0.2202]) tensor([0.4814, 0.1474, 0.1999, 0.1714]) -Greedy action tensor([ 0.4557, -0.1451, -0.0691, -0.0641]) tensor([0.3657, 0.2005, 0.2164, 0.2174]) -Greedy action tensor([ 0.4619, -0.0073, -0.0457, -0.2347]) tensor([0.3669, 0.2295, 0.2208, 0.1828]) -Greedy action tensor([ 0.6293, -0.1935, 0.0501, -0.2069]) tensor([0.4110, 0.1805, 0.2303, 0.1781]) -Greedy action tensor([ 0.5747, 0.1259, -0.1267, -0.3555]) tensor([0.3955, 0.2525, 0.1961, 0.1560]) -Greedy action tensor([ 0.6417, -0.2331, -0.0857, -0.1847]) tensor([0.4278, 0.1784, 0.2067, 0.1872]) -Greedy action tensor([ 0.7727, -0.4754, -0.0089, -0.5306]) tensor([0.4959, 0.1424, 0.2270, 0.1347]) -Greedy action tensor([ 1.0521, -0.9392, 0.0838, -0.4322]) tensor([0.5737, 0.0783, 0.2179, 0.1301]) -Greedy action tensor([ 0.7081, -0.2216, 0.0750, -0.5319]) tensor([0.4515, 0.1782, 0.2397, 0.1307]) -Greedy action tensor([ 0.7689, -0.8280, -0.0033, -0.6930]) tensor([0.5273, 0.1068, 0.2436, 0.1222]) -Greedy action tensor([ 0.5843, 0.4116, -0.1352, -0.2427]) tensor([0.3616, 0.3042, 0.1761, 0.1581]) -Greedy action tensor([ 1.0387, -1.2815, 0.1460, -0.6583]) tensor([0.5914, 0.0581, 0.2422, 0.1084]) -Greedy action tensor([ 0.8481, -0.7714, -0.1454, -0.3357]) tensor([0.5335, 0.1056, 0.1975, 0.1633]) -Greedy action tensor([ 0.8609, -0.4394, -0.0983, -0.3440]) tensor([0.5114, 0.1393, 0.1960, 0.1533]) -Greedy action tensor([ 0.6380, -0.2660, -0.0688, -0.2412]) tensor([0.4323, 0.1751, 0.2132, 0.1794]) -Greedy action tensor([ 0.6671, -0.4275, -0.1588, -0.4545]) tensor([0.4766, 0.1595, 0.2087, 0.1553]) -Greedy action tensor([ 0.6964, -0.8007, -0.2385, -0.3122]) tensor([0.5048, 0.1130, 0.1982, 0.1841]) -Greedy action tensor([ 0.9787, -0.0824, -0.0194, -0.4817]) tensor([0.5137, 0.1778, 0.1893, 0.1192]) -Greedy action tensor([ 0.4349, -0.2530, -0.1669, -0.3646]) tensor([0.4000, 0.2011, 0.2191, 0.1798]) -Greedy action tensor([ 0.4164, -0.3997, -0.0360, -0.2055]) tensor([0.3824, 0.1691, 0.2432, 0.2053]) -Greedy action tensor([ 0.9555, -0.3165, -0.0725, -0.1179]) tensor([0.5051, 0.1416, 0.1807, 0.1727]) -Greedy action tensor([ 0.5794, 0.2893, -0.2288, 0.2793]) tensor([0.3408, 0.2550, 0.1519, 0.2524]) -Greedy action tensor([ 0.8226, -0.7557, -0.0900, -0.4724]) tensor([0.5314, 0.1096, 0.2134, 0.1456]) -Greedy action tensor([ 1.0070, -0.5282, -0.1040, -0.5472]) tensor([0.5695, 0.1227, 0.1875, 0.1204]) -Greedy action tensor([ 0.1680, 0.0013, -0.0153, -0.1746]) tensor([0.2951, 0.2498, 0.2457, 0.2095]) -Greedy action tensor([ 0.8736, -0.3881, -0.0731, -0.2935]) tensor([0.5044, 0.1428, 0.1957, 0.1570]) -Greedy action tensor([ 0.6893, -0.0093, -0.0059, -0.4020]) tensor([0.4288, 0.2133, 0.2140, 0.1440]) -Greedy action tensor([ 0.7567, -0.6292, -0.0273, -0.6377]) tensor([0.5116, 0.1279, 0.2336, 0.1269]) -Greedy action tensor([ 0.1626, 0.2293, -0.1241, 0.0012]) tensor([0.2724, 0.2912, 0.2045, 0.2318]) -Greedy action tensor([ 0.3983, 0.1543, -0.0632, -0.2177]) tensor([0.3385, 0.2652, 0.2134, 0.1828]) -Greedy action tensor([ 0.7832, -0.3879, -0.0494, -0.3442]) tensor([0.4834, 0.1499, 0.2102, 0.1566]) -Greedy action tensor([ 0.2312, 0.0861, -0.0112, -0.3637]) tensor([0.3124, 0.2702, 0.2451, 0.1723]) -Greedy action tensor([ 0.6780, -0.4828, -0.1085, -0.4532]) tensor([0.4782, 0.1498, 0.2178, 0.1543]) -Greedy action tensor([ 0.4099, -0.1219, -0.1384, -0.1501]) tensor([0.3654, 0.2147, 0.2112, 0.2087]) -Greedy action tensor([ 0.2750, 0.2659, -0.0582, -0.4147]) tensor([0.3116, 0.3088, 0.2233, 0.1563]) -Greedy action tensor([ 0.4849, -0.2469, 0.0193, -0.3678]) tensor([0.3945, 0.1898, 0.2476, 0.1682]) -Greedy action tensor([ 1.6654, -0.0374, -0.1731, 0.3769]) tensor([0.6185, 0.1127, 0.0984, 0.1705]) -Greedy action tensor([ 2.5177, -1.0599, -0.2213, 0.8761]) tensor([0.7775, 0.0217, 0.0503, 0.1506]) -Greedy action tensor([ 1.0919, -0.1050, -0.3603, -0.3597]) tensor([0.5649, 0.1707, 0.1322, 0.1323]) -Greedy action tensor([ 1.0839, -0.3026, -0.1422, 0.1798]) tensor([0.5133, 0.1283, 0.1506, 0.2078]) -Greedy action tensor([ 0.7846, -0.4240, -0.5380, -0.4465]) tensor([0.5385, 0.1608, 0.1435, 0.1572]) -Greedy action tensor([ 1.5120, -0.6579, -0.2983, 0.6547]) tensor([0.5875, 0.0671, 0.0961, 0.2493]) -Greedy action tensor([ 1.0874, -0.6455, -0.0116, 0.1988]) tensor([0.5205, 0.0920, 0.1734, 0.2140]) -Greedy action tensor([ 1.2628, 0.4303, 0.1495, -0.0169]) tensor([0.4898, 0.2131, 0.1609, 0.1362]) -Greedy action tensor([ 1.8587, -0.9297, -0.3106, 0.0591]) tensor([0.7456, 0.0459, 0.0852, 0.1233]) -Greedy action tensor([ 1.6484, -0.6884, -0.3234, 0.2909]) tensor([0.6697, 0.0647, 0.0932, 0.1723]) -Greedy action tensor([1.3979, 0.1265, 0.3821, 0.2546]) tensor([0.5099, 0.1430, 0.1846, 0.1625]) -Greedy action tensor([ 1.0924, -0.3241, -0.1788, -0.0214]) tensor([0.5401, 0.1310, 0.1515, 0.1773]) -Greedy action tensor([ 1.8176, -0.9098, 0.0281, 0.1497]) tensor([0.7037, 0.0460, 0.1175, 0.1327]) -Greedy action tensor([ 1.4874, -0.4003, -0.0582, -0.0646]) tensor([0.6343, 0.0961, 0.1352, 0.1344]) -Greedy action tensor([ 1.8941, -0.7568, -0.3394, 0.1743]) tensor([0.7370, 0.0520, 0.0790, 0.1320]) -Greedy action tensor([ 0.8491, -0.1608, -0.1594, 0.1139]) tensor([0.4528, 0.1649, 0.1652, 0.2171]) -Greedy action tensor([ 1.9508, -0.5809, -0.3270, 0.1193]) tensor([0.7451, 0.0592, 0.0764, 0.1193]) -Greedy action tensor([ 1.6101, -0.5452, -0.0491, 0.2788]) tensor([0.6368, 0.0738, 0.1212, 0.1682]) -Greedy action tensor([ 1.8570, -0.6862, -0.0386, 0.3657]) tensor([0.6878, 0.0541, 0.1033, 0.1548]) -Greedy action tensor([ 1.3639, -0.0438, -0.1764, 0.4701]) tensor([0.5353, 0.1310, 0.1147, 0.2190]) -Greedy action tensor([ 1.4381, -0.7679, -0.2747, 0.5784]) tensor([0.5835, 0.0643, 0.1052, 0.2470]) -Greedy action tensor([ 1.2611, -0.4987, -0.1243, 0.0609]) tensor([0.5802, 0.0998, 0.1452, 0.1747]) -Greedy action tensor([ 0.7181, -0.2805, -0.1870, 0.2317]) tensor([0.4188, 0.1543, 0.1694, 0.2575]) -Greedy action tensor([ 1.1520, -0.4764, -0.3804, 0.0992]) tensor([0.5678, 0.1114, 0.1227, 0.1981]) -Greedy action tensor([ 0.8688, -0.4324, -0.1741, 0.3011]) tensor([0.4563, 0.1242, 0.1608, 0.2587]) -Greedy action tensor([ 0.8778, -0.0591, 0.2476, 0.1753]) tensor([0.4133, 0.1619, 0.2201, 0.2047]) -Greedy action tensor([ 0.9387, -0.1576, -0.3382, -0.1304]) tensor([0.5112, 0.1708, 0.1426, 0.1755]) -Greedy action tensor([ 1.1992, -0.6670, -0.4293, 0.2996]) tensor([0.5689, 0.0880, 0.1116, 0.2314]) -Greedy action tensor([ 2.0701, -0.2412, -0.5617, 0.4520]) tensor([0.7303, 0.0724, 0.0525, 0.1448]) -Greedy action tensor([ 1.2746, -0.1707, -0.4479, 0.3222]) tensor([0.5555, 0.1309, 0.0992, 0.2143]) -Greedy action tensor([ 1.6198, -0.5177, -0.4222, 0.1414]) tensor([0.6776, 0.0799, 0.0879, 0.1545]) -Greedy action tensor([ 1.0213, -0.3588, -0.2658, 0.4563]) tensor([0.4771, 0.1200, 0.1317, 0.2712]) -Greedy action tensor([ 1.7810, -0.7012, -0.5139, 0.6323]) tensor([0.6661, 0.0557, 0.0671, 0.2112]) -Greedy action tensor([ 1.1731, 0.1461, -0.3823, 0.4273]) tensor([0.4893, 0.1752, 0.1033, 0.2321]) -Greedy action tensor([ 1.1376, -0.4304, -0.1847, 0.2868]) tensor([0.5258, 0.1096, 0.1401, 0.2245]) -Greedy action tensor([ 0.7946, -0.3287, 0.0782, 0.0699]) tensor([0.4351, 0.1415, 0.2126, 0.2108]) -Greedy action tensor([ 0.6118, -0.0125, -0.0093, 0.0210]) tensor([0.3807, 0.2039, 0.2046, 0.2109]) -Greedy action tensor([ 1.3303, -0.2603, -0.2109, -0.0110]) tensor([0.5955, 0.1213, 0.1275, 0.1557]) -Greedy action tensor([ 1.0836, 0.1545, -0.1460, 0.1161]) tensor([0.4837, 0.1910, 0.1414, 0.1838]) -Greedy action tensor([ 1.4610, -0.5275, -0.4183, 0.4296]) tensor([0.6075, 0.0832, 0.0928, 0.2166]) -Greedy action tensor([ 1.9398, -0.2684, -0.4525, 0.2404]) tensor([0.7225, 0.0794, 0.0660, 0.1321]) -Greedy action tensor([ 1.7242, -0.3202, -0.7923, 0.3253]) tensor([0.6863, 0.0889, 0.0554, 0.1694]) -Greedy action tensor([ 1.0852, -0.2163, -0.4221, 0.3278]) tensor([0.5095, 0.1387, 0.1129, 0.2389]) -Greedy action tensor([ 1.5329, -0.0443, -0.3716, -0.0472]) tensor([0.6404, 0.1323, 0.0954, 0.1319]) -Greedy action tensor([ 1.7491, 0.1359, -0.1970, 0.3009]) tensor([0.6341, 0.1263, 0.0906, 0.1490]) -Greedy action tensor([ 0.5839, -0.1227, -0.1976, 0.1267]) tensor([0.3870, 0.1909, 0.1771, 0.2450]) -Greedy action tensor([ 0.6643, -0.0272, -0.0951, 0.0803]) tensor([0.3958, 0.1982, 0.1852, 0.2207]) -Greedy action tensor([ 1.3286, -0.2204, -0.1790, 0.6425]) tensor([0.5161, 0.1097, 0.1143, 0.2599]) -Greedy action tensor([ 1.6314, -0.8660, -0.3957, 0.4962]) tensor([0.6513, 0.0536, 0.0858, 0.2093]) -Greedy action tensor([ 1.2560, -0.1400, -0.6457, 0.4397]) tensor([0.5438, 0.1346, 0.0812, 0.2404]) -Greedy action tensor([ 2.5195, -0.3466, 0.2856, 0.6418]) tensor([0.7593, 0.0432, 0.0813, 0.1161]) -Greedy action tensor([ 0.9326, -0.3657, -0.1800, 0.1437]) tensor([0.4864, 0.1328, 0.1599, 0.2210]) -Greedy action tensor([ 0.4794, -0.1332, -0.1514, 0.2376]) tensor([0.3497, 0.1895, 0.1861, 0.2746]) -Greedy action tensor([ 1.8991, -0.5127, -0.5749, 0.0525]) tensor([0.7509, 0.0673, 0.0633, 0.1185]) -Greedy action tensor([ 1.5305, -0.4590, -0.6342, 0.5105]) tensor([0.6203, 0.0848, 0.0712, 0.2237]) -Greedy action tensor([ 1.1297, -0.6997, -0.4652, 0.6632]) tensor([0.5023, 0.0806, 0.1019, 0.3151]) -Greedy action tensor([ 2.4890, -0.6353, -0.4937, 0.4235]) tensor([0.8188, 0.0360, 0.0415, 0.1038]) -Greedy action tensor([ 1.6959, -0.3089, -0.3130, 0.1603]) tensor([0.6738, 0.0907, 0.0904, 0.1451]) -Greedy action tensor([ 1.1079, -0.5128, 0.0265, 0.0302]) tensor([0.5327, 0.1053, 0.1807, 0.1813]) -Greedy action tensor([ 1.3976, -0.2778, -0.4557, 0.2346]) tensor([0.6037, 0.1130, 0.0946, 0.1887]) -Greedy action tensor([ 1.4892, -0.2765, -0.4563, 0.4080]) tensor([0.6049, 0.1035, 0.0864, 0.2052]) -Greedy action tensor([ 1.0657, -0.3209, -0.0862, 0.1351]) tensor([0.5101, 0.1275, 0.1612, 0.2012]) -Greedy action tensor([ 1.4458, -0.3884, -0.0033, 0.1897]) tensor([0.5955, 0.0951, 0.1398, 0.1696]) -Greedy action tensor([ 0.8272, -0.1950, -0.3356, -0.0613]) tensor([0.4799, 0.1727, 0.1500, 0.1974]) -Greedy action tensor([ 2.0317, -0.7260, -0.6820, 0.3604]) tensor([0.7589, 0.0481, 0.0503, 0.1427]) -Greedy action tensor([ 1.5221, -0.2466, -0.6312, 0.1393]) tensor([0.6504, 0.1109, 0.0755, 0.1632]) -Greedy action tensor([ 1.4338, -0.2580, -0.7111, 0.1899]) tensor([0.6291, 0.1159, 0.0737, 0.1813]) -Greedy action tensor([ 1.4941, -0.6376, -0.3807, 0.1346]) tensor([0.6541, 0.0776, 0.1003, 0.1680]) -Greedy action tensor([ 0.9069, -0.1587, -0.0673, -0.0468]) tensor([0.4745, 0.1635, 0.1791, 0.1828]) -Greedy action tensor([ 1.4256, 0.4197, 0.2112, -0.3569]) tensor([0.5462, 0.1998, 0.1622, 0.0919]) -Greedy action tensor([ 1.4062, -0.6816, -0.2152, 0.5608]) tensor([0.5711, 0.0708, 0.1129, 0.2452]) -Greedy action tensor([ 0.4065, -0.1736, 0.1553, 0.1369]) tensor([0.3225, 0.1805, 0.2508, 0.2462]) -Greedy action tensor([ 1.2343, -0.2921, -0.1221, 0.2731]) tensor([0.5384, 0.1170, 0.1387, 0.2059]) -Greedy action tensor([ 1.5299, -0.6842, 0.0662, 0.3276]) tensor([0.6093, 0.0666, 0.1410, 0.1831]) -Greedy action tensor([ 1.3031, -0.3186, -0.5159, 0.4157]) tensor([0.5645, 0.1115, 0.0916, 0.2324]) -Greedy action tensor([ 1.1446, -0.4156, -0.3308, 0.2646]) tensor([0.5395, 0.1133, 0.1234, 0.2238]) -Greedy action tensor([ 0.4976, -0.3226, -0.0871, -0.0383]) tensor([0.3872, 0.1705, 0.2158, 0.2266]) -Greedy action tensor([ 1.6239, -0.3977, -0.2902, 0.3509]) tensor([0.6411, 0.0849, 0.0945, 0.1795]) -Greedy action tensor([ 1.1978, -0.3188, -0.2489, 0.2119]) tensor([0.5471, 0.1201, 0.1288, 0.2041]) -Greedy action tensor([ 2.0391, -0.5006, -0.4169, 0.4777]) tensor([0.7275, 0.0574, 0.0624, 0.1527]) -Greedy action tensor([ 1.7947, -0.6490, -0.1742, 0.3010]) tensor([0.6892, 0.0599, 0.0962, 0.1548]) -Greedy action tensor([ 0.5107, -0.8544, -0.3317, 0.9454]) tensor([0.3095, 0.0790, 0.1333, 0.4781]) -Greedy action tensor([-1.1736, -1.5651, -0.1751, -1.4886]) tensor([0.1953, 0.1320, 0.5301, 0.1425]) -Greedy action tensor([-0.6093, 0.2079, 1.4781, -0.8861]) tensor([0.0827, 0.1873, 0.6672, 0.0627]) -Greedy action tensor([-1.0669, -0.0893, 1.3869, -0.6205]) tensor([0.0593, 0.1577, 0.6902, 0.0927]) -Greedy action tensor([ 1.0549, 0.3237, 1.5457, -0.4925]) tensor([0.3005, 0.1446, 0.4909, 0.0640]) -Greedy action tensor([2.0825, 0.0905, 1.0481, 1.0801]) tensor([0.5379, 0.0734, 0.1912, 0.1974]) -Greedy action tensor([ 0.5103, -0.0402, -0.1181, 0.6663]) tensor([0.3050, 0.1759, 0.1627, 0.3565]) -Greedy action tensor([-0.3125, -2.0903, -0.7948, 0.9828]) tensor([0.1839, 0.0311, 0.1135, 0.6715]) -Greedy action tensor([-0.6218, -1.9533, -0.6121, 1.0367]) tensor([0.1329, 0.0351, 0.1342, 0.6978]) -Greedy action tensor([1.0576, 0.4662, 0.8366, 0.2128]) tensor([0.3591, 0.1988, 0.2879, 0.1543]) -Greedy action tensor([-1.0606, -0.5875, 1.1432, 0.1899]) tensor([0.0660, 0.1059, 0.5977, 0.2304]) -Greedy action tensor([ 0.2856, -0.6635, -0.5375, 0.5202]) tensor([0.3236, 0.1252, 0.1421, 0.4091]) -Greedy action tensor([-0.0570, -0.0133, -0.4656, -0.0729]) tensor([0.2708, 0.2828, 0.1799, 0.2665]) -Greedy action tensor([ 0.5860, -1.4918, 0.6781, 1.1018]) tensor([0.2566, 0.0321, 0.2814, 0.4299]) -Greedy action tensor([ 1.4319, -0.7959, 1.0055, 0.6467]) tensor([0.4511, 0.0486, 0.2945, 0.2057]) -Greedy action tensor([-0.1856, 0.0059, 0.1930, -0.2304]) tensor([0.2161, 0.2617, 0.3155, 0.2066]) -Greedy action tensor([ 0.1065, 0.4035, 0.3157, -0.5198]) tensor([0.2431, 0.3272, 0.2997, 0.1300]) -Greedy action tensor([-0.0218, -1.4094, -0.5721, 0.2840]) tensor([0.3141, 0.0784, 0.1811, 0.4264]) -Greedy action tensor([ 0.3114, 0.7749, -0.0303, 0.5589]) tensor([0.2183, 0.3470, 0.1551, 0.2796]) -Greedy action tensor([ 1.0181, -0.8255, -0.4750, 0.7279]) tensor([0.4693, 0.0743, 0.1054, 0.3511]) -Greedy action tensor([-0.9684, -0.3970, 0.3971, 0.6819]) tensor([0.0841, 0.1488, 0.3293, 0.4378]) -Greedy action tensor([ 0.4541, 0.0224, -0.5546, 0.8362]) tensor([0.2874, 0.1866, 0.1048, 0.4211]) -Greedy action tensor([ 1.3666, 0.8099, 0.3765, -0.3991]) tensor([0.4727, 0.2709, 0.1756, 0.0809]) -Greedy action tensor([-0.9255, -1.6069, 0.2472, -0.2169]) tensor([0.1478, 0.0748, 0.4774, 0.3001]) -Greedy action tensor([-0.7301, -0.2632, -0.9019, -0.0914]) tensor([0.1876, 0.2992, 0.1580, 0.3553]) -Greedy action tensor([ 1.2695, -0.4054, -0.8315, 1.2750]) tensor([0.4319, 0.0809, 0.0528, 0.4343]) -Greedy action tensor([ 1.5491, -0.1049, -0.3662, 0.8428]) tensor([0.5458, 0.1044, 0.0804, 0.2694]) -Greedy action tensor([-0.2970, -2.4016, 0.2449, 0.4805]) tensor([0.1993, 0.0243, 0.3427, 0.4337]) -Greedy action tensor([ 1.5596, -0.6618, 1.3139, 0.2370]) tensor([0.4636, 0.0503, 0.3626, 0.1235]) -Greedy action tensor([ 0.2178, -0.9374, 1.6833, 0.3206]) tensor([0.1481, 0.0466, 0.6411, 0.1641]) -Greedy action tensor([-0.2070, -0.1159, -1.4027, 0.8782]) tensor([0.1866, 0.2044, 0.0565, 0.5525]) -Greedy action tensor([ 0.6499, 0.2124, -0.5572, 0.6266]) tensor([0.3423, 0.2210, 0.1024, 0.3344]) -Greedy action tensor([ 0.6766, -0.9932, 0.4461, 0.5320]) tensor([0.3511, 0.0661, 0.2789, 0.3039]) -Greedy action tensor([ 0.5306, -1.4244, 0.3868, 0.2873]) tensor([0.3582, 0.0507, 0.3102, 0.2809]) -Greedy action tensor([ 1.6786, -0.8476, 0.4119, 1.0199]) tensor([0.5321, 0.0426, 0.1499, 0.2754]) -Greedy action tensor([-0.2806, -2.5441, 0.2609, 0.3866]) tensor([0.2096, 0.0218, 0.3602, 0.4084]) -Greedy action tensor([ 0.1564, 0.3776, 0.9902, -0.1251]) tensor([0.1885, 0.2352, 0.4340, 0.1423]) -Greedy action tensor([-0.1630, -0.9824, 0.4976, 0.9357]) tensor([0.1568, 0.0691, 0.3036, 0.4705]) -Greedy action tensor([ 1.2058, -1.3046, 0.0955, 0.3010]) tensor([0.5509, 0.0447, 0.1815, 0.2229]) -Greedy action tensor([-0.1632, -2.1652, 2.3313, 0.2007]) tensor([0.0681, 0.0092, 0.8248, 0.0980]) -Greedy action tensor([-1.1188, -1.4458, 0.4333, -0.4157]) tensor([0.1182, 0.0852, 0.5579, 0.2387]) -Greedy action tensor([-1.0579, -1.3675, 0.7029, -0.6541]) tensor([0.1105, 0.0811, 0.6429, 0.1655]) -Greedy action tensor([-0.6511, -0.2349, 0.7527, -1.2134]) tensor([0.1397, 0.2119, 0.5688, 0.0796]) -Greedy action tensor([ 0.1330, -1.7380, 0.3526, 0.6530]) tensor([0.2450, 0.0377, 0.3052, 0.4121]) -Greedy action tensor([ 1.3388, -1.1535, 1.0262, 1.0066]) tensor([0.3950, 0.0327, 0.2890, 0.2834]) -Greedy action tensor([ 0.6373, 0.5770, 0.6799, -1.0949]) tensor([0.3163, 0.2977, 0.3300, 0.0559]) -Greedy action tensor([-0.4787, 0.0804, 0.1385, 0.0755]) tensor([0.1576, 0.2757, 0.2922, 0.2744]) -Greedy action tensor([-0.0167, -1.3480, 1.4485, 0.7697]) tensor([0.1284, 0.0339, 0.5558, 0.2819]) -Greedy action tensor([ 1.3331, -0.9555, -0.5715, 1.3586]) tensor([0.4393, 0.0446, 0.0654, 0.4507]) -Greedy action tensor([ 0.5520, 0.7917, 0.5301, -0.1501]) tensor([0.2670, 0.3394, 0.2612, 0.1323]) -Greedy action tensor([-1.1808, -0.2380, -0.2800, -0.2539]) tensor([0.1169, 0.3001, 0.2877, 0.2953]) -Greedy action tensor([-0.2408, 0.0888, 0.2629, 0.4522]) tensor([0.1654, 0.2300, 0.2738, 0.3308]) -Greedy action tensor([ 0.7190, 0.0663, 0.1202, -0.4155]) tensor([0.4181, 0.2177, 0.2297, 0.1345]) -Greedy action tensor([-1.4586, -1.1870, -0.3783, -0.1115]) tensor([0.1098, 0.1441, 0.3235, 0.4225]) -Greedy action tensor([ 0.4392, -1.1960, -0.9197, 0.5415]) tensor([0.3907, 0.0762, 0.1004, 0.4328]) -Greedy action tensor([-0.3866, 0.2287, 0.3911, 0.7645]) tensor([0.1221, 0.2260, 0.2658, 0.3861]) -Greedy action tensor([ 1.2944, 1.0090, 0.3880, -0.5298]) tensor([0.4316, 0.3244, 0.1744, 0.0696]) -Greedy action tensor([ 0.3719, -0.5631, 0.2380, -0.8976]) tensor([0.3924, 0.1541, 0.3432, 0.1103]) -Greedy action tensor([ 1.3706, -0.2618, -0.4608, 0.3506]) tensor([0.5827, 0.1139, 0.0933, 0.2101]) -Greedy action tensor([ 0.7874, -1.1562, 0.2828, 0.8260]) tensor([0.3589, 0.0514, 0.2167, 0.3730]) -Greedy action tensor([ 0.0240, 1.0678, 1.1364, -0.5555]) tensor([0.1344, 0.3816, 0.4087, 0.0753]) -Greedy action tensor([1.5311, 0.4212, 0.5128, 1.3637]) tensor([0.3942, 0.1299, 0.1424, 0.3335]) -Greedy action tensor([ 0.2864, -0.5240, -0.3483, 0.3714]) tensor([0.3264, 0.1452, 0.1730, 0.3554]) -Greedy action tensor([ 0.5652, -0.0857, 0.1978, -0.3175]) tensor([0.3806, 0.1985, 0.2635, 0.1574]) -Greedy action tensor([-0.2557, -0.3614, -0.5044, 1.0041]) tensor([0.1612, 0.1450, 0.1257, 0.5681]) -Greedy action tensor([-1.8175, -0.4875, -0.3522, -0.0730]) tensor([0.0674, 0.2549, 0.2918, 0.3858]) -Greedy action tensor([0.0648, 0.0420, 1.0806, 0.8722]) tensor([0.1433, 0.1400, 0.3956, 0.3212]) -Greedy action tensor([-0.0505, -1.0873, -0.0572, -0.6660]) tensor([0.3462, 0.1228, 0.3439, 0.1871]) -Greedy action tensor([ 0.3269, -1.6287, 1.0239, -0.0541]) tensor([0.2609, 0.0369, 0.5239, 0.1783]) -Greedy action tensor([ 1.2488, -0.1566, 1.1183, -0.4338]) tensor([0.4331, 0.1062, 0.3801, 0.0805]) -Greedy action tensor([ 0.7445, 0.4767, 1.0213, -0.1093]) tensor([0.2849, 0.2180, 0.3758, 0.1213]) -Greedy action tensor([-1.6465, 0.1028, 0.2013, -0.5992]) tensor([0.0627, 0.3606, 0.3979, 0.1787]) -Greedy action tensor([-1.4239, -0.9723, 1.2913, -0.5198]) tensor([0.0496, 0.0780, 0.7498, 0.1226]) -Greedy action tensor([ 0.1932, -0.4487, -0.4256, 0.9687]) tensor([0.2360, 0.1242, 0.1271, 0.5126]) -Greedy action tensor([ 0.2834, -1.4471, -0.2046, 1.2480]) tensor([0.2265, 0.0401, 0.1390, 0.5943]) -Greedy action tensor([0.8030, 0.3338, 0.1027, 0.6646]) tensor([0.3341, 0.2090, 0.1659, 0.2910]) -Greedy action tensor([ 0.5448, -0.5480, -0.1537, 0.6268]) tensor([0.3427, 0.1149, 0.1704, 0.3720]) -Greedy action tensor([-0.1155, -0.3439, -0.0162, -0.0569]) tensor([0.2525, 0.2009, 0.2789, 0.2677]) -Greedy action tensor([ 0.2950, 0.2288, 0.3299, -0.2000]) tensor([0.2792, 0.2614, 0.2892, 0.1702]) -Greedy action tensor([ 0.8808, -0.0732, 1.3014, 1.2626]) tensor([0.2287, 0.0881, 0.3482, 0.3350]) -Greedy action tensor([-1.2721, -1.0423, 1.3155, -1.0492]) tensor([0.0595, 0.0749, 0.7913, 0.0744]) -Greedy action tensor([ 2.3432, -0.3808, 0.7542, 1.4639]) tensor([0.5935, 0.0389, 0.1212, 0.2464]) -Greedy action tensor([-1.7403, -0.5715, 0.7061, 0.0592]) tensor([0.0459, 0.1475, 0.5294, 0.2772]) -Greedy action tensor([-1.9028, -0.3923, 0.6498, -0.1514]) tensor([0.0414, 0.1877, 0.5321, 0.2388]) -Greedy action tensor([-1.8978, -0.2296, 0.6140, -0.1671]) tensor([0.0412, 0.2184, 0.5078, 0.2325]) -Greedy action tensor([-1.5585, -0.5708, 0.6429, 0.1925]) tensor([0.0541, 0.1453, 0.4890, 0.3117]) -Greedy action tensor([-1.2609, -0.6214, 0.5514, 0.1549]) tensor([0.0761, 0.1443, 0.4661, 0.3135]) -Greedy action tensor([-1.3278, -0.3202, 0.3215, 0.1539]) tensor([0.0749, 0.2053, 0.3900, 0.3298]) -Greedy action tensor([-1.7567, -0.1909, 0.5419, -0.0609]) tensor([0.0472, 0.2258, 0.4699, 0.2572]) -Greedy action tensor([-1.9290, -0.4484, 0.6547, -0.1705]) tensor([0.0409, 0.1798, 0.5419, 0.2374]) -Greedy action tensor([-1.8429, -0.1737, 0.5945, -0.0715]) tensor([0.0423, 0.2246, 0.4843, 0.2488]) -Greedy action tensor([-0.6341, -0.3459, 0.9722, 1.5334]) tensor([0.0623, 0.0831, 0.3105, 0.5442]) -Greedy action tensor([-1.7937, -0.2063, 0.5534, -0.0930]) tensor([0.0458, 0.2241, 0.4791, 0.2510]) -Greedy action tensor([-1.8541, -0.3407, 0.6424, -0.0845]) tensor([0.0425, 0.1929, 0.5155, 0.2492]) -Greedy action tensor([-1.2464, -0.4897, 0.8521, 0.9585]) tensor([0.0491, 0.1047, 0.4006, 0.4456]) -Greedy action tensor([-1.8175, -0.4722, 0.6033, -0.1336]) tensor([0.0466, 0.1787, 0.5239, 0.2508]) -Greedy action tensor([-0.2477, -0.1185, 0.1472, 0.1514]) tensor([0.1956, 0.2226, 0.2903, 0.2915]) -Greedy action tensor([-1.7573, -0.4916, 0.7239, -0.2350]) tensor([0.0474, 0.1682, 0.5671, 0.2173]) -Greedy action tensor([-1.8914, -0.4373, 0.6317, -0.1553]) tensor([0.0427, 0.1828, 0.5323, 0.2423]) -Greedy action tensor([-1.5534, -0.4957, 0.5143, 0.1514]) tensor([0.0579, 0.1666, 0.4574, 0.3182]) -Greedy action tensor([-1.9015, -0.4475, 0.6424, -0.1546]) tensor([0.0421, 0.1802, 0.5361, 0.2416]) -Greedy action tensor([-1.3820, -0.5614, 0.4382, -0.0203]) tensor([0.0749, 0.1702, 0.4625, 0.2924]) -Greedy action tensor([-1.7726, -0.3130, 0.5840, -0.0258]) tensor([0.0463, 0.1993, 0.4888, 0.2656]) -Greedy action tensor([-1.6131, -0.5264, 0.4848, 0.0565]) tensor([0.0574, 0.1701, 0.4677, 0.3048]) -Greedy action tensor([-1.9132, -0.4482, 0.6494, -0.1598]) tensor([0.0415, 0.1798, 0.5388, 0.2399]) -Greedy action tensor([-1.8967, -0.3622, 0.6344, -0.1464]) tensor([0.0417, 0.1936, 0.5245, 0.2402]) -Greedy action tensor([-1.9288, -0.4417, 0.6589, -0.1716]) tensor([0.0408, 0.1804, 0.5424, 0.2364]) -Greedy action tensor([-1.9168, -0.4059, 0.6507, -0.1642]) tensor([0.0411, 0.1862, 0.5356, 0.2371]) -Greedy action tensor([-0.8691, -0.1523, 0.8053, 1.0604]) tensor([0.0655, 0.1341, 0.3494, 0.4510]) -Greedy action tensor([-1.9162, -0.4647, 0.6926, -0.1312]) tensor([0.0403, 0.1721, 0.5474, 0.2402]) -Greedy action tensor([-1.6736, -0.5102, 0.5428, 0.0738]) tensor([0.0523, 0.1675, 0.4800, 0.3003]) -Greedy action tensor([-1.8142, -0.4520, 0.6713, 0.0210]) tensor([0.0431, 0.1685, 0.5180, 0.2704]) -Greedy action tensor([-1.8239, -0.4517, 0.6016, -0.1218]) tensor([0.0460, 0.1814, 0.5202, 0.2523]) -Greedy action tensor([-1.4021, -0.5599, 0.3679, 0.1535]) tensor([0.0718, 0.1667, 0.4215, 0.3401]) -Greedy action tensor([-0.6637, -0.5633, 0.3474, 0.0929]) tensor([0.1432, 0.1583, 0.3935, 0.3051]) -Greedy action tensor([-1.4424, -0.1019, 0.6073, 0.3381]) tensor([0.0540, 0.2063, 0.4193, 0.3204]) -Greedy action tensor([-1.7909, -0.4294, 0.6467, 0.0166]) tensor([0.0446, 0.1739, 0.5100, 0.2716]) -Greedy action tensor([-1.6206, -0.4398, 1.0414, 0.8051]) tensor([0.0335, 0.1090, 0.4792, 0.3784]) -Greedy action tensor([-1.9106, -0.4415, 0.6515, -0.1604]) tensor([0.0416, 0.1806, 0.5387, 0.2392]) -Greedy action tensor([-1.6017, -0.4712, 0.4962, -0.0344]) tensor([0.0587, 0.1818, 0.4782, 0.2813]) -Greedy action tensor([-1.8632, -0.3053, 0.6072, -0.1204]) tensor([0.0429, 0.2039, 0.5078, 0.2453]) -Greedy action tensor([-1.8630, -0.4597, 0.6243, -0.1358]) tensor([0.0440, 0.1791, 0.5294, 0.2476]) -Greedy action tensor([-1.7756, -0.4498, 0.6326, -0.0154]) tensor([0.0461, 0.1736, 0.5123, 0.2680]) -Greedy action tensor([-1.4961, -0.5353, 0.4193, 0.1012]) tensor([0.0652, 0.1704, 0.4425, 0.3220]) -Greedy action tensor([-1.9109, -0.4590, 0.6470, -0.1663]) tensor([0.0418, 0.1787, 0.5400, 0.2394]) -Greedy action tensor([-1.6761, -0.3934, 0.5478, -0.0173]) tensor([0.0523, 0.1888, 0.4839, 0.2750]) -Greedy action tensor([-1.9020, -0.4665, 0.6761, -0.1297]) tensor([0.0412, 0.1732, 0.5430, 0.2426]) -Greedy action tensor([-1.8476, -0.2410, 0.5865, -0.1076]) tensor([0.0433, 0.2159, 0.4940, 0.2468]) -Greedy action tensor([-1.6513, -0.7205, 0.4807, 0.1028]) tensor([0.0564, 0.1429, 0.4751, 0.3256]) -Greedy action tensor([-1.3492, -0.6086, 1.0556, 1.0450]) tensor([0.0398, 0.0834, 0.4407, 0.4361]) -Greedy action tensor([-1.8957, -0.4691, 0.6425, -0.1505]) tensor([0.0425, 0.1768, 0.5375, 0.2432]) -Greedy action tensor([-1.7453, -0.3760, 0.6559, -0.0723]) tensor([0.0470, 0.1846, 0.5182, 0.2502]) -Greedy action tensor([-1.9007, -0.4347, 0.6414, -0.1588]) tensor([0.0421, 0.1824, 0.5351, 0.2404]) -Greedy action tensor([-1.6855, -0.4521, 0.5475, -0.0262]) tensor([0.0526, 0.1805, 0.4905, 0.2764]) -Greedy action tensor([-1.0534, -0.4902, 0.3511, 0.5896]) tensor([0.0833, 0.1464, 0.3394, 0.4309]) -Greedy action tensor([-1.7275, -0.4879, 0.5579, -0.0580]) tensor([0.0510, 0.1763, 0.5017, 0.2710]) -Greedy action tensor([-1.8179, -0.4441, 0.5979, -0.1178]) tensor([0.0462, 0.1827, 0.5179, 0.2532]) -Greedy action tensor([-1.8490, -0.4612, 0.6560, -0.0652]) tensor([0.0431, 0.1727, 0.5277, 0.2566]) -Greedy action tensor([-1.9043, -0.3300, 0.6329, -0.1465]) tensor([0.0412, 0.1989, 0.5210, 0.2390]) -Greedy action tensor([-1.7302, -0.4716, 0.5937, 0.0058]) tensor([0.0490, 0.1725, 0.5005, 0.2780]) -Greedy action tensor([-1.8630, -0.4708, 0.6262, -0.1459]) tensor([0.0442, 0.1777, 0.5322, 0.2459]) -Greedy action tensor([-1.8793, -0.4689, 0.6386, -0.1330]) tensor([0.0430, 0.1764, 0.5338, 0.2468]) -Greedy action tensor([-1.2877, -0.4596, 0.7107, -0.5652]) tensor([0.0786, 0.1799, 0.5797, 0.1618]) -Greedy action tensor([-1.0430, -0.0988, 1.0068, 1.0340]) tensor([0.0518, 0.1331, 0.4020, 0.4131]) -Greedy action tensor([-1.2801, -0.3741, 0.3741, 0.0519]) tensor([0.0801, 0.1981, 0.4186, 0.3033]) -Greedy action tensor([-1.5711, -0.5498, 0.4902, -0.0527]) tensor([0.0617, 0.1714, 0.4850, 0.2818]) -Greedy action tensor([-1.8557, -0.4745, 0.6683, -0.0524]) tensor([0.0425, 0.1692, 0.5304, 0.2580]) -Greedy action tensor([-1.8473, -0.4131, 0.6098, -0.1328]) tensor([0.0446, 0.1872, 0.5205, 0.2477]) -Greedy action tensor([-1.5147, -0.2584, 0.6419, 0.1627]) tensor([0.0540, 0.1898, 0.4670, 0.2892]) -Greedy action tensor([-1.7417, -0.4250, 0.5679, -0.0974]) tensor([0.0501, 0.1867, 0.5041, 0.2591]) -Greedy action tensor([-1.9177, -0.4437, 0.6521, -0.1674]) tensor([0.0413, 0.1806, 0.5401, 0.2380]) -Greedy action tensor([-0.4855, -0.3383, 0.1687, 0.0462]) tensor([0.1729, 0.2003, 0.3326, 0.2942]) -Greedy action tensor([-1.4574, -0.5571, 0.9135, 0.7650]) tensor([0.0427, 0.1052, 0.4576, 0.3945]) -Greedy action tensor([-1.7432, -0.3981, 0.5452, -0.0561]) tensor([0.0497, 0.1910, 0.4905, 0.2688]) -Greedy action tensor([-1.8090, -0.3235, 0.5802, -0.0946]) tensor([0.0457, 0.2019, 0.4985, 0.2539]) -Greedy action tensor([-1.5336, -0.4480, 0.7461, 0.4701]) tensor([0.0473, 0.1400, 0.4621, 0.3506]) -Greedy action tensor([-1.5189, -0.5012, 0.4516, 0.0716]) tensor([0.0631, 0.1746, 0.4527, 0.3096]) -Greedy action tensor([-1.7013, -0.4223, 0.6619, 0.2315]) tensor([0.0452, 0.1624, 0.4802, 0.3122]) -Greedy action tensor([-1.7179, -0.4619, 0.5629, -0.0455]) tensor([0.0510, 0.1790, 0.4987, 0.2714]) -Greedy action tensor([-1.2132, -0.4304, 0.7123, 1.1042]) tensor([0.0495, 0.1083, 0.3396, 0.5026]) -Greedy action tensor([-1.9194, -0.4125, 0.6509, -0.1651]) tensor([0.0410, 0.1852, 0.5365, 0.2372]) -Greedy action tensor([-1.9255, -0.4359, 0.6539, -0.1728]) tensor([0.0410, 0.1818, 0.5407, 0.2365]) -Greedy action tensor([-1.3940, -0.5272, 0.6436, -0.3543]) tensor([0.0721, 0.1714, 0.5527, 0.2038]) -Greedy action tensor([ 0.7494, -0.5036, -0.0482, -0.4234]) tensor([0.4889, 0.1396, 0.2202, 0.1513]) -Greedy action tensor([ 0.4640, 0.2283, -0.1500, 0.1955]) tensor([0.3230, 0.2552, 0.1748, 0.2470]) -Greedy action tensor([ 0.7507, 0.2432, -0.1775, -0.1164]) tensor([0.4137, 0.2490, 0.1635, 0.1738]) -Greedy action tensor([ 0.3598, -0.0596, -0.0832, -0.2259]) tensor([0.3501, 0.2302, 0.2248, 0.1949]) -Greedy action tensor([ 0.9810, -0.1913, 0.0439, -0.4161]) tensor([0.5131, 0.1589, 0.2010, 0.1269]) -Greedy action tensor([ 0.8678, -0.5734, -0.1738, -0.4503]) tensor([0.5385, 0.1274, 0.1900, 0.1441]) -Greedy action tensor([ 0.6124, -0.4257, -0.1024, -0.3931]) tensor([0.4526, 0.1603, 0.2215, 0.1656]) -Greedy action tensor([ 0.7991, -0.3701, -0.0622, -0.4198]) tensor([0.4929, 0.1531, 0.2083, 0.1457]) -Greedy action tensor([ 0.5962, -0.4872, 0.0762, -0.0847]) tensor([0.4100, 0.1388, 0.2438, 0.2075]) -Greedy action tensor([ 0.8732, -0.3560, 0.2170, -0.5712]) tensor([0.4885, 0.1429, 0.2534, 0.1152]) -Greedy action tensor([ 0.4631, 0.0146, -0.1007, -0.1333]) tensor([0.3625, 0.2315, 0.2063, 0.1997]) -Greedy action tensor([ 0.4386, -0.2576, -0.1535, -0.4650]) tensor([0.4070, 0.2029, 0.2252, 0.1649]) -Greedy action tensor([ 0.9997, -0.5449, -0.2650, -0.7579]) tensor([0.5994, 0.1279, 0.1692, 0.1034]) -Greedy action tensor([ 0.8360, -0.4907, -0.1597, -0.3535]) tensor([0.5157, 0.1368, 0.1905, 0.1570]) -Greedy action tensor([ 1.0943, -0.5675, 0.0012, -0.3586]) tensor([0.5686, 0.1079, 0.1906, 0.1330]) -Greedy action tensor([ 0.5182, -0.2743, 0.0166, -0.4982]) tensor([0.4132, 0.1871, 0.2502, 0.1495]) -Greedy action tensor([ 0.4991, -0.5666, 0.0184, -0.3647]) tensor([0.4194, 0.1445, 0.2593, 0.1768]) -Greedy action tensor([ 0.2805, -0.1573, 0.0266, -0.2904]) tensor([0.3349, 0.2161, 0.2598, 0.1892]) -Greedy action tensor([ 0.5009, -0.4696, -0.0665, -0.1535]) tensor([0.4056, 0.1537, 0.2300, 0.2108]) -Greedy action tensor([ 0.4603, -0.5027, -0.0566, -0.0899]) tensor([0.3914, 0.1494, 0.2334, 0.2258]) -Greedy action tensor([ 0.9012, -0.7772, 0.0533, -0.5338]) tensor([0.5396, 0.1007, 0.2311, 0.1285]) -Greedy action tensor([ 0.5924, -0.2403, -0.0079, -0.2384]) tensor([0.4134, 0.1798, 0.2268, 0.1801]) -Greedy action tensor([ 0.3536, -0.0225, -0.0282, -0.2455]) tensor([0.3426, 0.2352, 0.2339, 0.1882]) -Greedy action tensor([ 0.9820, -0.6327, -0.0455, -0.5407]) tensor([0.5634, 0.1121, 0.2016, 0.1229]) -Greedy action tensor([ 0.6529, -0.6158, -0.0218, -0.3463]) tensor([0.4632, 0.1303, 0.2359, 0.1706]) -Greedy action tensor([ 0.7144, -0.2796, -0.0905, -0.1939]) tensor([0.4504, 0.1667, 0.2014, 0.1816]) -Greedy action tensor([ 0.8365, -0.5146, -0.1407, -0.2775]) tensor([0.5093, 0.1319, 0.1917, 0.1672]) -Greedy action tensor([ 0.4561, -0.1640, 0.0818, -0.4310]) tensor([0.3791, 0.2039, 0.2608, 0.1561]) -Greedy action tensor([ 0.6995, -0.3611, -0.0643, -0.5202]) tensor([0.4745, 0.1643, 0.2211, 0.1401]) -Greedy action tensor([ 1.1092, -0.9489, 0.0369, -0.5439]) tensor([0.6019, 0.0769, 0.2060, 0.1152]) -Greedy action tensor([ 0.4621, -0.2095, -0.0088, -0.5728]) tensor([0.4015, 0.2051, 0.2507, 0.1426]) -Greedy action tensor([ 0.9961, -0.6241, -0.0678, -0.4740]) tensor([0.5640, 0.1116, 0.1947, 0.1297]) -Greedy action tensor([ 0.7028, -0.1933, -0.1124, -0.7288]) tensor([0.4786, 0.1953, 0.2118, 0.1143]) -Greedy action tensor([ 0.8496, -0.7585, -0.0167, -0.3778]) tensor([0.5225, 0.1046, 0.2197, 0.1531]) -Greedy action tensor([ 0.4474, -0.4288, -0.0896, -0.3437]) tensor([0.4075, 0.1697, 0.2382, 0.1847]) -Greedy action tensor([ 0.4121, -0.2553, 0.0181, -0.3350]) tensor([0.3758, 0.1928, 0.2534, 0.1780]) -Greedy action tensor([ 0.3567, -0.0570, -0.0473, 0.0614]) tensor([0.3254, 0.2152, 0.2173, 0.2422]) -Greedy action tensor([ 0.5058, -0.0766, -0.0553, -0.0736]) tensor([0.3718, 0.2077, 0.2122, 0.2083]) -Greedy action tensor([ 0.3463, -0.2786, 0.0803, -0.5355]) tensor([0.3682, 0.1971, 0.2822, 0.1525]) -Greedy action tensor([ 0.8447, 0.0618, -0.0684, -0.3254]) tensor([0.4611, 0.2108, 0.1850, 0.1431]) -Greedy action tensor([ 0.4697, -0.0778, -0.2088, -0.2743]) tensor([0.3905, 0.2258, 0.1981, 0.1856]) -Greedy action tensor([ 0.9530, -0.7786, -0.3130, -0.8427]) tensor([0.6154, 0.1089, 0.1735, 0.1022]) -Greedy action tensor([ 0.1174, 0.6143, -0.0733, 0.1271]) tensor([0.2232, 0.3669, 0.1845, 0.2254]) -Greedy action tensor([ 0.2054, 0.0465, -0.0851, -0.1742]) tensor([0.3044, 0.2597, 0.2277, 0.2083]) -Greedy action tensor([ 0.8524, -0.5348, 0.0415, -0.2916]) tensor([0.4968, 0.1241, 0.2208, 0.1583]) -Greedy action tensor([ 0.3658, -0.0785, -0.0575, -0.2868]) tensor([0.3550, 0.2277, 0.2325, 0.1849]) -Greedy action tensor([ 0.4344, -0.1519, 0.0074, -0.2617]) tensor([0.3694, 0.2055, 0.2410, 0.1841]) -Greedy action tensor([ 0.6062, -0.4490, -0.0656, -0.1995]) tensor([0.4337, 0.1510, 0.2215, 0.1938]) -Greedy action tensor([ 0.6695, -0.5138, -0.0701, -0.4151]) tensor([0.4713, 0.1444, 0.2250, 0.1593]) -Greedy action tensor([ 0.8524, -0.1725, -0.0239, -0.2186]) tensor([0.4722, 0.1694, 0.1966, 0.1618]) -Greedy action tensor([ 0.7381, -0.7251, 0.0043, -0.4576]) tensor([0.4965, 0.1149, 0.2384, 0.1502]) -Greedy action tensor([ 0.9758, -1.3209, -0.0836, -0.9379]) tensor([0.6270, 0.0631, 0.2174, 0.0925]) -Greedy action tensor([ 0.6773, -0.6725, -0.1389, -0.2111]) tensor([0.4733, 0.1227, 0.2093, 0.1947]) -Greedy action tensor([ 0.8433, -0.5683, 0.0406, -0.3980]) tensor([0.5048, 0.1231, 0.2262, 0.1459]) -Greedy action tensor([ 0.3506, -0.1016, -0.0893, -0.3583]) tensor([0.3607, 0.2295, 0.2323, 0.1775]) -Greedy action tensor([ 0.6659, -0.6751, -0.1971, -0.4574]) tensor([0.4978, 0.1302, 0.2100, 0.1619]) -Greedy action tensor([ 0.5222, -0.3140, -0.0564, -0.1998]) tensor([0.4033, 0.1748, 0.2261, 0.1959]) -Greedy action tensor([ 0.5344, -0.3995, -0.1613, -0.4733]) tensor([0.4431, 0.1741, 0.2210, 0.1618]) -Greedy action tensor([ 1.0016, -0.2819, -0.0730, -0.3311]) tensor([0.5313, 0.1472, 0.1814, 0.1401]) -Greedy action tensor([ 0.7720, -0.3494, 0.1365, -0.4939]) tensor([0.4679, 0.1524, 0.2478, 0.1319]) -Greedy action tensor([ 0.3784, -0.2338, -0.1822, -0.2147]) tensor([0.3751, 0.2034, 0.2141, 0.2073]) -Greedy action tensor([ 0.4431, -0.2464, -0.1731, -0.5254]) tensor([0.4130, 0.2072, 0.2230, 0.1568]) -Greedy action tensor([ 0.5636, 0.0782, 0.0388, -0.4227]) tensor([0.3876, 0.2385, 0.2293, 0.1446]) -Greedy action tensor([ 0.8353, -0.6933, -0.0203, -0.3965]) tensor([0.5172, 0.1121, 0.2198, 0.1509]) -Greedy action tensor([ 0.1931, -0.0901, 0.0038, -0.1468]) tensor([0.3037, 0.2288, 0.2513, 0.2162]) -Greedy action tensor([ 0.8556, -0.4547, 0.0377, -0.4118]) tensor([0.5018, 0.1354, 0.2215, 0.1413]) -Greedy action tensor([ 0.8946, -0.4774, -0.1167, -0.3573]) tensor([0.5254, 0.1332, 0.1911, 0.1502]) -Greedy action tensor([ 0.3230, -0.2720, 0.1890, -0.2450]) tensor([0.3341, 0.1843, 0.2922, 0.1893]) -Greedy action tensor([ 0.5924, -0.4306, 0.1365, -0.5091]) tensor([0.4300, 0.1546, 0.2725, 0.1429]) -Greedy action tensor([ 0.6061, -0.4352, 0.1201, -0.3712]) tensor([0.4266, 0.1506, 0.2624, 0.1605]) -Greedy action tensor([ 0.8085, -0.5155, -0.0417, -0.3134]) tensor([0.4953, 0.1318, 0.2117, 0.1613]) -Greedy action tensor([ 0.8910, -0.7654, -0.0778, -0.3428]) tensor([0.5372, 0.1025, 0.2039, 0.1564]) -Greedy action tensor([ 0.7543, -0.4402, -0.0645, -0.4209]) tensor([0.4872, 0.1475, 0.2148, 0.1504]) -Greedy action tensor([ 0.5502, -0.2535, 0.0707, -0.2896]) tensor([0.4002, 0.1792, 0.2478, 0.1728]) -Greedy action tensor([ 0.7665, -0.7391, -0.1082, -0.4063]) tensor([0.5132, 0.1139, 0.2140, 0.1589]) -Greedy action tensor([ 0.4926, 0.0757, -0.0733, 0.0715]) tensor([0.3468, 0.2286, 0.1970, 0.2276]) -Greedy action tensor([ 0.4987, -0.4384, 0.0357, -0.2117]) tensor([0.3980, 0.1559, 0.2505, 0.1956]) -Greedy action tensor([ 0.4882, -0.3587, 0.0123, -0.4989]) tensor([0.4128, 0.1770, 0.2565, 0.1538]) -Greedy action tensor([ 0.5416, -0.2788, -0.0811, -0.2153]) tensor([0.4088, 0.1800, 0.2194, 0.1918]) -Greedy action tensor([ 0.1074, 0.0642, -0.0287, -0.3196]) tensor([0.2871, 0.2750, 0.2506, 0.1873]) -Greedy action tensor([ 0.1801, -0.0460, -0.1260, -0.3010]) tensor([0.3173, 0.2531, 0.2336, 0.1961]) -Greedy action tensor([ 1.1085, -0.1565, -0.6911, 0.5234]) tensor([0.4989, 0.1408, 0.0825, 0.2779]) -Greedy action tensor([ 1.5429, -0.7556, -0.4747, 0.3190]) tensor([0.6547, 0.0657, 0.0871, 0.1925]) -Greedy action tensor([ 1.2961, -0.1893, -0.3811, 0.1197]) tensor([0.5808, 0.1315, 0.1086, 0.1791]) -Greedy action tensor([ 0.7648, 0.0315, -0.6540, 0.1917]) tensor([0.4374, 0.2101, 0.1059, 0.2466]) -Greedy action tensor([ 1.7425, -0.1771, -0.3186, 0.8460]) tensor([0.5945, 0.0872, 0.0757, 0.2426]) -Greedy action tensor([ 1.1052, -0.2227, -0.5143, 0.2502]) tensor([0.5296, 0.1404, 0.1049, 0.2252]) -Greedy action tensor([ 2.0094, -0.2725, -0.5698, 0.1968]) tensor([0.7456, 0.0761, 0.0565, 0.1217]) -Greedy action tensor([ 0.8323, 0.2064, -0.2514, -0.3419]) tensor([0.4583, 0.2451, 0.1550, 0.1416]) -Greedy action tensor([ 1.7984, -0.0601, -0.3570, 0.5424]) tensor([0.6424, 0.1002, 0.0744, 0.1830]) -Greedy action tensor([ 1.1658, -0.5806, -0.3257, 0.2866]) tensor([0.5511, 0.0961, 0.1240, 0.2288]) -Greedy action tensor([ 1.1069, -0.3312, -0.4423, 0.2985]) tensor([0.5276, 0.1252, 0.1121, 0.2351]) -Greedy action tensor([ 1.2515, -0.6802, -0.0365, 0.0070]) tensor([0.5852, 0.0848, 0.1614, 0.1686]) -Greedy action tensor([ 1.2642, -0.1960, -0.4460, 0.5517]) tensor([0.5254, 0.1220, 0.0950, 0.2577]) -Greedy action tensor([ 1.1174, -0.7424, -0.2753, 0.3610]) tensor([0.5338, 0.0831, 0.1326, 0.2505]) -Greedy action tensor([ 1.4466, -0.3902, -0.4005, 0.5598]) tensor([0.5784, 0.0921, 0.0912, 0.2383]) -Greedy action tensor([ 1.6740, -0.2082, -0.0257, 0.2790]) tensor([0.6318, 0.0962, 0.1154, 0.1566]) -Greedy action tensor([ 1.4388, -0.3624, -0.1944, 0.2211]) tensor([0.6037, 0.0997, 0.1179, 0.1787]) -Greedy action tensor([ 1.2529, -0.3127, -0.6098, 0.2474]) tensor([0.5780, 0.1208, 0.0897, 0.2115]) -Greedy action tensor([ 1.7025, -0.6204, -0.3473, 0.3656]) tensor([0.6714, 0.0658, 0.0865, 0.1763]) -Greedy action tensor([ 1.1731, -0.3867, -0.4509, 0.2121]) tensor([0.5587, 0.1174, 0.1101, 0.2137]) -Greedy action tensor([ 1.1421, -0.0987, -0.3524, 0.0061]) tensor([0.5451, 0.1576, 0.1223, 0.1750]) -Greedy action tensor([ 1.3724, -0.3969, -0.4195, -0.0514]) tensor([0.6338, 0.1080, 0.1056, 0.1526]) -Greedy action tensor([ 1.4568, 0.1865, -0.2444, 0.3034]) tensor([0.5622, 0.1578, 0.1026, 0.1774]) -Greedy action tensor([ 1.1528, -0.1326, -0.5745, -0.2389]) tensor([0.5872, 0.1624, 0.1044, 0.1460]) -Greedy action tensor([ 1.6362, -0.4936, -0.3366, 0.0839]) tensor([0.6804, 0.0809, 0.0946, 0.1441]) -Greedy action tensor([ 0.9873, -0.3371, -0.0306, 0.3682]) tensor([0.4617, 0.1228, 0.1668, 0.2486]) -Greedy action tensor([1.5750, 0.0501, 0.2033, 0.0748]) tensor([0.5902, 0.1284, 0.1497, 0.1317]) -Greedy action tensor([ 1.4170, -0.0636, -0.5886, 0.2541]) tensor([0.5971, 0.1359, 0.0804, 0.1867]) -Greedy action tensor([ 1.6139, -0.7688, -0.1478, 0.8258]) tensor([0.5818, 0.0537, 0.0999, 0.2646]) -Greedy action tensor([ 1.0298, -0.5192, -0.4304, 0.6085]) tensor([0.4760, 0.1011, 0.1105, 0.3123]) -Greedy action tensor([ 0.8439, -0.4691, -0.2589, 0.4236]) tensor([0.4429, 0.1192, 0.1470, 0.2909]) -Greedy action tensor([ 1.3641, -0.5841, -0.2943, 0.7934]) tensor([0.5268, 0.0751, 0.1003, 0.2977]) -Greedy action tensor([ 1.5119, -0.4233, -0.5076, 0.6673]) tensor([0.5859, 0.0846, 0.0778, 0.2518]) -Greedy action tensor([ 1.4400, -0.2796, -0.4144, 0.3589]) tensor([0.5971, 0.1070, 0.0935, 0.2025]) -Greedy action tensor([ 0.7886, -0.1481, -0.3254, 0.0850]) tensor([0.4515, 0.1769, 0.1482, 0.2234]) -Greedy action tensor([ 2.6025, -0.8491, -0.2329, 0.8685]) tensor([0.7893, 0.0250, 0.0463, 0.1394]) -Greedy action tensor([ 0.7323, 0.1050, -0.2743, -0.1042]) tensor([0.4287, 0.2289, 0.1567, 0.1857]) -Greedy action tensor([ 1.4872, -0.3088, -0.2384, 0.3640]) tensor([0.5991, 0.0994, 0.1067, 0.1948]) -Greedy action tensor([ 1.6457, -0.1738, -0.5990, 0.9876]) tensor([0.5599, 0.0908, 0.0593, 0.2900]) -Greedy action tensor([ 1.7893, -0.9121, 0.0094, -0.0525]) tensor([0.7172, 0.0481, 0.1210, 0.1137]) -Greedy action tensor([ 0.4635, -0.4244, 0.0253, 0.0952]) tensor([0.3638, 0.1497, 0.2347, 0.2517]) -Greedy action tensor([ 1.5758, 0.0723, -0.6476, 0.7115]) tensor([0.5708, 0.1269, 0.0618, 0.2405]) -Greedy action tensor([ 1.5345, -0.5438, -0.5477, 0.3431]) tensor([0.6437, 0.0806, 0.0802, 0.1956]) -Greedy action tensor([ 1.4308, -0.6610, -0.2325, 0.3818]) tensor([0.6012, 0.0742, 0.1139, 0.2106]) -Greedy action tensor([ 1.0612, -0.2870, -0.4919, 0.7721]) tensor([0.4504, 0.1170, 0.0953, 0.3373]) -Greedy action tensor([ 1.8870, -0.5308, -0.4387, 0.8544]) tensor([0.6481, 0.0578, 0.0633, 0.2308]) -Greedy action tensor([ 1.3469, -0.3805, -0.1039, 0.4082]) tensor([0.5546, 0.0986, 0.1300, 0.2169]) -Greedy action tensor([ 1.3361, -0.4250, -0.1375, 0.3393]) tensor([0.5650, 0.0971, 0.1294, 0.2085]) -Greedy action tensor([ 1.7510, -0.8094, -0.5411, 0.7176]) tensor([0.6518, 0.0504, 0.0659, 0.2319]) -Greedy action tensor([ 1.3696, -0.5162, -0.3313, 0.2860]) tensor([0.5979, 0.0907, 0.1091, 0.2023]) -Greedy action tensor([ 1.4771, -0.2293, -0.7280, 0.2454]) tensor([0.6315, 0.1146, 0.0696, 0.1843]) -Greedy action tensor([ 2.7711, -0.2115, 0.0989, 0.3235]) tensor([0.8290, 0.0420, 0.0573, 0.0717]) -Greedy action tensor([ 1.2853, -0.4130, -0.3219, 0.3181]) tensor([0.5670, 0.1038, 0.1137, 0.2155]) -Greedy action tensor([ 1.1782, -0.6065, -0.0608, 0.0600]) tensor([0.5604, 0.0941, 0.1623, 0.1832]) -Greedy action tensor([ 0.9169, -0.2201, -0.2160, 0.0671]) tensor([0.4830, 0.1549, 0.1556, 0.2065]) -Greedy action tensor([ 1.2260, -0.4150, -0.1228, 0.0582]) tensor([0.5668, 0.1098, 0.1471, 0.1763]) -Greedy action tensor([ 1.3932, -0.5639, -0.4560, 0.2075]) tensor([0.6234, 0.0881, 0.0981, 0.1905]) -Greedy action tensor([ 2.3165, -0.6184, -0.3020, 0.5421]) tensor([0.7718, 0.0410, 0.0563, 0.1309]) -Greedy action tensor([ 1.2999, -0.2148, -0.6123, 0.4485]) tensor([0.5573, 0.1225, 0.0823, 0.2379]) -Greedy action tensor([ 1.3247, -0.0456, -0.6876, 0.4895]) tensor([0.5490, 0.1395, 0.0734, 0.2381]) -Greedy action tensor([ 1.1954, -0.2131, -0.4622, 0.2952]) tensor([0.5430, 0.1328, 0.1035, 0.2207]) -Greedy action tensor([ 0.4711, -0.2707, -0.1510, 0.2241]) tensor([0.3579, 0.1704, 0.1921, 0.2796]) -Greedy action tensor([ 0.5908, -0.2583, -0.0211, 0.1208]) tensor([0.3853, 0.1648, 0.2090, 0.2408]) -Greedy action tensor([ 2.3722, -0.6959, -0.5881, 0.7585]) tensor([0.7707, 0.0358, 0.0399, 0.1535]) -Greedy action tensor([ 1.4207, -0.5096, -0.3284, 0.2442]) tensor([0.6145, 0.0892, 0.1069, 0.1895]) -Greedy action tensor([ 1.4023, -0.0721, -0.2642, 0.3988]) tensor([0.5604, 0.1283, 0.1059, 0.2054]) -Greedy action tensor([ 0.8405, -0.3268, -0.2221, 0.0062]) tensor([0.4782, 0.1488, 0.1653, 0.2077]) -Greedy action tensor([ 1.7474, -0.6537, -0.3218, 0.5479]) tensor([0.6586, 0.0597, 0.0832, 0.1985]) -Greedy action tensor([ 0.5087, -0.2430, -0.3504, -0.1015]) tensor([0.4101, 0.1934, 0.1737, 0.2228]) -Greedy action tensor([ 1.0894, -0.3611, -0.0080, 0.1216]) tensor([0.5133, 0.1203, 0.1713, 0.1950]) -Greedy action tensor([ 1.9568, -0.7350, -0.3282, 1.0862]) tensor([0.6296, 0.0427, 0.0641, 0.2636]) -Greedy action tensor([ 1.4741, -0.4177, -0.4663, 0.2820]) tensor([0.6258, 0.0944, 0.0899, 0.1900]) -Greedy action tensor([ 0.8814, -0.4691, -0.3205, 0.2390]) tensor([0.4794, 0.1242, 0.1441, 0.2522]) -Greedy action tensor([ 1.8656, -0.2888, -0.5726, 0.2247]) tensor([0.7158, 0.0830, 0.0625, 0.1387]) -Greedy action tensor([ 1.5355, -0.6127, -0.2798, 0.5561]) tensor([0.6042, 0.0705, 0.0984, 0.2269]) -Greedy action tensor([ 1.8337, -0.8221, -0.2521, 0.1333]) tensor([0.7262, 0.0510, 0.0902, 0.1326]) -Greedy action tensor([ 1.0997, -0.4063, -0.1217, 0.1628]) tensor([0.5240, 0.1162, 0.1545, 0.2053]) -Greedy action tensor([ 1.4191, -0.5956, -0.1155, 0.3444]) tensor([0.5916, 0.0789, 0.1275, 0.2020]) -Greedy action tensor([ 1.7647, -0.2612, -0.4944, 0.4286]) tensor([0.6670, 0.0880, 0.0697, 0.1753]) -Greedy action tensor([ 0.9590, 0.0929, -0.6332, -0.0266]) tensor([0.5007, 0.2106, 0.1019, 0.1869]) -Greedy action tensor([ 1.7671, -0.5644, -0.3306, 0.5779]) tensor([0.6560, 0.0637, 0.0805, 0.1997]) -Greedy action tensor([ 0.0067, -0.9899, -0.7411, 0.9107]) tensor([0.2319, 0.0856, 0.1098, 0.5727]) -Greedy action tensor([ 0.4459, 0.1935, -0.1115, 0.7617]) tensor([0.2687, 0.2088, 0.1539, 0.3685]) -Greedy action tensor([ 0.9720, 0.1080, -0.3735, -0.7331]) tensor([0.5366, 0.2261, 0.1397, 0.0975]) -Greedy action tensor([-0.4114, -1.2833, 0.2386, 0.3462]) tensor([0.1829, 0.0765, 0.3504, 0.3902]) -Greedy action tensor([0.7793, 0.4491, 0.2913, 0.4716]) tensor([0.3260, 0.2343, 0.2001, 0.2396]) -Greedy action tensor([0.9165, 0.1790, 0.3767, 0.4750]) tensor([0.3698, 0.1769, 0.2155, 0.2378]) -Greedy action tensor([-0.6057, -0.9386, -0.0081, -0.0237]) tensor([0.1878, 0.1346, 0.3414, 0.3361]) -Greedy action tensor([ 0.1961, -1.4506, 0.0025, 0.3274]) tensor([0.3167, 0.0610, 0.2610, 0.3612]) -Greedy action tensor([0.3332, 0.1840, 0.8324, 0.9101]) tensor([0.1891, 0.1629, 0.3115, 0.3366]) -Greedy action tensor([ 0.4640, 0.3326, 1.2570, -0.0746]) tensor([0.2141, 0.1877, 0.4732, 0.1250]) -Greedy action tensor([ 0.2051, -0.3620, 0.4757, -0.0556]) tensor([0.2741, 0.1555, 0.3593, 0.2112]) -Greedy action tensor([-0.1288, -0.5009, -0.3731, 0.0425]) tensor([0.2733, 0.1884, 0.2140, 0.3243]) -Greedy action tensor([1.6881, 0.7811, 0.1465, 1.2517]) tensor([0.4417, 0.1783, 0.0945, 0.2855]) -Greedy action tensor([-0.1900, -0.7831, -0.2236, 0.0519]) tensor([0.2636, 0.1457, 0.2549, 0.3358]) -Greedy action tensor([-0.3766, -0.8867, -0.0898, 0.3279]) tensor([0.2018, 0.1212, 0.2688, 0.4082]) -Greedy action tensor([-0.0457, -0.8749, -0.3694, -0.2993]) tensor([0.3406, 0.1487, 0.2464, 0.2643]) -Greedy action tensor([ 1.4166, -0.0082, 1.3800, 0.9226]) tensor([0.3552, 0.0855, 0.3425, 0.2168]) -Greedy action tensor([-0.4575, -0.8198, -0.4048, -0.3638]) tensor([0.2598, 0.1809, 0.2739, 0.2854]) -Greedy action tensor([-0.4562, -1.2145, -0.2010, 0.6403]) tensor([0.1738, 0.0814, 0.2244, 0.5204]) -Greedy action tensor([ 0.6473, -0.1918, -0.6777, 2.0260]) tensor([0.1764, 0.0762, 0.0469, 0.7004]) -Greedy action tensor([-0.3666, -1.0924, 0.2066, 0.9556]) tensor([0.1427, 0.0690, 0.2531, 0.5352]) -Greedy action tensor([ 0.4709, 0.2996, -0.9329, -0.4074]) tensor([0.3994, 0.3365, 0.0981, 0.1659]) -Greedy action tensor([ 1.1478, 0.4714, -0.5080, 0.8185]) tensor([0.4134, 0.2102, 0.0789, 0.2974]) -Greedy action tensor([-0.4378, -1.3867, -0.5053, 0.2082]) tensor([0.2364, 0.0915, 0.2210, 0.4510]) -Greedy action tensor([ 0.8923, -0.0780, 0.7050, 0.3423]) tensor([0.3591, 0.1361, 0.2977, 0.2072]) -Greedy action tensor([ 0.3937, -2.2390, 0.4616, 0.1870]) tensor([0.3384, 0.0243, 0.3621, 0.2752]) -Greedy action tensor([ 0.9296, -0.3569, 0.5867, 1.6684]) tensor([0.2451, 0.0677, 0.1740, 0.5132]) -Greedy action tensor([-0.9326, -1.3898, -0.5825, 0.0917]) tensor([0.1713, 0.1084, 0.2431, 0.4771]) -Greedy action tensor([ 0.0383, -0.4634, -0.7148, 0.5148]) tensor([0.2712, 0.1642, 0.1277, 0.4368]) -Greedy action tensor([ 0.3357, -0.5656, 1.2522, -0.4805]) tensor([0.2299, 0.0934, 0.5750, 0.1017]) -Greedy action tensor([0.4332, 0.3588, 0.9038, 1.3072]) tensor([0.1688, 0.1566, 0.2702, 0.4044]) -Greedy action tensor([ 0.2499, -0.5964, 0.1065, 0.3367]) tensor([0.2953, 0.1267, 0.2559, 0.3221]) -Greedy action tensor([1.4180, 1.1032, 0.3277, 0.0453]) tensor([0.4311, 0.3147, 0.1449, 0.1093]) -Greedy action tensor([ 1.0128, 0.1134, -0.1325, 0.8561]) tensor([0.3876, 0.1577, 0.1233, 0.3314]) -Greedy action tensor([-1.4740, -1.4563, 0.8628, 0.2822]) tensor([0.0551, 0.0561, 0.5699, 0.3189]) -Greedy action tensor([1.0415, 0.4362, 0.3800, 0.8568]) tensor([0.3456, 0.1887, 0.1784, 0.2873]) -Greedy action tensor([-0.6202, 0.1868, 0.5652, -1.0742]) tensor([0.1399, 0.3135, 0.4577, 0.0888]) -Greedy action tensor([-9.4786e-05, -1.0774e+00, -9.5148e-01, 4.6152e-01]) tensor([0.3018, 0.1028, 0.1166, 0.4789]) -Greedy action tensor([-1.3155, -0.5356, -0.8509, 0.4113]) tensor([0.0962, 0.2098, 0.1531, 0.5409]) -Greedy action tensor([ 0.8755, -0.7197, 0.4477, -0.4468]) tensor([0.4714, 0.0956, 0.3073, 0.1256]) -Greedy action tensor([ 0.5005, -0.8543, 0.7457, -0.1739]) tensor([0.3284, 0.0847, 0.4196, 0.1673]) -Greedy action tensor([-1.0131, -1.0301, -0.4945, 0.4665]) tensor([0.1242, 0.1221, 0.2085, 0.5452]) -Greedy action tensor([ 0.7348, -0.7865, 0.8560, -0.4354]) tensor([0.3763, 0.0822, 0.4248, 0.1168]) -Greedy action tensor([-0.3080, -0.7884, -0.4668, 0.3877]) tensor([0.2234, 0.1382, 0.1906, 0.4479]) -Greedy action tensor([-0.2963, 0.3152, 0.7077, -1.4052]) tensor([0.1694, 0.3123, 0.4624, 0.0559]) -Greedy action tensor([ 0.8221, -0.0225, 1.2998, 0.9960]) tensor([0.2363, 0.1015, 0.3810, 0.2812]) -Greedy action tensor([0.4956, 0.0131, 0.2647, 0.0597]) tensor([0.3270, 0.2019, 0.2596, 0.2115]) -Greedy action tensor([-0.0687, -1.7096, -0.2399, 0.8928]) tensor([0.2150, 0.0417, 0.1811, 0.5622]) -Greedy action tensor([-0.5703, -1.4504, 0.9463, 0.6023]) tensor([0.1087, 0.0451, 0.4952, 0.3510]) -Greedy action tensor([ 0.5542, 0.5417, 0.1807, -0.3043]) tensor([0.3226, 0.3186, 0.2221, 0.1367]) -Greedy action tensor([ 1.3357, -0.8397, 1.6575, 0.7962]) tensor([0.3251, 0.0369, 0.4485, 0.1895]) -Greedy action tensor([-0.6048, -1.1199, -0.1225, 0.2232]) tensor([0.1816, 0.1085, 0.2942, 0.4157]) -Greedy action tensor([-0.4214, -1.9354, -0.7504, -0.3399]) tensor([0.3306, 0.0727, 0.2379, 0.3587]) -Greedy action tensor([ 1.3713, -1.0976, 0.6682, 1.2850]) tensor([0.4005, 0.0339, 0.1982, 0.3674]) -Greedy action tensor([ 0.4087, 0.0398, 0.5946, -0.0067]) tensor([0.2812, 0.1945, 0.3387, 0.1856]) -Greedy action tensor([ 0.6628, 0.9383, -0.5281, -1.0115]) tensor([0.3561, 0.4690, 0.1082, 0.0667]) -Greedy action tensor([ 0.2938, -0.6727, 0.0708, -0.5854]) tensor([0.3853, 0.1466, 0.3083, 0.1599]) -Greedy action tensor([1.3715, 0.6265, 0.2435, 0.2183]) tensor([0.4730, 0.2246, 0.1531, 0.1493]) -Greedy action tensor([ 0.2776, -0.2797, -0.5819, 0.6973]) tensor([0.2843, 0.1628, 0.1204, 0.4325]) -Greedy action tensor([-0.4463, -1.2372, -0.2920, 0.2586]) tensor([0.2153, 0.0976, 0.2513, 0.4358]) -Greedy action tensor([-0.5913, -1.7204, 0.8383, 1.3353]) tensor([0.0809, 0.0261, 0.3378, 0.5552]) -Greedy action tensor([ 0.7723, -0.7345, 0.1453, 0.6596]) tensor([0.3775, 0.0837, 0.2016, 0.3372]) -Greedy action tensor([ 0.0519, -1.2564, 1.0711, 1.7463]) tensor([0.1054, 0.0285, 0.2922, 0.5739]) -Greedy action tensor([ 0.3807, -1.0153, -0.6039, 0.2690]) tensor([0.3975, 0.0984, 0.1485, 0.3555]) -Greedy action tensor([ 0.1801, 1.0057, -0.4470, -1.1856]) tensor([0.2455, 0.5606, 0.1312, 0.0627]) -Greedy action tensor([-0.7930, -0.5194, 0.4020, -1.1166]) tensor([0.1577, 0.2073, 0.5209, 0.1141]) -Greedy action tensor([0.7777, 1.0636, 0.4984, 0.0891]) tensor([0.2786, 0.3708, 0.2107, 0.1399]) -Greedy action tensor([ 1.0317, -1.0642, -0.3405, 1.9536]) tensor([0.2570, 0.0316, 0.0652, 0.6462]) -Greedy action tensor([-0.4602, -2.2236, 0.3075, 0.8393]) tensor([0.1430, 0.0245, 0.3081, 0.5244]) -Greedy action tensor([ 1.3063, 0.5593, -0.5272, 0.7010]) tensor([0.4588, 0.2174, 0.0733, 0.2505]) -Greedy action tensor([-0.1901, 1.2380, 0.7701, -0.6968]) tensor([0.1192, 0.4974, 0.3115, 0.0719]) -Greedy action tensor([ 0.6498, 0.1450, -0.9087, 1.3151]) tensor([0.2660, 0.1606, 0.0560, 0.5174]) -Greedy action tensor([ 0.4248, -1.1128, -0.4710, 0.5052]) tensor([0.3694, 0.0794, 0.1508, 0.4004]) -Greedy action tensor([ 0.0688, -1.5107, 0.2620, 0.9344]) tensor([0.2085, 0.0430, 0.2530, 0.4955]) -Greedy action tensor([ 1.9573, -1.1045, 0.0372, 1.4790]) tensor([0.5515, 0.0258, 0.0808, 0.3418]) -Greedy action tensor([ 1.7713, -0.2729, 1.5925, 0.2485]) tensor([0.4579, 0.0593, 0.3829, 0.0999]) -Greedy action tensor([-1.0940, 0.6028, -0.3790, -0.0225]) tensor([0.0876, 0.4778, 0.1790, 0.2557]) -Greedy action tensor([-0.0493, -1.6026, 0.5779, 0.4069]) tensor([0.2145, 0.0454, 0.4016, 0.3385]) -Greedy action tensor([ 0.1707, -1.3076, 0.0166, 0.9807]) tensor([0.2308, 0.0526, 0.1978, 0.5188]) -Greedy action tensor([-0.2338, -1.6997, 1.0011, 0.3539]) tensor([0.1546, 0.0357, 0.5315, 0.2782]) -Greedy action tensor([-0.4142, -0.3329, 1.0347, -0.0861]) tensor([0.1293, 0.1403, 0.5508, 0.1796]) -Greedy action tensor([ 1.5227, -1.1380, 0.6157, 1.0876]) tensor([0.4715, 0.0330, 0.1904, 0.3052]) -Greedy action tensor([ 0.7016, -0.6394, 0.1433, -0.2536]) tensor([0.4508, 0.1179, 0.2579, 0.1734]) -Greedy action tensor([ 0.7518, -0.7863, 0.0901, -0.6554]) tensor([0.5062, 0.1087, 0.2612, 0.1239]) -Greedy action tensor([ 0.4031, -0.0082, -0.1791, -0.0677]) tensor([0.3514, 0.2329, 0.1963, 0.2194]) -Greedy action tensor([ 0.6227, -0.4003, 0.1461, -0.3542]) tensor([0.4243, 0.1525, 0.2634, 0.1597]) -Greedy action tensor([ 0.3746, -0.0324, -0.1225, -0.0543]) tensor([0.3419, 0.2275, 0.2079, 0.2226]) -Greedy action tensor([ 0.9760, -1.0051, 0.0911, -0.5932]) tensor([0.5685, 0.0784, 0.2347, 0.1184]) -Greedy action tensor([ 0.4082, -0.0065, -0.0923, -0.2338]) tensor([0.3580, 0.2365, 0.2171, 0.1884]) -Greedy action tensor([ 0.8012, -0.7959, -0.1049, -0.2274]) tensor([0.5091, 0.1031, 0.2057, 0.1820]) -Greedy action tensor([ 0.6717, -0.3130, -0.0086, -0.1346]) tensor([0.4298, 0.1606, 0.2177, 0.1919]) -Greedy action tensor([ 0.5278, -0.2739, -0.0367, -0.2826]) tensor([0.4062, 0.1822, 0.2310, 0.1806]) -Greedy action tensor([ 0.6346, 0.0014, 0.0548, -0.1920]) tensor([0.3955, 0.2100, 0.2215, 0.1730]) -Greedy action tensor([ 0.8113, -0.6228, -0.0276, -0.4349]) tensor([0.5107, 0.1217, 0.2207, 0.1469]) -Greedy action tensor([ 0.4458, -0.2181, -0.1211, -0.3006]) tensor([0.3912, 0.2014, 0.2219, 0.1855]) -Greedy action tensor([ 0.7842, -0.7278, -0.0910, -0.2965]) tensor([0.5059, 0.1115, 0.2109, 0.1717]) -Greedy action tensor([ 0.7554, -0.6813, 0.0190, -0.3465]) tensor([0.4881, 0.1160, 0.2337, 0.1622]) -Greedy action tensor([ 1.0855, -0.8449, 0.0733, -0.6450]) tensor([0.5932, 0.0861, 0.2156, 0.1051]) -Greedy action tensor([ 0.8784, -0.3071, -0.1498, -0.1071]) tensor([0.4910, 0.1501, 0.1756, 0.1833]) -Greedy action tensor([ 1.0021, -0.9487, 0.0402, -0.5184]) tensor([0.5737, 0.0816, 0.2193, 0.1254]) -Greedy action tensor([ 0.6464, -0.2369, -0.0706, -0.1571]) tensor([0.4256, 0.1760, 0.2078, 0.1906]) -Greedy action tensor([ 0.8400, -0.4918, 0.0125, -0.6636]) tensor([0.5199, 0.1373, 0.2273, 0.1156]) -Greedy action tensor([ 1.0138, -0.8101, 0.0107, -0.3541]) tensor([0.5609, 0.0905, 0.2057, 0.1428]) -Greedy action tensor([ 0.9924, -0.6181, 0.0703, -0.7242]) tensor([0.5627, 0.1124, 0.2238, 0.1011]) -Greedy action tensor([ 0.5733, -0.2960, -0.0955, -0.2354]) tensor([0.4207, 0.1764, 0.2155, 0.1874]) -Greedy action tensor([ 0.5350, -0.5149, -0.1268, -0.2532]) tensor([0.4309, 0.1508, 0.2223, 0.1959]) -Greedy action tensor([ 0.8939, -0.5147, -0.0582, -0.3349]) tensor([0.5200, 0.1271, 0.2007, 0.1522]) -Greedy action tensor([-0.0731, 0.1479, -0.0267, -0.2456]) tensor([0.2417, 0.3016, 0.2532, 0.2035]) -Greedy action tensor([ 0.5281, -0.2130, -0.0942, -0.1904]) tensor([0.3999, 0.1906, 0.2146, 0.1949]) -Greedy action tensor([ 0.7138, -0.4664, -0.0726, -0.4640]) tensor([0.4829, 0.1484, 0.2200, 0.1487]) -Greedy action tensor([ 0.6379, -0.3651, 0.0327, -0.2810]) tensor([0.4326, 0.1587, 0.2362, 0.1726]) -Greedy action tensor([ 0.5027, -0.1614, -0.0941, -0.1602]) tensor([0.3875, 0.1995, 0.2133, 0.1997]) -Greedy action tensor([ 0.7424, -0.5397, 0.0177, -0.3713]) tensor([0.4784, 0.1327, 0.2318, 0.1571]) -Greedy action tensor([ 0.3783, 0.1620, 0.0674, -0.4172]) tensor([0.3345, 0.2694, 0.2451, 0.1510]) -Greedy action tensor([ 0.8237, -0.5103, -0.1684, -0.3589]) tensor([0.5153, 0.1357, 0.1911, 0.1579]) -Greedy action tensor([ 0.5946, -0.3018, -0.0730, -0.2469]) tensor([0.4252, 0.1735, 0.2181, 0.1833]) -Greedy action tensor([ 0.6888, -0.2255, -0.0859, -0.2683]) tensor([0.4453, 0.1785, 0.2052, 0.1710]) -Greedy action tensor([ 0.8812, -0.4576, 0.0339, -0.2350]) tensor([0.4955, 0.1299, 0.2124, 0.1623]) -Greedy action tensor([ 1.1914, -0.5930, -0.0172, -0.8492]) tensor([0.6264, 0.1052, 0.1871, 0.0814]) -Greedy action tensor([ 0.7939, -0.3468, -0.0173, -0.7150]) tensor([0.5038, 0.1610, 0.2238, 0.1114]) -Greedy action tensor([ 0.5073, -0.3794, -0.1436, -0.6233]) tensor([0.4432, 0.1826, 0.2312, 0.1431]) -Greedy action tensor([ 0.5617, -0.1707, -0.0315, -0.3215]) tensor([0.4087, 0.1965, 0.2258, 0.1690]) -Greedy action tensor([ 0.3880, -0.0307, 0.0025, -0.0343]) tensor([0.3340, 0.2198, 0.2272, 0.2190]) -Greedy action tensor([ 0.9221, -0.6784, 0.0606, -0.4294]) tensor([0.5310, 0.1072, 0.2244, 0.1375]) -Greedy action tensor([ 0.8212, -0.5832, 0.0585, -0.4088]) tensor([0.4989, 0.1225, 0.2327, 0.1458]) -Greedy action tensor([ 0.5306, -0.1915, 0.1020, -0.3529]) tensor([0.3921, 0.1904, 0.2554, 0.1621]) -Greedy action tensor([ 0.9771, -0.1174, 0.1016, -0.7004]) tensor([0.5160, 0.1727, 0.2150, 0.0964]) -Greedy action tensor([ 0.7896, -0.4430, -0.0420, -0.2993]) tensor([0.4846, 0.1413, 0.2110, 0.1631]) -Greedy action tensor([ 0.5814, -0.2247, 0.0685, -0.2079]) tensor([0.4001, 0.1787, 0.2396, 0.1817]) -Greedy action tensor([ 0.4287, -0.0977, -0.1567, -0.2767]) tensor([0.3786, 0.2236, 0.2108, 0.1870]) -Greedy action tensor([ 1.2318, -0.8099, 0.0520, -0.5008]) tensor([0.6196, 0.0804, 0.1904, 0.1096]) -Greedy action tensor([ 0.9024, -0.6873, -0.1891, -0.4569]) tensor([0.5566, 0.1135, 0.1869, 0.1430]) -Greedy action tensor([ 0.7234, -0.1073, -0.1621, -0.5210]) tensor([0.4681, 0.2040, 0.1931, 0.1349]) -Greedy action tensor([ 0.5348, -0.0466, -0.0043, -0.1223]) tensor([0.3758, 0.2101, 0.2192, 0.1948]) -Greedy action tensor([ 0.9461, -0.9606, 0.0228, -0.4643]) tensor([0.5587, 0.0830, 0.2219, 0.1364]) -Greedy action tensor([ 0.7942, -0.4580, -0.1861, -0.3617]) tensor([0.5061, 0.1447, 0.1899, 0.1593]) -Greedy action tensor([ 0.5678, -0.4501, -0.0313, -0.2067]) tensor([0.4217, 0.1524, 0.2316, 0.1944]) -Greedy action tensor([ 0.0921, 0.0998, -0.0237, -0.3542]) tensor([0.2826, 0.2848, 0.2517, 0.1809]) -Greedy action tensor([ 0.6755, -0.0607, 0.0307, -0.1352]) tensor([0.4085, 0.1956, 0.2143, 0.1816]) -Greedy action tensor([ 0.7587, 0.0518, 0.1674, -0.2104]) tensor([0.4122, 0.2033, 0.2282, 0.1564]) -Greedy action tensor([ 0.7175, -0.2109, -0.0673, -0.5379]) tensor([0.4681, 0.1850, 0.2135, 0.1334]) -Greedy action tensor([ 0.7667, -0.5975, -0.2050, -0.5082]) tensor([0.5226, 0.1336, 0.1978, 0.1460]) -Greedy action tensor([ 0.6303, -0.4054, -0.1812, -0.0745]) tensor([0.4360, 0.1548, 0.1937, 0.2155]) -Greedy action tensor([ 0.5672, -0.1213, -0.0464, -0.1450]) tensor([0.3946, 0.1982, 0.2136, 0.1936]) -Greedy action tensor([ 0.9996, -0.6566, 0.0942, -0.6022]) tensor([0.5565, 0.1062, 0.2251, 0.1122]) -Greedy action tensor([ 0.6366, -0.1691, -0.0600, -0.1723]) tensor([0.4184, 0.1869, 0.2084, 0.1863]) -Greedy action tensor([ 0.8784, -0.4149, -0.1036, -0.3727]) tensor([0.5168, 0.1418, 0.1936, 0.1479]) -Greedy action tensor([ 0.7688, -0.3550, -0.1399, -0.3444]) tensor([0.4862, 0.1580, 0.1960, 0.1597]) -Greedy action tensor([ 0.5930, -0.2412, -0.0828, -0.4875]) tensor([0.4381, 0.1902, 0.2229, 0.1487]) -Greedy action tensor([ 0.3759, -0.6239, -0.1617, -0.2274]) tensor([0.4002, 0.1472, 0.2337, 0.2189]) -Greedy action tensor([ 0.2328, 0.3007, 0.2398, -0.1159]) tensor([0.2643, 0.2829, 0.2662, 0.1865]) -Greedy action tensor([ 0.8931, -0.5629, -0.1004, -0.5253]) tensor([0.5419, 0.1263, 0.2006, 0.1312]) -Greedy action tensor([ 0.8016, -0.4818, -0.0385, -0.3918]) tensor([0.4970, 0.1377, 0.2146, 0.1507]) -Greedy action tensor([ 0.7338, -0.2333, 0.0115, -0.4101]) tensor([0.4578, 0.1740, 0.2223, 0.1458]) -Greedy action tensor([ 0.6328, -0.4018, 0.0067, -0.2642]) tensor([0.4352, 0.1547, 0.2327, 0.1775]) -Greedy action tensor([ 0.4585, 0.0033, 0.0080, -0.2217]) tensor([0.3600, 0.2283, 0.2294, 0.1823]) -Greedy action tensor([ 0.6277, -0.2652, -0.0473, -0.3678]) tensor([0.4370, 0.1789, 0.2225, 0.1615]) -Greedy action tensor([ 0.5595, -0.3643, -0.1148, -0.4413]) tensor([0.4397, 0.1746, 0.2240, 0.1616]) -Greedy action tensor([ 0.6539, -0.0161, 0.0505, -0.3084]) tensor([0.4097, 0.2097, 0.2241, 0.1565]) -Greedy action tensor([ 1.0922, -0.6990, -0.1281, -0.4223]) tensor([0.5946, 0.0992, 0.1755, 0.1308]) -Greedy action tensor([ 0.4064, -0.2297, -0.1235, -0.3808]) tensor([0.3886, 0.2057, 0.2288, 0.1769]) -Greedy action tensor([ 0.5864, -0.4977, -0.0616, -0.3373]) tensor([0.4428, 0.1498, 0.2316, 0.1758]) -Greedy action tensor([ 0.6287, -0.4332, -0.0201, -0.5527]) tensor([0.4597, 0.1590, 0.2403, 0.1411]) -Greedy action tensor([-1.8830, -0.4402, 0.6362, -0.1399]) tensor([0.0428, 0.1811, 0.5315, 0.2446]) -Greedy action tensor([-1.8660, -0.4408, 0.6136, -0.1392]) tensor([0.0440, 0.1831, 0.5254, 0.2475]) -Greedy action tensor([-1.0982, 0.7174, 0.1551, 0.0917]) tensor([0.0718, 0.4410, 0.2513, 0.2359]) -Greedy action tensor([-1.9018, -0.4330, 0.6444, -0.1495]) tensor([0.0419, 0.1820, 0.5345, 0.2416]) -Greedy action tensor([-1.8325, -0.4529, 0.6115, -0.1144]) tensor([0.0453, 0.1801, 0.5220, 0.2526]) -Greedy action tensor([-2.0055, -0.8968, 0.9037, 0.0432]) tensor([0.0332, 0.1006, 0.6088, 0.2575]) -Greedy action tensor([-1.3525, -0.3720, 0.3704, 0.1159]) tensor([0.0735, 0.1959, 0.4116, 0.3191]) -Greedy action tensor([-1.8939, -0.4148, 0.6781, -0.1013]) tensor([0.0408, 0.1792, 0.5347, 0.2453]) -Greedy action tensor([-1.8560, -0.4248, 0.6224, -0.1254]) tensor([0.0440, 0.1839, 0.5240, 0.2481]) -Greedy action tensor([-0.1779, 0.2510, 0.8142, 1.5141]) tensor([0.0938, 0.1440, 0.2529, 0.5093]) -Greedy action tensor([-1.9137, -0.4396, 0.6528, -0.1648]) tensor([0.0414, 0.1809, 0.5395, 0.2382]) -Greedy action tensor([-0.5173, -0.0277, 0.7243, 1.5513]) tensor([0.0714, 0.1165, 0.2471, 0.5650]) -Greedy action tensor([-1.6929, -0.4807, 0.5929, 0.1185]) tensor([0.0492, 0.1654, 0.4841, 0.3012]) -Greedy action tensor([-1.8500, -0.4234, 0.6414, -0.2091]) tensor([0.0446, 0.1859, 0.5392, 0.2303]) -Greedy action tensor([-1.8230, -0.4648, 0.6022, -0.1030]) tensor([0.0459, 0.1786, 0.5191, 0.2564]) -Greedy action tensor([-1.6453, -0.1269, 0.4907, -0.0942]) tensor([0.0533, 0.2435, 0.4516, 0.2516]) -Greedy action tensor([-1.8355, -0.3580, 0.6044, -0.1120]) tensor([0.0445, 0.1951, 0.5108, 0.2495]) -Greedy action tensor([-1.8847, -0.4493, 0.6899, -0.1066]) tensor([0.0412, 0.1733, 0.5414, 0.2441]) -Greedy action tensor([-1.7894, -0.4914, 0.6066, -0.0377]) tensor([0.0467, 0.1711, 0.5129, 0.2693]) -Greedy action tensor([-1.8766, -0.4423, 0.6291, -0.1476]) tensor([0.0433, 0.1818, 0.5308, 0.2441]) -Greedy action tensor([-1.9043, -0.4300, 0.6437, -0.1593]) tensor([0.0419, 0.1830, 0.5354, 0.2398]) -Greedy action tensor([-0.7142, -0.5958, 0.3263, 0.2609]) tensor([0.1314, 0.1480, 0.3721, 0.3485]) -Greedy action tensor([-0.7033, -0.1997, 0.2040, -0.0675]) tensor([0.1424, 0.2357, 0.3529, 0.2690]) -Greedy action tensor([-1.9137, -0.4155, 0.6456, -0.1603]) tensor([0.0414, 0.1851, 0.5347, 0.2388]) -Greedy action tensor([-1.6976, -0.4918, 0.9648, 0.5036]) tensor([0.0361, 0.1205, 0.5173, 0.3261]) -Greedy action tensor([-1.1289, -0.4959, 0.4976, 0.9349]) tensor([0.0631, 0.1188, 0.3210, 0.4971]) -Greedy action tensor([-1.9281, -0.4468, 0.6850, 0.0556]) tensor([0.0380, 0.1672, 0.5185, 0.2763]) -Greedy action tensor([-1.8443, -0.3375, 0.5968, -0.1217]) tensor([0.0443, 0.1997, 0.5083, 0.2478]) -Greedy action tensor([-1.3570, -0.3174, 0.4665, 0.4719]) tensor([0.0615, 0.1741, 0.3812, 0.3832]) -Greedy action tensor([-0.9492, -0.5828, 0.2139, 0.2865]) tensor([0.1101, 0.1588, 0.3523, 0.3788]) -Greedy action tensor([-1.9349, -0.4446, 0.6636, -0.1747]) tensor([0.0405, 0.1797, 0.5444, 0.2354]) -Greedy action tensor([-0.7400, -0.6254, 0.3791, -0.0862]) tensor([0.1407, 0.1578, 0.4309, 0.2706]) -Greedy action tensor([-1.5276, -0.5697, 0.4338, 0.0921]) tensor([0.0634, 0.1653, 0.4509, 0.3204]) -Greedy action tensor([-1.5632, -0.5542, 0.5172, 0.0982]) tensor([0.0588, 0.1612, 0.4706, 0.3095]) -Greedy action tensor([-1.9183, -0.4417, 0.6508, -0.1679]) tensor([0.0413, 0.1810, 0.5397, 0.2380]) -Greedy action tensor([-1.9189, -0.4395, 0.6554, -0.1639]) tensor([0.0412, 0.1807, 0.5401, 0.2380]) -Greedy action tensor([-1.1164, -0.2243, 0.9999, 1.1988]) tensor([0.0457, 0.1116, 0.3796, 0.4631]) -Greedy action tensor([-0.8978, -0.5690, 0.3409, 1.2047]) tensor([0.0713, 0.0990, 0.2460, 0.5836]) -Greedy action tensor([-1.9854, -0.7562, 0.5797, -0.1527]) tensor([0.0422, 0.1444, 0.5493, 0.2641]) -Greedy action tensor([-1.8959, -0.4328, 0.6422, -0.1536]) tensor([0.0422, 0.1824, 0.5343, 0.2411]) -Greedy action tensor([-1.0878, -0.3076, 0.4696, 0.6229]) tensor([0.0743, 0.1621, 0.3526, 0.4110]) -Greedy action tensor([-1.9015, -0.4489, 0.6580, -0.1348]) tensor([0.0416, 0.1777, 0.5375, 0.2433]) -Greedy action tensor([-1.7650, -0.4691, 0.6867, 0.1290]) tensor([0.0437, 0.1595, 0.5067, 0.2901]) -Greedy action tensor([-1.5552, -0.5224, 0.4917, -0.1050]) tensor([0.0632, 0.1776, 0.4896, 0.2696]) -Greedy action tensor([-1.9160, -0.2815, 0.6285, -0.1747]) tensor([0.0407, 0.2087, 0.5184, 0.2322]) -Greedy action tensor([-1.6459, -0.4876, 0.6421, 0.2781]) tensor([0.0479, 0.1525, 0.4718, 0.3279]) -Greedy action tensor([-1.0590, -0.6121, 0.3454, -0.0400]) tensor([0.1063, 0.1662, 0.4330, 0.2945]) -Greedy action tensor([-1.2591, -0.5725, 0.2989, 0.2414]) tensor([0.0818, 0.1626, 0.3887, 0.3669]) -Greedy action tensor([-1.9212, -0.4661, 0.6656, -0.1585]) tensor([0.0410, 0.1756, 0.5446, 0.2388]) -Greedy action tensor([-1.9287, -0.4360, 0.6587, -0.1718]) tensor([0.0408, 0.1813, 0.5418, 0.2361]) -Greedy action tensor([-1.8808, -0.4373, 0.6300, -0.1457]) tensor([0.0431, 0.1824, 0.5303, 0.2442]) -Greedy action tensor([-1.9264, -0.3525, 0.6422, -0.1626]) tensor([0.0405, 0.1953, 0.5281, 0.2361]) -Greedy action tensor([-0.4316, -0.3298, 0.2610, 0.7034]) tensor([0.1386, 0.1534, 0.2770, 0.4311]) -Greedy action tensor([-1.8556, -0.4155, 0.6206, -0.1362]) tensor([0.0441, 0.1860, 0.5241, 0.2459]) -Greedy action tensor([-1.2184, -0.5479, 0.3944, 0.5288]) tensor([0.0729, 0.1426, 0.3659, 0.4185]) -Greedy action tensor([-1.6660, -0.5089, 0.5423, -0.1070]) tensor([0.0554, 0.1764, 0.5046, 0.2636]) -Greedy action tensor([-1.5217, -0.4010, 0.5480, 0.3142]) tensor([0.0548, 0.1680, 0.4339, 0.3434]) -Greedy action tensor([-1.7382, -0.4305, 0.5900, -0.0137]) tensor([0.0486, 0.1798, 0.4988, 0.2728]) -Greedy action tensor([-1.8980, -0.4542, 0.6752, -0.1223]) tensor([0.0412, 0.1747, 0.5406, 0.2435]) -Greedy action tensor([-1.9192, -0.4622, 0.6691, -0.1443]) tensor([0.0408, 0.1752, 0.5432, 0.2408]) -Greedy action tensor([-1.0645, -0.5935, 0.2756, 0.4914]) tensor([0.0896, 0.1435, 0.3422, 0.4247]) -Greedy action tensor([-0.6932, -0.2410, 0.2939, -0.1834]) tensor([0.1445, 0.2271, 0.3878, 0.2406]) -Greedy action tensor([-1.2761, -0.4294, 0.4090, -0.0967]) tensor([0.0835, 0.1947, 0.4503, 0.2716]) -Greedy action tensor([-1.8869, -0.4495, 0.6372, -0.1483]) tensor([0.0428, 0.1801, 0.5338, 0.2434]) -Greedy action tensor([-1.4285, -0.1471, 0.5265, 0.2233]) tensor([0.0592, 0.2133, 0.4184, 0.3090]) -Greedy action tensor([-1.8600, -0.3989, 0.6331, -0.1106]) tensor([0.0432, 0.1861, 0.5224, 0.2483]) -Greedy action tensor([-1.4041, -0.3378, 0.6785, 0.7174]) tensor([0.0493, 0.1433, 0.3958, 0.4116]) -Greedy action tensor([-0.3901, -0.4369, 0.2075, 0.3309]) tensor([0.1716, 0.1637, 0.3119, 0.3528]) -Greedy action tensor([-1.1679, -0.4444, 0.3432, -0.0509]) tensor([0.0939, 0.1936, 0.4256, 0.2869]) -Greedy action tensor([-1.8559, -0.4452, 0.6191, -0.1310]) tensor([0.0443, 0.1814, 0.5259, 0.2484]) -Greedy action tensor([-1.5684, -0.1991, 0.5061, 0.1293]) tensor([0.0545, 0.2143, 0.4337, 0.2976]) -Greedy action tensor([-1.2784, -0.1414, 0.2633, 0.1765]) tensor([0.0765, 0.2384, 0.3574, 0.3277]) -Greedy action tensor([-1.8112, -0.4145, 0.6804, -0.0499]) tensor([0.0436, 0.1762, 0.5266, 0.2537]) -Greedy action tensor([-1.8195, -0.3085, 0.5722, -0.0903]) tensor([0.0453, 0.2050, 0.4947, 0.2550]) -Greedy action tensor([-1.6221, -0.4694, 0.6682, 0.3011]) tensor([0.0479, 0.1516, 0.4729, 0.3276]) -Greedy action tensor([-1.5050, -0.4719, 0.5481, 0.2842]) tensor([0.0569, 0.1598, 0.4431, 0.3403]) -Greedy action tensor([-1.8009, -0.4610, 0.5907, -0.1194]) tensor([0.0473, 0.1808, 0.5175, 0.2544]) -Greedy action tensor([-1.5889, -0.5365, 0.5126, 0.1582]) tensor([0.0562, 0.1611, 0.4599, 0.3227]) -Greedy action tensor([-1.0463, -0.5391, 0.2772, 0.4504]) tensor([0.0919, 0.1526, 0.3452, 0.4104]) -Greedy action tensor([-1.5338, -0.6246, 1.0844, 0.7969]) tensor([0.0364, 0.0903, 0.4990, 0.3743]) -Greedy action tensor([-1.9263, -0.4208, 0.6570, -0.1632]) tensor([0.0407, 0.1834, 0.5387, 0.2372]) -Greedy action tensor([ 1.4015, -0.4752, -0.2449, 0.5893]) tensor([0.5588, 0.0855, 0.1077, 0.2480]) -Greedy action tensor([ 1.4669, -0.3830, -0.2664, -0.0411]) tensor([0.6430, 0.1011, 0.1136, 0.1423]) -Greedy action tensor([ 1.2859, -0.3181, -0.5433, 0.3212]) tensor([0.5738, 0.1154, 0.0921, 0.2187]) -Greedy action tensor([ 1.0765, -0.0174, -0.5791, -0.4181]) tensor([0.5714, 0.1913, 0.1091, 0.1282]) -Greedy action tensor([ 1.4584, -0.3166, 0.0019, -0.0734]) tensor([0.6178, 0.1047, 0.1440, 0.1335]) -Greedy action tensor([ 1.3124, -0.6608, 0.0278, 0.2333]) tensor([0.5696, 0.0792, 0.1576, 0.1936]) -Greedy action tensor([ 1.2816, -0.2815, -0.2990, 0.3380]) tensor([0.5542, 0.1161, 0.1141, 0.2157]) -Greedy action tensor([ 1.5404, -1.0536, -0.1060, 0.5405]) tensor([0.6115, 0.0457, 0.1179, 0.2250]) -Greedy action tensor([-0.1431, -0.0563, -0.3605, 0.0909]) tensor([0.2404, 0.2622, 0.1935, 0.3038]) -Greedy action tensor([ 1.8818, -0.8200, -0.2512, 0.6354]) tensor([0.6788, 0.0455, 0.0804, 0.1952]) -Greedy action tensor([ 1.2380, -0.6556, -0.4440, 0.6808]) tensor([0.5238, 0.0788, 0.0974, 0.3000]) -Greedy action tensor([ 1.1256e+00, -4.3640e-01, -2.9400e-01, 5.9874e-04]) tensor([0.5630, 0.1181, 0.1361, 0.1828]) -Greedy action tensor([ 1.3360, -0.6471, -0.0935, 0.0779]) tensor([0.6019, 0.0829, 0.1441, 0.1711]) -Greedy action tensor([ 1.1772, -0.3466, -0.4760, 0.0462]) tensor([0.5774, 0.1258, 0.1105, 0.1863]) -Greedy action tensor([ 1.3173, -0.1889, -0.2865, 0.3912]) tensor([0.5498, 0.1219, 0.1106, 0.2178]) -Greedy action tensor([ 1.3150, -0.6281, -0.3134, 0.4913]) tensor([0.5623, 0.0806, 0.1104, 0.2468]) -Greedy action tensor([ 2.4236, -0.2162, -0.1332, 0.4452]) tensor([0.7769, 0.0555, 0.0603, 0.1074]) -Greedy action tensor([ 1.2728, -0.4275, -0.3209, 0.4259]) tensor([0.5511, 0.1006, 0.1120, 0.2363]) -Greedy action tensor([ 2.1355, -0.5077, -0.0740, 0.5066]) tensor([0.7262, 0.0517, 0.0797, 0.1424]) -Greedy action tensor([ 0.8557, -0.4667, -0.1140, 0.4763]) tensor([0.4292, 0.1144, 0.1627, 0.2937]) -Greedy action tensor([ 1.0233, -0.5375, 0.1441, -0.1055]) tensor([0.5132, 0.1078, 0.2131, 0.1660]) -Greedy action tensor([ 1.8090, -0.6908, -0.2577, 0.6406]) tensor([0.6581, 0.0540, 0.0833, 0.2046]) -Greedy action tensor([ 1.7586, -0.7174, -0.1275, 0.6242]) tensor([0.6421, 0.0540, 0.0974, 0.2065]) -Greedy action tensor([ 1.4732, -0.4770, -0.2726, 0.1842]) tensor([0.6280, 0.0893, 0.1096, 0.1731]) -Greedy action tensor([ 0.9127, -0.2375, -0.2183, 0.3890]) tensor([0.4481, 0.1419, 0.1446, 0.2654]) -Greedy action tensor([ 1.4260, -0.3577, -0.2587, 0.2597]) tensor([0.6006, 0.1009, 0.1114, 0.1871]) -Greedy action tensor([ 1.6998, -0.6889, -0.4087, 0.7617]) tensor([0.6232, 0.0572, 0.0757, 0.2439]) -Greedy action tensor([ 1.5338, -0.2461, -0.3897, 0.0963]) tensor([0.6442, 0.1087, 0.0941, 0.1530]) -Greedy action tensor([ 1.9562, -0.9026, -0.2391, 0.8716]) tensor([0.6637, 0.0381, 0.0739, 0.2244]) -Greedy action tensor([ 1.2966, -0.3272, -0.2719, 0.4454]) tensor([0.5457, 0.1076, 0.1137, 0.2330]) -Greedy action tensor([ 1.2834, -0.3036, -0.1982, 0.3151]) tensor([0.5520, 0.1129, 0.1255, 0.2096]) -Greedy action tensor([ 1.6177, -0.7710, -0.3386, 0.1868]) tensor([0.6792, 0.0623, 0.0960, 0.1624]) -Greedy action tensor([ 1.3971, -0.5768, 0.0639, 0.1888]) tensor([0.5878, 0.0817, 0.1550, 0.1756]) -Greedy action tensor([ 1.2032, -0.5808, -0.1523, 0.2060]) tensor([0.5572, 0.0936, 0.1437, 0.2056]) -Greedy action tensor([ 1.5721, -0.6110, -0.2547, 0.1701]) tensor([0.6580, 0.0742, 0.1059, 0.1619]) -Greedy action tensor([ 0.9261, -0.1273, -0.1068, 0.1051]) tensor([0.4663, 0.1626, 0.1660, 0.2051]) -Greedy action tensor([ 1.1424, -0.2664, -0.2613, 0.3445]) tensor([0.5154, 0.1260, 0.1266, 0.2321]) -Greedy action tensor([ 1.0841, -0.1021, -0.2911, 0.4928]) tensor([0.4735, 0.1446, 0.1197, 0.2621]) -Greedy action tensor([ 2.0022, -0.5823, -0.4531, 0.6892]) tensor([0.6992, 0.0527, 0.0600, 0.1881]) -Greedy action tensor([ 1.3384, -0.3474, -0.1209, 0.2609]) tensor([0.5688, 0.1054, 0.1322, 0.1936]) -Greedy action tensor([ 1.6031, -0.2262, -0.3047, 0.2604]) tensor([0.6369, 0.1022, 0.0945, 0.1663]) -Greedy action tensor([ 1.0887, -0.2192, -0.1298, -0.0567]) tensor([0.5307, 0.1435, 0.1569, 0.1688]) -Greedy action tensor([ 2.1227, -0.6686, -0.1133, 1.1314]) tensor([0.6496, 0.0398, 0.0694, 0.2411]) -Greedy action tensor([ 1.6282, -0.2078, -0.5743, 0.3286]) tensor([0.6482, 0.1034, 0.0717, 0.1767]) -Greedy action tensor([ 1.1961, -0.4388, -0.1337, 0.0487]) tensor([0.5628, 0.1097, 0.1489, 0.1786]) -Greedy action tensor([ 1.3159, -0.2555, -0.2518, 0.2596]) tensor([0.5669, 0.1178, 0.1182, 0.1971]) -Greedy action tensor([ 1.7089, -1.0137, 0.0227, 0.2047]) tensor([0.6788, 0.0446, 0.1257, 0.1508]) -Greedy action tensor([ 1.2200, -0.3179, -0.2065, 0.1554]) tensor([0.5556, 0.1194, 0.1334, 0.1916]) -Greedy action tensor([ 1.6966, -0.7225, -0.5474, 0.8485]) tensor([0.6160, 0.0548, 0.0653, 0.2638]) -Greedy action tensor([ 1.1449, 0.2126, -0.4490, 0.0516]) tensor([0.5176, 0.2038, 0.1052, 0.1735]) -Greedy action tensor([ 1.2518, -0.2512, -0.5257, 0.2740]) tensor([0.5657, 0.1259, 0.0956, 0.2128]) -Greedy action tensor([ 1.5595, -0.3360, -0.8053, 0.1204]) tensor([0.6751, 0.1014, 0.0634, 0.1601]) -Greedy action tensor([ 1.5323, -0.7466, -0.4799, 0.7990]) tensor([0.5826, 0.0597, 0.0779, 0.2798]) -Greedy action tensor([ 1.3852, -0.5103, 0.0385, 0.0068]) tensor([0.6016, 0.0904, 0.1565, 0.1516]) -Greedy action tensor([1.2342, 0.0215, 0.0856, 0.5113]) tensor([0.4762, 0.1416, 0.1510, 0.2311]) -Greedy action tensor([ 1.2625, -0.4187, -0.4565, -0.0158]) tensor([0.6083, 0.1132, 0.1090, 0.1694]) -Greedy action tensor([ 1.5201, -0.9623, -0.1649, 0.1744]) tensor([0.6539, 0.0546, 0.1213, 0.1702]) -Greedy action tensor([ 0.8053, -0.6206, 0.0104, 0.0073]) tensor([0.4668, 0.1122, 0.2108, 0.2102]) -Greedy action tensor([ 1.1615, -0.1621, -0.5446, 0.2227]) tensor([0.5438, 0.1447, 0.0987, 0.2127]) -Greedy action tensor([ 1.5811, -0.6661, -0.1970, 0.1397]) tensor([0.6617, 0.0699, 0.1118, 0.1565]) -Greedy action tensor([ 0.9579, -0.2694, -0.2514, 0.3081]) tensor([0.4731, 0.1387, 0.1412, 0.2470]) -Greedy action tensor([ 1.4916, -0.5523, -0.4190, 0.4048]) tensor([0.6193, 0.0802, 0.0916, 0.2089]) -Greedy action tensor([ 0.9596, -0.5433, 0.0481, 0.1324]) tensor([0.4850, 0.1079, 0.1950, 0.2121]) -Greedy action tensor([ 0.8442, -0.4007, -0.1090, 0.1611]) tensor([0.4590, 0.1322, 0.1770, 0.2318]) -Greedy action tensor([ 1.0889, -0.3020, -0.1802, -0.1884]) tensor([0.5529, 0.1376, 0.1554, 0.1541]) -Greedy action tensor([ 1.1641, -0.5799, -0.1389, 0.4034]) tensor([0.5225, 0.0913, 0.1420, 0.2442]) -Greedy action tensor([ 1.5138, -0.4242, -0.3607, 0.2743]) tensor([0.6301, 0.0907, 0.0967, 0.1824]) -Greedy action tensor([ 0.7770, -0.2977, 0.0293, 0.0423]) tensor([0.4358, 0.1488, 0.2063, 0.2090]) -Greedy action tensor([ 1.3347, -0.1566, -0.5053, 0.5042]) tensor([0.5495, 0.1237, 0.0873, 0.2395]) -Greedy action tensor([ 1.3547, -0.2833, -0.4109, 0.2506]) tensor([0.5893, 0.1145, 0.1008, 0.1954]) -Greedy action tensor([ 1.9700, -0.9883, -0.2190, 0.9693]) tensor([0.6529, 0.0339, 0.0731, 0.2400]) -Greedy action tensor([ 1.6035, -0.0321, -0.5446, 0.4143]) tensor([0.6188, 0.1206, 0.0722, 0.1884]) -Greedy action tensor([ 1.3572, -0.3034, -0.3381, 0.5057]) tensor([0.5555, 0.1055, 0.1019, 0.2371]) -Greedy action tensor([ 1.5340, 0.0087, -0.5077, 0.2300]) tensor([0.6177, 0.1344, 0.0802, 0.1677]) -Greedy action tensor([ 1.1668, -0.3400, -0.0383, 0.0076]) tensor([0.5449, 0.1208, 0.1633, 0.1710]) -Greedy action tensor([ 1.4531, -0.3097, 0.0727, 0.5767]) tensor([0.5437, 0.0933, 0.1367, 0.2263]) -Greedy action tensor([ 1.8848, -0.3722, -0.4333, 0.5338]) tensor([0.6839, 0.0716, 0.0673, 0.1771]) -Greedy action tensor([ 1.5124, 0.0747, -0.2810, -0.0097]) tensor([0.6165, 0.1464, 0.1026, 0.1345]) -Greedy action tensor([ 1.1681, -0.2475, -0.4939, 0.2301]) tensor([0.5483, 0.1331, 0.1040, 0.2146]) -Greedy action tensor([ 1.3826, -0.7264, -0.1439, 0.2533]) tensor([0.6017, 0.0730, 0.1307, 0.1945]) -Greedy action tensor([ 1.0980, -0.4110, -0.1498, -0.3037]) tensor([0.5700, 0.1260, 0.1637, 0.1403]) -Greedy action tensor([-0.6254, -0.7865, -0.2270, 0.5966]) tensor([0.1485, 0.1264, 0.2212, 0.5040]) -Greedy action tensor([-0.0357, -1.1320, -0.4977, -1.3319]) tensor([0.4469, 0.1493, 0.2816, 0.1223]) -Greedy action tensor([0.3241, 0.5094, 0.1659, 0.4257]) tensor([0.2401, 0.2890, 0.2050, 0.2658]) -Greedy action tensor([-0.2004, -0.4781, -0.6650, -0.2312]) tensor([0.2980, 0.2257, 0.1873, 0.2890]) -Greedy action tensor([ 1.2146, -1.0471, -0.4429, 1.4245]) tensor([0.3955, 0.0412, 0.0754, 0.4879]) -Greedy action tensor([ 1.2503, -1.9000, 0.8385, -0.2958]) tensor([0.5213, 0.0223, 0.3453, 0.1111]) -Greedy action tensor([-0.5991, -0.2181, 1.1444, -1.3802]) tensor([0.1158, 0.1694, 0.6618, 0.0530]) -Greedy action tensor([-0.0223, -1.6493, -0.3933, 0.0229]) tensor([0.3410, 0.0670, 0.2353, 0.3567]) -Greedy action tensor([ 1.7362, -0.0631, 1.4400, 0.5565]) tensor([0.4512, 0.0746, 0.3355, 0.1387]) -Greedy action tensor([ 0.4525, 0.0857, 0.4518, -0.3431]) tensor([0.3181, 0.2204, 0.3179, 0.1436]) -Greedy action tensor([-1.0594, -0.8013, 0.5893, 0.9355]) tensor([0.0674, 0.0872, 0.3503, 0.4952]) -Greedy action tensor([ 0.2335, 0.0924, 0.3358, -0.2508]) tensor([0.2784, 0.2418, 0.3084, 0.1715]) -Greedy action tensor([ 0.6685, 0.0643, -0.5542, 0.5388]) tensor([0.3678, 0.2010, 0.1083, 0.3230]) -Greedy action tensor([-0.7617, -0.4074, 0.0635, 0.6777]) tensor([0.1120, 0.1597, 0.2557, 0.4726]) -Greedy action tensor([-0.5310, -0.9156, 0.5675, -0.9528]) tensor([0.1874, 0.1276, 0.5621, 0.1229]) -Greedy action tensor([ 0.6218, 0.1956, -0.9602, 0.9121]) tensor([0.3130, 0.2044, 0.0643, 0.4183]) -Greedy action tensor([-0.3631, -1.5551, 0.8990, -0.4913]) tensor([0.1750, 0.0531, 0.6180, 0.1539]) -Greedy action tensor([ 1.2132, -0.5029, 0.7889, 1.0658]) tensor([0.3708, 0.0667, 0.2426, 0.3200]) -Greedy action tensor([-0.8308, 0.6690, 0.7132, -0.6315]) tensor([0.0878, 0.3936, 0.4114, 0.1072]) -Greedy action tensor([-0.1426, -0.5565, 0.4350, 0.3788]) tensor([0.1950, 0.1289, 0.3475, 0.3285]) -Greedy action tensor([-0.2530, -1.0637, 1.3747, -0.0767]) tensor([0.1294, 0.0575, 0.6588, 0.1543]) -Greedy action tensor([ 0.0013, 0.0063, -0.3541, 0.6657]) tensor([0.2151, 0.2162, 0.1508, 0.4180]) -Greedy action tensor([ 0.7221, -0.6930, 0.7552, 0.9677]) tensor([0.2813, 0.0683, 0.2908, 0.3596]) -Greedy action tensor([ 0.5202, 1.2821, -0.1368, 0.5465]) tensor([0.2133, 0.4571, 0.1106, 0.2190]) -Greedy action tensor([-0.1494, -1.5306, -0.1300, 0.8931]) tensor([0.1958, 0.0492, 0.1996, 0.5554]) -Greedy action tensor([ 0.7718, -0.9836, 0.7672, -0.5022]) tensor([0.4085, 0.0706, 0.4066, 0.1143]) -Greedy action tensor([1.4494, 0.3787, 0.9865, 0.9540]) tensor([0.3874, 0.1328, 0.2438, 0.2360]) -Greedy action tensor([ 1.1534, -0.4934, 0.0093, 0.9865]) tensor([0.4242, 0.0817, 0.1351, 0.3590]) -Greedy action tensor([-1.1690, -0.1494, 0.5155, -0.8310]) tensor([0.0947, 0.2624, 0.5102, 0.1327]) -Greedy action tensor([-0.1097, -0.6797, 0.9281, 0.3601]) tensor([0.1670, 0.0944, 0.4714, 0.2671]) -Greedy action tensor([-0.2826, -0.1265, 0.2599, -0.3938]) tensor([0.2090, 0.2443, 0.3596, 0.1870]) -Greedy action tensor([ 0.8534, -0.6484, -0.0381, -0.1748]) tensor([0.5024, 0.1119, 0.2060, 0.1797]) -Greedy action tensor([-1.1884, -0.7036, 0.0200, 0.5598]) tensor([0.0853, 0.1386, 0.2858, 0.4903]) -Greedy action tensor([ 1.0533, -1.7876, 1.9161, -0.0040]) tensor([0.2649, 0.0155, 0.6277, 0.0920]) -Greedy action tensor([-0.4934, -1.1130, -0.6671, 0.2729]) tensor([0.2207, 0.1188, 0.1855, 0.4750]) -Greedy action tensor([ 1.1610, -1.3760, -0.2592, 0.4228]) tensor([0.5560, 0.0440, 0.1343, 0.2657]) -Greedy action tensor([ 0.1507, -2.0304, 0.2941, 1.0514]) tensor([0.2115, 0.0239, 0.2441, 0.5205]) -Greedy action tensor([ 0.2377, -0.6239, 0.0156, 0.1787]) tensor([0.3159, 0.1334, 0.2529, 0.2978]) -Greedy action tensor([ 1.6738, -0.2985, 1.8869, -0.0146]) tensor([0.3904, 0.0543, 0.4831, 0.0722]) -Greedy action tensor([-0.6550, -1.7149, 0.8355, -0.8303]) tensor([0.1509, 0.0523, 0.6701, 0.1267]) -Greedy action tensor([0.1624, 1.0629, 0.6634, 0.3138]) tensor([0.1594, 0.3922, 0.2630, 0.1854]) -Greedy action tensor([ 0.1609, -0.3601, 0.2116, 1.1515]) tensor([0.1873, 0.1112, 0.1971, 0.5044]) -Greedy action tensor([-0.5884, -0.7710, 0.3546, 0.2800]) tensor([0.1474, 0.1228, 0.3785, 0.3513]) -Greedy action tensor([-0.1444, -1.3283, 0.9387, 0.1217]) tensor([0.1797, 0.0550, 0.5308, 0.2345]) -Greedy action tensor([-0.3051, 0.6289, 0.2577, -0.6496]) tensor([0.1664, 0.4235, 0.2922, 0.1179]) -Greedy action tensor([-0.8092, -0.1951, 1.1421, -1.0521]) tensor([0.0937, 0.1732, 0.6596, 0.0735]) -Greedy action tensor([-0.2633, -1.1146, 0.3464, -0.6524]) tensor([0.2535, 0.1082, 0.4665, 0.1718]) -Greedy action tensor([ 0.2166, -2.0682, 0.5241, 0.8227]) tensor([0.2328, 0.0237, 0.3166, 0.4268]) -Greedy action tensor([ 1.7047, -0.5881, 0.8065, 1.7040]) tensor([0.3988, 0.0403, 0.1624, 0.3985]) -Greedy action tensor([ 0.9780, -0.5823, 0.8193, 0.4600]) tensor([0.3761, 0.0790, 0.3209, 0.2240]) -Greedy action tensor([-0.8826, -0.3870, 0.4570, -0.9343]) tensor([0.1350, 0.2216, 0.5153, 0.1282]) -Greedy action tensor([ 0.8083, -1.1646, 0.1608, 0.5108]) tensor([0.4158, 0.0578, 0.2176, 0.3088]) -Greedy action tensor([ 1.2289, -0.0955, 1.6887, 0.6407]) tensor([0.2937, 0.0781, 0.4651, 0.1631]) -Greedy action tensor([ 1.0465, -0.5518, 1.4921, 1.3749]) tensor([0.2408, 0.0487, 0.3760, 0.3344]) -Greedy action tensor([-0.4106, 1.9639, 0.4552, -0.5768]) tensor([0.0668, 0.7179, 0.1588, 0.0566]) -Greedy action tensor([ 1.3445, 0.2829, -0.0279, 1.2328]) tensor([0.4010, 0.1387, 0.1017, 0.3586]) -Greedy action tensor([ 1.0778, -0.5638, -0.2621, 0.1158]) tensor([0.5442, 0.1054, 0.1425, 0.2079]) -Greedy action tensor([ 1.6715, -1.5416, 0.1287, 0.7422]) tensor([0.6065, 0.0244, 0.1297, 0.2395]) -Greedy action tensor([ 0.2775, -0.1442, -0.4048, 2.0252]) tensor([0.1265, 0.0830, 0.0640, 0.7265]) -Greedy action tensor([-0.3381, 0.0986, -0.7365, 0.6974]) tensor([0.1657, 0.2564, 0.1112, 0.4667]) -Greedy action tensor([ 0.4277, -1.7481, -0.2549, 1.0525]) tensor([0.2868, 0.0326, 0.1449, 0.5357]) -Greedy action tensor([ 1.0365, -1.2083, 0.4060, 0.2426]) tensor([0.4784, 0.0507, 0.2547, 0.2163]) -Greedy action tensor([ 0.2184, -0.4768, 0.1372, 1.3178]) tensor([0.1844, 0.0920, 0.1700, 0.5536]) -Greedy action tensor([-1.1763, -2.1646, -0.6498, 0.3631]) tensor([0.1294, 0.0482, 0.2191, 0.6033]) -Greedy action tensor([ 0.9029, -0.4182, 0.0734, 1.2751]) tensor([0.3171, 0.0846, 0.1383, 0.4600]) -Greedy action tensor([ 0.7063, -0.8534, 1.6857, -0.2343]) tensor([0.2345, 0.0493, 0.6246, 0.0916]) -Greedy action tensor([1.2152, 0.5394, 0.2642, 0.7133]) tensor([0.3999, 0.2035, 0.1545, 0.2421]) -Greedy action tensor([ 0.5307, -1.4652, 0.0126, 0.8922]) tensor([0.3158, 0.0429, 0.1881, 0.4532]) -Greedy action tensor([-1.1963, -0.2295, 2.0570, -0.6643]) tensor([0.0320, 0.0843, 0.8292, 0.0545]) -Greedy action tensor([-0.7053, -0.9859, 0.0015, -1.0005]) tensor([0.2209, 0.1668, 0.4478, 0.1644]) -Greedy action tensor([ 0.2270, 1.4026, 1.4595, -0.6349]) tensor([0.1236, 0.4004, 0.4238, 0.0522]) -Greedy action tensor([ 0.0971, 0.6284, -0.7396, 0.9514]) tensor([0.1823, 0.3102, 0.0790, 0.4285]) -Greedy action tensor([ 0.1807, -0.8180, 0.6214, 0.2802]) tensor([0.2483, 0.0915, 0.3859, 0.2743]) -Greedy action tensor([ 0.3075, -0.3134, -0.8650, 0.7706]) tensor([0.2910, 0.1564, 0.0901, 0.4625]) -Greedy action tensor([ 0.0621, -1.4607, 0.1965, 0.6787]) tensor([0.2373, 0.0517, 0.2714, 0.4396]) -Greedy action tensor([-0.2858, 0.6099, 0.1414, -0.2134]) tensor([0.1651, 0.4043, 0.2531, 0.1775]) -Greedy action tensor([ 0.9560, -1.2267, 1.1570, 0.1678]) tensor([0.3584, 0.0404, 0.4382, 0.1630]) -Greedy action tensor([ 0.9460, -0.1758, 1.9184, -0.1230]) tensor([0.2318, 0.0755, 0.6130, 0.0796]) -Greedy action tensor([ 0.6792, -0.4170, 0.0069, 0.6923]) tensor([0.3499, 0.1169, 0.1786, 0.3545]) -Greedy action tensor([-0.1335, -0.4173, 1.6883, -0.8729]) tensor([0.1189, 0.0895, 0.7349, 0.0567]) -Greedy action tensor([-1.3059, -1.0674, -0.8327, -0.7226]) tensor([0.1765, 0.2240, 0.2833, 0.3163]) -Greedy action tensor([-1.1874, -0.9020, 0.0386, -0.5887]) tensor([0.1323, 0.1760, 0.4509, 0.2408]) -Greedy action tensor([ 0.3117, -0.3974, -0.2605, -0.4256]) tensor([0.3945, 0.1941, 0.2226, 0.1887]) -Greedy action tensor([ 0.7421, -0.7554, -0.0610, -0.3041]) tensor([0.4943, 0.1106, 0.2214, 0.1736]) -Greedy action tensor([ 0.5089, 0.1103, -0.0698, 0.1803]) tensor([0.3388, 0.2274, 0.1899, 0.2439]) -Greedy action tensor([ 0.4546, -0.0899, -0.0993, -0.1590]) tensor([0.3709, 0.2152, 0.2132, 0.2008]) -Greedy action tensor([ 0.4149, -0.1380, -0.0684, -0.2753]) tensor([0.3713, 0.2136, 0.2290, 0.1862]) -Greedy action tensor([ 0.7872, -0.6603, 0.0770, -0.7633]) tensor([0.5158, 0.1213, 0.2535, 0.1094]) -Greedy action tensor([ 0.7203, -0.5457, -0.0481, -0.3664]) tensor([0.4801, 0.1354, 0.2226, 0.1619]) -Greedy action tensor([ 0.5866, -0.4461, -0.0665, -0.4749]) tensor([0.4500, 0.1602, 0.2342, 0.1557]) -Greedy action tensor([ 0.5835, -0.2108, 0.1672, -0.5432]) tensor([0.4106, 0.1856, 0.2708, 0.1331]) -Greedy action tensor([ 0.3068, -0.1368, 0.1037, -0.3851]) tensor([0.3380, 0.2169, 0.2759, 0.1692]) -Greedy action tensor([ 0.3256, -0.4703, -0.2094, -0.0703]) tensor([0.3690, 0.1665, 0.2161, 0.2484]) -Greedy action tensor([ 1.1428, -0.5970, -0.0232, -0.2351]) tensor([0.5750, 0.1009, 0.1792, 0.1449]) -Greedy action tensor([ 7.4674e-01, -5.5881e-02, -4.2293e-02, 4.3205e-04]) tensor([0.4208, 0.1886, 0.1912, 0.1995]) -Greedy action tensor([ 0.4300, -0.1533, -0.0117, -0.1711]) tensor([0.3637, 0.2030, 0.2339, 0.1994]) -Greedy action tensor([ 0.6502, -0.3774, 0.0549, -0.2822]) tensor([0.4343, 0.1554, 0.2394, 0.1709]) -Greedy action tensor([ 0.4669, -0.2689, -0.0228, -0.3590]) tensor([0.3953, 0.1894, 0.2422, 0.1731]) -Greedy action tensor([ 0.6320, -0.6805, -0.0294, -0.1788]) tensor([0.4485, 0.1207, 0.2315, 0.1993]) -Greedy action tensor([ 0.8827, -0.4254, 0.0147, -0.7324]) tensor([0.5294, 0.1431, 0.2222, 0.1053]) -Greedy action tensor([ 0.9400, -0.9153, -0.0545, -0.6306]) tensor([0.5766, 0.0902, 0.2133, 0.1199]) -Greedy action tensor([ 0.9408, -0.4266, -0.0400, -0.3967]) tensor([0.5285, 0.1346, 0.1982, 0.1387]) -Greedy action tensor([ 0.4469, 0.0678, -0.1116, 0.0132]) tensor([0.3443, 0.2356, 0.1970, 0.2231]) -Greedy action tensor([ 0.7141, -0.4217, 0.1569, -0.5067]) tensor([0.4568, 0.1467, 0.2617, 0.1348]) -Greedy action tensor([ 0.7411, -0.4826, 0.0579, -0.3480]) tensor([0.4682, 0.1377, 0.2365, 0.1576]) -Greedy action tensor([ 0.3865, -0.3317, -0.0141, -0.1178]) tensor([0.3621, 0.1766, 0.2426, 0.2187]) -Greedy action tensor([ 0.9420, -0.9887, 0.0349, -0.5176]) tensor([0.5615, 0.0814, 0.2267, 0.1304]) -Greedy action tensor([ 0.5144, -0.2891, 0.2809, -0.6481]) tensor([0.3918, 0.1754, 0.3102, 0.1225]) -Greedy action tensor([ 0.8332, -0.6370, -0.0528, -0.4594]) tensor([0.5217, 0.1199, 0.2151, 0.1432]) -Greedy action tensor([ 0.6008, -0.3232, 0.1823, -0.3610]) tensor([0.4103, 0.1629, 0.2700, 0.1568]) -Greedy action tensor([ 0.5089, -0.3358, 0.2598, -0.3808]) tensor([0.3817, 0.1640, 0.2975, 0.1568]) -Greedy action tensor([ 0.5471, -0.1916, -0.1049, -0.2666]) tensor([0.4095, 0.1956, 0.2133, 0.1815]) -Greedy action tensor([ 0.9243, -0.2606, -0.1583, -0.2371]) tensor([0.5108, 0.1562, 0.1730, 0.1599]) -Greedy action tensor([ 0.4169, 0.0579, 0.0806, -0.3026]) tensor([0.3449, 0.2408, 0.2464, 0.1679]) -Greedy action tensor([ 0.9448, -0.6693, 0.1897, -0.6996]) tensor([0.5370, 0.1069, 0.2524, 0.1037]) -Greedy action tensor([ 0.3594, -0.2724, 0.0351, -0.2581]) tensor([0.3579, 0.1903, 0.2588, 0.1930]) -Greedy action tensor([ 0.6276, -0.2570, 0.0451, -0.2838]) tensor([0.4214, 0.1740, 0.2353, 0.1694]) -Greedy action tensor([ 0.6898, -0.4447, -0.0655, -0.2379]) tensor([0.4573, 0.1470, 0.2149, 0.1808]) -Greedy action tensor([ 0.7645, 0.4833, -0.3901, -0.1488]) tensor([0.4046, 0.3055, 0.1275, 0.1623]) -Greedy action tensor([ 0.8373, -0.6345, -0.1779, -0.2576]) tensor([0.5191, 0.1191, 0.1881, 0.1737]) -Greedy action tensor([ 0.8618, -0.7182, 0.0342, -0.5347]) tensor([0.5290, 0.1089, 0.2312, 0.1309]) -Greedy action tensor([ 0.6205, -0.2444, -0.0995, -0.1639]) tensor([0.4230, 0.1781, 0.2059, 0.1930]) -Greedy action tensor([ 0.6932, -0.6240, 0.0300, -0.4724]) tensor([0.4774, 0.1279, 0.2459, 0.1488]) -Greedy action tensor([ 0.9680, -0.3256, -0.0501, -0.5333]) tensor([0.5381, 0.1476, 0.1944, 0.1199]) -Greedy action tensor([ 0.8319, -0.5688, -0.0083, -0.3442]) tensor([0.5034, 0.1240, 0.2173, 0.1553]) -Greedy action tensor([ 0.8107, -0.3048, 0.0803, -0.5125]) tensor([0.4818, 0.1579, 0.2321, 0.1283]) -Greedy action tensor([ 0.6036, -0.4618, 0.0066, -0.2505]) tensor([0.4309, 0.1485, 0.2372, 0.1834]) -Greedy action tensor([ 0.4827, -0.1161, -0.0756, -0.0511]) tensor([0.3693, 0.2029, 0.2113, 0.2165]) -Greedy action tensor([ 0.3138, -0.1638, -0.0665, -0.2487]) tensor([0.3480, 0.2158, 0.2379, 0.1983]) -Greedy action tensor([ 0.4435, 0.1147, -0.0087, -0.2706]) tensor([0.3514, 0.2529, 0.2236, 0.1721]) -Greedy action tensor([ 0.7454, 0.1155, -0.1343, -0.3354]) tensor([0.4373, 0.2329, 0.1814, 0.1484]) -Greedy action tensor([ 0.8816, -0.7055, -0.1539, -0.5927]) tensor([0.5591, 0.1143, 0.1985, 0.1280]) -Greedy action tensor([ 0.4729, -0.3333, 0.0813, -0.3164]) tensor([0.3881, 0.1733, 0.2623, 0.1763]) -Greedy action tensor([ 0.7004, -0.1454, 0.0974, -0.2252]) tensor([0.4215, 0.1809, 0.2306, 0.1670]) -Greedy action tensor([ 0.4314, -0.3071, 0.0292, -0.5257]) tensor([0.3951, 0.1888, 0.2643, 0.1517]) -Greedy action tensor([ 1.1330, -0.7541, -0.0937, -0.7704]) tensor([0.6274, 0.0951, 0.1840, 0.0935]) -Greedy action tensor([ 0.6108, -0.4428, -0.1058, -0.3120]) tensor([0.4475, 0.1560, 0.2186, 0.1779]) -Greedy action tensor([ 0.5047, -0.0933, -0.0366, -0.2226]) tensor([0.3824, 0.2103, 0.2226, 0.1848]) -Greedy action tensor([ 0.2388, -0.2345, -0.1425, -0.0316]) tensor([0.3259, 0.2030, 0.2225, 0.2486]) -Greedy action tensor([ 0.3215, -0.0983, -0.0346, -0.1419]) tensor([0.3348, 0.2200, 0.2345, 0.2106]) -Greedy action tensor([ 0.8355, -0.3682, 0.2122, -0.8398]) tensor([0.4942, 0.1483, 0.2650, 0.0925]) -Greedy action tensor([ 0.9548, -0.5930, -0.0933, -0.3331]) tensor([0.5437, 0.1157, 0.1906, 0.1500]) -Greedy action tensor([ 0.5931, -0.2606, 0.0116, -0.2603]) tensor([0.4148, 0.1766, 0.2319, 0.1767]) -Greedy action tensor([ 0.5530, -0.3323, 0.0670, -0.2219]) tensor([0.4019, 0.1658, 0.2472, 0.1852]) -Greedy action tensor([ 0.3646, -0.4618, -0.1380, -0.1572]) tensor([0.3794, 0.1660, 0.2295, 0.2251]) -Greedy action tensor([ 0.7732, -0.4865, -0.1322, -0.3357]) tensor([0.4955, 0.1406, 0.2004, 0.1635]) -Greedy action tensor([ 0.4819, 0.2067, -0.2213, -0.2206]) tensor([0.3637, 0.2762, 0.1800, 0.1801]) -Greedy action tensor([0.4436, 0.0710, 0.0362, 0.0713]) tensor([0.3286, 0.2264, 0.2186, 0.2264]) -Greedy action tensor([ 0.2780, 0.2794, -0.0481, -0.2033]) tensor([0.2993, 0.2997, 0.2160, 0.1850]) -Greedy action tensor([ 0.3442, 0.1953, 0.2166, -0.3066]) tensor([0.3064, 0.2640, 0.2697, 0.1598]) -Greedy action tensor([ 0.8175, -0.5382, 0.0146, -1.0392]) tensor([0.5371, 0.1384, 0.2406, 0.0839]) -Greedy action tensor([ 0.6143, -0.5385, -0.0641, -0.1367]) tensor([0.4357, 0.1376, 0.2211, 0.2056]) -Greedy action tensor([ 0.6048, -0.4462, -0.0456, -0.3853]) tensor([0.4458, 0.1559, 0.2327, 0.1657]) -Greedy action tensor([ 0.3471, -0.0452, 0.1414, -0.6051]) tensor([0.3478, 0.2349, 0.2831, 0.1342]) -Greedy action tensor([ 0.7414, -0.5154, -0.0699, -0.3318]) tensor([0.4829, 0.1374, 0.2146, 0.1651]) -Greedy action tensor([ 0.9992, -0.6561, 0.1649, -0.6553]) tensor([0.5505, 0.1052, 0.2390, 0.1053]) -Greedy action tensor([ 0.6374, -0.5306, 0.1029, -0.6872]) tensor([0.4624, 0.1438, 0.2709, 0.1229]) -Greedy action tensor([ 0.8620, 0.0370, 0.0245, -0.3554]) tensor([0.4615, 0.2022, 0.1997, 0.1366]) -Greedy action tensor([ 1.3154, -0.7426, -0.0463, -0.4928]) tensor([0.6460, 0.0825, 0.1655, 0.1059]) -Greedy action tensor([ 0.8128, -0.1906, -0.0934, -0.2766]) tensor([0.4746, 0.1740, 0.1918, 0.1597]) -Greedy action tensor([ 0.5469, -0.4845, 0.0283, -0.5165]) tensor([0.4353, 0.1552, 0.2592, 0.1503]) -Greedy action tensor([ 0.8894, -0.7671, -0.2448, -0.6883]) tensor([0.5817, 0.1110, 0.1871, 0.1201]) -Greedy action tensor([ 0.3216, -0.0875, -0.0677, -0.1612]) tensor([0.3380, 0.2245, 0.2290, 0.2085]) -Greedy action tensor([-1.8567, -0.3989, 0.6209, -0.1178]) tensor([0.0437, 0.1876, 0.5202, 0.2485]) -Greedy action tensor([-0.5399, -0.1510, 1.1400, 1.5351]) tensor([0.0633, 0.0933, 0.3394, 0.5039]) -Greedy action tensor([-1.8167, -0.4632, 0.6096, -0.1098]) tensor([0.0461, 0.1784, 0.5215, 0.2540]) -Greedy action tensor([-1.3355, -0.0595, 0.5610, -0.4417]) tensor([0.0730, 0.2617, 0.4867, 0.1786]) -Greedy action tensor([-1.7505, -0.4701, 0.6598, 0.0567]) tensor([0.0458, 0.1648, 0.5102, 0.2791]) -Greedy action tensor([-1.8786, -0.4388, 0.7318, -0.0100]) tensor([0.0395, 0.1668, 0.5377, 0.2561]) -Greedy action tensor([-1.9274, -0.4483, 0.6594, -0.1712]) tensor([0.0409, 0.1794, 0.5431, 0.2367]) -Greedy action tensor([-0.8802, -0.6112, 0.3120, 0.5333]) tensor([0.1030, 0.1347, 0.3392, 0.4232]) -Greedy action tensor([-1.2401, -0.2928, 0.3568, 0.0791]) tensor([0.0816, 0.2104, 0.4028, 0.3052]) -Greedy action tensor([-1.6404, -0.1078, 0.5924, 0.1015]) tensor([0.0484, 0.2241, 0.4513, 0.2762]) -Greedy action tensor([-1.5571, -0.4677, 0.5244, 0.0589]) tensor([0.0587, 0.1746, 0.4710, 0.2957]) -Greedy action tensor([-0.9239, -0.5832, 0.3059, 0.6004]) tensor([0.0960, 0.1349, 0.3283, 0.4407]) -Greedy action tensor([-1.7872, -0.4990, 0.5912, -0.1317]) tensor([0.0484, 0.1756, 0.5224, 0.2536]) -Greedy action tensor([-1.5333, -0.5750, 0.4444, 0.0857]) tensor([0.0630, 0.1642, 0.4550, 0.3179]) -Greedy action tensor([-1.5341, -0.5215, 0.5449, 0.2280]) tensor([0.0569, 0.1566, 0.4550, 0.3314]) -Greedy action tensor([-1.8534, -0.4152, 0.6175, -0.1265]) tensor([0.0441, 0.1859, 0.5220, 0.2481]) -Greedy action tensor([-1.3098, -0.4563, 0.8509, 0.9450]) tensor([0.0464, 0.1089, 0.4025, 0.4422]) -Greedy action tensor([-1.7672, -0.4594, 0.5996, -0.0632]) tensor([0.0479, 0.1773, 0.5113, 0.2635]) -Greedy action tensor([-1.6447, -0.4170, 0.5297, -0.0027]) tensor([0.0544, 0.1858, 0.4787, 0.2811]) -Greedy action tensor([-1.9068, -0.4164, 0.6471, -0.1551]) tensor([0.0416, 0.1845, 0.5344, 0.2396]) -Greedy action tensor([-1.2466, -0.3971, 0.8061, 0.8116]) tensor([0.0527, 0.1233, 0.4108, 0.4131]) -Greedy action tensor([-1.7865, -0.4887, 0.5847, -0.1105]) tensor([0.0483, 0.1767, 0.5170, 0.2580]) -Greedy action tensor([-1.9413, -0.4493, 0.6690, -0.1768]) tensor([0.0402, 0.1786, 0.5466, 0.2346]) -Greedy action tensor([-1.4309, -0.5238, 0.0124, -0.4018]) tensor([0.0951, 0.2357, 0.4029, 0.2663]) -Greedy action tensor([-1.6876, -0.3403, 0.6689, 0.0447]) tensor([0.0475, 0.1827, 0.5013, 0.2685]) -Greedy action tensor([-1.8753, -0.4332, 0.6311, -0.1499]) tensor([0.0433, 0.1831, 0.5307, 0.2430]) -Greedy action tensor([-1.0493, 0.2811, 0.2908, 0.1010]) tensor([0.0850, 0.3216, 0.3247, 0.2686]) -Greedy action tensor([-1.8747, -0.4150, 0.6282, -0.1335]) tensor([0.0431, 0.1853, 0.5260, 0.2456]) -Greedy action tensor([-1.3425, -0.6029, 0.5676, 0.6381]) tensor([0.0585, 0.1226, 0.3950, 0.4239]) -Greedy action tensor([-0.6172, -0.4193, 0.2804, 0.0391]) tensor([0.1515, 0.1847, 0.3718, 0.2920]) -Greedy action tensor([-1.8959, -0.4636, 0.6399, -0.1519]) tensor([0.0425, 0.1780, 0.5365, 0.2430]) -Greedy action tensor([-1.7559, -0.5364, 0.5940, -0.0719]) tensor([0.0494, 0.1671, 0.5176, 0.2659]) -Greedy action tensor([-1.8103, -0.3941, 0.5939, -0.1283]) tensor([0.0464, 0.1911, 0.5132, 0.2493]) -Greedy action tensor([-0.2974, 0.3163, 0.7159, 1.5922]) tensor([0.0818, 0.1512, 0.2255, 0.5415]) -Greedy action tensor([-1.8610, -0.2862, 0.6092, -0.1095]) tensor([0.0427, 0.2062, 0.5049, 0.2461]) -Greedy action tensor([-1.8250, -0.4252, 0.6027, -0.1165]) tensor([0.0456, 0.1851, 0.5173, 0.2520]) -Greedy action tensor([-0.8182, -0.5960, 0.1721, 0.3533]) tensor([0.1224, 0.1529, 0.3296, 0.3951]) -Greedy action tensor([-1.8835, -0.4139, 0.6328, -0.1511]) tensor([0.0428, 0.1859, 0.5295, 0.2418]) -Greedy action tensor([-1.7150, -0.2239, 0.5130, -0.0406]) tensor([0.0499, 0.2215, 0.4627, 0.2660]) -Greedy action tensor([-1.4642, 0.3785, 0.2966, 0.3071]) tensor([0.0526, 0.3321, 0.3060, 0.3092]) -Greedy action tensor([-0.3048, -0.3427, 0.1657, 0.1600]) tensor([0.1940, 0.1868, 0.3105, 0.3087]) -Greedy action tensor([-0.8152, 0.3809, 0.0796, 0.1735]) tensor([0.1059, 0.3503, 0.2592, 0.2847]) -Greedy action tensor([-1.7334, 0.0510, 0.4839, -0.0544]) tensor([0.0465, 0.2770, 0.4271, 0.2493]) -Greedy action tensor([-1.8966, -0.3775, 0.6331, -0.1472]) tensor([0.0419, 0.1914, 0.5258, 0.2409]) -Greedy action tensor([-1.8710, -0.4056, 0.6308, -0.1346]) tensor([0.0431, 0.1865, 0.5258, 0.2446]) -Greedy action tensor([-1.2106, -0.1181, 0.2585, 0.0789]) tensor([0.0836, 0.2493, 0.3634, 0.3037]) -Greedy action tensor([-1.8856, -0.4723, 0.6411, -0.1472]) tensor([0.0429, 0.1763, 0.5368, 0.2440]) -Greedy action tensor([-1.8725, -0.4519, 0.6551, -0.1060]) tensor([0.0425, 0.1761, 0.5326, 0.2488]) -Greedy action tensor([-1.8403, -0.2840, 0.5923, -0.1075]) tensor([0.0439, 0.2081, 0.4998, 0.2482]) -Greedy action tensor([-0.8139, -0.3063, 0.2539, 0.0164]) tensor([0.1272, 0.2112, 0.3699, 0.2917]) -Greedy action tensor([-1.8095, -0.1030, 0.5644, -0.0265]) tensor([0.0431, 0.2375, 0.4630, 0.2564]) -Greedy action tensor([-1.9089, -0.3127, 0.6203, -0.1561]) tensor([0.0412, 0.2035, 0.5173, 0.2380]) -Greedy action tensor([-1.8947, -0.4560, 0.6395, -0.1538]) tensor([0.0425, 0.1792, 0.5359, 0.2424]) -Greedy action tensor([-0.7791, -0.5679, 0.1903, 0.3902]) tensor([0.1236, 0.1527, 0.3258, 0.3979]) -Greedy action tensor([-1.9125, -0.3589, 0.6329, -0.1670]) tensor([0.0413, 0.1953, 0.5267, 0.2367]) -Greedy action tensor([-1.9422, -0.4523, 0.6682, -0.1777]) tensor([0.0402, 0.1783, 0.5468, 0.2347]) -Greedy action tensor([-0.7460, -0.1106, 0.7893, 1.2093]) tensor([0.0685, 0.1293, 0.3181, 0.4841]) -Greedy action tensor([-1.9136, -0.3670, 0.6404, -0.1557]) tensor([0.0411, 0.1928, 0.5280, 0.2382]) -Greedy action tensor([-1.2877, -0.4310, 0.6086, -0.5918]) tensor([0.0832, 0.1959, 0.5541, 0.1668]) -Greedy action tensor([-1.8825, -0.4596, 0.6353, -0.1475]) tensor([0.0431, 0.1787, 0.5341, 0.2441]) -Greedy action tensor([-1.8426, -0.4780, 0.6974, -0.0354]) tensor([0.0422, 0.1652, 0.5353, 0.2573]) -Greedy action tensor([-1.7222, -0.5098, 0.6087, 0.0364]) tensor([0.0489, 0.1644, 0.5030, 0.2838]) -Greedy action tensor([-1.5174, -0.5330, 0.6021, 0.3666]) tensor([0.0538, 0.1440, 0.4481, 0.3541]) -Greedy action tensor([-1.8671, -0.4823, 0.6846, -0.0724]) tensor([0.0419, 0.1675, 0.5381, 0.2524]) -Greedy action tensor([-1.7533, -0.0880, 0.5157, -0.0467]) tensor([0.0466, 0.2463, 0.4504, 0.2567]) -Greedy action tensor([-1.2402, 0.2626, 0.2812, -0.0166]) tensor([0.0742, 0.3336, 0.3399, 0.2523]) -Greedy action tensor([-0.7271, -0.4815, 1.0668, 1.5657]) tensor([0.0550, 0.0703, 0.3305, 0.5443]) -Greedy action tensor([-1.6662, -0.4690, 0.5793, 0.0706]) tensor([0.0515, 0.1704, 0.4860, 0.2922]) -Greedy action tensor([-1.9195, -0.4248, 0.6563, -0.1576]) tensor([0.0409, 0.1825, 0.5381, 0.2384]) -Greedy action tensor([-1.7253, -0.4774, 0.5708, -0.0773]) tensor([0.0510, 0.1776, 0.5065, 0.2649]) -Greedy action tensor([-0.9362, 0.8119, 0.2145, -0.1436]) tensor([0.0826, 0.4742, 0.2609, 0.1824]) -Greedy action tensor([-1.2793, 0.1740, 0.3546, -0.0905]) tensor([0.0731, 0.3126, 0.3744, 0.2399]) -Greedy action tensor([-1.1564, -0.6230, 0.3621, -0.0314]) tensor([0.0966, 0.1647, 0.4411, 0.2976]) -Greedy action tensor([-1.4971, -0.5188, 0.4163, 0.1463]) tensor([0.0641, 0.1704, 0.4341, 0.3314]) -Greedy action tensor([-1.7181, -0.4590, 0.7761, 0.3236]) tensor([0.0411, 0.1447, 0.4977, 0.3165]) -Greedy action tensor([-1.9194, -0.4576, 0.6570, -0.1647]) tensor([0.0412, 0.1779, 0.5424, 0.2385]) -Greedy action tensor([-1.1115, -0.5883, 0.2369, 0.3300]) tensor([0.0929, 0.1567, 0.3577, 0.3927]) -Greedy action tensor([-1.9210, -0.4557, 0.6985, -0.1398]) tensor([0.0400, 0.1732, 0.5493, 0.2375]) -Greedy action tensor([-1.7756, -0.5521, 0.2326, -0.3115]) tensor([0.0618, 0.2102, 0.4606, 0.2674]) -Greedy action tensor([-1.9058, -0.4202, 0.6624, -0.1464]) tensor([0.0412, 0.1820, 0.5374, 0.2394]) -Greedy action tensor([-1.6152, -0.4010, 0.5052, -0.0526]) tensor([0.0572, 0.1927, 0.4770, 0.2731]) -Greedy action tensor([ 1.3742, -0.5605, -0.1859, 0.2635]) tensor([0.5938, 0.0858, 0.1248, 0.1956]) -Greedy action tensor([ 0.5714, -0.4345, 0.0512, 0.3728]) tensor([0.3597, 0.1315, 0.2138, 0.2949]) -Greedy action tensor([ 1.0341, -1.0955, -0.5138, 0.2637]) tensor([0.5573, 0.0663, 0.1185, 0.2579]) -Greedy action tensor([ 1.8950, -0.6745, -0.5257, 0.7554]) tensor([0.6732, 0.0516, 0.0598, 0.2154]) -Greedy action tensor([ 1.6326, -0.2662, -0.9353, 0.1173]) tensor([0.6915, 0.1035, 0.0530, 0.1520]) -Greedy action tensor([ 1.5249, -0.1607, -0.2016, 0.2921]) tensor([0.6043, 0.1120, 0.1075, 0.1761]) -Greedy action tensor([ 0.8935, -0.6039, -0.6129, 0.9007]) tensor([0.4077, 0.0912, 0.0904, 0.4107]) -Greedy action tensor([ 1.4524, -0.4279, -0.3097, 0.2437]) tensor([0.6162, 0.0940, 0.1058, 0.1840]) -Greedy action tensor([ 1.8524, -0.5729, -0.0847, 0.3204]) tensor([0.6903, 0.0611, 0.0995, 0.1492]) -Greedy action tensor([ 1.4364, -0.2126, -0.2966, 0.0692]) tensor([0.6158, 0.1184, 0.1088, 0.1569]) -Greedy action tensor([ 1.1637, -0.4530, -0.2041, 0.3980]) tensor([0.5213, 0.1035, 0.1328, 0.2424]) -Greedy action tensor([ 1.3866, -0.8107, -0.3940, 0.1157]) tensor([0.6409, 0.0712, 0.1080, 0.1798]) -Greedy action tensor([ 1.6011, 0.0333, -0.5315, 0.3474]) tensor([0.6202, 0.1293, 0.0735, 0.1770]) -Greedy action tensor([ 1.7178, -0.3893, -0.1741, 0.4918]) tensor([0.6386, 0.0777, 0.0963, 0.1874]) -Greedy action tensor([ 1.8968, -0.7396, -0.2793, 0.7063]) tensor([0.6715, 0.0481, 0.0762, 0.2042]) -Greedy action tensor([ 7.4418e-01, -5.0392e-01, -4.6320e-04, -1.4453e-01]) tensor([0.4602, 0.1321, 0.2185, 0.1892]) -Greedy action tensor([ 1.5906, 0.1195, -0.0976, 0.2091]) tensor([0.6003, 0.1379, 0.1110, 0.1508]) -Greedy action tensor([0.6962, 0.0329, 0.0542, 0.0763]) tensor([0.3877, 0.1997, 0.2040, 0.2086]) -Greedy action tensor([ 0.7219, -0.0997, -0.1124, 0.6540]) tensor([0.3561, 0.1566, 0.1546, 0.3327]) -Greedy action tensor([ 1.5792, -0.5477, -0.0864, 0.1463]) tensor([0.6465, 0.0771, 0.1222, 0.1543]) -Greedy action tensor([ 1.3218, -0.3469, -0.3322, 0.0479]) tensor([0.6026, 0.1136, 0.1153, 0.1686]) -Greedy action tensor([ 2.4205, -1.3477, -0.0403, 0.3154]) tensor([0.8128, 0.0188, 0.0694, 0.0990]) -Greedy action tensor([ 1.4920, 0.1841, -0.1995, 0.1275]) tensor([0.5847, 0.1581, 0.1077, 0.1494]) -Greedy action tensor([ 1.6645, -0.1621, -0.2917, 0.3815]) tensor([0.6331, 0.1019, 0.0895, 0.1755]) -Greedy action tensor([ 1.0940, -0.3841, -0.2413, 0.5215]) tensor([0.4866, 0.1110, 0.1280, 0.2745]) -Greedy action tensor([ 1.0598, -0.0641, -0.6020, 0.5647]) tensor([0.4707, 0.1530, 0.0893, 0.2869]) -Greedy action tensor([ 1.0251, -0.1166, -0.4798, 0.3424]) tensor([0.4886, 0.1560, 0.1085, 0.2469]) -Greedy action tensor([ 0.8752, -0.5693, -0.4333, 0.5430]) tensor([0.4497, 0.1061, 0.1215, 0.3226]) -Greedy action tensor([ 1.0161, -0.3311, 0.0398, 0.5163]) tensor([0.4458, 0.1159, 0.1679, 0.2704]) -Greedy action tensor([ 1.4223, -0.3104, -0.3151, 0.4328]) tensor([0.5799, 0.1025, 0.1020, 0.2156]) -Greedy action tensor([ 1.5435, 0.2760, -0.1500, -0.3516]) tensor([0.6189, 0.1742, 0.1138, 0.0930]) -Greedy action tensor([ 0.7698, -0.5152, -0.4529, 0.7061]) tensor([0.3985, 0.1102, 0.1173, 0.3739]) -Greedy action tensor([ 1.2532, -0.3178, -0.2957, 0.1708]) tensor([0.5685, 0.1182, 0.1208, 0.1926]) -Greedy action tensor([ 1.8353, -0.3797, -0.6697, 0.6012]) tensor([0.6748, 0.0737, 0.0551, 0.1964]) -Greedy action tensor([ 0.9350, -0.3836, -0.2245, 0.0487]) tensor([0.5017, 0.1342, 0.1573, 0.2068]) -Greedy action tensor([ 1.7000, -0.5502, -0.4753, 0.2988]) tensor([0.6825, 0.0719, 0.0775, 0.1681]) -Greedy action tensor([ 1.7468, -0.0063, -0.1016, 0.3622]) tensor([0.6325, 0.1096, 0.0996, 0.1584]) -Greedy action tensor([ 2.3206, -0.8438, -0.1535, 0.7735]) tensor([0.7466, 0.0315, 0.0629, 0.1589]) -Greedy action tensor([ 2.3504, -1.1735, -0.4235, 0.8893]) tensor([0.7554, 0.0223, 0.0471, 0.1752]) -Greedy action tensor([ 0.7189, -0.3800, -0.2343, 0.2179]) tensor([0.4302, 0.1433, 0.1658, 0.2606]) -Greedy action tensor([ 0.7492, -0.1911, 0.2172, 0.0192]) tensor([0.4065, 0.1588, 0.2388, 0.1959]) -Greedy action tensor([ 1.1977, -0.3600, 0.0452, 0.8015]) tensor([0.4547, 0.0958, 0.1436, 0.3059]) -Greedy action tensor([ 0.8721, -0.4252, -0.5065, 0.5875]) tensor([0.4391, 0.1200, 0.1106, 0.3303]) -Greedy action tensor([ 1.7365, -0.5515, -0.2651, 0.2704]) tensor([0.6815, 0.0691, 0.0921, 0.1573]) -Greedy action tensor([ 1.5768, -0.9339, -0.3768, 0.3486]) tensor([0.6597, 0.0536, 0.0935, 0.1932]) -Greedy action tensor([ 1.5087, -0.5826, -0.2460, 0.4681]) tensor([0.6062, 0.0749, 0.1048, 0.2141]) -Greedy action tensor([ 1.3912, -0.4804, -0.3008, 0.8513]) tensor([0.5206, 0.0801, 0.0959, 0.3034]) -Greedy action tensor([ 0.1462, -0.0473, -0.1439, 0.1528]) tensor([0.2794, 0.2303, 0.2091, 0.2813]) -Greedy action tensor([ 1.1879, -0.2812, 0.0043, 0.3568]) tensor([0.5071, 0.1167, 0.1553, 0.2209]) -Greedy action tensor([ 1.3751, -0.0406, -0.4951, 0.3033]) tensor([0.5750, 0.1396, 0.0886, 0.1969]) -Greedy action tensor([ 1.5184, -0.4222, -0.5077, 0.3505]) tensor([0.6303, 0.0905, 0.0831, 0.1960]) -Greedy action tensor([ 1.4582, 0.3145, -0.1840, -0.1439]) tensor([0.5836, 0.1859, 0.1129, 0.1176]) -Greedy action tensor([ 1.4881, -0.6009, -0.0555, 0.6121]) tensor([0.5702, 0.0706, 0.1218, 0.2375]) -Greedy action tensor([ 0.6187, -0.1839, -0.2166, 0.2456]) tensor([0.3890, 0.1743, 0.1687, 0.2679]) -Greedy action tensor([ 0.8898, -0.4144, -0.1493, 0.2576]) tensor([0.4637, 0.1258, 0.1640, 0.2464]) -Greedy action tensor([ 1.8150, -0.8600, -0.2805, 0.3955]) tensor([0.6975, 0.0481, 0.0858, 0.1687]) -Greedy action tensor([ 0.8632, -0.4530, -0.1545, 0.2056]) tensor([0.4656, 0.1249, 0.1683, 0.2412]) -Greedy action tensor([ 0.6785, -0.3657, -0.2018, 0.1650]) tensor([0.4228, 0.1488, 0.1753, 0.2530]) -Greedy action tensor([ 1.2957, -0.6143, 0.0674, 0.0608]) tensor([0.5775, 0.0855, 0.1691, 0.1680]) -Greedy action tensor([ 2.2289, -0.6393, -0.0765, 0.7009]) tensor([0.7281, 0.0414, 0.0726, 0.1580]) -Greedy action tensor([ 0.0428, -0.0572, -0.1745, 0.0141]) tensor([0.2716, 0.2458, 0.2186, 0.2640]) -Greedy action tensor([ 1.3895, -0.6149, -0.3485, 0.5746]) tensor([0.5703, 0.0768, 0.1003, 0.2525]) -Greedy action tensor([ 1.7279, -0.2206, -0.2880, 0.3858]) tensor([0.6506, 0.0927, 0.0867, 0.1700]) -Greedy action tensor([ 0.9963, -0.3000, -0.6672, 0.4605]) tensor([0.4882, 0.1335, 0.0925, 0.2857]) -Greedy action tensor([ 1.4130, -0.3736, -0.3090, 0.4464]) tensor([0.5792, 0.0970, 0.1035, 0.2203]) -Greedy action tensor([ 1.7755, -0.0293, -0.3998, -0.4478]) tensor([0.7213, 0.1187, 0.0819, 0.0781]) -Greedy action tensor([ 0.3836, -0.2273, -0.0218, 0.0514]) tensor([0.3417, 0.1855, 0.2278, 0.2451]) -Greedy action tensor([ 1.5186, -0.4651, -0.2492, 0.4301]) tensor([0.6079, 0.0836, 0.1038, 0.2047]) -Greedy action tensor([ 1.5314, -0.3999, 0.1066, 0.0866]) tensor([0.6168, 0.0894, 0.1484, 0.1454]) -Greedy action tensor([ 1.2023, -0.1874, -0.8200, 0.2387]) tensor([0.5672, 0.1413, 0.0751, 0.2164]) -Greedy action tensor([ 1.2030, -0.7157, -0.0804, 0.2694]) tensor([0.5504, 0.0808, 0.1525, 0.2164]) -Greedy action tensor([ 1.7590, -0.3859, -0.1720, 0.4168]) tensor([0.6565, 0.0769, 0.0952, 0.1715]) -Greedy action tensor([ 1.1328, -0.5740, -0.2439, 0.2126]) tensor([0.5458, 0.0990, 0.1378, 0.2174]) -Greedy action tensor([ 0.8109, -0.3933, -0.0968, -0.0745]) tensor([0.4726, 0.1418, 0.1907, 0.1950]) -Greedy action tensor([ 1.5647, -0.7261, -0.3089, 0.4482]) tensor([0.6320, 0.0640, 0.0971, 0.2069]) -Greedy action tensor([ 1.4336, -0.1674, -0.8478, 0.4087]) tensor([0.6014, 0.1213, 0.0614, 0.2158]) -Greedy action tensor([ 1.6425, -0.6635, -0.3501, 0.5587]) tensor([0.6352, 0.0633, 0.0866, 0.2149]) -Greedy action tensor([ 1.6015, -0.6821, -0.0917, 0.4928]) tensor([0.6189, 0.0631, 0.1138, 0.2042]) -Greedy action tensor([ 0.7611, -0.5655, -0.2524, 0.2226]) tensor([0.4521, 0.1200, 0.1641, 0.2639]) -Greedy action tensor([ 0.9509, -0.2320, -0.3898, 0.3410]) tensor([0.4736, 0.1451, 0.1239, 0.2574]) -Greedy action tensor([ 1.3239, -0.4380, -0.3618, -0.1376]) tensor([0.6294, 0.1081, 0.1166, 0.1459]) -Greedy action tensor([ 0.4345, -0.1780, -0.0564, -0.4409]) tensor([0.3890, 0.2108, 0.2381, 0.1621]) -Greedy action tensor([ 1.2781, -0.5687, -0.1365, -0.2111]) tensor([0.6149, 0.0970, 0.1494, 0.1387]) -Greedy action tensor([ 1.2114, -1.5836, -0.2282, -0.6115]) tensor([0.6851, 0.0419, 0.1624, 0.1107]) -Greedy action tensor([ 0.9045, -0.9380, 0.0624, -0.3238]) tensor([0.5314, 0.0842, 0.2289, 0.1556]) -Greedy action tensor([ 0.5920, -0.2888, -0.2548, -0.5001]) tensor([0.4590, 0.1902, 0.1968, 0.1540]) -Greedy action tensor([ 0.6688, -0.5002, 0.1548, -0.6759]) tensor([0.4610, 0.1432, 0.2757, 0.1201]) -Greedy action tensor([ 0.7428, -0.2471, -0.0544, -0.0948]) tensor([0.4435, 0.1648, 0.1998, 0.1919]) -Greedy action tensor([ 0.9237, -0.5245, -0.0587, -0.4707]) tensor([0.5384, 0.1265, 0.2016, 0.1335]) -Greedy action tensor([ 0.6025, -0.6089, -0.0144, -0.4099]) tensor([0.4544, 0.1353, 0.2452, 0.1651]) -Greedy action tensor([ 0.5325, 0.1721, -0.1186, -0.1691]) tensor([0.3684, 0.2569, 0.1921, 0.1826]) -Greedy action tensor([ 1.1006, -0.6363, 0.0448, -0.3986]) tensor([0.5723, 0.1008, 0.1991, 0.1278]) -Greedy action tensor([ 0.6962, -0.4097, 0.0918, -0.4157]) tensor([0.4533, 0.1500, 0.2477, 0.1491]) -Greedy action tensor([ 0.5993, -0.1327, 0.1748, -0.3540]) tensor([0.3968, 0.1908, 0.2595, 0.1529]) -Greedy action tensor([ 1.0430, -0.6546, -0.1894, -0.5536]) tensor([0.5962, 0.1092, 0.1738, 0.1208]) -Greedy action tensor([ 0.8481, -0.2303, -0.0472, -0.4139]) tensor([0.4922, 0.1674, 0.2010, 0.1393]) -Greedy action tensor([ 0.5313, -0.3489, -0.0376, -0.4569]) tensor([0.4250, 0.1762, 0.2406, 0.1582]) -Greedy action tensor([ 0.4543, -0.2066, -0.0405, -0.0749]) tensor([0.3683, 0.1902, 0.2246, 0.2169]) -Greedy action tensor([ 0.1468, -0.0787, 0.0298, -0.2916]) tensor([0.3000, 0.2395, 0.2669, 0.1936]) -Greedy action tensor([ 0.8731, -0.2172, 0.1085, -0.9279]) tensor([0.5084, 0.1709, 0.2367, 0.0840]) -Greedy action tensor([ 0.4839, -0.1657, -0.0482, -0.2149]) tensor([0.3836, 0.2003, 0.2253, 0.1907]) -Greedy action tensor([ 0.6657, -0.6515, 0.0070, -0.4793]) tensor([0.4754, 0.1273, 0.2460, 0.1513]) -Greedy action tensor([ 0.4443, -0.1943, 0.0250, -0.1644]) tensor([0.3663, 0.1934, 0.2409, 0.1993]) -Greedy action tensor([ 0.2830, 0.1324, -0.0740, -0.0091]) tensor([0.3024, 0.2601, 0.2116, 0.2258]) -Greedy action tensor([ 0.6642, -0.3667, -0.0328, -0.5941]) tensor([0.4675, 0.1668, 0.2329, 0.1328]) -Greedy action tensor([ 0.5687, -0.0859, -0.0666, -0.0146]) tensor([0.3835, 0.1993, 0.2032, 0.2140]) -Greedy action tensor([ 0.1849, -0.0691, 0.0038, -0.1966]) tensor([0.3037, 0.2356, 0.2534, 0.2074]) -Greedy action tensor([ 1.0162, -0.7901, 0.1135, -0.4716]) tensor([0.5569, 0.0915, 0.2258, 0.1258]) -Greedy action tensor([ 0.8609, -0.6930, -0.0210, -0.2391]) tensor([0.5106, 0.1080, 0.2114, 0.1700]) -Greedy action tensor([ 0.7245, -0.6116, -0.0293, -0.1866]) tensor([0.4683, 0.1231, 0.2204, 0.1883]) -Greedy action tensor([ 1.0914, -0.7184, 0.0156, -0.5266]) tensor([0.5872, 0.0961, 0.2003, 0.1164]) -Greedy action tensor([ 0.9635, -0.5840, 0.0231, -0.5993]) tensor([0.5516, 0.1174, 0.2154, 0.1156]) -Greedy action tensor([ 0.3983, 0.0612, -0.2218, -0.3567]) tensor([0.3674, 0.2623, 0.1976, 0.1727]) -Greedy action tensor([ 0.6846, -0.3280, -0.1970, -0.3607]) tensor([0.4697, 0.1706, 0.1945, 0.1651]) -Greedy action tensor([ 1.3191, -1.1129, 0.0398, -0.5410]) tensor([0.6571, 0.0577, 0.1828, 0.1023]) -Greedy action tensor([ 0.6867, 0.1108, -0.1549, -0.2463]) tensor([0.4190, 0.2356, 0.1806, 0.1648]) -Greedy action tensor([ 1.0924, -1.0068, 0.0712, -0.6074]) tensor([0.6004, 0.0736, 0.2163, 0.1097]) -Greedy action tensor([ 0.8068, -0.4306, -0.1064, -0.5548]) tensor([0.5134, 0.1490, 0.2060, 0.1316]) -Greedy action tensor([ 0.5661, -0.3518, 0.0336, -0.0959]) tensor([0.3996, 0.1596, 0.2346, 0.2061]) -Greedy action tensor([ 0.6008, -0.4373, -0.0238, -0.2560]) tensor([0.4321, 0.1530, 0.2314, 0.1834]) -Greedy action tensor([ 0.5487, -0.4349, -0.1082, -0.2022]) tensor([0.4229, 0.1582, 0.2193, 0.1996]) -Greedy action tensor([ 0.4559, -0.0118, -0.1310, -0.0758]) tensor([0.3610, 0.2261, 0.2007, 0.2121]) -Greedy action tensor([ 0.7314, -0.4654, -0.0918, -0.0807]) tensor([0.4576, 0.1383, 0.2009, 0.2032]) -Greedy action tensor([ 0.5133, -0.0451, 0.0677, -0.1855]) tensor([0.3690, 0.2111, 0.2363, 0.1835]) -Greedy action tensor([ 0.6557, -0.3151, -0.0270, -0.1844]) tensor([0.4318, 0.1636, 0.2182, 0.1864]) -Greedy action tensor([ 0.4883, -0.3269, 0.2076, -0.5027]) tensor([0.3893, 0.1723, 0.2940, 0.1445]) -Greedy action tensor([ 0.3091, -0.1037, -0.1056, -0.3048]) tensor([0.3492, 0.2311, 0.2307, 0.1890]) -Greedy action tensor([ 0.2902, 0.1844, 0.0203, -0.2462]) tensor([0.3079, 0.2770, 0.2351, 0.1801]) -Greedy action tensor([ 0.4360, -0.4851, -0.0118, -0.2571]) tensor([0.3942, 0.1569, 0.2519, 0.1971]) -Greedy action tensor([ 0.8894, -0.4268, -0.0963, -0.3987]) tensor([0.5216, 0.1399, 0.1947, 0.1439]) -Greedy action tensor([ 0.6139, -0.1582, -0.0604, -0.0547]) tensor([0.4026, 0.1860, 0.2051, 0.2063]) -Greedy action tensor([ 0.8142, -0.4663, -0.0126, -0.3555]) tensor([0.4936, 0.1372, 0.2159, 0.1532]) -Greedy action tensor([ 0.4821, -0.6017, -0.2179, -0.0523]) tensor([0.4131, 0.1397, 0.2051, 0.2421]) -Greedy action tensor([ 0.9061, -0.7501, -0.0280, -0.4402]) tensor([0.5423, 0.1035, 0.2131, 0.1411]) -Greedy action tensor([ 0.5226, -0.0878, 0.2443, -0.3954]) tensor([0.3704, 0.2012, 0.2804, 0.1479]) -Greedy action tensor([ 0.8434, -0.4553, -0.0059, -0.2861]) tensor([0.4941, 0.1348, 0.2113, 0.1597]) -Greedy action tensor([ 0.5840, -0.3958, -0.0133, -0.7767]) tensor([0.4583, 0.1720, 0.2522, 0.1175]) -Greedy action tensor([ 0.4100, -0.3079, -0.0728, -0.1976]) tensor([0.3774, 0.1841, 0.2329, 0.2056]) -Greedy action tensor([ 0.8624, -0.9229, -0.0237, -0.5954]) tensor([0.5516, 0.0925, 0.2274, 0.1284]) -Greedy action tensor([ 0.5534, -0.1761, 0.0253, -0.1777]) tensor([0.3917, 0.1888, 0.2310, 0.1885]) -Greedy action tensor([ 0.3547, 0.0961, -0.0512, -0.3376]) tensor([0.3403, 0.2627, 0.2267, 0.1703]) -Greedy action tensor([ 0.6763, -0.2875, 0.0090, -0.3942]) tensor([0.4470, 0.1705, 0.2293, 0.1532]) -Greedy action tensor([ 0.6283, -0.4633, -0.0825, -0.5434]) tensor([0.4680, 0.1571, 0.2299, 0.1450]) -Greedy action tensor([ 0.8810, -0.7771, -0.0263, -0.4178]) tensor([0.5356, 0.1020, 0.2162, 0.1461]) -Greedy action tensor([ 0.6937, -0.2986, 0.0137, -0.1118]) tensor([0.4303, 0.1595, 0.2180, 0.1923]) -Greedy action tensor([ 0.7717, -0.4195, -0.1725, -0.1499]) tensor([0.4783, 0.1453, 0.1860, 0.1903]) -Greedy action tensor([ 0.6315, -0.5920, -0.0164, -0.3251]) tensor([0.4542, 0.1336, 0.2376, 0.1745]) -Greedy action tensor([ 0.6218, 0.1595, 0.1281, -0.4348]) tensor([0.3864, 0.2434, 0.2359, 0.1343]) -Greedy action tensor([ 0.6986, -0.4934, -0.0566, -0.3059]) tensor([0.4673, 0.1419, 0.2196, 0.1712]) -Greedy action tensor([ 0.1312, -0.2839, -0.2024, -0.0542]) tensor([0.3118, 0.2059, 0.2233, 0.2590]) -Greedy action tensor([ 0.6795, -0.7761, 0.1913, -0.5662]) tensor([0.4685, 0.1093, 0.2875, 0.1348]) -Greedy action tensor([ 0.5859, -0.3295, -0.0035, -0.2083]) tensor([0.4155, 0.1663, 0.2304, 0.1878]) -Greedy action tensor([ 0.4758, -0.2430, 0.0160, -0.1016]) tensor([0.3731, 0.1818, 0.2356, 0.2094]) -Greedy action tensor([ 0.4000, -0.1895, 0.1348, -0.4071]) tensor([0.3613, 0.2004, 0.2771, 0.1612]) -Greedy action tensor([ 0.3958, 0.0415, 0.0122, -0.1137]) tensor([0.3351, 0.2351, 0.2284, 0.2014]) -Greedy action tensor([ 0.1473, -0.1681, -0.0433, -0.2527]) tensor([0.3100, 0.2261, 0.2562, 0.2078]) -Greedy action tensor([ 0.2429, 0.2221, -0.2974, 0.0979]) tensor([0.2918, 0.2858, 0.1700, 0.2524]) -Greedy action tensor([ 0.4951, -0.2838, -0.0279, -0.5005]) tensor([0.4130, 0.1896, 0.2448, 0.1526]) -Greedy action tensor([ 0.5011, -0.3758, 0.1170, -0.5321]) tensor([0.4077, 0.1696, 0.2777, 0.1451]) -Greedy action tensor([ 0.8075, -0.2067, -0.0409, -0.1997]) tensor([0.4638, 0.1682, 0.1986, 0.1694]) -Greedy action tensor([ 0.9225, -0.3782, -0.1433, -0.4855]) tensor([0.5372, 0.1463, 0.1851, 0.1314]) -Greedy action tensor([ 0.6901, -0.2703, -0.0862, -0.1159]) tensor([0.4368, 0.1672, 0.2010, 0.1951]) -Greedy action tensor([ 0.9927, -0.4911, 1.2751, -0.2160]) tensor([0.3507, 0.0795, 0.4651, 0.1047]) -Greedy action tensor([ 1.1915, -0.9710, 1.3331, 1.4673]) tensor([0.2790, 0.0321, 0.3214, 0.3675]) -Greedy action tensor([ 0.1091, -1.4626, 1.0391, 0.6471]) tensor([0.1833, 0.0381, 0.4646, 0.3140]) -Greedy action tensor([ 0.2970, -1.1152, -0.2219, -0.2274]) tensor([0.4114, 0.1002, 0.2449, 0.2435]) -Greedy action tensor([-0.1540, -0.1377, -0.0174, 0.2605]) tensor([0.2138, 0.2174, 0.2451, 0.3237]) -Greedy action tensor([-0.7199, -0.6554, 1.2764, -0.2804]) tensor([0.0911, 0.0971, 0.6705, 0.1413]) -Greedy action tensor([-0.3757, -0.0105, 0.7123, 0.1175]) tensor([0.1419, 0.2045, 0.4212, 0.2324]) -Greedy action tensor([ 0.8566, -1.2330, 1.7784, -0.4548]) tensor([0.2559, 0.0317, 0.6434, 0.0690]) -Greedy action tensor([ 0.1362, -0.3189, 0.7292, 0.2678]) tensor([0.2181, 0.1384, 0.3947, 0.2488]) -Greedy action tensor([-4.3119e-01, -1.5736e-04, 1.3235e+00, -6.6889e-01]) tensor([0.1098, 0.1689, 0.6347, 0.0866]) -Greedy action tensor([ 1.0110, -1.0972, 0.7902, 0.4286]) tensor([0.4029, 0.0489, 0.3231, 0.2251]) -Greedy action tensor([-0.4179, 0.2731, 0.3979, -0.8543]) tensor([0.1694, 0.3381, 0.3830, 0.1095]) -Greedy action tensor([-0.5648, -1.4391, 0.7540, 0.6205]) tensor([0.1187, 0.0495, 0.4436, 0.3882]) -Greedy action tensor([-0.2520, -0.8899, -0.4850, 0.3590]) tensor([0.2402, 0.1269, 0.1903, 0.4426]) -Greedy action tensor([-0.2561, -0.5255, -0.1238, 0.1297]) tensor([0.2285, 0.1746, 0.2608, 0.3361]) -Greedy action tensor([-0.0997, -0.1295, 0.8588, 0.1956]) tensor([0.1689, 0.1639, 0.4403, 0.2269]) -Greedy action tensor([ 1.4377, -1.4039, 1.9320, 0.4692]) tensor([0.3250, 0.0190, 0.5327, 0.1234]) -Greedy action tensor([ 0.8896, -1.3883, -0.7111, 0.5065]) tensor([0.5035, 0.0516, 0.1016, 0.3433]) -Greedy action tensor([ 0.3347, -2.0353, 0.5111, 0.3689]) tensor([0.3011, 0.0281, 0.3592, 0.3116]) -Greedy action tensor([-0.1364, 0.1952, -0.3468, 0.4284]) tensor([0.2015, 0.2807, 0.1633, 0.3545]) -Greedy action tensor([-1.8308, -1.0087, -0.0538, -0.3106]) tensor([0.0727, 0.1654, 0.4296, 0.3323]) -Greedy action tensor([ 0.8175, -0.9898, -0.4569, 0.1321]) tensor([0.5135, 0.0843, 0.1436, 0.2587]) -Greedy action tensor([-0.8437, 0.4311, -0.3037, 0.0140]) tensor([0.1156, 0.4135, 0.1984, 0.2725]) -Greedy action tensor([ 0.1898, -0.3907, 0.5145, 0.2482]) tensor([0.2498, 0.1398, 0.3456, 0.2648]) -Greedy action tensor([-0.2119, -1.1576, -0.2870, 0.1451]) tensor([0.2670, 0.1037, 0.2477, 0.3816]) -Greedy action tensor([-0.3916, -0.2649, 0.1657, 0.0401]) tensor([0.1845, 0.2094, 0.3221, 0.2841]) -Greedy action tensor([ 0.1868, -0.0091, 1.2943, 0.8038]) tensor([0.1492, 0.1227, 0.4516, 0.2765]) -Greedy action tensor([-0.4330, -1.1619, 0.6469, 0.1224]) tensor([0.1621, 0.0782, 0.4772, 0.2825]) -Greedy action tensor([ 1.1496, -1.5043, 0.6317, 1.1281]) tensor([0.3781, 0.0266, 0.2252, 0.3701]) -Greedy action tensor([ 0.2978, 0.1194, -0.0657, 1.0346]) tensor([0.2164, 0.1810, 0.1505, 0.4521]) -Greedy action tensor([ 0.7150, 1.0949, 0.4676, -0.8868]) tensor([0.2903, 0.4245, 0.2267, 0.0585]) -Greedy action tensor([ 0.1558, -1.2499, 0.5527, -0.1563]) tensor([0.2887, 0.0708, 0.4293, 0.2113]) -Greedy action tensor([ 0.4754, -0.2565, 1.1319, 0.3673]) tensor([0.2322, 0.1117, 0.4477, 0.2084]) -Greedy action tensor([ 0.7148, -0.1995, 1.1316, -0.5189]) tensor([0.3116, 0.1249, 0.4727, 0.0907]) -Greedy action tensor([-0.6933, -1.4033, 0.2073, 0.0682]) tensor([0.1641, 0.0807, 0.4039, 0.3514]) -Greedy action tensor([-0.0559, -0.2238, -0.4993, 0.9386]) tensor([0.1927, 0.1629, 0.1237, 0.5208]) -Greedy action tensor([ 1.0858, 0.6463, -0.2100, 0.8875]) tensor([0.3652, 0.2353, 0.1000, 0.2995]) -Greedy action tensor([ 1.6330, -0.1432, 0.9721, 0.0879]) tensor([0.5266, 0.0891, 0.2719, 0.1123]) -Greedy action tensor([-0.9087, -1.2315, 0.4081, -0.7369]) tensor([0.1505, 0.1090, 0.5617, 0.1787]) -Greedy action tensor([ 0.6264, 0.4521, -0.9050, 1.2998]) tensor([0.2489, 0.2091, 0.0538, 0.4881]) -Greedy action tensor([ 1.6609, -0.2836, 1.7639, 0.5517]) tensor([0.3874, 0.0554, 0.4294, 0.1278]) -Greedy action tensor([0.9799, 1.3306, 0.2145, 0.4993]) tensor([0.2854, 0.4053, 0.1328, 0.1765]) -Greedy action tensor([ 0.3903, -1.2605, 0.3349, 0.1831]) tensor([0.3389, 0.0650, 0.3206, 0.2755]) -Greedy action tensor([-0.4701, -1.6236, 0.5518, 1.2823]) tensor([0.1014, 0.0320, 0.2817, 0.5849]) -Greedy action tensor([-0.7235, 0.5615, 0.3915, -0.3297]) tensor([0.1093, 0.3952, 0.3334, 0.1621]) -Greedy action tensor([-1.0843, -0.9633, -0.6977, -0.0871]) tensor([0.1584, 0.1788, 0.2332, 0.4295]) -Greedy action tensor([ 0.8045, -1.0976, 0.5740, 0.6547]) tensor([0.3566, 0.0532, 0.2832, 0.3070]) -Greedy action tensor([-0.0137, -1.3906, -0.7851, 0.0382]) tensor([0.3613, 0.0912, 0.1670, 0.3805]) -Greedy action tensor([-0.1435, 0.3225, -0.6005, 0.9072]) tensor([0.1643, 0.2618, 0.1040, 0.4698]) -Greedy action tensor([ 0.5898, 0.1532, -0.7074, -0.1550]) tensor([0.4177, 0.2699, 0.1141, 0.1983]) -Greedy action tensor([-0.8316, -0.7086, 0.4851, -1.3515]) tensor([0.1549, 0.1752, 0.5779, 0.0921]) -Greedy action tensor([ 0.5399, -1.1263, 1.4827, 0.1111]) tensor([0.2269, 0.0429, 0.5825, 0.1478]) -Greedy action tensor([ 0.1669, 0.5106, 1.0907, -0.0513]) tensor([0.1744, 0.2460, 0.4394, 0.1402]) -Greedy action tensor([ 0.2432, -1.6818, 1.0997, -0.4256]) tensor([0.2492, 0.0363, 0.5868, 0.1277]) -Greedy action tensor([ 1.4072, -1.0790, 0.8941, 0.2430]) tensor([0.5015, 0.0417, 0.3002, 0.1566]) -Greedy action tensor([-0.1669, -1.1116, 0.5737, 1.1295]) tensor([0.1400, 0.0544, 0.2936, 0.5119]) -Greedy action tensor([-0.0307, -0.8659, 1.3807, 0.2159]) tensor([0.1467, 0.0637, 0.6018, 0.1878]) -Greedy action tensor([ 0.6221, -0.1380, 0.2565, 0.5073]) tensor([0.3275, 0.1532, 0.2273, 0.2920]) -Greedy action tensor([ 1.0173, -0.0855, -0.0971, 1.5685]) tensor([0.2945, 0.0978, 0.0966, 0.5111]) -Greedy action tensor([ 1.1486, 0.7944, -0.5314, 0.6804]) tensor([0.3977, 0.2791, 0.0741, 0.2490]) -Greedy action tensor([-0.1688, 0.0759, -1.0203, 0.4631]) tensor([0.2181, 0.2786, 0.0931, 0.4103]) -Greedy action tensor([ 2.4835, -0.5853, 0.7674, 0.7346]) tensor([0.7142, 0.0332, 0.1284, 0.1242]) -Greedy action tensor([ 0.6025, -0.5719, 0.7983, 0.9064]) tensor([0.2577, 0.0796, 0.3134, 0.3492]) -Greedy action tensor([0.6235, 0.1754, 0.0314, 0.3034]) tensor([0.3427, 0.2189, 0.1896, 0.2488]) -Greedy action tensor([-0.1735, -0.3297, -0.4024, -0.9226]) tensor([0.3202, 0.2738, 0.2546, 0.1514]) -Greedy action tensor([-0.9072, -0.3960, -0.6847, -0.5914]) tensor([0.1891, 0.3153, 0.2362, 0.2593]) -Greedy action tensor([-0.5755, -0.9049, 2.2055, 0.3754]) tensor([0.0489, 0.0352, 0.7893, 0.1266]) -Greedy action tensor([ 0.5660, -0.0963, 0.1786, -1.0780]) tensor([0.4188, 0.2160, 0.2843, 0.0809]) -Greedy action tensor([-1.3234, -1.1862, 0.4347, -1.0935]) tensor([0.1086, 0.1246, 0.6301, 0.1367]) -Greedy action tensor([ 1.3067, -1.2591, 0.1458, 1.3095]) tensor([0.4179, 0.0321, 0.1309, 0.4191]) -Greedy action tensor([-1.7253, -0.9572, 0.8845, 0.5055]) tensor([0.0384, 0.0827, 0.5217, 0.3572]) -Greedy action tensor([ 6.8523e-01, -1.4246e+00, -1.7989e-04, 1.2420e+00]) tensor([0.2967, 0.0360, 0.1495, 0.5178]) -Greedy action tensor([ 0.8945, 0.3655, -0.2453, 1.4716]) tensor([0.2710, 0.1597, 0.0867, 0.4826]) -Greedy action tensor([0.0992, 0.0566, 1.2365, 1.2776]) tensor([0.1201, 0.1151, 0.3745, 0.3903]) -Greedy action tensor([ 1.2108, -0.5878, 0.1383, 0.5802]) tensor([0.4902, 0.0811, 0.1677, 0.2609]) -Greedy action tensor([ 0.1584, 0.6921, 0.0878, -0.0407]) tensor([0.2244, 0.3826, 0.2091, 0.1839]) -Greedy action tensor([ 0.6672, -2.7296, 0.3102, 0.3252]) tensor([0.4092, 0.0137, 0.2864, 0.2907]) -Greedy action tensor([ 1.7589, -1.1079, 0.2906, 0.5398]) tensor([0.6318, 0.0359, 0.1455, 0.1867]) -Greedy action tensor([-0.0560, -1.2735, 1.8525, -0.0342]) tensor([0.1104, 0.0327, 0.7442, 0.1128]) -Greedy action tensor([-0.2794, -1.3394, 0.1328, 0.4259]) tensor([0.2049, 0.0710, 0.3094, 0.4147]) -Greedy action tensor([-0.4124, -0.9375, -0.6745, 0.0052]) tensor([0.2578, 0.1525, 0.1983, 0.3914]) -Greedy action tensor([-1.8730, -0.4599, 0.6293, -0.1631]) tensor([0.0438, 0.1798, 0.5344, 0.2420]) -Greedy action tensor([-1.3625, -0.2598, 0.4491, 0.5216]) tensor([0.0598, 0.1802, 0.3662, 0.3937]) -Greedy action tensor([-1.8075, -0.4454, 0.5917, -0.1105]) tensor([0.0468, 0.1827, 0.5153, 0.2553]) -Greedy action tensor([-1.7826, -0.4864, 0.5761, -0.0719]) tensor([0.0482, 0.1760, 0.5094, 0.2664]) -Greedy action tensor([-1.8405, -0.3889, 0.6163, -0.1668]) tensor([0.0449, 0.1917, 0.5239, 0.2394]) -Greedy action tensor([-1.6162, -0.2891, 0.5709, 0.1002]) tensor([0.0520, 0.1959, 0.4630, 0.2892]) -Greedy action tensor([-1.1884, -0.4550, 0.7608, 1.1921]) tensor([0.0478, 0.0996, 0.3358, 0.5168]) -Greedy action tensor([-0.8371, 0.0744, 0.1887, -0.1332]) tensor([0.1205, 0.2998, 0.3361, 0.2436]) -Greedy action tensor([-1.8399, -0.4856, 0.6043, -0.1242]) tensor([0.0455, 0.1765, 0.5248, 0.2532]) -Greedy action tensor([-1.8047, -0.3812, 0.6485, -0.0995]) tensor([0.0449, 0.1863, 0.5218, 0.2470]) -Greedy action tensor([-1.8098, -0.3086, 0.6302, -0.0734]) tensor([0.0442, 0.1982, 0.5068, 0.2508]) -Greedy action tensor([-1.5093, 0.2128, 0.4397, 0.3512]) tensor([0.0499, 0.2792, 0.3503, 0.3206]) -Greedy action tensor([-1.9119, -0.4655, 0.6521, -0.1576]) tensor([0.0416, 0.1769, 0.5408, 0.2407]) -Greedy action tensor([-1.8718, -0.4230, 0.6599, -0.0675]) tensor([0.0418, 0.1781, 0.5260, 0.2541]) -Greedy action tensor([-1.9087, -0.4154, 0.6434, -0.1570]) tensor([0.0416, 0.1851, 0.5337, 0.2397]) -Greedy action tensor([-0.8974, -0.1230, -0.0056, 0.6836]) tensor([0.0955, 0.2072, 0.2330, 0.4642]) -Greedy action tensor([-0.0739, 1.2216, 0.0433, 0.5180]) tensor([0.1318, 0.4816, 0.1482, 0.2383]) -Greedy action tensor([-1.8779, -0.3398, 0.6316, -0.1075]) tensor([0.0420, 0.1954, 0.5161, 0.2465]) -Greedy action tensor([-1.8770, -0.4642, 0.6260, -0.1346]) tensor([0.0434, 0.1783, 0.5304, 0.2479]) -Greedy action tensor([-1.9093, -0.4384, 0.6472, -0.1590]) tensor([0.0417, 0.1814, 0.5371, 0.2398]) -Greedy action tensor([-1.8597, -0.4623, 0.6442, -0.0966]) tensor([0.0433, 0.1751, 0.5293, 0.2524]) -Greedy action tensor([-1.8569, -0.4564, 0.6206, -0.1342]) tensor([0.0443, 0.1798, 0.5278, 0.2481]) -Greedy action tensor([-1.7452, -0.5077, 0.6696, 0.0759]) tensor([0.0458, 0.1580, 0.5129, 0.2833]) -Greedy action tensor([-1.9164, -0.3720, 0.6438, -0.1563]) tensor([0.0409, 0.1917, 0.5295, 0.2379]) -Greedy action tensor([-1.6762, -0.0896, 0.4852, 0.0471]) tensor([0.0496, 0.2422, 0.4304, 0.2777]) -Greedy action tensor([-1.8749, -0.4480, 0.6325, -0.1394]) tensor([0.0433, 0.1803, 0.5310, 0.2454]) -Greedy action tensor([-1.8961, -0.3591, 0.6413, -0.1531]) tensor([0.0416, 0.1937, 0.5267, 0.2380]) -Greedy action tensor([-1.9142, -0.4567, 0.6701, -0.1490]) tensor([0.0410, 0.1761, 0.5434, 0.2395]) -Greedy action tensor([-1.8929, -0.2649, 0.6021, -0.1426]) tensor([0.0417, 0.2125, 0.5057, 0.2401]) -Greedy action tensor([-1.9114, -0.4367, 0.6471, -0.1670]) tensor([0.0417, 0.1820, 0.5380, 0.2383]) -Greedy action tensor([-1.8605, -0.3342, 0.6102, -0.1288]) tensor([0.0433, 0.1993, 0.5125, 0.2448]) -Greedy action tensor([-1.9405, -0.4514, 0.6630, -0.1777]) tensor([0.0404, 0.1789, 0.5454, 0.2353]) -Greedy action tensor([-1.5855, -0.4364, 0.9257, 0.7402]) tensor([0.0374, 0.1181, 0.4613, 0.3832]) -Greedy action tensor([-1.6020, -0.5414, 0.4782, 0.0309]) tensor([0.0588, 0.1698, 0.4706, 0.3009]) -Greedy action tensor([-1.1698, -0.5225, 0.3465, 0.4299]) tensor([0.0805, 0.1539, 0.3669, 0.3988]) -Greedy action tensor([-1.1331, 0.2445, 0.2836, -0.0601]) tensor([0.0832, 0.3301, 0.3433, 0.2434]) -Greedy action tensor([-0.4878, 0.2713, 0.1170, -0.0881]) tensor([0.1548, 0.3308, 0.2835, 0.2309]) -Greedy action tensor([-1.7371, 0.0228, 0.5076, 0.0135]) tensor([0.0454, 0.2641, 0.4288, 0.2616]) -Greedy action tensor([-1.7694, -0.3600, 0.5527, -0.0680]) tensor([0.0481, 0.1971, 0.4909, 0.2639]) -Greedy action tensor([-1.7961, -0.4356, 0.5893, -0.1112]) tensor([0.0473, 0.1843, 0.5136, 0.2549]) -Greedy action tensor([-1.8884, -0.3890, 0.6347, -0.1402]) tensor([0.0422, 0.1891, 0.5262, 0.2425]) -Greedy action tensor([-1.8438, -0.4619, 0.6187, -0.1520]) tensor([0.0452, 0.1798, 0.5299, 0.2452]) -Greedy action tensor([-1.7889, -0.4069, 0.6377, -0.2659]) tensor([0.0479, 0.1907, 0.5419, 0.2195]) -Greedy action tensor([-1.8456, -0.4427, 0.6142, -0.1287]) tensor([0.0448, 0.1821, 0.5239, 0.2492]) -Greedy action tensor([-1.7312, -0.3181, 0.7598, 0.1565]) tensor([0.0420, 0.1727, 0.5076, 0.2777]) -Greedy action tensor([-1.9083, -0.4722, 0.6793, -0.1444]) tensor([0.0411, 0.1728, 0.5464, 0.2398]) -Greedy action tensor([-1.8141, -0.4888, 0.7630, 0.1378]) tensor([0.0401, 0.1507, 0.5271, 0.2821]) -Greedy action tensor([-1.8220, -0.4807, 0.6305, -0.0536]) tensor([0.0448, 0.1715, 0.5209, 0.2628]) -Greedy action tensor([-0.9696, -0.5894, 0.2112, 0.3249]) tensor([0.1067, 0.1561, 0.3476, 0.3895]) -Greedy action tensor([-1.2051, -0.5701, 0.5685, 0.5914]) tensor([0.0675, 0.1274, 0.3979, 0.4071]) -Greedy action tensor([-1.6277, -0.6286, 0.5761, -0.2474]) tensor([0.0597, 0.1621, 0.5408, 0.2374]) -Greedy action tensor([-0.8670, 0.1684, 0.1927, -0.4667]) tensor([0.1220, 0.3437, 0.3522, 0.1821]) -Greedy action tensor([-1.7034, -0.5121, 0.5417, -0.0091]) tensor([0.0522, 0.1716, 0.4924, 0.2838]) -Greedy action tensor([-1.8645, -0.3493, 0.6118, -0.1241]) tensor([0.0432, 0.1966, 0.5140, 0.2462]) -Greedy action tensor([-1.8538, -0.4008, 0.6158, -0.1396]) tensor([0.0442, 0.1888, 0.5218, 0.2452]) -Greedy action tensor([-0.8833, 0.0635, 0.2338, 0.5473]) tensor([0.0925, 0.2383, 0.2826, 0.3866]) -Greedy action tensor([-1.8881, -0.4488, 0.6460, -0.1496]) tensor([0.0425, 0.1794, 0.5361, 0.2420]) -Greedy action tensor([-1.7336, -0.4190, 0.5579, -0.0598]) tensor([0.0501, 0.1867, 0.4958, 0.2673]) -Greedy action tensor([-1.9364, -0.4357, 0.6625, -0.1764]) tensor([0.0404, 0.1812, 0.5435, 0.2349]) -Greedy action tensor([-1.8948, -0.3951, 0.6286, -0.1535]) tensor([0.0423, 0.1894, 0.5272, 0.2411]) -Greedy action tensor([-0.9179, 0.1900, 0.5716, 1.2955]) tensor([0.0568, 0.1720, 0.2518, 0.5194]) -Greedy action tensor([-1.8942, -0.4512, 0.6807, -0.1206]) tensor([0.0412, 0.1745, 0.5413, 0.2429]) -Greedy action tensor([-1.8378, -0.3944, 0.6071, -0.1393]) tensor([0.0450, 0.1905, 0.5186, 0.2459]) -Greedy action tensor([-1.6145, -0.4971, 0.5917, -0.2340]) tensor([0.0584, 0.1786, 0.5306, 0.2324]) -Greedy action tensor([-1.8852, -0.4623, 0.6468, -0.1378]) tensor([0.0426, 0.1768, 0.5360, 0.2446]) -Greedy action tensor([-1.8769, -0.3855, 0.6362, -0.1330]) tensor([0.0425, 0.1890, 0.5251, 0.2433]) -Greedy action tensor([-1.6095, 0.3702, 0.3812, 0.1083]) tensor([0.0473, 0.3426, 0.3464, 0.2637]) -Greedy action tensor([-1.0456, 0.2093, 0.4113, -0.5830]) tensor([0.0963, 0.3376, 0.4132, 0.1529]) -Greedy action tensor([-1.8934, -0.3300, 0.6295, -0.1340]) tensor([0.0416, 0.1986, 0.5183, 0.2415]) -Greedy action tensor([-1.7553, -0.3527, 0.5528, -0.0719]) tensor([0.0488, 0.1983, 0.4904, 0.2626]) -Greedy action tensor([-1.2868, -0.5633, 0.3357, 0.1622]) tensor([0.0807, 0.1665, 0.4090, 0.3438]) -Greedy action tensor([-0.0068, 0.0138, 1.0279, 1.7571]) tensor([0.0937, 0.0957, 0.2638, 0.5469]) -Greedy action tensor([-1.9191, -0.4703, 0.6565, -0.1679]) tensor([0.0414, 0.1763, 0.5439, 0.2385]) -Greedy action tensor([-1.9459, -0.4527, 0.6667, -0.1815]) tensor([0.0401, 0.1786, 0.5470, 0.2342]) -Greedy action tensor([-1.8279, -0.4709, 0.7151, 0.0619]) tensor([0.0413, 0.1604, 0.5251, 0.2732]) -Greedy action tensor([-1.7240, -0.9183, 0.8217, 0.4342]) tensor([0.0406, 0.0908, 0.5174, 0.3512]) -Greedy action tensor([-1.8285, -0.4803, 0.6529, -0.0402]) tensor([0.0439, 0.1690, 0.5248, 0.2624]) -Greedy action tensor([-0.6097, -0.5269, 0.1745, 0.2610]) tensor([0.1500, 0.1630, 0.3286, 0.3583]) -Greedy action tensor([-1.8169, -0.4373, 0.6007, -0.1125]) tensor([0.0461, 0.1832, 0.5172, 0.2535]) -Greedy action tensor([-1.8673, -0.3246, 0.6125, -0.1156]) tensor([0.0428, 0.2000, 0.5106, 0.2465]) -Greedy action tensor([-1.6499, -0.5050, 0.6899, 0.2944]) tensor([0.0465, 0.1461, 0.4825, 0.3249]) -Greedy action tensor([ 2.0584, -0.7451, -0.2253, 0.6247]) tensor([0.7138, 0.0433, 0.0727, 0.1702]) -Greedy action tensor([ 1.7059, -1.1108, -0.0170, 0.3114]) tensor([0.6728, 0.0402, 0.1201, 0.1668]) -Greedy action tensor([ 0.5636, -0.3800, 0.0229, 0.1283]) tensor([0.3819, 0.1486, 0.2224, 0.2471]) -Greedy action tensor([ 1.4509, -0.9884, -0.6120, 1.1969]) tensor([0.5025, 0.0438, 0.0639, 0.3898]) -Greedy action tensor([ 1.9323, 0.3328, -0.3152, 0.5888]) tensor([0.6375, 0.1288, 0.0674, 0.1663]) -Greedy action tensor([ 1.6185, -0.9201, -0.0807, 0.3031]) tensor([0.6535, 0.0516, 0.1195, 0.1754]) -Greedy action tensor([ 1.2262, -0.6714, -0.2605, 0.4540]) tensor([0.5441, 0.0816, 0.1230, 0.2514]) -Greedy action tensor([ 0.8819, -0.1165, -0.0669, 0.3359]) tensor([0.4283, 0.1578, 0.1658, 0.2481]) -Greedy action tensor([ 1.6550, -0.7579, -0.3039, 0.3157]) tensor([0.6700, 0.0600, 0.0945, 0.1755]) -Greedy action tensor([ 0.7665, -0.1752, -0.0037, -0.1363]) tensor([0.4428, 0.1727, 0.2050, 0.1795]) -Greedy action tensor([ 1.7451, -0.7399, -0.3871, 0.4475]) tensor([0.6779, 0.0565, 0.0804, 0.1852]) -Greedy action tensor([ 1.7509, -0.3918, -0.2165, 0.9364]) tensor([0.5882, 0.0690, 0.0822, 0.2605]) -Greedy action tensor([ 1.0693, -0.5053, -0.5528, 0.4924]) tensor([0.5086, 0.1053, 0.1004, 0.2856]) -Greedy action tensor([ 1.4146, -0.5178, -0.3538, 0.1611]) tensor([0.6246, 0.0904, 0.1066, 0.1783]) -Greedy action tensor([ 1.4905, -0.4107, -0.2697, 0.4661]) tensor([0.5951, 0.0889, 0.1024, 0.2137]) -Greedy action tensor([ 0.8120, -0.1077, 0.1382, 0.0188]) tensor([0.4236, 0.1689, 0.2159, 0.1916]) -Greedy action tensor([ 1.7358, -0.5282, -0.2988, 0.3765]) tensor([0.6705, 0.0697, 0.0877, 0.1722]) -Greedy action tensor([ 1.7937, -0.5045, -0.5513, 0.3486]) tensor([0.6983, 0.0701, 0.0669, 0.1646]) -Greedy action tensor([ 1.3394, 0.1794, -0.9412, 0.5174]) tensor([0.5390, 0.1690, 0.0551, 0.2369]) -Greedy action tensor([ 1.3678, -0.5176, -0.1025, 0.6516]) tensor([0.5347, 0.0811, 0.1229, 0.2613]) -Greedy action tensor([ 1.3402, -0.1723, -0.2703, 0.6216]) tensor([0.5242, 0.1155, 0.1047, 0.2555]) -Greedy action tensor([ 1.3565, -0.2599, -0.0764, -0.0298]) tensor([0.5927, 0.1177, 0.1414, 0.1482]) -Greedy action tensor([ 1.0377, -0.0188, -0.2544, 0.4153]) tensor([0.4632, 0.1610, 0.1272, 0.2486]) -Greedy action tensor([ 1.0885, -0.4217, 0.0088, -0.1673]) tensor([0.5419, 0.1197, 0.1841, 0.1544]) -Greedy action tensor([ 1.6869, -0.1088, -0.4934, 0.9456]) tensor([0.5697, 0.0946, 0.0644, 0.2714]) -Greedy action tensor([ 2.1885, -0.6322, -0.1723, 0.9487]) tensor([0.6928, 0.0413, 0.0654, 0.2005]) -Greedy action tensor([ 1.3185, -0.1641, -0.4940, 0.6974]) tensor([0.5188, 0.1178, 0.0847, 0.2788]) -Greedy action tensor([ 1.0163, -0.2540, -0.5939, 0.8215]) tensor([0.4341, 0.1219, 0.0868, 0.3573]) -Greedy action tensor([ 1.2635, -0.5236, -0.3557, 0.5921]) tensor([0.5329, 0.0892, 0.1055, 0.2723]) -Greedy action tensor([ 0.7215, -0.3624, 0.1929, 0.0331]) tensor([0.4115, 0.1392, 0.2426, 0.2067]) -Greedy action tensor([ 1.6534, -0.5740, -0.3587, -0.0423]) tensor([0.7018, 0.0757, 0.0938, 0.1288]) -Greedy action tensor([ 1.2793, -0.6579, -0.2580, 0.3368]) tensor([0.5718, 0.0824, 0.1229, 0.2228]) -Greedy action tensor([ 1.8546, -0.1390, -0.0845, -0.0983]) tensor([0.7033, 0.0958, 0.1012, 0.0998]) -Greedy action tensor([1.4125, 0.0024, 0.0729, 0.1375]) tensor([0.5601, 0.1367, 0.1467, 0.1565]) -Greedy action tensor([ 1.7619, -0.7382, -0.5442, 0.0450]) tensor([0.7346, 0.0603, 0.0732, 0.1319]) -Greedy action tensor([ 0.8427, 0.0676, -0.2426, 0.4299]) tensor([0.4065, 0.1872, 0.1373, 0.2690]) -Greedy action tensor([ 1.7957, -0.4091, -0.2820, 0.6809]) tensor([0.6396, 0.0705, 0.0801, 0.2098]) -Greedy action tensor([ 1.2236, -0.7184, -0.1057, 0.4833]) tensor([0.5305, 0.0761, 0.1404, 0.2530]) -Greedy action tensor([ 1.2521, -0.0181, -0.2099, 0.2630]) tensor([0.5307, 0.1490, 0.1230, 0.1974]) -Greedy action tensor([ 0.9408, -0.1126, -0.2702, 0.1252]) tensor([0.4787, 0.1669, 0.1426, 0.2118]) -Greedy action tensor([ 0.9133, -0.0260, -0.7649, 0.2906]) tensor([0.4730, 0.1849, 0.0883, 0.2538]) -Greedy action tensor([ 1.0496, -0.7567, -0.1174, 0.6384]) tensor([0.4676, 0.0768, 0.1456, 0.3100]) -Greedy action tensor([ 2.0734, -0.6344, -0.5954, 0.8477]) tensor([0.6995, 0.0466, 0.0485, 0.2053]) -Greedy action tensor([ 1.8598, -0.1867, -0.0200, 0.2051]) tensor([0.6789, 0.0877, 0.1036, 0.1298]) -Greedy action tensor([ 1.5310, -0.2040, -0.5387, 0.0060]) tensor([0.6578, 0.1160, 0.0830, 0.1431]) -Greedy action tensor([ 1.0881, -0.1092, -0.6593, 0.1274]) tensor([0.5380, 0.1625, 0.0937, 0.2058]) -Greedy action tensor([ 1.6005, -0.4201, -0.1665, 0.3979]) tensor([0.6235, 0.0827, 0.1065, 0.1873]) -Greedy action tensor([ 2.6287, -1.4654, -0.3579, 1.2686]) tensor([0.7554, 0.0126, 0.0381, 0.1939]) -Greedy action tensor([ 1.5792, -0.8303, -0.0981, 0.3184]) tensor([0.6410, 0.0576, 0.1198, 0.1817]) -Greedy action tensor([ 1.7078, -0.6658, -0.1184, 0.1705]) tensor([0.6807, 0.0634, 0.1096, 0.1463]) -Greedy action tensor([ 1.7798, -0.6488, -0.3999, 0.6715]) tensor([0.6530, 0.0576, 0.0738, 0.2156]) -Greedy action tensor([ 2.8568, -1.4611, -0.5172, 0.9520]) tensor([0.8358, 0.0111, 0.0286, 0.1244]) -Greedy action tensor([ 1.2176, -0.5760, -0.1670, 0.6401]) tensor([0.5056, 0.0841, 0.1266, 0.2837]) -Greedy action tensor([ 1.3510, -0.4896, -0.3436, 0.1944]) tensor([0.6035, 0.0958, 0.1109, 0.1898]) -Greedy action tensor([ 0.4509, -0.3164, -0.0318, -0.4198]) tensor([0.4000, 0.1857, 0.2468, 0.1675]) -Greedy action tensor([ 1.5395, -0.0528, -0.7983, 0.4799]) tensor([0.6073, 0.1236, 0.0586, 0.2105]) -Greedy action tensor([ 1.3548, -0.1851, -0.6830, 0.3103]) tensor([0.5894, 0.1264, 0.0768, 0.2074]) -Greedy action tensor([ 1.4859, -0.7304, -0.3689, 0.4903]) tensor([0.6116, 0.0667, 0.0957, 0.2260]) -Greedy action tensor([ 0.8310, -0.7041, -0.7587, 0.6090]) tensor([0.4504, 0.0970, 0.0919, 0.3607]) -Greedy action tensor([ 0.5395, -0.0303, -0.3495, 0.2337]) tensor([0.3686, 0.2085, 0.1515, 0.2715]) -Greedy action tensor([ 1.5550, -0.3182, -0.2445, 0.5142]) tensor([0.5980, 0.0919, 0.0989, 0.2112]) -Greedy action tensor([ 0.9141, -0.6929, -0.0504, 0.2396]) tensor([0.4782, 0.0959, 0.1823, 0.2436]) -Greedy action tensor([ 1.4623, -0.7813, -0.2298, 0.5700]) tensor([0.5883, 0.0624, 0.1083, 0.2410]) -Greedy action tensor([ 1.2025, -0.4664, -0.1865, 0.2164]) tensor([0.5522, 0.1041, 0.1377, 0.2060]) -Greedy action tensor([ 1.2993, -0.7596, -0.0444, 0.3909]) tensor([0.5581, 0.0712, 0.1456, 0.2250]) -Greedy action tensor([ 1.4296, -0.4374, -0.4165, 0.4675]) tensor([0.5901, 0.0912, 0.0932, 0.2255]) -Greedy action tensor([ 1.5341, -0.2957, -0.0955, 0.4279]) tensor([0.5927, 0.0951, 0.1162, 0.1961]) -Greedy action tensor([ 1.2945, -0.1406, -0.3211, 0.0660]) tensor([0.5782, 0.1377, 0.1149, 0.1692]) -Greedy action tensor([ 1.4904, -0.5963, -0.3963, 0.5225]) tensor([0.6040, 0.0750, 0.0916, 0.2294]) -Greedy action tensor([ 1.3803, -0.5521, -0.3268, 0.4977]) tensor([0.5748, 0.0832, 0.1042, 0.2378]) -Greedy action tensor([ 1.0964, -0.2056, -0.5379, 0.5246]) tensor([0.4922, 0.1339, 0.0960, 0.2779]) -Greedy action tensor([ 1.0705, -0.3370, -0.2268, 0.3126]) tensor([0.5033, 0.1232, 0.1375, 0.2359]) -Greedy action tensor([ 1.5663, -0.3153, -0.7520, 0.6830]) tensor([0.6009, 0.0915, 0.0592, 0.2484]) -Greedy action tensor([ 1.5222, -0.7059, 0.0128, 0.1855]) tensor([0.6283, 0.0677, 0.1389, 0.1651]) -Greedy action tensor([ 1.3695, -0.4788, -0.2153, 0.1104]) tensor([0.6074, 0.0957, 0.1245, 0.1724]) -Greedy action tensor([ 1.4059, -0.5690, -0.5440, 0.9918]) tensor([0.5149, 0.0715, 0.0733, 0.3403]) -Greedy action tensor([ 1.6880, -0.7408, -0.2993, 0.6915]) tensor([0.6272, 0.0553, 0.0860, 0.2315]) -Greedy action tensor([ 1.4845, -0.2762, -0.2553, 0.4496]) tensor([0.5873, 0.1010, 0.1031, 0.2086]) -Greedy action tensor([ 1.4033, -0.5308, -0.1988, 0.5952]) tensor([0.5581, 0.0807, 0.1124, 0.2487]) -Greedy action tensor([ 1.3747, -0.1487, 0.0398, 0.1203]) tensor([0.5661, 0.1234, 0.1490, 0.1615]) -Greedy action tensor([ 1.5669, -0.2568, -0.5543, 0.5812]) tensor([0.6044, 0.0976, 0.0725, 0.2255]) -Greedy action tensor([ 0.4557, -0.1326, -0.0818, -0.0042]) tensor([0.3609, 0.2004, 0.2108, 0.2278]) -Greedy action tensor([ 0.5240, -0.3187, -0.0941, -0.3593]) tensor([0.4197, 0.1807, 0.2262, 0.1735]) -Greedy action tensor([ 0.5040, -0.2593, -0.0766, -0.2918]) tensor([0.4037, 0.1882, 0.2259, 0.1822]) -Greedy action tensor([ 1.2323, -0.7906, -0.2011, -0.4706]) tensor([0.6439, 0.0852, 0.1536, 0.1173]) -Greedy action tensor([ 0.4575, 0.0461, 0.0093, -0.1139]) tensor([0.3489, 0.2312, 0.2229, 0.1970]) -Greedy action tensor([ 0.5780, -0.3987, -0.0580, -0.5059]) tensor([0.4456, 0.1678, 0.2359, 0.1507]) -Greedy action tensor([ 0.3816, -0.4767, -0.1210, -0.2288]) tensor([0.3888, 0.1648, 0.2352, 0.2112]) -Greedy action tensor([ 0.9248, -0.5919, 0.0041, -0.4292]) tensor([0.5331, 0.1170, 0.2123, 0.1376]) -Greedy action tensor([ 1.0091, -0.3479, 0.1945, -0.4889]) tensor([0.5198, 0.1338, 0.2302, 0.1162]) -Greedy action tensor([ 0.4819, -0.1198, -0.0413, -0.3248]) tensor([0.3866, 0.2118, 0.2291, 0.1725]) -Greedy action tensor([ 0.1844, 0.1794, -0.1184, -0.2923]) tensor([0.2981, 0.2966, 0.2202, 0.1851]) -Greedy action tensor([ 0.5089, -0.2109, -0.0350, -0.3603]) tensor([0.4021, 0.1958, 0.2334, 0.1686]) -Greedy action tensor([ 0.5635, -0.4618, -0.1254, -0.1639]) tensor([0.4266, 0.1530, 0.2142, 0.2061]) -Greedy action tensor([ 0.7730, -0.5966, -0.0430, -0.4231]) tensor([0.5003, 0.1272, 0.2212, 0.1513]) -Greedy action tensor([ 0.8069, -0.4184, 0.2153, -0.3976]) tensor([0.4658, 0.1368, 0.2578, 0.1397]) -Greedy action tensor([ 0.9102, -0.9304, -0.0554, -0.4633]) tensor([0.5578, 0.0885, 0.2124, 0.1412]) -Greedy action tensor([ 0.9307, -0.9135, -0.1358, -0.4039]) tensor([0.5664, 0.0896, 0.1950, 0.1491]) -Greedy action tensor([ 0.5427, -0.7024, -0.1370, -0.0011]) tensor([0.4210, 0.1212, 0.2134, 0.2444]) -Greedy action tensor([ 0.6866, -0.4301, -0.1162, -0.1201]) tensor([0.4501, 0.1473, 0.2017, 0.2009]) -Greedy action tensor([ 0.9905, -0.9266, 0.0895, -0.4173]) tensor([0.5562, 0.0818, 0.2259, 0.1361]) -Greedy action tensor([ 0.5890, -0.5707, -0.0561, -0.2339]) tensor([0.4391, 0.1377, 0.2304, 0.1928]) -Greedy action tensor([ 0.3901, 0.1040, -0.1032, -0.2115]) tensor([0.3437, 0.2582, 0.2098, 0.1883]) -Greedy action tensor([ 0.6947, -0.6609, -0.0242, -0.3814]) tensor([0.4794, 0.1236, 0.2336, 0.1634]) -Greedy action tensor([ 0.6603, -0.4165, -0.0508, -0.3098]) tensor([0.4523, 0.1541, 0.2221, 0.1714]) -Greedy action tensor([ 0.4384, -0.2723, 0.0208, -0.0883]) tensor([0.3649, 0.1793, 0.2403, 0.2155]) -Greedy action tensor([ 0.6646, -0.8438, -0.1275, -0.3875]) tensor([0.4942, 0.1093, 0.2238, 0.1726]) -Greedy action tensor([ 0.7709, -0.4316, -0.0635, -0.5350]) tensor([0.4986, 0.1498, 0.2165, 0.1351]) -Greedy action tensor([ 0.4260, 0.0116, 0.0259, -0.1661]) tensor([0.3467, 0.2291, 0.2324, 0.1918]) -Greedy action tensor([ 0.3869, -0.0818, -0.0164, -0.2238]) tensor([0.3525, 0.2206, 0.2355, 0.1914]) -Greedy action tensor([ 0.7074, -0.2963, 0.0102, -0.2446]) tensor([0.4443, 0.1629, 0.2213, 0.1715]) -Greedy action tensor([ 0.8177, -0.8957, 0.2889, -0.9799]) tensor([0.5167, 0.0931, 0.3045, 0.0856]) -Greedy action tensor([ 0.7605, -0.5879, -0.1447, -0.3344]) tensor([0.5003, 0.1299, 0.2024, 0.1674]) -Greedy action tensor([ 0.3977, -0.2933, -0.0665, -0.2063]) tensor([0.3736, 0.1872, 0.2349, 0.2042]) -Greedy action tensor([ 0.7300, -0.4692, -0.0501, -0.3357]) tensor([0.4752, 0.1432, 0.2178, 0.1637]) -Greedy action tensor([ 0.7327, -0.5422, 0.0008, -0.3428]) tensor([0.4758, 0.1330, 0.2289, 0.1623]) -Greedy action tensor([ 0.3518, -0.2731, -0.0539, -0.1504]) tensor([0.3563, 0.1907, 0.2374, 0.2156]) -Greedy action tensor([ 0.8051, -0.1768, -0.0354, -0.0733]) tensor([0.4501, 0.1686, 0.1942, 0.1870]) -Greedy action tensor([ 0.7742, -0.8971, 0.1245, -0.3987]) tensor([0.4951, 0.0931, 0.2586, 0.1532]) -Greedy action tensor([ 0.2231, 0.3203, 0.0193, -0.2872]) tensor([0.2843, 0.3133, 0.2318, 0.1706]) -Greedy action tensor([ 0.5686, -0.4339, -0.0481, -0.5102]) tensor([0.4451, 0.1633, 0.2402, 0.1513]) -Greedy action tensor([ 0.6901, -0.6721, -0.1651, -0.2299]) tensor([0.4808, 0.1231, 0.2044, 0.1916]) -Greedy action tensor([ 0.2865, -0.0676, -0.0781, -0.3856]) tensor([0.3440, 0.2414, 0.2389, 0.1757]) -Greedy action tensor([ 0.6689, -0.1868, -0.0676, -0.3924]) tensor([0.4445, 0.1889, 0.2128, 0.1538]) -Greedy action tensor([ 0.4636, -0.1929, 0.2366, -0.6701]) tensor([0.3792, 0.1967, 0.3021, 0.1220]) -Greedy action tensor([ 1.1655, -0.7364, -0.0882, -0.6528]) tensor([0.6262, 0.0935, 0.1787, 0.1016]) -Greedy action tensor([ 0.4686, -0.1911, -0.0636, -0.3324]) tensor([0.3917, 0.2025, 0.2300, 0.1758]) -Greedy action tensor([ 0.5738, -0.2368, -0.0813, -0.1346]) tensor([0.4071, 0.1810, 0.2114, 0.2005]) -Greedy action tensor([ 0.9336, 0.0438, 0.1237, -0.7251]) tensor([0.4887, 0.2008, 0.2174, 0.0931]) -Greedy action tensor([ 0.8109, -0.1517, 0.0292, -0.3319]) tensor([0.4633, 0.1769, 0.2120, 0.1478]) -Greedy action tensor([ 0.8860, -0.4824, 0.0778, -0.2960]) tensor([0.4983, 0.1268, 0.2221, 0.1528]) -Greedy action tensor([ 0.4292, -0.2210, 0.0286, -0.2633]) tensor([0.3715, 0.1939, 0.2488, 0.1858]) -Greedy action tensor([ 0.5300, 0.2000, -0.2233, -0.2351]) tensor([0.3766, 0.2708, 0.1773, 0.1752]) -Greedy action tensor([ 0.7569, -0.6624, -0.1436, -0.3740]) tensor([0.5074, 0.1227, 0.2062, 0.1637]) -Greedy action tensor([ 0.6456, -0.6595, -0.1317, -0.0970]) tensor([0.4532, 0.1229, 0.2083, 0.2156]) -Greedy action tensor([ 0.9920, -0.5040, -0.1348, -0.3627]) tensor([0.5537, 0.1240, 0.1794, 0.1429]) -Greedy action tensor([ 0.7936, -0.5741, 0.0053, -0.3484]) tensor([0.4930, 0.1256, 0.2241, 0.1574]) -Greedy action tensor([ 0.3048, 0.1128, -0.0666, -0.1872]) tensor([0.3198, 0.2640, 0.2206, 0.1956]) -Greedy action tensor([ 0.0974, -0.1497, 0.0412, -0.1507]) tensor([0.2852, 0.2227, 0.2696, 0.2225]) -Greedy action tensor([ 0.3543, -0.2572, 0.0134, -0.0725]) tensor([0.3441, 0.1867, 0.2447, 0.2245]) -Greedy action tensor([ 0.6638, -0.1490, 0.0226, -0.2056]) tensor([0.4185, 0.1857, 0.2204, 0.1754]) -Greedy action tensor([ 0.9235, -0.8839, -0.0342, -0.3671]) tensor([0.5486, 0.0900, 0.2105, 0.1509]) -Greedy action tensor([ 0.8288, -0.4542, 0.0489, -0.5574]) tensor([0.5036, 0.1396, 0.2309, 0.1259]) -Greedy action tensor([ 0.8514, -0.6100, 0.0892, -0.5279]) tensor([0.5127, 0.1189, 0.2393, 0.1291]) -Greedy action tensor([ 0.4160, -0.2429, -0.0728, -0.0851]) tensor([0.3654, 0.1891, 0.2241, 0.2214]) -Greedy action tensor([ 0.5496, -0.1523, 0.0083, -0.2367]) tensor([0.3948, 0.1957, 0.2298, 0.1798]) -Greedy action tensor([ 0.3432, -0.1452, 0.0039, -0.2125]) tensor([0.3449, 0.2116, 0.2456, 0.1979]) -Greedy action tensor([ 0.4365, -0.0628, -0.0827, -0.0656]) tensor([0.3562, 0.2162, 0.2120, 0.2156]) -Greedy action tensor([ 0.5102, -0.2705, -0.1257, -0.7457]) tensor([0.4401, 0.2016, 0.2330, 0.1253]) -Greedy action tensor([ 0.2356, -0.4247, -0.1085, -0.2369]) tensor([0.3510, 0.1814, 0.2488, 0.2188]) -Greedy action tensor([ 0.8186, -0.7622, -0.1144, -0.3370]) tensor([0.5224, 0.1075, 0.2055, 0.1645]) -Greedy action tensor([ 0.3218, 0.0889, -0.0385, 0.0290]) tensor([0.3090, 0.2448, 0.2155, 0.2306]) -Greedy action tensor([ 0.7878, -0.5748, -0.0527, -0.3351]) tensor([0.4968, 0.1272, 0.2144, 0.1616]) -Greedy action tensor([ 0.6320, -0.1247, 0.0385, -0.3563]) tensor([0.4177, 0.1960, 0.2308, 0.1555]) -Greedy action tensor([ 0.5836, -0.4473, -0.0271, -0.5611]) tensor([0.4509, 0.1608, 0.2448, 0.1435]) -Greedy action tensor([ 0.6426, -0.3670, -0.1348, -0.3579]) tensor([0.4563, 0.1663, 0.2097, 0.1678]) -Greedy action tensor([ 0.8621, -0.7121, -0.1213, -0.1936]) tensor([0.5184, 0.1074, 0.1939, 0.1804]) -Greedy action tensor([ 0.6218, -0.1987, 0.0013, -0.2715]) tensor([0.4189, 0.1844, 0.2252, 0.1715]) -Greedy action tensor([ 0.9831, -0.9879, 0.0789, -0.4247]) tensor([0.5590, 0.0779, 0.2263, 0.1368]) -Greedy action tensor([ 0.6682, -0.5217, -0.0794, -0.5656]) tensor([0.4833, 0.1470, 0.2289, 0.1407]) -Greedy action tensor([ 0.9654, -0.7310, 0.0227, -0.6268]) tensor([0.5629, 0.1032, 0.2193, 0.1145]) -Greedy action tensor([ 0.8523, -0.5728, -0.1469, -0.3467]) tensor([0.5235, 0.1259, 0.1928, 0.1578]) -Greedy action tensor([-1.7909, -0.2245, 0.6089, -0.3234]) tensor([0.0473, 0.2265, 0.5211, 0.2051]) -Greedy action tensor([-0.5299, 0.5721, 0.6372, 1.2084]) tensor([0.0775, 0.2332, 0.2488, 0.4406]) -Greedy action tensor([-1.9124, -0.4389, 0.6530, -0.1645]) tensor([0.0415, 0.1810, 0.5394, 0.2382]) -Greedy action tensor([-1.9209, -0.4369, 0.6547, -0.1650]) tensor([0.0411, 0.1812, 0.5398, 0.2378]) -Greedy action tensor([-1.8247, -0.4707, 0.6167, -0.1029]) tensor([0.0455, 0.1764, 0.5233, 0.2548]) -Greedy action tensor([-1.0210, 0.1535, 0.6125, 0.7481]) tensor([0.0657, 0.2126, 0.3364, 0.3853]) -Greedy action tensor([-1.8058, -0.6587, 1.1174, 0.4274]) tensor([0.0312, 0.0982, 0.5798, 0.2908]) -Greedy action tensor([-1.7890, -0.5132, 0.6792, -0.0285]) tensor([0.0451, 0.1613, 0.5316, 0.2620]) -Greedy action tensor([-1.6232, 0.5988, 0.4270, 0.1613]) tensor([0.0418, 0.3852, 0.3244, 0.2487]) -Greedy action tensor([-1.9089, -0.4341, 0.6470, -0.1569]) tensor([0.0416, 0.1819, 0.5364, 0.2401]) -Greedy action tensor([-0.3922, 0.5206, 0.6752, 1.5324]) tensor([0.0755, 0.1880, 0.2194, 0.5171]) -Greedy action tensor([-1.5393, -0.5704, 0.4450, 0.0587]) tensor([0.0631, 0.1662, 0.4589, 0.3118]) -Greedy action tensor([-1.9165, -0.4064, 0.6491, -0.1590]) tensor([0.0411, 0.1860, 0.5346, 0.2383]) -Greedy action tensor([-0.7966, -0.5527, 0.2150, 1.0171]) tensor([0.0896, 0.1144, 0.2464, 0.5496]) -Greedy action tensor([-0.5531, -0.1810, 0.3691, 1.1618]) tensor([0.0950, 0.1379, 0.2390, 0.5280]) -Greedy action tensor([-1.8159, -0.0048, 0.5497, -0.1228]) tensor([0.0431, 0.2636, 0.4590, 0.2343]) -Greedy action tensor([-1.9326, -0.4404, 0.6601, -0.1707]) tensor([0.0406, 0.1805, 0.5425, 0.2364]) -Greedy action tensor([-1.5029, -0.5341, 0.4540, -0.0154]) tensor([0.0661, 0.1740, 0.4675, 0.2924]) -Greedy action tensor([-1.8716, -0.3652, 0.6415, -0.1162]) tensor([0.0423, 0.1908, 0.5221, 0.2448]) -Greedy action tensor([-1.3862, 0.1220, 0.2087, 0.2111]) tensor([0.0650, 0.2937, 0.3203, 0.3210]) -Greedy action tensor([-1.9283, -0.4590, 0.6573, -0.1708]) tensor([0.0410, 0.1780, 0.5436, 0.2375]) -Greedy action tensor([-1.7532, -0.5173, 0.5711, -0.0681]) tensor([0.0499, 0.1716, 0.5096, 0.2689]) -Greedy action tensor([-1.6435, -0.4892, 0.5200, 0.0034]) tensor([0.0554, 0.1756, 0.4817, 0.2874]) -Greedy action tensor([-1.9262, -0.4521, 0.6543, -0.1700]) tensor([0.0410, 0.1793, 0.5420, 0.2377]) -Greedy action tensor([-1.7464, -0.4578, 0.6700, 0.1530]) tensor([0.0444, 0.1611, 0.4977, 0.2968]) -Greedy action tensor([-1.9030, -0.3775, 0.6390, -0.1519]) tensor([0.0416, 0.1911, 0.5280, 0.2394]) -Greedy action tensor([-1.7278, -0.1262, 0.5221, -0.0570]) tensor([0.0482, 0.2389, 0.4569, 0.2560]) -Greedy action tensor([-1.6387, -0.5205, 0.5183, 0.0106]) tensor([0.0558, 0.1708, 0.4828, 0.2906]) -Greedy action tensor([-1.2162, -0.5530, 0.4025, -0.1011]) tensor([0.0906, 0.1759, 0.4572, 0.2763]) -Greedy action tensor([-1.9048, -0.2863, 0.6196, -0.1476]) tensor([0.0411, 0.2074, 0.5132, 0.2383]) -Greedy action tensor([-1.6015, -0.5334, 0.4738, 0.0228]) tensor([0.0590, 0.1717, 0.4700, 0.2994]) -Greedy action tensor([-1.8686, -0.2401, 0.6031, -0.1093]) tensor([0.0421, 0.2146, 0.4987, 0.2446]) -Greedy action tensor([-1.8080, -0.4346, 0.6359, -0.0740]) tensor([0.0452, 0.1784, 0.5205, 0.2559]) -Greedy action tensor([-1.9013, -0.6510, 0.6349, -0.1973]) tensor([0.0442, 0.1544, 0.5584, 0.2430]) -Greedy action tensor([-1.8972, -0.4072, 0.6454, -0.1364]) tensor([0.0417, 0.1851, 0.5304, 0.2427]) -Greedy action tensor([-1.5790, -0.5660, 0.4867, 0.0487]) tensor([0.0597, 0.1645, 0.4715, 0.3042]) -Greedy action tensor([-1.6817, -0.3991, 0.5312, -0.0487]) tensor([0.0530, 0.1911, 0.4845, 0.2713]) -Greedy action tensor([-1.7501, -0.3331, 0.6905, 0.0108]) tensor([0.0446, 0.1839, 0.5120, 0.2595]) -Greedy action tensor([-1.2893e+00, -2.4458e-01, 3.5706e-01, 4.6703e-04]) tensor([0.0790, 0.2245, 0.4097, 0.2868]) -Greedy action tensor([ 0.1404, -0.3765, 0.2856, 0.3151]) tensor([0.2536, 0.1512, 0.2932, 0.3020]) -Greedy action tensor([-1.2691, -0.4066, 0.5859, 0.9038]) tensor([0.0539, 0.1278, 0.3447, 0.4737]) -Greedy action tensor([-1.6232, -0.5349, 0.6086, 0.2318]) tensor([0.0508, 0.1509, 0.4735, 0.3248]) -Greedy action tensor([-1.9364, -0.4576, 0.6615, -0.1785]) tensor([0.0406, 0.1782, 0.5456, 0.2356]) -Greedy action tensor([-1.7560, -0.3482, 0.5612, -0.0322]) tensor([0.0480, 0.1961, 0.4869, 0.2690]) -Greedy action tensor([-1.6727, -1.0724, -0.0219, -0.7509]) tensor([0.0948, 0.1728, 0.4941, 0.2383]) -Greedy action tensor([-1.8860, -0.4294, 0.6339, -0.1441]) tensor([0.0427, 0.1832, 0.5305, 0.2437]) -Greedy action tensor([-1.4977, -0.5629, 0.7473, 0.5560]) tensor([0.0481, 0.1225, 0.4542, 0.3751]) -Greedy action tensor([-1.8134, -0.4596, 0.6048, -0.1083]) tensor([0.0463, 0.1793, 0.5197, 0.2547]) -Greedy action tensor([-1.8413, -0.2818, 0.6153, -0.0971]) tensor([0.0432, 0.2055, 0.5040, 0.2472]) -Greedy action tensor([-1.6225, -0.5171, 0.5088, 0.0640]) tensor([0.0560, 0.1692, 0.4721, 0.3026]) -Greedy action tensor([-1.3468, -0.5470, 0.3502, 0.2373]) tensor([0.0738, 0.1641, 0.4025, 0.3596]) -Greedy action tensor([-1.6737, -0.5221, 0.5375, -0.0383]) tensor([0.0543, 0.1717, 0.4954, 0.2786]) -Greedy action tensor([-1.7405, -0.2277, 0.6874, -0.4013]) tensor([0.0483, 0.2194, 0.5478, 0.1844]) -Greedy action tensor([-1.0667, 0.6927, 0.1667, 0.1123]) tensor([0.0741, 0.4305, 0.2544, 0.2410]) -Greedy action tensor([-1.7734, -0.3856, 1.0944, 0.8095]) tensor([0.0279, 0.1118, 0.4910, 0.3693]) -Greedy action tensor([-1.1724, -0.5955, 0.3156, 0.2496]) tensor([0.0881, 0.1568, 0.3900, 0.3651]) -Greedy action tensor([-1.9274, -0.4322, 0.6606, -0.1651]) tensor([0.0407, 0.1814, 0.5410, 0.2369]) -Greedy action tensor([-1.6224, 0.1713, 0.4118, 0.0315]) tensor([0.0503, 0.3023, 0.3845, 0.2629]) -Greedy action tensor([-1.7541, -0.6182, 0.5745, -0.0230]) tensor([0.0499, 0.1555, 0.5125, 0.2820]) -Greedy action tensor([-1.4400, -0.4032, 0.6333, 0.5679]) tensor([0.0520, 0.1467, 0.4137, 0.3875]) -Greedy action tensor([-0.8116, -0.1958, 0.3451, -0.2988]) tensor([0.1299, 0.2404, 0.4129, 0.2169]) -Greedy action tensor([-1.7664, -0.4508, 0.5729, -0.0840]) tensor([0.0488, 0.1820, 0.5066, 0.2626]) -Greedy action tensor([-1.6575, -0.3134, 0.5378, 0.0592]) tensor([0.0516, 0.1978, 0.4634, 0.2872]) -Greedy action tensor([-1.7075e+00, -3.6634e-01, 5.3524e-01, 1.2553e-03]) tensor([0.0506, 0.1935, 0.4766, 0.2794]) -Greedy action tensor([-1.0965, -0.5742, 1.0633, 1.2987]) tensor([0.0448, 0.0755, 0.3883, 0.4914]) -Greedy action tensor([-1.6870, -0.5233, 0.6481, 0.0200]) tensor([0.0499, 0.1597, 0.5154, 0.2750]) -Greedy action tensor([-0.6204, 0.1597, 0.6792, 1.2267]) tensor([0.0758, 0.1654, 0.2781, 0.4807]) -Greedy action tensor([-0.1319, 0.0627, 1.0441, 1.6894]) tensor([0.0859, 0.1044, 0.2786, 0.5311]) -Greedy action tensor([-0.7455, -0.5046, 0.1849, 0.1530]) tensor([0.1377, 0.1752, 0.3491, 0.3381]) -Greedy action tensor([-2.0228, -0.7506, 1.0316, 0.2200]) tensor([0.0284, 0.1014, 0.6026, 0.2676]) -Greedy action tensor([-1.8571, -0.4252, 0.6183, -0.1439]) tensor([0.0442, 0.1851, 0.5255, 0.2452]) -Greedy action tensor([-1.6943, -0.4491, 0.6163, 0.0518]) tensor([0.0493, 0.1712, 0.4969, 0.2826]) -Greedy action tensor([-1.9336, -0.4351, 0.6565, -0.1728]) tensor([0.0406, 0.1817, 0.5414, 0.2362]) -Greedy action tensor([-1.8100, -0.3728, 0.5887, -0.1050]) tensor([0.0460, 0.1938, 0.5069, 0.2533]) -Greedy action tensor([ 0.2411, -0.5856, 0.1089, -0.3523]) tensor([0.3489, 0.1526, 0.3057, 0.1927]) -Greedy action tensor([-1.6339, -0.4142, 0.7500, 0.4146]) tensor([0.0435, 0.1473, 0.4718, 0.3374]) -Greedy action tensor([-1.9142, -0.4419, 0.6560, -0.1619]) tensor([0.0413, 0.1802, 0.5401, 0.2384]) -Greedy action tensor([-1.9100, -0.3668, 0.6427, -0.1451]) tensor([0.0410, 0.1921, 0.5271, 0.2397]) -Greedy action tensor([-1.7344e+00, 2.5963e-01, 4.5515e-01, -4.9001e-04]) tensor([0.0436, 0.3202, 0.3893, 0.2469]) -Greedy action tensor([-1.7453, -0.4574, 0.7126, 0.1061]) tensor([0.0441, 0.1599, 0.5151, 0.2809]) -Greedy action tensor([-1.7191, -0.5110, 0.5826, 0.0116]) tensor([0.0500, 0.1675, 0.5000, 0.2825]) -Greedy action tensor([-0.8320, -0.8945, 0.2941, 0.0236]) tensor([0.1356, 0.1274, 0.4181, 0.3190]) -Greedy action tensor([-0.9618, -0.7081, 0.4154, -0.8947]) tensor([0.1366, 0.1760, 0.5413, 0.1461]) -Greedy action tensor([-0.0870, -1.5177, -0.4505, 0.4494]) tensor([0.2744, 0.0656, 0.1908, 0.4692]) -Greedy action tensor([-0.0347, -0.7516, -1.3773, 0.4391]) tensor([0.2980, 0.1455, 0.0778, 0.4786]) -Greedy action tensor([-0.1481, -1.3918, 0.3052, -0.5056]) tensor([0.2808, 0.0810, 0.4418, 0.1964]) -Greedy action tensor([-0.2083, 0.5297, 1.9281, -0.7978]) tensor([0.0825, 0.1727, 0.6990, 0.0458]) -Greedy action tensor([-0.2030, -0.2752, -0.5928, 0.1111]) tensor([0.2515, 0.2340, 0.1703, 0.3443]) -Greedy action tensor([ 0.8878, -0.7117, 0.7150, 0.3106]) tensor([0.3839, 0.0776, 0.3230, 0.2155]) -Greedy action tensor([ 0.9379, -0.4836, -0.0928, 1.0018]) tensor([0.3754, 0.0906, 0.1339, 0.4001]) -Greedy action tensor([ 1.2397, -0.9600, 1.5696, 1.3519]) tensor([0.2762, 0.0306, 0.3842, 0.3090]) -Greedy action tensor([-0.1755, -1.0903, -0.9615, 0.2184]) tensor([0.2995, 0.1200, 0.1365, 0.4441]) -Greedy action tensor([-0.0414, -0.8741, -0.0729, 1.3876]) tensor([0.1520, 0.0661, 0.1473, 0.6346]) -Greedy action tensor([ 0.9955, -1.2406, -0.1256, 1.0091]) tensor([0.4087, 0.0437, 0.1332, 0.4143]) -Greedy action tensor([ 0.8587, 0.8437, -0.2472, 1.4168]) tensor([0.2461, 0.2424, 0.0814, 0.4300]) -Greedy action tensor([ 0.7306, -0.0462, -0.8399, 1.3339]) tensor([0.2860, 0.1315, 0.0595, 0.5229]) -Greedy action tensor([ 1.8897, -0.9513, 0.7078, 0.9509]) tensor([0.5694, 0.0332, 0.1746, 0.2227]) -Greedy action tensor([-0.0111, -0.4929, -0.7842, 1.3909]) tensor([0.1628, 0.1006, 0.0751, 0.6615]) -Greedy action tensor([-0.3902, -1.3323, -0.4055, 1.2707]) tensor([0.1309, 0.0510, 0.1289, 0.6891]) -Greedy action tensor([-0.1006, 0.3401, -0.1762, -0.0087]) tensor([0.2185, 0.3395, 0.2026, 0.2395]) -Greedy action tensor([ 1.3622, -0.4284, 0.8118, 2.0547]) tensor([0.2672, 0.0446, 0.1541, 0.5341]) -Greedy action tensor([ 1.2476, -1.1603, 1.0743, 0.3970]) tensor([0.4241, 0.0382, 0.3566, 0.1812]) -Greedy action tensor([ 0.2498, -0.8751, 0.4618, -0.1903]) tensor([0.3120, 0.1013, 0.3857, 0.2009]) -Greedy action tensor([ 0.5371, -0.2679, 0.3210, -0.2157]) tensor([0.3671, 0.1641, 0.2958, 0.1729]) -Greedy action tensor([ 0.4905, 0.0280, -0.1172, 0.8234]) tensor([0.2802, 0.1764, 0.1526, 0.3909]) -Greedy action tensor([ 0.6576, 0.6484, -0.0076, 0.2808]) tensor([0.3134, 0.3105, 0.1611, 0.2150]) -Greedy action tensor([-0.6061, -0.2754, -0.8167, 0.6094]) tensor([0.1521, 0.2117, 0.1232, 0.5129]) -Greedy action tensor([ 1.3159, -0.0746, 0.8252, -0.1347]) tensor([0.4772, 0.1188, 0.2921, 0.1119]) -Greedy action tensor([0.7994, 0.8163, 0.7226, 0.3194]) tensor([0.2808, 0.2855, 0.2600, 0.1737]) -Greedy action tensor([-1.0069, -1.1420, 0.0551, 0.3422]) tensor([0.1160, 0.1014, 0.3355, 0.4471]) -Greedy action tensor([-0.8524, -1.7500, -1.4168, 0.7823]) tensor([0.1408, 0.0574, 0.0801, 0.7218]) -Greedy action tensor([-0.5522, -1.2279, 0.1710, 0.0407]) tensor([0.1859, 0.0946, 0.3832, 0.3363]) -Greedy action tensor([-0.9169, -0.7347, -1.0553, -0.1565]) tensor([0.1919, 0.2303, 0.1671, 0.4106]) -Greedy action tensor([ 1.2869, 0.6589, -0.0131, 0.3875]) tensor([0.4519, 0.2412, 0.1231, 0.1838]) -Greedy action tensor([-0.2121, -1.4476, -0.0055, -0.2192]) tensor([0.2847, 0.0827, 0.3500, 0.2826]) -Greedy action tensor([ 0.2319, 0.4062, 1.1839, -0.1772]) tensor([0.1836, 0.2186, 0.4758, 0.1220]) -Greedy action tensor([ 1.2311, -0.1938, 0.9852, 1.0368]) tensor([0.3514, 0.0845, 0.2748, 0.2893]) -Greedy action tensor([ 1.4899, -0.8512, 0.7132, 1.2559]) tensor([0.4260, 0.0410, 0.1959, 0.3371]) -Greedy action tensor([-1.5854, -0.0730, 0.2957, -0.8868]) tensor([0.0709, 0.3216, 0.4650, 0.1425]) -Greedy action tensor([ 1.5149, -0.9684, 0.6299, 1.3804]) tensor([0.4219, 0.0352, 0.1741, 0.3688]) -Greedy action tensor([ 1.2838, 1.0911, 0.6582, -0.2116]) tensor([0.3870, 0.3192, 0.2070, 0.0868]) -Greedy action tensor([ 1.9884, -0.6766, 0.6873, 1.1808]) tensor([0.5594, 0.0389, 0.1523, 0.2494]) -Greedy action tensor([ 0.4321, -0.2849, 0.2799, 0.2976]) tensor([0.3104, 0.1516, 0.2666, 0.2714]) -Greedy action tensor([ 1.6112, -0.1956, 0.5484, 0.7106]) tensor([0.5219, 0.0857, 0.1803, 0.2121]) -Greedy action tensor([-0.1314, 0.4672, 0.3277, 1.0588]) tensor([0.1300, 0.2366, 0.2058, 0.4276]) -Greedy action tensor([-1.2196, 0.4483, -0.2054, -1.3019]) tensor([0.1002, 0.5312, 0.2763, 0.0923]) -Greedy action tensor([ 0.5752, -0.4732, -0.0726, 1.3711]) tensor([0.2445, 0.0857, 0.1279, 0.5419]) -Greedy action tensor([ 1.2194, -1.3998, -0.0326, 1.6320]) tensor([0.3485, 0.0254, 0.0996, 0.5265]) -Greedy action tensor([ 0.3902, 0.2057, 0.9631, -1.2897]) tensor([0.2638, 0.2193, 0.4678, 0.0492]) -Greedy action tensor([-0.7445, -1.0438, 0.9263, -0.1621]) tensor([0.1130, 0.0838, 0.6008, 0.2024]) -Greedy action tensor([1.1881, 0.1478, 0.6482, 0.9409]) tensor([0.3680, 0.1300, 0.2145, 0.2874]) -Greedy action tensor([0.2278, 0.3319, 0.3187, 0.7730]) tensor([0.2029, 0.2251, 0.2221, 0.3499]) -Greedy action tensor([1.4253, 0.6807, 0.2489, 0.0115]) tensor([0.4935, 0.2344, 0.1522, 0.1200]) -Greedy action tensor([ 0.8100, -1.0661, 0.2306, 1.1090]) tensor([0.3266, 0.0500, 0.1830, 0.4404]) -Greedy action tensor([ 0.9181, -1.3049, 0.7939, 1.0119]) tensor([0.3236, 0.0350, 0.2858, 0.3555]) -Greedy action tensor([-0.4311, 0.0301, -0.5925, 0.8858]) tensor([0.1395, 0.2212, 0.1187, 0.5206]) -Greedy action tensor([ 1.1095, -0.5663, 0.4517, 0.6798]) tensor([0.4245, 0.0794, 0.2199, 0.2762]) -Greedy action tensor([ 0.2493, -0.1754, 0.5598, -0.3063]) tensor([0.2784, 0.1821, 0.3798, 0.1597]) -Greedy action tensor([ 0.2862, -0.8520, -0.4277, 0.1954]) tensor([0.3672, 0.1176, 0.1798, 0.3353]) -Greedy action tensor([ 1.6328, -0.1460, 1.3270, 0.3702]) tensor([0.4570, 0.0772, 0.3366, 0.1293]) -Greedy action tensor([-1.0695, 0.8219, 0.6733, -1.3288]) tensor([0.0709, 0.4697, 0.4048, 0.0547]) -Greedy action tensor([-0.3498, -1.2708, 0.1296, 0.8107]) tensor([0.1612, 0.0642, 0.2603, 0.5144]) -Greedy action tensor([ 0.5457, 0.3079, -0.4776, 0.4898]) tensor([0.3233, 0.2549, 0.1162, 0.3057]) -Greedy action tensor([0.5000, 0.6298, 0.6692, 0.4980]) tensor([0.2314, 0.2635, 0.2741, 0.2310]) -Greedy action tensor([ 0.1956, 0.5602, -0.2122, -0.4398]) tensor([0.2751, 0.3962, 0.1830, 0.1457]) -Greedy action tensor([ 1.6579, -1.5673, -0.3190, 1.6030]) tensor([0.4706, 0.0187, 0.0652, 0.4455]) -Greedy action tensor([-0.4505, 0.0285, 1.3592, -0.6392]) tensor([0.1047, 0.1690, 0.6396, 0.0867]) -Greedy action tensor([-0.3278, -0.8800, 1.3378, 0.3046]) tensor([0.1143, 0.0658, 0.6047, 0.2152]) -Greedy action tensor([ 0.5251, 0.8867, 0.1791, -0.6155]) tensor([0.2888, 0.4146, 0.2043, 0.0923]) -Greedy action tensor([1.0807, 0.6261, 0.9041, 0.1161]) tensor([0.3504, 0.2224, 0.2937, 0.1336]) -Greedy action tensor([-1.7030, -0.7998, 0.6740, -1.2604]) tensor([0.0633, 0.1562, 0.6820, 0.0985]) -Greedy action tensor([ 1.0623, 0.2803, 0.2778, -0.0226]) tensor([0.4441, 0.2032, 0.2027, 0.1501]) -Greedy action tensor([-0.8532, 0.1008, -0.2489, -0.5024]) tensor([0.1461, 0.3792, 0.2673, 0.2074]) -Greedy action tensor([ 0.9548, 1.1527, -0.4541, -0.1562]) tensor([0.3581, 0.4365, 0.0875, 0.1179]) -Greedy action tensor([ 0.1096, -1.5086, 1.9815, -0.4556]) tensor([0.1210, 0.0240, 0.7863, 0.0687]) -Greedy action tensor([ 0.5268, -0.0483, 1.1271, -0.7153]) tensor([0.2722, 0.1531, 0.4961, 0.0786]) -Greedy action tensor([ 0.3993, -1.0615, 0.5822, 0.5685]) tensor([0.2765, 0.0642, 0.3319, 0.3274]) -Greedy action tensor([-0.1475, -0.9183, 0.8765, 1.0462]) tensor([0.1325, 0.0613, 0.3690, 0.4372]) -Greedy action tensor([ 0.9336, -1.9354, 1.4318, 0.9055]) tensor([0.2721, 0.0154, 0.4478, 0.2646]) -Greedy action tensor([ 0.0980, -2.5013, 0.1678, 0.2887]) tensor([0.2979, 0.0221, 0.3195, 0.3605]) -Greedy action tensor([ 0.4086, 0.4262, -0.3331, -0.7772]) tensor([0.3572, 0.3636, 0.1701, 0.1091]) -Greedy action tensor([-0.3775, -0.5258, 0.3043, -0.5019]) tensor([0.2118, 0.1826, 0.4187, 0.1870]) -Greedy action tensor([-0.0371, 0.4478, -0.4393, -1.0175]) tensor([0.2726, 0.4427, 0.1823, 0.1023]) -Greedy action tensor([ 1.4427, -0.3483, -0.4289, 0.2102]) tensor([0.6202, 0.1035, 0.0954, 0.1808]) -Greedy action tensor([ 1.2575, -0.8434, -0.2100, 0.6653]) tensor([0.5247, 0.0642, 0.1209, 0.2902]) -Greedy action tensor([ 0.6515, -0.4965, 0.2119, 0.1261]) tensor([0.3917, 0.1243, 0.2524, 0.2316]) -Greedy action tensor([ 1.9142, -0.4458, -0.4968, 0.8311]) tensor([0.6567, 0.0620, 0.0589, 0.2223]) -Greedy action tensor([ 1.1376, 0.1589, -0.1260, 0.4184]) tensor([0.4661, 0.1751, 0.1317, 0.2270]) -Greedy action tensor([ 1.1631, -0.7572, -0.2099, 0.5788]) tensor([0.5109, 0.0749, 0.1294, 0.2848]) -Greedy action tensor([ 0.8258, -0.2011, -0.1684, 0.2188]) tensor([0.4399, 0.1575, 0.1628, 0.2397]) -Greedy action tensor([ 2.0533, -0.8881, -0.3927, 0.6896]) tensor([0.7168, 0.0378, 0.0621, 0.1833]) -Greedy action tensor([ 2.1223, -0.4831, -0.3713, 0.7572]) tensor([0.7083, 0.0523, 0.0585, 0.1809]) -Greedy action tensor([ 1.5532, -0.0589, -0.2648, 0.1172]) tensor([0.6251, 0.1247, 0.1015, 0.1487]) -Greedy action tensor([ 1.6048, -0.1548, -0.4154, 0.1289]) tensor([0.6522, 0.1122, 0.0865, 0.1491]) -Greedy action tensor([ 1.0514, -0.7247, -0.3203, 0.3176]) tensor([0.5255, 0.0890, 0.1333, 0.2523]) -Greedy action tensor([ 1.7361, -1.0942, -0.0829, 0.5300]) tensor([0.6577, 0.0388, 0.1067, 0.1969]) -Greedy action tensor([ 0.9282, -0.1647, -0.1086, 0.0496]) tensor([0.4750, 0.1592, 0.1684, 0.1973]) -Greedy action tensor([ 0.6660, -0.3971, -0.1241, 0.0658]) tensor([0.4259, 0.1471, 0.1933, 0.2337]) -Greedy action tensor([ 0.9653, -0.1334, -0.0925, 0.4304]) tensor([0.4413, 0.1471, 0.1532, 0.2584]) -Greedy action tensor([ 0.8701, -0.4387, 0.1928, 0.1760]) tensor([0.4391, 0.1186, 0.2230, 0.2193]) -Greedy action tensor([ 1.5980, -0.3577, -0.6392, 0.9038]) tensor([0.5722, 0.0809, 0.0611, 0.2858]) -Greedy action tensor([ 1.3151, -0.4417, -0.3647, 0.0265]) tensor([0.6117, 0.1056, 0.1140, 0.1686]) -Greedy action tensor([ 0.9384, -0.6813, -0.5184, 0.5917]) tensor([0.4677, 0.0926, 0.1090, 0.3307]) -Greedy action tensor([ 1.7581, -0.5266, -0.4711, 0.2876]) tensor([0.6948, 0.0707, 0.0748, 0.1597]) -Greedy action tensor([ 1.6459, -0.3533, -0.5981, 0.4003]) tensor([0.6539, 0.0886, 0.0693, 0.1882]) -Greedy action tensor([ 1.0331, -0.1829, -0.4526, 0.1779]) tensor([0.5134, 0.1522, 0.1162, 0.2183]) -Greedy action tensor([ 1.1339, -0.4689, -0.0658, 0.4313]) tensor([0.5005, 0.1008, 0.1508, 0.2479]) -Greedy action tensor([ 0.7217, -0.1058, -0.1959, 0.3736]) tensor([0.3933, 0.1719, 0.1571, 0.2777]) -Greedy action tensor([ 0.7102, -0.2536, -0.4843, 0.7044]) tensor([0.3733, 0.1424, 0.1131, 0.3712]) -Greedy action tensor([ 0.3108, -0.3571, -0.0797, 0.0004]) tensor([0.3422, 0.1755, 0.2315, 0.2508]) -Greedy action tensor([ 1.5680, -0.4749, -0.2286, 0.3705]) tensor([0.6260, 0.0812, 0.1038, 0.1890]) -Greedy action tensor([ 1.1522, -0.4752, -0.2760, 0.5503]) tensor([0.5041, 0.0990, 0.1208, 0.2761]) -Greedy action tensor([ 0.9991, -0.3350, -0.2416, 0.1701]) tensor([0.5027, 0.1324, 0.1454, 0.2194]) -Greedy action tensor([ 1.3612, -0.8725, -0.0171, 0.0707]) tensor([0.6119, 0.0656, 0.1542, 0.1683]) -Greedy action tensor([ 0.9570, 0.0201, -0.3409, -0.0126]) tensor([0.4892, 0.1917, 0.1336, 0.1855]) -Greedy action tensor([ 1.5968, -0.4224, -0.4049, 0.3824]) tensor([0.6391, 0.0848, 0.0863, 0.1897]) -Greedy action tensor([ 1.5644, -0.8487, -0.0317, 0.4702]) tensor([0.6146, 0.0550, 0.1246, 0.2058]) -Greedy action tensor([ 1.2061, -0.5062, -0.0278, 0.0500]) tensor([0.5598, 0.1010, 0.1630, 0.1762]) -Greedy action tensor([ 2.2174, -0.8251, -0.6262, 0.5301]) tensor([0.7746, 0.0370, 0.0451, 0.1433]) -Greedy action tensor([ 1.1852, 0.1141, -0.5362, 0.2289]) tensor([0.5247, 0.1798, 0.0938, 0.2017]) -Greedy action tensor([ 1.6109, -0.6269, -0.2718, 0.4625]) tensor([0.6345, 0.0677, 0.0966, 0.2012]) -Greedy action tensor([ 1.5404, -0.3345, -0.3654, 0.1363]) tensor([0.6461, 0.0991, 0.0961, 0.1587]) -Greedy action tensor([ 0.6366, -0.1011, -0.3717, 0.4649]) tensor([0.3724, 0.1781, 0.1359, 0.3136]) -Greedy action tensor([ 1.0555, -0.0418, -0.0120, 0.2271]) tensor([0.4729, 0.1579, 0.1626, 0.2066]) -Greedy action tensor([ 1.2564, 0.1723, -0.3501, 0.4484]) tensor([0.5039, 0.1704, 0.1011, 0.2246]) -Greedy action tensor([ 1.2420, -0.7412, -0.1224, 0.1092]) tensor([0.5830, 0.0802, 0.1490, 0.1878]) -Greedy action tensor([ 1.8955, -0.9630, -0.4545, 0.5305]) tensor([0.7102, 0.0407, 0.0677, 0.1814]) -Greedy action tensor([ 2.3974, -0.4108, -0.4355, 0.5177]) tensor([0.7863, 0.0474, 0.0463, 0.1200]) -Greedy action tensor([ 0.9213, 0.0314, -0.1909, 0.3236]) tensor([0.4367, 0.1794, 0.1436, 0.2402]) -Greedy action tensor([ 1.1853, -0.4914, -0.0114, 0.1794]) tensor([0.5391, 0.1008, 0.1629, 0.1972]) -Greedy action tensor([ 1.1475, -0.5724, 0.0484, -0.0047]) tensor([0.5470, 0.0980, 0.1822, 0.1728]) -Greedy action tensor([ 1.2013, -0.1053, -0.6727, 0.5312]) tensor([0.5166, 0.1399, 0.0793, 0.2643]) -Greedy action tensor([ 1.4907, -0.4733, -0.4663, 0.3946]) tensor([0.6189, 0.0868, 0.0874, 0.2068]) -Greedy action tensor([ 1.9057, -1.1120, -0.0926, 0.5105]) tensor([0.6982, 0.0342, 0.0947, 0.1730]) -Greedy action tensor([ 1.5421, -0.6093, -0.1705, 0.5237]) tensor([0.6032, 0.0702, 0.1088, 0.2179]) -Greedy action tensor([ 1.4957, -0.7114, -0.3771, 0.4855]) tensor([0.6143, 0.0676, 0.0944, 0.2237]) -Greedy action tensor([ 1.6426, -0.1737, -0.4325, -0.0717]) tensor([0.6811, 0.1108, 0.0855, 0.1227]) -Greedy action tensor([ 1.0292, -0.2907, -0.6411, 0.2329]) tensor([0.5246, 0.1401, 0.0987, 0.2366]) -Greedy action tensor([ 1.8976, -0.3620, -0.3572, 0.1971]) tensor([0.7184, 0.0750, 0.0754, 0.1312]) -Greedy action tensor([ 0.9186, -0.2471, -0.0845, 0.1881]) tensor([0.4629, 0.1443, 0.1698, 0.2230]) -Greedy action tensor([ 1.4089, -0.7689, -0.3013, 0.0056]) tensor([0.6494, 0.0736, 0.1174, 0.1596]) -Greedy action tensor([ 1.2185, -0.3794, -0.7354, 0.5920]) tensor([0.5324, 0.1077, 0.0754, 0.2845]) -Greedy action tensor([ 2.1846, -0.5423, -0.3129, -0.1678]) tensor([0.8046, 0.0526, 0.0662, 0.0765]) -Greedy action tensor([ 1.0530, -0.4253, -0.1425, 0.2268]) tensor([0.5081, 0.1159, 0.1537, 0.2224]) -Greedy action tensor([ 1.4298, -0.3212, -0.1877, 0.2185]) tensor([0.5989, 0.1040, 0.1188, 0.1783]) -Greedy action tensor([ 1.2233, -0.5133, -0.4076, 0.0577]) tensor([0.5940, 0.1046, 0.1163, 0.1852]) -Greedy action tensor([ 0.8989, -0.1006, -0.3837, 0.3060]) tensor([0.4549, 0.1674, 0.1262, 0.2514]) -Greedy action tensor([ 0.9155, -0.4281, -0.1757, 0.2372]) tensor([0.4753, 0.1240, 0.1596, 0.2412]) -Greedy action tensor([ 1.6098, -0.0285, -0.5165, -0.2018]) tensor([0.6770, 0.1316, 0.0808, 0.1106]) -Greedy action tensor([ 1.3524, -0.4658, -0.4291, 0.1712]) tensor([0.6106, 0.0991, 0.1028, 0.1874]) -Greedy action tensor([ 1.4850, -0.1371, -0.7863, 0.2607]) tensor([0.6271, 0.1238, 0.0647, 0.1843]) -Greedy action tensor([ 1.6635, -0.1060, -0.5862, 0.6457]) tensor([0.6108, 0.1041, 0.0644, 0.2207]) -Greedy action tensor([ 1.2061, -0.6147, -0.0682, 0.3700]) tensor([0.5334, 0.0863, 0.1491, 0.2312]) -Greedy action tensor([ 1.4097, -0.0509, -0.5103, 0.3513]) tensor([0.5795, 0.1345, 0.0850, 0.2011]) -Greedy action tensor([ 1.2812, -0.8342, -0.0021, 0.4211]) tensor([0.5492, 0.0662, 0.1522, 0.2324]) -Greedy action tensor([ 1.2243, -0.0930, -0.4565, 0.4029]) tensor([0.5280, 0.1414, 0.0983, 0.2322]) -Greedy action tensor([ 1.1207, -0.2907, -0.3489, 0.5286]) tensor([0.4933, 0.1203, 0.1135, 0.2729]) -Greedy action tensor([ 1.6048, -0.2540, 0.2367, -0.0698]) tensor([0.6258, 0.0975, 0.1593, 0.1173]) -Greedy action tensor([ 1.3770, -0.5300, -0.3389, 0.5124]) tensor([0.5716, 0.0849, 0.1028, 0.2408]) -Greedy action tensor([ 1.4622, -0.2816, -0.7831, 0.4768]) tensor([0.6046, 0.1057, 0.0640, 0.2257]) -Greedy action tensor([ 1.8098, -0.2528, -0.5877, -0.2004]) tensor([0.7396, 0.0940, 0.0673, 0.0991]) -Greedy action tensor([ 1.2860, -0.4176, -0.3340, 0.3545]) tensor([0.5637, 0.1026, 0.1116, 0.2221]) -Greedy action tensor([ 1.0291, -0.2471, -0.0032, -0.0694]) tensor([0.5080, 0.1418, 0.1809, 0.1693]) -Greedy action tensor([ 0.9779, 0.1752, -0.2719, 0.1058]) tensor([0.4645, 0.2082, 0.1331, 0.1942]) -Greedy action tensor([ 0.1990, 0.2768, -0.1142, -0.4652]) tensor([0.3006, 0.3249, 0.2198, 0.1547]) -Greedy action tensor([ 0.5742, -0.3536, -0.0085, -0.4067]) tensor([0.4294, 0.1698, 0.2398, 0.1610]) -Greedy action tensor([ 0.7935, -0.3294, -0.0185, -0.1476]) tensor([0.4631, 0.1507, 0.2056, 0.1807]) -Greedy action tensor([ 0.4950, -0.3698, 0.0439, -0.2386]) tensor([0.3940, 0.1659, 0.2509, 0.1892]) -Greedy action tensor([ 0.6707, -0.4290, -0.0657, -0.3463]) tensor([0.4601, 0.1532, 0.2203, 0.1664]) -Greedy action tensor([ 0.9017, -0.8633, 0.1441, -0.4661]) tensor([0.5278, 0.0904, 0.2474, 0.1344]) -Greedy action tensor([ 0.7619, -0.6457, 0.0277, -0.3298]) tensor([0.4854, 0.1188, 0.2329, 0.1629]) -Greedy action tensor([ 1.0123, -0.6218, -0.0649, -0.5463]) tensor([0.5727, 0.1117, 0.1950, 0.1205]) -Greedy action tensor([ 0.4330, -0.0155, -0.0586, 0.1160]) tensor([0.3357, 0.2144, 0.2053, 0.2445]) -Greedy action tensor([ 0.8011, -0.6250, -0.2057, -0.6062]) tensor([0.5404, 0.1298, 0.1975, 0.1323]) -Greedy action tensor([ 0.3755, -0.1918, -0.1655, -0.4465]) tensor([0.3863, 0.2190, 0.2249, 0.1698]) -Greedy action tensor([ 0.8913, -0.8886, 0.1075, -0.6228]) tensor([0.5419, 0.0914, 0.2475, 0.1192]) -Greedy action tensor([ 0.4555, -0.7346, -0.2125, -0.2772]) tensor([0.4353, 0.1324, 0.2232, 0.2092]) -Greedy action tensor([ 0.2825, 0.0736, -0.0994, -0.1064]) tensor([0.3153, 0.2558, 0.2152, 0.2137]) -Greedy action tensor([ 0.4989, -0.0279, -0.1337, -0.0834]) tensor([0.3731, 0.2203, 0.1982, 0.2084]) -Greedy action tensor([ 0.8973, -0.5979, 0.0516, -0.2793]) tensor([0.5097, 0.1143, 0.2188, 0.1572]) -Greedy action tensor([ 0.6308, -0.3317, -0.0711, -0.2096]) tensor([0.4331, 0.1654, 0.2146, 0.1869]) -Greedy action tensor([ 0.8765, -0.5842, -0.1585, -0.2172]) tensor([0.5202, 0.1207, 0.1848, 0.1743]) -Greedy action tensor([ 0.6903, -0.4158, 0.1338, -0.5011]) tensor([0.4529, 0.1499, 0.2596, 0.1376]) -Greedy action tensor([ 0.4221, -0.0718, -0.1080, -0.3302]) tensor([0.3745, 0.2285, 0.2204, 0.1765]) -Greedy action tensor([ 0.9589, -0.7421, 0.0382, -0.5232]) tensor([0.5531, 0.1009, 0.2203, 0.1256]) -Greedy action tensor([ 0.6204, -0.3286, -0.0466, -0.2826]) tensor([0.4337, 0.1679, 0.2226, 0.1758]) -Greedy action tensor([ 0.9823, -0.7977, 0.0266, -0.5496]) tensor([0.5652, 0.0953, 0.2173, 0.1222]) -Greedy action tensor([ 0.4080, 0.0790, 0.0417, -0.3953]) tensor([0.3495, 0.2516, 0.2423, 0.1566]) -Greedy action tensor([ 0.7573, -0.7274, 0.0106, -0.2697]) tensor([0.4858, 0.1101, 0.2302, 0.1740]) -Greedy action tensor([ 0.7491, -0.3876, -0.0339, -0.4581]) tensor([0.4815, 0.1545, 0.2200, 0.1440]) -Greedy action tensor([ 0.3562, -0.0590, 0.0582, -0.0066]) tensor([0.3228, 0.2131, 0.2396, 0.2246]) -Greedy action tensor([ 1.2037, -0.6192, 0.0052, -0.3979]) tensor([0.6007, 0.0970, 0.1812, 0.1211]) -Greedy action tensor([ 0.8293, -0.5898, -0.0357, -0.2448]) tensor([0.4989, 0.1207, 0.2100, 0.1704]) -Greedy action tensor([ 0.8217, -0.3004, -0.0708, -0.1603]) tensor([0.4740, 0.1543, 0.1942, 0.1775]) -Greedy action tensor([ 0.5698, -0.4968, -0.1066, -0.4219]) tensor([0.4497, 0.1548, 0.2287, 0.1668]) -Greedy action tensor([ 0.3292, -0.1351, -0.0009, -0.1579]) tensor([0.3376, 0.2122, 0.2427, 0.2074]) -Greedy action tensor([ 0.8050, -0.5930, 0.1020, -0.3501]) tensor([0.4861, 0.1201, 0.2407, 0.1531]) -Greedy action tensor([ 0.4666, 0.0186, -0.0381, -0.1764]) tensor([0.3612, 0.2308, 0.2181, 0.1899]) -Greedy action tensor([ 0.7739, -0.2780, -0.0149, -0.5963]) tensor([0.4860, 0.1697, 0.2208, 0.1235]) -Greedy action tensor([ 0.5826, -0.1411, -0.2579, -0.4214]) tensor([0.4381, 0.2124, 0.1890, 0.1605]) -Greedy action tensor([ 0.8662, -0.6508, 0.0250, -0.4508]) tensor([0.5212, 0.1143, 0.2248, 0.1397]) -Greedy action tensor([ 0.6738, -0.7162, -0.0306, -0.2012]) tensor([0.4629, 0.1153, 0.2289, 0.1930]) -Greedy action tensor([ 0.6827, -0.4010, 0.0820, -0.5004]) tensor([0.4560, 0.1543, 0.2501, 0.1397]) -Greedy action tensor([ 0.5704, -0.0752, -0.1011, -0.1825]) tensor([0.3990, 0.2092, 0.2039, 0.1879]) -Greedy action tensor([ 0.6565, -0.4942, -0.0627, -0.4842]) tensor([0.4710, 0.1490, 0.2294, 0.1505]) -Greedy action tensor([ 0.3601, 0.2655, -0.0528, -0.1154]) tensor([0.3132, 0.2849, 0.2072, 0.1947]) -Greedy action tensor([ 0.4284, -0.2031, -0.0089, -0.2056]) tensor([0.3693, 0.1964, 0.2385, 0.1959]) -Greedy action tensor([ 0.6224, -0.4545, 0.0045, -0.4363]) tensor([0.4491, 0.1530, 0.2421, 0.1558]) -Greedy action tensor([ 0.5736, -0.4559, -0.1143, -0.1486]) tensor([0.4264, 0.1523, 0.2143, 0.2071]) -Greedy action tensor([ 0.3710, 0.3212, -0.2024, -0.3273]) tensor([0.3320, 0.3158, 0.1871, 0.1651]) -Greedy action tensor([ 0.2833, 0.0788, -0.1032, -0.3214]) tensor([0.3289, 0.2681, 0.2234, 0.1796]) -Greedy action tensor([ 0.4522, -0.1497, -0.0032, -0.1157]) tensor([0.3638, 0.1993, 0.2307, 0.2062]) -Greedy action tensor([ 0.3377, -0.0866, -0.0929, -0.2483]) tensor([0.3495, 0.2287, 0.2273, 0.1945]) -Greedy action tensor([ 0.7941, 0.0559, -0.4358, -0.5044]) tensor([0.4894, 0.2339, 0.1431, 0.1336]) -Greedy action tensor([ 0.6873, -0.4411, -0.1025, -0.2335]) tensor([0.4596, 0.1487, 0.2086, 0.1830]) -Greedy action tensor([ 0.6693, 0.0087, 0.0673, -0.1413]) tensor([0.3986, 0.2059, 0.2183, 0.1772]) -Greedy action tensor([ 0.5137, -0.2634, -0.1289, -0.4281]) tensor([0.4210, 0.1935, 0.2214, 0.1641]) -Greedy action tensor([ 1.1699, -0.8864, 0.1580, -0.4898]) tensor([0.5946, 0.0761, 0.2162, 0.1131]) -Greedy action tensor([ 0.9375, -0.5022, -0.2047, -0.4750]) tensor([0.5557, 0.1317, 0.1773, 0.1353]) -Greedy action tensor([ 0.8501, -0.2266, -0.0484, -0.4746]) tensor([0.4966, 0.1692, 0.2022, 0.1320]) -Greedy action tensor([ 0.8757, -0.6654, -0.0388, -0.4682]) tensor([0.5331, 0.1142, 0.2136, 0.1391]) -Greedy action tensor([ 0.7652, -0.3072, -0.0375, -0.5319]) tensor([0.4846, 0.1658, 0.2172, 0.1325]) -Greedy action tensor([ 0.4296, -0.1193, -0.0637, -0.3664]) tensor([0.3789, 0.2188, 0.2313, 0.1709]) -Greedy action tensor([ 0.4258, -0.3757, 0.0392, -0.1980]) tensor([0.3754, 0.1684, 0.2550, 0.2012]) -Greedy action tensor([ 0.4085, 0.1221, -0.0098, -0.0407]) tensor([0.3282, 0.2464, 0.2160, 0.2094]) -Greedy action tensor([ 0.6711, 0.0432, -0.0332, -0.1281]) tensor([0.4036, 0.2154, 0.1996, 0.1815]) -Greedy action tensor([ 0.6840, -0.0013, -0.0586, -0.3343]) tensor([0.4272, 0.2153, 0.2033, 0.1543]) -Greedy action tensor([ 0.8017, -0.6271, -0.0372, -0.4827]) tensor([0.5132, 0.1230, 0.2218, 0.1421]) -Greedy action tensor([ 0.4877, -0.5318, -0.1225, -0.6092]) tensor([0.4468, 0.1612, 0.2427, 0.1492]) -Greedy action tensor([ 0.4350, 0.0021, -0.0784, -0.3455]) tensor([0.3696, 0.2398, 0.2212, 0.1694]) -Greedy action tensor([ 1.0225, -0.5505, -0.1435, -0.2463]) tensor([0.5555, 0.1152, 0.1731, 0.1562]) -Greedy action tensor([ 0.8326, -0.5959, -0.1109, -0.2676]) tensor([0.5098, 0.1222, 0.1984, 0.1696]) -Greedy action tensor([ 0.2278, -0.2759, 0.1921, -0.2715]) tensor([0.3148, 0.1903, 0.3038, 0.1911]) -Greedy action tensor([ 0.3926, -0.0655, -0.0760, -0.4471]) tensor([0.3717, 0.2351, 0.2326, 0.1605]) -Greedy action tensor([ 0.6758, -0.4476, -0.0312, -0.5095]) tensor([0.4708, 0.1531, 0.2322, 0.1439]) -Greedy action tensor([ 0.6908, -0.3427, -0.0921, -0.2957]) tensor([0.4575, 0.1628, 0.2091, 0.1706]) -Greedy action tensor([ 0.6015, -0.1775, -0.0102, -0.3246]) tensor([0.4171, 0.1914, 0.2263, 0.1652]) -Greedy action tensor([ 0.7493, -0.3777, -0.1763, -0.2834]) tensor([0.4816, 0.1560, 0.1909, 0.1715]) -Greedy action tensor([ 0.6224, -0.4013, -0.1051, -0.2795]) tensor([0.4448, 0.1598, 0.2149, 0.1805]) -Greedy action tensor([ 0.8196, -0.3910, 0.0522, -0.4523]) tensor([0.4896, 0.1459, 0.2273, 0.1372]) -Greedy action tensor([ 0.8232, -0.5882, 0.0583, -0.2465]) tensor([0.4873, 0.1188, 0.2268, 0.1672]) -Greedy action tensor([ 0.6583, -0.2903, 0.1256, -0.3537]) tensor([0.4277, 0.1657, 0.2511, 0.1555]) -Greedy action tensor([ 0.8908, -0.1635, -0.0181, -0.2040]) tensor([0.4794, 0.1670, 0.1932, 0.1604]) -Greedy action tensor([ 0.5616, -0.4233, 0.0878, -0.7158]) tensor([0.4396, 0.1642, 0.2737, 0.1225]) -Greedy action tensor([ 0.8168, -0.4130, 0.0158, -0.2203]) tensor([0.4772, 0.1395, 0.2142, 0.1691]) -Greedy action tensor([-1.9204, -0.4302, 0.6488, -0.1741]) tensor([0.0413, 0.1832, 0.5389, 0.2367]) -Greedy action tensor([-1.3308, -0.4941, 0.4268, 0.4220]) tensor([0.0672, 0.1552, 0.3897, 0.3879]) -Greedy action tensor([-1.2628, -0.5595, 0.6541, 0.9327]) tensor([0.0532, 0.1074, 0.3616, 0.4778]) -Greedy action tensor([-1.9252, -0.4646, 0.6614, -0.1698]) tensor([0.0410, 0.1767, 0.5449, 0.2373]) -Greedy action tensor([-1.6572, -0.0994, 0.3558, -0.1107]) tensor([0.0558, 0.2648, 0.4175, 0.2619]) -Greedy action tensor([-1.5995, -0.2918, 0.4610, -0.0363]) tensor([0.0577, 0.2135, 0.4532, 0.2756]) -Greedy action tensor([-1.6445, -0.4307, 0.5962, 0.0730]) tensor([0.0517, 0.1741, 0.4861, 0.2881]) -Greedy action tensor([-1.7166, -0.6151, 0.8332, 0.0375]) tensor([0.0443, 0.1332, 0.5668, 0.2558]) -Greedy action tensor([-1.8814, -0.3748, 0.6345, -0.1432]) tensor([0.0424, 0.1913, 0.5250, 0.2412]) -Greedy action tensor([-1.8703, -0.3317, 0.6211, -0.1050]) tensor([0.0424, 0.1976, 0.5122, 0.2478]) -Greedy action tensor([-1.9110, -0.4564, 0.6715, -0.1608]) tensor([0.0412, 0.1765, 0.5452, 0.2372]) -Greedy action tensor([-0.7544, -0.1430, 1.3318, 1.4046]) tensor([0.0511, 0.0942, 0.4118, 0.4429]) -Greedy action tensor([-1.6167, -0.3481, 0.6546, 0.0582]) tensor([0.0511, 0.1815, 0.4948, 0.2726]) -Greedy action tensor([-0.5240, -0.6115, 0.4579, 0.6578]) tensor([0.1275, 0.1168, 0.3402, 0.4155]) -Greedy action tensor([-1.3307, 0.9603, 0.3142, -0.0051]) tensor([0.0504, 0.4985, 0.2613, 0.1898]) -Greedy action tensor([-1.0039, -0.5632, 0.2647, 0.4150]) tensor([0.0976, 0.1517, 0.3472, 0.4035]) -Greedy action tensor([-0.8129, -0.6066, 0.4003, 0.5258]) tensor([0.1063, 0.1307, 0.3576, 0.4054]) -Greedy action tensor([-1.0096, -0.4673, 0.2942, 0.2829]) tensor([0.0995, 0.1712, 0.3667, 0.3625]) -Greedy action tensor([-1.9323, -0.4488, 0.6636, -0.1730]) tensor([0.0406, 0.1790, 0.5445, 0.2359]) -Greedy action tensor([-1.1547, 0.6383, 0.3411, 0.5206]) tensor([0.0595, 0.3573, 0.2655, 0.3177]) -Greedy action tensor([-1.6239, -0.5534, 0.4954, 0.0033]) tensor([0.0577, 0.1683, 0.4804, 0.2937]) -Greedy action tensor([-1.6908, -0.4773, 0.5813, 0.0405]) tensor([0.0507, 0.1707, 0.4921, 0.2865]) -Greedy action tensor([-1.7215, -0.1034, 0.5349, -0.1854]) tensor([0.0494, 0.2492, 0.4718, 0.2296]) -Greedy action tensor([-1.7994, -0.2325, 0.5590, -0.0980]) tensor([0.0458, 0.2193, 0.4840, 0.2509]) -Greedy action tensor([-1.8083, -0.5372, 0.5977, -0.0962]) tensor([0.0472, 0.1682, 0.5232, 0.2614]) -Greedy action tensor([-1.6808, -0.5451, 0.5791, 0.0270]) tensor([0.0520, 0.1620, 0.4988, 0.2872]) -Greedy action tensor([-1.8274, -0.3144, 0.6376, -0.0899]) tensor([0.0435, 0.1975, 0.5117, 0.2472]) -Greedy action tensor([-1.7085, -0.5081, 0.6856, 0.1424]) tensor([0.0462, 0.1534, 0.5063, 0.2941]) -Greedy action tensor([-1.8476, -0.2471, 0.5897, -0.1066]) tensor([0.0433, 0.2145, 0.4953, 0.2469]) -Greedy action tensor([-1.9255, -0.4508, 0.6615, -0.1661]) tensor([0.0409, 0.1786, 0.5431, 0.2374]) -Greedy action tensor([-1.9274, -0.4067, 0.6539, -0.1639]) tensor([0.0406, 0.1858, 0.5367, 0.2369]) -Greedy action tensor([-1.6712, -0.5201, 0.5279, -0.0904]) tensor([0.0554, 0.1753, 0.4999, 0.2694]) -Greedy action tensor([-1.8256, -0.3309, 0.6151, -0.0969]) tensor([0.0443, 0.1975, 0.5086, 0.2496]) -Greedy action tensor([-1.5165, -0.4127, 0.6505, 0.3841]) tensor([0.0514, 0.1551, 0.4492, 0.3442]) -Greedy action tensor([-1.1586, -0.6142, 0.6125, 0.7575]) tensor([0.0650, 0.1119, 0.3818, 0.4413]) -Greedy action tensor([-1.8525, -0.4698, 0.6165, -0.1323]) tensor([0.0447, 0.1781, 0.5277, 0.2496]) -Greedy action tensor([-1.3329, -0.6194, 0.3289, 0.2091]) tensor([0.0770, 0.1572, 0.4058, 0.3600]) -Greedy action tensor([-1.7684, -0.1747, 0.5330, -0.0799]) tensor([0.0469, 0.2308, 0.4685, 0.2538]) -Greedy action tensor([-1.7874, -0.3829, 0.5846, -0.0977]) tensor([0.0471, 0.1920, 0.5054, 0.2554]) -Greedy action tensor([-1.5880, -0.3602, 0.6301, 0.1153]) tensor([0.0524, 0.1788, 0.4812, 0.2876]) -Greedy action tensor([-1.6479, -0.3639, 0.6120, -0.2817]) tensor([0.0552, 0.1993, 0.5290, 0.2164]) -Greedy action tensor([-1.8381, -0.3869, 0.6345, -0.1037]) tensor([0.0439, 0.1873, 0.5202, 0.2486]) -Greedy action tensor([-1.4994, -0.4773, 0.7921, -0.5725]) tensor([0.0617, 0.1716, 0.6106, 0.1560]) -Greedy action tensor([-1.6059, -0.5338, 0.4626, 0.0251]) tensor([0.0590, 0.1724, 0.4670, 0.3015]) -Greedy action tensor([-1.6507, -0.1866, 0.5022, -0.0060]) tensor([0.0523, 0.2262, 0.4505, 0.2710]) -Greedy action tensor([-1.8101, -0.4931, 0.6675, -0.0205]) tensor([0.0442, 0.1649, 0.5264, 0.2646]) -Greedy action tensor([-1.3358, -0.5419, 0.4131, 0.2623]) tensor([0.0719, 0.1591, 0.4134, 0.3556]) -Greedy action tensor([-1.1222, -0.5665, 0.6732, 1.1094]) tensor([0.0553, 0.0964, 0.3331, 0.5152]) -Greedy action tensor([-1.7581, -0.4670, 0.5794, -0.0858]) tensor([0.0492, 0.1790, 0.5097, 0.2621]) -Greedy action tensor([-1.5777, -0.3978, 0.4909, 0.0655]) tensor([0.0577, 0.1877, 0.4564, 0.2983]) -Greedy action tensor([-1.6871, -0.3064, 0.4835, -0.0373]) tensor([0.0528, 0.2099, 0.4625, 0.2748]) -Greedy action tensor([-1.4562, -0.6626, 0.7484, -0.5525]) tensor([0.0678, 0.1500, 0.6148, 0.1674]) -Greedy action tensor([-1.8331, -0.4513, 0.6186, -0.1172]) tensor([0.0451, 0.1798, 0.5240, 0.2511]) -Greedy action tensor([-1.5131, -0.5279, 0.4274, 0.0883]) tensor([0.0641, 0.1717, 0.4463, 0.3179]) -Greedy action tensor([-1.3574, -0.6083, 0.3411, 0.2037]) tensor([0.0749, 0.1585, 0.4096, 0.3570]) -Greedy action tensor([-1.8122, -0.4776, 0.6828, 0.0362]) tensor([0.0430, 0.1632, 0.5209, 0.2729]) -Greedy action tensor([-1.8247, -0.3867, 0.6020, -0.1056]) tensor([0.0452, 0.1905, 0.5120, 0.2523]) -Greedy action tensor([-1.8595, -0.2351, 0.5961, -0.1184]) tensor([0.0427, 0.2166, 0.4973, 0.2434]) -Greedy action tensor([-1.8675, -0.4522, 0.6734, -0.0127]) tensor([0.0413, 0.1702, 0.5244, 0.2641]) -Greedy action tensor([-1.9284, -0.4617, 0.6813, -0.1599]) tensor([0.0403, 0.1748, 0.5484, 0.2365]) -Greedy action tensor([-1.9298, -0.4527, 0.6573, -0.1713]) tensor([0.0409, 0.1790, 0.5431, 0.2371]) -Greedy action tensor([-1.9367, -0.4358, 0.6615, -0.1758]) tensor([0.0404, 0.1813, 0.5432, 0.2351]) -Greedy action tensor([-1.6215, -0.0254, 0.5517, 0.0904]) tensor([0.0494, 0.2435, 0.4337, 0.2734]) -Greedy action tensor([-1.1888, -0.6006, 0.2732, 0.2456]) tensor([0.0884, 0.1592, 0.3814, 0.3710]) -Greedy action tensor([-1.9127, -0.4075, 0.6417, -0.1640]) tensor([0.0415, 0.1868, 0.5334, 0.2383]) -Greedy action tensor([-1.0565, -0.6636, 0.6790, 0.8923]) tensor([0.0659, 0.0976, 0.3738, 0.4627]) -Greedy action tensor([-1.5199, -0.5603, 0.4538, 0.0982]) tensor([0.0631, 0.1647, 0.4540, 0.3182]) -Greedy action tensor([-1.8779, -0.4771, 0.6318, -0.1432]) tensor([0.0434, 0.1763, 0.5342, 0.2461]) -Greedy action tensor([-0.1442, 0.1093, 0.9931, 1.6580]) tensor([0.0872, 0.1123, 0.2719, 0.5286]) -Greedy action tensor([-1.6974, -0.3846, 0.5291, -0.0281]) tensor([0.0518, 0.1927, 0.4804, 0.2751]) -Greedy action tensor([-1.2654, -0.3445, 1.1095, 1.1861]) tensor([0.0387, 0.0971, 0.4156, 0.4487]) -Greedy action tensor([-1.9467, -0.4544, 0.6676, -0.1816]) tensor([0.0401, 0.1783, 0.5475, 0.2342]) -Greedy action tensor([-0.5471, -0.3989, 0.1710, -0.0086]) tensor([0.1688, 0.1958, 0.3462, 0.2893]) -Greedy action tensor([-1.7002, -0.3750, 0.5912, -0.2211]) tensor([0.0525, 0.1976, 0.5193, 0.2305]) -Greedy action tensor([-1.4829, -0.4552, 0.5887, 0.3280]) tensor([0.0560, 0.1566, 0.4447, 0.3427]) -Greedy action tensor([-1.2092, 0.0661, 0.4107, 0.3867]) tensor([0.0687, 0.2458, 0.3469, 0.3387]) -Greedy action tensor([-1.9112, -0.4720, 0.6630, -0.1591]) tensor([0.0415, 0.1750, 0.5443, 0.2392]) -Greedy action tensor([-1.9173, -0.3894, 0.6464, -0.1596]) tensor([0.0410, 0.1889, 0.5323, 0.2377]) -Greedy action tensor([-0.4977, -0.4655, -0.1650, 0.0063]) tensor([0.1967, 0.2032, 0.2744, 0.3257]) -Greedy action tensor([-1.9049, -0.4593, 0.6546, -0.1578]) tensor([0.0418, 0.1775, 0.5407, 0.2400]) -Greedy action tensor([-1.9270, -0.4541, 0.6551, -0.1682]) tensor([0.0410, 0.1788, 0.5422, 0.2380]) -Greedy action tensor([ 1.8973, -0.4414, -0.3802, 0.7193]) tensor([0.6636, 0.0640, 0.0680, 0.2043]) -Greedy action tensor([ 1.3290, -0.3052, -0.4086, 0.1051]) tensor([0.6006, 0.1172, 0.1057, 0.1766]) -Greedy action tensor([ 0.6280, -0.3246, -0.1640, 0.1657]) tensor([0.4051, 0.1563, 0.1835, 0.2551]) -Greedy action tensor([ 1.5218, 0.1231, -0.5499, 0.2372]) tensor([0.6062, 0.1497, 0.0764, 0.1678]) -Greedy action tensor([ 0.3991, -0.3205, -0.0323, -0.0596]) tensor([0.3612, 0.1759, 0.2346, 0.2283]) -Greedy action tensor([ 1.7685, -0.8209, -0.0856, 0.3655]) tensor([0.6768, 0.0508, 0.1060, 0.1664]) -Greedy action tensor([ 1.4667, -0.3676, -0.4774, 0.4450]) tensor([0.6014, 0.0961, 0.0861, 0.2165]) -Greedy action tensor([ 1.6287, -0.4519, -0.6109, 0.6351]) tensor([0.6244, 0.0780, 0.0665, 0.2312]) -Greedy action tensor([ 0.6740, -0.4854, -0.0141, -0.0332]) tensor([0.4330, 0.1358, 0.2176, 0.2135]) -Greedy action tensor([ 1.6767, -0.2060, -0.2204, 0.1586]) tensor([0.6573, 0.1000, 0.0986, 0.1440]) -Greedy action tensor([ 1.6235, -0.6670, -0.4321, 0.3758]) tensor([0.6595, 0.0667, 0.0844, 0.1894]) -Greedy action tensor([ 2.1363, -1.0822, -0.1062, 0.8994]) tensor([0.6961, 0.0279, 0.0739, 0.2021]) -Greedy action tensor([ 1.4819, -0.6880, -0.4898, 0.4763]) tensor([0.6176, 0.0705, 0.0860, 0.2259]) -Greedy action tensor([ 1.4857, -0.4288, -0.3365, 0.1684]) tensor([0.6341, 0.0935, 0.1025, 0.1699]) -Greedy action tensor([ 0.9561, -0.1645, -0.5126, 0.4071]) tensor([0.4686, 0.1528, 0.1079, 0.2707]) -Greedy action tensor([ 1.1439, -0.2327, -0.3625, 0.0922]) tensor([0.5484, 0.1384, 0.1216, 0.1916]) -Greedy action tensor([ 1.3484, -0.1180, -0.1940, 0.4735]) tensor([0.5372, 0.1240, 0.1149, 0.2240]) -Greedy action tensor([ 1.3456, -0.3785, -0.5755, 0.1945]) tensor([0.6094, 0.1087, 0.0892, 0.1927]) -Greedy action tensor([ 1.8595, -1.1568, -0.5421, 0.9074]) tensor([0.6555, 0.0321, 0.0594, 0.2530]) -Greedy action tensor([ 0.9960, -0.3463, -0.2534, 0.3971]) tensor([0.4768, 0.1246, 0.1367, 0.2620]) -Greedy action tensor([ 1.4163, -0.2837, -0.5917, 0.1770]) tensor([0.6225, 0.1137, 0.0836, 0.1803]) -Greedy action tensor([ 0.8906, -0.3592, -0.1047, 0.2481]) tensor([0.4583, 0.1313, 0.1694, 0.2410]) -Greedy action tensor([ 1.5048, -0.7850, 0.1514, 0.4443]) tensor([0.5862, 0.0594, 0.1515, 0.2030]) -Greedy action tensor([ 1.1864, 0.3671, -0.9092, 0.1321]) tensor([0.5229, 0.2305, 0.0643, 0.1822]) -Greedy action tensor([ 1.1816, -0.3238, -0.5567, 0.3273]) tensor([0.5485, 0.1217, 0.0964, 0.2334]) -Greedy action tensor([ 1.1975, -0.2046, -0.2188, 0.3544]) tensor([0.5211, 0.1282, 0.1264, 0.2243]) -Greedy action tensor([ 1.3967, -0.3573, -0.3298, 0.2392]) tensor([0.6005, 0.1039, 0.1068, 0.1887]) -Greedy action tensor([ 0.4640, -0.0506, -0.1968, 0.0235]) tensor([0.3626, 0.2167, 0.1873, 0.2334]) -Greedy action tensor([ 1.2351, -0.0404, -0.3060, 0.2855]) tensor([0.5318, 0.1485, 0.1139, 0.2058]) -Greedy action tensor([ 0.8787, -0.5353, -0.2387, 0.2723]) tensor([0.4727, 0.1149, 0.1546, 0.2578]) -Greedy action tensor([ 1.4105, -0.3992, -0.2704, 0.2106]) tensor([0.6056, 0.0991, 0.1128, 0.1824]) -Greedy action tensor([ 0.8428, 0.1887, -0.2254, 0.2543]) tensor([0.4134, 0.2150, 0.1421, 0.2295]) -Greedy action tensor([ 1.4774, -0.4378, -0.7664, 1.2352]) tensor([0.4906, 0.0723, 0.0520, 0.3851]) -Greedy action tensor([ 1.7843, -0.4534, -0.2944, 0.1333]) tensor([0.7024, 0.0750, 0.0879, 0.1348]) -Greedy action tensor([ 1.1597, -0.3843, -0.4041, 0.4258]) tensor([0.5255, 0.1122, 0.1100, 0.2523]) -Greedy action tensor([ 0.6236, -0.3549, 0.0230, 0.0301]) tensor([0.4037, 0.1518, 0.2214, 0.2230]) -Greedy action tensor([ 0.5898, -0.1420, -0.2020, 0.2251]) tensor([0.3805, 0.1830, 0.1723, 0.2642]) -Greedy action tensor([ 1.4621, -0.7323, -0.1972, 0.2799]) tensor([0.6218, 0.0693, 0.1183, 0.1906]) -Greedy action tensor([ 1.6696, -0.5856, -0.2271, 0.4199]) tensor([0.6487, 0.0680, 0.0973, 0.1859]) -Greedy action tensor([ 1.4683, -0.4559, -0.2003, 0.2183]) tensor([0.6169, 0.0901, 0.1163, 0.1767]) -Greedy action tensor([ 1.5626, -0.4290, -0.3305, 0.7036]) tensor([0.5846, 0.0798, 0.0880, 0.2476]) -Greedy action tensor([ 1.7847, -0.9390, -0.4574, 0.3245]) tensor([0.7122, 0.0467, 0.0757, 0.1654]) -Greedy action tensor([ 1.3134, -0.5010, -0.4236, 0.1314]) tensor([0.6077, 0.0990, 0.1070, 0.1864]) -Greedy action tensor([ 1.4234, -0.2874, -0.3470, 0.6806]) tensor([0.5474, 0.0989, 0.0932, 0.2604]) -Greedy action tensor([ 2.1277, 0.1049, -0.0815, 0.1596]) tensor([0.7237, 0.0957, 0.0795, 0.1011]) -Greedy action tensor([ 0.8729, -0.5643, -0.3981, 0.3851]) tensor([0.4690, 0.1114, 0.1316, 0.2880]) -Greedy action tensor([ 1.3552, -0.7230, -0.3696, 0.5240]) tensor([0.5751, 0.0720, 0.1025, 0.2505]) -Greedy action tensor([ 1.0778, -0.0599, -0.6700, 0.0255]) tensor([0.5423, 0.1739, 0.0945, 0.1893]) -Greedy action tensor([ 1.4870, -0.4496, -0.2855, 0.3982]) tensor([0.6058, 0.0874, 0.1029, 0.2039]) -Greedy action tensor([ 1.6804, -0.5968, -0.2232, 0.4442]) tensor([0.6485, 0.0665, 0.0966, 0.1884]) -Greedy action tensor([ 1.7542, -0.2060, -0.3099, 0.0701]) tensor([0.6881, 0.0969, 0.0873, 0.1277]) -Greedy action tensor([ 0.8659, 0.1553, -0.2843, 0.2550]) tensor([0.4254, 0.2090, 0.1347, 0.2309]) -Greedy action tensor([ 1.1229, -0.2271, -0.2990, 0.5172]) tensor([0.4887, 0.1267, 0.1179, 0.2667]) -Greedy action tensor([ 1.2995, -0.1563, 0.0613, -0.1750]) tensor([0.5708, 0.1331, 0.1655, 0.1306]) -Greedy action tensor([ 1.5354, -0.1855, -0.6467, 0.7292]) tensor([0.5753, 0.1029, 0.0649, 0.2569]) -Greedy action tensor([ 1.4486, -0.3398, -0.4074, 0.5761]) tensor([0.5742, 0.0960, 0.0898, 0.2400]) -Greedy action tensor([ 0.9206, -0.4297, -0.0324, 0.1915]) tensor([0.4701, 0.1218, 0.1813, 0.2268]) -Greedy action tensor([ 1.1987, -0.3264, -0.4398, 0.3977]) tensor([0.5374, 0.1169, 0.1044, 0.2412]) -Greedy action tensor([ 0.1191, -0.6094, -0.1343, 0.0944]) tensor([0.3092, 0.1492, 0.2400, 0.3016]) -Greedy action tensor([ 1.6119, -0.7485, -0.4819, 0.6799]) tensor([0.6206, 0.0586, 0.0765, 0.2444]) -Greedy action tensor([ 1.2830, -0.9486, -0.1924, 0.1915]) tensor([0.5982, 0.0642, 0.1368, 0.2008]) -Greedy action tensor([ 0.9307, -0.2478, -0.1011, 0.5470]) tensor([0.4264, 0.1312, 0.1519, 0.2905]) -Greedy action tensor([ 1.5416, -0.7807, -0.2185, 0.3126]) tensor([0.6399, 0.0627, 0.1101, 0.1872]) -Greedy action tensor([ 1.5458, -0.3000, -0.6565, 0.6035]) tensor([0.6031, 0.0952, 0.0667, 0.2350]) -Greedy action tensor([ 0.8196, -0.2332, -0.6159, 0.6627]) tensor([0.4095, 0.1429, 0.0975, 0.3501]) -Greedy action tensor([ 1.5815, 0.0249, -0.3909, 0.3751]) tensor([0.6063, 0.1279, 0.0844, 0.1815]) -Greedy action tensor([ 0.4719, -0.4432, 0.1848, 0.0287]) tensor([0.3580, 0.1434, 0.2687, 0.2299]) -Greedy action tensor([ 0.8111, -0.2060, -0.0294, -0.1782]) tensor([0.4619, 0.1670, 0.1993, 0.1717]) -Greedy action tensor([ 0.7736, -0.3468, -0.0138, -0.0645]) tensor([0.4517, 0.1473, 0.2056, 0.1954]) -Greedy action tensor([ 1.3236, -0.5601, -0.3130, 0.0318]) tensor([0.6167, 0.0938, 0.1200, 0.1695]) -Greedy action tensor([ 1.9535, -0.9667, -0.3800, 0.8430]) tensor([0.6756, 0.0364, 0.0655, 0.2225]) -Greedy action tensor([ 1.4247, -0.4368, -0.5073, 0.4635]) tensor([0.5943, 0.0924, 0.0861, 0.2273]) -Greedy action tensor([ 1.0336, -0.4269, -0.0764, 0.1162]) tensor([0.5099, 0.1184, 0.1680, 0.2037]) -Greedy action tensor([ 2.0017, -1.1130, -0.1449, 0.4969]) tensor([0.7229, 0.0321, 0.0845, 0.1605]) -Greedy action tensor([ 1.6891, -0.0697, -0.3158, 0.2256]) tensor([0.6500, 0.1120, 0.0875, 0.1504]) -Greedy action tensor([ 0.4985, -0.2883, -0.1391, 0.2223]) tensor([0.3646, 0.1660, 0.1927, 0.2766]) -Greedy action tensor([ 1.1332, -0.5214, -0.0964, 0.1040]) tensor([0.5432, 0.1038, 0.1588, 0.1941]) -Greedy action tensor([ 1.6094, -0.6013, -0.4295, 0.4832]) tensor([0.6394, 0.0701, 0.0832, 0.2073]) -Greedy action tensor([ 1.7260, 0.2447, -0.0330, 0.0413]) tensor([0.6309, 0.1434, 0.1086, 0.1170]) -Greedy action tensor([ 1.3275, -0.2477, -0.6001, 0.1989]) tensor([0.5967, 0.1235, 0.0868, 0.1930]) -Greedy action tensor([ 1.3786, -0.4969, -0.5548, 0.7714]) tensor([0.5427, 0.0832, 0.0785, 0.2957]) -Greedy action tensor([ 0.8058, -0.2815, 0.1498, 0.4084]) tensor([0.3956, 0.1334, 0.2053, 0.2658]) -Greedy action tensor([ 0.5297, -1.0812, 0.6378, 0.6910]) tensor([0.2866, 0.0572, 0.3193, 0.3368]) -Greedy action tensor([ 0.4161, -1.2126, -0.9617, 0.4452]) tensor([0.4036, 0.0792, 0.1018, 0.4155]) -Greedy action tensor([-0.1315, -0.3092, 0.5842, -0.7917]) tensor([0.2273, 0.1903, 0.4650, 0.1175]) -Greedy action tensor([ 2.0249, -1.6319, 0.7092, 0.3436]) tensor([0.6756, 0.0174, 0.1813, 0.1257]) -Greedy action tensor([-0.4334, -0.6282, -0.3351, -0.6653]) tensor([0.2689, 0.2213, 0.2966, 0.2132]) -Greedy action tensor([-0.1716, 0.0652, -0.8230, -0.8586]) tensor([0.3038, 0.3850, 0.1584, 0.1528]) -Greedy action tensor([ 0.2555, -0.1458, -0.4264, -1.0241]) tensor([0.4076, 0.2729, 0.2061, 0.1134]) -Greedy action tensor([ 1.6459, -0.6762, 1.4281, -0.0612]) tensor([0.4799, 0.0471, 0.3860, 0.0871]) -Greedy action tensor([-0.3164, -1.3645, 0.6566, -0.4298]) tensor([0.2045, 0.0717, 0.5412, 0.1826]) -Greedy action tensor([1.2314, 0.9287, 0.5658, 0.3165]) tensor([0.3769, 0.2784, 0.1937, 0.1510]) -Greedy action tensor([-1.0493, -0.8060, 1.5704, -0.4255]) tensor([0.0559, 0.0714, 0.7683, 0.1044]) -Greedy action tensor([ 0.3262, -0.5000, 0.0688, 0.8895]) tensor([0.2521, 0.1103, 0.1949, 0.4427]) -Greedy action tensor([ 0.8130, -0.2390, 0.4816, 1.0771]) tensor([0.2968, 0.1036, 0.2131, 0.3865]) -Greedy action tensor([-1.0552, -1.0448, 0.8697, -0.9502]) tensor([0.1002, 0.1013, 0.6871, 0.1113]) -Greedy action tensor([ 0.1023, -1.3971, 1.2743, -0.4949]) tensor([0.1999, 0.0446, 0.6454, 0.1100]) -Greedy action tensor([-0.7955, 0.4328, 2.3437, -0.6311]) tensor([0.0349, 0.1191, 0.8049, 0.0411]) -Greedy action tensor([ 0.4602, 0.4611, 0.3409, -0.5599]) tensor([0.3078, 0.3081, 0.2732, 0.1110]) -Greedy action tensor([-0.4823, -1.6635, 0.4099, -0.0110]) tensor([0.1869, 0.0574, 0.4562, 0.2995]) -Greedy action tensor([ 0.0249, -1.0809, -0.0134, -0.3676]) tensor([0.3368, 0.1115, 0.3242, 0.2275]) -Greedy action tensor([ 2.3366, -0.4978, 0.6662, 1.8944]) tensor([0.5292, 0.0311, 0.0996, 0.3401]) -Greedy action tensor([ 0.8684, 0.7890, 0.1601, -0.2553]) tensor([0.3648, 0.3370, 0.1797, 0.1186]) -Greedy action tensor([ 0.8749, -1.3301, -0.3735, 0.1723]) tensor([0.5284, 0.0583, 0.1516, 0.2617]) -Greedy action tensor([ 0.3690, -0.7937, -0.2745, 0.7337]) tensor([0.3050, 0.0954, 0.1603, 0.4393]) -Greedy action tensor([-1.7613, -1.4786, 1.8446, -0.6499]) tensor([0.0237, 0.0315, 0.8728, 0.0720]) -Greedy action tensor([0.5584, 0.7830, 0.4697, 0.9234]) tensor([0.2171, 0.2717, 0.1986, 0.3126]) -Greedy action tensor([ 1.3009, -1.1057, -0.2483, 0.8072]) tensor([0.5228, 0.0471, 0.1110, 0.3191]) -Greedy action tensor([ 1.1500, -0.0265, 0.7324, 0.6781]) tensor([0.3860, 0.1190, 0.2542, 0.2408]) -Greedy action tensor([ 0.9475, -1.5987, 0.1635, 0.8578]) tensor([0.4083, 0.0320, 0.1864, 0.3733]) -Greedy action tensor([-0.4412, -0.2894, 0.4648, 0.1265]) tensor([0.1562, 0.1818, 0.3865, 0.2755]) -Greedy action tensor([ 0.3804, -0.4029, 0.1673, 0.1283]) tensor([0.3287, 0.1502, 0.2656, 0.2555]) -Greedy action tensor([ 0.4779, -0.2710, 0.5812, -0.3999]) tensor([0.3336, 0.1578, 0.3699, 0.1387]) -Greedy action tensor([-0.8278, -0.7858, 0.5706, -0.4597]) tensor([0.1327, 0.1384, 0.5372, 0.1917]) -Greedy action tensor([ 0.2188, -1.0992, -0.0674, 0.0811]) tensor([0.3460, 0.0926, 0.2599, 0.3015]) -Greedy action tensor([-0.0182, -0.9613, -0.1767, 0.1779]) tensor([0.2890, 0.1126, 0.2467, 0.3517]) -Greedy action tensor([-0.7197, -0.0421, 0.2590, -0.2755]) tensor([0.1391, 0.2739, 0.3701, 0.2169]) -Greedy action tensor([-0.1466, -0.7155, 0.0015, -0.5460]) tensor([0.2944, 0.1667, 0.3414, 0.1975]) -Greedy action tensor([-0.9415, -1.0760, 0.5193, -0.4553]) tensor([0.1280, 0.1119, 0.5518, 0.2082]) -Greedy action tensor([ 0.7343, 0.3167, 0.7040, -0.2371]) tensor([0.3325, 0.2190, 0.3226, 0.1259]) -Greedy action tensor([ 0.7215, 0.4403, -0.3824, 0.1851]) tensor([0.3744, 0.2826, 0.1241, 0.2189]) -Greedy action tensor([-0.6023, -0.9147, -0.6047, 0.4362]) tensor([0.1800, 0.1317, 0.1796, 0.5086]) -Greedy action tensor([-0.0870, -2.4883, 0.5253, 0.6833]) tensor([0.1962, 0.0178, 0.3620, 0.4240]) -Greedy action tensor([0.7117, 0.5989, 0.6683, 0.0600]) tensor([0.2965, 0.2649, 0.2840, 0.1546]) -Greedy action tensor([-0.0742, -2.2843, 0.4412, 0.2718]) tensor([0.2383, 0.0261, 0.3989, 0.3367]) -Greedy action tensor([-0.3854, -0.6620, 2.6896, -0.9880]) tensor([0.0417, 0.0317, 0.9038, 0.0228]) -Greedy action tensor([-0.7925, -0.1988, 1.5951, -1.3348]) tensor([0.0700, 0.1268, 0.7624, 0.0407]) -Greedy action tensor([-1.2486, -0.1896, 0.3695, -0.8310]) tensor([0.0957, 0.2760, 0.4829, 0.1454]) -Greedy action tensor([1.0702, 0.3243, 1.1640, 0.5573]) tensor([0.3153, 0.1496, 0.3463, 0.1888]) -Greedy action tensor([ 0.0365, -0.7063, 0.1711, -0.3525]) tensor([0.3033, 0.1443, 0.3469, 0.2055]) -Greedy action tensor([0.0254, 0.8267, 0.0642, 1.6483]) tensor([0.1071, 0.2387, 0.1114, 0.5428]) -Greedy action tensor([-0.3581, 0.0853, -0.3119, -0.0812]) tensor([0.2031, 0.3164, 0.2127, 0.2679]) -Greedy action tensor([-1.3917, -0.2628, -1.0982, -0.6409]) tensor([0.1324, 0.4095, 0.1776, 0.2805]) -Greedy action tensor([-0.1269, -0.1468, -0.4724, 1.6188]) tensor([0.1188, 0.1165, 0.0841, 0.6807]) -Greedy action tensor([ 0.0790, -0.5339, 1.3475, 0.7119]) tensor([0.1433, 0.0776, 0.5094, 0.2698]) -Greedy action tensor([-0.5867, -1.7984, 0.5912, 0.8327]) tensor([0.1152, 0.0343, 0.3742, 0.4763]) -Greedy action tensor([-0.7161, -1.9847, 0.7207, 0.3698]) tensor([0.1183, 0.0333, 0.4979, 0.3505]) -Greedy action tensor([1.1132, 0.2955, 1.7334, 0.0378]) tensor([0.2746, 0.1212, 0.5105, 0.0937]) -Greedy action tensor([-0.4476, -1.8686, 0.8440, 1.1169]) tensor([0.1035, 0.0250, 0.3767, 0.4948]) -Greedy action tensor([-0.1297, -0.3329, -0.2381, 0.0740]) tensor([0.2538, 0.2072, 0.2278, 0.3112]) -Greedy action tensor([ 0.0425, -1.2545, 0.1731, 1.0885]) tensor([0.1901, 0.0520, 0.2167, 0.5412]) -Greedy action tensor([ 1.9572, -0.1574, 1.4487, 0.3791]) tensor([0.5186, 0.0626, 0.3118, 0.1070]) -Greedy action tensor([-0.3140, -1.1962, 0.3166, 0.7382]) tensor([0.1624, 0.0672, 0.3052, 0.4652]) -Greedy action tensor([ 1.1498, -1.1248, 0.2636, -0.3768]) tensor([0.5773, 0.0594, 0.2380, 0.1254]) -Greedy action tensor([-0.1163, -0.5637, -0.4736, 0.0945]) tensor([0.2798, 0.1789, 0.1958, 0.3455]) -Greedy action tensor([-1.1435, -1.2449, -1.1214, 0.7089]) tensor([0.1075, 0.0971, 0.1099, 0.6854]) -Greedy action tensor([-0.1345, -0.5087, -0.0759, -0.6632]) tensor([0.2996, 0.2061, 0.3177, 0.1766]) -Greedy action tensor([ 0.8200, 0.2283, 0.7562, -0.1395]) tensor([0.3479, 0.1925, 0.3264, 0.1333]) -Greedy action tensor([ 1.4109, -0.8012, 0.3236, 0.3960]) tensor([0.5528, 0.0605, 0.1863, 0.2004]) -Greedy action tensor([-1.3631, -0.1333, 1.3457, -0.2486]) tensor([0.0445, 0.1522, 0.6678, 0.1356]) -Greedy action tensor([ 0.7392, -0.9059, 1.9892, 0.8388]) tensor([0.1728, 0.0333, 0.6030, 0.1909]) -Greedy action tensor([ 0.4466, 0.2485, 0.5345, -0.2371]) tensor([0.2927, 0.2401, 0.3196, 0.1477]) -Greedy action tensor([-0.9661, -0.6421, 1.6401, -0.5793]) tensor([0.0575, 0.0795, 0.7785, 0.0846]) -Greedy action tensor([-1.1498, 1.0607, -0.8447, -0.3556]) tensor([0.0730, 0.6662, 0.0991, 0.1616]) -Greedy action tensor([ 0.6300, -1.2046, 0.8079, -0.5055]) tensor([0.3737, 0.0597, 0.4465, 0.1201]) -Greedy action tensor([-0.0706, -0.9947, 0.0989, 0.0774]) tensor([0.2673, 0.1061, 0.3167, 0.3099]) -Greedy action tensor([ 0.7273, -2.1701, -0.4288, 0.6977]) tensor([0.4272, 0.0236, 0.1345, 0.4148]) -Greedy action tensor([ 0.9873, -0.4659, -0.6507, 1.0182]) tensor([0.4066, 0.0951, 0.0790, 0.4193]) -Greedy action tensor([ 0.3504, 0.1109, -0.1153, 0.9217]) tensor([0.2389, 0.1880, 0.1500, 0.4231]) -Greedy action tensor([-0.3538, -0.3869, -0.7675, 0.0733]) tensor([0.2403, 0.2325, 0.1589, 0.3683]) -Greedy action tensor([ 0.1181, 0.6725, -0.2704, 0.5013]) tensor([0.2047, 0.3563, 0.1388, 0.3002]) -Greedy action tensor([ 0.1780, 0.3396, -0.2028, 1.1557]) tensor([0.1813, 0.2130, 0.1239, 0.4819]) -Greedy action tensor([ 0.1379, -0.3227, -0.6510, -0.5804]) tensor([0.3887, 0.2452, 0.1766, 0.1895]) -Greedy action tensor([ 0.6054, -0.5962, -0.1927, -0.1478]) tensor([0.4501, 0.1353, 0.2026, 0.2119]) -Greedy action tensor([ 1.0681, -1.2172, 0.0956, -0.7310]) tensor([0.6078, 0.0618, 0.2298, 0.1006]) -Greedy action tensor([ 0.3860, -0.1795, -0.0700, -0.3221]) tensor([0.3711, 0.2108, 0.2352, 0.1828]) -Greedy action tensor([ 0.8474, -0.4379, 0.1063, -0.5520]) tensor([0.5000, 0.1383, 0.2383, 0.1234]) -Greedy action tensor([ 0.9055, -0.6155, 0.0309, -0.4045]) tensor([0.5248, 0.1147, 0.2189, 0.1416]) -Greedy action tensor([ 0.4121, 0.0611, -0.1459, -0.0088]) tensor([0.3410, 0.2400, 0.1951, 0.2238]) -Greedy action tensor([ 0.7998, -0.5900, -0.0851, -0.4782]) tensor([0.5153, 0.1284, 0.2127, 0.1436]) -Greedy action tensor([ 0.4247, -0.7443, -0.2207, -0.2755]) tensor([0.4289, 0.1332, 0.2249, 0.2129]) -Greedy action tensor([ 0.9273, -0.1309, -0.0447, -0.1375]) tensor([0.4830, 0.1677, 0.1827, 0.1665]) -Greedy action tensor([ 0.5847, -0.7096, -0.1399, -0.2213]) tensor([0.4535, 0.1243, 0.2197, 0.2025]) -Greedy action tensor([ 0.4605, -0.0356, 0.0385, -0.2745]) tensor([0.3644, 0.2219, 0.2390, 0.1747]) -Greedy action tensor([ 0.7306, -0.6033, -0.0535, -0.2078]) tensor([0.4737, 0.1248, 0.2162, 0.1853]) -Greedy action tensor([ 0.9780, -0.7459, 0.0601, -0.4796]) tensor([0.5523, 0.0985, 0.2206, 0.1286]) -Greedy action tensor([ 0.1208, 0.2580, -0.0636, -0.2431]) tensor([0.2722, 0.3122, 0.2264, 0.1892]) -Greedy action tensor([ 0.3295, -0.2107, -0.0599, -0.1621]) tensor([0.3482, 0.2029, 0.2359, 0.2130]) -Greedy action tensor([ 0.9449, -0.8892, -0.0370, -0.4740]) tensor([0.5630, 0.0899, 0.2109, 0.1362]) -Greedy action tensor([ 0.9554, -0.3713, 0.0667, -0.4310]) tensor([0.5191, 0.1377, 0.2134, 0.1298]) -Greedy action tensor([ 0.3585, -0.2204, 0.0300, -0.0448]) tensor([0.3391, 0.1901, 0.2442, 0.2266]) -Greedy action tensor([ 0.8146, -0.4768, 0.0749, -0.5266]) tensor([0.4966, 0.1365, 0.2370, 0.1299]) -Greedy action tensor([ 0.5804, -0.4989, -0.0096, -0.2951]) tensor([0.4327, 0.1471, 0.2399, 0.1803]) -Greedy action tensor([ 0.4158, -0.1198, -0.1165, -0.0911]) tensor([0.3604, 0.2109, 0.2116, 0.2171]) -Greedy action tensor([ 0.4028, -0.0105, -0.0846, -0.1917]) tensor([0.3537, 0.2339, 0.2172, 0.1952]) -Greedy action tensor([ 0.4647, 0.0012, -0.1683, -0.1126]) tensor([0.3674, 0.2312, 0.1951, 0.2063]) -Greedy action tensor([ 0.1964, 0.2738, -0.1511, -0.3544]) tensor([0.2973, 0.3212, 0.2100, 0.1714]) -Greedy action tensor([ 0.7707, -0.4002, -0.0638, -0.3461]) tensor([0.4827, 0.1497, 0.2096, 0.1580]) -Greedy action tensor([ 0.3485, -0.0672, 0.0416, -0.1497]) tensor([0.3330, 0.2197, 0.2450, 0.2023]) -Greedy action tensor([ 0.7466, -0.4279, -0.0764, -0.4183]) tensor([0.4854, 0.1500, 0.2132, 0.1514]) -Greedy action tensor([ 0.3696, 0.1391, -0.1626, 0.0865]) tensor([0.3190, 0.2533, 0.1873, 0.2403]) -Greedy action tensor([ 0.2768, -0.1031, -0.0479, -0.2299]) tensor([0.3323, 0.2273, 0.2402, 0.2002]) -Greedy action tensor([ 0.5557, -0.5039, -0.1210, -0.1035]) tensor([0.4216, 0.1461, 0.2143, 0.2180]) -Greedy action tensor([ 0.4302, -0.4865, -0.1784, -0.2167]) tensor([0.4053, 0.1620, 0.2205, 0.2122]) -Greedy action tensor([ 0.7642, -0.5791, -0.0032, -0.5916]) tensor([0.5043, 0.1316, 0.2341, 0.1300]) -Greedy action tensor([ 0.7436, -0.6614, -0.1045, -0.2942]) tensor([0.4931, 0.1210, 0.2112, 0.1747]) -Greedy action tensor([ 0.4954, -0.1615, 0.0163, -0.2608]) tensor([0.3835, 0.1989, 0.2376, 0.1800]) -Greedy action tensor([ 0.4623, 0.0805, -0.0140, -0.4755]) tensor([0.3710, 0.2533, 0.2304, 0.1453]) -Greedy action tensor([ 0.6958, -0.1309, -0.0228, -0.0669]) tensor([0.4182, 0.1829, 0.2038, 0.1950]) -Greedy action tensor([ 0.4113, 0.0504, -0.1345, -0.3327]) tensor([0.3634, 0.2533, 0.2106, 0.1727]) -Greedy action tensor([ 0.3946, 0.0719, -0.0856, 0.0042]) tensor([0.3312, 0.2398, 0.2049, 0.2241]) -Greedy action tensor([ 0.6610, -0.3836, -0.0148, -0.3788]) tensor([0.4516, 0.1589, 0.2298, 0.1597]) -Greedy action tensor([ 0.4256, -0.4544, -0.1173, -0.1346]) tensor([0.3896, 0.1616, 0.2264, 0.2225]) -Greedy action tensor([ 0.5707, -0.1592, 0.2179, -0.3495]) tensor([0.3871, 0.1866, 0.2720, 0.1543]) -Greedy action tensor([ 0.4395, -0.2114, -0.0578, -0.1080]) tensor([0.3693, 0.1926, 0.2246, 0.2136]) -Greedy action tensor([ 0.4212, -0.0143, -0.0627, -0.2244]) tensor([0.3587, 0.2321, 0.2211, 0.1881]) -Greedy action tensor([ 0.7502, -0.2815, -0.0609, -0.2303]) tensor([0.4596, 0.1638, 0.2042, 0.1724]) -Greedy action tensor([ 0.8125, -0.4506, -0.2851, -0.3924]) tensor([0.5219, 0.1476, 0.1741, 0.1564]) -Greedy action tensor([ 0.7256, -0.4704, -0.1317, -0.2817]) tensor([0.4780, 0.1446, 0.2028, 0.1746]) -Greedy action tensor([ 0.3594, 0.1149, -0.1208, -0.0138]) tensor([0.3236, 0.2534, 0.2002, 0.2228]) -Greedy action tensor([ 0.2391, 0.0461, -0.0482, -0.2300]) tensor([0.3125, 0.2576, 0.2344, 0.1955]) -Greedy action tensor([ 0.7690, -0.3611, 0.0602, -0.1923]) tensor([0.4550, 0.1470, 0.2240, 0.1740]) -Greedy action tensor([ 0.4844, -0.1345, 0.0143, -0.0734]) tensor([0.3655, 0.1968, 0.2284, 0.2092]) -Greedy action tensor([ 0.7557, -0.4794, 0.0405, -0.4684]) tensor([0.4822, 0.1402, 0.2358, 0.1418]) -Greedy action tensor([ 0.4145, -0.0745, -0.1240, -0.0268]) tensor([0.3521, 0.2159, 0.2055, 0.2265]) -Greedy action tensor([ 0.4702, 0.1491, -0.0740, -0.0198]) tensor([0.3427, 0.2485, 0.1989, 0.2099]) -Greedy action tensor([ 0.4263, -0.0158, 0.0941, -0.3752]) tensor([0.3560, 0.2288, 0.2554, 0.1597]) -Greedy action tensor([ 0.4115, -0.1802, 0.1028, -0.2523]) tensor([0.3568, 0.1975, 0.2620, 0.1837]) -Greedy action tensor([ 0.7757, -0.0724, 0.0489, 0.1212]) tensor([0.4113, 0.1761, 0.1988, 0.2137]) -Greedy action tensor([ 0.5208, -0.1361, 0.1928, -0.3401]) tensor([0.3757, 0.1948, 0.2707, 0.1588]) -Greedy action tensor([ 0.5836, -0.0957, -0.0008, -0.5084]) tensor([0.4167, 0.2112, 0.2323, 0.1398]) -Greedy action tensor([ 0.4437, -0.1291, -0.0510, -0.2504]) tensor([0.3741, 0.2110, 0.2281, 0.1869]) -Greedy action tensor([ 0.7097, -0.2314, 0.1185, -0.3672]) tensor([0.4377, 0.1708, 0.2423, 0.1491]) -Greedy action tensor([ 0.4814, -0.1191, -0.0333, -0.2679]) tensor([0.3818, 0.2094, 0.2282, 0.1805]) -Greedy action tensor([ 0.7968, -0.9557, -0.1860, -0.3161]) tensor([0.5330, 0.0924, 0.1995, 0.1751]) -Greedy action tensor([ 0.5091, -0.3457, -0.0785, -0.6755]) tensor([0.4373, 0.1860, 0.2430, 0.1337]) -Greedy action tensor([ 0.9315, -0.9523, 0.0464, -0.5250]) tensor([0.5563, 0.0846, 0.2295, 0.1296]) -Greedy action tensor([ 0.4078, -0.4074, -0.1237, -0.0709]) tensor([0.3774, 0.1670, 0.2218, 0.2338]) -Greedy action tensor([ 0.3083, 0.2590, -0.1058, -0.1556]) tensor([0.3085, 0.2937, 0.2039, 0.1940]) -Greedy action tensor([ 0.3022, 0.2393, 0.1246, -0.2607]) tensor([0.2989, 0.2807, 0.2502, 0.1702]) -Greedy action tensor([ 0.3948, 0.0761, -0.1720, -0.1870]) tensor([0.3505, 0.2548, 0.1988, 0.1959]) -Greedy action tensor([0.2561, 0.2911, 0.0878, 0.1901]) tensor([0.2620, 0.2713, 0.2214, 0.2453]) -Greedy action tensor([ 0.4356, -0.1512, 0.0630, -0.2512]) tensor([0.3639, 0.2024, 0.2507, 0.1831]) -Greedy action tensor([ 0.8581, 0.0646, -0.1782, -0.3297]) tensor([0.4735, 0.2141, 0.1680, 0.1444]) -Greedy action tensor([ 1.0927, -0.8335, -0.0928, -0.7454]) tensor([0.6210, 0.0905, 0.1897, 0.0988]) -Greedy action tensor([ 0.5812, -0.2659, -0.0756, -0.1358]) tensor([0.4106, 0.1760, 0.2129, 0.2005]) -Greedy action tensor([ 0.5574, -0.2353, 0.1307, -0.6033]) tensor([0.4135, 0.1871, 0.2698, 0.1295]) -Greedy action tensor([ 0.2512, -0.0937, -0.0432, -0.3294]) tensor([0.3319, 0.2351, 0.2473, 0.1857]) -Greedy action tensor([ 0.4000, -0.1055, -0.1349, -0.2356]) tensor([0.3678, 0.2219, 0.2155, 0.1948]) -Greedy action tensor([ 0.8582, -0.7179, -0.1106, -0.2935]) tensor([0.5257, 0.1087, 0.1995, 0.1662]) -Greedy action tensor([ 0.8454, -0.4929, 0.0376, -0.3489]) tensor([0.4973, 0.1304, 0.2217, 0.1506]) -Greedy action tensor([ 0.6502, -0.4692, -0.0083, -0.3567]) tensor([0.4526, 0.1478, 0.2343, 0.1654]) -Greedy action tensor([ 0.5452, -0.4760, -0.0915, -0.2438]) tensor([0.4267, 0.1537, 0.2258, 0.1939]) -Greedy action tensor([ 8.1695e-01, -5.6155e-01, 3.6548e-04, -4.9072e-01]) tensor([0.5091, 0.1283, 0.2250, 0.1377]) -Greedy action tensor([ 1.7052, -0.8490, 0.1568, 0.1415]) tensor([0.6668, 0.0518, 0.1417, 0.1396]) -Greedy action tensor([ 1.0827, -0.2653, -0.1042, -0.2260]) tensor([0.5449, 0.1415, 0.1663, 0.1472]) -Greedy action tensor([ 1.0353, -0.0729, -0.1131, -0.1753]) tensor([0.5141, 0.1697, 0.1630, 0.1532]) -Greedy action tensor([ 1.3199, -0.3044, -0.5017, 0.1344]) tensor([0.6008, 0.1184, 0.0972, 0.1836]) -Greedy action tensor([ 1.2315, -0.5871, 0.0456, 0.0286]) tensor([0.5656, 0.0918, 0.1728, 0.1699]) -Greedy action tensor([ 2.0375, -1.0761, -0.4611, 0.1767]) tensor([0.7799, 0.0347, 0.0641, 0.1213]) -Greedy action tensor([ 0.6515, -0.2478, -0.1528, 0.0658]) tensor([0.4148, 0.1688, 0.1856, 0.2309]) -Greedy action tensor([ 1.6124, -0.6826, -0.2774, 0.2833]) tensor([0.6594, 0.0664, 0.0996, 0.1745]) -Greedy action tensor([ 1.7580, -0.7599, 0.2032, -0.0370]) tensor([0.6859, 0.0553, 0.1449, 0.1139]) -Greedy action tensor([ 2.5042, -1.5097, 0.1762, 0.0632]) tensor([0.8315, 0.0150, 0.0811, 0.0724]) -Greedy action tensor([ 0.5069, -0.1013, 0.1384, -0.1100]) tensor([0.3603, 0.1961, 0.2492, 0.1944]) -Greedy action tensor([ 1.5435, -0.7008, -0.1029, 0.3119]) tensor([0.6287, 0.0666, 0.1212, 0.1835]) -Greedy action tensor([ 1.5458, -0.4283, -0.3492, 0.4563]) tensor([0.6152, 0.0854, 0.0925, 0.2069]) -Greedy action tensor([ 0.7890, -0.0542, -0.0443, 0.2988]) tensor([0.4036, 0.1737, 0.1754, 0.2472]) -Greedy action tensor([ 1.4044, -0.2979, -0.4243, 0.2443]) tensor([0.6037, 0.1100, 0.0970, 0.1892]) -Greedy action tensor([ 0.8828, -0.6706, -0.5270, 0.5202]) tensor([0.4648, 0.0983, 0.1135, 0.3234]) -Greedy action tensor([ 1.0565, -0.2654, -0.1995, 0.2424]) tensor([0.5014, 0.1337, 0.1428, 0.2221]) -Greedy action tensor([ 0.5353, -0.4968, -0.4478, 0.2311]) tensor([0.4052, 0.1443, 0.1516, 0.2989]) -Greedy action tensor([ 0.8492, -0.3678, -0.4304, 0.3434]) tensor([0.4593, 0.1360, 0.1277, 0.2770]) -Greedy action tensor([ 1.4494, -0.8158, -0.3561, 0.0919]) tensor([0.6555, 0.0681, 0.1078, 0.1687]) -Greedy action tensor([ 1.3029, -0.7091, 0.0854, -0.1611]) tensor([0.6020, 0.0805, 0.1782, 0.1393]) -Greedy action tensor([ 1.0324, -0.3829, -0.2239, 0.0643]) tensor([0.5243, 0.1273, 0.1493, 0.1991]) -Greedy action tensor([ 1.3614, -0.4565, -0.1779, 0.3379]) tensor([0.5760, 0.0935, 0.1236, 0.2070]) -Greedy action tensor([ 1.5021, -0.6230, -0.1829, 0.0406]) tensor([0.6507, 0.0777, 0.1207, 0.1509]) -Greedy action tensor([ 1.2330, -0.0199, -0.2124, 0.2671]) tensor([0.5258, 0.1502, 0.1239, 0.2001]) -Greedy action tensor([ 0.5774, -0.4253, 0.2635, -0.0943]) tensor([0.3834, 0.1407, 0.2801, 0.1959]) -Greedy action tensor([ 1.0781, -0.3668, -0.3116, 0.1473]) tensor([0.5322, 0.1255, 0.1326, 0.2098]) -Greedy action tensor([ 1.2687, -0.5201, -0.0404, 0.2002]) tensor([0.5616, 0.0939, 0.1517, 0.1929]) -Greedy action tensor([ 1.1765, -0.3751, -0.1286, 0.3204]) tensor([0.5241, 0.1111, 0.1421, 0.2227]) -Greedy action tensor([ 0.8632, -0.5344, -0.1682, 0.1969]) tensor([0.4723, 0.1167, 0.1684, 0.2426]) -Greedy action tensor([ 1.1277, -0.3305, -0.0673, 0.2055]) tensor([0.5173, 0.1204, 0.1566, 0.2057]) -Greedy action tensor([ 1.9859, -0.5859, -0.0424, 0.0202]) tensor([0.7418, 0.0567, 0.0976, 0.1039]) -Greedy action tensor([ 0.4866, -0.5042, -0.0629, 0.2282]) tensor([0.3675, 0.1365, 0.2122, 0.2839]) -Greedy action tensor([ 1.2729, -0.9159, -0.1649, 0.2795]) tensor([0.5815, 0.0652, 0.1381, 0.2153]) -Greedy action tensor([ 0.7050, -0.0219, -0.1628, 0.1054]) tensor([0.4078, 0.1971, 0.1712, 0.2239]) -Greedy action tensor([ 1.1684, -0.5053, -0.2131, 0.3962]) tensor([0.5261, 0.0987, 0.1322, 0.2431]) -Greedy action tensor([ 2.1127, -0.8867, -0.2976, 0.1008]) tensor([0.7853, 0.0391, 0.0705, 0.1050]) -Greedy action tensor([ 1.1155, -0.2760, -0.5072, 0.4260]) tensor([0.5134, 0.1277, 0.1013, 0.2576]) -Greedy action tensor([ 1.8265, -0.2553, -0.3932, 0.5147]) tensor([0.6655, 0.0830, 0.0723, 0.1792]) -Greedy action tensor([ 1.2897, 0.2118, -0.3176, 0.5753]) tensor([0.4926, 0.1676, 0.0987, 0.2411]) -Greedy action tensor([ 1.9457, -0.1751, -0.5312, 0.5756]) tensor([0.6859, 0.0823, 0.0576, 0.1743]) -Greedy action tensor([ 2.0240, -0.2927, -0.1992, -0.0393]) tensor([0.7497, 0.0739, 0.0812, 0.0952]) -Greedy action tensor([ 1.1638, -0.5432, -0.0264, 0.1989]) tensor([0.5357, 0.0972, 0.1629, 0.2041]) -Greedy action tensor([ 1.2759, 0.0054, -0.0838, 0.3872]) tensor([0.5132, 0.1441, 0.1318, 0.2110]) -Greedy action tensor([ 1.4751, -0.4710, -0.1783, 0.1046]) tensor([0.6296, 0.0899, 0.1205, 0.1599]) -Greedy action tensor([ 1.5319, -0.6860, -0.3134, 0.2353]) tensor([0.6492, 0.0707, 0.1026, 0.1775]) -Greedy action tensor([ 1.5806, -0.1226, -0.1622, 0.1154]) tensor([0.6297, 0.1147, 0.1102, 0.1455]) -Greedy action tensor([ 1.7493, -0.2345, -0.3791, 0.1827]) tensor([0.6824, 0.0939, 0.0812, 0.1425]) -Greedy action tensor([ 0.4378, -0.3385, -0.1269, 0.4263]) tensor([0.3314, 0.1525, 0.1884, 0.3276]) -Greedy action tensor([ 1.0954, -0.3641, -0.4668, 0.2007]) tensor([0.5403, 0.1255, 0.1133, 0.2209]) -Greedy action tensor([ 1.5601, -0.0042, -0.5719, 0.4071]) tensor([0.6084, 0.1273, 0.0722, 0.1921]) -Greedy action tensor([ 1.8669, -0.3720, -0.5078, 0.1689]) tensor([0.7232, 0.0771, 0.0673, 0.1324]) -Greedy action tensor([ 1.3383, -0.7147, -0.2351, 0.4192]) tensor([0.5765, 0.0740, 0.1195, 0.2300]) -Greedy action tensor([ 0.8078, -0.3210, -0.1645, 0.6780]) tensor([0.3876, 0.1254, 0.1466, 0.3404]) -Greedy action tensor([ 1.6054, -0.4374, -0.2780, 0.1756]) tensor([0.6574, 0.0852, 0.1000, 0.1574]) -Greedy action tensor([ 1.3364, -0.2125, -0.3941, 0.1933]) tensor([0.5853, 0.1244, 0.1037, 0.1866]) -Greedy action tensor([ 1.8671, -0.1419, -0.1442, 0.3334]) tensor([0.6740, 0.0904, 0.0902, 0.1454]) -Greedy action tensor([ 1.0612, -0.1313, -0.3046, 0.3057]) tensor([0.4930, 0.1496, 0.1258, 0.2316]) -Greedy action tensor([ 1.8839, -0.1368, -0.1874, 0.4923]) tensor([0.6635, 0.0879, 0.0836, 0.1650]) -Greedy action tensor([ 1.0396, -1.0009, -0.0499, 0.1347]) tensor([0.5345, 0.0695, 0.1798, 0.2163]) -Greedy action tensor([ 0.7872, -0.5964, -0.4120, 0.3357]) tensor([0.4569, 0.1145, 0.1377, 0.2909]) -Greedy action tensor([ 1.4209, -0.2529, -0.6536, 0.3381]) tensor([0.6054, 0.1135, 0.0761, 0.2050]) -Greedy action tensor([ 1.9534, -0.4493, -0.0480, 0.3583]) tensor([0.7000, 0.0633, 0.0946, 0.1420]) -Greedy action tensor([ 1.1493, -0.3486, -0.1292, 0.3556]) tensor([0.5117, 0.1144, 0.1425, 0.2314]) -Greedy action tensor([ 0.7970, -0.3753, -0.1622, 0.3398]) tensor([0.4299, 0.1331, 0.1648, 0.2722]) -Greedy action tensor([ 1.5251, -0.5943, -0.1548, 0.0710]) tensor([0.6493, 0.0780, 0.1210, 0.1517]) -Greedy action tensor([ 0.7891, -0.0576, -0.7604, -0.0505]) tensor([0.4824, 0.2069, 0.1024, 0.2083]) -Greedy action tensor([ 1.6232, -0.4435, -0.2255, 0.1667]) tensor([0.6592, 0.0835, 0.1038, 0.1536]) -Greedy action tensor([ 2.3074, -1.6573, -0.4590, 0.8108]) tensor([0.7658, 0.0145, 0.0482, 0.1715]) -Greedy action tensor([ 1.4245, -0.7914, -0.3257, 0.2880]) tensor([0.6236, 0.0680, 0.1083, 0.2001]) -Greedy action tensor([ 0.9032, -0.6470, -0.4489, 0.4895]) tensor([0.4690, 0.0995, 0.1213, 0.3101]) -Greedy action tensor([ 1.9749, -1.0993, -0.3061, 0.5804]) tensor([0.7162, 0.0331, 0.0732, 0.1776]) -Greedy action tensor([ 1.7813, -0.5466, -0.8115, 0.4860]) tensor([0.6915, 0.0674, 0.0517, 0.1893]) -Greedy action tensor([ 2.4682, -1.0881, -0.5367, 0.8904]) tensor([0.7785, 0.0222, 0.0386, 0.1607]) -Greedy action tensor([ 1.2698, -0.2170, -1.0179, 0.4836]) tensor([0.5608, 0.1268, 0.0569, 0.2555]) -Greedy action tensor([ 1.3357, -0.6186, -0.4389, 0.5439]) tensor([0.5668, 0.0803, 0.0961, 0.2568]) -Greedy action tensor([ 1.7418, -0.6462, -0.2507, 0.5203]) tensor([0.6566, 0.0603, 0.0895, 0.1936]) -Greedy action tensor([ 1.6358, -0.8367, -0.1027, 0.4094]) tensor([0.6437, 0.0543, 0.1132, 0.1888]) -Greedy action tensor([ 1.1942, -0.3004, -0.5126, 0.0442]) tensor([0.5806, 0.1302, 0.1053, 0.1838]) -Greedy action tensor([ 2.0098, 0.0805, -0.1814, 0.5581]) tensor([0.6706, 0.0974, 0.0750, 0.1570]) -Greedy action tensor([ 1.8979, -0.7385, -0.1856, 0.4798]) tensor([0.6953, 0.0498, 0.0866, 0.1684]) -Greedy action tensor([-1.5528, -0.4054, 0.7988, 0.4846]) tensor([0.0448, 0.1411, 0.4705, 0.3436]) -Greedy action tensor([-1.9135, -0.4606, 0.6522, -0.1634]) tensor([0.0416, 0.1778, 0.5412, 0.2394]) -Greedy action tensor([-1.9937, -0.7863, 0.4036, -0.1050]) tensor([0.0456, 0.1524, 0.5009, 0.3012]) -Greedy action tensor([-1.3691, -0.5864, 0.6996, 0.6283]) tensor([0.0541, 0.1184, 0.4285, 0.3990]) -Greedy action tensor([-1.0493, 0.6864, 0.1787, 0.0076]) tensor([0.0771, 0.4376, 0.2634, 0.2219]) -Greedy action tensor([-1.5592, -0.5704, 0.4644, 0.0453]) tensor([0.0616, 0.1656, 0.4662, 0.3066]) -Greedy action tensor([-1.7914e+00, -4.9654e-01, 6.5834e-01, -1.0054e-03]) tensor([0.0450, 0.1642, 0.5212, 0.2696]) -Greedy action tensor([-1.9555, -0.8056, 1.5520, 0.7368]) tensor([0.0191, 0.0604, 0.6381, 0.2824]) -Greedy action tensor([-1.6541, -0.3852, 0.5231, 0.0127]) tensor([0.0536, 0.1905, 0.4724, 0.2836]) -Greedy action tensor([-1.8280, -0.4707, 0.6194, -0.1425]) tensor([0.0458, 0.1779, 0.5292, 0.2470]) -Greedy action tensor([-1.5116, -0.4499, 0.4619, -0.0280]) tensor([0.0645, 0.1866, 0.4644, 0.2845]) -Greedy action tensor([-1.7956, 0.5940, 0.4041, -0.3996]) tensor([0.0400, 0.4369, 0.3613, 0.1618]) -Greedy action tensor([-1.7558, -0.3019, 0.5936, -0.0316]) tensor([0.0468, 0.2003, 0.4904, 0.2625]) -Greedy action tensor([-1.0251, -0.5956, 0.2038, 0.2999]) tensor([0.1029, 0.1581, 0.3517, 0.3872]) -Greedy action tensor([-1.7337, -0.6087, 1.0993, 0.5419]) tensor([0.0325, 0.1000, 0.5517, 0.3159]) -Greedy action tensor([-1.8750, -0.4055, 0.6262, -0.1366]) tensor([0.0430, 0.1871, 0.5250, 0.2448]) -Greedy action tensor([-1.8753, -0.3972, 0.6269, -0.1446]) tensor([0.0430, 0.1887, 0.5254, 0.2429]) -Greedy action tensor([-1.7004, -0.2594, 0.5172, -0.0696]) tensor([0.0512, 0.2165, 0.4706, 0.2617]) -Greedy action tensor([-1.6837, -0.5086, 0.5234, -0.0147]) tensor([0.0537, 0.1738, 0.4878, 0.2848]) -Greedy action tensor([-1.3028, 0.2834, 0.3483, -0.1138]) tensor([0.0695, 0.3397, 0.3624, 0.2283]) -Greedy action tensor([-1.9115, -0.4622, 0.6663, -0.1664]) tensor([0.0414, 0.1764, 0.5452, 0.2371]) -Greedy action tensor([-1.1972, -0.5684, 0.3161, 0.3077]) tensor([0.0839, 0.1573, 0.3810, 0.3778]) -Greedy action tensor([-1.8667, -0.3398, 0.6420, -0.0680]) tensor([0.0418, 0.1924, 0.5134, 0.2524]) -Greedy action tensor([-1.9181, -0.4279, 0.6643, -0.1584]) tensor([0.0409, 0.1813, 0.5405, 0.2374]) -Greedy action tensor([-0.9343, -0.6629, 0.4189, 1.0152]) tensor([0.0757, 0.0993, 0.2930, 0.5319]) -Greedy action tensor([-1.8253, -0.3976, 0.5304, -0.2169]) tensor([0.0483, 0.2013, 0.5092, 0.2412]) -Greedy action tensor([-1.7130, -0.5089, 0.5699, -0.0773]) tensor([0.0519, 0.1730, 0.5088, 0.2663]) -Greedy action tensor([-1.8810, -0.4320, 0.6379, -0.1441]) tensor([0.0428, 0.1824, 0.5316, 0.2432]) -Greedy action tensor([-1.8635, -0.3948, 0.6199, -0.1530]) tensor([0.0438, 0.1900, 0.5242, 0.2420]) -Greedy action tensor([-0.7705, -0.6036, 0.1764, 0.3773]) tensor([0.1264, 0.1494, 0.3259, 0.3984]) -Greedy action tensor([-1.4628, -0.4031, 0.6500, 0.6993]) tensor([0.0480, 0.1384, 0.3968, 0.4168]) -Greedy action tensor([-1.8696, -0.5090, 0.7043, -0.1132]) tensor([0.0420, 0.1638, 0.5510, 0.2433]) -Greedy action tensor([-1.4335, -0.4194, 0.1528, -0.3039]) tensor([0.0852, 0.2349, 0.4163, 0.2636]) -Greedy action tensor([-1.7427, -0.0798, 0.5291, -0.1098]) tensor([0.0474, 0.2501, 0.4598, 0.2427]) -Greedy action tensor([-0.9324, 0.6401, 0.1497, 0.0136]) tensor([0.0881, 0.4247, 0.2601, 0.2270]) -Greedy action tensor([-1.8429, -0.4785, 0.6228, -0.0927]) tensor([0.0446, 0.1744, 0.5246, 0.2565]) -Greedy action tensor([-0.7163, 0.2916, -0.0079, 0.6794]) tensor([0.1020, 0.2793, 0.2070, 0.4117]) -Greedy action tensor([-0.1282, 0.4142, 0.2190, 0.4559]) tensor([0.1687, 0.2901, 0.2387, 0.3025]) -Greedy action tensor([-1.8499, -0.4061, 0.6405, -0.0994]) tensor([0.0434, 0.1837, 0.5232, 0.2497]) -Greedy action tensor([-1.2386, -0.2907, 0.9805, 1.0645]) tensor([0.0439, 0.1132, 0.4037, 0.4391]) -Greedy action tensor([-1.8304, -0.3848, 0.6020, -0.1330]) tensor([0.0453, 0.1921, 0.5155, 0.2471]) -Greedy action tensor([-1.9050, -0.9184, 0.0488, -0.4909]) tensor([0.0673, 0.1806, 0.4751, 0.2769]) -Greedy action tensor([-1.9276, -0.4346, 0.6602, -0.1641]) tensor([0.0407, 0.1810, 0.5410, 0.2373]) -Greedy action tensor([-1.9316, -0.4243, 0.6598, -0.1688]) tensor([0.0405, 0.1828, 0.5406, 0.2360]) -Greedy action tensor([-1.9117, -0.4616, 0.6499, -0.1614]) tensor([0.0417, 0.1778, 0.5404, 0.2401]) -Greedy action tensor([-1.8200, -0.3703, 0.5896, -0.0896]) tensor([0.0454, 0.1934, 0.5051, 0.2561]) -Greedy action tensor([-1.8074, -0.5079, 0.6057, -0.0977]) tensor([0.0468, 0.1717, 0.5228, 0.2587]) -Greedy action tensor([-1.6197, -0.3068, 0.4951, -0.0694]) tensor([0.0564, 0.2098, 0.4678, 0.2660]) -Greedy action tensor([-1.5804, -0.4938, 0.7720, 0.4209]) tensor([0.0457, 0.1355, 0.4805, 0.3382]) -Greedy action tensor([-1.5885, 0.2651, 0.2973, -0.0768]) tensor([0.0540, 0.3448, 0.3562, 0.2450]) -Greedy action tensor([-1.7747, -0.4629, 0.6030, -0.0282]) tensor([0.0471, 0.1749, 0.5078, 0.2702]) -Greedy action tensor([-1.4083, 0.5664, 0.2821, 0.0137]) tensor([0.0563, 0.4054, 0.3051, 0.2333]) -Greedy action tensor([-1.8846, -0.4835, 0.6731, -0.1331]) tensor([0.0421, 0.1711, 0.5439, 0.2429]) -Greedy action tensor([-1.9377, -0.4577, 0.6639, -0.1761]) tensor([0.0405, 0.1779, 0.5459, 0.2357]) -Greedy action tensor([-0.7793, -0.5633, 0.2623, 0.0336]) tensor([0.1364, 0.1693, 0.3866, 0.3076]) -Greedy action tensor([-1.9342, -0.4523, 0.6691, -0.1705]) tensor([0.0404, 0.1779, 0.5459, 0.2358]) -Greedy action tensor([-1.9363, -0.4526, 0.6624, -0.1788]) tensor([0.0406, 0.1788, 0.5454, 0.2352]) -Greedy action tensor([-1.0564, -0.3717, 0.3276, 0.5799]) tensor([0.0826, 0.1638, 0.3295, 0.4241]) -Greedy action tensor([-1.8322, -0.4097, 0.6030, -0.1198]) tensor([0.0452, 0.1876, 0.5165, 0.2507]) -Greedy action tensor([-0.8991, -0.2274, 0.6999, 0.8831]) tensor([0.0722, 0.1414, 0.3573, 0.4291]) -Greedy action tensor([-1.2618, -0.2951, 0.3527, 0.0322]) tensor([0.0813, 0.2137, 0.4085, 0.2965]) -Greedy action tensor([-1.8354, -0.2728, 0.5912, -0.1144]) tensor([0.0441, 0.2103, 0.4991, 0.2465]) -Greedy action tensor([-1.7932e+00, -4.7998e-01, 6.4078e-01, -3.4285e-04]) tensor([0.0452, 0.1680, 0.5154, 0.2714]) -Greedy action tensor([-1.2255, -0.6364, 0.2809, 0.2902]) tensor([0.0843, 0.1519, 0.3801, 0.3837]) -Greedy action tensor([-1.6706, -0.3320, 0.7169, 0.3179]) tensor([0.0435, 0.1658, 0.4732, 0.3175]) -Greedy action tensor([-1.6414, -0.4284, 0.5256, 0.1699]) tensor([0.0520, 0.1751, 0.4545, 0.3184]) -Greedy action tensor([-1.8708, -0.4080, 0.6254, -0.1380]) tensor([0.0433, 0.1868, 0.5251, 0.2448]) -Greedy action tensor([-0.9672, -0.8455, 1.4963, 0.4882]) tensor([0.0551, 0.0622, 0.6468, 0.2360]) -Greedy action tensor([-0.9676, 0.1935, 0.0614, -0.4714]) tensor([0.1158, 0.3699, 0.3241, 0.1902]) -Greedy action tensor([-1.4407, 0.0480, 0.6416, 0.3626]) tensor([0.0512, 0.2270, 0.4109, 0.3109]) -Greedy action tensor([-1.9168, -0.4316, 0.6615, -0.1624]) tensor([0.0410, 0.1812, 0.5406, 0.2372]) -Greedy action tensor([-1.2928, -0.5762, 0.4637, 0.5718]) tensor([0.0654, 0.1339, 0.3788, 0.4220]) -Greedy action tensor([-1.7559, -0.4183, 0.5709, -0.0829]) tensor([0.0491, 0.1869, 0.5026, 0.2614]) -Greedy action tensor([-1.9248, -0.4607, 0.6719, -0.1587]) tensor([0.0407, 0.1758, 0.5457, 0.2378]) -Greedy action tensor([-1.7399, -0.0807, 0.5653, 0.0196]) tensor([0.0453, 0.2379, 0.4539, 0.2630]) -Greedy action tensor([-1.9308, -0.4235, 0.6568, -0.1704]) tensor([0.0406, 0.1833, 0.5400, 0.2361]) -Greedy action tensor([-1.8516, -0.2263, 0.5955, -0.1326]) tensor([0.0431, 0.2188, 0.4978, 0.2403]) -Greedy action tensor([-1.7549, -0.3970, 0.5460, -0.0800]) tensor([0.0495, 0.1924, 0.4940, 0.2641]) -Greedy action tensor([-1.7754, -0.5114, 0.5790, -0.0846]) tensor([0.0488, 0.1727, 0.5139, 0.2646]) -Greedy action tensor([-0.7020, -0.5207, 0.2727, 0.4040]) tensor([0.1270, 0.1523, 0.3367, 0.3840]) -Greedy action tensor([-1.8293, -0.3960, 0.6357, -0.0373]) tensor([0.0436, 0.1826, 0.5124, 0.2614]) -Greedy action tensor([ 1.4761, -1.6117, 2.2979, 0.7133]) tensor([0.2641, 0.0120, 0.6007, 0.1232]) -Greedy action tensor([-0.8958, 0.1167, 1.2597, -0.1290]) tensor([0.0688, 0.1893, 0.5938, 0.1481]) -Greedy action tensor([0.1560, 0.1628, 1.1484, 0.4900]) tensor([0.1639, 0.1650, 0.4422, 0.2289]) -Greedy action tensor([ 0.3923, -0.1185, 0.0431, -0.4230]) tensor([0.3639, 0.2184, 0.2567, 0.1610]) -Greedy action tensor([ 0.9169, -0.5997, 0.1251, 0.9400]) tensor([0.3710, 0.0814, 0.1680, 0.3796]) -Greedy action tensor([-1.7343, -1.1951, 1.8043, -0.5137]) tensor([0.0247, 0.0423, 0.8494, 0.0836]) -Greedy action tensor([-0.4072, -0.7863, 0.0485, -0.1039]) tensor([0.2166, 0.1483, 0.3417, 0.2934]) -Greedy action tensor([ 1.4474, -1.2323, -0.3934, 1.3051]) tensor([0.4774, 0.0327, 0.0758, 0.4141]) -Greedy action tensor([ 1.2219, -1.5575, -1.2192, 1.0727]) tensor([0.4974, 0.0309, 0.0433, 0.4284]) -Greedy action tensor([-0.1588, -2.0917, 1.0146, 1.1064]) tensor([0.1262, 0.0183, 0.4081, 0.4474]) -Greedy action tensor([ 0.3525, -1.1065, 1.2155, 0.6924]) tensor([0.1997, 0.0464, 0.4733, 0.2805]) -Greedy action tensor([-0.7936, -0.7595, 1.4975, -0.7305]) tensor([0.0770, 0.0797, 0.7613, 0.0820]) -Greedy action tensor([-0.0372, -1.0903, 1.1373, -0.1510]) tensor([0.1826, 0.0637, 0.5908, 0.1629]) -Greedy action tensor([-0.5461, -1.4475, -0.7681, 0.5056]) tensor([0.1973, 0.0801, 0.1580, 0.5646]) -Greedy action tensor([1.0262, 0.4383, 0.0461, 0.0402]) tensor([0.4341, 0.2411, 0.1629, 0.1619]) -Greedy action tensor([-0.3066, -0.3770, 0.3861, -0.8190]) tensor([0.2207, 0.2057, 0.4413, 0.1322]) -Greedy action tensor([ 0.0945, -1.0641, 0.0737, 2.1215]) tensor([0.1012, 0.0318, 0.0991, 0.7680]) -Greedy action tensor([ 0.4452, -0.1231, -0.1815, 0.8016]) tensor([0.2834, 0.1605, 0.1514, 0.4047]) -Greedy action tensor([ 0.7395, -0.5933, -0.1443, 0.5382]) tensor([0.4009, 0.1057, 0.1656, 0.3278]) -Greedy action tensor([ 0.3974, -0.1012, 0.2465, -0.7104]) tensor([0.3574, 0.2171, 0.3074, 0.1181]) -Greedy action tensor([ 0.5612, -1.6823, -0.2870, 0.3735]) tensor([0.4232, 0.0449, 0.1812, 0.3507]) -Greedy action tensor([0.5651, 1.3846, 0.0189, 0.9577]) tensor([0.1876, 0.4258, 0.1087, 0.2779]) -Greedy action tensor([-0.3306, -1.2974, -0.2928, 0.1064]) tensor([0.2521, 0.0959, 0.2618, 0.3902]) -Greedy action tensor([-0.7644, 0.2420, 1.7255, -1.5622]) tensor([0.0616, 0.1684, 0.7423, 0.0277]) -Greedy action tensor([-0.7616, -0.3245, 0.6186, -0.2614]) tensor([0.1224, 0.1894, 0.4865, 0.2018]) -Greedy action tensor([ 0.0786, 0.1059, 0.7340, -0.1656]) tensor([0.2111, 0.2169, 0.4066, 0.1654]) -Greedy action tensor([-1.0959, -0.9749, 2.0412, -0.3731]) tensor([0.0367, 0.0415, 0.8461, 0.0757]) -Greedy action tensor([ 1.0300, -1.4989, -0.2376, 0.0864]) tensor([0.5713, 0.0456, 0.1608, 0.2224]) -Greedy action tensor([ 1.0733, 0.0760, -0.3156, 0.6448]) tensor([0.4406, 0.1625, 0.1099, 0.2870]) -Greedy action tensor([ 1.1699, -0.7916, 0.5983, -0.0559]) tensor([0.5003, 0.0704, 0.2825, 0.1468]) -Greedy action tensor([ 0.9888, -1.6541, -0.6962, 1.1193]) tensor([0.4174, 0.0297, 0.0774, 0.4755]) -Greedy action tensor([ 0.6426, -0.8295, -0.8935, 0.9689]) tensor([0.3533, 0.0811, 0.0760, 0.4896]) -Greedy action tensor([-0.7035, -1.2124, -0.1798, 0.0737]) tensor([0.1830, 0.1100, 0.3089, 0.3981]) -Greedy action tensor([-1.0126, 0.0711, -0.4981, 0.3536]) tensor([0.1047, 0.3095, 0.1752, 0.4106]) -Greedy action tensor([ 0.8521, -1.2383, 0.5352, -0.0984]) tensor([0.4467, 0.0552, 0.3254, 0.1727]) -Greedy action tensor([ 0.3504, -1.0974, -0.1442, -0.2663]) tensor([0.4193, 0.0986, 0.2557, 0.2263]) -Greedy action tensor([ 1.0860, -1.3563, -0.7715, 0.2669]) tensor([0.5939, 0.0516, 0.0927, 0.2618]) -Greedy action tensor([ 0.1192, -0.4685, 0.2440, 1.0669]) tensor([0.1898, 0.1055, 0.2150, 0.4897]) -Greedy action tensor([-0.3336, -0.2676, 0.0044, 0.8621]) tensor([0.1476, 0.1576, 0.2069, 0.4879]) -Greedy action tensor([0.6135, 1.0071, 0.0253, 0.7294]) tensor([0.2404, 0.3563, 0.1335, 0.2699]) -Greedy action tensor([ 0.7157, 0.6489, 0.4772, -0.5526]) tensor([0.3328, 0.3113, 0.2622, 0.0936]) -Greedy action tensor([ 0.4307, -0.1504, 1.5876, -0.7855]) tensor([0.1986, 0.1111, 0.6315, 0.0589]) -Greedy action tensor([-0.2611, -0.6129, -0.8682, 0.4731]) tensor([0.2308, 0.1624, 0.1258, 0.4810]) -Greedy action tensor([ 1.2982, -1.6969, 0.0595, 0.5703]) tensor([0.5486, 0.0274, 0.1590, 0.2649]) -Greedy action tensor([ 1.2956, -1.0103, 2.4983, 0.5687]) tensor([0.2036, 0.0203, 0.6777, 0.0984]) -Greedy action tensor([-0.3288, -0.3509, -0.2174, 0.8933]) tensor([0.1541, 0.1507, 0.1722, 0.5230]) -Greedy action tensor([-0.8146, -2.4812, 0.3034, 0.9960]) tensor([0.0965, 0.0182, 0.2952, 0.5901]) -Greedy action tensor([ 0.1176, -0.0794, 0.1696, 0.7165]) tensor([0.2130, 0.1749, 0.2244, 0.3877]) -Greedy action tensor([ 0.7202, -0.9286, 0.8621, 0.6447]) tensor([0.3056, 0.0588, 0.3522, 0.2834]) -Greedy action tensor([1.0293, 0.2776, 0.1660, 0.1885]) tensor([0.4302, 0.2028, 0.1814, 0.1856]) -Greedy action tensor([ 0.6961, 0.8863, -0.7430, -0.1571]) tensor([0.3481, 0.4210, 0.0826, 0.1483]) -Greedy action tensor([-0.5436, -0.9559, -0.8676, -0.0694]) tensor([0.2505, 0.1659, 0.1812, 0.4025]) -Greedy action tensor([-1.1292, 0.6432, -1.0641, 0.5001]) tensor([0.0766, 0.4509, 0.0818, 0.3908]) -Greedy action tensor([-0.8673, -1.3461, 1.1557, -0.8002]) tensor([0.0976, 0.0604, 0.7377, 0.1043]) -Greedy action tensor([-1.3331, -1.9522, 0.2595, -0.9762]) tensor([0.1268, 0.0683, 0.6236, 0.1812]) -Greedy action tensor([ 1.1253, 0.4850, -0.3072, -0.4665]) tensor([0.5078, 0.2677, 0.1212, 0.1034]) -Greedy action tensor([ 0.2723, -0.2202, -0.8117, 0.8137]) tensor([0.2726, 0.1666, 0.0922, 0.4685]) -Greedy action tensor([ 0.4354, 0.3558, -0.5416, 0.2791]) tensor([0.3169, 0.2927, 0.1193, 0.2711]) -Greedy action tensor([ 0.3286, -0.7589, -0.5677, 0.3405]) tensor([0.3627, 0.1223, 0.1480, 0.3670]) -Greedy action tensor([ 1.1579, 0.1773, -0.5617, 0.9447]) tensor([0.4233, 0.1588, 0.0758, 0.3420]) -Greedy action tensor([ 0.1576, -2.1411, 0.3032, 1.0253]) tensor([0.2156, 0.0216, 0.2494, 0.5134]) -Greedy action tensor([ 0.4233, -2.1823, -0.1462, 1.0962]) tensor([0.2778, 0.0205, 0.1572, 0.5445]) -Greedy action tensor([-1.1575, -0.7855, -0.5793, -0.1063]) tensor([0.1410, 0.2045, 0.2513, 0.4033]) -Greedy action tensor([-0.1348, -0.1077, -1.0507, -0.2537]) tensor([0.3016, 0.3099, 0.1207, 0.2678]) -Greedy action tensor([ 0.0225, -1.5084, 0.8891, -0.8291]) tensor([0.2486, 0.0538, 0.5915, 0.1061]) -Greedy action tensor([ 1.0870, -0.8073, 0.6064, 0.8580]) tensor([0.3900, 0.0587, 0.2412, 0.3102]) -Greedy action tensor([ 1.9009, -0.4793, 0.4990, 1.2503]) tensor([0.5375, 0.0497, 0.1323, 0.2804]) -Greedy action tensor([ 0.1178, -0.0212, 0.2816, 1.7848]) tensor([0.1198, 0.1043, 0.1412, 0.6347]) -Greedy action tensor([ 0.5361, 0.8589, 0.2497, -0.8758]) tensor([0.2962, 0.4091, 0.2225, 0.0722]) -Greedy action tensor([ 1.2081, -0.8577, 0.8401, 1.5920]) tensor([0.3042, 0.0386, 0.2106, 0.4466]) -Greedy action tensor([-0.2708, -0.0078, 0.8553, 0.0580]) tensor([0.1476, 0.1921, 0.4552, 0.2051]) -Greedy action tensor([ 1.3274, 0.5707, 0.4247, -0.0272]) tensor([0.4689, 0.2200, 0.1901, 0.1210]) -Greedy action tensor([ 1.7554, -0.5085, 0.6068, 1.7633]) tensor([0.4117, 0.0428, 0.1305, 0.4150]) -Greedy action tensor([ 0.0799, -1.0889, -1.3941, 0.2875]) tensor([0.3609, 0.1122, 0.0827, 0.4442]) -Greedy action tensor([ 1.0755, -1.5719, 1.0261, 1.1099]) tensor([0.3271, 0.0232, 0.3113, 0.3385]) -Greedy action tensor([ 1.4898, -0.8185, 1.4641, 0.6370]) tensor([0.4000, 0.0398, 0.3898, 0.1705]) -Greedy action tensor([ 1.1230, 0.1792, -0.5706, 0.8213]) tensor([0.4324, 0.1683, 0.0795, 0.3198]) -Greedy action tensor([ 0.1432, -1.7971, 0.5018, 0.8488]) tensor([0.2174, 0.0312, 0.3112, 0.4402]) -Greedy action tensor([ 0.4692, -1.2129, 0.2026, -0.2525]) tensor([0.4102, 0.0763, 0.3142, 0.1993]) -Greedy action tensor([-0.4741, -1.4967, -0.1305, 0.2575]) tensor([0.2063, 0.0742, 0.2908, 0.4287]) -Greedy action tensor([ 0.8201, -1.2948, 1.4266, -0.2763]) tensor([0.3041, 0.0367, 0.5577, 0.1016]) -Greedy action tensor([ 0.1836, -0.0943, -0.6720, -0.4429]) tensor([0.3681, 0.2788, 0.1564, 0.1967]) -Greedy action tensor([ 0.7092, -0.3938, -0.0747, -0.3719]) tensor([0.4700, 0.1560, 0.2146, 0.1594]) -Greedy action tensor([ 0.2604, -0.1267, -0.0572, -0.5248]) tensor([0.3493, 0.2372, 0.2542, 0.1593]) -Greedy action tensor([ 0.4737, -0.0878, -0.0670, -0.1694]) tensor([0.3734, 0.2129, 0.2174, 0.1963]) -Greedy action tensor([ 0.5336, -0.1461, -0.0084, -0.3920]) tensor([0.4025, 0.2040, 0.2341, 0.1595]) -Greedy action tensor([ 0.4051, -0.0945, -0.1210, -0.0059]) tensor([0.3496, 0.2121, 0.2066, 0.2318]) -Greedy action tensor([ 0.3638, -0.1756, -0.0285, -0.1235]) tensor([0.3481, 0.2030, 0.2351, 0.2138]) -Greedy action tensor([ 0.1558, 0.2176, -0.1322, -0.4104]) tensor([0.2957, 0.3146, 0.2217, 0.1679]) -Greedy action tensor([ 0.6059, -0.4425, -0.0560, -0.2718]) tensor([0.4382, 0.1536, 0.2260, 0.1822]) -Greedy action tensor([ 0.8042, -0.5787, -0.1625, -0.4377]) tensor([0.5208, 0.1307, 0.1981, 0.1504]) -Greedy action tensor([ 0.6500, -0.3096, -0.1403, -0.0864]) tensor([0.4319, 0.1654, 0.1959, 0.2068]) -Greedy action tensor([ 1.0479, -0.9281, -0.1009, -0.5661]) tensor([0.6043, 0.0838, 0.1916, 0.1203]) -Greedy action tensor([ 0.6702, -0.3473, 0.0055, -0.5137]) tensor([0.4583, 0.1657, 0.2358, 0.1403]) -Greedy action tensor([ 1.1378, -0.6968, 0.0995, -0.5781]) tensor([0.5905, 0.0943, 0.2091, 0.1062]) -Greedy action tensor([ 0.3064, -0.0721, -0.1078, -0.2353]) tensor([0.3416, 0.2339, 0.2257, 0.1987]) -Greedy action tensor([ 0.8089, -0.4495, 0.0164, -0.4450]) tensor([0.4945, 0.1405, 0.2239, 0.1411]) -Greedy action tensor([ 0.5331, 0.2221, 0.2124, -0.2631]) tensor([0.3437, 0.2518, 0.2494, 0.1550]) -Greedy action tensor([ 0.9034, -0.4057, -0.0039, -0.4236]) tensor([0.5157, 0.1393, 0.2082, 0.1368]) -Greedy action tensor([ 0.5755, -0.3104, -0.0651, -0.4167]) tensor([0.4329, 0.1785, 0.2281, 0.1605]) -Greedy action tensor([ 0.3604, 0.0810, -0.1237, -0.1987]) tensor([0.3396, 0.2569, 0.2093, 0.1942]) -Greedy action tensor([ 0.3922, -0.3162, -0.0518, -0.4780]) tensor([0.3917, 0.1929, 0.2513, 0.1641]) -Greedy action tensor([ 0.9759, -0.4461, 0.0276, -0.5124]) tensor([0.5393, 0.1301, 0.2089, 0.1217]) -Greedy action tensor([ 0.3000, 0.0636, -0.0447, -0.2550]) tensor([0.3255, 0.2570, 0.2306, 0.1869]) -Greedy action tensor([ 0.1703, -0.0019, -0.1118, -0.1418]) tensor([0.3005, 0.2530, 0.2266, 0.2199]) -Greedy action tensor([ 0.9529, -0.7889, 0.0073, -0.4987]) tensor([0.5562, 0.0975, 0.2161, 0.1303]) -Greedy action tensor([ 0.7098, -0.2999, 0.0204, -0.3738]) tensor([0.4536, 0.1653, 0.2276, 0.1535]) -Greedy action tensor([ 0.8077, -0.3092, -0.1069, -0.6852]) tensor([0.5121, 0.1676, 0.2052, 0.1151]) -Greedy action tensor([ 0.7464, -0.5254, -0.0939, -0.4410]) tensor([0.4958, 0.1390, 0.2140, 0.1512]) -Greedy action tensor([ 0.9096, -0.5222, 0.0178, -0.6764]) tensor([0.5395, 0.1289, 0.2211, 0.1105]) -Greedy action tensor([ 0.8848, -0.6538, -0.0411, -0.2032]) tensor([0.5134, 0.1102, 0.2034, 0.1730]) -Greedy action tensor([ 0.4003, 0.3829, -0.1120, -0.0486]) tensor([0.3105, 0.3052, 0.1861, 0.1982]) -Greedy action tensor([ 0.7494, -0.3802, 0.0018, -0.3803]) tensor([0.4717, 0.1524, 0.2234, 0.1524]) -Greedy action tensor([ 0.7260, -0.3828, -0.0146, -0.1898]) tensor([0.4531, 0.1495, 0.2161, 0.1813]) -Greedy action tensor([ 0.3631, -0.1582, 0.0014, -0.2886]) tensor([0.3557, 0.2112, 0.2477, 0.1854]) -Greedy action tensor([ 0.8122, -0.5725, -0.1169, -0.2264]) tensor([0.5002, 0.1252, 0.1975, 0.1770]) -Greedy action tensor([ 0.5884, -0.2807, -0.0141, -0.3231]) tensor([0.4222, 0.1770, 0.2311, 0.1697]) -Greedy action tensor([ 0.6372, -0.4971, 0.1182, -0.7236]) tensor([0.4602, 0.1480, 0.2738, 0.1180]) -Greedy action tensor([ 0.8694, -0.4798, -0.1274, -0.4173]) tensor([0.5250, 0.1362, 0.1938, 0.1450]) -Greedy action tensor([ 0.5479, 0.0819, 0.0224, -0.3317]) tensor([0.3797, 0.2383, 0.2245, 0.1576]) -Greedy action tensor([ 0.9580, -0.8102, -0.0350, -0.4738]) tensor([0.5618, 0.0959, 0.2081, 0.1342]) -Greedy action tensor([ 0.4132, -0.3116, -0.1855, -0.5323]) tensor([0.4128, 0.2000, 0.2268, 0.1604]) -Greedy action tensor([ 0.7518, -0.5862, -0.0581, -0.3515]) tensor([0.4904, 0.1287, 0.2182, 0.1627]) -Greedy action tensor([ 0.9930, -0.7691, 0.0043, -0.3931]) tensor([0.5575, 0.0957, 0.2074, 0.1394]) -Greedy action tensor([ 0.5239, -0.1439, 0.0739, -0.3154]) tensor([0.3872, 0.1986, 0.2469, 0.1673]) -Greedy action tensor([ 0.5928, -0.2191, -0.0110, -0.2706]) tensor([0.4145, 0.1841, 0.2266, 0.1748]) -Greedy action tensor([ 0.3550, -0.2271, -0.0439, -0.2556]) tensor([0.3606, 0.2015, 0.2420, 0.1958]) -Greedy action tensor([ 0.5295, -0.4781, -0.1385, -0.1929]) tensor([0.4231, 0.1545, 0.2169, 0.2055]) -Greedy action tensor([ 0.7769, -0.0475, 0.1052, -0.4344]) tensor([0.4450, 0.1951, 0.2273, 0.1325]) -Greedy action tensor([ 0.5191, -0.1333, -0.0455, -0.4760]) tensor([0.4066, 0.2118, 0.2312, 0.1503]) -Greedy action tensor([ 0.7524, -0.2625, 0.0203, -0.2161]) tensor([0.4498, 0.1630, 0.2163, 0.1708]) -Greedy action tensor([ 0.8469, -0.6931, 0.0038, -0.4571]) tensor([0.5219, 0.1119, 0.2246, 0.1417]) -Greedy action tensor([ 0.8129, -0.3646, -0.0084, -0.1808]) tensor([0.4721, 0.1454, 0.2077, 0.1748]) -Greedy action tensor([ 0.5711, -0.2856, -0.1022, -0.2465]) tensor([0.4208, 0.1787, 0.2147, 0.1858]) -Greedy action tensor([ 0.7700, -0.8005, 0.1263, -0.5568]) tensor([0.5004, 0.1040, 0.2629, 0.1328]) -Greedy action tensor([ 0.4998, -0.0230, -0.0723, -0.2583]) tensor([0.3808, 0.2258, 0.2149, 0.1784]) -Greedy action tensor([ 0.2592, 0.0827, -0.0288, -0.3421]) tensor([0.3189, 0.2673, 0.2391, 0.1748]) -Greedy action tensor([ 0.3980, 0.0822, -0.0395, -0.0809]) tensor([0.3340, 0.2435, 0.2156, 0.2069]) -Greedy action tensor([ 0.7285, -0.6179, -0.0914, -0.2774]) tensor([0.4839, 0.1259, 0.2132, 0.1770]) -Greedy action tensor([ 0.6831, -0.4802, -0.0165, -0.3536]) tensor([0.4621, 0.1444, 0.2296, 0.1639]) -Greedy action tensor([ 0.7168, -0.4692, 0.0345, -0.3030]) tensor([0.4605, 0.1407, 0.2327, 0.1661]) -Greedy action tensor([ 0.3379, -0.2359, 0.1276, -0.1809]) tensor([0.3368, 0.1898, 0.2729, 0.2005]) -Greedy action tensor([ 0.9601, -0.3645, -0.0369, -0.7316]) tensor([0.5497, 0.1462, 0.2028, 0.1013]) -Greedy action tensor([ 0.5496, -0.2272, 0.2261, -0.3546]) tensor([0.3864, 0.1777, 0.2796, 0.1564]) -Greedy action tensor([ 1.1341, -0.8410, 0.0041, -0.5234]) tensor([0.6052, 0.0840, 0.1955, 0.1154]) -Greedy action tensor([ 0.9634, -0.6939, 0.0427, -0.3331]) tensor([0.5369, 0.1024, 0.2138, 0.1469]) -Greedy action tensor([ 0.7291, -0.1929, 0.0488, -0.4623]) tensor([0.4529, 0.1801, 0.2294, 0.1376]) -Greedy action tensor([ 0.4644, -0.4142, -0.0632, -0.1672]) tensor([0.3941, 0.1637, 0.2326, 0.2096]) -Greedy action tensor([ 0.9185, -0.8669, -0.0930, -0.4789]) tensor([0.5622, 0.0943, 0.2045, 0.1390]) -Greedy action tensor([ 0.4857, -0.3128, -0.0182, -0.6548]) tensor([0.4213, 0.1896, 0.2545, 0.1347]) -Greedy action tensor([ 0.7476, -0.3121, -0.1140, -0.3580]) tensor([0.4762, 0.1650, 0.2012, 0.1576]) -Greedy action tensor([ 0.7168, -0.5135, -0.0419, -0.3966]) tensor([0.4787, 0.1399, 0.2242, 0.1572]) -Greedy action tensor([ 0.3718, -0.2103, -0.1068, -0.3282]) tensor([0.3738, 0.2089, 0.2316, 0.1856]) -Greedy action tensor([ 1.3342, -0.7860, -0.0795, -0.6427]) tensor([0.6659, 0.0799, 0.1620, 0.0922]) -Greedy action tensor([ 0.9076, -0.2050, -0.0848, -0.2533]) tensor([0.4969, 0.1633, 0.1842, 0.1556]) -Greedy action tensor([ 0.4433, -0.1646, -0.1210, -0.0658]) tensor([0.3684, 0.2006, 0.2095, 0.2214]) -Greedy action tensor([ 0.9317, -0.7876, 0.0330, -0.5737]) tensor([0.5530, 0.0991, 0.2251, 0.1227]) -Greedy action tensor([ 0.6504, -0.4031, 0.0134, -0.5119]) tensor([0.4566, 0.1592, 0.2415, 0.1428]) -Greedy action tensor([ 0.3019, -0.1064, 0.0397, -0.3699]) tensor([0.3396, 0.2257, 0.2613, 0.1734]) -Greedy action tensor([ 0.9548, -0.6619, 0.1168, -0.4278]) tensor([0.5313, 0.1055, 0.2298, 0.1333]) -Greedy action tensor([ 0.9438, -1.1654, 0.1720, -0.8597]) tensor([0.5720, 0.0694, 0.2644, 0.0942]) -Greedy action tensor([ 0.7892, -0.5341, -0.1250, -0.2470]) tensor([0.4946, 0.1317, 0.1983, 0.1755]) -Greedy action tensor([ 0.8323, -0.3684, -0.0430, -0.2443]) tensor([0.4858, 0.1462, 0.2025, 0.1655]) -Greedy action tensor([ 0.8854, -0.6760, -0.2841, -0.0799]) tensor([0.5260, 0.1104, 0.1633, 0.2003]) -Greedy action tensor([ 0.6487, -0.1000, -0.4663, 0.0638]) tensor([0.4241, 0.2006, 0.1391, 0.2363]) -Greedy action tensor([ 0.7107, -0.0364, 0.0467, -0.1691]) tensor([0.4161, 0.1971, 0.2142, 0.1726]) -Greedy action tensor([ 1.9354, -0.7775, -0.2677, 0.5738]) tensor([0.6978, 0.0463, 0.0771, 0.1788]) -Greedy action tensor([ 1.6122, -0.5092, -0.7619, 0.3511]) tensor([0.6683, 0.0801, 0.0622, 0.1894]) -Greedy action tensor([ 1.7426, 0.0130, -0.2413, -0.0670]) tensor([0.6763, 0.1199, 0.0930, 0.1107]) -Greedy action tensor([ 0.4518, -0.3008, -0.3537, 0.4172]) tensor([0.3468, 0.1634, 0.1549, 0.3349]) -Greedy action tensor([ 1.6233, -0.2101, -0.0641, 0.6662]) tensor([0.5784, 0.0925, 0.1070, 0.2221]) -Greedy action tensor([ 1.2127, 0.1137, -0.1451, 0.2154]) tensor([0.5104, 0.1701, 0.1313, 0.1883]) -Greedy action tensor([ 0.2968, -0.0293, -0.2773, 0.3305]) tensor([0.3013, 0.2174, 0.1697, 0.3116]) -Greedy action tensor([ 1.1452, -0.0789, 0.1221, 0.3284]) tensor([0.4773, 0.1403, 0.1716, 0.2109]) -Greedy action tensor([ 1.2466, -0.1397, -0.1001, 0.7363]) tensor([0.4738, 0.1185, 0.1232, 0.2845]) -Greedy action tensor([ 1.1236, -0.1014, -0.6169, 0.2916]) tensor([0.5251, 0.1542, 0.0921, 0.2285]) -Greedy action tensor([ 1.2679, 0.0025, -0.6723, 0.4212]) tensor([0.5392, 0.1521, 0.0775, 0.2312]) -Greedy action tensor([ 2.0354, -0.7277, -0.3406, 0.8142]) tensor([0.6892, 0.0435, 0.0640, 0.2032]) -Greedy action tensor([ 1.2720, -0.0054, -0.5774, -0.3130]) tensor([0.6094, 0.1699, 0.0959, 0.1249]) -Greedy action tensor([ 0.5446, -0.1867, -0.0243, 0.2838]) tensor([0.3549, 0.1708, 0.2009, 0.2734]) -Greedy action tensor([ 1.0942, -0.8680, -0.1480, 0.5076]) tensor([0.5036, 0.0708, 0.1454, 0.2801]) -Greedy action tensor([ 1.1778, -0.6486, -0.0327, 0.2077]) tensor([0.5440, 0.0876, 0.1622, 0.2062]) -Greedy action tensor([ 1.5316, -0.5656, 0.0094, 0.1474]) tensor([0.6283, 0.0772, 0.1371, 0.1574]) -Greedy action tensor([ 0.9099, -0.1280, -0.1100, 0.1685]) tensor([0.4564, 0.1616, 0.1646, 0.2174]) -Greedy action tensor([ 0.5534, -0.2970, -0.2970, -0.1568]) tensor([0.4263, 0.1821, 0.1821, 0.2095]) -Greedy action tensor([ 1.1558, -0.3681, -0.6313, 0.3090]) tensor([0.5512, 0.1201, 0.0923, 0.2364]) -Greedy action tensor([ 1.2228, -0.7322, -0.2084, 0.3844]) tensor([0.5516, 0.0781, 0.1318, 0.2385]) -Greedy action tensor([ 0.5272, -0.5809, 0.0873, 0.1691]) tensor([0.3741, 0.1235, 0.2409, 0.2615]) -Greedy action tensor([ 1.4432, -0.2533, -0.4430, 0.2712]) tensor([0.6080, 0.1115, 0.0922, 0.1883]) -Greedy action tensor([ 0.9639, -0.4879, -0.1630, 0.2329]) tensor([0.4903, 0.1148, 0.1589, 0.2360]) -Greedy action tensor([ 0.7196, -0.4568, -0.3105, 0.2988]) tensor([0.4307, 0.1328, 0.1537, 0.2828]) -Greedy action tensor([ 0.8093, -0.3482, -0.1514, -0.0094]) tensor([0.4677, 0.1470, 0.1790, 0.2063]) -Greedy action tensor([ 1.1875, -0.7408, -0.0593, 0.3456]) tensor([0.5366, 0.0780, 0.1542, 0.2312]) -Greedy action tensor([ 0.9997, -0.5873, -0.6681, 0.5755]) tensor([0.4884, 0.0999, 0.0921, 0.3195]) -Greedy action tensor([ 1.3429, -0.2952, -0.7392, 0.5910]) tensor([0.5585, 0.1085, 0.0696, 0.2633]) -Greedy action tensor([ 1.6004, 0.2857, -0.0309, 0.2934]) tensor([0.5764, 0.1548, 0.1128, 0.1560]) -Greedy action tensor([ 1.2132, -0.1223, -0.1446, 0.2523]) tensor([0.5256, 0.1382, 0.1352, 0.2010]) -Greedy action tensor([ 2.1873, -0.6417, -0.6700, 0.4078]) tensor([0.7781, 0.0460, 0.0447, 0.1313]) -Greedy action tensor([ 2.5644, -1.0001, -0.2709, 0.7510]) tensor([0.7999, 0.0226, 0.0470, 0.1305]) -Greedy action tensor([ 1.3469, -0.8446, -0.0673, 0.1257]) tensor([0.6062, 0.0677, 0.1474, 0.1787]) -Greedy action tensor([ 0.6539, -0.2751, 0.4037, 0.2617]) tensor([0.3510, 0.1386, 0.2733, 0.2371]) -Greedy action tensor([ 1.3000, -0.6227, -0.1804, 0.1968]) tensor([0.5863, 0.0857, 0.1334, 0.1945]) -Greedy action tensor([ 1.7785, 0.1974, -0.3086, 0.2185]) tensor([0.6494, 0.1336, 0.0805, 0.1365]) -Greedy action tensor([ 0.5856, -0.3625, 0.1671, -0.0072]) tensor([0.3849, 0.1491, 0.2533, 0.2127]) -Greedy action tensor([ 0.5728, -0.3147, 0.1801, 0.0127]) tensor([0.3762, 0.1549, 0.2540, 0.2149]) -Greedy action tensor([ 0.7863, -0.0709, -0.1545, 0.1178]) tensor([0.4297, 0.1823, 0.1677, 0.2202]) -Greedy action tensor([ 1.7052, 0.3771, -0.2455, 0.0269]) tensor([0.6274, 0.1663, 0.0892, 0.1171]) -Greedy action tensor([ 1.5649, -0.5136, -0.1755, 0.4029]) tensor([0.6198, 0.0775, 0.1087, 0.1939]) -Greedy action tensor([ 1.4504, -0.4987, -0.7222, 0.9641]) tensor([0.5344, 0.0761, 0.0609, 0.3286]) -Greedy action tensor([ 1.7678, -0.3830, -0.2785, 0.6822]) tensor([0.6316, 0.0735, 0.0816, 0.2133]) -Greedy action tensor([ 1.9374, -0.9294, -0.1504, 0.5589]) tensor([0.6979, 0.0397, 0.0865, 0.1759]) -Greedy action tensor([ 2.1001, -0.1444, -0.3808, 0.8905]) tensor([0.6721, 0.0712, 0.0562, 0.2005]) -Greedy action tensor([ 0.6397, -0.2327, -0.1424, 0.2585]) tensor([0.3909, 0.1634, 0.1788, 0.2670]) -Greedy action tensor([ 1.2137, -0.4951, -0.0504, 0.5298]) tensor([0.5081, 0.0920, 0.1435, 0.2564]) -Greedy action tensor([ 1.4632, -0.1330, -0.5843, 0.4638]) tensor([0.5883, 0.1192, 0.0759, 0.2165]) -Greedy action tensor([0.7218, 0.0657, 0.1272, 0.0161]) tensor([0.3900, 0.2023, 0.2152, 0.1925]) -Greedy action tensor([ 2.3898, -1.3504, -0.5527, 0.8636]) tensor([0.7729, 0.0184, 0.0408, 0.1680]) -Greedy action tensor([ 1.3689, -0.6943, -0.2737, 0.9003]) tensor([0.5138, 0.0653, 0.0994, 0.3216]) -Greedy action tensor([ 1.1734, -0.1858, -0.2498, 0.1536]) tensor([0.5381, 0.1382, 0.1296, 0.1941]) -Greedy action tensor([ 0.9740, -0.1073, 0.2031, 0.0243]) tensor([0.4569, 0.1550, 0.2114, 0.1768]) -Greedy action tensor([ 1.7203, -0.7713, -0.2167, 0.4828]) tensor([0.6592, 0.0546, 0.0950, 0.1912]) -Greedy action tensor([ 0.7048, -0.4070, -0.1399, 0.1428]) tensor([0.4294, 0.1413, 0.1845, 0.2448]) -Greedy action tensor([ 1.3310, -0.6631, -0.2898, 0.3570]) tensor([0.5843, 0.0795, 0.1155, 0.2206]) -Greedy action tensor([ 1.2891, -0.3828, -0.2757, 0.0657]) tensor([0.5913, 0.1111, 0.1236, 0.1740]) -Greedy action tensor([ 1.8052, -0.3438, -0.2791, 0.4800]) tensor([0.6637, 0.0774, 0.0826, 0.1764]) -Greedy action tensor([ 1.6430, -0.3183, -0.2257, 0.5660]) tensor([0.6114, 0.0860, 0.0943, 0.2082]) -Greedy action tensor([ 1.5373, -0.6916, -0.2355, 0.4504]) tensor([0.6193, 0.0667, 0.1052, 0.2088]) -Greedy action tensor([ 1.0735, -0.1441, -0.1932, 0.1699]) tensor([0.5043, 0.1493, 0.1421, 0.2043]) -Greedy action tensor([ 1.6436, -0.0864, -0.1902, 0.5685]) tensor([0.5958, 0.1056, 0.0952, 0.2033]) -Greedy action tensor([ 1.1100, 0.0114, -0.2712, -0.1723]) tensor([0.5371, 0.1790, 0.1350, 0.1490]) -Greedy action tensor([ 1.5903, -0.5830, -0.5199, 0.2302]) tensor([0.6704, 0.0763, 0.0813, 0.1720]) -Greedy action tensor([ 1.3457, -0.1789, -0.2942, 0.2907]) tensor([0.5682, 0.1237, 0.1102, 0.1978]) -Greedy action tensor([ 1.1788, -0.6696, 0.0780, -0.1579]) tensor([0.5705, 0.0899, 0.1898, 0.1499]) -Greedy action tensor([ 1.1012, -0.3411, -0.2166, 0.2292]) tensor([0.5202, 0.1230, 0.1393, 0.2175]) -Greedy action tensor([ 1.2377, -0.1031, -0.4694, -0.1532]) tensor([0.5911, 0.1546, 0.1072, 0.1471]) -Greedy action tensor([ 1.2778, -0.3927, -0.1461, -0.0044]) tensor([0.5861, 0.1103, 0.1411, 0.1626]) -Greedy action tensor([ 1.7943, -0.5696, -0.5022, 0.7749]) tensor([0.6429, 0.0605, 0.0647, 0.2320]) -Greedy action tensor([ 1.1973, -0.3608, -0.1001, 0.2901]) tensor([0.5298, 0.1115, 0.1448, 0.2139]) -Greedy action tensor([ 2.1522, -0.8332, -0.3455, 0.7015]) tensor([0.7314, 0.0370, 0.0602, 0.1715]) -Greedy action tensor([ 1.6984, -0.3197, -0.3008, 0.4010]) tensor([0.6487, 0.0862, 0.0879, 0.1772]) -Greedy action tensor([ 0.8967, -0.0526, -0.0704, 0.0058]) tensor([0.4592, 0.1777, 0.1746, 0.1884]) -Greedy action tensor([ 0.9195, -0.6336, -0.5049, 1.0731]) tensor([0.3819, 0.0808, 0.0919, 0.4454]) -Greedy action tensor([ 1.1669, -0.1574, -0.0496, 0.4013]) tensor([0.4933, 0.1312, 0.1461, 0.2294]) -Greedy action tensor([ 0.4661, -0.3402, 0.2862, 0.0165]) tensor([0.3425, 0.1529, 0.2861, 0.2185]) -Greedy action tensor([-1.5788, 0.0904, 0.4237, 0.1115]) tensor([0.0523, 0.2774, 0.3871, 0.2833]) -Greedy action tensor([-1.8767, -0.4797, 0.6303, -0.1480]) tensor([0.0436, 0.1762, 0.5347, 0.2455]) -Greedy action tensor([-1.2253, -0.3616, 0.6212, 0.7198]) tensor([0.0599, 0.1420, 0.3794, 0.4187]) -Greedy action tensor([-1.8328, -0.4360, 0.6088, -0.1271]) tensor([0.0454, 0.1834, 0.5214, 0.2498]) -Greedy action tensor([-1.8049, -0.3934, 0.5832, -0.0959]) tensor([0.0465, 0.1906, 0.5062, 0.2567]) -Greedy action tensor([-1.6816, -0.6356, 0.3294, -0.2082]) tensor([0.0638, 0.1815, 0.4764, 0.2783]) -Greedy action tensor([-1.2969, 0.1546, 0.2986, -0.0034]) tensor([0.0722, 0.3084, 0.3561, 0.2633]) -Greedy action tensor([-1.7261, -0.5010, 0.5493, -0.0803]) tensor([0.0518, 0.1762, 0.5037, 0.2684]) -Greedy action tensor([-0.9144, -0.6209, 0.7991, 1.3156]) tensor([0.0582, 0.0780, 0.3228, 0.5410]) -Greedy action tensor([-1.9425, -0.4471, 0.6662, -0.1785]) tensor([0.0402, 0.1793, 0.5459, 0.2346]) -Greedy action tensor([-1.4203, -0.3327, 0.6984, 0.2051]) tensor([0.0576, 0.1708, 0.4791, 0.2925]) -Greedy action tensor([-1.6242, -0.4504, 0.5380, 0.0722]) tensor([0.0544, 0.1760, 0.4728, 0.2968]) -Greedy action tensor([-1.9008, -0.4105, 0.6345, -0.1602]) tensor([0.0421, 0.1868, 0.5312, 0.2399]) -Greedy action tensor([-1.7818, -0.4734, 0.5952, -0.0884]) tensor([0.0478, 0.1770, 0.5152, 0.2600]) -Greedy action tensor([-1.4083, 0.7175, 0.2216, 0.1881]) tensor([0.0515, 0.4315, 0.2628, 0.2542]) -Greedy action tensor([-1.7312, -0.4638, 0.6162, -0.0052]) tensor([0.0485, 0.1722, 0.5070, 0.2724]) -Greedy action tensor([-1.6846, -0.2253, 0.4926, -0.0502]) tensor([0.0519, 0.2235, 0.4582, 0.2663]) -Greedy action tensor([-1.8767, -0.4742, 0.6334, -0.1508]) tensor([0.0435, 0.1768, 0.5353, 0.2444]) -Greedy action tensor([-1.6933, -0.2150, 0.6585, 0.0196]) tensor([0.0467, 0.2046, 0.4901, 0.2587]) -Greedy action tensor([-1.5869, -0.4897, 0.9199, 0.5884]) tensor([0.0399, 0.1195, 0.4893, 0.3513]) -Greedy action tensor([-1.3840, -0.4417, 1.0259, 1.0552]) tensor([0.0382, 0.0981, 0.4255, 0.4382]) -Greedy action tensor([-1.9168, -0.4181, 0.6490, -0.1615]) tensor([0.0412, 0.1844, 0.5361, 0.2383]) -Greedy action tensor([-1.7585, -0.4117, 0.5897, -0.0544]) tensor([0.0481, 0.1848, 0.5030, 0.2642]) -Greedy action tensor([-1.7995, -0.3958, 0.6360, -0.0441]) tensor([0.0449, 0.1827, 0.5127, 0.2597]) -Greedy action tensor([-1.9212, -0.4380, 0.6519, -0.1697]) tensor([0.0412, 0.1815, 0.5399, 0.2374]) -Greedy action tensor([-1.2109, -0.5990, 0.2861, 0.2576]) tensor([0.0858, 0.1582, 0.3834, 0.3726]) -Greedy action tensor([-1.7017, -0.2354, 0.5683, 0.0023]) tensor([0.0488, 0.2113, 0.4720, 0.2680]) -Greedy action tensor([-1.8874, -0.3493, 0.6381, -0.1238]) tensor([0.0417, 0.1941, 0.5210, 0.2432]) -Greedy action tensor([-1.1912, -0.6671, 0.7505, 0.3280]) tensor([0.0703, 0.1187, 0.4899, 0.3211]) -Greedy action tensor([-1.8235, -0.3924, 0.5966, -0.1085]) tensor([0.0455, 0.1903, 0.5115, 0.2527]) -Greedy action tensor([-1.7611, -0.3351, 0.5473, -0.0671]) tensor([0.0484, 0.2014, 0.4868, 0.2634]) -Greedy action tensor([-1.7027, -0.2666, 0.6552, 0.0211]) tensor([0.0468, 0.1967, 0.4944, 0.2622]) -Greedy action tensor([-1.5498, -0.6406, 0.8225, 0.0076]) tensor([0.0528, 0.1310, 0.5658, 0.2505]) -Greedy action tensor([-1.8021, -0.5020, 0.6516, -0.0307]) tensor([0.0451, 0.1655, 0.5244, 0.2651]) -Greedy action tensor([-1.6907, -0.4755, 0.5402, -0.0387]) tensor([0.0529, 0.1784, 0.4926, 0.2761]) -Greedy action tensor([-1.8678, -0.4433, 0.6296, -0.1418]) tensor([0.0436, 0.1813, 0.5300, 0.2451]) -Greedy action tensor([-1.7896, -0.4380, 0.5900, -0.1047]) tensor([0.0475, 0.1835, 0.5129, 0.2561]) -Greedy action tensor([-1.7195, 0.2416, 0.4503, -0.0532]) tensor([0.0451, 0.3208, 0.3952, 0.2389]) -Greedy action tensor([-1.8543, -0.3966, 0.6168, -0.1151]) tensor([0.0438, 0.1882, 0.5185, 0.2494]) -Greedy action tensor([-1.9106, -0.4017, 0.6389, -0.1529]) tensor([0.0415, 0.1875, 0.5307, 0.2404]) -Greedy action tensor([-1.5792, -0.1807, 0.3725, -0.4343]) tensor([0.0657, 0.2658, 0.4622, 0.2063]) -Greedy action tensor([-0.6338, -0.5984, 0.2184, 0.2319]) tensor([0.1480, 0.1533, 0.3470, 0.3517]) -Greedy action tensor([-1.8824, -0.3923, 0.6511, -0.1368]) tensor([0.0421, 0.1867, 0.5301, 0.2411]) -Greedy action tensor([-1.9061, -0.4492, 0.6599, -0.1461]) tensor([0.0415, 0.1780, 0.5396, 0.2410]) -Greedy action tensor([-1.8375, -0.4339, 0.6378, -0.1412]) tensor([0.0446, 0.1816, 0.5304, 0.2434]) -Greedy action tensor([-0.5123, -0.2807, 0.9071, 1.5603]) tensor([0.0697, 0.0879, 0.2883, 0.5541]) -Greedy action tensor([-1.8485, -0.4772, 0.6153, -0.1355]) tensor([0.0450, 0.1772, 0.5284, 0.2494]) -Greedy action tensor([-1.8701, -0.4599, 0.6238, -0.1328]) tensor([0.0437, 0.1790, 0.5291, 0.2482]) -Greedy action tensor([-1.8875, -0.4377, 0.6314, -0.1452]) tensor([0.0428, 0.1822, 0.5308, 0.2442]) -Greedy action tensor([-1.7445, -0.3866, 0.5517, -0.0850]) tensor([0.0498, 0.1936, 0.4948, 0.2618]) -Greedy action tensor([-1.6907, -0.3805, 0.7147, 0.2516]) tensor([0.0439, 0.1628, 0.4869, 0.3064]) -Greedy action tensor([-1.2429, 0.4732, 0.5758, -0.6293]) tensor([0.0686, 0.3817, 0.4229, 0.1267]) -Greedy action tensor([-1.8363, -0.4747, 0.7057, -0.0343]) tensor([0.0423, 0.1649, 0.5368, 0.2561]) -Greedy action tensor([-1.7907, -0.1428, 0.5588, -0.0728]) tensor([0.0449, 0.2335, 0.4711, 0.2505]) -Greedy action tensor([-1.0696, -0.5802, 1.3033, 1.4530]) tensor([0.0387, 0.0632, 0.4155, 0.4826]) -Greedy action tensor([-1.8856, -0.4374, 0.6324, -0.1511]) tensor([0.0429, 0.1824, 0.5318, 0.2429]) -Greedy action tensor([-0.8153, -0.6196, 1.3186, 1.3468]) tensor([0.0517, 0.0628, 0.4365, 0.4490]) -Greedy action tensor([-1.9765, -0.5285, 1.3023, 0.6941]) tensor([0.0216, 0.0920, 0.5740, 0.3124]) -Greedy action tensor([-0.9779, -0.5139, 0.3632, 0.6857]) tensor([0.0855, 0.1360, 0.3270, 0.4515]) -Greedy action tensor([1.1185, 0.4927, 0.2106, 0.8157]) tensor([0.3736, 0.1998, 0.1507, 0.2760]) -Greedy action tensor([-1.8240, -0.1775, 0.5719, -0.1512]) tensor([0.0445, 0.2307, 0.4881, 0.2368]) -Greedy action tensor([-1.7952, -0.3033, 0.5724, -0.1127]) tensor([0.0465, 0.2068, 0.4964, 0.2502]) -Greedy action tensor([-0.2661, 0.8172, -0.6273, -0.2283]) tensor([0.1758, 0.5193, 0.1225, 0.1825]) -Greedy action tensor([-0.2280, -0.5676, 0.1409, 0.5381]) tensor([0.1883, 0.1341, 0.2724, 0.4052]) -Greedy action tensor([-1.8849, -0.3569, 0.6235, -0.1413]) tensor([0.0423, 0.1952, 0.5203, 0.2422]) -Greedy action tensor([-1.8949, -0.4258, 0.6446, -0.1502]) tensor([0.0421, 0.1830, 0.5338, 0.2411]) -Greedy action tensor([-1.6727, -0.4994, 0.5222, 0.0081]) tensor([0.0538, 0.1740, 0.4832, 0.2890]) -Greedy action tensor([-1.9102, -0.3935, 0.6398, -0.1581]) tensor([0.0414, 0.1888, 0.5307, 0.2390]) -Greedy action tensor([-1.3369, -0.4717, 0.4283, 0.3003]) tensor([0.0696, 0.1654, 0.4069, 0.3580]) -Greedy action tensor([-1.4409, -0.5287, 0.4844, 0.4138]) tensor([0.0597, 0.1488, 0.4097, 0.3818]) -Greedy action tensor([-0.9639, -0.5645, 0.3566, 0.2244]) tensor([0.1051, 0.1566, 0.3935, 0.3448]) -Greedy action tensor([-1.4577, -0.6086, 0.4126, 0.0950]) tensor([0.0687, 0.1606, 0.4460, 0.3247]) -Greedy action tensor([-1.6455, -0.5060, 0.5165, 0.0418]) tensor([0.0549, 0.1715, 0.4769, 0.2967]) -Greedy action tensor([-0.7168, -0.4707, 0.2374, -0.0149]) tensor([0.1451, 0.1855, 0.3767, 0.2927]) -Greedy action tensor([-1.4706, -0.4251, 0.5555, 0.2855]) tensor([0.0581, 0.1652, 0.4405, 0.3362]) -Greedy action tensor([-0.7355, -0.5716, 0.1562, 0.2916]) tensor([0.1349, 0.1590, 0.3292, 0.3769]) -Greedy action tensor([-1.2585, -0.6000, 0.9876, 1.1595]) tensor([0.0424, 0.0818, 0.4004, 0.4754]) -Greedy action tensor([-1.9401, -0.4375, 0.6637, -0.1774]) tensor([0.0403, 0.1809, 0.5442, 0.2347]) -Greedy action tensor([-1.8164, -0.4788, 0.6677, -0.0880]) tensor([0.0446, 0.1698, 0.5345, 0.2510]) -Greedy action tensor([-0.0381, -0.0359, 0.9931, 1.7597]) tensor([0.0922, 0.0924, 0.2586, 0.5567]) -Greedy action tensor([-1.2500, 0.1570, 0.3957, -0.1974]) tensor([0.0761, 0.3109, 0.3948, 0.2181]) -Greedy action tensor([-0.6599, -1.1653, 2.6313, -0.7226]) tensor([0.0340, 0.0205, 0.9136, 0.0319]) -Greedy action tensor([ 0.6005, -1.1901, 0.7479, -0.4365]) tensor([0.3731, 0.0623, 0.4324, 0.1323]) -Greedy action tensor([ 0.3161, -0.6236, 1.0095, 0.9942]) tensor([0.1865, 0.0729, 0.3731, 0.3675]) -Greedy action tensor([ 1.4689, -1.6414, 1.1117, 0.2825]) tensor([0.4879, 0.0218, 0.3414, 0.1490]) -Greedy action tensor([-0.1869, -1.1806, 0.2381, 1.0311]) tensor([0.1592, 0.0589, 0.2436, 0.5383]) -Greedy action tensor([ 2.1854, -0.2306, 1.7663, 1.1387]) tensor([0.4766, 0.0426, 0.3135, 0.1673]) -Greedy action tensor([-0.6237, -1.4734, -0.2109, 1.4523]) tensor([0.0916, 0.0392, 0.1385, 0.7307]) -Greedy action tensor([-0.1171, -0.4096, 1.2377, -0.8547]) tensor([0.1639, 0.1223, 0.6353, 0.0784]) -Greedy action tensor([-0.9227, -1.4553, 0.0378, -1.1389]) tensor([0.1998, 0.1173, 0.5220, 0.1609]) -Greedy action tensor([ 1.1904, -0.1183, -0.3769, 0.6947]) tensor([0.4790, 0.1294, 0.0999, 0.2917]) -Greedy action tensor([-0.1271, -0.7364, 2.6202, 0.0627]) tensor([0.0545, 0.0296, 0.8500, 0.0659]) -Greedy action tensor([-0.4324, -0.7781, -0.7270, 0.5195]) tensor([0.1983, 0.1403, 0.1477, 0.5137]) -Greedy action tensor([ 0.5599, 0.1131, -0.4303, -0.3229]) tensor([0.4124, 0.2638, 0.1532, 0.1706]) -Greedy action tensor([0.9421, 0.2142, 1.1614, 0.9822]) tensor([0.2653, 0.1281, 0.3304, 0.2762]) -Greedy action tensor([ 0.8126, 0.1630, -0.1454, 1.3481]) tensor([0.2767, 0.1445, 0.1062, 0.4727]) -Greedy action tensor([-0.7218, -0.6409, 0.9702, -0.2009]) tensor([0.1087, 0.1179, 0.5904, 0.1830]) -Greedy action tensor([-0.7272, -1.0026, 1.1902, -0.1651]) tensor([0.0969, 0.0736, 0.6594, 0.1700]) -Greedy action tensor([ 1.0434, -0.7756, 1.3274, 0.7240]) tensor([0.3108, 0.0504, 0.4129, 0.2258]) -Greedy action tensor([-0.7417, -1.6152, 2.0106, 0.4783]) tensor([0.0488, 0.0204, 0.7654, 0.1654]) -Greedy action tensor([-0.6184, -0.4827, 0.6895, -0.2889]) tensor([0.1382, 0.1583, 0.5113, 0.1922]) -Greedy action tensor([-0.2248, 0.1460, 0.6683, -0.5382]) tensor([0.1779, 0.2577, 0.4345, 0.1300]) -Greedy action tensor([ 0.3963, -1.2697, 0.0267, -0.9549]) tensor([0.4675, 0.0884, 0.3231, 0.1211]) -Greedy action tensor([1.4778, 0.3788, 0.3454, 0.4844]) tensor([0.4936, 0.1645, 0.1591, 0.1828]) -Greedy action tensor([-1.3106, -0.1488, -0.4514, -0.0862]) tensor([0.1004, 0.3209, 0.2371, 0.3416]) -Greedy action tensor([-1.4037, -0.1183, 0.6053, -0.9840]) tensor([0.0736, 0.2660, 0.5485, 0.1119]) -Greedy action tensor([-1.6076, -0.4002, 0.7056, -1.3710]) tensor([0.0636, 0.2128, 0.6430, 0.0806]) -Greedy action tensor([ 1.1915, -0.2403, 2.3683, -0.0414]) tensor([0.2094, 0.0500, 0.6795, 0.0610]) -Greedy action tensor([ 0.4934, 0.5670, -0.2610, -0.1622]) tensor([0.3262, 0.3511, 0.1534, 0.1693]) -Greedy action tensor([ 0.3195, -0.1936, 1.0853, 0.4281]) tensor([0.2056, 0.1231, 0.4422, 0.2292]) -Greedy action tensor([-1.0029, -0.5287, 1.5179, 0.0441]) tensor([0.0559, 0.0898, 0.6951, 0.1592]) -Greedy action tensor([-1.0745, 0.2607, -1.0491, -0.0821]) tensor([0.1173, 0.4459, 0.1203, 0.3165]) -Greedy action tensor([ 0.8542, -1.3546, 1.6807, -0.6368]) tensor([0.2762, 0.0303, 0.6312, 0.0622]) -Greedy action tensor([-0.1823, -0.0499, 1.4245, -0.1077]) tensor([0.1219, 0.1391, 0.6077, 0.1313]) -Greedy action tensor([0.1082, 0.1235, 0.6476, 0.1152]) tensor([0.2111, 0.2143, 0.3620, 0.2126]) -Greedy action tensor([ 0.7982, -3.2320, 0.2821, 0.3223]) tensor([0.4472, 0.0079, 0.2669, 0.2779]) -Greedy action tensor([-0.2677, -0.3806, 0.5544, -1.2374]) tensor([0.2199, 0.1964, 0.5003, 0.0834]) -Greedy action tensor([-0.5545, -0.1531, -1.3756, 0.5286]) tensor([0.1698, 0.2537, 0.0747, 0.5017]) -Greedy action tensor([ 0.8115, 0.2566, 0.2846, -0.3985]) tensor([0.4060, 0.2331, 0.2397, 0.1211]) -Greedy action tensor([-0.6452, -0.9944, 0.9275, -1.4693]) tensor([0.1436, 0.1013, 0.6921, 0.0630]) -Greedy action tensor([1.4448, 0.2811, 0.3879, 0.8984]) tensor([0.4467, 0.1395, 0.1552, 0.2586]) -Greedy action tensor([0.3924, 1.1749, 0.9345, 0.2865]) tensor([0.1722, 0.3767, 0.2962, 0.1549]) -Greedy action tensor([-0.0054, -1.3038, -0.0827, -0.6172]) tensor([0.3648, 0.0996, 0.3377, 0.1979]) -Greedy action tensor([1.2160, 0.2430, 0.9858, 0.2043]) tensor([0.3943, 0.1490, 0.3132, 0.1434]) -Greedy action tensor([ 0.5562, -0.7168, 0.7427, -0.1299]) tensor([0.3346, 0.0937, 0.4032, 0.1685]) -Greedy action tensor([ 0.6660, -0.5886, -0.8466, 0.7665]) tensor([0.3829, 0.1092, 0.0844, 0.4235]) -Greedy action tensor([-0.5151, 0.4282, 0.8546, -0.3444]) tensor([0.1151, 0.2956, 0.4528, 0.1365]) -Greedy action tensor([-0.2919, -1.1985, -1.4068, -0.3217]) tensor([0.3700, 0.1494, 0.1214, 0.3592]) -Greedy action tensor([-0.9824, -0.6025, 1.5874, -0.3725]) tensor([0.0576, 0.0842, 0.7522, 0.1060]) -Greedy action tensor([ 0.6869, -1.1198, -0.7493, 1.3660]) tensor([0.2964, 0.0487, 0.0705, 0.5845]) -Greedy action tensor([ 0.9183, -0.6457, 0.5162, 0.7268]) tensor([0.3698, 0.0774, 0.2474, 0.3054]) -Greedy action tensor([ 0.0026, -0.7859, -0.0024, -0.2932]) tensor([0.3131, 0.1423, 0.3116, 0.2330]) -Greedy action tensor([-1.2949, -0.0611, 0.9993, -1.4764]) tensor([0.0659, 0.2262, 0.6531, 0.0549]) -Greedy action tensor([ 0.1516, -0.9096, 0.3157, -0.4885]) tensor([0.3277, 0.1134, 0.3861, 0.1728]) -Greedy action tensor([-0.4306, -2.3276, -0.3564, 0.3067]) tensor([0.2316, 0.0347, 0.2495, 0.4841]) -Greedy action tensor([-0.5380, -1.1697, 0.7387, 1.0067]) tensor([0.1020, 0.0542, 0.3657, 0.4781]) -Greedy action tensor([-0.2954, 0.5984, 1.8631, -0.3684]) tensor([0.0767, 0.1876, 0.6644, 0.0713]) -Greedy action tensor([-1.4085, -0.4003, -0.0204, 0.2256]) tensor([0.0777, 0.2129, 0.3113, 0.3981]) -Greedy action tensor([ 0.6974, 0.4212, 1.0438, -0.7001]) tensor([0.2924, 0.2218, 0.4135, 0.0723]) -Greedy action tensor([ 0.6972, 0.9315, -0.3473, 1.4463]) tensor([0.2114, 0.2672, 0.0744, 0.4471]) -Greedy action tensor([ 0.6896, 0.2851, -0.5131, -0.1551]) tensor([0.4171, 0.2784, 0.1253, 0.1792]) -Greedy action tensor([-0.6954, -0.4710, 1.5672, 0.4111]) tensor([0.0672, 0.0841, 0.6455, 0.2032]) -Greedy action tensor([ 0.6526, -0.0801, -0.0992, 1.6767]) tensor([0.2111, 0.1015, 0.0995, 0.5879]) -Greedy action tensor([-1.3677, -0.3409, 1.8745, -0.2064]) tensor([0.0307, 0.0857, 0.7855, 0.0980]) -Greedy action tensor([ 0.1610, -0.2821, 0.1110, 0.2895]) tensor([0.2681, 0.1721, 0.2550, 0.3048]) -Greedy action tensor([ 1.5809, -0.4861, 1.1860, 1.2976]) tensor([0.3916, 0.0496, 0.2638, 0.2950]) -Greedy action tensor([ 0.8930, -0.5670, 0.0843, -0.0065]) tensor([0.4797, 0.1114, 0.2137, 0.1952]) -Greedy action tensor([ 0.0255, -2.4381, 0.0307, 0.2888]) tensor([0.2948, 0.0251, 0.2964, 0.3837]) -Greedy action tensor([ 0.7352, -0.9329, -0.0133, 1.0559]) tensor([0.3290, 0.0620, 0.1556, 0.4533]) -Greedy action tensor([ 1.7932, 0.8850, -0.2053, 0.6443]) tensor([0.5389, 0.2173, 0.0730, 0.1708]) -Greedy action tensor([-0.2487, -0.9987, -0.0875, 0.0402]) tensor([0.2511, 0.1186, 0.2951, 0.3352]) -Greedy action tensor([ 1.0366, -0.8253, -0.9573, 0.2098]) tensor([0.5784, 0.0899, 0.0788, 0.2530]) -Greedy action tensor([ 0.2577, 0.8665, -0.0674, -0.3436]) tensor([0.2434, 0.4474, 0.1758, 0.1334]) -Greedy action tensor([ 0.8900, 0.5817, -0.2757, -0.5799]) tensor([0.4393, 0.3227, 0.1369, 0.1010]) -Greedy action tensor([-0.2905, -0.2356, -0.0927, 0.5295]) tensor([0.1803, 0.1905, 0.2198, 0.4094]) -Greedy action tensor([ 0.5364, -0.0785, 0.6518, 0.2069]) tensor([0.2957, 0.1599, 0.3318, 0.2127]) -Greedy action tensor([-1.1173, 0.3837, 0.7550, -0.8375]) tensor([0.0751, 0.3370, 0.4885, 0.0994]) -Greedy action tensor([-0.8014, -1.1134, 0.4834, -1.0603]) tensor([0.1635, 0.1196, 0.5907, 0.1262]) -Greedy action tensor([-0.8726, -0.5799, -0.8739, 0.4950]) tensor([0.1376, 0.1845, 0.1375, 0.5404]) -Greedy action tensor([-1.0374, 0.2941, -1.1163, 1.0019]) tensor([0.0746, 0.2827, 0.0690, 0.5737]) -Greedy action tensor([ 0.0606, -1.1884, -0.8975, 0.3309]) tensor([0.3355, 0.0962, 0.1287, 0.4396]) -Greedy action tensor([ 1.0731, -0.6234, 0.7350, 2.0623]) tensor([0.2181, 0.0400, 0.1555, 0.5864]) -Greedy action tensor([ 0.1113, -0.8105, 1.3727, 0.0591]) tensor([0.1701, 0.0677, 0.6007, 0.1615]) -Greedy action tensor([ 0.6148, -0.4101, -0.0872, -0.1503]) tensor([0.4311, 0.1547, 0.2136, 0.2006]) -Greedy action tensor([ 0.4395, -0.1204, -0.1034, -0.2394]) tensor([0.3760, 0.2148, 0.2185, 0.1907]) -Greedy action tensor([ 0.4334, -0.4859, -0.1479, -0.4402]) tensor([0.4210, 0.1679, 0.2354, 0.1757]) -Greedy action tensor([ 0.5747, -0.2523, -0.0293, -0.1963]) tensor([0.4088, 0.1788, 0.2234, 0.1891]) -Greedy action tensor([ 0.7616, -0.5141, -0.0136, -0.2935]) tensor([0.4789, 0.1337, 0.2206, 0.1667]) -Greedy action tensor([ 0.4943, 0.0509, 0.0133, -0.0886]) tensor([0.3548, 0.2277, 0.2193, 0.1981]) -Greedy action tensor([ 0.9940, -1.1094, -0.0801, -0.6856]) tensor([0.6060, 0.0740, 0.2070, 0.1130]) -Greedy action tensor([ 0.7374, -0.6427, 0.0692, -0.4343]) tensor([0.4822, 0.1213, 0.2472, 0.1494]) -Greedy action tensor([ 0.3560, 0.0082, -0.0453, -0.3508]) tensor([0.3486, 0.2462, 0.2334, 0.1719]) -Greedy action tensor([ 0.2635, -0.3363, 0.0378, -0.3760]) tensor([0.3479, 0.1910, 0.2776, 0.1835]) -Greedy action tensor([ 0.6127, -0.6291, 0.0339, -0.4016]) tensor([0.4521, 0.1306, 0.2534, 0.1639]) -Greedy action tensor([ 0.8675, -0.1768, 0.0794, -0.3663]) tensor([0.4767, 0.1678, 0.2168, 0.1388]) -Greedy action tensor([ 0.8002, -0.6850, -0.0612, -0.5087]) tensor([0.5211, 0.1180, 0.2202, 0.1408]) -Greedy action tensor([ 0.7423, -1.1101, 0.0251, -0.5011]) tensor([0.5172, 0.0811, 0.2525, 0.1492]) -Greedy action tensor([ 0.3145, -0.1606, -0.0470, -0.5106]) tensor([0.3628, 0.2256, 0.2527, 0.1590]) -Greedy action tensor([ 0.8244, -0.6022, -0.0269, -0.3924]) tensor([0.5094, 0.1223, 0.2174, 0.1509]) -Greedy action tensor([ 0.5394, -0.1619, -0.0771, -0.3918]) tensor([0.4115, 0.2041, 0.2222, 0.1622]) -Greedy action tensor([ 0.5328, -0.3763, -0.3865, -0.4318]) tensor([0.4581, 0.1846, 0.1827, 0.1746]) -Greedy action tensor([ 0.7568, -0.2819, 0.0630, -0.0957]) tensor([0.4386, 0.1552, 0.2192, 0.1870]) -Greedy action tensor([ 0.7871, -0.4611, -0.0238, -0.3138]) tensor([0.4845, 0.1391, 0.2153, 0.1611]) -Greedy action tensor([ 0.7565, -0.4067, -0.0451, -0.3520]) tensor([0.4782, 0.1494, 0.2145, 0.1578]) -Greedy action tensor([ 0.6331, -0.2508, 0.1839, -0.4017]) tensor([0.4155, 0.1717, 0.2652, 0.1476]) -Greedy action tensor([ 0.4999, -0.2523, -0.1568, -0.1051]) tensor([0.3943, 0.1859, 0.2045, 0.2153]) -Greedy action tensor([ 0.6308, -0.7307, -0.1120, -0.1701]) tensor([0.4585, 0.1175, 0.2182, 0.2058]) -Greedy action tensor([ 0.4326, -0.1188, -0.0771, -0.1342]) tensor([0.3644, 0.2099, 0.2189, 0.2067]) -Greedy action tensor([ 0.7132, -0.5945, 0.0430, -0.2268]) tensor([0.4603, 0.1245, 0.2355, 0.1798]) -Greedy action tensor([ 0.7948, -0.0561, 0.0200, -0.4220]) tensor([0.4579, 0.1955, 0.2110, 0.1356]) -Greedy action tensor([ 0.4907, -0.2462, -0.1036, -0.4587]) tensor([0.4136, 0.1980, 0.2283, 0.1601]) -Greedy action tensor([ 0.4202, -0.1617, 0.0202, -0.1482]) tensor([0.3577, 0.1999, 0.2398, 0.2026]) -Greedy action tensor([ 0.5519, -0.1459, -0.1122, -0.3777]) tensor([0.4154, 0.2068, 0.2138, 0.1640]) -Greedy action tensor([ 0.5108, -0.1090, -0.0723, -0.4798]) tensor([0.4053, 0.2180, 0.2262, 0.1505]) -Greedy action tensor([ 0.5744, -0.2305, -0.0124, -0.2360]) tensor([0.4085, 0.1827, 0.2272, 0.1817]) -Greedy action tensor([ 0.9617, -0.4228, 0.0410, -0.4397]) tensor([0.5277, 0.1322, 0.2102, 0.1300]) -Greedy action tensor([ 0.3575, -0.1268, -0.0184, -0.2126]) tensor([0.3486, 0.2148, 0.2394, 0.1971]) -Greedy action tensor([ 0.5405, -0.3007, -0.0927, -0.1658]) tensor([0.4072, 0.1756, 0.2162, 0.2010]) -Greedy action tensor([ 0.0921, 0.1572, 0.0277, -0.0884]) tensor([0.2604, 0.2779, 0.2442, 0.2174]) -Greedy action tensor([ 0.3653, -0.2084, -0.1870, -0.1433]) tensor([0.3649, 0.2056, 0.2100, 0.2194]) -Greedy action tensor([ 0.4795, -0.0262, 0.0041, -0.0324]) tensor([0.3541, 0.2136, 0.2201, 0.2122]) -Greedy action tensor([ 0.7185, -0.4925, -0.0044, -0.0436]) tensor([0.4445, 0.1324, 0.2157, 0.2074]) -Greedy action tensor([ 0.6184, -0.5578, 0.0303, -0.3521]) tensor([0.4459, 0.1375, 0.2476, 0.1689]) -Greedy action tensor([ 0.9466, -0.5558, -0.0541, -0.4247]) tensor([0.5423, 0.1207, 0.1994, 0.1376]) -Greedy action tensor([ 0.5576, 0.0873, -0.0662, -0.1166]) tensor([0.3745, 0.2340, 0.2007, 0.1908]) -Greedy action tensor([ 0.6156, -0.3136, -0.2007, -0.2854]) tensor([0.4458, 0.1760, 0.1971, 0.1811]) -Greedy action tensor([ 0.8819, -0.3677, -0.0248, -0.7373]) tensor([0.5295, 0.1518, 0.2138, 0.1049]) -Greedy action tensor([ 1.0592, -0.8546, -0.0837, -0.5942]) tensor([0.6032, 0.0890, 0.1924, 0.1155]) -Greedy action tensor([ 0.9927, -0.4706, -0.0748, -0.3085]) tensor([0.5413, 0.1253, 0.1861, 0.1473]) -Greedy action tensor([ 0.6895, -0.4225, -0.1133, -0.1260]) tensor([0.4506, 0.1482, 0.2019, 0.1993]) -Greedy action tensor([ 0.8277, -0.2241, -0.0092, -0.2559]) tensor([0.4715, 0.1647, 0.2042, 0.1596]) -Greedy action tensor([ 0.8558, -0.7797, 0.0119, -0.3795]) tensor([0.5220, 0.1017, 0.2245, 0.1518]) -Greedy action tensor([ 0.9452, -0.6001, -0.0464, -0.3953]) tensor([0.5417, 0.1155, 0.2010, 0.1418]) -Greedy action tensor([ 0.8621, -0.3372, -0.0730, -0.2618]) tensor([0.4953, 0.1493, 0.1944, 0.1610]) -Greedy action tensor([ 0.6237, -0.5018, 0.0196, -0.4293]) tensor([0.4505, 0.1462, 0.2462, 0.1572]) -Greedy action tensor([ 0.7016, -0.2729, 0.1363, -0.6399]) tensor([0.4531, 0.1710, 0.2575, 0.1185]) -Greedy action tensor([ 0.8299, -0.7738, 0.0822, -0.2527]) tensor([0.4967, 0.0999, 0.2352, 0.1682]) -Greedy action tensor([ 0.8935, -0.7815, 0.0624, -0.6157]) tensor([0.5423, 0.1016, 0.2362, 0.1199]) -Greedy action tensor([ 0.9372, -0.4303, -0.1928, -0.5816]) tensor([0.5566, 0.1418, 0.1798, 0.1219]) -Greedy action tensor([ 0.7699, -0.5344, 0.0514, -0.3209]) tensor([0.4774, 0.1295, 0.2327, 0.1604]) -Greedy action tensor([ 0.1609, 0.1786, -0.0998, -0.3585]) tensor([0.2956, 0.3009, 0.2277, 0.1758]) -Greedy action tensor([ 0.5897, -0.5546, -0.0661, -0.5440]) tensor([0.4631, 0.1475, 0.2404, 0.1490]) -Greedy action tensor([ 0.5678, -0.5960, -0.2167, -0.7465]) tensor([0.4908, 0.1533, 0.2240, 0.1319]) -Greedy action tensor([ 0.6544, -0.1217, -0.0392, -0.3791]) tensor([0.4318, 0.1987, 0.2158, 0.1536]) -Greedy action tensor([ 0.8254, -0.2555, 0.0347, -0.3365]) tensor([0.4749, 0.1611, 0.2154, 0.1486]) -Greedy action tensor([ 0.9503, -0.2535, 0.1355, -0.3124]) tensor([0.4937, 0.1481, 0.2186, 0.1396]) -Greedy action tensor([ 0.7708, -0.3453, -0.0402, -0.1967]) tensor([0.4647, 0.1522, 0.2065, 0.1766]) -Greedy action tensor([ 0.7340, -0.4299, 0.0960, -0.3970]) tensor([0.4623, 0.1443, 0.2442, 0.1492]) -Greedy action tensor([ 0.9438, -0.6637, 0.0135, -0.4934]) tensor([0.5457, 0.1094, 0.2153, 0.1297]) -Greedy action tensor([ 0.9957, -0.8669, -0.1222, -0.3620]) tensor([0.5749, 0.0893, 0.1880, 0.1479]) -Greedy action tensor([ 0.8110, -0.6520, -0.0371, -0.4443]) tensor([0.5142, 0.1191, 0.2202, 0.1466]) -Greedy action tensor([ 0.3467, -0.0433, -0.0651, -0.0849]) tensor([0.3346, 0.2265, 0.2216, 0.2173]) -Greedy action tensor([ 0.4042, 0.4254, -0.1597, 0.0075]) tensor([0.3065, 0.3130, 0.1744, 0.2061]) -Greedy action tensor([ 0.7811, -0.5631, -0.0874, -0.4593]) tensor([0.5077, 0.1324, 0.2130, 0.1469]) -Greedy action tensor([ 0.2784, -0.5136, -0.1853, -0.2502]) tensor([0.3744, 0.1696, 0.2354, 0.2206]) -Greedy action tensor([ 0.5628, -0.1610, 0.1025, -0.2302]) tensor([0.3893, 0.1888, 0.2457, 0.1762]) -Greedy action tensor([ 0.7631, -0.2440, -0.0595, -0.1067]) tensor([0.4497, 0.1643, 0.1976, 0.1884]) -Greedy action tensor([ 0.2408, -0.2496, -0.0988, -0.1303]) tensor([0.3317, 0.2031, 0.2362, 0.2289]) -Greedy action tensor([ 0.4553, 0.1021, 0.1038, -0.2911]) tensor([0.3472, 0.2439, 0.2443, 0.1646]) -Greedy action tensor([ 1.0008, -0.4462, -0.1795, -0.3207]) tensor([0.5527, 0.1300, 0.1698, 0.1474]) -Greedy action tensor([ 0.8365, -1.1079, -0.0518, -0.5746]) tensor([0.5561, 0.0796, 0.2288, 0.1356]) -Greedy action tensor([ 0.3600, 0.1642, -0.0873, -0.2060]) tensor([0.3301, 0.2714, 0.2111, 0.1874]) -Greedy action tensor([ 0.6731, -0.2684, -0.0284, -0.4269]) tensor([0.4507, 0.1758, 0.2235, 0.1500]) -Greedy action tensor([ 0.3164, 0.2242, 0.0484, -0.0003]) tensor([0.2937, 0.2678, 0.2246, 0.2139]) -Greedy action tensor([ 1.3505, -0.1609, -0.5316, 0.3597]) tensor([0.5733, 0.1265, 0.0873, 0.2129]) -Greedy action tensor([ 2.1753, -0.4641, -0.5849, 0.6927]) tensor([0.7344, 0.0524, 0.0465, 0.1667]) -Greedy action tensor([ 1.2500, -0.0649, 0.0293, 0.1860]) tensor([0.5239, 0.1407, 0.1546, 0.1808]) -Greedy action tensor([ 1.2465, -0.2191, -0.3672, 0.2866]) tensor([0.5516, 0.1274, 0.1098, 0.2112]) -Greedy action tensor([ 1.1780, -0.4712, -0.1731, 0.1569]) tensor([0.5521, 0.1061, 0.1430, 0.1988]) -Greedy action tensor([ 0.4790, 0.0093, 0.2507, -0.1309]) tensor([0.3373, 0.2109, 0.2685, 0.1833]) -Greedy action tensor([ 1.3914, -0.6201, -0.5266, 0.2484]) tensor([0.6252, 0.0836, 0.0918, 0.1993]) -Greedy action tensor([ 1.9502, -0.6918, 0.1746, 0.5283]) tensor([0.6748, 0.0481, 0.1143, 0.1628]) -Greedy action tensor([ 1.6999, -0.3785, -0.7915, 0.3265]) tensor([0.6844, 0.0856, 0.0567, 0.1733]) -Greedy action tensor([ 0.8211, -0.7169, -0.1038, 0.0710]) tensor([0.4799, 0.1031, 0.1903, 0.2267]) -Greedy action tensor([ 2.1566, 0.0311, -0.4739, 0.3184]) tensor([0.7405, 0.0884, 0.0533, 0.1178]) -Greedy action tensor([ 1.4214, -0.5933, -0.5554, 0.9331]) tensor([0.5304, 0.0707, 0.0735, 0.3255]) -Greedy action tensor([ 2.1476, -0.0044, -0.2232, 0.7572]) tensor([0.6856, 0.0797, 0.0640, 0.1707]) -Greedy action tensor([ 1.8183, -0.9549, -0.3453, 0.6701]) tensor([0.6691, 0.0418, 0.0769, 0.2122]) -Greedy action tensor([ 1.1902, -0.3327, -0.2046, 0.4668]) tensor([0.5125, 0.1118, 0.1271, 0.2486]) -Greedy action tensor([ 1.6577, -0.5614, -0.4542, 0.4743]) tensor([0.6510, 0.0708, 0.0788, 0.1994]) -Greedy action tensor([ 1.4545, -0.6410, -0.3562, 0.4240]) tensor([0.6085, 0.0749, 0.0995, 0.2171]) -Greedy action tensor([ 1.6427, -0.4781, -0.5029, 0.6011]) tensor([0.6290, 0.0754, 0.0736, 0.2220]) -Greedy action tensor([ 2.0707, -0.5958, -0.6156, 0.3774]) tensor([0.7567, 0.0526, 0.0516, 0.1392]) -Greedy action tensor([2.0633, 0.1570, 0.0305, 0.3134]) tensor([0.6880, 0.1023, 0.0901, 0.1196]) -Greedy action tensor([ 2.0682, 0.5518, -0.3824, -0.8541]) tensor([0.7356, 0.1614, 0.0634, 0.0396]) -Greedy action tensor([ 1.7111, -0.5961, -0.5539, 0.6102]) tensor([0.6511, 0.0648, 0.0676, 0.2165]) -Greedy action tensor([ 0.7635, -0.0377, -0.0511, 0.2767]) tensor([0.3990, 0.1791, 0.1767, 0.2452]) -Greedy action tensor([ 1.7852, -0.1007, -1.0310, 0.4401]) tensor([0.6793, 0.1031, 0.0406, 0.1770]) -Greedy action tensor([ 2.0223, -0.9131, -0.7868, 0.7594]) tensor([0.7162, 0.0380, 0.0432, 0.2026]) -Greedy action tensor([ 1.1541, -0.1483, -0.2307, 0.4417]) tensor([0.4968, 0.1351, 0.1244, 0.2437]) -Greedy action tensor([ 0.4352, -0.7195, 0.0150, 0.2582]) tensor([0.3559, 0.1122, 0.2338, 0.2982]) -Greedy action tensor([ 0.9968, -0.2441, -0.2693, 0.3717]) tensor([0.4748, 0.1373, 0.1339, 0.2541]) -Greedy action tensor([ 1.2559, -0.0660, -0.8354, 0.3556]) tensor([0.5566, 0.1484, 0.0688, 0.2262]) -Greedy action tensor([ 0.9790, -0.5098, -0.1315, 0.0632]) tensor([0.5114, 0.1154, 0.1685, 0.2047]) -Greedy action tensor([ 1.1409, -0.5287, -0.1261, 0.3527]) tensor([0.5196, 0.0978, 0.1463, 0.2362]) -Greedy action tensor([ 1.6347, -0.8070, -0.2133, 0.4210]) tensor([0.6487, 0.0564, 0.1022, 0.1927]) -Greedy action tensor([ 1.0934, -0.2191, -0.1579, -0.3638]) tensor([0.5592, 0.1505, 0.1600, 0.1302]) -Greedy action tensor([ 1.7155, -0.7019, -0.3509, 0.3278]) tensor([0.6824, 0.0608, 0.0864, 0.1704]) -Greedy action tensor([ 1.4548, -0.4545, -0.3220, 0.3031]) tensor([0.6122, 0.0907, 0.1036, 0.1935]) -Greedy action tensor([ 1.0360, -0.2619, -0.0714, 0.1879]) tensor([0.4922, 0.1344, 0.1626, 0.2108]) -Greedy action tensor([ 1.7869, -0.6938, -0.2966, 0.4229]) tensor([0.6832, 0.0572, 0.0850, 0.1746]) -Greedy action tensor([ 1.3460, -0.5291, -0.4816, 0.8720]) tensor([0.5164, 0.0792, 0.0830, 0.3214]) -Greedy action tensor([ 1.4004, 0.1245, -0.5707, 0.6133]) tensor([0.5337, 0.1490, 0.0743, 0.2429]) -Greedy action tensor([ 1.3498, -0.7990, -0.4060, 0.8463]) tensor([0.5280, 0.0616, 0.0912, 0.3192]) -Greedy action tensor([ 1.7007, -0.5375, -0.0651, 0.6742]) tensor([0.6113, 0.0652, 0.1046, 0.2190]) -Greedy action tensor([ 1.1897, -0.1780, -0.4362, 0.0701]) tensor([0.5625, 0.1433, 0.1107, 0.1836]) -Greedy action tensor([ 2.0221, -0.9119, -0.3615, 0.6630]) tensor([0.7131, 0.0379, 0.0658, 0.1832]) -Greedy action tensor([ 2.0161, -0.7437, -0.3576, 0.3826]) tensor([0.7398, 0.0468, 0.0689, 0.1444]) -Greedy action tensor([ 0.6013, -0.3129, -0.0433, 0.2039]) tensor([0.3849, 0.1543, 0.2020, 0.2587]) -Greedy action tensor([ 1.0278, -0.4102, -0.6098, 0.5993]) tensor([0.4800, 0.1140, 0.0933, 0.3127]) -Greedy action tensor([ 1.4354, -0.5072, -0.3821, 0.4296]) tensor([0.5983, 0.0857, 0.0972, 0.2188]) -Greedy action tensor([ 1.7787, -0.1400, -0.1999, 0.0751]) tensor([0.6816, 0.1001, 0.0942, 0.1241]) -Greedy action tensor([ 1.2953, -0.4678, -0.3458, 0.3927]) tensor([0.5647, 0.0969, 0.1094, 0.2290]) -Greedy action tensor([ 0.9097, -0.7675, 0.0974, 0.3090]) tensor([0.4589, 0.0858, 0.2037, 0.2517]) -Greedy action tensor([ 1.8227, -1.1516, -0.1014, 0.3202]) tensor([0.7044, 0.0360, 0.1029, 0.1568]) -Greedy action tensor([ 1.7270, -0.5781, -0.3359, 0.5059]) tensor([0.6571, 0.0656, 0.0835, 0.1938]) -Greedy action tensor([ 1.2790, -0.4938, -0.3055, 0.5401]) tensor([0.5398, 0.0917, 0.1107, 0.2578]) -Greedy action tensor([ 1.3588, -1.0978, -0.2817, 0.0290]) tensor([0.6476, 0.0555, 0.1256, 0.1713]) -Greedy action tensor([ 1.3608, -0.3179, -0.0888, 0.1852]) tensor([0.5781, 0.1079, 0.1357, 0.1784]) -Greedy action tensor([ 1.2435, -0.4868, 0.0153, 0.1072]) tensor([0.5583, 0.0989, 0.1635, 0.1792]) -Greedy action tensor([ 0.4290, -0.2312, -0.4912, 0.1093]) tensor([0.3786, 0.1956, 0.1508, 0.2750]) -Greedy action tensor([ 1.2292, -0.2354, -0.0139, 0.0402]) tensor([0.5482, 0.1267, 0.1581, 0.1669]) -Greedy action tensor([ 1.0969, -0.5774, -0.2210, -0.3630]) tensor([0.5926, 0.1111, 0.1586, 0.1376]) -Greedy action tensor([ 0.9217, -0.2843, -0.1510, 0.3939]) tensor([0.4481, 0.1342, 0.1533, 0.2644]) -Greedy action tensor([ 1.3832, -0.2699, -0.3569, 0.5941]) tensor([0.5491, 0.1051, 0.0964, 0.2494]) -Greedy action tensor([ 0.2763, -0.2907, -0.1421, 0.3049]) tensor([0.3073, 0.1743, 0.2022, 0.3162]) -Greedy action tensor([ 1.1670, -0.6328, -0.1467, 0.2143]) tensor([0.5495, 0.0908, 0.1477, 0.2119]) -Greedy action tensor([ 1.4331, -0.9424, 0.1349, 0.1394]) tensor([0.6097, 0.0567, 0.1665, 0.1672]) -Greedy action tensor([ 1.3426, -0.0218, -0.2962, 0.3633]) tensor([0.5479, 0.1400, 0.1064, 0.2058]) -Greedy action tensor([ 1.5785, -0.6061, -0.5234, 0.6128]) tensor([0.6190, 0.0696, 0.0757, 0.2357]) -Greedy action tensor([ 5.9193e-01, -3.4271e-01, 1.0383e-04, -1.0822e-01]) tensor([0.4094, 0.1608, 0.2265, 0.2033]) -Greedy action tensor([ 1.1554, -0.1145, -0.2516, 0.3501]) tensor([0.5069, 0.1424, 0.1241, 0.2266]) -Greedy action tensor([ 0.8543, -0.2183, -0.2175, 0.3645]) tensor([0.4353, 0.1489, 0.1490, 0.2667]) -Greedy action tensor([ 0.7450, -0.2014, 0.2277, -0.0321]) tensor([0.4092, 0.1588, 0.2439, 0.1881]) -Greedy action tensor([ 1.4712, -0.5449, -0.2164, 0.3595]) tensor([0.6071, 0.0808, 0.1123, 0.1997]) -Greedy action tensor([ 1.0088, -0.0969, -0.2168, 0.0062]) tensor([0.5021, 0.1662, 0.1474, 0.1842]) -Greedy action tensor([ 1.5299, -0.4674, -0.8178, 0.4936]) tensor([0.6305, 0.0856, 0.0603, 0.2237]) -Greedy action tensor([ 0.7247, -0.1936, -0.0471, 0.1814]) tensor([0.4095, 0.1635, 0.1893, 0.2378]) -Greedy action tensor([ 1.2199, -0.3054, -0.4093, 0.1751]) tensor([0.5665, 0.1232, 0.1111, 0.1992]) -Greedy action tensor([ 0.8154, -0.2467, -0.0492, 0.0529]) tensor([0.4477, 0.1548, 0.1886, 0.2089]) -Greedy action tensor([ 0.7727, -0.1117, 0.0576, 0.0358]) tensor([0.4200, 0.1735, 0.2055, 0.2010]) -Greedy action tensor([ 1.4321, -0.0479, -0.5110, 0.5694]) tensor([0.5578, 0.1270, 0.0799, 0.2354]) -Greedy action tensor([ 1.8503, -0.5233, -0.3247, 0.3872]) tensor([0.6953, 0.0648, 0.0790, 0.1610]) -Greedy action tensor([ 1.8754, -0.7231, -0.4891, 0.4486]) tensor([0.7100, 0.0528, 0.0667, 0.1705]) -Greedy action tensor([ 0.9592, -0.6799, 0.2222, 0.2376]) tensor([0.4633, 0.0899, 0.2217, 0.2251]) -Greedy action tensor([-0.2769, -2.5456, 0.0056, 0.5328]) tensor([0.2138, 0.0221, 0.2836, 0.4805]) -Greedy action tensor([ 1.2385, -0.5591, 1.1646, -0.0704]) tensor([0.4229, 0.0701, 0.3928, 0.1142]) -Greedy action tensor([ 0.0534, 0.0452, 1.9081, -0.9287]) tensor([0.1142, 0.1133, 0.7297, 0.0428]) -Greedy action tensor([ 0.2148, -0.3892, 0.8194, 1.3147]) tensor([0.1567, 0.0857, 0.2869, 0.4707]) -Greedy action tensor([-0.8965, -0.5270, -0.5417, 1.2786]) tensor([0.0789, 0.1142, 0.1125, 0.6945]) -Greedy action tensor([ 0.7051, -0.3212, -0.4183, -0.1590]) tensor([0.4751, 0.1702, 0.1545, 0.2002]) -Greedy action tensor([ 0.2795, -1.5232, -0.6291, 0.4766]) tensor([0.3590, 0.0592, 0.1447, 0.4372]) -Greedy action tensor([-0.0249, -0.0878, -0.0425, 0.9081]) tensor([0.1830, 0.1719, 0.1798, 0.4653]) -Greedy action tensor([-0.2776, -0.6853, 0.2280, -0.5816]) tensor([0.2462, 0.1638, 0.4083, 0.1817]) -Greedy action tensor([ 1.7824, -0.1350, 1.2611, 0.3825]) tensor([0.5032, 0.0740, 0.2988, 0.1241]) -Greedy action tensor([-0.1140, -0.9693, 0.7500, -0.0814]) tensor([0.2070, 0.0880, 0.4911, 0.2139]) -Greedy action tensor([ 0.0358, 0.1722, 0.0618, -0.0257]) tensor([0.2431, 0.2787, 0.2496, 0.2286]) -Greedy action tensor([-0.3042, -1.2547, 0.5592, -0.0282]) tensor([0.1970, 0.0762, 0.4672, 0.2596]) -Greedy action tensor([ 2.1238, -0.1334, 1.0014, -0.0892]) tensor([0.6495, 0.0680, 0.2114, 0.0710]) -Greedy action tensor([ 1.5653, -0.5615, 0.7787, 0.5664]) tensor([0.5147, 0.0614, 0.2344, 0.1896]) -Greedy action tensor([-1.3078, -1.1685, 0.5429, -0.1306]) tensor([0.0850, 0.0978, 0.5412, 0.2760]) -Greedy action tensor([ 0.4596, 0.8721, 0.2482, -0.6406]) tensor([0.2738, 0.4135, 0.2216, 0.0911]) -Greedy action tensor([-0.3345, -2.1078, 0.7240, 0.2755]) tensor([0.1697, 0.0288, 0.4891, 0.3123]) -Greedy action tensor([ 0.6726, -0.3487, 0.6063, 0.9556]) tensor([0.2760, 0.0994, 0.2583, 0.3663]) -Greedy action tensor([ 1.4432, 0.5102, 0.9840, -0.5053]) tensor([0.4613, 0.1815, 0.2915, 0.0657]) -Greedy action tensor([ 0.0369, -0.7269, 1.2320, 0.3898]) tensor([0.1615, 0.0752, 0.5335, 0.2298]) -Greedy action tensor([ 1.3088, -0.7446, 1.2544, 0.6100]) tensor([0.3887, 0.0499, 0.3681, 0.1933]) -Greedy action tensor([-0.4014, -0.5295, -0.2440, -1.0187]) tensor([0.2786, 0.2451, 0.3261, 0.1503]) -Greedy action tensor([ 1.2471, -1.4668, 0.8298, 0.6579]) tensor([0.4386, 0.0291, 0.2890, 0.2433]) -Greedy action tensor([ 0.3781, -1.8082, 0.4454, 0.4338]) tensor([0.3087, 0.0347, 0.3302, 0.3264]) -Greedy action tensor([-1.4263, -0.3169, 0.9103, 0.1916]) tensor([0.0515, 0.1561, 0.5327, 0.2596]) -Greedy action tensor([-0.4422, -1.1373, -0.8228, 0.3895]) tensor([0.2232, 0.1114, 0.1526, 0.5128]) -Greedy action tensor([ 0.4098, 0.1571, 0.5277, -0.1264]) tensor([0.2868, 0.2228, 0.3227, 0.1678]) -Greedy action tensor([ 1.1281, 0.3212, -0.1712, 0.6747]) tensor([0.4247, 0.1895, 0.1158, 0.2699]) -Greedy action tensor([ 0.5989, -0.4349, -0.4223, 1.6367]) tensor([0.2203, 0.0784, 0.0794, 0.6220]) -Greedy action tensor([ 0.8625, -1.6118, 0.5444, 0.0587]) tensor([0.4426, 0.0373, 0.3220, 0.1981]) -Greedy action tensor([ 1.0104, -0.9523, 0.6494, 0.0164]) tensor([0.4530, 0.0636, 0.3157, 0.1677]) -Greedy action tensor([-0.5458, -0.6011, -0.1010, 0.5846]) tensor([0.1514, 0.1433, 0.2363, 0.4690]) -Greedy action tensor([-0.0443, -0.7224, 0.7324, -0.1763]) tensor([0.2194, 0.1114, 0.4770, 0.1923]) -Greedy action tensor([ 0.1739, -1.6448, -0.0970, 0.5398]) tensor([0.2970, 0.0482, 0.2265, 0.4283]) -Greedy action tensor([-0.4591, -2.1980, -0.1438, 0.1475]) tensor([0.2283, 0.0401, 0.3129, 0.4187]) -Greedy action tensor([ 0.8941, -0.4103, 0.7607, -0.2703]) tensor([0.4067, 0.1104, 0.3560, 0.1269]) -Greedy action tensor([ 1.2980, -1.4196, -0.1306, 1.2605]) tensor([0.4407, 0.0291, 0.1056, 0.4245]) -Greedy action tensor([-0.1574, -1.2375, 0.5475, 0.0180]) tensor([0.2195, 0.0745, 0.4443, 0.2616]) -Greedy action tensor([ 1.0477, 0.3868, 1.2008, -0.4864]) tensor([0.3451, 0.1782, 0.4022, 0.0744]) -Greedy action tensor([-1.7390, -0.0041, -0.0257, -0.1531]) tensor([0.0585, 0.3315, 0.3244, 0.2856]) -Greedy action tensor([-0.1799, -0.5488, -0.4539, 1.2501]) tensor([0.1508, 0.1043, 0.1147, 0.6302]) -Greedy action tensor([ 0.6217, -0.9888, -0.1219, 0.4495]) tensor([0.3973, 0.0794, 0.1889, 0.3344]) -Greedy action tensor([ 0.1545, -0.8818, 0.8495, -0.2424]) tensor([0.2481, 0.0880, 0.4971, 0.1668]) -Greedy action tensor([-0.7167, -1.8469, 0.9195, 0.3013]) tensor([0.1084, 0.0350, 0.5566, 0.3000]) -Greedy action tensor([-1.1857, -1.0683, 0.4733, -0.7475]) tensor([0.1120, 0.1260, 0.5885, 0.1736]) -Greedy action tensor([ 0.4482, -0.5213, 1.0546, -0.1261]) tensor([0.2648, 0.1004, 0.4856, 0.1491]) -Greedy action tensor([ 2.1131, -1.2185, 0.8713, 1.1804]) tensor([0.5821, 0.0208, 0.1681, 0.2290]) -Greedy action tensor([ 1.4189, -0.7353, 0.8154, 0.8914]) tensor([0.4439, 0.0515, 0.2427, 0.2619]) -Greedy action tensor([-0.3487, 0.4068, 0.7345, 0.4509]) tensor([0.1204, 0.2562, 0.3556, 0.2678]) -Greedy action tensor([0.6252, 0.8968, 0.2681, 0.1110]) tensor([0.2770, 0.3635, 0.1938, 0.1657]) -Greedy action tensor([-0.7219, 0.0886, 1.4756, -0.3324]) tensor([0.0728, 0.1638, 0.6558, 0.1075]) -Greedy action tensor([-0.7381, -1.4572, 0.4335, -0.5310]) tensor([0.1682, 0.0820, 0.5429, 0.2069]) -Greedy action tensor([-0.1180, -1.2590, 0.0973, 0.5170]) tensor([0.2249, 0.0719, 0.2789, 0.4244]) -Greedy action tensor([-0.1238, -2.7825, 0.0284, -0.0089]) tensor([0.2980, 0.0209, 0.3469, 0.3342]) -Greedy action tensor([ 1.0580, -1.5361, 1.0229, -0.2432]) tensor([0.4324, 0.0323, 0.4175, 0.1177]) -Greedy action tensor([ 0.8042, -1.1940, 1.3147, 0.4523]) tensor([0.2853, 0.0387, 0.4753, 0.2007]) -Greedy action tensor([ 0.9918, -0.3188, 0.7154, -0.4207]) tensor([0.4402, 0.1187, 0.3339, 0.1072]) -Greedy action tensor([ 1.2321, -0.2266, 0.7582, 0.0924]) tensor([0.4598, 0.1069, 0.2862, 0.1471]) -Greedy action tensor([-0.8207, -1.0728, 1.7507, -0.6235]) tensor([0.0622, 0.0483, 0.8137, 0.0757]) -Greedy action tensor([ 0.5267, 0.7972, 1.8644, -0.5602]) tensor([0.1548, 0.2030, 0.5900, 0.0522]) -Greedy action tensor([-0.3651, -1.0294, -0.0712, 0.8840]) tensor([0.1576, 0.0811, 0.2115, 0.5497]) -Greedy action tensor([-0.6351, -0.9623, -0.4813, -0.2963]) tensor([0.2331, 0.1680, 0.2718, 0.3271]) -Greedy action tensor([-1.5679, -0.8980, 0.6501, 0.4355]) tensor([0.0511, 0.0999, 0.4698, 0.3791]) -Greedy action tensor([ 0.5130, 0.6482, -0.6478, 0.3678]) tensor([0.3009, 0.3445, 0.0943, 0.2603]) -Greedy action tensor([ 7.0113e-01, -1.9895e-01, -2.4522e-04, 2.3170e-01]) tensor([0.3956, 0.1608, 0.1962, 0.2474]) -Greedy action tensor([ 0.4205, -0.1925, -0.9028, 0.2272]) tensor([0.3799, 0.2058, 0.1012, 0.3131]) -Greedy action tensor([ 0.8974, -0.0235, 1.1909, 0.2579]) tensor([0.3061, 0.1219, 0.4105, 0.1615]) -Greedy action tensor([ 1.9355, -0.7652, 1.0473, 0.7965]) tensor([0.5560, 0.0373, 0.2287, 0.1780]) -Greedy action tensor([-0.5579, -0.1612, 1.2451, -1.3059]) tensor([0.1108, 0.1647, 0.6721, 0.0524]) -Greedy action tensor([ 0.0339, -1.7043, 0.9023, 0.4621]) tensor([0.1963, 0.0345, 0.4679, 0.3013]) -Greedy action tensor([-0.3774, -0.7306, -0.0599, -0.2487]) tensor([0.2373, 0.1667, 0.3260, 0.2699]) -Greedy action tensor([ 0.4411, -1.9701, 0.5097, 0.3964]) tensor([0.3208, 0.0288, 0.3436, 0.3068]) -Greedy action tensor([-0.2752, -1.3633, -0.7535, 0.3893]) tensor([0.2564, 0.0864, 0.1589, 0.4983]) -Greedy action tensor([ 0.8877, -0.9022, 1.4492, 0.8796]) tensor([0.2556, 0.0427, 0.4482, 0.2536]) -Greedy action tensor([1.2306, 0.5028, 0.0858, 0.2411]) tensor([0.4602, 0.2223, 0.1465, 0.1711]) -Greedy action tensor([ 1.0399, -1.5501, 0.7843, -0.1652]) tensor([0.4653, 0.0349, 0.3604, 0.1394]) -Greedy action tensor([ 1.1421, -0.6947, 0.8616, -0.0808]) tensor([0.4527, 0.0721, 0.3420, 0.1332]) -Greedy action tensor([ 1.4853, -1.0262, 1.0460, -0.4578]) tensor([0.5351, 0.0434, 0.3449, 0.0767]) -Greedy action tensor([-0.1246, -1.6384, 0.3784, 0.9627]) tensor([0.1712, 0.0377, 0.2832, 0.5079]) -Greedy action tensor([-0.3040, -0.2459, 1.4504, 0.2300]) tensor([0.1048, 0.1110, 0.6055, 0.1787]) -Greedy action tensor([-1.7378, -0.1727, 0.5367, -0.1214]) tensor([0.0487, 0.2329, 0.4734, 0.2451]) -Greedy action tensor([-1.7621, -0.4097, 0.5374, -0.0789]) tensor([0.0495, 0.1912, 0.4931, 0.2662]) -Greedy action tensor([-1.8557, -0.4510, 0.6284, -0.1296]) tensor([0.0441, 0.1796, 0.5286, 0.2477]) -Greedy action tensor([-0.8394, 0.0190, 0.3026, -0.4559]) tensor([0.1256, 0.2964, 0.3936, 0.1843]) -Greedy action tensor([-1.9225, -0.3800, 0.6430, -0.1673]) tensor([0.0409, 0.1911, 0.5316, 0.2364]) -Greedy action tensor([-0.6897, -0.5788, 0.1955, 0.3109]) tensor([0.1377, 0.1539, 0.3338, 0.3746]) -Greedy action tensor([-2.0574, -0.7665, 0.7348, 0.2434]) tensor([0.0323, 0.1175, 0.5275, 0.3227]) -Greedy action tensor([-0.7383, 0.9917, 0.1201, 0.6382]) tensor([0.0772, 0.4352, 0.1820, 0.3056]) -Greedy action tensor([-1.1694, -0.5791, 0.6086, 0.9569]) tensor([0.0585, 0.1055, 0.3460, 0.4901]) -Greedy action tensor([-1.4286, -0.7826, -0.2834, -0.5519]) tensor([0.1183, 0.2257, 0.3718, 0.2842]) -Greedy action tensor([-1.8439, -0.3276, 0.6260, -0.0693]) tensor([0.0430, 0.1957, 0.5079, 0.2534]) -Greedy action tensor([-1.9062, -0.4275, 0.6447, -0.1569]) tensor([0.0417, 0.1831, 0.5351, 0.2400]) -Greedy action tensor([-1.8621, -0.4567, 0.6420, -0.1087]) tensor([0.0433, 0.1766, 0.5299, 0.2501]) -Greedy action tensor([-1.6390, -0.4884, 0.6804, 0.2927]) tensor([0.0471, 0.1488, 0.4790, 0.3251]) -Greedy action tensor([-1.7560, 0.0430, 0.5066, -0.0735]) tensor([0.0454, 0.2743, 0.4361, 0.2441]) -Greedy action tensor([-1.9375, -0.4235, 0.6556, -0.1742]) tensor([0.0404, 0.1836, 0.5403, 0.2357]) -Greedy action tensor([-1.9300, -0.4203, 0.6594, -0.1683]) tensor([0.0405, 0.1834, 0.5400, 0.2360]) -Greedy action tensor([-0.7936, -0.6055, 0.1487, 0.2916]) tensor([0.1293, 0.1561, 0.3318, 0.3828]) -Greedy action tensor([-1.7451, -0.2688, 0.5478, -0.0581]) tensor([0.0483, 0.2116, 0.4788, 0.2612]) -Greedy action tensor([-1.2679, 0.5841, 0.3368, -0.2309]) tensor([0.0659, 0.4201, 0.3280, 0.1860]) -Greedy action tensor([-1.1806, 0.3408, 0.3911, 0.8507]) tensor([0.0555, 0.2541, 0.2672, 0.4232]) -Greedy action tensor([-1.6018, -0.4825, 0.5029, 0.0895]) tensor([0.0565, 0.1731, 0.4637, 0.3067]) -Greedy action tensor([-1.2845, -0.3132, 0.3584, -0.0058]) tensor([0.0806, 0.2130, 0.4168, 0.2896]) -Greedy action tensor([-1.8275, -0.4736, 0.6431, -0.0372]) tensor([0.0441, 0.1706, 0.5213, 0.2640]) -Greedy action tensor([-1.3879, -0.6934, 0.3275, 0.1223]) tensor([0.0764, 0.1530, 0.4247, 0.3459]) -Greedy action tensor([-1.7377, -0.3336, 0.5764, -0.0283]) tensor([0.0483, 0.1966, 0.4884, 0.2668]) -Greedy action tensor([-1.7980, -0.4085, 0.6491, -0.0349]) tensor([0.0446, 0.1792, 0.5159, 0.2603]) -Greedy action tensor([-1.7359, -0.1516, 0.6357, 0.0303]) tensor([0.0446, 0.2173, 0.4775, 0.2606]) -Greedy action tensor([-1.8972, -0.4131, 0.6629, -0.1324]) tensor([0.0413, 0.1824, 0.5349, 0.2414]) -Greedy action tensor([-1.8306, -0.4655, 0.6776, -0.0092]) tensor([0.0428, 0.1675, 0.5254, 0.2644]) -Greedy action tensor([-1.3410, 0.0395, 0.3386, -0.0499]) tensor([0.0715, 0.2845, 0.3837, 0.2602]) -Greedy action tensor([-1.7990, -0.1899, 0.6005, -0.0549]) tensor([0.0440, 0.2198, 0.4846, 0.2516]) -Greedy action tensor([-0.7674, 0.2337, 0.4395, 0.5592]) tensor([0.0923, 0.2512, 0.3086, 0.3479]) -Greedy action tensor([-1.7626, -0.4630, 0.6840, -0.4384]) tensor([0.0501, 0.1836, 0.5781, 0.1882]) -Greedy action tensor([-1.9222, -0.4215, 0.6568, -0.1625]) tensor([0.0409, 0.1832, 0.5386, 0.2374]) -Greedy action tensor([-0.7025, -0.0137, 1.2611, 1.3659]) tensor([0.0555, 0.1105, 0.3952, 0.4389]) -Greedy action tensor([-0.6884, -0.7200, 0.2619, -0.0758]) tensor([0.1562, 0.1514, 0.4041, 0.2883]) -Greedy action tensor([-1.1972, -0.5752, 1.1834, 1.3354]) tensor([0.0381, 0.0709, 0.4117, 0.4793]) -Greedy action tensor([-1.3134, -0.5183, 0.6158, -0.4460]) tensor([0.0801, 0.1775, 0.5516, 0.1908]) -Greedy action tensor([-0.9268, 0.0337, 0.1504, -0.3326]) tensor([0.1196, 0.3125, 0.3512, 0.2167]) -Greedy action tensor([-1.8857, -0.4454, 0.6362, -0.1494]) tensor([0.0428, 0.1808, 0.5333, 0.2431]) -Greedy action tensor([-1.2899, -0.5725, 0.4695, 0.6696]) tensor([0.0627, 0.1284, 0.3641, 0.4448]) -Greedy action tensor([-1.8652, -0.4550, 0.6217, -0.1288]) tensor([0.0439, 0.1797, 0.5274, 0.2490]) -Greedy action tensor([-1.9143, -0.5202, 1.3451, 0.7529]) tensor([0.0220, 0.0887, 0.5726, 0.3167]) -Greedy action tensor([-1.8824, -0.4394, 0.6333, -0.1489]) tensor([0.0430, 0.1819, 0.5318, 0.2433]) -Greedy action tensor([-1.8677, -0.4218, 0.6329, -0.1717]) tensor([0.0437, 0.1855, 0.5326, 0.2382]) -Greedy action tensor([-1.7317, -0.4874, 0.7540, 0.2136]) tensor([0.0426, 0.1478, 0.5116, 0.2980]) -Greedy action tensor([-1.7884, -0.3861, 0.5874, -0.0881]) tensor([0.0469, 0.1908, 0.5052, 0.2571]) -Greedy action tensor([-1.4841, -0.5380, 0.4552, 0.0877]) tensor([0.0652, 0.1679, 0.4532, 0.3138]) -Greedy action tensor([-1.0225, -0.6130, 0.2215, 0.3453]) tensor([0.1010, 0.1521, 0.3504, 0.3965]) -Greedy action tensor([-0.3945, 0.2124, 0.6372, 1.3725]) tensor([0.0870, 0.1596, 0.2441, 0.5093]) -Greedy action tensor([-1.8934, -0.3944, 0.5953, -0.2655]) tensor([0.0442, 0.1980, 0.5326, 0.2252]) -Greedy action tensor([-1.8246, -0.4077, 0.5860, -0.1063]) tensor([0.0458, 0.1888, 0.5101, 0.2553]) -Greedy action tensor([-1.7398, -0.5180, 0.6404, 0.0550]) tensor([0.0471, 0.1599, 0.5093, 0.2836]) -Greedy action tensor([-1.5105, -0.5435, 0.4408, 0.1709]) tensor([0.0623, 0.1640, 0.4388, 0.3350]) -Greedy action tensor([-1.2120, -0.5838, 0.2968, 0.2895]) tensor([0.0841, 0.1577, 0.3805, 0.3777]) -Greedy action tensor([-1.7138, -0.4407, 0.6040, 0.1076]) tensor([0.0478, 0.1709, 0.4857, 0.2956]) -Greedy action tensor([0.1671, 0.1481, 0.5725, 1.5657]) tensor([0.1328, 0.1303, 0.1992, 0.5377]) -Greedy action tensor([-1.8386, -0.3709, 0.6062, -0.1149]) tensor([0.0445, 0.1931, 0.5130, 0.2494]) -Greedy action tensor([-1.5612, -0.5714, 0.4629, 0.0528]) tensor([0.0614, 0.1653, 0.4649, 0.3085]) -Greedy action tensor([-1.8935, -0.4354, 0.6451, -0.1503]) tensor([0.0422, 0.1815, 0.5348, 0.2414]) -Greedy action tensor([-1.7758, -0.4904, 0.5673, -0.0741]) tensor([0.0488, 0.1763, 0.5077, 0.2673]) -Greedy action tensor([-1.7872, -0.3172, 0.6510, -0.0789]) tensor([0.0448, 0.1948, 0.5131, 0.2473]) -Greedy action tensor([-1.8364, -0.4166, 0.6032, -0.1148]) tensor([0.0450, 0.1863, 0.5166, 0.2520]) -Greedy action tensor([-0.5776, -0.4685, 0.1584, 0.1293]) tensor([0.1605, 0.1790, 0.3351, 0.3254]) -Greedy action tensor([-1.5454, -0.5594, 0.4545, 0.1107]) tensor([0.0613, 0.1644, 0.4531, 0.3212]) -Greedy action tensor([-1.4799, -0.5410, 0.6698, 0.4981]) tensor([0.0516, 0.1320, 0.4431, 0.3732]) -Greedy action tensor([-1.8263, -0.1459, 0.5627, -0.0829]) tensor([0.0435, 0.2335, 0.4743, 0.2487]) -Greedy action tensor([-1.5601, -0.5631, 0.4776, -0.0200]) tensor([0.0623, 0.1689, 0.4781, 0.2907]) -Greedy action tensor([-1.8819, -0.4686, 0.6473, -0.1239]) tensor([0.0426, 0.1752, 0.5348, 0.2473]) -Greedy action tensor([-1.7901, -0.4755, 0.5737, -0.0919]) tensor([0.0480, 0.1788, 0.5107, 0.2625]) -Greedy action tensor([-1.7729, -0.4051, 0.5738, -0.0798]) tensor([0.0480, 0.1887, 0.5021, 0.2612]) -Greedy action tensor([-1.8745, -0.3844, 0.6474, -0.1170]) tensor([0.0422, 0.1873, 0.5257, 0.2448]) -Greedy action tensor([-1.7739, -0.3535, 0.5668, -0.0676]) tensor([0.0475, 0.1968, 0.4938, 0.2619]) -Greedy action tensor([-1.6875, -0.3907, 0.6495, 0.1126]) tensor([0.0475, 0.1737, 0.4915, 0.2873]) -Greedy action tensor([-1.8389, -0.4396, 0.6163, -0.1195]) tensor([0.0449, 0.1819, 0.5228, 0.2505]) -Greedy action tensor([-1.0974, -0.6044, 0.2399, 0.2714]) tensor([0.0964, 0.1578, 0.3670, 0.3788]) -Greedy action tensor([-1.8026, -0.4863, 0.5950, -0.0788]) tensor([0.0469, 0.1748, 0.5155, 0.2628]) -Greedy action tensor([-1.7062, -0.3772, 1.1425, 0.8950]) tensor([0.0282, 0.1063, 0.4860, 0.3795]) -Greedy action tensor([-1.8428, -0.4864, 0.6689, -0.0633]) tensor([0.0432, 0.1678, 0.5328, 0.2562]) -Greedy action tensor([-0.7124, 0.6462, 0.0286, 0.2963]) tensor([0.1028, 0.3998, 0.2156, 0.2818]) -Greedy action tensor([ 0.7553, -0.5395, -0.0959, -0.4796]) tensor([0.5021, 0.1375, 0.2143, 0.1460]) -Greedy action tensor([ 0.7289, -0.8144, -0.0165, -0.8545]) tensor([0.5281, 0.1128, 0.2506, 0.1084]) -Greedy action tensor([ 0.7550, -0.5437, -0.0472, -0.3603]) tensor([0.4880, 0.1332, 0.2188, 0.1600]) -Greedy action tensor([ 0.8785, -0.6209, 0.1082, -0.7735]) tensor([0.5325, 0.1189, 0.2465, 0.1021]) -Greedy action tensor([ 1.0369, -0.8336, 0.0195, -0.4608]) tensor([0.5750, 0.0886, 0.2079, 0.1286]) -Greedy action tensor([ 0.6948, -0.4038, -0.0450, -0.3847]) tensor([0.4650, 0.1550, 0.2219, 0.1580]) -Greedy action tensor([ 0.7684, -0.3299, -0.2055, -0.4902]) tensor([0.5012, 0.1671, 0.1893, 0.1424]) -Greedy action tensor([ 0.7813, -0.7663, -0.0337, -0.4859]) tensor([0.5163, 0.1098, 0.2285, 0.1454]) -Greedy action tensor([ 1.0857, -0.9526, -0.0195, -0.7033]) tensor([0.6141, 0.0800, 0.2033, 0.1026]) -Greedy action tensor([ 0.8180, -0.4552, 0.0119, -0.2963]) tensor([0.4867, 0.1362, 0.2174, 0.1597]) -Greedy action tensor([ 0.7078, -0.6658, 0.0777, -0.4664]) tensor([0.4774, 0.1209, 0.2542, 0.1475]) -Greedy action tensor([ 0.5289, -0.4403, -0.2148, -0.0674]) tensor([0.4157, 0.1577, 0.1976, 0.2290]) -Greedy action tensor([ 0.7553, -0.4768, -0.0925, -0.2158]) tensor([0.4765, 0.1390, 0.2041, 0.1804]) -Greedy action tensor([ 0.5512, 0.1201, -0.1535, -0.1302]) tensor([0.3774, 0.2452, 0.1865, 0.1909]) -Greedy action tensor([ 0.5294, -0.0943, -0.0605, -0.3115]) tensor([0.3966, 0.2125, 0.2198, 0.1711]) -Greedy action tensor([ 0.4612, -0.3953, 0.1940, -0.5742]) tensor([0.3929, 0.1668, 0.3008, 0.1395]) -Greedy action tensor([ 0.7001, -0.2079, 0.0113, -0.1200]) tensor([0.4263, 0.1719, 0.2141, 0.1877]) -Greedy action tensor([ 0.4657, 0.0811, -0.0300, -0.0961]) tensor([0.3497, 0.2380, 0.2130, 0.1994]) -Greedy action tensor([ 0.2680, 0.1212, -0.1336, -0.3018]) tensor([0.3227, 0.2787, 0.2160, 0.1826]) -Greedy action tensor([ 0.7318, -0.4363, 0.0907, -0.4924]) tensor([0.4691, 0.1459, 0.2471, 0.1379]) -Greedy action tensor([ 0.8199, -0.6316, 0.0174, -0.6548]) tensor([0.5232, 0.1225, 0.2345, 0.1197]) -Greedy action tensor([ 0.8356, -0.4812, -0.1414, -0.4098]) tensor([0.5175, 0.1387, 0.1948, 0.1490]) -Greedy action tensor([ 0.6534, -0.3908, -0.0054, -0.1106]) tensor([0.4282, 0.1507, 0.2216, 0.1995]) -Greedy action tensor([ 0.7124, -0.1674, -0.0494, -0.1571]) tensor([0.4346, 0.1803, 0.2029, 0.1822]) -Greedy action tensor([ 0.4861, 0.0158, -0.0245, -0.2879]) tensor([0.3723, 0.2326, 0.2234, 0.1717]) -Greedy action tensor([ 0.1256, 0.2128, -0.0648, -0.3863]) tensor([0.2843, 0.3102, 0.2350, 0.1704]) -Greedy action tensor([ 0.6340, 0.2396, -0.2276, -0.1408]) tensor([0.3910, 0.2636, 0.1652, 0.1802]) -Greedy action tensor([ 0.5633, -0.5750, 0.1077, -0.5991]) tensor([0.4411, 0.1413, 0.2797, 0.1379]) -Greedy action tensor([ 0.5272, 0.2255, -0.0671, -0.0482]) tensor([0.3504, 0.2591, 0.1934, 0.1971]) -Greedy action tensor([ 0.9740, -0.6867, 0.1031, -0.3284]) tensor([0.5318, 0.1010, 0.2226, 0.1446]) -Greedy action tensor([ 1.2134, -0.8199, 0.0299, -0.3505]) tensor([0.6074, 0.0795, 0.1860, 0.1271]) -Greedy action tensor([ 0.7882, -0.2641, 0.0375, -0.4568]) tensor([0.4741, 0.1655, 0.2238, 0.1365]) -Greedy action tensor([ 0.7964, -0.6615, 0.0747, -0.9174]) tensor([0.5266, 0.1226, 0.2559, 0.0949]) -Greedy action tensor([ 0.3640, -0.4116, -0.0569, -0.1076]) tensor([0.3649, 0.1680, 0.2395, 0.2277]) -Greedy action tensor([ 0.8781, -1.1438, -0.0947, -0.6523]) tensor([0.5791, 0.0767, 0.2189, 0.1253]) -Greedy action tensor([ 0.4544, -0.3326, -0.1202, -0.1494]) tensor([0.3899, 0.1775, 0.2195, 0.2131]) -Greedy action tensor([ 1.1562, -0.5842, -0.0758, -0.5265]) tensor([0.6049, 0.1061, 0.1765, 0.1124]) -Greedy action tensor([ 0.8879, -0.6245, -0.0327, -0.5664]) tensor([0.5399, 0.1190, 0.2150, 0.1261]) -Greedy action tensor([ 0.8588, -0.6691, -0.0393, -0.4678]) tensor([0.5292, 0.1148, 0.2156, 0.1404]) -Greedy action tensor([ 0.7854, -0.2269, 0.1426, -0.4398]) tensor([0.4581, 0.1665, 0.2409, 0.1345]) -Greedy action tensor([ 0.4947, -0.1513, -0.0276, -0.0650]) tensor([0.3719, 0.1949, 0.2206, 0.2125]) -Greedy action tensor([ 0.7844, -0.3756, -0.0707, -0.2216]) tensor([0.4752, 0.1490, 0.2021, 0.1738]) -Greedy action tensor([ 0.6961, -0.2215, -0.1181, -0.1271]) tensor([0.4383, 0.1751, 0.1942, 0.1924]) -Greedy action tensor([ 0.6174, -0.3604, -0.0493, -0.4527]) tensor([0.4479, 0.1685, 0.2300, 0.1536]) -Greedy action tensor([ 0.9308, -0.9306, 0.0904, -0.5663]) tensor([0.5522, 0.0859, 0.2383, 0.1236]) -Greedy action tensor([ 0.4387, -0.2349, -0.0757, -0.1128]) tensor([0.3726, 0.1900, 0.2228, 0.2146]) -Greedy action tensor([ 0.7169, -0.5879, -0.1410, -0.2135]) tensor([0.4785, 0.1298, 0.2029, 0.1887]) -Greedy action tensor([ 0.6778, -0.0962, -0.1498, -0.3958]) tensor([0.4464, 0.2059, 0.1951, 0.1526]) -Greedy action tensor([ 0.4120, 0.0700, -0.0340, -0.2183]) tensor([0.3469, 0.2464, 0.2221, 0.1847]) -Greedy action tensor([ 0.6498, -0.5168, -0.0140, -0.2760]) tensor([0.4499, 0.1401, 0.2317, 0.1783]) -Greedy action tensor([ 0.4127, 0.0411, -0.0590, -0.2766]) tensor([0.3552, 0.2449, 0.2216, 0.1783]) -Greedy action tensor([ 0.6072, -0.2349, -0.0028, -0.1759]) tensor([0.4113, 0.1772, 0.2235, 0.1880]) -Greedy action tensor([ 0.8477, -0.7386, 0.0149, -0.4604]) tensor([0.5236, 0.1072, 0.2277, 0.1415]) -Greedy action tensor([ 0.8309, -0.4192, -0.1859, -0.4220]) tensor([0.5171, 0.1481, 0.1871, 0.1477]) -Greedy action tensor([ 0.3584, -0.4088, 0.0188, -0.2992]) tensor([0.3711, 0.1723, 0.2643, 0.1923]) -Greedy action tensor([ 0.3573, 0.0523, -0.0395, -0.0216]) tensor([0.3232, 0.2382, 0.2173, 0.2213]) -Greedy action tensor([ 0.7641, -0.5924, -0.0314, -0.2190]) tensor([0.4801, 0.1236, 0.2167, 0.1796]) -Greedy action tensor([ 0.8673, -0.9277, -0.0340, -0.5706]) tensor([0.5526, 0.0918, 0.2244, 0.1312]) -Greedy action tensor([ 0.7331, -0.5613, 0.1125, -0.4832]) tensor([0.4744, 0.1300, 0.2550, 0.1406]) -Greedy action tensor([ 0.8584, -0.7737, -0.0743, -0.4050]) tensor([0.5343, 0.1045, 0.2102, 0.1510]) -Greedy action tensor([ 0.3961, 0.0309, 0.1624, -0.2882]) tensor([0.3344, 0.2321, 0.2647, 0.1687]) -Greedy action tensor([ 0.6686, -0.0652, 0.1276, -0.4918]) tensor([0.4210, 0.2021, 0.2451, 0.1319]) -Greedy action tensor([ 0.7599, -0.3756, -0.0329, -0.2162]) tensor([0.4650, 0.1494, 0.2104, 0.1752]) -Greedy action tensor([ 0.5562, -0.6461, -0.1727, -0.2667]) tensor([0.4500, 0.1352, 0.2171, 0.1976]) -Greedy action tensor([ 1.0795, -0.4755, -0.1541, -0.5043]) tensor([0.5856, 0.1237, 0.1705, 0.1202]) -Greedy action tensor([ 0.8512, -0.7586, 0.1118, -0.3210]) tensor([0.5033, 0.1006, 0.2403, 0.1559]) -Greedy action tensor([ 0.7646, -0.1407, 0.0476, -0.1081]) tensor([0.4328, 0.1750, 0.2113, 0.1808]) -Greedy action tensor([ 0.8335, -0.5487, -0.0183, -0.3628]) tensor([0.5051, 0.1268, 0.2155, 0.1527]) -Greedy action tensor([ 0.5150, -0.4090, 0.0178, -0.1304]) tensor([0.3953, 0.1569, 0.2404, 0.2073]) -Greedy action tensor([ 0.2074, 0.1829, -0.1976, -0.0505]) tensor([0.2928, 0.2857, 0.1953, 0.2262]) -Greedy action tensor([ 0.7766, -0.6765, 0.0129, -0.4604]) tensor([0.5025, 0.1175, 0.2341, 0.1459]) -Greedy action tensor([ 0.6635, -0.4903, -0.0915, -0.3689]) tensor([0.4669, 0.1473, 0.2195, 0.1663]) -Greedy action tensor([ 0.5981, -0.1986, 0.1062, -0.3237]) tensor([0.4065, 0.1833, 0.2486, 0.1617]) -Greedy action tensor([ 0.4090, -0.2867, -0.0635, -0.4241]) tensor([0.3911, 0.1951, 0.2438, 0.1700]) -Greedy action tensor([ 0.4505, -0.0744, -0.0886, -0.0904]) tensor([0.3627, 0.2146, 0.2115, 0.2112]) -Greedy action tensor([ 0.6578, -0.2669, -0.0830, -0.3195]) tensor([0.4445, 0.1763, 0.2119, 0.1673]) -Greedy action tensor([ 0.7905, -0.7967, -0.0110, -0.3815]) tensor([0.5094, 0.1042, 0.2286, 0.1578]) -Greedy action tensor([ 0.6197, -0.3607, -0.0574, -0.2168]) tensor([0.4317, 0.1620, 0.2193, 0.1870]) -Greedy action tensor([ 0.5612, -0.1751, -0.0082, -0.2046]) tensor([0.3985, 0.1908, 0.2255, 0.1853]) -Greedy action tensor([ 1.3535, -1.0622, 0.0332, -0.8719]) tensor([0.6829, 0.0610, 0.1824, 0.0738]) -Greedy action tensor([ 0.4781, -0.5088, -0.0734, -0.6251]) tensor([0.4385, 0.1634, 0.2526, 0.1455]) -Greedy action tensor([ 1.6870, -0.3492, -0.6075, 0.6777]) tensor([0.6266, 0.0818, 0.0632, 0.2284]) -Greedy action tensor([ 1.5600, 0.3262, -0.5223, 0.1638]) tensor([0.6012, 0.1751, 0.0749, 0.1488]) -Greedy action tensor([ 1.9306, -0.6769, -0.3177, 0.3517]) tensor([0.7218, 0.0532, 0.0762, 0.1488]) -Greedy action tensor([ 1.0685, -0.3418, -0.2895, 0.1643]) tensor([0.5246, 0.1280, 0.1349, 0.2124]) -Greedy action tensor([ 1.8985, -0.4196, -0.1969, 0.5174]) tensor([0.6790, 0.0669, 0.0835, 0.1706]) -Greedy action tensor([ 1.5786, -0.6644, -0.2567, 0.3521]) tensor([0.6414, 0.0681, 0.1024, 0.1881]) -Greedy action tensor([ 1.8883, -0.7720, -0.4318, 0.2207]) tensor([0.7370, 0.0515, 0.0724, 0.1391]) -Greedy action tensor([ 1.2841, 0.0292, -0.9810, 0.2799]) tensor([0.5697, 0.1624, 0.0591, 0.2087]) -Greedy action tensor([ 1.7077, -0.4669, -0.0550, 0.4592]) tensor([0.6361, 0.0723, 0.1091, 0.1825]) -Greedy action tensor([ 1.0647, 0.0576, -0.1877, -0.1850]) tensor([0.5161, 0.1885, 0.1475, 0.1479]) -Greedy action tensor([ 1.6967, -0.6063, -0.1567, 0.5765]) tensor([0.6318, 0.0631, 0.0990, 0.2061]) -Greedy action tensor([ 1.9371, -0.8477, -0.3887, 0.8824]) tensor([0.6632, 0.0410, 0.0648, 0.2310]) -Greedy action tensor([ 0.4912, -0.3284, -0.1184, 0.2177]) tensor([0.3643, 0.1605, 0.1980, 0.2771]) -Greedy action tensor([ 1.7019, 0.0398, -1.0858, 0.5214]) tensor([0.6417, 0.1217, 0.0395, 0.1971]) -Greedy action tensor([ 1.3118, -0.4898, -0.3247, 0.2160]) tensor([0.5903, 0.0974, 0.1149, 0.1973]) -Greedy action tensor([ 1.5875, -0.2924, -0.2448, 0.3964]) tensor([0.6186, 0.0944, 0.0990, 0.1880]) -Greedy action tensor([ 0.9034, -0.6190, 0.0741, 0.2162]) tensor([0.4635, 0.1011, 0.2022, 0.2331]) -Greedy action tensor([ 1.5890, -0.6478, -0.5670, 0.8632]) tensor([0.5860, 0.0626, 0.0678, 0.2836]) -Greedy action tensor([ 1.3467, -0.1529, -0.4765, 0.3658]) tensor([0.5683, 0.1268, 0.0918, 0.2131]) -Greedy action tensor([ 1.5908, -0.4636, -0.3970, 0.3225]) tensor([0.6466, 0.0829, 0.0886, 0.1819]) -Greedy action tensor([ 1.1903, -0.3660, 0.0089, 0.1382]) tensor([0.5356, 0.1130, 0.1644, 0.1870]) -Greedy action tensor([ 1.2406, -0.6471, -0.0631, 0.2342]) tensor([0.5591, 0.0847, 0.1518, 0.2044]) -Greedy action tensor([ 1.2083, -0.5053, -0.3135, 0.0769]) tensor([0.5810, 0.1047, 0.1268, 0.1874]) -Greedy action tensor([ 1.0453, -0.0443, -0.2026, 0.1090]) tensor([0.4961, 0.1669, 0.1425, 0.1945]) -Greedy action tensor([ 1.1414, -0.4981, -0.1365, 0.3708]) tensor([0.5167, 0.1003, 0.1440, 0.2391]) -Greedy action tensor([ 0.6692, -0.3462, -0.2285, 0.2674]) tensor([0.4100, 0.1485, 0.1671, 0.2743]) -Greedy action tensor([ 1.6123, -0.5884, 0.0237, 0.3635]) tensor([0.6243, 0.0691, 0.1275, 0.1791]) -Greedy action tensor([ 0.8513, -0.5018, 0.0322, -0.0044]) tensor([0.4708, 0.1217, 0.2075, 0.2001]) -Greedy action tensor([ 0.4251, -0.5359, 0.1274, 0.1697]) tensor([0.3449, 0.1319, 0.2561, 0.2671]) -Greedy action tensor([ 1.4934, 0.0182, -1.0852, 0.7303]) tensor([0.5647, 0.1292, 0.0428, 0.2633]) -Greedy action tensor([ 1.7410, -0.8268, -0.0286, 0.4561]) tensor([0.6563, 0.0503, 0.1118, 0.1816]) -Greedy action tensor([ 1.4736, 0.0586, -0.5408, 0.5317]) tensor([0.5662, 0.1375, 0.0755, 0.2207]) -Greedy action tensor([ 0.7576, -0.3098, -0.2296, 0.2333]) tensor([0.4332, 0.1490, 0.1614, 0.2564]) -Greedy action tensor([ 0.9583, -0.3418, -0.0762, 0.4747]) tensor([0.4455, 0.1214, 0.1583, 0.2747]) -Greedy action tensor([ 1.6069, -0.5608, -0.5083, 0.5884]) tensor([0.6265, 0.0717, 0.0756, 0.2263]) -Greedy action tensor([ 0.9337, 0.2049, -1.1829, 0.2805]) tensor([0.4710, 0.2272, 0.0567, 0.2451]) -Greedy action tensor([ 1.6375, -0.1187, -0.5828, 0.5452]) tensor([0.6185, 0.1068, 0.0672, 0.2075]) -Greedy action tensor([ 1.1367, -0.1891, -0.2054, 0.0252]) tensor([0.5388, 0.1431, 0.1408, 0.1773]) -Greedy action tensor([ 1.4806, -0.6697, -0.2773, 0.1587]) tensor([0.6429, 0.0749, 0.1108, 0.1714]) -Greedy action tensor([ 2.3004, -0.7364, -0.1138, 0.3698]) tensor([0.7797, 0.0374, 0.0697, 0.1131]) -Greedy action tensor([ 1.2023, 0.1603, -0.6896, 0.1847]) tensor([0.5362, 0.1891, 0.0808, 0.1938]) -Greedy action tensor([ 1.7011, -0.4878, -0.4533, 0.2218]) tensor([0.6869, 0.0770, 0.0797, 0.1565]) -Greedy action tensor([ 1.0178, 0.0960, -0.1983, 0.1199]) tensor([0.4758, 0.1893, 0.1410, 0.1939]) -Greedy action tensor([ 1.1685, -0.4276, -0.6878, 0.3374]) tensor([0.5573, 0.1129, 0.0871, 0.2427]) -Greedy action tensor([ 0.8349, -0.1798, -0.0521, 0.1519]) tensor([0.4387, 0.1590, 0.1807, 0.2216]) -Greedy action tensor([ 1.2810, -0.1594, -0.8044, 0.3324]) tensor([0.5720, 0.1355, 0.0711, 0.2215]) -Greedy action tensor([ 1.9152, -0.4189, -0.3330, 0.2143]) tensor([0.7220, 0.0700, 0.0762, 0.1318]) -Greedy action tensor([ 1.0116, -0.0878, -0.5843, 0.5759]) tensor([0.4582, 0.1526, 0.0929, 0.2963]) -Greedy action tensor([ 0.8173, -0.4524, -0.1673, 0.6078]) tensor([0.4056, 0.1139, 0.1515, 0.3289]) -Greedy action tensor([ 1.1886, -0.3694, -0.1467, 0.2547]) tensor([0.5357, 0.1128, 0.1409, 0.2106]) -Greedy action tensor([ 1.0067, -0.4491, -0.4819, 0.2340]) tensor([0.5207, 0.1214, 0.1175, 0.2404]) -Greedy action tensor([ 1.3103, -0.7080, -0.3406, 0.6730]) tensor([0.5395, 0.0717, 0.1035, 0.2853]) -Greedy action tensor([ 0.8653, -0.4906, 0.0419, -0.1576]) tensor([0.4863, 0.1253, 0.2135, 0.1749]) -Greedy action tensor([ 1.9405, -1.1030, 0.0588, 0.5822]) tensor([0.6863, 0.0327, 0.1045, 0.1764]) -Greedy action tensor([ 1.3300, -0.4295, -0.2094, 0.3682]) tensor([0.5654, 0.0973, 0.1213, 0.2161]) -Greedy action tensor([ 0.9491, -0.2872, -0.2014, -0.0150]) tensor([0.5030, 0.1461, 0.1592, 0.1918]) -Greedy action tensor([ 0.9349, -0.3577, -0.2518, 0.4001]) tensor([0.4618, 0.1268, 0.1409, 0.2705]) -Greedy action tensor([ 1.3556, -0.3241, -0.1044, 0.4738]) tensor([0.5456, 0.1017, 0.1267, 0.2259]) -Greedy action tensor([ 1.1491, -0.0934, -0.5141, 0.6099]) tensor([0.4851, 0.1400, 0.0919, 0.2829]) -Greedy action tensor([ 1.3448, -0.4530, -0.5554, 0.6456]) tensor([0.5518, 0.0914, 0.0825, 0.2742]) -Greedy action tensor([ 1.7310, -0.2505, -0.4001, 0.4241]) tensor([0.6548, 0.0903, 0.0777, 0.1772]) -Greedy action tensor([ 1.3867, -0.4471, -0.3656, 0.2456]) tensor([0.6051, 0.0967, 0.1049, 0.1933]) -Greedy action tensor([ 1.2287, 0.0993, -1.4412, -0.1321]) tensor([0.6065, 0.1960, 0.0420, 0.1555]) -Greedy action tensor([ 1.4369, -0.4976, -0.4540, 0.0888]) tensor([0.6430, 0.0929, 0.0971, 0.1670]) -Greedy action tensor([ 1.4634, -0.7136, -0.4204, 0.2850]) tensor([0.6357, 0.0721, 0.0966, 0.1956]) -Greedy action tensor([ 1.7176, -0.5413, -0.4452, 0.5986]) tensor([0.6468, 0.0676, 0.0744, 0.2113]) -Greedy action tensor([ 1.5246, -0.3049, -0.2740, -0.0328]) tensor([0.6507, 0.1044, 0.1077, 0.1371]) -Greedy action tensor([1.9762, 0.8077, 0.4160, 0.1762]) tensor([0.5930, 0.1843, 0.1246, 0.0980]) -Greedy action tensor([ 1.5457, -0.1328, -0.8336, 0.4502]) tensor([0.6197, 0.1157, 0.0574, 0.2072]) -Greedy action tensor([ 1.6574, -0.7141, -0.3062, 0.3196]) tensor([0.6684, 0.0624, 0.0938, 0.1754]) -Greedy action tensor([ 1.5687, -0.5830, -0.4386, 0.4235]) tensor([0.6374, 0.0741, 0.0856, 0.2028]) -Greedy action tensor([ 1.5426, -0.3673, 0.0532, 0.3290]) tensor([0.5985, 0.0886, 0.1350, 0.1778]) -Greedy action tensor([ 1.5244, -0.6042, -0.4058, 0.4246]) tensor([0.6261, 0.0745, 0.0909, 0.2085]) -Greedy action tensor([ 1.3535, -0.1205, -0.0726, 0.6919]) tensor([0.5037, 0.1153, 0.1210, 0.2599]) -Greedy action tensor([ 1.4297, -0.7190, -0.4719, 0.6714]) tensor([0.5766, 0.0672, 0.0861, 0.2701]) -Greedy action tensor([ 1.9714, -0.7605, -0.5924, 0.6055]) tensor([0.7157, 0.0466, 0.0551, 0.1826]) -Greedy action tensor([ 0.8413, -0.2910, -0.1429, 0.1200]) tensor([0.4583, 0.1477, 0.1713, 0.2228]) -Greedy action tensor([ 1.7216, -0.4275, -0.1720, 0.8686]) tensor([0.5906, 0.0689, 0.0889, 0.2517]) -Greedy action tensor([ 1.1862, -0.2658, -0.4355, 0.3544]) tensor([0.5356, 0.1254, 0.1058, 0.2331]) -Greedy action tensor([ 0.8306, -0.0077, -0.1823, 0.1577]) tensor([0.4337, 0.1875, 0.1575, 0.2213]) -Greedy action tensor([ 1.7053, -0.3427, -0.1362, 0.4997]) tensor([0.6301, 0.0813, 0.0999, 0.1887]) -Greedy action tensor([-0.4988, 0.8786, 1.3892, -1.6997]) tensor([0.0842, 0.3339, 0.5565, 0.0253]) -Greedy action tensor([ 0.3804, -0.3351, 0.6222, -0.2843]) tensor([0.3052, 0.1492, 0.3886, 0.1570]) -Greedy action tensor([-0.1872, -0.1220, -0.6315, 0.3299]) tensor([0.2280, 0.2434, 0.1462, 0.3824]) -Greedy action tensor([ 0.0149, -0.3014, 0.7997, -1.0038]) tensor([0.2336, 0.1702, 0.5119, 0.0843]) -Greedy action tensor([-1.6925, -0.9642, 0.1166, -0.3850]) tensor([0.0777, 0.1609, 0.4742, 0.2872]) -Greedy action tensor([ 0.9368, -0.3940, 0.1378, -0.3253]) tensor([0.5007, 0.1323, 0.2252, 0.1417]) -Greedy action tensor([-0.3767, -0.5202, 0.8358, -1.2835]) tensor([0.1776, 0.1538, 0.5969, 0.0717]) -Greedy action tensor([-0.4547, -0.7654, -0.7029, 0.9750]) tensor([0.1495, 0.1095, 0.1166, 0.6244]) -Greedy action tensor([ 0.9791, 0.4148, -0.1070, -0.3397]) tensor([0.4600, 0.2617, 0.1553, 0.1230]) -Greedy action tensor([-0.0884, -0.7935, 1.5354, -0.0127]) tensor([0.1308, 0.0646, 0.6635, 0.1411]) -Greedy action tensor([ 0.6372, -1.3273, 1.8041, -0.1921]) tensor([0.2088, 0.0293, 0.6708, 0.0911]) -Greedy action tensor([ 0.0658, -0.7869, -0.9410, 1.6928]) tensor([0.1454, 0.0620, 0.0531, 0.7396]) -Greedy action tensor([0.2318, 0.4577, 0.3165, 0.2695]) tensor([0.2283, 0.2862, 0.2485, 0.2371]) -Greedy action tensor([ 1.0807, -0.7682, -1.2515, 0.4760]) tensor([0.5553, 0.0874, 0.0539, 0.3034]) -Greedy action tensor([-0.2605, -2.1750, -1.1878, 1.0165]) tensor([0.1950, 0.0287, 0.0771, 0.6991]) -Greedy action tensor([ 1.2042, -0.7370, -0.2663, -0.5380]) tensor([0.6458, 0.0927, 0.1484, 0.1131]) -Greedy action tensor([ 1.6068, -0.6177, 0.5319, -0.6228]) tensor([0.6422, 0.0694, 0.2192, 0.0691]) -Greedy action tensor([ 0.4540, -1.8188, -0.2332, 0.4733]) tensor([0.3809, 0.0392, 0.1916, 0.3883]) -Greedy action tensor([ 0.0620, -1.1991, 0.0637, -0.2455]) tensor([0.3311, 0.0938, 0.3317, 0.2435]) -Greedy action tensor([-0.1505, -0.1084, -0.9282, 0.9715]) tensor([0.1794, 0.1871, 0.0824, 0.5510]) -Greedy action tensor([-0.0623, -1.1942, -0.6645, -0.4939]) tensor([0.3969, 0.1280, 0.2174, 0.2578]) -Greedy action tensor([ 0.1943, -0.3177, -0.4659, 0.2443]) tensor([0.3157, 0.1892, 0.1632, 0.3319]) -Greedy action tensor([-1.2138, -0.1708, -1.1147, 0.3523]) tensor([0.1028, 0.2917, 0.1135, 0.4921]) -Greedy action tensor([ 1.3158, -1.0195, 1.4706, -0.2599]) tensor([0.4047, 0.0392, 0.4724, 0.0837]) -Greedy action tensor([ 0.5943, 0.0123, 0.0305, -0.0496]) tensor([0.3769, 0.2106, 0.2145, 0.1980]) -Greedy action tensor([ 0.6717, -1.6570, 0.4134, 1.2255]) tensor([0.2770, 0.0270, 0.2140, 0.4820]) -Greedy action tensor([0.9417, 0.5548, 0.1823, 1.9490]) tensor([0.2047, 0.1390, 0.0958, 0.5605]) -Greedy action tensor([-0.0855, -0.4092, 1.0054, -0.3874]) tensor([0.1838, 0.1330, 0.5473, 0.1359]) -Greedy action tensor([-0.7473, -0.1411, 0.4399, -0.4262]) tensor([0.1335, 0.2448, 0.4376, 0.1841]) -Greedy action tensor([-0.2729, 0.3318, -0.1072, 0.9705]) tensor([0.1337, 0.2448, 0.1578, 0.4636]) -Greedy action tensor([-0.8237, -0.8609, -0.6737, -0.4220]) tensor([0.2165, 0.2086, 0.2515, 0.3235]) -Greedy action tensor([ 0.5172, -1.1013, 1.2717, -1.1111]) tensor([0.2840, 0.0563, 0.6040, 0.0557]) -Greedy action tensor([ 1.3708, -0.2701, 1.5212, 0.6498]) tensor([0.3518, 0.0682, 0.4089, 0.1711]) -Greedy action tensor([-1.2891, -1.4959, -1.1361, -0.2166]) tensor([0.1695, 0.1378, 0.1975, 0.4953]) -Greedy action tensor([ 0.8846, -0.3303, -0.6830, 0.6538]) tensor([0.4349, 0.1291, 0.0907, 0.3453]) -Greedy action tensor([ 1.8914, -1.1033, 0.6381, 1.4576]) tensor([0.5041, 0.0252, 0.1440, 0.3267]) -Greedy action tensor([-0.9725, -0.0456, -0.5194, -0.5444]) tensor([0.1507, 0.3808, 0.2371, 0.2313]) -Greedy action tensor([-0.1677, -0.9271, 0.1467, -0.1373]) tensor([0.2585, 0.1210, 0.3540, 0.2665]) -Greedy action tensor([ 0.5803, -0.9675, 0.2909, 0.1161]) tensor([0.3861, 0.0821, 0.2891, 0.2427]) -Greedy action tensor([0.8172, 0.2075, 0.4829, 0.5362]) tensor([0.3317, 0.1803, 0.2375, 0.2505]) -Greedy action tensor([ 1.0604, -1.9256, 0.5641, 0.7902]) tensor([0.4128, 0.0208, 0.2513, 0.3151]) -Greedy action tensor([-0.5655, -0.4226, 1.6096, -0.2198]) tensor([0.0808, 0.0933, 0.7117, 0.1142]) -Greedy action tensor([-0.8093, -0.7599, -0.0222, 0.4657]) tensor([0.1278, 0.1342, 0.2807, 0.4573]) -Greedy action tensor([ 0.2649, -1.0840, 0.5539, 0.1834]) tensor([0.2844, 0.0738, 0.3797, 0.2621]) -Greedy action tensor([ 0.1843, -1.0196, 0.0547, 0.0898]) tensor([0.3238, 0.0972, 0.2844, 0.2946]) -Greedy action tensor([-1.7632, -1.1079, 0.1370, 0.0370]) tensor([0.0638, 0.1229, 0.4269, 0.3863]) -Greedy action tensor([ 0.8996, -0.1186, -0.0501, 1.4333]) tensor([0.2896, 0.1046, 0.1120, 0.4938]) -Greedy action tensor([ 0.0064, -0.3658, 0.8091, 0.1315]) tensor([0.1979, 0.1364, 0.4415, 0.2242]) -Greedy action tensor([ 0.1821, 0.1665, -0.1305, -1.3082]) tensor([0.3400, 0.3347, 0.2487, 0.0766]) -Greedy action tensor([-0.4297, 0.7202, 0.2556, -0.3239]) tensor([0.1379, 0.4353, 0.2736, 0.1532]) -Greedy action tensor([-1.0305, -1.2066, 0.0790, -1.5263]) tensor([0.1825, 0.1530, 0.5534, 0.1111]) -Greedy action tensor([ 0.5648, -0.8145, -0.3039, 1.3531]) tensor([0.2583, 0.0650, 0.1084, 0.5682]) -Greedy action tensor([-0.5561, -1.5971, 0.4197, 0.7687]) tensor([0.1287, 0.0455, 0.3416, 0.4842]) -Greedy action tensor([ 1.3351, -0.2488, 1.4649, 0.3422]) tensor([0.3684, 0.0756, 0.4195, 0.1365]) -Greedy action tensor([ 0.4855, -0.4391, -0.6332, 1.3008]) tensor([0.2510, 0.0996, 0.0820, 0.5673]) -Greedy action tensor([ 0.0649, -0.2047, 1.6091, -0.7047]) tensor([0.1447, 0.1105, 0.6778, 0.0670]) -Greedy action tensor([ 1.2401, -0.4633, 0.1229, 1.5951]) tensor([0.3407, 0.0620, 0.1115, 0.4859]) -Greedy action tensor([ 0.8050, -0.7464, 1.2544, -0.2941]) tensor([0.3213, 0.0681, 0.5036, 0.1070]) -Greedy action tensor([ 0.2413, 0.3572, -0.1439, 0.2415]) tensor([0.2629, 0.2952, 0.1789, 0.2630]) -Greedy action tensor([ 1.1073, -0.0352, 0.6268, -0.6519]) tensor([0.4740, 0.1512, 0.2932, 0.0816]) -Greedy action tensor([-0.0388, -1.2513, 1.1066, -0.3620]) tensor([0.1936, 0.0576, 0.6087, 0.1401]) -Greedy action tensor([-0.9565, -2.1349, -0.5240, 0.4380]) tensor([0.1453, 0.0447, 0.2239, 0.5860]) -Greedy action tensor([ 0.7594, 0.1910, -0.2465, 0.5166]) tensor([0.3681, 0.2085, 0.1346, 0.2888]) -Greedy action tensor([-0.3758, -1.0257, 0.0131, 0.1089]) tensor([0.2164, 0.1130, 0.3193, 0.3514]) -Greedy action tensor([-1.4483, 0.2371, 0.3904, -0.2350]) tensor([0.0623, 0.3362, 0.3918, 0.2097]) -Greedy action tensor([-0.5001, 1.1411, 1.4523, -0.1117]) tensor([0.0681, 0.3516, 0.4799, 0.1004]) -Greedy action tensor([ 0.2215, -1.5696, 0.2857, 0.7273]) tensor([0.2570, 0.0429, 0.2740, 0.4262]) -Greedy action tensor([ 0.6188, -1.3944, 0.3786, 0.6327]) tensor([0.3408, 0.0455, 0.2680, 0.3456]) -Greedy action tensor([-1.0266, -0.1221, 0.0684, -0.2944]) tensor([0.1171, 0.2893, 0.3500, 0.2435]) -Greedy action tensor([-1.3509, -0.3381, 1.2238, -1.0004]) tensor([0.0546, 0.1504, 0.7173, 0.0776]) -Greedy action tensor([-0.2273, -1.2512, 1.4046, -0.8060]) tensor([0.1422, 0.0511, 0.7270, 0.0797]) -Greedy action tensor([ 0.6070, -0.5664, 0.3795, 1.0694]) tensor([0.2707, 0.0837, 0.2157, 0.4299]) -Greedy action tensor([-0.4861, -0.0268, 0.5096, 1.1437]) tensor([0.0962, 0.1523, 0.2604, 0.4910]) -Greedy action tensor([-1.1791, -0.8289, 0.9194, -0.0737]) tensor([0.0736, 0.1044, 0.5998, 0.2222]) -Greedy action tensor([-0.1984, -2.2552, -0.9527, 1.0213]) tensor([0.2006, 0.0257, 0.0944, 0.6794]) -Greedy action tensor([ 1.2743, 0.5432, -0.2842, -0.1886]) tensor([0.5199, 0.2503, 0.1094, 0.1204]) -Greedy action tensor([-0.4456, 0.3649, 1.1419, -0.3681]) tensor([0.1084, 0.2439, 0.5305, 0.1172]) -Greedy action tensor([ 1.0131, -0.3191, 0.1466, 0.8914]) tensor([0.3891, 0.1027, 0.1636, 0.3446]) -Greedy action tensor([ 1.6491, -0.4286, 1.9075, 0.6840]) tensor([0.3570, 0.0447, 0.4623, 0.1360]) -Greedy action tensor([-0.1509, -0.3463, 0.5268, 0.7886]) tensor([0.1575, 0.1295, 0.3101, 0.4029]) -Greedy action tensor([ 0.0328, -2.6672, 0.2568, 1.2335]) tensor([0.1773, 0.0119, 0.2218, 0.5890]) -Greedy action tensor([-0.4355, -1.6318, -0.6762, -0.5736]) tensor([0.3379, 0.1022, 0.2656, 0.2943]) -Greedy action tensor([-1.9049, -0.4133, 0.6402, -0.1679]) tensor([0.0419, 0.1862, 0.5339, 0.2380]) -Greedy action tensor([-1.7704, -0.2017, 0.5424, -0.0997]) tensor([0.0471, 0.2262, 0.4761, 0.2505]) -Greedy action tensor([-1.8844, -0.4106, 0.6500, -0.1351]) tensor([0.0422, 0.1840, 0.5314, 0.2424]) -Greedy action tensor([-1.7318, 0.1070, 0.5011, -0.0137]) tensor([0.0451, 0.2834, 0.4203, 0.2512]) -Greedy action tensor([-0.3591, 1.1156, 0.0078, 0.4762]) tensor([0.1097, 0.4792, 0.1583, 0.2528]) -Greedy action tensor([-1.3836, -0.5106, 0.4838, 0.3615]) tensor([0.0641, 0.1535, 0.4150, 0.3673]) -Greedy action tensor([-1.6515, -0.5139, 1.3958, 1.0066]) tensor([0.0254, 0.0791, 0.5338, 0.3617]) -Greedy action tensor([ 0.2510, -0.7634, 1.0056, 1.7668]) tensor([0.1243, 0.0451, 0.2644, 0.5661]) -Greedy action tensor([-1.8816, -0.3078, 0.6152, -0.1375]) tensor([0.0422, 0.2037, 0.5126, 0.2415]) -Greedy action tensor([-1.8993, -0.4464, 0.6480, -0.1553]) tensor([0.0421, 0.1799, 0.5374, 0.2407]) -Greedy action tensor([-0.3703, -0.0167, 0.3114, 1.2390]) tensor([0.1064, 0.1515, 0.2103, 0.5318]) -Greedy action tensor([-1.0122, -0.2548, 0.1387, 0.1805]) tensor([0.1043, 0.2224, 0.3296, 0.3437]) -Greedy action tensor([-1.8453, -0.4310, 0.6152, -0.1275]) tensor([0.0446, 0.1837, 0.5229, 0.2488]) -Greedy action tensor([-1.7266, -0.5933, 0.5858, 0.0055]) tensor([0.0504, 0.1564, 0.5086, 0.2847]) -Greedy action tensor([-1.5631, -0.2244, 0.5667, 0.3299]) tensor([0.0503, 0.1920, 0.4235, 0.3342]) -Greedy action tensor([-1.4806, -0.5496, 0.4353, 0.1690]) tensor([0.0644, 0.1633, 0.4373, 0.3350]) -Greedy action tensor([-1.3428, -0.4883, 0.4808, 0.6049]) tensor([0.0604, 0.1420, 0.3741, 0.4235]) -Greedy action tensor([-1.4135, -0.2972, 0.6013, 0.5873]) tensor([0.0528, 0.1611, 0.3958, 0.3903]) -Greedy action tensor([-1.8825, -0.4710, 0.6388, -0.1321]) tensor([0.0429, 0.1760, 0.5340, 0.2470]) -Greedy action tensor([-1.7671, -0.4753, 0.6769, 0.0760]) tensor([0.0445, 0.1619, 0.5125, 0.2810]) -Greedy action tensor([-0.7807, 0.0286, 0.1839, 0.0026]) tensor([0.1241, 0.2787, 0.3256, 0.2716]) -Greedy action tensor([-1.5236, -0.4880, 0.5810, 0.3345]) tensor([0.0543, 0.1528, 0.4451, 0.3479]) -Greedy action tensor([-1.9064, -0.4633, 0.6848, -0.1357]) tensor([0.0409, 0.1731, 0.5457, 0.2402]) -Greedy action tensor([-1.7221, -0.5118, 0.5997, 0.0438]) tensor([0.0490, 0.1645, 0.4998, 0.2867]) -Greedy action tensor([-1.1195, -0.5046, 0.2911, 0.5969]) tensor([0.0799, 0.1478, 0.3276, 0.4447]) -Greedy action tensor([-1.7288, -0.5037, 0.5487, -0.0279]) tensor([0.0509, 0.1734, 0.4967, 0.2790]) -Greedy action tensor([-1.7680, 0.1509, 0.4947, -0.1619]) tensor([0.0446, 0.3041, 0.4289, 0.2224]) -Greedy action tensor([-1.4905, -0.3986, 0.6471, 0.5035]) tensor([0.0505, 0.1505, 0.4282, 0.3709]) -Greedy action tensor([-1.8920, -0.3541, 0.6264, -0.1461]) tensor([0.0420, 0.1956, 0.5215, 0.2409]) -Greedy action tensor([-2.0429, -0.7982, 1.4796, 0.7325]) tensor([0.0184, 0.0638, 0.6228, 0.2950]) -Greedy action tensor([-1.9001, -0.4084, 0.6437, -0.1478]) tensor([0.0418, 0.1856, 0.5317, 0.2409]) -Greedy action tensor([-1.7335, -0.4801, 0.5476, -0.0449]) tensor([0.0508, 0.1778, 0.4968, 0.2747]) -Greedy action tensor([-1.7653, -0.4857, 0.5708, -0.0610]) tensor([0.0489, 0.1759, 0.5061, 0.2690]) -Greedy action tensor([-1.9276, -0.4335, 0.6580, -0.1660]) tensor([0.0407, 0.1815, 0.5406, 0.2372]) -Greedy action tensor([-1.8788, -0.3844, 0.6522, -0.1051]) tensor([0.0418, 0.1864, 0.5255, 0.2464]) -Greedy action tensor([-1.7822, -0.3296, 0.6040, -0.0574]) tensor([0.0460, 0.1964, 0.4997, 0.2579]) -Greedy action tensor([-1.5971, -0.4465, 0.5110, 0.0397]) tensor([0.0570, 0.1803, 0.4696, 0.2931]) -Greedy action tensor([-1.0681, -0.4916, 1.2111, 1.4159]) tensor([0.0408, 0.0725, 0.3981, 0.4886]) -Greedy action tensor([-1.9017, -0.3568, 0.6347, -0.1494]) tensor([0.0415, 0.1946, 0.5245, 0.2394]) -Greedy action tensor([-0.7541, -0.6216, 0.3482, 0.2077]) tensor([0.1287, 0.1470, 0.3876, 0.3368]) -Greedy action tensor([-1.9227, -0.4174, 0.6563, -0.1626]) tensor([0.0408, 0.1839, 0.5381, 0.2372]) -Greedy action tensor([-0.4949, -0.1384, 0.2178, 0.4362]) tensor([0.1428, 0.2039, 0.2911, 0.3622]) -Greedy action tensor([-1.8589, -0.4559, 0.6386, -0.1321]) tensor([0.0438, 0.1781, 0.5320, 0.2462]) -Greedy action tensor([-1.4002, 0.2519, 0.3497, -0.0303]) tensor([0.0629, 0.3280, 0.3617, 0.2474]) -Greedy action tensor([-1.9258, -0.4616, 0.6667, -0.1614]) tensor([0.0408, 0.1763, 0.5449, 0.2380]) -Greedy action tensor([-1.8240, -0.3613, 0.6417, -0.0911]) tensor([0.0440, 0.1898, 0.5175, 0.2487]) -Greedy action tensor([-1.6583e+00, -4.8463e-01, 5.3039e-01, 3.2300e-04]) tensor([0.0543, 0.1757, 0.4847, 0.2853]) -Greedy action tensor([-1.5376, -0.5680, 0.4649, 0.1024]) tensor([0.0617, 0.1628, 0.4573, 0.3182]) -Greedy action tensor([-1.7100, -0.4644, 0.6306, 0.1965]) tensor([0.0463, 0.1609, 0.4811, 0.3117]) -Greedy action tensor([-1.1829, 0.4682, 0.3236, -0.1798]) tensor([0.0743, 0.3875, 0.3354, 0.2027]) -Greedy action tensor([-1.8113, -0.4676, 0.6035, -0.1100]) tensor([0.0465, 0.1783, 0.5203, 0.2549]) -Greedy action tensor([-1.2540, -0.5831, 0.2818, 0.2929]) tensor([0.0813, 0.1591, 0.3777, 0.3819]) -Greedy action tensor([-1.8868, -0.4370, 0.6411, -0.1463]) tensor([0.0426, 0.1814, 0.5333, 0.2427]) -Greedy action tensor([-1.7606, -0.3958, 0.5607, -0.0528]) tensor([0.0485, 0.1899, 0.4941, 0.2675]) -Greedy action tensor([-1.5684, -0.3792, 0.4667, 0.0968]) tensor([0.0581, 0.1907, 0.4443, 0.3069]) -Greedy action tensor([-1.5950, -0.5211, 0.4867, 0.1138]) tensor([0.0573, 0.1676, 0.4590, 0.3161]) -Greedy action tensor([-1.8803, -0.4777, 0.6853, -0.0968]) tensor([0.0416, 0.1692, 0.5414, 0.2477]) -Greedy action tensor([-1.8638, -0.4548, 0.6925, -0.0307]) tensor([0.0413, 0.1689, 0.5318, 0.2581]) -Greedy action tensor([-1.8670, -0.5181, 1.2191, 0.5466]) tensor([0.0264, 0.1016, 0.5773, 0.2947]) -Greedy action tensor([-1.8257, -0.0879, 0.5510, -0.0916]) tensor([0.0433, 0.2459, 0.4658, 0.2450]) -Greedy action tensor([-1.6116, -0.5689, 0.5415, 0.0700]) tensor([0.0561, 0.1592, 0.4832, 0.3015]) -Greedy action tensor([-1.6863, -0.5337, 0.5583, -0.0355]) tensor([0.0532, 0.1683, 0.5016, 0.2770]) -Greedy action tensor([-1.8288, -0.3599, 0.5982, -0.1253]) tensor([0.0451, 0.1960, 0.5110, 0.2478]) -Greedy action tensor([-0.4740, 0.2433, 0.8539, 1.3616]) tensor([0.0764, 0.1565, 0.2882, 0.4789]) -Greedy action tensor([-0.9085, 0.2597, 0.4794, 1.1684]) tensor([0.0617, 0.1985, 0.2473, 0.4925]) -Greedy action tensor([-1.4635, 0.5759, 0.2676, 0.3347]) tensor([0.0491, 0.3773, 0.2772, 0.2964]) -Greedy action tensor([-1.8163, -0.4372, 0.6014, -0.1108]) tensor([0.0461, 0.1830, 0.5172, 0.2537]) -Greedy action tensor([-1.5513, -0.5069, 0.7556, 0.5402]) tensor([0.0455, 0.1293, 0.4569, 0.3683]) -Greedy action tensor([-1.2302, 0.7329, 0.1941, 0.1100]) tensor([0.0621, 0.4424, 0.2581, 0.2373]) -Greedy action tensor([-0.8504, -0.6188, 0.3652, 0.4158]) tensor([0.1089, 0.1373, 0.3673, 0.3864]) -Greedy action tensor([-1.7412, -0.4852, 0.5609, -0.0194]) tensor([0.0497, 0.1747, 0.4973, 0.2783]) -Greedy action tensor([-1.2936, -0.6234, 0.3548, 0.3349]) tensor([0.0755, 0.1475, 0.3924, 0.3846]) -Greedy action tensor([-1.3302, -0.5451, 0.3739, 0.1192]) tensor([0.0772, 0.1693, 0.4244, 0.3290]) -Greedy action tensor([-1.8775, -0.3634, 0.6241, -0.1270]) tensor([0.0425, 0.1934, 0.5191, 0.2449]) -Greedy action tensor([-1.4024, 0.4792, 0.4948, -0.7275]) tensor([0.0617, 0.4053, 0.4117, 0.1213]) -Greedy action tensor([-1.0958, -0.3372, 0.4675, -0.3635]) tensor([0.1001, 0.2137, 0.4780, 0.2082]) -Greedy action tensor([-0.6084, 1.0278, 0.1885, 1.0057]) tensor([0.0748, 0.3839, 0.1658, 0.3755]) -Greedy action tensor([-1.8345, -0.4679, 0.6394, -0.0490]) tensor([0.0440, 0.1724, 0.5216, 0.2620]) -Greedy action tensor([-1.8553, -0.4663, 0.6434, -0.1557]) tensor([0.0442, 0.1771, 0.5372, 0.2416]) -Greedy action tensor([-1.3175, -0.6229, 0.3267, 0.1672]) tensor([0.0794, 0.1590, 0.4111, 0.3505]) -Greedy action tensor([-1.5166, 0.2561, 0.4130, 0.0798]) tensor([0.0535, 0.3146, 0.3681, 0.2638]) -Greedy action tensor([ 0.8387, -0.6600, 0.0902, -0.3826]) tensor([0.5022, 0.1122, 0.2376, 0.1481]) -Greedy action tensor([ 0.9158, -0.9631, -0.0038, -0.4406]) tensor([0.5528, 0.0844, 0.2204, 0.1424]) -Greedy action tensor([ 0.7555, -0.3777, -0.0862, -0.1749]) tensor([0.4657, 0.1500, 0.2007, 0.1837]) -Greedy action tensor([0.4158, 0.1392, 0.0914, 0.2117]) tensor([0.3033, 0.2300, 0.2193, 0.2473]) -Greedy action tensor([ 0.5620, -0.8336, -0.0775, -0.2859]) tensor([0.4538, 0.1124, 0.2394, 0.1944]) -Greedy action tensor([ 0.6091, -0.2594, 0.0169, -0.5964]) tensor([0.4401, 0.1847, 0.2434, 0.1318]) -Greedy action tensor([ 0.8549, -0.5051, -0.0194, -0.3174]) tensor([0.5042, 0.1294, 0.2103, 0.1561]) -Greedy action tensor([ 0.6779, -0.1988, -0.0526, -0.3122]) tensor([0.4406, 0.1834, 0.2123, 0.1637]) -Greedy action tensor([ 0.4215, -0.2929, -0.1219, -0.7436]) tensor([0.4198, 0.2055, 0.2438, 0.1309]) -Greedy action tensor([ 0.7812, -0.4142, -0.0408, -0.2359]) tensor([0.4753, 0.1438, 0.2089, 0.1719]) -Greedy action tensor([ 0.9714, -0.7953, 0.0677, -0.5326]) tensor([0.5561, 0.0950, 0.2253, 0.1236]) -Greedy action tensor([ 0.8054, -0.5001, -0.1303, -0.3218]) tensor([0.5032, 0.1364, 0.1974, 0.1630]) -Greedy action tensor([ 0.8274, -0.6671, 0.1044, -0.3169]) tensor([0.4931, 0.1106, 0.2393, 0.1570]) -Greedy action tensor([ 0.7760, -0.5156, -0.0423, -0.1930]) tensor([0.4772, 0.1312, 0.2105, 0.1811]) -Greedy action tensor([ 0.9275, -0.5955, -0.2244, -0.4670]) tensor([0.5611, 0.1224, 0.1774, 0.1391]) -Greedy action tensor([ 0.1174, 0.3776, -0.0955, -0.0977]) tensor([0.2556, 0.3316, 0.2066, 0.2062]) -Greedy action tensor([ 0.3958, -0.2777, 0.0315, -0.1078]) tensor([0.3560, 0.1815, 0.2473, 0.2151]) -Greedy action tensor([ 0.7810, -0.3605, -0.1065, -0.1852]) tensor([0.4736, 0.1512, 0.1950, 0.1802]) -Greedy action tensor([ 0.2253, 0.0022, 0.0831, -0.3343]) tensor([0.3087, 0.2470, 0.2678, 0.1764]) -Greedy action tensor([ 0.6715, -0.2850, -0.0348, -0.3310]) tensor([0.4455, 0.1712, 0.2198, 0.1635]) -Greedy action tensor([ 0.5750, -0.2472, -0.0336, -0.1163]) tensor([0.4025, 0.1769, 0.2190, 0.2016]) -Greedy action tensor([ 1.1514, -0.4872, 0.2756, -0.6221]) tensor([0.5616, 0.1091, 0.2339, 0.0953]) -Greedy action tensor([ 1.1523, -0.8977, 0.0599, -0.5238]) tensor([0.6056, 0.0780, 0.2031, 0.1133]) -Greedy action tensor([ 0.5259, -0.4376, -0.0297, -0.2463]) tensor([0.4137, 0.1578, 0.2373, 0.1911]) -Greedy action tensor([ 0.7299, -0.3094, 0.0142, -0.2810]) tensor([0.4532, 0.1603, 0.2216, 0.1649]) -Greedy action tensor([ 0.9135, -0.4386, 0.0677, -0.4758]) tensor([0.5162, 0.1335, 0.2216, 0.1287]) -Greedy action tensor([ 0.7263, -0.2303, 0.0782, -0.3962]) tensor([0.4479, 0.1721, 0.2343, 0.1458]) -Greedy action tensor([ 0.6380, -0.5821, -0.1867, -0.0964]) tensor([0.4518, 0.1334, 0.1981, 0.2168]) -Greedy action tensor([ 0.5201, -0.2875, -0.0221, -0.2218]) tensor([0.3994, 0.1781, 0.2322, 0.1902]) -Greedy action tensor([ 0.7877, -0.3268, 0.2382, -0.4903]) tensor([0.4579, 0.1502, 0.2643, 0.1276]) -Greedy action tensor([ 0.8910, -0.4066, -0.1083, -0.4234]) tensor([0.5236, 0.1430, 0.1927, 0.1407]) -Greedy action tensor([ 0.5904, -0.6330, 0.1325, -0.4585]) tensor([0.4392, 0.1292, 0.2778, 0.1538]) -Greedy action tensor([ 0.9846, -0.6817, 0.0639, -0.5807]) tensor([0.5567, 0.1052, 0.2217, 0.1164]) -Greedy action tensor([ 0.3954, -0.1764, -0.0971, -0.0451]) tensor([0.3547, 0.2002, 0.2168, 0.2283]) -Greedy action tensor([ 0.3081, 0.2867, -0.2351, -0.1003]) tensor([0.3101, 0.3036, 0.1802, 0.2062]) -Greedy action tensor([0.3831, 0.3165, 0.0577, 0.0377]) tensor([0.2971, 0.2780, 0.2146, 0.2103]) -Greedy action tensor([ 0.6755, -0.4361, -0.0021, -0.4661]) tensor([0.4638, 0.1526, 0.2355, 0.1481]) -Greedy action tensor([ 0.5692, -0.5529, -0.0129, -0.4180]) tensor([0.4431, 0.1443, 0.2476, 0.1651]) -Greedy action tensor([ 0.6476, -0.3377, -0.0386, -0.3781]) tensor([0.4474, 0.1670, 0.2252, 0.1604]) -Greedy action tensor([ 0.4522, -0.4876, 0.3219, -0.7226]) tensor([0.3880, 0.1516, 0.3406, 0.1198]) -Greedy action tensor([ 0.6684, -0.4178, -0.0282, -0.3388]) tensor([0.4543, 0.1533, 0.2264, 0.1659]) -Greedy action tensor([ 0.9148, -0.4194, -0.1552, -0.3935]) tensor([0.5329, 0.1403, 0.1828, 0.1440]) -Greedy action tensor([ 0.6991, -0.0347, 0.3232, -0.3897]) tensor([0.3995, 0.1918, 0.2743, 0.1345]) -Greedy action tensor([ 0.6693, -0.3878, -0.1527, -0.4284]) tensor([0.4715, 0.1638, 0.2073, 0.1573]) -Greedy action tensor([ 1.1003, -0.9448, 0.0218, -0.3957]) tensor([0.5905, 0.0764, 0.2008, 0.1323]) -Greedy action tensor([ 1.0774e+00, -5.3476e-01, 1.0524e-03, -6.4874e-01]) tensor([0.5820, 0.1161, 0.1984, 0.1036]) -Greedy action tensor([ 0.6656, 0.1154, -0.1284, -0.3050]) tensor([0.4153, 0.2396, 0.1877, 0.1573]) -Greedy action tensor([ 0.3044, 0.0489, -0.1491, -0.3223]) tensor([0.3396, 0.2631, 0.2158, 0.1815]) -Greedy action tensor([ 0.7829, -0.4266, 0.0428, -0.2469]) tensor([0.4689, 0.1399, 0.2237, 0.1675]) -Greedy action tensor([ 1.0077, -0.8608, -0.0018, -0.3741]) tensor([0.5650, 0.0872, 0.2059, 0.1419]) -Greedy action tensor([ 0.6983, -0.5700, -0.0081, -0.6146]) tensor([0.4893, 0.1376, 0.2414, 0.1316]) -Greedy action tensor([ 0.5206, -0.5771, -0.2318, -0.3023]) tensor([0.4456, 0.1487, 0.2100, 0.1957]) -Greedy action tensor([ 0.2986, -0.0878, -0.0957, -0.2406]) tensor([0.3405, 0.2314, 0.2295, 0.1986]) -Greedy action tensor([ 0.8508, -0.6009, -0.0947, -0.3064]) tensor([0.5162, 0.1209, 0.2006, 0.1623]) -Greedy action tensor([ 0.9517, -0.8240, 0.0775, -0.6823]) tensor([0.5613, 0.0951, 0.2342, 0.1095]) -Greedy action tensor([ 0.7985, -0.3958, -0.0481, -0.3645]) tensor([0.4891, 0.1482, 0.2098, 0.1529]) -Greedy action tensor([ 0.5264, 0.0128, -0.1146, -0.1098]) tensor([0.3767, 0.2254, 0.1985, 0.1994]) -Greedy action tensor([ 0.7473, -0.3335, -0.0314, -0.3224]) tensor([0.4670, 0.1585, 0.2143, 0.1602]) -Greedy action tensor([ 0.7264, -0.3658, -0.0170, -0.3481]) tensor([0.4646, 0.1559, 0.2209, 0.1586]) -Greedy action tensor([ 0.7031, -0.5490, 0.0810, -0.3449]) tensor([0.4601, 0.1315, 0.2470, 0.1613]) -Greedy action tensor([ 0.5848, -0.0370, 0.0214, -0.1727]) tensor([0.3884, 0.2085, 0.2211, 0.1821]) -Greedy action tensor([ 0.9136, -0.4702, -0.0168, -0.1264]) tensor([0.5004, 0.1254, 0.1974, 0.1769]) -Greedy action tensor([ 0.7892, -0.3608, -0.0515, -0.2038]) tensor([0.4720, 0.1495, 0.2036, 0.1749]) -Greedy action tensor([ 0.9506, -1.1790, -0.1071, -0.5494]) tensor([0.5920, 0.0704, 0.2056, 0.1321]) -Greedy action tensor([ 0.4236, 0.0108, -0.1317, 0.0449]) tensor([0.3424, 0.2266, 0.1965, 0.2345]) -Greedy action tensor([ 0.3696, -0.1929, -0.0865, -0.0667]) tensor([0.3509, 0.1999, 0.2224, 0.2268]) -Greedy action tensor([ 0.4562, -0.4554, 0.0121, -0.2563]) tensor([0.3947, 0.1586, 0.2532, 0.1935]) -Greedy action tensor([ 0.5994, 0.2287, -0.0917, 0.0220]) tensor([0.3633, 0.2508, 0.1820, 0.2039]) -Greedy action tensor([ 0.6346, -0.5048, -0.0218, -0.4854]) tensor([0.4619, 0.1478, 0.2396, 0.1507]) -Greedy action tensor([ 0.7953, -0.5868, -0.0085, -0.4055]) tensor([0.5001, 0.1255, 0.2239, 0.1505]) -Greedy action tensor([ 0.6164, -0.4786, 0.0328, -0.4002]) tensor([0.4436, 0.1484, 0.2475, 0.1605]) -Greedy action tensor([ 0.6498, -0.5013, -0.0899, -0.1638]) tensor([0.4471, 0.1414, 0.2134, 0.1982]) -Greedy action tensor([ 0.5051, -0.1301, 0.0081, -0.4150]) tensor([0.3942, 0.2089, 0.2398, 0.1571]) -Greedy action tensor([ 0.8717, -0.6527, 0.0430, -0.4162]) tensor([0.5181, 0.1128, 0.2262, 0.1429]) -Greedy action tensor([ 0.5328, -0.2364, -0.0600, -0.1513]) tensor([0.3967, 0.1838, 0.2193, 0.2002]) -Greedy action tensor([ 0.2878, -0.0410, 0.0364, -0.1129]) tensor([0.3157, 0.2272, 0.2455, 0.2115]) -Greedy action tensor([ 0.3798, 0.0187, 0.1844, -0.5481]) tensor([0.3431, 0.2391, 0.2822, 0.1357]) -Greedy action tensor([ 0.3902, -0.0085, -0.0123, -0.3548]) tensor([0.3553, 0.2385, 0.2376, 0.1687]) -Greedy action tensor([ 1.1278, -1.3006, 0.0304, -0.9886]) tensor([0.6483, 0.0572, 0.2164, 0.0781]) -Greedy action tensor([ 0.6192, -0.5200, -0.0755, -0.4862]) tensor([0.4650, 0.1488, 0.2322, 0.1540]) -Greedy action tensor([ 0.1907, 0.2407, -0.0919, 0.0314]) tensor([0.2734, 0.2874, 0.2061, 0.2331]) -Greedy action tensor([ 1.3935, -0.1711, -0.3007, 0.2206]) tensor([0.5874, 0.1229, 0.1079, 0.1818]) -Greedy action tensor([ 1.4738, -0.5467, -0.5056, 0.4265]) tensor([0.6167, 0.0818, 0.0852, 0.2164]) -Greedy action tensor([ 1.5926, -0.5874, -0.2300, 0.1623]) tensor([0.6606, 0.0747, 0.1067, 0.1580]) -Greedy action tensor([ 1.1141, -0.0845, -0.8926, 0.1415]) tensor([0.5512, 0.1663, 0.0741, 0.2084]) -Greedy action tensor([ 1.1362, -0.7812, -0.4389, 0.4927]) tensor([0.5321, 0.0782, 0.1101, 0.2796]) -Greedy action tensor([ 0.7458, -0.2523, -0.0716, 0.0647]) tensor([0.4317, 0.1591, 0.1906, 0.2185]) -Greedy action tensor([ 1.3620, -0.3163, -0.8747, 0.6668]) tensor([0.5579, 0.1042, 0.0596, 0.2784]) -Greedy action tensor([ 1.5273, -0.1417, -0.2089, 0.6888]) tensor([0.5565, 0.1049, 0.0980, 0.2406]) -Greedy action tensor([ 1.5987, -0.4278, -0.3985, 1.0187]) tensor([0.5472, 0.0721, 0.0743, 0.3064]) -Greedy action tensor([ 1.3663, -0.4274, -0.3453, 0.1940]) tensor([0.6037, 0.1004, 0.1090, 0.1869]) -Greedy action tensor([ 0.8999, -0.1732, -0.0675, 0.0223]) tensor([0.4678, 0.1600, 0.1778, 0.1945]) -Greedy action tensor([ 1.6453, -0.6595, -0.1030, -0.0158]) tensor([0.6832, 0.0682, 0.1189, 0.1298]) -Greedy action tensor([ 1.1501, -0.0594, -0.3635, 0.2841]) tensor([0.5157, 0.1539, 0.1135, 0.2169]) -Greedy action tensor([ 1.7612, -0.9773, -0.1606, 0.3367]) tensor([0.6889, 0.0445, 0.1008, 0.1658]) -Greedy action tensor([ 1.4474, -0.1467, -0.2666, 0.2498]) tensor([0.5934, 0.1205, 0.1069, 0.1792]) -Greedy action tensor([ 1.0987, -0.5486, -0.3004, 0.6374]) tensor([0.4831, 0.0930, 0.1192, 0.3046]) -Greedy action tensor([ 1.4235, -0.6705, -0.0388, 0.4279]) tensor([0.5799, 0.0714, 0.1344, 0.2143]) -Greedy action tensor([ 0.8365, -0.1909, -0.2454, 0.3165]) tensor([0.4364, 0.1562, 0.1479, 0.2594]) -Greedy action tensor([ 1.2641, -0.3950, -0.1019, -0.2250]) tensor([0.5984, 0.1139, 0.1527, 0.1350]) -Greedy action tensor([ 1.4675, -0.1754, -0.3199, 0.3927]) tensor([0.5875, 0.1136, 0.0983, 0.2005]) -Greedy action tensor([ 0.9982, -0.0430, -0.7602, 0.5643]) tensor([0.4601, 0.1624, 0.0793, 0.2982]) -Greedy action tensor([ 1.0300, -0.5910, -0.0497, 0.2737]) tensor([0.4983, 0.0985, 0.1693, 0.2339]) -Greedy action tensor([ 1.3162, -0.3937, -0.3640, -0.1912]) tensor([0.6294, 0.1139, 0.1173, 0.1394]) -Greedy action tensor([ 1.4497, -0.5100, -0.3540, 0.6329]) tensor([0.5723, 0.0806, 0.0942, 0.2528]) -Greedy action tensor([ 1.8148, -0.8363, -0.1443, 0.3640]) tensor([0.6916, 0.0488, 0.0975, 0.1621]) -Greedy action tensor([ 1.9153, -1.2878, -0.1343, 0.5005]) tensor([0.7080, 0.0288, 0.0912, 0.1720]) -Greedy action tensor([ 1.0343, -0.2712, -0.1027, 0.0907]) tensor([0.5048, 0.1368, 0.1619, 0.1965]) -Greedy action tensor([ 1.5788, -0.5537, -0.1740, 0.4375]) tensor([0.6206, 0.0736, 0.1076, 0.1982]) -Greedy action tensor([ 1.6207, -0.1209, -0.5397, 0.5566]) tensor([0.6114, 0.1071, 0.0705, 0.2110]) -Greedy action tensor([ 1.5746, -0.6480, -0.2084, 0.2981]) tensor([0.6429, 0.0696, 0.1081, 0.1794]) -Greedy action tensor([ 1.1120, -0.3755, -0.2853, 0.2142]) tensor([0.5317, 0.1201, 0.1315, 0.2167]) -Greedy action tensor([ 1.4558, 0.3531, 0.3239, -0.3057]) tensor([0.5476, 0.1818, 0.1766, 0.0941]) -Greedy action tensor([ 1.3414, -0.2678, -0.3131, 0.1674]) tensor([0.5881, 0.1177, 0.1124, 0.1818]) -Greedy action tensor([ 1.3260, -0.1773, -0.6070, 0.3667]) tensor([0.5713, 0.1271, 0.0827, 0.2189]) -Greedy action tensor([ 1.3922, -0.0548, -0.6065, 0.2950]) tensor([0.5867, 0.1380, 0.0795, 0.1958]) -Greedy action tensor([ 1.4753, -0.7067, -0.1845, 0.3542]) tensor([0.6139, 0.0693, 0.1168, 0.2001]) -Greedy action tensor([ 1.2967, -0.1295, -0.1422, 0.1747]) tensor([0.5546, 0.1332, 0.1315, 0.1806]) -Greedy action tensor([ 1.2926, -0.2684, -0.3926, 0.3408]) tensor([0.5614, 0.1178, 0.1041, 0.2167]) -Greedy action tensor([ 1.0611, -0.5667, -0.0746, 0.0062]) tensor([0.5360, 0.1052, 0.1722, 0.1866]) -Greedy action tensor([ 1.8190, -0.6309, -0.4541, 0.5030]) tensor([0.6861, 0.0592, 0.0707, 0.1840]) -Greedy action tensor([ 1.3622, 0.0725, -0.1970, 0.5088]) tensor([0.5231, 0.1440, 0.1100, 0.2228]) -Greedy action tensor([ 1.4992, -0.1681, -0.8595, 0.5766]) tensor([0.5950, 0.1123, 0.0563, 0.2365]) -Greedy action tensor([ 1.5956, -0.2092, -0.3719, 0.4705]) tensor([0.6139, 0.1010, 0.0858, 0.1993]) -Greedy action tensor([ 1.6870, -0.3264, -0.4417, 0.6871]) tensor([0.6171, 0.0824, 0.0734, 0.2270]) -Greedy action tensor([ 1.1121, 0.0856, -0.3173, 0.5790]) tensor([0.4578, 0.1640, 0.1096, 0.2686]) -Greedy action tensor([ 0.9135, 0.1092, -0.0780, 0.1349]) tensor([0.4391, 0.1964, 0.1629, 0.2016]) -Greedy action tensor([ 2.2628, -0.4096, -0.6044, 0.3552]) tensor([0.7847, 0.0542, 0.0446, 0.1165]) -Greedy action tensor([ 0.3074, -0.2510, -0.1495, 0.1159]) tensor([0.3299, 0.1888, 0.2089, 0.2724]) -Greedy action tensor([ 2.3779, -0.5002, -0.5561, 0.9417]) tensor([0.7422, 0.0417, 0.0395, 0.1765]) -Greedy action tensor([ 1.2383, -0.2759, -0.7588, 0.2777]) tensor([0.5752, 0.1265, 0.0781, 0.2201]) -Greedy action tensor([ 1.4028, -0.5826, -0.2573, 0.6165]) tensor([0.5609, 0.0770, 0.1066, 0.2555]) -Greedy action tensor([ 0.8764, -0.4738, -0.1160, 0.2435]) tensor([0.4628, 0.1199, 0.1715, 0.2458]) -Greedy action tensor([ 2.6781, -0.9220, 0.1800, 0.9827]) tensor([0.7733, 0.0211, 0.0636, 0.1419]) -Greedy action tensor([ 0.8557, -0.2669, -0.2430, 0.3129]) tensor([0.4465, 0.1453, 0.1488, 0.2594]) -Greedy action tensor([ 1.5395, -1.2828, 0.0911, 0.0623]) tensor([0.6567, 0.0391, 0.1543, 0.1499]) -Greedy action tensor([ 2.5870, -1.3177, -0.1580, 1.1477]) tensor([0.7567, 0.0152, 0.0486, 0.1794]) -Greedy action tensor([ 2.0764, -0.7133, -0.4853, 0.5346]) tensor([0.7393, 0.0454, 0.0571, 0.1582]) -Greedy action tensor([ 1.3371, -0.4200, -0.1383, 0.2839]) tensor([0.5714, 0.0986, 0.1307, 0.1993]) -Greedy action tensor([ 1.5305, -0.1955, -1.3294, 0.1893]) tensor([0.6681, 0.1189, 0.0383, 0.1747]) -Greedy action tensor([ 1.1912, -0.3257, -0.5245, -0.0572]) tensor([0.5931, 0.1301, 0.1067, 0.1702]) -Greedy action tensor([ 1.1660, -0.6170, -0.0775, -0.1095]) tensor([0.5761, 0.0969, 0.1661, 0.1609]) -Greedy action tensor([ 1.7549, -0.4142, -0.5979, 0.5784]) tensor([0.6589, 0.0753, 0.0627, 0.2032]) -Greedy action tensor([ 1.7193, -0.3614, -0.3639, 0.6870]) tensor([0.6228, 0.0778, 0.0776, 0.2218]) -Greedy action tensor([ 1.6631, -0.5062, -0.1660, 0.5373]) tensor([0.6253, 0.0714, 0.1004, 0.2029]) -Greedy action tensor([ 1.2878, 0.1142, -0.3481, 0.4755]) tensor([0.5134, 0.1588, 0.1000, 0.2279]) -Greedy action tensor([ 2.2948, -1.4538, -0.2813, 0.8182]) tensor([0.7530, 0.0177, 0.0573, 0.1720]) -Greedy action tensor([ 1.3108, -0.2498, -0.2725, 0.2759]) tensor([0.5648, 0.1186, 0.1159, 0.2007]) -Greedy action tensor([ 1.4984, -0.2880, -0.6373, 0.3346]) tensor([0.6258, 0.1049, 0.0739, 0.1954]) -Greedy action tensor([ 1.1158, -0.2059, 0.0820, -0.2022]) tensor([0.5291, 0.1411, 0.1882, 0.1416]) -Greedy action tensor([ 1.7373, -0.0516, 0.2046, -0.1513]) tensor([0.6517, 0.1089, 0.1407, 0.0986]) -Greedy action tensor([ 1.8029, -0.2392, -0.0989, 0.5103]) tensor([0.6437, 0.0835, 0.0961, 0.1767]) -Greedy action tensor([ 1.6949, -0.3617, -0.4469, 0.5589]) tensor([0.6384, 0.0816, 0.0750, 0.2050]) -Greedy action tensor([ 1.5120, -0.3110, -0.7969, 0.2612]) tensor([0.6463, 0.1044, 0.0642, 0.1850]) -Greedy action tensor([ 1.8030, -0.7611, -0.3242, 0.9168]) tensor([0.6217, 0.0479, 0.0741, 0.2563]) -Greedy action tensor([ 1.6900, -0.4034, -0.0682, 0.7567]) tensor([0.5921, 0.0730, 0.1021, 0.2328]) -Greedy action tensor([ 1.2667, -0.6834, -0.1794, 0.4153]) tensor([0.5542, 0.0788, 0.1305, 0.2365]) -Greedy action tensor([ 1.0122, -0.1226, -0.7059, 0.5005]) tensor([0.4761, 0.1531, 0.0854, 0.2854]) -Greedy action tensor([ 1.9914, -0.4103, -0.3401, 0.6607]) tensor([0.6887, 0.0624, 0.0669, 0.1820]) -Greedy action tensor([ 1.8145, -0.7760, -0.4000, 0.7216]) tensor([0.6581, 0.0493, 0.0719, 0.2206]) -Greedy action tensor([ 1.5719, -0.6738, -0.1317, 0.0591]) tensor([0.6630, 0.0702, 0.1207, 0.1461]) -Greedy action tensor([ 1.1572, -0.7507, 0.1718, -0.0587]) tensor([0.5500, 0.0816, 0.2053, 0.1630]) -Greedy action tensor([ 0.7597, -1.2101, 1.0707, 0.2171]) tensor([0.3241, 0.0452, 0.4423, 0.1884]) -Greedy action tensor([-1.3234, -0.9291, -0.7809, 0.7322]) tensor([0.0832, 0.1235, 0.1432, 0.6502]) -Greedy action tensor([ 0.3358, -0.7964, 1.5280, -0.3579]) tensor([0.1954, 0.0630, 0.6439, 0.0977]) -Greedy action tensor([ 1.3951, -0.4122, -0.5519, 0.0479]) tensor([0.6382, 0.1047, 0.0911, 0.1659]) -Greedy action tensor([-0.2741, -0.2314, -0.4883, 2.3781]) tensor([0.0587, 0.0613, 0.0474, 0.8327]) -Greedy action tensor([0.6388, 0.2645, 0.3671, 0.8263]) tensor([0.2735, 0.1881, 0.2085, 0.3299]) -Greedy action tensor([ 0.1081, -1.8649, 0.5288, 1.2182]) tensor([0.1755, 0.0244, 0.2673, 0.5327]) -Greedy action tensor([ 0.8676, -0.8487, 1.1408, 1.0609]) tensor([0.2697, 0.0485, 0.3545, 0.3273]) -Greedy action tensor([ 0.6648, -0.6588, -0.0325, 0.1288]) tensor([0.4257, 0.1133, 0.2120, 0.2491]) -Greedy action tensor([ 0.8394, -0.6448, 0.7120, -0.0311]) tensor([0.3959, 0.0897, 0.3486, 0.1658]) -Greedy action tensor([ 0.3220, -0.3239, 0.5958, -0.9891]) tensor([0.3217, 0.1686, 0.4230, 0.0867]) -Greedy action tensor([ 0.2962, 0.8956, 0.7113, -0.0324]) tensor([0.1978, 0.3602, 0.2996, 0.1424]) -Greedy action tensor([ 0.8046, -1.1217, -0.4261, -0.7217]) tensor([0.6042, 0.0880, 0.1765, 0.1313]) -Greedy action tensor([-0.5183, 0.8273, -0.5175, -0.1819]) tensor([0.1381, 0.5304, 0.1382, 0.1933]) -Greedy action tensor([-0.1319, -0.5893, -0.7450, 0.5026]) tensor([0.2463, 0.1559, 0.1334, 0.4645]) -Greedy action tensor([ 0.3005, -0.2453, 0.5515, 0.7581]) tensor([0.2250, 0.1303, 0.2892, 0.3555]) -Greedy action tensor([ 1.8331, 0.4456, -0.0394, 0.0300]) tensor([0.6377, 0.1592, 0.0980, 0.1051]) -Greedy action tensor([ 0.0216, -0.2810, -0.5966, -0.3395]) tensor([0.3362, 0.2484, 0.1812, 0.2343]) -Greedy action tensor([-0.1096, -0.7342, -0.0547, 0.5310]) tensor([0.2227, 0.1193, 0.2353, 0.4227]) -Greedy action tensor([-0.3476, -0.9922, 0.2605, -0.1146]) tensor([0.2163, 0.1135, 0.3972, 0.2730]) -Greedy action tensor([ 1.0447, -0.0748, 1.5346, 0.8629]) tensor([0.2637, 0.0861, 0.4304, 0.2199]) -Greedy action tensor([ 1.4859, 0.3908, -0.0330, 0.1290]) tensor([0.5522, 0.1847, 0.1209, 0.1422]) -Greedy action tensor([1.2436, 0.4267, 0.3553, 0.0150]) tensor([0.4660, 0.2059, 0.1917, 0.1364]) -Greedy action tensor([ 0.4512, -0.8104, -0.5596, 0.3330]) tensor([0.3944, 0.1117, 0.1435, 0.3504]) -Greedy action tensor([-1.2846, -0.2058, -0.1577, 0.0783]) tensor([0.0915, 0.2690, 0.2822, 0.3573]) -Greedy action tensor([ 0.0881, -0.0195, 0.4910, 0.3306]) tensor([0.2142, 0.1924, 0.3205, 0.2730]) -Greedy action tensor([ 0.8711, -0.5012, 0.1522, 2.0711]) tensor([0.1976, 0.0501, 0.0963, 0.6560]) -Greedy action tensor([-0.1477, 0.9132, 0.3973, -0.0870]) tensor([0.1498, 0.4327, 0.2583, 0.1592]) -Greedy action tensor([-0.1192, -0.9520, 0.7666, 0.8266]) tensor([0.1554, 0.0676, 0.3769, 0.4002]) -Greedy action tensor([-0.6807, -1.3791, 0.1791, -0.9624]) tensor([0.2167, 0.1078, 0.5120, 0.1635]) -Greedy action tensor([-0.4780, 1.5266, 1.2778, -0.2746]) tensor([0.0648, 0.4809, 0.3750, 0.0794]) -Greedy action tensor([ 1.5625, -1.6126, 1.8592, 0.8142]) tensor([0.3496, 0.0146, 0.4704, 0.1654]) -Greedy action tensor([ 1.2059, -0.1681, -0.8236, 0.5817]) tensor([0.5208, 0.1318, 0.0684, 0.2790]) -Greedy action tensor([ 0.6171, -0.7042, 0.4345, 0.7980]) tensor([0.3032, 0.0809, 0.2526, 0.3633]) -Greedy action tensor([ 0.7584, 0.9336, 0.6900, -1.1892]) tensor([0.3060, 0.3646, 0.2858, 0.0436]) -Greedy action tensor([-0.0901, 0.2604, 0.3298, -0.5394]) tensor([0.2183, 0.3100, 0.3323, 0.1393]) -Greedy action tensor([-0.6569, -1.7147, -0.3575, -0.8636]) tensor([0.2849, 0.0989, 0.3844, 0.2317]) -Greedy action tensor([ 0.7443, 0.0810, -0.6213, 0.7623]) tensor([0.3586, 0.1847, 0.0915, 0.3651]) -Greedy action tensor([ 0.3270, -0.0347, 0.6891, -0.5309]) tensor([0.2811, 0.1958, 0.4038, 0.1192]) -Greedy action tensor([ 0.1968, -1.4975, 0.0270, 0.3031]) tensor([0.3185, 0.0585, 0.2687, 0.3542]) -Greedy action tensor([ 0.1303, -1.1837, -0.0064, 0.3776]) tensor([0.2923, 0.0785, 0.2549, 0.3742]) -Greedy action tensor([-0.4868, 0.2594, -0.7178, 0.6884]) tensor([0.1400, 0.2953, 0.1111, 0.4535]) -Greedy action tensor([ 0.8019, -1.0486, 1.0464, 0.9105]) tensor([0.2818, 0.0443, 0.3598, 0.3141]) -Greedy action tensor([ 0.3054, -0.5587, 1.5448, -0.7259]) tensor([0.1911, 0.0806, 0.6601, 0.0682]) -Greedy action tensor([ 0.5396, -0.5223, 1.3662, 0.6881]) tensor([0.2087, 0.0722, 0.4770, 0.2421]) -Greedy action tensor([ 1.0684, -1.6582, 1.6430, 1.7508]) tensor([0.2075, 0.0136, 0.3685, 0.4105]) -Greedy action tensor([-0.4752, -0.0694, 0.9176, -0.2805]) tensor([0.1292, 0.1938, 0.5201, 0.1569]) -Greedy action tensor([-1.6704, -0.3427, 0.6030, -1.7280]) tensor([0.0648, 0.2445, 0.6295, 0.0612]) -Greedy action tensor([0.1991, 0.2193, 0.6134, 0.1253]) tensor([0.2241, 0.2287, 0.3391, 0.2081]) -Greedy action tensor([ 0.3821, -1.1318, -0.0393, 0.2743]) tensor([0.3605, 0.0793, 0.2365, 0.3236]) -Greedy action tensor([ 2.0401, -1.4267, 0.3626, 0.3015]) tensor([0.7175, 0.0224, 0.1340, 0.1261]) -Greedy action tensor([ 0.4619, -0.4455, 1.0518, -0.1090]) tensor([0.2651, 0.1070, 0.4782, 0.1498]) -Greedy action tensor([-0.1074, -1.9652, 0.5042, 0.5106]) tensor([0.2060, 0.0321, 0.3797, 0.3822]) -Greedy action tensor([ 0.9657, -0.4045, 1.0727, 1.1273]) tensor([0.2823, 0.0717, 0.3142, 0.3318]) -Greedy action tensor([-0.1437, 0.2547, -0.4563, -0.0272]) tensor([0.2302, 0.3428, 0.1684, 0.2586]) -Greedy action tensor([ 0.3544, -0.3088, -0.2728, 0.0414]) tensor([0.3596, 0.1853, 0.1921, 0.2630]) -Greedy action tensor([-0.0978, -0.0208, 0.5363, -1.0435]) tensor([0.2297, 0.2481, 0.4330, 0.0892]) -Greedy action tensor([-1.1793, -1.8472, 0.3256, 0.4140]) tensor([0.0914, 0.0469, 0.4118, 0.4499]) -Greedy action tensor([ 0.4707, 0.1220, -0.6832, 1.1009]) tensor([0.2565, 0.1810, 0.0809, 0.4817]) -Greedy action tensor([-1.5076, 0.0481, 0.9613, -0.7950]) tensor([0.0511, 0.2419, 0.6029, 0.1041]) -Greedy action tensor([-0.1547, -0.3640, 0.6063, -0.6418]) tensor([0.2190, 0.1777, 0.4688, 0.1346]) -Greedy action tensor([ 0.7174, -2.7795, 0.4797, 1.0458]) tensor([0.3118, 0.0094, 0.2458, 0.4330]) -Greedy action tensor([1.0185, 0.8700, 1.2019, 0.9905]) tensor([0.2478, 0.2136, 0.2977, 0.2410]) -Greedy action tensor([ 0.6794, 0.0358, -0.8259, 0.6765]) tensor([0.3644, 0.1914, 0.0809, 0.3633]) -Greedy action tensor([-0.8243, -0.6259, -0.3064, -0.3214]) tensor([0.1801, 0.2197, 0.3024, 0.2979]) -Greedy action tensor([-0.1879, 0.4887, -0.2306, 0.5693]) tensor([0.1651, 0.3247, 0.1582, 0.3520]) -Greedy action tensor([ 0.9856, -1.2866, 0.0468, 1.4916]) tensor([0.3172, 0.0327, 0.1241, 0.5261]) -Greedy action tensor([ 0.6099, 0.2562, -0.4755, 0.8028]) tensor([0.3074, 0.2159, 0.1038, 0.3729]) -Greedy action tensor([ 1.5017, -1.4386, -0.0611, 0.7642]) tensor([0.5745, 0.0304, 0.1204, 0.2748]) -Greedy action tensor([ 0.8275, -1.1970, 0.4585, 0.7759]) tensor([0.3606, 0.0476, 0.2493, 0.3425]) -Greedy action tensor([ 1.6008, -0.3234, -0.7712, 0.9438]) tensor([0.5689, 0.0831, 0.0531, 0.2949]) -Greedy action tensor([ 1.8064, 0.1117, -0.5297, 1.2680]) tensor([0.5365, 0.0985, 0.0519, 0.3131]) -Greedy action tensor([ 0.2736, -0.2502, 0.0655, 0.0466]) tensor([0.3124, 0.1850, 0.2537, 0.2489]) -Greedy action tensor([-0.6952, -1.5959, -0.3280, -0.7846]) tensor([0.2656, 0.1079, 0.3835, 0.2429]) -Greedy action tensor([-0.4018, 0.0300, -1.2074, 1.2109]) tensor([0.1250, 0.1924, 0.0558, 0.6268]) -Greedy action tensor([-1.5747, 0.1738, 0.7676, -0.5174]) tensor([0.0499, 0.2869, 0.5195, 0.1437]) -Greedy action tensor([-0.1907, -1.1050, -0.5391, -0.2844]) tensor([0.3314, 0.1328, 0.2339, 0.3018]) -Greedy action tensor([-1.1998, -0.2414, -0.7949, 0.5465]) tensor([0.0923, 0.2406, 0.1383, 0.5289]) -Greedy action tensor([-1.0567, -0.4516, 0.6832, 0.4596]) tensor([0.0764, 0.1400, 0.4354, 0.3482]) -Greedy action tensor([ 1.0119, -0.8453, 1.4176, -0.2584]) tensor([0.3405, 0.0531, 0.5108, 0.0956]) -Greedy action tensor([ 0.5148, -0.2731, 0.5286, -0.4044]) tensor([0.3487, 0.1586, 0.3536, 0.1391]) -Greedy action tensor([ 0.0742, -1.5308, 0.6721, -0.2289]) tensor([0.2661, 0.0535, 0.4839, 0.1965]) -Greedy action tensor([ 0.5831, -0.0314, -0.0921, -0.0420]) tensor([0.3868, 0.2092, 0.1969, 0.2070]) -Greedy action tensor([ 0.6425, -0.2679, -0.0530, -0.3173]) tensor([0.4378, 0.1762, 0.2184, 0.1677]) -Greedy action tensor([ 0.3098, -0.2628, -0.0494, -0.2091]) tensor([0.3500, 0.1974, 0.2444, 0.2083]) -Greedy action tensor([ 0.3815, 0.0248, -0.0471, -0.4062]) tensor([0.3563, 0.2494, 0.2321, 0.1621]) -Greedy action tensor([ 0.8166, -0.2326, -0.0018, -0.0129]) tensor([0.4489, 0.1572, 0.1980, 0.1958]) -Greedy action tensor([ 0.4894, -0.0955, -0.0997, -0.1495]) tensor([0.3788, 0.2111, 0.2102, 0.2000]) -Greedy action tensor([ 1.2072, -0.4640, -0.0654, -0.6570]) tensor([0.6161, 0.1158, 0.1726, 0.0955]) -Greedy action tensor([ 0.6769, -0.2766, -0.1961, -0.2769]) tensor([0.4570, 0.1761, 0.1909, 0.1761]) -Greedy action tensor([ 0.4044, -0.0339, -0.0779, -0.1034]) tensor([0.3491, 0.2252, 0.2155, 0.2101]) -Greedy action tensor([ 0.7560, -0.9307, 0.0177, -0.2631]) tensor([0.4941, 0.0915, 0.2361, 0.1783]) -Greedy action tensor([ 0.2525, -0.2778, -0.0691, -0.0920]) tensor([0.3309, 0.1947, 0.2399, 0.2345]) -Greedy action tensor([ 0.7902, -0.6134, -0.0600, -0.2403]) tensor([0.4926, 0.1211, 0.2105, 0.1758]) -Greedy action tensor([ 0.9317, -0.7018, 0.0271, -0.5512]) tensor([0.5474, 0.1069, 0.2215, 0.1242]) -Greedy action tensor([ 1.0471, -0.1760, -0.2027, -0.1956]) tensor([0.5349, 0.1574, 0.1533, 0.1544]) -Greedy action tensor([ 0.3905, -0.3600, -0.0854, -0.1603]) tensor([0.3746, 0.1768, 0.2327, 0.2159]) -Greedy action tensor([ 0.9966, -0.8587, -0.0946, -0.3501]) tensor([0.5707, 0.0893, 0.1917, 0.1484]) -Greedy action tensor([ 0.9285, -0.5521, -0.0114, -0.5091]) tensor([0.5389, 0.1226, 0.2105, 0.1280]) -Greedy action tensor([ 0.8605, -0.1839, 0.0363, -0.4546]) tensor([0.4857, 0.1709, 0.2130, 0.1304]) -Greedy action tensor([ 0.6781, -0.3119, 0.0576, -0.3838]) tensor([0.4434, 0.1648, 0.2384, 0.1533]) -Greedy action tensor([ 0.6910, -0.4651, -0.1032, -0.3024]) tensor([0.4679, 0.1473, 0.2115, 0.1733]) -Greedy action tensor([ 0.2364, -0.4554, -0.1840, -0.2498]) tensor([0.3607, 0.1806, 0.2369, 0.2218]) -Greedy action tensor([ 0.9816, -0.8100, 0.0886, -0.4790]) tensor([0.5530, 0.0922, 0.2264, 0.1284]) -Greedy action tensor([ 0.2935, 0.2695, -0.1430, 0.0920]) tensor([0.2907, 0.2838, 0.1879, 0.2376]) -Greedy action tensor([ 0.7101, -0.5646, -0.1092, -0.3617]) tensor([0.4848, 0.1355, 0.2137, 0.1660]) -Greedy action tensor([ 0.6004, -0.2358, -0.0375, -0.2828]) tensor([0.4210, 0.1825, 0.2225, 0.1741]) -Greedy action tensor([ 0.9259, -0.8343, 0.0031, -0.4305]) tensor([0.5473, 0.0942, 0.2175, 0.1410]) -Greedy action tensor([ 0.7727, -0.8957, -0.1199, -0.3622]) tensor([0.5209, 0.0982, 0.2134, 0.1675]) -Greedy action tensor([ 0.6693, -0.3263, -0.1275, -0.4106]) tensor([0.4630, 0.1711, 0.2087, 0.1572]) -Greedy action tensor([ 0.5301, -0.1055, 0.0372, -0.2519]) tensor([0.3849, 0.2039, 0.2351, 0.1761]) -Greedy action tensor([ 0.6634, -0.4044, 0.2923, -0.7264]) tensor([0.4380, 0.1506, 0.3022, 0.1091]) -Greedy action tensor([ 0.7935, -0.3991, -0.0927, -0.3510]) tensor([0.4916, 0.1492, 0.2027, 0.1565]) -Greedy action tensor([ 0.4477, -0.0867, 0.2233, -0.4330]) tensor([0.3572, 0.2093, 0.2854, 0.1481]) -Greedy action tensor([ 0.6062, -0.2823, 0.0359, -0.2217]) tensor([0.4143, 0.1704, 0.2342, 0.1810]) -Greedy action tensor([ 0.8259, -0.5489, -0.0221, -0.3234]) tensor([0.5005, 0.1266, 0.2143, 0.1586]) -Greedy action tensor([ 0.7907, -0.0520, 0.0694, -0.3410]) tensor([0.4466, 0.1923, 0.2171, 0.1440]) -Greedy action tensor([ 7.9105e-01, -2.6038e-01, 8.6367e-05, -2.8273e-01]) tensor([0.4663, 0.1629, 0.2114, 0.1593]) -Greedy action tensor([ 1.0148, -0.7329, 0.0469, -0.5368]) tensor([0.5663, 0.0986, 0.2151, 0.1200]) -Greedy action tensor([ 0.9172, -0.2478, -0.0061, -0.3464]) tensor([0.5020, 0.1566, 0.1994, 0.1419]) -Greedy action tensor([ 0.8014, -0.0469, 0.2133, -0.2704]) tensor([0.4299, 0.1841, 0.2388, 0.1472]) -Greedy action tensor([ 0.6466, -0.5994, -0.1263, -0.4659]) tensor([0.4812, 0.1384, 0.2222, 0.1582]) -Greedy action tensor([ 0.7219, -0.5672, -0.1100, -0.3551]) tensor([0.4875, 0.1343, 0.2122, 0.1660]) -Greedy action tensor([ 0.7216, -0.5531, -0.0839, -0.2988]) tensor([0.4792, 0.1339, 0.2141, 0.1727]) -Greedy action tensor([0.1626, 0.6014, 0.0029, 0.4164]) tensor([0.2131, 0.3305, 0.1817, 0.2747]) -Greedy action tensor([ 0.5471, -0.3429, -0.0293, -0.1577]) tensor([0.4054, 0.1665, 0.2278, 0.2003]) -Greedy action tensor([ 0.6835, -0.3854, -0.0714, -0.1591]) tensor([0.4456, 0.1530, 0.2095, 0.1919]) -Greedy action tensor([ 0.7019, -0.6478, 0.2443, -0.9187]) tensor([0.4785, 0.1241, 0.3028, 0.0946]) -Greedy action tensor([ 1.2305, -1.0960, -0.0612, -0.7606]) tensor([0.6627, 0.0647, 0.1821, 0.0905]) -Greedy action tensor([ 0.9653, -0.3231, -0.0185, -0.2486]) tensor([0.5137, 0.1416, 0.1921, 0.1526]) -Greedy action tensor([ 0.3126, 0.1165, -0.1673, -0.3628]) tensor([0.3390, 0.2787, 0.2098, 0.1725]) -Greedy action tensor([ 0.3663, 0.3180, -0.2469, 0.1979]) tensor([0.2995, 0.2853, 0.1622, 0.2530]) -Greedy action tensor([ 0.5575, -0.3669, 0.0151, -0.4385]) tensor([0.4260, 0.1690, 0.2477, 0.1573]) -Greedy action tensor([ 0.7500, -0.3958, -0.0419, -0.2661]) tensor([0.4688, 0.1491, 0.2124, 0.1697]) -Greedy action tensor([ 0.9725, -0.8340, 0.1384, -0.5350]) tensor([0.5495, 0.0902, 0.2386, 0.1217]) -Greedy action tensor([ 0.1311, -0.1148, -0.0782, -0.3415]) tensor([0.3109, 0.2431, 0.2522, 0.1938]) -Greedy action tensor([ 0.7601, -0.8037, -0.0599, -0.2239]) tensor([0.4942, 0.1034, 0.2177, 0.1847]) -Greedy action tensor([ 1.3570, -0.6746, -0.0451, -0.5808]) tensor([0.6574, 0.0862, 0.1618, 0.0947]) -Greedy action tensor([ 0.7576, -0.5462, 0.0296, -0.3568]) tensor([0.4802, 0.1304, 0.2319, 0.1576]) -Greedy action tensor([ 0.8405, -0.7224, 0.0235, -0.2131]) tensor([0.5000, 0.1048, 0.2209, 0.1743]) -Greedy action tensor([ 0.4180, -0.5437, -0.1309, -0.1795]) tensor([0.3984, 0.1523, 0.2301, 0.2192]) -Greedy action tensor([ 0.6667, -0.7632, -0.1936, -0.2218]) tensor([0.4823, 0.1154, 0.2040, 0.1983]) -Greedy action tensor([ 0.5349, -0.3872, -0.1489, -0.2622]) tensor([0.4250, 0.1690, 0.2145, 0.1915]) -Greedy action tensor([ 1.0829, -1.1557, 0.0401, -0.7483]) tensor([0.6175, 0.0658, 0.2177, 0.0989]) -Greedy action tensor([ 0.3219, 0.0083, -0.0557, -0.0602]) tensor([0.3227, 0.2358, 0.2212, 0.2202]) -Greedy action tensor([ 0.5687, -0.0119, -0.0475, -0.2360]) tensor([0.3926, 0.2197, 0.2120, 0.1756]) -Greedy action tensor([ 0.4617, -0.0416, -0.1110, 0.0866]) tensor([0.3502, 0.2117, 0.1975, 0.2406]) -Greedy action tensor([ 0.8127, -0.3094, -0.0094, -0.2442]) tensor([0.4733, 0.1541, 0.2080, 0.1645]) -Greedy action tensor([ 0.6170, -0.7729, -0.0357, -0.2659]) tensor([0.4580, 0.1141, 0.2385, 0.1894]) -Greedy action tensor([ 0.5565, -0.4579, 0.2380, -0.3851]) tensor([0.4032, 0.1462, 0.2933, 0.1573]) -Greedy action tensor([ 0.3157, 0.0448, -0.1746, -0.0467]) tensor([0.3256, 0.2484, 0.1994, 0.2266]) -Greedy action tensor([ 1.0072, -0.8594, -0.0747, -0.5563]) tensor([0.5872, 0.0908, 0.1990, 0.1230]) -Greedy action tensor([ 0.7879, -0.1161, -0.0271, -0.2802]) tensor([0.4564, 0.1848, 0.2020, 0.1568]) -Greedy action tensor([ 0.5227, 0.1542, 0.1182, -0.4567]) tensor([0.3657, 0.2530, 0.2440, 0.1373]) -Greedy action tensor([ 0.4927, -0.1783, 0.0693, -0.5420]) tensor([0.3966, 0.2027, 0.2597, 0.1409]) -Greedy action tensor([ 0.5093, -0.2217, -0.1397, -0.0254]) tensor([0.3861, 0.1859, 0.2018, 0.2262]) -Greedy action tensor([ 0.6214, -0.1930, -0.0977, -0.1165]) tensor([0.4153, 0.1839, 0.2023, 0.1985]) -Greedy action tensor([ 0.4998, 0.0673, -0.0957, -0.1762]) tensor([0.3692, 0.2395, 0.2035, 0.1878]) -Greedy action tensor([ 0.7636, -0.3824, -0.0630, -0.4398]) tensor([0.4865, 0.1547, 0.2128, 0.1460]) -Greedy action tensor([ 0.6589, -0.3446, -0.1334, -0.4194]) tensor([0.4630, 0.1698, 0.2097, 0.1575]) -Greedy action tensor([ 0.9442, -0.5668, 0.0923, -0.4659]) tensor([0.5287, 0.1167, 0.2256, 0.1291]) -Greedy action tensor([ 0.6265, -0.5460, -0.1065, -0.2658]) tensor([0.4546, 0.1407, 0.2184, 0.1862]) -Greedy action tensor([ 0.4707, 0.2582, -0.3576, -0.0894]) tensor([0.3551, 0.2871, 0.1551, 0.2028]) -Greedy action tensor([-1.5348, -0.5852, 0.4612, 0.0852]) tensor([0.0625, 0.1616, 0.4601, 0.3159]) -Greedy action tensor([-1.9210, -0.4506, 0.6549, -0.1732]) tensor([0.0413, 0.1795, 0.5423, 0.2369]) -Greedy action tensor([-1.6525, -0.5004, 0.8326, 0.3798]) tensor([0.0420, 0.1330, 0.5043, 0.3207]) -Greedy action tensor([-1.4161, 0.1780, 0.6455, -0.7195]) tensor([0.0633, 0.3119, 0.4977, 0.1271]) -Greedy action tensor([-1.5532, 0.2669, 0.3442, 0.0614]) tensor([0.0530, 0.3271, 0.3535, 0.2664]) -Greedy action tensor([-1.9002, -0.2976, 0.6100, -0.1557]) tensor([0.0417, 0.2069, 0.5129, 0.2385]) -Greedy action tensor([-1.0633, -0.6892, 0.4066, -0.2390]) tensor([0.1101, 0.1600, 0.4788, 0.2511]) -Greedy action tensor([-1.9321, -0.4529, 0.6585, -0.1745]) tensor([0.0408, 0.1790, 0.5438, 0.2364]) -Greedy action tensor([-1.3304, 0.1285, 0.2754, 0.3623]) tensor([0.0636, 0.2737, 0.3170, 0.3458]) -Greedy action tensor([-0.5295, -0.3300, 0.2076, -0.1373]) tensor([0.1727, 0.2108, 0.3609, 0.2556]) -Greedy action tensor([-1.1032, 0.7756, 0.1363, 0.1567]) tensor([0.0688, 0.4507, 0.2378, 0.2427]) -Greedy action tensor([-1.0023, 0.1063, 0.4526, 0.3815]) tensor([0.0813, 0.2463, 0.3482, 0.3243]) -Greedy action tensor([-1.8718, -0.3680, 0.6253, -0.1177]) tensor([0.0427, 0.1921, 0.5186, 0.2467]) -Greedy action tensor([-1.5521, -0.5340, 0.5751, 0.2420]) tensor([0.0550, 0.1523, 0.4617, 0.3309]) -Greedy action tensor([-1.7954, -0.4424, 0.8520, 0.3579]) tensor([0.0362, 0.1402, 0.5115, 0.3121]) -Greedy action tensor([-0.9804, 0.0638, 0.2822, -0.1288]) tensor([0.1029, 0.2923, 0.3637, 0.2411]) -Greedy action tensor([-1.9123, -0.3853, 0.6353, -0.1522]) tensor([0.0413, 0.1903, 0.5281, 0.2403]) -Greedy action tensor([-0.6537, -0.4636, 0.0909, 0.0922]) tensor([0.1557, 0.1883, 0.3278, 0.3282]) -Greedy action tensor([-1.8217, -0.3747, 0.6054, -0.0970]) tensor([0.0451, 0.1916, 0.5105, 0.2529]) -Greedy action tensor([-1.7640, -0.4920, 0.6159, -0.0114]) tensor([0.0473, 0.1688, 0.5110, 0.2729]) -Greedy action tensor([-1.8476, -0.4525, 0.7536, 0.1277]) tensor([0.0389, 0.1569, 0.5240, 0.2802]) -Greedy action tensor([-1.6046, -0.5080, 0.5135, 0.0956]) tensor([0.0562, 0.1683, 0.4676, 0.3079]) -Greedy action tensor([-1.9080, -0.4623, 0.6600, -0.1496]) tensor([0.0415, 0.1762, 0.5414, 0.2409]) -Greedy action tensor([ 0.0439, -0.5302, 0.3280, 0.8851]) tensor([0.1919, 0.1081, 0.2550, 0.4451]) -Greedy action tensor([-1.8184, -0.4256, 0.6828, -0.0343]) tensor([0.0431, 0.1737, 0.5262, 0.2569]) -Greedy action tensor([-1.9149, -0.4317, 0.6492, -0.1651]) tensor([0.0414, 0.1825, 0.5379, 0.2382]) -Greedy action tensor([-1.9084, -0.3301, 0.6304, -0.1609]) tensor([0.0412, 0.1999, 0.5222, 0.2367]) -Greedy action tensor([-0.9370, -0.5974, 0.1903, 0.3319]) tensor([0.1105, 0.1552, 0.3412, 0.3931]) -Greedy action tensor([-1.5650, -0.4748, 0.7620, 0.5661]) tensor([0.0442, 0.1314, 0.4525, 0.3720]) -Greedy action tensor([-1.8976, -0.4245, 0.6409, -0.1503]) tensor([0.0421, 0.1836, 0.5328, 0.2415]) -Greedy action tensor([-1.7797, -0.4399, 0.6976, 0.0443]) tensor([0.0436, 0.1666, 0.5195, 0.2703]) -Greedy action tensor([-1.6671, -0.4921, 0.5264, 0.0234]) tensor([0.0537, 0.1738, 0.4814, 0.2911]) -Greedy action tensor([-1.3489, -0.2751, 0.4503, -0.1471]) tensor([0.0752, 0.2201, 0.4546, 0.2501]) -Greedy action tensor([-1.7724, -0.4507, 0.7164, 0.0620]) tensor([0.0434, 0.1626, 0.5225, 0.2715]) -Greedy action tensor([-1.9019, -0.3536, 0.6399, -0.1354]) tensor([0.0412, 0.1939, 0.5237, 0.2412]) -Greedy action tensor([-1.7918, -0.4677, 0.6164, -0.0470]) tensor([0.0463, 0.1740, 0.5146, 0.2651]) -Greedy action tensor([-1.1077, 0.3865, 0.6082, 0.6293]) tensor([0.0599, 0.2668, 0.3331, 0.3402]) -Greedy action tensor([-1.4479, -0.4588, 0.4172, 0.0299]) tensor([0.0688, 0.1851, 0.4444, 0.3017]) -Greedy action tensor([-1.7938, -0.3643, 0.5774, -0.1150]) tensor([0.0471, 0.1966, 0.5041, 0.2523]) -Greedy action tensor([-1.9442, -0.4496, 0.6675, -0.1805]) tensor([0.0401, 0.1789, 0.5468, 0.2342]) -Greedy action tensor([-1.9179, -0.4515, 0.6746, -0.1591]) tensor([0.0408, 0.1769, 0.5454, 0.2369]) -Greedy action tensor([-1.9246, -0.4395, 0.6541, -0.1716]) tensor([0.0410, 0.1812, 0.5409, 0.2369]) -Greedy action tensor([-1.8673, -0.4229, 0.6139, -0.1424]) tensor([0.0438, 0.1859, 0.5242, 0.2461]) -Greedy action tensor([-1.3652, 0.5824, 0.2748, -0.0015]) tensor([0.0586, 0.4106, 0.3019, 0.2290]) -Greedy action tensor([-0.7268, -0.5777, 0.1555, 0.3206]) tensor([0.1346, 0.1563, 0.3253, 0.3837]) -Greedy action tensor([-1.1976, -0.5889, 0.3803, 0.6505]) tensor([0.0713, 0.1310, 0.3453, 0.4524]) -Greedy action tensor([-1.1119, -0.5700, 0.2498, 0.5590]) tensor([0.0838, 0.1440, 0.3269, 0.4454]) -Greedy action tensor([-1.7791, -0.2922, 0.6189, -0.0619]) tensor([0.0455, 0.2011, 0.5002, 0.2532]) -Greedy action tensor([-0.9513, -0.2147, 0.7004, 1.3166]) tensor([0.0557, 0.1163, 0.2903, 0.5377]) -Greedy action tensor([0.2710, 0.3776, 0.7124, 1.6585]) tensor([0.1303, 0.1450, 0.2027, 0.5220]) -Greedy action tensor([-1.2703, 0.7314, 0.1841, 0.2210]) tensor([0.0584, 0.4322, 0.2500, 0.2594]) -Greedy action tensor([-1.9234, -0.4544, 0.6535, -0.1727]) tensor([0.0412, 0.1791, 0.5423, 0.2374]) -Greedy action tensor([-1.5085, -0.3023, 0.5440, 0.1115]) tensor([0.0582, 0.1944, 0.4532, 0.2941]) -Greedy action tensor([-0.9270, 0.8984, 0.0704, 0.3174]) tensor([0.0747, 0.4635, 0.2025, 0.2593]) -Greedy action tensor([-1.8519, -0.4670, 0.7226, -0.0201]) tensor([0.0410, 0.1639, 0.5387, 0.2563]) -Greedy action tensor([-1.7532, 0.0246, 0.4939, -0.0422]) tensor([0.0456, 0.2700, 0.4317, 0.2526]) -Greedy action tensor([-1.8937, -0.4605, 0.6432, -0.1655]) tensor([0.0426, 0.1787, 0.5387, 0.2400]) -Greedy action tensor([-1.9121, -0.3794, 0.6452, -0.1536]) tensor([0.0411, 0.1903, 0.5301, 0.2385]) -Greedy action tensor([-1.2800, -0.5834, 0.4657, 0.5527]) tensor([0.0667, 0.1339, 0.3823, 0.4171]) -Greedy action tensor([-1.6804, -0.5569, 0.6997, 0.2141]) tensor([0.0464, 0.1428, 0.5019, 0.3088]) -Greedy action tensor([-0.9500, 0.5584, 0.4221, 0.4244]) tensor([0.0745, 0.3369, 0.2939, 0.2946]) -Greedy action tensor([-1.7251, -0.4210, 0.8534, 0.4692]) tensor([0.0373, 0.1373, 0.4910, 0.3344]) -Greedy action tensor([-1.9188, -0.4241, 0.6507, -0.1628]) tensor([0.0411, 0.1834, 0.5373, 0.2382]) -Greedy action tensor([-1.9235, -0.4040, 0.6510, -0.1647]) tensor([0.0408, 0.1865, 0.5357, 0.2370]) -Greedy action tensor([-0.8776, -0.3434, 0.2650, -0.2680]) tensor([0.1302, 0.2221, 0.4082, 0.2395]) -Greedy action tensor([-1.9080, -0.4603, 0.6506, -0.1558]) tensor([0.0418, 0.1777, 0.5396, 0.2409]) -Greedy action tensor([-1.8590, -0.4352, 0.6596, -0.0973]) tensor([0.0428, 0.1776, 0.5307, 0.2490]) -Greedy action tensor([-1.1069, -0.2491, 0.2975, 0.0266]) tensor([0.0949, 0.2238, 0.3865, 0.2948]) -Greedy action tensor([-1.8833, -0.4663, 0.6787, -0.1071]) tensor([0.0417, 0.1719, 0.5402, 0.2462]) -Greedy action tensor([-1.2827, -0.5427, 0.5258, 0.6876]) tensor([0.0611, 0.1280, 0.3727, 0.4382]) -Greedy action tensor([-1.3630, 0.5857, 0.2156, 0.2145]) tensor([0.0565, 0.3963, 0.2738, 0.2734]) -Greedy action tensor([-1.9228, -0.4227, 0.6566, -0.1573]) tensor([0.0408, 0.1828, 0.5380, 0.2384]) -Greedy action tensor([-0.1547, 0.3437, 0.3416, 1.2859]) tensor([0.1175, 0.1934, 0.1930, 0.4961]) -Greedy action tensor([-1.6947, -0.0358, 0.6188, -0.4423]) tensor([0.0503, 0.2645, 0.5090, 0.1762]) -Greedy action tensor([-1.7198, -0.5532, 0.5679, -0.0879]) tensor([0.0521, 0.1674, 0.5138, 0.2667]) -Greedy action tensor([-1.8952, -0.4272, 0.6490, -0.1438]) tensor([0.0420, 0.1821, 0.5342, 0.2418]) -Greedy action tensor([-1.8409, -0.1572, 0.5992, -0.1876]) tensor([0.0433, 0.2333, 0.4971, 0.2263]) -Greedy action tensor([-0.2817, -0.2944, 0.1912, 0.2007]) tensor([0.1919, 0.1894, 0.3079, 0.3108]) -Greedy action tensor([-1.8462, -0.2538, 0.5991, -0.1201]) tensor([0.0434, 0.2131, 0.5000, 0.2436]) -Greedy action tensor([-1.4864, -0.1676, 0.6735, 0.2578]) tensor([0.0523, 0.1954, 0.4532, 0.2991]) -Greedy action tensor([-1.5934, -0.4869, 0.5044, -0.0057]) tensor([0.0586, 0.1772, 0.4775, 0.2867]) -Greedy action tensor([ 0.7970, -0.4932, -0.4510, 0.2084]) tensor([0.4723, 0.1300, 0.1356, 0.2622]) -Greedy action tensor([ 1.7378, -0.4188, -0.4502, 0.4359]) tensor([0.6667, 0.0771, 0.0748, 0.1814]) -Greedy action tensor([ 1.1634, -0.7288, -0.2139, 0.3415]) tensor([0.5427, 0.0818, 0.1369, 0.2386]) -Greedy action tensor([ 1.6123, -0.8100, -0.5022, 0.5828]) tensor([0.6383, 0.0566, 0.0770, 0.2280]) -Greedy action tensor([ 1.1135, -0.4047, -0.1114, 0.2132]) tensor([0.5210, 0.1142, 0.1531, 0.2118]) -Greedy action tensor([ 2.4714, -0.6953, -0.2423, 0.7794]) tensor([0.7736, 0.0326, 0.0513, 0.1425]) -Greedy action tensor([ 1.5508, -0.5215, -0.4375, 0.5011]) tensor([0.6200, 0.0781, 0.0849, 0.2170]) -Greedy action tensor([ 0.6095, -0.1346, -0.0316, -0.0315]) tensor([0.3955, 0.1879, 0.2083, 0.2083]) -Greedy action tensor([ 1.8443, -0.7627, -0.2166, 0.6295]) tensor([0.6676, 0.0492, 0.0850, 0.1981]) -Greedy action tensor([ 0.9709, -0.5092, -0.2071, 0.4505]) tensor([0.4695, 0.1069, 0.1446, 0.2790]) -Greedy action tensor([ 0.5280, -0.2640, 0.0132, 0.2124]) tensor([0.3597, 0.1629, 0.2150, 0.2624]) -Greedy action tensor([ 1.1449, -0.3055, -0.5500, 0.0175]) tensor([0.5741, 0.1346, 0.1054, 0.1859]) -Greedy action tensor([ 1.9931, -0.6216, -0.2840, 0.4226]) tensor([0.7227, 0.0529, 0.0741, 0.1503]) -Greedy action tensor([ 0.7236, -0.2646, -0.3571, 0.6028]) tensor([0.3849, 0.1433, 0.1306, 0.3411]) -Greedy action tensor([ 1.2538, -0.5105, -0.3185, -0.0381]) tensor([0.6047, 0.1036, 0.1255, 0.1662]) -Greedy action tensor([ 1.6286, -1.3358, 0.1090, 0.0859]) tensor([0.6738, 0.0348, 0.1474, 0.1440]) -Greedy action tensor([ 2.1232, -1.1608, -0.1551, 0.0382]) tensor([0.7910, 0.0296, 0.0810, 0.0983]) -Greedy action tensor([ 1.0728, -0.4973, -0.4892, 0.7225]) tensor([0.4712, 0.0980, 0.0988, 0.3319]) -Greedy action tensor([ 1.1499, -0.9066, -0.1782, 0.3544]) tensor([0.5422, 0.0693, 0.1437, 0.2447]) -Greedy action tensor([ 1.5746, -0.7447, -0.2562, 0.6076]) tensor([0.6102, 0.0600, 0.0978, 0.2320]) -Greedy action tensor([ 1.1706, 0.0414, -0.7961, 0.5963]) tensor([0.4935, 0.1595, 0.0690, 0.2779]) -Greedy action tensor([ 1.7956, -0.5081, -0.3546, 0.5027]) tensor([0.6708, 0.0670, 0.0781, 0.1841]) -Greedy action tensor([ 0.8265, -0.2704, -0.1363, 0.0807]) tensor([0.4566, 0.1525, 0.1743, 0.2166]) -Greedy action tensor([ 0.2730, -0.0645, 0.1853, 0.0717]) tensor([0.2901, 0.2070, 0.2657, 0.2372]) -Greedy action tensor([ 1.3078, -0.3374, -0.2880, -0.0464]) tensor([0.6046, 0.1167, 0.1226, 0.1561]) -Greedy action tensor([ 1.1724, -0.8705, -0.0789, 0.3345]) tensor([0.5410, 0.0701, 0.1548, 0.2340]) -Greedy action tensor([ 2.1487, -0.4868, 0.3314, 1.0730]) tensor([0.6348, 0.0455, 0.1031, 0.2165]) -Greedy action tensor([ 1.5961, -1.2311, -0.3556, 0.7062]) tensor([0.6204, 0.0367, 0.0881, 0.2548]) -Greedy action tensor([ 1.8469, -0.8676, -0.4731, 0.9905]) tensor([0.6293, 0.0417, 0.0618, 0.2672]) -Greedy action tensor([ 0.7544, -0.3017, -0.0177, -0.0208]) tensor([0.4404, 0.1532, 0.2035, 0.2029]) -Greedy action tensor([ 1.0791, -0.3789, -0.2099, 0.6646]) tensor([0.4611, 0.1073, 0.1270, 0.3046]) -Greedy action tensor([ 1.3387, 0.1390, -0.7636, 0.1865]) tensor([0.5749, 0.1732, 0.0702, 0.1816]) -Greedy action tensor([ 1.4961, -0.1335, -0.2068, 0.2325]) tensor([0.6021, 0.1180, 0.1097, 0.1702]) -Greedy action tensor([ 0.7409, 0.0328, -0.2090, -0.3654]) tensor([0.4525, 0.2229, 0.1750, 0.1497]) -Greedy action tensor([ 1.3937, -0.6001, -0.4343, 0.2819]) tensor([0.6151, 0.0838, 0.0989, 0.2023]) -Greedy action tensor([ 1.1592, -0.6725, 0.0068, 0.2831]) tensor([0.5284, 0.0846, 0.1669, 0.2200]) -Greedy action tensor([ 1.4313, 0.0279, -0.5060, 0.2492]) tensor([0.5895, 0.1449, 0.0849, 0.1807]) -Greedy action tensor([ 1.1344, -0.2357, -0.5411, 0.0616]) tensor([0.5607, 0.1425, 0.1050, 0.1918]) -Greedy action tensor([ 1.1234, -0.5603, -0.6362, 1.0012]) tensor([0.4459, 0.0828, 0.0767, 0.3946]) -Greedy action tensor([ 1.7758, -0.6318, -0.2382, 0.3555]) tensor([0.6825, 0.0614, 0.0911, 0.1649]) -Greedy action tensor([ 1.4466, -0.6007, -0.3639, 0.8840]) tensor([0.5370, 0.0693, 0.0878, 0.3059]) -Greedy action tensor([ 0.8269, -0.3871, 0.0311, 0.2907]) tensor([0.4286, 0.1273, 0.1934, 0.2507]) -Greedy action tensor([ 0.9095, -0.1804, -0.1747, 0.0595]) tensor([0.4758, 0.1600, 0.1609, 0.2034]) -Greedy action tensor([ 1.4137, -0.3194, -0.3400, 0.0890]) tensor([0.6189, 0.1094, 0.1071, 0.1646]) -Greedy action tensor([ 0.5861, 0.0264, 0.0092, -0.2199]) tensor([0.3876, 0.2215, 0.2177, 0.1731]) -Greedy action tensor([ 1.0422, -0.2042, -0.0394, 0.0661]) tensor([0.4992, 0.1435, 0.1692, 0.1881]) -Greedy action tensor([ 1.5474, -0.4958, -0.2870, 0.3583]) tensor([0.6274, 0.0813, 0.1002, 0.1911]) -Greedy action tensor([ 1.6434, -0.3828, -0.5847, 0.1639]) tensor([0.6815, 0.0898, 0.0734, 0.1552]) -Greedy action tensor([ 2.2701, -0.7203, -0.6332, 0.4935]) tensor([0.7847, 0.0394, 0.0430, 0.1328]) -Greedy action tensor([ 2.0034, -0.4350, -0.3292, 0.4372]) tensor([0.7178, 0.0627, 0.0697, 0.1499]) -Greedy action tensor([ 0.5565, -0.3659, 0.0290, -0.1532]) tensor([0.4033, 0.1603, 0.2380, 0.1984]) -Greedy action tensor([ 1.5195, -0.2650, -0.1345, -0.1105]) tensor([0.6431, 0.1080, 0.1230, 0.1260]) -Greedy action tensor([ 0.3878, -0.2992, 0.2479, -0.1499]) tensor([0.3382, 0.1701, 0.2941, 0.1976]) -Greedy action tensor([ 1.2750, -0.1770, -0.4343, 0.5894]) tensor([0.5211, 0.1220, 0.0943, 0.2625]) -Greedy action tensor([ 1.2451, -0.5898, -0.6231, 0.7857]) tensor([0.5140, 0.0820, 0.0794, 0.3246]) -Greedy action tensor([ 2.1050, -0.7990, -0.1016, 0.3911]) tensor([0.7435, 0.0407, 0.0818, 0.1339]) -Greedy action tensor([ 0.9348, -0.2442, -0.1239, 0.4491]) tensor([0.4406, 0.1355, 0.1528, 0.2711]) -Greedy action tensor([ 2.2786, -1.1645, -0.2891, 0.6112]) tensor([0.7708, 0.0246, 0.0591, 0.1455]) -Greedy action tensor([ 1.2655, -0.5957, -0.1705, 0.5819]) tensor([0.5268, 0.0819, 0.1253, 0.2659]) -Greedy action tensor([ 1.3503, -0.4376, -0.3902, 0.3847]) tensor([0.5802, 0.0971, 0.1018, 0.2209]) -Greedy action tensor([ 0.6666, -0.1717, 0.0458, -0.1663]) tensor([0.4158, 0.1798, 0.2235, 0.1808]) -Greedy action tensor([ 1.1532, -0.5581, -0.4477, 0.3763]) tensor([0.5428, 0.0981, 0.1095, 0.2496]) -Greedy action tensor([ 0.6412, -0.2659, -0.1065, 0.0087]) tensor([0.4152, 0.1676, 0.1966, 0.2206]) -Greedy action tensor([ 0.5634, -0.0943, 0.2218, 0.0435]) tensor([0.3542, 0.1835, 0.2517, 0.2106]) -Greedy action tensor([ 1.0302, -0.0943, -0.2006, 0.1771]) tensor([0.4895, 0.1590, 0.1430, 0.2086]) -Greedy action tensor([ 1.1363, -0.4839, -0.2541, 0.5302]) tensor([0.5019, 0.0993, 0.1250, 0.2738]) -Greedy action tensor([ 1.0385, -0.6403, -0.1155, 0.2094]) tensor([0.5159, 0.0963, 0.1627, 0.2252]) -Greedy action tensor([ 1.6245, -0.5876, -0.5901, 1.0730]) tensor([0.5572, 0.0610, 0.0608, 0.3210]) -Greedy action tensor([ 1.7326, -0.8819, -0.1832, 0.6348]) tensor([0.6435, 0.0471, 0.0947, 0.2147]) -Greedy action tensor([ 1.4416, -0.4865, -0.1122, 0.2425]) tensor([0.6030, 0.0877, 0.1275, 0.1818]) -Greedy action tensor([ 0.9476, -0.3655, -0.0162, 0.2086]) tensor([0.4699, 0.1264, 0.1793, 0.2244]) -Greedy action tensor([ 1.5179, -0.7756, -0.6567, 0.6705]) tensor([0.6086, 0.0614, 0.0692, 0.2608]) -Greedy action tensor([ 1.5482, -0.0544, -0.4629, 0.3666]) tensor([0.6090, 0.1226, 0.0815, 0.1868]) -Greedy action tensor([ 1.3213, -0.2823, -0.5520, -0.0984]) tensor([0.6263, 0.1260, 0.0962, 0.1514]) -Greedy action tensor([ 1.2333, -0.4881, -0.3172, 0.3028]) tensor([0.5601, 0.1002, 0.1188, 0.2209]) -Greedy action tensor([ 0.7615, -0.1054, -0.2270, 0.2758]) tensor([0.4153, 0.1745, 0.1546, 0.2555]) -Greedy action tensor([ 1.4508, -0.2053, -0.3578, -0.0759]) tensor([0.6361, 0.1214, 0.1042, 0.1382]) -Greedy action tensor([ 1.9590, -0.5843, -0.3134, 0.2373]) tensor([0.7351, 0.0578, 0.0758, 0.1314]) -Greedy action tensor([ 1.0437, -0.5254, -0.5150, -0.0187]) tensor([0.5668, 0.1180, 0.1193, 0.1959]) -Greedy action tensor([ 2.0282, -1.0741, -0.2348, 0.7171]) tensor([0.7050, 0.0317, 0.0733, 0.1900]) -Greedy action tensor([ 1.4461, -0.3992, -0.8270, 0.3430]) tensor([0.6278, 0.0992, 0.0647, 0.2083]) -Greedy action tensor([ 0.0271, -0.3657, -0.3054, 0.9026]) tensor([0.2087, 0.1409, 0.1496, 0.5008]) -Greedy action tensor([ 0.4442, 0.7341, -0.1437, 0.2050]) tensor([0.2718, 0.3632, 0.1510, 0.2140]) -Greedy action tensor([ 1.1492, -0.8657, -0.8204, 0.7879]) tensor([0.5077, 0.0677, 0.0708, 0.3538]) -Greedy action tensor([ 0.1841, -0.8274, 0.9464, 0.9654]) tensor([0.1757, 0.0639, 0.3766, 0.3838]) -Greedy action tensor([ 0.2282, -0.8195, -0.9250, 0.3559]) tensor([0.3568, 0.1252, 0.1126, 0.4054]) -Greedy action tensor([-0.9360, -2.0619, 2.3933, -0.5321]) tensor([0.0325, 0.0106, 0.9082, 0.0487]) -Greedy action tensor([ 0.6208, -0.7377, -0.5578, -0.2581]) tensor([0.5050, 0.1298, 0.1554, 0.2097]) -Greedy action tensor([ 0.2323, 0.4129, 1.6390, -0.1908]) tensor([0.1442, 0.1727, 0.5886, 0.0944]) -Greedy action tensor([ 0.4056, -1.1613, -0.2305, 1.0450]) tensor([0.2752, 0.0574, 0.1457, 0.5216]) -Greedy action tensor([ 1.1141, -1.4978, -0.0879, 1.4414]) tensor([0.3622, 0.0266, 0.1089, 0.5024]) -Greedy action tensor([-0.6488, -0.6921, -0.3514, -1.2335]) tensor([0.2590, 0.2480, 0.3487, 0.1443]) -Greedy action tensor([ 0.0098, -1.2221, 0.1683, 0.5366]) tensor([0.2406, 0.0702, 0.2819, 0.4074]) -Greedy action tensor([ 0.1219, -0.8364, 2.5132, -0.1206]) tensor([0.0764, 0.0293, 0.8344, 0.0599]) -Greedy action tensor([ 0.9186, -0.2824, 1.0657, 1.3225]) tensor([0.2527, 0.0760, 0.2928, 0.3785]) -Greedy action tensor([ 0.8277, 0.1515, 0.9643, -0.0972]) tensor([0.3277, 0.1667, 0.3757, 0.1300]) -Greedy action tensor([ 0.9918, -1.7252, 0.4121, 0.8738]) tensor([0.3976, 0.0263, 0.2227, 0.3534]) -Greedy action tensor([ 1.0205, -0.9420, -0.2986, 1.0986]) tensor([0.4017, 0.0564, 0.1074, 0.4344]) -Greedy action tensor([ 0.1558, -1.2723, 0.5262, 0.2289]) tensor([0.2657, 0.0637, 0.3848, 0.2858]) -Greedy action tensor([-0.2428, -1.7563, 1.7337, -0.5653]) tensor([0.1091, 0.0240, 0.7878, 0.0791]) -Greedy action tensor([ 1.5198, -0.9634, 0.2297, 0.5415]) tensor([0.5765, 0.0481, 0.1587, 0.2167]) -Greedy action tensor([-0.1733, -0.5249, 0.4053, -0.5761]) tensor([0.2406, 0.1693, 0.4292, 0.1609]) -Greedy action tensor([ 1.2660, -1.2252, 1.0970, 1.3105]) tensor([0.3364, 0.0279, 0.2841, 0.3517]) -Greedy action tensor([ 0.2483, -0.9988, -0.0205, -0.2151]) tensor([0.3730, 0.1072, 0.2851, 0.2347]) -Greedy action tensor([ 0.1637, -0.1038, -0.6296, -0.0690]) tensor([0.3322, 0.2542, 0.1503, 0.2632]) -Greedy action tensor([ 0.8600, -0.6072, 1.4387, 0.7233]) tensor([0.2573, 0.0593, 0.4590, 0.2244]) -Greedy action tensor([-1.0554, -0.2565, -0.6631, -0.0777]) tensor([0.1358, 0.3020, 0.2011, 0.3611]) -Greedy action tensor([ 0.1044, 0.1133, 1.0014, -0.9252]) tensor([0.2075, 0.2094, 0.5090, 0.0741]) -Greedy action tensor([ 0.6453, -2.0179, 1.0229, 0.0662]) tensor([0.3237, 0.0226, 0.4723, 0.1814]) -Greedy action tensor([-1.3260, 0.9310, -0.1107, -0.4885]) tensor([0.0616, 0.5885, 0.2076, 0.1423]) -Greedy action tensor([ 0.0611, -0.1152, 0.4495, -0.5102]) tensor([0.2579, 0.2162, 0.3803, 0.1456]) -Greedy action tensor([ 1.3254, 0.3875, 0.5353, -0.1130]) tensor([0.4802, 0.1880, 0.2179, 0.1140]) -Greedy action tensor([ 1.2537, -0.8009, 0.2870, -0.2889]) tensor([0.5806, 0.0744, 0.2208, 0.1242]) -Greedy action tensor([ 0.6960, -1.0061, -0.3271, 0.9022]) tensor([0.3609, 0.0658, 0.1297, 0.4436]) -Greedy action tensor([ 1.3071, -1.4073, -0.4534, 1.1793]) tensor([0.4721, 0.0313, 0.0812, 0.4155]) -Greedy action tensor([-1.0890, -0.5612, 0.3829, -1.0467]) tensor([0.1235, 0.2094, 0.5382, 0.1289]) -Greedy action tensor([ 1.0072, -0.9898, 0.6899, -0.4347]) tensor([0.4761, 0.0646, 0.3467, 0.1126]) -Greedy action tensor([ 0.4679, -1.1792, 0.3167, 0.0546]) tensor([0.3685, 0.0710, 0.3168, 0.2437]) -Greedy action tensor([-1.5350, -2.7787, -0.1029, -0.1023]) tensor([0.1035, 0.0298, 0.4332, 0.4335]) -Greedy action tensor([ 0.3602, -0.7668, -0.0765, -0.1291]) tensor([0.3871, 0.1254, 0.2501, 0.2373]) -Greedy action tensor([ 0.4511, -0.6826, -0.2971, 0.9957]) tensor([0.2842, 0.0915, 0.1345, 0.4899]) -Greedy action tensor([ 0.4569, -1.7560, -0.5067, 0.7836]) tensor([0.3475, 0.0380, 0.1326, 0.4818]) -Greedy action tensor([-0.7241, -0.7934, -0.1718, 0.1895]) tensor([0.1622, 0.1514, 0.2819, 0.4045]) -Greedy action tensor([ 0.0398, 0.1261, 0.5839, -0.1491]) tensor([0.2155, 0.2349, 0.3713, 0.1784]) -Greedy action tensor([ 0.7077, 0.2816, -0.3401, -0.4724]) tensor([0.4327, 0.2826, 0.1517, 0.1329]) -Greedy action tensor([ 0.4492, -1.8872, 0.2511, 0.7792]) tensor([0.3023, 0.0292, 0.2480, 0.4205]) -Greedy action tensor([-0.1187, 0.4429, -0.8670, -0.0130]) tensor([0.2305, 0.4042, 0.1091, 0.2562]) -Greedy action tensor([0.5688, 0.2415, 0.3461, 0.2731]) tensor([0.3063, 0.2208, 0.2451, 0.2279]) -Greedy action tensor([ 0.5667, -1.8102, 0.3695, 1.0807]) tensor([0.2789, 0.0259, 0.2290, 0.4663]) -Greedy action tensor([-0.1522, 0.0914, -0.5569, 0.7022]) tensor([0.1889, 0.2411, 0.1260, 0.4440]) -Greedy action tensor([-1.0841, -1.0674, -0.8025, 0.4117]) tensor([0.1281, 0.1303, 0.1698, 0.5718]) -Greedy action tensor([ 0.1483, -0.1177, 1.6118, -0.9312]) tensor([0.1556, 0.1192, 0.6723, 0.0529]) -Greedy action tensor([ 0.4295, -0.3969, -0.2792, 0.0779]) tensor([0.3797, 0.1662, 0.1869, 0.2672]) -Greedy action tensor([ 0.2193, -0.1451, 0.2969, 0.3206]) tensor([0.2576, 0.1789, 0.2784, 0.2851]) -Greedy action tensor([-0.0083, -1.8834, 0.1872, 0.4032]) tensor([0.2578, 0.0395, 0.3135, 0.3891]) -Greedy action tensor([ 1.0245, -0.1915, 0.0598, 1.2375]) tensor([0.3431, 0.1017, 0.1307, 0.4245]) -Greedy action tensor([ 0.4148, -0.5145, 0.6403, 0.9626]) tensor([0.2285, 0.0902, 0.2862, 0.3951]) -Greedy action tensor([0.5432, 0.0601, 0.3967, 1.0655]) tensor([0.2400, 0.1481, 0.2073, 0.4046]) -Greedy action tensor([-0.3599, -1.0337, 0.7762, 0.4698]) tensor([0.1446, 0.0737, 0.4503, 0.3314]) -Greedy action tensor([-0.0727, 0.8390, 0.1050, -0.8440]) tensor([0.1943, 0.4837, 0.2321, 0.0899]) -Greedy action tensor([-1.0864, -1.6211, -0.4348, -0.4160]) tensor([0.1832, 0.1073, 0.3514, 0.3581]) -Greedy action tensor([0.9417, 0.3756, 0.2686, 1.4276]) tensor([0.2700, 0.1533, 0.1377, 0.4389]) -Greedy action tensor([-1.7624, 0.0559, 0.4411, -0.7697]) tensor([0.0529, 0.3257, 0.4788, 0.1427]) -Greedy action tensor([ 1.3830, -0.7360, 0.5035, 0.3802]) tensor([0.5258, 0.0632, 0.2182, 0.1929]) -Greedy action tensor([-0.3418, -1.2209, -0.6046, 0.3085]) tensor([0.2439, 0.1013, 0.1875, 0.4673]) -Greedy action tensor([-0.5280, 0.0151, -0.7854, 0.1793]) tensor([0.1811, 0.3117, 0.1400, 0.3673]) -Greedy action tensor([-1.2002, -0.4758, 1.4232, -0.4400]) tensor([0.0527, 0.1087, 0.7260, 0.1126]) -Greedy action tensor([ 0.1023, 0.3668, -0.4901, 1.1922]) tensor([0.1715, 0.2235, 0.0949, 0.5101]) -Greedy action tensor([ 1.2711, 0.1837, -0.5958, 0.7916]) tensor([0.4737, 0.1597, 0.0732, 0.2933]) -Greedy action tensor([-0.3388, -0.5531, 0.0471, -0.5408]) tensor([0.2442, 0.1971, 0.3592, 0.1995]) -Greedy action tensor([-1.3704, -0.8953, -0.1539, -0.5570]) tensor([0.1214, 0.1952, 0.4097, 0.2738]) -Greedy action tensor([-0.7687, -0.5848, -0.8536, -0.1882]) tensor([0.2038, 0.2449, 0.1872, 0.3641]) -Greedy action tensor([ 0.3694, -0.3381, 0.9300, 1.1593]) tensor([0.1836, 0.0905, 0.3215, 0.4044]) -Greedy action tensor([ 0.5622, -1.3689, 0.6268, 0.7622]) tensor([0.2913, 0.0422, 0.3107, 0.3558]) -Greedy action tensor([ 1.6513, -1.2607, 0.4234, 0.2813]) tensor([0.6245, 0.0340, 0.1829, 0.1587]) -Greedy action tensor([-0.1629, 0.2906, -0.8348, 0.6883]) tensor([0.1843, 0.2900, 0.0941, 0.4316]) -Greedy action tensor([ 0.5655, 0.7710, -0.0063, -0.9106]) tensor([0.3310, 0.4065, 0.1869, 0.0756]) -Greedy action tensor([-0.0329, 0.5612, -0.9274, 1.3843]) tensor([0.1361, 0.2466, 0.0557, 0.5616]) -Greedy action tensor([ 0.2948, -1.0626, -0.2324, -0.0599]) tensor([0.3923, 0.1009, 0.2316, 0.2752]) -Greedy action tensor([ 0.6246, -0.7079, 2.1733, -0.3684]) tensor([0.1577, 0.0416, 0.7422, 0.0584]) -Greedy action tensor([ 1.5871, -0.1087, 1.4876, 1.2830]) tensor([0.3538, 0.0649, 0.3203, 0.2610]) -Greedy action tensor([ 0.2206, -0.0321, 0.0922, -1.3705]) tensor([0.3497, 0.2716, 0.3075, 0.0712]) -Greedy action tensor([-0.5492, 0.1774, 0.0110, -0.3973]) tensor([0.1671, 0.3456, 0.2927, 0.1946]) -Greedy action tensor([ 0.9332, -0.3779, 0.0018, -0.4549]) tensor([0.5227, 0.1409, 0.2060, 0.1304]) -Greedy action tensor([ 0.3810, -0.0093, -0.0825, -0.3151]) tensor([0.3566, 0.2413, 0.2243, 0.1778]) -Greedy action tensor([ 0.7973, -0.5439, -0.2365, -0.4109]) tensor([0.5219, 0.1365, 0.1856, 0.1559]) -Greedy action tensor([ 0.7048, -0.7257, -0.1255, -0.2818]) tensor([0.4883, 0.1168, 0.2129, 0.1821]) -Greedy action tensor([ 0.5587, -0.3174, -0.1390, -0.2536]) tensor([0.4241, 0.1766, 0.2111, 0.1882]) -Greedy action tensor([ 0.9845, -1.2057, 0.0617, -0.6367]) tensor([0.5858, 0.0656, 0.2328, 0.1158]) -Greedy action tensor([ 0.3100, 0.0696, -0.0301, -0.0472]) tensor([0.3127, 0.2459, 0.2226, 0.2188]) -Greedy action tensor([ 0.1993, 0.0138, -0.0710, -0.3378]) tensor([0.3146, 0.2614, 0.2401, 0.1839]) -Greedy action tensor([ 0.5977, -0.1849, -0.0621, -0.1527]) tensor([0.4088, 0.1869, 0.2113, 0.1930]) -Greedy action tensor([ 1.0600, -0.7859, -0.1048, -0.7539]) tensor([0.6124, 0.0967, 0.1911, 0.0998]) -Greedy action tensor([ 1.0437, -0.4133, -0.0548, -0.3961]) tensor([0.5546, 0.1292, 0.1849, 0.1314]) -Greedy action tensor([ 0.4791, -0.2357, -0.0078, -0.2387]) tensor([0.3859, 0.1888, 0.2371, 0.1882]) -Greedy action tensor([ 1.0094, -0.9188, 0.0807, -0.5371]) tensor([0.5703, 0.0829, 0.2253, 0.1215]) -Greedy action tensor([ 0.8682, -0.2467, 0.0230, -0.3633]) tensor([0.4880, 0.1600, 0.2096, 0.1424]) -Greedy action tensor([ 0.8231, -0.1823, 0.0883, -1.1032]) tensor([0.5022, 0.1838, 0.2409, 0.0732]) -Greedy action tensor([ 1.1511, -0.6980, 0.2015, -0.4938]) tensor([0.5756, 0.0906, 0.2227, 0.1111]) -Greedy action tensor([ 0.9017, -0.6615, -0.1518, -0.5936]) tensor([0.5611, 0.1175, 0.1956, 0.1258]) -Greedy action tensor([ 0.6277, -0.1435, -0.0802, -0.0565]) tensor([0.4066, 0.1880, 0.2003, 0.2051]) -Greedy action tensor([ 0.8011, -0.4274, 0.0313, -0.6655]) tensor([0.5034, 0.1474, 0.2331, 0.1161]) -Greedy action tensor([ 0.5871, -0.2496, -0.2303, -0.3061]) tensor([0.4378, 0.1896, 0.1933, 0.1792]) -Greedy action tensor([ 0.5130, 0.0272, -0.0948, -0.1316]) tensor([0.3725, 0.2292, 0.2028, 0.1955]) -Greedy action tensor([ 0.8124, -0.3932, -0.0731, -0.2320]) tensor([0.4845, 0.1451, 0.1999, 0.1705]) -Greedy action tensor([ 0.7570, -0.7502, 0.0085, -0.3526]) tensor([0.4940, 0.1094, 0.2337, 0.1629]) -Greedy action tensor([ 0.4690, -0.3114, -0.1604, -0.1616]) tensor([0.3963, 0.1816, 0.2112, 0.2109]) -Greedy action tensor([ 0.3011, -0.3917, -0.1006, -0.2506]) tensor([0.3643, 0.1822, 0.2438, 0.2098]) -Greedy action tensor([ 0.4389, -0.2550, -0.1071, -0.1739]) tensor([0.3816, 0.1906, 0.2210, 0.2068]) -Greedy action tensor([ 0.7328, -0.4365, 0.0322, -0.4284]) tensor([0.4717, 0.1465, 0.2341, 0.1477]) -Greedy action tensor([ 0.5464, -0.3044, -0.0131, -0.2071]) tensor([0.4050, 0.1730, 0.2314, 0.1906]) -Greedy action tensor([ 0.7077, -0.3222, -0.0711, -0.2904]) tensor([0.4578, 0.1634, 0.2101, 0.1687]) -Greedy action tensor([ 0.6183, -0.2749, -0.2062, -0.1589]) tensor([0.4334, 0.1774, 0.1900, 0.1992]) -Greedy action tensor([ 0.7323, 0.0458, -0.2389, 0.0443]) tensor([0.4194, 0.2111, 0.1588, 0.2108]) -Greedy action tensor([ 0.7710, -0.3802, -0.0636, -0.4247]) tensor([0.4871, 0.1541, 0.2114, 0.1474]) -Greedy action tensor([ 0.6465, -0.2197, -0.0032, -0.1276]) tensor([0.4160, 0.1749, 0.2172, 0.1918]) -Greedy action tensor([ 0.8306, -0.7030, 0.0072, -0.5265]) tensor([0.5230, 0.1128, 0.2296, 0.1346]) -Greedy action tensor([ 0.4214, 0.1177, 0.0783, -0.2463]) tensor([0.3378, 0.2493, 0.2397, 0.1732]) -Greedy action tensor([ 0.5506, -0.3876, 0.0297, -0.2686]) tensor([0.4122, 0.1613, 0.2448, 0.1817]) -Greedy action tensor([ 0.8258, -0.4676, -0.0977, -0.4099]) tensor([0.5097, 0.1398, 0.2024, 0.1481]) -Greedy action tensor([ 0.5597, -0.3139, -0.0190, -0.2853]) tensor([0.4153, 0.1734, 0.2329, 0.1784]) -Greedy action tensor([ 0.8733, -0.2868, -0.0491, -0.2550]) tensor([0.4915, 0.1541, 0.1954, 0.1590]) -Greedy action tensor([ 0.6575, -0.2033, -0.0530, -0.1960]) tensor([0.4273, 0.1807, 0.2100, 0.1820]) -Greedy action tensor([ 0.9449, -0.8159, -0.1680, -0.8687]) tensor([0.6011, 0.1033, 0.1975, 0.0980]) -Greedy action tensor([ 0.8716, -0.2277, -0.0480, -0.6042]) tensor([0.5101, 0.1699, 0.2034, 0.1166]) -Greedy action tensor([ 0.5491, 0.2459, -0.1030, -0.3923]) tensor([0.3774, 0.2787, 0.1966, 0.1472]) -Greedy action tensor([ 0.5256, -0.1298, -0.0027, -0.0358]) tensor([0.3732, 0.1938, 0.2201, 0.2129]) -Greedy action tensor([ 5.1574e-01, -3.2878e-04, 3.5102e-02, -1.1836e-01]) tensor([0.3642, 0.2174, 0.2252, 0.1932]) -Greedy action tensor([ 0.3717, -0.2200, -0.0122, -0.4319]) tensor([0.3728, 0.2063, 0.2540, 0.1669]) -Greedy action tensor([ 0.7457, -0.5341, -0.0506, -0.0902]) tensor([0.4624, 0.1286, 0.2085, 0.2004]) -Greedy action tensor([ 0.8076, -0.8412, 0.1318, -0.2849]) tensor([0.4911, 0.0944, 0.2498, 0.1647]) -Greedy action tensor([ 0.5833, -0.3133, -0.0093, -0.2410]) tensor([0.4168, 0.1700, 0.2304, 0.1828]) -Greedy action tensor([ 0.9292, -0.7105, 0.0208, -0.4114]) tensor([0.5379, 0.1044, 0.2169, 0.1408]) -Greedy action tensor([ 0.5485, -0.4578, 0.0299, -0.8086]) tensor([0.4508, 0.1648, 0.2684, 0.1160]) -Greedy action tensor([ 0.7425, -0.7235, 0.0796, -0.6623]) tensor([0.5021, 0.1159, 0.2588, 0.1232]) -Greedy action tensor([ 0.7288, -0.6215, -0.0431, -0.4495]) tensor([0.4928, 0.1277, 0.2277, 0.1517]) -Greedy action tensor([ 0.4617, -0.0178, 0.0147, -0.2042]) tensor([0.3607, 0.2233, 0.2307, 0.1853]) -Greedy action tensor([ 0.5062, -0.0839, -0.0449, -0.3155]) tensor([0.3891, 0.2156, 0.2242, 0.1711]) -Greedy action tensor([ 0.6358, 0.0330, 0.1204, -0.1323]) tensor([0.3834, 0.2098, 0.2290, 0.1779]) -Greedy action tensor([ 0.2642, -0.0753, -0.1079, -0.1865]) tensor([0.3291, 0.2344, 0.2268, 0.2097]) -Greedy action tensor([ 0.8471, -0.3600, -0.2622, -0.4861]) tensor([0.5284, 0.1580, 0.1743, 0.1393]) -Greedy action tensor([ 0.7332, -0.5824, -0.0862, -0.4752]) tensor([0.4981, 0.1336, 0.2195, 0.1488]) -Greedy action tensor([ 0.6464, -0.4797, -0.0846, -0.1599]) tensor([0.4440, 0.1440, 0.2138, 0.1982]) -Greedy action tensor([ 0.5834, -0.3324, 0.0560, -0.3808]) tensor([0.4217, 0.1687, 0.2488, 0.1608]) -Greedy action tensor([ 0.6807, -0.4345, -0.0200, -0.3907]) tensor([0.4616, 0.1513, 0.2290, 0.1581]) -Greedy action tensor([ 0.6596, -0.1597, -0.0367, -0.3371]) tensor([0.4332, 0.1909, 0.2159, 0.1599]) -Greedy action tensor([ 0.5323, 0.2527, 0.0386, -0.2233]) tensor([0.3526, 0.2666, 0.2152, 0.1656]) -Greedy action tensor([ 0.6933, -0.5250, -0.0606, -0.2318]) tensor([0.4624, 0.1367, 0.2176, 0.1833]) -Greedy action tensor([ 0.6264, -0.3407, -0.0098, -0.1355]) tensor([0.4208, 0.1600, 0.2227, 0.1964]) -Greedy action tensor([ 0.8716, -0.6840, 0.0286, -0.4827]) tensor([0.5264, 0.1111, 0.2266, 0.1359]) -Greedy action tensor([ 0.8221, -0.3250, -0.0310, -0.2710]) tensor([0.4810, 0.1528, 0.2050, 0.1612]) -Greedy action tensor([ 0.9892, -0.8466, 0.0228, -0.3740]) tensor([0.5569, 0.0888, 0.2119, 0.1425]) -Greedy action tensor([ 0.5426, -0.1919, 0.0769, -0.3388]) tensor([0.3966, 0.1903, 0.2489, 0.1643]) -Greedy action tensor([ 0.8911, -0.6761, -0.0246, -0.6075]) tensor([0.5458, 0.1139, 0.2184, 0.1219]) -Greedy action tensor([ 0.8823, -0.1581, -0.0271, -0.5441]) tensor([0.5009, 0.1770, 0.2018, 0.1203]) -Greedy action tensor([ 0.5830, -0.1734, -0.1185, -0.0794]) tensor([0.4031, 0.1892, 0.1999, 0.2078]) -Greedy action tensor([ 0.8546, -0.3748, 0.0249, -0.4170]) tensor([0.4978, 0.1456, 0.2171, 0.1396]) -Greedy action tensor([ 1.2831, -0.5141, -0.1260, -0.5102]) tensor([0.6343, 0.1051, 0.1550, 0.1056]) -Greedy action tensor([ 0.3900, 0.2105, -0.1592, 0.1520]) tensor([0.3124, 0.2610, 0.1804, 0.2462]) -Greedy action tensor([ 0.9603, -0.4260, 0.0944, -0.5623]) tensor([0.5294, 0.1324, 0.2227, 0.1155]) -Greedy action tensor([ 0.3521, -0.1258, -0.0315, -0.1936]) tensor([0.3471, 0.2152, 0.2365, 0.2011]) -Greedy action tensor([ 1.1149, -0.8594, -0.0301, -0.6548]) tensor([0.6144, 0.0853, 0.1955, 0.1047]) -Greedy action tensor([ 0.7510, -0.5312, -0.1174, -0.5579]) tensor([0.5084, 0.1410, 0.2133, 0.1373]) -Greedy action tensor([ 0.3985, -0.2716, -0.2268, -0.0593]) tensor([0.3732, 0.1910, 0.1997, 0.2361]) -Greedy action tensor([-1.5451, -0.5005, 0.5595, 0.2105]) tensor([0.0561, 0.1594, 0.4600, 0.3245]) -Greedy action tensor([-1.2234, -0.5293, 0.2832, 0.3767]) tensor([0.0802, 0.1606, 0.3619, 0.3973]) -Greedy action tensor([-1.7284, -0.3853, 0.5522, -0.0463]) tensor([0.0500, 0.1916, 0.4894, 0.2690]) -Greedy action tensor([-1.8230, -0.3040, 0.5868, -0.0871]) tensor([0.0447, 0.2042, 0.4975, 0.2536]) -Greedy action tensor([-1.4781, -0.5021, 0.3836, 0.1231]) tensor([0.0665, 0.1764, 0.4276, 0.3296]) -Greedy action tensor([-1.3372, -0.6704, 0.3684, 0.1190]) tensor([0.0785, 0.1529, 0.4320, 0.3366]) -Greedy action tensor([-1.1413, -0.4916, 1.0610, 1.2933]) tensor([0.0428, 0.0819, 0.3870, 0.4883]) -Greedy action tensor([-1.7815, -0.5213, 0.6990, 0.0417]) tensor([0.0441, 0.1556, 0.5271, 0.2732]) -Greedy action tensor([-1.8191, -0.4598, 0.6508, -0.0856]) tensor([0.0447, 0.1740, 0.5283, 0.2530]) -Greedy action tensor([-1.8128, -0.4269, 0.6346, -0.0658]) tensor([0.0449, 0.1794, 0.5184, 0.2574]) -Greedy action tensor([-1.8688, -0.4562, 0.6270, -0.1447]) tensor([0.0438, 0.1798, 0.5310, 0.2454]) -Greedy action tensor([-1.0004, 0.5090, 0.2243, -0.1056]) tensor([0.0879, 0.3978, 0.2992, 0.2151]) -Greedy action tensor([-1.8036, -0.2121, 0.5702, -0.1332]) tensor([0.0455, 0.2236, 0.4889, 0.2420]) -Greedy action tensor([-1.8519, -0.4771, 0.6150, -0.1257]) tensor([0.0447, 0.1769, 0.5271, 0.2513]) -Greedy action tensor([-1.0699, -0.4850, 0.8612, 1.0559]) tensor([0.0553, 0.0993, 0.3817, 0.4637]) -Greedy action tensor([-0.9331, 0.3302, 0.9561, 1.1358]) tensor([0.0524, 0.1855, 0.3469, 0.4152]) -Greedy action tensor([-1.9187, -0.4346, 0.6514, -0.1643]) tensor([0.0412, 0.1818, 0.5387, 0.2383]) -Greedy action tensor([-1.6533, -0.1851, 0.7124, -0.5336]) tensor([0.0525, 0.2278, 0.5589, 0.1608]) -Greedy action tensor([-0.9904, -0.7142, 0.6204, 1.3709]) tensor([0.0558, 0.0735, 0.2792, 0.5915]) -Greedy action tensor([-1.8975, -0.4257, 0.6383, -0.1537]) tensor([0.0422, 0.1838, 0.5327, 0.2413]) -Greedy action tensor([-1.9334, -0.4279, 0.6610, -0.1698]) tensor([0.0404, 0.1822, 0.5414, 0.2359]) -Greedy action tensor([-1.8937, -0.4387, 0.6406, -0.1560]) tensor([0.0424, 0.1817, 0.5348, 0.2411]) -Greedy action tensor([-1.6680, -0.2042, 0.4909, -0.0290]) tensor([0.0523, 0.2259, 0.4527, 0.2692]) -Greedy action tensor([-1.2717, -0.5199, 0.6291, 0.7644]) tensor([0.0572, 0.1214, 0.3830, 0.4384]) -Greedy action tensor([-1.0942, -0.5963, 0.3117, 0.0194]) tensor([0.1024, 0.1684, 0.4175, 0.3117]) -Greedy action tensor([-1.5530, -0.5396, 0.6553, 0.3160]) tensor([0.0517, 0.1425, 0.4706, 0.3352]) -Greedy action tensor([-1.5859, -0.3769, 0.4749, 0.0626]) tensor([0.0575, 0.1925, 0.4513, 0.2988]) -Greedy action tensor([-1.2001, 0.4794, 0.2767, -0.0270]) tensor([0.0716, 0.3838, 0.3134, 0.2313]) -Greedy action tensor([-1.7375, -0.5178, 0.5582, -0.0364]) tensor([0.0505, 0.1710, 0.5017, 0.2768]) -Greedy action tensor([-1.7261, -0.4966, 0.5509, -0.0869]) tensor([0.0518, 0.1770, 0.5046, 0.2666]) -Greedy action tensor([-1.7116, -0.3785, 0.5649, -0.1520]) tensor([0.0518, 0.1966, 0.5050, 0.2466]) -Greedy action tensor([-1.3907, -0.4425, 0.5433, 0.4239]) tensor([0.0601, 0.1551, 0.4158, 0.3690]) -Greedy action tensor([-1.9365, -0.4509, 0.6608, -0.1758]) tensor([0.0405, 0.1791, 0.5445, 0.2359]) -Greedy action tensor([-1.7290, -0.5391, 0.5506, -0.0699]) tensor([0.0518, 0.1702, 0.5060, 0.2721]) -Greedy action tensor([-1.4983, -0.5452, 0.4566, 0.1271]) tensor([0.0635, 0.1648, 0.4488, 0.3228]) -Greedy action tensor([-1.0111, -0.0360, 0.8754, 1.1992]) tensor([0.0516, 0.1369, 0.3406, 0.4708]) -Greedy action tensor([-1.7183, -0.4484, 0.6175, 0.0150]) tensor([0.0486, 0.1732, 0.5029, 0.2753]) -Greedy action tensor([-1.8083, -0.0756, 0.5422, -0.1111]) tensor([0.0442, 0.2502, 0.4641, 0.2415]) -Greedy action tensor([-1.0471, -0.6339, 0.5602, 0.7395]) tensor([0.0742, 0.1122, 0.3704, 0.4431]) -Greedy action tensor([-1.7416, -0.4443, 0.6671, 0.0578]) tensor([0.0458, 0.1677, 0.5095, 0.2770]) -Greedy action tensor([-1.6516, -0.5124, 0.5074, -0.0046]) tensor([0.0556, 0.1738, 0.4818, 0.2888]) -Greedy action tensor([-1.9264, -0.4568, 0.6802, -0.1378]) tensor([0.0402, 0.1747, 0.5447, 0.2404]) -Greedy action tensor([-1.6187, -0.3276, 0.6408, -0.3837]) tensor([0.0566, 0.2060, 0.5426, 0.1948]) -Greedy action tensor([-1.7891, -0.3781, 0.6383, -0.0265]) tensor([0.0449, 0.1842, 0.5090, 0.2618]) -Greedy action tensor([-1.8167, -0.4880, 0.6096, -0.1158]) tensor([0.0464, 0.1750, 0.5246, 0.2540]) -Greedy action tensor([-1.5847, 0.0725, 0.4181, -0.0421]) tensor([0.0546, 0.2861, 0.4042, 0.2551]) -Greedy action tensor([-1.8237, -0.5089, 0.6305, -0.1004]) tensor([0.0455, 0.1696, 0.5298, 0.2551]) -Greedy action tensor([-1.4480, -0.5585, 0.3834, 0.1747]) tensor([0.0678, 0.1651, 0.4234, 0.3437]) -Greedy action tensor([-1.2206, -0.7120, 0.6075, 0.7005]) tensor([0.0636, 0.1058, 0.3960, 0.4346]) -Greedy action tensor([-1.9049, -0.3782, 0.6369, -0.1506]) tensor([0.0415, 0.1911, 0.5274, 0.2400]) -Greedy action tensor([-1.3058, 0.6168, 0.2299, 0.1444]) tensor([0.0597, 0.4083, 0.2773, 0.2546]) -Greedy action tensor([-1.9236, -0.3660, 0.6433, -0.1651]) tensor([0.0407, 0.1932, 0.5300, 0.2361]) -Greedy action tensor([-1.0565, 0.8970, 0.1143, 0.2723]) tensor([0.0664, 0.4685, 0.2142, 0.2509]) -Greedy action tensor([-1.8982, -0.4314, 0.6443, -0.1456]) tensor([0.0420, 0.1820, 0.5337, 0.2423]) -Greedy action tensor([-0.7514, 1.0523, 0.0985, 0.4720]) tensor([0.0781, 0.4740, 0.1826, 0.2653]) -Greedy action tensor([-1.9097, -0.3872, 0.6430, -0.1552]) tensor([0.0413, 0.1894, 0.5305, 0.2388]) -Greedy action tensor([-0.4803, -0.2956, 0.1483, 0.0918]) tensor([0.1709, 0.2056, 0.3205, 0.3029]) -Greedy action tensor([-1.7491, 0.0199, 0.5001, -0.0829]) tensor([0.0462, 0.2711, 0.4381, 0.2446]) -Greedy action tensor([-1.7638, -0.4083, 0.5723, -0.0642]) tensor([0.0483, 0.1875, 0.4998, 0.2644]) -Greedy action tensor([-1.5967, -0.4908, 0.5491, 0.2363]) tensor([0.0531, 0.1605, 0.4542, 0.3322]) -Greedy action tensor([-1.7806, -0.4535, 0.6904, 0.1361]) tensor([0.0427, 0.1611, 0.5057, 0.2905]) -Greedy action tensor([-1.8927, -0.4741, 0.7028, -0.0999]) tensor([0.0407, 0.1684, 0.5462, 0.2447]) -Greedy action tensor([-1.7897, -0.2938, 0.5576, -0.0679]) tensor([0.0465, 0.2074, 0.4860, 0.2600]) -Greedy action tensor([-1.3181, -0.4778, 0.5420, 0.6491]) tensor([0.0592, 0.1372, 0.3803, 0.4233]) -Greedy action tensor([-0.4758, -0.6683, 0.6783, 1.3591]) tensor([0.0888, 0.0733, 0.2816, 0.5563]) -Greedy action tensor([-1.6167, -0.5487, 0.5196, 0.0769]) tensor([0.0561, 0.1633, 0.4753, 0.3053]) -Greedy action tensor([-2.0163, -0.7692, 0.8259, -0.0310]) tensor([0.0346, 0.1204, 0.5932, 0.2518]) -Greedy action tensor([-1.0749, -0.9167, 0.5023, 1.2838]) tensor([0.0569, 0.0666, 0.2752, 0.6013]) -Greedy action tensor([-1.8897, -0.4698, 0.6390, -0.1453]) tensor([0.0427, 0.1768, 0.5359, 0.2446]) -Greedy action tensor([-1.7620, -0.4437, 0.5716, -0.1311]) tensor([0.0496, 0.1854, 0.5116, 0.2534]) -Greedy action tensor([-1.7867, -0.4383, 0.6351, 0.0198]) tensor([0.0450, 0.1734, 0.5073, 0.2742]) -Greedy action tensor([-1.1334, -0.4005, 0.3337, -0.0513]) tensor([0.0964, 0.2007, 0.4183, 0.2846]) -Greedy action tensor([-1.6657, -0.5438, 0.9160, 0.5224]) tensor([0.0382, 0.1172, 0.5044, 0.3403]) -Greedy action tensor([ 0.2064, 1.1385, -0.1572, 0.3132]) tensor([0.1870, 0.4749, 0.1300, 0.2081]) -Greedy action tensor([-1.3776, -0.3809, 0.3905, 0.0946]) tensor([0.0718, 0.1945, 0.4207, 0.3130]) -Greedy action tensor([-1.4906, -0.5023, 0.7040, -0.4408]) tensor([0.0644, 0.1731, 0.5784, 0.1841]) -Greedy action tensor([-1.7871, -0.4511, 0.6362, 0.0187]) tensor([0.0451, 0.1716, 0.5089, 0.2744]) -Greedy action tensor([-1.8918, -0.5037, 0.8152, 0.0309]) tensor([0.0373, 0.1493, 0.5585, 0.2549]) -Greedy action tensor([-1.9079, -0.3377, 0.6395, -0.1413]) tensor([0.0409, 0.1968, 0.5228, 0.2395]) -Greedy action tensor([-1.8801, -0.4253, 0.6360, -0.1338]) tensor([0.0427, 0.1831, 0.5291, 0.2450]) -Greedy action tensor([-1.7675, -0.4643, 0.5810, -0.0864]) tensor([0.0487, 0.1794, 0.5102, 0.2617]) -Greedy action tensor([ 0.7358, -0.2605, -0.0563, 0.1146]) tensor([0.4238, 0.1565, 0.1920, 0.2277]) -Greedy action tensor([ 1.6037, -0.0858, -0.5327, 0.0654]) tensor([0.6590, 0.1217, 0.0778, 0.1415]) -Greedy action tensor([ 1.7904, 0.1498, -0.9984, 0.6384]) tensor([0.6364, 0.1234, 0.0391, 0.2011]) -Greedy action tensor([ 1.8598, -0.3008, -0.5360, 0.7916]) tensor([0.6452, 0.0744, 0.0588, 0.2217]) -Greedy action tensor([ 1.1855, -0.3525, -0.2186, 0.0054]) tensor([0.5657, 0.1215, 0.1389, 0.1738]) -Greedy action tensor([ 1.4352, -0.0719, -0.1699, 0.3648]) tensor([0.5665, 0.1255, 0.1138, 0.1942]) -Greedy action tensor([ 1.5391, 0.1603, 0.2667, -0.1811]) tensor([0.5844, 0.1472, 0.1637, 0.1046]) -Greedy action tensor([ 1.3652, -0.1066, -0.3682, 0.2468]) tensor([0.5770, 0.1324, 0.1020, 0.1886]) -Greedy action tensor([ 1.0982, -0.0176, -0.0982, 0.0953]) tensor([0.5008, 0.1641, 0.1514, 0.1837]) -Greedy action tensor([ 1.1976, -0.0375, -0.4419, 0.2256]) tensor([0.5367, 0.1561, 0.1042, 0.2031]) -Greedy action tensor([ 0.8875, -0.3772, -0.6691, 0.4211]) tensor([0.4716, 0.1332, 0.0994, 0.2958]) -Greedy action tensor([ 1.3319, -0.5029, -0.0989, 0.1238]) tensor([0.5891, 0.0940, 0.1409, 0.1760]) -Greedy action tensor([ 2.2474, -0.4519, -0.5454, 0.5657]) tensor([0.7607, 0.0512, 0.0466, 0.1415]) -Greedy action tensor([ 0.8174, -0.3485, -0.3531, 0.2839]) tensor([0.4528, 0.1411, 0.1405, 0.2656]) -Greedy action tensor([ 1.7162, -0.5264, -0.2219, 0.2280]) tensor([0.6775, 0.0719, 0.0976, 0.1530]) -Greedy action tensor([ 1.8800, -0.3976, -0.4491, 0.3264]) tensor([0.7085, 0.0726, 0.0690, 0.1498]) -Greedy action tensor([ 1.4178, -0.2579, -0.5538, 0.2832]) tensor([0.6068, 0.1136, 0.0845, 0.1951]) -Greedy action tensor([ 1.1220, -0.2288, -0.3858, 0.2728]) tensor([0.5241, 0.1357, 0.1160, 0.2242]) -Greedy action tensor([ 1.5211, -0.3418, -0.3379, 0.3907]) tensor([0.6120, 0.0950, 0.0954, 0.1976]) -Greedy action tensor([ 1.2189, -0.1512, -1.0029, 0.2610]) tensor([0.5727, 0.1455, 0.0621, 0.2197]) -Greedy action tensor([ 1.2302, -0.0867, -0.2776, -0.1498]) tensor([0.5744, 0.1539, 0.1272, 0.1445]) -Greedy action tensor([ 1.3031, -0.0682, 0.1339, 0.1202]) tensor([0.5345, 0.1357, 0.1660, 0.1638]) -Greedy action tensor([ 1.4185, -0.5490, -0.1264, 0.4486]) tensor([0.5773, 0.0807, 0.1232, 0.2189]) -Greedy action tensor([ 1.0278, -0.5188, -0.4376, 0.0563]) tensor([0.5487, 0.1169, 0.1267, 0.2077]) -Greedy action tensor([ 1.9676, -0.9683, -0.2887, 0.7273]) tensor([0.6910, 0.0367, 0.0724, 0.1999]) -Greedy action tensor([ 1.0849, -0.4710, -0.2541, 0.5187]) tensor([0.4900, 0.1034, 0.1284, 0.2782]) -Greedy action tensor([ 0.5965, -0.4547, -0.4708, 0.2814]) tensor([0.4127, 0.1442, 0.1419, 0.3011]) -Greedy action tensor([ 1.1879, -0.3298, -0.6894, 0.3450]) tensor([0.5547, 0.1216, 0.0849, 0.2388]) -Greedy action tensor([ 1.4144, -0.1481, -0.1073, -0.0415]) tensor([0.6020, 0.1262, 0.1314, 0.1404]) -Greedy action tensor([ 1.4608, -0.2474, -0.0452, 0.3312]) tensor([0.5793, 0.1050, 0.1285, 0.1872]) -Greedy action tensor([ 1.1997, -0.4094, -0.1801, 0.2147]) tensor([0.5479, 0.1096, 0.1379, 0.2046]) -Greedy action tensor([ 0.9893, -0.2222, -0.6293, 0.3385]) tensor([0.4956, 0.1476, 0.0982, 0.2586]) -Greedy action tensor([ 1.2591, -0.2051, -0.0451, 0.0965]) tensor([0.5509, 0.1274, 0.1495, 0.1722]) -Greedy action tensor([ 1.5450, -0.4922, -0.2649, 0.5273]) tensor([0.6041, 0.0788, 0.0989, 0.2183]) -Greedy action tensor([ 1.2520, -0.2440, -0.0064, -0.1591]) tensor([0.5708, 0.1279, 0.1622, 0.1392]) -Greedy action tensor([ 1.0527, -0.0504, -0.6679, 0.3183]) tensor([0.5024, 0.1667, 0.0899, 0.2410]) -Greedy action tensor([ 1.1854, -0.3967, -0.6509, 0.3685]) tensor([0.5535, 0.1138, 0.0882, 0.2445]) -Greedy action tensor([ 1.9616, -0.7772, -0.6188, 0.8802]) tensor([0.6759, 0.0437, 0.0512, 0.2292]) -Greedy action tensor([ 1.4572, -0.5824, -0.2798, 0.5879]) tensor([0.5796, 0.0754, 0.1020, 0.2430]) -Greedy action tensor([ 1.5915, -0.0714, 0.0780, -0.3398]) tensor([0.6432, 0.1219, 0.1416, 0.0932]) -Greedy action tensor([ 1.3577, -0.0020, -0.1872, 0.5799]) tensor([0.5183, 0.1331, 0.1106, 0.2381]) -Greedy action tensor([ 1.2192, -0.2986, -0.1045, 0.0792]) tensor([0.5540, 0.1214, 0.1474, 0.1772]) -Greedy action tensor([ 1.4447, -0.7281, -0.3101, 0.6915]) tensor([0.5689, 0.0648, 0.0984, 0.2679]) -Greedy action tensor([ 0.8571, -0.0748, 0.0216, 0.0260]) tensor([0.4419, 0.1740, 0.1916, 0.1925]) -Greedy action tensor([ 1.3024, -0.0316, -0.8033, 0.3747]) tensor([0.5616, 0.1479, 0.0684, 0.2221]) -Greedy action tensor([ 1.4506, -0.4210, -0.4282, 0.4110]) tensor([0.6023, 0.0927, 0.0920, 0.2130]) -Greedy action tensor([ 2.1175, -1.0206, -0.1018, 0.7819]) tensor([0.7067, 0.0306, 0.0768, 0.1859]) -Greedy action tensor([ 2.5627, -0.7064, -0.5210, 0.7315]) tensor([0.8038, 0.0306, 0.0368, 0.1288]) -Greedy action tensor([ 1.0137, -0.5048, -0.2584, 0.5336]) tensor([0.4722, 0.1034, 0.1323, 0.2921]) -Greedy action tensor([ 2.3141, -0.2006, -0.6833, 0.0604]) tensor([0.8092, 0.0655, 0.0404, 0.0850]) -Greedy action tensor([ 2.0566, -0.3491, -0.2242, 0.3089]) tensor([0.7318, 0.0660, 0.0748, 0.1274]) -Greedy action tensor([ 1.8295, -1.0013, 0.0488, 0.2518]) tensor([0.6974, 0.0411, 0.1175, 0.1440]) -Greedy action tensor([ 1.8258, -0.0137, -0.2352, -0.0862]) tensor([0.6974, 0.1108, 0.0888, 0.1031]) -Greedy action tensor([ 1.2530, -0.5385, -0.4746, 0.5164]) tensor([0.5485, 0.0914, 0.0975, 0.2626]) -Greedy action tensor([ 1.5850, -0.6723, -0.0200, 0.2307]) tensor([0.6395, 0.0669, 0.1285, 0.1651]) -Greedy action tensor([ 1.3396, -0.6396, -0.3590, 0.2799]) tensor([0.5996, 0.0829, 0.1097, 0.2078]) -Greedy action tensor([ 2.0872, -0.3777, -0.4112, 0.4620]) tensor([0.7331, 0.0623, 0.0603, 0.1443]) -Greedy action tensor([ 1.4727, -0.4907, -0.5633, 0.0774]) tensor([0.6585, 0.0924, 0.0860, 0.1631]) -Greedy action tensor([ 1.2790, -0.4786, -0.4509, 0.0096]) tensor([0.6132, 0.1058, 0.1087, 0.1723]) -Greedy action tensor([ 0.6566, -0.2544, -0.0356, 0.0680]) tensor([0.4069, 0.1636, 0.2036, 0.2259]) -Greedy action tensor([ 0.5612, -0.2287, -0.0904, 0.0075]) tensor([0.3922, 0.1780, 0.2044, 0.2254]) -Greedy action tensor([ 1.6839e+00, -9.4989e-01, 1.5934e-03, 3.0952e-02]) tensor([0.6900, 0.0495, 0.1283, 0.1321]) -Greedy action tensor([ 1.4329, -0.8086, -0.3053, 0.0231]) tensor([0.6552, 0.0696, 0.1152, 0.1600]) -Greedy action tensor([ 0.8364, -0.3445, -0.0623, 0.1711]) tensor([0.4488, 0.1378, 0.1827, 0.2307]) -Greedy action tensor([ 0.5527, -0.2401, 0.3021, 0.0180]) tensor([0.3550, 0.1607, 0.2763, 0.2080]) -Greedy action tensor([ 2.3992, -1.7718, -0.3769, 0.8102]) tensor([0.7801, 0.0120, 0.0486, 0.1592]) -Greedy action tensor([ 0.9224, -0.2894, 0.1732, -0.0793]) tensor([0.4678, 0.1392, 0.2212, 0.1718]) -Greedy action tensor([ 1.5410, -0.4355, -0.0877, 0.6957]) tensor([0.5668, 0.0785, 0.1112, 0.2434]) -Greedy action tensor([ 1.3417, -0.1952, -0.4839, 0.0425]) tensor([0.6065, 0.1304, 0.0977, 0.1654]) -Greedy action tensor([ 0.8038, -0.0827, -0.5850, 0.2103]) tensor([0.4517, 0.1861, 0.1126, 0.2495]) -Greedy action tensor([ 1.3525, -0.6351, -0.1686, 0.1921]) tensor([0.5992, 0.0821, 0.1309, 0.1878]) -Greedy action tensor([ 1.8531, -0.8826, -0.0282, 0.0708]) tensor([0.7218, 0.0468, 0.1100, 0.1214]) -Greedy action tensor([ 1.5649, -0.6293, -0.4783, 0.1499]) tensor([0.6739, 0.0751, 0.0873, 0.1637]) -Greedy action tensor([1.6691, 0.4528, 0.0872, 0.0645]) tensor([0.5872, 0.1740, 0.1207, 0.1180]) -Greedy action tensor([ 1.4391, -0.3978, -0.4533, 0.2842]) tensor([0.6153, 0.0980, 0.0927, 0.1939]) -Greedy action tensor([ 2.0073, 0.1126, -0.1332, 0.2543]) tensor([0.6939, 0.1043, 0.0816, 0.1202]) -Greedy action tensor([ 1.4505, -0.2325, -0.4498, 0.2702]) tensor([0.6088, 0.1131, 0.0910, 0.1870]) -Greedy action tensor([ 1.3671, -0.1157, -0.7799, 0.2445]) tensor([0.5991, 0.1360, 0.0700, 0.1950]) -Greedy action tensor([ 1.0649, -0.0817, -0.7243, 0.9480]) tensor([0.4212, 0.1338, 0.0704, 0.3747]) -Greedy action tensor([ 0.4409, -0.4198, -0.1195, 0.0885]) tensor([0.3708, 0.1568, 0.2117, 0.2607]) -Greedy action tensor([ 1.4779, -0.3970, -0.1444, 0.2269]) tensor([0.6109, 0.0937, 0.1206, 0.1748]) -Greedy action tensor([ 0.6000, -1.2172, 0.1813, -1.0188]) tensor([0.4954, 0.0805, 0.3259, 0.0982]) -Greedy action tensor([-1.3022, 0.3288, -0.2185, -1.0726]) tensor([0.0969, 0.4949, 0.2863, 0.1219]) -Greedy action tensor([ 1.0228, -1.0680, 0.1408, 0.0896]) tensor([0.5179, 0.0640, 0.2144, 0.2037]) -Greedy action tensor([ 0.0544, -0.9136, -0.3352, 0.1663]) tensor([0.3149, 0.1196, 0.2133, 0.3522]) -Greedy action tensor([ 0.9963, 0.7697, 0.2572, -0.1515]) tensor([0.3858, 0.3076, 0.1842, 0.1224]) -Greedy action tensor([-0.1043, -1.7874, 1.0698, 0.5593]) tensor([0.1572, 0.0292, 0.5085, 0.3052]) -Greedy action tensor([1.9900, 0.2602, 0.8254, 0.5510]) tensor([0.5792, 0.1027, 0.1807, 0.1374]) -Greedy action tensor([-0.2444, 1.1717, -0.0350, 0.0658]) tensor([0.1296, 0.5340, 0.1598, 0.1767]) -Greedy action tensor([-0.0359, -0.4115, -0.7465, 0.2715]) tensor([0.2826, 0.1941, 0.1389, 0.3844]) -Greedy action tensor([ 0.0495, -0.7063, 0.1818, 0.0152]) tensor([0.2795, 0.1313, 0.3191, 0.2701]) -Greedy action tensor([ 0.0872, -0.7486, 1.4760, 0.8705]) tensor([0.1310, 0.0568, 0.5254, 0.2868]) -Greedy action tensor([ 0.5981, -0.2466, -0.4274, 0.5616]) tensor([0.3633, 0.1561, 0.1303, 0.3503]) -Greedy action tensor([-0.3031, -0.5009, 0.4085, 0.6664]) tensor([0.1540, 0.1263, 0.3137, 0.4060]) -Greedy action tensor([ 0.5622, -0.9734, -0.2889, 0.6345]) tensor([0.3680, 0.0792, 0.1571, 0.3956]) -Greedy action tensor([ 0.4005, -1.9840, 1.0262, 0.7571]) tensor([0.2278, 0.0210, 0.4259, 0.3254]) -Greedy action tensor([-0.7826, -2.3352, -0.1781, 0.6560]) tensor([0.1378, 0.0292, 0.2522, 0.5808]) -Greedy action tensor([-0.3725, -1.0865, 0.0081, -0.7129]) tensor([0.2729, 0.1336, 0.3993, 0.1942]) -Greedy action tensor([-0.9156, -0.4394, 1.1451, -0.4364]) tensor([0.0828, 0.1333, 0.6502, 0.1337]) -Greedy action tensor([ 1.2580, -1.0449, 1.4419, 0.6609]) tensor([0.3506, 0.0350, 0.4214, 0.1930]) -Greedy action tensor([-0.4092, -0.1778, -0.4477, 0.6698]) tensor([0.1622, 0.2045, 0.1561, 0.4772]) -Greedy action tensor([-0.6939, -0.4256, 0.4635, -0.5996]) tensor([0.1518, 0.1985, 0.4829, 0.1668]) -Greedy action tensor([-1.4043, -1.3785, -0.0522, -0.3179]) tensor([0.1129, 0.1159, 0.4365, 0.3347]) -Greedy action tensor([-0.1740, -0.4280, 0.4443, 0.1890]) tensor([0.1973, 0.1530, 0.3661, 0.2836]) -Greedy action tensor([ 0.6776, -0.9425, -0.2361, -0.1448]) tensor([0.4906, 0.0971, 0.1967, 0.2156]) -Greedy action tensor([-0.1599, -1.3447, -0.0996, 0.8375]) tensor([0.1969, 0.0602, 0.2091, 0.5338]) -Greedy action tensor([ 0.5242, 0.9672, 0.8385, -0.6486]) tensor([0.2361, 0.3676, 0.3232, 0.0731]) -Greedy action tensor([-1.4733, -0.0719, 0.1732, -0.0772]) tensor([0.0700, 0.2842, 0.3631, 0.2827]) -Greedy action tensor([ 0.4990, -0.2688, 0.1301, 0.4776]) tensor([0.3190, 0.1480, 0.2206, 0.3123]) -Greedy action tensor([-0.0446, -0.9006, -0.6716, 0.8209]) tensor([0.2307, 0.0980, 0.1232, 0.5481]) -Greedy action tensor([ 0.8948, -0.7198, 0.1388, 1.0224]) tensor([0.3566, 0.0709, 0.1674, 0.4051]) -Greedy action tensor([ 0.0313, -0.9727, 0.4440, -0.0338]) tensor([0.2622, 0.0961, 0.3961, 0.2456]) -Greedy action tensor([-0.5783, 0.0513, -0.6847, 1.1007]) tensor([0.1095, 0.2054, 0.0984, 0.5867]) -Greedy action tensor([ 0.3409, -0.3489, 0.3261, -0.1083]) tensor([0.3200, 0.1605, 0.3153, 0.2042]) -Greedy action tensor([-0.2317, -2.1056, -0.8084, 0.2989]) tensor([0.2928, 0.0450, 0.1645, 0.4978]) -Greedy action tensor([ 0.3929, -0.2782, 0.4326, 0.3396]) tensor([0.2857, 0.1460, 0.2973, 0.2709]) -Greedy action tensor([ 0.3229, -1.2047, -0.1652, 0.0928]) tensor([0.3809, 0.0827, 0.2338, 0.3026]) -Greedy action tensor([ 1.2017, -0.7857, 1.0473, 0.0435]) tensor([0.4333, 0.0594, 0.3713, 0.1361]) -Greedy action tensor([ 0.8909, 0.4675, 0.1251, -0.0979]) tensor([0.4013, 0.2628, 0.1866, 0.1493]) -Greedy action tensor([ 0.0149, -0.8047, 1.5731, -0.4640]) tensor([0.1468, 0.0647, 0.6975, 0.0910]) -Greedy action tensor([ 1.2939, -0.1943, 1.3112, 1.1963]) tensor([0.3174, 0.0717, 0.3230, 0.2879]) -Greedy action tensor([ 1.0226e+00, 1.0899e+00, 8.3628e-02, -1.0030e-03]) tensor([0.3546, 0.3793, 0.1387, 0.1274]) -Greedy action tensor([-0.1839, -0.5239, -0.3433, 1.0531]) tensor([0.1664, 0.1184, 0.1419, 0.5733]) -Greedy action tensor([ 0.9277, 0.5459, 0.0954, -0.0647]) tensor([0.4019, 0.2743, 0.1748, 0.1490]) -Greedy action tensor([0.9082, 0.7329, 0.2721, 0.0072]) tensor([0.3604, 0.3025, 0.1908, 0.1464]) -Greedy action tensor([ 1.3718, -0.0715, 0.4123, -0.3786]) tensor([0.5577, 0.1317, 0.2137, 0.0969]) -Greedy action tensor([ 0.3489, -1.9682, -0.2374, 0.7601]) tensor([0.3161, 0.0312, 0.1759, 0.4769]) -Greedy action tensor([-1.1220, -2.1013, -0.6350, 0.9272]) tensor([0.0929, 0.0349, 0.1512, 0.7210]) -Greedy action tensor([ 0.6413, 0.3742, -0.1965, 0.5629]) tensor([0.3202, 0.2452, 0.1385, 0.2961]) -Greedy action tensor([ 0.1691, -1.8630, 2.0141, -0.5199]) tensor([0.1256, 0.0165, 0.7949, 0.0631]) -Greedy action tensor([-0.0124, -2.1369, -0.5169, 0.0515]) tensor([0.3585, 0.0428, 0.2165, 0.3822]) -Greedy action tensor([-0.9880, -2.4080, -0.5923, 0.1235]) tensor([0.1734, 0.0419, 0.2576, 0.5270]) -Greedy action tensor([-2.0256, -0.2566, 0.4653, -0.8130]) tensor([0.0448, 0.2630, 0.5414, 0.1508]) -Greedy action tensor([-0.4511, -1.1819, 1.3664, -0.2906]) tensor([0.1135, 0.0546, 0.6986, 0.1332]) -Greedy action tensor([-0.1001, -1.8558, 0.5603, 1.1321]) tensor([0.1530, 0.0264, 0.2961, 0.5245]) -Greedy action tensor([ 0.3216, 0.0401, -0.8348, 1.1269]) tensor([0.2322, 0.1752, 0.0731, 0.5195]) -Greedy action tensor([0.9986, 0.0602, 1.1998, 0.4340]) tensor([0.3142, 0.1229, 0.3842, 0.1787]) -Greedy action tensor([-0.3565, 0.0854, 1.6070, -0.8778]) tensor([0.0973, 0.1514, 0.6935, 0.0578]) -Greedy action tensor([-0.3964, -1.2358, 0.6618, 0.8778]) tensor([0.1268, 0.0548, 0.3652, 0.4533]) -Greedy action tensor([-0.0637, -1.1192, -0.1705, -0.4310]) tensor([0.3402, 0.1184, 0.3058, 0.2356]) -Greedy action tensor([ 0.5508, -0.7913, -0.0243, 1.5387]) tensor([0.2217, 0.0579, 0.1248, 0.5955]) -Greedy action tensor([-0.4580, -1.4130, 0.1679, -0.9375]) tensor([0.2582, 0.0993, 0.4827, 0.1598]) -Greedy action tensor([ 0.3869, 0.1874, 0.7815, -0.4375]) tensor([0.2673, 0.2189, 0.3966, 0.1172]) -Greedy action tensor([ 0.4077, -0.4452, 1.4655, 0.0606]) tensor([0.1995, 0.0850, 0.5745, 0.1410]) -Greedy action tensor([ 0.7363, 0.1485, -0.7823, 1.8642]) tensor([0.2056, 0.1142, 0.0450, 0.6351]) -Greedy action tensor([ 1.1942, -1.2089, -0.2832, 0.7157]) tensor([0.5159, 0.0467, 0.1177, 0.3197]) -Greedy action tensor([-0.3034, -1.8732, 0.4017, 0.5055]) tensor([0.1826, 0.0380, 0.3695, 0.4099]) -Greedy action tensor([-0.3221, -2.0757, -0.0023, 0.5064]) tensor([0.2066, 0.0358, 0.2845, 0.4731]) -Greedy action tensor([-0.5835, 0.0699, 1.4496, -1.3410]) tensor([0.0907, 0.1743, 0.6925, 0.0425]) -Greedy action tensor([ 0.3496, 0.7001, -0.2181, 0.7326]) tensor([0.2245, 0.3188, 0.1273, 0.3293]) -Greedy action tensor([ 0.5272, -0.4645, 0.1575, 1.4693]) tensor([0.2161, 0.0802, 0.1493, 0.5544]) -Greedy action tensor([-0.6710, 0.4707, 1.0610, -0.5513]) tensor([0.0916, 0.2871, 0.5180, 0.1033]) -Greedy action tensor([-0.2808, -0.6827, -0.5971, 0.2180]) tensor([0.2472, 0.1654, 0.1802, 0.4072]) -Greedy action tensor([ 0.0255, -0.7332, 0.8401, -0.0850]) tensor([0.2164, 0.1013, 0.4886, 0.1937]) -Greedy action tensor([ 0.1356, -2.0877, 0.0090, 0.5234]) tensor([0.2888, 0.0313, 0.2544, 0.4256]) -Greedy action tensor([ 1.6524, -1.2031, 2.1084, 1.3721]) tensor([0.2949, 0.0170, 0.4653, 0.2228]) -Greedy action tensor([-0.0347, -0.6527, -0.8113, 0.5393]) tensor([0.2649, 0.1428, 0.1219, 0.4704]) -Greedy action tensor([-0.0484, -1.4567, -0.1761, 0.5649]) tensor([0.2518, 0.0616, 0.2216, 0.4650]) -Greedy action tensor([ 0.2591, -0.1899, 0.6079, -0.4771]) tensor([0.2829, 0.1806, 0.4010, 0.1355]) -Greedy action tensor([ 0.2947, -0.7687, 0.7346, 0.2118]) tensor([0.2619, 0.0904, 0.4066, 0.2411]) -Greedy action tensor([ 0.1591, -1.1733, 1.1629, 0.0108]) tensor([0.2060, 0.0543, 0.5621, 0.1776]) -Greedy action tensor([-0.9832, -1.1517, 0.8045, -0.4305]) tensor([0.1046, 0.0884, 0.6252, 0.1818]) -Greedy action tensor([ 0.4701, -0.1220, 0.0357, -0.2681]) tensor([0.3733, 0.2065, 0.2418, 0.1784]) -Greedy action tensor([ 0.7141, -0.4085, -0.0973, -0.1373]) tensor([0.4553, 0.1482, 0.2022, 0.1943]) -Greedy action tensor([ 0.7501, -0.6627, 0.1004, -0.3341]) tensor([0.4753, 0.1157, 0.2482, 0.1607]) -Greedy action tensor([ 0.4304, 0.1358, -0.2393, -0.0819]) tensor([0.3502, 0.2608, 0.1792, 0.2098]) -Greedy action tensor([ 0.3324, 0.1292, -0.0413, -0.2332]) tensor([0.3255, 0.2656, 0.2240, 0.1849]) -Greedy action tensor([ 0.4895, -0.4134, 0.0251, -0.5236]) tensor([0.4172, 0.1691, 0.2622, 0.1515]) -Greedy action tensor([ 0.3516, -0.1766, -0.1116, -0.3488]) tensor([0.3683, 0.2172, 0.2318, 0.1828]) -Greedy action tensor([ 0.4186, -0.1557, 0.0103, -0.2339]) tensor([0.3638, 0.2049, 0.2419, 0.1895]) -Greedy action tensor([ 0.7736, -0.4505, -0.0054, -0.3993]) tensor([0.4849, 0.1426, 0.2225, 0.1500]) -Greedy action tensor([ 0.6851, -0.3502, -0.0826, -0.3496]) tensor([0.4599, 0.1633, 0.2134, 0.1634]) -Greedy action tensor([ 0.8135, -1.0740, 0.0909, -0.5466]) tensor([0.5281, 0.0800, 0.2564, 0.1355]) -Greedy action tensor([ 0.5689, -0.2772, -0.1144, -0.3143]) tensor([0.4260, 0.1828, 0.2151, 0.1761]) -Greedy action tensor([ 0.9159, -0.8804, -0.1090, -0.8428]) tensor([0.5893, 0.0978, 0.2115, 0.1015]) -Greedy action tensor([ 1.0124, -0.5060, -0.2288, -0.3660]) tensor([0.5682, 0.1245, 0.1642, 0.1432]) -Greedy action tensor([ 0.8299, -0.9447, 0.0813, -0.5154]) tensor([0.5255, 0.0891, 0.2486, 0.1369]) -Greedy action tensor([ 0.8039, -0.4771, -0.0768, -0.4406]) tensor([0.5050, 0.1403, 0.2093, 0.1455]) -Greedy action tensor([ 0.3875, -0.1077, -0.1012, -0.0647]) tensor([0.3498, 0.2132, 0.2146, 0.2225]) -Greedy action tensor([ 0.3022, -0.1373, -0.0954, -0.2624]) tensor([0.3466, 0.2234, 0.2329, 0.1971]) -Greedy action tensor([ 0.8710, -0.7096, 0.0275, -0.4021]) tensor([0.5219, 0.1074, 0.2245, 0.1461]) -Greedy action tensor([ 0.4824, -0.1466, -0.0400, -0.1246]) tensor([0.3744, 0.1996, 0.2220, 0.2040]) -Greedy action tensor([ 0.9597, -0.8211, 0.1036, -0.4395]) tensor([0.5434, 0.0916, 0.2309, 0.1341]) -Greedy action tensor([ 1.1268, -0.7884, 0.0122, -0.3862]) tensor([0.5898, 0.0869, 0.1935, 0.1299]) -Greedy action tensor([0.3063, 0.0755, 0.0350, 0.2293]) tensor([0.2872, 0.2280, 0.2189, 0.2659]) -Greedy action tensor([ 1.0094, -0.7035, -0.0880, -0.4416]) tensor([0.5719, 0.1031, 0.1909, 0.1340]) -Greedy action tensor([ 0.3291, 0.1110, -0.0241, -0.4472]) tensor([0.3371, 0.2710, 0.2368, 0.1551]) -Greedy action tensor([ 0.3993, 0.0358, -0.0329, -0.0933]) tensor([0.3384, 0.2353, 0.2196, 0.2067]) -Greedy action tensor([ 0.8997, -0.7716, 0.1469, -0.5014]) tensor([0.5248, 0.0987, 0.2472, 0.1293]) -Greedy action tensor([ 0.7116, -0.2490, -0.0324, -0.0907]) tensor([0.4336, 0.1659, 0.2061, 0.1944]) -Greedy action tensor([ 0.5501, -0.5540, -0.0536, -0.5096]) tensor([0.4495, 0.1490, 0.2458, 0.1558]) -Greedy action tensor([ 1.2728, -0.8438, 0.2224, -0.7362]) tensor([0.6233, 0.0751, 0.2180, 0.0836]) -Greedy action tensor([ 9.3293e-01, 8.6144e-02, -4.4155e-04, -2.4067e-01]) tensor([0.4692, 0.2012, 0.1845, 0.1451]) -Greedy action tensor([ 0.6931, -0.3741, -0.0376, -0.1045]) tensor([0.4394, 0.1511, 0.2116, 0.1979]) -Greedy action tensor([ 0.5020, 0.3727, -0.2565, -0.0552]) tensor([0.3425, 0.3009, 0.1604, 0.1962]) -Greedy action tensor([ 0.3137, -0.1647, 0.0618, -0.2634]) tensor([0.3380, 0.2095, 0.2627, 0.1898]) -Greedy action tensor([ 0.6478, -0.1334, -0.0777, -0.3016]) tensor([0.4294, 0.1966, 0.2079, 0.1662]) -Greedy action tensor([ 0.6028, -0.4790, -0.1665, -0.3161]) tensor([0.4543, 0.1540, 0.2105, 0.1812]) -Greedy action tensor([ 0.9284, -0.4934, -0.0370, -0.3485]) tensor([0.5260, 0.1269, 0.2003, 0.1467]) -Greedy action tensor([ 0.4219, -0.0294, -0.0095, -0.3856]) tensor([0.3660, 0.2331, 0.2377, 0.1632]) -Greedy action tensor([ 1.0119, -0.5215, -0.1427, -0.4028]) tensor([0.5637, 0.1216, 0.1777, 0.1370]) -Greedy action tensor([ 0.9822, -0.9390, 0.0251, -0.6720]) tensor([0.5808, 0.0851, 0.2230, 0.1111]) -Greedy action tensor([ 0.6652, -0.6657, -0.0608, -0.2629]) tensor([0.4666, 0.1233, 0.2257, 0.1844]) -Greedy action tensor([ 1.2717, -0.6215, -0.2202, -0.3514]) tensor([0.6358, 0.0957, 0.1430, 0.1254]) -Greedy action tensor([ 0.4807, 0.1137, -0.0896, 0.0766]) tensor([0.3418, 0.2368, 0.1932, 0.2282]) -Greedy action tensor([ 0.8008, -0.7245, 0.0016, -0.4888]) tensor([0.5148, 0.1120, 0.2315, 0.1418]) -Greedy action tensor([ 0.5420, -0.5474, 0.0616, -0.4183]) tensor([0.4278, 0.1439, 0.2646, 0.1637]) -Greedy action tensor([ 0.5671, -0.2644, -0.2021, -0.4435]) tensor([0.4419, 0.1924, 0.2048, 0.1609]) -Greedy action tensor([ 0.7973, -0.6311, -0.0855, -0.3368]) tensor([0.5063, 0.1214, 0.2094, 0.1629]) -Greedy action tensor([ 0.4913, -0.1023, -0.0278, -0.0211]) tensor([0.3641, 0.2011, 0.2167, 0.2181]) -Greedy action tensor([ 0.5861, -0.1868, -0.1528, -0.0661]) tensor([0.4065, 0.1877, 0.1942, 0.2117]) -Greedy action tensor([ 0.5082, 0.0420, -0.0863, -0.1119]) tensor([0.3680, 0.2309, 0.2031, 0.1980]) -Greedy action tensor([ 0.8479, -0.6087, -0.0786, -0.3848]) tensor([0.5207, 0.1213, 0.2062, 0.1518]) -Greedy action tensor([ 1.3023, -0.9014, -0.0216, -0.3919]) tensor([0.6409, 0.0708, 0.1706, 0.1178]) -Greedy action tensor([ 1.1746, -0.6044, -0.1182, -0.4433]) tensor([0.6092, 0.1028, 0.1672, 0.1208]) -Greedy action tensor([ 0.6026, -0.6908, 0.1281, -0.3501]) tensor([0.4382, 0.1202, 0.2726, 0.1690]) -Greedy action tensor([ 0.9517, -0.7422, -0.0554, -0.7440]) tensor([0.5772, 0.1061, 0.2108, 0.1059]) -Greedy action tensor([ 0.2231, 0.0851, 0.1876, -0.1546]) tensor([0.2840, 0.2474, 0.2740, 0.1946]) -Greedy action tensor([ 1.1710, -0.4798, 0.0259, -0.4116]) tensor([0.5829, 0.1119, 0.1855, 0.1198]) -Greedy action tensor([ 1.0333, -0.7866, 0.0872, -0.5022]) tensor([0.5664, 0.0918, 0.2199, 0.1220]) -Greedy action tensor([ 0.8996, -0.3356, -0.0226, -0.1839]) tensor([0.4934, 0.1435, 0.1962, 0.1670]) -Greedy action tensor([ 0.7348, -0.4592, -0.1038, -0.4231]) tensor([0.4879, 0.1479, 0.2109, 0.1533]) -Greedy action tensor([ 0.9672, -0.4395, 0.0934, -0.5887]) tensor([0.5338, 0.1308, 0.2228, 0.1126]) -Greedy action tensor([ 0.5514, -0.2853, -0.0850, -0.3041]) tensor([0.4189, 0.1814, 0.2217, 0.1780]) -Greedy action tensor([ 0.1463, -0.1190, 0.0640, -0.1354]) tensor([0.2905, 0.2228, 0.2675, 0.2192]) -Greedy action tensor([ 0.4768, -0.0932, -0.0823, -0.1207]) tensor([0.3721, 0.2104, 0.2127, 0.2047]) -Greedy action tensor([ 0.9259, -0.3924, 0.1088, -0.4217]) tensor([0.5078, 0.1359, 0.2243, 0.1320]) -Greedy action tensor([ 0.3642, 0.0051, -0.1418, -0.0276]) tensor([0.3359, 0.2346, 0.2025, 0.2270]) -Greedy action tensor([ 0.4332, -0.5979, -0.0264, -0.1257]) tensor([0.3906, 0.1393, 0.2467, 0.2234]) -Greedy action tensor([ 0.4353, -0.0149, 0.0589, -0.1267]) tensor([0.3456, 0.2203, 0.2372, 0.1970]) -Greedy action tensor([ 0.4403, -0.0212, -0.1314, -0.0229]) tensor([0.3541, 0.2232, 0.1999, 0.2228]) -Greedy action tensor([ 0.6752, -0.2365, -0.1992, -0.2757]) tensor([0.4534, 0.1822, 0.1891, 0.1752]) -Greedy action tensor([ 0.2011, 0.2051, -0.0061, -0.1039]) tensor([0.2814, 0.2825, 0.2287, 0.2074]) -Greedy action tensor([ 0.3353, 0.2570, -0.1375, 0.0347]) tensor([0.3041, 0.2812, 0.1895, 0.2252]) -Greedy action tensor([ 0.7317, -0.3001, -0.0362, -0.2038]) tensor([0.4519, 0.1610, 0.2097, 0.1773]) -Greedy action tensor([ 0.4067, -0.1154, -0.0829, -0.1694]) tensor([0.3612, 0.2143, 0.2214, 0.2031]) -Greedy action tensor([ 1.0469, -0.9997, -0.0087, -0.4613]) tensor([0.5888, 0.0761, 0.2049, 0.1303]) -Greedy action tensor([ 0.4908, 0.2521, -0.1480, -0.0554]) tensor([0.3455, 0.2721, 0.1824, 0.2001]) -Greedy action tensor([ 0.9028, -0.4867, -0.0909, -0.5656]) tensor([0.5406, 0.1347, 0.2001, 0.1245]) -Greedy action tensor([ 0.3335, -0.0881, -0.0753, -0.0435]) tensor([0.3326, 0.2182, 0.2210, 0.2281]) -Greedy action tensor([ 0.7702, -0.3685, -0.1484, -0.3265]) tensor([0.4870, 0.1560, 0.1944, 0.1626]) -Greedy action tensor([ 0.2409, 0.2981, 0.0517, -0.2553]) tensor([0.2861, 0.3029, 0.2368, 0.1742]) -Greedy action tensor([ 0.3973, -0.0448, 0.1684, -0.2576]) tensor([0.3381, 0.2173, 0.2689, 0.1756]) -Greedy action tensor([-1.8104, -0.1729, 0.5410, -0.0895]) tensor([0.0450, 0.2313, 0.4723, 0.2514]) -Greedy action tensor([-1.8253, -0.3733, 0.6241, -0.0857]) tensor([0.0443, 0.1894, 0.5136, 0.2526]) -Greedy action tensor([-1.7062, -0.5080, 0.5486, 0.0094]) tensor([0.0515, 0.1708, 0.4912, 0.2865]) -Greedy action tensor([-1.4050, -0.5587, 0.4025, 0.1847]) tensor([0.0698, 0.1627, 0.4254, 0.3421]) -Greedy action tensor([-1.0673, -0.7440, 0.0244, -0.1512]) tensor([0.1272, 0.1758, 0.3790, 0.3180]) -Greedy action tensor([-1.8953, -0.4410, 0.6385, -0.1571]) tensor([0.0424, 0.1817, 0.5346, 0.2413]) -Greedy action tensor([-1.8132, -0.4455, 0.6009, -0.1007]) tensor([0.0462, 0.1814, 0.5164, 0.2560]) -Greedy action tensor([-1.8700, -0.4127, 0.6260, -0.1335]) tensor([0.0433, 0.1859, 0.5251, 0.2457]) -Greedy action tensor([-1.9024, -0.4042, 0.6416, -0.1546]) tensor([0.0418, 0.1868, 0.5316, 0.2398]) -Greedy action tensor([-0.9311, -0.4918, 0.5868, 0.6054]) tensor([0.0850, 0.1319, 0.3879, 0.3952]) -Greedy action tensor([-1.0371, -0.5470, 0.4056, 0.7364]) tensor([0.0784, 0.1280, 0.3318, 0.4619]) -Greedy action tensor([-1.9331, -0.4450, 0.6623, -0.1745]) tensor([0.0406, 0.1798, 0.5440, 0.2356]) -Greedy action tensor([-1.1396, 0.2566, 0.2829, -0.0637]) tensor([0.0825, 0.3333, 0.3422, 0.2420]) -Greedy action tensor([-1.9127, -0.4590, 0.6591, -0.1571]) tensor([0.0414, 0.1771, 0.5419, 0.2396]) -Greedy action tensor([-1.9103, -0.4322, 0.6462, -0.1675]) tensor([0.0417, 0.1828, 0.5374, 0.2382]) -Greedy action tensor([-1.8527, -0.0168, 0.5388, -0.0993]) tensor([0.0417, 0.2616, 0.4559, 0.2409]) -Greedy action tensor([-1.5351, -0.5277, 0.5114, 0.1599]) tensor([0.0591, 0.1618, 0.4573, 0.3218]) -Greedy action tensor([-1.8731, -0.4425, 0.6342, -0.1284]) tensor([0.0431, 0.1804, 0.5295, 0.2470]) -Greedy action tensor([-1.7459, -0.2326, 0.5982, 0.0995]) tensor([0.0449, 0.2037, 0.4675, 0.2839]) -Greedy action tensor([-1.7725, -0.4442, 0.6908, 0.0429]) tensor([0.0441, 0.1666, 0.5182, 0.2711]) -Greedy action tensor([-1.6569, -0.2600, 0.6120, 0.1191]) tensor([0.0485, 0.1961, 0.4689, 0.2865]) -Greedy action tensor([-1.4052, -0.5738, 0.4593, 0.1202]) tensor([0.0697, 0.1601, 0.4498, 0.3204]) -Greedy action tensor([-1.8021, -0.5125, 0.9066, 0.2933]) tensor([0.0360, 0.1308, 0.5405, 0.2927]) -Greedy action tensor([-1.7668, -0.5903, 1.2853, 0.8741]) tensor([0.0254, 0.0823, 0.5366, 0.3557]) -Greedy action tensor([-1.5362, -0.6493, 0.6711, 0.3400]) tensor([0.0525, 0.1274, 0.4773, 0.3428]) -Greedy action tensor([-1.8796, -0.4044, 0.6556, -0.1182]) tensor([0.0420, 0.1836, 0.5299, 0.2444]) -Greedy action tensor([-0.2184, 0.1324, 0.9329, 1.6811]) tensor([0.0815, 0.1158, 0.2578, 0.5448]) -Greedy action tensor([-1.8074, -0.3359, 0.5768, -0.1055]) tensor([0.0461, 0.2008, 0.5002, 0.2529]) -Greedy action tensor([-1.7750, -0.4314, 0.5923, -0.0810]) tensor([0.0478, 0.1830, 0.5094, 0.2598]) -Greedy action tensor([-1.7913, -0.4839, 0.5759, -0.1252]) tensor([0.0484, 0.1790, 0.5165, 0.2562]) -Greedy action tensor([-1.8311, -0.3207, 0.5970, -0.0774]) tensor([0.0442, 0.2000, 0.5007, 0.2551]) -Greedy action tensor([-1.8859, -0.4465, 0.6394, -0.1494]) tensor([0.0428, 0.1803, 0.5342, 0.2427]) -Greedy action tensor([-1.2749, -0.2320, 0.6920, -0.7473]) tensor([0.0789, 0.2238, 0.5637, 0.1337]) -Greedy action tensor([-1.8945, -0.4129, 0.6462, -0.1453]) tensor([0.0420, 0.1846, 0.5323, 0.2412]) -Greedy action tensor([-1.3464, -0.4549, 0.8423, 0.8693]) tensor([0.0464, 0.1133, 0.4145, 0.4258]) -Greedy action tensor([-1.4851, -0.4115, 0.5659, -0.3294]) tensor([0.0672, 0.1967, 0.5226, 0.2135]) -Greedy action tensor([-1.8160, -0.4862, 0.6310, -0.0623]) tensor([0.0452, 0.1710, 0.5226, 0.2612]) -Greedy action tensor([-1.6190, -0.5260, 1.0025, 0.7588]) tensor([0.0351, 0.1046, 0.4823, 0.3780]) -Greedy action tensor([-1.0898, -0.3876, 0.3797, 0.2752]) tensor([0.0886, 0.1789, 0.3853, 0.3471]) -Greedy action tensor([-1.8820, -0.3549, 0.6255, -0.1379]) tensor([0.0424, 0.1951, 0.5201, 0.2424]) -Greedy action tensor([-1.9197, -0.4186, 0.6538, -0.1675]) tensor([0.0410, 0.1841, 0.5381, 0.2367]) -Greedy action tensor([-1.2337, -0.6406, 0.5608, 0.0883]) tensor([0.0795, 0.1439, 0.4784, 0.2982]) -Greedy action tensor([-1.5247, -0.5586, 0.4307, 0.0888]) tensor([0.0636, 0.1672, 0.4497, 0.3195]) -Greedy action tensor([-1.9333, -0.4074, 0.6512, -0.1759]) tensor([0.0406, 0.1866, 0.5377, 0.2352]) -Greedy action tensor([-1.8477e+00, -5.0190e-01, 7.5558e-01, -7.9048e-04]) tensor([0.0405, 0.1556, 0.5471, 0.2568]) -Greedy action tensor([-1.7964, -0.2863, 0.5824, -0.1430]) tensor([0.0464, 0.2101, 0.5009, 0.2425]) -Greedy action tensor([-0.7157, -0.5713, 0.1979, 0.3087]) tensor([0.1345, 0.1554, 0.3354, 0.3747]) -Greedy action tensor([-1.8922, -0.4562, 0.6398, -0.1634]) tensor([0.0427, 0.1795, 0.5372, 0.2406]) -Greedy action tensor([-1.9019, -0.3716, 0.6340, -0.1538]) tensor([0.0417, 0.1925, 0.5264, 0.2394]) -Greedy action tensor([-1.6952, 0.2827, 0.4372, 0.0656]) tensor([0.0445, 0.3215, 0.3752, 0.2588]) -Greedy action tensor([-1.7568, -0.4599, 0.6695, 0.1058]) tensor([0.0446, 0.1632, 0.5049, 0.2873]) -Greedy action tensor([-1.4217e+00, -6.1658e-01, 5.7619e-01, -2.1189e-04]) tensor([0.0678, 0.1516, 0.4998, 0.2808]) -Greedy action tensor([-1.9160, -0.4584, 0.6546, -0.1636]) tensor([0.0414, 0.1780, 0.5416, 0.2390]) -Greedy action tensor([-1.7166, -0.4881, 0.5667, -0.0334]) tensor([0.0510, 0.1742, 0.5003, 0.2745]) -Greedy action tensor([-1.8116, -0.4866, 0.6523, -0.0239]) tensor([0.0445, 0.1673, 0.5225, 0.2657]) -Greedy action tensor([-1.3192, -0.5080, 0.5039, -0.2276]) tensor([0.0805, 0.1812, 0.4984, 0.2398]) -Greedy action tensor([-1.7808, -0.4794, 0.6871, 0.0668]) tensor([0.0438, 0.1610, 0.5171, 0.2781]) -Greedy action tensor([-1.2180, -0.1953, 0.2786, 0.0035]) tensor([0.0859, 0.2389, 0.3837, 0.2914]) -Greedy action tensor([-0.5727, -0.3843, 0.2217, 0.2901]) tensor([0.1473, 0.1778, 0.3259, 0.3490]) -Greedy action tensor([-1.6806, -0.4447, 0.6673, 0.2773]) tensor([0.0455, 0.1565, 0.4758, 0.3222]) -Greedy action tensor([-1.7790, -0.5193, 0.5993, -0.0778]) tensor([0.0481, 0.1695, 0.5188, 0.2636]) -Greedy action tensor([-1.6176, -0.4470, 0.5357, 0.0547]) tensor([0.0551, 0.1775, 0.4743, 0.2932]) -Greedy action tensor([-1.6897, -0.5179, 0.5905, -0.0848]) tensor([0.0527, 0.1700, 0.5151, 0.2622]) -Greedy action tensor([-1.9114, -0.4130, 0.6479, -0.1521]) tensor([0.0413, 0.1848, 0.5339, 0.2399]) -Greedy action tensor([-1.9160, -0.4484, 0.6755, -0.1430]) tensor([0.0407, 0.1765, 0.5432, 0.2396]) -Greedy action tensor([-1.7587, -0.5041, 0.5789, -0.1125]) tensor([0.0499, 0.1749, 0.5165, 0.2587]) -Greedy action tensor([-1.5824, -0.5663, 0.8241, 0.5198]) tensor([0.0434, 0.1199, 0.4815, 0.3552]) -Greedy action tensor([-1.7541, -0.4273, 0.5840, 0.0085]) tensor([0.0477, 0.1798, 0.4944, 0.2781]) -Greedy action tensor([-1.5421, -0.5458, 0.4455, 0.0764]) tensor([0.0623, 0.1687, 0.4547, 0.3143]) -Greedy action tensor([-1.7864, -0.4510, 0.5911, -0.0762]) tensor([0.0474, 0.1801, 0.5106, 0.2620]) -Greedy action tensor([-1.0846, -0.4960, 1.1612, 1.3897]) tensor([0.0415, 0.0747, 0.3917, 0.4922]) -Greedy action tensor([-1.8205, -0.3014, 0.6160, -0.0135]) tensor([0.0433, 0.1978, 0.4951, 0.2638]) -Greedy action tensor([-1.9201, -0.4371, 0.6662, -0.1612]) tensor([0.0408, 0.1799, 0.5422, 0.2371]) -Greedy action tensor([-1.3596, -0.6110, 0.3652, 0.2037]) tensor([0.0741, 0.1566, 0.4157, 0.3537]) -Greedy action tensor([-1.9398, -0.4218, 0.6575, -0.1783]) tensor([0.0403, 0.1839, 0.5412, 0.2346]) -Greedy action tensor([-1.6601, -0.2951, 0.5294, -0.1493]) tensor([0.0544, 0.2131, 0.4860, 0.2465]) -Greedy action tensor([-1.8546, -0.4198, 0.6220, -0.1270]) tensor([0.0440, 0.1847, 0.5236, 0.2476]) -Greedy action tensor([-0.9727, -0.3728, 0.4953, 0.9648]) tensor([0.0709, 0.1292, 0.3078, 0.4922]) -Greedy action tensor([-1.4438, -0.5336, 0.4963, -0.1526]) tensor([0.0710, 0.1765, 0.4942, 0.2583]) -Greedy action tensor([-0.6015, -0.8444, 0.9110, 0.3771]) tensor([0.1113, 0.0873, 0.5052, 0.2962]) -Greedy action tensor([-1.1158, 0.0490, 0.4968, -0.6462]) tensor([0.0924, 0.2962, 0.4636, 0.1478]) -Greedy action tensor([ 1.9885, -0.9672, -0.0266, 0.6282]) tensor([0.6935, 0.0361, 0.0925, 0.1780]) -Greedy action tensor([ 1.2743, -0.2254, -1.0214, 0.4287]) tensor([0.5704, 0.1273, 0.0574, 0.2449]) -Greedy action tensor([ 0.9196, -0.3998, -0.1057, 0.3068]) tensor([0.4613, 0.1233, 0.1655, 0.2499]) -Greedy action tensor([ 1.2903, -0.1894, -0.2323, 0.1889]) tensor([0.5623, 0.1281, 0.1227, 0.1869]) -Greedy action tensor([ 0.5537, -0.4039, 0.0216, -0.1654]) tensor([0.4068, 0.1561, 0.2389, 0.1982]) -Greedy action tensor([ 1.4173, -0.8439, -0.2779, 0.4495]) tensor([0.5996, 0.0625, 0.1101, 0.2278]) -Greedy action tensor([ 1.5485, 0.0117, -0.1182, 0.1531]) tensor([0.6054, 0.1302, 0.1144, 0.1500]) -Greedy action tensor([ 0.7658, -0.3048, -0.4297, 0.1220]) tensor([0.4607, 0.1579, 0.1394, 0.2420]) -Greedy action tensor([ 1.7893, -0.8039, -0.2274, 0.1386]) tensor([0.7144, 0.0534, 0.0951, 0.1371]) -Greedy action tensor([ 1.8971, -0.4572, -0.4035, 0.5807]) tensor([0.6834, 0.0649, 0.0685, 0.1832]) -Greedy action tensor([ 1.2929, -0.1506, -0.1080, 0.3927]) tensor([0.5294, 0.1250, 0.1304, 0.2152]) -Greedy action tensor([ 1.3084, -0.4934, -0.1337, 0.2158]) tensor([0.5758, 0.0950, 0.1361, 0.1931]) -Greedy action tensor([ 1.2877, -0.2046, -0.3283, 0.3068]) tensor([0.5560, 0.1250, 0.1105, 0.2085]) -Greedy action tensor([ 1.2471, -0.6676, 0.0643, 0.1811]) tensor([0.5561, 0.0820, 0.1704, 0.1915]) -Greedy action tensor([ 1.2327, -0.1289, -0.0452, -0.1092]) tensor([0.5567, 0.1427, 0.1551, 0.1455]) -Greedy action tensor([ 1.5241, -0.4821, -0.5494, 0.2539]) tensor([0.6489, 0.0873, 0.0816, 0.1822]) -Greedy action tensor([ 1.4560, -0.4731, -0.3170, 0.3724]) tensor([0.6048, 0.0879, 0.1027, 0.2047]) -Greedy action tensor([ 2.6990, -1.2479, -0.3762, 0.8338]) tensor([0.8194, 0.0158, 0.0378, 0.1269]) -Greedy action tensor([ 1.2586, -0.0430, -0.1649, 0.1369]) tensor([0.5439, 0.1480, 0.1310, 0.1771]) -Greedy action tensor([ 1.2840, -0.4589, -0.0864, 0.1626]) tensor([0.5698, 0.0997, 0.1447, 0.1857]) -Greedy action tensor([ 1.2745, -0.2582, -0.8275, 0.4411]) tensor([0.5641, 0.1218, 0.0689, 0.2451]) -Greedy action tensor([ 1.4307, -0.6055, -0.3459, 0.8504]) tensor([0.5378, 0.0702, 0.0910, 0.3010]) -Greedy action tensor([ 1.4027, -0.2645, -0.4791, 1.0095]) tensor([0.4960, 0.0936, 0.0756, 0.3348]) -Greedy action tensor([ 0.7956, -0.2567, 0.1093, 0.1674]) tensor([0.4191, 0.1463, 0.2110, 0.2236]) -Greedy action tensor([ 1.4677, -0.5453, -0.2436, 0.6102]) tensor([0.5752, 0.0768, 0.1039, 0.2440]) -Greedy action tensor([ 2.1307, -0.5853, -0.4867, 0.4614]) tensor([0.7533, 0.0498, 0.0550, 0.1419]) -Greedy action tensor([ 1.3313, -0.3506, -0.7246, 0.2626]) tensor([0.6033, 0.1122, 0.0772, 0.2072]) -Greedy action tensor([ 0.8247, -0.4637, -0.4246, 0.1814]) tensor([0.4789, 0.1320, 0.1373, 0.2517]) -Greedy action tensor([ 1.3309, -0.2774, -0.4272, 0.2943]) tensor([0.5789, 0.1159, 0.0998, 0.2053]) -Greedy action tensor([ 2.4184, -1.1080, -0.2156, 1.0162]) tensor([0.7422, 0.0218, 0.0533, 0.1826]) -Greedy action tensor([ 7.7956e-01, -6.7775e-01, 3.3301e-04, -6.5808e-01]) tensor([0.5184, 0.1207, 0.2378, 0.1231]) -Greedy action tensor([ 0.7433, -0.2425, -0.4298, 0.1457]) tensor([0.4479, 0.1671, 0.1386, 0.2464]) -Greedy action tensor([ 0.8175, 0.2441, -0.1574, 0.4843]) tensor([0.3763, 0.2121, 0.1420, 0.2697]) -Greedy action tensor([ 2.5724, -0.9963, -0.3566, 1.1553]) tensor([0.7553, 0.0213, 0.0404, 0.1831]) -Greedy action tensor([ 1.7825, -0.5863, -0.4850, 0.4793]) tensor([0.6808, 0.0637, 0.0705, 0.1849]) -Greedy action tensor([ 1.1865, -0.6232, -0.4156, 0.2337]) tensor([0.5712, 0.0935, 0.1151, 0.2203]) -Greedy action tensor([ 1.5397, -0.8595, 0.0846, 0.4786]) tensor([0.5987, 0.0544, 0.1397, 0.2072]) -Greedy action tensor([ 1.3845, -0.3187, -0.7141, 0.4292]) tensor([0.5919, 0.1078, 0.0726, 0.2277]) -Greedy action tensor([ 1.7493, -0.8557, 0.0166, 0.4905]) tensor([0.6516, 0.0482, 0.1152, 0.1851]) -Greedy action tensor([ 1.7937, -0.6499, -0.2384, 0.4487]) tensor([0.6764, 0.0587, 0.0886, 0.1762]) -Greedy action tensor([ 1.4779, -0.3474, -0.5387, 0.5715]) tensor([0.5888, 0.0949, 0.0784, 0.2379]) -Greedy action tensor([ 2.0758, -0.1573, -0.6428, 0.6763]) tensor([0.7043, 0.0755, 0.0465, 0.1738]) -Greedy action tensor([ 1.5027, -0.0903, -0.5620, 0.4680]) tensor([0.5933, 0.1206, 0.0753, 0.2108]) -Greedy action tensor([ 1.4739, -0.2451, -0.7802, 0.3791]) tensor([0.6177, 0.1107, 0.0648, 0.2067]) -Greedy action tensor([ 1.1921, 0.1980, -0.4826, 0.4590]) tensor([0.4907, 0.1816, 0.0919, 0.2357]) -Greedy action tensor([ 1.8164, -0.7645, -0.2266, 0.4913]) tensor([0.6797, 0.0515, 0.0881, 0.1807]) -Greedy action tensor([ 2.0232, -0.2606, -0.1331, 0.1630]) tensor([0.7282, 0.0742, 0.0843, 0.1133]) -Greedy action tensor([ 1.2651, -0.4663, -0.0755, 0.4237]) tensor([0.5348, 0.0947, 0.1400, 0.2306]) -Greedy action tensor([ 1.2400, -0.0926, -0.4699, -0.0201]) tensor([0.5786, 0.1526, 0.1047, 0.1641]) -Greedy action tensor([ 1.4859, -0.3587, -1.0343, 0.3871]) tensor([0.6362, 0.1006, 0.0512, 0.2120]) -Greedy action tensor([ 1.3321, 0.0509, -0.4655, -0.1087]) tensor([0.5952, 0.1653, 0.0986, 0.1409]) -Greedy action tensor([ 1.4304, -0.5861, 0.2562, -0.1362]) tensor([0.6057, 0.0806, 0.1872, 0.1265]) -Greedy action tensor([ 0.6293, 0.0322, -0.0261, -0.0892]) tensor([0.3911, 0.2152, 0.2031, 0.1906]) -Greedy action tensor([ 1.4308, -0.7827, -0.3213, 0.6556]) tensor([0.5736, 0.0627, 0.0995, 0.2642]) -Greedy action tensor([ 0.3565, -0.2904, -0.4242, 0.5472]) tensor([0.3133, 0.1641, 0.1435, 0.3791]) -Greedy action tensor([ 1.5288, -0.3764, -0.4071, 0.5243]) tensor([0.6027, 0.0897, 0.0870, 0.2207]) -Greedy action tensor([ 0.7357, -0.0110, -0.1187, 0.2486]) tensor([0.3978, 0.1885, 0.1693, 0.2444]) -Greedy action tensor([ 2.1975, -1.3001, -0.1079, 0.3928]) tensor([0.7725, 0.0234, 0.0770, 0.1271]) -Greedy action tensor([ 1.9205, -0.5316, -0.4780, 0.8638]) tensor([0.6559, 0.0565, 0.0596, 0.2280]) -Greedy action tensor([ 2.2442, 0.3217, -0.1376, 0.4101]) tensor([0.7151, 0.1046, 0.0661, 0.1142]) -Greedy action tensor([ 1.4689, -0.4725, -0.1193, 0.1697]) tensor([0.6171, 0.0886, 0.1261, 0.1683]) -Greedy action tensor([ 0.8544, -0.5883, -0.0320, 0.0088]) tensor([0.4813, 0.1137, 0.1984, 0.2066]) -Greedy action tensor([ 1.3617, -0.5051, -0.5555, 0.4373]) tensor([0.5888, 0.0910, 0.0866, 0.2336]) -Greedy action tensor([ 1.3586, -0.2039, -0.1184, 0.1362]) tensor([0.5772, 0.1210, 0.1318, 0.1700]) -Greedy action tensor([ 1.9131, -0.0682, -0.5639, 0.5037]) tensor([0.6820, 0.0941, 0.0573, 0.1666]) -Greedy action tensor([ 0.5428, -0.2359, 0.0691, 0.3250]) tensor([0.3465, 0.1590, 0.2158, 0.2787]) -Greedy action tensor([ 0.5356, -0.3866, -0.0442, 0.1612]) tensor([0.3780, 0.1503, 0.2117, 0.2600]) -Greedy action tensor([ 1.7486, -0.7986, -0.0513, 0.4709]) tensor([0.6569, 0.0514, 0.1086, 0.1831]) -Greedy action tensor([ 1.3069, -0.7493, -0.1583, 0.0734]) tensor([0.6060, 0.0775, 0.1400, 0.1765]) -Greedy action tensor([ 1.0595, -0.1704, -0.7954, 0.3408]) tensor([0.5165, 0.1510, 0.0808, 0.2517]) -Greedy action tensor([ 1.6622, -0.2642, -0.8931, 0.2119]) tensor([0.6859, 0.0999, 0.0533, 0.1609]) -Greedy action tensor([ 1.0750, -0.2733, -0.7161, 0.4791]) tensor([0.5057, 0.1313, 0.0843, 0.2787]) -Greedy action tensor([ 1.6233, -0.4839, -0.6060, 0.3535]) tensor([0.6622, 0.0805, 0.0713, 0.1860]) -Greedy action tensor([ 0.9393, -0.5267, -0.0798, 0.4820]) tensor([0.4495, 0.1038, 0.1622, 0.2845]) -Greedy action tensor([ 1.4835, -0.5860, -0.3390, 0.8195]) tensor([0.5547, 0.0700, 0.0897, 0.2856]) -Greedy action tensor([ 1.8548, -0.5644, -0.5349, 0.3278]) tensor([0.7154, 0.0637, 0.0656, 0.1554]) -Greedy action tensor([ 1.8087, -0.4885, -0.4696, 0.5823]) tensor([0.6683, 0.0672, 0.0685, 0.1960]) -Greedy action tensor([ 1.3863, -0.5311, -0.2347, 0.6818]) tensor([0.5438, 0.0799, 0.1075, 0.2688]) -Greedy action tensor([ 1.3993, -0.1965, -0.2216, 0.2332]) tensor([0.5841, 0.1184, 0.1155, 0.1820]) -Greedy action tensor([ 1.5519, -0.6796, -0.4707, 0.5553]) tensor([0.6216, 0.0667, 0.0822, 0.2294]) -Greedy action tensor([ 1.3193, -0.2548, -0.3599, 0.3877]) tensor([0.5594, 0.1159, 0.1043, 0.2203]) -Greedy action tensor([ 0.7108, -0.3981, -0.0776, -0.3126]) tensor([0.4664, 0.1539, 0.2120, 0.1676]) -Greedy action tensor([ 0.6175, -0.5045, 0.0523, -0.6723]) tensor([0.4610, 0.1501, 0.2620, 0.1269]) -Greedy action tensor([ 0.9843, -0.7035, -0.0829, -0.6503]) tensor([0.5801, 0.1073, 0.1995, 0.1131]) -Greedy action tensor([ 0.8891, -0.3770, -0.1017, -0.1122]) tensor([0.4949, 0.1395, 0.1838, 0.1818]) -Greedy action tensor([ 0.4529, 0.0264, -0.1122, -0.3085]) tensor([0.3720, 0.2429, 0.2114, 0.1737]) -Greedy action tensor([ 0.4795, -0.1994, -0.1494, -0.1559]) tensor([0.3891, 0.1973, 0.2074, 0.2061]) -Greedy action tensor([ 0.9099, 0.0378, -0.2069, -0.2552]) tensor([0.4861, 0.2032, 0.1591, 0.1516]) -Greedy action tensor([ 0.7203, -0.0578, -0.0329, 0.0441]) tensor([0.4101, 0.1883, 0.1931, 0.2085]) -Greedy action tensor([ 0.6817, -0.6503, -0.0042, -0.1447]) tensor([0.4535, 0.1197, 0.2284, 0.1985]) -Greedy action tensor([ 2.7427e-01, -2.2085e-01, -2.0082e-04, -1.4511e-01]) tensor([0.3304, 0.2014, 0.2511, 0.2172]) -Greedy action tensor([ 0.7411, -0.3900, -0.1086, -0.2348]) tensor([0.4701, 0.1517, 0.2010, 0.1772]) -Greedy action tensor([ 0.8997, -0.5218, -0.0224, -0.9158]) tensor([0.5550, 0.1339, 0.2207, 0.0903]) -Greedy action tensor([ 0.7202, -0.2695, 0.0171, -0.2950]) tensor([0.4486, 0.1667, 0.2221, 0.1626]) -Greedy action tensor([ 0.8069, -0.2855, -0.0079, -0.5341]) tensor([0.4903, 0.1644, 0.2171, 0.1282]) -Greedy action tensor([ 0.6930, -0.3795, -0.0602, -0.1797]) tensor([0.4483, 0.1534, 0.2111, 0.1873]) -Greedy action tensor([ 0.3613, 0.0264, 0.0518, -0.5808]) tensor([0.3522, 0.2520, 0.2585, 0.1373]) -Greedy action tensor([ 1.3071, -1.1854, 0.1084, -0.6720]) tensor([0.6568, 0.0543, 0.1981, 0.0908]) -Greedy action tensor([ 0.4011, -0.3294, -0.0614, -0.2029]) tensor([0.3762, 0.1812, 0.2369, 0.2056]) -Greedy action tensor([ 0.6012, -0.4526, -0.1053, -0.3942]) tensor([0.4522, 0.1576, 0.2231, 0.1671]) -Greedy action tensor([ 0.9625, -0.3460, -0.1165, -0.3275]) tensor([0.5304, 0.1433, 0.1803, 0.1460]) -Greedy action tensor([ 0.8990, -0.5820, -0.0728, -0.5348]) tensor([0.5422, 0.1233, 0.2052, 0.1293]) -Greedy action tensor([ 0.5160, -0.3103, -0.0475, -0.3665]) tensor([0.4131, 0.1808, 0.2351, 0.1709]) -Greedy action tensor([ 0.7709, -0.4874, 0.0750, -0.1554]) tensor([0.4590, 0.1304, 0.2289, 0.1818]) -Greedy action tensor([ 1.1289, -0.8164, -0.0588, -0.5477]) tensor([0.6117, 0.0874, 0.1865, 0.1144]) -Greedy action tensor([ 0.4106, -0.2095, -0.0046, -0.2633]) tensor([0.3693, 0.1986, 0.2438, 0.1882]) -Greedy action tensor([ 0.7397, -0.2532, 0.0472, -0.4737]) tensor([0.4613, 0.1709, 0.2308, 0.1371]) -Greedy action tensor([ 0.7613, -0.3801, 0.0879, -0.3443]) tensor([0.4629, 0.1478, 0.2361, 0.1532]) -Greedy action tensor([ 1.0359, -0.5438, 0.1631, -0.4819]) tensor([0.5426, 0.1118, 0.2267, 0.1189]) -Greedy action tensor([ 0.7203, -0.6275, -0.2067, -0.4552]) tensor([0.5091, 0.1323, 0.2015, 0.1571]) -Greedy action tensor([ 0.3861, -0.1025, 0.0980, -0.2941]) tensor([0.3485, 0.2138, 0.2612, 0.1765]) -Greedy action tensor([ 0.6838, -0.3195, -0.2261, -0.2834]) tensor([0.4652, 0.1706, 0.1873, 0.1769]) -Greedy action tensor([ 0.6010, -0.1387, -0.1141, -0.3748]) tensor([0.4267, 0.2037, 0.2087, 0.1608]) -Greedy action tensor([ 0.4770, -0.1592, 0.0981, -0.0902]) tensor([0.3596, 0.1903, 0.2462, 0.2039]) -Greedy action tensor([ 0.4871, -0.2651, -0.1203, -0.1141]) tensor([0.3900, 0.1838, 0.2125, 0.2138]) -Greedy action tensor([ 0.4102, -0.3876, 0.0475, -0.6048]) tensor([0.3986, 0.1795, 0.2774, 0.1445]) -Greedy action tensor([ 0.4158, -0.1713, -0.0463, -0.1255]) tensor([0.3613, 0.2008, 0.2276, 0.2103]) -Greedy action tensor([ 0.3499, 0.3660, -0.0367, 0.0235]) tensor([0.2926, 0.2974, 0.1988, 0.2111]) -Greedy action tensor([ 0.8431, -0.6306, 0.0016, -0.3553]) tensor([0.5097, 0.1168, 0.2197, 0.1538]) -Greedy action tensor([ 0.6545, -0.3250, 0.0038, -0.0504]) tensor([0.4182, 0.1570, 0.2182, 0.2066]) -Greedy action tensor([ 0.6383, -0.4258, -0.1652, -0.2331]) tensor([0.4522, 0.1560, 0.2025, 0.1892]) -Greedy action tensor([ 5.7669e-01, 1.6403e-01, -1.3039e-06, -3.2389e-01]) tensor([0.3802, 0.2517, 0.2136, 0.1545]) -Greedy action tensor([ 0.8085, -0.4074, 0.0296, -0.2612]) tensor([0.4766, 0.1413, 0.2187, 0.1635]) -Greedy action tensor([ 1.2766, -1.3282, 0.0738, -0.6300]) tensor([0.6567, 0.0485, 0.1972, 0.0976]) -Greedy action tensor([ 0.4524, -0.1020, -0.0107, -0.1014]) tensor([0.3599, 0.2067, 0.2265, 0.2069]) -Greedy action tensor([ 0.5997, -0.6266, -0.0118, -0.3092]) tensor([0.4466, 0.1310, 0.2423, 0.1800]) -Greedy action tensor([ 0.5829, -0.0072, -0.0130, 0.0231]) tensor([0.3736, 0.2071, 0.2059, 0.2134]) -Greedy action tensor([ 0.3927, -0.5428, 0.0329, -0.4162]) tensor([0.3944, 0.1548, 0.2752, 0.1756]) -Greedy action tensor([ 0.6278, -0.1386, -0.0015, -0.2615]) tensor([0.4152, 0.1929, 0.2213, 0.1706]) -Greedy action tensor([ 0.2689, -0.0809, -0.1373, -0.3701]) tensor([0.3450, 0.2431, 0.2298, 0.1821]) -Greedy action tensor([ 0.7025, -0.2968, -0.1643, -0.1764]) tensor([0.4538, 0.1671, 0.1907, 0.1884]) -Greedy action tensor([ 0.8468, -0.8902, -0.0424, -0.4163]) tensor([0.5348, 0.0941, 0.2198, 0.1512]) -Greedy action tensor([ 0.4070, -0.1372, -0.0057, -0.2615]) tensor([0.3630, 0.2107, 0.2403, 0.1860]) -Greedy action tensor([ 0.9166, -0.6686, 0.0331, -0.3651]) tensor([0.5275, 0.1081, 0.2180, 0.1464]) -Greedy action tensor([ 0.6562, -0.5211, -0.1642, -0.1235]) tensor([0.4531, 0.1396, 0.1995, 0.2078]) -Greedy action tensor([ 0.6301, -0.2097, -0.1150, -0.0985]) tensor([0.4186, 0.1807, 0.1987, 0.2020]) -Greedy action tensor([ 0.7130, -0.1562, 0.0125, -0.3076]) tensor([0.4394, 0.1842, 0.2181, 0.1583]) -Greedy action tensor([ 0.2944, -0.2998, -0.2124, -0.3899]) tensor([0.3761, 0.2076, 0.2266, 0.1897]) -Greedy action tensor([ 1.1540, -0.8019, 0.2341, -0.4880]) tensor([0.5768, 0.0816, 0.2299, 0.1117]) -Greedy action tensor([ 0.8782, -0.8165, -0.0422, -0.4881]) tensor([0.5444, 0.1000, 0.2168, 0.1388]) -Greedy action tensor([ 0.7205, -0.4764, -0.0184, -0.1790]) tensor([0.4574, 0.1382, 0.2184, 0.1860]) -Greedy action tensor([ 0.7146, -0.2535, -0.1830, -0.3517]) tensor([0.4691, 0.1782, 0.1912, 0.1615]) -Greedy action tensor([ 0.6098, -0.1504, -0.0147, -0.0350]) tensor([0.3956, 0.1850, 0.2118, 0.2076]) -Greedy action tensor([ 0.7675, -0.2144, 0.0497, -0.6977]) tensor([0.4777, 0.1789, 0.2330, 0.1104]) -Greedy action tensor([ 0.4518, 0.0954, -0.2369, 0.1388]) tensor([0.3409, 0.2387, 0.1712, 0.2493]) -Greedy action tensor([ 0.6432, -0.2793, 0.0164, -0.2337]) tensor([0.4259, 0.1693, 0.2276, 0.1772]) -Greedy action tensor([ 0.7757, -0.0710, 0.1679, -0.3602]) tensor([0.4358, 0.1869, 0.2373, 0.1400]) -Greedy action tensor([ 1.2613, -1.1225, 0.1133, -0.6463]) tensor([0.6419, 0.0592, 0.2037, 0.0953]) -Greedy action tensor([ 0.7094, -0.1571, -0.0152, -0.2104]) tensor([0.4341, 0.1825, 0.2103, 0.1730]) -Greedy action tensor([ 0.6921, -0.4310, -0.2153, -0.4425]) tensor([0.4877, 0.1586, 0.1968, 0.1568]) -Greedy action tensor([ 0.8968, -0.9069, 0.0421, -0.3448]) tensor([0.5322, 0.0876, 0.2264, 0.1538]) -Greedy action tensor([ 0.5903, -0.4356, 0.0121, -0.6221]) tensor([0.4511, 0.1617, 0.2530, 0.1342]) -Greedy action tensor([ 0.8528, -0.7802, 0.1195, -0.6434]) tensor([0.5264, 0.1028, 0.2528, 0.1179]) -Greedy action tensor([ 0.9267, -0.6661, 0.1207, -0.5391]) tensor([0.5317, 0.1081, 0.2375, 0.1228]) -Greedy action tensor([ 0.1430, 0.3613, -0.1285, 0.0909]) tensor([0.2528, 0.3145, 0.1927, 0.2400]) -Greedy action tensor([ 1.0965, -0.8461, 0.1354, -0.7372]) tensor([0.5932, 0.0850, 0.2269, 0.0948]) -Greedy action tensor([ 0.4075, -0.3736, 0.1222, -0.7865]) tensor([0.3980, 0.1822, 0.2992, 0.1206]) -Greedy action tensor([ 0.3897, -0.0556, 0.0381, -0.3192]) tensor([0.3526, 0.2259, 0.2480, 0.1735]) -Greedy action tensor([ 0.5329, -0.0610, -0.1290, -0.0662]) tensor([0.3821, 0.2110, 0.1971, 0.2099]) -Greedy action tensor([ 0.6566, -0.7353, 0.2260, -0.8134]) tensor([0.4698, 0.1168, 0.3054, 0.1080]) -Greedy action tensor([ 0.3488, -0.2432, -0.0580, -0.2210]) tensor([0.3591, 0.1987, 0.2391, 0.2031]) -Greedy action tensor([ 0.3358, -0.0761, 0.0072, -0.1946]) tensor([0.3366, 0.2230, 0.2424, 0.1981]) -Greedy action tensor([ 0.6242, 0.1793, -0.0645, -0.5139]) tensor([0.4059, 0.2601, 0.2039, 0.1301]) -Greedy action tensor([ 0.9704, -0.1038, 0.3808, 0.2238]) tensor([0.4219, 0.1441, 0.2340, 0.2000]) -Greedy action tensor([-0.9083, -0.6183, -0.1854, -0.1103]) tensor([0.1511, 0.2019, 0.3113, 0.3356]) -Greedy action tensor([ 0.7366, 0.1255, -0.0674, 1.1078]) tensor([0.2907, 0.1578, 0.1301, 0.4214]) -Greedy action tensor([ 1.7448, -0.0580, 1.5023, 1.0595]) tensor([0.4076, 0.0672, 0.3198, 0.2054]) -Greedy action tensor([-0.6680, -0.5658, -0.3568, 0.1472]) tensor([0.1745, 0.1932, 0.2381, 0.3942]) -Greedy action tensor([ 0.0345, -0.6638, 0.3142, 0.4988]) tensor([0.2267, 0.1128, 0.2999, 0.3606]) -Greedy action tensor([ 0.4659, -0.3790, 1.7789, -1.1385]) tensor([0.1870, 0.0803, 0.6951, 0.0376]) -Greedy action tensor([ 0.8454, 0.6958, -0.1994, 1.1773]) tensor([0.2773, 0.2388, 0.0975, 0.3864]) -Greedy action tensor([ 1.2213, 0.3429, -0.7267, -0.4572]) tensor([0.5732, 0.2381, 0.0817, 0.1070]) -Greedy action tensor([ 0.7466, -0.5623, 1.0776, 1.4930]) tensor([0.2096, 0.0566, 0.2918, 0.4421]) -Greedy action tensor([ 0.3948, -1.0333, 1.1243, 1.3646]) tensor([0.1680, 0.0403, 0.3485, 0.4432]) -Greedy action tensor([ 0.5881, -1.7161, 0.3506, 0.9156]) tensor([0.3053, 0.0305, 0.2407, 0.4235]) -Greedy action tensor([-0.0971, 0.1073, -0.2485, 1.6852]) tensor([0.1107, 0.1359, 0.0952, 0.6582]) -Greedy action tensor([0.5896, 0.5648, 0.5734, 1.0165]) tensor([0.2226, 0.2172, 0.2190, 0.3412]) -Greedy action tensor([0.4764, 0.7915, 0.0579, 0.0746]) tensor([0.2705, 0.3706, 0.1780, 0.1810]) -Greedy action tensor([ 0.5450, -0.9525, -0.5426, 0.8652]) tensor([0.3404, 0.0761, 0.1147, 0.4688]) -Greedy action tensor([-1.0316, -1.8726, 1.4033, 0.2951]) tensor([0.0602, 0.0260, 0.6870, 0.2268]) -Greedy action tensor([ 0.7339, 0.6580, 0.0326, -0.4296]) tensor([0.3656, 0.3389, 0.1813, 0.1142]) -Greedy action tensor([ 1.2540, -0.3257, -0.4300, 0.6241]) tensor([0.5197, 0.1071, 0.0965, 0.2768]) -Greedy action tensor([ 1.6724, -0.7296, -0.0903, 0.4385]) tensor([0.6438, 0.0583, 0.1105, 0.1874]) -Greedy action tensor([ 0.3257, -1.0421, -0.0823, 0.9966]) tensor([0.2580, 0.0657, 0.1716, 0.5047]) -Greedy action tensor([-0.0413, -0.7221, 0.3747, 1.4800]) tensor([0.1316, 0.0666, 0.1994, 0.6024]) -Greedy action tensor([ 0.3344, 0.1763, -0.2321, 0.8554]) tensor([0.2436, 0.2080, 0.1383, 0.4102]) -Greedy action tensor([ 0.4382, -0.4118, -0.1130, 1.5510]) tensor([0.1982, 0.0847, 0.1142, 0.6030]) -Greedy action tensor([ 1.0024e+00, 2.3940e-01, 8.9484e-04, -5.6662e-01]) tensor([0.4898, 0.2284, 0.1799, 0.1020]) -Greedy action tensor([ 0.2822, -1.1856, 1.6271, 0.0283]) tensor([0.1711, 0.0394, 0.6567, 0.1327]) -Greedy action tensor([-0.0227, -0.4751, -0.7909, 0.1443]) tensor([0.3047, 0.1938, 0.1413, 0.3601]) -Greedy action tensor([-0.3832, -0.6664, -1.8085, 0.5037]) tensor([0.2262, 0.1704, 0.0544, 0.5491]) -Greedy action tensor([-0.6423, -0.3224, 0.7671, 0.0534]) tensor([0.1180, 0.1625, 0.4830, 0.2366]) -Greedy action tensor([-0.8545, 0.2348, 1.0846, -1.0633]) tensor([0.0852, 0.2533, 0.5924, 0.0692]) -Greedy action tensor([-0.3666, -0.9123, 0.6131, -0.8965]) tensor([0.2070, 0.1199, 0.5513, 0.1218]) -Greedy action tensor([ 0.2681, -0.2105, 0.0182, -0.2019]) tensor([0.3307, 0.2049, 0.2576, 0.2067]) -Greedy action tensor([ 1.0904, -0.8656, 0.8742, 0.5366]) tensor([0.3965, 0.0561, 0.3194, 0.2279]) -Greedy action tensor([ 2.0924, -0.0103, 1.5828, 1.1616]) tensor([0.4723, 0.0577, 0.2838, 0.1862]) -Greedy action tensor([ 0.6295, 0.6826, -0.2125, -0.5820]) tensor([0.3593, 0.3789, 0.1548, 0.1070]) -Greedy action tensor([0.6010, 0.1507, 0.7511, 0.4997]) tensor([0.2701, 0.1721, 0.3138, 0.2440]) -Greedy action tensor([ 0.6735, -0.9453, -0.2875, 1.0688]) tensor([0.3262, 0.0646, 0.1248, 0.4844]) -Greedy action tensor([ 0.6808, -1.3738, -0.3319, 0.2194]) tensor([0.4713, 0.0604, 0.1712, 0.2971]) -Greedy action tensor([-1.4876e+00, -1.2348e+00, 1.2871e+00, 5.7578e-04]) tensor([0.0440, 0.0566, 0.7048, 0.1947]) -Greedy action tensor([ 0.7363, -0.8527, 1.2256, 0.8247]) tensor([0.2546, 0.0520, 0.4153, 0.2781]) -Greedy action tensor([ 0.8588, 0.0687, -0.0964, 0.3904]) tensor([0.4058, 0.1841, 0.1561, 0.2540]) -Greedy action tensor([ 0.9042, 0.7609, -1.1265, 0.5216]) tensor([0.3732, 0.3233, 0.0490, 0.2545]) -Greedy action tensor([-0.2803, -1.0211, 1.3712, -0.8631]) tensor([0.1379, 0.0658, 0.7193, 0.0770]) -Greedy action tensor([ 0.5832, -0.6822, 0.0745, 0.0530]) tensor([0.4045, 0.1141, 0.2433, 0.2381]) -Greedy action tensor([ 0.7569, -0.4370, 1.0542, 1.9394]) tensor([0.1692, 0.0513, 0.2277, 0.5519]) -Greedy action tensor([0.5890, 0.1752, 0.2186, 0.4488]) tensor([0.3105, 0.2053, 0.2144, 0.2699]) -Greedy action tensor([ 0.0705, -0.6040, 0.2320, 0.7764]) tensor([0.2123, 0.1082, 0.2495, 0.4300]) -Greedy action tensor([ 0.4999, -0.7410, 0.1198, 0.9434]) tensor([0.2832, 0.0819, 0.1936, 0.4413]) -Greedy action tensor([-0.0887, 0.2828, -0.9006, 1.1613]) tensor([0.1566, 0.2271, 0.0696, 0.5467]) -Greedy action tensor([-1.4898, -0.1834, 1.7746, -1.6489]) tensor([0.0315, 0.1165, 0.8251, 0.0269]) -Greedy action tensor([ 1.9055, -0.5096, 0.8388, 1.7285]) tensor([0.4403, 0.0393, 0.1515, 0.3689]) -Greedy action tensor([ 1.1808, 0.0048, -0.8193, 0.9231]) tensor([0.4511, 0.1392, 0.0610, 0.3487]) -Greedy action tensor([-0.8268, -0.2146, -0.0185, 1.5444]) tensor([0.0633, 0.1167, 0.1420, 0.6779]) -Greedy action tensor([-0.5906, 0.3269, 0.0795, 0.0010]) tensor([0.1377, 0.3446, 0.2690, 0.2487]) -Greedy action tensor([ 0.9857, -1.5667, 1.6409, 0.1199]) tensor([0.2921, 0.0227, 0.5623, 0.1229]) -Greedy action tensor([-0.1461, -0.3961, 0.0486, 0.4512]) tensor([0.2079, 0.1619, 0.2525, 0.3777]) -Greedy action tensor([ 0.1668, -0.6197, -0.4482, 1.3962]) tensor([0.1847, 0.0841, 0.0998, 0.6314]) -Greedy action tensor([ 0.4678, -1.1443, 0.6370, 0.5351]) tensor([0.2896, 0.0578, 0.3430, 0.3097]) -Greedy action tensor([ 0.2856, -0.9937, 0.6631, 1.0020]) tensor([0.2090, 0.0582, 0.3049, 0.4279]) -Greedy action tensor([ 0.0921, 0.0469, -0.7212, 0.4713]) tensor([0.2590, 0.2476, 0.1149, 0.3785]) -Greedy action tensor([ 1.5779, -1.0373, 0.6167, -0.1430]) tensor([0.6118, 0.0448, 0.2340, 0.1095]) -Greedy action tensor([ 1.2312, 0.7176, 1.4280, -0.1264]) tensor([0.3254, 0.1947, 0.3962, 0.0837]) -Greedy action tensor([ 0.0200, 0.0174, 0.3400, -1.2789]) tensor([0.2742, 0.2735, 0.3776, 0.0748]) -Greedy action tensor([-0.6929, -0.4244, -0.4308, -0.8695]) tensor([0.2249, 0.2942, 0.2923, 0.1885]) -Greedy action tensor([-0.0434, -0.6788, 0.2825, -0.5354]) tensor([0.2836, 0.1502, 0.3928, 0.1734]) -Greedy action tensor([-0.1245, -0.0864, 0.3513, 0.4169]) tensor([0.1863, 0.1936, 0.2999, 0.3202]) -Greedy action tensor([0.1661, 0.2861, 0.4786, 0.3239]) tensor([0.2143, 0.2417, 0.2930, 0.2510]) -Greedy action tensor([-0.4642, -0.5769, 0.6567, -0.4016]) tensor([0.1660, 0.1483, 0.5091, 0.1767]) -Greedy action tensor([-1.9541, -0.5956, 0.4999, -0.4619]) tensor([0.0477, 0.1855, 0.5548, 0.2120]) -Greedy action tensor([ 0.6657, -0.7923, 1.1340, 1.0909]) tensor([0.2294, 0.0534, 0.3664, 0.3509]) -Greedy action tensor([0.0521, 0.3307, 0.8556, 0.3107]) tensor([0.1710, 0.2259, 0.3818, 0.2214]) -Greedy action tensor([ 0.3501, -0.8779, 1.1553, -0.0826]) tensor([0.2393, 0.0701, 0.5354, 0.1553]) -Greedy action tensor([ 0.8941, -0.1541, 1.2890, 0.1298]) tensor([0.3030, 0.1062, 0.4497, 0.1411]) -Greedy action tensor([ 0.8238, 0.1427, -0.9864, 0.7944]) tensor([0.3787, 0.1916, 0.0620, 0.3677]) -Greedy action tensor([-1.0049, -0.9847, -0.2365, -0.8307]) tensor([0.1863, 0.1901, 0.4018, 0.2218]) -Greedy action tensor([ 0.8386, -0.6399, 0.5142, 0.8208]) tensor([0.3409, 0.0777, 0.2465, 0.3349]) -Greedy action tensor([-0.5392, -1.3456, -0.4134, 0.2981]) tensor([0.2045, 0.0913, 0.2319, 0.4724]) -Greedy action tensor([-0.9088, -1.2724, 0.0756, 0.8973]) tensor([0.0956, 0.0665, 0.2559, 0.5820]) -Greedy action tensor([-0.3517, 0.0407, 0.8663, 0.5222]) tensor([0.1211, 0.1793, 0.4094, 0.2902]) -Greedy action tensor([ 0.1656, -1.4348, 0.4905, 1.0562]) tensor([0.1991, 0.0402, 0.2756, 0.4851]) -Greedy action tensor([-1.0719, -0.1966, 0.2468, 0.7397]) tensor([0.0754, 0.1810, 0.2820, 0.4616]) -Greedy action tensor([-1.5843, -0.5216, 0.4801, 0.0755]) tensor([0.0587, 0.1699, 0.4627, 0.3087]) -Greedy action tensor([-1.8876, -0.4920, 0.7400, -0.0528]) tensor([0.0398, 0.1606, 0.5505, 0.2491]) -Greedy action tensor([-1.7546, -0.2692, 0.6163, -0.0458]) tensor([0.0462, 0.2040, 0.4946, 0.2551]) -Greedy action tensor([-1.7578, -0.5141, 0.6229, 0.0137]) tensor([0.0473, 0.1639, 0.5110, 0.2779]) -Greedy action tensor([-1.6291, -0.3699, 0.5654, 0.1047]) tensor([0.0522, 0.1839, 0.4685, 0.2955]) -Greedy action tensor([-1.9212, -0.4081, 0.6491, -0.1647]) tensor([0.0410, 0.1861, 0.5356, 0.2374]) -Greedy action tensor([-1.9045, -0.3878, 0.6546, -0.1456]) tensor([0.0412, 0.1876, 0.5321, 0.2391]) -Greedy action tensor([-1.8732, -0.4299, 0.6302, -0.1431]) tensor([0.0433, 0.1833, 0.5292, 0.2442]) -Greedy action tensor([-1.9138, -0.4323, 0.6438, -0.1603]) tensor([0.0415, 0.1827, 0.5359, 0.2398]) -Greedy action tensor([-1.7652, -0.4594, 0.5774, -0.0847]) tensor([0.0489, 0.1803, 0.5085, 0.2623]) -Greedy action tensor([-1.7438, -0.1604, 0.5401, -0.0501]) tensor([0.0473, 0.2306, 0.4646, 0.2575]) -Greedy action tensor([-1.2997, -0.5378, 0.7622, 0.7760]) tensor([0.0527, 0.1129, 0.4143, 0.4201]) -Greedy action tensor([-0.3483, -0.0511, 0.1499, 0.2103]) tensor([0.1742, 0.2345, 0.2867, 0.3046]) -Greedy action tensor([-1.9316, -0.4189, 0.6595, -0.1691]) tensor([0.0405, 0.1837, 0.5400, 0.2358]) -Greedy action tensor([-1.9183, -0.5902, 1.1206, 0.2234]) tensor([0.0293, 0.1104, 0.6111, 0.2492]) -Greedy action tensor([-1.9416, -0.4431, 0.6654, -0.1780]) tensor([0.0402, 0.1800, 0.5452, 0.2346]) -Greedy action tensor([-1.8649, -0.5001, 0.6214, -0.1346]) tensor([0.0443, 0.1734, 0.5323, 0.2499]) -Greedy action tensor([-1.5797, -0.1711, 0.6266, 0.1568]) tensor([0.0504, 0.2061, 0.4576, 0.2860]) -Greedy action tensor([-1.9334, -0.4376, 0.6616, -0.1704]) tensor([0.0405, 0.1808, 0.5426, 0.2361]) -Greedy action tensor([-1.9276, -0.4257, 0.6563, -0.1678]) tensor([0.0407, 0.1829, 0.5397, 0.2367]) -Greedy action tensor([-0.6293, -0.6412, 0.2860, -0.0973]) tensor([0.1616, 0.1597, 0.4036, 0.2751]) -Greedy action tensor([-1.0435, -0.2309, 0.3366, -0.1739]) tensor([0.1040, 0.2344, 0.4134, 0.2482]) -Greedy action tensor([-0.5505, -0.3830, 0.8468, 1.3763]) tensor([0.0764, 0.0903, 0.3089, 0.5245]) -Greedy action tensor([-1.2500, 0.4968, 0.2324, 0.2688]) tensor([0.0637, 0.3652, 0.2804, 0.2908]) -Greedy action tensor([-1.7661, -0.2374, 0.5496, -0.0935]) tensor([0.0475, 0.2189, 0.4809, 0.2528]) -Greedy action tensor([-1.5110, 0.5537, 0.3389, 0.0997]) tensor([0.0494, 0.3893, 0.3141, 0.2472]) -Greedy action tensor([-1.9081, -0.4393, 0.6474, -0.1620]) tensor([0.0417, 0.1813, 0.5376, 0.2393]) -Greedy action tensor([-1.4971, -0.2232, 0.6002, 0.1108]) tensor([0.0565, 0.2018, 0.4598, 0.2819]) -Greedy action tensor([-1.4631, -0.5907, 0.4340, 0.1126]) tensor([0.0671, 0.1607, 0.4476, 0.3246]) -Greedy action tensor([-1.9190, -0.4205, 0.6601, -0.1559]) tensor([0.0408, 0.1827, 0.5384, 0.2381]) -Greedy action tensor([-1.8766, -0.4408, 0.6513, -0.1080]) tensor([0.0424, 0.1781, 0.5310, 0.2485]) -Greedy action tensor([-1.7548, -0.0583, 0.5175, -0.1111]) tensor([0.0469, 0.2557, 0.4548, 0.2426]) -Greedy action tensor([-1.5222, -0.5328, 0.4403, 0.0679]) tensor([0.0636, 0.1712, 0.4530, 0.3122]) -Greedy action tensor([-0.3725, -0.1693, 0.1897, 0.2433]) tensor([0.1715, 0.2101, 0.3009, 0.3175]) -Greedy action tensor([-1.9258, -0.4509, 0.6585, -0.1710]) tensor([0.0410, 0.1791, 0.5431, 0.2369]) -Greedy action tensor([-1.2387, -0.5523, 0.3760, 0.5041]) tensor([0.0729, 0.1447, 0.3662, 0.4162]) -Greedy action tensor([-0.8011, -0.3374, -0.2019, -0.0497]) tensor([0.1531, 0.2435, 0.2788, 0.3246]) -Greedy action tensor([-1.5213, -0.4477, 0.4478, 0.0708]) tensor([0.0625, 0.1828, 0.4476, 0.3070]) -Greedy action tensor([-1.8225, -0.4531, 0.6010, -0.1263]) tensor([0.0461, 0.1815, 0.5207, 0.2516]) -Greedy action tensor([-1.7195, -0.1549, 0.5728, 0.0586]) tensor([0.0463, 0.2214, 0.4583, 0.2741]) -Greedy action tensor([-1.8113, -0.4548, 0.6108, -0.0668]) tensor([0.0457, 0.1775, 0.5152, 0.2616]) -Greedy action tensor([-1.8605, -0.4595, 0.7304, -0.0579]) tensor([0.0409, 0.1659, 0.5453, 0.2479]) -Greedy action tensor([-1.5274, -0.5417, 0.4852, 0.1132]) tensor([0.0613, 0.1642, 0.4585, 0.3160]) -Greedy action tensor([-1.0772, 0.3740, 0.5728, 0.6895]) tensor([0.0612, 0.2614, 0.3189, 0.3584]) -Greedy action tensor([-1.9441, -0.4488, 0.6672, -0.1792]) tensor([0.0401, 0.1790, 0.5464, 0.2344]) -Greedy action tensor([-1.7038, -0.5256, 0.5414, -0.0308]) tensor([0.0526, 0.1708, 0.4965, 0.2801]) -Greedy action tensor([-0.6064, -0.5420, 1.3070, 0.9192]) tensor([0.0744, 0.0793, 0.5042, 0.3421]) -Greedy action tensor([-0.7125, -0.6742, 0.2115, 0.5014]) tensor([0.1262, 0.1311, 0.3179, 0.4248]) -Greedy action tensor([-1.8933, -0.4024, 0.6311, -0.1494]) tensor([0.0423, 0.1878, 0.5280, 0.2419]) -Greedy action tensor([-1.8442, -0.4466, 0.6300, -0.0917]) tensor([0.0441, 0.1783, 0.5233, 0.2543]) -Greedy action tensor([-1.9369, -0.4411, 0.6640, -0.1751]) tensor([0.0404, 0.1802, 0.5442, 0.2352]) -Greedy action tensor([-1.7675, -0.3106, 0.5643, -0.0510]) tensor([0.0473, 0.2029, 0.4867, 0.2631]) -Greedy action tensor([-1.3593, 0.7778, 0.2396, 0.2574]) tensor([0.0514, 0.4355, 0.2543, 0.2588]) -Greedy action tensor([-1.5173, -0.4618, 0.6586, 0.3904]) tensor([0.0515, 0.1479, 0.4536, 0.3469]) -Greedy action tensor([-1.3700, -0.5993, 0.4338, 0.1868]) tensor([0.0715, 0.1546, 0.4344, 0.3394]) -Greedy action tensor([-1.9021, -0.4395, 0.6424, -0.1694]) tensor([0.0422, 0.1821, 0.5372, 0.2386]) -Greedy action tensor([-0.9788, -0.5774, 0.3331, 0.4207]) tensor([0.0975, 0.1456, 0.3619, 0.3950]) -Greedy action tensor([-1.8369, -0.3649, 0.6186, -0.1083]) tensor([0.0442, 0.1925, 0.5146, 0.2488]) -Greedy action tensor([-1.8917, -0.3248, 0.6211, -0.1482]) tensor([0.0419, 0.2009, 0.5174, 0.2397]) -Greedy action tensor([-1.8891, -0.4227, 0.6339, -0.1593]) tensor([0.0427, 0.1849, 0.5318, 0.2406]) -Greedy action tensor([-1.2974, -0.4092, 0.7470, -0.6492]) tensor([0.0765, 0.1860, 0.5911, 0.1463]) -Greedy action tensor([-1.8128, -0.3244, 0.6494, -0.0713]) tensor([0.0437, 0.1937, 0.5130, 0.2495]) -Greedy action tensor([-1.7948, -0.2615, 0.5707, -0.1400]) tensor([0.0465, 0.2153, 0.4950, 0.2432]) -Greedy action tensor([-1.6205, -0.3901, 0.5147, -0.1376]) tensor([0.0578, 0.1980, 0.4893, 0.2549]) -Greedy action tensor([-1.3279, -0.3545, 0.5869, 0.5552]) tensor([0.0588, 0.1556, 0.3990, 0.3866]) -Greedy action tensor([ 0.7975, 1.2746, -0.0205, 0.5341]) tensor([0.2617, 0.4217, 0.1155, 0.2011]) -Greedy action tensor([-1.9404, -0.4366, 0.6628, -0.1782]) tensor([0.0403, 0.1812, 0.5440, 0.2346]) -Greedy action tensor([-1.0001, -0.5994, 0.3095, 0.9483]) tensor([0.0757, 0.1130, 0.2803, 0.5310]) -Greedy action tensor([-1.8915, -0.4223, 0.6368, -0.1494]) tensor([0.0424, 0.1842, 0.5313, 0.2421]) -Greedy action tensor([-1.9282, -0.4435, 0.6603, -0.1703]) tensor([0.0408, 0.1800, 0.5427, 0.2365]) -Greedy action tensor([-1.8185, -0.5851, 0.9805, 0.1173]) tensor([0.0360, 0.1235, 0.5911, 0.2493]) -Greedy action tensor([-1.7737, -0.4498, 0.6486, 0.0310]) tensor([0.0452, 0.1700, 0.5098, 0.2749]) -Greedy action tensor([-0.9901, -0.3129, 0.3299, -0.5254]) tensor([0.1204, 0.2371, 0.4509, 0.1917]) -Greedy action tensor([-0.7699, -0.1768, 0.8444, 1.2053]) tensor([0.0665, 0.1203, 0.3340, 0.4792]) -Greedy action tensor([-1.8632, -0.2603, 0.6025, -0.1587]) tensor([0.0430, 0.2138, 0.5066, 0.2366]) -Greedy action tensor([-1.5395, 0.0069, 0.6082, 0.1900]) tensor([0.0503, 0.2359, 0.4305, 0.2833]) -Greedy action tensor([-1.7804, -0.5139, 0.5987, -0.0824]) tensor([0.0481, 0.1705, 0.5188, 0.2626]) -Greedy action tensor([-1.6092, -0.2615, 0.5413, 0.0326]) tensor([0.0538, 0.2069, 0.4617, 0.2776]) -Greedy action tensor([-1.8460, -0.3477, 0.5932, -0.1128]) tensor([0.0443, 0.1980, 0.5073, 0.2504]) -Greedy action tensor([-1.4922, -0.2912, 0.4680, 0.1703]) tensor([0.0599, 0.1991, 0.4253, 0.3158]) -Greedy action tensor([ 1.4667, -0.5273, -0.4132, 0.4123]) tensor([0.6108, 0.0832, 0.0932, 0.2128]) -Greedy action tensor([ 1.4695, -0.1414, -1.0874, 0.3440]) tensor([0.6243, 0.1247, 0.0484, 0.2026]) -Greedy action tensor([ 1.4378, -0.4177, -0.5151, 0.3410]) tensor([0.6127, 0.0958, 0.0869, 0.2046]) -Greedy action tensor([ 2.5878, -0.8455, -0.4243, 0.5813]) tensor([0.8224, 0.0265, 0.0405, 0.1106]) -Greedy action tensor([ 0.7358, -0.0716, 0.0276, -0.1588]) tensor([0.4260, 0.1900, 0.2098, 0.1741]) -Greedy action tensor([ 1.7288, -0.2508, -0.1831, 0.6883]) tensor([0.6101, 0.0843, 0.0902, 0.2155]) -Greedy action tensor([ 1.6880, -0.7601, -0.2992, 0.6403]) tensor([0.6352, 0.0549, 0.0871, 0.2228]) -Greedy action tensor([ 1.9551, -0.4627, -0.5561, 0.0705]) tensor([0.7563, 0.0674, 0.0614, 0.1149]) -Greedy action tensor([ 1.0133, -0.6556, -0.3981, 0.3901]) tensor([0.5080, 0.0957, 0.1238, 0.2724]) -Greedy action tensor([ 1.6198, -0.4156, -0.2938, 0.5040]) tensor([0.6227, 0.0813, 0.0919, 0.2040]) -Greedy action tensor([ 1.2911, -0.1684, -0.5298, 0.6068]) tensor([0.5267, 0.1224, 0.0853, 0.2657]) -Greedy action tensor([ 0.7684, -0.1491, -0.0717, 0.0425]) tensor([0.4320, 0.1726, 0.1865, 0.2090]) -Greedy action tensor([ 1.0951, -0.3028, -0.1844, -0.0251]) tensor([0.5401, 0.1335, 0.1502, 0.1762]) -Greedy action tensor([ 0.7669, -0.0977, -0.2067, 0.2076]) tensor([0.4218, 0.1777, 0.1593, 0.2411]) -Greedy action tensor([ 0.7215, -0.3522, -0.2378, 0.2458]) tensor([0.4262, 0.1457, 0.1633, 0.2649]) -Greedy action tensor([ 1.1641, -0.4127, -0.2770, 0.2107]) tensor([0.5468, 0.1130, 0.1294, 0.2108]) -Greedy action tensor([ 1.2267, -0.6120, -0.3483, 0.6312]) tensor([0.5216, 0.0829, 0.1080, 0.2875]) -Greedy action tensor([ 1.4829, -0.4158, -0.3352, 0.7026]) tensor([0.5649, 0.0846, 0.0917, 0.2589]) -Greedy action tensor([ 1.6765, -0.4463, -0.2320, 0.2083]) tensor([0.6674, 0.0799, 0.0990, 0.1537]) -Greedy action tensor([ 1.9985, -0.8576, -0.4501, 0.5217]) tensor([0.7287, 0.0419, 0.0630, 0.1664]) -Greedy action tensor([ 1.4235, -0.7903, -0.0662, 0.1294]) tensor([0.6215, 0.0679, 0.1401, 0.1704]) -Greedy action tensor([ 1.1031, 0.0529, -0.0376, 0.0817]) tensor([0.4927, 0.1724, 0.1575, 0.1774]) -Greedy action tensor([ 1.4769, -0.3697, -0.3443, 0.2923]) tensor([0.6152, 0.0971, 0.0996, 0.1882]) -Greedy action tensor([ 1.9064, -0.9806, -0.3455, 0.8350]) tensor([0.6651, 0.0371, 0.0700, 0.2278]) -Greedy action tensor([ 1.5164, -0.7031, -0.1866, -0.2409]) tensor([0.6834, 0.0743, 0.1245, 0.1179]) -Greedy action tensor([ 1.2883, -0.2180, -0.1360, 0.4110]) tensor([0.5324, 0.1181, 0.1281, 0.2214]) -Greedy action tensor([ 1.7663, -0.2190, -0.5449, 0.4572]) tensor([0.6638, 0.0912, 0.0658, 0.1793]) -Greedy action tensor([ 1.0205, -0.1879, -0.1237, -0.0081]) tensor([0.5064, 0.1513, 0.1613, 0.1811]) -Greedy action tensor([ 0.8621, -0.3885, -0.0303, 0.0423]) tensor([0.4681, 0.1340, 0.1917, 0.2062]) -Greedy action tensor([ 1.1959, -0.7321, -0.1516, 0.2213]) tensor([0.5610, 0.0816, 0.1458, 0.2117]) -Greedy action tensor([ 0.7260, -0.1707, 0.0558, -0.1375]) tensor([0.4271, 0.1742, 0.2185, 0.1801]) -Greedy action tensor([ 1.6551, -0.5736, 0.0927, 0.7743]) tensor([0.5775, 0.0622, 0.1210, 0.2393]) -Greedy action tensor([ 1.4103, -0.2816, -0.3364, 0.2222]) tensor([0.6012, 0.1107, 0.1048, 0.1832]) -Greedy action tensor([ 0.8691, -0.3941, -0.5420, 0.4925]) tensor([0.4519, 0.1278, 0.1102, 0.3101]) -Greedy action tensor([ 1.5901, -1.2962, -0.3272, 0.2591]) tensor([0.6817, 0.0380, 0.1002, 0.1801]) -Greedy action tensor([ 0.8888, -0.7048, 0.0483, 0.1754]) tensor([0.4707, 0.0956, 0.2031, 0.2306]) -Greedy action tensor([ 1.2073, -0.4990, -0.1000, 0.3197]) tensor([0.5366, 0.0974, 0.1452, 0.2209]) -Greedy action tensor([ 1.9495, -0.3468, -0.1477, 0.9734]) tensor([0.6249, 0.0629, 0.0767, 0.2355]) -Greedy action tensor([ 1.7270, -0.3041, -0.5802, 0.7783]) tensor([0.6181, 0.0811, 0.0615, 0.2393]) -Greedy action tensor([ 1.7298, -0.2534, -0.3151, -0.0127]) tensor([0.6934, 0.0954, 0.0897, 0.1214]) -Greedy action tensor([ 0.7133, 0.1385, -0.4193, -0.0457]) tensor([0.4250, 0.2392, 0.1369, 0.1989]) -Greedy action tensor([ 2.0912, -0.8510, -0.3458, 0.3974]) tensor([0.7553, 0.0398, 0.0660, 0.1388]) -Greedy action tensor([ 1.1371, -0.4032, -0.2985, 0.2228]) tensor([0.5396, 0.1157, 0.1284, 0.2163]) -Greedy action tensor([ 2.0696, -1.3995, 0.0819, 0.1980]) tensor([0.7564, 0.0236, 0.1036, 0.1164]) -Greedy action tensor([ 1.9290, -0.5347, -0.3623, 0.4424]) tensor([0.7080, 0.0603, 0.0716, 0.1601]) -Greedy action tensor([ 1.8381, -0.4925, -0.2360, 1.0683]) tensor([0.5931, 0.0577, 0.0745, 0.2747]) -Greedy action tensor([ 1.8640, -0.1154, -0.2182, 0.5825]) tensor([0.6492, 0.0897, 0.0809, 0.1802]) -Greedy action tensor([ 1.3649, -0.1624, -0.0285, 0.2562]) tensor([0.5570, 0.1209, 0.1383, 0.1838]) -Greedy action tensor([ 1.4416, -0.4934, -0.6052, 0.6652]) tensor([0.5768, 0.0833, 0.0745, 0.2654]) -Greedy action tensor([ 1.5201, -0.8291, 0.1441, 0.2837]) tensor([0.6103, 0.0583, 0.1542, 0.1773]) -Greedy action tensor([ 1.9309, -0.3523, -0.4650, 0.4812]) tensor([0.7004, 0.0714, 0.0638, 0.1644]) -Greedy action tensor([ 1.1411, -0.6992, -0.1652, 0.2156]) tensor([0.5477, 0.0870, 0.1483, 0.2171]) -Greedy action tensor([ 1.4163, 0.0222, -0.0578, 0.3111]) tensor([0.5530, 0.1372, 0.1266, 0.1831]) -Greedy action tensor([ 1.1005, -0.2349, -0.2229, -0.1766]) tensor([0.5531, 0.1455, 0.1472, 0.1542]) -Greedy action tensor([ 0.6304, -0.4310, -0.1403, 0.0027]) tensor([0.4269, 0.1477, 0.1975, 0.2279]) -Greedy action tensor([ 1.1217, -0.4795, 0.0278, -0.2004]) tensor([0.5546, 0.1118, 0.1857, 0.1478]) -Greedy action tensor([ 1.1981, -0.2176, -0.3794, 0.0107]) tensor([0.5701, 0.1384, 0.1177, 0.1739]) -Greedy action tensor([ 0.8883, -0.2568, -0.2406, 0.3954]) tensor([0.4440, 0.1413, 0.1436, 0.2712]) -Greedy action tensor([ 1.5260, -0.6485, -0.4324, 0.7933]) tensor([0.5762, 0.0655, 0.0813, 0.2770]) -Greedy action tensor([ 1.4495, -0.2141, -0.4590, 0.1728]) tensor([0.6185, 0.1172, 0.0917, 0.1725]) -Greedy action tensor([ 1.5892, -0.5074, -0.3890, 0.1821]) tensor([0.6640, 0.0816, 0.0918, 0.1626]) -Greedy action tensor([ 2.0110, -0.6676, -0.4484, 0.4605]) tensor([0.7319, 0.0503, 0.0626, 0.1553]) -Greedy action tensor([ 1.3316, -0.5710, -0.3544, 0.5249]) tensor([0.5615, 0.0838, 0.1040, 0.2506]) -Greedy action tensor([ 1.3867, -0.0368, -0.3339, 0.0515]) tensor([0.5942, 0.1431, 0.1063, 0.1563]) -Greedy action tensor([ 1.3268, 0.0708, -0.3612, 0.0493]) tensor([0.5719, 0.1629, 0.1058, 0.1594]) -Greedy action tensor([ 0.9747, 0.0416, 0.2222, -0.3611]) tensor([0.4700, 0.1849, 0.2215, 0.1236]) -Greedy action tensor([ 1.5581, -0.6312, 0.0616, -0.0973]) tensor([0.6549, 0.0733, 0.1466, 0.1251]) -Greedy action tensor([ 1.7447, -1.2024, -0.1128, 0.5421]) tensor([0.6627, 0.0348, 0.1034, 0.1991]) -Greedy action tensor([ 0.6696, -0.0606, -0.2496, 0.2737]) tensor([0.3916, 0.1887, 0.1562, 0.2636]) -Greedy action tensor([ 0.6018, -0.5371, 0.1022, 0.0206]) tensor([0.4022, 0.1288, 0.2441, 0.2249]) -Greedy action tensor([ 1.2073, -0.4471, -0.2906, 0.7229]) tensor([0.4924, 0.0942, 0.1101, 0.3034]) -Greedy action tensor([ 1.7125, -0.3714, -0.2570, 0.4909]) tensor([0.6415, 0.0798, 0.0895, 0.1891]) -Greedy action tensor([ 1.2202, -0.1719, -0.7491, 0.1781]) tensor([0.5745, 0.1428, 0.0802, 0.2026]) -Greedy action tensor([ 1.5999, -0.2749, -0.3376, 0.1844]) tensor([0.6492, 0.0996, 0.0935, 0.1576]) -Greedy action tensor([ 1.4881, -0.7679, -0.6250, 1.0842]) tensor([0.5282, 0.0553, 0.0638, 0.3527]) -Greedy action tensor([ 1.6107, -0.4002, -0.0348, 0.3933]) tensor([0.6162, 0.0825, 0.1189, 0.1824]) -Greedy action tensor([ 1.2029, -0.0098, -0.0388, 0.3292]) tensor([0.4991, 0.1484, 0.1442, 0.2083]) -Greedy action tensor([ 1.4979, -0.0562, -0.4444, 0.1030]) tensor([0.6240, 0.1319, 0.0895, 0.1547]) -Greedy action tensor([ 1.6711, -0.5104, -0.2437, 0.3528]) tensor([0.6545, 0.0739, 0.0965, 0.1751]) -Greedy action tensor([ 1.1478, -0.2233, -0.0623, 0.1847]) tensor([0.5171, 0.1313, 0.1542, 0.1974]) -Greedy action tensor([ 1.8716, -0.2856, -0.1493, 0.8923]) tensor([0.6159, 0.0712, 0.0816, 0.2313]) -Greedy action tensor([ 0.5135, 0.0273, -0.0422, -0.3752]) tensor([0.3846, 0.2365, 0.2207, 0.1582]) -Greedy action tensor([ 0.7413, -0.5166, -0.0085, -0.4872]) tensor([0.4879, 0.1387, 0.2305, 0.1428]) -Greedy action tensor([ 0.5764, -0.6520, -0.0014, -0.4471]) tensor([0.4518, 0.1323, 0.2535, 0.1624]) -Greedy action tensor([ 0.8258, -0.5493, -0.1509, -0.3602]) tensor([0.5168, 0.1307, 0.1946, 0.1579]) -Greedy action tensor([ 0.4676, -0.1277, 0.0464, -0.2343]) tensor([0.3699, 0.2040, 0.2428, 0.1834]) -Greedy action tensor([ 0.7469, -0.4854, 0.0178, -0.1039]) tensor([0.4543, 0.1325, 0.2191, 0.1940]) -Greedy action tensor([ 0.4010, -0.5013, 0.0181, -0.1691]) tensor([0.3769, 0.1529, 0.2570, 0.2131]) -Greedy action tensor([ 0.6431, -0.3374, -0.1020, -0.1964]) tensor([0.4382, 0.1644, 0.2080, 0.1893]) -Greedy action tensor([ 0.8512, -0.1201, -0.0753, -0.6475]) tensor([0.5005, 0.1895, 0.1982, 0.1118]) -Greedy action tensor([ 1.1491, -0.9984, 0.0901, -0.5613]) tensor([0.6081, 0.0710, 0.2109, 0.1099]) -Greedy action tensor([ 0.8144, -0.4207, 0.0888, -0.0763]) tensor([0.4576, 0.1331, 0.2215, 0.1878]) -Greedy action tensor([ 0.7656, -0.5433, 0.0197, -0.4328]) tensor([0.4887, 0.1320, 0.2318, 0.1474]) -Greedy action tensor([ 0.3484, 0.1637, 0.0131, -0.2813]) tensor([0.3248, 0.2700, 0.2322, 0.1730]) -Greedy action tensor([ 1.1619, -0.4690, 0.0519, -0.6829]) tensor([0.5940, 0.1163, 0.1958, 0.0939]) -Greedy action tensor([ 0.7079, -0.6609, 0.0791, -0.1848]) tensor([0.4551, 0.1158, 0.2427, 0.1864]) -Greedy action tensor([ 0.9426, -0.3819, -0.0410, -0.5914]) tensor([0.5389, 0.1433, 0.2015, 0.1162]) -Greedy action tensor([ 0.0307, 0.4144, -0.2027, -0.5995]) tensor([0.2637, 0.3871, 0.2088, 0.1404]) -Greedy action tensor([ 0.6478, -0.1382, 0.0694, -0.4869]) tensor([0.4277, 0.1949, 0.2399, 0.1375]) -Greedy action tensor([ 0.9820, -1.0049, 0.0147, -0.3568]) tensor([0.5620, 0.0771, 0.2136, 0.1473]) -Greedy action tensor([ 0.8247, -0.1677, -0.0635, -0.2581]) tensor([0.4715, 0.1748, 0.1940, 0.1597]) -Greedy action tensor([0.2233, 0.0924, 0.1000, 0.3385]) tensor([0.2575, 0.2259, 0.2276, 0.2889]) -Greedy action tensor([ 0.8100, -0.4414, 0.0075, -0.4781]) tensor([0.4975, 0.1423, 0.2230, 0.1372]) -Greedy action tensor([ 1.1786, -0.7042, 0.0536, -0.4722]) tensor([0.5993, 0.0912, 0.1945, 0.1150]) -Greedy action tensor([ 0.6563, -0.3067, -0.0280, -0.0563]) tensor([0.4208, 0.1606, 0.2123, 0.2063]) -Greedy action tensor([ 0.8387, -0.7925, -0.0346, -0.3111]) tensor([0.5182, 0.1014, 0.2164, 0.1641]) -Greedy action tensor([ 0.2718, -0.2499, -0.0261, -0.1889]) tensor([0.3371, 0.2001, 0.2502, 0.2126]) -Greedy action tensor([ 0.9164, -1.2123, 0.0428, -0.5718]) tensor([0.5675, 0.0675, 0.2369, 0.1281]) -Greedy action tensor([ 0.6539, -0.3386, -0.0066, -0.3651]) tensor([0.4448, 0.1649, 0.2298, 0.1606]) -Greedy action tensor([ 0.5309, -0.3114, 0.0352, -0.1023]) tensor([0.3890, 0.1675, 0.2370, 0.2065]) -Greedy action tensor([ 0.6508, -0.1152, -0.0557, -0.2891]) tensor([0.4257, 0.1979, 0.2101, 0.1663]) -Greedy action tensor([ 1.3040, -0.2670, -0.2926, -0.2721]) tensor([0.6183, 0.1285, 0.1253, 0.1279]) -Greedy action tensor([ 0.0721, -0.0058, -0.1570, -0.2106]) tensor([0.2878, 0.2663, 0.2289, 0.2170]) -Greedy action tensor([ 0.6663, -0.7378, -0.0323, -0.3113]) tensor([0.4719, 0.1159, 0.2347, 0.1775]) -Greedy action tensor([ 0.4253, -0.1440, -0.0721, -0.0486]) tensor([0.3576, 0.2024, 0.2174, 0.2226]) -Greedy action tensor([-0.0755, -0.0804, -0.0072, -0.0394]) tensor([0.2437, 0.2426, 0.2610, 0.2527]) -Greedy action tensor([ 0.9311, -0.5303, -0.0570, -0.5335]) tensor([0.5449, 0.1264, 0.2028, 0.1260]) -Greedy action tensor([ 0.6084, -0.3043, -0.1416, -0.0913]) tensor([0.4218, 0.1694, 0.1993, 0.2095]) -Greedy action tensor([ 0.5719, -0.1711, 0.1214, -0.5199]) tensor([0.4084, 0.1943, 0.2603, 0.1371]) -Greedy action tensor([ 0.7972, -0.5035, 0.0781, -0.4457]) tensor([0.4883, 0.1330, 0.2379, 0.1409]) -Greedy action tensor([ 0.5031, -0.1330, 0.0100, -0.0794]) tensor([0.3706, 0.1962, 0.2263, 0.2070]) -Greedy action tensor([ 0.6974, 0.2250, -0.1282, -0.1371]) tensor([0.4007, 0.2498, 0.1755, 0.1739]) -Greedy action tensor([ 0.7247, -0.4318, -0.0360, -0.1065]) tensor([0.4510, 0.1419, 0.2108, 0.1964]) -Greedy action tensor([ 0.7008, -0.5744, -0.0278, -0.3700]) tensor([0.4751, 0.1327, 0.2293, 0.1628]) -Greedy action tensor([ 0.5395, 0.1999, -0.1475, 0.1854]) tensor([0.3428, 0.2441, 0.1725, 0.2406]) -Greedy action tensor([ 1.0594, -0.6571, -0.1849, -0.3962]) tensor([0.5879, 0.1056, 0.1694, 0.1371]) -Greedy action tensor([ 0.5632, -0.0638, 0.0675, -0.2606]) tensor([0.3873, 0.2069, 0.2359, 0.1699]) -Greedy action tensor([ 0.5616, -0.3673, -0.1525, -0.1074]) tensor([0.4172, 0.1648, 0.2043, 0.2137]) -Greedy action tensor([ 0.6613, -0.4576, -0.1731, -0.2495]) tensor([0.4623, 0.1510, 0.2007, 0.1859]) -Greedy action tensor([ 0.8938, -0.6078, -0.0841, -0.3290]) tensor([0.5282, 0.1177, 0.1987, 0.1555]) -Greedy action tensor([ 0.7350, -0.3617, -0.0789, -0.3710]) tensor([0.4744, 0.1584, 0.2102, 0.1570]) -Greedy action tensor([ 0.6810, -0.1359, 0.0128, -0.3658]) tensor([0.4338, 0.1916, 0.2224, 0.1523]) -Greedy action tensor([ 0.7374, -0.5283, -0.0763, -0.5708]) tensor([0.5011, 0.1413, 0.2221, 0.1354]) -Greedy action tensor([ 0.5066, -0.2188, 0.0760, -0.3923]) tensor([0.3935, 0.1905, 0.2558, 0.1602]) -Greedy action tensor([ 0.9826, -0.5917, -0.3121, -0.4375]) tensor([0.5804, 0.1202, 0.1590, 0.1403]) -Greedy action tensor([ 0.4975, -0.0306, 0.0354, -0.1820]) tensor([0.3668, 0.2163, 0.2310, 0.1859]) -Greedy action tensor([ 0.3903, -0.0508, -0.0130, -0.3737]) tensor([0.3601, 0.2316, 0.2406, 0.1677]) -Greedy action tensor([ 0.3883, 0.0323, -0.0969, 0.1543]) tensor([0.3218, 0.2254, 0.1981, 0.2547]) -Greedy action tensor([ 0.5374, -0.3892, -0.1070, -0.3618]) tensor([0.4296, 0.1701, 0.2255, 0.1748]) -Greedy action tensor([ 0.8625, -0.7706, -0.1465, -0.3334]) tensor([0.5369, 0.1049, 0.1958, 0.1624]) -Greedy action tensor([ 0.6834, -0.6143, -0.2896, -0.3263]) tensor([0.4962, 0.1355, 0.1875, 0.1808]) -Greedy action tensor([ 0.2267, -0.0153, -0.0357, -0.2505]) tensor([0.3150, 0.2473, 0.2423, 0.1955]) -Greedy action tensor([ 0.9325, -0.6046, 0.0741, -0.4740]) tensor([0.5308, 0.1141, 0.2250, 0.1300]) -Greedy action tensor([ 0.2754, 0.0783, -0.1109, -0.1116]) tensor([0.3145, 0.2582, 0.2137, 0.2136]) -Greedy action tensor([ 0.3729, -0.2170, -0.0961, -0.2389]) tensor([0.3673, 0.2036, 0.2298, 0.1992]) -Greedy action tensor([ 0.6800, -0.3989, -0.1107, -0.3936]) tensor([0.4683, 0.1592, 0.2124, 0.1601]) -Greedy action tensor([ 0.9016, -0.5070, -0.1081, -0.4299]) tensor([0.5339, 0.1305, 0.1945, 0.1410]) -Greedy action tensor([ 1.1998, -0.7966, 0.0480, -0.5028]) tensor([0.6120, 0.0831, 0.1934, 0.1115]) -Greedy action tensor([ 0.9305, -0.8704, 0.1034, -0.5020]) tensor([0.5431, 0.0897, 0.2375, 0.1297]) -Greedy action tensor([ 0.8849, -0.5865, 0.0478, -0.4299]) tensor([0.5178, 0.1189, 0.2242, 0.1391]) -Greedy action tensor([ 0.8001, -0.3317, 0.0062, -0.2193]) tensor([0.4683, 0.1510, 0.2117, 0.1690]) -Greedy action tensor([ 0.8643, -0.5558, -0.0238, -0.4970]) tensor([0.5237, 0.1266, 0.2155, 0.1342]) -Greedy action tensor([0.4971, 0.1162, 0.0021, 0.0517]) tensor([0.3409, 0.2329, 0.2078, 0.2184]) -Greedy action tensor([ 0.7293, -0.2470, -0.1070, -0.3235]) tensor([0.4632, 0.1745, 0.2007, 0.1616]) -Greedy action tensor([ 0.4776, -0.1365, -0.0787, -0.4635]) tensor([0.3992, 0.2161, 0.2289, 0.1558]) -Greedy action tensor([ 0.5470, -0.4866, -0.1433, -0.2320]) tensor([0.4318, 0.1536, 0.2165, 0.1981]) -Greedy action tensor([ 0.7206, -0.5923, -0.1521, -0.2325]) tensor([0.4825, 0.1298, 0.2016, 0.1860]) -Greedy action tensor([ 0.7570, -0.6711, -0.0798, -0.3390]) tensor([0.4982, 0.1195, 0.2158, 0.1665]) -Greedy action tensor([ 0.8117, -0.1079, -0.0889, -0.3605]) tensor([0.4729, 0.1885, 0.1922, 0.1464]) -Greedy action tensor([ 0.9197, -0.5497, -0.1511, -0.3027]) tensor([0.5355, 0.1232, 0.1835, 0.1577]) -Greedy action tensor([ 0.6457, -0.2012, -0.0463, -0.1316]) tensor([0.4186, 0.1795, 0.2095, 0.1924]) -Greedy action tensor([ 0.7294, -0.3750, -0.0732, -0.2489]) tensor([0.4639, 0.1538, 0.2079, 0.1744]) -Greedy action tensor([ 0.8041, -0.6337, -0.0145, -0.6654]) tensor([0.5240, 0.1244, 0.2311, 0.1205]) -Greedy action tensor([-0.1203, -0.0262, -0.9103, 0.2656]) tensor([0.2486, 0.2731, 0.1128, 0.3656]) -Greedy action tensor([-0.4731, -0.8029, -0.5002, 1.4401]) tensor([0.1056, 0.0760, 0.1028, 0.7156]) -Greedy action tensor([-0.1285, -2.5093, -0.3300, -0.4748]) tensor([0.3821, 0.0353, 0.3123, 0.2702]) -Greedy action tensor([ 0.1885, -0.6418, -0.4172, 0.6799]) tensor([0.2765, 0.1205, 0.1509, 0.4520]) -Greedy action tensor([1.1771, 0.3922, 0.7389, 0.6595]) tensor([0.3708, 0.1691, 0.2392, 0.2209]) -Greedy action tensor([2.0197, 0.1526, 0.1398, 0.1215]) tensor([0.6863, 0.1061, 0.1047, 0.1028]) -Greedy action tensor([ 0.1584, -1.8974, 1.5779, -0.5264]) tensor([0.1734, 0.0222, 0.7170, 0.0874]) -Greedy action tensor([ 0.4333, -0.0730, -1.2476, 0.0169]) tensor([0.4084, 0.2462, 0.0761, 0.2693]) -Greedy action tensor([-0.0967, 0.8849, 0.0992, -0.4432]) tensor([0.1788, 0.4772, 0.2175, 0.1265]) -Greedy action tensor([-0.1278, -1.6665, 1.3799, -0.2963]) tensor([0.1521, 0.0326, 0.6868, 0.1285]) -Greedy action tensor([-0.1453, -0.5582, -0.7423, 0.4222]) tensor([0.2515, 0.1664, 0.1384, 0.4436]) -Greedy action tensor([ 0.5931, -1.8174, -0.3496, 0.9181]) tensor([0.3492, 0.0314, 0.1361, 0.4834]) -Greedy action tensor([-0.1292, -0.3545, -0.6564, 1.3330]) tensor([0.1492, 0.1191, 0.0880, 0.6437]) -Greedy action tensor([-2.1448, -0.2028, 0.3331, -0.2173]) tensor([0.0374, 0.2605, 0.4453, 0.2568]) -Greedy action tensor([ 0.9718, -0.1535, 0.3011, -0.3546]) tensor([0.4759, 0.1544, 0.2433, 0.1263]) -Greedy action tensor([-1.1930, -0.6498, 0.4001, -0.1356]) tensor([0.0951, 0.1637, 0.4676, 0.2737]) -Greedy action tensor([1.1255, 0.3969, 0.9756, 0.5517]) tensor([0.3440, 0.1660, 0.2961, 0.1938]) -Greedy action tensor([-0.6640, -0.4100, -0.3339, -0.5548]) tensor([0.2085, 0.2688, 0.2901, 0.2326]) -Greedy action tensor([ 1.4246, -1.1598, 1.1008, 0.9815]) tensor([0.4097, 0.0309, 0.2964, 0.2630]) -Greedy action tensor([ 0.5958, -0.2144, 0.7659, 1.3397]) tensor([0.2112, 0.0939, 0.2504, 0.4444]) -Greedy action tensor([-0.1165, -1.6227, -0.6040, 1.1626]) tensor([0.1842, 0.0408, 0.1131, 0.6619]) -Greedy action tensor([-0.4627, -0.5103, 1.6560, 0.3670]) tensor([0.0796, 0.0759, 0.6621, 0.1824]) -Greedy action tensor([ 0.3049, -1.3730, -0.9716, 0.0733]) tensor([0.4427, 0.0827, 0.1235, 0.3511]) -Greedy action tensor([-1.0655, 0.2036, -0.3546, -0.0244]) tensor([0.1061, 0.3774, 0.2160, 0.3005]) -Greedy action tensor([-0.0277, -0.9169, -0.5508, 0.0570]) tensor([0.3234, 0.1329, 0.1917, 0.3520]) -Greedy action tensor([ 0.2468, -0.0249, -0.0329, -0.5154]) tensor([0.3350, 0.2553, 0.2533, 0.1563]) -Greedy action tensor([ 0.4655, -0.5532, 0.0817, 1.1671]) tensor([0.2463, 0.0889, 0.1678, 0.4969]) -Greedy action tensor([-0.1507, -1.5088, 0.9158, -0.2516]) tensor([0.1974, 0.0508, 0.5734, 0.1784]) -Greedy action tensor([ 1.3622, 0.3416, -0.0166, 1.5371]) tensor([0.3567, 0.1286, 0.0898, 0.4249]) -Greedy action tensor([-0.2411, 0.2038, -0.3704, -0.5254]) tensor([0.2386, 0.3723, 0.2096, 0.1795]) -Greedy action tensor([-0.8153, -0.1106, -0.3087, 0.3521]) tensor([0.1266, 0.2562, 0.2102, 0.4070]) -Greedy action tensor([ 0.2468, 0.6566, 0.5353, -0.8234]) tensor([0.2390, 0.3601, 0.3190, 0.0820]) -Greedy action tensor([ 0.1531, -0.1160, -0.7199, 0.5047]) tensor([0.2775, 0.2121, 0.1159, 0.3945]) -Greedy action tensor([ 1.3390, -0.7450, 0.7497, 0.5911]) tensor([0.4646, 0.0578, 0.2577, 0.2199]) -Greedy action tensor([-0.2101, 0.3679, 0.9413, 0.3203]) tensor([0.1308, 0.2332, 0.4137, 0.2223]) -Greedy action tensor([ 0.2281, -0.8085, -0.0951, 0.0564]) tensor([0.3424, 0.1214, 0.2478, 0.2884]) -Greedy action tensor([ 0.1983, -1.4088, 1.1803, -0.1193]) tensor([0.2175, 0.0436, 0.5806, 0.1583]) -Greedy action tensor([ 0.3426, -0.5466, -0.5021, -0.2319]) tensor([0.4160, 0.1710, 0.1788, 0.2342]) -Greedy action tensor([ 0.9267, -0.0507, -0.4106, 0.0096]) tensor([0.4905, 0.1846, 0.1288, 0.1961]) -Greedy action tensor([ 0.4171, -0.2192, 0.9502, 0.3161]) tensor([0.2417, 0.1279, 0.4119, 0.2185]) -Greedy action tensor([ 1.2444, -1.3366, 1.9555, 1.5579]) tensor([0.2232, 0.0169, 0.4545, 0.3054]) -Greedy action tensor([1.0193, 0.2797, 0.4051, 1.1185]) tensor([0.3202, 0.1529, 0.1733, 0.3537]) -Greedy action tensor([-1.1274, -1.5304, 0.3318, -0.4635]) tensor([0.1264, 0.0845, 0.5437, 0.2455]) -Greedy action tensor([ 0.1874, 0.1215, 1.1296, -0.3903]) tensor([0.1975, 0.1849, 0.5067, 0.1108]) -Greedy action tensor([ 0.2237, 0.9444, 1.1079, -0.3330]) tensor([0.1653, 0.3398, 0.4002, 0.0947]) -Greedy action tensor([-0.2269, -0.0690, 0.1092, 1.5917]) tensor([0.1027, 0.1203, 0.1438, 0.6332]) -Greedy action tensor([ 1.0000, -0.2267, 0.0880, 0.2884]) tensor([0.4575, 0.1342, 0.1838, 0.2246]) -Greedy action tensor([-0.7657, -0.3783, 0.7669, -0.5089]) tensor([0.1191, 0.1754, 0.5515, 0.1540]) -Greedy action tensor([-0.1538, -0.2261, -1.1520, 0.1615]) tensor([0.2725, 0.2535, 0.1004, 0.3735]) -Greedy action tensor([0.1683, 1.0139, 0.5861, 0.4489]) tensor([0.1620, 0.3774, 0.2461, 0.2145]) -Greedy action tensor([-0.8945, -1.6778, 0.2733, 0.3824]) tensor([0.1211, 0.0553, 0.3893, 0.4342]) -Greedy action tensor([-1.6428, 0.4989, 1.1945, -0.8681]) tensor([0.0348, 0.2961, 0.5937, 0.0755]) -Greedy action tensor([ 0.5740, -0.7972, -0.3978, 1.0944]) tensor([0.3017, 0.0766, 0.1141, 0.5076]) -Greedy action tensor([ 0.5991, -0.0399, 0.4833, -0.5286]) tensor([0.3647, 0.1925, 0.3248, 0.1181]) -Greedy action tensor([-0.6476, -0.4881, 0.5094, -1.1442]) tensor([0.1677, 0.1967, 0.5335, 0.1021]) -Greedy action tensor([ 0.8439, 0.2274, 0.4139, -0.2868]) tensor([0.3979, 0.2148, 0.2588, 0.1284]) -Greedy action tensor([-0.2129, -0.8481, -0.4452, -0.9144]) tensor([0.3548, 0.1880, 0.2813, 0.1759]) -Greedy action tensor([-0.9218, -2.2119, -0.2908, 1.0883]) tensor([0.0942, 0.0259, 0.1770, 0.7029]) -Greedy action tensor([ 0.3395, -1.4056, -0.1450, 0.2189]) tensor([0.3735, 0.0652, 0.2301, 0.3311]) -Greedy action tensor([-0.2081, 0.4209, 0.4288, -0.0333]) tensor([0.1678, 0.3149, 0.3174, 0.1999]) -Greedy action tensor([-1.0020, 0.0319, 0.4555, -0.4545]) tensor([0.1017, 0.2859, 0.4367, 0.1758]) -Greedy action tensor([ 0.1276, -1.4049, -0.7958, 0.2202]) tensor([0.3690, 0.0797, 0.1465, 0.4048]) -Greedy action tensor([-0.4243, -0.3790, 0.9522, 0.4442]) tensor([0.1192, 0.1247, 0.4721, 0.2840]) -Greedy action tensor([ 2.0274, -1.1843, 0.4851, 1.4379]) tensor([0.5529, 0.0223, 0.1182, 0.3066]) -Greedy action tensor([ 0.1752, -0.3532, -0.8305, 0.4488]) tensor([0.3058, 0.1803, 0.1119, 0.4021]) -Greedy action tensor([ 0.1955, 1.0736, 1.3579, -0.3531]) tensor([0.1392, 0.3351, 0.4452, 0.0805]) -Greedy action tensor([ 1.0788, -0.3196, 1.1261, 0.0039]) tensor([0.3792, 0.0937, 0.3976, 0.1294]) -Greedy action tensor([ 0.4968, -0.8986, 0.4892, 1.1878]) tensor([0.2361, 0.0585, 0.2343, 0.4711]) -Greedy action tensor([ 0.8184, 0.3859, -0.2274, 0.0850]) tensor([0.4031, 0.2616, 0.1417, 0.1936]) -Greedy action tensor([-1.4541, -0.7098, 0.1232, -0.1960]) tensor([0.0872, 0.1836, 0.4223, 0.3069]) -Greedy action tensor([-0.3914, -0.0503, -0.2227, -0.8045]) tensor([0.2352, 0.3308, 0.2784, 0.1556]) -Greedy action tensor([ 1.6281, -1.1261, 1.4648, 0.1694]) tensor([0.4661, 0.0297, 0.3959, 0.1084]) -Greedy action tensor([-0.3271, -1.8805, 0.6082, 0.2071]) tensor([0.1830, 0.0387, 0.4662, 0.3121]) -Greedy action tensor([-0.0962, -1.8652, 0.5526, -0.2003]) tensor([0.2509, 0.0428, 0.4801, 0.2261]) -Greedy action tensor([-0.3111, -1.0550, -0.1489, 0.9202]) tensor([0.1646, 0.0782, 0.1935, 0.5637]) -Greedy action tensor([-1.2599, -0.4856, -0.4843, -0.1488]) tensor([0.1193, 0.2589, 0.2592, 0.3626]) -Greedy action tensor([-0.1844, 0.5873, 0.1840, -0.0920]) tensor([0.1753, 0.3792, 0.2533, 0.1922]) -Greedy action tensor([ 1.1851, -1.7764, 0.2423, 1.0698]) tensor([0.4288, 0.0222, 0.1670, 0.3821]) -Greedy action tensor([0.9900, 0.8206, 0.7023, 0.6229]) tensor([0.3042, 0.2568, 0.2282, 0.2108]) -Greedy action tensor([-0.7999, -1.1062, 0.1266, -0.8331]) tensor([0.1912, 0.1408, 0.4830, 0.1850]) -Greedy action tensor([-1.3194, 0.1401, -0.7058, -0.3969]) tensor([0.1034, 0.4452, 0.1911, 0.2602]) -Greedy action tensor([-0.0748, -0.1191, -0.0014, 0.8317]) tensor([0.1815, 0.1737, 0.1954, 0.4494]) -Greedy action tensor([-1.5626, 0.4530, 0.5776, -0.6766]) tensor([0.0515, 0.3862, 0.4375, 0.1248]) -Greedy action tensor([-1.9463, -0.4525, 0.6662, -0.1826]) tensor([0.0401, 0.1787, 0.5471, 0.2341]) -Greedy action tensor([-1.8286, -0.4425, 0.6506, 0.0317]) tensor([0.0428, 0.1712, 0.5109, 0.2751]) -Greedy action tensor([-1.2746, -0.5758, 1.0615, 1.2496]) tensor([0.0387, 0.0779, 0.4003, 0.4831]) -Greedy action tensor([-1.8734, -0.4043, 0.6259, -0.1543]) tensor([0.0433, 0.1881, 0.5270, 0.2415]) -Greedy action tensor([-1.9053, -0.4437, 0.6520, -0.1499]) tensor([0.0417, 0.1797, 0.5375, 0.2411]) -Greedy action tensor([-1.6949, -0.4419, 0.6363, 0.1748]) tensor([0.0470, 0.1645, 0.4836, 0.3048]) -Greedy action tensor([-1.5542, -0.2764, 0.6316, -0.4494]) tensor([0.0606, 0.2174, 0.5391, 0.1829]) -Greedy action tensor([-1.4682, -0.1098, 0.7135, 0.3143]) tensor([0.0508, 0.1975, 0.4499, 0.3018]) -Greedy action tensor([-1.0890, -0.6423, 0.3192, 0.0199]) tensor([0.1033, 0.1614, 0.4223, 0.3130]) -Greedy action tensor([-1.9255, -0.4606, 0.6650, -0.1675]) tensor([0.0409, 0.1769, 0.5451, 0.2371]) -Greedy action tensor([-1.9143, -0.4177, 0.6547, -0.1993]) tensor([0.0415, 0.1855, 0.5421, 0.2308]) -Greedy action tensor([-1.8748, -0.4365, 0.6271, -0.1476]) tensor([0.0434, 0.1828, 0.5297, 0.2441]) -Greedy action tensor([-1.2565, -0.1291, 0.1946, -0.4727]) tensor([0.0948, 0.2928, 0.4047, 0.2077]) -Greedy action tensor([-1.8716, -0.4392, 0.6303, -0.1422]) tensor([0.0434, 0.1819, 0.5300, 0.2448]) -Greedy action tensor([-1.1641, -0.1734, 0.4886, -0.3108]) tensor([0.0888, 0.2391, 0.4636, 0.2084]) -Greedy action tensor([-1.3766, -0.3871, 0.5440, 0.6113]) tensor([0.0561, 0.1510, 0.3831, 0.4098]) -Greedy action tensor([-1.2063, 0.2908, 0.2624, -0.0091]) tensor([0.0762, 0.3405, 0.3310, 0.2523]) -Greedy action tensor([-1.2556, -0.5867, 0.2871, 0.2541]) tensor([0.0823, 0.1606, 0.3848, 0.3723]) -Greedy action tensor([-1.5539, -0.0152, 0.5241, 0.1864]) tensor([0.0517, 0.2408, 0.4129, 0.2946]) -Greedy action tensor([-0.5926, 0.0951, 1.0599, 1.3826]) tensor([0.0649, 0.1290, 0.3386, 0.4675]) -Greedy action tensor([-1.8203, -0.3994, 0.6930, 0.0107]) tensor([0.0421, 0.1745, 0.5203, 0.2630]) -Greedy action tensor([-1.8672, -0.3858, 0.6219, -0.1316]) tensor([0.0432, 0.1903, 0.5212, 0.2453]) -Greedy action tensor([-1.6785, -0.5168, 0.6943, 0.2283]) tensor([0.0462, 0.1476, 0.4954, 0.3109]) -Greedy action tensor([-1.4150, -0.0093, 0.4966, 0.2153]) tensor([0.0590, 0.2406, 0.3991, 0.3013]) -Greedy action tensor([-1.7377, -0.4961, 0.6200, 0.0806]) tensor([0.0472, 0.1634, 0.4987, 0.2908]) -Greedy action tensor([-1.9360, -0.4523, 0.6671, -0.1715]) tensor([0.0404, 0.1781, 0.5456, 0.2359]) -Greedy action tensor([-1.8627, 0.1073, 0.5106, -0.2377]) tensor([0.0417, 0.2990, 0.4475, 0.2118]) -Greedy action tensor([-1.8671, -0.2674, 0.5794, -0.1220]) tensor([0.0431, 0.2132, 0.4972, 0.2465]) -Greedy action tensor([-0.8150, -0.5724, 0.1718, 0.3266]) tensor([0.1236, 0.1576, 0.3316, 0.3872]) -Greedy action tensor([-1.8194, -0.2966, 0.5899, -0.1017]) tensor([0.0449, 0.2058, 0.4993, 0.2500]) -Greedy action tensor([-0.4308, -0.2884, 0.1698, 0.1898]) tensor([0.1713, 0.1976, 0.3124, 0.3187]) -Greedy action tensor([-1.5188, -0.2821, 0.6120, 0.1639]) tensor([0.0548, 0.1888, 0.4616, 0.2949]) -Greedy action tensor([-1.8629, -0.3568, 0.6163, -0.1312]) tensor([0.0433, 0.1953, 0.5167, 0.2447]) -Greedy action tensor([-1.8540, -0.4329, 0.6270, -0.1131]) tensor([0.0439, 0.1817, 0.5243, 0.2501]) -Greedy action tensor([-1.7240, -0.5187, 0.5623, -0.1128]) tensor([0.0521, 0.1740, 0.5128, 0.2611]) -Greedy action tensor([-1.7453, 0.0687, 0.5052, -0.0907]) tensor([0.0457, 0.2807, 0.4343, 0.2393]) -Greedy action tensor([-1.9278, -0.4206, 0.6588, -0.1650]) tensor([0.0406, 0.1833, 0.5394, 0.2367]) -Greedy action tensor([-1.8560, -0.3948, 0.6414, -0.2108]) tensor([0.0442, 0.1904, 0.5366, 0.2288]) -Greedy action tensor([-0.6725, -0.5853, 0.2557, 0.1810]) tensor([0.1435, 0.1566, 0.3630, 0.3369]) -Greedy action tensor([-1.9252, -0.4641, 0.6659, -0.1606]) tensor([0.0408, 0.1760, 0.5448, 0.2384]) -Greedy action tensor([-1.8144, -0.1325, 0.5741, -0.0656]) tensor([0.0434, 0.2335, 0.4734, 0.2497]) -Greedy action tensor([-1.3922, -0.1885, 0.6758, 0.1537]) tensor([0.0591, 0.1968, 0.4670, 0.2771]) -Greedy action tensor([-0.5141, -0.0358, 0.0301, 0.4965]) tensor([0.1412, 0.2277, 0.2433, 0.3878]) -Greedy action tensor([-1.9479, -0.4501, 0.6658, -0.1830]) tensor([0.0401, 0.1791, 0.5468, 0.2340]) -Greedy action tensor([-1.6648, -0.3872, 0.5073, -0.0171]) tensor([0.0539, 0.1933, 0.4729, 0.2799]) -Greedy action tensor([-1.9029, -0.3642, 0.6427, -0.1598]) tensor([0.0415, 0.1931, 0.5285, 0.2369]) -Greedy action tensor([-1.8717, -0.2342, 0.5952, -0.1427]) tensor([0.0424, 0.2182, 0.5002, 0.2391]) -Greedy action tensor([-1.8583, -0.4292, 0.6251, -0.1332]) tensor([0.0439, 0.1834, 0.5262, 0.2465]) -Greedy action tensor([-0.7185, -0.4596, 0.3189, 0.7601]) tensor([0.1052, 0.1363, 0.2969, 0.4616]) -Greedy action tensor([-1.8814, -0.4526, 0.6336, -0.1473]) tensor([0.0431, 0.1799, 0.5330, 0.2441]) -Greedy action tensor([-1.7766, -0.4676, 0.6131, -0.0399]) tensor([0.0470, 0.1739, 0.5124, 0.2667]) -Greedy action tensor([-1.9010, -0.4251, 0.6417, -0.1591]) tensor([0.0420, 0.1838, 0.5343, 0.2399]) -Greedy action tensor([-1.9266, -0.4427, 0.6599, -0.1699]) tensor([0.0408, 0.1801, 0.5425, 0.2366]) -Greedy action tensor([-1.8621, -0.4055, 0.6230, -0.1354]) tensor([0.0436, 0.1873, 0.5238, 0.2453]) -Greedy action tensor([-1.5316, -0.4426, 0.5951, 0.3628]) tensor([0.0526, 0.1563, 0.4413, 0.3498]) -Greedy action tensor([-1.7976, -0.0889, 0.5500, -0.0285]) tensor([0.0438, 0.2417, 0.4579, 0.2567]) -Greedy action tensor([-1.7778, -0.4798, 0.5778, -0.1708]) tensor([0.0495, 0.1813, 0.5222, 0.2470]) -Greedy action tensor([-1.2302, -0.5732, 0.2739, 0.2878]) tensor([0.0834, 0.1609, 0.3752, 0.3805]) -Greedy action tensor([-1.6242, -0.5150, 0.5867, 0.1458]) tensor([0.0526, 0.1593, 0.4795, 0.3086]) -Greedy action tensor([-1.4025, -0.1339, 0.5488, 0.1690]) tensor([0.0610, 0.2167, 0.4289, 0.2934]) -Greedy action tensor([-1.7106, -0.2766, 0.5163, -0.0627]) tensor([0.0509, 0.2134, 0.4715, 0.2643]) -Greedy action tensor([-1.8482, -0.2812, 0.6001, -0.1420]) tensor([0.0437, 0.2095, 0.5059, 0.2409]) -Greedy action tensor([-1.9412, -0.4491, 0.6667, -0.1794]) tensor([0.0403, 0.1790, 0.5463, 0.2344]) -Greedy action tensor([-0.6456, -0.5832, 0.1606, 0.2685]) tensor([0.1471, 0.1566, 0.3294, 0.3669]) -Greedy action tensor([-1.8506, -0.3323, 0.5998, -0.1319]) tensor([0.0440, 0.2008, 0.5099, 0.2453]) -Greedy action tensor([-1.5677, -0.0717, 0.4536, -0.0544]) tensor([0.0570, 0.2543, 0.4300, 0.2587]) -Greedy action tensor([-1.6794, -0.5161, 0.5336, -0.0408]) tensor([0.0541, 0.1731, 0.4944, 0.2784]) -Greedy action tensor([-1.9350, -0.5127, 1.0980, 0.4910]) tensor([0.0269, 0.1114, 0.5577, 0.3040]) -Greedy action tensor([-1.8338, -0.4054, 0.6008, -0.1200]) tensor([0.0452, 0.1885, 0.5156, 0.2507]) -Greedy action tensor([-1.2249, -0.4515, 0.5364, 0.4948]) tensor([0.0686, 0.1487, 0.3994, 0.3832]) -Greedy action tensor([-1.9004, -0.3835, 0.6457, -0.1560]) tensor([0.0416, 0.1896, 0.5307, 0.2381]) -Greedy action tensor([-1.9115, -0.3945, 0.6512, -0.1453]) tensor([0.0410, 0.1870, 0.5321, 0.2399]) -Greedy action tensor([-1.6732, -0.3190, 0.5368, 0.0198]) tensor([0.0515, 0.1994, 0.4693, 0.2798]) -Greedy action tensor([-1.9363, -0.4446, 0.6678, -0.1729]) tensor([0.0403, 0.1792, 0.5452, 0.2352]) -Greedy action tensor([-1.9102, -0.5058, 1.2955, 0.7042]) tensor([0.0230, 0.0938, 0.5684, 0.3147]) -Greedy action tensor([-1.6882, -0.4146, 0.6632, -0.0286]) tensor([0.0492, 0.1758, 0.5165, 0.2586]) -Greedy action tensor([-0.5435, 1.1078, 0.0983, 0.3090]) tensor([0.0956, 0.4985, 0.1816, 0.2242]) -Greedy action tensor([-1.7204, -0.3591, 0.5565, -0.0281]) tensor([0.0498, 0.1943, 0.4854, 0.2705]) -Greedy action tensor([-1.4535, -0.3906, 0.7072, -0.6243]) tensor([0.0673, 0.1948, 0.5838, 0.1542]) -Greedy action tensor([-1.8457, -0.4786, 0.6226, -0.1204]) tensor([0.0448, 0.1756, 0.5283, 0.2513]) -Greedy action tensor([ 2.1067, -0.9799, -0.5320, 0.6051]) tensor([0.7463, 0.0341, 0.0533, 0.1663]) -Greedy action tensor([ 1.1620, -0.0193, -1.0425, 0.2612]) tensor([0.5484, 0.1683, 0.0605, 0.2228]) -Greedy action tensor([ 1.3955, -0.7051, -0.3057, 0.5118]) tensor([0.5820, 0.0712, 0.1062, 0.2405]) -Greedy action tensor([ 1.8300, -0.4412, -0.3406, 0.3113]) tensor([0.6962, 0.0718, 0.0794, 0.1525]) -Greedy action tensor([ 2.0258, -0.8550, -0.4080, 0.7304]) tensor([0.7054, 0.0396, 0.0619, 0.1931]) -Greedy action tensor([ 1.0116, -0.0802, -0.5087, 0.5146]) tensor([0.4624, 0.1552, 0.1011, 0.2813]) -Greedy action tensor([ 0.5507, -0.4403, 0.0173, 0.0319]) tensor([0.3917, 0.1454, 0.2298, 0.2331]) -Greedy action tensor([ 0.9292, -0.0040, -0.0834, -0.1085]) tensor([0.4737, 0.1863, 0.1721, 0.1678]) -Greedy action tensor([ 1.4767, -0.6795, -0.2748, 0.0763]) tensor([0.6511, 0.0754, 0.1130, 0.1605]) -Greedy action tensor([ 1.5478, -0.6972, -0.3512, 0.2096]) tensor([0.6588, 0.0698, 0.0986, 0.1728]) -Greedy action tensor([ 1.5171, -0.1335, -0.5642, 0.4320]) tensor([0.6044, 0.1160, 0.0754, 0.2042]) -Greedy action tensor([ 1.6677, 0.2866, 0.4022, -0.2724]) tensor([0.5963, 0.1498, 0.1682, 0.0857]) -Greedy action tensor([ 1.6143, -0.4380, -0.3853, 0.2506]) tensor([0.6581, 0.0845, 0.0891, 0.1683]) -Greedy action tensor([ 1.4146, -0.4669, -0.4348, 0.1965]) tensor([0.6229, 0.0949, 0.0980, 0.1842]) -Greedy action tensor([ 1.3256, -0.7049, -0.1464, 0.1190]) tensor([0.6024, 0.0791, 0.1382, 0.1802]) -Greedy action tensor([ 2.0910, -0.1106, -0.5034, 0.4949]) tensor([0.7205, 0.0797, 0.0538, 0.1460]) -Greedy action tensor([ 1.0691, -0.0530, -0.1937, 0.3742]) tensor([0.4745, 0.1545, 0.1342, 0.2368]) -Greedy action tensor([ 0.7884, -0.4786, -0.3174, 0.4041]) tensor([0.4360, 0.1228, 0.1443, 0.2969]) -Greedy action tensor([ 1.0236, -0.3899, -0.1714, 0.1198]) tensor([0.5125, 0.1247, 0.1552, 0.2076]) -Greedy action tensor([ 1.4342, -0.2470, -0.6078, 0.4086]) tensor([0.5972, 0.1112, 0.0775, 0.2141]) -Greedy action tensor([ 1.3682, -0.5139, -0.4569, 0.2383]) tensor([0.6110, 0.0930, 0.0985, 0.1974]) -Greedy action tensor([ 0.6212, -0.4515, -0.0206, 0.1929]) tensor([0.3968, 0.1357, 0.2089, 0.2586]) -Greedy action tensor([ 1.6941, -0.6383, -0.2805, 0.2727]) tensor([0.6769, 0.0657, 0.0940, 0.1634]) -Greedy action tensor([ 1.5749, -0.4238, -0.1155, 0.1011]) tensor([0.6456, 0.0875, 0.1191, 0.1479]) -Greedy action tensor([ 1.4149, -0.9711, -0.3056, 0.6802]) tensor([0.5712, 0.0526, 0.1022, 0.2740]) -Greedy action tensor([ 0.7955, -0.4921, -0.3599, 0.4871]) tensor([0.4300, 0.1187, 0.1354, 0.3159]) -Greedy action tensor([ 3.1559, -1.4713, -0.2363, 1.4478]) tensor([0.8166, 0.0080, 0.0275, 0.1480]) -Greedy action tensor([ 1.2505, -0.3376, -0.5927, 0.5110]) tensor([0.5435, 0.1110, 0.0860, 0.2594]) -Greedy action tensor([ 1.1572, -0.3385, -0.7195, 0.3718]) tensor([0.5455, 0.1222, 0.0835, 0.2487]) -Greedy action tensor([ 1.7654, -0.6384, -0.4067, 0.4575]) tensor([0.6781, 0.0613, 0.0773, 0.1834]) -Greedy action tensor([ 1.2053, -0.1564, -0.3138, 0.0919]) tensor([0.5545, 0.1421, 0.1214, 0.1821]) -Greedy action tensor([ 1.1655, -0.5611, -0.3798, 0.1632]) tensor([0.5688, 0.1012, 0.1213, 0.2088]) -Greedy action tensor([ 1.7466, -1.0253, -0.0747, 0.1474]) tensor([0.7011, 0.0438, 0.1134, 0.1417]) -Greedy action tensor([ 1.7760, 0.0225, -0.2641, -0.0110]) tensor([0.6800, 0.1177, 0.0884, 0.1139]) -Greedy action tensor([ 1.6156, -0.8882, -0.1691, 0.7313]) tensor([0.6015, 0.0492, 0.1010, 0.2484]) -Greedy action tensor([ 1.4975, -0.4160, -0.2559, 0.4655]) tensor([0.5963, 0.0880, 0.1033, 0.2125]) -Greedy action tensor([ 1.0273, -0.2285, -0.3427, 0.1527]) tensor([0.5113, 0.1456, 0.1299, 0.2132]) -Greedy action tensor([ 1.3622, -0.1585, -0.4605, 0.0225]) tensor([0.6090, 0.1331, 0.0984, 0.1595]) -Greedy action tensor([ 2.2865, -0.6904, -0.0997, 0.4990]) tensor([0.7632, 0.0389, 0.0702, 0.1277]) -Greedy action tensor([ 1.6378, -0.3325, -0.1158, 0.2095]) tensor([0.6442, 0.0898, 0.1115, 0.1544]) -Greedy action tensor([ 1.2769, -0.1158, -0.1942, -0.0023]) tensor([0.5694, 0.1414, 0.1308, 0.1584]) -Greedy action tensor([ 1.9719, -0.6723, -0.0969, 0.4770]) tensor([0.7034, 0.0500, 0.0889, 0.1578]) -Greedy action tensor([ 2.1525, -1.4205, -0.2939, 0.4124]) tensor([0.7751, 0.0218, 0.0671, 0.1360]) -Greedy action tensor([ 1.0649, -0.3016, -0.6082, 0.1957]) tensor([0.5371, 0.1369, 0.1008, 0.2252]) -Greedy action tensor([ 1.2531, -0.2461, -0.1709, 0.1229]) tensor([0.5596, 0.1250, 0.1347, 0.1807]) -Greedy action tensor([ 2.0247, 0.4821, -0.7816, -0.4875]) tensor([0.7378, 0.1578, 0.0446, 0.0598]) -Greedy action tensor([ 1.0556, -0.3526, -0.4402, 0.1502]) tensor([0.5339, 0.1306, 0.1196, 0.2159]) -Greedy action tensor([ 1.5167, 0.2297, -0.1319, 0.1570]) tensor([0.5797, 0.1600, 0.1115, 0.1488]) -Greedy action tensor([ 1.3051, -0.4394, -0.4005, 0.4873]) tensor([0.5562, 0.0972, 0.1010, 0.2455]) -Greedy action tensor([ 1.1882, -0.6630, -0.1723, 0.0616]) tensor([0.5755, 0.0904, 0.1476, 0.1865]) -Greedy action tensor([ 2.6569, -1.5880, -0.3743, 0.8307]) tensor([0.8172, 0.0117, 0.0394, 0.1316]) -Greedy action tensor([ 2.2996, -0.5650, -0.4555, 0.8296]) tensor([0.7405, 0.0422, 0.0471, 0.1702]) -Greedy action tensor([ 0.7554, -0.0877, -0.1932, 0.0277]) tensor([0.4347, 0.1871, 0.1683, 0.2099]) -Greedy action tensor([ 2.0792, -1.3439, -0.3942, 0.3367]) tensor([0.7740, 0.0252, 0.0652, 0.1355]) -Greedy action tensor([ 1.8207, -0.9806, -0.3158, 0.5995]) tensor([0.6786, 0.0412, 0.0801, 0.2001]) -Greedy action tensor([ 1.6760, -0.5429, -0.2306, 0.3729]) tensor([0.6540, 0.0711, 0.0972, 0.1777]) -Greedy action tensor([ 2.0351, -0.7635, -0.3668, 0.7461]) tensor([0.7008, 0.0427, 0.0634, 0.1931]) -Greedy action tensor([ 0.6993, -0.0335, -0.2527, 0.6308]) tensor([0.3571, 0.1716, 0.1378, 0.3335]) -Greedy action tensor([ 1.8481, -1.0736, -0.2178, 0.4525]) tensor([0.7002, 0.0377, 0.0887, 0.1734]) -Greedy action tensor([ 1.2891, -0.5528, -0.1637, 0.2942]) tensor([0.5675, 0.0900, 0.1327, 0.2098]) -Greedy action tensor([ 1.4641, -0.1601, -0.3587, 0.3684]) tensor([0.5907, 0.1164, 0.0954, 0.1975]) -Greedy action tensor([ 1.8554, -0.9698, -0.0398, 0.2529]) tensor([0.7087, 0.0420, 0.1065, 0.1427]) -Greedy action tensor([ 2.2671, -0.8928, -0.3191, 0.5020]) tensor([0.7759, 0.0329, 0.0584, 0.1328]) -Greedy action tensor([ 1.3587, -0.1404, -0.1548, -0.0518]) tensor([0.5926, 0.1323, 0.1305, 0.1446]) -Greedy action tensor([ 0.8747, -0.0417, 0.0114, -0.2740]) tensor([0.4676, 0.1870, 0.1972, 0.1482]) -Greedy action tensor([ 1.3211, -0.4384, -0.4252, 0.3851]) tensor([0.5751, 0.0990, 0.1003, 0.2256]) -Greedy action tensor([ 0.7905, -0.1318, -0.4151, 0.3656]) tensor([0.4254, 0.1691, 0.1274, 0.2781]) -Greedy action tensor([ 1.6636, -0.8762, -0.2191, 0.2792]) tensor([0.6750, 0.0532, 0.1027, 0.1691]) -Greedy action tensor([ 1.5450, -0.7425, -0.2440, 0.1307]) tensor([0.6615, 0.0672, 0.1106, 0.1608]) -Greedy action tensor([ 1.6801, -0.1837, -0.5735, 0.4856]) tensor([0.6398, 0.0992, 0.0672, 0.1938]) -Greedy action tensor([ 1.0915, -0.5155, 0.0926, 0.1710]) tensor([0.5084, 0.1019, 0.1872, 0.2025]) -Greedy action tensor([ 1.4385, -0.4771, -0.4169, 0.3906]) tensor([0.6045, 0.0890, 0.0945, 0.2120]) -Greedy action tensor([ 1.6057, -0.6375, -0.4625, -0.0371]) tensor([0.7013, 0.0744, 0.0887, 0.1357]) -Greedy action tensor([ 0.7121, -0.1708, -0.1699, 0.0053]) tensor([0.4309, 0.1782, 0.1784, 0.2125]) -Greedy action tensor([ 0.6625, -0.1951, -0.0364, 0.1704]) tensor([0.3948, 0.1675, 0.1963, 0.2414]) -Greedy action tensor([ 1.4569, 0.0093, -0.3460, 0.5008]) tensor([0.5604, 0.1318, 0.0924, 0.2154]) -Greedy action tensor([ 1.8718, 0.1984, -0.2626, 0.2513]) tensor([0.6650, 0.1248, 0.0787, 0.1315]) -Greedy action tensor([ 1.4432, -0.6318, 0.0230, 0.1263]) tensor([0.6116, 0.0768, 0.1478, 0.1639]) -Greedy action tensor([ 1.4679, -0.6354, -0.3345, 0.8486]) tensor([0.5479, 0.0669, 0.0903, 0.2949]) -Greedy action tensor([ 1.1757, -0.6323, -0.2132, 0.4381]) tensor([0.5287, 0.0867, 0.1318, 0.2528]) -Greedy action tensor([ 2.2129, -1.2587, -0.2404, 1.0788]) tensor([0.6950, 0.0216, 0.0598, 0.2236]) -Greedy action tensor([ 0.5395, -0.2599, 0.0360, -0.1835]) tensor([0.3938, 0.1771, 0.2380, 0.1911]) -Greedy action tensor([ 0.2553, 0.0771, -0.1464, -0.3447]) tensor([0.3274, 0.2739, 0.2191, 0.1797]) -Greedy action tensor([ 0.4445, -0.2921, -0.1264, -0.4198]) tensor([0.4057, 0.1942, 0.2292, 0.1709]) -Greedy action tensor([ 0.5526, -0.3152, 0.1755, -0.6389]) tensor([0.4150, 0.1743, 0.2846, 0.1261]) -Greedy action tensor([ 0.9706, -0.1450, -0.1574, -0.2777]) tensor([0.5159, 0.1691, 0.1670, 0.1481]) -Greedy action tensor([ 0.2096, -0.0078, -0.1278, -0.4597]) tensor([0.3300, 0.2655, 0.2355, 0.1690]) -Greedy action tensor([ 0.4936, 0.0972, 0.0049, -0.6703]) tensor([0.3849, 0.2589, 0.2361, 0.1202]) -Greedy action tensor([ 0.8853, -0.2454, -0.1380, -0.3645]) tensor([0.5079, 0.1640, 0.1825, 0.1456]) -Greedy action tensor([ 0.8776, -0.5846, -0.0784, -0.4890]) tensor([0.5344, 0.1238, 0.2054, 0.1363]) -Greedy action tensor([ 0.4421, -0.1859, -0.3297, -0.3742]) tensor([0.4102, 0.2189, 0.1896, 0.1813]) -Greedy action tensor([ 0.8849, -0.7138, 0.0161, -0.4420]) tensor([0.5300, 0.1071, 0.2223, 0.1406]) -Greedy action tensor([ 0.7348, -0.1965, -0.0492, -0.0340]) tensor([0.4321, 0.1703, 0.1973, 0.2003]) -Greedy action tensor([ 0.5753, -0.1866, -0.0765, -0.2065]) tensor([0.4089, 0.1909, 0.2131, 0.1871]) -Greedy action tensor([ 0.4229, 0.2884, -0.1202, -0.1133]) tensor([0.3289, 0.2875, 0.1911, 0.1924]) -Greedy action tensor([ 0.9883, -0.7707, -0.0276, -0.5312]) tensor([0.5704, 0.0982, 0.2065, 0.1248]) -Greedy action tensor([ 0.7941, -0.2512, -0.0350, -0.2123]) tensor([0.4643, 0.1633, 0.2027, 0.1697]) -Greedy action tensor([ 0.7892, -0.7980, -0.1528, -0.2720]) tensor([0.5154, 0.1054, 0.2009, 0.1783]) -Greedy action tensor([ 0.3633, -0.1153, -0.0580, -0.0740]) tensor([0.3423, 0.2121, 0.2246, 0.2210]) -Greedy action tensor([ 0.4066, -0.0452, 0.1105, -0.3587]) tensor([0.3515, 0.2237, 0.2614, 0.1635]) -Greedy action tensor([ 0.6149, -0.4109, -0.0710, -0.2851]) tensor([0.4408, 0.1580, 0.2220, 0.1792]) -Greedy action tensor([ 0.8209, -0.8549, 0.0046, -0.4443]) tensor([0.5232, 0.0979, 0.2313, 0.1476]) -Greedy action tensor([ 0.6921, -0.2466, -0.1193, -0.3229]) tensor([0.4550, 0.1780, 0.2021, 0.1649]) -Greedy action tensor([ 0.6353, -0.2974, -0.1018, -0.1667]) tensor([0.4310, 0.1696, 0.2062, 0.1933]) -Greedy action tensor([ 0.7753, 0.2048, -0.0999, -0.3795]) tensor([0.4353, 0.2461, 0.1814, 0.1372]) -Greedy action tensor([ 0.9662, -0.7822, 0.1234, -0.3419]) tensor([0.5334, 0.0928, 0.2296, 0.1442]) -Greedy action tensor([ 0.7812, -0.4211, 0.0070, -0.2806]) tensor([0.4745, 0.1426, 0.2188, 0.1641]) -Greedy action tensor([ 0.8008, -0.4590, 0.1959, -0.5040]) tensor([0.4760, 0.1350, 0.2599, 0.1291]) -Greedy action tensor([ 0.9473, -0.3846, -0.0853, -0.2512]) tensor([0.5204, 0.1374, 0.1853, 0.1570]) -Greedy action tensor([ 0.8215, -0.3632, -0.0732, -0.3616]) tensor([0.4948, 0.1513, 0.2023, 0.1516]) -Greedy action tensor([ 0.6523, -0.2326, -0.2061, -0.2361]) tensor([0.4449, 0.1836, 0.1885, 0.1830]) -Greedy action tensor([ 0.6736, -0.6613, -0.1555, -0.5602]) tensor([0.5023, 0.1322, 0.2192, 0.1463]) -Greedy action tensor([ 0.5127, -0.2153, -0.0304, -0.3221]) tensor([0.4004, 0.1933, 0.2326, 0.1737]) -Greedy action tensor([ 0.3355, 0.0206, -0.0017, -0.0964]) tensor([0.3233, 0.2360, 0.2308, 0.2099]) -Greedy action tensor([ 0.8665, -0.5562, -0.1329, -0.3808]) tensor([0.5273, 0.1271, 0.1941, 0.1515]) -Greedy action tensor([ 0.6198, -0.1680, -0.0993, -0.2060]) tensor([0.4202, 0.1911, 0.2047, 0.1840]) -Greedy action tensor([ 0.3418, -0.2189, -0.0471, -0.1470]) tensor([0.3494, 0.1994, 0.2368, 0.2143]) -Greedy action tensor([ 0.6817, -0.4184, 0.0314, -0.1710]) tensor([0.4384, 0.1459, 0.2288, 0.1869]) -Greedy action tensor([ 0.4803, -0.3702, -0.1741, -0.0094]) tensor([0.3907, 0.1669, 0.2030, 0.2394]) -Greedy action tensor([ 0.0923, -0.0738, 0.0773, -0.1076]) tensor([0.2739, 0.2320, 0.2698, 0.2243]) -Greedy action tensor([ 0.8911, -0.3636, -0.0748, -0.8367]) tensor([0.5425, 0.1547, 0.2065, 0.0964]) -Greedy action tensor([ 0.6364, -0.1620, 0.0369, -0.0881]) tensor([0.4026, 0.1812, 0.2211, 0.1951]) -Greedy action tensor([ 0.3317, 0.0562, -0.0526, -0.0118]) tensor([0.3175, 0.2411, 0.2162, 0.2252]) -Greedy action tensor([ 0.7440, -0.3559, -0.0275, -0.2248]) tensor([0.4598, 0.1531, 0.2126, 0.1745]) -Greedy action tensor([ 0.5274, -0.2477, -0.0526, -0.2402]) tensor([0.4025, 0.1854, 0.2253, 0.1868]) -Greedy action tensor([ 0.4855, -0.4016, -0.0975, -0.4167]) tensor([0.4209, 0.1733, 0.2350, 0.1708]) -Greedy action tensor([ 0.8654, -0.4761, 0.0204, -0.5264]) tensor([0.5156, 0.1348, 0.2215, 0.1282]) -Greedy action tensor([ 0.3852, 0.0171, -0.1350, -0.2458]) tensor([0.3548, 0.2455, 0.2109, 0.1888]) -Greedy action tensor([ 0.6736, -0.7506, -0.1066, -0.3376]) tensor([0.4848, 0.1167, 0.2222, 0.1764]) -Greedy action tensor([ 0.5802, -0.4070, -0.0817, -0.2577]) tensor([0.4308, 0.1605, 0.2222, 0.1864]) -Greedy action tensor([ 0.4080, -0.4049, -0.1960, -0.2465]) tensor([0.3984, 0.1767, 0.2178, 0.2071]) -Greedy action tensor([ 0.3280, 0.0255, -0.2575, -0.0985]) tensor([0.3392, 0.2506, 0.1888, 0.2214]) -Greedy action tensor([ 0.3609, -0.3283, 0.0329, -0.1738]) tensor([0.3561, 0.1788, 0.2565, 0.2086]) -Greedy action tensor([ 0.7614, -0.5288, -0.0512, -0.1873]) tensor([0.4748, 0.1307, 0.2107, 0.1839]) -Greedy action tensor([ 0.8126, -0.5670, -0.2625, -0.5904]) tensor([0.5438, 0.1369, 0.1856, 0.1337]) -Greedy action tensor([ 0.9236, -0.2806, -0.3036, -0.4007]) tensor([0.5379, 0.1613, 0.1577, 0.1431]) -Greedy action tensor([ 0.5527, -0.0885, 0.0729, -0.2096]) tensor([0.3828, 0.2016, 0.2369, 0.1786]) -Greedy action tensor([ 0.6737, -0.3916, -0.0869, -0.2732]) tensor([0.4546, 0.1566, 0.2125, 0.1763]) -Greedy action tensor([-0.0024, 0.1684, 0.0685, 0.0172]) tensor([0.2337, 0.2772, 0.2508, 0.2383]) -Greedy action tensor([ 0.4136, -0.2840, -0.0582, -0.0905]) tensor([0.3669, 0.1826, 0.2289, 0.2216]) -Greedy action tensor([ 0.4558, -0.0878, -0.0165, -0.3329]) tensor([0.3761, 0.2184, 0.2346, 0.1709]) -Greedy action tensor([ 0.7618, -0.5474, 0.1430, -0.5880]) tensor([0.4836, 0.1306, 0.2605, 0.1254]) -Greedy action tensor([ 0.9983, -0.7205, -0.1607, -0.4109]) tensor([0.5756, 0.1032, 0.1806, 0.1406]) -Greedy action tensor([ 0.8778, -0.8020, -0.0674, -0.4774]) tensor([0.5456, 0.1017, 0.2120, 0.1407]) -Greedy action tensor([ 0.3350, -0.0841, 0.1556, -0.1960]) tensor([0.3245, 0.2134, 0.2712, 0.1908]) -Greedy action tensor([ 1.4388, -0.9380, 0.1763, -0.7987]) tensor([0.6745, 0.0626, 0.1909, 0.0720]) -Greedy action tensor([ 1.1883, -0.9939, 0.0220, -0.6565]) tensor([0.6320, 0.0713, 0.1969, 0.0999]) -Greedy action tensor([ 0.6359, -0.0103, -0.1776, -0.1256]) tensor([0.4108, 0.2153, 0.1821, 0.1918]) -Greedy action tensor([ 1.0657, -0.6666, -0.0614, -0.3903]) tensor([0.5767, 0.1020, 0.1868, 0.1345]) -Greedy action tensor([ 0.5673, -0.1834, -0.0994, -0.0736]) tensor([0.3980, 0.1879, 0.2044, 0.2097]) -Greedy action tensor([ 0.3358, -0.0046, 0.0328, 0.0917]) tensor([0.3093, 0.2200, 0.2284, 0.2423]) -Greedy action tensor([ 0.9701, -0.5622, -0.0776, -0.8305]) tensor([0.5774, 0.1247, 0.2025, 0.0954]) -Greedy action tensor([ 0.7869, -0.6376, -0.1125, -0.0468]) tensor([0.4803, 0.1156, 0.1954, 0.2087]) -Greedy action tensor([ 0.9380, -0.7651, 0.0856, -0.7271]) tensor([0.5563, 0.1013, 0.2372, 0.1052]) -Greedy action tensor([ 0.3883, -0.2296, -0.0203, -0.3382]) tensor([0.3721, 0.2006, 0.2473, 0.1800]) -Greedy action tensor([ 0.6145, -0.5276, -0.0365, -0.3456]) tensor([0.4497, 0.1435, 0.2345, 0.1722]) -Greedy action tensor([ 0.7616, -0.4081, -0.0652, -0.2783]) tensor([0.4759, 0.1477, 0.2082, 0.1682]) -Greedy action tensor([ 0.4844, -0.2839, 0.0418, -0.1948]) tensor([0.3827, 0.1775, 0.2458, 0.1940]) -Greedy action tensor([ 0.6294, -0.2654, -0.0303, -0.0614]) tensor([0.4121, 0.1684, 0.2130, 0.2065]) -Greedy action tensor([ 0.7331, -0.7196, -0.0116, -0.2321]) tensor([0.4785, 0.1119, 0.2272, 0.1823]) -Greedy action tensor([ 0.9136, -0.3313, -0.1019, -0.2601]) tensor([0.5104, 0.1470, 0.1849, 0.1578]) -Greedy action tensor([ 0.8801, -0.5030, -0.0638, -0.2327]) tensor([0.5080, 0.1274, 0.1977, 0.1669]) -Greedy action tensor([ 0.4126, -0.3426, -0.0578, -0.2642]) tensor([0.3842, 0.1805, 0.2400, 0.1953]) -Greedy action tensor([ 1.9500, -1.4144, 0.6596, 1.5645]) tensor([0.5025, 0.0174, 0.1383, 0.3418]) -Greedy action tensor([-0.0826, -0.8391, -0.9179, 1.0091]) tensor([0.2048, 0.0961, 0.0888, 0.6102]) -Greedy action tensor([-0.3216, -1.1180, 1.0106, 0.6758]) tensor([0.1258, 0.0567, 0.4765, 0.3410]) -Greedy action tensor([-0.8687, -0.5299, 1.0233, -1.1856]) tensor([0.1024, 0.1437, 0.6793, 0.0746]) -Greedy action tensor([-0.8184, -1.7848, 0.4615, -0.3673]) tensor([0.1527, 0.0581, 0.5493, 0.2398]) -Greedy action tensor([-1.2618, -0.3047, -0.2249, 0.3161]) tensor([0.0887, 0.2311, 0.2503, 0.4299]) -Greedy action tensor([ 1.2527, 0.1429, -0.1309, 0.7982]) tensor([0.4515, 0.1488, 0.1132, 0.2866]) -Greedy action tensor([ 0.8904, 0.3234, -0.3722, 0.3322]) tensor([0.4128, 0.2342, 0.1168, 0.2362]) -Greedy action tensor([ 0.6477, -0.6189, 0.0519, 1.2368]) tensor([0.2751, 0.0775, 0.1516, 0.4958]) -Greedy action tensor([-1.5446, -0.5327, -0.7447, 0.2317]) tensor([0.0841, 0.2315, 0.1872, 0.4971]) -Greedy action tensor([ 1.4521, 0.1672, -0.4317, 0.4790]) tensor([0.5535, 0.1531, 0.0841, 0.2092]) -Greedy action tensor([ 0.4096, -0.8917, -0.3820, 1.0644]) tensor([0.2740, 0.0746, 0.1241, 0.5273]) -Greedy action tensor([ 1.3025, 0.3213, 1.1721, -0.2868]) tensor([0.4071, 0.1526, 0.3573, 0.0831]) -Greedy action tensor([-0.0769, -1.2065, -0.2955, 0.9644]) tensor([0.2016, 0.0652, 0.1620, 0.5712]) -Greedy action tensor([ 1.1337, 0.4433, 1.1442, -0.4225]) tensor([0.3673, 0.1841, 0.3711, 0.0775]) -Greedy action tensor([ 0.6882, -0.6328, 0.9583, -0.7336]) tensor([0.3548, 0.0947, 0.4649, 0.0856]) -Greedy action tensor([-1.3142, -0.2335, -0.5563, -0.0585]) tensor([0.1043, 0.3073, 0.2225, 0.3660]) -Greedy action tensor([-0.1284, -0.6788, -0.0476, 0.5815]) tensor([0.2130, 0.1228, 0.2309, 0.4332]) -Greedy action tensor([-0.2903, 0.6288, 0.5294, -0.6445]) tensor([0.1544, 0.3870, 0.3504, 0.1083]) -Greedy action tensor([ 1.4295, -1.5928, 0.5474, 0.8216]) tensor([0.4982, 0.0243, 0.2062, 0.2713]) -Greedy action tensor([ 0.5842, -0.7463, 0.0592, 0.7270]) tensor([0.3323, 0.0878, 0.1966, 0.3833]) -Greedy action tensor([ 1.3496, -0.2580, -0.3755, 0.5970]) tensor([0.5406, 0.1083, 0.0963, 0.2547]) -Greedy action tensor([ 1.3010, -0.6832, 1.2058, 0.3897]) tensor([0.4084, 0.0561, 0.3713, 0.1642]) -Greedy action tensor([ 1.6281, -0.5554, 0.4018, 1.1538]) tensor([0.4930, 0.0555, 0.1446, 0.3068]) -Greedy action tensor([ 0.2711, -0.3009, 0.4017, 0.5136]) tensor([0.2514, 0.1419, 0.2864, 0.3203]) -Greedy action tensor([ 0.4660, -1.3039, 0.4040, 0.4452]) tensor([0.3237, 0.0551, 0.3042, 0.3170]) -Greedy action tensor([-0.0801, 0.3600, 0.5378, -0.1007]) tensor([0.1856, 0.2882, 0.3443, 0.1818]) -Greedy action tensor([ 0.7168, -1.5521, 0.0604, 1.0201]) tensor([0.3360, 0.0347, 0.1743, 0.4550]) -Greedy action tensor([ 0.0770, -1.2166, 0.0531, -0.1720]) tensor([0.3300, 0.0905, 0.3222, 0.2573]) -Greedy action tensor([-1.0061, -0.3507, 1.1935, 0.5175]) tensor([0.0605, 0.1165, 0.5456, 0.2775]) -Greedy action tensor([ 0.6719, -2.0723, -0.1895, 0.9321]) tensor([0.3592, 0.0231, 0.1518, 0.4659]) -Greedy action tensor([ 0.4092, -0.5341, -0.5637, -0.0323]) tensor([0.4149, 0.1615, 0.1568, 0.2668]) -Greedy action tensor([ 0.2719, 0.3191, -0.2904, 1.2188]) tensor([0.1925, 0.2018, 0.1097, 0.4961]) -Greedy action tensor([0.1069, 1.4036, 0.5720, 1.0992]) tensor([0.1118, 0.4088, 0.1780, 0.3015]) -Greedy action tensor([ 0.7370, -0.6749, 0.1255, 0.5788]) tensor([0.3788, 0.0923, 0.2055, 0.3234]) -Greedy action tensor([-1.0923, -0.6925, -1.5736, -0.2720]) tensor([0.1859, 0.2772, 0.1148, 0.4221]) -Greedy action tensor([ 0.3223, -2.1127, -0.3404, -0.2212]) tensor([0.4579, 0.0401, 0.2360, 0.2659]) -Greedy action tensor([ 0.8008, 0.5810, -0.0365, -0.3752]) tensor([0.3931, 0.3155, 0.1701, 0.1213]) -Greedy action tensor([-1.2585, -1.5703, 0.5191, -0.6265]) tensor([0.1049, 0.0768, 0.6208, 0.1974]) -Greedy action tensor([-0.9813, -0.9306, -0.1328, 0.1114]) tensor([0.1357, 0.1427, 0.3170, 0.4046]) -Greedy action tensor([-0.0854, -0.3775, 0.0852, -0.4940]) tensor([0.2780, 0.2076, 0.3297, 0.1847]) -Greedy action tensor([-0.0635, -2.6081, 0.5931, 0.7084]) tensor([0.1934, 0.0152, 0.3729, 0.4185]) -Greedy action tensor([ 0.5730, -0.1281, 1.6499, 1.9727]) tensor([0.1179, 0.0585, 0.3460, 0.4777]) -Greedy action tensor([ 0.7563, 0.0943, 0.4246, -0.4608]) tensor([0.3953, 0.2039, 0.2837, 0.1170]) -Greedy action tensor([-0.2362, -1.7212, -0.3788, 0.8775]) tensor([0.1946, 0.0441, 0.1687, 0.5926]) -Greedy action tensor([ 0.8218, 0.1316, 0.6514, -0.4223]) tensor([0.3798, 0.1905, 0.3203, 0.1095]) -Greedy action tensor([ 4.8706e-01, -1.2738e+00, -7.7605e-05, -2.3200e-01]) tensor([0.4399, 0.0756, 0.2702, 0.2143]) -Greedy action tensor([ 0.9358, 0.4281, -0.7189, 0.9887]) tensor([0.3512, 0.2114, 0.0671, 0.3703]) -Greedy action tensor([-0.8574, -0.6659, -0.3316, -0.4294]) tensor([0.1839, 0.2228, 0.3112, 0.2822]) -Greedy action tensor([ 0.0224, -0.4578, 0.3017, 1.2148]) tensor([0.1604, 0.0992, 0.2120, 0.5284]) -Greedy action tensor([ 0.6219, 0.8905, -0.2601, 0.9824]) tensor([0.2406, 0.3147, 0.0996, 0.3450]) -Greedy action tensor([-1.3421, -0.1794, -0.4315, -0.3833]) tensor([0.1076, 0.3442, 0.2675, 0.2807]) -Greedy action tensor([ 1.3359, -0.4142, 0.3081, 1.1665]) tensor([0.4209, 0.0731, 0.1506, 0.3553]) -Greedy action tensor([ 1.7535, -0.3721, 0.5570, -0.3065]) tensor([0.6455, 0.0771, 0.1951, 0.0823]) -Greedy action tensor([-1.1333, 0.3276, 1.3957, -0.5638]) tensor([0.0510, 0.2197, 0.6392, 0.0901]) -Greedy action tensor([ 1.3911, -1.3668, -0.0476, 0.4340]) tensor([0.5936, 0.0376, 0.1408, 0.2280]) -Greedy action tensor([-0.5009, -0.8671, 0.2579, 0.2406]) tensor([0.1687, 0.1170, 0.3603, 0.3541]) -Greedy action tensor([ 0.1035, -1.9237, -0.1519, 0.9430]) tensor([0.2369, 0.0312, 0.1835, 0.5484]) -Greedy action tensor([ 0.5474, -0.6785, -0.5353, 0.6914]) tensor([0.3588, 0.1053, 0.1215, 0.4144]) -Greedy action tensor([ 0.8462, -2.0416, 0.1948, 0.7937]) tensor([0.3959, 0.0221, 0.2064, 0.3757]) -Greedy action tensor([-0.8426, -0.9381, 0.7732, -1.2587]) tensor([0.1316, 0.1196, 0.6621, 0.0868]) -Greedy action tensor([ 0.5733, -0.4778, 0.0628, 0.6172]) tensor([0.3339, 0.1167, 0.2004, 0.3489]) -Greedy action tensor([ 1.1334, -0.8571, 1.7093, 0.7680]) tensor([0.2771, 0.0379, 0.4928, 0.1923]) -Greedy action tensor([ 0.0224, -0.9334, 1.4602, 0.0121]) tensor([0.1518, 0.0584, 0.6395, 0.1503]) -Greedy action tensor([-0.0967, 0.7154, -0.5614, 0.1524]) tensor([0.1936, 0.4362, 0.1217, 0.2484]) -Greedy action tensor([0.7481, 0.9125, 1.0427, 0.4061]) tensor([0.2363, 0.2785, 0.3173, 0.1679]) -Greedy action tensor([-0.1657, -0.0768, -0.9531, 0.6500]) tensor([0.2080, 0.2273, 0.0946, 0.4701]) -Greedy action tensor([ 0.2728, -0.4861, 0.9992, 0.3471]) tensor([0.2168, 0.1015, 0.4482, 0.2335]) -Greedy action tensor([ 0.6689, 0.6657, -1.7402, 0.8132]) tensor([0.3085, 0.3075, 0.0277, 0.3563]) -Greedy action tensor([-0.3966, -1.1797, 0.2836, -0.5164]) tensor([0.2316, 0.1058, 0.4572, 0.2054]) -Greedy action tensor([-1.3905e+00, 6.6420e-01, -6.9600e-04, -1.1415e+00]) tensor([0.0709, 0.5535, 0.2847, 0.0910]) -Greedy action tensor([-0.9294, -1.2868, -0.8750, 0.0931]) tensor([0.1806, 0.1264, 0.1908, 0.5022]) -Greedy action tensor([-0.1802, 0.3788, 0.3798, -0.5042]) tensor([0.1915, 0.3349, 0.3352, 0.1385]) -Greedy action tensor([ 1.0776, 0.3596, 0.7413, -0.7013]) tensor([0.4218, 0.2057, 0.3013, 0.0712]) -Greedy action tensor([0.2938, 0.1723, 0.9191, 0.2830]) tensor([0.2108, 0.1867, 0.3940, 0.2085]) -Greedy action tensor([ 1.6606, -1.0334, 0.8984, 0.4979]) tensor([0.5414, 0.0366, 0.2527, 0.1693]) -Greedy action tensor([-0.4687, -1.3364, 0.9172, -0.2191]) tensor([0.1492, 0.0627, 0.5966, 0.1915]) -Greedy action tensor([-1.6788, -0.3045, -0.2657, -0.6493]) tensor([0.0843, 0.3332, 0.3464, 0.2361]) -Greedy action tensor([1.6279, 0.0180, 0.9015, 0.3894]) tensor([0.5067, 0.1013, 0.2451, 0.1469]) -Greedy action tensor([-0.0912, -1.8887, -0.2658, 0.3304]) tensor([0.2833, 0.0469, 0.2379, 0.4319]) -Greedy action tensor([-1.8105, -0.3247, 0.5814, -0.1093]) tensor([0.0458, 0.2024, 0.5008, 0.2510]) -Greedy action tensor([-1.8308, -0.3862, 0.6031, -0.1299]) tensor([0.0452, 0.1917, 0.5155, 0.2477]) -Greedy action tensor([-1.9161, -0.4555, 0.6813, -0.1471]) tensor([0.0406, 0.1751, 0.5458, 0.2384]) -Greedy action tensor([-1.8322, -0.4973, 0.5720, -0.1254]) tensor([0.0468, 0.1777, 0.5177, 0.2578]) -Greedy action tensor([-1.7735, -0.4621, 0.5907, -0.0856]) tensor([0.0482, 0.1788, 0.5124, 0.2606]) -Greedy action tensor([-1.7033, 0.0284, 0.4754, -0.0566]) tensor([0.0484, 0.2733, 0.4273, 0.2510]) -Greedy action tensor([-1.6780, -0.4204, 0.5353, 0.0277]) tensor([0.0522, 0.1835, 0.4771, 0.2872]) -Greedy action tensor([-1.2076, -0.5972, 0.2728, 0.2601]) tensor([0.0864, 0.1591, 0.3797, 0.3749]) -Greedy action tensor([-1.8602, -0.4970, 0.6842, -0.0756]) tensor([0.0424, 0.1656, 0.5396, 0.2524]) -Greedy action tensor([-1.5999, -0.5176, 0.5168, 0.0858]) tensor([0.0567, 0.1672, 0.4704, 0.3057]) -Greedy action tensor([-0.9972, -0.6284, 0.1770, 0.5984]) tensor([0.0942, 0.1362, 0.3049, 0.4646]) -Greedy action tensor([-1.9071, -0.4074, 0.6398, -0.1587]) tensor([0.0417, 0.1867, 0.5321, 0.2395]) -Greedy action tensor([-1.8787, -0.2888, 0.6154, -0.1231]) tensor([0.0420, 0.2060, 0.5088, 0.2431]) -Greedy action tensor([-1.2174, -0.1377, 0.7003, -0.7216]) tensor([0.0807, 0.2376, 0.5492, 0.1325]) -Greedy action tensor([-1.5647, -0.5542, 0.5530, 0.1564]) tensor([0.0567, 0.1556, 0.4709, 0.3168]) -Greedy action tensor([-1.9094, -0.4295, 0.6460, -0.1603]) tensor([0.0416, 0.1829, 0.5361, 0.2394]) -Greedy action tensor([-1.1727, -0.5286, 0.3051, 0.7111]) tensor([0.0721, 0.1373, 0.3161, 0.4744]) -Greedy action tensor([-1.8698, -0.4007, 0.6217, -0.1330]) tensor([0.0433, 0.1881, 0.5228, 0.2458]) -Greedy action tensor([-1.7894, -0.4160, 0.5880, -0.1055]) tensor([0.0474, 0.1870, 0.5104, 0.2551]) -Greedy action tensor([-1.7929, -0.4989, 0.4417, -0.1707]) tensor([0.0525, 0.1914, 0.4903, 0.2658]) -Greedy action tensor([-1.7903, -0.5022, 0.6433, -0.0106]) tensor([0.0455, 0.1652, 0.5193, 0.2700]) -Greedy action tensor([-1.3542, -0.1626, 0.3996, 0.2403]) tensor([0.0667, 0.2196, 0.3852, 0.3285]) -Greedy action tensor([-1.5580, -0.2695, 0.6164, 0.0776]) tensor([0.0539, 0.1955, 0.4740, 0.2766]) -Greedy action tensor([-1.5012, 0.5171, 0.3042, 0.1026]) tensor([0.0511, 0.3844, 0.3106, 0.2539]) -Greedy action tensor([-1.9330, -0.4113, 0.6555, -0.1709]) tensor([0.0405, 0.1853, 0.5385, 0.2357]) -Greedy action tensor([-1.6165, -0.0573, 0.4486, -0.0105]) tensor([0.0537, 0.2553, 0.4234, 0.2675]) -Greedy action tensor([-1.6724, -0.5696, 1.0221, 0.6936]) tensor([0.0339, 0.1022, 0.5022, 0.3616]) -Greedy action tensor([-1.9220, -0.4538, 0.6526, -0.1670]) tensor([0.0412, 0.1790, 0.5413, 0.2385]) -Greedy action tensor([-0.8822, -0.4784, 0.9403, 1.4355]) tensor([0.0531, 0.0795, 0.3285, 0.5390]) -Greedy action tensor([-1.9251, -0.4248, 0.6533, -0.1675]) tensor([0.0409, 0.1833, 0.5387, 0.2371]) -Greedy action tensor([-1.8419, -0.2334, 0.6015, -0.0731]) tensor([0.0428, 0.2137, 0.4926, 0.2509]) -Greedy action tensor([-1.3922, -0.5809, 0.4370, 0.0744]) tensor([0.0724, 0.1629, 0.4509, 0.3138]) -Greedy action tensor([-1.8678, -0.4002, 0.6185, -0.1382]) tensor([0.0435, 0.1887, 0.5226, 0.2452]) -Greedy action tensor([-1.0658, 0.8798, 0.1911, 0.1053]) tensor([0.0679, 0.4748, 0.2385, 0.2189]) -Greedy action tensor([-1.5671, -0.4840, 0.4701, 0.0210]) tensor([0.0605, 0.1788, 0.4643, 0.2963]) -Greedy action tensor([-1.8229, -0.5133, 0.6391, -0.0991]) tensor([0.0454, 0.1681, 0.5322, 0.2544]) -Greedy action tensor([-1.8329, -0.4779, 0.6028, -0.1289]) tensor([0.0459, 0.1779, 0.5241, 0.2521]) -Greedy action tensor([-1.8615, -0.4602, 0.6320, -0.1250]) tensor([0.0438, 0.1778, 0.5299, 0.2486]) -Greedy action tensor([-0.3427, -0.3109, 0.1663, 0.1660]) tensor([0.1866, 0.1926, 0.3104, 0.3103]) -Greedy action tensor([-1.7224e+00, 4.3980e-02, 4.8232e-01, -3.6174e-04]) tensor([0.0465, 0.2719, 0.4215, 0.2601]) -Greedy action tensor([-1.6660, -0.5979, 1.3152, 0.9139]) tensor([0.0272, 0.0790, 0.5354, 0.3584]) -Greedy action tensor([-1.0231, -0.5354, 0.2916, -0.0041]) tensor([0.1096, 0.1785, 0.4082, 0.3037]) -Greedy action tensor([-1.8471, -0.2655, 0.5967, -0.1107]) tensor([0.0434, 0.2109, 0.4995, 0.2462]) -Greedy action tensor([-1.9012, -0.4382, 0.6742, -0.1422]) tensor([0.0412, 0.1780, 0.5415, 0.2393]) -Greedy action tensor([-1.8674, -0.3830, 0.6283, -0.1349]) tensor([0.0431, 0.1902, 0.5229, 0.2438]) -Greedy action tensor([-1.5302, -0.0830, 0.3755, 0.0579]) tensor([0.0593, 0.2520, 0.3986, 0.2901]) -Greedy action tensor([-1.9033, -0.4452, 0.6440, -0.1603]) tensor([0.0420, 0.1807, 0.5370, 0.2403]) -Greedy action tensor([-1.1475, 0.1656, -0.0814, 0.9942]) tensor([0.0620, 0.2304, 0.1800, 0.5276]) -Greedy action tensor([-1.5024, -0.3961, 0.4325, 0.0731]) tensor([0.0634, 0.1916, 0.4388, 0.3063]) -Greedy action tensor([-1.3140, -0.4090, 0.3891, -0.0343]) tensor([0.0796, 0.1968, 0.4372, 0.2863]) -Greedy action tensor([-1.1225, -0.5795, 0.2627, 0.2743]) tensor([0.0929, 0.1600, 0.3714, 0.3757]) -Greedy action tensor([-1.9219, -0.4383, 0.6574, -0.1661]) tensor([0.0410, 0.1808, 0.5408, 0.2374]) -Greedy action tensor([-1.3964, -0.5457, 0.3920, 0.2353]) tensor([0.0693, 0.1622, 0.4143, 0.3542]) -Greedy action tensor([-1.9083, -0.3704, 0.6403, -0.1547]) tensor([0.0413, 0.1922, 0.5281, 0.2385]) -Greedy action tensor([-1.8821, -0.3903, 0.6289, -0.1379]) tensor([0.0426, 0.1893, 0.5245, 0.2436]) -Greedy action tensor([-1.8361, -0.5136, 0.6448, -0.0781]) tensor([0.0444, 0.1667, 0.5311, 0.2578]) -Greedy action tensor([-1.8559, -0.4234, 0.6148, -0.1340]) tensor([0.0442, 0.1852, 0.5231, 0.2474]) -Greedy action tensor([-1.6623, -0.2519, 0.4814, 0.0264]) tensor([0.0525, 0.2152, 0.4480, 0.2843]) -Greedy action tensor([-1.7292, -0.4096, 0.5721, 0.0574]) tensor([0.0483, 0.1808, 0.4825, 0.2884]) -Greedy action tensor([-0.4401, 0.8159, -0.3926, 0.0722]) tensor([0.1383, 0.4857, 0.1451, 0.2309]) -Greedy action tensor([-1.2742, -0.5795, 0.3984, 0.4322]) tensor([0.0723, 0.1448, 0.3849, 0.3981]) -Greedy action tensor([-0.6953, -0.6041, 0.3007, 0.3961]) tensor([0.1285, 0.1408, 0.3479, 0.3828]) -Greedy action tensor([-1.5962, -0.5835, 0.4799, -0.0643]) tensor([0.0611, 0.1683, 0.4876, 0.2829]) -Greedy action tensor([-1.1195, -0.4122, 0.1993, 0.3000]) tensor([0.0917, 0.1861, 0.3429, 0.3793]) -Greedy action tensor([-1.8183, -0.3929, 0.5928, -0.1101]) tensor([0.0458, 0.1906, 0.5107, 0.2529]) -Greedy action tensor([-1.5014, -0.5280, 0.4676, 0.1627]) tensor([0.0621, 0.1645, 0.4452, 0.3282]) -Greedy action tensor([-1.9093, -0.2843, 0.6267, -0.1774]) tensor([0.0411, 0.2085, 0.5185, 0.2320]) -Greedy action tensor([-1.4835, -0.5304, 0.4723, -0.1053]) tensor([0.0683, 0.1773, 0.4832, 0.2712]) -Greedy action tensor([-1.9177, -0.3978, 0.6433, -0.1696]) tensor([0.0412, 0.1884, 0.5337, 0.2367]) -Greedy action tensor([-1.5798, -0.4631, 0.4945, -0.0730]) tensor([0.0605, 0.1848, 0.4816, 0.2730]) -Greedy action tensor([-1.7438, -0.2507, 0.6340, 0.0322]) tensor([0.0452, 0.2011, 0.4870, 0.2668]) -Greedy action tensor([-1.7436, -0.3756, 0.6621, -0.0331]) tensor([0.0464, 0.1823, 0.5146, 0.2567]) -Greedy action tensor([-1.9165, -0.4449, 0.6514, -0.1709]) tensor([0.0415, 0.1806, 0.5405, 0.2375]) -Greedy action tensor([-1.4535, -0.4315, 0.7530, 0.5958]) tensor([0.0485, 0.1347, 0.4404, 0.3764]) -Greedy action tensor([-1.7782, -0.1575, 0.5620, 0.0231]) tensor([0.0445, 0.2248, 0.4615, 0.2693]) -Greedy action tensor([-1.7549, -0.0837, 0.6214, -0.3087]) tensor([0.0469, 0.2494, 0.5047, 0.1991]) -Greedy action tensor([-1.8223, -0.3772, 0.6550, -0.1032]) tensor([0.0440, 0.1866, 0.5239, 0.2455]) -Greedy action tensor([-0.9255, -0.0599, 0.2615, -0.1545]) tensor([0.1134, 0.2696, 0.3718, 0.2452]) -Greedy action tensor([-1.7887, -0.3748, 0.5628, -0.0878]) tensor([0.0474, 0.1950, 0.4979, 0.2597]) -Greedy action tensor([-1.6942, -0.5217, 0.5276, -0.0348]) tensor([0.0534, 0.1726, 0.4930, 0.2809]) -Greedy action tensor([-1.1807, 0.0062, 0.2863, 0.4867]) tensor([0.0719, 0.2356, 0.3117, 0.3809]) -Greedy action tensor([ 1.1358, -0.6457, -0.0697, 0.1129]) tensor([0.5472, 0.0921, 0.1639, 0.1967]) -Greedy action tensor([ 1.4192, -0.4288, -0.2109, 0.3445]) tensor([0.5900, 0.0930, 0.1156, 0.2014]) -Greedy action tensor([ 0.6776, -0.1884, -0.1755, -0.0387]) tensor([0.4282, 0.1801, 0.1825, 0.2092]) -Greedy action tensor([ 1.1105, -0.1591, -0.4011, 0.2803]) tensor([0.5161, 0.1450, 0.1138, 0.2250]) -Greedy action tensor([ 1.5622, -0.7425, -0.2421, 0.1448]) tensor([0.6637, 0.0662, 0.1092, 0.1608]) -Greedy action tensor([ 1.5808, -0.7467, -0.4404, 0.5895]) tensor([0.6246, 0.0609, 0.0828, 0.2318]) -Greedy action tensor([ 1.3367, 0.0639, -0.6424, 0.0870]) tensor([0.5866, 0.1643, 0.0811, 0.1681]) -Greedy action tensor([ 1.6394, -0.4269, -0.2892, 0.1897]) tensor([0.6637, 0.0841, 0.0965, 0.1557]) -Greedy action tensor([ 0.5232, -0.9042, -0.3136, -0.5699]) tensor([0.4980, 0.1195, 0.2157, 0.1669]) -Greedy action tensor([ 0.4071, -0.3764, 0.0037, 0.1998]) tensor([0.3404, 0.1555, 0.2274, 0.2767]) -Greedy action tensor([ 1.5476, -0.6814, -0.3670, 0.6153]) tensor([0.6065, 0.0653, 0.0894, 0.2388]) -Greedy action tensor([ 2.3812, -1.4942, -0.0147, 0.6920]) tensor([0.7713, 0.0160, 0.0703, 0.1424]) -Greedy action tensor([ 0.8055, -0.3493, -0.2248, 0.0132]) tensor([0.4706, 0.1483, 0.1680, 0.2131]) -Greedy action tensor([ 0.2372, -0.0488, -0.2740, 0.0485]) tensor([0.3146, 0.2363, 0.1887, 0.2605]) -Greedy action tensor([ 1.8314, -0.2416, -0.9546, 0.2951]) tensor([0.7129, 0.0897, 0.0440, 0.1534]) -Greedy action tensor([ 1.8070, -0.6663, -0.4540, 0.1616]) tensor([0.7239, 0.0610, 0.0755, 0.1397]) -Greedy action tensor([ 0.4467, 0.1709, 0.1261, -0.4309]) tensor([0.3448, 0.2617, 0.2502, 0.1433]) -Greedy action tensor([ 1.5615, -0.8191, -0.5090, 0.5077]) tensor([0.6381, 0.0590, 0.0805, 0.2224]) -Greedy action tensor([ 0.8156, -0.2298, -0.0783, -0.2296]) tensor([0.4734, 0.1664, 0.1937, 0.1665]) -Greedy action tensor([ 1.2454, -0.3761, -0.0903, -0.2134]) tensor([0.5906, 0.1167, 0.1553, 0.1373]) -Greedy action tensor([ 1.9705, -0.9527, -0.5125, 0.9600]) tensor([0.6661, 0.0358, 0.0556, 0.2425]) -Greedy action tensor([ 0.7305, -0.1627, -0.3689, 0.1389]) tensor([0.4356, 0.1783, 0.1451, 0.2411]) -Greedy action tensor([ 1.4166, -0.2113, -0.1262, 0.1623]) tensor([0.5898, 0.1158, 0.1261, 0.1683]) -Greedy action tensor([ 1.6288, -0.2453, -0.5861, 0.6458]) tensor([0.6109, 0.0938, 0.0667, 0.2286]) -Greedy action tensor([ 0.4292, -0.4672, -0.2075, -0.0263]) tensor([0.3889, 0.1587, 0.2058, 0.2466]) -Greedy action tensor([ 1.0479, -0.4719, 0.0455, 0.0709]) tensor([0.5096, 0.1115, 0.1870, 0.1918]) -Greedy action tensor([ 1.3093, -0.2266, -0.3483, 0.4077]) tensor([0.5519, 0.1188, 0.1052, 0.2240]) -Greedy action tensor([ 0.6903, -0.1950, -0.0086, 0.0773]) tensor([0.4079, 0.1683, 0.2028, 0.2210]) -Greedy action tensor([ 0.8849, 0.0061, -0.2479, 0.2385]) tensor([0.4422, 0.1836, 0.1424, 0.2317]) -Greedy action tensor([ 0.4855, -0.2146, 0.0632, 0.0468]) tensor([0.3575, 0.1775, 0.2344, 0.2306]) -Greedy action tensor([ 1.0044, -0.2514, -0.1893, 0.2617]) tensor([0.4845, 0.1380, 0.1469, 0.2306]) -Greedy action tensor([ 1.6958, -0.2878, -0.5122, 0.4194]) tensor([0.6551, 0.0901, 0.0720, 0.1828]) -Greedy action tensor([-0.0504, -0.0949, -0.1531, 0.3112]) tensor([0.2329, 0.2227, 0.2101, 0.3343]) -Greedy action tensor([ 1.5531, -0.6690, -0.3523, 0.4963]) tensor([0.6232, 0.0675, 0.0927, 0.2166]) -Greedy action tensor([ 1.3855, -0.5697, -0.3596, 0.3931]) tensor([0.5928, 0.0839, 0.1035, 0.2197]) -Greedy action tensor([ 1.6371, -0.8855, -0.2539, 0.3691]) tensor([0.6611, 0.0531, 0.0998, 0.1860]) -Greedy action tensor([ 1.2215, -0.0585, -0.4689, 0.4378]) tensor([0.5211, 0.1449, 0.0961, 0.2380]) -Greedy action tensor([ 1.1251, -0.3909, -0.1067, 0.0689]) tensor([0.5379, 0.1181, 0.1569, 0.1871]) -Greedy action tensor([ 1.7274, -0.6993, -0.1560, 0.1522]) tensor([0.6909, 0.0610, 0.1051, 0.1430]) -Greedy action tensor([ 1.1898, -0.1237, -0.6142, 0.3526]) tensor([0.5358, 0.1441, 0.0882, 0.2319]) -Greedy action tensor([ 0.5147, -0.3279, -0.0517, -0.1109]) tensor([0.3948, 0.1700, 0.2241, 0.2112]) -Greedy action tensor([ 1.8801, -0.5935, -0.5029, 0.5083]) tensor([0.6992, 0.0589, 0.0645, 0.1774]) -Greedy action tensor([ 1.7476, -0.6774, -0.2883, 0.3043]) tensor([0.6872, 0.0608, 0.0897, 0.1623]) -Greedy action tensor([ 1.2038, -0.4417, -0.3088, 0.2794]) tensor([0.5525, 0.1066, 0.1217, 0.2192]) -Greedy action tensor([ 1.5831, -0.6410, 0.0329, 0.0835]) tensor([0.6479, 0.0701, 0.1375, 0.1446]) -Greedy action tensor([ 1.4091, -0.9326, -0.2071, 0.6076]) tensor([0.5736, 0.0552, 0.1139, 0.2573]) -Greedy action tensor([ 1.3369, -0.5340, -0.2200, 0.1531]) tensor([0.5985, 0.0922, 0.1262, 0.1832]) -Greedy action tensor([ 1.2202, -0.7940, -0.5394, 0.8122]) tensor([0.5075, 0.0677, 0.0873, 0.3375]) -Greedy action tensor([ 1.6548, -0.9008, -0.4336, 0.1491]) tensor([0.7025, 0.0545, 0.0870, 0.1559]) -Greedy action tensor([ 1.4749, -0.3601, -0.3739, 0.2491]) tensor([0.6209, 0.0991, 0.0978, 0.1822]) -Greedy action tensor([ 1.6215, -0.2694, -0.1519, -0.0363]) tensor([0.6617, 0.0999, 0.1123, 0.1261]) -Greedy action tensor([ 2.2494, -0.9319, -0.5661, 1.0902]) tensor([0.7066, 0.0293, 0.0423, 0.2217]) -Greedy action tensor([ 1.5536, -1.2338, -0.2471, 0.4071]) tensor([0.6475, 0.0399, 0.1069, 0.2057]) -Greedy action tensor([ 1.3978, 0.0181, -0.3852, 0.3293]) tensor([0.5671, 0.1427, 0.0954, 0.1948]) -Greedy action tensor([ 1.3001, -0.4452, -0.4723, 0.3746]) tensor([0.5744, 0.1003, 0.0976, 0.2277]) -Greedy action tensor([ 1.7254, -0.5398, -0.1712, 0.1869]) tensor([0.6809, 0.0707, 0.1022, 0.1462]) -Greedy action tensor([ 0.6898, -0.3435, -0.2418, 0.4396]) tensor([0.3955, 0.1407, 0.1558, 0.3080]) -Greedy action tensor([ 1.6308, -0.2658, -0.4680, 0.0572]) tensor([0.6757, 0.1014, 0.0828, 0.1401]) -Greedy action tensor([ 0.9131, -0.2618, -0.4374, 0.2103]) tensor([0.4847, 0.1497, 0.1256, 0.2400]) -Greedy action tensor([ 1.1435, -0.6157, -0.0971, 0.3822]) tensor([0.5185, 0.0893, 0.1500, 0.2422]) -Greedy action tensor([ 0.9076, -0.4095, -0.2654, 0.5143]) tensor([0.4440, 0.1190, 0.1374, 0.2996]) -Greedy action tensor([ 1.3614, -0.1860, -0.2170, 0.3050]) tensor([0.5660, 0.1204, 0.1168, 0.1968]) -Greedy action tensor([ 0.5382, -0.5329, -0.1733, 0.1007]) tensor([0.4034, 0.1382, 0.1980, 0.2604]) -Greedy action tensor([ 1.7796, -0.1864, 0.1810, 0.4087]) tensor([0.6265, 0.0877, 0.1267, 0.1591]) -Greedy action tensor([ 1.0676, -0.0041, 0.0313, 0.1382]) tensor([0.4780, 0.1637, 0.1696, 0.1887]) -Greedy action tensor([ 1.4485, -0.8799, -0.0867, 0.3021]) tensor([0.6133, 0.0598, 0.1321, 0.1949]) -Greedy action tensor([ 2.0736, -0.7966, -0.8596, 0.6121]) tensor([0.7453, 0.0422, 0.0397, 0.1728]) -Greedy action tensor([ 1.1340, -0.9766, -0.1802, 0.5969]) tensor([0.5065, 0.0614, 0.1361, 0.2960]) -Greedy action tensor([ 1.5981, -0.1411, -0.6619, 0.8026]) tensor([0.5776, 0.1015, 0.0603, 0.2607]) -Greedy action tensor([ 1.4549, -0.3963, -0.5680, 0.4888]) tensor([0.5989, 0.0940, 0.0792, 0.2279]) -Greedy action tensor([ 1.4024, -0.3638, -0.1527, 0.0235]) tensor([0.6120, 0.1046, 0.1292, 0.1541]) -Greedy action tensor([ 2.1679, -0.7352, -0.4386, 0.6875]) tensor([0.7373, 0.0404, 0.0544, 0.1678]) -Greedy action tensor([ 1.5519, -0.4964, -0.2799, 0.5267]) tensor([0.6069, 0.0783, 0.0972, 0.2177]) -Greedy action tensor([ 1.9517, -0.9390, -0.2903, 0.8241]) tensor([0.6731, 0.0374, 0.0715, 0.2180]) -Greedy action tensor([ 1.5793, -0.5238, -0.4980, 0.6699]) tensor([0.6060, 0.0740, 0.0759, 0.2441]) -Greedy action tensor([ 1.3829, -0.0793, -0.3473, 0.1536]) tensor([0.5877, 0.1362, 0.1042, 0.1719]) -Greedy action tensor([ 1.5102, -0.4427, -0.3933, 0.4088]) tensor([0.6160, 0.0874, 0.0918, 0.2048]) -Greedy action tensor([ 1.6788, -0.6417, -0.1824, 0.4195]) tensor([0.6504, 0.0639, 0.1011, 0.1846]) -Greedy action tensor([ 1.6916, -0.6536, -0.1844, -0.0284]) tensor([0.7002, 0.0671, 0.1073, 0.1254]) -Greedy action tensor([ 1.4907, -0.8158, -0.0805, 0.4698]) tensor([0.5996, 0.0597, 0.1246, 0.2160]) -Greedy action tensor([ 1.6731, -0.5425, -0.5478, 0.6177]) tensor([0.6387, 0.0697, 0.0693, 0.2223]) -Greedy action tensor([ 0.8501, -0.6469, -0.0282, -0.3150]) tensor([0.5125, 0.1147, 0.2129, 0.1599]) -Greedy action tensor([ 0.9092, -1.1206, 0.1157, -0.9457]) tensor([0.5747, 0.0755, 0.2599, 0.0899]) -Greedy action tensor([ 0.3791, -0.2768, 0.0097, -0.5278]) tensor([0.3826, 0.1986, 0.2644, 0.1545]) -Greedy action tensor([ 0.8998, -0.6368, -0.0989, -0.3873]) tensor([0.5378, 0.1157, 0.1981, 0.1485]) -Greedy action tensor([ 0.6950, -0.5435, 0.0287, -0.6868]) tensor([0.4867, 0.1411, 0.2500, 0.1222]) -Greedy action tensor([ 0.4848, -0.3215, -0.1113, -0.3932]) tensor([0.4144, 0.1850, 0.2283, 0.1722]) -Greedy action tensor([ 0.2534, 0.0301, -0.0826, -0.6572]) tensor([0.3429, 0.2742, 0.2450, 0.1379]) -Greedy action tensor([ 0.8551, -0.6372, -0.0993, -0.3677]) tensor([0.5251, 0.1181, 0.2022, 0.1546]) -Greedy action tensor([ 0.2945, -0.3107, -0.1448, -0.1132]) tensor([0.3502, 0.1912, 0.2257, 0.2329]) -Greedy action tensor([ 0.4630, -0.4123, -0.0636, -0.0078]) tensor([0.3800, 0.1583, 0.2244, 0.2373]) -Greedy action tensor([ 1.0690, -0.5031, 0.0259, -0.2049]) tensor([0.5436, 0.1128, 0.1915, 0.1521]) -Greedy action tensor([ 0.4653, -0.3462, -0.0610, -0.1669]) tensor([0.3897, 0.1731, 0.2302, 0.2071]) -Greedy action tensor([ 1.1644, -0.9630, 0.1716, -0.6985]) tensor([0.6079, 0.0724, 0.2253, 0.0944]) -Greedy action tensor([ 0.2201, -0.0423, 0.0830, 0.0775]) tensor([0.2851, 0.2193, 0.2485, 0.2472]) -Greedy action tensor([ 0.4547, 0.2596, -0.2055, 0.2470]) tensor([0.3173, 0.2610, 0.1640, 0.2578]) -Greedy action tensor([ 0.6845, -0.2709, 0.1311, -0.4014]) tensor([0.4353, 0.1674, 0.2503, 0.1470]) -Greedy action tensor([ 0.4921, -0.2318, -0.1266, -0.2335]) tensor([0.3988, 0.1934, 0.2148, 0.1930]) -Greedy action tensor([ 0.8919, -0.6178, -0.0065, -0.4658]) tensor([0.5304, 0.1172, 0.2160, 0.1364]) -Greedy action tensor([ 0.9858, -0.4796, -0.0820, -0.6490]) tensor([0.5651, 0.1305, 0.1942, 0.1102]) -Greedy action tensor([ 0.5679, -0.2009, 0.1150, -0.1599]) tensor([0.3873, 0.1795, 0.2462, 0.1870]) -Greedy action tensor([ 0.6519, -0.5583, -0.0016, -0.2214]) tensor([0.4472, 0.1333, 0.2327, 0.1868]) -Greedy action tensor([ 0.6192, -0.3074, -0.0753, -0.3161]) tensor([0.4371, 0.1731, 0.2183, 0.1716]) -Greedy action tensor([ 0.6293, -0.4465, -0.1024, -0.3928]) tensor([0.4583, 0.1563, 0.2205, 0.1649]) -Greedy action tensor([ 0.6507, -0.4341, 0.0354, -0.1362]) tensor([0.4285, 0.1448, 0.2316, 0.1951]) -Greedy action tensor([ 0.9240, -0.4852, -0.1004, -0.6740]) tensor([0.5538, 0.1353, 0.1988, 0.1120]) -Greedy action tensor([ 1.0248, -0.6071, 0.0320, -0.2922]) tensor([0.5453, 0.1066, 0.2020, 0.1461]) -Greedy action tensor([ 0.7496, -0.7571, 0.0954, -0.4815]) tensor([0.4918, 0.1090, 0.2557, 0.1436]) -Greedy action tensor([ 1.0820, -0.3377, -0.4405, -0.5555]) tensor([0.6044, 0.1461, 0.1319, 0.1175]) -Greedy action tensor([ 0.9357, -0.4690, 0.0870, -0.6053]) tensor([0.5298, 0.1300, 0.2267, 0.1135]) -Greedy action tensor([ 0.6764, -0.2177, -0.1003, -0.1241]) tensor([0.4314, 0.1764, 0.1984, 0.1938]) -Greedy action tensor([ 0.8579, -0.7592, 0.1217, -0.4125]) tensor([0.5107, 0.1014, 0.2446, 0.1434]) -Greedy action tensor([ 0.5923, -0.7661, -0.1556, -0.4289]) tensor([0.4783, 0.1230, 0.2264, 0.1723]) -Greedy action tensor([ 0.3024, -0.1601, -0.1247, -0.3048]) tensor([0.3537, 0.2227, 0.2308, 0.1927]) -Greedy action tensor([ 0.4488, -0.1676, -0.0157, -0.2460]) tensor([0.3749, 0.2024, 0.2356, 0.1871]) -Greedy action tensor([ 0.9699, -0.4989, -0.2063, -0.2972]) tensor([0.5493, 0.1265, 0.1695, 0.1547]) -Greedy action tensor([ 0.8239, -0.3518, 0.0878, -0.6377]) tensor([0.4952, 0.1528, 0.2372, 0.1148]) -Greedy action tensor([ 0.4070, -0.3742, -0.0285, -0.1514]) tensor([0.3736, 0.1710, 0.2417, 0.2137]) -Greedy action tensor([ 0.5912, 0.0601, -0.1082, -0.4254]) tensor([0.4087, 0.2403, 0.2031, 0.1479]) -Greedy action tensor([ 0.4462, -0.1170, 0.0972, -0.1426]) tensor([0.3534, 0.2012, 0.2493, 0.1961]) -Greedy action tensor([ 0.5999, -0.1373, -0.0868, -0.1276]) tensor([0.4057, 0.1941, 0.2042, 0.1960]) -Greedy action tensor([ 0.4180, -0.1666, -0.1457, -0.1650]) tensor([0.3725, 0.2076, 0.2120, 0.2079]) -Greedy action tensor([ 0.6220, -0.3440, 0.0534, -0.3590]) tensor([0.4307, 0.1639, 0.2439, 0.1615]) -Greedy action tensor([ 0.4869, 0.0985, -0.1502, -0.1750]) tensor([0.3673, 0.2491, 0.1942, 0.1895]) -Greedy action tensor([ 0.9777, -0.4464, -0.1175, -0.3504]) tensor([0.5434, 0.1308, 0.1818, 0.1440]) -Greedy action tensor([ 0.9800, -0.4885, -0.1375, -0.2661]) tensor([0.5420, 0.1248, 0.1773, 0.1559]) -Greedy action tensor([ 0.2721, 0.0699, -0.1085, -0.2892]) tensor([0.3256, 0.2660, 0.2226, 0.1858]) -Greedy action tensor([ 0.5477, -0.0826, 0.0813, -0.2367]) tensor([0.3823, 0.2035, 0.2398, 0.1745]) -Greedy action tensor([ 1.0746, -0.7841, 0.0250, -0.4664]) tensor([0.5814, 0.0906, 0.2035, 0.1245]) -Greedy action tensor([ 0.5910, 0.1767, -0.1800, -0.3829]) tensor([0.3998, 0.2642, 0.1850, 0.1510]) -Greedy action tensor([ 0.6208, -0.4873, -0.0302, -0.6983]) tensor([0.4719, 0.1558, 0.2461, 0.1262]) -Greedy action tensor([ 0.4045, 0.1364, -0.2203, -0.1265]) tensor([0.3462, 0.2648, 0.1854, 0.2036]) -Greedy action tensor([ 0.4844, -0.0907, 0.0324, -0.3665]) tensor([0.3808, 0.2143, 0.2423, 0.1626]) -Greedy action tensor([ 0.8203, -0.4645, -0.1703, -0.1991]) tensor([0.4978, 0.1377, 0.1849, 0.1796]) -Greedy action tensor([ 0.4824, -0.2257, -0.0837, -0.2809]) tensor([0.3958, 0.1950, 0.2247, 0.1845]) -Greedy action tensor([ 0.5762, -0.1077, -0.0875, -0.4044]) tensor([0.4176, 0.2107, 0.2150, 0.1566]) -Greedy action tensor([ 0.8805, -0.3303, 0.0875, -0.1987]) tensor([0.4784, 0.1425, 0.2165, 0.1626]) -Greedy action tensor([ 0.3654, -0.3810, -0.0617, -0.0223]) tensor([0.3565, 0.1690, 0.2326, 0.2419]) -Greedy action tensor([ 0.4661, 0.0459, -0.0807, -0.3055]) tensor([0.3706, 0.2435, 0.2145, 0.1713]) -Greedy action tensor([ 0.5799, 0.1572, -0.0976, -0.1931]) tensor([0.3810, 0.2497, 0.1935, 0.1759]) -Greedy action tensor([ 0.8919, -0.7790, -0.1395, -0.5350]) tensor([0.5603, 0.1054, 0.1998, 0.1345]) -Greedy action tensor([ 0.7383, -0.2089, 0.0558, -0.1529]) tensor([0.4342, 0.1684, 0.2194, 0.1781]) -Greedy action tensor([ 0.3478, -0.0689, -0.0958, -0.2920]) tensor([0.3536, 0.2331, 0.2269, 0.1865]) -Greedy action tensor([ 1.0115, -0.5646, -0.1206, -0.3604]) tensor([0.5609, 0.1160, 0.1808, 0.1423]) -Greedy action tensor([ 1.2655, -0.9102, 0.0752, -0.6036]) tensor([0.6362, 0.0722, 0.1935, 0.0981]) -Greedy action tensor([ 0.6810, -0.2733, -0.1263, -0.1510]) tensor([0.4413, 0.1699, 0.1968, 0.1920]) -Greedy action tensor([ 0.7451, -0.3714, -0.1206, -0.2598]) tensor([0.4730, 0.1549, 0.1990, 0.1732]) -Greedy action tensor([ 0.9307, -0.4462, -0.0329, -0.2604]) tensor([0.5161, 0.1302, 0.1969, 0.1568]) -Greedy action tensor([ 0.5313, -0.7104, -0.1581, -0.2715]) tensor([0.4467, 0.1290, 0.2242, 0.2001]) -Greedy action tensor([ 0.5367, -0.1611, -0.0466, -0.0966]) tensor([0.3866, 0.1924, 0.2157, 0.2052]) -Greedy action tensor([ 0.4698, -0.6324, -0.0936, -0.0568]) tensor([0.4013, 0.1333, 0.2284, 0.2370]) -Greedy action tensor([ 0.7827, -0.4135, -0.0604, -0.2718]) tensor([0.4805, 0.1453, 0.2068, 0.1674]) -Greedy action tensor([ 0.6686, -0.6041, 0.1254, -0.4948]) tensor([0.4601, 0.1289, 0.2673, 0.1438]) -Greedy action tensor([ 0.5525, -0.0717, -0.2093, -0.1468]) tensor([0.4001, 0.2143, 0.1868, 0.1988]) -Greedy action tensor([ 1.1236, -0.7298, 0.0425, -0.7547]) tensor([0.6065, 0.0950, 0.2058, 0.0927]) -Greedy action tensor([ 0.3782, -0.3916, -0.1992, -0.0897]) tensor([0.3772, 0.1747, 0.2118, 0.2363]) -Greedy action tensor([ 0.7289, -0.4211, -0.0279, -0.4165]) tensor([0.4753, 0.1505, 0.2230, 0.1512]) -Greedy action tensor([ 0.8806, -0.6729, 0.1338, -0.3765]) tensor([0.5077, 0.1074, 0.2406, 0.1444]) -Greedy action tensor([ 0.8208, -0.3595, -0.0761, -0.2234]) tensor([0.4838, 0.1486, 0.1973, 0.1703]) -Greedy action tensor([ 0.5986, -0.5160, -0.1582, -0.0885]) tensor([0.4347, 0.1426, 0.2040, 0.2187]) -Greedy action tensor([ 0.7956, -0.2671, -0.6638, -0.3613]) tensor([0.5284, 0.1826, 0.1228, 0.1662]) -Greedy action tensor([ 0.1854, 0.1358, -0.0533, -0.4141]) tensor([0.3041, 0.2894, 0.2395, 0.1670]) -Greedy action tensor([-0.1632, -0.4386, -0.3572, 0.6953]) tensor([0.2023, 0.1536, 0.1666, 0.4774]) -Greedy action tensor([-0.1367, 0.1518, 0.6490, -0.9873]) tensor([0.2018, 0.2693, 0.4427, 0.0862]) -Greedy action tensor([ 2.0204, -0.5596, 0.9025, -0.1811]) tensor([0.6608, 0.0501, 0.2161, 0.0731]) -Greedy action tensor([ 0.2607, 0.2971, 0.1538, -0.9654]) tensor([0.3097, 0.3212, 0.2783, 0.0909]) -Greedy action tensor([ 0.0596, -1.5318, -0.0344, 0.6457]) tensor([0.2557, 0.0521, 0.2327, 0.4595]) -Greedy action tensor([ 1.1275, -0.8565, 1.2139, 1.5650]) tensor([0.2648, 0.0364, 0.2887, 0.4101]) -Greedy action tensor([1.0955, 0.8663, 0.7507, 0.4187]) tensor([0.3320, 0.2640, 0.2352, 0.1688]) -Greedy action tensor([ 1.4926, -0.9740, 1.2850, 1.6143]) tensor([0.3304, 0.0280, 0.2684, 0.3731]) -Greedy action tensor([ 0.5909, -1.0911, -0.8569, 0.5903]) tensor([0.4131, 0.0768, 0.0971, 0.4129]) -Greedy action tensor([0.7866, 0.0954, 0.9245, 0.0592]) tensor([0.3193, 0.1600, 0.3665, 0.1543]) -Greedy action tensor([ 1.5544, -0.1534, 0.4358, 0.6577]) tensor([0.5219, 0.0946, 0.1705, 0.2129]) -Greedy action tensor([ 0.1455, 0.3184, 0.9817, -0.8020]) tensor([0.2047, 0.2434, 0.4725, 0.0794]) -Greedy action tensor([-0.7694, 0.2618, -0.3066, -0.4411]) tensor([0.1475, 0.4135, 0.2343, 0.2048]) -Greedy action tensor([ 0.3978, 0.6656, 2.1573, -0.5146]) tensor([0.1174, 0.1535, 0.6820, 0.0471]) -Greedy action tensor([ 0.4574, -0.5126, 0.0451, -0.0306]) tensor([0.3766, 0.1428, 0.2494, 0.2312]) -Greedy action tensor([1.1876, 0.1249, 1.1846, 0.3578]) tensor([0.3599, 0.1244, 0.3588, 0.1570]) -Greedy action tensor([ 1.5772, 0.8656, -0.1531, 1.4050]) tensor([0.3984, 0.1956, 0.0706, 0.3354]) -Greedy action tensor([ 1.5164, -0.7267, 0.6346, 1.4331]) tensor([0.4098, 0.0435, 0.1697, 0.3771]) -Greedy action tensor([-0.3007, -0.2809, 0.7301, -0.7473]) tensor([0.1830, 0.1867, 0.5131, 0.1171]) -Greedy action tensor([ 0.6926, -1.3368, 1.2407, -0.2046]) tensor([0.3059, 0.0402, 0.5292, 0.1247]) -Greedy action tensor([ 1.1457, 0.2464, 1.9944, -0.0792]) tensor([0.2477, 0.1008, 0.5788, 0.0728]) -Greedy action tensor([1.3639, 0.9981, 0.8551, 0.7752]) tensor([0.3509, 0.2434, 0.2110, 0.1948]) -Greedy action tensor([ 2.0210, -2.0105, 0.5789, 0.9759]) tensor([0.6227, 0.0111, 0.1472, 0.2190]) -Greedy action tensor([0.9098, 0.4268, 0.5997, 0.4888]) tensor([0.3326, 0.2052, 0.2439, 0.2183]) -Greedy action tensor([-0.5500, -1.6637, 0.9097, 1.1262]) tensor([0.0911, 0.0299, 0.3921, 0.4869]) -Greedy action tensor([-0.8044, 0.7450, 0.2052, 0.5219]) tensor([0.0818, 0.3853, 0.2246, 0.3083]) -Greedy action tensor([ 1.1518, 0.0034, -0.3837, -0.0905]) tensor([0.5491, 0.1741, 0.1182, 0.1585]) -Greedy action tensor([-0.2192, -2.0767, -0.3940, -0.2615]) tensor([0.3385, 0.0528, 0.2842, 0.3245]) -Greedy action tensor([-0.9294, -0.7597, 1.0401, -0.9999]) tensor([0.0972, 0.1152, 0.6969, 0.0906]) -Greedy action tensor([ 1.1753, -0.3334, -0.4694, 1.0671]) tensor([0.4326, 0.0957, 0.0835, 0.3882]) -Greedy action tensor([-0.7822, 0.7730, -0.0548, -1.2022]) tensor([0.1182, 0.5596, 0.2446, 0.0776]) -Greedy action tensor([-1.1986, 0.2095, 0.0665, 0.2714]) tensor([0.0770, 0.3150, 0.2730, 0.3350]) -Greedy action tensor([ 0.0264, -1.0631, 1.3140, 0.4727]) tensor([0.1533, 0.0516, 0.5556, 0.2395]) -Greedy action tensor([ 1.5483, -0.7870, 0.1109, 0.2577]) tensor([0.6213, 0.0601, 0.1476, 0.1709]) -Greedy action tensor([-1.5060, -0.0261, 0.7380, -1.8766]) tensor([0.0645, 0.2831, 0.6079, 0.0445]) -Greedy action tensor([ 0.2948, -1.4497, -1.1095, -1.1709]) tensor([0.6056, 0.1058, 0.1487, 0.1398]) -Greedy action tensor([-0.8977, 0.8413, 0.1195, -1.2667]) tensor([0.0985, 0.5608, 0.2725, 0.0681]) -Greedy action tensor([ 1.0750, -1.0830, -0.1178, 0.8970]) tensor([0.4433, 0.0512, 0.1345, 0.3710]) -Greedy action tensor([-0.4046, -1.8575, -0.3727, 1.0421]) tensor([0.1535, 0.0359, 0.1585, 0.6522]) -Greedy action tensor([ 0.8118, 0.5020, 1.2194, -0.6175]) tensor([0.2877, 0.2110, 0.4324, 0.0689]) -Greedy action tensor([ 2.0061, -0.5851, 0.0056, 1.9658]) tensor([0.4607, 0.0345, 0.0623, 0.4425]) -Greedy action tensor([ 0.2529, -0.5229, -0.0871, 0.5456]) tensor([0.2847, 0.1311, 0.2027, 0.3815]) -Greedy action tensor([-0.3742, -0.2478, 0.9380, -0.9755]) tensor([0.1563, 0.1774, 0.5806, 0.0857]) -Greedy action tensor([-0.6107, 0.2364, 0.2380, -0.6058]) tensor([0.1498, 0.3495, 0.3501, 0.1506]) -Greedy action tensor([ 1.7993, 0.3533, -0.1406, 0.1325]) tensor([0.6377, 0.1502, 0.0917, 0.1204]) -Greedy action tensor([0.0312, 0.4631, 0.7142, 0.4215]) tensor([0.1667, 0.2568, 0.3301, 0.2463]) -Greedy action tensor([-1.2812, -0.4278, -0.5615, -0.2230]) tensor([0.1207, 0.2834, 0.2480, 0.3479]) -Greedy action tensor([0.7886, 0.4720, 0.1082, 0.0102]) tensor([0.3712, 0.2704, 0.1880, 0.1704]) -Greedy action tensor([-1.4693, -0.0153, 0.5578, -0.9240]) tensor([0.0685, 0.2932, 0.5201, 0.1182]) -Greedy action tensor([ 1.3421, -0.5596, -0.7257, 0.8194]) tensor([0.5351, 0.0799, 0.0677, 0.3173]) -Greedy action tensor([-1.5999, -0.1847, 1.4902, -1.0671]) tensor([0.0347, 0.1430, 0.7632, 0.0592]) -Greedy action tensor([-0.1797, -0.5102, -0.9859, 0.1239]) tensor([0.2841, 0.2041, 0.1269, 0.3849]) -Greedy action tensor([-1.4128, 0.5647, 0.3261, -0.7049]) tensor([0.0627, 0.4531, 0.3569, 0.1273]) -Greedy action tensor([-0.0342, -1.1601, 1.2022, 0.8244]) tensor([0.1403, 0.0455, 0.4831, 0.3311]) -Greedy action tensor([0.6199, 0.1391, 0.4878, 0.2287]) tensor([0.3154, 0.1950, 0.2763, 0.2133]) -Greedy action tensor([ 1.6352, -1.0615, -0.4987, 1.1905]) tensor([0.5474, 0.0369, 0.0648, 0.3509]) -Greedy action tensor([-0.2266, -0.2861, -0.4065, 0.5568]) tensor([0.2014, 0.1897, 0.1682, 0.4407]) -Greedy action tensor([ 0.2204, -0.2299, 0.0294, 0.4217]) tensor([0.2713, 0.1729, 0.2241, 0.3317]) -Greedy action tensor([-0.0914, -1.0998, -0.2301, -0.2208]) tensor([0.3211, 0.1172, 0.2795, 0.2822]) -Greedy action tensor([ 1.2647, -1.3093, 1.8749, 0.5768]) tensor([0.2924, 0.0223, 0.5383, 0.1470]) -Greedy action tensor([ 0.2517, -0.9916, 0.5792, -0.3366]) tensor([0.3095, 0.0893, 0.4294, 0.1718]) -Greedy action tensor([-0.2035, -0.6532, 1.4078, 0.0913]) tensor([0.1251, 0.0798, 0.6270, 0.1681]) -Greedy action tensor([ 0.4888, -1.9857, 0.2335, 1.0109]) tensor([0.2821, 0.0238, 0.2186, 0.4756]) -Greedy action tensor([ 0.3161, -0.1952, 0.5068, 1.0544]) tensor([0.2040, 0.1223, 0.2469, 0.4268]) -Greedy action tensor([-0.0826, 0.0706, 0.0016, 0.8261]) tensor([0.1744, 0.2033, 0.1897, 0.4327]) -Greedy action tensor([-0.0694, -1.0834, 0.8567, -0.6534]) tensor([0.2250, 0.0816, 0.5680, 0.1255]) -Greedy action tensor([-0.8323, -1.0584, -0.6847, 0.2325]) tensor([0.1707, 0.1362, 0.1979, 0.4952]) -Greedy action tensor([ 0.0137, -1.1057, 0.0095, -0.0710]) tensor([0.3085, 0.1007, 0.3072, 0.2835]) -Greedy action tensor([-0.8698, -2.1462, 0.4437, 0.3025]) tensor([0.1215, 0.0339, 0.4520, 0.3925]) -Greedy action tensor([ 0.8442, -0.9337, 0.7919, 0.0586]) tensor([0.3885, 0.0657, 0.3687, 0.1771]) -Greedy action tensor([-1.2488, -2.5453, -0.2011, -0.2413]) tensor([0.1457, 0.0398, 0.4154, 0.3990]) -Greedy action tensor([ 0.0810, -0.6278, 0.5236, -0.7166]) tensor([0.2858, 0.1407, 0.4449, 0.1287]) -Greedy action tensor([-0.3893, -0.5836, -1.5614, 0.8855]) tensor([0.1751, 0.1442, 0.0542, 0.6265]) -Greedy action tensor([-0.6569, -0.3678, -0.5876, -0.0054]) tensor([0.1878, 0.2507, 0.2013, 0.3602]) -Greedy action tensor([ 0.3170, -0.3616, 0.2189, 0.0279]) tensor([0.3162, 0.1604, 0.2866, 0.2368]) -Greedy action tensor([ 0.3719, 0.4186, 0.7593, -0.5467]) tensor([0.2551, 0.2673, 0.3758, 0.1018]) -Greedy action tensor([ 0.1897, -1.4942, 0.4366, -1.0231]) tensor([0.3619, 0.0672, 0.4633, 0.1076]) -Greedy action tensor([-0.3875, 0.0335, 2.2377, -1.1260]) tensor([0.0595, 0.0906, 0.8214, 0.0284]) -Greedy action tensor([-0.8432, -2.0262, -0.0496, 0.5399]) tensor([0.1332, 0.0408, 0.2947, 0.5313]) -Greedy action tensor([-0.9414, -0.4032, -0.9213, -0.5422]) tensor([0.1914, 0.3279, 0.1953, 0.2853]) -Greedy action tensor([ 0.0238, -0.8619, 0.8293, 0.8261]) tensor([0.1700, 0.0701, 0.3805, 0.3793]) -Greedy action tensor([-1.3642, 0.0385, 0.8908, -1.7725]) tensor([0.0655, 0.2664, 0.6246, 0.0435]) -Greedy action tensor([-1.1452, 0.5458, 0.0960, -0.1033]) tensor([0.0786, 0.4265, 0.2720, 0.2229]) -Greedy action tensor([-1.8909, -0.4564, 0.6412, -0.1542]) tensor([0.0426, 0.1789, 0.5363, 0.2421]) -Greedy action tensor([-0.8959, -0.6107, 0.0663, 0.6781]) tensor([0.1023, 0.1361, 0.2678, 0.4938]) -Greedy action tensor([-0.7762, -0.5610, 0.3460, -0.1719]) tensor([0.1400, 0.1736, 0.4301, 0.2562]) -Greedy action tensor([-1.8435, -0.4379, 0.6371, -0.0843]) tensor([0.0438, 0.1786, 0.5233, 0.2543]) -Greedy action tensor([-1.8016, -0.4035, 0.5692, -0.0856]) tensor([0.0469, 0.1899, 0.5023, 0.2609]) -Greedy action tensor([-1.9130, -0.4377, 0.6527, -0.1604]) tensor([0.0414, 0.1810, 0.5387, 0.2389]) -Greedy action tensor([-1.7831, -0.5169, 0.5988, -0.0857]) tensor([0.0480, 0.1703, 0.5196, 0.2621]) -Greedy action tensor([-1.8999, -0.4557, 0.6413, -0.1527]) tensor([0.0422, 0.1791, 0.5363, 0.2424]) -Greedy action tensor([-1.4924, -0.4984, 0.4588, -0.0470]) tensor([0.0667, 0.1803, 0.4697, 0.2832]) -Greedy action tensor([-1.7864, -0.2936, 0.5917, -0.0779]) tensor([0.0460, 0.2045, 0.4957, 0.2538]) -Greedy action tensor([-1.1277, -0.5347, 0.3815, 0.5626]) tensor([0.0784, 0.1419, 0.3546, 0.4251]) -Greedy action tensor([-1.3542, -0.5443, 0.4138, 0.3013]) tensor([0.0697, 0.1567, 0.4085, 0.3650]) -Greedy action tensor([-1.3797, -0.3277, 0.4288, 0.2787]) tensor([0.0657, 0.1882, 0.4010, 0.3451]) -Greedy action tensor([-1.9471, -0.4517, 0.6685, -0.1821]) tensor([0.0400, 0.1786, 0.5475, 0.2339]) -Greedy action tensor([-1.5753, -0.4217, 1.0718, 0.8144]) tensor([0.0343, 0.1086, 0.4834, 0.3737]) -Greedy action tensor([-1.5908, -0.3018, 0.6641, 0.1732]) tensor([0.0500, 0.1815, 0.4767, 0.2918]) -Greedy action tensor([-1.2504, -0.5506, 0.3716, 0.3104]) tensor([0.0779, 0.1568, 0.3944, 0.3709]) -Greedy action tensor([-0.9134, 0.1968, 0.1583, 0.0052]) tensor([0.1057, 0.3208, 0.3087, 0.2648]) -Greedy action tensor([-1.2814, -0.2026, 0.7826, 0.9160]) tensor([0.0480, 0.1413, 0.3784, 0.4323]) -Greedy action tensor([-0.7958, -0.3657, 0.3608, -0.3489]) tensor([0.1374, 0.2112, 0.4367, 0.2148]) -Greedy action tensor([-1.8349, -0.3357, 0.6007, -0.1217]) tensor([0.0445, 0.1995, 0.5089, 0.2471]) -Greedy action tensor([-1.8831, -0.4862, 0.6517, -0.1421]) tensor([0.0428, 0.1731, 0.5400, 0.2441]) -Greedy action tensor([-1.3680, 0.2521, 0.3332, -0.0227]) tensor([0.0650, 0.3287, 0.3565, 0.2497]) -Greedy action tensor([-0.2634, 0.2978, 0.2097, 0.4358]) tensor([0.1570, 0.2752, 0.2520, 0.3159]) -Greedy action tensor([-1.8057, -0.4776, 0.6679, -0.0222]) tensor([0.0443, 0.1671, 0.5252, 0.2634]) -Greedy action tensor([-1.7434, -0.4808, 0.5603, -0.0212]) tensor([0.0496, 0.1755, 0.4970, 0.2779]) -Greedy action tensor([-1.5916, -0.6005, 0.4705, -0.0454]) tensor([0.0615, 0.1658, 0.4838, 0.2888]) -Greedy action tensor([-1.7172, -0.4907, 0.6739, 0.1726]) tensor([0.0456, 0.1553, 0.4977, 0.3015]) -Greedy action tensor([-1.7785, -0.4824, 0.6033, -0.0403]) tensor([0.0472, 0.1727, 0.5114, 0.2687]) -Greedy action tensor([-1.7235, -0.0268, 0.5035, -0.0523]) tensor([0.0475, 0.2592, 0.4406, 0.2527]) -Greedy action tensor([-1.8952, -0.4431, 0.6431, -0.1548]) tensor([0.0423, 0.1808, 0.5357, 0.2412]) -Greedy action tensor([-1.8677, -0.4624, 0.6221, -0.1416]) tensor([0.0439, 0.1792, 0.5300, 0.2469]) -Greedy action tensor([-0.9407, -0.5783, 0.2492, 0.3005]) tensor([0.1089, 0.1565, 0.3579, 0.3767]) -Greedy action tensor([-1.8982, -0.3431, 0.6379, -0.1414]) tensor([0.0414, 0.1960, 0.5228, 0.2398]) -Greedy action tensor([-1.8605, -0.4621, 0.6353, -0.1234]) tensor([0.0437, 0.1771, 0.5307, 0.2485]) -Greedy action tensor([-1.5769, -0.6175, 0.6124, 0.2885]) tensor([0.0526, 0.1374, 0.4700, 0.3400]) -Greedy action tensor([-1.7701, -0.4637, 0.6074, -0.0501]) tensor([0.0475, 0.1754, 0.5119, 0.2652]) -Greedy action tensor([-1.6408, -0.4169, 0.6328, 0.1859]) tensor([0.0492, 0.1673, 0.4779, 0.3056]) -Greedy action tensor([-1.8874, -0.4832, 0.6491, -0.1457]) tensor([0.0427, 0.1739, 0.5396, 0.2437]) -Greedy action tensor([-1.9103, -0.4296, 0.6452, -0.1652]) tensor([0.0417, 0.1832, 0.5366, 0.2386]) -Greedy action tensor([-0.7465, -0.5426, 0.2540, 0.1957]) tensor([0.1331, 0.1632, 0.3621, 0.3416]) -Greedy action tensor([-1.9439, -0.4556, 0.6718, -0.1776]) tensor([0.0401, 0.1775, 0.5481, 0.2344]) -Greedy action tensor([-0.0108, 0.6011, 0.6183, 1.3922]) tensor([0.1138, 0.2099, 0.2135, 0.4629]) -Greedy action tensor([-1.9817, -0.8924, 0.6801, 0.1221]) tensor([0.0378, 0.1122, 0.5406, 0.3094]) -Greedy action tensor([-1.8959, -0.3741, 0.6359, -0.1470]) tensor([0.0418, 0.1916, 0.5261, 0.2405]) -Greedy action tensor([-1.8526, -0.4354, 0.6260, -0.1142]) tensor([0.0440, 0.1814, 0.5244, 0.2502]) -Greedy action tensor([-1.8629, -0.4501, 0.6435, -0.0987]) tensor([0.0431, 0.1770, 0.5284, 0.2515]) -Greedy action tensor([-1.8517, -0.4807, 0.6651, -0.0790]) tensor([0.0431, 0.1697, 0.5337, 0.2536]) -Greedy action tensor([-1.4520, -0.3034, 0.5753, 0.3065]) tensor([0.0570, 0.1797, 0.4327, 0.3307]) -Greedy action tensor([-0.5376, -0.3137, 0.9399, 1.6197]) tensor([0.0654, 0.0819, 0.2868, 0.5659]) -Greedy action tensor([-1.6950, -0.3226, 0.6408, 0.0837]) tensor([0.0472, 0.1860, 0.4875, 0.2793]) -Greedy action tensor([-0.3764, -0.2670, 0.2114, 0.2736]) tensor([0.1715, 0.1913, 0.3087, 0.3285]) -Greedy action tensor([-1.7787, -0.4707, 0.6184, 0.0072]) tensor([0.0462, 0.1708, 0.5076, 0.2755]) -Greedy action tensor([-1.8022, -0.4543, 0.5953, -0.1066]) tensor([0.0470, 0.1808, 0.5163, 0.2559]) -Greedy action tensor([-1.8346e+00, 1.4839e-03, 5.6710e-01, -1.7580e-01]) tensor([0.0424, 0.2661, 0.4685, 0.2229]) -Greedy action tensor([-1.7527, -0.1841, 0.5738, 0.0305]) tensor([0.0455, 0.2183, 0.4657, 0.2705]) -Greedy action tensor([-1.9155, -0.4280, 0.6495, -0.1672]) tensor([0.0414, 0.1831, 0.5379, 0.2377]) -Greedy action tensor([-1.9076, -0.4561, 0.6512, -0.1609]) tensor([0.0418, 0.1784, 0.5400, 0.2397]) -Greedy action tensor([-1.9032, -0.3100, 0.6250, -0.1560]) tensor([0.0413, 0.2034, 0.5181, 0.2372]) -Greedy action tensor([-1.8140, -0.3333, 0.6337, -0.1148]) tensor([0.0446, 0.1960, 0.5155, 0.2439]) -Greedy action tensor([-1.6159, -0.4588, 0.6216, 0.2208]) tensor([0.0504, 0.1604, 0.4726, 0.3165]) -Greedy action tensor([-1.0517, -0.6590, 1.0112, 1.3701]) tensor([0.0463, 0.0685, 0.3640, 0.5212]) -Greedy action tensor([-1.4277, -0.6170, 0.4458, 0.0354]) tensor([0.0710, 0.1598, 0.4624, 0.3068]) -Greedy action tensor([-0.6900, 0.4327, -0.2297, -0.2372]) tensor([0.1383, 0.4250, 0.2192, 0.2175]) -Greedy action tensor([-1.2314, -0.5629, 0.3120, 0.2971]) tensor([0.0817, 0.1594, 0.3823, 0.3767]) -Greedy action tensor([-1.6111, -0.3010, 0.5699, 0.0822]) tensor([0.0526, 0.1951, 0.4661, 0.2862]) -Greedy action tensor([-1.8834, -0.4422, 0.6392, -0.1458]) tensor([0.0428, 0.1808, 0.5332, 0.2432]) -Greedy action tensor([-1.5287, 0.0445, 0.4013, -0.0235]) tensor([0.0581, 0.2801, 0.4002, 0.2617]) -Greedy action tensor([-1.7113, -0.8986, 0.7130, -0.2164]) tensor([0.0526, 0.1186, 0.5942, 0.2346]) -Greedy action tensor([-0.9600, -0.6314, 0.2555, -0.5108]) tensor([0.1365, 0.1895, 0.4601, 0.2139]) -Greedy action tensor([-1.6336, -0.5368, 0.5848, 0.1066]) tensor([0.0530, 0.1586, 0.4867, 0.3017]) -Greedy action tensor([-1.6371, -0.3633, 0.5288, 0.0311]) tensor([0.0538, 0.1922, 0.4690, 0.2851]) -Greedy action tensor([-1.7816, -0.3953, 0.6454, -0.0180]) tensor([0.0451, 0.1805, 0.5111, 0.2633]) -Greedy action tensor([-1.8590, -0.3594, 0.6187, -0.1219]) tensor([0.0433, 0.1942, 0.5163, 0.2462]) -Greedy action tensor([-0.8513, -0.7777, 1.1806, 0.5826]) tensor([0.0719, 0.0774, 0.5488, 0.3018]) -Greedy action tensor([-1.7634, -0.6192, 0.6971, -0.4307]) tensor([0.0509, 0.1599, 0.5962, 0.1930]) -Greedy action tensor([-1.2621, -0.6703, 0.3755, 0.3900]) tensor([0.0759, 0.1372, 0.3906, 0.3963]) -Greedy action tensor([-1.8700, -0.7048, 1.6453, 0.8743]) tensor([0.0187, 0.0601, 0.6299, 0.2913]) -Greedy action tensor([-1.8969, -0.2528, 0.6459, -0.2354]) tensor([0.0414, 0.2143, 0.5263, 0.2180]) -Greedy action tensor([-1.9222, -0.4381, 0.6572, -0.1675]) tensor([0.0410, 0.1809, 0.5409, 0.2371]) -Greedy action tensor([ 1.7932, -0.6116, -0.3115, 0.0855]) tensor([0.7176, 0.0648, 0.0875, 0.1301]) -Greedy action tensor([ 1.4154, -0.4672, -0.2157, 0.4542]) tensor([0.5779, 0.0880, 0.1131, 0.2210]) -Greedy action tensor([ 0.5782, -0.2009, -0.0393, 0.0228]) tensor([0.3888, 0.1784, 0.2097, 0.2231]) -Greedy action tensor([ 1.0748, -0.4684, -0.2063, 0.2113]) tensor([0.5227, 0.1117, 0.1452, 0.2204]) -Greedy action tensor([ 1.4569, -0.2196, -0.5197, 0.6137]) tensor([0.5695, 0.1065, 0.0789, 0.2451]) -Greedy action tensor([ 1.5337, -0.8724, -0.5147, 0.1941]) tensor([0.6752, 0.0609, 0.0871, 0.1769]) -Greedy action tensor([ 1.7697, -0.3648, -0.4394, 0.8708]) tensor([0.6116, 0.0724, 0.0672, 0.2489]) -Greedy action tensor([ 1.3103, -0.7022, -0.1055, 0.5809]) tensor([0.5381, 0.0719, 0.1306, 0.2594]) -Greedy action tensor([ 1.4263, -0.2146, -0.3186, 0.1056]) tensor([0.6115, 0.1185, 0.1068, 0.1632]) -Greedy action tensor([ 1.4904, -0.3749, -0.1527, 0.6134]) tensor([0.5668, 0.0878, 0.1096, 0.2358]) -Greedy action tensor([ 1.1072, -0.3611, -0.3107, 0.5843]) tensor([0.4842, 0.1115, 0.1173, 0.2870]) -Greedy action tensor([ 1.4612, -0.4531, -0.3990, 0.5072]) tensor([0.5923, 0.0873, 0.0922, 0.2282]) -Greedy action tensor([ 1.6326, -0.8074, -0.3469, 0.5661]) tensor([0.6372, 0.0555, 0.0880, 0.2193]) -Greedy action tensor([ 1.5421, -0.5305, -0.5450, 0.1234]) tensor([0.6703, 0.0844, 0.0831, 0.1622]) -Greedy action tensor([ 1.0836, -0.1646, -0.4195, 0.3832]) tensor([0.4985, 0.1431, 0.1109, 0.2475]) -Greedy action tensor([ 1.3134, -0.4068, 0.1861, 0.1285]) tensor([0.5529, 0.0990, 0.1791, 0.1691]) -Greedy action tensor([ 0.8200, -0.6098, -0.1370, 0.4348]) tensor([0.4341, 0.1039, 0.1667, 0.2953]) -Greedy action tensor([ 0.8129, -0.1814, -0.2076, 0.1557]) tensor([0.4447, 0.1645, 0.1603, 0.2305]) -Greedy action tensor([ 1.5162, -0.4602, -0.2199, 0.5247]) tensor([0.5932, 0.0822, 0.1045, 0.2201]) -Greedy action tensor([ 0.7476, -0.3081, -0.1015, -0.3383]) tensor([0.4732, 0.1646, 0.2024, 0.1597]) -Greedy action tensor([ 1.1735, -0.1111, -0.1343, 0.1144]) tensor([0.5280, 0.1461, 0.1428, 0.1831]) -Greedy action tensor([ 0.7402, -0.0823, 0.0394, 0.2047]) tensor([0.3967, 0.1743, 0.1968, 0.2322]) -Greedy action tensor([ 1.2122, -0.7182, 0.1425, -0.0828]) tensor([0.5675, 0.0823, 0.1947, 0.1554]) -Greedy action tensor([ 1.6001, -0.6575, -0.1429, 0.3965]) tensor([0.6330, 0.0662, 0.1108, 0.1900]) -Greedy action tensor([ 1.7597, -0.3886, -0.3041, 0.4277]) tensor([0.6633, 0.0774, 0.0842, 0.1751]) -Greedy action tensor([ 0.9174, -0.1847, -0.3236, 0.3994]) tensor([0.4511, 0.1498, 0.1304, 0.2687]) -Greedy action tensor([ 0.3584, -0.3299, -0.4413, 0.0228]) tensor([0.3750, 0.1884, 0.1685, 0.2681]) -Greedy action tensor([ 1.8729, -0.6646, -0.3323, 0.2515]) tensor([0.7210, 0.0570, 0.0795, 0.1425]) -Greedy action tensor([ 1.0852, 0.1743, 0.0301, -0.0140]) tensor([0.4800, 0.1930, 0.1671, 0.1599]) -Greedy action tensor([ 1.6806, -0.8414, -0.5308, 0.0603]) tensor([0.7206, 0.0579, 0.0789, 0.1426]) -Greedy action tensor([ 1.2267, -1.0094, -0.3417, 0.7028]) tensor([0.5243, 0.0560, 0.1092, 0.3105]) -Greedy action tensor([ 1.2410, -0.5080, -0.4513, 0.2979]) tensor([0.5723, 0.0995, 0.1053, 0.2229]) -Greedy action tensor([ 2.5028, -1.4829, -0.3551, 0.8210]) tensor([0.7924, 0.0147, 0.0455, 0.1474]) -Greedy action tensor([ 2.0217, -0.2497, -1.0449, 0.3008]) tensor([0.7526, 0.0776, 0.0351, 0.1347]) -Greedy action tensor([ 1.0233, -0.6746, 0.0454, 0.2886]) tensor([0.4905, 0.0898, 0.1845, 0.2353]) -Greedy action tensor([ 1.5594, -0.3935, -0.6167, 0.3833]) tensor([0.6395, 0.0907, 0.0726, 0.1973]) -Greedy action tensor([ 1.9952, -0.9504, -0.2555, 0.8592]) tensor([0.6761, 0.0355, 0.0712, 0.2171]) -Greedy action tensor([ 0.9108, -0.4722, -0.2884, -0.1244]) tensor([0.5243, 0.1315, 0.1580, 0.1862]) -Greedy action tensor([ 1.0334, -0.4984, -0.4452, 0.5579]) tensor([0.4841, 0.1046, 0.1104, 0.3009]) -Greedy action tensor([ 1.9531, 0.4213, 0.0444, -0.0170]) tensor([0.6650, 0.1437, 0.0986, 0.0927]) -Greedy action tensor([ 1.3219, -0.1921, -0.3251, 0.4041]) tensor([0.5519, 0.1214, 0.1063, 0.2204]) -Greedy action tensor([ 0.7439, -0.5714, -0.1013, 0.3443]) tensor([0.4222, 0.1133, 0.1813, 0.2831]) -Greedy action tensor([ 1.2009, -0.2767, -0.2985, -0.0165]) tensor([0.5723, 0.1306, 0.1278, 0.1694]) -Greedy action tensor([ 1.8417, -0.1287, -0.4216, 0.4098]) tensor([0.6746, 0.0940, 0.0702, 0.1611]) -Greedy action tensor([ 1.5153, 0.0392, -0.9023, 0.4614]) tensor([0.6002, 0.1371, 0.0535, 0.2092]) -Greedy action tensor([ 1.6980, -1.1963, -0.1350, 0.0849]) tensor([0.7069, 0.0391, 0.1131, 0.1409]) -Greedy action tensor([ 1.5286, -0.2118, -0.6370, 0.4273]) tensor([0.6163, 0.1081, 0.0707, 0.2049]) -Greedy action tensor([ 1.4417, -0.1776, -0.2158, 0.2455]) tensor([0.5914, 0.1171, 0.1127, 0.1788]) -Greedy action tensor([ 1.1490, 0.1813, -0.6289, 0.0229]) tensor([0.5338, 0.2028, 0.0902, 0.1731]) -Greedy action tensor([ 0.7125, -0.0694, -0.1811, 0.3915]) tensor([0.3858, 0.1765, 0.1578, 0.2798]) -Greedy action tensor([ 1.0252, -0.3621, -0.1384, 0.2708]) tensor([0.4920, 0.1229, 0.1537, 0.2314]) -Greedy action tensor([ 1.3464, -0.5794, 0.0967, 0.2388]) tensor([0.5673, 0.0827, 0.1626, 0.1874]) -Greedy action tensor([ 2.0359, -0.5672, -0.3872, 0.1952]) tensor([0.7568, 0.0560, 0.0671, 0.1201]) -Greedy action tensor([ 1.6498, -0.2750, -0.2565, 0.5333]) tensor([0.6165, 0.0900, 0.0916, 0.2019]) -Greedy action tensor([ 1.3434, -0.0767, -0.1153, 0.2876]) tensor([0.5488, 0.1326, 0.1276, 0.1909]) -Greedy action tensor([ 1.2108, -0.3388, -0.3879, 0.7086]) tensor([0.4951, 0.1051, 0.1001, 0.2997]) -Greedy action tensor([ 1.5253, -0.4075, -0.6112, 0.4655]) tensor([0.6214, 0.0899, 0.0734, 0.2153]) -Greedy action tensor([ 1.8930, -0.7815, -0.2048, 0.6063]) tensor([0.6813, 0.0470, 0.0836, 0.1882]) -Greedy action tensor([ 1.1124, -0.4668, -0.1006, -0.0402]) tensor([0.5497, 0.1133, 0.1634, 0.1736]) -Greedy action tensor([ 1.6432, -0.4633, -0.1284, 0.2541]) tensor([0.6489, 0.0790, 0.1104, 0.1618]) -Greedy action tensor([ 0.7647, -0.3852, 0.0596, 0.1704]) tensor([0.4233, 0.1340, 0.2091, 0.2336]) -Greedy action tensor([ 1.1408, -0.0624, -0.4696, 0.0637]) tensor([0.5433, 0.1631, 0.1086, 0.1850]) -Greedy action tensor([ 1.1756, -0.3895, -0.8199, 0.4241]) tensor([0.5505, 0.1151, 0.0748, 0.2596]) -Greedy action tensor([ 1.3395, -0.4906, -0.2018, 0.6029]) tensor([0.5396, 0.0866, 0.1155, 0.2583]) -Greedy action tensor([ 1.4040, -0.4542, -0.7995, 0.4691]) tensor([0.6028, 0.0940, 0.0666, 0.2367]) -Greedy action tensor([ 0.5734, -0.6302, -0.2224, 0.2915]) tensor([0.3991, 0.1198, 0.1801, 0.3011]) -Greedy action tensor([ 1.8550, -0.4233, -0.4470, 1.0143]) tensor([0.6120, 0.0627, 0.0612, 0.2640]) -Greedy action tensor([ 0.9694, -0.2036, -0.1532, 0.0211]) tensor([0.4945, 0.1530, 0.1609, 0.1916]) -Greedy action tensor([ 1.5288, 0.1848, -0.7253, 0.3253]) tensor([0.6003, 0.1565, 0.0630, 0.1802]) -Greedy action tensor([ 2.0577, -1.0319, -0.1565, 0.4731]) tensor([0.7354, 0.0335, 0.0803, 0.1508]) -Greedy action tensor([ 1.4316, -0.4739, -0.3310, 0.1739]) tensor([0.6232, 0.0927, 0.1069, 0.1772]) -Greedy action tensor([ 1.5735, -0.3197, -0.4236, 0.0334]) tensor([0.6664, 0.1003, 0.0904, 0.1428]) -Greedy action tensor([ 0.7919, -0.4204, 0.3170, 0.0290]) tensor([0.4192, 0.1247, 0.2607, 0.1954]) -Greedy action tensor([ 1.1750, -0.6053, -0.3084, 0.3156]) tensor([0.5498, 0.0927, 0.1247, 0.2328]) -Greedy action tensor([ 1.5697, -0.4959, -0.2992, 0.2039]) tensor([0.6510, 0.0825, 0.1004, 0.1661]) -Greedy action tensor([ 1.5466, -0.7232, 0.0409, 0.2703]) tensor([0.6234, 0.0644, 0.1383, 0.1739]) -Greedy action tensor([ 1.5151, -0.2874, -0.2039, 0.7405]) tensor([0.5540, 0.0914, 0.0993, 0.2553]) -Greedy action tensor([ 0.8217, -0.1094, -0.2115, 0.0072]) tensor([0.4560, 0.1797, 0.1623, 0.2020]) -Greedy action tensor([ 1.5864, -0.1908, -1.0911, -0.2091]) tensor([0.7123, 0.1205, 0.0490, 0.1183]) -Greedy action tensor([ 0.9909, -0.6121, -0.1517, 0.3622]) tensor([0.4870, 0.0980, 0.1553, 0.2597]) -Greedy action tensor([ 2.1095, -1.2298, 0.0497, 0.4255]) tensor([0.7415, 0.0263, 0.0945, 0.1377]) -Greedy action tensor([ 0.7895, -0.3463, -0.0023, -0.0750]) tensor([0.4555, 0.1463, 0.2063, 0.1919]) -Greedy action tensor([ 0.9120, -0.5116, -0.1600, -0.1931]) tensor([0.5224, 0.1258, 0.1788, 0.1730]) -Greedy action tensor([ 0.6171, -0.6998, -0.0983, -0.3548]) tensor([0.4683, 0.1255, 0.2290, 0.1772]) -Greedy action tensor([ 0.6225, 0.1754, -0.1905, 0.1616]) tensor([0.3685, 0.2356, 0.1634, 0.2324]) -Greedy action tensor([ 0.6299, -0.5594, -0.0090, -0.1733]) tensor([0.4385, 0.1335, 0.2315, 0.1964]) -Greedy action tensor([ 0.4464, -0.5482, 0.0478, -0.6843]) tensor([0.4230, 0.1565, 0.2840, 0.1366]) -Greedy action tensor([ 0.4754, -0.1290, -0.1049, -0.1810]) tensor([0.3810, 0.2082, 0.2132, 0.1976]) -Greedy action tensor([ 0.4098, -0.2871, 0.0342, -0.3206]) tensor([0.3750, 0.1868, 0.2576, 0.1806]) -Greedy action tensor([ 0.7474, -0.3421, -0.0772, -0.4094]) tensor([0.4786, 0.1610, 0.2098, 0.1505]) -Greedy action tensor([ 0.4854, -0.1388, -0.0806, -0.4345]) tensor([0.3997, 0.2141, 0.2269, 0.1593]) -Greedy action tensor([ 0.6497, -0.4106, -0.0134, -0.2505]) tensor([0.4409, 0.1527, 0.2272, 0.1792]) -Greedy action tensor([ 0.5434, -0.0475, 0.0006, 0.0490]) tensor([0.3643, 0.2018, 0.2117, 0.2222]) -Greedy action tensor([ 1.2177, -0.9185, -0.0883, -0.9197]) tensor([0.6636, 0.0784, 0.1798, 0.0783]) -Greedy action tensor([ 0.5155, 0.0126, 0.0431, -0.0825]) tensor([0.3599, 0.2177, 0.2244, 0.1979]) -Greedy action tensor([ 0.1420, 0.0056, -0.0311, -0.2157]) tensor([0.2930, 0.2556, 0.2464, 0.2049]) -Greedy action tensor([ 0.4329, -0.4256, -0.0242, -0.3808]) tensor([0.4000, 0.1695, 0.2532, 0.1773]) -Greedy action tensor([ 0.6974, -0.2939, 0.0275, -0.4291]) tensor([0.4531, 0.1681, 0.2319, 0.1469]) -Greedy action tensor([ 0.2640, 0.0325, -0.1552, -0.2544]) tensor([0.3283, 0.2604, 0.2158, 0.1955]) -Greedy action tensor([ 0.6159, -0.3842, -0.1256, -0.2717]) tensor([0.4433, 0.1631, 0.2112, 0.1825]) -Greedy action tensor([ 0.3597, -0.2489, -0.0819, -0.3702]) tensor([0.3747, 0.2039, 0.2409, 0.1806]) -Greedy action tensor([ 0.7318, -0.8850, -0.2282, -0.2859]) tensor([0.5147, 0.1022, 0.1971, 0.1860]) -Greedy action tensor([ 0.5131, -0.2504, -0.0166, -0.2664]) tensor([0.3979, 0.1854, 0.2342, 0.1825]) -Greedy action tensor([ 0.3723, -0.1462, -0.0102, -0.1028]) tensor([0.3449, 0.2054, 0.2353, 0.2145]) -Greedy action tensor([ 0.8801, -0.4678, -0.0339, -0.3357]) tensor([0.5109, 0.1327, 0.2048, 0.1515]) -Greedy action tensor([ 1.0093, -0.7756, 0.1516, -0.5180]) tensor([0.5528, 0.0928, 0.2345, 0.1200]) -Greedy action tensor([ 0.8840, -0.4078, -0.0825, -0.5433]) tensor([0.5277, 0.1450, 0.2007, 0.1266]) -Greedy action tensor([ 0.5119, -0.3417, -0.0508, -0.0896]) tensor([0.3932, 0.1674, 0.2240, 0.2154]) -Greedy action tensor([ 0.3674, -0.0287, -0.1421, 0.0084]) tensor([0.3365, 0.2264, 0.2022, 0.2350]) -Greedy action tensor([ 0.2056, 0.0836, 0.0893, -0.2505]) tensor([0.2933, 0.2596, 0.2611, 0.1859]) -Greedy action tensor([ 0.6555, -0.4537, -0.0947, -0.2418]) tensor([0.4525, 0.1493, 0.2137, 0.1845]) -Greedy action tensor([ 0.5445, -0.0262, -0.0573, -0.1804]) tensor([0.3850, 0.2176, 0.2109, 0.1865]) -Greedy action tensor([ 0.5664, 0.0465, -0.0853, -0.0399]) tensor([0.3758, 0.2234, 0.1958, 0.2049]) -Greedy action tensor([ 0.7917, -0.3039, 0.0535, -0.3999]) tensor([0.4726, 0.1580, 0.2259, 0.1435]) -Greedy action tensor([ 0.5509, -0.6613, -0.1332, -0.2363]) tensor([0.4430, 0.1318, 0.2235, 0.2016]) -Greedy action tensor([ 0.9706, -0.4361, -0.0606, -0.2859]) tensor([0.5302, 0.1299, 0.1891, 0.1509]) -Greedy action tensor([ 0.9416, -0.8047, 0.0293, -0.5131]) tensor([0.5526, 0.0964, 0.2219, 0.1290]) -Greedy action tensor([ 0.4149, -0.2079, -0.0023, -0.1771]) tensor([0.3638, 0.1952, 0.2397, 0.2013]) -Greedy action tensor([ 0.7108, -0.3756, 0.0384, -0.3577]) tensor([0.4563, 0.1540, 0.2330, 0.1568]) -Greedy action tensor([ 0.9465, -0.5458, 0.0423, -0.4270]) tensor([0.5311, 0.1194, 0.2150, 0.1345]) -Greedy action tensor([ 0.7786, -0.7497, -0.1175, -0.1941]) tensor([0.4992, 0.1083, 0.2038, 0.1887]) -Greedy action tensor([ 0.9109, -0.5365, 0.0362, -0.2960]) tensor([0.5125, 0.1205, 0.2137, 0.1533]) -Greedy action tensor([ 0.6872, -0.0803, -0.0787, -0.3956]) tensor([0.4410, 0.2047, 0.2050, 0.1493]) -Greedy action tensor([ 0.9689, -0.9359, -0.0334, -0.3940]) tensor([0.5644, 0.0840, 0.2072, 0.1444]) -Greedy action tensor([ 0.3129, -0.0262, -0.1876, -0.1707]) tensor([0.3407, 0.2427, 0.2065, 0.2101]) -Greedy action tensor([ 0.9047, -0.6262, 0.1187, -0.3955]) tensor([0.5143, 0.1113, 0.2343, 0.1401]) -Greedy action tensor([ 0.6104, -0.3696, 0.0352, -0.6003]) tensor([0.4472, 0.1679, 0.2516, 0.1333]) -Greedy action tensor([ 0.5976, -0.3676, -0.4172, -0.4755]) tensor([0.4795, 0.1827, 0.1738, 0.1640]) -Greedy action tensor([ 0.7084, -0.2617, -0.0236, -0.3627]) tensor([0.4540, 0.1721, 0.2184, 0.1556]) -Greedy action tensor([ 0.4852, -0.4426, 0.1317, -0.4635]) tensor([0.4024, 0.1591, 0.2826, 0.1558]) -Greedy action tensor([ 0.7160, -0.5351, 0.1646, -0.5412]) tensor([0.4658, 0.1333, 0.2684, 0.1325]) -Greedy action tensor([ 0.5699, -0.3229, -0.0024, -0.2370]) tensor([0.4132, 0.1692, 0.2332, 0.1844]) -Greedy action tensor([ 0.6499, -0.1651, 0.1456, -0.2478]) tensor([0.4075, 0.1804, 0.2461, 0.1661]) -Greedy action tensor([ 0.8101, -0.3964, -0.0015, -0.3545]) tensor([0.4865, 0.1456, 0.2161, 0.1518]) -Greedy action tensor([ 0.4795, -0.0318, -0.1355, -0.2911]) tensor([0.3842, 0.2304, 0.2077, 0.1778]) -Greedy action tensor([ 0.9315, -0.0446, -0.1768, -0.0825]) tensor([0.4832, 0.1820, 0.1595, 0.1753]) -Greedy action tensor([ 0.6176, -0.5644, -0.1524, -0.0501]) tensor([0.4381, 0.1343, 0.2029, 0.2247]) -Greedy action tensor([ 0.5128, -0.2243, 0.1093, -0.2117]) tensor([0.3801, 0.1819, 0.2539, 0.1842]) -Greedy action tensor([ 0.7268, -0.7639, -0.0049, -0.2983]) tensor([0.4842, 0.1091, 0.2330, 0.1737]) -Greedy action tensor([ 0.0708, 0.0908, 0.0472, -0.1415]) tensor([0.2628, 0.2681, 0.2566, 0.2125]) -Greedy action tensor([ 0.6314, -0.4070, -0.0345, -0.3385]) tensor([0.4450, 0.1576, 0.2287, 0.1687]) -Greedy action tensor([ 0.9541, -0.3785, -0.0999, -0.0184]) tensor([0.5024, 0.1325, 0.1751, 0.1900]) -Greedy action tensor([ 0.5644, -0.0963, -0.0979, -0.1605]) tensor([0.3974, 0.2052, 0.2049, 0.1925]) -Greedy action tensor([ 0.8137, -0.3848, 0.0296, -0.5775]) tensor([0.4983, 0.1503, 0.2275, 0.1240]) -Greedy action tensor([ 0.8436, -0.3757, 0.1856, -0.3458]) tensor([0.4722, 0.1395, 0.2445, 0.1437]) -Greedy action tensor([ 0.7525, -0.5817, -0.1080, -0.5589]) tensor([0.5113, 0.1347, 0.2163, 0.1378]) -Greedy action tensor([ 0.4909, 0.0718, -0.0699, 0.0327]) tensor([0.3496, 0.2299, 0.1995, 0.2211]) -Greedy action tensor([ 0.6409, -0.5524, -0.1590, -0.3772]) tensor([0.4731, 0.1434, 0.2126, 0.1709]) -Greedy action tensor([ 0.6543, 0.0201, -0.3132, -0.3062]) tensor([0.4361, 0.2313, 0.1657, 0.1669]) -Greedy action tensor([ 0.3886, 0.0395, -0.0286, -0.0036]) tensor([0.3290, 0.2320, 0.2168, 0.2222]) -Greedy action tensor([ 0.3299, -0.0205, -0.0971, -0.0712]) tensor([0.3304, 0.2327, 0.2156, 0.2212]) -Greedy action tensor([ 0.9423, -0.7490, 0.0766, -0.8667]) tensor([0.5653, 0.1042, 0.2379, 0.0926]) -Greedy action tensor([ 0.6714, -0.3486, -0.0398, -0.2560]) tensor([0.4450, 0.1605, 0.2185, 0.1760]) -Greedy action tensor([ 0.8080, -0.7698, -0.1323, -0.2084]) tensor([0.5105, 0.1054, 0.1994, 0.1847]) -Greedy action tensor([ 0.5242, -0.0979, -0.1015, -0.3454]) tensor([0.4015, 0.2155, 0.2148, 0.1683]) -Greedy action tensor([ 1.0321, -0.7413, -0.0739, -0.6996]) tensor([0.5961, 0.1012, 0.1972, 0.1055]) -Greedy action tensor([ 1.0158, -0.3098, -0.0561, -0.2884]) tensor([0.5321, 0.1413, 0.1822, 0.1444]) -Greedy action tensor([ 0.4828, -0.0979, -0.1114, -0.0049]) tensor([0.3669, 0.2053, 0.2025, 0.2253]) -Greedy action tensor([ 0.4626, -0.2091, -0.0521, -0.1433]) tensor([0.3768, 0.1925, 0.2252, 0.2056]) -Greedy action tensor([ 0.8431, -0.5701, 0.0918, -0.3100]) tensor([0.4924, 0.1198, 0.2323, 0.1554]) -Greedy action tensor([ 0.5062, -0.1566, -0.0679, -0.3534]) tensor([0.3997, 0.2060, 0.2251, 0.1692]) -Greedy action tensor([ 1.1636, -1.1701, 0.0527, -0.6729]) tensor([0.6307, 0.0611, 0.2077, 0.1005]) -Greedy action tensor([ 1.2383, -1.6525, 0.2392, 1.2483]) tensor([0.4109, 0.0228, 0.1513, 0.4150]) -Greedy action tensor([ 0.9582, -0.1710, 0.8714, 0.5327]) tensor([0.3456, 0.1117, 0.3169, 0.2258]) -Greedy action tensor([-0.1330, -1.0008, -0.2027, 1.9363]) tensor([0.0974, 0.0409, 0.0908, 0.7710]) -Greedy action tensor([-0.2881, -1.7237, 0.2580, 0.7264]) tensor([0.1748, 0.0416, 0.3017, 0.4820]) -Greedy action tensor([0.9895, 0.2564, 0.3478, 0.6390]) tensor([0.3689, 0.1772, 0.1942, 0.2598]) -Greedy action tensor([-0.3085, -1.0267, -0.9805, 0.0383]) tensor([0.2930, 0.1429, 0.1496, 0.4145]) -Greedy action tensor([ 0.2525, -1.5601, -1.1672, 0.7200]) tensor([0.3332, 0.0544, 0.0806, 0.5318]) -Greedy action tensor([-1.4091, -0.1697, 1.5431, -1.5176]) tensor([0.0408, 0.1410, 0.7816, 0.0366]) -Greedy action tensor([ 0.5927, 0.5996, -1.7034, -0.2826]) tensor([0.3962, 0.3989, 0.0399, 0.1651]) -Greedy action tensor([-0.4510, 0.8086, -0.6157, -0.6520]) tensor([0.1615, 0.5693, 0.1370, 0.1321]) -Greedy action tensor([ 2.0571, -0.8150, 0.4634, 0.4242]) tensor([0.6872, 0.0389, 0.1396, 0.1343]) -Greedy action tensor([ 1.2736, -0.4250, 1.4010, 0.5947]) tensor([0.3539, 0.0647, 0.4019, 0.1795]) -Greedy action tensor([1.0638, 0.3388, 0.4785, 0.9446]) tensor([0.3414, 0.1654, 0.1901, 0.3031]) -Greedy action tensor([ 1.4467, -0.2921, 0.3456, 0.6430]) tensor([0.5113, 0.0898, 0.1700, 0.2289]) -Greedy action tensor([ 1.6947, -2.0360, 0.3652, 1.2693]) tensor([0.5149, 0.0123, 0.1362, 0.3365]) -Greedy action tensor([-0.8482, -1.3286, -0.0935, -0.1618]) tensor([0.1745, 0.1079, 0.3711, 0.3466]) -Greedy action tensor([-0.7757, -1.0553, -0.0208, 0.2659]) tensor([0.1489, 0.1126, 0.3167, 0.4219]) -Greedy action tensor([ 1.6116, -0.5601, 0.4438, 0.3427]) tensor([0.5861, 0.0668, 0.1823, 0.1648]) -Greedy action tensor([ 6.6066e-04, -9.2828e-01, 4.9701e-01, -9.7769e-03]) tensor([0.2483, 0.0981, 0.4079, 0.2457]) -Greedy action tensor([ 1.6099, -1.5542, 0.2423, 0.7460]) tensor([0.5819, 0.0246, 0.1482, 0.2453]) -Greedy action tensor([1.5780, 1.2280, 0.5487, 0.0960]) tensor([0.4369, 0.3078, 0.1561, 0.0992]) -Greedy action tensor([-0.8309, -0.8112, -0.2431, 0.3048]) tensor([0.1442, 0.1471, 0.2596, 0.4490]) -Greedy action tensor([ 1.0484, 0.2453, -0.0303, 0.9813]) tensor([0.3672, 0.1645, 0.1249, 0.3434]) -Greedy action tensor([-0.3149, 0.2519, -0.0542, -0.6761]) tensor([0.2102, 0.3705, 0.2728, 0.1465]) -Greedy action tensor([-0.6733, -1.1149, 0.8293, -1.1055]) tensor([0.1474, 0.0948, 0.6622, 0.0957]) -Greedy action tensor([ 1.7093, -0.9559, 0.6260, 1.2394]) tensor([0.4918, 0.0342, 0.1665, 0.3075]) -Greedy action tensor([ 1.4166, -0.1928, -0.1093, 0.4186]) tensor([0.5599, 0.1120, 0.1217, 0.2064]) -Greedy action tensor([ 1.1233e+00, 8.5443e-01, 6.8188e-04, -1.1461e-01]) tensor([0.4202, 0.3212, 0.1368, 0.1219]) -Greedy action tensor([0.9311, 0.2534, 1.0547, 0.9298]) tensor([0.2749, 0.1396, 0.3110, 0.2745]) -Greedy action tensor([ 0.0794, 0.0749, 0.1288, -0.6707]) tensor([0.2842, 0.2829, 0.2986, 0.1342]) -Greedy action tensor([ 0.1548, 0.3005, 0.6014, -0.5012]) tensor([0.2359, 0.2729, 0.3687, 0.1224]) -Greedy action tensor([ 0.0764, 0.1652, -0.2999, 1.4720]) tensor([0.1467, 0.1603, 0.1007, 0.5923]) -Greedy action tensor([ 1.9593, 0.2666, 1.0549, -0.2287]) tensor([0.5879, 0.1082, 0.2380, 0.0659]) -Greedy action tensor([0.9438, 0.3971, 0.6336, 1.0784]) tensor([0.2893, 0.1675, 0.2122, 0.3310]) -Greedy action tensor([-0.0613, -0.8642, 0.8247, -0.9602]) tensor([0.2336, 0.1047, 0.5666, 0.0951]) -Greedy action tensor([-0.3774, 0.3279, -0.1359, 0.8588]) tensor([0.1292, 0.2616, 0.1645, 0.4448]) -Greedy action tensor([ 1.2484, -1.6026, 0.1697, 0.8621]) tensor([0.4814, 0.0278, 0.1637, 0.3271]) -Greedy action tensor([ 0.8993, 0.7071, 1.0549, -0.1979]) tensor([0.3005, 0.2480, 0.3511, 0.1003]) -Greedy action tensor([-0.1553, -1.2809, 0.6775, -0.3939]) tensor([0.2267, 0.0735, 0.5213, 0.1785]) -Greedy action tensor([-0.9792, -1.3040, -0.3915, 0.6734]) tensor([0.1144, 0.0827, 0.2059, 0.5971]) -Greedy action tensor([ 0.6718, -1.1075, -0.2523, 1.4154]) tensor([0.2725, 0.0460, 0.1082, 0.5733]) -Greedy action tensor([ 0.8885, -0.4348, 0.7516, 0.7033]) tensor([0.3368, 0.0897, 0.2937, 0.2798]) -Greedy action tensor([0.9523, 0.1323, 0.4460, 0.8473]) tensor([0.3397, 0.1496, 0.2048, 0.3059]) -Greedy action tensor([1.1444, 0.4109, 1.7292, 0.1113]) tensor([0.2754, 0.1323, 0.4943, 0.0980]) -Greedy action tensor([ 0.3848, 0.7966, 0.5684, -0.5990]) tensor([0.2448, 0.3695, 0.2941, 0.0915]) -Greedy action tensor([-0.0509, -0.4242, -0.4852, 0.9663]) tensor([0.1960, 0.1349, 0.1270, 0.5421]) -Greedy action tensor([ 0.6388, 0.2984, 0.9461, -0.3671]) tensor([0.2909, 0.2070, 0.3956, 0.1064]) -Greedy action tensor([ 0.8617, -1.8583, -0.0294, 1.0137]) tensor([0.3787, 0.0249, 0.1554, 0.4409]) -Greedy action tensor([-0.0994, 0.2070, -0.4427, 0.0846]) tensor([0.2342, 0.3182, 0.1661, 0.2815]) -Greedy action tensor([-0.3317, -0.3395, -0.6323, -0.2918]) tensor([0.2650, 0.2630, 0.1962, 0.2758]) -Greedy action tensor([-0.0112, -0.8352, 0.3606, 0.8595]) tensor([0.1895, 0.0831, 0.2748, 0.4526]) -Greedy action tensor([ 0.4082, -0.8906, 0.8792, 0.8476]) tensor([0.2259, 0.0616, 0.3619, 0.3506]) -Greedy action tensor([ 1.2107, -0.2431, 1.3140, 1.6445]) tensor([0.2574, 0.0601, 0.2854, 0.3971]) -Greedy action tensor([-0.3711, -0.8313, 0.0697, 0.0585]) tensor([0.2118, 0.1337, 0.3291, 0.3254]) -Greedy action tensor([0.9306, 0.6794, 0.2839, 0.4882]) tensor([0.3396, 0.2642, 0.1779, 0.2182]) -Greedy action tensor([-0.1413, -0.4708, 0.8161, -1.0227]) tensor([0.2110, 0.1518, 0.5498, 0.0874]) -Greedy action tensor([-0.1116, -0.5326, -0.5017, 0.7259]) tensor([0.2153, 0.1413, 0.1458, 0.4975]) -Greedy action tensor([-0.4762, -0.6908, 0.6477, 0.2688]) tensor([0.1431, 0.1154, 0.4402, 0.3013]) -Greedy action tensor([-0.2169, -0.7687, 0.4706, -0.3093]) tensor([0.2234, 0.1287, 0.4443, 0.2037]) -Greedy action tensor([-0.3890, -0.3445, 0.7907, 0.7592]) tensor([0.1183, 0.1237, 0.3850, 0.3730]) -Greedy action tensor([ 0.6203, -0.5809, 0.8850, -0.1956]) tensor([0.3283, 0.0988, 0.4278, 0.1452]) -Greedy action tensor([ 0.3550, 1.1758, 0.9798, -0.5041]) tensor([0.1797, 0.4084, 0.3357, 0.0761]) -Greedy action tensor([ 1.2374, 0.1137, 0.2735, -0.1271]) tensor([0.5097, 0.1657, 0.1944, 0.1302]) -Greedy action tensor([-0.8442, 0.0386, 1.1249, -0.5251]) tensor([0.0836, 0.2022, 0.5991, 0.1151]) -Greedy action tensor([ 1.4877, -0.1268, 0.6325, 0.5240]) tensor([0.4986, 0.0992, 0.2120, 0.1902]) -Greedy action tensor([ 0.5261, -0.6531, 0.3555, -0.5228]) tensor([0.3999, 0.1230, 0.3371, 0.1401]) -Greedy action tensor([ 0.5920, -1.6391, -0.3030, -0.1526]) tensor([0.5023, 0.0539, 0.2052, 0.2385]) -Greedy action tensor([ 0.4841, -2.1649, -0.3886, 0.7563]) tensor([0.3570, 0.0252, 0.1492, 0.4686]) -Greedy action tensor([ 0.7853, -0.0792, -0.7641, 0.2597]) tensor([0.4495, 0.1893, 0.0955, 0.2657]) -Greedy action tensor([ 0.4957, -0.0323, 1.2395, 0.6477]) tensor([0.2059, 0.1214, 0.4331, 0.2397]) -Greedy action tensor([ 2.4276, -0.5323, 0.7377, 1.5480]) tensor([0.6056, 0.0314, 0.1118, 0.2513]) -Greedy action tensor([-0.7760, -1.0591, 0.0257, -0.7271]) tensor([0.1987, 0.1497, 0.4430, 0.2087]) -Greedy action tensor([ 0.4442, -1.6197, -0.4592, 1.3883]) tensor([0.2437, 0.0309, 0.0988, 0.6265]) -Greedy action tensor([ 0.6477, 0.7072, -1.7678, 0.4653]) tensor([0.3351, 0.3557, 0.0299, 0.2792]) -Greedy action tensor([-0.5750, -2.3521, 0.3724, 0.5323]) tensor([0.1476, 0.0250, 0.3807, 0.4467]) -Greedy action tensor([ 1.8418, -1.4694, 0.6967, 0.9316]) tensor([0.5691, 0.0208, 0.1811, 0.2290]) -Greedy action tensor([-0.0594, -0.2735, -1.0734, 1.5740]) tensor([0.1371, 0.1107, 0.0498, 0.7024]) -Greedy action tensor([ 0.0403, -0.0753, 0.7938, -0.1099]) tensor([0.2051, 0.1827, 0.4357, 0.1765]) -Greedy action tensor([ 0.1721, -1.3015, 0.7505, 0.3809]) tensor([0.2356, 0.0540, 0.4201, 0.2903]) -Greedy action tensor([1.0541, 0.1856, 0.3863, 1.6391]) tensor([0.2683, 0.1126, 0.1376, 0.4816]) -Greedy action tensor([-1.0842, -0.7678, 1.3831, -0.0370]) tensor([0.0588, 0.0807, 0.6931, 0.1675]) -Greedy action tensor([ 1.3991, -0.2276, -0.2459, 0.5772]) tensor([0.5467, 0.1075, 0.1055, 0.2403]) -Greedy action tensor([ 2.1966, -1.0061, -0.1534, 0.5968]) tensor([0.7474, 0.0304, 0.0713, 0.1509]) -Greedy action tensor([ 0.9351, -0.3460, -0.0112, -0.1604]) tensor([0.4999, 0.1388, 0.1941, 0.1672]) -Greedy action tensor([ 1.1327, -0.5729, 0.0948, 0.0572]) tensor([0.5328, 0.0968, 0.1887, 0.1817]) -Greedy action tensor([ 1.7414, -0.6929, -0.4662, 0.0782]) tensor([0.7209, 0.0632, 0.0793, 0.1366]) -Greedy action tensor([ 1.3247, -0.5070, -0.3284, 0.4988]) tensor([0.5588, 0.0895, 0.1070, 0.2447]) -Greedy action tensor([ 1.3696, -0.3127, -0.4449, 0.2184]) tensor([0.6006, 0.1117, 0.0978, 0.1899]) -Greedy action tensor([ 1.5263, -0.5200, -0.4266, 0.2565]) tensor([0.6443, 0.0833, 0.0914, 0.1810]) -Greedy action tensor([ 0.9399, -0.2714, -0.0460, -0.0282]) tensor([0.4876, 0.1452, 0.1819, 0.1852]) -Greedy action tensor([ 1.3873, -0.4532, -0.1197, 0.2937]) tensor([0.5830, 0.0925, 0.1292, 0.1953]) -Greedy action tensor([ 1.1610, -0.5933, -0.1707, 0.2052]) tensor([0.5490, 0.0950, 0.1450, 0.2111]) -Greedy action tensor([ 2.3285, -0.2323, -0.6673, 0.4663]) tensor([0.7797, 0.0602, 0.0390, 0.1211]) -Greedy action tensor([ 0.9756, -0.0069, -0.9049, 0.2337]) tensor([0.4992, 0.1869, 0.0761, 0.2377]) -Greedy action tensor([ 0.7824, -0.5436, 0.0474, -0.1736]) tensor([0.4696, 0.1247, 0.2252, 0.1805]) -Greedy action tensor([ 2.5009, -1.0736, 0.0949, 0.8268]) tensor([0.7659, 0.0215, 0.0691, 0.1436]) -Greedy action tensor([ 1.9707, -0.2944, 0.0293, 0.2546]) tensor([0.7007, 0.0727, 0.1006, 0.1260]) -Greedy action tensor([ 1.2049, -0.5086, -0.3836, 0.7014]) tensor([0.5028, 0.0906, 0.1027, 0.3039]) -Greedy action tensor([ 1.8411, -0.6032, -0.4176, 0.3680]) tensor([0.7040, 0.0611, 0.0736, 0.1614]) -Greedy action tensor([ 1.3690, -0.7246, -0.5244, 0.2045]) tensor([0.6306, 0.0777, 0.0949, 0.1968]) -Greedy action tensor([ 1.5171, -0.4524, -0.4297, 0.5498]) tensor([0.6016, 0.0839, 0.0859, 0.2287]) -Greedy action tensor([ 2.0320, -0.6565, -0.4658, 0.3427]) tensor([0.7491, 0.0509, 0.0616, 0.1383]) -Greedy action tensor([ 1.0735, -0.4733, -0.4728, 0.5915]) tensor([0.4894, 0.1042, 0.1043, 0.3022]) -Greedy action tensor([ 1.6455, 0.0726, -0.5318, 0.8305]) tensor([0.5671, 0.1176, 0.0643, 0.2510]) -Greedy action tensor([ 1.4342, -0.5785, -0.3246, 0.0075]) tensor([0.6468, 0.0864, 0.1114, 0.1553]) -Greedy action tensor([ 1.6856, -0.7335, 0.0423, 0.2077]) tensor([0.6621, 0.0589, 0.1280, 0.1510]) -Greedy action tensor([ 0.8786, -0.4971, -0.0985, 0.0984]) tensor([0.4791, 0.1210, 0.1803, 0.2196]) -Greedy action tensor([ 0.3102, -0.2181, 0.1327, 0.3409]) tensor([0.2892, 0.1705, 0.2421, 0.2982]) -Greedy action tensor([ 1.0955, -0.6141, -0.1703, 0.4265]) tensor([0.5063, 0.0916, 0.1428, 0.2593]) -Greedy action tensor([ 0.9303, -0.3813, -0.4296, 0.4356]) tensor([0.4682, 0.1261, 0.1202, 0.2855]) -Greedy action tensor([ 0.6837, -0.2588, 0.1532, -0.0693]) tensor([0.4084, 0.1591, 0.2402, 0.1923]) -Greedy action tensor([ 0.9625, -0.0286, -0.0553, 0.0036]) tensor([0.4726, 0.1754, 0.1708, 0.1812]) -Greedy action tensor([ 1.8611, -0.5948, -0.3336, 0.9285]) tensor([0.6286, 0.0539, 0.0700, 0.2474]) -Greedy action tensor([ 1.5136, -0.5636, -0.2957, 0.5443]) tensor([0.5994, 0.0751, 0.0982, 0.2274]) -Greedy action tensor([ 1.4806, -0.4845, -0.5396, 0.2718]) tensor([0.6364, 0.0892, 0.0844, 0.1900]) -Greedy action tensor([ 2.5544, -0.9832, -0.3045, 0.6922]) tensor([0.8053, 0.0234, 0.0462, 0.1251]) -Greedy action tensor([ 1.7018, -0.2456, -0.4603, 0.5739]) tensor([0.6323, 0.0902, 0.0728, 0.2047]) -Greedy action tensor([ 1.3649, -0.3115, -0.2145, 0.2670]) tensor([0.5791, 0.1083, 0.1194, 0.1932]) -Greedy action tensor([ 1.1194, -0.3181, -0.2265, 0.3680]) tensor([0.5077, 0.1206, 0.1322, 0.2395]) -Greedy action tensor([ 1.5655, -0.6247, -0.0993, 0.2685]) tensor([0.6351, 0.0711, 0.1202, 0.1736]) -Greedy action tensor([ 1.1444, -0.3690, -0.4053, 0.3364]) tensor([0.5324, 0.1172, 0.1130, 0.2373]) -Greedy action tensor([ 1.2156, -0.5014, -0.2376, 0.2320]) tensor([0.5595, 0.1005, 0.1308, 0.2092]) -Greedy action tensor([ 0.4778, -0.1914, -0.0589, 0.0754]) tensor([0.3616, 0.1852, 0.2114, 0.2418]) -Greedy action tensor([ 0.9496, -0.5591, -0.0474, 0.1125]) tensor([0.4943, 0.1093, 0.1824, 0.2140]) -Greedy action tensor([ 1.8021, -0.0688, -0.7462, 0.0051]) tensor([0.7153, 0.1101, 0.0559, 0.1186]) -Greedy action tensor([ 1.0293, -0.2068, -0.4249, 0.3088]) tensor([0.4974, 0.1445, 0.1162, 0.2420]) -Greedy action tensor([ 1.6443, -0.2526, -0.2651, 0.2625]) tensor([0.6454, 0.0968, 0.0956, 0.1621]) -Greedy action tensor([ 0.6686, 0.0434, 0.0498, -0.1866]) tensor([0.4002, 0.2142, 0.2155, 0.1701]) -Greedy action tensor([ 1.2386, -0.4089, -0.4350, 0.1919]) tensor([0.5776, 0.1112, 0.1083, 0.2028]) -Greedy action tensor([ 1.5711, 0.1919, -0.4266, 0.1755]) tensor([0.6116, 0.1540, 0.0830, 0.1515]) -Greedy action tensor([ 1.5684, -0.8664, -0.0943, 0.2417]) tensor([0.6483, 0.0568, 0.1229, 0.1720]) -Greedy action tensor([ 1.2854, -0.1331, -0.5129, 0.3369]) tensor([0.5571, 0.1349, 0.0922, 0.2158]) -Greedy action tensor([ 1.4968, 0.4113, 0.2199, -0.1679]) tensor([0.5537, 0.1870, 0.1544, 0.1048]) -Greedy action tensor([ 0.6344, -0.4104, 0.0089, 0.1742]) tensor([0.3972, 0.1397, 0.2125, 0.2507]) -Greedy action tensor([ 1.7185, 0.3792, -0.5430, 0.1192]) tensor([0.6377, 0.1671, 0.0664, 0.1288]) -Greedy action tensor([ 1.5008, -0.4649, -0.2807, 0.1012]) tensor([0.6430, 0.0901, 0.1083, 0.1586]) -Greedy action tensor([ 1.0437, 0.0479, -0.1085, 0.4929]) tensor([0.4421, 0.1633, 0.1397, 0.2549]) -Greedy action tensor([ 1.7013, -0.2762, -0.7471, 0.8220]) tensor([0.6098, 0.0844, 0.0527, 0.2531]) -Greedy action tensor([ 1.1662, -0.4381, -0.7406, 0.7095]) tensor([0.5043, 0.1014, 0.0749, 0.3194]) -Greedy action tensor([ 1.5296, -0.5232, -0.3357, 0.3898]) tensor([0.6238, 0.0801, 0.0966, 0.1995]) -Greedy action tensor([ 1.6081, -0.5813, -0.3082, 0.3489]) tensor([0.6481, 0.0726, 0.0954, 0.1840]) -Greedy action tensor([ 1.2357, -0.6622, -0.5645, 0.4217]) tensor([0.5687, 0.0852, 0.0940, 0.2520]) -Greedy action tensor([ 1.9691, -0.9918, 0.2578, 0.4470]) tensor([0.6893, 0.0357, 0.1245, 0.1505]) -Greedy action tensor([ 1.6449, -0.8668, -0.5551, 1.1159]) tensor([0.5614, 0.0456, 0.0622, 0.3308]) -Greedy action tensor([ 1.0761, -0.2913, -0.7223, 0.4726]) tensor([0.5083, 0.1295, 0.0842, 0.2780]) -Greedy action tensor([ 2.0283, -0.7402, -0.4806, 0.8211]) tensor([0.6929, 0.0435, 0.0564, 0.2072]) -Greedy action tensor([ 1.8017, -0.7773, -0.4442, 0.6834]) tensor([0.6629, 0.0503, 0.0702, 0.2167]) -Greedy action tensor([ 1.2932, -0.4227, 0.1781, -0.0434]) tensor([0.5648, 0.1016, 0.1852, 0.1484]) -Greedy action tensor([ 1.2323, -0.1305, -0.2959, 0.1837]) tensor([0.5485, 0.1404, 0.1190, 0.1922]) -Greedy action tensor([ 1.7599, -0.6883, -0.0786, 0.3675]) tensor([0.6693, 0.0579, 0.1065, 0.1663]) -Greedy action tensor([ 1.6723, -0.3157, -0.6827, 0.2714]) tensor([0.6765, 0.0927, 0.0642, 0.1667]) -Greedy action tensor([ 0.8447, -0.4236, -0.5280, 0.4380]) tensor([0.4544, 0.1278, 0.1152, 0.3026]) -Greedy action tensor([ 1.6132, -0.5293, -0.2836, 0.5870]) tensor([0.6151, 0.0722, 0.0923, 0.2204]) -Greedy action tensor([ 1.4138, -0.0665, -0.9882, 0.5710]) tensor([0.5719, 0.1301, 0.0518, 0.2462]) -Greedy action tensor([ 1.4803, -0.3958, -0.3103, 0.1577]) tensor([0.6303, 0.0966, 0.1052, 0.1679]) -Greedy action tensor([ 1.1744, -0.5528, -0.2988, 0.1953]) tensor([0.5610, 0.0997, 0.1286, 0.2107]) -Greedy action tensor([ 1.2128, -0.2016, -0.3927, 0.1282]) tensor([0.5612, 0.1364, 0.1127, 0.1897]) -Greedy action tensor([ 1.3906, -0.2934, -0.5221, 0.2234]) tensor([0.6081, 0.1129, 0.0898, 0.1893]) -Greedy action tensor([ 1.0205, -0.2379, -0.7970, 0.6547]) tensor([0.4672, 0.1327, 0.0759, 0.3241]) -Greedy action tensor([ 1.1336, -0.4497, -0.2269, 0.1675]) tensor([0.5428, 0.1114, 0.1392, 0.2066]) -Greedy action tensor([ 1.1168, 0.0128, -0.3055, 0.1912]) tensor([0.5079, 0.1684, 0.1225, 0.2013]) -Greedy action tensor([ 1.7090, -0.5546, -0.1611, 0.3092]) tensor([0.6646, 0.0691, 0.1024, 0.1639]) -Greedy action tensor([-1.7131, -0.3988, 0.6720, 0.0042]) tensor([0.0473, 0.1760, 0.5134, 0.2633]) -Greedy action tensor([-1.8426, -0.4682, 0.6503, -0.0884]) tensor([0.0438, 0.1732, 0.5299, 0.2531]) -Greedy action tensor([-0.5945, -0.5098, 0.1673, 0.1320]) tensor([0.1588, 0.1728, 0.3401, 0.3283]) -Greedy action tensor([-1.6175, -0.4863, 0.5077, 0.0729]) tensor([0.0559, 0.1732, 0.4680, 0.3030]) -Greedy action tensor([-0.8857, 0.8114, 0.1344, 0.0993]) tensor([0.0840, 0.4583, 0.2329, 0.2248]) -Greedy action tensor([-1.4436, -0.1899, 0.5224, 0.3567]) tensor([0.0565, 0.1980, 0.4036, 0.3419]) -Greedy action tensor([-1.5874, -0.4831, 0.4976, 0.0482]) tensor([0.0582, 0.1755, 0.4679, 0.2985]) -Greedy action tensor([-0.4445, -0.0557, 0.1694, 0.2448]) tensor([0.1584, 0.2336, 0.2926, 0.3155]) -Greedy action tensor([-1.9054, -0.4246, 0.6414, -0.1576]) tensor([0.0418, 0.1839, 0.5341, 0.2402]) -Greedy action tensor([-1.9162, -0.4828, 0.7493, -0.0972]) tensor([0.0389, 0.1629, 0.5586, 0.2396]) -Greedy action tensor([-1.4458, -0.6014, 1.0420, 0.9723]) tensor([0.0376, 0.0875, 0.4527, 0.4222]) -Greedy action tensor([-1.7896, -0.3022, 0.5741, -0.0952]) tensor([0.0465, 0.2059, 0.4944, 0.2532]) -Greedy action tensor([-1.2564, -0.3883, 0.4457, -0.2184]) tensor([0.0855, 0.2038, 0.4692, 0.2415]) -Greedy action tensor([-1.4274, -0.4347, 0.4566, 0.1338]) tensor([0.0665, 0.1794, 0.4374, 0.3167]) -Greedy action tensor([-1.8304, -0.3670, 0.6103, -0.1042]) tensor([0.0446, 0.1927, 0.5121, 0.2506]) -Greedy action tensor([-1.0044, 0.4662, 0.4255, 1.0743]) tensor([0.0571, 0.2483, 0.2384, 0.4562]) -Greedy action tensor([-1.9148, -0.4299, 0.6494, -0.1680]) tensor([0.0414, 0.1829, 0.5381, 0.2376]) -Greedy action tensor([-1.8960, -0.4299, 0.6390, -0.1519]) tensor([0.0422, 0.1830, 0.5330, 0.2417]) -Greedy action tensor([-1.9448, -0.4522, 0.6654, -0.1819]) tensor([0.0402, 0.1788, 0.5467, 0.2343]) -Greedy action tensor([-1.8517, -0.3378, 0.6126, -0.0981]) tensor([0.0433, 0.1969, 0.5094, 0.2503]) -Greedy action tensor([-1.2530, -0.6459, 0.5098, -0.2068]) tensor([0.0869, 0.1594, 0.5064, 0.2473]) -Greedy action tensor([-1.5316, -0.5499, 0.4532, 0.1629]) tensor([0.0610, 0.1628, 0.4440, 0.3321]) -Greedy action tensor([-1.0191, -0.6262, 0.2402, 0.1646]) tensor([0.1079, 0.1598, 0.3800, 0.3523]) -Greedy action tensor([-1.6964, -0.3601, 0.5865, 0.0058]) tensor([0.0498, 0.1893, 0.4879, 0.2730]) -Greedy action tensor([-1.8291, -0.4463, 0.6110, -0.1205]) tensor([0.0455, 0.1813, 0.5220, 0.2512]) -Greedy action tensor([-1.5488, -0.3016, 0.4482, -0.0398]) tensor([0.0611, 0.2126, 0.4500, 0.2763]) -Greedy action tensor([-1.8217, -0.4796, 0.6178, -0.0657]) tensor([0.0453, 0.1733, 0.5193, 0.2622]) -Greedy action tensor([-1.9210, -0.4358, 0.6544, -0.1675]) tensor([0.0411, 0.1815, 0.5400, 0.2374]) -Greedy action tensor([-1.8878, -0.4044, 0.6318, -0.1403]) tensor([0.0424, 0.1870, 0.5271, 0.2435]) -Greedy action tensor([-1.9002, -0.4598, 0.6381, -0.1615]) tensor([0.0424, 0.1791, 0.5370, 0.2414]) -Greedy action tensor([-1.9071, -0.4331, 0.6452, -0.1627]) tensor([0.0418, 0.1825, 0.5365, 0.2392]) -Greedy action tensor([-1.8997, -0.3971, 0.6435, -0.1468]) tensor([0.0417, 0.1873, 0.5303, 0.2406]) -Greedy action tensor([-1.9029, -0.4512, 0.6441, -0.1621]) tensor([0.0421, 0.1799, 0.5378, 0.2402]) -Greedy action tensor([-1.7910, -0.3475, 0.6691, -0.0435]) tensor([0.0441, 0.1867, 0.5161, 0.2531]) -Greedy action tensor([-1.9119, -0.4527, 0.6789, -0.1409]) tensor([0.0408, 0.1755, 0.5441, 0.2397]) -Greedy action tensor([-1.3154, -0.2313, 0.3443, -0.0071]) tensor([0.0774, 0.2289, 0.4071, 0.2865]) -Greedy action tensor([-0.9121, -0.3413, 0.4960, 1.0398]) tensor([0.0719, 0.1273, 0.2941, 0.5066]) -Greedy action tensor([-1.1768, -0.5249, 0.2608, 0.2515]) tensor([0.0885, 0.1698, 0.3726, 0.3691]) -Greedy action tensor([-0.7087, -0.4848, 0.3511, 0.0805]) tensor([0.1363, 0.1705, 0.3933, 0.3000]) -Greedy action tensor([-1.6404, -0.5087, 0.5100, 0.0809]) tensor([0.0547, 0.1696, 0.4698, 0.3059]) -Greedy action tensor([-1.3456, -0.4795, 0.4646, 0.3727]) tensor([0.0664, 0.1578, 0.4057, 0.3701]) -Greedy action tensor([-1.7951, -0.3360, 0.6498, -0.0888]) tensor([0.0448, 0.1926, 0.5161, 0.2466]) -Greedy action tensor([-1.3482, -0.5825, 0.3649, 0.1371]) tensor([0.0763, 0.1640, 0.4229, 0.3368]) -Greedy action tensor([-1.7452, -0.3954, 0.5429, -0.0303]) tensor([0.0493, 0.1903, 0.4863, 0.2741]) -Greedy action tensor([-1.7961, -0.4121, 0.5836, -0.0799]) tensor([0.0468, 0.1869, 0.5058, 0.2605]) -Greedy action tensor([-1.8906, -0.4489, 0.7095, -0.0569]) tensor([0.0401, 0.1695, 0.5397, 0.2508]) -Greedy action tensor([-1.7988, -0.5007, 0.6018, -0.1006]) tensor([0.0473, 0.1731, 0.5214, 0.2583]) -Greedy action tensor([-1.9164, -0.4283, 0.6514, -0.1663]) tensor([0.0413, 0.1828, 0.5383, 0.2376]) -Greedy action tensor([-1.9412, -0.4395, 0.6639, -0.1775]) tensor([0.0402, 0.1806, 0.5444, 0.2347]) -Greedy action tensor([-1.3720, 0.1347, 0.5227, 0.4524]) tensor([0.0545, 0.2457, 0.3622, 0.3376]) -Greedy action tensor([-1.8453, -0.4051, 0.6134, -0.1247]) tensor([0.0444, 0.1876, 0.5196, 0.2484]) -Greedy action tensor([-1.6310, -0.4686, 0.6423, 0.2398]) tensor([0.0490, 0.1567, 0.4760, 0.3183]) -Greedy action tensor([-1.7046, -0.9033, 0.1578, -0.4542]) tensor([0.0760, 0.1693, 0.4893, 0.2653]) -Greedy action tensor([-1.7471, -0.3274, 0.5621, -0.0149]) tensor([0.0480, 0.1983, 0.4827, 0.2711]) -Greedy action tensor([-1.8793, -0.4543, 0.6401, -0.1471]) tensor([0.0430, 0.1790, 0.5347, 0.2433]) -Greedy action tensor([-1.8175, -0.5104, 0.5959, -0.1170]) tensor([0.0469, 0.1731, 0.5234, 0.2566]) -Greedy action tensor([-1.3894, -0.4659, 0.3973, 0.1171]) tensor([0.0714, 0.1799, 0.4264, 0.3222]) -Greedy action tensor([-1.9001, -0.3858, 0.6515, -0.1363]) tensor([0.0413, 0.1878, 0.5299, 0.2410]) -Greedy action tensor([-1.1082e+00, -1.5938e-01, 2.7748e-01, -4.4313e-04]) tensor([0.0943, 0.2435, 0.3769, 0.2854]) -Greedy action tensor([-1.3303, -0.3862, 0.7534, 0.6714]) tensor([0.0526, 0.1353, 0.4227, 0.3894]) -Greedy action tensor([-1.6419, -0.2434, 0.6326, 0.1003]) tensor([0.0488, 0.1977, 0.4747, 0.2788]) -Greedy action tensor([-1.6324, -0.5259, 0.4861, 0.0262]) tensor([0.0568, 0.1719, 0.4728, 0.2985]) -Greedy action tensor([-1.5105, 0.3383, 0.3918, 0.3226]) tensor([0.0492, 0.3128, 0.3300, 0.3079]) -Greedy action tensor([-1.7400, -0.2688, 0.6402, 0.0207]) tensor([0.0455, 0.1981, 0.4917, 0.2646]) -Greedy action tensor([-1.4848, 0.2803, 0.2988, 0.1144]) tensor([0.0564, 0.3293, 0.3354, 0.2789]) -Greedy action tensor([-1.7898, -0.4449, 0.7088, 0.0510]) tensor([0.0429, 0.1647, 0.5220, 0.2704]) -Greedy action tensor([-1.8037, -0.4452, 0.5972, -0.1028]) tensor([0.0467, 0.1818, 0.5155, 0.2560]) -Greedy action tensor([-1.5568, 0.3291, 0.3432, 0.0766]) tensor([0.0515, 0.3398, 0.3446, 0.2640]) -Greedy action tensor([-1.9255, -0.3861, 0.6446, -0.1684]) tensor([0.0408, 0.1901, 0.5328, 0.2363]) -Greedy action tensor([-1.9418, -0.4529, 0.6636, -0.1794]) tensor([0.0403, 0.1788, 0.5459, 0.2350]) -Greedy action tensor([-0.6667, -0.0766, -0.5105, -0.0194]) tensor([0.1700, 0.3067, 0.1987, 0.3247]) -Greedy action tensor([-1.7439, -0.2686, 0.5785, -0.0374]) tensor([0.0474, 0.2074, 0.4838, 0.2613]) -Greedy action tensor([-1.7204, -0.6528, 1.0523, 0.1494]) tensor([0.0379, 0.1102, 0.6062, 0.2457]) -Greedy action tensor([-0.8455, -0.5749, 0.5792, -0.0369]) tensor([0.1148, 0.1504, 0.4771, 0.2577]) -Greedy action tensor([-0.4472, 0.4506, 0.1269, -0.0243]) tensor([0.1480, 0.3633, 0.2628, 0.2259]) -Greedy action tensor([-1.8593, -0.4692, 0.6550, -0.0707]) tensor([0.0428, 0.1719, 0.5292, 0.2561]) -Greedy action tensor([-1.1651, -0.6566, 0.3103, 0.2718]) tensor([0.0889, 0.1479, 0.3889, 0.3742]) -Greedy action tensor([-1.8534, -0.4554, 0.6465, -0.0791]) tensor([0.0432, 0.1750, 0.5268, 0.2550]) -Greedy action tensor([-1.8564, -0.4842, 0.6219, -0.1287]) tensor([0.0445, 0.1753, 0.5300, 0.2502]) -Greedy action tensor([-1.8544, -0.4260, 0.6178, -0.1305]) tensor([0.0442, 0.1844, 0.5236, 0.2478]) -Greedy action tensor([-1.9050, -0.4604, 0.6538, -0.1578]) tensor([0.0418, 0.1774, 0.5406, 0.2401]) -Greedy action tensor([ 0.7889, -0.5354, -0.0344, -0.4844]) tensor([0.5038, 0.1340, 0.2212, 0.1410]) -Greedy action tensor([ 0.9688, -0.6189, -0.0995, -0.1446]) tensor([0.5329, 0.1089, 0.1831, 0.1750]) -Greedy action tensor([ 0.6987, -0.0547, -0.0319, 0.0646]) tensor([0.4028, 0.1896, 0.1940, 0.2136]) -Greedy action tensor([ 0.6079, -0.0303, 0.0153, -0.0754]) tensor([0.3867, 0.2043, 0.2138, 0.1953]) -Greedy action tensor([ 1.0229, -0.2019, -0.0919, -0.5094]) tensor([0.5441, 0.1599, 0.1785, 0.1176]) -Greedy action tensor([ 1.1239, -0.3858, 0.0188, -0.2573]) tensor([0.5545, 0.1225, 0.1836, 0.1393]) -Greedy action tensor([ 0.1486, 0.1771, -0.1152, -0.3278]) tensor([0.2926, 0.3010, 0.2247, 0.1817]) -Greedy action tensor([ 0.7251, -0.5046, -0.0271, -0.3596]) tensor([0.4758, 0.1391, 0.2243, 0.1608]) -Greedy action tensor([ 0.8420, -0.6292, -0.1360, -0.3340]) tensor([0.5224, 0.1200, 0.1965, 0.1612]) -Greedy action tensor([ 0.3919, -0.3615, -0.0395, -0.1506]) tensor([0.3701, 0.1743, 0.2404, 0.2152]) -Greedy action tensor([ 0.2486, -0.2560, -0.2005, -0.1662]) tensor([0.3445, 0.2080, 0.2199, 0.2276]) -Greedy action tensor([ 0.5509, -0.1970, -0.1000, -0.0797]) tensor([0.3957, 0.1873, 0.2064, 0.2106]) -Greedy action tensor([ 0.8344, -0.4932, 0.1373, -0.5122]) tensor([0.4942, 0.1310, 0.2462, 0.1286]) -Greedy action tensor([ 0.4652, -0.5242, 0.0287, -0.2685]) tensor([0.4003, 0.1488, 0.2587, 0.1922]) -Greedy action tensor([ 0.6873, -0.3376, 0.0050, -0.3871]) tensor([0.4534, 0.1627, 0.2292, 0.1548]) -Greedy action tensor([ 0.8692, -0.5775, -0.0599, -0.4369]) tensor([0.5260, 0.1238, 0.2077, 0.1425]) -Greedy action tensor([ 0.4108, -0.2124, -0.1177, -0.1267]) tensor([0.3690, 0.1979, 0.2175, 0.2156]) -Greedy action tensor([ 0.0145, 0.2062, -0.1291, -0.5392]) tensor([0.2738, 0.3317, 0.2372, 0.1574]) -Greedy action tensor([ 0.8766, -0.6768, 0.1357, -0.4000]) tensor([0.5083, 0.1075, 0.2423, 0.1418]) -Greedy action tensor([ 0.6112, -0.1611, -0.0584, -0.0686]) tensor([0.4031, 0.1862, 0.2064, 0.2043]) -Greedy action tensor([ 0.4686, -0.3410, -0.1803, -0.3544]) tensor([0.4155, 0.1849, 0.2171, 0.1825]) -Greedy action tensor([ 0.7611, -0.8053, -0.2159, -0.4283]) tensor([0.5292, 0.1105, 0.1992, 0.1611]) -Greedy action tensor([ 0.9335, -1.3955, 0.1302, -0.7845]) tensor([0.5798, 0.0565, 0.2597, 0.1040]) -Greedy action tensor([ 0.7451, -0.0374, -0.2624, -0.6094]) tensor([0.4807, 0.2198, 0.1755, 0.1240]) -Greedy action tensor([ 0.5125, -0.4335, -0.0463, -0.2560]) tensor([0.4126, 0.1602, 0.2359, 0.1913]) -Greedy action tensor([ 1.0618, -0.8221, 0.0977, -0.4783]) tensor([0.5722, 0.0870, 0.2182, 0.1227]) -Greedy action tensor([ 0.1761, -0.1989, 0.1720, -0.2576]) tensor([0.3002, 0.2063, 0.2989, 0.1945]) -Greedy action tensor([ 0.3795, -0.1493, -0.0406, -0.1199]) tensor([0.3505, 0.2066, 0.2303, 0.2127]) -Greedy action tensor([ 0.3803, 0.0067, -0.1271, -0.0932]) tensor([0.3433, 0.2363, 0.2067, 0.2138]) -Greedy action tensor([ 1.1381, -1.0636, -0.0283, -0.5593]) tensor([0.6230, 0.0689, 0.1940, 0.1141]) -Greedy action tensor([ 0.5849, -0.0789, -0.0573, -0.0473]) tensor([0.3887, 0.2002, 0.2045, 0.2066]) -Greedy action tensor([ 0.7533, -0.4152, -0.1590, -0.1535]) tensor([0.4725, 0.1469, 0.1898, 0.1908]) -Greedy action tensor([ 0.8574, -0.5250, -0.0054, -0.4642]) tensor([0.5156, 0.1294, 0.2175, 0.1375]) -Greedy action tensor([ 0.7389, -0.1232, -0.0956, -0.5317]) tensor([0.4679, 0.1976, 0.2031, 0.1313]) -Greedy action tensor([ 0.3143, -0.1084, -0.0292, -0.1392]) tensor([0.3333, 0.2184, 0.2364, 0.2118]) -Greedy action tensor([ 0.7954, -0.3719, 0.1936, -0.4078]) tensor([0.4631, 0.1441, 0.2537, 0.1390]) -Greedy action tensor([ 0.2714, 0.3979, -0.1292, 0.0880]) tensor([0.2749, 0.3120, 0.1842, 0.2289]) -Greedy action tensor([ 0.7302, -0.3216, 0.1791, -0.2158]) tensor([0.4322, 0.1510, 0.2491, 0.1678]) -Greedy action tensor([ 0.7336, -0.3924, -0.2561, -0.3950]) tensor([0.4952, 0.1606, 0.1841, 0.1602]) -Greedy action tensor([ 0.7493, -0.8782, -0.0283, -0.3591]) tensor([0.5035, 0.0989, 0.2314, 0.1662]) -Greedy action tensor([ 1.3017, -0.7033, 0.0485, -0.5374]) tensor([0.6332, 0.0853, 0.1808, 0.1007]) -Greedy action tensor([ 0.4204, -0.3530, -0.1326, -0.2686]) tensor([0.3939, 0.1818, 0.2266, 0.1978]) -Greedy action tensor([ 0.6097, -0.1927, -0.0341, -0.2942]) tensor([0.4204, 0.1885, 0.2209, 0.1703]) -Greedy action tensor([ 0.5358, -0.2571, -0.1207, -0.0377]) tensor([0.3945, 0.1785, 0.2046, 0.2223]) -Greedy action tensor([ 0.7960, -0.2401, 0.1140, -0.5191]) tensor([0.4697, 0.1667, 0.2375, 0.1261]) -Greedy action tensor([ 0.7890, -0.4121, -0.1713, -0.2614]) tensor([0.4918, 0.1480, 0.1882, 0.1720]) -Greedy action tensor([ 0.6350, -0.3169, -0.0732, -0.1668]) tensor([0.4297, 0.1659, 0.2117, 0.1927]) -Greedy action tensor([ 0.8213, -0.4608, -0.0136, -0.6280]) tensor([0.5139, 0.1426, 0.2230, 0.1206]) -Greedy action tensor([ 0.3839, 0.0817, -0.3043, -0.2036]) tensor([0.3575, 0.2642, 0.1796, 0.1987]) -Greedy action tensor([ 0.9958, -1.0452, 0.0160, -0.4276]) tensor([0.5727, 0.0744, 0.2150, 0.1380]) -Greedy action tensor([ 0.4181, -0.1155, -0.1353, -0.2427]) tensor([0.3734, 0.2190, 0.2147, 0.1928]) -Greedy action tensor([ 0.6856, -0.3820, -0.1656, -0.4487]) tensor([0.4779, 0.1643, 0.2040, 0.1537]) -Greedy action tensor([ 0.8944, -0.5189, 0.0060, -0.3679]) tensor([0.5161, 0.1256, 0.2123, 0.1461]) -Greedy action tensor([ 0.4504, 0.0327, -0.0286, -0.0537]) tensor([0.3470, 0.2285, 0.2149, 0.2096]) -Greedy action tensor([ 0.5572, -0.1991, -0.0436, -0.0320]) tensor([0.3887, 0.1825, 0.2132, 0.2157]) -Greedy action tensor([ 0.4286, -0.2446, -0.3300, -0.4594]) tensor([0.4184, 0.2134, 0.1960, 0.1722]) -Greedy action tensor([ 1.0780, -0.7392, 0.0386, -0.6932]) tensor([0.5930, 0.0964, 0.2097, 0.1009]) -Greedy action tensor([ 0.3189, -0.1418, -0.1406, -0.4046]) tensor([0.3640, 0.2296, 0.2299, 0.1765]) -Greedy action tensor([ 1.0278, -1.1264, -0.0463, -0.6123]) tensor([0.6055, 0.0702, 0.2068, 0.1174]) -Greedy action tensor([ 0.7930, -0.4585, 0.0947, -0.7712]) tensor([0.5018, 0.1436, 0.2496, 0.1050]) -Greedy action tensor([ 0.3312, 0.0070, 0.0047, -0.1952]) tensor([0.3295, 0.2382, 0.2377, 0.1946]) -Greedy action tensor([ 0.1700, 0.0307, -0.0398, -0.0519]) tensor([0.2872, 0.2499, 0.2329, 0.2301]) -Greedy action tensor([ 0.8141, -0.6104, -0.0713, -0.2063]) tensor([0.4966, 0.1195, 0.2049, 0.1790]) -Greedy action tensor([ 1.4192, -0.8833, -0.1646, -1.0925]) tensor([0.7213, 0.0721, 0.1480, 0.0585]) -Greedy action tensor([ 1.1163, -0.7086, -0.0289, -0.4284]) tensor([0.5908, 0.0952, 0.1879, 0.1260]) -Greedy action tensor([ 0.7134, -0.3434, -0.1139, -0.2639]) tensor([0.4627, 0.1608, 0.2023, 0.1741]) -Greedy action tensor([ 0.4341, -0.1949, 0.0550, -0.2209]) tensor([0.3654, 0.1948, 0.2501, 0.1898]) -Greedy action tensor([ 0.3608, 0.0123, -0.0896, -0.2675]) tensor([0.3476, 0.2453, 0.2216, 0.1855]) -Greedy action tensor([ 0.3971, -0.1005, -0.0555, -0.4565]) tensor([0.3745, 0.2277, 0.2382, 0.1595]) -Greedy action tensor([ 0.3795, -0.1822, -0.0646, -0.3928]) tensor([0.3740, 0.2133, 0.2399, 0.1728]) -Greedy action tensor([ 0.5116, -0.1639, -0.0624, -0.2470]) tensor([0.3936, 0.2003, 0.2217, 0.1844]) -Greedy action tensor([ 0.7277, -0.2954, 0.0407, -0.4767]) tensor([0.4624, 0.1662, 0.2326, 0.1387]) -Greedy action tensor([ 0.3887, -0.4421, -0.0894, -0.3461]) tensor([0.3944, 0.1719, 0.2445, 0.1892]) -Greedy action tensor([ 0.4884, -0.0712, -0.0413, -0.2008]) tensor([0.3756, 0.2146, 0.2212, 0.1886]) -Greedy action tensor([ 1.2512, -0.7443, -0.1357, -0.5680]) tensor([0.6460, 0.0878, 0.1614, 0.1048]) -Greedy action tensor([ 0.4210, -0.5725, -0.1226, -0.3938]) tensor([0.4178, 0.1547, 0.2426, 0.1850]) -Greedy action tensor([ 1.2330, -0.6094, 0.0965, -0.6295]) tensor([0.6118, 0.0969, 0.1963, 0.0950]) -Greedy action tensor([ 0.8903, -0.7602, -0.1146, -0.3089]) tensor([0.5378, 0.1032, 0.1969, 0.1621]) -Greedy action tensor([ 0.7689, -0.5424, -0.1172, -0.3075]) tensor([0.4944, 0.1332, 0.2038, 0.1685]) -Greedy action tensor([ 0.5160, -0.1412, 0.1540, -0.4198]) tensor([0.3836, 0.1988, 0.2671, 0.1505]) -Greedy action tensor([ 0.2722, -0.0711, -0.1646, -0.3316]) tensor([0.3446, 0.2444, 0.2226, 0.1884]) -Greedy action tensor([-0.4935, -0.1095, -0.0514, 0.0617]) tensor([0.1734, 0.2546, 0.2698, 0.3021]) -Greedy action tensor([ 0.4745, -0.1289, -0.0225, 0.5664]) tensor([0.3075, 0.1682, 0.1871, 0.3371]) -Greedy action tensor([-0.6030, 0.4122, 1.2065, -0.6246]) tensor([0.0922, 0.2545, 0.5631, 0.0902]) -Greedy action tensor([-1.0465, -0.5007, 1.3005, -0.6072]) tensor([0.0679, 0.1172, 0.7096, 0.1053]) -Greedy action tensor([ 1.4763, -0.5806, 1.4343, 0.3884]) tensor([0.4126, 0.0528, 0.3956, 0.1390]) -Greedy action tensor([-0.2757, -0.3381, -1.0546, -0.3896]) tensor([0.3039, 0.2855, 0.1395, 0.2712]) -Greedy action tensor([ 1.1345, -0.6580, 2.0573, 0.2991]) tensor([0.2429, 0.0405, 0.6113, 0.1054]) -Greedy action tensor([ 1.5620, -0.9571, 1.2552, 0.8092]) tensor([0.4372, 0.0352, 0.3217, 0.2059]) -Greedy action tensor([-0.3854, -0.6135, 1.0106, -0.1688]) tensor([0.1413, 0.1125, 0.5707, 0.1755]) -Greedy action tensor([ 1.2591, 0.1089, -0.0278, 0.6318]) tensor([0.4702, 0.1489, 0.1298, 0.2511]) -Greedy action tensor([ 0.4381, -0.2220, 0.4353, -0.0361]) tensor([0.3188, 0.1648, 0.3180, 0.1984]) -Greedy action tensor([-0.4671, -1.6745, 2.3345, 0.4272]) tensor([0.0495, 0.0148, 0.8148, 0.1210]) -Greedy action tensor([0.8531, 0.1898, 0.1038, 0.1748]) tensor([0.4007, 0.2065, 0.1894, 0.2034]) -Greedy action tensor([ 0.1316, -0.4165, -0.0696, -0.2378]) tensor([0.3240, 0.1872, 0.2649, 0.2239]) -Greedy action tensor([-0.1698, -1.3161, -0.0504, 1.1620]) tensor([0.1605, 0.0510, 0.1808, 0.6078]) -Greedy action tensor([ 0.5279, -0.1038, -0.4353, 0.2147]) tensor([0.3782, 0.2011, 0.1443, 0.2765]) -Greedy action tensor([-0.3762, -0.3496, 0.3117, -0.1684]) tensor([0.1906, 0.1957, 0.3792, 0.2346]) -Greedy action tensor([ 0.4884, -0.2723, 0.3067, 1.5593]) tensor([0.1916, 0.0895, 0.1598, 0.5591]) -Greedy action tensor([-0.2578, 0.3735, 0.6987, -0.4806]) tensor([0.1592, 0.2992, 0.4142, 0.1274]) -Greedy action tensor([-0.2686, -0.9419, -0.6688, -0.0612]) tensor([0.2932, 0.1495, 0.1965, 0.3608]) -Greedy action tensor([ 0.0722, -0.5624, 0.7749, -0.3627]) tensor([0.2383, 0.1263, 0.4811, 0.1542]) -Greedy action tensor([-1.0173, -0.6341, 0.6914, 0.4088]) tensor([0.0823, 0.1207, 0.4544, 0.3426]) -Greedy action tensor([ 1.0425, 0.5353, -0.0393, 0.6782]) tensor([0.3794, 0.2285, 0.1286, 0.2636]) -Greedy action tensor([ 0.3802, 0.2572, -0.1861, 0.0564]) tensor([0.3149, 0.2785, 0.1788, 0.2278]) -Greedy action tensor([1.2453, 0.2386, 1.1584, 0.2011]) tensor([0.3796, 0.1387, 0.3480, 0.1336]) -Greedy action tensor([-1.3101, -0.3872, -1.1608, -0.0625]) tensor([0.1226, 0.3084, 0.1423, 0.4267]) -Greedy action tensor([-0.0589, -1.1441, 0.2403, 0.6523]) tensor([0.2117, 0.0715, 0.2856, 0.4312]) -Greedy action tensor([-0.8522, -1.3670, 0.0335, -0.1807]) tensor([0.1672, 0.0999, 0.4055, 0.3273]) -Greedy action tensor([ 0.3978, -0.4776, 0.4768, 0.8230]) tensor([0.2482, 0.1034, 0.2686, 0.3797]) -Greedy action tensor([-0.8578, -1.3357, -1.2338, -0.2019]) tensor([0.2362, 0.1465, 0.1622, 0.4551]) -Greedy action tensor([ 1.4880, -1.5269, 0.9235, 0.7410]) tensor([0.4781, 0.0235, 0.2719, 0.2265]) -Greedy action tensor([ 0.3555, 0.3656, 0.8389, -0.6459]) tensor([0.2501, 0.2526, 0.4055, 0.0919]) -Greedy action tensor([ 1.0967, -0.0399, -0.2007, 0.4569]) tensor([0.4714, 0.1513, 0.1288, 0.2486]) -Greedy action tensor([-0.6729, -1.1946, 0.5670, -0.4039]) tensor([0.1573, 0.0934, 0.5435, 0.2058]) -Greedy action tensor([ 0.1778, -0.1710, 0.9351, -0.2355]) tensor([0.2222, 0.1568, 0.4739, 0.1470]) -Greedy action tensor([0.8216, 0.2553, 0.4732, 1.3381]) tensor([0.2532, 0.1437, 0.1787, 0.4244]) -Greedy action tensor([ 1.0982, -1.6991, 1.0648, 0.3495]) tensor([0.3998, 0.0244, 0.3867, 0.1891]) -Greedy action tensor([ 1.1799, -1.0321, 0.1281, 0.7062]) tensor([0.4804, 0.0526, 0.1678, 0.2992]) -Greedy action tensor([-0.1099, -1.3045, 0.1878, 0.7659]) tensor([0.1980, 0.0600, 0.2667, 0.4754]) -Greedy action tensor([1.1411, 0.3933, 0.3078, 0.9883]) tensor([0.3615, 0.1711, 0.1571, 0.3103]) -Greedy action tensor([ 0.1408, -0.8078, 1.9285, -0.1655]) tensor([0.1235, 0.0478, 0.7378, 0.0909]) -Greedy action tensor([-0.2542, -1.3218, -0.4273, -0.1850]) tensor([0.3071, 0.1056, 0.2583, 0.3291]) -Greedy action tensor([ 0.6185, 0.4237, -0.2020, 0.1645]) tensor([0.3450, 0.2840, 0.1519, 0.2191]) -Greedy action tensor([-0.3091, -0.7376, 1.3224, 0.6456]) tensor([0.1068, 0.0696, 0.5461, 0.2775]) -Greedy action tensor([ 0.6353, -1.5517, 0.0776, -0.1376]) tensor([0.4659, 0.0523, 0.2667, 0.2151]) -Greedy action tensor([-2.1829, -0.5824, 2.2715, -0.6903]) tensor([0.0104, 0.0514, 0.8921, 0.0461]) -Greedy action tensor([ 1.6929, -0.7379, -0.1791, 1.3415]) tensor([0.5140, 0.0452, 0.0791, 0.3617]) -Greedy action tensor([ 0.1000, 0.2541, -0.8128, -0.2374]) tensor([0.3047, 0.3555, 0.1223, 0.2175]) -Greedy action tensor([-0.0641, -0.5629, -0.1853, 0.2244]) tensor([0.2613, 0.1587, 0.2315, 0.3486]) -Greedy action tensor([ 0.4982, -0.5990, -0.0508, 0.1815]) tensor([0.3788, 0.1264, 0.2188, 0.2760]) -Greedy action tensor([ 1.6101, -0.7604, 1.2376, 0.5185]) tensor([0.4721, 0.0441, 0.3253, 0.1585]) -Greedy action tensor([-0.4788, -0.0508, -0.2953, 0.9402]) tensor([0.1271, 0.1950, 0.1527, 0.5252]) -Greedy action tensor([ 0.7777, -1.5544, -0.0611, 1.2047]) tensor([0.3266, 0.0317, 0.1412, 0.5005]) -Greedy action tensor([-0.5392, 0.1710, 1.1880, -0.4965]) tensor([0.1031, 0.2097, 0.5797, 0.1076]) -Greedy action tensor([ 0.0044, -1.6122, 0.8437, -0.9867]) tensor([0.2574, 0.0511, 0.5959, 0.0955]) -Greedy action tensor([ 0.7339, -1.0723, 0.5086, 1.3202]) tensor([0.2660, 0.0437, 0.2123, 0.4780]) -Greedy action tensor([ 0.8743, -0.0693, 1.1144, 0.8807]) tensor([0.2727, 0.1061, 0.3467, 0.2745]) -Greedy action tensor([ 1.5918e-01, -2.8138e-01, 2.9346e-04, 5.7829e-01]) tensor([0.2489, 0.1602, 0.2124, 0.3785]) -Greedy action tensor([-1.4493, 0.7960, -0.0084, -0.0501]) tensor([0.0534, 0.5045, 0.2257, 0.2164]) -Greedy action tensor([-0.4225, -0.4733, 1.3084, -1.0098]) tensor([0.1227, 0.1166, 0.6926, 0.0682]) -Greedy action tensor([ 1.2683, 0.7001, -0.1956, -0.3596]) tensor([0.5015, 0.2841, 0.1160, 0.0985]) -Greedy action tensor([ 0.9038, -1.0138, -0.3359, 1.0582]) tensor([0.3841, 0.0564, 0.1112, 0.4483]) -Greedy action tensor([1.0826, 0.1293, 0.7643, 1.0951]) tensor([0.3200, 0.1233, 0.2327, 0.3240]) -Greedy action tensor([ 1.3030, -1.4178, 0.7705, 0.4910]) tensor([0.4769, 0.0314, 0.2800, 0.2117]) -Greedy action tensor([ 0.2244, -2.3352, 0.0062, 1.1842]) tensor([0.2226, 0.0172, 0.1790, 0.5812]) -Greedy action tensor([0.7767, 0.1418, 0.3992, 1.0760]) tensor([0.2805, 0.1487, 0.1923, 0.3784]) -Greedy action tensor([-0.9153, -1.1740, -0.3977, -0.6417]) tensor([0.2099, 0.1620, 0.3522, 0.2759]) -Greedy action tensor([-1.5237, -1.4565, 2.1246, 0.0826]) tensor([0.0220, 0.0235, 0.8448, 0.1096]) -Greedy action tensor([-0.8238, -0.0399, 0.3503, -0.7532]) tensor([0.1334, 0.2921, 0.4314, 0.1431]) -Greedy action tensor([-0.4975, -1.5770, 0.1372, 0.2805]) tensor([0.1851, 0.0629, 0.3491, 0.4029]) -Greedy action tensor([ 1.2390, -1.6896, 0.4710, 0.6593]) tensor([0.4813, 0.0257, 0.2233, 0.2696]) -Greedy action tensor([ 0.6141, -0.2209, -0.3376, -0.0787]) tensor([0.4310, 0.1870, 0.1664, 0.2156]) -Greedy action tensor([ 0.7208, -1.3814, -0.4212, 0.2629]) tensor([0.4822, 0.0589, 0.1539, 0.3050]) -Greedy action tensor([ 0.0990, -1.7697, 1.2568, -1.4526]) tensor([0.2198, 0.0339, 0.6997, 0.0466]) -Greedy action tensor([ 0.9287, -0.2848, -0.7640, -0.3590]) tensor([0.5691, 0.1691, 0.1047, 0.1570]) -Greedy action tensor([ 1.4361, -0.0755, -1.2331, 0.7731]) tensor([0.5540, 0.1222, 0.0384, 0.2855]) -Greedy action tensor([ 1.3508, -0.8928, 0.7148, 0.4540]) tensor([0.4894, 0.0519, 0.2591, 0.1996]) -Greedy action tensor([0.9171, 0.5225, 0.0041, 1.0206]) tensor([0.3140, 0.2116, 0.1260, 0.3483]) -Greedy action tensor([-0.4616, -2.4714, -0.2100, 0.3307]) tensor([0.2160, 0.0290, 0.2779, 0.4771]) -Greedy action tensor([ 0.6962, -0.4611, 0.5108, -0.4983]) tensor([0.4085, 0.1284, 0.3394, 0.1237]) -Greedy action tensor([ 0.1125, -0.1123, -0.5802, 0.1788]) tensor([0.2970, 0.2372, 0.1485, 0.3173]) -Greedy action tensor([ 1.2805, -0.5327, 0.0942, 0.6681]) tensor([0.4974, 0.0811, 0.1519, 0.2696]) -Greedy action tensor([ 0.9815, -0.0995, -0.0090, 0.3123]) tensor([0.4499, 0.1526, 0.1671, 0.2304]) -Greedy action tensor([0.9521, 0.0626, 0.0459, 0.5382]) tensor([0.4039, 0.1659, 0.1632, 0.2670]) -Greedy action tensor([ 1.5703, -0.5603, -0.2798, 0.3886]) tensor([0.6318, 0.0750, 0.0993, 0.1938]) -Greedy action tensor([ 1.0641, -0.0792, -0.1653, 0.3144]) tensor([0.4799, 0.1530, 0.1404, 0.2268]) -Greedy action tensor([ 1.3075, -0.1778, -1.1326, 0.4598]) tensor([0.5741, 0.1300, 0.0500, 0.2459]) -Greedy action tensor([ 0.8323, -0.6718, -0.3965, 0.2743]) tensor([0.4791, 0.1065, 0.1402, 0.2742]) -Greedy action tensor([ 0.5222, -0.5393, -0.4343, -0.0372]) tensor([0.4345, 0.1503, 0.1669, 0.2483]) -Greedy action tensor([ 0.9905, -0.1327, -0.1330, 0.2695]) tensor([0.4680, 0.1522, 0.1522, 0.2276]) -Greedy action tensor([ 1.2102, -0.3757, -0.1832, -0.0444]) tensor([0.5753, 0.1178, 0.1428, 0.1641]) -Greedy action tensor([ 0.2706, -0.2346, -0.2827, 0.2572]) tensor([0.3159, 0.1906, 0.1817, 0.3117]) -Greedy action tensor([ 1.3421, 0.2790, -0.0559, 0.1528]) tensor([0.5272, 0.1821, 0.1303, 0.1605]) -Greedy action tensor([ 1.3445, -0.2992, -0.0432, 0.1276]) tensor([0.5750, 0.1111, 0.1436, 0.1703]) -Greedy action tensor([ 0.9727, -0.4612, 0.2333, 0.0480]) tensor([0.4734, 0.1128, 0.2260, 0.1878]) -Greedy action tensor([ 0.5702, -0.5030, -0.5935, 0.5538]) tensor([0.3791, 0.1296, 0.1184, 0.3729]) -Greedy action tensor([ 0.7028, -0.2278, -0.2265, 0.3024]) tensor([0.4066, 0.1603, 0.1606, 0.2725]) -Greedy action tensor([ 1.8330, -0.7891, -0.3425, 0.7344]) tensor([0.6581, 0.0478, 0.0747, 0.2194]) -Greedy action tensor([ 1.0133, -0.6758, 0.1424, 0.2648]) tensor([0.4816, 0.0889, 0.2016, 0.2278]) -Greedy action tensor([ 1.3868, -0.4854, -0.3315, 0.4370]) tensor([0.5814, 0.0894, 0.1043, 0.2249]) -Greedy action tensor([ 1.3990, -0.8869, -0.3041, 0.2926]) tensor([0.6194, 0.0630, 0.1128, 0.2048]) -Greedy action tensor([ 1.2872, -0.5697, -0.3925, 0.3067]) tensor([0.5822, 0.0909, 0.1085, 0.2184]) -Greedy action tensor([ 1.2824, -0.6272, -0.1066, 0.4532]) tensor([0.5453, 0.0808, 0.1360, 0.2380]) -Greedy action tensor([ 1.5179, -0.5411, -0.2976, 0.3809]) tensor([0.6207, 0.0792, 0.1010, 0.1991]) -Greedy action tensor([ 1.9811, -0.6639, -0.4493, 0.6888]) tensor([0.6975, 0.0495, 0.0614, 0.1916]) -Greedy action tensor([ 1.5669, -0.8390, -0.2492, 0.1680]) tensor([0.6668, 0.0601, 0.1085, 0.1646]) -Greedy action tensor([ 1.4058, 0.0190, -0.1992, 0.3754]) tensor([0.5532, 0.1382, 0.1111, 0.1974]) -Greedy action tensor([ 1.6010, 0.0603, -0.2571, 0.7405]) tensor([0.5577, 0.1195, 0.0870, 0.2359]) -Greedy action tensor([ 1.4652, -0.7185, -0.4291, -0.1511]) tensor([0.6841, 0.0771, 0.1029, 0.1359]) -Greedy action tensor([ 1.1221, -0.3845, -0.1777, 0.4110]) tensor([0.5037, 0.1116, 0.1373, 0.2474]) -Greedy action tensor([ 1.2714, -0.6418, -0.3003, -0.0795]) tensor([0.6195, 0.0914, 0.1287, 0.1604]) -Greedy action tensor([ 1.5071, -0.3246, -0.5511, 0.2151]) tensor([0.6400, 0.1025, 0.0817, 0.1758]) -Greedy action tensor([ 0.9251, -0.3012, -0.2963, -0.0276]) tensor([0.5066, 0.1486, 0.1494, 0.1954]) -Greedy action tensor([ 1.5650, -0.6325, -0.3244, 0.6415]) tensor([0.6026, 0.0669, 0.0911, 0.2393]) -Greedy action tensor([ 1.5359, -0.0895, -0.6426, 0.2565]) tensor([0.6296, 0.1239, 0.0713, 0.1752]) -Greedy action tensor([ 2.4538, -1.0442, -0.2843, 0.7629]) tensor([0.7817, 0.0237, 0.0506, 0.1441]) -Greedy action tensor([ 1.0552, -0.4961, -0.3291, 0.9943]) tensor([0.4161, 0.0882, 0.1042, 0.3915]) -Greedy action tensor([ 1.1255, -0.0297, -0.6879, 0.0887]) tensor([0.5456, 0.1719, 0.0890, 0.1935]) -Greedy action tensor([1.5584, 0.0250, 0.0647, 0.2903]) tensor([0.5808, 0.1253, 0.1304, 0.1634]) -Greedy action tensor([ 1.2970, -0.2721, -0.4653, 0.2437]) tensor([0.5785, 0.1205, 0.0993, 0.2018]) -Greedy action tensor([ 1.2840, -0.6328, -0.2867, 0.4274]) tensor([0.5619, 0.0826, 0.1168, 0.2386]) -Greedy action tensor([ 1.1019, -0.4101, -0.2823, 0.1212]) tensor([0.5417, 0.1194, 0.1357, 0.2032]) -Greedy action tensor([ 1.9303, 0.1125, 0.1102, -0.3982]) tensor([0.7033, 0.1142, 0.1139, 0.0685]) -Greedy action tensor([ 1.3543, -0.2795, -0.3899, 0.3298]) tensor([0.5784, 0.1129, 0.1011, 0.2076]) -Greedy action tensor([ 1.5065, -0.4702, -0.3654, 0.2431]) tensor([0.6349, 0.0879, 0.0977, 0.1795]) -Greedy action tensor([ 1.5070, -0.4857, -0.0884, 0.4050]) tensor([0.5983, 0.0816, 0.1214, 0.1988]) -Greedy action tensor([ 0.5387, 0.0052, -0.4559, 0.4933]) tensor([0.3434, 0.2014, 0.1270, 0.3282]) -Greedy action tensor([ 1.2598, -0.7931, -0.0469, 0.4546]) tensor([0.5417, 0.0695, 0.1466, 0.2421]) -Greedy action tensor([ 1.7786, -0.2263, -0.6853, 0.2729]) tensor([0.6937, 0.0934, 0.0590, 0.1539]) -Greedy action tensor([ 2.1680, 0.1174, -0.0811, 0.1735]) tensor([0.7298, 0.0939, 0.0770, 0.0993]) -Greedy action tensor([ 1.6280, -0.9326, -0.4404, 0.1333]) tensor([0.7003, 0.0541, 0.0885, 0.1571]) -Greedy action tensor([ 0.7186, -0.5163, -0.1530, 0.2324]) tensor([0.4303, 0.1252, 0.1800, 0.2646]) -Greedy action tensor([ 2.0254, -0.8129, -0.3021, 0.5321]) tensor([0.7243, 0.0424, 0.0706, 0.1627]) -Greedy action tensor([ 0.3892, -0.4216, -0.1002, -0.0182]) tensor([0.3673, 0.1633, 0.2251, 0.2444]) -Greedy action tensor([ 1.2053, -0.1747, -0.1350, 0.1647]) tensor([0.5357, 0.1348, 0.1402, 0.1892]) -Greedy action tensor([ 1.0509, -0.3598, -0.3806, 0.3436]) tensor([0.5061, 0.1235, 0.1209, 0.2495]) -Greedy action tensor([ 1.7040, -0.5282, -0.1900, 0.3143]) tensor([0.6636, 0.0712, 0.0999, 0.1653]) -Greedy action tensor([ 1.2373, -0.7127, -0.4890, 0.3625]) tensor([0.5756, 0.0819, 0.1024, 0.2400]) -Greedy action tensor([ 1.2288, -0.1485, -0.5168, 0.1631]) tensor([0.5646, 0.1424, 0.0985, 0.1945]) -Greedy action tensor([ 1.3805, -0.5745, -0.5362, 0.5713]) tensor([0.5768, 0.0816, 0.0848, 0.2568]) -Greedy action tensor([ 1.4446, -0.6519, -0.2059, 0.6204]) tensor([0.5703, 0.0701, 0.1095, 0.2501]) -Greedy action tensor([ 1.6694, -0.5416, -0.2490, 0.6423]) tensor([0.6194, 0.0679, 0.0910, 0.2218]) -Greedy action tensor([ 1.4737, -0.5864, -0.2773, 0.5397]) tensor([0.5903, 0.0752, 0.1025, 0.2320]) -Greedy action tensor([ 1.8450, -0.7089, -0.4375, 0.4569]) tensor([0.6996, 0.0544, 0.0714, 0.1746]) -Greedy action tensor([ 1.6567, -0.4244, -0.3928, 0.8189]) tensor([0.5930, 0.0740, 0.0764, 0.2566]) -Greedy action tensor([ 1.7571, -0.7303, -0.4128, 0.2934]) tensor([0.6999, 0.0582, 0.0799, 0.1620]) -Greedy action tensor([ 1.4136, -0.0932, -0.3834, 0.4657]) tensor([0.5634, 0.1249, 0.0934, 0.2183]) -Greedy action tensor([ 1.4885, -0.7123, -0.5182, 0.6253]) tensor([0.5999, 0.0664, 0.0806, 0.2530]) -Greedy action tensor([ 1.0060, 0.1372, -0.3707, -0.0695]) tensor([0.4968, 0.2084, 0.1254, 0.1695]) -Greedy action tensor([ 1.2320, -0.0367, -0.2333, 0.3167]) tensor([0.5228, 0.1470, 0.1208, 0.2094]) -Greedy action tensor([ 1.3767, -0.8037, -0.2427, 0.3719]) tensor([0.5963, 0.0674, 0.1181, 0.2183]) -Greedy action tensor([ 1.6828, -0.9502, 0.0568, 0.2541]) tensor([0.6631, 0.0476, 0.1304, 0.1589]) -Greedy action tensor([ 1.7090, -0.2205, 0.1632, 0.4116]) tensor([0.6129, 0.0890, 0.1306, 0.1675]) -Greedy action tensor([ 0.9928, -0.2393, -0.5388, 0.3377]) tensor([0.4933, 0.1439, 0.1066, 0.2562]) -Greedy action tensor([ 1.0497, -0.5565, -0.1327, 0.1347]) tensor([0.5242, 0.1052, 0.1607, 0.2099]) -Greedy action tensor([ 0.9758, -0.0419, 0.0135, -0.2898]) tensor([0.4937, 0.1784, 0.1886, 0.1393]) -Greedy action tensor([ 0.8466, -0.2573, -0.2661, 0.1077]) tensor([0.4678, 0.1551, 0.1537, 0.2234]) -Greedy action tensor([ 0.9625, -0.2969, -0.2302, 0.1246]) tensor([0.4951, 0.1405, 0.1502, 0.2142]) -Greedy action tensor([ 1.5519, -0.6796, -0.4879, 0.7416]) tensor([0.5945, 0.0638, 0.0773, 0.2644]) -Greedy action tensor([ 1.0811, -0.1447, -0.3102, 0.2600]) tensor([0.5045, 0.1481, 0.1255, 0.2220]) -Greedy action tensor([ 1.2947, -0.3541, -0.3481, 0.2585]) tensor([0.5746, 0.1105, 0.1111, 0.2038]) -Greedy action tensor([ 0.7563, -0.3362, -0.3408, -0.0974]) tensor([0.4773, 0.1601, 0.1593, 0.2033]) -Greedy action tensor([ 1.6716, -0.6805, -0.1011, 0.1194]) tensor([0.6771, 0.0644, 0.1150, 0.1434]) -Greedy action tensor([ 1.5542, -0.5641, -0.9471, 0.5356]) tensor([0.6397, 0.0769, 0.0524, 0.2310]) -Greedy action tensor([-1.8634, -0.4391, 0.6355, -0.1315]) tensor([0.0435, 0.1808, 0.5297, 0.2460]) -Greedy action tensor([-1.9223, -0.4439, 0.6532, -0.1708]) tensor([0.0412, 0.1806, 0.5409, 0.2373]) -Greedy action tensor([-1.7160, -0.2971, 0.5255, -0.0191]) tensor([0.0500, 0.2067, 0.4704, 0.2729]) -Greedy action tensor([-1.8233, -0.3766, 0.6518, -0.1059]) tensor([0.0440, 0.1872, 0.5234, 0.2454]) -Greedy action tensor([-1.9193, -0.4421, 0.6514, -0.1708]) tensor([0.0413, 0.1810, 0.5403, 0.2374]) -Greedy action tensor([-1.9140, -0.4010, 0.6480, -0.1553]) tensor([0.0411, 0.1868, 0.5333, 0.2388]) -Greedy action tensor([-1.6455, -0.5076, 0.5266, 0.0443]) tensor([0.0546, 0.1704, 0.4792, 0.2958]) -Greedy action tensor([-1.6921, -0.6048, 0.9856, 0.1468]) tensor([0.0403, 0.1196, 0.5866, 0.2535]) -Greedy action tensor([-1.8693, -0.3818, 0.6596, -0.1069]) tensor([0.0420, 0.1860, 0.5271, 0.2449]) -Greedy action tensor([-1.9016, -0.1321, -0.2221, -0.4444]) tensor([0.0605, 0.3551, 0.3245, 0.2598]) -Greedy action tensor([-1.6930, -0.5026, 0.5421, 0.0067]) tensor([0.0523, 0.1721, 0.4892, 0.2864]) -Greedy action tensor([-1.8483, -0.4144, 0.6109, -0.1339]) tensor([0.0446, 0.1869, 0.5211, 0.2474]) -Greedy action tensor([-1.8796, -0.1435, 0.5860, -0.1380]) tensor([0.0414, 0.2350, 0.4874, 0.2363]) -Greedy action tensor([-1.5340, -0.4268, 0.5259, 0.2158]) tensor([0.0567, 0.1717, 0.4451, 0.3264]) -Greedy action tensor([-1.7119, 0.2230, 0.5792, -0.3186]) tensor([0.0458, 0.3170, 0.4527, 0.1845]) -Greedy action tensor([-1.7797, -0.4810, 0.5851, -0.0634]) tensor([0.0479, 0.1756, 0.5099, 0.2666]) -Greedy action tensor([-1.8864, -0.3849, 0.6526, -0.0981]) tensor([0.0414, 0.1860, 0.5249, 0.2477]) -Greedy action tensor([-1.8098, -0.4999, 0.5878, -0.1114]) tensor([0.0472, 0.1751, 0.5195, 0.2582]) -Greedy action tensor([-1.6606, -0.3949, 0.5085, -0.0193]) tensor([0.0542, 0.1921, 0.4741, 0.2797]) -Greedy action tensor([-1.1004, -0.5942, 0.2511, 0.2284]) tensor([0.0971, 0.1611, 0.3751, 0.3667]) -Greedy action tensor([-1.3339, 0.4151, 0.3185, -0.0457]) tensor([0.0641, 0.3686, 0.3347, 0.2325]) -Greedy action tensor([-1.3227, -0.4888, 0.5176, -0.1855]) tensor([0.0786, 0.1810, 0.4952, 0.2452]) -Greedy action tensor([-1.8449, -0.3247, 0.6164, -0.0876]) tensor([0.0433, 0.1980, 0.5076, 0.2511]) -Greedy action tensor([-1.9144, -0.4361, 0.6533, -0.1629]) tensor([0.0413, 0.1813, 0.5390, 0.2383]) -Greedy action tensor([-1.6491, -0.2106, 0.4694, -0.0475]) tensor([0.0541, 0.2279, 0.4498, 0.2682]) -Greedy action tensor([-1.4801, -0.5457, 0.4124, 0.1087]) tensor([0.0663, 0.1688, 0.4401, 0.3248]) -Greedy action tensor([-1.2586, -0.6121, 0.2776, 0.2337]) tensor([0.0833, 0.1590, 0.3871, 0.3705]) -Greedy action tensor([-1.2634, -0.4570, 0.2835, -0.1296]) tensor([0.0905, 0.2028, 0.4253, 0.2814]) -Greedy action tensor([-1.7438, -0.2733, 0.6610, -0.0349]) tensor([0.0456, 0.1982, 0.5046, 0.2516]) -Greedy action tensor([-1.1553, 0.2567, 0.2779, 0.5195]) tensor([0.0683, 0.2805, 0.2865, 0.3648]) -Greedy action tensor([-1.7366, -0.5458, 0.6317, -0.0054]) tensor([0.0485, 0.1596, 0.5180, 0.2739]) -Greedy action tensor([-1.8540, -0.3099, 0.6314, -0.0914]) tensor([0.0425, 0.1992, 0.5105, 0.2478]) -Greedy action tensor([-1.3668, -0.1074, 0.2757, 0.2127]) tensor([0.0688, 0.2423, 0.3553, 0.3337]) -Greedy action tensor([-1.8877, -0.3435, 0.6190, -0.1525]) tensor([0.0423, 0.1983, 0.5193, 0.2401]) -Greedy action tensor([-0.6432, 0.4635, 0.5876, 1.1572]) tensor([0.0741, 0.2240, 0.2536, 0.4483]) -Greedy action tensor([-1.8586, -0.3810, 0.6321, -0.1053]) tensor([0.0431, 0.1887, 0.5197, 0.2486]) -Greedy action tensor([-1.7838, -0.4403, 0.6451, -0.0196]) tensor([0.0454, 0.1741, 0.5154, 0.2651]) -Greedy action tensor([-1.7988, -0.5404, 0.6622, -0.0900]) tensor([0.0460, 0.1618, 0.5385, 0.2538]) -Greedy action tensor([-0.4142, -0.3594, 0.1814, 0.1250]) tensor([0.1790, 0.1891, 0.3248, 0.3070]) -Greedy action tensor([-0.7809, -0.4062, 0.6024, 1.0680]) tensor([0.0782, 0.1137, 0.3117, 0.4965]) -Greedy action tensor([-1.9296, -0.4188, 0.6568, -0.1699]) tensor([0.0406, 0.1840, 0.5394, 0.2360]) -Greedy action tensor([-0.8656, -0.5357, 0.2571, 0.4614]) tensor([0.1083, 0.1506, 0.3328, 0.4082]) -Greedy action tensor([-1.8921, -0.4102, 0.6357, -0.1563]) tensor([0.0424, 0.1865, 0.5307, 0.2404]) -Greedy action tensor([-1.9207, -0.4072, 0.6482, -0.1595]) tensor([0.0410, 0.1861, 0.5346, 0.2384]) -Greedy action tensor([-1.9391, -0.4535, 0.6638, -0.1781]) tensor([0.0404, 0.1786, 0.5458, 0.2352]) -Greedy action tensor([-1.8080, -0.3992, 0.6503, -0.0442]) tensor([0.0442, 0.1809, 0.5168, 0.2580]) -Greedy action tensor([-1.1698, 0.4421, 0.3337, -0.1914]) tensor([0.0759, 0.3806, 0.3415, 0.2020]) -Greedy action tensor([-0.7905, -0.4843, 0.4900, 0.8542]) tensor([0.0898, 0.1220, 0.3231, 0.4651]) -Greedy action tensor([-1.8173, -0.4730, 0.6108, -0.1504]) tensor([0.0466, 0.1786, 0.5281, 0.2467]) -Greedy action tensor([-1.3170, -0.5796, 0.3306, 0.1791]) tensor([0.0784, 0.1640, 0.4074, 0.3502]) -Greedy action tensor([-0.4600, -0.0028, 0.1161, -0.0269]) tensor([0.1695, 0.2677, 0.3015, 0.2613]) -Greedy action tensor([-1.7571, -0.4854, 0.5884, -0.0357]) tensor([0.0485, 0.1732, 0.5068, 0.2715]) -Greedy action tensor([-1.9199, -0.4514, 0.6570, -0.1691]) tensor([0.0412, 0.1790, 0.5424, 0.2374]) -Greedy action tensor([-1.1828, 0.3142, 0.4206, 0.5529]) tensor([0.0621, 0.2773, 0.3085, 0.3521]) -Greedy action tensor([-1.3120, -0.3397, 0.3523, 0.1549]) tensor([0.0754, 0.1994, 0.3983, 0.3269]) -Greedy action tensor([-0.1173, 0.2546, 0.9439, 1.6242]) tensor([0.0905, 0.1313, 0.2616, 0.5165]) -Greedy action tensor([-1.9200, -0.4279, 0.6627, -0.1553]) tensor([0.0408, 0.1813, 0.5397, 0.2382]) -Greedy action tensor([-0.9861, -0.6024, 0.7257, 1.2263]) tensor([0.0583, 0.0856, 0.3231, 0.5330]) -Greedy action tensor([-1.8385, -0.4365, 0.6646, -0.0460]) tensor([0.0429, 0.1745, 0.5248, 0.2578]) -Greedy action tensor([-1.4598, -0.5758, 0.4986, 0.3157]) tensor([0.0609, 0.1475, 0.4319, 0.3597]) -Greedy action tensor([-1.8032, -0.5399, 0.7785, -0.0070]) tensor([0.0420, 0.1487, 0.5558, 0.2534]) -Greedy action tensor([-1.8744, -0.4214, 0.6273, -0.1565]) tensor([0.0434, 0.1855, 0.5294, 0.2418]) -Greedy action tensor([1.0783, 1.3340, 0.1315, 0.8286]) tensor([0.2892, 0.3734, 0.1122, 0.2253]) -Greedy action tensor([-1.8528, -0.4156, 0.6225, -0.1265]) tensor([0.0440, 0.1853, 0.5233, 0.2474]) -Greedy action tensor([-1.6517, -0.3189, 0.5139, -0.0801]) tensor([0.0546, 0.2069, 0.4758, 0.2627]) -Greedy action tensor([-1.9180, -0.4474, 0.6552, -0.1602]) tensor([0.0412, 0.1794, 0.5403, 0.2391]) -Greedy action tensor([-1.9278, -0.4472, 0.6558, -0.1703]) tensor([0.0409, 0.1799, 0.5420, 0.2373]) -Greedy action tensor([-1.6625, -0.2558, 0.5721, 0.0318]) tensor([0.0503, 0.2055, 0.4702, 0.2740]) -Greedy action tensor([-1.9353, -0.4355, 0.6629, -0.1751]) tensor([0.0404, 0.1812, 0.5434, 0.2350]) -Greedy action tensor([-1.2637, -0.5011, 0.3202, 0.4208]) tensor([0.0746, 0.1599, 0.3635, 0.4020]) -Greedy action tensor([-1.5260, -0.3890, 0.4514, -0.0229]) tensor([0.0631, 0.1968, 0.4561, 0.2839]) -Greedy action tensor([-1.8251, -0.3391, 0.5937, -0.1252]) tensor([0.0452, 0.1997, 0.5077, 0.2474]) -Greedy action tensor([-1.9087, -0.4660, 0.7020, -0.1124]) tensor([0.0402, 0.1702, 0.5472, 0.2424]) -Greedy action tensor([-1.6312, -0.4802, 0.5333, 0.1462]) tensor([0.0532, 0.1683, 0.4636, 0.3148]) -Greedy action tensor([-1.7319, -0.4509, 0.5520, -0.0681]) tensor([0.0508, 0.1828, 0.4984, 0.2681]) -Greedy action tensor([-0.8244, 0.5810, 0.1642, -0.0061]) tensor([0.0997, 0.4064, 0.2679, 0.2260]) -Greedy action tensor([-1.4522, -0.4934, 0.4820, -0.1018]) tensor([0.0695, 0.1813, 0.4809, 0.2682]) -Greedy action tensor([-1.9017, -0.4425, 0.6499, -0.1462]) tensor([0.0418, 0.1799, 0.5364, 0.2419]) -Greedy action tensor([-1.8295, -0.6364, 1.5995, 0.8853]) tensor([0.0199, 0.0656, 0.6139, 0.3006]) -Greedy action tensor([-1.2204, 0.5699, 0.2674, 0.4157]) tensor([0.0604, 0.3619, 0.2674, 0.3102]) -Greedy action tensor([-1.8909, -0.4334, 0.6504, -0.1293]) tensor([0.0420, 0.1804, 0.5332, 0.2445]) -Greedy action tensor([ 0.3073, -0.0028, -0.1404, -0.0620]) tensor([0.3264, 0.2394, 0.2086, 0.2256]) -Greedy action tensor([ 0.5669, -0.5831, 0.2366, -0.5199]) tensor([0.4215, 0.1335, 0.3029, 0.1422]) -Greedy action tensor([ 0.5226, -0.1377, 0.0429, -0.1375]) tensor([0.3770, 0.1948, 0.2334, 0.1948]) -Greedy action tensor([ 0.2952, 0.0267, -0.1155, -0.2470]) tensor([0.3323, 0.2541, 0.2204, 0.1932]) -Greedy action tensor([ 0.6578, -0.6007, -0.1556, -0.3717]) tensor([0.4797, 0.1363, 0.2127, 0.1713]) -Greedy action tensor([ 0.4238, -0.3397, -0.2155, -0.3380]) tensor([0.4064, 0.1894, 0.2145, 0.1897]) -Greedy action tensor([ 0.5889, -0.4182, -0.2576, -0.5950]) tensor([0.4761, 0.1739, 0.2042, 0.1457]) -Greedy action tensor([ 0.6658, -0.3019, -0.0286, -0.1841]) tensor([0.4335, 0.1647, 0.2165, 0.1853]) -Greedy action tensor([ 0.3924, -0.0146, 0.0163, 0.0127]) tensor([0.3294, 0.2192, 0.2261, 0.2253]) -Greedy action tensor([ 0.6734, -0.5728, -0.0543, -0.1646]) tensor([0.4539, 0.1305, 0.2192, 0.1963]) -Greedy action tensor([ 0.8643, -0.8573, -0.0404, -0.2364]) tensor([0.5219, 0.0933, 0.2112, 0.1736]) -Greedy action tensor([ 0.7875, -0.2091, 0.0637, -0.3982]) tensor([0.4631, 0.1709, 0.2245, 0.1415]) -Greedy action tensor([ 0.6865, -0.2524, 0.0156, -0.2377]) tensor([0.4350, 0.1701, 0.2224, 0.1726]) -Greedy action tensor([ 0.2874, 0.3079, -0.0423, 0.0732]) tensor([0.2819, 0.2878, 0.2027, 0.2276]) -Greedy action tensor([ 0.9886, -0.5067, -0.1247, -0.0897]) tensor([0.5283, 0.1184, 0.1735, 0.1797]) -Greedy action tensor([ 0.8694, -0.4922, 0.0933, -0.2673]) tensor([0.4908, 0.1258, 0.2259, 0.1575]) -Greedy action tensor([ 0.5522, 0.0156, 0.0974, -0.2062]) tensor([0.3721, 0.2176, 0.2361, 0.1743]) -Greedy action tensor([ 0.7199, -0.6242, 0.3019, -0.6491]) tensor([0.4601, 0.1200, 0.3029, 0.1170]) -Greedy action tensor([ 0.5768, -0.2478, -0.0640, -0.2713]) tensor([0.4178, 0.1832, 0.2201, 0.1789]) -Greedy action tensor([ 0.4320, -0.1672, 0.1199, -0.4684]) tensor([0.3721, 0.2044, 0.2723, 0.1512]) -Greedy action tensor([ 0.6420, -0.2734, 0.0205, -0.3433]) tensor([0.4328, 0.1733, 0.2324, 0.1616]) -Greedy action tensor([ 0.7842, -0.6089, -0.0462, -0.1872]) tensor([0.4848, 0.1204, 0.2113, 0.1835]) -Greedy action tensor([ 0.5336, -0.1744, 0.0285, -0.1071]) tensor([0.3812, 0.1878, 0.2301, 0.2009]) -Greedy action tensor([ 0.8579, -0.2418, -0.0103, -0.0365]) tensor([0.4626, 0.1540, 0.1942, 0.1891]) -Greedy action tensor([ 0.3013, -0.1897, -0.0785, -0.2203]) tensor([0.3461, 0.2118, 0.2367, 0.2054]) -Greedy action tensor([ 0.2815, -0.2452, -0.1395, -0.2772]) tensor([0.3548, 0.2095, 0.2329, 0.2029]) -Greedy action tensor([ 0.3397, 0.0107, -0.1408, -0.1458]) tensor([0.3386, 0.2437, 0.2094, 0.2084]) -Greedy action tensor([ 0.3704, -0.0099, 0.0602, -0.4999]) tensor([0.3526, 0.2411, 0.2586, 0.1477]) -Greedy action tensor([ 0.5265, -0.3672, -0.0587, -0.1853]) tensor([0.4070, 0.1665, 0.2267, 0.1997]) -Greedy action tensor([ 0.6725, -0.7453, -0.0848, -0.2988]) tensor([0.4785, 0.1159, 0.2244, 0.1812]) -Greedy action tensor([ 0.9638, -0.5688, -0.0712, -0.4958]) tensor([0.5545, 0.1198, 0.1970, 0.1288]) -Greedy action tensor([ 0.7665, -0.6480, 0.0656, -0.2816]) tensor([0.4785, 0.1163, 0.2374, 0.1678]) -Greedy action tensor([ 0.4857, -0.3222, 0.0258, -0.3967]) tensor([0.4015, 0.1790, 0.2534, 0.1661]) -Greedy action tensor([ 0.3612, 0.0966, -0.0505, 0.1326]) tensor([0.3100, 0.2379, 0.2054, 0.2467]) -Greedy action tensor([ 0.4717, 0.2116, -0.3532, -0.0225]) tensor([0.3547, 0.2735, 0.1555, 0.2164]) -Greedy action tensor([ 1.0594, -0.3051, -0.3362, -0.4190]) tensor([0.5776, 0.1476, 0.1431, 0.1317]) -Greedy action tensor([ 1.1170, -0.9150, 0.0575, -0.4658]) tensor([0.5941, 0.0779, 0.2060, 0.1220]) -Greedy action tensor([ 0.7031, -0.3051, -0.0718, -0.2408]) tensor([0.4515, 0.1648, 0.2080, 0.1757]) -Greedy action tensor([ 0.6445, -0.2557, -0.0992, 0.0320]) tensor([0.4126, 0.1677, 0.1961, 0.2236]) -Greedy action tensor([ 0.2816, 0.0993, -0.1998, -0.1153]) tensor([0.3201, 0.2668, 0.1978, 0.2153]) -Greedy action tensor([ 1.4697, -0.9562, -0.0115, -1.0211]) tensor([0.7150, 0.0632, 0.1626, 0.0592]) -Greedy action tensor([ 0.6367, -0.6185, -0.1394, -0.6714]) tensor([0.4961, 0.1414, 0.2283, 0.1341]) -Greedy action tensor([ 1.1821, -0.9012, 0.2452, -0.6819]) tensor([0.5983, 0.0745, 0.2344, 0.0928]) -Greedy action tensor([ 0.7492, -0.5993, 0.2968, -0.7730]) tensor([0.4731, 0.1228, 0.3009, 0.1032]) -Greedy action tensor([ 0.6735, -0.5900, 0.1353, -0.5412]) tensor([0.4623, 0.1307, 0.2699, 0.1372]) -Greedy action tensor([ 0.4229, -0.4479, -0.0708, -0.2043]) tensor([0.3901, 0.1633, 0.2381, 0.2084]) -Greedy action tensor([ 0.2172, 0.5699, -0.1794, -0.1175]) tensor([0.2624, 0.3734, 0.1765, 0.1878]) -Greedy action tensor([ 0.9203, -0.4150, -0.1092, -0.3160]) tensor([0.5234, 0.1377, 0.1869, 0.1520]) -Greedy action tensor([ 0.7956, 0.0027, 0.0062, -0.1802]) tensor([0.4379, 0.1982, 0.1989, 0.1651]) -Greedy action tensor([ 0.5191, 0.1968, -0.1021, 0.1964]) tensor([0.3349, 0.2426, 0.1799, 0.2425]) -Greedy action tensor([ 0.6919, -0.5295, -0.0603, -0.4661]) tensor([0.4807, 0.1417, 0.2266, 0.1510]) -Greedy action tensor([ 0.4905, -0.2444, 0.0179, -0.3241]) tensor([0.3928, 0.1884, 0.2449, 0.1739]) -Greedy action tensor([ 0.0492, -0.2251, 0.0487, -0.4444]) tensor([0.2967, 0.2255, 0.2966, 0.1811]) -Greedy action tensor([ 0.5242, 0.0616, -0.0162, -0.3753]) tensor([0.3818, 0.2404, 0.2224, 0.1553]) -Greedy action tensor([ 0.5594, -0.1559, -0.1486, -0.1202]) tensor([0.4019, 0.1965, 0.1980, 0.2037]) -Greedy action tensor([ 0.5878, -0.1585, 0.0297, -0.2933]) tensor([0.4064, 0.1927, 0.2326, 0.1684]) -Greedy action tensor([ 0.4444, -0.2135, -0.0195, -0.2606]) tensor([0.3787, 0.1961, 0.2381, 0.1871]) -Greedy action tensor([ 0.9634, -0.3157, 0.0571, -0.1778]) tensor([0.4996, 0.1390, 0.2018, 0.1596]) -Greedy action tensor([ 0.7299, 0.1210, -0.1091, -0.0444]) tensor([0.4103, 0.2232, 0.1773, 0.1892]) -Greedy action tensor([ 0.4429, -0.3939, -0.0088, -0.2660]) tensor([0.3903, 0.1691, 0.2485, 0.1921]) -Greedy action tensor([ 0.5368, -0.3360, -0.0305, -0.1644]) tensor([0.4031, 0.1684, 0.2286, 0.1999]) -Greedy action tensor([ 0.6947, -0.2973, -0.3904, -0.5292]) tensor([0.4993, 0.1852, 0.1687, 0.1468]) -Greedy action tensor([ 0.3949, -0.0045, -0.0174, -0.0872]) tensor([0.3389, 0.2273, 0.2244, 0.2093]) -Greedy action tensor([ 0.7283, -0.5427, -0.2363, -0.3762]) tensor([0.5017, 0.1408, 0.1912, 0.1663]) -Greedy action tensor([ 0.6070, -0.6713, -0.1279, -0.3803]) tensor([0.4694, 0.1307, 0.2251, 0.1749]) -Greedy action tensor([ 0.3849, -0.0825, -0.1368, -0.1300]) tensor([0.3549, 0.2224, 0.2106, 0.2121]) -Greedy action tensor([ 0.4570, 0.2514, -0.1572, 0.1140]) tensor([0.3263, 0.2656, 0.1765, 0.2315]) -Greedy action tensor([ 0.8123, -0.3076, -0.1281, -0.2436]) tensor([0.4843, 0.1580, 0.1891, 0.1685]) -Greedy action tensor([ 1.1369, -0.5270, 0.0059, -0.4434]) tensor([0.5821, 0.1102, 0.1878, 0.1199]) -Greedy action tensor([ 1.1842, -0.6755, 0.0343, -0.4999]) tensor([0.6031, 0.0939, 0.1910, 0.1120]) -Greedy action tensor([ 0.5768, -0.4099, -0.0907, -0.2070]) tensor([0.4269, 0.1591, 0.2190, 0.1950]) -Greedy action tensor([ 1.2802, -1.3054, 0.1496, -0.4898]) tensor([0.6376, 0.0480, 0.2058, 0.1086]) -Greedy action tensor([ 0.6470, -0.3590, 0.0678, -0.5306]) tensor([0.4476, 0.1637, 0.2508, 0.1379]) -Greedy action tensor([ 0.5060, -0.5004, -0.2330, -0.1631]) tensor([0.4246, 0.1552, 0.2028, 0.2174]) -Greedy action tensor([ 0.9531, -0.5261, -0.0109, -0.6297]) tensor([0.5511, 0.1255, 0.2102, 0.1132]) -Greedy action tensor([ 1.1131, -0.8731, -0.0337, -0.8930]) tensor([0.6292, 0.0863, 0.1999, 0.0846]) -Greedy action tensor([ 0.4697, -0.3321, 0.0214, -0.0488]) tensor([0.3728, 0.1672, 0.2381, 0.2219]) -Greedy action tensor([-0.0172, -0.3181, -0.1595, -0.2032]) tensor([0.2909, 0.2153, 0.2523, 0.2415]) -Greedy action tensor([ 0.9884, -0.8202, 0.0173, -0.6104]) tensor([0.5732, 0.0939, 0.2170, 0.1159]) -Greedy action tensor([ 0.7565, -0.4344, -0.0417, -0.4022]) tensor([0.4835, 0.1470, 0.2177, 0.1518]) -Greedy action tensor([ 0.9124, -0.9145, 0.0964, -0.8599]) tensor([0.5640, 0.0908, 0.2494, 0.0958]) -Greedy action tensor([ 0.8647, -0.3746, 0.2944, -0.0180]) tensor([0.4408, 0.1276, 0.2492, 0.1823]) -Greedy action tensor([ 1.3194, -0.6608, 0.0701, 0.1107]) tensor([0.5803, 0.0801, 0.1664, 0.1733]) -Greedy action tensor([ 1.5275, -0.3864, -0.2200, 0.5681]) tensor([0.5866, 0.0865, 0.1022, 0.2247]) -Greedy action tensor([ 1.6432, -0.3568, -0.4679, 0.1727]) tensor([0.6728, 0.0911, 0.0815, 0.1546]) -Greedy action tensor([ 1.2494, -0.3857, -0.3603, 0.4789]) tensor([0.5383, 0.1049, 0.1076, 0.2491]) -Greedy action tensor([ 1.3101, -0.5031, 0.1902, 0.2203]) tensor([0.5477, 0.0894, 0.1787, 0.1842]) -Greedy action tensor([ 1.4605, -0.3373, -0.4994, 0.4205]) tensor([0.6024, 0.0998, 0.0849, 0.2129]) -Greedy action tensor([ 1.1828, 0.1980, -0.3906, 0.1159]) tensor([0.5195, 0.1940, 0.1077, 0.1787]) -Greedy action tensor([ 1.4548, -0.3979, -0.2375, 0.4388]) tensor([0.5872, 0.0921, 0.1081, 0.2126]) -Greedy action tensor([ 1.6171, -0.2626, -0.9396, 0.5284]) tensor([0.6382, 0.0974, 0.0495, 0.2149]) -Greedy action tensor([ 1.1099, -0.3415, -0.3535, 0.5129]) tensor([0.4960, 0.1162, 0.1148, 0.2730]) -Greedy action tensor([ 1.2964, -0.3034, -0.6466, 0.1496]) tensor([0.6014, 0.1214, 0.0862, 0.1910]) -Greedy action tensor([ 1.9169, -0.9079, -0.6839, 1.0999]) tensor([0.6348, 0.0377, 0.0471, 0.2804]) -Greedy action tensor([ 1.8596, -0.3060, -0.3378, 0.2537]) tensor([0.7010, 0.0804, 0.0779, 0.1407]) -Greedy action tensor([ 1.4157, -0.3211, -0.1269, 0.3070]) tensor([0.5814, 0.1024, 0.1243, 0.1919]) -Greedy action tensor([ 1.5051, -0.2955, -0.4915, 0.6801]) tensor([0.5750, 0.0950, 0.0781, 0.2520]) -Greedy action tensor([ 1.5532, -0.4785, -0.3999, 0.3437]) tensor([0.6364, 0.0834, 0.0903, 0.1899]) -Greedy action tensor([ 2.6468, -1.2361, -0.0601, 0.8874]) tensor([0.7940, 0.0163, 0.0530, 0.1367]) -Greedy action tensor([ 1.4547, 0.1237, -0.7252, 0.5122]) tensor([0.5660, 0.1495, 0.0640, 0.2205]) -Greedy action tensor([ 1.2488, 0.0383, -0.6495, 0.5209]) tensor([0.5179, 0.1544, 0.0776, 0.2501]) -Greedy action tensor([ 1.5051, -0.5371, -0.3271, 0.2115]) tensor([0.6393, 0.0830, 0.1023, 0.1754]) -Greedy action tensor([ 1.8192, -0.6269, -0.4757, 0.1190]) tensor([0.7299, 0.0632, 0.0736, 0.1333]) -Greedy action tensor([ 0.6619, -0.3266, -0.0633, 0.0120]) tensor([0.4204, 0.1565, 0.2036, 0.2195]) -Greedy action tensor([ 1.7482, 0.4582, -0.1712, 0.3387]) tensor([0.6002, 0.1652, 0.0880, 0.1466]) -Greedy action tensor([ 1.2574, -0.6426, -0.1057, 0.2747]) tensor([0.5619, 0.0840, 0.1438, 0.2103]) -Greedy action tensor([ 1.0115, -0.1071, -0.3081, 0.5013]) tensor([0.4557, 0.1489, 0.1218, 0.2736]) -Greedy action tensor([ 1.0057, -0.4505, -0.2087, 0.2378]) tensor([0.5015, 0.1169, 0.1489, 0.2327]) -Greedy action tensor([ 1.3747, -0.5404, -0.2565, 0.1996]) tensor([0.6054, 0.0892, 0.1185, 0.1869]) -Greedy action tensor([ 1.2404, -0.1756, -0.5940, 0.4625]) tensor([0.5371, 0.1304, 0.0858, 0.2467]) -Greedy action tensor([ 1.2331, -0.0879, -0.0112, 0.2917]) tensor([0.5141, 0.1372, 0.1481, 0.2005]) -Greedy action tensor([ 0.8829, -0.4536, -0.5318, 0.4110]) tensor([0.4696, 0.1234, 0.1141, 0.2929]) -Greedy action tensor([ 1.3689, -0.1533, -1.0191, 0.4636]) tensor([0.5833, 0.1273, 0.0536, 0.2359]) -Greedy action tensor([ 1.0664, -0.4188, -0.4105, 0.2425]) tensor([0.5281, 0.1196, 0.1206, 0.2317]) -Greedy action tensor([ 1.2877, -0.2756, -0.3347, 0.2011]) tensor([0.5733, 0.1201, 0.1132, 0.1934]) -Greedy action tensor([ 1.6157, -0.4485, -0.0943, 0.8728]) tensor([0.5607, 0.0712, 0.1014, 0.2667]) -Greedy action tensor([ 0.7266, -0.2652, -0.3069, 0.1659]) tensor([0.4353, 0.1614, 0.1549, 0.2485]) -Greedy action tensor([ 0.3252, -0.2661, -0.1196, -0.0055]) tensor([0.3433, 0.1901, 0.2200, 0.2466]) -Greedy action tensor([ 0.9889, 0.2467, -0.3025, -0.3712]) tensor([0.4981, 0.2371, 0.1369, 0.1278]) -Greedy action tensor([ 1.1798, -0.8719, -0.1154, 0.0467]) tensor([0.5799, 0.0745, 0.1588, 0.1868]) -Greedy action tensor([ 1.4078, -0.5784, -0.4988, 0.5517]) tensor([0.5846, 0.0802, 0.0869, 0.2483]) -Greedy action tensor([ 1.7966, -0.3800, -0.6917, 0.6957]) tensor([0.6540, 0.0742, 0.0543, 0.2175]) -Greedy action tensor([ 1.1562, -0.5253, 0.1825, 0.1703]) tensor([0.5163, 0.0961, 0.1950, 0.1926]) -Greedy action tensor([ 1.5095, -0.3826, -0.4655, 0.1360]) tensor([0.6482, 0.0977, 0.0899, 0.1641]) -Greedy action tensor([ 0.4194, -0.3212, -0.1222, 0.1326]) tensor([0.3560, 0.1697, 0.2071, 0.2672]) -Greedy action tensor([ 1.9024, 0.1646, -0.5200, 0.5112]) tensor([0.6608, 0.1162, 0.0586, 0.1644]) -Greedy action tensor([ 0.8551, -0.2112, -0.1145, 0.4233]) tensor([0.4214, 0.1451, 0.1598, 0.2737]) -Greedy action tensor([ 1.2276, -0.3237, -0.2717, 0.1783]) tensor([0.5601, 0.1187, 0.1251, 0.1961]) -Greedy action tensor([ 1.4213, -0.9129, -0.1915, 0.4569]) tensor([0.5962, 0.0578, 0.1188, 0.2273]) -Greedy action tensor([ 1.8109, -0.5215, -0.3048, 0.6061]) tensor([0.6590, 0.0640, 0.0794, 0.1976]) -Greedy action tensor([ 1.2248e+00, 1.0691e-01, -3.3988e-01, -4.3713e-04]) tensor([0.5465, 0.1787, 0.1143, 0.1605]) -Greedy action tensor([ 1.7116, -0.5416, -0.3772, 0.8064]) tensor([0.6122, 0.0643, 0.0758, 0.2476]) -Greedy action tensor([ 1.1938, -0.9152, -0.3315, 0.6588]) tensor([0.5196, 0.0631, 0.1130, 0.3043]) -Greedy action tensor([ 2.0366, -0.1276, -0.4441, 0.6938]) tensor([0.6851, 0.0787, 0.0573, 0.1789]) -Greedy action tensor([ 1.3619, -0.3277, -0.4584, 0.0024]) tensor([0.6237, 0.1151, 0.1010, 0.1602]) -Greedy action tensor([ 1.3724, -0.8974, -0.4017, 0.8854]) tensor([0.5298, 0.0547, 0.0899, 0.3255]) -Greedy action tensor([ 0.9476, -0.4534, -0.2031, -0.0664]) tensor([0.5193, 0.1279, 0.1643, 0.1884]) -Greedy action tensor([ 1.5048, -0.5750, -0.4482, 0.4493]) tensor([0.6193, 0.0774, 0.0878, 0.2155]) -Greedy action tensor([ 1.3035, -0.4426, -0.4992, 0.3047]) tensor([0.5856, 0.1022, 0.0965, 0.2157]) -Greedy action tensor([ 1.0431, 0.0324, -0.1827, 0.4792]) tensor([0.4491, 0.1635, 0.1318, 0.2555]) -Greedy action tensor([ 1.3699, -0.6432, -0.2243, 0.3557]) tensor([0.5885, 0.0786, 0.1195, 0.2134]) -Greedy action tensor([ 1.5869, -0.8414, -0.2424, 0.0444]) tensor([0.6837, 0.0603, 0.1098, 0.1462]) -Greedy action tensor([ 1.7023, -0.9147, -0.1746, 0.4974]) tensor([0.6554, 0.0479, 0.1003, 0.1964]) -Greedy action tensor([ 1.2789, -0.4351, -0.2754, 0.4214]) tensor([0.5508, 0.0992, 0.1164, 0.2336]) -Greedy action tensor([ 1.8440, -1.0673, -0.2360, 0.7745]) tensor([0.6568, 0.0357, 0.0821, 0.2254]) -Greedy action tensor([ 2.3932, -1.5404, -0.6139, 0.9544]) tensor([0.7656, 0.0150, 0.0378, 0.1816]) -Greedy action tensor([ 1.4072, -0.2278, -0.4111, 0.0696]) tensor([0.6174, 0.1204, 0.1002, 0.1620]) -Greedy action tensor([ 2.1319, -0.8015, -0.2193, 0.3810]) tensor([0.7564, 0.0403, 0.0720, 0.1313]) -Greedy action tensor([ 2.0479, -0.8738, -0.1762, 0.4366]) tensor([0.7344, 0.0395, 0.0794, 0.1466]) -Greedy action tensor([ 1.7560, -0.4436, -0.3839, 0.6516]) tensor([0.6410, 0.0711, 0.0754, 0.2125]) -Greedy action tensor([ 1.4801, -0.6468, -0.0383, 0.4480]) tensor([0.5901, 0.0703, 0.1293, 0.2102]) -Greedy action tensor([ 1.4440, -0.0540, -0.6099, 0.3128]) tensor([0.5972, 0.1335, 0.0766, 0.1927]) -Greedy action tensor([ 1.9958, -0.8177, -0.2751, 0.7168]) tensor([0.6937, 0.0416, 0.0716, 0.1931]) -Greedy action tensor([ 1.9637, -1.2506, -0.1548, 0.4485]) tensor([0.7246, 0.0291, 0.0871, 0.1592]) -Greedy action tensor([ 1.9982, -0.0473, -0.8240, -0.0334]) tensor([0.7576, 0.0980, 0.0451, 0.0993]) -Greedy action tensor([ 1.1714, -0.4336, -0.3772, 0.1090]) tensor([0.5685, 0.1142, 0.1208, 0.1965]) -Greedy action tensor([ 1.0209, -0.1600, -0.3332, 0.4900]) tensor([0.4644, 0.1426, 0.1199, 0.2731]) -Greedy action tensor([ 1.5435, -0.5448, -0.2010, 0.5471]) tensor([0.5996, 0.0743, 0.1048, 0.2214]) -Greedy action tensor([ 1.1051, -0.5684, -0.5579, 0.5185]) tensor([0.5172, 0.0970, 0.0980, 0.2877]) -Greedy action tensor([ 1.4122, -0.3566, -0.2817, 0.2408]) tensor([0.6009, 0.1025, 0.1104, 0.1862]) -Greedy action tensor([ 1.7284, -0.9078, -0.2852, 0.5610]) tensor([0.6595, 0.0472, 0.0880, 0.2052]) -Greedy action tensor([ 1.6694, -0.3589, -0.5277, 0.3310]) tensor([0.6645, 0.0874, 0.0738, 0.1743]) -Greedy action tensor([-0.2271, -0.8491, 0.8891, 0.1524]) tensor([0.1652, 0.0887, 0.5045, 0.2415]) -Greedy action tensor([-0.0441, -1.2976, -0.1163, 1.1438]) tensor([0.1819, 0.0519, 0.1693, 0.5968]) -Greedy action tensor([ 0.5120, 0.7217, -0.6757, -0.1636]) tensor([0.3282, 0.4048, 0.1001, 0.1670]) -Greedy action tensor([ 0.1162, -1.1788, -0.4502, -0.2661]) tensor([0.3962, 0.1085, 0.2249, 0.2704]) -Greedy action tensor([ 0.3628, -1.5136, 0.4680, 0.7949]) tensor([0.2628, 0.0403, 0.2920, 0.4049]) -Greedy action tensor([-0.0508, -1.0645, -1.0776, -0.7070]) tensor([0.4465, 0.1620, 0.1599, 0.2316]) -Greedy action tensor([ 0.3974, -2.1223, -0.1972, 1.2389]) tensor([0.2530, 0.0204, 0.1396, 0.5870]) -Greedy action tensor([-0.3885, 0.9782, -0.5820, 1.3850]) tensor([0.0859, 0.3370, 0.0708, 0.5062]) -Greedy action tensor([0.1120, 0.5110, 0.7112, 0.2102]) tensor([0.1847, 0.2753, 0.3363, 0.2038]) -Greedy action tensor([ 0.5929, -1.0172, 1.8523, -0.7543]) tensor([0.2007, 0.0401, 0.7070, 0.0522]) -Greedy action tensor([-0.0939, -0.5948, -0.1224, 0.3722]) tensor([0.2397, 0.1453, 0.2330, 0.3821]) -Greedy action tensor([ 0.8912, 0.9128, 0.7820, -1.1603]) tensor([0.3282, 0.3354, 0.2942, 0.0422]) -Greedy action tensor([ 0.2609, -0.9583, 1.8227, 0.3345]) tensor([0.1401, 0.0414, 0.6678, 0.1508]) -Greedy action tensor([ 0.3647, -0.7540, 0.3581, -0.0629]) tensor([0.3364, 0.1099, 0.3342, 0.2194]) -Greedy action tensor([-1.0016, -0.1878, 0.2433, -0.9739]) tensor([0.1289, 0.2909, 0.4477, 0.1325]) -Greedy action tensor([ 1.5049, -0.7678, 0.6087, 1.3855]) tensor([0.4169, 0.0430, 0.1701, 0.3700]) -Greedy action tensor([-0.0656, -0.6194, 0.5652, 0.1169]) tensor([0.2149, 0.1235, 0.4038, 0.2579]) -Greedy action tensor([ 0.8533, 0.5109, 0.8782, -0.7305]) tensor([0.3401, 0.2415, 0.3487, 0.0698]) -Greedy action tensor([ 1.4435, 0.5314, 1.0933, -0.0939]) tensor([0.4308, 0.1731, 0.3035, 0.0926]) -Greedy action tensor([ 0.7792, -0.2496, 0.1317, -0.6050]) tensor([0.4692, 0.1677, 0.2456, 0.1175]) -Greedy action tensor([ 1.4552, -0.2513, 1.3211, 1.1491]) tensor([0.3581, 0.0650, 0.3132, 0.2637]) -Greedy action tensor([-0.4609, -0.3464, -0.2536, -0.8356]) tensor([0.2476, 0.2776, 0.3046, 0.1702]) -Greedy action tensor([ 0.6249, -1.2011, 0.5777, 0.6131]) tensor([0.3223, 0.0519, 0.3074, 0.3185]) -Greedy action tensor([0.9813, 0.6361, 0.2111, 0.7482]) tensor([0.3375, 0.2390, 0.1562, 0.2673]) -Greedy action tensor([0.9422, 0.3468, 0.9034, 0.1062]) tensor([0.3393, 0.1871, 0.3265, 0.1471]) -Greedy action tensor([ 0.2527, -0.2836, -0.1861, 0.4665]) tensor([0.2883, 0.1687, 0.1859, 0.3571]) -Greedy action tensor([ 0.8180, 0.4810, -0.2288, 0.9596]) tensor([0.3108, 0.2219, 0.1091, 0.3581]) -Greedy action tensor([ 0.0722, -0.0317, 0.9280, -0.7534]) tensor([0.2131, 0.1921, 0.5015, 0.0933]) -Greedy action tensor([ 0.2290, -0.2169, -0.5836, 1.3643]) tensor([0.1925, 0.1232, 0.0854, 0.5989]) -Greedy action tensor([-0.2480, 1.6422, 0.2779, -0.3898]) tensor([0.0982, 0.6503, 0.1662, 0.0852]) -Greedy action tensor([-0.9837, -0.5667, -0.1058, -0.5831]) tensor([0.1559, 0.2365, 0.3750, 0.2327]) -Greedy action tensor([ 1.0647, -0.5056, 1.1558, -0.9369]) tensor([0.4101, 0.0853, 0.4492, 0.0554]) -Greedy action tensor([ 1.0097, -0.1759, 0.3653, 0.8716]) tensor([0.3702, 0.1131, 0.1943, 0.3224]) -Greedy action tensor([-0.9275, -1.3051, 0.5612, -0.0022]) tensor([0.1158, 0.0793, 0.5129, 0.2920]) -Greedy action tensor([-0.1125, -0.4320, 0.1846, -0.5129]) tensor([0.2672, 0.1941, 0.3596, 0.1790]) -Greedy action tensor([-0.2987, -1.4692, -0.7717, 1.0585]) tensor([0.1719, 0.0533, 0.1071, 0.6677]) -Greedy action tensor([ 1.0032, -1.8816, -0.1022, 0.9560]) tensor([0.4272, 0.0239, 0.1414, 0.4075]) -Greedy action tensor([-1.8302, -0.1281, 0.3667, -0.7866]) tensor([0.0546, 0.2994, 0.4911, 0.1550]) -Greedy action tensor([ 1.2643, -0.0871, 0.9624, 1.0227]) tensor([0.3592, 0.0930, 0.2656, 0.2821]) -Greedy action tensor([-0.3837, -1.0543, -0.0720, -0.7165]) tensor([0.2782, 0.1423, 0.3800, 0.1995]) -Greedy action tensor([ 1.1472, -1.1494, 0.7743, 0.9433]) tensor([0.3839, 0.0386, 0.2644, 0.3131]) -Greedy action tensor([-0.4166, -0.9629, 0.3655, -0.5637]) tensor([0.2161, 0.1251, 0.4723, 0.1865]) -Greedy action tensor([ 0.3810, -2.4282, -0.4914, 0.1304]) tensor([0.4432, 0.0267, 0.1852, 0.3449]) -Greedy action tensor([ 0.9759, -0.9604, 0.2293, 1.0360]) tensor([0.3731, 0.0538, 0.1768, 0.3962]) -Greedy action tensor([0.4921, 0.3400, 1.2371, 0.5310]) tensor([0.1998, 0.1716, 0.4209, 0.2077]) -Greedy action tensor([-1.3194, 0.1221, -0.7579, -0.8717]) tensor([0.1170, 0.4947, 0.2052, 0.1831]) -Greedy action tensor([-0.6445, -0.7657, 0.0221, -0.4513]) tensor([0.1982, 0.1755, 0.3859, 0.2404]) -Greedy action tensor([ 0.8949, 0.8750, -0.2722, 0.3061]) tensor([0.3513, 0.3444, 0.1094, 0.1950]) -Greedy action tensor([-0.4994, -0.4714, 0.0524, -0.0160]) tensor([0.1857, 0.1909, 0.3224, 0.3011]) -Greedy action tensor([-0.5705, -0.3637, 0.3316, -0.3842]) tensor([0.1695, 0.2085, 0.4178, 0.2042]) -Greedy action tensor([0.1532, 0.1475, 0.3178, 0.8374]) tensor([0.1940, 0.1929, 0.2287, 0.3845]) -Greedy action tensor([ 0.4086, -0.4133, 0.1045, -0.1159]) tensor([0.3611, 0.1587, 0.2664, 0.2137]) -Greedy action tensor([0.2616, 0.2689, 0.0045, 1.3929]) tensor([0.1701, 0.1713, 0.1315, 0.5271]) -Greedy action tensor([ 0.6349, 0.3971, -0.5957, 0.3630]) tensor([0.3518, 0.2774, 0.1028, 0.2681]) -Greedy action tensor([ 0.1989, -0.4253, 1.3317, -0.7143]) tensor([0.1984, 0.1063, 0.6158, 0.0796]) -Greedy action tensor([ 1.3102, -0.0282, 1.3213, -0.2959]) tensor([0.4042, 0.1060, 0.4087, 0.0811]) -Greedy action tensor([-0.5897, -0.6334, 0.2942, 0.2562]) tensor([0.1491, 0.1427, 0.3608, 0.3474]) -Greedy action tensor([ 0.0473, -0.9873, -0.1165, -0.4319]) tensor([0.3542, 0.1259, 0.3007, 0.2193]) -Greedy action tensor([ 1.1739, -0.1547, -0.6333, 0.9372]) tensor([0.4508, 0.1194, 0.0740, 0.3558]) -Greedy action tensor([-0.5208, 0.0723, 1.4614, -0.8270]) tensor([0.0925, 0.1675, 0.6718, 0.0681]) -Greedy action tensor([ 1.3534, -0.9429, 1.4640, 1.1398]) tensor([0.3305, 0.0333, 0.3692, 0.2670]) -Greedy action tensor([ 0.6213, -0.6908, -0.7090, 0.7582]) tensor([0.3731, 0.1005, 0.0986, 0.4278]) -Greedy action tensor([ 0.1636, -1.2468, 0.8685, 0.5571]) tensor([0.2105, 0.0514, 0.4260, 0.3121]) -Greedy action tensor([-0.5739, 0.5017, 0.1884, -0.9985]) tensor([0.1486, 0.4357, 0.3185, 0.0972]) -Greedy action tensor([-0.1494, -0.1297, 1.1159, -0.2140]) tensor([0.1538, 0.1569, 0.5451, 0.1442]) -Greedy action tensor([ 0.9411, 0.5041, 0.3090, -0.4346]) tensor([0.4115, 0.2658, 0.2187, 0.1040]) -Greedy action tensor([ 0.2929, 0.5892, 1.7188, -0.3553]) tensor([0.1423, 0.1913, 0.5920, 0.0744]) -Greedy action tensor([-0.1827, -2.0482, -0.0087, 0.6823]) tensor([0.2119, 0.0328, 0.2521, 0.5032]) -Greedy action tensor([-0.2875, -0.9106, 0.1073, 0.0183]) tensor([0.2284, 0.1225, 0.3390, 0.3101]) -Greedy action tensor([-0.1506, -1.2220, 0.1840, 0.2278]) tensor([0.2381, 0.0816, 0.3327, 0.3476]) -Greedy action tensor([-0.0682, -0.0723, 0.8857, -1.0896]) tensor([0.2019, 0.2011, 0.5242, 0.0727]) -Greedy action tensor([ 1.4069, -0.6153, 0.7282, 0.9600]) tensor([0.4387, 0.0581, 0.2226, 0.2806]) -Greedy action tensor([ 1.1023, 0.3115, -0.4761, 0.8015]) tensor([0.4167, 0.1889, 0.0860, 0.3084]) -Greedy action tensor([-0.7578, -1.4046, 0.5649, 0.8108]) tensor([0.0992, 0.0520, 0.3725, 0.4763]) -Greedy action tensor([-1.0275, -0.6770, 0.3487, -0.3705]) tensor([0.1204, 0.1709, 0.4766, 0.2322]) -Greedy action tensor([ 0.1761, 0.4825, 1.1018, -0.3579]) tensor([0.1829, 0.2484, 0.4615, 0.1072]) -Greedy action tensor([ 0.7341, -0.1718, 0.3491, -0.1241]) tensor([0.3986, 0.1611, 0.2713, 0.1690]) -Greedy action tensor([ 0.3402, 0.2547, 0.1765, -0.4380]) tensor([0.3100, 0.2845, 0.2632, 0.1423]) -Greedy action tensor([ 1.5109, -0.4977, 0.8444, 0.1675]) tensor([0.5239, 0.0703, 0.2690, 0.1367]) -Greedy action tensor([ 1.8588, -0.6737, 1.0788, 1.6179]) tensor([0.4303, 0.0342, 0.1973, 0.3382]) -Greedy action tensor([ 1.3048, -0.0546, 0.9922, 0.4225]) tensor([0.4163, 0.1069, 0.3045, 0.1723]) -Greedy action tensor([ 0.9841, -1.6060, -0.0928, 0.7532]) tensor([0.4526, 0.0340, 0.1542, 0.3593]) -Greedy action tensor([-1.9460, -0.4505, 0.6682, -0.1814]) tensor([0.0401, 0.1788, 0.5472, 0.2340]) -Greedy action tensor([-1.3168, -0.5485, 0.3771, 0.0454]) tensor([0.0800, 0.1725, 0.4352, 0.3123]) -Greedy action tensor([-1.6217, -0.4819, 0.6281, 0.3243]) tensor([0.0485, 0.1517, 0.4602, 0.3396]) -Greedy action tensor([-1.3772, -0.5614, 0.3573, 0.1751]) tensor([0.0733, 0.1656, 0.4151, 0.3460]) -Greedy action tensor([-1.4878, -0.5054, 0.4365, 0.0265]) tensor([0.0664, 0.1773, 0.4546, 0.3017]) -Greedy action tensor([-1.2257, -0.6421, 0.2961, 0.3416]) tensor([0.0822, 0.1473, 0.3765, 0.3940]) -Greedy action tensor([-1.7018, -0.5450, 0.5427, -0.0494]) tensor([0.0531, 0.1688, 0.5010, 0.2771]) -Greedy action tensor([-0.4080, -0.4281, 0.4124, 0.5804]) tensor([0.1441, 0.1413, 0.3274, 0.3873]) -Greedy action tensor([-1.4614, -0.5052, 0.6373, 0.7152]) tensor([0.0486, 0.1265, 0.3964, 0.4285]) -Greedy action tensor([-0.8162, -0.3616, 0.2764, -0.1092]) tensor([0.1318, 0.2077, 0.3931, 0.2673]) -Greedy action tensor([-1.2641, 0.0047, 0.3326, -0.0399]) tensor([0.0775, 0.2758, 0.3829, 0.2638]) -Greedy action tensor([-1.8183, -0.4399, 0.6015, -0.1394]) tensor([0.0464, 0.1840, 0.5212, 0.2485]) -Greedy action tensor([-0.6458, -0.4847, 1.1445, 1.6134]) tensor([0.0564, 0.0662, 0.3377, 0.5397]) -Greedy action tensor([-1.8326, -0.3803, 0.6075, -0.0862]) tensor([0.0445, 0.1901, 0.5104, 0.2551]) -Greedy action tensor([-1.7773, -0.4727, 0.5898, -0.0893]) tensor([0.0482, 0.1776, 0.5138, 0.2605]) -Greedy action tensor([-1.4275, -1.3193, 0.4870, 0.8816]) tensor([0.0527, 0.0588, 0.3577, 0.5308]) -Greedy action tensor([-1.7884, -0.4810, 0.5870, -0.0559]) tensor([0.0474, 0.1751, 0.5096, 0.2679]) -Greedy action tensor([-1.8092, 0.0507, 0.5428, -0.1130]) tensor([0.0428, 0.2747, 0.4493, 0.2332]) -Greedy action tensor([-1.7703, -0.4552, 0.5818, -0.1357]) tensor([0.0491, 0.1830, 0.5161, 0.2518]) -Greedy action tensor([-1.8676, -0.4597, 0.6274, -0.1371]) tensor([0.0438, 0.1789, 0.5304, 0.2470]) -Greedy action tensor([-1.4957, -0.3117, 0.5177, 0.1857]) tensor([0.0584, 0.1907, 0.4372, 0.3137]) -Greedy action tensor([ 1.1775, -0.7559, 0.9965, 1.8223]) tensor([0.2574, 0.0372, 0.2148, 0.4905]) -Greedy action tensor([-1.0329, -0.4550, 0.3623, 0.5993]) tensor([0.0838, 0.1493, 0.3382, 0.4287]) -Greedy action tensor([-1.5828e+00, -1.0920e-01, 4.2317e-01, -6.8253e-04]) tensor([0.0566, 0.2471, 0.4208, 0.2754]) -Greedy action tensor([-1.5860, 0.0385, 0.4699, -0.1268]) tensor([0.0550, 0.2790, 0.4295, 0.2365]) -Greedy action tensor([-1.9117, -0.4539, 0.6472, -0.1596]) tensor([0.0417, 0.1791, 0.5387, 0.2404]) -Greedy action tensor([-1.4068, -0.0400, 0.3768, 0.2799]) tensor([0.0614, 0.2410, 0.3657, 0.3319]) -Greedy action tensor([-1.8155, -0.2572, 0.5801, -0.0973]) tensor([0.0448, 0.2130, 0.4921, 0.2500]) -Greedy action tensor([-1.8799, -0.4307, 0.6332, -0.1438]) tensor([0.0430, 0.1830, 0.5303, 0.2438]) -Greedy action tensor([-1.7525, 0.2445, 0.4646, -0.0782]) tensor([0.0437, 0.3219, 0.4012, 0.2332]) -Greedy action tensor([-1.7838, -0.3282, 0.5796, -0.1394]) tensor([0.0474, 0.2033, 0.5038, 0.2455]) -Greedy action tensor([-1.9336, -0.4512, 0.6621, -0.1748]) tensor([0.0406, 0.1789, 0.5446, 0.2358]) -Greedy action tensor([-1.8741, -0.4454, 0.6293, -0.1424]) tensor([0.0434, 0.1811, 0.5304, 0.2452]) -Greedy action tensor([-1.8390, -0.4728, 0.6119, -0.0919]) tensor([0.0449, 0.1762, 0.5211, 0.2578]) -Greedy action tensor([-1.9081, -0.3472, 0.6386, -0.1500]) tensor([0.0411, 0.1958, 0.5247, 0.2385]) -Greedy action tensor([-1.6904, -0.5063, 0.5209, -0.0193]) tensor([0.0534, 0.1746, 0.4878, 0.2842]) -Greedy action tensor([-1.7571, -0.3533, 0.5578, -0.0681]) tensor([0.0485, 0.1975, 0.4913, 0.2627]) -Greedy action tensor([-1.7404, -0.4996, 0.5528, -0.0380]) tensor([0.0504, 0.1742, 0.4990, 0.2764]) -Greedy action tensor([-1.9020, -0.4464, 0.6439, -0.1524]) tensor([0.0420, 0.1802, 0.5360, 0.2417]) -Greedy action tensor([-1.8509, -0.4883, 0.6152, -0.1308]) tensor([0.0449, 0.1754, 0.5288, 0.2508]) -Greedy action tensor([-1.2587, -0.5977, 0.9209, 1.1512]) tensor([0.0436, 0.0845, 0.3859, 0.4859]) -Greedy action tensor([-0.4709, -0.5302, 0.1903, 0.1625]) tensor([0.1735, 0.1635, 0.3361, 0.3269]) -Greedy action tensor([-0.1851, 0.7017, 0.3413, 0.8527]) tensor([0.1259, 0.3056, 0.2131, 0.3554]) -Greedy action tensor([-1.4593, -0.1363, 0.6883, 0.1791]) tensor([0.0542, 0.2033, 0.4638, 0.2787]) -Greedy action tensor([-1.8163, -0.4206, 0.5927, -0.1009]) tensor([0.0460, 0.1859, 0.5121, 0.2559]) -Greedy action tensor([-1.9656, -0.6456, 0.9122, -0.0400]) tensor([0.0340, 0.1274, 0.6050, 0.2335]) -Greedy action tensor([-1.9102, -0.4395, 0.6718, -0.1538]) tensor([0.0410, 0.1786, 0.5427, 0.2377]) -Greedy action tensor([-0.8092, 0.6155, 0.1726, -0.0973]) tensor([0.1014, 0.4214, 0.2706, 0.2066]) -Greedy action tensor([-1.8860, -0.4602, 0.6358, -0.1502]) tensor([0.0429, 0.1787, 0.5347, 0.2436]) -Greedy action tensor([-1.9366, -0.4580, 0.6700, -0.1672]) tensor([0.0403, 0.1768, 0.5463, 0.2365]) -Greedy action tensor([-1.6663, -0.6648, 0.1083, -0.3769]) tensor([0.0755, 0.2054, 0.4451, 0.2740]) -Greedy action tensor([-1.7868, -0.4851, 0.6069, -0.0024]) tensor([0.0463, 0.1703, 0.5075, 0.2759]) -Greedy action tensor([-1.6792, -0.2318, 0.5017, -0.0415]) tensor([0.0519, 0.2209, 0.4600, 0.2672]) -Greedy action tensor([-1.0206, 0.0271, 0.3619, -0.2075]) tensor([0.0991, 0.2825, 0.3949, 0.2234]) -Greedy action tensor([-1.1771, -0.4634, 0.4812, -0.2600]) tensor([0.0926, 0.1891, 0.4864, 0.2318]) -Greedy action tensor([-1.8623, -0.3471, 0.6236, -0.1289]) tensor([0.0431, 0.1959, 0.5173, 0.2437]) -Greedy action tensor([-1.8784, -0.3117, 0.6275, -0.1215]) tensor([0.0419, 0.2010, 0.5140, 0.2431]) -Greedy action tensor([-1.7617, -0.4186, 0.6194, 0.0228]) tensor([0.0463, 0.1773, 0.5007, 0.2757]) -Greedy action tensor([-1.8522, -0.2958, 0.6029, -0.1184]) tensor([0.0434, 0.2057, 0.5053, 0.2456]) -Greedy action tensor([-1.9195, -0.4167, 0.6505, -0.1684]) tensor([0.0411, 0.1848, 0.5372, 0.2369]) -Greedy action tensor([-1.9143, -0.4644, 0.6760, -0.1499]) tensor([0.0409, 0.1745, 0.5457, 0.2389]) -Greedy action tensor([-1.2566, -0.3483, 0.9511, 1.0039]) tensor([0.0451, 0.1119, 0.4104, 0.4326]) -Greedy action tensor([-1.8639, -0.3959, 0.6398, -0.1205]) tensor([0.0429, 0.1864, 0.5251, 0.2455]) -Greedy action tensor([-1.8083, -0.3280, 0.5985, -0.0890]) tensor([0.0453, 0.1991, 0.5028, 0.2528]) -Greedy action tensor([-1.8941, -0.3627, 0.6310, -0.1435]) tensor([0.0419, 0.1937, 0.5232, 0.2412]) -Greedy action tensor([-1.8616, -0.1564, 0.5810, -0.1114]) tensor([0.0421, 0.2316, 0.4841, 0.2422]) -Greedy action tensor([-1.8417, -0.4921, 0.7067, -0.0115]) tensor([0.0419, 0.1615, 0.5355, 0.2611]) -Greedy action tensor([-1.2227, -0.4584, 0.5499, 0.5832]) tensor([0.0661, 0.1420, 0.3893, 0.4025]) -Greedy action tensor([-1.6066, -0.5353, 0.4764, 0.0357]) tensor([0.0584, 0.1706, 0.4691, 0.3019]) -Greedy action tensor([-1.2224, -0.5296, 0.3481, 0.5892]) tensor([0.0718, 0.1435, 0.3453, 0.4394]) -Greedy action tensor([-0.8803, -0.8657, -0.5445, -0.2584]) tensor([0.1895, 0.1923, 0.2652, 0.3530]) -Greedy action tensor([-1.7662, -0.4446, 0.6071, -0.0019]) tensor([0.0469, 0.1759, 0.5034, 0.2738]) -Greedy action tensor([-1.4244, -0.3325, 0.5159, 0.4367]) tensor([0.0576, 0.1715, 0.4007, 0.3702]) -Greedy action tensor([-1.6951, -0.3710, 0.7624, 0.5024]) tensor([0.0393, 0.1478, 0.4590, 0.3539]) -Greedy action tensor([-1.8779, -0.4210, 0.6504, -0.1247]) tensor([0.0424, 0.1819, 0.5311, 0.2446]) -Greedy action tensor([-1.3693, -0.4085, 0.4179, 0.1237]) tensor([0.0712, 0.1862, 0.4255, 0.3171]) -Greedy action tensor([-1.9384, -0.4421, 0.6634, -0.1755]) tensor([0.0404, 0.1802, 0.5443, 0.2352]) -Greedy action tensor([-1.5990, -0.5519, 0.5540, -0.1249]) tensor([0.0594, 0.1693, 0.5117, 0.2595]) -Greedy action tensor([-1.8751, -0.4678, 0.6403, -0.1415]) tensor([0.0433, 0.1767, 0.5352, 0.2449]) -Greedy action tensor([-1.8194, -0.1720, 0.5644, -0.0922]) tensor([0.0441, 0.2291, 0.4786, 0.2482]) -Greedy action tensor([-1.8780, -0.4101, 0.6273, -0.1390]) tensor([0.0430, 0.1864, 0.5261, 0.2445]) -Greedy action tensor([ 1.4073, -1.2732, 0.0628, -0.8487]) tensor([0.6974, 0.0478, 0.1818, 0.0731]) -Greedy action tensor([ 0.6335, -0.0581, -0.0733, -0.1957]) tensor([0.4114, 0.2061, 0.2029, 0.1796]) -Greedy action tensor([ 0.4969, -0.3080, 0.0264, -0.3906]) tensor([0.4027, 0.1800, 0.2515, 0.1658]) -Greedy action tensor([ 1.0383, -0.8588, -0.0625, -0.4620]) tensor([0.5863, 0.0879, 0.1950, 0.1308]) -Greedy action tensor([ 0.6741, -0.3028, -0.0936, -0.3596]) tensor([0.4553, 0.1714, 0.2113, 0.1620]) -Greedy action tensor([ 0.6085, -0.4252, -0.0783, -0.1794]) tensor([0.4322, 0.1537, 0.2175, 0.1966]) -Greedy action tensor([ 0.3519, -0.1843, 0.0424, -0.1274]) tensor([0.3404, 0.1991, 0.2498, 0.2108]) -Greedy action tensor([ 0.5543, -0.3163, -0.0328, -0.1592]) tensor([0.4057, 0.1699, 0.2256, 0.1988]) -Greedy action tensor([ 0.6492, -0.3987, -0.1260, -0.3902]) tensor([0.4619, 0.1620, 0.2128, 0.1634]) -Greedy action tensor([ 0.9034, -0.4628, -0.1965, -0.2832]) tensor([0.5282, 0.1347, 0.1758, 0.1612]) -Greedy action tensor([ 0.8162, -0.5587, -0.0889, -0.1623]) tensor([0.4918, 0.1244, 0.1989, 0.1849]) -Greedy action tensor([ 0.3895, -0.2843, -0.0881, -0.3500]) tensor([0.3835, 0.1955, 0.2379, 0.1831]) -Greedy action tensor([ 0.3256, 0.0842, 0.1124, -0.5631]) tensor([0.3328, 0.2614, 0.2689, 0.1368]) -Greedy action tensor([ 0.5850, -0.4185, -0.0570, -0.4445]) tensor([0.4445, 0.1629, 0.2339, 0.1587]) -Greedy action tensor([ 0.6157, -0.2396, -0.0925, -0.3273]) tensor([0.4334, 0.1843, 0.2135, 0.1688]) -Greedy action tensor([ 0.5002, -0.4107, -0.0893, -0.4146]) tensor([0.4242, 0.1706, 0.2353, 0.1699]) -Greedy action tensor([ 0.8658, -0.3566, 0.1304, -0.2765]) tensor([0.4778, 0.1407, 0.2290, 0.1525]) -Greedy action tensor([ 0.5213, -0.4895, -0.1360, -0.2090]) tensor([0.4230, 0.1539, 0.2192, 0.2038]) -Greedy action tensor([ 8.0397e-01, -5.1725e-01, 7.6669e-04, -1.9358e-01]) tensor([0.4800, 0.1281, 0.2150, 0.1770]) -Greedy action tensor([ 0.5274, -0.4460, 0.1202, -0.3547]) tensor([0.4070, 0.1538, 0.2708, 0.1684]) -Greedy action tensor([ 0.7150, -0.3729, -0.0644, -0.4125]) tensor([0.4718, 0.1590, 0.2164, 0.1528]) -Greedy action tensor([ 1.1151, -0.5329, -0.1466, -0.2689]) tensor([0.5793, 0.1115, 0.1640, 0.1452]) -Greedy action tensor([ 0.8871, -0.6771, -0.0953, -0.7190]) tensor([0.5604, 0.1173, 0.2098, 0.1125]) -Greedy action tensor([ 1.0667, -0.8857, 0.1084, -0.5793]) tensor([0.5820, 0.0826, 0.2232, 0.1122]) -Greedy action tensor([ 0.3809, -0.2180, -0.0169, -0.2566]) tensor([0.3637, 0.1998, 0.2443, 0.1922]) -Greedy action tensor([ 0.7148, -0.3890, 0.0225, -0.3084]) tensor([0.4563, 0.1513, 0.2283, 0.1640]) -Greedy action tensor([0.3209, 0.0503, 0.0291, 0.0744]) tensor([0.3038, 0.2318, 0.2269, 0.2375]) -Greedy action tensor([ 0.7150, -0.7358, -0.1077, -0.2576]) tensor([0.4874, 0.1142, 0.2141, 0.1843]) -Greedy action tensor([ 0.6508, -0.2595, -0.0612, -0.1888]) tensor([0.4301, 0.1731, 0.2110, 0.1858]) -Greedy action tensor([ 0.8026, -0.3677, -0.0108, -0.3264]) tensor([0.4815, 0.1494, 0.2135, 0.1557]) -Greedy action tensor([ 0.5460, -0.1126, -0.1948, -0.0607]) tensor([0.3938, 0.2038, 0.1877, 0.2147]) -Greedy action tensor([ 0.9295, -0.6862, -0.0485, -0.3467]) tensor([0.5394, 0.1072, 0.2028, 0.1505]) -Greedy action tensor([ 0.6110, -0.3405, -0.0372, -0.2807]) tensor([0.4312, 0.1665, 0.2255, 0.1768]) -Greedy action tensor([ 0.5899, 0.1775, -0.0859, 0.1241]) tensor([0.3574, 0.2366, 0.1818, 0.2243]) -Greedy action tensor([ 0.7299, -0.8357, -0.1397, -0.4899]) tensor([0.5199, 0.1086, 0.2179, 0.1535]) -Greedy action tensor([ 0.5752, -0.2206, -0.0781, -0.0398]) tensor([0.3981, 0.1796, 0.2071, 0.2152]) -Greedy action tensor([ 0.7487, 0.0869, -0.2055, -0.2741]) tensor([0.4424, 0.2282, 0.1704, 0.1591]) -Greedy action tensor([ 0.1388, -0.0764, -0.0461, -0.1657]) tensor([0.2963, 0.2389, 0.2463, 0.2185]) -Greedy action tensor([ 1.1994, -0.7979, 0.0206, -0.6930]) tensor([0.6273, 0.0851, 0.1930, 0.0945]) -Greedy action tensor([ 0.1063, -0.0129, 0.2271, -0.1287]) tensor([0.2627, 0.2332, 0.2964, 0.2077]) -Greedy action tensor([ 0.6080, -0.1103, -0.0628, -0.5943]) tensor([0.4349, 0.2120, 0.2224, 0.1307]) -Greedy action tensor([ 0.7042, 0.0098, -0.0534, -0.4265]) tensor([0.4365, 0.2180, 0.2046, 0.1409]) -Greedy action tensor([ 0.5563, -0.1846, -0.0827, -0.0952]) tensor([0.3959, 0.1887, 0.2090, 0.2064]) -Greedy action tensor([ 0.4887, 0.1392, 0.0028, -0.0440]) tensor([0.3440, 0.2425, 0.2116, 0.2019]) -Greedy action tensor([ 0.5103, -0.6722, -0.2198, -0.2484]) tensor([0.4431, 0.1358, 0.2135, 0.2075]) -Greedy action tensor([ 0.4357, 0.0481, -0.0296, -0.1743]) tensor([0.3509, 0.2381, 0.2203, 0.1906]) -Greedy action tensor([ 0.6940, 0.0342, -0.0232, 0.0593]) tensor([0.3945, 0.2039, 0.1925, 0.2091]) -Greedy action tensor([ 0.9257, -0.5340, -0.1683, -0.2888]) tensor([0.5365, 0.1246, 0.1796, 0.1593]) -Greedy action tensor([ 0.8373, -0.4074, 0.0110, -0.2270]) tensor([0.4829, 0.1391, 0.2114, 0.1666]) -Greedy action tensor([ 1.0257, -0.3204, 0.0633, -0.6150]) tensor([0.5446, 0.1418, 0.2080, 0.1056]) -Greedy action tensor([ 0.3597, 0.0995, -0.1602, -0.2461]) tensor([0.3435, 0.2648, 0.2042, 0.1874]) -Greedy action tensor([ 0.7096, -0.5271, -0.0449, -0.3370]) tensor([0.4736, 0.1375, 0.2227, 0.1663]) -Greedy action tensor([ 0.5846, -0.0554, -0.5040, -0.4871]) tensor([0.4532, 0.2390, 0.1526, 0.1552]) -Greedy action tensor([ 0.5562, -0.0576, -0.1792, -0.1818]) tensor([0.4002, 0.2166, 0.1918, 0.1913]) -Greedy action tensor([ 0.4602, -0.1515, -0.0799, -0.1719]) tensor([0.3764, 0.2042, 0.2193, 0.2001]) -Greedy action tensor([ 0.2895, -0.2101, -0.1149, -0.2195]) tensor([0.3478, 0.2110, 0.2321, 0.2091]) -Greedy action tensor([ 0.8391, -0.9699, 0.2974, -0.7635]) tensor([0.5136, 0.0841, 0.2988, 0.1034]) -Greedy action tensor([ 0.6633, -0.4487, -0.0197, -0.2164]) tensor([0.4447, 0.1462, 0.2246, 0.1845]) -Greedy action tensor([ 0.4795, 0.2373, 0.0455, -0.2734]) tensor([0.3444, 0.2703, 0.2231, 0.1622]) -Greedy action tensor([ 0.6409, -0.1559, 0.0615, -0.1624]) tensor([0.4067, 0.1833, 0.2278, 0.1821]) -Greedy action tensor([ 0.4581, -0.3524, -0.1097, -0.3393]) tensor([0.4062, 0.1806, 0.2302, 0.1830]) -Greedy action tensor([ 0.9982, -0.6133, -0.1256, -0.5498]) tensor([0.5756, 0.1149, 0.1871, 0.1224]) -Greedy action tensor([0.1359, 0.7462, 0.0119, 0.2060]) tensor([0.2085, 0.3838, 0.1842, 0.2236]) -Greedy action tensor([ 0.7475, -0.3067, -0.1044, -0.1749]) tensor([0.4603, 0.1604, 0.1963, 0.1830]) -Greedy action tensor([ 1.0454, -0.6868, 0.0601, -0.9285]) tensor([0.5920, 0.1047, 0.2210, 0.0822]) -Greedy action tensor([ 0.4705, -0.0882, -0.0904, -0.1351]) tensor([0.3720, 0.2127, 0.2123, 0.2030]) -Greedy action tensor([ 0.8050, -0.2144, 0.0053, -0.2719]) tensor([0.4649, 0.1677, 0.2090, 0.1584]) -Greedy action tensor([ 0.8491, -0.9462, 0.0690, -0.3124]) tensor([0.5161, 0.0857, 0.2366, 0.1616]) -Greedy action tensor([ 0.4425, -0.3320, -0.0645, -0.1810]) tensor([0.3847, 0.1773, 0.2317, 0.2062]) -Greedy action tensor([ 0.1485, 0.2396, -0.0580, 0.0021]) tensor([0.2651, 0.2904, 0.2156, 0.2290]) -Greedy action tensor([ 0.9607, -0.2255, -0.1390, -0.6415]) tensor([0.5435, 0.1660, 0.1810, 0.1095]) -Greedy action tensor([ 1.3875, -0.7885, 0.4005, -0.9614]) tensor([0.6322, 0.0718, 0.2356, 0.0604]) -Greedy action tensor([ 0.4834, -0.4461, -0.1822, -0.2017]) tensor([0.4145, 0.1636, 0.2130, 0.2089]) -Greedy action tensor([ 0.7667, -0.5594, 0.0193, -0.4237]) tensor([0.4894, 0.1299, 0.2318, 0.1488]) -Greedy action tensor([ 0.8783, -0.7385, 0.0562, -0.6460]) tensor([0.5388, 0.1070, 0.2368, 0.1174]) -Greedy action tensor([ 0.2585, 0.0875, -0.0922, -0.4277]) tensor([0.3278, 0.2763, 0.2309, 0.1651]) -Greedy action tensor([ 0.4331, -0.0754, -0.1515, -0.0596]) tensor([0.3611, 0.2171, 0.2012, 0.2206]) -Greedy action tensor([ 0.5241, -0.1834, -0.1334, -0.3869]) tensor([0.4144, 0.2042, 0.2147, 0.1666]) -Greedy action tensor([ 0.6911, -0.3750, -0.1313, -0.2001]) tensor([0.4558, 0.1570, 0.2003, 0.1870]) -Greedy action tensor([ 0.8549, -0.5472, -0.0963, -0.6352]) tensor([0.5383, 0.1325, 0.2079, 0.1213]) -Greedy action tensor([ 0.8540, -0.5070, 0.0161, -0.3704]) tensor([0.5043, 0.1293, 0.2182, 0.1482]) -Greedy action tensor([ 2.4135, -0.3937, -0.5956, 0.6065]) tensor([0.7850, 0.0474, 0.0387, 0.1289]) -Greedy action tensor([ 1.3202, -0.0798, -0.4669, 0.3311]) tensor([0.5599, 0.1381, 0.0938, 0.2082]) -Greedy action tensor([ 2.0781, -0.7923, -0.0995, 0.5802]) tensor([0.7176, 0.0407, 0.0813, 0.1604]) -Greedy action tensor([ 1.5160, -0.6861, -0.4709, 0.0069]) tensor([0.6808, 0.0753, 0.0934, 0.1505]) -Greedy action tensor([ 1.0426, -0.4465, -0.4040, 0.1769]) tensor([0.5314, 0.1199, 0.1251, 0.2236]) -Greedy action tensor([ 1.9440, -0.8313, -0.4406, 0.4784]) tensor([0.7218, 0.0450, 0.0665, 0.1667]) -Greedy action tensor([ 3.0164, -1.3776, 0.0126, 1.0565]) tensor([0.8314, 0.0103, 0.0412, 0.1171]) -Greedy action tensor([ 1.5285, -0.9596, -0.0769, 0.5855]) tensor([0.5976, 0.0496, 0.1200, 0.2328]) -Greedy action tensor([ 1.2945, -0.4103, -0.6085, 0.0195]) tensor([0.6210, 0.1129, 0.0926, 0.1735]) -Greedy action tensor([ 0.7592, -0.0436, 0.0769, -0.0037]) tensor([0.4133, 0.1852, 0.2089, 0.1927]) -Greedy action tensor([ 1.5058, -0.2912, -0.6641, 0.3623]) tensor([0.6255, 0.1037, 0.0714, 0.1993]) -Greedy action tensor([ 1.6411, 0.0772, -0.1939, 0.0549]) tensor([0.6355, 0.1330, 0.1014, 0.1301]) -Greedy action tensor([ 1.3850, -0.2102, -0.4677, 0.5487]) tensor([0.5577, 0.1131, 0.0875, 0.2417]) -Greedy action tensor([ 2.1731, -0.9841, -0.4576, 0.0976]) tensor([0.8064, 0.0343, 0.0581, 0.1012]) -Greedy action tensor([ 1.7922, -0.0198, -0.2727, 0.1590]) tensor([0.6732, 0.1100, 0.0854, 0.1315]) -Greedy action tensor([ 1.5211, -0.4051, -0.3389, 0.1262]) tensor([0.6455, 0.0940, 0.1005, 0.1600]) -Greedy action tensor([ 1.8660, -0.6902, -0.1562, 0.3593]) tensor([0.6985, 0.0542, 0.0925, 0.1548]) -Greedy action tensor([ 0.8597, -0.5878, -0.0918, 0.1090]) tensor([0.4777, 0.1123, 0.1845, 0.2255]) -Greedy action tensor([ 1.4661, -0.1544, -0.3379, 0.2039]) tensor([0.6077, 0.1202, 0.1001, 0.1720]) -Greedy action tensor([ 1.4769, -0.4536, -0.1444, 0.5568]) tensor([0.5743, 0.0833, 0.1135, 0.2289]) -Greedy action tensor([ 1.2771, -0.6599, -0.0310, 0.3521]) tensor([0.5522, 0.0796, 0.1493, 0.2190]) -Greedy action tensor([ 1.1017, -0.1428, -0.4087, 0.0713]) tensor([0.5360, 0.1544, 0.1183, 0.1913]) -Greedy action tensor([ 1.5975, -0.2696, -0.6121, 0.3349]) tensor([0.6463, 0.0999, 0.0709, 0.1829]) -Greedy action tensor([ 0.6466, -0.4219, -0.1198, 0.0810]) tensor([0.4208, 0.1446, 0.1956, 0.2390]) -Greedy action tensor([ 1.6944, -0.7249, -0.3457, 0.5481]) tensor([0.6507, 0.0579, 0.0846, 0.2068]) -Greedy action tensor([ 1.1852, -0.2197, -0.3805, 0.2713]) tensor([0.5390, 0.1323, 0.1126, 0.2161]) -Greedy action tensor([ 0.9676, -0.1885, -0.3578, -0.2358]) tensor([0.5318, 0.1673, 0.1413, 0.1596]) -Greedy action tensor([ 1.2416, -0.0912, -0.9283, 0.1824]) tensor([0.5798, 0.1529, 0.0662, 0.2010]) -Greedy action tensor([ 1.3363, -0.4218, -0.3742, 0.3694]) tensor([0.5769, 0.0994, 0.1043, 0.2194]) -Greedy action tensor([ 1.1995, 0.0582, -0.8405, 0.3068]) tensor([0.5379, 0.1718, 0.0700, 0.2203]) -Greedy action tensor([ 1.0167, -0.6255, -0.1801, 0.1994]) tensor([0.5162, 0.0999, 0.1560, 0.2279]) -Greedy action tensor([ 1.1393, -0.2617, -0.6273, 0.3353]) tensor([0.5363, 0.1321, 0.0917, 0.2400]) -Greedy action tensor([ 1.0180, -0.2405, -0.2348, 0.3909]) tensor([0.4753, 0.1350, 0.1358, 0.2539]) -Greedy action tensor([ 1.5231, -0.4302, -0.3389, 0.3515]) tensor([0.6223, 0.0882, 0.0967, 0.1928]) -Greedy action tensor([ 1.5047, -0.8373, -0.1620, 0.1773]) tensor([0.6451, 0.0620, 0.1218, 0.1711]) -Greedy action tensor([ 0.7906, -0.6040, 0.2316, 0.0785]) tensor([0.4328, 0.1073, 0.2475, 0.2124]) -Greedy action tensor([ 1.1980, -0.0245, -0.1348, 0.4087]) tensor([0.4969, 0.1463, 0.1311, 0.2257]) -Greedy action tensor([ 1.4596, -0.4780, 0.0020, 0.5389]) tensor([0.5633, 0.0811, 0.1312, 0.2244]) -Greedy action tensor([ 1.4771, -0.6248, -0.2729, 0.1076]) tensor([0.6451, 0.0788, 0.1121, 0.1640]) -Greedy action tensor([ 0.7262, -0.2898, 0.1844, 0.0382]) tensor([0.4088, 0.1480, 0.2378, 0.2054]) -Greedy action tensor([ 1.4149, -0.5611, 0.0897, 0.5462]) tensor([0.5483, 0.0760, 0.1457, 0.2300]) -Greedy action tensor([ 1.7664, -0.3070, -0.6687, 0.3845]) tensor([0.6829, 0.0859, 0.0598, 0.1715]) -Greedy action tensor([ 1.4564, -0.4568, -0.4086, 0.4642]) tensor([0.5976, 0.0882, 0.0926, 0.2216]) -Greedy action tensor([ 1.4978, -0.1346, -0.5598, 0.2507]) tensor([0.6209, 0.1214, 0.0793, 0.1784]) -Greedy action tensor([ 1.0803, 0.0532, -0.2706, 0.0077]) tensor([0.5104, 0.1827, 0.1322, 0.1746]) -Greedy action tensor([ 2.0493, -0.9824, 0.0497, 0.4781]) tensor([0.7187, 0.0347, 0.0973, 0.1493]) -Greedy action tensor([ 1.3666, -0.1265, -0.9746, 0.2480]) tensor([0.6069, 0.1364, 0.0584, 0.1983]) -Greedy action tensor([ 2.4306, -0.0736, -0.2992, 0.6365]) tensor([0.7615, 0.0622, 0.0497, 0.1266]) -Greedy action tensor([ 1.3826, -0.1070, -0.2635, -0.0962]) tensor([0.6075, 0.1370, 0.1171, 0.1385]) -Greedy action tensor([ 0.8250, -0.3730, -0.1141, 0.0675]) tensor([0.4626, 0.1396, 0.1809, 0.2169]) -Greedy action tensor([ 1.6824, -0.0521, -0.0401, 0.1105]) tensor([0.6399, 0.1129, 0.1143, 0.1329]) -Greedy action tensor([ 1.4861, -0.1641, -0.3480, 0.3707]) tensor([0.5954, 0.1143, 0.0951, 0.1952]) -Greedy action tensor([ 1.9844, -0.8663, 0.0599, -0.0889]) tensor([0.7521, 0.0435, 0.1098, 0.0946]) -Greedy action tensor([ 1.5065, -0.2426, -0.6321, 0.8780]) tensor([0.5479, 0.0953, 0.0646, 0.2922]) -Greedy action tensor([ 1.7716, -0.1945, -0.7437, 0.3892]) tensor([0.6794, 0.0951, 0.0549, 0.1705]) -Greedy action tensor([ 0.8511, -0.4458, -0.1597, 0.4036]) tensor([0.4393, 0.1201, 0.1599, 0.2808]) -Greedy action tensor([ 0.9100, -0.1631, -0.4151, 0.3905]) tensor([0.4540, 0.1552, 0.1207, 0.2700]) -Greedy action tensor([ 2.1460, -1.0367, -0.1454, 0.4587]) tensor([0.7532, 0.0312, 0.0762, 0.1394]) -Greedy action tensor([ 1.4912, -0.3097, -0.0617, 0.1925]) tensor([0.6062, 0.1001, 0.1283, 0.1654]) -Greedy action tensor([ 1.1952, -0.3383, -0.1508, 0.1306]) tensor([0.5492, 0.1185, 0.1429, 0.1894]) -Greedy action tensor([ 1.4203, -0.6399, -0.2050, 0.1237]) tensor([0.6259, 0.0798, 0.1232, 0.1711]) -Greedy action tensor([ 1.6238, -0.5896, -0.1885, 0.3409]) tensor([0.6452, 0.0705, 0.1054, 0.1789]) -Greedy action tensor([0.6519, 0.0273, 0.0174, 0.0232]) tensor([0.3848, 0.2060, 0.2040, 0.2052]) -Greedy action tensor([ 1.4173, -0.2907, -0.5693, 0.1693]) tensor([0.6229, 0.1129, 0.0854, 0.1788]) -Greedy action tensor([ 1.2698, -0.0176, -0.2499, 0.4547]) tensor([0.5162, 0.1425, 0.1129, 0.2285]) -Greedy action tensor([ 0.7050, -0.1746, -0.4301, 0.3214]) tensor([0.4136, 0.1716, 0.1329, 0.2818]) -Greedy action tensor([ 2.2140, 0.4972, -0.2703, 0.1260]) tensor([0.7210, 0.1295, 0.0601, 0.0894]) -Greedy action tensor([ 1.0391, 0.0116, -0.2576, 0.4676]) tensor([0.4554, 0.1630, 0.1245, 0.2571]) -Greedy action tensor([ 1.1285, -0.5369, -0.2826, 0.3249]) tensor([0.5317, 0.1006, 0.1297, 0.2380]) -Greedy action tensor([ 1.7624, -0.3064, -0.3484, 0.3801]) tensor([0.6674, 0.0843, 0.0808, 0.1675]) -Greedy action tensor([ 1.7321, -1.0002, -0.4375, 0.9242]) tensor([0.6153, 0.0400, 0.0703, 0.2743]) -Greedy action tensor([ 0.8299, -0.4224, -0.2705, 0.3453]) tensor([0.4475, 0.1279, 0.1489, 0.2757]) -Greedy action tensor([ 1.8967, -0.5936, -0.2345, 0.5922]) tensor([0.6789, 0.0563, 0.0806, 0.1842]) -Greedy action tensor([ 1.3317, -0.2064, -0.3233, 0.4036]) tensor([0.5552, 0.1192, 0.1061, 0.2195]) -Greedy action tensor([ 1.5908, -0.3522, -0.4434, 0.5016]) tensor([0.6209, 0.0890, 0.0812, 0.2089]) -Greedy action tensor([ 0.9619, -0.3912, 0.2376, 0.3376]) tensor([0.4388, 0.1134, 0.2127, 0.2351]) -Greedy action tensor([ 1.7638, -0.9590, -0.4274, 0.3568]) tensor([0.7031, 0.0462, 0.0786, 0.1722]) -Greedy action tensor([ 1.6280, -0.2987, -0.4765, 0.2316]) tensor([0.6601, 0.0961, 0.0805, 0.1633]) -Greedy action tensor([ 1.2734, -0.2490, -0.0478, 0.5966]) tensor([0.5017, 0.1095, 0.1339, 0.2550]) -Greedy action tensor([ 0.8517, -0.1625, -0.0601, 0.1098]) tensor([0.4463, 0.1619, 0.1793, 0.2125]) -Greedy action tensor([ 0.9867, -0.4421, -0.3143, 0.2336]) tensor([0.5043, 0.1208, 0.1373, 0.2375]) -Greedy action tensor([ 1.5580, -0.4459, 0.0673, 0.7460]) tensor([0.5543, 0.0747, 0.1248, 0.2461]) -Greedy action tensor([ 0.4640, -1.0453, -0.1266, 0.9105]) tensor([0.2996, 0.0662, 0.1660, 0.4682]) -Greedy action tensor([ 0.2435, -1.1514, -0.3504, 1.0810]) tensor([0.2433, 0.0603, 0.1343, 0.5621]) -Greedy action tensor([ 0.6171, -1.0346, 0.6233, 0.9251]) tensor([0.2810, 0.0539, 0.2827, 0.3824]) -Greedy action tensor([ 1.8529, -0.6722, -0.2456, 1.0573]) tensor([0.6046, 0.0484, 0.0741, 0.2729]) -Greedy action tensor([ 1.2675, 0.1215, 0.9633, -0.1380]) tensor([0.4346, 0.1382, 0.3206, 0.1066]) -Greedy action tensor([-0.8646, -2.4078, -0.7950, 0.4925]) tensor([0.1621, 0.0346, 0.1737, 0.6296]) -Greedy action tensor([-1.9353, 0.0072, 0.4399, -0.9150]) tensor([0.0465, 0.3244, 0.5001, 0.1290]) -Greedy action tensor([-1.1002, 0.0036, -0.6832, 0.2116]) tensor([0.1082, 0.3262, 0.1641, 0.4016]) -Greedy action tensor([-0.0907, -0.6012, 2.2421, 0.9135]) tensor([0.0683, 0.0410, 0.7042, 0.1865]) -Greedy action tensor([1.7855, 0.3665, 1.2859, 1.2878]) tensor([0.4070, 0.0985, 0.2470, 0.2475]) -Greedy action tensor([-0.2470, 0.6100, -0.1446, 0.4017]) tensor([0.1568, 0.3695, 0.1737, 0.3000]) -Greedy action tensor([ 0.3249, -1.6546, 0.7774, -0.5159]) tensor([0.3183, 0.0440, 0.5004, 0.1373]) -Greedy action tensor([ 0.5030, -1.1385, 0.1983, -0.5123]) tensor([0.4360, 0.0845, 0.3215, 0.1580]) -Greedy action tensor([ 1.2159, -0.5312, 1.2519, 0.1729]) tensor([0.3901, 0.0680, 0.4044, 0.1375]) -Greedy action tensor([ 0.1703, -1.2868, 1.2028, 0.4488]) tensor([0.1865, 0.0434, 0.5237, 0.2464]) -Greedy action tensor([-0.5609, 0.0895, 1.7310, -0.8926]) tensor([0.0739, 0.1417, 0.7314, 0.0531]) -Greedy action tensor([-0.8972, -1.6203, -0.5078, -0.3448]) tensor([0.2128, 0.1033, 0.3142, 0.3697]) -Greedy action tensor([ 1.0611, -0.2445, 1.7589, 1.2609]) tensor([0.2221, 0.0602, 0.4464, 0.2713]) -Greedy action tensor([ 0.9466, -0.4364, -0.1802, 0.0281]) tensor([0.5066, 0.1271, 0.1642, 0.2022]) -Greedy action tensor([1.5593, 0.4087, 0.4202, 0.6505]) tensor([0.4903, 0.1552, 0.1569, 0.1976]) -Greedy action tensor([ 0.6017, -1.2161, 0.4041, 0.6106]) tensor([0.3342, 0.0543, 0.2743, 0.3372]) -Greedy action tensor([ 0.3638, -1.1731, -0.4885, 0.4080]) tensor([0.3722, 0.0800, 0.1587, 0.3890]) -Greedy action tensor([ 1.1510, -0.6963, 1.1310, 0.9300]) tensor([0.3402, 0.0536, 0.3335, 0.2727]) -Greedy action tensor([0.8367, 1.6667, 0.9590, 0.3132]) tensor([0.1994, 0.4572, 0.2253, 0.1181]) -Greedy action tensor([ 0.3399, -0.5655, -0.2654, -0.2489]) tensor([0.3992, 0.1614, 0.2179, 0.2215]) -Greedy action tensor([0.4988, 0.8128, 1.1058, 0.2521]) tensor([0.2006, 0.2746, 0.3681, 0.1567]) -Greedy action tensor([-0.5042, -1.8341, -0.0614, 0.9759]) tensor([0.1386, 0.0367, 0.2158, 0.6089]) -Greedy action tensor([-0.8149, -0.8269, -0.6131, 0.6856]) tensor([0.1299, 0.1284, 0.1590, 0.5827]) -Greedy action tensor([-0.0073, -1.4184, -0.6275, 1.5490]) tensor([0.1533, 0.0374, 0.0825, 0.7268]) -Greedy action tensor([-0.3998, -0.7123, 0.9452, -0.8410]) tensor([0.1610, 0.1178, 0.6178, 0.1035]) -Greedy action tensor([1.4588, 0.1083, 0.2459, 0.8288]) tensor([0.4787, 0.1240, 0.1423, 0.2549]) -Greedy action tensor([ 0.5420, -1.2774, -0.1760, 0.8497]) tensor([0.3322, 0.0539, 0.1620, 0.4519]) -Greedy action tensor([ 0.9083, -0.2732, -0.0112, 1.0032]) tensor([0.3565, 0.1094, 0.1421, 0.3920]) -Greedy action tensor([ 0.3456, 0.6906, 1.3702, -0.6519]) tensor([0.1796, 0.2536, 0.5005, 0.0662]) -Greedy action tensor([-1.3450, 0.8414, 0.2283, -0.5841]) tensor([0.0593, 0.5279, 0.2859, 0.1269]) -Greedy action tensor([1.2016, 0.4867, 0.3655, 0.2191]) tensor([0.4354, 0.2130, 0.1887, 0.1630]) -Greedy action tensor([ 0.8303, -0.7569, 0.8920, 0.4464]) tensor([0.3391, 0.0693, 0.3606, 0.2310]) -Greedy action tensor([ 0.1799, -1.4831, -0.6463, 0.3468]) tensor([0.3560, 0.0675, 0.1558, 0.4207]) -Greedy action tensor([-0.1384, -0.4874, -0.1714, 0.4686]) tensor([0.2218, 0.1565, 0.2146, 0.4070]) -Greedy action tensor([-0.0157, 0.5879, -1.3821, 2.2626]) tensor([0.0779, 0.1424, 0.0199, 0.7599]) -Greedy action tensor([-0.2027, -1.4445, 0.7271, -0.0629]) tensor([0.2011, 0.0581, 0.5096, 0.2313]) -Greedy action tensor([-0.2341, -1.5448, 0.9696, 0.8123]) tensor([0.1342, 0.0362, 0.4473, 0.3822]) -Greedy action tensor([-0.8498, -0.4513, 0.6240, 0.0599]) tensor([0.1071, 0.1595, 0.4675, 0.2659]) -Greedy action tensor([ 0.9435, 0.2605, 0.8224, -0.9736]) tensor([0.3940, 0.1990, 0.3491, 0.0579]) -Greedy action tensor([ 1.3168, -0.9513, -0.2349, -0.0858]) tensor([0.6405, 0.0663, 0.1357, 0.1575]) -Greedy action tensor([ 1.2249, 1.0370, -0.7927, 0.9562]) tensor([0.3668, 0.3040, 0.0488, 0.2804]) -Greedy action tensor([ 0.9758, 0.3236, 0.7670, -0.7828]) tensor([0.3992, 0.2080, 0.3240, 0.0688]) -Greedy action tensor([ 2.2509, -2.0139, 1.0232, 0.5071]) tensor([0.6748, 0.0095, 0.1977, 0.1180]) -Greedy action tensor([ 0.9398, 1.1638, -1.2101, 1.6040]) tensor([0.2320, 0.2902, 0.0270, 0.4507]) -Greedy action tensor([0.9957, 0.3822, 1.2703, 0.1298]) tensor([0.3050, 0.1652, 0.4015, 0.1283]) -Greedy action tensor([ 1.0354, -0.0789, 0.0411, 0.8257]) tensor([0.3986, 0.1308, 0.1475, 0.3232]) -Greedy action tensor([ 0.8641, -1.4820, 0.6555, 0.5364]) tensor([0.3805, 0.0364, 0.3089, 0.2742]) -Greedy action tensor([ 1.5729, -1.8310, 0.2852, 0.0720]) tensor([0.6527, 0.0217, 0.1801, 0.1455]) -Greedy action tensor([ 0.6082, 0.2560, -0.0666, -0.1063]) tensor([0.3701, 0.2602, 0.1885, 0.1812]) -Greedy action tensor([ 0.6846, -0.4608, -0.0208, -0.2545]) tensor([0.4539, 0.1444, 0.2242, 0.1775]) -Greedy action tensor([ 0.3936, 0.5406, 2.6365, -0.8783]) tensor([0.0843, 0.0977, 0.7944, 0.0236]) -Greedy action tensor([-1.9936, -1.1919, -0.2213, 0.1885]) tensor([0.0556, 0.1240, 0.3273, 0.4931]) -Greedy action tensor([-1.5479, -0.0177, -0.7100, -1.1391]) tensor([0.1060, 0.4895, 0.2450, 0.1595]) -Greedy action tensor([ 0.7115, -1.4530, 0.4170, 0.3642]) tensor([0.3897, 0.0447, 0.2903, 0.2753]) -Greedy action tensor([-0.1603, -0.3794, 0.1687, 1.9688]) tensor([0.0862, 0.0692, 0.1198, 0.7248]) -Greedy action tensor([ 0.1137, -1.9608, 0.2907, 0.0263]) tensor([0.3091, 0.0388, 0.3689, 0.2832]) -Greedy action tensor([-1.0526, -0.6205, -1.4784, -0.1073]) tensor([0.1734, 0.2671, 0.1133, 0.4462]) -Greedy action tensor([-0.4136, -1.1981, -0.5671, 0.9090]) tensor([0.1648, 0.0752, 0.1414, 0.6186]) -Greedy action tensor([-0.2262, -0.7454, 0.8151, -0.6587]) tensor([0.1970, 0.1172, 0.5580, 0.1278]) -Greedy action tensor([-0.2189, -0.6271, 2.7652, -1.4065]) tensor([0.0460, 0.0306, 0.9094, 0.0140]) -Greedy action tensor([-0.6290, -0.3762, 2.0316, -1.3514]) tensor([0.0586, 0.0754, 0.8376, 0.0284]) -Greedy action tensor([-0.0497, -0.8593, 0.5686, 0.4323]) tensor([0.2032, 0.0905, 0.3772, 0.3291]) -Greedy action tensor([ 0.2302, -0.1964, -0.3738, -1.2137]) tensor([0.4106, 0.2680, 0.2244, 0.0969]) -Greedy action tensor([-0.2106, 0.4401, -0.6667, -0.7863]) tensor([0.2431, 0.4661, 0.1541, 0.1367]) -Greedy action tensor([-0.7950, -2.5905, -0.5399, 0.8109]) tensor([0.1344, 0.0223, 0.1735, 0.6698]) -Greedy action tensor([ 0.0356, -0.8113, 1.7616, -0.4484]) tensor([0.1305, 0.0559, 0.7331, 0.0804]) -Greedy action tensor([ 0.8213, -0.1558, -0.7575, -0.1020]) tensor([0.5051, 0.1901, 0.1042, 0.2006]) -Greedy action tensor([0.8780, 0.0186, 0.2466, 0.2100]) tensor([0.4052, 0.1716, 0.2155, 0.2078]) -Greedy action tensor([ 0.6285, -0.6437, 0.5501, 1.1748]) tensor([0.2543, 0.0713, 0.2352, 0.4392]) -Greedy action tensor([ 0.0892, -0.0507, 0.5401, 0.1855]) tensor([0.2203, 0.1915, 0.3457, 0.2425]) -Greedy action tensor([ 0.8585, -0.7275, 0.7965, -0.4421]) tensor([0.4137, 0.0847, 0.3889, 0.1127]) -Greedy action tensor([ 0.8877, -1.0218, 0.1672, 0.2627]) tensor([0.4608, 0.0683, 0.2242, 0.2467]) -Greedy action tensor([ 1.0260, 0.4490, 0.6898, -0.0129]) tensor([0.3803, 0.2135, 0.2717, 0.1345]) -Greedy action tensor([ 1.5983, -0.4918, 1.5507, 1.2456]) tensor([0.3597, 0.0445, 0.3430, 0.2528]) -Greedy action tensor([ 0.2564, -1.9092, 0.5628, 0.2452]) tensor([0.2888, 0.0331, 0.3924, 0.2856]) -Greedy action tensor([ 0.0462, -1.1359, 0.4515, 0.5485]) tensor([0.2243, 0.0688, 0.3364, 0.3706]) -Greedy action tensor([-1.5675, -0.2277, 0.5835, 0.1419]) tensor([0.0528, 0.2016, 0.4538, 0.2918]) -Greedy action tensor([-1.2833, -0.5115, 0.6892, 1.1110]) tensor([0.0469, 0.1015, 0.3373, 0.5143]) -Greedy action tensor([-1.7568, -0.3744, 0.6409, 0.0112]) tensor([0.0458, 0.1824, 0.5035, 0.2683]) -Greedy action tensor([-1.7906, -0.4908, 0.6065, -0.0379]) tensor([0.0467, 0.1712, 0.5129, 0.2693]) -Greedy action tensor([-1.8061, -0.4707, 0.7930, 0.2144]) tensor([0.0388, 0.1474, 0.5215, 0.2924]) -Greedy action tensor([-1.7595, -0.4427, 0.5800, -0.0802]) tensor([0.0489, 0.1823, 0.5069, 0.2619]) -Greedy action tensor([-1.5290, -0.4809, 0.6045, 0.4748]) tensor([0.0507, 0.1447, 0.4283, 0.3763]) -Greedy action tensor([-1.2651, -0.6256, 0.7651, 1.0950]) tensor([0.0474, 0.0898, 0.3609, 0.5019]) -Greedy action tensor([-1.8657, -0.4039, 0.6356, -0.1024]) tensor([0.0428, 0.1848, 0.5225, 0.2498]) -Greedy action tensor([-0.7761, -0.5544, 0.1819, 0.1644]) tensor([0.1348, 0.1683, 0.3515, 0.3454]) -Greedy action tensor([-1.5305, -0.5208, 0.4358, 0.1052]) tensor([0.0624, 0.1713, 0.4459, 0.3204]) -Greedy action tensor([-1.9191, -0.4394, 0.6505, -0.1645]) tensor([0.0413, 0.1812, 0.5390, 0.2386]) -Greedy action tensor([-1.7759, -0.6748, 0.5093, -0.0832]) tensor([0.0519, 0.1561, 0.5100, 0.2820]) -Greedy action tensor([-1.8055, -0.4573, 0.6145, -0.1020]) tensor([0.0463, 0.1784, 0.5209, 0.2544]) -Greedy action tensor([-1.7422, -0.0228, 0.5007, -0.0162]) tensor([0.0463, 0.2582, 0.4357, 0.2599]) -Greedy action tensor([-1.1147, -0.3314, 0.5893, 0.8010]) tensor([0.0646, 0.1414, 0.3551, 0.4388]) -Greedy action tensor([-1.7767, -0.4307, 0.6587, -0.0026]) tensor([0.0451, 0.1734, 0.5154, 0.2661]) -Greedy action tensor([-1.8633, -0.2748, 0.6200, -0.0959]) tensor([0.0421, 0.2063, 0.5048, 0.2467]) -Greedy action tensor([-1.0395, -0.4565, 0.2875, -0.2733]) tensor([0.1148, 0.2056, 0.4327, 0.2470]) -Greedy action tensor([-1.9163, -0.3659, 0.6333, -0.1861]) tensor([0.0414, 0.1951, 0.5299, 0.2335]) -Greedy action tensor([-1.8662, -0.4764, 0.6258, -0.1411]) tensor([0.0440, 0.1767, 0.5321, 0.2471]) -Greedy action tensor([-1.9215, -0.4649, 0.6813, -0.1606]) tensor([0.0406, 0.1744, 0.5486, 0.2364]) -Greedy action tensor([-1.7650, -0.5219, 0.6287, -0.0041]) tensor([0.0471, 0.1632, 0.5158, 0.2739]) -Greedy action tensor([-0.8955, -0.5663, 0.2745, 0.2263]) tensor([0.1152, 0.1601, 0.3711, 0.3536]) -Greedy action tensor([-1.7533, -0.3149, 0.5475, -0.0225]) tensor([0.0480, 0.2022, 0.4790, 0.2709]) -Greedy action tensor([-1.9106, -0.4103, 0.6553, -0.1520]) tensor([0.0412, 0.1845, 0.5355, 0.2389]) -Greedy action tensor([-1.9262, -0.4371, 0.6626, -0.1620]) tensor([0.0407, 0.1803, 0.5416, 0.2374]) -Greedy action tensor([-1.1983, 0.5440, 0.4202, -0.5990]) tensor([0.0737, 0.4206, 0.3716, 0.1341]) -Greedy action tensor([-0.9075, -0.5460, 0.2371, 0.2142]) tensor([0.1157, 0.1660, 0.3633, 0.3551]) -Greedy action tensor([-1.9147, -0.4230, 0.6552, -0.1626]) tensor([0.0412, 0.1831, 0.5382, 0.2376]) -Greedy action tensor([-1.2906, -0.6035, 0.5516, 0.5364]) tensor([0.0645, 0.1281, 0.4068, 0.4006]) -Greedy action tensor([-1.8758, -0.3060, 0.6188, -0.1395]) tensor([0.0424, 0.2036, 0.5134, 0.2405]) -Greedy action tensor([-1.6486, -0.3040, 0.6412, 0.0179]) tensor([0.0500, 0.1918, 0.4936, 0.2646]) -Greedy action tensor([-1.8641, -0.3264, 0.6045, -0.1452]) tensor([0.0434, 0.2020, 0.5124, 0.2421]) -Greedy action tensor([-1.7431, -0.3076, 0.6666, 0.0566]) tensor([0.0447, 0.1877, 0.4973, 0.2702]) -Greedy action tensor([-1.9258, -0.4269, 0.6544, -0.1718]) tensor([0.0409, 0.1831, 0.5398, 0.2363]) -Greedy action tensor([-1.1062, -0.5706, 0.3325, 0.4225]) tensor([0.0867, 0.1481, 0.3654, 0.3998]) -Greedy action tensor([-1.4808, -0.4686, 0.6498, 0.4846]) tensor([0.0518, 0.1425, 0.4361, 0.3696]) -Greedy action tensor([-1.8249, -0.2864, 0.6089, -0.1152]) tensor([0.0443, 0.2062, 0.5048, 0.2447]) -Greedy action tensor([-1.8859, -0.3310, 0.6200, -0.1400]) tensor([0.0422, 0.1996, 0.5166, 0.2416]) -Greedy action tensor([-1.8172, -0.2012, 0.5727, -0.1268]) tensor([0.0447, 0.2250, 0.4879, 0.2424]) -Greedy action tensor([-0.4449, -0.1743, 0.1626, 0.1825]) tensor([0.1661, 0.2178, 0.3050, 0.3111]) -Greedy action tensor([-1.8475, -0.4476, 0.6233, -0.1055]) tensor([0.0443, 0.1795, 0.5236, 0.2526]) -Greedy action tensor([-1.7836, -0.4016, 0.5741, -0.0894]) tensor([0.0476, 0.1897, 0.5034, 0.2593]) -Greedy action tensor([-1.1865, -0.5543, 0.3334, 0.3248]) tensor([0.0834, 0.1570, 0.3814, 0.3782]) -Greedy action tensor([-1.9317, -0.4016, 0.6551, -0.1700]) tensor([0.0404, 0.1868, 0.5373, 0.2355]) -Greedy action tensor([-0.9821, -0.2131, 0.5102, 0.9096]) tensor([0.0702, 0.1516, 0.3124, 0.4658]) -Greedy action tensor([-0.7409, -0.6425, 0.2986, 0.0560]) tensor([0.1399, 0.1543, 0.3955, 0.3103]) -Greedy action tensor([-1.3589, 0.2551, 0.4993, 0.2776]) tensor([0.0569, 0.2858, 0.3649, 0.2924]) -Greedy action tensor([-1.6022, -0.3226, 0.6375, 0.0588]) tensor([0.0519, 0.1868, 0.4878, 0.2735]) -Greedy action tensor([-1.0697, 0.8354, 0.0988, 0.2825]) tensor([0.0675, 0.4540, 0.2173, 0.2612]) -Greedy action tensor([-1.6690e+00, -3.5936e-01, 6.4151e-01, -2.3484e-04]) tensor([0.0498, 0.1844, 0.5017, 0.2641]) -Greedy action tensor([-1.6320, -0.3567, 0.6105, 0.1351]) tensor([0.0504, 0.1803, 0.4744, 0.2949]) -Greedy action tensor([-0.8773, 1.0010, 0.0591, 0.6704]) tensor([0.0676, 0.4423, 0.1724, 0.3177]) -Greedy action tensor([-1.5930, -0.5180, 1.1489, 0.8520]) tensor([0.0323, 0.0946, 0.5009, 0.3722]) -Greedy action tensor([0.0465, 1.1917, 0.0435, 0.4198]) tensor([0.1517, 0.4768, 0.1512, 0.2203]) -Greedy action tensor([-1.8079, -0.4733, 0.6173, -0.0201]) tensor([0.0453, 0.1720, 0.5120, 0.2707]) -Greedy action tensor([-1.7464, -0.4439, 0.5657, -0.1061]) tensor([0.0502, 0.1846, 0.5065, 0.2587]) -Greedy action tensor([-1.9354, -0.4420, 0.6615, -0.1761]) tensor([0.0405, 0.1804, 0.5438, 0.2353]) -Greedy action tensor([-1.9143, -0.2688, 0.6226, -0.1715]) tensor([0.0408, 0.2113, 0.5152, 0.2328]) -Greedy action tensor([-1.7376, -0.5364, 0.6464, 0.0031]) tensor([0.0479, 0.1593, 0.5197, 0.2731]) -Greedy action tensor([-0.6982, -0.5764, 0.1995, 0.0347]) tensor([0.1500, 0.1695, 0.3682, 0.3123]) -Greedy action tensor([-1.5766, -0.3127, 0.7875, 0.2642]) tensor([0.0466, 0.1648, 0.4952, 0.2934]) -Greedy action tensor([-1.8166, -0.4097, 0.5941, -0.1114]) tensor([0.0460, 0.1879, 0.5128, 0.2533]) -Greedy action tensor([-1.8741, -0.4635, 0.7175, -0.0746]) tensor([0.0408, 0.1673, 0.5450, 0.2468]) -Greedy action tensor([-1.4853, -0.4617, 0.7424, 0.4937]) tensor([0.0493, 0.1371, 0.4571, 0.3565]) -Greedy action tensor([-1.1225, -0.5293, 0.2959, 0.0903]) tensor([0.0971, 0.1756, 0.4009, 0.3264]) -Greedy action tensor([-1.8883, -0.4432, 0.6359, -0.1586]) tensor([0.0428, 0.1816, 0.5342, 0.2414]) -Greedy action tensor([-1.9350, -0.4547, 0.6611, -0.1746]) tensor([0.0406, 0.1785, 0.5447, 0.2362]) -Greedy action tensor([-0.5109, 0.5571, 0.1097, 0.1266]) tensor([0.1305, 0.3798, 0.2428, 0.2469]) -Greedy action tensor([-1.9014, -0.4459, 0.7040, -0.0853]) tensor([0.0400, 0.1717, 0.5421, 0.2462]) -Greedy action tensor([-1.6853, -0.5439, 0.8091, 0.3806]) tensor([0.0414, 0.1297, 0.5019, 0.3270]) -Greedy action tensor([-0.7094, 0.8970, 0.2239, 0.5215]) tensor([0.0837, 0.4171, 0.2128, 0.2865]) -Greedy action tensor([-1.0584, 0.8382, 0.1317, 0.2063]) tensor([0.0690, 0.4597, 0.2268, 0.2444]) -Greedy action tensor([-1.5893, -0.5390, 0.5496, 0.1277]) tensor([0.0558, 0.1596, 0.4739, 0.3108]) -Greedy action tensor([-1.5908, -0.1430, 0.5387, 0.0848]) tensor([0.0526, 0.2238, 0.4425, 0.2811]) -Greedy action tensor([-1.8573, -0.1568, 0.5746, -0.1209]) tensor([0.0425, 0.2327, 0.4836, 0.2412]) -Greedy action tensor([-1.6134, -0.4751, 0.4993, -0.0317]) tensor([0.0580, 0.1809, 0.4793, 0.2818]) -Greedy action tensor([-1.1796, -0.6048, 0.3159, 0.3024]) tensor([0.0859, 0.1526, 0.3833, 0.3782]) -Greedy action tensor([-1.9132, -0.4389, 0.6493, -0.1658]) tensor([0.0415, 0.1814, 0.5386, 0.2384]) -Greedy action tensor([-1.8965, -0.4253, 0.6406, -0.1486]) tensor([0.0421, 0.1834, 0.5325, 0.2419]) -Greedy action tensor([ 0.8898, -0.4646, -0.0623, 0.1281]) tensor([0.4737, 0.1223, 0.1828, 0.2212]) -Greedy action tensor([ 1.2084, -0.2982, -0.7846, 0.2592]) tensor([0.5731, 0.1270, 0.0781, 0.2218]) -Greedy action tensor([ 1.3032, -0.7096, -0.3607, 0.5278]) tensor([0.5607, 0.0749, 0.1062, 0.2582]) -Greedy action tensor([ 1.0515, -0.3402, -0.2114, 0.1711]) tensor([0.5138, 0.1278, 0.1453, 0.2131]) -Greedy action tensor([ 1.7433, -0.5656, -0.3612, 0.7288]) tensor([0.6314, 0.0627, 0.0770, 0.2289]) -Greedy action tensor([ 1.5231, -0.4459, -0.6483, 0.3506]) tensor([0.6397, 0.0893, 0.0729, 0.1981]) -Greedy action tensor([ 1.1470, -0.6042, -0.4295, 0.3568]) tensor([0.5453, 0.0946, 0.1127, 0.2474]) -Greedy action tensor([ 0.8379, -0.3975, -0.3002, 0.1355]) tensor([0.4747, 0.1380, 0.1521, 0.2352]) -Greedy action tensor([ 0.8374, -0.0658, 0.1054, -0.1733]) tensor([0.4444, 0.1801, 0.2137, 0.1617]) -Greedy action tensor([ 1.0087, -0.5300, -0.2762, 0.4591]) tensor([0.4834, 0.1038, 0.1338, 0.2790]) -Greedy action tensor([ 1.2861, -0.1424, -0.3274, 0.0967]) tensor([0.5736, 0.1375, 0.1143, 0.1746]) -Greedy action tensor([ 0.7039, -0.0859, -0.1294, 0.1187]) tensor([0.4089, 0.1856, 0.1777, 0.2278]) -Greedy action tensor([ 1.5209, -0.4325, -0.3234, 0.4123]) tensor([0.6135, 0.0870, 0.0970, 0.2025]) -Greedy action tensor([ 2.0635, -0.5047, -0.3536, 0.4099]) tensor([0.7368, 0.0565, 0.0657, 0.1410]) -Greedy action tensor([ 1.3864, 0.2249, -0.3014, 0.3571]) tensor([0.5390, 0.1687, 0.0997, 0.1926]) -Greedy action tensor([ 1.3662, -0.5479, -0.1450, 0.2987]) tensor([0.5841, 0.0861, 0.1289, 0.2009]) -Greedy action tensor([ 1.1629, -0.2507, -0.0176, 0.2404]) tensor([0.5134, 0.1249, 0.1577, 0.2041]) -Greedy action tensor([ 0.9564, -0.3744, -0.0800, 0.2600]) tensor([0.4723, 0.1248, 0.1675, 0.2354]) -Greedy action tensor([ 1.1251, -0.2834, -0.3863, 0.3968]) tensor([0.5134, 0.1255, 0.1133, 0.2478]) -Greedy action tensor([ 1.2643, 0.0473, -0.3637, 0.2719]) tensor([0.5367, 0.1589, 0.1054, 0.1990]) -Greedy action tensor([ 1.5716, -0.5811, -0.1190, 0.2287]) tensor([0.6403, 0.0744, 0.1181, 0.1672]) -Greedy action tensor([ 1.4855, -0.2684, -0.9655, 0.2800]) tensor([0.6415, 0.1110, 0.0553, 0.1922]) -Greedy action tensor([ 0.8238, -0.2235, -0.6176, 0.6471]) tensor([0.4123, 0.1447, 0.0975, 0.3455]) -Greedy action tensor([ 1.3777, -0.0291, -1.3234, 0.2360]) tensor([0.6130, 0.1501, 0.0412, 0.1957]) -Greedy action tensor([ 1.6476, -0.0719, -0.5398, 0.3604]) tensor([0.6380, 0.1143, 0.0716, 0.1761]) -Greedy action tensor([ 0.7600, -0.3625, -0.0908, 0.0160]) tensor([0.4489, 0.1461, 0.1917, 0.2133]) -Greedy action tensor([ 2.4507, -0.6078, -0.7885, 0.1861]) tensor([0.8403, 0.0395, 0.0329, 0.0873]) -Greedy action tensor([ 1.8630, -0.3867, -0.5932, 0.0856]) tensor([0.7351, 0.0775, 0.0630, 0.1243]) -Greedy action tensor([ 2.0626, -0.7072, 0.0027, 0.6919]) tensor([0.6925, 0.0434, 0.0883, 0.1759]) -Greedy action tensor([ 1.5036, -0.3031, -0.2394, 0.2522]) tensor([0.6153, 0.1010, 0.1077, 0.1760]) -Greedy action tensor([ 1.6424, -0.1937, -0.5000, 0.2781]) tensor([0.6526, 0.1040, 0.0766, 0.1668]) -Greedy action tensor([ 0.7588, -0.2927, -0.2747, 0.4502]) tensor([0.4099, 0.1432, 0.1458, 0.3010]) -Greedy action tensor([ 1.7334, -0.5707, -0.3635, 0.5211]) tensor([0.6578, 0.0657, 0.0808, 0.1957]) -Greedy action tensor([ 2.2278, -0.7473, -0.3767, 1.1630]) tensor([0.6804, 0.0347, 0.0503, 0.2346]) -Greedy action tensor([ 1.4037, -0.3085, -0.5588, 0.9473]) tensor([0.5116, 0.0923, 0.0719, 0.3241]) -Greedy action tensor([ 2.1931, -0.4571, -0.3191, 0.4706]) tensor([0.7517, 0.0531, 0.0610, 0.1343]) -Greedy action tensor([ 1.5896, -0.4368, -1.0603, 0.2784]) tensor([0.6794, 0.0895, 0.0480, 0.1831]) -Greedy action tensor([ 1.3228, -0.7329, -0.2777, 0.8137]) tensor([0.5179, 0.0663, 0.1045, 0.3113]) -Greedy action tensor([ 1.0930, -0.3208, -0.2889, -0.0622]) tensor([0.5527, 0.1344, 0.1388, 0.1741]) -Greedy action tensor([ 1.2014, -0.4207, 0.0021, 0.1767]) tensor([0.5383, 0.1063, 0.1622, 0.1932]) -Greedy action tensor([ 1.5187, 0.1077, -0.4206, 0.1741]) tensor([0.6067, 0.1480, 0.0872, 0.1581]) -Greedy action tensor([ 0.6345, -0.1348, 0.0319, 0.2085]) tensor([0.3754, 0.1739, 0.2055, 0.2452]) -Greedy action tensor([ 1.2897, -0.1914, -0.4037, 0.0847]) tensor([0.5845, 0.1329, 0.1075, 0.1752]) -Greedy action tensor([ 1.0990, -0.2130, -0.2044, 0.0777]) tensor([0.5260, 0.1417, 0.1429, 0.1894]) -Greedy action tensor([ 1.9028, -0.0561, -0.2151, -0.0372]) tensor([0.7118, 0.1004, 0.0856, 0.1023]) -Greedy action tensor([ 0.9560, -0.4592, -0.2386, 0.4282]) tensor([0.4683, 0.1137, 0.1418, 0.2762]) -Greedy action tensor([ 1.7505, -0.6990, -0.7789, 0.4736]) tensor([0.6921, 0.0598, 0.0552, 0.1930]) -Greedy action tensor([ 1.1113, -0.5335, -0.1355, 0.1509]) tensor([0.5367, 0.1036, 0.1543, 0.2054]) -Greedy action tensor([ 1.4967, -0.1661, 0.1442, -0.5488]) tensor([0.6339, 0.1202, 0.1639, 0.0820]) -Greedy action tensor([ 1.3863, -0.1318, -0.3686, 0.1558]) tensor([0.5937, 0.1301, 0.1027, 0.1735]) -Greedy action tensor([ 1.2781, -0.6022, -0.2685, 0.8740]) tensor([0.4919, 0.0750, 0.1048, 0.3283]) -Greedy action tensor([ 1.8223, -0.0141, 0.1260, 0.1364]) tensor([0.6544, 0.1043, 0.1200, 0.1212]) -Greedy action tensor([ 1.3710, -0.3416, -0.5176, 0.4978]) tensor([0.5717, 0.1031, 0.0865, 0.2387]) -Greedy action tensor([ 1.3098, 0.2709, -0.4690, 0.3479]) tensor([0.5250, 0.1858, 0.0886, 0.2006]) -Greedy action tensor([ 1.3159, -0.4533, -0.3553, 0.9601]) tensor([0.4856, 0.0828, 0.0913, 0.3403]) -Greedy action tensor([ 1.4300, -0.1942, -0.5266, 0.2828]) tensor([0.6039, 0.1190, 0.0854, 0.1917]) -Greedy action tensor([ 1.8202, -0.3990, -0.7135, 0.8468]) tensor([0.6386, 0.0694, 0.0507, 0.2413]) -Greedy action tensor([ 1.5243, -0.6322, -0.4332, 0.7221]) tensor([0.5864, 0.0679, 0.0828, 0.2629]) -Greedy action tensor([ 1.4969, -0.7047, -0.4010, 0.6241]) tensor([0.5958, 0.0659, 0.0893, 0.2489]) -Greedy action tensor([ 1.4889, -0.5080, -0.5693, 0.2565]) tensor([0.6431, 0.0873, 0.0821, 0.1875]) -Greedy action tensor([ 1.3491, -0.3809, -0.5336, 0.2409]) tensor([0.6026, 0.1068, 0.0917, 0.1989]) -Greedy action tensor([ 2.0570, -1.1019, -0.2139, 0.6821]) tensor([0.7150, 0.0304, 0.0738, 0.1808]) -Greedy action tensor([ 1.1208, -0.3350, 0.1636, 0.1416]) tensor([0.5018, 0.1170, 0.1927, 0.1885]) -Greedy action tensor([ 1.3612, -0.4769, -0.1653, -0.1948]) tensor([0.6300, 0.1002, 0.1369, 0.1329]) -Greedy action tensor([ 0.5986, 0.0112, -0.0991, 0.1318]) tensor([0.3731, 0.2073, 0.1857, 0.2339]) -Greedy action tensor([ 2.4605, -1.5671, -0.4606, 1.2086]) tensor([0.7366, 0.0131, 0.0397, 0.2106]) -Greedy action tensor([ 1.8002, -0.8447, -0.4034, 0.6752]) tensor([0.6640, 0.0471, 0.0733, 0.2155]) -Greedy action tensor([ 2.2365, -0.8271, -0.7118, 0.7894]) tensor([0.7494, 0.0350, 0.0393, 0.1763]) -Greedy action tensor([ 2.6997, -1.2134, 0.0796, 1.2107]) tensor([0.7585, 0.0152, 0.0552, 0.1711]) -Greedy action tensor([ 1.7699, -0.6479, -0.2327, 0.3483]) tensor([0.6824, 0.0608, 0.0921, 0.1647]) -Greedy action tensor([ 1.3931, -0.3781, -0.3009, 0.1156]) tensor([0.6125, 0.1042, 0.1126, 0.1707]) -Greedy action tensor([ 2.0187, -0.8129, -0.1360, 0.6314]) tensor([0.7020, 0.0414, 0.0814, 0.1753]) -Greedy action tensor([ 1.1421, -0.2067, -0.2516, 0.3827]) tensor([0.5062, 0.1314, 0.1256, 0.2369]) -Greedy action tensor([ 2.2465, -0.8084, -0.4506, 0.2068]) tensor([0.8035, 0.0379, 0.0542, 0.1045]) -Greedy action tensor([ 1.3587, -0.7257, -0.0495, 0.3092]) tensor([0.5817, 0.0724, 0.1423, 0.2037]) -Greedy action tensor([ 2.1180, -0.7216, -0.2750, 0.5025]) tensor([0.7415, 0.0433, 0.0677, 0.1474]) -Greedy action tensor([ 0.8783, -0.5725, -0.3260, 0.6882]) tensor([0.4235, 0.0993, 0.1270, 0.3502]) -Greedy action tensor([ 1.6237, -0.0682, -0.2171, 0.2153]) tensor([0.6300, 0.1160, 0.1000, 0.1540]) -Greedy action tensor([ 1.2079, -0.4628, -0.2920, 0.0500]) tensor([0.5796, 0.1090, 0.1293, 0.1821]) -Greedy action tensor([ 1.2162, -0.6076, -0.2740, 0.7608]) tensor([0.4948, 0.0799, 0.1115, 0.3138]) -Greedy action tensor([ 0.3772, -0.2445, -0.2865, 0.6484]) tensor([0.2973, 0.1597, 0.1531, 0.3899]) -Greedy action tensor([ 0.2658, 0.0294, -0.1985, -0.1903]) tensor([0.3277, 0.2587, 0.2060, 0.2077]) -Greedy action tensor([ 0.2314, -0.1896, -0.1795, -0.1227]) tensor([0.3310, 0.2173, 0.2195, 0.2323]) -Greedy action tensor([ 0.4446, -0.3041, -0.0725, -0.1215]) tensor([0.3792, 0.1794, 0.2261, 0.2153]) -Greedy action tensor([ 0.2662, -0.2782, -0.0260, -0.0855]) tensor([0.3300, 0.1915, 0.2464, 0.2322]) -Greedy action tensor([ 0.4453, -0.6456, -0.1159, -0.1579]) tensor([0.4076, 0.1369, 0.2325, 0.2230]) -Greedy action tensor([ 0.8911, -0.7250, 0.0373, -0.4303]) tensor([0.5288, 0.1050, 0.2251, 0.1410]) -Greedy action tensor([ 0.5791, -0.2691, -0.0909, -0.4138]) tensor([0.4328, 0.1853, 0.2215, 0.1604]) -Greedy action tensor([ 0.4830, 0.1632, -0.0581, -0.1871]) tensor([0.3546, 0.2575, 0.2064, 0.1814]) -Greedy action tensor([ 0.6065, -0.2110, -0.0580, -0.0558]) tensor([0.4046, 0.1786, 0.2082, 0.2086]) -Greedy action tensor([ 0.4788, -0.4654, -0.1166, -0.3429]) tensor([0.4202, 0.1634, 0.2316, 0.1847]) -Greedy action tensor([ 0.7589, -0.5634, 0.0230, -0.3066]) tensor([0.4784, 0.1275, 0.2292, 0.1648]) -Greedy action tensor([ 0.5945, -0.2175, -0.1585, -0.3175]) tensor([0.4317, 0.1916, 0.2033, 0.1734]) -Greedy action tensor([ 0.4722, -0.4002, -0.2160, -0.0402]) tensor([0.3969, 0.1659, 0.1994, 0.2378]) -Greedy action tensor([ 0.5443, -0.2304, 0.0021, -0.2239]) tensor([0.3990, 0.1839, 0.2320, 0.1851]) -Greedy action tensor([ 0.1519, 0.6230, -0.1036, 0.0807]) tensor([0.2321, 0.3719, 0.1798, 0.2162]) -Greedy action tensor([ 0.4494, -0.2504, -0.0121, -0.3394]) tensor([0.3874, 0.1924, 0.2442, 0.1760]) -Greedy action tensor([ 0.6386, -0.6217, 0.0930, -0.4692]) tensor([0.4559, 0.1293, 0.2642, 0.1506]) -Greedy action tensor([ 0.8190, -0.6876, 0.0010, -0.1514]) tensor([0.4897, 0.1086, 0.2161, 0.1856]) -Greedy action tensor([ 0.5377, 0.0406, -0.0686, -0.4352]) tensor([0.3950, 0.2403, 0.2154, 0.1493]) -Greedy action tensor([ 0.6695, -0.3438, -0.1961, -0.1316]) tensor([0.4479, 0.1626, 0.1885, 0.2010]) -Greedy action tensor([ 0.4904, 0.1536, -0.2703, -0.3843]) tensor([0.3848, 0.2748, 0.1799, 0.1605]) -Greedy action tensor([ 0.5552, -0.5061, -0.2017, -0.1065]) tensor([0.4290, 0.1484, 0.2012, 0.2213]) -Greedy action tensor([ 0.7886, -0.5153, 0.0153, -0.2271]) tensor([0.4773, 0.1296, 0.2203, 0.1729]) -Greedy action tensor([ 0.8069, -0.3072, -0.0713, -0.4661]) tensor([0.4941, 0.1622, 0.2053, 0.1384]) -Greedy action tensor([ 0.5553, -0.3803, -0.2066, -0.4384]) tensor([0.4486, 0.1760, 0.2094, 0.1661]) -Greedy action tensor([ 0.7370, -0.1965, 0.0743, -0.5488]) tensor([0.4576, 0.1799, 0.2359, 0.1265]) -Greedy action tensor([ 0.3088, 0.0240, -0.0450, -0.2414]) tensor([0.3299, 0.2482, 0.2316, 0.1903]) -Greedy action tensor([ 0.8845, -0.6218, 0.0546, -0.2909]) tensor([0.5085, 0.1127, 0.2218, 0.1570]) -Greedy action tensor([ 0.4282, -0.2017, -0.0053, -0.4075]) tensor([0.3825, 0.2037, 0.2479, 0.1658]) -Greedy action tensor([ 0.6026, -0.6886, -0.1124, -0.2191]) tensor([0.4538, 0.1248, 0.2220, 0.1995]) -Greedy action tensor([ 0.8952, -0.4240, -0.1221, -0.5958]) tensor([0.5394, 0.1442, 0.1950, 0.1214]) -Greedy action tensor([ 0.9388, -0.6751, -0.0827, -0.4758]) tensor([0.5549, 0.1105, 0.1998, 0.1348]) -Greedy action tensor([ 0.5945, 0.0478, 0.0936, -0.2182]) tensor([0.3804, 0.2202, 0.2305, 0.1688]) -Greedy action tensor([ 0.7149, -0.3861, -0.2292, -0.1799]) tensor([0.4694, 0.1561, 0.1826, 0.1918]) -Greedy action tensor([ 0.9214, -0.4212, -0.1017, -0.1424]) tensor([0.5087, 0.1329, 0.1829, 0.1756]) -Greedy action tensor([ 0.8893, -1.0444, -0.0699, -0.4740]) tensor([0.5607, 0.0811, 0.2148, 0.1434]) -Greedy action tensor([ 1.0090, -0.9183, -0.0059, -0.4815]) tensor([0.5770, 0.0840, 0.2091, 0.1300]) -Greedy action tensor([ 0.7074, -0.3794, 0.2179, -0.2720]) tensor([0.4300, 0.1450, 0.2635, 0.1615]) -Greedy action tensor([ 0.5102, -0.0569, 0.0528, -0.2403]) tensor([0.3742, 0.2122, 0.2369, 0.1767]) -Greedy action tensor([ 0.3130, 0.4261, -0.1511, -0.3186]) tensor([0.3049, 0.3414, 0.1917, 0.1621]) -Greedy action tensor([ 0.6327, -0.1991, 0.1192, -0.3649]) tensor([0.4163, 0.1812, 0.2491, 0.1535]) -Greedy action tensor([ 0.8335, -0.3677, -0.1049, -0.2005]) tensor([0.4884, 0.1469, 0.1911, 0.1736]) -Greedy action tensor([ 0.6010, -0.5094, 0.0011, -0.4553]) tensor([0.4492, 0.1480, 0.2466, 0.1562]) -Greedy action tensor([ 0.3063, 0.3351, -0.1989, 0.0072]) tensor([0.2964, 0.3050, 0.1788, 0.2198]) -Greedy action tensor([ 0.2989, -0.3865, 0.0537, -0.3855]) tensor([0.3583, 0.1806, 0.2804, 0.1807]) -Greedy action tensor([ 0.5392, 0.0420, 0.0233, -0.2242]) tensor([0.3743, 0.2277, 0.2235, 0.1745]) -Greedy action tensor([ 0.2797, 0.1378, -0.1162, -0.2203]) tensor([0.3177, 0.2757, 0.2139, 0.1927]) -Greedy action tensor([ 0.9265, -0.3436, -0.0848, -0.1532]) tensor([0.5040, 0.1415, 0.1833, 0.1712]) -Greedy action tensor([ 0.8647, -0.0975, -0.1071, -0.2938]) tensor([0.4821, 0.1842, 0.1824, 0.1513]) -Greedy action tensor([ 0.7726, -0.4115, -0.1492, -0.3045]) tensor([0.4891, 0.1497, 0.1946, 0.1666]) -Greedy action tensor([ 1.0259, -0.5643, -0.1576, -0.4131]) tensor([0.5723, 0.1167, 0.1752, 0.1357]) -Greedy action tensor([ 0.6261, -0.2919, -0.1045, -0.2190]) tensor([0.4328, 0.1728, 0.2085, 0.1859]) -Greedy action tensor([ 0.5295, -0.0730, -0.1536, -0.2195]) tensor([0.3960, 0.2168, 0.2000, 0.1872]) -Greedy action tensor([ 0.8492, -0.6816, -0.1027, -0.4401]) tensor([0.5325, 0.1152, 0.2055, 0.1467]) -Greedy action tensor([ 0.8391, -0.3552, 0.0718, -0.2751]) tensor([0.4772, 0.1446, 0.2216, 0.1566]) -Greedy action tensor([ 0.5634, -0.0081, -0.0848, -0.1059]) tensor([0.3847, 0.2172, 0.2012, 0.1970]) -Greedy action tensor([ 0.5524, -0.1723, -0.1999, -0.2078]) tensor([0.4127, 0.1999, 0.1945, 0.1929]) -Greedy action tensor([ 0.5093, -0.6917, -0.0738, -0.2314]) tensor([0.4281, 0.1288, 0.2390, 0.2041]) -Greedy action tensor([ 0.5116, -0.2690, -0.0725, -0.3440]) tensor([0.4097, 0.1877, 0.2285, 0.1741]) -Greedy action tensor([ 0.9365, -0.0388, -0.0606, -0.7460]) tensor([0.5176, 0.1952, 0.1910, 0.0962]) -Greedy action tensor([ 0.9284, -0.1346, 0.0278, -0.3141]) tensor([0.4901, 0.1693, 0.1991, 0.1415]) -Greedy action tensor([ 0.6867, -0.8534, -0.0456, -0.2999]) tensor([0.4835, 0.1037, 0.2325, 0.1803]) -Greedy action tensor([ 0.5727, -0.3831, -0.1328, -0.1111]) tensor([0.4196, 0.1613, 0.2072, 0.2118]) -Greedy action tensor([ 1.3189, -0.6062, 0.1002, -0.4797]) tensor([0.6223, 0.0908, 0.1839, 0.1030]) -Greedy action tensor([ 0.9913, -0.5217, -0.2430, -0.4707]) tensor([0.5737, 0.1264, 0.1670, 0.1330]) -Greedy action tensor([ 0.6921, -0.2387, -0.1564, -0.1966]) tensor([0.4477, 0.1765, 0.1917, 0.1841]) -Greedy action tensor([ 0.4928, 0.2217, -0.2199, 0.0298]) tensor([0.3469, 0.2646, 0.1701, 0.2184]) -Greedy action tensor([ 0.6152, -0.3200, -0.1305, -0.5415]) tensor([0.4584, 0.1799, 0.2175, 0.1442]) -Greedy action tensor([ 0.5954, 0.3661, -0.2487, -0.1099]) tensor([0.3678, 0.2924, 0.1581, 0.1817]) -Greedy action tensor([ 1.1119, -0.8551, 0.0973, -0.7036]) tensor([0.6005, 0.0840, 0.2177, 0.0977]) -Greedy action tensor([ 0.7629, -0.5506, -0.0403, -0.5551]) tensor([0.5039, 0.1355, 0.2257, 0.1349]) -Greedy action tensor([ 0.6992, -0.4199, 0.0058, -0.4091]) tensor([0.4637, 0.1514, 0.2318, 0.1531]) -Greedy action tensor([ 0.5692, -0.3948, 0.0017, -0.5300]) tensor([0.4383, 0.1672, 0.2485, 0.1460]) -Greedy action tensor([ 0.3911, -0.3101, -0.0018, -0.3329]) tensor([0.3765, 0.1868, 0.2542, 0.1825]) -Greedy action tensor([ 0.7294, -0.4274, -0.2253, -0.4586]) tensor([0.4989, 0.1569, 0.1921, 0.1521]) -Greedy action tensor([ 0.2528, -0.0588, 0.0564, -0.5229]) tensor([0.3318, 0.2429, 0.2726, 0.1527]) -Greedy action tensor([ 0.4584, -0.1639, -0.0290, -0.2961]) tensor([0.3815, 0.2048, 0.2343, 0.1794]) -Greedy action tensor([ 0.7270, -0.1887, -0.0731, -0.0897]) tensor([0.4364, 0.1747, 0.1961, 0.1928]) -Greedy action tensor([ 0.6589, -0.3397, -0.0400, -0.2375]) tensor([0.4398, 0.1620, 0.2187, 0.1795]) -Greedy action tensor([ 0.5890, -0.4235, -0.0101, -0.3961]) tensor([0.4374, 0.1589, 0.2403, 0.1633]) -Greedy action tensor([ 0.9481, -0.5543, -0.0211, -0.8241]) tensor([0.5644, 0.1256, 0.2141, 0.0959]) -Greedy action tensor([ 1.8450, -1.1257, 0.7465, 0.4522]) tensor([0.6124, 0.0314, 0.2041, 0.1521]) -Greedy action tensor([-0.9531, -0.2736, -0.4679, 1.1765]) tensor([0.0769, 0.1517, 0.1249, 0.6466]) -Greedy action tensor([-0.1255, 0.0909, 1.9154, 0.0162]) tensor([0.0902, 0.1119, 0.6940, 0.1039]) -Greedy action tensor([-0.1812, -1.8317, -0.4846, 0.8299]) tensor([0.2137, 0.0410, 0.1578, 0.5875]) -Greedy action tensor([ 0.9700, -1.1933, 2.0327, 1.0899]) tensor([0.1947, 0.0224, 0.5635, 0.2195]) -Greedy action tensor([ 0.2207, 0.4942, -0.2942, -0.0213]) tensor([0.2705, 0.3556, 0.1616, 0.2123]) -Greedy action tensor([ 0.2387, 0.7544, -0.5535, 0.7250]) tensor([0.2104, 0.3523, 0.0953, 0.3421]) -Greedy action tensor([ 1.2246, -0.0064, -0.1202, 0.3701]) tensor([0.5055, 0.1476, 0.1317, 0.2151]) -Greedy action tensor([ 0.0988, -1.2695, 1.1541, 1.2236]) tensor([0.1388, 0.0353, 0.3986, 0.4273]) -Greedy action tensor([-0.7837, -1.3387, 0.5497, -0.5749]) tensor([0.1515, 0.0870, 0.5748, 0.1867]) -Greedy action tensor([ 1.0810, -0.7253, 0.9306, 0.3434]) tensor([0.3995, 0.0656, 0.3437, 0.1911]) -Greedy action tensor([ 1.0847, -0.9573, 0.3554, 1.3282]) tensor([0.3463, 0.0449, 0.1670, 0.4418]) -Greedy action tensor([-0.6569, -0.2843, 0.6848, 0.0193]) tensor([0.1213, 0.1761, 0.4641, 0.2385]) -Greedy action tensor([0.6987, 0.3933, 0.7275, 1.1434]) tensor([0.2312, 0.1703, 0.2379, 0.3606]) -Greedy action tensor([-0.7886, 1.0085, -0.7636, 0.0803]) tensor([0.0958, 0.5777, 0.0982, 0.2283]) -Greedy action tensor([-0.3558, -0.5596, 1.0996, -0.5046]) tensor([0.1436, 0.1171, 0.6155, 0.1237]) -Greedy action tensor([0.5665, 0.4935, 1.0430, 1.3596]) tensor([0.1739, 0.1617, 0.2801, 0.3844]) -Greedy action tensor([ 0.9045, -0.5063, 1.4567, 0.0843]) tensor([0.2923, 0.0713, 0.5077, 0.1287]) -Greedy action tensor([ 0.3189, -0.6258, -0.0994, -0.3755]) tensor([0.3927, 0.1527, 0.2585, 0.1961]) -Greedy action tensor([ 1.7475, 0.2692, -0.1804, 0.5310]) tensor([0.5989, 0.1366, 0.0871, 0.1774]) -Greedy action tensor([-0.9068, -1.0627, 0.7108, -0.4612]) tensor([0.1182, 0.1012, 0.5960, 0.1846]) -Greedy action tensor([ 1.1912, -0.1893, 0.9489, 0.9412]) tensor([0.3552, 0.0893, 0.2788, 0.2766]) -Greedy action tensor([-0.1285, 0.5283, -0.1278, -0.5465]) tensor([0.2180, 0.4204, 0.2181, 0.1435]) -Greedy action tensor([ 1.3778, -0.3077, 0.5249, 0.0926]) tensor([0.5296, 0.0982, 0.2257, 0.1465]) -Greedy action tensor([-1.9533, -0.0326, -0.1181, -0.9373]) tensor([0.0593, 0.4050, 0.3718, 0.1639]) -Greedy action tensor([ 0.7644, -0.5571, -0.1151, 0.2067]) tensor([0.4436, 0.1183, 0.1841, 0.2540]) -Greedy action tensor([ 0.3430, -0.9354, 0.8486, 1.1282]) tensor([0.1950, 0.0543, 0.3233, 0.4275]) -Greedy action tensor([ 0.3605, -0.0420, -0.2772, -0.3117]) tensor([0.3693, 0.2469, 0.1952, 0.1886]) -Greedy action tensor([ 0.0948, -1.5183, -0.3735, 1.8884]) tensor([0.1276, 0.0254, 0.0799, 0.7671]) -Greedy action tensor([ 0.7237, -0.2900, 0.9407, 0.5219]) tensor([0.2922, 0.1060, 0.3630, 0.2388]) -Greedy action tensor([ 1.2065, 0.1979, -0.2823, 1.2782]) tensor([0.3753, 0.1369, 0.0847, 0.4032]) -Greedy action tensor([-0.2540, -1.4890, 0.1344, 0.1183]) tensor([0.2372, 0.0690, 0.3497, 0.3441]) -Greedy action tensor([-0.1983, -2.2634, -0.6030, 0.3160]) tensor([0.2885, 0.0366, 0.1925, 0.4825]) -Greedy action tensor([ 0.0196, -1.7298, -1.0019, 0.6592]) tensor([0.2916, 0.0507, 0.1050, 0.5528]) -Greedy action tensor([ 1.0471, -0.1299, 0.1814, -0.1970]) tensor([0.4957, 0.1528, 0.2086, 0.1429]) -Greedy action tensor([-0.0417, 0.2193, -0.1759, -0.5636]) tensor([0.2655, 0.3447, 0.2322, 0.1576]) -Greedy action tensor([ 0.4108, -1.0144, 0.6075, 1.3586]) tensor([0.1985, 0.0477, 0.2416, 0.5121]) -Greedy action tensor([ 0.0871, -1.5847, -0.6241, 0.5810]) tensor([0.3014, 0.0566, 0.1480, 0.4939]) -Greedy action tensor([-0.7409, 0.0729, -0.5827, 0.5532]) tensor([0.1238, 0.2794, 0.1451, 0.4517]) -Greedy action tensor([1.5263, 0.4577, 0.7950, 0.8139]) tensor([0.4319, 0.1484, 0.2079, 0.2118]) -Greedy action tensor([ 0.5555, -0.4165, 0.0409, 0.4654]) tensor([0.3460, 0.1309, 0.2068, 0.3162]) -Greedy action tensor([ 0.9237, -1.9962, -0.7091, 0.2030]) tensor([0.5761, 0.0311, 0.1126, 0.2802]) -Greedy action tensor([ 0.3392, 0.5264, 0.3122, -0.4751]) tensor([0.2761, 0.3329, 0.2687, 0.1223]) -Greedy action tensor([ 0.7939, -0.4869, 0.3812, 0.5608]) tensor([0.3661, 0.1017, 0.2423, 0.2900]) -Greedy action tensor([-1.0763, -1.2316, -0.4323, -0.6808]) tensor([0.1906, 0.1632, 0.3630, 0.2831]) -Greedy action tensor([-0.1949, 0.3949, -0.6380, -0.6845]) tensor([0.2464, 0.4444, 0.1582, 0.1510]) -Greedy action tensor([-1.1789, -0.7380, -0.3460, -0.5011]) tensor([0.1466, 0.2277, 0.3371, 0.2886]) -Greedy action tensor([-0.3156, -1.6164, -0.4538, 1.3646]) tensor([0.1332, 0.0363, 0.1160, 0.7146]) -Greedy action tensor([-0.5305, -1.8330, -0.0163, 0.1340]) tensor([0.2046, 0.0556, 0.3422, 0.3976]) -Greedy action tensor([ 0.3968, -1.6518, 1.5772, 0.2874]) tensor([0.1894, 0.0244, 0.6165, 0.1697]) -Greedy action tensor([-0.4000, -1.0048, 0.8870, 0.7195]) tensor([0.1215, 0.0663, 0.4400, 0.3722]) -Greedy action tensor([ 0.0607, 0.2390, -0.5676, 0.1730]) tensor([0.2599, 0.3106, 0.1387, 0.2908]) -Greedy action tensor([ 0.3677, 1.1483, -0.1353, -0.2199]) tensor([0.2302, 0.5026, 0.1392, 0.1279]) -Greedy action tensor([ 1.0907, -0.5443, 0.2851, 0.8708]) tensor([0.4091, 0.0798, 0.1828, 0.3284]) -Greedy action tensor([1.1431, 0.2433, 0.7312, 1.2447]) tensor([0.3149, 0.1280, 0.2086, 0.3485]) -Greedy action tensor([-0.2763, -0.2442, 1.1407, 0.8718]) tensor([0.1074, 0.1109, 0.4431, 0.3386]) -Greedy action tensor([ 1.1724, -1.2849, 0.9363, -0.3363]) tensor([0.4770, 0.0409, 0.3767, 0.1055]) -Greedy action tensor([ 0.2539, -0.9798, 0.8142, -0.0655]) tensor([0.2653, 0.0773, 0.4646, 0.1928]) -Greedy action tensor([ 0.3925, -0.2036, -0.3144, 0.0111]) tensor([0.3667, 0.2020, 0.1808, 0.2504]) -Greedy action tensor([ 0.8795, -0.2325, 0.4835, 0.7034]) tensor([0.3521, 0.1158, 0.2369, 0.2952]) -Greedy action tensor([ 1.7165, -1.0944, 0.8990, 0.8223]) tensor([0.5234, 0.0315, 0.2311, 0.2140]) -Greedy action tensor([-1.9381, -0.7475, 0.9770, -0.4923]) tensor([0.0371, 0.1219, 0.6837, 0.1573]) -Greedy action tensor([ 1.3197, -1.7040, 0.6163, 0.4181]) tensor([0.5130, 0.0249, 0.2539, 0.2082]) -Greedy action tensor([-0.1438, -1.0904, 0.9888, 0.1351]) tensor([0.1720, 0.0668, 0.5339, 0.2273]) -Greedy action tensor([ 0.3147, -0.7198, 0.0639, 1.1999]) tensor([0.2194, 0.0780, 0.1708, 0.5318]) -Greedy action tensor([-0.4225, -2.2836, 0.9309, 0.5530]) tensor([0.1302, 0.0203, 0.5041, 0.3454]) -Greedy action tensor([-0.4073, -0.9193, 1.3412, -0.2396]) tensor([0.1173, 0.0703, 0.6738, 0.1387]) -Greedy action tensor([0.6476, 0.9051, 0.6225, 0.4645]) tensor([0.2438, 0.3154, 0.2378, 0.2030]) -Greedy action tensor([ 0.1353, -0.6799, 2.0876, 0.6542]) tensor([0.0984, 0.0435, 0.6929, 0.1652]) -Greedy action tensor([-0.8980, -1.1701, 0.2246, -0.2502]) tensor([0.1482, 0.1129, 0.4555, 0.2833]) -Greedy action tensor([ 0.2809, -0.3771, 0.2727, -0.5318]) tensor([0.3386, 0.1754, 0.3358, 0.1502]) -Greedy action tensor([-1.4281, -0.9407, 0.9981, 0.0554]) tensor([0.0545, 0.0887, 0.6166, 0.2402]) -Greedy action tensor([-0.6249, -1.4924, -0.1886, 0.4762]) tensor([0.1674, 0.0703, 0.2589, 0.5034]) -Greedy action tensor([-0.4273, -0.8018, -0.7831, 1.0988]) tensor([0.1431, 0.0984, 0.1003, 0.6583]) -Greedy action tensor([ 1.8319, -0.4762, 1.5265, 0.6884]) tensor([0.4641, 0.0461, 0.3419, 0.1479]) -Greedy action tensor([ 0.7160, -1.2488, -0.1583, 0.9509]) tensor([0.3543, 0.0497, 0.1478, 0.4482]) -Greedy action tensor([ 1.1243, -0.5681, 0.1966, -0.2215]) tensor([0.5435, 0.1001, 0.2149, 0.1415]) -Greedy action tensor([0.3947, 0.6685, 0.6958, 0.3791]) tensor([0.2150, 0.2827, 0.2906, 0.2117]) -Greedy action tensor([ 1.8394, -0.9925, 1.2571, 1.6920]) tensor([0.4031, 0.0237, 0.2252, 0.3479]) -Greedy action tensor([ 0.5552, -0.0792, -0.5937, 1.1641]) tensor([0.2713, 0.1439, 0.0860, 0.4988]) -Greedy action tensor([ 0.3316, -1.0159, 1.5819, -1.0987]) tensor([0.2004, 0.0521, 0.6996, 0.0479]) -Greedy action tensor([ 1.3925, -0.0370, 0.3026, 0.9193]) tensor([0.4548, 0.1089, 0.1529, 0.2834]) -Greedy action tensor([-1.3331, 0.4189, 0.4104, 0.3676]) tensor([0.0557, 0.3210, 0.3183, 0.3050]) -Greedy action tensor([-0.4261, 0.0949, 0.1087, 0.0481]) tensor([0.1667, 0.2807, 0.2846, 0.2679]) -Greedy action tensor([-1.9127, -0.4104, 0.6471, -0.1605]) tensor([0.0413, 0.1857, 0.5346, 0.2384]) -Greedy action tensor([-1.9106, -0.3661, 0.6411, -0.1471]) tensor([0.0411, 0.1925, 0.5269, 0.2396]) -Greedy action tensor([-1.2288, -0.5941, 0.4822, 0.7669]) tensor([0.0634, 0.1196, 0.3508, 0.4663]) -Greedy action tensor([-1.9088, -0.4309, 0.6440, -0.1635]) tensor([0.0417, 0.1830, 0.5362, 0.2391]) -Greedy action tensor([-1.8252, -0.6351, 0.6700, -0.0370]) tensor([0.0447, 0.1468, 0.5415, 0.2670]) -Greedy action tensor([-1.4190, -0.5609, 0.3770, 0.1981]) tensor([0.0693, 0.1635, 0.4178, 0.3493]) -Greedy action tensor([-0.7469, -0.3773, -0.8194, -0.5021]) tensor([0.2148, 0.3109, 0.1998, 0.2744]) -Greedy action tensor([-1.8218, -0.3905, 0.5910, -0.0951]) tensor([0.0455, 0.1904, 0.5082, 0.2559]) -Greedy action tensor([-1.4235, -0.6132, 0.4026, 0.1455]) tensor([0.0701, 0.1577, 0.4354, 0.3367]) -Greedy action tensor([-1.6434, -0.3409, 0.6937, 0.2967]) tensor([0.0455, 0.1673, 0.4707, 0.3165]) -Greedy action tensor([-1.7493, -0.2194, 0.6370, 0.0239]) tensor([0.0447, 0.2063, 0.4858, 0.2632]) -Greedy action tensor([-1.3914, -0.1594, 0.4495, -0.1492]) tensor([0.0705, 0.2415, 0.4440, 0.2440]) -Greedy action tensor([-1.8192, 0.1317, 0.5186, -0.1046]) tensor([0.0418, 0.2938, 0.4325, 0.2319]) -Greedy action tensor([-0.5358, -0.4610, 0.2458, 0.1168]) tensor([0.1617, 0.1743, 0.3534, 0.3106]) -Greedy action tensor([-0.9624, -0.0067, 0.7798, 1.1072]) tensor([0.0580, 0.1509, 0.3314, 0.4597]) -Greedy action tensor([-1.9402, -0.4459, 0.6636, -0.1770]) tensor([0.0403, 0.1797, 0.5449, 0.2351]) -Greedy action tensor([-1.1605, -0.3300, 0.3437, 0.2698]) tensor([0.0835, 0.1916, 0.3758, 0.3491]) -Greedy action tensor([-1.9395, -0.4411, 0.6610, -0.1778]) tensor([0.0404, 0.1807, 0.5439, 0.2351]) -Greedy action tensor([-1.8085, -0.4588, 0.8809, 0.3640]) tensor([0.0353, 0.1360, 0.5192, 0.3096]) -Greedy action tensor([-0.3866, 0.4139, 0.5698, 1.5836]) tensor([0.0769, 0.1713, 0.2002, 0.5517]) -Greedy action tensor([-1.9337, -0.4566, 0.6580, -0.1766]) tensor([0.0408, 0.1786, 0.5444, 0.2363]) -Greedy action tensor([-1.6917, -0.2833, 0.5220, -0.0268]) tensor([0.0512, 0.2095, 0.4686, 0.2707]) -Greedy action tensor([-1.0936, -0.5317, 0.5005, 0.6163]) tensor([0.0757, 0.1328, 0.3728, 0.4186]) -Greedy action tensor([-1.8040, -0.0864, 0.5540, -0.0725]) tensor([0.0439, 0.2445, 0.4638, 0.2479]) -Greedy action tensor([-1.8308, -0.3440, 0.6103, -0.0879]) tensor([0.0442, 0.1955, 0.5077, 0.2526]) -Greedy action tensor([-1.7831, -0.4787, 0.5885, -0.0942]) tensor([0.0480, 0.1771, 0.5148, 0.2601]) -Greedy action tensor([-1.5899, -0.4608, 0.4748, -0.1002]) tensor([0.0609, 0.1885, 0.4803, 0.2703]) -Greedy action tensor([-1.8227, -0.4981, 0.6553, -0.0654]) tensor([0.0445, 0.1673, 0.5303, 0.2579]) -Greedy action tensor([-1.8515, 0.0441, 0.5339, -0.1235]) tensor([0.0414, 0.2756, 0.4498, 0.2331]) -Greedy action tensor([-1.6735, -0.3596, 0.5093, -0.0121]) tensor([0.0530, 0.1973, 0.4704, 0.2793]) -Greedy action tensor([-1.1039, -0.1470, 0.2711, 0.0370]) tensor([0.0936, 0.2436, 0.3701, 0.2928]) -Greedy action tensor([-1.8659, -0.4067, 0.6541, -0.1318]) tensor([0.0427, 0.1839, 0.5313, 0.2421]) -Greedy action tensor([-1.8099, -0.4472, 0.5953, -0.1169]) tensor([0.0467, 0.1824, 0.5172, 0.2537]) -Greedy action tensor([-1.7884, -0.5043, 0.6087, -0.1206]) tensor([0.0478, 0.1728, 0.5258, 0.2536]) -Greedy action tensor([-1.9025, -0.4434, 0.6421, -0.1539]) tensor([0.0420, 0.1809, 0.5355, 0.2416]) -Greedy action tensor([-1.6873, -0.5448, 0.5416, -0.0399]) tensor([0.0537, 0.1684, 0.4990, 0.2790]) -Greedy action tensor([-0.6402, -0.6335, -0.0336, -0.0513]) tensor([0.1772, 0.1784, 0.3251, 0.3193]) -Greedy action tensor([-1.5713, -0.0119, 0.4984, 0.2264]) tensor([0.0507, 0.2412, 0.4019, 0.3062]) -Greedy action tensor([-1.8567, -0.3730, 0.6292, -0.1243]) tensor([0.0433, 0.1911, 0.5205, 0.2450]) -Greedy action tensor([-1.8955, -0.5727, 0.7423, -0.0929]) tensor([0.0403, 0.1514, 0.5638, 0.2446]) -Greedy action tensor([-1.7833, -0.4032, 0.5810, -0.0769]) tensor([0.0473, 0.1882, 0.5036, 0.2608]) -Greedy action tensor([-1.4053, -0.4926, 0.4661, 0.2663]) tensor([0.0653, 0.1627, 0.4244, 0.3475]) -Greedy action tensor([-0.7437, -0.5262, 0.6067, 1.0076]) tensor([0.0843, 0.1048, 0.3253, 0.4857]) -Greedy action tensor([-1.0366, -0.4490, 0.3331, 0.3592]) tensor([0.0928, 0.1671, 0.3652, 0.3749]) -Greedy action tensor([-1.7395, -0.2586, 0.6051, -0.0564]) tensor([0.0471, 0.2073, 0.4918, 0.2538]) -Greedy action tensor([-1.0540, 0.4670, 0.1359, 0.1819]) tensor([0.0813, 0.3719, 0.2671, 0.2797]) -Greedy action tensor([-1.2069, 0.0343, 0.3090, 0.0328]) tensor([0.0802, 0.2775, 0.3652, 0.2771]) -Greedy action tensor([-1.4599, -0.4949, 0.4144, 0.0616]) tensor([0.0679, 0.1783, 0.4427, 0.3111]) -Greedy action tensor([-1.5515, -0.5817, 0.4753, 0.0792]) tensor([0.0612, 0.1615, 0.4646, 0.3127]) -Greedy action tensor([-1.2611, -0.5363, 0.3308, 0.2543]) tensor([0.0798, 0.1648, 0.3921, 0.3633]) -Greedy action tensor([-1.9251, -0.4379, 0.6582, -0.1697]) tensor([0.0409, 0.1810, 0.5415, 0.2366]) -Greedy action tensor([-1.9271, -0.4528, 0.6567, -0.1723]) tensor([0.0410, 0.1790, 0.5430, 0.2370]) -Greedy action tensor([-1.8971, -0.4571, 0.6481, -0.1581]) tensor([0.0423, 0.1784, 0.5387, 0.2406]) -Greedy action tensor([-1.7187, 0.0441, 0.4732, -0.0126]) tensor([0.0470, 0.2738, 0.4205, 0.2587]) -Greedy action tensor([-0.6642, 0.9808, 0.0795, 0.4488]) tensor([0.0883, 0.4573, 0.1857, 0.2687]) -Greedy action tensor([-1.8083, -0.3178, 0.6099, -0.0887]) tensor([0.0450, 0.1995, 0.5046, 0.2509]) -Greedy action tensor([-1.4412, -0.5621, 0.3953, 0.1169]) tensor([0.0693, 0.1669, 0.4347, 0.3291]) -Greedy action tensor([-1.5775, -0.6771, 0.6849, 0.3376]) tensor([0.0504, 0.1239, 0.4838, 0.3419]) -Greedy action tensor([-1.5493, 0.0987, 0.7003, -0.6850]) tensor([0.0554, 0.2878, 0.5253, 0.1315]) -Greedy action tensor([-1.8730, -0.4679, 0.6306, -0.1575]) tensor([0.0437, 0.1783, 0.5348, 0.2432]) -Greedy action tensor([-1.9095, -0.4380, 0.6443, -0.1631]) tensor([0.0418, 0.1819, 0.5369, 0.2395]) -Greedy action tensor([-1.8790, -0.4719, 0.6800, -0.0978]) tensor([0.0418, 0.1706, 0.5397, 0.2480]) -Greedy action tensor([-1.9026, -0.4538, 0.6425, -0.1573]) tensor([0.0421, 0.1794, 0.5371, 0.2414]) -Greedy action tensor([-1.9213, -0.4749, 0.6852, -0.1524]) tensor([0.0405, 0.1722, 0.5494, 0.2378]) -Greedy action tensor([-1.8171, -0.4710, 0.6002, -0.1046]) tensor([0.0463, 0.1779, 0.5192, 0.2566]) -Greedy action tensor([-1.2538, -0.3762, 0.4418, 0.6075]) tensor([0.0654, 0.1573, 0.3565, 0.4208]) -Greedy action tensor([-1.0310, -0.6094, 0.2178, 0.3493]) tensor([0.1001, 0.1526, 0.3491, 0.3982]) -Greedy action tensor([-1.7315, -0.3361, 0.6210, -0.0093]) tensor([0.0473, 0.1909, 0.4971, 0.2647]) -Greedy action tensor([-1.4050, 0.4678, 0.2493, 0.2113]) tensor([0.0563, 0.3661, 0.2943, 0.2833]) -Greedy action tensor([-1.2538, -0.3679, 0.8081, 1.0932]) tensor([0.0460, 0.1116, 0.3616, 0.4809]) -Greedy action tensor([-1.9277, -0.4543, 0.6564, -0.1695]) tensor([0.0410, 0.1787, 0.5427, 0.2376]) -Greedy action tensor([-1.0917, 0.5299, 0.7678, 1.2216]) tensor([0.0443, 0.2240, 0.2842, 0.4475]) -Greedy action tensor([-0.7469, -0.2438, 0.7990, 1.1784]) tensor([0.0704, 0.1164, 0.3304, 0.4828]) -Greedy action tensor([-1.8620, -0.3536, 0.6375, -0.0989]) tensor([0.0425, 0.1921, 0.5176, 0.2478]) -Greedy action tensor([-0.6750, -0.5402, 0.1828, 0.3561]) tensor([0.1369, 0.1566, 0.3227, 0.3838]) -Greedy action tensor([-1.7627, -0.1117, 0.5244, -0.0605]) tensor([0.0464, 0.2419, 0.4570, 0.2546]) -Greedy action tensor([-0.3805, 1.2066, 0.0442, 0.8078]) tensor([0.0935, 0.4569, 0.1429, 0.3067]) -Greedy action tensor([-1.8865, -0.8308, -0.0454, -0.6115]) tensor([0.0727, 0.2089, 0.4582, 0.2601]) -Greedy action tensor([-1.4047, 0.6066, 0.2717, 0.0620]) tensor([0.0551, 0.4116, 0.2945, 0.2388]) -Greedy action tensor([ 0.3775, -0.4407, -0.1060, -0.1261]) tensor([0.3756, 0.1657, 0.2316, 0.2270]) -Greedy action tensor([ 0.3696, -0.2776, -0.3164, -0.2830]) tensor([0.3925, 0.2055, 0.1977, 0.2044]) -Greedy action tensor([ 0.5775, 0.0192, -0.1353, -0.3169]) tensor([0.4046, 0.2315, 0.1984, 0.1654]) -Greedy action tensor([ 0.7395, -0.1824, -0.0397, -0.1145]) tensor([0.4382, 0.1743, 0.2010, 0.1865]) -Greedy action tensor([ 0.9263, -0.7939, 0.0033, -0.5007]) tensor([0.5506, 0.0986, 0.2187, 0.1321]) -Greedy action tensor([ 0.4610, 0.3187, -0.3230, 0.0725]) tensor([0.3331, 0.2889, 0.1521, 0.2259]) -Greedy action tensor([ 0.6602, -0.7075, 0.0443, -0.4972]) tensor([0.4741, 0.1208, 0.2561, 0.1490]) -Greedy action tensor([ 0.7795, -0.5567, -0.1128, -0.4211]) tensor([0.5067, 0.1332, 0.2076, 0.1525]) -Greedy action tensor([ 0.5949, -0.2765, -0.0822, -0.2608]) tensor([0.4253, 0.1779, 0.2161, 0.1807]) -Greedy action tensor([ 0.6798, -0.5381, -0.0755, -0.2852]) tensor([0.4658, 0.1378, 0.2189, 0.1775]) -Greedy action tensor([ 1.1178, -0.4070, 0.2208, -0.8959]) tensor([0.5685, 0.1237, 0.2318, 0.0759]) -Greedy action tensor([ 0.9709, -0.4477, -0.0946, -0.2442]) tensor([0.5310, 0.1285, 0.1830, 0.1575]) -Greedy action tensor([ 0.2078, -0.1567, -0.0984, -0.3401]) tensor([0.3324, 0.2308, 0.2447, 0.1921]) -Greedy action tensor([ 0.7772, -0.4703, -0.1931, -0.3467]) tensor([0.5022, 0.1442, 0.1903, 0.1632]) -Greedy action tensor([ 0.9774, -0.7438, -0.0687, -0.5417]) tensor([0.5717, 0.1023, 0.2009, 0.1252]) -Greedy action tensor([ 0.6678, -0.5890, -0.1685, -0.1196]) tensor([0.4602, 0.1310, 0.1994, 0.2094]) -Greedy action tensor([ 0.8901, -0.5157, 0.0138, -0.2511]) tensor([0.5048, 0.1238, 0.2102, 0.1613]) -Greedy action tensor([ 0.6185, -0.5000, 0.1117, -0.5267]) tensor([0.4450, 0.1454, 0.2680, 0.1416]) -Greedy action tensor([ 0.3007, 0.5272, -0.3268, 0.1430]) tensor([0.2746, 0.3443, 0.1466, 0.2345]) -Greedy action tensor([ 0.3542, -0.5508, -0.1646, -0.2555]) tensor([0.3932, 0.1591, 0.2340, 0.2137]) -Greedy action tensor([ 0.7189, -0.2463, -0.1856, 0.0416]) tensor([0.4360, 0.1661, 0.1765, 0.2215]) -Greedy action tensor([ 0.8840, -0.7698, -0.0469, -0.5629]) tensor([0.5492, 0.1051, 0.2165, 0.1292]) -Greedy action tensor([ 0.5131, -0.3298, 0.0260, -0.0831]) tensor([0.3852, 0.1658, 0.2367, 0.2122]) -Greedy action tensor([ 0.5134, 0.0811, -0.0130, -0.0377]) tensor([0.3551, 0.2305, 0.2098, 0.2047]) -Greedy action tensor([ 0.5298, -0.3330, 0.0796, -0.5534]) tensor([0.4170, 0.1760, 0.2658, 0.1412]) -Greedy action tensor([ 0.4192, 0.4752, -0.2613, 0.0743]) tensor([0.3056, 0.3232, 0.1547, 0.2164]) -Greedy action tensor([ 0.3226, 0.0467, -0.0913, -0.4404]) tensor([0.3465, 0.2629, 0.2290, 0.1615]) -Greedy action tensor([ 0.7117, -0.1848, -0.0678, -0.0724]) tensor([0.4305, 0.1756, 0.1974, 0.1965]) -Greedy action tensor([ 0.1995, -0.2561, -0.1304, 0.0005]) tensor([0.3152, 0.1999, 0.2266, 0.2583]) -Greedy action tensor([ 0.6123, -0.0379, 0.1780, -0.4964]) tensor([0.4001, 0.2088, 0.2591, 0.1320]) -Greedy action tensor([ 0.5763, -0.1326, -0.0849, -0.1356]) tensor([0.4002, 0.1969, 0.2066, 0.1963]) -Greedy action tensor([ 0.7650, -0.6298, -0.1165, -0.4727]) tensor([0.5123, 0.1270, 0.2122, 0.1486]) -Greedy action tensor([ 0.4890, -0.2355, -0.1386, -0.0744]) tensor([0.3864, 0.1873, 0.2063, 0.2200]) -Greedy action tensor([ 0.8656, -0.6247, -0.1406, -0.3090]) tensor([0.5264, 0.1186, 0.1924, 0.1626]) -Greedy action tensor([ 0.9541, -0.8319, 0.0453, -0.6269]) tensor([0.5629, 0.0944, 0.2269, 0.1158]) -Greedy action tensor([ 1.0091, -0.6013, 0.0439, -0.5402]) tensor([0.5577, 0.1114, 0.2124, 0.1185]) -Greedy action tensor([ 0.5178, -0.1700, -0.0229, -0.3380]) tensor([0.3984, 0.2003, 0.2320, 0.1693]) -Greedy action tensor([ 0.5761, 0.0722, 0.0451, -0.1375]) tensor([0.3728, 0.2253, 0.2192, 0.1827]) -Greedy action tensor([ 0.9632, -0.6053, -0.1313, -0.3464]) tensor([0.5516, 0.1149, 0.1846, 0.1489]) -Greedy action tensor([ 0.7487, -0.7590, 0.3616, -0.7196]) tensor([0.4693, 0.1039, 0.3187, 0.1081]) -Greedy action tensor([ 0.4789, 0.0009, -0.0051, -0.1936]) tensor([0.3641, 0.2257, 0.2244, 0.1858]) -Greedy action tensor([ 0.9818, -0.8707, -0.0470, -0.4732]) tensor([0.5722, 0.0897, 0.2045, 0.1336]) -Greedy action tensor([ 0.9017, -0.5048, 0.1355, -0.6653]) tensor([0.5213, 0.1277, 0.2423, 0.1088]) -Greedy action tensor([ 0.8814, -0.5890, -0.0639, -0.3945]) tensor([0.5270, 0.1211, 0.2048, 0.1471]) -Greedy action tensor([ 0.6434, 0.0150, -0.0688, -0.0669]) tensor([0.3975, 0.2121, 0.1950, 0.1954]) -Greedy action tensor([ 0.7017, -0.3199, -0.0989, -0.3755]) tensor([0.4652, 0.1675, 0.2089, 0.1584]) -Greedy action tensor([ 0.4755, 0.1198, -0.1487, -0.5436]) tensor([0.3850, 0.2698, 0.2062, 0.1390]) -Greedy action tensor([ 0.9656, -0.4517, 0.0289, -0.4164]) tensor([0.5304, 0.1285, 0.2079, 0.1332]) -Greedy action tensor([ 0.8530, -0.1942, -0.0528, 0.0276]) tensor([0.4560, 0.1600, 0.1843, 0.1997]) -Greedy action tensor([ 1.2723, -0.3068, 0.1436, -0.6235]) tensor([0.5953, 0.1227, 0.1926, 0.0894]) -Greedy action tensor([ 0.5305, -0.3949, 0.0011, -0.3777]) tensor([0.4187, 0.1659, 0.2466, 0.1688]) -Greedy action tensor([ 0.4558, 0.1027, 0.0425, -0.3877]) tensor([0.3579, 0.2514, 0.2367, 0.1540]) -Greedy action tensor([ 0.9020, -0.5873, -0.0722, -0.5516]) tensor([0.5444, 0.1228, 0.2055, 0.1272]) -Greedy action tensor([ 0.6663, -0.4916, -0.0490, -0.2599]) tensor([0.4547, 0.1428, 0.2224, 0.1801]) -Greedy action tensor([ 0.7778, -0.5060, -0.0706, -0.3035]) tensor([0.4892, 0.1355, 0.2094, 0.1659]) -Greedy action tensor([ 0.3744, -0.0281, 0.0345, -0.1770]) tensor([0.3382, 0.2262, 0.2408, 0.1949]) -Greedy action tensor([ 0.8690, -0.3177, -0.1183, -0.1757]) tensor([0.4927, 0.1504, 0.1836, 0.1733]) -Greedy action tensor([ 0.9158, -0.5732, 0.1140, -0.1101]) tensor([0.4920, 0.1110, 0.2207, 0.1764]) -Greedy action tensor([ 0.5742, -0.1862, -0.0749, -0.0581]) tensor([0.3966, 0.1854, 0.2072, 0.2108]) -Greedy action tensor([ 0.6958, -0.7213, -0.1212, -0.4731]) tensor([0.5013, 0.1215, 0.2214, 0.1558]) -Greedy action tensor([ 0.4550, 0.2949, 0.1280, -0.3487]) tensor([0.3310, 0.2821, 0.2387, 0.1482]) -Greedy action tensor([ 0.9837, -0.5072, -0.0633, -0.4329]) tensor([0.5498, 0.1238, 0.1930, 0.1334]) -Greedy action tensor([ 0.9250, -0.9380, 0.1096, -0.4883]) tensor([0.5432, 0.0843, 0.2403, 0.1322]) -Greedy action tensor([ 0.6869, -0.0856, -0.1710, -0.4111]) tensor([0.4506, 0.2081, 0.1911, 0.1503]) -Greedy action tensor([ 0.7919, -0.6772, 0.0120, -0.4437]) tensor([0.5052, 0.1163, 0.2316, 0.1469]) -Greedy action tensor([ 0.5947, -0.4193, 0.0413, -0.2695]) tensor([0.4239, 0.1538, 0.2437, 0.1786]) -Greedy action tensor([ 1.3524, -0.6485, -0.0313, -0.3178]) tensor([0.6353, 0.0859, 0.1592, 0.1196]) -Greedy action tensor([ 0.8407, -0.6683, 0.0528, -0.3370]) tensor([0.5041, 0.1115, 0.2292, 0.1552]) -Greedy action tensor([ 0.4054, 0.0904, -0.0007, -0.4067]) tensor([0.3521, 0.2570, 0.2346, 0.1563]) -Greedy action tensor([ 0.6633, -0.3064, -0.0679, -0.4141]) tensor([0.4543, 0.1723, 0.2187, 0.1547]) -Greedy action tensor([ 0.6805, -0.3281, -0.0127, -0.1565]) tensor([0.4352, 0.1587, 0.2176, 0.1884]) -Greedy action tensor([ 0.5070, -0.4402, -0.1871, -0.1480]) tensor([0.4155, 0.1611, 0.2075, 0.2158]) -Greedy action tensor([ 0.4146, 0.2161, -0.1327, -0.0738]) tensor([0.3320, 0.2722, 0.1921, 0.2037]) -Greedy action tensor([ 0.7905, -0.9074, -0.0696, -0.2812]) tensor([0.5132, 0.0939, 0.2171, 0.1757]) -Greedy action tensor([ 0.2967, 0.0500, 0.0815, -0.1741]) tensor([0.3113, 0.2432, 0.2510, 0.1944]) -Greedy action tensor([ 0.2845, 0.0028, -0.1055, -0.0480]) tensor([0.3176, 0.2396, 0.2150, 0.2277]) -Greedy action tensor([ 0.4667, 0.1084, 0.0180, -0.2022]) tensor([0.3509, 0.2452, 0.2240, 0.1798]) -Greedy action tensor([ 0.3710, -0.0428, -0.1362, -0.0567]) tensor([0.3430, 0.2268, 0.2066, 0.2237]) -Greedy action tensor([ 1.0676, -0.8848, -0.0627, -0.3563]) tensor([0.5863, 0.0832, 0.1893, 0.1412]) -Greedy action tensor([ 0.7147, -0.2843, -0.0337, -0.2045]) tensor([0.4464, 0.1644, 0.2112, 0.1780]) -Greedy action tensor([ 0.7018, -0.4667, 0.0772, -0.6738]) tensor([0.4764, 0.1481, 0.2551, 0.1204]) -Greedy action tensor([ 1.8622, -0.2844, -0.1336, 0.6243]) tensor([0.6482, 0.0758, 0.0881, 0.1880]) -Greedy action tensor([ 1.8453, -0.2510, -0.4397, 0.5504]) tensor([0.6673, 0.0820, 0.0679, 0.1828]) -Greedy action tensor([ 1.5208, -0.1008, -0.7572, 0.3003]) tensor([0.6269, 0.1239, 0.0642, 0.1850]) -Greedy action tensor([ 0.9090, -0.3194, 0.0374, 0.1315]) tensor([0.4607, 0.1349, 0.1927, 0.2117]) -Greedy action tensor([ 1.5697, -0.1230, -0.2456, 0.2850]) tensor([0.6159, 0.1133, 0.1003, 0.1705]) -Greedy action tensor([ 1.5401, -0.6827, -0.4502, 0.5864]) tensor([0.6134, 0.0664, 0.0838, 0.2363]) -Greedy action tensor([ 1.5834, -0.4477, -0.4689, 0.4482]) tensor([0.6325, 0.0830, 0.0812, 0.2033]) -Greedy action tensor([ 1.8296, -0.7124, -0.1403, 0.3140]) tensor([0.6955, 0.0547, 0.0970, 0.1528]) -Greedy action tensor([ 1.9099, -0.3829, -0.3691, 0.1827]) tensor([0.7240, 0.0731, 0.0741, 0.1287]) -Greedy action tensor([ 1.0821, -0.2521, -0.1674, 0.2962]) tensor([0.4986, 0.1313, 0.1429, 0.2272]) -Greedy action tensor([ 1.4080, -0.5712, -0.1635, 0.1408]) tensor([0.6144, 0.0849, 0.1276, 0.1730]) -Greedy action tensor([ 1.5114, -0.5610, -0.1565, 0.4140]) tensor([0.6067, 0.0764, 0.1145, 0.2025]) -Greedy action tensor([ 2.4098, -0.7401, -0.1236, 0.6436]) tensor([0.7733, 0.0331, 0.0614, 0.1322]) -Greedy action tensor([ 1.5300, -0.3543, -0.3230, 0.4167]) tensor([0.6108, 0.0928, 0.0958, 0.2006]) -Greedy action tensor([ 1.2805, -0.1788, -0.5669, 0.7433]) tensor([0.5065, 0.1177, 0.0798, 0.2960]) -Greedy action tensor([ 1.3223, -0.7096, -0.1017, 0.4526]) tensor([0.5584, 0.0732, 0.1344, 0.2340]) -Greedy action tensor([ 1.1049, -0.3898, -0.1946, 0.2032]) tensor([0.5255, 0.1179, 0.1433, 0.2133]) -Greedy action tensor([ 1.6936, 0.5161, 0.2374, -0.0334]) tensor([0.5817, 0.1792, 0.1356, 0.1034]) -Greedy action tensor([ 1.6726, -0.2662, -0.2421, 0.2859]) tensor([0.6489, 0.0934, 0.0956, 0.1622]) -Greedy action tensor([ 1.1388, -0.5628, -0.3301, 0.5357]) tensor([0.5103, 0.0931, 0.1175, 0.2792]) -Greedy action tensor([ 1.2714, -0.5334, 0.0030, 0.2284]) tensor([0.5561, 0.0915, 0.1564, 0.1960]) -Greedy action tensor([ 0.9646, 0.0817, -0.2697, 0.0089]) tensor([0.4787, 0.1980, 0.1393, 0.1841]) -Greedy action tensor([ 1.4802, -0.4399, -0.6808, 0.3343]) tensor([0.6330, 0.0928, 0.0729, 0.2013]) -Greedy action tensor([ 1.4951, -0.7106, -0.0459, 0.4295]) tensor([0.5992, 0.0660, 0.1283, 0.2064]) -Greedy action tensor([ 1.1155, -0.0336, -0.4187, 0.0602]) tensor([0.5317, 0.1685, 0.1147, 0.1851]) -Greedy action tensor([ 1.3622, -0.7579, -0.1007, 0.4960]) tensor([0.5643, 0.0677, 0.1307, 0.2373]) -Greedy action tensor([ 1.3927, -0.2297, -0.2659, 0.0487]) tensor([0.6066, 0.1198, 0.1155, 0.1582]) -Greedy action tensor([ 1.0542, -0.1632, -0.2650, 0.1443]) tensor([0.5087, 0.1506, 0.1360, 0.2048]) -Greedy action tensor([ 1.1284, -0.6487, -0.2464, 0.6420]) tensor([0.4910, 0.0830, 0.1242, 0.3019]) -Greedy action tensor([0.8169, 0.1291, 0.0426, 0.1088]) tensor([0.4071, 0.2046, 0.1877, 0.2005]) -Greedy action tensor([ 1.3135, -0.3431, -0.2313, 0.6430]) tensor([0.5220, 0.0996, 0.1114, 0.2670]) -Greedy action tensor([ 0.9923, -0.2706, -0.5063, 0.3585]) tensor([0.4909, 0.1389, 0.1097, 0.2605]) -Greedy action tensor([ 1.9213, -0.8187, -0.2556, 0.4146]) tensor([0.7145, 0.0461, 0.0810, 0.1584]) -Greedy action tensor([ 2.2879, -0.5357, -0.5335, 0.6905]) tensor([0.7568, 0.0449, 0.0450, 0.1532]) -Greedy action tensor([ 2.4495, -0.8252, -0.3933, 1.2558]) tensor([0.7147, 0.0270, 0.0416, 0.2166]) -Greedy action tensor([ 1.7353, -0.6768, -0.1554, 0.5656]) tensor([0.6447, 0.0578, 0.0973, 0.2002]) -Greedy action tensor([ 0.7808, -0.1162, -0.3702, 0.4089]) tensor([0.4143, 0.1690, 0.1311, 0.2856]) -Greedy action tensor([ 1.1202, -0.3456, -0.5126, 0.0665]) tensor([0.5634, 0.1301, 0.1101, 0.1964]) -Greedy action tensor([ 1.6619, -0.6607, -0.2678, 0.1901]) tensor([0.6790, 0.0666, 0.0986, 0.1558]) -Greedy action tensor([ 1.7239, -0.2789, -0.6625, 0.5725]) tensor([0.6481, 0.0875, 0.0596, 0.2049]) -Greedy action tensor([ 0.9035, -0.3995, -0.3295, 0.5448]) tensor([0.4421, 0.1201, 0.1288, 0.3089]) -Greedy action tensor([ 0.7574, -0.1498, -0.0943, 0.2682]) tensor([0.4092, 0.1652, 0.1746, 0.2509]) -Greedy action tensor([ 1.3086, -0.5099, -0.6040, 0.4575]) tensor([0.5757, 0.0934, 0.0850, 0.2458]) -Greedy action tensor([ 1.3150, 0.3303, 0.2261, -0.3482]) tensor([0.5264, 0.1966, 0.1772, 0.0998]) -Greedy action tensor([ 0.5044, -0.3851, -0.0177, 0.3437]) tensor([0.3502, 0.1439, 0.2077, 0.2982]) -Greedy action tensor([ 1.3453, -0.2347, -0.4506, 0.1261]) tensor([0.5997, 0.1235, 0.0995, 0.1772]) -Greedy action tensor([ 1.0496, -0.3432, 0.0644, 0.2012]) tensor([0.4878, 0.1212, 0.1821, 0.2088]) -Greedy action tensor([ 1.2806, -0.7815, -0.4151, 0.4219]) tensor([0.5766, 0.0733, 0.1058, 0.2443]) -Greedy action tensor([ 1.7024, -0.5371, -0.1811, 0.6639]) tensor([0.6201, 0.0661, 0.0943, 0.2195]) -Greedy action tensor([ 1.9691, -0.9878, -0.1609, 0.3494]) tensor([0.7306, 0.0380, 0.0868, 0.1446]) -Greedy action tensor([ 1.1946, -0.1555, -0.4727, 0.0129]) tensor([0.5699, 0.1477, 0.1076, 0.1748]) -Greedy action tensor([ 1.3847, -0.5113, -0.3358, 0.3802]) tensor([0.5898, 0.0886, 0.1056, 0.2160]) -Greedy action tensor([ 1.5489, -0.5493, -0.3319, 0.4711]) tensor([0.6190, 0.0759, 0.0944, 0.2107]) -Greedy action tensor([ 1.6385, -0.2598, -0.8638, 0.4244]) tensor([0.6541, 0.0980, 0.0536, 0.1943]) -Greedy action tensor([ 1.3516, -0.5562, 0.1962, 0.2488]) tensor([0.5570, 0.0827, 0.1754, 0.1849]) -Greedy action tensor([0.9813, 0.1249, 0.0341, 0.0987]) tensor([0.4492, 0.1908, 0.1742, 0.1858]) -Greedy action tensor([ 1.4163, -0.1781, -0.2389, 0.3780]) tensor([0.5720, 0.1161, 0.1093, 0.2025]) -Greedy action tensor([ 1.4675, -0.1418, -0.5078, 0.3382]) tensor([0.6017, 0.1204, 0.0835, 0.1945]) -Greedy action tensor([ 0.9104, -0.4383, -0.0969, 0.1311]) tensor([0.4800, 0.1246, 0.1753, 0.2202]) -Greedy action tensor([ 0.2421, -0.1586, -0.1756, 0.6347]) tensor([0.2625, 0.1758, 0.1729, 0.3888]) -Greedy action tensor([ 1.3231, -0.4012, -0.6199, 0.6380]) tensor([0.5478, 0.0977, 0.0785, 0.2761]) -Greedy action tensor([ 1.7762, -0.1999, -0.3926, 0.3565]) tensor([0.6690, 0.0927, 0.0765, 0.1618]) -Greedy action tensor([ 1.3549, -0.4064, -0.4092, 0.4447]) tensor([0.5729, 0.0984, 0.0982, 0.2305]) -Greedy action tensor([ 2.0367, -0.6589, -0.2799, 0.5090]) tensor([0.7230, 0.0488, 0.0713, 0.1569]) -Greedy action tensor([ 0.9466, -0.4974, -0.0644, 0.2046]) tensor([0.4817, 0.1137, 0.1753, 0.2294]) -Greedy action tensor([ 1.2531, -0.4335, -0.7026, 0.4800]) tensor([0.5592, 0.1035, 0.0791, 0.2581]) -Greedy action tensor([ 0.6916, -0.4461, -0.0861, 0.2679]) tensor([0.4107, 0.1317, 0.1887, 0.2689]) -Greedy action tensor([ 1.7540, -0.2368, -0.5979, 0.6459]) tensor([0.6402, 0.0874, 0.0609, 0.2114]) -Greedy action tensor([ 1.1357, -0.3034, 0.3011, 0.1267]) tensor([0.4912, 0.1165, 0.2132, 0.1791]) -Greedy action tensor([ 1.7035, -0.6361, -0.3678, 0.3587]) tensor([0.6743, 0.0650, 0.0850, 0.1757]) -Greedy action tensor([ 0.2365, -0.2375, -0.2420, 0.0968]) tensor([0.3214, 0.2000, 0.1991, 0.2795]) -Greedy action tensor([ 1.2520, -0.3068, -0.4288, 0.3833]) tensor([0.5506, 0.1158, 0.1025, 0.2310]) -Greedy action tensor([ 1.2321, -0.6945, -0.0670, 0.6109]) tensor([0.5113, 0.0745, 0.1395, 0.2747]) -Greedy action tensor([ 1.5979, -0.4907, -0.2935, 0.9203]) tensor([0.5610, 0.0695, 0.0846, 0.2849]) -Greedy action tensor([1.6534, 0.1839, 0.1369, 0.2711]) tensor([0.5881, 0.1353, 0.1291, 0.1476]) -Greedy action tensor([ 1.1820, -0.3982, -0.6239, 0.2199]) tensor([0.5707, 0.1175, 0.0938, 0.2180]) -Greedy action tensor([ 1.8834, -1.2753, -0.0838, 0.0957]) tensor([0.7409, 0.0315, 0.1036, 0.1240]) -Greedy action tensor([ 1.4799, -0.8735, -0.1792, 0.2908]) tensor([0.6290, 0.0598, 0.1197, 0.1915]) -Greedy action tensor([ 1.4460, -0.3649, -0.4993, 0.4873]) tensor([0.5918, 0.0968, 0.0846, 0.2269]) -Greedy action tensor([ 1.7139, -0.6778, -0.2096, 0.3160]) tensor([0.6735, 0.0616, 0.0984, 0.1664]) -Greedy action tensor([ 1.6935, 0.0879, -0.2367, -0.0962]) tensor([0.6610, 0.1327, 0.0959, 0.1104]) -Greedy action tensor([ 1.4230, -0.2589, -0.4371, 0.3878]) tensor([0.5893, 0.1096, 0.0917, 0.2093]) -Greedy action tensor([ 0.9409, -1.0976, 0.2953, 1.4969]) tensor([0.2943, 0.0383, 0.1543, 0.5131]) -Greedy action tensor([ 1.3043, -0.2224, 1.1365, 1.9031]) tensor([0.2576, 0.0560, 0.2178, 0.4687]) -Greedy action tensor([ 0.0595, 0.6096, -0.4607, -0.4777]) tensor([0.2556, 0.4431, 0.1519, 0.1494]) -Greedy action tensor([-1.2128, -0.9528, 1.7585, -1.8391]) tensor([0.0447, 0.0580, 0.8733, 0.0239]) -Greedy action tensor([-0.9982, -1.7290, 1.5474, -0.9248]) tensor([0.0653, 0.0315, 0.8329, 0.0703]) -Greedy action tensor([ 1.2058, -0.6495, 1.7122, 0.2489]) tensor([0.3125, 0.0489, 0.5186, 0.1200]) -Greedy action tensor([-0.8262, -0.9901, 0.4156, 0.0358]) tensor([0.1302, 0.1105, 0.4508, 0.3084]) -Greedy action tensor([ 1.5517, -0.0940, 1.1533, 0.1263]) tensor([0.4751, 0.0916, 0.3190, 0.1142]) -Greedy action tensor([ 0.9982, -0.5905, -0.3232, 0.4129]) tensor([0.4931, 0.1007, 0.1315, 0.2746]) -Greedy action tensor([ 1.2921, -1.4021, 1.0818, -0.4079]) tensor([0.4853, 0.0328, 0.3933, 0.0886]) -Greedy action tensor([ 0.9103, -1.2697, 0.4419, -0.3649]) tensor([0.4954, 0.0560, 0.3102, 0.1384]) -Greedy action tensor([ 0.5982, 0.3973, 0.2501, -0.5009]) tensor([0.3500, 0.2863, 0.2471, 0.1166]) -Greedy action tensor([ 0.0746, -0.7588, 1.1353, -1.1224]) tensor([0.2162, 0.0940, 0.6245, 0.0653]) -Greedy action tensor([1.1797, 0.3355, 0.2965, 0.6513]) tensor([0.4110, 0.1767, 0.1699, 0.2423]) -Greedy action tensor([-0.0303, -0.1417, 0.0954, -0.4579]) tensor([0.2717, 0.2431, 0.3081, 0.1772]) -Greedy action tensor([ 0.4431, -1.4346, -0.3233, 0.3759]) tensor([0.3918, 0.0599, 0.1820, 0.3663]) -Greedy action tensor([ 1.2268, 0.0845, -0.9144, 0.8799]) tensor([0.4665, 0.1489, 0.0548, 0.3298]) -Greedy action tensor([-0.6215, -1.0788, -0.4136, -0.4775]) tensor([0.2488, 0.1575, 0.3063, 0.2874]) -Greedy action tensor([ 0.9053, -0.4740, 2.7372, 0.3443]) tensor([0.1239, 0.0312, 0.7741, 0.0707]) -Greedy action tensor([ 0.6104, -1.0326, 0.6156, 2.3087]) tensor([0.1305, 0.0252, 0.1312, 0.7131]) -Greedy action tensor([ 0.4997, -1.0320, -0.3355, 0.4546]) tensor([0.3837, 0.0830, 0.1665, 0.3668]) -Greedy action tensor([ 1.3068, 1.1606, 0.6655, -0.8281]) tensor([0.3986, 0.3444, 0.2099, 0.0471]) -Greedy action tensor([-1.2150, 0.1257, -0.5377, 1.0077]) tensor([0.0624, 0.2385, 0.1229, 0.5762]) -Greedy action tensor([ 0.7382, -0.9001, 0.2580, 0.8378]) tensor([0.3427, 0.0666, 0.2120, 0.3786]) -Greedy action tensor([ 1.4838, -0.2900, 0.0316, -0.1495]) tensor([0.6254, 0.1061, 0.1464, 0.1221]) -Greedy action tensor([-2.0647, 0.7724, 0.4728, -0.8892]) tensor([0.0295, 0.5026, 0.3725, 0.0954]) -Greedy action tensor([ 0.4072, -2.0200, 0.4843, -0.4878]) tensor([0.3881, 0.0343, 0.4191, 0.1586]) -Greedy action tensor([-1.2739, -0.7145, 0.7503, -0.8242]) tensor([0.0841, 0.1472, 0.6368, 0.1319]) -Greedy action tensor([-0.5194, -2.3465, -0.1368, 0.0514]) tensor([0.2274, 0.0366, 0.3335, 0.4025]) -Greedy action tensor([-0.3921, 0.5201, 0.5875, 0.8192]) tensor([0.1051, 0.2618, 0.2800, 0.3530]) -Greedy action tensor([-0.8542, -2.2314, 0.3078, 0.1981]) tensor([0.1367, 0.0345, 0.4371, 0.3917]) -Greedy action tensor([ 0.4373, -1.6023, 0.3669, -0.2149]) tensor([0.3872, 0.0504, 0.3608, 0.2017]) -Greedy action tensor([ 0.3613, -0.6904, 0.4885, -0.4091]) tensor([0.3392, 0.1185, 0.3852, 0.1570]) -Greedy action tensor([ 0.9356, -0.9236, 0.9928, 0.7966]) tensor([0.3242, 0.0505, 0.3433, 0.2821]) -Greedy action tensor([ 0.9143, 0.3303, -0.4070, -0.1948]) tensor([0.4642, 0.2589, 0.1238, 0.1531]) -Greedy action tensor([ 0.2420, -1.4738, -0.9844, -0.0490]) tensor([0.4503, 0.0810, 0.1321, 0.3366]) -Greedy action tensor([ 0.7909, -0.0735, 0.7587, 0.3291]) tensor([0.3312, 0.1395, 0.3207, 0.2087]) -Greedy action tensor([-0.7047, 0.9627, 0.4988, 0.3239]) tensor([0.0805, 0.4263, 0.2681, 0.2251]) -Greedy action tensor([-0.8206, -0.0639, 1.2578, -0.5169]) tensor([0.0801, 0.1708, 0.6405, 0.1086]) -Greedy action tensor([ 0.1047, -0.9263, 0.2788, -0.0389]) tensor([0.2930, 0.1045, 0.3487, 0.2538]) -Greedy action tensor([ 0.6442, 0.8847, 0.3004, -0.5348]) tensor([0.3041, 0.3868, 0.2156, 0.0935]) -Greedy action tensor([-0.6338, -0.8178, 0.2661, -0.6619]) tensor([0.1900, 0.1580, 0.4672, 0.1847]) -Greedy action tensor([ 1.2036, -1.2914, 0.2124, 1.1337]) tensor([0.4191, 0.0346, 0.1555, 0.3908]) -Greedy action tensor([-0.8745, -1.2778, 0.5170, -0.5806]) tensor([0.1422, 0.0950, 0.5719, 0.1908]) -Greedy action tensor([ 0.0076, -0.5861, -1.1758, -0.0343]) tensor([0.3549, 0.1960, 0.1087, 0.3404]) -Greedy action tensor([ 1.2411, -0.9465, -0.3791, 1.5560]) tensor([0.3731, 0.0419, 0.0738, 0.5112]) -Greedy action tensor([-0.6078, 0.2151, -0.3125, 0.9434]) tensor([0.1071, 0.2439, 0.1439, 0.5052]) -Greedy action tensor([ 0.2424, 0.0128, 0.7065, -0.0398]) tensor([0.2416, 0.1920, 0.3842, 0.1822]) -Greedy action tensor([ 1.0221, -0.7391, 0.8068, 0.9143]) tensor([0.3477, 0.0597, 0.2804, 0.3122]) -Greedy action tensor([-0.0475, 0.5379, 0.5118, -1.1813]) tensor([0.2055, 0.3689, 0.3595, 0.0661]) -Greedy action tensor([ 0.1478, -1.3471, 2.1612, -0.4184]) tensor([0.1078, 0.0242, 0.8069, 0.0612]) -Greedy action tensor([-0.5279, -1.4934, -0.6452, -0.2057]) tensor([0.2739, 0.1043, 0.2436, 0.3781]) -Greedy action tensor([1.1824, 0.3014, 0.5535, 1.0389]) tensor([0.3554, 0.1473, 0.1895, 0.3079]) -Greedy action tensor([ 0.1617, -0.9886, 1.3455, -0.3957]) tensor([0.1940, 0.0614, 0.6336, 0.1111]) -Greedy action tensor([ 1.7134, -1.0450, 1.0747, 1.3487]) tensor([0.4375, 0.0277, 0.2310, 0.3038]) -Greedy action tensor([ 0.7670, -1.4291, 0.0219, 1.3668]) tensor([0.2935, 0.0326, 0.1393, 0.5346]) -Greedy action tensor([-0.4482, -1.3657, 0.0364, -0.1895]) tensor([0.2316, 0.0925, 0.3760, 0.2999]) -Greedy action tensor([ 1.4877, 0.0379, -0.6495, 0.1757]) tensor([0.6166, 0.1447, 0.0727, 0.1660]) -Greedy action tensor([-0.9138, -0.7456, 0.6291, 0.4052]) tensor([0.0943, 0.1116, 0.4413, 0.3528]) -Greedy action tensor([ 1.1009, -1.2100, -0.6346, 0.4135]) tensor([0.5623, 0.0558, 0.0991, 0.2828]) -Greedy action tensor([ 2.0611, -2.2379, 0.8412, 1.3019]) tensor([0.5628, 0.0076, 0.1662, 0.2634]) -Greedy action tensor([ 0.5581, -1.0058, -0.1269, -0.0333]) tensor([0.4411, 0.0923, 0.2224, 0.2442]) -Greedy action tensor([ 2.4288, -1.2243, 0.9404, 0.1741]) tensor([0.7372, 0.0191, 0.1664, 0.0773]) -Greedy action tensor([-0.0640, -0.9266, -0.7003, 1.0751]) tensor([0.1970, 0.0832, 0.1043, 0.6155]) -Greedy action tensor([ 0.8554, -1.9029, 0.0415, 0.4622]) tensor([0.4584, 0.0291, 0.2031, 0.3094]) -Greedy action tensor([ 0.1954, -0.3766, 0.1762, 0.8848]) tensor([0.2204, 0.1244, 0.2162, 0.4391]) -Greedy action tensor([ 2.6308e-02, -2.1405e+00, -1.9958e-03, 3.4887e-01]) tensor([0.2884, 0.0330, 0.2804, 0.3982]) -Greedy action tensor([-0.8801, -0.9086, 1.0614, -0.5899]) tensor([0.0973, 0.0946, 0.6781, 0.1301]) -Greedy action tensor([ 1.2399, -1.6011, -0.4400, 0.9769]) tensor([0.4966, 0.0290, 0.0926, 0.3818]) -Greedy action tensor([-0.0836, -0.9085, 0.9262, 0.3660]) tensor([0.1739, 0.0762, 0.4773, 0.2726]) -Greedy action tensor([ 0.6417, -1.0893, 0.7636, 0.0182]) tensor([0.3518, 0.0623, 0.3974, 0.1886]) -Greedy action tensor([ 0.5803, 0.6127, 0.6803, -0.5835]) tensor([0.2898, 0.2994, 0.3203, 0.0905]) -Greedy action tensor([-0.6128, -0.9947, 2.5897, -0.1634]) tensor([0.0359, 0.0245, 0.8833, 0.0563]) -Greedy action tensor([ 1.3077, -1.3995, 1.1353, 0.5379]) tensor([0.4217, 0.0281, 0.3549, 0.1953]) -Greedy action tensor([ 0.6830, -1.3356, 0.4462, 0.4624]) tensor([0.3671, 0.0488, 0.2897, 0.2944]) -Greedy action tensor([ 0.1600, 0.6348, 0.5200, -0.4666]) tensor([0.2186, 0.3514, 0.3133, 0.1168]) -Greedy action tensor([ 0.4808, -0.3385, 0.7772, 0.4065]) tensor([0.2692, 0.1187, 0.3621, 0.2500]) -Greedy action tensor([ 1.1300, -0.0828, -0.0143, 0.1248]) tensor([0.5046, 0.1500, 0.1607, 0.1847]) -Greedy action tensor([ 0.7501, -0.7593, 0.0968, 0.7989]) tensor([0.3582, 0.0792, 0.1864, 0.3762]) -Greedy action tensor([ 1.4618, -0.7348, 1.7019, 0.7489]) tensor([0.3481, 0.0387, 0.4426, 0.1706]) -Greedy action tensor([-0.1940, -1.4510, -0.1027, 0.6412]) tensor([0.2134, 0.0607, 0.2338, 0.4920]) -Greedy action tensor([-1.5033, 0.0216, 0.3291, 0.1113]) tensor([0.0593, 0.2723, 0.3704, 0.2979]) -Greedy action tensor([-1.3869, 0.1918, 0.3925, -0.1177]) tensor([0.0652, 0.3162, 0.3865, 0.2320]) -Greedy action tensor([-1.0519, 0.4388, 0.1584, 0.0434]) tensor([0.0849, 0.3768, 0.2846, 0.2537]) -Greedy action tensor([-1.5738e+00, -5.8233e-01, 4.7000e-01, -8.3333e-04]) tensor([0.0616, 0.1660, 0.4755, 0.2969]) -Greedy action tensor([-1.8928, -0.3193, 0.6336, -0.1168]) tensor([0.0413, 0.1990, 0.5161, 0.2437]) -Greedy action tensor([-1.7410, -0.5086, 0.5734, -0.0922]) tensor([0.0506, 0.1737, 0.5124, 0.2633]) -Greedy action tensor([-1.5348, -0.5028, 0.4651, 0.1948]) tensor([0.0594, 0.1667, 0.4389, 0.3349]) -Greedy action tensor([-0.9924, -0.7475, 0.2619, 0.1128]) tensor([0.1136, 0.1451, 0.3982, 0.3431]) -Greedy action tensor([-0.6983, -0.5587, 0.1615, 0.2918]) tensor([0.1388, 0.1596, 0.3280, 0.3736]) -Greedy action tensor([-0.8484, 0.9529, 0.0710, 0.4303]) tensor([0.0760, 0.4604, 0.1906, 0.2730]) -Greedy action tensor([-1.8630, -0.4526, 0.6220, -0.1339]) tensor([0.0440, 0.1802, 0.5279, 0.2479]) -Greedy action tensor([-1.9457, -0.4546, 0.6657, -0.1818]) tensor([0.0402, 0.1784, 0.5470, 0.2344]) -Greedy action tensor([-1.6297, -0.3697, 0.4887, -0.0273]) tensor([0.0562, 0.1980, 0.4671, 0.2788]) -Greedy action tensor([-1.9302, -0.4484, 0.6695, -0.1685]) tensor([0.0405, 0.1783, 0.5453, 0.2359]) -Greedy action tensor([-1.7406, -0.5180, 0.5604, -0.0702]) tensor([0.0508, 0.1724, 0.5070, 0.2698]) -Greedy action tensor([-1.9232, -0.4153, 0.6528, -0.1666]) tensor([0.0409, 0.1847, 0.5375, 0.2369]) -Greedy action tensor([-1.6312, 0.0579, 0.4693, 0.0373]) tensor([0.0503, 0.2722, 0.4108, 0.2667]) -Greedy action tensor([-1.9036, -0.3977, 0.6428, -0.1553]) tensor([0.0416, 0.1877, 0.5314, 0.2392]) -Greedy action tensor([-1.0119, -0.5531, 0.1780, 0.0735]) tensor([0.1133, 0.1792, 0.3722, 0.3353]) -Greedy action tensor([-1.8554, -0.4664, 0.6235, -0.1416]) tensor([0.0445, 0.1783, 0.5304, 0.2468]) -Greedy action tensor([-1.7461, -0.4847, 0.6025, 0.0415]) tensor([0.0477, 0.1683, 0.4992, 0.2849]) -Greedy action tensor([-1.7963, -0.5095, 0.5998, -0.1012]) tensor([0.0475, 0.1720, 0.5216, 0.2588]) -Greedy action tensor([-1.9110, -0.4715, 0.6516, -0.1632]) tensor([0.0418, 0.1763, 0.5420, 0.2400]) -Greedy action tensor([-1.8515, -0.4086, 0.6135, -0.1198]) tensor([0.0442, 0.1869, 0.5194, 0.2495]) -Greedy action tensor([-1.5394, 0.2676, 0.4495, 0.2659]) tensor([0.0488, 0.2974, 0.3568, 0.2970]) -Greedy action tensor([-1.6300, -0.3758, 0.5124, -0.0680]) tensor([0.0562, 0.1970, 0.4788, 0.2680]) -Greedy action tensor([-1.4775, -0.4388, 0.4431, 0.0941]) tensor([0.0647, 0.1827, 0.4413, 0.3113]) -Greedy action tensor([-0.7619, -0.5354, 0.3651, 0.5417]) tensor([0.1108, 0.1390, 0.3421, 0.4081]) -Greedy action tensor([-1.9186, -0.4415, 0.6545, -0.1675]) tensor([0.0412, 0.1807, 0.5405, 0.2376]) -Greedy action tensor([-1.2158, -0.5313, 0.2827, 0.3144]) tensor([0.0828, 0.1642, 0.3706, 0.3825]) -Greedy action tensor([-1.4948, -0.4163, 0.5911, 0.2915]) tensor([0.0557, 0.1637, 0.4483, 0.3323]) -Greedy action tensor([-1.8461, -0.4772, 0.6128, -0.1202]) tensor([0.0450, 0.1768, 0.5257, 0.2526]) -Greedy action tensor([-1.8669, -0.4753, 0.6328, -0.1327]) tensor([0.0437, 0.1759, 0.5327, 0.2477]) -Greedy action tensor([-1.7335, -0.4692, 0.5762, 0.0149]) tensor([0.0491, 0.1739, 0.4947, 0.2822]) -Greedy action tensor([-1.7505, -0.4760, 0.7417, 0.0829]) tensor([0.0436, 0.1561, 0.5274, 0.2729]) -Greedy action tensor([-1.2701, 0.2254, 0.5077, 0.5969]) tensor([0.0560, 0.2500, 0.3315, 0.3624]) -Greedy action tensor([-0.8621, 0.0858, 0.6499, 1.2772]) tensor([0.0602, 0.1554, 0.2731, 0.5114]) -Greedy action tensor([-1.6406, -0.2807, 0.5261, -0.1304]) tensor([0.0551, 0.2146, 0.4809, 0.2494]) -Greedy action tensor([-1.8887, -0.3331, 0.6330, -0.1107]) tensor([0.0415, 0.1966, 0.5165, 0.2455]) -Greedy action tensor([-1.8955, -0.4348, 0.6453, -0.1543]) tensor([0.0422, 0.1818, 0.5354, 0.2406]) -Greedy action tensor([-1.7789, -0.1115, 0.5450, -0.1040]) tensor([0.0458, 0.2425, 0.4675, 0.2443]) -Greedy action tensor([-1.8109, -0.4411, 0.5979, -0.1170]) tensor([0.0465, 0.1830, 0.5173, 0.2531]) -Greedy action tensor([-1.8803, -0.4041, 0.6474, -0.1344]) tensor([0.0423, 0.1852, 0.5300, 0.2425]) -Greedy action tensor([-1.1023, -0.4838, 0.2958, 0.1798]) tensor([0.0952, 0.1766, 0.3852, 0.3430]) -Greedy action tensor([-1.0023, 0.0579, 0.2869, 0.5964]) tensor([0.0802, 0.2316, 0.2912, 0.3969]) -Greedy action tensor([-1.9093, -0.4655, 0.6896, -0.1133]) tensor([0.0405, 0.1715, 0.5442, 0.2438]) -Greedy action tensor([-1.3161, -0.6897, 0.4161, 0.1546]) tensor([0.0777, 0.1453, 0.4390, 0.3380]) -Greedy action tensor([-1.7555, -0.4179, 0.5687, -0.1245]) tensor([0.0497, 0.1892, 0.5074, 0.2537]) -Greedy action tensor([-1.8192, -0.2857, 0.5834, -0.1388]) tensor([0.0453, 0.2101, 0.5011, 0.2434]) -Greedy action tensor([-1.8490, -0.4341, 0.6497, -0.1218]) tensor([0.0437, 0.1797, 0.5311, 0.2455]) -Greedy action tensor([-1.8686, -0.4379, 0.6237, -0.1295]) tensor([0.0436, 0.1821, 0.5265, 0.2479]) -Greedy action tensor([-1.7938, -0.2766, 0.6314, -0.2546]) tensor([0.0465, 0.2118, 0.5252, 0.2165]) -Greedy action tensor([-1.6279, 0.1156, 0.4692, 0.0656]) tensor([0.0493, 0.2817, 0.4012, 0.2679]) -Greedy action tensor([-1.3535, -0.5937, 0.3140, 0.1427]) tensor([0.0775, 0.1657, 0.4107, 0.3461]) -Greedy action tensor([-1.6447, -0.3237, 0.6864, 0.2217]) tensor([0.0465, 0.1743, 0.4785, 0.3007]) -Greedy action tensor([-1.2981, 0.2243, 0.6165, 0.5084]) tensor([0.0542, 0.2483, 0.3676, 0.3299]) -Greedy action tensor([-1.3896, -0.4719, 0.4875, 0.2975]) tensor([0.0648, 0.1621, 0.4232, 0.3499]) -Greedy action tensor([-1.8824, -0.4526, 0.6384, -0.1450]) tensor([0.0429, 0.1793, 0.5339, 0.2439]) -Greedy action tensor([-1.8629, -0.4733, 0.6211, -0.1256]) tensor([0.0441, 0.1769, 0.5285, 0.2505]) -Greedy action tensor([-1.9429, -0.4539, 0.6699, -0.1773]) tensor([0.0401, 0.1779, 0.5474, 0.2346]) -Greedy action tensor([-1.6097, -0.4686, 0.7556, 0.2182]) tensor([0.0476, 0.1491, 0.5071, 0.2962]) -Greedy action tensor([-0.4656, -0.7639, 1.2586, 0.3268]) tensor([0.1046, 0.0776, 0.5867, 0.2311]) -Greedy action tensor([-0.9984, -0.6257, 0.2340, 0.4706]) tensor([0.0978, 0.1420, 0.3354, 0.4249]) -Greedy action tensor([-1.8602, -0.4469, 0.6280, -0.1353]) tensor([0.0439, 0.1805, 0.5290, 0.2466]) -Greedy action tensor([-0.7563, -0.6029, 0.3069, 0.0817]) tensor([0.1356, 0.1581, 0.3927, 0.3135]) -Greedy action tensor([-1.9314, -0.3937, 0.6525, -0.1697]) tensor([0.0404, 0.1882, 0.5358, 0.2355]) -Greedy action tensor([-1.2932, -0.5006, 1.2105, 1.1999]) tensor([0.0363, 0.0802, 0.4441, 0.4394]) -Greedy action tensor([-1.5257, -0.4184, 0.4983, 0.1178]) tensor([0.0596, 0.1805, 0.4514, 0.3085]) -Greedy action tensor([-1.6032, -0.3520, 0.6780, 0.0927]) tensor([0.0507, 0.1771, 0.4960, 0.2762]) -Greedy action tensor([-1.9379, -0.4447, 0.6659, -0.1763]) tensor([0.0403, 0.1796, 0.5452, 0.2349]) -Greedy action tensor([-2.0156, -0.6519, 1.3732, 0.5775]) tensor([0.0209, 0.0816, 0.6184, 0.2791]) -Greedy action tensor([-1.4185, 0.0426, 0.7761, 0.4655]) tensor([0.0479, 0.2066, 0.4302, 0.3153]) -Greedy action tensor([-1.8614, -0.2453, 0.6110, -0.1095]) tensor([0.0423, 0.2128, 0.5011, 0.2438]) -Greedy action tensor([-1.8701, -0.4400, 0.6330, -0.1378]) tensor([0.0434, 0.1813, 0.5301, 0.2453]) -Greedy action tensor([-0.7259, 0.9289, 0.0585, 0.2326]) tensor([0.0907, 0.4743, 0.1986, 0.2364]) -Greedy action tensor([-1.8228, -0.4015, 0.6643, -0.0835]) tensor([0.0437, 0.1812, 0.5260, 0.2490]) -Greedy action tensor([-1.6728, 0.0456, 0.4353, -0.0032]) tensor([0.0497, 0.2771, 0.4092, 0.2639]) -Greedy action tensor([-1.8664, -0.4359, 0.6934, -0.0304]) tensor([0.0410, 0.1714, 0.5304, 0.2572]) -Greedy action tensor([-1.9051, -0.4432, 0.6408, -0.1555]) tensor([0.0420, 0.1811, 0.5354, 0.2415]) -Greedy action tensor([-1.6945, -0.4527, 0.5998, 0.0699]) tensor([0.0495, 0.1712, 0.4905, 0.2888]) -Greedy action tensor([-0.5926, -0.2792, 0.1852, -0.1028]) tensor([0.1619, 0.2215, 0.3524, 0.2642]) -Greedy action tensor([ 0.4485, -0.2901, -0.0531, -0.2252]) tensor([0.3856, 0.1842, 0.2335, 0.1966]) -Greedy action tensor([ 0.5190, -0.1361, -0.1676, -0.4274]) tensor([0.4148, 0.2154, 0.2087, 0.1610]) -Greedy action tensor([ 0.4787, -0.0708, -0.1650, -0.2427]) tensor([0.3863, 0.2230, 0.2029, 0.1878]) -Greedy action tensor([ 0.5953, -0.1035, -0.1801, -0.3262]) tensor([0.4245, 0.2111, 0.1955, 0.1689]) -Greedy action tensor([ 0.8260, -0.4695, -0.0642, -0.3290]) tensor([0.5002, 0.1369, 0.2053, 0.1576]) -Greedy action tensor([ 0.5080, -0.3229, 0.0213, -0.2753]) tensor([0.3988, 0.1738, 0.2452, 0.1822]) -Greedy action tensor([ 0.4688, 0.3486, -0.1853, -0.1052]) tensor([0.3367, 0.2986, 0.1751, 0.1897]) -Greedy action tensor([ 0.9373, -0.5633, -0.2156, -0.5885]) tensor([0.5694, 0.1270, 0.1798, 0.1238]) -Greedy action tensor([ 0.7404, -0.5036, -0.1476, -0.3260]) tensor([0.4892, 0.1410, 0.2013, 0.1684]) -Greedy action tensor([ 0.9339, -0.9184, -0.1282, -0.6513]) tensor([0.5856, 0.0919, 0.2025, 0.1200]) -Greedy action tensor([ 0.5428, 0.0493, -0.1739, -0.2059]) tensor([0.3888, 0.2374, 0.1899, 0.1839]) -Greedy action tensor([ 0.8548, -1.0532, -0.0651, -0.3501]) tensor([0.5415, 0.0803, 0.2158, 0.1623]) -Greedy action tensor([ 0.5637, 0.0140, -0.1121, -0.1638]) tensor([0.3893, 0.2246, 0.1980, 0.1881]) -Greedy action tensor([ 0.6074, -0.5617, 0.3158, -0.4978]) tensor([0.4186, 0.1300, 0.3127, 0.1386]) -Greedy action tensor([ 0.7445, -0.5404, -0.0892, -0.2277]) tensor([0.4786, 0.1324, 0.2079, 0.1810]) -Greedy action tensor([ 0.5522, -0.0980, -0.1064, -0.3822]) tensor([0.4111, 0.2146, 0.2128, 0.1615]) -Greedy action tensor([ 0.9557, -0.6371, 0.0250, -0.5031]) tensor([0.5464, 0.1111, 0.2154, 0.1270]) -Greedy action tensor([ 0.5146, -0.0808, -0.0407, -0.1688]) tensor([0.3802, 0.2096, 0.2182, 0.1920]) -Greedy action tensor([ 0.5185, -0.2589, 0.1003, -0.2952]) tensor([0.3905, 0.1795, 0.2570, 0.1731]) -Greedy action tensor([ 0.7979, -0.5533, 0.0569, -0.3814]) tensor([0.4895, 0.1267, 0.2333, 0.1505]) -Greedy action tensor([ 0.4518, -0.2158, -0.0123, -0.2825]) tensor([0.3815, 0.1957, 0.2398, 0.1830]) -Greedy action tensor([ 0.4022, -0.3123, -0.1074, -0.4231]) tensor([0.3955, 0.1936, 0.2376, 0.1733]) -Greedy action tensor([ 0.9008, -0.7436, 0.0400, -0.3137]) tensor([0.5228, 0.1010, 0.2211, 0.1552]) -Greedy action tensor([ 0.9493, -0.5594, -0.2312, -0.9854]) tensor([0.5978, 0.1322, 0.1836, 0.0864]) -Greedy action tensor([ 0.8488, -0.3885, 0.0178, -0.4131]) tensor([0.4978, 0.1444, 0.2168, 0.1409]) -Greedy action tensor([ 0.5904, 0.2955, -0.1945, -0.3049]) tensor([0.3833, 0.2854, 0.1748, 0.1566]) -Greedy action tensor([ 0.8515, -0.6423, -0.1427, -0.4630]) tensor([0.5367, 0.1205, 0.1986, 0.1442]) -Greedy action tensor([ 0.4067, -0.1596, -0.1656, -0.2723]) tensor([0.3789, 0.2151, 0.2138, 0.1922]) -Greedy action tensor([ 0.7126, -0.4941, -0.1205, -0.3312]) tensor([0.4794, 0.1434, 0.2084, 0.1688]) -Greedy action tensor([ 1.0424, -0.8877, -0.0051, -0.4787]) tensor([0.5833, 0.0847, 0.2046, 0.1274]) -Greedy action tensor([ 0.8091, -0.5599, -0.1334, -0.5516]) tensor([0.5262, 0.1338, 0.2050, 0.1349]) -Greedy action tensor([ 1.0456, -0.5489, -0.0575, -0.2608]) tensor([0.5538, 0.1124, 0.1838, 0.1500]) -Greedy action tensor([ 0.9414, -0.9499, -0.1043, -0.5065]) tensor([0.5756, 0.0868, 0.2023, 0.1353]) -Greedy action tensor([ 0.8877, -0.9523, -0.0466, -0.4767]) tensor([0.5533, 0.0879, 0.2174, 0.1414]) -Greedy action tensor([ 0.6460, -0.2049, -0.0193, -0.3000]) tensor([0.4293, 0.1833, 0.2207, 0.1667]) -Greedy action tensor([ 0.9078, -0.7511, -0.1663, -0.6100]) tensor([0.5710, 0.1087, 0.1951, 0.1252]) -Greedy action tensor([ 0.4789, -0.1744, -0.0175, -0.3457]) tensor([0.3895, 0.2027, 0.2371, 0.1708]) -Greedy action tensor([ 0.8416, -0.5005, -0.1306, -0.0972]) tensor([0.4925, 0.1287, 0.1863, 0.1926]) -Greedy action tensor([ 5.8359e-01, -3.3621e-01, -7.8440e-05, -3.4922e-01]) tensor([0.4256, 0.1696, 0.2374, 0.1674]) -Greedy action tensor([ 0.5995, -0.8487, -0.1145, -0.2853]) tensor([0.4679, 0.1099, 0.2291, 0.1931]) -Greedy action tensor([ 0.8930, -1.0245, 0.0742, -0.5525]) tensor([0.5484, 0.0806, 0.2418, 0.1292]) -Greedy action tensor([ 0.6574, -0.2958, -0.1250, -0.2302]) tensor([0.4436, 0.1710, 0.2029, 0.1826]) -Greedy action tensor([ 0.6926, -0.2445, 0.0454, -0.2079]) tensor([0.4307, 0.1687, 0.2255, 0.1750]) -Greedy action tensor([ 0.2480, -0.3293, -0.0122, -0.0319]) tensor([0.3238, 0.1818, 0.2496, 0.2448]) -Greedy action tensor([ 0.9137, -0.6072, -0.0642, -0.4652]) tensor([0.5416, 0.1183, 0.2037, 0.1364]) -Greedy action tensor([ 0.6530, 0.0873, -0.1025, 0.1097]) tensor([0.3819, 0.2169, 0.1794, 0.2218]) -Greedy action tensor([ 0.9128, -0.7191, -0.1711, -0.3785]) tensor([0.5529, 0.1081, 0.1870, 0.1520]) -Greedy action tensor([ 1.0806, -0.3651, -0.0499, -0.1366]) tensor([0.5392, 0.1270, 0.1741, 0.1596]) -Greedy action tensor([ 0.7691, -0.4626, -0.0185, -0.2307]) tensor([0.4729, 0.1380, 0.2151, 0.1740]) -Greedy action tensor([ 0.5733, -0.1340, -0.0593, -0.4821]) tensor([0.4215, 0.2078, 0.2239, 0.1467]) -Greedy action tensor([ 0.6726, 0.4384, -0.1670, 0.2710]) tensor([0.3458, 0.2735, 0.1493, 0.2314]) -Greedy action tensor([ 0.6176, -0.4159, -0.0869, -0.2740]) tensor([0.4425, 0.1574, 0.2187, 0.1814]) -Greedy action tensor([ 0.6169, -0.7340, -0.1487, -0.2009]) tensor([0.4618, 0.1196, 0.2148, 0.2038]) -Greedy action tensor([ 0.1460, -0.3179, -0.1112, -0.2333]) tensor([0.3240, 0.2038, 0.2505, 0.2217]) -Greedy action tensor([ 0.6087, -0.1002, -0.0021, -0.2600]) tensor([0.4074, 0.2005, 0.2212, 0.1709]) -Greedy action tensor([ 0.4344, -0.2331, 0.0479, -0.0915]) tensor([0.3593, 0.1843, 0.2441, 0.2123]) -Greedy action tensor([ 0.5860, -0.3126, -0.1015, -0.2857]) tensor([0.4295, 0.1749, 0.2160, 0.1796]) -Greedy action tensor([ 0.6797, -0.5280, -0.1073, -0.3779]) tensor([0.4759, 0.1422, 0.2166, 0.1653]) -Greedy action tensor([ 0.4838, -0.2299, -0.1781, 0.0526]) tensor([0.3766, 0.1845, 0.1943, 0.2447]) -Greedy action tensor([ 0.6335, -0.1481, -0.0208, -0.2014]) tensor([0.4147, 0.1898, 0.2156, 0.1799]) -Greedy action tensor([ 0.3789, -0.2175, -0.0598, -0.2895]) tensor([0.3692, 0.2034, 0.2381, 0.1893]) -Greedy action tensor([ 0.8320, -0.4826, 0.0045, -0.3621]) tensor([0.4978, 0.1337, 0.2176, 0.1508]) -Greedy action tensor([ 1.0747, -0.8592, 0.0258, -0.5561]) tensor([0.5915, 0.0855, 0.2072, 0.1158]) -Greedy action tensor([ 0.6101, -0.2769, -0.0282, -0.2905]) tensor([0.4262, 0.1755, 0.2251, 0.1732]) -Greedy action tensor([ 0.5026, -0.6840, -0.0805, -0.2254]) tensor([0.4262, 0.1301, 0.2379, 0.2058]) -Greedy action tensor([ 0.9416, -0.3164, -0.0672, -0.3793]) tensor([0.5220, 0.1484, 0.1904, 0.1393]) -Greedy action tensor([ 0.7017, -0.1135, -0.2000, -0.0428]) tensor([0.4304, 0.1905, 0.1747, 0.2044]) -Greedy action tensor([ 0.4899, 0.2130, -0.1182, 0.0713]) tensor([0.3378, 0.2561, 0.1839, 0.2222]) -Greedy action tensor([ 1.0721, -0.3687, -0.2406, -0.2926]) tensor([0.5678, 0.1344, 0.1528, 0.1450]) -Greedy action tensor([ 0.5321, -0.1213, -0.1844, -0.1105]) tensor([0.3945, 0.2053, 0.1927, 0.2075]) -Greedy action tensor([ 0.5911, -0.2525, 0.0754, -0.4434]) tensor([0.4197, 0.1805, 0.2506, 0.1492]) -Greedy action tensor([ 0.3838, -0.2542, -0.0984, -0.3891]) tensor([0.3835, 0.2026, 0.2368, 0.1771]) -Greedy action tensor([ 0.7557, -0.2301, 0.1211, -0.5351]) tensor([0.4591, 0.1713, 0.2434, 0.1263]) -Greedy action tensor([ 0.7752, -0.3544, -0.0786, -0.3063]) tensor([0.4789, 0.1548, 0.2039, 0.1624]) -Greedy action tensor([ 0.8727, -0.6799, 0.0275, -0.6319]) tensor([0.5367, 0.1136, 0.2305, 0.1192]) -Greedy action tensor([ 0.4344, 0.2543, -0.2037, 0.0823]) tensor([0.3261, 0.2724, 0.1723, 0.2293]) -Greedy action tensor([ 0.7793, -0.5346, 0.0282, -0.1865]) tensor([0.4714, 0.1267, 0.2224, 0.1794]) -Greedy action tensor([ 0.9203, -0.7171, -0.0337, -0.3352]) tensor([0.5363, 0.1043, 0.2066, 0.1528]) -Greedy action tensor([ 0.6407, -0.5624, -0.1442, -0.4019]) tensor([0.4742, 0.1424, 0.2163, 0.1672]) -Greedy action tensor([ 0.6520, -0.3743, -0.0690, -0.4947]) tensor([0.4625, 0.1657, 0.2249, 0.1469]) -Greedy action tensor([ 1.0834, -0.8425, -0.0048, -0.6036]) tensor([0.5997, 0.0874, 0.2020, 0.1110]) -Greedy action tensor([ 1.1837, -0.0021, -0.5191, 0.1520]) tensor([0.5423, 0.1657, 0.0988, 0.1933]) -Greedy action tensor([ 1.6078, -0.1112, -0.4268, 0.5452]) tensor([0.6040, 0.1083, 0.0790, 0.2087]) -Greedy action tensor([ 0.9319, -0.3744, -0.1919, 0.1611]) tensor([0.4858, 0.1316, 0.1579, 0.2247]) -Greedy action tensor([ 2.1105, -0.5127, -0.4358, 0.5303]) tensor([0.7370, 0.0535, 0.0578, 0.1518]) -Greedy action tensor([ 2.0998, -1.1055, -0.2344, 0.4296]) tensor([0.7544, 0.0306, 0.0731, 0.1420]) -Greedy action tensor([ 1.9757, -0.7025, -0.2814, 0.3602]) tensor([0.7288, 0.0501, 0.0763, 0.1449]) -Greedy action tensor([ 1.1756, -0.2520, -0.2052, 0.5929]) tensor([0.4879, 0.1170, 0.1226, 0.2724]) -Greedy action tensor([ 1.9904, -0.6647, -0.4426, 0.1201]) tensor([0.7621, 0.0536, 0.0669, 0.1174]) -Greedy action tensor([ 0.9931, -0.0956, -0.0839, 0.3090]) tensor([0.4583, 0.1543, 0.1561, 0.2313]) -Greedy action tensor([ 1.1361, 0.0377, -0.3560, 0.0708]) tensor([0.5255, 0.1752, 0.1182, 0.1811]) -Greedy action tensor([ 2.4583, -1.5144, -0.1077, 0.8632]) tensor([0.7701, 0.0145, 0.0592, 0.1562]) -Greedy action tensor([ 0.8646, -0.5230, -0.4959, 0.8061]) tensor([0.4083, 0.1019, 0.1047, 0.3851]) -Greedy action tensor([ 1.4023, -0.4568, -0.7443, 0.5564]) tensor([0.5876, 0.0916, 0.0687, 0.2522]) -Greedy action tensor([ 0.6865, -0.1384, -0.4818, 0.3949]) tensor([0.4006, 0.1756, 0.1245, 0.2993]) -Greedy action tensor([ 1.3458, -0.2745, -0.3093, 0.3367]) tensor([0.5703, 0.1128, 0.1090, 0.2079]) -Greedy action tensor([ 1.4613, -0.0701, 0.0712, 0.4397]) tensor([0.5479, 0.1185, 0.1364, 0.1972]) -Greedy action tensor([ 1.1543, -0.5840, -0.1586, 0.3391]) tensor([0.5298, 0.0932, 0.1425, 0.2345]) -Greedy action tensor([ 1.5887, -0.8711, -0.0828, 0.1806]) tensor([0.6587, 0.0563, 0.1238, 0.1611]) -Greedy action tensor([ 2.0842, -0.9715, -0.1480, 0.8660]) tensor([0.6896, 0.0325, 0.0740, 0.2040]) -Greedy action tensor([ 2.4403, -0.8249, -0.2196, 0.7942]) tensor([0.7687, 0.0294, 0.0538, 0.1482]) -Greedy action tensor([ 0.9877, -0.3984, -0.3223, 0.2890]) tensor([0.4958, 0.1240, 0.1338, 0.2465]) -Greedy action tensor([ 1.1904, -0.4694, -0.2908, 0.1573]) tensor([0.5639, 0.1072, 0.1282, 0.2007]) -Greedy action tensor([ 1.7353, -0.5519, -0.1948, 0.3400]) tensor([0.6691, 0.0680, 0.0971, 0.1658]) -Greedy action tensor([ 0.7380, -0.3196, 0.0172, 0.2761]) tensor([0.4059, 0.1410, 0.1974, 0.2557]) -Greedy action tensor([ 1.4279, -0.3569, -0.0782, 0.3461]) tensor([0.5785, 0.0971, 0.1283, 0.1961]) -Greedy action tensor([ 1.5381, -0.6643, -0.2423, 0.3309]) tensor([0.6337, 0.0700, 0.1068, 0.1895]) -Greedy action tensor([ 1.2819, -0.6926, -0.1906, 0.4966]) tensor([0.5482, 0.0761, 0.1257, 0.2500]) -Greedy action tensor([ 2.0213, -0.3489, -0.6132, 0.2487]) tensor([0.7490, 0.0700, 0.0537, 0.1273]) -Greedy action tensor([ 2.0258, -0.1333, -0.5443, 0.1723]) tensor([0.7415, 0.0856, 0.0567, 0.1162]) -Greedy action tensor([ 1.3572, -0.3502, -0.3716, 0.2669]) tensor([0.5900, 0.1070, 0.1047, 0.1983]) -Greedy action tensor([ 1.5236, -0.1382, -0.7794, 0.3199]) tensor([0.6290, 0.1194, 0.0629, 0.1888]) -Greedy action tensor([ 2.0724, -0.7950, -0.7030, 0.4227]) tensor([0.7626, 0.0434, 0.0475, 0.1465]) -Greedy action tensor([ 1.4871, -0.5104, -0.1879, -0.1911]) tensor([0.6624, 0.0899, 0.1241, 0.1237]) -Greedy action tensor([ 1.8053, -0.9244, -0.4699, 0.8857]) tensor([0.6383, 0.0416, 0.0656, 0.2545]) -Greedy action tensor([ 1.1557, 0.1196, -0.6383, 0.4631]) tensor([0.4947, 0.1755, 0.0823, 0.2475]) -Greedy action tensor([ 1.3278, 0.0356, -0.3031, 0.5274]) tensor([0.5210, 0.1431, 0.1020, 0.2340]) -Greedy action tensor([ 1.6063, -0.3320, -0.3347, 0.3669]) tensor([0.6341, 0.0913, 0.0910, 0.1836]) -Greedy action tensor([ 1.2541, -0.1215, -0.5831, 0.2896]) tensor([0.5577, 0.1409, 0.0888, 0.2126]) -Greedy action tensor([ 0.7364, -0.3228, 0.2113, -0.2755]) tensor([0.4344, 0.1506, 0.2570, 0.1579]) -Greedy action tensor([ 1.1268, -0.5745, 0.0464, 0.1648]) tensor([0.5252, 0.0958, 0.1783, 0.2007]) -Greedy action tensor([ 1.3450, -0.4763, -0.1697, 0.7098]) tensor([0.5231, 0.0847, 0.1150, 0.2772]) -Greedy action tensor([ 1.5201, -0.4524, -0.2023, 0.6042]) tensor([0.5821, 0.0810, 0.1040, 0.2329]) -Greedy action tensor([ 2.7018, -0.9235, -0.2421, 0.4253]) tensor([0.8461, 0.0225, 0.0446, 0.0868]) -Greedy action tensor([ 1.7154, -0.5155, -0.2771, 0.3236]) tensor([0.6701, 0.0720, 0.0914, 0.1666]) -Greedy action tensor([ 1.8025, -0.3461, -0.3019, 0.7682]) tensor([0.6273, 0.0732, 0.0765, 0.2230]) -Greedy action tensor([ 1.4024, -0.4522, -0.1802, 0.2304]) tensor([0.5982, 0.0936, 0.1229, 0.1853]) -Greedy action tensor([ 2.0682, -0.3416, -0.6009, 0.6395]) tensor([0.7149, 0.0642, 0.0496, 0.1713]) -Greedy action tensor([ 0.6925, -0.1623, -0.1080, 0.3416]) tensor([0.3878, 0.1650, 0.1742, 0.2730]) -Greedy action tensor([ 2.2410, -0.6007, -0.4747, 0.4044]) tensor([0.7789, 0.0454, 0.0515, 0.1241]) -Greedy action tensor([ 2.7503, -0.7605, -0.5549, 0.9260]) tensor([0.8144, 0.0243, 0.0299, 0.1314]) -Greedy action tensor([ 1.7354, -0.2440, -0.4146, 0.7390]) tensor([0.6158, 0.0851, 0.0717, 0.2274]) -Greedy action tensor([ 1.2056, -0.2726, -0.0781, -0.0454]) tensor([0.5583, 0.1273, 0.1546, 0.1598]) -Greedy action tensor([ 1.1508, -0.0045, -0.8619, 0.1085]) tensor([0.5552, 0.1749, 0.0742, 0.1958]) -Greedy action tensor([ 1.8828, 0.1300, -0.5594, 0.7153]) tensor([0.6364, 0.1103, 0.0553, 0.1980]) -Greedy action tensor([ 1.8720, -1.0107, -0.0591, 0.9286]) tensor([0.6288, 0.0352, 0.0912, 0.2448]) -Greedy action tensor([ 1.0370, -0.2752, -0.8758, 0.5103]) tensor([0.4981, 0.1341, 0.0736, 0.2942]) -Greedy action tensor([ 1.0307, -0.4820, 0.2332, 0.4069]) tensor([0.4532, 0.0998, 0.2041, 0.2428]) -Greedy action tensor([ 1.7614, -0.7450, -0.1472, 0.4266]) tensor([0.6698, 0.0546, 0.0993, 0.1763]) -Greedy action tensor([ 1.4546, -0.7372, -0.1272, 0.4537]) tensor([0.5935, 0.0663, 0.1220, 0.2181]) -Greedy action tensor([ 0.9434, 0.0056, -0.3237, 0.2323]) tensor([0.4620, 0.1809, 0.1301, 0.2269]) -Greedy action tensor([ 0.9021, -0.1597, -0.2665, 0.0864]) tensor([0.4764, 0.1648, 0.1481, 0.2107]) -Greedy action tensor([ 0.8054, -0.3007, -0.2479, -0.0168]) tensor([0.4719, 0.1561, 0.1646, 0.2074]) -Greedy action tensor([ 1.2226, -0.6211, -0.2834, 0.3721]) tensor([0.5533, 0.0876, 0.1227, 0.2364]) -Greedy action tensor([ 1.3330, -0.5107, -0.2917, 0.5953]) tensor([0.5454, 0.0863, 0.1074, 0.2608]) -Greedy action tensor([ 1.2275, -0.1990, -0.4251, 0.7008]) tensor([0.4945, 0.1187, 0.0947, 0.2920]) -Greedy action tensor([ 1.0149, -0.5036, -0.0097, 0.2758]) tensor([0.4865, 0.1066, 0.1746, 0.2323]) -Greedy action tensor([ 1.9093, -0.1206, -0.5710, 0.3585]) tensor([0.7007, 0.0920, 0.0587, 0.1486]) -Greedy action tensor([ 1.5520, -0.2145, -0.7540, 0.4584]) tensor([0.6228, 0.1065, 0.0621, 0.2087]) -Greedy action tensor([ 0.5519, -0.7215, -0.1196, 0.2764]) tensor([0.3922, 0.1098, 0.2004, 0.2977]) -Greedy action tensor([ 1.4698, -0.1577, -0.8355, 0.0737]) tensor([0.6478, 0.1272, 0.0646, 0.1604]) -Greedy action tensor([ 1.5065, -0.0080, -0.1414, -0.0538]) tensor([0.6164, 0.1355, 0.1186, 0.1295]) -Greedy action tensor([ 2.2482, -0.6092, -0.1547, 0.4155]) tensor([0.7646, 0.0439, 0.0692, 0.1223]) -Greedy action tensor([ 0.9133, -0.2483, -0.1799, 0.1907]) tensor([0.4687, 0.1467, 0.1571, 0.2275]) -Greedy action tensor([ 1.3912, -0.0976, -0.6362, 0.1138]) tensor([0.6112, 0.1379, 0.0805, 0.1704]) -Greedy action tensor([ 1.0946, -0.2991, -0.0994, 0.0921]) tensor([0.5213, 0.1294, 0.1580, 0.1913]) -Greedy action tensor([ 1.6629, -0.6099, 0.0310, 0.4303]) tensor([0.6289, 0.0648, 0.1230, 0.1833]) -Greedy action tensor([ 1.3408, -0.6325, -0.1101, 0.1904]) tensor([0.5918, 0.0823, 0.1387, 0.1873]) -Greedy action tensor([ 1.8691, -1.0196, -0.4253, 0.6249]) tensor([0.6922, 0.0385, 0.0698, 0.1995]) -Greedy action tensor([ 1.3304, 0.2171, -0.4171, 0.1007]) tensor([0.5571, 0.1830, 0.0970, 0.1629]) -Greedy action tensor([ 0.9897, -0.4014, -0.4011, 0.4927]) tensor([0.4748, 0.1181, 0.1182, 0.2889]) -Greedy action tensor([ 1.6934, -0.7238, -0.4368, 0.5818]) tensor([0.6506, 0.0580, 0.0773, 0.2141]) -Greedy action tensor([ 0.1054, -0.0121, -0.2413, -0.3066]) tensor([0.3069, 0.2729, 0.2170, 0.2033]) -Greedy action tensor([-0.1048, -0.8593, -0.2276, 0.1597]) tensor([0.2734, 0.1286, 0.2418, 0.3562]) -Greedy action tensor([ 0.4694, -1.3755, -0.3461, 1.5091]) tensor([0.2258, 0.0357, 0.0999, 0.6386]) -Greedy action tensor([-0.7197, 0.1084, 0.2972, -0.8087]) tensor([0.1435, 0.3285, 0.3968, 0.1313]) -Greedy action tensor([ 2.1713, -1.0308, 0.6017, 1.4862]) tensor([0.5705, 0.0232, 0.1187, 0.2875]) -Greedy action tensor([ 0.7195, -0.3251, 0.0641, 0.2960]) tensor([0.3959, 0.1393, 0.2056, 0.2592]) -Greedy action tensor([ 0.2809, -0.7830, 0.3410, 1.6248]) tensor([0.1602, 0.0553, 0.1702, 0.6143]) -Greedy action tensor([ 1.1000, -0.5302, 0.4731, 1.5819]) tensor([0.2986, 0.0585, 0.1595, 0.4834]) -Greedy action tensor([ 0.2392, -2.3732, -0.1557, 0.6525]) tensor([0.3068, 0.0225, 0.2067, 0.4639]) -Greedy action tensor([-0.0633, -1.2732, -0.2348, -0.0273]) tensor([0.3147, 0.0939, 0.2651, 0.3263]) -Greedy action tensor([-0.4402, -0.1878, -0.4607, -1.2837]) tensor([0.2705, 0.3481, 0.2650, 0.1164]) -Greedy action tensor([-1.1166, -0.5566, -0.2992, -0.5834]) tensor([0.1488, 0.2605, 0.3370, 0.2536]) -Greedy action tensor([ 0.0718, -0.6808, 0.7941, 0.0624]) tensor([0.2212, 0.1042, 0.4555, 0.2191]) -Greedy action tensor([-1.1430, 0.3966, -0.4276, -0.1649]) tensor([0.0965, 0.4498, 0.1973, 0.2565]) -Greedy action tensor([-0.5308, -1.5609, 0.5243, -0.3724]) tensor([0.1852, 0.0661, 0.5318, 0.2169]) -Greedy action tensor([ 1.0957, -0.9956, 1.2708, -0.2534]) tensor([0.3884, 0.0480, 0.4628, 0.1008]) -Greedy action tensor([ 0.5880, -1.0038, 1.4028, 0.8359]) tensor([0.2108, 0.0429, 0.4762, 0.2701]) -Greedy action tensor([ 0.3469, 0.0667, 2.0782, -0.5968]) tensor([0.1283, 0.0970, 0.7248, 0.0499]) -Greedy action tensor([ 0.7329, -0.4822, -1.0459, 1.1655]) tensor([0.3326, 0.0987, 0.0562, 0.5126]) -Greedy action tensor([ 0.5657, 0.2811, -0.0660, 0.5748]) tensor([0.3037, 0.2284, 0.1615, 0.3064]) -Greedy action tensor([-1.6045, -0.3243, -0.1548, 0.4111]) tensor([0.0611, 0.2198, 0.2604, 0.4586]) -Greedy action tensor([ 1.8769, -1.3889, 0.4899, 0.6416]) tensor([0.6334, 0.0242, 0.1582, 0.1842]) -Greedy action tensor([ 0.5164, 0.1238, 1.3364, -0.6701]) tensor([0.2352, 0.1589, 0.5341, 0.0718]) -Greedy action tensor([ 0.2647, -0.0308, 1.0692, -0.9178]) tensor([0.2333, 0.1736, 0.5216, 0.0715]) -Greedy action tensor([ 1.4723, -0.3450, 0.6077, 1.1689]) tensor([0.4307, 0.0700, 0.1814, 0.3180]) -Greedy action tensor([-0.1452, -1.5896, 1.6929, -0.4883]) tensor([0.1215, 0.0287, 0.7636, 0.0862]) -Greedy action tensor([ 0.7167, -0.0780, 0.7415, -0.4146]) tensor([0.3572, 0.1614, 0.3662, 0.1152]) -Greedy action tensor([-1.0144, -0.8816, -0.0905, -0.2361]) tensor([0.1462, 0.1670, 0.3684, 0.3184]) -Greedy action tensor([ 1.6166, -0.1984, 0.9714, 1.7294]) tensor([0.3563, 0.0580, 0.1869, 0.3988]) -Greedy action tensor([0.5083, 0.2252, 0.5575, 0.4370]) tensor([0.2677, 0.2017, 0.2812, 0.2493]) -Greedy action tensor([ 0.0198, -1.7457, 0.3889, -0.0835]) tensor([0.2841, 0.0486, 0.4110, 0.2563]) -Greedy action tensor([ 0.7132, -0.5223, -0.3191, 1.0965]) tensor([0.3211, 0.0934, 0.1144, 0.4711]) -Greedy action tensor([ 0.4924, -1.2556, 1.1976, -1.0621]) tensor([0.2933, 0.0511, 0.5937, 0.0620]) -Greedy action tensor([ 0.2804, -1.1172, 0.1786, -0.1945]) tensor([0.3607, 0.0892, 0.3258, 0.2243]) -Greedy action tensor([ 1.8976, -0.7847, 0.4769, 1.3034]) tensor([0.5371, 0.0367, 0.1297, 0.2965]) -Greedy action tensor([-0.5706, 0.1835, -0.8852, 0.0836]) tensor([0.1730, 0.3678, 0.1263, 0.3328]) -Greedy action tensor([ 1.2331, -0.3347, -0.0816, 1.0129]) tensor([0.4387, 0.0915, 0.1178, 0.3520]) -Greedy action tensor([ 0.5280, -0.7252, 1.6203, 1.4238]) tensor([0.1489, 0.0425, 0.4439, 0.3647]) -Greedy action tensor([ 0.2051, -0.5684, -0.8708, 0.6760]) tensor([0.2938, 0.1356, 0.1002, 0.4705]) -Greedy action tensor([ 0.8019, 0.6920, -0.1872, 1.1322]) tensor([0.2733, 0.2448, 0.1016, 0.3802]) -Greedy action tensor([-0.2456, 0.7624, 0.0094, 0.3766]) tensor([0.1451, 0.3975, 0.1872, 0.2703]) -Greedy action tensor([ 0.1213, -1.4352, -1.5489, 0.2312]) tensor([0.3976, 0.0838, 0.0748, 0.4438]) -Greedy action tensor([ 1.7822, -0.1799, 0.6235, 2.0280]) tensor([0.3659, 0.0514, 0.1148, 0.4678]) -Greedy action tensor([ 1.0093, 0.9528, -0.8535, 1.1819]) tensor([0.3041, 0.2874, 0.0472, 0.3614]) -Greedy action tensor([ 0.4999, 1.1324, -0.1128, 0.4398]) tensor([0.2291, 0.4311, 0.1241, 0.2157]) -Greedy action tensor([-1.0413, -2.4200, -0.6364, 0.2613]) tensor([0.1555, 0.0392, 0.2331, 0.5722]) -Greedy action tensor([ 0.2365, -1.0489, 0.6121, 0.2808]) tensor([0.2647, 0.0732, 0.3854, 0.2767]) -Greedy action tensor([ 1.1203, -0.8453, -0.8341, 1.3789]) tensor([0.3881, 0.0544, 0.0550, 0.5026]) -Greedy action tensor([1.2636, 0.0447, 0.3528, 0.6713]) tensor([0.4443, 0.1313, 0.1787, 0.2457]) -Greedy action tensor([-1.0147, 0.0865, 0.7017, -0.4990]) tensor([0.0889, 0.2674, 0.4947, 0.1489]) -Greedy action tensor([-0.0126, -0.2287, 1.4429, -0.5386]) tensor([0.1496, 0.1205, 0.6414, 0.0884]) -Greedy action tensor([ 0.2803, 0.4400, 0.3230, -0.5781]) tensor([0.2747, 0.3222, 0.2867, 0.1164]) -Greedy action tensor([-0.4835, 1.0117, 0.4095, -0.2985]) tensor([0.1098, 0.4898, 0.2682, 0.1321]) -Greedy action tensor([ 0.9810, -0.1358, -0.9930, 1.6501]) tensor([0.2925, 0.0957, 0.0406, 0.5711]) -Greedy action tensor([0.5038, 0.4228, 1.0007, 0.4839]) tensor([0.2200, 0.2028, 0.3616, 0.2156]) -Greedy action tensor([ 0.4026, -1.4833, -0.4836, -0.4409]) tensor([0.5015, 0.0761, 0.2067, 0.2157]) -Greedy action tensor([ 0.2365, -0.9458, -0.3030, 1.5735]) tensor([0.1755, 0.0538, 0.1023, 0.6683]) -Greedy action tensor([-0.1319, -1.2983, 0.2569, -0.4614]) tensor([0.2852, 0.0888, 0.4208, 0.2052]) -Greedy action tensor([ 0.7436, -0.5748, -0.2405, 2.7077]) tensor([0.1140, 0.0305, 0.0426, 0.8128]) -Greedy action tensor([-0.1642, -1.8738, 0.3519, 0.9840]) tensor([0.1664, 0.0301, 0.2788, 0.5247]) -Greedy action tensor([-0.4898, -1.2785, 1.3963, -0.1707]) tensor([0.1061, 0.0482, 0.6997, 0.1460]) -Greedy action tensor([ 0.7680, -0.0911, 0.1993, 0.7667]) tensor([0.3346, 0.1417, 0.1895, 0.3342]) -Greedy action tensor([ 1.8509, -0.9561, -0.2165, 0.4871]) tensor([0.6932, 0.0419, 0.0877, 0.1772]) -Greedy action tensor([ 0.6607, -1.7328, 0.8568, 1.0536]) tensor([0.2639, 0.0241, 0.3211, 0.3909]) -Greedy action tensor([ 1.0737, -1.0255, 1.8550, 0.3914]) tensor([0.2623, 0.0321, 0.5730, 0.1326]) -Greedy action tensor([0.5255, 0.3553, 0.3436, 0.6686]) tensor([0.2610, 0.2202, 0.2176, 0.3012]) -Greedy action tensor([ 1.4992, -0.2387, 0.8075, 1.2343]) tensor([0.4092, 0.0720, 0.2049, 0.3140]) -Greedy action tensor([ 1.1778, -0.9593, 0.4864, 1.2260]) tensor([0.3748, 0.0442, 0.1877, 0.3933]) -Greedy action tensor([-0.0024, -1.5006, 1.5730, 0.0260]) tensor([0.1411, 0.0316, 0.6821, 0.1452]) -Greedy action tensor([-0.4434, -1.4144, -0.4095, 0.3817]) tensor([0.2130, 0.0807, 0.2203, 0.4861]) -Greedy action tensor([ 0.1309, -0.9662, -0.1859, 1.4042]) tensor([0.1775, 0.0592, 0.1293, 0.6340]) -Greedy action tensor([-0.0756, 1.2329, -0.1334, -0.8649]) tensor([0.1640, 0.6068, 0.1548, 0.0745]) -Greedy action tensor([ 0.1447, -0.0390, -0.6696, 0.8463]) tensor([0.2330, 0.1939, 0.1032, 0.4699]) -Greedy action tensor([-0.3299, -0.7502, -0.8041, 0.9016]) tensor([0.1753, 0.1151, 0.1091, 0.6005]) -Greedy action tensor([ 0.0158, -0.7414, 1.3889, -0.0473]) tensor([0.1573, 0.0738, 0.6211, 0.1477]) -Greedy action tensor([ 1.2400, -1.0308, 0.2704, 0.2092]) tensor([0.5437, 0.0561, 0.2062, 0.1939]) -Greedy action tensor([ 0.0657, -0.7746, -0.3113, 1.2567]) tensor([0.1849, 0.0798, 0.1268, 0.6084]) -Greedy action tensor([-1.1557, -0.0633, -0.0172, 0.0871]) tensor([0.0946, 0.2821, 0.2954, 0.3279]) -Greedy action tensor([1.9327, 0.0275, 1.0787, 1.5395]) tensor([0.4446, 0.0661, 0.1892, 0.3000]) -Greedy action tensor([ 0.4345, -0.0651, 1.4005, 0.7681]) tensor([0.1776, 0.1078, 0.4666, 0.2480]) -Greedy action tensor([-1.3780, 0.4243, 0.0044, -0.3734]) tensor([0.0726, 0.4401, 0.2892, 0.1982]) -Greedy action tensor([ 0.3919, -0.5049, 2.1867, 0.3641]) tensor([0.1191, 0.0486, 0.7166, 0.1158]) -Greedy action tensor([-1.9111, -0.3979, 0.6426, -0.1598]) tensor([0.0414, 0.1880, 0.5321, 0.2385]) -Greedy action tensor([ 0.1184, -0.6170, 0.8178, 0.6465]) tensor([0.1928, 0.0924, 0.3880, 0.3269]) -Greedy action tensor([-1.9151, -0.3815, 0.6439, -0.1647]) tensor([0.0411, 0.1906, 0.5315, 0.2368]) -Greedy action tensor([-1.1681, -0.6484, 0.2531, 0.2880]) tensor([0.0900, 0.1513, 0.3727, 0.3860]) -Greedy action tensor([-1.8376, -0.3788, 0.5991, -0.1280]) tensor([0.0449, 0.1932, 0.5137, 0.2482]) -Greedy action tensor([-1.8966, -0.3774, 0.6468, -0.1494]) tensor([0.0416, 0.1901, 0.5295, 0.2388]) -Greedy action tensor([-1.9022, -0.3479, 0.6332, -0.1487]) tensor([0.0414, 0.1961, 0.5231, 0.2393]) -Greedy action tensor([-1.6558, 0.0978, 0.4687, -0.0994]) tensor([0.0503, 0.2904, 0.4209, 0.2384]) -Greedy action tensor([-1.7850, -0.4818, 0.6032, -0.0651]) tensor([0.0473, 0.1740, 0.5149, 0.2639]) -Greedy action tensor([-1.9318, -0.4557, 0.6645, -0.1730]) tensor([0.0407, 0.1779, 0.5454, 0.2360]) -Greedy action tensor([-1.8572, -0.4454, 0.6268, -0.1369]) tensor([0.0441, 0.1809, 0.5287, 0.2463]) -Greedy action tensor([-1.8663, -0.3518, 0.6089, -0.1253]) tensor([0.0432, 0.1965, 0.5137, 0.2465]) -Greedy action tensor([-1.9134, -0.4438, 0.6545, -0.1460]) tensor([0.0412, 0.1793, 0.5378, 0.2416]) -Greedy action tensor([-1.9013, -0.4465, 0.6488, -0.1549]) tensor([0.0420, 0.1798, 0.5376, 0.2407]) -Greedy action tensor([-1.5436, 0.5391, 0.5115, -0.3621]) tensor([0.0498, 0.3994, 0.3886, 0.1622]) -Greedy action tensor([-1.5625, -0.4949, 0.4930, -0.0609]) tensor([0.0617, 0.1795, 0.4819, 0.2770]) -Greedy action tensor([-1.9320, -0.4481, 0.6630, -0.1708]) tensor([0.0406, 0.1791, 0.5440, 0.2363]) -Greedy action tensor([-1.5421, -0.5331, 0.4894, 0.2483]) tensor([0.0576, 0.1580, 0.4393, 0.3451]) -Greedy action tensor([-1.8268, -0.3594, 0.6123, -0.0799]) tensor([0.0444, 0.1925, 0.5086, 0.2546]) -Greedy action tensor([-1.8571, -0.3865, 0.6512, -0.1217]) tensor([0.0429, 0.1867, 0.5271, 0.2433]) -Greedy action tensor([-1.9437, -0.4403, 0.6644, -0.1801]) tensor([0.0402, 0.1806, 0.5450, 0.2343]) -Greedy action tensor([-1.8885, -0.4675, 0.6343, -0.1499]) tensor([0.0429, 0.1778, 0.5351, 0.2442]) -Greedy action tensor([-1.7066, -0.0811, 0.6066, -0.5044]) tensor([0.0512, 0.2604, 0.5179, 0.1705]) -Greedy action tensor([-1.8747, -0.4821, 0.6730, -0.0970]) tensor([0.0422, 0.1697, 0.5387, 0.2494]) -Greedy action tensor([-1.8573, -0.4648, 0.6307, -0.1258]) tensor([0.0440, 0.1772, 0.5300, 0.2487]) -Greedy action tensor([-1.4944, -0.2806, 0.5853, -0.3725]) tensor([0.0648, 0.2180, 0.5183, 0.1989]) -Greedy action tensor([-1.9314, -0.4638, 0.6694, -0.1706]) tensor([0.0406, 0.1762, 0.5471, 0.2362]) -Greedy action tensor([-1.7848, -0.4785, 0.6196, -0.0431]) tensor([0.0466, 0.1720, 0.5157, 0.2658]) -Greedy action tensor([-1.2478, 0.0856, 0.3501, -0.0991]) tensor([0.0776, 0.2943, 0.3834, 0.2447]) -Greedy action tensor([-1.8241, -0.4379, 0.6118, -0.1039]) tensor([0.0454, 0.1817, 0.5191, 0.2538]) -Greedy action tensor([-1.5340, -0.5129, 0.5602, -0.5567]) tensor([0.0687, 0.1908, 0.5579, 0.1826]) -Greedy action tensor([-1.8369, -0.4675, 0.6142, -0.1459]) tensor([0.0455, 0.1791, 0.5283, 0.2470]) -Greedy action tensor([-1.6188, -0.5875, 0.5177, -0.0106]) tensor([0.0579, 0.1624, 0.4905, 0.2892]) -Greedy action tensor([-1.7711, -0.3438, 0.5894, -0.0605]) tensor([0.0470, 0.1957, 0.4976, 0.2598]) -Greedy action tensor([-0.3917, -0.2646, 0.2302, 0.2999]) tensor([0.1668, 0.1894, 0.3107, 0.3331]) -Greedy action tensor([-1.8045, -0.4674, 0.6369, -0.0662]) tensor([0.0455, 0.1732, 0.5226, 0.2587]) -Greedy action tensor([-7.0220e-01, 8.3898e-01, 1.6093e-05, 4.2481e-01]) tensor([0.0928, 0.4334, 0.1873, 0.2864]) -Greedy action tensor([-1.6956, -0.5130, 0.5379, -0.0451]) tensor([0.0532, 0.1735, 0.4963, 0.2770]) -Greedy action tensor([-1.8085, -0.2769, 0.5765, -0.0693]) tensor([0.0451, 0.2086, 0.4897, 0.2567]) -Greedy action tensor([-1.8762, -0.4717, 0.6272, -0.1410]) tensor([0.0435, 0.1774, 0.5322, 0.2469]) -Greedy action tensor([-1.7012, 0.2031, 0.4461, 0.0189]) tensor([0.0457, 0.3071, 0.3916, 0.2555]) -Greedy action tensor([-1.9002, -0.3714, 0.6454, -0.1429]) tensor([0.0414, 0.1909, 0.5278, 0.2399]) -Greedy action tensor([-1.9398, -0.4431, 0.6637, -0.1767]) tensor([0.0403, 0.1801, 0.5446, 0.2350]) -Greedy action tensor([-0.7904, 0.4527, 0.1840, -0.0921]) tensor([0.1096, 0.3798, 0.2903, 0.2203]) -Greedy action tensor([-1.9249, -0.6137, 0.8284, 0.1053]) tensor([0.0357, 0.1324, 0.5601, 0.2718]) -Greedy action tensor([-1.7602, -0.0128, 0.5415, 0.0167]) tensor([0.0442, 0.2535, 0.4413, 0.2611]) -Greedy action tensor([-1.4831, -0.4859, 0.5373, 0.1623]) tensor([0.0608, 0.1649, 0.4589, 0.3154]) -Greedy action tensor([-1.9230, -0.4597, 0.6596, -0.1614]) tensor([0.0410, 0.1772, 0.5429, 0.2389]) -Greedy action tensor([-1.8971, -0.4198, 0.6483, -0.1445]) tensor([0.0418, 0.1833, 0.5334, 0.2414]) -Greedy action tensor([-1.9441, -0.4486, 0.6670, -0.1804]) tensor([0.0401, 0.1791, 0.5465, 0.2342]) -Greedy action tensor([-1.8490, -0.3910, 0.6133, -0.1290]) tensor([0.0442, 0.1900, 0.5188, 0.2470]) -Greedy action tensor([-1.8576, -0.4295, 0.6607, -0.1195]) tensor([0.0430, 0.1793, 0.5333, 0.2444]) -Greedy action tensor([-1.9301, -0.4086, 0.6570, -0.1687]) tensor([0.0405, 0.1855, 0.5383, 0.2357]) -Greedy action tensor([-1.9320, -0.4231, 0.6578, -0.1738]) tensor([0.0406, 0.1834, 0.5406, 0.2354]) -Greedy action tensor([-1.2541, -0.5769, 0.3218, 0.4093]) tensor([0.0764, 0.1505, 0.3696, 0.4034]) -Greedy action tensor([-1.9233, -0.4590, 0.6770, -0.1488]) tensor([0.0405, 0.1752, 0.5455, 0.2389]) -Greedy action tensor([-1.9278, -0.4317, 0.6542, -0.1775]) tensor([0.0409, 0.1826, 0.5410, 0.2355]) -Greedy action tensor([-1.4573, -0.4433, 0.4402, 0.1427]) tensor([0.0650, 0.1792, 0.4337, 0.3221]) -Greedy action tensor([-1.8769, -0.4793, 0.6371, -0.1392]) tensor([0.0433, 0.1752, 0.5352, 0.2462]) -Greedy action tensor([-1.8809, -0.4901, 0.6637, -0.1211]) tensor([0.0424, 0.1705, 0.5405, 0.2466]) -Greedy action tensor([-1.7573, -0.2235, 0.5496, -0.1277]) tensor([0.0481, 0.2231, 0.4833, 0.2455]) -Greedy action tensor([-1.9042, -0.3487, 0.6340, -0.1533]) tensor([0.0414, 0.1961, 0.5240, 0.2385]) -Greedy action tensor([-1.8601, -0.4192, 0.6252, -0.1313]) tensor([0.0437, 0.1848, 0.5251, 0.2464]) -Greedy action tensor([-1.9223, -0.4079, 0.6538, -0.1588]) tensor([0.0408, 0.1854, 0.5360, 0.2378]) -Greedy action tensor([-0.2155, 0.1806, 0.8720, 1.6286]) tensor([0.0849, 0.1262, 0.2520, 0.5369]) -Greedy action tensor([-0.7844, -0.5332, 0.2030, -0.0092]) tensor([0.1400, 0.1800, 0.3759, 0.3040]) -Greedy action tensor([-1.4602, -0.4982, 0.4190, 0.1210]) tensor([0.0666, 0.1742, 0.4358, 0.3235]) -Greedy action tensor([-1.8344, -0.3933, 0.6026, -0.1180]) tensor([0.0450, 0.1901, 0.5146, 0.2503]) -Greedy action tensor([-1.4812, -0.5128, 0.4407, 0.2370]) tensor([0.0623, 0.1642, 0.4260, 0.3475]) -Greedy action tensor([-1.8541, -0.3654, 0.6442, -0.0940]) tensor([0.0427, 0.1893, 0.5196, 0.2484]) -Greedy action tensor([-1.9212, -0.4382, 0.6522, -0.1698]) tensor([0.0412, 0.1815, 0.5400, 0.2373]) -Greedy action tensor([-1.4442, 0.5108, 0.3236, 0.2318]) tensor([0.0519, 0.3667, 0.3041, 0.2774]) -Greedy action tensor([-1.8903, -0.4118, 0.6326, -0.1541]) tensor([0.0425, 0.1864, 0.5298, 0.2412]) -Greedy action tensor([-1.8161, -0.3695, 0.5903, -0.1114]) tensor([0.0458, 0.1945, 0.5079, 0.2518]) -Greedy action tensor([-1.3037, 0.5702, 0.1923, 0.2623]) tensor([0.0596, 0.3885, 0.2663, 0.2856]) -Greedy action tensor([-1.9576, -0.5558, 0.9885, 0.2606]) tensor([0.0300, 0.1220, 0.5718, 0.2761]) -Greedy action tensor([-1.3713, -0.7646, 1.0695, 0.9703]) tensor([0.0405, 0.0742, 0.4646, 0.4207]) -Greedy action tensor([-1.8850, -0.4318, 0.6358, -0.1497]) tensor([0.0428, 0.1829, 0.5319, 0.2425]) -Greedy action tensor([-1.1358, -0.5564, 0.3540, -0.0918]) tensor([0.0994, 0.1774, 0.4409, 0.2823]) -Greedy action tensor([-1.9270, -0.4484, 0.6570, -0.1741]) tensor([0.0410, 0.1797, 0.5429, 0.2364]) -Greedy action tensor([-0.9588, -0.6338, 0.2422, 0.1759]) tensor([0.1134, 0.1570, 0.3769, 0.3527]) -Greedy action tensor([ 0.4508, -0.3563, -0.0971, -0.4792]) tensor([0.4134, 0.1844, 0.2390, 0.1631]) -Greedy action tensor([ 1.3275, -0.6305, -0.1358, -0.4938]) tensor([0.6517, 0.0920, 0.1509, 0.1055]) -Greedy action tensor([ 0.4645, -0.3939, -0.0895, -0.2430]) tensor([0.4014, 0.1701, 0.2307, 0.1978]) -Greedy action tensor([ 0.6349, -0.1443, -0.0703, -0.1237]) tensor([0.4130, 0.1895, 0.2040, 0.1934]) -Greedy action tensor([ 0.4022, -0.6335, -0.0403, -0.2031]) tensor([0.3932, 0.1396, 0.2526, 0.2146]) -Greedy action tensor([ 0.7082, -0.3451, -0.0533, -0.2472]) tensor([0.4545, 0.1585, 0.2122, 0.1748]) -Greedy action tensor([ 0.6635, -0.4042, -0.0212, -0.1928]) tensor([0.4400, 0.1513, 0.2219, 0.1869]) -Greedy action tensor([ 0.5472, -0.3478, 0.1688, -0.4583]) tensor([0.4066, 0.1661, 0.2785, 0.1488]) -Greedy action tensor([ 0.6026, -0.1266, -0.0128, -0.4392]) tensor([0.4209, 0.2030, 0.2275, 0.1485]) -Greedy action tensor([ 0.3022, -0.0100, -0.0719, -0.0871]) tensor([0.3229, 0.2363, 0.2221, 0.2187]) -Greedy action tensor([ 0.2666, -0.1888, -0.0176, -0.1263]) tensor([0.3266, 0.2071, 0.2458, 0.2205]) -Greedy action tensor([ 0.6643, -0.1304, -0.1043, -0.2692]) tensor([0.4332, 0.1957, 0.2008, 0.1703]) -Greedy action tensor([ 0.2003, -0.2116, -0.0226, -0.1678]) tensor([0.3170, 0.2100, 0.2537, 0.2194]) -Greedy action tensor([ 0.7900, 0.1144, -0.0396, -0.0957]) tensor([0.4242, 0.2158, 0.1850, 0.1749]) -Greedy action tensor([ 0.5538, -0.2770, 0.0203, -0.2355]) tensor([0.4038, 0.1759, 0.2368, 0.1834]) -Greedy action tensor([ 0.6617, -0.6306, -0.0016, -0.2294]) tensor([0.4545, 0.1248, 0.2342, 0.1865]) -Greedy action tensor([ 0.4225, -0.3302, 0.1059, -0.0831]) tensor([0.3568, 0.1681, 0.2600, 0.2152]) -Greedy action tensor([ 0.7494, -0.6189, -0.0567, -0.5398]) tensor([0.5059, 0.1288, 0.2259, 0.1394]) -Greedy action tensor([ 0.4813, -0.4164, -0.0780, -0.1571]) tensor([0.3988, 0.1625, 0.2280, 0.2106]) -Greedy action tensor([ 0.5422, 0.0468, 0.0752, -0.5046]) tensor([0.3865, 0.2355, 0.2423, 0.1357]) -Greedy action tensor([ 0.9903, -0.5034, -0.1758, -0.3644]) tensor([0.5574, 0.1251, 0.1737, 0.1438]) -Greedy action tensor([ 0.3351, -0.2730, -0.0336, -0.1052]) tensor([0.3472, 0.1890, 0.2402, 0.2236]) -Greedy action tensor([ 0.6936, -0.4866, -0.2137, -0.3066]) tensor([0.4811, 0.1478, 0.1942, 0.1769]) -Greedy action tensor([ 0.5923, 0.1608, -0.0998, -0.1255]) tensor([0.3791, 0.2462, 0.1897, 0.1849]) -Greedy action tensor([ 0.6188, -0.3469, -0.0076, -0.3685]) tensor([0.4371, 0.1664, 0.2336, 0.1629]) -Greedy action tensor([ 0.3543, 0.0334, -0.0525, -0.0443]) tensor([0.3265, 0.2369, 0.2174, 0.2192]) -Greedy action tensor([ 0.6061, 0.0723, -0.0223, -0.2849]) tensor([0.3952, 0.2318, 0.2109, 0.1621]) -Greedy action tensor([ 0.7558, -0.4452, -0.0663, -0.3906]) tensor([0.4859, 0.1462, 0.2135, 0.1544]) -Greedy action tensor([ 0.4770, 0.0663, 0.1440, -0.4526]) tensor([0.3604, 0.2390, 0.2583, 0.1423]) -Greedy action tensor([ 0.6892, -0.3186, -0.0629, -0.2788]) tensor([0.4512, 0.1647, 0.2127, 0.1714]) -Greedy action tensor([ 0.9191, -0.2568, -0.0132, -0.3306]) tensor([0.5028, 0.1551, 0.1979, 0.1441]) -Greedy action tensor([ 0.7428, -0.2579, -0.0062, -0.6541]) tensor([0.4790, 0.1761, 0.2265, 0.1185]) -Greedy action tensor([ 1.1568, -0.3239, -0.0054, -0.4462]) tensor([0.5742, 0.1306, 0.1796, 0.1156]) -Greedy action tensor([ 0.3035, 0.2996, -0.1830, -0.4222]) tensor([0.3231, 0.3219, 0.1986, 0.1564]) -Greedy action tensor([ 0.5579, -0.5728, -0.0969, -0.2651]) tensor([0.4383, 0.1415, 0.2277, 0.1925]) -Greedy action tensor([ 1.2395, -0.8177, 0.1352, -0.6863]) tensor([0.6230, 0.0796, 0.2065, 0.0908]) -Greedy action tensor([ 0.8755, -0.3363, 0.0881, -0.3888]) tensor([0.4914, 0.1463, 0.2236, 0.1388]) -Greedy action tensor([ 0.3565, 0.1117, 0.0266, -0.3801]) tensor([0.3355, 0.2627, 0.2412, 0.1606]) -Greedy action tensor([ 0.6792, -0.4704, -0.1025, -0.3885]) tensor([0.4721, 0.1495, 0.2161, 0.1623]) -Greedy action tensor([ 0.5466, -0.3205, -0.1113, -0.2312]) tensor([0.4171, 0.1752, 0.2160, 0.1916]) -Greedy action tensor([ 0.8951, -0.6238, -0.3304, -0.8729]) tensor([0.5941, 0.1301, 0.1744, 0.1014]) -Greedy action tensor([ 0.4848, -0.4312, -0.1248, -0.0516]) tensor([0.3955, 0.1582, 0.2150, 0.2313]) -Greedy action tensor([ 0.2670, 0.2430, -0.0999, -0.0898]) tensor([0.2968, 0.2898, 0.2057, 0.2077]) -Greedy action tensor([ 0.9725, -0.6453, -0.1369, -0.3793]) tensor([0.5596, 0.1110, 0.1845, 0.1448]) -Greedy action tensor([ 0.4238, -0.0603, 0.0552, -0.1945]) tensor([0.3513, 0.2165, 0.2430, 0.1893]) -Greedy action tensor([ 0.6413, -0.4454, -0.1158, -0.2076]) tensor([0.4476, 0.1510, 0.2099, 0.1915]) -Greedy action tensor([ 0.7493, -0.1557, 0.0603, -0.3601]) tensor([0.4472, 0.1809, 0.2245, 0.1474]) -Greedy action tensor([ 0.6622, -0.3344, -0.0650, -0.0350]) tensor([0.4255, 0.1571, 0.2056, 0.2119]) -Greedy action tensor([ 0.8095, -0.8484, 0.0386, -0.3669]) tensor([0.5098, 0.0971, 0.2358, 0.1572]) -Greedy action tensor([ 0.2583, 0.0326, -0.0045, -0.0548]) tensor([0.3032, 0.2419, 0.2331, 0.2217]) -Greedy action tensor([ 0.7183, -0.4185, -0.0154, -0.1802]) tensor([0.4529, 0.1453, 0.2174, 0.1844]) -Greedy action tensor([ 1.1180, -0.8980, 0.1013, -0.5671]) tensor([0.5951, 0.0793, 0.2153, 0.1103]) -Greedy action tensor([ 0.8041, -0.3450, 0.0540, -0.1856]) tensor([0.4627, 0.1467, 0.2186, 0.1720]) -Greedy action tensor([ 0.7890, -0.0525, -0.0557, -0.2600]) tensor([0.4523, 0.1950, 0.1943, 0.1584]) -Greedy action tensor([ 0.5502, -0.5316, -0.0498, -0.3738]) tensor([0.4377, 0.1484, 0.2402, 0.1737]) -Greedy action tensor([ 0.9826, -0.2303, -0.4379, -0.2531]) tensor([0.5466, 0.1625, 0.1321, 0.1589]) -Greedy action tensor([ 0.7197, -0.2350, 0.0270, -0.1790]) tensor([0.4362, 0.1679, 0.2182, 0.1776]) -Greedy action tensor([ 0.2724, -0.0245, -0.0883, -0.2965]) tensor([0.3326, 0.2472, 0.2319, 0.1883]) -Greedy action tensor([ 0.9618, -0.3572, -0.0725, -0.3281]) tensor([0.5268, 0.1409, 0.1873, 0.1450]) -Greedy action tensor([ 0.5158, 0.4023, -0.1592, -0.2801]) tensor([0.3505, 0.3129, 0.1785, 0.1581]) -Greedy action tensor([ 0.9525, -0.5583, -0.0925, -0.6883]) tensor([0.5662, 0.1250, 0.1991, 0.1097]) -Greedy action tensor([ 0.7230, -0.4413, 0.0190, -0.2765]) tensor([0.4598, 0.1435, 0.2274, 0.1692]) -Greedy action tensor([ 0.7115, -0.4052, 0.0433, -0.5990]) tensor([0.4740, 0.1552, 0.2430, 0.1278]) -Greedy action tensor([ 1.1286, -0.3847, 0.1411, -0.3110]) tensor([0.5465, 0.1203, 0.2036, 0.1295]) -Greedy action tensor([ 0.3195, -0.1650, 0.0306, -0.2184]) tensor([0.3391, 0.2089, 0.2540, 0.1980]) -Greedy action tensor([ 0.6730, -0.7571, -0.0545, -0.1673]) tensor([0.4643, 0.1111, 0.2243, 0.2004]) -Greedy action tensor([ 0.4174, 0.0181, -0.0494, -0.2628]) tensor([0.3566, 0.2392, 0.2236, 0.1806]) -Greedy action tensor([ 0.8833, -0.3674, -0.1654, -0.3221]) tensor([0.5165, 0.1479, 0.1810, 0.1547]) -Greedy action tensor([ 0.8122, -0.5980, -0.0427, -0.2438]) tensor([0.4957, 0.1210, 0.2108, 0.1724]) -Greedy action tensor([ 0.1482, 0.4645, -0.2888, -0.4901]) tensor([0.2820, 0.3869, 0.1822, 0.1489]) -Greedy action tensor([ 0.3391, -0.0838, -0.1426, -0.1682]) tensor([0.3478, 0.2279, 0.2149, 0.2094]) -Greedy action tensor([ 0.5056, -0.2283, -0.0765, -0.1461]) tensor([0.3906, 0.1875, 0.2183, 0.2036]) -Greedy action tensor([ 0.4598, -0.4654, -0.2395, -0.1288]) tensor([0.4084, 0.1619, 0.2030, 0.2267]) -Greedy action tensor([ 0.6770, -0.3307, -0.0577, -0.1830]) tensor([0.4409, 0.1610, 0.2115, 0.1866]) -Greedy action tensor([ 0.8890, -0.5652, 0.0249, -0.5143]) tensor([0.5261, 0.1229, 0.2217, 0.1293]) -Greedy action tensor([ 0.8328, -0.5286, -0.0442, -0.4744]) tensor([0.5147, 0.1319, 0.2141, 0.1393]) -Greedy action tensor([ 0.5740, -0.2743, -0.1017, -0.2320]) tensor([0.4195, 0.1796, 0.2135, 0.1874]) -Greedy action tensor([ 0.6222, -0.1634, -0.0898, -0.3155]) tensor([0.4277, 0.1950, 0.2099, 0.1675]) -Greedy action tensor([ 0.6565, -0.4563, 0.0654, -0.2616]) tensor([0.4383, 0.1440, 0.2427, 0.1750]) -Greedy action tensor([ 0.8263, -0.4806, -0.0753, -0.5151]) tensor([0.5160, 0.1397, 0.2095, 0.1349]) -Greedy action tensor([ 0.7219, -0.3712, 0.0893, -0.3932]) tensor([0.4557, 0.1528, 0.2421, 0.1494]) -Greedy action tensor([ 1.2178, -0.1468, -0.3996, -0.0892]) tensor([0.5799, 0.1481, 0.1151, 0.1569]) -Greedy action tensor([ 1.0944, -0.4638, -0.0155, 0.1320]) tensor([0.5203, 0.1095, 0.1715, 0.1987]) -Greedy action tensor([ 1.2212, -0.6373, -0.1649, 0.4458]) tensor([0.5358, 0.0835, 0.1340, 0.2467]) -Greedy action tensor([ 1.2139, -0.1671, 0.0759, -0.0792]) tensor([0.5416, 0.1361, 0.1736, 0.1486]) -Greedy action tensor([ 1.7297, -0.3276, -0.5720, 0.6291]) tensor([0.6408, 0.0819, 0.0641, 0.2132]) -Greedy action tensor([ 1.1347, -0.2164, -0.1023, 0.1098]) tensor([0.5241, 0.1357, 0.1521, 0.1881]) -Greedy action tensor([ 1.3785, -0.5990, 0.0741, 0.3456]) tensor([0.5664, 0.0784, 0.1537, 0.2016]) -Greedy action tensor([ 1.9084, -0.6019, -0.2881, 0.5310]) tensor([0.6922, 0.0562, 0.0770, 0.1746]) -Greedy action tensor([ 1.1859, -0.2380, -0.3736, 0.3585]) tensor([0.5296, 0.1275, 0.1113, 0.2315]) -Greedy action tensor([ 1.5942, -0.7726, 0.0240, 0.2538]) tensor([0.6396, 0.0600, 0.1330, 0.1674]) -Greedy action tensor([ 1.3907, -0.3489, -0.3786, 0.3574]) tensor([0.5876, 0.1032, 0.1002, 0.2091]) -Greedy action tensor([ 1.2925, -0.4851, -0.1661, 0.4241]) tensor([0.5491, 0.0928, 0.1277, 0.2304]) -Greedy action tensor([ 1.7794, -0.5534, -0.2276, 0.4149]) tensor([0.6725, 0.0653, 0.0904, 0.1718]) -Greedy action tensor([ 1.1425, -0.2042, -0.5632, 0.3338]) tensor([0.5299, 0.1378, 0.0963, 0.2360]) -Greedy action tensor([ 1.3551, -0.9379, -0.4360, 0.2191]) tensor([0.6294, 0.0635, 0.1050, 0.2021]) -Greedy action tensor([ 1.8749, 0.1478, -0.0465, 0.2020]) tensor([0.6614, 0.1176, 0.0968, 0.1241]) -Greedy action tensor([ 1.6941, -0.4517, -0.6959, 0.7820]) tensor([0.6210, 0.0726, 0.0569, 0.2494]) -Greedy action tensor([ 1.2724, -0.3387, -0.2223, 0.1363]) tensor([0.5730, 0.1144, 0.1285, 0.1840]) -Greedy action tensor([ 1.5728, -0.2605, -0.2203, 0.1483]) tensor([0.6382, 0.1020, 0.1062, 0.1536]) -Greedy action tensor([ 1.8704, -0.9081, -0.4309, 0.3617]) tensor([0.7228, 0.0449, 0.0724, 0.1599]) -Greedy action tensor([ 2.0020, -1.1797, -0.1727, 0.7983]) tensor([0.6872, 0.0285, 0.0781, 0.2062]) -Greedy action tensor([ 1.9509, -0.6126, -0.3928, 0.5020]) tensor([0.7103, 0.0547, 0.0682, 0.1668]) -Greedy action tensor([ 1.3695, -0.5975, -0.4656, 0.3698]) tensor([0.5997, 0.0839, 0.0957, 0.2207]) -Greedy action tensor([ 0.9844, -0.5152, -0.3568, 0.6925]) tensor([0.4481, 0.1000, 0.1172, 0.3347]) -Greedy action tensor([ 1.8109, -0.7717, -0.5941, 0.4424]) tensor([0.7041, 0.0532, 0.0636, 0.1792]) -Greedy action tensor([ 2.0622, -1.1162, -0.4708, 0.3891]) tensor([0.7641, 0.0318, 0.0607, 0.1434]) -Greedy action tensor([ 1.7545, -0.1600, -0.2906, -0.4096]) tensor([0.7186, 0.1059, 0.0930, 0.0825]) -Greedy action tensor([ 0.8739, -0.0197, -0.2432, 0.5680]) tensor([0.4044, 0.1655, 0.1323, 0.2978]) -Greedy action tensor([ 1.6188, -0.3696, -0.4297, 0.5639]) tensor([0.6196, 0.0848, 0.0799, 0.2157]) -Greedy action tensor([ 1.9958, -0.7760, -0.2815, 0.7656]) tensor([0.6862, 0.0429, 0.0704, 0.2005]) -Greedy action tensor([ 1.2415, -0.3685, -0.8145, 0.6764]) tensor([0.5274, 0.1054, 0.0675, 0.2997]) -Greedy action tensor([ 2.4153, -1.5447, -0.2278, 0.9587]) tensor([0.7557, 0.0144, 0.0538, 0.1761]) -Greedy action tensor([ 1.1197, 0.0708, -0.2694, 0.3473]) tensor([0.4851, 0.1699, 0.1209, 0.2241]) -Greedy action tensor([ 1.3500, -0.4692, -0.3670, 0.5886]) tensor([0.5529, 0.0896, 0.0993, 0.2582]) -Greedy action tensor([ 2.0774, -1.1185, -0.1275, 0.3048]) tensor([0.7569, 0.0310, 0.0835, 0.1286]) -Greedy action tensor([ 1.3397, -0.5744, -0.1924, -0.0233]) tensor([0.6175, 0.0911, 0.1334, 0.1580]) -Greedy action tensor([ 0.9178, -0.6311, -0.2006, 0.1372]) tensor([0.5006, 0.1064, 0.1636, 0.2294]) -Greedy action tensor([ 1.5283, -0.2865, -0.3376, 0.5782]) tensor([0.5867, 0.0956, 0.0908, 0.2269]) -Greedy action tensor([ 0.8704, -0.3367, 0.0856, -0.2702]) tensor([0.4820, 0.1441, 0.2199, 0.1540]) -Greedy action tensor([ 1.2226, -0.6007, -0.2860, 0.4413]) tensor([0.5433, 0.0877, 0.1202, 0.2488]) -Greedy action tensor([ 1.2582, -0.4577, -0.0313, 0.3514]) tensor([0.5379, 0.0967, 0.1482, 0.2172]) -Greedy action tensor([ 1.6847, -0.0529, -0.0921, 0.1455]) tensor([0.6411, 0.1128, 0.1085, 0.1376]) -Greedy action tensor([ 1.5118, -0.2753, -0.4302, 0.3812]) tensor([0.6121, 0.1025, 0.0878, 0.1976]) -Greedy action tensor([ 0.6729, -0.2975, 0.1166, 0.1057]) tensor([0.3969, 0.1504, 0.2276, 0.2251]) -Greedy action tensor([ 1.2138, 0.0017, -0.6099, 0.2535]) tensor([0.5429, 0.1616, 0.0877, 0.2078]) -Greedy action tensor([ 1.5509, -0.3018, -0.2768, 0.2969]) tensor([0.6239, 0.0978, 0.1003, 0.1780]) -Greedy action tensor([ 1.8305, -0.8091, -0.5045, 0.6990]) tensor([0.6708, 0.0479, 0.0649, 0.2164]) -Greedy action tensor([ 0.9638, -0.4990, -0.3421, 0.0935]) tensor([0.5205, 0.1205, 0.1410, 0.2180]) -Greedy action tensor([ 1.6517, -0.7716, -0.1111, 0.4178]) tensor([0.6446, 0.0571, 0.1106, 0.1877]) -Greedy action tensor([ 1.2759, -0.0320, -0.0250, 0.2844]) tensor([0.5226, 0.1413, 0.1423, 0.1939]) -Greedy action tensor([ 1.2486, -0.0276, -0.5188, 0.1286]) tensor([0.5630, 0.1571, 0.0961, 0.1837]) -Greedy action tensor([ 1.7221, -0.4281, -0.5686, 0.7592]) tensor([0.6252, 0.0728, 0.0633, 0.2387]) -Greedy action tensor([ 0.6917, -0.3803, -0.2668, 0.1896]) tensor([0.4290, 0.1468, 0.1645, 0.2597]) -Greedy action tensor([ 1.7484, -0.2207, -0.6181, 0.6900]) tensor([0.6328, 0.0883, 0.0594, 0.2196]) -Greedy action tensor([ 2.1383, -1.1572, -0.1352, 0.6487]) tensor([0.7324, 0.0271, 0.0754, 0.1651]) -Greedy action tensor([ 1.5853, -0.0996, -0.1117, -0.2563]) tensor([0.6548, 0.1214, 0.1200, 0.1038]) -Greedy action tensor([ 1.0055, -0.3281, -0.2954, 0.4200]) tensor([0.4779, 0.1259, 0.1301, 0.2661]) -Greedy action tensor([ 1.9840, -1.2645, -0.4917, 0.3876]) tensor([0.7544, 0.0293, 0.0634, 0.1528]) -Greedy action tensor([ 1.5472, -0.2278, -0.4429, 0.2566]) tensor([0.6324, 0.1072, 0.0864, 0.1740]) -Greedy action tensor([ 0.8546, -0.5520, -0.4145, 0.8417]) tensor([0.3979, 0.0975, 0.1118, 0.3928]) -Greedy action tensor([ 1.1298, -0.3634, -0.2114, -0.1567]) tensor([0.5674, 0.1275, 0.1484, 0.1567]) -Greedy action tensor([ 0.9940, -0.1864, -0.3229, -0.0106]) tensor([0.5151, 0.1582, 0.1380, 0.1886]) -Greedy action tensor([ 1.3222, -0.6231, -0.2780, 0.3092]) tensor([0.5855, 0.0837, 0.1182, 0.2126]) -Greedy action tensor([ 1.0079, 0.0337, -0.3225, 0.7522]) tensor([0.4139, 0.1562, 0.1094, 0.3205]) -Greedy action tensor([ 1.8257, -0.6767, -0.3354, 0.4449]) tensor([0.6904, 0.0565, 0.0795, 0.1736]) -Greedy action tensor([ 1.6380, -0.1441, -0.6386, 0.5852]) tensor([0.6173, 0.1039, 0.0634, 0.2154]) -Greedy action tensor([ 2.4606, -1.1821, -0.4495, 0.3856]) tensor([0.8290, 0.0217, 0.0452, 0.1041]) -Greedy action tensor([ 2.2612, -1.3779, -0.6930, 0.6232]) tensor([0.7857, 0.0206, 0.0410, 0.1527]) -Greedy action tensor([ 2.4665, -1.0592, -0.4621, 0.4404]) tensor([0.8232, 0.0242, 0.0440, 0.1085]) -Greedy action tensor([ 1.2797, -0.6379, -0.0231, 0.5796]) tensor([0.5221, 0.0767, 0.1419, 0.2592]) -Greedy action tensor([ 1.6426, -0.8585, -0.0159, 0.0978]) tensor([0.6731, 0.0552, 0.1282, 0.1436]) -Greedy action tensor([ 1.6516, -0.1803, 0.0376, 0.3356]) tensor([0.6145, 0.0984, 0.1223, 0.1648]) -Greedy action tensor([ 1.2969, -0.6087, -0.4845, 1.3463]) tensor([0.4223, 0.0628, 0.0711, 0.4437]) -Greedy action tensor([ 1.0796, 0.1157, -0.0343, 0.0644]) tensor([0.4826, 0.1841, 0.1584, 0.1749]) -Greedy action tensor([ 1.1654, -0.4042, -0.4085, 0.4592]) tensor([0.5239, 0.1090, 0.1086, 0.2586]) -Greedy action tensor([ 1.3691, -0.0828, -0.5912, 0.3029]) tensor([0.5817, 0.1362, 0.0819, 0.2003]) -Greedy action tensor([ 1.6197, -0.5915, -0.3674, 0.5385]) tensor([0.6306, 0.0691, 0.0864, 0.2139]) -Greedy action tensor([ 1.1703, -0.5116, 0.2254, 0.3734]) tensor([0.4937, 0.0918, 0.1919, 0.2225]) -Greedy action tensor([ 0.1960, -0.2655, -0.2988, 0.0381]) tensor([0.3232, 0.2037, 0.1970, 0.2760]) -Greedy action tensor([ 1.0580, -0.0781, -0.1358, 0.3339]) tensor([0.4742, 0.1522, 0.1437, 0.2299]) -Greedy action tensor([ 1.5094, -0.0961, -0.4490, 0.1801]) tensor([0.6225, 0.1250, 0.0878, 0.1647]) -Greedy action tensor([-0.4959, -1.4330, 1.2283, -0.6159]) tensor([0.1268, 0.0497, 0.7111, 0.1125]) -Greedy action tensor([-0.5215, -0.8844, 1.1448, -1.0654]) tensor([0.1321, 0.0919, 0.6993, 0.0767]) -Greedy action tensor([ 0.5271, -0.0770, 0.8000, -0.2712]) tensor([0.3021, 0.1651, 0.3968, 0.1360]) -Greedy action tensor([-0.1773, -0.3775, 1.5082, -0.9070]) tensor([0.1299, 0.1064, 0.7011, 0.0626]) -Greedy action tensor([0.4365, 0.0418, 0.5704, 0.9270]) tensor([0.2247, 0.1514, 0.2569, 0.3670]) -Greedy action tensor([ 0.0327, -0.9592, -0.6580, 0.7698]) tensor([0.2524, 0.0936, 0.1265, 0.5275]) -Greedy action tensor([ 0.6932, -0.8773, 0.1922, 0.0684]) tensor([0.4257, 0.0885, 0.2579, 0.2279]) -Greedy action tensor([ 0.3214, 0.8756, -0.1607, 0.5732]) tensor([0.2153, 0.3748, 0.1330, 0.2770]) -Greedy action tensor([0.4036, 0.9130, 0.7825, 1.2018]) tensor([0.1576, 0.2622, 0.2301, 0.3500]) -Greedy action tensor([ 1.7028, -0.3389, -0.7181, 0.2644]) tensor([0.6868, 0.0892, 0.0610, 0.1630]) -Greedy action tensor([-0.0676, -2.2113, 0.0789, 0.6758]) tensor([0.2284, 0.0268, 0.2644, 0.4804]) -Greedy action tensor([1.2362, 0.9257, 0.4208, 0.2508]) tensor([0.3923, 0.2876, 0.1736, 0.1465]) -Greedy action tensor([0.2340, 1.4482, 0.9633, 0.1740]) tensor([0.1354, 0.4561, 0.2809, 0.1276]) -Greedy action tensor([ 0.6174, -0.7789, 0.1345, 0.3848]) tensor([0.3764, 0.0932, 0.2322, 0.2983]) -Greedy action tensor([ 0.3163, -2.1000, -0.1733, 1.4120]) tensor([0.2131, 0.0190, 0.1306, 0.6373]) -Greedy action tensor([-0.3393, 0.6352, 0.0839, 0.0737]) tensor([0.1495, 0.3962, 0.2283, 0.2260]) -Greedy action tensor([-0.2567, -0.4534, 0.1579, 0.1952]) tensor([0.2038, 0.1674, 0.3085, 0.3203]) -Greedy action tensor([ 1.2471, -1.8838, 1.4501, 0.9604]) tensor([0.3312, 0.0145, 0.4057, 0.2486]) -Greedy action tensor([ 0.1963, 0.3201, 0.0061, -0.3903]) tensor([0.2845, 0.3220, 0.2352, 0.1582]) -Greedy action tensor([ 0.5678, -0.9990, -0.3315, 1.1388]) tensor([0.2954, 0.0616, 0.1202, 0.5228]) -Greedy action tensor([-0.1373, -0.8840, -0.5527, 0.2485]) tensor([0.2774, 0.1315, 0.1831, 0.4080]) -Greedy action tensor([-0.2362, 0.4827, 1.7446, -0.8513]) tensor([0.0922, 0.1893, 0.6686, 0.0499]) -Greedy action tensor([ 0.8611, -0.8522, 1.6272, -0.0880]) tensor([0.2689, 0.0485, 0.5785, 0.1041]) -Greedy action tensor([-0.8967, 0.0373, 0.8353, -1.1744]) tensor([0.1005, 0.2556, 0.5678, 0.0761]) -Greedy action tensor([-0.5934, -0.2652, 0.4140, -0.4628]) tensor([0.1596, 0.2216, 0.4370, 0.1818]) -Greedy action tensor([ 0.7840, 0.3368, 2.9406, -0.9048]) tensor([0.0955, 0.0611, 0.8257, 0.0177]) -Greedy action tensor([ 0.1319, -0.1860, 0.0532, 0.0743]) tensor([0.2781, 0.2024, 0.2570, 0.2625]) -Greedy action tensor([ 0.8814, -0.5410, 0.6898, 0.6787]) tensor([0.3468, 0.0836, 0.2863, 0.2832]) -Greedy action tensor([-0.6533, -1.4118, -0.2082, -0.0588]) tensor([0.2066, 0.0967, 0.3224, 0.3743]) -Greedy action tensor([ 1.0998, -0.3023, 0.7185, 0.5154]) tensor([0.4022, 0.0990, 0.2747, 0.2242]) -Greedy action tensor([ 1.0139, -1.3211, -0.1501, 1.3866]) tensor([0.3496, 0.0338, 0.1091, 0.5075]) -Greedy action tensor([-0.9826, 0.0651, 1.4885, -0.8641]) tensor([0.0595, 0.1696, 0.7040, 0.0670]) -Greedy action tensor([ 0.0277, -0.2253, -0.2838, 0.1798]) tensor([0.2722, 0.2114, 0.1994, 0.3170]) -Greedy action tensor([-0.1524, -0.3213, 0.0962, 0.1201]) tensor([0.2252, 0.1902, 0.2888, 0.2958]) -Greedy action tensor([ 0.2377, 0.7399, 0.2832, -0.0399]) tensor([0.2244, 0.3708, 0.2348, 0.1700]) -Greedy action tensor([ 0.8395, 0.4961, -0.5070, -0.4566]) tensor([0.4458, 0.3162, 0.1160, 0.1220]) -Greedy action tensor([ 0.7963, -1.4572, 0.7491, 0.2713]) tensor([0.3773, 0.0396, 0.3599, 0.2232]) -Greedy action tensor([ 1.2088, -0.2616, 0.1877, -0.0701]) tensor([0.5352, 0.1230, 0.1928, 0.1490]) -Greedy action tensor([-0.3662, -1.1900, -0.5857, -0.1536]) tensor([0.2875, 0.1261, 0.2308, 0.3556]) -Greedy action tensor([ 0.5007, -0.7962, 2.1379, 0.7497]) tensor([0.1299, 0.0355, 0.6679, 0.1667]) -Greedy action tensor([0.3329, 0.3269, 0.5628, 0.1042]) tensor([0.2470, 0.2456, 0.3109, 0.1965]) -Greedy action tensor([-0.1789, -1.6168, 0.3597, -0.5356]) tensor([0.2739, 0.0650, 0.4693, 0.1917]) -Greedy action tensor([ 0.5873, -1.4637, -0.1089, 0.8775]) tensor([0.3374, 0.0434, 0.1682, 0.4510]) -Greedy action tensor([1.4638, 0.6386, 1.0985, 1.7497]) tensor([0.2888, 0.1265, 0.2004, 0.3843]) -Greedy action tensor([ 0.7397, -1.6976, 0.5203, 0.5804]) tensor([0.3646, 0.0319, 0.2927, 0.3109]) -Greedy action tensor([ 0.5267, -1.3750, 0.8211, -0.3652]) tensor([0.3446, 0.0515, 0.4626, 0.1413]) -Greedy action tensor([-0.9699, -0.7846, -0.3878, 0.5933]) tensor([0.1141, 0.1373, 0.2041, 0.5445]) -Greedy action tensor([-0.3797, 0.4236, -0.9125, -1.4852]) tensor([0.2409, 0.5379, 0.1414, 0.0798]) -Greedy action tensor([ 0.3507, -0.7310, 0.8906, 0.1764]) tensor([0.2568, 0.0870, 0.4405, 0.2157]) -Greedy action tensor([ 0.5232, -1.5839, -0.3767, 0.0443]) tensor([0.4656, 0.0566, 0.1893, 0.2884]) -Greedy action tensor([ 1.4600, -0.4989, 0.1111, 0.3254]) tensor([0.5807, 0.0819, 0.1507, 0.1867]) -Greedy action tensor([ 0.1929, -0.1727, 0.4186, -0.8193]) tensor([0.3021, 0.2096, 0.3786, 0.1098]) -Greedy action tensor([ 1.6484, -0.0102, 0.5433, 0.8133]) tensor([0.5114, 0.0974, 0.1694, 0.2219]) -Greedy action tensor([-0.7928, -0.3402, 0.4149, -1.0484]) tensor([0.1494, 0.2349, 0.4999, 0.1157]) -Greedy action tensor([-0.9257, -0.5794, 1.4910, -0.0728]) tensor([0.0626, 0.0885, 0.7019, 0.1469]) -Greedy action tensor([1.8793, 0.1287, 1.1526, 0.8931]) tensor([0.4926, 0.0855, 0.2381, 0.1837]) -Greedy action tensor([-0.3860, 0.3723, 0.2911, -0.4089]) tensor([0.1645, 0.3511, 0.3237, 0.1608]) -Greedy action tensor([-0.9765, 0.7602, 0.2651, -0.1859]) tensor([0.0810, 0.4600, 0.2804, 0.1786]) -Greedy action tensor([-0.1071, -1.9920, 0.3140, 0.7086]) tensor([0.2026, 0.0308, 0.3087, 0.4580]) -Greedy action tensor([ 0.2543, -0.3067, 0.2759, 0.6328]) tensor([0.2468, 0.1408, 0.2521, 0.3603]) -Greedy action tensor([-0.2571, -0.8008, 0.1876, 0.5495]) tensor([0.1858, 0.1079, 0.2899, 0.4163]) -Greedy action tensor([-1.1264, -0.9107, 1.4096, -0.5828]) tensor([0.0603, 0.0748, 0.7612, 0.1038]) -Greedy action tensor([0.7304, 0.5541, 0.0023, 0.3646]) tensor([0.3317, 0.2781, 0.1602, 0.2301]) -Greedy action tensor([ 0.9901, -1.1467, 0.2050, 0.4784]) tensor([0.4601, 0.0543, 0.2098, 0.2758]) -Greedy action tensor([-0.0021, -0.2192, 0.6318, 1.2081]) tensor([0.1420, 0.1143, 0.2676, 0.4762]) -Greedy action tensor([ 0.9542, -2.8061, 0.1880, 0.9065]) tensor([0.4096, 0.0095, 0.1904, 0.3905]) -Greedy action tensor([-0.3895, -1.7316, 0.2868, 0.4405]) tensor([0.1811, 0.0473, 0.3562, 0.4154]) -Greedy action tensor([-0.4491, -0.0030, 1.5145, 0.1696]) tensor([0.0866, 0.1353, 0.6172, 0.1608]) -Greedy action tensor([ 0.5579, 0.1567, -0.4546, -0.2380]) tensor([0.4026, 0.2695, 0.1463, 0.1816]) -Greedy action tensor([ 0.5392, 0.0551, 1.6214, -0.2306]) tensor([0.1988, 0.1225, 0.5866, 0.0921]) -Greedy action tensor([ 1.3108, -0.9960, 1.8710, -0.5030]) tensor([0.3318, 0.0330, 0.5810, 0.0541]) -Greedy action tensor([ 1.2967, -0.0844, 0.3985, 0.8306]) tensor([0.4374, 0.1099, 0.1782, 0.2745]) -Greedy action tensor([ 0.1615, -0.1431, -0.2174, 0.1914]) tensor([0.2897, 0.2136, 0.1983, 0.2985]) -Greedy action tensor([-0.9558, 1.0811, 0.7963, -0.0864]) tensor([0.0595, 0.4558, 0.3429, 0.1418]) -Greedy action tensor([ 1.3397, -1.4584, 0.3738, 1.0613]) tensor([0.4548, 0.0277, 0.1731, 0.3443]) -Greedy action tensor([-1.2047, 0.1063, -0.5566, 1.0268]) tensor([0.0628, 0.2328, 0.1200, 0.5845]) -Greedy action tensor([ 0.4373, -0.4413, -0.7935, -0.0482]) tensor([0.4305, 0.1788, 0.1257, 0.2649]) -Greedy action tensor([ 0.2153, -0.8052, -0.1959, 0.1406]) tensor([0.3388, 0.1221, 0.2246, 0.3144]) -Greedy action tensor([ 0.1301, -1.5688, -0.7294, 0.3816]) tensor([0.3458, 0.0632, 0.1464, 0.4446]) -Greedy action tensor([ 0.8771, 0.1343, 1.9044, -0.7182]) tensor([0.2236, 0.1064, 0.6246, 0.0454]) -Greedy action tensor([ 0.6305, 0.4004, -0.9930, 0.7604]) tensor([0.3194, 0.2538, 0.0630, 0.3638]) -Greedy action tensor([ 0.6788, 1.1867, -0.4620, 0.1797]) tensor([0.2787, 0.4631, 0.0891, 0.1692]) -Greedy action tensor([ 0.6868, -0.2544, -0.0254, -0.1201]) tensor([0.4297, 0.1677, 0.2108, 0.1918]) -Greedy action tensor([ 0.2164, 0.2215, 0.2371, -0.3018]) tensor([0.2761, 0.2775, 0.2819, 0.1645]) -Greedy action tensor([ 0.5242, -0.2409, 0.0746, -0.1164]) tensor([0.3802, 0.1769, 0.2425, 0.2004]) -Greedy action tensor([ 0.4460, -0.7244, -0.1337, -0.3464]) tensor([0.4304, 0.1336, 0.2411, 0.1949]) -Greedy action tensor([ 0.7408, -0.7068, 0.1326, -0.5764]) tensor([0.4884, 0.1148, 0.2659, 0.1309]) -Greedy action tensor([ 0.8030, -0.2606, -0.0162, -0.2616]) tensor([0.4693, 0.1620, 0.2069, 0.1618]) -Greedy action tensor([ 0.8423, -0.4840, -0.1006, -0.7244]) tensor([0.5366, 0.1424, 0.2090, 0.1120]) -Greedy action tensor([ 0.2321, -0.1004, -0.0803, -0.1040]) tensor([0.3161, 0.2267, 0.2313, 0.2259]) -Greedy action tensor([ 0.8469, -0.7034, 0.0849, -0.5441]) tensor([0.5188, 0.1101, 0.2421, 0.1291]) -Greedy action tensor([ 0.9677, -0.5577, -0.0116, -0.6651]) tensor([0.5591, 0.1216, 0.2100, 0.1092]) -Greedy action tensor([ 0.7972, -0.9604, 0.0722, -0.4058]) tensor([0.5110, 0.0881, 0.2475, 0.1534]) -Greedy action tensor([ 0.6075, -0.4198, 0.2915, -0.4655]) tensor([0.4117, 0.1474, 0.3001, 0.1408]) -Greedy action tensor([ 0.8587, -0.3927, -0.0673, -0.3609]) tensor([0.5057, 0.1447, 0.2003, 0.1493]) -Greedy action tensor([ 0.7219, -0.5736, 0.0055, -0.2110]) tensor([0.4639, 0.1270, 0.2266, 0.1825]) -Greedy action tensor([ 0.9557, -0.9062, 0.1444, -0.6997]) tensor([0.5585, 0.0868, 0.2481, 0.1067]) -Greedy action tensor([ 0.4228, -0.1261, -0.0996, -0.1802]) tensor([0.3679, 0.2125, 0.2182, 0.2013]) -Greedy action tensor([ 0.6467, -0.7020, 0.1371, -0.2966]) tensor([0.4445, 0.1154, 0.2670, 0.1731]) -Greedy action tensor([ 0.6165, -0.3767, 0.0115, -0.4937]) tensor([0.4452, 0.1649, 0.2431, 0.1467]) -Greedy action tensor([ 0.0677, -0.2135, 0.0288, -0.1568]) tensor([0.2844, 0.2147, 0.2736, 0.2273]) -Greedy action tensor([ 0.6112, -0.3341, -0.0561, -0.5243]) tensor([0.4499, 0.1748, 0.2308, 0.1445]) -Greedy action tensor([ 0.7621, -0.3171, -0.1052, -0.3755]) tensor([0.4807, 0.1633, 0.2019, 0.1541]) -Greedy action tensor([ 0.3731, -0.6056, -0.1957, -0.2262]) tensor([0.4014, 0.1509, 0.2273, 0.2205]) -Greedy action tensor([ 0.4730, 0.0369, -0.0567, 0.1083]) tensor([0.3413, 0.2207, 0.2010, 0.2370]) -Greedy action tensor([ 0.4216, -0.2416, -0.0528, -0.1703]) tensor([0.3716, 0.1915, 0.2313, 0.2056]) -Greedy action tensor([ 0.5705, -0.4037, -0.0063, -0.4168]) tensor([0.4326, 0.1633, 0.2430, 0.1612]) -Greedy action tensor([ 0.7461, -0.3781, 0.0689, -0.4037]) tensor([0.4652, 0.1512, 0.2363, 0.1473]) -Greedy action tensor([ 0.5091, -0.0364, -0.1625, -0.0534]) tensor([0.3759, 0.2178, 0.1920, 0.2142]) -Greedy action tensor([ 0.2732, -0.0254, 0.1772, -0.1834]) tensor([0.3045, 0.2259, 0.2767, 0.1929]) -Greedy action tensor([ 0.7678, 0.0025, 0.0511, -0.6434]) tensor([0.4551, 0.2117, 0.2222, 0.1110]) -Greedy action tensor([ 1.0693, -0.7329, 0.1373, -0.3521]) tensor([0.5556, 0.0916, 0.2187, 0.1341]) -Greedy action tensor([ 0.8543, -0.6406, -0.0953, -0.4815]) tensor([0.5336, 0.1197, 0.2064, 0.1403]) -Greedy action tensor([ 0.7184, -0.3662, -0.0664, -0.3248]) tensor([0.4659, 0.1575, 0.2125, 0.1641]) -Greedy action tensor([ 0.4393, -0.2448, -0.0446, -0.2649]) tensor([0.3823, 0.1929, 0.2357, 0.1891]) -Greedy action tensor([ 0.7396, -0.6317, -0.0482, -0.2815]) tensor([0.4834, 0.1227, 0.2199, 0.1741]) -Greedy action tensor([ 0.4602, -0.0482, 0.2825, -0.5838]) tensor([0.3583, 0.2155, 0.3000, 0.1262]) -Greedy action tensor([ 0.8880, -0.8863, 0.0043, -0.4083]) tensor([0.5387, 0.0914, 0.2226, 0.1474]) -Greedy action tensor([ 0.9984, -1.1891, 0.0284, -0.7274]) tensor([0.5990, 0.0672, 0.2271, 0.1066]) -Greedy action tensor([ 0.8385, -0.7489, -0.1765, -0.4287]) tensor([0.5410, 0.1106, 0.1961, 0.1523]) -Greedy action tensor([ 0.7500, -0.3045, -0.0524, -0.3886]) tensor([0.4724, 0.1646, 0.2118, 0.1513]) -Greedy action tensor([ 0.7961, -0.5017, -0.0424, -0.3520]) tensor([0.4944, 0.1350, 0.2138, 0.1568]) -Greedy action tensor([ 0.6016, -0.1717, 0.0064, -0.0011]) tensor([0.3906, 0.1803, 0.2154, 0.2138]) -Greedy action tensor([ 0.9160, -0.6485, -0.0897, -0.1808]) tensor([0.5238, 0.1096, 0.1916, 0.1749]) -Greedy action tensor([ 0.4060, -0.2067, 0.0513, -0.2477]) tensor([0.3619, 0.1961, 0.2538, 0.1882]) -Greedy action tensor([ 0.2186, 0.1608, -0.0695, -0.1145]) tensor([0.2932, 0.2768, 0.2198, 0.2102]) -Greedy action tensor([ 0.9113, -0.5576, 0.0979, -0.9096]) tensor([0.5448, 0.1254, 0.2416, 0.0882]) -Greedy action tensor([ 0.2109, -0.0473, -0.2188, -0.5117]) tensor([0.3438, 0.2656, 0.2237, 0.1669]) -Greedy action tensor([ 0.9029, -0.7951, 0.0316, -0.6334]) tensor([0.5505, 0.1008, 0.2303, 0.1185]) -Greedy action tensor([ 0.9039, -0.5179, -0.0767, -0.7282]) tensor([0.5519, 0.1332, 0.2070, 0.1079]) -Greedy action tensor([ 0.5613, -0.2125, -0.0461, -0.3000]) tensor([0.4118, 0.1899, 0.2243, 0.1740]) -Greedy action tensor([ 1.1356, -0.9341, 0.1973, -0.4882]) tensor([0.5832, 0.0736, 0.2282, 0.1150]) -Greedy action tensor([ 0.6782, -0.4952, -0.0848, -0.3190]) tensor([0.4663, 0.1442, 0.2174, 0.1720]) -Greedy action tensor([ 0.2723, -0.1407, 0.0295, -0.3344]) tensor([0.3343, 0.2212, 0.2622, 0.1822]) -Greedy action tensor([ 0.5658, -0.0213, -0.0881, -0.2982]) tensor([0.4004, 0.2226, 0.2082, 0.1688]) -Greedy action tensor([ 0.8883, -0.2357, 0.0771, -0.2846]) tensor([0.4810, 0.1563, 0.2137, 0.1489]) -Greedy action tensor([ 0.8350, -0.5270, -0.0060, -0.3611]) tensor([0.5026, 0.1287, 0.2167, 0.1520]) -Greedy action tensor([ 0.5133, -0.1567, -0.1160, -0.1543]) tensor([0.3910, 0.2001, 0.2084, 0.2006]) -Greedy action tensor([ 0.6824, -0.4185, -0.1369, -0.2343]) tensor([0.4602, 0.1530, 0.2028, 0.1840]) -Greedy action tensor([ 0.5626, -0.0617, -0.0814, -0.2034]) tensor([0.3959, 0.2121, 0.2079, 0.1841]) -Greedy action tensor([ 0.6949, -0.3602, -0.1660, -0.2159]) tensor([0.4602, 0.1602, 0.1945, 0.1851]) -Greedy action tensor([ 0.3980, -0.5449, -0.0884, -0.4166]) tensor([0.4086, 0.1592, 0.2513, 0.1809]) -Greedy action tensor([ 0.7991, -0.6624, -0.0514, -0.2565]) tensor([0.4982, 0.1155, 0.2128, 0.1734]) -Greedy action tensor([ 0.7780, -0.4845, -0.2304, -0.1604]) tensor([0.4905, 0.1388, 0.1789, 0.1919]) -Greedy action tensor([ 0.8003, -0.6450, -0.1155, -0.0992]) tensor([0.4896, 0.1154, 0.1959, 0.1991]) -Greedy action tensor([ 0.7081, -0.4768, -0.0412, -0.2940]) tensor([0.4661, 0.1425, 0.2203, 0.1711]) -Greedy action tensor([ 0.4419, -0.2599, -0.1097, -0.3254]) tensor([0.3943, 0.1955, 0.2271, 0.1831]) -Greedy action tensor([ 0.3509, 0.4028, -0.0966, -0.0175]) tensor([0.2955, 0.3112, 0.1889, 0.2044]) -Greedy action tensor([ 0.1988, 0.1694, -0.0456, -0.1768]) tensor([0.2906, 0.2822, 0.2276, 0.1996]) -Greedy action tensor([ 0.4390, 0.1457, -0.0844, -0.3561]) tensor([0.3585, 0.2673, 0.2124, 0.1619]) -Greedy action tensor([ 0.5094, -0.4547, 0.0823, -0.4580]) tensor([0.4143, 0.1580, 0.2703, 0.1575]) -Greedy action tensor([ 0.9987, -0.7343, -0.1432, -0.5747]) tensor([0.5871, 0.1038, 0.1874, 0.1217]) -Greedy action tensor([ 1.1895, -0.4238, 0.0118, -0.6408]) tensor([0.5997, 0.1195, 0.1847, 0.0962]) -Greedy action tensor([ 0.8516, -0.4944, -0.0264, -0.1685]) tensor([0.4910, 0.1278, 0.2041, 0.1771]) -Greedy action tensor([ 0.6700, -0.0616, -0.0029, -0.3446]) tensor([0.4248, 0.2044, 0.2168, 0.1540]) -Greedy action tensor([ 1.0863, -0.6880, 0.0702, -0.4783]) tensor([0.5744, 0.0974, 0.2080, 0.1202]) -Greedy action tensor([ 0.5912, 0.2806, -0.1670, -0.0784]) tensor([0.3685, 0.2701, 0.1727, 0.1887]) -Greedy action tensor([ 0.2559, -0.2863, -0.0510, -0.0735]) tensor([0.3293, 0.1915, 0.2423, 0.2369]) -Greedy action tensor([ 0.7284, -0.4838, -0.0017, -0.4990]) tensor([0.4825, 0.1436, 0.2325, 0.1414]) -Greedy action tensor([ 0.8579, -0.3713, -0.0649, -0.4483]) tensor([0.5100, 0.1492, 0.2027, 0.1381]) -Greedy action tensor([ 0.4219, 0.3677, -0.0747, -0.2993]) tensor([0.3287, 0.3114, 0.2001, 0.1598]) -Greedy action tensor([-0.0389, -0.2282, -0.0975, -0.1525]) tensor([0.2730, 0.2259, 0.2574, 0.2437]) -Greedy action tensor([ 0.2759, -0.0575, -0.0082, -0.4830]) tensor([0.3404, 0.2439, 0.2563, 0.1594]) -Greedy action tensor([-1.8407, -0.1774, 0.5900, -0.1108]) tensor([0.0429, 0.2266, 0.4882, 0.2422]) -Greedy action tensor([-1.8961, -0.4170, 0.6464, -0.1424]) tensor([0.0419, 0.1838, 0.5324, 0.2419]) -Greedy action tensor([-1.9121, -0.4279, 0.6565, -0.1489]) tensor([0.0412, 0.1816, 0.5371, 0.2401]) -Greedy action tensor([-1.9453, -0.4518, 0.6657, -0.1813]) tensor([0.0402, 0.1788, 0.5467, 0.2343]) -Greedy action tensor([-1.8458, -0.4290, 0.6182, -0.1312]) tensor([0.0446, 0.1839, 0.5239, 0.2476]) -Greedy action tensor([-1.2430, -0.4723, 0.3828, 0.3207]) tensor([0.0768, 0.1660, 0.3903, 0.3669]) -Greedy action tensor([-1.9180, -0.4427, 0.6499, -0.1674]) tensor([0.0414, 0.1809, 0.5395, 0.2383]) -Greedy action tensor([-1.5352, -0.4626, 0.6642, 0.2813]) tensor([0.0524, 0.1531, 0.4724, 0.3221]) -Greedy action tensor([-1.8009, -0.4670, 0.6087, -0.0155]) tensor([0.0457, 0.1734, 0.5085, 0.2724]) -Greedy action tensor([-0.8263, -0.5405, 0.1822, 0.3334]) tensor([0.1210, 0.1611, 0.3319, 0.3860]) -Greedy action tensor([-1.7226, -0.3618, 0.6464, 0.0511]) tensor([0.0466, 0.1815, 0.4976, 0.2743]) -Greedy action tensor([-1.5029, -0.5089, 0.6908, 0.3247]) tensor([0.0529, 0.1430, 0.4748, 0.3292]) -Greedy action tensor([-1.7990, -0.4687, 0.5920, -0.1300]) tensor([0.0476, 0.1800, 0.5199, 0.2525]) -Greedy action tensor([-1.5857, -0.5376, 0.4643, 0.0588]) tensor([0.0595, 0.1698, 0.4624, 0.3083]) -Greedy action tensor([-1.7679, -0.2796, 0.5890, -0.0510]) tensor([0.0464, 0.2055, 0.4898, 0.2583]) -Greedy action tensor([-0.9762, -0.6003, 0.3793, -0.2484]) tensor([0.1190, 0.1733, 0.4614, 0.2463]) -Greedy action tensor([-1.4241, -0.6658, 0.7456, 0.5873]) tensor([0.0516, 0.1102, 0.4522, 0.3860]) -Greedy action tensor([-1.9211, -0.4513, 0.6558, -0.1683]) tensor([0.0412, 0.1791, 0.5420, 0.2377]) -Greedy action tensor([-1.9091, -0.4393, 0.6464, -0.1637]) tensor([0.0417, 0.1815, 0.5376, 0.2391]) -Greedy action tensor([-1.9047, -0.4479, 0.6438, -0.1595]) tensor([0.0420, 0.1803, 0.5371, 0.2406]) -Greedy action tensor([-1.8259, -0.3326, 0.6144, -0.0769]) tensor([0.0441, 0.1963, 0.5061, 0.2535]) -Greedy action tensor([-1.8704, -0.4407, 0.6550, -0.1179]) tensor([0.0427, 0.1782, 0.5331, 0.2461]) -Greedy action tensor([-1.8101, -0.4788, 0.5955, -0.1115]) tensor([0.0469, 0.1774, 0.5195, 0.2562]) -Greedy action tensor([-1.8133, -0.4734, 0.5898, -0.1621]) tensor([0.0474, 0.1811, 0.5243, 0.2472]) -Greedy action tensor([-0.3769, 0.8183, -0.0139, 0.3669]) tensor([0.1275, 0.4212, 0.1832, 0.2682]) -Greedy action tensor([-1.9047, -0.3648, 0.6408, -0.1551]) tensor([0.0414, 0.1930, 0.5276, 0.2380]) -Greedy action tensor([-1.0913, 0.1150, 0.3528, -0.2290]) tensor([0.0913, 0.3052, 0.3871, 0.2164]) -Greedy action tensor([-1.8917, -0.4120, 0.6430, -0.1493]) tensor([0.0422, 0.1852, 0.5318, 0.2408]) -Greedy action tensor([-1.3928, -0.6045, 0.4068, 0.0558]) tensor([0.0741, 0.1629, 0.4478, 0.3153]) -Greedy action tensor([-1.7236, -0.3173, 0.6568, -0.0417]) tensor([0.0470, 0.1919, 0.5083, 0.2528]) -Greedy action tensor([-1.3322, -0.5328, 0.4940, -0.2471]) tensor([0.0807, 0.1795, 0.5011, 0.2388]) -Greedy action tensor([-1.7875, -0.4292, 0.5854, -0.1390]) tensor([0.0480, 0.1868, 0.5154, 0.2498]) -Greedy action tensor([-1.9204, -0.4048, 0.6491, -0.1693]) tensor([0.0410, 0.1868, 0.5358, 0.2364]) -Greedy action tensor([-1.3769, -0.1247, 0.6306, 0.3258]) tensor([0.0574, 0.2007, 0.4271, 0.3149]) -Greedy action tensor([-1.8791, -0.4079, 0.6321, -0.1516]) tensor([0.0429, 0.1869, 0.5287, 0.2415]) -Greedy action tensor([-1.9124, -0.5252, 0.6982, -0.1387]) tensor([0.0408, 0.1634, 0.5553, 0.2405]) -Greedy action tensor([-1.0554, -0.3487, 0.5245, 0.6126]) tensor([0.0759, 0.1538, 0.3682, 0.4021]) -Greedy action tensor([-1.8766, -0.4383, 0.6290, -0.1468]) tensor([0.0433, 0.1824, 0.5302, 0.2441]) -Greedy action tensor([-1.8706, -0.4590, 0.6369, -0.1411]) tensor([0.0434, 0.1783, 0.5333, 0.2450]) -Greedy action tensor([-1.9021, -0.4301, 0.6638, -0.1504]) tensor([0.0414, 0.1806, 0.5392, 0.2388]) -Greedy action tensor([-1.4469, -0.1459, 0.4252, -0.0623]) tensor([0.0659, 0.2421, 0.4287, 0.2633]) -Greedy action tensor([-0.6108, -0.9869, 1.2448, 0.8150]) tensor([0.0817, 0.0561, 0.5224, 0.3399]) -Greedy action tensor([-1.4702, -0.4713, 0.7163, 0.5600]) tensor([0.0494, 0.1342, 0.4400, 0.3764]) -Greedy action tensor([-1.9045, -0.5367, 0.8641, -0.0559]) tensor([0.0367, 0.1443, 0.5856, 0.2334]) -Greedy action tensor([-1.7449, -0.3954, 0.5592, -0.0660]) tensor([0.0494, 0.1906, 0.4951, 0.2649]) -Greedy action tensor([-1.8708, -0.4508, 0.6990, -0.0788]) tensor([0.0413, 0.1709, 0.5398, 0.2480]) -Greedy action tensor([-1.9385, -0.4375, 0.6622, -0.1760]) tensor([0.0403, 0.1810, 0.5436, 0.2351]) -Greedy action tensor([-1.8961, -0.4353, 0.6386, -0.1576]) tensor([0.0424, 0.1825, 0.5342, 0.2409]) -Greedy action tensor([-1.3307, -0.5058, 0.4262, 0.3026]) tensor([0.0704, 0.1607, 0.4081, 0.3607]) -Greedy action tensor([-1.9020, -0.4427, 0.6420, -0.1593]) tensor([0.0421, 0.1812, 0.5361, 0.2406]) -Greedy action tensor([-1.8125, -0.3584, 0.6539, -0.0501]) tensor([0.0437, 0.1870, 0.5147, 0.2546]) -Greedy action tensor([-1.0774, -0.6463, 0.2735, 0.6111]) tensor([0.0847, 0.1303, 0.3269, 0.4582]) -Greedy action tensor([-1.8194, -0.3279, 0.5996, -0.0527]) tensor([0.0444, 0.1972, 0.4987, 0.2597]) -Greedy action tensor([-1.8539, -0.3518, 0.6402, -0.0638]) tensor([0.0424, 0.1904, 0.5133, 0.2539]) -Greedy action tensor([-1.8438, -0.3851, 0.6475, -0.1167]) tensor([0.0435, 0.1870, 0.5251, 0.2445]) -Greedy action tensor([-1.6825, -0.4724, 0.5391, 0.0155]) tensor([0.0525, 0.1762, 0.4844, 0.2869]) -Greedy action tensor([-0.9128, -0.4375, 0.2262, -0.0916]) tensor([0.1249, 0.2009, 0.3902, 0.2840]) -Greedy action tensor([-1.5903, -0.4877, 0.5192, 0.1126]) tensor([0.0564, 0.1697, 0.4646, 0.3093]) -Greedy action tensor([-1.8707, -0.4161, 0.6445, -0.1217]) tensor([0.0427, 0.1830, 0.5286, 0.2457]) -Greedy action tensor([-1.2361, -0.5078, 1.2236, 1.3051]) tensor([0.0364, 0.0754, 0.4260, 0.4622]) -Greedy action tensor([-1.0005, 0.7678, 0.1084, 0.2204]) tensor([0.0753, 0.4413, 0.2282, 0.2552]) -Greedy action tensor([-1.0727, -0.6224, 0.1795, 0.3243]) tensor([0.0989, 0.1552, 0.3460, 0.3999]) -Greedy action tensor([-1.9236, -0.4497, 0.6588, -0.1670]) tensor([0.0410, 0.1790, 0.5424, 0.2375]) -Greedy action tensor([-1.8195, -0.4515, 0.5986, -0.1144]) tensor([0.0462, 0.1814, 0.5184, 0.2541]) -Greedy action tensor([-1.9323, -0.4567, 0.6768, -0.1630]) tensor([0.0403, 0.1762, 0.5472, 0.2363]) -Greedy action tensor([-2.0013, -0.7645, 0.5167, -0.1770]) tensor([0.0434, 0.1495, 0.5382, 0.2689]) -Greedy action tensor([-1.5494, -0.0815, 0.6879, 0.2496]) tensor([0.0482, 0.2091, 0.4514, 0.2912]) -Greedy action tensor([-0.7090, 0.2838, 0.1622, -0.0874]) tensor([0.1258, 0.3395, 0.3006, 0.2342]) -Greedy action tensor([-1.7235, -0.2139, 0.6159, 0.0381]) tensor([0.0460, 0.2083, 0.4776, 0.2680]) -Greedy action tensor([-1.5002, -0.5499, 0.4245, 0.0895]) tensor([0.0652, 0.1686, 0.4467, 0.3195]) -Greedy action tensor([-1.7605, -0.1123, 0.6643, -0.3790]) tensor([0.0466, 0.2420, 0.5261, 0.1853]) -Greedy action tensor([0.0628, 0.0056, 0.6239, 1.3594]) tensor([0.1360, 0.1284, 0.2383, 0.4973]) -Greedy action tensor([-1.9110, -0.4669, 0.6566, -0.1552]) tensor([0.0416, 0.1761, 0.5417, 0.2406]) -Greedy action tensor([-1.8779, -0.4835, 0.6404, -0.1430]) tensor([0.0433, 0.1745, 0.5369, 0.2453]) -Greedy action tensor([-1.9111, -0.3647, 0.6421, -0.1605]) tensor([0.0412, 0.1932, 0.5287, 0.2369]) -Greedy action tensor([-1.1509, -0.3883, 0.3411, 0.4445]) tensor([0.0799, 0.1712, 0.3551, 0.3938]) -Greedy action tensor([-1.7887, -0.4592, 0.6264, 0.0178]) tensor([0.0453, 0.1713, 0.5073, 0.2760]) -Greedy action tensor([-0.8399, -0.2396, 0.7846, 1.4062]) tensor([0.0576, 0.1051, 0.2926, 0.5447]) -Greedy action tensor([-1.9382, -0.4363, 0.6633, -0.1747]) tensor([0.0403, 0.1810, 0.5436, 0.2351]) -Greedy action tensor([-1.4083, -0.1043, 0.5211, 0.2119]) tensor([0.0602, 0.2216, 0.4142, 0.3040]) -Greedy action tensor([-1.6547, -0.4952, 0.5282, -0.0388]) tensor([0.0553, 0.1762, 0.4904, 0.2781]) -Greedy action tensor([ 0.5477, -0.5426, 0.1513, 0.1562]) tensor([0.3725, 0.1252, 0.2506, 0.2518]) -Greedy action tensor([ 1.3573, -0.6808, -0.0758, 0.3142]) tensor([0.5810, 0.0757, 0.1386, 0.2047]) -Greedy action tensor([ 0.6701, -0.2984, -0.2186, 0.3478]) tensor([0.3975, 0.1509, 0.1635, 0.2880]) -Greedy action tensor([ 1.6369, -0.3730, -0.2113, 0.4350]) tensor([0.6281, 0.0842, 0.0989, 0.1888]) -Greedy action tensor([ 1.3298, -0.3917, -0.4863, 0.3741]) tensor([0.5794, 0.1036, 0.0942, 0.2228]) -Greedy action tensor([ 1.6536, -0.2945, -0.3947, 0.3112]) tensor([0.6524, 0.0930, 0.0841, 0.1704]) -Greedy action tensor([ 0.8567, 0.0674, -0.7824, 0.0195]) tensor([0.4805, 0.2182, 0.0933, 0.2080]) -Greedy action tensor([ 1.1180, 0.1001, -1.3538, 0.0994]) tensor([0.5534, 0.2000, 0.0467, 0.1998]) -Greedy action tensor([ 1.5343, -0.2563, -0.6873, 0.1024]) tensor([0.6604, 0.1102, 0.0716, 0.1577]) -Greedy action tensor([ 1.5146, -0.4625, -0.0688, 0.6889]) tensor([0.5613, 0.0777, 0.1152, 0.2458]) -Greedy action tensor([ 1.1008, 0.0379, -0.5595, 0.3092]) tensor([0.5029, 0.1737, 0.0956, 0.2279]) -Greedy action tensor([ 1.5729, -0.5351, -0.2608, 0.0444]) tensor([0.6675, 0.0811, 0.1067, 0.1447]) -Greedy action tensor([ 1.6974, -0.6184, -0.5643, 0.3939]) tensor([0.6782, 0.0669, 0.0706, 0.1842]) -Greedy action tensor([ 1.1293, -0.5168, -0.3936, 0.3956]) tensor([0.5288, 0.1020, 0.1153, 0.2539]) -Greedy action tensor([ 1.7427, -0.5358, -0.3357, 0.4171]) tensor([0.6697, 0.0686, 0.0838, 0.1779]) -Greedy action tensor([ 1.9122, 0.0471, -0.9126, -0.0577]) tensor([0.7387, 0.1144, 0.0438, 0.1030]) -Greedy action tensor([ 1.1551, -0.5729, -0.1822, -0.0593]) tensor([0.5757, 0.1023, 0.1511, 0.1709]) -Greedy action tensor([ 1.3988, -0.4596, -0.4112, 0.5729]) tensor([0.5690, 0.0887, 0.0931, 0.2492]) -Greedy action tensor([ 1.0651, -0.2361, -0.2800, 0.2577]) tensor([0.5054, 0.1376, 0.1317, 0.2254]) -Greedy action tensor([ 1.3917, -0.6406, -0.3019, 0.4941]) tensor([0.5806, 0.0761, 0.1067, 0.2366]) -Greedy action tensor([ 1.2180, -0.0314, -0.8392, 0.2919]) tensor([0.5523, 0.1583, 0.0706, 0.2188]) -Greedy action tensor([ 1.6205, -0.6844, -0.3432, 0.1864]) tensor([0.6764, 0.0675, 0.0949, 0.1612]) -Greedy action tensor([ 1.0884, -0.5266, -0.1605, 0.1199]) tensor([0.5361, 0.1066, 0.1538, 0.2035]) -Greedy action tensor([ 1.3876, -0.4828, -0.2091, 0.3319]) tensor([0.5867, 0.0904, 0.1188, 0.2041]) -Greedy action tensor([ 1.8689, -0.6153, -0.6276, 0.3585]) tensor([0.7212, 0.0601, 0.0594, 0.1593]) -Greedy action tensor([ 1.9431, -0.7778, 0.0727, 0.6283]) tensor([0.6719, 0.0442, 0.1035, 0.1804]) -Greedy action tensor([ 1.9323, -0.8117, -0.1155, 0.4015]) tensor([0.7094, 0.0456, 0.0915, 0.1535]) -Greedy action tensor([0.9633, 0.0446, 0.0159, 0.0779]) tensor([0.4547, 0.1814, 0.1763, 0.1876]) -Greedy action tensor([ 2.0348, -0.7692, -0.5805, 0.7527]) tensor([0.7086, 0.0429, 0.0518, 0.1966]) -Greedy action tensor([ 2.1511, -0.7442, -0.0564, 0.3339]) tensor([0.7532, 0.0416, 0.0828, 0.1224]) -Greedy action tensor([ 1.5656, -0.4731, -0.5702, 0.3506]) tensor([0.6472, 0.0843, 0.0765, 0.1920]) -Greedy action tensor([ 2.5861, -1.0067, 0.0244, 1.1824]) tensor([0.7405, 0.0204, 0.0571, 0.1819]) -Greedy action tensor([ 1.8285, -0.8127, -0.0794, 0.5479]) tensor([0.6678, 0.0476, 0.0991, 0.1856]) -Greedy action tensor([ 1.8544, -0.5088, -0.4376, 0.8526]) tensor([0.6400, 0.0602, 0.0647, 0.2350]) -Greedy action tensor([ 1.4738, -0.7750, -0.5439, 0.8778]) tensor([0.5588, 0.0590, 0.0743, 0.3079]) -Greedy action tensor([ 1.1582, -0.8989, -0.1223, 0.0761]) tensor([0.5732, 0.0733, 0.1593, 0.1942]) -Greedy action tensor([ 1.2929, -0.2632, -0.4943, 0.2105]) tensor([0.5824, 0.1229, 0.0975, 0.1973]) -Greedy action tensor([ 0.7243, -0.2923, -0.0833, 0.2846]) tensor([0.4078, 0.1476, 0.1819, 0.2627]) -Greedy action tensor([ 0.8504, -0.1518, -0.1355, 0.3378]) tensor([0.4275, 0.1569, 0.1595, 0.2561]) -Greedy action tensor([ 1.1776, -0.1164, -0.5203, 0.6393]) tensor([0.4900, 0.1343, 0.0897, 0.2860]) -Greedy action tensor([ 1.7805, -0.8745, -0.5678, 0.5748]) tensor([0.6824, 0.0480, 0.0652, 0.2044]) -Greedy action tensor([ 1.2808, -0.3758, -0.6171, 0.0883]) tensor([0.6082, 0.1160, 0.0912, 0.1846]) -Greedy action tensor([ 1.8764, -0.7261, -0.3237, 0.4493]) tensor([0.7018, 0.0520, 0.0778, 0.1684]) -Greedy action tensor([ 1.8329, -0.5090, -0.3280, 0.2755]) tensor([0.7032, 0.0676, 0.0810, 0.1482]) -Greedy action tensor([ 0.9448, -0.5883, -0.4040, 0.8238]) tensor([0.4235, 0.0914, 0.1099, 0.3752]) -Greedy action tensor([ 1.6465, -0.3998, -0.4914, 0.3786]) tensor([0.6542, 0.0845, 0.0771, 0.1841]) -Greedy action tensor([ 1.4323, -0.4495, -0.4809, 0.1746]) tensor([0.6312, 0.0961, 0.0932, 0.1795]) -Greedy action tensor([ 0.8992, 0.0348, -0.3469, 0.3477]) tensor([0.4376, 0.1844, 0.1259, 0.2521]) -Greedy action tensor([ 2.1986, -0.8890, -0.1676, 0.4034]) tensor([0.7660, 0.0349, 0.0719, 0.1272]) -Greedy action tensor([ 1.6745, -0.5139, -0.3443, 0.6599]) tensor([0.6221, 0.0697, 0.0826, 0.2255]) -Greedy action tensor([ 0.8128, -0.4216, 0.1579, 0.1369]) tensor([0.4312, 0.1255, 0.2240, 0.2193]) -Greedy action tensor([ 1.2523, -0.9197, -0.3558, 0.4893]) tensor([0.5616, 0.0640, 0.1125, 0.2619]) -Greedy action tensor([ 1.3436, -0.7963, -0.2019, 0.5095]) tensor([0.5665, 0.0667, 0.1208, 0.2460]) -Greedy action tensor([ 1.4140, -0.4298, -0.4844, 0.7281]) tensor([0.5520, 0.0873, 0.0827, 0.2780]) -Greedy action tensor([ 2.3184, -0.6611, -0.3496, 0.2758]) tensor([0.8001, 0.0407, 0.0555, 0.1038]) -Greedy action tensor([ 1.4644, -0.5333, -0.4478, 0.1919]) tensor([0.6396, 0.0868, 0.0945, 0.1792]) -Greedy action tensor([ 1.4652, -0.4392, -0.2426, 0.3037]) tensor([0.6086, 0.0906, 0.1103, 0.1905]) -Greedy action tensor([ 1.0875, 0.0385, -0.7151, 0.3727]) tensor([0.4989, 0.1748, 0.0823, 0.2441]) -Greedy action tensor([ 0.4607, 0.0045, -0.4244, 0.1618]) tensor([0.3587, 0.2273, 0.1480, 0.2660]) -Greedy action tensor([ 1.5150, -0.5678, -0.3701, 0.1886]) tensor([0.6486, 0.0808, 0.0985, 0.1722]) -Greedy action tensor([ 0.9830, -0.1802, -0.2330, 0.6941]) tensor([0.4241, 0.1325, 0.1257, 0.3177]) -Greedy action tensor([ 0.4647, -0.7120, -0.0610, 0.1236]) tensor([0.3831, 0.1181, 0.2265, 0.2724]) -Greedy action tensor([ 1.8425, -0.6253, -0.4026, 0.5953]) tensor([0.6766, 0.0574, 0.0717, 0.1944]) -Greedy action tensor([ 1.5676, -0.7618, 0.0297, 0.5140]) tensor([0.6021, 0.0586, 0.1293, 0.2099]) -Greedy action tensor([ 1.1136, -0.3140, -0.5887, 0.3049]) tensor([0.5354, 0.1284, 0.0976, 0.2385]) -Greedy action tensor([ 1.9528, -0.5493, -0.6491, 0.5956]) tensor([0.7075, 0.0580, 0.0525, 0.1821]) -Greedy action tensor([ 1.8035, -0.7391, -0.0532, 0.2856]) tensor([0.6877, 0.0541, 0.1074, 0.1507]) -Greedy action tensor([ 1.1799, -0.2594, 0.0824, 0.1959]) tensor([0.5142, 0.1219, 0.1716, 0.1922]) -Greedy action tensor([ 1.4399, -0.5236, -0.1416, 0.5116]) tensor([0.5743, 0.0806, 0.1181, 0.2270]) -Greedy action tensor([ 1.4757, 0.2007, -0.0754, 0.2882]) tensor([0.5567, 0.1556, 0.1180, 0.1698]) -Greedy action tensor([ 1.6548, 0.2766, -0.2703, 0.4835]) tensor([0.5855, 0.1476, 0.0854, 0.1815]) -Greedy action tensor([ 1.4035, -0.6156, -0.5910, 1.0875]) tensor([0.5005, 0.0665, 0.0681, 0.3649]) -Greedy action tensor([ 1.5623, -0.4282, -0.2344, 0.4852]) tensor([0.6086, 0.0832, 0.1009, 0.2073]) -Greedy action tensor([ 1.4978, -0.1011, -0.5253, 0.5118]) tensor([0.5857, 0.1184, 0.0775, 0.2185]) -Greedy action tensor([ 1.3934, 0.0510, -0.4735, 0.3399]) tensor([0.5667, 0.1480, 0.0876, 0.1976]) -Greedy action tensor([ 0.6357, -0.1650, -0.1444, 0.2492]) tensor([0.3866, 0.1736, 0.1772, 0.2627]) -Greedy action tensor([ 0.9468, -0.5168, -0.1598, 0.4926]) tensor([0.4552, 0.1053, 0.1505, 0.2890]) -Greedy action tensor([ 1.4683, -0.6138, -0.2725, 0.3299]) tensor([0.6171, 0.0769, 0.1082, 0.1977]) -Greedy action tensor([ 0.8069, -0.2043, -0.1927, 0.2928]) tensor([0.4292, 0.1561, 0.1580, 0.2567]) -Greedy action tensor([ 1.6989, -0.6910, -0.4661, -0.1761]) tensor([0.7354, 0.0674, 0.0844, 0.1128]) -Greedy action tensor([ 1.2870, 0.0505, -0.4741, 0.5908]) tensor([0.5100, 0.1481, 0.0876, 0.2542]) -Greedy action tensor([-1.1760, -2.2210, -0.3634, -0.1410]) tensor([0.1558, 0.0548, 0.3510, 0.4384]) -Greedy action tensor([ 0.3430, -0.2365, 1.2711, -0.1145]) tensor([0.2117, 0.1186, 0.5356, 0.1340]) -Greedy action tensor([-0.1589, -1.7350, 0.0425, 0.8504]) tensor([0.1933, 0.0400, 0.2364, 0.5303]) -Greedy action tensor([ 0.6695, -0.1152, 0.4430, 0.8891]) tensor([0.2858, 0.1304, 0.2279, 0.3560]) -Greedy action tensor([-0.2577, -1.1182, -0.9796, 0.1737]) tensor([0.2900, 0.1227, 0.1409, 0.4464]) -Greedy action tensor([-0.4467, 0.4251, -0.1434, 0.8185]) tensor([0.1206, 0.2885, 0.1634, 0.4275]) -Greedy action tensor([ 0.4268, -1.0727, -0.5449, 0.6214]) tensor([0.3550, 0.0793, 0.1344, 0.4313]) -Greedy action tensor([-0.6328, -1.8692, -0.3397, 0.6491]) tensor([0.1604, 0.0466, 0.2150, 0.5780]) -Greedy action tensor([-0.0602, -0.0353, 0.8051, -0.5650]) tensor([0.1998, 0.2049, 0.4747, 0.1206]) -Greedy action tensor([ 0.9476, -0.5533, 0.6006, -0.3816]) tensor([0.4557, 0.1016, 0.3221, 0.1206]) -Greedy action tensor([ 0.5601, -1.6331, 0.3941, 0.3517]) tensor([0.3609, 0.0403, 0.3057, 0.2931]) -Greedy action tensor([-1.5340, -0.0622, 0.9569, -0.6000]) tensor([0.0501, 0.2181, 0.6044, 0.1274]) -Greedy action tensor([-0.4454, -1.2334, -0.8784, -0.1615]) tensor([0.2914, 0.1325, 0.1890, 0.3871]) -Greedy action tensor([ 0.8779, 0.4237, 0.8389, -0.6239]) tensor([0.3547, 0.2252, 0.3411, 0.0790]) -Greedy action tensor([ 0.7461, -0.3050, -0.9000, 1.6430]) tensor([0.2503, 0.0875, 0.0483, 0.6139]) -Greedy action tensor([ 0.4959, -1.4751, 0.6637, 0.6571]) tensor([0.2860, 0.0398, 0.3382, 0.3360]) -Greedy action tensor([ 0.9978, -0.5352, 1.0175, 1.6602]) tensor([0.2395, 0.0517, 0.2443, 0.4645]) -Greedy action tensor([ 1.1399, -0.5748, -0.0770, 0.8857]) tensor([0.4441, 0.0799, 0.1315, 0.3444]) -Greedy action tensor([ 0.7653, -0.4421, 0.7624, -0.4327]) tensor([0.3849, 0.1151, 0.3838, 0.1162]) -Greedy action tensor([ 0.3239, -0.5132, 0.6227, 1.7928]) tensor([0.1403, 0.0608, 0.1892, 0.6097]) -Greedy action tensor([ 0.7554, -0.9920, 0.2689, -0.3227]) tensor([0.4697, 0.0818, 0.2887, 0.1598]) -Greedy action tensor([-1.3188, -1.2386, 1.7965, -1.1385]) tensor([0.0387, 0.0420, 0.8729, 0.0464]) -Greedy action tensor([ 0.3683, 0.2175, -0.2524, 0.3556]) tensor([0.2954, 0.2541, 0.1588, 0.2917]) -Greedy action tensor([ 1.0082, -0.8398, 0.2080, 0.1170]) tensor([0.4958, 0.0781, 0.2227, 0.2034]) -Greedy action tensor([-0.8047, -0.3090, 1.3411, -0.7528]) tensor([0.0817, 0.1341, 0.6982, 0.0860]) -Greedy action tensor([ 0.2831, 0.2873, 0.7445, -0.3917]) tensor([0.2439, 0.2449, 0.3869, 0.1242]) -Greedy action tensor([1.5441, 0.1968, 1.6328, 0.9748]) tensor([0.3426, 0.0891, 0.3744, 0.1939]) -Greedy action tensor([-0.0941, 0.8508, 0.1254, -0.6420]) tensor([0.1853, 0.4767, 0.2308, 0.1071]) -Greedy action tensor([-0.1942, 0.2189, -0.4902, 0.3175]) tensor([0.2031, 0.3070, 0.1511, 0.3388]) -Greedy action tensor([-0.5330, -1.2624, -0.7327, 0.6113]) tensor([0.1838, 0.0886, 0.1505, 0.5771]) -Greedy action tensor([ 0.6954, -0.9266, 0.7981, 0.7847]) tensor([0.2942, 0.0581, 0.3260, 0.3217]) -Greedy action tensor([ 0.9604, 0.5988, -0.0465, -0.0194]) tensor([0.4103, 0.2858, 0.1499, 0.1540]) -Greedy action tensor([ 0.9083, -0.2369, 0.4455, -0.0762]) tensor([0.4308, 0.1371, 0.2712, 0.1610]) -Greedy action tensor([ 0.5913, -1.5590, -0.3225, 0.5508]) tensor([0.4036, 0.0470, 0.1618, 0.3876]) -Greedy action tensor([-0.7617, -1.8002, 0.0403, 1.3251]) tensor([0.0859, 0.0304, 0.1915, 0.6922]) -Greedy action tensor([ 0.2642, -0.1973, -0.2253, 0.9402]) tensor([0.2376, 0.1497, 0.1456, 0.4671]) -Greedy action tensor([ 2.0395, -0.2738, -0.3286, 1.7178]) tensor([0.5215, 0.0516, 0.0488, 0.3781]) -Greedy action tensor([ 0.4631, -0.2802, -0.2046, 0.9752]) tensor([0.2734, 0.1300, 0.1402, 0.4563]) -Greedy action tensor([-0.8533, -1.9970, 0.4711, 0.3145]) tensor([0.1206, 0.0384, 0.4534, 0.3876]) -Greedy action tensor([ 0.0801, -0.6953, 0.1985, 0.4464]) tensor([0.2482, 0.1143, 0.2794, 0.3580]) -Greedy action tensor([-1.1767, -0.2999, -0.7646, -1.0669]) tensor([0.1659, 0.3986, 0.2504, 0.1851]) -Greedy action tensor([-0.1873, -2.2096, -0.0318, 0.2092]) tensor([0.2641, 0.0349, 0.3085, 0.3925]) -Greedy action tensor([ 0.3227, -1.5582, -0.9265, 1.8464]) tensor([0.1659, 0.0253, 0.0476, 0.7613]) -Greedy action tensor([-0.2752, 0.0866, -1.2223, 0.0706]) tensor([0.2360, 0.3389, 0.0916, 0.3335]) -Greedy action tensor([ 0.6182, -0.6379, 0.1362, 0.4542]) tensor([0.3635, 0.1035, 0.2245, 0.3085]) -Greedy action tensor([-0.2494, -0.4037, -0.4575, 0.7876]) tensor([0.1821, 0.1561, 0.1479, 0.5138]) -Greedy action tensor([ 0.5085, -0.0181, 0.9375, 0.6319]) tensor([0.2349, 0.1387, 0.3607, 0.2657]) -Greedy action tensor([-0.3766, 0.9721, 0.6649, -0.4625]) tensor([0.1162, 0.4478, 0.3293, 0.1067]) -Greedy action tensor([-0.0183, -1.0597, 0.2749, 0.1164]) tensor([0.2606, 0.0920, 0.3493, 0.2981]) -Greedy action tensor([-1.0144, -1.2767, 0.1511, -0.1418]) tensor([0.1357, 0.1044, 0.4352, 0.3247]) -Greedy action tensor([-0.0825, -1.3711, 0.2020, -0.4060]) tensor([0.3004, 0.0828, 0.3993, 0.2174]) -Greedy action tensor([ 0.4872, -0.6086, 1.2628, 1.0708]) tensor([0.1887, 0.0631, 0.4099, 0.3383]) -Greedy action tensor([ 0.7752, -0.8286, -0.6701, -0.4643]) tensor([0.5793, 0.1165, 0.1365, 0.1677]) -Greedy action tensor([-0.5081, -0.5969, 0.9551, -0.7200]) tensor([0.1420, 0.1299, 0.6133, 0.1149]) -Greedy action tensor([ 0.3080, 0.2310, -0.1261, -1.2634]) tensor([0.3595, 0.3329, 0.2329, 0.0747]) -Greedy action tensor([ 0.5089, -1.8989, 0.1024, 0.5799]) tensor([0.3534, 0.0318, 0.2354, 0.3794]) -Greedy action tensor([ 1.1061, 0.0671, -0.0036, -0.7874]) tensor([0.5452, 0.1929, 0.1797, 0.0821]) -Greedy action tensor([-0.5295, 0.0850, -1.0209, -0.4784]) tensor([0.2216, 0.4096, 0.1356, 0.2332]) -Greedy action tensor([1.3760, 0.5416, 0.5811, 1.7972]) tensor([0.2933, 0.1273, 0.1325, 0.4469]) -Greedy action tensor([ 1.5497, -1.6193, 0.7272, 0.8516]) tensor([0.5053, 0.0212, 0.2220, 0.2514]) -Greedy action tensor([ 0.3852, -0.5296, 0.8056, 0.8515]) tensor([0.2214, 0.0887, 0.3370, 0.3529]) -Greedy action tensor([ 1.2744, 1.5058, -0.5618, 1.0784]) tensor([0.3085, 0.3888, 0.0492, 0.2536]) -Greedy action tensor([ 0.5363, 0.4588, 0.7403, -0.6172]) tensor([0.2884, 0.2669, 0.3537, 0.0910]) -Greedy action tensor([-0.8226, -1.4464, 2.0637, -1.2427]) tensor([0.0497, 0.0266, 0.8910, 0.0327]) -Greedy action tensor([ 1.6274, -0.2531, 0.8846, 0.7389]) tensor([0.4903, 0.0748, 0.2333, 0.2016]) -Greedy action tensor([-0.7481, 0.1260, 0.9871, -0.2120]) tensor([0.0928, 0.2224, 0.5262, 0.1586]) -Greedy action tensor([-0.2942, 0.7117, 0.9524, -0.2000]) tensor([0.1203, 0.3290, 0.4185, 0.1322]) -Greedy action tensor([-0.0781, -0.9022, 0.9266, -1.1647]) tensor([0.2219, 0.0973, 0.6060, 0.0748]) -Greedy action tensor([-0.0928, -0.5386, -0.1509, 0.2648]) tensor([0.2491, 0.1595, 0.2351, 0.3562]) -Greedy action tensor([ 1.2780, -0.8552, 0.8537, 1.1458]) tensor([0.3775, 0.0447, 0.2470, 0.3308]) -Greedy action tensor([-0.6902, -1.3581, -0.0772, 0.8429]) tensor([0.1251, 0.0642, 0.2310, 0.5797]) -Greedy action tensor([-0.2808, -0.5518, 0.0381, 1.9545]) tensor([0.0801, 0.0611, 0.1102, 0.7487]) -Greedy action tensor([ 0.7933, -0.4934, 0.0709, 0.8649]) tensor([0.3526, 0.0974, 0.1712, 0.3788]) -Greedy action tensor([-0.2725, -0.2890, -0.8466, 0.3692]) tensor([0.2249, 0.2212, 0.1267, 0.4272]) -Greedy action tensor([-0.9870, -0.6041, 1.1211, -0.2012]) tensor([0.0776, 0.1138, 0.6385, 0.1702]) -Greedy action tensor([ 0.2586, 1.1609, 0.5777, -0.7022]) tensor([0.1914, 0.4719, 0.2634, 0.0732]) -Greedy action tensor([ 0.2495, -1.0222, 0.9294, 0.7090]) tensor([0.2067, 0.0580, 0.4080, 0.3273]) -Greedy action tensor([-1.1533, -1.6641, -0.3318, 0.1676]) tensor([0.1312, 0.0787, 0.2984, 0.4917]) -Greedy action tensor([ 1.0195, -0.4942, 0.2512, -0.0959]) tensor([0.4971, 0.1094, 0.2306, 0.1629]) -Greedy action tensor([-0.4622, 0.1498, 0.6603, 0.0484]) tensor([0.1319, 0.2432, 0.4052, 0.2197]) -Greedy action tensor([ 1.0368, 0.2199, 1.6099, -0.0125]) tensor([0.2805, 0.1239, 0.4974, 0.0982]) -Greedy action tensor([ 0.3642, -0.5797, 1.4374, -0.2212]) tensor([0.2053, 0.0799, 0.6005, 0.1143]) -Greedy action tensor([ 0.4975, -0.1511, -0.0251, -0.2988]) tensor([0.3896, 0.2037, 0.2310, 0.1757]) -Greedy action tensor([ 0.9092, -0.9283, 0.2347, -0.6298]) tensor([0.5310, 0.0845, 0.2705, 0.1140]) -Greedy action tensor([ 0.7258, -0.3584, 0.0493, -0.5276]) tensor([0.4690, 0.1586, 0.2384, 0.1339]) -Greedy action tensor([ 0.7792, -0.3454, -0.0070, -0.3598]) tensor([0.4761, 0.1546, 0.2169, 0.1524]) -Greedy action tensor([ 0.2951, -0.0334, -0.0069, 0.0071]) tensor([0.3116, 0.2244, 0.2304, 0.2336]) -Greedy action tensor([ 0.5761, 0.0127, -0.1430, -0.1566]) tensor([0.3941, 0.2244, 0.1920, 0.1894]) -Greedy action tensor([ 0.7760, -0.3407, -0.0171, -0.2044]) tensor([0.4641, 0.1519, 0.2099, 0.1741]) -Greedy action tensor([ 0.6958, -0.4948, -0.1663, -0.3892]) tensor([0.4845, 0.1473, 0.2046, 0.1637]) -Greedy action tensor([ 0.7520, -0.3928, -0.2148, -0.5179]) tensor([0.5052, 0.1608, 0.1921, 0.1419]) -Greedy action tensor([ 0.1991, 0.2459, -0.0266, -0.0139]) tensor([0.2737, 0.2868, 0.2184, 0.2212]) -Greedy action tensor([ 0.7549, -0.2648, 0.0817, -0.3837]) tensor([0.4564, 0.1646, 0.2328, 0.1462]) -Greedy action tensor([ 1.4127, -0.3944, -0.1337, -0.4553]) tensor([0.6529, 0.1072, 0.1391, 0.1008]) -Greedy action tensor([ 0.7819, -0.5881, 0.1724, -0.5949]) tensor([0.4878, 0.1239, 0.2652, 0.1231]) -Greedy action tensor([ 1.0871, -0.5751, -0.0823, -0.6105]) tensor([0.5940, 0.1127, 0.1845, 0.1088]) -Greedy action tensor([ 0.2597, -0.0422, 0.0055, -0.2530]) tensor([0.3212, 0.2375, 0.2491, 0.1923]) -Greedy action tensor([ 0.9053, -0.5052, -0.0511, -0.4487]) tensor([0.5301, 0.1294, 0.2037, 0.1369]) -Greedy action tensor([ 0.6939, -0.4787, 0.0904, -0.4536]) tensor([0.4600, 0.1424, 0.2516, 0.1460]) -Greedy action tensor([ 0.5776, -0.3418, -0.0544, -0.2123]) tensor([0.4194, 0.1672, 0.2229, 0.1904]) -Greedy action tensor([ 0.9092, -0.6016, 0.0058, -0.3358]) tensor([0.5225, 0.1153, 0.2117, 0.1504]) -Greedy action tensor([ 0.8855, -0.5587, 0.0573, -0.2287]) tensor([0.4998, 0.1179, 0.2183, 0.1640]) -Greedy action tensor([ 0.9011, -0.2953, -0.0010, -0.3156]) tensor([0.4989, 0.1508, 0.2024, 0.1478]) -Greedy action tensor([ 0.6524, -0.4551, 0.3012, -0.4088]) tensor([0.4201, 0.1388, 0.2957, 0.1454]) -Greedy action tensor([ 0.3494, -0.1297, -0.0462, -0.2144]) tensor([0.3494, 0.2164, 0.2353, 0.1989]) -Greedy action tensor([ 0.5228, -0.0184, -0.0862, -0.1226]) tensor([0.3773, 0.2196, 0.2052, 0.1979]) -Greedy action tensor([ 0.5612, -0.4541, 0.3116, -0.5081]) tensor([0.4025, 0.1458, 0.3136, 0.1381]) -Greedy action tensor([ 0.9018, -0.6972, -0.1059, -0.6112]) tensor([0.5595, 0.1131, 0.2042, 0.1232]) -Greedy action tensor([ 1.0560, -0.4071, -0.1500, -0.3165]) tensor([0.5604, 0.1298, 0.1678, 0.1421]) -Greedy action tensor([ 0.1885, -0.0299, -0.0116, -0.1729]) tensor([0.3013, 0.2422, 0.2466, 0.2099]) -Greedy action tensor([ 0.7777, -0.5400, 0.0101, -0.2498]) tensor([0.4785, 0.1281, 0.2221, 0.1713]) -Greedy action tensor([ 0.8398, -0.0685, -0.1701, -0.1285]) tensor([0.4657, 0.1878, 0.1696, 0.1768]) -Greedy action tensor([ 8.5236e-01, -7.1393e-01, 1.4536e-04, -4.2188e-01]) tensor([0.5222, 0.1090, 0.2227, 0.1460]) -Greedy action tensor([ 0.9013, -0.8793, -0.0137, -0.3741]) tensor([0.5410, 0.0912, 0.2167, 0.1511]) -Greedy action tensor([ 0.8762, -0.3318, -0.2210, -0.0956]) tensor([0.4973, 0.1486, 0.1660, 0.1882]) -Greedy action tensor([ 1.1853, -0.7445, -0.1988, -0.5726]) tensor([0.6377, 0.0926, 0.1598, 0.1099]) -Greedy action tensor([ 1.1897, -0.8896, -0.0271, -0.6291]) tensor([0.6315, 0.0790, 0.1871, 0.1025]) -Greedy action tensor([ 0.7988, -0.7146, 0.0651, -0.5083]) tensor([0.5074, 0.1117, 0.2436, 0.1373]) -Greedy action tensor([ 0.5153, -0.3003, -0.0351, -0.2985]) tensor([0.4061, 0.1797, 0.2342, 0.1800]) -Greedy action tensor([ 1.0580, -1.0553, 0.0532, -0.6178]) tensor([0.5973, 0.0722, 0.2187, 0.1118]) -Greedy action tensor([ 0.6306, -0.5680, -0.1155, -0.2485]) tensor([0.4564, 0.1377, 0.2164, 0.1895]) -Greedy action tensor([ 0.3007, 0.1917, -0.0926, -0.3201]) tensor([0.3216, 0.2884, 0.2171, 0.1729]) -Greedy action tensor([ 1.2845, -0.6652, -0.1107, -0.5364]) tensor([0.6443, 0.0917, 0.1597, 0.1043]) -Greedy action tensor([ 0.6692, -0.3639, -0.0554, -0.0314]) tensor([0.4279, 0.1523, 0.2074, 0.2124]) -Greedy action tensor([ 0.0421, 0.1736, 0.0018, -0.4648]) tensor([0.2700, 0.3080, 0.2593, 0.1626]) -Greedy action tensor([ 1.1452, -0.6547, -0.0885, -0.4827]) tensor([0.6050, 0.1000, 0.1762, 0.1188]) -Greedy action tensor([ 0.6343, -0.1808, -0.1228, -0.2521]) tensor([0.4303, 0.1905, 0.2018, 0.1774]) -Greedy action tensor([ 0.7670, -0.6221, -0.0698, -0.3693]) tensor([0.4992, 0.1244, 0.2162, 0.1602]) -Greedy action tensor([ 0.7690, -0.3968, 0.0069, -0.5905]) tensor([0.4914, 0.1531, 0.2293, 0.1262]) -Greedy action tensor([ 0.4770, -0.7418, -0.1800, -0.3735]) tensor([0.4462, 0.1319, 0.2313, 0.1906]) -Greedy action tensor([ 0.6136, -0.2539, -0.2302, 0.0484]) tensor([0.4135, 0.1737, 0.1778, 0.2350]) -Greedy action tensor([ 0.9773, -0.6114, 0.1183, -0.5075]) tensor([0.5393, 0.1101, 0.2284, 0.1222]) -Greedy action tensor([ 0.4716, -0.3349, 0.0799, -0.3853]) tensor([0.3927, 0.1753, 0.2654, 0.1667]) -Greedy action tensor([ 0.4837, -0.5495, -0.0927, -0.0850]) tensor([0.4026, 0.1433, 0.2262, 0.2280]) -Greedy action tensor([ 0.6538, -0.3712, -0.1155, -0.3050]) tensor([0.4534, 0.1627, 0.2101, 0.1738]) -Greedy action tensor([ 0.7601, -0.5267, 0.0678, -0.2749]) tensor([0.4691, 0.1295, 0.2347, 0.1666]) -Greedy action tensor([ 0.6672, -0.1123, -0.0257, -0.4873]) tensor([0.4398, 0.2017, 0.2199, 0.1386]) -Greedy action tensor([ 0.9543, -0.5270, -0.0530, -0.4112]) tensor([0.5412, 0.1230, 0.1976, 0.1381]) -Greedy action tensor([ 0.5611, -0.3810, -0.0513, -0.1949]) tensor([0.4164, 0.1623, 0.2257, 0.1955]) -Greedy action tensor([ 0.6889, -0.1752, -0.0024, -0.1595]) tensor([0.4254, 0.1793, 0.2131, 0.1821]) -Greedy action tensor([ 0.7355, -0.3464, -0.0815, -0.0018]) tensor([0.4427, 0.1500, 0.1955, 0.2118]) -Greedy action tensor([ 0.8481, -1.0345, 0.1354, -0.8213]) tensor([0.5462, 0.0831, 0.2678, 0.1029]) -Greedy action tensor([ 1.1865, -0.8493, -0.1780, -0.7281]) tensor([0.6521, 0.0852, 0.1666, 0.0961]) -Greedy action tensor([ 0.8391, -0.2252, -0.0179, -0.1697]) tensor([0.4686, 0.1617, 0.1989, 0.1709]) -Greedy action tensor([ 0.8109, -0.6561, -0.0691, -0.5284]) tensor([0.5243, 0.1209, 0.2175, 0.1374]) -Greedy action tensor([ 0.4646, 0.0241, -0.0605, -0.1878]) tensor([0.3628, 0.2336, 0.2146, 0.1890]) -Greedy action tensor([ 0.4619, -0.2143, -0.0878, -0.2284]) tensor([0.3865, 0.1966, 0.2231, 0.1938]) -Greedy action tensor([ 0.8229, -0.5781, -0.2282, -0.1933]) tensor([0.5108, 0.1258, 0.1785, 0.1849]) -Greedy action tensor([ 0.5230, -0.3643, 0.0792, -0.5049]) tensor([0.4147, 0.1708, 0.2661, 0.1484]) -Greedy action tensor([ 0.6996, -0.3899, -0.0037, -0.2623]) tensor([0.4518, 0.1520, 0.2236, 0.1727]) -Greedy action tensor([ 0.8078, -0.5345, 0.0021, -0.3787]) tensor([0.4967, 0.1298, 0.2219, 0.1516]) -Greedy action tensor([ 0.4252, -0.2856, -0.0870, -0.2936]) tensor([0.3879, 0.1906, 0.2325, 0.1891]) -Greedy action tensor([ 0.9566, -0.7325, -0.1325, -0.4799]) tensor([0.5685, 0.1050, 0.1913, 0.1352]) -Greedy action tensor([ 1.0925, -0.5054, 0.0142, -0.5378]) tensor([0.5753, 0.1164, 0.1957, 0.1127]) -Greedy action tensor([ 0.7768, -0.4300, 0.0209, -0.3024]) tensor([0.4742, 0.1419, 0.2227, 0.1612]) -Greedy action tensor([ 0.5441, -0.1191, -0.1575, -0.1833]) tensor([0.4009, 0.2066, 0.1988, 0.1937]) -Greedy action tensor([ 1.0326, -0.9722, 0.1796, -0.8260]) tensor([0.5825, 0.0785, 0.2482, 0.0908]) -Greedy action tensor([ 0.4036, -0.1912, -0.0085, -0.2413]) tensor([0.3651, 0.2014, 0.2418, 0.1916]) -Greedy action tensor([ 1.0340, -0.5366, -0.0472, -0.2306]) tensor([0.5466, 0.1137, 0.1854, 0.1543]) -Greedy action tensor([ 0.2305, 0.0118, -0.0899, -0.4104]) tensor([0.3272, 0.2629, 0.2375, 0.1724]) -Greedy action tensor([ 0.5862, -0.3619, 0.0707, -0.3105]) tensor([0.4180, 0.1619, 0.2496, 0.1705]) -Greedy action tensor([ 0.3557, -0.0453, -0.1886, -0.0742]) tensor([0.3448, 0.2309, 0.2001, 0.2243]) -Greedy action tensor([ 0.6790, -0.1268, 0.3756, -0.5948]) tensor([0.4057, 0.1812, 0.2995, 0.1135]) -Greedy action tensor([-1.5324, -0.6022, 0.5202, 0.0701]) tensor([0.0614, 0.1556, 0.4781, 0.3049]) -Greedy action tensor([-0.8373, -0.2446, 0.2732, 0.5457]) tensor([0.1017, 0.1840, 0.3088, 0.4055]) -Greedy action tensor([-1.7621, -0.5084, 0.5735, -0.0877]) tensor([0.0496, 0.1737, 0.5123, 0.2645]) -Greedy action tensor([-1.3996, -0.2235, 0.6743, -0.6327]) tensor([0.0697, 0.2259, 0.5544, 0.1500]) -Greedy action tensor([-1.9199, -0.3871, 0.6438, -0.1662]) tensor([0.0410, 0.1899, 0.5323, 0.2368]) -Greedy action tensor([-1.6614, -0.3314, 0.6101, 0.0495]) tensor([0.0500, 0.1890, 0.4845, 0.2766]) -Greedy action tensor([-1.8407, -0.4528, 0.6661, -0.1291]) tensor([0.0438, 0.1756, 0.5377, 0.2428]) -Greedy action tensor([-1.5812, 0.3639, 0.3698, 0.0123]) tensor([0.0501, 0.3506, 0.3526, 0.2467]) -Greedy action tensor([-1.9088, -0.4470, 0.6510, -0.1557]) tensor([0.0416, 0.1796, 0.5384, 0.2403]) -Greedy action tensor([-1.4944, -0.5270, 0.4384, 0.1402]) tensor([0.0638, 0.1679, 0.4410, 0.3273]) -Greedy action tensor([-1.4250, 0.0059, 0.7185, 0.3635]) tensor([0.0508, 0.2124, 0.4331, 0.3037]) -Greedy action tensor([-1.8993, -0.4400, 0.6474, -0.1541]) tensor([0.0420, 0.1808, 0.5365, 0.2407]) -Greedy action tensor([-1.9356, -0.4087, 0.6555, -0.1739]) tensor([0.0404, 0.1859, 0.5387, 0.2350]) -Greedy action tensor([-1.2254, -0.6698, 0.3298, 0.0529]) tensor([0.0903, 0.1575, 0.4278, 0.3244]) -Greedy action tensor([-1.8036, -0.3900, 0.5880, -0.1083]) tensor([0.0465, 0.1913, 0.5087, 0.2535]) -Greedy action tensor([-1.9185, -0.4147, 0.6604, -0.1581]) tensor([0.0408, 0.1836, 0.5381, 0.2374]) -Greedy action tensor([-1.8909, -0.3731, 0.6293, -0.1440]) tensor([0.0421, 0.1922, 0.5239, 0.2418]) -Greedy action tensor([-1.9264, -0.4102, 0.6543, -0.1666]) tensor([0.0407, 0.1854, 0.5374, 0.2365]) -Greedy action tensor([-1.6419, -0.5369, 0.5116, 0.0411]) tensor([0.0555, 0.1676, 0.4782, 0.2987]) -Greedy action tensor([-1.8888, -0.3789, 0.6384, -0.1468]) tensor([0.0421, 0.1906, 0.5270, 0.2403]) -Greedy action tensor([-1.1310, -0.5548, 0.2845, 0.1660]) tensor([0.0947, 0.1686, 0.3902, 0.3465]) -Greedy action tensor([-1.6474, -0.3869, 0.6217, -0.1379]) tensor([0.0534, 0.1884, 0.5165, 0.2417]) -Greedy action tensor([-1.7662, -0.1341, 0.5380, -0.0433]) tensor([0.0460, 0.2354, 0.4609, 0.2577]) -Greedy action tensor([-1.4555, -0.5840, 0.4401, -0.0352]) tensor([0.0705, 0.1685, 0.4693, 0.2917]) -Greedy action tensor([-1.6015, -0.5366, 0.4755, 0.0475]) tensor([0.0585, 0.1698, 0.4672, 0.3045]) -Greedy action tensor([-1.8430, -0.4585, 0.6533, -0.1028]) tensor([0.0438, 0.1749, 0.5317, 0.2496]) -Greedy action tensor([-1.3214, -0.1725, 0.8785, 0.7157]) tensor([0.0480, 0.1513, 0.4329, 0.3678]) -Greedy action tensor([-1.8642, -0.4693, 0.7033, -0.0027]) tensor([0.0408, 0.1647, 0.5319, 0.2626]) -Greedy action tensor([-1.8285, -0.4107, 0.6083, -0.1488]) tensor([0.0456, 0.1882, 0.5215, 0.2446]) -Greedy action tensor([-1.7864, -0.4821, 0.6003, -0.0702]) tensor([0.0473, 0.1744, 0.5149, 0.2633]) -Greedy action tensor([-1.6352, -0.4951, 0.5247, -0.1039]) tensor([0.0574, 0.1795, 0.4977, 0.2654]) -Greedy action tensor([-1.8914, -0.3110, 0.6183, -0.1699]) tensor([0.0421, 0.2045, 0.5179, 0.2355]) -Greedy action tensor([-1.7741, -0.4747, 0.5875, -0.0932]) tensor([0.0484, 0.1776, 0.5138, 0.2601]) -Greedy action tensor([-0.4634, 0.9521, 0.0326, 0.0919]) tensor([0.1176, 0.4844, 0.1931, 0.2049]) -Greedy action tensor([-1.2337, -0.5052, 1.1414, 1.1875]) tensor([0.0399, 0.0826, 0.4287, 0.4489]) -Greedy action tensor([-1.8978, -0.4225, 0.6412, -0.1526]) tensor([0.0421, 0.1840, 0.5330, 0.2410]) -Greedy action tensor([-1.7177, -0.3602, 0.6265, 0.0677]) tensor([0.0470, 0.1827, 0.4900, 0.2802]) -Greedy action tensor([-1.7213, -0.4848, 0.6923, 0.1860]) tensor([0.0447, 0.1541, 0.4999, 0.3013]) -Greedy action tensor([-1.8983, -0.3739, 0.6567, -0.1418]) tensor([0.0412, 0.1893, 0.5307, 0.2388]) -Greedy action tensor([-0.9408, -0.6120, 0.9334, 1.4045]) tensor([0.0517, 0.0718, 0.3369, 0.5396]) -Greedy action tensor([-1.7991, -0.4520, 0.6034, -0.0975]) tensor([0.0468, 0.1799, 0.5169, 0.2564]) -Greedy action tensor([-1.9069, -0.4361, 0.6459, -0.1658]) tensor([0.0418, 0.1821, 0.5374, 0.2387]) -Greedy action tensor([-1.8288, -0.4740, 0.6121, -0.1125]) tensor([0.0456, 0.1768, 0.5238, 0.2538]) -Greedy action tensor([-1.1693, -0.6097, 0.3087, 0.1451]) tensor([0.0921, 0.1612, 0.4038, 0.3429]) -Greedy action tensor([-1.6566, -0.5559, 0.5369, -0.0396]) tensor([0.0555, 0.1669, 0.4978, 0.2797]) -Greedy action tensor([-1.6711, -0.2572, 0.5107, -0.0843]) tensor([0.0530, 0.2180, 0.4698, 0.2591]) -Greedy action tensor([-1.2061, -0.6554, 0.3435, -0.0066]) tensor([0.0929, 0.1612, 0.4376, 0.3083]) -Greedy action tensor([-1.8667, -0.5030, 0.7608, -0.0248]) tensor([0.0399, 0.1561, 0.5523, 0.2517]) -Greedy action tensor([-1.6888, -0.4918, 0.5449, 0.0167]) tensor([0.0522, 0.1729, 0.4875, 0.2874]) -Greedy action tensor([-0.8927, -0.7172, 0.7582, 0.2789]) tensor([0.0941, 0.1121, 0.4903, 0.3036]) -Greedy action tensor([-1.5983, -0.1266, 0.5796, 0.3004]) tensor([0.0479, 0.2088, 0.4232, 0.3201]) -Greedy action tensor([-1.3154, -0.5517, 0.3807, 0.1524]) tensor([0.0773, 0.1659, 0.4214, 0.3354]) -Greedy action tensor([ 0.1938, -0.3236, 1.0373, 1.6166]) tensor([0.1239, 0.0739, 0.2881, 0.5141]) -Greedy action tensor([-1.7940, -0.5032, 0.7632, 0.1184]) tensor([0.0411, 0.1496, 0.5307, 0.2785]) -Greedy action tensor([-1.5859, -0.4997, 0.4775, 0.0052]) tensor([0.0597, 0.1770, 0.4702, 0.2932]) -Greedy action tensor([-1.8359, -0.4064, 0.6065, -0.1279]) tensor([0.0451, 0.1882, 0.5182, 0.2486]) -Greedy action tensor([-1.3607, 0.4362, 0.4925, 0.3634]) tensor([0.0526, 0.3171, 0.3355, 0.2948]) -Greedy action tensor([-1.7498, -0.4889, 0.6071, 0.0104]) tensor([0.0478, 0.1688, 0.5052, 0.2782]) -Greedy action tensor([-1.7915, -0.3874, 0.6015, -0.0792]) tensor([0.0464, 0.1889, 0.5077, 0.2570]) -Greedy action tensor([-1.5326, -0.5280, 0.4333, 0.1024]) tensor([0.0625, 0.1707, 0.4463, 0.3206]) -Greedy action tensor([-1.0214, -0.1073, 0.2689, -0.0886]) tensor([0.1034, 0.2580, 0.3758, 0.2628]) -Greedy action tensor([-1.7274, -0.3163, 0.5414, -0.0530]) tensor([0.0497, 0.2040, 0.4809, 0.2654]) -Greedy action tensor([-0.6390, 0.9729, 0.3296, 0.7986]) tensor([0.0778, 0.3898, 0.2049, 0.3275]) -Greedy action tensor([-0.9159, -0.5835, 0.2094, 0.3479]) tensor([0.1109, 0.1547, 0.3418, 0.3926]) -Greedy action tensor([-1.2277, -0.5071, 0.4101, 0.5565]) tensor([0.0706, 0.1452, 0.3634, 0.4207]) -Greedy action tensor([-1.5678, -0.3070, 0.7778, 0.3658]) tensor([0.0457, 0.1612, 0.4771, 0.3160]) -Greedy action tensor([-1.8726, -0.3972, 0.6224, -0.1364]) tensor([0.0432, 0.1887, 0.5232, 0.2450]) -Greedy action tensor([-1.2612, -0.5119, 1.0618, 1.1760]) tensor([0.0404, 0.0854, 0.4121, 0.4620]) -Greedy action tensor([-1.9290, -0.4060, 0.6491, -0.1678]) tensor([0.0407, 0.1866, 0.5359, 0.2368]) -Greedy action tensor([-1.7717, -0.3548, 0.5976, -0.0639]) tensor([0.0469, 0.1933, 0.5011, 0.2586]) -Greedy action tensor([-1.9463, -0.4545, 0.6704, -0.1807]) tensor([0.0400, 0.1779, 0.5480, 0.2340]) -Greedy action tensor([-1.9289, -0.3988, 0.6537, -0.1722]) tensor([0.0406, 0.1874, 0.5369, 0.2351]) -Greedy action tensor([-1.6836, -0.4094, 0.6171, 0.1666]) tensor([0.0478, 0.1709, 0.4772, 0.3041]) -Greedy action tensor([-0.8436, -0.7349, 0.1806, -0.0748]) tensor([0.1417, 0.1580, 0.3946, 0.3057]) -Greedy action tensor([-1.8957, -0.4288, 0.6418, -0.1553]) tensor([0.0422, 0.1831, 0.5340, 0.2407]) -Greedy action tensor([-1.7728, 0.0760, 0.6179, -0.4703]) tensor([0.0456, 0.2894, 0.4975, 0.1676]) -Greedy action tensor([ 0.1585, -0.2360, 0.9827, 1.6922]) tensor([0.1164, 0.0785, 0.2655, 0.5397]) -Greedy action tensor([-0.4948, -0.8010, 0.9276, 0.4848]) tensor([0.1170, 0.0861, 0.4852, 0.3116]) -Greedy action tensor([-1.8108, -0.3762, 0.6508, -0.0716]) tensor([0.0442, 0.1856, 0.5184, 0.2517]) -Greedy action tensor([-1.2555, -0.4093, 0.4670, 0.7243]) tensor([0.0618, 0.1441, 0.3462, 0.4478]) -Greedy action tensor([-1.9023, -0.4073, 0.6408, -0.1620]) tensor([0.0419, 0.1868, 0.5327, 0.2387]) -Greedy action tensor([ 1.2801, -0.2828, -0.8553, 0.4879]) tensor([0.5616, 0.1177, 0.0664, 0.2543]) -Greedy action tensor([ 1.3582, -0.3651, -0.5549, 0.3501]) tensor([0.5914, 0.1055, 0.0873, 0.2158]) -Greedy action tensor([ 1.4422, -0.4448, -0.2304, 0.1550]) tensor([0.6191, 0.0938, 0.1162, 0.1709]) -Greedy action tensor([ 2.5061, -0.7460, -0.3647, 0.7578]) tensor([0.7878, 0.0305, 0.0446, 0.1371]) -Greedy action tensor([ 1.3666, -0.5101, -0.0522, 0.3747]) tensor([0.5663, 0.0867, 0.1370, 0.2100]) -Greedy action tensor([ 1.4944, -0.7653, -0.1622, 0.2777]) tensor([0.6284, 0.0656, 0.1199, 0.1861]) -Greedy action tensor([ 1.3294, -0.4732, -0.1408, 0.2518]) tensor([0.5763, 0.0950, 0.1325, 0.1962]) -Greedy action tensor([ 2.0774, -0.7900, -0.2060, 0.8111]) tensor([0.6941, 0.0395, 0.0708, 0.1956]) -Greedy action tensor([ 1.3290, -0.4021, -0.2083, -0.0300]) tensor([0.6064, 0.1074, 0.1304, 0.1558]) -Greedy action tensor([ 1.7831, -0.6722, -0.4604, 1.1618]) tensor([0.5783, 0.0496, 0.0614, 0.3107]) -Greedy action tensor([ 1.2102, -0.5765, -0.1030, 0.4933]) tensor([0.5195, 0.0870, 0.1397, 0.2537]) -Greedy action tensor([ 0.8985, -0.3966, -0.0487, 0.1291]) tensor([0.4706, 0.1289, 0.1825, 0.2180]) -Greedy action tensor([ 1.5689, -0.5587, -0.4174, 0.5249]) tensor([0.6218, 0.0741, 0.0853, 0.2189]) -Greedy action tensor([ 1.1503, -0.3101, -0.4460, -0.2818]) tensor([0.5975, 0.1387, 0.1211, 0.1427]) -Greedy action tensor([ 1.3274, -0.1629, -0.5260, 0.4254]) tensor([0.5594, 0.1260, 0.0877, 0.2270]) -Greedy action tensor([ 0.5276, -0.2511, -0.4981, 0.5404]) tensor([0.3533, 0.1622, 0.1267, 0.3579]) -Greedy action tensor([ 1.3545, -0.4233, -0.4313, 0.5436]) tensor([0.5614, 0.0949, 0.0941, 0.2495]) -Greedy action tensor([ 2.3382, -1.1484, -0.1854, 0.6192]) tensor([0.7752, 0.0237, 0.0622, 0.1390]) -Greedy action tensor([ 1.2788, -0.8366, -0.2523, 0.3703]) tensor([0.5747, 0.0693, 0.1243, 0.2317]) -Greedy action tensor([ 1.9377, -0.1801, -0.3074, 0.1094]) tensor([0.7210, 0.0867, 0.0764, 0.1159]) -Greedy action tensor([ 0.7743, -0.2490, -0.0346, 0.1476]) tensor([0.4275, 0.1536, 0.1904, 0.2284]) -Greedy action tensor([ 1.4112, -0.4954, -0.2570, 0.4861]) tensor([0.5768, 0.0857, 0.1088, 0.2287]) -Greedy action tensor([ 1.9990, -0.9122, -0.3983, 0.7078]) tensor([0.7041, 0.0383, 0.0640, 0.1936]) -Greedy action tensor([ 1.0740, -0.1956, -0.2349, 0.3708]) tensor([0.4887, 0.1373, 0.1320, 0.2419]) -Greedy action tensor([ 1.6461, -0.3091, -0.3991, 0.3908]) tensor([0.6427, 0.0910, 0.0831, 0.1832]) -Greedy action tensor([ 0.6101, -0.1354, 0.0255, 0.0287]) tensor([0.3860, 0.1831, 0.2151, 0.2158]) -Greedy action tensor([ 1.6818, -0.6113, -0.4867, 0.3129]) tensor([0.6804, 0.0687, 0.0778, 0.1731]) -Greedy action tensor([ 1.9868, 0.3117, -0.9088, -0.0751]) tensor([0.7301, 0.1367, 0.0403, 0.0929]) -Greedy action tensor([ 0.3987, -0.2358, 0.0077, -0.0075]) tensor([0.3481, 0.1846, 0.2354, 0.2319]) -Greedy action tensor([ 1.5867, -0.5284, -0.2632, 0.2904]) tensor([0.6446, 0.0777, 0.1014, 0.1763]) -Greedy action tensor([ 0.6789, -0.3491, -0.0020, -0.1298]) tensor([0.4330, 0.1549, 0.2192, 0.1929]) -Greedy action tensor([ 1.7057, -0.8149, -0.3777, 0.1225]) tensor([0.7091, 0.0570, 0.0883, 0.1456]) -Greedy action tensor([ 1.4239, -0.2540, -0.8054, 0.2766]) tensor([0.6204, 0.1159, 0.0668, 0.1970]) -Greedy action tensor([ 1.0131, -0.3334, 0.2955, 0.2221]) tensor([0.4542, 0.1182, 0.2216, 0.2060]) -Greedy action tensor([ 1.4534, -0.6610, -0.3269, 0.6404]) tensor([0.5771, 0.0697, 0.0973, 0.2560]) -Greedy action tensor([ 2.1063, -0.5682, -0.4011, 0.3526]) tensor([0.7555, 0.0521, 0.0616, 0.1308]) -Greedy action tensor([ 1.7760, -0.4438, -0.4346, 0.3682]) tensor([0.6835, 0.0743, 0.0749, 0.1673]) -Greedy action tensor([ 1.6293, -0.7647, -0.0956, 0.0155]) tensor([0.6809, 0.0621, 0.1213, 0.1356]) -Greedy action tensor([ 2.2568, -1.0054, 0.1102, 0.9007]) tensor([0.7078, 0.0271, 0.0827, 0.1824]) -Greedy action tensor([ 0.8875, -0.0748, -0.3557, -0.0610]) tensor([0.4859, 0.1856, 0.1402, 0.1882]) -Greedy action tensor([ 0.7658, -0.5505, -0.2221, 0.3571]) tensor([0.4338, 0.1163, 0.1615, 0.2883]) -Greedy action tensor([ 1.0813, -0.5212, -0.3261, 0.4427]) tensor([0.5065, 0.1020, 0.1240, 0.2675]) -Greedy action tensor([ 1.8378, -0.6727, -0.4770, 0.5965]) tensor([0.6807, 0.0553, 0.0672, 0.1967]) -Greedy action tensor([ 1.1095, -0.5031, -0.3628, 0.8762]) tensor([0.4503, 0.0898, 0.1033, 0.3566]) -Greedy action tensor([ 1.5612, -0.4119, -0.4954, 0.3854]) tensor([0.6347, 0.0882, 0.0812, 0.1959]) -Greedy action tensor([ 1.2824, -0.6325, -0.1007, 0.3268]) tensor([0.5609, 0.0827, 0.1407, 0.2157]) -Greedy action tensor([ 1.6100, -0.3014, -0.2791, 0.3213]) tensor([0.6350, 0.0939, 0.0960, 0.1750]) -Greedy action tensor([ 1.5503, -0.3437, -0.3380, 0.0469]) tensor([0.6561, 0.0987, 0.0993, 0.1459]) -Greedy action tensor([ 1.5357, -0.4762, -0.3758, 0.2006]) tensor([0.6474, 0.0866, 0.0957, 0.1703]) -Greedy action tensor([ 1.1427, -0.3106, -0.1024, 0.2007]) tensor([0.5231, 0.1223, 0.1506, 0.2039]) -Greedy action tensor([ 1.0348, -0.0781, -0.5278, -0.2548]) tensor([0.5514, 0.1812, 0.1156, 0.1518]) -Greedy action tensor([ 1.7559, -0.7344, -0.1887, 0.3053]) tensor([0.6848, 0.0568, 0.0980, 0.1605]) -Greedy action tensor([ 1.4422, 0.1274, -0.6559, -0.0041]) tensor([0.6148, 0.1651, 0.0754, 0.1447]) -Greedy action tensor([ 1.4041, -0.4422, -0.0677, 0.3580]) tensor([0.5752, 0.0908, 0.1320, 0.2020]) -Greedy action tensor([ 1.2967, -0.4850, -0.1806, 0.0044]) tensor([0.5984, 0.1007, 0.1366, 0.1643]) -Greedy action tensor([ 0.8926, -0.6058, 0.0267, 0.0171]) tensor([0.4853, 0.1084, 0.2041, 0.2022]) -Greedy action tensor([ 1.3522, -0.6751, -0.4580, 0.8594]) tensor([0.5246, 0.0691, 0.0858, 0.3205]) -Greedy action tensor([ 0.8678, -0.1683, -0.4058, 0.2652]) tensor([0.4583, 0.1626, 0.1282, 0.2509]) -Greedy action tensor([ 0.9156, -0.5041, -0.3538, 0.3399]) tensor([0.4796, 0.1160, 0.1348, 0.2697]) -Greedy action tensor([ 1.4935, -0.5930, -0.2906, 0.3798]) tensor([0.6171, 0.0766, 0.1036, 0.2026]) -Greedy action tensor([ 0.4709, -0.3563, 0.1286, 0.0009]) tensor([0.3607, 0.1577, 0.2561, 0.2254]) -Greedy action tensor([ 1.2924, -0.3838, -0.2979, 0.4277]) tensor([0.5518, 0.1032, 0.1125, 0.2324]) -Greedy action tensor([ 1.3708, -0.7252, -0.3027, 0.6447]) tensor([0.5573, 0.0685, 0.1045, 0.2696]) -Greedy action tensor([ 1.4298, -0.8515, -0.0216, 0.2304]) tensor([0.6106, 0.0624, 0.1430, 0.1840]) -Greedy action tensor([ 1.5270, -0.8225, -0.3410, 0.6225]) tensor([0.6044, 0.0577, 0.0933, 0.2446]) -Greedy action tensor([ 1.2643, -0.6728, -0.1316, 0.6352]) tensor([0.5195, 0.0749, 0.1286, 0.2770]) -Greedy action tensor([ 1.6305, -0.8743, -0.8082, 0.0445]) tensor([0.7280, 0.0595, 0.0635, 0.1490]) -Greedy action tensor([ 2.1135, -0.8573, -0.6529, 0.5779]) tensor([0.7522, 0.0386, 0.0473, 0.1620]) -Greedy action tensor([ 1.5335, -0.2465, -0.6128, 0.1883]) tensor([0.6468, 0.1091, 0.0756, 0.1685]) -Greedy action tensor([ 1.9694, -0.7282, -0.3311, 0.3391]) tensor([0.7334, 0.0494, 0.0735, 0.1437]) -Greedy action tensor([ 1.2349, -0.6040, 0.0027, 0.1510]) tensor([0.5590, 0.0889, 0.1630, 0.1891]) -Greedy action tensor([ 1.2972, -0.3899, -0.2285, 0.4617]) tensor([0.5446, 0.1008, 0.1184, 0.2362]) -Greedy action tensor([ 0.2887, -0.4070, -0.2929, 0.4303]) tensor([0.3115, 0.1554, 0.1742, 0.3589]) -Greedy action tensor([ 1.7066, -0.4312, -0.4686, 0.0741]) tensor([0.7008, 0.0826, 0.0796, 0.1370]) -Greedy action tensor([ 1.5394, -0.6221, -0.2787, 0.3538]) tensor([0.6317, 0.0727, 0.1025, 0.1930]) -Greedy action tensor([ 1.3105, -0.2800, -0.7749, 0.3479]) tensor([0.5848, 0.1192, 0.0727, 0.2233]) -Greedy action tensor([ 2.6637, -0.4608, -0.6980, 0.4005]) tensor([0.8456, 0.0372, 0.0293, 0.0880]) -Greedy action tensor([ 1.3149, -0.4816, -0.2134, 0.5803]) tensor([0.5369, 0.0891, 0.1165, 0.2576]) -Greedy action tensor([ 1.4289, -0.7005, -0.4760, 0.6401]) tensor([0.5807, 0.0690, 0.0864, 0.2638]) -Greedy action tensor([ 0.6222, -0.3631, 0.0536, -0.2426]) tensor([0.4236, 0.1581, 0.2399, 0.1784]) -Greedy action tensor([ 1.9962, -0.7615, -0.3580, 0.6825]) tensor([0.7006, 0.0444, 0.0665, 0.1884]) -Greedy action tensor([ 0.4037, -0.8707, 0.4673, 1.6981]) tensor([0.1668, 0.0466, 0.1778, 0.6087]) -Greedy action tensor([ 1.1396, 0.1861, -0.1556, -0.3196]) tensor([0.5286, 0.2037, 0.1448, 0.1229]) -Greedy action tensor([-0.0582, -0.5897, -0.3956, 1.1042]) tensor([0.1819, 0.1069, 0.1298, 0.5815]) -Greedy action tensor([ 0.2846, -0.3725, -0.5370, 1.5989]) tensor([0.1760, 0.0913, 0.0774, 0.6553]) -Greedy action tensor([ 1.6950, -0.1544, 0.2242, 0.5496]) tensor([0.5864, 0.0923, 0.1347, 0.1865]) -Greedy action tensor([-0.6334, -1.3900, -0.5858, 1.2582]) tensor([0.1093, 0.0513, 0.1146, 0.7247]) -Greedy action tensor([ 1.1985, -0.4463, 0.4684, 0.1903]) tensor([0.4902, 0.0946, 0.2362, 0.1789]) -Greedy action tensor([1.3318, 0.1089, 0.1943, 0.7973]) tensor([0.4544, 0.1337, 0.1457, 0.2662]) -Greedy action tensor([-0.1459, -1.1583, 0.8243, 0.8480]) tensor([0.1492, 0.0542, 0.3936, 0.4030]) -Greedy action tensor([0.5849, 0.1559, 0.4849, 0.7230]) tensor([0.2700, 0.1758, 0.2443, 0.3099]) -Greedy action tensor([-0.6146, -1.1456, 0.0828, -0.4428]) tensor([0.2090, 0.1229, 0.4198, 0.2482]) -Greedy action tensor([ 0.3638, -0.5608, 1.5636, 0.5117]) tensor([0.1702, 0.0675, 0.5650, 0.1973]) -Greedy action tensor([ 0.0179, -0.5517, -0.8409, -0.0061]) tensor([0.3372, 0.1908, 0.1429, 0.3292]) -Greedy action tensor([ 1.2697, 1.4401, -1.2709, 1.0087]) tensor([0.3295, 0.3907, 0.0260, 0.2538]) -Greedy action tensor([ 0.4542, -1.1562, -0.1770, 0.4082]) tensor([0.3722, 0.0744, 0.1980, 0.3555]) -Greedy action tensor([-0.5059, -0.4878, 0.6803, 0.1579]) tensor([0.1382, 0.1407, 0.4526, 0.2684]) -Greedy action tensor([ 0.1822, -0.3025, -0.1868, -0.2321]) tensor([0.3369, 0.2075, 0.2330, 0.2226]) -Greedy action tensor([ 0.7519, 0.5514, 0.5039, -0.5370]) tensor([0.3479, 0.2847, 0.2715, 0.0959]) -Greedy action tensor([ 0.2221, -0.5210, -0.2308, 1.1796]) tensor([0.2120, 0.1008, 0.1348, 0.5523]) -Greedy action tensor([-0.7206, -0.4731, 1.2664, -0.8339]) tensor([0.0955, 0.1224, 0.6968, 0.0853]) -Greedy action tensor([ 0.9757, 0.1765, -0.6484, 1.2068]) tensor([0.3440, 0.1547, 0.0678, 0.4335]) -Greedy action tensor([ 0.8988, -0.2581, 0.9833, -0.3412]) tensor([0.3715, 0.1168, 0.4042, 0.1075]) -Greedy action tensor([ 0.9452, -0.7715, 0.9686, -0.4025]) tensor([0.4060, 0.0729, 0.4156, 0.1055]) -Greedy action tensor([ 0.8944, -1.4068, 0.4135, 0.1043]) tensor([0.4604, 0.0461, 0.2846, 0.2089]) -Greedy action tensor([-0.9022, 0.2691, -1.0028, -0.6989]) tensor([0.1573, 0.5076, 0.1423, 0.1928]) -Greedy action tensor([-0.3583, 0.3775, 0.2420, 0.9254]) tensor([0.1174, 0.2450, 0.2139, 0.4237]) -Greedy action tensor([1.3842, 0.3692, 1.8853, 0.0443]) tensor([0.3054, 0.1107, 0.5040, 0.0800]) -Greedy action tensor([ 0.7641, -1.3781, 0.1014, 1.1608]) tensor([0.3205, 0.0376, 0.1652, 0.4766]) -Greedy action tensor([ 0.8448, 0.2484, -0.7507, 0.7936]) tensor([0.3699, 0.2037, 0.0750, 0.3514]) -Greedy action tensor([ 0.4738, -0.1927, -0.0940, 0.7278]) tensor([0.2968, 0.1524, 0.1682, 0.3826]) -Greedy action tensor([ 0.2811, -1.3836, 0.5869, 0.0935]) tensor([0.2962, 0.0561, 0.4022, 0.2455]) -Greedy action tensor([ 0.0468, -0.8893, 2.0798, 0.0139]) tensor([0.1000, 0.0392, 0.7639, 0.0968]) -Greedy action tensor([ 0.4980, -1.4102, 0.1880, -0.7820]) tensor([0.4630, 0.0687, 0.3396, 0.1287]) -Greedy action tensor([-1.6627, 0.0915, 0.4660, -0.6079]) tensor([0.0554, 0.3201, 0.4655, 0.1590]) -Greedy action tensor([-0.0374, -1.2703, 1.9461, -0.4331]) tensor([0.1083, 0.0316, 0.7872, 0.0729]) -Greedy action tensor([ 0.7853, -0.5069, 0.0512, 1.9849]) tensor([0.1971, 0.0541, 0.0946, 0.6541]) -Greedy action tensor([ 0.3372, -1.0206, 0.6449, 0.3727]) tensor([0.2737, 0.0704, 0.3723, 0.2836]) -Greedy action tensor([ 1.2373, -1.0897, -0.5427, 0.9559]) tensor([0.4948, 0.0483, 0.0834, 0.3734]) -Greedy action tensor([-0.0250, 0.0949, 0.6080, -0.7238]) tensor([0.2218, 0.2501, 0.4178, 0.1103]) -Greedy action tensor([ 0.5549, -1.9306, 0.3633, -0.5432]) tensor([0.4460, 0.0371, 0.3682, 0.1487]) -Greedy action tensor([-1.2483, -0.6391, -0.7297, 0.4793]) tensor([0.0986, 0.1813, 0.1656, 0.5546]) -Greedy action tensor([-1.0813, -2.8097, 0.0763, 0.0477]) tensor([0.1342, 0.0238, 0.4270, 0.4150]) -Greedy action tensor([ 0.4868, -1.0500, 0.9194, 0.7276]) tensor([0.2482, 0.0534, 0.3826, 0.3158]) -Greedy action tensor([ 0.6910, -0.6985, 0.8771, 1.3263]) tensor([0.2303, 0.0574, 0.2774, 0.4348]) -Greedy action tensor([-0.9563, -1.9211, 0.8471, -0.4211]) tensor([0.1092, 0.0416, 0.6628, 0.1865]) -Greedy action tensor([ 0.4352, -2.3739, 0.1657, 0.1632]) tensor([0.3867, 0.0233, 0.2954, 0.2946]) -Greedy action tensor([ 0.3823, -1.7913, -0.4551, 0.4929]) tensor([0.3754, 0.0427, 0.1625, 0.4194]) -Greedy action tensor([ 0.5461, -1.5567, 0.3826, 0.3718]) tensor([0.3557, 0.0434, 0.3020, 0.2988]) -Greedy action tensor([ 0.7430, 0.1777, -1.0149, 1.7494]) tensor([0.2234, 0.1269, 0.0385, 0.6111]) -Greedy action tensor([ 0.2799, -0.9351, 0.2406, 1.3230]) tensor([0.1962, 0.0582, 0.1887, 0.5569]) -Greedy action tensor([ 0.0742, -1.1256, -0.1395, -0.5978]) tensor([0.3817, 0.1150, 0.3083, 0.1950]) -Greedy action tensor([ 0.1725, -0.2307, -1.1438, 0.6295]) tensor([0.2844, 0.1901, 0.0763, 0.4492]) -Greedy action tensor([-0.6767, -1.6546, 0.6772, -0.3810]) tensor([0.1517, 0.0570, 0.5874, 0.2039]) -Greedy action tensor([ 0.5437, 1.1875, -0.4637, -0.1188]) tensor([0.2642, 0.5030, 0.0965, 0.1362]) -Greedy action tensor([ 1.1861, -0.8849, 1.1985, 0.3288]) tensor([0.3902, 0.0492, 0.3951, 0.1656]) -Greedy action tensor([-0.8970, -0.9814, -0.1810, 0.4052]) tensor([0.1308, 0.1203, 0.2677, 0.4812]) -Greedy action tensor([ 0.7478, -0.7591, -0.3774, 0.4789]) tensor([0.4328, 0.0959, 0.1405, 0.3308]) -Greedy action tensor([ 0.1144, -0.6609, 0.1943, -0.9345]) tensor([0.3455, 0.1591, 0.3743, 0.1210]) -Greedy action tensor([ 0.0701, -1.0181, 0.5136, -0.4383]) tensor([0.2860, 0.0963, 0.4456, 0.1720]) -Greedy action tensor([-0.4184, -0.9642, -0.3057, -0.0329]) tensor([0.2399, 0.1390, 0.2685, 0.3527]) -Greedy action tensor([ 1.3328, -1.4451, 1.0562, 0.2240]) tensor([0.4650, 0.0289, 0.3526, 0.1534]) -Greedy action tensor([ 0.3617, -1.3627, 0.2822, 0.7643]) tensor([0.2780, 0.0496, 0.2567, 0.4157]) -Greedy action tensor([ 0.8501, 0.8047, 1.0805, -0.7501]) tensor([0.2927, 0.2797, 0.3685, 0.0591]) -Greedy action tensor([0.9573, 0.3183, 0.2324, 0.3327]) tensor([0.3925, 0.2072, 0.1901, 0.2102]) -Greedy action tensor([-0.1727, -1.7365, -0.0335, -0.2193]) tensor([0.3018, 0.0632, 0.3469, 0.2881]) -Greedy action tensor([-0.2843, -1.4764, 1.1991, -0.5629]) tensor([0.1546, 0.0469, 0.6815, 0.1170]) -Greedy action tensor([ 0.6525, -0.4210, -0.0750, 0.4861]) tensor([0.3743, 0.1279, 0.1808, 0.3169]) -Greedy action tensor([-1.0157, -0.6322, 0.9457, -2.2103]) tensor([0.1012, 0.1485, 0.7196, 0.0307]) -Greedy action tensor([-0.4089, -0.0896, 0.3381, -0.1010]) tensor([0.1710, 0.2353, 0.3610, 0.2327]) -Greedy action tensor([-0.4944, -1.3882, 0.0177, 0.0548]) tensor([0.2079, 0.0851, 0.3470, 0.3601]) -Greedy action tensor([ 0.5921, -0.7444, 0.0384, -0.7474]) tensor([0.4763, 0.1252, 0.2738, 0.1248]) -Greedy action tensor([ 1.0954, -0.7038, -0.5929, 1.1377]) tensor([0.4178, 0.0691, 0.0772, 0.4358]) -Greedy action tensor([ 0.2537, -1.1120, 0.5249, 0.1812]) tensor([0.2860, 0.0730, 0.3751, 0.2660]) -Greedy action tensor([ 0.7373, 0.8757, -0.3185, 1.2807]) tensor([0.2371, 0.2723, 0.0825, 0.4082]) -Greedy action tensor([-0.0249, -1.3877, 0.8878, -0.2731]) tensor([0.2209, 0.0565, 0.5502, 0.1723]) -Greedy action tensor([-0.6968, -1.0893, 0.0440, -0.3106]) tensor([0.1907, 0.1288, 0.4000, 0.2806]) -Greedy action tensor([ 1.1791, 0.1171, -1.6547, 1.0728]) tensor([0.4341, 0.1501, 0.0255, 0.3903]) -Greedy action tensor([ 0.2102, -0.1613, 1.6583, 0.0476]) tensor([0.1472, 0.1015, 0.6262, 0.1251]) -Greedy action tensor([ 1.9361, -1.7254, 0.4642, 1.4509]) tensor([0.5345, 0.0137, 0.1227, 0.3291]) -Greedy action tensor([ 0.7811, -1.4898, 0.0542, 0.1143]) tensor([0.4762, 0.0492, 0.2302, 0.2445]) -Greedy action tensor([ 0.9692, -0.7641, 0.1485, 0.9501]) tensor([0.3849, 0.0680, 0.1694, 0.3776]) -Greedy action tensor([-1.9548, 0.0198, 0.9346, -0.6283]) tensor([0.0334, 0.2405, 0.6004, 0.1258]) -Greedy action tensor([ 0.5459, -0.6006, -0.1857, -0.4662]) tensor([0.4625, 0.1469, 0.2225, 0.1681]) -Greedy action tensor([ 0.7898, 0.2796, -0.1481, -0.1373]) tensor([0.4188, 0.2515, 0.1640, 0.1657]) -Greedy action tensor([ 0.6823, -0.0389, -0.1174, -0.4073]) tensor([0.4402, 0.2140, 0.1978, 0.1480]) -Greedy action tensor([ 0.2960, -0.2216, -0.0244, -0.1600]) tensor([0.3383, 0.2016, 0.2456, 0.2144]) -Greedy action tensor([ 0.7890, -0.2096, -0.0334, -0.2836]) tensor([0.4652, 0.1713, 0.2044, 0.1591]) -Greedy action tensor([ 0.9203, -0.4026, 0.0796, -0.4619]) tensor([0.5131, 0.1367, 0.2214, 0.1288]) -Greedy action tensor([ 0.8840, -0.5920, 0.0476, -0.4420]) tensor([0.5188, 0.1186, 0.2248, 0.1378]) -Greedy action tensor([ 0.8657, -0.2262, 0.1541, -0.7381]) tensor([0.4932, 0.1655, 0.2421, 0.0992]) -Greedy action tensor([ 0.8989, -0.4194, 0.0066, -0.3497]) tensor([0.5091, 0.1362, 0.2086, 0.1461]) -Greedy action tensor([ 0.6336, -0.1591, -0.2032, -0.5176]) tensor([0.4541, 0.2055, 0.1967, 0.1436]) -Greedy action tensor([ 0.7801, -0.4778, -0.0236, -0.2778]) tensor([0.4810, 0.1367, 0.2153, 0.1670]) -Greedy action tensor([ 0.4616, -0.0793, 0.0184, -0.3623]) tensor([0.3755, 0.2186, 0.2411, 0.1647]) -Greedy action tensor([ 0.9004, -0.5843, -0.1346, -0.2508]) tensor([0.5268, 0.1194, 0.1871, 0.1666]) -Greedy action tensor([ 0.9121, -0.4892, -0.3292, -0.3191]) tensor([0.5473, 0.1348, 0.1582, 0.1598]) -Greedy action tensor([ 0.0422, 0.0076, 0.0609, -0.0369]) tensor([0.2558, 0.2471, 0.2607, 0.2364]) -Greedy action tensor([ 0.6323, -0.5572, -0.0450, -0.2727]) tensor([0.4511, 0.1373, 0.2292, 0.1825]) -Greedy action tensor([ 0.3374, -0.0032, -0.0570, -0.2733]) tensor([0.3415, 0.2429, 0.2302, 0.1854]) -Greedy action tensor([ 0.3786, -0.5592, -0.1854, -0.2089]) tensor([0.3974, 0.1556, 0.2261, 0.2209]) -Greedy action tensor([ 0.8689, -0.7056, 0.0152, -0.5147]) tensor([0.5309, 0.1100, 0.2261, 0.1331]) -Greedy action tensor([ 0.6019, 0.2145, -0.0138, -0.1043]) tensor([0.3686, 0.2503, 0.1992, 0.1819]) -Greedy action tensor([ 0.6708, -0.4168, -0.1123, -0.1488]) tensor([0.4475, 0.1508, 0.2045, 0.1972]) -Greedy action tensor([ 0.3662, -0.1548, -0.0312, -0.1236]) tensor([0.3474, 0.2063, 0.2335, 0.2129]) -Greedy action tensor([ 0.7972, -0.5672, 0.0335, -0.5793]) tensor([0.5066, 0.1295, 0.2361, 0.1279]) -Greedy action tensor([ 0.8616, -0.5640, 0.0767, -0.2150]) tensor([0.4908, 0.1180, 0.2239, 0.1673]) -Greedy action tensor([ 0.4226, -0.0179, -0.0742, -0.2742]) tensor([0.3636, 0.2340, 0.2212, 0.1811]) -Greedy action tensor([ 0.6655, -0.3657, 0.0566, -0.3037]) tensor([0.4386, 0.1564, 0.2386, 0.1664]) -Greedy action tensor([ 0.3512, -0.1163, 0.0597, -0.0218]) tensor([0.3266, 0.2046, 0.2440, 0.2249]) -Greedy action tensor([ 0.7949, -0.8581, 0.0326, -0.3862]) tensor([0.5089, 0.0974, 0.2375, 0.1562]) -Greedy action tensor([ 3.9443e-01, -2.3555e-01, -3.6041e-04, -4.4678e-01]) tensor([0.3791, 0.2019, 0.2555, 0.1635]) -Greedy action tensor([ 1.1833, -0.5436, 0.0069, -0.6087]) tensor([0.6050, 0.1076, 0.1866, 0.1008]) -Greedy action tensor([ 0.4751, -0.0452, -0.1681, -0.2055]) tensor([0.3808, 0.2263, 0.2001, 0.1928]) -Greedy action tensor([ 0.6002, -0.4344, 0.0552, -0.2442]) tensor([0.4228, 0.1503, 0.2452, 0.1817]) -Greedy action tensor([ 0.8896, -0.5728, -0.0692, -0.4970]) tensor([0.5362, 0.1242, 0.2056, 0.1340]) -Greedy action tensor([ 0.6399, -0.1062, 0.0499, -0.0810]) tensor([0.3976, 0.1886, 0.2204, 0.1934]) -Greedy action tensor([ 1.0839, -0.4164, 0.0375, -0.7316]) tensor([0.5757, 0.1284, 0.2022, 0.0937]) -Greedy action tensor([ 0.6250, -0.5055, -0.0652, -0.1635]) tensor([0.4388, 0.1417, 0.2201, 0.1995]) -Greedy action tensor([ 1.1879, -0.7149, 0.0830, -0.4982]) tensor([0.6004, 0.0895, 0.1989, 0.1112]) -Greedy action tensor([ 0.1760, -0.1467, -0.0044, -0.3122]) tensor([0.3152, 0.2282, 0.2631, 0.1934]) -Greedy action tensor([ 0.8701, -0.4011, 0.1199, -0.2786]) tensor([0.4831, 0.1355, 0.2282, 0.1532]) -Greedy action tensor([ 0.5361, -0.3728, 0.0540, -0.3262]) tensor([0.4094, 0.1650, 0.2528, 0.1728]) -Greedy action tensor([ 0.7239, -0.4239, -0.1838, -0.4012]) tensor([0.4889, 0.1551, 0.1972, 0.1587]) -Greedy action tensor([ 0.6496, -0.1931, -0.0717, -0.1134]) tensor([0.4197, 0.1807, 0.2040, 0.1957]) -Greedy action tensor([ 0.8259, -0.4301, 0.0449, -0.4295]) tensor([0.4932, 0.1404, 0.2259, 0.1405]) -Greedy action tensor([ 0.7281, -0.3376, 0.1123, -0.2967]) tensor([0.4457, 0.1535, 0.2408, 0.1600]) -Greedy action tensor([ 0.8856, -0.3410, 0.0757, -0.3902]) tensor([0.4957, 0.1454, 0.2205, 0.1384]) -Greedy action tensor([ 0.4706, -0.4794, -0.0712, -0.1996]) tensor([0.4032, 0.1559, 0.2346, 0.2063]) -Greedy action tensor([ 0.6053, -0.1011, -0.1175, -0.0405]) tensor([0.3995, 0.1971, 0.1939, 0.2094]) -Greedy action tensor([ 0.5157, -0.3877, -0.2039, -0.2309]) tensor([0.4226, 0.1713, 0.2058, 0.2003]) -Greedy action tensor([ 0.5887, -0.3296, -0.0832, -0.1775]) tensor([0.4211, 0.1681, 0.2151, 0.1957]) -Greedy action tensor([ 1.0410, -0.9007, 0.1382, -0.5590]) tensor([0.5712, 0.0819, 0.2316, 0.1153]) -Greedy action tensor([ 0.5848, -0.3354, 0.2869, -0.4294]) tensor([0.3994, 0.1591, 0.2965, 0.1449]) -Greedy action tensor([ 0.6010, 0.0486, -0.0164, -0.1959]) tensor([0.3898, 0.2243, 0.2102, 0.1757]) -Greedy action tensor([ 1.3172, -0.6487, -0.1048, -0.3751]) tensor([0.6388, 0.0895, 0.1541, 0.1176]) -Greedy action tensor([ 1.2643, -0.5680, 0.0371, -0.3385]) tensor([0.6044, 0.0967, 0.1772, 0.1217]) -Greedy action tensor([ 0.6566, -0.6591, -0.1045, -0.1201]) tensor([0.4555, 0.1222, 0.2128, 0.2095]) -Greedy action tensor([ 0.6832, -0.7609, -0.1986, -0.2202]) tensor([0.4866, 0.1148, 0.2015, 0.1972]) -Greedy action tensor([ 0.3974, -0.1396, 0.0334, -0.3864]) tensor([0.3655, 0.2136, 0.2540, 0.1669]) -Greedy action tensor([ 0.6501, 0.0296, -0.0525, -0.1143]) tensor([0.4002, 0.2152, 0.1982, 0.1863]) -Greedy action tensor([ 0.6538, -0.6864, -0.0446, -0.2652]) tensor([0.4634, 0.1213, 0.2305, 0.1848]) -Greedy action tensor([ 0.7174, -0.2702, 0.0674, -0.3414]) tensor([0.4462, 0.1662, 0.2329, 0.1548]) -Greedy action tensor([ 0.6901, -0.5143, -0.0228, -0.3399]) tensor([0.4658, 0.1397, 0.2283, 0.1663]) -Greedy action tensor([ 0.3367, -0.2070, -0.1067, -0.6581]) tensor([0.3858, 0.2240, 0.2476, 0.1427]) -Greedy action tensor([ 0.4954, -0.0818, -0.0116, -0.1565]) tensor([0.3725, 0.2091, 0.2243, 0.1941]) -Greedy action tensor([ 0.9066, -0.3587, 0.0984, -0.2878]) tensor([0.4924, 0.1389, 0.2195, 0.1492]) -Greedy action tensor([ 0.9767, -0.7466, 0.1420, -0.7945]) tensor([0.5610, 0.1001, 0.2435, 0.0954]) -Greedy action tensor([ 0.6347, -0.3295, -0.0045, -0.1935]) tensor([0.4263, 0.1625, 0.2250, 0.1862]) -Greedy action tensor([ 0.5874, -0.2691, -0.0364, -0.4110]) tensor([0.4294, 0.1823, 0.2301, 0.1582]) -Greedy action tensor([ 0.3048, -0.0126, -0.1126, -0.1029]) tensor([0.3277, 0.2385, 0.2158, 0.2180]) -Greedy action tensor([ 0.6233, -0.4885, 0.1221, -0.5089]) tensor([0.4431, 0.1457, 0.2684, 0.1428]) -Greedy action tensor([ 0.8138, -0.2399, -0.0400, -0.1894]) tensor([0.4670, 0.1628, 0.1989, 0.1713]) -Greedy action tensor([ 0.6079, -0.0061, 0.1126, 0.0131]) tensor([0.3701, 0.2003, 0.2255, 0.2042]) -Greedy action tensor([ 0.6733, -0.4193, 0.0358, -0.2729]) tensor([0.4440, 0.1489, 0.2347, 0.1724]) -Greedy action tensor([ 0.3509, -0.0336, -0.1986, 0.0100]) tensor([0.3368, 0.2293, 0.1944, 0.2395]) -Greedy action tensor([ 0.4703, 0.0134, -0.1622, -0.0396]) tensor([0.3617, 0.2290, 0.1921, 0.2172]) -Greedy action tensor([ 0.8641, -0.6541, 0.1170, -0.6462]) tensor([0.5226, 0.1145, 0.2475, 0.1154]) -Greedy action tensor([ 0.5787, -0.2640, -0.0148, -0.2745]) tensor([0.4151, 0.1787, 0.2293, 0.1769]) -Greedy action tensor([ 0.6431, -0.2248, -0.0695, -0.3663]) tensor([0.4396, 0.1846, 0.2156, 0.1602]) -Greedy action tensor([ 1.0223, -0.5243, 0.0600, -0.8379]) tensor([0.5712, 0.1216, 0.2182, 0.0889]) -Greedy action tensor([ 0.8121, -0.6835, -0.0319, -0.5843]) tensor([0.5259, 0.1179, 0.2261, 0.1301]) -Greedy action tensor([ 0.5660, -0.3030, 0.0027, -0.3555]) tensor([0.4190, 0.1757, 0.2385, 0.1667]) -Greedy action tensor([ 0.8738, -0.5187, -0.2235, -0.4588]) tensor([0.5417, 0.1346, 0.1808, 0.1429]) -Greedy action tensor([ 1.2377, -0.6788, 0.0893, 0.2804]) tensor([0.5411, 0.0796, 0.1716, 0.2077]) -Greedy action tensor([ 2.5876, -0.3329, -0.5157, 0.5642]) tensor([0.8123, 0.0438, 0.0365, 0.1074]) -Greedy action tensor([ 1.2302, -0.1757, 0.0216, 0.1859]) tensor([0.5275, 0.1293, 0.1575, 0.1856]) -Greedy action tensor([ 1.7588, -0.2172, -1.3609, 0.3703]) tensor([0.6982, 0.0968, 0.0308, 0.1742]) -Greedy action tensor([ 1.0753, -0.5674, 0.0383, 0.0777]) tensor([0.5217, 0.1009, 0.1850, 0.1924]) -Greedy action tensor([ 0.8565, -0.7143, 0.3163, 0.1296]) tensor([0.4398, 0.0914, 0.2562, 0.2126]) -Greedy action tensor([ 1.1819, -0.0197, -0.4691, -0.2132]) tensor([0.5746, 0.1728, 0.1102, 0.1424]) -Greedy action tensor([ 1.1363, -0.8224, -0.0692, 0.1491]) tensor([0.5515, 0.0778, 0.1652, 0.2055]) -Greedy action tensor([ 1.3292, -0.5253, -0.6716, 1.3831]) tensor([0.4261, 0.0667, 0.0576, 0.4496]) -Greedy action tensor([ 1.7240, -0.0663, -1.0120, 0.0084]) tensor([0.7084, 0.1182, 0.0459, 0.1274]) -Greedy action tensor([ 1.5286, -0.7417, -0.2443, 0.0067]) tensor([0.6705, 0.0692, 0.1139, 0.1464]) -Greedy action tensor([ 1.5714, -0.2681, -0.7380, 0.4686]) tensor([0.6289, 0.0999, 0.0625, 0.2088]) -Greedy action tensor([ 1.1991, -0.3873, -0.2280, 0.1459]) tensor([0.5576, 0.1141, 0.1338, 0.1945]) -Greedy action tensor([ 1.2457, -0.4152, -0.1432, 0.4009]) tensor([0.5351, 0.1016, 0.1334, 0.2299]) -Greedy action tensor([ 1.3147, -0.4701, 0.0070, 0.0235]) tensor([0.5837, 0.0980, 0.1579, 0.1605]) -Greedy action tensor([ 1.2808, 0.1670, -0.3799, 0.5555]) tensor([0.4994, 0.1640, 0.0949, 0.2418]) -Greedy action tensor([ 1.1152, -0.0356, -0.6382, -0.0083]) tensor([0.5511, 0.1743, 0.0954, 0.1792]) -Greedy action tensor([ 0.8092, -0.0376, -0.0837, 0.2721]) tensor([0.4128, 0.1770, 0.1690, 0.2412]) -Greedy action tensor([ 0.7967, -0.5467, -0.2631, 0.5255]) tensor([0.4220, 0.1101, 0.1462, 0.3217]) -Greedy action tensor([ 1.2786, -0.4593, -0.3490, 0.2110]) tensor([0.5827, 0.1025, 0.1144, 0.2004]) -Greedy action tensor([ 1.0853, 0.1447, -0.5159, 0.4188]) tensor([0.4749, 0.1854, 0.0958, 0.2439]) -Greedy action tensor([ 1.3262, -0.2110, -0.5210, 0.3761]) tensor([0.5684, 0.1222, 0.0896, 0.2198]) -Greedy action tensor([ 1.7147, -0.4143, -0.8511, 0.2388]) tensor([0.7021, 0.0835, 0.0540, 0.1605]) -Greedy action tensor([ 1.5199, -0.1676, -0.2800, -0.3341]) tensor([0.6636, 0.1228, 0.1097, 0.1039]) -Greedy action tensor([ 1.5796, -0.2041, -0.1081, 0.5615]) tensor([0.5834, 0.0980, 0.1079, 0.2108]) -Greedy action tensor([ 1.7143, 0.0024, -0.1559, 0.5352]) tensor([0.6090, 0.1099, 0.0938, 0.1873]) -Greedy action tensor([ 1.3147, -0.4397, -0.3200, 0.5009]) tensor([0.5521, 0.0955, 0.1077, 0.2447]) -Greedy action tensor([ 1.4948, -0.5361, -0.2788, 0.2096]) tensor([0.6339, 0.0832, 0.1076, 0.1753]) -Greedy action tensor([ 1.6801, -0.5266, -0.0854, 0.4001]) tensor([0.6414, 0.0706, 0.1097, 0.1783]) -Greedy action tensor([ 1.1252, -0.0752, -0.3483, -0.0708]) tensor([0.5457, 0.1643, 0.1250, 0.1650]) -Greedy action tensor([ 0.5240, -0.1202, 0.0382, 0.0062]) tensor([0.3655, 0.1919, 0.2248, 0.2178]) -Greedy action tensor([ 1.4799, -0.6946, -0.1667, 0.4505]) tensor([0.6011, 0.0683, 0.1158, 0.2147]) -Greedy action tensor([ 1.9534, 0.2124, -0.3702, 0.2902]) tensor([0.6836, 0.1199, 0.0669, 0.1296]) -Greedy action tensor([ 1.4996, -0.2667, -0.3139, 0.1049]) tensor([0.6321, 0.1081, 0.1031, 0.1567]) -Greedy action tensor([ 1.0485, -0.3113, -0.0043, 0.3294]) tensor([0.4778, 0.1227, 0.1667, 0.2328]) -Greedy action tensor([ 1.0056, -0.1273, -0.6195, 0.3055]) tensor([0.4961, 0.1598, 0.0977, 0.2464]) -Greedy action tensor([ 1.1451, -0.0078, -0.6692, 0.6985]) tensor([0.4721, 0.1490, 0.0769, 0.3020]) -Greedy action tensor([ 0.7335, -0.6029, 0.0603, 0.3385]) tensor([0.4087, 0.1074, 0.2085, 0.2754]) -Greedy action tensor([ 0.9670, -0.6913, -0.0802, 0.4217]) tensor([0.4715, 0.0898, 0.1655, 0.2733]) -Greedy action tensor([ 1.0819, -0.2444, -0.0799, 0.1283]) tensor([0.5092, 0.1352, 0.1594, 0.1962]) -Greedy action tensor([ 1.6075, -0.6297, -0.4743, 0.4671]) tensor([0.6447, 0.0688, 0.0804, 0.2061]) -Greedy action tensor([ 1.4597, -0.5851, -0.3017, 0.2093]) tensor([0.6299, 0.0815, 0.1082, 0.1804]) -Greedy action tensor([ 0.9554, -0.0356, -0.5744, -0.0019]) tensor([0.5072, 0.1883, 0.1098, 0.1947]) -Greedy action tensor([ 0.5310, -0.0829, -0.0101, -0.1201]) tensor([0.3781, 0.2046, 0.2201, 0.1972]) -Greedy action tensor([ 2.4844, -0.6325, -0.0683, 0.4504]) tensor([0.7981, 0.0354, 0.0621, 0.1044]) -Greedy action tensor([ 1.4036, -0.8029, -0.1074, -0.0744]) tensor([0.6415, 0.0706, 0.1416, 0.1463]) -Greedy action tensor([ 2.5759, 0.7209, -0.2025, -0.1236]) tensor([0.7777, 0.1217, 0.0483, 0.0523]) -Greedy action tensor([ 2.0847, -0.5044, -0.5454, 0.5383]) tensor([0.7352, 0.0552, 0.0530, 0.1566]) -Greedy action tensor([ 1.1919, -0.5345, -0.2695, 0.1771]) tensor([0.5642, 0.1004, 0.1309, 0.2045]) -Greedy action tensor([ 1.9750, -0.7016, -0.3890, 0.7278]) tensor([0.6896, 0.0474, 0.0649, 0.1981]) -Greedy action tensor([ 1.0979, -0.0162, -0.2914, 0.4479]) tensor([0.4763, 0.1563, 0.1187, 0.2486]) -Greedy action tensor([0.4325, 0.0590, 0.1766, 0.0246]) tensor([0.3197, 0.2201, 0.2476, 0.2126]) -Greedy action tensor([ 1.4873, -0.9980, 0.0630, 0.2379]) tensor([0.6209, 0.0517, 0.1494, 0.1780]) -Greedy action tensor([ 1.6168, -0.7167, -0.3426, 0.2471]) tensor([0.6702, 0.0650, 0.0945, 0.1704]) -Greedy action tensor([ 2.0144, -0.9333, -0.2049, 0.5010]) tensor([0.7240, 0.0380, 0.0787, 0.1594]) -Greedy action tensor([ 1.0255, -0.5993, -0.4042, 0.2375]) tensor([0.5288, 0.1041, 0.1266, 0.2405]) -Greedy action tensor([ 0.9272, -0.2035, -0.1863, 0.0409]) tensor([0.4846, 0.1564, 0.1592, 0.1998]) -Greedy action tensor([ 1.7139, -1.0378, -0.2699, 0.4534]) tensor([0.6735, 0.0430, 0.0926, 0.1909]) -Greedy action tensor([ 1.2174, -0.6750, -0.3019, 0.3785]) tensor([0.5550, 0.0837, 0.1215, 0.2399]) -Greedy action tensor([ 1.4145, -0.4088, -0.2352, 0.3629]) tensor([0.5872, 0.0948, 0.1128, 0.2051]) -Greedy action tensor([ 0.6873, -0.3561, -0.1671, 0.0051]) tensor([0.4380, 0.1543, 0.1864, 0.2214]) -Greedy action tensor([ 1.6564, 0.1562, -0.3305, -0.2195]) tensor([0.6608, 0.1474, 0.0906, 0.1012]) -Greedy action tensor([ 0.8463, -0.2890, -0.4050, 0.1892]) tensor([0.4704, 0.1512, 0.1346, 0.2438]) -Greedy action tensor([ 1.3651, -0.7447, -0.0483, 0.3208]) tensor([0.5826, 0.0706, 0.1417, 0.2050]) -Greedy action tensor([ 1.6885, -0.9379, -0.4061, 0.0875]) tensor([0.7157, 0.0518, 0.0881, 0.1444]) -Greedy action tensor([ 2.0132, -0.5601, -0.2429, 0.1120]) tensor([0.7517, 0.0573, 0.0787, 0.1123]) -Greedy action tensor([ 2.1380, -0.5086, -0.6196, 0.6587]) tensor([0.7341, 0.0520, 0.0466, 0.1672]) -Greedy action tensor([ 1.4418, -0.4560, -0.2311, 0.2137]) tensor([0.6133, 0.0919, 0.1151, 0.1796]) -Greedy action tensor([ 1.5410, -0.5383, -0.3635, 0.1050]) tensor([0.6615, 0.0827, 0.0985, 0.1573]) -Greedy action tensor([ 1.6054, -0.2632, -0.2885, 0.1804]) tensor([0.6471, 0.0999, 0.0974, 0.1556]) -Greedy action tensor([ 1.1914, 0.3723, 0.1887, -0.4136]) tensor([0.4979, 0.2195, 0.1827, 0.1000]) -Greedy action tensor([ 1.5658, -0.8531, -0.5281, 0.8608]) tensor([0.5861, 0.0522, 0.0722, 0.2896]) -Greedy action tensor([ 1.2678, -0.2690, -0.3128, 0.2398]) tensor([0.5622, 0.1209, 0.1157, 0.2011]) -Greedy action tensor([ 1.5099, 0.3369, -0.4624, 0.3824]) tensor([0.5642, 0.1746, 0.0785, 0.1827]) -Greedy action tensor([ 1.2004, -0.6671, -0.0552, -0.1162]) tensor([0.5857, 0.0905, 0.1669, 0.1570]) -Greedy action tensor([ 1.1185, -0.4903, -0.1292, 0.1448]) tensor([0.5362, 0.1073, 0.1540, 0.2025]) -Greedy action tensor([ 2.0969, -1.0172, -0.5904, 0.2000]) tensor([0.7921, 0.0352, 0.0539, 0.1188]) -Greedy action tensor([ 2.3687, -0.9308, -0.2904, 0.5640]) tensor([0.7865, 0.0290, 0.0551, 0.1294]) -Greedy action tensor([ 1.7382, -0.3493, -0.3251, 0.4503]) tensor([0.6549, 0.0812, 0.0832, 0.1807]) -Greedy action tensor([ 1.8769, -0.7238, -0.1871, 0.3187]) tensor([0.7084, 0.0526, 0.0899, 0.1491]) -Greedy action tensor([ 1.3170, -0.1818, -0.3224, 0.4730]) tensor([0.5413, 0.1209, 0.1051, 0.2327]) -Greedy action tensor([-1.8812, -0.4503, 0.6353, -0.1351]) tensor([0.0429, 0.1795, 0.5316, 0.2460]) -Greedy action tensor([-1.8901, -0.4296, 0.6351, -0.1556]) tensor([0.0426, 0.1836, 0.5324, 0.2414]) -Greedy action tensor([-1.7062, -0.4766, 0.5316, -0.0535]) tensor([0.0526, 0.1799, 0.4929, 0.2746]) -Greedy action tensor([-1.7586, 0.0483, 0.4955, -0.0691]) tensor([0.0454, 0.2765, 0.4323, 0.2458]) -Greedy action tensor([-0.3599, -0.4362, 0.2589, 0.1187]) tensor([0.1853, 0.1717, 0.3440, 0.2990]) -Greedy action tensor([-1.7407, -0.2694, 0.5411, -0.1029]) tensor([0.0493, 0.2146, 0.4826, 0.2535]) -Greedy action tensor([-1.9276, -0.4340, 0.6564, -0.1715]) tensor([0.0408, 0.1818, 0.5410, 0.2364]) -Greedy action tensor([-1.9166, -0.4669, 0.6551, -0.1681]) tensor([0.0415, 0.1769, 0.5432, 0.2385]) -Greedy action tensor([-1.5000, -0.3725, 0.4728, 0.0090]) tensor([0.0633, 0.1954, 0.4551, 0.2862]) -Greedy action tensor([-1.5526, -0.5480, 0.4624, 0.0352]) tensor([0.0620, 0.1694, 0.4652, 0.3035]) -Greedy action tensor([-1.6852, -0.5359, 0.7232, 0.2214]) tensor([0.0454, 0.1434, 0.5052, 0.3059]) -Greedy action tensor([-1.8463, -0.4563, 0.6116, -0.1164]) tensor([0.0448, 0.1798, 0.5229, 0.2525]) -Greedy action tensor([0.6862, 0.6063, 0.5293, 1.4211]) tensor([0.2056, 0.1898, 0.1758, 0.4288]) -Greedy action tensor([-1.6488, -0.3100, 0.4984, 0.0306]) tensor([0.0534, 0.2036, 0.4569, 0.2862]) -Greedy action tensor([-1.7658, -0.4873, 0.5793, -0.0382]) tensor([0.0484, 0.1739, 0.5052, 0.2725]) -Greedy action tensor([-1.7179, -0.4322, 0.6208, 0.1482]) tensor([0.0466, 0.1686, 0.4834, 0.3013]) -Greedy action tensor([-1.8763, -0.2745, 0.6029, -0.1453]) tensor([0.0425, 0.2108, 0.5069, 0.2399]) -Greedy action tensor([-1.2942, -0.2781, 0.5595, -0.6195]) tensor([0.0826, 0.2281, 0.5272, 0.1621]) -Greedy action tensor([-1.7852, -0.4633, 0.5891, -0.0915]) tensor([0.0478, 0.1792, 0.5132, 0.2598]) -Greedy action tensor([-1.9008, -0.4496, 0.6483, -0.1574]) tensor([0.0420, 0.1795, 0.5381, 0.2404]) -Greedy action tensor([-1.7369, -0.5065, 0.5718, -0.0205]) tensor([0.0499, 0.1707, 0.5019, 0.2776]) -Greedy action tensor([-0.9160, -0.2999, 1.0907, 1.4047]) tensor([0.0488, 0.0904, 0.3633, 0.4974]) -Greedy action tensor([-1.7875, -0.2508, 0.5309, -0.0784]) tensor([0.0469, 0.2179, 0.4762, 0.2590]) -Greedy action tensor([-1.6930, -0.4544, 0.5474, -0.0489]) tensor([0.0526, 0.1814, 0.4940, 0.2721]) -Greedy action tensor([-1.7926, -0.5573, 1.4520, 0.9637]) tensor([0.0218, 0.0750, 0.5597, 0.3435]) -Greedy action tensor([-1.8538, -0.2828, 0.5958, -0.1230]) tensor([0.0434, 0.2088, 0.5028, 0.2450]) -Greedy action tensor([-1.8716, -0.2582, 0.6136, -0.1784]) tensor([0.0426, 0.2140, 0.5117, 0.2318]) -Greedy action tensor([-1.6992, -0.4637, 0.5752, -0.0776]) tensor([0.0520, 0.1790, 0.5057, 0.2633]) -Greedy action tensor([-1.7177, -0.2403, 0.5251, -0.0178]) tensor([0.0493, 0.2161, 0.4646, 0.2700]) -Greedy action tensor([-1.7801, -0.4608, 0.6036, -0.0934]) tensor([0.0476, 0.1782, 0.5167, 0.2574]) -Greedy action tensor([-1.9088, -0.4363, 0.6532, -0.1501]) tensor([0.0415, 0.1807, 0.5372, 0.2406]) -Greedy action tensor([-1.4025, -0.5342, 0.8580, 0.9840]) tensor([0.0419, 0.0999, 0.4021, 0.4561]) -Greedy action tensor([-1.5844, -0.2539, 0.4829, -0.1162]) tensor([0.0587, 0.2222, 0.4642, 0.2550]) -Greedy action tensor([-0.2621, -0.5118, 0.2882, 0.7442]) tensor([0.1600, 0.1247, 0.2775, 0.4378]) -Greedy action tensor([-1.4546, -0.5347, 0.4522, 0.2091]) tensor([0.0644, 0.1617, 0.4337, 0.3402]) -Greedy action tensor([-1.8894, -0.4399, 0.6391, -0.1477]) tensor([0.0425, 0.1813, 0.5333, 0.2428]) -Greedy action tensor([-1.8844, -0.4117, 0.6411, -0.1436]) tensor([0.0424, 0.1851, 0.5304, 0.2420]) -Greedy action tensor([-1.8620, -0.4450, 0.6251, -0.1420]) tensor([0.0440, 0.1814, 0.5290, 0.2456]) -Greedy action tensor([-1.7157, -0.2280, 0.1593, -0.3601]) tensor([0.0632, 0.2797, 0.4120, 0.2451]) -Greedy action tensor([-1.9138, -0.4189, 0.6552, -0.1568]) tensor([0.0411, 0.1834, 0.5370, 0.2384]) -Greedy action tensor([-1.9412, -0.4487, 0.6665, -0.1790]) tensor([0.0403, 0.1791, 0.5462, 0.2345]) -Greedy action tensor([-1.7979, -0.4574, 0.7408, 0.0407]) tensor([0.0421, 0.1607, 0.5327, 0.2645]) -Greedy action tensor([-1.8860, -0.4482, 0.6402, -0.1395]) tensor([0.0426, 0.1796, 0.5333, 0.2445]) -Greedy action tensor([-1.7272, -0.4233, 0.5589, -0.0631]) tensor([0.0505, 0.1860, 0.4968, 0.2667]) -Greedy action tensor([-1.8737, -0.4404, 0.6354, -0.1448]) tensor([0.0433, 0.1813, 0.5317, 0.2437]) -Greedy action tensor([-1.8446, -0.3471, 0.6017, -0.1168]) tensor([0.0442, 0.1974, 0.5099, 0.2485]) -Greedy action tensor([-1.6947, -0.4680, 0.5352, -0.0444]) tensor([0.0529, 0.1803, 0.4916, 0.2753]) -Greedy action tensor([-1.8177, -0.2464, 0.5789, -0.1027]) tensor([0.0447, 0.2153, 0.4914, 0.2486]) -Greedy action tensor([-1.1958, 0.3956, 0.5311, 0.5036]) tensor([0.0588, 0.2888, 0.3307, 0.3217]) -Greedy action tensor([-1.6339, -0.5532, 0.5084, -0.0474]) tensor([0.0576, 0.1698, 0.4909, 0.2816]) -Greedy action tensor([-1.0995, -0.7097, 0.4625, 0.3269]) tensor([0.0876, 0.1294, 0.4180, 0.3650]) -Greedy action tensor([-1.7848, -0.4418, 0.6434, 0.0025]) tensor([0.0452, 0.1730, 0.5121, 0.2698]) -Greedy action tensor([-1.7625, -0.4407, 0.5861, -0.0769]) tensor([0.0485, 0.1819, 0.5079, 0.2617]) -Greedy action tensor([-0.9550, -0.8723, 1.0826, 1.3903]) tensor([0.0495, 0.0538, 0.3799, 0.5168]) -Greedy action tensor([-1.2792, -0.6158, 0.3349, 0.1196]) tensor([0.0832, 0.1616, 0.4181, 0.3371]) -Greedy action tensor([-1.9473, -0.4528, 0.6661, -0.1825]) tensor([0.0401, 0.1787, 0.5471, 0.2341]) -Greedy action tensor([-1.9417, -0.4468, 0.6674, -0.1761]) tensor([0.0402, 0.1791, 0.5459, 0.2348]) -Greedy action tensor([-0.8961, 0.5239, 0.2628, -0.4147]) tensor([0.1006, 0.4161, 0.3205, 0.1628]) -Greedy action tensor([-1.7217, -0.4327, 0.7148, 0.1684]) tensor([0.0441, 0.1600, 0.5040, 0.2919]) -Greedy action tensor([-1.9065, -0.3152, 0.6255, -0.1555]) tensor([0.0412, 0.2025, 0.5187, 0.2376]) -Greedy action tensor([-1.5963, 0.2877, 0.2888, -0.1035]) tensor([0.0537, 0.3535, 0.3538, 0.2390]) -Greedy action tensor([-1.5272, -0.5167, 1.4719, 1.0473]) tensor([0.0271, 0.0744, 0.5433, 0.3553]) -Greedy action tensor([-1.2851, 0.6804, 0.2798, -0.0166]) tensor([0.0607, 0.4333, 0.2903, 0.2158]) -Greedy action tensor([-0.6263, -0.5313, 0.1611, 0.1145]) tensor([0.1564, 0.1720, 0.3437, 0.3280]) -Greedy action tensor([-1.8389, -0.4664, 0.6754, -0.0524]) tensor([0.0430, 0.1695, 0.5310, 0.2565]) -Greedy action tensor([-1.7941, -0.2485, 0.5668, -0.0466]) tensor([0.0454, 0.2129, 0.4812, 0.2606]) -Greedy action tensor([-1.5492, -0.5468, 0.4500, 0.0779]) tensor([0.0617, 0.1682, 0.4558, 0.3142]) -Greedy action tensor([-1.8648, -0.3404, 0.6098, -0.1473]) tensor([0.0434, 0.1993, 0.5155, 0.2418]) -Greedy action tensor([-1.5654, -0.4089, 0.4667, -0.0028]) tensor([0.0603, 0.1917, 0.4602, 0.2878]) -Greedy action tensor([-1.8508, -0.3595, 0.6203, -0.1197]) tensor([0.0436, 0.1938, 0.5163, 0.2463]) -Greedy action tensor([-1.7010, -0.2395, 0.5546, 0.1057]) tensor([0.0477, 0.2059, 0.4556, 0.2908]) -Greedy action tensor([-1.9179, -0.4019, 0.6552, -0.1509]) tensor([0.0408, 0.1858, 0.5346, 0.2388]) -Greedy action tensor([-1.4186, -0.2269, 0.6951, 0.4179]) tensor([0.0531, 0.1747, 0.4393, 0.3329]) -Greedy action tensor([-1.7316, -0.5265, 0.5878, -0.0481]) tensor([0.0503, 0.1678, 0.5113, 0.2707]) -Greedy action tensor([-1.4403, 0.1725, 0.5758, -0.4836]) tensor([0.0620, 0.3111, 0.4656, 0.1614]) -Greedy action tensor([-0.0566, 0.3922, 0.1954, 0.3932]) tensor([0.1845, 0.2890, 0.2373, 0.2892]) -Greedy action tensor([-1.8428, -0.4721, 0.6247, -0.1103]) tensor([0.0447, 0.1759, 0.5268, 0.2526]) -Greedy action tensor([-1.7818, -0.4584, 0.5885, -0.0926]) tensor([0.0479, 0.1800, 0.5127, 0.2594]) -Greedy action tensor([-1.8749, -0.1508, 0.5885, -0.1512]) tensor([0.0417, 0.2341, 0.4902, 0.2340]) -Greedy action tensor([-1.9461, -0.4563, 0.6710, -0.1812]) tensor([0.0400, 0.1776, 0.5484, 0.2339]) -Greedy action tensor([-1.8635, -0.4623, 0.6374, -0.1303]) tensor([0.0436, 0.1772, 0.5322, 0.2470]) -Greedy action tensor([ 0.2459, 0.2196, -0.4563, 0.9731]) tensor([0.2203, 0.2146, 0.1092, 0.4559]) -Greedy action tensor([ 0.0621, -1.4281, 0.9251, -1.1435]) tensor([0.2567, 0.0579, 0.6085, 0.0769]) -Greedy action tensor([-1.4377, -1.3562, 0.5861, -0.7615]) tensor([0.0861, 0.0934, 0.6513, 0.1693]) -Greedy action tensor([0.4295, 0.5401, 0.6652, 0.0117]) tensor([0.2475, 0.2764, 0.3132, 0.1629]) -Greedy action tensor([-1.2588, -1.8756, -0.7343, 0.2416]) tensor([0.1297, 0.0700, 0.2191, 0.5813]) -Greedy action tensor([1.5149, 0.0670, 0.7133, 0.9235]) tensor([0.4470, 0.1051, 0.2005, 0.2474]) -Greedy action tensor([ 0.8841, -0.4109, 0.0663, 1.0151]) tensor([0.3502, 0.0959, 0.1546, 0.3993]) -Greedy action tensor([-0.0727, -0.9730, 1.0683, 1.5056]) tensor([0.1066, 0.0433, 0.3336, 0.5165]) -Greedy action tensor([ 0.7101, -0.0717, 0.3578, 0.2634]) tensor([0.3571, 0.1634, 0.2511, 0.2284]) -Greedy action tensor([ 0.5833, -0.4519, -0.0149, 0.2326]) tensor([0.3833, 0.1361, 0.2107, 0.2699]) -Greedy action tensor([1.2592, 0.3144, 0.3165, 0.7753]) tensor([0.4176, 0.1623, 0.1627, 0.2574]) -Greedy action tensor([-1.2191, -0.1881, 0.4138, -0.8012]) tensor([0.0958, 0.2685, 0.4902, 0.1455]) -Greedy action tensor([ 0.5051, -0.0900, 0.0841, 0.5774]) tensor([0.3046, 0.1680, 0.2000, 0.3274]) -Greedy action tensor([ 0.1417, -0.9715, 1.1019, -0.6769]) tensor([0.2282, 0.0750, 0.5962, 0.1007]) -Greedy action tensor([-0.7369, -1.6566, -0.4877, 0.4555]) tensor([0.1673, 0.0667, 0.2147, 0.5513]) -Greedy action tensor([-0.6026, -0.6556, 1.8570, -1.0395]) tensor([0.0700, 0.0663, 0.8185, 0.0452]) -Greedy action tensor([1.1736, 0.4159, 0.8128, 1.4686]) tensor([0.2850, 0.1336, 0.1987, 0.3828]) -Greedy action tensor([ 0.1121, -0.2958, -0.7378, 0.9518]) tensor([0.2268, 0.1509, 0.0970, 0.5253]) -Greedy action tensor([-0.3854, -1.0463, 0.4489, 0.4098]) tensor([0.1657, 0.0856, 0.3817, 0.3670]) -Greedy action tensor([-0.0359, -2.1787, -0.8078, 0.1338]) tensor([0.3617, 0.0424, 0.1672, 0.4287]) -Greedy action tensor([ 1.1308, -1.3315, 0.1283, 0.3849]) tensor([0.5191, 0.0442, 0.1905, 0.2462]) -Greedy action tensor([ 0.4798, -1.1895, -0.0491, 0.7655]) tensor([0.3217, 0.0606, 0.1896, 0.4281]) -Greedy action tensor([0.1689, 0.8240, 0.3406, 0.4706]) tensor([0.1830, 0.3523, 0.2173, 0.2474]) -Greedy action tensor([ 0.4246, -0.7661, 0.4354, 1.1417]) tensor([0.2292, 0.0697, 0.2317, 0.4695]) -Greedy action tensor([ 0.0914, -0.1834, 1.0375, -0.9479]) tensor([0.2133, 0.1620, 0.5493, 0.0754]) -Greedy action tensor([ 0.6167, 0.1729, -0.9935, 0.1342]) tensor([0.4067, 0.2610, 0.0813, 0.2510]) -Greedy action tensor([ 0.9559, -0.2935, -0.1269, 0.4984]) tensor([0.4428, 0.1270, 0.1500, 0.2803]) -Greedy action tensor([-0.4924, 0.1349, -0.8061, -0.3734]) tensor([0.2114, 0.3959, 0.1545, 0.2381]) -Greedy action tensor([0.5365, 0.7451, 0.5762, 0.8694]) tensor([0.2142, 0.2639, 0.2229, 0.2989]) -Greedy action tensor([ 1.4116, -1.6863, 1.1541, 1.1924]) tensor([0.3815, 0.0172, 0.2949, 0.3064]) -Greedy action tensor([0.4633, 0.4563, 0.6846, 0.1490]) tensor([0.2518, 0.2501, 0.3142, 0.1839]) -Greedy action tensor([ 1.0016, -1.0354, 0.4222, 0.7690]) tensor([0.4027, 0.0525, 0.2256, 0.3191]) -Greedy action tensor([ 1.3396, 0.0227, 1.1478, -0.1995]) tensor([0.4333, 0.1161, 0.3577, 0.0930]) -Greedy action tensor([-0.2507, -0.3492, 0.8872, -0.3240]) tensor([0.1679, 0.1522, 0.5239, 0.1560]) -Greedy action tensor([ 0.0991, 0.3251, 0.8173, -0.7183]) tensor([0.2107, 0.2641, 0.4321, 0.0930]) -Greedy action tensor([-1.2979, -0.8036, -0.6185, -0.6689]) tensor([0.1541, 0.2527, 0.3041, 0.2891]) -Greedy action tensor([-1.1805, -0.6865, -1.3104, -0.0018]) tensor([0.1478, 0.2422, 0.1298, 0.4803]) -Greedy action tensor([-0.1170, -0.4873, -0.7467, 0.3249]) tensor([0.2646, 0.1827, 0.1410, 0.4117]) -Greedy action tensor([ 0.7610, -0.6114, 0.7500, 1.0246]) tensor([0.2822, 0.0715, 0.2791, 0.3672]) -Greedy action tensor([-0.0033, 0.5376, 1.3196, -0.6544]) tensor([0.1430, 0.2456, 0.5368, 0.0746]) -Greedy action tensor([ 0.7689, -0.2770, 1.2146, -0.1855]) tensor([0.3032, 0.1065, 0.4735, 0.1168]) -Greedy action tensor([ 0.9051, -0.2652, -0.3545, 0.2869]) tensor([0.4688, 0.1455, 0.1330, 0.2527]) -Greedy action tensor([ 0.6060, 0.1730, -0.0357, 0.1055]) tensor([0.3596, 0.2332, 0.1893, 0.2180]) -Greedy action tensor([ 1.2357, 0.2435, -1.1280, 0.8219]) tensor([0.4704, 0.1744, 0.0442, 0.3110]) -Greedy action tensor([-0.6538, -1.0908, 1.5120, 1.2272]) tensor([0.0591, 0.0382, 0.5152, 0.3875]) -Greedy action tensor([ 0.9020, -1.5796, 0.1324, 0.8771]) tensor([0.3965, 0.0331, 0.1836, 0.3867]) -Greedy action tensor([-0.8062, 0.5336, -0.3161, 0.2109]) tensor([0.1085, 0.4143, 0.1771, 0.3000]) -Greedy action tensor([-1.0380, -1.5364, -0.0481, -0.7950]) tensor([0.1794, 0.1090, 0.4828, 0.2288]) -Greedy action tensor([-0.4153, 0.1697, 0.2018, -0.0780]) tensor([0.1653, 0.2967, 0.3064, 0.2316]) -Greedy action tensor([ 0.9044, -1.2348, -0.5725, 0.2875]) tensor([0.5303, 0.0624, 0.1211, 0.2862]) -Greedy action tensor([ 0.7773, -0.7583, -0.4127, 0.4524]) tensor([0.4460, 0.0960, 0.1357, 0.3223]) -Greedy action tensor([-0.3297, 0.7070, 1.0415, -0.8255]) tensor([0.1195, 0.3369, 0.4708, 0.0728]) -Greedy action tensor([-0.5151, 0.1112, 0.4021, 0.5679]) tensor([0.1201, 0.2247, 0.3005, 0.3547]) -Greedy action tensor([ 0.8056, -1.2276, -0.0845, 1.2163]) tensor([0.3279, 0.0429, 0.1347, 0.4945]) -Greedy action tensor([ 1.1410, -0.3863, 0.5022, -0.5026]) tensor([0.5159, 0.1120, 0.2724, 0.0997]) -Greedy action tensor([ 0.0476, -0.2230, -0.2668, 0.3689]) tensor([0.2583, 0.1970, 0.1886, 0.3561]) -Greedy action tensor([ 1.8466, -1.4455, -0.2426, 0.8019]) tensor([0.6610, 0.0246, 0.0818, 0.2326]) -Greedy action tensor([ 0.1536, -0.3090, 0.6452, 0.3824]) tensor([0.2212, 0.1392, 0.3616, 0.2780]) -Greedy action tensor([ 0.1875, -1.6845, 0.8110, 1.3307]) tensor([0.1624, 0.0250, 0.3030, 0.5096]) -Greedy action tensor([ 0.3549, -0.8537, -0.3883, 0.7616]) tensor([0.3053, 0.0912, 0.1452, 0.4584]) -Greedy action tensor([ 0.9276, -0.7117, -0.0163, 0.6178]) tensor([0.4316, 0.0838, 0.1679, 0.3166]) -Greedy action tensor([ 2.2478, -1.0474, -0.0404, 0.6490]) tensor([0.7459, 0.0276, 0.0757, 0.1508]) -Greedy action tensor([ 0.3333, -0.2948, 0.4800, 0.4144]) tensor([0.2648, 0.1413, 0.3067, 0.2872]) -Greedy action tensor([ 1.0975, -0.4839, -0.3923, 0.3929]) tensor([0.5194, 0.1068, 0.1171, 0.2567]) -Greedy action tensor([-0.3530, -1.7361, 0.0641, -0.9686]) tensor([0.3022, 0.0758, 0.4587, 0.1633]) -Greedy action tensor([-0.3385, -0.3894, -0.9808, 0.9145]) tensor([0.1673, 0.1590, 0.0880, 0.5857]) -Greedy action tensor([-0.7618, -1.2556, 0.3431, -1.5565]) tensor([0.1968, 0.1201, 0.5941, 0.0889]) -Greedy action tensor([ 1.1667, -0.7483, 1.2993, 1.5752]) tensor([0.2636, 0.0388, 0.3010, 0.3966]) -Greedy action tensor([-0.2928, 0.7309, -0.1923, -0.2179]) tensor([0.1676, 0.4665, 0.1853, 0.1806]) -Greedy action tensor([ 0.1725, -0.9086, -0.4601, 1.3922]) tensor([0.1902, 0.0645, 0.1010, 0.6442]) -Greedy action tensor([ 0.5599, -0.1562, 0.7170, -0.3757]) tensor([0.3278, 0.1602, 0.3835, 0.1286]) -Greedy action tensor([-1.1853, -0.3998, -0.2545, -0.4229]) tensor([0.1270, 0.2786, 0.3222, 0.2722]) -Greedy action tensor([-0.2312, -1.3880, 1.2432, 0.6624]) tensor([0.1231, 0.0387, 0.5375, 0.3007]) -Greedy action tensor([-0.0632, 0.9132, -0.3051, 1.5412]) tensor([0.1062, 0.2820, 0.0834, 0.5284]) -Greedy action tensor([-0.8490, -1.3008, -0.1371, -0.5240]) tensor([0.1977, 0.1258, 0.4029, 0.2736]) -Greedy action tensor([ 1.5662, -0.2260, 0.2686, 0.8836]) tensor([0.5141, 0.0856, 0.1404, 0.2598]) -Greedy action tensor([-0.7991, -0.7843, -1.0122, 0.0557]) tensor([0.1933, 0.1962, 0.1562, 0.4544]) -Greedy action tensor([-0.6067, -1.0161, 0.5687, 1.1816]) tensor([0.0919, 0.0610, 0.2977, 0.5494]) -Greedy action tensor([ 0.9251, 0.2337, -0.3067, 0.1232]) tensor([0.4462, 0.2235, 0.1302, 0.2001]) -Greedy action tensor([ 0.0304, -1.2730, 0.1943, 0.6612]) tensor([0.2310, 0.0627, 0.2722, 0.4341]) -Greedy action tensor([ 0.1630, -1.8988, 0.3979, 0.5053]) tensor([0.2631, 0.0335, 0.3328, 0.3705]) -Greedy action tensor([ 1.8146, -1.0603, -0.2915, 0.7334]) tensor([0.6591, 0.0372, 0.0802, 0.2235]) -Greedy action tensor([ 0.8755, -0.4567, -0.0871, -0.2140]) tensor([0.5045, 0.1331, 0.1927, 0.1697]) -Greedy action tensor([ 0.7763, -0.6602, 0.0224, -0.5839]) tensor([0.5089, 0.1210, 0.2395, 0.1306]) -Greedy action tensor([ 0.5614, -0.1855, -0.0712, -0.1010]) tensor([0.3967, 0.1880, 0.2107, 0.2046]) -Greedy action tensor([ 0.6835, -0.3059, 0.0487, -0.2691]) tensor([0.4372, 0.1625, 0.2317, 0.1686]) -Greedy action tensor([ 0.6610, -0.2745, -0.0991, -0.1569]) tensor([0.4345, 0.1705, 0.2032, 0.1918]) -Greedy action tensor([ 0.4392, -0.2416, 0.0116, -0.2208]) tensor([0.3738, 0.1892, 0.2437, 0.1932]) -Greedy action tensor([ 0.9338, -0.5481, -0.2129, -0.1381]) tensor([0.5299, 0.1204, 0.1683, 0.1814]) -Greedy action tensor([ 0.9815, -0.6794, -0.0367, -0.3562]) tensor([0.5514, 0.1047, 0.1992, 0.1447]) -Greedy action tensor([ 1.0951, -0.9516, 0.0723, -0.4444]) tensor([0.5871, 0.0758, 0.2111, 0.1259]) -Greedy action tensor([ 0.4813, -0.2463, 0.0490, -0.4385]) tensor([0.3952, 0.1909, 0.2565, 0.1575]) -Greedy action tensor([ 1.0152, -0.3596, 0.2071, -0.4609]) tensor([0.5189, 0.1312, 0.2313, 0.1186]) -Greedy action tensor([ 0.2921, 0.2226, 0.0087, -0.2750]) tensor([0.3074, 0.2867, 0.2315, 0.1743]) -Greedy action tensor([ 0.6032, -0.4712, 0.0032, -0.3440]) tensor([0.4389, 0.1499, 0.2409, 0.1702]) -Greedy action tensor([ 0.8412, -0.5162, -0.0868, -0.3095]) tensor([0.5079, 0.1307, 0.2008, 0.1607]) -Greedy action tensor([ 0.3549, 0.0696, -0.0072, -0.4387]) tensor([0.3448, 0.2592, 0.2401, 0.1559]) -Greedy action tensor([ 0.7773, -0.3093, 0.0811, -0.4578]) tensor([0.4702, 0.1586, 0.2344, 0.1367]) -Greedy action tensor([ 0.6171, -0.0864, 0.1111, -0.2142]) tensor([0.3947, 0.1953, 0.2380, 0.1719]) -Greedy action tensor([ 1.1815, -0.8206, 0.0281, -0.7964]) tensor([0.6293, 0.0850, 0.1986, 0.0871]) -Greedy action tensor([ 0.7785, -0.6954, -0.0218, -0.3898]) tensor([0.5027, 0.1151, 0.2258, 0.1563]) -Greedy action tensor([ 0.6767, -0.2775, -0.0244, -0.1867]) tensor([0.4342, 0.1672, 0.2154, 0.1831]) -Greedy action tensor([ 0.7939, -0.1450, -0.0526, -0.0676]) tensor([0.4459, 0.1744, 0.1913, 0.1884]) -Greedy action tensor([ 0.7819, -0.4050, -0.1796, -0.1208]) tensor([0.4778, 0.1458, 0.1827, 0.1937]) -Greedy action tensor([ 0.5961, -0.4334, -0.1461, -0.4852]) tensor([0.4603, 0.1644, 0.2191, 0.1561]) -Greedy action tensor([ 0.5876, -0.4603, 0.0032, -0.3252]) tensor([0.4330, 0.1518, 0.2414, 0.1738]) -Greedy action tensor([ 1.0129, -0.3699, -0.0378, -0.3491]) tensor([0.5386, 0.1351, 0.1883, 0.1380]) -Greedy action tensor([ 0.3520, 0.0837, -0.0735, 0.0500]) tensor([0.3167, 0.2422, 0.2070, 0.2342]) -Greedy action tensor([ 1.2138, -0.5606, -0.0374, -0.3604]) tensor([0.6013, 0.1020, 0.1721, 0.1246]) -Greedy action tensor([ 0.7399, -0.3153, 0.0644, -0.3626]) tensor([0.4568, 0.1590, 0.2325, 0.1517]) -Greedy action tensor([ 0.8988, -0.5718, -0.0739, -0.3443]) tensor([0.5273, 0.1212, 0.1994, 0.1521]) -Greedy action tensor([ 0.5884, -0.0812, -0.0993, -0.2111]) tensor([0.4058, 0.2077, 0.2040, 0.1824]) -Greedy action tensor([ 0.8536, -0.7555, -0.0387, -0.3920]) tensor([0.5270, 0.1054, 0.2159, 0.1517]) -Greedy action tensor([ 0.5723, -0.0292, 0.0781, -0.3553]) tensor([0.3916, 0.2146, 0.2389, 0.1549]) -Greedy action tensor([ 0.7595, 0.0627, 0.1336, -0.6329]) tensor([0.4383, 0.2184, 0.2344, 0.1089]) -Greedy action tensor([ 0.5152, 0.1003, -0.1396, -0.3383]) tensor([0.3837, 0.2534, 0.1994, 0.1634]) -Greedy action tensor([ 0.8064, -0.4663, -0.0656, -0.5447]) tensor([0.5110, 0.1431, 0.2136, 0.1323]) -Greedy action tensor([ 0.6824, -0.5137, -0.0359, -0.2270]) tensor([0.4560, 0.1379, 0.2224, 0.1837]) -Greedy action tensor([ 0.6596, -0.4154, -0.0100, -0.2035]) tensor([0.4395, 0.1500, 0.2250, 0.1854]) -Greedy action tensor([ 0.3827, -0.1264, 0.0107, -0.2882]) tensor([0.3569, 0.2145, 0.2461, 0.1825]) -Greedy action tensor([ 0.2839, 0.0458, -0.0588, -0.2925]) tensor([0.3268, 0.2576, 0.2320, 0.1836]) -Greedy action tensor([ 0.6585, -0.6863, -0.0427, -0.2703]) tensor([0.4648, 0.1211, 0.2305, 0.1836]) -Greedy action tensor([ 0.8124, -0.6440, -0.0793, -0.1547]) tensor([0.4943, 0.1152, 0.2026, 0.1879]) -Greedy action tensor([ 0.9646, -0.7263, 0.0101, -0.4359]) tensor([0.5507, 0.1015, 0.2120, 0.1357]) -Greedy action tensor([ 0.5041, -0.0058, -0.0020, -0.2055]) tensor([0.3710, 0.2228, 0.2237, 0.1825]) -Greedy action tensor([ 0.2661, 0.0932, -0.0747, -0.1739]) tensor([0.3129, 0.2632, 0.2225, 0.2015]) -Greedy action tensor([ 0.7391, -0.4707, 0.0292, -0.5239]) tensor([0.4825, 0.1439, 0.2372, 0.1364]) -Greedy action tensor([ 1.0061, -0.9247, 0.0616, -0.6743]) tensor([0.5813, 0.0843, 0.2261, 0.1083]) -Greedy action tensor([ 0.6143, -0.2400, -0.0597, -0.2004]) tensor([0.4205, 0.1790, 0.2143, 0.1862]) -Greedy action tensor([ 0.7473, -0.2556, -0.0314, -0.1615]) tensor([0.4487, 0.1646, 0.2059, 0.1808]) -Greedy action tensor([ 1.2057, -0.9196, 0.0573, -0.6777]) tensor([0.6295, 0.0752, 0.1996, 0.0957]) -Greedy action tensor([ 0.7102, -0.7476, 0.1701, -0.7237]) tensor([0.4869, 0.1133, 0.2837, 0.1161]) -Greedy action tensor([ 0.4221, -0.2870, -0.0920, -0.0635]) tensor([0.3696, 0.1819, 0.2210, 0.2274]) -Greedy action tensor([ 0.9392, -0.7896, -0.0508, -0.5569]) tensor([0.5640, 0.1001, 0.2096, 0.1263]) -Greedy action tensor([ 0.5930, 0.1102, -0.1266, -0.0995]) tensor([0.3840, 0.2369, 0.1870, 0.1921]) -Greedy action tensor([ 0.8673, -0.3444, -0.0801, -0.1909]) tensor([0.4920, 0.1465, 0.1908, 0.1708]) -Greedy action tensor([ 0.7378, -0.5544, -0.1456, -0.1785]) tensor([0.4789, 0.1315, 0.1980, 0.1916]) -Greedy action tensor([ 0.6212, -0.2646, 0.0395, -0.5340]) tensor([0.4374, 0.1804, 0.2445, 0.1378]) -Greedy action tensor([ 0.8538, -0.5902, -0.0892, -0.3236]) tensor([0.5172, 0.1221, 0.2014, 0.1593]) -Greedy action tensor([ 0.6179, -0.5545, -0.0968, -0.1838]) tensor([0.4449, 0.1378, 0.2177, 0.1996]) -Greedy action tensor([ 0.2318, -0.2895, -0.0494, -0.2592]) tensor([0.3378, 0.2005, 0.2550, 0.2067]) -Greedy action tensor([ 0.5839, -0.3262, 0.0163, -0.7874]) tensor([0.4498, 0.1810, 0.2550, 0.1142]) -Greedy action tensor([ 0.6006, -0.5709, -0.0387, -0.3166]) tensor([0.4470, 0.1385, 0.2359, 0.1786]) -Greedy action tensor([0.1538, 0.3539, 0.0597, 0.1821]) tensor([0.2404, 0.2936, 0.2188, 0.2473]) -Greedy action tensor([ 1.1492, -0.9837, 0.0813, -0.5831]) tensor([0.6101, 0.0723, 0.2097, 0.1079]) -Greedy action tensor([ 0.6788, -0.3272, -0.0113, -0.2432]) tensor([0.4415, 0.1615, 0.2214, 0.1756]) -Greedy action tensor([ 0.2049, 0.0199, -0.0860, -0.4947]) tensor([0.3252, 0.2702, 0.2431, 0.1615]) -Greedy action tensor([ 0.8960, -0.5785, 0.1848, -0.7552]) tensor([0.5231, 0.1197, 0.2569, 0.1003]) -Greedy action tensor([ 0.9470, -0.4989, -0.2436, -0.5397]) tensor([0.5664, 0.1334, 0.1722, 0.1281]) -Greedy action tensor([ 0.5925, -0.2185, -0.1782, -0.1297]) tensor([0.4179, 0.1857, 0.1934, 0.2030]) -Greedy action tensor([ 0.4837, 0.1573, -0.0353, -0.1011]) tensor([0.3480, 0.2511, 0.2071, 0.1939]) -Greedy action tensor([ 0.8580, 0.2840, -0.0769, 0.2282]) tensor([0.4018, 0.2263, 0.1578, 0.2141]) -Greedy action tensor([ 0.8669, -0.2165, 0.0045, -0.1314]) tensor([0.4697, 0.1590, 0.1983, 0.1731]) -Greedy action tensor([ 0.2614, -0.0087, -0.0923, -0.4272]) tensor([0.3370, 0.2572, 0.2366, 0.1692]) -Greedy action tensor([ 0.4810, -0.1851, -0.0328, -0.2400]) tensor([0.3849, 0.1977, 0.2302, 0.1872]) -Greedy action tensor([ 0.7064, -0.5597, -0.0663, -0.3776]) tensor([0.4803, 0.1354, 0.2218, 0.1625]) -Greedy action tensor([ 0.8659, -0.6607, 0.0773, -0.7489]) tensor([0.5346, 0.1161, 0.2429, 0.1063]) -Greedy action tensor([ 0.6064, -0.1541, -0.1064, -0.1348]) tensor([0.4108, 0.1920, 0.2014, 0.1958]) -Greedy action tensor([ 0.7370, -0.4353, 0.1351, -0.5271]) tensor([0.4673, 0.1447, 0.2560, 0.1320]) -Greedy action tensor([ 0.8085, -0.7950, 0.1170, -0.7340]) tensor([0.5220, 0.1050, 0.2614, 0.1116]) -Greedy action tensor([ 0.6432, -0.6685, -0.0445, -0.1429]) tensor([0.4489, 0.1209, 0.2257, 0.2045]) -Greedy action tensor([ 0.6782, -0.5009, -0.1352, -0.1466]) tensor([0.4568, 0.1405, 0.2025, 0.2002]) -Greedy action tensor([ 0.6581, -0.4382, -0.0239, -0.3118]) tensor([0.4507, 0.1506, 0.2279, 0.1709]) -Greedy action tensor([ 1.5714, -0.5879, -0.3225, 0.7377]) tensor([0.5881, 0.0679, 0.0885, 0.2555]) -Greedy action tensor([ 1.1561, -0.3290, 0.1145, 0.1431]) tensor([0.5148, 0.1166, 0.1817, 0.1869]) -Greedy action tensor([ 1.6928, -0.6178, -0.3685, 0.2385]) tensor([0.6849, 0.0679, 0.0872, 0.1600]) -Greedy action tensor([ 0.5536, -0.1254, -0.1360, 0.1776]) tensor([0.3710, 0.1881, 0.1862, 0.2547]) -Greedy action tensor([ 1.2609, 0.3150, -0.4650, -0.2305]) tensor([0.5582, 0.2168, 0.0994, 0.1256]) -Greedy action tensor([ 1.8356, -0.7184, -0.2198, 0.6184]) tensor([0.6658, 0.0518, 0.0853, 0.1971]) -Greedy action tensor([ 1.3723, -0.7145, -0.5932, 0.5779]) tensor([0.5827, 0.0723, 0.0816, 0.2633]) -Greedy action tensor([ 1.4556, -0.7462, -0.2224, 0.3011]) tensor([0.6201, 0.0686, 0.1158, 0.1955]) -Greedy action tensor([ 1.5337, -1.0123, -0.2665, 0.2053]) tensor([0.6629, 0.0520, 0.1096, 0.1756]) -Greedy action tensor([ 1.7790, -0.1855, -0.4978, 0.1162]) tensor([0.6981, 0.0979, 0.0716, 0.1324]) -Greedy action tensor([ 1.4087, -0.3802, -0.3532, 0.0521]) tensor([0.6264, 0.1047, 0.1076, 0.1613]) -Greedy action tensor([ 0.9308, -0.2250, -0.0180, 0.2807]) tensor([0.4496, 0.1415, 0.1741, 0.2347]) -Greedy action tensor([ 1.5163, -0.6399, -0.1503, 0.2523]) tensor([0.6301, 0.0729, 0.1190, 0.1780]) -Greedy action tensor([ 1.3366, -0.5478, -0.2630, 0.2257]) tensor([0.5941, 0.0903, 0.1200, 0.1956]) -Greedy action tensor([ 1.1512, -0.4183, -0.0885, 0.1688]) tensor([0.5342, 0.1112, 0.1546, 0.2000]) -Greedy action tensor([ 1.0909, -0.2711, -0.4704, 0.2762]) tensor([0.5239, 0.1342, 0.1099, 0.2320]) -Greedy action tensor([ 1.6418, -0.1800, -0.2397, 0.9087]) tensor([0.5573, 0.0901, 0.0849, 0.2677]) -Greedy action tensor([ 0.7064, -0.4226, 0.3020, -0.1200]) tensor([0.4118, 0.1332, 0.2748, 0.1802]) -Greedy action tensor([ 1.8784, -0.7612, -0.1570, 0.4897]) tensor([0.6890, 0.0492, 0.0900, 0.1718]) -Greedy action tensor([ 1.6144, -0.6471, -0.3438, 0.1145]) tensor([0.6810, 0.0710, 0.0961, 0.1520]) -Greedy action tensor([ 1.5259, -0.4814, -0.0177, 0.2113]) tensor([0.6186, 0.0831, 0.1321, 0.1661]) -Greedy action tensor([ 1.9021, 0.0225, -0.2630, 0.5030]) tensor([0.6604, 0.1008, 0.0758, 0.1630]) -Greedy action tensor([ 1.1718, -0.2669, -0.2793, 0.6764]) tensor([0.4806, 0.1140, 0.1126, 0.2928]) -Greedy action tensor([ 1.1849, -0.5933, 0.2080, 0.0558]) tensor([0.5351, 0.0904, 0.2015, 0.1730]) -Greedy action tensor([ 1.5418, -0.4598, -0.3828, 0.3683]) tensor([0.6288, 0.0850, 0.0918, 0.1945]) -Greedy action tensor([ 1.5897, -0.2585, -0.3982, 0.2579]) tensor([0.6416, 0.1011, 0.0879, 0.1694]) -Greedy action tensor([ 1.3639, -0.2101, -0.2381, 0.1742]) tensor([0.5838, 0.1210, 0.1176, 0.1776]) -Greedy action tensor([ 2.0546, -0.8941, -0.3985, 0.8954]) tensor([0.6886, 0.0361, 0.0592, 0.2160]) -Greedy action tensor([ 1.6973, -0.7983, -0.4507, 0.4536]) tensor([0.6723, 0.0554, 0.0785, 0.1938]) -Greedy action tensor([ 0.3061, 0.0490, -0.0428, 0.0048]) tensor([0.3107, 0.2403, 0.2192, 0.2299]) -Greedy action tensor([ 1.4241, -0.1288, -0.4843, -0.2798]) tensor([0.6485, 0.1373, 0.0962, 0.1180]) -Greedy action tensor([ 1.0503, -0.5743, -0.1023, 0.6517]) tensor([0.4579, 0.0902, 0.1446, 0.3073]) -Greedy action tensor([ 1.7369, 0.5562, 0.1679, -0.7199]) tensor([0.6246, 0.1918, 0.1301, 0.0535]) -Greedy action tensor([-0.1047, -0.3433, 0.1356, 0.0749]) tensor([0.2350, 0.1851, 0.2988, 0.2812]) -Greedy action tensor([ 1.4989, -0.6847, -0.1843, 0.3098]) tensor([0.6239, 0.0703, 0.1159, 0.1900]) -Greedy action tensor([ 1.4245, -0.6704, 0.0361, 0.3926]) tensor([0.5784, 0.0712, 0.1443, 0.2061]) -Greedy action tensor([ 1.0527, -0.2742, -0.1467, 0.1566]) tensor([0.5064, 0.1343, 0.1526, 0.2067]) -Greedy action tensor([ 1.3413, 0.2368, -0.4979, 0.2868]) tensor([0.5439, 0.1802, 0.0864, 0.1895]) -Greedy action tensor([ 0.8083, -0.6638, -0.3355, 0.4966]) tensor([0.4385, 0.1006, 0.1397, 0.3211]) -Greedy action tensor([ 1.6151, -0.8942, -0.0548, 0.4077]) tensor([0.6375, 0.0519, 0.1200, 0.1906]) -Greedy action tensor([ 1.0639, -0.5338, -0.4725, 0.2240]) tensor([0.5408, 0.1094, 0.1163, 0.2335]) -Greedy action tensor([ 1.6496, -0.7597, -0.2938, 0.3116]) tensor([0.6687, 0.0601, 0.0958, 0.1754]) -Greedy action tensor([ 1.1502, -0.1821, -0.4447, 0.4038]) tensor([0.5152, 0.1359, 0.1046, 0.2443]) -Greedy action tensor([ 1.4858, -0.3437, -0.6118, 0.5552]) tensor([0.5961, 0.0957, 0.0732, 0.2350]) -Greedy action tensor([ 0.5043, -0.2108, -0.0846, 0.2049]) tensor([0.3590, 0.1756, 0.1992, 0.2661]) -Greedy action tensor([ 1.5668, -0.5138, -0.5857, 0.3750]) tensor([0.6474, 0.0808, 0.0752, 0.1966]) -Greedy action tensor([ 1.3638, -0.7010, -0.2689, 0.5160]) tensor([0.5712, 0.0725, 0.1116, 0.2447]) -Greedy action tensor([ 1.2580, -0.3441, -0.4702, 0.1615]) tensor([0.5837, 0.1176, 0.1037, 0.1950]) -Greedy action tensor([ 1.0559, -0.0228, -0.1756, 0.3339]) tensor([0.4722, 0.1606, 0.1378, 0.2294]) -Greedy action tensor([ 1.6872, -0.5231, -0.4191, 0.4259]) tensor([0.6602, 0.0724, 0.0803, 0.1870]) -Greedy action tensor([ 1.5153, -0.1103, -0.4747, 0.1810]) tensor([0.6262, 0.1232, 0.0856, 0.1649]) -Greedy action tensor([ 1.0591, 0.0488, -0.3876, 0.1247]) tensor([0.5019, 0.1828, 0.1181, 0.1972]) -Greedy action tensor([ 1.6382, -0.0027, -1.0840, 0.2929]) tensor([0.6579, 0.1275, 0.0432, 0.1714]) -Greedy action tensor([ 1.5521, -0.6014, -0.1252, 0.2474]) tensor([0.6352, 0.0737, 0.1187, 0.1723]) -Greedy action tensor([ 1.5281, -0.3975, -0.4716, 0.5463]) tensor([0.6039, 0.0880, 0.0818, 0.2263]) -Greedy action tensor([ 1.5592, -0.4137, -0.2130, 0.2348]) tensor([0.6349, 0.0883, 0.1079, 0.1689]) -Greedy action tensor([ 0.4765, -0.2912, 0.0535, 0.0085]) tensor([0.3642, 0.1690, 0.2386, 0.2281]) -Greedy action tensor([ 1.9190, -0.6692, -0.3750, 0.9271]) tensor([0.6465, 0.0486, 0.0652, 0.2398]) -Greedy action tensor([ 1.2675, -0.3463, -0.3883, 0.2491]) tensor([0.5710, 0.1137, 0.1090, 0.2062]) -Greedy action tensor([ 1.4435, -0.4038, -0.3141, 0.2826]) tensor([0.6085, 0.0959, 0.1049, 0.1906]) -Greedy action tensor([ 0.1294, 0.0350, -0.1502, -0.0911]) tensor([0.2883, 0.2624, 0.2180, 0.2313]) -Greedy action tensor([ 1.5450, -0.1524, 0.1339, 0.0346]) tensor([0.6068, 0.1111, 0.1480, 0.1340]) -Greedy action tensor([ 1.3307, -0.2514, -0.0906, 0.2953]) tensor([0.5549, 0.1141, 0.1340, 0.1971]) -Greedy action tensor([ 1.4745, -0.2598, -0.1642, 0.6205]) tensor([0.5567, 0.0983, 0.1081, 0.2370]) -Greedy action tensor([ 1.7882, -1.1756, -0.4685, 0.6882]) tensor([0.6715, 0.0347, 0.0703, 0.2235]) -Greedy action tensor([ 1.7636, -0.7384, 0.0628, 0.5195]) tensor([0.6441, 0.0528, 0.1176, 0.1856]) -Greedy action tensor([ 1.6767, -0.3111, -0.5485, 0.1077]) tensor([0.6881, 0.0943, 0.0743, 0.1433]) -Greedy action tensor([ 1.8454, -0.8675, -0.3594, -0.0353]) tensor([0.7524, 0.0499, 0.0830, 0.1147]) -Greedy action tensor([ 1.5792, -0.8132, -0.3772, 0.9965]) tensor([0.5583, 0.0510, 0.0789, 0.3117]) -Greedy action tensor([ 0.1740, 0.0321, -0.2924, 0.2975]) tensor([0.2757, 0.2393, 0.1730, 0.3120]) -Greedy action tensor([ 1.5096, -0.6733, -0.1405, 0.3292]) tensor([0.6204, 0.0699, 0.1191, 0.1906]) -Greedy action tensor([ 1.5155, -0.5818, -0.3910, 0.0272]) tensor([0.6679, 0.0820, 0.0993, 0.1508]) -Greedy action tensor([ 1.0110, -0.0491, -0.3002, 0.6189]) tensor([0.4364, 0.1512, 0.1176, 0.2948]) -Greedy action tensor([ 1.5960, -0.1898, -0.4184, 0.2117]) tensor([0.6445, 0.1081, 0.0860, 0.1614]) -Greedy action tensor([ 1.6690, -0.4184, -0.2362, 0.1520]) tensor([0.6702, 0.0831, 0.0997, 0.1470]) -Greedy action tensor([ 0.7565, -0.2459, -0.5666, 0.2437]) tensor([0.4480, 0.1644, 0.1193, 0.2683]) -Greedy action tensor([ 1.8848, -0.6290, -0.2319, 0.5769]) tensor([0.6794, 0.0550, 0.0818, 0.1837]) -Greedy action tensor([ 1.2479, -0.4652, -0.0754, 0.5378]) tensor([0.5160, 0.0930, 0.1374, 0.2536]) -Greedy action tensor([ 1.4948, -0.5257, -0.6305, 0.2798]) tensor([0.6457, 0.0856, 0.0771, 0.1916]) -Greedy action tensor([ 1.4847, 0.0042, -0.5917, 0.3226]) tensor([0.6003, 0.1366, 0.0753, 0.1878]) -Greedy action tensor([ 1.5081, -0.4603, -0.2680, 0.4139]) tensor([0.6084, 0.0850, 0.1030, 0.2037]) -Greedy action tensor([-1.3099, -0.5889, 0.7660, 0.6767]) tensor([0.0546, 0.1123, 0.4352, 0.3980]) -Greedy action tensor([-1.4929, -0.5261, 0.4545, 0.0465]) tensor([0.0654, 0.1718, 0.4581, 0.3047]) -Greedy action tensor([-8.0761e-01, -5.5182e-04, 1.4070e-01, 5.1746e-01]) tensor([0.1043, 0.2338, 0.2693, 0.3925]) -Greedy action tensor([-1.9147, -0.4280, 0.6477, -0.1618]) tensor([0.0414, 0.1830, 0.5367, 0.2389]) -Greedy action tensor([-1.5154, -0.3567, 0.7519, 0.4751]) tensor([0.0473, 0.1506, 0.4562, 0.3459]) -Greedy action tensor([-1.9323, -0.4301, 0.6551, -0.1728]) tensor([0.0407, 0.1826, 0.5405, 0.2362]) -Greedy action tensor([-1.5122, -0.0663, 0.7712, 0.4545]) tensor([0.0450, 0.1912, 0.4418, 0.3219]) -Greedy action tensor([-1.2519, 0.6312, 0.3054, -0.1680]) tensor([0.0655, 0.4303, 0.3107, 0.1935]) -Greedy action tensor([-1.8396, -0.3765, 0.6162, -0.1196]) tensor([0.0443, 0.1915, 0.5167, 0.2475]) -Greedy action tensor([-1.9184, -0.4370, 0.6707, -0.1491]) tensor([0.0407, 0.1789, 0.5417, 0.2386]) -Greedy action tensor([-1.8538, -0.4521, 0.6193, -0.1443]) tensor([0.0445, 0.1810, 0.5283, 0.2462]) -Greedy action tensor([-1.8886, -0.4458, 0.6405, -0.1496]) tensor([0.0426, 0.1804, 0.5345, 0.2425]) -Greedy action tensor([-1.5397, -0.3390, 0.7431, 0.3355]) tensor([0.0484, 0.1609, 0.4748, 0.3159]) -Greedy action tensor([-1.8350, -0.4902, 0.6396, -0.0935]) tensor([0.0446, 0.1712, 0.5297, 0.2545]) -Greedy action tensor([-0.8440, 0.8591, 0.0326, 0.3032]) tensor([0.0830, 0.4559, 0.1995, 0.2615]) -Greedy action tensor([-1.7676, -0.2857, 0.6343, -0.0384]) tensor([0.0453, 0.1993, 0.5002, 0.2552]) -Greedy action tensor([-1.8661, -0.3916, 0.6544, -0.1196]) tensor([0.0425, 0.1856, 0.5283, 0.2436]) -Greedy action tensor([-1.9140, -0.4511, 0.6557, -0.1637]) tensor([0.0414, 0.1789, 0.5412, 0.2385]) -Greedy action tensor([ 0.0511, -0.5387, 0.4362, 1.0739]) tensor([0.1723, 0.0955, 0.2532, 0.4790]) -Greedy action tensor([-1.8036, -0.2781, 0.5756, -0.0783]) tensor([0.0454, 0.2089, 0.4905, 0.2551]) -Greedy action tensor([-1.4779, -0.3461, 0.5295, 0.3196]) tensor([0.0569, 0.1764, 0.4234, 0.3433]) -Greedy action tensor([-1.6092, -0.5403, 0.5612, 0.1055]) tensor([0.0549, 0.1598, 0.4807, 0.3047]) -Greedy action tensor([-1.8926, -0.3203, 0.6244, -0.1572]) tensor([0.0419, 0.2017, 0.5189, 0.2375]) -Greedy action tensor([-0.8509, -0.8239, 0.1057, -0.5579]) tensor([0.1675, 0.1721, 0.4359, 0.2245]) -Greedy action tensor([-1.9135, -0.4037, 0.6391, -0.1724]) tensor([0.0415, 0.1880, 0.5335, 0.2370]) -Greedy action tensor([-0.9602, -0.0412, 0.2366, -0.2855]) tensor([0.1139, 0.2855, 0.3770, 0.2236]) -Greedy action tensor([-1.8393, -0.4528, 0.6144, -0.1328]) tensor([0.0452, 0.1807, 0.5253, 0.2488]) -Greedy action tensor([-1.8794, -0.4167, 0.6331, -0.1344]) tensor([0.0428, 0.1847, 0.5276, 0.2449]) -Greedy action tensor([-1.8997, -0.3177, 0.6277, -0.1435]) tensor([0.0414, 0.2012, 0.5179, 0.2395]) -Greedy action tensor([-1.0983, -0.8136, 0.6160, 0.4830]) tensor([0.0785, 0.1043, 0.4358, 0.3815]) -Greedy action tensor([-1.8514, -0.3152, 0.6156, -0.1222]) tensor([0.0433, 0.2014, 0.5109, 0.2443]) -Greedy action tensor([-1.9127, -0.4226, 0.6532, -0.1594]) tensor([0.0413, 0.1832, 0.5372, 0.2383]) -Greedy action tensor([-1.9184, -0.4013, 0.6519, -0.1953]) tensor([0.0413, 0.1882, 0.5394, 0.2312]) -Greedy action tensor([-1.9287, -0.4211, 0.6554, -0.1705]) tensor([0.0407, 0.1838, 0.5393, 0.2361]) -Greedy action tensor([-1.9288, -0.4440, 0.6695, -0.1703]) tensor([0.0406, 0.1790, 0.5451, 0.2354]) -Greedy action tensor([-1.1919, 0.4358, 0.2797, -0.1082]) tensor([0.0746, 0.3799, 0.3250, 0.2205]) -Greedy action tensor([-1.6119, -0.4221, 0.6578, 0.0800]) tensor([0.0516, 0.1695, 0.4990, 0.2800]) -Greedy action tensor([-1.7647, -0.4655, 0.5737, -0.0960]) tensor([0.0492, 0.1803, 0.5097, 0.2609]) -Greedy action tensor([-1.8715, -0.4712, 0.6750, -0.0709]) tensor([0.0419, 0.1699, 0.5346, 0.2536]) -Greedy action tensor([-1.9088, -0.3907, 0.6357, -0.1661]) tensor([0.0416, 0.1900, 0.5304, 0.2379]) -Greedy action tensor([-1.7812, -0.4054, 0.6582, -0.0727]) tensor([0.0456, 0.1804, 0.5225, 0.2516]) -Greedy action tensor([-1.8880, -0.4564, 0.6857, -0.1070]) tensor([0.0413, 0.1727, 0.5411, 0.2449]) -Greedy action tensor([-1.9134, -0.3854, 0.6560, -0.1544]) tensor([0.0409, 0.1883, 0.5336, 0.2373]) -Greedy action tensor([-1.7594e+00, -3.7274e-01, 5.7752e-01, -1.5947e-03]) tensor([0.0473, 0.1892, 0.4893, 0.2742]) -Greedy action tensor([-0.3311, 0.3558, 0.4267, 1.1267]) tensor([0.1062, 0.2110, 0.2266, 0.4562]) -Greedy action tensor([-0.8044, 0.1353, 0.2196, -0.2306]) tensor([0.1232, 0.3152, 0.3429, 0.2186]) -Greedy action tensor([-1.7906, -0.4528, 0.5799, -0.0820]) tensor([0.0475, 0.1812, 0.5088, 0.2625]) -Greedy action tensor([-1.9036, -0.4265, 0.6446, -0.1607]) tensor([0.0419, 0.1834, 0.5354, 0.2393]) -Greedy action tensor([-1.8682, -0.4051, 0.6095, -0.1475]) tensor([0.0438, 0.1893, 0.5220, 0.2449]) -Greedy action tensor([-1.8396, -0.3737, 0.6386, -0.1168]) tensor([0.0438, 0.1895, 0.5216, 0.2451]) -Greedy action tensor([-1.7192, -0.4859, 0.5519, -0.0082]) tensor([0.0509, 0.1746, 0.4930, 0.2816]) -Greedy action tensor([-1.7869, -0.4927, 0.5817, -0.1058]) tensor([0.0483, 0.1762, 0.5160, 0.2595]) -Greedy action tensor([-1.8587, -0.4476, 0.6244, -0.1203]) tensor([0.0439, 0.1801, 0.5261, 0.2498]) -Greedy action tensor([-1.9414, -0.4513, 0.6652, -0.1781]) tensor([0.0403, 0.1788, 0.5460, 0.2349]) -Greedy action tensor([-1.9253, -0.4358, 0.6533, -0.1723]) tensor([0.0410, 0.1819, 0.5404, 0.2367]) -Greedy action tensor([-1.9400, -0.4505, 0.6699, -0.1771]) tensor([0.0402, 0.1784, 0.5469, 0.2345]) -Greedy action tensor([-1.3691, -0.3397, 0.7338, 0.6813]) tensor([0.0506, 0.1417, 0.4145, 0.3932]) -Greedy action tensor([-1.5974, -0.5486, 0.4734, 0.0161]) tensor([0.0595, 0.1698, 0.4719, 0.2987]) -Greedy action tensor([-1.2962, -0.5064, 0.3503, -0.0153]) tensor([0.0834, 0.1837, 0.4327, 0.3002]) -Greedy action tensor([-1.6758, -0.4509, 0.5273, -0.0414]) tensor([0.0538, 0.1832, 0.4872, 0.2759]) -Greedy action tensor([-1.6597, 0.3451, 0.4112, 0.0818]) tensor([0.0453, 0.3365, 0.3595, 0.2586]) -Greedy action tensor([-1.1192, -0.5721, 0.2390, 0.2768]) tensor([0.0938, 0.1622, 0.3650, 0.3790]) -Greedy action tensor([-1.5233, -0.4492, 0.5218, 0.1405]) tensor([0.0590, 0.1728, 0.4564, 0.3117]) -Greedy action tensor([-1.4440, -0.5753, 0.4279, 0.2333]) tensor([0.0656, 0.1565, 0.4267, 0.3512]) -Greedy action tensor([-1.4267, -0.7036, 0.3169, 0.1018]) tensor([0.0747, 0.1539, 0.4270, 0.3444]) -Greedy action tensor([-1.8153, -0.3639, 0.5877, -0.1139]) tensor([0.0459, 0.1958, 0.5070, 0.2514]) -Greedy action tensor([-1.0088, -0.6479, 0.1910, 0.7229]) tensor([0.0877, 0.1258, 0.2911, 0.4955]) -Greedy action tensor([-1.3778, -0.0705, 0.5321, 0.2144]) tensor([0.0611, 0.2259, 0.4127, 0.3003]) -Greedy action tensor([-1.8929, -0.3897, 0.6416, -0.1479]) tensor([0.0420, 0.1887, 0.5291, 0.2403]) -Greedy action tensor([-1.9258, -0.4123, 0.6536, -0.1669]) tensor([0.0408, 0.1851, 0.5375, 0.2366]) -Greedy action tensor([-1.9039, -0.4711, 0.6506, -0.1587]) tensor([0.0421, 0.1762, 0.5409, 0.2408]) -Greedy action tensor([-1.9085, -0.3459, 0.6379, -0.1573]) tensor([0.0412, 0.1964, 0.5253, 0.2371]) -Greedy action tensor([-0.8850, -0.3807, 0.4499, 0.9800]) tensor([0.0775, 0.1282, 0.2943, 0.5000]) -Greedy action tensor([-1.9150, -0.4460, 0.6525, -0.1633]) tensor([0.0414, 0.1800, 0.5399, 0.2388]) -Greedy action tensor([-1.6777, -0.3274, 0.5470, 0.0152]) tensor([0.0512, 0.1974, 0.4733, 0.2781]) -Greedy action tensor([-1.9071, -0.4523, 0.6529, -0.1613]) tensor([0.0418, 0.1789, 0.5401, 0.2393]) -Greedy action tensor([-1.8081, -0.5004, 0.6087, -0.0894]) tensor([0.0465, 0.1721, 0.5218, 0.2596]) -Greedy action tensor([-1.8182, -0.4473, 0.6151, -0.1705]) tensor([0.0464, 0.1829, 0.5293, 0.2413]) -Greedy action tensor([-1.7932, -0.4064, 0.5970, -0.0818]) tensor([0.0466, 0.1865, 0.5088, 0.2581]) -Greedy action tensor([-1.8215, -0.3502, 0.6204, -0.0631]) tensor([0.0441, 0.1922, 0.5074, 0.2562]) -Greedy action tensor([-1.2903, -0.5376, 0.3145, 0.2122]) tensor([0.0794, 0.1686, 0.3952, 0.3568]) -Greedy action tensor([ 0.2266, -0.0782, -0.1413, -0.1906]) tensor([0.3238, 0.2387, 0.2241, 0.2134]) -Greedy action tensor([ 0.7410, -0.3509, -0.0660, -0.1724]) tensor([0.4581, 0.1537, 0.2044, 0.1838]) -Greedy action tensor([ 0.3962, -0.0834, 0.0163, -0.2356]) tensor([0.3528, 0.2184, 0.2413, 0.1876]) -Greedy action tensor([ 0.6597, -0.7470, 0.0053, -0.3825]) tensor([0.4723, 0.1157, 0.2455, 0.1666]) -Greedy action tensor([ 0.5060, -0.5699, -0.1926, -0.2366]) tensor([0.4321, 0.1473, 0.2149, 0.2056]) -Greedy action tensor([ 0.5080, -0.1377, 0.1174, -0.3819]) tensor([0.3829, 0.2008, 0.2591, 0.1573]) -Greedy action tensor([ 0.6755, -0.7304, -0.0572, -0.2414]) tensor([0.4705, 0.1153, 0.2261, 0.1881]) -Greedy action tensor([ 0.5230, -0.1513, 0.0855, -0.8682]) tensor([0.4160, 0.2119, 0.2686, 0.1035]) -Greedy action tensor([ 0.4022, -0.2757, -0.1003, -0.3817]) tensor([0.3892, 0.1976, 0.2355, 0.1777]) -Greedy action tensor([ 0.5684, -0.2050, 0.0197, -0.4292]) tensor([0.4153, 0.1916, 0.2399, 0.1531]) -Greedy action tensor([ 0.6628, -0.2334, -0.1155, -0.5776]) tensor([0.4637, 0.1892, 0.2129, 0.1341]) -Greedy action tensor([ 0.5148, 0.1231, -0.0746, -0.0050]) tensor([0.3540, 0.2392, 0.1963, 0.2105]) -Greedy action tensor([ 0.6097, 0.0191, -0.0814, 0.0754]) tensor([0.3786, 0.2098, 0.1897, 0.2219]) -Greedy action tensor([ 0.6571, -0.5516, -0.0576, -0.2228]) tensor([0.4540, 0.1356, 0.2221, 0.1883]) -Greedy action tensor([ 0.7490, -0.4120, 0.0063, -0.3296]) tensor([0.4697, 0.1471, 0.2235, 0.1597]) -Greedy action tensor([ 0.5516, -0.2794, 0.0475, -0.3834]) tensor([0.4111, 0.1791, 0.2484, 0.1614]) -Greedy action tensor([ 1.2432, -0.9522, 0.2176, -0.6783]) tensor([0.6187, 0.0689, 0.2218, 0.0906]) -Greedy action tensor([ 0.6146, -0.4244, 0.0921, -0.4268]) tensor([0.4348, 0.1538, 0.2579, 0.1535]) -Greedy action tensor([ 0.7220, -0.3239, -0.0332, -0.3588]) tensor([0.4628, 0.1626, 0.2175, 0.1571]) -Greedy action tensor([ 0.2499, 0.0486, -0.0506, -0.1897]) tensor([0.3123, 0.2553, 0.2312, 0.2012]) -Greedy action tensor([ 0.9418, -0.2738, 0.0144, -0.5250]) tensor([0.5201, 0.1542, 0.2057, 0.1200]) -Greedy action tensor([ 0.6221, -0.4464, -0.0289, -0.3672]) tensor([0.4471, 0.1536, 0.2331, 0.1662]) -Greedy action tensor([ 0.5618, -0.4502, -0.1300, -0.2573]) tensor([0.4338, 0.1577, 0.2172, 0.1913]) -Greedy action tensor([ 0.7501, -0.3186, -0.1165, -0.2226]) tensor([0.4669, 0.1604, 0.1963, 0.1765]) -Greedy action tensor([ 0.3914, -0.2325, -0.0707, -0.0906]) tensor([0.3593, 0.1925, 0.2263, 0.2219]) -Greedy action tensor([ 0.5997, -0.3368, 0.0204, -0.3647]) tensor([0.4285, 0.1680, 0.2401, 0.1634]) -Greedy action tensor([ 0.5067, -0.2275, -0.0654, -0.3638]) tensor([0.4060, 0.1948, 0.2291, 0.1700]) -Greedy action tensor([ 0.5755, -0.4865, -0.1407, -0.1851]) tensor([0.4344, 0.1502, 0.2123, 0.2031]) -Greedy action tensor([ 0.6356, -0.2027, 0.1875, -0.1077]) tensor([0.3927, 0.1698, 0.2508, 0.1867]) -Greedy action tensor([ 0.9551, -0.8398, -0.0879, -0.4254]) tensor([0.5650, 0.0939, 0.1991, 0.1421]) -Greedy action tensor([ 0.4688, -0.2531, -0.0870, -0.1329]) tensor([0.3835, 0.1863, 0.2200, 0.2101]) -Greedy action tensor([ 0.9369, -0.6106, -0.0530, -0.3006]) tensor([0.5335, 0.1135, 0.1982, 0.1548]) -Greedy action tensor([ 0.7303, 0.1974, -0.0181, -0.5677]) tensor([0.4286, 0.2516, 0.2028, 0.1170]) -Greedy action tensor([ 0.7458, -0.8327, -0.0727, -0.3733]) tensor([0.5066, 0.1045, 0.2235, 0.1654]) -Greedy action tensor([ 0.4591, -0.3529, -0.1519, -0.1184]) tensor([0.3924, 0.1742, 0.2130, 0.2203]) -Greedy action tensor([ 1.0755, -0.5049, -0.0654, -0.2805]) tensor([0.5608, 0.1155, 0.1792, 0.1445]) -Greedy action tensor([ 0.8854, -0.9248, -0.0447, -0.3752]) tensor([0.5430, 0.0888, 0.2142, 0.1539]) -Greedy action tensor([ 0.6002, -0.5174, -0.0495, -0.1508]) tensor([0.4308, 0.1409, 0.2250, 0.2033]) -Greedy action tensor([ 0.9613, -0.4554, -0.0120, -0.6021]) tensor([0.5465, 0.1325, 0.2065, 0.1145]) -Greedy action tensor([ 0.6680, 0.2527, 0.0312, -0.0745]) tensor([0.3752, 0.2477, 0.1985, 0.1786]) -Greedy action tensor([ 0.7928, -0.6219, -0.0150, -0.3756]) tensor([0.5001, 0.1215, 0.2230, 0.1554]) -Greedy action tensor([ 0.1965, -0.1891, -0.0190, -0.2550]) tensor([0.3202, 0.2178, 0.2581, 0.2039]) -Greedy action tensor([ 0.3425, -0.1143, -0.1206, -0.1506]) tensor([0.3480, 0.2204, 0.2190, 0.2126]) -Greedy action tensor([ 0.4597, 0.0116, -0.0032, -0.0751]) tensor([0.3504, 0.2238, 0.2205, 0.2052]) -Greedy action tensor([ 0.8540, -0.4596, -0.1580, -0.4021]) tensor([0.5216, 0.1402, 0.1896, 0.1485]) -Greedy action tensor([ 0.6842, -0.4631, 0.2124, -0.6253]) tensor([0.4522, 0.1436, 0.2821, 0.1221]) -Greedy action tensor([ 0.4828, -0.0806, 0.0836, -0.3486]) tensor([0.3737, 0.2128, 0.2507, 0.1628]) -Greedy action tensor([ 0.9301, -0.7279, -0.0230, -0.5220]) tensor([0.5525, 0.1053, 0.2130, 0.1293]) -Greedy action tensor([ 0.7695, -0.4467, -0.0446, -0.6834]) tensor([0.5068, 0.1502, 0.2245, 0.1185]) -Greedy action tensor([ 1.0955, -0.6800, -0.0788, -0.3477]) tensor([0.5832, 0.0988, 0.1802, 0.1377]) -Greedy action tensor([ 0.4781, -0.1774, -0.1689, -0.1224]) tensor([0.3859, 0.2004, 0.2021, 0.2117]) -Greedy action tensor([ 0.5602, -0.4649, 0.0784, -0.0662]) tensor([0.3983, 0.1429, 0.2460, 0.2129]) -Greedy action tensor([ 0.9011, -0.5668, -0.1280, -0.3882]) tensor([0.5367, 0.1237, 0.1918, 0.1478]) -Greedy action tensor([ 0.3525, -0.2216, 0.1484, -0.2759]) tensor([0.3434, 0.1934, 0.2800, 0.1832]) -Greedy action tensor([ 1.0996, -0.5981, 0.0486, -0.7662]) tensor([0.5926, 0.1085, 0.2072, 0.0917]) -Greedy action tensor([ 0.4722, -0.1084, 0.0070, -0.2783]) tensor([0.3760, 0.2104, 0.2361, 0.1775]) -Greedy action tensor([ 1.1243, -0.5079, 0.0328, -0.3487]) tensor([0.5680, 0.1110, 0.1907, 0.1302]) -Greedy action tensor([ 0.7250, -0.2320, 0.1424, -0.0250]) tensor([0.4141, 0.1590, 0.2313, 0.1956]) -Greedy action tensor([ 1.0203, -0.7968, 0.1395, -0.5813]) tensor([0.5623, 0.0914, 0.2330, 0.1133]) -Greedy action tensor([ 0.7656, -0.5363, -0.0104, -0.4044]) tensor([0.4896, 0.1332, 0.2253, 0.1519]) -Greedy action tensor([ 0.7447, -0.3276, -0.0098, -0.4502]) tensor([0.4728, 0.1618, 0.2223, 0.1431]) -Greedy action tensor([ 1.0632, -0.7289, 0.0692, -0.4542]) tensor([0.5695, 0.0949, 0.2108, 0.1249]) -Greedy action tensor([ 0.2823, 0.0425, -0.0490, -0.2100]) tensor([0.3209, 0.2525, 0.2304, 0.1962]) -Greedy action tensor([ 7.6426e-01, -6.4081e-01, 5.5295e-04, -4.0230e-01]) tensor([0.4944, 0.1213, 0.2304, 0.1540]) -Greedy action tensor([ 0.7592, -0.2325, -0.0263, -0.3897]) tensor([0.4665, 0.1730, 0.2127, 0.1479]) -Greedy action tensor([ 0.6684, -0.6355, 0.2469, -0.6893]) tensor([0.4577, 0.1243, 0.3003, 0.1178]) -Greedy action tensor([ 0.6356, -0.5405, 0.0247, -0.3479]) tensor([0.4494, 0.1386, 0.2440, 0.1681]) -Greedy action tensor([ 0.6706, -0.1010, 0.0241, -0.2625]) tensor([0.4203, 0.1943, 0.2202, 0.1653]) -Greedy action tensor([ 0.6935, -0.4853, -0.0070, -0.3964]) tensor([0.4672, 0.1437, 0.2319, 0.1571]) -Greedy action tensor([ 0.3763, 0.0987, -0.1264, 0.0517]) tensor([0.3241, 0.2455, 0.1961, 0.2343]) -Greedy action tensor([ 0.6367, -0.2215, -0.1563, -0.0567]) tensor([0.4208, 0.1784, 0.1904, 0.2104]) -Greedy action tensor([ 0.5288, -0.3737, -0.1511, -0.2003]) tensor([0.4176, 0.1694, 0.2116, 0.2014]) -Greedy action tensor([ 1.2128, -0.7337, -0.0858, -0.5105]) tensor([0.6273, 0.0896, 0.1712, 0.1120]) -Greedy action tensor([ 0.6618, -0.7031, -0.0602, -0.3600]) tensor([0.4759, 0.1216, 0.2312, 0.1713]) -Greedy action tensor([ 0.5249, -0.3398, -0.0699, -0.2161]) tensor([0.4082, 0.1719, 0.2252, 0.1946]) -Greedy action tensor([ 0.4855, -0.0847, -0.1551, -0.1340]) tensor([0.3801, 0.2149, 0.2003, 0.2046]) -Greedy action tensor([ 0.8900, -0.8488, 0.1130, -0.4213]) tensor([0.5249, 0.0922, 0.2414, 0.1415]) -Greedy action tensor([ 0.4089, -0.1382, 0.0090, -0.1178]) tensor([0.3522, 0.2038, 0.2361, 0.2080]) -Greedy action tensor([ 0.4772, -0.1994, -0.0389, -0.4438]) tensor([0.3995, 0.2031, 0.2384, 0.1590]) -Greedy action tensor([ 1.0421, -0.6258, -0.0333, -0.6147]) tensor([0.5812, 0.1096, 0.1983, 0.1109]) -Greedy action tensor([ 0.6919, -0.4785, -0.0546, -0.2436]) tensor([0.4594, 0.1425, 0.2178, 0.1803]) -Greedy action tensor([ 0.1536, -1.6957, -0.2053, -0.2437]) tensor([0.3956, 0.0622, 0.2763, 0.2659]) -Greedy action tensor([ 1.8448, -0.6543, 0.0555, 0.8251]) tensor([0.6211, 0.0510, 0.1038, 0.2240]) -Greedy action tensor([ 0.3329, 0.6330, 1.6024, -0.3394]) tensor([0.1558, 0.2103, 0.5544, 0.0795]) -Greedy action tensor([ 0.8183, -0.0330, -0.8053, -0.1466]) tensor([0.4987, 0.2129, 0.0983, 0.1900]) -Greedy action tensor([-1.3229, -2.3018, 0.3452, 0.1936]) tensor([0.0890, 0.0334, 0.4720, 0.4056]) -Greedy action tensor([-1.2198, -2.3611, -0.0296, -0.1936]) tensor([0.1352, 0.0432, 0.4444, 0.3772]) -Greedy action tensor([-0.0267, -1.0844, 0.1722, 1.2653]) tensor([0.1611, 0.0559, 0.1966, 0.5864]) -Greedy action tensor([-0.1016, -0.3312, 1.7331, -0.5110]) tensor([0.1146, 0.0911, 0.7181, 0.0761]) -Greedy action tensor([1.1049, 0.1287, 0.6231, 0.7303]) tensor([0.3728, 0.1405, 0.2303, 0.2564]) -Greedy action tensor([ 0.8924, -1.5429, -0.2203, 1.5778]) tensor([0.2941, 0.0257, 0.0966, 0.5836]) -Greedy action tensor([ 0.6647, -0.1748, -0.0112, 0.2437]) tensor([0.3851, 0.1663, 0.1959, 0.2527]) -Greedy action tensor([ 0.6280, -0.6213, -0.2100, 0.7235]) tensor([0.3547, 0.1017, 0.1534, 0.3902]) -Greedy action tensor([-0.7341, -0.4980, 0.7692, -1.6402]) tensor([0.1395, 0.1767, 0.6274, 0.0564]) -Greedy action tensor([ 0.5838, -0.3379, 0.2531, 0.5501]) tensor([0.3243, 0.1290, 0.2330, 0.3136]) -Greedy action tensor([ 0.8455, -1.3018, 0.4116, 1.0005]) tensor([0.3410, 0.0398, 0.2210, 0.3982]) -Greedy action tensor([ 0.3057, -1.0979, -0.7372, 0.3184]) tensor([0.3830, 0.0941, 0.1350, 0.3879]) -Greedy action tensor([ 0.4210, 0.2897, 0.4688, -0.2669]) tensor([0.2917, 0.2558, 0.3060, 0.1466]) -Greedy action tensor([-0.1294, -1.7071, -0.3276, -0.4133]) tensor([0.3598, 0.0743, 0.2951, 0.2709]) -Greedy action tensor([ 0.1656, 0.6076, -0.9846, 0.4669]) tensor([0.2367, 0.3683, 0.0749, 0.3200]) -Greedy action tensor([-0.1172, -2.4323, -0.7202, 0.2388]) tensor([0.3254, 0.0321, 0.1780, 0.4645]) -Greedy action tensor([ 0.4801, -0.8463, -1.1447, 0.9778]) tensor([0.3218, 0.0854, 0.0634, 0.5294]) -Greedy action tensor([ 1.1983, -0.9729, 0.5349, 0.7929]) tensor([0.4356, 0.0497, 0.2244, 0.2904]) -Greedy action tensor([-0.5856, -0.3945, 0.3664, 0.2665]) tensor([0.1399, 0.1694, 0.3626, 0.3281]) -Greedy action tensor([-0.4916, -0.0574, 2.0535, -0.1832]) tensor([0.0601, 0.0927, 0.7655, 0.0818]) -Greedy action tensor([1.2494, 0.5257, 0.2908, 0.2768]) tensor([0.4451, 0.2159, 0.1707, 0.1683]) -Greedy action tensor([ 0.6239, 0.6322, 0.0891, -1.0372]) tensor([0.3592, 0.3622, 0.2104, 0.0682]) -Greedy action tensor([ 0.0807, -0.2274, -0.2546, -0.9529]) tensor([0.3564, 0.2619, 0.2549, 0.1268]) -Greedy action tensor([1.2934, 0.4223, 1.5471, 0.1839]) tensor([0.3293, 0.1378, 0.4244, 0.1086]) -Greedy action tensor([ 0.6916, -1.3017, 0.0557, 0.7636]) tensor([0.3649, 0.0497, 0.1932, 0.3922]) -Greedy action tensor([ 0.0996, -0.9155, 1.1960, 0.2579]) tensor([0.1809, 0.0656, 0.5416, 0.2120]) -Greedy action tensor([ 0.3609, -1.8509, 1.0818, -0.4176]) tensor([0.2759, 0.0302, 0.5673, 0.1267]) -Greedy action tensor([-0.8896, -0.6893, 0.3214, -0.6111]) tensor([0.1449, 0.1771, 0.4865, 0.1915]) -Greedy action tensor([-0.4541, 0.1907, -0.3216, -0.7254]) tensor([0.2079, 0.3962, 0.2374, 0.1585]) -Greedy action tensor([ 1.1518, -0.6954, 1.3835, 0.4829]) tensor([0.3412, 0.0538, 0.4302, 0.1748]) -Greedy action tensor([1.7295, 0.0073, 1.2537, 0.3991]) tensor([0.4844, 0.0866, 0.3010, 0.1281]) -Greedy action tensor([ 0.0469, -1.0949, 0.3735, -0.3090]) tensor([0.2936, 0.0937, 0.4070, 0.2057]) -Greedy action tensor([ 0.0556, -1.2449, -0.8330, 0.7793]) tensor([0.2670, 0.0727, 0.1098, 0.5505]) -Greedy action tensor([ 0.8469, 0.0729, 0.6770, -0.0642]) tensor([0.3694, 0.1704, 0.3117, 0.1485]) -Greedy action tensor([-0.4155, -2.5321, -0.2934, 0.3978]) tensor([0.2220, 0.0267, 0.2508, 0.5005]) -Greedy action tensor([ 0.9128, -0.4555, 0.5772, 0.6427]) tensor([0.3659, 0.0931, 0.2616, 0.2793]) -Greedy action tensor([ 0.4599, -1.0432, 0.8157, -0.0888]) tensor([0.3099, 0.0689, 0.4422, 0.1790]) -Greedy action tensor([-1.4190, -1.6208, -1.0718, 0.3956]) tensor([0.1067, 0.0872, 0.1510, 0.6551]) -Greedy action tensor([ 0.7236, -0.0868, 0.0239, -0.0246]) tensor([0.4141, 0.1842, 0.2057, 0.1960]) -Greedy action tensor([-0.1692, 0.3011, -0.7200, 0.2337]) tensor([0.2140, 0.3425, 0.1234, 0.3202]) -Greedy action tensor([-0.5422, -2.4543, -0.3731, 0.4566]) tensor([0.1981, 0.0293, 0.2346, 0.5379]) -Greedy action tensor([0.7249, 0.4074, 1.0564, 0.4660]) tensor([0.2569, 0.1870, 0.3578, 0.1983]) -Greedy action tensor([ 0.2040, 0.4826, 0.9596, -0.7102]) tensor([0.2061, 0.2724, 0.4389, 0.0826]) -Greedy action tensor([-0.1655, -0.1543, -0.7391, -0.0726]) tensor([0.2723, 0.2754, 0.1535, 0.2988]) -Greedy action tensor([ 0.2309, -0.7591, 0.6716, -1.0014]) tensor([0.3108, 0.1155, 0.4830, 0.0907]) -Greedy action tensor([ 0.1289, -0.7712, -0.1911, 0.2289]) tensor([0.3088, 0.1256, 0.2243, 0.3413]) -Greedy action tensor([ 0.5332, -1.4445, 0.2311, 0.7831]) tensor([0.3163, 0.0438, 0.2338, 0.4061]) -Greedy action tensor([-0.3459, -0.8548, 1.4557, 0.9718]) tensor([0.0878, 0.0528, 0.5318, 0.3277]) -Greedy action tensor([-0.8949, 0.2655, 0.9956, -0.0519]) tensor([0.0761, 0.2429, 0.5041, 0.1768]) -Greedy action tensor([-0.2484, 0.2111, 0.7091, -0.3933]) tensor([0.1652, 0.2615, 0.4304, 0.1429]) -Greedy action tensor([ 0.3336, -0.4023, 0.0059, -0.5115]) tensor([0.3804, 0.1822, 0.2741, 0.1634]) -Greedy action tensor([ 2.4194, -0.7223, -0.1020, 0.8080]) tensor([0.7558, 0.0327, 0.0607, 0.1509]) -Greedy action tensor([-0.7747, -1.0427, 1.2048, -0.0132]) tensor([0.0897, 0.0686, 0.6495, 0.1921]) -Greedy action tensor([ 1.0099, -0.3845, 0.7361, 1.5759]) tensor([0.2653, 0.0658, 0.2017, 0.4672]) -Greedy action tensor([-0.3248, -1.3121, -0.8041, -0.7032]) tensor([0.3736, 0.1392, 0.2313, 0.2559]) -Greedy action tensor([1.2597, 0.9696, 0.3075, 0.2726]) tensor([0.3989, 0.2985, 0.1539, 0.1487]) -Greedy action tensor([ 1.0846, 0.3104, -1.1862, 1.0836]) tensor([0.3901, 0.1799, 0.0403, 0.3897]) -Greedy action tensor([ 0.9132, -0.0102, 0.5719, 0.8288]) tensor([0.3304, 0.1312, 0.2348, 0.3036]) -Greedy action tensor([-0.5476, -0.9404, -0.8189, 0.7266]) tensor([0.1663, 0.1123, 0.1268, 0.5946]) -Greedy action tensor([ 1.0499, -1.0644, 1.1294, 0.5695]) tensor([0.3544, 0.0428, 0.3837, 0.2192]) -Greedy action tensor([ 0.1810, -0.7430, 2.1257, -0.4951]) tensor([0.1124, 0.0446, 0.7858, 0.0572]) -Greedy action tensor([ 0.9779, -0.4893, -0.2008, 0.5696]) tensor([0.4539, 0.1047, 0.1397, 0.3018]) -Greedy action tensor([1.6681, 0.2410, 0.6419, 0.6737]) tensor([0.5080, 0.1219, 0.1821, 0.1879]) -Greedy action tensor([ 0.7679, -0.6254, -0.4008, -0.2426]) tensor([0.5200, 0.1291, 0.1616, 0.1893]) -Greedy action tensor([-0.6208, -0.0606, -1.4019, -0.4961]) tensor([0.2303, 0.4033, 0.1055, 0.2609]) -Greedy action tensor([ 0.7415, -0.7255, -0.7438, 0.9522]) tensor([0.3715, 0.0857, 0.0841, 0.4587]) -Greedy action tensor([-0.9379, -1.0674, -0.0970, -0.2521]) tensor([0.1617, 0.1421, 0.3750, 0.3211]) -Greedy action tensor([1.2492, 0.0608, 0.8007, 1.1321]) tensor([0.3530, 0.1076, 0.2254, 0.3140]) -Greedy action tensor([-0.5216, -1.0695, 0.6991, 1.4451]) tensor([0.0825, 0.0477, 0.2798, 0.5900]) -Greedy action tensor([ 0.4805, -1.3885, -0.1874, 0.0314]) tensor([0.4338, 0.0669, 0.2224, 0.2768]) -Greedy action tensor([ 0.7602, 0.1443, 1.9889, -0.6530]) tensor([0.1923, 0.1039, 0.6570, 0.0468]) -Greedy action tensor([ 0.2379, -1.3666, -0.2733, 0.2015]) tensor([0.3617, 0.0727, 0.2169, 0.3487]) -Greedy action tensor([ 0.2059, -0.4900, 0.7856, -0.5732]) tensor([0.2672, 0.1332, 0.4770, 0.1226]) -Greedy action tensor([ 0.6550, -0.3319, 0.7180, 0.7278]) tensor([0.2846, 0.1061, 0.3031, 0.3061]) -Greedy action tensor([1.0842, 0.0722, 0.7517, 1.2605]) tensor([0.3055, 0.1110, 0.2191, 0.3644]) -Greedy action tensor([ 1.5587, 0.4151, -0.0750, 1.0046]) tensor([0.4788, 0.1526, 0.0935, 0.2751]) -Greedy action tensor([ 1.0508, -0.2640, 2.5270, 0.4585]) tensor([0.1613, 0.0433, 0.7061, 0.0892]) -Greedy action tensor([ 0.0994, -0.2590, -0.3066, 1.8738]) tensor([0.1210, 0.0846, 0.0807, 0.7137]) -Greedy action tensor([ 0.9360, -0.0137, -0.1145, 0.1825]) tensor([0.4530, 0.1753, 0.1585, 0.2132]) -Greedy action tensor([ 1.4499, -0.4802, -0.1004, 0.3603]) tensor([0.5904, 0.0857, 0.1253, 0.1986]) -Greedy action tensor([ 2.5438, -1.4346, 0.1388, 0.1138]) tensor([0.8354, 0.0156, 0.0754, 0.0735]) -Greedy action tensor([ 1.4305, -0.0767, -0.1009, 0.2662]) tensor([0.5715, 0.1266, 0.1236, 0.1784]) -Greedy action tensor([ 1.3555, -0.3052, -0.5101, 0.4094]) tensor([0.5770, 0.1096, 0.0893, 0.2240]) -Greedy action tensor([ 1.7732, 0.0442, -0.0200, 0.0529]) tensor([0.6566, 0.1165, 0.1093, 0.1175]) -Greedy action tensor([ 1.5446, -0.6130, -0.6402, 0.1084]) tensor([0.6822, 0.0789, 0.0767, 0.1622]) -Greedy action tensor([ 1.3358, 0.1619, -0.0996, 0.4188]) tensor([0.5136, 0.1588, 0.1223, 0.2053]) -Greedy action tensor([ 0.6713, -0.2880, -0.1028, -0.0268]) tensor([0.4270, 0.1636, 0.1969, 0.2125]) -Greedy action tensor([ 1.5690, -0.7116, -0.2451, 0.1784]) tensor([0.6604, 0.0675, 0.1076, 0.1644]) -Greedy action tensor([ 1.1740, -0.3995, -0.0557, 0.3679]) tensor([0.5138, 0.1065, 0.1502, 0.2295]) -Greedy action tensor([ 1.3287, -0.8037, -0.0086, 0.3024]) tensor([0.5749, 0.0682, 0.1509, 0.2060]) -Greedy action tensor([ 1.5607, -0.3912, -0.4215, 0.1472]) tensor([0.6566, 0.0932, 0.0905, 0.1597]) -Greedy action tensor([ 1.4290, -0.5618, -0.1139, 0.5185]) tensor([0.5706, 0.0779, 0.1220, 0.2295]) -Greedy action tensor([ 1.3194, -0.1129, -0.5581, 0.5276]) tensor([0.5421, 0.1294, 0.0829, 0.2456]) -Greedy action tensor([ 0.5464, -0.1787, -0.2771, -0.1522]) tensor([0.4132, 0.2001, 0.1813, 0.2054]) -Greedy action tensor([ 1.1758, -0.1376, -0.7205, 0.2250]) tensor([0.5539, 0.1489, 0.0831, 0.2140]) -Greedy action tensor([ 1.4512, -0.6518, -0.1237, 0.4652]) tensor([0.5875, 0.0717, 0.1216, 0.2192]) -Greedy action tensor([ 1.2729, 0.1948, 0.1323, -0.0199]) tensor([0.5170, 0.1759, 0.1652, 0.1419]) -Greedy action tensor([ 1.4387, -0.3301, -0.5836, 0.1975]) tensor([0.6282, 0.1071, 0.0831, 0.1816]) -Greedy action tensor([ 1.8096, -0.2954, -0.8090, 0.9790]) tensor([0.6133, 0.0747, 0.0447, 0.2673]) -Greedy action tensor([ 1.1141, -0.3899, -0.3301, 0.1187]) tensor([0.5471, 0.1216, 0.1291, 0.2022]) -Greedy action tensor([ 1.1077, -0.2217, -0.1809, 0.4983]) tensor([0.4799, 0.1270, 0.1323, 0.2609]) -Greedy action tensor([ 1.7313, -0.4336, -0.4871, 0.6156]) tensor([0.6447, 0.0740, 0.0701, 0.2112]) -Greedy action tensor([ 1.4401, -0.2815, -0.5750, -0.0275]) tensor([0.6483, 0.1159, 0.0864, 0.1494]) -Greedy action tensor([ 1.1875, -0.5179, -0.1020, 0.3608]) tensor([0.5278, 0.0959, 0.1454, 0.2309]) -Greedy action tensor([ 1.2454, 0.1925, -0.0782, 0.6165]) tensor([0.4655, 0.1624, 0.1239, 0.2482]) -Greedy action tensor([ 1.3972, 0.1270, -0.3987, 0.3178]) tensor([0.5597, 0.1572, 0.0929, 0.1902]) -Greedy action tensor([ 2.0030, -1.0157, -0.1500, 0.9083]) tensor([0.6668, 0.0326, 0.0774, 0.2232]) -Greedy action tensor([ 1.5799, -0.9335, -0.2994, 0.7748]) tensor([0.5950, 0.0482, 0.0909, 0.2660]) -Greedy action tensor([ 1.7196, 0.0588, -0.4620, 0.6551]) tensor([0.6069, 0.1153, 0.0685, 0.2093]) -Greedy action tensor([ 2.2157, -0.7270, -0.1123, 0.1655]) tensor([0.7819, 0.0412, 0.0762, 0.1006]) -Greedy action tensor([ 1.8601, -0.7556, -0.2925, 0.4782]) tensor([0.6942, 0.0508, 0.0807, 0.1743]) -Greedy action tensor([ 1.9633, -0.8156, -0.2532, 0.5202]) tensor([0.7106, 0.0441, 0.0774, 0.1678]) -Greedy action tensor([ 0.7583, -0.4989, -0.1861, 0.3249]) tensor([0.4307, 0.1225, 0.1675, 0.2792]) -Greedy action tensor([ 1.8393, 0.1142, -0.1600, 0.0080]) tensor([0.6785, 0.1209, 0.0919, 0.1087]) -Greedy action tensor([ 1.4501, -0.7338, 0.1770, 0.0286]) tensor([0.6120, 0.0689, 0.1713, 0.1477]) -Greedy action tensor([ 1.6157, -0.4660, -0.2396, 0.4806]) tensor([0.6240, 0.0778, 0.0976, 0.2005]) -Greedy action tensor([ 1.3199, -0.4971, -0.3625, 0.4351]) tensor([0.5678, 0.0923, 0.1056, 0.2344]) -Greedy action tensor([ 1.8821, -0.3084, -0.5751, 0.7432]) tensor([0.6589, 0.0737, 0.0565, 0.2110]) -Greedy action tensor([ 1.5796, -0.3584, 0.1578, 0.4730]) tensor([0.5828, 0.0839, 0.1406, 0.1927]) -Greedy action tensor([ 1.3943, -0.5198, 0.0106, 0.4868]) tensor([0.5550, 0.0819, 0.1391, 0.2240]) -Greedy action tensor([ 1.1729, -0.3295, -0.5480, 0.2925]) tensor([0.5506, 0.1226, 0.0985, 0.2283]) -Greedy action tensor([ 0.8123, -0.6493, -0.0841, -0.0603]) tensor([0.4860, 0.1127, 0.1983, 0.2031]) -Greedy action tensor([ 1.5313, -0.6562, -0.8948, 0.2758]) tensor([0.6732, 0.0755, 0.0595, 0.1918]) -Greedy action tensor([ 1.4898, -0.3841, -0.3764, 0.1977]) tensor([0.6317, 0.0970, 0.0977, 0.1735]) -Greedy action tensor([ 0.9317, -0.3785, -0.2543, 0.2896]) tensor([0.4759, 0.1284, 0.1454, 0.2504]) -Greedy action tensor([ 2.0088, -0.4923, -0.0446, 0.9576]) tensor([0.6411, 0.0526, 0.0823, 0.2241]) -Greedy action tensor([ 1.2482, -0.8923, -0.2018, 0.1575]) tensor([0.5924, 0.0697, 0.1390, 0.1990]) -Greedy action tensor([ 1.4826, -0.6231, -0.3751, 0.0421]) tensor([0.6602, 0.0804, 0.1030, 0.1563]) -Greedy action tensor([ 1.8118, -1.3265, -0.0915, 0.2233]) tensor([0.7160, 0.0310, 0.1067, 0.1462]) -Greedy action tensor([ 1.5166, -0.3991, -0.1333, 0.1714]) tensor([0.6251, 0.0920, 0.1201, 0.1628]) -Greedy action tensor([ 1.0813, -0.6272, -0.3903, 0.4613]) tensor([0.5132, 0.0930, 0.1178, 0.2761]) -Greedy action tensor([ 1.6252, -0.7574, -0.3887, -0.0563]) tensor([0.7083, 0.0654, 0.0945, 0.1318]) -Greedy action tensor([ 0.7412, -0.0176, -0.3361, -0.0774]) tensor([0.4445, 0.2081, 0.1513, 0.1960]) -Greedy action tensor([ 1.3004, -0.5472, -0.6427, 0.2001]) tensor([0.6121, 0.0965, 0.0877, 0.2037]) -Greedy action tensor([ 1.3698, -1.0451, 0.0540, 0.3578]) tensor([0.5810, 0.0519, 0.1559, 0.2112]) -Greedy action tensor([ 1.7908, -0.9637, -0.6035, 0.4073]) tensor([0.7115, 0.0453, 0.0649, 0.1784]) -Greedy action tensor([ 1.7725, -0.0478, -0.3261, 0.3794]) tensor([0.6524, 0.1057, 0.0800, 0.1620]) -Greedy action tensor([ 2.1238, -0.6943, -0.4267, 1.1913]) tensor([0.6530, 0.0390, 0.0510, 0.2570]) -Greedy action tensor([ 2.7178, -1.2322, 0.0210, 0.6842]) tensor([0.8213, 0.0158, 0.0554, 0.1075]) -Greedy action tensor([ 1.9524, -0.6889, -0.6980, 0.5536]) tensor([0.7200, 0.0513, 0.0509, 0.1778]) -Greedy action tensor([ 1.0523, -0.3870, -0.3037, 0.2642]) tensor([0.5129, 0.1216, 0.1322, 0.2332]) -Greedy action tensor([ 2.0132, -0.7786, -0.1985, 0.5268]) tensor([0.7158, 0.0439, 0.0784, 0.1619]) -Greedy action tensor([ 1.5729, -0.1666, 0.0116, -0.0889]) tensor([0.6348, 0.1115, 0.1332, 0.1205]) -Greedy action tensor([ 0.8403, -0.6076, -0.1559, 0.4887]) tensor([0.4333, 0.1019, 0.1600, 0.3049]) -Greedy action tensor([ 1.1714, -0.7810, -0.4568, 0.4614]) tensor([0.5465, 0.0776, 0.1073, 0.2687]) -Greedy action tensor([ 2.0369, -1.0930, -0.2844, 0.5237]) tensor([0.7342, 0.0321, 0.0721, 0.1617]) -Greedy action tensor([ 1.8486, -0.8861, -0.5027, 0.3220]) tensor([0.7260, 0.0471, 0.0692, 0.1577]) -Greedy action tensor([ 1.2798, -0.4136, -0.0846, 0.0598]) tensor([0.5765, 0.1060, 0.1473, 0.1702]) -Greedy action tensor([ 1.2616, -0.4998, -0.2737, 0.4874]) tensor([0.5410, 0.0929, 0.1165, 0.2495]) -Greedy action tensor([ 1.1278, -0.3089, -0.1528, 0.4728]) tensor([0.4914, 0.1168, 0.1365, 0.2552]) -Greedy action tensor([ 2.5862, -1.0816, -0.5890, 0.9100]) tensor([0.7972, 0.0204, 0.0333, 0.1491]) -Greedy action tensor([ 1.3913, -0.6780, -0.1997, 0.5269]) tensor([0.5710, 0.0721, 0.1163, 0.2406]) -Greedy action tensor([ 1.8825, -0.1197, 0.2315, 0.1257]) tensor([0.6669, 0.0901, 0.1279, 0.1151]) -Greedy action tensor([ 2.1171, -0.4715, -0.4164, 0.2649]) tensor([0.7625, 0.0573, 0.0605, 0.1196]) -Greedy action tensor([ 1.7662, -0.6200, -0.4482, 0.5446]) tensor([0.6685, 0.0615, 0.0730, 0.1970]) -Greedy action tensor([ 1.9314, -0.3550, -0.4719, 0.4261]) tensor([0.7072, 0.0719, 0.0639, 0.1570]) -Greedy action tensor([ 1.8032, -0.7134, -0.3238, 0.3617]) tensor([0.6961, 0.0562, 0.0830, 0.1647]) -Greedy action tensor([ 1.2739, -0.7042, -0.2583, 0.3953]) tensor([0.5650, 0.0782, 0.1221, 0.2347]) -Greedy action tensor([ 2.1474, -0.7439, -0.4461, 0.4418]) tensor([0.7622, 0.0423, 0.0570, 0.1385]) -Greedy action tensor([-1.7855, -0.2891, 0.6161, -0.2329]) tensor([0.0471, 0.2103, 0.5201, 0.2225]) -Greedy action tensor([-0.6802, 0.8108, 0.3067, 1.1077]) tensor([0.0709, 0.3150, 0.1903, 0.4239]) -Greedy action tensor([-1.7151, -0.5110, 0.5362, -0.0309]) tensor([0.0520, 0.1734, 0.4942, 0.2803]) -Greedy action tensor([-1.6026, -0.1432, 0.5708, 0.1020]) tensor([0.0510, 0.2197, 0.4486, 0.2807]) -Greedy action tensor([-1.8258, -0.1599, 0.5735, -0.0578]) tensor([0.0432, 0.2284, 0.4755, 0.2529]) -Greedy action tensor([-1.8728, -0.4860, 0.6358, -0.1636]) tensor([0.0438, 0.1754, 0.5386, 0.2421]) -Greedy action tensor([-1.9403, -0.4493, 0.6637, -0.1785]) tensor([0.0404, 0.1792, 0.5454, 0.2350]) -Greedy action tensor([-1.8140, -0.4728, 0.6242, -0.1688]) tensor([0.0466, 0.1782, 0.5337, 0.2415]) -Greedy action tensor([-1.9029, -0.3850, 0.6572, -0.1462]) tensor([0.0412, 0.1878, 0.5325, 0.2385]) -Greedy action tensor([-0.4498, -0.9628, 0.7195, -0.1680]) tensor([0.1628, 0.0974, 0.5240, 0.2157]) -Greedy action tensor([-1.8670, -0.4389, 0.6303, -0.1439]) tensor([0.0436, 0.1820, 0.5300, 0.2444]) -Greedy action tensor([-0.6605, -0.5751, 0.2184, -0.1117]) tensor([0.1605, 0.1749, 0.3867, 0.2779]) -Greedy action tensor([-1.9192, -0.4538, 0.6571, -0.1681]) tensor([0.0413, 0.1786, 0.5425, 0.2377]) -Greedy action tensor([-1.8035, -0.4360, 0.5900, -0.1063]) tensor([0.0469, 0.1840, 0.5133, 0.2558]) -Greedy action tensor([-1.8916, -0.3536, 0.6294, -0.1432]) tensor([0.0419, 0.1953, 0.5218, 0.2410]) -Greedy action tensor([-1.8397, -0.5497, 0.9839, 0.2533]) tensor([0.0338, 0.1228, 0.5692, 0.2741]) -Greedy action tensor([-1.1344, -0.0210, 0.3424, -0.1244]) tensor([0.0895, 0.2726, 0.3920, 0.2458]) -Greedy action tensor([-0.7848, 0.8293, 0.0835, -0.0461]) tensor([0.0952, 0.4784, 0.2270, 0.1994]) -Greedy action tensor([-1.7801, -0.3139, 0.6078, -0.0329]) tensor([0.0455, 0.1973, 0.4959, 0.2613]) -Greedy action tensor([-1.7557, -0.5973, 0.7864, 0.0904]) tensor([0.0431, 0.1371, 0.5471, 0.2727]) -Greedy action tensor([-0.2092, -0.3490, 1.0572, 1.5450]) tensor([0.0893, 0.0777, 0.3169, 0.5161]) -Greedy action tensor([-1.4216, 0.3489, 0.3151, 0.0051]) tensor([0.0598, 0.3514, 0.3397, 0.2491]) -Greedy action tensor([-1.8326, -0.4274, 0.6058, -0.1152]) tensor([0.0452, 0.1845, 0.5183, 0.2520]) -Greedy action tensor([-1.6261, -0.5573, 0.5720, 0.0721]) tensor([0.0544, 0.1584, 0.4900, 0.2972]) -Greedy action tensor([-1.6349, -0.5764, 0.6719, 0.1749]) tensor([0.0499, 0.1439, 0.5013, 0.3049]) -Greedy action tensor([-1.8787, -0.5235, 0.8016, -0.0543]) tensor([0.0390, 0.1511, 0.5684, 0.2415]) -Greedy action tensor([-1.4000, -0.4126, 0.9813, 1.0334]) tensor([0.0386, 0.1036, 0.4177, 0.4401]) -Greedy action tensor([-1.2084, 0.2969, 0.2462, 0.0112]) tensor([0.0759, 0.3420, 0.3251, 0.2570]) -Greedy action tensor([-1.9335, -0.4278, 0.6618, -0.1715]) tensor([0.0404, 0.1822, 0.5418, 0.2355]) -Greedy action tensor([-1.7415, -0.5078, 0.7836, 0.2740]) tensor([0.0409, 0.1406, 0.5113, 0.3072]) -Greedy action tensor([-1.5029, -0.2497, 0.5887, 0.4323]) tensor([0.0512, 0.1793, 0.4147, 0.3547]) -Greedy action tensor([-1.9389, -0.4461, 0.6636, -0.1776]) tensor([0.0404, 0.1797, 0.5450, 0.2350]) -Greedy action tensor([-1.5563, -0.5655, 0.4538, 0.0613]) tensor([0.0617, 0.1663, 0.4608, 0.3112]) -Greedy action tensor([-1.9114, -0.2621, 0.6211, -0.1562]) tensor([0.0407, 0.2118, 0.5121, 0.2354]) -Greedy action tensor([-1.8525, -0.3595, 0.6538, -0.1238]) tensor([0.0428, 0.1906, 0.5252, 0.2413]) -Greedy action tensor([-1.9430, -0.4524, 0.6662, -0.1792]) tensor([0.0402, 0.1786, 0.5465, 0.2347]) -Greedy action tensor([-0.5799, -0.4194, 0.5324, 0.7594]) tensor([0.1107, 0.1300, 0.3367, 0.4225]) -Greedy action tensor([-1.8869, -0.4438, 0.6397, -0.1550]) tensor([0.0427, 0.1810, 0.5347, 0.2416]) -Greedy action tensor([-1.5496, -0.2550, 0.5053, 0.1248]) tensor([0.0562, 0.2051, 0.4388, 0.2999]) -Greedy action tensor([-1.9215, -0.4256, 0.6616, -0.1580]) tensor([0.0408, 0.1819, 0.5396, 0.2377]) -Greedy action tensor([-1.6744, 0.1382, 0.4353, -0.0157]) tensor([0.0485, 0.2970, 0.3998, 0.2547]) -Greedy action tensor([-0.9251, 0.9354, 0.0973, 0.2797]) tensor([0.0738, 0.4746, 0.2053, 0.2463]) -Greedy action tensor([-1.5647, -0.5749, 0.6308, 0.2083]) tensor([0.0539, 0.1450, 0.4840, 0.3172]) -Greedy action tensor([-0.7459, -0.1889, 0.3483, 0.0517]) tensor([0.1257, 0.2195, 0.3756, 0.2792]) -Greedy action tensor([-1.8928, -0.3939, 0.6393, -0.1394]) tensor([0.0420, 0.1879, 0.5279, 0.2423]) -Greedy action tensor([-1.8257, -0.4138, 0.6071, -0.1214]) tensor([0.0455, 0.1866, 0.5180, 0.2500]) -Greedy action tensor([-1.9019, -0.3917, 0.6423, -0.1473]) tensor([0.0416, 0.1883, 0.5296, 0.2405]) -Greedy action tensor([-1.8848, -0.4171, 0.6338, -0.1528]) tensor([0.0427, 0.1854, 0.5303, 0.2415]) -Greedy action tensor([-1.7560, -0.3626, 0.6221, -0.0248]) tensor([0.0466, 0.1877, 0.5025, 0.2631]) -Greedy action tensor([-0.8828, -0.6128, 0.1515, 0.7237]) tensor([0.0989, 0.1296, 0.2783, 0.4932]) -Greedy action tensor([0.0545, 1.0997, 0.0788, 0.8006]) tensor([0.1433, 0.4076, 0.1468, 0.3022]) -Greedy action tensor([-0.8709, -0.4509, 0.3100, 0.6091]) tensor([0.0983, 0.1496, 0.3202, 0.4319]) -Greedy action tensor([-1.8362, -0.0369, 0.5484, -0.0930]) tensor([0.0423, 0.2560, 0.4596, 0.2420]) -Greedy action tensor([-1.9281, -0.4451, 0.6567, -0.1729]) tensor([0.0409, 0.1802, 0.5423, 0.2366]) -Greedy action tensor([-1.7442, -0.0783, 0.5085, -0.0751]) tensor([0.0474, 0.2506, 0.4506, 0.2514]) -Greedy action tensor([-1.4350, -0.5869, 0.5765, 0.5070]) tensor([0.0562, 0.1313, 0.4203, 0.3921]) -Greedy action tensor([-1.7098, -0.4896, 0.5309, -0.0089]) tensor([0.0519, 0.1758, 0.4879, 0.2844]) -Greedy action tensor([-1.9454, -0.4532, 0.6658, -0.1811]) tensor([0.0402, 0.1786, 0.5468, 0.2345]) -Greedy action tensor([-1.9344, -0.4522, 0.6651, -0.1765]) tensor([0.0406, 0.1785, 0.5457, 0.2352]) -Greedy action tensor([-1.8828, -0.4560, 0.6656, -0.0759]) tensor([0.0416, 0.1732, 0.5318, 0.2534]) -Greedy action tensor([-1.8226, -0.3711, 0.5972, -0.0989]) tensor([0.0452, 0.1930, 0.5084, 0.2534]) -Greedy action tensor([-0.7066, -0.4920, 0.2176, 0.0433]) tensor([0.1454, 0.1802, 0.3665, 0.3078]) -Greedy action tensor([-1.3226, -0.6147, 1.0965, 1.1312]) tensor([0.0386, 0.0784, 0.4339, 0.4491]) -Greedy action tensor([-1.5530, 0.1223, 0.7203, -0.6517]) tensor([0.0540, 0.2884, 0.5245, 0.1330]) -Greedy action tensor([-1.0610, -0.3562, 0.7528, 1.0993]) tensor([0.0561, 0.1135, 0.3440, 0.4865]) -Greedy action tensor([-1.9337, -0.4582, 0.6779, -0.1591]) tensor([0.0402, 0.1757, 0.5472, 0.2369]) -Greedy action tensor([-1.8169, -0.1260, 0.5665, -0.1053]) tensor([0.0439, 0.2379, 0.4754, 0.2428]) -Greedy action tensor([-1.8160, -0.4242, 0.6045, -0.1048]) tensor([0.0459, 0.1844, 0.5159, 0.2538]) -Greedy action tensor([-1.4196, -0.6176, 0.3786, 0.1460]) tensor([0.0712, 0.1587, 0.4297, 0.3405]) -Greedy action tensor([-1.6897, 0.1197, 0.4650, 0.0967]) tensor([0.0461, 0.2814, 0.3975, 0.2750]) -Greedy action tensor([-1.6741, -0.3726, 0.5414, -0.1233]) tensor([0.0539, 0.1980, 0.4940, 0.2541]) -Greedy action tensor([-1.8911, -0.4508, 0.6405, -0.1548]) tensor([0.0426, 0.1799, 0.5357, 0.2418]) -Greedy action tensor([-1.6532, -0.2029, 0.6250, 0.0357]) tensor([0.0489, 0.2087, 0.4775, 0.2649]) -Greedy action tensor([-1.5456, -0.1343, 0.4201, 0.0509]) tensor([0.0582, 0.2388, 0.4157, 0.2873]) -Greedy action tensor([-1.6677, -0.4153, 0.5195, -0.0754]) tensor([0.0546, 0.1909, 0.4863, 0.2682]) -Greedy action tensor([-1.8594, -0.2073, 0.5905, -0.1096]) tensor([0.0424, 0.2215, 0.4918, 0.2442]) -Greedy action tensor([-1.0696, -0.5280, 0.2365, 0.4228]) tensor([0.0921, 0.1583, 0.3400, 0.4096]) -Greedy action tensor([-1.8807, -0.3549, 0.6532, -0.1330]) tensor([0.0418, 0.1921, 0.5264, 0.2398]) -Greedy action tensor([-1.5853, -0.4805, 0.4806, -0.0025]) tensor([0.0596, 0.1799, 0.4704, 0.2901]) -Greedy action tensor([-1.7270, -0.5427, 0.5775, 0.0248]) tensor([0.0499, 0.1630, 0.4996, 0.2875]) -Greedy action tensor([-1.9315, -0.4406, 0.6578, -0.1727]) tensor([0.0407, 0.1808, 0.5422, 0.2363]) -Greedy action tensor([ 0.8942, -0.5051, 0.1706, -0.3493]) tensor([0.4950, 0.1222, 0.2401, 0.1427]) -Greedy action tensor([ 0.9860, -0.5703, 0.0240, -0.1741]) tensor([0.5245, 0.1106, 0.2004, 0.1644]) -Greedy action tensor([ 0.9116, -0.6021, 0.1151, -0.5678]) tensor([0.5267, 0.1159, 0.2375, 0.1200]) -Greedy action tensor([ 1.2504, -0.6181, -0.0302, -0.7140]) tensor([0.6359, 0.0982, 0.1767, 0.0892]) -Greedy action tensor([ 0.5587, -0.0079, -0.1310, 0.0044]) tensor([0.3783, 0.2147, 0.1898, 0.2173]) -Greedy action tensor([ 0.2028, 0.0643, -0.1109, -0.2529]) tensor([0.3091, 0.2691, 0.2259, 0.1960]) -Greedy action tensor([ 0.4270, -0.1466, -0.0938, -0.1453]) tensor([0.3674, 0.2070, 0.2183, 0.2073]) -Greedy action tensor([ 0.6686, -0.2575, -0.1650, -0.3762]) tensor([0.4582, 0.1815, 0.1991, 0.1612]) -Greedy action tensor([ 0.7024, 0.1020, -0.1967, -0.1351]) tensor([0.4187, 0.2297, 0.1704, 0.1812]) -Greedy action tensor([ 0.5881, -0.3638, -0.1565, -0.1597]) tensor([0.4284, 0.1654, 0.2034, 0.2028]) -Greedy action tensor([ 0.7639, -0.7067, 0.0938, -0.9411]) tensor([0.5200, 0.1195, 0.2660, 0.0945]) -Greedy action tensor([ 0.9482, -0.7865, -0.0505, -0.5795]) tensor([0.5676, 0.1001, 0.2091, 0.1232]) -Greedy action tensor([ 0.8243, -0.9177, -0.0860, -0.4351]) tensor([0.5372, 0.0941, 0.2162, 0.1525]) -Greedy action tensor([ 0.8294, -0.4977, -0.0546, -0.3544]) tensor([0.5039, 0.1337, 0.2082, 0.1543]) -Greedy action tensor([ 0.6215, -0.3138, -0.1618, -0.0845]) tensor([0.4268, 0.1675, 0.1950, 0.2107]) -Greedy action tensor([ 0.4196, -0.2305, -0.0289, -0.1793]) tensor([0.3690, 0.1926, 0.2356, 0.2027]) -Greedy action tensor([ 0.5407, 0.2737, -0.1211, -0.1460]) tensor([0.3591, 0.2749, 0.1853, 0.1807]) -Greedy action tensor([ 0.3382, -0.2952, -0.0282, -0.3372]) tensor([0.3659, 0.1942, 0.2537, 0.1862]) -Greedy action tensor([ 0.8450, -0.4666, -0.0908, -0.4542]) tensor([0.5170, 0.1393, 0.2028, 0.1410]) -Greedy action tensor([ 0.8839, -1.0204, 0.0841, -0.4944]) tensor([0.5404, 0.0805, 0.2429, 0.1362]) -Greedy action tensor([ 0.7373, -0.6061, -0.0200, -0.3219]) tensor([0.4816, 0.1257, 0.2258, 0.1670]) -Greedy action tensor([ 0.7143, -0.5267, -0.1874, -0.4249]) tensor([0.4963, 0.1435, 0.2014, 0.1588]) -Greedy action tensor([ 0.9276, -0.9404, -0.0562, -0.3827]) tensor([0.5562, 0.0859, 0.2079, 0.1500]) -Greedy action tensor([ 0.6593, 0.0317, -0.1091, -0.0581]) tensor([0.4023, 0.2148, 0.1866, 0.1963]) -Greedy action tensor([ 0.4693, -0.0397, -0.0592, -0.4539]) tensor([0.3864, 0.2323, 0.2278, 0.1535]) -Greedy action tensor([ 0.4814, -0.5569, -0.0749, -0.3157]) tensor([0.4205, 0.1489, 0.2411, 0.1895]) -Greedy action tensor([ 0.4266, -0.6702, -0.1860, -0.2544]) tensor([0.4198, 0.1402, 0.2275, 0.2125]) -Greedy action tensor([ 0.2400, -0.1722, 0.1657, -0.1868]) tensor([0.3083, 0.2042, 0.2863, 0.2012]) -Greedy action tensor([ 0.6446, -0.4783, -0.1070, -0.3052]) tensor([0.4579, 0.1490, 0.2160, 0.1771]) -Greedy action tensor([ 0.8784, -0.8954, 0.0155, -0.3552]) tensor([0.5311, 0.0901, 0.2241, 0.1547]) -Greedy action tensor([ 0.6418, -0.3130, -0.0196, -0.2378]) tensor([0.4318, 0.1662, 0.2229, 0.1792]) -Greedy action tensor([ 0.8904, -0.4223, -0.1448, -0.2193]) tensor([0.5118, 0.1377, 0.1818, 0.1687]) -Greedy action tensor([ 0.6074, -0.8285, -0.1531, -0.1487]) tensor([0.4598, 0.1094, 0.2149, 0.2159]) -Greedy action tensor([ 0.3561, -0.0717, 0.1169, -0.3064]) tensor([0.3384, 0.2206, 0.2664, 0.1745]) -Greedy action tensor([ 0.4230, 0.3033, -0.1187, 0.0036]) tensor([0.3199, 0.2838, 0.1861, 0.2103]) -Greedy action tensor([ 0.3773, 0.4500, -0.1989, 0.1453]) tensor([0.2915, 0.3135, 0.1638, 0.2312]) -Greedy action tensor([ 0.7955, -0.4427, -0.1395, -0.3935]) tensor([0.5033, 0.1459, 0.1976, 0.1533]) -Greedy action tensor([ 0.4342, 0.1101, -0.1912, -0.1747]) tensor([0.3569, 0.2581, 0.1909, 0.1941]) -Greedy action tensor([ 0.6534, 0.0923, -0.0668, -0.0392]) tensor([0.3910, 0.2231, 0.1903, 0.1956]) -Greedy action tensor([ 0.6534, -0.3617, -0.1230, -0.2772]) tensor([0.4511, 0.1635, 0.2075, 0.1779]) -Greedy action tensor([ 1.0871, -0.5324, -0.1786, -0.7488]) tensor([0.6099, 0.1208, 0.1720, 0.0973]) -Greedy action tensor([ 4.8381e-01, -3.6589e-01, 2.1337e-04, -1.7188e-01]) tensor([0.3901, 0.1668, 0.2405, 0.2025]) -Greedy action tensor([ 0.7192, -0.1833, -0.0554, -0.0913]) tensor([0.4327, 0.1755, 0.1994, 0.1924]) -Greedy action tensor([ 0.7339, -0.3332, -0.0786, -0.0810]) tensor([0.4483, 0.1542, 0.1989, 0.1985]) -Greedy action tensor([ 0.4344, -0.1467, -0.0830, -0.1613]) tensor([0.3695, 0.2066, 0.2202, 0.2037]) -Greedy action tensor([ 1.0310, -0.6226, -0.0141, -0.5608]) tensor([0.5726, 0.1096, 0.2013, 0.1165]) -Greedy action tensor([ 0.6560, -0.4598, -0.0891, -0.2094]) tensor([0.4498, 0.1474, 0.2135, 0.1893]) -Greedy action tensor([ 1.1837, -0.7145, -0.1343, -0.6104]) tensor([0.6314, 0.0946, 0.1690, 0.1050]) -Greedy action tensor([ 0.6997, -0.5870, -0.0835, -0.4759]) tensor([0.4898, 0.1353, 0.2238, 0.1512]) -Greedy action tensor([ 0.5595, -0.2943, -0.0702, -0.1748]) tensor([0.4101, 0.1746, 0.2185, 0.1968]) -Greedy action tensor([ 0.4184, -0.0179, -0.0372, -0.2462]) tensor([0.3578, 0.2313, 0.2269, 0.1841]) -Greedy action tensor([ 0.5727, -0.0061, 0.0660, -0.1603]) tensor([0.3783, 0.2120, 0.2279, 0.1818]) -Greedy action tensor([ 0.9688, -0.7450, -0.1107, -0.5658]) tensor([0.5762, 0.1038, 0.1958, 0.1242]) -Greedy action tensor([ 0.5571, -0.1954, -0.0408, -0.1174]) tensor([0.3952, 0.1862, 0.2173, 0.2013]) -Greedy action tensor([ 0.7461, -0.6036, -0.1909, -0.2549]) tensor([0.4954, 0.1285, 0.1941, 0.1821]) -Greedy action tensor([ 0.5679, -0.0329, 0.0117, -0.0831]) tensor([0.3783, 0.2075, 0.2169, 0.1973]) -Greedy action tensor([ 0.7402, -0.7638, -0.2290, -0.3395]) tensor([0.5151, 0.1145, 0.1954, 0.1750]) -Greedy action tensor([ 0.2522, -0.1273, 0.0779, -0.2939]) tensor([0.3222, 0.2205, 0.2707, 0.1866]) -Greedy action tensor([ 0.5044, -0.0448, -0.0608, -0.0379]) tensor([0.3667, 0.2117, 0.2084, 0.2132]) -Greedy action tensor([ 0.4219, -0.0018, -0.1395, -0.0097]) tensor([0.3479, 0.2277, 0.1984, 0.2259]) -Greedy action tensor([ 0.6649, -0.5345, -0.0856, -0.3581]) tensor([0.4688, 0.1413, 0.2214, 0.1686]) -Greedy action tensor([ 0.9158, -0.4993, 0.0802, -0.4413]) tensor([0.5171, 0.1256, 0.2242, 0.1331]) -Greedy action tensor([ 0.8855, -0.6661, -0.0354, -0.3438]) tensor([0.5256, 0.1114, 0.2093, 0.1537]) -Greedy action tensor([ 0.7262, -0.1333, -0.0657, -0.0578]) tensor([0.4286, 0.1815, 0.1942, 0.1957]) -Greedy action tensor([ 0.7454, -0.3345, -0.0814, -0.2600]) tensor([0.4666, 0.1585, 0.2041, 0.1707]) -Greedy action tensor([ 0.7253, -0.3166, -0.0955, -0.0974]) tensor([0.4480, 0.1580, 0.1972, 0.1968]) -Greedy action tensor([ 0.3918, -0.0306, 0.0744, -0.1669]) tensor([0.3384, 0.2218, 0.2463, 0.1935]) -Greedy action tensor([ 1.0159, -0.5718, -0.0926, -0.2479]) tensor([0.5503, 0.1125, 0.1817, 0.1555]) -Greedy action tensor([ 0.6368, -0.1866, -0.0062, -0.3470]) tensor([0.4276, 0.1877, 0.2248, 0.1599]) -Greedy action tensor([ 0.5675, 0.4318, -0.1024, -0.0102]) tensor([0.3394, 0.2964, 0.1737, 0.1905]) -Greedy action tensor([ 0.4860, -0.2782, 0.0013, -0.2367]) tensor([0.3896, 0.1814, 0.2399, 0.1891]) -Greedy action tensor([ 1.4178, -1.1496, -0.0138, -0.9086]) tensor([0.7075, 0.0543, 0.1691, 0.0691]) -Greedy action tensor([ 0.6937, -0.5081, -0.0113, -0.2639]) tensor([0.4590, 0.1380, 0.2268, 0.1762]) -Greedy action tensor([ 0.6353, -0.2339, -0.0388, -0.5191]) tensor([0.4456, 0.1868, 0.2271, 0.1405]) -Greedy action tensor([ 1.0021, -0.8470, -0.0902, -0.5267]) tensor([0.5849, 0.0921, 0.1962, 0.1268]) -Greedy action tensor([ 0.7589, -0.4870, -0.1826, -0.4517]) tensor([0.5061, 0.1456, 0.1974, 0.1508]) -Greedy action tensor([ 0.4405, -0.5477, -0.1218, -0.2128]) tensor([0.4061, 0.1512, 0.2314, 0.2113]) -Greedy action tensor([ 0.5970, -0.0648, -0.0930, -0.0326]) tensor([0.3921, 0.2023, 0.1967, 0.2089]) -Greedy action tensor([ 0.5748, -0.1649, -0.1051, -0.0756]) tensor([0.3991, 0.1905, 0.2022, 0.2083]) -Greedy action tensor([ 0.6041, -0.4975, -0.0747, -0.1894]) tensor([0.4363, 0.1450, 0.2213, 0.1973]) -Greedy action tensor([ 0.3724, -0.3736, -0.1049, -0.0759]) tensor([0.3658, 0.1735, 0.2270, 0.2337]) -Greedy action tensor([-0.8241, -1.4430, 0.6088, -0.1596]) tensor([0.1303, 0.0702, 0.5462, 0.2533]) -Greedy action tensor([0.8212, 0.6250, 0.1393, 0.2548]) tensor([0.3454, 0.2839, 0.1747, 0.1960]) -Greedy action tensor([ 0.9078, -2.6485, -0.0820, 0.2651]) tensor([0.5192, 0.0148, 0.1930, 0.2730]) -Greedy action tensor([ 0.3994, -0.0240, 0.1001, 1.2588]) tensor([0.2102, 0.1376, 0.1558, 0.4964]) -Greedy action tensor([ 0.1840, -0.1628, -0.5428, 0.6087]) tensor([0.2689, 0.1901, 0.1300, 0.4111]) -Greedy action tensor([-1.6053e-03, -2.2419e+00, 3.8553e-01, 6.2927e-01]) tensor([0.2243, 0.0239, 0.3303, 0.4215]) -Greedy action tensor([ 0.1364, -0.1237, 0.2325, 0.1561]) tensor([0.2570, 0.1981, 0.2829, 0.2621]) -Greedy action tensor([ 0.5313, -1.1778, 0.5820, 0.3704]) tensor([0.3242, 0.0587, 0.3411, 0.2760]) -Greedy action tensor([ 0.0407, -0.6116, -0.0234, 0.7916]) tensor([0.2184, 0.1138, 0.2049, 0.4629]) -Greedy action tensor([-0.4645, -0.2173, -1.2537, 0.2858]) tensor([0.2061, 0.2639, 0.0936, 0.4364]) -Greedy action tensor([ 0.6461, -0.3193, 1.2713, 1.5171]) tensor([0.1774, 0.0675, 0.3314, 0.4237]) -Greedy action tensor([-0.3973, 0.5283, 1.1059, -0.9984]) tensor([0.1167, 0.2945, 0.5248, 0.0640]) -Greedy action tensor([ 1.4628, 0.2360, 0.2764, -0.4058]) tensor([0.5705, 0.1673, 0.1742, 0.0880]) -Greedy action tensor([-0.5107, -1.1249, 0.9134, 0.8165]) tensor([0.1056, 0.0572, 0.4389, 0.3983]) -Greedy action tensor([-1.2013, 0.5411, 1.4240, -1.0719]) tensor([0.0462, 0.2637, 0.6376, 0.0525]) -Greedy action tensor([-0.7332, -0.3422, -0.4020, -0.9546]) tensor([0.2140, 0.3164, 0.2980, 0.1715]) -Greedy action tensor([ 0.4613, -0.6767, 1.3409, 1.0492]) tensor([0.1808, 0.0579, 0.4357, 0.3255]) -Greedy action tensor([1.6482, 0.2696, 1.0154, 0.8252]) tensor([0.4500, 0.1134, 0.2390, 0.1976]) -Greedy action tensor([-0.4767, 0.7938, 0.3254, -0.4708]) tensor([0.1282, 0.4568, 0.2860, 0.1290]) -Greedy action tensor([-0.8850, 1.3570, 0.4351, -1.4606]) tensor([0.0679, 0.6395, 0.2544, 0.0382]) -Greedy action tensor([ 0.0539, 0.4964, -0.0744, -0.5483]) tensor([0.2510, 0.3907, 0.2208, 0.1375]) -Greedy action tensor([0.3666, 0.1416, 0.2020, 0.6282]) tensor([0.2534, 0.2024, 0.2150, 0.3292]) -Greedy action tensor([ 0.9833, -0.9782, 0.2564, 0.5283]) tensor([0.4428, 0.0623, 0.2140, 0.2809]) -Greedy action tensor([-0.2506, -0.6703, -0.3154, 2.0372]) tensor([0.0803, 0.0528, 0.0753, 0.7916]) -Greedy action tensor([1.1364, 0.6285, 0.8819, 0.0861]) tensor([0.3667, 0.2207, 0.2843, 0.1283]) -Greedy action tensor([-1.0759, -1.4570, -0.2430, 0.5012]) tensor([0.1133, 0.0774, 0.2607, 0.5486]) -Greedy action tensor([ 0.7999, 0.4661, -0.7951, 1.2718]) tensor([0.2839, 0.2033, 0.0576, 0.4551]) -Greedy action tensor([ 1.8910, -1.7626, 0.9403, 0.3442]) tensor([0.6153, 0.0159, 0.2378, 0.1310]) -Greedy action tensor([0.7531, 0.3329, 1.2991, 0.6878]) tensor([0.2315, 0.1521, 0.3996, 0.2168]) -Greedy action tensor([ 0.3175, -1.8684, 2.1313, -0.4236]) tensor([0.1295, 0.0146, 0.7943, 0.0617]) -Greedy action tensor([ 0.5496, -0.1758, 1.3918, 0.3001]) tensor([0.2181, 0.1056, 0.5063, 0.1699]) -Greedy action tensor([-1.1608, -0.5715, -0.2569, -0.7992]) tensor([0.1491, 0.2688, 0.3681, 0.2140]) -Greedy action tensor([ 0.3698, -0.4244, 0.6666, -0.0800]) tensor([0.2911, 0.1316, 0.3917, 0.1857]) -Greedy action tensor([ 1.3159, -0.8472, 0.3118, 1.5724]) tensor([0.3605, 0.0414, 0.1321, 0.4660]) -Greedy action tensor([-0.1186, 0.1090, -0.0031, -0.4104]) tensor([0.2424, 0.3044, 0.2721, 0.1811]) -Greedy action tensor([-1.0088, -1.0734, -1.2124, -0.0786]) tensor([0.1891, 0.1773, 0.1543, 0.4794]) -Greedy action tensor([ 1.2364, -0.8284, 1.1358, 1.6368]) tensor([0.2838, 0.0360, 0.2566, 0.4236]) -Greedy action tensor([0.0019, 0.1556, 0.3132, 0.5313]) tensor([0.1912, 0.2230, 0.2611, 0.3247]) -Greedy action tensor([-0.9515, 0.2141, 0.1253, -0.0924]) tensor([0.1052, 0.3375, 0.3089, 0.2484]) -Greedy action tensor([ 0.5400, -1.0721, 0.3941, -0.0823]) tensor([0.3845, 0.0767, 0.3324, 0.2064]) -Greedy action tensor([ 0.2201, -0.2257, 0.5501, 1.7172]) tensor([0.1333, 0.0854, 0.1855, 0.5958]) -Greedy action tensor([ 0.6894, 0.5984, -0.0656, -0.0343]) tensor([0.3487, 0.3184, 0.1639, 0.1691]) -Greedy action tensor([ 0.8710, 0.7679, -0.1215, -0.8099]) tensor([0.4067, 0.3668, 0.1507, 0.0757]) -Greedy action tensor([0.3453, 0.2581, 0.6600, 1.7582]) tensor([0.1352, 0.1239, 0.1853, 0.5555]) -Greedy action tensor([-0.1505, -1.6713, 0.0819, -0.3378]) tensor([0.3022, 0.0660, 0.3812, 0.2506]) -Greedy action tensor([-0.2585, -0.2228, 1.1539, -1.0035]) tensor([0.1511, 0.1566, 0.6205, 0.0717]) -Greedy action tensor([ 0.1634, -0.1371, 0.2396, 0.4160]) tensor([0.2435, 0.1803, 0.2628, 0.3135]) -Greedy action tensor([-1.2805, -0.0961, 0.8859, -1.8053]) tensor([0.0736, 0.2406, 0.6423, 0.0435]) -Greedy action tensor([-1.1874, 0.8962, 2.2454, -1.1528]) tensor([0.0244, 0.1958, 0.7546, 0.0252]) -Greedy action tensor([-0.8684, 0.7673, 1.3684, -0.3949]) tensor([0.0585, 0.3001, 0.5475, 0.0939]) -Greedy action tensor([ 0.7665, 0.1321, 0.0835, -0.5770]) tensor([0.4355, 0.2309, 0.2200, 0.1136]) -Greedy action tensor([0.6204, 0.7914, 1.1370, 0.3006]) tensor([0.2179, 0.2585, 0.3653, 0.1583]) -Greedy action tensor([ 1.1352, 0.2242, 1.1883, -0.4096]) tensor([0.3745, 0.1506, 0.3950, 0.0799]) -Greedy action tensor([ 0.5181, -0.0019, 1.4335, 1.2741]) tensor([0.1607, 0.0955, 0.4014, 0.3423]) -Greedy action tensor([-0.1305, -0.7892, -0.5542, 1.4095]) tensor([0.1463, 0.0757, 0.0957, 0.6823]) -Greedy action tensor([ 1.0889, -1.2946, 0.4251, 0.0608]) tensor([0.5090, 0.0469, 0.2621, 0.1820]) -Greedy action tensor([ 1.4863, -0.7199, 0.1221, 1.4763]) tensor([0.4245, 0.0467, 0.1085, 0.4203]) -Greedy action tensor([ 0.9118, -0.2101, 0.4322, 1.3368]) tensor([0.2878, 0.0937, 0.1782, 0.4403]) -Greedy action tensor([ 1.6132, -1.7845, 1.2286, 0.7187]) tensor([0.4710, 0.0158, 0.3206, 0.1926]) -Greedy action tensor([ 0.4408, -0.2597, 0.1086, -0.2168]) tensor([0.3661, 0.1817, 0.2626, 0.1897]) -Greedy action tensor([ 0.2203, -0.8362, 1.5736, 0.4194]) tensor([0.1553, 0.0540, 0.6011, 0.1895]) -Greedy action tensor([-0.9787, -0.1169, 0.8199, 0.0412]) tensor([0.0821, 0.1943, 0.4959, 0.2276]) -Greedy action tensor([-1.0471, -0.4362, 0.3633, -1.3782]) tensor([0.1306, 0.2406, 0.5351, 0.0938]) -Greedy action tensor([-0.1928, 0.0047, 0.9735, -0.1587]) tensor([0.1547, 0.1885, 0.4967, 0.1601]) -Greedy action tensor([-0.0578, -1.7414, 0.0868, -0.2350]) tensor([0.3146, 0.0584, 0.3635, 0.2635]) -Greedy action tensor([-0.2464, 0.5702, 0.0665, -0.5769]) tensor([0.1870, 0.4231, 0.2556, 0.1343]) -Greedy action tensor([ 1.4017, -0.2923, 1.2485, 0.9616]) tensor([0.3723, 0.0684, 0.3194, 0.2398]) -Greedy action tensor([-0.4255, -0.0789, 0.5157, -1.1947]) tensor([0.1838, 0.2599, 0.4711, 0.0852]) -Greedy action tensor([-0.8852, -0.7337, -0.4598, -0.2893]) tensor([0.1815, 0.2112, 0.2778, 0.3294]) -Greedy action tensor([ 0.9596, -0.6529, -0.0857, 0.2086]) tensor([0.4943, 0.0986, 0.1738, 0.2333]) -Greedy action tensor([-1.0686, -1.3448, 0.3487, -0.5622]) tensor([0.1326, 0.1006, 0.5469, 0.2200]) -Greedy action tensor([-0.0135, -1.4404, 0.9027, -0.1158]) tensor([0.2154, 0.0517, 0.5384, 0.1944]) -Greedy action tensor([ 1.1413, -1.4665, 1.2186, 1.6949]) tensor([0.2568, 0.0189, 0.2775, 0.4468]) -Greedy action tensor([-1.0151, -0.2433, 0.3854, 0.0739]) tensor([0.0981, 0.2123, 0.3981, 0.2915]) -Greedy action tensor([1.1813, 0.3816, 0.2099, 0.7788]) tensor([0.4005, 0.1800, 0.1516, 0.2678]) -Greedy action tensor([-0.7869, -2.0107, -0.2909, 1.3418]) tensor([0.0882, 0.0259, 0.1448, 0.7411]) -Greedy action tensor([-0.0273, 0.6540, 0.7698, 0.4678]) tensor([0.1463, 0.2891, 0.3246, 0.2400]) -Greedy action tensor([-0.5279, -0.0721, 0.3533, -0.1113]) tensor([0.1537, 0.2424, 0.3709, 0.2331]) -Greedy action tensor([ 0.8546, -0.4826, -0.2184, 1.6316]) tensor([0.2646, 0.0695, 0.0905, 0.5755]) -Greedy action tensor([ 0.4890, -0.8784, 0.3542, -0.4744]) tensor([0.3984, 0.1015, 0.3481, 0.1520]) -Greedy action tensor([ 1.1194, -0.1929, 0.4161, 1.0630]) tensor([0.3691, 0.0994, 0.1827, 0.3489]) -Greedy action tensor([ 0.4980, 0.1196, 1.0026, -0.6332]) tensor([0.2729, 0.1870, 0.4521, 0.0881]) -Greedy action tensor([ 1.5516, -0.7979, -0.4776, 0.8799]) tensor([0.5755, 0.0549, 0.0756, 0.2940]) -Greedy action tensor([ 0.9768, -0.4766, -0.3433, 0.0121]) tensor([0.5313, 0.1242, 0.1419, 0.2025]) -Greedy action tensor([ 1.0903, -0.0881, -0.5418, 0.4653]) tensor([0.4905, 0.1510, 0.0959, 0.2626]) -Greedy action tensor([ 1.2481, -0.6811, -0.2514, 0.6004]) tensor([0.5286, 0.0768, 0.1180, 0.2766]) -Greedy action tensor([ 2.0935, -1.0275, -0.3351, 0.8305]) tensor([0.7067, 0.0312, 0.0623, 0.1999]) -Greedy action tensor([ 1.6434, -0.4401, -0.5220, 0.2938]) tensor([0.6673, 0.0831, 0.0765, 0.1731]) -Greedy action tensor([ 1.0145, -0.5359, -0.1262, 0.2777]) tensor([0.4974, 0.1055, 0.1590, 0.2381]) -Greedy action tensor([ 0.7222, -0.1196, -0.0164, 0.2678]) tensor([0.3931, 0.1694, 0.1878, 0.2496]) -Greedy action tensor([ 1.4723, -0.8542, -0.0479, 0.3924]) tensor([0.6039, 0.0590, 0.1320, 0.2051]) -Greedy action tensor([ 1.8753, -0.3966, -0.8317, 0.5517]) tensor([0.6964, 0.0718, 0.0465, 0.1854]) -Greedy action tensor([ 1.2228, 0.2154, -0.1401, 0.3449]) tensor([0.4910, 0.1793, 0.1257, 0.2041]) -Greedy action tensor([ 1.4833, -0.4574, -0.3242, 0.2888]) tensor([0.6209, 0.0892, 0.1019, 0.1880]) -Greedy action tensor([ 1.3064, -0.3317, -0.3453, 0.4247]) tensor([0.5555, 0.1080, 0.1065, 0.2300]) -Greedy action tensor([ 2.0226, -0.8783, -0.5825, 0.3770]) tensor([0.7566, 0.0416, 0.0559, 0.1459]) -Greedy action tensor([ 1.1837, -0.3565, -0.1427, 0.2984]) tensor([0.5285, 0.1133, 0.1403, 0.2180]) -Greedy action tensor([ 1.5688, -0.3231, -0.3148, 0.5975]) tensor([0.5947, 0.0897, 0.0904, 0.2252]) -Greedy action tensor([ 1.8692, -0.6241, -0.2529, 0.4595]) tensor([0.6913, 0.0571, 0.0828, 0.1688]) -Greedy action tensor([ 1.5367, -0.5933, -0.2740, 0.3038]) tensor([0.6354, 0.0755, 0.1039, 0.1852]) -Greedy action tensor([ 1.9980, -1.0896, -0.3177, 0.3886]) tensor([0.7439, 0.0339, 0.0734, 0.1488]) -Greedy action tensor([ 0.3010, -0.0560, -0.0007, -0.0177]) tensor([0.3158, 0.2210, 0.2335, 0.2296]) -Greedy action tensor([ 0.8493, -0.2925, -0.0381, 0.0366]) tensor([0.4599, 0.1468, 0.1893, 0.2040]) -Greedy action tensor([ 1.0995, -0.2597, -0.3592, 0.4752]) tensor([0.4938, 0.1268, 0.1148, 0.2645]) -Greedy action tensor([ 2.0393, -0.9508, -0.1448, 0.7112]) tensor([0.7004, 0.0352, 0.0788, 0.1856]) -Greedy action tensor([ 0.6344, -0.4714, -0.3210, 0.1012]) tensor([0.4344, 0.1437, 0.1671, 0.2548]) -Greedy action tensor([ 1.2119, -0.3439, -0.4202, 0.2732]) tensor([0.5563, 0.1174, 0.1088, 0.2176]) -Greedy action tensor([ 2.2028, -0.9884, -0.6148, 1.1745]) tensor([0.6857, 0.0282, 0.0410, 0.2452]) -Greedy action tensor([ 0.7791, -0.4425, 0.0018, 0.1183]) tensor([0.4404, 0.1298, 0.2024, 0.2274]) -Greedy action tensor([ 1.4409, -0.3459, -0.4250, 0.1761]) tensor([0.6232, 0.1044, 0.0964, 0.1759]) -Greedy action tensor([ 1.1480, -0.9047, -0.5525, 0.4181]) tensor([0.5577, 0.0716, 0.1018, 0.2688]) -Greedy action tensor([ 1.6885, -0.6663, -0.3305, 0.4586]) tensor([0.6579, 0.0624, 0.0874, 0.1923]) -Greedy action tensor([ 1.6896, 0.1292, -0.5588, 0.4532]) tensor([0.6227, 0.1308, 0.0657, 0.1808]) -Greedy action tensor([ 2.1285, -1.3829, -0.1171, -0.0377]) tensor([0.7998, 0.0239, 0.0847, 0.0917]) -Greedy action tensor([ 1.1405, -0.2305, -0.0382, 0.1937]) tensor([0.5129, 0.1302, 0.1578, 0.1990]) -Greedy action tensor([ 1.6525, -0.4356, -0.4919, 0.4406]) tensor([0.6499, 0.0805, 0.0761, 0.1934]) -Greedy action tensor([ 1.6537, -0.7587, -0.2145, 0.4773]) tensor([0.6442, 0.0577, 0.0995, 0.1987]) -Greedy action tensor([ 1.2021, -0.4897, -0.2183, 0.2633]) tensor([0.5504, 0.1014, 0.1330, 0.2153]) -Greedy action tensor([ 1.2232, -0.3639, -0.5405, 0.2060]) tensor([0.5755, 0.1177, 0.0987, 0.2081]) -Greedy action tensor([ 2.3041, -0.8835, -0.4802, 0.6919]) tensor([0.7678, 0.0317, 0.0474, 0.1531]) -Greedy action tensor([ 1.1769, -0.3760, -0.3302, 0.2811]) tensor([0.5430, 0.1149, 0.1203, 0.2217]) -Greedy action tensor([ 1.9110, -0.4080, -0.5252, 0.2551]) tensor([0.7263, 0.0715, 0.0635, 0.1387]) -Greedy action tensor([ 1.2700, -0.4310, -0.2946, 0.5561]) tensor([0.5315, 0.0970, 0.1112, 0.2603]) -Greedy action tensor([ 1.6102, -0.3962, 0.1464, 0.1318]) tensor([0.6274, 0.0844, 0.1451, 0.1431]) -Greedy action tensor([ 1.3700, 0.1766, 0.1160, -0.3184]) tensor([0.5639, 0.1710, 0.1609, 0.1042]) -Greedy action tensor([ 0.7283, -0.2218, 0.6848, -0.0835]) tensor([0.3587, 0.1387, 0.3434, 0.1593]) -Greedy action tensor([ 1.4297, -0.1340, -0.1458, 0.0775]) tensor([0.5971, 0.1250, 0.1235, 0.1544]) -Greedy action tensor([0.9541, 0.0380, 0.0310, 0.1623]) tensor([0.4444, 0.1778, 0.1765, 0.2013]) -Greedy action tensor([ 1.3970, -0.4783, -0.3333, 0.1234]) tensor([0.6210, 0.0952, 0.1101, 0.1738]) -Greedy action tensor([ 1.5700, -0.7966, -0.1974, 0.4663]) tensor([0.6265, 0.0588, 0.1070, 0.2078]) -Greedy action tensor([ 1.2385, -0.0457, -0.5323, 0.1773]) tensor([0.5577, 0.1544, 0.0949, 0.1930]) -Greedy action tensor([ 1.4788, -0.5206, -0.6421, 0.1674]) tensor([0.6558, 0.0888, 0.0787, 0.1767]) -Greedy action tensor([ 1.3126, -0.7498, 0.0055, 0.0977]) tensor([0.5902, 0.0750, 0.1597, 0.1751]) -Greedy action tensor([ 1.5584, -0.5448, -0.3564, 0.4498]) tensor([0.6252, 0.0763, 0.0921, 0.2063]) -Greedy action tensor([ 1.4618, -0.4934, -0.3166, 0.2021]) tensor([0.6273, 0.0888, 0.1059, 0.1780]) -Greedy action tensor([ 0.3964, -0.2698, 0.0451, 0.0446]) tensor([0.3424, 0.1759, 0.2409, 0.2408]) -Greedy action tensor([ 1.4614, -0.2273, -0.4763, 0.2242]) tensor([0.6177, 0.1141, 0.0890, 0.1792]) -Greedy action tensor([ 1.6739, -0.8461, -0.3680, 0.7076]) tensor([0.6286, 0.0506, 0.0816, 0.2392]) -Greedy action tensor([ 1.6988, -1.0206, -0.3562, 0.5629]) tensor([0.6600, 0.0435, 0.0845, 0.2120]) -Greedy action tensor([ 1.4332, -0.5932, -0.1676, 0.1772]) tensor([0.6179, 0.0814, 0.1247, 0.1760]) -Greedy action tensor([ 1.7958, -0.5655, -0.4508, 0.6747]) tensor([0.6553, 0.0618, 0.0693, 0.2136]) -Greedy action tensor([ 1.5035, -0.9091, 0.0529, -0.0855]) tensor([0.6544, 0.0586, 0.1534, 0.1336]) -Greedy action tensor([ 2.2512, -0.5165, -0.2549, 0.9723]) tensor([0.7029, 0.0441, 0.0573, 0.1956]) -Greedy action tensor([ 1.4298, -0.6277, -0.7608, 0.2587]) tensor([0.6453, 0.0825, 0.0722, 0.2001]) -Greedy action tensor([ 1.9230, -0.7331, -0.2564, 0.7040]) tensor([0.6762, 0.0475, 0.0765, 0.1998]) -Greedy action tensor([ 2.6064, -0.9577, -0.0948, 1.0568]) tensor([0.7647, 0.0217, 0.0513, 0.1624]) -Greedy action tensor([ 1.2578, -0.7003, -0.1143, 0.3823]) tensor([0.5521, 0.0779, 0.1400, 0.2300]) -Greedy action tensor([ 1.3585, -0.7051, -0.2411, 0.3537]) tensor([0.5899, 0.0749, 0.1192, 0.2160]) -Greedy action tensor([ 1.4251, -0.4398, -0.5997, 0.4024]) tensor([0.6073, 0.0941, 0.0802, 0.2184]) -Greedy action tensor([ 1.8972, -0.3394, -0.8022, 0.4016]) tensor([0.7152, 0.0764, 0.0481, 0.1603]) -Greedy action tensor([ 2.0330, -0.5618, -0.6473, 0.7111]) tensor([0.7093, 0.0530, 0.0486, 0.1891]) -Greedy action tensor([ 1.2769, -0.3985, -0.3069, 0.2843]) tensor([0.5672, 0.1062, 0.1164, 0.2102]) -Greedy action tensor([ 2.0827, -1.1892, -0.5451, 0.5999]) tensor([0.7478, 0.0284, 0.0540, 0.1698]) -Greedy action tensor([ 1.2140, -0.5672, -0.0964, 0.4722]) tensor([0.5224, 0.0880, 0.1409, 0.2488]) -Greedy action tensor([ 0.4522, -0.1012, -0.1020, 0.2702]) tensor([0.3352, 0.1927, 0.1926, 0.2794]) -Greedy action tensor([ 1.0515, -0.2655, -0.2719, 0.3711]) tensor([0.4901, 0.1313, 0.1305, 0.2482]) -Greedy action tensor([ 1.3873, -0.2927, -0.9672, 0.5001]) tensor([0.5906, 0.1101, 0.0561, 0.2432]) -Greedy action tensor([ 1.2540, -0.5460, -0.5364, 0.3432]) tensor([0.5766, 0.0953, 0.0962, 0.2319]) -Greedy action tensor([ 2.5174, -1.1124, -0.2642, 0.5828]) tensor([0.8111, 0.0215, 0.0502, 0.1172]) -Greedy action tensor([ 1.4880, -0.1878, -0.3036, -0.0081]) tensor([0.6338, 0.1186, 0.1056, 0.1420]) -Greedy action tensor([ 1.1062, -0.5936, -0.3055, 0.2778]) tensor([0.5367, 0.0981, 0.1308, 0.2344]) -Greedy action tensor([ 1.8647, -0.8120, -0.2150, 0.3310]) tensor([0.7095, 0.0488, 0.0887, 0.1531]) -Greedy action tensor([ 0.3130, -0.0144, 0.0313, 0.1300]) tensor([0.3023, 0.2179, 0.2281, 0.2517]) -Greedy action tensor([ 0.7085, -0.2276, -0.0432, 0.0137]) tensor([0.4232, 0.1660, 0.1996, 0.2113]) -Greedy action tensor([ 1.0196, -0.4710, -0.0692, -0.4942]) tensor([0.5612, 0.1264, 0.1889, 0.1235]) -Greedy action tensor([ 0.7443, -0.6630, -0.1120, -0.4978]) tensor([0.5106, 0.1250, 0.2169, 0.1475]) -Greedy action tensor([ 0.8112, -0.4424, -0.1350, -0.4890]) tensor([0.5138, 0.1467, 0.1995, 0.1400]) -Greedy action tensor([ 0.4158, -0.2430, 0.0723, -0.2959]) tensor([0.3680, 0.1904, 0.2610, 0.1806]) -Greedy action tensor([ 1.1146, -1.0133, 0.0168, -0.5452]) tensor([0.6087, 0.0725, 0.2031, 0.1158]) -Greedy action tensor([ 0.4513, -0.0380, -0.0264, -0.2554]) tensor([0.3668, 0.2248, 0.2275, 0.1809]) -Greedy action tensor([ 0.3661, 0.0712, -0.1031, -0.0512]) tensor([0.3302, 0.2458, 0.2065, 0.2175]) -Greedy action tensor([ 0.8269, -0.6643, -0.0698, -0.8520]) tensor([0.5496, 0.1237, 0.2242, 0.1025]) -Greedy action tensor([ 1.1151, -1.1888, 0.0439, -0.4661]) tensor([0.6067, 0.0606, 0.2079, 0.1248]) -Greedy action tensor([ 0.4134, -0.2601, -0.1590, -0.2251]) tensor([0.3843, 0.1960, 0.2168, 0.2030]) -Greedy action tensor([ 0.5313, -0.0822, -0.0156, -0.1936]) tensor([0.3839, 0.2079, 0.2222, 0.1860]) -Greedy action tensor([ 0.6287, -0.3857, -0.1385, -0.0983]) tensor([0.4329, 0.1570, 0.2010, 0.2092]) -Greedy action tensor([ 0.6634, -0.4917, -0.0724, -0.7693]) tensor([0.4919, 0.1550, 0.2357, 0.1174]) -Greedy action tensor([ 0.4676, -0.2954, 0.0712, -0.5048]) tensor([0.3973, 0.1852, 0.2673, 0.1502]) -Greedy action tensor([ 0.7840, -0.3039, -0.0214, -0.8119]) tensor([0.5034, 0.1696, 0.2250, 0.1020]) -Greedy action tensor([ 0.6671, -0.5731, -0.1899, -0.4224]) tensor([0.4878, 0.1411, 0.2070, 0.1641]) -Greedy action tensor([ 0.6142, -0.5160, -0.0280, -0.2687]) tensor([0.4420, 0.1427, 0.2325, 0.1828]) -Greedy action tensor([ 0.2435, 0.2754, -0.1044, -0.1521]) tensor([0.2931, 0.3026, 0.2070, 0.1973]) -Greedy action tensor([ 0.7932, -0.4621, -0.0631, -0.4249]) tensor([0.4986, 0.1421, 0.2118, 0.1475]) -Greedy action tensor([ 0.4956, -0.2378, 0.1819, -0.3069]) tensor([0.3760, 0.1806, 0.2748, 0.1685]) -Greedy action tensor([ 1.0237, -0.7212, 0.1426, -0.6843]) tensor([0.5649, 0.0987, 0.2341, 0.1024]) -Greedy action tensor([ 0.8393, -0.9059, -0.0295, -0.5866]) tensor([0.5452, 0.0952, 0.2287, 0.1310]) -Greedy action tensor([ 0.6448, -0.5076, 0.1844, -0.6493]) tensor([0.4502, 0.1422, 0.2841, 0.1234]) -Greedy action tensor([ 0.7780, -0.1231, 0.0831, -0.0641]) tensor([0.4281, 0.1739, 0.2136, 0.1844]) -Greedy action tensor([ 0.4274, -0.1332, -0.1313, -0.1549]) tensor([0.3702, 0.2113, 0.2117, 0.2068]) -Greedy action tensor([ 0.7928, -0.1546, -0.4253, -0.4752]) tensor([0.5089, 0.1973, 0.1505, 0.1432]) -Greedy action tensor([ 0.9143, -0.5704, -0.0596, -0.3497]) tensor([0.5300, 0.1201, 0.2001, 0.1497]) -Greedy action tensor([ 0.7089, -0.3616, -0.0231, -0.2512]) tensor([0.4532, 0.1554, 0.2179, 0.1735]) -Greedy action tensor([ 0.4060, -0.5209, -0.1371, -0.2649]) tensor([0.4019, 0.1591, 0.2335, 0.2055]) -Greedy action tensor([ 0.9361, -0.4202, -0.0161, -0.2789]) tensor([0.5154, 0.1328, 0.1989, 0.1529]) -Greedy action tensor([ 0.5193, -0.1946, 0.0111, -0.2418]) tensor([0.3909, 0.1914, 0.2351, 0.1826]) -Greedy action tensor([ 0.9382, -0.6118, 0.0132, -0.3911]) tensor([0.5338, 0.1133, 0.2117, 0.1413]) -Greedy action tensor([ 0.9852, -0.7991, 0.1079, -0.4671]) tensor([0.5501, 0.0924, 0.2288, 0.1287]) -Greedy action tensor([ 0.6179, -0.1771, -0.1656, -0.5621]) tensor([0.4513, 0.2038, 0.2062, 0.1387]) -Greedy action tensor([ 0.2926, -0.2507, 0.0414, -0.2184]) tensor([0.3380, 0.1963, 0.2629, 0.2028]) -Greedy action tensor([ 0.6385, -0.4579, -0.0600, -0.0787]) tensor([0.4311, 0.1440, 0.2144, 0.2104]) -Greedy action tensor([ 0.7401, -0.3726, -0.0362, -0.1486]) tensor([0.4546, 0.1494, 0.2091, 0.1869]) -Greedy action tensor([ 0.1358, 0.2805, -0.1088, -0.1761]) tensor([0.2724, 0.3148, 0.2133, 0.1994]) -Greedy action tensor([ 0.6558, 0.1573, -0.1648, -0.1710]) tensor([0.4024, 0.2444, 0.1771, 0.1760]) -Greedy action tensor([ 0.8242, -0.6488, -0.0019, -0.4565]) tensor([0.5142, 0.1179, 0.2251, 0.1429]) -Greedy action tensor([ 0.3683, -0.2987, -0.1157, -0.2121]) tensor([0.3719, 0.1909, 0.2292, 0.2081]) -Greedy action tensor([ 1.0300, -0.4647, -0.2583, -0.2660]) tensor([0.5638, 0.1265, 0.1555, 0.1543]) -Greedy action tensor([ 0.5705, 0.2543, -0.2646, 0.2971]) tensor([0.3420, 0.2493, 0.1484, 0.2602]) -Greedy action tensor([ 1.1088, -0.9567, 0.1843, -0.4406]) tensor([0.5761, 0.0730, 0.2285, 0.1223]) -Greedy action tensor([ 0.8208, -0.2881, 0.1326, -0.2660]) tensor([0.4609, 0.1521, 0.2316, 0.1555]) -Greedy action tensor([ 0.7424, -0.7697, 0.0308, -0.6106]) tensor([0.5077, 0.1119, 0.2492, 0.1312]) -Greedy action tensor([ 1.0318, -0.8471, 0.0186, -0.4625]) tensor([0.5746, 0.0878, 0.2086, 0.1290]) -Greedy action tensor([ 0.7839, -0.1558, 0.0377, -0.4471]) tensor([0.4636, 0.1812, 0.2198, 0.1354]) -Greedy action tensor([ 0.7848, -0.2924, 0.0636, -0.0896]) tensor([0.4457, 0.1518, 0.2167, 0.1859]) -Greedy action tensor([ 0.7924, -0.0066, 0.1193, -0.1910]) tensor([0.4285, 0.1927, 0.2186, 0.1603]) -Greedy action tensor([ 0.6223, -0.2900, -0.0350, -0.0178]) tensor([0.4086, 0.1641, 0.2118, 0.2155]) -Greedy action tensor([ 0.8295, -0.5172, 0.0176, -0.2453]) tensor([0.4889, 0.1271, 0.2171, 0.1669]) -Greedy action tensor([ 0.2144, -0.0789, 0.0593, -0.1715]) tensor([0.3047, 0.2272, 0.2609, 0.2071]) -Greedy action tensor([ 0.3502, -0.1262, -0.0867, -0.2807]) tensor([0.3573, 0.2219, 0.2308, 0.1901]) -Greedy action tensor([ 0.6172, -0.1804, -0.0954, -0.1557]) tensor([0.4162, 0.1875, 0.2041, 0.1922]) -Greedy action tensor([ 0.7282, 0.2610, -0.0663, -0.1003]) tensor([0.3976, 0.2492, 0.1796, 0.1736]) -Greedy action tensor([ 0.7310, -0.4337, -0.0259, -0.2955]) tensor([0.4674, 0.1458, 0.2193, 0.1675]) -Greedy action tensor([ 0.2690, -0.1043, -0.0710, -0.3110]) tensor([0.3378, 0.2326, 0.2405, 0.1892]) -Greedy action tensor([ 1.1981, -0.7666, -0.0700, -0.4097]) tensor([0.6166, 0.0864, 0.1735, 0.1235]) -Greedy action tensor([ 0.7397, -0.5321, -0.1594, -0.7022]) tensor([0.5198, 0.1457, 0.2115, 0.1229]) -Greedy action tensor([ 0.7613, -0.5973, -0.0092, -0.6249]) tensor([0.5077, 0.1305, 0.2349, 0.1269]) -Greedy action tensor([ 8.5140e-01, -6.0054e-01, 2.7794e-04, -4.0379e-01]) tensor([0.5139, 0.1203, 0.2194, 0.1465]) -Greedy action tensor([ 0.6032, -0.4068, -0.0705, -0.3402]) tensor([0.4418, 0.1609, 0.2252, 0.1720]) -Greedy action tensor([ 0.7269, -0.8395, -0.0129, -0.3788]) tensor([0.4958, 0.1035, 0.2366, 0.1641]) -Greedy action tensor([ 0.2316, -0.2866, -0.1699, 0.0187]) tensor([0.3254, 0.1938, 0.2178, 0.2630]) -Greedy action tensor([ 1.1216, -0.5764, 0.0060, -0.6063]) tensor([0.5923, 0.1084, 0.1941, 0.1052]) -Greedy action tensor([ 0.6799, -0.4914, 0.0804, -0.2690]) tensor([0.4452, 0.1380, 0.2444, 0.1724]) -Greedy action tensor([ 0.4754, 0.0610, -0.1266, -0.0819]) tensor([0.3596, 0.2376, 0.1969, 0.2059]) -Greedy action tensor([ 0.7046, -0.6357, 0.0654, -0.7116]) tensor([0.4921, 0.1288, 0.2597, 0.1194]) -Greedy action tensor([ 0.9202, -0.6946, -0.0237, -0.4395]) tensor([0.5421, 0.1078, 0.2109, 0.1392]) -Greedy action tensor([ 0.8869, -0.3764, 0.0781, -0.6105]) tensor([0.5123, 0.1448, 0.2282, 0.1146]) -Greedy action tensor([ 1.2009, -0.4986, -0.2932, -0.2928]) tensor([0.6128, 0.1120, 0.1375, 0.1376]) -Greedy action tensor([ 0.3563, -0.1746, 0.2847, -0.3929]) tensor([0.3342, 0.1966, 0.3112, 0.1580]) -Greedy action tensor([ 0.5238, 0.0573, -0.0468, -0.1092]) tensor([0.3672, 0.2303, 0.2075, 0.1950]) -Greedy action tensor([ 0.1831, -0.0929, 0.0819, -0.3266]) tensor([0.3065, 0.2325, 0.2769, 0.1841]) -Greedy action tensor([ 0.1757, -0.1720, -0.0326, -0.3497]) tensor([0.3216, 0.2271, 0.2611, 0.1902]) -Greedy action tensor([ 0.1221, -0.0237, -0.3451, -0.4497]) tensor([0.3273, 0.2829, 0.2051, 0.1848]) -Greedy action tensor([ 0.3642, -0.4498, -0.2247, -0.2161]) tensor([0.3910, 0.1732, 0.2170, 0.2188]) -Greedy action tensor([ 0.9386, -0.5682, 0.0715, -0.2300]) tensor([0.5121, 0.1135, 0.2152, 0.1592]) -Greedy action tensor([ 0.8249, -0.4077, -0.1369, -0.3426]) tensor([0.5038, 0.1469, 0.1926, 0.1568]) -Greedy action tensor([-1.6450, -0.4055, 0.5736, 0.1016]) tensor([0.0516, 0.1782, 0.4743, 0.2959]) -Greedy action tensor([-1.4322, -0.6652, 1.2595, 1.0951]) tensor([0.0329, 0.0708, 0.4849, 0.4114]) -Greedy action tensor([-1.9402, -0.4575, 0.6642, -0.1784]) tensor([0.0404, 0.1780, 0.5464, 0.2353]) -Greedy action tensor([-1.7215, -0.5185, 0.5595, -0.0212]) tensor([0.0510, 0.1700, 0.4995, 0.2795]) -Greedy action tensor([-0.7247, -0.6073, 0.1769, 0.2281]) tensor([0.1393, 0.1566, 0.3431, 0.3611]) -Greedy action tensor([-1.8640, -0.4591, 0.6378, -0.1287]) tensor([0.0436, 0.1776, 0.5318, 0.2471]) -Greedy action tensor([-1.9048, -0.4595, 0.6466, -0.1623]) tensor([0.0421, 0.1784, 0.5393, 0.2402]) -Greedy action tensor([-1.9099, -0.4291, 0.6450, -0.1576]) tensor([0.0416, 0.1829, 0.5355, 0.2400]) -Greedy action tensor([-0.3832, -0.5531, 0.6057, 1.4077]) tensor([0.0950, 0.0802, 0.2554, 0.5695]) -Greedy action tensor([-1.5414, -0.3703, 0.4033, 0.0903]) tensor([0.0612, 0.1975, 0.4282, 0.3131]) -Greedy action tensor([-1.7462, -0.4985, 0.6949, 0.1357]) tensor([0.0444, 0.1545, 0.5097, 0.2914]) -Greedy action tensor([-1.9463, -0.4501, 0.6681, -0.1814]) tensor([0.0401, 0.1788, 0.5471, 0.2340]) -Greedy action tensor([-1.9107, -0.4491, 0.6536, -0.1601]) tensor([0.0416, 0.1792, 0.5399, 0.2393]) -Greedy action tensor([-1.7684, -0.3988, 0.5701, -0.0951]) tensor([0.0485, 0.1907, 0.5025, 0.2584]) -Greedy action tensor([-1.7880, -0.3554, 0.5942, -0.0783]) tensor([0.0464, 0.1944, 0.5026, 0.2565]) -Greedy action tensor([-1.9307, -0.4255, 0.6572, -0.1723]) tensor([0.0406, 0.1830, 0.5405, 0.2358]) -Greedy action tensor([-1.5621, -0.0846, 0.6430, 0.1859]) tensor([0.0495, 0.2170, 0.4491, 0.2844]) -Greedy action tensor([-1.6432, -0.4521, 0.7036, 0.4236]) tensor([0.0442, 0.1453, 0.4616, 0.3489]) -Greedy action tensor([-1.7603, -0.4765, 0.6360, -0.0726]) tensor([0.0476, 0.1719, 0.5230, 0.2575]) -Greedy action tensor([-1.7512, -0.4950, 0.6201, -0.0031]) tensor([0.0477, 0.1675, 0.5109, 0.2739]) -Greedy action tensor([-1.6382, -0.4785, 0.5528, 0.1303]) tensor([0.0526, 0.1679, 0.4709, 0.3086]) -Greedy action tensor([-1.6999, -0.1265, 0.5217, -0.1300]) tensor([0.0504, 0.2430, 0.4646, 0.2421]) -Greedy action tensor([-1.8268, -0.1853, 0.5889, -0.0516]) tensor([0.0430, 0.2219, 0.4814, 0.2537]) -Greedy action tensor([-0.8068, 0.9821, 0.1373, 0.1431]) tensor([0.0824, 0.4929, 0.2118, 0.2130]) -Greedy action tensor([-1.9075, -0.4602, 0.6564, -0.1593]) tensor([0.0417, 0.1773, 0.5415, 0.2395]) -Greedy action tensor([-1.8782, -0.4575, 0.6386, -0.1453]) tensor([0.0431, 0.1786, 0.5343, 0.2440]) -Greedy action tensor([-1.4040, -0.3534, 0.7127, 0.6050]) tensor([0.0510, 0.1457, 0.4232, 0.3800]) -Greedy action tensor([-1.0801, -0.5940, 0.4032, 0.3046]) tensor([0.0907, 0.1475, 0.3997, 0.3622]) -Greedy action tensor([-1.8524, -0.0772, 0.5648, -0.0958]) tensor([0.0418, 0.2468, 0.4691, 0.2423]) -Greedy action tensor([-1.8768, -0.4580, 0.6406, -0.1322]) tensor([0.0430, 0.1777, 0.5331, 0.2462]) -Greedy action tensor([-1.1349, 0.1495, 0.3128, -0.0877]) tensor([0.0854, 0.3084, 0.3631, 0.2432]) -Greedy action tensor([-1.9433, -0.4554, 0.6730, -0.1761]) tensor([0.0401, 0.1773, 0.5481, 0.2345]) -Greedy action tensor([-1.8664, -0.3785, 0.6571, -0.1233]) tensor([0.0423, 0.1875, 0.5282, 0.2420]) -Greedy action tensor([-1.9314, -0.4536, 0.6757, -0.1606]) tensor([0.0403, 0.1766, 0.5463, 0.2367]) -Greedy action tensor([-1.9144, -0.4090, 0.6581, -0.1505]) tensor([0.0409, 0.1844, 0.5360, 0.2388]) -Greedy action tensor([-1.8946, -0.3498, 0.6394, -0.1511]) tensor([0.0417, 0.1952, 0.5250, 0.2381]) -Greedy action tensor([-1.8568, -0.4876, 0.7149, -0.0125]) tensor([0.0411, 0.1615, 0.5376, 0.2598]) -Greedy action tensor([-1.8659, -0.3451, 0.6285, -0.1057]) tensor([0.0425, 0.1947, 0.5154, 0.2474]) -Greedy action tensor([-1.9204, -0.4224, 0.6543, -0.1637]) tensor([0.0410, 0.1834, 0.5381, 0.2375]) -Greedy action tensor([-0.9852, -0.5531, 0.2364, 0.0092]) tensor([0.1158, 0.1784, 0.3928, 0.3130]) -Greedy action tensor([-1.4279, 0.7138, 0.3410, 0.0386]) tensor([0.0507, 0.4319, 0.2975, 0.2199]) -Greedy action tensor([-1.8839, -0.4335, 0.6458, -0.1212]) tensor([0.0423, 0.1804, 0.5308, 0.2465]) -Greedy action tensor([-0.7253, 0.4614, 0.1810, -0.1098]) tensor([0.1163, 0.3809, 0.2877, 0.2151]) -Greedy action tensor([-1.9091, -0.4451, 0.6525, -0.1559]) tensor([0.0416, 0.1797, 0.5387, 0.2400]) -Greedy action tensor([-1.7355, -0.5007, 0.5776, -0.0623]) tensor([0.0503, 0.1730, 0.5085, 0.2682]) -Greedy action tensor([-1.8801, -0.4097, 0.6284, -0.1579]) tensor([0.0430, 0.1873, 0.5288, 0.2409]) -Greedy action tensor([-1.8852, -0.1783, 0.6036, -0.1505]) tensor([0.0413, 0.2275, 0.4973, 0.2339]) -Greedy action tensor([ 0.0444, -0.5670, 0.5093, 1.1584]) tensor([0.1618, 0.0878, 0.2575, 0.4929]) -Greedy action tensor([-1.9305, -0.4313, 0.6590, -0.1729]) tensor([0.0407, 0.1820, 0.5416, 0.2357]) -Greedy action tensor([-1.8906, -0.4843, 0.6582, -0.1442]) tensor([0.0424, 0.1729, 0.5419, 0.2429]) -Greedy action tensor([-1.9366, -0.4576, 0.6674, -0.1737]) tensor([0.0404, 0.1774, 0.5465, 0.2357]) -Greedy action tensor([-1.1724, -0.3773, 0.4151, -0.1770]) tensor([0.0925, 0.2048, 0.4524, 0.2503]) -Greedy action tensor([-1.9028, -0.4697, 0.6559, -0.1354]) tensor([0.0417, 0.1749, 0.5391, 0.2443]) -Greedy action tensor([-1.4588, 0.4242, 0.2533, 0.1921]) tensor([0.0546, 0.3587, 0.3023, 0.2844]) -Greedy action tensor([-1.8698, -0.4647, 0.6165, -0.1448]) tensor([0.0440, 0.1795, 0.5292, 0.2472]) -Greedy action tensor([-1.7511, -0.4433, 0.5683, -0.0818]) tensor([0.0496, 0.1833, 0.5040, 0.2631]) -Greedy action tensor([-1.0622, 0.8564, 0.1338, 0.1829]) tensor([0.0685, 0.4668, 0.2266, 0.2380]) -Greedy action tensor([-1.5216, -0.5858, 0.4775, -0.0653]) tensor([0.0657, 0.1675, 0.4850, 0.2819]) -Greedy action tensor([-1.8984, -0.4130, 0.6397, -0.1347]) tensor([0.0418, 0.1848, 0.5294, 0.2440]) -Greedy action tensor([-1.9052, -0.4555, 0.6768, -0.1480]) tensor([0.0412, 0.1755, 0.5446, 0.2387]) -Greedy action tensor([-0.1256, -0.3742, 0.2014, 0.2369]) tensor([0.2172, 0.1694, 0.3012, 0.3121]) -Greedy action tensor([-0.8273, 0.8497, 0.1279, -0.0329]) tensor([0.0896, 0.4793, 0.2329, 0.1983]) -Greedy action tensor([-1.8784, -0.4081, 0.6376, -0.1423]) tensor([0.0427, 0.1859, 0.5289, 0.2425]) -Greedy action tensor([-1.7828, -0.4750, 0.5846, -0.1046]) tensor([0.0483, 0.1785, 0.5148, 0.2585]) -Greedy action tensor([-1.4756, -0.4854, 0.8924, 0.0322]) tensor([0.0530, 0.1425, 0.5653, 0.2392]) -Greedy action tensor([-1.9260, -0.3892, 0.6496, -0.1693]) tensor([0.0407, 0.1891, 0.5345, 0.2357]) -Greedy action tensor([-1.8808, -0.4450, 0.6413, -0.1472]) tensor([0.0429, 0.1802, 0.5341, 0.2428]) -Greedy action tensor([-0.7250, -0.5188, 1.0534, 1.5521]) tensor([0.0559, 0.0687, 0.3308, 0.5447]) -Greedy action tensor([-1.9108, -0.4399, 0.6487, -0.1634]) tensor([0.0416, 0.1812, 0.5382, 0.2389]) -Greedy action tensor([-1.7756, -0.4748, 0.5947, -0.0433]) tensor([0.0476, 0.1747, 0.5089, 0.2689]) -Greedy action tensor([-1.1260, -0.6246, 0.3487, 0.0395]) tensor([0.0978, 0.1614, 0.4272, 0.3136]) -Greedy action tensor([-1.8171, -0.3992, 0.5947, -0.1057]) tensor([0.0458, 0.1892, 0.5112, 0.2538]) -Greedy action tensor([-1.7330, -0.4041, 0.5294, -0.0760]) tensor([0.0510, 0.1924, 0.4895, 0.2672]) -Greedy action tensor([-1.6783, -0.5160, 0.5219, 0.0069]) tensor([0.0537, 0.1717, 0.4849, 0.2897]) -Greedy action tensor([-0.9021, -0.4279, 0.2619, 0.0115]) tensor([0.1204, 0.1935, 0.3858, 0.3003]) -Greedy action tensor([-1.2629, -0.6313, 0.3205, 0.1435]) tensor([0.0845, 0.1589, 0.4117, 0.3449]) -Greedy action tensor([-1.4253, -1.0033, 0.6820, 0.5722]) tensor([0.0552, 0.0842, 0.4539, 0.4067]) -Greedy action tensor([-1.6916, -0.5315, 0.6750, 0.1516]) tensor([0.0472, 0.1507, 0.5036, 0.2984]) -Greedy action tensor([-1.8915, -0.4277, 0.6375, -0.1549]) tensor([0.0425, 0.1836, 0.5327, 0.2412]) -Greedy action tensor([-1.8537, -0.3627, 0.6403, -0.1248]) tensor([0.0431, 0.1916, 0.5223, 0.2430]) -Greedy action tensor([-1.8660, -0.4073, 0.6683, -0.1250]) tensor([0.0424, 0.1821, 0.5340, 0.2415]) -Greedy action tensor([ 1.3518, -0.7832, 0.7254, -0.2859]) tensor([0.5414, 0.0640, 0.2894, 0.1053]) -Greedy action tensor([ 0.5341, -0.9801, -0.2341, 0.9686]) tensor([0.3098, 0.0681, 0.1437, 0.4784]) -Greedy action tensor([ 1.2496, -1.1755, 0.6563, 1.1723]) tensor([0.3896, 0.0345, 0.2153, 0.3606]) -Greedy action tensor([-1.2981, -0.2888, 1.1264, -1.2256]) tensor([0.0621, 0.1702, 0.7010, 0.0667]) -Greedy action tensor([-0.1570, 0.7315, -0.0234, -0.0674]) tensor([0.1764, 0.4290, 0.2016, 0.1930]) -Greedy action tensor([ 0.6033, -1.7083, 0.8226, -0.0435]) tensor([0.3487, 0.0346, 0.4342, 0.1826]) -Greedy action tensor([0.0309, 0.4785, 1.1440, 0.0476]) tensor([0.1509, 0.2362, 0.4594, 0.1535]) -Greedy action tensor([-0.9235, -0.4038, 0.4241, 0.1404]) tensor([0.1061, 0.1784, 0.4082, 0.3074]) -Greedy action tensor([ 0.2971, -1.1104, 1.0456, 0.4251]) tensor([0.2225, 0.0545, 0.4702, 0.2528]) -Greedy action tensor([ 1.0567, 0.1584, -0.1006, -0.2998]) tensor([0.5053, 0.2058, 0.1588, 0.1301]) -Greedy action tensor([-0.1542, 0.0794, 1.6696, -0.1030]) tensor([0.1051, 0.1328, 0.6514, 0.1107]) -Greedy action tensor([-0.0164, -1.4555, -0.4626, -0.0432]) tensor([0.3508, 0.0832, 0.2245, 0.3415]) -Greedy action tensor([ 0.5527, -1.1364, -0.0899, 1.7204]) tensor([0.2030, 0.0375, 0.1068, 0.6527]) -Greedy action tensor([ 0.2460, -0.4479, -0.3453, 0.3854]) tensor([0.3122, 0.1560, 0.1728, 0.3589]) -Greedy action tensor([-0.4363, -0.7887, -0.0918, 2.3151]) tensor([0.0533, 0.0374, 0.0752, 0.8342]) -Greedy action tensor([-0.2774, -1.2013, -0.0379, 0.0304]) tensor([0.2483, 0.0985, 0.3154, 0.3378]) -Greedy action tensor([-0.0823, 0.3028, -0.1768, -0.8868]) tensor([0.2613, 0.3841, 0.2377, 0.1169]) -Greedy action tensor([ 0.4072, 0.7579, 0.2919, -0.6532]) tensor([0.2734, 0.3883, 0.2436, 0.0947]) -Greedy action tensor([ 0.3543, -1.0408, 0.5556, 0.9526]) tensor([0.2331, 0.0578, 0.2851, 0.4240]) -Greedy action tensor([ 1.1138, 0.3400, -0.3875, -0.1457]) tensor([0.5082, 0.2344, 0.1132, 0.1442]) -Greedy action tensor([ 0.6373, -0.3976, -0.1102, -0.1421]) tensor([0.4372, 0.1553, 0.2070, 0.2005]) -Greedy action tensor([-0.2941, -0.4287, -0.5966, 0.0530]) tensor([0.2483, 0.2170, 0.1835, 0.3513]) -Greedy action tensor([ 0.2594, -0.4701, 1.8871, -0.4535]) tensor([0.1415, 0.0682, 0.7208, 0.0694]) -Greedy action tensor([ 1.3568, 0.6666, 1.1819, -0.0862]) tensor([0.3880, 0.1946, 0.3258, 0.0917]) -Greedy action tensor([-0.3480, -0.5751, 0.1260, -0.8055]) tensor([0.2478, 0.1974, 0.3980, 0.1568]) -Greedy action tensor([-0.8198, -0.8927, 2.1039, -1.1397]) tensor([0.0470, 0.0437, 0.8751, 0.0342]) -Greedy action tensor([-0.3760, -1.0174, 0.2748, 0.5787]) tensor([0.1655, 0.0872, 0.3173, 0.4300]) -Greedy action tensor([ 1.4023, -0.2785, 1.0852, 0.5240]) tensor([0.4292, 0.0799, 0.3126, 0.1783]) -Greedy action tensor([-0.7748, -0.8536, 0.9770, 0.1321]) tensor([0.0984, 0.0909, 0.5671, 0.2436]) -Greedy action tensor([ 1.0025, -1.2289, -0.1506, 1.2393]) tensor([0.3717, 0.0399, 0.1173, 0.4710]) -Greedy action tensor([ 1.0250, -1.2268, 0.2518, 0.5465]) tensor([0.4574, 0.0481, 0.2111, 0.2834]) -Greedy action tensor([ 0.4353, -0.2395, -0.4452, 0.1736]) tensor([0.3713, 0.1891, 0.1539, 0.2858]) -Greedy action tensor([ 1.1016, 0.4999, 0.7122, -0.4877]) tensor([0.4116, 0.2255, 0.2789, 0.0840]) -Greedy action tensor([ 0.0757, -0.1705, 1.3528, -0.9200]) tensor([0.1743, 0.1363, 0.6250, 0.0644]) -Greedy action tensor([-0.1384, 0.4060, 1.0739, -0.2192]) tensor([0.1427, 0.2460, 0.4797, 0.1316]) -Greedy action tensor([-0.3528, -0.8902, 1.2638, 0.4327]) tensor([0.1135, 0.0663, 0.5714, 0.2489]) -Greedy action tensor([ 1.1028, -0.7950, -0.3971, 1.1421]) tensor([0.4144, 0.0621, 0.0925, 0.4310]) -Greedy action tensor([0.6142, 0.6288, 0.5411, 1.2082]) tensor([0.2103, 0.2134, 0.1955, 0.3809]) -Greedy action tensor([ 0.8797, -0.8370, 0.5658, 0.8455]) tensor([0.3476, 0.0625, 0.2540, 0.3359]) -Greedy action tensor([-1.0616e+00, -1.0131e+00, 4.9591e-04, -6.0388e-01]) tensor([0.1533, 0.1609, 0.4434, 0.2423]) -Greedy action tensor([-0.2266, -0.3415, 0.2122, -0.1392]) tensor([0.2206, 0.1966, 0.3421, 0.2407]) -Greedy action tensor([ 1.6432, -0.3289, 0.1839, 1.0391]) tensor([0.5213, 0.0726, 0.1212, 0.2849]) -Greedy action tensor([-0.0542, -1.2339, 0.8710, -0.5892]) tensor([0.2265, 0.0696, 0.5713, 0.1326]) -Greedy action tensor([-0.4567, -0.9161, 0.2837, -0.0524]) tensor([0.1913, 0.1209, 0.4012, 0.2867]) -Greedy action tensor([-0.1015, -1.0571, 0.8655, 0.3917]) tensor([0.1769, 0.0680, 0.4653, 0.2897]) -Greedy action tensor([ 0.2985, -1.3163, -0.2270, 0.2831]) tensor([0.3604, 0.0717, 0.2131, 0.3549]) -Greedy action tensor([-0.7150, -0.1095, 0.4836, -0.4174]) tensor([0.1334, 0.2445, 0.4424, 0.1797]) -Greedy action tensor([ 0.3534, -0.5882, 0.8607, -0.0591]) tensor([0.2693, 0.1050, 0.4473, 0.1783]) -Greedy action tensor([ 0.2947, -0.9307, -0.1505, 0.5457]) tensor([0.3106, 0.0912, 0.1990, 0.3992]) -Greedy action tensor([-1.9951, -1.2312, 0.6700, -0.5440]) tensor([0.0459, 0.0985, 0.6596, 0.1959]) -Greedy action tensor([-0.8015, 0.1348, 0.4066, -0.7656]) tensor([0.1260, 0.3215, 0.4219, 0.1306]) -Greedy action tensor([ 0.2208, -0.7941, 0.9709, 0.3689]) tensor([0.2155, 0.0781, 0.4564, 0.2500]) -Greedy action tensor([ 0.3597, -1.4175, -0.0265, 0.2796]) tensor([0.3608, 0.0610, 0.2452, 0.3330]) -Greedy action tensor([ 1.0527, -0.7970, 0.1147, -0.7269]) tensor([0.5823, 0.0916, 0.2279, 0.0982]) -Greedy action tensor([ 1.0886, -1.4612, 0.9139, 1.0034]) tensor([0.3526, 0.0275, 0.2961, 0.3238]) -Greedy action tensor([ 0.4510, -0.5847, -0.7719, 1.3149]) tensor([0.2486, 0.0883, 0.0732, 0.5899]) -Greedy action tensor([-0.0069, -0.7006, -0.5332, 0.4954]) tensor([0.2672, 0.1335, 0.1578, 0.4415]) -Greedy action tensor([-0.8590, -0.3770, 0.5413, -0.4702]) tensor([0.1227, 0.1987, 0.4977, 0.1810]) -Greedy action tensor([ 0.8232, -0.1204, 1.1588, -0.4280]) tensor([0.3253, 0.1266, 0.4550, 0.0931]) -Greedy action tensor([ 1.2334, -1.4045, 1.5791, 1.3698]) tensor([0.2754, 0.0197, 0.3892, 0.3157]) -Greedy action tensor([ 0.2740, 0.5654, -0.5874, -1.6392]) tensor([0.3438, 0.4601, 0.1453, 0.0508]) -Greedy action tensor([0.9257, 1.0000, 0.5240, 0.5103]) tensor([0.2936, 0.3162, 0.1964, 0.1938]) -Greedy action tensor([ 0.1156, -0.4898, -1.2579, -0.9582]) tensor([0.4671, 0.2550, 0.1183, 0.1596]) -Greedy action tensor([-0.3574, -0.5613, -0.1932, 0.4536]) tensor([0.1907, 0.1555, 0.2247, 0.4291]) -Greedy action tensor([ 0.5021, -1.4893, 1.6163, -0.8193]) tensor([0.2247, 0.0307, 0.6847, 0.0599]) -Greedy action tensor([-0.6772, -0.3734, 0.6060, -0.6758]) tensor([0.1436, 0.1946, 0.5181, 0.1438]) -Greedy action tensor([0.4374, 0.2497, 0.7830, 0.0526]) tensor([0.2550, 0.2113, 0.3602, 0.1735]) -Greedy action tensor([ 0.1154, -1.9494, 0.1615, 0.1229]) tensor([0.3143, 0.0399, 0.3291, 0.3167]) -Greedy action tensor([0.6570, 0.1005, 0.1989, 0.2931]) tensor([0.3447, 0.1976, 0.2180, 0.2396]) -Greedy action tensor([ 1.0168, 0.4434, 0.2587, -0.0135]) tensor([0.4186, 0.2359, 0.1961, 0.1494]) -Greedy action tensor([ 0.7702, -1.2056, 0.5023, 1.3809]) tensor([0.2670, 0.0370, 0.2043, 0.4917]) -Greedy action tensor([ 1.2385, 0.4405, -0.1446, 0.8931]) tensor([0.4151, 0.1869, 0.1041, 0.2939]) -Greedy action tensor([-1.3343, -0.8232, 1.6013, -0.7027]) tensor([0.0428, 0.0713, 0.8055, 0.0804]) -Greedy action tensor([ 0.0692, 0.3358, 0.9345, -0.1589]) tensor([0.1826, 0.2384, 0.4337, 0.1453]) -Greedy action tensor([ 0.6807, 0.3491, -0.1076, 0.6573]) tensor([0.3175, 0.2279, 0.1444, 0.3102]) -Greedy action tensor([-0.2119, -0.6167, -0.7017, 0.3282]) tensor([0.2502, 0.1670, 0.1533, 0.4295]) -Greedy action tensor([ 0.1521, 0.2374, -0.6725, -0.7850]) tensor([0.3425, 0.3731, 0.1502, 0.1342]) -Greedy action tensor([ 0.7524, 0.5735, 0.8365, -0.3316]) tensor([0.3065, 0.2563, 0.3334, 0.1037]) -Greedy action tensor([ 0.7108, -1.3868, 0.6305, -0.0163]) tensor([0.3954, 0.0485, 0.3649, 0.1911]) -Greedy action tensor([ 1.9425, -0.3980, 0.0244, -0.1863]) tensor([0.7341, 0.0707, 0.1078, 0.0874]) -Greedy action tensor([-0.0352, -0.5294, 1.6548, 0.0719]) tensor([0.1228, 0.0749, 0.6656, 0.1367]) -Greedy action tensor([ 2.5621, -1.0245, -0.0611, 0.5260]) tensor([0.8125, 0.0225, 0.0590, 0.1061]) -Greedy action tensor([ 2.3677, -0.6207, -0.5145, 0.1873]) tensor([0.8201, 0.0413, 0.0459, 0.0927]) -Greedy action tensor([ 1.3384, -0.6919, -0.3776, 0.5801]) tensor([0.5619, 0.0738, 0.1010, 0.2632]) -Greedy action tensor([ 1.0592, -0.5925, -0.1787, 0.0545]) tensor([0.5412, 0.1038, 0.1569, 0.1981]) -Greedy action tensor([ 1.3557, -0.4631, -0.1308, 0.4819]) tensor([0.5538, 0.0898, 0.1253, 0.2311]) -Greedy action tensor([ 2.2508, -1.0284, -0.1396, 0.7976]) tensor([0.7336, 0.0276, 0.0672, 0.1715]) -Greedy action tensor([ 1.8409, -1.1746, -0.6604, 0.8545]) tensor([0.6649, 0.0326, 0.0545, 0.2480]) -Greedy action tensor([ 1.2528, -0.2318, -0.6241, 0.3297]) tensor([0.5628, 0.1275, 0.0861, 0.2236]) -Greedy action tensor([ 1.3438, -0.5957, -0.1644, 0.2374]) tensor([0.5897, 0.0848, 0.1305, 0.1950]) -Greedy action tensor([ 1.4599, -0.1809, -0.2339, 0.5986]) tensor([0.5555, 0.1077, 0.1021, 0.2348]) -Greedy action tensor([ 1.2843, -0.2545, 0.1857, -0.0664]) tensor([0.5534, 0.1188, 0.1845, 0.1434]) -Greedy action tensor([ 1.8314, -0.3710, -0.6905, 1.0669]) tensor([0.6037, 0.0667, 0.0485, 0.2811]) -Greedy action tensor([ 1.4523, -0.3421, -0.8924, 0.5193]) tensor([0.6041, 0.1004, 0.0579, 0.2376]) -Greedy action tensor([ 1.4100, -0.6428, -0.1924, 0.3492]) tensor([0.5967, 0.0766, 0.1202, 0.2066]) -Greedy action tensor([ 1.0513, -0.0864, -0.4093, 0.2720]) tensor([0.4972, 0.1594, 0.1154, 0.2281]) -Greedy action tensor([ 1.1571, 0.1108, -0.3458, -0.1972]) tensor([0.5459, 0.1917, 0.1215, 0.1409]) -Greedy action tensor([ 1.1802, -0.8601, 0.0641, 0.1752]) tensor([0.5484, 0.0713, 0.1796, 0.2007]) -Greedy action tensor([ 1.3135, -0.5696, -0.1198, 0.4884]) tensor([0.5468, 0.0832, 0.1304, 0.2396]) -Greedy action tensor([ 1.7533, 0.0771, -0.2519, 0.5008]) tensor([0.6221, 0.1164, 0.0838, 0.1778]) -Greedy action tensor([ 1.7828, -0.1166, -0.5320, 0.1894]) tensor([0.6889, 0.1031, 0.0681, 0.1400]) -Greedy action tensor([ 2.3937, -1.3832, -0.2602, 0.6562]) tensor([0.7879, 0.0180, 0.0554, 0.1386]) -Greedy action tensor([ 1.6067, -0.2634, -0.5296, 0.5217]) tensor([0.6211, 0.0957, 0.0733, 0.2099]) -Greedy action tensor([ 1.7179, -0.0213, -0.6361, 0.4548]) tensor([0.6437, 0.1131, 0.0611, 0.1820]) -Greedy action tensor([ 0.9473, -0.3210, -0.4459, 0.3501]) tensor([0.4808, 0.1353, 0.1194, 0.2646]) -Greedy action tensor([ 1.0124, -0.3271, -0.4710, -0.2439]) tensor([0.5638, 0.1477, 0.1279, 0.1605]) -Greedy action tensor([ 0.5532, 0.2155, 0.0360, -0.3984]) tensor([0.3710, 0.2646, 0.2212, 0.1432]) -Greedy action tensor([ 1.1862, -0.4603, -0.1923, 0.3420]) tensor([0.5335, 0.1028, 0.1344, 0.2293]) -Greedy action tensor([ 1.5805, -0.7755, -0.4179, -0.0084]) tensor([0.6971, 0.0661, 0.0945, 0.1423]) -Greedy action tensor([ 1.6465, -0.7637, -0.3822, 0.4193]) tensor([0.6603, 0.0593, 0.0868, 0.1935]) -Greedy action tensor([ 1.2757, -0.7333, 0.0111, 0.2003]) tensor([0.5689, 0.0763, 0.1606, 0.1941]) -Greedy action tensor([ 1.3277, -0.1309, -0.2001, 0.4661]) tensor([0.5342, 0.1242, 0.1159, 0.2257]) -Greedy action tensor([ 1.8562, -1.3549, -0.4021, 0.5907]) tensor([0.7008, 0.0283, 0.0733, 0.1977]) -Greedy action tensor([ 1.1044, -0.5303, 0.2171, 0.0987]) tensor([0.5070, 0.0989, 0.2087, 0.1854]) -Greedy action tensor([ 1.1292, -0.3354, 0.0970, 0.0552]) tensor([0.5184, 0.1198, 0.1847, 0.1771]) -Greedy action tensor([ 1.2710, -0.1770, -0.1659, 0.3651]) tensor([0.5328, 0.1252, 0.1266, 0.2154]) -Greedy action tensor([ 1.1319, -0.0095, -0.2170, 0.4613]) tensor([0.4784, 0.1528, 0.1242, 0.2447]) -Greedy action tensor([ 0.7531, -0.3984, 0.2526, 0.1651]) tensor([0.4036, 0.1276, 0.2447, 0.2242]) -Greedy action tensor([ 1.5420, -0.3370, -0.3176, -0.5238]) tensor([0.6968, 0.1064, 0.1085, 0.0883]) -Greedy action tensor([ 1.3680, -0.2839, 0.0090, 0.1857]) tensor([0.5697, 0.1092, 0.1464, 0.1747]) -Greedy action tensor([ 1.3775, -0.6342, -0.4122, -0.1100]) tensor([0.6550, 0.0876, 0.1094, 0.1480]) -Greedy action tensor([ 1.7683, -0.7405, -0.0030, 0.5867]) tensor([0.6417, 0.0522, 0.1092, 0.1969]) -Greedy action tensor([ 2.2804, -0.6764, -0.0413, 0.9985]) tensor([0.7005, 0.0364, 0.0687, 0.1944]) -Greedy action tensor([ 1.2889, -0.5550, -0.5636, 0.0674]) tensor([0.6212, 0.0983, 0.0974, 0.1831]) -Greedy action tensor([ 1.3353, -0.7031, -0.1426, 0.3760]) tensor([0.5742, 0.0748, 0.1310, 0.2200]) -Greedy action tensor([ 1.0419, -0.6276, -0.2267, 0.0512]) tensor([0.5432, 0.1023, 0.1528, 0.2017]) -Greedy action tensor([ 1.0458, -0.5792, -0.2339, 0.2247]) tensor([0.5222, 0.1028, 0.1452, 0.2297]) -Greedy action tensor([ 1.5584, 0.2582, -0.3765, 0.4214]) tensor([0.5755, 0.1568, 0.0831, 0.1846]) -Greedy action tensor([ 1.1576, -0.7958, -0.2375, 0.4659]) tensor([0.5290, 0.0750, 0.1311, 0.2649]) -Greedy action tensor([ 1.2646, -0.3863, -0.1396, 0.0639]) tensor([0.5752, 0.1104, 0.1413, 0.1731]) -Greedy action tensor([ 1.6841, -0.0806, -0.3055, 0.0840]) tensor([0.6623, 0.1134, 0.0906, 0.1337]) -Greedy action tensor([ 1.2817, 0.0933, -0.3499, 0.2069]) tensor([0.5430, 0.1654, 0.1062, 0.1854]) -Greedy action tensor([ 1.4188, -0.4447, -0.5996, 0.6018]) tensor([0.5781, 0.0897, 0.0768, 0.2554]) -Greedy action tensor([ 0.7594, -0.4239, -0.4978, 1.0029]) tensor([0.3489, 0.1068, 0.0992, 0.4450]) -Greedy action tensor([ 0.7368, 0.0735, -0.2280, -0.1340]) tensor([0.4320, 0.2225, 0.1646, 0.1808]) -Greedy action tensor([ 1.7214, -0.4667, -0.7082, 0.5537]) tensor([0.6617, 0.0742, 0.0583, 0.2058]) -Greedy action tensor([ 0.5466, -0.4211, 0.1246, 0.1657]) tensor([0.3678, 0.1397, 0.2412, 0.2513]) -Greedy action tensor([ 1.2435, -0.5336, 0.2606, 0.1333]) tensor([0.5339, 0.0903, 0.1998, 0.1759]) -Greedy action tensor([ 1.8268, -0.7192, -0.6052, 0.3281]) tensor([0.7196, 0.0564, 0.0632, 0.1608]) -Greedy action tensor([ 1.2086, -0.0892, -0.5076, 0.2653]) tensor([0.5428, 0.1483, 0.0976, 0.2113]) -Greedy action tensor([ 1.6676, -0.3246, -0.7935, 0.7281]) tensor([0.6201, 0.0846, 0.0529, 0.2424]) -Greedy action tensor([ 1.4778, -0.3238, -0.1885, 0.2639]) tensor([0.6057, 0.1000, 0.1144, 0.1799]) -Greedy action tensor([ 1.4109, -0.3387, -0.2904, 0.1624]) tensor([0.6086, 0.1058, 0.1110, 0.1746]) -Greedy action tensor([ 1.5718, -0.7521, -0.3213, 0.5312]) tensor([0.6243, 0.0611, 0.0940, 0.2205]) -Greedy action tensor([ 1.3340, -0.4737, -0.4204, 0.2803]) tensor([0.5932, 0.0973, 0.1026, 0.2068]) -Greedy action tensor([ 1.6275, -0.5420, -0.9381, 0.0480]) tensor([0.7157, 0.0818, 0.0550, 0.1475]) -Greedy action tensor([ 2.2674, -0.9736, -0.3625, 0.6960]) tensor([0.7582, 0.0297, 0.0547, 0.1575]) -Greedy action tensor([ 1.6827, 0.1079, -0.3727, 0.3688]) tensor([0.6235, 0.1291, 0.0798, 0.1676]) -Greedy action tensor([ 1.7697, -0.8580, -0.5381, 0.8545]) tensor([0.6361, 0.0460, 0.0633, 0.2547]) -Greedy action tensor([ 1.3393, -0.4197, -0.0354, 0.0366]) tensor([0.5893, 0.1015, 0.1490, 0.1602]) -Greedy action tensor([ 1.2746, 0.2108, -0.0870, 0.0909]) tensor([0.5242, 0.1809, 0.1343, 0.1605]) -Greedy action tensor([ 1.2857, -0.2912, -0.4650, 0.2326]) tensor([0.5783, 0.1195, 0.1004, 0.2017]) -Greedy action tensor([ 1.5915, -0.7972, -0.3295, 0.2168]) tensor([0.6706, 0.0615, 0.0982, 0.1696]) -Greedy action tensor([ 1.5498, -0.2733, -0.6349, 0.5389]) tensor([0.6105, 0.0986, 0.0687, 0.2222]) -Greedy action tensor([ 1.8028, -0.1603, 0.3174, -0.0168]) tensor([0.6541, 0.0918, 0.1481, 0.1060]) -Greedy action tensor([ 1.4667, -0.6040, -0.3702, 0.2980]) tensor([0.6265, 0.0790, 0.0998, 0.1947]) -Greedy action tensor([ 2.3368, -0.7051, -0.3509, 0.7009]) tensor([0.7630, 0.0364, 0.0519, 0.1486]) -Greedy action tensor([ 0.4476, -0.0516, 0.1062, -0.0286]) tensor([0.3403, 0.2065, 0.2419, 0.2113]) -Greedy action tensor([ 1.1146, -0.3166, -0.1629, 0.1950]) tensor([0.5218, 0.1247, 0.1454, 0.2080]) -Greedy action tensor([ 1.8350, -0.5769, -0.6261, 0.3001]) tensor([0.7192, 0.0645, 0.0614, 0.1550]) -Greedy action tensor([ 1.7680, -0.2792, -0.2741, 0.5372]) tensor([0.6448, 0.0832, 0.0837, 0.1883]) -Greedy action tensor([ 1.9642, -0.9338, -0.3878, 0.9157]) tensor([0.6663, 0.0367, 0.0634, 0.2335]) -Greedy action tensor([ 0.7893, -0.0834, -0.0919, -0.1022]) tensor([0.4460, 0.1864, 0.1848, 0.1829]) -Greedy action tensor([ 0.8491, -0.2606, -0.1488, -0.1400]) tensor([0.4830, 0.1592, 0.1781, 0.1797]) -Greedy action tensor([ 0.7642, -0.7539, 0.1526, -0.4424]) tensor([0.4853, 0.1063, 0.2632, 0.1452]) -Greedy action tensor([ 0.7629, -0.5276, 0.0247, -0.1998]) tensor([0.4684, 0.1289, 0.2239, 0.1789]) -Greedy action tensor([ 0.4240, 0.1466, -0.2740, 0.0598]) tensor([0.3390, 0.2569, 0.1687, 0.2355]) -Greedy action tensor([ 6.6806e-01, -3.9556e-01, 3.6952e-04, -2.2805e-01]) tensor([0.4413, 0.1523, 0.2263, 0.1801]) -Greedy action tensor([ 1.0781, -0.4937, -0.0148, -0.3898]) tensor([0.5639, 0.1171, 0.1890, 0.1299]) -Greedy action tensor([ 0.0439, 0.1060, -0.3642, -0.3707]) tensor([0.2950, 0.3139, 0.1962, 0.1949]) -Greedy action tensor([ 0.9568, -0.5309, -0.0786, -0.5849]) tensor([0.5571, 0.1259, 0.1978, 0.1192]) -Greedy action tensor([ 0.9574, -0.6052, -0.0749, -0.4772]) tensor([0.5543, 0.1162, 0.1974, 0.1320]) -Greedy action tensor([ 0.5002, -0.0323, -0.0503, -0.1581]) tensor([0.3729, 0.2190, 0.2151, 0.1931]) -Greedy action tensor([ 0.9162, -0.7138, -0.0631, -0.6375]) tensor([0.5609, 0.1099, 0.2106, 0.1186]) -Greedy action tensor([ 0.7328, -0.4541, -0.1165, -0.4178]) tensor([0.4880, 0.1489, 0.2087, 0.1544]) -Greedy action tensor([ 0.5599, -0.2962, 0.0843, -0.4443]) tensor([0.4145, 0.1761, 0.2576, 0.1518]) -Greedy action tensor([ 0.5105, 0.0725, -0.1296, -0.2762]) tensor([0.3805, 0.2456, 0.2006, 0.1733]) -Greedy action tensor([ 0.6798, -0.3724, -0.0345, -0.4169]) tensor([0.4603, 0.1607, 0.2253, 0.1537]) -Greedy action tensor([ 0.5392, -0.2747, -0.0622, -0.1751]) tensor([0.4031, 0.1786, 0.2209, 0.1973]) -Greedy action tensor([ 0.8928, -0.6133, -0.1231, -0.3632]) tensor([0.5351, 0.1187, 0.1938, 0.1524]) -Greedy action tensor([ 0.7823, -0.2150, -0.0309, -0.2773]) tensor([0.4632, 0.1709, 0.2054, 0.1605]) -Greedy action tensor([ 0.7711, -0.5249, 0.0323, -0.4764]) tensor([0.4905, 0.1342, 0.2343, 0.1409]) -Greedy action tensor([ 0.9109, -0.4246, -0.2332, -0.2826]) tensor([0.5306, 0.1396, 0.1690, 0.1609]) -Greedy action tensor([ 0.9412, -0.4580, 0.2088, -0.4788]) tensor([0.5078, 0.1253, 0.2441, 0.1227]) -Greedy action tensor([ 0.7864, -0.4853, -0.0813, -0.3620]) tensor([0.4957, 0.1390, 0.2081, 0.1572]) -Greedy action tensor([ 0.9033, -0.7833, 0.1048, -0.4189]) tensor([0.5258, 0.0974, 0.2366, 0.1402]) -Greedy action tensor([ 0.6269, -0.3823, -0.0388, -0.4987]) tensor([0.4539, 0.1655, 0.2333, 0.1473]) -Greedy action tensor([ 0.8330, -0.5655, -0.1824, -0.3591]) tensor([0.5228, 0.1291, 0.1894, 0.1587]) -Greedy action tensor([ 0.7294, -0.3113, -0.0600, -0.4799]) tensor([0.4749, 0.1677, 0.2157, 0.1417]) -Greedy action tensor([ 0.7038, -0.4791, -0.0041, -0.2584]) tensor([0.4585, 0.1405, 0.2259, 0.1752]) -Greedy action tensor([ 0.2392, 0.1453, 0.0080, -0.0158]) tensor([0.2874, 0.2617, 0.2281, 0.2227]) -Greedy action tensor([ 0.8004, -0.3143, -0.0038, -0.1577]) tensor([0.4632, 0.1519, 0.2072, 0.1777]) -Greedy action tensor([ 0.9542, -0.4318, -0.1694, -0.4124]) tensor([0.5464, 0.1366, 0.1776, 0.1393]) -Greedy action tensor([ 0.8257, -0.3928, -0.0804, -0.3147]) tensor([0.4952, 0.1464, 0.2001, 0.1583]) -Greedy action tensor([ 0.6225, -0.1591, 0.1494, -0.3359]) tensor([0.4058, 0.1857, 0.2528, 0.1556]) -Greedy action tensor([ 1.1349, -0.6988, -0.1279, -0.5402]) tensor([0.6135, 0.0981, 0.1735, 0.1149]) -Greedy action tensor([ 0.8736, -0.4042, -0.0541, -0.3368]) tensor([0.5071, 0.1413, 0.2005, 0.1511]) -Greedy action tensor([ 0.5732, -0.4101, -0.0330, -0.1877]) tensor([0.4190, 0.1567, 0.2285, 0.1958]) -Greedy action tensor([ 0.7789, 0.1897, -0.3534, -0.4966]) tensor([0.4637, 0.2573, 0.1495, 0.1295]) -Greedy action tensor([ 3.8123e-01, -1.5698e-01, -2.6792e-04, -2.9412e-01]) tensor([0.3603, 0.2103, 0.2460, 0.1834]) -Greedy action tensor([ 1.0151, -0.6278, -0.0520, -0.4531]) tensor([0.5657, 0.1094, 0.1946, 0.1303]) -Greedy action tensor([ 0.2599, -0.1547, -0.1051, -0.1977]) tensor([0.3347, 0.2211, 0.2324, 0.2118]) -Greedy action tensor([ 0.6745, -0.5132, 0.0859, -0.5316]) tensor([0.4631, 0.1412, 0.2571, 0.1386]) -Greedy action tensor([ 0.1077, 0.1701, -0.0776, -0.4219]) tensor([0.2870, 0.3055, 0.2385, 0.1690]) -Greedy action tensor([ 0.1909, 0.3461, -0.0082, -0.0887]) tensor([0.2671, 0.3120, 0.2189, 0.2020]) -Greedy action tensor([ 0.4564, 0.1582, -0.1408, -0.0371]) tensor([0.3445, 0.2557, 0.1896, 0.2103]) -Greedy action tensor([ 0.9558, -0.5306, -0.1541, -0.2307]) tensor([0.5373, 0.1215, 0.1771, 0.1640]) -Greedy action tensor([ 0.7161, -0.6267, -0.1103, -0.4447]) tensor([0.4970, 0.1298, 0.2175, 0.1557]) -Greedy action tensor([ 0.6207, -0.4425, -0.1078, -0.1292]) tensor([0.4347, 0.1501, 0.2098, 0.2054]) -Greedy action tensor([ 0.1412, -0.1916, -0.0982, -0.1218]) tensor([0.3056, 0.2191, 0.2405, 0.2349]) -Greedy action tensor([ 0.2893, 0.0459, -0.1312, -0.0375]) tensor([0.3163, 0.2479, 0.2077, 0.2281]) -Greedy action tensor([ 0.7477, -0.3327, -0.2175, -0.1247]) tensor([0.4677, 0.1588, 0.1781, 0.1955]) -Greedy action tensor([ 0.6949, -0.4780, 0.0037, -0.1837]) tensor([0.4493, 0.1390, 0.2251, 0.1866]) -Greedy action tensor([ 0.1423, 0.0939, 0.0903, -0.2777]) tensor([0.2810, 0.2677, 0.2667, 0.1846]) -Greedy action tensor([ 0.6004, -0.0369, -0.1290, -0.0028]) tensor([0.3909, 0.2067, 0.1885, 0.2139]) -Greedy action tensor([ 0.5472, -0.4108, -0.1410, -0.1999]) tensor([0.4237, 0.1626, 0.2129, 0.2008]) -Greedy action tensor([ 0.5523, -0.4232, -0.0830, -0.6601]) tensor([0.4537, 0.1710, 0.2403, 0.1350]) -Greedy action tensor([ 0.6552, -0.3281, -0.2133, -0.5816]) tensor([0.4799, 0.1795, 0.2013, 0.1393]) -Greedy action tensor([ 0.7363, -0.0342, 0.1089, -0.1998]) tensor([0.4186, 0.1937, 0.2235, 0.1642]) -Greedy action tensor([ 0.8651, -0.5693, 0.1207, -0.7429]) tensor([0.5226, 0.1245, 0.2482, 0.1047]) -Greedy action tensor([ 0.7320, -0.7074, -0.0244, -0.3215]) tensor([0.4866, 0.1154, 0.2284, 0.1697]) -Greedy action tensor([ 0.6951, -0.7080, 0.2675, -0.9010]) tensor([0.4760, 0.1170, 0.3104, 0.0965]) -Greedy action tensor([ 0.4683, 0.0347, -0.0964, -0.0787]) tensor([0.3577, 0.2319, 0.2034, 0.2070]) -Greedy action tensor([ 1.1215, -0.6471, -0.1333, -0.8387]) tensor([0.6264, 0.1068, 0.1786, 0.0882]) -Greedy action tensor([ 0.7118, -0.7576, 0.0056, -0.3281]) tensor([0.4815, 0.1108, 0.2376, 0.1702]) -Greedy action tensor([ 0.6204, -0.1956, -0.0844, -0.2271]) tensor([0.4229, 0.1870, 0.2090, 0.1812]) -Greedy action tensor([ 0.9961, -0.9248, 0.2330, -0.6221]) tensor([0.5522, 0.0809, 0.2574, 0.1095]) -Greedy action tensor([ 0.8561, -0.6456, -0.0233, -0.7600]) tensor([0.5445, 0.1213, 0.2260, 0.1082]) -Greedy action tensor([ 0.5505, -0.5407, -0.2033, -0.3112]) tensor([0.4487, 0.1507, 0.2111, 0.1895]) -Greedy action tensor([ 0.6398, -0.4539, -0.1296, -0.4299]) tensor([0.4670, 0.1564, 0.2163, 0.1602]) -Greedy action tensor([ 0.7308, -0.3521, -0.0158, -0.0754]) tensor([0.4426, 0.1499, 0.2098, 0.1977]) -Greedy action tensor([ 0.3928, 0.0227, -0.0956, -0.4110]) tensor([0.3634, 0.2510, 0.2230, 0.1627]) -Greedy action tensor([ 0.1647, 0.2315, -0.3272, 0.1034]) tensor([0.2762, 0.2952, 0.1689, 0.2597]) -Greedy action tensor([ 0.4717, 0.0020, -0.0443, -0.0811]) tensor([0.3575, 0.2235, 0.2134, 0.2057]) -Greedy action tensor([ 0.4613, -0.1797, -0.0045, -0.4442]) tensor([0.3908, 0.2059, 0.2453, 0.1580]) -Greedy action tensor([ 0.8187, -0.4696, -0.0285, -0.3385]) tensor([0.4954, 0.1366, 0.2123, 0.1557]) -Greedy action tensor([ 1.0021, -0.4260, 0.2102, -0.6078]) tensor([0.5284, 0.1267, 0.2393, 0.1056]) -Greedy action tensor([ 0.4682, 0.0822, -0.0437, -0.1438]) tensor([0.3544, 0.2409, 0.2124, 0.1922]) -Greedy action tensor([ 0.5252, -0.4408, 0.0036, -0.2203]) tensor([0.4084, 0.1554, 0.2424, 0.1938]) -Greedy action tensor([ 0.7172, -0.2938, -0.0805, -0.2769]) tensor([0.4578, 0.1666, 0.2062, 0.1694]) -Greedy action tensor([ 0.8200, -0.7755, -0.0504, -0.3523]) tensor([0.5178, 0.1050, 0.2169, 0.1603]) -Greedy action tensor([ 0.7592, -0.2903, -0.0078, -0.1310]) tensor([0.4494, 0.1573, 0.2087, 0.1845]) -Greedy action tensor([ 0.3433, -0.4173, -0.1373, -0.2595]) tensor([0.3798, 0.1775, 0.2349, 0.2078]) -Greedy action tensor([-1.2745, -0.2610, 0.7479, 0.7017]) tensor([0.0540, 0.1487, 0.4079, 0.3895]) -Greedy action tensor([-1.5890, -0.3937, 0.4647, 0.0346]) tensor([0.0582, 0.1924, 0.4540, 0.2953]) -Greedy action tensor([-1.8634, -0.4353, 0.6258, -0.1407]) tensor([0.0438, 0.1827, 0.5281, 0.2454]) -Greedy action tensor([-1.8498, -0.4980, 0.8585, 0.1624]) tensor([0.0366, 0.1413, 0.5486, 0.2735]) -Greedy action tensor([-1.8663, -0.3505, 0.6137, -0.1509]) tensor([0.0434, 0.1975, 0.5180, 0.2411]) -Greedy action tensor([-1.8916, -0.4303, 0.6387, -0.1503]) tensor([0.0424, 0.1829, 0.5327, 0.2420]) -Greedy action tensor([-3.3876e-01, 1.0500e+00, 3.6865e-05, 3.9358e-01]) tensor([0.1177, 0.4721, 0.1652, 0.2449]) -Greedy action tensor([-1.8196, -0.3887, 0.6919, 0.0997]) tensor([0.0411, 0.1720, 0.5067, 0.2802]) -Greedy action tensor([-1.9027, -0.4377, 0.6424, -0.1599]) tensor([0.0420, 0.1819, 0.5358, 0.2402]) -Greedy action tensor([-1.4478, 0.1697, 0.4282, 0.4025]) tensor([0.0528, 0.2663, 0.3448, 0.3361]) -Greedy action tensor([-1.8660, -0.3600, 0.6092, -0.1152]) tensor([0.0432, 0.1947, 0.5133, 0.2488]) -Greedy action tensor([-1.6445, 0.0566, 0.5161, 0.0220]) tensor([0.0489, 0.2680, 0.4243, 0.2589]) -Greedy action tensor([-1.7439, -0.2695, 0.5507, -0.0909]) tensor([0.0488, 0.2130, 0.4837, 0.2546]) -Greedy action tensor([-1.9249, -0.4500, 0.6536, -0.1694]) tensor([0.0411, 0.1796, 0.5415, 0.2378]) -Greedy action tensor([-1.9353, -0.4296, 0.6611, -0.1733]) tensor([0.0404, 0.1821, 0.5421, 0.2353]) -Greedy action tensor([-1.9208, -0.4337, 0.6603, -0.1614]) tensor([0.0409, 0.1810, 0.5405, 0.2376]) -Greedy action tensor([-1.8498, -0.4657, 0.6178, -0.1339]) tensor([0.0447, 0.1786, 0.5278, 0.2489]) -Greedy action tensor([-1.8656, -0.4057, 0.6581, -0.1270]) tensor([0.0426, 0.1834, 0.5315, 0.2424]) -Greedy action tensor([-1.8768, -0.3139, 0.6243, -0.1227]) tensor([0.0421, 0.2010, 0.5136, 0.2433]) -Greedy action tensor([-1.9366, -0.4302, 0.6609, -0.1763]) tensor([0.0404, 0.1822, 0.5425, 0.2349]) -Greedy action tensor([-1.7127, -0.3722, 0.5393, -0.1043]) tensor([0.0518, 0.1977, 0.4920, 0.2585]) -Greedy action tensor([-1.8106, -0.4829, 0.6560, -0.0098]) tensor([0.0442, 0.1669, 0.5211, 0.2678]) -Greedy action tensor([-1.6674, -0.5505, 0.5202, -0.0176]) tensor([0.0550, 0.1681, 0.4904, 0.2864]) -Greedy action tensor([-1.8029, -0.3498, 0.6345, -0.1092]) tensor([0.0451, 0.1930, 0.5164, 0.2455]) -Greedy action tensor([-1.7608, -0.4064, 0.5936, -0.0679]) tensor([0.0480, 0.1859, 0.5053, 0.2608]) -Greedy action tensor([-1.7782, -0.4593, 0.5887, -0.0847]) tensor([0.0480, 0.1794, 0.5117, 0.2609]) -Greedy action tensor([-1.8650, -0.4605, 0.6317, -0.1329]) tensor([0.0437, 0.1781, 0.5310, 0.2472]) -Greedy action tensor([-1.5949, -0.4932, 0.4658, 0.0599]) tensor([0.0585, 0.1761, 0.4593, 0.3061]) -Greedy action tensor([-0.7852, 0.0219, 0.3230, -0.1999]) tensor([0.1240, 0.2779, 0.3755, 0.2226]) -Greedy action tensor([-1.3806, -0.8434, 0.5520, 0.4774]) tensor([0.0624, 0.1067, 0.4309, 0.4000]) -Greedy action tensor([-1.8922, -0.4684, 0.6412, -0.1445]) tensor([0.0426, 0.1768, 0.5362, 0.2444]) -Greedy action tensor([-1.9184, -0.4424, 0.6502, -0.1671]) tensor([0.0413, 0.1809, 0.5395, 0.2383]) -Greedy action tensor([-1.2603, -0.2275, 0.8566, 0.9147]) tensor([0.0478, 0.1343, 0.3971, 0.4208]) -Greedy action tensor([-1.8883, -0.4564, 0.6703, -0.0956]) tensor([0.0415, 0.1736, 0.5358, 0.2491]) -Greedy action tensor([-1.8631, -0.3165, 0.6469, -0.1168]) tensor([0.0421, 0.1979, 0.5184, 0.2416]) -Greedy action tensor([-0.3639, 0.2532, 0.7708, 1.6415]) tensor([0.0747, 0.1384, 0.2322, 0.5547]) -Greedy action tensor([-1.7619, 0.0697, 0.5521, -0.2117]) tensor([0.0453, 0.2829, 0.4583, 0.2135]) -Greedy action tensor([-1.2505, 0.0577, 0.4473, 0.2174]) tensor([0.0690, 0.2551, 0.3767, 0.2993]) -Greedy action tensor([-1.8994, -0.3872, 0.6406, -0.1428]) tensor([0.0416, 0.1890, 0.5281, 0.2413]) -Greedy action tensor([-1.5165, 0.1997, 0.3789, -0.0625]) tensor([0.0571, 0.3179, 0.3803, 0.2446]) -Greedy action tensor([-1.6393, -0.4101, 0.5079, -0.0683]) tensor([0.0562, 0.1922, 0.4812, 0.2704]) -Greedy action tensor([-1.9231, -0.4567, 0.6682, -0.1591]) tensor([0.0408, 0.1768, 0.5444, 0.2380]) -Greedy action tensor([-1.8845, -0.3364, 0.6111, -0.1374]) tensor([0.0424, 0.1995, 0.5146, 0.2434]) -Greedy action tensor([-1.8024, -0.4576, 0.6493, 0.0727]) tensor([0.0435, 0.1671, 0.5054, 0.2839]) -Greedy action tensor([-1.6202, -0.3512, 0.5140, -0.0483]) tensor([0.0561, 0.1996, 0.4741, 0.2702]) -Greedy action tensor([-1.6775, -0.4522, 0.5498, -0.1207]) tensor([0.0543, 0.1848, 0.5034, 0.2575]) -Greedy action tensor([-1.8926, -0.4139, 0.6356, -0.1598]) tensor([0.0424, 0.1861, 0.5315, 0.2399]) -Greedy action tensor([-1.6922, -0.4336, 0.5348, -0.0497]) tensor([0.0527, 0.1857, 0.4890, 0.2726]) -Greedy action tensor([-1.9130, -0.4104, 0.6486, -0.1608]) tensor([0.0413, 0.1855, 0.5350, 0.2382]) -Greedy action tensor([-1.9454, -0.4530, 0.6709, -0.1809]) tensor([0.0400, 0.1781, 0.5480, 0.2338]) -Greedy action tensor([-1.8288, -0.3900, 0.6329, -0.0824]) tensor([0.0441, 0.1859, 0.5171, 0.2529]) -Greedy action tensor([-1.8346, -0.4735, 0.6238, -0.0786]) tensor([0.0447, 0.1743, 0.5223, 0.2587]) -Greedy action tensor([-1.7600, -0.4691, 0.5821, -0.0793]) tensor([0.0490, 0.1782, 0.5097, 0.2631]) -Greedy action tensor([-0.8934, 0.9198, 0.0868, 0.2818]) tensor([0.0767, 0.4703, 0.2045, 0.2485]) -Greedy action tensor([-1.8046, -0.3405, 0.6185, -0.0504]) tensor([0.0447, 0.1932, 0.5040, 0.2582]) -Greedy action tensor([-1.9010, -0.3663, 0.6370, -0.1526]) tensor([0.0416, 0.1930, 0.5264, 0.2390]) -Greedy action tensor([-1.7615, -0.4575, 0.5793, -0.0312]) tensor([0.0483, 0.1778, 0.5015, 0.2724]) -Greedy action tensor([-1.7401, -0.4875, 0.6043, 0.0597]) tensor([0.0477, 0.1668, 0.4971, 0.2884]) -Greedy action tensor([-1.8213, -0.4192, 0.5991, -0.1255]) tensor([0.0459, 0.1867, 0.5169, 0.2504]) -Greedy action tensor([-0.4712, -0.5031, 0.1785, 0.0794]) tensor([0.1780, 0.1724, 0.3409, 0.3087]) -Greedy action tensor([-1.5435, -0.5061, 0.5513, 0.2447]) tensor([0.0558, 0.1574, 0.4532, 0.3335]) -Greedy action tensor([-1.3149, -0.6020, 0.3202, 0.2403]) tensor([0.0775, 0.1581, 0.3975, 0.3670]) -Greedy action tensor([-1.2901, -0.5266, 0.3528, 0.3683]) tensor([0.0737, 0.1582, 0.3811, 0.3870]) -Greedy action tensor([-1.2616, -0.2408, 0.2818, 0.3955]) tensor([0.0730, 0.2026, 0.3416, 0.3828]) -Greedy action tensor([-1.7640, -0.3295, 0.5766, -0.0705]) tensor([0.0476, 0.1997, 0.4941, 0.2587]) -Greedy action tensor([-1.8260, -0.3171, 0.6031, -0.0997]) tensor([0.0445, 0.2011, 0.5046, 0.2499]) -Greedy action tensor([-1.3125, -0.6527, 0.3382, 0.2021]) tensor([0.0788, 0.1524, 0.4105, 0.3583]) -Greedy action tensor([-1.4342, 0.0476, 0.3503, 0.0743]) tensor([0.0630, 0.2772, 0.3752, 0.2847]) -Greedy action tensor([-1.9250, -0.4405, 0.6523, -0.1691]) tensor([0.0410, 0.1811, 0.5402, 0.2376]) -Greedy action tensor([-1.8884, -0.2844, 0.6179, -0.1554]) tensor([0.0419, 0.2082, 0.5132, 0.2368]) -Greedy action tensor([-0.9641, -0.1191, 0.2952, -0.1849]) tensor([0.1107, 0.2578, 0.3901, 0.2414]) -Greedy action tensor([-1.8375, -0.4873, 0.6085, -0.1176]) tensor([0.0455, 0.1755, 0.5250, 0.2540]) -Greedy action tensor([-1.8356, -0.4648, 0.6129, -0.0938]) tensor([0.0450, 0.1773, 0.5208, 0.2569]) -Greedy action tensor([-1.8227, -0.3603, 0.7009, -0.0286]) tensor([0.0420, 0.1813, 0.5240, 0.2527]) -Greedy action tensor([-1.9398, -0.4421, 0.6639, -0.1782]) tensor([0.0403, 0.1802, 0.5447, 0.2347]) -Greedy action tensor([-1.7366, -0.2954, 0.5383, -0.0508]) tensor([0.0491, 0.2077, 0.4780, 0.2652]) -Greedy action tensor([-1.6204, -0.6077, 0.6886, 0.1131]) tensor([0.0513, 0.1413, 0.5167, 0.2906]) -Greedy action tensor([-0.8548, -0.5193, 0.2156, 0.4656]) tensor([0.1104, 0.1544, 0.3219, 0.4133]) -Greedy action tensor([-1.5680, -0.2398, 0.4655, -0.0537]) tensor([0.0590, 0.2225, 0.4505, 0.2680]) -Greedy action tensor([-1.8545, -0.4377, 0.6192, -0.1329]) tensor([0.0443, 0.1826, 0.5254, 0.2477]) -Greedy action tensor([-1.8656, -0.6066, 1.5892, 0.8755]) tensor([0.0194, 0.0682, 0.6125, 0.3000]) -Greedy action tensor([ 0.4136, -0.7155, 0.9856, 0.6376]) tensor([0.2301, 0.0744, 0.4077, 0.2878]) -Greedy action tensor([ 1.4457, -1.1411, -0.4724, -0.3824]) tensor([0.7231, 0.0544, 0.1062, 0.1162]) -Greedy action tensor([ 0.0513, -1.1250, 0.5466, 0.0925]) tensor([0.2506, 0.0773, 0.4111, 0.2611]) -Greedy action tensor([-0.3910, -0.7581, -0.5451, 0.0372]) tensor([0.2448, 0.1696, 0.2099, 0.3757]) -Greedy action tensor([-0.6434, 0.0999, 0.3009, -1.3903]) tensor([0.1627, 0.3421, 0.4182, 0.0771]) -Greedy action tensor([1.1548, 0.1252, 0.9214, 0.1877]) tensor([0.3954, 0.1412, 0.3131, 0.1503]) -Greedy action tensor([ 0.9413, -0.1514, 0.3803, -0.1471]) tensor([0.4459, 0.1495, 0.2544, 0.1502]) -Greedy action tensor([ 0.5564, -0.7085, 0.9348, 0.5138]) tensor([0.2702, 0.0763, 0.3945, 0.2590]) -Greedy action tensor([ 1.3771, -0.3557, 0.3129, 0.9124]) tensor([0.4651, 0.0822, 0.1604, 0.2922]) -Greedy action tensor([ 1.0154, 0.1225, -0.0872, -0.1173]) tensor([0.4846, 0.1984, 0.1609, 0.1561]) -Greedy action tensor([ 1.5748, -0.5612, 1.2346, 0.9424]) tensor([0.4235, 0.0500, 0.3014, 0.2250]) -Greedy action tensor([ 1.3753, -0.9268, 2.0295, 1.2085]) tensor([0.2584, 0.0259, 0.4971, 0.2187]) -Greedy action tensor([ 1.1565, -1.3897, 0.3293, 0.3892]) tensor([0.5051, 0.0396, 0.2208, 0.2345]) -Greedy action tensor([ 0.7662, -0.5861, -0.0649, 0.1183]) tensor([0.4510, 0.1166, 0.1964, 0.2359]) -Greedy action tensor([ 0.0506, -1.5197, -0.8099, -0.1442]) tensor([0.4075, 0.0848, 0.1724, 0.3354]) -Greedy action tensor([-0.6441, -1.9497, 0.1186, 0.6805]) tensor([0.1394, 0.0378, 0.2988, 0.5241]) -Greedy action tensor([-0.5380, 0.8644, -1.0502, 0.3294]) tensor([0.1243, 0.5053, 0.0745, 0.2959]) -Greedy action tensor([ 1.7099, -0.5726, 0.1948, 1.5478]) tensor([0.4604, 0.0470, 0.1012, 0.3915]) -Greedy action tensor([ 0.0243, -0.4618, -0.2336, -0.7646]) tensor([0.3519, 0.2164, 0.2719, 0.1599]) -Greedy action tensor([-0.0682, 0.0722, -0.9250, -0.4972]) tensor([0.3100, 0.3567, 0.1316, 0.2018]) -Greedy action tensor([ 0.2933, 0.7778, -0.3358, -0.2257]) tensor([0.2665, 0.4327, 0.1421, 0.1586]) -Greedy action tensor([ 0.7260, -0.6726, -0.5732, 1.2311]) tensor([0.3148, 0.0777, 0.0859, 0.5216]) -Greedy action tensor([1.0296, 0.0288, 2.1324, 0.2841]) tensor([0.2060, 0.0757, 0.6206, 0.0977]) -Greedy action tensor([-0.1010, 0.8467, -0.1024, 0.1639]) tensor([0.1700, 0.4386, 0.1698, 0.2216]) -Greedy action tensor([-0.6648, -0.2531, -1.1085, -0.1023]) tensor([0.2038, 0.3076, 0.1308, 0.3577]) -Greedy action tensor([ 0.1229, -0.5306, 1.9059, 0.7145]) tensor([0.1078, 0.0561, 0.6413, 0.1948]) -Greedy action tensor([ 0.2850, 0.5962, -0.9473, -1.0931]) tensor([0.3438, 0.4693, 0.1003, 0.0867]) -Greedy action tensor([ 0.6648, 0.0374, -0.0193, 1.8511]) tensor([0.1882, 0.1005, 0.0950, 0.6164]) -Greedy action tensor([ 0.8964, -0.6573, 0.7009, 0.1539]) tensor([0.3984, 0.0843, 0.3277, 0.1896]) -Greedy action tensor([-0.8189, -2.3392, -0.2758, 0.8379]) tensor([0.1222, 0.0267, 0.2104, 0.6407]) -Greedy action tensor([-0.8927, -0.9895, 0.0664, -0.7507]) tensor([0.1764, 0.1601, 0.4602, 0.2033]) -Greedy action tensor([ 0.7402, -0.2131, -0.2941, 0.9062]) tensor([0.3423, 0.1319, 0.1217, 0.4041]) -Greedy action tensor([-0.5888, -0.2649, 0.5415, 1.3560]) tensor([0.0802, 0.1109, 0.2483, 0.5607]) -Greedy action tensor([ 0.6768, -0.0582, -0.7160, -0.0330]) tensor([0.4505, 0.2160, 0.1119, 0.2215]) -Greedy action tensor([ 0.2828, -1.0514, -0.3779, 0.4006]) tensor([0.3442, 0.0907, 0.1778, 0.3873]) -Greedy action tensor([ 1.0950, -1.2841, 2.0556, 0.2722]) tensor([0.2413, 0.0223, 0.6304, 0.1060]) -Greedy action tensor([ 0.2644, -0.9920, 1.2534, -0.1228]) tensor([0.2150, 0.0612, 0.5779, 0.1459]) -Greedy action tensor([-0.3633, -1.1649, 0.0586, 0.5825]) tensor([0.1802, 0.0809, 0.2748, 0.4641]) -Greedy action tensor([ 1.0089, -2.7923, -0.2662, 0.2608]) tensor([0.5634, 0.0126, 0.1574, 0.2666]) -Greedy action tensor([-0.2237, -0.5960, 0.7111, 0.3604]) tensor([0.1659, 0.1143, 0.4224, 0.2974]) -Greedy action tensor([-0.0568, -1.2316, 1.6299, -0.3327]) tensor([0.1339, 0.0414, 0.7232, 0.1016]) -Greedy action tensor([0.3444, 1.1250, 0.2969, 1.1900]) tensor([0.1547, 0.3376, 0.1475, 0.3603]) -Greedy action tensor([-0.1669, 0.5817, 0.1414, 0.8345]) tensor([0.1389, 0.2937, 0.1891, 0.3782]) -Greedy action tensor([ 1.0806, 0.5646, 0.8459, -0.9650]) tensor([0.3973, 0.2372, 0.3142, 0.0514]) -Greedy action tensor([ 0.5265, -1.3137, -0.2958, 0.7437]) tensor([0.3520, 0.0559, 0.1547, 0.4374]) -Greedy action tensor([ 1.8800, -0.9999, -0.2214, 0.4365]) tensor([0.7070, 0.0397, 0.0865, 0.1669]) -Greedy action tensor([ 0.4105, -0.9879, 0.3349, 1.1304]) tensor([0.2365, 0.0584, 0.2193, 0.4858]) -Greedy action tensor([ 0.1416, 0.1233, 0.1909, -1.2324]) tensor([0.3044, 0.2988, 0.3198, 0.0770]) -Greedy action tensor([ 1.1351, -0.6589, 0.9078, -0.4317]) tensor([0.4605, 0.0766, 0.3668, 0.0961]) -Greedy action tensor([ 0.3327, 0.3783, -0.0169, -0.8659]) tensor([0.3275, 0.3428, 0.2309, 0.0988]) -Greedy action tensor([-0.7067, -0.0878, 0.6285, 0.7642]) tensor([0.0908, 0.1686, 0.3452, 0.3954]) -Greedy action tensor([1.5594, 0.0923, 0.3025, 1.2232]) tensor([0.4485, 0.1034, 0.1276, 0.3204]) -Greedy action tensor([ 0.3553, -0.4079, -0.1340, 1.6627]) tensor([0.1731, 0.0807, 0.1061, 0.6400]) -Greedy action tensor([ 0.8710, -0.0472, 0.3110, 0.0201]) tensor([0.4171, 0.1665, 0.2383, 0.1781]) -Greedy action tensor([ 0.0986, -0.5506, 1.1325, 0.7012]) tensor([0.1623, 0.0848, 0.4564, 0.2965]) -Greedy action tensor([ 1.2519, -1.1736, -0.3461, 0.4906]) tensor([0.5689, 0.0503, 0.1151, 0.2657]) -Greedy action tensor([ 0.7058, -1.6311, 0.0960, 1.3623]) tensor([0.2803, 0.0271, 0.1523, 0.5403]) -Greedy action tensor([ 0.0682, 0.4251, 0.7821, -0.9134]) tensor([0.2064, 0.2949, 0.4214, 0.0773]) -Greedy action tensor([-0.6478, 0.6345, 0.0065, -0.7848]) tensor([0.1351, 0.4871, 0.2599, 0.1178]) -Greedy action tensor([ 0.1685, -1.4944, -0.2091, 1.3045]) tensor([0.2004, 0.0380, 0.1374, 0.6242]) -Greedy action tensor([ 1.3372, -1.3255, 0.5301, -0.1566]) tensor([0.5746, 0.0401, 0.2564, 0.1290]) -Greedy action tensor([-0.1893, -0.8430, 1.3135, -0.2096]) tensor([0.1430, 0.0744, 0.6426, 0.1401]) -Greedy action tensor([-0.1064, -0.5758, 0.2574, 0.0242]) tensor([0.2379, 0.1488, 0.3423, 0.2711]) -Greedy action tensor([ 0.1886, 0.0131, -1.1465, -0.3349]) tensor([0.3711, 0.3114, 0.0977, 0.2198]) -Greedy action tensor([1.3385, 0.3454, 1.0159, 1.1004]) tensor([0.3469, 0.1285, 0.2512, 0.2734]) -Greedy action tensor([ 0.7657, -0.2714, 1.2847, -0.0697]) tensor([0.2883, 0.1022, 0.4845, 0.1250]) -Greedy action tensor([ 0.6631, -0.1217, 0.0442, -0.6051]) tensor([0.4393, 0.2004, 0.2366, 0.1236]) -Greedy action tensor([ 0.3084, -1.3782, 1.1702, 0.2019]) tensor([0.2246, 0.0416, 0.5318, 0.2019]) -Greedy action tensor([-0.2972, -0.7413, 1.9761, -0.4653]) tensor([0.0820, 0.0526, 0.7961, 0.0693]) -Greedy action tensor([-0.2511, -2.3733, 0.2765, 0.6082]) tensor([0.1932, 0.0231, 0.3274, 0.4562]) -Greedy action tensor([ 0.6254, -0.4426, 0.6612, 0.2106]) tensor([0.3289, 0.1130, 0.3409, 0.2172]) -Greedy action tensor([ 0.9427, -0.2779, 1.0913, 0.0205]) tensor([0.3505, 0.1034, 0.4067, 0.1394]) -Greedy action tensor([ 0.3136, -1.0207, 0.1396, 0.3209]) tensor([0.3214, 0.0847, 0.2701, 0.3238]) -Greedy action tensor([-1.1869, -0.2110, -0.7880, 0.3209]) tensor([0.1035, 0.2747, 0.1542, 0.4676]) -Greedy action tensor([ 0.3733, -1.2224, -0.7221, 0.6535]) tensor([0.3496, 0.0709, 0.1169, 0.4626]) -Greedy action tensor([-0.8443, -0.6103, 0.8916, -0.0681]) tensor([0.0989, 0.1250, 0.5612, 0.2149]) -Greedy action tensor([-0.9146, -0.5362, -0.9243, 0.5090]) tensor([0.1315, 0.1920, 0.1303, 0.5462]) -Greedy action tensor([-0.1168, -0.5551, 0.9695, -0.8073]) tensor([0.1957, 0.1263, 0.5799, 0.0981]) -Greedy action tensor([ 0.6663, 0.7424, 0.7663, -0.4555]) tensor([0.2849, 0.3074, 0.3149, 0.0928]) -Greedy action tensor([-0.4309, -0.7844, 0.0720, 0.2942]) tensor([0.1845, 0.1295, 0.3050, 0.3809]) -Greedy action tensor([-0.3967, -0.2256, -0.1247, -0.1093]) tensor([0.2069, 0.2456, 0.2716, 0.2759]) -Greedy action tensor([-0.8814, -0.5017, -0.1269, -0.7519]) tensor([0.1746, 0.2553, 0.3713, 0.1988]) -Greedy action tensor([ 1.0528, -0.1376, -0.4605, 0.3782]) tensor([0.4917, 0.1495, 0.1083, 0.2505]) -Greedy action tensor([ 0.9162, -0.2094, -0.6243, 0.4193]) tensor([0.4657, 0.1511, 0.0998, 0.2834]) -Greedy action tensor([ 1.4785, -0.4977, -0.1687, 0.4085]) tensor([0.5973, 0.0828, 0.1150, 0.2049]) -Greedy action tensor([ 1.1304, -0.4432, -0.2190, 0.2388]) tensor([0.5329, 0.1105, 0.1382, 0.2185]) -Greedy action tensor([ 1.6177, -0.4517, -0.1745, 0.2717]) tensor([0.6439, 0.0813, 0.1073, 0.1676]) -Greedy action tensor([ 1.2845, -0.5868, -0.4167, 0.3734]) tensor([0.5752, 0.0885, 0.1050, 0.2313]) -Greedy action tensor([ 1.5605, -0.4630, -0.3439, 0.2540]) tensor([0.6444, 0.0852, 0.0960, 0.1745]) -Greedy action tensor([ 1.6034, -0.1142, -1.0538, 0.1104]) tensor([0.6783, 0.1217, 0.0476, 0.1524]) -Greedy action tensor([ 1.3636, -0.2503, -0.4057, 0.3692]) tensor([0.5749, 0.1145, 0.0980, 0.2127]) -Greedy action tensor([ 1.6000, -0.5875, -0.2502, 0.3525]) tensor([0.6424, 0.0721, 0.1010, 0.1845]) -Greedy action tensor([ 1.6556, -0.4691, -0.3611, 0.3963]) tensor([0.6508, 0.0778, 0.0866, 0.1848]) -Greedy action tensor([ 0.4374, -0.1580, 0.0506, 0.0475]) tensor([0.3439, 0.1896, 0.2336, 0.2329]) -Greedy action tensor([ 1.8938, 0.4092, -0.1234, 0.0442]) tensor([0.6592, 0.1494, 0.0877, 0.1037]) -Greedy action tensor([ 1.7598, 0.1384, -0.0778, 0.4949]) tensor([0.6101, 0.1206, 0.0971, 0.1722]) -Greedy action tensor([ 1.3723, -0.5650, -0.2993, 0.4754]) tensor([0.5747, 0.0828, 0.1080, 0.2344]) -Greedy action tensor([ 1.8011, -0.3718, -0.2110, 0.1008]) tensor([0.6992, 0.0796, 0.0935, 0.1277]) -Greedy action tensor([ 1.1293, -0.5875, -0.2838, 0.1513]) tensor([0.5558, 0.0998, 0.1353, 0.2090]) -Greedy action tensor([ 2.1375, -0.2158, -0.0533, 0.2876]) tensor([0.7331, 0.0697, 0.0820, 0.1153]) -Greedy action tensor([ 0.9359, -0.3063, -0.1472, 0.2600]) tensor([0.4682, 0.1352, 0.1585, 0.2382]) -Greedy action tensor([ 2.0147, -0.6941, -0.6311, 0.5978]) tensor([0.7246, 0.0483, 0.0514, 0.1757]) -Greedy action tensor([ 2.1807, -0.3568, -0.1801, 0.3524]) tensor([0.7496, 0.0593, 0.0707, 0.1204]) -Greedy action tensor([ 2.4023, -0.7658, -0.1642, 0.8573]) tensor([0.7506, 0.0316, 0.0577, 0.1601]) -Greedy action tensor([ 0.7153, -0.2128, 0.0540, 0.2069]) tensor([0.3979, 0.1573, 0.2054, 0.2394]) -Greedy action tensor([ 1.2753, -0.4923, -0.5685, 0.4463]) tensor([0.5664, 0.0967, 0.0896, 0.2472]) -Greedy action tensor([ 1.4339, -0.6086, -0.2810, 0.3995]) tensor([0.6006, 0.0779, 0.1081, 0.2135]) -Greedy action tensor([ 0.8830, -0.5682, -0.5061, 0.4318]) tensor([0.4716, 0.1105, 0.1176, 0.3003]) -Greedy action tensor([1.9123, 0.1178, 0.1382, 0.1182]) tensor([0.6657, 0.1107, 0.1129, 0.1107]) -Greedy action tensor([ 1.5091, -0.8142, -0.2754, 0.7080]) tensor([0.5832, 0.0571, 0.0979, 0.2618]) -Greedy action tensor([ 0.7895, 0.1947, -0.0576, 0.1670]) tensor([0.3973, 0.2192, 0.1703, 0.2132]) -Greedy action tensor([ 1.4846, -0.1130, -0.6577, 0.7296]) tensor([0.5587, 0.1131, 0.0656, 0.2626]) -Greedy action tensor([ 1.8729, -0.5742, -0.3321, 0.4438]) tensor([0.6962, 0.0603, 0.0768, 0.1668]) -Greedy action tensor([ 1.2821, -0.6166, -0.0850, -0.1015]) tensor([0.6041, 0.0905, 0.1540, 0.1514]) -Greedy action tensor([ 0.9617, -0.5247, -0.5961, 0.4608]) tensor([0.4895, 0.1107, 0.1031, 0.2967]) -Greedy action tensor([ 1.8392, -0.4424, -0.4717, 0.5149]) tensor([0.6815, 0.0696, 0.0676, 0.1813]) -Greedy action tensor([ 1.7888, -0.3473, -0.3981, 0.8922]) tensor([0.6104, 0.0721, 0.0685, 0.2490]) -Greedy action tensor([ 1.6408, -0.1406, -0.6288, 0.7399]) tensor([0.5960, 0.1004, 0.0616, 0.2421]) -Greedy action tensor([ 1.8116, -0.4701, -0.6105, 0.9122]) tensor([0.6259, 0.0639, 0.0555, 0.2546]) -Greedy action tensor([ 2.0484, -0.2980, 0.1704, 0.1339]) tensor([0.7163, 0.0686, 0.1095, 0.1056]) -Greedy action tensor([ 1.1086, -0.4224, -0.3877, 0.1872]) tensor([0.5440, 0.1177, 0.1218, 0.2165]) -Greedy action tensor([0.8876, 0.0594, 0.1179, 0.1586]) tensor([0.4198, 0.1834, 0.1944, 0.2025]) -Greedy action tensor([ 0.7977, -0.2855, -0.2089, 0.1849]) tensor([0.4453, 0.1507, 0.1627, 0.2413]) -Greedy action tensor([ 1.7527, -0.0912, -0.3758, 0.2724]) tensor([0.6646, 0.1051, 0.0791, 0.1512]) -Greedy action tensor([ 2.7186, -1.0207, -0.6886, 1.1143]) tensor([0.7950, 0.0189, 0.0263, 0.1598]) -Greedy action tensor([ 0.8753, -0.2663, -0.1873, 0.0233]) tensor([0.4782, 0.1527, 0.1652, 0.2040]) -Greedy action tensor([ 1.2830, -0.2958, -0.0908, 0.1146]) tensor([0.5649, 0.1165, 0.1430, 0.1756]) -Greedy action tensor([ 0.9363, -0.2369, 0.0734, 0.1342]) tensor([0.4588, 0.1419, 0.1936, 0.2057]) -Greedy action tensor([ 1.7644, -0.9178, -0.1992, 0.3188]) tensor([0.6923, 0.0474, 0.0972, 0.1631]) -Greedy action tensor([ 0.9175, -0.2542, -0.4011, 0.6398]) tensor([0.4283, 0.1327, 0.1146, 0.3244]) -Greedy action tensor([ 1.6173, -0.6266, -0.0440, 0.3072]) tensor([0.6387, 0.0677, 0.1213, 0.1723]) -Greedy action tensor([ 2.4311, -0.7322, -0.7391, 0.7509]) tensor([0.7870, 0.0333, 0.0331, 0.1466]) -Greedy action tensor([ 0.8838, -0.6447, 0.0321, 0.1477]) tensor([0.4711, 0.1022, 0.2010, 0.2257]) -Greedy action tensor([ 1.5823, -0.5417, -0.4121, 0.3164]) tensor([0.6503, 0.0778, 0.0885, 0.1834]) -Greedy action tensor([ 1.3565, -0.1248, -0.6919, 0.1702]) tensor([0.6018, 0.1368, 0.0776, 0.1838]) -Greedy action tensor([ 1.5105, -0.3602, -0.5271, 0.4541]) tensor([0.6127, 0.0944, 0.0799, 0.2131]) -Greedy action tensor([ 1.2416, -0.2115, -0.3795, 0.2926]) tensor([0.5499, 0.1286, 0.1087, 0.2129]) -Greedy action tensor([ 1.9178, -0.4197, 0.0736, 0.1499]) tensor([0.7015, 0.0677, 0.1110, 0.1198]) -Greedy action tensor([ 1.3183, -0.8117, -0.4310, 0.4441]) tensor([0.5848, 0.0695, 0.1017, 0.2440]) -Greedy action tensor([ 1.5804, -0.0891, -0.3133, 0.2865]) tensor([0.6200, 0.1168, 0.0933, 0.1700]) -Greedy action tensor([ 1.7282, -0.5227, -0.4554, 0.4058]) tensor([0.6737, 0.0709, 0.0759, 0.1795]) -Greedy action tensor([ 1.3899, -0.7356, -0.1745, 0.5107]) tensor([0.5735, 0.0685, 0.1200, 0.2381]) -Greedy action tensor([ 1.1038, -0.2558, -0.2905, 0.2165]) tensor([0.5218, 0.1340, 0.1294, 0.2148]) -Greedy action tensor([ 1.5520, -0.8727, -0.2446, 0.0817]) tensor([0.6738, 0.0596, 0.1117, 0.1549]) -Greedy action tensor([ 1.5053, -0.4682, -0.6558, 0.5425]) tensor([0.6112, 0.0849, 0.0704, 0.2334]) -Greedy action tensor([ 1.5467, -0.9878, -0.0376, 0.0548]) tensor([0.6625, 0.0525, 0.1359, 0.1490]) -Greedy action tensor([ 1.1966, -0.1089, -0.1713, 0.5837]) tensor([0.4837, 0.1311, 0.1232, 0.2620]) -Greedy action tensor([ 1.7087, -0.7977, -0.2719, 0.2524]) tensor([0.6884, 0.0561, 0.0950, 0.1605]) -Greedy action tensor([ 1.6726, -0.4230, -0.2408, 0.7831]) tensor([0.5947, 0.0732, 0.0878, 0.2443]) -Greedy action tensor([ 1.8060, -0.5905, -0.1896, 0.3249]) tensor([0.6876, 0.0626, 0.0935, 0.1563]) -Greedy action tensor([ 1.8205, -0.8303, -0.2383, 0.4502]) tensor([0.6886, 0.0486, 0.0879, 0.1749]) -Greedy action tensor([ 1.5009, -0.7657, -0.2761, 0.5525]) tensor([0.6024, 0.0624, 0.1019, 0.2333]) -Greedy action tensor([ 1.8338, -0.6332, -0.4824, 0.6122]) tensor([0.6765, 0.0574, 0.0667, 0.1994]) -Greedy action tensor([ 2.0603, -0.8611, -0.2433, 0.3052]) tensor([0.7538, 0.0406, 0.0753, 0.1303]) -Greedy action tensor([ 1.5749, 0.0023, -0.7804, 0.5800]) tensor([0.5980, 0.1241, 0.0567, 0.2211]) -Greedy action tensor([ 1.1588, -0.9501, -0.2601, 0.2750]) tensor([0.5629, 0.0683, 0.1362, 0.2326]) -Greedy action tensor([ 1.0096, -0.5136, -0.0692, 0.2772]) tensor([0.4905, 0.1069, 0.1668, 0.2358]) -Greedy action tensor([ 0.9837, -0.0279, -0.0941, 0.0136]) tensor([0.4801, 0.1746, 0.1634, 0.1820]) -Greedy action tensor([ 1.5042, 0.0573, -0.1522, 0.4827]) tensor([0.5598, 0.1317, 0.1068, 0.2016]) -Greedy action tensor([ 1.0342, -0.2775, -0.3489, 0.4745]) tensor([0.4781, 0.1288, 0.1199, 0.2732]) -Greedy action tensor([ 2.2064, -0.9065, -0.4296, 0.3698]) tensor([0.7840, 0.0349, 0.0562, 0.1249]) -Greedy action tensor([ 1.5653, -0.7515, -0.3605, 0.5268]) tensor([0.6257, 0.0617, 0.0912, 0.2215]) -Greedy action tensor([ 1.2608, -0.2438, -0.1233, -0.1243]) tensor([0.5804, 0.1289, 0.1454, 0.1453]) -Greedy action tensor([ 1.4227, -0.6375, -0.3147, 0.0937]) tensor([0.6377, 0.0813, 0.1122, 0.1688]) -Greedy action tensor([ 0.5626, -0.1472, -0.0513, -0.0526]) tensor([0.3886, 0.1911, 0.2103, 0.2100]) -Greedy action tensor([ 0.5363, -0.0054, -0.2560, -0.3878]) tensor([0.4113, 0.2393, 0.1862, 0.1632]) -Greedy action tensor([ 1.1113, -0.6340, -0.3347, -0.6647]) tensor([0.6331, 0.1105, 0.1491, 0.1072]) -Greedy action tensor([ 0.7266, -0.3142, 0.0504, -0.3026]) tensor([0.4507, 0.1592, 0.2292, 0.1610]) -Greedy action tensor([ 0.7941, -0.3169, -0.0011, -0.1130]) tensor([0.4578, 0.1507, 0.2067, 0.1848]) -Greedy action tensor([ 0.8929, -0.4615, -0.0613, -0.4768]) tensor([0.5270, 0.1360, 0.2030, 0.1340]) -Greedy action tensor([ 0.7676, 0.1138, 0.1047, -0.1632]) tensor([0.4116, 0.2140, 0.2121, 0.1623]) -Greedy action tensor([ 0.9970, -0.6536, -0.1979, -0.7524]) tensor([0.5993, 0.1150, 0.1814, 0.1042]) -Greedy action tensor([ 0.5484, -0.3126, -0.0320, -0.3285]) tensor([0.4169, 0.1762, 0.2333, 0.1735]) -Greedy action tensor([ 0.5151, -0.1169, -0.4271, -0.3760]) tensor([0.4289, 0.2280, 0.1672, 0.1760]) -Greedy action tensor([ 0.6711, -0.2596, -0.0324, -0.4175]) tensor([0.4493, 0.1771, 0.2223, 0.1513]) -Greedy action tensor([ 0.4172, 0.1412, -0.0684, -0.1978]) tensor([0.3431, 0.2603, 0.2111, 0.1855]) -Greedy action tensor([ 0.3381, 0.0724, -0.1076, -0.1104]) tensor([0.3283, 0.2517, 0.2103, 0.2097]) -Greedy action tensor([ 0.7833, -0.6001, -0.0581, -0.4560]) tensor([0.5073, 0.1272, 0.2187, 0.1469]) -Greedy action tensor([ 0.4381, 0.1671, -0.1134, -0.2257]) tensor([0.3504, 0.2673, 0.2019, 0.1804]) -Greedy action tensor([ 0.5196, 0.0710, -0.1592, -0.0718]) tensor([0.3705, 0.2366, 0.1879, 0.2051]) -Greedy action tensor([ 0.7398, -0.2456, -0.1789, -0.2389]) tensor([0.4655, 0.1738, 0.1858, 0.1750]) -Greedy action tensor([ 0.8014, -0.5727, -0.0333, -0.2320]) tensor([0.4895, 0.1239, 0.2125, 0.1742]) -Greedy action tensor([ 0.6379, -0.3490, -0.0033, -0.1220]) tensor([0.4224, 0.1575, 0.2225, 0.1976]) -Greedy action tensor([ 0.8017, -0.7505, -0.0082, -0.2864]) tensor([0.5016, 0.1062, 0.2232, 0.1690]) -Greedy action tensor([ 0.5272, -0.0121, 0.0516, -0.1602]) tensor([0.3693, 0.2154, 0.2295, 0.1857]) -Greedy action tensor([ 0.7127, -0.3449, 0.0218, -0.2048]) tensor([0.4449, 0.1545, 0.2229, 0.1777]) -Greedy action tensor([ 0.3663, -0.2189, -0.0110, -0.1530]) tensor([0.3524, 0.1963, 0.2416, 0.2097]) -Greedy action tensor([ 0.5042, -0.4228, -0.0597, -0.2219]) tensor([0.4084, 0.1616, 0.2324, 0.1976]) -Greedy action tensor([ 0.8605, -0.7395, -0.0638, -0.5765]) tensor([0.5446, 0.1099, 0.2161, 0.1294]) -Greedy action tensor([ 0.3166, -0.0490, -0.0257, -0.4711]) tensor([0.3498, 0.2427, 0.2484, 0.1591]) -Greedy action tensor([ 0.4208, -0.1634, 0.0167, -0.1977]) tensor([0.3618, 0.2017, 0.2415, 0.1949]) -Greedy action tensor([ 1.4153, -0.5962, -0.0309, -0.3983]) tensor([0.6526, 0.0873, 0.1537, 0.1064]) -Greedy action tensor([ 0.9560, -0.6265, -0.0870, -0.4162]) tensor([0.5520, 0.1134, 0.1945, 0.1400]) -Greedy action tensor([ 0.9698, -0.8498, -0.0965, -0.4903]) tensor([0.5752, 0.0932, 0.1980, 0.1336]) -Greedy action tensor([ 1.0523, -0.6789, 0.0057, -0.3709]) tensor([0.5653, 0.1001, 0.1985, 0.1362]) -Greedy action tensor([ 0.4945, -0.2160, -0.0399, -0.3355]) tensor([0.3979, 0.1955, 0.2332, 0.1735]) -Greedy action tensor([ 0.7110, -0.4514, -0.0886, -0.5296]) tensor([0.4875, 0.1524, 0.2191, 0.1410]) -Greedy action tensor([ 1.1259, -0.5451, -0.1351, -0.4055]) tensor([0.5925, 0.1114, 0.1679, 0.1281]) -Greedy action tensor([ 0.3677, -0.0260, -0.0220, -0.1583]) tensor([0.3398, 0.2292, 0.2301, 0.2008]) -Greedy action tensor([ 0.7111, -0.5883, -0.0042, -0.3097]) tensor([0.4712, 0.1285, 0.2305, 0.1698]) -Greedy action tensor([ 0.6041, -0.4301, 0.0215, -0.2561]) tensor([0.4279, 0.1521, 0.2390, 0.1810]) -Greedy action tensor([ 0.6445, -0.1403, 0.0065, -0.3357]) tensor([0.4238, 0.1933, 0.2239, 0.1590]) -Greedy action tensor([ 0.4757, -0.2281, -0.1071, -0.1372]) tensor([0.3854, 0.1907, 0.2152, 0.2088]) -Greedy action tensor([ 0.8686, -0.4499, -0.0768, -0.2768]) tensor([0.5065, 0.1355, 0.1968, 0.1611]) -Greedy action tensor([ 0.0947, 0.1983, -0.0426, -0.0204]) tensor([0.2583, 0.2864, 0.2251, 0.2302]) -Greedy action tensor([ 0.2083, -0.1260, -0.0980, -0.3703]) tensor([0.3319, 0.2376, 0.2444, 0.1861]) -Greedy action tensor([ 0.5822, -0.0510, 0.0223, -0.0953]) tensor([0.3831, 0.2034, 0.2189, 0.1946]) -Greedy action tensor([ 1.0059, -0.8408, 0.0594, -0.2654]) tensor([0.5475, 0.0864, 0.2125, 0.1536]) -Greedy action tensor([ 1.2226, -0.7117, -0.0066, -0.3620]) tensor([0.6090, 0.0880, 0.1781, 0.1249]) -Greedy action tensor([ 0.6937, -0.4924, -0.1318, -0.4967]) tensor([0.4884, 0.1492, 0.2139, 0.1485]) -Greedy action tensor([ 0.7068, -0.4207, 0.0447, -0.8347]) tensor([0.4869, 0.1577, 0.2512, 0.1042]) -Greedy action tensor([ 0.9421, -0.5295, 0.0979, -0.1212]) tensor([0.4988, 0.1145, 0.2144, 0.1723]) -Greedy action tensor([ 0.9187, -0.4122, -0.1623, -0.4360]) tensor([0.5372, 0.1419, 0.1822, 0.1386]) -Greedy action tensor([ 0.5262, 0.0454, -0.2207, -0.2718]) tensor([0.3933, 0.2432, 0.1864, 0.1771]) -Greedy action tensor([ 0.4341, -0.1409, -0.0217, -0.1114]) tensor([0.3602, 0.2027, 0.2283, 0.2088]) -Greedy action tensor([ 0.6803, -0.5723, -0.1530, -0.5845]) tensor([0.4993, 0.1427, 0.2170, 0.1410]) -Greedy action tensor([ 0.3176, 0.4935, -0.0187, -0.4177]) tensor([0.2953, 0.3521, 0.2110, 0.1416]) -Greedy action tensor([ 1.0523, -1.0147, 0.0886, -0.6111]) tensor([0.5891, 0.0746, 0.2247, 0.1116]) -Greedy action tensor([ 1.0251, -0.6030, -0.0562, -0.6042]) tensor([0.5775, 0.1134, 0.1959, 0.1132]) -Greedy action tensor([ 0.3622, -0.0049, -0.0431, -0.0262]) tensor([0.3292, 0.2280, 0.2195, 0.2232]) -Greedy action tensor([ 0.8218, -0.6841, -0.1662, -0.4070]) tensor([0.5300, 0.1176, 0.1973, 0.1551]) -Greedy action tensor([ 0.8390, -0.2514, 0.0218, -0.4601]) tensor([0.4877, 0.1639, 0.2154, 0.1330]) -Greedy action tensor([ 0.8070, -0.4392, -0.1667, -0.3705]) tensor([0.5068, 0.1457, 0.1914, 0.1561]) -Greedy action tensor([ 0.8932, -0.4611, 0.0598, -0.2300]) tensor([0.4955, 0.1279, 0.2153, 0.1612]) -Greedy action tensor([ 1.0799, -0.5477, -0.1369, -0.4614]) tensor([0.5859, 0.1151, 0.1735, 0.1255]) -Greedy action tensor([ 0.5646, 0.1562, -0.0377, -0.0368]) tensor([0.3623, 0.2408, 0.1984, 0.1985]) -Greedy action tensor([ 0.6684, -0.4543, -0.0936, -0.2638]) tensor([0.4575, 0.1489, 0.2135, 0.1801]) -Greedy action tensor([ 0.4580, 0.0672, -0.1635, -0.2926]) tensor([0.3723, 0.2519, 0.2000, 0.1758]) -Greedy action tensor([ 0.6528, -0.2918, -0.0027, -0.1914]) tensor([0.4277, 0.1663, 0.2221, 0.1839]) -Greedy action tensor([ 1.0566, -0.3539, 0.1321, -0.5909]) tensor([0.5455, 0.1331, 0.2164, 0.1050]) -Greedy action tensor([ 0.6168, -0.4106, 0.0340, -0.4228]) tensor([0.4406, 0.1577, 0.2460, 0.1558]) -Greedy action tensor([ 0.0881, 0.3976, -0.0986, -0.3755]) tensor([0.2617, 0.3566, 0.2171, 0.1646]) -Greedy action tensor([ 0.4326, 0.0629, -0.0038, -0.0525]) tensor([0.3386, 0.2340, 0.2189, 0.2085]) -Greedy action tensor([ 0.8155, -0.5141, -0.1216, -0.2397]) tensor([0.4989, 0.1320, 0.1954, 0.1737]) -Greedy action tensor([ 0.7756, -0.3716, -0.0489, -0.3545]) tensor([0.4810, 0.1527, 0.2109, 0.1554]) -Greedy action tensor([ 0.6941, -0.2028, 0.2379, -0.3459]) tensor([0.4175, 0.1703, 0.2646, 0.1476]) -Greedy action tensor([ 1.0112, -0.7638, 0.0241, -0.6673]) tensor([0.5784, 0.0980, 0.2156, 0.1080]) -Greedy action tensor([ 0.7387, -0.1168, 0.0078, -0.4278]) tensor([0.4509, 0.1916, 0.2171, 0.1404]) -Greedy action tensor([ 0.6637, -0.7177, -0.0183, -0.2055]) tensor([0.4595, 0.1154, 0.2323, 0.1927]) -Greedy action tensor([ 1.1292, -0.8105, 0.0150, -0.5312]) tensor([0.6017, 0.0865, 0.1975, 0.1144]) -Greedy action tensor([ 0.4177, 0.1214, -0.0096, -0.0686]) tensor([0.3321, 0.2470, 0.2166, 0.2042]) -Greedy action tensor([ 0.4509, -0.2123, 0.0236, -0.3905]) tensor([0.3848, 0.1983, 0.2510, 0.1659]) -Greedy action tensor([ 0.4209, -0.2901, -0.0896, -0.2961]) tensor([0.3877, 0.1904, 0.2327, 0.1893]) -Greedy action tensor([ 0.6824, -0.1018, -0.0423, -0.5097]) tensor([0.4455, 0.2034, 0.2158, 0.1353]) -Greedy action tensor([ 0.6554, -0.4012, -0.0855, -0.3563]) tensor([0.4571, 0.1589, 0.2179, 0.1662]) -Greedy action tensor([-0.5186, -0.7768, 0.0817, -0.3765]) tensor([0.2106, 0.1627, 0.3839, 0.2428]) -Greedy action tensor([1.1712, 0.1567, 0.6915, 0.0085]) tensor([0.4359, 0.1580, 0.2698, 0.1363]) -Greedy action tensor([-0.4420, -1.6554, 0.2909, 0.6023]) tensor([0.1608, 0.0478, 0.3346, 0.4568]) -Greedy action tensor([-0.6334, -2.2735, -0.1198, 0.1724]) tensor([0.1959, 0.0380, 0.3275, 0.4386]) -Greedy action tensor([-0.1255, -0.8785, 0.3612, 0.2585]) tensor([0.2190, 0.1031, 0.3563, 0.3215]) -Greedy action tensor([0.1533, 0.2000, 0.3552, 0.2283]) tensor([0.2299, 0.2409, 0.2814, 0.2478]) -Greedy action tensor([-0.6654, -1.2138, 0.7832, -0.2419]) tensor([0.1358, 0.0785, 0.5782, 0.2074]) -Greedy action tensor([ 0.5140, -0.4798, -0.4519, 0.6784]) tensor([0.3414, 0.1264, 0.1299, 0.4024]) -Greedy action tensor([-0.2045, -0.8533, 0.0099, -0.2070]) tensor([0.2660, 0.1390, 0.3296, 0.2654]) -Greedy action tensor([-0.0293, -0.3048, 0.2879, -0.6465]) tensor([0.2723, 0.2068, 0.3740, 0.1469]) -Greedy action tensor([-0.2419, 1.0342, -0.6758, -0.2483]) tensor([0.1607, 0.5756, 0.1041, 0.1596]) -Greedy action tensor([-0.2106, -0.5431, 1.5946, -0.6795]) tensor([0.1187, 0.0851, 0.7219, 0.0743]) -Greedy action tensor([-0.0836, -1.9074, -0.3210, 0.6970]) tensor([0.2420, 0.0391, 0.1908, 0.5282]) -Greedy action tensor([1.3265, 0.0823, 1.4090, 0.0896]) tensor([0.3753, 0.1082, 0.4076, 0.1089]) -Greedy action tensor([-0.2332, -0.6491, -0.6718, -0.2204]) tensor([0.3014, 0.1989, 0.1944, 0.3053]) -Greedy action tensor([ 0.2158, 0.6060, 0.6329, -0.6924]) tensor([0.2274, 0.3359, 0.3451, 0.0917]) -Greedy action tensor([-0.5181, 0.6586, 0.1935, 0.2603]) tensor([0.1182, 0.3835, 0.2408, 0.2575]) -Greedy action tensor([-1.5425, -0.5559, 0.4995, -0.3288]) tensor([0.0678, 0.1818, 0.5223, 0.2281]) -Greedy action tensor([ 0.0945, 0.5688, 0.1462, -0.0602]) tensor([0.2214, 0.3558, 0.2331, 0.1897]) -Greedy action tensor([1.0384, 0.5810, 0.0369, 1.3568]) tensor([0.2963, 0.1875, 0.1088, 0.4074]) -Greedy action tensor([ 0.8312, -0.1358, 1.1136, 0.2476]) tensor([0.3063, 0.1165, 0.4063, 0.1709]) -Greedy action tensor([ 0.9287, -0.5790, 0.3964, 0.6549]) tensor([0.3892, 0.0862, 0.2286, 0.2960]) -Greedy action tensor([-1.1826, -1.1965, -0.5412, -0.6954]) tensor([0.1814, 0.1789, 0.3445, 0.2952]) -Greedy action tensor([ 0.9205, -1.0225, 0.0417, -0.1658]) tensor([0.5274, 0.0756, 0.2190, 0.1780]) -Greedy action tensor([1.7697, 0.0493, 0.9332, 0.4454]) tensor([0.5324, 0.0953, 0.2307, 0.1416]) -Greedy action tensor([-0.9988, -0.5443, 0.4706, -0.0936]) tensor([0.1064, 0.1677, 0.4627, 0.2632]) -Greedy action tensor([-1.5724, -0.2456, -0.0122, 0.5913]) tensor([0.0548, 0.2067, 0.2611, 0.4774]) -Greedy action tensor([-0.5635, -0.7493, 0.3138, -0.9242]) tensor([0.2028, 0.1684, 0.4875, 0.1414]) -Greedy action tensor([1.5080, 0.3516, 0.9469, 0.6000]) tensor([0.4370, 0.1375, 0.2493, 0.1762]) -Greedy action tensor([ 0.8187, -1.1402, -0.4473, -0.8935]) tensor([0.6237, 0.0879, 0.1758, 0.1126]) -Greedy action tensor([ 1.0944, -0.9124, 0.2450, 0.2765]) tensor([0.4991, 0.0671, 0.2135, 0.2203]) -Greedy action tensor([-0.2054, 0.4448, 0.0881, -0.8122]) tensor([0.2082, 0.3990, 0.2793, 0.1135]) -Greedy action tensor([-0.8939, -0.4570, -1.1801, -0.3328]) tensor([0.1980, 0.3064, 0.1487, 0.3469]) -Greedy action tensor([ 0.9424, -0.1882, -0.1264, 0.3366]) tensor([0.4521, 0.1460, 0.1553, 0.2467]) -Greedy action tensor([-0.5945, -0.2243, -0.6177, 0.2985]) tensor([0.1704, 0.2468, 0.1665, 0.4163]) -Greedy action tensor([ 0.2628, 0.6476, 0.4519, -0.1780]) tensor([0.2314, 0.3400, 0.2796, 0.1489]) -Greedy action tensor([ 0.1352, 0.6428, 0.6301, -0.9672]) tensor([0.2158, 0.3585, 0.3540, 0.0717]) -Greedy action tensor([-0.6963, -1.0793, -0.1119, -0.4252]) tensor([0.2089, 0.1424, 0.3747, 0.2739]) -Greedy action tensor([-0.1907, -0.3418, 0.6525, -0.1110]) tensor([0.1899, 0.1633, 0.4412, 0.2056]) -Greedy action tensor([ 0.4624, -0.9988, -0.5394, 0.4793]) tensor([0.3822, 0.0887, 0.1404, 0.3887]) -Greedy action tensor([ 0.3665, -1.8710, 0.5273, 0.9115]) tensor([0.2496, 0.0266, 0.2932, 0.4305]) -Greedy action tensor([ 0.6039, -2.1638, 0.1608, 0.9030]) tensor([0.3275, 0.0206, 0.2103, 0.4417]) -Greedy action tensor([ 0.3757, -0.2136, 1.1860, 1.2736]) tensor([0.1598, 0.0886, 0.3593, 0.3922]) -Greedy action tensor([-0.9101, -0.8247, -0.3625, -0.3229]) tensor([0.1780, 0.1939, 0.3078, 0.3202]) -Greedy action tensor([ 1.4784, -0.4134, 0.2820, 0.6268]) tensor([0.5320, 0.0802, 0.1608, 0.2270]) -Greedy action tensor([ 0.4269, -1.1550, 0.3193, 0.0012]) tensor([0.3627, 0.0746, 0.3257, 0.2370]) -Greedy action tensor([ 0.1091, -1.5973, 0.1822, 0.3961]) tensor([0.2786, 0.0506, 0.2997, 0.3712]) -Greedy action tensor([ 1.3277, -0.8149, 0.5967, 0.2658]) tensor([0.5143, 0.0603, 0.2476, 0.1778]) -Greedy action tensor([ 0.0923, -1.4139, 1.3511, -0.3428]) tensor([0.1855, 0.0411, 0.6533, 0.1201]) -Greedy action tensor([ 0.5220, -1.0306, 0.3500, 2.0939]) tensor([0.1456, 0.0308, 0.1226, 0.7011]) -Greedy action tensor([-1.4211, 1.3600, 0.1061, -1.1154]) tensor([0.0433, 0.6986, 0.1994, 0.0588]) -Greedy action tensor([ 1.1915, -0.3921, 0.7115, 0.2466]) tensor([0.4519, 0.0927, 0.2797, 0.1757]) -Greedy action tensor([-0.0975, -0.6427, 0.1153, 0.5942]) tensor([0.2077, 0.1204, 0.2570, 0.4148]) -Greedy action tensor([-0.3987, -0.3466, -0.5547, 0.1505]) tensor([0.2155, 0.2270, 0.1844, 0.3732]) -Greedy action tensor([-1.5629, -0.4647, 1.5592, -0.1634]) tensor([0.0325, 0.0975, 0.7381, 0.1318]) -Greedy action tensor([ 1.0465, -0.4159, 0.9033, 1.6284]) tensor([0.2572, 0.0596, 0.2229, 0.4603]) -Greedy action tensor([ 1.2819, -1.2739, 1.4665, 0.2501]) tensor([0.3793, 0.0294, 0.4561, 0.1352]) -Greedy action tensor([-0.8550, -0.7902, -0.8032, -0.3498]) tensor([0.2093, 0.2233, 0.2204, 0.3469]) -Greedy action tensor([-0.8630, -0.1357, -0.8436, -0.2030]) tensor([0.1660, 0.3435, 0.1693, 0.3212]) -Greedy action tensor([ 0.7958, -0.8983, -0.4061, 2.0013]) tensor([0.2073, 0.0381, 0.0623, 0.6922]) -Greedy action tensor([-0.1026, -0.9664, 1.3672, 0.3123]) tensor([0.1373, 0.0579, 0.5970, 0.2079]) -Greedy action tensor([ 0.8431, -0.3343, -0.4120, 0.4637]) tensor([0.4391, 0.1353, 0.1252, 0.3005]) -Greedy action tensor([-0.0975, -1.0759, 0.5141, -0.5676]) tensor([0.2601, 0.0978, 0.4795, 0.1626]) -Greedy action tensor([-0.8220, 0.1211, -0.1921, 0.8836]) tensor([0.0913, 0.2345, 0.1714, 0.5027]) -Greedy action tensor([ 1.2291, -1.4130, 2.0137, -0.0762]) tensor([0.2830, 0.0202, 0.6201, 0.0767]) -Greedy action tensor([ 0.8554, -1.1878, 0.7680, 0.1565]) tensor([0.3932, 0.0510, 0.3603, 0.1955]) -Greedy action tensor([-0.9310, 0.1581, -0.5051, -0.0139]) tensor([0.1249, 0.3712, 0.1913, 0.3126]) -Greedy action tensor([ 0.3872, 0.3855, 0.2448, -0.2167]) tensor([0.2931, 0.2926, 0.2542, 0.1602]) -Greedy action tensor([ 0.9639, -0.1113, 1.6369, -0.6359]) tensor([0.2854, 0.0974, 0.5595, 0.0576]) -Greedy action tensor([-0.0975, -0.9798, 0.3567, -0.5806]) tensor([0.2773, 0.1148, 0.4368, 0.1711]) -Greedy action tensor([ 1.0702, -1.1880, 1.1573, 1.7487]) tensor([0.2400, 0.0251, 0.2618, 0.4731]) -Greedy action tensor([-0.4894, -2.7625, -0.3566, 0.2802]) tensor([0.2271, 0.0234, 0.2593, 0.4902]) -Greedy action tensor([ 0.8783, -0.2910, 0.9693, 0.5962]) tensor([0.3165, 0.0983, 0.3466, 0.2387]) -Greedy action tensor([ 0.3468, -0.6090, 1.1636, 0.0041]) tensor([0.2295, 0.0882, 0.5194, 0.1629]) -Greedy action tensor([ 0.5683, -0.4411, 0.0415, 0.1009]) tensor([0.3874, 0.1412, 0.2287, 0.2427]) -Greedy action tensor([ 2.0259, -1.2486, 0.8214, -0.0529]) tensor([0.6836, 0.0259, 0.2050, 0.0855]) -Greedy action tensor([ 1.0139, -0.0319, 0.2062, 0.2056]) tensor([0.4458, 0.1567, 0.1988, 0.1987]) -Greedy action tensor([ 1.7148, -1.1589, 0.8443, 1.7187]) tensor([0.4034, 0.0228, 0.1689, 0.4049]) -Greedy action tensor([ 1.1849, -0.5255, 0.5406, 0.5948]) tensor([0.4425, 0.0800, 0.2323, 0.2452]) -Greedy action tensor([ 0.3562, -0.7816, -0.6071, 0.8177]) tensor([0.3041, 0.0975, 0.1160, 0.4824]) -Greedy action tensor([ 0.8991, -0.1578, 0.2397, 0.0719]) tensor([0.4344, 0.1510, 0.2247, 0.1900]) -Greedy action tensor([ 1.2714, -0.3532, 0.8118, 0.3542]) tensor([0.4488, 0.0884, 0.2834, 0.1794]) -Greedy action tensor([-1.9336, -0.4535, 0.6675, -0.1726]) tensor([0.0405, 0.1779, 0.5459, 0.2357]) -Greedy action tensor([-0.8590, -0.3499, 0.3779, -0.2718]) tensor([0.1265, 0.2104, 0.4356, 0.2275]) -Greedy action tensor([-1.7981, -0.4169, 0.6324, -0.0488]) tensor([0.0453, 0.1801, 0.5143, 0.2603]) -Greedy action tensor([-1.8782, -0.4580, 0.6300, -0.1415]) tensor([0.0433, 0.1791, 0.5317, 0.2458]) -Greedy action tensor([-1.2735, 0.5763, 0.1480, 0.2783]) tensor([0.0616, 0.3920, 0.2554, 0.2910]) -Greedy action tensor([-1.9284, -0.4307, 0.6575, -0.1706]) tensor([0.0407, 0.1822, 0.5408, 0.2363]) -Greedy action tensor([-1.2017, -0.6653, 0.8967, 1.1086]) tensor([0.0478, 0.0817, 0.3894, 0.4812]) -Greedy action tensor([0.2146, 1.1107, 0.0162, 0.7807]) tensor([0.1658, 0.4062, 0.1360, 0.2920]) -Greedy action tensor([-1.6182, 0.2999, 0.4242, 0.0171]) tensor([0.0484, 0.3297, 0.3734, 0.2485]) -Greedy action tensor([-1.6409, -0.1590, 0.4808, -0.0805]) tensor([0.0540, 0.2378, 0.4509, 0.2572]) -Greedy action tensor([-1.9333, -0.4381, 0.6612, -0.1741]) tensor([0.0406, 0.1809, 0.5430, 0.2355]) -Greedy action tensor([-0.3909, 0.1386, 0.7539, 1.4669]) tensor([0.0816, 0.1386, 0.2565, 0.5232]) -Greedy action tensor([-1.6797, -0.3419, 0.5293, -0.0991]) tensor([0.0533, 0.2030, 0.4850, 0.2587]) -Greedy action tensor([-1.3131, -0.6113, 0.4069, 0.1973]) tensor([0.0762, 0.1536, 0.4253, 0.3449]) -Greedy action tensor([-1.6822, -0.1244, 0.4944, -0.0628]) tensor([0.0510, 0.2421, 0.4495, 0.2575]) -Greedy action tensor([-1.9004, -0.4971, 0.7477, -0.1034]) tensor([0.0396, 0.1613, 0.5600, 0.2391]) -Greedy action tensor([-1.6696, -0.2178, 0.5629, 0.1234]) tensor([0.0485, 0.2073, 0.4526, 0.2916]) -Greedy action tensor([-0.8560, -0.1666, 0.0487, 0.6621]) tensor([0.0997, 0.1987, 0.2464, 0.4551]) -Greedy action tensor([-1.9019, -0.4500, 0.6518, -0.1435]) tensor([0.0418, 0.1785, 0.5372, 0.2425]) -Greedy action tensor([-1.9120, -0.4652, 0.6446, -0.1683]) tensor([0.0419, 0.1781, 0.5403, 0.2397]) -Greedy action tensor([-1.3617, -0.5066, 0.4451, 0.2512]) tensor([0.0692, 0.1626, 0.4212, 0.3470]) -Greedy action tensor([-1.9082, -0.3725, 0.6347, -0.1666]) tensor([0.0415, 0.1930, 0.5284, 0.2371]) -Greedy action tensor([-1.9452, -0.4473, 0.6664, -0.1814]) tensor([0.0401, 0.1794, 0.5464, 0.2341]) -Greedy action tensor([-1.9041, -0.4555, 0.6512, -0.1596]) tensor([0.0419, 0.1785, 0.5397, 0.2399]) -Greedy action tensor([-1.7704, 0.0709, 0.5022, -0.0475]) tensor([0.0442, 0.2788, 0.4292, 0.2477]) -Greedy action tensor([-1.7637, -0.3106, 0.5814, -0.0497]) tensor([0.0470, 0.2011, 0.4908, 0.2611]) -Greedy action tensor([-1.7215, -0.4758, 0.7357, -0.1906]) tensor([0.0481, 0.1673, 0.5620, 0.2226]) -Greedy action tensor([-1.5222, -0.2024, 0.6309, 0.1386]) tensor([0.0537, 0.2010, 0.4626, 0.2827]) -Greedy action tensor([-1.4859, -0.6043, 0.4386, 0.1152]) tensor([0.0657, 0.1586, 0.4500, 0.3257]) -Greedy action tensor([-1.6763, 0.1721, 0.4657, -0.0726]) tensor([0.0480, 0.3047, 0.4087, 0.2386]) -Greedy action tensor([-1.5662, 0.3862, 0.4012, -0.1481]) tensor([0.0517, 0.3646, 0.3700, 0.2137]) -Greedy action tensor([-1.2376, 0.5766, 0.1612, 0.1803]) tensor([0.0653, 0.4007, 0.2645, 0.2696]) -Greedy action tensor([-1.8433, -0.4179, 0.6775, -0.0553]) tensor([0.0424, 0.1764, 0.5276, 0.2535]) -Greedy action tensor([-1.5232, -0.6513, 0.4523, 0.0082]) tensor([0.0657, 0.1571, 0.4735, 0.3037]) -Greedy action tensor([-1.6685, -0.5304, 0.5338, -0.0885]) tensor([0.0555, 0.1732, 0.5019, 0.2694]) -Greedy action tensor([-1.7553, -0.2749, 0.5493, -0.0934]) tensor([0.0483, 0.2125, 0.4844, 0.2548]) -Greedy action tensor([-1.2765, -0.5781, 0.3162, 0.2220]) tensor([0.0806, 0.1621, 0.3965, 0.3608]) -Greedy action tensor([-1.8615, -0.3250, 0.6056, -0.1283]) tensor([0.0433, 0.2013, 0.5104, 0.2450]) -Greedy action tensor([-1.7998, -0.4935, 0.6276, -0.0398]) tensor([0.0458, 0.1691, 0.5189, 0.2662]) -Greedy action tensor([-1.9303, -0.4018, 0.6501, -0.1768]) tensor([0.0407, 0.1875, 0.5369, 0.2349]) -Greedy action tensor([-1.0417, -0.6480, 0.1945, 0.3514]) tensor([0.1005, 0.1490, 0.3459, 0.4047]) -Greedy action tensor([-1.1520, -0.6843, 1.1948, 1.3359]) tensor([0.0399, 0.0636, 0.4167, 0.4798]) -Greedy action tensor([-1.7545, -0.4806, 0.5527, -0.0927]) tensor([0.0503, 0.1797, 0.5051, 0.2649]) -Greedy action tensor([-1.8124, -0.4862, 0.6003, -0.0780]) tensor([0.0463, 0.1744, 0.5170, 0.2623]) -Greedy action tensor([-1.8808, -0.4300, 0.6332, -0.1376]) tensor([0.0429, 0.1828, 0.5294, 0.2449]) -Greedy action tensor([-1.8470, -0.3380, 0.6082, -0.1170]) tensor([0.0438, 0.1982, 0.5106, 0.2473]) -Greedy action tensor([-1.6945, -0.4837, 0.5169, 0.0042]) tensor([0.0528, 0.1771, 0.4817, 0.2885]) -Greedy action tensor([-1.3800, -0.5927, 0.3428, 0.1903]) tensor([0.0735, 0.1615, 0.4116, 0.3534]) -Greedy action tensor([-1.2780, 0.7078, 0.2204, 0.0815]) tensor([0.0600, 0.4374, 0.2687, 0.2338]) -Greedy action tensor([-1.6799, -0.4862, 0.5596, 0.0149]) tensor([0.0523, 0.1724, 0.4907, 0.2846]) -Greedy action tensor([-1.8923, -0.3973, 0.6324, -0.1576]) tensor([0.0423, 0.1888, 0.5288, 0.2400]) -Greedy action tensor([-1.1213, 0.8403, 0.1582, 0.1605]) tensor([0.0653, 0.4645, 0.2348, 0.2354]) -Greedy action tensor([-1.8929, -0.4549, 0.6369, -0.1496]) tensor([0.0426, 0.1794, 0.5346, 0.2435]) -Greedy action tensor([-1.8476, -0.2933, 0.6004, -0.1148]) tensor([0.0436, 0.2061, 0.5039, 0.2464]) -Greedy action tensor([-1.7362, 0.1528, 0.4664, -0.0314]) tensor([0.0451, 0.2984, 0.4083, 0.2482]) -Greedy action tensor([-0.7666, 0.3599, 0.6423, 1.1340]) tensor([0.0673, 0.2075, 0.2752, 0.4500]) -Greedy action tensor([-1.8900, -0.3468, 0.6244, -0.1433]) tensor([0.0421, 0.1968, 0.5198, 0.2413]) -Greedy action tensor([-1.9372, -0.4440, 0.6660, -0.1720]) tensor([0.0403, 0.1795, 0.5446, 0.2356]) -Greedy action tensor([-1.6682, -0.5410, 0.5352, -0.0865]) tensor([0.0555, 0.1714, 0.5029, 0.2701]) -Greedy action tensor([-1.7082, -0.3857, 0.6651, 0.0408]) tensor([0.0471, 0.1767, 0.5054, 0.2707]) -Greedy action tensor([-1.8444, -0.1490, 0.5737, -0.0881]) tensor([0.0426, 0.2322, 0.4784, 0.2468]) -Greedy action tensor([-1.8324, -0.4049, 0.6677, -0.0926]) tensor([0.0434, 0.1808, 0.5286, 0.2471]) -Greedy action tensor([-1.5090, 0.3191, 0.4229, 0.1858]) tensor([0.0511, 0.3179, 0.3527, 0.2783]) -Greedy action tensor([-1.9441, -0.4507, 0.6648, -0.1810]) tensor([0.0402, 0.1790, 0.5463, 0.2345]) -Greedy action tensor([-1.4596, 0.6144, 0.1417, 0.0356]) tensor([0.0544, 0.4330, 0.2699, 0.2427]) -Greedy action tensor([-0.9980, -0.2714, 1.0481, 1.2841]) tensor([0.0485, 0.1004, 0.3755, 0.4755]) -Greedy action tensor([-1.5539, 0.1609, 0.4168, -0.0260]) tensor([0.0545, 0.3029, 0.3913, 0.2513]) -Greedy action tensor([-1.7119, -0.5152, 0.6083, 0.0452]) tensor([0.0493, 0.1632, 0.5018, 0.2857]) -Greedy action tensor([-1.9069, -0.3455, 0.6312, -0.1507]) tensor([0.0413, 0.1968, 0.5227, 0.2391]) -Greedy action tensor([-1.8482, -0.4207, 0.6550, -0.1359]) tensor([0.0436, 0.1818, 0.5330, 0.2417]) -Greedy action tensor([-1.8259, -0.5023, 0.7587, 0.0250]) tensor([0.0410, 0.1541, 0.5438, 0.2611]) -Greedy action tensor([-1.8400, -0.4711, 0.6292, -0.1074]) tensor([0.0446, 0.1755, 0.5274, 0.2525]) -Greedy action tensor([-1.8254, -0.2633, 0.5822, -0.1211]) tensor([0.0447, 0.2132, 0.4964, 0.2457]) -Greedy action tensor([-0.9178, -0.4543, 0.3223, -0.1385]) tensor([0.1216, 0.1932, 0.4202, 0.2650]) -Greedy action tensor([-1.8766, -0.4641, 0.6726, -0.0847]) tensor([0.0418, 0.1718, 0.5353, 0.2510]) -Greedy action tensor([-1.9226, -0.4600, 0.6544, -0.1714]) tensor([0.0413, 0.1781, 0.5429, 0.2377]) -Greedy action tensor([-1.8071, -0.4662, 0.6307, -0.1007]) tensor([0.0459, 0.1755, 0.5256, 0.2529]) -Greedy action tensor([-1.8610, -0.4603, 0.6213, -0.1385]) tensor([0.0442, 0.1794, 0.5290, 0.2474]) -Greedy action tensor([-1.6123, -0.4346, 0.6206, 0.2037]) tensor([0.0507, 0.1646, 0.4729, 0.3117]) -Greedy action tensor([-1.8701, -0.4453, 0.6365, -0.1405]) tensor([0.0434, 0.1803, 0.5318, 0.2445]) -Greedy action tensor([-1.6161, -0.5131, 0.6470, 0.3183]) tensor([0.0487, 0.1467, 0.4679, 0.3368]) -Greedy action tensor([ 1.1900, -0.5016, -0.3122, 0.0117]) tensor([0.5832, 0.1074, 0.1298, 0.1795]) -Greedy action tensor([ 1.6068e+00, -5.2029e-04, -2.6114e-01, 4.3197e-01]) tensor([0.6011, 0.1205, 0.0928, 0.1856]) -Greedy action tensor([ 1.4026, -0.8099, -0.1307, 0.6567]) tensor([0.5557, 0.0608, 0.1199, 0.2636]) -Greedy action tensor([ 1.6601, -0.5821, -0.3073, -0.0804]) tensor([0.7035, 0.0747, 0.0984, 0.1234]) -Greedy action tensor([ 1.6394, -0.6978, -0.1349, 0.3408]) tensor([0.6497, 0.0628, 0.1102, 0.1773]) -Greedy action tensor([ 0.9221, -0.2602, -0.4425, 0.4019]) tensor([0.4637, 0.1422, 0.1185, 0.2756]) -Greedy action tensor([ 0.8300, -0.1873, -0.1090, -0.0788]) tensor([0.4639, 0.1677, 0.1814, 0.1870]) -Greedy action tensor([ 0.7711, -0.2181, -0.0758, -0.0882]) tensor([0.4496, 0.1672, 0.1928, 0.1904]) -Greedy action tensor([ 1.8087, -0.9058, -0.0562, 1.0621]) tensor([0.5899, 0.0391, 0.0914, 0.2796]) -Greedy action tensor([ 1.5733, 0.0226, -0.4731, 0.6012]) tensor([0.5815, 0.1233, 0.0751, 0.2200]) -Greedy action tensor([ 1.1886, 0.0980, -0.6164, 0.1943]) tensor([0.5346, 0.1796, 0.0879, 0.1978]) -Greedy action tensor([ 1.9559, 0.1471, -0.3989, 0.7343]) tensor([0.6437, 0.1055, 0.0611, 0.1897]) -Greedy action tensor([ 2.3853, -0.6912, -0.1665, 0.0767]) tensor([0.8174, 0.0377, 0.0637, 0.0812]) -Greedy action tensor([ 1.3292, -0.2107, -0.3068, -0.2075]) tensor([0.6157, 0.1320, 0.1199, 0.1324]) -Greedy action tensor([ 1.4898, -0.1858, -0.1215, -0.0025]) tensor([0.6205, 0.1161, 0.1239, 0.1395]) -Greedy action tensor([ 2.2362, -1.0023, -0.3399, 0.5275]) tensor([0.7714, 0.0303, 0.0587, 0.1397]) -Greedy action tensor([ 1.4409, -0.3371, -0.4006, 0.5417]) tensor([0.5766, 0.0974, 0.0914, 0.2346]) -Greedy action tensor([ 1.4271, 0.0394, 0.1441, -0.3796]) tensor([0.5914, 0.1476, 0.1639, 0.0971]) -Greedy action tensor([ 1.7044, -0.8817, 0.0744, 0.1302]) tensor([0.6764, 0.0509, 0.1325, 0.1401]) -Greedy action tensor([ 1.4176, -0.0626, -0.0222, -0.0626]) tensor([0.5909, 0.1345, 0.1400, 0.1345]) -Greedy action tensor([ 2.0700, -0.1028, -0.1593, 0.8061]) tensor([0.6649, 0.0757, 0.0715, 0.1879]) -Greedy action tensor([ 1.8852, -0.3021, -0.4901, 0.2631]) tensor([0.7129, 0.0800, 0.0663, 0.1408]) -Greedy action tensor([ 1.1395, -0.5040, -0.3416, 0.4399]) tensor([0.5215, 0.1008, 0.1186, 0.2591]) -Greedy action tensor([ 2.0208, -0.9411, 0.0046, 0.3194]) tensor([0.7314, 0.0378, 0.0974, 0.1334]) -Greedy action tensor([ 1.2464, -0.4825, -0.0543, 0.2501]) tensor([0.5497, 0.0976, 0.1497, 0.2030]) -Greedy action tensor([ 2.0677, -0.2729, -1.1074, 0.3232]) tensor([0.7617, 0.0733, 0.0318, 0.1331]) -Greedy action tensor([ 1.5181, 0.3536, 0.2988, -0.5043]) tensor([0.5748, 0.1794, 0.1698, 0.0761]) -Greedy action tensor([ 1.0638, -0.6361, -0.4839, 0.6678]) tensor([0.4834, 0.0883, 0.1028, 0.3254]) -Greedy action tensor([ 1.0985, -0.5948, 0.2048, -0.0947]) tensor([0.5273, 0.0970, 0.2158, 0.1599]) -Greedy action tensor([ 1.6419, -0.3177, -0.3054, 0.3288]) tensor([0.6441, 0.0908, 0.0919, 0.1732]) -Greedy action tensor([ 1.8725, -0.5456, -0.5737, 0.2864]) tensor([0.7244, 0.0645, 0.0628, 0.1483]) -Greedy action tensor([ 1.3079, -0.3051, -0.2727, -0.0018]) tensor([0.5970, 0.1190, 0.1229, 0.1611]) -Greedy action tensor([ 2.2444, -1.0776, -0.2052, 0.9160]) tensor([0.7208, 0.0260, 0.0622, 0.1909]) -Greedy action tensor([ 2.0766, -0.7493, -0.5071, 0.3479]) tensor([0.7620, 0.0452, 0.0575, 0.1353]) -Greedy action tensor([ 1.2467, -0.4366, -0.3065, 0.1925]) tensor([0.5728, 0.1064, 0.1212, 0.1996]) -Greedy action tensor([ 1.0635, -0.2202, -0.4107, 0.1557]) tensor([0.5237, 0.1451, 0.1199, 0.2113]) -Greedy action tensor([ 1.7163, -0.3600, -0.4830, 0.2602]) tensor([0.6805, 0.0853, 0.0755, 0.1587]) -Greedy action tensor([ 2.0781, -1.0973, -0.2566, 0.3277]) tensor([0.7620, 0.0318, 0.0738, 0.1324]) -Greedy action tensor([ 1.1070, -0.4190, -0.1820, 0.0254]) tensor([0.5459, 0.1187, 0.1504, 0.1851]) -Greedy action tensor([ 1.2802, -0.2683, -0.4770, 0.5419]) tensor([0.5368, 0.1141, 0.0926, 0.2565]) -Greedy action tensor([ 1.7683, -0.9140, -0.1197, 0.6907]) tensor([0.6409, 0.0438, 0.0970, 0.2182]) -Greedy action tensor([ 0.7743, -0.6731, -0.4997, 0.3644]) tensor([0.4590, 0.1080, 0.1284, 0.3046]) -Greedy action tensor([ 0.9795, -0.1292, -0.2898, 0.3080]) tensor([0.4713, 0.1555, 0.1324, 0.2408]) -Greedy action tensor([ 1.2995, -0.5210, -0.2576, 0.0140]) tensor([0.6064, 0.0982, 0.1278, 0.1677]) -Greedy action tensor([ 2.2503, -0.8845, -0.4346, 1.2007]) tensor([0.6841, 0.0298, 0.0467, 0.2395]) -Greedy action tensor([ 1.4143, -0.5562, -0.4870, 0.3293]) tensor([0.6148, 0.0857, 0.0918, 0.2077]) -Greedy action tensor([ 1.9661, -0.4139, -0.2588, 0.4796]) tensor([0.7009, 0.0649, 0.0758, 0.1585]) -Greedy action tensor([ 1.9753, -0.5262, -0.3031, 0.3423]) tensor([0.7248, 0.0594, 0.0742, 0.1416]) -Greedy action tensor([ 2.1476, -1.2052, -0.2257, 0.4164]) tensor([0.7661, 0.0268, 0.0714, 0.1357]) -Greedy action tensor([ 1.7609e+00, -5.1846e-01, -1.5947e-03, 1.9241e-01]) tensor([0.6746, 0.0690, 0.1158, 0.1406]) -Greedy action tensor([ 0.9857, -0.0881, 0.0553, -0.1019]) tensor([0.4824, 0.1648, 0.1902, 0.1626]) -Greedy action tensor([ 1.1010, -0.3419, -0.3804, 0.4517]) tensor([0.5035, 0.1190, 0.1145, 0.2631]) -Greedy action tensor([ 1.5763, -0.1979, -0.5318, 0.3998]) tensor([0.6252, 0.1061, 0.0759, 0.1928]) -Greedy action tensor([ 0.9504, -0.3928, -0.4922, 0.5053]) tensor([0.4677, 0.1221, 0.1105, 0.2997]) -Greedy action tensor([ 1.1417, -0.3985, -0.1979, -0.1274]) tensor([0.5690, 0.1220, 0.1491, 0.1599]) -Greedy action tensor([ 1.2016, -0.5582, -0.6622, 0.4892]) tensor([0.5502, 0.0947, 0.0853, 0.2698]) -Greedy action tensor([ 1.3212, -1.0687, -0.4186, -0.2487]) tensor([0.6779, 0.0621, 0.1190, 0.1410]) -Greedy action tensor([ 0.8376, 0.2080, -0.7614, -0.1170]) tensor([0.4717, 0.2513, 0.0953, 0.1816]) -Greedy action tensor([ 1.9705, -0.7817, -0.4626, 0.9906]) tensor([0.6549, 0.0418, 0.0575, 0.2458]) -Greedy action tensor([ 1.4279, -0.4665, -0.1184, 0.0945]) tensor([0.6146, 0.0924, 0.1309, 0.1620]) -Greedy action tensor([ 0.7505, -0.4535, -0.2859, -0.3234]) tensor([0.5009, 0.1503, 0.1777, 0.1711]) -Greedy action tensor([ 1.2181, -0.8622, 0.0596, -0.0226]) tensor([0.5787, 0.0723, 0.1817, 0.1673]) -Greedy action tensor([ 1.1781, -0.5176, 0.0067, 0.3312]) tensor([0.5202, 0.0955, 0.1613, 0.2231]) -Greedy action tensor([ 2.1360, -0.8299, -0.3817, 0.9530]) tensor([0.6952, 0.0358, 0.0561, 0.2130]) -Greedy action tensor([ 1.3322, -0.2110, -0.7104, 0.3238]) tensor([0.5854, 0.1251, 0.0759, 0.2136]) -Greedy action tensor([ 1.0841, -0.5898, -0.0047, 0.8287]) tensor([0.4350, 0.0816, 0.1464, 0.3370]) -Greedy action tensor([ 1.4968, -0.3795, -0.6131, 0.3844]) tensor([0.6238, 0.0955, 0.0756, 0.2051]) -Greedy action tensor([ 2.0729, -0.9663, -0.3559, 0.6333]) tensor([0.7283, 0.0349, 0.0642, 0.1726]) -Greedy action tensor([ 0.9230, -0.1822, -0.3348, 0.3250]) tensor([0.4618, 0.1529, 0.1313, 0.2540]) -Greedy action tensor([ 1.0912, -0.2696, 0.1285, 0.0460]) tensor([0.5025, 0.1289, 0.1919, 0.1767]) -Greedy action tensor([ 1.7747, -0.7488, -0.1363, 0.6669]) tensor([0.6417, 0.0514, 0.0949, 0.2119]) -Greedy action tensor([ 1.1637, -0.5246, -0.2604, 0.3223]) tensor([0.5386, 0.0996, 0.1297, 0.2322]) -Greedy action tensor([ 1.4786, -0.5132, -0.4429, 0.8616]) tensor([0.5487, 0.0749, 0.0803, 0.2961]) -Greedy action tensor([ 2.0550, -0.3429, -0.3082, 0.7958]) tensor([0.6808, 0.0619, 0.0641, 0.1933]) -Greedy action tensor([ 1.7071, -0.9581, -0.0521, 0.6406]) tensor([0.6305, 0.0439, 0.1086, 0.2170]) -Greedy action tensor([ 1.5579, -0.6838, -0.6563, 0.5974]) tensor([0.6257, 0.0665, 0.0683, 0.2394]) -Greedy action tensor([ 0.3266, -0.1695, 0.0478, -0.1234]) tensor([0.3330, 0.2027, 0.2520, 0.2123]) -Greedy action tensor([ 1.1995, -0.2533, -0.6985, 0.3391]) tensor([0.5535, 0.1295, 0.0829, 0.2341]) -Greedy action tensor([ 1.2196, -0.4636, -0.2612, 0.4820]) tensor([0.5287, 0.0982, 0.1202, 0.2528]) -Greedy action tensor([ 1.2295, -0.7671, -0.3345, 0.4248]) tensor([0.5580, 0.0758, 0.1168, 0.2495]) -Greedy action tensor([ 0.3731, -0.3316, -0.1617, 0.3777]) tensor([0.3242, 0.1602, 0.1899, 0.3257]) -Greedy action tensor([ 1.1121, -1.0995, 0.0220, -0.5138]) tensor([0.6088, 0.0667, 0.2047, 0.1198]) -Greedy action tensor([ 0.4430, -0.1969, 0.0024, -0.2619]) tensor([0.3752, 0.1979, 0.2415, 0.1854]) -Greedy action tensor([ 0.7157, -0.3819, -0.0142, -0.3609]) tensor([0.4637, 0.1547, 0.2235, 0.1580]) -Greedy action tensor([ 0.4867, -0.4924, -0.2061, -0.1116]) tensor([0.4123, 0.1549, 0.2062, 0.2267]) -Greedy action tensor([ 0.8906, -0.2524, -0.0685, -0.0560]) tensor([0.4784, 0.1526, 0.1834, 0.1857]) -Greedy action tensor([ 0.5938, -0.0310, 0.0912, -0.2952]) tensor([0.3919, 0.2098, 0.2371, 0.1611]) -Greedy action tensor([ 0.7771, -0.4846, -0.0205, -0.4190]) tensor([0.4912, 0.1391, 0.2212, 0.1485]) -Greedy action tensor([ 0.9578, -0.6235, 0.0401, -0.4324]) tensor([0.5393, 0.1109, 0.2154, 0.1343]) -Greedy action tensor([ 0.6574, -0.5715, -0.1603, -0.1752]) tensor([0.4610, 0.1349, 0.2035, 0.2005]) -Greedy action tensor([ 0.5482, -0.3567, -0.0753, -0.4173]) tensor([0.4308, 0.1743, 0.2309, 0.1640]) -Greedy action tensor([ 0.3444, 0.0179, -0.0739, -0.2036]) tensor([0.3381, 0.2439, 0.2225, 0.1955]) -Greedy action tensor([ 0.9721, -1.3050, 0.0182, -0.6428]) tensor([0.5929, 0.0608, 0.2284, 0.1179]) -Greedy action tensor([ 0.4511, 0.3787, -0.1227, -0.0844]) tensor([0.3248, 0.3021, 0.1830, 0.1901]) -Greedy action tensor([ 1.3609, -0.7716, 0.1610, -0.7409]) tensor([0.6485, 0.0769, 0.1953, 0.0793]) -Greedy action tensor([ 0.5449, -0.2852, 0.1611, -0.3540]) tensor([0.3962, 0.1727, 0.2699, 0.1612]) -Greedy action tensor([ 0.6829, -0.4935, 0.0114, -0.3371]) tensor([0.4587, 0.1415, 0.2344, 0.1654]) -Greedy action tensor([ 0.3039, -0.1566, -0.0020, -0.2063]) tensor([0.3370, 0.2126, 0.2481, 0.2023]) -Greedy action tensor([ 0.5661, -0.1187, 0.0904, -0.4895]) tensor([0.4043, 0.2038, 0.2512, 0.1407]) -Greedy action tensor([ 1.0275, -0.6217, 0.0575, -0.8890]) tensor([0.5819, 0.1119, 0.2206, 0.0856]) -Greedy action tensor([ 1.0454, -1.0915, 0.1046, -0.4660]) tensor([0.5784, 0.0683, 0.2258, 0.1276]) -Greedy action tensor([ 1.0272, -0.7563, -0.0219, -0.5702]) tensor([0.5811, 0.0977, 0.2036, 0.1176]) -Greedy action tensor([ 0.9842, -0.5207, -0.0454, -0.4493]) tensor([0.5501, 0.1222, 0.1965, 0.1312]) -Greedy action tensor([ 0.7331, -0.4925, -0.1283, -0.6231]) tensor([0.5066, 0.1487, 0.2141, 0.1305]) -Greedy action tensor([ 0.6256, -0.3548, -0.2964, -0.3298]) tensor([0.4635, 0.1739, 0.1843, 0.1783]) -Greedy action tensor([ 1.2685, -0.9180, 0.1244, -0.7604]) tensor([0.6401, 0.0719, 0.2039, 0.0842]) -Greedy action tensor([ 0.5025, -0.5189, -0.0253, -0.1211]) tensor([0.4023, 0.1448, 0.2373, 0.2156]) -Greedy action tensor([ 0.5280, -0.2713, 0.0876, -0.4183]) tensor([0.4030, 0.1812, 0.2594, 0.1564]) -Greedy action tensor([ 0.6095, -0.6020, 0.0101, -0.3898]) tensor([0.4515, 0.1344, 0.2479, 0.1662]) -Greedy action tensor([ 0.9826, -0.3272, 0.0272, -0.2123]) tensor([0.5109, 0.1379, 0.1965, 0.1547]) -Greedy action tensor([ 0.7015, -0.5490, -0.0529, -0.1919]) tensor([0.4617, 0.1322, 0.2171, 0.1890]) -Greedy action tensor([ 0.5259, 0.1972, -0.1827, -0.0194]) tensor([0.3582, 0.2579, 0.1763, 0.2076]) -Greedy action tensor([ 0.3434, -0.1577, -0.0647, -0.1878]) tensor([0.3498, 0.2119, 0.2326, 0.2056]) -Greedy action tensor([ 0.3183, 0.0537, -0.0027, -0.3101]) tensor([0.3304, 0.2536, 0.2397, 0.1763]) -Greedy action tensor([ 0.6006, -0.7987, 0.0182, -0.3018]) tensor([0.4523, 0.1116, 0.2526, 0.1835]) -Greedy action tensor([ 0.6401, -0.3210, -0.0401, -0.2166]) tensor([0.4322, 0.1653, 0.2189, 0.1835]) -Greedy action tensor([ 0.7422, -0.4608, -0.0858, -0.4123]) tensor([0.4872, 0.1463, 0.2129, 0.1536]) -Greedy action tensor([ 0.6976, -0.2988, -0.0589, -0.3790]) tensor([0.4589, 0.1694, 0.2154, 0.1564]) -Greedy action tensor([ 0.3191, 0.2573, -0.1806, -0.0513]) tensor([0.3089, 0.2904, 0.1874, 0.2133]) -Greedy action tensor([ 0.9862, -0.5256, 0.0541, -0.8784]) tensor([0.5652, 0.1246, 0.2225, 0.0876]) -Greedy action tensor([ 0.5240, -0.4968, -0.1718, -0.1393]) tensor([0.4212, 0.1518, 0.2100, 0.2170]) -Greedy action tensor([ 0.5849, -0.1211, -0.1010, -0.3321]) tensor([0.4172, 0.2059, 0.2101, 0.1668]) -Greedy action tensor([ 0.8450, -0.4162, -0.1237, -0.1374]) tensor([0.4908, 0.1391, 0.1863, 0.1838]) -Greedy action tensor([ 0.9912, -0.4865, 0.0019, -0.1362]) tensor([0.5198, 0.1186, 0.1933, 0.1683]) -Greedy action tensor([ 0.0881, -0.2467, -0.3156, -0.1939]) tensor([0.3187, 0.2280, 0.2129, 0.2404]) -Greedy action tensor([ 1.2713, -0.9318, 0.0891, -0.6242]) tensor([0.6380, 0.0705, 0.1956, 0.0959]) -Greedy action tensor([ 0.3434, -0.0330, -0.0279, -0.4955]) tensor([0.3561, 0.2444, 0.2456, 0.1539]) -Greedy action tensor([ 0.7719, -0.5953, 0.0108, -0.7799]) tensor([0.5171, 0.1318, 0.2416, 0.1096]) -Greedy action tensor([ 0.6161, -0.5093, -0.0696, -0.1398]) tensor([0.4352, 0.1412, 0.2192, 0.2044]) -Greedy action tensor([ 0.9890, -1.1211, -0.0466, -0.4758]) tensor([0.5857, 0.0710, 0.2079, 0.1354]) -Greedy action tensor([ 0.7022, -0.3389, -0.1278, -0.2725]) tensor([0.4616, 0.1630, 0.2013, 0.1742]) -Greedy action tensor([ 0.4486, -0.1889, 0.1331, -0.3442]) tensor([0.3689, 0.1950, 0.2691, 0.1670]) -Greedy action tensor([ 0.7483, -0.2927, -0.0980, -0.1564]) tensor([0.4573, 0.1615, 0.1962, 0.1850]) -Greedy action tensor([ 0.7834, -0.5454, -0.0668, -0.4011]) tensor([0.5005, 0.1325, 0.2139, 0.1531]) -Greedy action tensor([ 0.8410, -0.7156, 0.0362, -0.2737]) tensor([0.5035, 0.1062, 0.2252, 0.1652]) -Greedy action tensor([ 0.2807, -0.0777, -0.0138, -0.2394]) tensor([0.3291, 0.2300, 0.2452, 0.1957]) -Greedy action tensor([ 1.5891e-01, 1.4894e-01, -1.8289e-03, -1.1964e-04]) tensor([0.2707, 0.2680, 0.2305, 0.2309]) -Greedy action tensor([ 0.8384, -0.5569, -0.2528, -0.2463]) tensor([0.5204, 0.1289, 0.1748, 0.1759]) -Greedy action tensor([ 0.5342, -0.3627, -0.0556, -0.0935]) tensor([0.4006, 0.1634, 0.2221, 0.2139]) -Greedy action tensor([ 0.7081, -0.3677, -0.0098, -0.3619]) tensor([0.4605, 0.1570, 0.2246, 0.1579]) -Greedy action tensor([ 0.3947, 0.0502, -0.1396, -0.2467]) tensor([0.3545, 0.2512, 0.2077, 0.1866]) -Greedy action tensor([ 0.2976, -0.2789, -0.0270, -0.3265]) tensor([0.3546, 0.1992, 0.2563, 0.1900]) -Greedy action tensor([ 0.8714, -0.4824, 0.1239, -0.6926]) tensor([0.5152, 0.1330, 0.2440, 0.1078]) -Greedy action tensor([ 0.4134, -0.1820, 0.1775, -0.2679]) tensor([0.3512, 0.1936, 0.2774, 0.1777]) -Greedy action tensor([ 0.6625, -0.2142, 0.0092, -0.0926]) tensor([0.4156, 0.1729, 0.2162, 0.1953]) -Greedy action tensor([ 0.5913, -0.5045, -0.0679, -0.2520]) tensor([0.4382, 0.1465, 0.2267, 0.1886]) -Greedy action tensor([ 1.1145, -0.6809, -0.0702, -0.4083]) tensor([0.5917, 0.0983, 0.1810, 0.1291]) -Greedy action tensor([ 0.6040, -0.3079, 0.0326, -0.2813]) tensor([0.4203, 0.1689, 0.2374, 0.1734]) -Greedy action tensor([ 0.7365, -0.1987, -0.0273, -0.2449]) tensor([0.4478, 0.1758, 0.2086, 0.1678]) -Greedy action tensor([ 0.7680, -0.7213, -0.1114, -0.3694]) tensor([0.5099, 0.1150, 0.2116, 0.1635]) -Greedy action tensor([ 0.5224, -0.3909, 0.2301, -0.5747]) tensor([0.4030, 0.1617, 0.3008, 0.1345]) -Greedy action tensor([ 0.6354, -0.5555, -0.1603, -0.1850]) tensor([0.4555, 0.1384, 0.2055, 0.2005]) -Greedy action tensor([ 0.4103, -0.1321, -0.0622, -0.0675]) tensor([0.3540, 0.2058, 0.2207, 0.2195]) -Greedy action tensor([ 0.8662, -0.7073, 0.0364, -0.3930]) tensor([0.5188, 0.1076, 0.2263, 0.1473]) -Greedy action tensor([ 0.8729, -0.8956, 0.0698, -0.3132]) tensor([0.5198, 0.0887, 0.2328, 0.1587]) -Greedy action tensor([ 0.8264, -0.3035, -0.0033, -0.1355]) tensor([0.4670, 0.1509, 0.2037, 0.1785]) -Greedy action tensor([ 0.4196, -0.1269, -0.0811, 0.0268]) tensor([0.3496, 0.2024, 0.2119, 0.2361]) -Greedy action tensor([ 0.8548, -0.1678, -0.1982, -0.3015]) tensor([0.4943, 0.1778, 0.1724, 0.1555]) -Greedy action tensor([ 1.0040, -0.6522, 0.1027, -0.4682]) tensor([0.5476, 0.1045, 0.2223, 0.1256]) -Greedy action tensor([ 0.7983, -0.3682, 0.0776, -0.1999]) tensor([0.4616, 0.1438, 0.2245, 0.1701]) -Greedy action tensor([ 0.9121, -0.4725, -0.0917, -0.2796]) tensor([0.5207, 0.1304, 0.1908, 0.1581]) -Greedy action tensor([ 0.4666, 0.1762, -0.1462, -0.0844]) tensor([0.3489, 0.2610, 0.1891, 0.2011]) -Greedy action tensor([ 1.4628, -0.5778, -0.4655, 0.4066]) tensor([0.6161, 0.0801, 0.0896, 0.2143]) -Greedy action tensor([ 1.8038, -1.0609, -0.2825, 0.4670]) tensor([0.6926, 0.0395, 0.0860, 0.1819]) -Greedy action tensor([ 1.8930, -0.9572, -0.4177, -0.1076]) tensor([0.7738, 0.0447, 0.0768, 0.1047]) -Greedy action tensor([ 1.6077, -0.1910, -0.4157, 0.6599]) tensor([0.5934, 0.0982, 0.0784, 0.2300]) -Greedy action tensor([ 1.8393, -0.4443, -0.3319, 0.1188]) tensor([0.7169, 0.0731, 0.0818, 0.1283]) -Greedy action tensor([ 1.6398, -0.0815, -0.3200, 0.5317]) tensor([0.6061, 0.1084, 0.0854, 0.2001]) -Greedy action tensor([ 1.4862, -0.4106, -0.1144, 0.2674]) tensor([0.6070, 0.0911, 0.1225, 0.1794]) -Greedy action tensor([ 2.4364, -0.1806, -0.2895, 0.5721]) tensor([0.7731, 0.0564, 0.0506, 0.1198]) -Greedy action tensor([ 1.3296, -0.0587, -0.6185, 0.2835]) tensor([0.5736, 0.1431, 0.0818, 0.2015]) -Greedy action tensor([ 2.4286, -1.0299, -0.4134, 0.5618]) tensor([0.8036, 0.0253, 0.0469, 0.1243]) -Greedy action tensor([ 1.9579, -0.4759, -0.2801, 0.2211]) tensor([0.7297, 0.0640, 0.0778, 0.1285]) -Greedy action tensor([ 1.9757, -0.7752, -0.3284, 0.5786]) tensor([0.7087, 0.0453, 0.0708, 0.1753]) -Greedy action tensor([ 1.1059, -0.5024, -0.1485, 0.4362]) tensor([0.5007, 0.1003, 0.1428, 0.2563]) -Greedy action tensor([ 1.6186, -0.2766, -0.2028, 0.5318]) tensor([0.6063, 0.0911, 0.0981, 0.2045]) -Greedy action tensor([ 2.1758, -0.7161, -0.5209, 1.0619]) tensor([0.6891, 0.0382, 0.0465, 0.2262]) -Greedy action tensor([ 2.1698, -0.5667, -0.0163, 0.7337]) tensor([0.7067, 0.0458, 0.0794, 0.1681]) -Greedy action tensor([ 1.3261, -0.6113, -0.4032, 0.5118]) tensor([0.5668, 0.0817, 0.1005, 0.2510]) -Greedy action tensor([ 1.6426, -1.4393, -0.0306, -0.1055]) tensor([0.7104, 0.0326, 0.1333, 0.1237]) -Greedy action tensor([ 1.6955, -0.3866, -0.4593, 0.5151]) tensor([0.6461, 0.0805, 0.0749, 0.1985]) -Greedy action tensor([ 0.4646, -0.0232, -0.5704, 0.5393]) tensor([0.3282, 0.2015, 0.1166, 0.3537]) -Greedy action tensor([ 1.8193, 0.2964, -0.4349, -0.0728]) tensor([0.6785, 0.1480, 0.0712, 0.1023]) -Greedy action tensor([ 1.1466, -0.4726, -0.3560, 0.3896]) tensor([0.5292, 0.1048, 0.1178, 0.2482]) -Greedy action tensor([ 1.2430, -0.3807, -0.1883, 0.1050]) tensor([0.5693, 0.1122, 0.1361, 0.1824]) -Greedy action tensor([ 1.6410, 0.2744, -0.2025, 0.2873]) tensor([0.5983, 0.1525, 0.0947, 0.1545]) -Greedy action tensor([ 1.3583, -0.4509, -0.4707, 0.6408]) tensor([0.5518, 0.0904, 0.0886, 0.2693]) -Greedy action tensor([ 1.8192, -0.8880, -0.3588, 0.2902]) tensor([0.7159, 0.0478, 0.0811, 0.1552]) -Greedy action tensor([ 0.6821, -0.2366, -0.2645, 0.2650]) tensor([0.4088, 0.1631, 0.1587, 0.2694]) -Greedy action tensor([ 0.7262, -0.5429, 0.1120, 0.2218]) tensor([0.4122, 0.1159, 0.2230, 0.2489]) -Greedy action tensor([ 2.3321, -1.1943, -0.1664, 0.9277]) tensor([0.7368, 0.0217, 0.0606, 0.1809]) -Greedy action tensor([ 1.9820, -0.4528, -0.3493, -0.0083]) tensor([0.7567, 0.0663, 0.0735, 0.1034]) -Greedy action tensor([ 1.9013, -0.5448, -0.1001, 0.6671]) tensor([0.6610, 0.0573, 0.0893, 0.1924]) -Greedy action tensor([ 0.8405, -0.0205, 0.1989, -0.3516]) tensor([0.4439, 0.1877, 0.2337, 0.1348]) -Greedy action tensor([ 0.4627, -0.3141, -0.2433, -0.0646]) tensor([0.3931, 0.1808, 0.1941, 0.2320]) -Greedy action tensor([ 1.0217, -0.3833, -0.1725, 0.3920]) tensor([0.4805, 0.1179, 0.1456, 0.2560]) -Greedy action tensor([ 1.7156, -0.5387, -0.3331, 0.0964]) tensor([0.6984, 0.0733, 0.0900, 0.1383]) -Greedy action tensor([ 2.3191, -1.4009, -0.1063, 0.6275]) tensor([0.7711, 0.0187, 0.0682, 0.1420]) -Greedy action tensor([ 2.1566, -0.1204, -0.5284, 0.3418]) tensor([0.7498, 0.0769, 0.0512, 0.1221]) -Greedy action tensor([ 1.3680, -0.6009, -0.4569, 0.2921]) tensor([0.6091, 0.0850, 0.0982, 0.2077]) -Greedy action tensor([ 2.5067, -1.0481, -0.1695, 0.6504]) tensor([0.7977, 0.0228, 0.0549, 0.1246]) -Greedy action tensor([ 0.9763, -0.5363, 0.0366, 0.1975]) tensor([0.4831, 0.1064, 0.1888, 0.2217]) -Greedy action tensor([ 1.2276, 0.0081, -0.8783, 0.4091]) tensor([0.5381, 0.1590, 0.0655, 0.2374]) -Greedy action tensor([ 1.4703, -0.6308, -0.3013, 0.1280]) tensor([0.6437, 0.0787, 0.1095, 0.1681]) -Greedy action tensor([ 1.7265, -0.3812, -0.1716, 0.4087]) tensor([0.6497, 0.0790, 0.0974, 0.1740]) -Greedy action tensor([ 1.1759, -0.0664, -0.6488, 0.2518]) tensor([0.5414, 0.1563, 0.0873, 0.2149]) -Greedy action tensor([ 1.8000, -0.7606, -0.1091, 0.4845]) tensor([0.6694, 0.0517, 0.0992, 0.1796]) -Greedy action tensor([ 1.5245, 0.0152, -0.3873, 0.3077]) tensor([0.6006, 0.1328, 0.0888, 0.1779]) -Greedy action tensor([ 1.0482, -0.4879, 0.0849, 0.0547]) tensor([0.5084, 0.1094, 0.1940, 0.1882]) -Greedy action tensor([ 1.7660, 0.2791, -0.0205, 0.4275]) tensor([0.6039, 0.1365, 0.1012, 0.1584]) -Greedy action tensor([ 1.8654, -0.4835, -0.4360, 0.4042]) tensor([0.7005, 0.0669, 0.0701, 0.1625]) -Greedy action tensor([ 2.0769, -0.2812, -0.2986, 0.5999]) tensor([0.7063, 0.0668, 0.0657, 0.1612]) -Greedy action tensor([ 1.9200, -0.3782, -0.0307, 0.5645]) tensor([0.6665, 0.0669, 0.0948, 0.1718]) -Greedy action tensor([ 1.7933, -0.6856, -0.2088, 0.4742]) tensor([0.6728, 0.0564, 0.0909, 0.1799]) -Greedy action tensor([ 1.9636, -0.7176, -0.4509, 0.7935]) tensor([0.6811, 0.0466, 0.0609, 0.2114]) -Greedy action tensor([ 1.2473, -0.4947, -0.5071, 0.5375]) tensor([0.5435, 0.0952, 0.0940, 0.2673]) -Greedy action tensor([ 1.1114, -0.2132, -0.1180, 0.3309]) tensor([0.4959, 0.1319, 0.1450, 0.2272]) -Greedy action tensor([ 1.1016, -0.4668, -0.1129, 0.1784]) tensor([0.5256, 0.1095, 0.1560, 0.2088]) -Greedy action tensor([ 1.2503, 0.0262, -0.4390, 0.2457]) tensor([0.5420, 0.1594, 0.1001, 0.1985]) -Greedy action tensor([ 1.4694, -0.3414, -0.5401, 0.4228]) tensor([0.6065, 0.0992, 0.0813, 0.2130]) -Greedy action tensor([ 1.0369, -0.1496, -0.5992, 0.3084]) tensor([0.5044, 0.1540, 0.0982, 0.2434]) -Greedy action tensor([ 0.8475, -0.5544, -0.2054, 0.3629]) tensor([0.4523, 0.1113, 0.1578, 0.2786]) -Greedy action tensor([ 0.5979, -0.5509, 0.0538, 0.2420]) tensor([0.3849, 0.1220, 0.2234, 0.2697]) -Greedy action tensor([ 1.5098, -0.5327, -0.2498, 0.5965]) tensor([0.5872, 0.0762, 0.1011, 0.2356]) -Greedy action tensor([ 1.2947, -0.3858, -0.4264, 0.4500]) tensor([0.5571, 0.1038, 0.0997, 0.2394]) -Greedy action tensor([ 1.2783, 0.0237, -0.1698, 0.2422]) tensor([0.5333, 0.1521, 0.1253, 0.1892]) -Greedy action tensor([ 1.4180, -0.4048, -0.8225, 0.4343]) tensor([0.6091, 0.0984, 0.0648, 0.2277]) -Greedy action tensor([ 1.9597, -1.0727, -0.3529, 0.9386]) tensor([0.6634, 0.0320, 0.0657, 0.2390]) -Greedy action tensor([ 0.6183, -0.3062, 0.1509, -0.0908]) tensor([0.3975, 0.1577, 0.2491, 0.1956]) -Greedy action tensor([ 0.8963, -0.3517, -0.4727, 0.1097]) tensor([0.5008, 0.1438, 0.1274, 0.2281]) -Greedy action tensor([ 1.3362, -0.2252, -0.4202, -0.1772]) tensor([0.6240, 0.1309, 0.1077, 0.1374]) -Greedy action tensor([ 2.0001, -0.5632, -0.5282, 0.4925]) tensor([0.7255, 0.0559, 0.0579, 0.1607]) -Greedy action tensor([ 1.1631, -0.2630, -0.4755, 0.0202]) tensor([0.5703, 0.1370, 0.1108, 0.1819]) -Greedy action tensor([ 1.2639, -0.4830, -0.2512, 0.7022]) tensor([0.5091, 0.0887, 0.1119, 0.2903]) -Greedy action tensor([ 2.0985, -0.7474, -0.2554, 0.7376]) tensor([0.7095, 0.0412, 0.0674, 0.1819]) -Greedy action tensor([ 1.2301, -0.3393, -0.6148, 0.2066]) tensor([0.5795, 0.1206, 0.0916, 0.2082]) -Greedy action tensor([ 2.0913, -0.7613, -0.3066, 0.3666]) tensor([0.7537, 0.0435, 0.0685, 0.1343]) -Greedy action tensor([ 1.1461, -0.4355, -0.3477, 1.0230]) tensor([0.4321, 0.0889, 0.0970, 0.3820]) -Greedy action tensor([ 2.1884, -1.0135, -0.3135, 1.0704]) tensor([0.6899, 0.0281, 0.0565, 0.2255]) -Greedy action tensor([ 2.0734, -0.6964, -0.6593, 0.4989]) tensor([0.7492, 0.0470, 0.0487, 0.1552]) -Greedy action tensor([ 2.9887, -1.4519, -0.2670, 0.8343]) tensor([0.8574, 0.0101, 0.0331, 0.0994]) -Greedy action tensor([ 1.8584, -1.0681, -0.2046, 0.5217]) tensor([0.6928, 0.0371, 0.0880, 0.1820]) -Greedy action tensor([ 1.1581, -0.0605, -0.3812, 0.0104]) tensor([0.5472, 0.1618, 0.1174, 0.1736]) -Greedy action tensor([ 0.0611, -0.3969, 1.1852, 0.5526]) tensor([0.1576, 0.0997, 0.4851, 0.2576]) -Greedy action tensor([ 1.1943, -1.4641, 0.5201, 0.5118]) tensor([0.4796, 0.0336, 0.2444, 0.2424]) -Greedy action tensor([ 0.1725, -0.9536, -0.2606, 1.1739]) tensor([0.2130, 0.0691, 0.1381, 0.5798]) -Greedy action tensor([-1.4360, -0.4260, -1.0483, 0.4523]) tensor([0.0846, 0.2321, 0.1246, 0.5587]) -Greedy action tensor([-0.7717, -2.0943, -0.4340, 0.5085]) tensor([0.1596, 0.0425, 0.2237, 0.5742]) -Greedy action tensor([ 1.6082, 0.1181, 1.6749, -0.4924]) tensor([0.4138, 0.0932, 0.4423, 0.0506]) -Greedy action tensor([-0.5359, -0.3531, -0.3347, 0.6619]) tensor([0.1485, 0.1782, 0.1815, 0.4918]) -Greedy action tensor([-0.0859, -0.2994, -0.0570, 0.2724]) tensor([0.2343, 0.1893, 0.2412, 0.3353]) -Greedy action tensor([-0.3047, -2.0092, 0.6986, 0.8573]) tensor([0.1407, 0.0256, 0.3838, 0.4498]) -Greedy action tensor([ 1.1418, 0.1521, 0.0089, -0.2164]) tensor([0.5126, 0.1905, 0.1651, 0.1318]) -Greedy action tensor([-0.0559, -1.5175, 0.0487, 0.7297]) tensor([0.2205, 0.0511, 0.2448, 0.4836]) -Greedy action tensor([1.3400, 0.3765, 0.1948, 0.1680]) tensor([0.4976, 0.1899, 0.1583, 0.1541]) -Greedy action tensor([ 0.5414, -0.6553, -0.3609, 0.3803]) tensor([0.3908, 0.1181, 0.1585, 0.3326]) -Greedy action tensor([ 0.1159, -2.4050, 0.3670, 1.0045]) tensor([0.2084, 0.0168, 0.2679, 0.5069]) -Greedy action tensor([ 0.0578, -0.5402, -0.5214, 0.8837]) tensor([0.2276, 0.1251, 0.1275, 0.5198]) -Greedy action tensor([ 1.1539, -0.6429, 0.9563, 0.0563]) tensor([0.4310, 0.0715, 0.3537, 0.1438]) -Greedy action tensor([-0.9175, 0.9578, -0.2990, 1.0732]) tensor([0.0599, 0.3906, 0.1111, 0.4384]) -Greedy action tensor([ 0.7526, -0.4583, 0.2054, 0.5470]) tensor([0.3717, 0.1107, 0.2150, 0.3026]) -Greedy action tensor([-0.4705, -0.6458, 0.6132, -0.3810]) tensor([0.1698, 0.1425, 0.5019, 0.1857]) -Greedy action tensor([ 1.0182, -0.2449, 0.4805, 1.0075]) tensor([0.3501, 0.0990, 0.2045, 0.3464]) -Greedy action tensor([-0.0150, 0.0915, -0.3908, -0.1545]) tensor([0.2726, 0.3032, 0.1872, 0.2371]) -Greedy action tensor([-0.1202, -0.3679, 0.6920, -0.1034]) tensor([0.1980, 0.1546, 0.4461, 0.2014]) -Greedy action tensor([1.5356, 0.8796, 0.2368, 0.9334]) tensor([0.4275, 0.2218, 0.1166, 0.2341]) -Greedy action tensor([ 0.5923, 0.7487, -0.3158, -0.1595]) tensor([0.3285, 0.3841, 0.1325, 0.1549]) -Greedy action tensor([-0.7147, -0.8074, -1.4999, 0.1752]) tensor([0.2082, 0.1898, 0.0950, 0.5070]) -Greedy action tensor([ 1.6060, -0.3097, 1.2417, -0.3079]) tensor([0.5027, 0.0740, 0.3492, 0.0741]) -Greedy action tensor([ 0.6209, 0.7853, -0.9230, 0.1666]) tensor([0.3303, 0.3894, 0.0705, 0.2097]) -Greedy action tensor([ 1.7592, 0.7562, 0.5537, -0.0395]) tensor([0.5459, 0.2002, 0.1635, 0.0904]) -Greedy action tensor([-1.1907, -0.3922, -0.1864, -0.6444]) tensor([0.1302, 0.2894, 0.3555, 0.2249]) -Greedy action tensor([ 0.5053, -0.9861, 0.9267, 1.1975]) tensor([0.2106, 0.0474, 0.3210, 0.4209]) -Greedy action tensor([-0.7435, -0.4881, 0.5201, -1.0361]) tensor([0.1521, 0.1963, 0.5381, 0.1135]) -Greedy action tensor([ 1.2983, 0.6307, -0.8472, 1.1647]) tensor([0.3992, 0.2048, 0.0467, 0.3493]) -Greedy action tensor([ 0.7723, 0.2646, 0.5219, -0.3109]) tensor([0.3678, 0.2214, 0.2863, 0.1245]) -Greedy action tensor([-0.1674, -1.3905, 0.0090, -0.1618]) tensor([0.2863, 0.0843, 0.3415, 0.2879]) -Greedy action tensor([ 1.2170, -0.3581, 1.0170, 0.1872]) tensor([0.4197, 0.0869, 0.3436, 0.1499]) -Greedy action tensor([ 1.1823, -1.5915, 1.0648, 1.0818]) tensor([0.3502, 0.0219, 0.3113, 0.3167]) -Greedy action tensor([ 0.1344, 0.3538, -0.4605, 0.8936]) tensor([0.2027, 0.2524, 0.1118, 0.4331]) -Greedy action tensor([ 1.1477, -1.1826, 0.2855, 1.4513]) tensor([0.3479, 0.0338, 0.1469, 0.4713]) -Greedy action tensor([ 0.0193, -1.1740, 0.5943, 1.0025]) tensor([0.1738, 0.0527, 0.3089, 0.4646]) -Greedy action tensor([-0.6613, -0.9084, -0.2219, 0.2499]) tensor([0.1718, 0.1342, 0.2666, 0.4274]) -Greedy action tensor([-0.0986, -0.2768, 0.7739, 0.8185]) tensor([0.1485, 0.1243, 0.3555, 0.3717]) -Greedy action tensor([ 0.9142, -1.1812, -0.0430, 0.9288]) tensor([0.3966, 0.0488, 0.1523, 0.4024]) -Greedy action tensor([ 0.2141, 0.3274, -1.3157, -0.0304]) tensor([0.3206, 0.3590, 0.0694, 0.2510]) -Greedy action tensor([ 1.1775, -1.4191, 1.1892, 0.1564]) tensor([0.4087, 0.0305, 0.4136, 0.1472]) -Greedy action tensor([ 1.6083, 0.3988, -0.0196, 0.5309]) tensor([0.5449, 0.1626, 0.1070, 0.1855]) -Greedy action tensor([ 2.1590, -0.2495, 0.4483, 0.5119]) tensor([0.6834, 0.0615, 0.1235, 0.1316]) -Greedy action tensor([ 0.6425, -1.0388, 0.2578, 0.8980]) tensor([0.3167, 0.0589, 0.2155, 0.4088]) -Greedy action tensor([ 0.5310, 0.3716, 0.2663, -0.1607]) tensor([0.3204, 0.2732, 0.2459, 0.1604]) -Greedy action tensor([ 0.4462, 0.0177, -1.1055, 1.6790]) tensor([0.1889, 0.1231, 0.0400, 0.6480]) -Greedy action tensor([ 0.9272, -0.3882, -0.9018, 0.8293]) tensor([0.4281, 0.1149, 0.0687, 0.3882]) -Greedy action tensor([-0.2214, -0.9722, 0.2418, 1.1387]) tensor([0.1437, 0.0678, 0.2284, 0.5600]) -Greedy action tensor([-1.3824, -1.5673, 1.3246, 0.6488]) tensor([0.0409, 0.0340, 0.6131, 0.3119]) -Greedy action tensor([ 0.0571, -0.2648, -1.1137, 0.0983]) tensor([0.3250, 0.2355, 0.1008, 0.3387]) -Greedy action tensor([ 0.7482, -0.6731, -0.2370, 0.6512]) tensor([0.3964, 0.0957, 0.1480, 0.3598]) -Greedy action tensor([-0.6755, -0.3114, -1.2019, -0.4193]) tensor([0.2314, 0.3330, 0.1367, 0.2989]) -Greedy action tensor([ 0.2080, 0.8403, 0.9008, -0.4631]) tensor([0.1854, 0.3490, 0.3708, 0.0948]) -Greedy action tensor([-0.2801, -0.9952, 0.5371, -0.0130]) tensor([0.1977, 0.0967, 0.4475, 0.2582]) -Greedy action tensor([ 0.6386, -1.1192, 0.0299, 0.0321]) tensor([0.4421, 0.0762, 0.2405, 0.2411]) -Greedy action tensor([-0.3446, -2.8177, -0.3084, 0.2703]) tensor([0.2519, 0.0212, 0.2611, 0.4658]) -Greedy action tensor([ 1.0217, -1.1282, 0.7683, 0.3373]) tensor([0.4172, 0.0486, 0.3238, 0.2104]) -Greedy action tensor([-0.4183, -1.1120, -0.3813, -0.4266]) tensor([0.2834, 0.1416, 0.2940, 0.2810]) -Greedy action tensor([-0.1657, 0.6276, 0.6068, -0.4771]) tensor([0.1637, 0.3619, 0.3545, 0.1199]) -Greedy action tensor([-0.6997, -1.3256, 0.6824, 0.3860]) tensor([0.1179, 0.0631, 0.4698, 0.3493]) -Greedy action tensor([ 1.1001, -1.1167, 1.0171, 1.5091]) tensor([0.2829, 0.0308, 0.2604, 0.4259]) -Greedy action tensor([ 1.1332, -0.9342, 0.4881, 0.2367]) tensor([0.4857, 0.0614, 0.2548, 0.1981]) -Greedy action tensor([ 1.4793, -0.0766, 1.0907, 0.8695]) tensor([0.4111, 0.0867, 0.2787, 0.2234]) -Greedy action tensor([-0.4236, 1.3720, 0.0904, -0.3593]) tensor([0.1024, 0.6170, 0.1713, 0.1092]) -Greedy action tensor([ 1.3634, -2.0985, -0.0536, 1.1902]) tensor([0.4729, 0.0148, 0.1146, 0.3976]) -Greedy action tensor([ 0.6669, 0.0328, -0.9900, 1.3172]) tensor([0.2749, 0.1458, 0.0524, 0.5268]) -Greedy action tensor([ 0.1718, 0.7350, 0.2106, -0.0467]) tensor([0.2174, 0.3818, 0.2260, 0.1747]) -Greedy action tensor([-0.4978, 0.1643, 0.1861, -0.5785]) tensor([0.1711, 0.3318, 0.3391, 0.1579]) -Greedy action tensor([-0.4141, 0.4252, -0.7572, 0.5910]) tensor([0.1480, 0.3426, 0.1050, 0.4044]) -Greedy action tensor([-0.1912, -1.1999, 0.2165, 0.4292]) tensor([0.2115, 0.0771, 0.3180, 0.3934]) -Greedy action tensor([-0.0774, 0.5057, 1.5991, -0.9848]) tensor([0.1171, 0.2097, 0.6260, 0.0472]) -Greedy action tensor([ 0.8843, -0.0381, 1.1843, 0.3509]) tensor([0.2999, 0.1192, 0.4049, 0.1759]) -Greedy action tensor([ 0.1527, 0.1852, 0.3577, -1.0527]) tensor([0.2809, 0.2902, 0.3448, 0.0842]) -Greedy action tensor([ 1.1664, -1.0256, 0.8365, -0.0422]) tensor([0.4696, 0.0525, 0.3377, 0.1402]) -Greedy action tensor([-0.3246, 0.5235, -0.5621, -0.2421]) tensor([0.1919, 0.4482, 0.1514, 0.2085]) -Greedy action tensor([ 1.0081, -1.5898, -0.1715, 1.1073]) tensor([0.4022, 0.0299, 0.1237, 0.4442]) -Greedy action tensor([-0.9844, -1.1599, 0.2150, 0.4456]) tensor([0.1071, 0.0899, 0.3554, 0.4476]) -Greedy action tensor([0.6377, 0.2070, 0.6621, 0.1235]) tensor([0.3056, 0.1986, 0.3131, 0.1827]) -Greedy action tensor([ 1.0726, -0.0073, -0.0655, 0.4240]) tensor([0.4581, 0.1556, 0.1468, 0.2395]) -Greedy action tensor([-1.8775, -0.4330, 0.6314, -0.1482]) tensor([0.0432, 0.1830, 0.5305, 0.2433]) -Greedy action tensor([-1.9432, -0.4463, 0.6659, -0.1796]) tensor([0.0402, 0.1795, 0.5459, 0.2344]) -Greedy action tensor([-1.8858, -0.4622, 0.6458, -0.1486]) tensor([0.0427, 0.1774, 0.5372, 0.2427]) -Greedy action tensor([-1.8991, -0.4634, 0.6907, -0.0979]) tensor([0.0407, 0.1709, 0.5420, 0.2464]) -Greedy action tensor([-1.8074, -0.5082, 0.5960, -0.1078]) tensor([0.0472, 0.1730, 0.5218, 0.2581]) -Greedy action tensor([-1.9012, -0.4401, 0.6422, -0.1559]) tensor([0.0421, 0.1814, 0.5354, 0.2411]) -Greedy action tensor([-1.4445, -0.4294, 0.5642, 0.3585]) tensor([0.0579, 0.1597, 0.4313, 0.3511]) -Greedy action tensor([-1.8635, -0.4447, 0.6447, -0.0760]) tensor([0.0428, 0.1767, 0.5251, 0.2554]) -Greedy action tensor([-1.5577, -0.5506, 0.5243, -0.1554]) tensor([0.0632, 0.1730, 0.5069, 0.2569]) -Greedy action tensor([-1.3900, 0.6157, 0.2369, 0.1809]) tensor([0.0546, 0.4054, 0.2776, 0.2625]) -Greedy action tensor([-1.9154, -0.4443, 0.6478, -0.1615]) tensor([0.0415, 0.1806, 0.5383, 0.2396]) -Greedy action tensor([-0.8505, 0.5625, 0.1284, -0.0806]) tensor([0.1007, 0.4137, 0.2681, 0.2175]) -Greedy action tensor([-1.8336, -0.2552, 0.6159, -0.1052]) tensor([0.0434, 0.2102, 0.5023, 0.2442]) -Greedy action tensor([-1.9426, -0.4520, 0.6659, -0.1803]) tensor([0.0403, 0.1787, 0.5466, 0.2345]) -Greedy action tensor([-1.7422, -0.3470, 0.5367, -0.0451]) tensor([0.0494, 0.1992, 0.4820, 0.2694]) -Greedy action tensor([-1.7751, -0.0605, 0.5311, -0.0288]) tensor([0.0448, 0.2488, 0.4496, 0.2568]) -Greedy action tensor([-1.9041, -0.4263, 0.6442, -0.1625]) tensor([0.0419, 0.1836, 0.5355, 0.2390]) -Greedy action tensor([-0.9194, -0.1407, 0.6834, 1.2282]) tensor([0.0598, 0.1304, 0.2972, 0.5125]) -Greedy action tensor([-1.9011, -0.4376, 0.6420, -0.1515]) tensor([0.0420, 0.1816, 0.5346, 0.2418]) -Greedy action tensor([-1.0532, -0.3690, 0.2983, 0.5221]) tensor([0.0856, 0.1697, 0.3308, 0.4138]) -Greedy action tensor([-1.9177, -0.4188, 0.6546, -0.1641]) tensor([0.0411, 0.1839, 0.5379, 0.2372]) -Greedy action tensor([-1.9240, -0.4093, 0.6433, -0.1702]) tensor([0.0411, 0.1867, 0.5350, 0.2372]) -Greedy action tensor([-1.6562, -0.0235, 0.5211, -0.0422]) tensor([0.0501, 0.2564, 0.4420, 0.2516]) -Greedy action tensor([-1.8916, -0.4195, 0.6337, -0.1467]) tensor([0.0424, 0.1848, 0.5299, 0.2428]) -Greedy action tensor([-1.8985, -0.3777, 0.6330, -0.1514]) tensor([0.0419, 0.1916, 0.5263, 0.2402]) -Greedy action tensor([-1.7389, -0.4453, 0.5594, -0.0877]) tensor([0.0505, 0.1840, 0.5025, 0.2631]) -Greedy action tensor([-1.6957, -0.4617, 0.6139, 0.0602]) tensor([0.0493, 0.1693, 0.4962, 0.2853]) -Greedy action tensor([-1.9003, -0.3785, 0.6418, -0.1583]) tensor([0.0417, 0.1909, 0.5295, 0.2379]) -Greedy action tensor([-1.8783, -0.4034, 0.6287, -0.1536]) tensor([0.0430, 0.1880, 0.5277, 0.2413]) -Greedy action tensor([-1.3651, -0.7022, 0.5108, 0.5589]) tensor([0.0613, 0.1189, 0.4000, 0.4197]) -Greedy action tensor([-1.7071, -0.4014, 0.5644, -0.1738]) tensor([0.0526, 0.1940, 0.5097, 0.2436]) -Greedy action tensor([-1.4026, 0.5095, 0.2554, 0.1950]) tensor([0.0557, 0.3769, 0.2923, 0.2752]) -Greedy action tensor([-1.9376, -0.4504, 0.6656, -0.1741]) tensor([0.0404, 0.1787, 0.5454, 0.2355]) -Greedy action tensor([-1.8783, -0.3649, 0.6242, -0.1281]) tensor([0.0425, 0.1932, 0.5195, 0.2448]) -Greedy action tensor([-1.5802, -0.0847, 0.5409, -0.2842]) tensor([0.0573, 0.2556, 0.4778, 0.2094]) -Greedy action tensor([-1.5850, -0.4601, 0.6810, 0.3101]) tensor([0.0491, 0.1512, 0.4732, 0.3266]) -Greedy action tensor([-1.4757, 0.5017, 0.3276, -0.0344]) tensor([0.0540, 0.3901, 0.3277, 0.2282]) -Greedy action tensor([-1.5602, -0.4367, 0.4761, 0.0140]) tensor([0.0604, 0.1857, 0.4626, 0.2914]) -Greedy action tensor([-1.2316, 0.6380, 0.1367, 0.2260]) tensor([0.0637, 0.4128, 0.2501, 0.2734]) -Greedy action tensor([-1.7988, -0.1886, 0.5907, -0.0488]) tensor([0.0441, 0.2208, 0.4812, 0.2539]) -Greedy action tensor([-1.8079, -0.2739, 0.5812, -0.0920]) tensor([0.0452, 0.2098, 0.4933, 0.2516]) -Greedy action tensor([-1.2902, -0.3742, 0.3460, 0.1381]) tensor([0.0781, 0.1952, 0.4010, 0.3257]) -Greedy action tensor([-1.6560, -0.1998, 0.5131, 0.0403]) tensor([0.0513, 0.2201, 0.4489, 0.2798]) -Greedy action tensor([0.7999, 1.2656, 0.0828, 0.8012]) tensor([0.2449, 0.3902, 0.1196, 0.2453]) -Greedy action tensor([-0.2645, 0.5408, 0.4185, 1.2497]) tensor([0.1024, 0.2292, 0.2028, 0.4656]) -Greedy action tensor([-1.1415, -0.3605, 0.3214, 0.4468]) tensor([0.0807, 0.1761, 0.3483, 0.3949]) -Greedy action tensor([-1.4472, -0.2615, 0.6755, 0.2999]) tensor([0.0545, 0.1782, 0.4549, 0.3125]) -Greedy action tensor([-1.8759, -0.4137, 0.6326, -0.1437]) tensor([0.0430, 0.1856, 0.5283, 0.2431]) -Greedy action tensor([-1.8597, -0.4846, 0.6208, -0.1379]) tensor([0.0444, 0.1758, 0.5310, 0.2487]) -Greedy action tensor([-1.8296, -0.4336, 0.6198, -0.1146]) tensor([0.0451, 0.1821, 0.5222, 0.2506]) -Greedy action tensor([-1.6540, -0.3953, 0.6511, 0.1368]) tensor([0.0487, 0.1714, 0.4881, 0.2918]) -Greedy action tensor([-1.7946, -0.2402, 0.5769, -0.0251]) tensor([0.0448, 0.2121, 0.4801, 0.2630]) -Greedy action tensor([-1.8237, -0.5036, 0.6077, -0.1231]) tensor([0.0463, 0.1734, 0.5267, 0.2536]) -Greedy action tensor([-1.7623, -0.4269, 0.5830, -0.0854]) tensor([0.0486, 0.1846, 0.5069, 0.2598]) -Greedy action tensor([-1.6796, -0.3946, 0.5370, -0.0133]) tensor([0.0524, 0.1894, 0.4808, 0.2773]) -Greedy action tensor([-0.6477, 0.7983, 0.0490, -0.0913]) tensor([0.1111, 0.4719, 0.2231, 0.1939]) -Greedy action tensor([-0.2434, 0.2007, 0.2111, 0.4004]) tensor([0.1656, 0.2582, 0.2609, 0.3153]) -Greedy action tensor([-0.8172, -0.4437, 0.1779, 0.1134]) tensor([0.1300, 0.1888, 0.3516, 0.3296]) -Greedy action tensor([-1.3599, -0.6006, 0.3446, 0.1993]) tensor([0.0747, 0.1596, 0.4106, 0.3551]) -Greedy action tensor([-1.6987, -0.3849, 0.5941, 0.0123]) tensor([0.0496, 0.1846, 0.4913, 0.2746]) -Greedy action tensor([-1.2835, 0.5588, 0.2436, -0.0537]) tensor([0.0652, 0.4115, 0.3003, 0.2230]) -Greedy action tensor([-1.7788, -0.4016, 0.5888, -0.0800]) tensor([0.0474, 0.1878, 0.5057, 0.2591]) -Greedy action tensor([-1.7650, -0.3290, 0.5741, -0.0544]) tensor([0.0474, 0.1992, 0.4914, 0.2621]) -Greedy action tensor([-1.2426, -0.0613, 0.6977, 0.7647]) tensor([0.0536, 0.1746, 0.3730, 0.3988]) -Greedy action tensor([-1.9034, -0.4522, 0.6484, -0.1576]) tensor([0.0420, 0.1791, 0.5384, 0.2405]) -Greedy action tensor([-1.5652, -0.1863, 0.5002, 0.2691]) tensor([0.0523, 0.2077, 0.4126, 0.3274]) -Greedy action tensor([-1.6980e+00, -5.4005e-01, 5.6862e-01, -1.1045e-03]) tensor([0.0518, 0.1651, 0.5002, 0.2829]) -Greedy action tensor([-1.7870, -0.3586, 0.5814, -0.0876]) tensor([0.0469, 0.1956, 0.5009, 0.2566]) -Greedy action tensor([-1.7222, -0.4999, 0.8474, 0.3510]) tensor([0.0394, 0.1336, 0.5141, 0.3129]) -Greedy action tensor([-1.9216, -0.4415, 0.6503, -0.1671]) tensor([0.0412, 0.1811, 0.5395, 0.2382]) -Greedy action tensor([-1.8027, -0.4055, 0.6720, -0.0271]) tensor([0.0438, 0.1772, 0.5204, 0.2586]) -Greedy action tensor([-0.7818, -0.6217, 0.2062, 0.0961]) tensor([0.1376, 0.1615, 0.3697, 0.3311]) -Greedy action tensor([-1.8446, -0.4434, 0.6179, -0.1316]) tensor([0.0448, 0.1817, 0.5253, 0.2482]) -Greedy action tensor([-0.8102, -0.6671, 0.1942, 0.1881]) tensor([0.1316, 0.1519, 0.3594, 0.3571]) -Greedy action tensor([-1.8797, -0.4329, 0.6308, -0.1541]) tensor([0.0431, 0.1834, 0.5312, 0.2423]) -Greedy action tensor([-1.0825, -0.6151, 0.7233, -0.4387]) tensor([0.0945, 0.1508, 0.5749, 0.1799]) -Greedy action tensor([-1.4855, -0.1664, 0.6645, 0.3688]) tensor([0.0507, 0.1897, 0.4355, 0.3240]) -Greedy action tensor([-1.9315, -0.4238, 0.6585, -0.1743]) tensor([0.0406, 0.1833, 0.5409, 0.2352]) -Greedy action tensor([-1.9149, -0.4783, 0.7049, -0.1402]) tensor([0.0403, 0.1694, 0.5529, 0.2375]) -Greedy action tensor([-0.4893, 0.1195, 0.1290, 0.1660]) tensor([0.1511, 0.2777, 0.2803, 0.2909]) -Greedy action tensor([-1.6714, -0.0386, 0.5024, 0.1419]) tensor([0.0475, 0.2433, 0.4178, 0.2914]) -Greedy action tensor([ 0.7424, -0.3074, -0.0206, -0.0852]) tensor([0.4438, 0.1553, 0.2069, 0.1940]) -Greedy action tensor([ 0.6440, -0.2584, 0.0058, -0.2231]) tensor([0.4248, 0.1723, 0.2244, 0.1785]) -Greedy action tensor([ 0.9177, -0.7062, 0.0309, -0.4586]) tensor([0.5372, 0.1059, 0.2213, 0.1356]) -Greedy action tensor([ 0.3748, -0.2127, -0.1676, -0.3973]) tensor([0.3848, 0.2138, 0.2237, 0.1778]) -Greedy action tensor([ 0.5697, -0.4618, -0.0089, -0.2389]) tensor([0.4233, 0.1509, 0.2373, 0.1886]) -Greedy action tensor([ 0.8162, -0.4487, -0.0493, -0.5462]) tensor([0.5104, 0.1441, 0.2148, 0.1307]) -Greedy action tensor([ 0.6171, -0.2424, 0.0726, -0.2490]) tensor([0.4125, 0.1746, 0.2393, 0.1735]) -Greedy action tensor([ 0.7853, -0.3363, -0.0996, -0.1803]) tensor([0.4719, 0.1537, 0.1948, 0.1797]) -Greedy action tensor([ 0.6902, -0.4019, 0.0695, -0.1010]) tensor([0.4298, 0.1442, 0.2311, 0.1949]) -Greedy action tensor([ 0.4104, 0.3294, 0.0118, -0.2535]) tensor([0.3217, 0.2967, 0.2160, 0.1656]) -Greedy action tensor([ 0.5078, 0.2642, 0.0916, -0.2153]) tensor([0.3415, 0.2676, 0.2252, 0.1657]) -Greedy action tensor([ 0.6150, -0.4419, -0.0043, -0.3168]) tensor([0.4387, 0.1524, 0.2361, 0.1728]) -Greedy action tensor([ 0.6254, -0.2306, 0.1042, -0.2056]) tensor([0.4075, 0.1731, 0.2419, 0.1775]) -Greedy action tensor([ 1.1675, -0.5846, -0.0557, -0.5179]) tensor([0.6049, 0.1049, 0.1780, 0.1121]) -Greedy action tensor([ 0.6951, -0.2608, -0.1653, -0.2105]) tensor([0.4521, 0.1738, 0.1913, 0.1828]) -Greedy action tensor([ 0.3655, -0.1459, -0.0880, -0.2610]) tensor([0.3611, 0.2165, 0.2294, 0.1930]) -Greedy action tensor([ 0.5688, 0.2870, -0.1919, -0.2322]) tensor([0.3744, 0.2825, 0.1750, 0.1681]) -Greedy action tensor([ 0.5861, -0.1245, -0.1692, -0.1568]) tensor([0.4104, 0.2016, 0.1928, 0.1952]) -Greedy action tensor([ 0.3855, -0.1006, -0.1069, -0.1224]) tensor([0.3536, 0.2175, 0.2161, 0.2128]) -Greedy action tensor([ 0.9245, -0.8733, 0.1890, -0.3910]) tensor([0.5227, 0.0866, 0.2505, 0.1402]) -Greedy action tensor([ 0.1796, 0.6759, -0.1064, 0.1097]) tensor([0.2311, 0.3797, 0.1736, 0.2155]) -Greedy action tensor([ 0.8424, -0.2490, -0.1152, -0.1630]) tensor([0.4795, 0.1610, 0.1840, 0.1755]) -Greedy action tensor([ 0.7211, -0.8141, -0.1697, -0.3535]) tensor([0.5083, 0.1095, 0.2086, 0.1736]) -Greedy action tensor([ 0.8117, -0.4503, -0.0891, -0.4659]) tensor([0.5081, 0.1439, 0.2064, 0.1416]) -Greedy action tensor([ 0.7067, -0.4519, 0.0041, -0.3223]) tensor([0.4616, 0.1449, 0.2286, 0.1649]) -Greedy action tensor([ 0.4497, -0.1156, -0.0567, -0.3569]) tensor([0.3821, 0.2171, 0.2303, 0.1706]) -Greedy action tensor([ 1.0167, -0.5649, -0.0975, -0.5282]) tensor([0.5723, 0.1177, 0.1878, 0.1221]) -Greedy action tensor([ 0.4730, -0.2325, -0.1581, -0.1732]) tensor([0.3922, 0.1937, 0.2086, 0.2055]) -Greedy action tensor([ 0.3877, -0.0899, -0.0167, -0.3679]) tensor([0.3627, 0.2250, 0.2420, 0.1704]) -Greedy action tensor([ 1.4073, -0.6994, -0.2814, -0.2674]) tensor([0.6695, 0.0814, 0.1237, 0.1254]) -Greedy action tensor([ 0.4493, -0.1943, -0.0029, -0.3633]) tensor([0.3838, 0.2017, 0.2442, 0.1703]) -Greedy action tensor([ 0.8523, -0.5378, 0.0859, -0.4514]) tensor([0.5037, 0.1254, 0.2341, 0.1368]) -Greedy action tensor([ 0.7245, -0.3597, -0.0335, -0.3122]) tensor([0.4627, 0.1565, 0.2168, 0.1641]) -Greedy action tensor([ 0.9089, -0.1700, 0.1152, -0.1290]) tensor([0.4659, 0.1584, 0.2107, 0.1650]) -Greedy action tensor([ 0.2506, 0.0808, -0.0695, -0.3312]) tensor([0.3196, 0.2697, 0.2321, 0.1786]) -Greedy action tensor([ 0.5440, -0.0543, -0.0627, 0.0054]) tensor([0.3733, 0.2053, 0.2035, 0.2179]) -Greedy action tensor([ 0.8759, -0.5070, -0.1041, -0.2084]) tensor([0.5091, 0.1277, 0.1911, 0.1722]) -Greedy action tensor([ 0.4341, -0.6671, -0.1676, -0.2766]) tensor([0.4217, 0.1402, 0.2310, 0.2072]) -Greedy action tensor([ 0.9590, -0.3013, -0.0949, -0.1422]) tensor([0.5090, 0.1443, 0.1774, 0.1692]) -Greedy action tensor([ 0.4545, -0.0289, 0.1143, -0.3956]) tensor([0.3629, 0.2238, 0.2582, 0.1551]) -Greedy action tensor([ 0.4511, -0.4805, 0.0468, -0.4502]) tensor([0.4053, 0.1597, 0.2705, 0.1646]) -Greedy action tensor([ 0.6824, -0.3517, -0.0750, -0.1939]) tensor([0.4463, 0.1587, 0.2092, 0.1858]) -Greedy action tensor([ 0.8380, -0.6758, 0.0603, -0.5793]) tensor([0.5203, 0.1145, 0.2391, 0.1261]) -Greedy action tensor([ 0.5797, -0.5325, -0.1671, -0.1412]) tensor([0.4369, 0.1437, 0.2070, 0.2125]) -Greedy action tensor([ 0.5839, -0.0756, -0.2754, 0.0173]) tensor([0.3987, 0.2062, 0.1688, 0.2262]) -Greedy action tensor([ 1.0535, -0.1523, -0.2613, -0.2199]) tensor([0.5412, 0.1621, 0.1453, 0.1515]) -Greedy action tensor([ 0.9833, -0.5008, -0.1759, -0.3557]) tensor([0.5548, 0.1258, 0.1741, 0.1454]) -Greedy action tensor([ 0.7835, -0.2962, 0.0032, -0.1272]) tensor([0.4545, 0.1544, 0.2083, 0.1828]) -Greedy action tensor([ 0.9943, -0.9455, 0.0338, -0.6069]) tensor([0.5787, 0.0832, 0.2215, 0.1167]) -Greedy action tensor([ 0.6795, -0.2588, -0.0719, -0.1631]) tensor([0.4360, 0.1706, 0.2057, 0.1877]) -Greedy action tensor([ 0.5553, -0.3031, -0.0571, -0.1901]) tensor([0.4097, 0.1737, 0.2221, 0.1945]) -Greedy action tensor([ 0.6374, -0.2278, -0.0731, -0.1161]) tensor([0.4196, 0.1767, 0.2062, 0.1975]) -Greedy action tensor([ 0.9087, -0.6693, -0.0408, -0.4497]) tensor([0.5404, 0.1115, 0.2091, 0.1389]) -Greedy action tensor([ 0.7467, -0.6085, 0.0257, -0.2448]) tensor([0.4728, 0.1219, 0.2299, 0.1754]) -Greedy action tensor([ 0.7171, -0.5019, -0.0874, -0.2892]) tensor([0.4743, 0.1402, 0.2122, 0.1734]) -Greedy action tensor([ 0.5986, -0.7032, -0.0412, -0.3138]) tensor([0.4543, 0.1236, 0.2396, 0.1824]) -Greedy action tensor([ 8.7691e-01, -7.0962e-01, 5.0384e-04, -3.8231e-01]) tensor([0.5250, 0.1074, 0.2185, 0.1490]) -Greedy action tensor([ 0.6950, -0.2905, -0.0546, -0.1597]) tensor([0.4403, 0.1643, 0.2081, 0.1873]) -Greedy action tensor([ 0.5435, -0.2281, 0.0327, -0.1853]) tensor([0.3930, 0.1816, 0.2358, 0.1896]) -Greedy action tensor([ 0.7857, -0.7701, -0.1084, -0.3485]) tensor([0.5150, 0.1087, 0.2106, 0.1657]) -Greedy action tensor([ 0.3172, -0.0203, 0.0417, -0.2534]) tensor([0.3292, 0.2349, 0.2499, 0.1860]) -Greedy action tensor([ 1.0383, -0.4415, -0.2289, -0.5160]) tensor([0.5812, 0.1323, 0.1637, 0.1228]) -Greedy action tensor([ 0.7169, -0.5077, -0.0626, -0.1023]) tensor([0.4559, 0.1340, 0.2091, 0.2010]) -Greedy action tensor([ 0.4461, -0.3315, -0.0433, -0.3521]) tensor([0.3964, 0.1822, 0.2430, 0.1784]) -Greedy action tensor([ 0.5233, -0.2493, -0.1959, -0.2295]) tensor([0.4132, 0.1908, 0.2013, 0.1946]) -Greedy action tensor([ 0.7510, -0.6502, -0.1545, -0.3314]) tensor([0.5027, 0.1238, 0.2032, 0.1703]) -Greedy action tensor([ 0.5970, -0.1342, -0.1077, -0.1837]) tensor([0.4109, 0.1978, 0.2031, 0.1882]) -Greedy action tensor([ 0.7849, -0.3651, 0.0554, -0.5011]) tensor([0.4819, 0.1526, 0.2323, 0.1332]) -Greedy action tensor([ 0.6960, -0.8048, -0.1303, -0.5162]) tensor([0.5107, 0.1139, 0.2235, 0.1519]) -Greedy action tensor([ 0.6751, -0.4534, 0.1418, -0.5156]) tensor([0.4516, 0.1461, 0.2650, 0.1373]) -Greedy action tensor([ 0.4565, -0.3265, -0.1109, -0.2335]) tensor([0.3959, 0.1810, 0.2245, 0.1986]) -Greedy action tensor([ 0.7871, -0.8796, -0.0215, -0.3599]) tensor([0.5123, 0.0968, 0.2282, 0.1627]) -Greedy action tensor([ 0.8208, -0.2015, -0.0143, -0.5760]) tensor([0.4900, 0.1763, 0.2126, 0.1212]) -Greedy action tensor([ 0.8078, -0.1941, -0.0136, -0.3001]) tensor([0.4679, 0.1718, 0.2058, 0.1545]) -Greedy action tensor([ 0.9209, -0.7027, -0.2412, -0.6144]) tensor([0.5796, 0.1143, 0.1813, 0.1248]) -Greedy action tensor([ 0.7835, -0.7547, 0.1359, -0.2873]) tensor([0.4806, 0.1032, 0.2515, 0.1647]) -Greedy action tensor([ 0.4824, 0.0359, -0.1331, -0.0173]) tensor([0.3588, 0.2296, 0.1939, 0.2177]) -Greedy action tensor([ 0.5218, -0.4103, -0.1243, -0.3992]) tensor([0.4318, 0.1700, 0.2263, 0.1719]) -Greedy action tensor([ 0.7599, -0.4579, -0.1540, -0.2285]) tensor([0.4833, 0.1430, 0.1938, 0.1799]) -Greedy action tensor([ 0.6178, -0.3778, -0.0454, -0.3812]) tensor([0.4439, 0.1640, 0.2287, 0.1635]) -Greedy action tensor([ 0.7845, -0.6198, -0.0857, -0.2576]) tensor([0.4958, 0.1217, 0.2077, 0.1749]) -Greedy action tensor([ 0.6408, 0.0463, -0.6723, -0.0828]) tensor([0.4337, 0.2393, 0.1166, 0.2103]) -Greedy action tensor([ 0.4095, -0.2448, 0.0528, -0.1231]) tensor([0.3563, 0.1852, 0.2494, 0.2092]) -Greedy action tensor([ 1.3683, -0.4139, -0.3072, 0.5147]) tensor([0.5614, 0.0945, 0.1051, 0.2391]) -Greedy action tensor([ 2.5846, -1.6163, -0.3131, 0.8805]) tensor([0.7987, 0.0120, 0.0440, 0.1453]) -Greedy action tensor([ 1.5217, -0.1279, -0.8200, 0.7314]) tensor([0.5741, 0.1103, 0.0552, 0.2604]) -Greedy action tensor([ 1.7110, -0.3181, -1.0240, 0.5069]) tensor([0.6683, 0.0879, 0.0434, 0.2005]) -Greedy action tensor([ 1.2388, -0.4161, -0.7682, 0.2303]) tensor([0.5916, 0.1131, 0.0795, 0.2158]) -Greedy action tensor([ 1.4172, 0.1195, -1.0468, 0.4037]) tensor([0.5810, 0.1587, 0.0494, 0.2109]) -Greedy action tensor([ 0.9296, -0.1704, -0.1630, 0.2040]) tensor([0.4646, 0.1547, 0.1558, 0.2249]) -Greedy action tensor([ 2.1213, -0.6085, -0.5455, 0.1807]) tensor([0.7823, 0.0510, 0.0544, 0.1124]) -Greedy action tensor([ 2.1306, -0.8126, -0.5077, 0.6679]) tensor([0.7376, 0.0389, 0.0527, 0.1708]) -Greedy action tensor([ 1.2260, -0.1404, -0.1700, 0.2508]) tensor([0.5320, 0.1357, 0.1317, 0.2006]) -Greedy action tensor([ 1.1200, -0.5710, -0.0702, 0.3329]) tensor([0.5145, 0.0948, 0.1565, 0.2342]) -Greedy action tensor([ 1.3463, -0.5300, -0.5990, 0.2065]) tensor([0.6188, 0.0948, 0.0885, 0.1980]) -Greedy action tensor([ 1.1394, -0.5366, -0.2980, 0.8619]) tensor([0.4582, 0.0857, 0.1088, 0.3472]) -Greedy action tensor([ 1.5079, -0.8260, -0.2772, 0.8347]) tensor([0.5634, 0.0546, 0.0945, 0.2874]) -Greedy action tensor([ 1.3917, -0.5498, -0.2154, 0.4810]) tensor([0.5727, 0.0822, 0.1148, 0.2304]) -Greedy action tensor([ 2.0735, -0.9143, -0.0743, 0.5427]) tensor([0.7228, 0.0364, 0.0844, 0.1564]) -Greedy action tensor([ 1.5281, -0.5329, -0.1843, 0.2010]) tensor([0.6357, 0.0810, 0.1147, 0.1686]) -Greedy action tensor([ 1.5253, -0.4838, -0.5360, 0.0902]) tensor([0.6669, 0.0894, 0.0849, 0.1588]) -Greedy action tensor([ 1.0013, -0.1241, -0.3284, 0.2668]) tensor([0.4834, 0.1569, 0.1279, 0.2319]) -Greedy action tensor([ 1.8477, -1.1130, -0.1690, 0.5192]) tensor([0.6898, 0.0357, 0.0918, 0.1827]) -Greedy action tensor([ 2.1266, -0.8681, -0.2025, 0.4400]) tensor([0.7504, 0.0376, 0.0731, 0.1389]) -Greedy action tensor([ 1.6085, -0.3819, -0.7125, 0.6821]) tensor([0.6132, 0.0838, 0.0602, 0.2428]) -Greedy action tensor([ 1.7361, -0.6637, -0.9298, 0.8256]) tensor([0.6400, 0.0581, 0.0445, 0.2575]) -Greedy action tensor([ 1.4677, -0.3338, -0.8151, 0.4477]) tensor([0.6144, 0.1014, 0.0627, 0.2215]) -Greedy action tensor([ 0.8494, -0.1850, -0.1981, -0.0155]) tensor([0.4701, 0.1671, 0.1649, 0.1979]) -Greedy action tensor([ 0.7766, -0.5316, 0.0890, 0.2133]) tensor([0.4269, 0.1154, 0.2146, 0.2430]) -Greedy action tensor([ 1.3528, 0.1018, -0.4892, 0.4933]) tensor([0.5353, 0.1532, 0.0848, 0.2266]) -Greedy action tensor([ 1.6430, -0.9394, -0.2229, 0.7135]) tensor([0.6153, 0.0465, 0.0952, 0.2429]) -Greedy action tensor([ 1.3303, -0.4181, -0.4551, 0.3524]) tensor([0.5821, 0.1013, 0.0976, 0.2189]) -Greedy action tensor([ 1.5531, -0.3610, -0.2208, 0.8178]) tensor([0.5566, 0.0821, 0.0944, 0.2668]) -Greedy action tensor([ 1.5584, -0.7913, -0.0683, 0.2532]) tensor([0.6398, 0.0610, 0.1258, 0.1734]) -Greedy action tensor([ 1.9004, -0.5581, -0.8133, 0.0157]) tensor([0.7670, 0.0656, 0.0508, 0.1165]) -Greedy action tensor([ 1.6534, 0.0639, -0.3213, 0.1636]) tensor([0.6377, 0.1301, 0.0885, 0.1437]) -Greedy action tensor([ 1.8642, -0.4405, 0.0421, 0.8119]) tensor([0.6209, 0.0620, 0.1004, 0.2168]) -Greedy action tensor([ 1.4068, -0.4150, -0.4600, 0.2533]) tensor([0.6128, 0.0991, 0.0947, 0.1933]) -Greedy action tensor([ 0.9013, -0.2211, -0.5480, 0.7998]) tensor([0.4059, 0.1321, 0.0953, 0.3667]) -Greedy action tensor([ 1.5641, -0.3257, -0.0597, 0.6249]) tensor([0.5750, 0.0869, 0.1134, 0.2248]) -Greedy action tensor([ 1.2755, -0.2717, -0.7266, -0.1225]) tensor([0.6270, 0.1334, 0.0847, 0.1549]) -Greedy action tensor([ 1.4203, 0.0652, -0.3934, 0.0132]) tensor([0.6003, 0.1548, 0.0979, 0.1470]) -Greedy action tensor([ 1.5097, -0.0126, -0.5501, 0.6244]) tensor([0.5687, 0.1241, 0.0725, 0.2347]) -Greedy action tensor([ 1.1209, -0.4073, -0.1242, 0.0042]) tensor([0.5458, 0.1184, 0.1571, 0.1787]) -Greedy action tensor([ 1.6156, -0.5042, -0.3485, 0.7057]) tensor([0.6014, 0.0722, 0.0844, 0.2421]) -Greedy action tensor([ 1.4700, -0.4628, -0.4211, 0.3954]) tensor([0.6109, 0.0884, 0.0922, 0.2086]) -Greedy action tensor([ 2.2537, -1.0280, -0.3007, 0.8694]) tensor([0.7322, 0.0275, 0.0569, 0.1834]) -Greedy action tensor([ 1.7959, 0.0670, -0.0519, -0.0040]) tensor([0.6665, 0.1183, 0.1050, 0.1102]) -Greedy action tensor([ 0.9936, -0.3289, -0.1384, 0.3692]) tensor([0.4707, 0.1254, 0.1518, 0.2521]) -Greedy action tensor([ 2.1716, -0.8258, -0.5023, 0.8412]) tensor([0.7229, 0.0361, 0.0499, 0.1911]) -Greedy action tensor([ 1.0232, -0.4782, -0.0279, 0.1534]) tensor([0.5022, 0.1119, 0.1755, 0.2104]) -Greedy action tensor([ 1.6304, -0.6083, -0.1265, 0.2781]) tensor([0.6503, 0.0693, 0.1122, 0.1682]) -Greedy action tensor([ 0.9595, -0.6171, -0.2095, 0.4122]) tensor([0.4772, 0.0986, 0.1482, 0.2760]) -Greedy action tensor([ 1.2005, -0.1000, -0.5585, 0.7092]) tensor([0.4863, 0.1325, 0.0837, 0.2975]) -Greedy action tensor([ 1.6269, -0.6023, -0.0877, 0.3119]) tensor([0.6426, 0.0692, 0.1157, 0.1725]) -Greedy action tensor([ 1.3949, -0.9061, -0.1798, 0.3233]) tensor([0.6062, 0.0607, 0.1255, 0.2076]) -Greedy action tensor([ 1.4690, -0.3541, -0.4440, 0.7841]) tensor([0.5515, 0.0891, 0.0814, 0.2780]) -Greedy action tensor([ 1.5660, -0.1868, -0.0301, 0.4555]) tensor([0.5864, 0.1016, 0.1189, 0.1932]) -Greedy action tensor([ 1.2596, -0.1048, -0.2494, 0.7257]) tensor([0.4847, 0.1239, 0.1072, 0.2842]) -Greedy action tensor([ 1.8262, -0.4271, -0.6598, 0.1811]) tensor([0.7240, 0.0761, 0.0603, 0.1397]) -Greedy action tensor([ 1.9249, -0.7375, -0.8889, 0.4145]) tensor([0.7404, 0.0517, 0.0444, 0.1635]) -Greedy action tensor([ 1.0255, 0.0369, -0.8680, 0.1868]) tensor([0.5115, 0.1903, 0.0770, 0.2211]) -Greedy action tensor([ 1.2091, -0.0841, -0.0483, 0.1576]) tensor([0.5241, 0.1438, 0.1490, 0.1831]) -Greedy action tensor([ 1.7538, -0.0361, -0.8607, 0.3888]) tensor([0.6686, 0.1116, 0.0489, 0.1708]) -Greedy action tensor([ 1.4966, -0.4190, -0.2800, 0.3234]) tensor([0.6151, 0.0906, 0.1041, 0.1903]) -Greedy action tensor([ 1.6164, -0.2736, -0.3392, 0.4574]) tensor([0.6225, 0.0941, 0.0881, 0.1954]) -Greedy action tensor([ 1.2113, -0.1324, -0.6011, 0.1938]) tensor([0.5600, 0.1461, 0.0914, 0.2024]) -Greedy action tensor([ 1.8811, 0.5870, -0.0951, -0.0758]) tensor([0.6435, 0.1764, 0.0892, 0.0909]) -Greedy action tensor([ 1.8645, -0.6445, -0.3104, 0.0571]) tensor([0.7358, 0.0599, 0.0836, 0.1207]) -Greedy action tensor([ 1.4446, -0.5300, -0.2437, 0.6141]) tensor([0.5683, 0.0789, 0.1051, 0.2477]) -Greedy action tensor([ 2.1813, -1.2125, -0.2416, 0.7798]) tensor([0.7307, 0.0245, 0.0648, 0.1799]) -Greedy action tensor([ 1.5573, -0.5054, -0.7559, 0.1083]) tensor([0.6845, 0.0870, 0.0677, 0.1607]) -Greedy action tensor([ 1.4715, -0.6223, -0.4289, 0.5551]) tensor([0.5978, 0.0737, 0.0894, 0.2391]) -Greedy action tensor([ 1.1559, 0.2505, -0.6194, 0.2572]) tensor([0.5048, 0.2041, 0.0855, 0.2055]) -Greedy action tensor([ 2.4900, -1.0428, -0.5648, 0.9871]) tensor([0.7699, 0.0225, 0.0363, 0.1713]) -Greedy action tensor([ 1.6010, -0.4467, -0.2505, 0.7109]) tensor([0.5894, 0.0761, 0.0925, 0.2420]) -Greedy action tensor([ 1.1929, -0.3973, -0.4024, 0.5662]) tensor([0.5152, 0.1050, 0.1045, 0.2753]) -Greedy action tensor([ 1.7531, -1.1679, 0.1247, 0.5579]) tensor([0.6440, 0.0347, 0.1264, 0.1949]) -Greedy action tensor([ 1.7692, -0.9418, -0.0917, 0.2754]) tensor([0.6913, 0.0460, 0.1075, 0.1552]) -Greedy action tensor([ 1.2723, -0.7254, -0.0653, 0.3059]) tensor([0.5622, 0.0763, 0.1476, 0.2139]) -Greedy action tensor([ 1.6746, -0.4631, -0.4982, 0.6001]) tensor([0.6356, 0.0750, 0.0724, 0.2171]) -Greedy action tensor([ 2.0320, -0.6941, -0.1768, 0.3569]) tensor([0.7339, 0.0480, 0.0806, 0.1374]) -Greedy action tensor([-0.1490, 0.1043, 0.0711, -0.1984]) tensor([0.2229, 0.2872, 0.2778, 0.2122]) -Greedy action tensor([-0.5392, -0.8680, 1.5021, -0.2775]) tensor([0.0933, 0.0671, 0.7184, 0.1212]) -Greedy action tensor([ 0.0470, -1.1477, -0.4660, -0.0451]) tensor([0.3554, 0.1076, 0.2128, 0.3242]) -Greedy action tensor([ 0.0921, -1.2503, 0.1422, 0.5807]) tensor([0.2536, 0.0663, 0.2667, 0.4134]) -Greedy action tensor([-0.2570, 1.2822, 0.8350, -1.1277]) tensor([0.1104, 0.5145, 0.3290, 0.0462]) -Greedy action tensor([1.7096, 0.5709, 1.0335, 0.1838]) tensor([0.4887, 0.1565, 0.2485, 0.1063]) -Greedy action tensor([-0.0665, -0.8738, 0.6184, -0.6144]) tensor([0.2495, 0.1113, 0.4949, 0.1443]) -Greedy action tensor([-0.1503, 0.8645, -0.0913, 1.1514]) tensor([0.1177, 0.3247, 0.1249, 0.4327]) -Greedy action tensor([ 0.5832, -0.8297, 0.2789, 0.5432]) tensor([0.3399, 0.0827, 0.2507, 0.3266]) -Greedy action tensor([ 0.7597, 0.5982, 0.4142, -0.2725]) tensor([0.3431, 0.2919, 0.2428, 0.1222]) -Greedy action tensor([-0.6491, -0.7854, -0.6109, -0.3536]) tensor([0.2350, 0.2051, 0.2442, 0.3158]) -Greedy action tensor([-1.5280, -1.2388, 0.0342, -1.1639]) tensor([0.1170, 0.1563, 0.5582, 0.1684]) -Greedy action tensor([ 0.8324, -0.7072, 0.0102, -0.1217]) tensor([0.4904, 0.1052, 0.2155, 0.1889]) -Greedy action tensor([ 0.5848, -0.2619, 0.8750, 1.0414]) tensor([0.2302, 0.0987, 0.3077, 0.3634]) -Greedy action tensor([ 0.0140, -0.8553, -0.2120, 0.7050]) tensor([0.2374, 0.0995, 0.1894, 0.4737]) -Greedy action tensor([ 1.1319, -1.8697, 0.0251, 0.5397]) tensor([0.5172, 0.0257, 0.1710, 0.2861]) -Greedy action tensor([-1.7308, -0.5315, -0.8065, 0.0911]) tensor([0.0768, 0.2548, 0.1935, 0.4749]) -Greedy action tensor([ 1.1270, 0.1247, -0.2123, -0.3865]) tensor([0.5408, 0.1985, 0.1417, 0.1190]) -Greedy action tensor([-0.7533, 0.7937, 1.0115, -0.5403]) tensor([0.0783, 0.3677, 0.4572, 0.0969]) -Greedy action tensor([ 0.1894, -1.2908, -0.5451, 0.8102]) tensor([0.2803, 0.0638, 0.1345, 0.5215]) -Greedy action tensor([ 0.8060, -1.5168, 0.8587, -0.2928]) tensor([0.4024, 0.0394, 0.4241, 0.1341]) -Greedy action tensor([ 1.8298, -1.8376, 0.4563, 1.0624]) tensor([0.5737, 0.0147, 0.1453, 0.2663]) -Greedy action tensor([-0.6844, -1.1642, 0.8705, -0.6144]) tensor([0.1347, 0.0833, 0.6376, 0.1444]) -Greedy action tensor([ 0.9400, -0.7241, 0.6010, 1.9617]) tensor([0.2137, 0.0405, 0.1522, 0.5936]) -Greedy action tensor([ 1.2187, -0.3849, 0.5361, 0.7975]) tensor([0.4232, 0.0851, 0.2139, 0.2778]) -Greedy action tensor([ 0.1932, -0.8090, -0.2809, 0.4028]) tensor([0.3103, 0.1139, 0.1931, 0.3827]) -Greedy action tensor([-0.1399, -1.8578, 0.0687, 0.5712]) tensor([0.2248, 0.0403, 0.2770, 0.4578]) -Greedy action tensor([ 0.3323, -1.5562, -0.4492, -0.2175]) tensor([0.4574, 0.0692, 0.2094, 0.2640]) -Greedy action tensor([1.1629, 0.0547, 0.1402, 0.1237]) tensor([0.4894, 0.1616, 0.1760, 0.1731]) -Greedy action tensor([-0.2723, 0.7397, -0.1161, -1.2289]) tensor([0.1885, 0.5186, 0.2204, 0.0724]) -Greedy action tensor([ 0.4443, 0.8669, -0.7425, 0.4484]) tensor([0.2607, 0.3979, 0.0796, 0.2618]) -Greedy action tensor([ 0.1852, 0.4746, 0.9247, -0.8915]) tensor([0.2096, 0.2799, 0.4391, 0.0714]) -Greedy action tensor([ 0.3141, -0.4244, 0.8422, -0.7890]) tensor([0.2853, 0.1363, 0.4838, 0.0947]) -Greedy action tensor([ 0.4466, 0.3084, -0.0415, -1.1299]) tensor([0.3716, 0.3236, 0.2280, 0.0768]) -Greedy action tensor([ 0.7309, -0.9627, 0.8753, 1.0383]) tensor([0.2703, 0.0497, 0.3123, 0.3676]) -Greedy action tensor([0.4192, 0.1365, 0.5940, 0.3808]) tensor([0.2560, 0.1929, 0.3048, 0.2463]) -Greedy action tensor([ 0.8531, -0.2826, 0.9130, 1.1391]) tensor([0.2693, 0.0865, 0.2859, 0.3584]) -Greedy action tensor([ 0.2001, -0.0959, 0.5612, 0.0436]) tensor([0.2479, 0.1844, 0.3557, 0.2120]) -Greedy action tensor([ 0.2927, -0.4222, 0.6444, 0.2428]) tensor([0.2589, 0.1267, 0.3681, 0.2463]) -Greedy action tensor([ 0.5775, -0.3663, 0.6161, -0.2217]) tensor([0.3474, 0.1352, 0.3611, 0.1562]) -Greedy action tensor([ 0.1164, -0.7045, 0.9505, 1.1626]) tensor([0.1518, 0.0668, 0.3495, 0.4320]) -Greedy action tensor([-0.0304, -0.0496, -0.0305, -1.1230]) tensor([0.3015, 0.2958, 0.3015, 0.1011]) -Greedy action tensor([-0.2883, -0.4701, 0.5543, 0.7082]) tensor([0.1457, 0.1214, 0.3383, 0.3946]) -Greedy action tensor([-0.2461, -0.8406, -0.2565, -0.0580]) tensor([0.2668, 0.1472, 0.2640, 0.3220]) -Greedy action tensor([-0.3010, -0.9626, 1.2618, -0.4829]) tensor([0.1404, 0.0725, 0.6701, 0.1171]) -Greedy action tensor([ 2.1630, -1.5796, -0.0333, 1.0994]) tensor([0.6756, 0.0160, 0.0751, 0.2332]) -Greedy action tensor([-0.6312, 0.4857, -0.6048, -1.5473]) tensor([0.1824, 0.5573, 0.1873, 0.0730]) -Greedy action tensor([ 0.1818, -1.3976, 1.0187, -0.3070]) tensor([0.2422, 0.0499, 0.5593, 0.1486]) -Greedy action tensor([-0.7137, -0.9927, -0.0368, -0.8679]) tensor([0.2183, 0.1651, 0.4295, 0.1871]) -Greedy action tensor([-0.1063, -1.9930, -1.1217, 1.5180]) tensor([0.1518, 0.0230, 0.0550, 0.7702]) -Greedy action tensor([ 0.1022, 1.2605, -0.3006, -0.0746]) tensor([0.1757, 0.5596, 0.1175, 0.1472]) -Greedy action tensor([ 0.5358, 0.7728, -1.0075, -0.2701]) tensor([0.3415, 0.4329, 0.0730, 0.1526]) -Greedy action tensor([ 0.2716, -0.8445, 0.1789, 0.8926]) tensor([0.2439, 0.0799, 0.2223, 0.4539]) -Greedy action tensor([ 0.3081, -0.7140, 0.8538, 0.6157]) tensor([0.2249, 0.0809, 0.3882, 0.3059]) -Greedy action tensor([-0.6152, -1.3902, 0.5198, -0.8002]) tensor([0.1851, 0.0853, 0.5758, 0.1538]) -Greedy action tensor([ 0.1069, -0.9965, -0.9706, -0.4616]) tensor([0.4467, 0.1482, 0.1521, 0.2530]) -Greedy action tensor([ 0.6209, -0.1090, 0.7182, -0.2701]) tensor([0.3339, 0.1610, 0.3681, 0.1370]) -Greedy action tensor([ 4.4900e-04, -8.1029e-02, 1.5855e+00, -2.2458e-01]) tensor([0.1316, 0.1213, 0.6421, 0.1051]) -Greedy action tensor([-0.0589, -2.2057, 0.1105, 0.5680]) tensor([0.2396, 0.0280, 0.2838, 0.4485]) -Greedy action tensor([ 1.4611, -0.6941, -0.2343, 1.4572]) tensor([0.4356, 0.0505, 0.0800, 0.4339]) -Greedy action tensor([ 0.7960, 0.1043, 0.8086, -0.3071]) tensor([0.3514, 0.1760, 0.3559, 0.1166]) -Greedy action tensor([-0.4199, -1.6981, -0.2597, 0.9477]) tensor([0.1568, 0.0437, 0.1840, 0.6155]) -Greedy action tensor([ 1.0378, -0.3158, -0.0948, 0.8480]) tensor([0.4153, 0.1073, 0.1338, 0.3435]) -Greedy action tensor([ 0.6703, 1.0836, -0.2249, 0.6896]) tensor([0.2538, 0.3837, 0.1037, 0.2588]) -Greedy action tensor([ 2.2744, -0.3843, -0.0836, 1.7625]) tensor([0.5669, 0.0397, 0.0536, 0.3398]) -Greedy action tensor([ 1.7083, -0.1819, -1.1910, 0.5234]) tensor([0.6614, 0.0999, 0.0364, 0.2023]) -Greedy action tensor([ 1.5014, -0.2117, 0.2225, 1.3262]) tensor([0.4352, 0.0785, 0.1211, 0.3652]) -Greedy action tensor([ 0.9867, -1.5922, 0.2250, 1.2722]) tensor([0.3481, 0.0264, 0.1625, 0.4631]) -Greedy action tensor([ 0.4008, -1.3331, -0.1353, 0.4389]) tensor([0.3571, 0.0631, 0.2089, 0.3709]) -Greedy action tensor([ 0.7272, -0.3132, -0.6491, 2.1285]) tensor([0.1765, 0.0624, 0.0446, 0.7166]) -Greedy action tensor([ 0.9310, -0.3042, -0.1818, -0.1430]) tensor([0.5099, 0.1483, 0.1676, 0.1742]) -Greedy action tensor([0.5630, 0.3840, 0.5216, 0.0616]) tensor([0.2940, 0.2458, 0.2821, 0.1781]) -Greedy action tensor([-1.0050, -0.1271, 0.8318, -0.3922]) tensor([0.0867, 0.2087, 0.5445, 0.1601]) -Greedy action tensor([ 0.7609, -0.3254, -0.6786, 0.7823]) tensor([0.3852, 0.1300, 0.0913, 0.3935]) -Greedy action tensor([-0.4743, -1.7275, 1.8725, -0.1371]) tensor([0.0761, 0.0217, 0.7955, 0.1066]) -Greedy action tensor([ 1.0587, -1.0091, 0.2901, 0.5174]) tensor([0.4604, 0.0582, 0.2134, 0.2679]) -Greedy action tensor([ 1.5516, -0.6603, 0.3850, 1.3991]) tensor([0.4387, 0.0480, 0.1366, 0.3766]) -Greedy action tensor([ 0.9182, -1.6740, -0.0494, 1.3346]) tensor([0.3365, 0.0252, 0.1279, 0.5104]) -Greedy action tensor([ 0.3809, -1.1454, 0.3756, -0.0996]) tensor([0.3533, 0.0768, 0.3514, 0.2185]) -Greedy action tensor([ 1.5199, 1.0172, 0.7889, -0.4340]) tensor([0.4488, 0.2715, 0.2161, 0.0636]) -Greedy action tensor([1.0188, 0.8731, 1.0581, 0.2845]) tensor([0.2955, 0.2554, 0.3073, 0.1418]) -Greedy action tensor([-1.7851, -0.4595, 0.7902, 0.2190]) tensor([0.0395, 0.1487, 0.5188, 0.2930]) -Greedy action tensor([-1.6747, -0.2672, 0.5010, -0.0426]) tensor([0.0526, 0.2149, 0.4634, 0.2691]) -Greedy action tensor([-1.8549, -0.2680, 0.6133, -0.1088]) tensor([0.0427, 0.2087, 0.5038, 0.2447]) -Greedy action tensor([-1.8816, -0.4528, 0.6405, -0.1487]) tensor([0.0429, 0.1792, 0.5349, 0.2430]) -Greedy action tensor([-1.6651, -0.4947, 0.5128, 0.0393]) tensor([0.0539, 0.1738, 0.4759, 0.2964]) -Greedy action tensor([-1.3965, -0.4081, 0.4084, -0.0075]) tensor([0.0726, 0.1950, 0.4413, 0.2911]) -Greedy action tensor([-1.9019, -0.4056, 0.6441, -0.1470]) tensor([0.0417, 0.1860, 0.5314, 0.2409]) -Greedy action tensor([-1.9288, -0.3921, 0.6511, -0.1674]) tensor([0.0405, 0.1885, 0.5350, 0.2360]) -Greedy action tensor([-1.8328, -0.4342, 0.6070, -0.1413]) tensor([0.0456, 0.1845, 0.5226, 0.2473]) -Greedy action tensor([-1.8034, -0.5097, 0.6047, -0.0983]) tensor([0.0470, 0.1715, 0.5227, 0.2588]) -Greedy action tensor([-1.8375, -0.2796, 0.5806, -0.1035]) tensor([0.0442, 0.2098, 0.4959, 0.2502]) -Greedy action tensor([-1.6967, -0.3916, 0.8827, 0.3420]) tensor([0.0391, 0.1443, 0.5161, 0.3005]) -Greedy action tensor([-0.6041, 0.2676, 0.0828, 0.8377]) tensor([0.1041, 0.2489, 0.2069, 0.4401]) -Greedy action tensor([-1.8566, -0.4437, 0.6187, -0.1354]) tensor([0.0443, 0.1819, 0.5263, 0.2476]) -Greedy action tensor([-1.9242, -0.4506, 0.6735, -0.1593]) tensor([0.0406, 0.1772, 0.5452, 0.2371]) -Greedy action tensor([-1.9050, -0.4200, 0.6437, -0.1625]) tensor([0.0418, 0.1846, 0.5348, 0.2388]) -Greedy action tensor([-0.3779, 0.1217, 0.5360, 1.1584]) tensor([0.1022, 0.1684, 0.2548, 0.4747]) -Greedy action tensor([-1.8957, -0.4485, 0.6439, -0.1547]) tensor([0.0423, 0.1799, 0.5364, 0.2414]) -Greedy action tensor([-1.8365, -0.3657, 0.6025, -0.1109]) tensor([0.0446, 0.1941, 0.5110, 0.2504]) -Greedy action tensor([-1.8017, -0.3081, 0.5795, -0.1034]) tensor([0.0460, 0.2049, 0.4977, 0.2514]) -Greedy action tensor([-1.8844, -0.2440, 0.6082, -0.1337]) tensor([0.0417, 0.2148, 0.5037, 0.2399]) -Greedy action tensor([-1.4590, -0.7470, 0.3622, 0.0920]) tensor([0.0718, 0.1463, 0.4435, 0.3385]) -Greedy action tensor([-1.8749, -0.4572, 0.6299, -0.1440]) tensor([0.0434, 0.1793, 0.5319, 0.2453]) -Greedy action tensor([-1.6244, -0.0319, 0.4476, -0.2681]) tensor([0.0564, 0.2771, 0.4476, 0.2188]) -Greedy action tensor([-1.8397, -0.3095, 0.6045, -0.1014]) tensor([0.0438, 0.2023, 0.5047, 0.2492]) -Greedy action tensor([-1.1125, 0.8624, 0.1510, 0.1994]) tensor([0.0647, 0.4662, 0.2289, 0.2402]) -Greedy action tensor([-1.6355, 0.0701, 0.4630, 0.1859]) tensor([0.0480, 0.2641, 0.3913, 0.2966]) -Greedy action tensor([-1.8942, -0.3636, 0.6462, -0.1205]) tensor([0.0413, 0.1910, 0.5242, 0.2435]) -Greedy action tensor([-1.9278, -0.4463, 0.6569, -0.1732]) tensor([0.0409, 0.1800, 0.5425, 0.2365]) -Greedy action tensor([-1.9092, -0.4398, 0.6482, -0.1647]) tensor([0.0417, 0.1813, 0.5382, 0.2387]) -Greedy action tensor([-1.6862, -0.5092, 0.5656, 0.0893]) tensor([0.0509, 0.1651, 0.4836, 0.3004]) -Greedy action tensor([-1.2460, -0.5791, 0.6271, 0.7822]) tensor([0.0586, 0.1142, 0.3816, 0.4456]) -Greedy action tensor([-1.8185, -0.4932, 0.5934, -0.1137]) tensor([0.0467, 0.1757, 0.5208, 0.2568]) -Greedy action tensor([-1.7204, -0.3579, 0.5644, -0.0415]) tensor([0.0498, 0.1944, 0.4890, 0.2668]) -Greedy action tensor([-1.8005, -0.4977, 0.6021, -0.0849]) tensor([0.0470, 0.1728, 0.5191, 0.2611]) -Greedy action tensor([-1.5585, -0.4890, 0.4628, -0.0066]) tensor([0.0618, 0.1801, 0.4664, 0.2917]) -Greedy action tensor([-1.9018, -0.3315, 0.6297, -0.1575]) tensor([0.0415, 0.1995, 0.5216, 0.2374]) -Greedy action tensor([-1.1944, 0.0183, 0.4680, 0.3856]) tensor([0.0690, 0.2321, 0.3639, 0.3351]) -Greedy action tensor([-1.9098, -0.4423, 0.6469, -0.1681]) tensor([0.0418, 0.1812, 0.5386, 0.2384]) -Greedy action tensor([-1.8506, -0.3193, 0.6486, -0.1012]) tensor([0.0425, 0.1964, 0.5169, 0.2442]) -Greedy action tensor([-1.8989, -0.3510, 0.6350, -0.1394]) tensor([0.0415, 0.1950, 0.5226, 0.2409]) -Greedy action tensor([-1.8443, -0.8645, 0.3421, 0.0377]) tensor([0.0523, 0.1392, 0.4653, 0.3432]) -Greedy action tensor([-1.8118, -0.4675, 0.5847, -0.0996]) tensor([0.0468, 0.1796, 0.5142, 0.2594]) -Greedy action tensor([-1.5540, -0.0204, 0.4709, -0.1224]) tensor([0.0575, 0.2664, 0.4355, 0.2406]) -Greedy action tensor([-1.5953, -0.4513, 0.7142, 0.3347]) tensor([0.0474, 0.1488, 0.4773, 0.3265]) -Greedy action tensor([-1.7623, -0.2439, 0.5506, -0.0416]) tensor([0.0470, 0.2147, 0.4753, 0.2629]) -Greedy action tensor([-1.8870, -0.4693, 0.6473, -0.1310]) tensor([0.0425, 0.1755, 0.5359, 0.2461]) -Greedy action tensor([-0.2115, -0.1813, 1.0198, 1.7112]) tensor([0.0813, 0.0838, 0.2786, 0.5562]) -Greedy action tensor([-1.0004, -0.5880, 0.2610, 0.1477]) tensor([0.1088, 0.1643, 0.3840, 0.3429]) -Greedy action tensor([-0.9065, -0.2520, 0.2519, -0.0680]) tensor([0.1187, 0.2285, 0.3782, 0.2746]) -Greedy action tensor([-1.9307, -0.4481, 0.6715, -0.1714]) tensor([0.0405, 0.1783, 0.5462, 0.2351]) -Greedy action tensor([-1.8049, -0.4469, 0.5928, -0.1437]) tensor([0.0473, 0.1838, 0.5200, 0.2489]) -Greedy action tensor([-1.8996, -0.4615, 0.7081, -0.0947]) tensor([0.0402, 0.1695, 0.5458, 0.2446]) -Greedy action tensor([-1.7682, -0.3838, 0.6639, -0.0296]) tensor([0.0453, 0.1809, 0.5159, 0.2579]) -Greedy action tensor([-1.4679, -0.4757, 0.5817, 0.2881]) tensor([0.0580, 0.1563, 0.4501, 0.3356]) -Greedy action tensor([-1.7999, -0.2206, 0.6046, -0.0328]) tensor([0.0439, 0.2130, 0.4861, 0.2570]) -Greedy action tensor([-1.8487, -0.4708, 0.6201, -0.1508]) tensor([0.0450, 0.1784, 0.5310, 0.2456]) -Greedy action tensor([-1.9288, -0.4404, 0.6570, -0.1733]) tensor([0.0408, 0.1809, 0.5420, 0.2363]) -Greedy action tensor([-1.8115, -0.2916, 0.5843, -0.0804]) tensor([0.0451, 0.2060, 0.4946, 0.2544]) -Greedy action tensor([-1.6596, -0.4773, 0.5341, -0.0650]) tensor([0.0551, 0.1796, 0.4939, 0.2713]) -Greedy action tensor([-1.9427, -0.4471, 0.6667, -0.1796]) tensor([0.0402, 0.1793, 0.5462, 0.2343]) -Greedy action tensor([-1.7226, 0.0154, 0.4858, -0.0137]) tensor([0.0469, 0.2668, 0.4271, 0.2592]) -Greedy action tensor([-1.2288, 0.3164, 0.4124, 0.2326]) tensor([0.0660, 0.3092, 0.3404, 0.2844]) -Greedy action tensor([-1.8459, -0.3350, 0.6049, -0.1167]) tensor([0.0439, 0.1990, 0.5095, 0.2476]) -Greedy action tensor([-1.0000, -0.1801, 0.2047, -0.1008]) tensor([0.1103, 0.2505, 0.3680, 0.2712]) -Greedy action tensor([-0.5179, 0.0966, 0.1301, 0.0817]) tensor([0.1519, 0.2809, 0.2905, 0.2767]) -Greedy action tensor([-1.1185, -0.6350, 0.4021, 0.0596]) tensor([0.0957, 0.1553, 0.4380, 0.3110]) -Greedy action tensor([-1.5674, 0.1013, 0.3847, 0.0975]) tensor([0.0537, 0.2847, 0.3780, 0.2836]) -Greedy action tensor([-1.3863, -0.5557, 0.5475, 0.5048]) tensor([0.0594, 0.1363, 0.4107, 0.3936]) -Greedy action tensor([-1.8820, -0.3736, 0.6332, -0.1419]) tensor([0.0424, 0.1916, 0.5244, 0.2416]) -Greedy action tensor([-1.9063, -0.4422, 0.6476, -0.1624]) tensor([0.0418, 0.1809, 0.5379, 0.2393]) -Greedy action tensor([-1.4541, -0.5362, 0.4693, 0.1831]) tensor([0.0646, 0.1617, 0.4419, 0.3319]) -Greedy action tensor([-1.6755, -0.2908, 0.5063, -0.0726]) tensor([0.0531, 0.2122, 0.4708, 0.2639]) -Greedy action tensor([-1.8790, -0.3725, 0.6521, -0.2159]) tensor([0.0428, 0.1932, 0.5381, 0.2259]) -Greedy action tensor([-1.8851, -0.4599, 0.7347, 0.0115]) tensor([0.0391, 0.1627, 0.5374, 0.2607]) -Greedy action tensor([-0.8424, 0.8768, 0.1624, -0.0283]) tensor([0.0864, 0.4824, 0.2361, 0.1951]) -Greedy action tensor([-1.9368, -0.4505, 0.6620, -0.1760]) tensor([0.0405, 0.1791, 0.5448, 0.2356]) -Greedy action tensor([-1.4702, -0.0434, 0.5190, 0.4725]) tensor([0.0514, 0.2141, 0.3758, 0.3587]) -Greedy action tensor([-1.6538, 0.1244, 0.5602, 0.1718]) tensor([0.0449, 0.2657, 0.4108, 0.2786]) -Greedy action tensor([-1.8162, -0.3768, 0.5918, -0.1043]) tensor([0.0457, 0.1929, 0.5081, 0.2533]) -Greedy action tensor([-1.9042, -0.4638, 0.6440, -0.1625]) tensor([0.0422, 0.1781, 0.5391, 0.2407]) -Greedy action tensor([ 0.6937, -0.2340, 0.0645, -0.4733]) tensor([0.4465, 0.1766, 0.2380, 0.1390]) -Greedy action tensor([ 0.9450, -0.7712, -0.1284, -0.7500]) tensor([0.5864, 0.1054, 0.2005, 0.1077]) -Greedy action tensor([ 0.5390, -0.0534, 0.1402, -0.3159]) tensor([0.3774, 0.2087, 0.2533, 0.1605]) -Greedy action tensor([ 0.5678, -0.1291, -0.1290, -0.3907]) tensor([0.4202, 0.2093, 0.2093, 0.1611]) -Greedy action tensor([ 0.4922, -0.2736, -0.0773, -0.2149]) tensor([0.3962, 0.1842, 0.2242, 0.1954]) -Greedy action tensor([ 0.2581, 0.0367, 0.0389, -0.3670]) tensor([0.3185, 0.2552, 0.2558, 0.1705]) -Greedy action tensor([ 0.7183, -0.2583, -0.0105, -0.1094]) tensor([0.4355, 0.1640, 0.2101, 0.1903]) -Greedy action tensor([ 0.4738, 0.1031, 0.0116, -0.2547]) tensor([0.3568, 0.2463, 0.2247, 0.1722]) -Greedy action tensor([ 0.8801, -0.8170, 0.0582, -0.2977]) tensor([0.5179, 0.0949, 0.2277, 0.1595]) -Greedy action tensor([ 1.0125, -0.7145, 0.1441, -0.6607]) tensor([0.5602, 0.0996, 0.2351, 0.1051]) -Greedy action tensor([ 0.6651, -0.5896, -0.1698, -0.4507]) tensor([0.4886, 0.1393, 0.2120, 0.1601]) -Greedy action tensor([ 0.5938, -0.1313, -0.0944, -0.0267]) tensor([0.3961, 0.1918, 0.1990, 0.2130]) -Greedy action tensor([ 0.3560, -0.2778, 0.1320, -0.3318]) tensor([0.3530, 0.1873, 0.2822, 0.1775]) -Greedy action tensor([ 0.6782, -0.5228, -0.1355, -0.3839]) tensor([0.4785, 0.1440, 0.2121, 0.1654]) -Greedy action tensor([ 0.8573, -0.5886, -0.0250, -0.6967]) tensor([0.5374, 0.1266, 0.2224, 0.1136]) -Greedy action tensor([ 0.1533, 0.4339, -0.0680, 0.0263]) tensor([0.2496, 0.3305, 0.2001, 0.2198]) -Greedy action tensor([ 0.8806, -0.7006, -0.0329, -0.1976]) tensor([0.5136, 0.1057, 0.2060, 0.1747]) -Greedy action tensor([ 1.0615, -0.7911, 0.1390, -0.4692]) tensor([0.5647, 0.0886, 0.2245, 0.1222]) -Greedy action tensor([ 0.5267, -0.2493, 0.0986, -0.4632]) tensor([0.4027, 0.1853, 0.2624, 0.1496]) -Greedy action tensor([ 0.5459, -0.1987, -0.1379, -0.1812]) tensor([0.4060, 0.1928, 0.2049, 0.1962]) -Greedy action tensor([ 0.8155, -0.3624, 0.1848, -0.7739]) tensor([0.4892, 0.1506, 0.2604, 0.0998]) -Greedy action tensor([ 0.6519, -0.3853, -0.0022, -0.2271]) tensor([0.4368, 0.1548, 0.2271, 0.1813]) -Greedy action tensor([ 0.7722, -0.3397, -0.0466, -0.3150]) tensor([0.4746, 0.1561, 0.2093, 0.1600]) -Greedy action tensor([ 0.6376, -0.3706, 0.0801, -0.4119]) tensor([0.4371, 0.1595, 0.2503, 0.1530]) -Greedy action tensor([ 0.2744, -0.0588, -0.2455, -0.0619]) tensor([0.3305, 0.2369, 0.1965, 0.2361]) -Greedy action tensor([ 0.6183, -0.4536, 0.0531, -0.2518]) tensor([0.4293, 0.1470, 0.2439, 0.1798]) -Greedy action tensor([ 0.6665, -0.4786, -0.1056, -0.2635]) tensor([0.4598, 0.1463, 0.2125, 0.1814]) -Greedy action tensor([ 0.4813, -0.1317, -0.0626, -0.4466]) tensor([0.3972, 0.2152, 0.2306, 0.1571]) -Greedy action tensor([ 0.6724, -0.1823, -0.1944, -0.2714]) tensor([0.4475, 0.1904, 0.1881, 0.1741]) -Greedy action tensor([ 0.3130, 0.0880, -0.0817, -0.2326]) tensor([0.3277, 0.2617, 0.2208, 0.1899]) -Greedy action tensor([ 0.8417, -0.7788, 0.0162, -0.4239]) tensor([0.5214, 0.1031, 0.2284, 0.1471]) -Greedy action tensor([ 0.2545, 0.5035, -0.0384, 0.1149]) tensor([0.2565, 0.3290, 0.1914, 0.2231]) -Greedy action tensor([ 0.5076, 0.1387, -0.0552, -0.1065]) tensor([0.3569, 0.2468, 0.2033, 0.1931]) -Greedy action tensor([ 1.3032, -1.4425, -0.0387, -0.4173]) tensor([0.6647, 0.0427, 0.1737, 0.1190]) -Greedy action tensor([ 0.6807, -0.3735, 0.1608, -0.5600]) tensor([0.4480, 0.1561, 0.2664, 0.1295]) -Greedy action tensor([ 0.7448, -0.4777, -0.1301, -0.2803]) tensor([0.4831, 0.1423, 0.2014, 0.1733]) -Greedy action tensor([ 0.8415, -0.4620, 0.0850, -0.0453]) tensor([0.4645, 0.1262, 0.2180, 0.1914]) -Greedy action tensor([ 0.4178, 0.2394, -0.0753, -0.2632]) tensor([0.3386, 0.2833, 0.2068, 0.1714]) -Greedy action tensor([ 0.0739, -0.0442, -0.1200, -0.1155]) tensor([0.2825, 0.2510, 0.2327, 0.2338]) -Greedy action tensor([ 0.5258, -0.1776, 0.0290, -0.6005]) tensor([0.4119, 0.2039, 0.2507, 0.1336]) -Greedy action tensor([ 0.6053, -0.4821, -0.1240, -0.2349]) tensor([0.4443, 0.1498, 0.2142, 0.1918]) -Greedy action tensor([ 1.1127, -0.7183, 0.0196, -0.6032]) tensor([0.5969, 0.0957, 0.2001, 0.1073]) -Greedy action tensor([ 0.9233, -0.5577, -0.1892, -0.6164]) tensor([0.5648, 0.1284, 0.1857, 0.1211]) -Greedy action tensor([ 0.8444, -0.5532, -0.1128, -0.4217]) tensor([0.5227, 0.1292, 0.2007, 0.1474]) -Greedy action tensor([ 0.1757, 0.1761, -0.1811, -0.0676]) tensor([0.2870, 0.2871, 0.2009, 0.2250]) -Greedy action tensor([ 1.0327, -0.5161, 0.1595, -0.2522]) tensor([0.5244, 0.1115, 0.2190, 0.1451]) -Greedy action tensor([ 0.3112, 0.1424, 0.0570, -0.6866]) tensor([0.3346, 0.2826, 0.2595, 0.1234]) -Greedy action tensor([ 0.5819, -0.0266, -0.0472, -0.3403]) tensor([0.4041, 0.2199, 0.2154, 0.1607]) -Greedy action tensor([ 0.4800, -0.3002, -0.1782, -0.5897]) tensor([0.4312, 0.1976, 0.2233, 0.1479]) -Greedy action tensor([ 0.6944, -0.8337, -0.1970, -0.4976]) tensor([0.5179, 0.1124, 0.2124, 0.1573]) -Greedy action tensor([ 1.0728, -0.7723, -0.0595, -0.7738]) tensor([0.6105, 0.0965, 0.1967, 0.0963]) -Greedy action tensor([ 0.4372, -0.1812, -0.0977, -0.0965]) tensor([0.3689, 0.1988, 0.2160, 0.2163]) -Greedy action tensor([ 0.7463, -0.4840, -0.1459, -0.5863]) tensor([0.5087, 0.1486, 0.2084, 0.1342]) -Greedy action tensor([ 0.7487, -0.0828, 0.0261, 0.0892]) tensor([0.4102, 0.1786, 0.1991, 0.2121]) -Greedy action tensor([ 0.9012, -0.6198, -0.0400, -0.3409]) tensor([0.5270, 0.1151, 0.2056, 0.1522]) -Greedy action tensor([ 0.6507, -0.0616, -0.0769, -0.0114]) tensor([0.4017, 0.1971, 0.1941, 0.2072]) -Greedy action tensor([ 0.7820, -0.6328, -0.1189, -0.4735]) tensor([0.5171, 0.1256, 0.2100, 0.1473]) -Greedy action tensor([ 0.5917, -0.3726, -0.1489, -0.2314]) tensor([0.4353, 0.1660, 0.2076, 0.1911]) -Greedy action tensor([ 0.7200, -0.3941, -0.0541, -0.3278]) tensor([0.4673, 0.1534, 0.2155, 0.1639]) -Greedy action tensor([ 0.8168, -0.6673, -0.0351, -0.2764]) tensor([0.5029, 0.1140, 0.2145, 0.1685]) -Greedy action tensor([ 0.9369, -0.5995, -0.0160, -0.5024]) tensor([0.5441, 0.1171, 0.2098, 0.1290]) -Greedy action tensor([ 0.4064, 0.0636, -0.1772, -0.2306]) tensor([0.3576, 0.2538, 0.1995, 0.1891]) -Greedy action tensor([ 0.5846, -0.3602, -0.1439, -0.2080]) tensor([0.4303, 0.1673, 0.2077, 0.1948]) -Greedy action tensor([ 0.9413, -0.3085, 0.0823, -0.4448]) tensor([0.5102, 0.1462, 0.2161, 0.1276]) -Greedy action tensor([ 0.4102, 0.1980, -0.1183, 0.0295]) tensor([0.3245, 0.2624, 0.1913, 0.2218]) -Greedy action tensor([ 0.8702, -0.5356, -0.0839, -0.3003]) tensor([0.5153, 0.1263, 0.1985, 0.1599]) -Greedy action tensor([ 0.4190, -0.0712, -0.0301, -0.0395]) tensor([0.3469, 0.2125, 0.2214, 0.2193]) -Greedy action tensor([ 0.4201, -0.1763, -0.0361, -0.2945]) tensor([0.3740, 0.2060, 0.2370, 0.1830]) -Greedy action tensor([ 0.3338, -0.0939, -0.1251, -0.1309]) tensor([0.3434, 0.2239, 0.2170, 0.2157]) -Greedy action tensor([ 0.8071, -0.5483, 0.0587, -0.3924]) tensor([0.4920, 0.1269, 0.2328, 0.1483]) -Greedy action tensor([ 0.7264, -0.7842, -0.1169, -0.3137]) tensor([0.4989, 0.1101, 0.2147, 0.1763]) -Greedy action tensor([ 0.4760, -0.1152, 0.0707, -0.2037]) tensor([0.3667, 0.2030, 0.2445, 0.1858]) -Greedy action tensor([ 0.9838, -0.4622, -0.2934, -0.3786]) tensor([0.5649, 0.1330, 0.1575, 0.1446]) -Greedy action tensor([ 0.8950, -0.3680, -0.1338, -0.2119]) tensor([0.5074, 0.1435, 0.1814, 0.1677]) -Greedy action tensor([ 0.8030, -0.6681, -0.0125, -0.4654]) tensor([0.5119, 0.1176, 0.2265, 0.1440]) -Greedy action tensor([ 0.4052, -0.0745, -0.0348, -0.0711]) tensor([0.3467, 0.2146, 0.2233, 0.2154]) -Greedy action tensor([ 0.2741, 0.1275, -0.1589, -0.2961]) tensor([0.3249, 0.2806, 0.2107, 0.1837]) -Greedy action tensor([ 1.2325, -0.8551, -0.0823, -0.6880]) tensor([0.6497, 0.0806, 0.1745, 0.0952]) -Greedy action tensor([ 0.1922, 0.0159, -0.1184, -0.1871]) tensor([0.3072, 0.2575, 0.2251, 0.2102]) -Greedy action tensor([ 0.9001, -0.4236, -0.1395, -0.6387]) tensor([0.5451, 0.1451, 0.1928, 0.1170]) -Greedy action tensor([ 0.6588, -0.3159, -0.0774, -0.4021]) tensor([0.4540, 0.1713, 0.2175, 0.1572]) -Greedy action tensor([ 1.4134, -0.5667, -0.3940, 0.3566]) tensor([0.6062, 0.0837, 0.0995, 0.2107]) -Greedy action tensor([ 1.1905, -0.1279, -0.1791, 0.4291]) tensor([0.5028, 0.1345, 0.1278, 0.2348]) -Greedy action tensor([ 1.2718, -0.2348, -0.2762, 0.2190]) tensor([0.5608, 0.1243, 0.1193, 0.1957]) -Greedy action tensor([ 2.1935, -1.1136, -0.4349, 0.6778]) tensor([0.7527, 0.0276, 0.0543, 0.1653]) -Greedy action tensor([ 0.4870, -0.0239, -0.1687, 0.2046]) tensor([0.3481, 0.2088, 0.1807, 0.2624]) -Greedy action tensor([ 1.6967, -0.4402, -0.1923, 0.8836]) tensor([0.5839, 0.0689, 0.0883, 0.2589]) -Greedy action tensor([ 1.8152, -0.5015, -0.5737, 0.1344]) tensor([0.7264, 0.0716, 0.0666, 0.1353]) -Greedy action tensor([ 1.7255, -0.1450, -0.5961, 0.3960]) tensor([0.6593, 0.1016, 0.0647, 0.1744]) -Greedy action tensor([ 1.8718, -0.2542, -0.7398, 0.4210]) tensor([0.7007, 0.0836, 0.0514, 0.1642]) -Greedy action tensor([ 1.7842, 0.4659, -0.3185, 0.4293]) tensor([0.6069, 0.1624, 0.0741, 0.1566]) -Greedy action tensor([ 1.3323, -0.8110, -0.3975, 0.7473]) tensor([0.5400, 0.0633, 0.0958, 0.3009]) -Greedy action tensor([ 0.8743, -0.7633, -0.3468, 0.2638]) tensor([0.4920, 0.0957, 0.1451, 0.2672]) -Greedy action tensor([ 1.7946, -0.4367, -0.4863, 0.4720]) tensor([0.6775, 0.0728, 0.0692, 0.1805]) -Greedy action tensor([ 1.4555, 0.1077, -0.3761, 0.4838]) tensor([0.5561, 0.1445, 0.0891, 0.2104]) -Greedy action tensor([ 1.7066, -0.5814, -0.4446, 0.4670]) tensor([0.6634, 0.0673, 0.0772, 0.1921]) -Greedy action tensor([ 2.2486, -0.9730, -0.2033, 0.1479]) tensor([0.8010, 0.0320, 0.0690, 0.0980]) -Greedy action tensor([ 1.1728, -0.2932, -0.3777, 0.6193]) tensor([0.4956, 0.1144, 0.1051, 0.2849]) -Greedy action tensor([ 1.2017, -0.1667, -0.3001, 0.2819]) tensor([0.5331, 0.1357, 0.1187, 0.2125]) -Greedy action tensor([ 2.2083, -0.8469, 0.0666, 0.5748]) tensor([0.7354, 0.0346, 0.0864, 0.1436]) -Greedy action tensor([ 2.1838, -0.1531, -0.7030, 0.2946]) tensor([0.7671, 0.0741, 0.0428, 0.1160]) -Greedy action tensor([ 1.7090, -0.1254, -0.4369, 0.4671]) tensor([0.6388, 0.1020, 0.0747, 0.1845]) -Greedy action tensor([ 1.8696, -0.8290, -0.4414, 0.7881]) tensor([0.6642, 0.0447, 0.0659, 0.2252]) -Greedy action tensor([ 1.3193, -0.2014, -0.7548, 0.3177]) tensor([0.5843, 0.1277, 0.0734, 0.2146]) -Greedy action tensor([ 1.1151, -0.6031, -0.0282, 0.1292]) tensor([0.5344, 0.0959, 0.1704, 0.1994]) -Greedy action tensor([ 1.9443, -1.0239, -0.2674, 0.5772]) tensor([0.7063, 0.0363, 0.0774, 0.1800]) -Greedy action tensor([ 1.4788, -0.7107, -0.1934, 0.6025]) tensor([0.5827, 0.0652, 0.1095, 0.2426]) -Greedy action tensor([ 1.1157, -0.2304, -0.2348, 0.3076]) tensor([0.5089, 0.1324, 0.1319, 0.2268]) -Greedy action tensor([ 1.6087, -0.3999, -0.5409, 0.5754]) tensor([0.6225, 0.0835, 0.0725, 0.2215]) -Greedy action tensor([ 1.6354, -0.1674, -0.2778, 0.3078]) tensor([0.6339, 0.1045, 0.0936, 0.1681]) -Greedy action tensor([ 1.3543, -0.0026, -0.2564, 0.6098]) tensor([0.5176, 0.1332, 0.1034, 0.2458]) -Greedy action tensor([ 0.6312, -0.2459, -0.2028, 0.1037]) tensor([0.4098, 0.1705, 0.1780, 0.2418]) -Greedy action tensor([ 1.8697, -0.6449, -0.4359, 0.5535]) tensor([0.6902, 0.0558, 0.0688, 0.1851]) -Greedy action tensor([ 1.0310, -0.2842, -0.0796, 0.5524]) tensor([0.4510, 0.1210, 0.1485, 0.2794]) -Greedy action tensor([ 1.7152, -0.7185, -0.0691, 0.0601]) tensor([0.6912, 0.0606, 0.1161, 0.1321]) -Greedy action tensor([ 0.8912, -0.4153, -0.1127, -0.1061]) tensor([0.4985, 0.1350, 0.1827, 0.1839]) -Greedy action tensor([ 1.2582, 0.1028, -0.7221, 0.3356]) tensor([0.5404, 0.1702, 0.0746, 0.2148]) -Greedy action tensor([ 1.0302, -0.4797, -0.2012, 0.3183]) tensor([0.4991, 0.1103, 0.1457, 0.2449]) -Greedy action tensor([ 1.1937, -0.5668, 0.3503, 0.2417]) tensor([0.5030, 0.0865, 0.2164, 0.1941]) -Greedy action tensor([ 1.4648, -0.2626, -0.7877, 0.3943]) tensor([0.6151, 0.1093, 0.0647, 0.2109]) -Greedy action tensor([ 1.7733, -0.6005, -0.4293, 0.4790]) tensor([0.6767, 0.0630, 0.0748, 0.1855]) -Greedy action tensor([0.7306, 0.0787, 0.0702, 0.3507]) tensor([0.3674, 0.1914, 0.1898, 0.2513]) -Greedy action tensor([ 1.6195, -0.5494, -0.5321, 0.2933]) tensor([0.6684, 0.0764, 0.0777, 0.1775]) -Greedy action tensor([ 0.9526, -0.0875, -0.6723, 0.3207]) tensor([0.4803, 0.1698, 0.0946, 0.2553]) -Greedy action tensor([ 1.3962, -0.8355, -0.3314, 0.8989]) tensor([0.5282, 0.0567, 0.0939, 0.3212]) -Greedy action tensor([ 1.0696, -0.4872, -0.2507, 0.4065]) tensor([0.5017, 0.1058, 0.1340, 0.2585]) -Greedy action tensor([ 2.0857, -0.7479, -0.9715, 0.2785]) tensor([0.7874, 0.0463, 0.0370, 0.1292]) -Greedy action tensor([ 1.9054, -0.3586, -0.7391, 0.5421]) tensor([0.6989, 0.0726, 0.0497, 0.1788]) -Greedy action tensor([ 1.4837, -0.5882, -0.1436, 0.5396]) tensor([0.5843, 0.0736, 0.1148, 0.2273]) -Greedy action tensor([ 1.3157, -0.2978, -0.0334, 0.0812]) tensor([0.5715, 0.1138, 0.1483, 0.1663]) -Greedy action tensor([ 2.3803, -0.8424, -0.1243, 0.0041]) tensor([0.8234, 0.0328, 0.0673, 0.0765]) -Greedy action tensor([ 1.8543, -0.3638, -0.0914, 0.6330]) tensor([0.6466, 0.0704, 0.0924, 0.1906]) -Greedy action tensor([ 0.4311, -0.1552, -0.7225, 0.5285]) tensor([0.3362, 0.1871, 0.1061, 0.3706]) -Greedy action tensor([ 1.6155, -0.5654, -0.3128, 0.3622]) tensor([0.6477, 0.0732, 0.0942, 0.1850]) -Greedy action tensor([ 1.9655, -0.7106, -0.4117, 0.6055]) tensor([0.7051, 0.0485, 0.0654, 0.1810]) -Greedy action tensor([ 1.3683, -0.5772, 0.0096, 0.2281]) tensor([0.5815, 0.0831, 0.1494, 0.1859]) -Greedy action tensor([ 1.3509, -0.2594, -0.6614, 0.2091]) tensor([0.6051, 0.1209, 0.0809, 0.1932]) -Greedy action tensor([ 2.7548, -1.0444, -0.3840, 0.7324]) tensor([0.8347, 0.0187, 0.0362, 0.1105]) -Greedy action tensor([ 0.9681, -0.2716, -0.6086, 0.1766]) tensor([0.5130, 0.1485, 0.1060, 0.2325]) -Greedy action tensor([ 1.9680, -0.7486, -0.3427, 0.5250]) tensor([0.7135, 0.0472, 0.0708, 0.1685]) -Greedy action tensor([ 1.1449, -0.1998, -0.6506, 0.5796]) tensor([0.5013, 0.1306, 0.0832, 0.2848]) -Greedy action tensor([ 1.5217, -0.2667, -0.0390, 0.2483]) tensor([0.6035, 0.1009, 0.1267, 0.1689]) -Greedy action tensor([ 1.1902, -0.0067, -0.7117, 0.2913]) tensor([0.5381, 0.1626, 0.0803, 0.2190]) -Greedy action tensor([ 1.4818, -0.2146, -1.1268, 0.2095]) tensor([0.6506, 0.1193, 0.0479, 0.1823]) -Greedy action tensor([ 0.9082, -0.3063, -0.3663, 0.3399]) tensor([0.4667, 0.1385, 0.1305, 0.2644]) -Greedy action tensor([ 0.8709, -0.3610, -0.4392, 0.2822]) tensor([0.4725, 0.1378, 0.1275, 0.2622]) -Greedy action tensor([ 1.4787, -0.4591, -0.2645, 0.2606]) tensor([0.6193, 0.0892, 0.1083, 0.1832]) -Greedy action tensor([ 1.5473, -0.1491, -0.9347, 0.0439]) tensor([0.6715, 0.1231, 0.0561, 0.1493]) -Greedy action tensor([ 1.5195, -0.1856, -0.6430, 0.6837]) tensor([0.5779, 0.1050, 0.0665, 0.2505]) -Greedy action tensor([ 1.9794, -0.5808, -0.2686, 0.4621]) tensor([0.7132, 0.0551, 0.0753, 0.1564]) -Greedy action tensor([ 1.3324, -0.1306, -0.3877, 0.1152]) tensor([0.5859, 0.1357, 0.1049, 0.1735]) -Greedy action tensor([ 1.1588, 0.0117, -0.8506, 0.0952]) tensor([0.5565, 0.1767, 0.0746, 0.1921]) -Greedy action tensor([ 2.4434, -0.8355, -0.0481, 0.1627]) tensor([0.8179, 0.0308, 0.0677, 0.0836]) -Greedy action tensor([ 1.2458, 0.1277, 0.1917, -0.4643]) tensor([0.5387, 0.1761, 0.1878, 0.0974]) -Greedy action tensor([ 1.8067, 0.0836, -0.2087, 0.1336]) tensor([0.6669, 0.1191, 0.0889, 0.1252]) -Greedy action tensor([ 1.6614, -0.1673, -0.7565, 0.3700]) tensor([0.6559, 0.1054, 0.0584, 0.1803]) -Greedy action tensor([ 1.9835, -0.4944, -0.6915, 0.5886]) tensor([0.7139, 0.0599, 0.0492, 0.1770]) -Greedy action tensor([ 2.1974, -0.7971, -0.4969, 0.4101]) tensor([0.7782, 0.0390, 0.0526, 0.1303]) -Greedy action tensor([ 1.5059, -0.6195, -0.1912, 0.3030]) tensor([0.6239, 0.0745, 0.1143, 0.1874]) -Greedy action tensor([ 1.1210, -0.5377, -0.3255, 0.3395]) tensor([0.5309, 0.1011, 0.1250, 0.2430]) -Greedy action tensor([ 2.0778, -1.0177, -0.0309, 0.9574]) tensor([0.6699, 0.0303, 0.0813, 0.2185]) -Greedy action tensor([ 1.3044, -0.4309, -0.5592, 0.0441]) tensor([0.6192, 0.1092, 0.0960, 0.1756]) -Greedy action tensor([-0.1474, -0.7473, 0.8716, -0.4994]) tensor([0.1991, 0.1093, 0.5516, 0.1400]) -Greedy action tensor([ 1.2263, -0.2252, 0.6719, 0.9954]) tensor([0.3843, 0.0900, 0.2207, 0.3050]) -Greedy action tensor([ 0.6422, -1.4890, 0.2088, -0.5043]) tensor([0.4797, 0.0569, 0.3110, 0.1524]) -Greedy action tensor([ 0.4476, -0.2999, -0.4759, 0.0259]) tensor([0.3958, 0.1874, 0.1572, 0.2596]) -Greedy action tensor([ 0.9703, -0.2363, -0.5670, 0.5991]) tensor([0.4537, 0.1358, 0.0975, 0.3130]) -Greedy action tensor([-0.2766, -0.8529, -0.7512, 1.7056]) tensor([0.1059, 0.0595, 0.0659, 0.7687]) -Greedy action tensor([-0.2488, -1.6977, -0.2142, -0.6035]) tensor([0.3365, 0.0790, 0.3484, 0.2360]) -Greedy action tensor([0.7685, 0.2245, 0.6123, 1.2517]) tensor([0.2465, 0.1431, 0.2108, 0.3996]) -Greedy action tensor([0.2836, 0.5161, 0.5026, 0.2368]) tensor([0.2242, 0.2829, 0.2791, 0.2139]) -Greedy action tensor([-0.1002, -1.7712, 0.0300, -0.2182]) tensor([0.3110, 0.0585, 0.3542, 0.2764]) -Greedy action tensor([-0.2162, 0.3127, 0.0885, -0.4971]) tensor([0.2080, 0.3529, 0.2821, 0.1570]) -Greedy action tensor([ 1.9010, -0.8098, 0.0350, 1.5910]) tensor([0.5116, 0.0340, 0.0792, 0.3752]) -Greedy action tensor([ 0.7677, -1.9797, 0.5065, 0.5733]) tensor([0.3763, 0.0241, 0.2898, 0.3098]) -Greedy action tensor([ 0.5608, -0.4337, 0.1283, 0.4633]) tensor([0.3418, 0.1264, 0.2218, 0.3100]) -Greedy action tensor([0.6664, 0.6543, 0.5133, 0.2794]) tensor([0.2837, 0.2803, 0.2434, 0.1926]) -Greedy action tensor([ 0.8963, -0.7001, -0.3494, 0.5176]) tensor([0.4597, 0.0932, 0.1323, 0.3148]) -Greedy action tensor([ 1.4782, -1.2707, 1.1334, 0.1169]) tensor([0.4929, 0.0315, 0.3492, 0.1264]) -Greedy action tensor([ 0.1085, 0.9222, 0.1366, -0.0901]) tensor([0.1959, 0.4420, 0.2015, 0.1606]) -Greedy action tensor([ 0.1090, 0.4126, 0.7643, -0.0872]) tensor([0.1960, 0.2655, 0.3774, 0.1611]) -Greedy action tensor([-0.8060, -0.9005, -0.9101, -0.3906]) tensor([0.2312, 0.2103, 0.2083, 0.3502]) -Greedy action tensor([-0.0095, -0.4149, 0.1062, 0.1895]) tensor([0.2494, 0.1663, 0.2800, 0.3043]) -Greedy action tensor([-0.4867, 0.2875, 0.3766, -0.3132]) tensor([0.1486, 0.3223, 0.3523, 0.1768]) -Greedy action tensor([ 1.7031, -0.5046, -0.1022, 0.8116]) tensor([0.5937, 0.0653, 0.0976, 0.2434]) -Greedy action tensor([ 0.9285, -0.6894, -0.5446, 0.3630]) tensor([0.5011, 0.0994, 0.1149, 0.2847]) -Greedy action tensor([0.5791, 0.9842, 0.7703, 0.4435]) tensor([0.2182, 0.3272, 0.2641, 0.1905]) -Greedy action tensor([ 0.7073, -0.9600, 0.9600, 0.6939]) tensor([0.2888, 0.0545, 0.3718, 0.2849]) -Greedy action tensor([ 0.4230, -1.1436, 0.2686, 0.0248]) tensor([0.3653, 0.0763, 0.3131, 0.2453]) -Greedy action tensor([ 0.5160, -0.9444, -0.1699, 0.0125]) tensor([0.4273, 0.0992, 0.2152, 0.2583]) -Greedy action tensor([-1.1677, -1.2181, 0.2887, -0.4041]) tensor([0.1192, 0.1134, 0.5116, 0.2559]) -Greedy action tensor([ 0.7173, -0.4476, 1.1984, 0.0497]) tensor([0.2905, 0.0906, 0.4699, 0.1490]) -Greedy action tensor([-0.9093, -0.0550, 0.0067, 0.7046]) tensor([0.0920, 0.2161, 0.2299, 0.4620]) -Greedy action tensor([ 0.1494, -0.4527, -0.2320, 0.8571]) tensor([0.2348, 0.1286, 0.1603, 0.4764]) -Greedy action tensor([ 0.3715, -0.7079, 0.3188, 0.6781]) tensor([0.2742, 0.0932, 0.2601, 0.3725]) -Greedy action tensor([-1.5952, -0.4889, -0.3468, 0.0131]) tensor([0.0800, 0.2418, 0.2787, 0.3995]) -Greedy action tensor([ 0.1378, -0.5694, -0.1939, 0.5807]) tensor([0.2654, 0.1308, 0.1905, 0.4133]) -Greedy action tensor([-0.2143, 0.5778, -0.2745, -0.6370]) tensor([0.2081, 0.4595, 0.1960, 0.1364]) -Greedy action tensor([ 0.0460, -1.9152, -0.8147, 1.7087]) tensor([0.1463, 0.0206, 0.0619, 0.7713]) -Greedy action tensor([ 1.0681, -0.3950, 0.6878, 1.4681]) tensor([0.2935, 0.0680, 0.2007, 0.4379]) -Greedy action tensor([ 0.1495, 0.1793, 0.2106, -0.1377]) tensor([0.2602, 0.2680, 0.2766, 0.1952]) -Greedy action tensor([ 0.1048, -0.2466, 0.0588, -0.5700]) tensor([0.3157, 0.2221, 0.3015, 0.1607]) -Greedy action tensor([-0.6060, -0.6083, -0.2641, -0.3797]) tensor([0.2146, 0.2141, 0.3021, 0.2691]) -Greedy action tensor([0.7862, 0.3840, 0.2016, 0.5965]) tensor([0.3275, 0.2191, 0.1825, 0.2709]) -Greedy action tensor([-0.3064, -0.0151, -1.2926, 0.1472]) tensor([0.2334, 0.3123, 0.0870, 0.3673]) -Greedy action tensor([ 0.5249, 0.4786, 0.6214, -0.1102]) tensor([0.2789, 0.2662, 0.3071, 0.1478]) -Greedy action tensor([ 0.1990, 0.8560, 1.3760, -0.7201]) tensor([0.1521, 0.2935, 0.4937, 0.0607]) -Greedy action tensor([-0.9463, 1.0568, -1.1229, 0.2267]) tensor([0.0801, 0.5938, 0.0671, 0.2589]) -Greedy action tensor([-0.4104, -0.0022, -0.4730, -1.2336]) tensor([0.2576, 0.3874, 0.2419, 0.1131]) -Greedy action tensor([ 1.5322, 0.0677, 1.1222, -0.0788]) tensor([0.4774, 0.1104, 0.3169, 0.0953]) -Greedy action tensor([-0.1966, -0.0030, -0.5003, 0.2775]) tensor([0.2194, 0.2662, 0.1619, 0.3525]) -Greedy action tensor([-0.1893, 0.2328, -0.3943, -0.2654]) tensor([0.2344, 0.3575, 0.1909, 0.2172]) -Greedy action tensor([ 1.2853, -1.7777, -0.0248, 1.2716]) tensor([0.4342, 0.0203, 0.1172, 0.4283]) -Greedy action tensor([0.3026, 0.1420, 1.4114, 0.5611]) tensor([0.1619, 0.1379, 0.4906, 0.2096]) -Greedy action tensor([ 0.0782, -1.8854, -0.2669, 0.6799]) tensor([0.2722, 0.0382, 0.1928, 0.4968]) -Greedy action tensor([-0.0894, -1.9680, -0.1520, 0.7831]) tensor([0.2230, 0.0341, 0.2094, 0.5335]) -Greedy action tensor([ 1.0596, -0.1241, 1.1387, 1.2123]) tensor([0.2814, 0.0862, 0.3046, 0.3278]) -Greedy action tensor([-0.1450, -0.4265, -0.6975, 0.4861]) tensor([0.2375, 0.1793, 0.1367, 0.4465]) -Greedy action tensor([ 0.4722, 0.0347, 1.3802, -0.0668]) tensor([0.2124, 0.1371, 0.5266, 0.1239]) -Greedy action tensor([-1.1436, -0.8903, 0.6594, -0.5006]) tensor([0.0975, 0.1256, 0.5915, 0.1854]) -Greedy action tensor([ 0.7834, 0.5073, -0.7432, 0.1514]) tensor([0.3988, 0.3026, 0.0866, 0.2120]) -Greedy action tensor([ 1.5644, -1.0558, 1.2545, 1.0991]) tensor([0.4108, 0.0299, 0.3013, 0.2580]) -Greedy action tensor([-0.0828, -1.7860, 0.1372, 0.4373]) tensor([0.2433, 0.0443, 0.3031, 0.4093]) -Greedy action tensor([-0.6698, -0.2753, -0.2700, 0.7271]) tensor([0.1247, 0.1851, 0.1860, 0.5042]) -Greedy action tensor([ 1.6892, -0.3786, -0.2616, 1.2576]) tensor([0.5214, 0.0659, 0.0741, 0.3386]) -Greedy action tensor([ 0.1712, -0.4670, 0.6289, -0.9911]) tensor([0.2923, 0.1544, 0.4619, 0.0914]) -Greedy action tensor([ 0.3033, -0.6946, -1.0501, -0.2459]) tensor([0.4536, 0.1672, 0.1172, 0.2619]) -Greedy action tensor([-0.5396, -1.1549, -0.8753, 1.3241]) tensor([0.1149, 0.0621, 0.0821, 0.7409]) -Greedy action tensor([ 0.4623, -2.2412, 0.4257, 0.0890]) tensor([0.3677, 0.0246, 0.3545, 0.2532]) -Greedy action tensor([ 1.3190, -0.2622, 0.9914, 0.7888]) tensor([0.3976, 0.0818, 0.2866, 0.2340]) -Greedy action tensor([ 0.2233, -1.5931, 0.5629, 0.3450]) tensor([0.2705, 0.0440, 0.3799, 0.3055]) -Greedy action tensor([-0.0194, -1.0928, 0.2051, 0.7798]) tensor([0.2076, 0.0710, 0.2598, 0.4616]) -Greedy action tensor([-0.7559, -0.3267, -0.5516, 0.6422]) tensor([0.1280, 0.1967, 0.1571, 0.5182]) -Greedy action tensor([-0.2896, -2.1599, -0.1286, -0.2925]) tensor([0.3007, 0.0463, 0.3532, 0.2998]) -Greedy action tensor([ 0.3740, -0.0249, -0.2986, 1.6904]) tensor([0.1692, 0.1135, 0.0863, 0.6310]) -Greedy action tensor([ 0.0628, -1.2461, 1.3612, -0.2654]) tensor([0.1769, 0.0478, 0.6480, 0.1274]) -Greedy action tensor([ 1.6040, -0.4432, 1.2818, 0.8436]) tensor([0.4308, 0.0556, 0.3121, 0.2014]) -Greedy action tensor([ 0.5539, 0.9155, 0.2268, -0.6006]) tensor([0.2880, 0.4135, 0.2077, 0.0908]) -Greedy action tensor([ 1.4223, -0.2114, -0.7971, -0.1362]) tensor([0.6604, 0.1289, 0.0718, 0.1390]) -Greedy action tensor([-0.2401, -0.8295, -0.1335, 0.1758]) tensor([0.2391, 0.1326, 0.2660, 0.3624]) -Greedy action tensor([ 1.4849, -1.5947, 1.2531, 0.8643]) tensor([0.4207, 0.0193, 0.3337, 0.2262]) -Greedy action tensor([-0.5830, -0.0357, 1.2760, -1.0395]) tensor([0.1023, 0.1768, 0.6562, 0.0648]) -Greedy action tensor([ 1.6307, -0.4176, -0.4404, -0.2103]) tensor([0.7074, 0.0912, 0.0892, 0.1122]) -Greedy action tensor([ 0.3410, -1.9228, 0.2180, 0.5100]) tensor([0.3152, 0.0328, 0.2787, 0.3733]) -Greedy action tensor([ 0.7428, -0.2269, -0.0302, -0.0584]) tensor([0.4368, 0.1656, 0.2016, 0.1960]) -Greedy action tensor([ 1.1083, -0.5478, 0.1107, -0.4963]) tensor([0.5680, 0.1084, 0.2095, 0.1142]) -Greedy action tensor([ 0.4569, 0.0203, -0.1389, -0.2353]) tensor([0.3707, 0.2395, 0.2043, 0.1855]) -Greedy action tensor([ 0.6000, -0.2999, -0.0994, -0.1232]) tensor([0.4186, 0.1702, 0.2080, 0.2031]) -Greedy action tensor([ 1.0426, -0.3935, -0.0758, -0.1114]) tensor([0.5319, 0.1265, 0.1738, 0.1677]) -Greedy action tensor([ 1.0123, -0.9649, 0.0530, -0.6282]) tensor([0.5829, 0.0807, 0.2234, 0.1130]) -Greedy action tensor([ 0.8359, -0.4953, -0.0855, -0.2840]) tensor([0.5029, 0.1329, 0.2001, 0.1641]) -Greedy action tensor([ 0.8418, -0.3505, 0.0260, -0.6907]) tensor([0.5097, 0.1547, 0.2255, 0.1101]) -Greedy action tensor([ 0.7651, -0.3927, -0.0701, -0.0391]) tensor([0.4555, 0.1431, 0.1976, 0.2038]) -Greedy action tensor([ 0.6963, -0.5860, -0.0048, -0.6135]) tensor([0.4894, 0.1358, 0.2428, 0.1321]) -Greedy action tensor([ 0.9793, -0.7295, 0.1139, -0.4303]) tensor([0.5416, 0.0981, 0.2280, 0.1323]) -Greedy action tensor([ 0.5361, -0.0150, -0.1526, -0.0833]) tensor([0.3821, 0.2202, 0.1919, 0.2057]) -Greedy action tensor([ 0.5246, -0.3095, -0.0919, -0.2814]) tensor([0.4131, 0.1794, 0.2230, 0.1845]) -Greedy action tensor([ 0.2744, -0.1434, -0.0561, -0.3273]) tensor([0.3419, 0.2251, 0.2457, 0.1873]) -Greedy action tensor([ 0.5406, -0.3937, -0.1336, -0.2466]) tensor([0.4242, 0.1666, 0.2161, 0.1931]) -Greedy action tensor([ 0.7925, -0.5803, 0.1699, -0.2162]) tensor([0.4641, 0.1176, 0.2490, 0.1693]) -Greedy action tensor([ 0.4886, 0.0011, -0.1281, -0.2989]) tensor([0.3833, 0.2354, 0.2069, 0.1744]) -Greedy action tensor([ 0.5597, -0.3535, 0.0066, -0.3096]) tensor([0.4174, 0.1675, 0.2401, 0.1750]) -Greedy action tensor([ 0.7938, -0.3602, -0.3051, -0.3053]) tensor([0.5046, 0.1591, 0.1682, 0.1681]) -Greedy action tensor([ 0.5935, -0.5642, -0.1478, -0.2505]) tensor([0.4503, 0.1415, 0.2146, 0.1936]) -Greedy action tensor([ 0.6514, -0.3593, 0.1250, -0.4074]) tensor([0.4345, 0.1581, 0.2567, 0.1507]) -Greedy action tensor([ 0.8575, -0.4109, -0.2457, -0.2225]) tensor([0.5121, 0.1440, 0.1699, 0.1739]) -Greedy action tensor([ 0.3994, -0.1836, -0.0570, -0.2236]) tensor([0.3666, 0.2046, 0.2322, 0.1966]) -Greedy action tensor([ 0.9014, -0.9650, 0.0024, -0.3029]) tensor([0.5372, 0.0831, 0.2186, 0.1611]) -Greedy action tensor([ 1.2733, -0.7174, -0.2488, -0.4372]) tensor([0.6512, 0.0890, 0.1421, 0.1177]) -Greedy action tensor([ 0.8219, -0.5123, -0.0502, -0.3395]) tensor([0.5014, 0.1320, 0.2096, 0.1570]) -Greedy action tensor([ 0.4099, -0.0269, 0.0715, -0.4036]) tensor([0.3569, 0.2306, 0.2544, 0.1582]) -Greedy action tensor([ 0.9997, -0.8301, -0.0227, -0.6296]) tensor([0.5827, 0.0935, 0.2096, 0.1142]) -Greedy action tensor([ 0.7282, -0.3671, -0.0301, -0.3138]) tensor([0.4639, 0.1552, 0.2173, 0.1636]) -Greedy action tensor([ 1.0304, -0.8701, 0.0493, -0.5181]) tensor([0.5757, 0.0861, 0.2158, 0.1224]) -Greedy action tensor([ 0.2758, -0.0154, -0.1405, -0.2017]) tensor([0.3303, 0.2469, 0.2179, 0.2049]) -Greedy action tensor([ 0.6216, -0.1619, 0.0548, -0.4062]) tensor([0.4198, 0.1918, 0.2382, 0.1502]) -Greedy action tensor([ 0.5634, -0.2447, -0.0187, -0.1497]) tensor([0.4009, 0.1787, 0.2240, 0.1965]) -Greedy action tensor([ 0.9200, -0.5226, 0.0802, -0.5026]) tensor([0.5238, 0.1238, 0.2262, 0.1263]) -Greedy action tensor([ 0.3735, 0.1743, -0.1567, 0.1883]) tensor([0.3088, 0.2530, 0.1817, 0.2566]) -Greedy action tensor([ 0.8948, -0.7603, -0.0532, -0.4516]) tensor([0.5438, 0.1039, 0.2107, 0.1415]) -Greedy action tensor([ 0.7608, -0.4502, -0.0467, -0.5730]) tensor([0.4982, 0.1484, 0.2222, 0.1313]) -Greedy action tensor([ 0.5186, -0.1043, 0.2237, -0.5046]) tensor([0.3787, 0.2031, 0.2820, 0.1361]) -Greedy action tensor([ 1.0210, -0.4316, -0.0908, -0.4676]) tensor([0.5591, 0.1308, 0.1839, 0.1262]) -Greedy action tensor([ 0.7863, -0.0835, 0.0123, -0.2184]) tensor([0.4452, 0.1865, 0.2053, 0.1630]) -Greedy action tensor([ 0.9108, -0.8773, 0.1169, -0.5023]) tensor([0.5369, 0.0898, 0.2427, 0.1307]) -Greedy action tensor([ 0.9706, -0.6788, -0.0032, -0.5578]) tensor([0.5597, 0.1076, 0.2114, 0.1214]) -Greedy action tensor([ 0.9209, -0.6356, -0.2600, -0.5049]) tensor([0.5688, 0.1199, 0.1746, 0.1367]) -Greedy action tensor([ 0.8650, -0.5355, -0.0554, -0.6554]) tensor([0.5366, 0.1323, 0.2138, 0.1173]) -Greedy action tensor([ 0.5559, -0.3373, 0.0142, -0.2956]) tensor([0.4136, 0.1693, 0.2406, 0.1765]) -Greedy action tensor([ 0.4501, 0.1811, -0.3150, -0.3234]) tensor([0.3716, 0.2840, 0.1729, 0.1715]) -Greedy action tensor([ 0.6650, -0.5165, 0.0178, -0.4231]) tensor([0.4614, 0.1416, 0.2416, 0.1554]) -Greedy action tensor([ 0.8276, -0.6609, -0.0988, -0.3914]) tensor([0.5216, 0.1177, 0.2065, 0.1541]) -Greedy action tensor([ 0.5410, -0.3583, -0.1251, -0.1385]) tensor([0.4120, 0.1676, 0.2116, 0.2088]) -Greedy action tensor([ 0.4168, -0.1639, 0.0017, -0.2559]) tensor([0.3663, 0.2049, 0.2418, 0.1869]) -Greedy action tensor([ 0.8466, -0.6689, -0.0779, -0.5792]) tensor([0.5386, 0.1183, 0.2137, 0.1294]) -Greedy action tensor([ 0.2941, -0.4094, -0.1639, -0.1510]) tensor([0.3613, 0.1788, 0.2285, 0.2315]) -Greedy action tensor([ 0.6037, -0.1439, -0.0975, -0.0516]) tensor([0.4018, 0.1903, 0.1993, 0.2087]) -Greedy action tensor([ 0.3081, 0.0812, -0.0981, -0.2838]) tensor([0.3315, 0.2642, 0.2209, 0.1834]) -Greedy action tensor([ 0.6448, -0.4500, 0.1013, -0.4194]) tensor([0.4424, 0.1480, 0.2569, 0.1526]) -Greedy action tensor([ 0.2951, 0.0585, -0.0993, -0.2660]) tensor([0.3296, 0.2602, 0.2222, 0.1881]) -Greedy action tensor([ 0.7689, -0.1987, -0.1395, -0.0309]) tensor([0.4479, 0.1702, 0.1806, 0.2013]) -Greedy action tensor([ 0.9110, -0.5460, 0.0041, -0.4406]) tensor([0.5276, 0.1229, 0.2130, 0.1365]) -Greedy action tensor([ 0.6734, -0.2856, -0.2602, -0.4850]) tensor([0.4784, 0.1833, 0.1881, 0.1502]) -Greedy action tensor([ 0.5662, 0.1382, -0.0236, -0.2590]) tensor([0.3782, 0.2465, 0.2097, 0.1657]) -Greedy action tensor([ 0.6162, -0.3163, -0.2422, -0.4483]) tensor([0.4625, 0.1820, 0.1960, 0.1595]) -Greedy action tensor([ 1.1572, -0.3510, -0.0212, -0.2386]) tensor([0.5628, 0.1246, 0.1732, 0.1394]) -Greedy action tensor([ 0.7598, -0.5600, -0.0454, -0.3174]) tensor([0.4867, 0.1300, 0.2175, 0.1657]) -Greedy action tensor([ 0.5231, -0.0672, -0.1190, -0.2329]) tensor([0.3922, 0.2173, 0.2064, 0.1841]) -Greedy action tensor([ 0.7672, -0.3762, -0.0830, -0.3313]) tensor([0.4809, 0.1533, 0.2055, 0.1603]) -Greedy action tensor([ 0.7087, 0.3408, -0.4618, -0.3588]) tensor([0.4262, 0.2950, 0.1322, 0.1466]) -Greedy action tensor([ 0.8891, -0.5367, -0.0039, -0.2756]) tensor([0.5098, 0.1225, 0.2087, 0.1590]) -Greedy action tensor([ 0.6405, -0.2989, -0.0227, -0.1245]) tensor([0.4217, 0.1648, 0.2173, 0.1962]) -Greedy action tensor([ 0.6707, -0.7437, 0.1584, -0.8160]) tensor([0.4835, 0.1175, 0.2897, 0.1093]) -Greedy action tensor([ 0.3663, 0.4433, -0.3874, 0.2798]) tensor([0.2884, 0.3114, 0.1357, 0.2645]) -Greedy action tensor([ 0.9386, -0.5748, -0.1024, -0.3591]) tensor([0.5416, 0.1192, 0.1912, 0.1479]) -Greedy action tensor([ 0.9063, -1.1199, -0.0475, -0.4513]) tensor([0.5636, 0.0743, 0.2171, 0.1450]) -Greedy action tensor([ 0.7092, -0.1078, -0.1169, -0.0760]) tensor([0.4282, 0.1892, 0.1874, 0.1953]) -Greedy action tensor([ 0.8362, -0.3860, -0.0717, -0.2978]) tensor([0.4951, 0.1459, 0.1997, 0.1593]) -Greedy action tensor([ 0.3794, -0.0421, -0.2028, -0.1476]) tensor([0.3565, 0.2339, 0.1992, 0.2105]) -Greedy action tensor([0.2753, 0.4762, 0.0213, 0.0953]) tensor([0.2609, 0.3189, 0.2024, 0.2179]) -Greedy action tensor([ 0.5792, 0.0160, -0.3818, -0.2587]) tensor([0.4194, 0.2388, 0.1604, 0.1814]) -Greedy action tensor([ 0.5395, -0.3805, -0.1045, -0.3068]) tensor([0.4250, 0.1694, 0.2232, 0.1823]) -Greedy action tensor([ 0.6438, -0.2067, 0.0252, -0.2189]) tensor([0.4188, 0.1789, 0.2256, 0.1767]) -Greedy action tensor([ 0.3543, 0.0593, -0.0553, 0.0356]) tensor([0.3189, 0.2375, 0.2117, 0.2319]) -Greedy action tensor([ 1.0660, -0.2741, -0.0256, -0.4736]) tensor([0.5519, 0.1445, 0.1853, 0.1184]) -Greedy action tensor([-1.6535, -0.2959, 0.6483, 0.0125]) tensor([0.0496, 0.1927, 0.4954, 0.2623]) -Greedy action tensor([-1.7012, -0.5322, 0.5368, -0.0418]) tensor([0.0531, 0.1708, 0.4973, 0.2789]) -Greedy action tensor([-1.8861, -0.4175, 0.6335, -0.1666]) tensor([0.0428, 0.1860, 0.5321, 0.2391]) -Greedy action tensor([-1.8984, -0.4463, 0.6408, -0.1527]) tensor([0.0422, 0.1805, 0.5352, 0.2421]) -Greedy action tensor([-1.8390, -0.3084, 0.6449, -0.1066]) tensor([0.0430, 0.1986, 0.5153, 0.2431]) -Greedy action tensor([-1.9292, -0.4393, 0.6580, -0.1723]) tensor([0.0408, 0.1809, 0.5420, 0.2363]) -Greedy action tensor([-1.6760, 0.0230, 0.4744, -0.0828]) tensor([0.0501, 0.2738, 0.4299, 0.2463]) -Greedy action tensor([-1.9291, -0.4293, 0.6570, -0.1730]) tensor([0.0407, 0.1825, 0.5409, 0.2358]) -Greedy action tensor([-1.8402, -0.3927, 0.6560, -0.1082]) tensor([0.0434, 0.1846, 0.5267, 0.2453]) -Greedy action tensor([-1.8997, -0.4538, 0.6404, -0.1587]) tensor([0.0423, 0.1797, 0.5367, 0.2413]) -Greedy action tensor([-1.2039, -1.0954, 0.0412, -0.1880]) tensor([0.1198, 0.1335, 0.4160, 0.3308]) -Greedy action tensor([-1.8823, -0.3936, 0.6348, -0.1771]) tensor([0.0429, 0.1900, 0.5313, 0.2359]) -Greedy action tensor([-1.8396, -0.4187, 0.6118, -0.1220]) tensor([0.0448, 0.1856, 0.5200, 0.2496]) -Greedy action tensor([-1.7290, -0.5082, 0.5543, -0.0621]) tensor([0.0513, 0.1739, 0.5032, 0.2716]) -Greedy action tensor([-1.7135, -0.5051, 0.5674, 0.0212]) tensor([0.0505, 0.1691, 0.4942, 0.2862]) -Greedy action tensor([-1.4480, 0.0998, 0.5507, -0.4936]) tensor([0.0638, 0.2999, 0.4707, 0.1657]) -Greedy action tensor([-1.8841, -0.4411, 0.6380, -0.1505]) tensor([0.0428, 0.1813, 0.5334, 0.2424]) -Greedy action tensor([-1.8454, -0.4598, 0.6822, -0.0599]) tensor([0.0426, 0.1702, 0.5333, 0.2539]) -Greedy action tensor([-1.4489, -0.5319, 0.4421, 0.0647]) tensor([0.0682, 0.1705, 0.4516, 0.3097]) -Greedy action tensor([-1.7319, -0.2735, 0.5867, -0.0445]) tensor([0.0479, 0.2060, 0.4870, 0.2591]) -Greedy action tensor([-1.9236, -0.4350, 0.6594, -0.1679]) tensor([0.0409, 0.1812, 0.5413, 0.2367]) -Greedy action tensor([-1.7161, -0.5090, 0.5492, -0.0049]) tensor([0.0512, 0.1714, 0.4937, 0.2837]) -Greedy action tensor([-1.8876, -0.4130, 0.6810, -0.0894]) tensor([0.0409, 0.1787, 0.5335, 0.2469]) -Greedy action tensor([-0.4865, -0.5088, 0.2237, 0.2936]) tensor([0.1614, 0.1579, 0.3284, 0.3522]) -Greedy action tensor([-1.8794, -0.4628, 0.6332, -0.1440]) tensor([0.0432, 0.1783, 0.5333, 0.2452]) -Greedy action tensor([-1.8978, -0.3865, 0.6388, -0.1527]) tensor([0.0418, 0.1897, 0.5288, 0.2396]) -Greedy action tensor([-1.8601, -0.4382, 0.6255, -0.1385]) tensor([0.0440, 0.1822, 0.5279, 0.2459]) -Greedy action tensor([-1.8735, -0.4582, 0.6265, -0.1356]) tensor([0.0435, 0.1791, 0.5300, 0.2473]) -Greedy action tensor([-1.9232, -0.4043, 0.6535, -0.1683]) tensor([0.0408, 0.1864, 0.5368, 0.2360]) -Greedy action tensor([-1.4949, -0.4002, 0.4314, 0.0737]) tensor([0.0639, 0.1909, 0.4385, 0.3067]) -Greedy action tensor([-1.1076, -0.5872, 0.4392, -0.1039]) tensor([0.0989, 0.1665, 0.4646, 0.2699]) -Greedy action tensor([-1.7270, -0.3560, 0.5455, -0.0955]) tensor([0.0506, 0.1994, 0.4912, 0.2587]) -Greedy action tensor([-1.4464, -0.0282, 0.3963, -0.0577]) tensor([0.0647, 0.2672, 0.4086, 0.2595]) -Greedy action tensor([-1.8458, -0.4863, 0.6459, -0.1000]) tensor([0.0440, 0.1715, 0.5321, 0.2524]) -Greedy action tensor([-1.4286, -0.2454, 0.7682, 0.3623]) tensor([0.0519, 0.1695, 0.4672, 0.3113]) -Greedy action tensor([-1.6801, -0.1587, 0.6719, 0.0788]) tensor([0.0457, 0.2092, 0.4799, 0.2652]) -Greedy action tensor([-1.9311, -0.4548, 0.6613, -0.1740]) tensor([0.0408, 0.1784, 0.5446, 0.2362]) -Greedy action tensor([-1.7432, -0.5175, 0.5581, -0.0765]) tensor([0.0508, 0.1730, 0.5073, 0.2689]) -Greedy action tensor([-1.8909, -0.3198, 0.5859, -0.2069]) tensor([0.0433, 0.2083, 0.5152, 0.2332]) -Greedy action tensor([-1.0831, -0.6326, 1.2837, 1.4831]) tensor([0.0381, 0.0598, 0.4062, 0.4959]) -Greedy action tensor([-1.6090, -0.2685, 0.5401, 0.1761]) tensor([0.0517, 0.1974, 0.4431, 0.3079]) -Greedy action tensor([-1.6049, -0.7068, 0.6547, 0.0614]) tensor([0.0546, 0.1340, 0.5227, 0.2888]) -Greedy action tensor([-1.8192, -0.4400, 0.6909, -0.0627]) tensor([0.0433, 0.1722, 0.5334, 0.2511]) -Greedy action tensor([-1.6517, -0.4967, 0.6051, 0.1372]) tensor([0.0507, 0.1610, 0.4847, 0.3036]) -Greedy action tensor([-1.9108, -0.4541, 0.6754, -0.1518]) tensor([0.0410, 0.1761, 0.5447, 0.2382]) -Greedy action tensor([-1.8991, -0.4643, 0.6693, -0.1254]) tensor([0.0414, 0.1740, 0.5405, 0.2441]) -Greedy action tensor([-0.7331, 0.3821, 0.3931, 0.8110]) tensor([0.0846, 0.2581, 0.2610, 0.3963]) -Greedy action tensor([-1.8015, -0.3814, 0.5782, -0.1077]) tensor([0.0468, 0.1935, 0.5052, 0.2545]) -Greedy action tensor([-1.7620, -0.4248, 0.6926, 0.0689]) tensor([0.0441, 0.1678, 0.5131, 0.2750]) -Greedy action tensor([-1.6193, -0.0882, 0.5795, 0.0948]) tensor([0.0495, 0.2290, 0.4465, 0.2750]) -Greedy action tensor([-1.7557, -0.4844, 0.5716, -0.0644]) tensor([0.0494, 0.1761, 0.5064, 0.2681]) -Greedy action tensor([-1.8978, -0.4460, 0.6411, -0.1539]) tensor([0.0423, 0.1805, 0.5354, 0.2418]) -Greedy action tensor([-1.7624, -0.2489, 0.5402, -0.0665]) tensor([0.0476, 0.2164, 0.4763, 0.2597]) -Greedy action tensor([-1.9213, -0.4405, 0.6522, -0.1702]) tensor([0.0412, 0.1811, 0.5403, 0.2374]) -Greedy action tensor([-1.9443, -0.4502, 0.6670, -0.1797]) tensor([0.0401, 0.1788, 0.5466, 0.2344]) -Greedy action tensor([-1.8719, -0.4045, 0.6256, -0.1359]) tensor([0.0432, 0.1873, 0.5246, 0.2450]) -Greedy action tensor([-1.8185, -0.3417, 0.5921, -0.1007]) tensor([0.0453, 0.1982, 0.5043, 0.2522]) -Greedy action tensor([-1.9016, -0.3890, 0.6447, -0.1461]) tensor([0.0415, 0.1884, 0.5298, 0.2402]) -Greedy action tensor([-1.8170, -0.4374, 0.6041, -0.1084]) tensor([0.0460, 0.1827, 0.5176, 0.2538]) -Greedy action tensor([-1.8472, -0.4240, 0.6745, -0.0777]) tensor([0.0426, 0.1768, 0.5305, 0.2500]) -Greedy action tensor([-0.8670, -0.5131, 0.3487, -0.1305]) tensor([0.1268, 0.1807, 0.4277, 0.2648]) -Greedy action tensor([-1.9186, -0.4466, 0.6549, -0.1679]) tensor([0.0413, 0.1799, 0.5412, 0.2377]) -Greedy action tensor([-1.9351, -0.4022, 0.6532, -0.1736]) tensor([0.0404, 0.1871, 0.5375, 0.2351]) -Greedy action tensor([-1.8462, -0.4435, 0.6070, -0.1349]) tensor([0.0450, 0.1829, 0.5230, 0.2491]) -Greedy action tensor([-1.7401, -0.3065, 0.0352, -0.4790]) tensor([0.0684, 0.2868, 0.4035, 0.2413]) -Greedy action tensor([-1.8346, -0.3128, 0.6163, -0.1034]) tensor([0.0438, 0.2007, 0.5081, 0.2474]) -Greedy action tensor([-1.6742, -0.3523, 0.5253, -0.0256]) tensor([0.0527, 0.1977, 0.4755, 0.2741]) -Greedy action tensor([-1.7741, 0.0205, 0.5109, -0.0750]) tensor([0.0448, 0.2697, 0.4404, 0.2451]) -Greedy action tensor([-1.8946, -0.4767, 0.7518, -0.0265]) tensor([0.0389, 0.1606, 0.5486, 0.2519]) -Greedy action tensor([-1.4532, -0.5739, 0.4019, 0.1053]) tensor([0.0687, 0.1655, 0.4392, 0.3265]) -Greedy action tensor([-1.3043, -0.4572, 0.3696, 0.0608]) tensor([0.0795, 0.1854, 0.4238, 0.3112]) -Greedy action tensor([-1.0193, 0.3513, 0.2199, 0.0391]) tensor([0.0887, 0.3493, 0.3063, 0.2557]) -Greedy action tensor([-1.7030, -0.4514, 0.9467, 0.5315]) tensor([0.0357, 0.1249, 0.5056, 0.3338]) -Greedy action tensor([-1.9444, -0.4509, 0.6660, -0.1804]) tensor([0.0402, 0.1789, 0.5465, 0.2344]) -Greedy action tensor([-1.8542, -0.3847, 0.6158, -0.1568]) tensor([0.0442, 0.1921, 0.5224, 0.2413]) -Greedy action tensor([-1.8540, -0.1836, 0.5863, -0.1272]) tensor([0.0427, 0.2270, 0.4902, 0.2401]) -Greedy action tensor([-1.7567, -0.3986, 0.5675, -0.0480]) tensor([0.0485, 0.1885, 0.4954, 0.2677]) -Greedy action tensor([-1.9083, -0.4419, 0.6503, -0.1635]) tensor([0.0417, 0.1807, 0.5388, 0.2388]) -Greedy action tensor([-1.9106, -0.3435, 0.6255, -0.1576]) tensor([0.0413, 0.1981, 0.5220, 0.2386]) -Greedy action tensor([-1.7505, -0.4614, 0.7054, 0.1182]) tensor([0.0439, 0.1594, 0.5120, 0.2846]) -Greedy action tensor([-0.2197, 0.4876, 0.5686, 1.5960]) tensor([0.0879, 0.1784, 0.1934, 0.5403]) -Greedy action tensor([ 1.7421, -0.7181, -0.2295, 0.0831]) tensor([0.7067, 0.0604, 0.0984, 0.1345]) -Greedy action tensor([ 0.9505, -0.3596, -0.6636, 0.2402]) tensor([0.5101, 0.1376, 0.1015, 0.2507]) -Greedy action tensor([ 1.5172, -0.4079, -0.4163, 0.4632]) tensor([0.6101, 0.0890, 0.0882, 0.2127]) -Greedy action tensor([ 1.8312, -0.6090, -0.3158, 0.3518]) tensor([0.6984, 0.0609, 0.0816, 0.1591]) -Greedy action tensor([ 0.7789, -0.1548, -0.0142, -0.1450]) tensor([0.4459, 0.1753, 0.2018, 0.1770]) -Greedy action tensor([ 0.8175, -0.2952, -0.1952, 0.4666]) tensor([0.4174, 0.1372, 0.1516, 0.2939]) -Greedy action tensor([ 1.2368, -0.0469, -0.3780, 0.1078]) tensor([0.5558, 0.1540, 0.1106, 0.1797]) -Greedy action tensor([ 1.5499, 0.2110, -1.2631, -0.2259]) tensor([0.6705, 0.1758, 0.0402, 0.1135]) -Greedy action tensor([ 1.1820, -0.1186, -0.8361, 0.1756]) tensor([0.5647, 0.1538, 0.0751, 0.2064]) -Greedy action tensor([1.1476, 0.1237, 0.2119, 0.0893]) tensor([0.4765, 0.1712, 0.1869, 0.1654]) -Greedy action tensor([ 1.6080, 0.0448, -0.2841, 0.6052]) tensor([0.5790, 0.1213, 0.0873, 0.2124]) -Greedy action tensor([ 1.5079, -0.5750, 0.0084, 0.4856]) tensor([0.5856, 0.0730, 0.1307, 0.2107]) -Greedy action tensor([ 0.6769, 0.0626, 0.1159, -0.2111]) tensor([0.3963, 0.2144, 0.2262, 0.1631]) -Greedy action tensor([ 0.3383, -0.2233, -0.1506, -0.0004]) tensor([0.3453, 0.1969, 0.2118, 0.2461]) -Greedy action tensor([ 0.9710, -0.3906, -0.2820, 0.2822]) tensor([0.4892, 0.1254, 0.1397, 0.2457]) -Greedy action tensor([ 1.3790, -0.6193, -0.5838, 0.4581]) tensor([0.5973, 0.0810, 0.0839, 0.2378]) -Greedy action tensor([ 1.2176, -0.2523, -0.1727, 0.2972]) tensor([0.5327, 0.1225, 0.1326, 0.2122]) -Greedy action tensor([ 2.1182, -0.9513, -0.1875, 0.7494]) tensor([0.7140, 0.0332, 0.0712, 0.1817]) -Greedy action tensor([ 0.8133, -0.6193, -0.4437, 0.2649]) tensor([0.4760, 0.1136, 0.1354, 0.2750]) -Greedy action tensor([ 1.8695, -0.3190, -0.3581, 0.5223]) tensor([0.6757, 0.0757, 0.0728, 0.1757]) -Greedy action tensor([ 0.9842, -0.1071, 0.1711, 0.2636]) tensor([0.4414, 0.1482, 0.1957, 0.2147]) -Greedy action tensor([ 1.9509, -0.0623, -0.8430, 0.4437]) tensor([0.7061, 0.0943, 0.0432, 0.1564]) -Greedy action tensor([ 2.0951, -0.3667, -0.6577, 0.7796]) tensor([0.7055, 0.0602, 0.0450, 0.1893]) -Greedy action tensor([ 1.7964, -0.6974, -0.5943, 0.8793]) tensor([0.6354, 0.0525, 0.0582, 0.2540]) -Greedy action tensor([ 2.7822, -1.1561, -0.3449, 0.6017]) tensor([0.8501, 0.0166, 0.0373, 0.0960]) -Greedy action tensor([ 1.4847, -0.6971, -0.2715, 0.2467]) tensor([0.6347, 0.0716, 0.1096, 0.1841]) -Greedy action tensor([ 1.8499, -0.2375, -0.4778, 0.3121]) tensor([0.6962, 0.0863, 0.0679, 0.1496]) -Greedy action tensor([ 1.4655, -0.2934, -0.4878, 0.2287]) tensor([0.6233, 0.1074, 0.0884, 0.1810]) -Greedy action tensor([ 1.6637, -0.7921, -0.4925, 0.7028]) tensor([0.6313, 0.0542, 0.0731, 0.2415]) -Greedy action tensor([ 1.8837, -0.8519, -0.1718, 0.4873]) tensor([0.6943, 0.0450, 0.0889, 0.1718]) -Greedy action tensor([ 1.3770, -0.4674, -0.4317, 0.2106]) tensor([0.6122, 0.0968, 0.1003, 0.1907]) -Greedy action tensor([ 0.8783, -0.5403, -0.2984, 0.6614]) tensor([0.4246, 0.1028, 0.1309, 0.3418]) -Greedy action tensor([ 1.1682, -0.0809, 0.0100, -0.4188]) tensor([0.5539, 0.1588, 0.1740, 0.1133]) -Greedy action tensor([ 1.2908, -0.0600, -0.3632, 0.1847]) tensor([0.5614, 0.1454, 0.1074, 0.1858]) -Greedy action tensor([ 1.5306, -0.5683, -0.4831, 0.5687]) tensor([0.6104, 0.0748, 0.0815, 0.2333]) -Greedy action tensor([ 1.6537, -0.6339, -0.4236, 0.2521]) tensor([0.6789, 0.0689, 0.0850, 0.1671]) -Greedy action tensor([ 1.0490, -0.3289, -0.5506, 0.2598]) tensor([0.5240, 0.1321, 0.1058, 0.2380]) -Greedy action tensor([ 1.5831, -0.4869, -0.7781, 0.8292]) tensor([0.5914, 0.0746, 0.0558, 0.2782]) -Greedy action tensor([ 1.1589, -0.0613, -0.2966, 0.4479]) tensor([0.4952, 0.1462, 0.1155, 0.2432]) -Greedy action tensor([ 0.9923, -0.7710, 0.1129, 0.0207]) tensor([0.5089, 0.0873, 0.2112, 0.1926]) -Greedy action tensor([ 1.5851, -0.4030, -0.3795, 0.2996]) tensor([0.6436, 0.0882, 0.0902, 0.1780]) -Greedy action tensor([ 1.2777, -0.1214, -0.8918, 0.2859]) tensor([0.5774, 0.1425, 0.0660, 0.2142]) -Greedy action tensor([ 0.9347, -0.5032, -0.2530, 0.5619]) tensor([0.4482, 0.1064, 0.1367, 0.3087]) -Greedy action tensor([ 1.3915, -0.6898, -0.1836, 0.0678]) tensor([0.6258, 0.0781, 0.1295, 0.1666]) -Greedy action tensor([ 2.3527, 0.0119, -0.3119, 0.4677]) tensor([0.7589, 0.0730, 0.0528, 0.1152]) -Greedy action tensor([ 1.3440, -0.3248, -0.3621, 0.3863]) tensor([0.5702, 0.1075, 0.1035, 0.2188]) -Greedy action tensor([ 1.9752, -0.9762, -0.4273, 0.5056]) tensor([0.7284, 0.0381, 0.0659, 0.1676]) -Greedy action tensor([ 1.6613, -0.7480, -0.4531, 0.6821]) tensor([0.6304, 0.0567, 0.0761, 0.2368]) -Greedy action tensor([ 2.4653, -0.8895, -0.4987, 0.9806]) tensor([0.7616, 0.0266, 0.0393, 0.1725]) -Greedy action tensor([ 1.9178, -0.3334, 0.1671, 0.4989]) tensor([0.6575, 0.0692, 0.1142, 0.1591]) -Greedy action tensor([ 1.5644, -0.6453, -0.2997, 0.2556]) tensor([0.6515, 0.0715, 0.1010, 0.1760]) -Greedy action tensor([ 1.6452, -0.2059, -1.2026, 0.1960]) tensor([0.6898, 0.1083, 0.0400, 0.1619]) -Greedy action tensor([ 1.5368, -0.2828, -0.6129, 0.5201]) tensor([0.6096, 0.0988, 0.0710, 0.2206]) -Greedy action tensor([ 1.8977, -0.6011, -0.2393, 0.1272]) tensor([0.7297, 0.0600, 0.0861, 0.1242]) -Greedy action tensor([ 1.0154, -0.7548, -0.4239, 0.4791]) tensor([0.5019, 0.0855, 0.1190, 0.2936]) -Greedy action tensor([ 0.9758, 0.1225, -0.7181, 0.1690]) tensor([0.4863, 0.2072, 0.0894, 0.2171]) -Greedy action tensor([ 1.6830, -0.4992, -0.3467, 0.6277]) tensor([0.6280, 0.0708, 0.0825, 0.2186]) -Greedy action tensor([ 2.0657, -0.5345, -0.5565, 0.8570]) tensor([0.6918, 0.0514, 0.0503, 0.2066]) -Greedy action tensor([ 1.2061, -0.0551, -1.0403, 0.1885]) tensor([0.5712, 0.1618, 0.0604, 0.2065]) -Greedy action tensor([ 2.0506, -0.1712, -0.5599, 0.5158]) tensor([0.7156, 0.0776, 0.0526, 0.1542]) -Greedy action tensor([ 1.9526, -0.6562, -0.3995, 0.7136]) tensor([0.6857, 0.0505, 0.0653, 0.1986]) -Greedy action tensor([ 1.6607, 0.0421, -0.1451, -0.0957]) tensor([0.6514, 0.1291, 0.1071, 0.1125]) -Greedy action tensor([ 2.0767, -0.6827, -0.4773, 0.4016]) tensor([0.7528, 0.0477, 0.0585, 0.1410]) -Greedy action tensor([ 0.7313, -0.1062, -0.0744, 0.0350]) tensor([0.4205, 0.1820, 0.1879, 0.2096]) -Greedy action tensor([ 2.2111, -1.0706, -0.4838, 0.0676]) tensor([0.8181, 0.0307, 0.0553, 0.0959]) -Greedy action tensor([ 1.3043, 0.1017, -0.5126, 0.1083]) tensor([0.5665, 0.1702, 0.0921, 0.1713]) -Greedy action tensor([ 0.8705, -0.2010, -0.0261, 0.2719]) tensor([0.4348, 0.1489, 0.1774, 0.2389]) -Greedy action tensor([ 1.5234, -0.8125, -0.0546, 0.3129]) tensor([0.6245, 0.0604, 0.1289, 0.1861]) -Greedy action tensor([ 0.9738, -0.0608, -0.4947, -0.0583]) tensor([0.5150, 0.1830, 0.1186, 0.1835]) -Greedy action tensor([ 1.2592, -0.2622, -0.1306, 0.1638]) tensor([0.5550, 0.1212, 0.1382, 0.1856]) -Greedy action tensor([ 1.5003, -0.5728, -0.3876, -0.1143]) tensor([0.6774, 0.0852, 0.1026, 0.1348]) -Greedy action tensor([ 1.4406, -0.5588, -0.9077, 0.4482]) tensor([0.6244, 0.0845, 0.0596, 0.2314]) -Greedy action tensor([ 2.6437, -1.1399, -0.1771, 0.7040]) tensor([0.8156, 0.0185, 0.0486, 0.1173]) -Greedy action tensor([ 1.1887, -0.2949, -0.7325, 0.0512]) tensor([0.5904, 0.1339, 0.0864, 0.1893]) -Greedy action tensor([ 1.5213, -0.7855, -0.2328, 0.3202]) tensor([0.6355, 0.0633, 0.1100, 0.1912]) -Greedy action tensor([ 0.8388, -0.0747, -0.1731, 0.2732]) tensor([0.4287, 0.1720, 0.1558, 0.2435]) -Greedy action tensor([ 1.6995, -1.1928, -0.0031, 0.1503]) tensor([0.6896, 0.0382, 0.1256, 0.1465]) -Greedy action tensor([ 1.1505, -0.4808, -0.1798, 0.1273]) tensor([0.5496, 0.1075, 0.1453, 0.1975]) -Greedy action tensor([ 1.8821, -0.6515, -0.3924, 0.8347]) tensor([0.6523, 0.0518, 0.0671, 0.2289]) -Greedy action tensor([ 1.3971, -0.1352, -0.7037, 0.3494]) tensor([0.5920, 0.1279, 0.0724, 0.2076]) -Greedy action tensor([ 1.1688, -0.2761, -0.5457, -0.0277]) tensor([0.5820, 0.1372, 0.1048, 0.1759]) -Greedy action tensor([ 0.1327, 0.0024, 0.0093, -0.0813]) tensor([0.2802, 0.2460, 0.2477, 0.2262]) -Greedy action tensor([ 0.8561, -0.5983, 0.0614, -0.3567]) tensor([0.5044, 0.1178, 0.2278, 0.1500]) -Greedy action tensor([ 0.9315, -0.8279, -0.1467, -0.5809]) tensor([0.5771, 0.0994, 0.1963, 0.1272]) -Greedy action tensor([ 0.8426, -0.4785, -0.0552, -0.2064]) tensor([0.4939, 0.1318, 0.2013, 0.1730]) -Greedy action tensor([ 0.7602, -0.4740, -0.1709, -0.3881]) tensor([0.4994, 0.1454, 0.1968, 0.1584]) -Greedy action tensor([ 0.4830, -0.2832, -0.0642, -0.1884]) tensor([0.3915, 0.1819, 0.2265, 0.2001]) -Greedy action tensor([ 0.5314, -0.6162, 0.1197, -0.5390]) tensor([0.4305, 0.1366, 0.2852, 0.1476]) -Greedy action tensor([ 0.2893, -0.0374, 0.0263, -0.2030]) tensor([0.3224, 0.2326, 0.2479, 0.1971]) -Greedy action tensor([ 1.0659, -0.3998, 0.0635, -0.4174]) tensor([0.5480, 0.1265, 0.2011, 0.1243]) -Greedy action tensor([ 0.8250, -0.1631, -0.0271, -0.2548]) tensor([0.4676, 0.1741, 0.1994, 0.1588]) -Greedy action tensor([ 0.4071, -0.1720, -0.0830, -0.1976]) tensor([0.3677, 0.2061, 0.2253, 0.2009]) -Greedy action tensor([ 0.5418, -0.5723, -0.1160, -0.1939]) tensor([0.4300, 0.1411, 0.2228, 0.2061]) -Greedy action tensor([ 1.1748, -0.8284, 0.0664, -0.7912]) tensor([0.6231, 0.0841, 0.2057, 0.0872]) -Greedy action tensor([ 0.4261, -0.1970, -0.1078, -0.1831]) tensor([0.3750, 0.2011, 0.2199, 0.2039]) -Greedy action tensor([ 0.7738, -0.8433, -0.1012, -0.5136]) tensor([0.5287, 0.1049, 0.2204, 0.1459]) -Greedy action tensor([ 0.5729, -0.1392, 0.0944, -0.3907]) tensor([0.4013, 0.1969, 0.2487, 0.1531]) -Greedy action tensor([ 0.5747, -0.6839, -0.1540, -0.1804]) tensor([0.4471, 0.1270, 0.2157, 0.2101]) -Greedy action tensor([ 0.4792, -0.0528, -0.2293, -0.1227]) tensor([0.3806, 0.2236, 0.1874, 0.2085]) -Greedy action tensor([ 0.9627, -0.9697, 0.0435, -0.6764]) tensor([0.5754, 0.0833, 0.2295, 0.1117]) -Greedy action tensor([ 0.6672, -0.2431, -0.0760, -0.1755]) tensor([0.4332, 0.1743, 0.2060, 0.1865]) -Greedy action tensor([ 0.5217, -0.1764, -0.0404, -0.2639]) tensor([0.3963, 0.1972, 0.2259, 0.1806]) -Greedy action tensor([ 0.6639, -0.7751, -0.0291, -0.3246]) tensor([0.4741, 0.1124, 0.2371, 0.1764]) -Greedy action tensor([ 0.7915, -0.9369, 0.0938, -0.4007]) tensor([0.5053, 0.0897, 0.2515, 0.1534]) -Greedy action tensor([ 0.0498, 0.1299, -0.0696, -0.2107]) tensor([0.2673, 0.2896, 0.2372, 0.2060]) -Greedy action tensor([ 0.6962, -0.4204, 0.0425, -0.2222]) tensor([0.4451, 0.1457, 0.2315, 0.1777]) -Greedy action tensor([ 0.7431, -0.5648, -0.0203, -0.3658]) tensor([0.4839, 0.1309, 0.2256, 0.1597]) -Greedy action tensor([ 0.7730, -0.2033, -0.0507, -0.1842]) tensor([0.4546, 0.1713, 0.1995, 0.1746]) -Greedy action tensor([ 0.5837, -0.4357, -0.0421, -0.4161]) tensor([0.4418, 0.1594, 0.2363, 0.1626]) -Greedy action tensor([ 0.3859, 0.0405, -0.0456, -0.0246]) tensor([0.3310, 0.2343, 0.2150, 0.2196]) -Greedy action tensor([ 0.4797, -0.0635, -0.0599, -0.0612]) tensor([0.3642, 0.2115, 0.2123, 0.2120]) -Greedy action tensor([ 0.3621, -0.2279, 0.0930, -0.2926]) tensor([0.3524, 0.1953, 0.2692, 0.1831]) -Greedy action tensor([ 0.7690, -0.4747, -0.1349, -0.2131]) tensor([0.4836, 0.1394, 0.1959, 0.1811]) -Greedy action tensor([ 0.7235, -0.3835, -0.1512, -0.3079]) tensor([0.4753, 0.1571, 0.1982, 0.1694]) -Greedy action tensor([ 0.8540, -0.3703, -0.1420, -0.1265]) tensor([0.4906, 0.1442, 0.1812, 0.1840]) -Greedy action tensor([ 0.6843, -0.1235, -0.4453, -0.6773]) tensor([0.4938, 0.2201, 0.1596, 0.1265]) -Greedy action tensor([ 1.0544, -1.1403, 0.0932, -0.6611]) tensor([0.5975, 0.0666, 0.2285, 0.1075]) -Greedy action tensor([ 0.6656, -0.7630, -0.1296, -0.4269]) tensor([0.4935, 0.1183, 0.2228, 0.1655]) -Greedy action tensor([ 1.1226, -1.1253, 0.0103, -0.6519]) tensor([0.6234, 0.0658, 0.2050, 0.1057]) -Greedy action tensor([ 1.1083, -0.6226, -0.0307, -0.4921]) tensor([0.5886, 0.1042, 0.1884, 0.1188]) -Greedy action tensor([ 0.5384, -0.1578, -0.0178, -0.6034]) tensor([0.4182, 0.2085, 0.2398, 0.1335]) -Greedy action tensor([ 0.7260, -0.3086, -0.0318, -0.0838]) tensor([0.4407, 0.1566, 0.2066, 0.1961]) -Greedy action tensor([ 0.5084, 0.0906, -0.1042, -0.2026]) tensor([0.3715, 0.2446, 0.2013, 0.1825]) -Greedy action tensor([ 0.8069, -0.4835, -0.1965, -0.3423]) tensor([0.5105, 0.1405, 0.1872, 0.1618]) -Greedy action tensor([ 0.3661, -0.0979, -0.0846, -0.2245]) tensor([0.3546, 0.2230, 0.2260, 0.1965]) -Greedy action tensor([ 0.8706, -0.4848, -0.0127, -0.5155]) tensor([0.5205, 0.1342, 0.2152, 0.1301]) -Greedy action tensor([ 0.4230, -0.0482, 0.0352, -0.1848]) tensor([0.3512, 0.2192, 0.2383, 0.1912]) -Greedy action tensor([ 0.7134, -0.5826, -0.1845, -0.5680]) tensor([0.5105, 0.1397, 0.2080, 0.1418]) -Greedy action tensor([ 0.5934, -0.2307, -0.0181, -0.1385]) tensor([0.4061, 0.1781, 0.2204, 0.1954]) -Greedy action tensor([ 1.0535, -1.0429, -0.1270, -0.5436]) tensor([0.6126, 0.0753, 0.1881, 0.1240]) -Greedy action tensor([ 0.4489, -0.1901, 0.0910, -0.3638]) tensor([0.3744, 0.1976, 0.2618, 0.1661]) -Greedy action tensor([ 0.9003, -0.1454, 0.1421, -0.6022]) tensor([0.4896, 0.1721, 0.2294, 0.1090]) -Greedy action tensor([ 0.8871, -0.1788, -0.3588, -0.4629]) tensor([0.5287, 0.1821, 0.1521, 0.1371]) -Greedy action tensor([ 0.4058, -0.1326, 0.0166, -0.2327]) tensor([0.3585, 0.2092, 0.2429, 0.1893]) -Greedy action tensor([ 0.7030, 0.0883, -0.1501, -0.2945]) tensor([0.4281, 0.2315, 0.1824, 0.1579]) -Greedy action tensor([ 0.2337, -0.1371, -0.1145, -0.2831]) tensor([0.3342, 0.2306, 0.2359, 0.1993]) -Greedy action tensor([ 0.9886, -0.7309, 0.1385, -0.2071]) tensor([0.5238, 0.0938, 0.2239, 0.1585]) -Greedy action tensor([ 0.6688, -0.0585, -0.4149, -0.5598]) tensor([0.4730, 0.2286, 0.1600, 0.1384]) -Greedy action tensor([ 0.8149, -0.5975, 0.0366, -0.4376]) tensor([0.5029, 0.1225, 0.2309, 0.1437]) -Greedy action tensor([ 0.4942, -0.1538, 0.0295, -0.2928]) tensor([0.3836, 0.2007, 0.2410, 0.1746]) -Greedy action tensor([ 0.9460, -0.5040, -0.0729, -0.4133]) tensor([0.5398, 0.1266, 0.1949, 0.1387]) -Greedy action tensor([ 0.6917, -0.4939, -0.1603, 0.0081]) tensor([0.4470, 0.1366, 0.1907, 0.2257]) -Greedy action tensor([ 0.9181, -0.0675, -0.0477, -0.1296]) tensor([0.4751, 0.1773, 0.1809, 0.1667]) -Greedy action tensor([ 0.9332, -0.6563, 0.1491, -0.5644]) tensor([0.5307, 0.1083, 0.2423, 0.1187]) -Greedy action tensor([ 1.0018, -0.2263, 0.0444, -0.4337]) tensor([0.5223, 0.1529, 0.2005, 0.1243]) -Greedy action tensor([ 0.7323, -0.3749, -0.0257, -0.1132]) tensor([0.4487, 0.1483, 0.2103, 0.1927]) -Greedy action tensor([ 0.4825, -0.2247, -0.2006, 0.0864]) tensor([0.3744, 0.1846, 0.1891, 0.2519]) -Greedy action tensor([ 0.5762, -0.1524, 0.0234, -0.0616]) tensor([0.3866, 0.1866, 0.2224, 0.2043]) -Greedy action tensor([ 1.0828, -0.7028, -0.0600, -0.5843]) tensor([0.5969, 0.1001, 0.1904, 0.1127]) -Greedy action tensor([ 0.7372, -0.6467, 0.0153, -0.3017]) tensor([0.4784, 0.1199, 0.2324, 0.1693]) -Greedy action tensor([ 0.8098, -0.4064, -0.2935, -0.2922]) tensor([0.5101, 0.1512, 0.1692, 0.1695]) -Greedy action tensor([ 0.2314, 0.2604, -0.1459, 0.0244]) tensor([0.2834, 0.2918, 0.1944, 0.2304]) -Greedy action tensor([ 0.8034, -0.5975, -0.0166, -0.2861]) tensor([0.4943, 0.1218, 0.2177, 0.1663]) -Greedy action tensor([ 0.7149, -0.5927, -0.0417, -0.5102]) tensor([0.4918, 0.1330, 0.2308, 0.1444]) -Greedy action tensor([ 0.7060, -0.3711, -0.0871, -0.2896]) tensor([0.4624, 0.1575, 0.2092, 0.1709]) -Greedy action tensor([ 0.5260, -0.2062, 0.0074, -0.1538]) tensor([0.3872, 0.1862, 0.2305, 0.1962]) -Greedy action tensor([ 0.7876, -0.2806, 0.0805, -0.4373]) tensor([0.4694, 0.1613, 0.2314, 0.1379]) -Greedy action tensor([ 0.8681, -0.4133, -0.1368, -0.4576]) tensor([0.5238, 0.1454, 0.1917, 0.1391]) -Greedy action tensor([ 0.4013, -0.0365, -0.0866, 0.0115]) tensor([0.3405, 0.2198, 0.2091, 0.2306]) -Greedy action tensor([ 0.5098, -0.6526, 0.2672, -0.4803]) tensor([0.4051, 0.1267, 0.3178, 0.1505]) -Greedy action tensor([ 0.7384, -0.4849, -0.1658, -0.4749]) tensor([0.5009, 0.1474, 0.2028, 0.1489]) -Greedy action tensor([ 0.5041, 0.1148, -0.1892, -0.1067]) tensor([0.3676, 0.2491, 0.1838, 0.1996]) -Greedy action tensor([ 0.6975, -0.4395, 2.4095, 0.2597]) tensor([0.1332, 0.0427, 0.7381, 0.0860]) -Greedy action tensor([-0.2582, -0.1668, 0.0705, 0.0933]) tensor([0.2038, 0.2233, 0.2832, 0.2897]) -Greedy action tensor([1.0368, 0.2809, 0.0232, 0.2022]) tensor([0.4412, 0.2072, 0.1601, 0.1915]) -Greedy action tensor([ 0.7165, 0.4453, -0.0912, 1.0275]) tensor([0.2799, 0.2134, 0.1248, 0.3819]) -Greedy action tensor([ 0.2096, 0.0366, 0.0466, -0.1188]) tensor([0.2932, 0.2466, 0.2491, 0.2111]) -Greedy action tensor([-0.4826, -0.1819, -0.4652, -0.0367]) tensor([0.2028, 0.2740, 0.2064, 0.3168]) -Greedy action tensor([-0.8759, 0.8795, 0.0725, -0.5951]) tensor([0.0935, 0.5411, 0.2415, 0.1239]) -Greedy action tensor([-0.1062, -1.1499, 0.2017, 1.0571]) tensor([0.1691, 0.0596, 0.2301, 0.5412]) -Greedy action tensor([-0.7168, -0.5950, -0.9254, -0.0409]) tensor([0.2038, 0.2302, 0.1654, 0.4006]) -Greedy action tensor([ 0.7796, -1.1275, 0.4457, -0.0204]) tensor([0.4322, 0.0642, 0.3095, 0.1942]) -Greedy action tensor([-1.2553, -1.1054, -0.8921, 0.3359]) tensor([0.1175, 0.1365, 0.1690, 0.5770]) -Greedy action tensor([-0.5591, -0.2208, -1.0755, 0.1037]) tensor([0.2024, 0.2839, 0.1208, 0.3928]) -Greedy action tensor([ 0.9228, -0.0089, 0.4730, 1.4182]) tensor([0.2723, 0.1072, 0.1737, 0.4468]) -Greedy action tensor([ 0.5045, -0.1857, 0.9517, -0.8036]) tensor([0.2998, 0.1503, 0.4689, 0.0810]) -Greedy action tensor([ 0.7250, -1.4532, 2.0521, -0.5126]) tensor([0.1933, 0.0219, 0.7287, 0.0561]) -Greedy action tensor([-0.3783, -0.1501, -0.1433, 0.0730]) tensor([0.1964, 0.2467, 0.2484, 0.3084]) -Greedy action tensor([1.7000, 0.0587, 0.0782, 0.3190]) tensor([0.6088, 0.1179, 0.1203, 0.1530]) -Greedy action tensor([ 1.7201, -0.7840, -0.3453, 0.8954]) tensor([0.6072, 0.0496, 0.0770, 0.2662]) -Greedy action tensor([-0.0097, 0.7296, 0.7843, -0.0545]) tensor([0.1597, 0.3344, 0.3532, 0.1527]) -Greedy action tensor([ 1.4818, -0.9330, -0.1010, 1.2311]) tensor([0.4824, 0.0431, 0.0991, 0.3754]) -Greedy action tensor([ 0.1164, -1.2092, -0.1620, 0.5885]) tensor([0.2758, 0.0733, 0.2088, 0.4422]) -Greedy action tensor([0.9143, 0.0617, 0.6133, 0.0779]) tensor([0.3847, 0.1640, 0.2847, 0.1667]) -Greedy action tensor([ 0.1993, -0.7847, 0.7161, 0.1568]) tensor([0.2495, 0.0932, 0.4182, 0.2391]) -Greedy action tensor([-0.1416, -0.7936, 1.3331, 0.5138]) tensor([0.1279, 0.0667, 0.5590, 0.2464]) -Greedy action tensor([-0.2766, -0.4592, -0.5658, -0.6757]) tensor([0.3074, 0.2561, 0.2302, 0.2063]) -Greedy action tensor([ 0.1379, 1.0013, -0.2307, 0.0904]) tensor([0.1993, 0.4727, 0.1379, 0.1901]) -Greedy action tensor([ 0.1333, -0.6088, 0.3264, -0.1911]) tensor([0.2931, 0.1395, 0.3555, 0.2119]) -Greedy action tensor([ 0.2832, -1.4846, -0.1161, 1.4251]) tensor([0.2010, 0.0343, 0.1348, 0.6298]) -Greedy action tensor([ 0.9544, -1.3161, -0.9177, 0.7299]) tensor([0.4864, 0.0502, 0.0748, 0.3886]) -Greedy action tensor([-1.6440, -1.2260, 0.2054, -0.5599]) tensor([0.0845, 0.1284, 0.5372, 0.2499]) -Greedy action tensor([-0.7263, -1.5360, -0.5089, 0.5398]) tensor([0.1604, 0.0714, 0.1993, 0.5689]) -Greedy action tensor([0.7967, 0.2280, 0.2413, 0.6247]) tensor([0.3353, 0.1899, 0.1924, 0.2824]) -Greedy action tensor([-0.9846, -1.2864, 0.2314, -1.2870]) tensor([0.1709, 0.1264, 0.5765, 0.1263]) -Greedy action tensor([1.4082, 0.0800, 1.9185, 1.0919]) tensor([0.2733, 0.0724, 0.4552, 0.1991]) -Greedy action tensor([-1.2795, -0.2589, 1.6553, -1.8403]) tensor([0.0432, 0.1198, 0.8124, 0.0246]) -Greedy action tensor([ 0.0792, -0.0242, -0.2248, 0.6171]) tensor([0.2298, 0.2072, 0.1695, 0.3935]) -Greedy action tensor([-0.6091, 0.0655, -0.5985, -0.0487]) tensor([0.1747, 0.3429, 0.1765, 0.3059]) -Greedy action tensor([-0.8433, -0.6185, -0.4175, 1.0503]) tensor([0.0959, 0.1201, 0.1468, 0.6372]) -Greedy action tensor([-0.2981, -1.2709, 1.7073, -0.1658]) tensor([0.1005, 0.0380, 0.7467, 0.1147]) -Greedy action tensor([-0.5803, -2.1233, 0.2298, -0.3532]) tensor([0.2120, 0.0453, 0.4766, 0.2661]) -Greedy action tensor([-0.9006, -0.8678, -1.2255, 0.2984]) tensor([0.1647, 0.1702, 0.1190, 0.5462]) -Greedy action tensor([ 0.6606, -0.3848, -0.0243, -0.0129]) tensor([0.4227, 0.1486, 0.2131, 0.2156]) -Greedy action tensor([ 0.7638, -0.0213, 0.2132, 1.0873]) tensor([0.2929, 0.1336, 0.1689, 0.4047]) -Greedy action tensor([-0.7396, 0.8708, -0.0271, -0.8285]) tensor([0.1116, 0.5586, 0.2276, 0.1021]) -Greedy action tensor([ 2.0166, -1.7416, 1.3243, 1.3345]) tensor([0.4928, 0.0115, 0.2466, 0.2491]) -Greedy action tensor([-1.0230, -1.0807, -0.8837, 0.3692]) tensor([0.1405, 0.1326, 0.1615, 0.5654]) -Greedy action tensor([ 0.9885, -1.8397, -0.1896, 0.0146]) tensor([0.5732, 0.0339, 0.1765, 0.2164]) -Greedy action tensor([-0.1939, -0.3010, -0.1102, -0.5456]) tensor([0.2711, 0.2435, 0.2947, 0.1907]) -Greedy action tensor([ 0.1672, -0.8361, -0.4832, 0.9148]) tensor([0.2500, 0.0917, 0.1304, 0.5279]) -Greedy action tensor([ 0.6396, -0.2977, -0.6023, 0.6404]) tensor([0.3729, 0.1461, 0.1077, 0.3733]) -Greedy action tensor([-0.2060, -0.5910, -0.3007, -0.0229]) tensor([0.2638, 0.1795, 0.2399, 0.3168]) -Greedy action tensor([ 0.9132, -0.6760, 0.4080, 1.8524]) tensor([0.2291, 0.0468, 0.1382, 0.5860]) -Greedy action tensor([-0.4805, -1.0149, 0.1426, -0.8964]) tensor([0.2433, 0.1426, 0.4537, 0.1605]) -Greedy action tensor([ 0.4156, -1.7338, 0.5719, 0.9980]) tensor([0.2453, 0.0286, 0.2868, 0.4392]) -Greedy action tensor([-0.1874, -0.4744, 0.0999, 0.7544]) tensor([0.1770, 0.1329, 0.2360, 0.4541]) -Greedy action tensor([-0.4843, -0.6644, 0.1440, -0.2416]) tensor([0.2006, 0.1676, 0.3761, 0.2557]) -Greedy action tensor([ 0.2126, 0.2676, 0.4271, -0.2874]) tensor([0.2563, 0.2707, 0.3176, 0.1554]) -Greedy action tensor([-0.1557, -1.9142, -0.0359, 1.1343]) tensor([0.1686, 0.0290, 0.1900, 0.6124]) -Greedy action tensor([ 0.0767, -0.7589, 0.3058, 0.8638]) tensor([0.2046, 0.0887, 0.2572, 0.4495]) -Greedy action tensor([-1.5316, -0.4061, 0.6673, -0.4773]) tensor([0.0626, 0.1930, 0.5646, 0.1797]) -Greedy action tensor([ 0.1114, 0.1074, 0.9926, -0.9458]) tensor([0.2102, 0.2094, 0.5074, 0.0730]) -Greedy action tensor([ 1.5893, -0.5813, 0.9293, 0.2589]) tensor([0.5276, 0.0602, 0.2727, 0.1395]) -Greedy action tensor([0.2680, 0.0965, 0.2018, 0.4650]) tensor([0.2502, 0.2108, 0.2342, 0.3047]) -Greedy action tensor([-1.2519, -0.3706, 0.0907, 0.4781]) tensor([0.0776, 0.1874, 0.2972, 0.4378]) -Greedy action tensor([ 0.6614, 0.8726, 0.7524, -0.6760]) tensor([0.2783, 0.3438, 0.3048, 0.0731]) -Greedy action tensor([-0.0690, 0.5037, 1.5426, -0.6777]) tensor([0.1201, 0.2129, 0.6017, 0.0653]) -Greedy action tensor([-0.7680, 0.5765, 1.8341, -0.9831]) tensor([0.0523, 0.2005, 0.7051, 0.0421]) -Greedy action tensor([-1.0167, -0.4951, 0.6355, -0.8944]) tensor([0.1107, 0.1865, 0.5777, 0.1251]) -Greedy action tensor([ 0.3938, -1.2505, -0.0958, 2.3304]) tensor([0.1144, 0.0221, 0.0701, 0.7934]) -Greedy action tensor([-0.9467, 0.2749, -0.8983, 0.2309]) tensor([0.1151, 0.3905, 0.1208, 0.3737]) -Greedy action tensor([ 0.3668, -0.0715, -0.4213, -0.9898]) tensor([0.4242, 0.2737, 0.1929, 0.1092]) -Greedy action tensor([0.5751, 0.0913, 0.1086, 0.1044]) tensor([0.3487, 0.2149, 0.2187, 0.2178]) -Greedy action tensor([-1.0386, 0.1540, 0.3547, -0.9148]) tensor([0.1058, 0.3486, 0.4260, 0.1197]) -Greedy action tensor([-1.9188, -0.3410, 0.8485, -0.5470]) tensor([0.0389, 0.1885, 0.6192, 0.1534]) -Greedy action tensor([-0.7097, -0.0270, 0.8114, -1.9781]) tensor([0.1276, 0.2525, 0.5840, 0.0359]) -Greedy action tensor([ 0.5781, -0.9344, 2.0670, -0.6040]) tensor([0.1678, 0.0370, 0.7438, 0.0515]) -Greedy action tensor([-1.0337, -0.9508, -0.1773, 0.5667]) tensor([0.1064, 0.1156, 0.2506, 0.5273]) -Greedy action tensor([-0.0550, -0.0077, -0.9614, -0.8317]) tensor([0.3434, 0.3600, 0.1387, 0.1579]) -Greedy action tensor([ 1.1396, -0.1584, -0.1329, 0.4466]) tensor([0.4870, 0.1330, 0.1364, 0.2435]) -Greedy action tensor([-0.2222, -0.8542, -0.6089, -0.1480]) tensor([0.3041, 0.1617, 0.2066, 0.3276]) -Greedy action tensor([ 0.9975, 0.0094, 1.6035, -0.8955]) tensor([0.2980, 0.1109, 0.5462, 0.0449]) -Greedy action tensor([ 1.0591, -1.0940, -0.3332, 0.7718]) tensor([0.4729, 0.0549, 0.1175, 0.3547]) -Greedy action tensor([-1.4208, -0.0705, 0.5893, 0.1340]) tensor([0.0586, 0.2262, 0.4376, 0.2776]) -Greedy action tensor([-1.8871, -0.4636, 0.6466, -0.1485]) tensor([0.0427, 0.1771, 0.5375, 0.2427]) -Greedy action tensor([-1.8810, -0.4776, 0.6712, -0.1135]) tensor([0.0421, 0.1713, 0.5402, 0.2465]) -Greedy action tensor([-1.3837, -0.0716, 0.4036, -0.1568]) tensor([0.0709, 0.2634, 0.4237, 0.2419]) -Greedy action tensor([-1.6690, -0.5061, 0.5118, -0.0047]) tensor([0.0545, 0.1745, 0.4829, 0.2881]) -Greedy action tensor([-1.4369, 0.2114, 0.2791, 0.1563]) tensor([0.0600, 0.3116, 0.3335, 0.2949]) -Greedy action tensor([-1.9181, -0.4462, 0.6505, -0.1651]) tensor([0.0414, 0.1802, 0.5397, 0.2387]) -Greedy action tensor([-1.9167, -0.4017, 0.6505, -0.1643]) tensor([0.0411, 0.1869, 0.5352, 0.2369]) -Greedy action tensor([-1.3611, -0.6032, 0.4003, -0.0053]) tensor([0.0779, 0.1663, 0.4535, 0.3023]) -Greedy action tensor([-1.8340, -0.2546, 0.5910, -0.1098]) tensor([0.0439, 0.2132, 0.4965, 0.2464]) -Greedy action tensor([-1.8750, -0.4591, 0.6375, -0.1122]) tensor([0.0429, 0.1769, 0.5298, 0.2503]) -Greedy action tensor([-1.0831, -1.0521, 1.0568, 1.2647]) tensor([0.0476, 0.0491, 0.4048, 0.4984]) -Greedy action tensor([-0.5646, -0.4486, 0.2231, -0.0100]) tensor([0.1649, 0.1852, 0.3626, 0.2872]) -Greedy action tensor([-1.8099, -0.3410, 0.5848, -0.1049]) tensor([0.0458, 0.1992, 0.5027, 0.2522]) -Greedy action tensor([-1.8522, -0.0874, 0.5542, -0.1073]) tensor([0.0423, 0.2469, 0.4689, 0.2420]) -Greedy action tensor([-1.9455, -0.4489, 0.6671, -0.1812]) tensor([0.0401, 0.1791, 0.5467, 0.2341]) -Greedy action tensor([-1.8197, -0.1291, 0.5775, -0.0733]) tensor([0.0432, 0.2343, 0.4749, 0.2477]) -Greedy action tensor([-1.4359, 0.4729, 0.3019, 0.1657]) tensor([0.0544, 0.3667, 0.3091, 0.2697]) -Greedy action tensor([-1.7031, -0.2656, 0.6533, -0.0128]) tensor([0.0472, 0.1987, 0.4982, 0.2559]) -Greedy action tensor([-1.9422, -0.4513, 0.6683, -0.1780]) tensor([0.0402, 0.1785, 0.5468, 0.2346]) -Greedy action tensor([-0.8023, -0.3308, 0.3725, -0.3412]) tensor([0.1347, 0.2158, 0.4360, 0.2136]) -Greedy action tensor([-1.1523, -0.3064, 0.4042, -0.2082]) tensor([0.0940, 0.2189, 0.4456, 0.2415]) -Greedy action tensor([-1.8379, -0.3040, 0.6008, -0.1073]) tensor([0.0440, 0.2039, 0.5039, 0.2482]) -Greedy action tensor([-1.6241, -0.5169, 0.5110, -0.0547]) tensor([0.0578, 0.1750, 0.4892, 0.2779]) -Greedy action tensor([-1.8873, -0.4698, 0.6376, -0.1581]) tensor([0.0430, 0.1775, 0.5371, 0.2424]) -Greedy action tensor([-1.0312, -0.5870, 0.3549, -0.1135]) tensor([0.1104, 0.1721, 0.4413, 0.2763]) -Greedy action tensor([-1.7629, -0.3032, 0.5569, -0.0651]) tensor([0.0478, 0.2056, 0.4859, 0.2608]) -Greedy action tensor([-1.9171, -0.4423, 0.6505, -0.1674]) tensor([0.0414, 0.1809, 0.5396, 0.2381]) -Greedy action tensor([-0.6528, 0.3612, 0.3197, 0.8141]) tensor([0.0931, 0.2567, 0.2463, 0.4038]) -Greedy action tensor([-1.9219, -0.4465, 0.6551, -0.1693]) tensor([0.0412, 0.1800, 0.5415, 0.2374]) -Greedy action tensor([-1.9223, -0.4284, 0.6557, -0.1620]) tensor([0.0409, 0.1823, 0.5389, 0.2379]) -Greedy action tensor([-1.2913, 0.6939, 0.1964, 0.1914]) tensor([0.0584, 0.4255, 0.2587, 0.2574]) -Greedy action tensor([-1.8939, -0.3368, 0.6288, -0.1360]) tensor([0.0417, 0.1976, 0.5191, 0.2416]) -Greedy action tensor([-1.8589, -0.4227, 0.6182, -0.1396]) tensor([0.0441, 0.1853, 0.5247, 0.2459]) -Greedy action tensor([-1.7035, -0.2701, 0.5323, -0.0344]) tensor([0.0504, 0.2112, 0.4711, 0.2673]) -Greedy action tensor([-1.1431, -0.8306, 1.2483, 0.2649]) tensor([0.0575, 0.0786, 0.6287, 0.2351]) -Greedy action tensor([-1.0861, -0.0735, 0.4644, 0.4505]) tensor([0.0762, 0.2099, 0.3594, 0.3544]) -Greedy action tensor([-1.8820, -0.4569, 0.6452, -0.1311]) tensor([0.0427, 0.1774, 0.5341, 0.2458]) -Greedy action tensor([-1.8468, -0.2402, 0.5941, -0.1084]) tensor([0.0432, 0.2153, 0.4959, 0.2456]) -Greedy action tensor([-1.8826, -0.3222, 0.6221, -0.1370]) tensor([0.0421, 0.2006, 0.5158, 0.2414]) -Greedy action tensor([-1.6280, -0.3525, 0.4878, 0.0420]) tensor([0.0550, 0.1968, 0.4561, 0.2921]) -Greedy action tensor([-1.9316, -0.4448, 0.6681, -0.1708]) tensor([0.0405, 0.1791, 0.5449, 0.2355]) -Greedy action tensor([-1.9217, -0.4055, 0.6512, -0.1645]) tensor([0.0409, 0.1863, 0.5358, 0.2370]) -Greedy action tensor([-1.9230, -0.4372, 0.6554, -0.1687]) tensor([0.0410, 0.1813, 0.5406, 0.2371]) -Greedy action tensor([-1.8963, -0.3390, 0.6306, -0.1419]) tensor([0.0416, 0.1974, 0.5205, 0.2404]) -Greedy action tensor([-1.4229, -0.4745, 1.2545, 1.0956]) tensor([0.0327, 0.0845, 0.4763, 0.4064]) -Greedy action tensor([-1.6568, -0.2779, 0.7776, 0.3515]) tensor([0.0420, 0.1666, 0.4788, 0.3127]) -Greedy action tensor([-1.9075, -0.4605, 0.6538, -0.1671]) tensor([0.0418, 0.1778, 0.5419, 0.2384]) -Greedy action tensor([-1.7571, -0.3563, 0.6050, -0.0430]) tensor([0.0471, 0.1912, 0.5001, 0.2616]) -Greedy action tensor([-1.8212, -0.3331, 0.6215, -0.1026]) tensor([0.0444, 0.1968, 0.5111, 0.2477]) -Greedy action tensor([-1.8987, -0.4142, 0.6449, -0.1504]) tensor([0.0419, 0.1848, 0.5328, 0.2406]) -Greedy action tensor([-1.9250, -0.4449, 0.6649, -0.1603]) tensor([0.0407, 0.1789, 0.5427, 0.2378]) -Greedy action tensor([-1.7584, -0.4561, 0.6343, -0.0129]) tensor([0.0468, 0.1723, 0.5126, 0.2683]) -Greedy action tensor([-1.8076, -0.3619, 0.5835, -0.1109]) tensor([0.0462, 0.1963, 0.5052, 0.2523]) -Greedy action tensor([-1.9239, -0.4833, 0.7910, -0.0767]) tensor([0.0375, 0.1584, 0.5663, 0.2378]) -Greedy action tensor([-1.8814, -0.4340, 0.6342, -0.1493]) tensor([0.0430, 0.1827, 0.5315, 0.2428]) -Greedy action tensor([-0.7817, -0.5587, 0.5738, 0.8547]) tensor([0.0888, 0.1109, 0.3443, 0.4560]) -Greedy action tensor([-1.4939, -0.2089, 0.4890, 0.1568]) tensor([0.0585, 0.2115, 0.4251, 0.3049]) -Greedy action tensor([-1.9428, -0.4532, 0.6685, -0.1783]) tensor([0.0402, 0.1782, 0.5471, 0.2346]) -Greedy action tensor([-1.8473, -0.4677, 0.6069, -0.1502]) tensor([0.0453, 0.1800, 0.5273, 0.2473]) -Greedy action tensor([-1.2825, -0.9939, 0.6170, -0.2591]) tensor([0.0847, 0.1131, 0.5663, 0.2358]) -Greedy action tensor([-1.7947, -0.4693, 0.5931, -0.1320]) tensor([0.0478, 0.1798, 0.5204, 0.2520]) -Greedy action tensor([-1.9234, -0.4227, 0.6567, -0.1680]) tensor([0.0409, 0.1833, 0.5394, 0.2365]) -Greedy action tensor([-1.7624, -0.5138, 0.5752, -0.0772]) tensor([0.0494, 0.1722, 0.5118, 0.2665]) -Greedy action tensor([-1.8889, -0.3582, 0.6281, -0.1475]) tensor([0.0422, 0.1949, 0.5225, 0.2405]) -Greedy action tensor([-1.8898, -0.3486, 0.6308, -0.1498]) tensor([0.0420, 0.1962, 0.5224, 0.2394]) -Greedy action tensor([-1.6938, -0.5296, 0.5389, -0.0323]) tensor([0.0532, 0.1704, 0.4961, 0.2802]) -Greedy action tensor([-1.1227, -0.4757, 0.8250, 0.9881]) tensor([0.0550, 0.1051, 0.3858, 0.4541]) -Greedy action tensor([-1.8359, -0.3838, 0.6128, -0.0897]) tensor([0.0443, 0.1892, 0.5126, 0.2539]) -Greedy action tensor([-1.5000, -0.0830, 0.4889, 0.1945]) tensor([0.0559, 0.2307, 0.4088, 0.3045]) -Greedy action tensor([ 0.3242, -0.4874, 0.9724, 1.7017]) tensor([0.1366, 0.0607, 0.2612, 0.5416]) -Greedy action tensor([-1.8135, -0.1568, 0.5639, -0.0888]) tensor([0.0442, 0.2316, 0.4762, 0.2479]) -Greedy action tensor([-1.3186, -0.0880, 0.4464, -0.2094]) tensor([0.0752, 0.2574, 0.4393, 0.2280]) -Greedy action tensor([-1.4160, -0.5467, 0.3974, 0.3035]) tensor([0.0662, 0.1580, 0.4061, 0.3697]) -Greedy action tensor([-1.7513, -0.4503, 0.5804, -0.0723]) tensor([0.0492, 0.1807, 0.5064, 0.2637]) -Greedy action tensor([-1.8493, -0.3416, 0.6075, -0.1226]) tensor([0.0438, 0.1980, 0.5116, 0.2465]) -Greedy action tensor([-1.8377, -0.4203, 0.6313, -0.1342]) tensor([0.0446, 0.1840, 0.5266, 0.2449]) -Greedy action tensor([-0.5087, 0.0235, 0.1173, -0.0411]) tensor([0.1621, 0.2760, 0.3031, 0.2587]) -Greedy action tensor([-1.9097, -0.3415, 0.6276, -0.1477]) tensor([0.0412, 0.1977, 0.5211, 0.2400]) -Greedy action tensor([-1.9416, -0.4609, 0.6692, -0.1796]) tensor([0.0403, 0.1770, 0.5481, 0.2346]) -Greedy action tensor([-0.7788, 0.0225, 0.2729, -0.1227]) tensor([0.1247, 0.2779, 0.3570, 0.2404]) -Greedy action tensor([ 1.2072, -0.9087, -0.0210, 0.5711]) tensor([0.5147, 0.0620, 0.1507, 0.2725]) -Greedy action tensor([ 1.3827, -0.3181, -0.5523, 0.2817]) tensor([0.6026, 0.1100, 0.0870, 0.2004]) -Greedy action tensor([ 2.1368, -0.7810, -0.4254, 0.5691]) tensor([0.7464, 0.0403, 0.0576, 0.1556]) -Greedy action tensor([ 1.7536, -0.5319, -0.3007, 0.2333]) tensor([0.6903, 0.0702, 0.0885, 0.1509]) -Greedy action tensor([ 1.2066, -0.8185, 0.1282, -0.1578]) tensor([0.5788, 0.0764, 0.1969, 0.1479]) -Greedy action tensor([ 0.6756, -0.3651, 0.2523, 0.2952]) tensor([0.3715, 0.1312, 0.2433, 0.2540]) -Greedy action tensor([ 1.6050, -0.5460, -0.3953, 0.3153]) tensor([0.6549, 0.0762, 0.0886, 0.1803]) -Greedy action tensor([ 1.5092, -0.4500, -0.9267, 0.4914]) tensor([0.6290, 0.0887, 0.0550, 0.2273]) -Greedy action tensor([ 2.1467, -0.6437, -0.6461, 0.6305]) tensor([0.7450, 0.0457, 0.0456, 0.1636]) -Greedy action tensor([ 1.5335, -0.6727, -0.0026, 0.2770]) tensor([0.6211, 0.0684, 0.1337, 0.1768]) -Greedy action tensor([ 0.9945, -0.6625, -0.5806, 0.7470]) tensor([0.4590, 0.0875, 0.0950, 0.3584]) -Greedy action tensor([ 1.8678, -0.6478, -0.5416, 0.5112]) tensor([0.7002, 0.0566, 0.0629, 0.1803]) -Greedy action tensor([ 1.6329, -0.8164, -0.1523, 0.1605]) tensor([0.6741, 0.0582, 0.1131, 0.1546]) -Greedy action tensor([ 1.2781, -0.4286, -0.2550, 0.4331]) tensor([0.5474, 0.0993, 0.1182, 0.2351]) -Greedy action tensor([ 1.6568, -0.9796, -0.2319, 0.2950]) tensor([0.6761, 0.0484, 0.1023, 0.1732]) -Greedy action tensor([ 1.0950, -0.3203, -0.8061, 0.5752]) tensor([0.5033, 0.1222, 0.0752, 0.2993]) -Greedy action tensor([ 1.5433, -0.8240, -0.2979, 0.4830]) tensor([0.6255, 0.0586, 0.0992, 0.2166]) -Greedy action tensor([ 1.4156, -0.2806, -0.2946, -0.0977]) tensor([0.6311, 0.1157, 0.1141, 0.1390]) -Greedy action tensor([ 1.2631, -0.3715, -0.0787, -0.1598]) tensor([0.5891, 0.1149, 0.1540, 0.1420]) -Greedy action tensor([ 1.0016, -0.6806, -0.0118, 0.3747]) tensor([0.4801, 0.0893, 0.1742, 0.2564]) -Greedy action tensor([ 0.4408, -0.3157, -0.1135, 0.3015]) tensor([0.3432, 0.1611, 0.1972, 0.2986]) -Greedy action tensor([ 1.0278, -0.2653, -0.2740, -0.0035]) tensor([0.5255, 0.1442, 0.1429, 0.1874]) -Greedy action tensor([ 0.8360, -0.2855, -0.2008, 0.0656]) tensor([0.4666, 0.1520, 0.1654, 0.2159]) -Greedy action tensor([ 0.8509, -0.7737, -0.1259, 0.5240]) tensor([0.4358, 0.0858, 0.1641, 0.3143]) -Greedy action tensor([ 1.8544, -0.3838, -0.3409, 0.4533]) tensor([0.6829, 0.0728, 0.0760, 0.1682]) -Greedy action tensor([ 0.4660, -0.4743, 0.0876, -0.0807]) tensor([0.3768, 0.1471, 0.2580, 0.2181]) -Greedy action tensor([ 0.7118, -0.4065, 0.2091, -0.1311]) tensor([0.4233, 0.1384, 0.2561, 0.1822]) -Greedy action tensor([ 1.4772, -0.1304, -0.2771, 0.2926]) tensor([0.5955, 0.1193, 0.1030, 0.1821]) -Greedy action tensor([ 1.5433, -0.3144, 0.1533, -0.4861]) tensor([0.6508, 0.1015, 0.1621, 0.0855]) -Greedy action tensor([ 1.5853, -0.3758, -0.3547, -0.0208]) tensor([0.6734, 0.0947, 0.0968, 0.1351]) -Greedy action tensor([ 1.6079, -0.3599, -0.5759, 0.2560]) tensor([0.6618, 0.0925, 0.0745, 0.1712]) -Greedy action tensor([ 1.4114, -0.5885, -0.2163, 0.4594]) tensor([0.5822, 0.0788, 0.1143, 0.2247]) -Greedy action tensor([ 1.3357, -0.5887, -0.2225, 0.1231]) tensor([0.6046, 0.0883, 0.1273, 0.1798]) -Greedy action tensor([ 1.8900, -0.5123, 0.8099, 0.1323]) tensor([0.6240, 0.0565, 0.2119, 0.1076]) -Greedy action tensor([ 2.0159, -1.0234, -0.2807, 0.3913]) tensor([0.7432, 0.0356, 0.0748, 0.1464]) -Greedy action tensor([ 1.4619, -0.2801, -0.2627, 0.4047]) tensor([0.5879, 0.1030, 0.1048, 0.2043]) -Greedy action tensor([ 1.9131, -0.6051, -0.2825, 0.4037]) tensor([0.7077, 0.0570, 0.0788, 0.1564]) -Greedy action tensor([ 1.4287, -0.0189, -0.2298, 0.1286]) tensor([0.5889, 0.1385, 0.1121, 0.1605]) -Greedy action tensor([ 1.7476, -0.2739, -0.5195, 0.1417]) tensor([0.6960, 0.0922, 0.0721, 0.1397]) -Greedy action tensor([ 1.7973, -0.6053, -0.1963, 0.8671]) tensor([0.6168, 0.0558, 0.0840, 0.2433]) -Greedy action tensor([ 1.3389, 0.0136, -0.4243, 0.3561]) tensor([0.5520, 0.1467, 0.0947, 0.2066]) -Greedy action tensor([ 1.4679, -0.8304, -0.1919, 0.0482]) tensor([0.6526, 0.0655, 0.1241, 0.1578]) -Greedy action tensor([ 1.5991, -0.5144, -0.3301, 0.2220]) tensor([0.6586, 0.0796, 0.0957, 0.1662]) -Greedy action tensor([ 1.3935, -0.1707, -0.4797, 0.6640]) tensor([0.5420, 0.1134, 0.0833, 0.2613]) -Greedy action tensor([ 1.7440, -0.7636, -0.4378, 0.3024]) tensor([0.6989, 0.0569, 0.0789, 0.1653]) -Greedy action tensor([ 1.9379, -0.7559, -0.6568, 0.6903]) tensor([0.6996, 0.0473, 0.0522, 0.2009]) -Greedy action tensor([ 0.6607, 0.1457, -0.4235, 0.0910]) tensor([0.3998, 0.2389, 0.1352, 0.2262]) -Greedy action tensor([ 1.5925, -0.3990, -0.5312, 0.5557]) tensor([0.6209, 0.0847, 0.0742, 0.2201]) -Greedy action tensor([ 2.2044, 0.1036, -0.0601, 0.1904]) tensor([0.7354, 0.0900, 0.0764, 0.0982]) -Greedy action tensor([ 1.7713, -0.6549, -0.4700, 0.5729]) tensor([0.6683, 0.0591, 0.0711, 0.2016]) -Greedy action tensor([ 1.8536, -1.1181, -0.0883, 0.7693]) tensor([0.6524, 0.0334, 0.0936, 0.2206]) -Greedy action tensor([ 0.6804, -0.2652, -0.6344, 0.6698]) tensor([0.3779, 0.1468, 0.1015, 0.3739]) -Greedy action tensor([ 1.5940, 0.0048, -0.8828, -0.0452]) tensor([0.6746, 0.1377, 0.0567, 0.1310]) -Greedy action tensor([ 2.0195, -0.6819, -0.3223, 0.3333]) tensor([0.7416, 0.0498, 0.0713, 0.1374]) -Greedy action tensor([ 0.2990, -0.2894, -0.6184, -0.0733]) tensor([0.3782, 0.2100, 0.1511, 0.2606]) -Greedy action tensor([ 1.7199, -0.6505, -0.0436, 0.3767]) tensor([0.6553, 0.0612, 0.1124, 0.1711]) -Greedy action tensor([1.2504, 0.0246, 0.0303, 0.4271]) tensor([0.4932, 0.1448, 0.1456, 0.2165]) -Greedy action tensor([ 1.9025, -0.5806, -0.6473, 0.2891]) tensor([0.7349, 0.0613, 0.0574, 0.1464]) -Greedy action tensor([ 1.4981, -0.1688, -0.4832, 0.4848]) tensor([0.5918, 0.1118, 0.0816, 0.2148]) -Greedy action tensor([ 1.1684, -0.4079, -0.3149, 0.0885]) tensor([0.5639, 0.1166, 0.1280, 0.1915]) -Greedy action tensor([ 1.6144, -0.4311, -0.7270, 0.3067]) tensor([0.6685, 0.0864, 0.0643, 0.1808]) -Greedy action tensor([ 1.7631, -0.6713, -0.4751, 0.3425]) tensor([0.6964, 0.0610, 0.0743, 0.1682]) -Greedy action tensor([ 1.5877, -0.4901, -0.8294, 0.5974]) tensor([0.6306, 0.0790, 0.0562, 0.2342]) -Greedy action tensor([ 1.6289, -0.2817, -0.5557, 0.7238]) tensor([0.6006, 0.0889, 0.0676, 0.2429]) -Greedy action tensor([ 1.4277, -1.0577, -0.3012, 0.2578]) tensor([0.6365, 0.0530, 0.1130, 0.1975]) -Greedy action tensor([ 2.0824, -0.1678, -0.2110, 0.7135]) tensor([0.6846, 0.0721, 0.0691, 0.1742]) -Greedy action tensor([ 1.0356, -0.3022, -0.3430, 0.4032]) tensor([0.4888, 0.1283, 0.1232, 0.2597]) -Greedy action tensor([ 0.9358, -0.2635, 0.1007, 0.0661]) tensor([0.4642, 0.1399, 0.2014, 0.1945]) -Greedy action tensor([ 1.3327, -0.5989, -0.5872, 0.4344]) tensor([0.5886, 0.0853, 0.0863, 0.2397]) -Greedy action tensor([ 0.7102, -0.0506, -0.1111, 0.5296]) tensor([0.3647, 0.1704, 0.1604, 0.3044]) -Greedy action tensor([ 1.2325, -0.5052, -0.3732, 0.1158]) tensor([0.5869, 0.1032, 0.1178, 0.1921]) -Greedy action tensor([ 1.2773, -0.5034, -0.0239, 0.1124]) tensor([0.5706, 0.0962, 0.1553, 0.1780]) -Greedy action tensor([ 0.8449, -0.4087, -0.0042, 0.0548]) tensor([0.4615, 0.1317, 0.1974, 0.2094]) -Greedy action tensor([ 1.0022, -0.0508, -0.7250, 0.2744]) tensor([0.4976, 0.1736, 0.0885, 0.2403]) -Greedy action tensor([ 1.7750, -0.7570, -0.1693, 0.3545]) tensor([0.6830, 0.0543, 0.0977, 0.1650]) -Greedy action tensor([ 1.7512, -1.0978, -0.1539, 0.7929]) tensor([0.6288, 0.0364, 0.0936, 0.2412]) -Greedy action tensor([ 1.4463, -0.4548, -0.4494, 0.2966]) tensor([0.6187, 0.0924, 0.0929, 0.1960]) -Greedy action tensor([ 1.5566, -0.1406, -0.3865, 0.4062]) tensor([0.6086, 0.1115, 0.0872, 0.1926]) -Greedy action tensor([ 0.3998, -0.6171, 0.0359, 0.1609]) tensor([0.3516, 0.1272, 0.2443, 0.2769]) -Greedy action tensor([ 1.2108, -0.2892, -0.8777, 0.3921]) tensor([0.5593, 0.1248, 0.0693, 0.2466]) -Greedy action tensor([ 1.6236, -0.3960, -0.2281, 0.6225]) tensor([0.6035, 0.0801, 0.0947, 0.2217]) -Greedy action tensor([ 0.3595, 0.1581, -0.0803, -0.1198]) tensor([0.3246, 0.2654, 0.2091, 0.2010]) -Greedy action tensor([ 0.8917, -0.8806, -0.0370, -0.4789]) tensor([0.5498, 0.0934, 0.2172, 0.1396]) -Greedy action tensor([ 0.8875, -0.7464, 0.1190, -0.4499]) tensor([0.5205, 0.1016, 0.2413, 0.1366]) -Greedy action tensor([ 0.9261, -0.4197, -0.2808, -0.3571]) tensor([0.5445, 0.1417, 0.1629, 0.1509]) -Greedy action tensor([ 0.8518, -0.6324, -0.0789, -0.4429]) tensor([0.5277, 0.1196, 0.2081, 0.1446]) -Greedy action tensor([ 0.6040, -0.1620, -0.0184, -0.0425]) tensor([0.3960, 0.1841, 0.2125, 0.2074]) -Greedy action tensor([ 0.5809, -0.1550, 0.0255, -0.3020]) tensor([0.4054, 0.1942, 0.2327, 0.1677]) -Greedy action tensor([ 0.9958, -0.5057, -0.1377, -0.2656]) tensor([0.5470, 0.1219, 0.1761, 0.1550]) -Greedy action tensor([ 0.9855, -0.7535, 0.1053, -0.4248]) tensor([0.5451, 0.0958, 0.2261, 0.1330]) -Greedy action tensor([ 0.6114, -0.4487, -0.0400, -0.1563]) tensor([0.4289, 0.1486, 0.2236, 0.1990]) -Greedy action tensor([ 1.1140, -0.8238, 0.0086, -0.6304]) tensor([0.6061, 0.0873, 0.2007, 0.1059]) -Greedy action tensor([ 0.8744, -0.2864, 0.0144, -0.4887]) tensor([0.5020, 0.1572, 0.2124, 0.1284]) -Greedy action tensor([ 0.5132, -0.4427, -0.0980, -0.1310]) tensor([0.4078, 0.1568, 0.2213, 0.2141]) -Greedy action tensor([ 0.6207, -0.3520, 0.0150, -0.2133]) tensor([0.4241, 0.1603, 0.2314, 0.1842]) -Greedy action tensor([ 0.7872, -0.5746, 0.0276, -0.4692]) tensor([0.4978, 0.1275, 0.2329, 0.1417]) -Greedy action tensor([ 1.1281, -1.1151, 0.0780, -0.6909]) tensor([0.6180, 0.0656, 0.2162, 0.1002]) -Greedy action tensor([ 0.6828, -0.5434, -0.1530, -0.1090]) tensor([0.4587, 0.1346, 0.1989, 0.2078]) -Greedy action tensor([ 0.9209, -0.3810, -0.2093, -0.1016]) tensor([0.5116, 0.1392, 0.1652, 0.1840]) -Greedy action tensor([ 0.7147, -0.4599, -0.1370, -0.1212]) tensor([0.4610, 0.1424, 0.1967, 0.1998]) -Greedy action tensor([ 0.1370, -0.1301, -0.1094, -0.1258]) tensor([0.3016, 0.2309, 0.2357, 0.2319]) -Greedy action tensor([ 0.9646, -0.4464, -0.1249, -0.3001]) tensor([0.5369, 0.1309, 0.1806, 0.1516]) -Greedy action tensor([ 0.9350, -0.6963, -0.0065, -0.3704]) tensor([0.5386, 0.1054, 0.2101, 0.1460]) -Greedy action tensor([ 1.0023, -0.5272, -0.1106, -0.3100]) tensor([0.5511, 0.1194, 0.1811, 0.1484]) -Greedy action tensor([ 0.4109, -0.2995, -0.0231, -0.2517]) tensor([0.3767, 0.1851, 0.2440, 0.1942]) -Greedy action tensor([ 0.8102, -0.7725, 0.0138, -0.3936]) tensor([0.5111, 0.1050, 0.2305, 0.1534]) -Greedy action tensor([ 0.7726, -0.4999, -0.2451, -0.3082]) tensor([0.5048, 0.1414, 0.1825, 0.1713]) -Greedy action tensor([ 0.6007, -0.5061, -0.1119, -0.3431]) tensor([0.4525, 0.1496, 0.2219, 0.1761]) -Greedy action tensor([ 0.4828, -0.2880, -0.0793, -0.1627]) tensor([0.3911, 0.1809, 0.2229, 0.2051]) -Greedy action tensor([ 0.7927, -0.1963, -0.0874, -0.0133]) tensor([0.4478, 0.1665, 0.1857, 0.2000]) -Greedy action tensor([ 0.4502, 0.0047, -0.0112, -0.2215]) tensor([0.3595, 0.2303, 0.2266, 0.1836]) -Greedy action tensor([ 0.4142, -0.0969, 0.1570, -0.0266]) tensor([0.3315, 0.1988, 0.2563, 0.2133]) -Greedy action tensor([ 0.6096, 0.0164, -0.0954, -0.1931]) tensor([0.4008, 0.2215, 0.1981, 0.1796]) -Greedy action tensor([ 0.5762, 0.0266, 0.0195, -0.0611]) tensor([0.3733, 0.2154, 0.2139, 0.1974]) -Greedy action tensor([ 0.9425, -0.7268, -0.0262, -0.5224]) tensor([0.5559, 0.1047, 0.2110, 0.1285]) -Greedy action tensor([ 0.8244, -0.6924, -0.0520, -0.2198]) tensor([0.5031, 0.1104, 0.2094, 0.1771]) -Greedy action tensor([ 0.8029, -0.3598, -0.1093, -0.5986]) tensor([0.5101, 0.1595, 0.2049, 0.1256]) -Greedy action tensor([ 0.5965, -0.3860, -0.0121, -0.2223]) tensor([0.4238, 0.1587, 0.2306, 0.1869]) -Greedy action tensor([ 0.8541, -0.5176, -0.0310, -0.4529]) tensor([0.5163, 0.1310, 0.2130, 0.1397]) -Greedy action tensor([ 0.5634, -0.4443, -0.0549, -0.2583]) tensor([0.4267, 0.1558, 0.2299, 0.1876]) -Greedy action tensor([ 0.5824, 0.0744, -0.1866, 0.2238]) tensor([0.3618, 0.2177, 0.1677, 0.2528]) -Greedy action tensor([ 0.7265, -0.1050, -0.0191, -0.0676]) tensor([0.4234, 0.1844, 0.2009, 0.1914]) -Greedy action tensor([ 0.7439, -0.4402, -0.0888, -0.2144]) tensor([0.4707, 0.1440, 0.2047, 0.1805]) -Greedy action tensor([ 0.3101, -0.0370, -0.0982, -0.3297]) tensor([0.3450, 0.2438, 0.2293, 0.1819]) -Greedy action tensor([ 0.3904, -0.5041, -0.2042, -0.1622]) tensor([0.3943, 0.1612, 0.2176, 0.2269]) -Greedy action tensor([ 0.6016, 0.1435, 0.2261, -0.4236]) tensor([0.3734, 0.2362, 0.2565, 0.1339]) -Greedy action tensor([ 0.2862, 0.3509, -0.2674, -0.0109]) tensor([0.2954, 0.3152, 0.1698, 0.2195]) -Greedy action tensor([ 0.4930, 0.0292, 0.0294, -0.2089]) tensor([0.3632, 0.2284, 0.2284, 0.1800]) -Greedy action tensor([ 1.0113, -0.4809, 0.0524, -0.5197]) tensor([0.5481, 0.1233, 0.2101, 0.1186]) -Greedy action tensor([ 0.6967, -0.1966, 0.1616, -0.4283]) tensor([0.4311, 0.1765, 0.2525, 0.1400]) -Greedy action tensor([ 0.7747, -0.8334, -0.1406, -0.2765]) tensor([0.5128, 0.1027, 0.2053, 0.1792]) -Greedy action tensor([ 0.4391, -0.0900, -0.0422, -0.2488]) tensor([0.3690, 0.2174, 0.2281, 0.1855]) -Greedy action tensor([ 0.3967, 0.3319, -0.0535, -0.3603]) tensor([0.3285, 0.3079, 0.2094, 0.1541]) -Greedy action tensor([ 0.8582, -0.2978, -0.1443, -0.0800]) tensor([0.4824, 0.1518, 0.1770, 0.1888]) -Greedy action tensor([ 0.4767, -0.2995, -0.0757, -0.4056]) tensor([0.4082, 0.1879, 0.2350, 0.1689]) -Greedy action tensor([ 0.6288, -0.3079, -0.1277, -0.3659]) tensor([0.4482, 0.1757, 0.2104, 0.1658]) -Greedy action tensor([ 0.8094, -0.4322, -0.0262, -0.4144]) tensor([0.4959, 0.1433, 0.2150, 0.1458]) -Greedy action tensor([ 0.4458, -0.4896, -0.0634, -0.3121]) tensor([0.4062, 0.1594, 0.2441, 0.1904]) -Greedy action tensor([ 0.8900, -0.4969, -0.1358, -0.2991]) tensor([0.5228, 0.1306, 0.1874, 0.1592]) -Greedy action tensor([ 0.1878, 0.2246, -0.1765, -0.5111]) tensor([0.3097, 0.3213, 0.2151, 0.1539]) -Greedy action tensor([ 0.3628, -0.2981, 0.0234, -0.4671]) tensor([0.3753, 0.1938, 0.2673, 0.1637]) -Greedy action tensor([ 0.5935, -0.5268, -0.1331, -0.1738]) tensor([0.4398, 0.1434, 0.2126, 0.2042]) -Greedy action tensor([ 0.8677, -0.4505, -0.0612, -0.4540]) tensor([0.5183, 0.1387, 0.2047, 0.1382]) -Greedy action tensor([ 0.9623, -0.7696, 0.1186, -0.6354]) tensor([0.5527, 0.0978, 0.2377, 0.1118]) -Greedy action tensor([ 0.6060, -0.4012, -0.1095, -0.4521]) tensor([0.4543, 0.1659, 0.2221, 0.1577]) -Greedy action tensor([ 0.5662, -0.2673, -0.0508, -0.1525]) tensor([0.4063, 0.1765, 0.2192, 0.1980]) -Greedy action tensor([ 0.5829, -0.3989, -0.1839, -0.0880]) tensor([0.4255, 0.1594, 0.1976, 0.2175]) -Greedy action tensor([ 0.6270, -0.4770, 0.2513, -0.3999]) tensor([0.4208, 0.1395, 0.2890, 0.1507]) -Greedy action tensor([ 0.7737, -0.5908, -0.0027, -0.3460]) tensor([0.4897, 0.1251, 0.2253, 0.1598]) -Greedy action tensor([ 0.6195, 0.0878, 0.1189, -0.0543]) tensor([0.3699, 0.2173, 0.2242, 0.1886]) -Greedy action tensor([ 0.3825, -0.3448, -0.0439, -0.0854]) tensor([0.3620, 0.1749, 0.2363, 0.2267]) -Greedy action tensor([ 0.7954, -0.1041, -0.1246, -0.1306]) tensor([0.4542, 0.1848, 0.1810, 0.1799]) -Greedy action tensor([ 0.6156, -0.0998, -0.0932, -0.3125]) tensor([0.4208, 0.2058, 0.2071, 0.1663]) -Greedy action tensor([ 0.7128, -0.4528, -0.0998, -0.4661]) tensor([0.4847, 0.1511, 0.2151, 0.1491]) -Greedy action tensor([ 1.0018, -0.6529, -0.1068, -0.3533]) tensor([0.5621, 0.1074, 0.1855, 0.1450]) -Greedy action tensor([ 0.8790, -0.6670, -0.2669, -0.5841]) tensor([0.5674, 0.1209, 0.1804, 0.1314]) -Greedy action tensor([ 0.6534, -0.5854, -0.1753, -0.2142]) tensor([0.4659, 0.1350, 0.2034, 0.1957]) -Greedy action tensor([ 0.5821, -0.3574, -0.1502, -0.3980]) tensor([0.4451, 0.1739, 0.2140, 0.1670]) -Greedy action tensor([ 0.4373, -0.5422, -0.1279, -0.2159]) tensor([0.4058, 0.1524, 0.2306, 0.2112]) -Greedy action tensor([ 0.5343, -0.5009, -0.1421, -0.1130]) tensor([0.4189, 0.1488, 0.2130, 0.2193]) -Greedy action tensor([ 0.3924, -0.2959, -0.1173, -0.2325]) tensor([0.3790, 0.1904, 0.2277, 0.2029]) -Greedy action tensor([ 0.4219, -0.2216, -0.0855, -0.4417]) tensor([0.3923, 0.2061, 0.2362, 0.1654]) -Greedy action tensor([ 0.8470, -0.3687, 0.4385, 1.4608]) tensor([0.2626, 0.0779, 0.1745, 0.4851]) -Greedy action tensor([ 0.6818, -1.4289, 1.0397, -0.1414]) tensor([0.3344, 0.0405, 0.4783, 0.1468]) -Greedy action tensor([ 1.4653, -1.4124, -0.1424, 1.0368]) tensor([0.5241, 0.0295, 0.1050, 0.3414]) -Greedy action tensor([-0.4159, -0.9864, 1.0425, -0.3880]) tensor([0.1451, 0.0820, 0.6237, 0.1492]) -Greedy action tensor([1.1762, 0.0515, 0.2483, 0.1832]) tensor([0.4783, 0.1553, 0.1891, 0.1772]) -Greedy action tensor([ 0.5512, 0.1809, -0.0688, 0.9221]) tensor([0.2719, 0.1878, 0.1463, 0.3940]) -Greedy action tensor([ 0.4000, -1.0034, 0.4559, -0.0361]) tensor([0.3390, 0.0833, 0.3585, 0.2192]) -Greedy action tensor([ 1.9059, 0.3467, -0.3537, 1.3520]) tensor([0.5293, 0.1113, 0.0553, 0.3042]) -Greedy action tensor([ 1.0578, -1.8708, -0.1629, 1.2810]) tensor([0.3848, 0.0206, 0.1135, 0.4811]) -Greedy action tensor([ 0.7562, 0.0127, 1.8928, -0.8063]) tensor([0.2083, 0.0990, 0.6490, 0.0437]) -Greedy action tensor([ 1.0769, -0.3141, 0.7423, -0.1147]) tensor([0.4409, 0.1097, 0.3155, 0.1339]) -Greedy action tensor([ 1.2661, -1.3738, 0.5870, 0.7207]) tensor([0.4634, 0.0331, 0.2350, 0.2686]) -Greedy action tensor([-0.4659, -1.8329, -0.7266, 0.3368]) tensor([0.2349, 0.0599, 0.1810, 0.5242]) -Greedy action tensor([-0.5048, -0.3309, 0.6926, 0.2191]) tensor([0.1322, 0.1573, 0.4378, 0.2727]) -Greedy action tensor([ 0.7083, -1.0637, -0.1643, 0.1342]) tensor([0.4649, 0.0790, 0.1942, 0.2618]) -Greedy action tensor([ 1.5728, -0.8368, 0.7418, 1.3771]) tensor([0.4259, 0.0383, 0.1856, 0.3502]) -Greedy action tensor([-0.7104, -1.9871, -0.1520, -0.0097]) tensor([0.1983, 0.0553, 0.3467, 0.3997]) -Greedy action tensor([-0.5247, -2.4197, -0.0745, 0.7242]) tensor([0.1612, 0.0242, 0.2528, 0.5618]) -Greedy action tensor([ 0.5811, -0.4481, 0.9356, 0.1706]) tensor([0.2902, 0.1037, 0.4137, 0.1925]) -Greedy action tensor([ 1.3180, -0.8083, -0.1201, 0.5249]) tensor([0.5528, 0.0659, 0.1312, 0.2501]) -Greedy action tensor([-1.5666, -0.0887, 0.5932, -0.5036]) tensor([0.0590, 0.2587, 0.5115, 0.1708]) -Greedy action tensor([ 0.4141, 0.0973, 0.3207, -0.5771]) tensor([0.3322, 0.2420, 0.3026, 0.1233]) -Greedy action tensor([-0.0403, -0.0975, 0.7576, -0.5617]) tensor([0.2101, 0.1984, 0.4667, 0.1247]) -Greedy action tensor([ 0.3083, -0.3431, 0.9964, -0.2195]) tensor([0.2438, 0.1271, 0.4852, 0.1438]) -Greedy action tensor([ 0.3473, -2.0414, 0.4038, -0.6475]) tensor([0.3969, 0.0364, 0.4199, 0.1468]) -Greedy action tensor([ 0.5158, -0.0762, -0.3966, 0.2687]) tensor([0.3655, 0.2022, 0.1468, 0.2855]) -Greedy action tensor([-0.5070, 0.1557, -0.2020, -0.9235]) tensor([0.2018, 0.3915, 0.2737, 0.1330]) -Greedy action tensor([ 0.4247, -0.6272, 1.0518, -0.4423]) tensor([0.2746, 0.0959, 0.5141, 0.1154]) -Greedy action tensor([ 2.4248, -1.0714, -0.4235, 0.4843]) tensor([0.8118, 0.0246, 0.0470, 0.1166]) -Greedy action tensor([ 1.2761, -1.3774, 0.5193, 1.3394]) tensor([0.3839, 0.0270, 0.1801, 0.4090]) -Greedy action tensor([ 0.5924, -2.4216, -0.0814, -0.1916]) tensor([0.4962, 0.0244, 0.2529, 0.2265]) -Greedy action tensor([-1.1137, -1.3320, 1.7616, -1.3580]) tensor([0.0492, 0.0396, 0.8727, 0.0385]) -Greedy action tensor([ 1.6053, -1.9935, 0.3985, 0.6681]) tensor([0.5820, 0.0159, 0.1741, 0.2280]) -Greedy action tensor([-0.0184, -0.5071, 0.4070, 0.4429]) tensor([0.2114, 0.1297, 0.3235, 0.3353]) -Greedy action tensor([-0.0492, 0.2025, 0.1376, -0.4282]) tensor([0.2395, 0.3080, 0.2886, 0.1639]) -Greedy action tensor([-0.1464, -0.6448, 0.1811, 0.1916]) tensor([0.2274, 0.1382, 0.3155, 0.3189]) -Greedy action tensor([ 1.7451, -1.7756, -0.2132, 0.8340]) tensor([0.6358, 0.0188, 0.0897, 0.2557]) -Greedy action tensor([-1.0097, -0.7556, 0.7004, -0.4316]) tensor([0.1041, 0.1343, 0.5759, 0.1857]) -Greedy action tensor([-0.7353, -1.0419, 1.3058, 0.2268]) tensor([0.0830, 0.0611, 0.6388, 0.2172]) -Greedy action tensor([ 0.0932, -0.9476, -0.0877, 0.3787]) tensor([0.2843, 0.1004, 0.2372, 0.3781]) -Greedy action tensor([-0.0994, -0.5912, -0.1708, 0.6673]) tensor([0.2130, 0.1302, 0.1983, 0.4585]) -Greedy action tensor([-0.4649, -0.8086, 0.1670, -0.3019]) tensor([0.2098, 0.1488, 0.3946, 0.2469]) -Greedy action tensor([-1.0930, 0.1335, -0.2939, 0.4602]) tensor([0.0880, 0.3001, 0.1957, 0.4161]) -Greedy action tensor([-0.8378, -1.1268, -0.2313, -0.9616]) tensor([0.2239, 0.1677, 0.4106, 0.1978]) -Greedy action tensor([-1.0504, -0.2685, -0.3157, -1.7443]) tensor([0.1733, 0.3788, 0.3613, 0.0866]) -Greedy action tensor([ 0.5208, -1.1126, 0.5614, 0.5640]) tensor([0.3048, 0.0595, 0.3174, 0.3183]) -Greedy action tensor([0.7903, 0.3397, 0.1764, 0.5116]) tensor([0.3407, 0.2171, 0.1844, 0.2578]) -Greedy action tensor([0.2248, 0.0394, 1.6085, 0.0761]) tensor([0.1496, 0.1243, 0.5971, 0.1290]) -Greedy action tensor([ 0.3426, -1.1555, 0.4257, 1.3678]) tensor([0.1962, 0.0439, 0.2132, 0.5468]) -Greedy action tensor([ 0.9059, 1.0376, -0.0293, 0.6557]) tensor([0.3019, 0.3444, 0.1185, 0.2351]) -Greedy action tensor([ 0.3844, -0.0464, -0.0346, -0.0345]) tensor([0.3372, 0.2192, 0.2218, 0.2218]) -Greedy action tensor([ 0.2909, -1.0548, 0.0394, -0.7078]) tensor([0.4155, 0.1082, 0.3232, 0.1531]) -Greedy action tensor([ 0.5256, -0.0600, -0.3367, 0.2996]) tensor([0.3601, 0.2005, 0.1521, 0.2873]) -Greedy action tensor([ 0.6861, -0.3249, 0.4550, 0.5960]) tensor([0.3256, 0.1185, 0.2584, 0.2975]) -Greedy action tensor([ 0.1196, -2.0755, -0.0785, 0.2788]) tensor([0.3221, 0.0359, 0.2643, 0.3777]) -Greedy action tensor([-0.4865, -1.2995, -0.2658, 0.4608]) tensor([0.1898, 0.0842, 0.2366, 0.4894]) -Greedy action tensor([-0.1420, -2.1710, 0.0108, 0.2207]) tensor([0.2678, 0.0352, 0.3121, 0.3849]) -Greedy action tensor([ 0.8593, 0.4449, -0.0185, 0.3870]) tensor([0.3704, 0.2447, 0.1540, 0.2309]) -Greedy action tensor([ 0.0039, 0.1575, -0.5079, 0.4831]) tensor([0.2283, 0.2662, 0.1368, 0.3687]) -Greedy action tensor([ 0.2955, -1.8055, 0.0286, 0.9751]) tensor([0.2590, 0.0317, 0.1983, 0.5110]) -Greedy action tensor([ 0.3346, -1.6187, -0.1399, 0.4831]) tensor([0.3420, 0.0485, 0.2128, 0.3967]) -Greedy action tensor([-0.3377, -1.6836, -0.4193, 1.2394]) tensor([0.1424, 0.0371, 0.1312, 0.6893]) -Greedy action tensor([-0.2727, 0.7153, 1.3983, -0.6108]) tensor([0.1029, 0.2764, 0.5473, 0.0734]) -Greedy action tensor([-0.7567, -0.7462, 2.2139, -0.0474]) tensor([0.0425, 0.0429, 0.8283, 0.0863]) -Greedy action tensor([-0.6931, -0.5404, 0.1539, -0.1405]) tensor([0.1604, 0.1868, 0.3741, 0.2787]) -Greedy action tensor([ 0.4116, -1.8825, 0.8430, 0.0701]) tensor([0.2984, 0.0301, 0.4594, 0.2121]) -Greedy action tensor([ 0.6882, -0.2291, 0.3523, -1.1633]) tensor([0.4403, 0.1759, 0.3147, 0.0691]) -Greedy action tensor([ 1.2024, -0.6333, 0.1498, 1.0430]) tensor([0.4235, 0.0676, 0.1478, 0.3611]) -Greedy action tensor([-0.6248, -1.3296, 0.2357, 0.4722]) tensor([0.1459, 0.0721, 0.3450, 0.4370]) -Greedy action tensor([ 0.3764, -1.0621, 0.9562, -0.4327]) tensor([0.2883, 0.0684, 0.5149, 0.1284]) -Greedy action tensor([ 1.1548, 0.3453, 0.6120, -0.2911]) tensor([0.4422, 0.1968, 0.2569, 0.1041]) -Greedy action tensor([ 0.7334, 0.6161, 1.7185, -0.2698]) tensor([0.2027, 0.1802, 0.5428, 0.0743]) -Greedy action tensor([-0.9611, 0.1222, 1.6507, -1.0521]) tensor([0.0541, 0.1598, 0.7368, 0.0494]) -Greedy action tensor([-1.0914, -2.0010, 0.9887, -0.4135]) tensor([0.0879, 0.0354, 0.7036, 0.1731]) -Greedy action tensor([1.0703, 0.2386, 0.6390, 1.2379]) tensor([0.3061, 0.1332, 0.1988, 0.3619]) -Greedy action tensor([-0.0933, -1.2114, 1.3661, 0.0616]) tensor([0.1471, 0.0481, 0.6330, 0.1718]) -Greedy action tensor([ 0.7194, -1.1331, 0.3239, 0.2912]) tensor([0.4029, 0.0632, 0.2713, 0.2626]) -Greedy action tensor([ 0.8318, 0.4421, 0.3321, -0.9777]) tensor([0.4085, 0.2767, 0.2479, 0.0669]) -Greedy action tensor([-0.1155, 0.1091, 0.9226, -0.6844]) tensor([0.1773, 0.2219, 0.5005, 0.1004]) -Greedy action tensor([ 1.4403, -0.7365, -0.3274, 0.9968]) tensor([0.5192, 0.0589, 0.0886, 0.3332]) -Greedy action tensor([0.3622, 0.5191, 0.9209, 1.0273]) tensor([0.1706, 0.1995, 0.2982, 0.3317]) -Greedy action tensor([1.0914, 0.1355, 1.2558, 0.0685]) tensor([0.3422, 0.1315, 0.4033, 0.1230]) -Greedy action tensor([-1.8163, -0.4222, 0.6297, -0.0501]) tensor([0.0446, 0.1798, 0.5148, 0.2608]) -Greedy action tensor([-1.8264, -0.4488, 0.6669, -0.0473]) tensor([0.0435, 0.1725, 0.5263, 0.2577]) -Greedy action tensor([-1.3310, -0.6142, 0.3385, 0.1759]) tensor([0.0777, 0.1591, 0.4126, 0.3506]) -Greedy action tensor([-1.7005, -0.0743, 0.5126, -0.0646]) tensor([0.0491, 0.2497, 0.4491, 0.2521]) -Greedy action tensor([-0.9316, -0.5985, 0.1992, 0.3939]) tensor([0.1080, 0.1507, 0.3347, 0.4066]) -Greedy action tensor([-1.9043, -0.4293, 0.6449, -0.1623]) tensor([0.0419, 0.1831, 0.5360, 0.2391]) -Greedy action tensor([-0.9101, 0.3426, -0.4205, -0.0023]) tensor([0.1161, 0.4065, 0.1895, 0.2879]) -Greedy action tensor([-1.7949, -0.0474, 0.5279, -0.0891]) tensor([0.0445, 0.2557, 0.4545, 0.2452]) -Greedy action tensor([-1.8525, -0.4857, 0.6222, -0.1283]) tensor([0.0446, 0.1750, 0.5301, 0.2503]) -Greedy action tensor([-1.7500, -0.0498, 0.4997, -0.0416]) tensor([0.0466, 0.2549, 0.4416, 0.2570]) -Greedy action tensor([-1.8738, -0.3499, 0.6346, -0.1129]) tensor([0.0422, 0.1937, 0.5185, 0.2455]) -Greedy action tensor([-1.9077, -0.4477, 0.6427, -0.1569]) tensor([0.0419, 0.1803, 0.5366, 0.2412]) -Greedy action tensor([-1.7598, -0.3056, 0.5633, -0.0733]) tensor([0.0479, 0.2050, 0.4886, 0.2585]) -Greedy action tensor([-1.9457, -0.4550, 0.6694, -0.1804]) tensor([0.0401, 0.1779, 0.5478, 0.2342]) -Greedy action tensor([-1.8904, -0.4516, 0.6419, -0.1438]) tensor([0.0425, 0.1791, 0.5347, 0.2437]) -Greedy action tensor([-1.9167, -0.4396, 0.6495, -0.1671]) tensor([0.0414, 0.1814, 0.5390, 0.2382]) -Greedy action tensor([-1.3579, -0.1070, 0.4058, -0.1183]) tensor([0.0726, 0.2535, 0.4233, 0.2506]) -Greedy action tensor([-1.7224, -0.3842, 0.6656, -0.0476]) tensor([0.0475, 0.1812, 0.5176, 0.2537]) -Greedy action tensor([-1.8773, -0.4629, 0.6484, -0.1312]) tensor([0.0428, 0.1762, 0.5354, 0.2455]) -Greedy action tensor([-1.6171, -0.5163, 0.4934, -0.0181]) tensor([0.0581, 0.1747, 0.4796, 0.2876]) -Greedy action tensor([-1.1079, -0.1420, 0.9838, 0.9692]) tensor([0.0507, 0.1333, 0.4109, 0.4050]) -Greedy action tensor([-1.9231, -0.7195, 1.1550, 0.0694]) tensor([0.0300, 0.0998, 0.6505, 0.2197]) -Greedy action tensor([-1.8510, -0.4136, 0.6564, -0.1273]) tensor([0.0433, 0.1823, 0.5316, 0.2428]) -Greedy action tensor([-0.7771, 0.9605, 0.1431, -0.2857]) tensor([0.0924, 0.5249, 0.2318, 0.1510]) -Greedy action tensor([-1.9166, -0.3844, 0.6467, -0.1563]) tensor([0.0410, 0.1895, 0.5314, 0.2381]) -Greedy action tensor([-1.5230, 0.1749, 0.3900, -0.0476]) tensor([0.0568, 0.3102, 0.3847, 0.2483]) -Greedy action tensor([-1.8476, -0.4671, 0.7066, -0.0351]) tensor([0.0417, 0.1660, 0.5367, 0.2556]) -Greedy action tensor([-1.9291, -0.4613, 0.6573, -0.1731]) tensor([0.0410, 0.1778, 0.5441, 0.2372]) -Greedy action tensor([-1.5024, 0.1398, 0.4960, 0.0660]) tensor([0.0545, 0.2817, 0.4022, 0.2616]) -Greedy action tensor([-1.7119, -0.3526, 0.6184, 0.0688]) tensor([0.0474, 0.1844, 0.4871, 0.2811]) -Greedy action tensor([-1.5638, -0.5458, 0.4665, 0.0938]) tensor([0.0601, 0.1664, 0.4580, 0.3155]) -Greedy action tensor([-1.9204, -0.4447, 0.6515, -0.1652]) tensor([0.0412, 0.1804, 0.5398, 0.2386]) -Greedy action tensor([-1.8386, -0.4299, 0.7032, -0.0394]) tensor([0.0420, 0.1716, 0.5329, 0.2536]) -Greedy action tensor([-1.8244, -0.3626, 0.5913, -0.1129]) tensor([0.0454, 0.1956, 0.5079, 0.2511]) -Greedy action tensor([-1.5213, -0.5374, 0.4345, 0.0738]) tensor([0.0638, 0.1707, 0.4510, 0.3145]) -Greedy action tensor([-1.4360, 0.7109, 0.2857, 0.1849]) tensor([0.0495, 0.4235, 0.2768, 0.2503]) -Greedy action tensor([-1.7290, -0.2450, 0.5334, 0.0038]) tensor([0.0484, 0.2133, 0.4647, 0.2736]) -Greedy action tensor([-1.7745, -0.4651, 0.5754, -0.0766]) tensor([0.0484, 0.1794, 0.5077, 0.2645]) -Greedy action tensor([-1.7951, -0.3488, 0.5760, -0.1258]) tensor([0.0470, 0.1997, 0.5036, 0.2496]) -Greedy action tensor([-1.7213, -0.4729, 0.5628, -0.0597]) tensor([0.0511, 0.1781, 0.5016, 0.2692]) -Greedy action tensor([-1.9185, -0.4023, 0.6435, -0.1663]) tensor([0.0412, 0.1876, 0.5338, 0.2375]) -Greedy action tensor([-0.2666, -0.0860, 0.2608, 0.4148]) tensor([0.1704, 0.2041, 0.2887, 0.3368]) -Greedy action tensor([-1.9360, -0.4420, 0.6622, -0.1755]) tensor([0.0405, 0.1803, 0.5439, 0.2353]) -Greedy action tensor([-1.8803, -0.5951, 0.8634, -0.0447]) tensor([0.0378, 0.1368, 0.5882, 0.2372]) -Greedy action tensor([-1.8856, -0.4586, 0.6388, -0.1346]) tensor([0.0427, 0.1780, 0.5332, 0.2461]) -Greedy action tensor([-1.9081, -0.4284, 0.6522, -0.1605]) tensor([0.0415, 0.1824, 0.5375, 0.2385]) -Greedy action tensor([-1.8426, -0.4169, 0.6156, -0.1229]) tensor([0.0446, 0.1855, 0.5210, 0.2489]) -Greedy action tensor([-1.5296, -0.4912, 0.5301, 0.2021]) tensor([0.0577, 0.1631, 0.4529, 0.3263]) -Greedy action tensor([-1.5647, -0.8995, 0.8433, -0.3112]) tensor([0.0570, 0.1108, 0.6328, 0.1995]) -Greedy action tensor([-0.5006, -0.4532, 0.1845, 0.1795]) tensor([0.1665, 0.1746, 0.3303, 0.3287]) -Greedy action tensor([-1.9034, -0.4422, 0.6436, -0.1652]) tensor([0.0421, 0.1814, 0.5373, 0.2393]) -Greedy action tensor([-1.0855, -0.1437, 0.3321, -0.1134]) tensor([0.0968, 0.2481, 0.3993, 0.2558]) -Greedy action tensor([-1.6327, -0.5427, 0.5074, -0.0463]) tensor([0.0576, 0.1713, 0.4896, 0.2814]) -Greedy action tensor([1.1549, 1.2394, 0.0863, 0.8456]) tensor([0.3159, 0.3437, 0.1085, 0.2319]) -Greedy action tensor([-0.6128, -0.5544, 0.1647, 0.2805]) tensor([0.1497, 0.1587, 0.3258, 0.3658]) -Greedy action tensor([-1.8935, -0.4650, 0.6343, -0.1604]) tensor([0.0428, 0.1786, 0.5363, 0.2423]) -Greedy action tensor([-1.9096, -0.4218, 0.6446, -0.1661]) tensor([0.0417, 0.1844, 0.5357, 0.2382]) -Greedy action tensor([-1.6196, -0.5317, 0.4916, -0.0194]) tensor([0.0582, 0.1728, 0.4807, 0.2884]) -Greedy action tensor([-1.1475, 0.7837, 0.1632, 0.1565]) tensor([0.0654, 0.4511, 0.2425, 0.2409]) -Greedy action tensor([-1.2862, -0.5101, 0.7706, 0.9051]) tensor([0.0501, 0.1090, 0.3922, 0.4487]) -Greedy action tensor([-1.7535, -0.4006, 0.5899, -0.0598]) tensor([0.0483, 0.1867, 0.5026, 0.2625]) -Greedy action tensor([-1.7399, -0.5063, 0.5646, -0.1015]) tensor([0.0510, 0.1752, 0.5112, 0.2626]) -Greedy action tensor([-1.6576, -0.4881, 0.5557, 0.0692]) tensor([0.0527, 0.1696, 0.4816, 0.2961]) -Greedy action tensor([-1.4828, -0.2558, 0.4230, 0.0358]) tensor([0.0637, 0.2172, 0.4283, 0.2908]) -Greedy action tensor([-1.7863, -0.4559, 0.6638, 0.0400]) tensor([0.0443, 0.1675, 0.5132, 0.2750]) -Greedy action tensor([-1.8835, -0.3725, 0.6259, -0.1583]) tensor([0.0427, 0.1933, 0.5246, 0.2395]) -Greedy action tensor([-1.8324, -0.4139, 0.6047, -0.1306]) tensor([0.0453, 0.1873, 0.5187, 0.2487]) -Greedy action tensor([-1.9315, -0.4479, 0.6593, -0.1726]) tensor([0.0407, 0.1795, 0.5433, 0.2364]) -Greedy action tensor([-1.9200, -0.4415, 0.6630, -0.1620]) tensor([0.0409, 0.1796, 0.5420, 0.2375]) -Greedy action tensor([-1.7564, -0.2790, 0.6016, -0.0634]) tensor([0.0468, 0.2049, 0.4942, 0.2542]) -Greedy action tensor([-1.4794, -0.5164, 1.1044, 0.8777]) tensor([0.0365, 0.0955, 0.4830, 0.3850]) -Greedy action tensor([-1.9018, -0.4386, 0.6421, -0.1575]) tensor([0.0421, 0.1817, 0.5355, 0.2407]) -Greedy action tensor([-1.8815, -0.3741, 0.6240, -0.1426]) tensor([0.0426, 0.1925, 0.5222, 0.2426]) -Greedy action tensor([-1.3395, -0.6153, 0.3178, 0.1799]) tensor([0.0777, 0.1602, 0.4073, 0.3549]) -Greedy action tensor([-1.2770, -0.6751, 0.2665, 0.2766]) tensor([0.0817, 0.1492, 0.3826, 0.3865]) -Greedy action tensor([-0.8739, -0.5443, 1.0142, 1.2432]) tensor([0.0578, 0.0804, 0.3818, 0.4800]) -Greedy action tensor([-1.8115, -0.4755, 0.6049, -0.1452]) tensor([0.0469, 0.1786, 0.5260, 0.2485]) -Greedy action tensor([-1.1092, -0.5959, 0.2866, 0.1126]) tensor([0.0990, 0.1654, 0.3997, 0.3359]) -Greedy action tensor([-1.0752, -0.5964, 0.2169, 0.2863]) tensor([0.0985, 0.1589, 0.3584, 0.3842]) -Greedy action tensor([-1.6794, -0.4779, 0.5375, -0.0575]) tensor([0.0539, 0.1791, 0.4944, 0.2727]) -Greedy action tensor([-1.4572, -0.4921, 0.4539, 0.0965]) tensor([0.0662, 0.1737, 0.4473, 0.3129]) -Greedy action tensor([ 1.6412, -0.7938, -0.1824, 0.5462]) tensor([0.6315, 0.0553, 0.1019, 0.2112]) -Greedy action tensor([ 1.5863, -0.6156, -0.5906, 0.6034]) tensor([0.6257, 0.0692, 0.0709, 0.2342]) -Greedy action tensor([ 1.3000, -0.2014, -0.1197, 0.2706]) tensor([0.5489, 0.1223, 0.1327, 0.1961]) -Greedy action tensor([ 2.2420, -0.9037, -0.5393, 0.5576]) tensor([0.7749, 0.0333, 0.0480, 0.1438]) -Greedy action tensor([ 1.4423, -0.3635, -0.4652, 0.2910]) tensor([0.6139, 0.1009, 0.0911, 0.1941]) -Greedy action tensor([ 1.4472, -0.4381, -0.5875, 0.5949]) tensor([0.5852, 0.0888, 0.0765, 0.2495]) -Greedy action tensor([ 1.3488, -0.5414, -0.1891, 0.3159]) tensor([0.5808, 0.0877, 0.1248, 0.2067]) -Greedy action tensor([ 1.3216, -0.3306, -0.3525, 0.4550]) tensor([0.5557, 0.1065, 0.1042, 0.2336]) -Greedy action tensor([ 1.0228, -0.5384, -0.2519, 0.2852]) tensor([0.5082, 0.1067, 0.1421, 0.2431]) -Greedy action tensor([ 2.0178, -1.0772, -0.2554, 0.4369]) tensor([0.7385, 0.0334, 0.0761, 0.1520]) -Greedy action tensor([ 1.4217, -0.4099, -0.3574, 0.1630]) tensor([0.6200, 0.0993, 0.1047, 0.1761]) -Greedy action tensor([ 1.7320, -0.2942, -0.2439, 0.6451]) tensor([0.6220, 0.0820, 0.0862, 0.2098]) -Greedy action tensor([ 1.4878, -0.5920, -0.2448, 0.5656]) tensor([0.5884, 0.0735, 0.1041, 0.2340]) -Greedy action tensor([ 1.3579, -0.8408, -0.4029, 0.4569]) tensor([0.5920, 0.0657, 0.1018, 0.2405]) -Greedy action tensor([ 8.9451e-01, 6.0171e-05, -7.4625e-01, 1.8915e-01]) tensor([0.4770, 0.1950, 0.0925, 0.2356]) -Greedy action tensor([ 0.8362, -0.3980, -0.3148, 0.8107]) tensor([0.3873, 0.1127, 0.1225, 0.3775]) -Greedy action tensor([ 1.5584, -0.5305, -0.9850, 0.7172]) tensor([0.6121, 0.0758, 0.0481, 0.2640]) -Greedy action tensor([ 1.4132, -0.2590, -0.5064, 0.5955]) tensor([0.5631, 0.1058, 0.0826, 0.2486]) -Greedy action tensor([ 1.2304, -0.2910, -0.4360, 0.4669]) tensor([0.5338, 0.1166, 0.1009, 0.2488]) -Greedy action tensor([ 0.8957, -0.5288, -0.1147, 0.1998]) tensor([0.4754, 0.1144, 0.1731, 0.2371]) -Greedy action tensor([ 1.3464, -0.6544, -0.2432, 0.2612]) tensor([0.5963, 0.0806, 0.1216, 0.2014]) -Greedy action tensor([ 1.7634, -0.7157, -0.0079, 0.8086]) tensor([0.6102, 0.0511, 0.1038, 0.2349]) -Greedy action tensor([ 1.8837, -0.3880, -0.5332, 0.0028]) tensor([0.7436, 0.0767, 0.0663, 0.1134]) -Greedy action tensor([ 1.3790, -0.4877, -0.3492, 0.1916]) tensor([0.6108, 0.0944, 0.1085, 0.1863]) -Greedy action tensor([ 1.5604, -0.1008, -0.3813, 0.1983]) tensor([0.6291, 0.1195, 0.0903, 0.1611]) -Greedy action tensor([ 1.5802, 0.0241, -0.7353, 0.5475]) tensor([0.6003, 0.1267, 0.0593, 0.2137]) -Greedy action tensor([ 1.3379, -0.7474, 0.1312, 0.1191]) tensor([0.5817, 0.0723, 0.1740, 0.1719]) -Greedy action tensor([ 1.3041, -0.5180, -0.4896, 0.7258]) tensor([0.5294, 0.0856, 0.0881, 0.2969]) -Greedy action tensor([ 1.6057, -0.6439, -0.3916, 0.4552]) tensor([0.6420, 0.0677, 0.0871, 0.2032]) -Greedy action tensor([ 1.5212, -0.3112, -0.5204, 0.1235]) tensor([0.6506, 0.1041, 0.0845, 0.1608]) -Greedy action tensor([ 1.5061, -0.3422, -0.4684, -0.5967]) tensor([0.7050, 0.1110, 0.0979, 0.0861]) -Greedy action tensor([ 1.5838, -0.4129, -0.3820, 0.3902]) tensor([0.6333, 0.0860, 0.0887, 0.1920]) -Greedy action tensor([ 1.4557, -0.1858, -0.4749, 0.5202]) tensor([0.5776, 0.1119, 0.0838, 0.2267]) -Greedy action tensor([ 1.1713, -0.1703, -0.2482, 0.2297]) tensor([0.5282, 0.1381, 0.1277, 0.2060]) -Greedy action tensor([ 1.0358, -0.6968, -0.2154, 0.3404]) tensor([0.5097, 0.0901, 0.1459, 0.2543]) -Greedy action tensor([ 1.4868, -0.7361, -0.9039, 0.3906]) tensor([0.6519, 0.0706, 0.0597, 0.2178]) -Greedy action tensor([ 1.5188, -0.4889, -0.7073, 0.6916]) tensor([0.5954, 0.0800, 0.0643, 0.2604]) -Greedy action tensor([ 1.6896, -0.8863, -0.5137, 0.5077]) tensor([0.6697, 0.0509, 0.0740, 0.2054]) -Greedy action tensor([ 1.1260, -0.2573, -0.1577, 0.4933]) tensor([0.4857, 0.1218, 0.1345, 0.2580]) -Greedy action tensor([ 1.1828, -0.8038, -0.2886, -0.0918]) tensor([0.6074, 0.0833, 0.1395, 0.1698]) -Greedy action tensor([ 1.0306, -0.4550, -0.3582, 0.0149]) tensor([0.5441, 0.1232, 0.1357, 0.1970]) -Greedy action tensor([ 2.0851, -0.9409, -0.1234, 0.7698]) tensor([0.7009, 0.0340, 0.0770, 0.1881]) -Greedy action tensor([ 1.5238, -0.1449, -0.3572, 0.5420]) tensor([0.5829, 0.1099, 0.0889, 0.2184]) -Greedy action tensor([ 1.8181, -0.5177, -0.3808, 0.1643]) tensor([0.7148, 0.0691, 0.0793, 0.1368]) -Greedy action tensor([ 1.0097, -0.2415, -0.4894, 0.4516]) tensor([0.4804, 0.1375, 0.1073, 0.2749]) -Greedy action tensor([ 1.2545, -0.2606, -0.9728, 0.2217]) tensor([0.5940, 0.1305, 0.0640, 0.2115]) -Greedy action tensor([ 0.9021, -0.2652, -0.1612, 0.2129]) tensor([0.4633, 0.1442, 0.1600, 0.2326]) -Greedy action tensor([ 1.5748, -0.0042, -0.4894, 0.2658]) tensor([0.6238, 0.1286, 0.0792, 0.1685]) -Greedy action tensor([ 1.7439, -0.1184, -0.4541, 0.5569]) tensor([0.6363, 0.0988, 0.0706, 0.1942]) -Greedy action tensor([ 1.2491, -0.3162, -0.5584, 0.2766]) tensor([0.5710, 0.1194, 0.0937, 0.2159]) -Greedy action tensor([ 2.6644, -1.4582, -0.0174, 0.5711]) tensor([0.8279, 0.0134, 0.0567, 0.1021]) -Greedy action tensor([ 1.4665, -0.3290, -0.2850, 0.6375]) tensor([0.5630, 0.0935, 0.0977, 0.2458]) -Greedy action tensor([ 0.4827, -0.3654, 0.0230, -0.0786]) tensor([0.3802, 0.1628, 0.2401, 0.2169]) -Greedy action tensor([ 1.1656, -0.7158, -0.5092, 0.0134]) tensor([0.6040, 0.0920, 0.1132, 0.1908]) -Greedy action tensor([ 1.2787, -0.2459, -0.6268, 0.4755]) tensor([0.5512, 0.1200, 0.0820, 0.2469]) -Greedy action tensor([ 0.6262, -0.9280, -0.2516, 0.1236]) tensor([0.4480, 0.0947, 0.1862, 0.2711]) -Greedy action tensor([ 1.5807, -0.4632, -0.2744, 0.5946]) tensor([0.6028, 0.0781, 0.0943, 0.2249]) -Greedy action tensor([ 2.0177, 0.2321, -0.1148, -0.3315]) tensor([0.7237, 0.1214, 0.0858, 0.0691]) -Greedy action tensor([ 1.7753, -0.9831, -0.3008, 0.5906]) tensor([0.6690, 0.0424, 0.0839, 0.2046]) -Greedy action tensor([ 1.2184, -0.2368, -0.2154, 0.4826]) tensor([0.5126, 0.1196, 0.1222, 0.2456]) -Greedy action tensor([ 2.4940, -1.2462, -0.2024, 0.8026]) tensor([0.7840, 0.0186, 0.0529, 0.1445]) -Greedy action tensor([ 1.4919, -0.6863, -0.1406, 0.4446]) tensor([0.6026, 0.0682, 0.1178, 0.2114]) -Greedy action tensor([ 1.3180, -0.4025, -0.1689, -0.1070]) tensor([0.6077, 0.1088, 0.1374, 0.1462]) -Greedy action tensor([ 1.2911, -0.2689, -0.4431, 0.3022]) tensor([0.5686, 0.1195, 0.1004, 0.2115]) -Greedy action tensor([ 1.8122, -0.5503, -0.4956, 0.1302]) tensor([0.7248, 0.0683, 0.0721, 0.1348]) -Greedy action tensor([ 1.0746, -0.3789, -0.1550, 0.4701]) tensor([0.4825, 0.1128, 0.1411, 0.2636]) -Greedy action tensor([ 1.2242, -0.3982, -0.2131, 0.1126]) tensor([0.5669, 0.1119, 0.1347, 0.1865]) -Greedy action tensor([ 1.7401, -0.6396, -0.4477, 0.6842]) tensor([0.6441, 0.0596, 0.0722, 0.2241]) -Greedy action tensor([ 2.2237, -0.3637, 0.0296, 0.6400]) tensor([0.7184, 0.0540, 0.0801, 0.1474]) -Greedy action tensor([ 1.4542, -0.5471, -0.0631, 0.7835]) tensor([0.5360, 0.0724, 0.1175, 0.2741]) -Greedy action tensor([ 1.0648, -0.2006, -0.5537, 0.2610]) tensor([0.5187, 0.1463, 0.1028, 0.2322]) -Greedy action tensor([ 1.4573, -0.2631, -0.3953, -0.3426]) tensor([0.6662, 0.1192, 0.1045, 0.1101]) -Greedy action tensor([ 1.8441, -0.6261, -0.1843, 0.7322]) tensor([0.6472, 0.0547, 0.0851, 0.2129]) -Greedy action tensor([ 0.9863, 0.0559, -0.1447, 0.2454]) tensor([0.4558, 0.1798, 0.1471, 0.2173]) -Greedy action tensor([ 1.1959, 0.0432, -0.5956, 0.2549]) tensor([0.5340, 0.1686, 0.0890, 0.2084]) -Greedy action tensor([ 2.5665, -1.0001, -0.6349, 0.2732]) tensor([0.8548, 0.0241, 0.0348, 0.0863]) -Greedy action tensor([ 1.3470, 0.1328, -0.1459, -0.1170]) tensor([0.5705, 0.1694, 0.1282, 0.1320]) -Greedy action tensor([ 1.3715, -0.7206, -0.4220, 0.3758]) tensor([0.6027, 0.0744, 0.1003, 0.2227]) -Greedy action tensor([ 1.5369, -0.6327, 0.0172, 0.4510]) tensor([0.5986, 0.0684, 0.1309, 0.2021]) -Greedy action tensor([ 1.1134, -0.3956, -0.1603, 0.4293]) tensor([0.4986, 0.1103, 0.1395, 0.2516]) -Greedy action tensor([ 1.6161, -0.5123, -0.2635, 0.2251]) tensor([0.6577, 0.0783, 0.1004, 0.1637]) -Greedy action tensor([ 0.2625, 0.1416, 0.0371, -0.1360]) tensor([0.2980, 0.2641, 0.2379, 0.2001]) -Greedy action tensor([ 0.7306, -0.2313, -0.0234, -0.3604]) tensor([0.4569, 0.1746, 0.2150, 0.1535]) -Greedy action tensor([ 1.0952, -1.6314, -0.2226, -0.7462]) tensor([0.6703, 0.0439, 0.1795, 0.1063]) -Greedy action tensor([ 0.6486, -0.4854, -0.0618, -0.4028]) tensor([0.4624, 0.1488, 0.2272, 0.1616]) -Greedy action tensor([ 0.5295, -0.2381, -0.0490, -0.2264]) tensor([0.4009, 0.1861, 0.2248, 0.1883]) -Greedy action tensor([ 0.8146, -0.3892, -0.0302, -0.4014]) tensor([0.4935, 0.1481, 0.2120, 0.1463]) -Greedy action tensor([ 0.8638, -0.2417, 0.0201, -0.3163]) tensor([0.4835, 0.1600, 0.2079, 0.1485]) -Greedy action tensor([ 0.7403, -0.3163, 0.1323, -0.9359]) tensor([0.4810, 0.1672, 0.2619, 0.0900]) -Greedy action tensor([ 0.7356, -0.4110, -0.0810, -0.3399]) tensor([0.4760, 0.1512, 0.2104, 0.1624]) -Greedy action tensor([ 0.5456, -0.3595, 0.0275, -0.2111]) tensor([0.4050, 0.1638, 0.2412, 0.1900]) -Greedy action tensor([ 0.5782, -0.1914, -0.0357, -0.1010]) tensor([0.3982, 0.1844, 0.2155, 0.2019]) -Greedy action tensor([ 0.3461, -0.0610, -0.0449, -0.0927]) tensor([0.3348, 0.2228, 0.2265, 0.2159]) -Greedy action tensor([ 0.5219, 0.1511, -0.1325, -0.1798]) tensor([0.3696, 0.2551, 0.1921, 0.1832]) -Greedy action tensor([ 1.1688, -0.7858, 0.0721, -0.3998]) tensor([0.5939, 0.0841, 0.1983, 0.1237]) -Greedy action tensor([ 0.4939, -0.4809, -0.0804, -0.4372]) tensor([0.4284, 0.1616, 0.2412, 0.1688]) -Greedy action tensor([ 0.3823, -0.0173, -0.0656, 0.0252]) tensor([0.3323, 0.2228, 0.2123, 0.2325]) -Greedy action tensor([ 0.8825, -0.7837, -0.0041, -0.3079]) tensor([0.5249, 0.0992, 0.2163, 0.1596]) -Greedy action tensor([ 0.7552, -0.3104, -0.1211, -0.1310]) tensor([0.4602, 0.1585, 0.1916, 0.1897]) -Greedy action tensor([ 0.8936, -0.4046, -0.1675, -0.2894]) tensor([0.5193, 0.1418, 0.1797, 0.1591]) -Greedy action tensor([ 0.7263, -0.3008, -0.0135, -0.1091]) tensor([0.4407, 0.1578, 0.2103, 0.1911]) -Greedy action tensor([ 0.6492, -0.5292, -0.1282, -0.4003]) tensor([0.4723, 0.1453, 0.2170, 0.1654]) -Greedy action tensor([ 0.4197, -0.0551, -0.0306, -0.0157]) tensor([0.3441, 0.2140, 0.2193, 0.2226]) -Greedy action tensor([ 0.5286, -0.1076, -0.2175, -0.3237]) tensor([0.4115, 0.2178, 0.1952, 0.1755]) -Greedy action tensor([ 0.9224, -0.3054, -0.2115, -0.4091]) tensor([0.5323, 0.1559, 0.1713, 0.1406]) -Greedy action tensor([ 1.0831, -0.5254, 0.0903, -0.7104]) tensor([0.5757, 0.1152, 0.2133, 0.0958]) -Greedy action tensor([ 0.9466, -0.6977, 0.0574, -0.3766]) tensor([0.5347, 0.1033, 0.2197, 0.1424]) -Greedy action tensor([ 0.3216, -0.0671, -0.0470, -0.0975]) tensor([0.3303, 0.2239, 0.2285, 0.2172]) -Greedy action tensor([ 1.1256, -0.4201, 0.2174, -0.7520]) tensor([0.5652, 0.1205, 0.2279, 0.0865]) -Greedy action tensor([ 0.5788, -0.1645, -0.0070, -0.2191]) tensor([0.4028, 0.1916, 0.2242, 0.1814]) -Greedy action tensor([ 0.8156, -0.7925, -0.1853, -0.5320]) tensor([0.5471, 0.1096, 0.2011, 0.1422]) -Greedy action tensor([ 0.2682, 0.1043, 0.1835, -0.2981]) tensor([0.2998, 0.2545, 0.2755, 0.1702]) -Greedy action tensor([ 0.6337, -0.4720, -0.0895, -0.2355]) tensor([0.4473, 0.1481, 0.2171, 0.1876]) -Greedy action tensor([ 0.8300, -0.8450, 0.0918, -0.3276]) tensor([0.5052, 0.0946, 0.2415, 0.1587]) -Greedy action tensor([ 0.6543, -0.2088, -0.1505, -0.0253]) tensor([0.4209, 0.1776, 0.1882, 0.2133]) -Greedy action tensor([ 0.5201, -0.3406, 0.0280, -0.3299]) tensor([0.4062, 0.1718, 0.2483, 0.1736]) -Greedy action tensor([ 0.7737, -0.6461, -0.0009, -0.4563]) tensor([0.5013, 0.1212, 0.2310, 0.1465]) -Greedy action tensor([ 1.0554, -0.5591, -0.0160, -0.2798]) tensor([0.5541, 0.1103, 0.1898, 0.1458]) -Greedy action tensor([ 1.2281, -0.4651, -0.0305, -0.5269]) tensor([0.6094, 0.1121, 0.1731, 0.1054]) -Greedy action tensor([ 0.4532, 0.2660, -0.0112, -0.1223]) tensor([0.3311, 0.2746, 0.2081, 0.1862]) -Greedy action tensor([ 0.5619, -0.6892, -0.1607, -0.1400]) tensor([0.4411, 0.1262, 0.2141, 0.2186]) -Greedy action tensor([ 1.0930, -0.1571, 0.1645, -0.7096]) tensor([0.5416, 0.1552, 0.2140, 0.0893]) -Greedy action tensor([ 0.8450, -0.4239, -0.0420, -0.5810]) tensor([0.5172, 0.1454, 0.2131, 0.1243]) -Greedy action tensor([ 0.7440, -0.0473, -0.1837, -0.2086]) tensor([0.4475, 0.2029, 0.1770, 0.1726]) -Greedy action tensor([ 0.6674, -0.3777, -0.1053, -0.1044]) tensor([0.4394, 0.1545, 0.2029, 0.2031]) -Greedy action tensor([ 0.3926, -0.0263, -0.2175, -0.1595]) tensor([0.3601, 0.2369, 0.1957, 0.2073]) -Greedy action tensor([ 0.3874, 0.0026, 0.0270, -0.2314]) tensor([0.3429, 0.2333, 0.2391, 0.1847]) -Greedy action tensor([ 0.5472, 0.4760, -0.2684, 0.0306]) tensor([0.3367, 0.3135, 0.1489, 0.2008]) -Greedy action tensor([ 0.9608, -0.0506, -0.0895, -0.4519]) tensor([0.5110, 0.1859, 0.1788, 0.1244]) -Greedy action tensor([ 1.4870, -1.0443, 0.1031, -0.7172]) tensor([0.6942, 0.0552, 0.1740, 0.0766]) -Greedy action tensor([ 0.5598, -0.5024, -0.1280, -0.3077]) tensor([0.4408, 0.1524, 0.2216, 0.1852]) -Greedy action tensor([ 0.4004, -0.2014, -0.1670, -0.2904]) tensor([0.3823, 0.2094, 0.2168, 0.1916]) -Greedy action tensor([ 0.7507, -0.3899, -0.1062, -0.3388]) tensor([0.4807, 0.1536, 0.2040, 0.1617]) -Greedy action tensor([ 1.1477, -0.6497, 0.1798, -0.4648]) tensor([0.5731, 0.0950, 0.2177, 0.1143]) -Greedy action tensor([ 0.8045, -0.0286, -0.0524, -0.3680]) tensor([0.4611, 0.2004, 0.1957, 0.1427]) -Greedy action tensor([ 0.8397, -0.5443, -0.0136, -0.3316]) tensor([0.5034, 0.1261, 0.2144, 0.1560]) -Greedy action tensor([ 0.5212, -0.1168, 0.0018, -0.0606]) tensor([0.3728, 0.1970, 0.2218, 0.2084]) -Greedy action tensor([ 0.4529, -0.5536, -0.2105, -0.0830]) tensor([0.4056, 0.1482, 0.2089, 0.2373]) -Greedy action tensor([ 0.5468, -0.3753, 0.1431, -0.5981]) tensor([0.4195, 0.1668, 0.2801, 0.1335]) -Greedy action tensor([ 0.6941, -0.5471, 0.0633, -0.3582]) tensor([0.4608, 0.1332, 0.2452, 0.1609]) -Greedy action tensor([ 0.6846, -0.6701, 0.0387, -0.4179]) tensor([0.4730, 0.1220, 0.2479, 0.1570]) -Greedy action tensor([ 0.6332, 0.1236, -0.1574, -0.2561]) tensor([0.4056, 0.2437, 0.1840, 0.1667]) -Greedy action tensor([ 0.8718, -0.5310, -0.1008, -0.4204]) tensor([0.5267, 0.1295, 0.1991, 0.1447]) -Greedy action tensor([ 0.8477, -0.4531, -0.1119, -0.2147]) tensor([0.4998, 0.1361, 0.1914, 0.1727]) -Greedy action tensor([ 0.6769, -0.3191, -0.0596, -0.3840]) tensor([0.4557, 0.1683, 0.2182, 0.1577]) -Greedy action tensor([ 0.4502, -0.5074, -0.1394, -0.1985]) tensor([0.4063, 0.1559, 0.2253, 0.2124]) -Greedy action tensor([ 0.6049, 0.0926, -0.1155, -0.2181]) tensor([0.3961, 0.2373, 0.1927, 0.1739]) -Greedy action tensor([ 0.8363, -0.5778, 0.0912, -0.5198]) tensor([0.5062, 0.1231, 0.2403, 0.1304]) -Greedy action tensor([ 1.0327, -0.6170, -0.0613, -0.3823]) tensor([0.5650, 0.1085, 0.1892, 0.1373]) -Greedy action tensor([ 1.0499, -0.8219, -0.0897, -0.4248]) tensor([0.5873, 0.0904, 0.1879, 0.1344]) -Greedy action tensor([ 0.7795, -0.5783, 0.0710, -0.4018]) tensor([0.4863, 0.1251, 0.2394, 0.1492]) -Greedy action tensor([ 0.4709, 0.4421, -0.2248, 0.0354]) tensor([0.3208, 0.3117, 0.1600, 0.2075]) -Greedy action tensor([ 0.4734, -0.1661, -0.0906, -0.2367]) tensor([0.3864, 0.2038, 0.2198, 0.1899]) -Greedy action tensor([ 0.5489, -0.6139, -0.0121, -0.2625]) tensor([0.4297, 0.1343, 0.2452, 0.1909]) -Greedy action tensor([ 0.8342, -0.4232, 0.0014, -0.0991]) tensor([0.4734, 0.1346, 0.2058, 0.1862]) -Greedy action tensor([ 0.4150, -0.1804, 0.2473, -0.4297]) tensor([0.3538, 0.1951, 0.2992, 0.1520]) -Greedy action tensor([ 0.7459, -0.2177, -0.0213, -0.5210]) tensor([0.4700, 0.1793, 0.2182, 0.1324]) -Greedy action tensor([ 0.6545, -0.4584, 0.0230, -0.5179]) tensor([0.4608, 0.1514, 0.2451, 0.1427]) -Greedy action tensor([ 0.1931, 0.0136, -0.1544, -0.4252]) tensor([0.3246, 0.2712, 0.2293, 0.1749]) -Greedy action tensor([ 0.3655, 0.0481, -0.0246, 0.0917]) tensor([0.3159, 0.2300, 0.2139, 0.2402]) -Greedy action tensor([ 0.5028, -0.1991, -0.2061, -0.2136]) tensor([0.4038, 0.2002, 0.1988, 0.1973]) -Greedy action tensor([ 0.3329, -0.2284, -0.0334, -0.1929]) tensor([0.3503, 0.1998, 0.2428, 0.2070]) -Greedy action tensor([-0.0625, -0.8875, 1.0010, -0.6608]) tensor([0.2047, 0.0897, 0.5930, 0.1125]) -Greedy action tensor([-0.1251, 0.0633, 1.1161, -0.4448]) tensor([0.1564, 0.1888, 0.5411, 0.1136]) -Greedy action tensor([-1.0367, -2.8737, -0.1670, 0.5452]) tensor([0.1189, 0.0189, 0.2837, 0.5784]) -Greedy action tensor([ 0.8338, -0.4485, 0.4053, 0.7738]) tensor([0.3483, 0.0966, 0.2269, 0.3281]) -Greedy action tensor([0.7116, 0.0396, 0.5251, 0.6640]) tensor([0.3036, 0.1550, 0.2519, 0.2895]) -Greedy action tensor([ 1.8938, -1.7696, -0.3036, 1.0633]) tensor([0.6359, 0.0163, 0.0706, 0.2772]) -Greedy action tensor([-1.2744, -0.4187, 0.2871, -1.3427]) tensor([0.1105, 0.2599, 0.5265, 0.1032]) -Greedy action tensor([-0.6085, 0.0960, 0.1393, 0.1515]) tensor([0.1375, 0.2781, 0.2904, 0.2940]) -Greedy action tensor([-0.3587, -1.4206, -1.0144, 0.3704]) tensor([0.2539, 0.0878, 0.1318, 0.5264]) -Greedy action tensor([ 0.5329, -0.6961, 1.7779, 0.9152]) tensor([0.1605, 0.0470, 0.5573, 0.2352]) -Greedy action tensor([ 0.1954, 0.6035, 0.3186, -0.2696]) tensor([0.2346, 0.3528, 0.2653, 0.1473]) -Greedy action tensor([-0.9339, -0.1759, -0.3932, -0.1409]) tensor([0.1416, 0.3022, 0.2432, 0.3130]) -Greedy action tensor([-1.2309, -0.2548, 1.3745, -1.1395]) tensor([0.0547, 0.1451, 0.7403, 0.0599]) -Greedy action tensor([ 0.6574, -0.6310, 1.4118, -0.1499]) tensor([0.2599, 0.0717, 0.5526, 0.1159]) -Greedy action tensor([ 0.2556, 0.6184, -0.4328, -1.4737]) tensor([0.3208, 0.4611, 0.1612, 0.0569]) -Greedy action tensor([ 1.0757, -0.9389, 0.2020, 1.4139]) tensor([0.3386, 0.0452, 0.1413, 0.4749]) -Greedy action tensor([-0.3914, -0.9446, 1.0942, 0.0273]) tensor([0.1331, 0.0766, 0.5880, 0.2023]) -Greedy action tensor([0.7151, 0.4198, 0.1429, 0.3869]) tensor([0.3301, 0.2457, 0.1863, 0.2378]) -Greedy action tensor([-0.1955, -0.5687, -0.2283, 1.0790]) tensor([0.1604, 0.1105, 0.1553, 0.5739]) -Greedy action tensor([-0.4453, 0.5130, 0.0037, -0.0588]) tensor([0.1505, 0.3923, 0.2358, 0.2215]) -Greedy action tensor([-1.8241, 0.4579, 1.0687, -0.4288]) tensor([0.0304, 0.2980, 0.5488, 0.1228]) -Greedy action tensor([-1.2116, -0.1900, -0.4111, -0.2889]) tensor([0.1174, 0.3260, 0.2613, 0.2953]) -Greedy action tensor([ 0.5707, -0.9890, -0.2725, 0.2440]) tensor([0.4234, 0.0890, 0.1822, 0.3054]) -Greedy action tensor([ 0.8187, -0.4111, 0.5750, 0.8818]) tensor([0.3183, 0.0931, 0.2495, 0.3391]) -Greedy action tensor([1.3143, 0.9411, 0.5312, 0.5657]) tensor([0.3819, 0.2629, 0.1745, 0.1806]) -Greedy action tensor([-0.2365, -1.1801, -0.4985, -0.2591]) tensor([0.3188, 0.1241, 0.2453, 0.3117]) -Greedy action tensor([-0.7522, -0.5262, 0.0560, 0.5355]) tensor([0.1231, 0.1543, 0.2763, 0.4462]) -Greedy action tensor([ 0.6463, -1.8105, 0.5385, 0.8129]) tensor([0.3160, 0.0271, 0.2837, 0.3733]) -Greedy action tensor([0.3376, 0.0481, 0.1508, 0.6972]) tensor([0.2493, 0.1867, 0.2068, 0.3572]) -Greedy action tensor([ 0.5940, 0.5837, -0.4549, 1.5848]) tensor([0.1987, 0.1966, 0.0696, 0.5351]) -Greedy action tensor([0.7082, 0.6833, 1.1614, 0.5830]) tensor([0.2257, 0.2201, 0.3551, 0.1991]) -Greedy action tensor([ 0.2262, 0.1778, -0.0461, 0.2659]) tensor([0.2663, 0.2537, 0.2028, 0.2771]) -Greedy action tensor([ 0.9612, -0.7597, 2.5053, -0.1239]) tensor([0.1613, 0.0289, 0.7554, 0.0545]) -Greedy action tensor([ 0.7319, -0.1235, -0.9233, 0.0195]) tensor([0.4747, 0.2018, 0.0907, 0.2328]) -Greedy action tensor([ 0.3590, -0.9128, -0.1070, -0.5645]) tensor([0.4338, 0.1216, 0.2722, 0.1723]) -Greedy action tensor([ 1.2486, -0.5295, 1.4007, 1.3608]) tensor([0.2897, 0.0489, 0.3373, 0.3241]) -Greedy action tensor([-0.4454, 0.0168, -0.2804, -1.2991]) tensor([0.2385, 0.3786, 0.2813, 0.1016]) -Greedy action tensor([ 0.5448, -1.2318, -0.4156, -0.3740]) tensor([0.5126, 0.0867, 0.1962, 0.2045]) -Greedy action tensor([ 0.9820, -0.8887, 0.0569, 0.4626]) tensor([0.4661, 0.0718, 0.1848, 0.2773]) -Greedy action tensor([0.0954, 0.1104, 0.8008, 0.7811]) tensor([0.1660, 0.1685, 0.3360, 0.3295]) -Greedy action tensor([-0.0916, -0.1182, -0.0430, -0.2219]) tensor([0.2563, 0.2496, 0.2691, 0.2250]) -Greedy action tensor([ 1.7129, -1.2307, 1.7109, 1.0967]) tensor([0.3860, 0.0203, 0.3852, 0.2084]) -Greedy action tensor([ 1.0952, -1.0054, 1.5507, -0.1181]) tensor([0.3337, 0.0408, 0.5263, 0.0992]) -Greedy action tensor([ 0.0038, -1.1169, -0.0041, 0.6190]) tensor([0.2399, 0.0782, 0.2380, 0.4439]) -Greedy action tensor([-1.3271, -0.5515, 0.1247, -0.8917]) tensor([0.1113, 0.2416, 0.4751, 0.1720]) -Greedy action tensor([0.0813, 0.0923, 1.0527, 0.3718]) tensor([0.1670, 0.1688, 0.4410, 0.2232]) -Greedy action tensor([ 0.3876, -0.0253, 1.0869, 0.7185]) tensor([0.1974, 0.1306, 0.3972, 0.2748]) -Greedy action tensor([-0.3429, -0.2366, -0.5050, 0.7529]) tensor([0.1679, 0.1868, 0.1428, 0.5024]) -Greedy action tensor([-0.1480, -0.2539, 0.5987, -0.2287]) tensor([0.2028, 0.1824, 0.4278, 0.1870]) -Greedy action tensor([-0.2049, -0.8016, -0.2789, 0.0946]) tensor([0.2612, 0.1438, 0.2426, 0.3524]) -Greedy action tensor([-0.0838, -1.0247, -0.1000, -0.4257]) tensor([0.3242, 0.1265, 0.3190, 0.2303]) -Greedy action tensor([ 1.1982, 0.4355, 0.3012, -0.3995]) tensor([0.4816, 0.2246, 0.1964, 0.0975]) -Greedy action tensor([ 0.5606, -0.5549, 0.6241, 1.1473]) tensor([0.2386, 0.0782, 0.2542, 0.4290]) -Greedy action tensor([ 1.3849, -0.4287, 0.8269, 0.0720]) tensor([0.4989, 0.0814, 0.2855, 0.1342]) -Greedy action tensor([-0.5935, -0.2435, -0.4827, 1.3125]) tensor([0.0974, 0.1383, 0.1089, 0.6554]) -Greedy action tensor([ 0.3067, -0.1946, -0.4036, 0.4862]) tensor([0.3036, 0.1839, 0.1492, 0.3633]) -Greedy action tensor([-1.1484, -2.0129, -0.1167, 0.7616]) tensor([0.0911, 0.0384, 0.2555, 0.6150]) -Greedy action tensor([ 1.4122, -1.2316, 0.4992, 0.5846]) tensor([0.5237, 0.0372, 0.2102, 0.2289]) -Greedy action tensor([ 0.3603, -1.6044, -0.3124, -0.0104]) tensor([0.4272, 0.0599, 0.2180, 0.2949]) -Greedy action tensor([ 1.0698, -1.7757, -0.0744, -0.0552]) tensor([0.5878, 0.0342, 0.1872, 0.1908]) -Greedy action tensor([-0.5219, -0.6876, -0.9158, 0.0715]) tensor([0.2309, 0.1956, 0.1557, 0.4179]) -Greedy action tensor([-1.6688, -1.1876, -0.1187, -1.2510]) tensor([0.1130, 0.1829, 0.5325, 0.1716]) -Greedy action tensor([ 0.8744, -1.1963, 0.1886, 1.2357]) tensor([0.3263, 0.0411, 0.1643, 0.4683]) -Greedy action tensor([ 0.7334, -1.8757, 0.2535, 0.3599]) tensor([0.4200, 0.0309, 0.2599, 0.2891]) -Greedy action tensor([ 0.7361, -0.1080, -0.4938, 0.3944]) tensor([0.4110, 0.1767, 0.1202, 0.2921]) -Greedy action tensor([ 0.5488, -0.2297, -0.7112, -0.0060]) tensor([0.4316, 0.1981, 0.1224, 0.2478]) -Greedy action tensor([ 1.6835, -1.3845, 1.1290, 0.6901]) tensor([0.5022, 0.0234, 0.2885, 0.1860]) -Greedy action tensor([-0.3499, -0.6615, 0.4877, -0.5950]) tensor([0.2072, 0.1517, 0.4789, 0.1622]) -Greedy action tensor([ 0.1666, -1.7982, -0.1806, 0.7115]) tensor([0.2800, 0.0393, 0.1979, 0.4829]) -Greedy action tensor([ 0.7561, 0.0051, 0.4784, -0.6142]) tensor([0.4027, 0.1900, 0.3050, 0.1023]) -Greedy action tensor([-0.3528, 0.6735, 1.1613, -0.4933]) tensor([0.1086, 0.3032, 0.4938, 0.0944]) -Greedy action tensor([ 0.7738, -0.1309, -0.0874, -0.3387]) tensor([0.4638, 0.1877, 0.1960, 0.1525]) -Greedy action tensor([ 1.0997, -0.6790, 0.4400, 0.1741]) tensor([0.4803, 0.0811, 0.2483, 0.1903]) -Greedy action tensor([ 0.4597, -0.9517, -0.0071, 0.1208]) tensor([0.3871, 0.0944, 0.2427, 0.2758]) -Greedy action tensor([ 0.0554, -0.0777, 0.5751, -0.9220]) tensor([0.2543, 0.2226, 0.4275, 0.0957]) -Greedy action tensor([-3.7810e-01, -1.7373e+00, 6.4472e-01, 9.5975e-04]) tensor([0.1819, 0.0467, 0.5058, 0.2657]) -Greedy action tensor([-0.1198, -1.4311, -0.2281, 0.7087]) tensor([0.2244, 0.0605, 0.2014, 0.5138]) -Greedy action tensor([-0.2962, 0.7458, 0.5233, -0.6054]) tensor([0.1462, 0.4146, 0.3319, 0.1073]) -Greedy action tensor([ 1.7534, -0.9629, 0.1828, 0.9744]) tensor([0.5771, 0.0382, 0.1200, 0.2648]) -Greedy action tensor([-0.4822, -1.4449, 0.7646, 1.3628]) tensor([0.0894, 0.0341, 0.3109, 0.5656]) -Greedy action tensor([-0.4647, -0.3401, -0.7768, -0.1217]) tensor([0.2340, 0.2650, 0.1713, 0.3297]) -Greedy action tensor([-0.5369, -0.4723, -0.2943, -1.2423]) tensor([0.2608, 0.2781, 0.3323, 0.1288]) -Greedy action tensor([-1.7565, -0.1088, 0.6389, -0.3881]) tensor([0.0474, 0.2462, 0.5201, 0.1862]) -Greedy action tensor([-1.8804, -0.2876, 0.6153, -0.1685]) tensor([0.0424, 0.2085, 0.5143, 0.2349]) -Greedy action tensor([-1.8373, -0.4862, 0.6132, -0.1367]) tensor([0.0456, 0.1761, 0.5286, 0.2497]) -Greedy action tensor([-1.9458, -0.4501, 0.6677, -0.1813]) tensor([0.0401, 0.1789, 0.5470, 0.2340]) -Greedy action tensor([-1.6895, -0.5238, 0.5325, -0.0524]) tensor([0.0538, 0.1727, 0.4967, 0.2767]) -Greedy action tensor([-0.6069, 0.1631, 0.1439, -0.0781]) tensor([0.1434, 0.3096, 0.3037, 0.2433]) -Greedy action tensor([-1.9241, -0.4347, 0.6553, -0.1707]) tensor([0.0410, 0.1818, 0.5406, 0.2367]) -Greedy action tensor([-1.9055, -0.4135, 0.6459, -0.1576]) tensor([0.0416, 0.1851, 0.5341, 0.2391]) -Greedy action tensor([-1.1590, -0.5433, 0.3219, 0.0595]) tensor([0.0941, 0.1741, 0.4136, 0.3182]) -Greedy action tensor([-1.4008, -0.3028, 0.7486, 0.5692]) tensor([0.0506, 0.1518, 0.4345, 0.3631]) -Greedy action tensor([-1.5760, -0.4470, 0.5491, 0.1152]) tensor([0.0559, 0.1728, 0.4680, 0.3033]) -Greedy action tensor([-0.4250, 0.7287, 0.1741, 0.5411]) tensor([0.1160, 0.3678, 0.2112, 0.3049]) -Greedy action tensor([-1.5522, -0.0078, 0.5866, 0.1415]) tensor([0.0510, 0.2389, 0.4328, 0.2773]) -Greedy action tensor([-1.7940, -0.0168, 0.5390, -0.0474]) tensor([0.0436, 0.2576, 0.4491, 0.2498]) -Greedy action tensor([-1.8946, -0.4255, 0.6374, -0.1550]) tensor([0.0423, 0.1840, 0.5326, 0.2411]) -Greedy action tensor([-1.7744, -0.2260, 0.5508, -0.1121]) tensor([0.0472, 0.2218, 0.4824, 0.2486]) -Greedy action tensor([-1.8601, -0.2237, 0.5991, -0.1106]) tensor([0.0424, 0.2178, 0.4959, 0.2439]) -Greedy action tensor([-1.5065, -0.3964, 1.1899, 1.0276]) tensor([0.0318, 0.0964, 0.4712, 0.4006]) -Greedy action tensor([-1.2996, -0.2518, 0.4645, -0.2287]) tensor([0.0793, 0.2262, 0.4630, 0.2315]) -Greedy action tensor([-1.4318, -0.5918, 0.3886, 0.1925]) tensor([0.0687, 0.1590, 0.4239, 0.3484]) -Greedy action tensor([-1.8795, -0.4367, 0.6362, -0.1496]) tensor([0.0430, 0.1821, 0.5323, 0.2426]) -Greedy action tensor([-1.4852, -0.3549, 1.0129, 0.7498]) tensor([0.0391, 0.1209, 0.4749, 0.3651]) -Greedy action tensor([-1.8200, -0.4831, 0.6738, -0.0076]) tensor([0.0434, 0.1652, 0.5255, 0.2659]) -Greedy action tensor([-0.6220, -0.0649, 0.1621, 0.1300]) tensor([0.1417, 0.2473, 0.3104, 0.3006]) -Greedy action tensor([-1.9015, -0.4476, 0.6831, -0.1172]) tensor([0.0408, 0.1747, 0.5413, 0.2431]) -Greedy action tensor([-1.8991, -0.3931, 0.6492, -0.1319]) tensor([0.0414, 0.1867, 0.5294, 0.2424]) -Greedy action tensor([-1.5573, -0.4640, 0.6967, 0.3861]) tensor([0.0488, 0.1456, 0.4648, 0.3408]) -Greedy action tensor([-1.0996, 0.4987, 0.4192, 0.4924]) tensor([0.0648, 0.3206, 0.2961, 0.3186]) -Greedy action tensor([-1.9355, -0.4366, 0.6607, -0.1733]) tensor([0.0405, 0.1811, 0.5427, 0.2357]) -Greedy action tensor([-1.2870, 0.6485, 0.1703, 0.2378]) tensor([0.0595, 0.4119, 0.2554, 0.2732]) -Greedy action tensor([-1.6845, -0.4459, 0.5328, -0.0524]) tensor([0.0533, 0.1841, 0.4898, 0.2728]) -Greedy action tensor([-1.6864, -0.5168, 0.5322, 0.0042]) tensor([0.0531, 0.1710, 0.4881, 0.2879]) -Greedy action tensor([-0.6843, -0.5508, 0.1617, 0.2996]) tensor([0.1399, 0.1599, 0.3260, 0.3742]) -Greedy action tensor([-1.6815e+00, -1.6777e-01, 5.5956e-01, 9.0301e-04]) tensor([0.0492, 0.2235, 0.4626, 0.2646]) -Greedy action tensor([-1.8977, -0.4540, 0.6788, -0.1149]) tensor([0.0411, 0.1741, 0.5404, 0.2444]) -Greedy action tensor([-1.9199, -0.4362, 0.6512, -0.1712]) tensor([0.0413, 0.1819, 0.5397, 0.2371]) -Greedy action tensor([-0.4598, -0.3078, 0.1583, 0.1683]) tensor([0.1697, 0.1975, 0.3148, 0.3180]) -Greedy action tensor([-1.9185, -0.4150, 0.6476, -0.1643]) tensor([0.0412, 0.1851, 0.5358, 0.2379]) -Greedy action tensor([-1.9282, -0.3183, 0.6360, -0.1656]) tensor([0.0403, 0.2016, 0.5234, 0.2348]) -Greedy action tensor([-1.6942, -0.4584, 0.5821, 0.0272]) tensor([0.0506, 0.1740, 0.4926, 0.2828]) -Greedy action tensor([-1.8430, -0.4217, 0.6134, -0.1243]) tensor([0.0447, 0.1851, 0.5211, 0.2492]) -Greedy action tensor([-1.4445, -0.5055, 0.4275, 0.1367]) tensor([0.0670, 0.1714, 0.4357, 0.3258]) -Greedy action tensor([-1.5156, -0.1941, 0.6448, 0.2929]) tensor([0.0512, 0.1920, 0.4443, 0.3125]) -Greedy action tensor([-1.9463, -0.4507, 0.6691, -0.1815]) tensor([0.0400, 0.1787, 0.5475, 0.2339]) -Greedy action tensor([-2.0355, -0.6801, 1.4023, 0.6731]) tensor([0.0196, 0.0760, 0.6101, 0.2942]) -Greedy action tensor([-1.9200, -0.4421, 0.6556, -0.1651]) tensor([0.0411, 0.1804, 0.5406, 0.2379]) -Greedy action tensor([-1.3146, 0.0895, 0.5967, 0.3742]) tensor([0.0580, 0.2361, 0.3921, 0.3139]) -Greedy action tensor([-1.9114, -0.4420, 0.6516, -0.1636]) tensor([0.0416, 0.1806, 0.5392, 0.2386]) -Greedy action tensor([-1.5464, -0.3811, 0.5386, 0.0984]) tensor([0.0574, 0.1840, 0.4615, 0.2972]) -Greedy action tensor([-1.8693, -0.1689, 0.5854, -0.1263]) tensor([0.0420, 0.2298, 0.4885, 0.2398]) -Greedy action tensor([-0.6755, -0.0625, 0.2022, -0.0456]) tensor([0.1403, 0.2589, 0.3374, 0.2634]) -Greedy action tensor([-1.8345, -0.5186, 0.7229, -0.0215]) tensor([0.0421, 0.1569, 0.5430, 0.2580]) -Greedy action tensor([-1.6876, -0.4413, 0.9978, 0.5547]) tensor([0.0350, 0.1218, 0.5135, 0.3297]) -Greedy action tensor([-1.1610, -0.7506, 1.2053, 1.4191]) tensor([0.0379, 0.0572, 0.4043, 0.5006]) -Greedy action tensor([-0.3182, 0.0220, 0.4580, 0.0217]) tensor([0.1671, 0.2349, 0.3632, 0.2348]) -Greedy action tensor([-0.3941, -0.0324, 0.6153, 0.7543]) tensor([0.1200, 0.1723, 0.3293, 0.3784]) -Greedy action tensor([-1.2334, -0.7131, 0.7675, -0.3316]) tensor([0.0797, 0.1342, 0.5897, 0.1965]) -Greedy action tensor([-1.8761, -0.3733, 0.6299, -0.1285]) tensor([0.0426, 0.1913, 0.5217, 0.2444]) -Greedy action tensor([-1.7931, -0.2796, 0.5641, -0.0979]) tensor([0.0464, 0.2108, 0.4900, 0.2528]) -Greedy action tensor([-1.3413, -0.5589, 0.5587, 0.5624]) tensor([0.0603, 0.1319, 0.4032, 0.4047]) -Greedy action tensor([-1.9009, -0.4583, 0.6709, -0.1551]) tensor([0.0416, 0.1759, 0.5442, 0.2383]) -Greedy action tensor([-1.5515, -0.5487, 0.4555, 0.1188]) tensor([0.0607, 0.1654, 0.4515, 0.3224]) -Greedy action tensor([-0.4877, 0.3200, 0.1830, 0.1656]) tensor([0.1404, 0.3150, 0.2746, 0.2699]) -Greedy action tensor([-1.9386, -0.4420, 0.6651, -0.1766]) tensor([0.0403, 0.1801, 0.5448, 0.2348]) -Greedy action tensor([-1.7020, -0.5616, 0.6174, -0.0223]) tensor([0.0509, 0.1591, 0.5172, 0.2728]) -Greedy action tensor([-1.8760, -0.3183, 0.6250, -0.1415]) tensor([0.0424, 0.2011, 0.5165, 0.2400]) -Greedy action tensor([-1.8967, -0.4207, 0.6499, -0.1549]) tensor([0.0419, 0.1835, 0.5352, 0.2394]) -Greedy action tensor([-1.0512, -0.1402, 0.5342, 0.4126]) tensor([0.0788, 0.1960, 0.3846, 0.3406]) -Greedy action tensor([-1.7008, -0.4814, 0.5828, 0.0683]) tensor([0.0498, 0.1687, 0.4891, 0.2924]) -Greedy action tensor([-1.8554, -0.2362, 0.5899, -0.1499]) tensor([0.0433, 0.2187, 0.4996, 0.2384]) -Greedy action tensor([-1.9118, -0.4685, 0.6476, -0.1642]) tensor([0.0418, 0.1772, 0.5408, 0.2402]) -Greedy action tensor([-1.7814, -0.4499, 0.6295, -0.0751]) tensor([0.0466, 0.1766, 0.5198, 0.2569]) -Greedy action tensor([-1.7821, -0.5908, 0.6471, -0.0188]) tensor([0.0466, 0.1533, 0.5286, 0.2716]) -Greedy action tensor([-1.6664, -0.1081, 0.5045, -0.1038]) tensor([0.0518, 0.2463, 0.4545, 0.2474]) -Greedy action tensor([-1.8899, -0.4512, 0.6442, -0.1539]) tensor([0.0426, 0.1794, 0.5365, 0.2415]) -Greedy action tensor([-1.2539, 0.5601, 0.2567, -0.0761]) tensor([0.0671, 0.4114, 0.3038, 0.2178]) -Greedy action tensor([-1.8151, -0.3616, 0.6463, -0.1096]) tensor([0.0444, 0.1901, 0.5209, 0.2446]) -Greedy action tensor([-1.4539, 0.6530, 0.3302, 0.0626]) tensor([0.0507, 0.4167, 0.3017, 0.2309]) -Greedy action tensor([-0.0668, 0.0376, 0.7485, 1.7076]) tensor([0.0974, 0.1081, 0.2201, 0.5744]) -Greedy action tensor([-1.8641, -0.4026, 0.6248, -0.1348]) tensor([0.0435, 0.1875, 0.5239, 0.2451]) -Greedy action tensor([-0.6888, 0.4558, 0.0496, 0.0013]) tensor([0.1215, 0.3818, 0.2543, 0.2423]) -Greedy action tensor([ 2.2274, -0.8526, -0.5924, 1.1289]) tensor([0.6950, 0.0319, 0.0414, 0.2317]) -Greedy action tensor([ 1.7745, -0.7456, 0.1356, 0.2011]) tensor([0.6748, 0.0543, 0.1310, 0.1399]) -Greedy action tensor([ 1.5646, -0.3636, -0.5846, 0.0461]) tensor([0.6752, 0.0982, 0.0787, 0.1479]) -Greedy action tensor([ 1.5189, -0.2062, -0.6021, 0.5078]) tensor([0.6017, 0.1072, 0.0722, 0.2189]) -Greedy action tensor([ 1.0648, -0.3073, -0.4187, 0.5665]) tensor([0.4789, 0.1214, 0.1086, 0.2910]) -Greedy action tensor([ 1.5135, -0.3393, -0.0864, 0.4922]) tensor([0.5818, 0.0912, 0.1175, 0.2095]) -Greedy action tensor([ 0.3506, -0.3634, -0.1991, 0.1796]) tensor([0.3437, 0.1683, 0.1983, 0.2897]) -Greedy action tensor([ 1.5388, -0.2457, -0.0811, 0.4427]) tensor([0.5882, 0.0988, 0.1164, 0.1966]) -Greedy action tensor([ 1.4171, 0.1290, -0.8920, 0.4583]) tensor([0.5687, 0.1568, 0.0565, 0.2180]) -Greedy action tensor([ 2.1214e+00, -1.6649e-03, 5.3150e-02, 2.7738e-01]) tensor([0.7121, 0.0852, 0.0900, 0.1126]) -Greedy action tensor([ 1.2149, -0.5708, -0.1496, 0.1708]) tensor([0.5633, 0.0945, 0.1439, 0.1983]) -Greedy action tensor([ 1.9109, -0.4567, -0.6727, 0.3707]) tensor([0.7228, 0.0677, 0.0546, 0.1549]) -Greedy action tensor([ 1.6264, -0.3811, -0.2894, 0.3444]) tensor([0.6414, 0.0862, 0.0944, 0.1780]) -Greedy action tensor([ 2.3571, -1.3172, -0.1745, 0.7882]) tensor([0.7615, 0.0193, 0.0606, 0.1586]) -Greedy action tensor([ 1.2657, -0.7201, -0.2183, 0.3470]) tensor([0.5672, 0.0779, 0.1286, 0.2263]) -Greedy action tensor([ 1.7766, -0.8944, -0.2875, 0.6508]) tensor([0.6577, 0.0455, 0.0835, 0.2133]) -Greedy action tensor([ 0.5154, -0.3367, -0.2342, 0.3380]) tensor([0.3654, 0.1559, 0.1727, 0.3060]) -Greedy action tensor([ 0.8346, -0.4893, -0.3619, 0.2549]) tensor([0.4698, 0.1250, 0.1420, 0.2631]) -Greedy action tensor([ 1.0962, -0.5438, -0.2955, 0.1660]) tensor([0.5443, 0.1056, 0.1354, 0.2147]) -Greedy action tensor([ 0.7619, -0.3419, -0.0698, 0.0307]) tensor([0.4448, 0.1475, 0.1936, 0.2141]) -Greedy action tensor([ 1.4707, -0.4048, -0.8267, 0.4781]) tensor([0.6156, 0.0944, 0.0619, 0.2282]) -Greedy action tensor([ 1.8397, -0.5095, -0.4034, 0.5773]) tensor([0.6736, 0.0643, 0.0715, 0.1906]) -Greedy action tensor([ 0.9759, -0.5050, 0.1541, -0.0750]) tensor([0.4959, 0.1128, 0.2180, 0.1734]) -Greedy action tensor([ 2.8843, -1.1280, -0.1061, 0.3885]) tensor([0.8690, 0.0157, 0.0437, 0.0716]) -Greedy action tensor([ 0.9630, -0.1030, -0.2969, 0.1664]) tensor([0.4810, 0.1657, 0.1365, 0.2169]) -Greedy action tensor([ 1.3425, -0.3540, -0.5452, 0.3032]) tensor([0.5923, 0.1086, 0.0897, 0.2095]) -Greedy action tensor([ 2.0331, -0.8860, -0.0525, 0.3247]) tensor([0.7356, 0.0397, 0.0914, 0.1333]) -Greedy action tensor([ 0.9247, -0.4691, -0.1546, 0.4493]) tensor([0.4526, 0.1123, 0.1538, 0.2813]) -Greedy action tensor([ 0.8381, -0.2031, -0.0081, -0.1225]) tensor([0.4619, 0.1631, 0.1982, 0.1768]) -Greedy action tensor([ 2.3013, -1.0333, -0.2563, 1.0545]) tensor([0.7140, 0.0254, 0.0553, 0.2052]) -Greedy action tensor([ 1.7396, -0.9622, -0.3081, 0.3664]) tensor([0.6899, 0.0463, 0.0890, 0.1748]) -Greedy action tensor([ 2.2869, 0.2115, -0.0808, 0.5190]) tensor([0.7195, 0.0903, 0.0674, 0.1228]) -Greedy action tensor([ 1.3276, -0.8416, -0.1472, 0.4987]) tensor([0.5619, 0.0642, 0.1286, 0.2453]) -Greedy action tensor([ 1.5066, -0.4940, -0.5900, 0.4980]) tensor([0.6162, 0.0833, 0.0757, 0.2247]) -Greedy action tensor([ 2.4419, -1.1137, -0.1399, 0.5559]) tensor([0.7963, 0.0227, 0.0602, 0.1208]) -Greedy action tensor([ 1.5369, -0.6062, -0.1770, 0.1770]) tensor([0.6434, 0.0755, 0.1159, 0.1652]) -Greedy action tensor([ 1.1792, -0.0719, -0.3736, 0.6995]) tensor([0.4724, 0.1352, 0.1000, 0.2924]) -Greedy action tensor([ 1.4295e+00, 1.4070e-03, -4.9945e-01, 5.7139e-01]) tensor([0.5528, 0.1325, 0.0803, 0.2344]) -Greedy action tensor([ 1.7180, -0.7847, -0.1658, 0.6060]) tensor([0.6399, 0.0524, 0.0973, 0.2105]) -Greedy action tensor([ 1.2455, -0.5301, -0.0996, 0.1900]) tensor([0.5625, 0.0953, 0.1465, 0.1957]) -Greedy action tensor([ 1.2976, -0.5971, -0.3725, 0.0381]) tensor([0.6164, 0.0927, 0.1160, 0.1749]) -Greedy action tensor([ 1.4867, -0.2035, -0.2296, 0.1034]) tensor([0.6192, 0.1142, 0.1113, 0.1553]) -Greedy action tensor([ 0.3984, 0.0829, 0.0600, -0.1237]) tensor([0.3294, 0.2403, 0.2349, 0.1954]) -Greedy action tensor([ 0.7935, -0.2775, -0.2504, 0.3084]) tensor([0.4328, 0.1483, 0.1524, 0.2665]) -Greedy action tensor([ 1.1649, 0.0141, -0.0051, 0.2594]) tensor([0.4924, 0.1558, 0.1528, 0.1991]) -Greedy action tensor([ 1.7258, -0.4222, -0.4770, 0.2613]) tensor([0.6857, 0.0800, 0.0758, 0.1585]) -Greedy action tensor([ 1.7799, -0.3235, -0.7148, 0.3443]) tensor([0.6932, 0.0846, 0.0572, 0.1650]) -Greedy action tensor([ 1.1637, -0.0197, -0.5994, 0.3177]) tensor([0.5244, 0.1606, 0.0899, 0.2250]) -Greedy action tensor([ 1.0236, -0.2994, -0.3466, 0.1771]) tensor([0.5130, 0.1366, 0.1303, 0.2200]) -Greedy action tensor([ 1.5621, -0.2359, -0.5978, 0.2796]) tensor([0.6417, 0.1063, 0.0740, 0.1780]) -Greedy action tensor([ 1.8197, -0.3758, -0.5883, 0.4280]) tensor([0.6897, 0.0768, 0.0621, 0.1715]) -Greedy action tensor([ 1.7037, -0.7954, -0.3397, 0.3926]) tensor([0.6751, 0.0555, 0.0875, 0.1820]) -Greedy action tensor([ 2.6778, -1.3512, -0.5323, 0.9459]) tensor([0.8097, 0.0144, 0.0327, 0.1433]) -Greedy action tensor([ 1.6974, -0.8502, -0.1879, 0.3547]) tensor([0.6706, 0.0525, 0.1018, 0.1751]) -Greedy action tensor([ 1.5793, -0.5944, -0.1622, 0.1415]) tensor([0.6551, 0.0745, 0.1148, 0.1556]) -Greedy action tensor([ 1.3883, -0.2092, -0.3163, 0.1578]) tensor([0.5965, 0.1207, 0.1085, 0.1743]) -Greedy action tensor([ 1.0953, -0.2231, -0.4956, 0.3539]) tensor([0.5134, 0.1374, 0.1046, 0.2446]) -Greedy action tensor([ 2.3825, -0.8449, -0.2691, 0.9223]) tensor([0.7449, 0.0295, 0.0525, 0.1730]) -Greedy action tensor([ 0.6026, -0.3042, -0.2616, 0.3373]) tensor([0.3858, 0.1558, 0.1626, 0.2959]) -Greedy action tensor([ 0.8099, -0.0513, 0.0120, -0.0978]) tensor([0.4393, 0.1857, 0.1978, 0.1772]) -Greedy action tensor([ 1.4055, -0.9270, 0.0881, 0.5476]) tensor([0.5590, 0.0543, 0.1497, 0.2370]) -Greedy action tensor([ 2.4438, -0.8265, -0.2754, 0.6240]) tensor([0.7899, 0.0300, 0.0521, 0.1280]) -Greedy action tensor([ 0.6395, -0.3629, -0.0979, 0.3748]) tensor([0.3827, 0.1405, 0.1831, 0.2937]) -Greedy action tensor([ 1.3712, -0.9818, -0.0527, 0.0913]) tensor([0.6196, 0.0589, 0.1492, 0.1723]) -Greedy action tensor([ 1.2143, -0.3622, -0.1572, 0.1771]) tensor([0.5510, 0.1139, 0.1398, 0.1953]) -Greedy action tensor([ 1.2678, -0.5558, -0.4471, 0.7603]) tensor([0.5146, 0.0831, 0.0926, 0.3098]) -Greedy action tensor([ 1.3411, -0.4274, -0.6470, 0.4890]) tensor([0.5767, 0.0984, 0.0790, 0.2460]) -Greedy action tensor([ 1.2724, -0.9560, -0.1042, 0.2867]) tensor([0.5769, 0.0621, 0.1456, 0.2153]) -Greedy action tensor([ 1.1198, -0.4407, -0.0322, 0.2248]) tensor([0.5169, 0.1086, 0.1633, 0.2112]) -Greedy action tensor([ 1.4457, 0.3319, -0.2388, 0.7670]) tensor([0.4948, 0.1624, 0.0918, 0.2510]) -Greedy action tensor([ 1.3889, 0.1045, -0.3776, 0.3822]) tensor([0.5515, 0.1527, 0.0943, 0.2015]) -Greedy action tensor([ 1.2253, -0.4018, -0.2515, 0.1877]) tensor([0.5621, 0.1104, 0.1284, 0.1991]) -Greedy action tensor([ 1.6130, -0.6711, -0.6432, 0.6211]) tensor([0.6339, 0.0646, 0.0664, 0.2351]) -Greedy action tensor([ 1.5029, -0.4428, -0.1591, 0.0902]) tensor([0.6345, 0.0907, 0.1204, 0.1545]) -Greedy action tensor([ 1.3779, -0.4244, -0.3361, 0.2026]) tensor([0.6047, 0.0997, 0.1089, 0.1867]) -Greedy action tensor([ 0.7295, -0.3603, -0.3695, 0.4526]) tensor([0.4119, 0.1385, 0.1373, 0.3123]) -Greedy action tensor([ 1.7953, -1.0329, -0.5698, 0.6999]) tensor([0.6723, 0.0397, 0.0632, 0.2248]) -Greedy action tensor([ 2.1284, -0.8490, -0.4047, 0.8367]) tensor([0.7117, 0.0362, 0.0565, 0.1956]) -Greedy action tensor([ 1.3033, -0.3756, -0.5950, 0.2671]) tensor([0.5913, 0.1103, 0.0886, 0.2098]) -Greedy action tensor([ 1.2762, -0.3608, -0.2051, 0.3807]) tensor([0.5463, 0.1063, 0.1242, 0.2231]) -Greedy action tensor([ 0.9345, -0.0166, -0.0854, 0.0883]) tensor([0.4596, 0.1775, 0.1657, 0.1972]) -Greedy action tensor([ 0.6265, -0.3195, -0.0552, -0.3197]) tensor([0.4382, 0.1701, 0.2216, 0.1701]) -Greedy action tensor([ 0.6454, -0.4077, -0.0991, -0.1109]) tensor([0.4361, 0.1521, 0.2071, 0.2047]) -Greedy action tensor([ 0.7869, -0.5050, -0.0212, -0.2468]) tensor([0.4817, 0.1323, 0.2147, 0.1713]) -Greedy action tensor([ 0.7115, -0.3844, -0.0376, -0.2080]) tensor([0.4533, 0.1515, 0.2143, 0.1808]) -Greedy action tensor([ 0.2304, -0.3483, -0.0932, -0.1751]) tensor([0.3389, 0.1900, 0.2452, 0.2259]) -Greedy action tensor([ 0.6912, -0.3142, -0.0148, -0.3028]) tensor([0.4485, 0.1641, 0.2214, 0.1660]) -Greedy action tensor([ 0.8226, -0.4082, -0.2480, -0.1821]) tensor([0.4998, 0.1460, 0.1713, 0.1830]) -Greedy action tensor([ 0.4937, -0.2662, 0.0751, -0.3627]) tensor([0.3921, 0.1834, 0.2580, 0.1665]) -Greedy action tensor([ 0.7726, -0.4837, 0.0470, -0.2595]) tensor([0.4706, 0.1340, 0.2278, 0.1677]) -Greedy action tensor([ 0.6238, -0.1237, 0.1507, -0.1240]) tensor([0.3891, 0.1843, 0.2424, 0.1842]) -Greedy action tensor([ 0.5012, -0.4311, -0.0275, -0.0151]) tensor([0.3876, 0.1526, 0.2285, 0.2313]) -Greedy action tensor([ 0.7450, -0.3407, 0.1510, -0.5905]) tensor([0.4645, 0.1568, 0.2565, 0.1222]) -Greedy action tensor([ 0.9555, -0.6078, 0.2247, -0.5266]) tensor([0.5214, 0.1092, 0.2510, 0.1184]) -Greedy action tensor([ 0.4829, 0.2219, -0.1476, -0.1168]) tensor([0.3507, 0.2701, 0.1867, 0.1925]) -Greedy action tensor([ 0.6472, -0.6353, -0.0389, -0.2030]) tensor([0.4529, 0.1256, 0.2280, 0.1935]) -Greedy action tensor([ 0.5224, -0.6699, 0.0438, -0.4683]) tensor([0.4358, 0.1323, 0.2701, 0.1618]) -Greedy action tensor([ 0.9817, -0.7620, 0.0014, -0.7071]) tensor([0.5764, 0.1008, 0.2163, 0.1065]) -Greedy action tensor([ 0.8394, -0.4655, 0.0876, -0.5774]) tensor([0.5037, 0.1366, 0.2375, 0.1221]) -Greedy action tensor([ 0.4100, 0.0676, -0.1437, -0.0479]) tensor([0.3428, 0.2434, 0.1970, 0.2168]) -Greedy action tensor([ 0.2690, -0.1649, -0.0124, -0.1400]) tensor([0.3261, 0.2113, 0.2461, 0.2166]) -Greedy action tensor([ 1.1257, -0.6666, -0.0424, -0.6780]) tensor([0.6089, 0.1014, 0.1893, 0.1003]) -Greedy action tensor([ 0.5150, -0.0345, -0.0489, -0.1961]) tensor([0.3792, 0.2189, 0.2157, 0.1862]) -Greedy action tensor([ 0.2863, -0.1863, 0.0680, -0.3022]) tensor([0.3353, 0.2090, 0.2695, 0.1861]) -Greedy action tensor([ 0.6886, -0.4073, 0.2387, -0.7320]) tensor([0.4518, 0.1510, 0.2881, 0.1091]) -Greedy action tensor([ 0.7184, -0.3432, 0.0361, -0.3424]) tensor([0.4550, 0.1574, 0.2300, 0.1575]) -Greedy action tensor([ 0.8818, -0.7177, 0.1686, -0.7905]) tensor([0.5319, 0.1075, 0.2607, 0.0999]) -Greedy action tensor([ 0.5052, -0.1724, -0.0334, -0.3038]) tensor([0.3942, 0.2002, 0.2300, 0.1755]) -Greedy action tensor([ 0.5165, 0.1014, -0.0630, -0.1284]) tensor([0.3643, 0.2405, 0.2041, 0.1911]) -Greedy action tensor([ 0.2834, 0.2594, -0.0700, -0.2841]) tensor([0.3081, 0.3008, 0.2164, 0.1747]) -Greedy action tensor([ 0.9974, -0.5323, -0.1289, -0.2565]) tensor([0.5476, 0.1186, 0.1775, 0.1563]) -Greedy action tensor([ 1.1643, -1.1193, -0.0115, -0.7230]) tensor([0.6402, 0.0652, 0.1975, 0.0970]) -Greedy action tensor([ 0.5268, -0.4208, -0.0074, -0.4684]) tensor([0.4267, 0.1654, 0.2501, 0.1577]) -Greedy action tensor([ 0.3968, 0.0633, -0.0161, -0.2751]) tensor([0.3462, 0.2480, 0.2291, 0.1768]) -Greedy action tensor([ 0.9661, -0.3348, -0.0270, -0.1282]) tensor([0.5057, 0.1377, 0.1873, 0.1693]) -Greedy action tensor([ 0.4922, 0.1165, -0.0926, 0.0918]) tensor([0.3432, 0.2357, 0.1912, 0.2299]) -Greedy action tensor([ 0.5916, 0.3889, -0.2621, -0.2512]) tensor([0.3741, 0.3055, 0.1593, 0.1611]) -Greedy action tensor([ 0.3926, -0.0723, 0.0473, -0.0684]) tensor([0.3371, 0.2117, 0.2386, 0.2126]) -Greedy action tensor([ 0.9265, -0.8508, 0.0996, -0.5134]) tensor([0.5425, 0.0917, 0.2373, 0.1285]) -Greedy action tensor([ 0.5922, -0.5598, -0.0636, -0.1537]) tensor([0.4330, 0.1368, 0.2247, 0.2054]) -Greedy action tensor([ 0.2373, 0.2596, 0.1392, -0.3442]) tensor([0.2867, 0.2931, 0.2599, 0.1603]) -Greedy action tensor([ 0.6596, -0.5332, -0.0990, -0.5914]) tensor([0.4859, 0.1474, 0.2276, 0.1391]) -Greedy action tensor([ 0.7708, -0.3528, -0.0148, -0.3330]) tensor([0.4734, 0.1539, 0.2158, 0.1570]) -Greedy action tensor([ 0.7305, -0.5565, 0.0419, -0.3820]) tensor([0.4746, 0.1310, 0.2384, 0.1560]) -Greedy action tensor([ 0.6417, -0.3491, -0.0398, -0.1813]) tensor([0.4317, 0.1603, 0.2184, 0.1896]) -Greedy action tensor([ 0.9947, -0.2526, -0.0629, -0.4678]) tensor([0.5358, 0.1539, 0.1861, 0.1241]) -Greedy action tensor([ 0.7013, -0.3412, -0.1396, -0.2168]) tensor([0.4581, 0.1615, 0.1976, 0.1829]) -Greedy action tensor([ 0.6197, -0.4720, -0.0748, -0.0728]) tensor([0.4282, 0.1437, 0.2138, 0.2142]) -Greedy action tensor([ 0.8907, -0.5480, 0.0535, -0.4685]) tensor([0.5189, 0.1231, 0.2247, 0.1333]) -Greedy action tensor([ 0.6440, -0.5522, -0.0864, -0.1166]) tensor([0.4441, 0.1343, 0.2140, 0.2076]) -Greedy action tensor([ 0.3968, -0.1105, -0.0057, -0.0454]) tensor([0.3432, 0.2067, 0.2295, 0.2206]) -Greedy action tensor([ 0.9783, -0.3532, -0.0366, -0.1435]) tensor([0.5122, 0.1353, 0.1857, 0.1668]) -Greedy action tensor([ 0.8913, -0.4002, -0.0263, -0.2020]) tensor([0.4976, 0.1368, 0.1988, 0.1668]) -Greedy action tensor([ 0.9973, -0.5936, -0.0448, -0.3454]) tensor([0.5502, 0.1121, 0.1941, 0.1437]) -Greedy action tensor([ 0.5827, 0.1309, -0.0394, -0.5633]) tensor([0.4014, 0.2555, 0.2155, 0.1276]) -Greedy action tensor([ 0.1242, -0.0115, 0.1063, -0.2491]) tensor([0.2822, 0.2464, 0.2772, 0.1943]) -Greedy action tensor([ 0.4758, -0.2785, -0.1297, -0.1660]) tensor([0.3933, 0.1850, 0.2147, 0.2070]) -Greedy action tensor([ 1.1741, -1.1343, 0.0670, -0.5498]) tensor([0.6218, 0.0618, 0.2055, 0.1109]) -Greedy action tensor([ 0.5654, 0.2114, -0.0875, -0.3781]) tensor([0.3829, 0.2687, 0.1993, 0.1490]) -Greedy action tensor([ 1.1443, -0.8024, 0.0965, -0.4292]) tensor([0.5880, 0.0839, 0.2062, 0.1219]) -Greedy action tensor([ 0.7870, -0.2968, 0.0659, -0.1155]) tensor([0.4484, 0.1517, 0.2180, 0.1819]) -Greedy action tensor([ 0.5850, -0.1107, 0.0693, -0.1687]) tensor([0.3896, 0.1943, 0.2327, 0.1834]) -Greedy action tensor([ 1.1180, -0.8244, 0.1027, -0.8337]) tensor([0.6069, 0.0870, 0.2199, 0.0862]) -Greedy action tensor([ 0.6018, -0.3965, 0.0738, -0.3781]) tensor([0.4285, 0.1579, 0.2527, 0.1608]) -Greedy action tensor([ 0.8342, -0.4419, -0.1216, -0.1784]) tensor([0.4934, 0.1377, 0.1897, 0.1792]) -Greedy action tensor([ 0.7251, -0.6498, -0.1910, -0.1771]) tensor([0.4858, 0.1228, 0.1943, 0.1971]) -Greedy action tensor([ 1.1973, -0.8162, 0.0350, -0.8664]) tensor([0.6356, 0.0849, 0.1988, 0.0807]) -Greedy action tensor([ 0.4160, -0.1344, 0.1588, -0.3964]) tensor([0.3579, 0.2064, 0.2768, 0.1589]) -Greedy action tensor([ 0.6257, -0.7440, 0.1194, -0.4910]) tensor([0.4578, 0.1164, 0.2759, 0.1499]) -Greedy action tensor([ 0.5516, -0.4540, -0.0346, -0.4147]) tensor([0.4343, 0.1589, 0.2416, 0.1652]) -Greedy action tensor([ 0.1452, -0.0600, 0.0722, -0.1353]) tensor([0.2858, 0.2327, 0.2656, 0.2159]) -Greedy action tensor([ 0.5626, -0.3301, -0.0430, -0.3506]) tensor([0.4243, 0.1738, 0.2316, 0.1703]) -Greedy action tensor([ 0.7050, 0.0459, -0.1693, -0.3391]) tensor([0.4373, 0.2263, 0.1824, 0.1539]) -Greedy action tensor([ 0.8739, -0.2836, -0.1257, -0.5280]) tensor([0.5186, 0.1630, 0.1908, 0.1276]) -Greedy action tensor([ 0.5472, -0.1729, -0.0234, 0.0460]) tensor([0.3763, 0.1831, 0.2127, 0.2279]) -Greedy action tensor([ 1.0907, -0.6710, -0.1059, -0.4455]) tensor([0.5920, 0.1017, 0.1789, 0.1274]) -Greedy action tensor([ 0.9286, -0.5682, -0.0414, -0.3576]) tensor([0.5321, 0.1191, 0.2017, 0.1470]) -Greedy action tensor([ 0.6199, -0.2762, -0.0549, -0.2336]) tensor([0.4268, 0.1742, 0.2173, 0.1817]) -Greedy action tensor([ 0.9150, -0.4613, 0.1335, -0.5608]) tensor([0.5158, 0.1302, 0.2361, 0.1179]) -Greedy action tensor([ 0.7575, -0.4136, -0.0142, -0.3393]) tensor([0.4748, 0.1472, 0.2195, 0.1586]) -Greedy action tensor([ 0.4246, -0.2815, -0.0073, -0.4198]) tensor([0.3887, 0.1918, 0.2524, 0.1671]) -Greedy action tensor([ 0.7349, -0.4771, -0.0316, -0.0598]) tensor([0.4517, 0.1344, 0.2099, 0.2040]) -Greedy action tensor([ 1.2789, -1.1880, 1.4110, 0.9203]) tensor([0.3419, 0.0290, 0.3902, 0.2389]) -Greedy action tensor([-0.0814, 0.5253, 0.0748, -0.5311]) tensor([0.2155, 0.3952, 0.2519, 0.1374]) -Greedy action tensor([ 0.6744, 0.8649, -0.0589, -0.3364]) tensor([0.3274, 0.3961, 0.1573, 0.1192]) -Greedy action tensor([-0.8317, 0.3384, -0.9529, -0.0096]) tensor([0.1354, 0.4364, 0.1200, 0.3081]) -Greedy action tensor([ 0.6100, -1.1674, -0.1182, 0.4498]) tensor([0.3994, 0.0675, 0.1928, 0.3403]) -Greedy action tensor([ 0.8186, -0.0383, -0.2901, 0.4888]) tensor([0.4043, 0.1716, 0.1334, 0.2907]) -Greedy action tensor([ 0.5287, -2.0084, -0.1417, 0.9446]) tensor([0.3219, 0.0255, 0.1647, 0.4880]) -Greedy action tensor([ 0.6341, -0.1451, 0.3183, 0.2445]) tensor([0.3490, 0.1601, 0.2545, 0.2364]) -Greedy action tensor([-0.0362, -1.6422, 0.3271, 0.3166]) tensor([0.2462, 0.0494, 0.3540, 0.3504]) -Greedy action tensor([ 0.0732, -1.0630, 0.8125, 0.4937]) tensor([0.2025, 0.0650, 0.4241, 0.3084]) -Greedy action tensor([ 0.6125, -0.4570, 0.9967, -0.1497]) tensor([0.3050, 0.1047, 0.4479, 0.1424]) -Greedy action tensor([ 0.8148, -1.3867, 0.6075, 0.0588]) tensor([0.4179, 0.0462, 0.3397, 0.1962]) -Greedy action tensor([-1.8213, -1.1060, 0.1882, -0.3864]) tensor([0.0680, 0.1391, 0.5073, 0.2856]) -Greedy action tensor([ 1.2910, -1.9073, -0.1614, 1.8045]) tensor([0.3395, 0.0139, 0.0794, 0.5672]) -Greedy action tensor([ 0.4585, -1.6522, -0.7849, 0.8255]) tensor([0.3505, 0.0425, 0.1011, 0.5059]) -Greedy action tensor([ 0.7609, -0.4867, -0.2066, 0.6518]) tensor([0.3900, 0.1120, 0.1482, 0.3497]) -Greedy action tensor([-0.3400, 0.6365, 0.1313, -0.9841]) tensor([0.1729, 0.4592, 0.2771, 0.0908]) -Greedy action tensor([ 0.2913, -2.3254, -0.3733, 0.4557]) tensor([0.3615, 0.0264, 0.1860, 0.4261]) -Greedy action tensor([ 0.9954, -1.1086, -0.3901, 0.2135]) tensor([0.5465, 0.0667, 0.1367, 0.2501]) -Greedy action tensor([ 1.1991, -0.3125, 1.6032, 0.4875]) tensor([0.3116, 0.0687, 0.4667, 0.1529]) -Greedy action tensor([ 1.8346, -0.7003, 0.7561, 0.8030]) tensor([0.5631, 0.0446, 0.1915, 0.2007]) -Greedy action tensor([-1.2026, 0.0730, 0.4890, -0.7636]) tensor([0.0865, 0.3098, 0.4696, 0.1342]) -Greedy action tensor([ 1.3880, -1.2798, 0.6177, -0.1650]) tensor([0.5734, 0.0398, 0.2654, 0.1213]) -Greedy action tensor([-1.1578, -1.6877, -0.7760, -0.6386]) tensor([0.2112, 0.1243, 0.3094, 0.3550]) -Greedy action tensor([-0.2062, -2.8506, 0.1120, 0.5179]) tensor([0.2218, 0.0158, 0.3049, 0.4575]) -Greedy action tensor([ 0.2201, -1.0538, -0.5817, 1.1685]) tensor([0.2320, 0.0649, 0.1041, 0.5990]) -Greedy action tensor([ 1.3566, -1.0291, 0.8037, 0.6123]) tensor([0.4668, 0.0430, 0.2685, 0.2218]) -Greedy action tensor([-0.5427, -1.9758, -0.3170, -0.1386]) tensor([0.2507, 0.0598, 0.3141, 0.3755]) -Greedy action tensor([ 0.6077, -0.5652, 1.1478, -0.0952]) tensor([0.2840, 0.0879, 0.4874, 0.1406]) -Greedy action tensor([-0.8724, -1.7055, 0.2627, -0.5543]) tensor([0.1689, 0.0734, 0.5255, 0.2322]) -Greedy action tensor([-0.2816, -0.7056, -0.4148, -0.9155]) tensor([0.3268, 0.2139, 0.2860, 0.1734]) -Greedy action tensor([ 1.9267e-01, -2.0185e+00, -1.9248e-03, 1.7323e+00]) tensor([0.1516, 0.0166, 0.1248, 0.7070]) -Greedy action tensor([ 0.0058, -1.2401, -0.6082, 0.9698]) tensor([0.2247, 0.0646, 0.1216, 0.5891]) -Greedy action tensor([ 0.0229, 0.3343, 0.0810, -0.0734]) tensor([0.2308, 0.3151, 0.2446, 0.2096]) -Greedy action tensor([ 0.2441, -2.4427, 0.1821, 0.2907]) tensor([0.3272, 0.0223, 0.3076, 0.3429]) -Greedy action tensor([ 0.2026, -2.1309, -0.3603, -0.0977]) tensor([0.4154, 0.0403, 0.2366, 0.3077]) -Greedy action tensor([ 0.2952, -0.6302, 0.4033, -0.1364]) tensor([0.3165, 0.1254, 0.3526, 0.2055]) -Greedy action tensor([-0.2632, 0.4401, -1.2255, 0.2342]) tensor([0.1981, 0.4003, 0.0757, 0.3258]) -Greedy action tensor([-0.4845, -1.0874, 1.2453, -0.3407]) tensor([0.1199, 0.0656, 0.6761, 0.1384]) -Greedy action tensor([ 1.7539, -1.2107, 1.0218, 0.6779]) tensor([0.5338, 0.0275, 0.2567, 0.1820]) -Greedy action tensor([ 0.8770, -1.5288, 0.2088, 1.1701]) tensor([0.3398, 0.0306, 0.1742, 0.4554]) -Greedy action tensor([ 0.6144, -0.5388, 0.0473, -0.3691]) tensor([0.4431, 0.1399, 0.2513, 0.1657]) -Greedy action tensor([ 1.1062, -1.2237, 0.5250, 1.4163]) tensor([0.3311, 0.0322, 0.1852, 0.4515]) -Greedy action tensor([-0.8448, -1.0564, -0.9482, 0.2795]) tensor([0.1727, 0.1398, 0.1558, 0.5317]) -Greedy action tensor([ 0.5533, -2.5244, 0.3377, 0.6104]) tensor([0.3435, 0.0158, 0.2769, 0.3637]) -Greedy action tensor([ 1.7595, -0.7851, 1.1724, 1.1286]) tensor([0.4616, 0.0362, 0.2566, 0.2456]) -Greedy action tensor([ 1.7571, -1.5819, 0.9843, -0.0202]) tensor([0.6001, 0.0213, 0.2771, 0.1015]) -Greedy action tensor([ 0.3994, -1.3370, -0.2273, 0.0499]) tensor([0.4140, 0.0729, 0.2212, 0.2919]) -Greedy action tensor([ 0.2967, -0.7554, -0.8413, 0.1199]) tensor([0.3988, 0.1393, 0.1278, 0.3342]) -Greedy action tensor([ 0.4671, 0.5473, -0.2572, 0.9697]) tensor([0.2369, 0.2567, 0.1148, 0.3916]) -Greedy action tensor([ 0.6799, -1.4421, 0.7678, 0.9957]) tensor([0.2791, 0.0334, 0.3047, 0.3827]) -Greedy action tensor([-0.3384, -2.0970, 0.5843, 0.1735]) tensor([0.1867, 0.0322, 0.4697, 0.3115]) -Greedy action tensor([1.1766, 0.2476, 0.7472, 0.0097]) tensor([0.4242, 0.1676, 0.2761, 0.1321]) -Greedy action tensor([ 0.2664, -1.3674, 0.0197, 0.5312]) tensor([0.3049, 0.0595, 0.2383, 0.3973]) -Greedy action tensor([ 0.5477, -0.8292, 0.3662, -0.0827]) tensor([0.3819, 0.0964, 0.3185, 0.2033]) -Greedy action tensor([-0.4311, 0.2052, 0.5678, -0.8607]) tensor([0.1599, 0.3021, 0.4341, 0.1040]) -Greedy action tensor([ 0.3887, -1.4664, 0.7574, -0.0778]) tensor([0.3096, 0.0484, 0.4477, 0.1942]) -Greedy action tensor([-0.1548, 0.1871, -0.4579, 2.0745]) tensor([0.0804, 0.1132, 0.0594, 0.7471]) -Greedy action tensor([ 0.0600, 0.0667, 1.3045, -0.7675]) tensor([0.1691, 0.1702, 0.5869, 0.0739]) -Greedy action tensor([-1.7439, 0.0392, -1.0414, 0.0183]) tensor([0.0676, 0.4021, 0.1365, 0.3938]) -Greedy action tensor([ 0.7167, -0.6184, 1.2359, -0.8119]) tensor([0.3164, 0.0833, 0.5317, 0.0686]) -Greedy action tensor([ 1.5256, -0.9566, 0.3878, 0.0541]) tensor([0.6121, 0.0512, 0.1962, 0.1405]) -Greedy action tensor([-0.4182, -2.4843, 0.3773, 1.0704]) tensor([0.1286, 0.0163, 0.2850, 0.5700]) -Greedy action tensor([ 0.8314, -1.6456, 0.5748, 1.0584]) tensor([0.3213, 0.0270, 0.2486, 0.4031]) -Greedy action tensor([ 0.1464, -0.8833, 0.4556, 0.5927]) tensor([0.2335, 0.0834, 0.3181, 0.3649]) -Greedy action tensor([-0.0927, 0.0572, 1.1063, -0.7471]) tensor([0.1667, 0.1937, 0.5530, 0.0867]) -Greedy action tensor([ 1.2077, -0.1925, 0.7202, 0.5904]) tensor([0.4167, 0.1027, 0.2559, 0.2247]) -Greedy action tensor([ 0.7726, -0.3944, 0.8622, 0.9520]) tensor([0.2777, 0.0864, 0.3037, 0.3322]) -Greedy action tensor([ 0.7215, -0.4449, 0.6436, -0.3379]) tensor([0.3871, 0.1206, 0.3581, 0.1342]) -Greedy action tensor([-0.3181, -0.1785, 0.2385, -1.7154]) tensor([0.2414, 0.2776, 0.4212, 0.0597]) -Greedy action tensor([0.3165, 0.3961, 1.3105, 0.7710]) tensor([0.1572, 0.1703, 0.4248, 0.2477]) -Greedy action tensor([ 0.7356, -0.2883, -0.0801, 0.9897]) tensor([0.3236, 0.1162, 0.1431, 0.4171]) -Greedy action tensor([-1.9352, -1.6526, 1.1371, -0.2700]) tensor([0.0342, 0.0454, 0.7393, 0.1810]) -Greedy action tensor([-0.8603, -2.2359, 0.5643, 0.8734]) tensor([0.0903, 0.0228, 0.3754, 0.5114]) -Greedy action tensor([ 0.3696, -0.6837, 1.5147, 1.1146]) tensor([0.1516, 0.0529, 0.4763, 0.3193]) -Greedy action tensor([-0.3908, -0.3693, 0.9941, -0.4957]) tensor([0.1446, 0.1477, 0.5775, 0.1302]) -Greedy action tensor([0.8290, 0.0145, 0.0370, 0.2097]) tensor([0.4108, 0.1819, 0.1861, 0.2212]) -Greedy action tensor([-0.4076, -1.7225, -0.0134, 0.5146]) tensor([0.1899, 0.0510, 0.2816, 0.4775]) -Greedy action tensor([ 0.8908, -0.1821, 0.6387, 0.5016]) tensor([0.3576, 0.1223, 0.2779, 0.2423]) -Greedy action tensor([ 0.2135, -0.8905, -0.8466, 0.0499]) tensor([0.3957, 0.1312, 0.1371, 0.3360]) -Greedy action tensor([ 0.1918, -1.7933, -0.1623, -0.7827]) tensor([0.4512, 0.0620, 0.3166, 0.1703]) -Greedy action tensor([-1.3538, -0.5689, 0.3611, 0.1011]) tensor([0.0767, 0.1682, 0.4263, 0.3287]) -Greedy action tensor([-1.4474, 0.0845, 0.4154, -0.0911]) tensor([0.0627, 0.2901, 0.4039, 0.2434]) -Greedy action tensor([-1.5012, 0.4963, 0.5676, -0.5159]) tensor([0.0527, 0.3887, 0.4174, 0.1412]) -Greedy action tensor([-1.8824, -0.4092, 0.6336, -0.1380]) tensor([0.0426, 0.1860, 0.5275, 0.2439]) -Greedy action tensor([-1.9002, -0.3909, 0.6440, -0.1523]) tensor([0.0417, 0.1885, 0.5305, 0.2393]) -Greedy action tensor([-1.9315, -0.4212, 0.6605, -0.1644]) tensor([0.0404, 0.1830, 0.5399, 0.2366]) -Greedy action tensor([-1.7936, -0.0501, 0.5394, -0.0513]) tensor([0.0440, 0.2515, 0.4534, 0.2511]) -Greedy action tensor([-1.4525, -0.0157, 0.4574, -0.6810]) tensor([0.0708, 0.2979, 0.4781, 0.1532]) -Greedy action tensor([-1.7753, -0.4912, 0.5776, -0.0731]) tensor([0.0485, 0.1752, 0.5102, 0.2661]) -Greedy action tensor([-1.9097, -0.4574, 0.6581, -0.1577]) tensor([0.0415, 0.1775, 0.5415, 0.2395]) -Greedy action tensor([-1.5523, -0.4945, 0.5934, 0.2394]) tensor([0.0543, 0.1563, 0.4639, 0.3256]) -Greedy action tensor([-1.9070, -0.4662, 0.7098, -0.1500]) tensor([0.0405, 0.1709, 0.5541, 0.2345]) -Greedy action tensor([-1.8008, -0.2282, 0.6056, -0.2164]) tensor([0.0459, 0.2212, 0.5091, 0.2238]) -Greedy action tensor([-1.8452, -0.4540, 0.6149, -0.1370]) tensor([0.0450, 0.1807, 0.5262, 0.2481]) -Greedy action tensor([-1.9409, -0.4631, 0.6826, -0.1722]) tensor([0.0399, 0.1751, 0.5507, 0.2342]) -Greedy action tensor([-1.5691, -0.4386, 0.6782, 0.3850]) tensor([0.0485, 0.1502, 0.4589, 0.3423]) -Greedy action tensor([-1.9305, -0.4327, 0.6579, -0.1723]) tensor([0.0407, 0.1819, 0.5414, 0.2360]) -Greedy action tensor([-1.8144, -0.3264, 0.5430, -0.1177]) tensor([0.0466, 0.2065, 0.4925, 0.2544]) -Greedy action tensor([-1.6748, -0.2706, 0.6783, 0.1508]) tensor([0.0459, 0.1868, 0.4826, 0.2847]) -Greedy action tensor([-1.7733, -0.4834, 0.5843, -0.1031]) tensor([0.0488, 0.1771, 0.5151, 0.2590]) -Greedy action tensor([-0.6686, 0.8883, 0.2633, 0.5914]) tensor([0.0847, 0.4017, 0.2150, 0.2985]) -Greedy action tensor([-1.8237, -0.4264, 0.6039, -0.1231]) tensor([0.0458, 0.1851, 0.5185, 0.2506]) -Greedy action tensor([-0.9982, -0.5926, 0.1921, 0.3408]) tensor([0.1041, 0.1562, 0.3424, 0.3973]) -Greedy action tensor([-0.6300, -0.3962, 0.3352, 0.5447]) tensor([0.1231, 0.1555, 0.3231, 0.3984]) -Greedy action tensor([-1.2366, -0.6443, 0.2796, 0.2766]) tensor([0.0840, 0.1519, 0.3826, 0.3815]) -Greedy action tensor([-1.7451, -0.3504, 0.5532, -0.0868]) tensor([0.0494, 0.1993, 0.4919, 0.2594]) -Greedy action tensor([-1.4743, -0.4106, 0.5100, 0.3210]) tensor([0.0582, 0.1685, 0.4231, 0.3502]) -Greedy action tensor([-1.8578, -0.4336, 0.6738, -0.0441]) tensor([0.0419, 0.1741, 0.5269, 0.2570]) -Greedy action tensor([-1.8517, -0.4159, 0.6199, -0.1246]) tensor([0.0441, 0.1854, 0.5224, 0.2481]) -Greedy action tensor([-1.9007, -0.4291, 0.6524, -0.1321]) tensor([0.0416, 0.1810, 0.5338, 0.2436]) -Greedy action tensor([-0.9654, -0.2264, 0.2097, 0.4086]) tensor([0.0972, 0.2036, 0.3149, 0.3842]) -Greedy action tensor([-1.8323, -0.1162, 0.5795, -0.1177]) tensor([0.0430, 0.2391, 0.4793, 0.2387]) -Greedy action tensor([-1.9329, -0.4028, 0.6537, -0.1711]) tensor([0.0404, 0.1868, 0.5373, 0.2355]) -Greedy action tensor([-1.8320, -0.4011, 0.6046, -0.1327]) tensor([0.0453, 0.1894, 0.5177, 0.2477]) -Greedy action tensor([-1.7695, -0.4333, 0.5884, -0.0574]) tensor([0.0478, 0.1819, 0.5054, 0.2649]) -Greedy action tensor([-1.7527, -0.2393, 0.5517, -0.0358]) tensor([0.0473, 0.2150, 0.4742, 0.2635]) -Greedy action tensor([-1.7253, -0.3888, 0.6580, 0.0883]) tensor([0.0459, 0.1747, 0.4978, 0.2816]) -Greedy action tensor([-1.6576, -0.1264, 0.6148, 0.0070]) tensor([0.0485, 0.2243, 0.4708, 0.2563]) -Greedy action tensor([-1.9208, -0.4217, 0.6484, -0.1707]) tensor([0.0412, 0.1843, 0.5375, 0.2369]) -Greedy action tensor([-0.7924, 0.7549, 0.3407, 0.4984]) tensor([0.0804, 0.3777, 0.2496, 0.2923]) -Greedy action tensor([-1.0148, 0.7005, 0.1122, -0.0704]) tensor([0.0819, 0.4550, 0.2526, 0.2105]) -Greedy action tensor([-1.7289, -0.4272, 0.5540, -0.0555]) tensor([0.0505, 0.1855, 0.4949, 0.2691]) -Greedy action tensor([-1.9392, -0.4578, 0.6639, -0.1790]) tensor([0.0405, 0.1780, 0.5464, 0.2352]) -Greedy action tensor([-1.6611, -0.4803, 0.5865, 0.0884]) tensor([0.0513, 0.1673, 0.4860, 0.2954]) -Greedy action tensor([-1.9306, -0.4500, 0.6594, -0.1757]) tensor([0.0408, 0.1794, 0.5439, 0.2359]) -Greedy action tensor([-1.8218, -0.4645, 0.6228, -0.0856]) tensor([0.0453, 0.1759, 0.5218, 0.2570]) -Greedy action tensor([-0.2256, 0.5996, 0.3467, 0.7693]) tensor([0.1289, 0.2942, 0.2284, 0.3485]) -Greedy action tensor([-1.9117, -0.3984, 0.6490, -0.1624]) tensor([0.0413, 0.1874, 0.5341, 0.2373]) -Greedy action tensor([-1.9191, -0.4084, 0.6484, -0.1691]) tensor([0.0411, 0.1863, 0.5360, 0.2367]) -Greedy action tensor([-1.7940, -0.4250, 0.6395, 0.0193]) tensor([0.0445, 0.1750, 0.5075, 0.2730]) -Greedy action tensor([-1.9112, -0.4443, 0.6716, -0.1596]) tensor([0.0411, 0.1782, 0.5438, 0.2369]) -Greedy action tensor([-1.2085, 0.3403, 0.5301, 0.3570]) tensor([0.0618, 0.2908, 0.3516, 0.2957]) -Greedy action tensor([-1.8963, -0.4170, 0.6382, -0.1604]) tensor([0.0422, 0.1854, 0.5326, 0.2397]) -Greedy action tensor([-1.9235, -0.4405, 0.6514, -0.1676]) tensor([0.0411, 0.1811, 0.5398, 0.2380]) -Greedy action tensor([-1.8026, -0.4593, 0.5961, -0.0932]) tensor([0.0468, 0.1793, 0.5152, 0.2586]) -Greedy action tensor([-1.9388, -0.4395, 0.6633, -0.1761]) tensor([0.0403, 0.1806, 0.5441, 0.2350]) -Greedy action tensor([-1.7513, -0.2566, 0.5861, -0.0220]) tensor([0.0466, 0.2078, 0.4828, 0.2628]) -Greedy action tensor([-1.8952, -0.4356, 0.6387, -0.1573]) tensor([0.0424, 0.1824, 0.5342, 0.2410]) -Greedy action tensor([ 0.0461, -0.2744, 0.8477, 1.7211]) tensor([0.1076, 0.0781, 0.2398, 0.5745]) -Greedy action tensor([-1.8677, -0.4538, 0.6185, -0.1304]) tensor([0.0438, 0.1803, 0.5268, 0.2491]) -Greedy action tensor([-1.1987, 0.7251, 0.1682, 0.1693]) tensor([0.0637, 0.4362, 0.2499, 0.2502]) -Greedy action tensor([-1.5990, -0.4069, 0.4813, 0.0303]) tensor([0.0575, 0.1893, 0.4601, 0.2931]) -Greedy action tensor([-1.8955, -0.4054, 0.6416, -0.1507]) tensor([0.0420, 0.1864, 0.5311, 0.2405]) -Greedy action tensor([-1.8409, -0.3927, 0.6073, -0.1138]) tensor([0.0445, 0.1896, 0.5153, 0.2506]) -Greedy action tensor([-1.9404, -0.4509, 0.6667, -0.1788]) tensor([0.0403, 0.1787, 0.5464, 0.2346]) -Greedy action tensor([-1.1410, 0.3803, 0.4002, 0.3549]) tensor([0.0680, 0.3112, 0.3175, 0.3034]) -Greedy action tensor([-1.4088, 0.5878, 0.2497, 0.1150]) tensor([0.0549, 0.4045, 0.2885, 0.2521]) -Greedy action tensor([-0.7990, 0.9922, 0.0664, 0.4699]) tensor([0.0773, 0.4638, 0.1838, 0.2751]) -Greedy action tensor([-1.9223, -0.4379, 0.6520, -0.1667]) tensor([0.0411, 0.1814, 0.5395, 0.2379]) -Greedy action tensor([-1.8937, -0.4447, 0.6390, -0.1551]) tensor([0.0425, 0.1810, 0.5348, 0.2417]) -Greedy action tensor([-1.6455e+00, -5.4753e-01, 5.1353e-01, -1.8477e-06]) tensor([0.0560, 0.1680, 0.4855, 0.2905]) -Greedy action tensor([-1.5144, -0.2196, 0.3978, 0.0632]) tensor([0.0615, 0.2245, 0.4162, 0.2978]) -Greedy action tensor([-1.4847, -0.3963, 0.4383, 0.1509]) tensor([0.0627, 0.1862, 0.4291, 0.3219]) -Greedy action tensor([-1.9275, -0.4234, 0.6582, -0.1709]) tensor([0.0407, 0.1832, 0.5403, 0.2358]) -Greedy action tensor([-1.8064, -0.3534, 0.6115, -0.0485]) tensor([0.0448, 0.1918, 0.5033, 0.2601]) -Greedy action tensor([-1.9031, -0.4624, 0.6437, -0.1493]) tensor([0.0421, 0.1777, 0.5372, 0.2430]) -Greedy action tensor([-1.9359, -0.4458, 0.6600, -0.1769]) tensor([0.0406, 0.1800, 0.5439, 0.2355]) -Greedy action tensor([-1.3972, -0.5852, 0.3671, 0.1540]) tensor([0.0724, 0.1631, 0.4228, 0.3416]) -Greedy action tensor([-1.0532, -0.6408, 0.2818, 0.0602]) tensor([0.1069, 0.1615, 0.4062, 0.3255]) -Greedy action tensor([-0.7354, 0.4968, 0.1483, 0.5840]) tensor([0.0944, 0.3238, 0.2285, 0.3533]) -Greedy action tensor([-0.9424, -0.2107, 0.4435, 0.4875]) tensor([0.0888, 0.1847, 0.3553, 0.3712]) -Greedy action tensor([ 1.8347, -1.0763, -0.1752, 0.3456]) tensor([0.7072, 0.0385, 0.0948, 0.1595]) -Greedy action tensor([ 1.7679, -0.7395, 0.0071, 0.1106]) tensor([0.6925, 0.0564, 0.1190, 0.1320]) -Greedy action tensor([ 1.7528, -0.6180, -0.5174, 0.4603]) tensor([0.6797, 0.0635, 0.0702, 0.1866]) -Greedy action tensor([ 1.3879, -0.2923, -0.4142, 0.6517]) tensor([0.5464, 0.1018, 0.0901, 0.2617]) -Greedy action tensor([ 1.9809, -0.5293, -0.3828, 1.0059]) tensor([0.6441, 0.0523, 0.0606, 0.2430]) -Greedy action tensor([ 1.1803, -0.1563, -0.3178, 0.1378]) tensor([0.5438, 0.1429, 0.1216, 0.1917]) -Greedy action tensor([ 1.5863, -0.4318, -0.0179, 0.0608]) tensor([0.6445, 0.0857, 0.1296, 0.1402]) -Greedy action tensor([ 2.0536, -0.7912, -0.5497, 0.9378]) tensor([0.6850, 0.0398, 0.0507, 0.2244]) -Greedy action tensor([ 1.4280, -0.1774, -0.5556, 0.3832]) tensor([0.5917, 0.1188, 0.0814, 0.2081]) -Greedy action tensor([ 1.1955, -0.2567, -0.3170, 0.3047]) tensor([0.5363, 0.1255, 0.1182, 0.2200]) -Greedy action tensor([ 1.3426, -0.5743, 0.1492, 0.2420]) tensor([0.5609, 0.0825, 0.1701, 0.1866]) -Greedy action tensor([ 0.6210, -0.3015, -0.1020, 0.2857]) tensor([0.3849, 0.1530, 0.1868, 0.2753]) -Greedy action tensor([ 1.4449, -0.2870, -0.3000, 0.3345]) tensor([0.5949, 0.1053, 0.1039, 0.1960]) -Greedy action tensor([ 1.3346, -0.1904, -0.3619, 0.1064]) tensor([0.5904, 0.1285, 0.1082, 0.1729]) -Greedy action tensor([ 1.2354, 0.0750, -0.5106, -0.0279]) tensor([0.5648, 0.1770, 0.0985, 0.1597]) -Greedy action tensor([ 1.8843, -0.6487, -0.6401, 0.5377]) tensor([0.7044, 0.0559, 0.0564, 0.1832]) -Greedy action tensor([ 1.4713, -0.0883, -0.0948, 0.1664]) tensor([0.5916, 0.1244, 0.1236, 0.1604]) -Greedy action tensor([ 1.2897, -0.1445, -0.7386, 0.2448]) tensor([0.5809, 0.1384, 0.0764, 0.2043]) -Greedy action tensor([ 1.5516, -0.4430, -0.6365, 0.3941]) tensor([0.6400, 0.0871, 0.0718, 0.2011]) -Greedy action tensor([ 1.4018, -0.1346, -0.2556, -0.0735]) tensor([0.6118, 0.1316, 0.1166, 0.1399]) -Greedy action tensor([ 0.7530, -0.4608, -0.0436, 0.0477]) tensor([0.4461, 0.1325, 0.2011, 0.2203]) -Greedy action tensor([ 0.9247, -0.2524, -0.1646, 0.1057]) tensor([0.4795, 0.1478, 0.1613, 0.2114]) -Greedy action tensor([ 2.0979, -1.1411, -0.1673, 0.7252]) tensor([0.7161, 0.0281, 0.0743, 0.1815]) -Greedy action tensor([ 1.2345, -0.3625, -0.6392, 0.6202]) tensor([0.5271, 0.1067, 0.0809, 0.2852]) -Greedy action tensor([ 2.8465, -1.3887, -0.3539, 0.5858]) tensor([0.8624, 0.0125, 0.0351, 0.0899]) -Greedy action tensor([ 2.1638, -1.0450, -0.2078, 0.7060]) tensor([0.7318, 0.0296, 0.0683, 0.1703]) -Greedy action tensor([1.0394, 0.0955, 0.2011, 0.0520]) tensor([0.4558, 0.1773, 0.1971, 0.1698]) -Greedy action tensor([ 1.5477, -0.9173, -0.2049, 0.5633]) tensor([0.6127, 0.0521, 0.1062, 0.2290]) -Greedy action tensor([ 0.3736, -0.0147, -0.4032, -0.1303]) tensor([0.3647, 0.2473, 0.1677, 0.2203]) -Greedy action tensor([ 0.9402, 0.0276, -0.1918, 0.4435]) tensor([0.4287, 0.1721, 0.1382, 0.2609]) -Greedy action tensor([ 1.2222, -0.7751, -0.0703, 0.2689]) tensor([0.5569, 0.0756, 0.1529, 0.2147]) -Greedy action tensor([ 2.0212, -0.3544, -0.1483, 0.3876]) tensor([0.7130, 0.0663, 0.0815, 0.1392]) -Greedy action tensor([ 1.3271, -0.6195, 0.0111, 0.2273]) tensor([0.5734, 0.0819, 0.1538, 0.1909]) -Greedy action tensor([ 1.3641, -0.6203, -0.5672, 0.7602]) tensor([0.5467, 0.0751, 0.0793, 0.2989]) -Greedy action tensor([ 1.5092, -0.2684, 0.2564, 0.8368]) tensor([0.5088, 0.0860, 0.1454, 0.2598]) -Greedy action tensor([ 1.3132, -0.2207, -0.9525, 0.4555]) tensor([0.5735, 0.1237, 0.0595, 0.2433]) -Greedy action tensor([ 2.0058, -0.8971, -0.6826, 0.4687]) tensor([0.7475, 0.0410, 0.0508, 0.1607]) -Greedy action tensor([ 1.1429, -0.5187, -0.0667, 0.4282]) tensor([0.5057, 0.0960, 0.1509, 0.2475]) -Greedy action tensor([ 1.4813, -0.5315, -0.5374, 0.3091]) tensor([0.6344, 0.0848, 0.0843, 0.1965]) -Greedy action tensor([ 1.6406, -0.5216, -0.3282, 0.0990]) tensor([0.6809, 0.0783, 0.0951, 0.1457]) -Greedy action tensor([ 1.9613, -0.6343, -0.1238, 0.3251]) tensor([0.7176, 0.0535, 0.0892, 0.1397]) -Greedy action tensor([ 1.5483, -0.3585, -0.1257, 0.4612]) tensor([0.5977, 0.0888, 0.1121, 0.2015]) -Greedy action tensor([ 1.9733, -0.8021, -0.4881, 0.6011]) tensor([0.7137, 0.0445, 0.0609, 0.1810]) -Greedy action tensor([ 0.4160, -0.6315, 0.1096, 0.0727]) tensor([0.3576, 0.1254, 0.2632, 0.2537]) -Greedy action tensor([ 0.6869, -0.3620, 0.1069, 0.1073]) tensor([0.4048, 0.1418, 0.2267, 0.2267]) -Greedy action tensor([ 1.0177, -0.0221, 0.0518, 0.0678]) tensor([0.4715, 0.1667, 0.1795, 0.1824]) -Greedy action tensor([ 0.7078, -0.0796, -0.4795, 0.2416]) tensor([0.4189, 0.1906, 0.1278, 0.2628]) -Greedy action tensor([ 1.4348, -0.4684, -0.5034, 0.7461]) tensor([0.5570, 0.0830, 0.0802, 0.2797]) -Greedy action tensor([ 1.6242, -0.2243, -0.1573, 0.5092]) tensor([0.6047, 0.0952, 0.1018, 0.1983]) -Greedy action tensor([ 1.2587, -0.1800, -0.5563, 0.3529]) tensor([0.5542, 0.1315, 0.0902, 0.2240]) -Greedy action tensor([ 1.0836, -0.5073, -0.2642, 0.3840]) tensor([0.5101, 0.1039, 0.1325, 0.2534]) -Greedy action tensor([ 1.0727, -0.3196, -0.2403, 0.3486]) tensor([0.4994, 0.1241, 0.1344, 0.2421]) -Greedy action tensor([ 1.3995, 0.0286, -0.2275, 0.3930]) tensor([0.5507, 0.1398, 0.1082, 0.2013]) -Greedy action tensor([ 1.5250, -0.1825, -0.0486, 0.4303]) tensor([0.5803, 0.1052, 0.1203, 0.1942]) -Greedy action tensor([ 1.7437, -0.4912, -0.2422, 0.4023]) tensor([0.6641, 0.0711, 0.0912, 0.1737]) -Greedy action tensor([ 1.0374, -0.0563, -0.0628, 0.0099]) tensor([0.4937, 0.1654, 0.1643, 0.1767]) -Greedy action tensor([ 1.0072, -0.6472, -0.0062, 0.2294]) tensor([0.4966, 0.0950, 0.1803, 0.2282]) -Greedy action tensor([ 0.8639, -0.5651, 0.0067, 0.2085]) tensor([0.4581, 0.1097, 0.1944, 0.2378]) -Greedy action tensor([ 1.0206, -0.5442, -0.1191, 0.5067]) tensor([0.4701, 0.0983, 0.1504, 0.2812]) -Greedy action tensor([ 1.5681, -1.0727, -0.1190, 0.1500]) tensor([0.6673, 0.0476, 0.1235, 0.1616]) -Greedy action tensor([ 1.4849, 0.2563, 0.0785, -0.5153]) tensor([0.5977, 0.1750, 0.1464, 0.0809]) -Greedy action tensor([ 0.2724, -0.2411, -0.4443, 0.3982]) tensor([0.3105, 0.1858, 0.1516, 0.3521]) -Greedy action tensor([ 1.8015, -0.8736, -0.2970, 0.6012]) tensor([0.6700, 0.0462, 0.0822, 0.2017]) -Greedy action tensor([ 1.2976, -0.3149, -0.7467, 0.5161]) tensor([0.5597, 0.1116, 0.0725, 0.2562]) -Greedy action tensor([ 1.7274, -0.9319, -0.1305, 0.3000]) tensor([0.6822, 0.0477, 0.1064, 0.1637]) -Greedy action tensor([ 1.3994, -0.5862, -0.6515, 1.1533]) tensor([0.4883, 0.0670, 0.0628, 0.3818]) -Greedy action tensor([ 1.6444, -0.4066, -0.4700, 0.0626]) tensor([0.6873, 0.0884, 0.0830, 0.1413]) -Greedy action tensor([ 1.6179, -1.0590, -0.4598, 0.8469]) tensor([0.6037, 0.0415, 0.0756, 0.2792]) -Greedy action tensor([ 1.3788, -0.6180, -0.7393, 0.8174]) tensor([0.5475, 0.0743, 0.0658, 0.3123]) -Greedy action tensor([ 1.5307, -0.5138, -0.3770, 0.0118]) tensor([0.6681, 0.0865, 0.0992, 0.1463]) -Greedy action tensor([ 1.7632, -0.1896, -0.5351, 0.2215]) tensor([0.6867, 0.0974, 0.0690, 0.1470]) -Greedy action tensor([ 0.8792, -0.4095, -0.2003, 0.1209]) tensor([0.4799, 0.1323, 0.1630, 0.2248]) -Greedy action tensor([ 1.0155, -0.5487, -0.2880, 0.4496]) tensor([0.4881, 0.1021, 0.1326, 0.2772]) -Greedy action tensor([ 1.7570, -0.4205, -0.1170, 0.1897]) tensor([0.6778, 0.0768, 0.1040, 0.1414]) -Greedy action tensor([ 0.7597, -0.4477, -0.3010, 0.7428]) tensor([0.3805, 0.1137, 0.1317, 0.3741]) -Greedy action tensor([ 2.4538, -1.3390, -0.1611, 0.7952]) tensor([0.7775, 0.0175, 0.0569, 0.1481]) -Greedy action tensor([ 1.7554, -0.3746, -0.4583, 0.4227]) tensor([0.6703, 0.0797, 0.0733, 0.1768]) -Greedy action tensor([ 1.2168, -0.7848, 0.1272, 0.1449]) tensor([0.5513, 0.0745, 0.1854, 0.1888]) -Greedy action tensor([ 2.3763, -1.1668, -0.4272, 0.9040]) tensor([0.7582, 0.0219, 0.0459, 0.1739]) -Greedy action tensor([ 1.5125, -0.2948, -0.5857, 0.2029]) tensor([0.6424, 0.1054, 0.0788, 0.1734]) -Greedy action tensor([ 1.5449, -0.3277, -0.4150, 0.6752]) tensor([0.5836, 0.0897, 0.0822, 0.2445]) -Greedy action tensor([ 0.7927, -0.4752, -0.0853, -0.2457]) tensor([0.4876, 0.1372, 0.2026, 0.1726]) -Greedy action tensor([0.2146, 0.6557, 0.0488, 0.0868]) tensor([0.2335, 0.3631, 0.1979, 0.2055]) -Greedy action tensor([ 0.9710, -0.9036, 0.1527, -0.5474]) tensor([0.5514, 0.0846, 0.2433, 0.1208]) -Greedy action tensor([ 0.7900, -0.5924, -0.0326, -0.3318]) tensor([0.4960, 0.1245, 0.2179, 0.1616]) -Greedy action tensor([ 0.9941, -0.2724, -0.0292, -0.1526]) tensor([0.5105, 0.1439, 0.1835, 0.1622]) -Greedy action tensor([ 0.6858, -0.1902, -0.1859, -0.1452]) tensor([0.4405, 0.1834, 0.1842, 0.1919]) -Greedy action tensor([ 0.6835, -0.4743, -0.0270, -0.2249]) tensor([0.4527, 0.1422, 0.2225, 0.1825]) -Greedy action tensor([ 0.7841, -0.5122, -0.2050, -0.3583]) tensor([0.5090, 0.1392, 0.1893, 0.1624]) -Greedy action tensor([ 0.7225, -0.2359, -0.0944, -0.4126]) tensor([0.4658, 0.1786, 0.2058, 0.1497]) -Greedy action tensor([ 0.7311, -0.7962, 0.0158, -0.3140]) tensor([0.4859, 0.1055, 0.2377, 0.1709]) -Greedy action tensor([ 0.1802, -0.0352, 0.0518, -0.1278]) tensor([0.2923, 0.2357, 0.2571, 0.2149]) -Greedy action tensor([ 0.7625, -0.5817, -0.0566, -0.2535]) tensor([0.4846, 0.1264, 0.2136, 0.1754]) -Greedy action tensor([ 0.6766, -0.6336, -0.0427, -0.4638]) tensor([0.4816, 0.1299, 0.2346, 0.1540]) -Greedy action tensor([ 0.8648, -0.4469, 0.0614, -0.5564]) tensor([0.5106, 0.1375, 0.2286, 0.1233]) -Greedy action tensor([ 0.7625, 0.1302, -0.0431, -0.0619]) tensor([0.4138, 0.2199, 0.1849, 0.1814]) -Greedy action tensor([ 0.8425, -0.4876, -0.0383, -0.2752]) tensor([0.4985, 0.1318, 0.2066, 0.1630]) -Greedy action tensor([ 0.4269, -0.1331, -0.1067, -0.1828]) tensor([0.3702, 0.2115, 0.2171, 0.2012]) -Greedy action tensor([ 0.5744, -0.1364, 0.0914, -0.1598]) tensor([0.3864, 0.1898, 0.2384, 0.1854]) -Greedy action tensor([ 0.2375, 0.0479, -0.0470, -0.0778]) tensor([0.3022, 0.2500, 0.2274, 0.2205]) -Greedy action tensor([ 0.7818, -0.4606, -0.0990, -0.5348]) tensor([0.5073, 0.1465, 0.2103, 0.1360]) -Greedy action tensor([ 0.9928, -0.6828, 0.0221, -0.3996]) tensor([0.5511, 0.1032, 0.2088, 0.1369]) -Greedy action tensor([ 0.3319, -0.0387, 0.0532, -0.2029]) tensor([0.3297, 0.2276, 0.2495, 0.1931]) -Greedy action tensor([ 0.7016, 0.0888, -0.0974, -0.0268]) tensor([0.4041, 0.2190, 0.1818, 0.1951]) -Greedy action tensor([ 0.5189, -0.0164, -0.1397, -0.2626]) tensor([0.3905, 0.2286, 0.2021, 0.1787]) -Greedy action tensor([ 0.9637, -0.8012, 0.0916, -0.5513]) tensor([0.5528, 0.0946, 0.2311, 0.1215]) -Greedy action tensor([ 0.4616, 0.1368, -0.0982, -0.0738]) tensor([0.3473, 0.2510, 0.1984, 0.2033]) -Greedy action tensor([ 0.4815, -0.5282, -0.1215, -0.2918]) tensor([0.4214, 0.1535, 0.2306, 0.1945]) -Greedy action tensor([ 0.9894, -0.5300, -0.1031, -0.2178]) tensor([0.5396, 0.1181, 0.1810, 0.1614]) -Greedy action tensor([ 0.7492, -0.3285, -0.0322, -0.2507]) tensor([0.4617, 0.1571, 0.2113, 0.1699]) -Greedy action tensor([ 0.5385, -0.0422, -0.0844, 0.0416]) tensor([0.3698, 0.2069, 0.1983, 0.2250]) -Greedy action tensor([ 0.6802, -0.4740, 0.0242, -0.3791]) tensor([0.4585, 0.1446, 0.2379, 0.1590]) -Greedy action tensor([ 0.6182, -0.3792, -0.0907, -0.1443]) tensor([0.4296, 0.1585, 0.2115, 0.2004]) -Greedy action tensor([ 0.5725, -0.2773, -0.0549, -0.0538]) tensor([0.4006, 0.1713, 0.2139, 0.2142]) -Greedy action tensor([ 1.1112, -0.8490, 0.0213, -0.5930]) tensor([0.6028, 0.0849, 0.2027, 0.1097]) -Greedy action tensor([ 0.6108, -0.4349, 0.0205, -0.3280]) tensor([0.4354, 0.1530, 0.2413, 0.1703]) -Greedy action tensor([ 0.7332, -0.5513, -0.1341, -0.2899]) tensor([0.4863, 0.1346, 0.2043, 0.1748]) -Greedy action tensor([ 0.3999, 0.0117, -0.0690, -0.0892]) tensor([0.3428, 0.2325, 0.2145, 0.2102]) -Greedy action tensor([ 0.3717, -0.2821, -0.0122, -0.1313]) tensor([0.3564, 0.1853, 0.2428, 0.2155]) -Greedy action tensor([ 0.5530, -0.1218, -0.1474, -0.2358]) tensor([0.4065, 0.2070, 0.2018, 0.1847]) -Greedy action tensor([ 0.3538, -0.1169, -0.0418, -0.4047]) tensor([0.3615, 0.2258, 0.2434, 0.1693]) -Greedy action tensor([ 0.6343, -0.3158, -0.2752, -0.3534]) tensor([0.4626, 0.1789, 0.1863, 0.1723]) -Greedy action tensor([ 0.5967, -0.1571, -0.0718, -0.0483]) tensor([0.3988, 0.1876, 0.2044, 0.2092]) -Greedy action tensor([ 0.4494, -0.3127, 0.1903, -0.5663]) tensor([0.3845, 0.1795, 0.2968, 0.1393]) -Greedy action tensor([ 0.7823, -0.3196, -0.0906, -0.2666]) tensor([0.4761, 0.1582, 0.1989, 0.1668]) -Greedy action tensor([ 0.4441, -0.0361, 0.0949, -0.1939]) tensor([0.3506, 0.2169, 0.2473, 0.1852]) -Greedy action tensor([ 0.8373, -0.4972, -0.0783, -0.7493]) tensor([0.5353, 0.1409, 0.2143, 0.1095]) -Greedy action tensor([ 0.3289, -0.0549, -0.0413, -0.2279]) tensor([0.3396, 0.2313, 0.2345, 0.1946]) -Greedy action tensor([ 0.7893, -0.2824, -0.0538, -0.1765]) tensor([0.4644, 0.1590, 0.1998, 0.1768]) -Greedy action tensor([ 0.4561, -0.2524, -0.0846, -0.1579]) tensor([0.3823, 0.1882, 0.2226, 0.2069]) -Greedy action tensor([ 0.4767, 0.0374, -0.0278, -0.0934]) tensor([0.3554, 0.2290, 0.2146, 0.2010]) -Greedy action tensor([ 0.6058, -0.1064, 0.1076, -0.1574]) tensor([0.3900, 0.1913, 0.2369, 0.1818]) -Greedy action tensor([ 0.5232, -0.3238, -0.0394, -0.3198]) tensor([0.4117, 0.1765, 0.2346, 0.1772]) -Greedy action tensor([ 0.7862, -0.2534, -0.0279, -0.1831]) tensor([0.4596, 0.1625, 0.2036, 0.1743]) -Greedy action tensor([0.4868, 0.0523, 0.0290, 0.1430]) tensor([0.3345, 0.2166, 0.2116, 0.2372]) -Greedy action tensor([ 1.1502, -0.7631, 0.0618, -0.4691]) tensor([0.5944, 0.0877, 0.2002, 0.1177]) -Greedy action tensor([ 0.3283, -0.1157, 0.0397, -0.3268]) tensor([0.3436, 0.2204, 0.2575, 0.1785]) -Greedy action tensor([ 0.7157, -0.5869, -0.0548, -0.3346]) tensor([0.4797, 0.1304, 0.2220, 0.1678]) -Greedy action tensor([ 0.5935, -0.7551, 0.0067, -0.1299]) tensor([0.4346, 0.1128, 0.2417, 0.2108]) -Greedy action tensor([ 0.8764, -0.6920, -0.1321, -0.2639]) tensor([0.5283, 0.1101, 0.1927, 0.1689]) -Greedy action tensor([ 0.6827, -0.0452, -0.0585, 0.0169]) tensor([0.4043, 0.1953, 0.1927, 0.2078]) -Greedy action tensor([ 0.5215, -0.1948, -0.0552, -0.0926]) tensor([0.3859, 0.1885, 0.2168, 0.2088]) -Greedy action tensor([ 0.4504, -0.1681, -0.1010, -0.2845]) tensor([0.3854, 0.2077, 0.2221, 0.1848]) -Greedy action tensor([ 0.6607, -0.2161, 0.0332, -0.4402]) tensor([0.4381, 0.1823, 0.2339, 0.1457]) -Greedy action tensor([ 0.8776, -0.4272, -0.0096, -0.3327]) tensor([0.5048, 0.1369, 0.2079, 0.1505]) -Greedy action tensor([ 0.8020, -0.5777, -0.1243, -0.2994]) tensor([0.5050, 0.1271, 0.2000, 0.1679]) -Greedy action tensor([ 0.7487, -0.4831, 0.0416, -0.2504]) tensor([0.4645, 0.1355, 0.2290, 0.1710]) -Greedy action tensor([ 0.7343, -0.2652, 0.0682, -0.4072]) tensor([0.4543, 0.1672, 0.2334, 0.1451]) -Greedy action tensor([ 0.4093, -0.3614, -0.0916, -0.1307]) tensor([0.3772, 0.1745, 0.2286, 0.2198]) -Greedy action tensor([ 1.4602e+00, -7.4979e-01, 1.3857e-03, -5.9075e-01]) tensor([0.6799, 0.0746, 0.1581, 0.0874]) -Greedy action tensor([ 0.7872, -0.3841, 0.0792, -0.0891]) tensor([0.4507, 0.1397, 0.2220, 0.1876]) -Greedy action tensor([ 0.5860, -0.6077, -0.1319, -0.1988]) tensor([0.4450, 0.1349, 0.2171, 0.2030]) -Greedy action tensor([ 0.8702, -0.4991, -0.0014, -0.3615]) tensor([0.5091, 0.1294, 0.2129, 0.1485]) -Greedy action tensor([ 0.8236, -0.7292, -0.0691, -0.4359]) tensor([0.5249, 0.1111, 0.2150, 0.1490]) -Greedy action tensor([ 0.9176, -0.8255, -0.0946, -0.2517]) tensor([0.5408, 0.0946, 0.1966, 0.1680]) -Greedy action tensor([ 0.8245, -0.4496, 0.2126, -0.5211]) tensor([0.4802, 0.1343, 0.2604, 0.1250]) -Greedy action tensor([ 0.6241, -0.3171, 0.3423, -0.2774]) tensor([0.3921, 0.1530, 0.2958, 0.1592]) -Greedy action tensor([ 0.5859, -0.2355, 0.2555, -0.4794]) tensor([0.3995, 0.1757, 0.2871, 0.1377]) -Greedy action tensor([ 0.4395, -0.1885, -0.1808, -0.4438]) tensor([0.4024, 0.2148, 0.2164, 0.1664]) -Greedy action tensor([ 0.8358, -0.4803, -0.0370, -0.4628]) tensor([0.5105, 0.1369, 0.2133, 0.1393]) -Greedy action tensor([ 0.6298, -0.7813, 0.2702, -0.3105]) tensor([0.4288, 0.1046, 0.2993, 0.1674]) -Greedy action tensor([ 0.4239, 0.0859, 0.1101, -0.0261]) tensor([0.3245, 0.2314, 0.2371, 0.2069]) -Greedy action tensor([ 0.3117, -1.1001, -0.6472, 0.2060]) tensor([0.3958, 0.0965, 0.1517, 0.3561]) -Greedy action tensor([ 0.3619, 0.0078, -0.3549, -0.5401]) tensor([0.3852, 0.2704, 0.1881, 0.1563]) -Greedy action tensor([0.3347, 0.0643, 1.5450, 0.5423]) tensor([0.1575, 0.1202, 0.5284, 0.1939]) -Greedy action tensor([-0.1435, 0.1990, -0.8123, 0.2394]) tensor([0.2279, 0.3210, 0.1168, 0.3343]) -Greedy action tensor([-0.6012, 0.0522, -0.7010, -0.3907]) tensor([0.1976, 0.3797, 0.1788, 0.2439]) -Greedy action tensor([ 0.5573, -0.3988, -0.5875, 1.2089]) tensor([0.2761, 0.1061, 0.0879, 0.5298]) -Greedy action tensor([-0.2870, 0.1869, -0.1129, 0.9096]) tensor([0.1407, 0.2261, 0.1675, 0.4657]) -Greedy action tensor([-0.0945, -0.8870, 0.4957, 0.4591]) tensor([0.2001, 0.0906, 0.3611, 0.3481]) -Greedy action tensor([ 0.9327, -0.8559, 0.5299, -0.0801]) tensor([0.4548, 0.0760, 0.3040, 0.1652]) -Greedy action tensor([1.5013, 0.0633, 0.1576, 0.5402]) tensor([0.5317, 0.1262, 0.1387, 0.2034]) -Greedy action tensor([ 0.1795, -0.7866, -0.7698, -1.0513]) tensor([0.4855, 0.1848, 0.1879, 0.1418]) -Greedy action tensor([-0.1603, 0.0225, -0.9364, -0.3214]) tensor([0.2847, 0.3419, 0.1310, 0.2424]) -Greedy action tensor([-0.1680, 0.0837, 0.8080, -1.1345]) tensor([0.1880, 0.2418, 0.4988, 0.0715]) -Greedy action tensor([ 0.8424, -0.7867, 0.0316, 1.6153]) tensor([0.2627, 0.0515, 0.1168, 0.5690]) -Greedy action tensor([ 0.7683, -1.4330, 0.3673, 1.1529]) tensor([0.3078, 0.0341, 0.2061, 0.4521]) -Greedy action tensor([-0.0503, -0.8208, -0.2345, 1.7700]) tensor([0.1181, 0.0547, 0.0982, 0.7290]) -Greedy action tensor([1.0008, 0.5845, 0.0565, 0.2168]) tensor([0.3992, 0.2633, 0.1553, 0.1823]) -Greedy action tensor([-0.1685, 0.2478, -0.2895, -0.5191]) tensor([0.2435, 0.3692, 0.2158, 0.1715]) -Greedy action tensor([ 1.8140, -1.1846, 0.5158, 0.6006]) tensor([0.6173, 0.0308, 0.1685, 0.1834]) -Greedy action tensor([ 1.1183, -0.1940, 1.2458, 0.9686]) tensor([0.3062, 0.0824, 0.3478, 0.2636]) -Greedy action tensor([ 2.1990, -1.7761, 0.2532, 0.8258]) tensor([0.7067, 0.0133, 0.1010, 0.1790]) -Greedy action tensor([-1.7105, -0.7335, -0.8465, 0.2330]) tensor([0.0768, 0.2042, 0.1823, 0.5367]) -Greedy action tensor([ 0.5719, -0.5314, -0.4189, 0.0642]) tensor([0.4339, 0.1439, 0.1611, 0.2611]) -Greedy action tensor([ 1.2013, -1.7455, 1.0191, 0.0480]) tensor([0.4542, 0.0239, 0.3786, 0.1434]) -Greedy action tensor([ 0.4232, -0.9623, -0.4113, 0.6362]) tensor([0.3423, 0.0856, 0.1486, 0.4235]) -Greedy action tensor([0.2782, 0.0828, 0.5153, 0.5127]) tensor([0.2297, 0.1889, 0.2911, 0.2903]) -Greedy action tensor([ 1.5471, -0.7480, 0.4786, 0.8183]) tensor([0.5190, 0.0523, 0.1783, 0.2504]) -Greedy action tensor([-0.1237, -0.3613, 0.4057, -0.8752]) tensor([0.2526, 0.1992, 0.4290, 0.1192]) -Greedy action tensor([-0.1800, -1.1607, -0.4699, 0.6646]) tensor([0.2247, 0.0843, 0.1682, 0.5229]) -Greedy action tensor([ 0.2022, -1.4499, -1.0877, 0.0340]) tensor([0.4325, 0.0829, 0.1191, 0.3656]) -Greedy action tensor([ 0.6539, 0.0772, -0.3777, 0.1579]) tensor([0.3957, 0.2223, 0.1410, 0.2410]) -Greedy action tensor([ 0.1512, -1.4214, 0.6890, -0.4502]) tensor([0.2884, 0.0598, 0.4938, 0.1580]) -Greedy action tensor([0.0775, 0.2813, 0.1355, 0.1184]) tensor([0.2311, 0.2833, 0.2449, 0.2407]) -Greedy action tensor([ 2.2543, -0.8839, -0.0907, 1.1827]) tensor([0.6749, 0.0293, 0.0647, 0.2311]) -Greedy action tensor([-1.5741, 0.2992, 0.3387, 0.1743]) tensor([0.0499, 0.3250, 0.3381, 0.2869]) -Greedy action tensor([ 1.1778, -0.0121, -0.0990, 0.3401]) tensor([0.4960, 0.1509, 0.1384, 0.2147]) -Greedy action tensor([ 0.2668, -0.3584, -0.3293, 0.8596]) tensor([0.2567, 0.1374, 0.1415, 0.4644]) -Greedy action tensor([-0.1882, -0.0723, 0.5662, 0.7906]) tensor([0.1447, 0.1625, 0.3077, 0.3851]) -Greedy action tensor([-0.5297, -0.6446, 1.1395, -1.2129]) tensor([0.1298, 0.1157, 0.6889, 0.0656]) -Greedy action tensor([0.3493, 0.0058, 0.4376, 1.4207]) tensor([0.1748, 0.1240, 0.1909, 0.5103]) -Greedy action tensor([ 1.3229, -0.5574, 1.5190, 1.0234]) tensor([0.3215, 0.0490, 0.3912, 0.2383]) -Greedy action tensor([ 0.6700, -0.0651, -0.2940, 0.0211]) tensor([0.4196, 0.2012, 0.1600, 0.2193]) -Greedy action tensor([-1.4384, 0.4586, -0.0999, -0.0746]) tensor([0.0650, 0.4331, 0.2478, 0.2541]) -Greedy action tensor([ 1.0435, -0.6372, 0.4651, 1.2574]) tensor([0.3349, 0.0624, 0.1878, 0.4148]) -Greedy action tensor([1.3559, 0.3529, 0.2971, 0.6295]) tensor([0.4551, 0.1669, 0.1579, 0.2201]) -Greedy action tensor([-0.2822, 1.1978, 0.5363, -0.0655]) tensor([0.1123, 0.4935, 0.2547, 0.1395]) -Greedy action tensor([ 0.6431, -0.4533, -0.0391, 1.4783]) tensor([0.2413, 0.0806, 0.1220, 0.5562]) -Greedy action tensor([ 0.4580, 0.2743, 0.5616, -0.0938]) tensor([0.2843, 0.2366, 0.3153, 0.1637]) -Greedy action tensor([-1.1264, -0.8550, 0.2929, -0.9526]) tensor([0.1310, 0.1718, 0.5414, 0.1558]) -Greedy action tensor([ 0.5031, 0.3726, 1.4280, -0.5079]) tensor([0.2099, 0.1843, 0.5294, 0.0764]) -Greedy action tensor([-0.7445, -0.1533, -0.3521, 0.2911]) tensor([0.1408, 0.2543, 0.2084, 0.3965]) -Greedy action tensor([-0.2044, -0.7581, 2.0338, 0.8672]) tensor([0.0721, 0.0414, 0.6760, 0.2105]) -Greedy action tensor([ 0.1857, -2.4511, 0.8213, 0.5748]) tensor([0.2255, 0.0161, 0.4257, 0.3327]) -Greedy action tensor([ 0.5745, -2.0887, 0.3679, 1.0539]) tensor([0.2859, 0.0199, 0.2325, 0.4617]) -Greedy action tensor([-0.0737, -1.0320, 2.3531, 0.4009]) tensor([0.0699, 0.0268, 0.7910, 0.1123]) -Greedy action tensor([ 0.0886, -0.8989, -0.2279, 0.2801]) tensor([0.3019, 0.1125, 0.2200, 0.3656]) -Greedy action tensor([-0.3554, -0.4084, 0.7754, -0.6288]) tensor([0.1722, 0.1633, 0.5335, 0.1310]) -Greedy action tensor([ 0.7688, -0.4437, -0.1900, 0.8758]) tensor([0.3579, 0.1065, 0.1372, 0.3984]) -Greedy action tensor([-0.7798, -1.5633, 0.1436, -0.1816]) tensor([0.1726, 0.0789, 0.4346, 0.3139]) -Greedy action tensor([-0.0807, -0.0132, 1.2625, -1.1330]) tensor([0.1600, 0.1712, 0.6130, 0.0559]) -Greedy action tensor([-0.1666, 0.2956, 0.4256, -0.3906]) tensor([0.1925, 0.3056, 0.3480, 0.1539]) -Greedy action tensor([-1.7121, -1.3789, -0.8232, -0.1508]) tensor([0.1042, 0.1455, 0.2536, 0.4967]) -Greedy action tensor([ 0.0195, -0.7266, -0.6088, 0.0565]) tensor([0.3283, 0.1557, 0.1752, 0.3408]) -Greedy action tensor([ 1.1583, -1.1458, 0.4845, -0.3033]) tensor([0.5430, 0.0542, 0.2768, 0.1259]) -Greedy action tensor([ 1.3112, -0.6046, 0.0751, 0.7618]) tensor([0.4963, 0.0731, 0.1442, 0.2865]) -Greedy action tensor([ 0.7611, 0.1554, 1.9032, -0.2009]) tensor([0.1976, 0.1078, 0.6191, 0.0755]) -Greedy action tensor([ 1.2774, -0.6438, 0.9230, 1.9941]) tensor([0.2567, 0.0376, 0.1801, 0.5256]) -Greedy action tensor([ 1.1336, -1.0547, 1.6330, -0.6976]) tensor([0.3424, 0.0384, 0.5643, 0.0549]) -Greedy action tensor([-0.2863, -2.9733, -0.2568, 1.1480]) tensor([0.1589, 0.0108, 0.1636, 0.6667]) -Greedy action tensor([-0.3837, -1.6155, 2.3745, -0.9663]) tensor([0.0568, 0.0166, 0.8950, 0.0317]) -Greedy action tensor([-0.0893, -1.9411, 2.1304, -0.8475]) tensor([0.0923, 0.0145, 0.8499, 0.0433]) -Greedy action tensor([ 0.4868, -1.7231, -0.6948, 0.5986]) tensor([0.3945, 0.0433, 0.1210, 0.4412]) -Greedy action tensor([ 0.3209, 0.5075, 0.3245, -0.4466]) tensor([0.2723, 0.3281, 0.2732, 0.1264]) -Greedy action tensor([-0.0037, -0.0107, 0.0801, -0.1267]) tensor([0.2522, 0.2505, 0.2743, 0.2230]) -Greedy action tensor([ 0.5113, -0.8546, 0.6696, 0.4844]) tensor([0.2941, 0.0750, 0.3446, 0.2863]) -Greedy action tensor([ 1.4292, -1.6907, -0.1306, 0.5110]) tensor([0.6047, 0.0267, 0.1271, 0.2414]) -Greedy action tensor([ 0.4329, -1.1654, 0.6937, -0.5190]) tensor([0.3465, 0.0701, 0.4497, 0.1337]) -Greedy action tensor([ 0.0577, -0.6744, -0.3072, 1.0185]) tensor([0.2088, 0.1004, 0.1450, 0.5458]) -Greedy action tensor([ 0.6439, -0.5786, 0.6130, 0.3300]) tensor([0.3339, 0.0983, 0.3238, 0.2440]) -Greedy action tensor([ 0.7293, 0.4463, -0.5191, -0.4505]) tensor([0.4259, 0.3209, 0.1222, 0.1309]) -Greedy action tensor([-0.8084, -1.2789, -0.5573, -0.7320]) tensor([0.2507, 0.1566, 0.3222, 0.2706]) -Greedy action tensor([ 0.3296, -0.1136, 1.2964, -1.3477]) tensor([0.2243, 0.1440, 0.5898, 0.0419]) -Greedy action tensor([-1.8190, -0.3275, 0.6067, -0.0881]) tensor([0.0446, 0.1984, 0.5049, 0.2520]) -Greedy action tensor([-1.3460, 0.1163, 0.3807, -0.0913]) tensor([0.0692, 0.2988, 0.3892, 0.2428]) -Greedy action tensor([-1.9296, -0.4759, 0.8148, -0.0024]) tensor([0.0361, 0.1544, 0.5615, 0.2480]) -Greedy action tensor([-1.6886, -0.2712, 0.5087, -0.0619]) tensor([0.0520, 0.2148, 0.4684, 0.2648]) -Greedy action tensor([-1.7977, -0.4504, 0.5915, -0.1005]) tensor([0.0471, 0.1814, 0.5141, 0.2574]) -Greedy action tensor([-1.7696, -0.4668, 0.7652, 0.1009]) tensor([0.0420, 0.1547, 0.5303, 0.2729]) -Greedy action tensor([-1.8352, -0.4383, 0.6187, -0.1308]) tensor([0.0451, 0.1823, 0.5246, 0.2480]) -Greedy action tensor([-1.7790, -0.3264, 0.5885, -0.0640]) tensor([0.0465, 0.1988, 0.4963, 0.2584]) -Greedy action tensor([-1.0244, -0.6059, 0.4508, 0.2150]) tensor([0.0967, 0.1469, 0.4226, 0.3338]) -Greedy action tensor([-1.7934, -0.3762, 0.5775, -0.0874]) tensor([0.0469, 0.1933, 0.5017, 0.2581]) -Greedy action tensor([-1.8966, -0.3521, 0.6352, -0.1462]) tensor([0.0416, 0.1951, 0.5236, 0.2397]) -Greedy action tensor([-1.8581, -0.4256, 0.6220, -0.1269]) tensor([0.0439, 0.1839, 0.5243, 0.2479]) -Greedy action tensor([-1.9227, -0.4434, 0.6573, -0.1723]) tensor([0.0411, 0.1803, 0.5421, 0.2365]) -Greedy action tensor([-1.9170, -0.4345, 0.6699, -0.1557]) tensor([0.0408, 0.1797, 0.5421, 0.2374]) -Greedy action tensor([-1.8682, -0.4191, 0.6272, -0.1381]) tensor([0.0434, 0.1850, 0.5266, 0.2450]) -Greedy action tensor([-0.6063, -0.6257, 0.1813, 0.1175]) tensor([0.1602, 0.1572, 0.3522, 0.3304]) -Greedy action tensor([0.0698, 1.0640, 0.1545, 0.7729]) tensor([0.1468, 0.3968, 0.1598, 0.2966]) -Greedy action tensor([-1.4099, -0.6257, 0.4205, 0.1611]) tensor([0.0702, 0.1539, 0.4380, 0.3379]) -Greedy action tensor([-1.8841, -0.4579, 0.6928, -0.0982]) tensor([0.0412, 0.1714, 0.5418, 0.2456]) -Greedy action tensor([-1.4848, -0.0805, 0.5596, 0.1854]) tensor([0.0552, 0.2249, 0.4265, 0.2934]) -Greedy action tensor([-1.9460, -0.4539, 0.6705, -0.1810]) tensor([0.0400, 0.1780, 0.5480, 0.2339]) -Greedy action tensor([-1.4108, -0.1573, 0.5679, 0.1756]) tensor([0.0602, 0.2107, 0.4352, 0.2940]) -Greedy action tensor([-1.7760, -0.5883, 0.8778, 0.2353]) tensor([0.0385, 0.1263, 0.5473, 0.2879]) -Greedy action tensor([-1.4987, -0.4541, 0.4746, -0.0846]) tensor([0.0660, 0.1876, 0.4749, 0.2715]) -Greedy action tensor([-1.6936, -0.3837, 0.5271, -0.0580]) tensor([0.0525, 0.1945, 0.4836, 0.2694]) -Greedy action tensor([-1.9209, -0.3367, 0.6385, -0.1778]) tensor([0.0408, 0.1988, 0.5273, 0.2331]) -Greedy action tensor([-1.8733, -0.1477, 0.5900, -0.1332]) tensor([0.0416, 0.2334, 0.4881, 0.2369]) -Greedy action tensor([-1.4228, -0.5350, 0.4896, 0.1886]) tensor([0.0658, 0.1598, 0.4451, 0.3294]) -Greedy action tensor([-1.3792, 0.3249, 0.3912, -0.1435]) tensor([0.0632, 0.3477, 0.3715, 0.2176]) -Greedy action tensor([-1.3243, -0.7041, 0.7838, 0.2082]) tensor([0.0636, 0.1183, 0.5237, 0.2945]) -Greedy action tensor([-1.5515, -0.4257, 0.5926, 0.2187]) tensor([0.0541, 0.1667, 0.4616, 0.3176]) -Greedy action tensor([-1.9251, -0.4096, 0.6500, -0.1677]) tensor([0.0408, 0.1859, 0.5364, 0.2368]) -Greedy action tensor([-1.7472, -0.3856, 0.5480, -0.0913]) tensor([0.0498, 0.1945, 0.4947, 0.2610]) -Greedy action tensor([-1.9058, -0.4409, 0.6469, -0.1605]) tensor([0.0418, 0.1811, 0.5374, 0.2397]) -Greedy action tensor([-1.6600, -0.3253, 0.6184, 0.0424]) tensor([0.0499, 0.1895, 0.4869, 0.2737]) -Greedy action tensor([-1.5024, -0.3343, 0.4583, 0.1502]) tensor([0.0605, 0.1944, 0.4295, 0.3156]) -Greedy action tensor([-1.9054, -0.4074, 0.6429, -0.1618]) tensor([0.0417, 0.1865, 0.5333, 0.2385]) -Greedy action tensor([-1.7888, -0.3187, 0.5895, -0.0352]) tensor([0.0456, 0.1985, 0.4923, 0.2636]) -Greedy action tensor([-0.8523, -0.4288, 0.4447, 0.7686]) tensor([0.0889, 0.1358, 0.3254, 0.4498]) -Greedy action tensor([-1.8779, -0.4569, 0.6860, -0.1183]) tensor([0.0418, 0.1730, 0.5425, 0.2427]) -Greedy action tensor([-1.8540, -0.4107, 0.6069, -0.1207]) tensor([0.0442, 0.1873, 0.5182, 0.2503]) -Greedy action tensor([-1.9409, -0.4495, 0.6682, -0.1771]) tensor([0.0402, 0.1787, 0.5464, 0.2347]) -Greedy action tensor([-1.1956, -0.6705, 0.9749, 1.1959]) tensor([0.0447, 0.0755, 0.3915, 0.4883]) -Greedy action tensor([-1.9035, -0.3407, 0.6199, -0.1513]) tensor([0.0416, 0.1988, 0.5194, 0.2402]) -Greedy action tensor([-1.7660e+00, -1.3543e-01, 5.7114e-01, 9.9325e-04]) tensor([0.0448, 0.2289, 0.4640, 0.2623]) -Greedy action tensor([-1.8502, -0.4372, 0.6174, -0.1266]) tensor([0.0444, 0.1825, 0.5240, 0.2490]) -Greedy action tensor([-1.9308, -0.4282, 0.6599, -0.1729]) tensor([0.0406, 0.1824, 0.5415, 0.2355]) -Greedy action tensor([-1.8791, -0.4220, 0.6299, -0.1481]) tensor([0.0430, 0.1848, 0.5291, 0.2430]) -Greedy action tensor([-0.8701, -0.5811, 0.1443, 0.4191]) tensor([0.1146, 0.1531, 0.3162, 0.4161]) -Greedy action tensor([-1.8511, -0.4829, 0.6224, -0.1199]) tensor([0.0446, 0.1751, 0.5287, 0.2517]) -Greedy action tensor([-0.8520, -0.2790, 0.2631, -0.0896]) tensor([0.1255, 0.2226, 0.3828, 0.2690]) -Greedy action tensor([-1.8245, -0.3333, 0.6109, -0.0986]) tensor([0.0445, 0.1976, 0.5080, 0.2499]) -Greedy action tensor([-1.9155, -0.8433, 0.0974, -0.4757]) tensor([0.0640, 0.1870, 0.4790, 0.2700]) -Greedy action tensor([-1.2157, 0.1671, 0.3398, 0.6829]) tensor([0.0610, 0.2431, 0.2889, 0.4071]) -Greedy action tensor([-1.5585, -0.5776, 0.8982, 0.2675]) tensor([0.0464, 0.1238, 0.5416, 0.2882]) -Greedy action tensor([-1.8600, -0.3863, 0.6212, -0.1288]) tensor([0.0435, 0.1901, 0.5205, 0.2459]) -Greedy action tensor([-0.2561, 0.5192, 0.4658, 1.2670]) tensor([0.1019, 0.2212, 0.2097, 0.4672]) -Greedy action tensor([-1.9410, -0.4498, 0.6629, -0.1790]) tensor([0.0404, 0.1793, 0.5454, 0.2350]) -Greedy action tensor([-1.5309, -0.5437, 0.5126, 0.2226]) tensor([0.0582, 0.1562, 0.4493, 0.3362]) -Greedy action tensor([-0.8644, 0.3537, 0.3617, 1.0555]) tensor([0.0685, 0.2314, 0.2333, 0.4668]) -Greedy action tensor([-1.9256, -0.4510, 0.6694, -0.1553]) tensor([0.0406, 0.1773, 0.5437, 0.2384]) -Greedy action tensor([-1.5703, -0.4533, 0.4901, -0.0605]) tensor([0.0609, 0.1860, 0.4777, 0.2755]) -Greedy action tensor([-1.9479, -0.4532, 0.6692, -0.1822]) tensor([0.0400, 0.1783, 0.5478, 0.2338]) -Greedy action tensor([-1.9117, -0.6006, 0.8970, -0.0540]) tensor([0.0361, 0.1339, 0.5987, 0.2313]) -Greedy action tensor([-1.1907, 0.0677, 0.7875, 0.7201]) tensor([0.0540, 0.1902, 0.3906, 0.3652]) -Greedy action tensor([-1.9114, -0.4533, 0.6568, -0.1618]) tensor([0.0415, 0.1784, 0.5413, 0.2388]) -Greedy action tensor([-1.8151, -0.4093, 0.6231, -0.0752]) tensor([0.0450, 0.1835, 0.5152, 0.2563]) -Greedy action tensor([-1.5019, -0.1310, 0.3594, 0.1077]) tensor([0.0611, 0.2406, 0.3929, 0.3055]) -Greedy action tensor([-1.8093, -0.3115, 0.6053, -0.0848]) tensor([0.0449, 0.2008, 0.5023, 0.2519]) -Greedy action tensor([-1.0605, -0.4419, 0.5391, 1.0792]) tensor([0.0613, 0.1139, 0.3037, 0.5211]) -Greedy action tensor([-1.6567, -0.4908, 0.5841, -0.2139]) tensor([0.0560, 0.1799, 0.5269, 0.2372]) -Greedy action tensor([-1.8209, -0.3482, 0.5906, -0.1279]) tensor([0.0456, 0.1987, 0.5081, 0.2477]) -Greedy action tensor([-1.8158, -0.4796, 0.7103, 0.0600]) tensor([0.0420, 0.1596, 0.5246, 0.2738]) -Greedy action tensor([-1.3607, -0.6180, 0.3500, 0.1888]) tensor([0.0749, 0.1575, 0.4146, 0.3529]) -Greedy action tensor([-1.9212, -0.3931, 0.6488, -0.1634]) tensor([0.0409, 0.1883, 0.5338, 0.2370]) -Greedy action tensor([-1.8015, -0.2809, 0.5548, -0.0742]) tensor([0.0460, 0.2103, 0.4851, 0.2586]) -Greedy action tensor([-1.7900, -0.3567, 0.5721, -0.0810]) tensor([0.0469, 0.1966, 0.4976, 0.2590]) -Greedy action tensor([-1.5426, -0.6088, 0.4742, -0.1388]) tensor([0.0661, 0.1682, 0.4967, 0.2690]) -Greedy action tensor([-1.5879, -0.5894, 0.5701, -0.0040]) tensor([0.0580, 0.1574, 0.5019, 0.2827]) -Greedy action tensor([-1.8940, -0.4469, 0.6438, -0.1531]) tensor([0.0424, 0.1801, 0.5360, 0.2416]) -Greedy action tensor([-1.7241, -0.3925, 0.6064, 0.0112]) tensor([0.0482, 0.1826, 0.4958, 0.2734]) -Greedy action tensor([ 1.0441, -0.1266, -0.1064, -0.1661]) tensor([0.5195, 0.1611, 0.1644, 0.1549]) -Greedy action tensor([ 0.6623, -0.2293, -0.3210, 0.3525]) tensor([0.3972, 0.1628, 0.1486, 0.2914]) -Greedy action tensor([ 1.8752, -1.1036, -0.4009, 1.0332]) tensor([0.6312, 0.0321, 0.0648, 0.2719]) -Greedy action tensor([ 1.6594, -0.7639, -0.3376, 0.3662]) tensor([0.6672, 0.0591, 0.0906, 0.1831]) -Greedy action tensor([ 1.4895, -0.0046, -0.1907, 0.7834]) tensor([0.5251, 0.1179, 0.0979, 0.2592]) -Greedy action tensor([ 1.3988, 0.3095, -0.0466, 0.3309]) tensor([0.5220, 0.1756, 0.1230, 0.1794]) -Greedy action tensor([ 1.6833, -0.6267, -0.2390, 0.4022]) tensor([0.6565, 0.0652, 0.0960, 0.1823]) -Greedy action tensor([ 1.1441, -0.1206, -0.0463, 0.1247]) tensor([0.5136, 0.1450, 0.1562, 0.1853]) -Greedy action tensor([ 1.9214, -0.4835, -0.3769, 0.2068]) tensor([0.7295, 0.0659, 0.0733, 0.1313]) -Greedy action tensor([ 1.2123, -0.7385, -0.1040, -0.0332]) tensor([0.5889, 0.0837, 0.1579, 0.1695]) -Greedy action tensor([ 1.6660, -0.6553, -0.4781, 0.2893]) tensor([0.6813, 0.0669, 0.0798, 0.1720]) -Greedy action tensor([ 0.9047, -0.3174, -0.3391, 0.4158]) tensor([0.4553, 0.1341, 0.1313, 0.2792]) -Greedy action tensor([ 0.9197, -0.1160, -0.0677, -0.1548]) tensor([0.4833, 0.1716, 0.1801, 0.1650]) -Greedy action tensor([ 1.1785, -0.2708, -0.3898, 0.2080]) tensor([0.5489, 0.1288, 0.1144, 0.2079]) -Greedy action tensor([ 1.5602, -0.2812, 0.0259, 0.3382]) tensor([0.5992, 0.0950, 0.1292, 0.1766]) -Greedy action tensor([ 1.3072, -0.3407, -0.6435, 0.0969]) tensor([0.6125, 0.1179, 0.0871, 0.1826]) -Greedy action tensor([ 0.6820, -0.4499, 0.2877, 0.3182]) tensor([0.3715, 0.1198, 0.2505, 0.2582]) -Greedy action tensor([ 1.4761, -0.5934, -0.1923, 0.4062]) tensor([0.6032, 0.0762, 0.1137, 0.2069]) -Greedy action tensor([ 1.7172, -0.4978, -0.2905, 0.5774]) tensor([0.6397, 0.0698, 0.0859, 0.2046]) -Greedy action tensor([ 1.4932, 0.1356, -0.1689, 0.0505]) tensor([0.5941, 0.1528, 0.1127, 0.1404]) -Greedy action tensor([ 5.1658e-01, -4.0930e-01, -4.5659e-04, 6.1405e-02]) tensor([0.3807, 0.1508, 0.2270, 0.2415]) -Greedy action tensor([ 1.4198, -0.2177, -0.3329, 0.3444]) tensor([0.5852, 0.1138, 0.1014, 0.1996]) -Greedy action tensor([ 1.4732, -0.2781, -0.3360, 0.4787]) tensor([0.5857, 0.1017, 0.0959, 0.2167]) -Greedy action tensor([ 1.6431, -0.4688, -0.5971, 0.0430]) tensor([0.6996, 0.0847, 0.0745, 0.1412]) -Greedy action tensor([ 1.9508, -0.7426, -0.5325, 0.6226]) tensor([0.7062, 0.0478, 0.0589, 0.1871]) -Greedy action tensor([1.6023, 0.0092, 0.0581, 0.0256]) tensor([0.6160, 0.1252, 0.1315, 0.1273]) -Greedy action tensor([ 0.8155, -0.2086, -0.6064, 1.0010]) tensor([0.3566, 0.1281, 0.0860, 0.4293]) -Greedy action tensor([ 1.7504, -0.4452, -0.9475, 1.0087]) tensor([0.6043, 0.0673, 0.0407, 0.2878]) -Greedy action tensor([ 1.5213, 0.1135, -0.8042, 0.5487]) tensor([0.5812, 0.1422, 0.0568, 0.2198]) -Greedy action tensor([ 1.7333, -0.1083, -0.5358, 0.4526]) tensor([0.6494, 0.1030, 0.0672, 0.1804]) -Greedy action tensor([ 1.6409, -0.4285, -0.3171, 0.2966]) tensor([0.6544, 0.0826, 0.0924, 0.1706]) -Greedy action tensor([ 1.2079, -0.4800, -0.4558, 0.0614]) tensor([0.5910, 0.1093, 0.1120, 0.1878]) -Greedy action tensor([ 1.2757, -0.4126, -0.3564, 0.2972]) tensor([0.5694, 0.1052, 0.1113, 0.2140]) -Greedy action tensor([ 1.5857, -0.7459, -0.2558, 0.3795]) tensor([0.6431, 0.0625, 0.1020, 0.1925]) -Greedy action tensor([ 1.1212, 0.0241, -0.3404, -0.4473]) tensor([0.5637, 0.1882, 0.1307, 0.1175]) -Greedy action tensor([ 1.3032, -0.2243, -0.4561, 0.1679]) tensor([0.5846, 0.1269, 0.1007, 0.1879]) -Greedy action tensor([ 1.6613, -0.9676, -0.2688, 0.4227]) tensor([0.6636, 0.0479, 0.0963, 0.1923]) -Greedy action tensor([ 1.1851, -0.2777, -0.1698, 0.0382]) tensor([0.5533, 0.1282, 0.1428, 0.1757]) -Greedy action tensor([ 1.4996, -0.6543, -0.0645, 0.3908]) tensor([0.6041, 0.0701, 0.1264, 0.1993]) -Greedy action tensor([ 1.1063, 0.1875, -0.1045, 0.0732]) tensor([0.4871, 0.1944, 0.1451, 0.1734]) -Greedy action tensor([ 1.0201, 0.1502, -1.0966, 0.0346]) tensor([0.5228, 0.2191, 0.0630, 0.1951]) -Greedy action tensor([ 1.1860, -0.2702, -0.3023, 0.2436]) tensor([0.5410, 0.1261, 0.1221, 0.2108]) -Greedy action tensor([1.7170, 0.6308, 0.5781, 0.1727]) tensor([0.5344, 0.1804, 0.1711, 0.1141]) -Greedy action tensor([ 1.7463, -0.2792, -0.1655, 0.7121]) tensor([0.6115, 0.0807, 0.0904, 0.2174]) -Greedy action tensor([ 1.6768, -0.8249, -0.4003, 0.5948]) tensor([0.6468, 0.0530, 0.0810, 0.2192]) -Greedy action tensor([ 2.1079, 0.2942, -0.3132, 0.3077]) tensor([0.7057, 0.1150, 0.0627, 0.1166]) -Greedy action tensor([ 1.7881, -0.7058, -0.3822, 0.5905]) tensor([0.6673, 0.0551, 0.0762, 0.2015]) -Greedy action tensor([ 1.5663, -0.7124, -0.4080, 0.6590]) tensor([0.6080, 0.0623, 0.0844, 0.2454]) -Greedy action tensor([ 1.0794, -0.3629, -0.3428, 0.5495]) tensor([0.4840, 0.1144, 0.1167, 0.2849]) -Greedy action tensor([ 1.0658, -0.3599, -0.8641, 0.5103]) tensor([0.5104, 0.1227, 0.0741, 0.2929]) -Greedy action tensor([ 0.9144, -0.4238, -0.0529, 0.1963]) tensor([0.4695, 0.1231, 0.1784, 0.2290]) -Greedy action tensor([ 1.2312, -0.6118, 0.0636, -0.0352]) tensor([0.5710, 0.0904, 0.1776, 0.1609]) -Greedy action tensor([ 1.9177, -0.3428, -0.7544, 0.1027]) tensor([0.7484, 0.0781, 0.0517, 0.1219]) -Greedy action tensor([ 1.3247, -0.3491, -0.5583, 0.1905]) tensor([0.6019, 0.1129, 0.0916, 0.1936]) -Greedy action tensor([ 2.1166, -0.7093, -0.2357, 0.5882]) tensor([0.7293, 0.0432, 0.0694, 0.1582]) -Greedy action tensor([ 1.0609, -0.1519, -0.4230, 0.1299]) tensor([0.5213, 0.1550, 0.1182, 0.2055]) -Greedy action tensor([ 1.5268, -0.3992, -0.7954, 0.6159]) tensor([0.6076, 0.0885, 0.0596, 0.2443]) -Greedy action tensor([ 1.0635, -0.4681, -0.3437, 0.3781]) tensor([0.5089, 0.1100, 0.1246, 0.2564]) -Greedy action tensor([ 0.9533, -0.0550, -0.2334, 0.6964]) tensor([0.4093, 0.1493, 0.1249, 0.3165]) -Greedy action tensor([ 1.1110, -0.6433, -0.4684, 0.6142]) tensor([0.5031, 0.0871, 0.1037, 0.3061]) -Greedy action tensor([ 1.3749, -0.4611, -0.5129, 0.6083]) tensor([0.5632, 0.0898, 0.0853, 0.2617]) -Greedy action tensor([ 1.0768, -0.2221, -0.0027, 0.3820]) tensor([0.4735, 0.1292, 0.1609, 0.2364]) -Greedy action tensor([ 1.2552, 0.0163, -0.4411, 0.5673]) tensor([0.5061, 0.1466, 0.0928, 0.2544]) -Greedy action tensor([ 1.4442, -0.3689, -0.1359, 0.2577]) tensor([0.5972, 0.0974, 0.1230, 0.1823]) -Greedy action tensor([ 1.3323, -0.4001, -0.3855, 0.4782]) tensor([0.5612, 0.0993, 0.1007, 0.2389]) -Greedy action tensor([ 1.0620, -0.5245, 0.1037, 0.0997]) tensor([0.5076, 0.1039, 0.1947, 0.1939]) -Greedy action tensor([ 2.2474, -1.3069, 0.0130, 0.4573]) tensor([0.7677, 0.0220, 0.0822, 0.1282]) -Greedy action tensor([ 1.4308, -0.2733, -0.7367, 0.4660]) tensor([0.5961, 0.1085, 0.0682, 0.2272]) -Greedy action tensor([ 1.2243, -0.3783, -0.5978, 0.3256]) tensor([0.5649, 0.1138, 0.0913, 0.2300]) -Greedy action tensor([ 1.5804, 0.3761, -0.2630, 0.0491]) tensor([0.5972, 0.1791, 0.0945, 0.1292]) -Greedy action tensor([ 1.6599, -0.5719, -0.5417, 0.2567]) tensor([0.6832, 0.0733, 0.0756, 0.1679]) -Greedy action tensor([ 1.6081, -0.2176, -0.6090, 0.4909]) tensor([0.6261, 0.1009, 0.0682, 0.2048]) -Greedy action tensor([ 1.4259, -0.2492, -0.4546, 0.2089]) tensor([0.6113, 0.1145, 0.0932, 0.1810]) -Greedy action tensor([ 1.3177, -0.6557, -0.0134, 0.1603]) tensor([0.5823, 0.0809, 0.1538, 0.1830]) -Greedy action tensor([ 1.0381, -0.2645, -0.2666, 0.0752]) tensor([0.5195, 0.1412, 0.1409, 0.1983]) -Greedy action tensor([ 1.5855, -0.6657, -0.3132, 0.0971]) tensor([0.6753, 0.0711, 0.1011, 0.1524]) -Greedy action tensor([ 1.6387, -0.6138, -0.4715, -0.0888]) tensor([0.7122, 0.0749, 0.0863, 0.1266]) -Greedy action tensor([ 1.1194, -0.2280, -0.0749, 0.3664]) tensor([0.4917, 0.1278, 0.1489, 0.2316]) -Greedy action tensor([ 2.0671, -0.7380, -0.3877, 0.3100]) tensor([0.7582, 0.0459, 0.0651, 0.1308]) -Greedy action tensor([ 2.0535, 0.1576, -0.2974, 0.5100]) tensor([0.6854, 0.1029, 0.0653, 0.1464]) -Greedy action tensor([ 1.4855, -0.7969, -0.0064, 0.4412]) tensor([0.5956, 0.0608, 0.1340, 0.2096]) -Greedy action tensor([ 0.3744, -0.1987, 0.2568, -0.4470]) tensor([0.3457, 0.1949, 0.3073, 0.1520]) -Greedy action tensor([ 0.5646, -0.2216, 0.0296, -0.5196]) tensor([0.4203, 0.1915, 0.2461, 0.1421]) -Greedy action tensor([ 0.3911, 0.1365, -0.1269, -0.2133]) tensor([0.3428, 0.2657, 0.2042, 0.1873]) -Greedy action tensor([ 0.4403, 0.0477, -0.0349, 0.1348]) tensor([0.3296, 0.2226, 0.2049, 0.2428]) -Greedy action tensor([ 0.5008, 0.2203, -0.1469, 0.1668]) tensor([0.3339, 0.2522, 0.1747, 0.2391]) -Greedy action tensor([ 0.9146, -0.5141, 0.0873, -0.3446]) tensor([0.5100, 0.1222, 0.2230, 0.1448]) -Greedy action tensor([ 0.6262, -0.2729, -0.1698, -0.1833]) tensor([0.4342, 0.1767, 0.1959, 0.1932]) -Greedy action tensor([ 0.9122, -0.3992, -0.2740, -0.4646]) tensor([0.5473, 0.1475, 0.1671, 0.1381]) -Greedy action tensor([ 0.6213, -0.0897, 0.0118, -0.1084]) tensor([0.3973, 0.1952, 0.2160, 0.1915]) -Greedy action tensor([ 0.5987, -0.2153, 0.0109, -0.6337]) tensor([0.4366, 0.1935, 0.2426, 0.1273]) -Greedy action tensor([ 0.7042, -0.5599, -0.1358, -0.2963]) tensor([0.4803, 0.1357, 0.2074, 0.1766]) -Greedy action tensor([ 0.5511, -0.3365, -0.1107, -0.2631]) tensor([0.4219, 0.1736, 0.2176, 0.1869]) -Greedy action tensor([ 0.5634, 0.3651, -0.1910, 0.2500]) tensor([0.3310, 0.2714, 0.1557, 0.2419]) -Greedy action tensor([ 0.8747, -0.5181, -0.0823, -0.3415]) tensor([0.5185, 0.1288, 0.1991, 0.1536]) -Greedy action tensor([ 0.6035, -0.3575, -0.1098, -0.0369]) tensor([0.4167, 0.1594, 0.2042, 0.2197]) -Greedy action tensor([ 0.5426, -0.1220, 0.0010, -0.5180]) tensor([0.4094, 0.2106, 0.2382, 0.1418]) -Greedy action tensor([ 1.1498, -1.3180, 0.1729, -1.0223]) tensor([0.6348, 0.0538, 0.2390, 0.0723]) -Greedy action tensor([ 0.5658, 0.0217, -0.1072, -0.2207]) tensor([0.3928, 0.2280, 0.2004, 0.1789]) -Greedy action tensor([ 0.7759, -0.5007, -0.0087, -0.4664]) tensor([0.4941, 0.1378, 0.2255, 0.1426]) -Greedy action tensor([ 0.9863, -0.4778, 0.0163, -0.5045]) tensor([0.5448, 0.1260, 0.2065, 0.1227]) -Greedy action tensor([ 0.5522, -0.3251, 0.0660, -0.3525]) tensor([0.4106, 0.1708, 0.2525, 0.1662]) -Greedy action tensor([ 0.7347, -0.4887, -0.0409, -0.6040]) tensor([0.4958, 0.1459, 0.2283, 0.1300]) -Greedy action tensor([ 0.8636, -0.3321, 0.1822, -0.6999]) tensor([0.4956, 0.1499, 0.2507, 0.1038]) -Greedy action tensor([ 0.4582, -0.0375, 0.1534, -0.3490]) tensor([0.3581, 0.2181, 0.2640, 0.1598]) -Greedy action tensor([ 0.1807, 0.3347, -0.1088, -0.2329]) tensor([0.2796, 0.3262, 0.2093, 0.1849]) -Greedy action tensor([ 0.5971, -0.4488, 0.0386, -0.3609]) tensor([0.4334, 0.1523, 0.2480, 0.1663]) -Greedy action tensor([ 0.4475, -0.2800, 0.0970, -0.4990]) tensor([0.3883, 0.1876, 0.2735, 0.1507]) -Greedy action tensor([ 0.3161, -0.1828, -0.0361, -0.0665]) tensor([0.3342, 0.2029, 0.2350, 0.2279]) -Greedy action tensor([ 0.6365, -0.9829, -0.0468, -0.4369]) tensor([0.4890, 0.0968, 0.2469, 0.1672]) -Greedy action tensor([ 1.0501, -0.7735, -0.0136, -0.6013]) tensor([0.5888, 0.0951, 0.2032, 0.1129]) -Greedy action tensor([ 0.3908, -0.0248, -0.0778, -0.0044]) tensor([0.3379, 0.2230, 0.2115, 0.2276]) -Greedy action tensor([ 0.4327, -0.2230, 0.0972, -0.2734]) tensor([0.3666, 0.1903, 0.2621, 0.1809]) -Greedy action tensor([ 0.7532, -0.2836, -0.0633, -0.0668]) tensor([0.4470, 0.1585, 0.1976, 0.1969]) -Greedy action tensor([ 0.5920, -0.4554, -0.1381, -0.1097]) tensor([0.4295, 0.1507, 0.2069, 0.2129]) -Greedy action tensor([ 0.6012, -0.0750, -0.0020, -0.1994]) tensor([0.3993, 0.2030, 0.2184, 0.1793]) -Greedy action tensor([ 0.4993, -0.0379, -0.1635, -0.1279]) tensor([0.3797, 0.2219, 0.1957, 0.2028]) -Greedy action tensor([ 0.6462, -0.4571, -0.1969, -0.1388]) tensor([0.4508, 0.1496, 0.1940, 0.2056]) -Greedy action tensor([ 0.9554, -0.5295, 0.0170, -0.4942]) tensor([0.5398, 0.1223, 0.2112, 0.1267]) -Greedy action tensor([ 0.8998, -0.5817, -0.1246, -0.2686]) tensor([0.5271, 0.1198, 0.1892, 0.1638]) -Greedy action tensor([ 0.8452, -0.5218, -0.0466, -0.4674]) tensor([0.5171, 0.1318, 0.2120, 0.1392]) -Greedy action tensor([ 0.5720, -0.0565, 0.0436, -0.2882]) tensor([0.3928, 0.2095, 0.2316, 0.1662]) -Greedy action tensor([ 0.3495, -0.3212, -0.0633, -0.1805]) tensor([0.3621, 0.1852, 0.2396, 0.2131]) -Greedy action tensor([ 0.8157, -0.4142, -0.0626, -0.5405]) tensor([0.5088, 0.1487, 0.2114, 0.1311]) -Greedy action tensor([ 0.7202, -0.4644, -0.0729, -0.2439]) tensor([0.4674, 0.1430, 0.2115, 0.1782]) -Greedy action tensor([ 0.5473, -0.4169, -0.1203, -0.2178]) tensor([0.4238, 0.1616, 0.2174, 0.1972]) -Greedy action tensor([-0.0096, 0.4041, -0.2175, -0.5763]) tensor([0.2569, 0.3886, 0.2087, 0.1458]) -Greedy action tensor([ 0.0270, -0.0656, 0.0327, -0.0636]) tensor([0.2610, 0.2380, 0.2626, 0.2384]) -Greedy action tensor([ 0.9370, -0.2253, -0.1182, -0.1574]) tensor([0.5011, 0.1567, 0.1744, 0.1677]) -Greedy action tensor([ 0.8790, -1.3726, -0.1853, -0.8652]) tensor([0.6154, 0.0648, 0.2123, 0.1076]) -Greedy action tensor([ 0.8485, -0.6130, 0.0081, -0.4243]) tensor([0.5145, 0.1193, 0.2220, 0.1441]) -Greedy action tensor([ 0.5949, -0.3816, 0.0109, -0.2394]) tensor([0.4222, 0.1590, 0.2354, 0.1833]) -Greedy action tensor([ 1.2117, -1.2483, 0.0772, -0.4689]) tensor([0.6276, 0.0536, 0.2018, 0.1169]) -Greedy action tensor([ 0.4478, -0.1381, -0.1141, -0.2190]) tensor([0.3788, 0.2108, 0.2160, 0.1944]) -Greedy action tensor([ 0.8282, -0.4769, 0.0325, -0.2733]) tensor([0.4867, 0.1320, 0.2196, 0.1618]) -Greedy action tensor([ 0.6275, -0.2469, -0.0277, -0.1558]) tensor([0.4178, 0.1743, 0.2170, 0.1909]) -Greedy action tensor([ 0.3444, -0.0959, -0.0906, -0.2170]) tensor([0.3495, 0.2250, 0.2262, 0.1993]) -Greedy action tensor([ 0.8129, -0.4830, 0.1083, -0.3968]) tensor([0.4840, 0.1324, 0.2392, 0.1444]) -Greedy action tensor([ 0.7465, -0.7723, 0.0792, -0.2349]) tensor([0.4746, 0.1039, 0.2435, 0.1779]) -Greedy action tensor([ 0.9833, -0.5784, 0.0103, -0.2195]) tensor([0.5296, 0.1111, 0.2002, 0.1591]) -Greedy action tensor([ 0.7978, -0.3169, -0.1128, -0.1821]) tensor([0.4749, 0.1558, 0.1910, 0.1783]) -Greedy action tensor([ 1.0068, -0.4847, 0.0656, -0.4863]) tensor([0.5435, 0.1223, 0.2121, 0.1221]) -Greedy action tensor([ 0.1685, -0.1088, 0.0683, -0.2226]) tensor([0.2995, 0.2270, 0.2710, 0.2026]) -Greedy action tensor([ 0.5348, -0.3674, -0.1119, -0.3148]) tensor([0.4243, 0.1721, 0.2222, 0.1814]) -Greedy action tensor([ 0.8237, 0.1685, -0.0447, -0.0507]) tensor([0.4244, 0.2204, 0.1781, 0.1770]) -Greedy action tensor([ 0.7652, -0.4642, -0.0432, -0.5512]) tensor([0.4985, 0.1458, 0.2221, 0.1336]) -Greedy action tensor([ 0.7569, -0.6194, 0.0650, -0.2672]) tensor([0.4734, 0.1196, 0.2370, 0.1700]) -Greedy action tensor([ 0.3313, -0.3398, -0.1126, -0.2503]) tensor([0.3688, 0.1885, 0.2366, 0.2061]) -Greedy action tensor([ 0.8323, -0.2375, -0.0068, -0.2705]) tensor([0.4746, 0.1628, 0.2051, 0.1575]) -Greedy action tensor([ 0.3491, 0.2192, -0.2624, 0.0069]) tensor([0.3194, 0.2805, 0.1733, 0.2268]) -Greedy action tensor([ 0.8355, -0.2261, 0.1267, -0.6509]) tensor([0.4844, 0.1676, 0.2385, 0.1096]) -Greedy action tensor([ 0.7640, -0.6663, 0.2008, -0.6169]) tensor([0.4855, 0.1161, 0.2764, 0.1220]) -Greedy action tensor([ 0.7636, -0.9529, 0.0232, -0.4210]) tensor([0.5096, 0.0916, 0.2430, 0.1559]) -Greedy action tensor([ 0.7770, -0.4346, 0.1158, -0.1992]) tensor([0.4565, 0.1359, 0.2357, 0.1720]) -Greedy action tensor([ 0.5189, -0.3475, -0.3967, -0.1950]) tensor([0.4328, 0.1820, 0.1732, 0.2120]) -Greedy action tensor([ 0.7711, -0.6383, 0.0353, -0.3553]) tensor([0.4884, 0.1193, 0.2340, 0.1583]) -Greedy action tensor([0.0188, 0.3966, 0.0649, 0.0264]) tensor([0.2215, 0.3232, 0.2320, 0.2232]) -Greedy action tensor([ 0.6274, -0.2527, 0.1126, -0.3357]) tensor([0.4177, 0.1732, 0.2496, 0.1594]) -Greedy action tensor([ 0.6117, -0.1000, -0.0097, -0.0278]) tensor([0.3913, 0.1921, 0.2102, 0.2064]) -Greedy action tensor([ 0.6133, 0.0539, -0.1105, 0.1269]) tensor([0.3744, 0.2140, 0.1815, 0.2302]) -Greedy action tensor([ 0.8180, -0.2794, -0.0520, -0.1172]) tensor([0.4662, 0.1556, 0.1953, 0.1830]) -Greedy action tensor([ 0.4926, -0.2673, -0.0795, -0.1259]) tensor([0.3890, 0.1819, 0.2195, 0.2096]) -Greedy action tensor([ 0.1622, 0.1531, -0.0969, -0.2443]) tensor([0.2917, 0.2890, 0.2251, 0.1942]) -Greedy action tensor([ 0.4398, 0.2475, -0.0165, 0.3816]) tensor([0.2939, 0.2425, 0.1862, 0.2773]) -Greedy action tensor([ 1.0568, -0.6312, 1.2086, 0.7802]) tensor([0.3218, 0.0595, 0.3746, 0.2441]) -Greedy action tensor([ 0.6693, -0.9054, -0.2895, 0.9425]) tensor([0.3443, 0.0713, 0.1320, 0.4524]) -Greedy action tensor([ 0.3810, -0.5516, 1.2855, 0.6946]) tensor([0.1911, 0.0752, 0.4722, 0.2615]) -Greedy action tensor([-0.5482, -1.1151, -0.6918, 0.0328]) tensor([0.2369, 0.1344, 0.2052, 0.4235]) -Greedy action tensor([-0.1270, -1.4873, -0.6922, 0.6825]) tensor([0.2456, 0.0630, 0.1396, 0.5518]) -Greedy action tensor([-0.3711, 0.1302, 0.6808, 0.1567]) tensor([0.1387, 0.2290, 0.3971, 0.2351]) -Greedy action tensor([-0.4244, -0.2253, 0.9079, -1.2197]) tensor([0.1548, 0.1889, 0.5865, 0.0699]) -Greedy action tensor([-0.6646, -1.6733, 0.6794, 0.2069]) tensor([0.1318, 0.0481, 0.5052, 0.3150]) -Greedy action tensor([ 0.3466, -1.0520, 2.2658, 0.8782]) tensor([0.1024, 0.0253, 0.6980, 0.1743]) -Greedy action tensor([1.0255, 0.0773, 0.9330, 0.7516]) tensor([0.3268, 0.1266, 0.2980, 0.2486]) -Greedy action tensor([ 0.6986, -1.4853, 1.4371, -0.1568]) tensor([0.2754, 0.0310, 0.5764, 0.1171]) -Greedy action tensor([ 0.4732, -1.2063, 0.3791, 0.3977]) tensor([0.3307, 0.0617, 0.3010, 0.3066]) -Greedy action tensor([-1.0891, -1.6588, 0.7749, 0.2605]) tensor([0.0842, 0.0477, 0.5433, 0.3248]) -Greedy action tensor([-0.5990, -0.4890, 0.0326, 0.3466]) tensor([0.1522, 0.1699, 0.2862, 0.3918]) -Greedy action tensor([-0.7782, -1.0973, -0.4132, 0.3251]) tensor([0.1618, 0.1176, 0.2330, 0.4876]) -Greedy action tensor([ 0.3551, -0.9626, 0.2203, -0.3052]) tensor([0.3762, 0.1007, 0.3287, 0.1944]) -Greedy action tensor([ 0.1087, -1.0674, 0.2792, 0.7763]) tensor([0.2250, 0.0694, 0.2669, 0.4387]) -Greedy action tensor([-0.4078, -2.4384, 0.0736, 0.1424]) tensor([0.2231, 0.0293, 0.3610, 0.3867]) -Greedy action tensor([-0.8431, -0.4556, -0.3652, 1.3867]) tensor([0.0747, 0.1101, 0.1205, 0.6947]) -Greedy action tensor([ 0.6244, -0.9552, 0.1824, -0.0533]) tensor([0.4244, 0.0874, 0.2727, 0.2155]) -Greedy action tensor([ 0.1124, -0.9950, 0.4247, 0.4840]) tensor([0.2411, 0.0797, 0.3295, 0.3497]) -Greedy action tensor([ 0.0894, -1.3350, -0.2229, 0.0795]) tensor([0.3375, 0.0812, 0.2470, 0.3342]) -Greedy action tensor([ 0.1243, -1.0704, 0.1484, 0.0856]) tensor([0.3040, 0.0921, 0.3114, 0.2925]) -Greedy action tensor([-0.1514, 0.0561, -0.2065, 0.7944]) tensor([0.1739, 0.2139, 0.1645, 0.4477]) -Greedy action tensor([-0.8443, -0.5052, -0.6027, 0.3181]) tensor([0.1455, 0.2042, 0.1852, 0.4651]) -Greedy action tensor([ 0.2371, -0.1571, 0.2115, -0.0225]) tensor([0.2924, 0.1971, 0.2850, 0.2255]) -Greedy action tensor([ 0.1703, 0.5033, 0.1023, -0.4471]) tensor([0.2585, 0.3606, 0.2415, 0.1394]) -Greedy action tensor([-0.2784, -2.8121, 0.2056, 0.3419]) tensor([0.2192, 0.0174, 0.3557, 0.4077]) -Greedy action tensor([ 0.0874, -0.7651, 0.3023, -1.0451]) tensor([0.3346, 0.1427, 0.4149, 0.1078]) -Greedy action tensor([ 1.1379, -0.5679, 1.4447, 1.1782]) tensor([0.2792, 0.0507, 0.3794, 0.2907]) -Greedy action tensor([ 1.1273, -0.0930, 1.3777, -0.1800]) tensor([0.3508, 0.1036, 0.4507, 0.0949]) -Greedy action tensor([ 0.7491, -1.6431, 1.2069, -0.4051]) tensor([0.3348, 0.0306, 0.5291, 0.1055]) -Greedy action tensor([0.6458, 0.0170, 0.2081, 1.4593]) tensor([0.2255, 0.1202, 0.1456, 0.5087]) -Greedy action tensor([-1.3622, -2.2984, 0.5495, 0.0597]) tensor([0.0813, 0.0319, 0.5499, 0.3369]) -Greedy action tensor([ 0.5797, -0.0499, 0.1570, 0.5155]) tensor([0.3199, 0.1704, 0.2096, 0.3000]) -Greedy action tensor([ 0.4551, -0.8950, -0.5431, -0.0995]) tensor([0.4541, 0.1177, 0.1674, 0.2608]) -Greedy action tensor([-1.0312, -0.6598, 0.4651, 0.2582]) tensor([0.0948, 0.1375, 0.4234, 0.3443]) -Greedy action tensor([ 1.1336, -0.0561, 0.3724, 0.7111]) tensor([0.4121, 0.1254, 0.1925, 0.2701]) -Greedy action tensor([ 0.0844, -0.2852, -0.2693, 0.7891]) tensor([0.2264, 0.1565, 0.1590, 0.4581]) -Greedy action tensor([0.2854, 0.4818, 0.5565, 0.0749]) tensor([0.2305, 0.2805, 0.3023, 0.1867]) -Greedy action tensor([-0.7677, -1.1019, -1.1721, -0.0651]) tensor([0.2272, 0.1626, 0.1516, 0.4586]) -Greedy action tensor([ 0.3431, -0.0164, -0.1833, 1.7936]) tensor([0.1526, 0.1065, 0.0901, 0.6508]) -Greedy action tensor([ 0.7679, 0.5099, 0.4662, -0.2520]) tensor([0.3481, 0.2689, 0.2574, 0.1255]) -Greedy action tensor([ 0.1917, -0.0502, -1.0849, 0.5986]) tensor([0.2804, 0.2202, 0.0782, 0.4212]) -Greedy action tensor([ 1.0794, 1.3452, 1.0029, -0.1720]) tensor([0.2843, 0.3709, 0.2634, 0.0813]) -Greedy action tensor([ 0.8529, -0.0395, 1.2118, 0.0631]) tensor([0.3035, 0.1243, 0.4345, 0.1378]) -Greedy action tensor([ 1.8545, -1.0744, 0.8694, 0.8098]) tensor([0.5622, 0.0301, 0.2099, 0.1978]) -Greedy action tensor([-0.7036, -1.5305, -0.3547, 1.6972]) tensor([0.0720, 0.0315, 0.1021, 0.7944]) -Greedy action tensor([-0.9812, -0.5976, -0.9355, -0.2475]) tensor([0.1787, 0.2622, 0.1870, 0.3721]) -Greedy action tensor([ 0.3219, -0.2437, 0.9941, -0.0137]) tensor([0.2358, 0.1339, 0.4617, 0.1686]) -Greedy action tensor([ 1.7408, -0.2363, 0.8946, 1.4553]) tensor([0.4312, 0.0597, 0.1850, 0.3241]) -Greedy action tensor([ 0.6246, -0.9998, 0.8476, 0.3375]) tensor([0.3128, 0.0616, 0.3909, 0.2347]) -Greedy action tensor([-0.9030, 0.0773, 0.0417, -0.1539]) tensor([0.1197, 0.3191, 0.3080, 0.2532]) -Greedy action tensor([-1.1355, 0.0904, 1.3828, -1.4550]) tensor([0.0570, 0.1943, 0.7073, 0.0414]) -Greedy action tensor([-0.4756, 0.1850, 1.3947, -0.4367]) tensor([0.0955, 0.1850, 0.6201, 0.0993]) -Greedy action tensor([-0.7286, -0.7591, 0.5471, -0.3636]) tensor([0.1430, 0.1387, 0.5122, 0.2060]) -Greedy action tensor([ 0.9867, -1.1446, 1.3008, 1.0299]) tensor([0.2831, 0.0336, 0.3876, 0.2956]) -Greedy action tensor([ 0.0868, -1.2388, 1.2663, 0.4238]) tensor([0.1689, 0.0449, 0.5495, 0.2366]) -Greedy action tensor([-0.1431, 0.8453, 1.0038, -0.3777]) tensor([0.1311, 0.3523, 0.4128, 0.1037]) -Greedy action tensor([-0.4873, -0.1834, 0.4586, -0.1881]) tensor([0.1593, 0.2158, 0.4101, 0.2148]) -Greedy action tensor([-0.2843, -0.1379, 1.6839, -0.2044]) tensor([0.0962, 0.1113, 0.6883, 0.1042]) -Greedy action tensor([ 0.4611, -2.1198, -0.1056, 0.0446]) tensor([0.4343, 0.0329, 0.2464, 0.2864]) -Greedy action tensor([-0.0385, -0.0994, 0.5947, -0.6317]) tensor([0.2285, 0.2150, 0.4303, 0.1262]) -Greedy action tensor([ 0.1480, -1.0300, 1.5013, -0.3370]) tensor([0.1726, 0.0531, 0.6680, 0.1063]) -Greedy action tensor([ 0.9659, 0.1630, 0.0369, -0.0181]) tensor([0.4511, 0.2021, 0.1782, 0.1686]) -Greedy action tensor([0.9277, 0.6313, 0.3493, 0.1524]) tensor([0.3617, 0.2689, 0.2028, 0.1666]) -Greedy action tensor([ 0.1279, -1.2778, 0.8230, -0.8275]) tensor([0.2752, 0.0675, 0.5515, 0.1059]) -Greedy action tensor([-0.2030, -0.3832, 0.0496, -0.0125]) tensor([0.2308, 0.1928, 0.2971, 0.2793]) -Greedy action tensor([1.0116, 0.2886, 0.5351, 1.1053]) tensor([0.3121, 0.1514, 0.1938, 0.3427]) -Greedy action tensor([ 0.4258, -0.7272, 0.7359, 0.0281]) tensor([0.2984, 0.0942, 0.4069, 0.2005]) -Greedy action tensor([-0.9134, -1.1748, -0.8354, 0.4691]) tensor([0.1463, 0.1126, 0.1582, 0.5829]) -Greedy action tensor([-1.4219, -1.9239, 0.8354, -0.6066]) tensor([0.0745, 0.0451, 0.7120, 0.1684]) -Greedy action tensor([ 0.0113, -0.0190, 0.0323, 0.8893]) tensor([0.1853, 0.1797, 0.1892, 0.4458]) -Greedy action tensor([-1.1613, -0.1787, -0.3741, -0.8339]) tensor([0.1378, 0.3682, 0.3028, 0.1912]) -Greedy action tensor([ 0.3689, -0.5397, 0.7056, -0.3928]) tensor([0.3058, 0.1233, 0.4282, 0.1428]) -Greedy action tensor([ 1.3745, -1.4278, 0.4965, 1.6576]) tensor([0.3567, 0.0216, 0.1483, 0.4734]) -Greedy action tensor([ 2.2410, -0.6743, 1.9706, 1.3377]) tensor([0.4499, 0.0244, 0.3433, 0.1823]) -Greedy action tensor([-0.3411, -1.1749, 0.5219, 0.3157]) tensor([0.1744, 0.0758, 0.4134, 0.3364]) -Greedy action tensor([ 0.5947, -1.3237, -0.3628, -0.5609]) tensor([0.5418, 0.0796, 0.2080, 0.1706]) -Greedy action tensor([ 0.7451, -0.9671, 0.5013, -0.5228]) tensor([0.4453, 0.0804, 0.3490, 0.1253]) -Greedy action tensor([ 1.4215, -0.3297, -0.2659, 0.3007]) tensor([0.5936, 0.1030, 0.1098, 0.1935]) -Greedy action tensor([ 1.4354, -0.6308, -0.1790, 0.2724]) tensor([0.6104, 0.0773, 0.1215, 0.1908]) -Greedy action tensor([ 2.1023, -0.3161, 0.0430, 1.0598]) tensor([0.6373, 0.0568, 0.0813, 0.2247]) -Greedy action tensor([ 0.7182, -0.1011, -0.3431, 0.1427]) tensor([0.4257, 0.1876, 0.1473, 0.2394]) -Greedy action tensor([ 1.6074, -0.9533, -0.1924, 0.6090]) tensor([0.6207, 0.0479, 0.1026, 0.2287]) -Greedy action tensor([ 1.2397, -0.0978, -0.2721, 0.1170]) tensor([0.5530, 0.1452, 0.1219, 0.1799]) -Greedy action tensor([ 1.7632, -0.7277, -0.2179, 0.2760]) tensor([0.6912, 0.0573, 0.0953, 0.1562]) -Greedy action tensor([ 1.3325, -0.0726, -0.3937, 0.3097]) tensor([0.5609, 0.1376, 0.0998, 0.2017]) -Greedy action tensor([ 0.9531, -0.5808, -0.4045, 0.2900]) tensor([0.5030, 0.1085, 0.1294, 0.2592]) -Greedy action tensor([ 0.8969, -0.3501, -0.0339, 0.2379]) tensor([0.4547, 0.1307, 0.1793, 0.2353]) -Greedy action tensor([ 2.0004, -0.5704, -0.4150, 0.8568]) tensor([0.6736, 0.0515, 0.0602, 0.2147]) -Greedy action tensor([ 1.1920, -0.6903, -0.0970, 0.4272]) tensor([0.5282, 0.0804, 0.1456, 0.2458]) -Greedy action tensor([ 0.8089, -0.2095, -0.5109, 0.1280]) tensor([0.4685, 0.1692, 0.1252, 0.2371]) -Greedy action tensor([ 1.2992, -0.6228, -0.2673, 0.2472]) tensor([0.5867, 0.0859, 0.1225, 0.2049]) -Greedy action tensor([ 2.4352, -0.3346, -0.7873, 0.3468]) tensor([0.8154, 0.0511, 0.0325, 0.1010]) -Greedy action tensor([ 0.6556, -0.2495, -0.1271, -0.0955]) tensor([0.4285, 0.1733, 0.1959, 0.2022]) -Greedy action tensor([ 1.6441, -0.3293, -0.2879, 0.5420]) tensor([0.6188, 0.0860, 0.0896, 0.2056]) -Greedy action tensor([ 1.0385, -0.3457, -0.7459, 0.1051]) tensor([0.5520, 0.1383, 0.0927, 0.2171]) -Greedy action tensor([ 1.0824, 0.1712, -1.0311, 0.4652]) tensor([0.4849, 0.1949, 0.0586, 0.2616]) -Greedy action tensor([ 1.0830, -0.3909, -0.1218, 0.3390]) tensor([0.4990, 0.1143, 0.1496, 0.2371]) -Greedy action tensor([ 1.4014, -0.2526, -0.4628, 0.2038]) tensor([0.6067, 0.1161, 0.0941, 0.1832]) -Greedy action tensor([ 1.6445, -0.2862, -0.7843, 0.9210]) tensor([0.5820, 0.0844, 0.0513, 0.2823]) -Greedy action tensor([ 0.7620, -0.3950, -0.5755, 0.9760]) tensor([0.3552, 0.1117, 0.0932, 0.4399]) -Greedy action tensor([ 0.8743, -0.4110, -0.3917, 0.4190]) tensor([0.4561, 0.1261, 0.1286, 0.2892]) -Greedy action tensor([ 0.7109, -0.5046, -0.3368, -0.3316]) tensor([0.5000, 0.1483, 0.1754, 0.1763]) -Greedy action tensor([ 1.8432, -0.6975, -0.3551, 0.5905]) tensor([0.6777, 0.0534, 0.0752, 0.1937]) -Greedy action tensor([ 1.4708, -0.1331, -0.2271, 0.1909]) tensor([0.6016, 0.1210, 0.1101, 0.1673]) -Greedy action tensor([ 1.3234, -0.2816, -1.0711, 0.4649]) tensor([0.5828, 0.1171, 0.0532, 0.2470]) -Greedy action tensor([ 1.7288, -0.4485, -0.4295, 0.2774]) tensor([0.6835, 0.0775, 0.0790, 0.1601]) -Greedy action tensor([ 1.7823, -0.0069, -0.1210, 0.1544]) tensor([0.6612, 0.1105, 0.0986, 0.1298]) -Greedy action tensor([ 1.1747, -0.5961, 0.1151, 0.0915]) tensor([0.5390, 0.0917, 0.1868, 0.1824]) -Greedy action tensor([ 1.8871, -0.4849, -0.6393, 0.2672]) tensor([0.7293, 0.0680, 0.0583, 0.1443]) -Greedy action tensor([ 1.8695, -0.5175, -0.3516, 0.3708]) tensor([0.7023, 0.0645, 0.0762, 0.1569]) -Greedy action tensor([ 1.6591, -0.3664, -0.8471, 0.6297]) tensor([0.6367, 0.0840, 0.0519, 0.2274]) -Greedy action tensor([ 1.1390, -0.7462, 0.0373, 0.1116]) tensor([0.5429, 0.0824, 0.1804, 0.1943]) -Greedy action tensor([ 2.5088, -1.4131, 0.0592, 0.8036]) tensor([0.7765, 0.0154, 0.0670, 0.1411]) -Greedy action tensor([ 1.6367, -0.3163, -0.3898, 0.9094]) tensor([0.5692, 0.0807, 0.0750, 0.2750]) -Greedy action tensor([ 1.7829, -0.5937, -0.5260, 0.4731]) tensor([0.6840, 0.0635, 0.0680, 0.1846]) -Greedy action tensor([ 2.2304, -0.4920, -0.7546, 0.2012]) tensor([0.8015, 0.0527, 0.0405, 0.1054]) -Greedy action tensor([ 1.6652, -0.8881, -0.3247, 0.6723]) tensor([0.6309, 0.0491, 0.0863, 0.2338]) -Greedy action tensor([ 1.8240, -0.1874, 0.0469, 0.0152]) tensor([0.6818, 0.0912, 0.1153, 0.1117]) -Greedy action tensor([ 0.5791, -0.0959, -0.1908, 0.1017]) tensor([0.3857, 0.1964, 0.1786, 0.2393]) -Greedy action tensor([ 0.9243, -0.1589, 0.1332, -0.0884]) tensor([0.4640, 0.1571, 0.2104, 0.1685]) -Greedy action tensor([ 0.8871, -0.5226, -0.0043, 0.2848]) tensor([0.4542, 0.1109, 0.1862, 0.2487]) -Greedy action tensor([ 1.7033, -0.2414, -1.0255, 0.1295]) tensor([0.7064, 0.1010, 0.0461, 0.1464]) -Greedy action tensor([ 1.2491, -0.6075, -0.2078, 0.3753]) tensor([0.5536, 0.0865, 0.1289, 0.2310]) -Greedy action tensor([ 1.0112, -0.1622, -0.3839, 0.5488]) tensor([0.4573, 0.1414, 0.1133, 0.2880]) -Greedy action tensor([ 1.1975, -0.9487, -0.0673, 0.1029]) tensor([0.5767, 0.0674, 0.1628, 0.1930]) -Greedy action tensor([ 1.7754, -0.6694, -0.2505, 0.3617]) tensor([0.6841, 0.0593, 0.0902, 0.1664]) -Greedy action tensor([ 0.5224, -0.2003, 0.0252, -0.2624]) tensor([0.3922, 0.1904, 0.2385, 0.1789]) -Greedy action tensor([ 1.6958, -0.4838, -0.4683, 0.8963]) tensor([0.5961, 0.0674, 0.0685, 0.2680]) -Greedy action tensor([ 0.7275, -0.0360, -0.0590, -0.3165]) tensor([0.4398, 0.2050, 0.2003, 0.1548]) -Greedy action tensor([ 1.0604, -0.2655, -0.1312, 0.3573]) tensor([0.4844, 0.1286, 0.1471, 0.2398]) -Greedy action tensor([ 0.7015, 0.0120, -0.1489, 0.4800]) tensor([0.3662, 0.1838, 0.1565, 0.2935]) -Greedy action tensor([ 1.0879, -0.2319, -0.3401, 0.5652]) tensor([0.4762, 0.1272, 0.1142, 0.2823]) -Greedy action tensor([ 1.3645, -0.4477, -0.9000, 0.2501]) tensor([0.6268, 0.1024, 0.0651, 0.2057]) -Greedy action tensor([ 1.1380, -0.2529, -0.2123, 0.0583]) tensor([0.5412, 0.1347, 0.1403, 0.1838]) -Greedy action tensor([ 1.5048, -0.7944, 0.0245, 0.5632]) tensor([0.5821, 0.0584, 0.1325, 0.2270]) -Greedy action tensor([ 1.6267, -0.1160, -0.8187, 0.6228]) tensor([0.6142, 0.1075, 0.0532, 0.2251]) -Greedy action tensor([ 1.4737, -0.2890, -0.5268, 0.3812]) tensor([0.6089, 0.1045, 0.0824, 0.2042]) -Greedy action tensor([ 1.2158, -0.6228, -0.1462, 0.3326]) tensor([0.5469, 0.0870, 0.1401, 0.2261]) -Greedy action tensor([0.6501, 0.0270, 0.0711, 0.1553]) tensor([0.3695, 0.1982, 0.2071, 0.2253]) -Greedy action tensor([ 1.0739, -0.6234, -0.0495, 0.3107]) tensor([0.5064, 0.0928, 0.1647, 0.2361]) -Greedy action tensor([ 0.7158, 0.0329, -0.1601, -0.1205]) tensor([0.4246, 0.2145, 0.1769, 0.1840]) -Greedy action tensor([2.5096, 0.8747, 0.4760, 0.0330]) tensor([0.7093, 0.1383, 0.0928, 0.0596]) -Greedy action tensor([ 1.7600, -0.6176, -0.3243, 0.6418]) tensor([0.6477, 0.0601, 0.0806, 0.2117]) -Greedy action tensor([ 1.0921, -0.2451, -0.6056, 0.1860]) tensor([0.5406, 0.1419, 0.0990, 0.2185]) -Greedy action tensor([ 0.7155, -0.2414, -0.0383, 0.1701]) tensor([0.4108, 0.1578, 0.1933, 0.2381]) -Greedy action tensor([ 1.3561, -0.2176, -0.7958, 0.2031]) tensor([0.6100, 0.1264, 0.0709, 0.1926]) -Greedy action tensor([ 1.5459, -0.4777, -0.6504, 0.5313]) tensor([0.6227, 0.0823, 0.0693, 0.2258]) -Greedy action tensor([ 1.6699, -0.2227, -0.6237, 0.5313]) tensor([0.6362, 0.0959, 0.0642, 0.2037]) -Greedy action tensor([ 3.3402, -1.8322, -0.3773, 0.8424]) tensor([0.8991, 0.0051, 0.0218, 0.0740]) -Greedy action tensor([ 0.4434, -0.4255, 0.0244, -0.0839]) tensor([0.3749, 0.1572, 0.2466, 0.2213]) -Greedy action tensor([ 1.7545, -0.8810, 0.1514, -0.0107]) tensor([0.6925, 0.0496, 0.1394, 0.1185]) -Greedy action tensor([ 1.1860, 0.2521, -0.1389, 0.1877]) tensor([0.4933, 0.1939, 0.1311, 0.1818]) -Greedy action tensor([ 1.5192, -0.5392, -0.2219, 0.3165]) tensor([0.6237, 0.0796, 0.1093, 0.1874]) -Greedy action tensor([ 0.9742, -0.2741, -0.0073, 0.2072]) tensor([0.4703, 0.1350, 0.1763, 0.2184]) -Greedy action tensor([ 1.4480, -0.4131, -0.3072, 0.0490]) tensor([0.6348, 0.0987, 0.1097, 0.1567]) -Greedy action tensor([ 1.5668, -0.4855, -0.5423, 0.3975]) tensor([0.6409, 0.0823, 0.0778, 0.1990]) -Greedy action tensor([ 1.0643, -0.3519, -0.3011, 0.2764]) tensor([0.5121, 0.1243, 0.1307, 0.2329]) -Greedy action tensor([ 1.1601, -0.1434, -0.1795, 0.3942]) tensor([0.5004, 0.1359, 0.1311, 0.2326]) -Greedy action tensor([ 1.5194, -0.8317, -0.2223, 0.7185]) tensor([0.5816, 0.0554, 0.1019, 0.2611]) -Greedy action tensor([-0.4135, -0.2417, 0.1527, 0.1529]) tensor([0.1751, 0.2079, 0.3085, 0.3085]) -Greedy action tensor([-1.2444, -0.5670, 0.3479, 0.3424]) tensor([0.0783, 0.1542, 0.3848, 0.3827]) -Greedy action tensor([-1.8597, -0.4291, 0.6248, -0.1410]) tensor([0.0439, 0.1838, 0.5272, 0.2451]) -Greedy action tensor([-1.3561, 0.1581, 0.5977, 0.3721]) tensor([0.0548, 0.2493, 0.3870, 0.3088]) -Greedy action tensor([-1.7952, -0.4021, 0.6172, -0.0389]) tensor([0.0455, 0.1832, 0.5078, 0.2635]) -Greedy action tensor([0.0615, 0.0740, 0.6752, 1.2578]) tensor([0.1395, 0.1413, 0.2577, 0.4615]) -Greedy action tensor([-0.5401, -0.4936, 0.2616, -0.1759]) tensor([0.1749, 0.1833, 0.3900, 0.2518]) -Greedy action tensor([-1.9443, -0.4494, 0.6676, -0.1803]) tensor([0.0401, 0.1789, 0.5467, 0.2342]) -Greedy action tensor([-1.9199, -0.4540, 0.6524, -0.1728]) tensor([0.0414, 0.1792, 0.5419, 0.2375]) -Greedy action tensor([-1.9006, -0.3738, 0.6334, -0.1507]) tensor([0.0417, 0.1921, 0.5260, 0.2401]) -Greedy action tensor([-1.5330, -0.1580, 0.5089, 0.1407]) tensor([0.0556, 0.2198, 0.4282, 0.2964]) -Greedy action tensor([-1.6850, -0.4804, 0.5711, 0.1339]) tensor([0.0499, 0.1664, 0.4762, 0.3076]) -Greedy action tensor([-1.1187, -0.5679, 0.2947, 0.1685]) tensor([0.0955, 0.1657, 0.3927, 0.3461]) -Greedy action tensor([-1.7843, -0.4669, 0.5681, -0.0855]) tensor([0.0483, 0.1803, 0.5075, 0.2640]) -Greedy action tensor([-1.9334, -0.4351, 0.6612, -0.1712]) tensor([0.0405, 0.1812, 0.5424, 0.2359]) -Greedy action tensor([-1.7410, -0.2448, 0.6279, -0.3575]) tensor([0.0497, 0.2217, 0.5306, 0.1981]) -Greedy action tensor([-1.9316, -0.4662, 0.6657, -0.1736]) tensor([0.0407, 0.1763, 0.5468, 0.2362]) -Greedy action tensor([-1.7531, -0.3998, 0.5638, -0.0792]) tensor([0.0491, 0.1902, 0.4986, 0.2621]) -Greedy action tensor([-1.7015, -0.2390, 0.5235, -0.1011]) tensor([0.0512, 0.2211, 0.4739, 0.2538]) -Greedy action tensor([-1.1960, -0.6192, 0.2940, 0.2073]) tensor([0.0886, 0.1577, 0.3932, 0.3605]) -Greedy action tensor([-1.8956, -0.4408, 0.6402, -0.1556]) tensor([0.0424, 0.1814, 0.5349, 0.2413]) -Greedy action tensor([-1.7889, -0.4260, 0.6014, -0.0958]) tensor([0.0470, 0.1838, 0.5135, 0.2557]) -Greedy action tensor([-0.5751, -0.4501, 1.1681, 1.6125]) tensor([0.0597, 0.0676, 0.3410, 0.5318]) -Greedy action tensor([-1.8639, -0.2986, 0.6017, -0.1311]) tensor([0.0431, 0.2061, 0.5071, 0.2437]) -Greedy action tensor([-1.6693, -0.4557, 0.5225, -0.0457]) tensor([0.0544, 0.1830, 0.4868, 0.2758]) -Greedy action tensor([-1.3143, 0.0963, 0.6134, 0.3870]) tensor([0.0573, 0.2348, 0.3938, 0.3140]) -Greedy action tensor([-1.9428, -0.4530, 0.6636, -0.1802]) tensor([0.0403, 0.1788, 0.5461, 0.2348]) -Greedy action tensor([-1.9254, -0.4051, 0.6524, -0.1678]) tensor([0.0407, 0.1864, 0.5366, 0.2363]) -Greedy action tensor([-1.7657, -0.1854, 0.5677, -0.0469]) tensor([0.0460, 0.2233, 0.4742, 0.2565]) -Greedy action tensor([-1.8217, -0.3578, 0.5892, -0.1139]) tensor([0.0455, 0.1966, 0.5069, 0.2510]) -Greedy action tensor([-1.2455, 0.0057, 0.3338, -0.0698]) tensor([0.0794, 0.2776, 0.3855, 0.2575]) -Greedy action tensor([-1.8516, -0.4318, 0.6170, -0.1367]) tensor([0.0444, 0.1839, 0.5247, 0.2470]) -Greedy action tensor([-1.7442, -0.3577, 0.6324, -0.0863]) tensor([0.0476, 0.1904, 0.5123, 0.2497]) -Greedy action tensor([-1.8848, -0.0521, 0.6018, -0.1986]) tensor([0.0405, 0.2534, 0.4873, 0.2188]) -Greedy action tensor([-1.3204, -0.5752, 0.3928, 0.0248]) tensor([0.0800, 0.1686, 0.4440, 0.3073]) -Greedy action tensor([-1.5574, -0.5588, 0.4593, 0.0848]) tensor([0.0610, 0.1656, 0.4583, 0.3151]) -Greedy action tensor([-1.7423, -0.1657, 0.5723, 0.0404]) tensor([0.0457, 0.2209, 0.4620, 0.2714]) -Greedy action tensor([-1.8185, -0.5122, 0.6915, -0.0310]) tensor([0.0435, 0.1607, 0.5356, 0.2601]) -Greedy action tensor([-0.7614, -0.5854, 0.1615, 0.3575]) tensor([0.1287, 0.1535, 0.3239, 0.3940]) -Greedy action tensor([-1.8435, -0.4651, 0.6124, -0.1290]) tensor([0.0451, 0.1789, 0.5256, 0.2504]) -Greedy action tensor([-1.8439, -0.4472, 0.6200, -0.1300]) tensor([0.0448, 0.1809, 0.5259, 0.2484]) -Greedy action tensor([-1.6042, -0.2214, 0.5938, 0.0658]) tensor([0.0518, 0.2065, 0.4666, 0.2752]) -Greedy action tensor([-1.9269, -0.4373, 0.6553, -0.1728]) tensor([0.0409, 0.1815, 0.5412, 0.2364]) -Greedy action tensor([-1.9244, -0.4520, 0.6566, -0.1638]) tensor([0.0410, 0.1788, 0.5417, 0.2385]) -Greedy action tensor([-1.8627, -0.4789, 1.0884, 0.5527]) tensor([0.0283, 0.1130, 0.5417, 0.3170]) -Greedy action tensor([-1.5915, -0.0512, 0.4971, 0.2907]) tensor([0.0492, 0.2298, 0.3976, 0.3234]) -Greedy action tensor([-1.8945, -0.4015, 0.6559, -0.1401]) tensor([0.0416, 0.1851, 0.5329, 0.2404]) -Greedy action tensor([-1.6533, -0.2548, 0.6176, -0.0067]) tensor([0.0502, 0.2032, 0.4862, 0.2604]) -Greedy action tensor([-1.9421, -0.4536, 0.6657, -0.1784]) tensor([0.0403, 0.1784, 0.5464, 0.2349]) -Greedy action tensor([-1.8817, -0.4215, 0.6413, -0.1257]) tensor([0.0424, 0.1828, 0.5291, 0.2457]) -Greedy action tensor([-1.7554, -0.5055, 0.5734, -0.0759]) tensor([0.0497, 0.1735, 0.5103, 0.2666]) -Greedy action tensor([-1.7588, -0.4128, 0.6631, -0.3239]) tensor([0.0492, 0.1892, 0.5548, 0.2068]) -Greedy action tensor([-1.3259, 0.1100, 0.6364, 0.4128]) tensor([0.0555, 0.2334, 0.3951, 0.3160]) -Greedy action tensor([-1.3794, -0.2514, 0.5047, 0.5156]) tensor([0.0577, 0.1784, 0.3799, 0.3841]) -Greedy action tensor([-1.9290, -0.4259, 0.6598, -0.1714]) tensor([0.0406, 0.1827, 0.5410, 0.2356]) -Greedy action tensor([-1.9013, -0.4430, 0.6472, -0.1588]) tensor([0.0420, 0.1806, 0.5374, 0.2400]) -Greedy action tensor([-1.8922, -0.4513, 0.6435, -0.1564]) tensor([0.0425, 0.1796, 0.5367, 0.2412]) -Greedy action tensor([-1.4140, -0.6755, 0.5176, 0.0927]) tensor([0.0689, 0.1443, 0.4757, 0.3111]) -Greedy action tensor([-1.8846, -0.4679, 0.6401, -0.1479]) tensor([0.0429, 0.1771, 0.5362, 0.2438]) -Greedy action tensor([-1.7464, -0.4095, 0.5746, -0.0585]) tensor([0.0490, 0.1866, 0.4993, 0.2651]) -Greedy action tensor([-1.9224, -0.3701, 0.6452, -0.1669]) tensor([0.0407, 0.1924, 0.5311, 0.2358]) -Greedy action tensor([-1.9265, -0.4125, 0.6547, -0.1690]) tensor([0.0407, 0.1851, 0.5381, 0.2361]) -Greedy action tensor([-1.7480, -0.3421, 0.5175, -0.0537]) tensor([0.0496, 0.2024, 0.4780, 0.2700]) -Greedy action tensor([-1.5088, -0.5352, 0.4452, 0.1106]) tensor([0.0635, 0.1681, 0.4479, 0.3205]) -Greedy action tensor([-1.4641, -0.2881, 0.5660, 0.2241]) tensor([0.0579, 0.1877, 0.4410, 0.3133]) -Greedy action tensor([-1.9315, -0.4442, 0.6612, -0.1710]) tensor([0.0406, 0.1798, 0.5432, 0.2363]) -Greedy action tensor([-1.9565, -0.8453, 0.0724, -0.6194]) tensor([0.0647, 0.1966, 0.4922, 0.2464]) -Greedy action tensor([-1.9149, -0.3595, 0.6250, -0.1768]) tensor([0.0415, 0.1965, 0.5260, 0.2359]) -Greedy action tensor([-1.8998, -0.3669, 0.6330, -0.1513]) tensor([0.0417, 0.1932, 0.5253, 0.2397]) -Greedy action tensor([-0.7701, 0.5627, -0.0148, -0.3239]) tensor([0.1179, 0.4470, 0.2509, 0.1842]) -Greedy action tensor([-0.8173, -0.0853, 0.8131, 1.1396]) tensor([0.0655, 0.1362, 0.3345, 0.4637]) -Greedy action tensor([-1.9199, -0.4411, 0.6565, -0.1660]) tensor([0.0411, 0.1805, 0.5408, 0.2376]) -Greedy action tensor([-1.5241, -0.3684, 0.5107, 0.2800]) tensor([0.0559, 0.1774, 0.4274, 0.3393]) -Greedy action tensor([-1.8945, -0.3309, 0.6288, -0.1386]) tensor([0.0416, 0.1987, 0.5188, 0.2409]) -Greedy action tensor([-0.7048, -0.0024, -0.3922, -0.2681]) tensor([0.1685, 0.3402, 0.2304, 0.2608]) -Greedy action tensor([-1.8174, -0.4381, 0.6007, -0.1213]) tensor([0.0462, 0.1835, 0.5185, 0.2519]) -Greedy action tensor([-1.8954, -0.4518, 0.6525, -0.1649]) tensor([0.0423, 0.1790, 0.5402, 0.2385]) -Greedy action tensor([-1.7511, -0.0072, 0.6657, -0.4225]) tensor([0.0461, 0.2635, 0.5164, 0.1740]) -Greedy action tensor([-1.9061, -0.4323, 0.6443, -0.1613]) tensor([0.0418, 0.1827, 0.5360, 0.2395]) -Greedy action tensor([-1.7686, -0.2789, 0.6275, -0.0682]) tensor([0.0457, 0.2026, 0.5016, 0.2501]) -Greedy action tensor([-1.5696, -0.4700, 0.7378, 0.4015]) tensor([0.0471, 0.1415, 0.4733, 0.3381]) -Greedy action tensor([ 0.3499, 0.0046, -0.0766, -0.0052]) tensor([0.3266, 0.2312, 0.2132, 0.2290]) -Greedy action tensor([ 0.8665, -0.6500, -0.1490, -0.6633]) tensor([0.5561, 0.1221, 0.2014, 0.1204]) -Greedy action tensor([ 0.5843, -0.1896, -0.0940, -0.3864]) tensor([0.4260, 0.1965, 0.2162, 0.1614]) -Greedy action tensor([ 0.8984, -0.7755, -0.0470, -0.6002]) tensor([0.5557, 0.1042, 0.2159, 0.1242]) -Greedy action tensor([ 0.8114, -0.4010, 0.0302, -0.1969]) tensor([0.4716, 0.1403, 0.2160, 0.1721]) -Greedy action tensor([ 0.4980, -0.3604, -0.0631, -0.4396]) tensor([0.4191, 0.1776, 0.2391, 0.1641]) -Greedy action tensor([ 0.8077, -0.4732, -0.1723, -0.3305]) tensor([0.5067, 0.1408, 0.1902, 0.1624]) -Greedy action tensor([ 0.5774, 0.0166, -0.0576, -0.0762]) tensor([0.3816, 0.2178, 0.2022, 0.1985]) -Greedy action tensor([ 0.2201, -0.1280, 0.0152, -0.2831]) tensor([0.3200, 0.2259, 0.2607, 0.1935]) -Greedy action tensor([ 1.2408, -1.1197, 0.1707, -0.6009]) tensor([0.6266, 0.0591, 0.2149, 0.0993]) -Greedy action tensor([ 0.1918, 0.2072, -0.1561, 0.2028]) tensor([0.2679, 0.2721, 0.1892, 0.2709]) -Greedy action tensor([ 0.6910, -0.4927, -0.0423, -0.3873]) tensor([0.4702, 0.1439, 0.2259, 0.1600]) -Greedy action tensor([ 0.9415, -0.4870, -0.1164, -0.1887]) tensor([0.5236, 0.1255, 0.1818, 0.1691]) -Greedy action tensor([ 0.8766, -0.4842, -0.0418, -0.3656]) tensor([0.5143, 0.1319, 0.2053, 0.1485]) -Greedy action tensor([ 0.5101, -0.1111, 0.1251, -0.3301]) tensor([0.3775, 0.2028, 0.2568, 0.1629]) -Greedy action tensor([ 0.5873, -0.1943, -0.0964, -0.0702]) tensor([0.4031, 0.1845, 0.2035, 0.2089]) -Greedy action tensor([ 0.6012, -0.0218, -0.1189, -0.0824]) tensor([0.3956, 0.2122, 0.1925, 0.1997]) -Greedy action tensor([ 0.8462, -0.3963, -0.1118, -0.2873]) tensor([0.5015, 0.1448, 0.1924, 0.1614]) -Greedy action tensor([ 0.7075, -0.5451, -0.1464, -0.6131]) tensor([0.5054, 0.1444, 0.2152, 0.1349]) -Greedy action tensor([ 0.3880, -0.1863, 0.0167, -0.2417]) tensor([0.3590, 0.2021, 0.2476, 0.1913]) -Greedy action tensor([ 0.4762, -0.1797, 0.0071, -0.0606]) tensor([0.3664, 0.1902, 0.2292, 0.2142]) -Greedy action tensor([ 0.7135, -0.8399, -0.1429, -0.2237]) tensor([0.4931, 0.1043, 0.2094, 0.1932]) -Greedy action tensor([ 0.3604, -0.2096, -0.1239, -0.4013]) tensor([0.3776, 0.2135, 0.2326, 0.1763]) -Greedy action tensor([ 0.6345, -0.2363, -0.0450, -0.2343]) tensor([0.4265, 0.1785, 0.2162, 0.1789]) -Greedy action tensor([ 0.7132, -0.4460, -0.0173, -0.0817]) tensor([0.4450, 0.1396, 0.2144, 0.2010]) -Greedy action tensor([ 0.4867, -0.1514, 0.0081, -0.0807]) tensor([0.3683, 0.1946, 0.2282, 0.2089]) -Greedy action tensor([ 0.6048, -0.4642, -0.0258, -0.3057]) tensor([0.4390, 0.1507, 0.2337, 0.1766]) -Greedy action tensor([ 0.6146, -0.3446, -0.0033, -0.3033]) tensor([0.4307, 0.1651, 0.2322, 0.1720]) -Greedy action tensor([ 0.6470, 0.1565, 0.0823, -0.5218]) tensor([0.4013, 0.2458, 0.2282, 0.1247]) -Greedy action tensor([ 0.7368, -0.6530, -0.1538, -0.4585]) tensor([0.5096, 0.1270, 0.2092, 0.1542]) -Greedy action tensor([ 0.5833, 0.2565, -0.0804, -0.1949]) tensor([0.3710, 0.2676, 0.1910, 0.1704]) -Greedy action tensor([ 0.9108, -0.6269, -0.1040, -0.3211]) tensor([0.5350, 0.1150, 0.1939, 0.1561]) -Greedy action tensor([ 0.5980, -0.2479, 0.0127, -0.2793]) tensor([0.4163, 0.1787, 0.2319, 0.1732]) -Greedy action tensor([ 1.0097, -0.6428, -0.1134, -0.4011]) tensor([0.5679, 0.1088, 0.1847, 0.1385]) -Greedy action tensor([ 0.7928, -0.7352, 0.0161, -0.2478]) tensor([0.4926, 0.1069, 0.2266, 0.1740]) -Greedy action tensor([ 0.3582, 0.1201, -0.1158, -0.0489]) tensor([0.3251, 0.2562, 0.2024, 0.2164]) -Greedy action tensor([ 0.1628, -0.1275, 0.1849, 0.0026]) tensor([0.2761, 0.2065, 0.2822, 0.2352]) -Greedy action tensor([ 0.4669, -0.0087, 0.0582, -0.0381]) tensor([0.3461, 0.2151, 0.2300, 0.2089]) -Greedy action tensor([ 0.7118, -0.1102, -0.0482, -0.3421]) tensor([0.4433, 0.1948, 0.2073, 0.1545]) -Greedy action tensor([ 0.6482, -0.3399, -0.1332, -0.1260]) tensor([0.4365, 0.1625, 0.1998, 0.2012]) -Greedy action tensor([ 1.4136, -0.4845, 0.2297, -0.6555]) tensor([0.6320, 0.0947, 0.1935, 0.0798]) -Greedy action tensor([ 0.9523, -0.5724, 0.0699, -0.3381]) tensor([0.5245, 0.1142, 0.2170, 0.1443]) -Greedy action tensor([ 0.6754, -0.3205, -0.1194, -0.4171]) tensor([0.4637, 0.1713, 0.2095, 0.1555]) -Greedy action tensor([ 0.5312, -0.3418, -0.0761, -0.0056]) tensor([0.3926, 0.1640, 0.2139, 0.2295]) -Greedy action tensor([ 1.0192, -0.6195, 0.0030, -0.2749]) tensor([0.5463, 0.1061, 0.1978, 0.1498]) -Greedy action tensor([ 0.4496, -0.2274, -0.0044, -0.2071]) tensor([0.3757, 0.1909, 0.2386, 0.1948]) -Greedy action tensor([ 0.4640, -0.4285, -0.0569, -0.5950]) tensor([0.4255, 0.1743, 0.2527, 0.1476]) -Greedy action tensor([ 0.5188, -0.2804, 0.0085, -0.3145]) tensor([0.4025, 0.1810, 0.2416, 0.1749]) -Greedy action tensor([ 0.4460, -0.0092, -0.1411, -0.2313]) tensor([0.3706, 0.2351, 0.2060, 0.1883]) -Greedy action tensor([ 0.9416, -0.6997, 0.0638, -0.5356]) tensor([0.5442, 0.1054, 0.2262, 0.1242]) -Greedy action tensor([ 0.8688, -0.5815, 0.0106, -0.4065]) tensor([0.5161, 0.1210, 0.2188, 0.1442]) -Greedy action tensor([ 0.6294, -0.4882, 0.1056, -0.3672]) tensor([0.4370, 0.1429, 0.2588, 0.1613]) -Greedy action tensor([ 0.4353, -0.4509, -0.1377, -0.1006]) tensor([0.3904, 0.1609, 0.2201, 0.2285]) -Greedy action tensor([ 0.4920, 0.2573, -0.1235, 0.0439]) tensor([0.3367, 0.2663, 0.1819, 0.2151]) -Greedy action tensor([ 0.8085, -0.2920, 0.0109, -0.3171]) tensor([0.4745, 0.1579, 0.2137, 0.1540]) -Greedy action tensor([0.0036, 0.6391, 0.0979, 0.3236]) tensor([0.1864, 0.3520, 0.2049, 0.2567]) -Greedy action tensor([ 0.4230, 0.0021, -0.0736, -0.1257]) tensor([0.3518, 0.2309, 0.2141, 0.2032]) -Greedy action tensor([ 0.6514, -0.3210, -0.0481, -0.2661]) tensor([0.4396, 0.1663, 0.2184, 0.1756]) -Greedy action tensor([ 0.4327, -0.0808, -0.1480, -0.0566]) tensor([0.3609, 0.2160, 0.2019, 0.2212]) -Greedy action tensor([ 0.8779, -0.6357, -0.0375, -0.3416]) tensor([0.5220, 0.1149, 0.2090, 0.1542]) -Greedy action tensor([ 0.3023, 0.2644, -0.1010, -0.0355]) tensor([0.2990, 0.2879, 0.1998, 0.2133]) -Greedy action tensor([ 0.7894, -0.2396, -0.2134, -0.7579]) tensor([0.5163, 0.1845, 0.1894, 0.1099]) -Greedy action tensor([ 0.9282, -0.8706, -0.0498, -0.3946]) tensor([0.5531, 0.0915, 0.2080, 0.1473]) -Greedy action tensor([ 0.2585, -0.0255, -0.0858, -0.3892]) tensor([0.3350, 0.2522, 0.2375, 0.1753]) -Greedy action tensor([ 0.1384, 0.5419, -0.1614, -0.1014]) tensor([0.2485, 0.3720, 0.1841, 0.1955]) -Greedy action tensor([ 0.1331, -0.2219, -0.0797, -0.0968]) tensor([0.3027, 0.2122, 0.2446, 0.2405]) -Greedy action tensor([ 0.3912, -0.2931, 0.2103, -0.3732]) tensor([0.3566, 0.1799, 0.2976, 0.1660]) -Greedy action tensor([ 0.5720, -0.6205, -0.1692, -0.2950]) tensor([0.4545, 0.1379, 0.2166, 0.1910]) -Greedy action tensor([ 0.5281, 0.0067, -0.0764, 0.0020]) tensor([0.3662, 0.2174, 0.2001, 0.2164]) -Greedy action tensor([ 0.6537, 0.1998, -0.1079, -0.2886]) tensor([0.4013, 0.2549, 0.1874, 0.1564]) -Greedy action tensor([ 0.6700, -0.3495, 0.0347, -0.3378]) tensor([0.4433, 0.1600, 0.2349, 0.1618]) -Greedy action tensor([ 0.6694, -0.4954, 0.2405, -0.4703]) tensor([0.4380, 0.1367, 0.2852, 0.1401]) -Greedy action tensor([ 0.6819, -0.4206, 0.0495, -0.3083]) tensor([0.4475, 0.1486, 0.2377, 0.1662]) -Greedy action tensor([ 0.6902, -0.2318, -0.0164, -0.1765]) tensor([0.4326, 0.1721, 0.2134, 0.1819]) -Greedy action tensor([ 0.5262, 0.2654, 0.0484, -0.1331]) tensor([0.3439, 0.2650, 0.2133, 0.1779]) -Greedy action tensor([ 1.0313, -0.7400, -0.0089, -0.5877]) tensor([0.5809, 0.0988, 0.2053, 0.1151]) -Greedy action tensor([ 0.3570, -0.1879, -0.0349, -0.2910]) tensor([0.3599, 0.2087, 0.2432, 0.1883]) -Greedy action tensor([ 0.5024, -0.1688, 0.1511, -0.4025]) tensor([0.3817, 0.1951, 0.2687, 0.1545]) -Greedy action tensor([ 0.8401, -0.6447, -0.0860, -0.5983]) tensor([0.5377, 0.1218, 0.2130, 0.1276]) -Greedy action tensor([ 0.9264, -0.9178, 0.0463, -0.3893]) tensor([0.5431, 0.0859, 0.2253, 0.1457]) -Greedy action tensor([ 0.7128, -0.3952, -0.1353, -0.1326]) tensor([0.4571, 0.1509, 0.1957, 0.1963]) -Greedy action tensor([ 1.0983, 0.6072, -0.5620, -0.4375]) tensor([0.4957, 0.3033, 0.0942, 0.1067]) -Greedy action tensor([-0.4310, -0.6665, 0.0425, -0.5897]) tensor([0.2353, 0.1860, 0.3779, 0.2008]) -Greedy action tensor([-0.3463, 0.2346, 0.2562, -0.4153]) tensor([0.1803, 0.3222, 0.3293, 0.1682]) -Greedy action tensor([ 0.1118, 0.0870, 0.1116, -0.4339]) tensor([0.2813, 0.2744, 0.2813, 0.1630]) -Greedy action tensor([-0.7038, -0.3057, -0.1016, -0.3886]) tensor([0.1759, 0.2619, 0.3212, 0.2410]) -Greedy action tensor([-0.2218, -0.6366, 0.4307, 0.6718]) tensor([0.1660, 0.1096, 0.3188, 0.4056]) -Greedy action tensor([ 0.3315, -1.3702, 1.0964, 1.2942]) tensor([0.1681, 0.0307, 0.3611, 0.4401]) -Greedy action tensor([ 0.5678, -1.4341, -0.2186, -0.1351]) tensor([0.4794, 0.0648, 0.2184, 0.2374]) -Greedy action tensor([ 0.1597, -0.9056, 0.6514, 0.7280]) tensor([0.2107, 0.0726, 0.3446, 0.3720]) -Greedy action tensor([-0.1924, -0.0548, 0.0458, 1.4280]) tensor([0.1180, 0.1355, 0.1498, 0.5967]) -Greedy action tensor([1.5667, 0.2886, 0.8926, 0.6002]) tensor([0.4611, 0.1285, 0.2350, 0.1754]) -Greedy action tensor([ 0.9046, -0.6070, -0.4264, 0.2799]) tensor([0.4950, 0.1092, 0.1308, 0.2650]) -Greedy action tensor([ 0.4810, 1.0336, 0.4526, -0.1007]) tensor([0.2343, 0.4071, 0.2277, 0.1309]) -Greedy action tensor([-0.5045, -1.3701, 1.6038, -0.3479]) tensor([0.0924, 0.0389, 0.7607, 0.1081]) -Greedy action tensor([ 0.5247, -0.1376, 0.3939, -0.3167]) tensor([0.3541, 0.1826, 0.3107, 0.1527]) -Greedy action tensor([ 0.9924, -0.4184, 0.6513, 0.8698]) tensor([0.3522, 0.0859, 0.2504, 0.3115]) -Greedy action tensor([-0.0279, -0.2522, -0.2339, 0.4590]) tensor([0.2358, 0.1885, 0.1919, 0.3838]) -Greedy action tensor([ 0.0677, -0.5072, -0.2922, -0.2109]) tensor([0.3314, 0.1865, 0.2313, 0.2508]) -Greedy action tensor([ 0.7549, 1.4643, 0.0236, -0.3955]) tensor([0.2611, 0.5307, 0.1256, 0.0826]) -Greedy action tensor([ 0.2111, -0.1439, 1.2304, 0.8689]) tensor([0.1562, 0.1095, 0.4328, 0.3015]) -Greedy action tensor([-0.3345, -0.3062, 1.1830, 0.0916]) tensor([0.1231, 0.1267, 0.5616, 0.1886]) -Greedy action tensor([-0.2533, -2.2900, 0.2612, -0.1057]) tensor([0.2524, 0.0329, 0.4222, 0.2925]) -Greedy action tensor([-1.2925, -2.2522, -0.1740, -0.4301]) tensor([0.1468, 0.0562, 0.4492, 0.3477]) -Greedy action tensor([-1.0887, -1.0319, 0.9735, -1.7123]) tensor([0.0956, 0.1012, 0.7519, 0.0513]) -Greedy action tensor([ 0.6175, -1.4231, 0.4315, 1.2340]) tensor([0.2623, 0.0341, 0.2178, 0.4859]) -Greedy action tensor([ 1.4949, -0.0794, -0.3432, 0.2033]) tensor([0.6093, 0.1262, 0.0970, 0.1675]) -Greedy action tensor([ 0.7709, -0.0724, -0.3655, 1.7579]) tensor([0.2255, 0.0970, 0.0724, 0.6051]) -Greedy action tensor([ 1.3435, -1.0100, -0.6441, 0.7899]) tensor([0.5534, 0.0526, 0.0758, 0.3181]) -Greedy action tensor([ 0.9016, -0.9778, -0.5307, -0.2078]) tensor([0.5810, 0.0887, 0.1387, 0.1916]) -Greedy action tensor([ 0.6928, -0.1036, 0.8504, -0.5180]) tensor([0.3425, 0.1545, 0.4010, 0.1021]) -Greedy action tensor([ 1.1847, 0.1560, 0.6597, -1.1543]) tensor([0.4889, 0.1748, 0.2892, 0.0471]) -Greedy action tensor([ 0.8546, -0.5518, -0.4613, 0.7611]) tensor([0.4125, 0.1011, 0.1107, 0.3757]) -Greedy action tensor([ 0.3810, -0.7425, 0.8016, 1.9336]) tensor([0.1321, 0.0429, 0.2011, 0.6239]) -Greedy action tensor([-0.0558, -0.6652, 0.3305, -0.9410]) tensor([0.2917, 0.1586, 0.4293, 0.1204]) -Greedy action tensor([ 0.4448, 0.2272, -0.2181, -1.1797]) tensor([0.3973, 0.3196, 0.2048, 0.0783]) -Greedy action tensor([-0.2566, -1.1758, 0.3438, -1.4322]) tensor([0.2833, 0.1130, 0.5163, 0.0874]) -Greedy action tensor([ 0.0222, -0.7067, -1.2245, 0.3442]) tensor([0.3175, 0.1532, 0.0913, 0.4381]) -Greedy action tensor([ 1.0752, -2.0399, 0.0099, 1.0046]) tensor([0.4309, 0.0191, 0.1485, 0.4015]) -Greedy action tensor([ 0.5024, -1.3687, 0.7885, 1.1273]) tensor([0.2297, 0.0354, 0.3058, 0.4291]) -Greedy action tensor([-0.4829, -1.3998, -0.1618, -0.2625]) tensor([0.2484, 0.0993, 0.3425, 0.3097]) -Greedy action tensor([ 1.7390, -0.9845, -0.5464, 0.1819]) tensor([0.7256, 0.0476, 0.0738, 0.1529]) -Greedy action tensor([-0.5180, -1.3297, 0.9207, -0.3630]) tensor([0.1465, 0.0650, 0.6174, 0.1710]) -Greedy action tensor([0.5761, 0.2560, 1.2429, 0.0277]) tensor([0.2352, 0.1708, 0.4581, 0.1359]) -Greedy action tensor([ 0.7478, -1.1587, 1.3056, -0.2640]) tensor([0.3068, 0.0456, 0.5360, 0.1116]) -Greedy action tensor([-1.0531, -1.1970, 0.2486, 0.1587]) tensor([0.1124, 0.0973, 0.4129, 0.3774]) -Greedy action tensor([-0.3220, 0.9847, -0.0786, 0.1752]) tensor([0.1313, 0.4852, 0.1675, 0.2159]) -Greedy action tensor([ 0.4635, -0.2664, 0.3247, -0.1885]) tensor([0.3480, 0.1677, 0.3029, 0.1813]) -Greedy action tensor([ 0.1987, 0.8053, -0.2093, -0.8307]) tensor([0.2593, 0.4756, 0.1724, 0.0926]) -Greedy action tensor([-0.9338, -0.8930, 1.2899, 0.1125]) tensor([0.0708, 0.0737, 0.6540, 0.2015]) -Greedy action tensor([ 0.9360, -0.1837, 0.6815, 0.8959]) tensor([0.3266, 0.1066, 0.2532, 0.3137]) -Greedy action tensor([-0.2796, -0.9029, -0.2437, 0.5094]) tensor([0.2095, 0.1123, 0.2171, 0.4611]) -Greedy action tensor([-0.9847, -0.6570, 0.9895, -0.4451]) tensor([0.0885, 0.1228, 0.6370, 0.1517]) -Greedy action tensor([ 0.2798, -0.3093, 0.3725, 0.8225]) tensor([0.2287, 0.1269, 0.2509, 0.3935]) -Greedy action tensor([1.4450, 0.7160, 0.2791, 0.9043]) tensor([0.4208, 0.2030, 0.1311, 0.2450]) -Greedy action tensor([-1.0267, -1.7384, -1.0317, 1.0243]) tensor([0.0975, 0.0478, 0.0970, 0.7577]) -Greedy action tensor([ 0.0894, -1.2319, -0.5420, 0.5689]) tensor([0.2929, 0.0781, 0.1558, 0.4732]) -Greedy action tensor([ 0.3638, 0.1642, -0.0987, -0.2611]) tensor([0.3351, 0.2745, 0.2110, 0.1794]) -Greedy action tensor([ 0.4077, -0.1970, -0.1245, 0.3465]) tensor([0.3253, 0.1777, 0.1911, 0.3060]) -Greedy action tensor([-0.7484, -0.5776, -0.7427, 0.7871]) tensor([0.1276, 0.1514, 0.1284, 0.5926]) -Greedy action tensor([ 0.3572, -2.2504, -0.2755, 0.5275]) tensor([0.3584, 0.0264, 0.1903, 0.4249]) -Greedy action tensor([ 7.8407e-01, -8.1538e-01, 1.0858e+00, 7.4029e-04]) tensor([0.3321, 0.0671, 0.4491, 0.1517]) -Greedy action tensor([-1.1520, 0.2136, -0.0864, 0.4356]) tensor([0.0787, 0.3082, 0.2283, 0.3848]) -Greedy action tensor([ 1.7794, -0.8708, 0.7258, 1.5751]) tensor([0.4475, 0.0316, 0.1560, 0.3648]) -Greedy action tensor([ 0.6381, -0.3266, 1.6000, -0.3755]) tensor([0.2293, 0.0874, 0.6001, 0.0832]) -Greedy action tensor([-0.4931, 0.0469, -0.9397, 0.4077]) tensor([0.1719, 0.2950, 0.1100, 0.4231]) -Greedy action tensor([ 0.4374, -1.6034, 0.3312, -0.3165]) tensor([0.4001, 0.0520, 0.3597, 0.1882]) -Greedy action tensor([ 0.7416, -1.4754, 2.5272, -1.3348]) tensor([0.1389, 0.0151, 0.8285, 0.0174]) -Greedy action tensor([-0.1230, -1.9481, -0.7246, -0.1095]) tensor([0.3673, 0.0592, 0.2013, 0.3723]) -Greedy action tensor([-1.8330, -1.3482, 0.5275, 0.0648]) tensor([0.0503, 0.0816, 0.5327, 0.3354]) -Greedy action tensor([ 0.4420, -0.8417, 0.2669, -0.8589]) tensor([0.4187, 0.1160, 0.3514, 0.1140]) -Greedy action tensor([-1.5402, 0.3849, -0.0624, 0.1853]) tensor([0.0560, 0.3840, 0.2455, 0.3145]) -Greedy action tensor([-0.6781, -1.1365, 0.2461, 0.5125]) tensor([0.1344, 0.0850, 0.3386, 0.4420]) -Greedy action tensor([ 0.1938, 0.2161, -0.2136, -0.1845]) tensor([0.2965, 0.3031, 0.1973, 0.2031]) -Greedy action tensor([-1.3525, -0.7519, -0.2633, 0.3435]) tensor([0.0889, 0.1621, 0.2642, 0.4848]) -Greedy action tensor([1.1336, 0.4623, 1.3522, 0.2232]) tensor([0.3167, 0.1618, 0.3940, 0.1274]) -Greedy action tensor([-2.0663, -0.3106, -0.3300, 0.7764]) tensor([0.0338, 0.1954, 0.1916, 0.5793]) -Greedy action tensor([0.0838, 0.2938, 0.2741, 0.3705]) tensor([0.2094, 0.2583, 0.2533, 0.2789]) -Greedy action tensor([ 0.1727, -1.7049, 1.6711, 0.6668]) tensor([0.1376, 0.0211, 0.6158, 0.2256]) -Greedy action tensor([ 0.6385, -0.3845, -0.0448, 0.2189]) tensor([0.3965, 0.1426, 0.2002, 0.2607]) -Greedy action tensor([-0.4648, -0.3747, -0.0851, -0.0556]) tensor([0.1976, 0.2162, 0.2888, 0.2974]) -Greedy action tensor([-1.3506, -0.6482, -0.8503, 0.1476]) tensor([0.1094, 0.2208, 0.1804, 0.4894]) -Greedy action tensor([ 1.3173, -0.1939, -0.8476, 0.7337]) tensor([0.5282, 0.1165, 0.0606, 0.2947]) -Greedy action tensor([ 1.5761, -0.6502, -0.2883, 0.3377]) tensor([0.6440, 0.0695, 0.0998, 0.1867]) -Greedy action tensor([ 1.3800, 0.0167, -0.5599, -0.1371]) tensor([0.6177, 0.1580, 0.0888, 0.1355]) -Greedy action tensor([ 2.1437, -0.1508, -1.0414, 0.8913]) tensor([0.7003, 0.0706, 0.0290, 0.2002]) -Greedy action tensor([ 1.0025, -0.2446, -0.1694, 0.1928]) tensor([0.4897, 0.1407, 0.1517, 0.2179]) -Greedy action tensor([ 1.8434, -1.0679, -0.1135, 0.6824]) tensor([0.6627, 0.0361, 0.0936, 0.2076]) -Greedy action tensor([ 1.3209, -0.4344, -0.4393, 0.4018]) tensor([0.5735, 0.0991, 0.0986, 0.2288]) -Greedy action tensor([ 0.8754, -0.2228, -0.3114, -0.1578]) tensor([0.5014, 0.1672, 0.1530, 0.1784]) -Greedy action tensor([ 1.5651, -0.2947, -0.7247, 0.2240]) tensor([0.6585, 0.1025, 0.0667, 0.1722]) -Greedy action tensor([ 0.4344, -0.1296, -0.2002, 0.2184]) tensor([0.3442, 0.1959, 0.1825, 0.2774]) -Greedy action tensor([ 1.6862, -0.5186, -0.3603, 0.4470]) tensor([0.6540, 0.0721, 0.0845, 0.1894]) -Greedy action tensor([ 1.5735, -0.3757, -0.2416, 0.1752]) tensor([0.6442, 0.0917, 0.1049, 0.1591]) -Greedy action tensor([ 1.1093, -0.4341, -0.3083, 0.3211]) tensor([0.5234, 0.1118, 0.1268, 0.2380]) -Greedy action tensor([ 1.6087, -0.5121, -0.2775, 0.1337]) tensor([0.6665, 0.0799, 0.1011, 0.1525]) -Greedy action tensor([ 0.2968, -0.3942, 0.0131, -0.0698]) tensor([0.3393, 0.1700, 0.2555, 0.2352]) -Greedy action tensor([ 1.5419, -0.2790, -0.8743, 0.5207]) tensor([0.6206, 0.1005, 0.0554, 0.2235]) -Greedy action tensor([ 1.6654, 0.1047, -0.4318, -0.1685]) tensor([0.6700, 0.1407, 0.0823, 0.1071]) -Greedy action tensor([ 1.4232, -0.2621, 0.0113, 0.3437]) tensor([0.5653, 0.1048, 0.1378, 0.1921]) -Greedy action tensor([ 0.8706, -0.4603, -0.2176, 0.4489]) tensor([0.4431, 0.1171, 0.1492, 0.2906]) -Greedy action tensor([ 1.4125, -0.1986, -0.3480, 0.5008]) tensor([0.5639, 0.1126, 0.0970, 0.2266]) -Greedy action tensor([ 1.1467, -0.1376, -0.3218, 0.1714]) tensor([0.5307, 0.1469, 0.1222, 0.2001]) -Greedy action tensor([ 1.3737, -0.0905, -0.4369, 0.1784]) tensor([0.5891, 0.1362, 0.0964, 0.1783]) -Greedy action tensor([ 1.0932, -0.1606, -0.0449, 0.0127]) tensor([0.5141, 0.1467, 0.1647, 0.1745]) -Greedy action tensor([ 0.8026, -0.3172, -0.1504, 0.0593]) tensor([0.4572, 0.1492, 0.1763, 0.2174]) -Greedy action tensor([ 2.4341, -1.4463, -0.2376, 0.3818]) tensor([0.8209, 0.0169, 0.0568, 0.1054]) -Greedy action tensor([ 1.5854, -0.4960, -0.9737, 0.6971]) tensor([0.6198, 0.0773, 0.0480, 0.2550]) -Greedy action tensor([ 0.3188, -0.5492, 0.0159, 0.0092]) tensor([0.3458, 0.1451, 0.2554, 0.2537]) -Greedy action tensor([ 1.3435, -0.5497, 0.1826, 0.0432]) tensor([0.5759, 0.0867, 0.1804, 0.1569]) -Greedy action tensor([ 1.1137, -0.2922, -0.3470, 0.1665]) tensor([0.5362, 0.1314, 0.1244, 0.2080]) -Greedy action tensor([ 0.9956, -0.2973, -0.2776, 0.3534]) tensor([0.4807, 0.1319, 0.1345, 0.2529]) -Greedy action tensor([ 1.3414, 0.4188, -0.2277, -0.5863]) tensor([0.5710, 0.2270, 0.1189, 0.0831]) -Greedy action tensor([ 1.4121, -0.8307, 0.0126, 0.3878]) tensor([0.5841, 0.0620, 0.1441, 0.2097]) -Greedy action tensor([ 1.1043, -0.1726, -0.7335, 0.2688]) tensor([0.5343, 0.1490, 0.0850, 0.2317]) -Greedy action tensor([ 1.7722, 0.0499, -0.4538, -0.3541]) tensor([0.7113, 0.1271, 0.0768, 0.0848]) -Greedy action tensor([ 2.2431, -0.5501, -0.3341, 0.4790]) tensor([0.7642, 0.0468, 0.0581, 0.1309]) -Greedy action tensor([ 1.9738, 0.2257, -0.3069, 0.4861]) tensor([0.6657, 0.1159, 0.0680, 0.1504]) -Greedy action tensor([ 2.2566, -0.9683, -0.3247, 0.6398]) tensor([0.7611, 0.0303, 0.0576, 0.1511]) -Greedy action tensor([ 1.0453, -0.0435, -0.7465, 0.0653]) tensor([0.5323, 0.1792, 0.0887, 0.1998]) -Greedy action tensor([ 1.2227, 0.1153, -0.4062, 0.2664]) tensor([0.5233, 0.1729, 0.1026, 0.2011]) -Greedy action tensor([ 1.6235, -0.3220, 0.0205, 0.4489]) tensor([0.6049, 0.0865, 0.1218, 0.1869]) -Greedy action tensor([ 1.8420, -0.5237, 0.2451, 0.5438]) tensor([0.6372, 0.0598, 0.1290, 0.1740]) -Greedy action tensor([ 1.5409, -0.5592, -0.2993, 0.4900]) tensor([0.6132, 0.0751, 0.0974, 0.2144]) -Greedy action tensor([ 1.4490, -0.8180, -0.3202, 0.5100]) tensor([0.6006, 0.0622, 0.1024, 0.2348]) -Greedy action tensor([ 1.2713, -0.1506, -0.1878, -0.3674]) tensor([0.5995, 0.1446, 0.1394, 0.1164]) -Greedy action tensor([ 0.9291, 0.0521, -0.3342, 0.2059]) tensor([0.4579, 0.1905, 0.1295, 0.2222]) -Greedy action tensor([ 0.6758, 0.1715, -0.5066, -0.0049]) tensor([0.4138, 0.2499, 0.1268, 0.2095]) -Greedy action tensor([ 1.5682, -0.4560, -0.3821, 0.3953]) tensor([0.6314, 0.0834, 0.0898, 0.1954]) -Greedy action tensor([ 1.3342, -0.1452, -0.3051, 0.6555]) tensor([0.5184, 0.1181, 0.1006, 0.2629]) -Greedy action tensor([ 0.2815, -0.4761, -0.0020, 0.0118]) tensor([0.3350, 0.1570, 0.2523, 0.2558]) -Greedy action tensor([ 0.9500, -0.3236, -0.5786, 0.0872]) tensor([0.5212, 0.1458, 0.1130, 0.2199]) -Greedy action tensor([ 1.9831, -0.5554, -0.5159, 0.9505]) tensor([0.6591, 0.0521, 0.0542, 0.2347]) -Greedy action tensor([ 1.2072, -0.3943, -0.5391, 0.3565]) tensor([0.5546, 0.1118, 0.0967, 0.2369]) -Greedy action tensor([ 0.8178, -0.3328, -0.0519, 0.0653]) tensor([0.4532, 0.1434, 0.1899, 0.2135]) -Greedy action tensor([ 2.4922, -0.7733, -0.4426, 0.5066]) tensor([0.8139, 0.0311, 0.0433, 0.1118]) -Greedy action tensor([ 1.4211, -0.1646, -0.1626, 0.2651]) tensor([0.5798, 0.1188, 0.1190, 0.1825]) -Greedy action tensor([ 1.9266, -0.3660, -0.4813, 0.2557]) tensor([0.7251, 0.0732, 0.0653, 0.1364]) -Greedy action tensor([ 2.9258, -1.2499, -0.0154, 1.2566]) tensor([0.7958, 0.0122, 0.0420, 0.1499]) -Greedy action tensor([ 1.2908, -0.2794, -0.1230, -0.1446]) tensor([0.5920, 0.1231, 0.1440, 0.1409]) -Greedy action tensor([ 1.3387, -0.2170, -0.3551, 0.2362]) tensor([0.5791, 0.1222, 0.1064, 0.1923]) -Greedy action tensor([ 1.5393, -0.3309, -0.6644, -0.0234]) tensor([0.6784, 0.1045, 0.0749, 0.1422]) -Greedy action tensor([ 2.0043, -1.1035, -0.6202, 0.6982]) tensor([0.7204, 0.0322, 0.0522, 0.1952]) -Greedy action tensor([ 2.4344, -1.4229, -0.2499, 0.5146]) tensor([0.8090, 0.0171, 0.0552, 0.1186]) -Greedy action tensor([ 1.2530, -0.6014, -0.1949, 0.2673]) tensor([0.5666, 0.0887, 0.1332, 0.2115]) -Greedy action tensor([ 1.6917, -0.3207, -0.3340, 0.4695]) tensor([0.6410, 0.0857, 0.0845, 0.1888]) -Greedy action tensor([ 2.2576, 0.2681, -0.2867, 0.5378]) tensor([0.7171, 0.0981, 0.0563, 0.1285]) -Greedy action tensor([ 1.5881, -0.7418, -0.1306, 0.4485]) tensor([0.6263, 0.0609, 0.1123, 0.2004]) -Greedy action tensor([ 1.7774, -0.4287, -0.4921, 0.1698]) tensor([0.7073, 0.0779, 0.0731, 0.1417]) -Greedy action tensor([ 2.1047, -0.1626, -1.0203, 0.1696]) tensor([0.7740, 0.0802, 0.0340, 0.1118]) -Greedy action tensor([ 1.5925, 0.0768, -0.3303, -0.0093]) tensor([0.6380, 0.1401, 0.0933, 0.1286]) -Greedy action tensor([ 1.6917, -0.6968, -0.0397, 0.0458]) tensor([0.6842, 0.0628, 0.1211, 0.1319]) -Greedy action tensor([1.7777, 0.4407, 0.2534, 0.1767]) tensor([0.5945, 0.1561, 0.1295, 0.1199]) -Greedy action tensor([ 1.4171, -0.6488, -0.5464, 0.8538]) tensor([0.5446, 0.0690, 0.0764, 0.3100]) -Greedy action tensor([ 1.9197, 0.0857, -0.2790, 0.5645]) tensor([0.6542, 0.1045, 0.0726, 0.1687]) -Greedy action tensor([ 2.6224, -1.2621, -0.1016, 0.3635]) tensor([0.8399, 0.0173, 0.0551, 0.0877]) -Greedy action tensor([ 1.4469, -0.7485, -0.2965, 0.5668]) tensor([0.5879, 0.0654, 0.1028, 0.2438]) -Greedy action tensor([ 1.8012, -0.6287, -0.4012, 0.2272]) tensor([0.7113, 0.0626, 0.0786, 0.1474]) -Greedy action tensor([ 1.9889, 0.0929, -0.2106, 0.4856]) tensor([0.6741, 0.1012, 0.0747, 0.1499]) -Greedy action tensor([ 1.3498, -0.6417, -0.2217, 0.2443]) tensor([0.5969, 0.0815, 0.1240, 0.1976]) -Greedy action tensor([ 1.4308, -0.0354, -0.5942, 0.4754]) tensor([0.5723, 0.1321, 0.0755, 0.2201]) -Greedy action tensor([ 1.1674, 0.1029, -0.0753, 0.4283]) tensor([0.4737, 0.1634, 0.1367, 0.2262]) -Greedy action tensor([ 1.2248, -0.0908, -0.8816, 0.2908]) tensor([0.5609, 0.1505, 0.0682, 0.2204]) -Greedy action tensor([ 0.4129, -0.0602, -0.0685, -0.0749]) tensor([0.3503, 0.2182, 0.2164, 0.2151]) -Greedy action tensor([ 0.7133, -0.5717, 0.0053, -0.2747]) tensor([0.4669, 0.1292, 0.2300, 0.1739]) -Greedy action tensor([ 0.9684, -0.4908, -0.3105, -0.5095]) tensor([0.5751, 0.1337, 0.1601, 0.1312]) -Greedy action tensor([ 0.5382, -0.1352, 0.1462, -0.3999]) tensor([0.3880, 0.1979, 0.2622, 0.1519]) -Greedy action tensor([ 0.7106, -0.8909, -0.0199, -0.3666]) tensor([0.4941, 0.0996, 0.2380, 0.1683]) -Greedy action tensor([ 0.2177, -0.1459, -0.0565, -0.2107]) tensor([0.3219, 0.2237, 0.2447, 0.2097]) -Greedy action tensor([ 1.1560, -0.7399, 0.0754, -0.4646]) tensor([0.5926, 0.0890, 0.2011, 0.1172]) -Greedy action tensor([ 0.6983, -0.6158, -0.1990, -0.3257]) tensor([0.4913, 0.1320, 0.2003, 0.1764]) -Greedy action tensor([ 0.6302, -0.3739, 0.0609, -0.4285]) tensor([0.4388, 0.1607, 0.2483, 0.1522]) -Greedy action tensor([ 0.8039, -0.3097, -0.0143, -0.4499]) tensor([0.4866, 0.1598, 0.2147, 0.1389]) -Greedy action tensor([ 0.8447, -0.5834, -0.0113, -0.3256]) tensor([0.5063, 0.1214, 0.2151, 0.1571]) -Greedy action tensor([ 0.5418, 0.3309, -0.0351, 0.1178]) tensor([0.3305, 0.2676, 0.1856, 0.2163]) -Greedy action tensor([ 0.9343, -0.7457, -0.0056, -0.2731]) tensor([0.5331, 0.0993, 0.2082, 0.1594]) -Greedy action tensor([ 0.8036, -1.1431, 0.2251, -0.7940]) tensor([0.5247, 0.0749, 0.2942, 0.1062]) -Greedy action tensor([ 0.7209, -0.2107, -0.1807, -0.0238]) tensor([0.4396, 0.1732, 0.1784, 0.2088]) -Greedy action tensor([ 0.6820, -0.4375, -0.0506, -0.2413]) tensor([0.4537, 0.1481, 0.2180, 0.1802]) -Greedy action tensor([ 0.6770, -0.1305, -0.1414, -0.2066]) tensor([0.4347, 0.1939, 0.1918, 0.1797]) -Greedy action tensor([ 0.9369, -0.2265, -0.2615, -0.3746]) tensor([0.5309, 0.1659, 0.1602, 0.1430]) -Greedy action tensor([ 0.6937, -0.8036, -0.1502, -0.2559]) tensor([0.4900, 0.1096, 0.2107, 0.1896]) -Greedy action tensor([ 0.6254, -0.0790, -0.0569, -0.1661]) tensor([0.4077, 0.2015, 0.2061, 0.1847]) -Greedy action tensor([ 0.3522, -0.4540, -0.1477, -0.0864]) tensor([0.3706, 0.1655, 0.2248, 0.2390]) -Greedy action tensor([ 0.4787, -0.3173, -0.1424, -0.1068]) tensor([0.3929, 0.1772, 0.2111, 0.2188]) -Greedy action tensor([ 0.5248, -0.2324, -0.0639, -0.1012]) tensor([0.3908, 0.1833, 0.2169, 0.2090]) -Greedy action tensor([ 0.6992, -0.5978, 0.1119, -0.3894]) tensor([0.4617, 0.1262, 0.2566, 0.1554]) -Greedy action tensor([ 1.1582, -0.5600, -0.0173, -0.5886]) tensor([0.6015, 0.1079, 0.1857, 0.1049]) -Greedy action tensor([ 1.1918, -0.6554, -0.1111, -0.4022]) tensor([0.6126, 0.0966, 0.1665, 0.1244]) -Greedy action tensor([ 0.8207, -0.3513, -0.3698, -0.3467]) tensor([0.5195, 0.1609, 0.1580, 0.1616]) -Greedy action tensor([ 0.6641, -0.5907, -0.2076, -0.2476]) tensor([0.4750, 0.1354, 0.1987, 0.1909]) -Greedy action tensor([ 1.0743, -0.6518, 0.0073, -0.3670]) tensor([0.5686, 0.1012, 0.1956, 0.1345]) -Greedy action tensor([ 0.4634, -0.5000, 0.0841, -0.4676]) tensor([0.4065, 0.1551, 0.2782, 0.1602]) -Greedy action tensor([ 0.2209, -0.0196, -0.1615, -0.1665]) tensor([0.3177, 0.2498, 0.2168, 0.2157]) -Greedy action tensor([ 0.6130, -0.2935, 0.0663, -0.2344]) tensor([0.4147, 0.1675, 0.2401, 0.1777]) -Greedy action tensor([ 1.1776, -0.6228, -0.0327, -0.2287]) tensor([0.5853, 0.0967, 0.1745, 0.1434]) -Greedy action tensor([ 1.0278, -0.5323, -0.2588, -0.5889]) tensor([0.5935, 0.1247, 0.1639, 0.1178]) -Greedy action tensor([ 0.9531, -0.4848, -0.0822, -0.2775]) tensor([0.5306, 0.1260, 0.1884, 0.1550]) -Greedy action tensor([ 0.8401, -0.6572, 0.1615, -0.5324]) tensor([0.5039, 0.1127, 0.2556, 0.1277]) -Greedy action tensor([ 0.9935, -0.5921, -0.0431, -0.2951]) tensor([0.5449, 0.1116, 0.1933, 0.1502]) -Greedy action tensor([ 0.5265, 0.1143, -0.1809, -0.5137]) tensor([0.3987, 0.2640, 0.1965, 0.1409]) -Greedy action tensor([ 0.1982, -0.0034, -0.1344, -0.0308]) tensor([0.3003, 0.2455, 0.2153, 0.2389]) -Greedy action tensor([ 0.7937, -0.6560, -0.1668, -0.5832]) tensor([0.5348, 0.1255, 0.2047, 0.1350]) -Greedy action tensor([0.3412, 0.1533, 0.0033, 0.0817]) tensor([0.3018, 0.2501, 0.2153, 0.2328]) -Greedy action tensor([ 0.3705, -0.1880, -0.1126, -0.2400]) tensor([0.3660, 0.2094, 0.2258, 0.1988]) -Greedy action tensor([ 0.8327, -0.5215, -0.1128, -0.1250]) tensor([0.4925, 0.1271, 0.1913, 0.1890]) -Greedy action tensor([ 0.6856, -0.5172, 0.0100, -0.3831]) tensor([0.4645, 0.1395, 0.2364, 0.1595]) -Greedy action tensor([ 0.5471, 0.1232, -0.1340, -0.2545]) tensor([0.3833, 0.2508, 0.1940, 0.1719]) -Greedy action tensor([ 0.7402, -0.3037, -0.0864, -0.1762]) tensor([0.4567, 0.1608, 0.1998, 0.1827]) -Greedy action tensor([ 0.8148, 0.0618, -0.0272, -0.2525]) tensor([0.4453, 0.2097, 0.1918, 0.1531]) -Greedy action tensor([ 0.4520, -0.0099, -0.1760, -0.0024]) tensor([0.3573, 0.2251, 0.1907, 0.2268]) -Greedy action tensor([ 0.8299, -0.4000, -0.0646, -0.1947]) tensor([0.4854, 0.1419, 0.1985, 0.1742]) -Greedy action tensor([ 0.6883, -0.5287, -0.0900, -0.2103]) tensor([0.4624, 0.1369, 0.2123, 0.1883]) -Greedy action tensor([ 0.6740, -0.2027, 0.0323, 0.1176]) tensor([0.3975, 0.1654, 0.2092, 0.2279]) -Greedy action tensor([ 0.5007, -0.0106, -0.0563, -0.0542]) tensor([0.3641, 0.2183, 0.2086, 0.2090]) -Greedy action tensor([ 0.8412, -0.6356, 0.0881, -0.4364]) tensor([0.5056, 0.1155, 0.2381, 0.1409]) -Greedy action tensor([ 0.5811, -0.7394, -0.0985, -0.2401]) tensor([0.4517, 0.1206, 0.2289, 0.1987]) -Greedy action tensor([ 0.8371, -0.8676, 0.0274, -0.5115]) tensor([0.5301, 0.0964, 0.2359, 0.1376]) -Greedy action tensor([ 0.9565, 0.1774, 0.0037, -0.1974]) tensor([0.4630, 0.2124, 0.1786, 0.1460]) -Greedy action tensor([ 0.9424, -0.5293, 0.1138, -0.6326]) tensor([0.5338, 0.1225, 0.2331, 0.1105]) -Greedy action tensor([ 0.5850, -0.1767, 0.0464, -0.3119]) tensor([0.4068, 0.1899, 0.2374, 0.1659]) -Greedy action tensor([ 1.0449, -0.4426, 0.0080, -0.2277]) tensor([0.5375, 0.1214, 0.1906, 0.1505]) -Greedy action tensor([ 0.9767, -0.6633, -0.0044, -0.5009]) tensor([0.5565, 0.1079, 0.2086, 0.1270]) -Greedy action tensor([ 0.8011, -0.7879, -0.1189, -0.2480]) tensor([0.5120, 0.1045, 0.2041, 0.1794]) -Greedy action tensor([ 0.4193, 0.0349, -0.0620, -0.1622]) tensor([0.3499, 0.2382, 0.2162, 0.1956]) -Greedy action tensor([ 0.9874, -0.7257, 0.0992, -0.6570]) tensor([0.5603, 0.1010, 0.2305, 0.1082]) -Greedy action tensor([ 0.9155, -0.5096, -0.1102, -0.1243]) tensor([0.5122, 0.1232, 0.1836, 0.1811]) -Greedy action tensor([ 0.5619, -0.2305, -0.0327, -0.3365]) tensor([0.4146, 0.1877, 0.2288, 0.1689]) -Greedy action tensor([ 0.7110, -0.5248, -0.0368, -0.1672]) tensor([0.4588, 0.1333, 0.2172, 0.1906]) -Greedy action tensor([ 0.2559, -0.0448, -0.0648, -0.2909]) tensor([0.3284, 0.2431, 0.2383, 0.1901]) -Greedy action tensor([ 0.5856, -0.3874, 0.1029, -0.3586]) tensor([0.4195, 0.1585, 0.2588, 0.1632]) -Greedy action tensor([ 0.6750, -0.7609, 0.1078, -0.7632]) tensor([0.4896, 0.1165, 0.2777, 0.1162]) -Greedy action tensor([ 0.6477, -0.2591, 0.0284, -0.6490]) tensor([0.4514, 0.1823, 0.2430, 0.1234]) -Greedy action tensor([ 0.4315, 0.0765, -0.0957, 0.0831]) tensor([0.3336, 0.2340, 0.1969, 0.2355]) -Greedy action tensor([ 0.4860, -0.3568, -0.0692, -0.0637]) tensor([0.3874, 0.1668, 0.2223, 0.2235]) -Greedy action tensor([ 0.9702, -0.3406, -0.2308, -0.8028]) tensor([0.5746, 0.1549, 0.1729, 0.0976]) -Greedy action tensor([ 1.1379, -0.8204, 0.0476, -0.3445]) tensor([0.5868, 0.0828, 0.1972, 0.1332]) -Greedy action tensor([ 0.3199, 0.0087, 0.0489, -0.6593]) tensor([0.3483, 0.2552, 0.2656, 0.1308]) -Greedy action tensor([ 1.1105, -0.4441, -0.2252, -0.2673]) tensor([0.5792, 0.1224, 0.1523, 0.1460]) -Greedy action tensor([ 0.7789, -0.4881, 0.1624, -0.4981]) tensor([0.4761, 0.1341, 0.2570, 0.1328]) -Greedy action tensor([ 1.0253, -0.4842, -0.0731, -0.5850]) tensor([0.5700, 0.1260, 0.1901, 0.1139]) -Greedy action tensor([ 0.4483, -0.2754, 0.0930, -0.1879]) tensor([0.3683, 0.1786, 0.2582, 0.1949]) -Greedy action tensor([ 0.9062, -0.3432, -0.0474, -0.1375]) tensor([0.4940, 0.1416, 0.1904, 0.1740]) -Greedy action tensor([ 0.5474, 0.0621, -0.0962, 0.0720]) tensor([0.3620, 0.2228, 0.1902, 0.2250]) -Greedy action tensor([-1.7728, -0.1493, 0.5514, -0.0666]) tensor([0.0459, 0.2326, 0.4688, 0.2527]) -Greedy action tensor([-1.9198, -0.4610, 0.6622, -0.1661]) tensor([0.0412, 0.1770, 0.5442, 0.2377]) -Greedy action tensor([-1.8963, -0.3277, 0.6281, -0.1373]) tensor([0.0415, 0.1992, 0.5182, 0.2410]) -Greedy action tensor([-1.1559, -0.6440, 0.5386, 0.3016]) tensor([0.0806, 0.1345, 0.4388, 0.3462]) -Greedy action tensor([-1.9189, -0.4367, 0.6549, -0.1655]) tensor([0.0412, 0.1812, 0.5399, 0.2377]) -Greedy action tensor([-1.9289, -0.4099, 0.6517, -0.1691]) tensor([0.0407, 0.1858, 0.5371, 0.2364]) -Greedy action tensor([-0.9195, -0.5274, 0.2215, 0.2256]) tensor([0.1142, 0.1691, 0.3576, 0.3591]) -Greedy action tensor([-1.8873, -0.4307, 0.6385, -0.1528]) tensor([0.0426, 0.1829, 0.5329, 0.2415]) -Greedy action tensor([-1.8403, -0.4623, 0.6207, -0.1231]) tensor([0.0449, 0.1783, 0.5265, 0.2503]) -Greedy action tensor([-1.7379, -0.5057, 0.6265, -0.0592]) tensor([0.0490, 0.1679, 0.5208, 0.2623]) -Greedy action tensor([-1.8410, -0.4403, 0.6699, -0.0848]) tensor([0.0432, 0.1752, 0.5317, 0.2500]) -Greedy action tensor([-1.6255, 0.3958, 0.3857, 0.0784]) tensor([0.0465, 0.3508, 0.3473, 0.2554]) -Greedy action tensor([-1.9015, -0.4090, 0.6526, -0.1294]) tensor([0.0413, 0.1839, 0.5316, 0.2432]) -Greedy action tensor([-1.3910, -0.1447, 0.5094, -0.5864]) tensor([0.0746, 0.2595, 0.4991, 0.1668]) -Greedy action tensor([-0.9849, -0.4765, 0.4898, 0.8502]) tensor([0.0752, 0.1250, 0.3286, 0.4712]) -Greedy action tensor([-1.9295, -0.4157, 0.6576, -0.1727]) tensor([0.0406, 0.1845, 0.5396, 0.2352]) -Greedy action tensor([-1.7096, -0.4211, 0.5719, -0.0480]) tensor([0.0508, 0.1843, 0.4974, 0.2676]) -Greedy action tensor([-1.8447, -0.4350, 0.6244, -0.1339]) tensor([0.0446, 0.1825, 0.5264, 0.2466]) -Greedy action tensor([-1.9304, -0.4211, 0.6570, -0.1719]) tensor([0.0406, 0.1837, 0.5400, 0.2357]) -Greedy action tensor([-1.7279, -0.5387, 0.4538, -0.0811]) tensor([0.0545, 0.1791, 0.4833, 0.2831]) -Greedy action tensor([-1.3915, -0.5769, 0.3657, 0.1576]) tensor([0.0727, 0.1641, 0.4212, 0.3421]) -Greedy action tensor([-1.8762, -0.4400, 0.6525, -0.1227]) tensor([0.0425, 0.1788, 0.5331, 0.2456]) -Greedy action tensor([-1.6491, -0.1314, 0.5865, -0.0020]) tensor([0.0497, 0.2269, 0.4651, 0.2582]) -Greedy action tensor([-1.8803, -0.4492, 0.6371, -0.1447]) tensor([0.0430, 0.1799, 0.5331, 0.2440]) -Greedy action tensor([-1.7154, -0.4547, 0.5540, -0.0485]) tensor([0.0513, 0.1809, 0.4962, 0.2716]) -Greedy action tensor([-1.8953, -0.4459, 0.6424, -0.1563]) tensor([0.0424, 0.1805, 0.5360, 0.2411]) -Greedy action tensor([-1.7045, -0.4987, 0.5385, -0.0068]) tensor([0.0520, 0.1737, 0.4901, 0.2841]) -Greedy action tensor([-1.9262, -0.4391, 0.6571, -0.1713]) tensor([0.0409, 0.1810, 0.5416, 0.2365]) -Greedy action tensor([-0.3850, 0.2127, 1.1187, 1.4508]) tensor([0.0736, 0.1338, 0.3311, 0.4615]) -Greedy action tensor([-1.7610, -0.4920, 0.5727, -0.1114]) tensor([0.0498, 0.1772, 0.5138, 0.2592]) -Greedy action tensor([-1.8654, -0.4768, 0.6206, -0.1364]) tensor([0.0441, 0.1769, 0.5302, 0.2487]) -Greedy action tensor([-1.7981, -0.2104, 0.5620, -0.1094]) tensor([0.0457, 0.2234, 0.4837, 0.2472]) -Greedy action tensor([-1.2602, -0.4494, 0.6114, 0.7314]) tensor([0.0586, 0.1318, 0.3806, 0.4291]) -Greedy action tensor([-1.9453, -0.4503, 0.6681, -0.1806]) tensor([0.0401, 0.1788, 0.5470, 0.2341]) -Greedy action tensor([-1.8878, -0.4322, 0.6384, -0.1517]) tensor([0.0426, 0.1827, 0.5329, 0.2418]) -Greedy action tensor([-1.1648, -0.5925, 0.2850, 0.2390]) tensor([0.0900, 0.1596, 0.3838, 0.3666]) -Greedy action tensor([-1.6209, -0.4972, 0.4845, -0.0053]) tensor([0.0577, 0.1776, 0.4741, 0.2905]) -Greedy action tensor([-1.9384, -0.4426, 0.6592, -0.1777]) tensor([0.0405, 0.1806, 0.5435, 0.2354]) -Greedy action tensor([-1.7818, -0.3349, 0.6535, -0.0672]) tensor([0.0450, 0.1912, 0.5138, 0.2499]) -Greedy action tensor([-1.7802, -0.3076, 0.6514, -0.0671]) tensor([0.0449, 0.1957, 0.5106, 0.2489]) -Greedy action tensor([-1.7708, -0.3732, 0.5694, -0.1065]) tensor([0.0483, 0.1953, 0.5013, 0.2550]) -Greedy action tensor([-1.5223, 0.3262, 0.3428, 0.0181]) tensor([0.0541, 0.3438, 0.3495, 0.2526]) -Greedy action tensor([-1.8518, -0.3655, 0.6143, -0.0994]) tensor([0.0435, 0.1925, 0.5128, 0.2512]) -Greedy action tensor([-1.9164, -0.4434, 0.6546, -0.1649]) tensor([0.0413, 0.1802, 0.5404, 0.2381]) -Greedy action tensor([-1.8167, -0.4742, 0.6012, -0.1377]) tensor([0.0467, 0.1788, 0.5241, 0.2504]) -Greedy action tensor([-1.5973, -0.5433, 0.6854, 0.1166]) tensor([0.0520, 0.1493, 0.5100, 0.2888]) -Greedy action tensor([-1.7948, -0.1927, 0.6088, -0.0696]) tensor([0.0442, 0.2192, 0.4887, 0.2480]) -Greedy action tensor([-1.7138, -0.3644, 0.6576, 0.1259]) tensor([0.0457, 0.1763, 0.4900, 0.2879]) -Greedy action tensor([-1.8883, -0.4598, 0.6363, -0.1607]) tensor([0.0429, 0.1792, 0.5362, 0.2416]) -Greedy action tensor([-1.3873, 0.1941, 0.3858, -0.1134]) tensor([0.0653, 0.3172, 0.3843, 0.2333]) -Greedy action tensor([-1.7072, -0.4888, 0.5764, -0.1640]) tensor([0.0530, 0.1792, 0.5199, 0.2480]) -Greedy action tensor([-0.8378, -0.3347, 0.1604, 0.3695]) tensor([0.1148, 0.1898, 0.3115, 0.3839]) -Greedy action tensor([-1.5673, -0.2076, 0.6158, -0.5005]) tensor([0.0600, 0.2336, 0.5322, 0.1743]) -Greedy action tensor([-1.8625, -0.4773, 0.6254, -0.1495]) tensor([0.0443, 0.1770, 0.5331, 0.2456]) -Greedy action tensor([-1.7452, -0.4361, 0.6998, -0.0252]) tensor([0.0458, 0.1697, 0.5285, 0.2560]) -Greedy action tensor([-1.9466, -0.4526, 0.6685, -0.1812]) tensor([0.0401, 0.1784, 0.5475, 0.2341]) -Greedy action tensor([-0.8233, -0.4797, 0.3254, 0.8104]) tensor([0.0936, 0.1319, 0.2951, 0.4793]) -Greedy action tensor([-0.8461, -0.0995, 0.3804, 0.5610]) tensor([0.0943, 0.1990, 0.3215, 0.3852]) -Greedy action tensor([-1.7547, 0.1640, 0.4981, -0.1017]) tensor([0.0443, 0.3021, 0.4219, 0.2316]) -Greedy action tensor([-1.9376, -0.4515, 0.6616, -0.1764]) tensor([0.0405, 0.1790, 0.5448, 0.2357]) -Greedy action tensor([-1.7515, 0.1299, 0.4868, -0.0906]) tensor([0.0450, 0.2956, 0.4223, 0.2371]) -Greedy action tensor([-1.5210, -0.5639, 0.5651, 0.1742]) tensor([0.0585, 0.1522, 0.4708, 0.3185]) -Greedy action tensor([-1.9018, -0.4035, 0.6460, -0.1565]) tensor([0.0417, 0.1866, 0.5329, 0.2388]) -Greedy action tensor([-1.9402, -0.4498, 0.6619, -0.1792]) tensor([0.0404, 0.1793, 0.5452, 0.2351]) -Greedy action tensor([-1.9277, -0.4422, 0.6548, -0.1724]) tensor([0.0409, 0.1808, 0.5415, 0.2368]) -Greedy action tensor([-1.6791, -0.3842, 0.5597, -0.1640]) tensor([0.0538, 0.1964, 0.5049, 0.2448]) -Greedy action tensor([-1.9107, -0.4435, 0.6812, -0.1407]) tensor([0.0407, 0.1766, 0.5437, 0.2390]) -Greedy action tensor([-1.7712, -0.0473, 0.5524, 0.0211]) tensor([0.0438, 0.2456, 0.4475, 0.2631]) -Greedy action tensor([-1.9249, -0.3663, 0.6436, -0.1657]) tensor([0.0406, 0.1931, 0.5302, 0.2360]) -Greedy action tensor([-1.8610, -0.4460, 0.6237, -0.1355]) tensor([0.0440, 0.1811, 0.5278, 0.2471]) -Greedy action tensor([-1.3140, -0.4146, 0.5694, -0.6395]) tensor([0.0834, 0.2049, 0.5481, 0.1636]) -Greedy action tensor([-1.8221, -0.4021, 0.5973, -0.1112]) tensor([0.0456, 0.1888, 0.5130, 0.2526]) -Greedy action tensor([-1.8175, -0.4533, 0.6010, -0.1306]) tensor([0.0464, 0.1816, 0.5212, 0.2508]) -Greedy action tensor([-1.1600, -0.5740, 0.2732, 0.3951]) tensor([0.0853, 0.1533, 0.3576, 0.4039]) -Greedy action tensor([-1.8828, -0.4204, 0.6325, -0.1479]) tensor([0.0428, 0.1848, 0.5297, 0.2427]) -Greedy action tensor([-1.8654, -0.4195, 0.6224, -0.1518]) tensor([0.0438, 0.1860, 0.5272, 0.2431]) -Greedy action tensor([-0.7267, -0.5191, 0.2258, 0.0389]) tensor([0.1434, 0.1765, 0.3717, 0.3084]) -Greedy action tensor([-1.8014, -0.4385, 0.5908, -0.1149]) tensor([0.0471, 0.1839, 0.5148, 0.2542]) -Greedy action tensor([-1.8802, -0.2359, 0.6041, -0.1263]) tensor([0.0418, 0.2162, 0.5008, 0.2413]) -Greedy action tensor([-1.8857, -0.4135, 0.6334, -0.1467]) tensor([0.0426, 0.1857, 0.5291, 0.2425]) -Greedy action tensor([-1.7299, -0.4445, 0.5707, -0.0485]) tensor([0.0501, 0.1811, 0.4998, 0.2691]) -Greedy action tensor([ 2.0027, -0.5271, -0.1144, 0.6643]) tensor([0.6838, 0.0545, 0.0823, 0.1794]) -Greedy action tensor([ 1.8596, -1.0617, -0.4283, 0.4989]) tensor([0.7083, 0.0382, 0.0719, 0.1817]) -Greedy action tensor([ 1.1709, -0.1107, -0.4578, 0.3821]) tensor([0.5186, 0.1440, 0.1017, 0.2357]) -Greedy action tensor([ 1.4701, -0.4081, -0.0962, 0.2880]) tensor([0.5994, 0.0916, 0.1252, 0.1838]) -Greedy action tensor([ 0.6816, -0.3038, -0.0341, 0.5318]) tensor([0.3672, 0.1371, 0.1795, 0.3161]) -Greedy action tensor([ 1.1234, -0.3829, -0.2275, 0.4330]) tensor([0.5045, 0.1119, 0.1307, 0.2530]) -Greedy action tensor([ 0.8767, -0.1708, -0.1659, 0.2623]) tensor([0.4456, 0.1563, 0.1571, 0.2410]) -Greedy action tensor([ 1.1763, -0.3298, -0.5965, 0.4296]) tensor([0.5360, 0.1189, 0.0910, 0.2541]) -Greedy action tensor([ 1.2573, -0.3776, -0.1648, 0.0216]) tensor([0.5791, 0.1129, 0.1397, 0.1683]) -Greedy action tensor([ 1.2269, -0.4526, -0.2903, 0.3329]) tensor([0.5510, 0.1027, 0.1208, 0.2254]) -Greedy action tensor([ 1.1409, -0.5788, -0.1212, 0.4759]) tensor([0.5060, 0.0906, 0.1432, 0.2602]) -Greedy action tensor([ 1.4603, -0.5188, -0.4110, 0.3039]) tensor([0.6224, 0.0860, 0.0958, 0.1958]) -Greedy action tensor([ 1.7629, -0.9651, -0.1636, 0.6891]) tensor([0.6440, 0.0421, 0.0938, 0.2201]) -Greedy action tensor([ 1.0719, -0.4347, -0.5398, 0.8572]) tensor([0.4488, 0.0995, 0.0896, 0.3621]) -Greedy action tensor([ 0.5855, -0.2178, 0.2224, 0.0204]) tensor([0.3688, 0.1652, 0.2565, 0.2096]) -Greedy action tensor([ 1.6380, -0.7826, -0.3148, 0.4361]) tensor([0.6530, 0.0580, 0.0926, 0.1963]) -Greedy action tensor([ 1.5876, -0.6208, -0.2460, 0.3703]) tensor([0.6387, 0.0702, 0.1021, 0.1891]) -Greedy action tensor([ 1.5420, -0.0924, -0.3943, 0.3963]) tensor([0.6034, 0.1177, 0.0870, 0.1919]) -Greedy action tensor([ 1.4408, 0.0351, -0.8059, 0.5267]) tensor([0.5708, 0.1400, 0.0604, 0.2288]) -Greedy action tensor([ 1.6459, -0.0971, -0.5997, 0.3768]) tensor([0.6402, 0.1120, 0.0678, 0.1800]) -Greedy action tensor([ 1.7191, -0.6897, -0.5647, 0.6843]) tensor([0.6464, 0.0581, 0.0659, 0.2297]) -Greedy action tensor([ 1.0099, -0.1890, -0.3366, -0.0099]) tensor([0.5202, 0.1569, 0.1353, 0.1876]) -Greedy action tensor([ 1.6734, -0.4190, -0.5608, 0.2604]) tensor([0.6785, 0.0837, 0.0727, 0.1651]) -Greedy action tensor([ 1.2360, -0.2884, -0.6527, 0.3554]) tensor([0.5607, 0.1221, 0.0848, 0.2324]) -Greedy action tensor([ 0.7560, -0.6658, 0.0280, -0.0584]) tensor([0.4615, 0.1113, 0.2228, 0.2044]) -Greedy action tensor([ 1.7291, -0.1075, -0.4691, 0.8948]) tensor([0.5867, 0.0935, 0.0651, 0.2547]) -Greedy action tensor([ 2.0212, -0.6616, -0.4573, 0.6152]) tensor([0.7156, 0.0489, 0.0600, 0.1754]) -Greedy action tensor([ 1.2024, -0.1150, -0.0051, 0.0453]) tensor([0.5316, 0.1424, 0.1589, 0.1671]) -Greedy action tensor([ 1.6492, -0.2738, 0.1224, 0.1023]) tensor([0.6344, 0.0927, 0.1378, 0.1351]) -Greedy action tensor([ 1.3370, -0.3576, -0.5878, 0.2106]) tensor([0.6047, 0.1111, 0.0882, 0.1960]) -Greedy action tensor([ 1.0086, -0.2230, 0.0658, 0.2238]) tensor([0.4678, 0.1365, 0.1822, 0.2134]) -Greedy action tensor([ 1.5781, -0.1112, -0.8743, 0.2928]) tensor([0.6463, 0.1193, 0.0556, 0.1787]) -Greedy action tensor([ 1.3361, -0.3950, -0.2924, 0.3070]) tensor([0.5778, 0.1023, 0.1134, 0.2065]) -Greedy action tensor([ 1.7479, -0.6907, -0.1181, 0.2205]) tensor([0.6853, 0.0598, 0.1061, 0.1488]) -Greedy action tensor([ 0.8971, 0.1589, 0.2308, -0.3908]) tensor([0.4410, 0.2108, 0.2265, 0.1217]) -Greedy action tensor([ 1.3661, -0.1915, -0.5128, 0.1534]) tensor([0.6021, 0.1268, 0.0920, 0.1791]) -Greedy action tensor([ 2.4529, -1.0677, -0.1588, 0.3096]) tensor([0.8195, 0.0242, 0.0602, 0.0961]) -Greedy action tensor([ 1.2610, -0.2151, -0.4161, 0.3861]) tensor([0.5457, 0.1247, 0.1020, 0.2275]) -Greedy action tensor([ 1.4255, -0.3251, -0.5013, 0.0452]) tensor([0.6366, 0.1106, 0.0927, 0.1601]) -Greedy action tensor([ 1.4809, -0.2546, -1.2507, 0.3325]) tensor([0.6416, 0.1131, 0.0418, 0.2035]) -Greedy action tensor([ 1.4042, -0.5805, -0.2776, 0.6716]) tensor([0.5543, 0.0762, 0.1031, 0.2664]) -Greedy action tensor([ 1.5085, -0.7970, -0.0161, 0.3070]) tensor([0.6180, 0.0616, 0.1345, 0.1859]) -Greedy action tensor([ 0.8435, -0.0673, -0.0791, -0.3211]) tensor([0.4736, 0.1905, 0.1882, 0.1478]) -Greedy action tensor([ 1.7809, -0.3272, -0.6326, 0.6342]) tensor([0.6542, 0.0795, 0.0586, 0.2078]) -Greedy action tensor([ 0.4724, -0.3520, -0.4118, 0.6705]) tensor([0.3257, 0.1428, 0.1345, 0.3970]) -Greedy action tensor([ 0.5058, -0.2341, 0.1955, -0.0888]) tensor([0.3620, 0.1728, 0.2654, 0.1998]) -Greedy action tensor([ 0.0788, -0.2180, 0.0901, 0.1510]) tensor([0.2611, 0.1941, 0.2641, 0.2807]) -Greedy action tensor([ 1.2038, -0.6490, -0.2670, 0.7214]) tensor([0.4990, 0.0782, 0.1147, 0.3081]) -Greedy action tensor([ 1.4322, -0.7912, -0.0671, 0.2317]) tensor([0.6125, 0.0663, 0.1368, 0.1844]) -Greedy action tensor([ 1.8277, -0.5713, -0.1283, 0.4336]) tensor([0.6756, 0.0613, 0.0955, 0.1676]) -Greedy action tensor([ 1.4311, -0.6477, -0.0404, 0.1894]) tensor([0.6084, 0.0761, 0.1397, 0.1758]) -Greedy action tensor([ 2.2247, -0.4880, -0.3964, 0.3892]) tensor([0.7701, 0.0511, 0.0560, 0.1228]) -Greedy action tensor([ 1.7680, -0.8204, -0.4432, 0.7662]) tensor([0.6444, 0.0484, 0.0706, 0.2366]) -Greedy action tensor([ 1.5778, -0.3713, -0.5974, 0.3018]) tensor([0.6514, 0.0928, 0.0740, 0.1818]) -Greedy action tensor([ 0.7815, -0.6750, -0.0883, 0.3937]) tensor([0.4291, 0.1000, 0.1798, 0.2911]) -Greedy action tensor([ 1.3059, 0.2050, -0.1780, 0.1277]) tensor([0.5356, 0.1781, 0.1214, 0.1649]) -Greedy action tensor([ 2.0150, -0.2506, -0.4492, 0.6121]) tensor([0.6970, 0.0723, 0.0593, 0.1714]) -Greedy action tensor([ 1.4826, -0.3684, -0.1425, -0.0838]) tensor([0.6399, 0.1005, 0.1260, 0.1336]) -Greedy action tensor([ 1.5638, -0.1542, -0.7352, 0.3041]) tensor([0.6396, 0.1148, 0.0642, 0.1815]) -Greedy action tensor([ 1.3918, -0.0275, -0.7057, 0.1959]) tensor([0.5999, 0.1451, 0.0736, 0.1814]) -Greedy action tensor([ 1.8257, 0.1044, -0.2366, 0.1011]) tensor([0.6737, 0.1205, 0.0857, 0.1201]) -Greedy action tensor([ 1.5809, -0.1814, -0.5162, 0.4266]) tensor([0.6212, 0.1066, 0.0763, 0.1958]) -Greedy action tensor([ 2.0403, 0.1482, -0.2083, 0.3253]) tensor([0.6963, 0.1050, 0.0735, 0.1253]) -Greedy action tensor([ 1.4588, -0.5121, -0.4786, -0.1057]) tensor([0.6700, 0.0933, 0.0965, 0.1402]) -Greedy action tensor([ 1.3005, -0.5913, -0.5154, 0.4966]) tensor([0.5678, 0.0856, 0.0924, 0.2541]) -Greedy action tensor([ 1.2422, -0.5887, -0.3521, 0.3619]) tensor([0.5624, 0.0901, 0.1142, 0.2332]) -Greedy action tensor([ 1.3776, -0.4839, -0.4453, 0.1021]) tensor([0.6264, 0.0974, 0.1012, 0.1750]) -Greedy action tensor([ 2.2237, -1.0252, -0.0183, 0.4284]) tensor([0.7627, 0.0296, 0.0810, 0.1267]) -Greedy action tensor([ 1.3656, -0.4010, -0.2105, 0.2283]) tensor([0.5888, 0.1006, 0.1218, 0.1888]) -Greedy action tensor([ 1.6819, -0.4527, -0.4684, 0.0892]) tensor([0.6954, 0.0823, 0.0810, 0.1414]) -Greedy action tensor([ 1.3905, -0.5560, -0.1511, 0.2543]) tensor([0.5960, 0.0851, 0.1276, 0.1913]) -Greedy action tensor([ 1.0734, -0.4770, -0.2018, 0.3286]) tensor([0.5086, 0.1079, 0.1421, 0.2415]) -Greedy action tensor([ 1.3660, -0.3203, -0.9816, 0.4642]) tensor([0.5929, 0.1098, 0.0567, 0.2406]) -Greedy action tensor([ 1.5422, -0.7346, -0.6219, 0.3921]) tensor([0.6519, 0.0669, 0.0749, 0.2064]) -Greedy action tensor([ 1.6686, -0.3928, -0.3799, 0.1402]) tensor([0.6789, 0.0864, 0.0875, 0.1472]) -Greedy action tensor([ 1.1150, -0.5900, -0.3559, 0.4808]) tensor([0.5150, 0.0936, 0.1183, 0.2731]) -Greedy action tensor([ 1.7173, -0.7147, -0.4000, 0.6058]) tensor([0.6505, 0.0572, 0.0783, 0.2141]) -Greedy action tensor([ 1.0130, 0.0380, -0.2868, -0.0928]) tensor([0.5049, 0.1904, 0.1376, 0.1671]) -Greedy action tensor([ 1.8844, -0.8654, -0.4282, 0.4013]) tensor([0.7195, 0.0460, 0.0712, 0.1633]) -Greedy action tensor([ 1.5167, -0.4874, -0.5380, 0.4322]) tensor([0.6246, 0.0842, 0.0800, 0.2112]) -Greedy action tensor([ 1.5702, -0.3480, -0.3428, 0.4085]) tensor([0.6221, 0.0914, 0.0918, 0.1947]) -Greedy action tensor([ 0.0769, -1.0940, -0.0624, 0.2219]) tensor([0.2997, 0.0929, 0.2608, 0.3465]) -Greedy action tensor([ 0.3408, -0.3539, -0.7706, 0.6908]) tensor([0.3080, 0.1537, 0.1013, 0.4370]) -Greedy action tensor([ 0.7184, -2.2053, 0.2529, -0.0403]) tensor([0.4652, 0.0250, 0.2920, 0.2178]) -Greedy action tensor([0.9691, 0.0405, 0.2346, 0.2148]) tensor([0.4264, 0.1685, 0.2046, 0.2005]) -Greedy action tensor([ 0.9543, -1.4211, -0.2510, 0.5167]) tensor([0.4906, 0.0456, 0.1470, 0.3168]) -Greedy action tensor([ 0.2611, -1.2652, 1.5175, -0.5839]) tensor([0.1938, 0.0421, 0.6808, 0.0833]) -Greedy action tensor([ 0.3022, -0.0275, 0.5146, -1.2253]) tensor([0.3152, 0.2267, 0.3897, 0.0684]) -Greedy action tensor([ 0.2876, 0.0629, -0.1971, 0.2075]) tensor([0.2996, 0.2393, 0.1845, 0.2765]) -Greedy action tensor([-1.3305, -0.0785, -1.2089, 0.6183]) tensor([0.0791, 0.2765, 0.0893, 0.5551]) -Greedy action tensor([ 0.9152, 0.0278, 0.7453, -0.1441]) tensor([0.3843, 0.1582, 0.3242, 0.1332]) -Greedy action tensor([-0.9493, -0.4503, 0.5701, 0.3099]) tensor([0.0931, 0.1534, 0.4255, 0.3280]) -Greedy action tensor([ 0.5847, -0.0985, -0.1073, -0.5003]) tensor([0.4267, 0.2155, 0.2136, 0.1442]) -Greedy action tensor([ 0.7278, -0.5011, 1.5668, 0.3473]) tensor([0.2331, 0.0682, 0.5394, 0.1593]) -Greedy action tensor([ 0.2080, -1.6657, -0.1757, 0.9674]) tensor([0.2518, 0.0387, 0.1715, 0.5380]) -Greedy action tensor([ 1.2887, 0.5714, -0.0178, 1.2069]) tensor([0.3731, 0.1821, 0.1010, 0.3438]) -Greedy action tensor([ 2.1269, -0.2779, 0.8210, 0.9629]) tensor([0.5976, 0.0540, 0.1619, 0.1866]) -Greedy action tensor([-0.1605, -0.0715, -1.1551, 0.0238]) tensor([0.2728, 0.2982, 0.1009, 0.3281]) -Greedy action tensor([0.6917, 0.3943, 1.2623, 0.1926]) tensor([0.2428, 0.1803, 0.4295, 0.1474]) -Greedy action tensor([-0.6231, 0.3705, -0.0247, -0.8316]) tensor([0.1579, 0.4266, 0.2873, 0.1282]) -Greedy action tensor([ 0.1118, -1.4217, -0.7313, 0.2778]) tensor([0.3538, 0.0763, 0.1522, 0.4176]) -Greedy action tensor([ 0.0406, -1.7517, 0.3018, 1.1503]) tensor([0.1819, 0.0303, 0.2362, 0.5517]) -Greedy action tensor([-0.2119, -0.4669, -0.0778, 1.1558]) tensor([0.1461, 0.1132, 0.1671, 0.5736]) -Greedy action tensor([-0.6421, -1.1886, 0.5517, 0.5774]) tensor([0.1210, 0.0701, 0.3993, 0.4097]) -Greedy action tensor([ 2.2055, 0.1559, -0.2611, -0.2031]) tensor([0.7671, 0.0988, 0.0651, 0.0690]) -Greedy action tensor([ 0.4334, 0.0204, 0.6944, -0.3191]) tensor([0.2915, 0.1928, 0.3784, 0.1373]) -Greedy action tensor([-0.6643, -0.7571, 1.4223, -0.9095]) tensor([0.0930, 0.0848, 0.7494, 0.0728]) -Greedy action tensor([ 1.8027, -0.3200, -0.8190, 1.1528]) tensor([0.5833, 0.0698, 0.0424, 0.3045]) -Greedy action tensor([ 1.3957, -0.2720, 0.1104, 1.2297]) tensor([0.4325, 0.0816, 0.1196, 0.3663]) -Greedy action tensor([-0.3681, 0.2731, 0.3735, 0.8107]) tensor([0.1212, 0.2302, 0.2545, 0.3941]) -Greedy action tensor([ 0.7220, -1.5065, 0.3873, 0.0419]) tensor([0.4292, 0.0462, 0.3071, 0.2174]) -Greedy action tensor([-0.0083, -0.5060, -0.5689, 0.8086]) tensor([0.2251, 0.1368, 0.1285, 0.5095]) -Greedy action tensor([ 0.1934, -0.6625, 0.2290, -0.1716]) tensor([0.3169, 0.1347, 0.3284, 0.2200]) -Greedy action tensor([ 0.0669, -1.2232, 1.1066, 0.2125]) tensor([0.1901, 0.0523, 0.5377, 0.2199]) -Greedy action tensor([ 1.1253, -0.7485, 1.7833, 0.6683]) tensor([0.2690, 0.0413, 0.5194, 0.1703]) -Greedy action tensor([ 1.1276, -0.3235, 0.3599, 0.4052]) tensor([0.4579, 0.1073, 0.2125, 0.2223]) -Greedy action tensor([-0.2405, -1.8622, 0.0454, 0.8968]) tensor([0.1771, 0.0350, 0.2357, 0.5522]) -Greedy action tensor([ 0.3006, -0.8932, -0.2399, 0.4407]) tensor([0.3294, 0.0998, 0.1919, 0.3789]) -Greedy action tensor([-0.3146, -0.9024, 0.0088, 0.3579]) tensor([0.2042, 0.1135, 0.2822, 0.4001]) -Greedy action tensor([-0.1236, -0.3871, 0.6712, -0.8746]) tensor([0.2245, 0.1725, 0.4971, 0.1059]) -Greedy action tensor([ 1.0337, -0.6819, -0.1414, 1.4440]) tensor([0.3338, 0.0600, 0.1031, 0.5031]) -Greedy action tensor([-0.7940, -0.0175, -0.8657, -0.2318]) tensor([0.1707, 0.3710, 0.1589, 0.2995]) -Greedy action tensor([ 0.1058, 0.6100, 0.5399, -0.0642]) tensor([0.1983, 0.3283, 0.3061, 0.1673]) -Greedy action tensor([ 0.0257, -1.1104, -1.0196, 0.6904]) tensor([0.2765, 0.0888, 0.0972, 0.5375]) -Greedy action tensor([-0.7427, -0.8426, 0.8482, 0.0493]) tensor([0.1109, 0.1003, 0.5441, 0.2447]) -Greedy action tensor([-0.9952, -0.7601, 1.8859, -1.0856]) tensor([0.0476, 0.0602, 0.8487, 0.0435]) -Greedy action tensor([-0.4466, -1.4980, 0.2711, -0.6260]) tensor([0.2361, 0.0825, 0.4840, 0.1974]) -Greedy action tensor([1.2143, 0.7946, 0.1740, 0.1775]) tensor([0.4228, 0.2779, 0.1494, 0.1499]) -Greedy action tensor([ 0.4485, 0.4760, 0.0746, -0.0405]) tensor([0.3004, 0.3087, 0.2067, 0.1842]) -Greedy action tensor([ 0.2161, 0.1341, 1.6168, -0.6605]) tensor([0.1564, 0.1441, 0.6345, 0.0651]) -Greedy action tensor([ 1.0394, -0.5891, 0.5471, 1.6085]) tensor([0.2798, 0.0549, 0.1710, 0.4943]) -Greedy action tensor([ 1.2016, -1.1785, -0.5222, -0.4361]) tensor([0.6824, 0.0632, 0.1217, 0.1327]) -Greedy action tensor([-0.8567, -1.0929, 0.1089, 0.9071]) tensor([0.0976, 0.0770, 0.2562, 0.5692]) -Greedy action tensor([-0.1013, 0.1033, -0.7432, 0.5340]) tensor([0.2155, 0.2644, 0.1134, 0.4067]) -Greedy action tensor([ 0.4626, -0.0934, 0.7877, 0.9735]) tensor([0.2163, 0.1240, 0.2993, 0.3604]) -Greedy action tensor([-0.2859, -0.5330, 0.0257, -0.2451]) tensor([0.2388, 0.1865, 0.3260, 0.2487]) -Greedy action tensor([-1.0894, -0.5418, -0.3715, -0.0250]) tensor([0.1302, 0.2252, 0.2670, 0.3776]) -Greedy action tensor([-0.9001, -0.3739, 0.2684, 0.1690]) tensor([0.1134, 0.1918, 0.3647, 0.3301]) -Greedy action tensor([ 0.1702, -0.0043, -1.0102, 0.7124]) tensor([0.2586, 0.2172, 0.0794, 0.4448]) -Greedy action tensor([-0.6689, -0.0611, 0.2857, -0.2106]) tensor([0.1425, 0.2618, 0.3703, 0.2254]) -Greedy action tensor([ 0.7416, -1.2916, -0.0520, -0.0924]) tensor([0.4957, 0.0649, 0.2242, 0.2153]) -Greedy action tensor([-0.0322, -1.3543, 0.3308, -0.0065]) tensor([0.2681, 0.0715, 0.3854, 0.2751]) -Greedy action tensor([0.2659, 0.4323, 0.4451, 0.3128]) tensor([0.2260, 0.2669, 0.2703, 0.2368]) -Greedy action tensor([ 1.1997, -0.2887, -0.1670, 0.4601]) tensor([0.5107, 0.1153, 0.1302, 0.2438]) -Greedy action tensor([-0.7858, 0.5818, -0.2905, 0.8061]) tensor([0.0871, 0.3420, 0.1429, 0.4280]) -Greedy action tensor([-1.4151, -0.4880, -1.1230, 0.1226]) tensor([0.1050, 0.2655, 0.1407, 0.4888]) -Greedy action tensor([ 0.4561, -0.6585, -0.1684, -0.5149]) tensor([0.4460, 0.1463, 0.2388, 0.1689]) -Greedy action tensor([ 1.1682, -0.9635, 1.5487, 0.5595]) tensor([0.3199, 0.0380, 0.4681, 0.1741]) -Greedy action tensor([-1.4082, -0.3962, 0.6647, -0.7763]) tensor([0.0736, 0.2026, 0.5853, 0.1385]) -Greedy action tensor([-0.7185, -0.7145, 0.3154, -0.4559]) tensor([0.1635, 0.1641, 0.4598, 0.2126]) -Greedy action tensor([-1.2097, 0.0277, -0.4195, -0.2646]) tensor([0.1084, 0.3737, 0.2389, 0.2790]) -Greedy action tensor([-0.1872, -1.6969, -0.6089, 0.2201]) tensor([0.2959, 0.0654, 0.1941, 0.4447]) -Greedy action tensor([0.4714, 0.2246, 0.6432, 0.6388]) tensor([0.2409, 0.1882, 0.2861, 0.2848]) -Greedy action tensor([ 0.2886, 0.1704, -0.3668, 0.3532]) tensor([0.2878, 0.2557, 0.1494, 0.3070]) -Greedy action tensor([ 1.5109, -0.4015, -0.7170, -0.3577]) tensor([0.7093, 0.1048, 0.0764, 0.1095]) -Greedy action tensor([-1.7010, -0.7423, -1.0582, 0.2377]) tensor([0.0803, 0.2093, 0.1526, 0.5578]) -Greedy action tensor([-0.6911, -1.0701, 0.8289, 0.0940]) tensor([0.1183, 0.0810, 0.5411, 0.2595]) -Greedy action tensor([ 0.3875, -0.1234, 0.8976, 0.4424]) tensor([0.2314, 0.1388, 0.3854, 0.2444]) -Greedy action tensor([-0.3914, -1.0925, 0.4294, -0.5602]) tensor([0.2168, 0.1075, 0.4926, 0.1831]) -Greedy action tensor([ 1.0694, 0.1203, 0.8087, -0.5009]) tensor([0.4227, 0.1636, 0.3257, 0.0879]) -Greedy action tensor([-0.4620, -1.1313, -0.7337, 0.3385]) tensor([0.2222, 0.1138, 0.1693, 0.4947]) -Greedy action tensor([ 1.8450, -0.4287, 0.9395, 1.3650]) tensor([0.4703, 0.0484, 0.1902, 0.2911]) -Greedy action tensor([ 0.0638, -1.0026, 0.3947, -0.5031]) tensor([0.3027, 0.1042, 0.4214, 0.1717]) -Greedy action tensor([ 0.7595, -0.4043, -0.0703, -0.2945]) tensor([0.4769, 0.1489, 0.2080, 0.1662]) -Greedy action tensor([ 0.4727, -0.3264, -0.1680, -0.3590]) tensor([0.4146, 0.1865, 0.2185, 0.1805]) -Greedy action tensor([ 0.8655, -0.3597, -0.0275, -0.3132]) tensor([0.4973, 0.1461, 0.2036, 0.1530]) -Greedy action tensor([ 0.3625, 0.2078, -0.0652, -0.1681]) tensor([0.3229, 0.2766, 0.2105, 0.1900]) -Greedy action tensor([ 0.1831, 0.2278, -0.0624, -0.5926]) tensor([0.3041, 0.3180, 0.2379, 0.1400]) -Greedy action tensor([ 1.1062, -0.6200, -0.0704, -0.3734]) tensor([0.5834, 0.1038, 0.1799, 0.1329]) -Greedy action tensor([ 0.8538, -0.5843, -0.1411, -0.1439]) tensor([0.5061, 0.1201, 0.1871, 0.1866]) -Greedy action tensor([ 1.0129, -0.9068, -0.0221, -0.3390]) tensor([0.5680, 0.0833, 0.2018, 0.1470]) -Greedy action tensor([ 1.1477, -0.7374, -0.0387, -0.7866]) tensor([0.6244, 0.0948, 0.1906, 0.0902]) -Greedy action tensor([ 0.7558, -0.6014, -0.0230, -0.3462]) tensor([0.4882, 0.1256, 0.2240, 0.1622]) -Greedy action tensor([ 1.1224, -0.6985, 0.0534, -0.5133]) tensor([0.5882, 0.0952, 0.2020, 0.1146]) -Greedy action tensor([ 0.5838, -0.3776, -0.0479, -0.4590]) tensor([0.4412, 0.1687, 0.2346, 0.1555]) -Greedy action tensor([ 0.7132, -0.5087, 0.0414, -0.3413]) tensor([0.4643, 0.1368, 0.2372, 0.1617]) -Greedy action tensor([ 0.8360, -0.3641, -0.0227, -0.3229]) tensor([0.4905, 0.1477, 0.2078, 0.1539]) -Greedy action tensor([ 0.6512, -0.3136, -0.0480, -0.1743]) tensor([0.4318, 0.1645, 0.2146, 0.1891]) -Greedy action tensor([ 0.5347, -0.4305, -0.0828, -0.0500]) tensor([0.4036, 0.1537, 0.2177, 0.2249]) -Greedy action tensor([ 0.0998, -0.1135, 0.0686, -0.2061]) tensor([0.2846, 0.2299, 0.2759, 0.2096]) -Greedy action tensor([ 0.6967, -0.4872, 0.2175, -0.6097]) tensor([0.4553, 0.1394, 0.2820, 0.1233]) -Greedy action tensor([ 0.4885, -0.3370, -0.0180, -0.2558]) tensor([0.3975, 0.1741, 0.2395, 0.1888]) -Greedy action tensor([ 0.7080, -0.1712, -0.2555, -0.1298]) tensor([0.4486, 0.1862, 0.1712, 0.1941]) -Greedy action tensor([ 0.9878, -0.4059, -0.2244, -0.2092]) tensor([0.5412, 0.1343, 0.1610, 0.1635]) -Greedy action tensor([ 0.3540, -0.2280, -0.0851, -0.1434]) tensor([0.3557, 0.1988, 0.2293, 0.2163]) -Greedy action tensor([ 0.6050, -0.4272, -0.0420, -0.0987]) tensor([0.4211, 0.1500, 0.2205, 0.2084]) -Greedy action tensor([ 0.7263, -0.3797, -0.0930, -0.5822]) tensor([0.4897, 0.1621, 0.2159, 0.1323]) -Greedy action tensor([ 0.7658, -0.2200, 0.0144, -0.0028]) tensor([0.4332, 0.1616, 0.2043, 0.2008]) -Greedy action tensor([ 0.8821, -0.5886, -0.1437, -0.3255]) tensor([0.5299, 0.1217, 0.1900, 0.1584]) -Greedy action tensor([ 0.8967, -0.6568, -0.1132, -0.6808]) tensor([0.5611, 0.1187, 0.2044, 0.1159]) -Greedy action tensor([ 1.2949, -0.6871, -0.0600, -0.5495]) tensor([0.6436, 0.0887, 0.1660, 0.1018]) -Greedy action tensor([ 0.5137, 0.3679, -0.1125, 0.1416]) tensor([0.3238, 0.2799, 0.1731, 0.2232]) -Greedy action tensor([ 0.8195, -0.5990, 0.1121, -0.6822]) tensor([0.5108, 0.1237, 0.2518, 0.1138]) -Greedy action tensor([ 1.0945, -0.5047, -0.0797, -0.7095]) tensor([0.5967, 0.1206, 0.1844, 0.0982]) -Greedy action tensor([ 0.9808, -0.8318, -0.0714, -0.6839]) tensor([0.5877, 0.0959, 0.2052, 0.1112]) -Greedy action tensor([ 0.6163, -0.4640, 0.0175, -0.3091]) tensor([0.4376, 0.1486, 0.2404, 0.1734]) -Greedy action tensor([ 1.1216, -0.6375, -0.1318, -0.6539]) tensor([0.6146, 0.1058, 0.1755, 0.1041]) -Greedy action tensor([ 0.8021, -0.1627, -0.0630, -0.1392]) tensor([0.4562, 0.1738, 0.1921, 0.1780]) -Greedy action tensor([ 0.2482, -0.0574, -0.1026, -0.4213]) tensor([0.3387, 0.2495, 0.2385, 0.1734]) -Greedy action tensor([ 0.5110, -0.3695, -0.0927, -0.2323]) tensor([0.4104, 0.1701, 0.2244, 0.1951]) -Greedy action tensor([ 0.7334, -0.4379, -0.0112, -0.3541]) tensor([0.4713, 0.1461, 0.2238, 0.1588]) -Greedy action tensor([ 0.7512, -0.7213, 0.0093, -0.3221]) tensor([0.4884, 0.1120, 0.2326, 0.1670]) -Greedy action tensor([ 0.5552, -0.3344, -0.1709, -0.2878]) tensor([0.4301, 0.1767, 0.2081, 0.1851]) -Greedy action tensor([ 0.7281, -0.1398, -0.0368, -0.3545]) tensor([0.4497, 0.1888, 0.2093, 0.1523]) -Greedy action tensor([ 0.5154, -0.1980, 0.0708, -0.0705]) tensor([0.3721, 0.1823, 0.2385, 0.2071]) -Greedy action tensor([ 0.5730, -0.1591, -0.1315, -0.4297]) tensor([0.4270, 0.2053, 0.2111, 0.1566]) -Greedy action tensor([-0.1248, 0.2119, -0.2265, -0.1867]) tensor([0.2357, 0.3300, 0.2129, 0.2215]) -Greedy action tensor([ 0.4159, -0.1587, -0.2775, -0.6260]) tensor([0.4140, 0.2330, 0.2069, 0.1460]) -Greedy action tensor([ 0.9217, -0.5515, -0.1608, -0.3255]) tensor([0.5390, 0.1235, 0.1826, 0.1549]) -Greedy action tensor([ 0.3805, -0.2650, -0.0309, -0.4309]) tensor([0.3800, 0.1993, 0.2519, 0.1688]) -Greedy action tensor([ 0.4184, -0.0852, -0.0666, -0.1110]) tensor([0.3560, 0.2152, 0.2192, 0.2097]) -Greedy action tensor([ 1.0160, -1.2392, -0.0782, -0.6565]) tensor([0.6145, 0.0644, 0.2057, 0.1154]) -Greedy action tensor([ 0.4889, -0.1541, -0.0148, -0.1432]) tensor([0.3757, 0.1975, 0.2270, 0.1997]) -Greedy action tensor([ 1.1883, -0.6827, -0.1549, -0.3434]) tensor([0.6131, 0.0944, 0.1600, 0.1325]) -Greedy action tensor([ 0.7871, -0.4742, 0.0264, -0.5025]) tensor([0.4936, 0.1398, 0.2307, 0.1359]) -Greedy action tensor([ 0.3404, -0.0715, -0.0246, -0.2225]) tensor([0.3417, 0.2264, 0.2372, 0.1946]) -Greedy action tensor([ 0.5285, -0.0069, -0.1769, 0.0050]) tensor([0.3743, 0.2191, 0.1849, 0.2217]) -Greedy action tensor([ 0.5546, -0.1532, 0.0064, -0.0546]) tensor([0.3825, 0.1885, 0.2211, 0.2080]) -Greedy action tensor([ 0.8894, -0.5588, 0.1323, -0.8285]) tensor([0.5309, 0.1248, 0.2490, 0.0953]) -Greedy action tensor([ 0.7230, -0.2966, -0.1075, -0.2157]) tensor([0.4571, 0.1649, 0.1992, 0.1788]) -Greedy action tensor([ 0.7508, -0.1845, -0.0232, -0.0497]) tensor([0.4343, 0.1704, 0.2003, 0.1950]) -Greedy action tensor([ 0.8486, -0.5252, 0.0578, -0.4185]) tensor([0.5030, 0.1273, 0.2281, 0.1416]) -Greedy action tensor([ 1.0443, -0.4043, -0.2115, -0.4296]) tensor([0.5718, 0.1343, 0.1629, 0.1310]) -Greedy action tensor([ 0.5330, -0.0106, -0.1324, 0.1599]) tensor([0.3593, 0.2086, 0.1847, 0.2474]) -Greedy action tensor([ 0.6658, -0.6456, 0.1159, -0.6874]) tensor([0.4751, 0.1280, 0.2741, 0.1228]) -Greedy action tensor([ 0.9324, -0.5992, -0.0176, -0.5014]) tensor([0.5431, 0.1174, 0.2100, 0.1295]) -Greedy action tensor([ 0.9106, -0.5585, -0.0640, -0.2825]) tensor([0.5234, 0.1204, 0.1975, 0.1587]) -Greedy action tensor([ 1.1801, -0.7340, -0.0275, -0.4683]) tensor([0.6102, 0.0900, 0.1824, 0.1174]) -Greedy action tensor([ 0.4527, -0.2549, -0.1441, -0.2054]) tensor([0.3904, 0.1924, 0.2150, 0.2022]) -Greedy action tensor([ 0.1957, 0.1352, -0.3114, -0.4057]) tensor([0.3235, 0.3045, 0.1948, 0.1773]) -Greedy action tensor([ 0.8184, -0.5391, 0.0967, -0.4296]) tensor([0.4925, 0.1267, 0.2393, 0.1414]) -Greedy action tensor([ 1.1826, -0.7260, -0.0674, -0.6297]) tensor([0.6257, 0.0928, 0.1793, 0.1022]) -Greedy action tensor([ 0.5110, -0.2508, -0.0358, -0.0941]) tensor([0.3858, 0.1801, 0.2233, 0.2107]) -Greedy action tensor([ 0.7306, -0.8024, 0.0181, -0.3727]) tensor([0.4907, 0.1059, 0.2406, 0.1628]) -Greedy action tensor([ 0.4845, 0.2245, -0.1177, 0.1845]) tensor([0.3269, 0.2520, 0.1790, 0.2421]) -Greedy action tensor([ 0.4513, -0.2864, -0.0543, -0.2266]) tensor([0.3862, 0.1847, 0.2330, 0.1961]) -Greedy action tensor([ 0.8194, -0.4189, -0.0742, -0.3686]) tensor([0.4990, 0.1447, 0.2042, 0.1521]) -Greedy action tensor([ 0.7373, -0.4155, 0.0227, -0.4573]) tensor([0.4744, 0.1498, 0.2322, 0.1437]) -Greedy action tensor([ 0.2036, 0.2486, 0.0514, -0.2263]) tensor([0.2813, 0.2942, 0.2415, 0.1830]) -Greedy action tensor([ 0.8950, -0.4040, -0.0633, -0.2847]) tensor([0.5092, 0.1389, 0.1953, 0.1565]) -Greedy action tensor([ 1.0157, -0.5937, -0.1101, -0.4208]) tensor([0.5675, 0.1135, 0.1841, 0.1349]) -Greedy action tensor([ 0.0877, 0.1181, -0.1683, -0.2886]) tensor([0.2864, 0.2953, 0.2217, 0.1966]) -Greedy action tensor([ 0.5785, -0.3854, -0.1648, -0.1217]) tensor([0.4249, 0.1621, 0.2021, 0.2110]) -Greedy action tensor([ 0.4112, -0.1910, 0.0013, -0.2035]) tensor([0.3633, 0.1990, 0.2412, 0.1965]) -Greedy action tensor([-1.8487, -0.4244, 0.6670, -0.1076]) tensor([0.0430, 0.1788, 0.5326, 0.2455]) -Greedy action tensor([ 0.5970, -0.0527, 0.0081, 0.5364]) tensor([0.3313, 0.1730, 0.1839, 0.3118]) -Greedy action tensor([-1.0568, 0.2250, 0.5281, 0.6510]) tensor([0.0667, 0.2402, 0.3253, 0.3678]) -Greedy action tensor([-1.9286, -0.4565, 0.6679, -0.1589]) tensor([0.0406, 0.1769, 0.5444, 0.2381]) -Greedy action tensor([-1.9396, -0.4595, 0.6685, -0.1775]) tensor([0.0403, 0.1772, 0.5475, 0.2350]) -Greedy action tensor([-1.8590, -0.3624, 0.6239, -0.1158]) tensor([0.0432, 0.1929, 0.5171, 0.2468]) -Greedy action tensor([-1.9192, -0.3923, 0.6479, -0.1681]) tensor([0.0410, 0.1887, 0.5341, 0.2362]) -Greedy action tensor([-1.9080, -0.4669, 0.6529, -0.1573]) tensor([0.0418, 0.1766, 0.5410, 0.2406]) -Greedy action tensor([-1.3187, -0.1989, 0.3176, 0.0176]) tensor([0.0769, 0.2356, 0.3949, 0.2926]) -Greedy action tensor([-1.8693, -0.4434, 0.6305, -0.1392]) tensor([0.0435, 0.1811, 0.5300, 0.2455]) -Greedy action tensor([-0.9482, -0.5606, 0.4011, -0.1734]) tensor([0.1177, 0.1734, 0.4536, 0.2554]) -Greedy action tensor([-1.3274, -0.5620, 0.4762, -0.1126]) tensor([0.0794, 0.1707, 0.4822, 0.2676]) -Greedy action tensor([-1.7794, -0.4519, 0.5829, -0.0860]) tensor([0.0480, 0.1811, 0.5097, 0.2611]) -Greedy action tensor([-1.9188, -0.3732, 0.6221, -0.1670]) tensor([0.0414, 0.1943, 0.5256, 0.2387]) -Greedy action tensor([-1.9328, -0.4509, 0.6584, -0.1734]) tensor([0.0407, 0.1792, 0.5435, 0.2366]) -Greedy action tensor([-1.4314, 0.2066, 0.3506, -0.0251]) tensor([0.0619, 0.3182, 0.3675, 0.2524]) -Greedy action tensor([-1.9308, -0.7430, 0.7908, -0.0836]) tensor([0.0387, 0.1270, 0.5887, 0.2456]) -Greedy action tensor([-1.9064, -0.3561, 0.6192, -0.1681]) tensor([0.0418, 0.1972, 0.5230, 0.2380]) -Greedy action tensor([-1.9275, -0.4180, 0.6556, -0.1679]) tensor([0.0407, 0.1841, 0.5387, 0.2364]) -Greedy action tensor([-1.8936, -0.4473, 0.6422, -0.1545]) tensor([0.0424, 0.1802, 0.5358, 0.2415]) -Greedy action tensor([-1.7961, -0.1281, 0.5654, -0.0984]) tensor([0.0447, 0.2370, 0.4742, 0.2441]) -Greedy action tensor([-0.5664, -0.5425, 0.2124, 0.2681]) tensor([0.1537, 0.1574, 0.3349, 0.3540]) -Greedy action tensor([-1.9277, -0.4484, 0.6710, -0.1647]) tensor([0.0405, 0.1780, 0.5451, 0.2363]) -Greedy action tensor([-1.8115, -0.4746, 0.5987, -0.0864]) tensor([0.0464, 0.1766, 0.5166, 0.2604]) -Greedy action tensor([-1.5041, -0.5275, 0.4411, 0.0364]) tensor([0.0653, 0.1734, 0.4567, 0.3047]) -Greedy action tensor([-1.6058, -0.3256, 0.5716, 0.0919]) tensor([0.0530, 0.1905, 0.4673, 0.2892]) -Greedy action tensor([-1.8669, -0.3451, 0.6288, -0.0856]) tensor([0.0423, 0.1937, 0.5130, 0.2511]) -Greedy action tensor([-1.9058, -0.4412, 0.6479, -0.1616]) tensor([0.0418, 0.1810, 0.5378, 0.2394]) -Greedy action tensor([-1.7676, -0.4651, 0.7068, 0.0833]) tensor([0.0436, 0.1605, 0.5181, 0.2777]) -Greedy action tensor([-1.5314, -0.4891, 0.4874, 0.0354]) tensor([0.0619, 0.1755, 0.4660, 0.2966]) -Greedy action tensor([-1.9274, -0.4254, 0.6571, -0.1695]) tensor([0.0407, 0.1829, 0.5401, 0.2363]) -Greedy action tensor([-1.9190, -0.4714, 0.6812, -0.1438]) tensor([0.0406, 0.1727, 0.5470, 0.2397]) -Greedy action tensor([-1.7495, -0.5097, 0.5631, -0.0482]) tensor([0.0499, 0.1724, 0.5041, 0.2736]) -Greedy action tensor([-1.8947, -0.4063, 0.6428, -0.1406]) tensor([0.0419, 0.1857, 0.5302, 0.2422]) -Greedy action tensor([-1.9222, -0.4130, 0.6517, -0.1665]) tensor([0.0409, 0.1852, 0.5370, 0.2369]) -Greedy action tensor([-1.8496, -0.4878, 0.6191, -0.1277]) tensor([0.0448, 0.1750, 0.5293, 0.2508]) -Greedy action tensor([-1.9291, -0.4457, 0.6608, -0.1687]) tensor([0.0407, 0.1795, 0.5429, 0.2368]) -Greedy action tensor([-1.8220, -0.4041, 0.6063, -0.1098]) tensor([0.0454, 0.1876, 0.5152, 0.2518]) -Greedy action tensor([-0.8195, -0.6040, 0.1951, 0.1329]) tensor([0.1317, 0.1634, 0.3634, 0.3415]) -Greedy action tensor([-1.8875, -0.4416, 0.6387, -0.1451]) tensor([0.0426, 0.1810, 0.5330, 0.2434]) -Greedy action tensor([-1.8482, -0.4709, 0.6143, -0.1073]) tensor([0.0446, 0.1770, 0.5238, 0.2546]) -Greedy action tensor([-1.0172, 0.6555, 0.0412, 0.2636]) tensor([0.0781, 0.4159, 0.2250, 0.2810]) -Greedy action tensor([-1.7915, -0.7224, 1.4709, 0.4920]) tensor([0.0251, 0.0731, 0.6555, 0.2463]) -Greedy action tensor([-1.8434, -0.3528, 0.6465, -0.0941]) tensor([0.0430, 0.1909, 0.5187, 0.2473]) -Greedy action tensor([-0.4006, -0.4692, 0.1772, 0.2580]) tensor([0.1771, 0.1653, 0.3155, 0.3421]) -Greedy action tensor([-1.7958, -0.4782, 0.6197, -0.0611]) tensor([0.0463, 0.1729, 0.5184, 0.2624]) -Greedy action tensor([-1.3351, 0.5995, 0.2447, 0.0666]) tensor([0.0594, 0.4111, 0.2883, 0.2413]) -Greedy action tensor([-1.9213, -0.4586, 0.6656, -0.1595]) tensor([0.0409, 0.1767, 0.5439, 0.2384]) -Greedy action tensor([-1.6682, -0.4718, 0.5313, -0.0428]) tensor([0.0543, 0.1797, 0.4900, 0.2760]) -Greedy action tensor([-1.8503, -0.5028, 0.6531, -0.0968]) tensor([0.0438, 0.1684, 0.5350, 0.2528]) -Greedy action tensor([-1.8849, -0.4123, 0.6354, -0.1390]) tensor([0.0425, 0.1854, 0.5285, 0.2436]) -Greedy action tensor([-1.3357, 0.0798, 0.2987, 0.0584]) tensor([0.0700, 0.2885, 0.3591, 0.2824]) -Greedy action tensor([-1.8528, -0.3873, 0.6147, -0.1368]) tensor([0.0441, 0.1909, 0.5198, 0.2452]) -Greedy action tensor([-0.6593, 1.0372, 0.0385, 0.4118]) tensor([0.0879, 0.4792, 0.1765, 0.2564]) -Greedy action tensor([-1.8598, -0.4242, 0.6206, -0.1341]) tensor([0.0439, 0.1846, 0.5248, 0.2467]) -Greedy action tensor([-0.7319, -0.4479, 0.8014, 1.3111]) tensor([0.0681, 0.0905, 0.3157, 0.5256]) -Greedy action tensor([-1.6245, -0.1214, 0.5358, 0.0487]) tensor([0.0513, 0.2306, 0.4448, 0.2733]) -Greedy action tensor([-1.4915, 0.6056, 0.3434, 0.1349]) tensor([0.0488, 0.3973, 0.3057, 0.2482]) -Greedy action tensor([-1.7816, -0.4936, 0.5694, -0.1256]) tensor([0.0491, 0.1781, 0.5155, 0.2573]) -Greedy action tensor([-1.8339, -0.4929, 0.6190, -0.1144]) tensor([0.0454, 0.1736, 0.5276, 0.2534]) -Greedy action tensor([-1.7314, -0.5363, 0.5601, -0.0583]) tensor([0.0512, 0.1692, 0.5066, 0.2729]) -Greedy action tensor([-1.6322, -0.3876, 0.4976, -0.0151]) tensor([0.0558, 0.1937, 0.4694, 0.2811]) -Greedy action tensor([-1.5314, -0.0865, 0.6541, 0.3915]) tensor([0.0477, 0.2022, 0.4241, 0.3261]) -Greedy action tensor([-1.8615, -0.2832, 0.6285, -0.1106]) tensor([0.0423, 0.2048, 0.5096, 0.2434]) -Greedy action tensor([-1.9164, -0.3955, 0.6515, -0.1547]) tensor([0.0409, 0.1873, 0.5336, 0.2382]) -Greedy action tensor([-1.9269, -0.4254, 0.6583, -0.1670]) tensor([0.0407, 0.1827, 0.5400, 0.2366]) -Greedy action tensor([-1.7894, -0.4253, 0.6698, -0.0868]) tensor([0.0453, 0.1771, 0.5293, 0.2484]) -Greedy action tensor([-1.8981, -0.4539, 0.6496, -0.1571]) tensor([0.0422, 0.1787, 0.5387, 0.2404]) -Greedy action tensor([-1.8927, -0.4740, 0.6408, -0.1459]) tensor([0.0426, 0.1761, 0.5368, 0.2445]) -Greedy action tensor([-1.9011, -0.4369, 0.6475, -0.1553]) tensor([0.0419, 0.1813, 0.5364, 0.2403]) -Greedy action tensor([-1.6598, -0.4471, 0.5359, -0.0170]) tensor([0.0540, 0.1816, 0.4853, 0.2792]) -Greedy action tensor([-1.3972, -0.5640, 0.3841, 0.1261]) tensor([0.0723, 0.1664, 0.4295, 0.3318]) -Greedy action tensor([-1.8989, -0.3574, 0.6376, -0.1468]) tensor([0.0415, 0.1941, 0.5249, 0.2395]) -Greedy action tensor([-1.8416, -0.2281, 0.5893, -0.1180]) tensor([0.0435, 0.2183, 0.4944, 0.2437]) -Greedy action tensor([-1.1869, -0.0074, 0.4768, 0.3633]) tensor([0.0702, 0.2283, 0.3706, 0.3308]) -Greedy action tensor([-1.5996, -0.1788, 0.6567, -0.5818]) tensor([0.0573, 0.2372, 0.5470, 0.1585]) -Greedy action tensor([-0.6376, -0.4548, 0.3152, 0.0644]) tensor([0.1468, 0.1763, 0.3807, 0.2963]) -Greedy action tensor([-1.7746, -0.4450, 0.5925, -0.1670]) tensor([0.0489, 0.1849, 0.5219, 0.2442]) -Greedy action tensor([-1.9130, -0.4463, 0.6705, -0.1601]) tensor([0.0411, 0.1780, 0.5439, 0.2370]) -Greedy action tensor([-1.7897, -0.4654, 0.6237, 0.0303]) tensor([0.0452, 0.1701, 0.5054, 0.2792]) -Greedy action tensor([-1.9150, -0.4031, 0.6414, -0.1738]) tensor([0.0414, 0.1880, 0.5342, 0.2364]) -Greedy action tensor([ 1.1624, -0.3513, -0.1483, 0.1476]) tensor([0.5399, 0.1188, 0.1456, 0.1957]) -Greedy action tensor([ 1.2611, -0.4352, -0.3159, -0.0240]) tensor([0.6000, 0.1100, 0.1240, 0.1660]) -Greedy action tensor([ 0.6940, -0.0650, -0.3702, 0.2939]) tensor([0.4027, 0.1885, 0.1389, 0.2699]) -Greedy action tensor([ 2.5625, -0.6124, -0.6493, 0.6819]) tensor([0.8100, 0.0339, 0.0326, 0.1235]) -Greedy action tensor([ 1.3589, -0.2789, -0.3644, 0.4039]) tensor([0.5689, 0.1106, 0.1015, 0.2189]) -Greedy action tensor([ 1.0237, -0.0042, 0.0247, 0.1200]) tensor([0.4692, 0.1679, 0.1728, 0.1901]) -Greedy action tensor([ 0.9653, -0.5203, -0.1387, 0.2662]) tensor([0.4866, 0.1102, 0.1613, 0.2419]) -Greedy action tensor([ 1.3881, -0.8092, -0.2822, 0.5588]) tensor([0.5762, 0.0640, 0.1084, 0.2514]) -Greedy action tensor([ 1.8147, -1.0017, -0.3603, 0.0423]) tensor([0.7444, 0.0445, 0.0846, 0.1265]) -Greedy action tensor([ 1.3476, -0.5785, -0.2299, 0.3750]) tensor([0.5779, 0.0842, 0.1193, 0.2185]) -Greedy action tensor([ 1.5706, -1.0178, 0.1027, 0.4658]) tensor([0.6110, 0.0459, 0.1408, 0.2024]) -Greedy action tensor([ 1.3374, -0.3557, -0.4407, 0.3434]) tensor([0.5804, 0.1068, 0.0981, 0.2148]) -Greedy action tensor([ 0.9719, -0.2398, 0.1854, 0.1863]) tensor([0.4527, 0.1348, 0.2062, 0.2064]) -Greedy action tensor([ 1.6473, -0.4683, -0.2874, 0.4534]) tensor([0.6377, 0.0769, 0.0921, 0.1933]) -Greedy action tensor([ 1.7486, -0.6656, 0.0717, 0.0593]) tensor([0.6844, 0.0612, 0.1280, 0.1264]) -Greedy action tensor([ 1.3420, -0.4467, -0.2573, 0.2013]) tensor([0.5921, 0.0990, 0.1196, 0.1892]) -Greedy action tensor([ 2.3490, 0.3266, -0.0517, 0.3341]) tensor([0.7373, 0.0976, 0.0668, 0.0983]) -Greedy action tensor([ 0.8222, -0.3397, 0.0753, -0.1668]) tensor([0.4633, 0.1449, 0.2195, 0.1723]) -Greedy action tensor([ 1.2874, -0.2548, -0.4399, 0.1582]) tensor([0.5831, 0.1247, 0.1036, 0.1885]) -Greedy action tensor([ 0.9396, -0.4269, 0.0047, 0.0675]) tensor([0.4841, 0.1234, 0.1901, 0.2024]) -Greedy action tensor([ 2.1563, -1.0138, -0.2331, 0.5356]) tensor([0.7511, 0.0315, 0.0689, 0.1485]) -Greedy action tensor([ 2.1977, -0.1123, -0.4214, -0.3552]) tensor([0.8000, 0.0794, 0.0583, 0.0623]) -Greedy action tensor([ 2.1916, -0.3847, -0.0337, 0.1884]) tensor([0.7582, 0.0577, 0.0819, 0.1023]) -Greedy action tensor([ 1.3103, 0.0248, -0.5405, 0.3218]) tensor([0.5538, 0.1531, 0.0870, 0.2061]) -Greedy action tensor([ 2.2815, -0.4700, -0.4475, 0.6138]) tensor([0.7589, 0.0484, 0.0495, 0.1432]) -Greedy action tensor([ 0.2542, -0.0515, 0.1768, -0.1252]) tensor([0.2988, 0.2201, 0.2766, 0.2045]) -Greedy action tensor([ 1.3675, -0.2645, -0.6418, 0.3256]) tensor([0.5944, 0.1162, 0.0797, 0.2097]) -Greedy action tensor([ 1.7857, -0.7346, -0.2302, 0.4167]) tensor([0.6812, 0.0548, 0.0907, 0.1733]) -Greedy action tensor([ 1.5950, -0.7252, -0.6946, 0.3309]) tensor([0.6747, 0.0663, 0.0684, 0.1906]) -Greedy action tensor([ 1.7263, -0.5792, -0.4742, 0.6800]) tensor([0.6403, 0.0638, 0.0709, 0.2249]) -Greedy action tensor([ 1.4469, -0.1938, -0.4450, 0.1647]) tensor([0.6165, 0.1195, 0.0930, 0.1710]) -Greedy action tensor([ 1.4174, -0.3397, -0.3007, 0.2346]) tensor([0.6030, 0.1040, 0.1082, 0.1848]) -Greedy action tensor([ 2.0729, -0.9334, -0.3133, 0.3756]) tensor([0.7549, 0.0374, 0.0694, 0.1383]) -Greedy action tensor([ 1.4842, -0.1458, -0.3996, 0.3891]) tensor([0.5944, 0.1165, 0.0904, 0.1988]) -Greedy action tensor([ 1.1348, -0.6381, -0.6284, -0.1324]) tensor([0.6162, 0.1046, 0.1057, 0.1735]) -Greedy action tensor([ 0.5753, -0.2526, 0.0891, 0.0210]) tensor([0.3807, 0.1664, 0.2341, 0.2187]) -Greedy action tensor([ 0.9057, -0.1209, -0.0075, 0.1598]) tensor([0.4477, 0.1604, 0.1796, 0.2123]) -Greedy action tensor([ 0.8154, -0.2924, -0.3163, 0.3354]) tensor([0.4402, 0.1454, 0.1420, 0.2724]) -Greedy action tensor([ 1.6277, -0.4296, -0.5566, 0.4023]) tensor([0.6519, 0.0833, 0.0734, 0.1914]) -Greedy action tensor([ 1.1927, -0.0051, -0.2136, 0.3446]) tensor([0.5063, 0.1528, 0.1241, 0.2168]) -Greedy action tensor([ 1.5350, -0.2165, -0.7651, 0.3331]) tensor([0.6352, 0.1102, 0.0637, 0.1910]) -Greedy action tensor([ 2.3985, -1.0803, 0.0092, 1.0267]) tensor([0.7267, 0.0224, 0.0666, 0.1843]) -Greedy action tensor([ 2.4157, -1.1538, -0.1879, 0.1106]) tensor([0.8320, 0.0234, 0.0616, 0.0830]) -Greedy action tensor([ 1.5952, -0.5293, -0.0274, 0.4687]) tensor([0.6093, 0.0728, 0.1203, 0.1975]) -Greedy action tensor([ 2.1353, -0.8244, -0.2780, 0.3600]) tensor([0.7629, 0.0395, 0.0683, 0.1293]) -Greedy action tensor([ 1.9023, -0.1906, -0.4495, 0.2405]) tensor([0.7101, 0.0876, 0.0676, 0.1348]) -Greedy action tensor([ 1.7812, -0.5569, -0.3001, 0.5497]) tensor([0.6609, 0.0638, 0.0825, 0.1929]) -Greedy action tensor([ 1.9131, -0.2578, -0.4732, 0.4096]) tensor([0.7001, 0.0799, 0.0644, 0.1557]) -Greedy action tensor([ 1.2049, -0.4543, -0.4891, 0.2667]) tensor([0.5664, 0.1078, 0.1041, 0.2217]) -Greedy action tensor([ 1.8984, -0.4974, -1.1368, 0.3064]) tensor([0.7448, 0.0679, 0.0358, 0.1516]) -Greedy action tensor([ 0.6958, -0.5955, 0.4233, -0.0215]) tensor([0.3961, 0.1089, 0.3016, 0.1933]) -Greedy action tensor([ 1.4365, -0.1635, -0.8716, 0.2759]) tensor([0.6193, 0.1250, 0.0616, 0.1940]) -Greedy action tensor([ 2.0350, -0.4781, -0.2091, 0.2316]) tensor([0.7398, 0.0599, 0.0784, 0.1219]) -Greedy action tensor([ 1.5350, -0.5032, -0.2344, 0.6227]) tensor([0.5875, 0.0765, 0.1001, 0.2359]) -Greedy action tensor([ 1.4648, -0.2191, -0.1683, 0.4984]) tensor([0.5677, 0.1054, 0.1109, 0.2160]) -Greedy action tensor([ 0.7852, -0.1059, -0.5423, 0.8482]) tensor([0.3649, 0.1497, 0.0968, 0.3886]) -Greedy action tensor([ 0.8097, -0.4048, -0.2584, 0.4025]) tensor([0.4336, 0.1287, 0.1490, 0.2886]) -Greedy action tensor([ 2.0153, -0.2033, -1.0410, 1.0206]) tensor([0.6555, 0.0713, 0.0308, 0.2424]) -Greedy action tensor([ 0.9444, -0.5667, -0.6863, 0.3738]) tensor([0.5046, 0.1114, 0.0988, 0.2852]) -Greedy action tensor([ 2.0535, -0.1515, -0.5122, 0.1719]) tensor([0.7466, 0.0823, 0.0574, 0.1137]) -Greedy action tensor([ 1.5232, -0.4549, -0.3754, 0.2001]) tensor([0.6433, 0.0890, 0.0964, 0.1713]) -Greedy action tensor([ 0.5529, -0.1576, -0.1692, 0.1911]) tensor([0.3740, 0.1838, 0.1817, 0.2605]) -Greedy action tensor([ 0.4319, -0.0984, 0.1525, 0.1282]) tensor([0.3244, 0.1909, 0.2453, 0.2394]) -Greedy action tensor([ 0.7624, -0.2558, -0.2557, 0.6684]) tensor([0.3798, 0.1372, 0.1372, 0.3458]) -Greedy action tensor([ 1.2829, -0.4619, -0.4279, 0.2443]) tensor([0.5850, 0.1022, 0.1057, 0.2071]) -Greedy action tensor([ 1.3166, -0.6507, -0.4514, -0.1448]) tensor([0.6483, 0.0907, 0.1106, 0.1504]) -Greedy action tensor([ 1.9913, 0.1819, -0.3667, 0.6954]) tensor([0.6527, 0.1069, 0.0618, 0.1786]) -Greedy action tensor([ 1.6008, -0.6923, -0.5213, 0.2478]) tensor([0.6761, 0.0682, 0.0810, 0.1747]) -Greedy action tensor([ 1.6470, -0.6803, -0.1967, 0.0225]) tensor([0.6883, 0.0671, 0.1089, 0.1356]) -Greedy action tensor([ 1.3931, -0.1523, -0.2316, 0.3271]) tensor([0.5699, 0.1215, 0.1123, 0.1963]) -Greedy action tensor([ 2.0390, -0.7180, -0.4452, 0.8981]) tensor([0.6819, 0.0433, 0.0569, 0.2179]) -Greedy action tensor([ 1.5365, -0.9597, -0.0186, 0.1303]) tensor([0.6499, 0.0536, 0.1372, 0.1593]) -Greedy action tensor([ 1.6366, -0.7413, -0.1086, 0.5242]) tensor([0.6265, 0.0581, 0.1094, 0.2060]) -Greedy action tensor([ 1.2587, -0.6791, -0.0739, 0.2359]) tensor([0.5658, 0.0815, 0.1493, 0.2035]) -Greedy action tensor([ 0.8857, -0.2355, -0.2348, -0.2734]) tensor([0.5087, 0.1658, 0.1659, 0.1596]) -Greedy action tensor([ 1.4332, -0.3849, -0.7847, 0.4419]) tensor([0.6089, 0.0988, 0.0663, 0.2260]) -Greedy action tensor([ 1.2603, -0.3836, -0.6123, 0.4039]) tensor([0.5644, 0.1091, 0.0868, 0.2397]) -Greedy action tensor([ 1.4210, -0.6974, -0.4445, 0.3116]) tensor([0.6231, 0.0749, 0.0965, 0.2055]) -Greedy action tensor([ 2.3708, -0.2303, -0.5974, 0.3658]) tensor([0.7935, 0.0589, 0.0408, 0.1069]) -Greedy action tensor([ 1.3466, -1.0102, -0.4502, 0.5787]) tensor([0.5799, 0.0549, 0.0962, 0.2690]) -Greedy action tensor([ 0.9211, -0.5218, -0.6121, 0.5264]) tensor([0.4704, 0.1111, 0.1015, 0.3170]) -Greedy action tensor([ 0.0365, -0.8484, 0.9756, -0.6504]) tensor([0.2235, 0.0923, 0.5717, 0.1125]) -Greedy action tensor([-0.7142, -1.0083, 0.0836, -0.6095]) tensor([0.1970, 0.1468, 0.4375, 0.2187]) -Greedy action tensor([ 0.0284, -0.4669, 0.4741, -1.0609]) tensor([0.2851, 0.1737, 0.4452, 0.0959]) -Greedy action tensor([ 0.5207, 0.1017, 0.4653, -0.2465]) tensor([0.3259, 0.2144, 0.3084, 0.1513]) -Greedy action tensor([-0.3038, -0.3417, 1.0866, -0.3471]) tensor([0.1442, 0.1388, 0.5790, 0.1381]) -Greedy action tensor([ 1.2467, -0.1230, -0.0082, 1.6082]) tensor([0.3362, 0.0855, 0.0958, 0.4825]) -Greedy action tensor([ 0.1016, -0.8954, 0.6196, 0.3255]) tensor([0.2326, 0.0858, 0.3905, 0.2910]) -Greedy action tensor([ 1.2363, 0.1217, 0.0784, -0.5368]) tensor([0.5519, 0.1810, 0.1734, 0.0937]) -Greedy action tensor([ 1.2244, -0.5934, 1.5999, 0.2985]) tensor([0.3318, 0.0539, 0.4829, 0.1314]) -Greedy action tensor([-1.0126, -1.0784, -0.7161, 0.2791]) tensor([0.1445, 0.1353, 0.1944, 0.5258]) -Greedy action tensor([-0.9986, -0.2614, -0.2064, 0.2777]) tensor([0.1126, 0.2353, 0.2486, 0.4034]) -Greedy action tensor([-0.5839, -1.2438, 0.1039, 0.5588]) tensor([0.1506, 0.0778, 0.2995, 0.4721]) -Greedy action tensor([-0.4337, -0.9211, 1.4058, -0.2351]) tensor([0.1096, 0.0673, 0.6895, 0.1336]) -Greedy action tensor([ 0.1819, -1.1953, -0.3031, -0.0390]) tensor([0.3745, 0.0945, 0.2306, 0.3003]) -Greedy action tensor([-0.2923, -1.5867, 0.1822, 1.6679]) tensor([0.1002, 0.0275, 0.1610, 0.7114]) -Greedy action tensor([-0.6180, -0.4030, 0.3851, -0.2152]) tensor([0.1547, 0.1918, 0.4219, 0.2315]) -Greedy action tensor([ 1.2500, 0.5044, 1.1901, -0.3202]) tensor([0.3811, 0.1808, 0.3589, 0.0793]) -Greedy action tensor([-0.7041, -2.1246, -0.2458, -0.0992]) tensor([0.2149, 0.0519, 0.3398, 0.3934]) -Greedy action tensor([ 0.4794, -0.4950, 0.4035, 1.6819]) tensor([0.1775, 0.0670, 0.1646, 0.5909]) -Greedy action tensor([-0.6147, 0.0134, -0.0511, 0.0437]) tensor([0.1524, 0.2856, 0.2677, 0.2944]) -Greedy action tensor([ 0.0748, -0.8462, -0.4147, -0.1881]) tensor([0.3597, 0.1432, 0.2205, 0.2766]) -Greedy action tensor([0.6754, 0.4042, 0.3000, 1.4182]) tensor([0.2197, 0.1675, 0.1509, 0.4618]) -Greedy action tensor([ 1.2005, -0.3215, 1.1156, 2.1796]) tensor([0.2084, 0.0455, 0.1914, 0.5547]) -Greedy action tensor([-0.2388, -0.3669, -1.0503, 0.4630]) tensor([0.2303, 0.2027, 0.1023, 0.4647]) -Greedy action tensor([ 0.4847, -0.7308, -0.1081, -0.4787]) tensor([0.4483, 0.1329, 0.2478, 0.1710]) -Greedy action tensor([ 1.8567, -1.1939, 1.1620, 0.8097]) tensor([0.5270, 0.0249, 0.2631, 0.1850]) -Greedy action tensor([-0.3182, -2.5612, -0.2793, 0.5946]) tensor([0.2156, 0.0229, 0.2242, 0.5373]) -Greedy action tensor([-0.0113, -0.5813, -0.0450, -0.3447]) tensor([0.3078, 0.1741, 0.2976, 0.2205]) -Greedy action tensor([-0.1161, 0.2373, 0.3285, 1.4020]) tensor([0.1170, 0.1666, 0.1825, 0.5339]) -Greedy action tensor([-0.1214, -1.7445, 1.0117, -1.4632]) tensor([0.2191, 0.0432, 0.6804, 0.0573]) -Greedy action tensor([ 0.4255, -1.7711, -0.1880, -0.4189]) tensor([0.4802, 0.0534, 0.2600, 0.2064]) -Greedy action tensor([-0.4337, -1.1996, 1.9329, 0.5939]) tensor([0.0670, 0.0312, 0.7145, 0.1873]) -Greedy action tensor([ 0.1175, -1.0915, 1.0874, 0.6733]) tensor([0.1761, 0.0526, 0.4644, 0.3070]) -Greedy action tensor([-0.7205, -0.6581, -0.4575, 0.7329]) tensor([0.1308, 0.1393, 0.1702, 0.5597]) -Greedy action tensor([ 0.3229, -1.3147, -0.9674, -0.0854]) tensor([0.4685, 0.0911, 0.1289, 0.3115]) -Greedy action tensor([-0.5521, -0.9447, 1.1857, -1.2422]) tensor([0.1272, 0.0859, 0.7231, 0.0638]) -Greedy action tensor([-0.1560, -1.1205, 0.7697, -0.4929]) tensor([0.2165, 0.0825, 0.5464, 0.1546]) -Greedy action tensor([ 0.8460, -0.7017, 0.1450, -0.6141]) tensor([0.5152, 0.1096, 0.2556, 0.1196]) -Greedy action tensor([ 0.3497, -0.0187, 0.0773, 0.2826]) tensor([0.2951, 0.2042, 0.2247, 0.2760]) -Greedy action tensor([-0.5502, 0.1307, -0.5386, 0.4810]) tensor([0.1472, 0.2909, 0.1490, 0.4129]) -Greedy action tensor([-1.0769, -0.6114, -0.3738, 0.9849]) tensor([0.0802, 0.1277, 0.1620, 0.6302]) -Greedy action tensor([ 0.2154, -1.8924, -0.2559, 1.0791]) tensor([0.2429, 0.0295, 0.1516, 0.5760]) -Greedy action tensor([-0.0380, -0.9699, 1.3416, -1.0203]) tensor([0.1742, 0.0686, 0.6920, 0.0652]) -Greedy action tensor([ 0.9290, -1.0045, 0.6415, 0.2043]) tensor([0.4203, 0.0608, 0.3153, 0.2036]) -Greedy action tensor([ 0.9435, -0.4650, 1.3113, 0.2012]) tensor([0.3159, 0.0773, 0.4564, 0.1504]) -Greedy action tensor([-0.4341, -0.7439, 0.9119, 1.3467]) tensor([0.0869, 0.0637, 0.3338, 0.5156]) -Greedy action tensor([ 1.4580, -0.5564, -0.1016, 0.8451]) tensor([0.5304, 0.0708, 0.1115, 0.2874]) -Greedy action tensor([ 0.9364, -0.0356, 0.7310, 1.3984]) tensor([0.2646, 0.1001, 0.2154, 0.4199]) -Greedy action tensor([-1.3695, 0.0399, -0.5346, -1.2177]) tensor([0.1168, 0.4781, 0.2692, 0.1359]) -Greedy action tensor([ 1.5746, -1.0469, 1.2166, 0.3196]) tensor([0.4862, 0.0353, 0.3399, 0.1386]) -Greedy action tensor([ 1.1325, -1.7482, -0.4138, 0.8813]) tensor([0.4885, 0.0274, 0.1041, 0.3800]) -Greedy action tensor([-0.3195, -1.4386, -0.2332, 0.3555]) tensor([0.2283, 0.0745, 0.2488, 0.4483]) -Greedy action tensor([ 0.0742, -0.5932, 0.7999, -1.2823]) tensor([0.2606, 0.1337, 0.5385, 0.0671]) -Greedy action tensor([-0.8151, 0.0023, -0.7268, 0.8248]) tensor([0.1051, 0.2381, 0.1148, 0.5419]) -Greedy action tensor([ 0.2704, -0.0734, -0.4429, -0.0493]) tensor([0.3418, 0.2424, 0.1675, 0.2483]) -Greedy action tensor([ 0.7012, -0.8944, 1.1974, -0.0578]) tensor([0.3018, 0.0612, 0.4957, 0.1413]) -Greedy action tensor([ 0.3125, 0.2183, 0.8886, -0.0481]) tensor([0.2280, 0.2075, 0.4056, 0.1590]) -Greedy action tensor([ 0.7715, -0.4294, 1.8221, -0.3260]) tensor([0.2225, 0.0670, 0.6363, 0.0743]) -Greedy action tensor([ 0.4895, -0.0242, 0.7672, 1.0737]) tensor([0.2122, 0.1270, 0.2802, 0.3806]) -Greedy action tensor([ 0.5057, -0.9939, 0.7534, 0.1832]) tensor([0.3097, 0.0691, 0.3968, 0.2244]) -Greedy action tensor([ 0.1761, -0.9246, -0.0783, -0.0091]) tensor([0.3403, 0.1132, 0.2638, 0.2827]) -Greedy action tensor([ 0.7322, 0.1456, -0.1299, -0.1698]) tensor([0.4194, 0.2333, 0.1771, 0.1702]) -Greedy action tensor([-0.1522, -0.0439, -1.0226, 0.6653]) tensor([0.2084, 0.2323, 0.0873, 0.4720]) -Greedy action tensor([-0.0597, -1.6325, 0.1187, -0.0912]) tensor([0.2966, 0.0615, 0.3545, 0.2874]) -Greedy action tensor([ 0.4212, -0.1056, 0.1099, 0.9458]) tensor([0.2492, 0.1472, 0.1825, 0.4211]) -Greedy action tensor([ 0.6032, -0.2644, 0.0408, 0.0609]) tensor([0.3889, 0.1633, 0.2216, 0.2261]) -Greedy action tensor([-0.6086, -1.4233, -0.3916, 0.7115]) tensor([0.1555, 0.0689, 0.1933, 0.5823]) -Greedy action tensor([-0.6418, -1.7661, 2.0055, -0.4285]) tensor([0.0600, 0.0195, 0.8464, 0.0742]) -Greedy action tensor([-1.0124, -0.8806, 0.4507, 0.1867]) tensor([0.1023, 0.1167, 0.4418, 0.3393]) -Greedy action tensor([-0.9380, -1.7270, -0.0722, 0.0843]) tensor([0.1513, 0.0687, 0.3595, 0.4205]) -Greedy action tensor([-0.0973, 0.4251, -0.6734, 0.0970]) tensor([0.2241, 0.3778, 0.1259, 0.2721]) -Greedy action tensor([-0.8523, -0.7150, 1.4275, -1.2657]) tensor([0.0795, 0.0912, 0.7768, 0.0526]) -Greedy action tensor([-1.0193, -1.3289, -0.6314, 0.8818]) tensor([0.1010, 0.0741, 0.1489, 0.6760]) -Greedy action tensor([-0.4374, -0.3427, 2.1619, -0.3561]) tensor([0.0601, 0.0661, 0.8086, 0.0652]) -Greedy action tensor([-1.1889, -1.3319, 1.0259, 0.5323]) tensor([0.0602, 0.0522, 0.5512, 0.3365]) -Greedy action tensor([ 0.7704, -1.6407, 0.5765, 0.2604]) tensor([0.3978, 0.0357, 0.3277, 0.2389]) -Greedy action tensor([-0.5337, 0.3641, 0.9576, -0.5077]) tensor([0.1121, 0.2750, 0.4979, 0.1150]) -Greedy action tensor([ 1.5020, 0.8097, -0.3839, 0.5454]) tensor([0.4911, 0.2458, 0.0745, 0.1887]) -Greedy action tensor([ 1.4833, 0.8279, -0.5832, -0.0341]) tensor([0.5361, 0.2784, 0.0679, 0.1176]) -Greedy action tensor([ 1.5558, -2.2651, 0.5531, 0.0958]) tensor([0.6169, 0.0135, 0.2263, 0.1433]) -Greedy action tensor([-0.6703, 0.9505, 0.0288, -0.9927]) tensor([0.1137, 0.5751, 0.2288, 0.0824]) -Greedy action tensor([ 0.5110, -0.0501, -0.1010, -0.0222]) tensor([0.3704, 0.2114, 0.2009, 0.2173]) -Greedy action tensor([ 0.6549, -0.3228, -0.0787, -0.4320]) tensor([0.4559, 0.1715, 0.2189, 0.1537]) -Greedy action tensor([ 0.7486, -0.2602, 0.0079, -0.3188]) tensor([0.4576, 0.1669, 0.2182, 0.1574]) -Greedy action tensor([ 0.5914, -0.2942, 0.0122, -0.1623]) tensor([0.4093, 0.1688, 0.2293, 0.1926]) -Greedy action tensor([ 0.7273, -0.5508, -0.0740, -0.3280]) tensor([0.4818, 0.1342, 0.2162, 0.1677]) -Greedy action tensor([ 6.7367e-01, -4.0985e-01, 9.4354e-05, -9.4853e-02]) tensor([0.4325, 0.1464, 0.2205, 0.2006]) -Greedy action tensor([ 0.3813, -0.2338, -0.1779, -0.0480]) tensor([0.3619, 0.1956, 0.2069, 0.2356]) -Greedy action tensor([ 0.8757, -0.7830, 0.1361, -0.5332]) tensor([0.5230, 0.0996, 0.2496, 0.1278]) -Greedy action tensor([ 0.5703, -0.0677, -0.0316, -0.2885]) tensor([0.4000, 0.2114, 0.2191, 0.1695]) -Greedy action tensor([ 0.4584, 0.0218, -0.0869, -0.0649]) tensor([0.3548, 0.2293, 0.2057, 0.2102]) -Greedy action tensor([ 0.3786, 0.1370, -0.1664, 0.0293]) tensor([0.3257, 0.2558, 0.1888, 0.2297]) -Greedy action tensor([ 0.5018, -0.0292, -0.0883, -0.1036]) tensor([0.3720, 0.2187, 0.2062, 0.2031]) -Greedy action tensor([ 1.1142, -1.0363, 0.1015, -0.6470]) tensor([0.6055, 0.0705, 0.2199, 0.1041]) -Greedy action tensor([ 0.3668, -0.0176, 0.0187, -0.1821]) tensor([0.3373, 0.2297, 0.2382, 0.1948]) -Greedy action tensor([ 0.7388, -0.3622, -0.1112, -0.2692]) tensor([0.4706, 0.1565, 0.2011, 0.1718]) -Greedy action tensor([ 0.9902, -0.9776, 0.0578, -0.7241]) tensor([0.5836, 0.0816, 0.2297, 0.1051]) -Greedy action tensor([ 0.7767, -0.6320, -0.0805, -0.2225]) tensor([0.4909, 0.1200, 0.2083, 0.1807]) -Greedy action tensor([ 0.6279, -0.5412, 0.0164, -0.2480]) tensor([0.4406, 0.1369, 0.2390, 0.1835]) -Greedy action tensor([ 0.8067, -0.7289, 0.1477, -0.6361]) tensor([0.5079, 0.1094, 0.2628, 0.1200]) -Greedy action tensor([ 0.8148, -0.3645, -0.1391, -0.3370]) tensor([0.4978, 0.1531, 0.1918, 0.1573]) -Greedy action tensor([ 0.8604, -0.5603, -0.1386, -0.6115]) tensor([0.5437, 0.1313, 0.2002, 0.1248]) -Greedy action tensor([ 0.9198, -0.9176, -0.0832, -0.5135]) tensor([0.5667, 0.0902, 0.2079, 0.1352]) -Greedy action tensor([ 0.7056, -0.7187, -0.0786, -0.3039]) tensor([0.4851, 0.1167, 0.2214, 0.1768]) -Greedy action tensor([ 0.5355, -0.2004, -0.1092, -0.4415]) tensor([0.4201, 0.2013, 0.2205, 0.1581]) -Greedy action tensor([ 0.9771, -0.6385, -0.1148, -0.2788]) tensor([0.5497, 0.1093, 0.1845, 0.1566]) -Greedy action tensor([ 0.9027, -0.4363, -0.0717, -0.4781]) tensor([0.5289, 0.1386, 0.1996, 0.1329]) -Greedy action tensor([ 1.0126, -0.5762, 0.0198, -0.3295]) tensor([0.5447, 0.1112, 0.2018, 0.1423]) -Greedy action tensor([ 0.4928, -0.0330, -0.0749, -0.1013]) tensor([0.3690, 0.2181, 0.2092, 0.2037]) -Greedy action tensor([ 0.7505, -0.4364, -0.0345, -0.2004]) tensor([0.4656, 0.1421, 0.2124, 0.1799]) -Greedy action tensor([ 0.8957, -0.6263, -0.0417, -0.3413]) tensor([0.5263, 0.1149, 0.2061, 0.1527]) -Greedy action tensor([ 0.8266, -0.3534, 0.1308, -0.4369]) tensor([0.4788, 0.1471, 0.2388, 0.1353]) -Greedy action tensor([ 0.5758, 0.3374, -0.1247, 0.0650]) tensor([0.3467, 0.2732, 0.1721, 0.2080]) -Greedy action tensor([ 0.6421, -0.4586, -0.0176, -0.4454]) tensor([0.4573, 0.1521, 0.2364, 0.1541]) -Greedy action tensor([ 0.8618, -0.6804, -0.0418, -0.3860]) tensor([0.5246, 0.1122, 0.2125, 0.1506]) -Greedy action tensor([ 0.5012, -0.7395, -0.1423, -0.2282]) tensor([0.4354, 0.1259, 0.2288, 0.2099]) -Greedy action tensor([ 0.2458, -0.1444, 0.1543, -0.3339]) tensor([0.3175, 0.2149, 0.2897, 0.1778]) -Greedy action tensor([ 0.5974, -0.1294, 0.0121, -0.2937]) tensor([0.4081, 0.1973, 0.2273, 0.1674]) -Greedy action tensor([ 0.8480, -0.4265, -0.2382, -0.5706]) tensor([0.5379, 0.1504, 0.1815, 0.1302]) -Greedy action tensor([ 0.4675, -0.0642, 0.3603, -0.0193]) tensor([0.3225, 0.1895, 0.2897, 0.1982]) -Greedy action tensor([ 0.4845, -0.3390, 0.0014, -0.1024]) tensor([0.3829, 0.1680, 0.2362, 0.2129]) -Greedy action tensor([ 0.4900, -0.2097, 0.0713, -0.3312]) tensor([0.3854, 0.1915, 0.2536, 0.1696]) -Greedy action tensor([ 0.8051, -0.4761, -0.1135, -0.4351]) tensor([0.5086, 0.1413, 0.2030, 0.1472]) -Greedy action tensor([ 0.5229, -0.5050, 0.0048, -0.1384]) tensor([0.4049, 0.1449, 0.2412, 0.2090]) -Greedy action tensor([ 0.7513, -0.5298, -0.1210, -0.1287]) tensor([0.4738, 0.1316, 0.1980, 0.1965]) -Greedy action tensor([ 0.7593, -0.9150, -0.2075, -0.3188]) tensor([0.5241, 0.0982, 0.1993, 0.1783]) -Greedy action tensor([ 0.6599, -0.5977, -0.0711, -0.2747]) tensor([0.4633, 0.1317, 0.2230, 0.1819]) -Greedy action tensor([ 1.0371, -0.4947, 0.0256, -0.5884]) tensor([0.5629, 0.1217, 0.2047, 0.1108]) -Greedy action tensor([ 0.4830, 0.1420, -0.1194, 0.1082]) tensor([0.3394, 0.2414, 0.1859, 0.2333]) -Greedy action tensor([ 0.8111, -0.6851, -0.1794, -0.6228]) tensor([0.5453, 0.1221, 0.2025, 0.1300]) -Greedy action tensor([ 0.6556, -0.2093, 0.1292, -0.0859]) tensor([0.4019, 0.1692, 0.2374, 0.1915]) -Greedy action tensor([ 0.7049, -0.4402, 0.1272, -0.5723]) tensor([0.4634, 0.1474, 0.2600, 0.1292]) -Greedy action tensor([ 0.7875, -0.6890, 0.1348, -0.3302]) tensor([0.4817, 0.1100, 0.2508, 0.1575]) -Greedy action tensor([ 0.4205, -0.0631, 0.1088, -0.3368]) tensor([0.3549, 0.2188, 0.2599, 0.1664]) -Greedy action tensor([ 0.5587, -0.3973, -0.0247, -0.3370]) tensor([0.4254, 0.1635, 0.2374, 0.1737]) -Greedy action tensor([ 1.0173, -0.4986, -0.0797, -0.2637]) tensor([0.5461, 0.1199, 0.1823, 0.1517]) -Greedy action tensor([ 0.7212, -0.7544, -0.0301, -0.3277]) tensor([0.4876, 0.1115, 0.2301, 0.1708]) -Greedy action tensor([ 0.5043, -0.4092, -0.0883, -0.1129]) tensor([0.4010, 0.1609, 0.2217, 0.2163]) -Greedy action tensor([ 0.9257, -0.7921, 0.0096, -0.4908]) tensor([0.5488, 0.0985, 0.2196, 0.1331]) -Greedy action tensor([ 0.3816, -0.3929, -0.0545, -0.5532]) tensor([0.4000, 0.1844, 0.2586, 0.1571]) -Greedy action tensor([ 0.5964, -0.3373, -0.0696, -0.1484]) tensor([0.4199, 0.1650, 0.2157, 0.1994]) -Greedy action tensor([ 0.8206, -0.4939, -0.1475, -0.2535]) tensor([0.5025, 0.1350, 0.1909, 0.1717]) -Greedy action tensor([ 0.8090, -0.7257, 0.0496, -0.3487]) tensor([0.5006, 0.1079, 0.2343, 0.1573]) -Greedy action tensor([ 0.8404, -0.5058, -0.0366, -0.4300]) tensor([0.5110, 0.1330, 0.2126, 0.1434]) -Greedy action tensor([ 0.5075, -0.6219, -0.1467, -0.1786]) tensor([0.4262, 0.1377, 0.2215, 0.2146]) -Greedy action tensor([ 1.1414, -0.6901, -0.0493, -0.3928]) tensor([0.5953, 0.0954, 0.1810, 0.1284]) -Greedy action tensor([ 1.1276, -0.4592, -0.0639, -0.3221]) tensor([0.5737, 0.1174, 0.1743, 0.1346]) -Greedy action tensor([ 0.2549, -0.1542, -0.1091, 0.0016]) tensor([0.3189, 0.2118, 0.2216, 0.2476]) -Greedy action tensor([ 0.6067, -0.5647, -0.2071, -0.1060]) tensor([0.4457, 0.1382, 0.1975, 0.2186]) -Greedy action tensor([ 1.2075, -0.5715, -0.0446, -0.6337]) tensor([0.6198, 0.1046, 0.1772, 0.0983]) -Greedy action tensor([ 0.3820, -0.0279, 0.0053, 0.0800]) tensor([0.3237, 0.2149, 0.2221, 0.2393]) -Greedy action tensor([ 0.8105, -0.3865, 0.0410, -0.3197]) tensor([0.4789, 0.1447, 0.2218, 0.1547]) -Greedy action tensor([ 0.9178, -0.6856, -0.0495, -0.4804]) tensor([0.5469, 0.1100, 0.2079, 0.1351]) -Greedy action tensor([ 1.0279, -0.7160, 0.0191, -0.4670]) tensor([0.5670, 0.0991, 0.2067, 0.1272]) -Greedy action tensor([ 0.5440, -0.4462, -0.0211, -0.1823]) tensor([0.4126, 0.1533, 0.2345, 0.1996]) -Greedy action tensor([ 0.7608, -0.5368, -0.0331, -0.5021]) tensor([0.4980, 0.1360, 0.2251, 0.1408]) -Greedy action tensor([ 0.5057, -0.2678, 0.0279, -0.1533]) tensor([0.3848, 0.1775, 0.2386, 0.1991]) -Greedy action tensor([ 0.6028, -0.3610, -0.0974, -0.5102]) tensor([0.4532, 0.1729, 0.2250, 0.1489]) -Greedy action tensor([ 0.5495, -0.4580, -0.0392, -0.1609]) tensor([0.4147, 0.1514, 0.2302, 0.2038]) -Greedy action tensor([ 6.7424e-01, -5.6455e-04, 1.6360e-02, -2.6288e-01]) tensor([0.4134, 0.2105, 0.2141, 0.1620]) -Greedy action tensor([ 1.0339, -1.1542, 0.1231, -0.7807]) tensor([0.5962, 0.0669, 0.2398, 0.0971]) -Greedy action tensor([ 0.7714, -0.9192, -0.1290, -0.2223]) tensor([0.5099, 0.0940, 0.2072, 0.1888]) -Greedy action tensor([-1.9020, -0.5197, 0.8035, -0.0670]) tensor([0.0382, 0.1520, 0.5708, 0.2390]) -Greedy action tensor([-1.9399, -0.4484, 0.6664, -0.1755]) tensor([0.0403, 0.1790, 0.5456, 0.2351]) -Greedy action tensor([-1.8940, -0.3451, 0.6354, -0.1443]) tensor([0.0417, 0.1960, 0.5226, 0.2397]) -Greedy action tensor([-1.0442, 0.4418, 0.2490, 0.8047]) tensor([0.0649, 0.2867, 0.2364, 0.4121]) -Greedy action tensor([-1.6511, -0.2699, 0.6517, 0.0634]) tensor([0.0487, 0.1938, 0.4871, 0.2705]) -Greedy action tensor([-1.8979, -0.4416, 0.6452, -0.1581]) tensor([0.0422, 0.1810, 0.5365, 0.2403]) -Greedy action tensor([-1.6852, -0.5387, 0.5339, -0.0277]) tensor([0.0538, 0.1693, 0.4948, 0.2822]) -Greedy action tensor([0.2428, 0.4249, 0.7327, 1.4178]) tensor([0.1414, 0.1697, 0.2308, 0.4580]) -Greedy action tensor([-0.7658, -0.6548, 0.7201, 1.2303]) tensor([0.0720, 0.0804, 0.3180, 0.5297]) -Greedy action tensor([-0.9555, -0.4358, 0.4274, 0.8589]) tensor([0.0781, 0.1313, 0.3113, 0.4793]) -Greedy action tensor([-1.9113, -0.4614, 0.6667, -0.1445]) tensor([0.0412, 0.1755, 0.5423, 0.2410]) -Greedy action tensor([-1.8847, -0.4241, 0.6360, -0.1446]) tensor([0.0427, 0.1838, 0.5305, 0.2430]) -Greedy action tensor([-1.8008, -0.2629, 0.5931, -0.0928]) tensor([0.0452, 0.2104, 0.4951, 0.2494]) -Greedy action tensor([-1.3764, -0.5440, 0.4135, -0.0219]) tensor([0.0760, 0.1747, 0.4550, 0.2944]) -Greedy action tensor([-0.3289, -0.2379, 0.2286, 0.3130]) tensor([0.1742, 0.1908, 0.3042, 0.3309]) -Greedy action tensor([-1.9312, -0.4286, 0.6547, -0.1711]) tensor([0.0407, 0.1828, 0.5400, 0.2365]) -Greedy action tensor([-1.9455, -0.4495, 0.6677, -0.1802]) tensor([0.0401, 0.1789, 0.5468, 0.2342]) -Greedy action tensor([-1.3513, 0.1826, 0.2930, 0.1902]) tensor([0.0646, 0.2994, 0.3344, 0.3017]) -Greedy action tensor([-1.9297, -0.4587, 0.6749, -0.1560]) tensor([0.0404, 0.1757, 0.5460, 0.2379]) -Greedy action tensor([-1.7710, -0.4403, 0.5851, -0.0845]) tensor([0.0482, 0.1825, 0.5088, 0.2605]) -Greedy action tensor([-1.0889, -0.5839, 0.2488, 0.1930]) tensor([0.0993, 0.1645, 0.3784, 0.3578]) -Greedy action tensor([-1.8967, -0.4327, 0.6472, -0.1461]) tensor([0.0420, 0.1816, 0.5346, 0.2418]) -Greedy action tensor([-1.7947, 0.0514, 0.5208, -0.1012]) tensor([0.0437, 0.2766, 0.4423, 0.2375]) -Greedy action tensor([-1.4368, -0.1609, 0.5081, 0.2292]) tensor([0.0593, 0.2124, 0.4146, 0.3137]) -Greedy action tensor([-1.6861, -0.0574, 0.5013, -0.0611]) tensor([0.0498, 0.2537, 0.4437, 0.2528]) -Greedy action tensor([0.0800, 1.0508, 0.1678, 0.8237]) tensor([0.1463, 0.3862, 0.1597, 0.3078]) -Greedy action tensor([-1.9299, -0.3885, 0.6490, -0.1704]) tensor([0.0405, 0.1894, 0.5345, 0.2356]) -Greedy action tensor([-1.5319, -0.3770, 0.5571, 0.1350]) tensor([0.0570, 0.1809, 0.4603, 0.3018]) -Greedy action tensor([-1.9373, -0.4468, 0.6663, -0.1755]) tensor([0.0404, 0.1792, 0.5454, 0.2350]) -Greedy action tensor([-1.6393, -0.5377, 0.5202, -0.0779]) tensor([0.0573, 0.1725, 0.4969, 0.2732]) -Greedy action tensor([-1.8905, -0.4390, 0.6405, -0.1551]) tensor([0.0425, 0.1816, 0.5346, 0.2413]) -Greedy action tensor([-1.7214, -0.4837, 0.6041, 0.0672]) tensor([0.0484, 0.1669, 0.4952, 0.2895]) -Greedy action tensor([-1.4863, 0.0392, 0.6081, 0.3365]) tensor([0.0502, 0.2309, 0.4079, 0.3109]) -Greedy action tensor([-1.1759, 0.2695, 0.2467, -0.0385]) tensor([0.0799, 0.3392, 0.3316, 0.2493]) -Greedy action tensor([-1.7212, -0.0693, 0.5413, 0.0862]) tensor([0.0456, 0.2380, 0.4383, 0.2780]) -Greedy action tensor([-1.9430, -0.4484, 0.6670, -0.1794]) tensor([0.0402, 0.1791, 0.5464, 0.2344]) -Greedy action tensor([-0.6818, -0.6830, 0.2204, 0.5917]) tensor([0.1244, 0.1243, 0.3067, 0.4446]) -Greedy action tensor([-1.0072, 0.8444, 0.0618, 0.3252]) tensor([0.0711, 0.4527, 0.2070, 0.2693]) -Greedy action tensor([-1.7391, -0.2196, 0.5906, 0.1035]) tensor([0.0451, 0.2062, 0.4637, 0.2849]) -Greedy action tensor([-1.4471, -0.4859, 0.7330, 0.6260]) tensor([0.0490, 0.1281, 0.4334, 0.3895]) -Greedy action tensor([-1.7125, -0.4861, 0.5706, -0.0063]) tensor([0.0507, 0.1728, 0.4972, 0.2793]) -Greedy action tensor([-1.7633, 0.1550, 0.4779, -0.0671]) tensor([0.0441, 0.3004, 0.4149, 0.2406]) -Greedy action tensor([-1.9313, -0.4262, 0.6590, -0.1708]) tensor([0.0406, 0.1827, 0.5408, 0.2359]) -Greedy action tensor([-1.8353, -0.4536, 0.6155, -0.1206]) tensor([0.0452, 0.1799, 0.5239, 0.2510]) -Greedy action tensor([-1.8782, -0.3576, 0.6222, -0.1559]) tensor([0.0428, 0.1958, 0.5217, 0.2396]) -Greedy action tensor([-1.2913, 0.0496, 0.4453, 0.3581]) tensor([0.0637, 0.2434, 0.3616, 0.3314]) -Greedy action tensor([-1.2538, -0.6708, 0.3064, 0.1662]) tensor([0.0856, 0.1533, 0.4072, 0.3539]) -Greedy action tensor([-1.8331, -0.0936, 0.5462, -0.1057]) tensor([0.0433, 0.2463, 0.4670, 0.2434]) -Greedy action tensor([-1.6638, -0.5605, 0.5369, -0.0021]) tensor([0.0546, 0.1646, 0.4931, 0.2877]) -Greedy action tensor([-1.7803, -0.5067, 0.5709, -0.0939]) tensor([0.0488, 0.1746, 0.5128, 0.2638]) -Greedy action tensor([-1.9376, -0.4443, 0.6631, -0.1768]) tensor([0.0404, 0.1799, 0.5445, 0.2351]) -Greedy action tensor([-1.0831, 0.7394, 0.0660, 0.3469]) tensor([0.0689, 0.4261, 0.2173, 0.2878]) -Greedy action tensor([-1.8857, -0.4248, 0.6271, -0.1406]) tensor([0.0428, 0.1844, 0.5279, 0.2450]) -Greedy action tensor([-1.7403, -0.4849, 0.5715, -0.0742]) tensor([0.0503, 0.1764, 0.5073, 0.2660]) -Greedy action tensor([-1.7876, -0.4920, 0.5982, -0.0507]) tensor([0.0472, 0.1723, 0.5126, 0.2679]) -Greedy action tensor([-1.0520, -0.6519, 0.5605, 0.8786]) tensor([0.0694, 0.1036, 0.3482, 0.4787]) -Greedy action tensor([-1.5970, -0.7662, -0.1517, -0.5666]) tensor([0.0967, 0.2220, 0.4103, 0.2710]) -Greedy action tensor([-1.6596, -0.5176, 0.5224, 0.0456]) tensor([0.0541, 0.1694, 0.4791, 0.2974]) -Greedy action tensor([-0.5564, -0.0623, 0.1674, -0.0800]) tensor([0.1584, 0.2597, 0.3268, 0.2551]) -Greedy action tensor([-1.9279, -0.4140, 0.6566, -0.1681]) tensor([0.0406, 0.1846, 0.5386, 0.2361]) -Greedy action tensor([-1.7399, -0.2661, 0.5559, -0.0315]) tensor([0.0480, 0.2097, 0.4771, 0.2652]) -Greedy action tensor([-1.9206, -0.4093, 0.6516, -0.1654]) tensor([0.0410, 0.1857, 0.5364, 0.2370]) -Greedy action tensor([-1.9174, -0.3926, 0.6477, -0.1655]) tensor([0.0410, 0.1886, 0.5337, 0.2367]) -Greedy action tensor([-1.8936, -0.3374, 0.6348, -0.1496]) tensor([0.0417, 0.1976, 0.5223, 0.2384]) -Greedy action tensor([-1.7390, -0.1403, 0.5383, -0.0277]) tensor([0.0471, 0.2330, 0.4592, 0.2607]) -Greedy action tensor([-1.8049, -0.3648, 0.6336, -0.0630]) tensor([0.0447, 0.1886, 0.5117, 0.2550]) -Greedy action tensor([-1.1594, -0.7089, 0.6319, 0.6879]) tensor([0.0671, 0.1052, 0.4023, 0.4254]) -Greedy action tensor([-1.8812, -0.4189, 0.6332, -0.1397]) tensor([0.0428, 0.1846, 0.5286, 0.2440]) -Greedy action tensor([-1.9374, -0.4409, 0.6649, -0.1745]) tensor([0.0403, 0.1802, 0.5443, 0.2352]) -Greedy action tensor([-1.7073, -0.4598, 0.5756, -0.0403]) tensor([0.0511, 0.1778, 0.5007, 0.2705]) -Greedy action tensor([-1.7041, -0.4934, 0.5541, -0.0944]) tensor([0.0528, 0.1773, 0.5055, 0.2643]) -Greedy action tensor([-1.7092, -0.8320, 0.5987, -0.3564]) tensor([0.0577, 0.1388, 0.5802, 0.2233]) -Greedy action tensor([-1.8985, -0.4473, 0.6457, -0.1586]) tensor([0.0422, 0.1801, 0.5373, 0.2404]) -Greedy action tensor([-1.6493, 0.3107, 0.4090, -0.0200]) tensor([0.0475, 0.3376, 0.3724, 0.2425]) -Greedy action tensor([-1.3472, -0.5928, 0.3764, 0.1358]) tensor([0.0761, 0.1619, 0.4266, 0.3354]) -Greedy action tensor([-1.8684, -0.4550, 0.7657, 0.1004]) tensor([0.0382, 0.1569, 0.5316, 0.2733]) -Greedy action tensor([-1.6072, 0.2263, 0.3760, 0.0240]) tensor([0.0509, 0.3187, 0.3701, 0.2603]) -Greedy action tensor([-1.8891, -0.4237, 0.6455, -0.1308]) tensor([0.0421, 0.1823, 0.5312, 0.2444]) -Greedy action tensor([-1.6493, 0.0238, 0.4496, -0.0170]) tensor([0.0510, 0.2718, 0.4161, 0.2610]) -Greedy action tensor([-1.4448, -0.4831, 0.5334, 0.3247]) tensor([0.0598, 0.1565, 0.4326, 0.3511]) -Greedy action tensor([-1.5347, -0.0674, 0.4654, -0.1119]) tensor([0.0593, 0.2570, 0.4379, 0.2458]) -Greedy action tensor([-1.7025, -0.1119, 0.4995, -0.0869]) tensor([0.0501, 0.2456, 0.4526, 0.2518]) -Greedy action tensor([ 0.9691, 0.2036, -0.2898, 0.1129]) tensor([0.4600, 0.2140, 0.1306, 0.1954]) -Greedy action tensor([ 1.7142, -1.0483, -0.2665, 1.0785]) tensor([0.5778, 0.0365, 0.0797, 0.3060]) -Greedy action tensor([ 1.5267, -0.3618, -0.2198, 0.5579]) tensor([0.5864, 0.0887, 0.1023, 0.2226]) -Greedy action tensor([ 1.4648, -0.4168, -0.7467, 0.5580]) tensor([0.6004, 0.0915, 0.0658, 0.2424]) -Greedy action tensor([ 1.3657, -0.7759, -0.3652, 0.4917]) tensor([0.5841, 0.0686, 0.1035, 0.2438]) -Greedy action tensor([ 1.0351, 0.0355, -0.3319, 0.5615]) tensor([0.4453, 0.1639, 0.1135, 0.2773]) -Greedy action tensor([ 2.0651, -1.0387, -0.3685, 0.7204]) tensor([0.7178, 0.0322, 0.0630, 0.1871]) -Greedy action tensor([ 1.3974, -0.3067, -0.5964, -0.0618]) tensor([0.6449, 0.1173, 0.0878, 0.1499]) -Greedy action tensor([ 1.9921, -0.9997, -0.1795, 0.5015]) tensor([0.7197, 0.0361, 0.0820, 0.1621]) -Greedy action tensor([ 1.1901, -0.5473, -0.5851, 0.4757]) tensor([0.5450, 0.0959, 0.0923, 0.2668]) -Greedy action tensor([ 1.5004, 0.1150, -0.1037, 0.2326]) tensor([0.5771, 0.1444, 0.1160, 0.1624]) -Greedy action tensor([ 0.2724, -0.2669, -0.0894, 0.0349]) tensor([0.3259, 0.1901, 0.2270, 0.2570]) -Greedy action tensor([ 1.8037, -0.9266, -0.4511, 0.4848]) tensor([0.6956, 0.0454, 0.0730, 0.1860]) -Greedy action tensor([ 1.8768, -1.1233, 0.0453, 0.1694]) tensor([0.7188, 0.0358, 0.1151, 0.1303]) -Greedy action tensor([ 2.5166, -0.7356, -0.4913, 0.7459]) tensor([0.7947, 0.0307, 0.0393, 0.1353]) -Greedy action tensor([ 1.7589, -0.3911, -0.0886, 0.2625]) tensor([0.6675, 0.0778, 0.1052, 0.1495]) -Greedy action tensor([ 1.0632, -0.4963, -0.2136, 0.6588]) tensor([0.4637, 0.0975, 0.1293, 0.3095]) -Greedy action tensor([ 1.8410, -0.9989, -0.1593, 0.7657]) tensor([0.6515, 0.0381, 0.0881, 0.2223]) -Greedy action tensor([ 0.8319, -0.3556, -0.7667, 0.8177]) tensor([0.4011, 0.1223, 0.0811, 0.3955]) -Greedy action tensor([ 1.1928, -0.3012, -0.0670, 0.2677]) tensor([0.5250, 0.1179, 0.1490, 0.2082]) -Greedy action tensor([ 1.1849, -0.1452, -0.1252, 0.2606]) tensor([0.5178, 0.1370, 0.1397, 0.2055]) -Greedy action tensor([ 1.7198, -1.0703, -0.0863, 0.3932]) tensor([0.6707, 0.0412, 0.1102, 0.1780]) -Greedy action tensor([ 2.3278, -1.3516, -0.0985, 0.5375]) tensor([0.7809, 0.0197, 0.0690, 0.1304]) -Greedy action tensor([ 1.3669, -0.0849, -0.5590, 0.5852]) tensor([0.5442, 0.1274, 0.0793, 0.2490]) -Greedy action tensor([ 1.2608, -0.5099, -0.2393, 0.2879]) tensor([0.5646, 0.0961, 0.1260, 0.2134]) -Greedy action tensor([ 1.7506, -0.1439, -0.1801, 0.2755]) tensor([0.6561, 0.0987, 0.0952, 0.1501]) -Greedy action tensor([ 1.4251, -0.5734, -0.1814, 0.3449]) tensor([0.5968, 0.0809, 0.1197, 0.2026]) -Greedy action tensor([ 1.2902, -0.7408, -0.1410, 0.2221]) tensor([0.5835, 0.0765, 0.1395, 0.2005]) -Greedy action tensor([ 1.2673, -0.5830, -0.1244, 0.3634]) tensor([0.5522, 0.0868, 0.1373, 0.2237]) -Greedy action tensor([ 0.6501, 0.0570, -0.3363, -0.0649]) tensor([0.4141, 0.2289, 0.1544, 0.2026]) -Greedy action tensor([ 2.2201, -0.9280, -0.2737, 0.6423]) tensor([0.7508, 0.0322, 0.0620, 0.1550]) -Greedy action tensor([ 1.5024, -0.4531, 0.0058, 0.4145]) tensor([0.5874, 0.0831, 0.1315, 0.1979]) -Greedy action tensor([ 0.9390, -0.4925, -0.3294, 0.2764]) tensor([0.4912, 0.1174, 0.1382, 0.2532]) -Greedy action tensor([ 1.3271, -0.5217, -0.1228, 0.0810]) tensor([0.5954, 0.0937, 0.1397, 0.1712]) -Greedy action tensor([ 2.4208, -1.5536, -0.0748, 0.2642]) tensor([0.8217, 0.0154, 0.0677, 0.0951]) -Greedy action tensor([ 0.7692, -0.2470, -0.1850, -0.3005]) tensor([0.4784, 0.1732, 0.1842, 0.1642]) -Greedy action tensor([ 1.7406, -0.7543, -0.0736, 0.1756]) tensor([0.6875, 0.0567, 0.1120, 0.1437]) -Greedy action tensor([ 1.3822, -0.3590, -0.6919, 0.3815]) tensor([0.5993, 0.1051, 0.0753, 0.2203]) -Greedy action tensor([ 1.1556, -0.2530, -0.4394, 0.0103]) tensor([0.5664, 0.1385, 0.1149, 0.1802]) -Greedy action tensor([ 1.5455, 0.3126, -0.3986, 0.2145]) tensor([0.5887, 0.1716, 0.0842, 0.1555]) -Greedy action tensor([ 1.5439, -0.2160, -0.3312, 0.8285]) tensor([0.5512, 0.0948, 0.0845, 0.2695]) -Greedy action tensor([ 1.3785, -0.6523, -0.0223, 0.4099]) tensor([0.5691, 0.0747, 0.1402, 0.2160]) -Greedy action tensor([ 0.7981, -0.1018, 0.0131, 0.0986]) tensor([0.4238, 0.1723, 0.1933, 0.2106]) -Greedy action tensor([ 0.6976, -0.1678, -0.2022, 0.1965]) tensor([0.4109, 0.1730, 0.1671, 0.2490]) -Greedy action tensor([ 1.1988, -0.6746, -0.1821, 0.0285]) tensor([0.5830, 0.0896, 0.1465, 0.1809]) -Greedy action tensor([ 1.9614, -0.1407, -0.7322, 0.6447]) tensor([0.6859, 0.0838, 0.0464, 0.1838]) -Greedy action tensor([ 1.5304, -0.5953, -0.2422, 0.3840]) tensor([0.6223, 0.0743, 0.1057, 0.1978]) -Greedy action tensor([ 2.0009, -0.6627, -0.2211, 0.7202]) tensor([0.6868, 0.0479, 0.0744, 0.1908]) -Greedy action tensor([ 1.0462, -0.6227, -0.4036, -0.1844]) tensor([0.5830, 0.1099, 0.1368, 0.1703]) -Greedy action tensor([ 1.0451, -0.3376, -0.6223, 0.4205]) tensor([0.5063, 0.1270, 0.0956, 0.2711]) -Greedy action tensor([ 0.8177, -0.5156, 0.2109, -0.1826]) tensor([0.4595, 0.1211, 0.2505, 0.1690]) -Greedy action tensor([ 1.2060, -0.8045, 0.2064, 0.3853]) tensor([0.5149, 0.0690, 0.1895, 0.2266]) -Greedy action tensor([ 1.6760, -0.5336, -0.5205, 0.6249]) tensor([0.6367, 0.0699, 0.0708, 0.2226]) -Greedy action tensor([ 2.6264, -0.3497, 0.4035, 0.0515]) tensor([0.8094, 0.0413, 0.0877, 0.0616]) -Greedy action tensor([2.3521, 0.5264, 0.1391, 0.0728]) tensor([0.7284, 0.1174, 0.0797, 0.0746]) -Greedy action tensor([ 1.7664, -1.0830, -0.1235, 0.4992]) tensor([0.6709, 0.0388, 0.1014, 0.1889]) -Greedy action tensor([ 2.5917, -1.4853, -0.2248, 0.1355]) tensor([0.8602, 0.0146, 0.0515, 0.0738]) -Greedy action tensor([ 1.6439, -0.5158, -0.4890, 0.2874]) tensor([0.6705, 0.0773, 0.0795, 0.1727]) -Greedy action tensor([ 1.9196, -0.9435, -0.2041, 0.6636]) tensor([0.6842, 0.0391, 0.0818, 0.1949]) -Greedy action tensor([ 2.3795, -0.5163, -0.5721, 0.6650]) tensor([0.7767, 0.0429, 0.0406, 0.1398]) -Greedy action tensor([ 1.5725, -0.2574, -0.1617, 0.7034]) tensor([0.5694, 0.0913, 0.1005, 0.2388]) -Greedy action tensor([ 1.7713, -0.8251, -0.2245, 0.3265]) tensor([0.6915, 0.0515, 0.0940, 0.1630]) -Greedy action tensor([ 1.5548, -0.4678, -0.2924, 0.0290]) tensor([0.6634, 0.0878, 0.1046, 0.1442]) -Greedy action tensor([ 1.3756, -0.8503, -0.2299, 0.3245]) tensor([0.6030, 0.0651, 0.1211, 0.2108]) -Greedy action tensor([ 1.2305, -0.1076, 0.0370, 0.1119]) tensor([0.5285, 0.1386, 0.1602, 0.1727]) -Greedy action tensor([ 0.9180, -0.4090, 0.0887, 0.0360]) tensor([0.4727, 0.1254, 0.2063, 0.1957]) -Greedy action tensor([ 1.3575, -0.2640, -0.3160, 0.0870]) tensor([0.6003, 0.1186, 0.1126, 0.1685]) -Greedy action tensor([ 0.5407, -0.0032, 0.1816, -0.3139]) tensor([0.3698, 0.2147, 0.2582, 0.1573]) -Greedy action tensor([ 1.3379, -0.2015, -0.1373, 0.1864]) tensor([0.5684, 0.1219, 0.1300, 0.1797]) -Greedy action tensor([ 1.2533, -0.2241, -0.8121, 0.2197]) tensor([0.5846, 0.1334, 0.0741, 0.2079]) -Greedy action tensor([ 0.7653, -0.4415, -0.2404, 0.0218]) tensor([0.4672, 0.1398, 0.1709, 0.2221]) -Greedy action tensor([ 1.4732, -0.5050, -0.3169, 0.0781]) tensor([0.6439, 0.0891, 0.1075, 0.1596]) -Greedy action tensor([ 1.3995, -0.2821, -0.5308, 0.4576]) tensor([0.5810, 0.1081, 0.0843, 0.2265]) -Greedy action tensor([ 1.9783, -0.8925, -0.1561, 0.9687]) tensor([0.6496, 0.0368, 0.0769, 0.2367]) -Greedy action tensor([ 1.3444, -0.2970, -1.1004, 0.3557]) tensor([0.6051, 0.1172, 0.0525, 0.2251]) -Greedy action tensor([ 1.3257, -0.4891, -0.6862, 1.1508]) tensor([0.4681, 0.0762, 0.0626, 0.3930]) -Greedy action tensor([ 1.3969, -0.4791, -0.4448, 0.3796]) tensor([0.5976, 0.0916, 0.0948, 0.2161]) -Greedy action tensor([ 2.1613, -0.4813, -0.3725, 0.4047]) tensor([0.7558, 0.0538, 0.0600, 0.1305]) -Greedy action tensor([ 1.7028, -0.7833, -0.1031, 0.5272]) tensor([0.6426, 0.0535, 0.1056, 0.1983]) -Greedy action tensor([ 0.6548, -0.2683, -0.5499, 0.6181]) tensor([0.3758, 0.1493, 0.1127, 0.3622]) -Greedy action tensor([ 1.5131, -0.1709, -0.3884, 0.6782]) tensor([0.5653, 0.1049, 0.0844, 0.2453]) -Greedy action tensor([ 0.7820, -0.6184, -0.0779, -0.3664]) tensor([0.5033, 0.1241, 0.2130, 0.1596]) -Greedy action tensor([ 0.3038, 0.0828, -0.0046, -0.3034]) tensor([0.3246, 0.2602, 0.2384, 0.1768]) -Greedy action tensor([ 0.5800, -0.5731, -0.1481, -0.1500]) tensor([0.4385, 0.1384, 0.2117, 0.2113]) -Greedy action tensor([ 0.4054, 0.0762, 0.0117, -0.1387]) tensor([0.3362, 0.2419, 0.2268, 0.1951]) -Greedy action tensor([ 0.3682, -0.0726, -0.1342, 0.0221]) tensor([0.3383, 0.2177, 0.2047, 0.2393]) -Greedy action tensor([ 0.7789, -0.3994, 0.0700, -0.4324]) tensor([0.4767, 0.1467, 0.2346, 0.1420]) -Greedy action tensor([ 0.8392, -0.5661, -0.2293, -0.7679]) tensor([0.5589, 0.1371, 0.1920, 0.1120]) -Greedy action tensor([ 0.3239, 0.2130, -0.0950, 0.1016]) tensor([0.2982, 0.2669, 0.1962, 0.2388]) -Greedy action tensor([ 0.5463, -0.3820, -0.0821, -0.1815]) tensor([0.4146, 0.1639, 0.2212, 0.2003]) -Greedy action tensor([ 0.1601, 0.1425, -0.2018, -0.3424]) tensor([0.3045, 0.2992, 0.2121, 0.1842]) -Greedy action tensor([ 0.8216, -0.4239, -0.0938, -0.2273]) tensor([0.4906, 0.1412, 0.1964, 0.1719]) -Greedy action tensor([ 0.5660, -0.2597, -0.0774, -0.0230]) tensor([0.3971, 0.1739, 0.2087, 0.2203]) -Greedy action tensor([ 1.2210, -0.6618, 0.0707, -0.6558]) tensor([0.6166, 0.0938, 0.1952, 0.0944]) -Greedy action tensor([ 0.2919, -0.0650, -0.0819, -0.1628]) tensor([0.3308, 0.2315, 0.2277, 0.2100]) -Greedy action tensor([ 0.7460, -0.9311, -0.0658, -0.5179]) tensor([0.5226, 0.0977, 0.2321, 0.1477]) -Greedy action tensor([ 0.5391, -0.3911, 0.0946, -0.3551]) tensor([0.4091, 0.1614, 0.2623, 0.1673]) -Greedy action tensor([ 0.6752, -0.5045, -0.0195, -0.3409]) tensor([0.4611, 0.1417, 0.2302, 0.1669]) -Greedy action tensor([ 0.8140, -0.4780, -0.0204, -0.5183]) tensor([0.5069, 0.1393, 0.2201, 0.1338]) -Greedy action tensor([ 0.6546, -0.1719, -0.0171, -0.0424]) tensor([0.4087, 0.1789, 0.2088, 0.2036]) -Greedy action tensor([ 0.6636, -0.5388, 0.1857, -0.9272]) tensor([0.4707, 0.1414, 0.2919, 0.0959]) -Greedy action tensor([ 0.1601, -0.0832, -0.1468, -0.3198]) tensor([0.3186, 0.2498, 0.2344, 0.1972]) -Greedy action tensor([ 0.9879, -0.8368, 0.0014, -0.4474]) tensor([0.5643, 0.0910, 0.2104, 0.1343]) -Greedy action tensor([ 0.8643, -0.6130, 0.0090, -0.5103]) tensor([0.5246, 0.1197, 0.2230, 0.1327]) -Greedy action tensor([ 0.8226, -0.4552, -0.2092, -0.2411]) tensor([0.5050, 0.1407, 0.1800, 0.1743]) -Greedy action tensor([ 0.9109, -0.5538, -0.0662, -0.6025]) tensor([0.5471, 0.1265, 0.2059, 0.1204]) -Greedy action tensor([ 0.8195, -0.6770, 0.0033, -0.3640]) tensor([0.5070, 0.1135, 0.2242, 0.1553]) -Greedy action tensor([ 0.7850, 0.0377, 0.1026, -0.0871]) tensor([0.4172, 0.1976, 0.2108, 0.1744]) -Greedy action tensor([ 0.8850, -0.5599, -0.0568, -0.2914]) tensor([0.5170, 0.1219, 0.2016, 0.1594]) -Greedy action tensor([ 0.7641, -0.6197, -0.0619, -0.5789]) tensor([0.5130, 0.1286, 0.2246, 0.1339]) -Greedy action tensor([ 0.9063, -0.3484, -0.0685, -0.1455]) tensor([0.4971, 0.1417, 0.1875, 0.1736]) -Greedy action tensor([ 0.6804, -0.3397, 0.0741, -0.5738]) tensor([0.4564, 0.1645, 0.2489, 0.1302]) -Greedy action tensor([ 0.6534, -0.3842, -0.1192, -0.3701]) tensor([0.4597, 0.1629, 0.2123, 0.1652]) -Greedy action tensor([ 0.6843, -0.1786, -0.1434, -0.1841]) tensor([0.4388, 0.1852, 0.1918, 0.1842]) -Greedy action tensor([ 1.1591, -0.6879, -0.0487, -0.3732]) tensor([0.5979, 0.0943, 0.1787, 0.1292]) -Greedy action tensor([ 0.6912, -0.4361, -0.1397, -0.4985]) tensor([0.4845, 0.1569, 0.2111, 0.1474]) -Greedy action tensor([ 0.3641, -0.2174, -0.1295, -0.1932]) tensor([0.3647, 0.2039, 0.2226, 0.2089]) -Greedy action tensor([ 0.4215, -0.2893, -0.0440, -0.1565]) tensor([0.3731, 0.1833, 0.2343, 0.2093]) -Greedy action tensor([ 0.6795, -0.1282, -0.0047, -0.3114]) tensor([0.4307, 0.1921, 0.2173, 0.1599]) -Greedy action tensor([ 0.9206, -0.8849, 0.1331, -0.5328]) tensor([0.5396, 0.0887, 0.2455, 0.1261]) -Greedy action tensor([ 6.8779e-01, -5.5209e-04, 3.9293e-02, -3.0698e-01]) tensor([0.4175, 0.2098, 0.2183, 0.1544]) -Greedy action tensor([ 0.2981, 0.0222, 0.0081, -0.3242]) tensor([0.3285, 0.2493, 0.2458, 0.1763]) -Greedy action tensor([ 0.7275, -0.3763, -0.0781, -0.1735]) tensor([0.4578, 0.1518, 0.2045, 0.1859]) -Greedy action tensor([ 1.0601, -0.3923, -0.1647, -0.2626]) tensor([0.5573, 0.1304, 0.1638, 0.1485]) -Greedy action tensor([ 0.7810, -0.6685, -0.0303, -0.2446]) tensor([0.4908, 0.1152, 0.2180, 0.1760]) -Greedy action tensor([ 0.6471, -0.2316, -0.1831, -0.2701]) tensor([0.4443, 0.1845, 0.1937, 0.1776]) -Greedy action tensor([ 0.4868, -0.2369, 0.0555, -0.2991]) tensor([0.3860, 0.1872, 0.2508, 0.1759]) -Greedy action tensor([ 0.9454, -0.3735, -0.0858, -0.2978]) tensor([0.5229, 0.1398, 0.1865, 0.1508]) -Greedy action tensor([ 0.5217, -0.1943, -0.0019, -0.1957]) tensor([0.3893, 0.1902, 0.2306, 0.1900]) -Greedy action tensor([ 0.2066, 0.0245, -0.1124, -0.4020]) tensor([0.3221, 0.2685, 0.2341, 0.1753]) -Greedy action tensor([ 0.8115, -0.6521, -0.0295, -0.2654]) tensor([0.4992, 0.1155, 0.2153, 0.1700]) -Greedy action tensor([ 0.5955, 0.0785, -0.2195, -0.1779]) tensor([0.3999, 0.2385, 0.1770, 0.1846]) -Greedy action tensor([ 0.6068, -0.6743, -0.1362, -0.3440]) tensor([0.4673, 0.1298, 0.2223, 0.1806]) -Greedy action tensor([ 0.4718, -0.1934, 0.0125, -0.5286]) tensor([0.3978, 0.2046, 0.2513, 0.1463]) -Greedy action tensor([ 0.4538, -0.1855, -0.0686, -0.1128]) tensor([0.3720, 0.1963, 0.2206, 0.2111]) -Greedy action tensor([ 0.5318, -0.4617, -0.0123, -0.3480]) tensor([0.4227, 0.1565, 0.2454, 0.1754]) -Greedy action tensor([ 0.6188, -0.5708, -0.1002, -0.2297]) tensor([0.4505, 0.1371, 0.2195, 0.1929]) -Greedy action tensor([ 0.5471, -0.5101, -0.1204, -0.0090]) tensor([0.4109, 0.1428, 0.2108, 0.2356]) -Greedy action tensor([ 1.0634, -0.5534, -0.0895, -0.4151]) tensor([0.5740, 0.1140, 0.1812, 0.1309]) -Greedy action tensor([ 0.2560, 0.4103, 0.0062, -0.7017]) tensor([0.3003, 0.3504, 0.2339, 0.1153]) -Greedy action tensor([ 0.4030, 0.1030, -0.0562, -0.1385]) tensor([0.3385, 0.2507, 0.2138, 0.1969]) -Greedy action tensor([ 0.7125, -0.6036, 0.0640, -0.7521]) tensor([0.4945, 0.1326, 0.2585, 0.1143]) -Greedy action tensor([ 0.8645, -0.3517, -0.0771, -0.3684]) tensor([0.5056, 0.1498, 0.1972, 0.1474]) -Greedy action tensor([ 0.7765, -0.4947, -0.0040, -0.4639]) tensor([0.4931, 0.1383, 0.2259, 0.1426]) -Greedy action tensor([ 0.9464, -0.5747, 0.0303, -0.3676]) tensor([0.5299, 0.1158, 0.2120, 0.1424]) -Greedy action tensor([ 0.3238, 0.2050, -0.2144, -0.1565]) tensor([0.3236, 0.2873, 0.1889, 0.2002]) -Greedy action tensor([ 0.5388, -0.1749, -0.0625, 0.0560]) tensor([0.3767, 0.1845, 0.2064, 0.2324]) -Greedy action tensor([ 1.1119, -0.6566, -0.1668, -0.4647]) tensor([0.6040, 0.1030, 0.1682, 0.1248]) -Greedy action tensor([ 0.5718, -0.2400, -0.1202, -0.3939]) tensor([0.4300, 0.1910, 0.2153, 0.1637]) -Greedy action tensor([ 0.6134, -0.4724, 0.0150, -0.3488]) tensor([0.4407, 0.1488, 0.2422, 0.1683]) -Greedy action tensor([ 0.6170, -0.7856, -0.0877, -0.4009]) tensor([0.4758, 0.1170, 0.2352, 0.1719]) -Greedy action tensor([ 1.2279, -0.4403, -0.0183, -0.3404]) tensor([0.5936, 0.1119, 0.1707, 0.1237]) -Greedy action tensor([ 0.6798, -0.3994, 0.0320, -0.3083]) tensor([0.4474, 0.1520, 0.2341, 0.1665]) -Greedy action tensor([ 0.7146, -0.4966, -0.2202, -0.3580]) tensor([0.4920, 0.1465, 0.1932, 0.1683]) -Greedy action tensor([ 0.6525, -0.3391, 0.1093, -0.3443]) tensor([0.4309, 0.1598, 0.2503, 0.1590]) -Greedy action tensor([ 0.8415, -0.8239, -0.0672, -0.6466]) tensor([0.5501, 0.1040, 0.2217, 0.1242]) -Greedy action tensor([ 0.7945, -0.5055, -0.1130, -0.5654]) tensor([0.5174, 0.1410, 0.2088, 0.1328]) -Greedy action tensor([ 0.5947, -0.1027, -0.1325, -0.3724]) tensor([0.4235, 0.2108, 0.2047, 0.1610]) -Greedy action tensor([ 0.2850, 0.2662, 0.2047, -0.4942]) tensor([0.2974, 0.2918, 0.2744, 0.1364]) -Greedy action tensor([ 0.5173, 0.0839, 0.0156, -0.0066]) tensor([0.3514, 0.2278, 0.2128, 0.2081]) -Greedy action tensor([ 0.6451, 0.0984, -0.0630, -0.1642]) tensor([0.3974, 0.2300, 0.1957, 0.1769]) -Greedy action tensor([ 0.8314, -0.5176, -0.0324, -0.4241]) tensor([0.5086, 0.1320, 0.2144, 0.1449]) -Greedy action tensor([-0.0863, -0.5533, 0.6380, 0.1075]) tensor([0.2039, 0.1278, 0.4207, 0.2475]) -Greedy action tensor([ 0.4166, -1.2711, -0.8034, 0.7426]) tensor([0.3490, 0.0645, 0.1030, 0.4834]) -Greedy action tensor([ 0.2374, -0.1237, 0.6942, 0.1019]) tensor([0.2410, 0.1680, 0.3806, 0.2105]) -Greedy action tensor([ 1.1388, -1.1470, 0.5817, -0.0920]) tensor([0.5085, 0.0517, 0.2913, 0.1485]) -Greedy action tensor([ 0.4551, -0.7657, 0.3625, 0.3858]) tensor([0.3185, 0.0940, 0.2903, 0.2972]) -Greedy action tensor([-1.9384, -0.8651, 0.7563, -0.7123]) tensor([0.0452, 0.1321, 0.6687, 0.1540]) -Greedy action tensor([ 0.1638, -0.4999, 0.8316, -0.1810]) tensor([0.2396, 0.1234, 0.4672, 0.1697]) -Greedy action tensor([-0.1654, -2.0204, -0.0839, 0.7591]) tensor([0.2100, 0.0329, 0.2278, 0.5293]) -Greedy action tensor([ 0.2568, -0.6625, 1.4789, -0.8376]) tensor([0.1950, 0.0778, 0.6619, 0.0653]) -Greedy action tensor([ 0.2494, -0.7463, 1.2763, 0.6501]) tensor([0.1768, 0.0653, 0.4938, 0.2640]) -Greedy action tensor([-0.0015, -1.2946, 0.5248, -0.6045]) tensor([0.2846, 0.0781, 0.4817, 0.1557]) -Greedy action tensor([ 0.2973, -0.0821, 1.0021, 0.3135]) tensor([0.2117, 0.1448, 0.4283, 0.2151]) -Greedy action tensor([ 0.6267, -0.1119, 0.9068, -0.3609]) tensor([0.3151, 0.1506, 0.4170, 0.1174]) -Greedy action tensor([ 0.8510, -1.1695, 0.4296, 1.0764]) tensor([0.3288, 0.0436, 0.2157, 0.4119]) -Greedy action tensor([-0.4928, -1.0288, 0.0938, -0.7248]) tensor([0.2395, 0.1401, 0.4305, 0.1899]) -Greedy action tensor([-0.4164, -0.5777, -0.1301, -0.6852]) tensor([0.2534, 0.2156, 0.3373, 0.1936]) -Greedy action tensor([ 1.0971, -1.0690, -0.0258, 0.7262]) tensor([0.4695, 0.0538, 0.1527, 0.3240]) -Greedy action tensor([-0.7256, -1.3943, 0.5107, 0.7512]) tensor([0.1071, 0.0549, 0.3688, 0.4691]) -Greedy action tensor([-0.5284, -1.1213, 1.0484, -0.5374]) tensor([0.1354, 0.0749, 0.6555, 0.1342]) -Greedy action tensor([ 0.6191, 0.3716, -0.3062, -0.4240]) tensor([0.3953, 0.3087, 0.1567, 0.1393]) -Greedy action tensor([ 0.8469, 0.7339, 0.3825, -0.4124]) tensor([0.3565, 0.3183, 0.2240, 0.1012]) -Greedy action tensor([ 1.1185, -0.8071, -0.1198, 1.8506]) tensor([0.2845, 0.0415, 0.0825, 0.5916]) -Greedy action tensor([-0.1673, -0.1696, -0.7603, 0.0866]) tensor([0.2605, 0.2599, 0.1439, 0.3357]) -Greedy action tensor([ 1.1984, -0.6314, 1.0543, 0.4424]) tensor([0.4007, 0.0643, 0.3469, 0.1881]) -Greedy action tensor([ 1.1972, -0.1430, -0.6646, -0.1805]) tensor([0.5990, 0.1568, 0.0931, 0.1511]) -Greedy action tensor([-0.0312, -0.4564, -0.6548, -0.2023]) tensor([0.3298, 0.2156, 0.1768, 0.2779]) -Greedy action tensor([ 1.7093, 0.0355, -0.5055, 0.8337]) tensor([0.5837, 0.1095, 0.0637, 0.2432]) -Greedy action tensor([-0.5533, 0.2851, -0.2645, 0.2498]) tensor([0.1453, 0.3362, 0.1940, 0.3245]) -Greedy action tensor([ 0.9803, 0.1685, -0.0184, 0.6542]) tensor([0.3946, 0.1752, 0.1454, 0.2848]) -Greedy action tensor([-0.4533, -0.2952, -0.6632, 0.9661]) tensor([0.1405, 0.1646, 0.1139, 0.5810]) -Greedy action tensor([-1.3193, -0.9892, 0.4017, -0.0826]) tensor([0.0875, 0.1218, 0.4893, 0.3014]) -Greedy action tensor([ 0.2082, -0.7577, 1.3939, 0.4779]) tensor([0.1677, 0.0638, 0.5489, 0.2196]) -Greedy action tensor([-0.6021, -1.4173, 0.2598, 0.5637]) tensor([0.1425, 0.0631, 0.3373, 0.4571]) -Greedy action tensor([0.6790, 0.0472, 0.9244, 0.3841]) tensor([0.2813, 0.1496, 0.3596, 0.2095]) -Greedy action tensor([-0.0163, -1.1550, 0.3403, -0.1156]) tensor([0.2737, 0.0876, 0.3909, 0.2478]) -Greedy action tensor([ 0.1154, -1.1100, -0.3952, -0.1565]) tensor([0.3765, 0.1106, 0.2260, 0.2869]) -Greedy action tensor([-0.4587, -0.6358, 0.1921, 0.7446]) tensor([0.1411, 0.1182, 0.2706, 0.4701]) -Greedy action tensor([ 0.3978, -0.6265, -0.3948, 0.8770]) tensor([0.2918, 0.1048, 0.1321, 0.4713]) -Greedy action tensor([-0.2833, -0.3719, -0.1594, -0.3226]) tensor([0.2495, 0.2283, 0.2824, 0.2399]) -Greedy action tensor([ 0.0187, -0.5098, -0.8546, -0.4944]) tensor([0.3838, 0.2262, 0.1603, 0.2297]) -Greedy action tensor([ 0.9594, -0.3218, 1.1550, 1.3975]) tensor([0.2473, 0.0687, 0.3007, 0.3833]) -Greedy action tensor([0.8941, 0.9281, 0.4476, 0.3050]) tensor([0.3097, 0.3204, 0.1981, 0.1718]) -Greedy action tensor([-0.6077, -1.5645, 1.8149, -0.7557]) tensor([0.0740, 0.0284, 0.8339, 0.0638]) -Greedy action tensor([ 0.8675, -0.3293, -0.7368, 1.1417]) tensor([0.3548, 0.1072, 0.0713, 0.4667]) -Greedy action tensor([ 0.5880, -0.1040, -0.6339, 0.2461]) tensor([0.3991, 0.1998, 0.1176, 0.2835]) -Greedy action tensor([ 0.0841, -0.1688, 0.7132, 0.4656]) tensor([0.1954, 0.1518, 0.3666, 0.2862]) -Greedy action tensor([-1.0990, 0.4672, -0.2683, -1.1986]) tensor([0.1113, 0.5327, 0.2553, 0.1007]) -Greedy action tensor([-2.0559, 0.3360, -0.3935, -0.7208]) tensor([0.0476, 0.5205, 0.2510, 0.1809]) -Greedy action tensor([ 1.1032, 0.9019, 0.2799, -1.0609]) tensor([0.4217, 0.3448, 0.1851, 0.0484]) -Greedy action tensor([-0.2168, 0.5313, -0.7177, 1.7589]) tensor([0.0915, 0.1933, 0.0554, 0.6598]) -Greedy action tensor([ 1.1564, -0.1688, -0.3214, -0.3721]) tensor([0.5845, 0.1553, 0.1334, 0.1268]) -Greedy action tensor([0.9714, 0.0387, 0.5688, 0.5806]) tensor([0.3652, 0.1437, 0.2441, 0.2470]) -Greedy action tensor([-0.9482, -1.0909, -0.8622, -0.1237]) tensor([0.1909, 0.1655, 0.2081, 0.4355]) -Greedy action tensor([-0.4129, 0.7318, -0.6236, -1.4049]) tensor([0.1879, 0.5903, 0.1522, 0.0697]) -Greedy action tensor([ 0.9960, 1.0912, 1.3910, -0.2963]) tensor([0.2591, 0.2850, 0.3847, 0.0712]) -Greedy action tensor([ 0.3837, -0.6263, 0.6457, 0.4450]) tensor([0.2683, 0.0977, 0.3487, 0.2853]) -Greedy action tensor([-0.6690, -0.7574, 0.4594, 0.1328]) tensor([0.1382, 0.1265, 0.4271, 0.3081]) -Greedy action tensor([-0.7229, -0.3113, 0.0499, -0.3142]) tensor([0.1618, 0.2442, 0.3505, 0.2435]) -Greedy action tensor([-0.0195, -0.9735, 0.7145, -1.1592]) tensor([0.2640, 0.1017, 0.5499, 0.0844]) -Greedy action tensor([ 0.6893, -1.1161, -0.0821, -0.0130]) tensor([0.4712, 0.0775, 0.2179, 0.2335]) -Greedy action tensor([ 0.9284, -0.0965, 0.1848, 0.6469]) tensor([0.3863, 0.1386, 0.1836, 0.2915]) -Greedy action tensor([ 0.4066, -1.4408, -0.0344, 0.2582]) tensor([0.3755, 0.0592, 0.2416, 0.3237]) -Greedy action tensor([0.6669, 0.7471, 0.4778, 0.6608]) tensor([0.2561, 0.2775, 0.2119, 0.2545]) -Greedy action tensor([ 0.2988, -1.3823, -0.3815, -0.0744]) tensor([0.4200, 0.0782, 0.2127, 0.2891]) -Greedy action tensor([ 0.8461, -0.5427, 0.1680, 0.3648]) tensor([0.4211, 0.1050, 0.2137, 0.2602]) -Greedy action tensor([ 1.2699, -0.0751, 1.1326, 0.3123]) tensor([0.3975, 0.1035, 0.3465, 0.1525]) -Greedy action tensor([-0.6923, -1.7916, 0.5472, 0.4622]) tensor([0.1256, 0.0419, 0.4339, 0.3986]) -Greedy action tensor([-0.2404, -0.3562, 0.5954, -0.1101]) tensor([0.1874, 0.1669, 0.4323, 0.2135]) -Greedy action tensor([ 0.4110, -0.3792, 0.7133, 0.2384]) tensor([0.2741, 0.1244, 0.3709, 0.2307]) -Greedy action tensor([-0.3466, -0.4452, 0.5529, 0.2023]) tensor([0.1640, 0.1486, 0.4033, 0.2840]) -Greedy action tensor([ 0.1594, -2.0196, -0.3009, -0.2209]) tensor([0.4119, 0.0466, 0.2599, 0.2816]) -Greedy action tensor([ 1.0623, -0.9813, 0.0403, 1.0320]) tensor([0.4066, 0.0527, 0.1463, 0.3944]) -Greedy action tensor([ 0.0325, -1.7603, 0.1830, 0.1259]) tensor([0.2918, 0.0486, 0.3392, 0.3204]) -Greedy action tensor([ 0.5195, -0.6750, -0.0444, 0.3081]) tensor([0.3729, 0.1130, 0.2122, 0.3019]) -Greedy action tensor([ 1.1108, 0.2183, -0.1175, 0.9096]) tensor([0.3968, 0.1625, 0.1162, 0.3245]) -Greedy action tensor([ 0.4144, -1.1732, 0.4957, 0.3940]) tensor([0.3059, 0.0625, 0.3318, 0.2997]) -Greedy action tensor([ 0.7912, -0.9552, -0.6488, -0.2532]) tensor([0.5671, 0.0989, 0.1344, 0.1996]) -Greedy action tensor([ 1.3878, -0.3176, 1.0434, 1.1407]) tensor([0.3743, 0.0680, 0.2653, 0.2924]) -Greedy action tensor([ 0.9884, -1.8699, 0.5023, 1.8291]) tensor([0.2506, 0.0144, 0.1541, 0.5809]) -Greedy action tensor([ 0.0894, -0.5250, -0.9123, -0.5397]) tensor([0.4096, 0.2216, 0.1504, 0.2184]) -Greedy action tensor([ 0.7808, -0.5442, 1.1250, 0.5477]) tensor([0.2883, 0.0766, 0.4067, 0.2284]) -Greedy action tensor([0.8225, 0.2425, 0.3247, 0.1578]) tensor([0.3728, 0.2087, 0.2266, 0.1918]) -Greedy action tensor([-1.9501, -0.7414, 0.8933, 0.2142]) tensor([0.0331, 0.1108, 0.5681, 0.2881]) -Greedy action tensor([-1.8772, -0.4639, 0.6271, -0.1300]) tensor([0.0433, 0.1780, 0.5300, 0.2486]) -Greedy action tensor([-1.3020, -0.4771, 0.7897, 0.9519]) tensor([0.0478, 0.1091, 0.3874, 0.4556]) -Greedy action tensor([-1.8961, -0.4561, 0.6471, -0.1556]) tensor([0.0423, 0.1785, 0.5381, 0.2411]) -Greedy action tensor([-1.7661, -0.4811, 0.7636, 0.1467]) tensor([0.0418, 0.1510, 0.5243, 0.2829]) -Greedy action tensor([-1.5884, -0.2201, 0.4253, 0.0594]) tensor([0.0568, 0.2230, 0.4253, 0.2949]) -Greedy action tensor([-1.1685, -0.3429, 0.3207, 0.0805]) tensor([0.0893, 0.2038, 0.3957, 0.3112]) -Greedy action tensor([-1.6874e+00, -5.3386e-01, 5.3778e-01, 1.3809e-03]) tensor([0.0531, 0.1683, 0.4913, 0.2873]) -Greedy action tensor([-1.8839, -0.4552, 0.6327, -0.1490]) tensor([0.0431, 0.1797, 0.5333, 0.2440]) -Greedy action tensor([-1.9339, -0.4362, 0.6612, -0.1743]) tensor([0.0405, 0.1812, 0.5429, 0.2354]) -Greedy action tensor([-1.9198, -0.4269, 0.6606, -0.1540]) tensor([0.0408, 0.1816, 0.5389, 0.2386]) -Greedy action tensor([-1.7653, -0.4245, 0.5806, -0.0862]) tensor([0.0485, 0.1853, 0.5063, 0.2599]) -Greedy action tensor([0.0831, 0.4758, 0.1303, 0.2727]) tensor([0.2110, 0.3126, 0.2213, 0.2551]) -Greedy action tensor([-1.9317, -0.4389, 0.6603, -0.1731]) tensor([0.0406, 0.1808, 0.5427, 0.2359]) -Greedy action tensor([-1.9043, -0.3590, 0.6409, -0.1480]) tensor([0.0413, 0.1936, 0.5261, 0.2390]) -Greedy action tensor([-1.8571, -0.0354, 0.5659, -0.0954]) tensor([0.0412, 0.2546, 0.4645, 0.2398]) -Greedy action tensor([-1.6926, -0.4960, 0.5680, 0.0725]) tensor([0.0507, 0.1676, 0.4858, 0.2960]) -Greedy action tensor([-1.5759, -0.4162, 0.5938, 0.1825]) tensor([0.0533, 0.1701, 0.4670, 0.3095]) -Greedy action tensor([-1.9061, -0.4176, 0.6443, -0.1636]) tensor([0.0417, 0.1850, 0.5349, 0.2384]) -Greedy action tensor([-1.8400, -0.4410, 0.6147, -0.1284]) tensor([0.0450, 0.1822, 0.5237, 0.2491]) -Greedy action tensor([-1.9176, -0.3070, 0.6316, -0.1593]) tensor([0.0406, 0.2034, 0.5201, 0.2358]) -Greedy action tensor([0.3979, 0.8758, 0.3938, 1.0881]) tensor([0.1785, 0.2878, 0.1778, 0.3559]) -Greedy action tensor([-1.7409, -0.5127, 0.5657, -0.1073]) tensor([0.0511, 0.1744, 0.5128, 0.2616]) -Greedy action tensor([-1.4690, -0.5937, 0.4114, 0.1360]) tensor([0.0670, 0.1607, 0.4390, 0.3333]) -Greedy action tensor([-1.4064, -0.7170, 0.9824, 0.7714]) tensor([0.0440, 0.0877, 0.4798, 0.3885]) -Greedy action tensor([-1.8345, -0.4310, 0.6120, -0.1286]) tensor([0.0452, 0.1839, 0.5220, 0.2489]) -Greedy action tensor([-1.7584, -0.4940, 0.5818, -0.0738]) tensor([0.0492, 0.1743, 0.5111, 0.2653]) -Greedy action tensor([-1.8879, -0.3744, 0.6253, -0.1430]) tensor([0.0424, 0.1924, 0.5228, 0.2425]) -Greedy action tensor([-1.8652, -0.1979, 0.5941, -0.1180]) tensor([0.0421, 0.2232, 0.4928, 0.2418]) -Greedy action tensor([-1.7345, -0.2909, 0.5375, -0.0248]) tensor([0.0489, 0.2070, 0.4740, 0.2701]) -Greedy action tensor([-1.9370, -0.4491, 0.6637, -0.1763]) tensor([0.0405, 0.1791, 0.5451, 0.2353]) -Greedy action tensor([-1.9087, -0.4200, 0.6444, -0.1584]) tensor([0.0416, 0.1844, 0.5345, 0.2395]) -Greedy action tensor([-1.8979, -0.4764, 0.6440, -0.1568]) tensor([0.0425, 0.1759, 0.5394, 0.2422]) -Greedy action tensor([-1.8249, -0.3239, 0.6391, -0.0242]) tensor([0.0429, 0.1926, 0.5045, 0.2599]) -Greedy action tensor([-1.4314, -0.7081, 0.3892, 0.1545]) tensor([0.0708, 0.1460, 0.4373, 0.3459]) -Greedy action tensor([-1.8189, -0.4134, 0.6602, -0.1139]) tensor([0.0444, 0.1812, 0.5300, 0.2444]) -Greedy action tensor([-1.3659, -0.3192, 0.3849, 0.1548]) tensor([0.0705, 0.2008, 0.4061, 0.3226]) -Greedy action tensor([-1.8330, -0.4490, 0.6191, -0.1135]) tensor([0.0451, 0.1799, 0.5235, 0.2516]) -Greedy action tensor([-1.8368, -0.4942, 0.6166, -0.1232]) tensor([0.0454, 0.1740, 0.5284, 0.2521]) -Greedy action tensor([-1.7980, -0.4953, 0.6354, -0.0815]) tensor([0.0462, 0.1700, 0.5267, 0.2571]) -Greedy action tensor([-1.8566, -0.3945, 0.6259, -0.1094]) tensor([0.0434, 0.1874, 0.5199, 0.2492]) -Greedy action tensor([-1.8865, -0.0539, 0.5763, -0.1491]) tensor([0.0405, 0.2534, 0.4758, 0.2303]) -Greedy action tensor([-1.9264, -0.4362, 0.6549, -0.1726]) tensor([0.0409, 0.1817, 0.5409, 0.2365]) -Greedy action tensor([-1.8454, -0.2477, 0.5909, -0.1341]) tensor([0.0437, 0.2157, 0.4990, 0.2417]) -Greedy action tensor([-1.7260, -0.3942, 0.5664, -0.0375]) tensor([0.0498, 0.1885, 0.4925, 0.2692]) -Greedy action tensor([-1.6610, -0.4952, 0.5607, 0.1137]) tensor([0.0517, 0.1660, 0.4771, 0.3052]) -Greedy action tensor([-1.9244, -0.4503, 0.6592, -0.1672]) tensor([0.0410, 0.1789, 0.5426, 0.2375]) -Greedy action tensor([-0.6452, 0.9336, 0.0523, 0.3434]) tensor([0.0948, 0.4598, 0.1905, 0.2549]) -Greedy action tensor([-1.9152, -0.3295, 0.6381, -0.1624]) tensor([0.0408, 0.1993, 0.5244, 0.2355]) -Greedy action tensor([-1.3005, -0.2667, 0.5212, 0.3436]) tensor([0.0659, 0.1853, 0.4075, 0.3412]) -Greedy action tensor([-1.8660, -0.2618, 0.6107, -0.1207]) tensor([0.0424, 0.2107, 0.5042, 0.2427]) -Greedy action tensor([-1.0740, -0.0733, 0.1548, 0.1981]) tensor([0.0934, 0.2541, 0.3192, 0.3333]) -Greedy action tensor([-1.6067, -0.5514, 0.4969, 0.0203]) tensor([0.0583, 0.1674, 0.4777, 0.2966]) -Greedy action tensor([-1.8334, -0.4581, 0.6202, -0.1225]) tensor([0.0452, 0.1789, 0.5258, 0.2502]) -Greedy action tensor([-1.9389, -0.4387, 0.6633, -0.1763]) tensor([0.0403, 0.1807, 0.5440, 0.2350]) -Greedy action tensor([-1.3119, 0.5687, 0.2166, 0.1275]) tensor([0.0610, 0.4002, 0.2814, 0.2574]) -Greedy action tensor([-1.6421, -0.5660, 0.5205, -0.0376]) tensor([0.0568, 0.1666, 0.4939, 0.2827]) -Greedy action tensor([-0.9598, 0.7630, 0.2833, -0.3618]) tensor([0.0841, 0.4712, 0.2917, 0.1530]) -Greedy action tensor([-1.5480, -0.6025, 0.4807, -0.0032]) tensor([0.0630, 0.1622, 0.4793, 0.2954]) -Greedy action tensor([-1.7832, -0.1646, 0.5143, -0.1381]) tensor([0.0472, 0.2383, 0.4698, 0.2447]) -Greedy action tensor([-1.7458, -0.5150, 0.6131, -0.0994]) tensor([0.0495, 0.1696, 0.5240, 0.2569]) -Greedy action tensor([-1.8228, -0.3557, 0.5938, -0.0945]) tensor([0.0451, 0.1956, 0.5054, 0.2539]) -Greedy action tensor([-0.7591, -0.5881, 0.1543, 0.3251]) tensor([0.1310, 0.1554, 0.3264, 0.3872]) -Greedy action tensor([-1.8573, -0.3922, 0.6519, -0.1167]) tensor([0.0429, 0.1856, 0.5271, 0.2444]) -Greedy action tensor([-0.8275, -0.5209, 0.2023, 0.1953]) tensor([0.1259, 0.1711, 0.3527, 0.3503]) -Greedy action tensor([-1.8930, -0.4786, 0.6518, -0.1484]) tensor([0.0424, 0.1745, 0.5404, 0.2427]) -Greedy action tensor([-0.4594, -0.4106, 0.1841, 0.1396]) tensor([0.1732, 0.1819, 0.3296, 0.3153]) -Greedy action tensor([-1.8148, -0.3082, 0.5841, -0.1249]) tensor([0.0456, 0.2056, 0.5018, 0.2470]) -Greedy action tensor([-1.8555, -0.1971, 0.5813, -0.1373]) tensor([0.0430, 0.2257, 0.4916, 0.2396]) -Greedy action tensor([-1.9214, -0.4462, 0.6562, -0.1693]) tensor([0.0411, 0.1799, 0.5417, 0.2373]) -Greedy action tensor([-1.6717, -0.2425, 0.6948, 0.0872]) tensor([0.0462, 0.1929, 0.4926, 0.2683]) -Greedy action tensor([-1.8983, -0.4104, 0.6538, -0.1482]) tensor([0.0416, 0.1844, 0.5344, 0.2396]) -Greedy action tensor([-1.8982, -0.4448, 0.6478, -0.1532]) tensor([0.0421, 0.1800, 0.5369, 0.2410]) -Greedy action tensor([-1.8854, -0.3874, 0.6409, -0.1402]) tensor([0.0422, 0.1887, 0.5276, 0.2416]) -Greedy action tensor([-1.8920, -0.4725, 0.6799, -0.1347]) tensor([0.0416, 0.1721, 0.5449, 0.2413]) -Greedy action tensor([-1.1884, -0.5984, 0.3519, -0.0103]) tensor([0.0933, 0.1683, 0.4353, 0.3031]) -Greedy action tensor([-1.8115, -0.1502, 0.5664, -0.0841]) tensor([0.0441, 0.2323, 0.4755, 0.2481]) -Greedy action tensor([-1.1946, 0.4502, 0.2811, 0.1773]) tensor([0.0690, 0.3573, 0.3017, 0.2720]) -Greedy action tensor([-1.8839, -0.4388, 0.6274, -0.1391]) tensor([0.0429, 0.1822, 0.5291, 0.2458]) -Greedy action tensor([-1.8492, -0.4648, 0.6310, -0.0865]) tensor([0.0439, 0.1754, 0.5247, 0.2560]) -Greedy action tensor([-1.8264, -0.4674, 0.6228, -0.0603]) tensor([0.0448, 0.1744, 0.5188, 0.2620]) -Greedy action tensor([ 2.0933, -0.5876, -0.3858, 0.6942]) tensor([0.7147, 0.0490, 0.0599, 0.1764]) -Greedy action tensor([ 1.6370, -0.9061, -0.3837, 0.3758]) tensor([0.6691, 0.0526, 0.0887, 0.1896]) -Greedy action tensor([ 1.7334, -0.6516, -0.3607, 0.8048]) tensor([0.6210, 0.0572, 0.0765, 0.2454]) -Greedy action tensor([ 1.0508, -0.1015, -0.2842, 0.1086]) tensor([0.5079, 0.1605, 0.1337, 0.1980]) -Greedy action tensor([ 1.7708, -0.2497, -0.5282, 0.6215]) tensor([0.6452, 0.0856, 0.0648, 0.2045]) -Greedy action tensor([ 1.7180, -0.1452, -0.5005, 0.4364]) tensor([0.6487, 0.1007, 0.0706, 0.1801]) -Greedy action tensor([ 1.0741, -0.4937, -0.3316, 0.6064]) tensor([0.4807, 0.1002, 0.1179, 0.3011]) -Greedy action tensor([ 0.9600, -0.5067, -0.2218, 0.3056]) tensor([0.4861, 0.1121, 0.1491, 0.2526]) -Greedy action tensor([ 1.4026, -0.5977, -0.5034, 0.3468]) tensor([0.6128, 0.0829, 0.0911, 0.2132]) -Greedy action tensor([ 1.5834, -0.3724, -0.7713, 0.3780]) tensor([0.6511, 0.0921, 0.0618, 0.1950]) -Greedy action tensor([ 1.1200, -0.3323, -0.0132, 0.1402]) tensor([0.5177, 0.1212, 0.1667, 0.1944]) -Greedy action tensor([ 1.3193, -0.5028, -0.0500, 0.1738]) tensor([0.5767, 0.0932, 0.1466, 0.1834]) -Greedy action tensor([ 1.9319, -0.6516, -0.5415, 0.3661]) tensor([0.7306, 0.0552, 0.0616, 0.1526]) -Greedy action tensor([ 1.3289, -0.5553, -0.4837, 0.5499]) tensor([0.5637, 0.0856, 0.0920, 0.2587]) -Greedy action tensor([ 0.9529, -0.3842, -0.0690, 0.3474]) tensor([0.4612, 0.1211, 0.1660, 0.2517]) -Greedy action tensor([ 1.1162, -0.7822, -0.3890, 0.3852]) tensor([0.5396, 0.0808, 0.1198, 0.2598]) -Greedy action tensor([ 1.1455, 0.0991, -0.4255, -0.4188]) tensor([0.5655, 0.1986, 0.1175, 0.1183]) -Greedy action tensor([ 1.3987, -0.3362, -0.6478, 0.9144]) tensor([0.5204, 0.0918, 0.0672, 0.3206]) -Greedy action tensor([ 0.9872, -0.1747, -0.1275, 0.0864]) tensor([0.4885, 0.1528, 0.1602, 0.1984]) -Greedy action tensor([ 1.6439, -0.8303, -0.1408, 0.2549]) tensor([0.6660, 0.0561, 0.1118, 0.1661]) -Greedy action tensor([ 1.2588, -0.3507, -0.5328, 0.3847]) tensor([0.5605, 0.1121, 0.0934, 0.2339]) -Greedy action tensor([ 2.3277, -1.1456, -0.6498, 1.5922]) tensor([0.6405, 0.0199, 0.0326, 0.3070]) -Greedy action tensor([ 1.7032, 0.0420, -0.8441, 0.3415]) tensor([0.6560, 0.1246, 0.0514, 0.1681]) -Greedy action tensor([ 1.6437, -0.3195, -0.3428, 0.4865]) tensor([0.6282, 0.0882, 0.0862, 0.1975]) -Greedy action tensor([ 1.6411, -0.5024, -0.2923, 0.1993]) tensor([0.6674, 0.0782, 0.0965, 0.1578]) -Greedy action tensor([ 0.8356, -0.5085, -0.3097, 0.4512]) tensor([0.4425, 0.1154, 0.1408, 0.3013]) -Greedy action tensor([ 1.1549, -0.3477, -0.3288, 0.2524]) tensor([0.5391, 0.1200, 0.1223, 0.2186]) -Greedy action tensor([ 1.5179, -0.3884, -0.1134, 0.5159]) tensor([0.5843, 0.0868, 0.1143, 0.2145]) -Greedy action tensor([ 1.5819, -0.8644, -0.7266, 0.3516]) tensor([0.6765, 0.0586, 0.0673, 0.1977]) -Greedy action tensor([ 1.2712, -0.1569, -0.2287, 0.3842]) tensor([0.5334, 0.1279, 0.1190, 0.2197]) -Greedy action tensor([ 1.7486, -0.6418, -0.2357, 0.7314]) tensor([0.6287, 0.0576, 0.0864, 0.2273]) -Greedy action tensor([ 0.9220, -0.2445, -0.3143, 0.2009]) tensor([0.4789, 0.1492, 0.1391, 0.2328]) -Greedy action tensor([ 0.7129, -0.1905, 0.1497, 0.1450]) tensor([0.3935, 0.1595, 0.2241, 0.2230]) -Greedy action tensor([ 0.7159, -0.1404, -0.3092, 0.1131]) tensor([0.4290, 0.1822, 0.1539, 0.2348]) -Greedy action tensor([ 1.5245, -0.3283, -1.2827, 0.6875]) tensor([0.6060, 0.0950, 0.0366, 0.2624]) -Greedy action tensor([ 1.2349, -0.3916, -0.5080, 0.1034]) tensor([0.5903, 0.1160, 0.1033, 0.1904]) -Greedy action tensor([ 1.7180, -0.5305, -0.9189, 0.6870]) tensor([0.6520, 0.0688, 0.0467, 0.2325]) -Greedy action tensor([ 0.8073, -0.1651, -0.2514, 0.1708]) tensor([0.4436, 0.1678, 0.1539, 0.2347]) -Greedy action tensor([ 1.5034, -0.3426, -0.3925, 0.5132]) tensor([0.5954, 0.0940, 0.0894, 0.2212]) -Greedy action tensor([ 1.5894, -0.6623, -0.0894, 0.7989]) tensor([0.5729, 0.0603, 0.1069, 0.2599]) -Greedy action tensor([ 0.9297, -0.4531, -0.1386, 0.4512]) tensor([0.4516, 0.1133, 0.1552, 0.2799]) -Greedy action tensor([ 1.8777, -0.2196, -0.4652, 0.2552]) tensor([0.7061, 0.0867, 0.0678, 0.1394]) -Greedy action tensor([ 1.6470, -0.0513, -0.5235, 0.6199]) tensor([0.6042, 0.1106, 0.0689, 0.2163]) -Greedy action tensor([ 1.7399, -0.3939, -0.4283, 0.6222]) tensor([0.6411, 0.0759, 0.0733, 0.2097]) -Greedy action tensor([ 1.3871, -0.3068, -0.6009, 0.6550]) tensor([0.5551, 0.1020, 0.0760, 0.2669]) -Greedy action tensor([ 1.7817, -0.3120, -0.5179, 0.2708]) tensor([0.6924, 0.0853, 0.0694, 0.1528]) -Greedy action tensor([ 1.4034, -0.5837, -0.5481, 0.6998]) tensor([0.5637, 0.0773, 0.0801, 0.2789]) -Greedy action tensor([ 1.2574, -0.2106, -0.0684, -0.1655]) tensor([0.5757, 0.1326, 0.1529, 0.1388]) -Greedy action tensor([ 0.7013, -0.2526, -0.0273, -0.0984]) tensor([0.4315, 0.1662, 0.2083, 0.1940]) -Greedy action tensor([ 1.3128, -0.4351, -0.8572, 0.4755]) tensor([0.5810, 0.1012, 0.0663, 0.2515]) -Greedy action tensor([ 1.2779, -0.2616, 0.2364, -0.1120]) tensor([0.5505, 0.1181, 0.1943, 0.1371]) -Greedy action tensor([ 1.2701, -0.5164, -0.3290, 0.2635]) tensor([0.5763, 0.0966, 0.1165, 0.2106]) -Greedy action tensor([ 2.0298, -0.9834, -0.3749, 0.1046]) tensor([0.7780, 0.0382, 0.0703, 0.1135]) -Greedy action tensor([ 2.1348, 0.2327, -0.2726, 0.3505]) tensor([0.7106, 0.1061, 0.0640, 0.1193]) -Greedy action tensor([ 1.6395, -0.8384, 0.1419, 0.0507]) tensor([0.6615, 0.0555, 0.1480, 0.1351]) -Greedy action tensor([ 1.2156, -0.2354, -0.4419, 0.2310]) tensor([0.5560, 0.1303, 0.1060, 0.2077]) -Greedy action tensor([ 1.2090, -0.5684, -0.5120, 0.2516]) tensor([0.5774, 0.0976, 0.1033, 0.2217]) -Greedy action tensor([ 1.3623, -0.5376, -0.1120, -0.0215]) tensor([0.6138, 0.0918, 0.1405, 0.1538]) -Greedy action tensor([ 1.5946, -0.5205, -0.2735, 0.2015]) tensor([0.6565, 0.0792, 0.1014, 0.1630]) -Greedy action tensor([ 1.3648, -0.4202, -0.5289, 0.5058]) tensor([0.5741, 0.0963, 0.0864, 0.2432]) -Greedy action tensor([ 2.3934, -0.1368, -0.7897, 0.4633]) tensor([0.7897, 0.0629, 0.0327, 0.1146]) -Greedy action tensor([ 2.2217, -0.9753, -0.3362, 0.8725]) tensor([0.7258, 0.0297, 0.0562, 0.1883]) -Greedy action tensor([ 0.8608, -0.0939, -0.0377, -0.0921]) tensor([0.4592, 0.1768, 0.1870, 0.1771]) -Greedy action tensor([ 1.3384, -0.6110, -0.1785, 0.2689]) tensor([0.5865, 0.0835, 0.1287, 0.2013]) -Greedy action tensor([ 1.3898, -0.6932, -0.1100, 0.2930]) tensor([0.5946, 0.0741, 0.1327, 0.1986]) -Greedy action tensor([ 1.1095, -0.1185, -0.0996, -0.0450]) tensor([0.5245, 0.1536, 0.1565, 0.1653]) -Greedy action tensor([ 1.6035, 0.1393, -0.7603, 0.2636]) tensor([0.6300, 0.1457, 0.0593, 0.1650]) -Greedy action tensor([ 1.4303, -0.6516, -0.5534, 0.7420]) tensor([0.5667, 0.0707, 0.0780, 0.2847]) -Greedy action tensor([ 1.2515, -0.1265, -0.7318, -0.2943]) tensor([0.6239, 0.1573, 0.0859, 0.1330]) -Greedy action tensor([ 1.0663, -0.4183, -0.0355, 0.3945]) tensor([0.4832, 0.1095, 0.1605, 0.2468]) -Greedy action tensor([ 2.2593, -1.0150, -0.3254, 0.4389]) tensor([0.7842, 0.0297, 0.0591, 0.1270]) -Greedy action tensor([ 1.1610, -0.5308, -0.2604, 0.3270]) tensor([0.5377, 0.0990, 0.1298, 0.2335]) -Greedy action tensor([ 1.5433, -0.3027, -0.1736, -0.0242]) tensor([0.6468, 0.1021, 0.1162, 0.1349]) -Greedy action tensor([ 1.2518, -0.2771, -0.3767, 0.4421]) tensor([0.5382, 0.1167, 0.1056, 0.2395]) -Greedy action tensor([ 0.8838, -0.4890, -0.2519, 0.3439]) tensor([0.4635, 0.1175, 0.1489, 0.2701]) -Greedy action tensor([ 0.9697, -0.2827, -0.2470, 0.2266]) tensor([0.4860, 0.1389, 0.1440, 0.2311]) -Greedy action tensor([ 1.5168, -0.0106, -0.0178, 0.3163]) tensor([0.5768, 0.1252, 0.1243, 0.1736]) -Greedy action tensor([ 1.3031, -0.2984, -0.4842, 0.1010]) tensor([0.5990, 0.1207, 0.1003, 0.1800]) -Greedy action tensor([ 1.6954, -0.4009, -0.4394, 0.2786]) tensor([0.6740, 0.0828, 0.0797, 0.1634]) -Greedy action tensor([ 1.4758, 0.0772, 0.5346, -0.1019]) tensor([0.5424, 0.1339, 0.2116, 0.1120]) -Greedy action tensor([ 0.8844, -0.1215, 0.4329, -0.2667]) tensor([0.4313, 0.1577, 0.2746, 0.1364]) -Greedy action tensor([ 0.7225, -0.7527, 0.0634, -0.6543]) tensor([0.5004, 0.1145, 0.2589, 0.1263]) -Greedy action tensor([ 1.2753, -0.2774, 0.2248, -0.3371]) tensor([0.5679, 0.1202, 0.1986, 0.1132]) -Greedy action tensor([ 0.6916, -0.5441, 0.0305, -0.2921]) tensor([0.4585, 0.1333, 0.2367, 0.1715]) -Greedy action tensor([ 0.8546, -0.7359, -0.0219, -0.4112]) tensor([0.5257, 0.1072, 0.2188, 0.1483]) -Greedy action tensor([ 0.6559, -0.3306, -0.1011, -0.2309]) tensor([0.4437, 0.1654, 0.2081, 0.1828]) -Greedy action tensor([ 0.7122, -0.4518, 0.0629, -0.3098]) tensor([0.4557, 0.1423, 0.2380, 0.1640]) -Greedy action tensor([ 0.5789, -0.0565, -0.1015, -0.0644]) tensor([0.3904, 0.2068, 0.1977, 0.2051]) -Greedy action tensor([ 0.4022, 0.0735, -0.0811, -0.1005]) tensor([0.3400, 0.2447, 0.2097, 0.2056]) -Greedy action tensor([ 0.4188, -0.2959, 0.1187, -0.2967]) tensor([0.3678, 0.1800, 0.2724, 0.1798]) -Greedy action tensor([ 0.2155, 0.0016, 0.0190, -0.1092]) tensor([0.2984, 0.2409, 0.2451, 0.2156]) -Greedy action tensor([ 0.3366, -0.1052, -0.0721, -0.3519]) tensor([0.3559, 0.2288, 0.2365, 0.1788]) -Greedy action tensor([ 0.9080, -0.6560, 0.0622, -0.4317]) tensor([0.5262, 0.1101, 0.2258, 0.1378]) -Greedy action tensor([ 0.8789, -0.5937, 0.0961, -0.6738]) tensor([0.5268, 0.1208, 0.2408, 0.1115]) -Greedy action tensor([ 0.4301, 0.0788, -0.1971, -0.1561]) tensor([0.3579, 0.2519, 0.1911, 0.1991]) -Greedy action tensor([ 0.8826, -0.3989, -0.0757, -0.2300]) tensor([0.5026, 0.1395, 0.1927, 0.1652]) -Greedy action tensor([ 0.8991, -0.8184, -0.0199, -0.3410]) tensor([0.5354, 0.0961, 0.2136, 0.1549]) -Greedy action tensor([ 0.7663, -0.6576, -0.0015, -0.3950]) tensor([0.4956, 0.1193, 0.2300, 0.1552]) -Greedy action tensor([ 1.1916, -1.1856, -0.0807, -0.4470]) tensor([0.6381, 0.0592, 0.1788, 0.1240]) -Greedy action tensor([ 1.2554, -0.9911, -0.0477, -0.5736]) tensor([0.6502, 0.0688, 0.1766, 0.1044]) -Greedy action tensor([ 0.7383, 0.4132, -0.0982, -0.1257]) tensor([0.3880, 0.2803, 0.1681, 0.1635]) -Greedy action tensor([ 0.5523, -0.2505, -0.1620, -0.2356]) tensor([0.4180, 0.1873, 0.2046, 0.1901]) -Greedy action tensor([ 0.2981, -0.0280, -0.1511, -0.2512]) tensor([0.3405, 0.2457, 0.2173, 0.1966]) -Greedy action tensor([ 0.8105, -0.0725, -0.0231, -0.3754]) tensor([0.4644, 0.1920, 0.2018, 0.1418]) -Greedy action tensor([ 1.0651, -0.8076, -0.0565, -0.6197]) tensor([0.6006, 0.0923, 0.1956, 0.1114]) -Greedy action tensor([ 0.4282, -0.2587, -0.0793, -0.0675]) tensor([0.3684, 0.1854, 0.2218, 0.2244]) -Greedy action tensor([ 0.5495, -0.4864, -0.1234, -0.0556]) tensor([0.4147, 0.1472, 0.2116, 0.2265]) -Greedy action tensor([ 0.3274, -0.0525, -0.1710, -0.3330]) tensor([0.3561, 0.2435, 0.2163, 0.1840]) -Greedy action tensor([ 0.7166, -0.3114, 0.0715, -0.3318]) tensor([0.4479, 0.1602, 0.2349, 0.1570]) -Greedy action tensor([ 0.7755, -0.3936, -0.0254, -0.2447]) tensor([0.4717, 0.1465, 0.2118, 0.1701]) -Greedy action tensor([ 0.5742, -0.3473, -0.1260, -0.2602]) tensor([0.4295, 0.1709, 0.2132, 0.1864]) -Greedy action tensor([ 0.3399, 0.2211, -0.0997, -0.2147]) tensor([0.3219, 0.2858, 0.2074, 0.1849]) -Greedy action tensor([ 1.6974, -0.8035, 0.3141, -0.5574]) tensor([0.6956, 0.0570, 0.1744, 0.0730]) -Greedy action tensor([ 0.6389, -0.1422, -0.0517, -0.0672]) tensor([0.4077, 0.1867, 0.2044, 0.2012]) -Greedy action tensor([ 1.2370, -0.9917, 0.0636, -0.8906]) tensor([0.6510, 0.0701, 0.2014, 0.0776]) -Greedy action tensor([ 0.3277, -0.0403, -0.0759, -0.1342]) tensor([0.3344, 0.2315, 0.2234, 0.2107]) -Greedy action tensor([ 0.4743, -0.1611, -0.0208, -0.1895]) tensor([0.3768, 0.1996, 0.2296, 0.1940]) -Greedy action tensor([ 0.7743, -0.6018, -0.1024, -0.2416]) tensor([0.4924, 0.1244, 0.2049, 0.1783]) -Greedy action tensor([ 0.5695, -0.1807, -0.0172, -0.3087]) tensor([0.4092, 0.1932, 0.2276, 0.1700]) -Greedy action tensor([ 0.8983, -0.9192, 0.0026, -0.3727]) tensor([0.5402, 0.0877, 0.2206, 0.1515]) -Greedy action tensor([ 0.6432, -0.1369, -0.0372, -0.1369]) tensor([0.4127, 0.1892, 0.2090, 0.1892]) -Greedy action tensor([ 0.4911, -0.0608, -0.1498, -0.0468]) tensor([0.3722, 0.2143, 0.1961, 0.2174]) -Greedy action tensor([ 0.5576, -0.0225, 0.0351, -0.0883]) tensor([0.3735, 0.2091, 0.2215, 0.1958]) -Greedy action tensor([ 0.7876, -0.2081, -0.0791, -0.0857]) tensor([0.4530, 0.1674, 0.1904, 0.1892]) -Greedy action tensor([ 0.4854, 0.2779, -0.2663, 0.0361]) tensor([0.3422, 0.2781, 0.1614, 0.2184]) -Greedy action tensor([ 0.6578, -0.4012, -0.0774, -0.2505]) tensor([0.4485, 0.1556, 0.2150, 0.1809]) -Greedy action tensor([ 0.9822, -0.3105, 0.0219, -0.4105]) tensor([0.5247, 0.1441, 0.2009, 0.1303]) -Greedy action tensor([ 0.5770, 0.0245, -0.1854, 0.0547]) tensor([0.3795, 0.2184, 0.1770, 0.2251]) -Greedy action tensor([ 0.5724, -0.2350, 0.0394, -0.3669]) tensor([0.4126, 0.1840, 0.2421, 0.1613]) -Greedy action tensor([ 0.6647, -0.4472, 0.0358, -0.5399]) tensor([0.4625, 0.1522, 0.2466, 0.1387]) -Greedy action tensor([ 0.8941, -0.8444, 0.0077, -0.4607]) tensor([0.5417, 0.0952, 0.2233, 0.1398]) -Greedy action tensor([ 1.1645, -0.7412, -0.0892, -0.5758]) tensor([0.6213, 0.0924, 0.1773, 0.1090]) -Greedy action tensor([ 0.4516, -0.1693, -0.0082, -0.2954]) tensor([0.3784, 0.2034, 0.2389, 0.1793]) -Greedy action tensor([ 0.5789, -0.5006, -0.1264, -0.2472]) tensor([0.4403, 0.1496, 0.2175, 0.1927]) -Greedy action tensor([ 0.9023, -0.5333, -0.0510, -0.5396]) tensor([0.5376, 0.1280, 0.2073, 0.1271]) -Greedy action tensor([ 0.7668, -0.3410, -0.0816, -0.4528]) tensor([0.4869, 0.1608, 0.2084, 0.1438]) -Greedy action tensor([ 0.7272, -0.2142, 0.0668, -0.1948]) tensor([0.4339, 0.1693, 0.2242, 0.1726]) -Greedy action tensor([ 1.0610, -0.5520, 0.1326, -0.6128]) tensor([0.5612, 0.1118, 0.2218, 0.1052]) -Greedy action tensor([ 0.5625, -0.3825, 0.2601, -0.4613]) tensor([0.4021, 0.1563, 0.2972, 0.1444]) -Greedy action tensor([ 0.4805, -0.1372, -0.0572, -0.0370]) tensor([0.3677, 0.1983, 0.2148, 0.2192]) -Greedy action tensor([ 0.4132, -0.0935, 0.1843, -0.2891]) tensor([0.3456, 0.2082, 0.2749, 0.1712]) -Greedy action tensor([ 0.4763, -0.0156, 0.0978, -0.3245]) tensor([0.3643, 0.2227, 0.2495, 0.1635]) -Greedy action tensor([ 0.6666, -0.6526, -0.0576, -0.6167]) tensor([0.4928, 0.1318, 0.2389, 0.1366]) -Greedy action tensor([ 0.2107, 0.1966, -0.1902, -0.1206]) tensor([0.2964, 0.2923, 0.1985, 0.2128]) -Greedy action tensor([ 0.4462, -0.1576, 0.0895, -0.2993]) tensor([0.3675, 0.2009, 0.2572, 0.1744]) -Greedy action tensor([ 0.8267, -0.5697, 0.0477, -0.3634]) tensor([0.4974, 0.1231, 0.2282, 0.1513]) -Greedy action tensor([ 0.7070, -0.3797, -0.0413, -0.2104]) tensor([0.4525, 0.1526, 0.2141, 0.1808]) -Greedy action tensor([ 0.7909, -0.3459, -0.1234, -0.3351]) tensor([0.4888, 0.1568, 0.1959, 0.1585]) -Greedy action tensor([ 0.7613, -0.9171, -0.0462, -0.3553]) tensor([0.5102, 0.0952, 0.2275, 0.1670]) -Greedy action tensor([ 0.4115, -0.3664, 0.1069, -0.3656]) tensor([0.3764, 0.1729, 0.2776, 0.1731]) -Greedy action tensor([ 0.6274, -0.3614, -0.0905, -0.2777]) tensor([0.4416, 0.1643, 0.2154, 0.1786]) -Greedy action tensor([ 0.3908, 0.1018, -0.0513, -0.1749]) tensor([0.3379, 0.2531, 0.2171, 0.1919]) -Greedy action tensor([ 0.5498, -0.4934, -0.2229, 0.0626]) tensor([0.4118, 0.1451, 0.1902, 0.2530]) -Greedy action tensor([ 0.4622, 0.1779, -0.1295, 0.1190]) tensor([0.3316, 0.2496, 0.1835, 0.2353]) -Greedy action tensor([ 0.7887, -0.3965, -0.0258, -0.1132]) tensor([0.4642, 0.1419, 0.2056, 0.1884]) -Greedy action tensor([ 0.5760, -0.5465, -0.0998, -0.2205]) tensor([0.4376, 0.1424, 0.2226, 0.1973]) -Greedy action tensor([ 0.6912, -0.2921, -0.0344, -0.2495]) tensor([0.4447, 0.1664, 0.2153, 0.1736]) -Greedy action tensor([ 0.3092, -0.0159, -0.1287, 0.0242]) tensor([0.3205, 0.2316, 0.2069, 0.2410]) -Greedy action tensor([ 0.8764, -0.6189, -0.1886, -0.3202]) tensor([0.5344, 0.1198, 0.1842, 0.1615]) -Greedy action tensor([ 0.7725, -0.3928, -0.1654, -0.2156]) tensor([0.4818, 0.1502, 0.1886, 0.1794]) -Greedy action tensor([ 0.9634, -0.6405, -0.0379, -0.2522]) tensor([0.5362, 0.1078, 0.1970, 0.1590]) -Greedy action tensor([ 0.5087, -0.1520, 0.0013, -0.1617]) tensor([0.3802, 0.1964, 0.2289, 0.1945]) -Greedy action tensor([-1.1286, -1.4184, -1.0271, -0.1622]) tensor([0.1824, 0.1365, 0.2018, 0.4793]) -Greedy action tensor([ 0.6917, -1.4799, 1.0552, 0.6940]) tensor([0.2813, 0.0321, 0.4046, 0.2820]) -Greedy action tensor([ 0.2211, -1.3827, -0.4276, 0.3433]) tensor([0.3504, 0.0705, 0.1832, 0.3959]) -Greedy action tensor([-0.3292, -0.0554, 0.1496, 0.2252]) tensor([0.1764, 0.2319, 0.2847, 0.3070]) -Greedy action tensor([-0.0789, -2.4125, 0.5544, 0.2020]) tensor([0.2323, 0.0225, 0.4376, 0.3076]) -Greedy action tensor([-0.3647, -1.7025, 1.2387, 0.1196]) tensor([0.1273, 0.0334, 0.6327, 0.2066]) -Greedy action tensor([ 0.7194, -0.3192, 0.0630, 0.5735]) tensor([0.3654, 0.1293, 0.1895, 0.3158]) -Greedy action tensor([-1.1120, 0.2429, 0.0116, -0.7974]) tensor([0.1073, 0.4158, 0.3300, 0.1469]) -Greedy action tensor([ 0.5246, -1.0249, -0.3622, 0.9948]) tensor([0.3101, 0.0659, 0.1278, 0.4963]) -Greedy action tensor([ 0.6249, -0.1484, -0.6722, 1.1040]) tensor([0.2986, 0.1378, 0.0816, 0.4820]) -Greedy action tensor([ 1.1405, -0.2373, -0.3113, 0.4084]) tensor([0.5083, 0.1282, 0.1190, 0.2445]) -Greedy action tensor([ 0.8251, -1.6572, 0.7176, 0.1906]) tensor([0.3981, 0.0333, 0.3575, 0.2111]) -Greedy action tensor([ 1.2327, -1.4677, 0.7062, 1.0498]) tensor([0.4015, 0.0270, 0.2371, 0.3344]) -Greedy action tensor([ 2.0739, -1.4984, 0.7911, 0.7300]) tensor([0.6385, 0.0179, 0.1770, 0.1665]) -Greedy action tensor([-1.0783, -0.3699, 0.3146, -1.1334]) tensor([0.1249, 0.2537, 0.5031, 0.1182]) -Greedy action tensor([ 1.2820, -0.5729, 1.2287, 0.1603]) tensor([0.4115, 0.0644, 0.3901, 0.1340]) -Greedy action tensor([-0.8199, -0.6152, -0.1036, -1.1496]) tensor([0.2003, 0.2458, 0.4099, 0.1440]) -Greedy action tensor([-0.3038, 0.0345, 1.0761, -0.8045]) tensor([0.1432, 0.2008, 0.5692, 0.0868]) -Greedy action tensor([-1.0684, -1.4071, 0.7835, 1.0901]) tensor([0.0597, 0.0426, 0.3806, 0.5171]) -Greedy action tensor([-0.2575, -0.9123, 1.4817, -0.6845]) tensor([0.1271, 0.0661, 0.7238, 0.0830]) -Greedy action tensor([ 1.2286, -1.3849, 0.8427, 1.8377]) tensor([0.2784, 0.0204, 0.1893, 0.5119]) -Greedy action tensor([-0.9128, 0.1969, 1.2209, -0.5576]) tensor([0.0719, 0.2182, 0.6074, 0.1026]) -Greedy action tensor([-0.2668, -0.5815, 0.0202, -0.1924]) tensor([0.2416, 0.1763, 0.3219, 0.2602]) -Greedy action tensor([ 0.0582, -1.1291, 1.4027, 0.7968]) tensor([0.1382, 0.0422, 0.5303, 0.2893]) -Greedy action tensor([0.4657, 0.5065, 0.7053, 0.2121]) tensor([0.2446, 0.2548, 0.3108, 0.1898]) -Greedy action tensor([ 0.5219, 0.8357, -0.6827, -0.5282]) tensor([0.3313, 0.4535, 0.0993, 0.1159]) -Greedy action tensor([-0.3479, -1.1183, -0.4809, -0.1560]) tensor([0.2817, 0.1304, 0.2466, 0.3413]) -Greedy action tensor([-0.2651, -0.9634, -0.9667, 0.1984]) tensor([0.2791, 0.1388, 0.1384, 0.4437]) -Greedy action tensor([-0.7497, -0.9754, 2.0160, 0.2450]) tensor([0.0490, 0.0391, 0.7792, 0.1326]) -Greedy action tensor([-1.0507, 0.6098, 0.3610, -1.1624]) tensor([0.0888, 0.4673, 0.3644, 0.0794]) -Greedy action tensor([-0.1424, -0.7677, 0.2384, -0.5242]) tensor([0.2717, 0.1454, 0.3975, 0.1854]) -Greedy action tensor([0.7829, 0.9481, 0.6541, 1.0347]) tensor([0.2301, 0.2715, 0.2023, 0.2961]) -Greedy action tensor([0.6473, 0.7307, 1.0044, 0.2815]) tensor([0.2375, 0.2582, 0.3395, 0.1648]) -Greedy action tensor([ 0.5622, -2.2020, 0.1436, 0.5277]) tensor([0.3721, 0.0235, 0.2449, 0.3595]) -Greedy action tensor([-0.2637, -2.4421, 0.2977, 0.9731]) tensor([0.1585, 0.0179, 0.2778, 0.5458]) -Greedy action tensor([-0.2004, -2.3527, 0.5383, 0.2770]) tensor([0.2074, 0.0241, 0.4341, 0.3343]) -Greedy action tensor([1.0970, 0.6332, 0.4224, 0.3793]) tensor([0.3808, 0.2395, 0.1940, 0.1858]) -Greedy action tensor([ 1.1522, -0.4324, -0.0894, 0.0490]) tensor([0.5477, 0.1123, 0.1582, 0.1817]) -Greedy action tensor([-1.1936, -0.8098, 1.2325, -1.2981]) tensor([0.0681, 0.1000, 0.7706, 0.0613]) -Greedy action tensor([ 1.3577, 1.0207, 0.6784, -0.5990]) tensor([0.4233, 0.3022, 0.2146, 0.0598]) -Greedy action tensor([ 0.0230, -0.3754, 0.0975, -0.7791]) tensor([0.3128, 0.2100, 0.3370, 0.1402]) -Greedy action tensor([-0.1775, -0.5197, -1.1308, 1.6117]) tensor([0.1238, 0.0879, 0.0477, 0.7407]) -Greedy action tensor([ 1.4073, 0.1644, -0.0848, 1.6999]) tensor([0.3505, 0.1011, 0.0788, 0.4696]) -Greedy action tensor([-1.0502, 0.0072, 0.1392, -1.1671]) tensor([0.1242, 0.3575, 0.4079, 0.1105]) -Greedy action tensor([ 6.6550e-04, -1.0580e+00, -2.1691e-01, -1.7502e-02]) tensor([0.3191, 0.1107, 0.2567, 0.3134]) -Greedy action tensor([ 1.7931, -0.1310, -0.3689, 0.5970]) tensor([0.6396, 0.0934, 0.0736, 0.1934]) -Greedy action tensor([ 1.4914, 0.2433, 0.0798, -0.6944]) tensor([0.6086, 0.1747, 0.1483, 0.0684]) -Greedy action tensor([ 0.2011, -1.7085, 1.2881, -1.2396]) tensor([0.2299, 0.0341, 0.6817, 0.0544]) -Greedy action tensor([ 1.1497, 0.4163, 0.7653, -0.0823]) tensor([0.4077, 0.1958, 0.2776, 0.1189]) -Greedy action tensor([ 0.5314, 0.8610, -0.4484, 0.3116]) tensor([0.2802, 0.3896, 0.1052, 0.2249]) -Greedy action tensor([-0.8911, -0.4580, 0.0866, 0.4451]) tensor([0.1111, 0.1712, 0.2952, 0.4225]) -Greedy action tensor([ 0.3376, -1.4250, -0.2203, 1.0579]) tensor([0.2632, 0.0452, 0.1507, 0.5409]) -Greedy action tensor([ 0.5738, -1.0287, 1.1139, 0.7574]) tensor([0.2428, 0.0489, 0.4166, 0.2917]) -Greedy action tensor([ 0.5962, 0.0133, 0.4656, -0.7707]) tensor([0.3716, 0.2075, 0.3261, 0.0947]) -Greedy action tensor([-1.2825, 0.0344, 0.7152, -0.7587]) tensor([0.0725, 0.2706, 0.5345, 0.1224]) -Greedy action tensor([-0.7987, 1.2501, 1.8205, -1.3664]) tensor([0.0434, 0.3366, 0.5954, 0.0246]) -Greedy action tensor([ 0.1570, -0.1042, -0.0266, 0.3012]) tensor([0.2661, 0.2050, 0.2215, 0.3074]) -Greedy action tensor([ 0.1592, -0.3909, 0.5316, 0.2009]) tensor([0.2457, 0.1417, 0.3565, 0.2561]) -Greedy action tensor([ 1.4368, -1.6119, 0.7806, -0.2199]) tensor([0.5692, 0.0270, 0.2953, 0.1086]) -Greedy action tensor([-0.0414, -0.1664, -0.3119, 0.1589]) tensor([0.2586, 0.2282, 0.1973, 0.3159]) -Greedy action tensor([-1.3264, -0.3408, -0.3551, -0.4992]) tensor([0.1162, 0.3113, 0.3069, 0.2657]) -Greedy action tensor([ 0.2881, 0.1109, 1.8369, -0.4444]) tensor([0.1424, 0.1192, 0.6700, 0.0684]) -Greedy action tensor([ 0.6976, -1.1532, -0.5909, -0.3784]) tensor([0.5638, 0.0886, 0.1554, 0.1922]) -Greedy action tensor([-0.0594, -0.9771, -0.0470, 2.1236]) tensor([0.0886, 0.0354, 0.0897, 0.7863]) -Greedy action tensor([0.6304, 0.5171, 2.0083, 0.5745]) tensor([0.1469, 0.1312, 0.5829, 0.1390]) -Greedy action tensor([-1.2594, -0.2491, -0.1574, -0.6684]) tensor([0.1168, 0.3207, 0.3516, 0.2109]) -Greedy action tensor([-0.6669, 0.0358, -0.5548, -0.2888]) tensor([0.1787, 0.3608, 0.1998, 0.2608]) -Greedy action tensor([-0.0362, 0.1794, -0.7854, 0.4610]) tensor([0.2295, 0.2847, 0.1085, 0.3773]) -Greedy action tensor([-0.2215, 0.3115, -0.2630, 0.5821]) tensor([0.1696, 0.2890, 0.1627, 0.3787]) -Greedy action tensor([ 0.3791, -1.2246, 0.0569, -0.2377]) tensor([0.4056, 0.0816, 0.2939, 0.2189]) -Greedy action tensor([ 0.1693, -2.1465, 0.2019, 0.9095]) tensor([0.2365, 0.0233, 0.2443, 0.4958]) -Greedy action tensor([ 0.2760, -0.8944, -0.0057, 0.2427]) tensor([0.3298, 0.1023, 0.2488, 0.3190]) -Greedy action tensor([-0.1591, 0.0824, -0.8397, 0.6599]) tensor([0.1981, 0.2522, 0.1003, 0.4494]) -Greedy action tensor([ 0.6864, -0.6808, 1.4950, -0.5184]) tensor([0.2632, 0.0671, 0.5908, 0.0789]) -Greedy action tensor([-1.0282, -1.4888, 0.2025, 0.1941]) tensor([0.1183, 0.0747, 0.4052, 0.4018]) -Greedy action tensor([0.5488, 0.1230, 0.3895, 0.3852]) tensor([0.2981, 0.1947, 0.2542, 0.2531]) -Greedy action tensor([ 0.4978, -0.5896, 1.1817, 0.5066]) tensor([0.2311, 0.0779, 0.4579, 0.2331]) -Greedy action tensor([ 1.5985, -0.4866, 0.8585, 0.4388]) tensor([0.5222, 0.0649, 0.2492, 0.1638]) -Greedy action tensor([ 0.8502, -1.4666, 2.1990, -0.8603]) tensor([0.1948, 0.0192, 0.7507, 0.0352]) -Greedy action tensor([ 1.3944, -0.0706, 1.3338, -0.1129]) tensor([0.4177, 0.0965, 0.3932, 0.0925]) -Greedy action tensor([ 0.5600, -0.5779, 1.8903, 0.8083]) tensor([0.1566, 0.0502, 0.5924, 0.2008]) -Greedy action tensor([-0.1066, -1.1436, -0.1331, 0.5757]) tensor([0.2322, 0.0823, 0.2261, 0.4594]) -Greedy action tensor([-1.2870, -0.3866, 0.4478, -0.2161]) tensor([0.0830, 0.2043, 0.4705, 0.2422]) -Greedy action tensor([-1.6619, -0.5010, 0.6383, 0.1449]) tensor([0.0494, 0.1576, 0.4924, 0.3006]) -Greedy action tensor([-1.9090, -0.3986, 0.6462, -0.1593]) tensor([0.0414, 0.1875, 0.5329, 0.2382]) -Greedy action tensor([-1.5616, -0.3613, 0.6289, 0.0918]) tensor([0.0541, 0.1797, 0.4836, 0.2826]) -Greedy action tensor([-1.8807, -0.4031, 0.6356, -0.1343]) tensor([0.0426, 0.1865, 0.5270, 0.2440]) -Greedy action tensor([-1.7943, -0.4080, 0.6060, -0.0541]) tensor([0.0460, 0.1841, 0.5076, 0.2623]) -Greedy action tensor([-1.5665, -0.3892, 0.5773, -0.2487]) tensor([0.0606, 0.1965, 0.5167, 0.2262]) -Greedy action tensor([-1.6053, -0.5128, 0.5026, -0.0465]) tensor([0.0589, 0.1758, 0.4851, 0.2802]) -Greedy action tensor([-1.4399, -0.4578, 0.8528, 0.6331]) tensor([0.0465, 0.1241, 0.4601, 0.3693]) -Greedy action tensor([-1.3690, -0.5561, 0.4032, 0.3632]) tensor([0.0676, 0.1524, 0.3978, 0.3822]) -Greedy action tensor([-1.9142, -0.4320, 0.6511, -0.1661]) tensor([0.0414, 0.1823, 0.5385, 0.2378]) -Greedy action tensor([-1.1775, -0.6324, 0.2541, 0.2798]) tensor([0.0892, 0.1539, 0.3736, 0.3833]) -Greedy action tensor([-1.4235, 0.9512, 0.4595, -0.5577]) tensor([0.0483, 0.5193, 0.3176, 0.1148]) -Greedy action tensor([-1.8552, -0.3091, 0.6025, -0.1251]) tensor([0.0435, 0.2039, 0.5075, 0.2451]) -Greedy action tensor([-1.7755, -0.4274, 0.6509, -0.0708]) tensor([0.0461, 0.1777, 0.5223, 0.2538]) -Greedy action tensor([-1.6551, -0.4741, 0.5247, -0.0169]) tensor([0.0548, 0.1785, 0.4847, 0.2820]) -Greedy action tensor([-1.5931, -0.7308, 0.8614, 0.0497]) tensor([0.0496, 0.1174, 0.5769, 0.2562]) -Greedy action tensor([-0.7452, 0.1389, 0.2247, -0.1717]) tensor([0.1277, 0.3091, 0.3367, 0.2265]) -Greedy action tensor([-1.8653, -0.4451, 0.6492, -0.1232]) tensor([0.0431, 0.1783, 0.5326, 0.2460]) -Greedy action tensor([-1.8295, -0.3267, 0.6201, -0.1074]) tensor([0.0441, 0.1982, 0.5109, 0.2468]) -Greedy action tensor([-1.9230, -0.4478, 0.6533, -0.1684]) tensor([0.0411, 0.1799, 0.5411, 0.2379]) -Greedy action tensor([-1.9246, -0.3688, 0.6418, -0.1687]) tensor([0.0407, 0.1931, 0.5304, 0.2358]) -Greedy action tensor([-1.8015, -0.4692, 0.6646, 0.0346]) tensor([0.0438, 0.1659, 0.5157, 0.2746]) -Greedy action tensor([-1.8343, -0.3551, 0.5950, -0.1226]) tensor([0.0449, 0.1970, 0.5095, 0.2486]) -Greedy action tensor([-1.9219, -0.4500, 0.6548, -0.1682]) tensor([0.0412, 0.1794, 0.5416, 0.2378]) -Greedy action tensor([-1.8846, -0.4253, 0.6703, -0.1344]) tensor([0.0418, 0.1798, 0.5379, 0.2405]) -Greedy action tensor([-1.8854, -0.4283, 0.6798, -0.1052]) tensor([0.0413, 0.1772, 0.5367, 0.2448]) -Greedy action tensor([-1.6733, -0.5981, 0.8490, 0.1984]) tensor([0.0437, 0.1280, 0.5443, 0.2840]) -Greedy action tensor([0.7810, 1.1750, 0.1799, 1.0355]) tensor([0.2314, 0.3432, 0.1269, 0.2985]) -Greedy action tensor([-1.6758, -0.3535, 0.5661, 0.0070]) tensor([0.0512, 0.1920, 0.4815, 0.2753]) -Greedy action tensor([-1.7172, -0.5508, 0.5884, -0.0026]) tensor([0.0505, 0.1622, 0.5067, 0.2806]) -Greedy action tensor([0.7953, 0.8485, 0.4727, 1.3269]) tensor([0.2232, 0.2354, 0.1617, 0.3798]) -Greedy action tensor([-1.6921, -0.4752, 0.5486, -0.0604]) tensor([0.0529, 0.1788, 0.4976, 0.2707]) -Greedy action tensor([-1.8686, -0.4302, 0.6309, -0.1381]) tensor([0.0434, 0.1829, 0.5286, 0.2450]) -Greedy action tensor([-1.7828, -0.4353, 0.5919, -0.0669]) tensor([0.0473, 0.1819, 0.5080, 0.2629]) -Greedy action tensor([-1.9145, -0.4348, 0.6487, -0.1691]) tensor([0.0415, 0.1822, 0.5385, 0.2377]) -Greedy action tensor([-0.8711, 0.0891, 0.3026, -0.5036]) tensor([0.1206, 0.3151, 0.3901, 0.1742]) -Greedy action tensor([-1.8050, -0.4362, 0.6588, -0.0534]) tensor([0.0446, 0.1751, 0.5235, 0.2568]) -Greedy action tensor([-1.5450, -0.1483, 0.3629, 0.0912]) tensor([0.0591, 0.2389, 0.3984, 0.3036]) -Greedy action tensor([-1.8580, -0.4614, 0.6271, -0.1409]) tensor([0.0442, 0.1787, 0.5308, 0.2462]) -Greedy action tensor([-1.7096, -0.6012, 0.8092, 0.0155]) tensor([0.0453, 0.1374, 0.5628, 0.2545]) -Greedy action tensor([-1.3533, -0.4728, 0.5465, 0.3722]) tensor([0.0636, 0.1535, 0.4254, 0.3574]) -Greedy action tensor([-1.7788, -0.4749, 0.6357, 0.0506]) tensor([0.0453, 0.1667, 0.5061, 0.2819]) -Greedy action tensor([-1.4234, -0.3709, 0.4458, -0.1416]) tensor([0.0717, 0.2053, 0.4647, 0.2583]) -Greedy action tensor([-1.9009, -0.4644, 0.6493, -0.1539]) tensor([0.0421, 0.1771, 0.5393, 0.2415]) -Greedy action tensor([-1.7711, -0.4528, 0.5548, -0.0872]) tensor([0.0491, 0.1836, 0.5028, 0.2646]) -Greedy action tensor([-1.8576, -0.4521, 0.6293, -0.1364]) tensor([0.0441, 0.1797, 0.5299, 0.2464]) -Greedy action tensor([-1.8557, -0.4755, 0.6214, -0.1440]) tensor([0.0446, 0.1773, 0.5311, 0.2470]) -Greedy action tensor([-1.8348, -0.4611, 0.9190, 0.2620]) tensor([0.0347, 0.1372, 0.5454, 0.2827]) -Greedy action tensor([-1.8966, -0.4403, 0.6406, -0.1585]) tensor([0.0423, 0.1816, 0.5353, 0.2407]) -Greedy action tensor([-1.4939, -0.0851, 0.5839, 0.0507]) tensor([0.0563, 0.2303, 0.4496, 0.2638]) -Greedy action tensor([-1.9385, -0.4443, 0.6638, -0.1760]) tensor([0.0404, 0.1798, 0.5446, 0.2352]) -Greedy action tensor([-1.9165, -0.4596, 0.6834, -0.1546]) tensor([0.0407, 0.1746, 0.5477, 0.2369]) -Greedy action tensor([-1.6117, -0.4711, 0.5817, 0.2169]) tensor([0.0518, 0.1619, 0.4641, 0.3222]) -Greedy action tensor([-1.8445, -0.3945, 0.6125, -0.1303]) tensor([0.0445, 0.1896, 0.5190, 0.2469]) -Greedy action tensor([-1.9334, -0.4507, 0.6627, -0.1732]) tensor([0.0406, 0.1789, 0.5445, 0.2360]) -Greedy action tensor([-1.2219, -0.6156, 0.3232, 0.1419]) tensor([0.0875, 0.1604, 0.4101, 0.3421]) -Greedy action tensor([-1.6252, -0.2896, 0.8120, 0.3411]) tensor([0.0428, 0.1626, 0.4892, 0.3055]) -Greedy action tensor([-1.9105, -0.4295, 0.6478, -0.1653]) tensor([0.0416, 0.1829, 0.5372, 0.2382]) -Greedy action tensor([-1.7078, -0.1292, 0.5004, -0.0778]) tensor([0.0499, 0.2418, 0.4538, 0.2545]) -Greedy action tensor([-1.8094, -0.0858, 0.5271, -0.0834]) tensor([0.0443, 0.2483, 0.4584, 0.2490]) -Greedy action tensor([-1.7647, -0.4866, 0.7439, 0.1636]) tensor([0.0421, 0.1511, 0.5172, 0.2895]) -Greedy action tensor([-1.7348, -0.4859, 0.5824, -0.0484]) tensor([0.0499, 0.1740, 0.5065, 0.2695]) -Greedy action tensor([-1.9057, -0.4652, 0.6449, -0.1636]) tensor([0.0421, 0.1778, 0.5396, 0.2404]) -Greedy action tensor([-1.8329, -0.3080, 0.5923, -0.1367]) tensor([0.0447, 0.2056, 0.5057, 0.2440]) -Greedy action tensor([-1.9339, -0.4529, 0.6651, -0.1741]) tensor([0.0406, 0.1783, 0.5454, 0.2357]) -Greedy action tensor([-1.8844, -0.4362, 0.6316, -0.1528]) tensor([0.0429, 0.1828, 0.5316, 0.2426]) -Greedy action tensor([-1.4640, -0.5027, 0.4156, 0.1234]) tensor([0.0664, 0.1737, 0.4351, 0.3248]) -Greedy action tensor([-1.7210, -0.4726, 0.6142, 0.0379]) tensor([0.0485, 0.1690, 0.5010, 0.2815]) -Greedy action tensor([-1.8875, -0.4419, 0.6407, -0.1504]) tensor([0.0426, 0.1810, 0.5342, 0.2422]) -Greedy action tensor([-1.8668, -0.4351, 0.6644, -0.0748]) tensor([0.0421, 0.1762, 0.5291, 0.2526]) -Greedy action tensor([-1.8942, -0.4365, 0.6451, -0.1516]) tensor([0.0422, 0.1814, 0.5351, 0.2412]) -Greedy action tensor([-1.7255, -0.5025, 0.5554, -0.0643]) tensor([0.0514, 0.1747, 0.5031, 0.2708]) -Greedy action tensor([-1.8904, -0.4011, 0.6394, -0.1474]) tensor([0.0422, 0.1871, 0.5296, 0.2411]) -Greedy action tensor([-1.0122, -0.1241, 0.3783, -0.5636]) tensor([0.1109, 0.2697, 0.4457, 0.1737]) -Greedy action tensor([-0.8731, 0.6205, 0.2729, 0.6579]) tensor([0.0756, 0.3368, 0.2379, 0.3496]) -Greedy action tensor([-1.9381, -0.4405, 0.6630, -0.1767]) tensor([0.0404, 0.1805, 0.5441, 0.2350]) -Greedy action tensor([-1.8748, -0.2987, 0.6173, -0.1352]) tensor([0.0423, 0.2048, 0.5118, 0.2411]) -Greedy action tensor([-1.5585, -0.3255, 0.7361, 0.2051]) tensor([0.0495, 0.1700, 0.4915, 0.2890]) -Greedy action tensor([-1.8741, -0.4112, 0.6262, -0.1558]) tensor([0.0433, 0.1871, 0.5280, 0.2415]) -Greedy action tensor([-1.7032, -0.4848, 0.5244, -0.0454]) tensor([0.0529, 0.1789, 0.4907, 0.2776]) -Greedy action tensor([-1.8898, -0.3845, 0.6320, -0.1473]) tensor([0.0423, 0.1904, 0.5261, 0.2413]) -Greedy action tensor([ 2.1197, -0.6257, -0.1160, 0.4485]) tensor([0.7357, 0.0473, 0.0787, 0.1383]) -Greedy action tensor([ 1.2903, -0.3634, -0.4802, 0.5137]) tensor([0.5490, 0.1050, 0.0935, 0.2525]) -Greedy action tensor([ 1.1407, -0.4541, -0.3249, 0.3258]) tensor([0.5329, 0.1082, 0.1231, 0.2359]) -Greedy action tensor([ 2.1877, -0.6193, -0.4494, 1.1493]) tensor([0.6729, 0.0406, 0.0482, 0.2382]) -Greedy action tensor([ 1.3231e+00, -7.7534e-01, -6.2593e-01, -4.5790e-04]) tensor([0.6531, 0.0801, 0.0930, 0.1738]) -Greedy action tensor([ 2.2181, -0.8367, -0.2386, 0.5889]) tensor([0.7525, 0.0355, 0.0645, 0.1475]) -Greedy action tensor([ 1.8822, -0.8564, 0.0122, 0.4214]) tensor([0.6893, 0.0446, 0.1062, 0.1599]) -Greedy action tensor([ 1.4811, -0.0998, -0.4222, 0.3815]) tensor([0.5925, 0.1219, 0.0883, 0.1973]) -Greedy action tensor([ 2.5146, -1.5421, -0.1877, 0.4568]) tensor([0.8250, 0.0143, 0.0553, 0.1054]) -Greedy action tensor([ 1.3250, -0.3409, -1.0023, 0.8569]) tensor([0.5228, 0.0988, 0.0510, 0.3274]) -Greedy action tensor([ 1.8518, -0.4467, -0.4186, 0.5957]) tensor([0.6718, 0.0675, 0.0694, 0.1913]) -Greedy action tensor([ 1.2364, -0.9028, -0.3017, 0.4614]) tensor([0.5577, 0.0657, 0.1198, 0.2569]) -Greedy action tensor([ 1.0076, -0.4136, 0.0042, -0.2762]) tensor([0.5305, 0.1281, 0.1945, 0.1469]) -Greedy action tensor([ 1.9292, -0.0724, -0.5360, 0.5261]) tensor([0.6821, 0.0922, 0.0580, 0.1677]) -Greedy action tensor([ 0.7944, -0.3925, -0.1336, 0.0755]) tensor([0.4571, 0.1395, 0.1807, 0.2227]) -Greedy action tensor([ 1.1404, -0.1673, -0.0266, -0.2364]) tensor([0.5452, 0.1474, 0.1697, 0.1376]) -Greedy action tensor([ 1.8237, -0.8724, -0.1036, 0.9856]) tensor([0.6077, 0.0410, 0.0884, 0.2628]) -Greedy action tensor([ 1.2343, -0.3887, -0.4475, 0.2547]) tensor([0.5686, 0.1122, 0.1058, 0.2135]) -Greedy action tensor([ 1.2704, -0.2492, -0.7100, 0.3736]) tensor([0.5667, 0.1240, 0.0782, 0.2311]) -Greedy action tensor([ 1.6204, -0.5877, -0.0598, 0.3166]) tensor([0.6378, 0.0701, 0.1189, 0.1732]) -Greedy action tensor([ 1.1898, 0.0479, -0.1603, 0.0124]) tensor([0.5301, 0.1692, 0.1374, 0.1633]) -Greedy action tensor([ 1.5113, -0.0647, -0.3544, 0.2215]) tensor([0.6109, 0.1263, 0.0946, 0.1682]) -Greedy action tensor([ 1.4259, -0.2440, -0.9281, 0.4422]) tensor([0.6034, 0.1136, 0.0573, 0.2256]) -Greedy action tensor([ 0.9057, -0.3560, 0.1126, 0.1261]) tensor([0.4557, 0.1291, 0.2062, 0.2090]) -Greedy action tensor([ 2.4139, -0.8500, -0.7384, 1.1824]) tensor([0.7284, 0.0279, 0.0311, 0.2126]) -Greedy action tensor([ 1.2977, -0.4514, -0.0845, 0.5631]) tensor([0.5250, 0.0913, 0.1318, 0.2519]) -Greedy action tensor([ 0.3938, -0.0851, -0.0038, -0.2498]) tensor([0.3550, 0.2199, 0.2385, 0.1865]) -Greedy action tensor([ 1.2560, -0.5284, -0.2733, 0.2757]) tensor([0.5683, 0.0954, 0.1231, 0.2132]) -Greedy action tensor([ 0.1194, -0.5380, 0.2618, 0.0426]) tensor([0.2780, 0.1441, 0.3205, 0.2574]) -Greedy action tensor([ 1.5610, -0.7577, -0.3147, 0.4615]) tensor([0.6311, 0.0621, 0.0967, 0.2102]) -Greedy action tensor([ 0.9441, -0.2970, -0.2780, 0.0535]) tensor([0.5015, 0.1450, 0.1477, 0.2058]) -Greedy action tensor([ 1.1812, -0.0884, -0.1485, 0.4868]) tensor([0.4890, 0.1374, 0.1294, 0.2442]) -Greedy action tensor([ 1.3811, -0.6672, -0.3927, 0.1657]) tensor([0.6269, 0.0808, 0.1064, 0.1859]) -Greedy action tensor([ 1.7450, -0.5803, -0.3497, 0.4953]) tensor([0.6634, 0.0648, 0.0817, 0.1901]) -Greedy action tensor([ 1.8251, 0.0544, -0.2094, 0.6319]) tensor([0.6233, 0.1061, 0.0815, 0.1890]) -Greedy action tensor([ 1.1361, -0.1901, -0.1257, 0.0330]) tensor([0.5318, 0.1412, 0.1506, 0.1765]) -Greedy action tensor([ 1.7052, -0.8106, -0.0315, 0.6975]) tensor([0.6165, 0.0498, 0.1086, 0.2251]) -Greedy action tensor([ 2.3252, -0.7482, -0.3003, 0.3596]) tensor([0.7945, 0.0368, 0.0575, 0.1113]) -Greedy action tensor([ 1.4374, -0.0699, -0.5540, 0.5780]) tensor([0.5614, 0.1243, 0.0766, 0.2377]) -Greedy action tensor([ 1.2789, 0.0489, 0.1878, -0.3811]) tensor([0.5500, 0.1607, 0.1847, 0.1046]) -Greedy action tensor([ 1.3658, -0.4473, -0.7824, 0.2810]) tensor([0.6181, 0.1008, 0.0721, 0.2089]) -Greedy action tensor([ 1.2793, -0.6056, 0.1317, 0.0958]) tensor([0.5632, 0.0855, 0.1788, 0.1725]) -Greedy action tensor([ 1.2877, 0.0705, -0.3511, 0.0863]) tensor([0.5583, 0.1653, 0.1084, 0.1679]) -Greedy action tensor([ 2.3666, -1.5596, 0.1037, 0.2181]) tensor([0.8062, 0.0159, 0.0839, 0.0940]) -Greedy action tensor([ 1.8430, -0.3770, -0.5709, 0.0800]) tensor([0.7301, 0.0793, 0.0653, 0.1252]) -Greedy action tensor([ 1.4050, 0.0858, -0.7597, 0.2498]) tensor([0.5892, 0.1575, 0.0676, 0.1856]) -Greedy action tensor([ 1.6109, -0.5529, -0.1961, 0.1126]) tensor([0.6655, 0.0765, 0.1092, 0.1488]) -Greedy action tensor([ 1.7965, -0.8779, -0.3767, -0.0474]) tensor([0.7457, 0.0514, 0.0849, 0.1180]) -Greedy action tensor([ 1.2744, -0.7463, -0.1656, 0.4766]) tensor([0.5495, 0.0728, 0.1302, 0.2474]) -Greedy action tensor([ 1.6510, -0.1002, -0.4377, 0.2896]) tensor([0.6436, 0.1117, 0.0797, 0.1650]) -Greedy action tensor([ 1.0332, -0.5974, -0.0135, 0.2428]) tensor([0.4999, 0.0979, 0.1755, 0.2268]) -Greedy action tensor([ 1.4706, -0.7075, -0.4475, 0.6587]) tensor([0.5868, 0.0665, 0.0862, 0.2605]) -Greedy action tensor([ 0.7682, -0.1968, -0.1351, 0.0460]) tensor([0.4402, 0.1677, 0.1784, 0.2138]) -Greedy action tensor([ 1.2329, -0.2338, -0.6770, 0.2981]) tensor([0.5645, 0.1302, 0.0836, 0.2217]) -Greedy action tensor([ 1.2860, -0.0623, -0.4098, 0.0915]) tensor([0.5728, 0.1487, 0.1051, 0.1735]) -Greedy action tensor([ 2.3184, -0.1796, -0.6758, 0.2248]) tensor([0.7965, 0.0655, 0.0399, 0.0982]) -Greedy action tensor([ 0.8080, -0.2081, -0.1828, 0.0111]) tensor([0.4579, 0.1657, 0.1700, 0.2064]) -Greedy action tensor([ 1.1590, -0.2166, -0.0635, -0.0029]) tensor([0.5376, 0.1359, 0.1583, 0.1682]) -Greedy action tensor([ 1.3015, -0.0465, -0.4821, 0.6548]) tensor([0.5124, 0.1331, 0.0861, 0.2684]) -Greedy action tensor([ 1.3914, -0.5952, -0.2016, 0.0755]) tensor([0.6216, 0.0853, 0.1264, 0.1667]) -Greedy action tensor([ 1.4192, -0.8261, -0.2640, 0.2094]) tensor([0.6289, 0.0666, 0.1168, 0.1876]) -Greedy action tensor([ 1.0114, -0.2336, -0.3955, 0.1322]) tensor([0.5133, 0.1478, 0.1257, 0.2131]) -Greedy action tensor([ 1.2380, -0.9601, -0.3832, 0.4381]) tensor([0.5688, 0.0631, 0.1124, 0.2556]) -Greedy action tensor([ 1.1961, -0.0638, -0.6275, 0.3125]) tensor([0.5381, 0.1526, 0.0869, 0.2224]) -Greedy action tensor([ 1.8044, -0.6153, -0.3651, 0.4954]) tensor([0.6788, 0.0604, 0.0775, 0.1833]) -Greedy action tensor([ 1.1546, -0.4233, -0.3072, 0.0729]) tensor([0.5627, 0.1161, 0.1304, 0.1908]) -Greedy action tensor([ 0.7454, -0.5121, -0.2211, 0.3903]) tensor([0.4227, 0.1202, 0.1608, 0.2963]) -Greedy action tensor([ 1.4508, -0.3316, -0.2211, 0.1634]) tensor([0.6127, 0.1031, 0.1151, 0.1691]) -Greedy action tensor([ 1.3695, -0.5870, -0.1926, 0.2086]) tensor([0.6009, 0.0849, 0.1260, 0.1882]) -Greedy action tensor([ 2.3034, -0.2492, -0.1717, 0.4829]) tensor([0.7553, 0.0588, 0.0636, 0.1223]) -Greedy action tensor([ 1.2019, -0.7052, -0.3996, 0.0555]) tensor([0.5996, 0.0890, 0.1209, 0.1905]) -Greedy action tensor([ 1.6939, -0.6472, -0.5899, 0.3400]) tensor([0.6867, 0.0661, 0.0700, 0.1773]) -Greedy action tensor([ 1.3034, -0.6265, -0.1444, 0.1089]) tensor([0.5941, 0.0862, 0.1397, 0.1799]) -Greedy action tensor([ 1.4691, -0.4650, -0.5234, 0.9816]) tensor([0.5277, 0.0763, 0.0720, 0.3241]) -Greedy action tensor([ 1.0427, -0.3923, -0.1539, 0.2757]) tensor([0.4988, 0.1188, 0.1508, 0.2317]) -Greedy action tensor([ 1.4765, 0.0579, -0.0444, 0.2545]) tensor([0.5697, 0.1379, 0.1245, 0.1679]) -Greedy action tensor([ 1.9171, -1.0832, -0.2364, 0.8269]) tensor([0.6658, 0.0331, 0.0773, 0.2238]) -Greedy action tensor([ 1.7854, 0.0252, -0.6928, 0.4081]) tensor([0.6630, 0.1141, 0.0556, 0.1673]) -Greedy action tensor([ 1.3302, -0.2567, 0.0457, 0.3720]) tensor([0.5362, 0.1097, 0.1484, 0.2057]) -Greedy action tensor([ 1.1050, 0.2253, 0.4108, -0.1913]) tensor([0.4571, 0.1896, 0.2283, 0.1250]) -Greedy action tensor([ 1.0554, -0.7716, -0.2639, 0.2892]) tensor([0.5283, 0.0850, 0.1412, 0.2455]) -Greedy action tensor([ 0.4258, 0.0595, -0.1308, 0.0902]) tensor([0.3354, 0.2325, 0.1922, 0.2398]) -Greedy action tensor([ 0.8155, -0.5501, -0.2694, -0.5893]) tensor([0.5439, 0.1388, 0.1838, 0.1335]) -Greedy action tensor([ 0.4563, -0.1420, -0.1490, -0.6420]) tensor([0.4117, 0.2263, 0.2247, 0.1373]) -Greedy action tensor([ 0.7173, -0.3323, -0.0084, -0.1156]) tensor([0.4407, 0.1543, 0.2133, 0.1916]) -Greedy action tensor([ 0.4871, 0.1101, -0.0832, -0.0505]) tensor([0.3527, 0.2419, 0.1994, 0.2060]) -Greedy action tensor([ 0.6144, -0.0890, -0.0349, -0.0443]) tensor([0.3945, 0.1952, 0.2061, 0.2042]) -Greedy action tensor([ 0.8400, -0.7073, -0.0431, -0.4495]) tensor([0.5258, 0.1119, 0.2174, 0.1448]) -Greedy action tensor([ 0.4196, -0.4285, -0.0367, -0.1556]) tensor([0.3810, 0.1632, 0.2414, 0.2144]) -Greedy action tensor([ 0.5541, -0.2474, 0.1469, -0.1078]) tensor([0.3802, 0.1706, 0.2530, 0.1961]) -Greedy action tensor([ 0.5805, 0.1021, -0.0316, -0.3704]) tensor([0.3924, 0.2432, 0.2128, 0.1516]) -Greedy action tensor([ 0.7259, -0.4793, -0.0664, -0.4442]) tensor([0.4848, 0.1453, 0.2195, 0.1504]) -Greedy action tensor([ 1.0906, -1.2135, -0.0708, -0.6903]) tensor([0.6324, 0.0631, 0.1980, 0.1065]) -Greedy action tensor([ 0.8678, -0.5524, 0.0537, -0.2253]) tensor([0.4951, 0.1196, 0.2193, 0.1659]) -Greedy action tensor([ 0.8235, -0.3774, 0.0288, -0.3727]) tensor([0.4866, 0.1464, 0.2198, 0.1471]) -Greedy action tensor([ 0.7202, -0.5072, 0.0329, -0.3760]) tensor([0.4695, 0.1376, 0.2361, 0.1569]) -Greedy action tensor([ 0.9848, -0.9069, 0.0593, -0.5938]) tensor([0.5703, 0.0860, 0.2260, 0.1176]) -Greedy action tensor([ 0.6325, -0.3695, -0.0307, -0.1091]) tensor([0.4239, 0.1557, 0.2184, 0.2020]) -Greedy action tensor([ 1.2409, -0.9356, 0.0732, -0.5947]) tensor([0.6313, 0.0716, 0.1964, 0.1007]) -Greedy action tensor([ 0.3788, -0.1344, -0.0833, -0.2598]) tensor([0.3628, 0.2171, 0.2285, 0.1916]) -Greedy action tensor([ 0.7960, -0.6296, 0.3372, -0.5550]) tensor([0.4692, 0.1128, 0.2965, 0.1215]) -Greedy action tensor([ 0.5332, -0.0256, -0.1010, 0.0528]) tensor([0.3675, 0.2102, 0.1949, 0.2273]) -Greedy action tensor([ 0.8408, -0.2498, 0.1590, -0.6293]) tensor([0.4827, 0.1622, 0.2441, 0.1110]) -Greedy action tensor([ 0.8984, -0.7546, -0.1462, -0.4968]) tensor([0.5583, 0.1069, 0.1964, 0.1383]) -Greedy action tensor([ 0.4899, -0.6073, 0.1379, -0.6590]) tensor([0.4248, 0.1418, 0.2988, 0.1346]) -Greedy action tensor([ 0.3825, -0.0220, -0.0626, 0.0367]) tensor([0.3316, 0.2213, 0.2125, 0.2347]) -Greedy action tensor([ 0.7680, -0.3910, -0.0439, -0.2310]) tensor([0.4704, 0.1476, 0.2088, 0.1732]) -Greedy action tensor([ 0.7112, -0.2585, -0.0239, -0.1194]) tensor([0.4358, 0.1653, 0.2090, 0.1899]) -Greedy action tensor([ 0.5078, -0.6034, -0.2217, -0.1179]) tensor([0.4262, 0.1403, 0.2055, 0.2280]) -Greedy action tensor([ 0.8128, -0.4540, 0.1159, -0.2077]) tensor([0.4672, 0.1316, 0.2327, 0.1684]) -Greedy action tensor([ 0.0981, -0.2207, -0.1332, -0.4399]) tensor([0.3221, 0.2342, 0.2556, 0.1881]) -Greedy action tensor([ 0.8574, -0.5636, 0.0855, -0.4668]) tensor([0.5077, 0.1226, 0.2346, 0.1351]) -Greedy action tensor([ 0.7313, -0.3565, -0.0468, -0.4250]) tensor([0.4737, 0.1596, 0.2176, 0.1491]) -Greedy action tensor([ 0.7011, -0.2183, 0.0488, -0.1376]) tensor([0.4252, 0.1695, 0.2215, 0.1838]) -Greedy action tensor([ 0.8041, -0.2565, -0.0578, -0.2724]) tensor([0.4741, 0.1642, 0.2002, 0.1616]) -Greedy action tensor([ 0.7696, -0.2308, -0.1433, 0.0980]) tensor([0.4386, 0.1613, 0.1760, 0.2241]) -Greedy action tensor([ 0.7652, -0.4355, -0.0331, -0.2759]) tensor([0.4753, 0.1430, 0.2139, 0.1678]) -Greedy action tensor([ 0.4423, -0.2246, -0.2408, -0.5529]) tensor([0.4188, 0.2149, 0.2115, 0.1548]) -Greedy action tensor([ 0.6324, -0.4233, 0.0712, -0.2079]) tensor([0.4255, 0.1481, 0.2428, 0.1836]) -Greedy action tensor([ 0.5609, -0.3370, -0.1627, -0.7560]) tensor([0.4629, 0.1886, 0.2245, 0.1240]) -Greedy action tensor([ 0.9356, -0.2567, -0.1214, -0.1198]) tensor([0.5002, 0.1518, 0.1738, 0.1741]) -Greedy action tensor([ 0.7530, -0.0968, -0.0409, -0.0116]) tensor([0.4264, 0.1823, 0.1928, 0.1985]) -Greedy action tensor([ 0.6380, 0.0788, -0.0199, -0.3108]) tensor([0.4037, 0.2308, 0.2091, 0.1563]) -Greedy action tensor([ 0.9092, -0.4055, -0.0055, -0.1720]) tensor([0.4979, 0.1337, 0.1995, 0.1689]) -Greedy action tensor([ 1.5238, -0.8880, 0.0796, -1.1006]) tensor([0.7153, 0.0641, 0.1688, 0.0518]) -Greedy action tensor([ 0.5218, -0.3627, -0.0064, -0.2424]) tensor([0.4051, 0.1673, 0.2389, 0.1887]) -Greedy action tensor([ 0.7451, -0.4942, -0.1863, -0.4481]) tensor([0.5033, 0.1458, 0.1983, 0.1526]) -Greedy action tensor([ 0.5938, -0.3772, -0.1680, -0.2094]) tensor([0.4360, 0.1651, 0.2036, 0.1953]) -Greedy action tensor([ 0.8959, -0.7692, -0.0448, -0.4745]) tensor([0.5454, 0.1032, 0.2129, 0.1385]) -Greedy action tensor([ 0.1401, -0.0613, -0.0133, -0.2383]) tensor([0.2976, 0.2433, 0.2553, 0.2038]) -Greedy action tensor([ 0.5927, -0.7243, -0.0988, -0.4973]) tensor([0.4751, 0.1273, 0.2379, 0.1597]) -Greedy action tensor([ 0.3970, 0.2064, -0.0944, -0.2859]) tensor([0.3397, 0.2808, 0.2079, 0.1716]) -Greedy action tensor([ 0.6611, -0.4761, -0.1330, -0.2445]) tensor([0.4593, 0.1473, 0.2076, 0.1857]) -Greedy action tensor([ 1.4173, -0.9717, -0.0082, -0.6715]) tensor([0.6868, 0.0630, 0.1651, 0.0851]) -Greedy action tensor([ 0.7568, -0.3397, -0.0201, -0.3169]) tensor([0.4683, 0.1564, 0.2153, 0.1600]) -Greedy action tensor([ 0.5126, -0.0445, -0.0862, -0.2274]) tensor([0.3847, 0.2204, 0.2114, 0.1835]) -Greedy action tensor([ 0.4805, 0.0395, -0.1950, 0.0295]) tensor([0.3585, 0.2307, 0.1824, 0.2284]) -Greedy action tensor([ 0.3499, 0.2297, -0.0298, 0.1340]) tensor([0.2962, 0.2626, 0.2026, 0.2386]) -Greedy action tensor([ 0.8643, -0.3268, -0.1630, -0.1732]) tensor([0.4960, 0.1507, 0.1775, 0.1757]) -Greedy action tensor([ 0.5814, -0.0106, -0.1106, -0.2744]) tensor([0.4034, 0.2232, 0.2019, 0.1714]) -Greedy action tensor([ 0.9240, -0.6438, -0.0185, -0.2622]) tensor([0.5253, 0.1095, 0.2047, 0.1604]) -Greedy action tensor([ 0.6866, -0.2284, -0.0930, -0.3529]) tensor([0.4519, 0.1810, 0.2072, 0.1598]) -Greedy action tensor([ 0.9102, -0.6636, 0.0424, -0.6938]) tensor([0.5470, 0.1134, 0.2297, 0.1100]) -Greedy action tensor([ 0.3665, 0.1929, -0.0286, 0.0223]) tensor([0.3103, 0.2608, 0.2090, 0.2199]) -Greedy action tensor([ 1.0127, -1.1597, -0.0504, -0.5326]) tensor([0.5979, 0.0681, 0.2065, 0.1275]) -Greedy action tensor([ 0.8812, -0.4098, -0.2796, -0.1615]) tensor([0.5153, 0.1417, 0.1614, 0.1816]) -Greedy action tensor([ 0.5948, -0.5352, -0.1907, -0.0797]) tensor([0.4370, 0.1412, 0.1992, 0.2226]) -Greedy action tensor([ 0.7157, -1.0293, -0.0022, -0.4871]) tensor([0.5095, 0.0890, 0.2485, 0.1530]) -Greedy action tensor([ 0.4602, 0.1579, -0.1200, -0.0544]) tensor([0.3452, 0.2552, 0.1933, 0.2064]) -Greedy action tensor([ 1.2770, -0.6543, -0.0869, -0.4893]) tensor([0.6363, 0.0922, 0.1627, 0.1088]) -Greedy action tensor([ 0.6637, -0.3147, 0.0095, -0.2630]) tensor([0.4364, 0.1640, 0.2269, 0.1727]) -Greedy action tensor([ 0.7024, -0.6381, -0.0705, -0.1065]) tensor([0.4611, 0.1207, 0.2129, 0.2053]) -Greedy action tensor([ 1.2435, -0.8818, 0.0966, -0.7884]) tensor([0.6377, 0.0761, 0.2026, 0.0836]) -Greedy action tensor([ 1.0688, -0.5529, -0.0909, -0.4367]) tensor([0.5770, 0.1140, 0.1809, 0.1280]) -Greedy action tensor([ 0.5458, -0.0577, -0.0936, -0.0940]) tensor([0.3843, 0.2102, 0.2028, 0.2027]) -Greedy action tensor([ 0.9669, -0.4925, 0.2092, -0.6701]) tensor([0.5275, 0.1226, 0.2473, 0.1026]) -Greedy action tensor([ 0.7969, -0.5217, -0.0762, -0.1722]) tensor([0.4844, 0.1296, 0.2023, 0.1838]) -Greedy action tensor([ 0.6380, -0.2999, -0.0641, -0.2936]) tensor([0.4384, 0.1716, 0.2173, 0.1727]) -Greedy action tensor([ 1.0130, -0.5932, 0.1446, -0.5733]) tensor([0.5480, 0.1099, 0.2299, 0.1122]) -Greedy action tensor([ 0.9859, -0.6904, 0.0730, -0.6316]) tensor([0.5597, 0.1047, 0.2246, 0.1110]) -Greedy action tensor([ 0.7994, -0.6149, 0.0295, -0.1897]) tensor([0.4812, 0.1170, 0.2228, 0.1790]) -Greedy action tensor([ 0.3552, 0.0126, -0.0681, -0.3582]) tensor([0.3503, 0.2487, 0.2294, 0.1716]) -Greedy action tensor([ 0.9039, -1.5759, -0.5186, 0.9231]) tensor([0.4266, 0.0357, 0.1029, 0.4348]) -Greedy action tensor([ 0.4415, -1.5392, -0.3769, 0.6324]) tensor([0.3585, 0.0495, 0.1581, 0.4339]) -Greedy action tensor([ 0.5836, -1.2074, 0.9131, 0.7915]) tensor([0.2640, 0.0440, 0.3670, 0.3250]) -Greedy action tensor([ 0.9743, -0.9359, -0.8722, 0.8611]) tensor([0.4548, 0.0673, 0.0718, 0.4061]) -Greedy action tensor([ 0.6970, -1.3171, 2.3098, 0.1154]) tensor([0.1490, 0.0199, 0.7477, 0.0833]) -Greedy action tensor([-1.3587, -0.4989, 0.1660, -0.8465]) tensor([0.1039, 0.2455, 0.4773, 0.1734]) -Greedy action tensor([-1.2404, 0.6108, 1.2074, -1.1792]) tensor([0.0500, 0.3185, 0.5784, 0.0532]) -Greedy action tensor([ 1.0232, -1.3150, -0.5820, 1.3911]) tensor([0.3647, 0.0352, 0.0732, 0.5269]) -Greedy action tensor([ 0.0310, -1.7228, -0.2161, 0.4442]) tensor([0.2885, 0.0499, 0.2254, 0.4362]) -Greedy action tensor([ 1.9059, -1.3674, -0.0652, 0.8972]) tensor([0.6486, 0.0246, 0.0903, 0.2365]) -Greedy action tensor([-0.0131, -1.0407, -0.4667, 0.8816]) tensor([0.2252, 0.0806, 0.1431, 0.5511]) -Greedy action tensor([ 0.3349, -0.8832, -0.0609, -0.8497]) tensor([0.4396, 0.1300, 0.2959, 0.1345]) -Greedy action tensor([-0.9835, -0.9824, -0.3595, -0.6389]) tensor([0.1894, 0.1896, 0.3535, 0.2674]) -Greedy action tensor([-0.0439, -0.3665, 0.2750, -0.5739]) tensor([0.2711, 0.1964, 0.3729, 0.1596]) -Greedy action tensor([ 0.8907, 0.1090, 1.4348, -0.2931]) tensor([0.2868, 0.1313, 0.4942, 0.0878]) -Greedy action tensor([ 0.8105, -0.8386, -0.1202, 1.4833]) tensor([0.2820, 0.0542, 0.1112, 0.5526]) -Greedy action tensor([-0.8637, -1.0600, 0.1736, -0.4335]) tensor([0.1618, 0.1330, 0.4565, 0.2488]) -Greedy action tensor([-0.3326, -0.5414, 0.9843, -0.8803]) tensor([0.1634, 0.1326, 0.6096, 0.0945]) -Greedy action tensor([ 0.1812, 0.2222, -0.4613, 1.0597]) tensor([0.2010, 0.2094, 0.1057, 0.4839]) -Greedy action tensor([-0.1798, -0.2965, -0.7148, -0.3334]) tensor([0.3000, 0.2670, 0.1757, 0.2573]) -Greedy action tensor([ 1.7162, 0.2812, 0.6827, -0.0938]) tensor([0.5690, 0.1355, 0.2024, 0.0931]) -Greedy action tensor([ 0.3594, -0.2786, 0.1986, -0.2610]) tensor([0.3427, 0.1811, 0.2918, 0.1843]) -Greedy action tensor([ 0.2746, -0.3761, 0.1238, 0.5026]) tensor([0.2749, 0.1434, 0.2364, 0.3453]) -Greedy action tensor([ 0.4872, -1.1343, 1.2945, 0.8227]) tensor([0.2067, 0.0408, 0.4634, 0.2891]) -Greedy action tensor([-0.8117, -1.2458, 0.2247, -0.2574]) tensor([0.1611, 0.1044, 0.4541, 0.2804]) -Greedy action tensor([ 0.1256, -0.0511, 0.3258, 0.4962]) tensor([0.2218, 0.1859, 0.2710, 0.3213]) -Greedy action tensor([ 1.1053, -0.0164, 0.3066, -0.3534]) tensor([0.4980, 0.1622, 0.2240, 0.1158]) -Greedy action tensor([ 0.1906, -1.8759, 0.1614, 0.7426]) tensor([0.2608, 0.0330, 0.2533, 0.4529]) -Greedy action tensor([-1.4184, 1.0424, 0.6328, -0.7082]) tensor([0.0444, 0.5200, 0.3453, 0.0903]) -Greedy action tensor([-0.1562, -0.7152, 1.3581, 0.7714]) tensor([0.1157, 0.0661, 0.5258, 0.2924]) -Greedy action tensor([-0.5440, -0.2327, 1.5472, -0.5064]) tensor([0.0870, 0.1187, 0.7040, 0.0903]) -Greedy action tensor([-0.1264, -0.0304, 0.3868, 1.4380]) tensor([0.1169, 0.1287, 0.1954, 0.5590]) -Greedy action tensor([ 0.1563, -0.8334, 1.1627, -0.6131]) tensor([0.2188, 0.0813, 0.5985, 0.1014]) -Greedy action tensor([ 0.1370, -2.3257, -0.1275, -0.0206]) tensor([0.3694, 0.0315, 0.2836, 0.3156]) -Greedy action tensor([ 0.6483, -0.5408, 0.8933, 0.1364]) tensor([0.3143, 0.0957, 0.4016, 0.1884]) -Greedy action tensor([-0.0642, 0.3941, 0.2662, -0.1746]) tensor([0.2054, 0.3248, 0.2858, 0.1839]) -Greedy action tensor([ 0.4387, -1.5181, -0.6552, 1.2888]) tensor([0.2620, 0.0370, 0.0878, 0.6132]) -Greedy action tensor([-0.0499, -1.5721, 0.0882, 0.2211]) tensor([0.2719, 0.0593, 0.3122, 0.3566]) -Greedy action tensor([-1.4757, -1.2824, 0.4229, -0.8779]) tensor([0.0934, 0.1133, 0.6235, 0.1698]) -Greedy action tensor([-0.2914, 0.1002, 0.2534, -0.8377]) tensor([0.2091, 0.3093, 0.3605, 0.1211]) -Greedy action tensor([ 0.2721, -2.0094, 0.2926, 0.1223]) tensor([0.3351, 0.0342, 0.3421, 0.2885]) -Greedy action tensor([ 1.3054, -0.8873, 0.4865, 0.4335]) tensor([0.5074, 0.0566, 0.2237, 0.2122]) -Greedy action tensor([-0.2941, -1.8291, -0.3356, -0.3092]) tensor([0.3165, 0.0682, 0.3036, 0.3117]) -Greedy action tensor([ 0.1008, -0.2226, -0.1662, -0.5466]) tensor([0.3319, 0.2402, 0.2541, 0.1737]) -Greedy action tensor([-0.5906, -0.6662, -0.0054, -0.2685]) tensor([0.1960, 0.1817, 0.3519, 0.2704]) -Greedy action tensor([-0.1300, -0.3622, -0.4856, 0.5451]) tensor([0.2243, 0.1778, 0.1572, 0.4406]) -Greedy action tensor([0.6660, 0.1398, 0.2199, 0.9575]) tensor([0.2802, 0.1655, 0.1793, 0.3750]) -Greedy action tensor([1.0393, 0.2368, 0.0011, 0.2402]) tensor([0.4440, 0.1990, 0.1572, 0.1997]) -Greedy action tensor([-0.8700, -0.6835, 1.0201, -0.6483]) tensor([0.0993, 0.1196, 0.6572, 0.1239]) -Greedy action tensor([-0.1526, -0.1795, 0.5094, -1.1704]) tensor([0.2340, 0.2278, 0.4537, 0.0846]) -Greedy action tensor([ 0.0710, -2.2247, 0.1292, -0.1561]) tensor([0.3381, 0.0340, 0.3584, 0.2694]) -Greedy action tensor([-0.1796, -1.0827, 0.3750, 0.5903]) tensor([0.1885, 0.0764, 0.3282, 0.4070]) -Greedy action tensor([-1.0278, -0.7635, -0.0278, -1.0191]) tensor([0.1658, 0.2160, 0.4508, 0.1673]) -Greedy action tensor([-0.7059, -0.8614, 0.4682, -0.3598]) tensor([0.1537, 0.1316, 0.4974, 0.2173]) -Greedy action tensor([-0.9501, -0.9183, 0.3977, -1.0018]) tensor([0.1464, 0.1511, 0.5635, 0.1390]) -Greedy action tensor([-1.0992, -1.1515, -0.2652, 0.1791]) tensor([0.1275, 0.1210, 0.2936, 0.4579]) -Greedy action tensor([-1.4832, -0.9070, 0.2996, -0.3019]) tensor([0.0834, 0.1485, 0.4962, 0.2719]) -Greedy action tensor([-0.1386, -1.4849, 1.4669, -0.6827]) tensor([0.1466, 0.0381, 0.7302, 0.0851]) -Greedy action tensor([-0.6599, 0.3204, -1.6062, -0.4480]) tensor([0.1891, 0.5039, 0.0734, 0.2337]) -Greedy action tensor([-0.1170, -1.7464, 0.4822, 0.0643]) tensor([0.2372, 0.0465, 0.4319, 0.2844]) -Greedy action tensor([-0.1845, 0.4165, 1.7970, -0.5355]) tensor([0.0928, 0.1692, 0.6728, 0.0653]) -Greedy action tensor([-0.8951, -1.3413, -0.4079, 0.7046]) tensor([0.1217, 0.0779, 0.1980, 0.6024]) -Greedy action tensor([ 0.3276, -2.0524, 1.0833, -0.6243]) tensor([0.2772, 0.0257, 0.5902, 0.1070]) -Greedy action tensor([-0.3727, -1.0158, 1.8265, 0.4211]) tensor([0.0784, 0.0412, 0.7070, 0.1734]) -Greedy action tensor([ 0.6050, -1.3009, -0.5527, -0.1177]) tensor([0.5132, 0.0763, 0.1613, 0.2492]) -Greedy action tensor([ 0.4438, 0.0036, -0.3182, 0.3113]) tensor([0.3348, 0.2156, 0.1563, 0.2933]) -Greedy action tensor([ 0.3842, -0.5548, 0.9319, -0.5320]) tensor([0.2841, 0.1111, 0.4912, 0.1136]) -Greedy action tensor([-0.1346, -1.4984, -0.1778, 0.2108]) tensor([0.2758, 0.0705, 0.2641, 0.3896]) -Greedy action tensor([ 0.2430, -1.3057, 0.3937, -0.2259]) tensor([0.3332, 0.0708, 0.3874, 0.2085]) -Greedy action tensor([-0.3329, 0.3085, 0.2619, -1.1456]) tensor([0.1940, 0.3684, 0.3516, 0.0861]) -Greedy action tensor([ 0.8558, -0.5279, 0.4008, 1.1378]) tensor([0.3114, 0.0781, 0.1976, 0.4129]) -Greedy action tensor([ 0.9121, -0.9534, 1.2744, -0.2279]) tensor([0.3435, 0.0532, 0.4935, 0.1099]) -Greedy action tensor([-0.5668, -0.6556, -1.0760, 0.0595]) tensor([0.2280, 0.2086, 0.1370, 0.4265]) -Greedy action tensor([ 0.1035, -0.5470, 0.9471, 0.1937]) tensor([0.2024, 0.1056, 0.4705, 0.2215]) -Greedy action tensor([-1.3831, -0.0784, -0.6952, -1.1440]) tensor([0.1259, 0.4639, 0.2504, 0.1598]) -Greedy action tensor([-0.2133, 0.0782, -1.1493, 0.5113]) tensor([0.2086, 0.2792, 0.0818, 0.4305]) -Greedy action tensor([-0.9679, -0.2222, -0.4905, -0.2106]) tensor([0.1459, 0.3076, 0.2352, 0.3112]) -Greedy action tensor([ 0.7849, -0.8928, 0.8138, 0.2948]) tensor([0.3535, 0.0660, 0.3639, 0.2166]) -Greedy action tensor([ 0.3892, -0.7363, -0.2289, 2.4168]) tensor([0.1057, 0.0343, 0.0570, 0.8030]) -Greedy action tensor([ 1.3018, -0.3067, -0.0163, 1.7485]) tensor([0.3299, 0.0660, 0.0883, 0.5157]) -Greedy action tensor([ 0.7846, -0.3782, 1.4485, 0.2908]) tensor([0.2587, 0.0809, 0.5025, 0.1579]) -Greedy action tensor([-0.4928, -0.0042, -1.4638, -0.3201]) tensor([0.2383, 0.3884, 0.0902, 0.2832]) -Greedy action tensor([ 1.3569, -0.4113, -0.2337, 0.4273]) tensor([0.5652, 0.0965, 0.1152, 0.2231]) -Greedy action tensor([ 0.9668, -0.5951, -0.1084, 0.1120]) tensor([0.5060, 0.1061, 0.1727, 0.2152]) -Greedy action tensor([ 0.9783, -0.3291, -0.4461, -0.0849]) tensor([0.5386, 0.1457, 0.1296, 0.1860]) -Greedy action tensor([ 1.3464, -0.0718, -1.0889, 0.2634]) tensor([0.5994, 0.1452, 0.0525, 0.2030]) -Greedy action tensor([ 1.6536, -0.6829, -0.0450, 0.3235]) tensor([0.6476, 0.0626, 0.1185, 0.1713]) -Greedy action tensor([ 1.9555, -0.3099, -0.8356, 0.3483]) tensor([0.7323, 0.0760, 0.0449, 0.1468]) -Greedy action tensor([ 1.5157, -0.2412, -0.4775, 0.1429]) tensor([0.6401, 0.1105, 0.0872, 0.1622]) -Greedy action tensor([ 1.5151, 0.0341, -0.1310, 0.4022]) tensor([0.5718, 0.1300, 0.1103, 0.1879]) -Greedy action tensor([ 1.3543, 0.0659, -0.7715, 0.5385]) tensor([0.5443, 0.1501, 0.0650, 0.2407]) -Greedy action tensor([ 1.4578, -0.6650, -0.1606, 0.3123]) tensor([0.6113, 0.0732, 0.1212, 0.1944]) -Greedy action tensor([ 1.6346, -0.3090, -0.5043, 0.6413]) tensor([0.6130, 0.0878, 0.0722, 0.2270]) -Greedy action tensor([ 1.9559, -0.3184, -0.4424, 0.3778]) tensor([0.7142, 0.0735, 0.0649, 0.1474]) -Greedy action tensor([ 1.3835, -0.5814, -0.3221, 0.1976]) tensor([0.6145, 0.0861, 0.1116, 0.1877]) -Greedy action tensor([ 1.0421, -0.2703, -0.1895, 0.1450]) tensor([0.5079, 0.1367, 0.1482, 0.2071]) -Greedy action tensor([ 1.6247, -0.5068, -0.3216, 0.5751]) tensor([0.6205, 0.0736, 0.0886, 0.2172]) -Greedy action tensor([ 1.7456, -0.7397, -0.2640, 0.5259]) tensor([0.6611, 0.0551, 0.0886, 0.1952]) -Greedy action tensor([ 1.8000, -0.0557, -0.4522, 0.5155]) tensor([0.6501, 0.1016, 0.0684, 0.1799]) -Greedy action tensor([ 0.9026, -0.4851, -0.4783, 0.3197]) tensor([0.4856, 0.1212, 0.1221, 0.2711]) -Greedy action tensor([ 1.8634, -0.2111, -0.5179, 0.3911]) tensor([0.6909, 0.0868, 0.0639, 0.1585]) -Greedy action tensor([ 1.6245, -0.6173, 0.0766, 0.8397]) tensor([0.5633, 0.0599, 0.1198, 0.2570]) -Greedy action tensor([ 1.7071, -0.3647, -0.9638, 0.3473]) tensor([0.6888, 0.0868, 0.0477, 0.1768]) -Greedy action tensor([ 1.2465, -0.4141, -0.1221, 0.1593]) tensor([0.5613, 0.1067, 0.1428, 0.1892]) -Greedy action tensor([ 1.1282, -0.4238, 0.1528, -0.1280]) tensor([0.5337, 0.1131, 0.2012, 0.1520]) -Greedy action tensor([ 1.4725, -0.3986, -0.5124, 0.4804]) tensor([0.6016, 0.0926, 0.0827, 0.2231]) -Greedy action tensor([ 1.6340, -0.3683, -0.0601, 0.4465]) tensor([0.6158, 0.0832, 0.1132, 0.1878]) -Greedy action tensor([ 1.2620, -0.5972, -0.1021, 0.4330]) tensor([0.5412, 0.0843, 0.1383, 0.2362]) -Greedy action tensor([ 1.2368, 0.2080, -0.0630, 0.2300]) tensor([0.5011, 0.1791, 0.1366, 0.1831]) -Greedy action tensor([ 0.6756, -0.1528, -0.0962, -0.1457]) tensor([0.4276, 0.1867, 0.1976, 0.1881]) -Greedy action tensor([ 1.2906, -0.2557, -0.1843, 0.0830]) tensor([0.5745, 0.1224, 0.1314, 0.1717]) -Greedy action tensor([ 1.9761, -0.5911, -0.2718, 0.5341]) tensor([0.7048, 0.0541, 0.0744, 0.1667]) -Greedy action tensor([ 1.6592, -0.4307, -0.6492, 0.6687]) tensor([0.6271, 0.0776, 0.0623, 0.2329]) -Greedy action tensor([ 1.4382, -0.3748, -0.0911, 0.1902]) tensor([0.5999, 0.0979, 0.1300, 0.1722]) -Greedy action tensor([ 1.3634, -0.4666, -0.1334, 0.1836]) tensor([0.5912, 0.0948, 0.1323, 0.1817]) -Greedy action tensor([ 1.3276, -0.4631, -0.2413, 0.4989]) tensor([0.5520, 0.0921, 0.1150, 0.2410]) -Greedy action tensor([ 1.4149, -0.9717, -0.5736, 0.5515]) tensor([0.6059, 0.0557, 0.0829, 0.2555]) -Greedy action tensor([ 1.2065, -0.2958, -0.4554, 0.2858]) tensor([0.5523, 0.1229, 0.1048, 0.2200]) -Greedy action tensor([ 1.8419, -0.3907, -0.0805, 0.4669]) tensor([0.6639, 0.0712, 0.0971, 0.1679]) -Greedy action tensor([ 1.3729, -0.2123, -0.5141, 0.3891]) tensor([0.5779, 0.1184, 0.0876, 0.2161]) -Greedy action tensor([ 1.8379, -0.7720, -0.4876, 0.7413]) tensor([0.6643, 0.0489, 0.0649, 0.2219]) -Greedy action tensor([ 1.4929, -0.5881, -0.1332, 0.4475]) tensor([0.5977, 0.0746, 0.1176, 0.2101]) -Greedy action tensor([ 1.4483, 0.1681, -0.5370, 0.2950]) tensor([0.5777, 0.1606, 0.0793, 0.1823]) -Greedy action tensor([ 0.6058, -0.2776, -0.1499, 0.1332]) tensor([0.3990, 0.1649, 0.1874, 0.2487]) -Greedy action tensor([ 0.7349, -0.2959, 0.0219, 0.2206]) tensor([0.4090, 0.1459, 0.2005, 0.2446]) -Greedy action tensor([ 1.5341, -0.5228, -0.5936, 0.2820]) tensor([0.6524, 0.0834, 0.0777, 0.1865]) -Greedy action tensor([ 1.6128, -0.7818, -0.1437, 0.0699]) tensor([0.6768, 0.0617, 0.1168, 0.1447]) -Greedy action tensor([ 2.3376, -0.0243, 0.0931, 0.2825]) tensor([0.7528, 0.0709, 0.0798, 0.0964]) -Greedy action tensor([ 1.0412, -0.5417, -0.1712, 0.3637]) tensor([0.4973, 0.1021, 0.1480, 0.2526]) -Greedy action tensor([ 0.9637, -0.3269, -0.0594, 0.4099]) tensor([0.4526, 0.1245, 0.1627, 0.2602]) -Greedy action tensor([ 1.2645, -0.2830, -0.3372, 0.4906]) tensor([0.5332, 0.1135, 0.1075, 0.2459]) -Greedy action tensor([ 1.2888, -0.1879, -0.7988, 0.1921]) tensor([0.5930, 0.1354, 0.0735, 0.1980]) -Greedy action tensor([ 1.7877, -0.2286, -0.1879, 0.3995]) tensor([0.6573, 0.0875, 0.0912, 0.1640]) -Greedy action tensor([ 1.2501, 0.1950, -0.0137, 0.2606]) tensor([0.4994, 0.1739, 0.1411, 0.1856]) -Greedy action tensor([ 1.2504, -0.7209, -0.1996, 0.2899]) tensor([0.5693, 0.0793, 0.1335, 0.2179]) -Greedy action tensor([ 1.7400, -0.3368, -0.3818, 0.6105]) tensor([0.6376, 0.0799, 0.0764, 0.2061]) -Greedy action tensor([ 1.2260, -0.4438, -0.1750, -0.0395]) tensor([0.5825, 0.1097, 0.1435, 0.1643]) -Greedy action tensor([ 2.2176, -0.4372, -0.1669, 0.0468]) tensor([0.7834, 0.0551, 0.0722, 0.0894]) -Greedy action tensor([ 1.4937, -0.8319, -0.0836, 0.0729]) tensor([0.6469, 0.0632, 0.1336, 0.1562]) -Greedy action tensor([ 1.7344, -0.2254, -0.6092, 0.2494]) tensor([0.6834, 0.0963, 0.0656, 0.1548]) -Greedy action tensor([ 1.2924, -0.1810, -0.5308, 0.4055]) tensor([0.5547, 0.1271, 0.0896, 0.2285]) -Greedy action tensor([ 1.3653, -0.1617, -0.8887, 0.4802]) tensor([0.5764, 0.1252, 0.0605, 0.2379]) -Greedy action tensor([ 2.0112, -0.6567, -0.1179, 0.6302]) tensor([0.6946, 0.0482, 0.0826, 0.1746]) -Greedy action tensor([ 1.4194, -0.1396, -0.7218, 0.1960]) tensor([0.6165, 0.1297, 0.0724, 0.1814]) -Greedy action tensor([ 0.9286, 0.1543, -0.7178, 0.1558]) tensor([0.4727, 0.2179, 0.0911, 0.2183]) -Greedy action tensor([ 1.1606, -0.3334, -0.3096, 0.1389]) tensor([0.5512, 0.1237, 0.1267, 0.1984]) -Greedy action tensor([ 1.3671, -1.2115, -0.3469, -0.1033]) tensor([0.6730, 0.0511, 0.1212, 0.1547]) -Greedy action tensor([ 2.1864, -0.7182, -0.4654, 0.4940]) tensor([0.7637, 0.0418, 0.0539, 0.1406]) -Greedy action tensor([ 1.5948, -0.6052, -0.0500, 0.5925]) tensor([0.5985, 0.0663, 0.1155, 0.2197]) -Greedy action tensor([ 3.1775, -1.5702, -0.1737, 1.1609]) tensor([0.8497, 0.0074, 0.0298, 0.1131]) -Greedy action tensor([ 2.0424, -0.9543, -0.2986, 0.5172]) tensor([0.7333, 0.0366, 0.0706, 0.1595]) -Greedy action tensor([ 2.0999, -0.6184, -0.3862, 0.5239]) tensor([0.7374, 0.0487, 0.0614, 0.1525]) -Greedy action tensor([ 1.2809, -0.1327, -0.7955, 0.5319]) tensor([0.5430, 0.1321, 0.0681, 0.2568]) -Greedy action tensor([ 1.8576, -0.9338, 0.0525, 0.6477]) tensor([0.6561, 0.0402, 0.1079, 0.1957]) -Greedy action tensor([ 0.5633, 0.0613, -0.5212, 0.7627]) tensor([0.3160, 0.1913, 0.1068, 0.3858]) -Greedy action tensor([ 1.5864, -0.6127, -0.0990, 0.2687]) tensor([0.6394, 0.0709, 0.1185, 0.1712]) -Greedy action tensor([ 1.5680, -0.6624, -0.4054, 0.2671]) tensor([0.6584, 0.0708, 0.0915, 0.1793]) -Greedy action tensor([ 2.3996, -0.3690, -0.3405, 1.0913]) tensor([0.7155, 0.0449, 0.0462, 0.1934]) -Greedy action tensor([ 1.7535, -0.4939, -0.4001, 0.1022]) tensor([0.7074, 0.0748, 0.0821, 0.1357]) -Greedy action tensor([ 1.4441, -0.4885, -0.0035, 0.0531]) tensor([0.6140, 0.0889, 0.1444, 0.1528]) -Greedy action tensor([ 1.1820, -0.0450, -0.3070, 0.2447]) tensor([0.5234, 0.1535, 0.1181, 0.2050]) -Greedy action tensor([ 1.8435, -0.8431, -0.3298, 0.7799]) tensor([0.6548, 0.0446, 0.0745, 0.2261]) -Greedy action tensor([ 2.0967, -1.0918, 0.0655, 0.6774]) tensor([0.7071, 0.0292, 0.0928, 0.1710]) -Greedy action tensor([-1.8080, -0.4984, 0.6003, -0.1388]) tensor([0.0473, 0.1754, 0.5261, 0.2512]) -Greedy action tensor([-1.8858, -0.2778, 0.6214, -0.1396]) tensor([0.0417, 0.2081, 0.5114, 0.2389]) -Greedy action tensor([-1.4626, 0.0192, 0.5292, 0.2179]) tensor([0.0553, 0.2432, 0.4049, 0.2966]) -Greedy action tensor([-1.2186, -0.5731, 0.3258, 0.1758]) tensor([0.0860, 0.1640, 0.4030, 0.3469]) -Greedy action tensor([-1.7522, -0.3908, 0.5973, -0.0856]) tensor([0.0484, 0.1887, 0.5069, 0.2561]) -Greedy action tensor([-1.4045, -0.5448, 0.3682, 0.2655]) tensor([0.0687, 0.1622, 0.4043, 0.3648]) -Greedy action tensor([-1.7833, -0.2811, 0.6000, -0.0338]) tensor([0.0453, 0.2034, 0.4909, 0.2605]) -Greedy action tensor([-1.9310, -0.4438, 0.6684, -0.1699]) tensor([0.0405, 0.1791, 0.5448, 0.2356]) -Greedy action tensor([-1.8892, -0.3427, 0.6283, -0.1435]) tensor([0.0420, 0.1971, 0.5204, 0.2405]) -Greedy action tensor([-0.6335, 0.1601, 0.1496, -0.0780]) tensor([0.1400, 0.3096, 0.3064, 0.2440]) -Greedy action tensor([-1.5994, 0.4272, 0.3535, 0.0794]) tensor([0.0476, 0.3614, 0.3357, 0.2552]) -Greedy action tensor([-1.8456, -0.3690, 0.6059, -0.1415]) tensor([0.0445, 0.1947, 0.5163, 0.2445]) -Greedy action tensor([-1.8550, -0.4387, 0.6343, -0.1319]) tensor([0.0439, 0.1810, 0.5292, 0.2460]) -Greedy action tensor([-0.9546, -0.5693, 0.2351, 0.3285]) tensor([0.1068, 0.1570, 0.3509, 0.3853]) -Greedy action tensor([-1.6047, -0.2959, 0.5827, -0.0397]) tensor([0.0544, 0.2012, 0.4844, 0.2600]) -Greedy action tensor([-1.8572, -0.4315, 0.6653, -0.1241]) tensor([0.0430, 0.1787, 0.5352, 0.2431]) -Greedy action tensor([-1.4882, -0.6421, 0.5570, 0.0414]) tensor([0.0638, 0.1487, 0.4931, 0.2945]) -Greedy action tensor([-1.6096, 0.4157, 0.3835, -0.0175]) tensor([0.0480, 0.3638, 0.3523, 0.2359]) -Greedy action tensor([-0.3526, -0.4167, 0.2692, 0.3814]) tensor([0.1700, 0.1594, 0.3165, 0.3541]) -Greedy action tensor([-1.1533, 0.2637, 0.3335, -0.1524]) tensor([0.0815, 0.3362, 0.3605, 0.2218]) -Greedy action tensor([-0.8351, 0.8563, -0.0017, 0.4582]) tensor([0.0808, 0.4386, 0.1860, 0.2946]) -Greedy action tensor([-1.8424, -0.2697, 0.5930, -0.1292]) tensor([0.0439, 0.2115, 0.5012, 0.2434]) -Greedy action tensor([-0.1865, 0.1024, 0.1821, 0.2911]) tensor([0.1854, 0.2475, 0.2681, 0.2990]) -Greedy action tensor([-1.9338, -0.4475, 0.6596, -0.1758]) tensor([0.0407, 0.1797, 0.5438, 0.2358]) -Greedy action tensor([-1.6430e+00, -4.2634e-01, 5.3994e-01, -3.1477e-04]) tensor([0.0543, 0.1833, 0.4817, 0.2807]) -Greedy action tensor([-1.9257, -0.4307, 0.6588, -0.1689]) tensor([0.0408, 0.1819, 0.5409, 0.2364]) -Greedy action tensor([-1.8576, -0.4081, 0.6193, -0.1334]) tensor([0.0439, 0.1871, 0.5227, 0.2462]) -Greedy action tensor([-0.6662, -0.5689, 0.6216, 0.6445]) tensor([0.1060, 0.1168, 0.3841, 0.3931]) -Greedy action tensor([-1.8327, -0.4958, 0.6029, -0.1246]) tensor([0.0460, 0.1751, 0.5252, 0.2537]) -Greedy action tensor([-1.8032, -0.4265, 0.5987, -0.1019]) tensor([0.0465, 0.1844, 0.5140, 0.2551]) -Greedy action tensor([-1.5644, -0.4417, 0.5633, 0.1549]) tensor([0.0554, 0.1703, 0.4651, 0.3092]) -Greedy action tensor([-0.8070, -0.5219, 0.2733, 0.3462]) tensor([0.1184, 0.1575, 0.3488, 0.3752]) -Greedy action tensor([-1.5608, -0.3292, 0.4512, -0.0449]) tensor([0.0608, 0.2082, 0.4544, 0.2767]) -Greedy action tensor([-1.1462, -0.5079, 0.5369, -0.4683]) tensor([0.0976, 0.1848, 0.5253, 0.1923]) -Greedy action tensor([-1.8879, -0.3774, 0.6295, -0.1443]) tensor([0.0423, 0.1916, 0.5243, 0.2418]) -Greedy action tensor([-1.5667, 0.0582, 0.4255, -0.0361]) tensor([0.0555, 0.2816, 0.4066, 0.2563]) -Greedy action tensor([-1.8079, -0.4779, 0.6193, -0.0984]) tensor([0.0462, 0.1748, 0.5236, 0.2554]) -Greedy action tensor([-1.9191, -0.4731, 0.6966, -0.1321]) tensor([0.0402, 0.1706, 0.5494, 0.2399]) -Greedy action tensor([-1.9126, -0.4134, 0.6487, -0.1638]) tensor([0.0414, 0.1852, 0.5357, 0.2377]) -Greedy action tensor([-1.8533, -0.4172, 0.6153, -0.1406]) tensor([0.0443, 0.1864, 0.5234, 0.2458]) -Greedy action tensor([-1.9041, -0.4425, 0.6461, -0.1608]) tensor([0.0419, 0.1809, 0.5373, 0.2398]) -Greedy action tensor([-1.9443, -0.4519, 0.6655, -0.1816]) tensor([0.0402, 0.1788, 0.5466, 0.2343]) -Greedy action tensor([-1.3976, -0.6213, 0.4342, 0.1972]) tensor([0.0697, 0.1515, 0.4353, 0.3435]) -Greedy action tensor([-1.8832, -0.2691, 0.6003, -0.1426]) tensor([0.0422, 0.2119, 0.5055, 0.2405]) -Greedy action tensor([-1.9041, -0.3986, 0.6421, -0.1574]) tensor([0.0417, 0.1878, 0.5316, 0.2390]) -Greedy action tensor([-0.9889, 0.6233, 0.1288, -0.0467]) tensor([0.0859, 0.4308, 0.2628, 0.2205]) -Greedy action tensor([-1.9365, -0.4403, 0.6643, -0.1761]) tensor([0.0404, 0.1804, 0.5443, 0.2349]) -Greedy action tensor([-1.9014, -0.4469, 0.6447, -0.1552]) tensor([0.0421, 0.1801, 0.5366, 0.2411]) -Greedy action tensor([-1.9359, -0.4442, 0.6663, -0.1702]) tensor([0.0403, 0.1793, 0.5444, 0.2359]) -Greedy action tensor([-1.6073, 0.4376, 0.3817, 0.0455]) tensor([0.0470, 0.3635, 0.3438, 0.2456]) -Greedy action tensor([-1.8574, -0.3554, 0.6114, -0.1264]) tensor([0.0436, 0.1957, 0.5146, 0.2461]) -Greedy action tensor([-1.7416, -0.4660, 0.5678, -0.0715]) tensor([0.0501, 0.1794, 0.5044, 0.2661]) -Greedy action tensor([-1.8851, -0.4067, 0.6409, -0.1449]) tensor([0.0424, 0.1859, 0.5301, 0.2416]) -Greedy action tensor([-1.2327, -0.6349, 0.3236, 0.0824]) tensor([0.0886, 0.1611, 0.4202, 0.3301]) -Greedy action tensor([-1.8112, -0.4717, 0.6712, 0.0169]) tensor([0.0435, 0.1659, 0.5202, 0.2704]) -Greedy action tensor([-1.7036, -0.1793, 0.5157, -0.0930]) tensor([0.0505, 0.2319, 0.4647, 0.2528]) -Greedy action tensor([-1.9309, -0.4476, 0.6612, -0.1743]) tensor([0.0407, 0.1795, 0.5439, 0.2359]) -Greedy action tensor([-1.8035, -0.3334, 0.5854, -0.0935]) tensor([0.0459, 0.1997, 0.5005, 0.2538]) -Greedy action tensor([-1.5890, 0.1686, 0.4357, -0.0751]) tensor([0.0529, 0.3065, 0.4004, 0.2402]) -Greedy action tensor([-1.8556, -0.4127, 0.6234, -0.1289]) tensor([0.0439, 0.1858, 0.5236, 0.2467]) -Greedy action tensor([-1.8927, -0.3814, 0.6298, -0.1557]) tensor([0.0422, 0.1915, 0.5263, 0.2400]) -Greedy action tensor([-1.8478, -0.3711, 0.6232, -0.0987]) tensor([0.0435, 0.1907, 0.5154, 0.2504]) -Greedy action tensor([-1.6733, 0.0097, 0.4268, -0.0259]) tensor([0.0507, 0.2726, 0.4137, 0.2631]) -Greedy action tensor([-1.4539, -0.5917, 0.4028, 0.1457]) tensor([0.0679, 0.1609, 0.4349, 0.3363]) -Greedy action tensor([-1.8931, -0.3210, 0.6121, -0.1418]) tensor([0.0420, 0.2022, 0.5140, 0.2419]) -Greedy action tensor([-1.8291, -0.3615, 0.5978, -0.1071]) tensor([0.0449, 0.1949, 0.5087, 0.2514]) -Greedy action tensor([-1.0366, -0.5153, 0.3826, -0.1305]) tensor([0.1076, 0.1812, 0.4449, 0.2663]) -Greedy action tensor([-1.9329, -0.4393, 0.6614, -0.1720]) tensor([0.0406, 0.1806, 0.5429, 0.2359]) -Greedy action tensor([-1.8898, -0.4354, 0.6357, -0.1578]) tensor([0.0427, 0.1827, 0.5334, 0.2412]) -Greedy action tensor([-1.8414, -0.4796, 0.6191, -0.0964]) tensor([0.0448, 0.1747, 0.5242, 0.2563]) -Greedy action tensor([-1.8384, -0.1643, 0.5789, -0.0721]) tensor([0.0427, 0.2280, 0.4793, 0.2500]) -Greedy action tensor([-1.8715, -0.4492, 0.6313, -0.1430]) tensor([0.0435, 0.1803, 0.5313, 0.2449]) -Greedy action tensor([-1.9257, -0.4211, 0.6569, -0.1646]) tensor([0.0407, 0.1834, 0.5389, 0.2370]) -Greedy action tensor([-1.7605, -0.4280, 0.5735, -0.0927]) tensor([0.0490, 0.1857, 0.5056, 0.2597]) -Greedy action tensor([-0.3153, -0.1187, 0.7482, 0.4908]) tensor([0.1360, 0.1655, 0.3939, 0.3045]) -Greedy action tensor([-0.5114, 1.0473, 0.0231, 0.3677]) tensor([0.1013, 0.4816, 0.1729, 0.2441]) -Greedy action tensor([-1.2048, -0.1078, 0.5254, 0.2360]) tensor([0.0721, 0.2161, 0.4070, 0.3047]) -Greedy action tensor([-0.4604, 0.9979, 0.0787, 0.0760]) tensor([0.1146, 0.4928, 0.1966, 0.1960]) -Greedy action tensor([-1.5924, -0.3536, 0.6900, -0.0703]) tensor([0.0531, 0.1833, 0.5204, 0.2433]) -Greedy action tensor([-1.5288, 0.4123, 0.3352, 0.0106]) tensor([0.0524, 0.3652, 0.3381, 0.2444]) -Greedy action tensor([-1.9247, -0.4351, 0.6596, -0.1687]) tensor([0.0409, 0.1812, 0.5414, 0.2365]) -Greedy action tensor([ 0.4367, -0.4748, -0.2258, -0.1531]) tensor([0.4045, 0.1626, 0.2086, 0.2243]) -Greedy action tensor([ 0.3798, -0.2430, -0.1351, 0.0801]) tensor([0.3478, 0.1866, 0.2078, 0.2577]) -Greedy action tensor([ 0.4857, -0.1343, -0.0086, 0.0795]) tensor([0.3554, 0.1912, 0.2168, 0.2367]) -Greedy action tensor([ 0.5829, -0.1412, 0.0399, -0.1758]) tensor([0.3946, 0.1913, 0.2293, 0.1848]) -Greedy action tensor([ 0.6671, -0.4033, 0.0291, -0.2427]) tensor([0.4398, 0.1508, 0.2323, 0.1771]) -Greedy action tensor([ 0.3430, -0.2512, -0.0895, -0.0266]) tensor([0.3458, 0.1909, 0.2244, 0.2389]) -Greedy action tensor([ 0.5019, -0.4536, -0.1080, -0.1010]) tensor([0.4040, 0.1554, 0.2195, 0.2211]) -Greedy action tensor([ 0.9352, -0.9488, -0.0223, -0.3600]) tensor([0.5526, 0.0840, 0.2121, 0.1513]) -Greedy action tensor([ 0.4535, -0.1742, -0.1108, -0.4564]) tensor([0.3992, 0.2131, 0.2270, 0.1607]) -Greedy action tensor([ 0.4549, 0.2826, -0.2010, -0.1905]) tensor([0.3466, 0.2917, 0.1799, 0.1818]) -Greedy action tensor([ 1.0551, -0.6099, -0.0227, -0.5980]) tensor([0.5811, 0.1099, 0.1978, 0.1112]) -Greedy action tensor([ 0.4011, -0.2584, -0.0330, -0.0858]) tensor([0.3598, 0.1860, 0.2331, 0.2211]) -Greedy action tensor([ 0.7358, -0.2505, 0.0730, -0.2245]) tensor([0.4403, 0.1642, 0.2269, 0.1685]) -Greedy action tensor([ 0.5324, -0.2904, -0.1499, -0.1463]) tensor([0.4078, 0.1791, 0.2061, 0.2069]) -Greedy action tensor([ 0.3926, 0.4273, -0.1453, -0.0169]) tensor([0.3046, 0.3153, 0.1779, 0.2022]) -Greedy action tensor([ 0.6028, -0.3048, 0.0986, -0.3933]) tensor([0.4207, 0.1698, 0.2541, 0.1554]) -Greedy action tensor([ 0.8569, -0.4421, -0.0501, -0.5519]) tensor([0.5206, 0.1420, 0.2102, 0.1272]) -Greedy action tensor([ 0.9334, -0.9169, -0.1322, -1.0704]) tensor([0.6110, 0.0961, 0.2105, 0.0824]) -Greedy action tensor([ 0.8494, -0.5532, -0.0437, -0.2027]) tensor([0.4989, 0.1227, 0.2042, 0.1742]) -Greedy action tensor([ 0.5092, -0.0537, 0.0745, -0.0994]) tensor([0.3622, 0.2063, 0.2345, 0.1971]) -Greedy action tensor([ 1.0033, -0.6399, 0.1118, -0.4922]) tensor([0.5472, 0.1058, 0.2244, 0.1227]) -Greedy action tensor([ 0.7142, -0.3116, -0.1112, -0.0680]) tensor([0.4437, 0.1591, 0.1943, 0.2029]) -Greedy action tensor([ 0.9973, -0.7799, 0.1688, -0.9731]) tensor([0.5730, 0.0969, 0.2502, 0.0799]) -Greedy action tensor([ 0.9272, -0.2544, -0.3449, -0.4326]) tensor([0.5424, 0.1664, 0.1520, 0.1392]) -Greedy action tensor([ 0.4225, -0.1399, 0.1541, -0.2765]) tensor([0.3532, 0.2012, 0.2700, 0.1755]) -Greedy action tensor([ 0.3689, -0.1230, -0.0122, -0.0979]) tensor([0.3423, 0.2093, 0.2338, 0.2146]) -Greedy action tensor([ 0.4429, -0.2223, -0.2610, -0.3923]) tensor([0.4094, 0.2105, 0.2025, 0.1776]) -Greedy action tensor([ 0.7458, -0.5928, -0.0490, -0.2457]) tensor([0.4797, 0.1258, 0.2166, 0.1779]) -Greedy action tensor([ 0.8206, -0.4429, -0.0303, -0.2679]) tensor([0.4887, 0.1381, 0.2087, 0.1645]) -Greedy action tensor([ 0.5635, -0.3551, -0.0149, -0.1572]) tensor([0.4088, 0.1631, 0.2292, 0.1988]) -Greedy action tensor([ 0.5497, -0.3535, -0.0480, -0.2584]) tensor([0.4165, 0.1688, 0.2291, 0.1856]) -Greedy action tensor([ 0.6583, -0.5752, -0.1792, -0.1401]) tensor([0.4600, 0.1340, 0.1991, 0.2070]) -Greedy action tensor([ 0.7685, -0.5321, -0.0760, -0.5508]) tensor([0.5077, 0.1383, 0.2182, 0.1357]) -Greedy action tensor([ 0.5019, -0.1061, 0.1304, -0.2554]) tensor([0.3700, 0.2014, 0.2551, 0.1735]) -Greedy action tensor([ 0.2668, 0.1206, -0.0930, -0.0437]) tensor([0.3035, 0.2622, 0.2118, 0.2225]) -Greedy action tensor([ 0.2714, -0.1101, -0.1218, -0.3147]) tensor([0.3431, 0.2343, 0.2316, 0.1910]) -Greedy action tensor([ 0.3514, 0.1364, -0.1370, -0.1852]) tensor([0.3328, 0.2684, 0.2042, 0.1946]) -Greedy action tensor([ 0.4604, -0.4499, -0.0433, -0.1087]) tensor([0.3887, 0.1564, 0.2349, 0.2200]) -Greedy action tensor([ 0.7508, -0.5779, -0.1740, -0.3723]) tensor([0.5033, 0.1333, 0.1996, 0.1637]) -Greedy action tensor([ 0.8156, -0.5776, 0.0717, -0.3406]) tensor([0.4906, 0.1218, 0.2332, 0.1544]) -Greedy action tensor([ 0.6621, 0.0612, -0.2612, -0.0441]) tensor([0.4100, 0.2248, 0.1629, 0.2023]) -Greedy action tensor([ 0.3382, -0.4178, 0.0096, -0.2062]) tensor([0.3611, 0.1695, 0.2599, 0.2095]) -Greedy action tensor([ 0.5107, -0.5880, -0.0931, -0.2247]) tensor([0.4238, 0.1413, 0.2317, 0.2032]) -Greedy action tensor([ 0.6167, -0.6691, -0.1321, -0.3359]) tensor([0.4684, 0.1295, 0.2215, 0.1807]) -Greedy action tensor([ 1.0578, -0.7464, -0.0512, -0.5591]) tensor([0.5907, 0.0972, 0.1948, 0.1173]) -Greedy action tensor([ 0.6792, -0.9660, -0.1573, -0.3505]) tensor([0.5042, 0.0973, 0.2184, 0.1801]) -Greedy action tensor([ 1.4468, -0.8781, -0.0730, -0.4289]) tensor([0.6804, 0.0665, 0.1488, 0.1043]) -Greedy action tensor([ 0.8085, -0.0524, 0.1653, -0.4332]) tensor([0.4470, 0.1890, 0.2349, 0.1291]) -Greedy action tensor([ 0.7580, -0.5441, -0.0562, -0.3715]) tensor([0.4906, 0.1334, 0.2174, 0.1586]) -Greedy action tensor([ 0.5860, -0.3302, -0.0258, -0.2966]) tensor([0.4244, 0.1698, 0.2302, 0.1756]) -Greedy action tensor([ 0.7624, -0.6593, 0.2831, -0.5383]) tensor([0.4689, 0.1131, 0.2903, 0.1277]) -Greedy action tensor([ 0.6846, -0.3935, -0.0670, -0.0892]) tensor([0.4399, 0.1497, 0.2075, 0.2029]) -Greedy action tensor([ 0.7085, -0.4144, -0.1156, -0.1626]) tensor([0.4582, 0.1491, 0.2010, 0.1917]) -Greedy action tensor([ 0.5010, -0.3971, -0.0453, -0.2719]) tensor([0.4085, 0.1664, 0.2365, 0.1886]) -Greedy action tensor([ 0.9828, -0.5858, 0.1240, -0.5024]) tensor([0.5381, 0.1121, 0.2280, 0.1219]) -Greedy action tensor([ 0.9059, -0.7547, -0.0977, -0.3601]) tensor([0.5439, 0.1034, 0.1994, 0.1534]) -Greedy action tensor([ 0.7922, -0.5954, 0.0134, -0.4610]) tensor([0.5014, 0.1252, 0.2301, 0.1432]) -Greedy action tensor([ 0.7326, -0.3799, 0.0342, -0.4048]) tensor([0.4658, 0.1531, 0.2317, 0.1494]) -Greedy action tensor([ 0.6370, 0.0553, -0.1899, -0.0815]) tensor([0.4026, 0.2250, 0.1761, 0.1963]) -Greedy action tensor([ 0.8918, -0.5802, -0.0781, -0.5314]) tensor([0.5407, 0.1241, 0.2050, 0.1303]) -Greedy action tensor([ 0.6592, -0.4439, -0.0832, -0.1159]) tensor([0.4408, 0.1463, 0.2098, 0.2031]) -Greedy action tensor([ 1.2364, -0.5828, -0.0489, -0.5458]) tensor([0.6223, 0.1009, 0.1721, 0.1047]) -Greedy action tensor([ 0.2217, 0.0501, -0.1961, -0.1064]) tensor([0.3104, 0.2615, 0.2044, 0.2236]) -Greedy action tensor([ 0.9080, -0.3830, 0.3796, -0.7145]) tensor([0.4850, 0.1334, 0.2859, 0.0957]) -Greedy action tensor([ 0.8915, -0.6659, 0.1130, -0.4124]) tensor([0.5151, 0.1085, 0.2365, 0.1398]) -Greedy action tensor([ 0.6746, 0.0955, 0.0236, -0.2057]) tensor([0.4005, 0.2245, 0.2089, 0.1661]) -Greedy action tensor([ 0.5124, -0.5021, 0.0646, -0.3854]) tensor([0.4151, 0.1505, 0.2652, 0.1691]) -Greedy action tensor([0.2027, 0.2273, 0.2278, 0.1145]) tensor([0.2521, 0.2584, 0.2586, 0.2309]) -Greedy action tensor([ 0.5563, -0.3554, 0.2460, -0.5176]) tensor([0.4038, 0.1622, 0.2960, 0.1380]) -Greedy action tensor([ 0.8737, -0.5266, -0.0102, -0.2304]) tensor([0.5022, 0.1238, 0.2075, 0.1665]) -Greedy action tensor([ 0.8817, -0.9905, 0.2119, -0.8763]) tensor([0.5441, 0.0837, 0.2785, 0.0938]) -Greedy action tensor([ 0.2437, 0.2125, 0.0169, -0.2714]) tensor([0.2973, 0.2881, 0.2370, 0.1776]) -Greedy action tensor([ 0.7390, -0.6154, -0.0255, -0.4416]) tensor([0.4924, 0.1271, 0.2293, 0.1512]) -Greedy action tensor([ 9.0996e-01, -1.2959e-01, -3.9968e-02, 1.5130e-04]) tensor([0.4666, 0.1650, 0.1805, 0.1879]) -Greedy action tensor([ 0.4165, 0.0460, -0.1147, -0.0864]) tensor([0.3469, 0.2395, 0.2039, 0.2098]) -Greedy action tensor([ 0.4304, -0.1763, 0.0557, -0.3931]) tensor([0.3743, 0.2041, 0.2573, 0.1643]) -Greedy action tensor([ 0.8111, -0.2531, -0.0603, -0.4534]) tensor([0.4888, 0.1686, 0.2045, 0.1380]) -Greedy action tensor([ 0.5809, -0.3923, 0.0836, -0.1482]) tensor([0.4051, 0.1531, 0.2464, 0.1954]) -Greedy action tensor([ 0.3516, -0.4059, -0.0050, -0.5760]) tensor([0.3899, 0.1828, 0.2730, 0.1542]) -Greedy action tensor([ 0.4978, -0.3031, -0.1080, -0.1406]) tensor([0.3964, 0.1780, 0.2163, 0.2094]) -Greedy action tensor([ 0.5165, 0.5758, -0.0650, 0.0653]) tensor([0.3070, 0.3258, 0.1717, 0.1955]) -Greedy action tensor([-1.1534, -0.7693, 1.9727, -1.7299]) tensor([0.0387, 0.0569, 0.8826, 0.0218]) -Greedy action tensor([-0.1276, -1.3845, 1.0615, -0.1258]) tensor([0.1795, 0.0511, 0.5895, 0.1798]) -Greedy action tensor([ 0.4329, -0.4032, -0.0411, -0.4985]) tensor([0.4082, 0.1769, 0.2541, 0.1608]) -Greedy action tensor([ 0.5694, 0.2213, 0.4982, -0.1279]) tensor([0.3190, 0.2252, 0.2970, 0.1588]) -Greedy action tensor([-0.1020, -0.4609, -0.2968, 1.3199]) tensor([0.1500, 0.1048, 0.1235, 0.6218]) -Greedy action tensor([-0.3104, -0.5690, -0.0517, -0.6467]) tensor([0.2644, 0.2042, 0.3425, 0.1889]) -Greedy action tensor([-0.4069, 0.8725, -0.0157, 1.1450]) tensor([0.0926, 0.3330, 0.1370, 0.4373]) -Greedy action tensor([-0.7175, -0.7383, 1.1113, -0.5616]) tensor([0.1067, 0.1045, 0.6642, 0.1247]) -Greedy action tensor([ 1.1279, 0.1719, -0.0565, 0.6581]) tensor([0.4319, 0.1660, 0.1321, 0.2700]) -Greedy action tensor([ 0.1666, -1.7899, -0.7204, 0.1376]) tensor([0.3961, 0.0560, 0.1631, 0.3848]) -Greedy action tensor([ 0.7903, -0.4875, 1.2478, 0.4856]) tensor([0.2781, 0.0775, 0.4394, 0.2050]) -Greedy action tensor([-0.1517, -0.8324, -1.0351, 0.3791]) tensor([0.2762, 0.1399, 0.1142, 0.4697]) -Greedy action tensor([0.0473, 0.2518, 0.1528, 0.8089]) tensor([0.1825, 0.2239, 0.2028, 0.3908]) -Greedy action tensor([-0.7697, -0.7838, 1.3835, -1.1093]) tensor([0.0884, 0.0872, 0.7615, 0.0630]) -Greedy action tensor([-0.6090, -0.2409, 2.0985, 0.3067]) tensor([0.0502, 0.0725, 0.7520, 0.1253]) -Greedy action tensor([ 0.7817, -1.2772, 0.1174, 0.6590]) tensor([0.3958, 0.0505, 0.2037, 0.3501]) -Greedy action tensor([-0.1203, -0.6439, -0.1676, 1.0408]) tensor([0.1742, 0.1032, 0.1662, 0.5564]) -Greedy action tensor([ 0.9245, -0.3231, -0.0013, -0.5565]) tensor([0.5233, 0.1503, 0.2074, 0.1190]) -Greedy action tensor([ 1.1017, -1.0433, 0.1225, 0.0635]) tensor([0.5415, 0.0634, 0.2034, 0.1917]) -Greedy action tensor([ 0.4619, -0.0377, -0.2044, 0.0971]) tensor([0.3553, 0.2156, 0.1825, 0.2467]) -Greedy action tensor([-0.1715, 0.0113, -0.5418, 0.0740]) tensor([0.2398, 0.2880, 0.1656, 0.3066]) -Greedy action tensor([ 0.2312, -0.5033, 1.7129, 0.1416]) tensor([0.1472, 0.0706, 0.6476, 0.1346]) -Greedy action tensor([-0.5721, -0.0494, 0.1379, 0.3559]) tensor([0.1379, 0.2326, 0.2806, 0.3489]) -Greedy action tensor([ 0.1795, -0.9116, 1.2278, -0.8512]) tensor([0.2200, 0.0739, 0.6276, 0.0785]) -Greedy action tensor([ 1.8487, -0.2689, 1.4772, 0.3735]) tensor([0.4905, 0.0590, 0.3383, 0.1122]) -Greedy action tensor([ 0.0705, -0.9200, 1.0637, 0.4587]) tensor([0.1803, 0.0670, 0.4868, 0.2659]) -Greedy action tensor([-0.5873, -0.4592, 0.1276, -0.0626]) tensor([0.1703, 0.1936, 0.3482, 0.2879]) -Greedy action tensor([ 0.0330, -1.4521, -0.0802, 0.7540]) tensor([0.2395, 0.0542, 0.2138, 0.4925]) -Greedy action tensor([ 0.2086, 0.0541, -0.5512, -0.0443]) tensor([0.3225, 0.2763, 0.1508, 0.2504]) -Greedy action tensor([-0.4020, -0.7144, -0.1350, -0.5769]) tensor([0.2579, 0.1887, 0.3368, 0.2165]) -Greedy action tensor([ 0.0826, -0.8838, -1.1059, 0.4660]) tensor([0.3172, 0.1207, 0.0966, 0.4654]) -Greedy action tensor([-0.2220, -0.8942, -0.0405, 0.1672]) tensor([0.2389, 0.1220, 0.2865, 0.3526]) -Greedy action tensor([ 1.5141, -0.8355, 0.6241, -0.3316]) tensor([0.6010, 0.0573, 0.2468, 0.0949]) -Greedy action tensor([ 0.1133, -2.3887, 0.2479, 0.6639]) tensor([0.2525, 0.0207, 0.2889, 0.4379]) -Greedy action tensor([ 0.8006, -0.7570, -0.8213, 1.0854]) tensor([0.3653, 0.0769, 0.0721, 0.4856]) -Greedy action tensor([-0.1292, -0.1258, -1.0337, -0.3418]) tensor([0.3109, 0.3119, 0.1258, 0.2513]) -Greedy action tensor([ 0.6451, -1.3300, 0.0759, 0.2997]) tensor([0.4145, 0.0575, 0.2346, 0.2934]) -Greedy action tensor([-0.3721, -0.8797, 0.4707, 0.3818]) tensor([0.1653, 0.0995, 0.3839, 0.3513]) -Greedy action tensor([-0.0540, -0.3759, -0.8820, -0.3107]) tensor([0.3407, 0.2469, 0.1489, 0.2636]) -Greedy action tensor([-1.8467, -1.1990, 0.7799, -0.6287]) tensor([0.0497, 0.0950, 0.6873, 0.1680]) -Greedy action tensor([ 0.4131, -0.5563, 0.6984, 0.9750]) tensor([0.2240, 0.0850, 0.2980, 0.3930]) -Greedy action tensor([ 0.0909, -1.4567, -0.3573, -0.0341]) tensor([0.3658, 0.0778, 0.2336, 0.3228]) -Greedy action tensor([-1.1174, -1.0294, -0.7837, 0.6082]) tensor([0.1098, 0.1200, 0.1534, 0.6168]) -Greedy action tensor([ 0.5548, 0.0682, -0.4783, 0.1749]) tensor([0.3767, 0.2316, 0.1341, 0.2576]) -Greedy action tensor([ 0.8177, -0.5995, 0.6526, -1.2135]) tensor([0.4502, 0.1091, 0.3817, 0.0591]) -Greedy action tensor([-0.1940, 0.5432, -0.5671, -0.7298]) tensor([0.2292, 0.4790, 0.1578, 0.1341]) -Greedy action tensor([ 0.8857, -1.3598, 0.1485, 0.3617]) tensor([0.4595, 0.0486, 0.2198, 0.2721]) -Greedy action tensor([ 1.4307, -1.0716, 1.6552, 1.1671]) tensor([0.3224, 0.0264, 0.4035, 0.2477]) -Greedy action tensor([ 0.9032, 0.1923, 1.4898, -0.8849]) tensor([0.2893, 0.1421, 0.5202, 0.0484]) -Greedy action tensor([ 0.6931, 0.2442, -0.9407, 2.0237]) tensor([0.1780, 0.1136, 0.0348, 0.6736]) -Greedy action tensor([-0.7690, -0.6292, -0.7402, 1.4267]) tensor([0.0822, 0.0945, 0.0846, 0.7387]) -Greedy action tensor([ 0.1861, -0.4484, -0.0496, 0.0762]) tensor([0.3109, 0.1648, 0.2456, 0.2786]) -Greedy action tensor([-0.8229, 0.2078, -0.3840, -1.3238]) tensor([0.1678, 0.4703, 0.2602, 0.1017]) -Greedy action tensor([-0.7146, -1.1186, -0.7498, -0.1263]) tensor([0.2255, 0.1506, 0.2177, 0.4062]) -Greedy action tensor([ 0.9300, -0.3613, -0.6765, 0.7888]) tensor([0.4267, 0.1173, 0.0856, 0.3705]) -Greedy action tensor([-0.3432, 0.6850, 1.2967, -1.6224]) tensor([0.1084, 0.3030, 0.5585, 0.0302]) -Greedy action tensor([ 0.2916, -1.1550, 0.0929, 0.7934]) tensor([0.2698, 0.0635, 0.2212, 0.4456]) -Greedy action tensor([-0.0671, -1.7785, 0.2673, 0.6928]) tensor([0.2120, 0.0383, 0.2963, 0.4534]) -Greedy action tensor([ 0.8940, -0.6847, 0.7885, 1.2083]) tensor([0.2877, 0.0593, 0.2589, 0.3940]) -Greedy action tensor([ 0.7272, -1.0262, 1.5301, -0.3053]) tensor([0.2659, 0.0460, 0.5934, 0.0947]) -Greedy action tensor([ 0.2944, -0.0324, -0.2511, 0.9577]) tensor([0.2357, 0.1700, 0.1366, 0.4576]) -Greedy action tensor([-0.2742, -0.5516, 0.4515, -0.6716]) tensor([0.2224, 0.1685, 0.4596, 0.1495]) -Greedy action tensor([ 0.4628, -1.2629, 0.8113, -0.6558]) tensor([0.3423, 0.0609, 0.4850, 0.1118]) -Greedy action tensor([ 0.2114, -1.5333, 0.9607, -0.2639]) tensor([0.2556, 0.0447, 0.5408, 0.1589]) -Greedy action tensor([ 0.9339, -0.7410, 0.3929, -0.5806]) tensor([0.5027, 0.0942, 0.2926, 0.1105]) -Greedy action tensor([ 0.8972, -1.3849, 0.4032, 0.0428]) tensor([0.4678, 0.0477, 0.2854, 0.1991]) -Greedy action tensor([ 1.0082, 0.0602, -0.0522, 0.7850]) tensor([0.3947, 0.1529, 0.1367, 0.3157]) -Greedy action tensor([ 0.1388, -2.9610, -1.0373, 0.9744]) tensor([0.2732, 0.0123, 0.0843, 0.6302]) -Greedy action tensor([-0.2130, -0.6089, -0.3178, -0.5169]) tensor([0.3020, 0.2033, 0.2719, 0.2228]) -Greedy action tensor([ 1.5669, -0.1875, 0.6027, 0.4995]) tensor([0.5268, 0.0911, 0.2009, 0.1812]) -Greedy action tensor([ 0.8340, -1.2967, -0.2170, 0.8954]) tensor([0.3950, 0.0469, 0.1381, 0.4200]) -Greedy action tensor([-1.2108, -0.9146, -0.1022, -0.3728]) tensor([0.1301, 0.1749, 0.3942, 0.3007]) -Greedy action tensor([ 0.3158, -0.0144, 0.7617, 0.5684]) tensor([0.2189, 0.1573, 0.3419, 0.2818]) -Greedy action tensor([ 0.0615, 0.0216, -0.4496, 0.1459]) tensor([0.2741, 0.2633, 0.1644, 0.2982]) -Greedy action tensor([-0.3221, -0.3789, -0.8043, -0.2556]) tensor([0.2754, 0.2602, 0.1700, 0.2943]) -Greedy action tensor([-0.4068, -0.3839, 0.3574, -0.5576]) tensor([0.1988, 0.2034, 0.4269, 0.1710]) -Greedy action tensor([ 0.0612, -1.2854, 0.2042, -0.0470]) tensor([0.3020, 0.0786, 0.3484, 0.2710]) -Greedy action tensor([ 0.4189, -1.0356, 0.2308, -0.7283]) tensor([0.4203, 0.0981, 0.3482, 0.1334]) -Greedy action tensor([ 0.3391, -0.7830, 0.0875, -0.0112]) tensor([0.3562, 0.1160, 0.2769, 0.2509]) -Greedy action tensor([ 0.7070, -0.9666, -0.0537, 0.7562]) tensor([0.3696, 0.0693, 0.1728, 0.3883]) -Greedy action tensor([ 0.3959, -0.3769, 1.1609, -0.4493]) tensor([0.2475, 0.1143, 0.5319, 0.1063]) -Greedy action tensor([ 0.9826, 0.1164, 0.3223, -0.0361]) tensor([0.4351, 0.1830, 0.2248, 0.1571]) -Greedy action tensor([ 1.4111, 0.1125, -0.6103, 0.2606]) tensor([0.5808, 0.1585, 0.0769, 0.1838]) -Greedy action tensor([ 1.6616, -0.6280, -0.4400, 0.2499]) tensor([0.6815, 0.0690, 0.0833, 0.1661]) -Greedy action tensor([ 0.8414, -0.4025, -0.4246, 0.3731]) tensor([0.4553, 0.1313, 0.1284, 0.2851]) -Greedy action tensor([1.6062, 0.3497, 0.0373, 0.0167]) tensor([0.5893, 0.1677, 0.1227, 0.1202]) -Greedy action tensor([ 1.2349, -0.2057, -0.7601, 0.6632]) tensor([0.5162, 0.1222, 0.0702, 0.2914]) -Greedy action tensor([ 0.8054, 0.0774, -0.0985, 0.2487]) tensor([0.4063, 0.1962, 0.1646, 0.2329]) -Greedy action tensor([ 1.9614, -0.7783, -0.1710, 0.4387]) tensor([0.7136, 0.0461, 0.0846, 0.1557]) -Greedy action tensor([ 2.2748, -1.4096, -0.0204, 0.2791]) tensor([0.7925, 0.0199, 0.0798, 0.1077]) -Greedy action tensor([ 1.3600, 0.1966, -0.6112, 0.1914]) tensor([0.5674, 0.1773, 0.0790, 0.1763]) -Greedy action tensor([ 1.8904, -0.1696, -0.1505, -0.0245]) tensor([0.7119, 0.0907, 0.0925, 0.1049]) -Greedy action tensor([ 2.2781, -1.4669, 0.0630, 0.1912]) tensor([0.7956, 0.0188, 0.0868, 0.0987]) -Greedy action tensor([ 1.0901, -0.4925, -0.3135, 0.1947]) tensor([0.5378, 0.1105, 0.1321, 0.2196]) -Greedy action tensor([ 1.6259, 0.4651, 0.4341, -0.5091]) tensor([0.5763, 0.1805, 0.1750, 0.0682]) -Greedy action tensor([ 1.9667, 0.0101, -0.3092, 0.5352]) tensor([0.6743, 0.0953, 0.0693, 0.1611]) -Greedy action tensor([ 1.9787, -0.5035, -0.6370, 0.5377]) tensor([0.7177, 0.0600, 0.0525, 0.1699]) -Greedy action tensor([ 1.0785, -0.1517, -0.1184, 0.1066]) tensor([0.5069, 0.1481, 0.1532, 0.1918]) -Greedy action tensor([ 1.5486, -0.2027, -0.5820, 0.3027]) tensor([0.6329, 0.1098, 0.0752, 0.1821]) -Greedy action tensor([ 2.0797, -1.6558, -0.0184, -0.0084]) tensor([0.7871, 0.0188, 0.0966, 0.0975]) -Greedy action tensor([ 1.4614, -0.9447, -0.1925, 0.3675]) tensor([0.6187, 0.0558, 0.1183, 0.2072]) -Greedy action tensor([ 1.3977, -0.4062, -0.5039, 0.0051]) tensor([0.6400, 0.1054, 0.0956, 0.1590]) -Greedy action tensor([ 1.0834, -0.0788, -0.2876, 0.6564]) tensor([0.4506, 0.1410, 0.1144, 0.2940]) -Greedy action tensor([ 1.4383, -0.6078, -0.3715, 0.7588]) tensor([0.5556, 0.0718, 0.0909, 0.2816]) -Greedy action tensor([ 1.0920, -0.4723, -0.3886, 0.4171]) tensor([0.5139, 0.1075, 0.1169, 0.2617]) -Greedy action tensor([ 2.3437, -1.3609, -0.2596, 0.8653]) tensor([0.7538, 0.0186, 0.0558, 0.1719]) -Greedy action tensor([ 1.5299, -0.3711, -0.8216, 0.1735]) tensor([0.6657, 0.0995, 0.0634, 0.1715]) -Greedy action tensor([ 0.9926, -0.5279, -0.3038, 0.2350]) tensor([0.5100, 0.1115, 0.1395, 0.2391]) -Greedy action tensor([ 0.4682, -0.2571, -0.0924, 0.1203]) tensor([0.3622, 0.1754, 0.2067, 0.2557]) -Greedy action tensor([ 2.2934, -0.9780, -0.2245, 0.5310]) tensor([0.7751, 0.0294, 0.0625, 0.1330]) -Greedy action tensor([ 0.5226, -0.2404, -0.0974, 0.0176]) tensor([0.3835, 0.1788, 0.2063, 0.2314]) -Greedy action tensor([ 1.4675, -0.1652, -0.5681, 0.4452]) tensor([0.5932, 0.1159, 0.0775, 0.2134]) -Greedy action tensor([ 0.9882, -0.1841, -0.3421, 0.0506]) tensor([0.5087, 0.1575, 0.1345, 0.1992]) -Greedy action tensor([ 0.9713, -0.5630, -0.4921, 0.6387]) tensor([0.4621, 0.0996, 0.1069, 0.3313]) -Greedy action tensor([ 2.6899, -0.5183, 0.0340, 0.4142]) tensor([0.8241, 0.0333, 0.0579, 0.0847]) -Greedy action tensor([ 1.1569, -0.3711, -0.8824, 0.3437]) tensor([0.5585, 0.1212, 0.0727, 0.2477]) -Greedy action tensor([ 1.0594, -0.1112, -0.3644, 0.3256]) tensor([0.4924, 0.1527, 0.1186, 0.2364]) -Greedy action tensor([ 1.9797, -1.0933, -0.4903, 0.5721]) tensor([0.7270, 0.0336, 0.0615, 0.1779]) -Greedy action tensor([ 1.5902, -0.8645, -0.2428, 0.5444]) tensor([0.6261, 0.0538, 0.1001, 0.2200]) -Greedy action tensor([ 1.3310, -0.0929, -0.2516, 0.5418]) tensor([0.5262, 0.1267, 0.1081, 0.2390]) -Greedy action tensor([ 1.7442, -0.3240, -0.2414, -0.4590]) tensor([0.7277, 0.0920, 0.0999, 0.0804]) -Greedy action tensor([ 1.6663, -0.2820, -0.6884, 0.4613]) tensor([0.6506, 0.0927, 0.0617, 0.1950]) -Greedy action tensor([ 1.0162, 0.2183, -1.0232, 0.2536]) tensor([0.4886, 0.2200, 0.0636, 0.2279]) -Greedy action tensor([ 1.5015, -0.5258, -0.1416, 0.7443]) tensor([0.5574, 0.0734, 0.1078, 0.2614]) -Greedy action tensor([ 1.8706, -0.5537, -0.6128, 0.5466]) tensor([0.6954, 0.0616, 0.0580, 0.1850]) -Greedy action tensor([ 1.0473, -0.4427, -0.2384, 0.1133]) tensor([0.5278, 0.1189, 0.1459, 0.2074]) -Greedy action tensor([ 1.7944, -0.9074, -0.5732, 0.8928]) tensor([0.6383, 0.0428, 0.0598, 0.2591]) -Greedy action tensor([ 1.2247, -0.1494, -0.1952, -0.0187]) tensor([0.5608, 0.1419, 0.1356, 0.1617]) -Greedy action tensor([ 1.6264, -0.5593, -0.5080, -0.0641]) tensor([0.7066, 0.0794, 0.0836, 0.1303]) -Greedy action tensor([ 1.7193, -0.2917, -0.7936, 0.2610]) tensor([0.6908, 0.0925, 0.0560, 0.1607]) -Greedy action tensor([ 1.2611, -0.1733, 0.0218, 0.3655]) tensor([0.5165, 0.1230, 0.1496, 0.2109]) -Greedy action tensor([ 0.4446, -0.4796, -0.2172, 0.5070]) tensor([0.3359, 0.1333, 0.1733, 0.3575]) -Greedy action tensor([ 1.7892, -0.6620, -0.3067, 0.5753]) tensor([0.6639, 0.0572, 0.0816, 0.1972]) -Greedy action tensor([ 2.3253, -1.6251, -0.2493, 0.6734]) tensor([0.7769, 0.0150, 0.0592, 0.1489]) -Greedy action tensor([ 1.9814, -0.7385, -0.4763, 0.4463]) tensor([0.7316, 0.0482, 0.0626, 0.1576]) -Greedy action tensor([ 0.9890, -0.4491, -0.3304, 0.3018]) tensor([0.4981, 0.1182, 0.1331, 0.2505]) -Greedy action tensor([ 1.9195, -0.6726, -0.4525, 0.9766]) tensor([0.6420, 0.0481, 0.0599, 0.2501]) -Greedy action tensor([ 1.7424, -0.6570, -0.0342, 0.2604]) tensor([0.6724, 0.0610, 0.1138, 0.1528]) -Greedy action tensor([ 2.1383, -0.6970, -0.4599, 0.8590]) tensor([0.7085, 0.0416, 0.0527, 0.1971]) -Greedy action tensor([ 1.2561, -0.6843, -0.4874, 0.0476]) tensor([0.6183, 0.0888, 0.1082, 0.1847]) -Greedy action tensor([ 0.9792, -0.6316, -0.0075, 0.2298]) tensor([0.4890, 0.0977, 0.1823, 0.2311]) -Greedy action tensor([ 1.5136, -0.6746, -0.3661, 0.4418]) tensor([0.6222, 0.0698, 0.0950, 0.2130]) -Greedy action tensor([ 1.1852, -0.6524, -0.4013, 0.3569]) tensor([0.5554, 0.0884, 0.1137, 0.2426]) -Greedy action tensor([ 1.2583, -0.1010, -0.1595, 0.4561]) tensor([0.5135, 0.1319, 0.1244, 0.2302]) -Greedy action tensor([ 1.2034, -0.0562, -0.5086, -0.0781]) tensor([0.5741, 0.1629, 0.1036, 0.1594]) -Greedy action tensor([ 1.5943, -0.4922, -0.4377, 0.4094]) tensor([0.6406, 0.0795, 0.0840, 0.1959]) -Greedy action tensor([ 1.0204, -0.3481, -0.6227, 0.2445]) tensor([0.5240, 0.1334, 0.1014, 0.2412]) -Greedy action tensor([ 1.6180, -0.6000, -0.3922, -0.0853]) tensor([0.7018, 0.0764, 0.0940, 0.1278]) -Greedy action tensor([ 1.4923, -0.7449, -0.4117, 0.8750]) tensor([0.5571, 0.0595, 0.0830, 0.3005]) -Greedy action tensor([ 1.4503, -0.5418, -0.4836, 0.3373]) tensor([0.6213, 0.0847, 0.0898, 0.2042]) -Greedy action tensor([ 1.4177, -0.1999, -0.0354, 0.4646]) tensor([0.5501, 0.1091, 0.1286, 0.2121]) -Greedy action tensor([ 1.6400, -0.2779, -0.3983, 0.6939]) tensor([0.6005, 0.0882, 0.0782, 0.2331]) -Greedy action tensor([ 2.2142, 0.5034, -0.0943, 0.3324]) tensor([0.6981, 0.1262, 0.0694, 0.1063]) -Greedy action tensor([ 1.4293, -0.1385, -0.7601, 0.4857]) tensor([0.5849, 0.1220, 0.0655, 0.2277]) -Greedy action tensor([ 1.7197, -0.4340, -0.3800, 0.1975]) tensor([0.6865, 0.0797, 0.0841, 0.1498]) -Greedy action tensor([ 1.2626, -0.6753, -0.5277, 0.2719]) tensor([0.5945, 0.0856, 0.0992, 0.2207]) -Greedy action tensor([ 0.9158, 0.1631, -0.1752, -0.0561]) tensor([0.4576, 0.2156, 0.1537, 0.1731]) -Greedy action tensor([ 0.9074, -0.4321, -0.2483, 0.5615]) tensor([0.4377, 0.1147, 0.1378, 0.3098]) -Greedy action tensor([ 1.9257, -0.5760, -0.2684, 0.5477]) tensor([0.6918, 0.0567, 0.0771, 0.1744]) -Greedy action tensor([ 0.8886, 0.1489, -0.0213, -0.0768]) tensor([0.4424, 0.2111, 0.1781, 0.1685]) -Greedy action tensor([ 0.9472, -0.3522, -0.4094, -0.1628]) tensor([0.5377, 0.1466, 0.1385, 0.1772]) -Greedy action tensor([ 0.7774, -0.4248, -0.1894, 0.4748]) tensor([0.4133, 0.1242, 0.1572, 0.3054]) -Greedy action tensor([ 0.4001, -0.3288, 0.0728, -0.3586]) tensor([0.3743, 0.1806, 0.2698, 0.1753]) -Greedy action tensor([ 0.5060, -0.1857, -0.0231, 0.0081]) tensor([0.3707, 0.1856, 0.2184, 0.2253]) -Greedy action tensor([ 0.5539, -0.3130, -0.0033, -0.2574]) tensor([0.4103, 0.1724, 0.2350, 0.1823]) -Greedy action tensor([ 0.4492, -0.0440, -0.0120, -0.0277]) tensor([0.3494, 0.2134, 0.2203, 0.2169]) -Greedy action tensor([ 0.6681, -0.4675, -0.0523, -0.1399]) tensor([0.4437, 0.1425, 0.2159, 0.1978]) -Greedy action tensor([ 0.7659, -0.6048, -0.0215, -0.3659]) tensor([0.4923, 0.1250, 0.2240, 0.1587]) -Greedy action tensor([ 1.0060, -1.0562, 0.0963, -0.5757]) tensor([0.5762, 0.0733, 0.2320, 0.1185]) -Greedy action tensor([ 0.5048, -0.3591, 0.0086, -0.1343]) tensor([0.3909, 0.1648, 0.2380, 0.2063]) -Greedy action tensor([ 0.9223, -0.8632, 0.0032, -0.4717]) tensor([0.5511, 0.0924, 0.2198, 0.1367]) -Greedy action tensor([ 0.9671, -0.6857, -0.0509, -0.5825]) tensor([0.5665, 0.1085, 0.2047, 0.1203]) -Greedy action tensor([ 0.7961, -0.4456, -0.1148, -0.5260]) tensor([0.5108, 0.1476, 0.2054, 0.1362]) -Greedy action tensor([ 0.4885, -0.1023, -0.0070, -0.2633]) tensor([0.3796, 0.2102, 0.2312, 0.1790]) -Greedy action tensor([ 0.6902, -0.3019, -0.0633, -0.0389]) tensor([0.4303, 0.1596, 0.2026, 0.2076]) -Greedy action tensor([ 0.5829, -0.8062, -0.1681, -0.1924]) tensor([0.4583, 0.1143, 0.2163, 0.2111]) -Greedy action tensor([ 0.7066, -0.3052, -0.1383, -0.4739]) tensor([0.4761, 0.1731, 0.2045, 0.1462]) -Greedy action tensor([ 0.2888, -0.2169, -0.0756, -0.6203]) tensor([0.3703, 0.2233, 0.2572, 0.1492]) -Greedy action tensor([ 0.7132, 0.0725, -0.0860, 0.0840]) tensor([0.3985, 0.2100, 0.1792, 0.2124]) -Greedy action tensor([ 0.0914, -0.0018, -0.1067, -0.3531]) tensor([0.2965, 0.2701, 0.2432, 0.1901]) -Greedy action tensor([ 0.5637, -0.1049, 0.0656, -0.3744]) tensor([0.3982, 0.2040, 0.2420, 0.1558]) -Greedy action tensor([ 0.6739, -0.4143, -0.1186, -0.4003]) tensor([0.4692, 0.1581, 0.2124, 0.1603]) -Greedy action tensor([ 0.7274, -0.5868, 0.1194, -0.5437]) tensor([0.4776, 0.1283, 0.2600, 0.1340]) -Greedy action tensor([ 0.8805, -0.4212, -0.1057, -0.3411]) tensor([0.5155, 0.1403, 0.1923, 0.1520]) -Greedy action tensor([ 0.5033, -0.2388, 0.0210, -0.1744]) tensor([0.3844, 0.1830, 0.2373, 0.1952]) -Greedy action tensor([ 0.8523, -0.5674, -0.0444, -0.4373]) tensor([0.5195, 0.1256, 0.2119, 0.1430]) -Greedy action tensor([ 0.9035, -0.5421, 0.0607, -0.3375]) tensor([0.5115, 0.1205, 0.2202, 0.1479]) -Greedy action tensor([ 0.8173, -0.4465, -0.0802, -0.4461]) tensor([0.5069, 0.1432, 0.2066, 0.1433]) -Greedy action tensor([ 0.5628, -0.0233, -0.1918, -0.0552]) tensor([0.3897, 0.2169, 0.1833, 0.2101]) -Greedy action tensor([ 1.2108, -0.3740, -0.2020, -0.6281]) tensor([0.6221, 0.1275, 0.1515, 0.0989]) -Greedy action tensor([ 1.1138, -0.4695, 0.0252, -0.4756]) tensor([0.5727, 0.1176, 0.1928, 0.1169]) -Greedy action tensor([ 0.6890, -0.4818, -0.0307, -0.3675]) tensor([0.4663, 0.1446, 0.2270, 0.1621]) -Greedy action tensor([ 0.7792, -0.9817, 0.1784, -0.5599]) tensor([0.5045, 0.0867, 0.2766, 0.1322]) -Greedy action tensor([ 0.6245, -0.4133, -0.1284, -0.4504]) tensor([0.4615, 0.1635, 0.2174, 0.1575]) -Greedy action tensor([ 0.7639, -0.5203, -0.0587, -0.4440]) tensor([0.4963, 0.1374, 0.2180, 0.1483]) -Greedy action tensor([ 0.7468, -0.5598, -0.0767, -0.5510]) tensor([0.5043, 0.1366, 0.2213, 0.1378]) -Greedy action tensor([ 0.6873, -0.3976, -0.1522, -0.4476]) tensor([0.4782, 0.1616, 0.2065, 0.1537]) -Greedy action tensor([ 0.9717, -0.5861, -0.1062, -0.6812]) tensor([0.5739, 0.1209, 0.1953, 0.1099]) -Greedy action tensor([ 0.6085, -0.1852, -0.1026, -0.1159]) tensor([0.4119, 0.1862, 0.2023, 0.1996]) -Greedy action tensor([ 0.6306, -0.3258, -0.0712, -0.2584]) tensor([0.4365, 0.1677, 0.2164, 0.1794]) -Greedy action tensor([ 0.3343, 0.1912, -0.0736, 0.0291]) tensor([0.3059, 0.2651, 0.2035, 0.2255]) -Greedy action tensor([ 0.5834, -0.8086, -0.0933, -0.5776]) tensor([0.4831, 0.1201, 0.2455, 0.1513]) -Greedy action tensor([ 0.5284, -0.2266, -0.0477, -0.3737]) tensor([0.4102, 0.1928, 0.2306, 0.1664]) -Greedy action tensor([ 0.4806, 0.0046, 0.0280, -0.0486]) tensor([0.3513, 0.2183, 0.2234, 0.2070]) -Greedy action tensor([ 0.9184, -0.2437, -0.0698, -0.0675]) tensor([0.4859, 0.1520, 0.1809, 0.1813]) -Greedy action tensor([ 0.8360, -0.3141, 0.1030, -0.4247]) tensor([0.4806, 0.1522, 0.2309, 0.1362]) -Greedy action tensor([ 0.8652, -0.4840, -0.2089, -0.0994]) tensor([0.5045, 0.1309, 0.1723, 0.1923]) -Greedy action tensor([ 0.3607, -0.1563, -0.1757, 0.0359]) tensor([0.3444, 0.2053, 0.2014, 0.2489]) -Greedy action tensor([ 0.5698, -0.5905, -0.0159, -0.3444]) tensor([0.4403, 0.1380, 0.2452, 0.1765]) -Greedy action tensor([ 0.5100, 0.0208, -0.0821, 0.1245]) tensor([0.3513, 0.2154, 0.1943, 0.2389]) -Greedy action tensor([ 0.4923, -0.4772, -0.2699, -0.1992]) tensor([0.4261, 0.1616, 0.1988, 0.2134]) -Greedy action tensor([ 0.6755, -0.0046, -0.0971, -0.4603]) tensor([0.4368, 0.2212, 0.2017, 0.1403]) -Greedy action tensor([ 0.5774, -0.2694, -0.0178, -0.1973]) tensor([0.4097, 0.1757, 0.2259, 0.1888]) -Greedy action tensor([ 0.7979, -0.6420, -0.1747, -0.2627]) tensor([0.5099, 0.1208, 0.1928, 0.1765]) -Greedy action tensor([ 0.5740, -0.3479, 0.0094, -0.3180]) tensor([0.4209, 0.1674, 0.2393, 0.1725]) -Greedy action tensor([ 0.7513, -0.6824, -0.1985, -0.2780]) tensor([0.5044, 0.1203, 0.1951, 0.1802]) -Greedy action tensor([ 0.7995, -0.6634, -0.0546, -0.5910]) tensor([0.5246, 0.1215, 0.2233, 0.1306]) -Greedy action tensor([ 0.3132, -0.3182, -0.0243, -0.4159]) tensor([0.3666, 0.1950, 0.2616, 0.1768]) -Greedy action tensor([ 1.1677, -0.4066, -0.2444, -0.4866]) tensor([0.6090, 0.1262, 0.1484, 0.1165]) -Greedy action tensor([ 0.9650, -0.5884, 0.0262, -0.2985]) tensor([0.5304, 0.1122, 0.2075, 0.1499]) -Greedy action tensor([ 0.6834, -0.5720, -0.0277, -0.2750]) tensor([0.4630, 0.1320, 0.2274, 0.1776]) -Greedy action tensor([ 1.0481, -0.5706, -0.0370, -0.5547]) tensor([0.5756, 0.1141, 0.1945, 0.1159]) -Greedy action tensor([ 0.7654, -0.7199, -0.0716, -0.7105]) tensor([0.5296, 0.1199, 0.2294, 0.1211]) -Greedy action tensor([ 0.5848, -0.1923, 0.0024, -0.4131]) tensor([0.4190, 0.1926, 0.2340, 0.1544]) -Greedy action tensor([ 0.7221, -0.1355, -0.2617, -0.4149]) tensor([0.4720, 0.2002, 0.1765, 0.1514]) -Greedy action tensor([ 0.5748, -0.2643, -0.0470, -0.1331]) tensor([0.4062, 0.1755, 0.2181, 0.2001]) -Greedy action tensor([ 0.7837, -0.2498, -0.0260, -0.6142]) tensor([0.4883, 0.1737, 0.2173, 0.1207]) -Greedy action tensor([ 0.4995, -0.1797, 0.0210, -0.0632]) tensor([0.3709, 0.1880, 0.2298, 0.2113]) -Greedy action tensor([ 0.8715, -0.5355, 0.1097, -0.3709]) tensor([0.4999, 0.1224, 0.2334, 0.1443]) -Greedy action tensor([ 0.3261, -0.0438, -0.0462, -0.2428]) tensor([0.3394, 0.2345, 0.2339, 0.1922]) -Greedy action tensor([ 0.4788, -0.3619, -0.1553, -0.0107]) tensor([0.3884, 0.1676, 0.2060, 0.2381]) -Greedy action tensor([ 0.8829, -0.3265, -0.0593, -0.4795]) tensor([0.5143, 0.1535, 0.2005, 0.1317]) -Greedy action tensor([ 1.2121, -1.1635, 0.0214, -0.7605]) tensor([0.6510, 0.0605, 0.1979, 0.0906]) -Greedy action tensor([ 0.4046, 0.1198, -0.1445, -0.0628]) tensor([0.3383, 0.2544, 0.1953, 0.2120]) -Greedy action tensor([ 0.8724, -0.7227, 0.2948, -0.5987]) tensor([0.5015, 0.1018, 0.2815, 0.1152]) -Greedy action tensor([ 0.4030, 0.1796, -0.2768, -0.1785]) tensor([0.3490, 0.2791, 0.1768, 0.1951]) -Greedy action tensor([ 0.6252, -0.0671, -0.0197, -0.8813]) tensor([0.4451, 0.2227, 0.2335, 0.0987]) -Greedy action tensor([ 0.7001, -0.4441, 0.1331, -0.5360]) tensor([0.4595, 0.1463, 0.2607, 0.1335]) -Greedy action tensor([ 0.7314, -0.4911, 0.1173, -0.1652]) tensor([0.4457, 0.1313, 0.2412, 0.1818]) -Greedy action tensor([ 0.4328, -0.0632, 0.0361, -0.0749]) tensor([0.3468, 0.2112, 0.2332, 0.2087]) -Greedy action tensor([ 1.0387, -0.8055, 0.0358, -0.4459]) tensor([0.5709, 0.0903, 0.2094, 0.1294]) -Greedy action tensor([ 0.7155, -0.4702, -0.1361, -0.2881]) tensor([0.4765, 0.1456, 0.2033, 0.1747]) -Greedy action tensor([ 0.5946, -0.4014, -0.0141, -0.2679]) tensor([0.4282, 0.1581, 0.2330, 0.1807]) -Greedy action tensor([-1.8307, -0.3725, 0.5977, -0.1112]) tensor([0.0450, 0.1934, 0.5104, 0.2512]) -Greedy action tensor([-1.8542, -0.3932, 0.6185, -0.1281]) tensor([0.0439, 0.1892, 0.5203, 0.2466]) -Greedy action tensor([-1.8644, -0.4620, 0.6274, -0.1487]) tensor([0.0440, 0.1790, 0.5321, 0.2449]) -Greedy action tensor([-1.6707, -0.5131, 0.5304, -0.0278]) tensor([0.0544, 0.1731, 0.4913, 0.2812]) -Greedy action tensor([-1.9100, -0.4421, 0.6509, -0.1613]) tensor([0.0416, 0.1806, 0.5387, 0.2391]) -Greedy action tensor([-0.2977, 1.0711, 0.0035, 0.3638]) tensor([0.1217, 0.4782, 0.1644, 0.2357]) -Greedy action tensor([-1.8991, -0.4698, 0.6433, -0.1627]) tensor([0.0424, 0.1772, 0.5394, 0.2409]) -Greedy action tensor([-1.9480, -0.4540, 0.6704, -0.1820]) tensor([0.0400, 0.1781, 0.5482, 0.2337]) -Greedy action tensor([-1.5962, -0.4449, 0.4789, 0.1389]) tensor([0.0562, 0.1777, 0.4476, 0.3186]) -Greedy action tensor([-1.0382, 0.1191, 0.4469, 0.4200]) tensor([0.0775, 0.2467, 0.3424, 0.3333]) -Greedy action tensor([-1.9106, -0.3529, 0.6361, -0.1554]) tensor([0.0412, 0.1954, 0.5254, 0.2381]) -Greedy action tensor([-1.6152, -0.3131, 0.5269, -0.1324]) tensor([0.0568, 0.2089, 0.4839, 0.2503]) -Greedy action tensor([-0.8459, -0.5698, 0.3830, 0.3504]) tensor([0.1106, 0.1457, 0.3779, 0.3658]) -Greedy action tensor([-1.1219, 0.4811, 0.2817, -0.1195]) tensor([0.0784, 0.3893, 0.3189, 0.2135]) -Greedy action tensor([-1.9223, -0.4146, 0.6527, -0.1638]) tensor([0.0409, 0.1847, 0.5370, 0.2374]) -Greedy action tensor([-1.7976, -0.4973, 0.7321, 0.0812]) tensor([0.0421, 0.1544, 0.5281, 0.2754]) -Greedy action tensor([-1.5959, -0.4912, 0.5084, -0.0162]) tensor([0.0586, 0.1768, 0.4804, 0.2843]) -Greedy action tensor([-1.9273, -0.4481, 0.6677, -0.1609]) tensor([0.0406, 0.1782, 0.5438, 0.2374]) -Greedy action tensor([-1.9071, -0.4133, 0.6479, -0.1574]) tensor([0.0415, 0.1850, 0.5346, 0.2389]) -Greedy action tensor([-1.9348, -0.4443, 0.6632, -0.1744]) tensor([0.0405, 0.1798, 0.5442, 0.2355]) -Greedy action tensor([-1.7232, -0.4371, 0.6559, 0.0580]) tensor([0.0468, 0.1695, 0.5056, 0.2781]) -Greedy action tensor([-1.9141, -0.3586, 0.6369, -0.1550]) tensor([0.0410, 0.1944, 0.5262, 0.2383]) -Greedy action tensor([-1.8574, -0.2298, 0.5849, -0.1108]) tensor([0.0429, 0.2183, 0.4930, 0.2459]) -Greedy action tensor([-1.9094, -0.4566, 0.6769, -0.1545]) tensor([0.0411, 0.1757, 0.5456, 0.2376]) -Greedy action tensor([-1.7846, -0.3851, 0.6676, -0.0837]) tensor([0.0452, 0.1830, 0.5244, 0.2474]) -Greedy action tensor([-1.9091, -0.4276, 0.6534, -0.1515]) tensor([0.0414, 0.1820, 0.5366, 0.2399]) -Greedy action tensor([-1.0389, -0.2787, 0.1074, 0.1103]) tensor([0.1059, 0.2265, 0.3333, 0.3343]) -Greedy action tensor([-1.8888, -0.4376, 0.6400, -0.1484]) tensor([0.0425, 0.1816, 0.5334, 0.2425]) -Greedy action tensor([-1.5832, 0.2318, 0.3980, -0.0271]) tensor([0.0523, 0.3210, 0.3790, 0.2478]) -Greedy action tensor([-1.6836, -0.2551, 0.5674, 0.0043]) tensor([0.0498, 0.2078, 0.4730, 0.2694]) -Greedy action tensor([-1.6015, -0.2685, 1.1316, 0.8508]) tensor([0.0315, 0.1193, 0.4838, 0.3654]) -Greedy action tensor([-1.5368, 0.5432, 0.3247, 0.0639]) tensor([0.0490, 0.3925, 0.3155, 0.2430]) -Greedy action tensor([-1.9200, -0.4442, 0.6569, -0.1626]) tensor([0.0411, 0.1798, 0.5408, 0.2383]) -Greedy action tensor([-1.4954, -0.1343, 0.4132, -0.0210]) tensor([0.0625, 0.2436, 0.4212, 0.2728]) -Greedy action tensor([-1.8881, -0.4351, 0.6706, -0.1036]) tensor([0.0414, 0.1771, 0.5349, 0.2466]) -Greedy action tensor([-1.2101, -0.3581, 0.5982, 0.4938]) tensor([0.0669, 0.1569, 0.4083, 0.3678]) -Greedy action tensor([-1.9089, -0.4526, 0.6518, -0.1616]) tensor([0.0417, 0.1789, 0.5400, 0.2394]) -Greedy action tensor([-1.8679, -0.3501, 0.6299, -0.1320]) tensor([0.0427, 0.1950, 0.5197, 0.2426]) -Greedy action tensor([-1.4832, -0.4935, 0.6243, 0.1420]) tensor([0.0588, 0.1583, 0.4840, 0.2988]) -Greedy action tensor([-1.8924, -0.4601, 0.6391, -0.1456]) tensor([0.0426, 0.1783, 0.5351, 0.2441]) -Greedy action tensor([-1.9015, -0.4301, 0.6442, -0.1490]) tensor([0.0419, 0.1824, 0.5341, 0.2416]) -Greedy action tensor([-1.9137, -0.3343, 0.6317, -0.1617]) tensor([0.0410, 0.1991, 0.5232, 0.2366]) -Greedy action tensor([-1.8675, -0.4488, 0.6279, -0.1390]) tensor([0.0437, 0.1805, 0.5298, 0.2460]) -Greedy action tensor([-1.8535, -0.4327, 0.6519, -0.0974]) tensor([0.0431, 0.1786, 0.5284, 0.2498]) -Greedy action tensor([1.2776, 1.2145, 0.0579, 0.8520]) tensor([0.3463, 0.3251, 0.1023, 0.2263]) -Greedy action tensor([-1.8177, -0.4155, 0.6592, -0.1040]) tensor([0.0444, 0.1805, 0.5287, 0.2465]) -Greedy action tensor([-1.9418, -0.4543, 0.6678, -0.1779]) tensor([0.0402, 0.1781, 0.5469, 0.2348]) -Greedy action tensor([-1.7580, -0.4203, 0.5871, -0.0645]) tensor([0.0483, 0.1842, 0.5045, 0.2629]) -Greedy action tensor([-1.8853, -0.4341, 0.6695, -0.1172]) tensor([0.0417, 0.1779, 0.5363, 0.2442]) -Greedy action tensor([-1.8604, -0.3750, 0.6237, -0.1274]) tensor([0.0434, 0.1915, 0.5199, 0.2453]) -Greedy action tensor([-1.8893, -0.4342, 0.6419, -0.1468]) tensor([0.0424, 0.1818, 0.5334, 0.2424]) -Greedy action tensor([-1.7424, -0.3384, 0.6941, 0.1031]) tensor([0.0438, 0.1783, 0.5007, 0.2773]) -Greedy action tensor([-1.8058, -0.3910, 0.6627, -0.0801]) tensor([0.0444, 0.1826, 0.5238, 0.2492]) -Greedy action tensor([-1.8249, -0.2765, 0.5537, -0.0973]) tensor([0.0452, 0.2126, 0.4878, 0.2544]) -Greedy action tensor([-0.2021, -0.2868, 0.1987, 0.2395]) tensor([0.2013, 0.1850, 0.3006, 0.3131]) -Greedy action tensor([-1.8814, -0.4719, 0.6393, -0.1320]) tensor([0.0430, 0.1758, 0.5342, 0.2470]) -Greedy action tensor([-1.1585, -0.5638, 0.5053, 0.5301]) tensor([0.0741, 0.1342, 0.3909, 0.4008]) -Greedy action tensor([-1.6129, 0.3435, 0.3810, 0.0839]) tensor([0.0479, 0.3389, 0.3518, 0.2614]) -Greedy action tensor([-1.4982, -0.4748, 0.4307, 0.0459]) tensor([0.0652, 0.1813, 0.4484, 0.3052]) -Greedy action tensor([-1.8516, -0.2935, 0.6402, -0.1003]) tensor([0.0424, 0.2013, 0.5121, 0.2442]) -Greedy action tensor([-1.9136, -0.4514, 0.6758, -0.1531]) tensor([0.0409, 0.1765, 0.5448, 0.2378]) -Greedy action tensor([-1.8817, -0.4384, 0.6616, -0.1262]) tensor([0.0421, 0.1784, 0.5358, 0.2437]) -Greedy action tensor([-1.9157, -0.4730, 0.7084, -0.1166]) tensor([0.0399, 0.1688, 0.5502, 0.2411]) -Greedy action tensor([-0.5841, -0.2366, 0.2042, 1.3296]) tensor([0.0878, 0.1242, 0.1931, 0.5949]) -Greedy action tensor([-1.6471, 0.6046, 0.3252, 0.0984]) tensor([0.0427, 0.4058, 0.3069, 0.2446]) -Greedy action tensor([-1.8230, -0.4318, 0.6070, -0.1029]) tensor([0.0455, 0.1830, 0.5172, 0.2543]) -Greedy action tensor([-1.7218, -0.5345, 0.5931, -0.0380]) tensor([0.0505, 0.1657, 0.5116, 0.2722]) -Greedy action tensor([-1.2454, 0.1421, 0.6859, 0.5231]) tensor([0.0563, 0.2254, 0.3883, 0.3300]) -Greedy action tensor([-1.5346, -0.6445, 0.4628, 0.1776]) tensor([0.0612, 0.1490, 0.4509, 0.3390]) -Greedy action tensor([ 0.4855, -0.3874, 0.8795, 1.1928]) tensor([0.2029, 0.0847, 0.3009, 0.4115]) -Greedy action tensor([-1.3522, -0.4164, 0.3827, 0.1973]) tensor([0.0718, 0.1830, 0.4070, 0.3381]) -Greedy action tensor([-1.9466, -0.4528, 0.6692, -0.1819]) tensor([0.0400, 0.1784, 0.5478, 0.2339]) -Greedy action tensor([-1.8138, -0.2969, 0.6379, -0.0785]) tensor([0.0438, 0.1996, 0.5083, 0.2483]) -Greedy action tensor([-1.7334, -0.2822, 0.6316, -0.0345]) tensor([0.0468, 0.1996, 0.4979, 0.2557]) -Greedy action tensor([-1.0076, -0.6112, 0.2890, 0.0142]) tensor([0.1121, 0.1666, 0.4099, 0.3114]) -Greedy action tensor([-1.8476, -0.4334, 0.6250, -0.1231]) tensor([0.0443, 0.1822, 0.5250, 0.2485]) -Greedy action tensor([-1.9016, -0.4168, 0.6442, -0.1586]) tensor([0.0419, 0.1848, 0.5340, 0.2393]) -Greedy action tensor([-1.7306, -0.4880, 0.5970, -0.0412]) tensor([0.0497, 0.1721, 0.5092, 0.2690]) -Greedy action tensor([-1.2909, 0.5376, 0.2586, 0.0129]) tensor([0.0640, 0.3986, 0.3015, 0.2358]) -Greedy action tensor([-1.6456, -0.4906, 0.4922, 0.0404]) tensor([0.0554, 0.1758, 0.4698, 0.2990]) -Greedy action tensor([-1.8811, -0.4242, 0.6331, -0.1385]) tensor([0.0428, 0.1837, 0.5289, 0.2445]) -Greedy action tensor([ 1.4855, -1.2210, 0.4704, 1.4377]) tensor([0.4197, 0.0280, 0.1521, 0.4001]) -Greedy action tensor([-0.2049, 0.4352, 1.0913, 0.0466]) tensor([0.1276, 0.2420, 0.4664, 0.1641]) -Greedy action tensor([ 0.8469, -0.4537, 0.5240, 0.3850]) tensor([0.3807, 0.1037, 0.2757, 0.2399]) -Greedy action tensor([ 0.7088, 0.2359, 0.0233, -0.1305]) tensor([0.3908, 0.2435, 0.1969, 0.1688]) -Greedy action tensor([ 0.9861, -0.3797, -0.3276, 0.1042]) tensor([0.5160, 0.1317, 0.1387, 0.2136]) -Greedy action tensor([-0.8528, -0.3959, 0.5404, -0.9386]) tensor([0.1329, 0.2099, 0.5353, 0.1220]) -Greedy action tensor([ 0.2226, -0.6614, 0.5758, -0.4070]) tensor([0.2968, 0.1226, 0.4225, 0.1581]) -Greedy action tensor([ 0.0733, -0.9551, 0.4608, 0.0070]) tensor([0.2655, 0.0949, 0.3911, 0.2485]) -Greedy action tensor([ 0.1069, 0.0808, -0.1931, -0.4360]) tensor([0.3034, 0.2956, 0.2248, 0.1763]) -Greedy action tensor([ 1.0238, -0.1047, 0.4796, 0.2857]) tensor([0.4198, 0.1358, 0.2436, 0.2007]) -Greedy action tensor([-1.0800, 0.1445, -1.0389, 0.3746]) tensor([0.1028, 0.3498, 0.1071, 0.4403]) -Greedy action tensor([0.8726, 0.0353, 0.5217, 0.1047]) tensor([0.3845, 0.1664, 0.2707, 0.1784]) -Greedy action tensor([-0.1112, -1.0591, -0.7891, 0.0713]) tensor([0.3231, 0.1252, 0.1640, 0.3877]) -Greedy action tensor([ 0.1623, -1.8755, -0.8033, 0.4452]) tensor([0.3524, 0.0459, 0.1342, 0.4676]) -Greedy action tensor([ 0.1386, -0.4764, 0.9808, -0.6503]) tensor([0.2317, 0.1252, 0.5378, 0.1053]) -Greedy action tensor([-0.5838, -0.6625, 0.2911, -0.7375]) tensor([0.1930, 0.1784, 0.4630, 0.1655]) -Greedy action tensor([ 0.1321, -0.7704, 1.5069, -0.3357]) tensor([0.1670, 0.0677, 0.6606, 0.1046]) -Greedy action tensor([ 1.8561, -1.0632, 0.1290, 1.3221]) tensor([0.5500, 0.0297, 0.0978, 0.3225]) -Greedy action tensor([-0.4428, -0.3092, -0.3612, -0.5619]) tensor([0.2430, 0.2777, 0.2636, 0.2157]) -Greedy action tensor([ 0.2259, -1.0861, 0.4315, -1.0659]) tensor([0.3607, 0.0971, 0.4430, 0.0991]) -Greedy action tensor([ 0.1395, -0.9121, -0.1137, -0.5773]) tensor([0.3826, 0.1337, 0.2970, 0.1868]) -Greedy action tensor([ 0.4646, -0.8163, 0.4060, 0.0817]) tensor([0.3445, 0.0957, 0.3249, 0.2349]) -Greedy action tensor([-0.4505, -0.8084, 0.3765, -0.1126]) tensor([0.1856, 0.1298, 0.4244, 0.2602]) -Greedy action tensor([ 0.1096, -0.1591, 0.9161, -0.2790]) tensor([0.2136, 0.1632, 0.4784, 0.1448]) -Greedy action tensor([ 0.4521, -0.8685, 1.1403, 1.1752]) tensor([0.1880, 0.0502, 0.3742, 0.3875]) -Greedy action tensor([ 0.4955, 0.5063, -0.5669, 1.6277]) tensor([0.1832, 0.1852, 0.0633, 0.5683]) -Greedy action tensor([ 1.2038, -0.2720, 0.0375, 0.2929]) tensor([0.5149, 0.1177, 0.1604, 0.2071]) -Greedy action tensor([ 1.0613, -0.9848, -0.1762, 0.4457]) tensor([0.5103, 0.0660, 0.1480, 0.2757]) -Greedy action tensor([ 0.6942, -0.9090, 0.4225, 0.2233]) tensor([0.3864, 0.0778, 0.2945, 0.2413]) -Greedy action tensor([-0.7103, -1.1080, -0.4647, -0.0086]) tensor([0.2013, 0.1353, 0.2574, 0.4061]) -Greedy action tensor([ 0.4362, 0.9658, -0.3148, -0.2039]) tensor([0.2705, 0.4593, 0.1276, 0.1426]) -Greedy action tensor([-1.1481, -2.2388, 0.8409, 0.8527]) tensor([0.0623, 0.0209, 0.4557, 0.4611]) -Greedy action tensor([-0.2305, -0.6463, -0.2883, 0.5836]) tensor([0.2057, 0.1357, 0.1942, 0.4644]) -Greedy action tensor([-0.2586, -0.0226, -0.7370, 1.0195]) tensor([0.1544, 0.1955, 0.0957, 0.5543]) -Greedy action tensor([ 1.5583, -0.7872, 0.8596, 0.6217]) tensor([0.5038, 0.0483, 0.2505, 0.1975]) -Greedy action tensor([-1.1668, -1.5398, -0.2099, -0.4271]) tensor([0.1566, 0.1078, 0.4076, 0.3280]) -Greedy action tensor([0.9531, 0.8506, 0.7802, 0.8340]) tensor([0.2754, 0.2485, 0.2316, 0.2444]) -Greedy action tensor([ 0.5188, -1.2069, 1.7977, 0.0999]) tensor([0.1842, 0.0328, 0.6618, 0.1212]) -Greedy action tensor([ 0.5030, 0.0318, -0.2563, -0.0880]) tensor([0.3779, 0.2359, 0.1769, 0.2093]) -Greedy action tensor([-0.6446, 0.4961, -0.3111, 0.3441]) tensor([0.1218, 0.3810, 0.1700, 0.3273]) -Greedy action tensor([ 1.4558, 0.1411, -0.0658, 0.7119]) tensor([0.5096, 0.1369, 0.1113, 0.2422]) -Greedy action tensor([ 0.1308, -1.8219, -0.1496, 0.2856]) tensor([0.3263, 0.0463, 0.2465, 0.3809]) -Greedy action tensor([-1.6385, 0.0195, 1.5284, -0.9858]) tensor([0.0313, 0.1645, 0.7439, 0.0602]) -Greedy action tensor([-0.0377, -1.3821, -0.2313, 1.8087]) tensor([0.1187, 0.0310, 0.0978, 0.7525]) -Greedy action tensor([ 1.5147, -0.4516, 0.4336, 1.0706]) tensor([0.4715, 0.0660, 0.1600, 0.3025]) -Greedy action tensor([ 0.3151, -0.9232, -0.2371, -0.3696]) tensor([0.4220, 0.1223, 0.2429, 0.2128]) -Greedy action tensor([-0.8282, -0.9495, 0.9280, -0.5773]) tensor([0.1116, 0.0988, 0.6461, 0.1434]) -Greedy action tensor([ 0.4588, -1.6409, 0.3102, 0.9774]) tensor([0.2729, 0.0334, 0.2352, 0.4584]) -Greedy action tensor([-0.0825, -0.6323, 0.6515, 0.1286]) tensor([0.2043, 0.1179, 0.4256, 0.2523]) -Greedy action tensor([-0.0320, -0.7891, 0.1075, 0.6423]) tensor([0.2183, 0.1024, 0.2509, 0.4284]) -Greedy action tensor([ 1.1829, -0.3092, 1.4128, 0.7275]) tensor([0.3208, 0.0721, 0.4037, 0.2034]) -Greedy action tensor([ 2.1511, -1.2271, 0.7001, 0.6970]) tensor([0.6658, 0.0227, 0.1560, 0.1555]) -Greedy action tensor([ 0.5383, -1.7874, 0.0723, 0.6202]) tensor([0.3558, 0.0348, 0.2233, 0.3862]) -Greedy action tensor([-0.3578, 1.2912, 1.2309, -0.4153]) tensor([0.0830, 0.4319, 0.4066, 0.0784]) -Greedy action tensor([ 1.2278, 0.4733, 0.1836, -0.8496]) tensor([0.5135, 0.2415, 0.1807, 0.0643]) -Greedy action tensor([-1.3755, -0.2660, 0.6068, -1.3205]) tensor([0.0810, 0.2456, 0.5879, 0.0856]) -Greedy action tensor([ 0.3012, -1.4892, 0.7075, 1.7265]) tensor([0.1465, 0.0244, 0.2199, 0.6092]) -Greedy action tensor([-0.6735, -0.9355, -0.6085, 0.2526]) tensor([0.1865, 0.1435, 0.1991, 0.4709]) -Greedy action tensor([ 0.4612, -1.9776, 1.0087, -0.7635]) tensor([0.3215, 0.0281, 0.5559, 0.0945]) -Greedy action tensor([-0.3937, -0.7235, 0.0042, 0.9381]) tensor([0.1429, 0.1028, 0.2128, 0.5414]) -Greedy action tensor([ 0.4913, -0.7686, 0.0202, 0.6363]) tensor([0.3264, 0.0926, 0.2038, 0.3773]) -Greedy action tensor([-0.9359, 0.1154, 1.3888, -0.9464]) tensor([0.0663, 0.1898, 0.6782, 0.0656]) -Greedy action tensor([-0.0214, 0.5678, 1.2605, -0.4982]) tensor([0.1423, 0.2565, 0.5128, 0.0883]) -Greedy action tensor([ 0.8903, 0.2117, 0.5398, -0.2254]) tensor([0.3938, 0.1998, 0.2774, 0.1290]) -Greedy action tensor([-1.2433, -1.4632, 1.4872, 0.4963]) tensor([0.0438, 0.0351, 0.6717, 0.2494]) -Greedy action tensor([-0.4916, 0.4402, -0.5261, 0.3772]) tensor([0.1452, 0.3686, 0.1402, 0.3460]) -Greedy action tensor([ 0.5638, -1.5894, 0.2222, 0.6082]) tensor([0.3482, 0.0404, 0.2474, 0.3640]) -Greedy action tensor([-0.2046, -1.1675, -0.3610, 0.9215]) tensor([0.1880, 0.0718, 0.1607, 0.5796]) -Greedy action tensor([-0.1722, -0.7362, -0.3754, 1.0008]) tensor([0.1780, 0.1013, 0.1453, 0.5754]) -Greedy action tensor([ 0.0494, -1.7445, 0.1323, 0.2771]) tensor([0.2850, 0.0474, 0.3097, 0.3579]) -Greedy action tensor([-0.0723, -1.3232, -0.2220, -0.9298]) tensor([0.3889, 0.1113, 0.3348, 0.1650]) -Greedy action tensor([-0.2863, -0.4134, -0.5225, 0.7737]) tensor([0.1800, 0.1585, 0.1421, 0.5195]) -Greedy action tensor([0.4026, 0.1939, 0.2574, 0.3666]) tensor([0.2746, 0.2229, 0.2375, 0.2649]) -Greedy action tensor([ 0.3879, -1.0702, -0.8974, 0.3991]) tensor([0.3967, 0.0923, 0.1097, 0.4012]) -Greedy action tensor([-1.0857, -0.6906, 1.1523, -0.4453]) tensor([0.0727, 0.1079, 0.6815, 0.1379]) -Greedy action tensor([ 1.2706, -0.8736, -0.0655, 0.2081]) tensor([0.5795, 0.0679, 0.1523, 0.2003]) -Greedy action tensor([-0.5375, 0.1348, 1.3999, -0.9614]) tensor([0.0947, 0.1856, 0.6576, 0.0620]) -Greedy action tensor([ 0.5384, -1.5966, -0.3909, -0.5604]) tensor([0.5416, 0.0640, 0.2138, 0.1805]) -Greedy action tensor([-0.6950, -0.7028, 0.7346, -0.2224]) tensor([0.1286, 0.1276, 0.5374, 0.2064]) -Greedy action tensor([ 0.5649, -0.7288, 1.5060, 0.1998]) tensor([0.2207, 0.0605, 0.5656, 0.1532]) -Greedy action tensor([ 0.8726, -0.7318, 0.6447, 1.5244]) tensor([0.2553, 0.0513, 0.2033, 0.4900]) -Greedy action tensor([-0.3158, -1.7079, 0.4399, -0.6262]) tensor([0.2433, 0.0605, 0.5179, 0.1784]) -Greedy action tensor([ 1.3990, -0.9989, -0.0947, 0.0922]) tensor([0.6305, 0.0573, 0.1416, 0.1707]) -Greedy action tensor([ 0.9840, -0.1700, -0.3726, 0.4618]) tensor([0.4617, 0.1456, 0.1189, 0.2738]) -Greedy action tensor([ 1.5299, -0.3581, -0.4593, 0.5692]) tensor([0.5985, 0.0906, 0.0819, 0.2290]) -Greedy action tensor([ 1.2271, -0.5770, -0.0610, 0.4811]) tensor([0.5223, 0.0860, 0.1440, 0.2477]) -Greedy action tensor([ 1.4505, -0.2930, -0.9317, 0.3565]) tensor([0.6242, 0.1092, 0.0576, 0.2090]) -Greedy action tensor([ 1.2702, -0.2730, 0.1953, 0.2686]) tensor([0.5202, 0.1112, 0.1776, 0.1911]) -Greedy action tensor([ 2.2021, -0.9875, -0.2159, 1.0324]) tensor([0.6941, 0.0286, 0.0618, 0.2155]) -Greedy action tensor([ 1.5329, -0.5401, -0.3323, 0.3652]) tensor([0.6282, 0.0790, 0.0973, 0.1954]) -Greedy action tensor([ 1.0844, -0.4672, -0.1979, 0.3819]) tensor([0.5039, 0.1068, 0.1398, 0.2496]) -Greedy action tensor([ 1.3474, -0.3521, -0.2526, 0.4487]) tensor([0.5581, 0.1020, 0.1127, 0.2272]) -Greedy action tensor([ 0.9149, -0.4760, 0.0451, 0.1118]) tensor([0.4726, 0.1176, 0.1980, 0.2117]) -Greedy action tensor([ 1.4122, -0.8220, -0.3587, 0.7557]) tensor([0.5568, 0.0596, 0.0948, 0.2888]) -Greedy action tensor([ 1.5579, -0.3939, -0.4145, 0.4488]) tensor([0.6207, 0.0882, 0.0864, 0.2048]) -Greedy action tensor([ 0.6848, -0.5487, -0.3801, 0.1094]) tensor([0.4549, 0.1325, 0.1568, 0.2558]) -Greedy action tensor([ 2.3570, -1.2642, 0.1091, 1.0563]) tensor([0.7119, 0.0190, 0.0752, 0.1939]) -Greedy action tensor([ 0.9335, -0.2204, -0.3873, 0.1581]) tensor([0.4895, 0.1544, 0.1307, 0.2254]) -Greedy action tensor([ 1.6503, -0.9734, -0.1920, 0.5326]) tensor([0.6418, 0.0466, 0.1017, 0.2099]) -Greedy action tensor([ 1.0782, -1.0749, -0.2067, 0.3358]) tensor([0.5351, 0.0621, 0.1481, 0.2547]) -Greedy action tensor([ 0.6168, -0.5662, -0.0295, 0.0464]) tensor([0.4174, 0.1279, 0.2187, 0.2360]) -Greedy action tensor([ 1.5231, -0.6114, -0.5694, 0.6286]) tensor([0.6059, 0.0717, 0.0747, 0.2477]) -Greedy action tensor([ 1.3816, -0.5051, -0.1740, 0.4954]) tensor([0.5634, 0.0854, 0.1189, 0.2323]) -Greedy action tensor([ 1.7901, 0.0414, -0.1828, 0.1784]) tensor([0.6611, 0.1150, 0.0919, 0.1319]) -Greedy action tensor([ 0.9889, -0.3110, -0.3542, 0.3233]) tensor([0.4884, 0.1331, 0.1275, 0.2510]) -Greedy action tensor([ 1.7593, 0.2269, -1.2605, 0.5074]) tensor([0.6448, 0.1393, 0.0315, 0.1844]) -Greedy action tensor([ 1.3890, -0.5105, -0.3405, 0.1323]) tensor([0.6205, 0.0929, 0.1101, 0.1766]) -Greedy action tensor([ 1.4751, -0.1756, -0.2736, 0.2383]) tensor([0.6038, 0.1159, 0.1051, 0.1753]) -Greedy action tensor([ 1.5389, -0.3155, -0.3243, 0.5833]) tensor([0.5895, 0.0923, 0.0915, 0.2267]) -Greedy action tensor([ 1.4425, -0.9908, -0.2728, 0.7314]) tensor([0.5686, 0.0499, 0.1023, 0.2792]) -Greedy action tensor([ 1.9558, -0.6670, -0.4262, 0.2274]) tensor([0.7449, 0.0541, 0.0688, 0.1323]) -Greedy action tensor([ 1.1059, -0.0695, -0.2886, 0.4638]) tensor([0.4801, 0.1482, 0.1190, 0.2526]) -Greedy action tensor([ 1.1816, -0.2100, -0.0493, 0.2256]) tensor([0.5194, 0.1292, 0.1517, 0.1997]) -Greedy action tensor([ 1.3351, -0.2171, -0.5201, 0.2921]) tensor([0.5812, 0.1231, 0.0909, 0.2048]) -Greedy action tensor([ 0.8761, -0.2186, -0.0962, 0.1008]) tensor([0.4601, 0.1540, 0.1740, 0.2119]) -Greedy action tensor([ 1.0126, -0.6748, -0.2378, 0.3854]) tensor([0.4986, 0.0922, 0.1428, 0.2663]) -Greedy action tensor([ 1.1577, 0.0516, -0.3591, 0.3302]) tensor([0.5032, 0.1665, 0.1104, 0.2200]) -Greedy action tensor([ 0.9153, -0.5774, -0.2319, 0.4473]) tensor([0.4611, 0.1036, 0.1464, 0.2888]) -Greedy action tensor([ 1.6087, -0.5469, -0.5047, 0.5824]) tensor([0.6270, 0.0726, 0.0758, 0.2247]) -Greedy action tensor([ 1.8588, -0.0368, -0.2501, 0.4048]) tensor([0.6644, 0.0998, 0.0806, 0.1552]) -Greedy action tensor([ 1.5296, -0.2971, -0.3281, 0.4781]) tensor([0.6001, 0.0966, 0.0936, 0.2097]) -Greedy action tensor([ 1.3555, -0.2357, -0.8989, 0.2977]) tensor([0.6039, 0.1230, 0.0634, 0.2097]) -Greedy action tensor([ 1.2429, -0.6042, -0.2226, 0.3097]) tensor([0.5612, 0.0885, 0.1296, 0.2207]) -Greedy action tensor([ 1.5942e+00, -6.8605e-04, -3.9888e-01, 7.0022e-01]) tensor([0.5720, 0.1161, 0.0780, 0.2340]) -Greedy action tensor([ 1.9112, 0.3049, -0.0794, 0.0579]) tensor([0.6694, 0.1343, 0.0914, 0.1049]) -Greedy action tensor([ 2.4944, -0.9385, -1.1394, 0.3027]) tensor([0.8544, 0.0276, 0.0226, 0.0955]) -Greedy action tensor([ 2.2489, -0.4841, -0.5590, 0.3645]) tensor([0.7829, 0.0509, 0.0472, 0.1189]) -Greedy action tensor([ 2.3779, -0.7912, -0.4973, 1.2767]) tensor([0.6989, 0.0294, 0.0394, 0.2324]) -Greedy action tensor([ 1.0115, -0.0364, -0.0337, -0.3229]) tensor([0.5087, 0.1784, 0.1789, 0.1340]) -Greedy action tensor([ 1.6337, -0.3797, -0.3423, 0.1664]) tensor([0.6655, 0.0889, 0.0922, 0.1534]) -Greedy action tensor([ 1.3562, -0.0979, -0.8341, 0.3708]) tensor([0.5818, 0.1359, 0.0651, 0.2172]) -Greedy action tensor([ 1.1214, -0.1635, -0.3057, 0.2288]) tensor([0.5191, 0.1436, 0.1246, 0.2126]) -Greedy action tensor([ 0.8306, -0.6413, -0.1615, -0.2687]) tensor([0.5172, 0.1187, 0.1918, 0.1723]) -Greedy action tensor([ 1.6369, -0.5107, -0.6630, 0.0439]) tensor([0.7041, 0.0822, 0.0706, 0.1431]) -Greedy action tensor([ 1.7056, 0.3731, 0.1344, -0.7575]) tensor([0.6424, 0.1695, 0.1335, 0.0547]) -Greedy action tensor([ 1.0369, -0.0722, -0.2279, -0.0220]) tensor([0.5105, 0.1684, 0.1441, 0.1770]) -Greedy action tensor([ 1.7446, -0.4592, -0.7420, 0.3499]) tensor([0.6937, 0.0766, 0.0577, 0.1720]) -Greedy action tensor([ 1.7550, -0.1260, -0.4865, 0.3784]) tensor([0.6617, 0.1009, 0.0703, 0.1670]) -Greedy action tensor([ 1.1291, -0.4916, 0.0508, -0.1002]) tensor([0.5463, 0.1080, 0.1858, 0.1598]) -Greedy action tensor([ 0.6039, -0.5267, 0.0662, 0.0202]) tensor([0.4057, 0.1310, 0.2370, 0.2263]) -Greedy action tensor([ 1.6573, -0.4211, -0.4669, 0.2909]) tensor([0.6668, 0.0834, 0.0797, 0.1701]) -Greedy action tensor([ 1.2468, -0.4685, -0.0093, 0.0599]) tensor([0.5650, 0.1017, 0.1609, 0.1724]) -Greedy action tensor([ 1.8258, -0.9583, -0.3248, 0.5119]) tensor([0.6911, 0.0427, 0.0805, 0.1857]) -Greedy action tensor([ 1.3876, -0.0137, -0.6305, 0.6332]) tensor([0.5407, 0.1332, 0.0719, 0.2543]) -Greedy action tensor([ 1.5453, -0.8488, -0.2409, 0.2173]) tensor([0.6562, 0.0599, 0.1100, 0.1739]) -Greedy action tensor([ 1.4718, -0.2272, -0.4191, 0.3650]) tensor([0.6008, 0.1099, 0.0907, 0.1986]) -Greedy action tensor([ 1.5964, -0.7515, -0.4613, 0.3171]) tensor([0.6660, 0.0636, 0.0851, 0.1853]) -Greedy action tensor([ 1.5902, -0.9340, -0.2728, 0.5218]) tensor([0.6334, 0.0507, 0.0983, 0.2176]) -Greedy action tensor([ 1.7482, -0.4929, -0.1502, 0.3845]) tensor([0.6614, 0.0703, 0.0991, 0.1691]) -Greedy action tensor([ 1.3582, -0.4286, -0.9059, 0.2565]) tensor([0.6235, 0.1044, 0.0648, 0.2072]) -Greedy action tensor([ 1.1065, 0.1316, -1.1808, 0.0321]) tensor([0.5494, 0.2072, 0.0558, 0.1876]) -Greedy action tensor([ 1.5143, -0.8681, -0.4012, 0.1988]) tensor([0.6632, 0.0612, 0.0977, 0.1780]) -Greedy action tensor([ 2.1051, -0.5397, -0.4187, 0.5196]) tensor([0.7375, 0.0524, 0.0591, 0.1511]) -Greedy action tensor([ 1.4243, 0.1686, -0.6587, 0.9030]) tensor([0.4992, 0.1422, 0.0622, 0.2964]) -Greedy action tensor([ 1.3261, -0.0151, -0.0332, 0.0673]) tensor([0.5548, 0.1451, 0.1425, 0.1576]) -Greedy action tensor([ 1.1329, -0.6244, -0.1397, -0.0918]) tensor([0.5726, 0.0988, 0.1604, 0.1683]) -Greedy action tensor([ 2.0701, -0.6208, -0.4999, 0.4945]) tensor([0.7401, 0.0502, 0.0566, 0.1531]) -Greedy action tensor([ 0.6923, -0.2147, -0.2588, 0.0193]) tensor([0.4347, 0.1755, 0.1679, 0.2218]) -Greedy action tensor([ 1.4469, 0.1699, -0.2738, 0.3799]) tensor([0.5550, 0.1548, 0.0993, 0.1909]) -Greedy action tensor([ 1.7473, -0.8567, 0.1795, -0.0859]) tensor([0.6933, 0.0513, 0.1446, 0.1109]) -Greedy action tensor([ 1.5471, -0.1359, -0.2746, -0.2018]) tensor([0.6572, 0.1221, 0.1063, 0.1143]) -Greedy action tensor([ 1.3173, -0.7385, -0.2285, -0.1337]) tensor([0.6347, 0.0812, 0.1353, 0.1487]) -Greedy action tensor([ 0.9382, -0.4120, 0.0751, -0.0331]) tensor([0.4855, 0.1258, 0.2048, 0.1838]) -Greedy action tensor([ 1.0806, -0.6673, 0.0438, -0.6417]) tensor([0.5857, 0.1020, 0.2077, 0.1046]) -Greedy action tensor([ 0.4890, -0.3293, -0.0270, -0.3043]) tensor([0.4015, 0.1772, 0.2397, 0.1816]) -Greedy action tensor([ 0.8660, -0.4909, -0.0460, -0.3594]) tensor([0.5121, 0.1318, 0.2057, 0.1504]) -Greedy action tensor([ 0.6853, -0.3703, -0.1335, -0.1374]) tensor([0.4488, 0.1562, 0.1979, 0.1971]) -Greedy action tensor([ 0.8254, -0.5768, -0.0733, -0.1430]) tensor([0.4919, 0.1210, 0.2003, 0.1868]) -Greedy action tensor([ 0.8510, -0.6893, -0.1133, -0.4209]) tensor([0.5331, 0.1143, 0.2032, 0.1494]) -Greedy action tensor([ 0.3992, -0.3728, -0.1668, -0.1345]) tensor([0.3822, 0.1766, 0.2170, 0.2241]) -Greedy action tensor([ 0.8989, -0.4501, 0.2767, -0.6537]) tensor([0.4980, 0.1292, 0.2673, 0.1054]) -Greedy action tensor([ 0.4945, 0.0720, -0.0198, -0.3088]) tensor([0.3702, 0.2426, 0.2214, 0.1658]) -Greedy action tensor([ 0.4663, -0.1326, 0.0506, -0.1968]) tensor([0.3670, 0.2017, 0.2422, 0.1891]) -Greedy action tensor([ 0.5484, 0.1194, 0.0525, -0.1308]) tensor([0.3614, 0.2353, 0.2201, 0.1832]) -Greedy action tensor([ 0.5219, -0.5087, 0.1784, -0.5958]) tensor([0.4179, 0.1491, 0.2964, 0.1367]) -Greedy action tensor([ 1.0034, -0.9625, -0.0179, -0.5265]) tensor([0.5825, 0.0816, 0.2098, 0.1261]) -Greedy action tensor([ 1.0735, -0.6178, -0.0193, -0.6713]) tensor([0.5902, 0.1088, 0.1979, 0.1031]) -Greedy action tensor([ 0.2996, 0.1801, -0.0317, -0.3055]) tensor([0.3173, 0.2816, 0.2278, 0.1733]) -Greedy action tensor([ 0.3313, 0.2466, -0.0990, 0.1143]) tensor([0.2964, 0.2723, 0.1927, 0.2386]) -Greedy action tensor([ 0.5424, -0.2316, 0.2786, -0.4359]) tensor([0.3838, 0.1770, 0.2948, 0.1443]) -Greedy action tensor([ 0.6447, -0.2815, 0.0635, -0.3806]) tensor([0.4322, 0.1712, 0.2417, 0.1550]) -Greedy action tensor([ 0.7769, -0.4490, 0.0207, -0.3351]) tensor([0.4781, 0.1403, 0.2244, 0.1572]) -Greedy action tensor([ 0.6454, -0.4517, -0.2126, -0.3394]) tensor([0.4692, 0.1566, 0.1989, 0.1752]) -Greedy action tensor([ 0.8394, -0.5475, -0.0617, -0.4087]) tensor([0.5147, 0.1286, 0.2090, 0.1477]) -Greedy action tensor([ 0.4129, -0.0260, -0.0722, -0.1640]) tensor([0.3544, 0.2285, 0.2181, 0.1990]) -Greedy action tensor([ 0.9113, -0.6987, 0.0577, -0.5165]) tensor([0.5360, 0.1071, 0.2283, 0.1286]) -Greedy action tensor([ 0.5050, -0.4463, 0.0948, -0.4611]) tensor([0.4115, 0.1589, 0.2730, 0.1566]) -Greedy action tensor([ 0.7236, -0.5842, -0.1786, -0.0941]) tensor([0.4722, 0.1277, 0.1916, 0.2085]) -Greedy action tensor([ 0.8474, -0.5883, 0.0312, -0.6255]) tensor([0.5237, 0.1246, 0.2316, 0.1201]) -Greedy action tensor([ 0.6210, -0.4116, -0.1216, -0.0923]) tensor([0.4307, 0.1533, 0.2049, 0.2110]) -Greedy action tensor([ 1.0336, -0.8427, 0.0404, -0.4761]) tensor([0.5732, 0.0878, 0.2123, 0.1267]) -Greedy action tensor([ 0.6100, -0.1044, -0.0386, -0.0401]) tensor([0.3946, 0.1932, 0.2063, 0.2060]) -Greedy action tensor([ 0.5481, -0.1494, -0.0602, -0.0463]) tensor([0.3855, 0.1919, 0.2098, 0.2128]) -Greedy action tensor([ 0.8041, -0.4895, -0.2482, -0.6794]) tensor([0.5405, 0.1482, 0.1887, 0.1226]) -Greedy action tensor([ 0.6526, -0.3712, 0.0166, -0.2961]) tensor([0.4394, 0.1578, 0.2326, 0.1701]) -Greedy action tensor([ 0.8644, -0.4379, -0.1700, -0.6774]) tensor([0.5431, 0.1477, 0.1930, 0.1162]) -Greedy action tensor([ 0.9060, -0.7789, -0.0781, -0.4032]) tensor([0.5467, 0.1014, 0.2043, 0.1476]) -Greedy action tensor([ 0.7178, -0.4973, 0.0283, -0.3463]) tensor([0.4665, 0.1384, 0.2341, 0.1610]) -Greedy action tensor([ 0.3743, 0.0160, -0.0465, -0.0964]) tensor([0.3356, 0.2345, 0.2203, 0.2096]) -Greedy action tensor([ 0.7260, -0.1644, 0.0771, -0.3127]) tensor([0.4372, 0.1795, 0.2285, 0.1548]) -Greedy action tensor([ 0.1919, 0.0276, 0.0675, -0.0481]) tensor([0.2843, 0.2412, 0.2510, 0.2236]) -Greedy action tensor([ 0.7671, -0.0945, -0.0700, 0.0139]) tensor([0.4299, 0.1816, 0.1861, 0.2024]) -Greedy action tensor([ 1.0983, -0.3762, -0.0253, -0.4344]) tensor([0.5650, 0.1293, 0.1837, 0.1220]) -Greedy action tensor([ 1.2355, -0.5746, 0.3084, -0.7927]) tensor([0.5914, 0.0968, 0.2340, 0.0778]) -Greedy action tensor([ 0.5320, 0.1562, -0.1293, -0.1402]) tensor([0.3685, 0.2531, 0.1902, 0.1882]) -Greedy action tensor([ 0.7825, -0.8915, -0.0601, -0.3687]) tensor([0.5170, 0.0969, 0.2226, 0.1635]) -Greedy action tensor([ 0.6445, -0.4621, -0.1506, -0.1354]) tensor([0.4463, 0.1476, 0.2015, 0.2046]) -Greedy action tensor([ 0.5283, -0.3114, -0.0979, -0.2771]) tensor([0.4144, 0.1789, 0.2215, 0.1852]) -Greedy action tensor([ 0.4432, -0.0760, 0.0235, -0.2704]) tensor([0.3647, 0.2170, 0.2397, 0.1787]) -Greedy action tensor([ 0.6061, -0.4110, -0.0875, -0.2831]) tensor([0.4401, 0.1591, 0.2199, 0.1809]) -Greedy action tensor([ 0.7810, -0.7260, 0.1377, -0.3985]) tensor([0.4867, 0.1078, 0.2558, 0.1496]) -Greedy action tensor([ 0.7521, -0.3334, -0.0659, -0.2746]) tensor([0.4679, 0.1580, 0.2065, 0.1676]) -Greedy action tensor([ 0.6413, -0.4616, -0.2024, -0.6746]) tensor([0.4925, 0.1635, 0.2119, 0.1321]) -Greedy action tensor([ 0.6438, -0.4176, -0.0208, -0.1957]) tensor([0.4362, 0.1509, 0.2244, 0.1884]) -Greedy action tensor([ 0.6948, -0.5176, -0.1222, -0.1966]) tensor([0.4653, 0.1384, 0.2055, 0.1908]) -Greedy action tensor([ 0.7084, -0.2768, 0.1330, -0.3761]) tensor([0.4398, 0.1642, 0.2474, 0.1487]) -Greedy action tensor([ 0.8242, -0.4272, -0.0010, -0.5262]) tensor([0.5042, 0.1442, 0.2209, 0.1307]) -Greedy action tensor([ 0.9424, -0.9802, -0.0261, -0.3423]) tensor([0.5548, 0.0811, 0.2106, 0.1535]) -Greedy action tensor([ 0.4299, -0.4721, 0.1990, -0.4376]) tensor([0.3817, 0.1549, 0.3030, 0.1603]) -Greedy action tensor([ 0.8399, 0.5132, -0.2106, -0.1290]) tensor([0.4081, 0.2943, 0.1427, 0.1549]) -Greedy action tensor([ 0.3668, -0.1640, 0.1494, -0.5020]) tensor([0.3556, 0.2091, 0.2861, 0.1492]) -Greedy action tensor([ 1.0879, -0.8898, -0.0745, -0.4312]) tensor([0.5988, 0.0829, 0.1873, 0.1311]) -Greedy action tensor([ 0.5865, -0.3608, 0.0224, -0.2048]) tensor([0.4149, 0.1609, 0.2361, 0.1881]) -Greedy action tensor([ 0.7066, -0.4957, 0.0714, -0.3731]) tensor([0.4608, 0.1385, 0.2442, 0.1565]) -Greedy action tensor([ 0.5526, 0.2449, 0.2256, -0.0445]) tensor([0.3326, 0.2445, 0.2398, 0.1831]) -Greedy action tensor([ 0.7640, -0.6496, -0.0945, -0.2825]) tensor([0.4955, 0.1205, 0.2100, 0.1740]) -Greedy action tensor([ 0.8017, -0.3673, 0.0406, -0.7772]) tensor([0.5040, 0.1566, 0.2355, 0.1039]) -Greedy action tensor([ 0.3616, 0.1418, -0.1946, -0.3789]) tensor([0.3505, 0.2814, 0.2010, 0.1671]) -Greedy action tensor([ 1.0893, -0.7495, 0.2237, -0.5538]) tensor([0.5639, 0.0897, 0.2373, 0.1091]) -Greedy action tensor([ 0.7881, -0.3451, -0.1318, -0.1801]) tensor([0.4761, 0.1533, 0.1898, 0.1808]) -Greedy action tensor([ 0.9484, -0.7455, -0.0223, -0.4515]) tensor([0.5527, 0.1016, 0.2094, 0.1363]) -Greedy action tensor([ 0.7833, -0.4926, -0.0361, -0.3759]) tensor([0.4917, 0.1373, 0.2167, 0.1543]) -Greedy action tensor([ 0.4942, -0.1369, -0.0492, -0.0978]) tensor([0.3751, 0.1995, 0.2178, 0.2075]) -Greedy action tensor([ 0.6082, -0.5769, 0.0169, -0.2192]) tensor([0.4355, 0.1331, 0.2411, 0.1904]) -Greedy action tensor([ 0.9270, -0.5324, -0.0864, -0.3852]) tensor([0.5363, 0.1246, 0.1947, 0.1444]) -Greedy action tensor([ 0.1703, 0.1723, -0.0425, 0.0191]) tensor([0.2725, 0.2730, 0.2203, 0.2342]) -Greedy action tensor([ 0.9162, -0.9534, 0.1663, -0.5267]) tensor([0.5368, 0.0828, 0.2536, 0.1268]) -Greedy action tensor([ 0.7428, -0.0627, 0.1033, -0.4635]) tensor([0.4398, 0.1965, 0.2320, 0.1316]) -Greedy action tensor([ 0.8227, -0.6450, -0.1321, -0.5572]) tensor([0.5356, 0.1234, 0.2062, 0.1348]) -Greedy action tensor([ 0.9828, -0.7819, -0.0495, -0.3926]) tensor([0.5617, 0.0962, 0.2001, 0.1420]) -Greedy action tensor([ 0.7677, -0.2547, -0.1368, -0.0759]) tensor([0.4557, 0.1639, 0.1844, 0.1960]) -Greedy action tensor([ 0.7736, -0.0727, -0.2242, -0.2932]) tensor([0.4669, 0.2003, 0.1721, 0.1607]) -Greedy action tensor([ 0.5148, -0.4983, -0.1382, -0.1545]) tensor([0.4174, 0.1516, 0.2173, 0.2138]) -Greedy action tensor([ 0.6046, -0.5520, 0.0036, -0.3534]) tensor([0.4451, 0.1400, 0.2440, 0.1708]) -Greedy action tensor([-1.7328, -0.4468, 0.6885, 0.1045]) tensor([0.0451, 0.1633, 0.5082, 0.2834]) -Greedy action tensor([-1.8065, -0.4155, 0.6002, -0.1494]) tensor([0.0468, 0.1882, 0.5195, 0.2455]) -Greedy action tensor([-1.7779, -0.2047, 0.6586, -0.3592]) tensor([0.0468, 0.2255, 0.5346, 0.1932]) -Greedy action tensor([-1.7643, -0.2944, 0.5840, -0.0507]) tensor([0.0468, 0.2035, 0.4899, 0.2597]) -Greedy action tensor([-1.8221, -0.4427, 0.5992, -0.1210]) tensor([0.0461, 0.1830, 0.5186, 0.2524]) -Greedy action tensor([-1.3606, -0.3522, 0.3834, 0.0624]) tensor([0.0735, 0.2014, 0.4203, 0.3049]) -Greedy action tensor([-1.0953, -0.4581, 0.3439, 0.4273]) tensor([0.0855, 0.1617, 0.3607, 0.3921]) -Greedy action tensor([-0.7942, -0.3290, 0.2449, -0.0818]) tensor([0.1341, 0.2135, 0.3790, 0.2734]) -Greedy action tensor([-1.8966, -0.4507, 0.6426, -0.1560]) tensor([0.0423, 0.1798, 0.5365, 0.2414]) -Greedy action tensor([-1.9412, -0.4483, 0.6664, -0.1785]) tensor([0.0402, 0.1791, 0.5460, 0.2346]) -Greedy action tensor([-0.6895, -0.3738, 0.2424, -0.2158]) tensor([0.1535, 0.2104, 0.3897, 0.2464]) -Greedy action tensor([-1.9245, -0.4495, 0.6542, -0.1683]) tensor([0.0411, 0.1796, 0.5415, 0.2379]) -Greedy action tensor([-1.5357, 0.4314, 0.3956, 0.0531]) tensor([0.0501, 0.3584, 0.3459, 0.2456]) -Greedy action tensor([-1.7055, -0.4280, 0.6141, 0.0855]) tensor([0.0482, 0.1729, 0.4901, 0.2889]) -Greedy action tensor([-1.9436, -0.4446, 0.6656, -0.1802]) tensor([0.0402, 0.1798, 0.5458, 0.2343]) -Greedy action tensor([-1.9431, -0.4425, 0.6650, -0.1797]) tensor([0.0402, 0.1802, 0.5453, 0.2343]) -Greedy action tensor([-1.5149, -0.2570, 0.4852, 0.0452]) tensor([0.0600, 0.2111, 0.4434, 0.2855]) -Greedy action tensor([-1.8494, -0.4487, 0.6199, -0.1291]) tensor([0.0445, 0.1807, 0.5260, 0.2487]) -Greedy action tensor([-1.6258, -0.3792, 0.6207, 0.1386]) tensor([0.0506, 0.1759, 0.4782, 0.2953]) -Greedy action tensor([-1.0143, 0.7506, 0.0708, 0.2642]) tensor([0.0747, 0.4362, 0.2210, 0.2682]) -Greedy action tensor([-1.8312, -0.3488, 0.6038, -0.1222]) tensor([0.0448, 0.1971, 0.5109, 0.2472]) -Greedy action tensor([-1.9112, -0.4222, 0.6468, -0.1614]) tensor([0.0415, 0.1840, 0.5358, 0.2388]) -Greedy action tensor([-1.8725, -0.4496, 0.6259, -0.1535]) tensor([0.0437, 0.1813, 0.5313, 0.2437]) -Greedy action tensor([-1.8182, -0.4184, 0.5936, -0.1149]) tensor([0.0461, 0.1868, 0.5140, 0.2531]) -Greedy action tensor([-1.8179, -0.3411, 0.5885, -0.1289]) tensor([0.0457, 0.2001, 0.5069, 0.2474]) -Greedy action tensor([-1.9284, -0.4247, 0.6600, -0.1642]) tensor([0.0406, 0.1825, 0.5400, 0.2368]) -Greedy action tensor([-1.0613, -0.6332, 0.2409, -0.2225]) tensor([0.1173, 0.1800, 0.4313, 0.2714]) -Greedy action tensor([-1.9160, -0.4209, 0.6499, -0.1665]) tensor([0.0413, 0.1841, 0.5372, 0.2374]) -Greedy action tensor([-1.8923, -0.4165, 0.6356, -0.1598]) tensor([0.0424, 0.1857, 0.5318, 0.2401]) -Greedy action tensor([-1.5263, -0.4875, 0.4931, 0.1563]) tensor([0.0597, 0.1688, 0.4501, 0.3214]) -Greedy action tensor([-1.5836, -0.5184, 0.4365, 0.0074]) tensor([0.0612, 0.1775, 0.4611, 0.3002]) -Greedy action tensor([-1.7761, -0.4792, 0.5712, -0.0593]) tensor([0.0484, 0.1769, 0.5056, 0.2692]) -Greedy action tensor([-1.8286, -0.4554, 0.6126, -0.1179]) tensor([0.0455, 0.1797, 0.5229, 0.2519]) -Greedy action tensor([-1.3768, -0.5658, 0.4815, 0.1120]) tensor([0.0710, 0.1596, 0.4550, 0.3144]) -Greedy action tensor([-1.9121, -0.3983, 0.6476, -0.1565]) tensor([0.0412, 0.1873, 0.5330, 0.2385]) -Greedy action tensor([-1.8175, -0.1330, 0.5561, -0.1319]) tensor([0.0444, 0.2393, 0.4767, 0.2396]) -Greedy action tensor([-1.3458, -0.6068, 0.3647, 0.1037]) tensor([0.0776, 0.1625, 0.4293, 0.3307]) -Greedy action tensor([-1.9096, -0.4238, 0.6599, -0.1541]) tensor([0.0412, 0.1821, 0.5382, 0.2385]) -Greedy action tensor([-1.8378, -0.3230, 0.6042, -0.1520]) tensor([0.0446, 0.2027, 0.5123, 0.2405]) -Greedy action tensor([-1.9358, -0.4356, 0.6607, -0.1749]) tensor([0.0405, 0.1814, 0.5428, 0.2354]) -Greedy action tensor([-1.4084, -0.1541, 0.4565, 0.1804]) tensor([0.0631, 0.2210, 0.4071, 0.3088]) -Greedy action tensor([-1.8186, -0.4839, 0.6046, -0.1319]) tensor([0.0466, 0.1768, 0.5252, 0.2514]) -Greedy action tensor([-1.7581, -0.4319, 0.6688, -0.0602]) tensor([0.0464, 0.1748, 0.5254, 0.2534]) -Greedy action tensor([-1.6807, -0.2283, 0.5267, 0.0297]) tensor([0.0503, 0.2148, 0.4570, 0.2780]) -Greedy action tensor([-0.9045, 0.1076, 0.4640, 1.1928]) tensor([0.0632, 0.1739, 0.2483, 0.5146]) -Greedy action tensor([-1.4474, -0.1895, 0.6044, 0.2754]) tensor([0.0559, 0.1965, 0.4348, 0.3129]) -Greedy action tensor([-1.9374, -0.4482, 0.6612, -0.1761]) tensor([0.0405, 0.1795, 0.5444, 0.2356]) -Greedy action tensor([-1.3001, 0.0053, 0.3212, -0.0184]) tensor([0.0749, 0.2763, 0.3789, 0.2698]) -Greedy action tensor([-1.6455, -0.4589, 0.4988, -0.0083]) tensor([0.0557, 0.1825, 0.4755, 0.2863]) -Greedy action tensor([-1.5979, -0.4218, 0.5852, 0.1953]) tensor([0.0523, 0.1695, 0.4640, 0.3142]) -Greedy action tensor([-1.9210, -0.3078, 0.6382, -0.1671]) tensor([0.0405, 0.2030, 0.5228, 0.2337]) -Greedy action tensor([-1.6326, -0.1399, 0.4602, -0.0375]) tensor([0.0541, 0.2407, 0.4386, 0.2666]) -Greedy action tensor([-1.9246, -0.4287, 0.6595, -0.1738]) tensor([0.0409, 0.1824, 0.5415, 0.2353]) -Greedy action tensor([-0.8422, 0.4685, 0.0946, 0.0122]) tensor([0.1041, 0.3859, 0.2655, 0.2445]) -Greedy action tensor([-1.8355, -0.4242, 0.6176, -0.1208]) tensor([0.0449, 0.1841, 0.5217, 0.2493]) -Greedy action tensor([-1.2122, -0.6629, 1.1931, 1.2866]) tensor([0.0385, 0.0667, 0.4265, 0.4683]) -Greedy action tensor([-1.6521, -0.2285, 0.6106, 0.2047]) tensor([0.0472, 0.1962, 0.4540, 0.3025]) -Greedy action tensor([-1.9088, -0.3678, 0.6377, -0.1560]) tensor([0.0413, 0.1929, 0.5273, 0.2384]) -Greedy action tensor([-1.9376, -0.4531, 0.6661, -0.1751]) tensor([0.0404, 0.1783, 0.5459, 0.2354]) -Greedy action tensor([-1.8223, -0.3454, 0.5911, -0.1160]) tensor([0.0453, 0.1985, 0.5064, 0.2497]) -Greedy action tensor([-0.8361, 0.3112, 0.3193, 0.3324]) tensor([0.0949, 0.2988, 0.3012, 0.3052]) -Greedy action tensor([-1.9430, -0.4571, 0.6742, -0.1765]) tensor([0.0401, 0.1770, 0.5486, 0.2343]) -Greedy action tensor([-1.7658, -0.1431, 0.5474, -0.0892]) tensor([0.0465, 0.2354, 0.4696, 0.2485]) -Greedy action tensor([-1.6466, -0.4509, 0.5885, 0.0901]) tensor([0.0517, 0.1710, 0.4835, 0.2937]) -Greedy action tensor([-1.9077, -0.3942, 0.6391, -0.1581]) tensor([0.0416, 0.1888, 0.5306, 0.2391]) -Greedy action tensor([-1.6486, -0.2728, 0.5993, -0.0147]) tensor([0.0512, 0.2025, 0.4843, 0.2621]) -Greedy action tensor([-1.9245, -0.4587, 0.6657, -0.1625]) tensor([0.0408, 0.1769, 0.5445, 0.2378]) -Greedy action tensor([-1.7113, -0.5149, 0.5597, -0.1165]) tensor([0.0528, 0.1748, 0.5120, 0.2604]) -Greedy action tensor([-1.6709, -0.4100, 0.5918, 0.0904]) tensor([0.0501, 0.1768, 0.4815, 0.2916]) -Greedy action tensor([-1.9352, -0.4381, 0.6634, -0.1752]) tensor([0.0404, 0.1807, 0.5437, 0.2351]) -Greedy action tensor([-1.8140, -0.3013, 0.6376, -0.1043]) tensor([0.0441, 0.2002, 0.5119, 0.2438]) -Greedy action tensor([-1.4728, -0.4348, 0.5099, -0.2399]) tensor([0.0689, 0.1945, 0.5003, 0.2364]) -Greedy action tensor([-1.9280, -0.4383, 0.6593, -0.1698]) tensor([0.0408, 0.1808, 0.5419, 0.2365]) -Greedy action tensor([-1.9199, -0.4742, 0.7118, -0.1250]) tensor([0.0397, 0.1687, 0.5523, 0.2392]) -Greedy action tensor([-1.8564, -0.4085, 0.6293, -0.1119]) tensor([0.0435, 0.1851, 0.5225, 0.2490]) -Greedy action tensor([-1.8789, -0.4398, 0.6707, -0.1059]) tensor([0.0418, 0.1764, 0.5355, 0.2463]) -Greedy action tensor([-1.8796, -0.4593, 0.6352, -0.1479]) tensor([0.0432, 0.1787, 0.5340, 0.2441]) -Greedy action tensor([-1.2454, -0.7151, 0.5774, 0.2416]) tensor([0.0751, 0.1277, 0.4649, 0.3323]) -Greedy action tensor([-1.9014, -0.4205, 0.6424, -0.1617]) tensor([0.0420, 0.1846, 0.5343, 0.2391]) -Greedy action tensor([-1.8974, -0.4333, 0.6359, -0.1568]) tensor([0.0423, 0.1831, 0.5332, 0.2414]) -Greedy action tensor([-1.7227, -0.5302, 0.5464, -0.0728]) tensor([0.0522, 0.1719, 0.5044, 0.2715]) -Greedy action tensor([-0.4369, -0.3585, -0.0407, 0.8252]) tensor([0.1408, 0.1523, 0.2093, 0.4975]) -Greedy action tensor([-0.5083, -1.2437, -0.5179, -0.4461]) tensor([0.2830, 0.1356, 0.2803, 0.3011]) -Greedy action tensor([ 0.2725, -0.0098, -0.0157, 0.9938]) tensor([0.2193, 0.1653, 0.1644, 0.4510]) -Greedy action tensor([ 1.1909, -0.4833, 1.4101, 0.6679]) tensor([0.3305, 0.0620, 0.4116, 0.1959]) -Greedy action tensor([ 1.1974, -0.4392, 0.3533, 0.1016]) tensor([0.5105, 0.0994, 0.2195, 0.1706]) -Greedy action tensor([ 0.0812, -1.3870, 0.0135, 0.3745]) tensor([0.2853, 0.0657, 0.2666, 0.3825]) -Greedy action tensor([ 0.8286, 1.2654, 0.6133, -0.4344]) tensor([0.2750, 0.4256, 0.2217, 0.0778]) -Greedy action tensor([ 0.5974, -0.9811, 0.2407, -0.8215]) tensor([0.4655, 0.0960, 0.3258, 0.1126]) -Greedy action tensor([ 0.1252, -0.3960, 0.1577, -0.3043]) tensor([0.3051, 0.1812, 0.3152, 0.1986]) -Greedy action tensor([-0.7717, 0.3339, 0.1272, -0.4849]) tensor([0.1280, 0.3868, 0.3146, 0.1706]) -Greedy action tensor([ 0.3964, -0.1475, 0.1116, 0.2446]) tensor([0.3133, 0.1819, 0.2357, 0.2692]) -Greedy action tensor([-0.1263, -2.4409, 0.1782, 0.0359]) tensor([0.2754, 0.0272, 0.3734, 0.3239]) -Greedy action tensor([ 0.0781, -0.1912, 0.9501, 0.0049]) tensor([0.1967, 0.1502, 0.4703, 0.1828]) -Greedy action tensor([ 0.9507, -0.9777, 1.1806, 0.1717]) tensor([0.3493, 0.0508, 0.4396, 0.1603]) -Greedy action tensor([ 1.3850, -0.4541, 0.3647, 0.0172]) tensor([0.5637, 0.0896, 0.2032, 0.1435]) -Greedy action tensor([ 1.3528, -1.1031, 0.5127, 0.4565]) tensor([0.5193, 0.0446, 0.2242, 0.2119]) -Greedy action tensor([ 0.6259, -1.2752, 0.2526, -0.3379]) tensor([0.4506, 0.0673, 0.3102, 0.1719]) -Greedy action tensor([ 0.0213, -0.8711, -0.1113, -0.2481]) tensor([0.3280, 0.1343, 0.2872, 0.2505]) -Greedy action tensor([-0.4516, 0.3363, 0.5869, -0.1905]) tensor([0.1366, 0.3003, 0.3858, 0.1773]) -Greedy action tensor([1.5760, 0.5556, 0.4954, 0.9140]) tensor([0.4513, 0.1627, 0.1532, 0.2328]) -Greedy action tensor([-0.7541, -0.6790, 1.0309, -0.7525]) tensor([0.1106, 0.1193, 0.6593, 0.1108]) -Greedy action tensor([-0.6293, -0.7429, -0.9753, -0.3080]) tensor([0.2513, 0.2243, 0.1778, 0.3465]) -Greedy action tensor([ 0.6440, 0.3847, 1.1193, -0.6822]) tensor([0.2743, 0.2117, 0.4412, 0.0728]) -Greedy action tensor([ 0.9672, -0.4134, 0.5435, -0.2395]) tensor([0.4535, 0.1140, 0.2968, 0.1357]) -Greedy action tensor([ 0.4702, -1.3986, 1.1994, -0.3109]) tensor([0.2713, 0.0419, 0.5626, 0.1242]) -Greedy action tensor([ 1.7822, -1.2634, 0.0776, 1.0691]) tensor([0.5816, 0.0277, 0.1057, 0.2850]) -Greedy action tensor([-0.4333, -0.1687, 0.6071, -0.6290]) tensor([0.1679, 0.2188, 0.4753, 0.1381]) -Greedy action tensor([ 0.9989, -0.8260, -0.3582, 0.8189]) tensor([0.4437, 0.0715, 0.1142, 0.3706]) -Greedy action tensor([ 0.0740, 0.3803, 0.6114, -0.4279]) tensor([0.2139, 0.2906, 0.3661, 0.1295]) -Greedy action tensor([ 0.2479, -0.2758, -0.9565, -0.1760]) tensor([0.3927, 0.2326, 0.1177, 0.2570]) -Greedy action tensor([ 0.6065, -1.4989, 0.8387, -0.3996]) tensor([0.3638, 0.0443, 0.4589, 0.1330]) -Greedy action tensor([ 0.8986, -0.9463, -0.0744, 0.4044]) tensor([0.4660, 0.0736, 0.1761, 0.2843]) -Greedy action tensor([-0.1177, 0.1561, 0.3645, 0.7073]) tensor([0.1609, 0.2115, 0.2605, 0.3671]) -Greedy action tensor([-0.3514, -2.9241, 0.0833, 0.5059]) tensor([0.2009, 0.0153, 0.3103, 0.4735]) -Greedy action tensor([-0.5201, 0.0298, -0.8156, 0.8049]) tensor([0.1381, 0.2394, 0.1028, 0.5197]) -Greedy action tensor([-0.0956, -0.4968, 2.6886, -1.0173]) tensor([0.0548, 0.0367, 0.8867, 0.0218]) -Greedy action tensor([-0.0352, -0.6511, -1.0812, 0.1168]) tensor([0.3273, 0.1768, 0.1150, 0.3810]) -Greedy action tensor([ 0.6628, 0.0576, 0.9006, -0.3230]) tensor([0.3137, 0.1713, 0.3979, 0.1171]) -Greedy action tensor([-0.7687, -0.1335, 0.1950, -0.6441]) tensor([0.1506, 0.2842, 0.3947, 0.1705]) -Greedy action tensor([ 1.0503, -0.5845, -0.7383, 0.2691]) tensor([0.5494, 0.1071, 0.0919, 0.2516]) -Greedy action tensor([-0.6715, -0.8883, 0.9117, 0.6990]) tensor([0.0942, 0.0759, 0.4589, 0.3710]) -Greedy action tensor([ 1.4641, -0.0724, 0.5280, -0.0335]) tensor([0.5461, 0.1175, 0.2142, 0.1222]) -Greedy action tensor([ 0.7392, -1.0940, 0.5204, -0.1048]) tensor([0.4178, 0.0668, 0.3357, 0.1797]) -Greedy action tensor([ 0.5770, -0.8133, -0.3475, 0.2384]) tensor([0.4240, 0.1056, 0.1682, 0.3022]) -Greedy action tensor([ 0.0986, -1.7180, -0.5725, 0.7173]) tensor([0.2833, 0.0461, 0.1448, 0.5259]) -Greedy action tensor([-0.8723, -1.2635, 0.0746, -0.2748]) tensor([0.1647, 0.1114, 0.4246, 0.2994]) -Greedy action tensor([ 0.1366, -1.2654, 0.1235, -0.1236]) tensor([0.3329, 0.0819, 0.3285, 0.2566]) -Greedy action tensor([ 0.4642, 0.5071, -0.8909, -0.0204]) tensor([0.3427, 0.3578, 0.0884, 0.2111]) -Greedy action tensor([ 0.2384, -1.6557, -0.4247, 1.1572]) tensor([0.2397, 0.0361, 0.1235, 0.6008]) -Greedy action tensor([-0.1251, 0.1557, -0.2181, -0.7219]) tensor([0.2641, 0.3498, 0.2407, 0.1454]) -Greedy action tensor([-1.1732, 0.3358, -1.4931, 0.7495]) tensor([0.0764, 0.3455, 0.0555, 0.5226]) -Greedy action tensor([ 1.7431, -0.2134, -0.6774, 0.3952]) tensor([0.6711, 0.0949, 0.0596, 0.1743]) -Greedy action tensor([-0.2223, 0.3457, 0.7273, 0.3959]) tensor([0.1388, 0.2449, 0.3587, 0.2575]) -Greedy action tensor([-1.6523, -0.1911, 0.6436, -0.1632]) tensor([0.0508, 0.2191, 0.5048, 0.2253]) -Greedy action tensor([ 0.4112, 0.3617, 0.1558, -0.4852]) tensor([0.3190, 0.3036, 0.2471, 0.1302]) -Greedy action tensor([ 0.5900, -0.2967, -0.2446, 0.2661]) tensor([0.3892, 0.1604, 0.1689, 0.2815]) -Greedy action tensor([ 1.2763, -0.7834, 0.0193, 1.0974]) tensor([0.4448, 0.0567, 0.1266, 0.3719]) -Greedy action tensor([-0.8384, -2.0877, 0.4996, 0.6001]) tensor([0.1074, 0.0308, 0.4093, 0.4525]) -Greedy action tensor([-0.0583, 0.5091, 0.2182, 0.4265]) tensor([0.1753, 0.3091, 0.2311, 0.2846]) -Greedy action tensor([ 1.4271, -0.2523, 0.1940, 0.4597]) tensor([0.5382, 0.1004, 0.1568, 0.2046]) -Greedy action tensor([ 1.0711, -0.1712, -0.9483, -0.1279]) tensor([0.5804, 0.1676, 0.0770, 0.1750]) -Greedy action tensor([ 0.7991, 0.6303, 0.4127, -0.2099]) tensor([0.3462, 0.2924, 0.2352, 0.1262]) -Greedy action tensor([-0.2002, -0.3947, -0.6062, 0.2166]) tensor([0.2496, 0.2055, 0.1663, 0.3787]) -Greedy action tensor([-0.9650, 0.6975, -0.4364, -0.7395]) tensor([0.1084, 0.5717, 0.1840, 0.1359]) -Greedy action tensor([ 0.8450, -0.1970, -0.2536, -0.8714]) tensor([0.5360, 0.1891, 0.1787, 0.0963]) -Greedy action tensor([ 0.3258, -1.1614, -0.0592, 0.6976]) tensor([0.2979, 0.0673, 0.2027, 0.4321]) -Greedy action tensor([ 0.1386, -0.4418, 0.2070, -0.0539]) tensor([0.2894, 0.1620, 0.3099, 0.2387]) -Greedy action tensor([ 1.5346, -0.0819, -0.0256, 1.1129]) tensor([0.4843, 0.0962, 0.1018, 0.3177]) -Greedy action tensor([-0.0547, -0.3594, -0.3837, -0.3972]) tensor([0.3158, 0.2328, 0.2272, 0.2242]) -Greedy action tensor([ 1.3367, -0.2353, 1.0021, 0.3183]) tensor([0.4378, 0.0909, 0.3132, 0.1581]) -Greedy action tensor([ 0.1606, -2.2537, 0.1509, 0.1967]) tensor([0.3209, 0.0287, 0.3178, 0.3327]) -Greedy action tensor([-0.1460, 0.5813, 0.6907, -0.4062]) tensor([0.1626, 0.3366, 0.3755, 0.1254]) -Greedy action tensor([ 0.2227, -0.2882, -0.0280, -0.5352]) tensor([0.3513, 0.2107, 0.2734, 0.1646]) -Greedy action tensor([-0.5674, -1.2301, -0.0895, 0.9533]) tensor([0.1298, 0.0669, 0.2093, 0.5939]) -Greedy action tensor([ 0.0162, -0.0927, 0.0895, -1.1392]) tensor([0.3041, 0.2728, 0.3273, 0.0958]) -Greedy action tensor([ 0.3564, -1.5098, 0.1766, -0.2401]) tensor([0.3936, 0.0609, 0.3288, 0.2167]) -Greedy action tensor([-0.1871, 0.3745, 1.0816, -0.0642]) tensor([0.1344, 0.2357, 0.4780, 0.1520]) -Greedy action tensor([ 0.0329, -0.4658, 0.6801, 0.5722]) tensor([0.1911, 0.1161, 0.3651, 0.3277]) -Greedy action tensor([ 0.0873, -1.2543, -0.3423, 0.2474]) tensor([0.3241, 0.0847, 0.2109, 0.3803]) -Greedy action tensor([-0.3612, -0.4248, 0.0352, -0.4540]) tensor([0.2306, 0.2164, 0.3428, 0.2102]) -Greedy action tensor([ 0.6658, -1.0692, -0.3682, 0.5281]) tensor([0.4161, 0.0734, 0.1480, 0.3626]) -Greedy action tensor([0.2803, 0.5875, 0.3133, 0.4887]) tensor([0.2162, 0.2940, 0.2235, 0.2663]) -Greedy action tensor([ 1.5331, -0.3419, -0.8232, 0.0855]) tensor([0.6742, 0.1034, 0.0639, 0.1585]) -Greedy action tensor([ 1.5678, -0.3761, -0.3721, 0.3221]) tensor([0.6351, 0.0909, 0.0913, 0.1827]) -Greedy action tensor([ 1.7353, -0.8043, -0.2129, 0.5310]) tensor([0.6573, 0.0519, 0.0937, 0.1971]) -Greedy action tensor([ 1.2860, 0.0372, -0.4997, 0.4073]) tensor([0.5348, 0.1534, 0.0897, 0.2221]) -Greedy action tensor([ 1.4501, -0.7847, -0.7136, 0.3739]) tensor([0.6399, 0.0685, 0.0735, 0.2181]) -Greedy action tensor([ 1.1058, -0.4837, -0.1380, 0.0397]) tensor([0.5445, 0.1111, 0.1570, 0.1875]) -Greedy action tensor([ 1.2849, -0.2091, -0.4529, -0.1554]) tensor([0.6108, 0.1371, 0.1074, 0.1447]) -Greedy action tensor([ 1.9491, -0.7413, -0.3670, 0.2537]) tensor([0.7407, 0.0503, 0.0731, 0.1359]) -Greedy action tensor([ 1.5714, -0.4370, -0.8461, 0.3507]) tensor([0.6586, 0.0884, 0.0587, 0.1943]) -Greedy action tensor([ 1.2550, -0.5352, -0.1709, 0.2100]) tensor([0.5685, 0.0949, 0.1366, 0.2000]) -Greedy action tensor([ 1.1861, -0.8203, -0.3672, 0.9568]) tensor([0.4671, 0.0628, 0.0988, 0.3713]) -Greedy action tensor([ 1.5033, -0.6522, -0.2175, 0.4220]) tensor([0.6120, 0.0709, 0.1095, 0.2076]) -Greedy action tensor([ 1.6520, -0.1942, -0.4560, 0.3211]) tensor([0.6479, 0.1023, 0.0787, 0.1712]) -Greedy action tensor([ 1.2382, -0.1466, -0.4717, 0.4199]) tensor([0.5341, 0.1337, 0.0966, 0.2356]) -Greedy action tensor([ 0.3559, -0.2829, 0.1049, -0.0313]) tensor([0.3350, 0.1769, 0.2606, 0.2275]) -Greedy action tensor([ 1.7243, 0.4452, -0.2188, 0.3260]) tensor([0.5993, 0.1668, 0.0859, 0.1480]) -Greedy action tensor([ 1.7236, -0.7713, -0.1822, 0.0916]) tensor([0.7009, 0.0578, 0.1042, 0.1370]) -Greedy action tensor([ 1.6627, 0.3167, -0.3542, 0.2362]) tensor([0.6122, 0.1593, 0.0815, 0.1470]) -Greedy action tensor([ 1.2157, -0.3432, -0.6333, 0.3538]) tensor([0.5586, 0.1175, 0.0879, 0.2359]) -Greedy action tensor([ 1.4085, -0.3361, -0.4441, 0.4901]) tensor([0.5778, 0.1010, 0.0906, 0.2306]) -Greedy action tensor([ 1.8793, -0.7908, -0.3546, 0.5984]) tensor([0.6877, 0.0476, 0.0737, 0.1910]) -Greedy action tensor([ 1.1902, -0.5530, 0.0795, 0.2176]) tensor([0.5312, 0.0929, 0.1750, 0.2009]) -Greedy action tensor([0.6513, 0.0306, 0.1130, 0.1637]) tensor([0.3656, 0.1965, 0.2134, 0.2245]) -Greedy action tensor([ 1.0536, -0.3408, -0.0096, 0.4776]) tensor([0.4639, 0.1150, 0.1602, 0.2608]) -Greedy action tensor([ 0.8848, -0.3388, 0.0272, 0.2700]) tensor([0.4427, 0.1302, 0.1878, 0.2394]) -Greedy action tensor([ 0.8884, -0.5669, -0.3081, 0.2188]) tensor([0.4884, 0.1140, 0.1476, 0.2500]) -Greedy action tensor([ 1.6667, -0.7511, -0.1267, 0.2593]) tensor([0.6665, 0.0594, 0.1109, 0.1632]) -Greedy action tensor([ 1.7806, -0.6676, -0.1746, 0.5713]) tensor([0.6551, 0.0566, 0.0927, 0.1955]) -Greedy action tensor([ 1.8259, -0.5981, -0.5557, 0.3344]) tensor([0.7112, 0.0630, 0.0657, 0.1601]) -Greedy action tensor([ 1.2160, -0.7153, -0.0920, 0.3229]) tensor([0.5480, 0.0794, 0.1482, 0.2244]) -Greedy action tensor([ 1.6276, -0.6608, -1.1069, 0.1261]) tensor([0.7199, 0.0730, 0.0467, 0.1604]) -Greedy action tensor([ 1.4085, -0.3665, -0.3403, 0.0700]) tensor([0.6228, 0.1056, 0.1084, 0.1633]) -Greedy action tensor([ 1.7864, -0.2532, -0.2154, 0.5814]) tensor([0.6390, 0.0831, 0.0863, 0.1915]) -Greedy action tensor([ 1.9353, -0.3243, -0.4529, 0.0781]) tensor([0.7395, 0.0772, 0.0679, 0.1154]) -Greedy action tensor([ 2.6606, -1.2657, -0.3633, 0.8872]) tensor([0.8077, 0.0159, 0.0393, 0.1371]) -Greedy action tensor([ 1.4893, -0.3131, -0.3990, 0.4616]) tensor([0.5974, 0.0985, 0.0904, 0.2137]) -Greedy action tensor([ 0.9890, -0.7394, -0.0601, 0.3983]) tensor([0.4804, 0.0853, 0.1682, 0.2661]) -Greedy action tensor([ 1.4998, -0.3192, -0.3008, 0.4280]) tensor([0.5989, 0.0971, 0.0989, 0.2051]) -Greedy action tensor([ 1.2396, -0.3691, -0.8039, 0.5018]) tensor([0.5531, 0.1107, 0.0717, 0.2645]) -Greedy action tensor([ 2.0581, -0.6466, -0.5545, 0.4297]) tensor([0.7482, 0.0501, 0.0549, 0.1468]) -Greedy action tensor([ 0.7323, -0.4634, -0.2559, 0.5153]) tensor([0.4033, 0.1220, 0.1501, 0.3246]) -Greedy action tensor([ 0.3221, -0.1546, -0.3517, 0.4588]) tensor([0.3051, 0.1895, 0.1556, 0.3499]) -Greedy action tensor([ 1.5154, -0.5626, -0.4347, 0.2588]) tensor([0.6443, 0.0807, 0.0917, 0.1834]) -Greedy action tensor([ 0.5823, -0.1385, -0.0233, -0.1020]) tensor([0.3942, 0.1917, 0.2152, 0.1989]) -Greedy action tensor([ 2.4380, -0.3869, 0.1857, 0.1839]) tensor([0.7877, 0.0467, 0.0828, 0.0827]) -Greedy action tensor([ 1.2904, -0.7046, -0.3855, 0.3482]) tensor([0.5838, 0.0794, 0.1093, 0.2275]) -Greedy action tensor([ 2.1965, -0.9145, -0.2734, 0.3382]) tensor([0.7782, 0.0347, 0.0658, 0.1213]) -Greedy action tensor([ 1.2695, -0.6915, -0.0246, 0.3377]) tensor([0.5529, 0.0778, 0.1516, 0.2178]) -Greedy action tensor([ 2.2716, -1.3335, -0.3191, 0.6832]) tensor([0.7655, 0.0208, 0.0574, 0.1564]) -Greedy action tensor([ 1.5094, -0.3778, -0.4246, 0.2901]) tensor([0.6283, 0.0952, 0.0908, 0.1856]) -Greedy action tensor([ 1.4340, -0.7913, -0.0323, 0.0817]) tensor([0.6260, 0.0676, 0.1445, 0.1619]) -Greedy action tensor([ 2.2141, -0.8754, -0.3008, 0.7099]) tensor([0.7415, 0.0338, 0.0600, 0.1648]) -Greedy action tensor([ 1.0535, -0.4892, -0.6905, 0.5858]) tensor([0.4963, 0.1061, 0.0868, 0.3109]) -Greedy action tensor([ 0.8203, -0.0993, -0.5939, 0.6534]) tensor([0.4019, 0.1602, 0.0977, 0.3401]) -Greedy action tensor([ 1.3165, -0.3746, -0.3743, 0.5934]) tensor([0.5394, 0.0994, 0.0995, 0.2617]) -Greedy action tensor([ 1.3372, 0.3287, 0.0753, -0.4317]) tensor([0.5499, 0.2006, 0.1557, 0.0938]) -Greedy action tensor([ 1.2081, -0.6482, -0.4406, 0.5632]) tensor([0.5338, 0.0834, 0.1027, 0.2801]) -Greedy action tensor([ 1.5189, -0.8914, -0.0483, 0.4428]) tensor([0.6100, 0.0548, 0.1273, 0.2080]) -Greedy action tensor([ 1.3663, 0.5350, -0.0899, 0.1821]) tensor([0.5064, 0.2206, 0.1181, 0.1550]) -Greedy action tensor([ 0.8647, -0.1225, -0.1264, 0.1975]) tensor([0.4431, 0.1651, 0.1645, 0.2274]) -Greedy action tensor([ 1.4233, 0.0923, -0.2402, 0.4146]) tensor([0.5499, 0.1453, 0.1042, 0.2006]) -Greedy action tensor([ 2.0867, -0.7916, -0.2647, 0.9727]) tensor([0.6758, 0.0380, 0.0644, 0.2218]) -Greedy action tensor([ 0.7913, -0.5055, -0.0299, 0.0779]) tensor([0.4539, 0.1241, 0.1997, 0.2224]) -Greedy action tensor([ 1.7856, -1.3238, -0.2149, 0.3101]) tensor([0.7099, 0.0317, 0.0960, 0.1623]) -Greedy action tensor([ 1.0222, -0.2763, -0.0482, -0.0884]) tensor([0.5141, 0.1403, 0.1763, 0.1693]) -Greedy action tensor([ 1.1881, -0.1480, -0.6152, 0.2459]) tensor([0.5503, 0.1446, 0.0907, 0.2145]) -Greedy action tensor([ 1.3866, -0.6428, -0.5186, 0.3990]) tensor([0.6051, 0.0795, 0.0900, 0.2254]) -Greedy action tensor([ 2.1695, 0.5533, 0.0367, -0.1313]) tensor([0.7056, 0.1402, 0.0836, 0.0707]) -Greedy action tensor([ 1.5513e+00, -6.4969e-06, -2.8918e-01, 1.9814e-01]) tensor([0.6138, 0.1301, 0.0974, 0.1586]) -Greedy action tensor([ 1.9074, 0.4061, 0.0125, -0.0610]) tensor([0.6610, 0.1473, 0.0994, 0.0923]) -Greedy action tensor([ 2.0286, -1.0192, -0.2440, 0.3509]) tensor([0.7478, 0.0355, 0.0771, 0.1397]) -Greedy action tensor([ 1.9990, -0.6111, -0.1936, 0.1312]) tensor([0.7465, 0.0549, 0.0833, 0.1153]) -Greedy action tensor([ 1.9663, -0.1319, -0.2161, 0.6052]) tensor([0.6703, 0.0822, 0.0756, 0.1719]) -Greedy action tensor([ 1.0337, 0.2769, 0.1569, -0.0406]) tensor([0.4491, 0.2107, 0.1869, 0.1534]) -Greedy action tensor([ 2.0965, 0.3707, -0.1362, 0.2415]) tensor([0.6936, 0.1235, 0.0744, 0.1085]) -Greedy action tensor([ 1.3608, -0.4701, -0.2278, -0.1535]) tensor([0.6311, 0.1012, 0.1289, 0.1388]) -Greedy action tensor([ 1.5440, -0.6933, -0.1694, 0.4058]) tensor([0.6221, 0.0664, 0.1121, 0.1993]) -Greedy action tensor([ 1.3406, -0.5893, -0.3814, 0.0324]) tensor([0.6273, 0.0911, 0.1121, 0.1696]) -Greedy action tensor([ 1.5524, -0.6526, 0.0468, 0.0410]) tensor([0.6440, 0.0710, 0.1429, 0.1421]) -Greedy action tensor([ 2.1743, -1.2865, -0.1019, 0.5955]) tensor([0.7461, 0.0234, 0.0766, 0.1539]) -Greedy action tensor([ 1.2807, -0.0374, -0.4565, 0.5238]) tensor([0.5228, 0.1399, 0.0920, 0.2453]) -Greedy action tensor([ 0.8050, 0.1876, -0.0081, -0.3457]) tensor([0.4349, 0.2346, 0.1929, 0.1376]) -Greedy action tensor([ 0.7517, -0.6274, 0.0399, -0.3545]) tensor([0.4823, 0.1214, 0.2367, 0.1595]) -Greedy action tensor([ 0.8226, -0.6170, -0.1055, -0.3625]) tensor([0.5160, 0.1223, 0.2040, 0.1577]) -Greedy action tensor([ 0.8065, -0.6460, -0.0550, -0.3521]) tensor([0.5075, 0.1187, 0.2144, 0.1593]) -Greedy action tensor([ 0.6210, -0.2936, 0.1493, -0.2894]) tensor([0.4120, 0.1651, 0.2571, 0.1658]) -Greedy action tensor([ 0.4279, 0.0447, -0.0708, 0.0098]) tensor([0.3393, 0.2313, 0.2061, 0.2234]) -Greedy action tensor([ 0.7921, -0.4547, -0.0075, -0.4442]) tensor([0.4933, 0.1418, 0.2217, 0.1433]) -Greedy action tensor([ 0.6945, -0.0702, -0.0148, -0.3643]) tensor([0.4340, 0.2020, 0.2135, 0.1505]) -Greedy action tensor([ 0.5642, 0.1429, -0.1807, -0.2014]) tensor([0.3852, 0.2528, 0.1829, 0.1791]) -Greedy action tensor([ 0.2877, 0.0625, -0.0503, -0.0735]) tensor([0.3117, 0.2488, 0.2223, 0.2172]) -Greedy action tensor([ 0.0561, 0.5553, -0.0639, -0.1449]) tensor([0.2298, 0.3785, 0.2038, 0.1879]) -Greedy action tensor([ 0.8457, -0.5843, 0.0250, -0.3778]) tensor([0.5067, 0.1213, 0.2230, 0.1491]) -Greedy action tensor([ 0.7299, -0.2738, 0.0964, -0.1609]) tensor([0.4333, 0.1588, 0.2300, 0.1778]) -Greedy action tensor([ 0.7269, -0.3223, -0.1074, -0.1904]) tensor([0.4579, 0.1604, 0.1988, 0.1830]) -Greedy action tensor([ 0.3782, -0.5991, 0.0707, -0.2703]) tensor([0.3796, 0.1429, 0.2791, 0.1985]) -Greedy action tensor([ 0.4370, -0.1471, -0.0090, -0.1330]) tensor([0.3619, 0.2018, 0.2317, 0.2047]) -Greedy action tensor([ 0.6479, -0.3305, -0.0727, -0.1617]) tensor([0.4334, 0.1629, 0.2108, 0.1929]) -Greedy action tensor([ 1.0086, -0.5589, -0.5218, -0.4702]) tensor([0.6050, 0.1262, 0.1309, 0.1379]) -Greedy action tensor([ 0.9652, -0.5850, 0.0019, -0.2944]) tensor([0.5326, 0.1130, 0.2033, 0.1511]) -Greedy action tensor([ 0.7853, -0.4821, 0.0817, -0.1779]) tensor([0.4634, 0.1305, 0.2293, 0.1769]) -Greedy action tensor([ 0.9207, -0.7703, -0.0088, -0.3863]) tensor([0.5406, 0.0997, 0.2134, 0.1463]) -Greedy action tensor([ 1.0518, -0.4893, -0.1421, -0.4071]) tensor([0.5715, 0.1224, 0.1732, 0.1329]) -Greedy action tensor([ 0.6200, -0.5197, -0.1060, -0.2368]) tensor([0.4488, 0.1436, 0.2171, 0.1905]) -Greedy action tensor([ 0.6931, -0.5774, 0.0122, -0.2957]) tensor([0.4632, 0.1300, 0.2345, 0.1723]) -Greedy action tensor([ 5.5090e-01, -1.9348e-04, -1.0341e-01, -6.1628e-01]) tensor([0.4154, 0.2394, 0.2159, 0.1293]) -Greedy action tensor([ 0.3688, -0.3819, -0.0848, -0.1522]) tensor([0.3702, 0.1747, 0.2352, 0.2199]) -Greedy action tensor([ 0.7713, -0.6635, 0.1843, -0.5750]) tensor([0.4868, 0.1159, 0.2706, 0.1267]) -Greedy action tensor([ 1.0105, -0.9135, 0.0202, -0.4952]) tensor([0.5749, 0.0840, 0.2136, 0.1276]) -Greedy action tensor([ 0.8078, -0.5529, 0.0679, -0.6181]) tensor([0.5066, 0.1299, 0.2417, 0.1217]) -Greedy action tensor([ 0.4209, -0.2003, -0.0097, -0.5441]) tensor([0.3894, 0.2092, 0.2531, 0.1483]) -Greedy action tensor([ 0.8568, -0.2922, -0.0755, -0.5833]) tensor([0.5135, 0.1627, 0.2021, 0.1217]) -Greedy action tensor([ 0.4351, -0.3059, -0.0512, -0.2161]) tensor([0.3827, 0.1824, 0.2353, 0.1995]) -Greedy action tensor([ 0.8003, -0.6793, -0.0930, -0.4328]) tensor([0.5186, 0.1181, 0.2122, 0.1511]) -Greedy action tensor([ 0.7026, -0.2107, -0.0659, -0.1972]) tensor([0.4402, 0.1766, 0.2041, 0.1790]) -Greedy action tensor([ 0.6793, -0.3930, -0.1075, -0.2434]) tensor([0.4556, 0.1559, 0.2074, 0.1811]) -Greedy action tensor([ 0.7664, -0.8187, -0.1010, -0.3201]) tensor([0.5096, 0.1044, 0.2140, 0.1719]) -Greedy action tensor([ 0.2355, -0.2745, -0.1471, -0.0259]) tensor([0.3276, 0.1967, 0.2234, 0.2522]) -Greedy action tensor([ 0.4759, -0.4322, 0.0643, -0.6502]) tensor([0.4184, 0.1687, 0.2772, 0.1357]) -Greedy action tensor([ 0.3448, -0.1953, 0.0395, -0.4140]) tensor([0.3587, 0.2090, 0.2643, 0.1680]) -Greedy action tensor([ 0.6933, -0.4795, -0.0523, -0.1925]) tensor([0.4553, 0.1409, 0.2160, 0.1878]) -Greedy action tensor([ 1.2193, -0.6874, -0.1065, -0.3623]) tensor([0.6173, 0.0917, 0.1640, 0.1270]) -Greedy action tensor([ 0.7028, -0.7218, -0.0238, -0.3012]) tensor([0.4783, 0.1151, 0.2313, 0.1753]) -Greedy action tensor([ 1.0985, -0.5600, 0.1521, -0.3967]) tensor([0.5547, 0.1056, 0.2153, 0.1244]) -Greedy action tensor([ 0.7775, -0.5051, 0.0437, -0.2697]) tensor([0.4743, 0.1315, 0.2277, 0.1664]) -Greedy action tensor([ 0.7566, -0.5535, -0.1282, -0.4020]) tensor([0.5009, 0.1351, 0.2068, 0.1572]) -Greedy action tensor([ 1.2063, -0.7736, 0.0782, -0.6437]) tensor([0.6177, 0.0853, 0.1999, 0.0971]) -Greedy action tensor([ 0.2492, 0.0386, -0.0429, -0.0529]) tensor([0.3034, 0.2458, 0.2265, 0.2243]) -Greedy action tensor([ 0.4912, 0.0108, -0.0427, -0.0479]) tensor([0.3587, 0.2219, 0.2103, 0.2092]) -Greedy action tensor([ 1.4043, -0.8698, -0.0688, -0.4292]) tensor([0.6703, 0.0690, 0.1536, 0.1071]) -Greedy action tensor([ 0.6174, -0.3891, -0.0944, -0.1074]) tensor([0.4272, 0.1562, 0.2097, 0.2069]) -Greedy action tensor([ 0.3864, -0.3448, 0.1790, -0.6350]) tensor([0.3768, 0.1814, 0.3062, 0.1357]) -Greedy action tensor([ 0.6122, -0.5735, 0.0756, -0.5095]) tensor([0.4513, 0.1379, 0.2639, 0.1470]) -Greedy action tensor([ 0.6650, -0.3105, 0.1162, -0.2525]) tensor([0.4248, 0.1601, 0.2454, 0.1697]) -Greedy action tensor([ 0.5899, -0.4202, 0.0347, -0.1529]) tensor([0.4143, 0.1509, 0.2378, 0.1971]) -Greedy action tensor([ 0.7649, -0.4207, -0.0164, -0.3305]) tensor([0.4767, 0.1457, 0.2182, 0.1594]) -Greedy action tensor([ 0.6222, -0.5054, -0.0258, -0.4287]) tensor([0.4553, 0.1474, 0.2381, 0.1592]) -Greedy action tensor([ 0.5074, -0.3288, -0.1156, -0.0970]) tensor([0.3974, 0.1722, 0.2132, 0.2172]) -Greedy action tensor([ 0.3245, -0.3686, -0.2344, -0.0282]) tensor([0.3604, 0.1802, 0.2061, 0.2533]) -Greedy action tensor([ 0.7006, -0.2002, -0.0905, -0.0977]) tensor([0.4330, 0.1759, 0.1963, 0.1949]) -Greedy action tensor([ 0.5548, -0.3888, -0.1401, -0.3015]) tensor([0.4323, 0.1683, 0.2158, 0.1836]) -Greedy action tensor([ 0.7951, -0.7159, 0.0354, -0.3644]) tensor([0.4995, 0.1102, 0.2336, 0.1567]) -Greedy action tensor([ 0.5267, -0.1700, -0.1239, -0.0104]) tensor([0.3840, 0.1913, 0.2003, 0.2244]) -Greedy action tensor([ 0.7288, -0.4841, 0.1403, -0.3022]) tensor([0.4527, 0.1346, 0.2513, 0.1614]) -Greedy action tensor([ 0.4999, -0.5034, 0.0197, -0.3377]) tensor([0.4136, 0.1516, 0.2558, 0.1790]) -Greedy action tensor([ 0.2777, 0.0657, -0.1211, -0.3811]) tensor([0.3336, 0.2699, 0.2239, 0.1726]) -Greedy action tensor([ 0.6362, 0.0459, -0.0980, 0.0422]) tensor([0.3867, 0.2143, 0.1856, 0.2135]) -Greedy action tensor([ 0.8251, -0.2243, -0.1490, -0.1508]) tensor([0.4752, 0.1664, 0.1794, 0.1791]) -Greedy action tensor([ 0.9500, -0.4990, -0.0086, -0.6090]) tensor([0.5469, 0.1284, 0.2097, 0.1150]) -Greedy action tensor([ 0.2730, -0.1002, -0.1699, -0.1280]) tensor([0.3333, 0.2295, 0.2140, 0.2232]) -Greedy action tensor([ 0.6341, -0.3393, -0.0715, -0.0780]) tensor([0.4233, 0.1599, 0.2091, 0.2077]) -Greedy action tensor([ 0.6421, -0.5350, -0.0666, -0.2691]) tensor([0.4540, 0.1399, 0.2235, 0.1825]) -Greedy action tensor([ 0.7249, -0.3329, -0.0206, 0.0020]) tensor([0.4335, 0.1505, 0.2057, 0.2104]) -Greedy action tensor([ 0.7230, -0.3555, -0.0829, -0.1181]) tensor([0.4508, 0.1533, 0.2014, 0.1944]) -Greedy action tensor([ 0.5840, -0.3650, -0.0558, -0.2695]) tensor([0.4273, 0.1654, 0.2253, 0.1820]) -Greedy action tensor([ 0.8094, -0.3790, -0.0655, -0.3005]) tensor([0.4875, 0.1485, 0.2033, 0.1607]) -Greedy action tensor([ 1.2489, -0.5800, -0.2374, -0.9730]) tensor([0.6688, 0.1074, 0.1513, 0.0725]) -Greedy action tensor([ 0.3823, -0.3610, 0.0050, -0.4634]) tensor([0.3860, 0.1836, 0.2647, 0.1657]) -Greedy action tensor([ 0.2110, 0.2414, -0.0847, -0.4303]) tensor([0.3029, 0.3123, 0.2254, 0.1595]) -Greedy action tensor([ 0.9682, -0.7048, -0.0292, -0.4718]) tensor([0.5576, 0.1047, 0.2057, 0.1321]) -Greedy action tensor([ 0.7169, -0.4570, -0.1447, -0.6219]) tensor([0.5016, 0.1551, 0.2119, 0.1315]) -Greedy action tensor([ 0.8148, -0.4787, -0.0761, -0.2052]) tensor([0.4889, 0.1341, 0.2006, 0.1763]) -Greedy action tensor([-1.9056, -0.3691, 0.6382, -0.1546]) tensor([0.0414, 0.1926, 0.5273, 0.2387]) -Greedy action tensor([-1.9178, -0.4429, 0.6497, -0.1674]) tensor([0.0414, 0.1809, 0.5394, 0.2383]) -Greedy action tensor([-1.4879e+00, -1.5152e-04, 3.2030e-01, 1.3083e-01]) tensor([0.0603, 0.2671, 0.3680, 0.3045]) -Greedy action tensor([-1.9000, -0.3285, 0.6360, -0.1282]) tensor([0.0411, 0.1979, 0.5192, 0.2418]) -Greedy action tensor([-1.8885, -0.4460, 0.6411, -0.1464]) tensor([0.0426, 0.1801, 0.5342, 0.2431]) -Greedy action tensor([-1.9159, -0.4427, 0.6678, -0.1642]) tensor([0.0410, 0.1790, 0.5435, 0.2365]) -Greedy action tensor([-1.4035, -0.5017, 0.4158, 0.0280]) tensor([0.0724, 0.1783, 0.4464, 0.3029]) -Greedy action tensor([-1.6126, -0.4951, 0.5623, -0.1737]) tensor([0.0586, 0.1790, 0.5155, 0.2469]) -Greedy action tensor([-1.4460, -0.5690, 0.4728, 0.0422]) tensor([0.0683, 0.1641, 0.4652, 0.3024]) -Greedy action tensor([-1.8964, -0.3898, 0.6552, -0.1284]) tensor([0.0413, 0.1864, 0.5301, 0.2421]) -Greedy action tensor([-1.4739, -0.5187, 0.4211, 0.0781]) tensor([0.0668, 0.1736, 0.4443, 0.3153]) -Greedy action tensor([-0.2148, 0.0423, 1.0635, 1.6471]) tensor([0.0812, 0.1050, 0.2914, 0.5224]) -Greedy action tensor([-1.5431, -0.2308, 0.4574, 0.1209]) tensor([0.0575, 0.2136, 0.4252, 0.3037]) -Greedy action tensor([-1.8570, -0.4824, 0.6197, -0.1350]) tensor([0.0445, 0.1761, 0.5301, 0.2492]) -Greedy action tensor([-1.8123, -0.3427, 0.6242, -0.1032]) tensor([0.0448, 0.1949, 0.5126, 0.2477]) -Greedy action tensor([-1.7983, -0.1950, 0.5615, -0.1277]) tensor([0.0457, 0.2272, 0.4841, 0.2430]) -Greedy action tensor([-1.8128, -0.3948, 0.6859, -0.0533]) tensor([0.0433, 0.1787, 0.5266, 0.2514]) -Greedy action tensor([-1.6058, -0.3141, 0.6341, -0.0232]) tensor([0.0529, 0.1925, 0.4970, 0.2575]) -Greedy action tensor([-1.9276, -0.4288, 0.6594, -0.1665]) tensor([0.0407, 0.1821, 0.5406, 0.2367]) -Greedy action tensor([-1.5341, -0.5185, 0.5008, 0.1521]) tensor([0.0595, 0.1642, 0.4551, 0.3211]) -Greedy action tensor([-0.5453, -0.3637, 0.1634, 0.1668]) tensor([0.1595, 0.1913, 0.3240, 0.3252]) -Greedy action tensor([-1.8536, -0.4132, 0.6062, -0.1217]) tensor([0.0443, 0.1870, 0.5184, 0.2503]) -Greedy action tensor([-0.4845, -0.4214, 0.1794, 0.1462]) tensor([0.1699, 0.1810, 0.3300, 0.3192]) -Greedy action tensor([-1.9105, -0.4429, 0.6472, -0.1642]) tensor([0.0417, 0.1809, 0.5383, 0.2391]) -Greedy action tensor([-1.6662, 0.3463, 0.4167, -0.0406]) tensor([0.0463, 0.3465, 0.3718, 0.2353]) -Greedy action tensor([-1.8490, -0.3607, 0.6254, -0.1200]) tensor([0.0436, 0.1931, 0.5176, 0.2457]) -Greedy action tensor([-1.8765, -0.3557, 0.6095, -0.1378]) tensor([0.0430, 0.1966, 0.5161, 0.2444]) -Greedy action tensor([-1.6204, 0.1954, 0.3987, 0.0598]) tensor([0.0499, 0.3066, 0.3757, 0.2677]) -Greedy action tensor([-0.9973, 0.6164, 0.0578, 0.2023]) tensor([0.0819, 0.4112, 0.2352, 0.2718]) -Greedy action tensor([-1.9096, -0.4518, 0.6746, -0.1587]) tensor([0.0411, 0.1767, 0.5452, 0.2369]) -Greedy action tensor([-1.6083, -0.3498, 0.4877, -0.0325]) tensor([0.0572, 0.2013, 0.4651, 0.2765]) -Greedy action tensor([-1.9011, -0.4544, 0.6578, -0.1454]) tensor([0.0417, 0.1774, 0.5393, 0.2416]) -Greedy action tensor([-1.4756, 0.0956, 0.5444, 0.2067]) tensor([0.0534, 0.2570, 0.4025, 0.2872]) -Greedy action tensor([-1.8323, -0.4273, 0.6486, -0.0487]) tensor([0.0435, 0.1774, 0.5201, 0.2590]) -Greedy action tensor([-1.4835, -0.3708, 0.4434, -0.5124]) tensor([0.0738, 0.2245, 0.5068, 0.1949]) -Greedy action tensor([-1.7426, -0.3424, 0.5839, -0.0462]) tensor([0.0482, 0.1954, 0.4935, 0.2628]) -Greedy action tensor([-1.9199, -0.3960, 0.6479, -0.1678]) tensor([0.0410, 0.1882, 0.5344, 0.2364]) -Greedy action tensor([-1.0297, -0.4684, 0.1400, -0.0849]) tensor([0.1170, 0.2051, 0.3769, 0.3010]) -Greedy action tensor([-1.5590, 0.0467, 0.3652, 0.0897]) tensor([0.0555, 0.2763, 0.3799, 0.2884]) -Greedy action tensor([-1.5465, 0.5620, 0.3268, 0.0675]) tensor([0.0481, 0.3965, 0.3135, 0.2419]) -Greedy action tensor([-0.8691, 0.2454, 0.4852, 1.0342]) tensor([0.0684, 0.2083, 0.2648, 0.4585]) -Greedy action tensor([-1.2960, 0.3660, 0.1670, 0.2140]) tensor([0.0662, 0.3486, 0.2857, 0.2995]) -Greedy action tensor([-1.8752, -0.4285, 0.6288, -0.1327]) tensor([0.0431, 0.1832, 0.5274, 0.2463]) -Greedy action tensor([-1.7272, -0.2194, 0.6519, -0.0052]) tensor([0.0456, 0.2062, 0.4928, 0.2554]) -Greedy action tensor([-1.9384, -0.4256, 0.6587, -0.1759]) tensor([0.0403, 0.1831, 0.5415, 0.2350]) -Greedy action tensor([-1.8388, -0.2044, 0.5970, -0.1031]) tensor([0.0431, 0.2207, 0.4919, 0.2443]) -Greedy action tensor([-0.6277, -0.8860, 0.7673, 0.1293]) tensor([0.1260, 0.0973, 0.5082, 0.2685]) -Greedy action tensor([-0.4878, -0.5280, 0.1745, 0.2606]) tensor([0.1663, 0.1597, 0.3225, 0.3515]) -Greedy action tensor([-8.7054e-01, -4.5136e-04, 4.9940e-02, 6.8983e-01]) tensor([0.0938, 0.2240, 0.2355, 0.4467]) -Greedy action tensor([-1.8460, -0.4718, 0.6196, -0.1319]) tensor([0.0449, 0.1774, 0.5284, 0.2492]) -Greedy action tensor([-1.3490, -0.3873, 0.3581, 0.2041]) tensor([0.0722, 0.1888, 0.3979, 0.3411]) -Greedy action tensor([ 0.1132, -0.5504, 0.1299, 0.2901]) tensor([0.2684, 0.1382, 0.2730, 0.3204]) -Greedy action tensor([-0.5810, 0.7505, 0.0642, 0.4110]) tensor([0.1065, 0.4033, 0.2030, 0.2872]) -Greedy action tensor([-1.9052, -0.4796, 0.6677, -0.1551]) tensor([0.0416, 0.1732, 0.5455, 0.2396]) -Greedy action tensor([-1.9211, -0.4372, 0.6549, -0.1679]) tensor([0.0411, 0.1813, 0.5403, 0.2373]) -Greedy action tensor([-1.9097, -0.4207, 0.6518, -0.1574]) tensor([0.0414, 0.1835, 0.5363, 0.2388]) -Greedy action tensor([-1.7985, -0.4685, 0.6070, -0.0709]) tensor([0.0465, 0.1759, 0.5157, 0.2618]) -Greedy action tensor([-1.8969, -0.3875, 0.6319, -0.1535]) tensor([0.0421, 0.1903, 0.5273, 0.2404]) -Greedy action tensor([-1.0932, 0.2675, 0.7892, 0.9628]) tensor([0.0519, 0.2022, 0.3407, 0.4053]) -Greedy action tensor([-1.8888, -0.3865, 0.6306, -0.1447]) tensor([0.0423, 0.1901, 0.5256, 0.2421]) -Greedy action tensor([-1.2690, -0.2950, 0.3491, 0.1963]) tensor([0.0768, 0.2034, 0.3873, 0.3324]) -Greedy action tensor([0.4746, 0.9965, 0.3090, 0.9248]) tensor([0.1960, 0.3304, 0.1661, 0.3075]) -Greedy action tensor([-1.7852, -0.5067, 0.6138, -0.0481]) tensor([0.0470, 0.1687, 0.5174, 0.2669]) -Greedy action tensor([-1.8531, -0.4307, 0.6502, -0.1015]) tensor([0.0432, 0.1793, 0.5284, 0.2492]) -Greedy action tensor([-1.8787, -0.4393, 0.6271, -0.1526]) tensor([0.0433, 0.1827, 0.5307, 0.2433]) -Greedy action tensor([-1.9334, -0.4527, 0.6630, -0.1753]) tensor([0.0406, 0.1786, 0.5451, 0.2357]) -Greedy action tensor([-1.7638, -0.3012, 0.6320, -0.0492]) tensor([0.0458, 0.1976, 0.5024, 0.2542]) -Greedy action tensor([-1.9415, -0.4539, 0.6661, -0.1794]) tensor([0.0403, 0.1784, 0.5467, 0.2347]) -Greedy action tensor([-1.8265, -0.2138, 0.5990, -0.1024]) tensor([0.0436, 0.2188, 0.4931, 0.2445]) -Greedy action tensor([-1.0502, -0.3651, 0.0503, 0.5674]) tensor([0.0907, 0.1799, 0.2725, 0.4570]) -Greedy action tensor([-1.8792, -0.4171, 0.6420, -0.1333]) tensor([0.0426, 0.1837, 0.5298, 0.2440]) -Greedy action tensor([-1.7143, -0.4858, 0.5198, -0.0535]) tensor([0.0526, 0.1796, 0.4910, 0.2768]) -Greedy action tensor([-0.3112, -0.0478, 0.1929, 0.2872]) tensor([0.1731, 0.2253, 0.2866, 0.3150]) -Greedy action tensor([-1.2611, 0.3218, 0.4811, 0.3655]) tensor([0.0600, 0.2922, 0.3426, 0.3052]) -Greedy action tensor([-1.3575, -0.4770, 0.5839, 0.4792]) tensor([0.0600, 0.1448, 0.4184, 0.3768]) -Greedy action tensor([-1.9262, -0.4384, 0.6555, -0.1730]) tensor([0.0410, 0.1813, 0.5413, 0.2364]) -Greedy action tensor([-1.4826, -0.1993, 0.4513, 0.1396]) tensor([0.0603, 0.2175, 0.4169, 0.3053]) -Greedy action tensor([-1.9225, -0.4244, 0.6539, -0.1677]) tensor([0.0410, 0.1833, 0.5388, 0.2369]) -Greedy action tensor([-1.7411, 0.0277, 0.5321, -0.0088]) tensor([0.0450, 0.2638, 0.4369, 0.2543]) -Greedy action tensor([-1.9388, -0.4480, 0.6650, -0.1757]) tensor([0.0403, 0.1792, 0.5453, 0.2352]) -Greedy action tensor([-1.3080, -0.5456, 0.5065, 0.2733]) tensor([0.0707, 0.1516, 0.4340, 0.3437]) -Greedy action tensor([ 1.1986, -0.5511, -0.0368, 0.1530]) tensor([0.5507, 0.0957, 0.1601, 0.1935]) -Greedy action tensor([ 2.1489, -0.9650, -0.3890, 0.4472]) tensor([0.7658, 0.0340, 0.0605, 0.1397]) -Greedy action tensor([ 1.7856, -1.1213, 0.0418, 0.6328]) tensor([0.6472, 0.0354, 0.1132, 0.2043]) -Greedy action tensor([ 1.6917, -0.3710, -0.6544, 0.1806]) tensor([0.6928, 0.0881, 0.0663, 0.1529]) -Greedy action tensor([ 1.3915, -0.1907, -0.5946, 0.3150]) tensor([0.5940, 0.1221, 0.0815, 0.2024]) -Greedy action tensor([ 1.7645, -0.6610, -0.1197, 0.2832]) tensor([0.6813, 0.0603, 0.1035, 0.1549]) -Greedy action tensor([ 1.8495, -0.2181, -0.1521, 0.3594]) tensor([0.6725, 0.0851, 0.0909, 0.1516]) -Greedy action tensor([ 1.0775, -0.4139, -0.3473, -0.1329]) tensor([0.5670, 0.1276, 0.1364, 0.1690]) -Greedy action tensor([ 1.5650, -0.6655, -0.4278, 0.4629]) tensor([0.6345, 0.0682, 0.0865, 0.2108]) -Greedy action tensor([ 1.1846, -0.6879, -0.2950, 0.3838]) tensor([0.5463, 0.0840, 0.1244, 0.2453]) -Greedy action tensor([ 1.4311, -0.8294, -0.2450, 0.3385]) tensor([0.6147, 0.0641, 0.1150, 0.2061]) -Greedy action tensor([ 2.0286, -0.3928, -0.8728, 0.1202]) tensor([0.7740, 0.0687, 0.0425, 0.1148]) -Greedy action tensor([ 1.2980, -0.3725, -0.3467, 0.2780]) tensor([0.5741, 0.1080, 0.1108, 0.2070]) -Greedy action tensor([ 1.4249, 0.0859, -0.2437, 0.4443]) tensor([0.5477, 0.1436, 0.1033, 0.2055]) -Greedy action tensor([ 1.9422, -0.5475, -0.4763, 0.6260]) tensor([0.6944, 0.0576, 0.0618, 0.1862]) -Greedy action tensor([ 1.9386, -0.4571, -0.2819, 0.6480]) tensor([0.6781, 0.0618, 0.0736, 0.1865]) -Greedy action tensor([ 1.5695, -0.2448, -0.5377, 0.6558]) tensor([0.5933, 0.0967, 0.0721, 0.2379]) -Greedy action tensor([ 1.3535, -0.0147, -1.0363, 0.3522]) tensor([0.5836, 0.1486, 0.0535, 0.2144]) -Greedy action tensor([ 1.7957, -1.2450, -0.2376, 0.5900]) tensor([0.6765, 0.0323, 0.0886, 0.2026]) -Greedy action tensor([ 2.7108, -0.7835, -0.4388, 0.8486]) tensor([0.8140, 0.0247, 0.0349, 0.1264]) -Greedy action tensor([ 1.3445, -0.0391, -0.6135, 0.3559]) tensor([0.5669, 0.1421, 0.0800, 0.2110]) -Greedy action tensor([ 0.9953, -0.1418, -0.8487, 0.1794]) tensor([0.5205, 0.1670, 0.0823, 0.2302]) -Greedy action tensor([ 1.8643, -0.5359, -0.1633, 0.6060]) tensor([0.6638, 0.0602, 0.0874, 0.1886]) -Greedy action tensor([ 1.1921, 0.1657, -0.7031, 0.1281]) tensor([0.5395, 0.1933, 0.0811, 0.1862]) -Greedy action tensor([ 1.2958, -0.3993, -0.8109, 0.9841]) tensor([0.4908, 0.0901, 0.0597, 0.3594]) -Greedy action tensor([ 1.4391, -0.3980, -0.1852, 0.3495]) tensor([0.5908, 0.0941, 0.1164, 0.1987]) -Greedy action tensor([ 1.3152, -0.7900, -0.7452, 0.8768]) tensor([0.5279, 0.0643, 0.0673, 0.3405]) -Greedy action tensor([ 2.5588, -1.0289, 0.1053, 0.7615]) tensor([0.7816, 0.0216, 0.0672, 0.1296]) -Greedy action tensor([ 1.7404, -0.7268, -0.2156, 0.4181]) tensor([0.6699, 0.0568, 0.0947, 0.1786]) -Greedy action tensor([ 1.1471, -0.1314, -0.6179, 0.3089]) tensor([0.5313, 0.1479, 0.0910, 0.2298]) -Greedy action tensor([ 1.3560, -0.4242, -0.3039, 0.4513]) tensor([0.5671, 0.0956, 0.1078, 0.2295]) -Greedy action tensor([ 1.2133, -0.8102, -0.2485, 0.1477]) tensor([0.5853, 0.0774, 0.1357, 0.2016]) -Greedy action tensor([ 1.9436, -0.6453, -0.6273, 0.1758]) tensor([0.7563, 0.0568, 0.0578, 0.1291]) -Greedy action tensor([ 1.6594, -0.5194, -0.9063, 0.3739]) tensor([0.6819, 0.0772, 0.0524, 0.1885]) -Greedy action tensor([ 1.9043, -0.7401, -0.0455, 0.6044]) tensor([0.6730, 0.0478, 0.0958, 0.1834]) -Greedy action tensor([ 1.4351, -0.2206, -0.4194, 0.5574]) tensor([0.5671, 0.1083, 0.0888, 0.2358]) -Greedy action tensor([ 1.4730, -0.5735, -0.1471, 0.1500]) tensor([0.6276, 0.0811, 0.1242, 0.1671]) -Greedy action tensor([ 1.3812, -0.3095, -0.8360, 0.5790]) tensor([0.5742, 0.1059, 0.0625, 0.2574]) -Greedy action tensor([ 1.3459, -0.1319, -0.5434, 0.2186]) tensor([0.5871, 0.1339, 0.0888, 0.1902]) -Greedy action tensor([ 1.4033, -0.3754, -0.5354, 0.3506]) tensor([0.6018, 0.1016, 0.0866, 0.2100]) -Greedy action tensor([ 1.1966, 0.1140, -0.5657, -0.0512]) tensor([0.5563, 0.1884, 0.0955, 0.1598]) -Greedy action tensor([ 1.0979, -0.1489, -0.3067, 0.5109]) tensor([0.4787, 0.1376, 0.1175, 0.2662]) -Greedy action tensor([ 2.4207, -0.9478, -0.5350, 0.7568]) tensor([0.7838, 0.0270, 0.0408, 0.1484]) -Greedy action tensor([ 1.4019, 0.1578, -0.1954, 0.5357]) tensor([0.5232, 0.1508, 0.1059, 0.2200]) -Greedy action tensor([ 1.5946, -0.3250, -0.0930, 0.6955]) tensor([0.5752, 0.0844, 0.1064, 0.2341]) -Greedy action tensor([ 2.1531, -0.0625, 0.0557, 0.4946]) tensor([0.7031, 0.0767, 0.0863, 0.1339]) -Greedy action tensor([ 1.9907, -0.1771, -0.1128, 0.3211]) tensor([0.7019, 0.0803, 0.0856, 0.1322]) -Greedy action tensor([ 1.3661, 0.1743, -0.0203, -0.2192]) tensor([0.5687, 0.1727, 0.1421, 0.1165]) -Greedy action tensor([ 1.8032, -0.7065, -0.5380, 0.4787]) tensor([0.6928, 0.0563, 0.0667, 0.1842]) -Greedy action tensor([ 1.4329, 0.0356, -0.2556, 0.2929]) tensor([0.5708, 0.1411, 0.1055, 0.1826]) -Greedy action tensor([ 1.2531, -0.2925, 0.0759, 0.1923]) tensor([0.5355, 0.1142, 0.1650, 0.1854]) -Greedy action tensor([ 1.2261, -0.4520, -0.0850, 0.1302]) tensor([0.5585, 0.1043, 0.1505, 0.1867]) -Greedy action tensor([ 1.6698, 0.0180, -0.1521, 0.2799]) tensor([0.6240, 0.1196, 0.1009, 0.1554]) -Greedy action tensor([ 1.8611, -0.5928, -0.5704, 0.8682]) tensor([0.6475, 0.0557, 0.0569, 0.2399]) -Greedy action tensor([ 1.6276, -0.1252, 0.0104, 0.0420]) tensor([0.6343, 0.1099, 0.1259, 0.1299]) -Greedy action tensor([ 1.3197, -0.5458, -0.3023, 0.4141]) tensor([0.5693, 0.0881, 0.1124, 0.2302]) -Greedy action tensor([ 1.5747, 0.0262, -0.2013, 0.1799]) tensor([0.6136, 0.1304, 0.1039, 0.1521]) -Greedy action tensor([ 1.3941, -0.1574, -0.4388, 0.1415]) tensor([0.6033, 0.1279, 0.0965, 0.1724]) -Greedy action tensor([ 2.2865, -1.3310, -0.2130, 0.6188]) tensor([0.7706, 0.0207, 0.0633, 0.1454]) -Greedy action tensor([ 1.5945, 0.2339, -0.1360, 0.1822]) tensor([0.5962, 0.1529, 0.1056, 0.1452]) -Greedy action tensor([ 1.2331, -0.0115, -0.8928, 0.0244]) tensor([0.5862, 0.1688, 0.0699, 0.1750]) -Greedy action tensor([ 1.3891, -0.3639, -0.3380, 0.1077]) tensor([0.6140, 0.1064, 0.1092, 0.1705]) -Greedy action tensor([ 1.7532, 0.4295, -0.1751, 0.0511]) tensor([0.6274, 0.1670, 0.0912, 0.1144]) -Greedy action tensor([ 1.7219, -0.9463, -0.2668, 0.5381]) tensor([0.6612, 0.0459, 0.0905, 0.2024]) -Greedy action tensor([ 1.6926, -0.7344, -0.4121, 0.5395]) tensor([0.6554, 0.0579, 0.0799, 0.2069]) -Greedy action tensor([ 1.6906, -1.0919, -0.0103, 0.1660]) tensor([0.6840, 0.0423, 0.1248, 0.1489]) -Greedy action tensor([ 1.9984, -0.2320, -0.5349, 0.6953]) tensor([0.6856, 0.0737, 0.0544, 0.1863]) -Greedy action tensor([ 2.2572, -0.8660, -0.3388, 0.7669]) tensor([0.7441, 0.0328, 0.0555, 0.1676]) -Greedy action tensor([ 1.8460, -0.2824, -0.6855, 0.7455]) tensor([0.6531, 0.0777, 0.0519, 0.2173]) -Greedy action tensor([ 1.9025, -0.1849, -1.0768, 0.5566]) tensor([0.6968, 0.0864, 0.0354, 0.1814]) -Greedy action tensor([ 1.6871, -0.5184, -0.6617, 0.6802]) tensor([0.6365, 0.0701, 0.0608, 0.2326]) -Greedy action tensor([ 1.5797, -0.8181, -0.3939, 0.7524]) tensor([0.5998, 0.0545, 0.0834, 0.2623]) -Greedy action tensor([ 1.2246, -0.2923, -0.7766, 0.3250]) tensor([0.5678, 0.1246, 0.0768, 0.2309]) -Greedy action tensor([ 1.3459, -1.0151, 0.2440, 0.3236]) tensor([0.5598, 0.0528, 0.1860, 0.2014]) -Greedy action tensor([ 1.7416, -0.7770, -0.4885, 0.5399]) tensor([0.6717, 0.0541, 0.0722, 0.2020]) -Greedy action tensor([ 1.7835, -0.3940, -0.3573, 0.3502]) tensor([0.6805, 0.0771, 0.0800, 0.1623]) -Greedy action tensor([ 1.2113, -0.2245, -0.5616, 0.1732]) tensor([0.5676, 0.1350, 0.0964, 0.2010]) -Greedy action tensor([ 1.2361, -0.5246, 0.0242, -0.0985]) tensor([0.5771, 0.0992, 0.1718, 0.1519]) -Greedy action tensor([ 1.7230, -0.4886, -0.3003, 0.4082]) tensor([0.6621, 0.0725, 0.0876, 0.1778]) -Greedy action tensor([ 1.7498, -0.2951, -0.3250, 0.1776]) tensor([0.6837, 0.0885, 0.0859, 0.1419]) -Greedy action tensor([ 2.7638, -1.0403, 0.0095, 0.8577]) tensor([0.8100, 0.0180, 0.0516, 0.1204]) -Greedy action tensor([-0.1027, -0.5160, 0.0533, 1.0759]) tensor([0.1645, 0.1088, 0.1922, 0.5345]) -Greedy action tensor([-0.1495, -1.1451, -0.3282, -1.0377]) tensor([0.3821, 0.1412, 0.3196, 0.1572]) -Greedy action tensor([ 0.6483, -0.2938, 0.0070, 0.5692]) tensor([0.3521, 0.1372, 0.1854, 0.3253]) -Greedy action tensor([ 0.9075, -0.5426, -0.6353, 1.0199]) tensor([0.3895, 0.0914, 0.0833, 0.4359]) -Greedy action tensor([-1.0284, -0.8062, 0.1824, -0.0121]) tensor([0.1195, 0.1492, 0.4011, 0.3302]) -Greedy action tensor([ 0.5935, 0.0572, 0.6631, -0.1886]) tensor([0.3211, 0.1878, 0.3442, 0.1469]) -Greedy action tensor([-0.1938, -0.5010, 0.4083, 0.4450]) tensor([0.1833, 0.1348, 0.3347, 0.3472]) -Greedy action tensor([ 1.1896, -0.2144, 1.5550, 0.6315]) tensor([0.3069, 0.0754, 0.4422, 0.1756]) -Greedy action tensor([-0.1360, 0.0559, 1.2164, -0.5121]) tensor([0.1478, 0.1791, 0.5716, 0.1015]) -Greedy action tensor([ 1.0256, -0.1444, -0.0815, 0.4510]) tensor([0.4538, 0.1408, 0.1500, 0.2554]) -Greedy action tensor([-0.0139, -1.6899, -0.4135, 0.4828]) tensor([0.2856, 0.0534, 0.1915, 0.4694]) -Greedy action tensor([-1.2378, -1.4972, 0.4300, -0.4015]) tensor([0.1066, 0.0823, 0.5651, 0.2460]) -Greedy action tensor([ 0.6907, 0.4672, -0.9896, 0.3493]) tensor([0.3708, 0.2965, 0.0691, 0.2636]) -Greedy action tensor([ 0.3340, 0.6938, -1.0110, -0.9123]) tensor([0.3354, 0.4807, 0.0874, 0.0965]) -Greedy action tensor([-0.0392, -0.9142, -0.3280, -1.0644]) tensor([0.3961, 0.1651, 0.2967, 0.1421]) -Greedy action tensor([ 0.2498, -0.7235, -0.8788, 0.5776]) tensor([0.3237, 0.1223, 0.1047, 0.4493]) -Greedy action tensor([ 0.5609, -2.5729, -0.0338, 0.2368]) tensor([0.4313, 0.0188, 0.2380, 0.3119]) -Greedy action tensor([ 0.6969, -0.2511, -0.2775, 0.3295]) tensor([0.4069, 0.1577, 0.1536, 0.2818]) -Greedy action tensor([ 0.6475, 0.9471, -0.4822, -0.1114]) tensor([0.3184, 0.4296, 0.1029, 0.1491]) -Greedy action tensor([-0.4671, -0.7110, -0.5737, -0.5248]) tensor([0.2758, 0.2161, 0.2479, 0.2603]) -Greedy action tensor([-0.3332, -1.4720, 1.2148, -0.0176]) tensor([0.1353, 0.0433, 0.6360, 0.1854]) -Greedy action tensor([ 1.3874, -0.3272, 1.4448, -0.0374]) tensor([0.4033, 0.0726, 0.4271, 0.0970]) -Greedy action tensor([ 0.8617, -0.0564, -0.5930, -0.4178]) tensor([0.5233, 0.2090, 0.1222, 0.1456]) -Greedy action tensor([-0.5927, -0.1080, 1.0533, -1.0299]) tensor([0.1183, 0.1920, 0.6133, 0.0764]) -Greedy action tensor([1.1112, 0.2732, 0.9196, 0.7222]) tensor([0.3406, 0.1473, 0.2812, 0.2308]) -Greedy action tensor([-1.3269, -0.3618, -0.2770, 0.1101]) tensor([0.0935, 0.2455, 0.2673, 0.3936]) -Greedy action tensor([-0.2881, -0.4524, -0.1816, -0.6485]) tensor([0.2734, 0.2319, 0.3041, 0.1906]) -Greedy action tensor([-1.1501, 0.6772, 0.2536, -0.5960]) tensor([0.0768, 0.4772, 0.3124, 0.1336]) -Greedy action tensor([ 0.5512, 0.8108, -0.8154, -0.4595]) tensor([0.3430, 0.4447, 0.0875, 0.1248]) -Greedy action tensor([ 0.0363, 0.1994, -0.5409, 0.0170]) tensor([0.2689, 0.3165, 0.1509, 0.2637]) -Greedy action tensor([-0.4605, -1.1925, -0.3449, 0.2664]) tensor([0.2140, 0.1029, 0.2403, 0.4428]) -Greedy action tensor([ 0.0329, -0.5273, 1.0955, -0.2414]) tensor([0.1914, 0.1093, 0.5538, 0.1455]) -Greedy action tensor([ 0.1538, -0.6257, 0.2046, 1.0558]) tensor([0.2010, 0.0922, 0.2115, 0.4953]) -Greedy action tensor([ 0.3911, -0.4554, -0.5602, -0.4293]) tensor([0.4434, 0.1902, 0.1712, 0.1952]) -Greedy action tensor([-1.1784, -0.5037, 0.3813, -0.9865]) tensor([0.1119, 0.2198, 0.5326, 0.1356]) -Greedy action tensor([ 1.7301, -1.6662, 0.2891, 0.7953]) tensor([0.6014, 0.0201, 0.1423, 0.2361]) -Greedy action tensor([-0.9681, -1.4970, -1.2208, 0.2427]) tensor([0.1748, 0.1030, 0.1357, 0.5865]) -Greedy action tensor([-0.1186, 1.3748, 0.8792, -0.5431]) tensor([0.1134, 0.5048, 0.3076, 0.0742]) -Greedy action tensor([-0.3489, -0.0971, 0.5102, 0.3320]) tensor([0.1510, 0.1942, 0.3565, 0.2983]) -Greedy action tensor([0.6175, 0.5506, 0.1756, 0.5349]) tensor([0.2858, 0.2673, 0.1837, 0.2631]) -Greedy action tensor([ 0.0663, -0.8491, -0.2838, 0.8638]) tensor([0.2312, 0.0926, 0.1629, 0.5133]) -Greedy action tensor([-0.5805, -0.2842, 0.5566, -1.1990]) tensor([0.1666, 0.2241, 0.5195, 0.0898]) -Greedy action tensor([-0.8557, -0.5552, -0.5788, 0.3363]) tensor([0.1436, 0.1940, 0.1894, 0.4730]) -Greedy action tensor([ 0.3753, -0.8167, -0.1912, 0.1259]) tensor([0.3773, 0.1146, 0.2141, 0.2940]) -Greedy action tensor([ 0.1417, -1.7142, 0.1798, -0.0919]) tensor([0.3348, 0.0523, 0.3478, 0.2651]) -Greedy action tensor([ 0.3173, -0.6904, 0.5776, -0.0880]) tensor([0.3004, 0.1097, 0.3897, 0.2003]) -Greedy action tensor([ 1.3402, -1.0118, 1.1259, 0.0233]) tensor([0.4608, 0.0439, 0.3719, 0.1235]) -Greedy action tensor([ 1.3304, -1.2658, -0.2072, 0.7889]) tensor([0.5344, 0.0398, 0.1148, 0.3109]) -Greedy action tensor([ 1.0837, -0.6912, -0.3492, -0.3455]) tensor([0.6069, 0.1029, 0.1448, 0.1454]) -Greedy action tensor([ 0.9532, -0.8524, -0.4065, 0.9176]) tensor([0.4191, 0.0689, 0.1076, 0.4044]) -Greedy action tensor([-0.0050, -0.3597, -1.3885, -0.0418]) tensor([0.3429, 0.2405, 0.0860, 0.3305]) -Greedy action tensor([ 0.4956, -1.0694, 0.0686, 1.2322]) tensor([0.2531, 0.0529, 0.1652, 0.5288]) -Greedy action tensor([-0.7919, -0.7830, -1.0331, 0.2082]) tensor([0.1814, 0.1830, 0.1425, 0.4931]) -Greedy action tensor([ 0.6457, -1.0483, 0.3786, -0.5368]) tensor([0.4433, 0.0815, 0.3394, 0.1359]) -Greedy action tensor([-0.8219, -0.6730, -1.0964, 1.0992]) tensor([0.1026, 0.1190, 0.0779, 0.7004]) -Greedy action tensor([ 0.8262, 0.0581, -0.0569, -1.0308]) tensor([0.4918, 0.2281, 0.2033, 0.0768]) -Greedy action tensor([-0.2115, -0.3088, 0.0555, -0.1283]) tensor([0.2326, 0.2110, 0.3037, 0.2527]) -Greedy action tensor([-0.7224, -1.7734, 0.2820, 0.1341]) tensor([0.1554, 0.0543, 0.4243, 0.3660]) -Greedy action tensor([ 1.0072, -0.9656, 1.3189, 0.5767]) tensor([0.3169, 0.0441, 0.4329, 0.2061]) -Greedy action tensor([-0.2016, 0.3789, 0.6101, -0.7547]) tensor([0.1781, 0.3183, 0.4011, 0.1025]) -Greedy action tensor([ 0.1671, -2.3526, -0.0292, 0.2968]) tensor([0.3289, 0.0265, 0.2703, 0.3744]) -Greedy action tensor([-0.2314, 0.0896, 0.3837, -0.3800]) tensor([0.1965, 0.2708, 0.3634, 0.1693]) -Greedy action tensor([ 1.0670, -1.2375, 1.6624, 0.5592]) tensor([0.2845, 0.0284, 0.5159, 0.1712]) -Greedy action tensor([ 1.2626, -0.1129, 0.0196, 1.1480]) tensor([0.4110, 0.1039, 0.1186, 0.3665]) -Greedy action tensor([ 1.3774, -0.6657, 0.0737, 1.3113]) tensor([0.4279, 0.0555, 0.1162, 0.4005]) -Greedy action tensor([ 0.2780, -0.9725, 0.5409, -0.1977]) tensor([0.3117, 0.0892, 0.4054, 0.1937]) -Greedy action tensor([-0.2482, -1.4350, 0.3629, 0.0284]) tensor([0.2239, 0.0683, 0.4125, 0.2952]) -Greedy action tensor([ 0.5616, -0.9824, 0.1873, 0.7714]) tensor([0.3190, 0.0681, 0.2194, 0.3935]) -Greedy action tensor([-0.1532, 0.2094, -0.2955, -0.5457]) tensor([0.2513, 0.3611, 0.2179, 0.1697]) -Greedy action tensor([0.7133, 0.3872, 0.6574, 0.3291]) tensor([0.2987, 0.2155, 0.2824, 0.2034]) -Greedy action tensor([ 1.5526, -0.5760, -0.2129, 1.7375]) tensor([0.4011, 0.0477, 0.0686, 0.4826]) -Greedy action tensor([ 1.2044, -0.3170, -0.0872, -0.0899]) tensor([0.5658, 0.1236, 0.1555, 0.1551]) -Greedy action tensor([ 1.0204, 0.0530, -0.2073, -0.5292]) tensor([0.5304, 0.2016, 0.1554, 0.1126]) -Greedy action tensor([-1.4448, 0.6953, 0.3169, -0.9240]) tensor([0.0588, 0.4998, 0.3424, 0.0990]) -Greedy action tensor([-0.8951, -2.2993, 0.1750, -0.0838]) tensor([0.1560, 0.0383, 0.4547, 0.3510]) -Greedy action tensor([ 0.4200, -0.9126, -0.6963, -0.5197]) tensor([0.5045, 0.1331, 0.1652, 0.1971]) -Greedy action tensor([-0.4812, 0.2263, -0.1832, 0.0145]) tensor([0.1662, 0.3372, 0.2239, 0.2728]) -Greedy action tensor([ 0.7431, -0.2560, 0.7910, 0.9940]) tensor([0.2701, 0.0994, 0.2833, 0.3471]) -Greedy action tensor([-0.2592, 0.5729, 0.1599, -0.5523]) tensor([0.1797, 0.4130, 0.2733, 0.1340]) -Greedy action tensor([ 0.8280, -1.3964, -0.7473, 0.8712]) tensor([0.4239, 0.0458, 0.0877, 0.4426]) -Greedy action tensor([ 0.5268, -0.0606, -0.3526, 0.6646]) tensor([0.3207, 0.1782, 0.1331, 0.3680]) -Greedy action tensor([ 0.4710, -1.2036, -0.3332, 0.4475]) tensor([0.3829, 0.0718, 0.1713, 0.3740]) -Greedy action tensor([ 0.8081, -0.6863, -0.2073, -0.4282]) tensor([0.5327, 0.1195, 0.1930, 0.1547]) -Greedy action tensor([ 0.4733, -0.0185, -0.0658, -0.1227]) tensor([0.3642, 0.2227, 0.2124, 0.2007]) -Greedy action tensor([ 0.5149, -0.0735, -0.1983, 0.0926]) tensor([0.3703, 0.2056, 0.1815, 0.2427]) -Greedy action tensor([ 0.2108, 0.1155, -0.0989, -0.2975]) tensor([0.3082, 0.2802, 0.2261, 0.1854]) -Greedy action tensor([ 0.3667, -0.0681, -0.0743, 0.0346]) tensor([0.3324, 0.2152, 0.2139, 0.2385]) -Greedy action tensor([ 0.5238, -0.1048, 0.0250, -0.0275]) tensor([0.3681, 0.1963, 0.2235, 0.2121]) -Greedy action tensor([ 0.2516, 0.2179, -0.1045, -0.3018]) tensor([0.3084, 0.2982, 0.2160, 0.1773]) -Greedy action tensor([ 0.3475, -0.4531, -0.1107, -0.0985]) tensor([0.3674, 0.1650, 0.2324, 0.2352]) -Greedy action tensor([ 0.5076, -0.1022, -0.0529, -0.1793]) tensor([0.3820, 0.2076, 0.2181, 0.1922]) -Greedy action tensor([ 0.9642, -0.4761, 0.0181, -0.5063]) tensor([0.5391, 0.1277, 0.2093, 0.1239]) -Greedy action tensor([ 0.7236, -0.1989, -0.1033, -0.1614]) tensor([0.4449, 0.1769, 0.1946, 0.1836]) -Greedy action tensor([ 0.4469, -0.1065, -0.1154, -0.1592]) tensor([0.3717, 0.2137, 0.2118, 0.2027]) -Greedy action tensor([ 0.2385, 0.1704, -0.0779, -0.5125]) tensor([0.3190, 0.2980, 0.2325, 0.1505]) -Greedy action tensor([ 0.8733, -0.6229, 0.0177, -0.4130]) tensor([0.5194, 0.1163, 0.2208, 0.1435]) -Greedy action tensor([ 0.3236, 0.1320, 0.0327, -0.6505]) tensor([0.3389, 0.2798, 0.2533, 0.1279]) -Greedy action tensor([ 1.0537, -0.9198, 0.0596, -0.3535]) tensor([0.5702, 0.0792, 0.2110, 0.1396]) -Greedy action tensor([ 0.9088, -0.4912, 0.1313, -0.4467]) tensor([0.5092, 0.1256, 0.2340, 0.1313]) -Greedy action tensor([ 1.1778, -0.8895, 0.1030, -0.5157]) tensor([0.6054, 0.0766, 0.2067, 0.1113]) -Greedy action tensor([ 0.7069, -0.4572, 0.0734, -0.2908]) tensor([0.4521, 0.1412, 0.2400, 0.1667]) -Greedy action tensor([ 1.1830, -0.2584, -0.0119, 0.0021]) tensor([0.5416, 0.1281, 0.1640, 0.1663]) -Greedy action tensor([ 0.5735, -0.2169, -0.0413, -0.1466]) tensor([0.4030, 0.1828, 0.2180, 0.1962]) -Greedy action tensor([ 0.4917, -0.2699, -0.0440, -0.3205]) tensor([0.4006, 0.1871, 0.2345, 0.1778]) -Greedy action tensor([ 0.3030, -0.2841, -0.0828, -0.1874]) tensor([0.3511, 0.1952, 0.2387, 0.2150]) -Greedy action tensor([ 0.6833, -0.5889, -0.1908, -0.1490]) tensor([0.4689, 0.1314, 0.1957, 0.2040]) -Greedy action tensor([ 0.6149, 0.3859, -0.1715, -0.1836]) tensor([0.3702, 0.2945, 0.1687, 0.1666]) -Greedy action tensor([ 1.0174, -0.8037, -0.0012, -0.5814]) tensor([0.5797, 0.0938, 0.2093, 0.1172]) -Greedy action tensor([ 0.2741, 0.0877, -0.0368, -0.0484]) tensor([0.3042, 0.2525, 0.2229, 0.2203]) -Greedy action tensor([ 0.7426, -0.5695, 0.0108, -0.2840]) tensor([0.4743, 0.1277, 0.2282, 0.1699]) -Greedy action tensor([ 0.6857, -0.3283, -0.1478, -0.1201]) tensor([0.4456, 0.1617, 0.1936, 0.1991]) -Greedy action tensor([ 1.0480, -0.5395, -0.0543, -0.3279]) tensor([0.5589, 0.1143, 0.1856, 0.1412]) -Greedy action tensor([ 0.4820, 0.0212, -0.0767, -0.1396]) tensor([0.3650, 0.2302, 0.2087, 0.1960]) -Greedy action tensor([ 0.6097, -0.1555, -0.0116, -0.5486]) tensor([0.4317, 0.2008, 0.2319, 0.1356]) -Greedy action tensor([ 0.2860, -0.2425, 0.0041, -0.3184]) tensor([0.3460, 0.2040, 0.2610, 0.1891]) -Greedy action tensor([ 0.7420, -0.2397, 0.1789, -0.1942]) tensor([0.4280, 0.1604, 0.2437, 0.1678]) -Greedy action tensor([ 0.8293, -0.5278, 0.0944, -0.3027]) tensor([0.4856, 0.1250, 0.2329, 0.1565]) -Greedy action tensor([ 0.8090, -0.8155, 0.1085, -0.4989]) tensor([0.5092, 0.1003, 0.2528, 0.1377]) -Greedy action tensor([ 0.8659, -0.5177, 0.1751, -0.6645]) tensor([0.5081, 0.1274, 0.2546, 0.1100]) -Greedy action tensor([ 0.6414, -0.0868, -0.0152, -0.0039]) tensor([0.3959, 0.1911, 0.2053, 0.2077]) -Greedy action tensor([ 0.2773, 0.1565, 0.0840, -0.0787]) tensor([0.2932, 0.2598, 0.2416, 0.2054]) -Greedy action tensor([ 0.8960, -0.5736, -0.0611, -0.3154]) tensor([0.5230, 0.1203, 0.2009, 0.1558]) -Greedy action tensor([ 1.1059, -0.6829, -0.0035, -0.3662]) tensor([0.5793, 0.0968, 0.1910, 0.1329]) -Greedy action tensor([ 0.6853, 0.0718, 0.0714, -0.4112]) tensor([0.4138, 0.2240, 0.2239, 0.1382]) -Greedy action tensor([ 0.7883, -0.0484, 0.1254, -0.6462]) tensor([0.4573, 0.1981, 0.2357, 0.1089]) -Greedy action tensor([ 0.7948, -0.3524, 0.0065, -0.1454]) tensor([0.4624, 0.1468, 0.2102, 0.1806]) -Greedy action tensor([ 0.5155, -0.3420, -0.1250, -0.2511]) tensor([0.4139, 0.1756, 0.2182, 0.1923]) -Greedy action tensor([ 0.7278, -0.3306, -0.0169, -0.0177]) tensor([0.4355, 0.1511, 0.2068, 0.2066]) -Greedy action tensor([ 0.7255, -0.1632, -0.1124, -0.0453]) tensor([0.4336, 0.1783, 0.1876, 0.2006]) -Greedy action tensor([ 1.0570, -0.5020, -0.0350, -0.2650]) tensor([0.5517, 0.1161, 0.1851, 0.1471]) -Greedy action tensor([ 0.5912, -0.1184, 0.1810, 0.0227]) tensor([0.3674, 0.1807, 0.2438, 0.2081]) -Greedy action tensor([ 0.5542, -0.3797, 0.1918, -0.4620]) tensor([0.4080, 0.1604, 0.2840, 0.1477]) -Greedy action tensor([ 0.6268, -0.2835, -0.0605, -0.3163]) tensor([0.4358, 0.1754, 0.2192, 0.1697]) -Greedy action tensor([ 0.5363, -0.3148, 0.0194, -0.2098]) tensor([0.4004, 0.1710, 0.2388, 0.1899]) -Greedy action tensor([ 0.4680, -0.2235, 0.0482, -0.2350]) tensor([0.3769, 0.1888, 0.2477, 0.1866]) -Greedy action tensor([ 0.5998, -0.0540, -0.0471, -0.0447]) tensor([0.3893, 0.2025, 0.2039, 0.2044]) -Greedy action tensor([ 0.7723, -0.2183, -0.0812, -0.2484]) tensor([0.4635, 0.1721, 0.1974, 0.1670]) -Greedy action tensor([ 0.5414, -0.3485, -0.1329, -0.2954]) tensor([0.4249, 0.1745, 0.2165, 0.1840]) -Greedy action tensor([ 0.8024, -0.6967, -0.0116, -0.3134]) tensor([0.5015, 0.1120, 0.2222, 0.1643]) -Greedy action tensor([ 0.4894, 0.0331, -0.1471, 0.1208]) tensor([0.3503, 0.2220, 0.1854, 0.2423]) -Greedy action tensor([ 0.7953, -0.4003, 0.1203, -0.5654]) tensor([0.4835, 0.1463, 0.2462, 0.1240]) -Greedy action tensor([ 0.7584, -0.4146, -0.0999, -0.3667]) tensor([0.4859, 0.1504, 0.2060, 0.1577]) -Greedy action tensor([-0.0161, 0.4345, -0.2110, -0.6206]) tensor([0.2539, 0.3984, 0.2089, 0.1387]) -Greedy action tensor([ 0.5813, -0.3801, -0.0728, -0.1874]) tensor([0.4227, 0.1616, 0.2198, 0.1960]) -Greedy action tensor([ 0.3804, -0.0759, -0.0533, -0.1361]) tensor([0.3474, 0.2201, 0.2252, 0.2073]) -Greedy action tensor([ 0.6606, -0.6703, -0.0883, -0.2493]) tensor([0.4674, 0.1235, 0.2210, 0.1881]) -Greedy action tensor([ 0.9453, -0.4747, 0.1517, -0.2927]) tensor([0.5041, 0.1218, 0.2279, 0.1462]) -Greedy action tensor([ 0.9870, -0.6357, -0.0824, -0.3759]) tensor([0.5566, 0.1099, 0.1910, 0.1425]) -Greedy action tensor([ 0.4887, -0.4430, -0.0233, -0.5060]) tensor([0.4232, 0.1667, 0.2536, 0.1565]) -Greedy action tensor([ 0.7170, -0.2943, -0.0831, -0.4039]) tensor([0.4675, 0.1701, 0.2100, 0.1524]) -Greedy action tensor([ 0.9459, -0.0200, -0.0029, -0.4586]) tensor([0.4967, 0.1891, 0.1923, 0.1219]) -Greedy action tensor([ 0.4250, -0.0765, 0.2871, -0.4334]) tensor([0.3447, 0.2088, 0.3004, 0.1461]) -Greedy action tensor([ 0.6003, -0.5003, -0.1571, -0.4709]) tensor([0.4664, 0.1552, 0.2187, 0.1598]) -Greedy action tensor([ 0.5034, -0.1711, -0.1111, -0.0053]) tensor([0.3771, 0.1921, 0.2040, 0.2268]) -Greedy action tensor([ 0.6833, -0.6629, -0.1819, -0.1334]) tensor([0.4710, 0.1226, 0.1983, 0.2081]) -Greedy action tensor([ 0.5067, -0.1156, 0.2636, -0.4609]) tensor([0.3703, 0.1987, 0.2903, 0.1407]) -Greedy action tensor([ 0.4497, -0.4856, -0.0406, -0.3280]) tensor([0.4058, 0.1593, 0.2485, 0.1864]) -Greedy action tensor([ 0.9215, -0.6034, 0.1035, -0.3918]) tensor([0.5187, 0.1129, 0.2289, 0.1395]) -Greedy action tensor([ 0.3287, 0.0014, -0.0851, -0.0093]) tensor([0.3231, 0.2329, 0.2136, 0.2304]) -Greedy action tensor([ 1.1623, -1.0394, 0.1403, -0.5330]) tensor([0.6046, 0.0669, 0.2176, 0.1110]) -Greedy action tensor([ 1.0129, -0.4872, -0.0068, -0.1892]) tensor([0.5307, 0.1184, 0.1914, 0.1595]) -Greedy action tensor([ 0.6422, -0.4588, -0.0901, -0.5238]) tensor([0.4706, 0.1565, 0.2263, 0.1466]) -Greedy action tensor([ 0.8777, -0.3088, -0.0278, -0.1839]) tensor([0.4865, 0.1485, 0.1967, 0.1683]) -Greedy action tensor([-1.9217, -0.4438, 0.6523, -0.1677]) tensor([0.0412, 0.1805, 0.5403, 0.2380]) -Greedy action tensor([-1.7322, -0.2884, 0.5985, 0.0022]) tensor([0.0472, 0.2000, 0.4854, 0.2674]) -Greedy action tensor([-1.9059, -0.4202, 0.6430, -0.1578]) tensor([0.0417, 0.1844, 0.5340, 0.2398]) -Greedy action tensor([-1.1910, -0.4886, 0.3607, -0.0069]) tensor([0.0909, 0.1834, 0.4288, 0.2969]) -Greedy action tensor([-1.4678, -0.5960, 0.4054, 0.1391]) tensor([0.0672, 0.1606, 0.4372, 0.3350]) -Greedy action tensor([-1.8351, -0.4599, 0.6158, -0.1217]) tensor([0.0452, 0.1790, 0.5248, 0.2510]) -Greedy action tensor([-1.8535, -0.4604, 0.6384, -0.1239]) tensor([0.0440, 0.1770, 0.5312, 0.2478]) -Greedy action tensor([-1.9155, -0.4507, 0.6573, -0.1636]) tensor([0.0413, 0.1788, 0.5415, 0.2383]) -Greedy action tensor([-1.4414, -0.4702, 0.5914, 0.3885]) tensor([0.0571, 0.1508, 0.4361, 0.3560]) -Greedy action tensor([-1.9291, -0.4350, 0.6605, -0.1687]) tensor([0.0407, 0.1811, 0.5418, 0.2364]) -Greedy action tensor([-1.7381, -0.2905, 0.5834, -0.0363]) tensor([0.0478, 0.2032, 0.4869, 0.2620]) -Greedy action tensor([-1.4609, 0.1956, 0.3762, -0.0105]) tensor([0.0596, 0.3123, 0.3741, 0.2541]) -Greedy action tensor([-1.9253, -0.4573, 0.6707, -0.1684]) tensor([0.0407, 0.1769, 0.5463, 0.2361]) -Greedy action tensor([-1.8655, -0.4012, 0.6285, -0.1328]) tensor([0.0433, 0.1873, 0.5245, 0.2450]) -Greedy action tensor([-1.6188, -0.2758, 0.5800, 0.0799]) tensor([0.0518, 0.1984, 0.4668, 0.2831]) -Greedy action tensor([-1.8992, -0.4602, 0.6521, -0.1552]) tensor([0.0421, 0.1775, 0.5397, 0.2407]) -Greedy action tensor([-1.8579, -0.4354, 0.6267, -0.1318]) tensor([0.0439, 0.1822, 0.5270, 0.2468]) -Greedy action tensor([-1.7284, -0.5164, 0.5879, -0.0464]) tensor([0.0503, 0.1691, 0.5101, 0.2705]) -Greedy action tensor([-1.9316, -0.4453, 0.6570, -0.1750]) tensor([0.0408, 0.1802, 0.5428, 0.2362]) -Greedy action tensor([0.3472, 1.0477, 0.0949, 0.8285]) tensor([0.1848, 0.3724, 0.1436, 0.2991]) -Greedy action tensor([-1.4152, -0.5153, 0.4827, 0.3755]) tensor([0.0620, 0.1525, 0.4138, 0.3717]) -Greedy action tensor([-1.9086, -0.4275, 0.6447, -0.1642]) tensor([0.0417, 0.1835, 0.5361, 0.2387]) -Greedy action tensor([-1.4121, -0.5750, 0.3689, 0.1692]) tensor([0.0709, 0.1637, 0.4208, 0.3446]) -Greedy action tensor([-1.7224, -0.3236, 0.5370, -0.0391]) tensor([0.0500, 0.2024, 0.4786, 0.2690]) -Greedy action tensor([-0.9429, 0.8641, 0.0989, 0.3620]) tensor([0.0735, 0.4475, 0.2082, 0.2709]) -Greedy action tensor([-1.9297, -0.4430, 0.6600, -0.1719]) tensor([0.0407, 0.1802, 0.5429, 0.2363]) -Greedy action tensor([-1.8499, -0.3356, 0.6185, -0.1282]) tensor([0.0436, 0.1982, 0.5144, 0.2438]) -Greedy action tensor([-1.9120, -0.4383, 0.6479, -0.1650]) tensor([0.0416, 0.1816, 0.5381, 0.2387]) -Greedy action tensor([-1.8308, -0.4458, 0.6214, -0.1205]) tensor([0.0452, 0.1804, 0.5246, 0.2498]) -Greedy action tensor([-1.1147, -0.6251, 0.2343, 0.3154]) tensor([0.0938, 0.1530, 0.3614, 0.3919]) -Greedy action tensor([-1.9037, -0.4417, 0.6451, -0.1557]) tensor([0.0419, 0.1809, 0.5363, 0.2408]) -Greedy action tensor([-0.7501, -0.3769, 0.1929, 0.9733]) tensor([0.0941, 0.1367, 0.2417, 0.5275]) -Greedy action tensor([-1.1232, -0.5226, 1.0199, 1.2718]) tensor([0.0448, 0.0817, 0.3820, 0.4915]) -Greedy action tensor([-1.9194, -0.4197, 0.6562, -0.1667]) tensor([0.0410, 0.1837, 0.5387, 0.2366]) -Greedy action tensor([-1.9334, -0.4339, 0.6603, -0.1720]) tensor([0.0405, 0.1815, 0.5421, 0.2359]) -Greedy action tensor([-1.9043, -0.4740, 0.7050, -0.1071]) tensor([0.0403, 0.1685, 0.5479, 0.2432]) -Greedy action tensor([-1.9341, -0.4305, 0.6622, -0.1701]) tensor([0.0404, 0.1817, 0.5420, 0.2358]) -Greedy action tensor([-1.8447, -0.4335, 0.6449, -0.0930]) tensor([0.0436, 0.1789, 0.5260, 0.2515]) -Greedy action tensor([-1.8525, -0.4282, 0.6220, -0.1311]) tensor([0.0442, 0.1837, 0.5249, 0.2472]) -Greedy action tensor([-1.8904, -0.3165, 0.6290, -0.1497]) tensor([0.0418, 0.2015, 0.5187, 0.2381]) -Greedy action tensor([-0.9190, -0.5673, 0.3359, 0.1315]) tensor([0.1138, 0.1618, 0.3991, 0.3253]) -Greedy action tensor([-1.6502, 0.3605, 0.3754, 0.0125]) tensor([0.0469, 0.3503, 0.3555, 0.2473]) -Greedy action tensor([-1.8411, -0.4492, 0.7071, -0.0291]) tensor([0.0418, 0.1681, 0.5343, 0.2559]) -Greedy action tensor([-0.5465, -0.4152, 0.1352, 0.1228]) tensor([0.1647, 0.1878, 0.3257, 0.3217]) -Greedy action tensor([-1.9134, -0.4473, 0.6574, -0.1535]) tensor([0.0413, 0.1789, 0.5399, 0.2400]) -Greedy action tensor([-1.9204, -0.4176, 0.6468, -0.1676]) tensor([0.0412, 0.1850, 0.5363, 0.2375]) -Greedy action tensor([-1.9144, -0.3902, 0.6495, -0.1625]) tensor([0.0411, 0.1886, 0.5335, 0.2369]) -Greedy action tensor([-1.8694, -0.4570, 0.6614, -0.1139]) tensor([0.0426, 0.1750, 0.5356, 0.2467]) -Greedy action tensor([-1.9036, -0.2649, 0.6188, -0.1640]) tensor([0.0411, 0.2119, 0.5126, 0.2344]) -Greedy action tensor([-1.5656, -0.5219, 0.7542, -0.3004]) tensor([0.0570, 0.1618, 0.5794, 0.2018]) -Greedy action tensor([-1.9067, -0.4452, 0.6467, -0.1685]) tensor([0.0419, 0.1808, 0.5388, 0.2385]) -Greedy action tensor([-1.9362, -0.4366, 0.6638, -0.1768]) tensor([0.0404, 0.1810, 0.5439, 0.2347]) -Greedy action tensor([-1.5433, -0.4271, 0.7511, 0.3819]) tensor([0.0480, 0.1466, 0.4762, 0.3292]) -Greedy action tensor([-1.8522, -0.1864, 0.5830, -0.1037]) tensor([0.0426, 0.2255, 0.4868, 0.2450]) -Greedy action tensor([-1.8874, -0.4579, 0.6429, -0.1496]) tensor([0.0427, 0.1783, 0.5362, 0.2427]) -Greedy action tensor([-0.8355, 0.8822, 0.0991, 0.1632]) tensor([0.0845, 0.4709, 0.2152, 0.2294]) -Greedy action tensor([-1.9050, -0.4431, 0.6438, -0.1649]) tensor([0.0420, 0.1812, 0.5374, 0.2394]) -Greedy action tensor([-1.9370, -0.4476, 0.6613, -0.1776]) tensor([0.0405, 0.1796, 0.5445, 0.2353]) -Greedy action tensor([-1.3601, -0.4625, 0.6550, 0.5963]) tensor([0.0555, 0.1361, 0.4161, 0.3924]) -Greedy action tensor([-1.4600, -0.4803, 0.4458, 0.1172]) tensor([0.0657, 0.1749, 0.4416, 0.3179]) -Greedy action tensor([-1.8332, -0.4036, 0.6179, -0.1095]) tensor([0.0447, 0.1866, 0.5183, 0.2504]) -Greedy action tensor([-1.7020, -0.4103, 0.5958, 0.0288]) tensor([0.0494, 0.1798, 0.4918, 0.2790]) -Greedy action tensor([-1.6574, -0.6269, 0.7860, 0.0307]) tensor([0.0483, 0.1352, 0.5555, 0.2610]) -Greedy action tensor([-1.9160, -0.4426, 0.6500, -0.1685]) tensor([0.0415, 0.1809, 0.5396, 0.2380]) -Greedy action tensor([-1.8690, -0.4468, 0.6290, -0.1396]) tensor([0.0436, 0.1807, 0.5300, 0.2457]) -Greedy action tensor([-1.6677, -0.5425, 0.5241, -0.0141]) tensor([0.0548, 0.1687, 0.4903, 0.2862]) -Greedy action tensor([-1.7484, -0.4315, 0.5723, -0.0661]) tensor([0.0493, 0.1839, 0.5018, 0.2650]) -Greedy action tensor([ 0.7877, -0.3549, 0.3145, 0.7205]) tensor([0.3476, 0.1109, 0.2166, 0.3250]) -Greedy action tensor([-1.5010, -0.1349, 0.5590, -0.4970]) tensor([0.0645, 0.2530, 0.5063, 0.1761]) -Greedy action tensor([-1.8813, -0.4338, 0.6317, -0.1511]) tensor([0.0430, 0.1830, 0.5312, 0.2428]) -Greedy action tensor([-0.4630, -0.1040, 0.1542, 0.1557]) tensor([0.1628, 0.2331, 0.3018, 0.3023]) -Greedy action tensor([-1.6789, 0.2817, 0.5755, -0.4084]) tensor([0.0472, 0.3352, 0.4496, 0.1681]) -Greedy action tensor([-0.6573, -0.4584, -0.1737, 0.0507]) tensor([0.1703, 0.2078, 0.2762, 0.3457]) -Greedy action tensor([-1.8542, -0.4385, 0.5840, -0.1479]) tensor([0.0453, 0.1866, 0.5187, 0.2495]) -Greedy action tensor([-0.1549, 0.7878, 0.2359, 0.6925]) tensor([0.1355, 0.3479, 0.2003, 0.3163]) -Greedy action tensor([-1.9172, -0.4068, 0.6515, -0.1584]) tensor([0.0410, 0.1857, 0.5352, 0.2381]) -Greedy action tensor([-0.5171, 0.0447, 0.7299, 0.9703]) tensor([0.0938, 0.1645, 0.3265, 0.4152]) -Greedy action tensor([-1.9355, -0.4405, 0.6626, -0.1752]) tensor([0.0405, 0.1805, 0.5438, 0.2353]) -Greedy action tensor([-1.9188, -0.4395, 0.6510, -0.1711]) tensor([0.0413, 0.1814, 0.5399, 0.2373]) -Greedy action tensor([-0.0996, 0.1017, 0.2361, 0.4095]) tensor([0.1892, 0.2314, 0.2647, 0.3148]) -Greedy action tensor([-0.3346, 1.0261, -0.0370, 0.5073]) tensor([0.1167, 0.4552, 0.1572, 0.2709]) -Greedy action tensor([ 1.5243, -0.5613, -0.4947, 0.4981]) tensor([0.6191, 0.0769, 0.0822, 0.2218]) -Greedy action tensor([ 1.4159, -0.4536, -0.1966, 0.4189]) tensor([0.5805, 0.0895, 0.1158, 0.2142]) -Greedy action tensor([ 1.1810, -0.2462, -0.3092, 0.3201]) tensor([0.5296, 0.1271, 0.1193, 0.2239]) -Greedy action tensor([ 1.2069, -0.3700, -0.2077, 0.5537]) tensor([0.5076, 0.1049, 0.1234, 0.2641]) -Greedy action tensor([ 1.5402, -0.2427, -0.5462, 0.3850]) tensor([0.6222, 0.1046, 0.0772, 0.1960]) -Greedy action tensor([ 1.2947, -0.0943, -0.5876, 0.3861]) tensor([0.5541, 0.1382, 0.0844, 0.2234]) -Greedy action tensor([ 1.7253, -0.9542, -0.1474, 0.0824]) tensor([0.7063, 0.0485, 0.1086, 0.1366]) -Greedy action tensor([ 0.5039, -0.0686, 0.2846, -0.1314]) tensor([0.3452, 0.1947, 0.2772, 0.1829]) -Greedy action tensor([ 1.7798, -0.9796, -0.2644, 0.6092]) tensor([0.6653, 0.0421, 0.0861, 0.2064]) -Greedy action tensor([ 1.3128, -0.3840, -0.0711, 0.1482]) tensor([0.5728, 0.1050, 0.1435, 0.1787]) -Greedy action tensor([ 1.6542, -0.8844, -0.0986, 0.6245]) tensor([0.6214, 0.0491, 0.1077, 0.2219]) -Greedy action tensor([ 1.3167, -0.1515, -0.8162, 0.7826]) tensor([0.5168, 0.1190, 0.0612, 0.3030]) -Greedy action tensor([ 1.0114, -0.2660, -0.3212, 0.2604]) tensor([0.4964, 0.1384, 0.1309, 0.2343]) -Greedy action tensor([ 2.0852, -1.3405, -0.5448, 0.3094]) tensor([0.7850, 0.0255, 0.0566, 0.1329]) -Greedy action tensor([ 1.3791, 0.2791, -0.1618, 0.3948]) tensor([0.5206, 0.1733, 0.1115, 0.1946]) -Greedy action tensor([ 1.3469, -0.7183, -0.0124, 0.6226]) tensor([0.5352, 0.0679, 0.1375, 0.2594]) -Greedy action tensor([ 1.4213, -0.6750, -0.1047, 0.0771]) tensor([0.6246, 0.0768, 0.1358, 0.1629]) -Greedy action tensor([1.7071, 0.0586, 0.0934, 0.5161]) tensor([0.5898, 0.1134, 0.1175, 0.1793]) -Greedy action tensor([ 1.9065, -0.6336, -0.3582, 0.6127]) tensor([0.6864, 0.0541, 0.0713, 0.1882]) -Greedy action tensor([ 0.7492, -0.2171, -0.3737, 0.5172]) tensor([0.4002, 0.1523, 0.1302, 0.3173]) -Greedy action tensor([ 1.6090, 0.1996, -0.0483, 0.4088]) tensor([0.5760, 0.1407, 0.1098, 0.1735]) -Greedy action tensor([ 0.8181, -0.2112, -0.1248, 0.0808]) tensor([0.4494, 0.1606, 0.1750, 0.2150]) -Greedy action tensor([ 1.0389, -0.3575, -0.2615, 0.2706]) tensor([0.5041, 0.1248, 0.1373, 0.2338]) -Greedy action tensor([ 1.7509, -0.4061, -0.6583, 0.5713]) tensor([0.6609, 0.0765, 0.0594, 0.2032]) -Greedy action tensor([ 2.3233, -1.1781, -0.3046, 0.5002]) tensor([0.7912, 0.0239, 0.0571, 0.1278]) -Greedy action tensor([ 0.5104, -0.2798, 0.0687, -0.0030]) tensor([0.3710, 0.1684, 0.2386, 0.2220]) -Greedy action tensor([ 1.4516, -0.3996, -0.6297, 0.3645]) tensor([0.6177, 0.0970, 0.0771, 0.2083]) -Greedy action tensor([ 1.2387, -0.4407, -0.2958, 0.3904]) tensor([0.5464, 0.1019, 0.1178, 0.2339]) -Greedy action tensor([ 1.5689, -0.5217, -0.4702, 0.1784]) tensor([0.6655, 0.0823, 0.0866, 0.1657]) -Greedy action tensor([ 0.8360, 0.1274, -0.0220, 0.6039]) tensor([0.3691, 0.1817, 0.1565, 0.2927]) -Greedy action tensor([ 1.4752, -0.8270, 0.3726, 0.5675]) tensor([0.5448, 0.0545, 0.1809, 0.2198]) -Greedy action tensor([ 1.4967, -0.7657, -0.3313, -0.0192]) tensor([0.6736, 0.0701, 0.1083, 0.1479]) -Greedy action tensor([ 1.8277, -0.3620, -0.2841, -0.0033]) tensor([0.7178, 0.0804, 0.0869, 0.1150]) -Greedy action tensor([ 2.2605, -0.6895, 0.0436, 0.6543]) tensor([0.7343, 0.0384, 0.0800, 0.1473]) -Greedy action tensor([ 0.3877, -0.1841, -0.0772, 0.1003]) tensor([0.3398, 0.1918, 0.2135, 0.2549]) -Greedy action tensor([ 1.9404, -0.4202, -0.4081, 0.2983]) tensor([0.7228, 0.0682, 0.0690, 0.1399]) -Greedy action tensor([ 1.8620, -0.3886, -0.7775, 0.2157]) tensor([0.7302, 0.0769, 0.0521, 0.1407]) -Greedy action tensor([ 1.1889, -0.5959, -0.0873, 0.0095]) tensor([0.5700, 0.0957, 0.1591, 0.1753]) -Greedy action tensor([ 1.9734, -0.5657, -0.2227, 0.7573]) tensor([0.6727, 0.0531, 0.0748, 0.1994]) -Greedy action tensor([ 1.8935, -0.5384, -0.1970, 0.3722]) tensor([0.6993, 0.0615, 0.0865, 0.1528]) -Greedy action tensor([ 1.1527, -0.2135, -0.5879, 0.3775]) tensor([0.5288, 0.1349, 0.0928, 0.2436]) -Greedy action tensor([ 0.9960, -0.3762, -0.1227, 0.1810]) tensor([0.4944, 0.1253, 0.1615, 0.2188]) -Greedy action tensor([ 1.3123, -0.7038, -0.3610, 0.4209]) tensor([0.5777, 0.0769, 0.1084, 0.2369]) -Greedy action tensor([ 1.6927, -0.4191, -0.4802, 0.2165]) tensor([0.6833, 0.0827, 0.0778, 0.1562]) -Greedy action tensor([ 1.4390, -0.3837, -0.5451, 0.6185]) tensor([0.5750, 0.0929, 0.0791, 0.2531]) -Greedy action tensor([ 1.5681, -0.0238, -0.5067, 0.2312]) tensor([0.6282, 0.1279, 0.0789, 0.1650]) -Greedy action tensor([ 2.0058, -0.9578, -0.3234, 0.2916]) tensor([0.7524, 0.0388, 0.0733, 0.1355]) -Greedy action tensor([ 2.0778, -0.4417, -0.2667, 0.1528]) tensor([0.7563, 0.0609, 0.0725, 0.1103]) -Greedy action tensor([ 1.5493, -0.6449, -0.1342, 0.4241]) tensor([0.6166, 0.0687, 0.1145, 0.2001]) -Greedy action tensor([ 1.8408, -0.0611, -0.8218, 0.4916]) tensor([0.6764, 0.1010, 0.0472, 0.1755]) -Greedy action tensor([ 1.2735, -0.5640, -0.3616, 0.3158]) tensor([0.5754, 0.0916, 0.1122, 0.2208]) -Greedy action tensor([ 1.2369, -0.6088, -0.3762, 0.5820]) tensor([0.5329, 0.0841, 0.1062, 0.2768]) -Greedy action tensor([ 0.6229, -0.5043, -0.2433, 0.3124]) tensor([0.4036, 0.1307, 0.1697, 0.2959]) -Greedy action tensor([ 1.5793, -0.2460, -0.4073, 0.4749]) tensor([0.6136, 0.0989, 0.0842, 0.2034]) -Greedy action tensor([ 1.5658, -0.6266, -0.3810, 0.6599]) tensor([0.6029, 0.0673, 0.0861, 0.2437]) -Greedy action tensor([ 2.1539, -1.1064, -0.5492, 0.6101]) tensor([0.7582, 0.0291, 0.0508, 0.1619]) -Greedy action tensor([ 1.0802, -0.6437, -0.3495, 0.6784]) tensor([0.4792, 0.0855, 0.1147, 0.3206]) -Greedy action tensor([ 1.3871, -0.0549, -0.7065, 0.2711]) tensor([0.5927, 0.1401, 0.0730, 0.1941]) -Greedy action tensor([ 1.2793, -0.4894, -0.1624, 0.2417]) tensor([0.5677, 0.0968, 0.1343, 0.2012]) -Greedy action tensor([ 2.2208, -1.1609, -0.2346, 0.4587]) tensor([0.7743, 0.0263, 0.0665, 0.1329]) -Greedy action tensor([ 2.0463, -0.5360, -0.3612, 0.1810]) tensor([0.7573, 0.0572, 0.0682, 0.1173]) -Greedy action tensor([ 1.4805, -0.4257, -0.0390, 0.2545]) tensor([0.6021, 0.0895, 0.1317, 0.1767]) -Greedy action tensor([ 1.7144, -0.7103, -0.2759, 0.2655]) tensor([0.6849, 0.0606, 0.0936, 0.1608]) -Greedy action tensor([ 1.0536, -0.1210, -0.3477, -0.1565]) tensor([0.5395, 0.1667, 0.1329, 0.1609]) -Greedy action tensor([ 1.4272, -0.5663, -0.7819, 0.7408]) tensor([0.5716, 0.0779, 0.0628, 0.2877]) -Greedy action tensor([ 1.5530, -0.7508, -0.2485, 0.2957]) tensor([0.6454, 0.0645, 0.1065, 0.1836]) -Greedy action tensor([ 1.3130, 0.0161, -0.2282, -0.0376]) tensor([0.5725, 0.1565, 0.1226, 0.1483]) -Greedy action tensor([ 1.7031, -0.4953, -0.5493, 0.6295]) tensor([0.6419, 0.0712, 0.0675, 0.2194]) -Greedy action tensor([ 2.4690, -1.3268, -0.4482, 0.9526]) tensor([0.7716, 0.0173, 0.0417, 0.1694]) -Greedy action tensor([ 2.1298, -0.4468, -0.1488, 0.6412]) tensor([0.7122, 0.0541, 0.0730, 0.1607]) -Greedy action tensor([ 1.1705, -0.7642, -0.2742, 0.5697]) tensor([0.5185, 0.0749, 0.1223, 0.2843]) -Greedy action tensor([ 1.4874, -0.2821, -0.5060, 0.4169]) tensor([0.6063, 0.1033, 0.0826, 0.2078]) -Greedy action tensor([ 0.6773, -0.4238, 0.0095, 0.2218]) tensor([0.4033, 0.1341, 0.2068, 0.2558]) -Greedy action tensor([ 2.4243, -0.7606, -0.5234, 0.5905]) tensor([0.7977, 0.0330, 0.0418, 0.1275]) -Greedy action tensor([ 1.6564, -0.7261, -0.2029, 0.5735]) tensor([0.6302, 0.0582, 0.0982, 0.2134]) -Greedy action tensor([ 1.5666, -0.8075, -0.2706, 0.4487]) tensor([0.6332, 0.0589, 0.1008, 0.2070]) -Greedy action tensor([ 0.9957, -0.4171, -0.2340, -0.0772]) tensor([0.5325, 0.1296, 0.1557, 0.1821]) -Greedy action tensor([ 1.4847, 0.1976, -0.0204, 0.7823]) tensor([0.5016, 0.1385, 0.1114, 0.2485]) -Greedy action tensor([ 1.4806, -0.7088, -0.2324, 0.0807]) tensor([0.6498, 0.0728, 0.1172, 0.1603]) -Greedy action tensor([ 1.3469, -0.5389, -0.6614, 0.3789]) tensor([0.6003, 0.0911, 0.0806, 0.2280]) -Greedy action tensor([ 1.6485, -1.0040, -0.2758, 0.4021]) tensor([0.6649, 0.0469, 0.0971, 0.1912]) -Greedy action tensor([-0.9011, 0.1214, 0.8525, -0.8068]) tensor([0.0939, 0.2609, 0.5421, 0.1031]) -Greedy action tensor([1.2241, 0.5096, 0.1061, 0.7001]) tensor([0.4152, 0.2032, 0.1357, 0.2459]) -Greedy action tensor([-0.1490, 0.3005, -0.6639, 0.1843]) tensor([0.2193, 0.3437, 0.1310, 0.3060]) -Greedy action tensor([-0.0982, -0.5404, 0.5811, -0.3083]) tensor([0.2260, 0.1452, 0.4457, 0.1831]) -Greedy action tensor([ 0.7304, 0.0466, -0.3558, 0.0103]) tensor([0.4294, 0.2167, 0.1449, 0.2090]) -Greedy action tensor([ 0.0724, -1.8192, 0.5853, -0.5356]) tensor([0.2971, 0.0448, 0.4963, 0.1618]) -Greedy action tensor([-0.6649, -1.1790, 0.1278, 0.6605]) tensor([0.1321, 0.0790, 0.2918, 0.4971]) -Greedy action tensor([ 0.3854, -1.5442, 0.4121, 1.7871]) tensor([0.1604, 0.0233, 0.1647, 0.6516]) -Greedy action tensor([ 3.8545e-01, -2.2927e-03, 2.3120e+00, -1.5865e-01]) tensor([0.1096, 0.0744, 0.7524, 0.0636]) -Greedy action tensor([-0.1037, -0.4679, 1.4126, -0.6414]) tensor([0.1463, 0.1017, 0.6666, 0.0855]) -Greedy action tensor([ 0.2835, -1.7207, -0.5795, 0.7209]) tensor([0.3220, 0.0434, 0.1359, 0.4987]) -Greedy action tensor([ 1.3546, -0.6841, -0.2686, 0.8394]) tensor([0.5195, 0.0676, 0.1025, 0.3104]) -Greedy action tensor([ 0.6255, -1.8335, 0.0028, 0.7683]) tensor([0.3603, 0.0308, 0.1933, 0.4156]) -Greedy action tensor([ 1.4865, -0.6625, 0.2724, 0.1544]) tensor([0.5961, 0.0695, 0.1770, 0.1573]) -Greedy action tensor([ 0.3536, 0.6213, 0.9429, -0.5655]) tensor([0.2218, 0.2899, 0.3998, 0.0885]) -Greedy action tensor([ 0.8719, -0.0748, 0.2448, 1.0117]) tensor([0.3255, 0.1263, 0.1739, 0.3743]) -Greedy action tensor([ 0.3170, -1.0449, 0.5703, 0.3031]) tensor([0.2832, 0.0726, 0.3649, 0.2793]) -Greedy action tensor([ 0.8865, 0.0987, 1.4994, -0.1274]) tensor([0.2730, 0.1242, 0.5039, 0.0990]) -Greedy action tensor([ 2.0815, -0.9834, 0.4504, 0.7457]) tensor([0.6643, 0.0310, 0.1300, 0.1747]) -Greedy action tensor([ 0.1612, -1.1126, 0.2137, 1.5274]) tensor([0.1599, 0.0447, 0.1685, 0.6269]) -Greedy action tensor([ 1.0477, 0.6811, 0.9964, -0.2415]) tensor([0.3426, 0.2375, 0.3255, 0.0944]) -Greedy action tensor([ 0.1468, -0.6745, -0.9858, 0.4253]) tensor([0.3243, 0.1427, 0.1045, 0.4285]) -Greedy action tensor([ 0.1727, -1.3526, 0.6566, 0.1107]) tensor([0.2646, 0.0576, 0.4292, 0.2487]) -Greedy action tensor([-0.7403, -0.9813, -0.1086, 0.4451]) tensor([0.1441, 0.1132, 0.2711, 0.4716]) -Greedy action tensor([-0.6142, -0.8247, 0.2248, -0.0398]) tensor([0.1695, 0.1373, 0.3922, 0.3010]) -Greedy action tensor([ 0.9877, -0.2062, -0.5033, 2.3101]) tensor([0.1894, 0.0574, 0.0426, 0.7106]) -Greedy action tensor([ 0.2292, -1.4100, 0.1328, 0.1694]) tensor([0.3285, 0.0638, 0.2983, 0.3094]) -Greedy action tensor([-0.5441, -0.9290, 0.4192, 0.4215]) tensor([0.1444, 0.0982, 0.3783, 0.3791]) -Greedy action tensor([ 0.2819, -1.7987, 0.2009, 0.0863]) tensor([0.3485, 0.0435, 0.3214, 0.2866]) -Greedy action tensor([ 0.6045, -0.0205, -0.7295, 0.5951]) tensor([0.3585, 0.1919, 0.0944, 0.3552]) -Greedy action tensor([-0.2041, -1.1743, -1.2584, 1.5306]) tensor([0.1352, 0.0513, 0.0471, 0.7664]) -Greedy action tensor([ 2.1907, -0.7075, 1.1041, 0.1860]) tensor([0.6548, 0.0361, 0.2209, 0.0882]) -Greedy action tensor([-0.0385, -0.3215, -0.5092, -0.3350]) tensor([0.3204, 0.2414, 0.2001, 0.2382]) -Greedy action tensor([-0.7522, -2.4054, 0.5093, 0.1045]) tensor([0.1413, 0.0270, 0.4989, 0.3328]) -Greedy action tensor([-0.1227, -0.3519, -0.1448, -0.3035]) tensor([0.2772, 0.2204, 0.2711, 0.2313]) -Greedy action tensor([ 1.0977, -0.0781, 0.1091, 0.0519]) tensor([0.4921, 0.1518, 0.1831, 0.1729]) -Greedy action tensor([-1.3899, -0.0273, -0.3373, -0.3498]) tensor([0.0943, 0.3685, 0.2703, 0.2669]) -Greedy action tensor([ 0.4119, -0.3180, 0.4997, -0.0272]) tensor([0.3107, 0.1497, 0.3392, 0.2003]) -Greedy action tensor([ 0.6317, -0.7416, -0.5359, -0.3674]) tensor([0.5174, 0.1311, 0.1610, 0.1905]) -Greedy action tensor([ 0.4814, -1.4555, -0.1085, 0.9413]) tensor([0.3046, 0.0439, 0.1689, 0.4825]) -Greedy action tensor([-1.0059, -1.0861, 0.6742, -0.1784]) tensor([0.1044, 0.0964, 0.5603, 0.2389]) -Greedy action tensor([ 1.5287, -0.7348, 0.8089, 0.0999]) tensor([0.5463, 0.0568, 0.2660, 0.1309]) -Greedy action tensor([ 0.3355, -0.4924, 0.6183, 0.8950]) tensor([0.2215, 0.0968, 0.2940, 0.3877]) -Greedy action tensor([ 0.2265, -2.1283, -0.3440, 1.1867]) tensor([0.2341, 0.0222, 0.1323, 0.6114]) -Greedy action tensor([0.0441, 0.1227, 0.5442, 0.8526]) tensor([0.1674, 0.1811, 0.2760, 0.3756]) -Greedy action tensor([ 0.5312, -0.8070, 1.0869, 0.2232]) tensor([0.2673, 0.0701, 0.4660, 0.1965]) -Greedy action tensor([-0.2645, -0.5562, 0.6265, -0.2794]) tensor([0.1934, 0.1445, 0.4715, 0.1906]) -Greedy action tensor([ 0.7840, -1.4673, 0.1936, 0.9202]) tensor([0.3565, 0.0375, 0.1975, 0.4085]) -Greedy action tensor([ 0.6363, -0.0340, 1.1274, -0.0896]) tensor([0.2755, 0.1409, 0.4502, 0.1333]) -Greedy action tensor([ 0.0888, -1.2819, -0.4221, 0.1004]) tensor([0.3490, 0.0886, 0.2094, 0.3530]) -Greedy action tensor([ 0.3695, -0.2902, 0.2978, 1.3762]) tensor([0.1929, 0.0997, 0.1795, 0.5279]) -Greedy action tensor([ 1.5423, -0.1433, 0.6584, 0.8917]) tensor([0.4716, 0.0874, 0.1949, 0.2461]) -Greedy action tensor([ 0.8832, 0.0489, 1.4199, -0.7488]) tensor([0.2994, 0.1300, 0.5121, 0.0585]) -Greedy action tensor([-0.2194, 0.9256, -0.1094, -0.2393]) tensor([0.1603, 0.5037, 0.1789, 0.1571]) -Greedy action tensor([-0.0059, -0.6249, 0.0985, -0.4044]) tensor([0.3012, 0.1622, 0.3344, 0.2022]) -Greedy action tensor([ 1.2363, -1.4518, -0.0613, 0.2805]) tensor([0.5795, 0.0394, 0.1583, 0.2228]) -Greedy action tensor([ 0.3813, -0.3495, 0.4548, 0.9224]) tensor([0.2339, 0.1126, 0.2517, 0.4018]) -Greedy action tensor([-0.1821, -0.9495, 0.2283, -0.1389]) tensor([0.2490, 0.1156, 0.3754, 0.2600]) -Greedy action tensor([ 0.2030, 0.5740, 0.0823, -0.0358]) tensor([0.2425, 0.3515, 0.2150, 0.1910]) -Greedy action tensor([ 0.1074, 0.2469, -0.4435, 0.4798]) tensor([0.2394, 0.2752, 0.1380, 0.3474]) -Greedy action tensor([-0.4224, -0.2419, 1.3553, -0.9410]) tensor([0.1148, 0.1375, 0.6793, 0.0684]) -Greedy action tensor([ 0.7165, -2.2749, 0.0353, 0.3542]) tensor([0.4440, 0.0223, 0.2247, 0.3090]) -Greedy action tensor([ 0.5502, 0.0378, 1.5799, -0.1211]) tensor([0.2037, 0.1220, 0.5703, 0.1041]) -Greedy action tensor([-0.0889, -1.2048, 1.5134, 1.0296]) tensor([0.1069, 0.0350, 0.5308, 0.3272]) -Greedy action tensor([ 0.2942, -1.5002, 0.0992, 0.6137]) tensor([0.2971, 0.0494, 0.2445, 0.4090]) -Greedy action tensor([-0.3022, 0.1105, -0.0820, -0.6553]) tensor([0.2242, 0.3388, 0.2795, 0.1575]) -Greedy action tensor([ 0.4599, 0.4436, 0.2073, -0.5530]) tensor([0.3201, 0.3149, 0.2487, 0.1163]) -Greedy action tensor([-0.3426, -0.4157, 0.8105, -1.6216]) tensor([0.1860, 0.1729, 0.5893, 0.0518]) -Greedy action tensor([ 0.3002, -1.7389, 0.4148, 0.1483]) tensor([0.3215, 0.0418, 0.3605, 0.2762]) -Greedy action tensor([-0.5389, -1.6513, 0.6637, -0.6198]) tensor([0.1792, 0.0589, 0.5966, 0.1653]) -Greedy action tensor([ 0.9943, 0.0336, 0.8159, -0.1771]) tensor([0.3954, 0.1513, 0.3308, 0.1225]) -Greedy action tensor([ 1.0308, -0.0447, -0.2273, 0.8504]) tensor([0.4065, 0.1387, 0.1155, 0.3394]) -Greedy action tensor([ 0.5195, 0.1573, 0.1254, -0.1998]) tensor([0.3499, 0.2436, 0.2360, 0.1705]) -Greedy action tensor([-0.9816, -0.0625, -0.4246, -0.6537]) tensor([0.1506, 0.3775, 0.2628, 0.2090]) -Greedy action tensor([-0.5785, -0.1030, 1.4726, -0.7251]) tensor([0.0889, 0.1430, 0.6913, 0.0768]) -Greedy action tensor([ 0.0423, 0.0737, 0.1525, -0.5473]) tensor([0.2700, 0.2787, 0.3015, 0.1498]) -Greedy action tensor([-9.7457e-01, -3.7462e-04, 5.9777e-01, 1.6192e-01]) tensor([0.0863, 0.2287, 0.4160, 0.2690]) -Greedy action tensor([ 0.1783, -2.1125, 0.0046, 0.9061]) tensor([0.2492, 0.0252, 0.2095, 0.5160]) -Greedy action tensor([-1.3100, 0.2674, 1.1420, -1.2472]) tensor([0.0540, 0.2615, 0.6270, 0.0575]) -Greedy action tensor([ 0.3549, -0.8840, 1.3541, 0.8202]) tensor([0.1786, 0.0518, 0.4852, 0.2845]) -Greedy action tensor([-0.9976, -0.3713, 0.3072, -0.2212]) tensor([0.1145, 0.2142, 0.4223, 0.2489]) -Greedy action tensor([ 0.5412, -0.1708, -0.1113, -0.0642]) tensor([0.3910, 0.1919, 0.2036, 0.2135]) -Greedy action tensor([ 1.0926, -0.9375, 0.0135, -0.5512]) tensor([0.6008, 0.0789, 0.2042, 0.1161]) -Greedy action tensor([0.1204, 0.2814, 0.0826, 0.0855]) tensor([0.2437, 0.2863, 0.2347, 0.2354]) -Greedy action tensor([ 0.7152, -0.2535, -0.0373, -0.0798]) tensor([0.4343, 0.1649, 0.2046, 0.1961]) -Greedy action tensor([ 0.7076, 0.1362, -0.1575, 0.1421]) tensor([0.3916, 0.2211, 0.1649, 0.2224]) -Greedy action tensor([ 0.9125, -0.3775, 0.0166, -0.0935]) tensor([0.4880, 0.1343, 0.1992, 0.1784]) -Greedy action tensor([ 0.4929, -0.0591, -0.1479, 0.0488]) tensor([0.3644, 0.2098, 0.1920, 0.2337]) -Greedy action tensor([ 0.7211, -0.6423, -0.0569, -0.3406]) tensor([0.4852, 0.1241, 0.2229, 0.1678]) -Greedy action tensor([ 0.8777, -0.8139, 0.1332, -0.5173]) tensor([0.5244, 0.0966, 0.2491, 0.1300]) -Greedy action tensor([ 0.4306, -0.1437, -0.1964, -0.0091]) tensor([0.3648, 0.2054, 0.1948, 0.2350]) -Greedy action tensor([ 0.8556, -0.7378, -0.1005, -0.2377]) tensor([0.5201, 0.1057, 0.1999, 0.1743]) -Greedy action tensor([ 0.8843, -0.5381, -0.0395, -0.2922]) tensor([0.5137, 0.1239, 0.2040, 0.1584]) -Greedy action tensor([ 0.9860, 0.6249, -0.0920, -0.1575]) tensor([0.4245, 0.2958, 0.1444, 0.1353]) -Greedy action tensor([ 0.3756, -0.0691, -0.0266, -0.1457]) tensor([0.3444, 0.2208, 0.2303, 0.2045]) -Greedy action tensor([ 0.3547, -0.4371, -0.1777, -0.1003]) tensor([0.3739, 0.1694, 0.2195, 0.2372]) -Greedy action tensor([ 0.7185, -0.3360, 0.0828, -0.5910]) tensor([0.4656, 0.1622, 0.2466, 0.1257]) -Greedy action tensor([ 0.5495, -0.0176, -0.1573, -0.2793]) tensor([0.4005, 0.2272, 0.1975, 0.1748]) -Greedy action tensor([ 0.6459, -0.0348, -0.0872, 0.0752]) tensor([0.3919, 0.1984, 0.1883, 0.2215]) -Greedy action tensor([ 0.3436, 0.2451, -0.1331, 0.0728]) tensor([0.3040, 0.2755, 0.1887, 0.2319]) -Greedy action tensor([ 1.2037, -0.6475, -0.1693, -0.4938]) tensor([0.6275, 0.0986, 0.1590, 0.1149]) -Greedy action tensor([ 0.6484, -0.4844, 0.1699, -0.5734]) tensor([0.4471, 0.1440, 0.2771, 0.1318]) -Greedy action tensor([ 0.7738, -0.5550, 0.1047, -0.2746]) tensor([0.4701, 0.1245, 0.2407, 0.1647]) -Greedy action tensor([ 0.8503, -0.6386, -0.0489, -0.2822]) tensor([0.5116, 0.1154, 0.2082, 0.1648]) -Greedy action tensor([ 0.7839, -0.6895, -0.0115, -0.1571]) tensor([0.4829, 0.1107, 0.2180, 0.1884]) -Greedy action tensor([ 0.9160, -0.3600, -0.2492, -0.1426]) tensor([0.5160, 0.1440, 0.1609, 0.1790]) -Greedy action tensor([ 0.3764, 0.0058, 0.0471, -0.2793]) tensor([0.3414, 0.2357, 0.2456, 0.1772]) -Greedy action tensor([0.2436, 0.1496, 0.2240, 0.2691]) tensor([0.2553, 0.2324, 0.2504, 0.2619]) -Greedy action tensor([ 5.7385e-01, -1.4258e-01, -4.4227e-05, -6.4003e-02]) tensor([0.3876, 0.1893, 0.2183, 0.2048]) -Greedy action tensor([ 1.0092, -0.9964, 0.0483, -0.6122]) tensor([0.5832, 0.0785, 0.2231, 0.1152]) -Greedy action tensor([ 0.3027, 0.0014, -0.0464, -0.1699]) tensor([0.3259, 0.2411, 0.2299, 0.2031]) -Greedy action tensor([ 0.7179, -0.5406, -0.1008, -0.4752]) tensor([0.4930, 0.1400, 0.2174, 0.1495]) -Greedy action tensor([ 0.8649, -0.3755, 0.0436, -0.2942]) tensor([0.4895, 0.1416, 0.2153, 0.1536]) -Greedy action tensor([ 0.6622, -0.6075, -0.1393, -0.1809]) tensor([0.4630, 0.1301, 0.2077, 0.1993]) -Greedy action tensor([ 0.2293, 0.1397, -0.0786, 0.0187]) tensor([0.2891, 0.2643, 0.2125, 0.2342]) -Greedy action tensor([ 0.7229, -0.2922, -0.1027, -0.0880]) tensor([0.4455, 0.1614, 0.1951, 0.1980]) -Greedy action tensor([ 0.9989, -0.6557, -0.1045, -0.6671]) tensor([0.5841, 0.1117, 0.1938, 0.1104]) -Greedy action tensor([ 0.6784, -0.4151, -0.2120, -0.2489]) tensor([0.4670, 0.1565, 0.1917, 0.1848]) -Greedy action tensor([ 0.7100, -0.6024, -0.0582, -0.2516]) tensor([0.4727, 0.1273, 0.2193, 0.1807]) -Greedy action tensor([ 0.8731, -0.4880, -0.1068, -0.2807]) tensor([0.5136, 0.1317, 0.1928, 0.1620]) -Greedy action tensor([ 0.6501, -0.3818, -0.0946, -0.1957]) tensor([0.4424, 0.1576, 0.2101, 0.1899]) -Greedy action tensor([ 0.6212, -0.2381, -0.0722, -0.1745]) tensor([0.4211, 0.1783, 0.2105, 0.1901]) -Greedy action tensor([ 0.9147, -0.1811, 0.0376, -0.6541]) tensor([0.5106, 0.1707, 0.2124, 0.1063]) -Greedy action tensor([ 0.8117, -0.3007, -0.1965, -0.3853]) tensor([0.5011, 0.1647, 0.1828, 0.1514]) -Greedy action tensor([ 1.0240, -0.2943, -0.2685, -0.0965]) tensor([0.5352, 0.1432, 0.1470, 0.1746]) -Greedy action tensor([ 0.5755, -0.3241, -0.0975, -0.2908]) tensor([0.4278, 0.1740, 0.2183, 0.1799]) -Greedy action tensor([ 0.1519, -0.2137, -0.1278, -0.2716]) tensor([0.3221, 0.2235, 0.2435, 0.2109]) -Greedy action tensor([ 0.6145, -0.4523, -0.0346, -0.1934]) tensor([0.4325, 0.1488, 0.2260, 0.1928]) -Greedy action tensor([ 0.2527, -0.0757, -0.0394, -0.0877]) tensor([0.3146, 0.2266, 0.2349, 0.2239]) -Greedy action tensor([ 0.8898, -0.5040, -0.0560, -0.3796]) tensor([0.5215, 0.1294, 0.2025, 0.1465]) -Greedy action tensor([ 0.2906, -0.2664, -0.2097, -0.2376]) tensor([0.3612, 0.2069, 0.2190, 0.2130]) -Greedy action tensor([ 0.5708, -0.4171, -0.0642, -0.1219]) tensor([0.4162, 0.1550, 0.2206, 0.2082]) -Greedy action tensor([ 1.1658, -0.7906, 0.2133, -0.6132]) tensor([0.5896, 0.0834, 0.2275, 0.0995]) -Greedy action tensor([ 0.6389, -0.4306, -0.1131, -0.4269]) tensor([0.4632, 0.1589, 0.2183, 0.1595]) -Greedy action tensor([ 0.7216, -0.0423, -0.0298, -0.3160]) tensor([0.4363, 0.2033, 0.2058, 0.1546]) -Greedy action tensor([ 0.8551, -0.4641, -0.0427, -0.4349]) tensor([0.5128, 0.1371, 0.2089, 0.1412]) -Greedy action tensor([ 0.8362, -0.5073, -0.0608, -0.4661]) tensor([0.5153, 0.1345, 0.2101, 0.1401]) -Greedy action tensor([ 0.8565, -0.3717, -0.1032, -0.2397]) tensor([0.4975, 0.1457, 0.1905, 0.1662]) -Greedy action tensor([ 0.6615, -0.2577, -0.0748, -0.2274]) tensor([0.4369, 0.1743, 0.2092, 0.1796]) -Greedy action tensor([ 0.9773, -0.3788, -0.0359, -0.5234]) tensor([0.5424, 0.1398, 0.1969, 0.1209]) -Greedy action tensor([ 0.6627, -0.4572, -0.1286, -0.5283]) tensor([0.4800, 0.1566, 0.2175, 0.1459]) -Greedy action tensor([ 0.7510, -0.2007, -0.0686, -0.0686]) tensor([0.4410, 0.1703, 0.1943, 0.1943]) -Greedy action tensor([ 0.6818, -0.1785, -0.1507, -0.3878]) tensor([0.4543, 0.1922, 0.1976, 0.1559]) -Greedy action tensor([ 0.2802, -0.1292, -0.0272, -0.0529]) tensor([0.3209, 0.2131, 0.2360, 0.2300]) -Greedy action tensor([ 0.4427, -0.1034, -0.0730, -0.1716]) tensor([0.3680, 0.2131, 0.2197, 0.1991]) -Greedy action tensor([ 1.0039, -1.2837, 0.1734, -0.8714]) tensor([0.5915, 0.0600, 0.2578, 0.0907]) -Greedy action tensor([ 0.9963, -0.4976, -0.1475, -0.4169]) tensor([0.5598, 0.1257, 0.1783, 0.1362]) -Greedy action tensor([ 0.8394, -0.5020, 0.0681, -0.2206]) tensor([0.4830, 0.1263, 0.2233, 0.1673]) -Greedy action tensor([ 1.0291, -0.6055, 0.0692, -0.6500]) tensor([0.5667, 0.1105, 0.2170, 0.1057]) -Greedy action tensor([ 0.7207, -0.2844, -0.3466, -0.6280]) tensor([0.5077, 0.1858, 0.1746, 0.1318]) -Greedy action tensor([ 0.7739, -0.4286, -0.1465, -0.4128]) tensor([0.4990, 0.1499, 0.1988, 0.1523]) -Greedy action tensor([ 0.6108, -0.2584, -0.0091, -0.1608]) tensor([0.4133, 0.1733, 0.2224, 0.1911]) -Greedy action tensor([ 0.6170, -0.3995, 0.0042, -0.3113]) tensor([0.4350, 0.1574, 0.2357, 0.1719]) -Greedy action tensor([ 0.9843, -0.3787, 0.0465, -0.3623]) tensor([0.5242, 0.1341, 0.2052, 0.1364]) -Greedy action tensor([ 0.7648, -0.4951, 0.0449, -0.3107]) tensor([0.4736, 0.1344, 0.2305, 0.1615]) -Greedy action tensor([ 0.6474, -0.5828, -0.0075, -0.2064]) tensor([0.4469, 0.1306, 0.2322, 0.1903]) -Greedy action tensor([ 0.5896, -0.1602, -0.0524, -0.1593]) tensor([0.4046, 0.1912, 0.2129, 0.1913]) -Greedy action tensor([ 0.9553, -0.3776, -0.2003, -0.1155]) tensor([0.5205, 0.1373, 0.1639, 0.1784]) -Greedy action tensor([ 0.3934, 0.1284, -0.1015, 0.0177]) tensor([0.3264, 0.2504, 0.1990, 0.2242]) -Greedy action tensor([ 1.2497, -0.6412, -0.0217, -0.7199]) tensor([0.6366, 0.0961, 0.1785, 0.0888]) -Greedy action tensor([ 0.6023, -0.3978, -0.0087, -0.1079]) tensor([0.4163, 0.1531, 0.2260, 0.2046]) -Greedy action tensor([ 0.7596, -0.5319, 0.1720, -0.5426]) tensor([0.4756, 0.1307, 0.2643, 0.1293]) -Greedy action tensor([-1.9178, -0.4251, 0.6572, -0.1625]) tensor([0.0410, 0.1826, 0.5389, 0.2374]) -Greedy action tensor([-1.6096, 0.2722, 0.3786, 0.0622]) tensor([0.0495, 0.3252, 0.3617, 0.2636]) -Greedy action tensor([-0.9801, 0.1528, 0.1063, 0.0539]) tensor([0.1012, 0.3142, 0.2999, 0.2846]) -Greedy action tensor([-0.7564, 0.3590, 0.1400, -0.0441]) tensor([0.1171, 0.3572, 0.2870, 0.2387]) -Greedy action tensor([-1.0299, -0.6961, 0.0728, 0.5960]) tensor([0.0953, 0.1331, 0.2871, 0.4845]) -Greedy action tensor([-1.8236, -0.3702, 0.6220, -0.0475]) tensor([0.0440, 0.1883, 0.5078, 0.2600]) -Greedy action tensor([-1.8025, -0.4851, 0.6010, -0.1108]) tensor([0.0471, 0.1759, 0.5212, 0.2558]) -Greedy action tensor([-1.7694, -0.3582, 0.5873, -0.0664]) tensor([0.0473, 0.1939, 0.4992, 0.2596]) -Greedy action tensor([-1.9473, -0.4541, 0.6681, -0.1822]) tensor([0.0401, 0.1783, 0.5477, 0.2340]) -Greedy action tensor([-1.7537, -0.5207, 0.5710, -0.1187]) tensor([0.0505, 0.1734, 0.5167, 0.2593]) -Greedy action tensor([-1.7609, -0.3654, 0.5699, -0.0535]) tensor([0.0480, 0.1937, 0.4936, 0.2647]) -Greedy action tensor([-0.3451, -0.4132, 0.1801, 0.2048]) tensor([0.1866, 0.1743, 0.3156, 0.3235]) -Greedy action tensor([-1.9218, -0.4489, 0.6545, -0.1710]) tensor([0.0412, 0.1797, 0.5418, 0.2373]) -Greedy action tensor([-1.8978, -0.4013, 0.6421, -0.1533]) tensor([0.0419, 0.1871, 0.5312, 0.2398]) -Greedy action tensor([0.4058, 0.1308, 0.1022, 0.0714]) tensor([0.3112, 0.2364, 0.2297, 0.2227]) -Greedy action tensor([-1.8846, -0.4417, 0.6560, -0.1291]) tensor([0.0422, 0.1786, 0.5352, 0.2441]) -Greedy action tensor([-1.6383, -0.5144, 0.5703, 0.0606]) tensor([0.0536, 0.1650, 0.4881, 0.2932]) -Greedy action tensor([-1.8633, -0.4833, 0.6244, -0.1380]) tensor([0.0442, 0.1757, 0.5319, 0.2482]) -Greedy action tensor([-1.6951, -0.2093, 0.5009, -0.0218]) tensor([0.0507, 0.2239, 0.4554, 0.2700]) -Greedy action tensor([-1.7678, -0.3295, 0.6325, -0.2577]) tensor([0.0482, 0.2029, 0.5310, 0.2180]) -Greedy action tensor([-1.7860, -0.1997, 0.5489, -0.1037]) tensor([0.0463, 0.2263, 0.4784, 0.2491]) -Greedy action tensor([-1.7551, -0.3954, 0.5806, -0.0741]) tensor([0.0485, 0.1891, 0.5017, 0.2607]) -Greedy action tensor([-0.7709, -0.0247, 0.5633, 1.3597]) tensor([0.0652, 0.1376, 0.2477, 0.5494]) -Greedy action tensor([-1.2315, 0.1318, 0.5601, 0.4906]) tensor([0.0606, 0.2368, 0.3635, 0.3391]) -Greedy action tensor([-1.9286, -0.4520, 0.6580, -0.1756]) tensor([0.0409, 0.1792, 0.5437, 0.2362]) -Greedy action tensor([-1.8174, -0.4655, 0.6049, -0.0776]) tensor([0.0458, 0.1770, 0.5163, 0.2609]) -Greedy action tensor([-1.7892, -0.4091, 0.5958, -0.0995]) tensor([0.0471, 0.1870, 0.5110, 0.2549]) -Greedy action tensor([-1.9452, -0.4551, 0.6688, -0.1802]) tensor([0.0401, 0.1780, 0.5476, 0.2343]) -Greedy action tensor([-1.9147, -0.4126, 0.6502, -0.1603]) tensor([0.0412, 0.1850, 0.5356, 0.2382]) -Greedy action tensor([-1.2394, 0.6109, 0.2489, 0.2986]) tensor([0.0608, 0.3868, 0.2693, 0.2831]) -Greedy action tensor([-1.7910, -0.3171, 0.6269, -0.0719]) tensor([0.0451, 0.1970, 0.5062, 0.2517]) -Greedy action tensor([-1.3623, -0.4921, 0.5168, -0.3680]) tensor([0.0791, 0.1889, 0.5181, 0.2139]) -Greedy action tensor([-1.7846, -0.5113, 0.5563, -0.1133]) tensor([0.0493, 0.1761, 0.5123, 0.2623]) -Greedy action tensor([-1.9431, -0.4572, 0.6741, -0.1762]) tensor([0.0400, 0.1770, 0.5486, 0.2344]) -Greedy action tensor([-1.9301, -0.3551, 0.6451, -0.1684]) tensor([0.0403, 0.1949, 0.5299, 0.2349]) -Greedy action tensor([-1.0924, -0.4741, 0.2952, 0.1218]) tensor([0.0978, 0.1814, 0.3916, 0.3292]) -Greedy action tensor([-1.7578, -0.4384, 0.5846, -0.1504]) tensor([0.0497, 0.1858, 0.5168, 0.2478]) -Greedy action tensor([-1.4309, 0.5952, 0.2683, 0.1065]) tensor([0.0535, 0.4055, 0.2924, 0.2487]) -Greedy action tensor([ 0.5099, 1.2141, -0.0346, 0.0308]) tensor([0.2369, 0.4790, 0.1374, 0.1467]) -Greedy action tensor([-1.9373, -0.4360, 0.6614, -0.1759]) tensor([0.0404, 0.1813, 0.5432, 0.2351]) -Greedy action tensor([-1.7910, 0.0748, 0.5162, -0.0924]) tensor([0.0435, 0.2812, 0.4373, 0.2379]) -Greedy action tensor([-1.8851, -0.3984, 0.6272, -0.1574]) tensor([0.0428, 0.1891, 0.5274, 0.2407]) -Greedy action tensor([-1.9026, -0.4432, 0.6430, -0.1595]) tensor([0.0421, 0.1810, 0.5364, 0.2404]) -Greedy action tensor([-1.8759, -0.3596, 0.6206, -0.1410]) tensor([0.0428, 0.1950, 0.5196, 0.2426]) -Greedy action tensor([-1.9146, -0.4332, 0.6570, -0.1647]) tensor([0.0413, 0.1815, 0.5399, 0.2374]) -Greedy action tensor([-1.3576, -0.6020, 0.3990, 0.0538]) tensor([0.0768, 0.1635, 0.4448, 0.3150]) -Greedy action tensor([-1.7087, -0.4987, 0.5338, -0.0257]) tensor([0.0522, 0.1751, 0.4917, 0.2810]) -Greedy action tensor([-1.8590, -0.3587, 0.6108, -0.1457]) tensor([0.0438, 0.1962, 0.5173, 0.2428]) -Greedy action tensor([-1.8991, -0.4577, 0.6477, -0.1581]) tensor([0.0422, 0.1784, 0.5387, 0.2407]) -Greedy action tensor([-1.6143, -0.1380, 0.4407, 0.0320]) tensor([0.0544, 0.2382, 0.4249, 0.2824]) -Greedy action tensor([-1.8448, -0.4396, 0.6242, -0.1272]) tensor([0.0445, 0.1815, 0.5259, 0.2481]) -Greedy action tensor([-0.1238, 0.6541, -0.0605, 0.5170]) tensor([0.1629, 0.3545, 0.1735, 0.3091]) -Greedy action tensor([-1.8419, -0.3229, 0.6122, -0.1149]) tensor([0.0438, 0.2001, 0.5097, 0.2464]) -Greedy action tensor([-1.9186, -0.4217, 0.6627, -0.1452]) tensor([0.0407, 0.1818, 0.5378, 0.2397]) -Greedy action tensor([-1.4213, 0.6302, 0.2827, 0.2410]) tensor([0.0512, 0.3980, 0.2812, 0.2697]) -Greedy action tensor([-0.5916, -0.3846, 0.1578, 0.1638]) tensor([0.1545, 0.1900, 0.3268, 0.3288]) -Greedy action tensor([-1.8455, -0.3683, 0.6230, -0.1163]) tensor([0.0438, 0.1920, 0.5173, 0.2470]) -Greedy action tensor([-1.8828, -0.4532, 0.6394, -0.1390]) tensor([0.0428, 0.1789, 0.5334, 0.2449]) -Greedy action tensor([-0.9240, 0.4709, 0.4916, 0.7627]) tensor([0.0687, 0.2772, 0.2830, 0.3711]) -Greedy action tensor([-1.8942, -0.3320, 0.6233, -0.1692]) tensor([0.0421, 0.2006, 0.5214, 0.2360]) -Greedy action tensor([-1.9370, -0.3778, 0.5281, -0.1950]) tensor([0.0431, 0.2047, 0.5065, 0.2458]) -Greedy action tensor([-1.7914, -0.3413, 0.6255, -0.0891]) tensor([0.0455, 0.1941, 0.5105, 0.2498]) -Greedy action tensor([-0.9702, 0.2992, 0.2849, -0.2010]) tensor([0.0978, 0.3480, 0.3431, 0.2111]) -Greedy action tensor([-1.6902, -0.2225, 0.5814, -0.0079]) tensor([0.0490, 0.2126, 0.4750, 0.2635]) -Greedy action tensor([-1.8921, -0.4337, 0.6442, -0.1516]) tensor([0.0423, 0.1819, 0.5346, 0.2412]) -Greedy action tensor([-0.5112, 0.1406, 0.7886, 1.2219]) tensor([0.0817, 0.1567, 0.2996, 0.4621]) -Greedy action tensor([-1.5677, 0.3661, 0.3948, 0.1970]) tensor([0.0479, 0.3313, 0.3410, 0.2798]) -Greedy action tensor([-1.6220, -0.3838, 0.6095, 0.0203]) tensor([0.0528, 0.1822, 0.4920, 0.2730]) -Greedy action tensor([-1.6277, -0.4275, 0.5041, -0.0196]) tensor([0.0564, 0.1871, 0.4751, 0.2814]) -Greedy action tensor([-1.8661, -0.4258, 0.6254, -0.1512]) tensor([0.0437, 0.1847, 0.5285, 0.2431]) -Greedy action tensor([-1.6247, -0.4992, 0.5182, 0.0140]) tensor([0.0563, 0.1736, 0.4801, 0.2900]) -Greedy action tensor([-1.7353, -0.3612, 0.5495, -0.0344]) tensor([0.0494, 0.1951, 0.4850, 0.2705]) -Greedy action tensor([-1.8291, -0.4730, 0.6000, -0.1180]) tensor([0.0459, 0.1783, 0.5214, 0.2543]) -Greedy action tensor([-1.6160, -0.3470, 0.5349, -0.1487]) tensor([0.0572, 0.2034, 0.4913, 0.2480]) -Greedy action tensor([-1.9393, -0.4267, 0.6611, -0.1763]) tensor([0.0403, 0.1827, 0.5423, 0.2347]) -Greedy action tensor([-1.5942, -0.0983, 0.6211, -0.5831]) tensor([0.0576, 0.2569, 0.5274, 0.1582]) -Greedy action tensor([-1.8886, -0.4253, 0.6377, -0.1461]) tensor([0.0425, 0.1835, 0.5313, 0.2427]) -Greedy action tensor([-1.5941, 0.1185, 0.4062, -0.0194]) tensor([0.0533, 0.2954, 0.3939, 0.2574]) -Greedy action tensor([-1.1159, 0.6818, 0.1018, 0.2335]) tensor([0.0701, 0.4230, 0.2368, 0.2702]) -Greedy action tensor([-1.8546, -0.3968, 0.6144, -0.1429]) tensor([0.0442, 0.1897, 0.5215, 0.2446]) -Greedy action tensor([-1.9144, -0.3928, 0.6402, -0.1705]) tensor([0.0414, 0.1895, 0.5324, 0.2367]) -Greedy action tensor([ 1.7804, -0.3436, -0.6341, 0.3133]) tensor([0.6947, 0.0830, 0.0621, 0.1602]) -Greedy action tensor([ 1.2693, 0.0311, -0.2556, 0.4198]) tensor([0.5168, 0.1498, 0.1125, 0.2210]) -Greedy action tensor([ 1.9289, -0.5533, -0.3397, 0.3349]) tensor([0.7194, 0.0601, 0.0744, 0.1461]) -Greedy action tensor([ 1.1093, -0.7188, -0.2824, 0.3835]) tensor([0.5282, 0.0849, 0.1313, 0.2556]) -Greedy action tensor([ 0.7707, -0.1059, -0.2895, 0.4906]) tensor([0.3971, 0.1653, 0.1376, 0.3001]) -Greedy action tensor([ 1.7508, 0.1946, -0.4183, 0.6791]) tensor([0.5996, 0.1265, 0.0685, 0.2053]) -Greedy action tensor([ 1.3920, -0.5037, -0.6230, 0.6009]) tensor([0.5757, 0.0865, 0.0768, 0.2610]) -Greedy action tensor([ 1.3064, -0.4114, -0.2336, 0.3486]) tensor([0.5626, 0.1010, 0.1206, 0.2159]) -Greedy action tensor([ 1.0762, -0.5415, -0.2549, 0.4543]) tensor([0.5001, 0.0992, 0.1321, 0.2685]) -Greedy action tensor([ 1.2805, -0.2627, -0.5807, 0.2709]) tensor([0.5768, 0.1233, 0.0897, 0.2102]) -Greedy action tensor([ 1.0436, -0.2878, -0.5824, 0.1715]) tensor([0.5322, 0.1406, 0.1047, 0.2225]) -Greedy action tensor([ 1.5418, -0.6162, -0.0882, 0.2739]) tensor([0.6278, 0.0725, 0.1230, 0.1767]) -Greedy action tensor([ 0.6741, -0.5086, -0.3377, 0.1392]) tensor([0.4433, 0.1359, 0.1612, 0.2597]) -Greedy action tensor([ 1.3090, 0.0023, -0.2760, 0.2788]) tensor([0.5457, 0.1477, 0.1118, 0.1948]) -Greedy action tensor([ 1.0391, -0.3259, -0.5258, 0.5587]) tensor([0.4801, 0.1226, 0.1004, 0.2969]) -Greedy action tensor([ 1.9569, -0.6509, -0.2593, 0.7871]) tensor([0.6697, 0.0494, 0.0730, 0.2079]) -Greedy action tensor([ 1.2077, -0.4225, -0.0341, 0.0193]) tensor([0.5588, 0.1095, 0.1614, 0.1703]) -Greedy action tensor([ 1.3857, -0.7149, -0.7352, 0.4395]) tensor([0.6133, 0.0751, 0.0735, 0.2381]) -Greedy action tensor([ 1.2989, -0.8179, 0.0108, 0.3803]) tensor([0.5570, 0.0671, 0.1536, 0.2223]) -Greedy action tensor([ 1.3505, -0.2076, 0.0754, 0.3746]) tensor([0.5357, 0.1128, 0.1497, 0.2019]) -Greedy action tensor([ 1.3539, -0.5849, -0.1334, 0.4621]) tensor([0.5619, 0.0808, 0.1270, 0.2303]) -Greedy action tensor([ 1.2824, -0.5168, -0.1623, 0.2855]) tensor([0.5649, 0.0934, 0.1332, 0.2085]) -Greedy action tensor([ 1.6775, -0.1873, -0.2281, 0.4254]) tensor([0.6291, 0.0975, 0.0936, 0.1799]) -Greedy action tensor([ 1.6273, -0.4166, -0.4413, 0.1019]) tensor([0.6787, 0.0879, 0.0858, 0.1476]) -Greedy action tensor([ 2.6756, -1.9587, -0.3142, 0.3049]) tensor([0.8670, 0.0084, 0.0436, 0.0810]) -Greedy action tensor([ 0.8206, -0.6313, -0.0855, 0.2372]) tensor([0.4553, 0.1066, 0.1840, 0.2541]) -Greedy action tensor([ 1.9689, -0.8640, -0.3913, 0.6348]) tensor([0.7059, 0.0415, 0.0666, 0.1859]) -Greedy action tensor([ 1.6603, -0.8309, -0.1132, 0.6949]) tensor([0.6122, 0.0507, 0.1039, 0.2332]) -Greedy action tensor([ 1.9562, -0.7971, -0.3809, 0.6159]) tensor([0.7032, 0.0448, 0.0679, 0.1841]) -Greedy action tensor([ 1.2967, 0.0839, -0.0979, 0.4621]) tensor([0.5052, 0.1502, 0.1253, 0.2193]) -Greedy action tensor([ 1.6370, -0.4538, -0.3608, 0.3218]) tensor([0.6546, 0.0809, 0.0888, 0.1757]) -Greedy action tensor([ 1.3400, -0.5426, -0.4112, 0.3274]) tensor([0.5921, 0.0901, 0.1028, 0.2151]) -Greedy action tensor([ 1.2414, -0.0911, -0.7728, 0.4529]) tensor([0.5400, 0.1425, 0.0721, 0.2454]) -Greedy action tensor([ 0.8905, -0.6970, 0.0297, 0.3751]) tensor([0.4495, 0.0919, 0.1901, 0.2685]) -Greedy action tensor([ 1.1669, -0.2923, -0.4462, 0.3923]) tensor([0.5284, 0.1228, 0.1053, 0.2435]) -Greedy action tensor([ 1.4673, -0.1686, -0.3100, 0.2332]) tensor([0.6043, 0.1177, 0.1022, 0.1759]) -Greedy action tensor([1.0756, 0.0062, 0.0405, 0.4784]) tensor([0.4447, 0.1526, 0.1579, 0.2447]) -Greedy action tensor([ 1.3634, -0.3938, -0.4839, 0.2251]) tensor([0.6059, 0.1045, 0.0955, 0.1941]) -Greedy action tensor([ 1.8977, -0.7912, -0.5353, 0.6389]) tensor([0.6946, 0.0472, 0.0610, 0.1973]) -Greedy action tensor([ 1.5241, -0.4818, -0.4439, 0.4414]) tensor([0.6200, 0.0834, 0.0866, 0.2100]) -Greedy action tensor([ 2.0024, -0.5398, -0.2442, 0.3218]) tensor([0.7295, 0.0574, 0.0772, 0.1359]) -Greedy action tensor([2.3892, 0.1679, 0.0041, 0.4589]) tensor([0.7431, 0.0806, 0.0684, 0.1078]) -Greedy action tensor([2.6551, 0.3646, 0.0678, 0.1838]) tensor([0.7931, 0.0803, 0.0597, 0.0670]) -Greedy action tensor([ 1.0743, -0.4152, -0.2855, 0.2156]) tensor([0.5247, 0.1183, 0.1347, 0.2223]) -Greedy action tensor([ 1.5828, -0.7790, -0.3739, 0.5328]) tensor([0.6307, 0.0594, 0.0891, 0.2207]) -Greedy action tensor([ 1.9363, -0.6033, -0.2179, 0.5882]) tensor([0.6875, 0.0542, 0.0797, 0.1786]) -Greedy action tensor([ 1.1883, -0.1618, -0.8118, 0.4064]) tensor([0.5399, 0.1400, 0.0731, 0.2471]) -Greedy action tensor([ 1.4000, -0.2520, -0.3684, 0.2402]) tensor([0.5967, 0.1144, 0.1018, 0.1871]) -Greedy action tensor([ 1.4130, 0.1968, -0.5245, -0.3505]) tensor([0.6204, 0.1839, 0.0894, 0.1064]) -Greedy action tensor([ 1.3766, -0.6751, -0.2324, 0.0284]) tensor([0.6296, 0.0809, 0.1260, 0.1635]) -Greedy action tensor([ 1.6441, -0.1491, -0.6203, 0.2887]) tensor([0.6544, 0.1089, 0.0680, 0.1687]) -Greedy action tensor([ 1.3895, -0.8119, -0.0133, 0.5037]) tensor([0.5653, 0.0626, 0.1390, 0.2331]) -Greedy action tensor([ 1.2459, -0.0348, -0.4139, 0.3668]) tensor([0.5310, 0.1475, 0.1010, 0.2204]) -Greedy action tensor([ 0.6442, -0.1720, -0.4747, 0.3830]) tensor([0.3939, 0.1741, 0.1287, 0.3033]) -Greedy action tensor([ 1.8168, -0.9134, -0.2956, 0.6200]) tensor([0.6719, 0.0438, 0.0813, 0.2030]) -Greedy action tensor([ 1.5422, -0.1909, 0.2600, 0.0529]) tensor([0.5953, 0.1052, 0.1652, 0.1343]) -Greedy action tensor([ 1.5594, -0.6424, -0.4734, 0.6530]) tensor([0.6077, 0.0672, 0.0796, 0.2455]) -Greedy action tensor([ 1.4311, -0.5612, -0.3988, 0.4280]) tensor([0.6011, 0.0820, 0.0964, 0.2205]) -Greedy action tensor([ 1.3839, -0.5112, -0.3070, 0.4532]) tensor([0.5784, 0.0869, 0.1066, 0.2280]) -Greedy action tensor([ 1.3150, -0.8266, -0.3056, 0.6240]) tensor([0.5506, 0.0647, 0.1089, 0.2759]) -Greedy action tensor([ 1.4627, -0.1948, -0.6513, 0.4657]) tensor([0.5951, 0.1134, 0.0719, 0.2196]) -Greedy action tensor([ 0.9994, -0.1381, -0.4016, -0.0769]) tensor([0.5241, 0.1681, 0.1291, 0.1787]) -Greedy action tensor([ 0.5057, -0.2576, 0.0583, 0.0993]) tensor([0.3608, 0.1682, 0.2307, 0.2403]) -Greedy action tensor([ 0.1948, -0.4078, -0.0353, 0.1019]) tensor([0.3074, 0.1683, 0.2442, 0.2801]) -Greedy action tensor([ 2.0187, -0.7098, -0.5182, 1.3779]) tensor([0.5983, 0.0391, 0.0473, 0.3152]) -Greedy action tensor([ 0.9134, -0.4456, -0.1248, 0.2544]) tensor([0.4698, 0.1207, 0.1664, 0.2431]) -Greedy action tensor([ 1.7027, -0.7349, 0.0888, 0.1067]) tensor([0.6715, 0.0587, 0.1337, 0.1361]) -Greedy action tensor([ 1.3247, -0.4278, -0.4056, 0.6355]) tensor([0.5398, 0.0936, 0.0957, 0.2710]) -Greedy action tensor([ 1.8874, -0.7387, -0.3490, 0.1902]) tensor([0.7340, 0.0531, 0.0784, 0.1345]) -Greedy action tensor([ 1.0900, -0.4119, -0.0495, 0.3562]) tensor([0.4944, 0.1101, 0.1582, 0.2373]) -Greedy action tensor([ 1.8092, -0.7124, -0.1277, 0.3524]) tensor([0.6861, 0.0551, 0.0989, 0.1599]) -Greedy action tensor([ 1.5974, -0.4673, -0.7205, 0.7518]) tensor([0.6044, 0.0767, 0.0595, 0.2595]) -Greedy action tensor([ 1.3828, -0.5599, -0.0273, 0.4462]) tensor([0.5620, 0.0805, 0.1372, 0.2203]) -Greedy action tensor([ 2.8866, -0.9009, -0.4734, 0.6682]) tensor([0.8575, 0.0194, 0.0298, 0.0933]) -Greedy action tensor([ 1.8715, -0.4551, -0.6889, 1.1127]) tensor([0.6086, 0.0594, 0.0470, 0.2850]) -Greedy action tensor([ 2.0881, -0.8096, 0.2249, 0.4324]) tensor([0.7136, 0.0394, 0.1107, 0.1363]) -Greedy action tensor([ 0.7422, -0.6384, 0.0440, 0.3164]) tensor([0.4163, 0.1047, 0.2071, 0.2719]) -Greedy action tensor([ 1.5708, -0.1659, 0.0217, 0.3223]) tensor([0.5968, 0.1051, 0.1268, 0.1713]) -Greedy action tensor([ 2.0293, -0.9642, -0.1822, 0.7308]) tensor([0.6980, 0.0350, 0.0765, 0.1905]) -Greedy action tensor([ 1.1992, 0.1175, -0.1411, 0.4050]) tensor([0.4871, 0.1652, 0.1275, 0.2202]) -Greedy action tensor([ 2.3267, -1.1654, -0.3290, 0.5937]) tensor([0.7828, 0.0238, 0.0550, 0.1384]) -Greedy action tensor([ 1.8721, -0.7028, -0.8029, 0.7872]) tensor([0.6743, 0.0514, 0.0465, 0.2279]) -Greedy action tensor([ 0.5054, -0.1610, -0.0347, -0.2246]) tensor([0.3879, 0.1992, 0.2260, 0.1869]) -Greedy action tensor([ 0.3904, -0.1741, -0.1686, -0.4741]) tensor([0.3904, 0.2220, 0.2232, 0.1645]) -Greedy action tensor([ 0.8343, -0.2587, -0.1449, -0.4179]) tensor([0.5008, 0.1679, 0.1881, 0.1432]) -Greedy action tensor([ 0.7821, -0.4831, -0.0425, -0.3631]) tensor([0.4905, 0.1384, 0.2150, 0.1561]) -Greedy action tensor([ 0.5052, 0.1800, -0.1199, -0.3673]) tensor([0.3738, 0.2700, 0.2000, 0.1562]) -Greedy action tensor([ 0.5042, -0.3041, 0.0842, -0.3036]) tensor([0.3924, 0.1748, 0.2578, 0.1749]) -Greedy action tensor([ 0.3923, -0.1697, 0.0258, -0.3187]) tensor([0.3631, 0.2070, 0.2517, 0.1783]) -Greedy action tensor([ 0.4026, -0.0318, 0.0211, -0.3022]) tensor([0.3540, 0.2293, 0.2417, 0.1750]) -Greedy action tensor([ 0.7442, -0.5534, 0.0940, -0.2388]) tensor([0.4610, 0.1259, 0.2406, 0.1725]) -Greedy action tensor([ 0.8518, -0.7275, -0.2190, -0.3100]) tensor([0.5371, 0.1107, 0.1841, 0.1681]) -Greedy action tensor([ 0.6409, -0.6133, -0.1019, -0.1909]) tensor([0.4553, 0.1299, 0.2166, 0.1982]) -Greedy action tensor([ 1.2084, -0.5546, -0.1606, -0.3768]) tensor([0.6132, 0.1052, 0.1560, 0.1256]) -Greedy action tensor([ 0.6363, 0.1246, -0.0991, 0.0898]) tensor([0.3763, 0.2256, 0.1803, 0.2178]) -Greedy action tensor([ 0.3941, -0.2288, -0.0925, -0.1627]) tensor([0.3671, 0.1969, 0.2257, 0.2104]) -Greedy action tensor([ 0.3080, 0.1451, -0.1375, -0.3085]) tensor([0.3300, 0.2804, 0.2114, 0.1782]) -Greedy action tensor([ 0.7681, -0.5694, 0.0291, -0.3092]) tensor([0.4806, 0.1262, 0.2296, 0.1637]) -Greedy action tensor([ 0.9760, -0.5791, 0.0625, -0.4956]) tensor([0.5430, 0.1147, 0.2178, 0.1246]) -Greedy action tensor([ 0.7099, -0.4990, -0.1517, -0.4580]) tensor([0.4921, 0.1469, 0.2079, 0.1531]) -Greedy action tensor([ 0.7364, -0.2789, 0.0172, -0.1457]) tensor([0.4418, 0.1601, 0.2152, 0.1829]) -Greedy action tensor([ 0.9384, -0.5465, 0.0180, -0.5347]) tensor([0.5393, 0.1222, 0.2149, 0.1236]) -Greedy action tensor([ 0.4077, -0.4725, -0.0521, -0.1813]) tensor([0.3845, 0.1594, 0.2428, 0.2133]) -Greedy action tensor([ 0.3899, 0.1142, -0.0951, -0.1359]) tensor([0.3372, 0.2559, 0.2076, 0.1993]) -Greedy action tensor([ 0.3081, 0.0925, -0.0499, -0.4232]) tensor([0.3348, 0.2699, 0.2341, 0.1612]) -Greedy action tensor([ 0.7040, -0.2353, -0.0969, -0.4051]) tensor([0.4609, 0.1802, 0.2069, 0.1520]) -Greedy action tensor([ 0.3185, -0.0278, -0.0616, -0.3513]) tensor([0.3445, 0.2436, 0.2355, 0.1763]) -Greedy action tensor([ 0.4766, -0.1906, 0.0586, -0.5901]) tensor([0.3975, 0.2040, 0.2617, 0.1368]) -Greedy action tensor([ 0.8385, -0.5334, 0.0796, -0.5338]) tensor([0.5062, 0.1284, 0.2370, 0.1283]) -Greedy action tensor([ 0.6898, -0.6017, 0.0442, -0.3827]) tensor([0.4670, 0.1284, 0.2449, 0.1598]) -Greedy action tensor([ 0.6847, -0.3827, -0.0474, -0.5251]) tensor([0.4710, 0.1620, 0.2265, 0.1405]) -Greedy action tensor([ 0.7313, -0.4063, 0.0909, -0.4839]) tensor([0.4664, 0.1495, 0.2458, 0.1383]) -Greedy action tensor([ 0.6142, 0.1362, -0.0397, 0.0990]) tensor([0.3653, 0.2265, 0.1900, 0.2182]) -Greedy action tensor([ 0.4461, -0.6016, -0.1643, -0.1854]) tensor([0.4123, 0.1446, 0.2239, 0.2192]) -Greedy action tensor([ 1.1641, -1.1670, 0.1582, -0.6727]) tensor([0.6164, 0.0599, 0.2254, 0.0982]) -Greedy action tensor([ 1.0209, -0.6532, 0.0393, -0.4150]) tensor([0.5555, 0.1041, 0.2082, 0.1322]) -Greedy action tensor([ 0.9634, -0.4477, -0.0914, -0.5395]) tensor([0.5511, 0.1344, 0.1919, 0.1226]) -Greedy action tensor([ 0.4511, -0.0154, 0.0544, -0.0771]) tensor([0.3461, 0.2171, 0.2328, 0.2041]) -Greedy action tensor([ 0.9740, -0.7225, 0.0313, -0.6325]) tensor([0.5639, 0.1034, 0.2197, 0.1131]) -Greedy action tensor([ 0.5605, -0.2724, -0.0366, -0.0415]) tensor([0.3948, 0.1717, 0.2173, 0.2162]) -Greedy action tensor([ 0.9000, -0.3612, -0.1175, -0.1617]) tensor([0.5023, 0.1423, 0.1816, 0.1737]) -Greedy action tensor([ 0.8275, -0.6844, 0.0684, -0.2846]) tensor([0.4957, 0.1093, 0.2320, 0.1630]) -Greedy action tensor([ 0.5262, -0.5060, -0.0164, -0.1689]) tensor([0.4104, 0.1462, 0.2386, 0.2048]) -Greedy action tensor([ 0.8314, -0.6035, -0.0594, -0.2448]) tensor([0.5027, 0.1197, 0.2063, 0.1714]) -Greedy action tensor([ 0.8223, -0.4248, 0.0111, -0.4947]) tensor([0.5001, 0.1437, 0.2222, 0.1340]) -Greedy action tensor([ 0.5517, -0.0458, -0.0016, -0.0231]) tensor([0.3720, 0.2047, 0.2139, 0.2094]) -Greedy action tensor([ 0.9975, -1.0939, 0.0923, -0.5664]) tensor([0.5756, 0.0711, 0.2328, 0.1205]) -Greedy action tensor([ 0.6126, -0.3736, -0.1305, -0.0799]) tensor([0.4257, 0.1588, 0.2025, 0.2130]) -Greedy action tensor([ 0.8928, -0.4150, -0.3083, -0.6733]) tensor([0.5618, 0.1519, 0.1690, 0.1173]) -Greedy action tensor([ 0.9753, -0.9549, -0.2414, -0.3519]) tensor([0.5860, 0.0850, 0.1736, 0.1554]) -Greedy action tensor([ 0.8447, -0.2451, -0.1153, -0.0442]) tensor([0.4694, 0.1579, 0.1797, 0.1930]) -Greedy action tensor([ 0.9767, -0.5421, -0.0702, -0.6891]) tensor([0.5685, 0.1245, 0.1996, 0.1075]) -Greedy action tensor([ 0.6729, -0.5920, 0.0187, -0.2371]) tensor([0.4536, 0.1280, 0.2358, 0.1826]) -Greedy action tensor([ 0.6218, -0.3400, -0.0628, -0.0685]) tensor([0.4188, 0.1601, 0.2112, 0.2100]) -Greedy action tensor([ 0.5057, -0.1024, -0.1318, -0.1611]) tensor([0.3866, 0.2105, 0.2044, 0.1985]) -Greedy action tensor([ 0.0507, -0.1006, 0.0509, -0.0736]) tensor([0.2672, 0.2297, 0.2672, 0.2359]) -Greedy action tensor([ 0.6869, 0.0634, -0.0252, -0.0798]) tensor([0.4014, 0.2152, 0.1969, 0.1865]) -Greedy action tensor([ 0.4408, 0.3200, -0.0413, 0.0622]) tensor([0.3136, 0.2779, 0.1937, 0.2148]) -Greedy action tensor([ 0.7779, -0.2911, 0.0208, -0.2656]) tensor([0.4620, 0.1586, 0.2167, 0.1627]) -Greedy action tensor([ 0.7132, -0.5143, -0.0544, -0.2575]) tensor([0.4682, 0.1372, 0.2173, 0.1774]) -Greedy action tensor([ 0.8802, -0.6793, -0.0685, -0.3911]) tensor([0.5325, 0.1119, 0.2062, 0.1493]) -Greedy action tensor([ 0.7951, -0.0748, 0.0394, 0.0199]) tensor([0.4257, 0.1783, 0.1999, 0.1961]) -Greedy action tensor([ 0.6128, -0.8314, -0.0213, -0.4420]) tensor([0.4729, 0.1116, 0.2508, 0.1647]) -Greedy action tensor([ 0.4708, -0.2706, -0.1457, -0.1000]) tensor([0.3874, 0.1846, 0.2091, 0.2189]) -Greedy action tensor([ 0.8813, -0.3927, -0.0608, -0.1804]) tensor([0.4962, 0.1388, 0.1934, 0.1716]) -Greedy action tensor([ 0.2931, 0.0007, -0.2524, -0.2110]) tensor([0.3413, 0.2548, 0.1978, 0.2062]) -Greedy action tensor([ 0.6913, -0.5624, -0.0297, -0.1956]) tensor([0.4580, 0.1307, 0.2227, 0.1886]) -Greedy action tensor([ 0.7053, -0.4792, 0.0380, -0.3179]) tensor([0.4591, 0.1404, 0.2355, 0.1650]) -Greedy action tensor([ 0.6395, -0.5178, 0.0088, -0.1164]) tensor([0.4317, 0.1357, 0.2298, 0.2028]) -Greedy action tensor([ 0.4080, -0.0419, -0.0442, -0.3326]) tensor([0.3635, 0.2318, 0.2313, 0.1733]) -Greedy action tensor([ 0.4634, -0.3371, -0.0118, -0.2273]) tensor([0.3888, 0.1746, 0.2417, 0.1949]) -Greedy action tensor([ 0.8395, -0.7978, -0.0759, -0.2479]) tensor([0.5176, 0.1007, 0.2072, 0.1745]) -Greedy action tensor([ 0.8481, -0.5983, 0.0061, -0.4166]) tensor([0.5132, 0.1208, 0.2211, 0.1449]) -Greedy action tensor([ 0.8298, -0.3404, -0.0553, -0.1373]) tensor([0.4755, 0.1475, 0.1962, 0.1808]) -Greedy action tensor([ 0.4964, 0.2122, 0.0743, -0.0471]) tensor([0.3346, 0.2518, 0.2194, 0.1943]) -Greedy action tensor([ 0.6352, 0.3220, -0.1857, 0.2492]) tensor([0.3508, 0.2564, 0.1543, 0.2384]) -Greedy action tensor([ 0.3176, -0.2466, -0.1734, -0.0227]) tensor([0.3457, 0.1967, 0.2116, 0.2460]) -Greedy action tensor([ 0.5421, -0.2447, -0.1046, -0.1441]) tensor([0.4028, 0.1834, 0.2110, 0.2028]) -Greedy action tensor([ 0.4913, -0.4343, 0.0189, -0.4209]) tensor([0.4130, 0.1637, 0.2575, 0.1659]) -Greedy action tensor([ 0.5368, -0.0625, -0.1497, 0.0124]) tensor([0.3781, 0.2077, 0.1903, 0.2238]) -Greedy action tensor([ 0.5914, -0.4402, -0.1374, -0.4196]) tensor([0.4540, 0.1618, 0.2190, 0.1652]) -Greedy action tensor([ 0.6089, -0.5331, -0.1120, -0.2111]) tensor([0.4452, 0.1421, 0.2165, 0.1961]) -Greedy action tensor([ 1.0021, -0.6654, 0.0849, -0.2223]) tensor([0.5313, 0.1003, 0.2123, 0.1562]) -Greedy action tensor([ 1.6742, -0.2394, -0.6244, 0.1205]) tensor([0.6852, 0.1011, 0.0688, 0.1449]) -Greedy action tensor([-0.1222, -1.1257, 0.2018, 1.3173]) tensor([0.1435, 0.0526, 0.1984, 0.6054]) -Greedy action tensor([ 1.4670, -0.5392, -0.1216, 1.8019]) tensor([0.3654, 0.0491, 0.0746, 0.5108]) -Greedy action tensor([ 0.8976, -1.3077, -0.1535, 0.5484]) tensor([0.4619, 0.0509, 0.1615, 0.3258]) -Greedy action tensor([-0.1823, -0.7505, -0.2402, -1.3186]) tensor([0.3532, 0.2001, 0.3333, 0.1134]) -Greedy action tensor([ 0.1675, -1.8805, 0.2404, 0.0183]) tensor([0.3262, 0.0421, 0.3508, 0.2809]) -Greedy action tensor([ 0.0127, -0.3860, 0.1001, -1.3109]) tensor([0.3302, 0.2216, 0.3603, 0.0879]) -Greedy action tensor([ 0.3744, -0.8748, 0.0199, 0.6744]) tensor([0.2996, 0.0859, 0.2102, 0.4044]) -Greedy action tensor([-0.6683, -1.4505, -0.3885, 0.6733]) tensor([0.1514, 0.0692, 0.2003, 0.5791]) -Greedy action tensor([ 1.6485, 0.4939, 0.6950, -0.0531]) tensor([0.5311, 0.1674, 0.2047, 0.0969]) -Greedy action tensor([-0.9135, 0.0702, -1.5139, 0.3513]) tensor([0.1288, 0.3444, 0.0706, 0.4562]) -Greedy action tensor([ 1.2379, -0.1172, -0.2305, 0.0474]) tensor([0.5579, 0.1439, 0.1285, 0.1697]) -Greedy action tensor([ 0.6080, 0.3382, 0.6386, -0.5614]) tensor([0.3220, 0.2459, 0.3321, 0.1000]) -Greedy action tensor([ 1.3788, -0.2151, 0.9236, 0.5607]) tensor([0.4388, 0.0891, 0.2784, 0.1936]) -Greedy action tensor([ 0.9217, -1.1770, -0.8270, 1.2922]) tensor([0.3643, 0.0447, 0.0634, 0.5276]) -Greedy action tensor([ 1.2569, -0.7058, 1.9180, 1.1095]) tensor([0.2538, 0.0357, 0.4916, 0.2190]) -Greedy action tensor([-0.4384, 0.1230, 0.2864, -0.4682]) tensor([0.1728, 0.3029, 0.3566, 0.1677]) -Greedy action tensor([-0.2613, -0.5460, 1.1522, -0.2431]) tensor([0.1453, 0.1093, 0.5973, 0.1480]) -Greedy action tensor([-0.2268, -1.4786, 0.4345, 1.5264]) tensor([0.1112, 0.0318, 0.2153, 0.6417]) -Greedy action tensor([ 0.1688, -1.4148, 1.1321, -0.1378]) tensor([0.2192, 0.0450, 0.5744, 0.1613]) -Greedy action tensor([-0.6883, -0.4066, 0.6891, -0.5254]) tensor([0.1339, 0.1775, 0.5310, 0.1576]) -Greedy action tensor([ 0.5853, -0.2418, -0.7259, 0.0898]) tensor([0.4318, 0.1888, 0.1164, 0.2631]) -Greedy action tensor([ 0.6684, -1.3668, 0.7703, -0.4402]) tensor([0.3894, 0.0509, 0.4312, 0.1285]) -Greedy action tensor([ 0.4270, 0.5264, 1.4414, -0.2078]) tensor([0.1855, 0.2048, 0.5114, 0.0983]) -Greedy action tensor([ 0.8695, -1.6266, -0.0169, 0.7439]) tensor([0.4208, 0.0347, 0.1734, 0.3711]) -Greedy action tensor([-0.2427, -1.2561, 0.6906, -0.9168]) tensor([0.2265, 0.0822, 0.5759, 0.1154]) -Greedy action tensor([-0.7864, -0.1741, -0.9143, 0.3032]) tensor([0.1493, 0.2754, 0.1314, 0.4439]) -Greedy action tensor([ 0.1454, -0.1006, 0.7367, -0.1865]) tensor([0.2322, 0.1816, 0.4195, 0.1666]) -Greedy action tensor([ 0.9915, -1.4635, -0.1855, -0.0097]) tensor([0.5677, 0.0487, 0.1750, 0.2086]) -Greedy action tensor([ 0.4626, -0.8434, -0.7272, -0.0558]) tensor([0.4607, 0.1248, 0.1402, 0.2743]) -Greedy action tensor([ 0.2125, -0.5504, 0.6379, 0.6577]) tensor([0.2194, 0.1023, 0.3358, 0.3425]) -Greedy action tensor([-0.3086, -1.0916, 0.6026, -0.5447]) tensor([0.2112, 0.0965, 0.5254, 0.1668]) -Greedy action tensor([ 0.7708, -1.2690, -0.0467, 0.2872]) tensor([0.4570, 0.0594, 0.2018, 0.2818]) -Greedy action tensor([ 1.7449, -1.0391, 0.4154, 1.2607]) tensor([0.5148, 0.0318, 0.1362, 0.3172]) -Greedy action tensor([ 0.8627, -0.1787, -0.0248, 0.9647]) tensor([0.3482, 0.1229, 0.1433, 0.3856]) -Greedy action tensor([-0.3743, -1.1012, 0.2925, 0.3740]) tensor([0.1804, 0.0872, 0.3513, 0.3811]) -Greedy action tensor([-0.7941, -0.6694, -0.0095, -0.0441]) tensor([0.1553, 0.1759, 0.3402, 0.3286]) -Greedy action tensor([ 0.2920, -1.7711, -0.1976, 0.3946]) tensor([0.3511, 0.0446, 0.2152, 0.3891]) -Greedy action tensor([-0.1248, -0.7404, -0.1798, -0.9784]) tensor([0.3433, 0.1855, 0.3250, 0.1462]) -Greedy action tensor([1.0084, 0.2258, 0.0732, 0.5292]) tensor([0.4050, 0.1852, 0.1590, 0.2508]) -Greedy action tensor([ 0.8292, -1.5385, 0.4316, -0.4144]) tensor([0.4869, 0.0456, 0.3271, 0.1404]) -Greedy action tensor([ 0.2629, -1.5779, -0.2954, 1.1424]) tensor([0.2415, 0.0383, 0.1382, 0.5820]) -Greedy action tensor([ 0.3772, -0.9444, 0.1311, 0.7407]) tensor([0.2868, 0.0765, 0.2242, 0.4125]) -Greedy action tensor([-0.2649, -2.2953, -0.2038, 0.3809]) tensor([0.2438, 0.0320, 0.2591, 0.4651]) -Greedy action tensor([-0.5459, -0.4531, 0.3605, -0.6700]) tensor([0.1833, 0.2011, 0.4537, 0.1619]) -Greedy action tensor([0.6679, 0.7219, 0.9770, 0.1483]) tensor([0.2492, 0.2630, 0.3395, 0.1482]) -Greedy action tensor([ 0.2160, -0.0965, 1.4276, -0.4611]) tensor([0.1786, 0.1307, 0.5999, 0.0908]) -Greedy action tensor([ 0.4325, -0.9534, -0.7039, 1.2306]) tensor([0.2637, 0.0660, 0.0846, 0.5857]) -Greedy action tensor([ 0.8873, -0.1640, -1.3678, 1.3130]) tensor([0.3350, 0.1171, 0.0351, 0.5128]) -Greedy action tensor([-0.6533, 0.3369, -1.0949, -0.2149]) tensor([0.1699, 0.4574, 0.1093, 0.2634]) -Greedy action tensor([ 0.1541, 0.0571, -0.5901, 1.2506]) tensor([0.1860, 0.1688, 0.0884, 0.5568]) -Greedy action tensor([ 0.3075, -1.9810, 0.3323, 0.2861]) tensor([0.3220, 0.0327, 0.3301, 0.3152]) -Greedy action tensor([ 1.5649, -1.6478, -0.5800, -0.3469]) tensor([0.7662, 0.0308, 0.0897, 0.1133]) -Greedy action tensor([ 1.0499, -0.1238, 0.6069, 0.3827]) tensor([0.4058, 0.1255, 0.2605, 0.2082]) -Greedy action tensor([ 0.0690, 0.3008, 0.3341, -0.1959]) tensor([0.2309, 0.2911, 0.3009, 0.1771]) -Greedy action tensor([ 0.3714, -0.3444, -0.6075, -0.3288]) tensor([0.4236, 0.2070, 0.1591, 0.2103]) -Greedy action tensor([-1.4410, -1.4757, 0.7420, -0.0623]) tensor([0.0675, 0.0652, 0.5992, 0.2681]) -Greedy action tensor([1.3793, 0.8253, 0.4060, 0.6223]) tensor([0.4129, 0.2373, 0.1560, 0.1937]) -Greedy action tensor([ 0.5572, -0.3264, 0.7326, 0.2358]) tensor([0.3003, 0.1241, 0.3578, 0.2177]) -Greedy action tensor([-0.2156, -0.2096, 0.6684, -0.5826]) tensor([0.1953, 0.1965, 0.4728, 0.1353]) -Greedy action tensor([ 0.1047, -0.4147, 0.6307, -1.0553]) tensor([0.2777, 0.1652, 0.4700, 0.0871]) -Greedy action tensor([-0.2418, 0.2453, 0.9607, 0.3676]) tensor([0.1283, 0.2088, 0.4270, 0.2360]) -Greedy action tensor([ 0.2193, -0.8837, -0.3187, 0.8417]) tensor([0.2646, 0.0878, 0.1545, 0.4931]) -Greedy action tensor([ 1.1816, -0.5751, 0.4431, -0.2535]) tensor([0.5295, 0.0914, 0.2530, 0.1261]) -Greedy action tensor([-0.0282, -0.3190, 0.0389, -0.0310]) tensor([0.2622, 0.1960, 0.2804, 0.2614]) -Greedy action tensor([ 1.2696, 0.0926, 0.3867, -0.3490]) tensor([0.5208, 0.1605, 0.2154, 0.1032]) -Greedy action tensor([ 0.4606, -0.2145, -0.1804, 0.3425]) tensor([0.3419, 0.1741, 0.1801, 0.3039]) -Greedy action tensor([0.9045, 0.0100, 0.2473, 0.6436]) tensor([0.3707, 0.1516, 0.1921, 0.2856]) -Greedy action tensor([-0.5378, 0.6149, 1.1061, -0.9341]) tensor([0.0999, 0.3162, 0.5168, 0.0672]) -Greedy action tensor([1.2231, 0.8438, 0.2688, 0.4510]) tensor([0.3950, 0.2703, 0.1521, 0.1825]) -Greedy action tensor([ 0.4100, -0.6506, 0.0855, 0.4099]) tensor([0.3258, 0.1128, 0.2355, 0.3258]) -Greedy action tensor([ 0.0794, -1.0994, -0.1646, 0.3176]) tensor([0.2976, 0.0916, 0.2332, 0.3777]) -Greedy action tensor([-0.7366, 0.9639, -0.4769, 0.4030]) tensor([0.0918, 0.5025, 0.1190, 0.2868]) -Greedy action tensor([-0.9239, -1.4722, 0.2671, 0.4463]) tensor([0.1136, 0.0656, 0.3737, 0.4471]) -Greedy action tensor([-0.0507, -0.2447, -0.0819, -0.7874]) tensor([0.3057, 0.2518, 0.2963, 0.1463]) -Greedy action tensor([-0.3499, -1.0060, 0.1537, -1.1108]) tensor([0.2747, 0.1425, 0.4545, 0.1283]) -Greedy action tensor([-0.5229, -0.2949, 1.0304, -1.1213]) tensor([0.1328, 0.1667, 0.6275, 0.0730]) -Greedy action tensor([ 0.1955, -1.3934, -0.5884, -0.0342]) tensor([0.4072, 0.0831, 0.1860, 0.3237]) -Greedy action tensor([-0.1762, 0.0795, -0.7069, 0.7125]) tensor([0.1883, 0.2431, 0.1107, 0.4579]) -Greedy action tensor([-1.5165, -1.4248, 0.7337, 0.5697]) tensor([0.0509, 0.0558, 0.4832, 0.4101]) -Greedy action tensor([ 0.1104, -0.8089, 0.1739, 0.5754]) tensor([0.2465, 0.0983, 0.2627, 0.3925]) -Greedy action tensor([ 1.0613, -1.3603, 0.4451, -0.2342]) tensor([0.5256, 0.0467, 0.2838, 0.1439]) -Greedy action tensor([ 1.9923, -1.1489, -0.2832, 0.8834]) tensor([0.6776, 0.0293, 0.0696, 0.2235]) -Greedy action tensor([ 1.3695, -0.9499, 0.2113, -0.0359]) tensor([0.6033, 0.0593, 0.1895, 0.1480]) -Greedy action tensor([ 1.2209, -0.0806, -0.2055, 0.2181]) tensor([0.5322, 0.1448, 0.1278, 0.1952]) -Greedy action tensor([ 2.1087, -0.2488, -0.1189, 0.4253]) tensor([0.7204, 0.0682, 0.0776, 0.1338]) -Greedy action tensor([ 0.9107, -0.3593, 0.1000, 0.0899]) tensor([0.4618, 0.1297, 0.2053, 0.2032]) -Greedy action tensor([ 1.5422, -0.3931, -0.3176, 0.2792]) tensor([0.6318, 0.0912, 0.0984, 0.1787]) -Greedy action tensor([ 1.9827, -1.0658, -0.2616, 0.5601]) tensor([0.7171, 0.0340, 0.0760, 0.1729]) -Greedy action tensor([ 1.5579, -0.5858, -0.2859, 0.6242]) tensor([0.5993, 0.0703, 0.0948, 0.2356]) -Greedy action tensor([ 1.8097, -1.0755, -0.0969, 0.1982]) tensor([0.7122, 0.0398, 0.1058, 0.1422]) -Greedy action tensor([1.5503, 0.0921, 0.2029, 0.1889]) tensor([0.5718, 0.1330, 0.1486, 0.1466]) -Greedy action tensor([ 1.6200, -0.5293, -0.5760, 0.0691]) tensor([0.6945, 0.0810, 0.0773, 0.1473]) -Greedy action tensor([ 0.5190, -0.5529, -0.0180, 0.0195]) tensor([0.3947, 0.1351, 0.2307, 0.2395]) -Greedy action tensor([ 1.9901, -1.1353, -0.2196, 0.4291]) tensor([0.7334, 0.0322, 0.0805, 0.1539]) -Greedy action tensor([ 1.5122, -0.2473, -0.2314, 0.4956]) tensor([0.5852, 0.1007, 0.1023, 0.2117]) -Greedy action tensor([ 1.2531, -0.4895, -0.4009, 0.6776]) tensor([0.5185, 0.0908, 0.0992, 0.2916]) -Greedy action tensor([ 1.2950, 0.0500, -0.2389, 0.5270]) tensor([0.5082, 0.1463, 0.1096, 0.2358]) -Greedy action tensor([ 1.7581, -0.2877, -0.5101, 0.1694]) tensor([0.6959, 0.0900, 0.0720, 0.1421]) -Greedy action tensor([ 1.6498, -0.3859, -0.7190, 0.3295]) tensor([0.6706, 0.0876, 0.0628, 0.1791]) -Greedy action tensor([ 0.9469, -0.3732, -0.1403, 0.0258]) tensor([0.4994, 0.1334, 0.1684, 0.1988]) -Greedy action tensor([ 2.2949, -1.0766, 0.0897, 0.8305]) tensor([0.7269, 0.0250, 0.0801, 0.1681]) -Greedy action tensor([ 1.3302, -0.0617, -0.6892, 0.2819]) tensor([0.5774, 0.1436, 0.0766, 0.2024]) -Greedy action tensor([ 2.0717, -1.2399, -0.2745, 0.6810]) tensor([0.7241, 0.0264, 0.0693, 0.1802]) -Greedy action tensor([ 1.5033, -0.5570, -0.5674, 0.6460]) tensor([0.5960, 0.0759, 0.0752, 0.2529]) -Greedy action tensor([ 1.1437, -0.2367, -0.9258, 0.4272]) tensor([0.5359, 0.1348, 0.0677, 0.2617]) -Greedy action tensor([ 1.1750, -0.3967, -0.2823, 0.3451]) tensor([0.5329, 0.1107, 0.1241, 0.2324]) -Greedy action tensor([ 1.2222, -0.7472, -0.5076, 0.6473]) tensor([0.5320, 0.0742, 0.0943, 0.2994]) -Greedy action tensor([ 1.0619, -0.6498, -0.0161, 0.3663]) tensor([0.4951, 0.0894, 0.1685, 0.2470]) -Greedy action tensor([ 1.1359, -0.3779, -0.3662, 0.0213]) tensor([0.5647, 0.1243, 0.1257, 0.1853]) -Greedy action tensor([ 1.4737, -0.1028, 0.0290, 0.1325]) tensor([0.5868, 0.1213, 0.1384, 0.1535]) -Greedy action tensor([ 1.5620, 0.3542, -0.2651, 0.4352]) tensor([0.5606, 0.1675, 0.0902, 0.1817]) -Greedy action tensor([ 1.2665, -0.5561, -0.5634, 0.7571]) tensor([0.5201, 0.0840, 0.0834, 0.3125]) -Greedy action tensor([ 1.5861, -0.3471, -0.1792, -0.2587]) tensor([0.6785, 0.0982, 0.1161, 0.1072]) -Greedy action tensor([ 1.0838, -0.5216, -0.3871, 0.2257]) tensor([0.5392, 0.1083, 0.1239, 0.2286]) -Greedy action tensor([ 1.2792, -0.1109, -0.4420, 0.4083]) tensor([0.5416, 0.1349, 0.0969, 0.2267]) -Greedy action tensor([ 0.7577, -0.3779, -0.2842, -0.1767]) tensor([0.4838, 0.1554, 0.1707, 0.1901]) -Greedy action tensor([ 1.7223, -0.5247, -0.2705, 0.5831]) tensor([0.6402, 0.0677, 0.0873, 0.2049]) -Greedy action tensor([ 1.4268, -0.7205, -0.2091, 0.5262]) tensor([0.5821, 0.0680, 0.1134, 0.2365]) -Greedy action tensor([ 1.3287, -0.3086, -0.2380, 0.2760]) tensor([0.5707, 0.1110, 0.1191, 0.1992]) -Greedy action tensor([ 1.0468, -0.8255, -0.1708, 0.2447]) tensor([0.5268, 0.0810, 0.1559, 0.2362]) -Greedy action tensor([ 1.4011, -0.5315, -0.2114, 0.1798]) tensor([0.6101, 0.0883, 0.1216, 0.1799]) -Greedy action tensor([ 0.9845, -0.4596, 0.0047, -0.0334]) tensor([0.5069, 0.1196, 0.1903, 0.1832]) -Greedy action tensor([ 1.1459, -0.5370, -0.1951, 0.6144]) tensor([0.4914, 0.0913, 0.1285, 0.2888]) -Greedy action tensor([ 1.3267, -0.3375, -0.1279, 0.3451]) tensor([0.5563, 0.1053, 0.1299, 0.2085]) -Greedy action tensor([ 1.5365, -0.0926, -0.5753, 0.3938]) tensor([0.6112, 0.1199, 0.0740, 0.1950]) -Greedy action tensor([ 1.8823, -0.7853, -0.3497, 0.5018]) tensor([0.7002, 0.0486, 0.0751, 0.1761]) -Greedy action tensor([ 2.0045, -0.7713, -0.5702, 0.8753]) tensor([0.6841, 0.0426, 0.0521, 0.2212]) -Greedy action tensor([ 2.3486, -1.2085, -0.5886, 1.0659]) tensor([0.7359, 0.0210, 0.0390, 0.2041]) -Greedy action tensor([ 1.7477, -1.2609, -0.3494, 0.6823]) tensor([0.6593, 0.0325, 0.0810, 0.2272]) -Greedy action tensor([ 2.1938, -0.8865, -0.1936, 0.5952]) tensor([0.7463, 0.0343, 0.0686, 0.1509]) -Greedy action tensor([ 0.9494, -0.0332, 0.0385, 0.1077]) tensor([0.4530, 0.1696, 0.1822, 0.1952]) -Greedy action tensor([ 1.4571, 0.1322, -0.5612, 0.5713]) tensor([0.5522, 0.1468, 0.0734, 0.2277]) -Greedy action tensor([ 1.6552, -0.4510, -0.6493, 0.5358]) tensor([0.6460, 0.0786, 0.0645, 0.2109]) -Greedy action tensor([ 2.1374, -0.8648, -0.2282, 0.4102]) tensor([0.7568, 0.0376, 0.0711, 0.1345]) -Greedy action tensor([ 0.6753, -0.3578, -0.2207, 0.1725]) tensor([0.4221, 0.1502, 0.1723, 0.2553]) -Greedy action tensor([ 1.6668, -0.1701, -0.2016, 0.0072]) tensor([0.6649, 0.1059, 0.1027, 0.1265]) -Greedy action tensor([ 1.4312, -0.3210, -0.5265, 0.1858]) tensor([0.6241, 0.1082, 0.0881, 0.1796]) -Greedy action tensor([ 1.2717, -0.7410, -0.1799, 0.0015]) tensor([0.6066, 0.0811, 0.1421, 0.1703]) -Greedy action tensor([ 1.5236, -0.7418, -0.2697, 0.0784]) tensor([0.6640, 0.0689, 0.1105, 0.1565]) -Greedy action tensor([ 1.3279, -0.1481, -0.5724, 0.4314]) tensor([0.5599, 0.1280, 0.0837, 0.2284]) -Greedy action tensor([ 1.0097, -0.3405, 0.1448, -0.0810]) tensor([0.4960, 0.1286, 0.2088, 0.1666]) -Greedy action tensor([ 1.7587, -0.3864, -0.6139, 0.3006]) tensor([0.6930, 0.0811, 0.0646, 0.1612]) -Greedy action tensor([ 2.1734, -0.9681, -0.2506, 0.3353]) tensor([0.7747, 0.0335, 0.0686, 0.1233]) -Greedy action tensor([ 1.3855, -0.4455, -0.4859, 0.2877]) tensor([0.6069, 0.0973, 0.0934, 0.2025]) -Greedy action tensor([ 1.1282, -0.5629, -0.2164, -0.3118]) tensor([0.5946, 0.1096, 0.1550, 0.1409]) -Greedy action tensor([ 1.6982, -0.3136, -0.5083, 0.3960]) tensor([0.6597, 0.0882, 0.0726, 0.1794]) -Greedy action tensor([ 0.9904, -0.4631, -0.2659, 0.0251]) tensor([0.5265, 0.1231, 0.1499, 0.2005]) -Greedy action tensor([ 1.5102, -0.8207, -0.3250, 0.4159]) tensor([0.6283, 0.0611, 0.1003, 0.2103]) -Greedy action tensor([ 2.2358, 0.3319, -0.0236, 0.2012]) tensor([0.7225, 0.1076, 0.0754, 0.0945]) -Greedy action tensor([ 1.2953, -0.5702, 0.1872, 0.2961]) tensor([0.5396, 0.0835, 0.1782, 0.1987]) -Greedy action tensor([ 2.5680, -1.2868, -0.1805, 0.5811]) tensor([0.8181, 0.0173, 0.0524, 0.1122]) -Greedy action tensor([ 1.5504, -0.5476, -0.6173, 0.4798]) tensor([0.6329, 0.0777, 0.0724, 0.2170]) -Greedy action tensor([ 1.2477, -0.0765, -0.2627, 0.4380]) tensor([0.5176, 0.1377, 0.1143, 0.2303]) -Greedy action tensor([ 0.4176, -0.0682, -0.0372, 0.0852]) tensor([0.3371, 0.2073, 0.2139, 0.2417]) -Greedy action tensor([ 1.3322, -0.6499, -0.0543, 0.0479]) tensor([0.6007, 0.0828, 0.1502, 0.1663]) -Greedy action tensor([ 1.4184, -0.5270, -0.7855, 0.2073]) tensor([0.6447, 0.0921, 0.0712, 0.1920]) -Greedy action tensor([ 0.9508, -0.4072, -0.4065, 0.4467]) tensor([0.4720, 0.1214, 0.1215, 0.2851]) -Greedy action tensor([ 0.8347, -0.1312, -0.0488, 0.0437]) tensor([0.4450, 0.1694, 0.1839, 0.2017]) -Greedy action tensor([ 1.2097, -0.3558, -0.1642, 0.0159]) tensor([0.5665, 0.1184, 0.1434, 0.1717]) -Greedy action tensor([ 1.7096, -0.1443, -0.5223, 0.5529]) tensor([0.6335, 0.0992, 0.0680, 0.1993]) -Greedy action tensor([ 1.2358, -0.1033, -0.6736, 0.3372]) tensor([0.5502, 0.1442, 0.0815, 0.2240]) -Greedy action tensor([ 1.7976, -0.4430, -0.2271, 0.7687]) tensor([0.6266, 0.0667, 0.0827, 0.2239]) -Greedy action tensor([-1.8386, -0.4043, 0.5930, -0.1015]) tensor([0.0449, 0.1886, 0.5112, 0.2553]) -Greedy action tensor([-1.9252, -0.4145, 0.6532, -0.1651]) tensor([0.0408, 0.1848, 0.5374, 0.2371]) -Greedy action tensor([-1.8679, -0.4485, 0.6307, -0.1439]) tensor([0.0437, 0.1805, 0.5311, 0.2448]) -Greedy action tensor([-1.9227, -0.4354, 0.6538, -0.1704]) tensor([0.0411, 0.1818, 0.5402, 0.2369]) -Greedy action tensor([-1.9295, -0.4429, 0.6610, -0.1717]) tensor([0.0407, 0.1801, 0.5431, 0.2362]) -Greedy action tensor([-1.0907, 0.7279, 0.1906, 0.4483]) tensor([0.0648, 0.3996, 0.2335, 0.3021]) -Greedy action tensor([0.3822, 0.6687, 0.4610, 1.1608]) tensor([0.1788, 0.2381, 0.1935, 0.3896]) -Greedy action tensor([-1.8999, -0.3355, 0.6416, -0.1454]) tensor([0.0412, 0.1970, 0.5235, 0.2383]) -Greedy action tensor([-1.8952, -0.2642, 0.6144, -0.1440]) tensor([0.0414, 0.2114, 0.5089, 0.2384]) -Greedy action tensor([-1.6586, -0.4394, 0.5096, -0.0272]) tensor([0.0548, 0.1856, 0.4794, 0.2802]) -Greedy action tensor([-1.7449, -0.2545, 0.5426, -0.1104]) tensor([0.0490, 0.2174, 0.4825, 0.2511]) -Greedy action tensor([-1.6244, -0.4434, 0.4439, -0.0594]) tensor([0.0590, 0.1922, 0.4667, 0.2821]) -Greedy action tensor([-1.8753, -0.3039, 0.6241, -0.1373]) tensor([0.0422, 0.2033, 0.5143, 0.2402]) -Greedy action tensor([-1.8697, -0.3145, 0.6124, -0.1270]) tensor([0.0427, 0.2023, 0.5111, 0.2440]) -Greedy action tensor([-1.7709, -0.4839, 0.5817, -0.1250]) tensor([0.0492, 0.1782, 0.5174, 0.2552]) -Greedy action tensor([-0.4914, -0.0499, 0.9163, 1.6682]) tensor([0.0653, 0.1016, 0.2669, 0.5662]) -Greedy action tensor([-1.9060, -0.4095, 0.6470, -0.1577]) tensor([0.0416, 0.1857, 0.5340, 0.2388]) -Greedy action tensor([-1.7250, -0.3248, 0.5502, -0.0803]) tensor([0.0501, 0.2032, 0.4873, 0.2594]) -Greedy action tensor([-1.9156, -0.3972, 0.6509, -0.1677]) tensor([0.0411, 0.1876, 0.5352, 0.2361]) -Greedy action tensor([-1.8831, -0.4521, 0.6474, -0.1318]) tensor([0.0425, 0.1780, 0.5344, 0.2451]) -Greedy action tensor([-1.7519, -0.2138, 0.5375, -0.0770]) tensor([0.0479, 0.2232, 0.4730, 0.2559]) -Greedy action tensor([-1.3724, -0.5728, 0.4332, -0.0843]) tensor([0.0773, 0.1720, 0.4704, 0.2803]) -Greedy action tensor([-1.9049, -0.4471, 0.6479, -0.1644]) tensor([0.0419, 0.1802, 0.5387, 0.2391]) -Greedy action tensor([-1.0516, -0.6098, 0.2831, 0.0076]) tensor([0.1082, 0.1684, 0.4112, 0.3122]) -Greedy action tensor([-1.7071, -0.5150, 0.5516, -0.0167]) tensor([0.0519, 0.1708, 0.4962, 0.2811]) -Greedy action tensor([-1.4036, -0.4497, 0.5258, 0.3787]) tensor([0.0609, 0.1580, 0.4192, 0.3619]) -Greedy action tensor([-1.4964, -0.5284, 0.2994, -0.1497]) tensor([0.0741, 0.1950, 0.4462, 0.2848]) -Greedy action tensor([-1.0104, -0.2711, 0.3475, -0.1572]) tensor([0.1072, 0.2245, 0.4167, 0.2516]) -Greedy action tensor([-1.9378, -0.4510, 0.6690, -0.1734]) tensor([0.0403, 0.1782, 0.5462, 0.2353]) -Greedy action tensor([-1.7273, -0.2837, 0.6751, -0.0150]) tensor([0.0458, 0.1941, 0.5062, 0.2539]) -Greedy action tensor([-1.8699, -0.4475, 0.6325, -0.1437]) tensor([0.0435, 0.1805, 0.5314, 0.2446]) -Greedy action tensor([-1.8121, -0.4274, 0.6041, -0.1191]) tensor([0.0462, 0.1846, 0.5179, 0.2513]) -Greedy action tensor([-1.9115, -0.4130, 0.6487, -0.1625]) tensor([0.0414, 0.1852, 0.5355, 0.2379]) -Greedy action tensor([-1.8005, -0.4786, 0.5812, -0.1316]) tensor([0.0479, 0.1796, 0.5184, 0.2541]) -Greedy action tensor([-0.4752, -0.4347, 0.1981, 0.2622]) tensor([0.1641, 0.1709, 0.3218, 0.3431]) -Greedy action tensor([-1.8645, -0.4290, 0.6244, -0.1410]) tensor([0.0438, 0.1839, 0.5272, 0.2452]) -Greedy action tensor([-1.9351, -0.4391, 0.6630, -0.1753]) tensor([0.0405, 0.1806, 0.5438, 0.2352]) -Greedy action tensor([-1.8727, -0.4214, 0.6276, -0.1390]) tensor([0.0433, 0.1847, 0.5272, 0.2449]) -Greedy action tensor([-1.8682, -0.3809, 0.6316, -0.1322]) tensor([0.0430, 0.1901, 0.5232, 0.2438]) -Greedy action tensor([-1.8692, -0.4786, 0.6392, -0.1388]) tensor([0.0436, 0.1751, 0.5354, 0.2459]) -Greedy action tensor([-1.9124, -0.3887, 0.6434, -0.1560]) tensor([0.0412, 0.1892, 0.5309, 0.2387]) -Greedy action tensor([-1.8977, -0.4322, 0.6453, -0.1548]) tensor([0.0421, 0.1822, 0.5352, 0.2405]) -Greedy action tensor([-1.3394, -0.4977, 0.8247, 0.8664]) tensor([0.0474, 0.1099, 0.4126, 0.4301]) -Greedy action tensor([-1.8651, -0.4325, 0.6181, -0.1381]) tensor([0.0439, 0.1838, 0.5256, 0.2467]) -Greedy action tensor([-1.8251, -0.3845, 0.6030, -0.1175]) tensor([0.0453, 0.1913, 0.5136, 0.2499]) -Greedy action tensor([-1.8077, -0.4750, 0.5849, -0.1357]) tensor([0.0475, 0.1801, 0.5197, 0.2528]) -Greedy action tensor([-1.8772, -0.3929, 0.6413, -0.1411]) tensor([0.0426, 0.1878, 0.5282, 0.2415]) -Greedy action tensor([-1.9363, -0.4436, 0.6649, -0.1748]) tensor([0.0404, 0.1798, 0.5446, 0.2352]) -Greedy action tensor([-1.2867, 0.6712, 0.1679, 0.2060]) tensor([0.0595, 0.4213, 0.2547, 0.2646]) -Greedy action tensor([-1.8029, -0.4073, 0.6168, -0.0347]) tensor([0.0452, 0.1823, 0.5078, 0.2647]) -Greedy action tensor([-1.3072, 0.7049, 0.2373, 0.2955]) tensor([0.0552, 0.4125, 0.2584, 0.2739]) -Greedy action tensor([-1.8340, -0.4304, 0.6779, -0.0874]) tensor([0.0432, 0.1759, 0.5329, 0.2479]) -Greedy action tensor([-1.8977, -0.4390, 0.6444, -0.1540]) tensor([0.0421, 0.1813, 0.5356, 0.2410]) -Greedy action tensor([-1.6695, -0.5222, 0.5274, -0.0597]) tensor([0.0551, 0.1735, 0.4958, 0.2756]) -Greedy action tensor([-1.8242, -0.4711, 0.5980, -0.1121]) tensor([0.0461, 0.1785, 0.5199, 0.2556]) -Greedy action tensor([-1.8823, -0.3431, 0.6179, -0.1412]) tensor([0.0425, 0.1979, 0.5174, 0.2422]) -Greedy action tensor([-1.8918, -0.4041, 0.6337, -0.1501]) tensor([0.0423, 0.1873, 0.5288, 0.2415]) -Greedy action tensor([-7.3766e-01, -4.8377e-01, 2.4799e-01, 2.9579e-05]) tensor([0.1416, 0.1826, 0.3796, 0.2962]) -Greedy action tensor([-1.8940e+00, -4.5499e-01, 7.3569e-01, 2.2280e-04]) tensor([0.0389, 0.1639, 0.5390, 0.2583]) -Greedy action tensor([-1.8550, -0.4266, 0.6189, -0.1233]) tensor([0.0441, 0.1839, 0.5231, 0.2490]) -Greedy action tensor([-1.1486, -0.6383, 0.2585, 0.3130]) tensor([0.0904, 0.1506, 0.3692, 0.3899]) -Greedy action tensor([-1.7983, -0.3392, 0.5855, -0.1092]) tensor([0.0464, 0.1995, 0.5030, 0.2511]) -Greedy action tensor([-1.7397, -0.3474, 0.5347, -0.0288]) tensor([0.0493, 0.1984, 0.4794, 0.2729]) -Greedy action tensor([-1.7786, -0.0348, 0.5203, -0.0659]) tensor([0.0450, 0.2573, 0.4483, 0.2494]) -Greedy action tensor([-1.1663, -0.0403, 0.3779, 0.3911]) tensor([0.0740, 0.2282, 0.3466, 0.3512]) -Greedy action tensor([-1.8977, -0.3585, 0.6372, -0.1563]) tensor([0.0417, 0.1943, 0.5260, 0.2379]) -Greedy action tensor([-1.8927, -0.4556, 0.6439, -0.1562]) tensor([0.0425, 0.1789, 0.5372, 0.2414]) -Greedy action tensor([-1.8893, -0.5735, 0.9250, -0.0391]) tensor([0.0360, 0.1342, 0.6007, 0.2291]) -Greedy action tensor([-1.0062, -0.5203, 0.3288, 0.1339]) tensor([0.1047, 0.1702, 0.3978, 0.3273]) -Greedy action tensor([-1.8344, -0.4682, 0.6097, -0.1429]) tensor([0.0457, 0.1793, 0.5268, 0.2482]) -Greedy action tensor([-1.7635, -0.4252, 0.5752, -0.0713]) tensor([0.0485, 0.1850, 0.5030, 0.2635]) -Greedy action tensor([-0.9952, -0.3938, 0.6363, 0.9594]) tensor([0.0667, 0.1217, 0.3408, 0.4708]) -Greedy action tensor([-1.9131, -0.4372, 0.6491, -0.1610]) tensor([0.0415, 0.1815, 0.5378, 0.2392]) -Greedy action tensor([-1.5141, -0.4709, 0.4486, 0.0356]) tensor([0.0638, 0.1812, 0.4544, 0.3006]) -Greedy action tensor([-0.9454, 0.8712, 0.2214, -0.4303]) tensor([0.0831, 0.5110, 0.2668, 0.1391]) -Greedy action tensor([-1.9365, -0.4551, 0.6816, -0.1692]) tensor([0.0401, 0.1762, 0.5492, 0.2345]) -Greedy action tensor([-1.9375, -0.4464, 0.6650, -0.1764]) tensor([0.0404, 0.1794, 0.5452, 0.2350]) -Greedy action tensor([-1.9446, -0.4548, 0.6675, -0.1795]) tensor([0.0401, 0.1781, 0.5472, 0.2346]) -Greedy action tensor([-1.7481, -0.5096, 0.5599, -0.0838]) tensor([0.0505, 0.1744, 0.5081, 0.2669]) -Greedy action tensor([-1.9452, -0.4556, 0.6659, -0.1819]) tensor([0.0402, 0.1783, 0.5472, 0.2344]) -Greedy action tensor([-1.6667, -0.1723, 0.4919, 0.0552]) tensor([0.0507, 0.2261, 0.4393, 0.2839]) -Greedy action tensor([ 0.8241, -0.7023, 0.0547, -0.2568]) tensor([0.4951, 0.1076, 0.2294, 0.1680]) -Greedy action tensor([ 0.6055, -0.1771, -0.0077, -0.2813]) tensor([0.4148, 0.1897, 0.2247, 0.1709]) -Greedy action tensor([ 0.6595, -0.3255, -0.1847, 0.0276]) tensor([0.4283, 0.1599, 0.1841, 0.2277]) -Greedy action tensor([ 0.5672, -0.1030, -0.0592, -0.4769]) tensor([0.4170, 0.2133, 0.2229, 0.1468]) -Greedy action tensor([ 0.5608, -0.1886, 0.0145, -0.3006]) tensor([0.4042, 0.1910, 0.2340, 0.1708]) -Greedy action tensor([ 0.7150, -0.4527, -0.0224, -0.3850]) tensor([0.4712, 0.1466, 0.2254, 0.1568]) -Greedy action tensor([ 0.9492, -0.6003, -0.0023, -0.4015]) tensor([0.5383, 0.1143, 0.2079, 0.1395]) -Greedy action tensor([ 0.7715, -0.3744, -0.0375, -0.4882]) tensor([0.4885, 0.1553, 0.2175, 0.1386]) -Greedy action tensor([ 0.6758, -0.6206, 0.1502, -0.2099]) tensor([0.4392, 0.1201, 0.2596, 0.1811]) -Greedy action tensor([ 0.2564, 0.0488, -0.1121, -0.0748]) tensor([0.3103, 0.2522, 0.2147, 0.2228]) -Greedy action tensor([ 1.3467, -1.1295, 0.1083, -0.7599]) tensor([0.6686, 0.0562, 0.1938, 0.0813]) -Greedy action tensor([ 0.7995, -0.3667, -0.0888, -0.2665]) tensor([0.4837, 0.1507, 0.1990, 0.1666]) -Greedy action tensor([ 0.9367, -0.1457, -0.0321, -0.3151]) tensor([0.4989, 0.1690, 0.1894, 0.1427]) -Greedy action tensor([ 0.3244, -0.1802, -0.0711, -0.1103]) tensor([0.3419, 0.2064, 0.2302, 0.2214]) -Greedy action tensor([ 0.6075, 0.2394, -0.1044, -0.2204]) tensor([0.3817, 0.2642, 0.1873, 0.1668]) -Greedy action tensor([ 0.7051, -0.4598, -0.0513, -0.2840]) tensor([0.4644, 0.1449, 0.2180, 0.1727]) -Greedy action tensor([ 0.6316, -0.6827, 0.2388, -0.8673]) tensor([0.4614, 0.1240, 0.3115, 0.1031]) -Greedy action tensor([ 0.8071, -0.3698, -0.1900, -0.6033]) tensor([0.5205, 0.1604, 0.1920, 0.1270]) -Greedy action tensor([ 0.7489, 0.0075, -0.0826, 0.0342]) tensor([0.4165, 0.1984, 0.1813, 0.2038]) -Greedy action tensor([ 0.4423, -0.1422, -0.0902, -0.2915]) tensor([0.3810, 0.2124, 0.2237, 0.1829]) -Greedy action tensor([ 1.1924, -0.4456, 0.1343, -0.4264]) tensor([0.5748, 0.1117, 0.1995, 0.1139]) -Greedy action tensor([ 0.5980, -0.3080, 0.0951, -0.5609]) tensor([0.4305, 0.1740, 0.2604, 0.1351]) -Greedy action tensor([ 1.1647, -0.8190, 0.0214, -0.3181]) tensor([0.5941, 0.0817, 0.1894, 0.1349]) -Greedy action tensor([ 0.9001, -0.4510, -0.0859, -0.1471]) tensor([0.5043, 0.1306, 0.1881, 0.1770]) -Greedy action tensor([ 0.9455, -0.5619, -0.0120, -0.5718]) tensor([0.5480, 0.1214, 0.2104, 0.1202]) -Greedy action tensor([ 0.6809, -0.1879, -0.0349, -0.1434]) tensor([0.4261, 0.1787, 0.2083, 0.1869]) -Greedy action tensor([ 0.6600, -0.0509, 0.0946, -0.2024]) tensor([0.4030, 0.1980, 0.2290, 0.1701]) -Greedy action tensor([ 0.8930, -0.5896, -0.0876, -0.5573]) tensor([0.5445, 0.1236, 0.2042, 0.1277]) -Greedy action tensor([ 0.6159, -0.3531, -0.0455, -0.1998]) tensor([0.4277, 0.1623, 0.2208, 0.1892]) -Greedy action tensor([ 1.3627, -0.9176, 0.0067, -0.6732]) tensor([0.6709, 0.0686, 0.1729, 0.0876]) -Greedy action tensor([ 0.8399, -0.8243, 0.0052, -0.3462]) tensor([0.5185, 0.0982, 0.2250, 0.1583]) -Greedy action tensor([ 0.8838, -0.6046, -0.0080, -0.3545]) tensor([0.5193, 0.1172, 0.2129, 0.1505]) -Greedy action tensor([ 0.8525, -0.2711, -0.1671, -0.3885]) tensor([0.5064, 0.1646, 0.1827, 0.1464]) -Greedy action tensor([ 0.5352, -0.2167, -0.0312, -0.2509]) tensor([0.4009, 0.1890, 0.2275, 0.1826]) -Greedy action tensor([ 0.5905, -0.4931, -0.1739, -0.2891]) tensor([0.4507, 0.1525, 0.2098, 0.1870]) -Greedy action tensor([ 0.6123, 0.0887, -0.2885, -0.4511]) tensor([0.4267, 0.2527, 0.1733, 0.1473]) -Greedy action tensor([ 0.3946, 0.0546, -0.0179, -0.1962]) tensor([0.3416, 0.2431, 0.2261, 0.1892]) -Greedy action tensor([ 0.2587, -0.3277, -0.1333, -0.0694]) tensor([0.3387, 0.1884, 0.2289, 0.2440]) -Greedy action tensor([ 1.3859, -0.9322, 0.0139, -0.6846]) tensor([0.6765, 0.0666, 0.1716, 0.0853]) -Greedy action tensor([ 0.6823, -0.5936, -0.0499, -0.2536]) tensor([0.4646, 0.1297, 0.2234, 0.1823]) -Greedy action tensor([ 0.5196, 0.0271, -0.1518, -0.0604]) tensor([0.3729, 0.2279, 0.1905, 0.2088]) -Greedy action tensor([ 0.3145, -0.0525, -0.1832, -0.1424]) tensor([0.3408, 0.2361, 0.2072, 0.2158]) -Greedy action tensor([ 0.3954, -0.3784, -0.0027, -0.2964]) tensor([0.3797, 0.1751, 0.2550, 0.1901]) -Greedy action tensor([ 0.6299, -0.4618, -0.0444, -0.1665]) tensor([0.4355, 0.1462, 0.2219, 0.1964]) -Greedy action tensor([ 0.4478, -0.2762, -0.0609, -0.0544]) tensor([0.3716, 0.1801, 0.2234, 0.2249]) -Greedy action tensor([ 0.5310, 0.0106, -0.1836, -0.2229]) tensor([0.3915, 0.2327, 0.1916, 0.1842]) -Greedy action tensor([ 0.4620, -0.1588, -0.1279, -0.1314]) tensor([0.3782, 0.2033, 0.2097, 0.2089]) -Greedy action tensor([ 0.7033, -0.5116, -0.0958, -0.3854]) tensor([0.4800, 0.1424, 0.2159, 0.1616]) -Greedy action tensor([ 1.1421, -1.0297, 0.1463, -0.6759]) tensor([0.6076, 0.0693, 0.2245, 0.0986]) -Greedy action tensor([ 0.5789, -0.4704, -0.1380, -0.0510]) tensor([0.4217, 0.1477, 0.2059, 0.2246]) -Greedy action tensor([ 0.7553, -0.3714, 0.0138, -0.3012]) tensor([0.4655, 0.1509, 0.2218, 0.1618]) -Greedy action tensor([ 0.7789, -0.7163, -0.0292, -0.3033]) tensor([0.4978, 0.1116, 0.2219, 0.1687]) -Greedy action tensor([ 0.8273, -0.9249, 0.0420, -0.5355]) tensor([0.5304, 0.0920, 0.2419, 0.1358]) -Greedy action tensor([ 0.5759, -0.2022, -0.1978, -0.2491]) tensor([0.4239, 0.1947, 0.1956, 0.1858]) -Greedy action tensor([ 0.6223, -0.2086, -0.0947, -0.1951]) tensor([0.4227, 0.1842, 0.2064, 0.1867]) -Greedy action tensor([ 0.6923, -0.3502, -0.1017, -0.0664]) tensor([0.4400, 0.1551, 0.1989, 0.2060]) -Greedy action tensor([ 0.7201, -0.3654, -0.2287, -0.2752]) tensor([0.4774, 0.1612, 0.1849, 0.1765]) -Greedy action tensor([ 0.8489, -0.8715, 0.0343, -0.3580]) tensor([0.5206, 0.0932, 0.2305, 0.1557]) -Greedy action tensor([ 0.7489, -0.5000, -0.0497, -0.4157]) tensor([0.4881, 0.1400, 0.2196, 0.1523]) -Greedy action tensor([ 0.6693, 0.1931, 0.1419, -0.4190]) tensor([0.3924, 0.2438, 0.2316, 0.1322]) -Greedy action tensor([ 0.5084, -0.2157, -0.1985, -0.3741]) tensor([0.4181, 0.2027, 0.2062, 0.1730]) -Greedy action tensor([ 0.8527, -0.5534, -0.1246, -0.2453]) tensor([0.5115, 0.1254, 0.1925, 0.1706]) -Greedy action tensor([ 0.8069, -0.5171, -0.0402, -0.4891]) tensor([0.5080, 0.1352, 0.2178, 0.1390]) -Greedy action tensor([ 0.5859, -0.3366, -0.1040, -0.1491]) tensor([0.4204, 0.1671, 0.2109, 0.2016]) -Greedy action tensor([ 0.5867, -0.3153, 0.0394, -0.4240]) tensor([0.4259, 0.1728, 0.2464, 0.1550]) -Greedy action tensor([ 1.2644, -0.7482, -0.0173, -0.4277]) tensor([0.6268, 0.0838, 0.1740, 0.1154]) -Greedy action tensor([ 0.8582, -0.3954, 0.0117, -0.4283]) tensor([0.5023, 0.1434, 0.2155, 0.1388]) -Greedy action tensor([ 1.1345, -0.5687, -0.0382, -0.4690]) tensor([0.5907, 0.1076, 0.1829, 0.1188]) -Greedy action tensor([ 0.8389, -0.7818, -0.0342, -0.2271]) tensor([0.5103, 0.1009, 0.2131, 0.1757]) -Greedy action tensor([ 0.8322, -0.4222, -0.0784, -0.4376]) tensor([0.5080, 0.1449, 0.2044, 0.1427]) -Greedy action tensor([ 0.7819, -0.4774, 0.0395, -0.2227]) tensor([0.4704, 0.1335, 0.2239, 0.1722]) -Greedy action tensor([ 0.6656, -0.3422, -0.1037, -0.2672]) tensor([0.4501, 0.1643, 0.2085, 0.1771]) -Greedy action tensor([ 0.9102, -0.3710, -0.1708, -0.2986]) tensor([0.5220, 0.1450, 0.1771, 0.1559]) -Greedy action tensor([ 0.6714, -0.3699, -0.0267, -0.1301]) tensor([0.4349, 0.1535, 0.2164, 0.1951]) -Greedy action tensor([ 0.1881, -0.2512, -0.0228, -0.4206]) tensor([0.3335, 0.2149, 0.2701, 0.1814]) -Greedy action tensor([ 0.8850, -0.7921, -0.0219, -0.4341]) tensor([0.5382, 0.1006, 0.2173, 0.1439]) -Greedy action tensor([ 0.5895, -0.3846, -0.1033, -0.1139]) tensor([0.4215, 0.1591, 0.2108, 0.2086]) -Greedy action tensor([ 0.7318, -0.2067, -0.0590, -0.1051]) tensor([0.4390, 0.1718, 0.1991, 0.1901]) -Greedy action tensor([ 0.4264, 0.0680, 0.0641, -0.2826]) tensor([0.3464, 0.2420, 0.2411, 0.1705]) -Greedy action tensor([ 0.6861, -0.5206, -0.0170, -0.3294]) tensor([0.4637, 0.1387, 0.2296, 0.1680]) -Greedy action tensor([ 0.3692, -0.3202, 0.1305, -0.3918]) tensor([0.3628, 0.1821, 0.2857, 0.1695]) -Greedy action tensor([-0.4132, -1.9196, 0.1015, 0.6665]) tensor([0.1713, 0.0380, 0.2866, 0.5042]) -Greedy action tensor([ 1.1783, -0.4851, -0.1167, -0.3950]) tensor([0.5985, 0.1134, 0.1639, 0.1241]) -Greedy action tensor([-0.0843, -1.7184, -0.1211, -0.9332]) tensor([0.3866, 0.0754, 0.3726, 0.1654]) -Greedy action tensor([ 0.5009, -0.6122, -0.4988, 1.3603]) tensor([0.2464, 0.0810, 0.0907, 0.5819]) -Greedy action tensor([ 1.8146, -0.0302, 0.8727, -0.1984]) tensor([0.5947, 0.0940, 0.2319, 0.0794]) -Greedy action tensor([-0.2501, 0.0869, 0.3577, -1.0757]) tensor([0.2139, 0.2996, 0.3928, 0.0937]) -Greedy action tensor([ 1.0059, -0.1259, 0.2483, 0.1486]) tensor([0.4514, 0.1455, 0.2116, 0.1915]) -Greedy action tensor([-1.2130, -1.3794, -0.5748, 0.5579]) tensor([0.1040, 0.0881, 0.1969, 0.6111]) -Greedy action tensor([-0.5377, -1.4787, 0.2966, 0.3108]) tensor([0.1659, 0.0647, 0.3820, 0.3874]) -Greedy action tensor([-0.6235, -0.4230, 0.8739, -0.6300]) tensor([0.1301, 0.1590, 0.5816, 0.1293]) -Greedy action tensor([ 1.2827, -1.7676, 0.8576, -0.4655]) tensor([0.5333, 0.0252, 0.3486, 0.0928]) -Greedy action tensor([ 0.6960, -0.9178, 0.6778, 0.4632]) tensor([0.3363, 0.0670, 0.3303, 0.2665]) -Greedy action tensor([ 0.1810, -0.3377, 1.5289, -0.0723]) tensor([0.1608, 0.0957, 0.6188, 0.1248]) -Greedy action tensor([ 0.5435, -0.5515, -0.0327, 0.7383]) tensor([0.3214, 0.1075, 0.1806, 0.3905]) -Greedy action tensor([-1.4379, -0.1545, -0.5178, -0.8243]) tensor([0.1115, 0.4025, 0.2799, 0.2060]) -Greedy action tensor([ 1.1930, -0.3425, 0.0769, 0.0774]) tensor([0.5346, 0.1151, 0.1751, 0.1752]) -Greedy action tensor([ 0.7797, -2.0707, -0.5248, 1.1064]) tensor([0.3682, 0.0213, 0.0999, 0.5105]) -Greedy action tensor([ 1.2897, -0.9468, 1.1031, 0.8368]) tensor([0.3887, 0.0415, 0.3226, 0.2472]) -Greedy action tensor([ 1.6199, -0.9121, 1.3096, 0.6985]) tensor([0.4524, 0.0360, 0.3317, 0.1800]) -Greedy action tensor([ 0.1887, -0.9975, 0.3612, -0.2289]) tensor([0.3172, 0.0969, 0.3770, 0.2089]) -Greedy action tensor([ 0.3353, -0.0610, -0.0183, -0.2321]) tensor([0.3399, 0.2287, 0.2387, 0.1927]) -Greedy action tensor([-0.2813, 0.6798, -0.4594, 0.0022]) tensor([0.1730, 0.4524, 0.1448, 0.2297]) -Greedy action tensor([ 0.6939, -1.6916, 0.4126, 0.8403]) tensor([0.3328, 0.0306, 0.2512, 0.3853]) -Greedy action tensor([2.2427, 0.3466, 0.4642, 0.7771]) tensor([0.6452, 0.0969, 0.1090, 0.1490]) -Greedy action tensor([-0.1246, 0.8222, -0.1646, -0.6541]) tensor([0.1950, 0.5027, 0.1874, 0.1149]) -Greedy action tensor([ 0.5430, 0.0884, -0.2618, -0.2724]) tensor([0.3961, 0.2514, 0.1771, 0.1753]) -Greedy action tensor([ 0.2787, -0.3273, -0.1942, -0.8563]) tensor([0.4016, 0.2191, 0.2503, 0.1291]) -Greedy action tensor([ 1.4449, -0.3548, 0.6948, 0.6453]) tensor([0.4791, 0.0792, 0.2263, 0.2154]) -Greedy action tensor([-0.8141, -0.3032, -0.0673, -1.0922]) tensor([0.1807, 0.3012, 0.3813, 0.1368]) -Greedy action tensor([ 1.0796, -0.2154, 0.8350, -0.2619]) tensor([0.4313, 0.1181, 0.3377, 0.1128]) -Greedy action tensor([ 0.6771, 0.9617, -0.1584, 0.4243]) tensor([0.2825, 0.3755, 0.1225, 0.2194]) -Greedy action tensor([-0.7137, 0.2360, 0.0295, 0.5216]) tensor([0.1096, 0.2832, 0.2304, 0.3768]) -Greedy action tensor([ 0.2930, -0.8049, -0.0623, -0.5285]) tensor([0.4042, 0.1348, 0.2833, 0.1777]) -Greedy action tensor([-1.3113, -0.2280, 1.1430, -0.7418]) tensor([0.0576, 0.1702, 0.6704, 0.1018]) -Greedy action tensor([-0.7131, -1.7587, 0.7003, 1.0470]) tensor([0.0887, 0.0312, 0.3645, 0.5156]) -Greedy action tensor([ 1.6038, -0.9367, 0.8271, 0.9238]) tensor([0.4889, 0.0385, 0.2249, 0.2477]) -Greedy action tensor([-1.2872, -1.3780, 1.3439, -0.5279]) tensor([0.0557, 0.0509, 0.7742, 0.1191]) -Greedy action tensor([ 1.5590, -0.4365, 1.1588, 0.8472]) tensor([0.4354, 0.0592, 0.2918, 0.2137]) -Greedy action tensor([-0.3343, -0.3566, -0.5551, -0.8131]) tensor([0.2942, 0.2877, 0.2359, 0.1822]) -Greedy action tensor([ 0.6059, -0.8348, 0.9549, 0.1456]) tensor([0.3044, 0.0721, 0.4315, 0.1921]) -Greedy action tensor([-1.2416, -1.3908, 2.0750, -0.7521]) tensor([0.0322, 0.0277, 0.8875, 0.0525]) -Greedy action tensor([-0.8223, 0.0637, 0.0758, 0.0636]) tensor([0.1204, 0.2920, 0.2956, 0.2920]) -Greedy action tensor([ 0.2460, -2.0106, 0.0111, 0.4535]) tensor([0.3199, 0.0335, 0.2529, 0.3937]) -Greedy action tensor([ 0.0950, 0.7120, 0.7722, -0.6007]) tensor([0.1880, 0.3483, 0.3700, 0.0937]) -Greedy action tensor([-1.2638, -0.0436, -1.3067, 0.0201]) tensor([0.1117, 0.3783, 0.1070, 0.4031]) -Greedy action tensor([-0.7358, -1.2257, 0.6992, -0.6849]) tensor([0.1457, 0.0893, 0.6118, 0.1533]) -Greedy action tensor([-0.3997, -1.0706, -0.4959, -0.2991]) tensor([0.2837, 0.1450, 0.2576, 0.3137]) -Greedy action tensor([-0.2625, -0.9575, 0.8571, 0.5144]) tensor([0.1484, 0.0741, 0.4547, 0.3228]) -Greedy action tensor([-0.5011, -0.4273, 0.0675, -0.1924]) tensor([0.1922, 0.2069, 0.3393, 0.2616]) -Greedy action tensor([-0.7894, -0.9846, 0.5650, -0.0923]) tensor([0.1298, 0.1068, 0.5029, 0.2606]) -Greedy action tensor([ 0.9512, -0.2356, -0.2340, -0.4881]) tensor([0.5411, 0.1652, 0.1654, 0.1283]) -Greedy action tensor([-1.3650, -1.2718, 0.0124, -1.1145]) tensor([0.1361, 0.1494, 0.5396, 0.1749]) -Greedy action tensor([-0.2542, -0.7495, -0.3479, 1.7460]) tensor([0.1009, 0.0615, 0.0919, 0.7457]) -Greedy action tensor([-0.1993, -1.0041, -0.4366, -0.6075]) tensor([0.3447, 0.1542, 0.2719, 0.2292]) -Greedy action tensor([ 0.5365, -0.2646, -0.3548, -0.0830]) tensor([0.4172, 0.1872, 0.1711, 0.2245]) -Greedy action tensor([ 0.5499, -0.3041, 2.1669, 0.6366]) tensor([0.1324, 0.0564, 0.6669, 0.1444]) -Greedy action tensor([-0.0845, 0.0106, 0.2559, -0.5543]) tensor([0.2421, 0.2663, 0.3403, 0.1514]) -Greedy action tensor([ 0.2933, -0.8122, -0.2419, 0.8963]) tensor([0.2671, 0.0884, 0.1564, 0.4881]) -Greedy action tensor([ 1.7374, -0.2557, 0.1320, 0.9035]) tensor([0.5645, 0.0769, 0.1134, 0.2452]) -Greedy action tensor([-0.7038, -0.2408, -1.2735, -0.2424]) tensor([0.2109, 0.3351, 0.1193, 0.3346]) -Greedy action tensor([-0.2128, -0.6910, -0.3453, -1.1113]) tensor([0.3445, 0.2135, 0.3017, 0.1403]) -Greedy action tensor([ 0.9133, 0.5562, 0.1405, -0.2308]) tensor([0.4032, 0.2821, 0.1862, 0.1284]) -Greedy action tensor([-0.5076, 0.0205, 1.1977, -0.9875]) tensor([0.1134, 0.1923, 0.6241, 0.0702]) -Greedy action tensor([ 0.1894, -0.6648, -0.7648, 0.4702]) tensor([0.3190, 0.1358, 0.1229, 0.4224]) -Greedy action tensor([ 0.1731, 0.7825, -0.3037, -0.4112]) tensor([0.2489, 0.4578, 0.1545, 0.1388]) -Greedy action tensor([-0.2740, -0.3886, 0.6458, -0.5840]) tensor([0.1948, 0.1737, 0.4887, 0.1429]) -Greedy action tensor([ 0.1869, -1.1397, -0.9830, -0.1050]) tensor([0.4305, 0.1143, 0.1336, 0.3216]) -Greedy action tensor([ 0.5965, 1.2006, 0.6257, -0.1187]) tensor([0.2300, 0.4208, 0.2368, 0.1125]) -Greedy action tensor([1.1634, 0.1538, 0.2101, 0.5246]) tensor([0.4390, 0.1600, 0.1692, 0.2318]) -Greedy action tensor([-1.4101, -0.8422, 0.8443, 0.3574]) tensor([0.0551, 0.0972, 0.5250, 0.3226]) -Greedy action tensor([-0.0737, -1.4860, 0.6060, -0.0289]) tensor([0.2346, 0.0571, 0.4629, 0.2453]) -Greedy action tensor([0.1086, 0.7385, 0.2502, 0.0533]) tensor([0.2010, 0.3773, 0.2316, 0.1902]) -Greedy action tensor([ 0.8150, 0.0619, -0.3495, 0.5100]) tensor([0.3968, 0.1869, 0.1238, 0.2925]) -Greedy action tensor([-0.1158, -0.5301, 0.6978, 0.5142]) tensor([0.1726, 0.1140, 0.3893, 0.3241]) -Greedy action tensor([ 0.4930, -1.4053, 0.0348, -0.5586]) tensor([0.4691, 0.0703, 0.2967, 0.1639]) -Greedy action tensor([0.0079, 0.0051, 0.1388, 0.4900]) tensor([0.2102, 0.2096, 0.2396, 0.3405]) -Greedy action tensor([-0.2859, -0.1504, 0.1000, 1.1414]) tensor([0.1285, 0.1471, 0.1890, 0.5354]) -Greedy action tensor([-0.8165, -0.5955, 0.5278, -0.5344]) tensor([0.1350, 0.1684, 0.5177, 0.1790]) -Greedy action tensor([ 0.6123, -1.8844, -0.3371, -0.2450]) tensor([0.5281, 0.0435, 0.2044, 0.2241]) -Greedy action tensor([ 1.3995, -1.1364, 1.5903, 0.3934]) tensor([0.3766, 0.0298, 0.4558, 0.1377]) -Greedy action tensor([ 0.6075, 0.2887, 0.5695, -0.4644]) tensor([0.3298, 0.2398, 0.3175, 0.1129]) -Greedy action tensor([-0.6685, -0.3010, -0.7570, -0.8545]) tensor([0.2387, 0.3447, 0.2185, 0.1982]) -Greedy action tensor([ 1.8717, -0.1376, -0.4787, 0.2425]) tensor([0.7015, 0.0941, 0.0669, 0.1376]) -Greedy action tensor([ 1.8375, -0.1876, 0.0452, 0.1701]) tensor([0.6724, 0.0887, 0.1120, 0.1269]) -Greedy action tensor([ 1.6277, -0.2282, -0.8899, 0.4984]) tensor([0.6409, 0.1002, 0.0517, 0.2072]) -Greedy action tensor([ 2.1622, -0.6374, -0.5428, 0.4228]) tensor([0.7673, 0.0467, 0.0513, 0.1348]) -Greedy action tensor([ 1.8043, -0.8905, -0.2947, 0.3702]) tensor([0.7001, 0.0473, 0.0858, 0.1668]) -Greedy action tensor([ 1.6198, -0.4838, -0.0927, 0.3423]) tensor([0.6324, 0.0772, 0.1141, 0.1763]) -Greedy action tensor([ 1.4392, -0.5217, -0.5177, -0.0036]) tensor([0.6586, 0.0927, 0.0931, 0.1556]) -Greedy action tensor([ 1.9394, -1.0661, -0.2336, 0.2536]) tensor([0.7415, 0.0367, 0.0844, 0.1374]) -Greedy action tensor([ 1.0699, -0.1713, -0.2171, -0.0334]) tensor([0.5272, 0.1524, 0.1455, 0.1749]) -Greedy action tensor([ 1.6363, -0.6458, -0.0866, 0.3158]) tensor([0.6462, 0.0659, 0.1154, 0.1725]) -Greedy action tensor([ 1.7589, -0.1505, -0.6194, 0.9427]) tensor([0.5942, 0.0880, 0.0551, 0.2627]) -Greedy action tensor([ 1.4649, -0.5092, -0.4073, 0.7401]) tensor([0.5627, 0.0782, 0.0865, 0.2726]) -Greedy action tensor([ 1.9335, -0.5470, 0.1004, 0.1041]) tensor([0.7122, 0.0596, 0.1139, 0.1143]) -Greedy action tensor([ 1.6914, -0.6077, -0.4173, 0.4667]) tensor([0.6598, 0.0662, 0.0801, 0.1939]) -Greedy action tensor([ 0.8865, -0.2443, -0.0687, -0.1815]) tensor([0.4875, 0.1574, 0.1876, 0.1676]) -Greedy action tensor([ 2.1597, 0.3131, -0.1473, -0.2505]) tensor([0.7423, 0.1171, 0.0739, 0.0667]) -Greedy action tensor([ 1.4891, -0.3264, -0.3711, 0.2829]) tensor([0.6181, 0.1006, 0.0962, 0.1850]) -Greedy action tensor([ 1.6649, -1.1316, 0.2445, 0.0912]) tensor([0.6623, 0.0404, 0.1600, 0.1373]) -Greedy action tensor([ 1.8726, -0.5058, -0.4942, 0.3592]) tensor([0.7109, 0.0659, 0.0667, 0.1565]) -Greedy action tensor([ 1.1069, -0.0797, -0.0947, 0.1555]) tensor([0.5020, 0.1532, 0.1509, 0.1939]) -Greedy action tensor([ 1.2842, -0.4020, 0.2772, 0.1406]) tensor([0.5350, 0.0991, 0.1954, 0.1705]) -Greedy action tensor([ 1.5855, -0.1108, -0.1968, 0.4808]) tensor([0.5942, 0.1090, 0.1000, 0.1969]) -Greedy action tensor([ 1.2611, -0.4710, -0.3568, 0.5445]) tensor([0.5366, 0.0949, 0.1064, 0.2621]) -Greedy action tensor([ 1.7304, -0.5620, -0.7370, 0.2848]) tensor([0.7035, 0.0711, 0.0597, 0.1657]) -Greedy action tensor([ 2.3233, -0.5233, -0.5502, 1.0460]) tensor([0.7177, 0.0417, 0.0406, 0.2001]) -Greedy action tensor([ 1.7993, -0.6868, -0.1387, 0.0337]) tensor([0.7152, 0.0595, 0.1030, 0.1223]) -Greedy action tensor([ 0.9813, -0.6579, -0.0767, 0.3794]) tensor([0.4787, 0.0929, 0.1662, 0.2622]) -Greedy action tensor([ 1.5734, -0.4262, -0.6222, 0.3493]) tensor([0.6491, 0.0879, 0.0722, 0.1908]) -Greedy action tensor([ 0.9159, -0.0323, -0.0353, 0.0883]) tensor([0.4523, 0.1752, 0.1747, 0.1977]) -Greedy action tensor([ 0.3898, -0.4428, 0.2244, -0.2149]) tensor([0.3535, 0.1537, 0.2996, 0.1931]) -Greedy action tensor([ 1.6072, -0.0620, -0.6667, 0.2450]) tensor([0.6463, 0.1217, 0.0665, 0.1655]) -Greedy action tensor([ 2.5958, -1.6136, 0.0818, 0.0144]) tensor([0.8536, 0.0127, 0.0691, 0.0646]) -Greedy action tensor([ 1.1376, -0.6059, -0.2885, 0.0839]) tensor([0.5670, 0.0992, 0.1362, 0.1977]) -Greedy action tensor([ 2.3596, -1.2817, -0.4375, 1.0016]) tensor([0.7438, 0.0195, 0.0454, 0.1913]) -Greedy action tensor([ 2.1641, -0.5760, -0.7121, 0.7619]) tensor([0.7315, 0.0472, 0.0412, 0.1800]) -Greedy action tensor([ 2.4272, -1.0279, -0.4403, 1.0012]) tensor([0.7526, 0.0238, 0.0428, 0.1808]) -Greedy action tensor([ 1.0449, -0.5291, -0.3310, 0.4970]) tensor([0.4907, 0.1017, 0.1240, 0.2837]) -Greedy action tensor([ 2.4627, -0.2792, -0.9017, 0.2465]) tensor([0.8278, 0.0534, 0.0286, 0.0902]) -Greedy action tensor([ 1.8683, -0.2326, -0.2057, 0.2436]) tensor([0.6920, 0.0847, 0.0870, 0.1363]) -Greedy action tensor([ 1.5154, -0.5229, -0.6725, 0.2492]) tensor([0.6560, 0.0855, 0.0736, 0.1849]) -Greedy action tensor([ 2.0615, -0.6049, -1.2732, 0.4104]) tensor([0.7710, 0.0536, 0.0275, 0.1479]) -Greedy action tensor([ 1.5854, -0.9418, -0.2097, 0.5579]) tensor([0.6235, 0.0498, 0.1036, 0.2231]) -Greedy action tensor([ 1.9076, -0.3431, -0.4459, 0.6393]) tensor([0.6749, 0.0711, 0.0641, 0.1899]) -Greedy action tensor([1.1141, 0.1652, 0.0505, 0.0991]) tensor([0.4774, 0.1848, 0.1648, 0.1730]) -Greedy action tensor([ 1.4868, -0.2541, -0.6844, 0.3501]) tensor([0.6210, 0.1089, 0.0708, 0.1993]) -Greedy action tensor([ 2.1040, -0.4651, -0.5910, 0.4732]) tensor([0.7463, 0.0572, 0.0504, 0.1461]) -Greedy action tensor([ 1.2355, 0.0088, -0.7438, 0.5485]) tensor([0.5169, 0.1516, 0.0714, 0.2600]) -Greedy action tensor([ 1.0556, -0.3607, -0.0270, 0.4974]) tensor([0.4644, 0.1127, 0.1573, 0.2657]) -Greedy action tensor([ 2.0982, -0.6647, -0.6272, 0.8960]) tensor([0.6997, 0.0442, 0.0458, 0.2103]) -Greedy action tensor([ 1.4348, -0.4967, -0.2623, 0.4121]) tensor([0.5925, 0.0859, 0.1086, 0.2131]) -Greedy action tensor([ 0.8102, -0.3685, -0.1368, -0.0218]) tensor([0.4693, 0.1444, 0.1820, 0.2042]) -Greedy action tensor([ 1.9487, -0.9829, -0.5739, 0.2492]) tensor([0.7597, 0.0405, 0.0610, 0.1389]) -Greedy action tensor([ 0.7212, 0.0226, -0.4203, 0.4283]) tensor([0.3902, 0.1941, 0.1246, 0.2911]) -Greedy action tensor([ 0.7895, -0.5150, -0.6197, 0.8937]) tensor([0.3809, 0.1033, 0.0931, 0.4227]) -Greedy action tensor([ 0.8568, -0.4840, -0.4386, 0.6771]) tensor([0.4218, 0.1103, 0.1155, 0.3524]) -Greedy action tensor([ 1.9889, -0.6614, -0.0492, 0.2926]) tensor([0.7224, 0.0510, 0.0941, 0.1325]) -Greedy action tensor([ 1.7189, -0.5310, -0.2689, -0.0759]) tensor([0.7099, 0.0748, 0.0973, 0.1180]) -Greedy action tensor([ 1.5770, -0.2625, -0.3733, 0.2865]) tensor([0.6344, 0.1008, 0.0902, 0.1746]) -Greedy action tensor([ 2.4009, -0.9260, -0.1093, 0.8692]) tensor([0.7500, 0.0269, 0.0609, 0.1621]) -Greedy action tensor([ 1.4518, -0.2241, -0.5357, 0.3219]) tensor([0.6071, 0.1136, 0.0832, 0.1961]) -Greedy action tensor([ 1.3992, -0.2164, -0.3595, 0.2252]) tensor([0.5952, 0.1183, 0.1025, 0.1840]) -Greedy action tensor([ 2.0557, -0.3390, -0.2882, 0.2566]) tensor([0.7393, 0.0674, 0.0709, 0.1223]) -Greedy action tensor([ 1.1748, -0.4426, -0.3536, 0.3962]) tensor([0.5335, 0.1059, 0.1157, 0.2449]) -Greedy action tensor([ 1.7553, -0.3267, -0.3923, 0.1713]) tensor([0.6913, 0.0862, 0.0807, 0.1418]) -Greedy action tensor([ 1.6242, -0.2736, -0.4872, 0.4493]) tensor([0.6330, 0.0949, 0.0766, 0.1955]) -Greedy action tensor([ 1.7995, -0.1975, -0.6727, 0.6315]) tensor([0.6531, 0.0887, 0.0551, 0.2031]) -Greedy action tensor([ 1.5435, -0.3705, -0.5760, 0.4413]) tensor([0.6251, 0.0922, 0.0751, 0.2076]) -Greedy action tensor([ 1.3066, -0.1472, -0.4656, 0.3675]) tensor([0.5572, 0.1302, 0.0947, 0.2179]) -Greedy action tensor([ 1.4294, -0.6503, -0.0773, 0.3898]) tensor([0.5882, 0.0735, 0.1304, 0.2080]) -Greedy action tensor([ 1.1857, -0.2126, -0.9953, 0.0766]) tensor([0.5918, 0.1462, 0.0668, 0.1952]) -Greedy action tensor([ 1.2543, 0.0858, -0.5302, 0.1397]) tensor([0.5535, 0.1720, 0.0929, 0.1816]) -Greedy action tensor([ 1.5630, -0.4520, -0.2275, 0.3026]) tensor([0.6314, 0.0842, 0.1054, 0.1790]) -Greedy action tensor([ 0.7807, -0.1879, -0.1155, 0.1250]) tensor([0.4335, 0.1646, 0.1769, 0.2250]) -Greedy action tensor([ 1.3532, -0.4787, -0.7474, 0.4823]) tensor([0.5878, 0.0941, 0.0719, 0.2461]) -Greedy action tensor([ 1.0955, -0.4151, -0.3346, 0.7412]) tensor([0.4626, 0.1021, 0.1107, 0.3246]) -Greedy action tensor([ 1.3993, -0.5488, -0.2877, 0.4991]) tensor([0.5767, 0.0822, 0.1067, 0.2344]) -Greedy action tensor([ 1.3481, -0.0523, -0.4242, 0.4558]) tensor([0.5476, 0.1350, 0.0931, 0.2243]) -Greedy action tensor([ 1.9643, -0.2679, -0.5375, 0.9642]) tensor([0.6422, 0.0689, 0.0526, 0.2362]) -Greedy action tensor([ 1.7222, -0.3900, -0.1428, 0.0686]) tensor([0.6815, 0.0825, 0.1056, 0.1304]) -Greedy action tensor([ 0.7614, -0.2330, 0.0295, 0.3119]) tensor([0.4018, 0.1486, 0.1932, 0.2563]) -Greedy action tensor([ 2.2238, -0.7999, -0.7079, 0.3820]) tensor([0.7934, 0.0386, 0.0423, 0.1258]) -Greedy action tensor([-1.9127, -0.4439, 0.6441, -0.1671]) tensor([0.0417, 0.1812, 0.5380, 0.2390]) -Greedy action tensor([-1.8514, -0.3034, 0.6052, -0.0930]) tensor([0.0432, 0.2029, 0.5034, 0.2505]) -Greedy action tensor([-1.8280, -0.4333, 0.6950, -0.0393]) tensor([0.0426, 0.1718, 0.5309, 0.2547]) -Greedy action tensor([-1.5608, -0.3316, 0.4415, 0.0450]) tensor([0.0595, 0.2034, 0.4407, 0.2964]) -Greedy action tensor([-1.7675, -0.4228, 0.5831, -0.0803]) tensor([0.0482, 0.1851, 0.5060, 0.2607]) -Greedy action tensor([-1.6935, -0.5010, 0.5162, -0.0411]) tensor([0.0537, 0.1769, 0.4892, 0.2802]) -Greedy action tensor([-1.8268, -0.3876, 0.6037, -0.1056]) tensor([0.0451, 0.1902, 0.5125, 0.2522]) -Greedy action tensor([-1.9260, -0.4295, 0.6588, -0.1694]) tensor([0.0408, 0.1821, 0.5408, 0.2363]) -Greedy action tensor([-0.5750, 0.9156, 0.0763, 0.0538]) tensor([0.1083, 0.4808, 0.2077, 0.2031]) -Greedy action tensor([-1.8138, -0.4701, 0.6151, -0.1002]) tensor([0.0460, 0.1764, 0.5222, 0.2554]) -Greedy action tensor([-1.6931, 0.2513, 0.4658, -0.0138]) tensor([0.0454, 0.3175, 0.3935, 0.2436]) -Greedy action tensor([-1.9341, -0.4440, 0.6588, -0.1757]) tensor([0.0406, 0.1803, 0.5432, 0.2358]) -Greedy action tensor([-1.9350, -0.4311, 0.6598, -0.1752]) tensor([0.0405, 0.1821, 0.5422, 0.2352]) -Greedy action tensor([-1.8868, -0.4367, 0.6377, -0.1532]) tensor([0.0427, 0.1821, 0.5333, 0.2418]) -Greedy action tensor([-1.6986, -0.2656, 0.5577, 0.0113]) tensor([0.0493, 0.2068, 0.4711, 0.2728]) -Greedy action tensor([-1.7408, -0.4215, 0.6034, -0.0102]) tensor([0.0481, 0.1798, 0.5010, 0.2712]) -Greedy action tensor([-1.7479, -0.4320, 0.5633, -0.0812]) tensor([0.0497, 0.1854, 0.5016, 0.2633]) -Greedy action tensor([-1.9407, -0.4575, 0.6704, -0.1766]) tensor([0.0402, 0.1773, 0.5477, 0.2348]) -Greedy action tensor([-1.4909, -0.0423, 0.3299, 0.0623]) tensor([0.0619, 0.2634, 0.3822, 0.2925]) -Greedy action tensor([-1.4564, -0.5704, 0.4152, 0.0366]) tensor([0.0696, 0.1687, 0.4521, 0.3096]) -Greedy action tensor([-1.6469, 0.0023, 0.6815, -0.5964]) tensor([0.0517, 0.2692, 0.5311, 0.1480]) -Greedy action tensor([-1.8854, -0.3820, 0.6335, -0.1396]) tensor([0.0423, 0.1902, 0.5251, 0.2424]) -Greedy action tensor([-1.7511, -0.1921, 0.6288, 0.0079]) tensor([0.0447, 0.2126, 0.4831, 0.2596]) -Greedy action tensor([-1.8911, -0.3725, 0.6224, -0.1571]) tensor([0.0424, 0.1936, 0.5237, 0.2402]) -Greedy action tensor([-0.6359, 1.0854, 0.1413, 0.3750]) tensor([0.0868, 0.4856, 0.1889, 0.2387]) -Greedy action tensor([-1.8942, -0.4374, 0.6367, -0.1539]) tensor([0.0425, 0.1822, 0.5334, 0.2419]) -Greedy action tensor([-1.4206, -0.4438, 0.4622, 0.3382]) tensor([0.0624, 0.1657, 0.4099, 0.3621]) -Greedy action tensor([-1.9110, -0.4619, 0.6458, -0.1645]) tensor([0.0419, 0.1783, 0.5398, 0.2401]) -Greedy action tensor([-1.4551, -0.4941, 0.4987, 0.2986]) tensor([0.0608, 0.1590, 0.4290, 0.3512]) -Greedy action tensor([-1.9469, -0.4504, 0.6682, -0.1818]) tensor([0.0400, 0.1788, 0.5472, 0.2339]) -Greedy action tensor([-1.8939, -0.3900, 0.6376, -0.1440]) tensor([0.0420, 0.1888, 0.5277, 0.2415]) -Greedy action tensor([-1.7814, -0.0671, 0.5387, -0.0977]) tensor([0.0452, 0.2511, 0.4602, 0.2435]) -Greedy action tensor([-1.0685, 0.7804, 0.0979, 0.2475]) tensor([0.0700, 0.4445, 0.2246, 0.2609]) -Greedy action tensor([-1.6411, -0.5374, 0.5284, -0.1230]) tensor([0.0577, 0.1740, 0.5050, 0.2633]) -Greedy action tensor([-1.5051, 0.1571, 0.3691, -0.0084]) tensor([0.0580, 0.3055, 0.3776, 0.2589]) -Greedy action tensor([-1.8029, -0.3998, 0.6831, -0.0831]) tensor([0.0441, 0.1795, 0.5300, 0.2464]) -Greedy action tensor([-1.8905, -0.4399, 0.6386, -0.1520]) tensor([0.0426, 0.1816, 0.5338, 0.2421]) -Greedy action tensor([-1.5760, -0.8689, 0.7378, 0.3600]) tensor([0.0498, 0.1010, 0.5038, 0.3453]) -Greedy action tensor([-1.6069, -0.4436, 0.4631, -0.0591]) tensor([0.0594, 0.1902, 0.4710, 0.2794]) -Greedy action tensor([-1.5474, -0.4798, 0.5344, 0.1158]) tensor([0.0581, 0.1691, 0.4661, 0.3067]) -Greedy action tensor([-1.4962, 0.5539, 0.3336, 0.1014]) tensor([0.0501, 0.3896, 0.3125, 0.2478]) -Greedy action tensor([-1.2655, 0.6096, 0.1642, 0.2554]) tensor([0.0614, 0.4007, 0.2567, 0.2812]) -Greedy action tensor([-1.6929, -0.4217, 0.5234, -0.0350]) tensor([0.0527, 0.1878, 0.4831, 0.2764]) -Greedy action tensor([-1.8433, -0.1891, 0.5726, -0.1250]) tensor([0.0435, 0.2273, 0.4869, 0.2424]) -Greedy action tensor([-1.5149, 0.0307, 0.5179, 0.2520]) tensor([0.0521, 0.2446, 0.3981, 0.3052]) -Greedy action tensor([-0.5641, -0.6850, 0.3993, 0.2585]) tensor([0.1474, 0.1306, 0.3863, 0.3356]) -Greedy action tensor([-1.8939, -0.4415, 0.6430, -0.1561]) tensor([0.0424, 0.1811, 0.5356, 0.2409]) -Greedy action tensor([-1.5903, 0.2104, 0.3872, 0.0124]) tensor([0.0520, 0.3146, 0.3754, 0.2581]) -Greedy action tensor([-1.5871, -0.5373, 0.5702, 0.1188]) tensor([0.0555, 0.1586, 0.4801, 0.3057]) -Greedy action tensor([-1.5385, -0.5021, 0.4912, -0.0734]) tensor([0.0635, 0.1789, 0.4830, 0.2747]) -Greedy action tensor([-1.3087, -0.4890, 0.3812, -0.0032]) tensor([0.0808, 0.1834, 0.4378, 0.2981]) -Greedy action tensor([-1.8811, -0.3611, 0.6276, -0.1456]) tensor([0.0425, 0.1943, 0.5222, 0.2410]) -Greedy action tensor([-1.9164, -0.4516, 0.6465, -0.1646]) tensor([0.0416, 0.1798, 0.5391, 0.2396]) -Greedy action tensor([-1.9319, -0.4469, 0.6734, -0.1651]) tensor([0.0403, 0.1780, 0.5457, 0.2359]) -Greedy action tensor([-0.8217, -0.2300, 0.3957, -0.0676]) tensor([0.1203, 0.2174, 0.4065, 0.2558]) -Greedy action tensor([-1.8573, -0.1332, 0.5785, -0.1059]) tensor([0.0420, 0.2357, 0.4801, 0.2422]) -Greedy action tensor([-1.8929, -0.3397, 0.6434, -0.1430]) tensor([0.0415, 0.1960, 0.5239, 0.2386]) -Greedy action tensor([-1.7326, -0.4547, 0.5530, -0.0868]) tensor([0.0510, 0.1831, 0.5015, 0.2645]) -Greedy action tensor([-1.1901, 0.2552, 0.2893, -0.0026]) tensor([0.0774, 0.3286, 0.3400, 0.2539]) -Greedy action tensor([-1.5678, -0.4153, 0.4824, -0.0549]) tensor([0.0607, 0.1922, 0.4716, 0.2755]) -Greedy action tensor([0.9868, 0.3861, 0.6837, 1.5531]) tensor([0.2470, 0.1355, 0.1824, 0.4351]) -Greedy action tensor([-1.7754, -0.4646, 0.5758, -0.0897]) tensor([0.0485, 0.1800, 0.5095, 0.2619]) -Greedy action tensor([-1.9103, -0.4612, 0.6462, -0.1626]) tensor([0.0419, 0.1783, 0.5395, 0.2403]) -Greedy action tensor([-1.8656, -0.4603, 0.6364, -0.1341]) tensor([0.0436, 0.1778, 0.5323, 0.2463]) -Greedy action tensor([-1.8571, -0.4505, 0.6241, -0.1189]) tensor([0.0440, 0.1796, 0.5261, 0.2503]) -Greedy action tensor([-1.8527, -0.4217, 0.6344, -0.1257]) tensor([0.0438, 0.1832, 0.5267, 0.2463]) -Greedy action tensor([-1.9242, -0.4528, 0.6613, -0.1666]) tensor([0.0409, 0.1783, 0.5433, 0.2374]) -Greedy action tensor([-1.9226, -0.4149, 0.6537, -0.1686]) tensor([0.0409, 0.1848, 0.5379, 0.2364]) -Greedy action tensor([-1.8420, -0.4718, 0.6182, -0.1018]) tensor([0.0448, 0.1762, 0.5240, 0.2551]) -Greedy action tensor([-1.8687, -0.3345, 0.6185, -0.1471]) tensor([0.0430, 0.1994, 0.5171, 0.2405]) -Greedy action tensor([-1.8162, -0.3534, 0.6366, -0.0734]) tensor([0.0441, 0.1906, 0.5130, 0.2522]) -Greedy action tensor([-1.8975, -0.4459, 0.6459, -0.1579]) tensor([0.0422, 0.1803, 0.5371, 0.2404]) -Greedy action tensor([-1.9318, -0.4479, 0.6634, -0.1728]) tensor([0.0406, 0.1792, 0.5443, 0.2359]) -Greedy action tensor([-1.2337, 0.3189, 0.2625, -0.0073]) tensor([0.0735, 0.3474, 0.3283, 0.2507]) -Greedy action tensor([-1.8884, -0.4315, 0.6413, -0.1518]) tensor([0.0425, 0.1825, 0.5336, 0.2414]) -Greedy action tensor([-1.1690, -0.3236, 0.3582, 0.1165]) tensor([0.0866, 0.2016, 0.3987, 0.3131]) -Greedy action tensor([-1.3678, -0.5807, 0.3678, 0.0411]) tensor([0.0772, 0.1695, 0.4377, 0.3157]) -Greedy action tensor([-1.2410, -0.0725, 0.4736, 0.3286]) tensor([0.0686, 0.2207, 0.3811, 0.3296]) -Greedy action tensor([-1.8704, -0.4583, 0.6322, -0.1469]) tensor([0.0436, 0.1791, 0.5328, 0.2445]) -Greedy action tensor([-0.5757, 0.4959, 0.2209, 0.6315]) tensor([0.1055, 0.3079, 0.2339, 0.3527]) -Greedy action tensor([-1.3595, 0.1901, 0.5149, 0.4098]) tensor([0.0553, 0.2603, 0.3602, 0.3242]) -Greedy action tensor([ 1.0518, -0.6979, -0.1933, -0.3716]) tensor([0.5873, 0.1021, 0.1691, 0.1415]) -Greedy action tensor([ 0.9647, -0.7607, -0.0516, -0.6908]) tensor([0.5777, 0.1029, 0.2091, 0.1103]) -Greedy action tensor([ 0.6284, -0.1564, -0.0039, -0.2369]) tensor([0.4152, 0.1894, 0.2206, 0.1748]) -Greedy action tensor([ 0.7407, -0.0942, 0.0512, -0.6288]) tensor([0.4566, 0.1981, 0.2291, 0.1161]) -Greedy action tensor([ 0.6305, -0.4460, 0.0136, -0.2746]) tensor([0.4377, 0.1491, 0.2362, 0.1770]) -Greedy action tensor([ 1.1269, -0.7728, 0.1046, -0.5603]) tensor([0.5902, 0.0883, 0.2123, 0.1092]) -Greedy action tensor([ 0.8166, -0.6765, -0.0732, -0.2385]) tensor([0.5041, 0.1133, 0.2071, 0.1755]) -Greedy action tensor([ 0.4416, -0.0553, -0.0694, 0.0381]) tensor([0.3477, 0.2115, 0.2086, 0.2322]) -Greedy action tensor([ 0.4022, -0.1835, -0.0779, 0.1138]) tensor([0.3419, 0.1903, 0.2115, 0.2562]) -Greedy action tensor([ 0.4491, -0.3291, -0.0826, -0.2809]) tensor([0.3955, 0.1816, 0.2324, 0.1906]) -Greedy action tensor([ 0.7724, -0.4738, -0.0598, -0.3083]) tensor([0.4850, 0.1395, 0.2110, 0.1646]) -Greedy action tensor([ 0.3471, -0.1456, -0.2289, -0.3848]) tensor([0.3768, 0.2302, 0.2118, 0.1812]) -Greedy action tensor([ 0.7768, -0.7740, 0.0101, -0.2498]) tensor([0.4914, 0.1042, 0.2283, 0.1760]) -Greedy action tensor([ 0.5462, -0.3651, -0.1547, -0.0852]) tensor([0.4115, 0.1654, 0.2042, 0.2189]) -Greedy action tensor([ 0.6522, -0.1637, -0.0193, -0.3270]) tensor([0.4294, 0.1899, 0.2194, 0.1613]) -Greedy action tensor([ 0.5166, -0.1723, -0.0059, -0.3521]) tensor([0.3977, 0.1997, 0.2358, 0.1668]) -Greedy action tensor([ 0.6077, -0.1177, -0.0760, -0.1532]) tensor([0.4072, 0.1971, 0.2055, 0.1902]) -Greedy action tensor([ 0.7861, -1.0087, 0.0091, -0.3955]) tensor([0.5174, 0.0860, 0.2379, 0.1587]) -Greedy action tensor([ 0.7869, -0.5217, 0.0837, -0.2313]) tensor([0.4703, 0.1271, 0.2328, 0.1699]) -Greedy action tensor([ 0.8893, -0.1809, 0.0522, -0.2155]) tensor([0.4746, 0.1627, 0.2055, 0.1572]) -Greedy action tensor([ 0.6393, -0.6044, -0.2226, -0.4778]) tensor([0.4907, 0.1415, 0.2073, 0.1606]) -Greedy action tensor([ 1.0774, -0.4472, -0.0133, -0.2438]) tensor([0.5493, 0.1196, 0.1846, 0.1466]) -Greedy action tensor([ 0.6469, -0.7270, 0.0557, -0.2614]) tensor([0.4525, 0.1145, 0.2505, 0.1825]) -Greedy action tensor([ 0.6451, -0.4654, -0.0476, -0.1287]) tensor([0.4365, 0.1438, 0.2184, 0.2013]) -Greedy action tensor([ 0.6021, -0.4105, 0.0519, -0.2259]) tensor([0.4207, 0.1528, 0.2427, 0.1838]) -Greedy action tensor([ 0.8153, -0.5958, -0.1057, -0.2657]) tensor([0.5047, 0.1231, 0.2009, 0.1712]) -Greedy action tensor([ 0.8242, -0.0403, -0.3581, -0.1202]) tensor([0.4724, 0.1990, 0.1448, 0.1837]) -Greedy action tensor([ 0.5287, 0.0317, 0.1035, -0.5781]) tensor([0.3857, 0.2346, 0.2521, 0.1275]) -Greedy action tensor([ 0.5854, -0.5571, -0.0988, -0.0294]) tensor([0.4230, 0.1349, 0.2134, 0.2287]) -Greedy action tensor([ 0.8375, -0.5390, -0.0805, -0.1861]) tensor([0.4972, 0.1255, 0.1986, 0.1787]) -Greedy action tensor([ 0.3108, -0.0310, -0.2885, -0.4599]) tensor([0.3673, 0.2610, 0.2017, 0.1700]) -Greedy action tensor([ 0.3155, -0.1399, -0.0455, -0.5013]) tensor([0.3606, 0.2287, 0.2513, 0.1593]) -Greedy action tensor([ 0.4506, 0.2974, -0.1508, -0.4049]) tensor([0.3532, 0.3030, 0.1936, 0.1501]) -Greedy action tensor([ 0.8800, -0.4728, 0.1371, -1.0625]) tensor([0.5326, 0.1377, 0.2534, 0.0763]) -Greedy action tensor([ 0.5592, -0.4875, 0.0701, -0.5996]) tensor([0.4390, 0.1541, 0.2692, 0.1378]) -Greedy action tensor([ 0.7659, -0.4266, -0.0985, -0.1949]) tensor([0.4745, 0.1440, 0.1999, 0.1815]) -Greedy action tensor([ 1.0264, -0.5447, -0.1026, -0.3640]) tensor([0.5617, 0.1167, 0.1816, 0.1399]) -Greedy action tensor([ 0.2828, -0.1415, -0.0737, -0.1127]) tensor([0.3303, 0.2161, 0.2312, 0.2224]) -Greedy action tensor([ 0.8061, -1.1801, 0.0773, -0.6608]) tensor([0.5404, 0.0742, 0.2608, 0.1246]) -Greedy action tensor([ 0.6069, -0.4531, -0.1235, -0.1233]) tensor([0.4329, 0.1500, 0.2085, 0.2086]) -Greedy action tensor([ 0.4019, -0.6345, -0.1999, -0.3648]) tensor([0.4225, 0.1499, 0.2314, 0.1962]) -Greedy action tensor([ 0.6134, -0.2471, -0.1280, -0.0678]) tensor([0.4157, 0.1758, 0.1981, 0.2104]) -Greedy action tensor([ 0.7695, -0.6332, 0.0677, -0.3347]) tensor([0.4824, 0.1186, 0.2391, 0.1599]) -Greedy action tensor([ 0.5543, -0.1593, 0.0327, -0.4124]) tensor([0.4059, 0.1988, 0.2409, 0.1544]) -Greedy action tensor([ 0.4194, -0.0562, -0.0220, -0.1442]) tensor([0.3529, 0.2193, 0.2269, 0.2008]) -Greedy action tensor([ 1.5090, -0.9961, 0.1288, -0.3259]) tensor([0.6699, 0.0547, 0.1685, 0.1069]) -Greedy action tensor([ 1.0078, -0.4162, 0.1153, -0.3913]) tensor([0.5271, 0.1269, 0.2159, 0.1301]) -Greedy action tensor([ 0.7195, -0.3784, -0.0730, -0.3266]) tensor([0.4678, 0.1561, 0.2118, 0.1643]) -Greedy action tensor([ 0.6385, -0.4325, -0.1083, -0.1246]) tensor([0.4381, 0.1501, 0.2076, 0.2042]) -Greedy action tensor([ 0.6704, -0.2483, -0.0121, -0.1932]) tensor([0.4299, 0.1716, 0.2173, 0.1813]) -Greedy action tensor([ 0.2605, -0.0325, 0.0592, -0.1076]) tensor([0.3072, 0.2291, 0.2511, 0.2126]) -Greedy action tensor([ 0.8269, -0.4677, 0.1735, -0.6960]) tensor([0.4969, 0.1362, 0.2585, 0.1084]) -Greedy action tensor([ 0.6299, -0.3245, -0.1781, -0.3126]) tensor([0.4503, 0.1734, 0.2008, 0.1755]) -Greedy action tensor([ 0.5447, -0.7338, -0.0427, -0.1367]) tensor([0.4273, 0.1190, 0.2375, 0.2162]) -Greedy action tensor([ 0.8379, -0.5014, -0.1424, -0.4562]) tensor([0.5232, 0.1371, 0.1963, 0.1434]) -Greedy action tensor([ 0.7999, -0.4921, -0.1289, -0.2431]) tensor([0.4945, 0.1359, 0.1954, 0.1743]) -Greedy action tensor([ 1.4102, -0.5102, -0.1229, -0.8213]) tensor([0.6804, 0.0997, 0.1469, 0.0730]) -Greedy action tensor([ 0.7131, -0.4553, -0.0751, -0.0036]) tensor([0.4437, 0.1379, 0.2017, 0.2167]) -Greedy action tensor([ 0.6555, -0.8213, -0.1559, -0.3393]) tensor([0.4896, 0.1118, 0.2175, 0.1811]) -Greedy action tensor([ 0.7552, -0.3796, -0.1112, -0.4085]) tensor([0.4868, 0.1565, 0.2047, 0.1520]) -Greedy action tensor([ 0.9685, -0.2873, 0.0517, -0.2806]) tensor([0.5073, 0.1445, 0.2028, 0.1455]) -Greedy action tensor([ 0.3853, 0.2488, -0.1676, -0.0227]) tensor([0.3213, 0.2803, 0.1848, 0.2137]) -Greedy action tensor([ 0.8163, -0.1884, -0.1552, -0.3813]) tensor([0.4886, 0.1789, 0.1850, 0.1475]) -Greedy action tensor([ 0.9933, -0.6368, -0.1009, -0.7041]) tensor([0.5835, 0.1143, 0.1954, 0.1069]) -Greedy action tensor([ 0.5273, -0.0526, -0.0169, 0.0875]) tensor([0.3591, 0.2011, 0.2084, 0.2313]) -Greedy action tensor([ 0.8977, -0.4176, 0.0998, -0.2750]) tensor([0.4930, 0.1323, 0.2220, 0.1526]) -Greedy action tensor([ 1.1088, -0.3634, -0.2393, -0.1500]) tensor([0.5640, 0.1294, 0.1465, 0.1602]) -Greedy action tensor([ 0.8844, -0.2245, 0.0211, 0.0309]) tensor([0.4592, 0.1515, 0.1937, 0.1956]) -Greedy action tensor([ 0.6070, -0.4543, -0.0500, -0.3010]) tensor([0.4410, 0.1526, 0.2286, 0.1779]) -Greedy action tensor([ 1.2000, -0.5465, -0.1983, -0.5756]) tensor([0.6286, 0.1096, 0.1553, 0.1065]) -Greedy action tensor([ 0.3029, 0.0481, -0.0918, -0.1719]) tensor([0.3256, 0.2524, 0.2194, 0.2026]) -Greedy action tensor([ 0.7634, -0.5990, -0.1201, -0.3327]) tensor([0.4991, 0.1278, 0.2063, 0.1668]) -Greedy action tensor([ 0.6354, -0.2536, 0.0533, -0.3431]) tensor([0.4263, 0.1752, 0.2382, 0.1602]) -Greedy action tensor([ 0.9080, -0.3896, -0.0635, -0.1719]) tensor([0.5022, 0.1372, 0.1901, 0.1706]) -Greedy action tensor([ 2.0418e-01, -1.0661e-01, 1.8868e-04, -2.9269e-01]) tensor([0.3168, 0.2322, 0.2583, 0.1927]) -Greedy action tensor([ 0.6632, -0.3829, 0.0014, -0.4058]) tensor([0.4524, 0.1589, 0.2334, 0.1553]) -Greedy action tensor([ 0.5620, -0.4464, 0.1663, -0.5053]) tensor([0.4198, 0.1532, 0.2826, 0.1444]) -Greedy action tensor([ 0.6982, -0.4470, -0.0417, -0.3323]) tensor([0.4646, 0.1478, 0.2217, 0.1658]) -Greedy action tensor([ 0.8286, -0.5564, -0.0221, -0.3399]) tensor([0.5029, 0.1259, 0.2148, 0.1563]) -Greedy action tensor([ 0.7742, -0.4196, -0.0432, -0.2773]) tensor([0.4775, 0.1447, 0.2109, 0.1669]) -Greedy action tensor([ 0.6347, -0.2307, -0.0290, -0.1073]) tensor([0.4146, 0.1745, 0.2135, 0.1974]) -Greedy action tensor([ 0.4043, 0.0945, -0.1460, -0.6214]) tensor([0.3747, 0.2749, 0.2161, 0.1343]) -Greedy action tensor([ 0.5308, -0.3276, 0.2597, 0.3327]) tensor([0.3326, 0.1410, 0.2536, 0.2728]) -Greedy action tensor([-0.8848, -0.7827, -1.3760, -0.2776]) tensor([0.2196, 0.2432, 0.1343, 0.4029]) -Greedy action tensor([-0.5175, -0.7442, 0.6811, -0.8223]) tensor([0.1709, 0.1363, 0.5668, 0.1260]) -Greedy action tensor([-0.3965, 0.2746, 0.4567, -0.5736]) tensor([0.1628, 0.3186, 0.3822, 0.1364]) -Greedy action tensor([ 0.3680, -0.2331, 1.7188, 0.5837]) tensor([0.1504, 0.0824, 0.5806, 0.1866]) -Greedy action tensor([-1.4896, -0.0887, -0.6102, -0.5346]) tensor([0.0993, 0.4032, 0.2393, 0.2581]) -Greedy action tensor([-0.5257, -0.8369, 0.4959, -0.7597]) tensor([0.1886, 0.1382, 0.5239, 0.1493]) -Greedy action tensor([-0.1410, 0.4166, 1.1982, -0.8251]) tensor([0.1415, 0.2471, 0.5400, 0.0714]) -Greedy action tensor([ 1.5299, -1.3590, 0.7524, 2.1588]) tensor([0.2949, 0.0164, 0.1355, 0.5531]) -Greedy action tensor([1.1966, 0.1575, 0.8353, 0.0386]) tensor([0.4229, 0.1496, 0.2946, 0.1328]) -Greedy action tensor([ 0.2439, -0.9482, -0.5957, 1.2592]) tensor([0.2224, 0.0675, 0.0961, 0.6140]) -Greedy action tensor([-0.4134, -0.6023, 0.1400, 1.2256]) tensor([0.1147, 0.0950, 0.1995, 0.5908]) -Greedy action tensor([ 1.4288, -0.7174, 0.3937, -0.2876]) tensor([0.6054, 0.0708, 0.2150, 0.1088]) -Greedy action tensor([ 0.6490, -1.0106, 0.0508, 1.2766]) tensor([0.2768, 0.0526, 0.1522, 0.5184]) -Greedy action tensor([ 1.4994, 0.3300, -0.5769, 0.4070]) tensor([0.5646, 0.1753, 0.0708, 0.1893]) -Greedy action tensor([ 1.1744, -0.6648, 0.5415, 1.0201]) tensor([0.3926, 0.0624, 0.2085, 0.3365]) -Greedy action tensor([-0.2351, -0.3878, 0.3370, -0.4446]) tensor([0.2252, 0.1933, 0.3990, 0.1826]) -Greedy action tensor([ 0.1939, -0.2998, -0.2953, 0.2719]) tensor([0.3026, 0.1847, 0.1855, 0.3272]) -Greedy action tensor([-0.0944, -0.0706, -0.2668, -0.5280]) tensor([0.2846, 0.2914, 0.2395, 0.1845]) -Greedy action tensor([-1.5021, -0.8992, -0.9028, 0.8062]) tensor([0.0680, 0.1243, 0.1238, 0.6839]) -Greedy action tensor([ 1.0055, 0.3048, 0.0781, -0.0035]) tensor([0.4432, 0.2199, 0.1753, 0.1616]) -Greedy action tensor([ 1.6571, -0.6257, 0.6079, 1.7523]) tensor([0.3918, 0.0400, 0.1372, 0.4310]) -Greedy action tensor([-0.4016, -1.0481, -0.2192, 0.1230]) tensor([0.2266, 0.1187, 0.2719, 0.3828]) -Greedy action tensor([ 0.6140, -0.2012, 1.4341, -0.3335]) tensor([0.2439, 0.1079, 0.5537, 0.0945]) -Greedy action tensor([-0.5367, 0.3928, 0.7409, -0.9848]) tensor([0.1289, 0.3264, 0.4624, 0.0823]) -Greedy action tensor([-0.4313, -0.4592, -0.6574, 0.2914]) tensor([0.2070, 0.2013, 0.1651, 0.4265]) -Greedy action tensor([ 0.5868, -1.9187, -0.1541, -0.1506]) tensor([0.4910, 0.0401, 0.2341, 0.2349]) -Greedy action tensor([ 1.3992, -1.0863, 1.5392, 0.5247]) tensor([0.3773, 0.0314, 0.4339, 0.1574]) -Greedy action tensor([-0.1788, -0.6401, -0.3303, -0.9722]) tensor([0.3399, 0.2143, 0.2921, 0.1537]) -Greedy action tensor([-0.0270, -0.4702, 0.5118, -0.7069]) tensor([0.2589, 0.1662, 0.4437, 0.1312]) -Greedy action tensor([ 0.6424, -0.7702, 0.6210, 1.2185]) tensor([0.2499, 0.0609, 0.2446, 0.4446]) -Greedy action tensor([ 0.6790, -1.9186, 0.0445, -0.0426]) tensor([0.4783, 0.0356, 0.2536, 0.2325]) -Greedy action tensor([ 0.5376, 0.2829, -1.3839, 0.1617]) tensor([0.3834, 0.2972, 0.0561, 0.2633]) -Greedy action tensor([ 0.2579, -1.0601, 0.7769, 0.1051]) tensor([0.2627, 0.0703, 0.4415, 0.2255]) -Greedy action tensor([ 1.3276, -0.2877, 0.4064, 0.7231]) tensor([0.4666, 0.0928, 0.1857, 0.2549]) -Greedy action tensor([ 0.1291, -0.0321, 0.2500, 0.7241]) tensor([0.2087, 0.1776, 0.2355, 0.3783]) -Greedy action tensor([ 1.0087, 0.0632, -0.4603, -0.2534]) tensor([0.5259, 0.2043, 0.1210, 0.1488]) -Greedy action tensor([0.7312, 0.0470, 0.0761, 1.1536]) tensor([0.2817, 0.1421, 0.1463, 0.4298]) -Greedy action tensor([-0.3578, -1.0961, -0.1772, -0.1056]) tensor([0.2524, 0.1206, 0.3023, 0.3247]) -Greedy action tensor([-0.9224, 0.4239, 0.2789, -0.7134]) tensor([0.1064, 0.4088, 0.3537, 0.1311]) -Greedy action tensor([-0.4808, -2.1139, -0.2557, -0.4884]) tensor([0.2907, 0.0568, 0.3641, 0.2885]) -Greedy action tensor([-0.3731, -0.5030, 1.4720, 0.1687]) tensor([0.1007, 0.0885, 0.6376, 0.1732]) -Greedy action tensor([-0.1666, -1.0754, 1.2727, -0.6216]) tensor([0.1599, 0.0644, 0.6743, 0.1014]) -Greedy action tensor([-0.0015, -0.8245, 0.6268, -0.6515]) tensor([0.2607, 0.1145, 0.4887, 0.1361]) -Greedy action tensor([-0.8453, 0.2148, 0.6975, -0.5067]) tensor([0.1003, 0.2896, 0.4693, 0.1408]) -Greedy action tensor([ 0.4850, -0.2623, -0.1496, 0.6386]) tensor([0.3155, 0.1494, 0.1673, 0.3678]) -Greedy action tensor([ 0.2837, 0.6836, 1.1086, -0.6583]) tensor([0.1937, 0.2889, 0.4419, 0.0755]) -Greedy action tensor([ 1.1987, -0.5736, 0.3608, 0.3053]) tensor([0.4971, 0.0845, 0.2150, 0.2034]) -Greedy action tensor([-1.7159, -0.7827, 0.0615, -0.1132]) tensor([0.0693, 0.1763, 0.4100, 0.3443]) -Greedy action tensor([ 0.0345, -1.2387, 1.7068, -0.2208]) tensor([0.1355, 0.0379, 0.7215, 0.1050]) -Greedy action tensor([-0.6876, 0.3746, 0.4254, -0.9777]) tensor([0.1301, 0.3764, 0.3961, 0.0974]) -Greedy action tensor([ 0.7548, -1.4029, -0.5015, 0.6386]) tensor([0.4366, 0.0505, 0.1243, 0.3887]) -Greedy action tensor([ 0.1927, -1.1882, 0.3228, -1.1337]) tensor([0.3766, 0.0947, 0.4288, 0.0999]) -Greedy action tensor([ 1.7617, -0.6636, -0.2213, 0.9254]) tensor([0.6026, 0.0533, 0.0830, 0.2611]) -Greedy action tensor([-0.0087, -1.6709, -0.8369, -0.0297]) tensor([0.3838, 0.0728, 0.1676, 0.3758]) -Greedy action tensor([-0.0215, -0.7250, 0.4347, -0.2113]) tensor([0.2564, 0.1269, 0.4046, 0.2121]) -Greedy action tensor([-0.1789, -1.3132, 0.0914, -0.0366]) tensor([0.2642, 0.0850, 0.3462, 0.3046]) -Greedy action tensor([-0.7557, -0.8269, -0.3929, 0.2585]) tensor([0.1632, 0.1520, 0.2346, 0.4501]) -Greedy action tensor([ 0.0798, 0.7943, 0.3381, -0.0849]) tensor([0.1928, 0.3940, 0.2497, 0.1635]) -Greedy action tensor([-0.7006, -0.8763, 1.0431, -0.8538]) tensor([0.1188, 0.0997, 0.6795, 0.1020]) -Greedy action tensor([-0.2574, -0.8743, 0.0094, 0.6281]) tensor([0.1898, 0.1024, 0.2478, 0.4600]) -Greedy action tensor([-0.6230, -1.3519, -1.3035, -0.8320]) tensor([0.3571, 0.1723, 0.1808, 0.2898]) -Greedy action tensor([-1.2516, -0.5945, 0.8396, -0.3697]) tensor([0.0744, 0.1435, 0.6023, 0.1797]) -Greedy action tensor([-0.6967, -1.2771, -0.0783, -0.2529]) tensor([0.2010, 0.1125, 0.3731, 0.3133]) -Greedy action tensor([ 0.2849, -0.6519, 0.5424, 0.4929]) tensor([0.2553, 0.1000, 0.3303, 0.3143]) -Greedy action tensor([-0.3660, -0.0528, -0.4823, -0.9022]) tensor([0.2602, 0.3559, 0.2316, 0.1522]) -Greedy action tensor([ 0.2254, -0.2586, -0.1962, 1.3716]) tensor([0.1846, 0.1137, 0.1211, 0.5806]) -Greedy action tensor([ 0.6206, 0.9945, 0.1067, -0.7136]) tensor([0.3017, 0.4385, 0.1804, 0.0794]) -Greedy action tensor([ 0.7849, -2.8431, -0.0952, 0.5177]) tensor([0.4532, 0.0120, 0.1879, 0.3469]) -Greedy action tensor([ 0.5863, -0.0089, 0.3508, 0.5511]) tensor([0.3024, 0.1668, 0.2389, 0.2919]) -Greedy action tensor([0.4494, 0.0569, 0.3988, 0.1985]) tensor([0.2938, 0.1984, 0.2793, 0.2286]) -Greedy action tensor([-0.4207, 0.1365, -0.8510, 0.8723]) tensor([0.1420, 0.2480, 0.0924, 0.5176]) -Greedy action tensor([ 1.4690, -1.8866, 1.5423, 0.4252]) tensor([0.4060, 0.0142, 0.4369, 0.1430]) -Greedy action tensor([ 1.1903, 0.1301, 1.6060, -0.1540]) tensor([0.3202, 0.1109, 0.4853, 0.0835]) -Greedy action tensor([ 0.0097, -0.8875, 0.3216, 1.2452]) tensor([0.1609, 0.0656, 0.2198, 0.5536]) -Greedy action tensor([ 0.5891, -1.5594, -0.3117, 0.1750]) tensor([0.4579, 0.0534, 0.1860, 0.3026]) -Greedy action tensor([ 0.9574, -0.8005, 1.3495, 0.8327]) tensor([0.2829, 0.0488, 0.4187, 0.2497]) -Greedy action tensor([-0.4299, -0.6008, -0.0386, 0.6249]) tensor([0.1615, 0.1361, 0.2388, 0.4636]) -Greedy action tensor([-0.5951, -0.0406, 0.0227, 0.6436]) tensor([0.1243, 0.2164, 0.2305, 0.4289]) -Greedy action tensor([ 0.2655, 0.1154, -0.3967, -0.4259]) tensor([0.3476, 0.2991, 0.1792, 0.1741]) -Greedy action tensor([ 0.3316, -0.6394, -0.9473, 1.2733]) tensor([0.2369, 0.0897, 0.0659, 0.6075]) -Greedy action tensor([ 1.4622, -0.9984, 0.2986, 0.0230]) tensor([0.6117, 0.0522, 0.1911, 0.1450]) -Greedy action tensor([ 1.3177, -0.7603, -0.0851, 0.3079]) tensor([0.5762, 0.0721, 0.1417, 0.2099]) -Greedy action tensor([ 1.9588, 0.0748, -0.0400, 0.9133]) tensor([0.6101, 0.0927, 0.0827, 0.2145]) -Greedy action tensor([ 1.6959, -0.6929, -1.0263, 0.1698]) tensor([0.7273, 0.0667, 0.0478, 0.1581]) -Greedy action tensor([ 1.2811, -0.2366, -0.6225, 0.6372]) tensor([0.5281, 0.1158, 0.0787, 0.2774]) -Greedy action tensor([ 0.7818, -0.7369, -0.4634, -0.1217]) tensor([0.5230, 0.1145, 0.1506, 0.2119]) -Greedy action tensor([ 0.9467, -0.0367, -0.1673, 0.2758]) tensor([0.4518, 0.1690, 0.1483, 0.2310]) -Greedy action tensor([ 2.2140, -0.7290, -0.4367, 0.6224]) tensor([0.7536, 0.0397, 0.0532, 0.1534]) -Greedy action tensor([ 1.4772, -1.0055, -0.2030, 0.4461]) tensor([0.6148, 0.0514, 0.1146, 0.2193]) -Greedy action tensor([ 1.4793, -0.6784, 0.1984, 0.1584]) tensor([0.6023, 0.0696, 0.1673, 0.1607]) -Greedy action tensor([ 1.3893, 0.6348, -0.2594, -0.3320]) tensor([0.5431, 0.2554, 0.1044, 0.0971]) -Greedy action tensor([ 1.7414, -0.3044, -0.4167, 0.4236]) tensor([0.6611, 0.0855, 0.0764, 0.1770]) -Greedy action tensor([ 1.6297, -1.0638, -0.5184, 0.9486]) tensor([0.5916, 0.0400, 0.0690, 0.2994]) -Greedy action tensor([ 1.2527, -0.5804, -0.5818, 0.3659]) tensor([0.5775, 0.0924, 0.0922, 0.2379]) -Greedy action tensor([ 1.2395, -0.1597, -0.3888, 0.0854]) tensor([0.5687, 0.1403, 0.1116, 0.1793]) -Greedy action tensor([ 1.5744, -0.5046, -0.3930, 0.6314]) tensor([0.6045, 0.0756, 0.0845, 0.2354]) -Greedy action tensor([ 1.2361, -0.4517, -0.3156, 0.1278]) tensor([0.5791, 0.1071, 0.1227, 0.1912]) -Greedy action tensor([ 1.8492, -0.8612, -0.3613, 0.5980]) tensor([0.6838, 0.0455, 0.0750, 0.1957]) -Greedy action tensor([ 1.1351, -0.4981, -0.6015, 0.3290]) tensor([0.5501, 0.1074, 0.0969, 0.2457]) -Greedy action tensor([ 1.2834, -0.4317, 0.1875, 0.0333]) tensor([0.5554, 0.0999, 0.1856, 0.1591]) -Greedy action tensor([ 1.3944, -0.7905, -0.0844, 0.2989]) tensor([0.5971, 0.0672, 0.1361, 0.1997]) -Greedy action tensor([ 0.9072, -0.1533, -0.1893, -0.3628]) tensor([0.5099, 0.1766, 0.1703, 0.1432]) -Greedy action tensor([ 0.8673, -0.3192, -0.1095, 0.1325]) tensor([0.4627, 0.1412, 0.1742, 0.2219]) -Greedy action tensor([ 1.4079, -0.4301, -0.5771, 0.8823]) tensor([0.5298, 0.0843, 0.0728, 0.3132]) -Greedy action tensor([ 1.4403, 0.1278, -0.2987, 0.2662]) tensor([0.5702, 0.1535, 0.1002, 0.1762]) -Greedy action tensor([ 1.0338, -0.3924, -0.4323, 0.3785]) tensor([0.5024, 0.1207, 0.1160, 0.2609]) -Greedy action tensor([ 1.1427, -0.4682, -0.0055, 0.2085]) tensor([0.5236, 0.1046, 0.1661, 0.2057]) -Greedy action tensor([ 1.4404, -0.4692, -0.4430, 0.2840]) tensor([0.6193, 0.0917, 0.0942, 0.1948]) -Greedy action tensor([ 0.9262, -0.8478, -0.4834, 0.7253]) tensor([0.4481, 0.0760, 0.1094, 0.3665]) -Greedy action tensor([ 1.8092, -0.7034, -0.7186, 0.7275]) tensor([0.6667, 0.0540, 0.0532, 0.2260]) -Greedy action tensor([ 1.2756, -0.8917, -0.3889, 0.7083]) tensor([0.5345, 0.0612, 0.1012, 0.3031]) -Greedy action tensor([ 1.9655, -1.2877, -0.1184, 0.7483]) tensor([0.6853, 0.0265, 0.0853, 0.2029]) -Greedy action tensor([ 1.8225, -0.5139, -0.3106, 0.1342]) tensor([0.7143, 0.0691, 0.0846, 0.1320]) -Greedy action tensor([ 1.4172, -0.3096, -0.6448, 0.5675]) tensor([0.5772, 0.1026, 0.0734, 0.2468]) -Greedy action tensor([ 1.6465, -0.7768, -0.0023, 0.5028]) tensor([0.6252, 0.0554, 0.1202, 0.1992]) -Greedy action tensor([ 1.3578, -0.9965, -0.3900, 0.7212]) tensor([0.5561, 0.0528, 0.0968, 0.2942]) -Greedy action tensor([ 2.1337, -0.8295, -0.2904, 0.6148]) tensor([0.7357, 0.0380, 0.0652, 0.1611]) -Greedy action tensor([ 1.0771, -0.5917, -0.1945, 0.2862]) tensor([0.5202, 0.0980, 0.1459, 0.2359]) -Greedy action tensor([ 1.1798, -0.6774, 0.0939, 0.1190]) tensor([0.5435, 0.0848, 0.1835, 0.1881]) -Greedy action tensor([ 1.0038, -0.3929, -0.3531, 0.0564]) tensor([0.5284, 0.1307, 0.1360, 0.2049]) -Greedy action tensor([ 1.5707, -0.4510, -0.3377, 0.5124]) tensor([0.6143, 0.0814, 0.0911, 0.2132]) -Greedy action tensor([ 1.4514, 0.5112, -0.0515, 0.0690]) tensor([0.5365, 0.2095, 0.1194, 0.1346]) -Greedy action tensor([ 1.4822, -0.1662, -0.4439, 0.4392]) tensor([0.5915, 0.1138, 0.0862, 0.2085]) -Greedy action tensor([ 2.1718, 0.3589, -0.0483, -0.0251]) tensor([0.7231, 0.1180, 0.0785, 0.0804]) -Greedy action tensor([ 1.8812, -1.2808, -0.1315, 0.1230]) tensor([0.7417, 0.0314, 0.0991, 0.1278]) -Greedy action tensor([ 1.2075, -0.4615, -0.0649, 0.4273]) tensor([0.5190, 0.0978, 0.1454, 0.2379]) -Greedy action tensor([ 2.1898, -0.7354, -0.2184, -0.0160]) tensor([0.7976, 0.0428, 0.0718, 0.0879]) -Greedy action tensor([ 1.1059, 0.0269, -0.6943, 0.2236]) tensor([0.5211, 0.1771, 0.0861, 0.2156]) -Greedy action tensor([ 1.7534, -0.6984, -0.6384, 0.4105]) tensor([0.6951, 0.0599, 0.0636, 0.1815]) -Greedy action tensor([ 1.3736, -0.0778, -0.5236, -0.0060]) tensor([0.6113, 0.1432, 0.0917, 0.1538]) -Greedy action tensor([ 1.1205, 0.0871, -0.8322, 0.2624]) tensor([0.5204, 0.1852, 0.0738, 0.2206]) -Greedy action tensor([ 1.6853, -0.0110, -0.2409, 0.4976]) tensor([0.6120, 0.1122, 0.0892, 0.1866]) -Greedy action tensor([ 1.3360, -0.1192, -0.8197, 0.5790]) tensor([0.5500, 0.1283, 0.0637, 0.2580]) -Greedy action tensor([ 1.9957, -0.7904, -0.0803, 0.1141]) tensor([0.7466, 0.0460, 0.0936, 0.1137]) -Greedy action tensor([ 0.9104, -0.2827, -0.1160, 0.3269]) tensor([0.4505, 0.1366, 0.1614, 0.2514]) -Greedy action tensor([ 1.3033, -0.4554, -0.4610, 0.1661]) tensor([0.6008, 0.1035, 0.1029, 0.1927]) -Greedy action tensor([ 0.7036, -0.4284, -0.1817, 0.4254]) tensor([0.4013, 0.1294, 0.1656, 0.3038]) -Greedy action tensor([ 0.9927, -0.2810, -0.3435, 0.1051]) tensor([0.5117, 0.1432, 0.1345, 0.2106]) -Greedy action tensor([ 1.0787, -0.4111, -0.1167, 0.0944]) tensor([0.5259, 0.1185, 0.1591, 0.1965]) -Greedy action tensor([ 1.2806, -0.3884, -0.5517, 0.7535]) tensor([0.5158, 0.0972, 0.0825, 0.3045]) -Greedy action tensor([ 1.2061, -0.5889, -0.1427, 0.1846]) tensor([0.5600, 0.0930, 0.1454, 0.2016]) -Greedy action tensor([ 1.3192, -0.4092, -0.3489, 0.3530]) tensor([0.5725, 0.1017, 0.1080, 0.2178]) -Greedy action tensor([ 0.8006, -0.0976, -0.0764, 0.0275]) tensor([0.4377, 0.1782, 0.1821, 0.2020]) -Greedy action tensor([ 8.3928e-01, -4.8164e-01, 6.8579e-02, 5.6267e-04]) tensor([0.4626, 0.1235, 0.2140, 0.2000]) -Greedy action tensor([ 1.9716, -0.7968, -0.5086, 0.6713]) tensor([0.7048, 0.0442, 0.0590, 0.1920]) -Greedy action tensor([ 0.5800, -0.3387, 0.2382, -0.0635]) tensor([0.3795, 0.1514, 0.2696, 0.1994]) -Greedy action tensor([ 1.7117, -0.3795, -0.5763, 0.7882]) tensor([0.6165, 0.0762, 0.0626, 0.2448]) -Greedy action tensor([ 0.8001, -0.1000, -0.3564, -0.1784]) tensor([0.4769, 0.1939, 0.1500, 0.1792]) -Greedy action tensor([ 1.0814, -0.2613, -0.5461, 0.2906]) tensor([0.5233, 0.1366, 0.1028, 0.2373]) -Greedy action tensor([ 1.0128, 0.5313, 0.1570, -0.5144]) tensor([0.4425, 0.2734, 0.1880, 0.0961]) -Greedy action tensor([ 1.4891, -0.2128, -0.0285, 0.4932]) tensor([0.5647, 0.1030, 0.1238, 0.2086]) -Greedy action tensor([ 0.9741, -0.2536, -0.2008, 0.2769]) tensor([0.4762, 0.1395, 0.1471, 0.2372]) -Greedy action tensor([ 1.6784, -0.5935, -0.0168, 0.5702]) tensor([0.6185, 0.0638, 0.1135, 0.2042]) -Greedy action tensor([ 0.8900, -0.2768, -0.4891, 0.6652]) tensor([0.4234, 0.1318, 0.1066, 0.3382]) -Greedy action tensor([ 1.4410, -0.4763, -0.1091, 0.6739]) tensor([0.5484, 0.0806, 0.1164, 0.2546]) -Greedy action tensor([ 1.7920, -0.6480, -0.3001, 0.6545]) tensor([0.6531, 0.0569, 0.0806, 0.2094]) -Greedy action tensor([ 1.2296, -0.4782, -0.2670, 0.4579]) tensor([0.5355, 0.0971, 0.1199, 0.2475]) -Greedy action tensor([ 1.0392, -0.1165, -0.4010, -0.0760]) tensor([0.5320, 0.1675, 0.1260, 0.1744]) -Greedy action tensor([ 1.4651, -0.4724, -0.4398, 0.3924]) tensor([0.6116, 0.0881, 0.0910, 0.2092]) -Greedy action tensor([ 2.4915, -0.7945, -0.3091, 0.1521]) tensor([0.8371, 0.0313, 0.0509, 0.0807]) -Greedy action tensor([1.4355, 0.5011, 0.0163, 0.1583]) tensor([0.5226, 0.2053, 0.1264, 0.1457]) -Greedy action tensor([-1.8949, -0.4515, 0.6383, -0.1554]) tensor([0.0425, 0.1800, 0.5354, 0.2421]) -Greedy action tensor([-1.8523, -0.4437, 0.6201, -0.1321]) tensor([0.0444, 0.1816, 0.5261, 0.2480]) -Greedy action tensor([-1.5490, 0.4246, 0.3411, 0.0503]) tensor([0.0506, 0.3641, 0.3349, 0.2504]) -Greedy action tensor([-1.7232, -0.3492, 0.6658, -0.0174]) tensor([0.0468, 0.1850, 0.5104, 0.2578]) -Greedy action tensor([-1.2468, -0.4873, 1.2345, 1.1094]) tensor([0.0390, 0.0833, 0.4662, 0.4114]) -Greedy action tensor([-1.8480, -0.4502, 0.6609, -0.0926]) tensor([0.0432, 0.1750, 0.5316, 0.2502]) -Greedy action tensor([-1.4204, 0.5720, 0.2686, 0.0820]) tensor([0.0548, 0.4020, 0.2968, 0.2463]) -Greedy action tensor([-1.5876, -0.4320, 0.6322, 0.2817]) tensor([0.0503, 0.1599, 0.4634, 0.3264]) -Greedy action tensor([-1.8750, -0.3861, 0.6268, -0.1469]) tensor([0.0430, 0.1905, 0.5245, 0.2420]) -Greedy action tensor([-1.8376, -0.4239, 0.6061, -0.1250]) tensor([0.0451, 0.1854, 0.5194, 0.2500]) -Greedy action tensor([-1.9264, -0.4377, 0.6598, -0.1691]) tensor([0.0408, 0.1808, 0.5418, 0.2365]) -Greedy action tensor([-1.7479, -0.1400, 0.5296, -0.0109]) tensor([0.0467, 0.2330, 0.4552, 0.2651]) -Greedy action tensor([-1.2752, 0.0961, 0.5629, 0.2098]) tensor([0.0639, 0.2519, 0.4018, 0.2823]) -Greedy action tensor([-1.8157, -0.3044, 0.6274, -0.1118]) tensor([0.0444, 0.2011, 0.5107, 0.2438]) -Greedy action tensor([-1.4855, -0.5629, 0.5244, 0.1039]) tensor([0.0630, 0.1584, 0.4700, 0.3086]) -Greedy action tensor([-1.9010, -0.4216, 0.6423, -0.1544]) tensor([0.0419, 0.1841, 0.5334, 0.2405]) -Greedy action tensor([-1.8630, -0.2704, 0.5812, -0.1162]) tensor([0.0432, 0.2122, 0.4972, 0.2475]) -Greedy action tensor([-1.8529, -0.3846, 0.5975, -0.1234]) tensor([0.0443, 0.1923, 0.5136, 0.2498]) -Greedy action tensor([-1.8987, -0.4571, 0.6481, -0.1490]) tensor([0.0421, 0.1780, 0.5376, 0.2423]) -Greedy action tensor([-1.8184, -0.4656, 0.6200, -0.0803]) tensor([0.0454, 0.1758, 0.5205, 0.2584]) -Greedy action tensor([-0.5624, 0.3739, 0.6970, 1.2963]) tensor([0.0741, 0.1891, 0.2612, 0.4756]) -Greedy action tensor([-1.3249, -0.6177, 0.3686, 0.1041]) tensor([0.0791, 0.1605, 0.4302, 0.3302]) -Greedy action tensor([-1.8962, -0.4516, 0.6479, -0.1555]) tensor([0.0422, 0.1791, 0.5378, 0.2408]) -Greedy action tensor([-1.8569, -0.3542, 0.6245, -0.1248]) tensor([0.0433, 0.1945, 0.5176, 0.2447]) -Greedy action tensor([-0.5511, -0.5035, 0.9419, 1.5085]) tensor([0.0697, 0.0731, 0.3103, 0.5469]) -Greedy action tensor([-1.8721, -0.4354, 0.6380, -0.1398]) tensor([0.0432, 0.1816, 0.5312, 0.2440]) -Greedy action tensor([-1.8863, -0.2611, 0.6128, -0.1315]) tensor([0.0416, 0.2114, 0.5064, 0.2406]) -Greedy action tensor([-0.9822, 0.4515, 0.5573, 0.7548]) tensor([0.0644, 0.2700, 0.3001, 0.3656]) -Greedy action tensor([-1.7203, -0.5041, 0.5563, -0.0974]) tensor([0.0521, 0.1759, 0.5078, 0.2642]) -Greedy action tensor([-1.8759, -0.7163, 1.1850, 0.0568]) tensor([0.0308, 0.0983, 0.6580, 0.2129]) -Greedy action tensor([-1.0391, -0.7765, -0.0164, 0.0128]) tensor([0.1259, 0.1637, 0.3500, 0.3604]) -Greedy action tensor([-1.9349, -0.4300, 0.6616, -0.1699]) tensor([0.0404, 0.1819, 0.5418, 0.2359]) -Greedy action tensor([-0.3396, 1.0662, 0.0079, 0.6468]) tensor([0.1090, 0.4445, 0.1543, 0.2922]) -Greedy action tensor([-1.8940, -0.4279, 0.6382, -0.1538]) tensor([0.0424, 0.1835, 0.5328, 0.2413]) -Greedy action tensor([-1.6654, 0.1869, 0.4284, -0.0077]) tensor([0.0482, 0.3074, 0.3913, 0.2530]) -Greedy action tensor([-1.9231, -0.4497, 0.6520, -0.1703]) tensor([0.0412, 0.1798, 0.5412, 0.2378]) -Greedy action tensor([-0.8749, -0.0089, 0.2730, -0.1666]) tensor([0.1168, 0.2778, 0.3682, 0.2372]) -Greedy action tensor([-1.9428, -0.4465, 0.6664, -0.1796]) tensor([0.0402, 0.1794, 0.5460, 0.2343]) -Greedy action tensor([-0.5842, -0.5641, 0.2318, 0.2200]) tensor([0.1534, 0.1566, 0.3470, 0.3430]) -Greedy action tensor([-1.9243, -0.4269, 0.6602, -0.1700]) tensor([0.0408, 0.1824, 0.5409, 0.2358]) -Greedy action tensor([-1.7460, -0.4633, 0.5707, -0.0596]) tensor([0.0496, 0.1790, 0.5034, 0.2680]) -Greedy action tensor([-0.0219, -0.4938, 0.9057, 0.7113]) tensor([0.1604, 0.1001, 0.4056, 0.3339]) -Greedy action tensor([-1.3454, 0.0285, 0.5189, 0.3661]) tensor([0.0590, 0.2332, 0.3808, 0.3269]) -Greedy action tensor([-0.5376, -0.4935, 0.1896, 0.2798]) tensor([0.1568, 0.1638, 0.3244, 0.3550]) -Greedy action tensor([-1.9391, -0.4449, 0.6650, -0.1770]) tensor([0.0403, 0.1797, 0.5451, 0.2349]) -Greedy action tensor([-1.6485, -0.2211, 0.5825, 0.0301]) tensor([0.0504, 0.2101, 0.4693, 0.2701]) -Greedy action tensor([-1.9227, -0.4527, 0.6614, -0.1616]) tensor([0.0410, 0.1781, 0.5427, 0.2383]) -Greedy action tensor([-1.4856, -0.3917, 0.7318, 0.4560]) tensor([0.0497, 0.1483, 0.4560, 0.3461]) -Greedy action tensor([-1.8287, -0.2927, 0.6386, -0.0807]) tensor([0.0431, 0.2004, 0.5087, 0.2478]) -Greedy action tensor([-1.8500, -0.4277, 0.6263, -0.0995]) tensor([0.0439, 0.1819, 0.5218, 0.2525]) -Greedy action tensor([-1.8685, -0.3512, 0.6459, -0.1316]) tensor([0.0424, 0.1932, 0.5237, 0.2407]) -Greedy action tensor([-1.9406, -0.4499, 0.6667, -0.1783]) tensor([0.0403, 0.1788, 0.5462, 0.2346]) -Greedy action tensor([-1.8869, -0.4668, 0.6383, -0.1364]) tensor([0.0428, 0.1769, 0.5342, 0.2462]) -Greedy action tensor([-0.2337, 0.3108, 0.8828, 1.3350]) tensor([0.0945, 0.1630, 0.2887, 0.4538]) -Greedy action tensor([-1.9338, -0.4580, 0.6701, -0.1714]) tensor([0.0405, 0.1770, 0.5468, 0.2357]) -Greedy action tensor([-1.9171, -0.4153, 0.6537, -0.1572]) tensor([0.0410, 0.1842, 0.5364, 0.2384]) -Greedy action tensor([-0.8394, 0.1249, 0.2705, -0.1910]) tensor([0.1167, 0.3061, 0.3540, 0.2232]) -Greedy action tensor([-1.8325, -0.4686, 0.6163, -0.1075]) tensor([0.0453, 0.1770, 0.5238, 0.2540]) -Greedy action tensor([-1.9138, -0.4406, 0.6502, -0.1657]) tensor([0.0415, 0.1811, 0.5390, 0.2384]) -Greedy action tensor([-1.8270, -0.4351, 0.6102, -0.1021]) tensor([0.0453, 0.1822, 0.5183, 0.2542]) -Greedy action tensor([-1.9370, -0.4361, 0.6608, -0.1748]) tensor([0.0404, 0.1813, 0.5429, 0.2354]) -Greedy action tensor([-1.8456, -0.4406, 0.6318, -0.0995]) tensor([0.0440, 0.1794, 0.5243, 0.2523]) -Greedy action tensor([-1.7281, -0.0077, 0.4893, -0.0216]) tensor([0.0470, 0.2625, 0.4315, 0.2589]) -Greedy action tensor([-1.7496, -0.2765, 0.5874, -0.0474]) tensor([0.0472, 0.2058, 0.4883, 0.2588]) -Greedy action tensor([-1.9236, -0.4625, 0.6543, -0.1727]) tensor([0.0413, 0.1778, 0.5433, 0.2376]) -Greedy action tensor([-1.5559, -0.4424, 0.8408, 0.5105]) tensor([0.0436, 0.1328, 0.4792, 0.3444]) -Greedy action tensor([-1.8648, -0.1764, 0.5721, -0.1180]) tensor([0.0424, 0.2294, 0.4850, 0.2432]) -Greedy action tensor([-1.2618, -0.2161, 0.4408, -0.1662]) tensor([0.0811, 0.2309, 0.4453, 0.2427]) -Greedy action tensor([-0.6656, 0.0690, 0.9705, 1.2225]) tensor([0.0674, 0.1406, 0.3463, 0.4456]) -Greedy action tensor([-0.5125, -0.4400, 0.1646, 0.2186]) tensor([0.1634, 0.1757, 0.3216, 0.3394]) -Greedy action tensor([-1.5506, -0.6026, 0.4504, -0.0734]) tensor([0.0651, 0.1680, 0.4816, 0.2852]) -Greedy action tensor([-1.6781, -0.5352, 0.5314, -0.0096]) tensor([0.0539, 0.1690, 0.4911, 0.2859]) -Greedy action tensor([-1.8965, -0.4283, 0.6587, -0.1410]) tensor([0.0417, 0.1809, 0.5364, 0.2411]) -Greedy action tensor([-0.7897, 0.8417, 0.1132, 0.0277]) tensor([0.0922, 0.4714, 0.2275, 0.2089]) -Greedy action tensor([-1.9049, -0.3968, 0.6452, -0.1496]) tensor([0.0415, 0.1874, 0.5312, 0.2399]) -Greedy action tensor([-1.8180, -0.4851, 0.5999, -0.1221]) tensor([0.0466, 0.1766, 0.5228, 0.2540]) -Greedy action tensor([-1.3881, 0.0774, 0.4571, -0.6718]) tensor([0.0730, 0.3159, 0.4618, 0.1493]) -Greedy action tensor([-1.1939, -0.1332, 0.6201, 0.8616]) tensor([0.0561, 0.1620, 0.3440, 0.4380]) -Greedy action tensor([-1.6415, -0.1116, 0.6294, 0.0639]) tensor([0.0481, 0.2219, 0.4656, 0.2645]) -Greedy action tensor([-1.1519, -0.5483, 0.5129, 0.0466]) tensor([0.0875, 0.1600, 0.4624, 0.2901]) -Greedy action tensor([-1.7252, -0.4391, 0.5618, -0.0528]) tensor([0.0505, 0.1829, 0.4975, 0.2691]) -Greedy action tensor([ 0.2210, -0.1845, 0.0996, -0.2254]) tensor([0.3133, 0.2088, 0.2774, 0.2005]) -Greedy action tensor([ 0.3618, 0.2186, -0.1639, -0.3374]) tensor([0.3384, 0.2933, 0.2001, 0.1682]) -Greedy action tensor([ 0.8598, -0.2982, -0.0331, -0.1483]) tensor([0.4788, 0.1504, 0.1961, 0.1747]) -Greedy action tensor([ 0.7684, -0.6685, 0.0894, -0.3235]) tensor([0.4807, 0.1142, 0.2438, 0.1613]) -Greedy action tensor([ 1.2146, -0.8860, -0.0061, -0.5575]) tensor([0.6300, 0.0771, 0.1859, 0.1071]) -Greedy action tensor([ 0.7811, -0.2087, -0.1055, -0.3434]) tensor([0.4743, 0.1763, 0.1954, 0.1540]) -Greedy action tensor([ 0.5779, -0.2955, 0.1984, -0.5024]) tensor([0.4096, 0.1710, 0.2803, 0.1391]) -Greedy action tensor([ 0.2395, -0.0138, -0.0665, -0.2554]) tensor([0.3203, 0.2486, 0.2358, 0.1952]) -Greedy action tensor([ 0.4460, 0.3510, -0.1457, -0.0699]) tensor([0.3268, 0.2972, 0.1809, 0.1951]) -Greedy action tensor([ 0.5965, -0.0825, -0.0938, -0.0215]) tensor([0.3925, 0.1991, 0.1968, 0.2116]) -Greedy action tensor([ 0.9415, -0.7265, -0.2228, -0.4821]) tensor([0.5742, 0.1083, 0.1792, 0.1383]) -Greedy action tensor([ 0.5156, -0.0879, 0.0322, -0.0006]) tensor([0.3623, 0.1981, 0.2234, 0.2162]) -Greedy action tensor([ 1.1306, -0.7532, -0.0178, -0.3813]) tensor([0.5918, 0.0900, 0.1877, 0.1305]) -Greedy action tensor([ 0.7722, -0.4825, -0.0009, -0.3317]) tensor([0.4812, 0.1372, 0.2221, 0.1595]) -Greedy action tensor([ 0.5533, -0.3641, -0.0161, -0.1881]) tensor([0.4095, 0.1636, 0.2317, 0.1951]) -Greedy action tensor([ 1.1320, -0.2728, 0.1219, -0.2749]) tensor([0.5392, 0.1323, 0.1964, 0.1321]) -Greedy action tensor([ 0.4217, -0.0747, -0.0151, -0.3082]) tensor([0.3654, 0.2224, 0.2361, 0.1761]) -Greedy action tensor([ 0.8513, -0.4874, -0.1307, -0.4281]) tensor([0.5222, 0.1369, 0.1956, 0.1453]) -Greedy action tensor([ 0.8956, -0.0999, -0.2138, -0.1053]) tensor([0.4838, 0.1788, 0.1595, 0.1778]) -Greedy action tensor([ 0.9517, -0.8372, 0.0237, -0.4539]) tensor([0.5532, 0.0925, 0.2187, 0.1357]) -Greedy action tensor([ 0.9176, -0.3526, -0.3123, -0.3564]) tensor([0.5397, 0.1515, 0.1578, 0.1510]) -Greedy action tensor([ 0.5754, -0.4617, 0.0924, -0.3250]) tensor([0.4206, 0.1491, 0.2594, 0.1709]) -Greedy action tensor([ 1.0379, -0.6413, 0.1751, -0.4173]) tensor([0.5429, 0.1013, 0.2291, 0.1267]) -Greedy action tensor([ 0.5324, -0.2158, -0.0852, -0.0080]) tensor([0.3854, 0.1824, 0.2078, 0.2245]) -Greedy action tensor([ 0.7334, -0.6044, 0.0859, -0.5190]) tensor([0.4827, 0.1267, 0.2526, 0.1380]) -Greedy action tensor([ 0.4588, -0.3916, -0.1331, -0.1242]) tensor([0.3939, 0.1683, 0.2179, 0.2199]) -Greedy action tensor([ 0.5042, -0.2021, -0.1697, -0.2297]) tensor([0.4027, 0.1987, 0.2053, 0.1933]) -Greedy action tensor([ 0.9416, -0.9758, 0.0620, -0.5822]) tensor([0.5618, 0.0826, 0.2331, 0.1224]) -Greedy action tensor([ 0.5627, -0.6753, -0.1762, -0.1279]) tensor([0.4407, 0.1278, 0.2105, 0.2209]) -Greedy action tensor([ 0.5852, -0.4952, -0.1413, -0.1458]) tensor([0.4339, 0.1473, 0.2099, 0.2089]) -Greedy action tensor([ 1.0566, -0.6340, -0.1332, -0.4162]) tensor([0.5821, 0.1073, 0.1771, 0.1335]) -Greedy action tensor([ 0.5855, -0.1421, -0.1624, -0.0030]) tensor([0.3981, 0.1923, 0.1885, 0.2210]) -Greedy action tensor([ 0.7166, -0.6662, -0.0754, -0.6744]) tensor([0.5121, 0.1285, 0.2320, 0.1274]) -Greedy action tensor([ 0.8469, -0.6960, -0.0581, -0.5007]) tensor([0.5324, 0.1138, 0.2154, 0.1384]) -Greedy action tensor([ 0.6083, 0.0731, 0.1621, -0.2315]) tensor([0.3763, 0.2204, 0.2409, 0.1625]) -Greedy action tensor([ 0.3893, -0.1471, -0.0294, -0.1674]) tensor([0.3551, 0.2077, 0.2336, 0.2035]) -Greedy action tensor([ 0.2157, 0.0995, -0.1225, -0.3768]) tensor([0.3168, 0.2821, 0.2259, 0.1752]) -Greedy action tensor([ 1.2101, -0.6364, -0.0322, -0.7101]) tensor([0.6277, 0.0990, 0.1812, 0.0920]) -Greedy action tensor([ 0.2485, -0.1882, -0.1026, -0.0196]) tensor([0.3210, 0.2074, 0.2260, 0.2455]) -Greedy action tensor([ 0.4309, -0.1157, -0.0476, -0.1693]) tensor([0.3640, 0.2107, 0.2256, 0.1997]) -Greedy action tensor([ 0.5278, -0.3900, -0.1255, -0.3990]) tensor([0.4319, 0.1725, 0.2247, 0.1709]) -Greedy action tensor([ 0.7798, -0.4888, -0.1510, -0.3776]) tensor([0.5026, 0.1413, 0.1981, 0.1580]) -Greedy action tensor([ 0.6575, -0.5612, -0.0186, -0.1185]) tensor([0.4416, 0.1305, 0.2246, 0.2032]) -Greedy action tensor([ 0.7579, 0.4039, -0.3984, -0.5223]) tensor([0.4358, 0.3059, 0.1371, 0.1211]) -Greedy action tensor([ 0.6837, -0.7553, -0.0749, -0.3141]) tensor([0.4821, 0.1143, 0.2258, 0.1778]) -Greedy action tensor([ 0.5199, -0.1062, -0.1218, -0.0335]) tensor([0.3794, 0.2028, 0.1997, 0.2181]) -Greedy action tensor([ 0.7894, -0.2478, -0.0716, -0.1995]) tensor([0.4653, 0.1649, 0.1967, 0.1731]) -Greedy action tensor([ 0.8544, -0.6980, -0.0061, -0.3712]) tensor([0.5186, 0.1098, 0.2193, 0.1523]) -Greedy action tensor([ 0.6078, -0.1592, -0.0627, -0.1232]) tensor([0.4069, 0.1890, 0.2081, 0.1959]) -Greedy action tensor([ 0.7864, -0.5569, -0.1482, -0.4266]) tensor([0.5126, 0.1338, 0.2013, 0.1524]) -Greedy action tensor([ 0.8151, -0.5513, -0.0078, -0.3767]) tensor([0.5005, 0.1276, 0.2198, 0.1520]) -Greedy action tensor([ 0.4080, -0.1549, -0.0145, -0.3334]) tensor([0.3702, 0.2108, 0.2426, 0.1764]) -Greedy action tensor([ 0.8524, -0.8027, 0.0507, -0.5749]) tensor([0.5320, 0.1017, 0.2387, 0.1277]) -Greedy action tensor([ 0.7249, -0.2957, 0.2232, -0.1939]) tensor([0.4229, 0.1524, 0.2560, 0.1687]) -Greedy action tensor([ 0.3744, 0.1127, -0.1032, 0.0357]) tensor([0.3223, 0.2481, 0.1999, 0.2297]) -Greedy action tensor([ 0.5059, 0.0172, -0.0338, 0.0229]) tensor([0.3554, 0.2181, 0.2072, 0.2193]) -Greedy action tensor([ 0.3684, 0.2845, 0.0527, -0.3770]) tensor([0.3202, 0.2944, 0.2335, 0.1519]) -Greedy action tensor([ 0.8099, -0.3989, -0.1572, -0.3967]) tensor([0.5056, 0.1510, 0.1922, 0.1513]) -Greedy action tensor([ 0.6492, -0.9768, -0.1276, -0.3088]) tensor([0.4901, 0.0964, 0.2254, 0.1880]) -Greedy action tensor([ 0.5307, -0.3093, -0.2080, -0.0055]) tensor([0.4009, 0.1731, 0.1915, 0.2345]) -Greedy action tensor([ 0.8222, -0.6500, 0.0548, -0.7128]) tensor([0.5238, 0.1202, 0.2432, 0.1129]) -Greedy action tensor([ 0.6297, -0.6311, 0.0286, -0.6209]) tensor([0.4722, 0.1338, 0.2588, 0.1352]) -Greedy action tensor([ 0.6338, -0.4382, -0.1788, -0.1743]) tensor([0.4481, 0.1534, 0.1988, 0.1997]) -Greedy action tensor([ 1.0963, -0.8714, 0.0656, -0.5358]) tensor([0.5910, 0.0826, 0.2109, 0.1156]) -Greedy action tensor([ 0.8411, 0.1773, -0.1705, -0.2885]) tensor([0.4542, 0.2339, 0.1652, 0.1468]) -Greedy action tensor([ 0.9597, -0.4166, 0.1000, -0.4485]) tensor([0.5207, 0.1315, 0.2204, 0.1274]) -Greedy action tensor([ 0.4896, -0.2913, 0.0615, -0.4838]) tensor([0.4020, 0.1841, 0.2620, 0.1519]) -Greedy action tensor([ 0.9477, -0.5424, -0.1200, -0.4782]) tensor([0.5527, 0.1245, 0.1900, 0.1328]) -Greedy action tensor([ 0.4247, -0.2294, -0.1457, -0.2378]) tensor([0.3845, 0.1999, 0.2174, 0.1982]) -Greedy action tensor([ 0.9499, -0.5935, -0.2772, -0.4283]) tensor([0.5686, 0.1215, 0.1667, 0.1433]) -Greedy action tensor([ 1.1905, -0.5299, 0.1502, -0.3021]) tensor([0.5691, 0.1019, 0.2011, 0.1279]) -Greedy action tensor([ 0.6582, -0.3224, -0.0047, -0.0891]) tensor([0.4230, 0.1587, 0.2180, 0.2003]) -Greedy action tensor([ 0.8364, -0.5474, -0.0204, -0.3873]) tensor([0.5078, 0.1273, 0.2156, 0.1494]) -Greedy action tensor([ 0.8863, -0.5498, -0.0832, -0.3212]) tensor([0.5219, 0.1241, 0.1979, 0.1560]) -Greedy action tensor([ 0.9296, -0.9366, 0.1194, -0.5320]) tensor([0.5461, 0.0845, 0.2429, 0.1266]) -Greedy action tensor([ 0.5974, -0.1686, 0.1386, -0.3657]) tensor([0.4034, 0.1875, 0.2550, 0.1540]) -Greedy action tensor([ 0.9702, -0.6972, -0.1085, -0.4520]) tensor([0.5650, 0.1066, 0.1921, 0.1363]) -Greedy action tensor([ 0.5939, -0.8307, -0.1180, -0.2698]) tensor([0.4645, 0.1118, 0.2279, 0.1958]) -Greedy action tensor([ 0.3129, -0.3383, -0.2006, -0.2038]) tensor([0.3681, 0.1920, 0.2203, 0.2196]) -Greedy action tensor([ 0.7501, -0.4564, 0.0553, -0.3708]) tensor([0.4707, 0.1409, 0.2350, 0.1535]) -Greedy action tensor([ 0.3024, -0.0263, -0.0964, 0.0317]) tensor([0.3171, 0.2282, 0.2128, 0.2419]) -Greedy action tensor([-0.2633, -1.5493, 0.2148, 0.1412]) tensor([0.2279, 0.0630, 0.3676, 0.3415]) -Greedy action tensor([ 0.2769, -1.6559, -0.5922, 0.0717]) tensor([0.4204, 0.0609, 0.1763, 0.3424]) -Greedy action tensor([ 0.8285, 0.6161, -0.6275, 0.0255]) tensor([0.4016, 0.3248, 0.0936, 0.1799]) -Greedy action tensor([ 0.7258, -0.7396, -0.0799, 0.1908]) tensor([0.4418, 0.1020, 0.1974, 0.2588]) -Greedy action tensor([ 0.1347, -0.3239, -0.4645, 0.3085]) tensor([0.2966, 0.1875, 0.1629, 0.3529]) -Greedy action tensor([-1.3290, 0.9801, -1.5810, -0.6826]) tensor([0.0727, 0.7320, 0.0565, 0.1388]) -Greedy action tensor([-0.2863, -0.5588, -0.4802, -0.0594]) tensor([0.2604, 0.1983, 0.2145, 0.3267]) -Greedy action tensor([ 0.7230, -1.3816, -0.1028, -0.4531]) tensor([0.5352, 0.0652, 0.2344, 0.1651]) -Greedy action tensor([ 0.6337, 0.3103, 0.0939, -0.3328]) tensor([0.3722, 0.2693, 0.2169, 0.1416]) -Greedy action tensor([-0.1913, -1.6462, 0.2463, 0.4663]) tensor([0.2122, 0.0495, 0.3287, 0.4096]) -Greedy action tensor([-0.0649, -1.3067, -0.1143, -0.3485]) tensor([0.3340, 0.0965, 0.3179, 0.2516]) -Greedy action tensor([ 1.2567, 0.8918, -0.4187, 1.8071]) tensor([0.2766, 0.1920, 0.0518, 0.4796]) -Greedy action tensor([ 0.8221, -0.0204, -0.0497, 0.9776]) tensor([0.3314, 0.1427, 0.1386, 0.3872]) -Greedy action tensor([ 0.9055, -0.6381, 1.5401, -0.4460]) tensor([0.2977, 0.0636, 0.5616, 0.0771]) -Greedy action tensor([-1.3227, 0.2112, -1.3213, 0.0047]) tensor([0.0961, 0.4454, 0.0962, 0.3623]) -Greedy action tensor([-0.4550, -1.2253, -0.9540, 0.9602]) tensor([0.1616, 0.0748, 0.0981, 0.6654]) -Greedy action tensor([0.4811, 0.1689, 0.4226, 1.1774]) tensor([0.2136, 0.1563, 0.2015, 0.4286]) -Greedy action tensor([-0.0799, -1.5183, 0.6214, 0.7699]) tensor([0.1788, 0.0424, 0.3605, 0.4182]) -Greedy action tensor([ 0.1990, 0.0116, 2.0640, -1.0240]) tensor([0.1166, 0.0966, 0.7525, 0.0343]) -Greedy action tensor([ 0.3798, -1.1849, -0.7425, 2.4584]) tensor([0.1049, 0.0220, 0.0342, 0.8389]) -Greedy action tensor([ 0.0257, 0.3941, 0.4875, -0.3725]) tensor([0.2126, 0.3073, 0.3374, 0.1428]) -Greedy action tensor([-0.4063, -1.3277, 0.1090, 0.5379]) tensor([0.1772, 0.0705, 0.2967, 0.4556]) -Greedy action tensor([ 2.0139, -0.1611, 0.3874, 1.3538]) tensor([0.5473, 0.0622, 0.1076, 0.2829]) -Greedy action tensor([-0.2151, -1.1860, -0.4938, 0.4183]) tensor([0.2488, 0.0942, 0.1883, 0.4687]) -Greedy action tensor([0.9761, 0.9618, 0.3356, 0.4706]) tensor([0.3209, 0.3164, 0.1691, 0.1936]) -Greedy action tensor([ 0.5664, 0.5489, -0.2164, -0.1463]) tensor([0.3413, 0.3354, 0.1560, 0.1673]) -Greedy action tensor([ 1.6508, -0.0567, 1.3438, 0.7475]) tensor([0.4306, 0.0781, 0.3168, 0.1745]) -Greedy action tensor([ 0.3428, -0.5532, 0.7279, -1.2198]) tensor([0.3239, 0.1322, 0.4760, 0.0679]) -Greedy action tensor([-0.5813, -0.3077, 1.1548, -0.8596]) tensor([0.1143, 0.1503, 0.6488, 0.0866]) -Greedy action tensor([-0.9779, 0.1885, -0.6746, -0.3943]) tensor([0.1359, 0.4364, 0.1841, 0.2436]) -Greedy action tensor([ 0.4380, -0.9528, 0.0264, 0.4601]) tensor([0.3409, 0.0848, 0.2258, 0.3485]) -Greedy action tensor([ 6.0794e-01, 7.3247e-01, -6.4087e-04, -7.4984e-01]) tensor([0.3408, 0.3860, 0.1855, 0.0877]) -Greedy action tensor([-0.0349, -0.2278, 0.7201, -1.1526]) tensor([0.2337, 0.1927, 0.4972, 0.0764]) -Greedy action tensor([ 0.8363, -0.4819, -1.6222, 0.0501]) tensor([0.5529, 0.1480, 0.0473, 0.2519]) -Greedy action tensor([ 0.2335, -1.7319, 0.8539, -0.4062]) tensor([0.2835, 0.0397, 0.5272, 0.1495]) -Greedy action tensor([ 0.5997, 0.1268, 0.2048, -0.0732]) tensor([0.3562, 0.2220, 0.2400, 0.1818]) -Greedy action tensor([ 1.0813, -0.0924, -0.0422, 0.2836]) tensor([0.4797, 0.1483, 0.1560, 0.2160]) -Greedy action tensor([-0.6625, -0.7348, -0.7894, 0.3406]) tensor([0.1806, 0.1680, 0.1591, 0.4924]) -Greedy action tensor([ 0.2340, 1.0810, 1.3835, -0.8539]) tensor([0.1465, 0.3417, 0.4624, 0.0494]) -Greedy action tensor([ 0.9730, -0.0799, -0.6102, 0.7171]) tensor([0.4295, 0.1499, 0.0882, 0.3325]) -Greedy action tensor([ 0.3951, -1.2718, 0.0159, 0.1345]) tensor([0.3782, 0.0714, 0.2589, 0.2915]) -Greedy action tensor([ 0.8700, -2.4962, -0.4658, 0.8679]) tensor([0.4357, 0.0150, 0.1146, 0.4348]) -Greedy action tensor([ 0.8626, -0.7563, -0.2589, 0.3858]) tensor([0.4663, 0.0924, 0.1519, 0.2894]) -Greedy action tensor([ 1.2217, -0.8206, -0.5513, 1.2015]) tensor([0.4387, 0.0569, 0.0745, 0.4299]) -Greedy action tensor([ 0.2096, 0.0919, 0.1507, -0.3410]) tensor([0.2934, 0.2608, 0.2766, 0.1692]) -Greedy action tensor([ 1.7094, -1.1632, 1.0499, 1.9857]) tensor([0.3458, 0.0196, 0.1788, 0.4558]) -Greedy action tensor([-1.2573, -1.2031, 1.4917, -0.0725]) tensor([0.0477, 0.0504, 0.7458, 0.1561]) -Greedy action tensor([ 0.0468, -1.8198, -0.0618, -0.0477]) tensor([0.3377, 0.0522, 0.3029, 0.3072]) -Greedy action tensor([ 0.8973, 1.0039, 0.8987, -1.2691]) tensor([0.3097, 0.3446, 0.3102, 0.0355]) -Greedy action tensor([-0.9534, -1.2955, -0.2031, -0.7927]) tensor([0.1999, 0.1420, 0.4233, 0.2348]) -Greedy action tensor([-0.8377, -1.2848, -0.5406, 1.3436]) tensor([0.0844, 0.0540, 0.1136, 0.7479]) -Greedy action tensor([-0.0114, -0.2267, 0.2429, -0.2784]) tensor([0.2590, 0.2088, 0.3339, 0.1983]) -Greedy action tensor([ 0.8405, -0.9608, 0.0522, 0.1105]) tensor([0.4758, 0.0785, 0.2163, 0.2293]) -Greedy action tensor([ 1.5421, -1.2397, 0.7906, 1.1151]) tensor([0.4574, 0.0283, 0.2158, 0.2985]) -Greedy action tensor([ 1.1716, -1.5274, 0.8260, 1.1735]) tensor([0.3601, 0.0242, 0.2549, 0.3608]) -Greedy action tensor([ 0.0323, -0.7960, -0.0595, -0.1388]) tensor([0.3133, 0.1368, 0.2858, 0.2640]) -Greedy action tensor([-0.7852, -0.6181, 0.4019, -0.5163]) tensor([0.1478, 0.1746, 0.4843, 0.1933]) -Greedy action tensor([ 0.2367, 1.0713, 0.1199, -0.5069]) tensor([0.2142, 0.4934, 0.1906, 0.1018]) -Greedy action tensor([-0.0137, -0.9511, 0.2127, 1.6526]) tensor([0.1260, 0.0493, 0.1580, 0.6667]) -Greedy action tensor([-0.6545, -0.7043, 0.0617, 0.5596]) tensor([0.1358, 0.1292, 0.2779, 0.4572]) -Greedy action tensor([ 0.0179, 0.5433, 1.0298, -0.6826]) tensor([0.1684, 0.2848, 0.4632, 0.0836]) -Greedy action tensor([ 1.2501, -0.8414, 1.3689, 0.7332]) tensor([0.3514, 0.0434, 0.3957, 0.2096]) -Greedy action tensor([-0.7624, -0.7011, 1.2950, -1.3652]) tensor([0.0958, 0.1019, 0.7499, 0.0524]) -Greedy action tensor([ 0.6780, 0.6833, -0.4405, 0.7242]) tensor([0.2959, 0.2975, 0.0967, 0.3099]) -Greedy action tensor([ 0.8692, -0.4619, -0.9866, 0.9743]) tensor([0.3950, 0.1044, 0.0618, 0.4388]) -Greedy action tensor([ 0.0719, -0.5902, -0.5659, 0.2997]) tensor([0.3030, 0.1563, 0.1601, 0.3805]) -Greedy action tensor([-0.9292, 0.0137, 0.9421, -1.1051]) tensor([0.0917, 0.2355, 0.5959, 0.0769]) -Greedy action tensor([ 0.7474, -0.7848, 1.9268, -0.7699]) tensor([0.2133, 0.0461, 0.6938, 0.0468]) -Greedy action tensor([-0.0235, 0.2273, 0.6395, -0.8880]) tensor([0.2152, 0.2765, 0.4176, 0.0906]) -Greedy action tensor([ 0.0306, -1.5266, -0.5054, 0.5758]) tensor([0.2840, 0.0599, 0.1662, 0.4899]) -Greedy action tensor([ 0.9448, -0.2977, -0.2653, 0.8400]) tensor([0.4020, 0.1160, 0.1199, 0.3620]) -Greedy action tensor([-0.1711, -0.6172, 0.2508, -1.3516]) tensor([0.2880, 0.1844, 0.4392, 0.0885]) -Greedy action tensor([-1.0362, -1.1383, -0.3236, -1.2307]) tensor([0.2098, 0.1895, 0.4279, 0.1728]) -Greedy action tensor([-0.1828, -0.7642, 0.1959, -0.5460]) tensor([0.2692, 0.1505, 0.3931, 0.1872]) -Greedy action tensor([ 1.6977, -0.7192, 1.1176, 0.4533]) tensor([0.5162, 0.0460, 0.2890, 0.1487]) -Greedy action tensor([ 1.7963, -0.1112, 1.1815, 0.0748]) tensor([0.5353, 0.0795, 0.2895, 0.0957]) -Greedy action tensor([ 1.9507, -1.5926, -0.0865, 0.8448]) tensor([0.6710, 0.0194, 0.0875, 0.2221]) -Greedy action tensor([-0.4385, 0.0958, 0.4750, -1.1940]) tensor([0.1764, 0.3010, 0.4398, 0.0829]) -Greedy action tensor([-0.7217, -1.6385, 0.6398, 0.2536]) tensor([0.1257, 0.0503, 0.4906, 0.3334]) -Greedy action tensor([ 0.0948, 0.4800, -0.5208, -1.0027]) tensor([0.2990, 0.4396, 0.1616, 0.0998]) -Greedy action tensor([ 0.2516, -0.2083, 1.1347, -0.4020]) tensor([0.2188, 0.1382, 0.5292, 0.1138]) -Greedy action tensor([ 1.5258, -0.8473, 0.0641, -0.0247]) tensor([0.6505, 0.0606, 0.1508, 0.1380]) -Greedy action tensor([ 1.7842, -0.7528, -0.2393, 0.3370]) tensor([0.6913, 0.0547, 0.0914, 0.1626]) -Greedy action tensor([ 1.9403, -0.7719, -0.4456, 0.3327]) tensor([0.7360, 0.0489, 0.0677, 0.1475]) -Greedy action tensor([ 1.5929, -0.3331, -0.3502, 0.0162]) tensor([0.6686, 0.0974, 0.0958, 0.1382]) -Greedy action tensor([ 1.7137, -0.3471, -0.2876, 0.3318]) tensor([0.6607, 0.0841, 0.0893, 0.1659]) -Greedy action tensor([ 1.4355, -0.5836, -0.3253, 0.3945]) tensor([0.6032, 0.0801, 0.1037, 0.2130]) -Greedy action tensor([ 1.5989, -0.6577, -0.1420, 0.3485]) tensor([0.6384, 0.0668, 0.1119, 0.1828]) -Greedy action tensor([ 2.5557, -1.1227, -0.5548, 0.8589]) tensor([0.7980, 0.0202, 0.0356, 0.1463]) -Greedy action tensor([ 0.9745, -0.3305, -0.1932, 0.0520]) tensor([0.5051, 0.1370, 0.1571, 0.2008]) -Greedy action tensor([ 0.8941, 0.2587, -0.1674, 0.3595]) tensor([0.4063, 0.2152, 0.1405, 0.2380]) -Greedy action tensor([ 1.1295, -0.5917, -0.1142, 0.0404]) tensor([0.5544, 0.0992, 0.1598, 0.1866]) -Greedy action tensor([ 1.0676, -0.2099, -0.3956, 0.4842]) tensor([0.4835, 0.1348, 0.1119, 0.2698]) -Greedy action tensor([ 1.3862, -0.1444, -0.6961, 0.0575]) tensor([0.6227, 0.1348, 0.0776, 0.1649]) -Greedy action tensor([ 1.3866, -0.6569, -0.0505, 0.5836]) tensor([0.5509, 0.0714, 0.1309, 0.2468]) -Greedy action tensor([ 1.4909, -0.3269, -0.2692, 0.2995]) tensor([0.6104, 0.0991, 0.1050, 0.1854]) -Greedy action tensor([ 1.4731, -0.5281, -0.3185, 0.3279]) tensor([0.6173, 0.0834, 0.1029, 0.1964]) -Greedy action tensor([ 1.7214, -0.7619, -0.0732, 0.4879]) tensor([0.6490, 0.0542, 0.1078, 0.1890]) -Greedy action tensor([ 1.6585, -0.3727, -0.6801, 0.2748]) tensor([0.6765, 0.0887, 0.0653, 0.1696]) -Greedy action tensor([ 1.7910, -0.3833, -0.4912, 0.3879]) tensor([0.6842, 0.0778, 0.0698, 0.1682]) -Greedy action tensor([ 1.0400, -0.5629, -0.1362, 0.4689]) tensor([0.4820, 0.0970, 0.1487, 0.2723]) -Greedy action tensor([ 1.9704, -1.0266, -0.0227, 0.2546]) tensor([0.7320, 0.0366, 0.0998, 0.1316]) -Greedy action tensor([ 0.9095, -0.6599, -0.5030, -0.2159]) tensor([0.5630, 0.1172, 0.1371, 0.1827]) -Greedy action tensor([ 1.1423, -0.7263, 0.0842, 0.2913]) tensor([0.5186, 0.0800, 0.1800, 0.2214]) -Greedy action tensor([ 1.2575, -0.0272, -0.3488, -0.1584]) tensor([0.5814, 0.1609, 0.1166, 0.1411]) -Greedy action tensor([ 1.3960, -0.3457, -0.7667, 0.4749]) tensor([0.5923, 0.1038, 0.0681, 0.2358]) -Greedy action tensor([ 1.3894, -0.3535, -0.4448, 0.4901]) tensor([0.5742, 0.1005, 0.0917, 0.2336]) -Greedy action tensor([ 1.9210, -0.7819, -0.2140, 0.8312]) tensor([0.6572, 0.0440, 0.0777, 0.2210]) -Greedy action tensor([ 1.4019, -0.5159, -0.3063, -0.0444]) tensor([0.6396, 0.0940, 0.1159, 0.1506]) -Greedy action tensor([ 1.3942, -0.4758, -0.4004, 0.4930]) tensor([0.5792, 0.0893, 0.0963, 0.2352]) -Greedy action tensor([ 1.3652, -0.3499, -0.3054, 0.1594]) tensor([0.5997, 0.1079, 0.1128, 0.1796]) -Greedy action tensor([ 1.5425, -0.3776, -0.3117, 0.2626]) tensor([0.6324, 0.0927, 0.0990, 0.1758]) -Greedy action tensor([ 1.2366, -0.5960, -0.3744, 0.9423]) tensor([0.4751, 0.0760, 0.0949, 0.3540]) -Greedy action tensor([ 0.9292, -0.5571, -0.1699, 0.0694]) tensor([0.5044, 0.1141, 0.1680, 0.2135]) -Greedy action tensor([ 1.3795, -0.3730, -0.6095, 0.4631]) tensor([0.5848, 0.1014, 0.0800, 0.2339]) -Greedy action tensor([ 0.2399, -0.6705, 0.0226, 0.0711]) tensor([0.3277, 0.1318, 0.2637, 0.2768]) -Greedy action tensor([ 1.8263, -0.0678, -0.2921, 0.6581]) tensor([0.6323, 0.0951, 0.0760, 0.1966]) -Greedy action tensor([ 1.3542, -0.8989, -0.1579, 0.4054]) tensor([0.5839, 0.0613, 0.1287, 0.2261]) -Greedy action tensor([ 1.0900, -0.5052, -0.0378, 0.2314]) tensor([0.5127, 0.1040, 0.1660, 0.2173]) -Greedy action tensor([ 2.0948, -0.9721, -0.1999, 0.5900]) tensor([0.7302, 0.0340, 0.0736, 0.1622]) -Greedy action tensor([ 1.0343, 0.0166, -0.7466, 0.3380]) tensor([0.4930, 0.1782, 0.0831, 0.2457]) -Greedy action tensor([ 1.6282, -0.6703, -0.0962, 0.9481]) tensor([0.5601, 0.0562, 0.0999, 0.2837]) -Greedy action tensor([ 2.4534, -1.8400, -0.4086, 0.4060]) tensor([0.8334, 0.0114, 0.0476, 0.1076]) -Greedy action tensor([ 1.6929, -0.5638, -0.1043, 0.2633]) tensor([0.6623, 0.0693, 0.1098, 0.1586]) -Greedy action tensor([ 1.3636, -0.9059, -0.1813, 0.8513]) tensor([0.5220, 0.0540, 0.1114, 0.3127]) -Greedy action tensor([ 1.0442, -0.2970, -0.1686, 0.1184]) tensor([0.5115, 0.1338, 0.1521, 0.2027]) -Greedy action tensor([ 1.1410, -0.1995, -0.7335, 0.5056]) tensor([0.5142, 0.1346, 0.0789, 0.2724]) -Greedy action tensor([ 1.3069, -0.1957, -0.3627, 0.3800]) tensor([0.5535, 0.1232, 0.1042, 0.2191]) -Greedy action tensor([ 1.6474, -0.6567, -0.4135, 0.0608]) tensor([0.6984, 0.0697, 0.0889, 0.1429]) -Greedy action tensor([ 0.4788, -0.4279, -0.2950, 0.4368]) tensor([0.3541, 0.1430, 0.1633, 0.3395]) -Greedy action tensor([ 1.4180, -0.5206, -0.0029, 0.0563]) tensor([0.6091, 0.0877, 0.1471, 0.1561]) -Greedy action tensor([ 1.6154, -0.4338, -0.8956, 0.5710]) tensor([0.6402, 0.0825, 0.0520, 0.2253]) -Greedy action tensor([ 1.5664, -0.0261, -0.5139, -0.0147]) tensor([0.6519, 0.1326, 0.0814, 0.1341]) -Greedy action tensor([ 1.5861, -0.9299, -0.2125, 0.2467]) tensor([0.6630, 0.0536, 0.1097, 0.1737]) -Greedy action tensor([ 2.1486, -1.1033, -0.3560, 0.6038]) tensor([0.7498, 0.0290, 0.0613, 0.1600]) -Greedy action tensor([ 2.3664, -0.9023, -0.4690, 0.5793]) tensor([0.7910, 0.0301, 0.0464, 0.1324]) -Greedy action tensor([ 1.4320, -0.0184, -1.1351, 0.0666]) tensor([0.6384, 0.1497, 0.0490, 0.1630]) -Greedy action tensor([ 1.3103, -0.8576, -0.2806, 0.5861]) tensor([0.5547, 0.0635, 0.1130, 0.2688]) -Greedy action tensor([ 1.2391, -0.3268, -0.4341, 0.2647]) tensor([0.5637, 0.1178, 0.1058, 0.2128]) -Greedy action tensor([ 2.3918, -1.1344, -0.4615, 0.7957]) tensor([0.7753, 0.0228, 0.0447, 0.1572]) -Greedy action tensor([ 1.6084, -0.4867, -0.3269, 0.4632]) tensor([0.6307, 0.0776, 0.0911, 0.2007]) -Greedy action tensor([ 0.7262, -0.5117, -0.1422, 0.2393]) tensor([0.4303, 0.1248, 0.1806, 0.2644]) -Greedy action tensor([ 1.4850, -0.3962, -0.2060, 0.3967]) tensor([0.5975, 0.0911, 0.1101, 0.2012]) -Greedy action tensor([ 1.1241, -0.6036, -0.4772, 0.6859]) tensor([0.4939, 0.0878, 0.0996, 0.3187]) -Greedy action tensor([ 1.2037, -0.1438, -0.7527, 0.1483]) tensor([0.5717, 0.1486, 0.0808, 0.1990]) -Greedy action tensor([ 1.6956, -0.4533, -0.1015, -0.1712]) tensor([0.6959, 0.0812, 0.1154, 0.1076]) -Greedy action tensor([ 1.3359, -0.4765, -0.7035, 0.3268]) tensor([0.6032, 0.0985, 0.0785, 0.2199]) -Greedy action tensor([1.9613, 0.2083, 0.3407, 0.1719]) tensor([0.6502, 0.1126, 0.1286, 0.1086]) -Greedy action tensor([ 1.9327, -1.0781, 0.0554, 0.5738]) tensor([0.6853, 0.0338, 0.1049, 0.1761]) -Greedy action tensor([ 1.5988, -0.1092, -0.2216, 0.4394]) tensor([0.6035, 0.1094, 0.0978, 0.1893]) -Greedy action tensor([ 2.5272, -1.1328, -0.0947, 1.0021]) tensor([0.7599, 0.0196, 0.0552, 0.1654]) -Greedy action tensor([ 1.2397, -0.3525, -0.6963, 0.5528]) tensor([0.5403, 0.1099, 0.0779, 0.2718]) -Greedy action tensor([ 1.3351, -0.1737, -0.6871, 0.1658]) tensor([0.6009, 0.1329, 0.0795, 0.1866]) -Greedy action tensor([ 1.6976, -0.8353, 0.1287, 0.2732]) tensor([0.6543, 0.0520, 0.1363, 0.1575]) -Greedy action tensor([ 0.9685, -0.3259, -0.4306, 0.2485]) tensor([0.4981, 0.1365, 0.1229, 0.2425]) -Greedy action tensor([ 0.4463, -0.7519, 0.0575, 0.2189]) tensor([0.3602, 0.1087, 0.2442, 0.2869]) -Greedy action tensor([ 1.6092, -0.0672, -0.7391, 0.5569]) tensor([0.6129, 0.1146, 0.0585, 0.2140]) -Greedy action tensor([ 1.1008, -0.3308, -0.2490, -0.1194]) tensor([0.5576, 0.1332, 0.1446, 0.1646]) -Greedy action tensor([ 1.3074, -0.1707, -0.3507, 0.7011]) tensor([0.5092, 0.1161, 0.0970, 0.2777]) -Greedy action tensor([ 1.6301, -0.5033, -0.3423, 0.6998]) tensor([0.6053, 0.0717, 0.0842, 0.2388]) -Greedy action tensor([ 1.8749, -0.3263, -0.0772, 0.4154]) tensor([0.6734, 0.0745, 0.0956, 0.1565]) -Greedy action tensor([ 1.2934, -0.4886, -0.2841, 0.3764]) tensor([0.5635, 0.0948, 0.1164, 0.2253]) -Greedy action tensor([ 0.5372, -0.3017, 0.1171, -0.0385]) tensor([0.3772, 0.1630, 0.2478, 0.2121]) -Greedy action tensor([ 0.6928, -0.4664, -0.0862, -0.3773]) tensor([0.4727, 0.1483, 0.2169, 0.1621]) -Greedy action tensor([ 1.0095, -0.1719, 0.1296, -0.2247]) tensor([0.4968, 0.1524, 0.2061, 0.1446]) -Greedy action tensor([ 0.5806, -0.4613, -0.1043, -0.2899]) tensor([0.4394, 0.1550, 0.2215, 0.1840]) -Greedy action tensor([ 0.4720, -0.2317, 0.3189, -0.5545]) tensor([0.3689, 0.1825, 0.3165, 0.1321]) -Greedy action tensor([ 0.4537, -0.3944, -0.0209, -0.2976]) tensor([0.3965, 0.1698, 0.2467, 0.1870]) -Greedy action tensor([ 0.7236, -0.4458, 0.0526, -0.1981]) tensor([0.4505, 0.1399, 0.2303, 0.1792]) -Greedy action tensor([ 0.7049, -0.5257, 0.0522, -0.3210]) tensor([0.4606, 0.1345, 0.2398, 0.1651]) -Greedy action tensor([ 0.6553, -0.4142, -0.1130, -0.1529]) tensor([0.4439, 0.1523, 0.2059, 0.1978]) -Greedy action tensor([ 0.7265, -0.5867, -0.0503, -0.3231]) tensor([0.4810, 0.1294, 0.2212, 0.1684]) -Greedy action tensor([ 0.5955, -0.3402, -0.0045, -0.2279]) tensor([0.4202, 0.1648, 0.2306, 0.1844]) -Greedy action tensor([ 0.9217, -0.3469, -0.0751, -0.3057]) tensor([0.5146, 0.1447, 0.1899, 0.1508]) -Greedy action tensor([ 0.8222, -0.6267, -0.0343, -0.3390]) tensor([0.5069, 0.1190, 0.2153, 0.1587]) -Greedy action tensor([ 0.7258, -0.5492, 0.0414, -0.2793]) tensor([0.4651, 0.1300, 0.2346, 0.1703]) -Greedy action tensor([ 0.9094, -0.6239, -0.4077, -0.2994]) tensor([0.5611, 0.1211, 0.1503, 0.1675]) -Greedy action tensor([ 0.4174, 0.2473, 0.1021, -0.2350]) tensor([0.3232, 0.2727, 0.2358, 0.1683]) -Greedy action tensor([ 0.3258, 0.1121, 0.0175, -0.1050]) tensor([0.3133, 0.2530, 0.2301, 0.2036]) -Greedy action tensor([ 0.7150, -0.3395, -0.1915, -0.3017]) tensor([0.4730, 0.1648, 0.1911, 0.1711]) -Greedy action tensor([ 0.7626, -0.4885, -0.0833, -0.2292]) tensor([0.4793, 0.1372, 0.2057, 0.1778]) -Greedy action tensor([ 0.9523, -0.6118, 0.0089, -0.4052]) tensor([0.5388, 0.1128, 0.2098, 0.1386]) -Greedy action tensor([ 0.6507, -0.3160, 0.1387, -0.2173]) tensor([0.4168, 0.1585, 0.2498, 0.1750]) -Greedy action tensor([ 0.8154, -0.5303, -0.0369, -0.1906]) tensor([0.4872, 0.1268, 0.2078, 0.1782]) -Greedy action tensor([ 0.9854, -0.8374, 0.0407, -0.7125]) tensor([0.5769, 0.0932, 0.2243, 0.1056]) -Greedy action tensor([ 0.3065, -0.1739, -0.1308, -0.3402]) tensor([0.3587, 0.2218, 0.2316, 0.1879]) -Greedy action tensor([ 0.5951, -0.2112, -0.3567, -0.3077]) tensor([0.4468, 0.1995, 0.1725, 0.1812]) -Greedy action tensor([ 0.2427, 0.0239, -0.1381, -0.1358]) tensor([0.3153, 0.2533, 0.2154, 0.2159]) -Greedy action tensor([ 0.9994, -0.8466, 0.1177, -0.4885]) tensor([0.5562, 0.0878, 0.2303, 0.1256]) -Greedy action tensor([ 1.3214, -1.0981, 0.0777, -0.8370]) tensor([0.6699, 0.0596, 0.1931, 0.0774]) -Greedy action tensor([ 0.9397, -0.9983, 0.0858, -0.5000]) tensor([0.5535, 0.0797, 0.2356, 0.1312]) -Greedy action tensor([ 0.5689, -0.1984, -0.0564, -0.1090]) tensor([0.3989, 0.1852, 0.2134, 0.2025]) -Greedy action tensor([ 0.9993, -0.5178, 0.2235, -0.3802]) tensor([0.5178, 0.1136, 0.2383, 0.1303]) -Greedy action tensor([ 5.0722e-01, -2.6659e-01, 3.2987e-04, -2.1859e-01]) tensor([0.3925, 0.1811, 0.2364, 0.1900]) -Greedy action tensor([ 0.4377, 0.0168, -0.0921, 0.0148]) tensor([0.3448, 0.2263, 0.2030, 0.2259]) -Greedy action tensor([ 0.6708, -0.4383, 0.1109, -0.3618]) tensor([0.4430, 0.1461, 0.2531, 0.1578]) -Greedy action tensor([ 0.9168, -0.6632, -0.1434, -0.4195]) tensor([0.5509, 0.1135, 0.1908, 0.1448]) -Greedy action tensor([ 0.7604, -0.4586, -0.1403, -0.1823]) tensor([0.4781, 0.1413, 0.1943, 0.1863]) -Greedy action tensor([ 0.7154, -0.2540, -0.0727, -0.1683]) tensor([0.4450, 0.1688, 0.2023, 0.1839]) -Greedy action tensor([ 0.4818, -0.2838, -0.2203, -0.1029]) tensor([0.3972, 0.1847, 0.1968, 0.2213]) -Greedy action tensor([ 0.6773, 0.0069, -0.1221, -0.0675]) tensor([0.4105, 0.2100, 0.1846, 0.1949]) -Greedy action tensor([ 0.2289, -0.0620, -0.1240, -0.2566]) tensor([0.3262, 0.2439, 0.2292, 0.2007]) -Greedy action tensor([ 0.4711, -0.1188, -0.1176, -0.2745]) tensor([0.3870, 0.2146, 0.2148, 0.1836]) -Greedy action tensor([ 0.7080, 0.0131, -0.0292, 0.0760]) tensor([0.3986, 0.1989, 0.1907, 0.2118]) -Greedy action tensor([ 1.2658, -0.5283, -0.1118, -0.4540]) tensor([0.6260, 0.1041, 0.1578, 0.1121]) -Greedy action tensor([ 1.0619, -0.8271, -0.0655, -0.3069]) tensor([0.5782, 0.0874, 0.1873, 0.1471]) -Greedy action tensor([ 0.8368, -0.8591, 0.1087, -0.5087]) tensor([0.5190, 0.0952, 0.2506, 0.1352]) -Greedy action tensor([ 0.8504, -0.3938, -0.0297, -0.2289]) tensor([0.4895, 0.1411, 0.2030, 0.1664]) -Greedy action tensor([ 0.8597, -0.6230, -0.0191, -0.5161]) tensor([0.5277, 0.1198, 0.2192, 0.1333]) -Greedy action tensor([ 0.4627, -0.1112, -0.0407, -0.0862]) tensor([0.3643, 0.2052, 0.2202, 0.2104]) -Greedy action tensor([ 0.8034, -0.3952, -0.1740, -0.2722]) tensor([0.4953, 0.1494, 0.1864, 0.1689]) -Greedy action tensor([ 0.3828, 0.0222, -0.0870, -0.8196]) tensor([0.3813, 0.2658, 0.2383, 0.1146]) -Greedy action tensor([ 0.5080, -0.0770, -0.1279, -0.0448]) tensor([0.3757, 0.2093, 0.1989, 0.2161]) -Greedy action tensor([ 0.6472, -0.4202, -0.1686, -0.2144]) tensor([0.4528, 0.1557, 0.2002, 0.1913]) -Greedy action tensor([0.5256, 0.3222, 0.1395, 0.2396]) tensor([0.3080, 0.2513, 0.2093, 0.2314]) -Greedy action tensor([ 0.9688, -0.1716, -0.1579, -0.3121]) tensor([0.5204, 0.1664, 0.1687, 0.1446]) -Greedy action tensor([ 0.6308, -0.0447, -0.0864, -0.0038]) tensor([0.3957, 0.2014, 0.1931, 0.2098]) -Greedy action tensor([ 1.1233, -0.6654, -0.0389, -0.3163]) tensor([0.5824, 0.0974, 0.1822, 0.1380]) -Greedy action tensor([ 0.4213, 0.0135, 0.0228, -0.2316]) tensor([0.3500, 0.2328, 0.2350, 0.1822]) -Greedy action tensor([ 0.5926, 0.0818, -0.1280, -0.4702]) tensor([0.4112, 0.2467, 0.2000, 0.1421]) -Greedy action tensor([ 0.5160, -0.2628, -0.0656, -0.1566]) tensor([0.3955, 0.1815, 0.2211, 0.2019]) -Greedy action tensor([ 1.3074, -0.6325, -0.0678, -0.2527]) tensor([0.6224, 0.0895, 0.1573, 0.1308]) -Greedy action tensor([ 0.7007, -0.5539, -0.0244, -0.4503]) tensor([0.4794, 0.1367, 0.2322, 0.1517]) -Greedy action tensor([ 0.8690, -0.3475, -0.1414, -0.3836]) tensor([0.5139, 0.1522, 0.1871, 0.1468]) -Greedy action tensor([ 1.1758, -0.5982, -0.0147, -0.2719]) tensor([0.5852, 0.0993, 0.1779, 0.1376]) -Greedy action tensor([ 1.3649, -0.5907, -0.0617, -0.5282]) tensor([0.6527, 0.0923, 0.1567, 0.0983]) -Greedy action tensor([ 0.8338, -0.6749, 0.1061, -0.7766]) tensor([0.5252, 0.1162, 0.2537, 0.1049]) -Greedy action tensor([ 0.1510, -0.2939, -0.2700, -0.3831]) tensor([0.3468, 0.2223, 0.2276, 0.2033]) -Greedy action tensor([ 0.4906, -0.3567, 0.0078, -0.4390]) tensor([0.4098, 0.1756, 0.2529, 0.1617]) -Greedy action tensor([ 0.7056, -0.5625, -0.1291, -0.3185]) tensor([0.4820, 0.1356, 0.2092, 0.1731]) -Greedy action tensor([ 1.2168, -0.9303, 0.0346, -0.6270]) tensor([0.6323, 0.0739, 0.1939, 0.1000]) -Greedy action tensor([ 0.7233, -0.5579, -0.0535, -0.5619]) tensor([0.4965, 0.1379, 0.2283, 0.1373]) -Greedy action tensor([ 0.6411, -0.4328, -0.0300, -0.2589]) tensor([0.4426, 0.1512, 0.2262, 0.1799]) -Greedy action tensor([ 0.4500, -0.8807, -0.1746, -0.3600]) tensor([0.4455, 0.1177, 0.2386, 0.1982]) -Greedy action tensor([ 0.1793, -0.0846, -0.0965, -0.2928]) tensor([0.3174, 0.2438, 0.2409, 0.1980]) -Greedy action tensor([ 0.6851, -0.4149, -0.1204, -0.4534]) tensor([0.4762, 0.1585, 0.2128, 0.1525]) -Greedy action tensor([ 0.5841, -0.5240, -0.1438, -0.1263]) tensor([0.4339, 0.1433, 0.2096, 0.2133]) -Greedy action tensor([ 0.6264, 0.1136, -0.2346, -0.0184]) tensor([0.3927, 0.2352, 0.1660, 0.2061]) -Greedy action tensor([ 0.5289, -0.4074, -0.2950, -0.3512]) tensor([0.4453, 0.1746, 0.1954, 0.1847]) -Greedy action tensor([ 0.6397, -0.6636, -0.1970, -0.7633]) tensor([0.5127, 0.1393, 0.2220, 0.1260]) -Greedy action tensor([ 0.5552, -0.3110, -0.0894, -0.0601]) tensor([0.4023, 0.1692, 0.2111, 0.2174]) -Greedy action tensor([ 0.6407, -0.3888, -0.0900, -0.1765]) tensor([0.4385, 0.1566, 0.2112, 0.1937]) -Greedy action tensor([ 0.4128, -0.0963, -0.0653, -0.1877]) tensor([0.3611, 0.2170, 0.2238, 0.1981]) -Greedy action tensor([-1.9235, -0.4128, 0.6555, -0.1594]) tensor([0.0407, 0.1845, 0.5370, 0.2377]) -Greedy action tensor([-1.9281, -0.4289, 0.6576, -0.1716]) tensor([0.0407, 0.1825, 0.5408, 0.2360]) -Greedy action tensor([-1.9447, -0.4525, 0.6647, -0.1812]) tensor([0.0402, 0.1788, 0.5465, 0.2345]) -Greedy action tensor([-1.9045, -0.4134, 0.6420, -0.1538]) tensor([0.0417, 0.1854, 0.5326, 0.2403]) -Greedy action tensor([-1.8605, -0.4316, 0.6234, -0.1410]) tensor([0.0440, 0.1835, 0.5271, 0.2454]) -Greedy action tensor([-0.5515, 0.1071, 0.7280, 1.4851]) tensor([0.0705, 0.1361, 0.2533, 0.5401]) -Greedy action tensor([-1.5404, -0.1813, 0.5189, 0.1195]) tensor([0.0556, 0.2164, 0.4358, 0.2923]) -Greedy action tensor([-1.8727, -0.4331, 0.6825, -0.1011]) tensor([0.0417, 0.1760, 0.5370, 0.2453]) -Greedy action tensor([-1.9299, -0.3327, 0.6373, -0.1661]) tensor([0.0403, 0.1991, 0.5253, 0.2352]) -Greedy action tensor([-0.9378, 0.3980, 0.1732, -0.0277]) tensor([0.0969, 0.3683, 0.2942, 0.2406]) -Greedy action tensor([-0.8526, 0.4880, 0.1559, 0.1044]) tensor([0.0984, 0.3759, 0.2697, 0.2561]) -Greedy action tensor([-0.9881, -0.9236, -0.4979, -0.8464]) tensor([0.2061, 0.2199, 0.3365, 0.2375]) -Greedy action tensor([-1.8890, -0.4683, 0.6376, -0.1510]) tensor([0.0428, 0.1774, 0.5361, 0.2436]) -Greedy action tensor([-1.8677, -0.4319, 0.6262, -0.1373]) tensor([0.0436, 0.1831, 0.5275, 0.2458]) -Greedy action tensor([-1.6854, -0.4214, 0.5333, -0.0933]) tensor([0.0536, 0.1898, 0.4931, 0.2635]) -Greedy action tensor([-1.1847, -0.3466, 0.5811, 0.6083]) tensor([0.0659, 0.1524, 0.3855, 0.3961]) -Greedy action tensor([-1.7755, -0.4266, 0.5863, -0.0903]) tensor([0.0479, 0.1848, 0.5087, 0.2586]) -Greedy action tensor([-1.3921, -0.6650, 0.8288, 0.6674]) tensor([0.0497, 0.1028, 0.4579, 0.3896]) -Greedy action tensor([-1.7251, -0.3136, 0.7084, 0.1095]) tensor([0.0439, 0.1802, 0.5007, 0.2751]) -Greedy action tensor([-1.5696, -0.3670, 0.4834, -0.0776]) tensor([0.0604, 0.2009, 0.4703, 0.2684]) -Greedy action tensor([-1.9283, -0.4433, 0.6695, -0.1681]) tensor([0.0405, 0.1790, 0.5447, 0.2357]) -Greedy action tensor([-1.7817, -0.4903, 0.5893, -0.0693]) tensor([0.0479, 0.1742, 0.5126, 0.2653]) -Greedy action tensor([-1.8005, -0.4997, 0.5932, -0.1237]) tensor([0.0477, 0.1751, 0.5223, 0.2550]) -Greedy action tensor([-1.9151, -0.4445, 0.6522, -0.1665]) tensor([0.0414, 0.1804, 0.5401, 0.2382]) -Greedy action tensor([-1.8968, -0.3851, 0.6432, -0.1528]) tensor([0.0418, 0.1895, 0.5298, 0.2390]) -Greedy action tensor([-1.8758, -0.4641, 0.6530, -0.1244]) tensor([0.0427, 0.1753, 0.5357, 0.2462]) -Greedy action tensor([-1.9098, -0.3695, 0.6404, -0.1541]) tensor([0.0412, 0.1923, 0.5279, 0.2385]) -Greedy action tensor([-1.8807, -0.3441, 0.6229, -0.1501]) tensor([0.0425, 0.1977, 0.5199, 0.2400]) -Greedy action tensor([-1.7637, -0.2581, 0.5624, -0.0573]) tensor([0.0470, 0.2120, 0.4817, 0.2592]) -Greedy action tensor([-1.7809, -0.4239, 0.6116, -0.0337]) tensor([0.0464, 0.1801, 0.5074, 0.2661]) -Greedy action tensor([-1.9443, -0.4448, 0.6639, -0.1801]) tensor([0.0402, 0.1800, 0.5454, 0.2345]) -Greedy action tensor([-1.7809, -0.5164, 0.5968, -0.0739]) tensor([0.0480, 0.1700, 0.5174, 0.2646]) -Greedy action tensor([-1.9147, -0.4051, 0.6409, -0.1607]) tensor([0.0414, 0.1871, 0.5326, 0.2389]) -Greedy action tensor([-1.8499, -0.4108, 0.6050, -0.1234]) tensor([0.0445, 0.1876, 0.5180, 0.2500]) -Greedy action tensor([-1.9329, -0.4579, 0.6605, -0.1725]) tensor([0.0407, 0.1780, 0.5446, 0.2368]) -Greedy action tensor([-1.3876, -0.1101, 0.3653, -0.0044]) tensor([0.0697, 0.2501, 0.4023, 0.2779]) -Greedy action tensor([-1.5357, -0.5388, 0.4577, 0.0221]) tensor([0.0633, 0.1715, 0.4646, 0.3006]) -Greedy action tensor([-1.9027, -0.4003, 0.6404, -0.1532]) tensor([0.0417, 0.1875, 0.5308, 0.2400]) -Greedy action tensor([-0.9929, -0.6419, 0.5455, 1.0998]) tensor([0.0659, 0.0936, 0.3067, 0.5339]) -Greedy action tensor([-1.9453, -0.4493, 0.6675, -0.1811]) tensor([0.0401, 0.1790, 0.5469, 0.2341]) -Greedy action tensor([-1.5265, -0.5003, 0.4073, 0.0838]) tensor([0.0637, 0.1776, 0.4402, 0.3185]) -Greedy action tensor([-1.8648, -0.3812, 0.6208, -0.1369]) tensor([0.0434, 0.1913, 0.5210, 0.2442]) -Greedy action tensor([-1.0837, 0.3093, 0.1499, 0.1163]) tensor([0.0849, 0.3418, 0.2915, 0.2818]) -Greedy action tensor([-0.5363, -0.5491, 0.1626, 0.2824]) tensor([0.1596, 0.1575, 0.3210, 0.3619]) -Greedy action tensor([-1.8707, -0.3878, 0.6338, -0.1307]) tensor([0.0428, 0.1888, 0.5243, 0.2441]) -Greedy action tensor([-1.7968, -0.3791, 0.5823, -0.0987]) tensor([0.0468, 0.1930, 0.5048, 0.2555]) -Greedy action tensor([-0.9319, 0.0222, 0.2776, -0.1278]) tensor([0.1089, 0.2827, 0.3650, 0.2434]) -Greedy action tensor([-1.9393, -0.4441, 0.6646, -0.1777]) tensor([0.0403, 0.1799, 0.5450, 0.2348]) -Greedy action tensor([-1.8577, -0.4631, 0.6142, -0.1274]) tensor([0.0444, 0.1791, 0.5259, 0.2506]) -Greedy action tensor([-0.6665, -0.3604, 0.3063, -0.0423]) tensor([0.1456, 0.1977, 0.3850, 0.2717]) -Greedy action tensor([-1.7460, -0.4148, 0.5635, -0.0797]) tensor([0.0496, 0.1879, 0.4998, 0.2627]) -Greedy action tensor([-1.6501, -0.5431, 0.5296, -0.0964]) tensor([0.0568, 0.1719, 0.5025, 0.2687]) -Greedy action tensor([-1.9012, -0.4347, 0.6406, -0.1595]) tensor([0.0421, 0.1825, 0.5350, 0.2404]) -Greedy action tensor([-1.9398, -0.4481, 0.6669, -0.1771]) tensor([0.0403, 0.1790, 0.5459, 0.2347]) -Greedy action tensor([-1.3121, -0.5238, 0.6760, -0.4144]) tensor([0.0772, 0.1698, 0.5636, 0.1894]) -Greedy action tensor([-1.7325, -0.1176, 0.4144, -0.1300]) tensor([0.0511, 0.2571, 0.4377, 0.2540]) -Greedy action tensor([-1.8734, -0.1851, 0.5869, -0.1353]) tensor([0.0420, 0.2273, 0.4918, 0.2389]) -Greedy action tensor([-1.9484, -0.4540, 0.6676, -0.1831]) tensor([0.0400, 0.1784, 0.5477, 0.2339]) -Greedy action tensor([-1.9359, -0.4494, 0.6604, -0.1773]) tensor([0.0406, 0.1795, 0.5444, 0.2356]) -Greedy action tensor([-1.8133, -0.2967, 0.6159, -0.0083]) tensor([0.0435, 0.1982, 0.4937, 0.2645]) -Greedy action tensor([-1.9370, -0.4482, 0.6659, -0.1745]) tensor([0.0404, 0.1790, 0.5453, 0.2353]) -Greedy action tensor([-1.8191, -0.3197, 0.6075, -0.1118]) tensor([0.0448, 0.2007, 0.5073, 0.2471]) -Greedy action tensor([-1.8112, -0.3355, 0.5769, -0.1060]) tensor([0.0459, 0.2009, 0.5004, 0.2528]) -Greedy action tensor([-1.9422, -0.4531, 0.6677, -0.1780]) tensor([0.0402, 0.1783, 0.5468, 0.2347]) -Greedy action tensor([-1.5334, -0.0222, 0.4822, 0.0602]) tensor([0.0557, 0.2524, 0.4179, 0.2740]) -Greedy action tensor([-1.7453, -0.4277, 0.5603, -0.0759]) tensor([0.0498, 0.1860, 0.4997, 0.2645]) -Greedy action tensor([-1.9017, -0.4511, 0.6645, -0.1469]) tensor([0.0416, 0.1773, 0.5409, 0.2403]) -Greedy action tensor([-1.5311, -0.2041, 0.6814, 0.2833]) tensor([0.0499, 0.1880, 0.4559, 0.3062]) -Greedy action tensor([-1.8045, -0.1270, 0.5480, -0.0944]) tensor([0.0447, 0.2390, 0.4694, 0.2469]) -Greedy action tensor([-1.7420, -0.3445, 0.6122, 0.0596]) tensor([0.0462, 0.1870, 0.4867, 0.2801]) -Greedy action tensor([-1.8030, -0.1388, 0.5723, -0.0441]) tensor([0.0438, 0.2312, 0.4708, 0.2542]) -Greedy action tensor([-1.9422, -0.4464, 0.6657, -0.1788]) tensor([0.0402, 0.1795, 0.5458, 0.2345]) -Greedy action tensor([-1.6758, -0.3388, 0.6131, 0.0198]) tensor([0.0497, 0.1892, 0.4902, 0.2708]) -Greedy action tensor([-1.8208, -0.4071, 0.6106, -0.0728]) tensor([0.0450, 0.1849, 0.5117, 0.2584]) -Greedy action tensor([-0.9546, -0.5182, 0.3636, 0.4382]) tensor([0.0970, 0.1501, 0.3624, 0.3905]) -Greedy action tensor([-1.8890, -0.4353, 0.6997, 0.0193]) tensor([0.0395, 0.1689, 0.5255, 0.2661]) -Greedy action tensor([-1.9112, -0.3961, 0.6438, -0.1558]) tensor([0.0413, 0.1880, 0.5317, 0.2390]) -Greedy action tensor([-1.5572, -0.2351, 0.5044, 0.1304]) tensor([0.0555, 0.2082, 0.4362, 0.3001]) -Greedy action tensor([-1.5584, -0.2968, 0.8482, 0.4711]) tensor([0.0430, 0.1520, 0.4775, 0.3275]) -Greedy action tensor([-1.8066, -0.4155, 0.6065, -0.0920]) tensor([0.0460, 0.1849, 0.5137, 0.2555]) -Greedy action tensor([-1.8454, -0.2996, 0.6002, -0.1333]) tensor([0.0439, 0.2060, 0.5067, 0.2433]) -Greedy action tensor([ 0.9720, -0.1197, 1.1117, 0.1707]) tensor([0.3408, 0.1144, 0.3919, 0.1529]) -Greedy action tensor([ 0.2049, -1.7391, 0.2831, 0.5612]) tensor([0.2738, 0.0392, 0.2960, 0.3910]) -Greedy action tensor([ 1.5236, -0.1531, 0.5988, 0.8054]) tensor([0.4828, 0.0903, 0.1915, 0.2354]) -Greedy action tensor([-0.1954, 0.0401, -0.3680, -0.2659]) tensor([0.2476, 0.3133, 0.2083, 0.2307]) -Greedy action tensor([ 1.3440, -0.2609, 1.5971, 0.6256]) tensor([0.3360, 0.0675, 0.4327, 0.1638]) -Greedy action tensor([-0.3779, 0.3736, 0.8303, -0.6626]) tensor([0.1385, 0.2937, 0.4636, 0.1042]) -Greedy action tensor([ 0.6140, 0.4276, -0.2176, -0.6311]) tensor([0.3917, 0.3251, 0.1705, 0.1128]) -Greedy action tensor([-0.3739, -0.7931, 0.3159, -0.8132]) tensor([0.2328, 0.1531, 0.4641, 0.1500]) -Greedy action tensor([-0.5030, -0.0989, -0.0885, 0.8747]) tensor([0.1254, 0.1878, 0.1897, 0.4971]) -Greedy action tensor([ 1.4291, -1.0680, -0.0276, 0.4458]) tensor([0.5919, 0.0487, 0.1379, 0.2214]) -Greedy action tensor([-0.1551, 0.2853, -0.4915, -0.2275]) tensor([0.2382, 0.3700, 0.1702, 0.2216]) -Greedy action tensor([ 0.3349, -1.3974, 1.0635, 0.1115]) tensor([0.2470, 0.0437, 0.5118, 0.1975]) -Greedy action tensor([ 0.5520, 0.5419, -0.4085, -1.0725]) tensor([0.3892, 0.3852, 0.1489, 0.0767]) -Greedy action tensor([ 0.4072, -0.6474, 1.0004, 0.4733]) tensor([0.2366, 0.0824, 0.4282, 0.2528]) -Greedy action tensor([-0.8499, -0.3643, 0.8521, -1.1048]) tensor([0.1125, 0.1829, 0.6173, 0.0872]) -Greedy action tensor([ 0.8664, -1.7875, 0.1543, 0.7396]) tensor([0.4095, 0.0288, 0.2009, 0.3607]) -Greedy action tensor([-0.2837, 0.1475, 0.0164, -0.8701]) tensor([0.2250, 0.3462, 0.3037, 0.1251]) -Greedy action tensor([ 0.9181, 0.5419, 0.4023, -0.8851]) tensor([0.4085, 0.2804, 0.2439, 0.0673]) -Greedy action tensor([-0.6873, 0.1948, 0.1196, -0.4875]) tensor([0.1454, 0.3513, 0.3258, 0.1775]) -Greedy action tensor([ 0.9826, -0.2666, 0.7901, -0.2880]) tensor([0.4180, 0.1199, 0.3448, 0.1173]) -Greedy action tensor([-0.0045, -0.8057, -0.1904, 0.1452]) tensor([0.2906, 0.1304, 0.2413, 0.3376]) -Greedy action tensor([ 1.0396, -0.1253, 0.3266, 0.5275]) tensor([0.4164, 0.1299, 0.2041, 0.2495]) -Greedy action tensor([ 0.4791, -1.9566, 1.4195, -0.1662]) tensor([0.2396, 0.0210, 0.6137, 0.1257]) -Greedy action tensor([-0.5354, -1.4497, -0.8554, 0.1236]) tensor([0.2463, 0.0987, 0.1789, 0.4761]) -Greedy action tensor([-0.9131, -0.8591, 1.3621, -0.1141]) tensor([0.0714, 0.0753, 0.6946, 0.1587]) -Greedy action tensor([-0.0720, 0.1548, -0.7776, -0.5850]) tensor([0.2988, 0.3748, 0.1475, 0.1789]) -Greedy action tensor([ 1.2294, -0.6420, 1.0488, 0.6379]) tensor([0.3934, 0.0605, 0.3284, 0.2177]) -Greedy action tensor([ 0.6136, 0.7574, 0.8367, -0.0942]) tensor([0.2566, 0.2963, 0.3207, 0.1264]) -Greedy action tensor([ 0.1300, -0.2869, -0.0464, 0.2306]) tensor([0.2775, 0.1829, 0.2326, 0.3069]) -Greedy action tensor([ 0.6149, -0.4123, 0.3429, 0.2944]) tensor([0.3514, 0.1258, 0.2677, 0.2550]) -Greedy action tensor([ 0.2658, -1.7658, 0.6245, 0.3794]) tensor([0.2715, 0.0356, 0.3887, 0.3042]) -Greedy action tensor([-0.1624, -0.9864, -0.4823, 0.1336]) tensor([0.2849, 0.1250, 0.2069, 0.3831]) -Greedy action tensor([ 0.6183, 0.6933, 1.2265, -0.5126]) tensor([0.2360, 0.2543, 0.4335, 0.0762]) -Greedy action tensor([ 0.9442, -0.3626, 0.6009, -0.6265]) tensor([0.4570, 0.1237, 0.3242, 0.0950]) -Greedy action tensor([ 0.3265, -0.2375, 1.6373, -1.2479]) tensor([0.1823, 0.1037, 0.6762, 0.0378]) -Greedy action tensor([-0.8441, -1.6697, -0.6270, -0.4397]) tensor([0.2393, 0.1048, 0.2973, 0.3586]) -Greedy action tensor([-1.0491, -0.0174, 0.4813, 0.5576]) tensor([0.0746, 0.2092, 0.3445, 0.3718]) -Greedy action tensor([ 1.5031, -0.8428, 0.8645, 1.3573]) tensor([0.4019, 0.0385, 0.2122, 0.3474]) -Greedy action tensor([ 0.0268, -1.0036, 1.3954, -0.8577]) tensor([0.1755, 0.0626, 0.6895, 0.0724]) -Greedy action tensor([-1.5202, 0.3759, 0.8406, -0.7694]) tensor([0.0491, 0.3268, 0.5202, 0.1040]) -Greedy action tensor([ 0.4468, -0.1285, 0.0643, -0.3124]) tensor([0.3686, 0.2074, 0.2515, 0.1725]) -Greedy action tensor([-0.1910, 0.3848, 0.0146, 0.0216]) tensor([0.1907, 0.3392, 0.2342, 0.2359]) -Greedy action tensor([-0.4076, -0.7714, 0.3854, 0.2474]) tensor([0.1715, 0.1192, 0.3791, 0.3302]) -Greedy action tensor([ 0.5593, 0.3115, -0.0427, -0.0008]) tensor([0.3449, 0.2692, 0.1889, 0.1970]) -Greedy action tensor([-0.1422, -0.6743, 1.3249, 0.1339]) tensor([0.1381, 0.0811, 0.5988, 0.1820]) -Greedy action tensor([ 0.5645, 0.0734, -0.6354, -0.7108]) tensor([0.4561, 0.2791, 0.1374, 0.1274]) -Greedy action tensor([ 0.1105, -0.0155, -0.0759, 0.0629]) tensor([0.2728, 0.2405, 0.2265, 0.2602]) -Greedy action tensor([ 1.2882, -0.2893, 0.4712, -0.2450]) tensor([0.5364, 0.1108, 0.2370, 0.1158]) -Greedy action tensor([-1.5512, -0.8228, 1.2485, -0.3533]) tensor([0.0438, 0.0908, 0.7203, 0.1452]) -Greedy action tensor([-1.0484, 0.0625, 0.1819, -1.3611]) tensor([0.1221, 0.3708, 0.4178, 0.0893]) -Greedy action tensor([-0.7349, -0.9081, -0.9106, -0.3668]) tensor([0.2424, 0.2039, 0.2034, 0.3503]) -Greedy action tensor([-0.5094, 0.9660, -0.1902, -0.0859]) tensor([0.1208, 0.5284, 0.1663, 0.1845]) -Greedy action tensor([ 1.3994, -0.7746, 0.2636, 2.0792]) tensor([0.2934, 0.0334, 0.0942, 0.5790]) -Greedy action tensor([ 1.3154, -0.1451, -0.0357, 1.0465]) tensor([0.4434, 0.1029, 0.1148, 0.3388]) -Greedy action tensor([-1.6424, 0.5693, -0.4209, -1.1062]) tensor([0.0656, 0.5995, 0.2227, 0.1122]) -Greedy action tensor([ 0.0664, -0.1670, -0.1043, 0.9380]) tensor([0.1990, 0.1576, 0.1678, 0.4757]) -Greedy action tensor([-0.7005, -0.0133, 0.0136, -0.1187]) tensor([0.1466, 0.2915, 0.2995, 0.2624]) -Greedy action tensor([-0.1048, 0.4047, 0.3376, -0.3855]) tensor([0.2010, 0.3345, 0.3128, 0.1518]) -Greedy action tensor([ 0.6246, 0.4604, 0.1710, -0.0674]) tensor([0.3351, 0.2843, 0.2129, 0.1677]) -Greedy action tensor([ 0.2753, -2.1262, -0.0810, 0.4011]) tensor([0.3419, 0.0310, 0.2394, 0.3877]) -Greedy action tensor([ 0.5742, -0.4491, 0.5025, -0.2718]) tensor([0.3677, 0.1322, 0.3423, 0.1578]) -Greedy action tensor([-0.5212, -1.2946, -0.5424, 0.4988]) tensor([0.1918, 0.0885, 0.1878, 0.5319]) -Greedy action tensor([ 0.9486, -0.0131, 0.3016, -0.5231]) tensor([0.4683, 0.1790, 0.2452, 0.1075]) -Greedy action tensor([ 0.0017, -0.4222, -0.3644, -0.8776]) tensor([0.3619, 0.2369, 0.2510, 0.1502]) -Greedy action tensor([ 0.0830, -0.9437, 0.0986, -0.3759]) tensor([0.3327, 0.1192, 0.3379, 0.2102]) -Greedy action tensor([-0.5925, 0.7439, 1.0552, -1.1323]) tensor([0.0945, 0.3596, 0.4909, 0.0551]) -Greedy action tensor([-0.5822, -0.3559, -0.1499, -1.0566]) tensor([0.2264, 0.2839, 0.3488, 0.1409]) -Greedy action tensor([ 1.1692, -1.0602, -0.5626, 0.5356]) tensor([0.5509, 0.0593, 0.0975, 0.2924]) -Greedy action tensor([-0.2249, -0.2866, 0.8956, -0.2205]) tensor([0.1664, 0.1564, 0.5101, 0.1671]) -Greedy action tensor([ 0.8814, 0.1904, -0.1941, -0.4267]) tensor([0.4734, 0.2372, 0.1615, 0.1280]) -Greedy action tensor([ 0.1277, -2.1412, -0.2741, 0.1883]) tensor([0.3527, 0.0365, 0.2360, 0.3748]) -Greedy action tensor([ 0.3342, 0.4699, -0.5424, 0.2971]) tensor([0.2837, 0.3249, 0.1181, 0.2733]) -Greedy action tensor([ 0.8724, -0.1409, -1.3637, 0.4375]) tensor([0.4723, 0.1715, 0.0505, 0.3057]) -Greedy action tensor([ 0.9571, -1.7109, 0.4083, 0.7659]) tensor([0.4044, 0.0281, 0.2336, 0.3340]) -Greedy action tensor([ 0.3437, -0.2462, 0.2633, 1.7334]) tensor([0.1541, 0.0854, 0.1422, 0.6184]) -Greedy action tensor([ 0.9069, -1.6486, -0.4395, 0.0741]) tensor([0.5641, 0.0438, 0.1468, 0.2453]) -Greedy action tensor([ 0.9398, -0.5515, -0.2011, -0.3201]) tensor([0.5469, 0.1231, 0.1748, 0.1552]) -Greedy action tensor([ 0.5648, -0.3325, -0.8568, 2.5288]) tensor([0.1139, 0.0464, 0.0275, 0.8121]) -Greedy action tensor([ 6.9907e-04, -1.5671e+00, 1.0516e+00, -7.1624e-02]) tensor([0.2000, 0.0417, 0.5722, 0.1861]) -Greedy action tensor([ 0.6920, 0.1563, -0.2507, -0.4607]) tensor([0.4366, 0.2555, 0.1701, 0.1379]) -Greedy action tensor([0.4085, 0.0233, 0.5449, 2.1047]) tensor([0.1208, 0.0822, 0.1384, 0.6586]) -Greedy action tensor([ 1.8039, -0.7945, -0.2596, 0.1402]) tensor([0.7190, 0.0535, 0.0913, 0.1362]) -Greedy action tensor([ 2.2445, -0.5631, -0.6240, 0.1166]) tensor([0.8089, 0.0488, 0.0459, 0.0963]) -Greedy action tensor([ 1.0498, -0.2883, -0.2900, 0.2148]) tensor([0.5107, 0.1340, 0.1337, 0.2216]) -Greedy action tensor([ 1.9970, -0.8110, -0.7603, 0.4645]) tensor([0.7464, 0.0450, 0.0474, 0.1612]) -Greedy action tensor([ 1.9279, -0.6652, -0.4573, 0.6913]) tensor([0.6862, 0.0513, 0.0632, 0.1993]) -Greedy action tensor([ 2.2524, -1.2666, -0.2827, 0.9375]) tensor([0.7260, 0.0215, 0.0575, 0.1949]) -Greedy action tensor([ 1.8542, -0.2802, -0.4512, 0.2303]) tensor([0.7066, 0.0836, 0.0705, 0.1393]) -Greedy action tensor([ 0.9188, 0.1426, -0.5503, 0.4812]) tensor([0.4281, 0.1970, 0.0985, 0.2764]) -Greedy action tensor([ 1.0763, -0.2961, -0.6237, 0.3395]) tensor([0.5222, 0.1324, 0.0954, 0.2500]) -Greedy action tensor([ 1.5239, -0.3557, -0.5547, 0.4443]) tensor([0.6182, 0.0944, 0.0773, 0.2100]) -Greedy action tensor([ 1.2634, -0.6075, -0.1015, 0.2497]) tensor([0.5642, 0.0869, 0.1441, 0.2047]) -Greedy action tensor([ 2.2328, -0.5454, -0.4852, 0.5020]) tensor([0.7661, 0.0476, 0.0506, 0.1357]) -Greedy action tensor([ 1.6854, -0.3479, -0.1531, 0.0312]) tensor([0.6751, 0.0884, 0.1074, 0.1291]) -Greedy action tensor([ 1.0205, -0.2707, -0.3120, 0.3210]) tensor([0.4913, 0.1351, 0.1296, 0.2441]) -Greedy action tensor([ 1.7740, -0.7122, -0.2706, 0.2857]) tensor([0.6952, 0.0579, 0.0900, 0.1569]) -Greedy action tensor([ 2.1438, -0.8086, -0.4101, 1.2221]) tensor([0.6545, 0.0342, 0.0509, 0.2604]) -Greedy action tensor([ 1.4777, 0.6159, -0.0855, 0.3040]) tensor([0.5152, 0.2176, 0.1079, 0.1593]) -Greedy action tensor([ 2.7156, -0.4915, 0.6918, -0.0560]) tensor([0.8096, 0.0328, 0.1070, 0.0506]) -Greedy action tensor([ 1.5337, -0.1303, -1.0940, 0.2600]) tensor([0.6488, 0.1229, 0.0469, 0.1815]) -Greedy action tensor([ 1.7664, 0.0978, -0.5294, 0.4574]) tensor([0.6413, 0.1209, 0.0646, 0.1732]) -Greedy action tensor([ 1.9084, -0.6639, -0.5326, 0.1739]) tensor([0.7463, 0.0570, 0.0650, 0.1317]) -Greedy action tensor([ 1.2694, -0.3782, -0.6903, 0.3203]) tensor([0.5812, 0.1119, 0.0819, 0.2250]) -Greedy action tensor([ 1.7232, -0.9247, -0.2246, 0.0032]) tensor([0.7181, 0.0508, 0.1024, 0.1286]) -Greedy action tensor([ 1.2928, -0.6509, -0.4034, 0.1421]) tensor([0.6087, 0.0871, 0.1116, 0.1926]) -Greedy action tensor([ 0.1043, -0.1521, -0.3283, 0.7856]) tensor([0.2273, 0.1759, 0.1475, 0.4493]) -Greedy action tensor([ 1.8275, -0.9085, -0.2311, 0.3423]) tensor([0.7048, 0.0457, 0.0900, 0.1596]) -Greedy action tensor([ 1.9443, -0.6706, -0.0473, 0.1203]) tensor([0.7294, 0.0534, 0.0995, 0.1177]) -Greedy action tensor([ 2.1640, -0.7482, -0.3281, 0.5393]) tensor([0.7496, 0.0407, 0.0620, 0.1476]) -Greedy action tensor([ 1.8482, -0.5243, -0.2046, 0.4071]) tensor([0.6857, 0.0639, 0.0880, 0.1623]) -Greedy action tensor([ 1.2754, -0.1610, -0.3549, 0.2609]) tensor([0.5567, 0.1324, 0.1090, 0.2019]) -Greedy action tensor([ 1.4858, -0.3148, -0.1539, 0.4934]) tensor([0.5781, 0.0955, 0.1122, 0.2143]) -Greedy action tensor([ 1.3260, -0.3767, -0.9528, 0.5933]) tensor([0.5665, 0.1032, 0.0580, 0.2723]) -Greedy action tensor([ 1.7546, 0.3262, -0.1137, 0.4460]) tensor([0.6009, 0.1440, 0.0928, 0.1624]) -Greedy action tensor([ 1.9929, -0.8003, -0.4394, 0.5630]) tensor([0.7202, 0.0441, 0.0633, 0.1724]) -Greedy action tensor([ 1.2443, -0.1250, -0.6253, 0.0718]) tensor([0.5821, 0.1480, 0.0897, 0.1802]) -Greedy action tensor([ 1.8103, -0.6812, -0.2438, 0.0497]) tensor([0.7231, 0.0599, 0.0927, 0.1243]) -Greedy action tensor([ 1.9342, -1.2536, -0.2683, 0.8018]) tensor([0.6784, 0.0280, 0.0750, 0.2186]) -Greedy action tensor([ 1.0919, -0.1384, -0.4544, 0.1463]) tensor([0.5281, 0.1543, 0.1125, 0.2051]) -Greedy action tensor([ 1.5040, -0.3963, -0.5719, 0.4085]) tensor([0.6214, 0.0929, 0.0779, 0.2078]) -Greedy action tensor([ 1.1621, -0.1830, -0.7680, 0.2059]) tensor([0.5587, 0.1455, 0.0811, 0.2147]) -Greedy action tensor([ 1.8547, -0.4700, -0.3808, 0.2873]) tensor([0.7075, 0.0692, 0.0757, 0.1476]) -Greedy action tensor([ 1.7483, -0.5855, -0.3537, 0.3991]) tensor([0.6763, 0.0656, 0.0827, 0.1755]) -Greedy action tensor([ 1.2192, -0.4041, -0.5201, 0.2577]) tensor([0.5697, 0.1124, 0.1001, 0.2178]) -Greedy action tensor([ 1.5635, -0.3638, -0.0437, 0.2213]) tensor([0.6222, 0.0905, 0.1247, 0.1626]) -Greedy action tensor([ 1.3080, -0.1186, -1.0118, 0.4932]) tensor([0.5615, 0.1348, 0.0552, 0.2486]) -Greedy action tensor([ 1.3651, -0.0793, -0.5201, 0.2307]) tensor([0.5850, 0.1380, 0.0888, 0.1882]) -Greedy action tensor([ 1.7489, -0.3915, -0.4613, 0.1974]) tensor([0.6948, 0.0817, 0.0762, 0.1472]) -Greedy action tensor([ 1.6258, -0.5957, -0.4400, 0.3670]) tensor([0.6583, 0.0714, 0.0834, 0.1869]) -Greedy action tensor([ 1.8181, -0.6795, -0.3301, 0.8590]) tensor([0.6320, 0.0520, 0.0738, 0.2422]) -Greedy action tensor([ 1.8692, -0.7199, -0.0638, 0.3875]) tensor([0.6911, 0.0519, 0.1000, 0.1570]) -Greedy action tensor([ 8.9422e-01, -4.2688e-01, 5.4562e-04, 2.8187e-01]) tensor([0.4508, 0.1203, 0.1845, 0.2444]) -Greedy action tensor([ 1.4076, -0.4088, -0.5239, 0.1893]) tensor([0.6237, 0.1014, 0.0904, 0.1845]) -Greedy action tensor([ 1.0437, -0.5716, -0.3225, 0.6080]) tensor([0.4760, 0.0946, 0.1214, 0.3079]) -Greedy action tensor([ 1.3620, -0.4249, -0.4015, 0.2531]) tensor([0.5992, 0.1004, 0.1027, 0.1977]) -Greedy action tensor([ 1.0050, -0.4180, 0.0225, 0.0429]) tensor([0.5006, 0.1206, 0.1874, 0.1913]) -Greedy action tensor([ 2.3955, -0.1386, -0.0431, 0.4594]) tensor([0.7628, 0.0605, 0.0666, 0.1101]) -Greedy action tensor([ 1.1859, -0.4098, -0.0455, 0.1186]) tensor([0.5439, 0.1103, 0.1588, 0.1871]) -Greedy action tensor([ 1.5792, 0.1338, -0.4137, 0.0667]) tensor([0.6280, 0.1480, 0.0856, 0.1384]) -Greedy action tensor([ 1.1732, -0.3380, 0.0654, -0.0332]) tensor([0.5405, 0.1193, 0.1785, 0.1618]) -Greedy action tensor([ 1.5352, -0.4049, -0.3691, 0.1831]) tensor([0.6446, 0.0926, 0.0960, 0.1668]) -Greedy action tensor([ 0.8985, -0.5762, 0.2087, -0.2970]) tensor([0.4919, 0.1126, 0.2468, 0.1488]) -Greedy action tensor([ 2.5675, -0.9270, -0.6698, 0.7516]) tensor([0.8115, 0.0246, 0.0319, 0.1320]) -Greedy action tensor([ 2.0949, -0.3261, -0.8556, 0.2111]) tensor([0.7733, 0.0687, 0.0405, 0.1175]) -Greedy action tensor([ 2.1596, -1.1394, 0.2491, 1.0573]) tensor([0.6592, 0.0243, 0.0976, 0.2189]) -Greedy action tensor([ 1.6118, -0.0729, -0.1102, 0.6354]) tensor([0.5744, 0.1066, 0.1027, 0.2164]) -Greedy action tensor([ 1.7641, -0.6601, -0.4150, 0.1023]) tensor([0.7187, 0.0636, 0.0813, 0.1364]) -Greedy action tensor([ 0.9569, -0.6065, -0.2154, 0.4117]) tensor([0.4764, 0.0998, 0.1475, 0.2762]) -Greedy action tensor([ 2.4658, -0.7207, -0.0591, 0.8622]) tensor([0.7561, 0.0312, 0.0605, 0.1521]) -Greedy action tensor([ 1.9915, -0.5847, -0.2952, 0.4116]) tensor([0.7227, 0.0550, 0.0734, 0.1489]) -Greedy action tensor([ 2.3967, -1.1883, -0.0836, 1.2371]) tensor([0.7017, 0.0195, 0.0587, 0.2201]) -Greedy action tensor([ 1.1484, -0.1693, -0.2272, 0.3223]) tensor([0.5107, 0.1367, 0.1290, 0.2236]) -Greedy action tensor([ 1.6101, -0.1768, -0.0763, 0.4407]) tensor([0.6013, 0.1007, 0.1113, 0.1867]) -Greedy action tensor([ 1.4179, -0.2021, -0.1683, 0.6227]) tensor([0.5394, 0.1067, 0.1104, 0.2435]) -Greedy action tensor([ 1.9703, -0.5498, -0.7447, 0.5822]) tensor([0.7162, 0.0576, 0.0474, 0.1787]) -Greedy action tensor([ 1.9083, -0.3425, -0.5633, 0.2763]) tensor([0.7219, 0.0760, 0.0610, 0.1411]) -Greedy action tensor([ 2.0676, 0.2564, -0.4148, 0.2626]) tensor([0.7085, 0.1158, 0.0592, 0.1165]) -Greedy action tensor([ 1.8862, -0.5506, -0.3012, 0.4658]) tensor([0.6938, 0.0607, 0.0779, 0.1676]) -Greedy action tensor([ 1.1703, -0.6472, -0.4848, -0.3347]) tensor([0.6347, 0.1031, 0.1213, 0.1409]) -Greedy action tensor([ 1.1359, -0.4945, -0.5497, 0.3575]) tensor([0.5434, 0.1064, 0.1007, 0.2495]) -Greedy action tensor([ 1.0288, -0.2869, -0.4405, -0.1070]) tensor([0.5496, 0.1474, 0.1265, 0.1765]) -Greedy action tensor([ 0.9714, -0.4113, -0.4852, 0.6138]) tensor([0.4580, 0.1149, 0.1067, 0.3203]) -Greedy action tensor([ 0.6758, -0.3641, -0.1237, -0.4201]) tensor([0.4679, 0.1654, 0.2103, 0.1564]) -Greedy action tensor([ 0.2615, 0.0669, -0.0760, -0.1581]) tensor([0.3131, 0.2577, 0.2234, 0.2058]) -Greedy action tensor([ 0.5171, -0.5250, -0.2082, -0.0848]) tensor([0.4194, 0.1479, 0.2030, 0.2297]) -Greedy action tensor([ 0.4847, -0.5326, -0.1803, -0.1135]) tensor([0.4123, 0.1491, 0.2120, 0.2267]) -Greedy action tensor([ 1.0647, -0.6109, -0.1426, -0.2761]) tensor([0.5721, 0.1071, 0.1711, 0.1497]) -Greedy action tensor([ 0.6845, -0.3164, -0.0584, -0.1993]) tensor([0.4432, 0.1629, 0.2108, 0.1831]) -Greedy action tensor([ 0.8112, -0.0478, 0.1035, -0.4395]) tensor([0.4540, 0.1923, 0.2237, 0.1300]) -Greedy action tensor([ 0.9687, -0.5582, 0.0147, -0.4233]) tensor([0.5402, 0.1174, 0.2081, 0.1343]) -Greedy action tensor([ 0.3067, -0.2076, -0.0392, -0.4796]) tensor([0.3622, 0.2166, 0.2563, 0.1650]) -Greedy action tensor([ 0.8583, -0.4329, 0.3441, -0.3694]) tensor([0.4617, 0.1269, 0.2761, 0.1353]) -Greedy action tensor([ 0.6882, -0.2373, -0.0151, -0.0787]) tensor([0.4245, 0.1682, 0.2101, 0.1972]) -Greedy action tensor([ 0.7632, -0.5574, 0.0011, -1.0008]) tensor([0.5249, 0.1401, 0.2450, 0.0900]) -Greedy action tensor([ 0.2090, -0.2689, -0.1201, 0.0252]) tensor([0.3153, 0.1955, 0.2269, 0.2623]) -Greedy action tensor([ 0.1268, -0.0159, -0.1782, 0.0091]) tensor([0.2863, 0.2482, 0.2110, 0.2545]) -Greedy action tensor([ 0.5602, -0.5523, 0.2624, -0.5618]) tensor([0.4172, 0.1372, 0.3098, 0.1359]) -Greedy action tensor([ 0.4862, -0.5762, -0.2394, -0.1714]) tensor([0.4259, 0.1472, 0.2062, 0.2207]) -Greedy action tensor([ 0.4398, 0.1647, -0.0802, 0.1572]) tensor([0.3218, 0.2444, 0.1913, 0.2426]) -Greedy action tensor([ 0.5308, -0.3626, -0.1837, -0.2512]) tensor([0.4244, 0.1737, 0.2077, 0.1942]) -Greedy action tensor([ 0.7427, -0.4059, -0.0843, -0.4726]) tensor([0.4875, 0.1546, 0.2132, 0.1446]) -Greedy action tensor([ 0.8194, -0.4646, 0.0059, -0.2417]) tensor([0.4840, 0.1340, 0.2145, 0.1675]) -Greedy action tensor([ 0.6531, -0.6310, -0.1163, -0.3283]) tensor([0.4728, 0.1309, 0.2191, 0.1772]) -Greedy action tensor([ 0.5405, -0.1375, -0.0801, -0.0557]) tensor([0.3852, 0.1955, 0.2071, 0.2122]) -Greedy action tensor([ 0.7070, -0.1198, -0.1581, -0.2057]) tensor([0.4425, 0.1936, 0.1863, 0.1776]) -Greedy action tensor([ 0.4328, 0.1860, -0.1241, 0.0976]) tensor([0.3258, 0.2545, 0.1867, 0.2330]) -Greedy action tensor([ 0.6060, -0.2702, -0.0461, -0.1854]) tensor([0.4183, 0.1742, 0.2179, 0.1896]) -Greedy action tensor([ 1.1525, -0.7891, -0.0751, -0.5338]) tensor([0.6167, 0.0885, 0.1807, 0.1142]) -Greedy action tensor([ 1.1367, -0.3050, -0.2779, -0.4101]) tensor([0.5908, 0.1398, 0.1436, 0.1258]) -Greedy action tensor([ 0.8255, -0.3836, -0.1382, -0.4599]) tensor([0.5111, 0.1525, 0.1950, 0.1413]) -Greedy action tensor([ 0.6718, -0.3862, -0.0419, -0.1104]) tensor([0.4359, 0.1513, 0.2135, 0.1993]) -Greedy action tensor([ 0.6953, -0.4535, -0.0622, -0.4012]) tensor([0.4717, 0.1495, 0.2212, 0.1576]) -Greedy action tensor([ 0.7001, -0.5010, -0.0999, -0.5834]) tensor([0.4933, 0.1484, 0.2216, 0.1367]) -Greedy action tensor([ 0.6585, -0.3796, -0.0492, -0.3389]) tensor([0.4513, 0.1598, 0.2224, 0.1665]) -Greedy action tensor([ 0.5288, -0.3660, 0.0112, -0.1607]) tensor([0.3990, 0.1631, 0.2378, 0.2002]) -Greedy action tensor([ 0.2301, 0.1383, 0.0658, -0.2390]) tensor([0.2953, 0.2694, 0.2506, 0.1847]) -Greedy action tensor([ 0.5319, -0.5563, -0.1862, -0.0791]) tensor([0.4224, 0.1423, 0.2060, 0.2293]) -Greedy action tensor([ 0.8279, -0.8482, 0.0079, -0.4285]) tensor([0.5230, 0.0978, 0.2303, 0.1489]) -Greedy action tensor([ 0.5187, -0.3751, -0.1811, -0.2730]) tensor([0.4239, 0.1734, 0.2106, 0.1921]) -Greedy action tensor([ 0.8437, -0.3678, -0.0391, -0.3130]) tensor([0.4936, 0.1470, 0.2042, 0.1552]) -Greedy action tensor([ 0.6878, -0.5378, -0.0605, -0.0340]) tensor([0.4439, 0.1303, 0.2101, 0.2157]) -Greedy action tensor([ 0.3906, -0.4200, 0.0125, -0.2282]) tensor([0.3748, 0.1666, 0.2568, 0.2018]) -Greedy action tensor([ 0.3089, -0.2266, -0.1133, -0.0277]) tensor([0.3384, 0.1981, 0.2219, 0.2417]) -Greedy action tensor([ 1.1483, -0.5673, -0.0377, -0.5123]) tensor([0.5969, 0.1074, 0.1823, 0.1134]) -Greedy action tensor([ 1.0136, -0.6571, -0.0290, -0.3437]) tensor([0.5562, 0.1046, 0.1961, 0.1431]) -Greedy action tensor([ 0.8189, -0.3980, -0.2559, -0.2089]) tensor([0.5012, 0.1484, 0.1711, 0.1793]) -Greedy action tensor([ 0.5616, -0.3954, -0.0105, -0.2118]) tensor([0.4150, 0.1594, 0.2342, 0.1915]) -Greedy action tensor([ 0.6786, -0.3794, -0.1481, -0.1178]) tensor([0.4473, 0.1553, 0.1957, 0.2017]) -Greedy action tensor([ 0.8189, -0.2377, -0.1641, -0.4331]) tensor([0.4981, 0.1731, 0.1864, 0.1424]) -Greedy action tensor([ 0.6179, -0.5243, 0.0459, -0.1663]) tensor([0.4273, 0.1364, 0.2412, 0.1951]) -Greedy action tensor([ 1.0026, -0.5419, -0.1829, -0.3546]) tensor([0.5629, 0.1201, 0.1720, 0.1449]) -Greedy action tensor([ 0.7992, -0.4119, -0.0190, -0.3338]) tensor([0.4852, 0.1445, 0.2141, 0.1563]) -Greedy action tensor([ 0.6030, 0.0947, -0.0806, -0.1332]) tensor([0.3868, 0.2327, 0.1953, 0.1853]) -Greedy action tensor([ 0.5552, -0.3081, -0.0749, -0.2082]) tensor([0.4132, 0.1742, 0.2200, 0.1926]) -Greedy action tensor([ 0.9254, -0.9019, 0.1254, -0.4441]) tensor([0.5364, 0.0863, 0.2410, 0.1364]) -Greedy action tensor([ 0.8515, -0.6655, 0.0035, -0.5716]) tensor([0.5295, 0.1161, 0.2268, 0.1276]) -Greedy action tensor([ 0.8944, -0.3323, -0.0315, -0.2707]) tensor([0.4997, 0.1465, 0.1980, 0.1558]) -Greedy action tensor([ 0.9960, -0.7532, -0.0173, -0.2635]) tensor([0.5492, 0.0955, 0.1994, 0.1559]) -Greedy action tensor([ 0.5535, -0.3842, -0.0031, -0.1197]) tensor([0.4041, 0.1582, 0.2316, 0.2061]) -Greedy action tensor([ 0.7731, -0.5530, 0.1415, -0.0307]) tensor([0.4455, 0.1183, 0.2369, 0.1994]) -Greedy action tensor([ 0.7045, -0.2252, 0.0133, -0.0481]) tensor([0.4225, 0.1668, 0.2117, 0.1991]) -Greedy action tensor([ 1.0149, -0.7518, 0.0099, -0.4719]) tensor([0.5672, 0.0969, 0.2076, 0.1282]) -Greedy action tensor([ 0.5843, -0.5685, -0.0130, -0.4035]) tensor([0.4467, 0.1411, 0.2458, 0.1664]) -Greedy action tensor([ 0.5049, -0.0489, -0.1225, 0.0454]) tensor([0.3649, 0.2097, 0.1949, 0.2305]) -Greedy action tensor([ 0.8466, -0.3006, -0.1477, -0.3019]) tensor([0.4989, 0.1584, 0.1846, 0.1582]) -Greedy action tensor([ 0.3734, -0.4266, -0.2613, -0.3398]) tensor([0.4049, 0.1819, 0.2147, 0.1985]) -Greedy action tensor([ 0.6386, 0.2127, -0.1576, 0.0185]) tensor([0.3785, 0.2472, 0.1707, 0.2036]) -Greedy action tensor([ 0.8872, -0.6053, -0.0419, -0.4145]) tensor([0.5286, 0.1188, 0.2088, 0.1438]) -Greedy action tensor([ 0.6482, -0.1748, 0.1435, -0.1553]) tensor([0.4015, 0.1763, 0.2424, 0.1798]) -Greedy action tensor([ 0.6528, -0.6009, -0.0835, -0.1172]) tensor([0.4490, 0.1282, 0.2150, 0.2079]) -Greedy action tensor([ 1.2150, -0.6526, 0.0961, -0.3936]) tensor([0.5948, 0.0919, 0.1943, 0.1191]) -Greedy action tensor([ 0.5990, -0.2502, -0.0795, -0.1784]) tensor([0.4176, 0.1786, 0.2119, 0.1919]) -Greedy action tensor([ 0.8367, -0.5748, 0.1428, -0.2937]) tensor([0.4839, 0.1180, 0.2418, 0.1563]) -Greedy action tensor([ 0.6109, -0.0325, -0.0439, 0.0544]) tensor([0.3819, 0.2007, 0.1984, 0.2189]) -Greedy action tensor([ 0.5092, -0.0194, 0.0062, -0.3135]) tensor([0.3797, 0.2238, 0.2296, 0.1668]) -Greedy action tensor([ 0.0874, 0.3640, -0.1505, -0.4340]) tensor([0.2702, 0.3563, 0.2130, 0.1604]) -Greedy action tensor([ 1.3257, -0.6161, 0.0390, -0.4432]) tensor([0.6289, 0.0902, 0.1737, 0.1072]) -Greedy action tensor([ 1.1840, -0.4309, -0.2040, -0.5616]) tensor([0.6161, 0.1226, 0.1538, 0.1075]) -Greedy action tensor([ 0.7463, -0.4502, -0.0825, -0.0453]) tensor([0.4562, 0.1379, 0.1992, 0.2067]) -Greedy action tensor([ 0.5253, 0.4086, -0.0969, -0.0591]) tensor([0.3351, 0.2982, 0.1799, 0.1868]) -Greedy action tensor([ 0.6376, -0.5569, 0.0271, -0.2679]) tensor([0.4444, 0.1346, 0.2413, 0.1797]) -Greedy action tensor([ 0.9975, -0.4546, -0.0434, -0.2902]) tensor([0.5367, 0.1256, 0.1895, 0.1481]) -Greedy action tensor([ 0.3523, -0.1363, -0.0703, -0.3526]) tensor([0.3619, 0.2220, 0.2372, 0.1789]) -Greedy action tensor([-1.9041, -0.4893, 0.7006, -0.1239]) tensor([0.0407, 0.1675, 0.5505, 0.2414]) -Greedy action tensor([-1.7221, -0.7478, 0.4971, -0.0030]) tensor([0.0543, 0.1438, 0.4992, 0.3028]) -Greedy action tensor([-1.8452, -0.4582, 0.6230, -0.1281]) tensor([0.0447, 0.1789, 0.5275, 0.2489]) -Greedy action tensor([-1.8822, -0.3286, 0.6176, -0.1417]) tensor([0.0424, 0.2003, 0.5159, 0.2414]) -Greedy action tensor([-1.9306, -0.4495, 0.6792, -0.1630]) tensor([0.0402, 0.1770, 0.5471, 0.2357]) -Greedy action tensor([-1.1926, -0.1119, 0.2471, 0.2839]) tensor([0.0797, 0.2349, 0.3364, 0.3490]) -Greedy action tensor([-1.5502, -0.3559, 0.6016, 0.1024]) tensor([0.0552, 0.1822, 0.4746, 0.2881]) -Greedy action tensor([-1.4645, 0.6027, 0.3027, 0.1109]) tensor([0.0510, 0.4034, 0.2988, 0.2467]) -Greedy action tensor([-1.9160, -0.4263, 0.6599, -0.1587]) tensor([0.0410, 0.1820, 0.5392, 0.2378]) -Greedy action tensor([-1.8449, -0.4491, 0.6253, -0.1259]) tensor([0.0446, 0.1799, 0.5269, 0.2486]) -Greedy action tensor([-0.1932, 0.0173, 0.2356, 0.3885]) tensor([0.1799, 0.2220, 0.2762, 0.3218]) -Greedy action tensor([-1.8288, -0.4547, 0.5953, -0.1060]) tensor([0.0458, 0.1809, 0.5170, 0.2564]) -Greedy action tensor([-1.8921, -0.4135, 0.6447, -0.1484]) tensor([0.0421, 0.1847, 0.5323, 0.2408]) -Greedy action tensor([-1.8648, -0.3760, 0.6292, -0.1296]) tensor([0.0431, 0.1909, 0.5217, 0.2443]) -Greedy action tensor([-1.7350, -0.4606, 0.7577, 0.2182]) tensor([0.0422, 0.1508, 0.5098, 0.2972]) -Greedy action tensor([-1.1905, 0.7992, 0.1516, 0.2951]) tensor([0.0604, 0.4417, 0.2311, 0.2668]) -Greedy action tensor([-1.9418, -0.4537, 0.6634, -0.1791]) tensor([0.0403, 0.1786, 0.5459, 0.2351]) -Greedy action tensor([-1.6187, 0.3359, 0.3913, 0.0211]) tensor([0.0484, 0.3415, 0.3609, 0.2493]) -Greedy action tensor([-0.8340, 0.9621, 0.0951, 0.2639]) tensor([0.0796, 0.4799, 0.2017, 0.2388]) -Greedy action tensor([-1.8523, -0.4461, 0.6367, -0.1033]) tensor([0.0437, 0.1784, 0.5267, 0.2513]) -Greedy action tensor([-1.9316, -0.4425, 0.6626, -0.1720]) tensor([0.0406, 0.1800, 0.5435, 0.2359]) -Greedy action tensor([-1.9181, -0.4023, 0.6467, -0.1655]) tensor([0.0411, 0.1872, 0.5344, 0.2372]) -Greedy action tensor([-0.8372, -0.5425, 0.1883, 0.2478]) tensor([0.1236, 0.1660, 0.3447, 0.3658]) -Greedy action tensor([-1.9245, -0.4413, 0.6554, -0.1719]) tensor([0.0410, 0.1808, 0.5414, 0.2367]) -Greedy action tensor([-1.9264, -0.4087, 0.6530, -0.1679]) tensor([0.0407, 0.1858, 0.5371, 0.2364]) -Greedy action tensor([-1.7098, -0.4711, 0.5775, -0.0065]) tensor([0.0505, 0.1744, 0.4976, 0.2775]) -Greedy action tensor([-1.7694, -0.3765, 0.5746, -0.0683]) tensor([0.0478, 0.1924, 0.4980, 0.2618]) -Greedy action tensor([-1.8471, -0.2789, 0.6154, -0.1175]) tensor([0.0432, 0.2071, 0.5064, 0.2433]) -Greedy action tensor([-1.9086, -0.3676, 0.6354, -0.1706]) tensor([0.0415, 0.1939, 0.5285, 0.2361]) -Greedy action tensor([-1.9294, -0.4340, 0.6599, -0.1694]) tensor([0.0407, 0.1814, 0.5416, 0.2363]) -Greedy action tensor([-1.8489, -0.4493, 0.6177, -0.1317]) tensor([0.0446, 0.1809, 0.5259, 0.2485]) -Greedy action tensor([-1.7971, -0.4965, 0.5878, -0.1085]) tensor([0.0478, 0.1753, 0.5185, 0.2584]) -Greedy action tensor([-1.8634, -0.3204, 0.6127, -0.1274]) tensor([0.0430, 0.2013, 0.5117, 0.2441]) -Greedy action tensor([-1.8764, -0.4845, 0.6937, -0.0953]) tensor([0.0416, 0.1674, 0.5439, 0.2471]) -Greedy action tensor([-1.9267, -0.4464, 0.6732, -0.1715]) tensor([0.0406, 0.1783, 0.5463, 0.2347]) -Greedy action tensor([-1.0592, -0.5720, 0.2725, 0.2453]) tensor([0.0990, 0.1611, 0.3750, 0.3649]) -Greedy action tensor([-1.9162, -0.4263, 0.6492, -0.1681]) tensor([0.0413, 0.1834, 0.5377, 0.2375]) -Greedy action tensor([-1.7990, -0.4492, 0.5904, -0.1124]) tensor([0.0472, 0.1822, 0.5153, 0.2552]) -Greedy action tensor([-1.7506, -0.4997, 0.6348, -0.0257]) tensor([0.0477, 0.1666, 0.5181, 0.2676]) -Greedy action tensor([-0.8827, -0.0966, 0.2801, -0.4053]) tensor([0.1249, 0.2742, 0.3996, 0.2013]) -Greedy action tensor([-1.6396, -0.5230, 0.5090, 0.0474]) tensor([0.0555, 0.1694, 0.4755, 0.2997]) -Greedy action tensor([-1.8907, -0.5006, 0.6737, -0.1207]) tensor([0.0419, 0.1682, 0.5441, 0.2459]) -Greedy action tensor([-1.8951, -0.3441, 0.6415, -0.1973]) tensor([0.0420, 0.1980, 0.5306, 0.2294]) -Greedy action tensor([-1.9239, -0.4241, 0.6543, -0.1685]) tensor([0.0409, 0.1833, 0.5390, 0.2367]) -Greedy action tensor([-0.7457, 0.4309, 0.4173, 0.3594]) tensor([0.0956, 0.3100, 0.3058, 0.2886]) -Greedy action tensor([-1.1910, -0.1650, 0.3341, -0.1876]) tensor([0.0900, 0.2510, 0.4135, 0.2454]) -Greedy action tensor([-1.5406, 0.4659, 0.3347, 0.0781]) tensor([0.0500, 0.3717, 0.3260, 0.2522]) -Greedy action tensor([-0.7988, -0.6616, 0.3802, 0.1903]) tensor([0.1237, 0.1418, 0.4020, 0.3325]) -Greedy action tensor([-1.7393, -0.2943, 0.5772, -0.0346]) tensor([0.0479, 0.2031, 0.4856, 0.2634]) -Greedy action tensor([-0.8598, -0.0193, 0.2402, -0.1607]) tensor([0.1200, 0.2781, 0.3605, 0.2414]) -Greedy action tensor([-1.8382, -0.3924, 0.6142, -0.1193]) tensor([0.0446, 0.1892, 0.5177, 0.2486]) -Greedy action tensor([-1.9076, -0.4464, 0.6457, -0.1605]) tensor([0.0418, 0.1804, 0.5377, 0.2401]) -Greedy action tensor([-1.4138, -0.5036, 0.4824, 0.2090]) tensor([0.0657, 0.1633, 0.4378, 0.3331]) -Greedy action tensor([-1.6710, -0.4813, 0.5335, -0.0549]) tensor([0.0544, 0.1787, 0.4931, 0.2738]) -Greedy action tensor([-1.6169, -0.4286, 0.6032, 0.1111]) tensor([0.0523, 0.1716, 0.4816, 0.2944]) -Greedy action tensor([-1.6603, -0.3459, 0.4967, -0.0111]) tensor([0.0538, 0.2004, 0.4655, 0.2802]) -Greedy action tensor([-1.4177, -0.5000, 0.4132, 0.3016]) tensor([0.0653, 0.1634, 0.4072, 0.3642]) -Greedy action tensor([-1.6465, 0.3016, 0.4030, -0.0103]) tensor([0.0478, 0.3354, 0.3712, 0.2456]) -Greedy action tensor([-0.9727, -0.5912, 0.2358, 0.5351]) tensor([0.0968, 0.1418, 0.3242, 0.4373]) -Greedy action tensor([-1.1643, -0.4861, 0.4412, 0.5647]) tensor([0.0736, 0.1450, 0.3666, 0.4148]) -Greedy action tensor([-0.9051, 0.9970, 0.0894, 0.4014]) tensor([0.0709, 0.4753, 0.1918, 0.2620]) -Greedy action tensor([-1.5243, -0.1454, 0.4022, -0.0106]) tensor([0.0610, 0.2424, 0.4192, 0.2774]) -Greedy action tensor([-1.8012, -0.2437, 0.5756, -0.0796]) tensor([0.0452, 0.2147, 0.4871, 0.2530]) -Greedy action tensor([-1.8597, -0.4340, 0.6676, -0.1259]) tensor([0.0428, 0.1782, 0.5363, 0.2426]) -Greedy action tensor([-1.8221, -0.4103, 0.6004, -0.1301]) tensor([0.0459, 0.1882, 0.5170, 0.2490]) -Greedy action tensor([-1.8549, -0.4695, 0.6174, -0.1184]) tensor([0.0444, 0.1774, 0.5261, 0.2521]) -Greedy action tensor([-1.5903, -0.4502, 0.5154, 0.1678]) tensor([0.0551, 0.1724, 0.4527, 0.3198]) -Greedy action tensor([-0.9573, 0.6780, 0.0267, 0.2088]) tensor([0.0832, 0.4270, 0.2226, 0.2671]) -Greedy action tensor([-1.9000, -0.4249, 0.6425, -0.1553]) tensor([0.0420, 0.1836, 0.5340, 0.2404]) -Greedy action tensor([-1.8440, -0.4289, 0.6193, -0.1019]) tensor([0.0443, 0.1824, 0.5203, 0.2530]) -Greedy action tensor([-1.9334, -0.4487, 0.6626, -0.1758]) tensor([0.0406, 0.1793, 0.5446, 0.2355]) -Greedy action tensor([-1.8904, -0.3603, 0.6289, -0.1454]) tensor([0.0421, 0.1943, 0.5226, 0.2410]) -Greedy action tensor([-1.8617, -0.2555, 0.6008, -0.1052]) tensor([0.0425, 0.2120, 0.4991, 0.2464]) -Greedy action tensor([-1.7291, -0.0875, 0.5151, -0.1095]) tensor([0.0484, 0.2501, 0.4569, 0.2446]) -Greedy action tensor([-1.0117, 0.7812, 0.0962, 0.2147]) tensor([0.0744, 0.4468, 0.2252, 0.2536]) -Greedy action tensor([-1.9464, -0.4491, 0.6673, -0.1816]) tensor([0.0401, 0.1791, 0.5469, 0.2340]) -Greedy action tensor([-1.6663, -0.4502, 0.4891, -0.0035]) tensor([0.0547, 0.1846, 0.4722, 0.2885]) -Greedy action tensor([-1.6703, -0.2559, 0.6048, -0.0146]) tensor([0.0498, 0.2049, 0.4845, 0.2608]) -Greedy action tensor([-1.4394, -0.4067, 0.6906, 0.4747]) tensor([0.0526, 0.1478, 0.4428, 0.3568]) -Greedy action tensor([-1.9089, -0.3963, 0.6499, -0.1597]) tensor([0.0413, 0.1875, 0.5337, 0.2375]) -Greedy action tensor([-1.1623, -0.4726, 0.4993, 0.6809]) tensor([0.0686, 0.1367, 0.3613, 0.4333]) -Greedy action tensor([-1.2880, -0.2724, 0.9237, -1.3280]) tensor([0.0722, 0.1993, 0.6592, 0.0694]) -Greedy action tensor([ 0.7030, 0.3508, -0.2149, 0.9256]) tensor([0.2984, 0.2098, 0.1191, 0.3727]) -Greedy action tensor([-0.4045, -1.1000, 0.3727, -0.4385]) tensor([0.2155, 0.1075, 0.4687, 0.2083]) -Greedy action tensor([-0.1978, -0.2871, 1.4239, 0.1807]) tensor([0.1185, 0.1084, 0.6000, 0.1731]) -Greedy action tensor([ 0.0491, -0.0286, -0.4516, -0.0288]) tensor([0.2893, 0.2677, 0.1754, 0.2676]) -Greedy action tensor([ 0.6230, -0.1022, 0.4670, -0.2001]) tensor([0.3599, 0.1743, 0.3079, 0.1580]) -Greedy action tensor([ 0.8120, -0.4120, 0.9920, 0.4090]) tensor([0.3165, 0.0931, 0.3789, 0.2115]) -Greedy action tensor([1.3325, 0.1285, 0.7671, 0.1337]) tensor([0.4609, 0.1383, 0.2618, 0.1390]) -Greedy action tensor([ 0.1970, 0.1713, -0.0782, 0.0232]) tensor([0.2798, 0.2726, 0.2125, 0.2351]) -Greedy action tensor([ 0.1678, 0.0749, 0.2006, -0.1686]) tensor([0.2733, 0.2491, 0.2824, 0.1952]) -Greedy action tensor([0.4177, 0.1964, 1.5517, 0.1190]) tensor([0.1770, 0.1418, 0.5500, 0.1313]) -Greedy action tensor([-0.5228, 0.4007, 1.1467, -0.9878]) tensor([0.1058, 0.2663, 0.5615, 0.0664]) -Greedy action tensor([-0.3522, 0.2751, 0.4567, -0.7470]) tensor([0.1727, 0.3233, 0.3877, 0.1163]) -Greedy action tensor([ 0.2666, -0.2737, 0.6885, 0.8340]) tensor([0.2053, 0.1196, 0.3130, 0.3621]) -Greedy action tensor([ 0.7065, -0.1855, 0.1258, 0.9352]) tensor([0.3100, 0.1270, 0.1734, 0.3896]) -Greedy action tensor([-0.3072, -0.3511, 0.7194, 0.3417]) tensor([0.1501, 0.1437, 0.4190, 0.2872]) -Greedy action tensor([ 1.4008, -1.0415, -0.6275, 1.3215]) tensor([0.4668, 0.0406, 0.0614, 0.4312]) -Greedy action tensor([-0.0421, -0.0341, -1.2068, -0.1297]) tensor([0.3090, 0.3115, 0.0964, 0.2831]) -Greedy action tensor([ 0.1444, -1.2246, -0.2786, 0.0736]) tensor([0.3520, 0.0895, 0.2306, 0.3279]) -Greedy action tensor([ 1.0792, -1.0227, 0.1395, 1.0067]) tensor([0.4093, 0.0500, 0.1599, 0.3807]) -Greedy action tensor([-0.1412, -1.0926, -0.3366, 0.9285]) tensor([0.1952, 0.0754, 0.1605, 0.5689]) -Greedy action tensor([-0.4798, -0.5164, 0.8401, 0.6274]) tensor([0.1145, 0.1104, 0.4286, 0.3465]) -Greedy action tensor([ 1.8439, -0.0573, 0.8008, 1.2341]) tensor([0.4889, 0.0730, 0.1723, 0.2657]) -Greedy action tensor([ 1.3739, -1.3457, 0.8365, 0.1288]) tensor([0.5160, 0.0340, 0.3015, 0.1486]) -Greedy action tensor([-0.3227, 0.4108, 0.0364, 0.4499]) tensor([0.1497, 0.3117, 0.2144, 0.3242]) -Greedy action tensor([ 0.0590, -0.7140, 0.0806, 0.3029]) tensor([0.2660, 0.1228, 0.2718, 0.3395]) -Greedy action tensor([ 1.6283, -1.9044, -0.0796, 0.0426]) tensor([0.7066, 0.0206, 0.1281, 0.1447]) -Greedy action tensor([ 0.6034, -0.7648, 0.0968, 1.4062]) tensor([0.2446, 0.0623, 0.1474, 0.5458]) -Greedy action tensor([ 0.6099, 0.1936, 2.2560, -0.6729]) tensor([0.1404, 0.0926, 0.7281, 0.0389]) -Greedy action tensor([ 0.3767, 0.5948, 0.4636, -0.0334]) tensor([0.2501, 0.3111, 0.2728, 0.1660]) -Greedy action tensor([0.7278, 0.6127, 0.7649, 0.5701]) tensor([0.2643, 0.2356, 0.2743, 0.2258]) -Greedy action tensor([ 0.4779, -1.1156, -1.1484, -0.0646]) tensor([0.5047, 0.1026, 0.0993, 0.2934]) -Greedy action tensor([ 0.1818, -0.3002, -0.2053, -0.5520]) tensor([0.3601, 0.2224, 0.2446, 0.1729]) -Greedy action tensor([-0.4061, -0.5518, 0.0667, -0.9581]) tensor([0.2472, 0.2137, 0.3967, 0.1424]) -Greedy action tensor([ 1.5621, 0.1879, -0.4093, 1.3732]) tensor([0.4504, 0.1140, 0.0627, 0.3729]) -Greedy action tensor([ 0.3159, -1.6164, -0.4098, 1.0061]) tensor([0.2760, 0.0400, 0.1336, 0.5504]) -Greedy action tensor([ 0.1767, 0.1408, -0.2902, -0.3899]) tensor([0.3165, 0.3054, 0.1985, 0.1796]) -Greedy action tensor([ 0.7443, -0.6490, -0.3454, 0.3012]) tensor([0.4491, 0.1115, 0.1510, 0.2884]) -Greedy action tensor([-0.2238, -0.3709, 1.1471, 0.5876]) tensor([0.1242, 0.1072, 0.4891, 0.2795]) -Greedy action tensor([ 0.6611, -0.6843, -0.2562, -0.5300]) tensor([0.5092, 0.1326, 0.2035, 0.1547]) -Greedy action tensor([ 0.1271, -1.2963, 0.6159, 0.1142]) tensor([0.2592, 0.0624, 0.4226, 0.2559]) -Greedy action tensor([ 1.1598, -0.0166, -0.3369, -0.3025]) tensor([0.5669, 0.1748, 0.1269, 0.1314]) -Greedy action tensor([ 0.0458, -0.1762, 0.9199, 0.4653]) tensor([0.1749, 0.1400, 0.4191, 0.2660]) -Greedy action tensor([-0.9442, -0.3824, -0.4513, 0.3385]) tensor([0.1250, 0.2193, 0.2047, 0.4509]) -Greedy action tensor([ 0.8648, -0.0729, 1.3186, 0.3409]) tensor([0.2811, 0.1100, 0.4425, 0.1664]) -Greedy action tensor([-0.3108, -0.3596, -0.2480, 0.2991]) tensor([0.2059, 0.1961, 0.2192, 0.3789]) -Greedy action tensor([-0.7013, -0.5223, 0.7437, -1.4008]) tensor([0.1442, 0.1725, 0.6117, 0.0716]) -Greedy action tensor([ 0.7420, -0.2769, 0.4873, 0.3617]) tensor([0.3546, 0.1280, 0.2749, 0.2425]) -Greedy action tensor([-0.0404, -0.9251, 0.1661, 0.6194]) tensor([0.2185, 0.0902, 0.2686, 0.4227]) -Greedy action tensor([-0.5770, -0.9345, 0.7797, -0.3991]) tensor([0.1476, 0.1032, 0.5730, 0.1763]) -Greedy action tensor([ 0.1907, -0.9307, 1.2218, 0.0039]) tensor([0.2016, 0.0657, 0.5654, 0.1673]) -Greedy action tensor([ 0.3418, -0.4261, 0.0691, -0.3851]) tensor([0.3692, 0.1713, 0.2811, 0.1785]) -Greedy action tensor([-0.3060, 0.0972, 0.4815, 0.3921]) tensor([0.1492, 0.2232, 0.3278, 0.2998]) -Greedy action tensor([ 0.6127, 0.4492, -0.2734, -0.0920]) tensor([0.3629, 0.3082, 0.1496, 0.1794]) -Greedy action tensor([ 0.6867, -1.5052, -0.3783, -0.1537]) tensor([0.5297, 0.0592, 0.1826, 0.2286]) -Greedy action tensor([1.0709, 0.5953, 0.4043, 0.1745]) tensor([0.3932, 0.2444, 0.2019, 0.1605]) -Greedy action tensor([ 0.1139, -0.4311, 0.4995, -0.3341]) tensor([0.2711, 0.1572, 0.3986, 0.1732]) -Greedy action tensor([ 0.2975, -2.0137, -0.6144, -0.3364]) tensor([0.4923, 0.0488, 0.1978, 0.2612]) -Greedy action tensor([ 0.1199, -1.7909, 0.4015, 0.3978]) tensor([0.2636, 0.0390, 0.3493, 0.3481]) -Greedy action tensor([ 0.3742, -0.6912, 0.7177, 0.2341]) tensor([0.2760, 0.0951, 0.3891, 0.2399]) -Greedy action tensor([1.6204, 0.4720, 0.5733, 0.3928]) tensor([0.5099, 0.1617, 0.1790, 0.1494]) -Greedy action tensor([ 1.6097, -0.6734, 1.2577, 1.0927]) tensor([0.4164, 0.0425, 0.2928, 0.2483]) -Greedy action tensor([ 0.1067, 0.1985, -0.1030, 0.9967]) tensor([0.1872, 0.2052, 0.1518, 0.4558]) -Greedy action tensor([ 0.6919, -0.1416, -0.2044, 0.3054]) tensor([0.3965, 0.1723, 0.1618, 0.2694]) -Greedy action tensor([ 0.0347, 0.0075, 0.7120, -0.1664]) tensor([0.2101, 0.2045, 0.4136, 0.1718]) -Greedy action tensor([ 0.2787, -1.0826, -0.0239, -0.6270]) tensor([0.4168, 0.1068, 0.3079, 0.1685]) -Greedy action tensor([ 0.2130, -0.2297, 0.8166, -0.0793]) tensor([0.2371, 0.1523, 0.4336, 0.1770]) -Greedy action tensor([-0.6804, 0.9203, -0.5943, 0.3737]) tensor([0.1008, 0.4999, 0.1099, 0.2894]) -Greedy action tensor([ 2.2455, -2.5950, 0.8536, 1.3704]) tensor([0.5976, 0.0047, 0.1486, 0.2491]) -Greedy action tensor([-0.5104, -0.7461, 0.1444, -0.3785]) tensor([0.2059, 0.1627, 0.3964, 0.2350]) -Greedy action tensor([-0.0604, -1.0111, 0.8949, 0.5478]) tensor([0.1717, 0.0664, 0.4464, 0.3155]) -Greedy action tensor([ 0.7241, -0.8398, -0.0995, -0.7099]) tensor([0.5301, 0.1110, 0.2326, 0.1263]) -Greedy action tensor([-1.2821, 0.0739, 0.6281, -1.2113]) tensor([0.0787, 0.3053, 0.5315, 0.0845]) -Greedy action tensor([ 0.3753, -0.3682, -0.0657, -0.4004]) tensor([0.3877, 0.1843, 0.2495, 0.1785]) -Greedy action tensor([ 0.4908, -1.4412, -0.5894, 1.7974]) tensor([0.1931, 0.0280, 0.0656, 0.7133]) -Greedy action tensor([ 0.2333, -0.2693, -1.0535, -0.5092]) tensor([0.4243, 0.2567, 0.1172, 0.2019]) -Greedy action tensor([-0.7612, -1.0389, 0.3528, -0.2643]) tensor([0.1551, 0.1175, 0.4725, 0.2549]) -Greedy action tensor([-0.5221, -0.8722, 1.0280, -0.5604]) tensor([0.1355, 0.0955, 0.6386, 0.1304]) -Greedy action tensor([-0.9553, -0.6816, -1.0775, 0.8589]) tensor([0.1071, 0.1408, 0.0948, 0.6573]) -Greedy action tensor([1.2236, 0.0191, 0.3067, 0.5502]) tensor([0.4526, 0.1357, 0.1809, 0.2308]) -Greedy action tensor([ 0.6356, -1.3611, 0.1754, 0.6343]) tensor([0.3616, 0.0491, 0.2282, 0.3611]) -Greedy action tensor([-0.2256, 0.8430, 0.3145, 0.1180]) tensor([0.1421, 0.4137, 0.2439, 0.2004]) -Greedy action tensor([ 1.3594, -0.7130, -0.5057, 1.2410]) tensor([0.4610, 0.0580, 0.0714, 0.4095]) -Greedy action tensor([ 1.6878, -1.0750, 0.3153, 0.1431]) tensor([0.6536, 0.0413, 0.1657, 0.1395]) -Greedy action tensor([ 1.0150, -0.3861, -0.3592, 0.0172]) tensor([0.5353, 0.1319, 0.1355, 0.1974]) -Greedy action tensor([ 1.3177, -0.8180, -0.6325, 0.9212]) tensor([0.5173, 0.0611, 0.0736, 0.3480]) -Greedy action tensor([ 1.2963, -0.1863, -0.6502, 0.5596]) tensor([0.5410, 0.1228, 0.0772, 0.2590]) -Greedy action tensor([ 1.2890, -0.3924, -0.3416, 0.3185]) tensor([0.5679, 0.1057, 0.1112, 0.2152]) -Greedy action tensor([ 2.3080, -0.2979, -0.6714, 0.2204]) tensor([0.8009, 0.0591, 0.0407, 0.0993]) -Greedy action tensor([ 1.3882, -0.6308, -0.7670, 0.1831]) tensor([0.6459, 0.0858, 0.0748, 0.1935]) -Greedy action tensor([ 1.4010, 0.0140, -0.5829, 0.4993]) tensor([0.5577, 0.1393, 0.0767, 0.2263]) -Greedy action tensor([ 1.6281, -0.5605, -0.2203, 0.4973]) tensor([0.6280, 0.0704, 0.0989, 0.2027]) -Greedy action tensor([ 1.8627, -0.4025, -0.7325, 0.3930]) tensor([0.7100, 0.0737, 0.0530, 0.1633]) -Greedy action tensor([ 1.7208, -0.2834, -0.1507, 0.7019]) tensor([0.6062, 0.0817, 0.0933, 0.2188]) -Greedy action tensor([ 2.0058, -0.8104, -0.3465, 0.5348]) tensor([0.7222, 0.0432, 0.0687, 0.1659]) -Greedy action tensor([ 0.4544, -0.4873, -0.0468, 0.1673]) tensor([0.3641, 0.1420, 0.2206, 0.2733]) -Greedy action tensor([ 1.3638, -0.5983, -0.4601, 0.3331]) tensor([0.6029, 0.0847, 0.0973, 0.2151]) -Greedy action tensor([ 1.1828, -0.1826, -0.7323, -0.2314]) tensor([0.6076, 0.1551, 0.0895, 0.1477]) -Greedy action tensor([ 1.1631, -0.3060, -0.4670, 0.5430]) tensor([0.5092, 0.1172, 0.0998, 0.2739]) -Greedy action tensor([ 1.1282, -0.4117, -0.4688, 0.3221]) tensor([0.5366, 0.1151, 0.1087, 0.2397]) -Greedy action tensor([ 1.4830, -0.8130, -0.2731, 0.4915]) tensor([0.6081, 0.0612, 0.1050, 0.2256]) -Greedy action tensor([ 1.4926, -0.4544, -0.2246, 0.0155]) tensor([0.6449, 0.0920, 0.1158, 0.1472]) -Greedy action tensor([ 1.4737, -1.0090, -0.1064, 0.3810]) tensor([0.6155, 0.0514, 0.1268, 0.2064]) -Greedy action tensor([ 1.4656, 0.0260, -0.4643, 0.5148]) tensor([0.5654, 0.1340, 0.0821, 0.2185]) -Greedy action tensor([ 2.0244, -1.2098, -0.1566, 0.4435]) tensor([0.7363, 0.0290, 0.0831, 0.1515]) -Greedy action tensor([ 1.3455, 0.2317, -0.6880, 0.1312]) tensor([0.5694, 0.1870, 0.0745, 0.1691]) -Greedy action tensor([ 1.5777, -0.6877, -0.1789, -0.0050]) tensor([0.6748, 0.0700, 0.1165, 0.1386]) -Greedy action tensor([ 1.3835, -0.1656, -0.5283, 0.4548]) tensor([0.5697, 0.1210, 0.0842, 0.2251]) -Greedy action tensor([ 2.1070, -0.8949, -0.2116, 0.5795]) tensor([0.7325, 0.0364, 0.0721, 0.1590]) -Greedy action tensor([ 1.6269, -0.6596, -0.4417, 0.3433]) tensor([0.6644, 0.0675, 0.0840, 0.1841]) -Greedy action tensor([ 1.1695, -0.2317, -0.2010, 0.3080]) tensor([0.5201, 0.1281, 0.1321, 0.2197]) -Greedy action tensor([ 1.5736, -0.0480, -0.6730, 0.6783]) tensor([0.5842, 0.1154, 0.0618, 0.2386]) -Greedy action tensor([ 2.1479, -1.0431, -0.3248, 0.7663]) tensor([0.7264, 0.0299, 0.0613, 0.1824]) -Greedy action tensor([ 1.3730, -0.0568, -0.3317, 0.3548]) tensor([0.5610, 0.1343, 0.1020, 0.2027]) -Greedy action tensor([ 2.1504, -1.3515, -0.2403, 0.6258]) tensor([0.7466, 0.0225, 0.0684, 0.1625]) -Greedy action tensor([ 1.0605, -0.4414, -0.3045, 0.3127]) tensor([0.5124, 0.1141, 0.1309, 0.2426]) -Greedy action tensor([ 1.0261, -0.4081, -0.0095, 0.3643]) tensor([0.4741, 0.1130, 0.1683, 0.2446]) -Greedy action tensor([ 0.8706, -0.2594, 0.1590, 0.1713]) tensor([0.4328, 0.1398, 0.2124, 0.2150]) -Greedy action tensor([ 1.4875, -0.6591, -0.3284, 0.1072]) tensor([0.6531, 0.0763, 0.1063, 0.1643]) -Greedy action tensor([ 1.8098, -0.6960, -0.4231, 0.3166]) tensor([0.7075, 0.0577, 0.0758, 0.1589]) -Greedy action tensor([ 1.8561, -0.4832, -0.7917, 1.0259]) tensor([0.6238, 0.0601, 0.0442, 0.2719]) -Greedy action tensor([ 0.9257, -0.7514, 0.2686, -0.0723]) tensor([0.4822, 0.0901, 0.2499, 0.1777]) -Greedy action tensor([ 1.1776, -0.2519, -0.2694, 0.2067]) tensor([0.5395, 0.1292, 0.1269, 0.2043]) -Greedy action tensor([ 0.2965, -0.4468, -0.2284, 0.0813]) tensor([0.3480, 0.1655, 0.2059, 0.2806]) -Greedy action tensor([ 1.3012, -0.1336, -0.9407, 0.3390]) tensor([0.5792, 0.1380, 0.0615, 0.2213]) -Greedy action tensor([ 1.2669, -0.4286, -0.4798, 0.0692]) tensor([0.6025, 0.1106, 0.1050, 0.1819]) -Greedy action tensor([ 1.5606, -0.4746, -0.2695, 0.0405]) tensor([0.6624, 0.0865, 0.1062, 0.1449]) -Greedy action tensor([ 1.5107, -0.5875, -0.0646, 0.0767]) tensor([0.6378, 0.0782, 0.1320, 0.1520]) -Greedy action tensor([ 1.8374, -0.8030, -0.2579, 0.5006]) tensor([0.6863, 0.0490, 0.0844, 0.1803]) -Greedy action tensor([ 1.8891, -0.7545, 0.5199, -0.2824]) tensor([0.6947, 0.0494, 0.1767, 0.0792]) -Greedy action tensor([ 1.8948, -0.3491, -0.2250, 0.0118]) tensor([0.7256, 0.0769, 0.0871, 0.1104]) -Greedy action tensor([ 1.1408, -0.2357, -0.6069, 0.3196]) tensor([0.5357, 0.1353, 0.0933, 0.2357]) -Greedy action tensor([ 0.6913, 0.0291, -0.0395, -0.0370]) tensor([0.4032, 0.2080, 0.1942, 0.1947]) -Greedy action tensor([ 1.4764, -0.1367, -1.1337, 0.5396]) tensor([0.6007, 0.1197, 0.0442, 0.2354]) -Greedy action tensor([ 1.4571, 0.1306, -0.5048, 0.3009]) tensor([0.5812, 0.1542, 0.0817, 0.1829]) -Greedy action tensor([ 1.7442, -0.6113, -0.3935, 0.5462]) tensor([0.6603, 0.0626, 0.0779, 0.1993]) -Greedy action tensor([ 1.2703, -0.4198, -0.0909, -0.1342]) tensor([0.5930, 0.1094, 0.1520, 0.1456]) -Greedy action tensor([ 1.6594, -0.2460, -0.1305, 0.4967]) tensor([0.6141, 0.0914, 0.1025, 0.1920]) -Greedy action tensor([ 0.9923, -0.5373, -0.1783, 0.0296]) tensor([0.5239, 0.1135, 0.1625, 0.2001]) -Greedy action tensor([ 1.7584, -0.5858, -0.2217, 0.2973]) tensor([0.6821, 0.0654, 0.0942, 0.1583]) -Greedy action tensor([ 1.3516, -0.5371, -0.3095, 0.6044]) tensor([0.5510, 0.0833, 0.1047, 0.2610]) -Greedy action tensor([ 1.3113, -0.5196, -0.4062, -0.1227]) tensor([0.6337, 0.1016, 0.1138, 0.1510]) -Greedy action tensor([ 1.6531, -0.8120, -0.2523, 0.4667]) tensor([0.6497, 0.0552, 0.0967, 0.1984]) -Greedy action tensor([ 2.2214, -0.7455, -0.5893, 0.3397]) tensor([0.7912, 0.0407, 0.0476, 0.1205]) -Greedy action tensor([ 1.5870, -0.1611, -0.6067, 0.2521]) tensor([0.6457, 0.1124, 0.0720, 0.1699]) -Greedy action tensor([ 1.4045, -0.1411, -0.7180, 0.5331]) tensor([0.5710, 0.1217, 0.0684, 0.2389]) -Greedy action tensor([ 1.6749, -0.4538, -0.4130, 0.0582]) tensor([0.6937, 0.0826, 0.0860, 0.1377]) -Greedy action tensor([ 2.0370, -1.1148, -0.0569, 0.4659]) tensor([0.7279, 0.0311, 0.0897, 0.1513]) -Greedy action tensor([ 1.5431, -0.6984, -0.5296, 0.5498]) tensor([0.6240, 0.0663, 0.0785, 0.2311]) -Greedy action tensor([ 1.7951, -0.5025, -0.3675, 0.7186]) tensor([0.6426, 0.0646, 0.0739, 0.2190]) -Greedy action tensor([ 1.1936, -0.3507, -0.5840, 0.2591]) tensor([0.5633, 0.1202, 0.0952, 0.2213]) -Greedy action tensor([ 2.0530, -0.3626, -0.5798, 0.4676]) tensor([0.7320, 0.0654, 0.0526, 0.1500]) -Greedy action tensor([ 1.2551, -0.4083, -0.3471, 0.1694]) tensor([0.5785, 0.1096, 0.1165, 0.1953]) -Greedy action tensor([ 1.1819, -0.4600, -0.1795, 0.3129]) tensor([0.5350, 0.1036, 0.1371, 0.2243]) -Greedy action tensor([ 1.2702, -0.4133, -0.4575, 0.4303]) tensor([0.5570, 0.1035, 0.0990, 0.2405]) -Greedy action tensor([ 2.3209, -0.7992, 0.9844, -0.2056]) tensor([0.7210, 0.0318, 0.1895, 0.0576]) -Greedy action tensor([ 1.8059, -0.2209, -0.9160, 1.1485]) tensor([0.5829, 0.0768, 0.0383, 0.3020]) -Greedy action tensor([ 0.9781, -0.1228, -0.5122, 0.2899]) tensor([0.4854, 0.1614, 0.1094, 0.2439]) -Greedy action tensor([ 1.2244, -0.0744, -0.3721, 0.4500]) tensor([0.5164, 0.1409, 0.1046, 0.2381]) -Greedy action tensor([ 1.6013, -0.3098, -0.8086, 0.2505]) tensor([0.6681, 0.0988, 0.0600, 0.1731]) -Greedy action tensor([ 1.3842, -0.1280, -0.4488, 0.4276]) tensor([0.5667, 0.1249, 0.0906, 0.2177]) -Greedy action tensor([ 1.8256, -1.0794, -0.1886, 0.6452]) tensor([0.6687, 0.0366, 0.0892, 0.2054]) -Greedy action tensor([ 1.0968, -0.7355, -0.2517, 0.4246]) tensor([0.5181, 0.0829, 0.1345, 0.2645]) -Greedy action tensor([ 0.8319, -0.5647, -0.0800, -0.1430]) tensor([0.4935, 0.1221, 0.1983, 0.1862]) -Greedy action tensor([ 0.1272, 0.0666, -0.0170, -0.0391]) tensor([0.2737, 0.2576, 0.2369, 0.2318]) -Greedy action tensor([ 0.8049, -0.3665, -0.0074, -0.0912]) tensor([0.4625, 0.1434, 0.2053, 0.1888]) -Greedy action tensor([ 0.7531, -0.9028, -0.0972, -0.3634]) tensor([0.5140, 0.0981, 0.2196, 0.1683]) -Greedy action tensor([ 0.5415, 0.0568, -0.0494, -0.2618]) tensor([0.3820, 0.2353, 0.2116, 0.1711]) -Greedy action tensor([ 0.4547, -0.2831, 0.0488, -0.1279]) tensor([0.3700, 0.1769, 0.2465, 0.2066]) -Greedy action tensor([ 0.6901, -0.6631, -0.0898, -0.2826]) tensor([0.4773, 0.1233, 0.2188, 0.1805]) -Greedy action tensor([ 0.6049, -0.2273, -0.0242, -0.0628]) tensor([0.4031, 0.1754, 0.2149, 0.2067]) -Greedy action tensor([ 0.5055, -0.2063, -0.0121, -0.3255]) tensor([0.3965, 0.1946, 0.2363, 0.1727]) -Greedy action tensor([ 0.4106, -0.2520, -0.0981, -0.1426]) tensor([0.3715, 0.1915, 0.2234, 0.2136]) -Greedy action tensor([ 0.7225, -0.5430, 0.0413, -0.2688]) tensor([0.4631, 0.1307, 0.2344, 0.1719]) -Greedy action tensor([ 1.1647, -0.8168, 0.3920, -0.7634]) tensor([0.5731, 0.0790, 0.2646, 0.0833]) -Greedy action tensor([ 1.4275, -0.9035, 0.0834, -0.9794]) tensor([0.6906, 0.0671, 0.1801, 0.0622]) -Greedy action tensor([ 0.5331, -0.1422, 0.0031, -0.3155]) tensor([0.3959, 0.2015, 0.2330, 0.1695]) -Greedy action tensor([ 0.6288, -0.1956, -0.1238, 0.0072]) tensor([0.4087, 0.1792, 0.1926, 0.2195]) -Greedy action tensor([ 0.9538, -0.7119, -0.2129, -0.3192]) tensor([0.5617, 0.1062, 0.1749, 0.1573]) -Greedy action tensor([ 0.8290, -0.3457, 0.0612, -0.5292]) tensor([0.4926, 0.1522, 0.2286, 0.1267]) -Greedy action tensor([ 0.9897, -0.6865, -0.0516, -0.4676]) tensor([0.5640, 0.1055, 0.1991, 0.1313]) -Greedy action tensor([ 0.3482, -0.3287, -0.1106, -0.0973]) tensor([0.3596, 0.1828, 0.2273, 0.2303]) -Greedy action tensor([ 0.6830, -0.6192, -0.1715, -0.3238]) tensor([0.4848, 0.1318, 0.2063, 0.1771]) -Greedy action tensor([ 0.7299, -0.4957, -0.0063, -0.1593]) tensor([0.4580, 0.1344, 0.2193, 0.1882]) -Greedy action tensor([ 0.9019, -0.6890, -0.0250, -0.4381]) tensor([0.5372, 0.1095, 0.2126, 0.1407]) -Greedy action tensor([ 0.5613, -0.2313, 0.0273, -0.1486]) tensor([0.3952, 0.1789, 0.2317, 0.1943]) -Greedy action tensor([ 0.8212, -0.6872, 0.0391, -0.6439]) tensor([0.5236, 0.1159, 0.2395, 0.1210]) -Greedy action tensor([ 0.7349, -0.6647, 0.0729, -0.5500]) tensor([0.4904, 0.1210, 0.2529, 0.1357]) -Greedy action tensor([ 0.8632, -0.5902, 0.0754, -0.4148]) tensor([0.5083, 0.1188, 0.2312, 0.1416]) -Greedy action tensor([ 0.2760, -0.1005, -0.3601, -0.4207]) tensor([0.3685, 0.2529, 0.1951, 0.1836]) -Greedy action tensor([ 0.7570, 0.3349, -0.1201, 0.0188]) tensor([0.3922, 0.2572, 0.1632, 0.1875]) -Greedy action tensor([ 0.4464, -0.0677, -0.2056, -0.2267]) tensor([0.3804, 0.2275, 0.1982, 0.1940]) -Greedy action tensor([ 0.7595, -1.0406, -0.0459, -0.4195]) tensor([0.5209, 0.0861, 0.2328, 0.1602]) -Greedy action tensor([ 0.2065, -0.1969, -0.0997, -0.1143]) tensor([0.3195, 0.2134, 0.2352, 0.2318]) -Greedy action tensor([ 0.6403, -0.4900, 0.0691, -0.3016]) tensor([0.4390, 0.1418, 0.2480, 0.1712]) -Greedy action tensor([ 0.3965, 0.1535, -0.1061, 0.0459]) tensor([0.3233, 0.2535, 0.1956, 0.2277]) -Greedy action tensor([ 0.7947, -0.4602, 0.0394, -0.3223]) tensor([0.4803, 0.1369, 0.2257, 0.1572]) -Greedy action tensor([ 0.3605, -0.0815, -0.0489, -0.3317]) tensor([0.3562, 0.2290, 0.2365, 0.1783]) -Greedy action tensor([ 0.6705, -0.3444, 0.0993, -0.3367]) tensor([0.4362, 0.1581, 0.2464, 0.1593]) -Greedy action tensor([ 0.3959, 0.0428, -0.1073, -0.0204]) tensor([0.3371, 0.2368, 0.2038, 0.2223]) -Greedy action tensor([ 1.1558, -0.9891, 0.1209, -0.6529]) tensor([0.6112, 0.0716, 0.2171, 0.1001]) -Greedy action tensor([0.2348, 0.0223, 0.0440, 0.5708]) tensor([0.2479, 0.2004, 0.2048, 0.3469]) -Greedy action tensor([ 0.5989, -0.6602, -0.1326, -0.3464]) tensor([0.4643, 0.1318, 0.2234, 0.1804]) -Greedy action tensor([ 0.5328, 0.1945, 0.0139, -0.3084]) tensor([0.3651, 0.2603, 0.2173, 0.1574]) -Greedy action tensor([ 1.0612, -0.6242, -0.1999, -0.5519]) tensor([0.5995, 0.1111, 0.1699, 0.1195]) -Greedy action tensor([ 0.4135, -0.3012, 0.0573, -0.0852]) tensor([0.3575, 0.1750, 0.2504, 0.2171]) -Greedy action tensor([ 0.4373, -0.3549, -0.0156, -0.4027]) tensor([0.3968, 0.1797, 0.2523, 0.1713]) -Greedy action tensor([ 1.1006, -0.5325, -0.1763, -0.2323]) tensor([0.5754, 0.1124, 0.1605, 0.1517]) -Greedy action tensor([ 0.5483, -0.3061, -0.0322, -0.5455]) tensor([0.4310, 0.1834, 0.2412, 0.1444]) -Greedy action tensor([ 0.7256, -0.5611, 0.0146, -0.5897]) tensor([0.4912, 0.1357, 0.2413, 0.1318]) -Greedy action tensor([ 0.7354, -0.3399, -0.0058, -0.3316]) tensor([0.4626, 0.1578, 0.2204, 0.1591]) -Greedy action tensor([ 0.3699, -0.0735, 0.0369, -0.3270]) tensor([0.3500, 0.2247, 0.2509, 0.1744]) -Greedy action tensor([ 0.5783, -0.0080, -0.1249, -0.2003]) tensor([0.3983, 0.2216, 0.1972, 0.1829]) -Greedy action tensor([ 0.8063, -0.5126, -0.1062, -0.2867]) tensor([0.4990, 0.1334, 0.2003, 0.1673]) -Greedy action tensor([ 1.0184, -1.0965, 0.0456, -0.5022]) tensor([0.5823, 0.0703, 0.2201, 0.1273]) -Greedy action tensor([ 0.8832, -0.3210, -0.2326, -0.2077]) tensor([0.5093, 0.1528, 0.1669, 0.1711]) -Greedy action tensor([ 0.5206, 0.0027, 0.0707, -0.0919]) tensor([0.3603, 0.2147, 0.2298, 0.1953]) -Greedy action tensor([ 0.9567, -0.3414, -0.0660, -0.3169]) tensor([0.5229, 0.1428, 0.1880, 0.1463]) -Greedy action tensor([ 1.1016, -0.6261, -0.0566, -0.3973]) tensor([0.5831, 0.1036, 0.1831, 0.1302]) -Greedy action tensor([ 0.3148, 0.2144, 0.0367, -0.3608]) tensor([0.3154, 0.2853, 0.2388, 0.1605]) -Greedy action tensor([ 0.6359, -0.5275, -0.0402, -0.0853]) tensor([0.4334, 0.1354, 0.2204, 0.2107]) -Greedy action tensor([ 0.7026, -0.0546, -0.2127, -0.2081]) tensor([0.4402, 0.2065, 0.1763, 0.1771]) -Greedy action tensor([ 0.7492, -0.6035, -0.2377, -0.1558]) tensor([0.4912, 0.1270, 0.1831, 0.1987]) -Greedy action tensor([ 0.5618, -0.4324, -0.0586, -0.2560]) tensor([0.4257, 0.1575, 0.2289, 0.1879]) -Greedy action tensor([ 0.5303, 0.0705, -0.0784, -0.3283]) tensor([0.3847, 0.2429, 0.2093, 0.1630]) -Greedy action tensor([ 0.6377, 0.1689, -0.2638, -0.0657]) tensor([0.3958, 0.2477, 0.1607, 0.1959]) -Greedy action tensor([ 0.5686, -0.4723, 0.2544, -0.4187]) tensor([0.4071, 0.1438, 0.2974, 0.1517]) -Greedy action tensor([ 0.7174, -0.6280, -0.0770, -0.5104]) tensor([0.4987, 0.1299, 0.2253, 0.1461]) -Greedy action tensor([ 0.5506, -0.3334, -0.0430, -0.1545]) tensor([0.4066, 0.1680, 0.2246, 0.2009]) -Greedy action tensor([ 0.5664, -0.0904, -0.1935, -0.1125]) tensor([0.4011, 0.2079, 0.1876, 0.2034]) -Greedy action tensor([ 0.8823, -0.5334, 0.0987, -0.2215]) tensor([0.4923, 0.1195, 0.2249, 0.1633]) -Greedy action tensor([ 0.5254, -0.2658, -0.1360, -0.2940]) tensor([0.4149, 0.1881, 0.2141, 0.1829]) -Greedy action tensor([ 0.6482, 0.0578, -0.3801, -0.2605]) tensor([0.4320, 0.2394, 0.1545, 0.1741]) -Greedy action tensor([ 0.5888, -0.4322, 0.0196, -0.2422]) tensor([0.4234, 0.1525, 0.2396, 0.1844]) -Greedy action tensor([ 3.7517e-01, 2.8013e-04, -4.2683e-02, -7.0761e-02]) tensor([0.3349, 0.2302, 0.2205, 0.2144]) -Greedy action tensor([ 0.7060, -0.4041, 0.0150, -0.4945]) tensor([0.4691, 0.1546, 0.2351, 0.1412]) -Greedy action tensor([ 0.5356, -0.2427, 0.0833, -0.3247]) tensor([0.3971, 0.1823, 0.2526, 0.1680]) -Greedy action tensor([ 0.6804, 0.1365, -0.0258, -0.3868]) tensor([0.4136, 0.2401, 0.2041, 0.1423]) -Greedy action tensor([ 0.9229, -1.1806, 0.1014, -0.7650]) tensor([0.5725, 0.0699, 0.2518, 0.1059]) -Greedy action tensor([ 0.3268, -0.3232, -0.1072, -0.2635]) tensor([0.3671, 0.1916, 0.2378, 0.2034]) -Greedy action tensor([ 0.9814, -0.5714, 0.0900, -0.4068]) tensor([0.5344, 0.1131, 0.2192, 0.1333]) -Greedy action tensor([ 0.7825, -0.5948, -0.1211, -0.3130]) tensor([0.5021, 0.1266, 0.2034, 0.1679]) -Greedy action tensor([ 0.6671, -0.4381, -0.0357, -0.2757]) tensor([0.4513, 0.1494, 0.2235, 0.1758]) -Greedy action tensor([ 0.5685, -0.2837, -0.0687, -0.2744]) tensor([0.4192, 0.1788, 0.2216, 0.1804]) -Greedy action tensor([-1.6673, -0.3307, 0.6169, -0.0468]) tensor([0.0508, 0.1934, 0.4989, 0.2569]) -Greedy action tensor([-1.4655, 0.5555, 0.3046, 0.0187]) tensor([0.0531, 0.4008, 0.3118, 0.2343]) -Greedy action tensor([-1.1368, -0.3905, 0.3514, 0.1641]) tensor([0.0892, 0.1881, 0.3951, 0.3276]) -Greedy action tensor([-1.8893, -0.4221, 0.6301, -0.1500]) tensor([0.0426, 0.1849, 0.5297, 0.2428]) -Greedy action tensor([-1.9071, -0.4503, 0.6518, -0.1629]) tensor([0.0418, 0.1793, 0.5399, 0.2390]) -Greedy action tensor([-1.9206, -0.4267, 0.6567, -0.1659]) tensor([0.0410, 0.1826, 0.5395, 0.2370]) -Greedy action tensor([-1.7479, -0.3144, 0.5472, -0.1046]) tensor([0.0493, 0.2067, 0.4892, 0.2549]) -Greedy action tensor([-1.8986, -0.4551, 0.6498, -0.1574]) tensor([0.0421, 0.1785, 0.5389, 0.2404]) -Greedy action tensor([-1.4301, -0.5234, 0.3914, 0.1018]) tensor([0.0700, 0.1733, 0.4327, 0.3239]) -Greedy action tensor([-1.8500, -0.4596, 0.6265, -0.1231]) tensor([0.0444, 0.1782, 0.5280, 0.2495]) -Greedy action tensor([-1.6758, -0.4809, 0.5688, 0.0672]) tensor([0.0514, 0.1698, 0.4851, 0.2937]) -Greedy action tensor([-0.6988, 0.0614, 0.8928, 1.3297]) tensor([0.0639, 0.1366, 0.3138, 0.4857]) -Greedy action tensor([-1.9278, -0.4451, 0.6607, -0.1707]) tensor([0.0408, 0.1797, 0.5430, 0.2365]) -Greedy action tensor([-1.8875, -0.3821, 0.6322, -0.1443]) tensor([0.0423, 0.1906, 0.5254, 0.2417]) -Greedy action tensor([-0.9556, -0.6153, 0.1779, 0.3575]) tensor([0.1083, 0.1523, 0.3366, 0.4028]) -Greedy action tensor([-1.9391, -0.4528, 0.6680, -0.1750]) tensor([0.0403, 0.1781, 0.5464, 0.2352]) -Greedy action tensor([-1.8559, -0.4043, 0.6148, -0.1451]) tensor([0.0442, 0.1887, 0.5227, 0.2445]) -Greedy action tensor([-1.8558, -0.3199, 0.6163, -0.1266]) tensor([0.0432, 0.2008, 0.5122, 0.2437]) -Greedy action tensor([-1.7710, -0.3189, 0.5675, -0.0254]) tensor([0.0468, 0.1999, 0.4851, 0.2681]) -Greedy action tensor([-1.4873, -0.3629, 0.4209, -0.0151]) tensor([0.0659, 0.2028, 0.4441, 0.2872]) -Greedy action tensor([-1.3092, -0.3215, -0.3544, -0.3317]) tensor([0.1118, 0.3003, 0.2906, 0.2973]) -Greedy action tensor([-1.9067, -0.4382, 0.6457, -0.1572]) tensor([0.0418, 0.1815, 0.5364, 0.2403]) -Greedy action tensor([-1.8739, -0.3868, 0.6280, -0.1380]) tensor([0.0429, 0.1899, 0.5238, 0.2435]) -Greedy action tensor([-1.2471, -0.6481, 0.2989, 0.2305]) tensor([0.0841, 0.1530, 0.3945, 0.3684]) -Greedy action tensor([-1.3558, -0.5885, 0.4295, 0.3896]) tensor([0.0674, 0.1451, 0.4016, 0.3859]) -Greedy action tensor([-1.7455, -0.5091, 0.8236, 0.2552]) tensor([0.0402, 0.1383, 0.5244, 0.2971]) -Greedy action tensor([-1.8692, -0.4542, 0.6274, -0.1601]) tensor([0.0439, 0.1807, 0.5329, 0.2425]) -Greedy action tensor([-0.8460, 0.4546, 0.0921, 0.0262]) tensor([0.1040, 0.3817, 0.2656, 0.2487]) -Greedy action tensor([-1.8264, -0.4804, 0.6070, -0.1361]) tensor([0.0462, 0.1774, 0.5262, 0.2503]) -Greedy action tensor([-1.8611, -0.3981, 0.6525, -0.0964]) tensor([0.0425, 0.1837, 0.5253, 0.2484]) -Greedy action tensor([-1.8242, -0.4178, 0.6674, 0.0090]) tensor([0.0427, 0.1743, 0.5159, 0.2671]) -Greedy action tensor([-1.8861, -0.4407, 0.6475, -0.1416]) tensor([0.0424, 0.1801, 0.5346, 0.2429]) -Greedy action tensor([-1.8435, -0.4410, 0.6316, -0.1028]) tensor([0.0441, 0.1795, 0.5247, 0.2517]) -Greedy action tensor([-1.6246, -0.4810, 0.5658, 0.1331]) tensor([0.0530, 0.1662, 0.4735, 0.3072]) -Greedy action tensor([-1.8825, -0.4457, 0.6360, -0.1483]) tensor([0.0430, 0.1807, 0.5330, 0.2433]) -Greedy action tensor([-1.9288, -0.4614, 0.6602, -0.1716]) tensor([0.0409, 0.1774, 0.5446, 0.2370]) -Greedy action tensor([-1.6709, -0.5170, 0.5291, 0.0200]) tensor([0.0537, 0.1703, 0.4847, 0.2913]) -Greedy action tensor([-1.2882, -0.5956, 0.4120, 0.3687]) tensor([0.0729, 0.1457, 0.3991, 0.3822]) -Greedy action tensor([-1.5014, -0.5548, 0.4602, 0.0788]) tensor([0.0643, 0.1658, 0.4575, 0.3124]) -Greedy action tensor([-0.9046, 0.0402, 0.5520, 1.0667]) tensor([0.0665, 0.1710, 0.2853, 0.4773]) -Greedy action tensor([-1.9251, -0.4587, 0.6705, -0.1602]) tensor([0.0407, 0.1763, 0.5454, 0.2376]) -Greedy action tensor([-1.9428, -0.4513, 0.6675, -0.1795]) tensor([0.0402, 0.1786, 0.5468, 0.2344]) -Greedy action tensor([-0.9908, 0.6115, -0.2103, -0.5568]) tensor([0.1032, 0.5123, 0.2252, 0.1593]) -Greedy action tensor([-1.8155, -0.3722, 0.5911, -0.1370]) tensor([0.0461, 0.1952, 0.5116, 0.2470]) -Greedy action tensor([-1.8191, -0.4287, 0.6012, -0.1221]) tensor([0.0460, 0.1849, 0.5178, 0.2512]) -Greedy action tensor([-1.5916, 0.2596, 0.4397, 0.1469]) tensor([0.0484, 0.3079, 0.3687, 0.2751]) -Greedy action tensor([-1.8716, -0.3707, 0.6261, -0.1381]) tensor([0.0429, 0.1925, 0.5216, 0.2429]) -Greedy action tensor([-0.7662, 0.8430, 0.0265, 0.2526]) tensor([0.0911, 0.4553, 0.2012, 0.2523]) -Greedy action tensor([-1.9291, -0.4135, 0.6565, -0.1729]) tensor([0.0406, 0.1849, 0.5392, 0.2353]) -Greedy action tensor([-1.5987, -0.4147, 0.7157, 0.2696]) tensor([0.0479, 0.1566, 0.4850, 0.3104]) -Greedy action tensor([-0.7230, 0.7386, -0.4595, -0.3052]) tensor([0.1230, 0.5303, 0.1600, 0.1867]) -Greedy action tensor([-1.8020, -0.3491, 0.6196, -0.0917]) tensor([0.0453, 0.1937, 0.5104, 0.2506]) -Greedy action tensor([-0.9425, 0.5992, 0.1957, -0.1854]) tensor([0.0915, 0.4277, 0.2857, 0.1951]) -Greedy action tensor([-1.6035, -0.4132, 0.4701, 0.0032]) tensor([0.0580, 0.1909, 0.4617, 0.2894]) -Greedy action tensor([-1.9302, -0.4217, 0.6569, -0.1733]) tensor([0.0406, 0.1837, 0.5402, 0.2355]) -Greedy action tensor([-1.8935, -0.3795, 0.6493, -0.1237]) tensor([0.0414, 0.1883, 0.5270, 0.2433]) -Greedy action tensor([-1.8887, -0.4537, 0.6370, -0.1643]) tensor([0.0429, 0.1802, 0.5363, 0.2406]) -Greedy action tensor([-1.8884, -0.3935, 0.6326, -0.1575]) tensor([0.0425, 0.1894, 0.5284, 0.2398]) -Greedy action tensor([-1.1883, 0.0780, 0.3062, 0.0108]) tensor([0.0812, 0.2879, 0.3617, 0.2692]) -Greedy action tensor([-0.8579, -0.2210, -0.4469, -0.3109]) tensor([0.1632, 0.3086, 0.2462, 0.2820]) -Greedy action tensor([-1.8658, -0.3438, 0.6175, -0.1269]) tensor([0.0430, 0.1970, 0.5152, 0.2447]) -Greedy action tensor([-0.9965, 0.8830, 0.1097, 0.2503]) tensor([0.0712, 0.4661, 0.2151, 0.2476]) -Greedy action tensor([-1.9090, -0.4316, 0.6466, -0.1568]) tensor([0.0416, 0.1824, 0.5360, 0.2400]) -Greedy action tensor([-1.0233, -0.6479, 0.1852, 0.2659]) tensor([0.1060, 0.1543, 0.3550, 0.3847]) -Greedy action tensor([0.4781, 1.2446, 0.1955, 0.9478]) tensor([0.1816, 0.3909, 0.1369, 0.2905]) -Greedy action tensor([-1.9176, -0.3900, 0.6472, -0.1588]) tensor([0.0410, 0.1887, 0.5325, 0.2378]) -Greedy action tensor([-1.6603, -0.4813, 0.5752, 0.1507]) tensor([0.0507, 0.1649, 0.4742, 0.3102]) -Greedy action tensor([ 0.0445, 1.0410, -0.2704, -0.1039]) tensor([0.1887, 0.5110, 0.1377, 0.1626]) -Greedy action tensor([-1.8776, -0.3439, 0.6177, -0.1350]) tensor([0.0426, 0.1975, 0.5166, 0.2433]) -Greedy action tensor([-1.8010, -0.3334, 0.6486, 0.0070]) tensor([0.0434, 0.1885, 0.5032, 0.2649]) -Greedy action tensor([-1.8020, -0.2461, 0.5700, -0.1274]) tensor([0.0459, 0.2174, 0.4918, 0.2449]) -Greedy action tensor([-1.9404, -0.4472, 0.6655, -0.1783]) tensor([0.0403, 0.1794, 0.5457, 0.2347]) -Greedy action tensor([-1.5828, -0.5368, 0.5051, -0.0843]) tensor([0.0610, 0.1737, 0.4923, 0.2730]) -Greedy action tensor([-1.6849, 0.1709, 0.4742, -0.0931]) tensor([0.0477, 0.3050, 0.4131, 0.2342]) -Greedy action tensor([-1.7818, -0.4707, 0.5807, -0.0480]) tensor([0.0476, 0.1768, 0.5058, 0.2698]) -Greedy action tensor([-1.9059, -0.4320, 0.6539, -0.1562]) tensor([0.0416, 0.1815, 0.5377, 0.2392]) -Greedy action tensor([-1.2423, -0.5462, 0.3713, 0.4985]) tensor([0.0728, 0.1461, 0.3657, 0.4153]) -Greedy action tensor([-1.8725, -0.3038, 0.6357, -0.1245]) tensor([0.0420, 0.2015, 0.5155, 0.2410]) -Greedy action tensor([-1.0439, -0.6374, 0.2812, 0.0595]) tensor([0.1078, 0.1618, 0.4055, 0.3249]) -Greedy action tensor([-1.8401, -0.7178, 1.1003, 0.1339]) tensor([0.0331, 0.1017, 0.6267, 0.2384]) -Greedy action tensor([-1.8712, -0.4591, 0.6346, -0.1325]) tensor([0.0434, 0.1781, 0.5316, 0.2469]) -Greedy action tensor([ 0.2439, -1.5411, 0.8636, 0.1811]) tensor([0.2522, 0.0423, 0.4687, 0.2368]) -Greedy action tensor([-0.3893, -0.2928, -0.6688, -0.7178]) tensor([0.2795, 0.3078, 0.2114, 0.2013]) -Greedy action tensor([ 0.4702, -0.4917, -0.9261, -0.2641]) tensor([0.4740, 0.1812, 0.1173, 0.2275]) -Greedy action tensor([ 0.9913, 0.2776, 0.8240, -0.1332]) tensor([0.3759, 0.1841, 0.3180, 0.1221]) -Greedy action tensor([ 1.4962, 0.8183, 0.8149, -0.2675]) tensor([0.4576, 0.2324, 0.2316, 0.0784]) -Greedy action tensor([1.0094, 0.0292, 0.5838, 0.8611]) tensor([0.3459, 0.1298, 0.2260, 0.2983]) -Greedy action tensor([-1.1470, -0.5650, -0.4790, -0.1552]) tensor([0.1345, 0.2407, 0.2623, 0.3626]) -Greedy action tensor([-1.1174, -0.0089, 0.1527, -0.6839]) tensor([0.1095, 0.3317, 0.3899, 0.1689]) -Greedy action tensor([-0.9515, -0.7484, 0.7402, -0.4167]) tensor([0.1068, 0.1309, 0.5799, 0.1824]) -Greedy action tensor([ 0.5907, -0.7663, -0.0859, 0.9806]) tensor([0.3084, 0.0794, 0.1568, 0.4555]) -Greedy action tensor([ 1.4397, -1.3496, 1.2341, -0.3886]) tensor([0.4911, 0.0302, 0.3998, 0.0789]) -Greedy action tensor([ 0.5538, -0.9752, 0.7454, 0.6550]) tensor([0.2829, 0.0613, 0.3427, 0.3131]) -Greedy action tensor([ 1.1073, -0.4323, -0.4397, 1.1037]) tensor([0.4126, 0.0885, 0.0878, 0.4111]) -Greedy action tensor([1.6038, 0.2281, 0.5511, 0.0872]) tensor([0.5491, 0.1387, 0.1916, 0.1205]) -Greedy action tensor([ 0.2430, -0.5582, 0.7917, 1.2417]) tensor([0.1697, 0.0761, 0.2937, 0.4605]) -Greedy action tensor([ 0.0037, 0.1094, -0.5403, -0.1052]) tensor([0.2787, 0.3097, 0.1617, 0.2499]) -Greedy action tensor([ 0.2670, -1.4905, 0.8224, 0.5838]) tensor([0.2332, 0.0402, 0.4064, 0.3201]) -Greedy action tensor([ 0.5949, 0.9854, 0.8612, -0.1738]) tensor([0.2355, 0.3480, 0.3074, 0.1092]) -Greedy action tensor([ 1.2796, -0.2794, 0.5935, 0.5938]) tensor([0.4509, 0.0948, 0.2271, 0.2271]) -Greedy action tensor([-0.9029, -1.3954, -0.8854, -0.2474]) tensor([0.2195, 0.1342, 0.2234, 0.4229]) -Greedy action tensor([-0.4453, 0.2619, 0.2185, 0.5433]) tensor([0.1306, 0.2649, 0.2536, 0.3509]) -Greedy action tensor([ 0.2803, -0.8577, -0.2297, -0.0873]) tensor([0.3827, 0.1226, 0.2298, 0.2649]) -Greedy action tensor([-0.8193, -0.6994, 1.2782, -0.8003]) tensor([0.0886, 0.0998, 0.7214, 0.0903]) -Greedy action tensor([ 0.5354, -1.4359, -0.8325, 0.4353]) tensor([0.4350, 0.0606, 0.1108, 0.3936]) -Greedy action tensor([-0.0832, -1.0889, 0.4717, -0.2758]) tensor([0.2543, 0.0930, 0.4429, 0.2097]) -Greedy action tensor([ 0.4451, -1.0291, -0.9916, -0.2263]) tensor([0.5057, 0.1158, 0.1202, 0.2584]) -Greedy action tensor([ 1.5677, -0.1945, 1.0725, 1.0166]) tensor([0.4242, 0.0728, 0.2585, 0.2445]) -Greedy action tensor([-0.0301, -2.0605, -0.0049, 1.3711]) tensor([0.1608, 0.0211, 0.1650, 0.6531]) -Greedy action tensor([ 0.4226, -0.7285, -0.6660, 0.3480]) tensor([0.3874, 0.1225, 0.1304, 0.3596]) -Greedy action tensor([-0.7620, -0.6438, -0.7722, 0.0159]) tensor([0.1890, 0.2127, 0.1870, 0.4113]) -Greedy action tensor([-0.3856, 0.8614, 0.4019, -1.1151]) tensor([0.1397, 0.4860, 0.3070, 0.0673]) -Greedy action tensor([ 0.3255, -0.9794, -0.5404, 1.0583]) tensor([0.2651, 0.0719, 0.1115, 0.5516]) -Greedy action tensor([ 1.5620, -0.0591, -0.5508, 0.9151]) tensor([0.5428, 0.1073, 0.0656, 0.2843]) -Greedy action tensor([-0.8406, -0.1959, -1.0726, 0.0171]) tensor([0.1651, 0.3146, 0.1309, 0.3893]) -Greedy action tensor([-0.5717, -1.2890, 0.9061, -0.0358]) tensor([0.1319, 0.0644, 0.5782, 0.2254]) -Greedy action tensor([-6.0737e-04, -8.8572e-01, -5.1030e-01, 4.0291e-01]) tensor([0.2849, 0.1176, 0.1711, 0.4265]) -Greedy action tensor([ 0.2269, -1.1470, -0.1727, -0.5538]) tensor([0.4199, 0.1063, 0.2815, 0.1923]) -Greedy action tensor([ 1.9823, -1.1009, -0.0523, 0.8135]) tensor([0.6724, 0.0308, 0.0879, 0.2089]) -Greedy action tensor([ 1.1053, -1.1768, 0.9057, 1.1570]) tensor([0.3362, 0.0343, 0.2754, 0.3541]) -Greedy action tensor([-0.4112, 0.1065, -0.2032, -1.4136]) tensor([0.2338, 0.3924, 0.2879, 0.0858]) -Greedy action tensor([ 0.7248, -0.1324, 0.8126, 0.5021]) tensor([0.3015, 0.1279, 0.3292, 0.2413]) -Greedy action tensor([-1.1744, -0.0771, 0.6223, 0.1805]) tensor([0.0719, 0.2155, 0.4337, 0.2788]) -Greedy action tensor([-0.4777, -0.8127, 0.7874, -0.3608]) tensor([0.1567, 0.1121, 0.5552, 0.1761]) -Greedy action tensor([ 0.4347, -0.4499, 0.1887, 0.3763]) tensor([0.3187, 0.1316, 0.2492, 0.3006]) -Greedy action tensor([-0.9083, -0.4556, 1.5428, -0.0966]) tensor([0.0609, 0.0957, 0.7063, 0.1371]) -Greedy action tensor([ 0.2749, -0.9147, 0.1638, 0.1311]) tensor([0.3262, 0.0993, 0.2919, 0.2825]) -Greedy action tensor([ 0.1491, -1.2445, 0.7257, -0.7987]) tensor([0.2927, 0.0727, 0.5211, 0.1135]) -Greedy action tensor([ 0.5233, -0.8293, -1.0381, -0.1946]) tensor([0.5112, 0.1322, 0.1073, 0.2494]) -Greedy action tensor([ 0.0637, -0.4070, 1.6887, -0.3642]) tensor([0.1360, 0.0849, 0.6905, 0.0886]) -Greedy action tensor([ 0.8220, -0.4472, 1.0274, 0.5210]) tensor([0.3078, 0.0865, 0.3779, 0.2278]) -Greedy action tensor([-0.7516, -0.4782, 0.0891, -0.9269]) tensor([0.1828, 0.2402, 0.4236, 0.1534]) -Greedy action tensor([-0.6201, -0.5223, 1.2050, -0.4179]) tensor([0.1049, 0.1157, 0.6509, 0.1284]) -Greedy action tensor([ 0.6907, 0.6837, 0.7917, -0.6666]) tensor([0.2979, 0.2958, 0.3296, 0.0767]) -Greedy action tensor([-0.0684, -0.9623, 1.2472, -0.9120]) tensor([0.1797, 0.0735, 0.6696, 0.0773]) -Greedy action tensor([-1.2061, -0.8317, 1.1443, -0.9806]) tensor([0.0704, 0.1024, 0.7389, 0.0883]) -Greedy action tensor([-0.9335, 0.5309, 0.7917, -0.4982]) tensor([0.0801, 0.3464, 0.4497, 0.1238]) -Greedy action tensor([ 0.3096, -0.5867, -0.1354, 0.5929]) tensor([0.2962, 0.1209, 0.1898, 0.3932]) -Greedy action tensor([ 0.7296, -1.2199, 0.0166, 0.0354]) tensor([0.4690, 0.0668, 0.2299, 0.2343]) -Greedy action tensor([-1.9460, -0.6506, 1.4435, -0.6618]) tensor([0.0264, 0.0963, 0.7820, 0.0953]) -Greedy action tensor([-0.8137, -0.5055, 0.1186, -1.0619]) tensor([0.1760, 0.2395, 0.4471, 0.1373]) -Greedy action tensor([-1.1602, -1.7225, 0.3045, -0.2370]) tensor([0.1189, 0.0677, 0.5142, 0.2992]) -Greedy action tensor([ 1.5324, -1.8978, 0.6243, 0.9903]) tensor([0.4957, 0.0161, 0.1999, 0.2883]) -Greedy action tensor([-0.4095, -1.5362, -0.3155, 0.6279]) tensor([0.1907, 0.0618, 0.2095, 0.5381]) -Greedy action tensor([ 0.1959, -0.4187, 0.8156, 0.7959]) tensor([0.1915, 0.1036, 0.3559, 0.3490]) -Greedy action tensor([ 0.0070, -2.1629, 0.2112, 0.9357]) tensor([0.2053, 0.0234, 0.2518, 0.5195]) -Greedy action tensor([ 0.8320, -0.7419, 0.4105, 0.3254]) tensor([0.4055, 0.0840, 0.2661, 0.2443]) -Greedy action tensor([ 1.0000, -0.6331, 0.0368, 0.8521]) tensor([0.4099, 0.0801, 0.1565, 0.3536]) -Greedy action tensor([ 0.6433, -0.3680, 0.1536, 0.3017]) tensor([0.3721, 0.1354, 0.2281, 0.2645]) -Greedy action tensor([-0.0122, -0.3806, -1.2159, 0.1913]) tensor([0.3108, 0.2150, 0.0933, 0.3809]) -Greedy action tensor([0.5989, 0.0680, 0.7850, 0.0397]) tensor([0.2972, 0.1748, 0.3580, 0.1699]) -Greedy action tensor([ 0.5323, -1.4543, -0.0319, 0.0609]) tensor([0.4292, 0.0589, 0.2441, 0.2679]) -Greedy action tensor([ 0.6214, -0.7080, -0.3986, -0.0213]) tensor([0.4649, 0.1230, 0.1676, 0.2445]) -Greedy action tensor([-0.0734, -0.4835, 0.2705, 0.1996]) tensor([0.2279, 0.1512, 0.3214, 0.2994]) -Greedy action tensor([0.9370, 0.7314, 0.3099, 0.8712]) tensor([0.3044, 0.2479, 0.1626, 0.2851]) -Greedy action tensor([-0.3052, -0.4422, 0.4485, -0.6128]) tensor([0.2113, 0.1843, 0.4490, 0.1554]) -Greedy action tensor([ 1.2162, -1.7829, 0.0999, 1.1652]) tensor([0.4296, 0.0214, 0.1407, 0.4083]) -Greedy action tensor([ 0.6697, -1.3085, 0.7644, 0.0731]) tensor([0.3586, 0.0496, 0.3942, 0.1975]) -Greedy action tensor([ 0.0211, -1.7180, 0.4469, -0.5056]) tensor([0.3033, 0.0533, 0.4643, 0.1791]) -Greedy action tensor([-0.7094, -1.0793, 1.0647, 1.0095]) tensor([0.0760, 0.0525, 0.4478, 0.4238]) -Greedy action tensor([ 1.1578, -0.6590, 0.2354, 0.7749]) tensor([0.4460, 0.0725, 0.1773, 0.3041]) -Greedy action tensor([-0.4454, -0.4155, -0.5936, 0.1503]) tensor([0.2125, 0.2189, 0.1832, 0.3854]) -Greedy action tensor([ 1.0324, -0.5740, 0.1723, 0.0520]) tensor([0.5003, 0.1004, 0.2117, 0.1877]) -Greedy action tensor([ 0.8200, -0.4180, 0.0746, -0.1657]) tensor([0.4678, 0.1356, 0.2220, 0.1746]) -Greedy action tensor([ 0.5617, -0.0819, 0.1005, 0.1587]) tensor([0.3541, 0.1860, 0.2233, 0.2366]) -Greedy action tensor([ 0.7340, -0.6066, 0.0200, 0.1777]) tensor([0.4302, 0.1126, 0.2106, 0.2466]) -Greedy action tensor([ 1.1754, -0.4500, -0.2893, 0.2504]) tensor([0.5481, 0.1079, 0.1267, 0.2173]) -Greedy action tensor([ 1.9358, -0.6008, -0.1375, 0.4127]) tensor([0.7028, 0.0556, 0.0884, 0.1532]) -Greedy action tensor([ 1.1719, -0.0798, -0.2432, 0.4518]) tensor([0.4961, 0.1419, 0.1205, 0.2415]) -Greedy action tensor([ 1.1736, -0.3632, -0.0869, 0.2529]) tensor([0.5272, 0.1134, 0.1495, 0.2099]) -Greedy action tensor([ 1.3336, -0.2785, -0.4547, -0.0018]) tensor([0.6136, 0.1224, 0.1026, 0.1614]) -Greedy action tensor([ 1.8281, -0.4425, 0.1178, 0.5259]) tensor([0.6427, 0.0664, 0.1162, 0.1748]) -Greedy action tensor([ 2.3342, -0.7223, -0.2054, 1.1861]) tensor([0.6929, 0.0326, 0.0547, 0.2198]) -Greedy action tensor([ 1.1435, -0.6669, -0.1558, -0.3056]) tensor([0.5984, 0.0979, 0.1632, 0.1405]) -Greedy action tensor([ 1.5499, -0.0974, -0.5177, 0.2129]) tensor([0.6322, 0.1218, 0.0800, 0.1660]) -Greedy action tensor([ 1.1445, -0.6249, -0.2082, 0.2627]) tensor([0.5426, 0.0925, 0.1403, 0.2247]) -Greedy action tensor([ 2.0216, -0.4207, -0.5453, 0.2852]) tensor([0.7463, 0.0649, 0.0573, 0.1315]) -Greedy action tensor([ 2.1163, 0.1480, 0.0848, -0.3407]) tensor([0.7372, 0.1030, 0.0967, 0.0632]) -Greedy action tensor([ 1.6832, -0.2736, -0.1503, 0.3157]) tensor([0.6427, 0.0908, 0.1027, 0.1637]) -Greedy action tensor([ 1.1823, -0.3874, -0.5928, 0.3421]) tensor([0.5527, 0.1150, 0.0937, 0.2386]) -Greedy action tensor([ 1.8570, -0.4841, -0.7353, 0.3343]) tensor([0.7199, 0.0693, 0.0539, 0.1570]) -Greedy action tensor([ 1.0486, 0.0908, -0.3715, 0.4644]) tensor([0.4581, 0.1758, 0.1107, 0.2554]) -Greedy action tensor([ 1.9363, -0.1741, -0.3402, -0.1195]) tensor([0.7397, 0.0897, 0.0759, 0.0947]) -Greedy action tensor([ 1.4196, -0.0754, -0.4067, -0.1540]) tensor([0.6279, 0.1408, 0.1011, 0.1302]) -Greedy action tensor([ 1.3321, -0.2921, -1.0301, 0.6032]) tensor([0.5638, 0.1111, 0.0531, 0.2720]) -Greedy action tensor([ 2.0536, -0.7281, -0.5679, 0.4901]) tensor([0.7440, 0.0461, 0.0541, 0.1558]) -Greedy action tensor([ 1.5057, -0.5923, -0.3795, 0.7351]) tensor([0.5756, 0.0706, 0.0874, 0.2664]) -Greedy action tensor([ 0.9688, -0.2951, -0.3909, 0.0852]) tensor([0.5121, 0.1447, 0.1315, 0.2117]) -Greedy action tensor([ 2.0645, -0.2399, -0.6151, 0.4466]) tensor([0.7317, 0.0730, 0.0502, 0.1451]) -Greedy action tensor([ 0.9964, -0.4256, -0.2043, 0.2506]) tensor([0.4959, 0.1196, 0.1493, 0.2352]) -Greedy action tensor([ 1.2837, -0.2810, -0.4893, 0.2005]) tensor([0.5823, 0.1218, 0.0989, 0.1971]) -Greedy action tensor([ 1.0544, 0.3694, -0.5809, 0.3184]) tensor([0.4591, 0.2314, 0.0895, 0.2199]) -Greedy action tensor([ 1.7713, 0.0507, -0.6868, -0.2106]) tensor([0.7131, 0.1276, 0.0610, 0.0983]) -Greedy action tensor([ 1.5447, -0.4317, -0.3072, -0.0286]) tensor([0.6654, 0.0922, 0.1044, 0.1380]) -Greedy action tensor([ 1.1300, -0.3577, 0.0145, 0.0215]) tensor([0.5309, 0.1199, 0.1740, 0.1752]) -Greedy action tensor([ 1.5554, -0.5171, -0.3468, 0.2793]) tensor([0.6434, 0.0810, 0.0960, 0.1796]) -Greedy action tensor([ 2.7907, -0.8796, 0.8070, -0.0319]) tensor([0.8180, 0.0208, 0.1125, 0.0486]) -Greedy action tensor([ 0.5615, -0.2920, 0.0006, 0.0026]) tensor([0.3893, 0.1658, 0.2222, 0.2226]) -Greedy action tensor([ 2.4398, -1.6617, -0.0439, 0.4469]) tensor([0.8089, 0.0134, 0.0675, 0.1102]) -Greedy action tensor([ 2.2429, -1.0044, -0.1290, 0.8646]) tensor([0.7225, 0.0281, 0.0674, 0.1821]) -Greedy action tensor([ 1.0407, -0.5033, -0.1599, 0.6204]) tensor([0.4605, 0.0983, 0.1386, 0.3025]) -Greedy action tensor([ 0.9505, -0.1770, -0.3543, 0.0097]) tensor([0.5037, 0.1631, 0.1366, 0.1966]) -Greedy action tensor([ 1.3350, -0.0685, -0.2210, 0.2673]) tensor([0.5554, 0.1365, 0.1172, 0.1910]) -Greedy action tensor([ 1.5268, -0.6737, -0.2848, 0.2665]) tensor([0.6420, 0.0711, 0.1049, 0.1820]) -Greedy action tensor([ 1.7365, -0.9698, -0.2504, 0.8436]) tensor([0.6198, 0.0414, 0.0850, 0.2538]) -Greedy action tensor([ 1.4861, -0.2811, -0.1009, 0.6905]) tensor([0.5474, 0.0935, 0.1120, 0.2471]) -Greedy action tensor([ 1.4687, -0.5605, 0.1124, 0.0065]) tensor([0.6170, 0.0811, 0.1589, 0.1430]) -Greedy action tensor([ 1.4864, -0.5976, -0.1424, 0.0164]) tensor([0.6449, 0.0803, 0.1265, 0.1483]) -Greedy action tensor([ 2.0296, -0.6200, -0.7173, 1.1482]) tensor([0.6456, 0.0456, 0.0414, 0.2674]) -Greedy action tensor([ 1.4562, 0.0374, -0.4521, 0.2926]) tensor([0.5873, 0.1421, 0.0871, 0.1835]) -Greedy action tensor([ 1.3769, -0.1049, -1.1745, 0.4457]) tensor([0.5885, 0.1337, 0.0459, 0.2319]) -Greedy action tensor([ 1.0626, -0.5887, -0.4742, -0.0471]) tensor([0.5759, 0.1105, 0.1238, 0.1898]) -Greedy action tensor([ 1.9597, -0.4890, -0.5408, 0.6583]) tensor([0.6942, 0.0600, 0.0570, 0.1889]) -Greedy action tensor([ 1.3139, -0.3467, -0.0751, 0.3487]) tensor([0.5494, 0.1044, 0.1370, 0.2093]) -Greedy action tensor([ 1.2934, -0.2735, 0.0253, 0.3412]) tensor([0.5331, 0.1112, 0.1500, 0.2057]) -Greedy action tensor([ 1.2158, 0.0874, -0.8886, 0.2121]) tensor([0.5519, 0.1786, 0.0673, 0.2023]) -Greedy action tensor([ 1.4058, -0.1435, -0.5445, 0.2902]) tensor([0.5944, 0.1263, 0.0845, 0.1948]) -Greedy action tensor([ 0.8733, -0.5862, -0.2821, 0.3022]) tensor([0.4734, 0.1100, 0.1491, 0.2674]) -Greedy action tensor([ 1.2434, -0.4555, -0.1253, 0.0681]) tensor([0.5727, 0.1047, 0.1457, 0.1768]) -Greedy action tensor([ 0.6982, -0.5007, 0.0167, 0.2533]) tensor([0.4085, 0.1232, 0.2066, 0.2618]) -Greedy action tensor([ 1.2271, -0.3156, -0.6524, 0.5857]) tensor([0.5283, 0.1129, 0.0806, 0.2782]) -Greedy action tensor([ 1.5603, -0.4933, -0.3657, 0.4242]) tensor([0.6269, 0.0804, 0.0914, 0.2013]) -Greedy action tensor([ 1.4035, -0.5357, -0.2402, 0.1250]) tensor([0.6190, 0.0890, 0.1196, 0.1724]) -Greedy action tensor([ 1.5496, -0.0956, -0.7281, 0.5560]) tensor([0.6003, 0.1159, 0.0615, 0.2223]) -Greedy action tensor([ 1.5671, -0.0137, 0.0522, 0.2135]) tensor([0.5938, 0.1222, 0.1305, 0.1534]) -Greedy action tensor([ 1.7311, -0.1702, -0.7071, 0.2678]) tensor([0.6811, 0.1017, 0.0595, 0.1577]) -Greedy action tensor([ 1.3899, -0.2800, -0.1345, 0.2373]) tensor([0.5808, 0.1093, 0.1265, 0.1834]) -Greedy action tensor([ 0.9771, -0.6014, -0.0472, -0.0366]) tensor([0.5186, 0.1070, 0.1862, 0.1882]) -Greedy action tensor([ 1.7787, -1.4285, -0.0778, 0.0813]) tensor([0.7247, 0.0293, 0.1132, 0.1327]) -Greedy action tensor([ 1.5789, -0.3872, -0.4506, 0.4024]) tensor([0.6330, 0.0886, 0.0832, 0.1952]) -Greedy action tensor([ 1.6473, -1.0897, 0.0400, 0.3328]) tensor([0.6520, 0.0422, 0.1307, 0.1751]) -Greedy action tensor([ 1.5201, -0.2448, -0.3931, 0.4700]) tensor([0.5993, 0.1026, 0.0885, 0.2097]) -Greedy action tensor([ 0.8397, -0.6511, -0.0762, 0.2196]) tensor([0.4623, 0.1041, 0.1850, 0.2487]) -Greedy action tensor([ 1.4312, -0.6678, -0.4731, 0.2120]) tensor([0.6382, 0.0782, 0.0950, 0.1886]) -Greedy action tensor([ 1.4442, -0.4563, -0.4577, 0.2789]) tensor([0.6209, 0.0928, 0.0927, 0.1936]) -Greedy action tensor([ 1.0767, -0.1270, -0.0590, -0.0590]) tensor([0.5148, 0.1545, 0.1654, 0.1654]) -Greedy action tensor([ 1.8000, -0.1060, -0.9093, 0.5325]) tensor([0.6681, 0.0993, 0.0445, 0.1881]) -Greedy action tensor([ 1.6783, 0.2528, -0.5221, 0.2321]) tensor([0.6303, 0.1515, 0.0698, 0.1484]) -Greedy action tensor([ 1.1374, -0.1831, -0.0939, 0.1102]) tensor([0.5217, 0.1393, 0.1523, 0.1868]) -Greedy action tensor([ 1.2223, -0.3814, -0.0516, -0.0703]) tensor([0.5697, 0.1146, 0.1593, 0.1564]) -Greedy action tensor([ 1.6657, -0.3383, -0.0123, 0.5368]) tensor([0.6079, 0.0819, 0.1135, 0.1966]) -Greedy action tensor([ 0.6396, -0.3748, 0.0444, -0.0042]) tensor([0.4099, 0.1487, 0.2261, 0.2153]) -Greedy action tensor([ 1.8886, -0.1091, -0.7247, 0.8946]) tensor([0.6333, 0.0859, 0.0464, 0.2344]) -Greedy action tensor([ 1.0015, -0.9260, -0.1313, -0.4487]) tensor([0.5875, 0.0855, 0.1892, 0.1378]) -Greedy action tensor([ 0.6448, -0.3210, -0.2371, -0.1801]) tensor([0.4478, 0.1705, 0.1854, 0.1963]) -Greedy action tensor([ 0.6865, -0.3596, 0.1534, -0.2970]) tensor([0.4325, 0.1519, 0.2538, 0.1618]) -Greedy action tensor([ 0.5640, -0.1507, -0.0652, -0.1818]) tensor([0.4005, 0.1960, 0.2135, 0.1900]) -Greedy action tensor([ 0.6832, -0.5090, -0.0400, -0.3947]) tensor([0.4697, 0.1426, 0.2279, 0.1598]) -Greedy action tensor([ 0.3371, -0.4669, -0.1552, -0.1448]) tensor([0.3736, 0.1672, 0.2284, 0.2308]) -Greedy action tensor([0.2362, 0.2314, 0.1953, 0.3110]) tensor([0.2480, 0.2468, 0.2380, 0.2672]) -Greedy action tensor([ 0.8912, -0.4435, -0.1788, -0.2742]) tensor([0.5214, 0.1372, 0.1788, 0.1626]) -Greedy action tensor([ 0.8175, -0.2253, -0.1094, -0.1527]) tensor([0.4701, 0.1657, 0.1861, 0.1782]) -Greedy action tensor([ 0.6631, -0.7421, -0.0051, -0.3077]) tensor([0.4680, 0.1148, 0.2399, 0.1773]) -Greedy action tensor([ 0.2059, 0.2266, -0.1842, -0.2989]) tensor([0.3029, 0.3092, 0.2050, 0.1828]) -Greedy action tensor([ 0.8317, -0.4930, -0.0990, -0.3291]) tensor([0.5068, 0.1347, 0.1998, 0.1587]) -Greedy action tensor([ 0.5739, -0.3869, -0.1037, -0.2333]) tensor([0.4280, 0.1637, 0.2173, 0.1909]) -Greedy action tensor([ 0.6190, -0.1192, -0.0669, 0.0232]) tensor([0.3948, 0.1887, 0.1989, 0.2176]) -Greedy action tensor([ 0.5707, -0.4233, -0.1388, -0.3194]) tensor([0.4400, 0.1628, 0.2164, 0.1807]) -Greedy action tensor([ 0.9206, -0.8653, -0.0741, -0.3236]) tensor([0.5478, 0.0918, 0.2026, 0.1578]) -Greedy action tensor([ 0.4836, -0.4412, 0.0593, -0.1691]) tensor([0.3889, 0.1542, 0.2544, 0.2025]) -Greedy action tensor([ 0.8839, -0.7289, -0.1844, -0.3024]) tensor([0.5410, 0.1078, 0.1859, 0.1652]) -Greedy action tensor([ 1.1495, -0.6289, 0.0431, -0.3817]) tensor([0.5828, 0.0984, 0.1927, 0.1260]) -Greedy action tensor([ 0.8146, 0.0039, 0.0436, -0.4616]) tensor([0.4574, 0.2033, 0.2116, 0.1277]) -Greedy action tensor([ 0.7719, -0.7610, 0.0567, -0.5307]) tensor([0.5058, 0.1092, 0.2474, 0.1375]) -Greedy action tensor([ 0.2165, 0.0452, -0.0938, -0.2910]) tensor([0.3147, 0.2652, 0.2307, 0.1894]) -Greedy action tensor([ 1.2677, -0.5945, -0.0213, -0.5747]) tensor([0.6292, 0.0977, 0.1734, 0.0997]) -Greedy action tensor([ 0.4047, -0.1200, -0.0444, -0.1475]) tensor([0.3564, 0.2109, 0.2275, 0.2052]) -Greedy action tensor([ 0.5933, -0.2884, -0.1570, -0.2952]) tensor([0.4352, 0.1802, 0.2055, 0.1790]) -Greedy action tensor([ 1.0451, -1.0735, -0.1044, -0.5397]) tensor([0.6090, 0.0732, 0.1929, 0.1248]) -Greedy action tensor([ 0.6339, -0.1274, 0.0305, -0.2997]) tensor([0.4154, 0.1940, 0.2272, 0.1633]) -Greedy action tensor([ 0.2601, 0.1518, -0.0346, -0.3302]) tensor([0.3129, 0.2808, 0.2330, 0.1734]) -Greedy action tensor([ 0.9112, -0.5709, -0.0553, -0.2354]) tensor([0.5194, 0.1180, 0.1976, 0.1650]) -Greedy action tensor([ 5.4450e-01, -8.3502e-02, 2.2738e-04, 2.1928e-02]) tensor([0.3694, 0.1971, 0.2144, 0.2191]) -Greedy action tensor([ 0.5221, -0.0560, -0.0553, -0.2755]) tensor([0.3887, 0.2180, 0.2182, 0.1751]) -Greedy action tensor([ 0.7821, -0.7476, 0.1013, -0.4038]) tensor([0.4930, 0.1068, 0.2496, 0.1506]) -Greedy action tensor([ 0.5578, -0.0110, -0.0376, -0.3793]) tensor([0.3985, 0.2256, 0.2197, 0.1561]) -Greedy action tensor([ 0.9897, -0.3695, 0.0978, -0.2676]) tensor([0.5125, 0.1316, 0.2101, 0.1458]) -Greedy action tensor([ 0.3922, -0.2156, 0.0604, -0.1389]) tensor([0.3509, 0.1911, 0.2518, 0.2063]) -Greedy action tensor([ 0.9082, -0.9344, -0.0925, -0.3572]) tensor([0.5530, 0.0876, 0.2033, 0.1560]) -Greedy action tensor([ 0.6897, -0.3442, 0.0225, -0.4781]) tensor([0.4587, 0.1631, 0.2354, 0.1427]) -Greedy action tensor([ 0.9467, -0.5112, -0.1545, -0.4512]) tensor([0.5518, 0.1284, 0.1835, 0.1364]) -Greedy action tensor([ 0.6212, -0.4812, -0.1130, -0.1379]) tensor([0.4386, 0.1456, 0.2105, 0.2053]) -Greedy action tensor([ 0.6857, -0.4144, -0.1528, -0.2181]) tensor([0.4608, 0.1534, 0.1992, 0.1866]) -Greedy action tensor([ 0.9509, -0.5693, -0.1216, -0.4206]) tensor([0.5511, 0.1205, 0.1886, 0.1398]) -Greedy action tensor([ 0.8378, -1.1145, -0.0872, -0.3819]) tensor([0.5453, 0.0774, 0.2162, 0.1610]) -Greedy action tensor([ 0.4150, 0.2515, -0.3096, -0.1858]) tensor([0.3470, 0.2946, 0.1681, 0.1903]) -Greedy action tensor([ 0.7885, -0.0329, -0.1333, -0.1583]) tensor([0.4493, 0.1976, 0.1787, 0.1743]) -Greedy action tensor([ 0.9080, -0.4691, -0.0014, -0.3724]) tensor([0.5173, 0.1305, 0.2084, 0.1438]) -Greedy action tensor([ 0.6577, -0.4102, -0.1459, -0.3659]) tensor([0.4650, 0.1598, 0.2082, 0.1671]) -Greedy action tensor([ 0.6633, -0.4879, 0.1214, -0.4524]) tensor([0.4493, 0.1421, 0.2613, 0.1472]) -Greedy action tensor([ 1.1968, -0.4950, -0.1555, -0.3595]) tensor([0.6047, 0.1114, 0.1564, 0.1275]) -Greedy action tensor([ 0.6874, -0.4210, 0.1073, -0.3772]) tensor([0.4475, 0.1477, 0.2505, 0.1543]) -Greedy action tensor([ 0.4563, -0.0908, -0.0940, -0.1868]) tensor([0.3730, 0.2158, 0.2151, 0.1961]) -Greedy action tensor([ 0.2154, -0.2013, -0.2804, -0.4335]) tensor([0.3583, 0.2362, 0.2182, 0.1873]) -Greedy action tensor([ 0.6687, -0.2776, -0.4316, -0.5906]) tensor([0.4988, 0.1936, 0.1660, 0.1416]) -Greedy action tensor([ 0.7189, -0.5541, 0.0899, -0.5610]) tensor([0.4782, 0.1339, 0.2549, 0.1330]) -Greedy action tensor([ 0.3136, -0.0034, 0.0017, -0.5149]) tensor([0.3452, 0.2514, 0.2527, 0.1507]) -Greedy action tensor([ 0.3290, -0.2694, 0.0128, -0.4745]) tensor([0.3668, 0.2016, 0.2674, 0.1642]) -Greedy action tensor([ 0.6995, -0.5026, 0.0230, -0.2167]) tensor([0.4527, 0.1361, 0.2301, 0.1811]) -Greedy action tensor([ 0.4590, -0.0956, 0.0387, 0.0040]) tensor([0.3490, 0.2004, 0.2292, 0.2214]) -Greedy action tensor([ 0.9592, -0.5541, -0.0974, -0.3082]) tensor([0.5407, 0.1191, 0.1880, 0.1523]) -Greedy action tensor([ 0.9798, 0.1866, 0.0816, -0.0524]) tensor([0.4513, 0.2042, 0.1838, 0.1607]) -Greedy action tensor([ 1.1368, -0.3433, -0.1601, -0.0126]) tensor([0.5501, 0.1252, 0.1504, 0.1743]) -Greedy action tensor([ 0.8415, -0.4657, -0.0536, -0.1999]) tensor([0.4921, 0.1331, 0.2011, 0.1737]) -Greedy action tensor([ 0.6897, -0.3488, 0.1659, -0.2411]) tensor([0.4273, 0.1512, 0.2531, 0.1684]) -Greedy action tensor([ 0.5576, -0.4070, -0.0120, -0.1139]) tensor([0.4069, 0.1551, 0.2302, 0.2079]) -Greedy action tensor([ 0.3538, 0.1799, -0.2801, 0.0978]) tensor([0.3180, 0.2672, 0.1687, 0.2461]) -Greedy action tensor([ 0.5247, -0.2796, -0.0947, 0.0064]) tensor([0.3874, 0.1733, 0.2085, 0.2307]) -Greedy action tensor([ 1.0752, -0.8938, 0.0890, -0.3982]) tensor([0.5741, 0.0802, 0.2141, 0.1316]) -Greedy action tensor([ 0.6638, -0.3496, -0.0646, -0.2548]) tensor([0.4455, 0.1617, 0.2150, 0.1778]) -Greedy action tensor([ 0.4202, -0.3511, 0.2774, -0.5350]) tensor([0.3685, 0.1704, 0.3194, 0.1418]) -Greedy action tensor([ 0.4934, -0.0802, 0.1034, -0.2088]) tensor([0.3655, 0.2059, 0.2475, 0.1811]) -Greedy action tensor([ 0.9029, -1.1299, -0.0604, -0.4472]) tensor([0.5644, 0.0739, 0.2154, 0.1463]) -Greedy action tensor([ 0.4400, -0.5597, 0.0110, -0.6047]) tensor([0.4218, 0.1552, 0.2746, 0.1484]) -Greedy action tensor([ 0.7723, -0.4128, -0.0557, -0.2967]) tensor([0.4794, 0.1466, 0.2095, 0.1646]) -Greedy action tensor([ 1.0645, -0.5494, -0.0602, -0.5049]) tensor([0.5774, 0.1150, 0.1875, 0.1202]) -Greedy action tensor([ 1.1824, -0.6245, -0.1145, -0.7848]) tensor([0.6339, 0.1041, 0.1733, 0.0887]) -Greedy action tensor([ 0.3037, 0.1458, -0.0762, -0.2669]) tensor([0.3223, 0.2752, 0.2204, 0.1821]) -Greedy action tensor([ 0.4425, 0.0597, -0.0678, -0.2227]) tensor([0.3576, 0.2439, 0.2147, 0.1839]) -Greedy action tensor([ 0.4255, -0.0825, -0.1767, -0.2048]) tensor([0.3729, 0.2244, 0.2042, 0.1985]) -Greedy action tensor([ 0.7484, -0.4434, 0.0387, -0.1880]) tensor([0.4571, 0.1388, 0.2248, 0.1792]) -Greedy action tensor([ 0.5597, -0.3872, -0.0048, -0.3738]) tensor([0.4256, 0.1651, 0.2420, 0.1673]) -Greedy action tensor([ 0.9453, -0.5397, 0.0158, -0.1977]) tensor([0.5154, 0.1168, 0.2035, 0.1644]) -Greedy action tensor([ 0.6291, -0.7416, -0.0623, -0.2395]) tensor([0.4599, 0.1168, 0.2304, 0.1930]) -Greedy action tensor([-1.8251, -0.5063, 0.6281, -0.0999]) tensor([0.0455, 0.1701, 0.5290, 0.2554]) -Greedy action tensor([-0.9514, 0.9360, 0.1441, 0.2190]) tensor([0.0724, 0.4779, 0.2165, 0.2333]) -Greedy action tensor([-1.8782, -0.3998, 0.6273, -0.1560]) tensor([0.0430, 0.1888, 0.5273, 0.2409]) -Greedy action tensor([-1.7389, -0.4254, 0.5599, -0.0959]) tensor([0.0504, 0.1873, 0.5018, 0.2605]) -Greedy action tensor([-1.7004, -0.4871, 0.6002, 0.1624]) tensor([0.0481, 0.1619, 0.4801, 0.3099]) -Greedy action tensor([-1.6533, -0.4436, 0.5123, -0.0126]) tensor([0.0549, 0.1839, 0.4783, 0.2830]) -Greedy action tensor([-1.9261, -0.3893, 0.6506, -0.1658]) tensor([0.0406, 0.1889, 0.5343, 0.2362]) -Greedy action tensor([-1.9315, -0.4200, 0.6577, -0.1701]) tensor([0.0405, 0.1837, 0.5398, 0.2359]) -Greedy action tensor([-1.4653, 0.3833, 0.3595, -0.0802]) tensor([0.0570, 0.3619, 0.3534, 0.2277]) -Greedy action tensor([-1.8631, -0.4870, 0.6324, -0.1285]) tensor([0.0439, 0.1740, 0.5330, 0.2490]) -Greedy action tensor([-1.8979, -0.4333, 0.6332, -0.1489]) tensor([0.0423, 0.1830, 0.5316, 0.2432]) -Greedy action tensor([-1.9004, -0.4568, 0.6526, -0.1715]) tensor([0.0422, 0.1786, 0.5416, 0.2376]) -Greedy action tensor([-1.8525, -0.4473, 0.6730, -0.0166]) tensor([0.0419, 0.1710, 0.5241, 0.2630]) -Greedy action tensor([-1.8837, -0.4020, 0.6435, -0.1247]) tensor([0.0421, 0.1855, 0.5276, 0.2447]) -Greedy action tensor([-1.8739, -0.4445, 0.6297, -0.1501]) tensor([0.0435, 0.1815, 0.5314, 0.2436]) -Greedy action tensor([-0.7763, -0.2793, 0.2118, 0.0160]) tensor([0.1327, 0.2181, 0.3563, 0.2930]) -Greedy action tensor([0.8431, 0.3896, 0.6212, 1.4684]) tensor([0.2323, 0.1476, 0.1861, 0.4341]) -Greedy action tensor([-1.8785, -0.3886, 0.6212, -0.1474]) tensor([0.0430, 0.1907, 0.5235, 0.2428]) -Greedy action tensor([-1.8286, -0.4401, 0.6143, -0.1119]) tensor([0.0453, 0.1816, 0.5211, 0.2521]) -Greedy action tensor([-1.7470, -0.4956, 0.5653, -0.0471]) tensor([0.0498, 0.1742, 0.5032, 0.2728]) -Greedy action tensor([-1.8546, -0.3266, 0.6091, -0.1581]) tensor([0.0438, 0.2020, 0.5150, 0.2391]) -Greedy action tensor([-1.7066, -0.3402, 0.5781, -0.2440]) tensor([0.0525, 0.2057, 0.5153, 0.2265]) -Greedy action tensor([-1.8852, -0.2284, 0.6184, -0.2095]) tensor([0.0420, 0.2202, 0.5135, 0.2244]) -Greedy action tensor([-1.0940, 0.5266, 0.1626, 0.0177]) tensor([0.0793, 0.4010, 0.2786, 0.2411]) -Greedy action tensor([-1.8641, -0.2953, 0.6164, -0.1292]) tensor([0.0427, 0.2050, 0.5102, 0.2421]) -Greedy action tensor([-1.9369, -0.4397, 0.6652, -0.1763]) tensor([0.0404, 0.1804, 0.5445, 0.2347]) -Greedy action tensor([-1.9456, -0.4487, 0.6669, -0.1813]) tensor([0.0401, 0.1792, 0.5467, 0.2341]) -Greedy action tensor([-1.9470, -0.4550, 0.6696, -0.1824]) tensor([0.0400, 0.1780, 0.5481, 0.2338]) -Greedy action tensor([ 0.8594, 1.2022, -0.0302, 0.6253]) tensor([0.2769, 0.3902, 0.1138, 0.2191]) -Greedy action tensor([-1.9315, -0.3933, 0.6514, -0.1703]) tensor([0.0405, 0.1884, 0.5356, 0.2355]) -Greedy action tensor([-1.9348, -0.4535, 0.6598, -0.1766]) tensor([0.0407, 0.1789, 0.5445, 0.2359]) -Greedy action tensor([-1.7347, -0.2874, 0.6306, -0.3616]) tensor([0.0504, 0.2142, 0.5365, 0.1989]) -Greedy action tensor([-0.8634, -0.3484, 0.1345, 0.5508]) tensor([0.1053, 0.1762, 0.2855, 0.4330]) -Greedy action tensor([-1.8680, -0.5069, 0.7504, -0.1195]) tensor([0.0410, 0.1601, 0.5630, 0.2359]) -Greedy action tensor([-1.9167, -0.4183, 0.6581, -0.1586]) tensor([0.0410, 0.1834, 0.5379, 0.2377]) -Greedy action tensor([-0.6403, -0.4739, 0.6743, 1.0504]) tensor([0.0883, 0.1043, 0.3287, 0.4788]) -Greedy action tensor([-1.9408, -0.4517, 0.6701, -0.1756]) tensor([0.0402, 0.1781, 0.5469, 0.2348]) -Greedy action tensor([-1.9118, -0.4435, 0.6655, -0.1685]) tensor([0.0413, 0.1793, 0.5434, 0.2360]) -Greedy action tensor([-1.9047, -0.4509, 0.6462, -0.1656]) tensor([0.0420, 0.1799, 0.5388, 0.2393]) -Greedy action tensor([-1.6058, -0.5163, 0.6067, 0.2462]) tensor([0.0513, 0.1526, 0.4690, 0.3271]) -Greedy action tensor([-1.8915, -0.4287, 0.6383, -0.1461]) tensor([0.0424, 0.1830, 0.5319, 0.2427]) -Greedy action tensor([-1.2123, -0.6617, 0.6113, 0.7522]) tensor([0.0623, 0.1080, 0.3857, 0.4440]) -Greedy action tensor([-1.9375, -0.4500, 0.6624, -0.1760]) tensor([0.0405, 0.1791, 0.5448, 0.2356]) -Greedy action tensor([ 0.3384, -0.4170, 0.1499, 0.5894]) tensor([0.2791, 0.1311, 0.2311, 0.3587]) -Greedy action tensor([-1.7981, -0.4955, 0.5951, -0.1003]) tensor([0.0474, 0.1744, 0.5192, 0.2590]) -Greedy action tensor([-1.4985, -0.4117, 1.0108, 0.7910]) tensor([0.0383, 0.1135, 0.4706, 0.3777]) -Greedy action tensor([-1.6492, -0.0825, 0.4667, 0.0309]) tensor([0.0514, 0.2463, 0.4265, 0.2758]) -Greedy action tensor([-1.9066, -0.4448, 0.6516, -0.1605]) tensor([0.0417, 0.1800, 0.5390, 0.2392]) -Greedy action tensor([-1.8979, -0.3791, 0.6454, -0.1264]) tensor([0.0414, 0.1890, 0.5264, 0.2433]) -Greedy action tensor([-1.8769, -0.2976, 0.6097, -0.1303]) tensor([0.0424, 0.2055, 0.5092, 0.2429]) -Greedy action tensor([-1.3640, 0.6216, 0.2264, 0.2212]) tensor([0.0553, 0.4031, 0.2715, 0.2701]) -Greedy action tensor([-1.8835, -0.4464, 0.6775, -0.1122]) tensor([0.0416, 0.1751, 0.5387, 0.2446]) -Greedy action tensor([-1.8157, -0.4453, 0.6022, -0.1163]) tensor([0.0462, 0.1820, 0.5188, 0.2529]) -Greedy action tensor([-1.8432, -0.2840, 0.6519, -0.0611]) tensor([0.0420, 0.1996, 0.5089, 0.2495]) -Greedy action tensor([-1.9081, -0.4319, 0.6446, -0.1634]) tensor([0.0418, 0.1828, 0.5364, 0.2391]) -Greedy action tensor([-1.8652, -0.4580, 0.6332, -0.1398]) tensor([0.0437, 0.1787, 0.5320, 0.2456]) -Greedy action tensor([-1.8259, -0.4525, 0.6086, -0.1220]) tensor([0.0458, 0.1807, 0.5221, 0.2515]) -Greedy action tensor([-1.3972, 0.3508, 0.3055, -0.0176]) tensor([0.0617, 0.3544, 0.3387, 0.2452]) -Greedy action tensor([-1.9332, -0.4250, 0.6590, -0.1724]) tensor([0.0405, 0.1830, 0.5410, 0.2355]) -Greedy action tensor([-0.6063, 1.1004, 0.0607, 0.5583]) tensor([0.0857, 0.4725, 0.1670, 0.2748]) -Greedy action tensor([-1.2413, -0.8004, 0.7136, 0.1506]) tensor([0.0733, 0.1139, 0.5178, 0.2949]) -Greedy action tensor([ 0.6018, -0.4555, 0.2601, 0.3186]) tensor([0.3557, 0.1236, 0.2528, 0.2680]) -Greedy action tensor([-1.8965, -0.4534, 0.6509, -0.1446]) tensor([0.0421, 0.1781, 0.5373, 0.2425]) -Greedy action tensor([-1.0547, 0.0216, 0.4637, 0.2979]) tensor([0.0809, 0.2372, 0.3691, 0.3127]) -Greedy action tensor([-1.7769, -0.4306, 0.5760, -0.0913]) tensor([0.0482, 0.1852, 0.5067, 0.2600]) -Greedy action tensor([-1.8082, -0.3952, 0.5988, -0.1180]) tensor([0.0462, 0.1899, 0.5132, 0.2506]) -Greedy action tensor([-0.9461, 0.8212, 0.1460, 0.0586]) tensor([0.0796, 0.4659, 0.2372, 0.2173]) -Greedy action tensor([-1.8499, -0.4298, 0.6325, -0.1241]) tensor([0.0440, 0.1821, 0.5267, 0.2472]) -Greedy action tensor([-1.9354, -0.4580, 0.6767, -0.1657]) tensor([0.0402, 0.1761, 0.5478, 0.2359]) -Greedy action tensor([-1.8838, -0.3386, 0.6229, -0.1314]) tensor([0.0422, 0.1977, 0.5170, 0.2432]) -Greedy action tensor([-1.9027, -0.4241, 0.6407, -0.1552]) tensor([0.0419, 0.1839, 0.5335, 0.2407]) -Greedy action tensor([-1.8729, -0.3895, 0.6469, -0.1206]) tensor([0.0424, 0.1868, 0.5265, 0.2444]) -Greedy action tensor([-1.7824, -0.4134, 0.5803, -0.1048]) tensor([0.0478, 0.1881, 0.5080, 0.2561]) -Greedy action tensor([-1.9155, -0.3982, 0.6490, -0.1579]) tensor([0.0411, 0.1872, 0.5336, 0.2381]) -Greedy action tensor([-1.3611, -0.4306, 0.4094, -0.0405]) tensor([0.0760, 0.1928, 0.4465, 0.2847]) -Greedy action tensor([-1.0807, 0.4001, 0.3877, 0.4116]) tensor([0.0705, 0.3099, 0.3061, 0.3135]) -Greedy action tensor([-1.7169, -0.5159, 0.5559, -0.1055]) tensor([0.0525, 0.1745, 0.5098, 0.2631]) -Greedy action tensor([-1.9240, -0.4307, 0.6542, -0.1665]) tensor([0.0409, 0.1823, 0.5394, 0.2374]) -Greedy action tensor([-1.3325, -0.4824, 0.5372, 0.4792]) tensor([0.0627, 0.1467, 0.4067, 0.3838]) -Greedy action tensor([-1.5793, -0.3373, 0.6542, 0.0200]) tensor([0.0533, 0.1847, 0.4979, 0.2641]) -Greedy action tensor([-1.7641, -0.2042, 0.5863, 0.0588]) tensor([0.0446, 0.2121, 0.4675, 0.2759]) -Greedy action tensor([-0.0500, 1.2600, 0.9320, -0.5767]) tensor([0.1255, 0.4652, 0.3351, 0.0741]) -Greedy action tensor([ 1.4382, -0.2269, 1.0620, 0.8083]) tensor([0.4152, 0.0786, 0.2850, 0.2212]) -Greedy action tensor([-0.7955, -0.3589, -0.2130, -0.2145]) tensor([0.1632, 0.2526, 0.2923, 0.2919]) -Greedy action tensor([ 1.5066, 0.1407, -0.1614, -0.0309]) tensor([0.6029, 0.1538, 0.1137, 0.1296]) -Greedy action tensor([-0.2553, -1.5416, 0.0507, 0.1263]) tensor([0.2440, 0.0674, 0.3313, 0.3573]) -Greedy action tensor([ 1.6653, -0.5316, 0.6943, 0.4634]) tensor([0.5585, 0.0621, 0.2115, 0.1679]) -Greedy action tensor([-0.0160, -1.6025, 2.3036, -0.6310]) tensor([0.0839, 0.0172, 0.8535, 0.0454]) -Greedy action tensor([ 0.6667, -1.9869, -0.1068, 0.9114]) tensor([0.3560, 0.0251, 0.1643, 0.4547]) -Greedy action tensor([ 1.8016, -0.5447, 1.3870, -0.4543]) tensor([0.5373, 0.0514, 0.3549, 0.0563]) -Greedy action tensor([ 0.6277, -0.7870, 0.7237, -0.3307]) tensor([0.3667, 0.0891, 0.4036, 0.1406]) -Greedy action tensor([-0.5732, -1.7757, -1.3013, 0.7005]) tensor([0.1867, 0.0561, 0.0901, 0.6671]) -Greedy action tensor([ 0.3371, -0.2882, -0.3216, 0.5412]) tensor([0.3050, 0.1632, 0.1578, 0.3740]) -Greedy action tensor([ 0.3521, -1.1247, -0.3237, 1.0398]) tensor([0.2684, 0.0613, 0.1365, 0.5338]) -Greedy action tensor([ 0.6549, -0.2064, -0.8704, 0.9589]) tensor([0.3339, 0.1411, 0.0726, 0.4524]) -Greedy action tensor([-0.2755, -1.0412, 0.4582, -0.4667]) tensor([0.2286, 0.1063, 0.4762, 0.1889]) -Greedy action tensor([-0.8436, -0.6136, 0.8629, -0.1778]) tensor([0.1029, 0.1296, 0.5672, 0.2003]) -Greedy action tensor([ 0.3883, 0.0905, 0.6557, -0.4888]) tensor([0.2886, 0.2143, 0.3771, 0.1201]) -Greedy action tensor([-1.0493, -0.7222, 1.2972, -1.0599]) tensor([0.0723, 0.1003, 0.7558, 0.0716]) -Greedy action tensor([ 1.0178, -0.8680, 0.7655, 1.1830]) tensor([0.3217, 0.0488, 0.2500, 0.3795]) -Greedy action tensor([ 0.8166, -1.7209, -0.5369, 0.3140]) tensor([0.5149, 0.0407, 0.1330, 0.3114]) -Greedy action tensor([ 1.0633, -1.0831, -0.2953, 0.3700]) tensor([0.5337, 0.0624, 0.1372, 0.2668]) -Greedy action tensor([ 0.3692, -0.6232, 0.2583, -0.2013]) tensor([0.3532, 0.1309, 0.3162, 0.1997]) -Greedy action tensor([0.7841, 0.6888, 0.0796, 0.2350]) tensor([0.3355, 0.3050, 0.1658, 0.1937]) -Greedy action tensor([-0.8543, -1.0695, 0.0676, -0.9957]) tensor([0.1927, 0.1554, 0.4845, 0.1673]) -Greedy action tensor([ 0.1020, 0.6929, 0.9171, -0.6474]) tensor([0.1806, 0.3261, 0.4080, 0.0854]) -Greedy action tensor([ 0.8021, -0.5263, 0.1070, 0.3127]) tensor([0.4207, 0.1114, 0.2099, 0.2579]) -Greedy action tensor([ 0.6697, -0.9180, -0.3721, -0.2061]) tensor([0.5067, 0.1036, 0.1787, 0.2110]) -Greedy action tensor([ 0.4678, -1.3255, 1.1794, -0.5798]) tensor([0.2813, 0.0468, 0.5731, 0.0987]) -Greedy action tensor([ 0.1132, -0.9330, 1.9040, -0.8471]) tensor([0.1294, 0.0455, 0.7756, 0.0495]) -Greedy action tensor([-0.3349, -1.9186, -0.1569, 0.7820]) tensor([0.1833, 0.0376, 0.2190, 0.5601]) -Greedy action tensor([-0.8392, -1.0420, 1.7702, -0.3704]) tensor([0.0588, 0.0480, 0.7992, 0.0940]) -Greedy action tensor([-1.3237, -0.6989, -0.1062, -0.0111]) tensor([0.1004, 0.1875, 0.3391, 0.3730]) -Greedy action tensor([ 0.0983, -0.4188, 0.0840, -0.1973]) tensor([0.3007, 0.1793, 0.2964, 0.2237]) -Greedy action tensor([ 0.1304, -1.4364, 0.2950, 0.0026]) tensor([0.3060, 0.0639, 0.3608, 0.2693]) -Greedy action tensor([-1.1047, -0.3975, 0.1259, -0.6816]) tensor([0.1253, 0.2542, 0.4291, 0.1914]) -Greedy action tensor([ 0.1923, -1.1439, 0.4868, -0.2230]) tensor([0.3062, 0.0805, 0.4111, 0.2022]) -Greedy action tensor([ 0.1568, -0.5623, 1.7618, -0.6930]) tensor([0.1451, 0.0707, 0.7222, 0.0620]) -Greedy action tensor([ 0.0784, -0.7171, 0.0918, -0.6186]) tensor([0.3375, 0.1523, 0.3421, 0.1681]) -Greedy action tensor([ 0.4541, 0.3613, 0.6099, -0.1034]) tensor([0.2738, 0.2495, 0.3199, 0.1568]) -Greedy action tensor([-0.0624, -1.2382, 0.0963, -0.2076]) tensor([0.2989, 0.0922, 0.3503, 0.2585]) -Greedy action tensor([ 0.1101, -0.7460, 0.1136, 0.1723]) tensor([0.2863, 0.1216, 0.2873, 0.3047]) -Greedy action tensor([ 0.1559, 0.0365, 0.4227, -0.8282]) tensor([0.2803, 0.2488, 0.3661, 0.1048]) -Greedy action tensor([ 1.1200, -0.1968, -0.6341, 0.0836]) tensor([0.5569, 0.1492, 0.0964, 0.1975]) -Greedy action tensor([ 0.5005, -0.1762, 0.4295, -0.0553]) tensor([0.3319, 0.1687, 0.3091, 0.1903]) -Greedy action tensor([-0.7502, -0.5121, 1.3192, -1.1780]) tensor([0.0922, 0.1170, 0.7306, 0.0601]) -Greedy action tensor([-0.3924, -1.4217, 0.0127, 0.7505]) tensor([0.1669, 0.0596, 0.2502, 0.5233]) -Greedy action tensor([ 0.2647, 0.3550, -0.0064, -0.8090]) tensor([0.3126, 0.3422, 0.2384, 0.1068]) -Greedy action tensor([ 0.0293, -0.8596, -0.0097, -0.7094]) tensor([0.3508, 0.1442, 0.3374, 0.1676]) -Greedy action tensor([ 0.3694, -1.2115, -0.5964, 0.7250]) tensor([0.3318, 0.0683, 0.1263, 0.4736]) -Greedy action tensor([-0.1043, 0.0255, 0.0097, 0.8599]) tensor([0.1700, 0.1936, 0.1905, 0.4459]) -Greedy action tensor([ 1.4002, -1.1425, 0.0510, 0.9810]) tensor([0.5011, 0.0394, 0.1300, 0.3295]) -Greedy action tensor([ 0.2820, 0.5553, 0.3393, -0.6229]) tensor([0.2647, 0.3479, 0.2803, 0.1071]) -Greedy action tensor([ 0.7057, -1.0862, -0.0625, 1.1744]) tensor([0.3097, 0.0516, 0.1437, 0.4950]) -Greedy action tensor([ 0.4917, -0.1257, -0.2004, 0.7337]) tensor([0.3018, 0.1628, 0.1510, 0.3844]) -Greedy action tensor([ 1.3266, -0.6653, 0.3963, 0.1132]) tensor([0.5470, 0.0746, 0.2158, 0.1626]) -Greedy action tensor([-0.0397, -0.4607, -1.4031, 0.2264]) tensor([0.3109, 0.2040, 0.0795, 0.4056]) -Greedy action tensor([-0.2173, -0.7814, 0.1819, -1.6756]) tensor([0.3038, 0.1728, 0.4528, 0.0707]) -Greedy action tensor([-0.3592, -0.7706, 0.5059, -0.1104]) tensor([0.1880, 0.1246, 0.4464, 0.2411]) -Greedy action tensor([-0.7794, -2.3367, -0.0818, 0.4315]) tensor([0.1521, 0.0320, 0.3055, 0.5104]) -Greedy action tensor([ 0.0550, -1.0824, 0.9660, -0.4232]) tensor([0.2259, 0.0724, 0.5617, 0.1400]) -Greedy action tensor([ 1.6503, 0.5797, 0.5482, -0.2026]) tensor([0.5459, 0.1872, 0.1813, 0.0856]) -Greedy action tensor([ 0.9481, -0.4613, -0.2666, 0.6463]) tensor([0.4385, 0.1071, 0.1301, 0.3243]) -Greedy action tensor([ 0.3976, 0.7221, -0.8107, 0.2859]) tensor([0.2796, 0.3868, 0.0835, 0.2501]) -Greedy action tensor([-0.2338, 1.0385, -0.2426, -0.4250]) tensor([0.1566, 0.5589, 0.1552, 0.1293]) -Greedy action tensor([ 1.0469, -0.5524, 0.6977, 0.4446]) tensor([0.4073, 0.0823, 0.2873, 0.2230]) -Greedy action tensor([-0.4574, -0.4313, -0.3147, -0.3292]) tensor([0.2317, 0.2378, 0.2672, 0.2633]) -Greedy action tensor([ 0.3349, -0.4990, -0.3814, 0.1071]) tensor([0.3678, 0.1597, 0.1797, 0.2928]) -Greedy action tensor([-0.4366, 0.0350, 0.7110, -0.1424]) tensor([0.1409, 0.2259, 0.4441, 0.1891]) -Greedy action tensor([-1.6283, -0.3583, -0.9883, 0.4791]) tensor([0.0681, 0.2425, 0.1292, 0.5603]) -Greedy action tensor([-0.4882, -0.1013, 0.3628, -0.7862]) tensor([0.1800, 0.2650, 0.4215, 0.1336]) -Greedy action tensor([-2.0912, -0.8306, 0.9667, -0.7901]) tensor([0.0339, 0.1196, 0.7219, 0.1246]) -Greedy action tensor([-0.5180, 0.0984, 0.1817, -0.3113]) tensor([0.1641, 0.3039, 0.3303, 0.2017]) -Greedy action tensor([ 0.9820, -0.4476, 0.5735, 0.6392]) tensor([0.3826, 0.0916, 0.2543, 0.2715]) -Greedy action tensor([ 0.1865, -1.2565, 0.5307, 0.9225]) tensor([0.2112, 0.0499, 0.2980, 0.4409]) -Greedy action tensor([ 0.2369, 0.2424, -0.1431, 1.0198]) tensor([0.2050, 0.2062, 0.1402, 0.4486]) -Greedy action tensor([ 0.7076, -1.0880, 0.0444, -0.3067]) tensor([0.4893, 0.0812, 0.2521, 0.1774]) -Greedy action tensor([1.5526, 0.6220, 0.3456, 0.7506]) tensor([0.4669, 0.1841, 0.1396, 0.2094]) -Greedy action tensor([ 0.4627, 0.0899, -0.3268, 0.5069]) tensor([0.3137, 0.2161, 0.1424, 0.3278]) -Greedy action tensor([-0.0997, 0.2616, -0.1759, 0.6619]) tensor([0.1817, 0.2608, 0.1684, 0.3892]) -Greedy action tensor([ 0.2171, -0.6635, -0.0244, 0.7213]) tensor([0.2594, 0.1075, 0.2037, 0.4294]) -Greedy action tensor([ 1.0334, -0.0981, 0.4346, 0.4175]) tensor([0.4146, 0.1337, 0.2278, 0.2239]) -Greedy action tensor([ 1.3954, -0.2671, 0.3257, 0.1064]) tensor([0.5530, 0.1049, 0.1898, 0.1524]) -Greedy action tensor([ 1.4404, -0.8363, 0.0200, 0.0875]) tensor([0.6239, 0.0640, 0.1508, 0.1613]) -Greedy action tensor([ 1.1381, -0.5698, 0.3044, 0.2861]) tensor([0.4897, 0.0887, 0.2127, 0.2089]) -Greedy action tensor([ 1.1136, -0.4916, -0.4688, 0.1759]) tensor([0.5562, 0.1117, 0.1143, 0.2178]) -Greedy action tensor([ 0.6437, -0.6390, -0.3315, 0.5308]) tensor([0.3925, 0.1088, 0.1480, 0.3506]) -Greedy action tensor([ 1.8421, -1.0997, -0.4001, 0.5283]) tensor([0.7004, 0.0370, 0.0744, 0.1883]) -Greedy action tensor([ 1.3871, 0.0105, -0.8985, 0.2500]) tensor([0.5971, 0.1507, 0.0607, 0.1915]) -Greedy action tensor([ 1.4485, -0.2577, -0.6739, 0.0034]) tensor([0.6506, 0.1181, 0.0779, 0.1534]) -Greedy action tensor([ 1.5880, 0.1655, -0.6328, 0.3001]) tensor([0.6152, 0.1483, 0.0668, 0.1697]) -Greedy action tensor([ 1.1504, -0.2456, -0.2802, 0.1233]) tensor([0.5421, 0.1342, 0.1296, 0.1941]) -Greedy action tensor([ 1.2467, -0.5894, -0.4643, 0.2793]) tensor([0.5813, 0.0927, 0.1050, 0.2209]) -Greedy action tensor([ 2.0199, -0.3732, -0.3597, 0.2966]) tensor([0.7340, 0.0670, 0.0680, 0.1310]) -Greedy action tensor([ 1.4504, -0.7684, -0.5131, 0.5101]) tensor([0.6099, 0.0663, 0.0856, 0.2382]) -Greedy action tensor([ 2.0908, -0.5866, -0.4322, 0.2524]) tensor([0.7645, 0.0526, 0.0613, 0.1216]) -Greedy action tensor([ 2.0236, -0.7877, -0.2483, 0.7787]) tensor([0.6891, 0.0414, 0.0711, 0.1984]) -Greedy action tensor([ 2.2770, -0.8608, -0.0056, 0.1574]) tensor([0.7902, 0.0343, 0.0806, 0.0949]) -Greedy action tensor([ 0.8491, -0.5468, -0.1912, -0.0584]) tensor([0.4989, 0.1235, 0.1763, 0.2013]) -Greedy action tensor([ 1.4849, -0.4609, -0.3208, 0.3052]) tensor([0.6193, 0.0885, 0.1018, 0.1904]) -Greedy action tensor([ 1.0012, -0.2889, -0.3129, 0.0326]) tensor([0.5199, 0.1431, 0.1397, 0.1973]) -Greedy action tensor([ 1.4192, -0.3791, -0.5344, 0.3708]) tensor([0.6032, 0.0999, 0.0855, 0.2114]) -Greedy action tensor([ 1.2044, -0.1323, -0.0754, 0.1530]) tensor([0.5290, 0.1390, 0.1471, 0.1849]) -Greedy action tensor([ 1.1294, -0.3888, 0.1577, -0.0815]) tensor([0.5276, 0.1156, 0.1997, 0.1572]) -Greedy action tensor([ 1.0292, -0.0097, -0.0308, -0.0496]) tensor([0.4901, 0.1734, 0.1698, 0.1666]) -Greedy action tensor([ 1.7491, -0.3303, -0.6826, 0.3514]) tensor([0.6849, 0.0856, 0.0602, 0.1693]) -Greedy action tensor([ 1.1222, -0.2382, -0.1844, -0.5286]) tensor([0.5817, 0.1492, 0.1575, 0.1116]) -Greedy action tensor([2.1242, 1.2660, 0.4898, 0.0165]) tensor([0.5745, 0.2436, 0.1121, 0.0698]) -Greedy action tensor([ 2.0789, -0.8607, -0.6906, 0.2005]) tensor([0.7884, 0.0417, 0.0494, 0.1205]) -Greedy action tensor([ 0.5751, -0.3179, -0.3283, 0.2591]) tensor([0.3931, 0.1610, 0.1593, 0.2866]) -Greedy action tensor([ 1.3697, -0.6121, -0.5955, 0.3444]) tensor([0.6110, 0.0842, 0.0856, 0.2192]) -Greedy action tensor([ 1.5984, -0.5632, -0.4799, 0.4683]) tensor([0.6397, 0.0737, 0.0801, 0.2066]) -Greedy action tensor([ 1.7793, -0.0107, -0.3124, 0.4909]) tensor([0.6385, 0.1066, 0.0788, 0.1760]) -Greedy action tensor([ 1.6681, -0.2738, 0.0182, 0.7871]) tensor([0.5715, 0.0820, 0.1098, 0.2368]) -Greedy action tensor([ 1.3872, -0.4492, -0.6308, 0.3930]) tensor([0.6016, 0.0959, 0.0800, 0.2226]) -Greedy action tensor([ 1.5131, -0.9880, -0.1457, 0.5966]) tensor([0.5980, 0.0490, 0.1138, 0.2391]) -Greedy action tensor([ 1.3596, -0.2822, -0.6020, 0.2247]) tensor([0.6040, 0.1169, 0.0849, 0.1942]) -Greedy action tensor([ 1.4843, -0.5032, -0.6256, 0.3995]) tensor([0.6265, 0.0859, 0.0760, 0.2117]) -Greedy action tensor([ 1.3125, -0.5661, -0.3515, 0.4502]) tensor([0.5668, 0.0866, 0.1073, 0.2393]) -Greedy action tensor([ 1.4535, 0.1518, -0.3348, -0.0956]) tensor([0.6054, 0.1647, 0.1012, 0.1286]) -Greedy action tensor([ 1.2241, -0.4794, -0.3847, 0.2786]) tensor([0.5648, 0.1028, 0.1130, 0.2194]) -Greedy action tensor([ 0.9537, -0.2204, -0.0980, 0.0612]) tensor([0.4835, 0.1495, 0.1689, 0.1981]) -Greedy action tensor([ 1.5205, -0.4936, -0.3568, 0.5510]) tensor([0.6004, 0.0801, 0.0919, 0.2277]) -Greedy action tensor([ 1.3157, -0.6093, -0.7918, 0.4347]) tensor([0.5946, 0.0867, 0.0723, 0.2464]) -Greedy action tensor([ 1.3800, -0.5353, -0.0041, 0.1478]) tensor([0.5919, 0.0872, 0.1483, 0.1726]) -Greedy action tensor([ 1.7686, 0.1439, -0.0794, 0.2162]) tensor([0.6385, 0.1258, 0.1006, 0.1352]) -Greedy action tensor([ 2.0419, -0.7241, -0.4275, 0.0803]) tensor([0.7763, 0.0488, 0.0657, 0.1092]) -Greedy action tensor([ 1.7441, -0.7382, -0.5926, 0.4178]) tensor([0.6917, 0.0578, 0.0668, 0.1836]) -Greedy action tensor([ 1.4636, -0.1824, -0.4525, 0.3225]) tensor([0.6026, 0.1162, 0.0887, 0.1925]) -Greedy action tensor([ 1.2994, -0.8260, 0.0088, 0.3415]) tensor([0.5624, 0.0671, 0.1547, 0.2158]) -Greedy action tensor([ 1.5693, -0.5619, -0.1381, 0.5350]) tensor([0.6041, 0.0717, 0.1095, 0.2147]) -Greedy action tensor([ 1.8747, -1.1243, -0.2030, 0.6305]) tensor([0.6834, 0.0341, 0.0856, 0.1969]) -Greedy action tensor([ 1.8841, -1.2353, 0.0153, 0.4626]) tensor([0.6945, 0.0307, 0.1072, 0.1676]) -Greedy action tensor([ 1.1121, -0.0310, -0.2623, -0.4888]) tensor([0.5638, 0.1798, 0.1426, 0.1137]) -Greedy action tensor([ 1.4761, 0.0589, -0.4762, -0.2659]) tensor([0.6412, 0.1554, 0.0910, 0.1123]) -Greedy action tensor([ 1.5528, -0.8389, -0.0650, 0.5617]) tensor([0.6020, 0.0551, 0.1194, 0.2235]) -Greedy action tensor([ 1.3606, -0.4171, -0.5083, 0.2067]) tensor([0.6102, 0.1031, 0.0942, 0.1925]) -Greedy action tensor([ 2.1534, -1.4120, 0.1248, 0.5112]) tensor([0.7389, 0.0209, 0.0972, 0.1430]) -Greedy action tensor([ 1.3641, -0.5538, -0.2047, 0.1506]) tensor([0.6052, 0.0889, 0.1261, 0.1798]) -Greedy action tensor([ 1.9157, -0.9353, -0.2450, 0.5147]) tensor([0.7045, 0.0407, 0.0812, 0.1736]) -Greedy action tensor([ 1.6439, -0.2890, -0.0785, 0.8058]) tensor([0.5695, 0.0824, 0.1017, 0.2463]) -Greedy action tensor([ 1.9846, -0.8099, -0.2638, 0.4981]) tensor([0.7179, 0.0439, 0.0758, 0.1624]) -Greedy action tensor([ 1.2616, -0.5283, -0.2936, 0.2887]) tensor([0.5694, 0.0951, 0.1202, 0.2152]) -Greedy action tensor([ 1.6888, -0.5070, -0.2927, 0.5455]) tensor([0.6378, 0.0710, 0.0879, 0.2033]) -Greedy action tensor([ 1.3917, -0.4468, -0.5346, 0.4033]) tensor([0.5963, 0.0948, 0.0869, 0.2219]) -Greedy action tensor([ 0.8678, -0.4699, -0.0856, 0.0583]) tensor([0.4778, 0.1254, 0.1842, 0.2127]) -Greedy action tensor([ 1.5395, 0.2080, -0.4953, 0.2441]) tensor([0.5993, 0.1583, 0.0783, 0.1641]) -Greedy action tensor([ 2.0788, -0.3395, -0.4359, 0.3913]) tensor([0.7380, 0.0657, 0.0597, 0.1365]) -Greedy action tensor([ 0.9599, -0.4786, 0.0507, 0.0891]) tensor([0.4857, 0.1153, 0.1957, 0.2033]) -Greedy action tensor([ 1.4269, -0.2836, -0.1105, 0.2968]) tensor([0.5818, 0.1052, 0.1251, 0.1879]) -Greedy action tensor([ 1.1252, 0.0970, -0.3512, 0.4390]) tensor([0.4785, 0.1712, 0.1093, 0.2410]) -Greedy action tensor([ 0.8472, -0.1619, 0.0134, 0.0220]) tensor([0.4470, 0.1630, 0.1942, 0.1959]) -Greedy action tensor([ 1.3788, -0.4412, -0.7741, 0.5167]) tensor([0.5881, 0.0953, 0.0683, 0.2483]) -Greedy action tensor([ 1.8210, -0.3998, -0.9916, 0.6715]) tensor([0.6732, 0.0731, 0.0404, 0.2133]) -Greedy action tensor([ 1.1020, -0.1691, -0.0218, -0.0291]) tensor([0.5186, 0.1455, 0.1686, 0.1673]) -Greedy action tensor([ 1.7096, -0.7317, -0.0789, 0.4233]) tensor([0.6534, 0.0569, 0.1092, 0.1805]) -Greedy action tensor([ 2.2729, -0.5449, -0.3496, 0.2093]) tensor([0.7941, 0.0474, 0.0577, 0.1008]) -Greedy action tensor([ 1.1556, 0.0052, -0.6775, 0.3774]) tensor([0.5166, 0.1635, 0.0826, 0.2372]) -Greedy action tensor([ 2.3091, -0.9051, -0.2497, 0.9620]) tensor([0.7259, 0.0292, 0.0562, 0.1887]) -Greedy action tensor([ 1.6774, -0.8559, -0.1896, 0.8131]) tensor([0.6041, 0.0480, 0.0934, 0.2545]) -Greedy action tensor([ 1.3700, -0.6050, -0.0915, 0.2814]) tensor([0.5857, 0.0813, 0.1358, 0.1972]) -Greedy action tensor([ 1.0148, -0.3721, -0.2774, 0.5398]) tensor([0.4659, 0.1164, 0.1280, 0.2897]) -Greedy action tensor([ 2.1187, -0.6329, -0.6074, -0.0244]) tensor([0.8022, 0.0512, 0.0525, 0.0941]) -Greedy action tensor([ 1.5133, -0.2811, -0.4436, 0.1746]) tensor([0.6371, 0.1059, 0.0900, 0.1670]) -Greedy action tensor([ 0.7763, -0.3658, -0.0785, -0.1700]) tensor([0.4689, 0.1496, 0.1994, 0.1820]) -Greedy action tensor([ 0.4722, -0.3173, -0.1617, -0.0661]) tensor([0.3894, 0.1768, 0.2066, 0.2273]) -Greedy action tensor([ 0.6556, -0.5119, -0.0879, -0.5276]) tensor([0.4778, 0.1487, 0.2272, 0.1463]) -Greedy action tensor([ 0.8143, -0.5235, -0.1741, -0.4340]) tensor([0.5204, 0.1366, 0.1937, 0.1494]) -Greedy action tensor([ 1.1769, -0.6696, -0.0245, -0.9270]) tensor([0.6327, 0.0998, 0.1903, 0.0772]) -Greedy action tensor([ 0.8014, 0.1200, -0.1044, -0.1016]) tensor([0.4319, 0.2185, 0.1746, 0.1751]) -Greedy action tensor([ 0.9745, -0.8072, 0.2494, -0.8169]) tensor([0.5497, 0.0925, 0.2662, 0.0916]) -Greedy action tensor([ 0.6448, -0.6386, 0.0951, -0.7924]) tensor([0.4781, 0.1325, 0.2759, 0.1136]) -Greedy action tensor([ 1.0224, -0.3865, -0.0599, -0.1953]) tensor([0.5321, 0.1301, 0.1803, 0.1575]) -Greedy action tensor([ 0.7618, -0.3234, -0.0585, -0.1702]) tensor([0.4604, 0.1555, 0.2027, 0.1813]) -Greedy action tensor([ 0.6964, -0.5541, -0.1253, -0.2697]) tensor([0.4747, 0.1359, 0.2087, 0.1806]) -Greedy action tensor([ 0.2029, -0.0196, -0.0578, -0.3998]) tensor([0.3207, 0.2567, 0.2471, 0.1755]) -Greedy action tensor([ 0.4934, -0.3688, -0.0976, -0.3513]) tensor([0.4157, 0.1755, 0.2302, 0.1786]) -Greedy action tensor([ 0.7637, -0.4335, -0.1580, -0.2831]) tensor([0.4876, 0.1473, 0.1940, 0.1712]) -Greedy action tensor([ 0.5903, -0.2421, -0.0822, -0.1317]) tensor([0.4113, 0.1789, 0.2099, 0.1998]) -Greedy action tensor([ 0.8711, -0.7824, 0.1627, -0.4563]) tensor([0.5131, 0.0982, 0.2526, 0.1361]) -Greedy action tensor([ 0.8950, -0.5008, 0.0616, -0.5102]) tensor([0.5188, 0.1285, 0.2255, 0.1273]) -Greedy action tensor([ 0.4855, -0.5687, 0.0233, -0.3714]) tensor([0.4162, 0.1450, 0.2621, 0.1767]) -Greedy action tensor([ 0.4579, -0.3525, -0.1845, -0.0913]) tensor([0.3924, 0.1745, 0.2064, 0.2266]) -Greedy action tensor([ 0.5602, -0.2933, -0.0099, -0.2198]) tensor([0.4082, 0.1739, 0.2308, 0.1871]) -Greedy action tensor([ 0.6047, -0.2544, 0.0203, -0.0956]) tensor([0.4036, 0.1710, 0.2250, 0.2004]) -Greedy action tensor([ 1.0053, -0.6254, -0.0216, -0.4278]) tensor([0.5579, 0.1092, 0.1998, 0.1331]) -Greedy action tensor([ 0.7350, -0.4107, 0.0694, -0.2558]) tensor([0.4539, 0.1443, 0.2333, 0.1685]) -Greedy action tensor([ 0.6447, -0.3112, -0.0287, -0.2486]) tensor([0.4341, 0.1669, 0.2214, 0.1777]) -Greedy action tensor([ 0.9906, -1.0368, 0.0103, -0.3577]) tensor([0.5661, 0.0745, 0.2124, 0.1470]) -Greedy action tensor([ 1.1064, -0.5720, 0.2836, -0.4504]) tensor([0.5445, 0.1016, 0.2391, 0.1148]) -Greedy action tensor([ 0.8039, -0.6733, -0.0780, -0.5485]) tensor([0.5261, 0.1201, 0.2178, 0.1360]) -Greedy action tensor([ 0.2966, -0.2211, -0.0967, -0.4339]) tensor([0.3633, 0.2165, 0.2452, 0.1750]) -Greedy action tensor([ 0.7360, -0.6303, 0.0832, -0.2876]) tensor([0.4684, 0.1195, 0.2438, 0.1683]) -Greedy action tensor([ 0.6347, -0.0637, -0.0015, -0.2231]) tensor([0.4080, 0.2029, 0.2160, 0.1730]) -Greedy action tensor([ 0.2744, 0.0235, -0.1200, -0.3616]) tensor([0.3354, 0.2610, 0.2261, 0.1776]) -Greedy action tensor([ 0.2295, 0.0145, -0.1602, -0.2246]) tensor([0.3206, 0.2586, 0.2171, 0.2036]) -Greedy action tensor([ 0.4416, -0.4391, 0.0283, -0.2439]) tensor([0.3876, 0.1607, 0.2564, 0.1953]) -Greedy action tensor([ 0.8194, -0.5432, -0.1015, -0.2765]) tensor([0.5029, 0.1287, 0.2002, 0.1681]) -Greedy action tensor([ 0.4020, -0.3908, -0.1920, -0.0749]) tensor([0.3809, 0.1724, 0.2103, 0.2364]) -Greedy action tensor([ 0.5818, -0.6962, -0.1521, -0.2117]) tensor([0.4523, 0.1260, 0.2171, 0.2046]) -Greedy action tensor([ 0.3821, 0.0523, -0.0131, -0.1237]) tensor([0.3338, 0.2400, 0.2248, 0.2013]) -Greedy action tensor([ 0.5621, -0.3643, 0.0032, -0.3065]) tensor([0.4189, 0.1659, 0.2395, 0.1757]) -Greedy action tensor([ 0.7700, -0.4127, -0.2199, -0.4165]) tensor([0.5042, 0.1545, 0.1874, 0.1539]) -Greedy action tensor([ 0.5149, 0.0871, -0.1764, 0.0659]) tensor([0.3583, 0.2336, 0.1795, 0.2287]) -Greedy action tensor([ 1.0725, -0.6671, 0.1962, -0.3664]) tensor([0.5467, 0.0960, 0.2276, 0.1297]) -Greedy action tensor([ 0.6463, -0.3720, -0.1454, -0.3183]) tensor([0.4555, 0.1645, 0.2064, 0.1736]) -Greedy action tensor([ 0.9772, -0.6163, -0.0627, -0.3318]) tensor([0.5474, 0.1112, 0.1935, 0.1479]) -Greedy action tensor([ 0.4119, 0.1929, 0.0965, -0.4869]) tensor([0.3401, 0.2733, 0.2481, 0.1385]) -Greedy action tensor([ 0.7609, -0.1141, -0.1003, -0.5225]) tensor([0.4725, 0.1969, 0.1997, 0.1309]) -Greedy action tensor([ 1.0202, -1.0333, 0.1000, -0.4969]) tensor([0.5727, 0.0735, 0.2282, 0.1256]) -Greedy action tensor([ 0.7623, -0.4645, 0.1051, -0.3189]) tensor([0.4650, 0.1363, 0.2410, 0.1577]) -Greedy action tensor([ 0.8876, -0.5710, 0.0273, -0.4611]) tensor([0.5221, 0.1214, 0.2209, 0.1355]) -Greedy action tensor([ 0.5776, -0.4147, -0.0024, -0.3268]) tensor([0.4282, 0.1587, 0.2397, 0.1733]) -Greedy action tensor([ 0.4663, -0.0127, -0.0696, -0.0721]) tensor([0.3586, 0.2222, 0.2099, 0.2093]) -Greedy action tensor([ 1.1545, -0.7963, 0.1091, -0.4247]) tensor([0.5883, 0.0836, 0.2068, 0.1213]) -Greedy action tensor([ 0.4961, -0.0664, -0.1515, -0.0811]) tensor([0.3767, 0.2146, 0.1971, 0.2115]) -Greedy action tensor([ 0.9534, -0.5884, 0.0926, -0.7277]) tensor([0.5486, 0.1174, 0.2319, 0.1021]) -Greedy action tensor([ 0.5854, -0.3345, -0.1291, -0.1333]) tensor([0.4210, 0.1678, 0.2060, 0.2052]) -Greedy action tensor([ 0.7640, -0.4566, -0.1131, -0.2535]) tensor([0.4825, 0.1424, 0.2007, 0.1744]) -Greedy action tensor([ 0.4916, -0.2198, -0.0781, -0.3359]) tensor([0.4010, 0.1969, 0.2268, 0.1753]) -Greedy action tensor([ 0.7972, -0.3112, 0.0012, -0.4678]) tensor([0.4846, 0.1600, 0.2186, 0.1368]) -Greedy action tensor([ 0.4389, 0.2170, 0.0897, -0.0681]) tensor([0.3217, 0.2577, 0.2269, 0.1938]) -Greedy action tensor([ 0.7812, -0.5056, -0.0154, -0.3176]) tensor([0.4854, 0.1340, 0.2188, 0.1618]) -Greedy action tensor([ 0.5774, -0.0664, -0.0619, -0.0950]) tensor([0.3901, 0.2049, 0.2058, 0.1991]) -Greedy action tensor([ 0.7417, -0.2676, -0.0791, -0.1853]) tensor([0.4545, 0.1657, 0.2000, 0.1799]) -Greedy action tensor([ 0.6173, -0.2457, -0.0366, -0.2492]) tensor([0.4233, 0.1786, 0.2201, 0.1780]) -Greedy action tensor([ 0.7340, -0.3287, -0.1285, -0.2684]) tensor([0.4685, 0.1619, 0.1977, 0.1719]) -Greedy action tensor([ 0.4938, 0.1902, -0.1634, 0.1255]) tensor([0.3392, 0.2504, 0.1758, 0.2347]) -Greedy action tensor([ 0.4311, -0.4571, -0.1437, -0.1187]) tensor([0.3920, 0.1613, 0.2206, 0.2262]) -Greedy action tensor([ 0.9964, -0.6268, 0.4251, -0.6857]) tensor([0.5133, 0.1013, 0.2899, 0.0955]) -Greedy action tensor([ 0.4180, 0.1790, -0.2529, -0.2143]) tensor([0.3534, 0.2782, 0.1806, 0.1878]) -Greedy action tensor([ 0.5919, -0.1704, -0.1203, -0.1596]) tensor([0.4117, 0.1921, 0.2020, 0.1942]) -Greedy action tensor([ 0.5411, -0.0574, 0.1778, -0.4482]) tensor([0.3821, 0.2100, 0.2657, 0.1421]) -Greedy action tensor([ 0.4559, 0.1254, -0.1543, -0.0709]) tensor([0.3506, 0.2519, 0.1905, 0.2070]) -Greedy action tensor([ 0.9029, -0.4754, -0.0052, -0.2368]) tensor([0.5063, 0.1276, 0.2042, 0.1620]) -Greedy action tensor([ 0.2788, -0.0753, 0.0090, -0.1319]) tensor([0.3196, 0.2243, 0.2440, 0.2120]) -Greedy action tensor([ 0.2466, -0.0582, 0.0137, -0.1328]) tensor([0.3112, 0.2294, 0.2465, 0.2129]) -Greedy action tensor([ 0.6232, -0.3883, -0.0424, -0.0971]) tensor([0.4230, 0.1538, 0.2174, 0.2058]) -Greedy action tensor([ 0.9476, -0.5734, 0.0235, -0.4076]) tensor([0.5338, 0.1166, 0.2119, 0.1377]) -Greedy action tensor([ 0.8883, -0.5472, -0.1442, -0.6617]) tensor([0.5536, 0.1318, 0.1971, 0.1175]) -Greedy action tensor([ 0.8563, -0.6111, -0.0672, -0.4157]) tensor([0.5241, 0.1208, 0.2081, 0.1469]) -Greedy action tensor([ 0.6956, -0.0986, -0.0692, 0.0063]) tensor([0.4134, 0.1868, 0.1924, 0.2075]) -Greedy action tensor([ 0.9190, -0.4525, -0.1347, -0.5514]) tensor([0.5458, 0.1385, 0.1903, 0.1254]) -Greedy action tensor([ 0.9762, -0.6931, -0.0388, -0.2386]) tensor([0.5413, 0.1020, 0.1961, 0.1606]) -Greedy action tensor([ 0.3027, -0.2563, -0.0494, -0.1548]) tensor([0.3439, 0.1966, 0.2418, 0.2176]) -Greedy action tensor([-1.9257, -0.4384, 0.6613, -0.1615]) tensor([0.0407, 0.1802, 0.5413, 0.2377]) -Greedy action tensor([-1.2697, -0.3719, 0.3040, 0.2161]) tensor([0.0788, 0.1933, 0.3800, 0.3480]) -Greedy action tensor([-1.9121, -0.4416, 0.6569, -0.1629]) tensor([0.0414, 0.1801, 0.5404, 0.2381]) -Greedy action tensor([-1.9137, -0.4151, 0.6514, -0.1629]) tensor([0.0413, 0.1847, 0.5364, 0.2376]) -Greedy action tensor([-1.9274, -0.4401, 0.6577, -0.1713]) tensor([0.0408, 0.1808, 0.5419, 0.2365]) -Greedy action tensor([-1.7660, -0.3347, 0.6028, -0.0625]) tensor([0.0468, 0.1959, 0.5002, 0.2571]) -Greedy action tensor([-1.7466, -0.2354, 0.5809, -0.0391]) tensor([0.0470, 0.2128, 0.4814, 0.2589]) -Greedy action tensor([-0.6422, -0.5858, 0.1718, 0.2479]) tensor([0.1481, 0.1567, 0.3343, 0.3608]) -Greedy action tensor([-1.9131, -0.4459, 0.6500, -0.1604]) tensor([0.0415, 0.1801, 0.5388, 0.2396]) -Greedy action tensor([-1.9213, -0.4919, 0.5729, -0.2054]) tensor([0.0438, 0.1828, 0.5301, 0.2434]) -Greedy action tensor([0.6382, 0.5883, 0.5324, 1.1314]) tensor([0.2228, 0.2120, 0.2004, 0.3648]) -Greedy action tensor([-1.4021, 0.4988, 0.2720, 0.0533]) tensor([0.0578, 0.3865, 0.3081, 0.2476]) -Greedy action tensor([-1.9058, -0.4275, 0.6451, -0.1542]) tensor([0.0417, 0.1830, 0.5348, 0.2405]) -Greedy action tensor([-1.9386, -0.4509, 0.6693, -0.1750]) tensor([0.0403, 0.1783, 0.5465, 0.2349]) -Greedy action tensor([-1.9186, -0.4089, 0.6510, -0.1648]) tensor([0.0410, 0.1858, 0.5361, 0.2371]) -Greedy action tensor([-1.3165, -0.3965, 0.4303, 0.2813]) tensor([0.0705, 0.1769, 0.4043, 0.3484]) -Greedy action tensor([-1.9156, -0.4578, 0.6737, -0.1466]) tensor([0.0408, 0.1755, 0.5441, 0.2396]) -Greedy action tensor([-1.9128, -0.4297, 0.6480, -0.1639]) tensor([0.0415, 0.1828, 0.5372, 0.2385]) -Greedy action tensor([-1.8102, -0.3942, 0.5934, -0.1128]) tensor([0.0462, 0.1904, 0.5112, 0.2523]) -Greedy action tensor([-1.8992, -0.4419, 0.6439, -0.1580]) tensor([0.0422, 0.1811, 0.5363, 0.2405]) -Greedy action tensor([-1.9490, -0.4521, 0.6676, -0.1834]) tensor([0.0400, 0.1787, 0.5475, 0.2338]) -Greedy action tensor([-1.9072, -0.4066, 0.6503, -0.1604]) tensor([0.0415, 0.1859, 0.5349, 0.2378]) -Greedy action tensor([-1.9076, -0.4367, 0.6433, -0.1608]) tensor([0.0418, 0.1821, 0.5362, 0.2399]) -Greedy action tensor([-0.5761, -0.5865, 0.6272, 0.6137]) tensor([0.1162, 0.1150, 0.3870, 0.3818]) -Greedy action tensor([-1.5737, -0.4706, 0.5574, 0.1053]) tensor([0.0562, 0.1693, 0.4733, 0.3012]) -Greedy action tensor([-1.6680, -0.1515, 0.4779, -0.0214]) tensor([0.0518, 0.2361, 0.4431, 0.2690]) -Greedy action tensor([-1.9407, -0.4466, 0.6699, -0.1734]) tensor([0.0401, 0.1788, 0.5461, 0.2350]) -Greedy action tensor([-1.9467, -0.4556, 0.6689, -0.1820]) tensor([0.0401, 0.1780, 0.5480, 0.2340]) -Greedy action tensor([-1.8385, -0.4044, 0.6080, -0.1430]) tensor([0.0451, 0.1891, 0.5203, 0.2455]) -Greedy action tensor([-1.8079, -0.3035, 0.6318, -0.1028]) tensor([0.0445, 0.2003, 0.5104, 0.2448]) -Greedy action tensor([-1.9375, -0.4338, 0.6620, -0.1762]) tensor([0.0404, 0.1816, 0.5432, 0.2349]) -Greedy action tensor([-1.0815, 0.7704, 0.1462, 0.1159]) tensor([0.0709, 0.4520, 0.2421, 0.2349]) -Greedy action tensor([-1.8402, -0.4445, 0.6155, -0.1332]) tensor([0.0450, 0.1818, 0.5249, 0.2483]) -Greedy action tensor([-1.6724, 0.3694, 0.4038, 0.0352]) tensor([0.0451, 0.3471, 0.3593, 0.2485]) -Greedy action tensor([-1.9068, -0.4327, 0.6579, -0.1427]) tensor([0.0413, 0.1805, 0.5371, 0.2412]) -Greedy action tensor([-1.8531, -0.4528, 0.6197, -0.1328]) tensor([0.0444, 0.1803, 0.5269, 0.2483]) -Greedy action tensor([-1.0002, -0.3694, 0.1792, 0.2525]) tensor([0.1038, 0.1951, 0.3377, 0.3634]) -Greedy action tensor([-1.8897, -0.4106, 0.6339, -0.1639]) tensor([0.0426, 0.1869, 0.5313, 0.2392]) -Greedy action tensor([-1.3888, 0.2562, 0.4979, 0.1444]) tensor([0.0574, 0.2976, 0.3789, 0.2661]) -Greedy action tensor([-1.4910, -0.3842, 0.5792, 0.2966]) tensor([0.0558, 0.1687, 0.4422, 0.3333]) -Greedy action tensor([-1.1490, -0.4730, 0.3677, 0.4422]) tensor([0.0804, 0.1581, 0.3665, 0.3949]) -Greedy action tensor([-1.9397, -0.4488, 0.6693, -0.1745]) tensor([0.0402, 0.1786, 0.5463, 0.2349]) -Greedy action tensor([-1.8913, -0.3948, 0.6385, -0.1486]) tensor([0.0421, 0.1882, 0.5289, 0.2407]) -Greedy action tensor([-1.9213, -0.4597, 0.6522, -0.1689]) tensor([0.0413, 0.1783, 0.5420, 0.2384]) -Greedy action tensor([-1.8518, -0.4215, 0.6633, -0.0913]) tensor([0.0428, 0.1789, 0.5294, 0.2489]) -Greedy action tensor([-1.8841, -0.4063, 0.6442, -0.1138]) tensor([0.0420, 0.1843, 0.5268, 0.2469]) -Greedy action tensor([-1.0981, 0.7749, 0.3298, 0.2492]) tensor([0.0644, 0.4192, 0.2686, 0.2478]) -Greedy action tensor([-1.2302, -0.5169, 0.7080, -0.3986]) tensor([0.0814, 0.1661, 0.5655, 0.1870]) -Greedy action tensor([-1.7551, -0.4196, 0.5757, -0.0692]) tensor([0.0488, 0.1856, 0.5021, 0.2635]) -Greedy action tensor([-0.8499, -0.0612, 0.3297, -0.3672]) tensor([0.1238, 0.2725, 0.4029, 0.2007]) -Greedy action tensor([-1.7202, -0.3634, 0.6255, -0.0730]) tensor([0.0487, 0.1893, 0.5089, 0.2531]) -Greedy action tensor([-1.9257, -0.5061, 0.8620, -0.0042]) tensor([0.0354, 0.1466, 0.5758, 0.2422]) -Greedy action tensor([-1.8679, -0.4403, 0.6278, -0.1621]) tensor([0.0438, 0.1828, 0.5319, 0.2414]) -Greedy action tensor([-1.9021, -0.4398, 0.7098, -0.0568]) tensor([0.0396, 0.1708, 0.5392, 0.2505]) -Greedy action tensor([-1.8717, -0.4509, 0.6249, -0.1470]) tensor([0.0437, 0.1809, 0.5304, 0.2451]) -Greedy action tensor([-1.8493, -0.3549, 0.6223, -0.1193]) tensor([0.0436, 0.1943, 0.5162, 0.2459]) -Greedy action tensor([-1.9142, -0.4460, 0.6540, -0.1640]) tensor([0.0414, 0.1798, 0.5403, 0.2384]) -Greedy action tensor([-1.9397, -0.4547, 0.6659, -0.1759]) tensor([0.0403, 0.1781, 0.5462, 0.2354]) -Greedy action tensor([-1.8851, -0.4658, 0.6426, -0.1507]) tensor([0.0429, 0.1772, 0.5370, 0.2429]) -Greedy action tensor([-1.2499, 0.1185, 0.5437, 0.5210]) tensor([0.0595, 0.2336, 0.3575, 0.3494]) -Greedy action tensor([-1.2334, -0.3846, 0.3638, -0.0615]) tensor([0.0869, 0.2031, 0.4293, 0.2806]) -Greedy action tensor([-1.9317, -0.4353, 0.6593, -0.1731]) tensor([0.0406, 0.1814, 0.5421, 0.2358]) -Greedy action tensor([-1.8525, -0.3483, 0.6037, -0.1299]) tensor([0.0439, 0.1977, 0.5123, 0.2460]) -Greedy action tensor([-1.8776, -0.3120, 0.6200, -0.1395]) tensor([0.0423, 0.2026, 0.5144, 0.2407]) -Greedy action tensor([-1.3715, -0.1623, 0.5240, 0.2450]) tensor([0.0623, 0.2089, 0.4149, 0.3139]) -Greedy action tensor([-1.9089, -0.4023, 0.6411, -0.1607]) tensor([0.0416, 0.1875, 0.5322, 0.2387]) -Greedy action tensor([-1.9278, -0.4230, 0.6590, -0.1711]) tensor([0.0407, 0.1832, 0.5405, 0.2357]) -Greedy action tensor([-1.9047, -0.3834, 0.6412, -0.1484]) tensor([0.0415, 0.1898, 0.5287, 0.2401]) -Greedy action tensor([-1.7345, -0.4336, 0.6521, 0.0974]) tensor([0.0459, 0.1685, 0.4991, 0.2866]) -Greedy action tensor([-1.9201, -0.4083, 0.6535, -0.1659]) tensor([0.0409, 0.1857, 0.5368, 0.2366]) -Greedy action tensor([-0.8445, -0.0297, 0.1710, -0.0663]) tensor([0.1220, 0.2756, 0.3368, 0.2657]) -Greedy action tensor([-1.8173, -0.4357, 0.5987, -0.1114]) tensor([0.0461, 0.1836, 0.5164, 0.2539]) -Greedy action tensor([-1.8181, -0.1456, 0.5843, -0.0783]) tensor([0.0433, 0.2308, 0.4789, 0.2469]) -Greedy action tensor([-1.8632, -0.4940, 0.6454, -0.1278]) tensor([0.0437, 0.1718, 0.5368, 0.2477]) -Greedy action tensor([-1.8434, -0.4829, 0.6190, -0.1258]) tensor([0.0450, 0.1756, 0.5285, 0.2509]) -Greedy action tensor([-1.9333, -0.4496, 0.6581, -0.1776]) tensor([0.0407, 0.1796, 0.5438, 0.2358]) -Greedy action tensor([-0.5931, -0.4453, 0.1695, 0.1097]) tensor([0.1582, 0.1834, 0.3391, 0.3194]) -Greedy action tensor([-0.9744, 0.7709, 0.0655, 0.2512]) tensor([0.0771, 0.4419, 0.2182, 0.2628]) -Greedy action tensor([-1.9450, -0.4539, 0.6699, -0.1798]) tensor([0.0401, 0.1780, 0.5477, 0.2342]) -Greedy action tensor([-1.9261, -0.4578, 0.6690, -0.1683]) tensor([0.0407, 0.1769, 0.5460, 0.2363]) -Greedy action tensor([-1.9090, -0.4500, 0.6493, -0.1660]) tensor([0.0418, 0.1798, 0.5396, 0.2388]) -Greedy action tensor([-1.4499, 0.7920, -0.0929, -1.0608]) tensor([0.0634, 0.5967, 0.2463, 0.0936]) -Greedy action tensor([ 0.3563, -1.1755, -0.1391, 0.6717]) tensor([0.3129, 0.0676, 0.1906, 0.4289]) -Greedy action tensor([-0.3101, -0.5800, 0.9782, -1.1781]) tensor([0.1721, 0.1314, 0.6242, 0.0723]) -Greedy action tensor([-0.0846, -0.9314, -0.1798, 0.8479]) tensor([0.2050, 0.0879, 0.1864, 0.5208]) -Greedy action tensor([ 0.3262, -1.9981, -0.3554, 0.1481]) tensor([0.4097, 0.0401, 0.2072, 0.3429]) -Greedy action tensor([-1.3605, 0.1284, -1.2209, -0.1151]) tensor([0.0994, 0.4407, 0.1143, 0.3455]) -Greedy action tensor([-0.6855, 0.4449, -0.7265, -1.2596]) tensor([0.1779, 0.5510, 0.1708, 0.1002]) -Greedy action tensor([ 1.3791, -0.4081, -0.5748, 1.1992]) tensor([0.4663, 0.0781, 0.0661, 0.3895]) -Greedy action tensor([-0.0509, -1.7389, -0.3014, 0.0159]) tensor([0.3298, 0.0610, 0.2567, 0.3526]) -Greedy action tensor([-0.3042, -1.0807, 0.2225, 0.2385]) tensor([0.2052, 0.0944, 0.3474, 0.3530]) -Greedy action tensor([ 0.5811, -0.4018, 1.4337, 0.5028]) tensor([0.2153, 0.0806, 0.5050, 0.1991]) -Greedy action tensor([-1.0084, 1.1296, 0.3678, -1.1250]) tensor([0.0698, 0.5918, 0.2763, 0.0621]) -Greedy action tensor([ 0.8058, -1.0826, 0.4213, -0.8303]) tensor([0.4934, 0.0747, 0.3359, 0.0961]) -Greedy action tensor([-0.5978, -1.2400, -0.2200, -0.0404]) tensor([0.2114, 0.1112, 0.3084, 0.3691]) -Greedy action tensor([ 0.3203, -1.4321, -0.7505, -0.1046]) tensor([0.4608, 0.0799, 0.1579, 0.3013]) -Greedy action tensor([ 0.9333, -0.0422, -0.3173, -0.1559]) tensor([0.5000, 0.1885, 0.1432, 0.1683]) -Greedy action tensor([ 0.5120, -2.4566, -0.0646, 0.1018]) tensor([0.4392, 0.0226, 0.2468, 0.2915]) -Greedy action tensor([1.2200, 0.1625, 1.5131, 0.8639]) tensor([0.2951, 0.1025, 0.3957, 0.2067]) -Greedy action tensor([-0.9210, 0.0812, 0.0878, -0.1223]) tensor([0.1151, 0.3135, 0.3156, 0.2558]) -Greedy action tensor([-0.3054, -1.2524, 0.5755, -0.4018]) tensor([0.2123, 0.0824, 0.5124, 0.1928]) -Greedy action tensor([ 2.0115, 0.0745, -0.5703, 0.5120]) tensor([0.6930, 0.0999, 0.0524, 0.1547]) -Greedy action tensor([ 0.0860, -1.1058, 1.0350, 0.2865]) tensor([0.1957, 0.0594, 0.5056, 0.2392]) -Greedy action tensor([-0.5686, -0.6967, 0.8314, -0.4686]) tensor([0.1420, 0.1250, 0.5760, 0.1570]) -Greedy action tensor([-1.0004, 0.3230, -0.9429, -0.6165]) tensor([0.1373, 0.5157, 0.1454, 0.2016]) -Greedy action tensor([-0.4951, -0.9100, 1.1544, -0.3596]) tensor([0.1248, 0.0824, 0.6498, 0.1430]) -Greedy action tensor([ 0.5025, 0.5003, 1.0297, -0.5518]) tensor([0.2475, 0.2470, 0.4193, 0.0862]) -Greedy action tensor([-0.2788, -1.0570, 0.6070, 0.1994]) tensor([0.1819, 0.0835, 0.4411, 0.2935]) -Greedy action tensor([-0.5731, 0.8275, 0.2208, 0.1457]) tensor([0.1073, 0.4353, 0.2373, 0.2201]) -Greedy action tensor([-0.6329, -1.3036, 1.0285, -0.6627]) tensor([0.1290, 0.0660, 0.6797, 0.1253]) -Greedy action tensor([ 1.0035, -0.0166, 0.1672, -0.4279]) tensor([0.4919, 0.1774, 0.2131, 0.1176]) -Greedy action tensor([-1.7662, -0.5859, -0.8093, -0.0085]) tensor([0.0790, 0.2572, 0.2057, 0.4581]) -Greedy action tensor([ 1.2457, -1.5909, 0.0222, 0.1428]) tensor([0.5936, 0.0348, 0.1746, 0.1970]) -Greedy action tensor([-0.1792, -1.3265, -0.4031, 0.1559]) tensor([0.2845, 0.0903, 0.2274, 0.3977]) -Greedy action tensor([ 0.9105, 0.3788, -0.3065, 0.5901]) tensor([0.3832, 0.2252, 0.1135, 0.2781]) -Greedy action tensor([ 0.1165, 0.4467, 1.3104, -0.5599]) tensor([0.1613, 0.2244, 0.5323, 0.0820]) -Greedy action tensor([-0.1025, 0.0069, -0.5090, 0.3589]) tensor([0.2289, 0.2554, 0.1525, 0.3632]) -Greedy action tensor([-0.6129, -1.2627, 0.4002, -0.1909]) tensor([0.1724, 0.0900, 0.4747, 0.2629]) -Greedy action tensor([ 0.0977, -1.9497, 0.3854, 0.7408]) tensor([0.2291, 0.0296, 0.3055, 0.4359]) -Greedy action tensor([ 0.3367, -1.6217, -0.0994, -0.0722]) tensor([0.4078, 0.0575, 0.2637, 0.2710]) -Greedy action tensor([ 0.4098, 0.1431, -0.7149, -0.5598]) tensor([0.4049, 0.3101, 0.1315, 0.1535]) -Greedy action tensor([-0.7742, 0.3037, 1.2810, 0.3377]) tensor([0.0676, 0.1987, 0.5281, 0.2056]) -Greedy action tensor([-0.8047, -0.6843, -0.2521, 0.9809]) tensor([0.1017, 0.1148, 0.1768, 0.6067]) -Greedy action tensor([ 0.0569, -0.7452, -0.1300, -0.8638]) tensor([0.3737, 0.1675, 0.3100, 0.1488]) -Greedy action tensor([ 0.7666, -0.2256, 0.2000, 1.0648]) tensor([0.3044, 0.1128, 0.1727, 0.4101]) -Greedy action tensor([-0.3423, -1.5404, 0.4653, 0.5791]) tensor([0.1651, 0.0498, 0.3702, 0.4149]) -Greedy action tensor([-0.4029, -0.5981, 0.2899, 0.1633]) tensor([0.1791, 0.1473, 0.3581, 0.3155]) -Greedy action tensor([ 0.8794, -0.3181, -0.4549, 0.2607]) tensor([0.4753, 0.1435, 0.1252, 0.2560]) -Greedy action tensor([ 0.3649, -0.6588, -0.1604, 0.3128]) tensor([0.3449, 0.1239, 0.2039, 0.3273]) -Greedy action tensor([-0.1835, -0.5066, 0.5185, 0.0593]) tensor([0.1993, 0.1443, 0.4022, 0.2541]) -Greedy action tensor([ 0.3549, -0.3402, 0.6366, 0.4623]) tensor([0.2539, 0.1267, 0.3366, 0.2827]) -Greedy action tensor([ 0.3705, -0.5467, 0.2899, 1.7012]) tensor([0.1638, 0.0655, 0.1511, 0.6197]) -Greedy action tensor([ 1.4663, -1.2360, 0.7373, 0.4568]) tensor([0.5225, 0.0350, 0.2521, 0.1904]) -Greedy action tensor([0.9826, 0.3774, 0.7741, 0.0354]) tensor([0.3642, 0.1989, 0.2957, 0.1413]) -Greedy action tensor([-1.4264, -0.4482, -1.3779, -0.0375]) tensor([0.1147, 0.3050, 0.1204, 0.4599]) -Greedy action tensor([-0.2224, -0.0930, 0.9870, 0.0094]) tensor([0.1481, 0.1686, 0.4965, 0.1868]) -Greedy action tensor([ 1.3396, -0.3742, -0.2606, 1.0618]) tensor([0.4674, 0.0842, 0.0943, 0.3540]) -Greedy action tensor([ 1.0740, -0.4497, 0.6410, 0.4132]) tensor([0.4197, 0.0914, 0.2722, 0.2167]) -Greedy action tensor([ 0.3199, -0.6793, -0.1165, -0.3978]) tensor([0.3996, 0.1471, 0.2583, 0.1950]) -Greedy action tensor([ 1.5983, 1.1282, -0.4023, 0.9594]) tensor([0.4371, 0.2731, 0.0591, 0.2307]) -Greedy action tensor([-0.5927, 0.6650, 0.6351, -0.6856]) tensor([0.1131, 0.3978, 0.3861, 0.1031]) -Greedy action tensor([-0.8133, -0.6812, -0.9005, 0.1963]) tensor([0.1723, 0.1967, 0.1580, 0.4730]) -Greedy action tensor([ 1.1294, -0.7398, 1.2206, 0.4783]) tensor([0.3608, 0.0557, 0.3953, 0.1882]) -Greedy action tensor([ 0.3688, -0.9591, 0.3645, 0.6906]) tensor([0.2747, 0.0728, 0.2735, 0.3790]) -Greedy action tensor([ 0.6525, -0.3351, 0.0294, -0.6196]) tensor([0.4568, 0.1702, 0.2450, 0.1280]) -Greedy action tensor([ 0.1681, -1.8123, 0.1153, 0.4590]) tensor([0.2920, 0.0403, 0.2770, 0.3906]) -Greedy action tensor([-0.9444, 0.0725, -0.2798, -1.2601]) tensor([0.1553, 0.4294, 0.3019, 0.1133]) -Greedy action tensor([ 1.3811, -0.6250, 0.7517, 0.5152]) tensor([0.4789, 0.0644, 0.2552, 0.2015]) -Greedy action tensor([ 0.2710, -1.2649, 0.0340, 0.3555]) tensor([0.3234, 0.0696, 0.2551, 0.3519]) -Greedy action tensor([-0.2712, -0.6227, -0.2830, -0.6449]) tensor([0.2959, 0.2082, 0.2924, 0.2036]) -Greedy action tensor([-0.8145, -0.6193, 1.0879, -0.2768]) tensor([0.0941, 0.1144, 0.6305, 0.1611]) -Greedy action tensor([ 0.8781, -2.3128, -0.0437, 0.3275]) tensor([0.4961, 0.0204, 0.1974, 0.2861]) -Greedy action tensor([ 1.7570, -1.0455, -0.1940, 1.1005]) tensor([0.5809, 0.0352, 0.0826, 0.3013]) -Greedy action tensor([-0.4807, 0.9721, 0.3107, -0.7415]) tensor([0.1212, 0.5181, 0.2674, 0.0934]) -Greedy action tensor([-0.1692, -1.1401, -0.7875, 0.0269]) tensor([0.3190, 0.1208, 0.1719, 0.3882]) -Greedy action tensor([ 0.5773, -1.1445, -1.7836, 0.1909]) tensor([0.5121, 0.0915, 0.0483, 0.3480]) -Greedy action tensor([-0.5240, -0.4405, -0.5589, -1.1689]) tensor([0.2795, 0.3039, 0.2700, 0.1467]) -Greedy action tensor([ 0.0666, -1.1050, -0.9212, 0.4415]) tensor([0.3188, 0.0988, 0.1187, 0.4638]) -Greedy action tensor([ 0.9083, -0.6370, 0.5190, 0.9377]) tensor([0.3424, 0.0730, 0.2320, 0.3526]) -Greedy action tensor([-0.3187, -1.1249, -0.2560, 0.0133]) tensor([0.2561, 0.1144, 0.2727, 0.3569]) -Greedy action tensor([-0.2527, -0.0931, -0.4791, 0.6205]) tensor([0.1864, 0.2186, 0.1486, 0.4463]) -Greedy action tensor([-0.5413, -0.2787, -1.1931, -0.1467]) tensor([0.2323, 0.3020, 0.1210, 0.3446]) -Greedy action tensor([-0.7759, -0.3533, -0.9541, -0.1404]) tensor([0.1905, 0.2906, 0.1594, 0.3596]) -Greedy action tensor([ 1.5753, -0.6771, -0.4785, 0.7268]) tensor([0.6019, 0.0633, 0.0772, 0.2576]) -Greedy action tensor([ 0.4376, -0.4694, -0.2635, 0.3369]) tensor([0.3566, 0.1440, 0.1769, 0.3225]) -Greedy action tensor([ 0.9539, 0.2024, 0.0318, -0.3186]) tensor([0.4652, 0.2194, 0.1850, 0.1303]) -Greedy action tensor([ 1.2976, -0.4055, -0.3890, -0.2355]) tensor([0.6317, 0.1150, 0.1170, 0.1364]) -Greedy action tensor([ 1.5918, -0.4690, -0.6172, 0.8099]) tensor([0.5901, 0.0751, 0.0648, 0.2700]) -Greedy action tensor([ 1.9376, -0.5650, -0.2899, 0.9118]) tensor([0.6459, 0.0529, 0.0696, 0.2316]) -Greedy action tensor([ 1.7193, -0.3527, -0.3451, 0.5967]) tensor([0.6336, 0.0798, 0.0804, 0.2062]) -Greedy action tensor([ 1.1509, -0.0535, -0.6712, 0.2639]) tensor([0.5338, 0.1601, 0.0863, 0.2199]) -Greedy action tensor([ 1.6461, -0.4961, -0.2501, 0.3811]) tensor([0.6452, 0.0757, 0.0969, 0.1821]) -Greedy action tensor([ 1.0316, -0.3779, -0.4917, 0.2866]) tensor([0.5163, 0.1261, 0.1125, 0.2451]) -Greedy action tensor([ 1.4331, -0.5136, -0.1154, 0.1717]) tensor([0.6103, 0.0871, 0.1297, 0.1729]) -Greedy action tensor([2.6278, 0.4346, 0.3909, 0.1283]) tensor([0.7690, 0.0858, 0.0821, 0.0631]) -Greedy action tensor([ 2.0401, -0.0605, -0.5562, 0.2860]) tensor([0.7299, 0.0893, 0.0544, 0.1263]) -Greedy action tensor([ 2.1276, -0.2511, -0.2236, 0.5109]) tensor([0.7212, 0.0668, 0.0687, 0.1432]) -Greedy action tensor([ 2.3955, -0.7522, 0.2346, 0.8243]) tensor([0.7321, 0.0314, 0.0844, 0.1521]) -Greedy action tensor([ 1.7202, -0.8042, -0.2512, 0.5909]) tensor([0.6483, 0.0519, 0.0903, 0.2096]) -Greedy action tensor([ 2.3004, -1.0742, -0.3580, 0.9839]) tensor([0.7287, 0.0249, 0.0510, 0.1953]) -Greedy action tensor([ 1.3706, -0.1556, -0.8130, 0.5135]) tensor([0.5700, 0.1239, 0.0642, 0.2419]) -Greedy action tensor([ 1.2918, -0.3401, -0.7590, 0.1333]) tensor([0.6104, 0.1194, 0.0785, 0.1917]) -Greedy action tensor([ 1.6939, -0.7107, -0.0290, -0.0322]) tensor([0.6912, 0.0624, 0.1234, 0.1230]) -Greedy action tensor([ 1.1663, 0.2743, -0.1676, -0.2076]) tensor([0.5191, 0.2127, 0.1368, 0.1314]) -Greedy action tensor([ 0.8149, -0.0498, -0.3790, 0.3173]) tensor([0.4288, 0.1806, 0.1299, 0.2607]) -Greedy action tensor([ 1.3759, -0.5586, -0.3851, 0.5670]) tensor([0.5676, 0.0820, 0.0976, 0.2528]) -Greedy action tensor([ 1.4711, -0.6774, -0.2950, 0.2867]) tensor([0.6275, 0.0732, 0.1073, 0.1920]) -Greedy action tensor([ 0.8637, -0.2693, -0.0718, -0.0808]) tensor([0.4754, 0.1531, 0.1866, 0.1849]) -Greedy action tensor([ 2.6761, -1.1902, 0.0328, 0.6013]) tensor([0.8212, 0.0172, 0.0584, 0.1031]) -Greedy action tensor([ 1.7301, -0.3261, -0.8557, 0.6960]) tensor([0.6415, 0.0821, 0.0483, 0.2281]) -Greedy action tensor([ 1.2281, -0.2389, -0.5498, 0.3282]) tensor([0.5536, 0.1277, 0.0936, 0.2251]) -Greedy action tensor([ 1.4976, -0.3659, -0.1921, 0.4394]) tensor([0.5928, 0.0920, 0.1094, 0.2058]) -Greedy action tensor([ 0.9321, -0.1754, -0.6800, 0.7320]) tensor([0.4258, 0.1407, 0.0849, 0.3486]) -Greedy action tensor([ 1.4992, -0.6338, -0.3588, 0.6219]) tensor([0.5916, 0.0701, 0.0923, 0.2460]) -Greedy action tensor([ 1.5483, -0.7348, -0.5183, 0.2791]) tensor([0.6624, 0.0675, 0.0839, 0.1862]) -Greedy action tensor([ 1.6653, -1.0166, -0.2944, -0.0629]) tensor([0.7210, 0.0493, 0.1016, 0.1281]) -Greedy action tensor([ 1.3625, -0.3158, -0.4466, 0.2420]) tensor([0.5964, 0.1114, 0.0977, 0.1945]) -Greedy action tensor([ 1.7672, -0.8358, -0.1907, 0.2548]) tensor([0.6966, 0.0516, 0.0983, 0.1535]) -Greedy action tensor([ 1.5385, -0.3879, -0.4216, 0.4668]) tensor([0.6139, 0.0894, 0.0865, 0.2102]) -Greedy action tensor([ 1.7474, -0.5156, -0.2441, 0.0705]) tensor([0.7005, 0.0729, 0.0956, 0.1310]) -Greedy action tensor([ 1.0575, -0.7012, -0.3608, 0.6453]) tensor([0.4816, 0.0830, 0.1166, 0.3189]) -Greedy action tensor([ 1.0588, -0.4861, -0.1051, 0.2772]) tensor([0.5042, 0.1076, 0.1574, 0.2308]) -Greedy action tensor([ 1.6466, -0.4597, -0.1075, 0.1828]) tensor([0.6553, 0.0797, 0.1134, 0.1516]) -Greedy action tensor([ 1.2328, -0.3699, -0.2116, 0.1998]) tensor([0.5577, 0.1123, 0.1316, 0.1985]) -Greedy action tensor([ 2.0613, -0.4505, -0.2164, 0.0874]) tensor([0.7561, 0.0613, 0.0775, 0.1050]) -Greedy action tensor([ 1.1809, 0.1166, -1.0176, 0.0093]) tensor([0.5663, 0.1954, 0.0628, 0.1755]) -Greedy action tensor([ 1.7560, -0.2044, -0.2480, 0.4184]) tensor([0.6502, 0.0915, 0.0876, 0.1706]) -Greedy action tensor([ 1.1588, -0.5065, 0.1405, -0.0771]) tensor([0.5432, 0.1027, 0.1962, 0.1578]) -Greedy action tensor([ 1.5227, -0.6791, -0.1049, 0.2237]) tensor([0.6330, 0.0700, 0.1243, 0.1727]) -Greedy action tensor([ 1.6421, -0.5038, -0.6514, 0.3603]) tensor([0.6687, 0.0782, 0.0675, 0.1856]) -Greedy action tensor([ 0.9027, -0.2123, -0.1851, -0.1796]) tensor([0.4991, 0.1637, 0.1682, 0.1691]) -Greedy action tensor([ 2.0943, -0.7195, -0.5820, 0.7788]) tensor([0.7157, 0.0429, 0.0493, 0.1921]) -Greedy action tensor([ 1.6267, -0.3767, -0.4045, 0.4153]) tensor([0.6394, 0.0862, 0.0839, 0.1904]) -Greedy action tensor([ 1.8866, -0.4354, -0.2268, 0.6205]) tensor([0.6663, 0.0653, 0.0805, 0.1878]) -Greedy action tensor([ 1.2825, -0.2736, -0.8008, 0.5805]) tensor([0.5461, 0.1152, 0.0680, 0.2707]) -Greedy action tensor([ 0.8168, -0.3820, -0.0693, 0.2650]) tensor([0.4368, 0.1317, 0.1800, 0.2515]) -Greedy action tensor([ 2.0632, -0.6166, -0.3451, 0.9127]) tensor([0.6779, 0.0465, 0.0610, 0.2146]) -Greedy action tensor([ 1.2926, -0.5426, -0.3627, -0.1682]) tensor([0.6318, 0.1008, 0.1207, 0.1466]) -Greedy action tensor([ 1.5339, -0.2766, -0.3891, 0.2417]) tensor([0.6311, 0.1032, 0.0923, 0.1734]) -Greedy action tensor([ 1.4117, -0.2950, -0.3820, 0.4868]) tensor([0.5733, 0.1040, 0.0954, 0.2274]) -Greedy action tensor([ 1.3114, -0.2503, -0.1460, -0.0403]) tensor([0.5878, 0.1233, 0.1368, 0.1521]) -Greedy action tensor([ 1.2226, -0.4614, 0.0546, 0.0614]) tensor([0.5526, 0.1026, 0.1718, 0.1730]) -Greedy action tensor([ 1.0569, -0.4833, -0.3309, 0.2630]) tensor([0.5219, 0.1119, 0.1303, 0.2359]) -Greedy action tensor([ 1.5728, -0.1212, -0.7319, 0.8465]) tensor([0.5659, 0.1040, 0.0565, 0.2737]) -Greedy action tensor([ 1.3490, -0.3576, -0.0557, 0.1287]) tensor([0.5807, 0.1054, 0.1425, 0.1714]) -Greedy action tensor([ 1.4816, -0.4042, -0.3175, 0.2804]) tensor([0.6180, 0.0938, 0.1023, 0.1859]) -Greedy action tensor([ 0.9581, -0.3184, -0.4389, 0.4045]) tensor([0.4759, 0.1328, 0.1177, 0.2736]) -Greedy action tensor([ 1.2855, -0.3758, -0.3384, 0.2960]) tensor([0.5686, 0.1080, 0.1121, 0.2114]) -Greedy action tensor([ 1.4024, -0.2631, -0.3730, 0.3608]) tensor([0.5843, 0.1105, 0.0990, 0.2062]) -Greedy action tensor([ 2.1195, -0.6041, -0.2791, 0.2652]) tensor([0.7616, 0.0500, 0.0692, 0.1192]) -Greedy action tensor([ 0.8905, -0.0331, 0.3899, -0.0555]) tensor([0.4181, 0.1660, 0.2535, 0.1624]) -Greedy action tensor([ 1.8736, -0.3931, -0.7908, 0.2019]) tensor([0.7346, 0.0761, 0.0512, 0.1381]) -Greedy action tensor([ 1.5970, -0.2135, -0.5232, 0.1685]) tensor([0.6565, 0.1074, 0.0788, 0.1573]) -Greedy action tensor([ 1.4954, -0.2543, -0.6869, 0.2352]) tensor([0.6369, 0.1107, 0.0718, 0.1806]) -Greedy action tensor([ 1.7265, -0.4556, -0.3573, 0.2738]) tensor([0.6797, 0.0767, 0.0846, 0.1590]) -Greedy action tensor([ 0.8672, -0.1738, -0.0930, -0.0861]) tensor([0.4714, 0.1664, 0.1805, 0.1817]) -Greedy action tensor([ 2.1606, -0.4809, -0.7019, 0.6394]) tensor([0.7425, 0.0529, 0.0424, 0.1622]) -Greedy action tensor([ 1.5406, -0.6922, -0.3675, 0.2037]) tensor([0.6587, 0.0706, 0.0977, 0.1730]) -Greedy action tensor([ 2.1553, -1.1622, -0.0683, 0.5144]) tensor([0.7472, 0.0271, 0.0809, 0.1448]) -Greedy action tensor([ 1.4426, -0.5922, -0.4810, 0.0973]) tensor([0.6505, 0.0850, 0.0950, 0.1694]) -Greedy action tensor([ 2.0446, -0.6440, -0.2768, 0.5673]) tensor([0.7172, 0.0488, 0.0704, 0.1637]) -Greedy action tensor([ 1.5952, -0.1984, -0.5387, 0.2029]) tensor([0.6522, 0.1085, 0.0772, 0.1621]) -Greedy action tensor([ 1.2749, -0.2741, -0.5746, 0.2391]) tensor([0.5798, 0.1232, 0.0912, 0.2058]) -Greedy action tensor([ 1.6419, -0.1641, -0.4086, 0.1905]) tensor([0.6548, 0.1076, 0.0843, 0.1534]) -Greedy action tensor([ 0.8437, -0.4880, -0.0246, -0.3609]) tensor([0.5042, 0.1331, 0.2116, 0.1512]) -Greedy action tensor([ 0.6886, -0.3831, -0.1005, -0.4278]) tensor([0.4708, 0.1612, 0.2139, 0.1542]) -Greedy action tensor([ 0.6322, 0.1995, -0.2033, -0.0248]) tensor([0.3845, 0.2494, 0.1667, 0.1993]) -Greedy action tensor([ 0.7785, -0.6176, -0.0419, -0.4621]) tensor([0.5058, 0.1252, 0.2227, 0.1463]) -Greedy action tensor([ 0.7027, -0.5312, -0.1020, -0.2711]) tensor([0.4726, 0.1376, 0.2113, 0.1785]) -Greedy action tensor([ 0.7654, -0.4913, -0.0518, -0.4353]) tensor([0.4933, 0.1404, 0.2179, 0.1485]) -Greedy action tensor([ 0.7415, -0.5960, -0.0259, -0.0699]) tensor([0.4606, 0.1209, 0.2138, 0.2046]) -Greedy action tensor([ 0.9501, -0.6808, -0.1333, -0.6380]) tensor([0.5752, 0.1126, 0.1947, 0.1175]) -Greedy action tensor([ 0.9167, -0.5284, -0.1163, -0.3112]) tensor([0.5306, 0.1251, 0.1889, 0.1554]) -Greedy action tensor([ 0.7627, -0.4620, -0.0182, -0.2689]) tensor([0.4743, 0.1394, 0.2172, 0.1691]) -Greedy action tensor([ 0.3257, 0.0519, -0.0610, 0.0440]) tensor([0.3131, 0.2381, 0.2127, 0.2362]) -Greedy action tensor([ 0.7069, -0.5465, -0.0555, -0.3601]) tensor([0.4771, 0.1362, 0.2226, 0.1641]) -Greedy action tensor([ 0.8733, -0.9428, 0.0426, -0.5749]) tensor([0.5454, 0.0887, 0.2377, 0.1282]) -Greedy action tensor([ 0.7485, -0.1924, -0.0488, -0.0470]) tensor([0.4363, 0.1703, 0.1966, 0.1969]) -Greedy action tensor([ 0.7549, -0.1809, -0.2204, -0.2950]) tensor([0.4718, 0.1851, 0.1779, 0.1651]) -Greedy action tensor([ 0.7535, -0.2527, 0.0407, -0.3298]) tensor([0.4557, 0.1666, 0.2234, 0.1542]) -Greedy action tensor([ 0.4124, 0.0161, -0.0558, -0.1992]) tensor([0.3519, 0.2368, 0.2204, 0.1909]) -Greedy action tensor([ 0.4976, -0.2172, 0.0991, -0.1000]) tensor([0.3689, 0.1805, 0.2476, 0.2030]) -Greedy action tensor([ 0.5699, -0.2477, 0.1030, -0.2985]) tensor([0.4019, 0.1774, 0.2520, 0.1686]) -Greedy action tensor([ 0.8181, -0.4335, -0.0192, -0.3026]) tensor([0.4890, 0.1399, 0.2117, 0.1594]) -Greedy action tensor([ 0.6199, -0.4947, 0.1221, -0.3239]) tensor([0.4301, 0.1411, 0.2614, 0.1674]) -Greedy action tensor([ 0.7410, 0.0553, -0.1893, 0.1603]) tensor([0.4069, 0.2050, 0.1605, 0.2277]) -Greedy action tensor([ 1.3582, -0.5960, -0.2038, -0.3633]) tensor([0.6535, 0.0926, 0.1371, 0.1168]) -Greedy action tensor([ 0.8974, -0.5071, 0.1390, -0.3079]) tensor([0.4966, 0.1219, 0.2326, 0.1488]) -Greedy action tensor([ 0.9263, -0.5431, -0.1299, -0.2763]) tensor([0.5324, 0.1225, 0.1852, 0.1599]) -Greedy action tensor([ 0.3090, -0.4531, -0.0137, -0.5932]) tensor([0.3851, 0.1797, 0.2789, 0.1562]) -Greedy action tensor([ 0.7142, 0.3128, -0.0708, -0.1278]) tensor([0.3912, 0.2619, 0.1784, 0.1685]) -Greedy action tensor([ 0.5534, -0.3630, -0.1346, -0.0725]) tensor([0.4103, 0.1641, 0.2062, 0.2194]) -Greedy action tensor([ 0.4651, -0.7676, -0.0958, -0.3380]) tensor([0.4329, 0.1262, 0.2471, 0.1939]) -Greedy action tensor([ 7.9452e-01, -3.2869e-01, -1.6195e-04, -5.1409e-01]) tensor([0.4885, 0.1589, 0.2207, 0.1320]) -Greedy action tensor([ 0.8055, 0.1590, -0.4331, -0.3249]) tensor([0.4680, 0.2452, 0.1356, 0.1511]) -Greedy action tensor([ 0.5595, -0.4444, -0.0478, -0.2549]) tensor([0.4248, 0.1557, 0.2314, 0.1881]) -Greedy action tensor([ 0.3642, -0.1560, -0.0034, -0.2863]) tensor([0.3561, 0.2116, 0.2465, 0.1858]) -Greedy action tensor([ 0.7935, -0.4896, 0.0396, -0.1116]) tensor([0.4646, 0.1288, 0.2186, 0.1879]) -Greedy action tensor([ 0.6331, -0.4144, -0.0924, -0.5636]) tensor([0.4679, 0.1642, 0.2265, 0.1414]) -Greedy action tensor([ 0.5646, -0.3351, -0.0696, -0.2793]) tensor([0.4225, 0.1718, 0.2240, 0.1817]) -Greedy action tensor([ 0.7234, -0.7746, -0.1547, -0.2475]) tensor([0.4956, 0.1108, 0.2059, 0.1877]) -Greedy action tensor([ 1.1451, -0.6531, -0.0526, -0.2164]) tensor([0.5801, 0.0961, 0.1751, 0.1487]) -Greedy action tensor([ 0.4369, -0.4790, -0.1069, -0.0867]) tensor([0.3886, 0.1555, 0.2256, 0.2302]) -Greedy action tensor([ 0.7758, -0.3374, -0.0487, -0.0998]) tensor([0.4580, 0.1504, 0.2008, 0.1908]) -Greedy action tensor([ 0.8221, -0.5173, -0.1498, -0.4847]) tensor([0.5233, 0.1371, 0.1980, 0.1416]) -Greedy action tensor([ 1.1450, -0.2735, -0.1777, -0.5726]) tensor([0.5924, 0.1434, 0.1578, 0.1063]) -Greedy action tensor([ 1.1985, -0.6384, -0.1007, -0.5600]) tensor([0.6233, 0.0993, 0.1700, 0.1074]) -Greedy action tensor([ 0.9123, -0.2316, -0.1018, -0.2959]) tensor([0.5050, 0.1609, 0.1832, 0.1509]) -Greedy action tensor([ 0.7863, -0.3306, -0.1258, -0.5629]) tensor([0.5029, 0.1646, 0.2020, 0.1305]) -Greedy action tensor([ 0.4734, 0.2074, -0.1552, -0.0421]) tensor([0.3452, 0.2646, 0.1841, 0.2061]) -Greedy action tensor([ 0.7767, -0.2183, -0.0987, -0.1399]) tensor([0.4574, 0.1691, 0.1906, 0.1829]) -Greedy action tensor([ 0.8286, -0.7403, 0.2420, -0.2936]) tensor([0.4785, 0.0996, 0.2661, 0.1558]) -Greedy action tensor([ 0.5325, -0.1784, -0.0696, 0.0151]) tensor([0.3795, 0.1864, 0.2078, 0.2262]) -Greedy action tensor([ 0.5348, -0.0154, -0.0458, 0.0243]) tensor([0.3654, 0.2108, 0.2045, 0.2193]) -Greedy action tensor([ 0.2722, 0.1105, -0.1505, -0.1860]) tensor([0.3186, 0.2711, 0.2088, 0.2015]) -Greedy action tensor([ 1.0213, -0.6145, -0.0310, -0.3328]) tensor([0.5549, 0.1081, 0.1937, 0.1433]) -Greedy action tensor([ 0.7801, -0.4380, -0.0802, -0.0301]) tensor([0.4622, 0.1367, 0.1955, 0.2056]) -Greedy action tensor([ 0.2165, 0.0243, -0.1218, -0.1541]) tensor([0.3097, 0.2556, 0.2208, 0.2138]) -Greedy action tensor([ 0.9402, -0.8601, -0.0090, -0.2496]) tensor([0.5386, 0.0890, 0.2085, 0.1639]) -Greedy action tensor([ 0.7228, -0.1254, -0.1910, -0.1428]) tensor([0.4444, 0.1903, 0.1782, 0.1870]) -Greedy action tensor([ 0.9271, -0.3831, -0.1190, -0.3802]) tensor([0.5287, 0.1426, 0.1857, 0.1430]) -Greedy action tensor([ 0.7632, -0.6323, -0.0955, -0.4060]) tensor([0.5045, 0.1250, 0.2138, 0.1567]) -Greedy action tensor([ 0.5471, -0.0755, -0.1095, -0.0256]) tensor([0.3818, 0.2049, 0.1980, 0.2153]) -Greedy action tensor([ 0.5475, -0.4092, -0.0904, -0.2172]) tensor([0.4205, 0.1615, 0.2222, 0.1957]) -Greedy action tensor([ 0.5367, -0.0571, 0.0599, -0.0439]) tensor([0.3660, 0.2021, 0.2272, 0.2048]) -Greedy action tensor([ 0.4435, 0.0014, -0.0192, -0.0132]) tensor([0.3441, 0.2212, 0.2167, 0.2180]) -Greedy action tensor([ 0.8456, -0.4802, 0.0578, -0.3985]) tensor([0.4979, 0.1322, 0.2264, 0.1435]) -Greedy action tensor([ 0.9107, -0.6825, -0.0853, -0.3618]) tensor([0.5397, 0.1097, 0.1994, 0.1512]) -Greedy action tensor([ 1.0187, -0.6928, -0.2294, -0.4019]) tensor([0.5851, 0.1057, 0.1679, 0.1413]) -Greedy action tensor([ 1.0231, -0.5447, 0.1091, -0.3099]) tensor([0.5339, 0.1113, 0.2140, 0.1408]) -Greedy action tensor([ 0.2017, 0.0839, -0.0039, -0.1013]) tensor([0.2906, 0.2583, 0.2366, 0.2146]) -Greedy action tensor([ 0.6822, -0.5125, 0.0105, -0.3083]) tensor([0.4577, 0.1386, 0.2338, 0.1700]) -Greedy action tensor([ 0.6868, -0.0887, -0.0939, 0.0446]) tensor([0.4091, 0.1884, 0.1874, 0.2152]) -Greedy action tensor([ 0.5868, -0.1000, 0.0240, -0.2393]) tensor([0.3983, 0.2004, 0.2269, 0.1744]) -Greedy action tensor([ 0.6600, -0.3394, -0.0174, -0.2813]) tensor([0.4413, 0.1624, 0.2241, 0.1722]) -Greedy action tensor([ 0.6000, -0.2859, 0.0943, -0.0968]) tensor([0.3978, 0.1640, 0.2399, 0.1982]) -Greedy action tensor([ 0.7363, -0.2303, 0.0701, -0.2602]) tensor([0.4419, 0.1681, 0.2270, 0.1631]) -Greedy action tensor([ 0.6448, -0.5248, -0.1049, -0.2204]) tensor([0.4537, 0.1409, 0.2144, 0.1910]) -Greedy action tensor([ 0.5936, -0.1051, -0.1531, -0.1472]) tensor([0.4085, 0.2031, 0.1936, 0.1948]) -Greedy action tensor([ 0.6361, -0.3406, -0.1908, -0.2962]) tensor([0.4530, 0.1706, 0.1981, 0.1783]) -Greedy action tensor([ 0.8010, -0.5679, -0.0710, -0.2497]) tensor([0.4945, 0.1258, 0.2068, 0.1729]) -Greedy action tensor([ 0.6264, -0.2050, -0.0575, -0.2532]) tensor([0.4246, 0.1849, 0.2143, 0.1762]) -Greedy action tensor([ 0.3124, 0.0367, 0.0437, -0.1793]) tensor([0.3190, 0.2421, 0.2438, 0.1951]) -Greedy action tensor([ 0.3935, -0.2658, -0.0563, -0.3012]) tensor([0.3768, 0.1949, 0.2403, 0.1881]) -Greedy action tensor([ 0.5280, -0.4360, -0.1098, -0.3551]) tensor([0.4304, 0.1642, 0.2275, 0.1780]) -Greedy action tensor([-1.3934, 0.4913, 0.3299, -0.0939]) tensor([0.0593, 0.3907, 0.3324, 0.2176]) -Greedy action tensor([-1.7384, -0.4666, 0.5895, -0.0347]) tensor([0.0492, 0.1756, 0.5048, 0.2704]) -Greedy action tensor([-1.8835, -0.4394, 0.6447, -0.1356]) tensor([0.0425, 0.1803, 0.5330, 0.2442]) -Greedy action tensor([0.2003, 1.0733, 0.0016, 0.5936]) tensor([0.1756, 0.4203, 0.1439, 0.2602]) -Greedy action tensor([-1.7783, -0.4528, 0.5942, -0.0875]) tensor([0.0478, 0.1800, 0.5128, 0.2594]) -Greedy action tensor([-1.5567, -0.2503, 0.5062, 0.0234]) tensor([0.0574, 0.2120, 0.4518, 0.2788]) -Greedy action tensor([-1.7975, -0.2558, 0.6244, -0.0197]) tensor([0.0438, 0.2044, 0.4930, 0.2589]) -Greedy action tensor([-1.1128, -0.8040, 0.5463, 1.0707]) tensor([0.0606, 0.0826, 0.3186, 0.5382]) -Greedy action tensor([-1.4955, -0.5704, 0.4619, -0.0430]) tensor([0.0672, 0.1695, 0.4760, 0.2873]) -Greedy action tensor([-1.1250, -0.6198, 0.2536, 0.3126]) tensor([0.0923, 0.1529, 0.3663, 0.3885]) -Greedy action tensor([-1.9230, -0.4528, 0.6534, -0.1696]) tensor([0.0412, 0.1792, 0.5417, 0.2379]) -Greedy action tensor([-1.8374, -0.4402, 0.6179, -0.1244]) tensor([0.0450, 0.1818, 0.5238, 0.2494]) -Greedy action tensor([-0.7546, -0.4209, 0.1285, 0.5348]) tensor([0.1184, 0.1653, 0.2864, 0.4299]) -Greedy action tensor([-1.9375, -0.4563, 0.6682, -0.1764]) tensor([0.0404, 0.1777, 0.5469, 0.2350]) -Greedy action tensor([-1.5396, -0.5682, 0.4356, 0.0887]) tensor([0.0627, 0.1657, 0.4521, 0.3195]) -Greedy action tensor([-1.9133, -0.3930, 0.6483, -0.1622]) tensor([0.0412, 0.1883, 0.5334, 0.2372]) -Greedy action tensor([-1.8106, -0.4704, 0.5851, -0.0918]) tensor([0.0468, 0.1787, 0.5135, 0.2610]) -Greedy action tensor([-1.7942, -0.3693, 0.5981, -0.0842]) tensor([0.0462, 0.1923, 0.5058, 0.2557]) -Greedy action tensor([-1.9246, -0.4446, 0.6563, -0.1718]) tensor([0.0410, 0.1802, 0.5420, 0.2368]) -Greedy action tensor([-1.2812, -0.0685, 0.5088, 0.2946]) tensor([0.0658, 0.2214, 0.3944, 0.3184]) -Greedy action tensor([-1.9022, -0.4633, 0.6866, -0.1166]) tensor([0.0408, 0.1721, 0.5436, 0.2435]) -Greedy action tensor([-1.2959, -0.5823, 0.3090, 0.3756]) tensor([0.0750, 0.1530, 0.3731, 0.3988]) -Greedy action tensor([-0.0198, 0.2341, 0.5385, 1.2564]) tensor([0.1312, 0.1692, 0.2294, 0.4702]) -Greedy action tensor([-1.6952, -0.4034, 0.5523, -0.1310]) tensor([0.0530, 0.1927, 0.5012, 0.2531]) -Greedy action tensor([-1.3917, 0.5474, 0.3162, -0.0505]) tensor([0.0578, 0.4020, 0.3190, 0.2211]) -Greedy action tensor([-1.8859, -0.4365, 0.6493, -0.1227]) tensor([0.0422, 0.1797, 0.5322, 0.2459]) -Greedy action tensor([-1.6747, -0.5145, 0.6063, 0.0336]) tensor([0.0513, 0.1636, 0.5020, 0.2831]) -Greedy action tensor([-1.8126, -0.3220, 0.6127, -0.0935]) tensor([0.0448, 0.1989, 0.5064, 0.2499]) -Greedy action tensor([-0.5431, -0.1974, 0.8810, 1.5567]) tensor([0.0679, 0.0959, 0.2820, 0.5542]) -Greedy action tensor([-1.9105, -0.4419, 0.6477, -0.1610]) tensor([0.0417, 0.1809, 0.5379, 0.2396]) -Greedy action tensor([-1.8988, -0.3501, 0.6332, -0.1564]) tensor([0.0417, 0.1961, 0.5242, 0.2380]) -Greedy action tensor([-1.9298, -0.4963, 0.2445, -0.3670]) tensor([0.0533, 0.2235, 0.4688, 0.2544]) -Greedy action tensor([-1.9365, -0.4454, 0.6666, -0.1752]) tensor([0.0404, 0.1793, 0.5453, 0.2350]) -Greedy action tensor([-1.9076, -0.3701, 0.6391, -0.1542]) tensor([0.0413, 0.1923, 0.5276, 0.2387]) -Greedy action tensor([-1.7350, -0.3324, 0.5320, -0.0687]) tensor([0.0500, 0.2032, 0.4823, 0.2645]) -Greedy action tensor([-1.3215, 0.5396, 0.2864, -0.0056]) tensor([0.0619, 0.3982, 0.3091, 0.2308]) -Greedy action tensor([-1.7217, -0.4219, 0.5444, -0.0667]) tensor([0.0512, 0.1877, 0.4933, 0.2678]) -Greedy action tensor([-1.8626, -0.2549, 0.6057, -0.1170]) tensor([0.0425, 0.2122, 0.5017, 0.2436]) -Greedy action tensor([-1.9037, -0.4299, 0.6461, -0.1573]) tensor([0.0418, 0.1826, 0.5357, 0.2399]) -Greedy action tensor([-1.8202, -0.4476, 0.7105, -0.1254]) tensor([0.0436, 0.1719, 0.5473, 0.2372]) -Greedy action tensor([-0.9836, 0.0980, 0.1708, -0.0202]) tensor([0.1026, 0.3027, 0.3256, 0.2690]) -Greedy action tensor([-1.4122, -0.5058, 0.4370, 0.3767]) tensor([0.0632, 0.1565, 0.4019, 0.3783]) -Greedy action tensor([-1.6315, -0.1544, 0.4860, -0.0881]) tensor([0.0544, 0.2384, 0.4524, 0.2548]) -Greedy action tensor([-1.9034, -0.4436, 0.6445, -0.1630]) tensor([0.0420, 0.1810, 0.5373, 0.2396]) -Greedy action tensor([-1.7859, -0.4098, 0.7075, 0.0112]) tensor([0.0433, 0.1714, 0.5240, 0.2612]) -Greedy action tensor([-1.7129, -0.4210, 0.5536, 0.0022]) tensor([0.0504, 0.1834, 0.4861, 0.2801]) -Greedy action tensor([-1.3718, -0.0908, 0.4010, 0.1747]) tensor([0.0659, 0.2371, 0.3878, 0.3092]) -Greedy action tensor([-1.8417, -0.3941, 0.6056, -0.1394]) tensor([0.0448, 0.1907, 0.5183, 0.2461]) -Greedy action tensor([-1.7284, -0.3395, 0.5511, -0.0522]) tensor([0.0497, 0.1993, 0.4855, 0.2656]) -Greedy action tensor([-0.9700, -0.5407, 0.7444, 1.0062]) tensor([0.0653, 0.1004, 0.3628, 0.4715]) -Greedy action tensor([-0.9360, 0.2991, 0.2591, -0.1557]) tensor([0.1008, 0.3465, 0.3329, 0.2199]) -Greedy action tensor([-1.6637, -0.1641, 0.6708, 0.0708]) tensor([0.0466, 0.2087, 0.4809, 0.2639]) -Greedy action tensor([-1.7046, -0.4585, 0.5795, 0.0107]) tensor([0.0504, 0.1751, 0.4945, 0.2800]) -Greedy action tensor([-1.8425, -0.4377, 0.6201, -0.1196]) tensor([0.0446, 0.1818, 0.5236, 0.2499]) -Greedy action tensor([-1.9308, -0.4478, 0.6676, -0.1688]) tensor([0.0405, 0.1786, 0.5448, 0.2361]) -Greedy action tensor([-1.1071, 0.3262, 0.1902, 0.0169]) tensor([0.0838, 0.3515, 0.3068, 0.2580]) -Greedy action tensor([-1.5817, -0.3893, 0.4967, 0.0373]) tensor([0.0577, 0.1901, 0.4610, 0.2912]) -Greedy action tensor([-1.8404, -0.4884, 0.6106, -0.1254]) tensor([0.0454, 0.1755, 0.5267, 0.2523]) -Greedy action tensor([-1.2397, 0.0312, 0.2670, 0.0030]) tensor([0.0797, 0.2842, 0.3598, 0.2763]) -Greedy action tensor([-1.5153, -0.5759, 0.4326, 0.0916]) tensor([0.0643, 0.1644, 0.4508, 0.3205]) -Greedy action tensor([-1.4474, 0.2165, 0.3909, -0.0764]) tensor([0.0606, 0.3199, 0.3808, 0.2387]) -Greedy action tensor([-1.2716, 0.3350, 0.3299, -0.0993]) tensor([0.0705, 0.3517, 0.3499, 0.2278]) -Greedy action tensor([-0.6823, 0.2656, 0.1794, -0.1510]) tensor([0.1307, 0.3374, 0.3095, 0.2224]) -Greedy action tensor([-1.8913, -0.4373, 0.6400, -0.1546]) tensor([0.0425, 0.1819, 0.5342, 0.2413]) -Greedy action tensor([-1.7823, -0.4246, 0.5758, -0.1311]) tensor([0.0484, 0.1881, 0.5114, 0.2522]) -Greedy action tensor([-1.8159, -0.1337, 0.5595, -0.0624]) tensor([0.0437, 0.2347, 0.4695, 0.2521]) -Greedy action tensor([-1.8865, -0.3737, 0.6396, -0.1497]) tensor([0.0422, 0.1913, 0.5271, 0.2394]) -Greedy action tensor([-0.8741, 0.6960, 0.1444, -0.0204]) tensor([0.0915, 0.4400, 0.2535, 0.2150]) -Greedy action tensor([0.1484, 0.0273, 0.7431, 1.5610]) tensor([0.1281, 0.1135, 0.2322, 0.5262]) -Greedy action tensor([-1.6685, -0.5238, 0.5132, -0.0223]) tensor([0.0550, 0.1727, 0.4871, 0.2852]) -Greedy action tensor([-1.9375, -0.4424, 0.6651, -0.1761]) tensor([0.0404, 0.1800, 0.5448, 0.2349]) -Greedy action tensor([-1.7530, -0.1219, 0.5169, -0.0717]) tensor([0.0473, 0.2415, 0.4574, 0.2539]) -Greedy action tensor([-0.8818, 0.2714, 0.4786, 0.9500]) tensor([0.0699, 0.2214, 0.2724, 0.4364]) -Greedy action tensor([-1.2232, -0.5262, 0.3846, 0.3273]) tensor([0.0787, 0.1579, 0.3926, 0.3708]) -Greedy action tensor([-0.9238, -0.1105, 0.3234, -0.4432]) tensor([0.1197, 0.2700, 0.4167, 0.1936]) -Greedy action tensor([-1.6148, -0.5433, 0.5389, -0.0130]) tensor([0.0571, 0.1669, 0.4924, 0.2836]) -Greedy action tensor([-1.9384, -0.4351, 0.6612, -0.1756]) tensor([0.0404, 0.1814, 0.5430, 0.2352]) -Greedy action tensor([-1.9355, -0.4555, 0.6741, -0.1658]) tensor([0.0402, 0.1767, 0.5469, 0.2361]) -Greedy action tensor([-1.9162, -0.4419, 0.6664, -0.1519]) tensor([0.0409, 0.1787, 0.5415, 0.2389]) -Greedy action tensor([-1.8569, -0.2794, 0.6081, -0.1128]) tensor([0.0429, 0.2076, 0.5043, 0.2452]) -Greedy action tensor([-1.2092, -0.5252, 0.3374, 0.5615]) tensor([0.0738, 0.1462, 0.3465, 0.4335]) -Greedy action tensor([-1.5023, -0.2876, 0.4669, -0.0849]) tensor([0.0639, 0.2151, 0.4575, 0.2635]) -Greedy action tensor([-0.8940, -0.9206, -0.0859, 0.6907]) tensor([0.1099, 0.1071, 0.2467, 0.5363]) -Greedy action tensor([ 0.7540, -0.3949, -0.2502, 0.1040]) tensor([0.4534, 0.1437, 0.1661, 0.2367]) -Greedy action tensor([-0.6843, -0.4563, -1.2275, 0.1015]) tensor([0.1988, 0.2497, 0.1155, 0.4361]) -Greedy action tensor([-0.2619, -1.1005, 0.5844, -0.8307]) tensor([0.2310, 0.0998, 0.5384, 0.1308]) -Greedy action tensor([ 0.2818, 0.5660, 0.0751, -0.2612]) tensor([0.2686, 0.3569, 0.2184, 0.1561]) -Greedy action tensor([ 0.3393, 0.1029, 0.4132, -0.3570]) tensor([0.2972, 0.2346, 0.3200, 0.1481]) -Greedy action tensor([-0.0355, -0.1129, 0.4745, 0.2665]) tensor([0.2023, 0.1872, 0.3369, 0.2736]) -Greedy action tensor([ 0.9980, -0.4318, -0.2223, 1.6635]) tensor([0.2874, 0.0688, 0.0848, 0.5591]) -Greedy action tensor([ 1.1888, -0.4603, -0.1649, 0.3921]) tensor([0.5260, 0.1011, 0.1358, 0.2371]) -Greedy action tensor([ 0.4438, -0.0240, 0.1542, 0.9066]) tensor([0.2523, 0.1580, 0.1889, 0.4008]) -Greedy action tensor([-0.0336, 0.4981, -0.1046, -0.9094]) tensor([0.2469, 0.4202, 0.2300, 0.1029]) -Greedy action tensor([ 0.9728, -0.3485, 0.5522, 0.7614]) tensor([0.3659, 0.0976, 0.2403, 0.2962]) -Greedy action tensor([ 0.4947, -1.1129, 0.1616, -0.0351]) tensor([0.3991, 0.0800, 0.2860, 0.2349]) -Greedy action tensor([ 0.8918, -0.7914, 0.1418, -0.1431]) tensor([0.4967, 0.0923, 0.2346, 0.1765]) -Greedy action tensor([ 1.1104, -0.4932, 0.5910, 0.0719]) tensor([0.4651, 0.0936, 0.2767, 0.1646]) -Greedy action tensor([-0.4684, -1.2222, 0.1147, -0.2635]) tensor([0.2227, 0.1048, 0.3990, 0.2734]) -Greedy action tensor([ 1.4569, -1.0732, 1.3249, 0.6776]) tensor([0.4141, 0.0330, 0.3629, 0.1900]) -Greedy action tensor([-0.4041, -0.3019, -0.2626, -0.8145]) tensor([0.2549, 0.2823, 0.2937, 0.1691]) -Greedy action tensor([ 0.4876, -0.1773, -0.0032, -0.4886]) tensor([0.3995, 0.2055, 0.2445, 0.1505]) -Greedy action tensor([-0.1149, -1.2437, 1.6008, -0.2236]) tensor([0.1285, 0.0416, 0.7146, 0.1153]) -Greedy action tensor([-0.1150, -0.6187, -0.9128, 0.5131]) tensor([0.2545, 0.1538, 0.1146, 0.4770]) -Greedy action tensor([ 0.5756, -1.8845, 0.0328, 0.0565]) tensor([0.4422, 0.0378, 0.2569, 0.2631]) -Greedy action tensor([ 0.2421, -0.9892, 1.1763, -0.4474]) tensor([0.2305, 0.0673, 0.5866, 0.1157]) -Greedy action tensor([-1.1290, -0.2927, -0.7720, 0.4363]) tensor([0.1050, 0.2424, 0.1501, 0.5025]) -Greedy action tensor([-1.3340, -0.7324, 0.0171, 0.0854]) tensor([0.0924, 0.1687, 0.3569, 0.3821]) -Greedy action tensor([-0.3022, 0.8358, 1.4940, -0.0638]) tensor([0.0876, 0.2733, 0.5279, 0.1112]) -Greedy action tensor([ 1.3624, -0.6175, 0.2877, 0.0663]) tensor([0.5704, 0.0788, 0.1947, 0.1561]) -Greedy action tensor([-0.3747, -0.1569, 0.9090, -0.2677]) tensor([0.1436, 0.1785, 0.5182, 0.1598]) -Greedy action tensor([-0.4127, -1.1717, -0.3136, -0.5097]) tensor([0.2874, 0.1345, 0.3173, 0.2608]) -Greedy action tensor([ 0.1425, 0.5566, 1.1041, -0.7141]) tensor([0.1801, 0.2724, 0.4710, 0.0765]) -Greedy action tensor([-0.1228, 0.6387, 0.2163, -0.4043]) tensor([0.1887, 0.4041, 0.2649, 0.1424]) -Greedy action tensor([ 0.8060, -0.7225, 0.4339, 1.1403]) tensor([0.3028, 0.0657, 0.2087, 0.4229]) -Greedy action tensor([ 0.1623, -0.7568, 1.7201, -0.2237]) tensor([0.1465, 0.0584, 0.6955, 0.0996]) -Greedy action tensor([ 0.2632, -0.7058, -0.4925, 0.7963]) tensor([0.2814, 0.1068, 0.1322, 0.4796]) -Greedy action tensor([ 0.0323, -0.5624, 0.6566, 0.5020]) tensor([0.1993, 0.1100, 0.3720, 0.3187]) -Greedy action tensor([ 0.5008, -1.0037, -0.2398, 0.4551]) tensor([0.3767, 0.0837, 0.1796, 0.3599]) -Greedy action tensor([ 0.9718, -1.1166, 0.8694, 0.1101]) tensor([0.4083, 0.0506, 0.3686, 0.1725]) -Greedy action tensor([-0.6555, -0.5504, 0.4691, -1.1725]) tensor([0.1728, 0.1920, 0.5321, 0.1031]) -Greedy action tensor([ 0.2421, -1.4970, 1.1906, 0.7958]) tensor([0.1819, 0.0320, 0.4697, 0.3165]) -Greedy action tensor([ 0.2966, -0.4378, 0.6248, 0.2774]) tensor([0.2598, 0.1246, 0.3607, 0.2549]) -Greedy action tensor([-0.7943, -0.5395, 1.3063, -0.7169]) tensor([0.0866, 0.1118, 0.7079, 0.0936]) -Greedy action tensor([ 0.2239, 0.3482, -0.8906, -0.7551]) tensor([0.3526, 0.3993, 0.1157, 0.1325]) -Greedy action tensor([-0.0554, -1.1784, -0.0482, -0.1195]) tensor([0.3058, 0.0995, 0.3080, 0.2868]) -Greedy action tensor([-0.7073, 0.0591, -0.8533, 0.2870]) tensor([0.1488, 0.3203, 0.1286, 0.4023]) -Greedy action tensor([-0.7417, -2.2529, -0.0167, 0.9276]) tensor([0.1164, 0.0257, 0.2403, 0.6177]) -Greedy action tensor([ 0.8634, -1.4188, 0.4780, 1.1813]) tensor([0.3168, 0.0323, 0.2155, 0.4354]) -Greedy action tensor([ 1.0046, -1.7084, -0.5027, 0.1066]) tensor([0.5899, 0.0391, 0.1307, 0.2403]) -Greedy action tensor([ 0.5305, -0.4989, 0.7266, 0.9460]) tensor([0.2446, 0.0874, 0.2975, 0.3705]) -Greedy action tensor([ 1.1584, -0.9047, -0.7253, 1.1582]) tensor([0.4388, 0.0558, 0.0667, 0.4387]) -Greedy action tensor([-0.5455, 0.4942, -1.3138, 0.5720]) tensor([0.1361, 0.3848, 0.0631, 0.4160]) -Greedy action tensor([1.5342, 0.6384, 0.6645, 0.5363]) tensor([0.4554, 0.1859, 0.1908, 0.1679]) -Greedy action tensor([-0.2177, -0.8078, -0.8199, 1.0869]) tensor([0.1728, 0.0958, 0.0946, 0.6369]) -Greedy action tensor([ 0.7846, -0.3027, -0.3446, 0.1917]) tensor([0.4518, 0.1523, 0.1461, 0.2497]) -Greedy action tensor([ 0.3825, -0.4849, 1.2791, -0.0181]) tensor([0.2202, 0.0925, 0.5398, 0.1475]) -Greedy action tensor([ 0.0854, -1.4919, 0.3190, -0.5445]) tensor([0.3331, 0.0688, 0.4207, 0.1774]) -Greedy action tensor([ 0.4781, 0.1074, 0.1647, -0.0215]) tensor([0.3303, 0.2279, 0.2414, 0.2004]) -Greedy action tensor([ 1.5921, -0.7359, 1.2001, 0.6351]) tensor([0.4636, 0.0452, 0.3132, 0.1780]) -Greedy action tensor([0.5616, 0.0446, 0.6598, 0.0979]) tensor([0.3004, 0.1792, 0.3314, 0.1890]) -Greedy action tensor([-1.3699, -2.0463, -0.0605, 0.2512]) tensor([0.0974, 0.0495, 0.3606, 0.4925]) -Greedy action tensor([ 1.1553, -1.1932, 0.1772, 0.4675]) tensor([0.5065, 0.0484, 0.1905, 0.2546]) -Greedy action tensor([ 0.2427, -2.3343, -0.1332, -0.4088]) tensor([0.4378, 0.0333, 0.3006, 0.2282]) -Greedy action tensor([ 1.3011, -0.3322, 0.7416, -0.2401]) tensor([0.5048, 0.0986, 0.2885, 0.1081]) -Greedy action tensor([ 0.2631, -0.9645, 2.1613, 0.0256]) tensor([0.1142, 0.0335, 0.7622, 0.0901]) -Greedy action tensor([ 0.6652, -0.3282, 0.0943, 0.0151]) tensor([0.4069, 0.1507, 0.2299, 0.2124]) -Greedy action tensor([-0.3528, -0.8427, -0.3554, 0.1310]) tensor([0.2363, 0.1448, 0.2357, 0.3833]) -Greedy action tensor([-0.4568, -2.3268, -0.3731, 0.9753]) tensor([0.1555, 0.0240, 0.1691, 0.6514]) -Greedy action tensor([-1.0702, -0.6241, -0.6539, -0.7735]) tensor([0.1844, 0.2880, 0.2796, 0.2480]) -Greedy action tensor([-0.2512, -0.8983, -0.6375, -0.3971]) tensor([0.3260, 0.1707, 0.2215, 0.2818]) -Greedy action tensor([ 0.9176, -1.6052, 2.2111, -0.0385]) tensor([0.1957, 0.0157, 0.7134, 0.0752]) -Greedy action tensor([ 1.5485, -0.9224, 2.2446, 0.1567]) tensor([0.2995, 0.0253, 0.6007, 0.0745]) -Greedy action tensor([-0.0280, -0.8215, 1.0681, 0.2098]) tensor([0.1750, 0.0792, 0.5238, 0.2220]) -Greedy action tensor([-0.1042, -0.5260, 0.6293, -0.0870]) tensor([0.2103, 0.1379, 0.4379, 0.2139]) -Greedy action tensor([0.8765, 0.3363, 0.7580, 0.4342]) tensor([0.3212, 0.1871, 0.2853, 0.2064]) -Greedy action tensor([-0.7932, -1.7280, -0.1927, 0.1332]) tensor([0.1742, 0.0684, 0.3175, 0.4399]) -Greedy action tensor([-0.5753, -0.1866, 0.2607, -0.7556]) tensor([0.1780, 0.2626, 0.4107, 0.1487]) -Greedy action tensor([-0.2398, 0.4446, -0.8803, -0.4050]) tensor([0.2295, 0.4550, 0.1209, 0.1946]) -Greedy action tensor([ 1.6549, -0.4609, 0.5294, 0.8273]) tensor([0.5313, 0.0640, 0.1724, 0.2322]) -Greedy action tensor([1.0454, 0.5573, 0.1560, 0.0806]) tensor([0.4157, 0.2551, 0.1708, 0.1584]) -Greedy action tensor([-0.4149, -0.8097, 0.5266, 0.2215]) tensor([0.1632, 0.1100, 0.4184, 0.3084]) -Greedy action tensor([ 0.1075, -0.8941, -0.2351, 1.1796]) tensor([0.2001, 0.0735, 0.1420, 0.5844]) -Greedy action tensor([-0.3482, 0.1861, 0.7875, 0.2574]) tensor([0.1307, 0.2230, 0.4069, 0.2395]) -Greedy action tensor([-0.4130, -0.7247, 1.2988, -0.8670]) tensor([0.1265, 0.0926, 0.7006, 0.0803]) -Greedy action tensor([ 1.8674, -0.0432, -0.2077, 0.5528]) tensor([0.6484, 0.0960, 0.0814, 0.1742]) -Greedy action tensor([ 1.5209, -0.8207, -0.6172, 0.5073]) tensor([0.6341, 0.0610, 0.0748, 0.2301]) -Greedy action tensor([ 1.4663, -0.6701, -0.4387, 0.1604]) tensor([0.6503, 0.0768, 0.0968, 0.1762]) -Greedy action tensor([ 2.3428, -0.5741, -0.6742, 0.8992]) tensor([0.7468, 0.0404, 0.0366, 0.1763]) -Greedy action tensor([ 1.8041, -0.6987, -0.2253, 0.1408]) tensor([0.7129, 0.0584, 0.0937, 0.1351]) -Greedy action tensor([ 1.4250, -0.6172, -0.5734, 0.2773]) tensor([0.6318, 0.0820, 0.0857, 0.2005]) -Greedy action tensor([ 1.3174, -0.1640, -0.8740, 0.5902]) tensor([0.5487, 0.1247, 0.0613, 0.2652]) -Greedy action tensor([ 1.3037, -0.5505, -0.2426, 0.5597]) tensor([0.5420, 0.0849, 0.1155, 0.2576]) -Greedy action tensor([ 2.2869, -1.5046, -0.1097, 0.4454]) tensor([0.7861, 0.0177, 0.0716, 0.1247]) -Greedy action tensor([ 1.3537, -0.3612, -0.6570, 0.4070]) tensor([0.5876, 0.1058, 0.0787, 0.2280]) -Greedy action tensor([ 1.0999, -0.2946, -0.3253, 0.3698]) tensor([0.5075, 0.1259, 0.1220, 0.2446]) -Greedy action tensor([ 1.0546, -0.1637, -0.1533, 0.0370]) tensor([0.5112, 0.1512, 0.1528, 0.1848]) -Greedy action tensor([ 1.7761, -0.5022, -0.3604, 0.3686]) tensor([0.6825, 0.0699, 0.0806, 0.1670]) -Greedy action tensor([ 1.6519, -0.4062, -0.3033, 0.1368]) tensor([0.6716, 0.0858, 0.0951, 0.1476]) -Greedy action tensor([ 1.0285, -0.5648, -0.0406, 0.0588]) tensor([0.5193, 0.1055, 0.1783, 0.1969]) -Greedy action tensor([ 1.1960, -0.5568, -0.0436, 0.0636]) tensor([0.5602, 0.0971, 0.1622, 0.1805]) -Greedy action tensor([ 1.1458, -0.3978, -0.1695, 0.2974]) tensor([0.5235, 0.1118, 0.1405, 0.2241]) -Greedy action tensor([ 1.4321, 0.1414, -0.3269, 0.1588]) tensor([0.5790, 0.1593, 0.0997, 0.1621]) -Greedy action tensor([ 1.5026, -0.5710, -0.3265, 0.2830]) tensor([0.6322, 0.0795, 0.1015, 0.1867]) -Greedy action tensor([ 1.4105, -0.6965, -0.1039, 0.4350]) tensor([0.5819, 0.0708, 0.1280, 0.2194]) -Greedy action tensor([ 2.4880, -1.6978, 0.4019, -0.2337]) tensor([0.8298, 0.0126, 0.1030, 0.0546]) -Greedy action tensor([ 1.4022, -0.2661, -0.1308, -0.1704]) tensor([0.6204, 0.1170, 0.1339, 0.1287]) -Greedy action tensor([ 0.8372, -0.3459, 0.0234, -0.0594]) tensor([0.4635, 0.1420, 0.2054, 0.1891]) -Greedy action tensor([ 1.7275, -0.5176, -0.4436, 0.5759]) tensor([0.6510, 0.0690, 0.0742, 0.2058]) -Greedy action tensor([ 1.2213, -0.1878, -0.2210, 0.1675]) tensor([0.5467, 0.1336, 0.1292, 0.1906]) -Greedy action tensor([ 1.2982, 0.1071, -0.5580, 0.1868]) tensor([0.5589, 0.1698, 0.0873, 0.1839]) -Greedy action tensor([ 0.8267, -0.1838, -0.0369, 0.1426]) tensor([0.4366, 0.1590, 0.1841, 0.2203]) -Greedy action tensor([ 1.7789, -0.5137, -0.4951, 0.3381]) tensor([0.6941, 0.0701, 0.0714, 0.1643]) -Greedy action tensor([ 1.2441, -0.5053, -0.2784, 0.2618]) tensor([0.5661, 0.0984, 0.1235, 0.2120]) -Greedy action tensor([ 1.3626, -0.9364, 0.0803, 0.3371]) tensor([0.5759, 0.0578, 0.1598, 0.2065]) -Greedy action tensor([ 2.0365, -0.7273, -0.5943, 1.3132]) tensor([0.6172, 0.0389, 0.0445, 0.2994]) -Greedy action tensor([ 1.7573, -0.4384, -0.6571, 0.5884]) tensor([0.6616, 0.0736, 0.0592, 0.2056]) -Greedy action tensor([ 1.5796, 0.0966, -0.0319, 0.3558]) tensor([0.5812, 0.1319, 0.1160, 0.1709]) -Greedy action tensor([ 0.7639, -0.2602, -0.5683, 0.7125]) tensor([0.3887, 0.1396, 0.1026, 0.3692]) -Greedy action tensor([ 1.3660, -0.3163, -0.4543, 0.5046]) tensor([0.5648, 0.1050, 0.0915, 0.2387]) -Greedy action tensor([ 1.7393, -0.8203, 0.0364, 0.4722]) tensor([0.6489, 0.0502, 0.1182, 0.1828]) -Greedy action tensor([ 1.8105, -0.5235, -0.4051, 0.5483]) tensor([0.6716, 0.0651, 0.0733, 0.1901]) -Greedy action tensor([ 2.9022, -0.6587, -0.1426, 0.8855]) tensor([0.8271, 0.0235, 0.0394, 0.1101]) -Greedy action tensor([ 1.2806, -0.2453, -0.6672, 0.2898]) tensor([0.5776, 0.1256, 0.0824, 0.2144]) -Greedy action tensor([ 1.7634, -1.1896, -0.3673, 0.8313]) tensor([0.6391, 0.0334, 0.0759, 0.2516]) -Greedy action tensor([ 1.8771, -0.2650, -1.0284, 0.3992]) tensor([0.7142, 0.0838, 0.0391, 0.1629]) -Greedy action tensor([ 1.5819, -0.2417, -0.6363, 0.2042]) tensor([0.6569, 0.1060, 0.0715, 0.1656]) -Greedy action tensor([ 1.3585, -0.7132, -0.2995, 0.2354]) tensor([0.6091, 0.0767, 0.1160, 0.1981]) -Greedy action tensor([ 1.1749, -0.1679, -0.1124, 0.1872]) tensor([0.5237, 0.1367, 0.1445, 0.1950]) -Greedy action tensor([ 1.5870, -0.2928, -0.0533, 0.4132]) tensor([0.6040, 0.0922, 0.1171, 0.1867]) -Greedy action tensor([ 1.0742, -0.1284, -0.1029, 0.2502]) tensor([0.4885, 0.1467, 0.1505, 0.2143]) -Greedy action tensor([ 1.2131, -0.0265, -0.5835, 0.1040]) tensor([0.5601, 0.1622, 0.0929, 0.1848]) -Greedy action tensor([ 1.4232, -0.3163, -0.2813, 0.0883]) tensor([0.6170, 0.1084, 0.1122, 0.1624]) -Greedy action tensor([ 1.5633, -0.8756, -0.1126, 0.3117]) tensor([0.6408, 0.0559, 0.1199, 0.1833]) -Greedy action tensor([ 1.7715, -0.2330, -0.2915, -0.0218]) tensor([0.7002, 0.0943, 0.0890, 0.1165]) -Greedy action tensor([ 1.7375, -0.2162, -0.1449, -0.1327]) tensor([0.6906, 0.0979, 0.1051, 0.1064]) -Greedy action tensor([ 1.1504, -0.3485, -0.3315, 0.1411]) tensor([0.5510, 0.1231, 0.1252, 0.2008]) -Greedy action tensor([ 1.6218, -0.0984, -0.1211, 0.3092]) tensor([0.6161, 0.1103, 0.1078, 0.1658]) -Greedy action tensor([ 1.5989, -0.5441, -0.5459, 0.6703]) tensor([0.6137, 0.0720, 0.0719, 0.2425]) -Greedy action tensor([ 1.3147, -0.2852, -0.3366, -0.0259]) tensor([0.6041, 0.1220, 0.1159, 0.1581]) -Greedy action tensor([ 1.2146, -0.9818, -0.1284, -0.1400]) tensor([0.6134, 0.0682, 0.1601, 0.1583]) -Greedy action tensor([ 2.0597, 0.3321, -0.1759, 0.0993]) tensor([0.7015, 0.1247, 0.0750, 0.0988]) -Greedy action tensor([ 1.6590, -0.4175, -0.4169, 0.5771]) tensor([0.6290, 0.0789, 0.0789, 0.2132]) -Greedy action tensor([ 1.8117, -0.7899, -0.6337, 0.5158]) tensor([0.6971, 0.0517, 0.0604, 0.1908]) -Greedy action tensor([ 1.3000, -0.4035, -0.3907, -0.1452]) tensor([0.6242, 0.1136, 0.1151, 0.1471]) -Greedy action tensor([ 1.4561, -0.5650, -0.7211, 0.9340]) tensor([0.5437, 0.0720, 0.0616, 0.3226]) -Greedy action tensor([ 1.1424, -0.3807, -0.2587, 0.0878]) tensor([0.5517, 0.1203, 0.1359, 0.1922]) -Greedy action tensor([ 1.2358, -0.7122, -0.0134, -0.1066]) tensor([0.5915, 0.0843, 0.1696, 0.1545]) -Greedy action tensor([ 1.0177, -0.5935, -0.2351, -0.2136]) tensor([0.5627, 0.1123, 0.1608, 0.1642]) -Greedy action tensor([ 1.4175, -0.2677, -0.6631, 0.4037]) tensor([0.5977, 0.1108, 0.0746, 0.2169]) -Greedy action tensor([ 1.5368, -0.2484, -0.5593, 0.3147]) tensor([0.6308, 0.1058, 0.0776, 0.1858]) -Greedy action tensor([ 1.7571, 0.3056, -0.0342, 0.2787]) tensor([0.6139, 0.1438, 0.1024, 0.1400]) -Greedy action tensor([ 1.2559, -0.2809, -0.6291, 0.5660]) tensor([0.5352, 0.1151, 0.0813, 0.2685]) -Greedy action tensor([ 1.4633, -0.5693, -0.1915, 0.3936]) tensor([0.6005, 0.0787, 0.1148, 0.2060]) -Greedy action tensor([ 1.5950, -0.6128, -0.3736, 0.4942]) tensor([0.6320, 0.0695, 0.0883, 0.2102]) -Greedy action tensor([ 1.2798, -0.2892, -0.2316, 0.2742]) tensor([0.5572, 0.1160, 0.1229, 0.2038]) -Greedy action tensor([ 1.1341, -0.0103, -0.2750, 0.2942]) tensor([0.5014, 0.1596, 0.1225, 0.2165]) -Greedy action tensor([ 2.0054, 0.1556, -0.0912, 0.3586]) tensor([0.6790, 0.1068, 0.0834, 0.1308]) -Greedy action tensor([ 1.3455, -0.5475, 0.0874, 0.1675]) tensor([0.5738, 0.0864, 0.1631, 0.1767]) -Greedy action tensor([ 1.9131, -0.8680, -0.1573, 0.3109]) tensor([0.7196, 0.0446, 0.0908, 0.1450]) -Greedy action tensor([ 1.6555, -0.9325, -0.2035, 0.4497]) tensor([0.6534, 0.0491, 0.1018, 0.1957]) -Greedy action tensor([ 1.3934, -0.5987, -0.0637, 0.7638]) tensor([0.5257, 0.0717, 0.1224, 0.2801]) -Greedy action tensor([ 1.8819, -0.4237, -0.4431, 0.4685]) tensor([0.6941, 0.0692, 0.0679, 0.1689]) -Greedy action tensor([ 1.5153, -0.3146, -0.0644, 0.1759]) tensor([0.6141, 0.0985, 0.1265, 0.1609]) -Greedy action tensor([ 2.0488, -0.5902, -0.2465, 0.2291]) tensor([0.7495, 0.0535, 0.0755, 0.1215]) -Greedy action tensor([ 1.2766, -0.1416, -0.6645, 0.3928]) tensor([0.5559, 0.1346, 0.0798, 0.2297]) -Greedy action tensor([ 0.8808, -0.7953, 0.0675, -0.5990]) tensor([0.5382, 0.1007, 0.2386, 0.1225]) -Greedy action tensor([ 1.2243, -0.6749, 0.0765, -0.6853]) tensor([0.6191, 0.0927, 0.1965, 0.0917]) -Greedy action tensor([ 0.7442, -0.3906, -0.0198, -0.4295]) tensor([0.4770, 0.1533, 0.2222, 0.1475]) -Greedy action tensor([ 0.7243, -0.4458, -0.0472, -0.1509]) tensor([0.4567, 0.1417, 0.2112, 0.1904]) -Greedy action tensor([ 0.7232, -0.4881, -0.1254, -0.4491]) tensor([0.4913, 0.1463, 0.2103, 0.1521]) -Greedy action tensor([ 1.0188, -0.8104, -0.0140, -0.5919]) tensor([0.5826, 0.0935, 0.2074, 0.1164]) -Greedy action tensor([ 0.4425, -0.3745, -0.1535, -0.1774]) tensor([0.3951, 0.1746, 0.2177, 0.2126]) -Greedy action tensor([ 0.1854, -0.1628, 0.2114, 0.0233]) tensor([0.2791, 0.1971, 0.2865, 0.2373]) -Greedy action tensor([ 0.2079, -0.1771, -0.1025, -0.0487]) tensor([0.3138, 0.2135, 0.2300, 0.2427]) -Greedy action tensor([ 0.5416, -0.1058, 0.1771, -0.4205]) tensor([0.3846, 0.2013, 0.2671, 0.1470]) -Greedy action tensor([ 0.4115, -0.2442, -0.0817, -0.1917]) tensor([0.3736, 0.1939, 0.2281, 0.2044]) -Greedy action tensor([ 0.8464, -0.6604, -0.1398, -0.3363]) tensor([0.5260, 0.1166, 0.1962, 0.1612]) -Greedy action tensor([ 0.7586, -0.6503, 0.0352, -0.4161]) tensor([0.4906, 0.1199, 0.2380, 0.1515]) -Greedy action tensor([ 0.7365, -0.3758, 0.0498, -0.1936]) tensor([0.4491, 0.1477, 0.2260, 0.1772]) -Greedy action tensor([ 0.8194, -0.4774, 0.0543, -0.2761]) tensor([0.4824, 0.1319, 0.2244, 0.1613]) -Greedy action tensor([ 0.6025, -0.3455, -0.0105, -0.3851]) tensor([0.4345, 0.1684, 0.2354, 0.1618]) -Greedy action tensor([ 0.5890, -0.3184, -0.0134, -0.3606]) tensor([0.4277, 0.1726, 0.2342, 0.1655]) -Greedy action tensor([ 1.2600, -0.0760, -0.1068, -0.5004]) tensor([0.5918, 0.1556, 0.1509, 0.1018]) -Greedy action tensor([ 0.7490, -0.3521, -0.0472, -0.2707]) tensor([0.4664, 0.1551, 0.2103, 0.1682]) -Greedy action tensor([ 1.0566, -0.0657, -0.0683, -0.5879]) tensor([0.5425, 0.1766, 0.1761, 0.1048]) -Greedy action tensor([ 0.1539, 0.1699, -0.1465, -0.3616]) tensor([0.2982, 0.3030, 0.2208, 0.1781]) -Greedy action tensor([ 0.9964, -0.6374, 0.0233, -0.5070]) tensor([0.5570, 0.1087, 0.2105, 0.1239]) -Greedy action tensor([ 1.0695, -0.5178, -0.1599, -0.3035]) tensor([0.5714, 0.1168, 0.1671, 0.1447]) -Greedy action tensor([ 0.6239, -0.8951, 0.0904, -0.4190]) tensor([0.4634, 0.1015, 0.2718, 0.1633]) -Greedy action tensor([ 0.7319, -0.8207, -0.0034, -0.5520]) tensor([0.5081, 0.1076, 0.2436, 0.1407]) -Greedy action tensor([ 0.7364, -0.3089, -0.0264, -0.4888]) tensor([0.4736, 0.1665, 0.2208, 0.1391]) -Greedy action tensor([ 0.9711, -0.5523, 0.1148, -0.4274]) tensor([0.5292, 0.1153, 0.2248, 0.1307]) -Greedy action tensor([ 0.3505, -0.0982, -0.1054, -0.2424]) tensor([0.3540, 0.2260, 0.2244, 0.1957]) -Greedy action tensor([ 0.5273, -0.2188, -0.1204, -0.1465]) tensor([0.3989, 0.1891, 0.2087, 0.2033]) -Greedy action tensor([ 0.6033, -1.0557, -0.1416, -0.4002]) tensor([0.4922, 0.0937, 0.2337, 0.1804]) -Greedy action tensor([ 0.5710, -0.0298, 0.0131, -0.3928]) tensor([0.3996, 0.2192, 0.2288, 0.1524]) -Greedy action tensor([ 0.8564, 0.1112, -0.1007, -0.5016]) tensor([0.4726, 0.2243, 0.1815, 0.1215]) -Greedy action tensor([ 0.4340, 0.0797, -0.1131, -0.0627]) tensor([0.3462, 0.2429, 0.2003, 0.2107]) -Greedy action tensor([ 0.9429, -0.7108, 0.1699, -0.4514]) tensor([0.5260, 0.1007, 0.2428, 0.1305]) -Greedy action tensor([ 0.6094, -0.2957, -0.0341, -0.2413]) tensor([0.4243, 0.1716, 0.2229, 0.1812]) -Greedy action tensor([ 1.0449, -0.8365, 0.0742, -0.5467]) tensor([0.5764, 0.0878, 0.2184, 0.1174]) -Greedy action tensor([ 0.9558, -0.6267, -0.0217, -0.3444]) tensor([0.5393, 0.1108, 0.2029, 0.1469]) -Greedy action tensor([ 0.4628, -0.0663, -0.0743, -0.1023]) tensor([0.3647, 0.2149, 0.2131, 0.2073]) -Greedy action tensor([ 0.6336, -0.4816, 0.2104, -0.5117]) tensor([0.4346, 0.1425, 0.2846, 0.1383]) -Greedy action tensor([ 0.6655, -0.4273, -0.0963, -0.2103]) tensor([0.4507, 0.1511, 0.2104, 0.1877]) -Greedy action tensor([ 0.3346, -0.2717, -0.0868, -0.2496]) tensor([0.3624, 0.1977, 0.2378, 0.2021]) -Greedy action tensor([ 0.6145, -0.7150, -0.0736, -0.3085]) tensor([0.4620, 0.1223, 0.2322, 0.1836]) -Greedy action tensor([ 1.1358, -0.7312, -0.0222, -0.4459]) tensor([0.5972, 0.0923, 0.1876, 0.1228]) -Greedy action tensor([ 0.4287, -0.1192, -0.1032, -0.0588]) tensor([0.3597, 0.2080, 0.2113, 0.2209]) -Greedy action tensor([ 1.0542, -1.0817, -0.0387, -0.5489]) tensor([0.6044, 0.0714, 0.2026, 0.1216]) -Greedy action tensor([ 0.8230, -0.6035, -0.0356, -0.2423]) tensor([0.4979, 0.1196, 0.2110, 0.1716]) -Greedy action tensor([ 1.1369e+00, -7.5241e-01, -1.4669e-04, -7.2445e-01]) tensor([0.6145, 0.0929, 0.1971, 0.0955]) -Greedy action tensor([ 0.4863, -0.3896, -0.0239, -0.0874]) tensor([0.3875, 0.1614, 0.2327, 0.2184]) -Greedy action tensor([ 0.2873, 0.1540, 0.0246, -0.1784]) tensor([0.3056, 0.2675, 0.2350, 0.1919]) -Greedy action tensor([ 0.3756, -0.1946, -0.1517, 0.0178]) tensor([0.3503, 0.1980, 0.2067, 0.2449]) -Greedy action tensor([ 0.5250, -0.0063, -0.0196, -0.3142]) tensor([0.3846, 0.2261, 0.2231, 0.1662]) -Greedy action tensor([ 9.2102e-01, -8.8198e-01, 5.7274e-04, -4.7990e-01]) tensor([0.5526, 0.0911, 0.2201, 0.1362]) -Greedy action tensor([ 0.7800, -0.2684, -0.0533, -0.1108]) tensor([0.4555, 0.1596, 0.1980, 0.1869]) -Greedy action tensor([ 0.4092, -0.4499, -0.2509, -0.3335]) tensor([0.4139, 0.1753, 0.2139, 0.1969]) -Greedy action tensor([ 0.7977, -0.3948, 0.0887, -0.2935]) tensor([0.4692, 0.1424, 0.2309, 0.1576]) -Greedy action tensor([0.7009, 0.1566, 0.0419, 0.1082]) tensor([0.3773, 0.2189, 0.1952, 0.2086]) -Greedy action tensor([ 0.3275, -0.0749, -0.0626, -0.2253]) tensor([0.3423, 0.2289, 0.2318, 0.1970]) -Greedy action tensor([ 0.5563, -0.0912, -0.0053, -0.0813]) tensor([0.3814, 0.1996, 0.2175, 0.2016]) -Greedy action tensor([ 1.2355, -0.7771, 0.1537, -0.6355]) tensor([0.6148, 0.0822, 0.2084, 0.0947]) -Greedy action tensor([ 1.2565, -0.8093, -0.0378, -0.4982]) tensor([0.6354, 0.0805, 0.1742, 0.1099]) -Greedy action tensor([ 0.5880, -0.4569, -0.0670, -0.2996]) tensor([0.4380, 0.1541, 0.2275, 0.1803]) -Greedy action tensor([ 0.7553, -0.8154, 0.0819, -0.3630]) tensor([0.4891, 0.1017, 0.2494, 0.1598]) -Greedy action tensor([ 1.1984, -0.6967, 0.0167, -0.5316]) tensor([0.6119, 0.0920, 0.1877, 0.1085]) -Greedy action tensor([ 0.7871, -0.4257, -0.0536, -0.2303]) tensor([0.4784, 0.1423, 0.2064, 0.1730]) -Greedy action tensor([ 0.6934, -0.4008, -0.1285, -0.3749]) tensor([0.4721, 0.1581, 0.2076, 0.1622]) -Greedy action tensor([ 0.7729, -0.3992, -0.1622, -0.2367]) tensor([0.4839, 0.1499, 0.1899, 0.1763]) -Greedy action tensor([ 0.5451, -0.1544, 0.0720, -0.2496]) tensor([0.3889, 0.1932, 0.2423, 0.1757]) -Greedy action tensor([ 0.8803, -0.4350, -0.0244, -0.4809]) tensor([0.5183, 0.1391, 0.2097, 0.1329]) -Greedy action tensor([ 1.3811, -0.4746, 0.0044, -0.7370]) tensor([0.6540, 0.1023, 0.1651, 0.0787]) -Greedy action tensor([ 0.5105, 0.0026, 0.0256, -0.0546]) tensor([0.3590, 0.2160, 0.2210, 0.2040]) -Greedy action tensor([ 0.4867, -0.3759, -0.0909, -0.2155]) tensor([0.4034, 0.1703, 0.2264, 0.1999]) -Greedy action tensor([ 1.0612, -0.6659, 0.0061, -0.4451]) tensor([0.5722, 0.1017, 0.1992, 0.1269]) -Greedy action tensor([ 0.3575, -0.1129, -0.0365, -0.0795]) tensor([0.3396, 0.2121, 0.2290, 0.2193]) -Greedy action tensor([ 0.6758, -0.3317, 0.0150, -0.1314]) tensor([0.4296, 0.1569, 0.2219, 0.1917]) -Greedy action tensor([ 0.4196, -0.3826, -0.1443, 0.0115]) tensor([0.3728, 0.1672, 0.2121, 0.2479]) -Greedy action tensor([ 0.7491, -0.2222, -0.1270, -0.1272]) tensor([0.4522, 0.1712, 0.1883, 0.1883]) -Greedy action tensor([ 0.6060, -0.3245, 0.0156, -0.5264]) tensor([0.4404, 0.1737, 0.2440, 0.1419]) -Greedy action tensor([ 1.1161, -0.9310, 0.0815, -0.7123]) tensor([0.6079, 0.0785, 0.2160, 0.0977]) -Greedy action tensor([ 0.3045, -0.0271, -0.0681, -0.1289]) tensor([0.3273, 0.2350, 0.2255, 0.2122]) -Greedy action tensor([ 0.5164, -0.3445, -0.0597, -0.0729]) tensor([0.3938, 0.1665, 0.2213, 0.2184]) -Greedy action tensor([ 0.8497, -0.5447, -0.1331, -0.5720]) tensor([0.5366, 0.1331, 0.2008, 0.1295]) -Greedy action tensor([ 1.4528, -0.3937, -0.3623, 0.4065]) tensor([0.5981, 0.0944, 0.0974, 0.2101]) -Greedy action tensor([ 2.0257, -0.5536, -0.1191, 0.7015]) tensor([0.6854, 0.0520, 0.0803, 0.1823]) -Greedy action tensor([ 2.1740, -1.4333, -0.4912, 1.2164]) tensor([0.6754, 0.0183, 0.0470, 0.2592]) -Greedy action tensor([ 1.8144, -0.5931, -0.3303, 0.2764]) tensor([0.7033, 0.0633, 0.0824, 0.1511]) -Greedy action tensor([ 1.6912, -0.4196, 0.0089, -0.0247]) tensor([0.6725, 0.0815, 0.1251, 0.1209]) -Greedy action tensor([ 1.6529, -0.1704, -0.1062, 0.7708]) tensor([0.5722, 0.0924, 0.0985, 0.2369]) -Greedy action tensor([ 2.1462, -0.9355, -0.1271, 0.8342]) tensor([0.7052, 0.0324, 0.0726, 0.1899]) -Greedy action tensor([ 1.7222, 0.1854, -0.1472, 0.4758]) tensor([0.6036, 0.1298, 0.0931, 0.1736]) -Greedy action tensor([ 1.6290, -0.6283, -0.2967, 0.5687]) tensor([0.6263, 0.0655, 0.0913, 0.2169]) -Greedy action tensor([0.7907, 0.2562, 0.3390, 0.2903]) tensor([0.3535, 0.2071, 0.2250, 0.2143]) -Greedy action tensor([ 1.9727, -1.0226, -0.2200, 0.4061]) tensor([0.7297, 0.0365, 0.0814, 0.1523]) -Greedy action tensor([ 1.5526, -0.2373, -0.2973, 0.4142]) tensor([0.6081, 0.1015, 0.0956, 0.1948]) -Greedy action tensor([ 1.3485, -0.8859, -0.6145, 0.6830]) tensor([0.5677, 0.0608, 0.0797, 0.2918]) -Greedy action tensor([ 1.5129, -0.4876, -0.4795, 0.2694]) tensor([0.6410, 0.0867, 0.0874, 0.1849]) -Greedy action tensor([ 1.7083, -0.1430, -0.1855, 0.2118]) tensor([0.6530, 0.1025, 0.0983, 0.1462]) -Greedy action tensor([ 1.8994, -0.4013, -0.2714, 0.5244]) tensor([0.6816, 0.0683, 0.0778, 0.1723]) -Greedy action tensor([ 1.7025, 0.0304, -0.3514, 0.3638]) tensor([0.6336, 0.1190, 0.0813, 0.1661]) -Greedy action tensor([ 1.5701, 0.0056, -0.4088, 0.4762]) tensor([0.5944, 0.1244, 0.0822, 0.1991]) -Greedy action tensor([ 1.5799, -0.7967, -0.2909, 0.0376]) tensor([0.6846, 0.0636, 0.1054, 0.1464]) -Greedy action tensor([ 1.2168, -0.2196, -0.1584, 0.1009]) tensor([0.5500, 0.1308, 0.1390, 0.1802]) -Greedy action tensor([ 1.4312, -0.2457, -0.2137, 0.4316]) tensor([0.5721, 0.1069, 0.1104, 0.2106]) -Greedy action tensor([ 1.5118, -0.3097, -0.1784, 0.0960]) tensor([0.6293, 0.1018, 0.1161, 0.1528]) -Greedy action tensor([ 1.7062, -0.9415, -0.1681, 0.7091]) tensor([0.6277, 0.0444, 0.0963, 0.2316]) -Greedy action tensor([ 1.3058, -0.4777, -0.0767, 0.1506]) tensor([0.5767, 0.0969, 0.1447, 0.1817]) -Greedy action tensor([ 1.8442, -0.9901, -0.0361, 0.4884]) tensor([0.6807, 0.0400, 0.1038, 0.1755]) -Greedy action tensor([ 1.7045, -0.5497, -0.6161, 0.4293]) tensor([0.6745, 0.0708, 0.0662, 0.1884]) -Greedy action tensor([ 1.4537, -0.4229, -1.3010, 0.3482]) tensor([0.6461, 0.0989, 0.0411, 0.2139]) -Greedy action tensor([ 2.4618, -1.3221, -0.2988, 0.6037]) tensor([0.8052, 0.0183, 0.0509, 0.1256]) -Greedy action tensor([ 1.6921, -0.2042, -0.5782, 0.4314]) tensor([0.6507, 0.0977, 0.0672, 0.1844]) -Greedy action tensor([ 1.1336, -0.4919, 0.0176, -0.2769]) tensor([0.5655, 0.1113, 0.1852, 0.1380]) -Greedy action tensor([ 1.2531, -0.3307, -0.6092, -0.0778]) tensor([0.6155, 0.1263, 0.0956, 0.1626]) -Greedy action tensor([ 1.6631, -0.5952, -0.3191, 0.2670]) tensor([0.6712, 0.0702, 0.0925, 0.1662]) -Greedy action tensor([ 1.2290, -0.4697, -0.3871, 0.2667]) tensor([0.5670, 0.1037, 0.1127, 0.2166]) -Greedy action tensor([ 1.8536, -0.4519, -0.7810, 0.4128]) tensor([0.7101, 0.0708, 0.0510, 0.1681]) -Greedy action tensor([1.2505, 0.1684, 0.3241, 0.0380]) tensor([0.4921, 0.1667, 0.1948, 0.1464]) -Greedy action tensor([ 1.4964, -0.8056, -0.5495, 0.3448]) tensor([0.6471, 0.0647, 0.0836, 0.2045]) -Greedy action tensor([ 1.9051, -0.7422, -0.1942, 0.5019]) tensor([0.6948, 0.0492, 0.0851, 0.1708]) -Greedy action tensor([ 0.6986, -0.3023, 0.0683, 0.1111]) tensor([0.4072, 0.1497, 0.2168, 0.2263]) -Greedy action tensor([ 0.8435, -0.3921, -0.2280, 0.2226]) tensor([0.4607, 0.1339, 0.1578, 0.2476]) -Greedy action tensor([ 1.3668, -0.3695, -0.0592, 0.2693]) tensor([0.5714, 0.1007, 0.1373, 0.1907]) -Greedy action tensor([ 0.9873, -0.2341, -0.1275, 0.2990]) tensor([0.4705, 0.1387, 0.1543, 0.2364]) -Greedy action tensor([ 1.6193, -0.6005, -0.5040, 0.5710]) tensor([0.6334, 0.0688, 0.0758, 0.2220]) -Greedy action tensor([ 1.6009, -0.3289, -0.4061, 0.2389]) tensor([0.6512, 0.0945, 0.0875, 0.1668]) -Greedy action tensor([ 2.3551, -1.2640, 0.1790, 1.1855]) tensor([0.6893, 0.0185, 0.0782, 0.2140]) -Greedy action tensor([ 2.1660, -0.2276, -0.0186, 0.3827]) tensor([0.7289, 0.0665, 0.0820, 0.1225]) -Greedy action tensor([ 2.4134, -0.8822, -0.4627, 0.8319]) tensor([0.7698, 0.0285, 0.0434, 0.1583]) -Greedy action tensor([ 1.4056, 0.0685, -0.4147, 0.5694]) tensor([0.5382, 0.1413, 0.0872, 0.2333]) -Greedy action tensor([ 1.1310, -0.2166, -0.1228, -0.2902]) tensor([0.5597, 0.1454, 0.1597, 0.1351]) -Greedy action tensor([ 1.5212, -0.6327, -0.4231, 0.0896]) tensor([0.6675, 0.0775, 0.0955, 0.1595]) -Greedy action tensor([ 2.2653, -0.9690, -0.3136, 0.5081]) tensor([0.7765, 0.0306, 0.0589, 0.1340]) -Greedy action tensor([ 1.7820, -0.5257, 0.1143, 0.4591]) tensor([0.6433, 0.0640, 0.1214, 0.1713]) -Greedy action tensor([ 1.7411, -0.3927, -0.6386, 0.2781]) tensor([0.6932, 0.0821, 0.0642, 0.1605]) -Greedy action tensor([ 1.9673, -0.9309, -0.0734, 0.8758]) tensor([0.6576, 0.0362, 0.0854, 0.2208]) -Greedy action tensor([ 1.8060, -0.4863, -0.2569, 0.3259]) tensor([0.6869, 0.0694, 0.0873, 0.1564]) -Greedy action tensor([ 0.9848, -0.1468, -0.2727, 0.0292]) tensor([0.5022, 0.1619, 0.1428, 0.1931]) -Greedy action tensor([ 1.4041, -0.5961, -0.4722, 0.6321]) tensor([0.5713, 0.0773, 0.0875, 0.2640]) -Greedy action tensor([ 1.2554, -0.3412, -0.3234, 0.4361]) tensor([0.5407, 0.1095, 0.1115, 0.2383]) -Greedy action tensor([ 0.8661, -0.1321, -0.3342, -0.1343]) tensor([0.4908, 0.1809, 0.1478, 0.1805]) -Greedy action tensor([ 0.9978, 0.3099, -0.3188, -0.1607]) tensor([0.4797, 0.2411, 0.1286, 0.1506]) -Greedy action tensor([ 1.5440, -0.8412, -0.4237, 0.5383]) tensor([0.6259, 0.0576, 0.0875, 0.2290]) -Greedy action tensor([ 1.0267, -0.6385, -0.6147, -0.2317]) tensor([0.5999, 0.1135, 0.1162, 0.1704]) -Greedy action tensor([ 1.4238, -0.2296, -0.4776, 0.3025]) tensor([0.6000, 0.1148, 0.0896, 0.1955]) -Greedy action tensor([ 1.3122, -0.2737, -0.1740, 0.3954]) tensor([0.5462, 0.1118, 0.1236, 0.2184]) -Greedy action tensor([ 1.4159, -0.1010, -0.5130, 0.2963]) tensor([0.5913, 0.1297, 0.0859, 0.1930]) -Greedy action tensor([ 1.0997, 0.0318, -0.2392, 0.1389]) tensor([0.5029, 0.1729, 0.1318, 0.1924]) -Greedy action tensor([ 1.2281, -0.2825, -0.1571, 0.6035]) tensor([0.4984, 0.1100, 0.1247, 0.2669]) -Greedy action tensor([1.5573, 0.1791, 0.1617, 0.1016]) tensor([0.5770, 0.1454, 0.1429, 0.1346]) -Greedy action tensor([ 1.2888, -0.3692, -0.5396, 0.4508]) tensor([0.5606, 0.1068, 0.0901, 0.2425]) -Greedy action tensor([ 1.7845, -0.5623, -0.4083, 0.3361]) tensor([0.6934, 0.0663, 0.0774, 0.1629]) -Greedy action tensor([ 1.3796, -0.7370, -0.3993, -0.0333]) tensor([0.6524, 0.0786, 0.1102, 0.1588]) -Greedy action tensor([ 1.0770, -0.4002, -0.2832, 0.3183]) tensor([0.5120, 0.1169, 0.1314, 0.2397]) -Greedy action tensor([ 1.0241, -0.2798, -0.0557, 0.4394]) tensor([0.4612, 0.1252, 0.1566, 0.2570]) -Greedy action tensor([ 1.0255, -0.6457, -0.4982, 0.1321]) tensor([0.5509, 0.1036, 0.1200, 0.2255]) -Greedy action tensor([ 1.3981, -0.5798, -0.0439, 0.0496]) tensor([0.6118, 0.0847, 0.1447, 0.1589]) -Greedy action tensor([ 1.7992, -0.6825, -0.4933, 0.6327]) tensor([0.6684, 0.0559, 0.0675, 0.2082]) -Greedy action tensor([ 1.6090, -0.6110, -0.3385, 0.5417]) tensor([0.6269, 0.0681, 0.0894, 0.2156]) -Greedy action tensor([ 1.2733, -0.2330, -0.3725, 0.3624]) tensor([0.5504, 0.1220, 0.1062, 0.2214]) -Greedy action tensor([ 1.7826, -0.9032, -0.4598, 0.1888]) tensor([0.7259, 0.0495, 0.0771, 0.1475]) -Greedy action tensor([ 1.2163, -0.3381, -0.5228, 0.1191]) tensor([0.5811, 0.1228, 0.1021, 0.1940]) -Greedy action tensor([ 1.6329, 0.0836, -0.1621, 0.4023]) tensor([0.5986, 0.1271, 0.0994, 0.1749]) -Greedy action tensor([ 1.2963, -0.7717, 0.0140, 0.3441]) tensor([0.5587, 0.0706, 0.1550, 0.2156]) -Greedy action tensor([ 1.3840, -0.3832, -0.1406, 0.2309]) tensor([0.5868, 0.1002, 0.1277, 0.1852]) -Greedy action tensor([-1.7867, -0.4382, 0.6010, -0.0946]) tensor([0.0472, 0.1819, 0.5143, 0.2565]) -Greedy action tensor([-1.6141, -0.4517, 0.5743, 0.1778]) tensor([0.0523, 0.1672, 0.4666, 0.3139]) -Greedy action tensor([-1.9014, -0.4050, 0.6434, -0.1574]) tensor([0.0418, 0.1866, 0.5325, 0.2391]) -Greedy action tensor([-1.9344, -0.4253, 0.6598, -0.1732]) tensor([0.0404, 0.1829, 0.5413, 0.2354]) -Greedy action tensor([-1.8263, -0.4805, 0.6096, -0.1439]) tensor([0.0462, 0.1775, 0.5279, 0.2485]) -Greedy action tensor([-1.3045, -0.1601, 0.3539, -0.0311]) tensor([0.0771, 0.2422, 0.4050, 0.2756]) -Greedy action tensor([-1.8234, -0.4218, 0.6246, -0.1013]) tensor([0.0450, 0.1828, 0.5204, 0.2518]) -Greedy action tensor([-1.3220, -0.0185, 0.1684, 0.0982]) tensor([0.0754, 0.2777, 0.3348, 0.3121]) -Greedy action tensor([-1.4086, 0.1346, 0.3809, 0.1712]) tensor([0.0605, 0.2833, 0.3624, 0.2938]) -Greedy action tensor([-1.9064, -0.3990, 0.6433, -0.1568]) tensor([0.0415, 0.1876, 0.5319, 0.2390]) -Greedy action tensor([0.0429, 0.2282, 0.7610, 1.6078]) tensor([0.1107, 0.1332, 0.2269, 0.5292]) -Greedy action tensor([-1.9401, -0.4548, 0.6653, -0.1783]) tensor([0.0404, 0.1782, 0.5464, 0.2350]) -Greedy action tensor([0.6382, 0.2336, 0.3123, 0.9096]) tensor([0.2702, 0.1803, 0.1951, 0.3544]) -Greedy action tensor([-1.7912, -0.4507, 0.6095, -0.0053]) tensor([0.0458, 0.1751, 0.5056, 0.2734]) -Greedy action tensor([-1.8089, -0.4350, 0.6014, -0.1114]) tensor([0.0464, 0.1834, 0.5168, 0.2534]) -Greedy action tensor([-1.9063, -0.4500, 0.6487, -0.1620]) tensor([0.0419, 0.1796, 0.5389, 0.2396]) -Greedy action tensor([-1.8915, -0.4105, 0.6478, -0.1395]) tensor([0.0420, 0.1845, 0.5316, 0.2419]) -Greedy action tensor([-1.3312, -0.3069, 0.3380, 0.0723]) tensor([0.0760, 0.2116, 0.4033, 0.3092]) -Greedy action tensor([-1.8195, -0.3198, 0.5821, -0.1106]) tensor([0.0454, 0.2032, 0.5009, 0.2505]) -Greedy action tensor([-1.5714, -0.2513, 0.6597, 0.0575]) tensor([0.0522, 0.1955, 0.4861, 0.2662]) -Greedy action tensor([-1.8428, -0.4550, 0.7393, 0.0916]) tensor([0.0398, 0.1593, 0.5258, 0.2751]) -Greedy action tensor([-1.6864, -0.3897, 0.6896, 0.0623]) tensor([0.0472, 0.1728, 0.5084, 0.2715]) -Greedy action tensor([-1.7436, -0.1504, 0.5372, -0.0123]) tensor([0.0468, 0.2304, 0.4583, 0.2645]) -Greedy action tensor([-0.1136, 0.3894, 0.7899, 1.6573]) tensor([0.0909, 0.1504, 0.2244, 0.5343]) -Greedy action tensor([-1.8467, -0.4823, 0.6184, -0.1156]) tensor([0.0448, 0.1753, 0.5270, 0.2529]) -Greedy action tensor([-1.1699, -0.3427, 0.3350, 0.0033]) tensor([0.0907, 0.2075, 0.4086, 0.2932]) -Greedy action tensor([-1.8994, -0.4305, 0.6459, -0.1563]) tensor([0.0420, 0.1825, 0.5354, 0.2401]) -Greedy action tensor([-1.8008, -0.2563, 0.5656, -0.1222]) tensor([0.0461, 0.2159, 0.4911, 0.2469]) -Greedy action tensor([-1.4605, -0.3109, 0.4097, -0.0108]) tensor([0.0671, 0.2117, 0.4353, 0.2859]) -Greedy action tensor([-1.2248, -0.3300, 0.2770, 0.2744]) tensor([0.0806, 0.1971, 0.3616, 0.3607]) -Greedy action tensor([-1.7976, -0.4158, 0.5885, -0.1387]) tensor([0.0474, 0.1887, 0.5151, 0.2489]) -Greedy action tensor([-1.9241, -0.4330, 0.6553, -0.1710]) tensor([0.0410, 0.1820, 0.5405, 0.2365]) -Greedy action tensor([-1.7902, -0.3680, 0.5601, -0.0880]) tensor([0.0473, 0.1963, 0.4966, 0.2598]) -Greedy action tensor([0.1078, 0.3973, 0.6282, 1.5670]) tensor([0.1202, 0.1605, 0.2022, 0.5171]) -Greedy action tensor([-1.8386, -0.4474, 0.6175, -0.1236]) tensor([0.0450, 0.1808, 0.5244, 0.2499]) -Greedy action tensor([-1.9281, -0.4485, 0.6726, -0.1637]) tensor([0.0405, 0.1778, 0.5454, 0.2363]) -Greedy action tensor([-1.6682, 0.0041, 0.5280, 0.0792]) tensor([0.0475, 0.2529, 0.4270, 0.2726]) -Greedy action tensor([-0.5201, -0.3624, 0.1678, 0.1856]) tensor([0.1617, 0.1893, 0.3216, 0.3274]) -Greedy action tensor([-1.7147, -0.3872, 0.5427, -0.0971]) tensor([0.0516, 0.1947, 0.4934, 0.2602]) -Greedy action tensor([-1.8833, -0.4413, 0.6697, -0.1190]) tensor([0.0418, 0.1769, 0.5372, 0.2441]) -Greedy action tensor([-0.9379, -0.0784, 0.1453, 0.3419]) tensor([0.1009, 0.2383, 0.2980, 0.3628]) -Greedy action tensor([-1.8082, -0.5089, 0.5904, -0.1443]) tensor([0.0477, 0.1750, 0.5253, 0.2520]) -Greedy action tensor([-0.9171, -0.0379, 0.4093, -0.2738]) tensor([0.1101, 0.2653, 0.4150, 0.2096]) -Greedy action tensor([ 0.9152, 1.2408, -0.0402, 0.6136]) tensor([0.2850, 0.3946, 0.1096, 0.2108]) -Greedy action tensor([-1.8885, -0.4812, 0.6535, -0.1439]) tensor([0.0425, 0.1737, 0.5403, 0.2434]) -Greedy action tensor([-1.6230, -0.1477, 0.4778, -0.0697]) tensor([0.0547, 0.2393, 0.4473, 0.2587]) -Greedy action tensor([-1.9143, -0.4556, 0.6488, -0.1614]) tensor([0.0416, 0.1788, 0.5396, 0.2400]) -Greedy action tensor([-1.8987, -0.4047, 0.6432, -0.1531]) tensor([0.0419, 0.1865, 0.5318, 0.2398]) -Greedy action tensor([-1.9421, -0.4558, 0.6705, -0.1786]) tensor([0.0402, 0.1776, 0.5478, 0.2344]) -Greedy action tensor([-1.9224, -0.4121, 0.6519, -0.1649]) tensor([0.0409, 0.1852, 0.5367, 0.2371]) -Greedy action tensor([-1.8993, -0.4530, 0.6486, -0.1596]) tensor([0.0422, 0.1790, 0.5387, 0.2401]) -Greedy action tensor([-1.6259, -0.5458, 0.4925, 0.0160]) tensor([0.0574, 0.1690, 0.4773, 0.2964]) -Greedy action tensor([-1.7661, -0.3659, 0.6345, -0.2802]) tensor([0.0488, 0.1978, 0.5379, 0.2155]) -Greedy action tensor([-1.5587, -0.4835, 0.4810, 0.0254]) tensor([0.0606, 0.1777, 0.4661, 0.2956]) -Greedy action tensor([-1.7339, -0.3773, 0.5855, -0.0247]) tensor([0.0486, 0.1887, 0.4942, 0.2685]) -Greedy action tensor([-1.8038, -0.4143, 0.5937, -0.0972]) tensor([0.0465, 0.1865, 0.5110, 0.2561]) -Greedy action tensor([-1.7822, -0.3080, 0.6291, -0.0752]) tensor([0.0454, 0.1983, 0.5061, 0.2503]) -Greedy action tensor([-1.7972, -0.3865, 0.6004, -0.1614]) tensor([0.0471, 0.1931, 0.5180, 0.2418]) -Greedy action tensor([-1.5014, -0.5792, 0.4224, 0.0806]) tensor([0.0657, 0.1652, 0.4497, 0.3195]) -Greedy action tensor([-1.7366, 0.1216, 0.4837, 0.0066]) tensor([0.0448, 0.2871, 0.4123, 0.2559]) -Greedy action tensor([-0.9935, 0.0782, 0.1613, 0.1610]) tensor([0.0974, 0.2845, 0.3091, 0.3090]) -Greedy action tensor([-1.6338, -0.4006, 0.5061, -0.0773]) tensor([0.0566, 0.1942, 0.4809, 0.2683]) -Greedy action tensor([-1.0041, 0.2215, 0.3829, 0.3811]) tensor([0.0806, 0.2746, 0.3227, 0.3221]) -Greedy action tensor([-1.8631, -0.4468, 0.6173, -0.1367]) tensor([0.0441, 0.1817, 0.5265, 0.2477]) -Greedy action tensor([-1.8362, -0.4412, 0.6191, -0.1220]) tensor([0.0450, 0.1814, 0.5239, 0.2497]) -Greedy action tensor([-0.9359, 0.1208, -0.1396, -0.1932]) tensor([0.1220, 0.3510, 0.2705, 0.2564]) -Greedy action tensor([-1.8853, -0.4265, 0.6343, -0.1528]) tensor([0.0428, 0.1840, 0.5314, 0.2419]) -Greedy action tensor([-0.8653, -0.5052, 0.2432, 0.0511]) tensor([0.1256, 0.1800, 0.3805, 0.3140]) -Greedy action tensor([-1.8521, -0.4598, 0.6249, -0.1290]) tensor([0.0444, 0.1786, 0.5284, 0.2486]) -Greedy action tensor([-1.8986, -0.4081, 0.6456, -0.1547]) tensor([0.0419, 0.1858, 0.5329, 0.2394]) -Greedy action tensor([-1.9298, -0.4405, 0.6567, -0.1726]) tensor([0.0408, 0.1809, 0.5419, 0.2365]) -Greedy action tensor([-1.9242, -0.4535, 0.6543, -0.1679]) tensor([0.0411, 0.1789, 0.5418, 0.2381]) -Greedy action tensor([-1.8564, -0.3889, 0.6146, -0.1273]) tensor([0.0438, 0.1902, 0.5189, 0.2471]) -Greedy action tensor([-1.5197, -0.3803, 0.5132, 0.0969]) tensor([0.0595, 0.1860, 0.4546, 0.2998]) -Greedy action tensor([-1.9185, -0.4534, 0.6572, -0.1688]) tensor([0.0413, 0.1787, 0.5425, 0.2375]) -Greedy action tensor([-1.9120, -0.4623, 0.6510, -0.1628]) tensor([0.0417, 0.1777, 0.5409, 0.2397]) -Greedy action tensor([-1.3459, -0.7351, -0.0268, 0.0287]) tensor([0.0949, 0.1748, 0.3550, 0.3753]) -Greedy action tensor([-1.8486, -0.3769, 0.6086, -0.1317]) tensor([0.0443, 0.1928, 0.5166, 0.2464]) -Greedy action tensor([-1.8792, -0.3503, 0.6174, -0.1435]) tensor([0.0427, 0.1969, 0.5183, 0.2421]) -Greedy action tensor([-0.9051, 0.0274, 0.4232, 0.3851]) tensor([0.0913, 0.2321, 0.3447, 0.3319]) -Greedy action tensor([-0.6156, -0.3815, 0.4365, 0.5414]) tensor([0.1204, 0.1521, 0.3447, 0.3828]) -Greedy action tensor([-1.7210, -0.4208, 0.5577, -0.0508]) tensor([0.0506, 0.1858, 0.4945, 0.2691]) -Greedy action tensor([-0.4757, -1.3864, 0.9140, 1.1193]) tensor([0.0967, 0.0389, 0.3880, 0.4764]) -Greedy action tensor([ 0.3395, -0.7968, 0.2630, 0.9593]) tensor([0.2435, 0.0782, 0.2256, 0.4527]) -Greedy action tensor([-0.2750, -0.7686, -0.1966, 0.1289]) tensor([0.2387, 0.1457, 0.2582, 0.3575]) -Greedy action tensor([ 1.6182, -1.1512, 0.3184, 1.1646]) tensor([0.5074, 0.0318, 0.1383, 0.3224]) -Greedy action tensor([ 1.6837, -0.9247, -0.2771, 0.1026]) tensor([0.7041, 0.0519, 0.0991, 0.1449]) -Greedy action tensor([ 1.0146, -1.1907, 0.3753, 0.1706]) tensor([0.4836, 0.0533, 0.2552, 0.2079]) -Greedy action tensor([-0.0240, -1.1743, 0.8574, -0.0483]) tensor([0.2124, 0.0673, 0.5129, 0.2074]) -Greedy action tensor([ 0.6659, -1.3583, 0.2055, -0.3600]) tensor([0.4714, 0.0623, 0.2974, 0.1690]) -Greedy action tensor([-0.7673, -0.3009, -0.9290, -0.1840]) tensor([0.1909, 0.3044, 0.1624, 0.3422]) -Greedy action tensor([-0.9069, -0.6115, 1.1115, -1.3377]) tensor([0.0951, 0.1277, 0.7154, 0.0618]) -Greedy action tensor([-0.2742, -1.2631, 0.7953, -0.7818]) tensor([0.2046, 0.0761, 0.5962, 0.1231]) -Greedy action tensor([-0.2366, -1.6167, -0.5441, 0.4238]) tensor([0.2550, 0.0641, 0.1875, 0.4935]) -Greedy action tensor([ 0.9587, -0.5241, 0.8624, 1.8598]) tensor([0.2175, 0.0494, 0.1975, 0.5356]) -Greedy action tensor([-0.0677, -0.8933, -0.7544, -0.4971]) tensor([0.3858, 0.1690, 0.1941, 0.2511]) -Greedy action tensor([ 0.0525, -1.7168, 0.4914, 0.0846]) tensor([0.2664, 0.0454, 0.4131, 0.2751]) -Greedy action tensor([ 0.9538, -1.5564, -0.0500, 0.9441]) tensor([0.4102, 0.0333, 0.1503, 0.4062]) -Greedy action tensor([ 0.9928, -0.3510, 0.8834, 0.0543]) tensor([0.3924, 0.1024, 0.3517, 0.1535]) -Greedy action tensor([-1.5256, -0.8070, -0.5280, 0.7778]) tensor([0.0634, 0.1301, 0.1719, 0.6346]) -Greedy action tensor([-0.8818, -0.5331, 0.4026, -0.8759]) tensor([0.1421, 0.2014, 0.5135, 0.1430]) -Greedy action tensor([ 1.0590, -0.3233, 0.3597, 1.1599]) tensor([0.3504, 0.0879, 0.1741, 0.3876]) -Greedy action tensor([-0.1084, -0.3327, -1.3931, 0.3743]) tensor([0.2706, 0.2162, 0.0749, 0.4384]) -Greedy action tensor([ 0.2739, -0.0430, -0.4676, -0.3735]) tensor([0.3665, 0.2670, 0.1746, 0.1919]) -Greedy action tensor([-0.1357, -1.3606, 0.0572, 0.3283]) tensor([0.2441, 0.0717, 0.2960, 0.3882]) -Greedy action tensor([-0.2827, -0.5358, 1.7467, 0.1262]) tensor([0.0918, 0.0713, 0.6987, 0.1382]) -Greedy action tensor([ 0.4066, 0.0519, 1.3102, -0.5346]) tensor([0.2193, 0.1538, 0.5413, 0.0856]) -Greedy action tensor([-0.4214, 0.6893, 0.2475, -1.0733]) tensor([0.1536, 0.4665, 0.2999, 0.0800]) -Greedy action tensor([ 0.3529, 0.1703, -0.9282, 0.4186]) tensor([0.3146, 0.2621, 0.0874, 0.3359]) -Greedy action tensor([ 0.2864, -0.8101, 0.0054, 0.6955]) tensor([0.2782, 0.0929, 0.2101, 0.4188]) -Greedy action tensor([ 0.6914, -0.4189, -0.4523, 1.7877]) tensor([0.2155, 0.0710, 0.0687, 0.6449]) -Greedy action tensor([-0.2748, -0.8145, -0.0263, 0.9203]) tensor([0.1621, 0.0945, 0.2078, 0.5356]) -Greedy action tensor([-0.7651, 0.6502, 0.1797, 0.1278]) tensor([0.0987, 0.4064, 0.2539, 0.2410]) -Greedy action tensor([ 0.4468, -0.9916, -0.4206, 1.5636]) tensor([0.2122, 0.0504, 0.0891, 0.6483]) -Greedy action tensor([ 0.5158, -0.3027, 0.2449, 0.4379]) tensor([0.3196, 0.1410, 0.2438, 0.2957]) -Greedy action tensor([-0.2959, -0.6861, -0.8186, -0.9714]) tensor([0.3599, 0.2436, 0.2134, 0.1831]) -Greedy action tensor([ 0.3121, 0.5415, 0.9977, -0.3936]) tensor([0.2111, 0.2656, 0.4191, 0.1042]) -Greedy action tensor([-0.5853, -0.3523, 0.3806, -0.3190]) tensor([0.1614, 0.2038, 0.4241, 0.2107]) -Greedy action tensor([-0.4829, -1.4124, -0.1472, 0.0823]) tensor([0.2196, 0.0867, 0.3072, 0.3865]) -Greedy action tensor([1.7375, 0.2379, 0.1343, 0.9203]) tensor([0.5359, 0.1196, 0.1078, 0.2367]) -Greedy action tensor([ 1.1861, -1.1164, 0.4117, 0.8356]) tensor([0.4415, 0.0441, 0.2035, 0.3109]) -Greedy action tensor([-1.2078, -0.3852, 0.8023, -0.6618]) tensor([0.0802, 0.1826, 0.5987, 0.1385]) -Greedy action tensor([ 0.1205, -0.5195, 0.2301, 1.2233]) tensor([0.1768, 0.0932, 0.1973, 0.5327]) -Greedy action tensor([ 0.8255, 0.3738, 0.7927, -0.0764]) tensor([0.3322, 0.2115, 0.3215, 0.1348]) -Greedy action tensor([ 0.4383, 0.0455, -0.3846, 0.6640]) tensor([0.2969, 0.2005, 0.1304, 0.3721]) -Greedy action tensor([ 0.1021, -0.3460, 0.4293, -0.8814]) tensor([0.2941, 0.1879, 0.4080, 0.1100]) -Greedy action tensor([ 0.1395, -0.5192, -0.7124, -0.1983]) tensor([0.3763, 0.1947, 0.1605, 0.2684]) -Greedy action tensor([ 0.2206, -1.1433, 0.7434, -0.0935]) tensor([0.2723, 0.0696, 0.4592, 0.1989]) -Greedy action tensor([ 0.4371, -2.0254, 0.3834, -0.0187]) tensor([0.3750, 0.0320, 0.3554, 0.2377]) -Greedy action tensor([-0.0922, -2.1187, 0.1305, 0.9199]) tensor([0.1948, 0.0257, 0.2434, 0.5361]) -Greedy action tensor([ 0.8327, -0.8573, 1.5859, 0.5380]) tensor([0.2467, 0.0455, 0.5240, 0.1838]) -Greedy action tensor([-0.4367, -1.5323, -0.4607, 0.8135]) tensor([0.1724, 0.0576, 0.1683, 0.6017]) -Greedy action tensor([ 0.5371, -1.3328, -0.2905, 0.4247]) tensor([0.4024, 0.0620, 0.1759, 0.3597]) -Greedy action tensor([ 1.1256, -1.1504, 1.2225, 0.1944]) tensor([0.3848, 0.0395, 0.4240, 0.1517]) -Greedy action tensor([ 0.6276, -1.8154, 0.0126, 0.3868]) tensor([0.4143, 0.0360, 0.2240, 0.3257]) -Greedy action tensor([ 0.4680, -0.3617, -0.1608, 0.1772]) tensor([0.3680, 0.1605, 0.1963, 0.2752]) -Greedy action tensor([-0.0894, 1.4329, -0.3936, -0.6415]) tensor([0.1450, 0.6645, 0.1070, 0.0835]) -Greedy action tensor([-0.4847, -2.2731, 0.0697, 0.7924]) tensor([0.1540, 0.0258, 0.2681, 0.5522]) -Greedy action tensor([-0.1896, -0.2437, -0.0670, 0.1080]) tensor([0.2260, 0.2141, 0.2555, 0.3044]) -Greedy action tensor([-0.6394, -1.6318, -0.2731, 0.4551]) tensor([0.1724, 0.0639, 0.2487, 0.5151]) -Greedy action tensor([ 1.1461, -1.5573, 1.1937, -0.5934]) tensor([0.4364, 0.0292, 0.4577, 0.0766]) -Greedy action tensor([ 0.4317, -0.7695, -0.5738, 1.1223]) tensor([0.2731, 0.0822, 0.0999, 0.5448]) -Greedy action tensor([ 0.1138, -1.2747, -0.6005, -0.4258]) tensor([0.4307, 0.1074, 0.2108, 0.2511]) -Greedy action tensor([ 0.5578, -0.1811, -0.3066, -0.3301]) tensor([0.4328, 0.2067, 0.1823, 0.1781]) -Greedy action tensor([ 1.4014, -0.0774, -0.0118, 1.3433]) tensor([0.4141, 0.0944, 0.1008, 0.3907]) -Greedy action tensor([ 1.0670, -0.7221, 0.5119, 1.1526]) tensor([0.3533, 0.0590, 0.2028, 0.3849]) -Greedy action tensor([-0.8184, -0.6947, -0.3228, -0.7743]) tensor([0.2075, 0.2349, 0.3407, 0.2169]) -Greedy action tensor([-0.4519, 0.2092, 0.4860, -0.3276]) tensor([0.1510, 0.2924, 0.3857, 0.1710]) -Greedy action tensor([ 0.7894, 0.1439, -0.6030, 0.2815]) tensor([0.4211, 0.2208, 0.1046, 0.2534]) -Greedy action tensor([ 1.6898, -0.0522, 0.8180, -0.4348]) tensor([0.5838, 0.1023, 0.2441, 0.0698]) -Greedy action tensor([-0.7927, 0.1435, 0.7713, -0.8626]) tensor([0.1080, 0.2754, 0.5159, 0.1007]) -Greedy action tensor([ 0.2370, -1.7933, -0.2354, -0.0983]) tensor([0.4049, 0.0532, 0.2524, 0.2895]) -Greedy action tensor([-0.1232, 1.0680, 1.6278, -0.5731]) tensor([0.0936, 0.3079, 0.5389, 0.0597]) -Greedy action tensor([-0.6847, -0.8032, -1.1082, 0.0277]) tensor([0.2182, 0.1939, 0.1429, 0.4450]) -Greedy action tensor([ 1.1632, -1.0991, -0.0375, 1.2631]) tensor([0.3984, 0.0415, 0.1199, 0.4402]) -Greedy action tensor([ 1.0126, 0.2155, -0.0297, 0.1210]) tensor([0.4518, 0.2036, 0.1593, 0.1852]) -Greedy action tensor([-0.6131, -0.2646, 0.4332, 0.5632]) tensor([0.1176, 0.1666, 0.3347, 0.3812]) -Greedy action tensor([-0.1834, -0.0180, -0.1948, -0.3270]) tensor([0.2478, 0.2924, 0.2450, 0.2147]) -Greedy action tensor([ 1.4297, -1.2966, -0.1812, 0.7622]) tensor([0.5624, 0.0368, 0.1123, 0.2885]) -Greedy action tensor([-1.0244, -0.1279, -0.6125, -0.7888]) tensor([0.1606, 0.3936, 0.2425, 0.2033]) -Greedy action tensor([ 0.3301, -0.3567, 1.3469, 0.1701]) tensor([0.1953, 0.0983, 0.5399, 0.1665]) -Greedy action tensor([-1.1151, -1.3629, -0.3558, 0.9741]) tensor([0.0834, 0.0651, 0.1781, 0.6734]) -Greedy action tensor([-1.1750, 0.1786, -0.2598, 1.2314]) tensor([0.0542, 0.2097, 0.1353, 0.6009]) -Greedy action tensor([ 0.4333, -0.1206, -0.1201, -0.1401]) tensor([0.3685, 0.2118, 0.2119, 0.2077]) -Greedy action tensor([ 0.1813, -0.0417, -0.1346, -0.4307]) tensor([0.3256, 0.2605, 0.2374, 0.1765]) -Greedy action tensor([ 0.7231, -0.3538, 0.0531, -0.3155]) tensor([0.4533, 0.1544, 0.2319, 0.1604]) -Greedy action tensor([ 0.7698, -0.5640, -0.0220, -0.7568]) tensor([0.5171, 0.1362, 0.2343, 0.1124]) -Greedy action tensor([ 0.4550, -0.1539, -0.1367, -0.0467]) tensor([0.3700, 0.2012, 0.2047, 0.2240]) -Greedy action tensor([ 0.7790, -0.3316, -0.0889, -0.2359]) tensor([0.4736, 0.1560, 0.1988, 0.1716]) -Greedy action tensor([ 9.3841e-01, -1.1711e+00, 6.4290e-04, -6.1790e-01]) tensor([0.5801, 0.0704, 0.2271, 0.1224]) -Greedy action tensor([ 0.8561, -0.6591, -0.0376, -0.2258]) tensor([0.5082, 0.1117, 0.2079, 0.1723]) -Greedy action tensor([ 0.3731, -0.0391, -0.0116, -0.2036]) tensor([0.3443, 0.2280, 0.2343, 0.1934]) -Greedy action tensor([ 0.5025, -0.0416, -0.0822, 0.0028]) tensor([0.3644, 0.2115, 0.2031, 0.2211]) -Greedy action tensor([ 0.5170, -0.0841, 0.0396, -0.0412]) tensor([0.3648, 0.2000, 0.2264, 0.2088]) -Greedy action tensor([ 0.6310, -0.0742, -0.4576, -0.6079]) tensor([0.4716, 0.2330, 0.1588, 0.1366]) -Greedy action tensor([ 0.9262, -0.3354, -0.0382, -0.2235]) tensor([0.5048, 0.1429, 0.1924, 0.1599]) -Greedy action tensor([ 1.1475, -0.6446, -0.0942, -0.4521]) tensor([0.6033, 0.1005, 0.1743, 0.1219]) -Greedy action tensor([ 0.7410, -0.3699, -0.1719, -0.4092]) tensor([0.4885, 0.1608, 0.1960, 0.1546]) -Greedy action tensor([ 0.7898, -0.5486, -0.0942, -0.4740]) tensor([0.5107, 0.1339, 0.2110, 0.1443]) -Greedy action tensor([ 0.5161, -0.1816, -0.0718, -0.3191]) tensor([0.4021, 0.2001, 0.2234, 0.1744]) -Greedy action tensor([ 0.3081, -0.0105, 0.0124, -0.1033]) tensor([0.3191, 0.2320, 0.2374, 0.2115]) -Greedy action tensor([ 0.4508, -0.0881, -0.0321, -0.1954]) tensor([0.3671, 0.2141, 0.2265, 0.1923]) -Greedy action tensor([ 0.7211, -0.5182, 0.1150, -0.4669]) tensor([0.4673, 0.1353, 0.2549, 0.1425]) -Greedy action tensor([ 0.4123, -0.0406, -0.0162, -0.1888]) tensor([0.3527, 0.2242, 0.2298, 0.1933]) -Greedy action tensor([ 0.3539, -0.3135, 0.2334, -0.5551]) tensor([0.3568, 0.1831, 0.3163, 0.1438]) -Greedy action tensor([ 0.9345, -0.8393, 0.1177, -0.3275]) tensor([0.5278, 0.0896, 0.2332, 0.1494]) -Greedy action tensor([ 0.9623, -0.3061, -0.1970, -0.2347]) tensor([0.5271, 0.1483, 0.1654, 0.1592]) -Greedy action tensor([ 0.9024, -0.6479, -0.2691, -0.5565]) tensor([0.5699, 0.1209, 0.1766, 0.1325]) -Greedy action tensor([ 0.5440, -0.2870, -0.0762, -0.1807]) tensor([0.4069, 0.1772, 0.2188, 0.1971]) -Greedy action tensor([ 0.6778, -0.7896, -0.1004, -0.2951]) tensor([0.4836, 0.1115, 0.2221, 0.1828]) -Greedy action tensor([ 0.5602, -0.4195, -0.0197, -0.2909]) tensor([0.4233, 0.1589, 0.2370, 0.1807]) -Greedy action tensor([ 1.2495, -0.8996, -0.1099, -0.6249]) tensor([0.6549, 0.0764, 0.1682, 0.1005]) -Greedy action tensor([ 0.9622, -0.5770, -0.0821, -0.4443]) tensor([0.5520, 0.1184, 0.1943, 0.1353]) -Greedy action tensor([ 0.6241, -0.3950, 0.0268, -0.2440]) tensor([0.4290, 0.1548, 0.2361, 0.1801]) -Greedy action tensor([ 0.7075, -0.4680, 0.0677, -0.4474]) tensor([0.4649, 0.1435, 0.2452, 0.1465]) -Greedy action tensor([ 0.6853, -0.1232, -0.0740, -0.2525]) tensor([0.4338, 0.1933, 0.2030, 0.1698]) -Greedy action tensor([ 0.7878, -0.4363, -0.0726, -0.2203]) tensor([0.4803, 0.1412, 0.2032, 0.1753]) -Greedy action tensor([ 0.6341, -0.4664, -0.2022, -0.3166]) tensor([0.4646, 0.1546, 0.2013, 0.1795]) -Greedy action tensor([ 0.8262, -0.4144, -0.1311, -0.1187]) tensor([0.4850, 0.1403, 0.1862, 0.1885]) -Greedy action tensor([ 1.1108, -0.5935, -0.0564, -0.4004]) tensor([0.5835, 0.1061, 0.1816, 0.1287]) -Greedy action tensor([ 0.7617, -0.2472, 0.1737, -0.3233]) tensor([0.4429, 0.1615, 0.2460, 0.1496]) -Greedy action tensor([ 0.6730, -0.4955, 0.0929, -0.3092]) tensor([0.4454, 0.1384, 0.2494, 0.1668]) -Greedy action tensor([ 0.6067, 0.4833, -0.3011, -0.1457]) tensor([0.3625, 0.3204, 0.1462, 0.1708]) -Greedy action tensor([ 0.3314, -0.0151, 0.2712, 0.0269]) tensor([0.2953, 0.2088, 0.2781, 0.2178]) -Greedy action tensor([ 0.9674, -0.5117, 0.0600, -0.5627]) tensor([0.5411, 0.1233, 0.2184, 0.1172]) -Greedy action tensor([ 0.4376, -0.1719, -0.1009, -0.0446]) tensor([0.3643, 0.1981, 0.2126, 0.2249]) -Greedy action tensor([ 0.3319, -0.4270, -0.1097, -0.0956]) tensor([0.3619, 0.1694, 0.2327, 0.2360]) -Greedy action tensor([ 0.5249, -0.2293, -0.1419, -0.1432]) tensor([0.4006, 0.1884, 0.2056, 0.2054]) -Greedy action tensor([0.4794, 0.0577, 0.1446, 0.0124]) tensor([0.3335, 0.2188, 0.2386, 0.2091]) -Greedy action tensor([ 0.6387, 0.2746, -0.2293, 0.1355]) tensor([0.3677, 0.2555, 0.1544, 0.2223]) -Greedy action tensor([ 0.7506, -0.1318, -0.0462, -0.0068]) tensor([0.4286, 0.1773, 0.1932, 0.2009]) -Greedy action tensor([ 0.4445, 0.2837, -0.0097, -0.0647]) tensor([0.3239, 0.2758, 0.2057, 0.1946]) -Greedy action tensor([ 0.7628, -0.1817, -0.0013, -0.2011]) tensor([0.4472, 0.1739, 0.2083, 0.1706]) -Greedy action tensor([ 0.5634, -0.3553, -0.0317, -0.2305]) tensor([0.4162, 0.1661, 0.2296, 0.1882]) -Greedy action tensor([ 1.3290, -1.0121, 0.1600, -0.7786]) tensor([0.6543, 0.0630, 0.2033, 0.0795]) -Greedy action tensor([ 0.4934, 0.1585, -0.0476, 0.1168]) tensor([0.3351, 0.2398, 0.1951, 0.2300]) -Greedy action tensor([ 0.5739, -0.0499, 0.0070, -0.2315]) tensor([0.3922, 0.2102, 0.2225, 0.1752]) -Greedy action tensor([ 0.6842, -0.6158, -0.0128, -0.7176]) tensor([0.4958, 0.1351, 0.2470, 0.1221]) -Greedy action tensor([ 0.7338, -0.4850, -0.0031, -0.2322]) tensor([0.4641, 0.1372, 0.2221, 0.1766]) -Greedy action tensor([ 0.4683, -0.7243, -0.2214, -0.3330]) tensor([0.4437, 0.1346, 0.2226, 0.1991]) -Greedy action tensor([ 0.5894, -0.1406, -0.0148, -0.1103]) tensor([0.3960, 0.1908, 0.2164, 0.1967]) -Greedy action tensor([ 0.6384, -0.1351, -0.1530, -0.0374]) tensor([0.4127, 0.1904, 0.1870, 0.2099]) -Greedy action tensor([ 0.7165, -0.4672, -0.1170, -0.2541]) tensor([0.4718, 0.1444, 0.2050, 0.1787]) -Greedy action tensor([ 0.5499, -0.4608, 0.1040, -0.1755]) tensor([0.4019, 0.1463, 0.2573, 0.1946]) -Greedy action tensor([ 0.8611, -0.5694, -0.0583, -0.2230]) tensor([0.5060, 0.1210, 0.2018, 0.1712]) -Greedy action tensor([ 0.3666, -0.3831, -0.1412, -0.5042]) tensor([0.4011, 0.1895, 0.2414, 0.1679]) -Greedy action tensor([ 0.4130, -0.0539, -0.1208, -0.0134]) tensor([0.3489, 0.2187, 0.2046, 0.2278]) -Greedy action tensor([ 0.6411, -0.3083, -0.0696, -0.2111]) tensor([0.4339, 0.1679, 0.2132, 0.1850]) -Greedy action tensor([ 0.6182, -0.4510, 0.0051, -0.2499]) tensor([0.4339, 0.1490, 0.2350, 0.1821]) -Greedy action tensor([ 0.7148, -0.3891, -0.0491, -0.3866]) tensor([0.4695, 0.1557, 0.2187, 0.1561]) -Greedy action tensor([ 0.7488, -0.4775, 0.0080, -0.3365]) tensor([0.4744, 0.1392, 0.2262, 0.1603]) -Greedy action tensor([ 0.8075, -0.3528, -0.0244, -0.1412]) tensor([0.4682, 0.1467, 0.2038, 0.1813]) -Greedy action tensor([ 0.5434, 0.1563, 0.0561, -0.2216]) tensor([0.3625, 0.2461, 0.2227, 0.1687]) -Greedy action tensor([ 0.6685, -0.2022, -0.1032, -0.4495]) tensor([0.4529, 0.1896, 0.2094, 0.1481]) -Greedy action tensor([ 0.6643, -0.3394, -0.0773, -0.3566]) tensor([0.4539, 0.1664, 0.2162, 0.1635]) -Greedy action tensor([ 1.3521, -0.8680, 0.0884, -0.5826]) tensor([0.6512, 0.0707, 0.1840, 0.0941]) -Greedy action tensor([ 0.6512, -0.1432, -0.0101, -0.4926]) tensor([0.4373, 0.1976, 0.2257, 0.1393]) -Greedy action tensor([ 0.5001, -0.3121, -0.0336, -0.2129]) tensor([0.3967, 0.1761, 0.2327, 0.1945]) -Greedy action tensor([ 0.5464, -0.3978, 0.2102, -0.4997]) tensor([0.4074, 0.1585, 0.2911, 0.1431]) -Greedy action tensor([ 1.1039, -0.4891, -0.0312, -0.6287]) tensor([0.5877, 0.1195, 0.1889, 0.1039]) -Greedy action tensor([ 1.1140, -0.4719, -0.2527, -0.7221]) tensor([0.6176, 0.1265, 0.1575, 0.0985]) -Greedy action tensor([ 0.3374, 0.0405, -0.1736, -0.1790]) tensor([0.3402, 0.2528, 0.2041, 0.2030]) -Greedy action tensor([ 0.4553, 0.0392, -0.0601, 0.0772]) tensor([0.3399, 0.2242, 0.2030, 0.2329]) -Greedy action tensor([ 0.6114, -0.4750, -0.1992, -0.3991]) tensor([0.4660, 0.1572, 0.2072, 0.1696]) -Greedy action tensor([ 0.7085, -0.2833, -0.0791, 0.0062]) tensor([0.4308, 0.1598, 0.1960, 0.2134]) -Greedy action tensor([ 2.0581, -0.6181, -0.2848, 0.5807]) tensor([0.7178, 0.0494, 0.0689, 0.1638]) -Greedy action tensor([ 2.2609, -0.5253, -0.1952, 0.6562]) tensor([0.7416, 0.0457, 0.0636, 0.1490]) -Greedy action tensor([ 1.7551, -0.9974, -0.1847, 0.4729]) tensor([0.6734, 0.0429, 0.0968, 0.1868]) -Greedy action tensor([ 0.8369, -0.8496, -0.2384, -0.0786]) tensor([0.5190, 0.0961, 0.1771, 0.2078]) -Greedy action tensor([ 1.2536, -0.7998, -0.4631, 0.2689]) tensor([0.5947, 0.0763, 0.1068, 0.2222]) -Greedy action tensor([ 1.0057, -0.4278, -0.6870, 0.6566]) tensor([0.4700, 0.1121, 0.0865, 0.3315]) -Greedy action tensor([ 1.2974, -0.3365, -0.2784, 0.2859]) tensor([0.5663, 0.1105, 0.1171, 0.2060]) -Greedy action tensor([ 1.4019, -0.7156, -0.1938, 0.2318]) tensor([0.6122, 0.0737, 0.1241, 0.1900]) -Greedy action tensor([ 1.5870, -0.4696, -0.3761, 0.3340]) tensor([0.6435, 0.0823, 0.0904, 0.1838]) -Greedy action tensor([ 0.8443, -0.0022, -0.7282, 0.3226]) tensor([0.4484, 0.1923, 0.0931, 0.2662]) -Greedy action tensor([ 1.8921, -0.3300, -0.7196, 0.4033]) tensor([0.7105, 0.0770, 0.0522, 0.1603]) -Greedy action tensor([ 1.9491, -0.4598, -0.2865, 0.3983]) tensor([0.7098, 0.0638, 0.0759, 0.1505]) -Greedy action tensor([ 1.8275, -0.5067, -0.5267, 0.4013]) tensor([0.6983, 0.0677, 0.0663, 0.1677]) -Greedy action tensor([ 1.7552, -0.4750, -0.5717, 0.3935]) tensor([0.6843, 0.0736, 0.0668, 0.1753]) -Greedy action tensor([ 1.8380, -0.1019, -0.6091, 0.5509]) tensor([0.6639, 0.0954, 0.0575, 0.1833]) -Greedy action tensor([ 1.6068, -0.1031, -0.2394, 0.1395]) tensor([0.6372, 0.1153, 0.1006, 0.1469]) -Greedy action tensor([ 1.8168, -0.1007, -0.5236, 0.4537]) tensor([0.6671, 0.0980, 0.0642, 0.1707]) -Greedy action tensor([ 1.4018, -0.2414, -0.4954, 0.2505]) tensor([0.6026, 0.1165, 0.0904, 0.1906]) -Greedy action tensor([ 1.3165, -0.4441, -0.1988, 0.3449]) tensor([0.5649, 0.0971, 0.1241, 0.2138]) -Greedy action tensor([ 1.4828, -0.2006, -0.6943, 0.3432]) tensor([0.6176, 0.1147, 0.0700, 0.1976]) -Greedy action tensor([ 1.9234, -0.9627, -0.4678, 0.2007]) tensor([0.7542, 0.0421, 0.0690, 0.1347]) -Greedy action tensor([ 1.4082, -0.7108, -0.0927, 0.5502]) tensor([0.5659, 0.0680, 0.1262, 0.2400]) -Greedy action tensor([ 0.9955, -0.3815, -0.0023, 0.0826]) tensor([0.4945, 0.1248, 0.1823, 0.1985]) -Greedy action tensor([ 1.6345, -0.1147, -0.3958, 0.7057]) tensor([0.5882, 0.1023, 0.0772, 0.2323]) -Greedy action tensor([ 1.7902, -0.5675, -0.5917, -0.0110]) tensor([0.7396, 0.0700, 0.0683, 0.1221]) -Greedy action tensor([ 1.5554, -0.9190, -0.4356, -0.1199]) tensor([0.7102, 0.0598, 0.0970, 0.1330]) -Greedy action tensor([1.1279, 0.2940, 0.4018, 0.3164]) tensor([0.4233, 0.1839, 0.2048, 0.1880]) -Greedy action tensor([ 2.2727, -0.8689, -0.2321, 0.7981]) tensor([0.7387, 0.0319, 0.0603, 0.1691]) -Greedy action tensor([ 1.1764, -0.9275, 0.1636, -0.0448]) tensor([0.5618, 0.0685, 0.2040, 0.1657]) -Greedy action tensor([ 1.1450, -0.3023, -0.4416, 0.0701]) tensor([0.5614, 0.1321, 0.1149, 0.1916]) -Greedy action tensor([ 0.8253, -0.2689, -0.8634, 0.1053]) tensor([0.4984, 0.1669, 0.0921, 0.2426]) -Greedy action tensor([ 2.0685, -0.4901, -0.5903, 0.5112]) tensor([0.7363, 0.0570, 0.0516, 0.1551]) -Greedy action tensor([ 1.1729, -0.3509, -0.3038, 0.1915]) tensor([0.5491, 0.1196, 0.1254, 0.2058]) -Greedy action tensor([ 0.9632, -0.2322, -0.5775, 0.4529]) tensor([0.4723, 0.1429, 0.1012, 0.2836]) -Greedy action tensor([ 1.2295, -0.4014, -0.3508, 0.0795]) tensor([0.5820, 0.1139, 0.1198, 0.1843]) -Greedy action tensor([ 2.2826, -1.1135, -0.0867, 0.8222]) tensor([0.7357, 0.0247, 0.0688, 0.1708]) -Greedy action tensor([ 1.5434, -0.6068, -0.0980, 0.1941]) tensor([0.6371, 0.0742, 0.1234, 0.1653]) -Greedy action tensor([ 1.4784, -0.2613, -0.4199, 0.3228]) tensor([0.6097, 0.1070, 0.0913, 0.1920]) -Greedy action tensor([ 1.4245, -0.3398, -0.6134, 0.2099]) tensor([0.6256, 0.1072, 0.0815, 0.1857]) -Greedy action tensor([ 1.9898, -1.2733, -0.2101, 0.2439]) tensor([0.7555, 0.0289, 0.0837, 0.1318]) -Greedy action tensor([ 1.3500, -0.6752, -0.1762, 0.3399]) tensor([0.5836, 0.0770, 0.1269, 0.2125]) -Greedy action tensor([ 1.4112, -0.8382, -0.7045, 0.8965]) tensor([0.5483, 0.0578, 0.0661, 0.3277]) -Greedy action tensor([ 1.3970, -0.4356, -0.8831, 0.2364]) tensor([0.6347, 0.1015, 0.0649, 0.1988]) -Greedy action tensor([ 1.9334, -0.8923, -0.2999, 0.4629]) tensor([0.7162, 0.0424, 0.0768, 0.1646]) -Greedy action tensor([ 0.9495, -0.3300, -0.1095, 0.0542]) tensor([0.4918, 0.1368, 0.1706, 0.2009]) -Greedy action tensor([ 1.3226, -0.9762, -0.5295, 0.3299]) tensor([0.6143, 0.0617, 0.0964, 0.2277]) -Greedy action tensor([ 1.0842, -0.5262, -0.2391, -0.0612]) tensor([0.5605, 0.1120, 0.1492, 0.1783]) -Greedy action tensor([ 1.7862, -0.5861, -0.1215, 0.4938]) tensor([0.6595, 0.0615, 0.0979, 0.1811]) -Greedy action tensor([ 2.1467, -1.0343, -0.4484, 0.8747]) tensor([0.7161, 0.0298, 0.0534, 0.2007]) -Greedy action tensor([ 1.5966, 0.1104, -0.0080, 0.3405]) tensor([0.5841, 0.1321, 0.1174, 0.1663]) -Greedy action tensor([ 1.4876, -0.5119, -0.1565, 0.4749]) tensor([0.5911, 0.0800, 0.1142, 0.2147]) -Greedy action tensor([ 1.4113, -0.0464, -0.2686, -0.4193]) tensor([0.6331, 0.1474, 0.1180, 0.1015]) -Greedy action tensor([ 0.5070, -0.2638, -0.0427, -0.1021]) tensor([0.3871, 0.1791, 0.2234, 0.2105]) -Greedy action tensor([ 1.5862, -0.1635, -0.6077, 0.8207]) tensor([0.5713, 0.0993, 0.0637, 0.2657]) -Greedy action tensor([ 1.5782, -0.2756, 0.2005, 0.7740]) tensor([0.5387, 0.0844, 0.1358, 0.2411]) -Greedy action tensor([ 1.3573, -0.6969, -0.1553, 0.2667]) tensor([0.5936, 0.0761, 0.1308, 0.1995]) -Greedy action tensor([ 0.8410, -0.3162, -0.2575, 0.0168]) tensor([0.4793, 0.1507, 0.1598, 0.2102]) -Greedy action tensor([ 0.9705, -0.4810, -0.1479, 0.1258]) tensor([0.5023, 0.1177, 0.1642, 0.2158]) -Greedy action tensor([ 1.2455, -0.4861, -0.4866, 0.3713]) tensor([0.5646, 0.0999, 0.0999, 0.2356]) -Greedy action tensor([ 1.1762, -0.1743, -0.0392, 0.2045]) tensor([0.5170, 0.1340, 0.1534, 0.1957]) -Greedy action tensor([ 1.1624, -0.1884, -0.1391, 0.2384]) tensor([0.5186, 0.1343, 0.1411, 0.2059]) -Greedy action tensor([ 1.2816, -0.0180, -0.1681, 0.2196]) tensor([0.5397, 0.1471, 0.1266, 0.1866]) -Greedy action tensor([ 2.1072, -0.6595, -0.4720, 0.8343]) tensor([0.7049, 0.0443, 0.0535, 0.1974]) -Greedy action tensor([ 1.4787, -0.4737, -0.1040, 0.0105]) tensor([0.6338, 0.0900, 0.1302, 0.1460]) -Greedy action tensor([ 1.8610, -0.7148, -0.1748, 0.1452]) tensor([0.7213, 0.0549, 0.0942, 0.1297]) -Greedy action tensor([ 1.8412, 0.1938, -0.8173, 0.4809]) tensor([0.6582, 0.1267, 0.0461, 0.1689]) -Greedy action tensor([ 1.4457, -0.1492, -0.5221, 0.3427]) tensor([0.5972, 0.1212, 0.0835, 0.1982]) -Greedy action tensor([ 1.1408, -0.1320, -0.0933, 0.4352]) tensor([0.4843, 0.1356, 0.1410, 0.2391]) -Greedy action tensor([ 1.6299, -0.5865, -0.4961, 0.1702]) tensor([0.6846, 0.0746, 0.0817, 0.1591]) -Greedy action tensor([ 1.0891, -0.3644, -0.0660, -0.0455]) tensor([0.5347, 0.1250, 0.1684, 0.1719]) -Greedy action tensor([ 0.4586, -0.1888, -0.0319, 0.4204]) tensor([0.3228, 0.1689, 0.1976, 0.3107]) -Greedy action tensor([ 1.4262, 0.2345, -0.1999, 0.4461]) tensor([0.5331, 0.1619, 0.1049, 0.2001]) -Greedy action tensor([ 1.9340, -0.7049, -0.1624, 0.0494]) tensor([0.7428, 0.0531, 0.0913, 0.1128]) -Greedy action tensor([ 1.8738, -0.4097, -0.2020, 0.8648]) tensor([0.6281, 0.0640, 0.0788, 0.2290]) -Greedy action tensor([ 1.4294, -0.9313, -0.1029, 0.2053]) tensor([0.6233, 0.0588, 0.1347, 0.1833]) -Greedy action tensor([ 1.6714, -0.0580, -0.5740, 0.3019]) tensor([0.6504, 0.1154, 0.0689, 0.1653]) -Greedy action tensor([ 0.9047, -0.2621, 0.0296, 0.3518]) tensor([0.4341, 0.1352, 0.1810, 0.2497]) -Greedy action tensor([ 1.1026, -0.2684, -0.7844, 0.2789]) tensor([0.5423, 0.1376, 0.0822, 0.2379]) -Greedy action tensor([ 1.3970, -0.2901, -0.2874, 0.1886]) tensor([0.5991, 0.1109, 0.1112, 0.1789]) -Greedy action tensor([ 1.1984, -0.2443, -0.4579, 0.6926]) tensor([0.4926, 0.1164, 0.0940, 0.2970]) -Greedy action tensor([-0.2204, 0.1875, -0.5991, -0.1979]) tensor([0.2375, 0.3571, 0.1626, 0.2429]) -Greedy action tensor([ 0.3719, -1.5126, -0.1094, 1.1964]) tensor([0.2469, 0.0375, 0.1526, 0.5631]) -Greedy action tensor([ 1.9247, -0.1844, 0.3571, 0.9161]) tensor([0.5901, 0.0716, 0.1231, 0.2152]) -Greedy action tensor([ 0.2935, -0.9059, 0.5305, 0.7021]) tensor([0.2455, 0.0740, 0.3111, 0.3694]) -Greedy action tensor([ 0.9676, -1.6611, -0.7194, 0.7573]) tensor([0.4837, 0.0349, 0.0895, 0.3919]) -Greedy action tensor([1.4292, 0.4073, 1.6514, 0.6240]) tensor([0.3273, 0.1178, 0.4087, 0.1463]) -Greedy action tensor([ 0.2851, -1.6448, -0.4192, 0.9164]) tensor([0.2841, 0.0412, 0.1405, 0.5342]) -Greedy action tensor([ 0.0465, -1.2308, 0.1634, -0.8551]) tensor([0.3560, 0.0993, 0.4002, 0.1445]) -Greedy action tensor([ 0.2754, -0.8228, -0.3332, 0.4066]) tensor([0.3314, 0.1105, 0.1803, 0.3778]) -Greedy action tensor([ 0.4096, -0.7189, 0.2335, -0.5673]) tensor([0.3939, 0.1274, 0.3303, 0.1483]) -Greedy action tensor([ 1.0248, -1.0502, -0.1486, 0.1421]) tensor([0.5410, 0.0679, 0.1673, 0.2238]) -Greedy action tensor([-0.3325, -0.3152, -0.7137, -0.7252]) tensor([0.2962, 0.3014, 0.2023, 0.2000]) -Greedy action tensor([ 0.3771, -0.0140, -0.6923, -0.2506]) tensor([0.3916, 0.2649, 0.1344, 0.2091]) -Greedy action tensor([ 1.5284e+00, -2.9884e-01, -9.2569e-02, 1.1202e-03]) tensor([0.6346, 0.1021, 0.1255, 0.1378]) -Greedy action tensor([0.7094, 0.6526, 0.1156, 0.7983]) tensor([0.2786, 0.2632, 0.1538, 0.3044]) -Greedy action tensor([ 0.5492, -0.9085, -1.0415, -0.5843]) tensor([0.5687, 0.1324, 0.1159, 0.1831]) -Greedy action tensor([-0.0216, -0.3708, 1.1194, -1.5418]) tensor([0.1979, 0.1396, 0.6193, 0.0433]) -Greedy action tensor([-0.0261, -1.1245, 0.7172, -0.6549]) tensor([0.2519, 0.0840, 0.5298, 0.1343]) -Greedy action tensor([-1.8392, -0.8189, 0.0611, -0.2859]) tensor([0.0658, 0.1826, 0.4403, 0.3112]) -Greedy action tensor([-0.4744, 0.6695, -0.0504, -1.1643]) tensor([0.1621, 0.5089, 0.2477, 0.0813]) -Greedy action tensor([ 1.2834, -1.4008, 0.9504, -0.4784]) tensor([0.5111, 0.0349, 0.3663, 0.0878]) -Greedy action tensor([-0.0507, -0.3237, 0.2888, 1.3237]) tensor([0.1405, 0.1069, 0.1973, 0.5553]) -Greedy action tensor([ 0.7289, -1.3212, 0.4200, 0.6886]) tensor([0.3542, 0.0456, 0.2600, 0.3402]) -Greedy action tensor([ 0.4459, 0.3503, 0.1603, -0.2445]) tensor([0.3163, 0.2874, 0.2377, 0.1586]) -Greedy action tensor([ 0.5973, -0.1144, 0.4383, -0.2969]) tensor([0.3633, 0.1783, 0.3099, 0.1486]) -Greedy action tensor([1.2611, 0.8225, 1.4441, 0.8141]) tensor([0.2869, 0.1851, 0.3445, 0.1835]) -Greedy action tensor([ 0.8071, -1.3047, 1.0572, -0.3986]) tensor([0.3697, 0.0447, 0.4748, 0.1107]) -Greedy action tensor([ 0.8752, -1.0239, 0.5127, -0.3021]) tensor([0.4643, 0.0695, 0.3231, 0.1430]) -Greedy action tensor([-0.2588, -0.7818, -0.3298, 0.1300]) tensor([0.2500, 0.1482, 0.2329, 0.3689]) -Greedy action tensor([-1.1389, -1.1243, 0.3157, -0.5484]) tensor([0.1234, 0.1252, 0.5286, 0.2228]) -Greedy action tensor([ 0.0680, -0.9197, 0.6392, 0.3215]) tensor([0.2257, 0.0840, 0.3995, 0.2908]) -Greedy action tensor([ 0.7656, -1.4217, -0.3812, 0.3780]) tensor([0.4743, 0.0532, 0.1506, 0.3219]) -Greedy action tensor([ 0.5116, -0.7347, 1.3451, -0.0115]) tensor([0.2391, 0.0688, 0.5503, 0.1417]) -Greedy action tensor([-0.9993, 0.4158, -0.2117, 1.0565]) tensor([0.0661, 0.2721, 0.1453, 0.5165]) -Greedy action tensor([-0.2183, -0.9068, 0.0131, -0.3388]) tensor([0.2740, 0.1377, 0.3454, 0.2429]) -Greedy action tensor([ 0.3806, 0.0349, 0.0981, -0.7101]) tensor([0.3575, 0.2530, 0.2695, 0.1201]) -Greedy action tensor([0.2817, 0.2854, 0.0271, 0.3015]) tensor([0.2632, 0.2642, 0.2041, 0.2685]) -Greedy action tensor([ 0.5995, -1.4220, -0.1730, 0.6600]) tensor([0.3764, 0.0499, 0.1738, 0.3999]) -Greedy action tensor([-0.2790, -0.6396, -0.6890, -0.4105]) tensor([0.3089, 0.2154, 0.2050, 0.2708]) -Greedy action tensor([ 0.0999, -1.4259, -0.5218, 1.8215]) tensor([0.1361, 0.0296, 0.0731, 0.7612]) -Greedy action tensor([-0.1968, 0.4745, 0.4830, 1.1392]) tensor([0.1145, 0.2240, 0.2260, 0.4355]) -Greedy action tensor([-0.3182, -0.6688, -0.7142, 0.0853]) tensor([0.2581, 0.1818, 0.1737, 0.3864]) -Greedy action tensor([ 0.0149, -0.7400, 0.5361, -0.2915]) tensor([0.2571, 0.1208, 0.4329, 0.1892]) -Greedy action tensor([ 0.7884, -0.5794, 0.4536, -1.0686]) tensor([0.4703, 0.1198, 0.3365, 0.0734]) -Greedy action tensor([ 0.9198, -0.2219, -0.1583, 0.4477]) tensor([0.4380, 0.1398, 0.1490, 0.2732]) -Greedy action tensor([-1.3398, -0.9584, 0.9826, 0.0541]) tensor([0.0599, 0.0877, 0.6110, 0.2414]) -Greedy action tensor([ 0.1381, 0.0048, 0.0839, -0.1101]) tensor([0.2776, 0.2429, 0.2629, 0.2166]) -Greedy action tensor([ 2.1469, -0.4085, 0.6063, 1.3646]) tensor([0.5717, 0.0444, 0.1225, 0.2615]) -Greedy action tensor([-0.1691, -0.3524, 0.9951, -0.7683]) tensor([0.1791, 0.1491, 0.5736, 0.0983]) -Greedy action tensor([ 0.2354, -1.2993, 1.1771, 0.2353]) tensor([0.2092, 0.0451, 0.5365, 0.2092]) -Greedy action tensor([-0.7903, -0.4759, -0.0252, -0.7255]) tensor([0.1790, 0.2452, 0.3848, 0.1910]) -Greedy action tensor([ 1.3847, -0.6508, -0.3989, 0.7816]) tensor([0.5418, 0.0708, 0.0910, 0.2964]) -Greedy action tensor([ 0.4349, -1.1121, -0.2523, 0.4251]) tensor([0.3695, 0.0787, 0.1859, 0.3659]) -Greedy action tensor([ 0.7660, -0.8468, 0.3673, -0.2393]) tensor([0.4471, 0.0891, 0.3001, 0.1636]) -Greedy action tensor([-0.1710, 1.0935, 0.3727, -0.4232]) tensor([0.1420, 0.5030, 0.2446, 0.1104]) -Greedy action tensor([ 0.1197, -0.9041, -0.5660, 0.6141]) tensor([0.2855, 0.1026, 0.1438, 0.4681]) -Greedy action tensor([-0.4435, -0.0217, 0.2098, -0.7582]) tensor([0.1932, 0.2945, 0.3713, 0.1410]) -Greedy action tensor([-0.1573, -0.4110, 0.6249, 0.1352]) tensor([0.1886, 0.1463, 0.4124, 0.2527]) -Greedy action tensor([ 1.0522, 0.3847, -0.6186, 0.6385]) tensor([0.4233, 0.2172, 0.0796, 0.2799]) -Greedy action tensor([ 0.6188, -0.4795, 0.6564, -0.3135]) tensor([0.3616, 0.1206, 0.3755, 0.1424]) -Greedy action tensor([ 0.2362, -0.6322, 1.8732, -0.4382]) tensor([0.1415, 0.0594, 0.7271, 0.0721]) -Greedy action tensor([-0.1673, -0.9395, -1.3802, -0.1784]) tensor([0.3639, 0.1681, 0.1082, 0.3598]) -Greedy action tensor([ 1.6613, -1.4188, 0.3745, 0.7750]) tensor([0.5766, 0.0265, 0.1592, 0.2377]) -Greedy action tensor([-0.0396, 0.5308, 1.0686, -0.0577]) tensor([0.1475, 0.2609, 0.4468, 0.1448]) -Greedy action tensor([ 0.0221, -1.0648, 0.3127, -0.3488]) tensor([0.2972, 0.1002, 0.3974, 0.2051]) -Greedy action tensor([ 0.6405, -0.0901, 1.0732, -0.3853]) tensor([0.2957, 0.1424, 0.4558, 0.1060]) -Greedy action tensor([ 0.9170, -0.5316, 1.7379, 0.6963]) tensor([0.2321, 0.0545, 0.5274, 0.1861]) -Greedy action tensor([ 0.0126, -0.2771, -0.7137, 0.0241]) tensor([0.3083, 0.2308, 0.1491, 0.3119]) -Greedy action tensor([ 0.2883, -0.5347, -0.0483, -0.5574]) tensor([0.3872, 0.1700, 0.2766, 0.1662]) -Greedy action tensor([ 0.4526, 0.2699, -0.5656, 0.0753]) tensor([0.3472, 0.2892, 0.1254, 0.2381]) -Greedy action tensor([ 1.1382, -0.3928, 0.2151, -0.1556]) tensor([0.5297, 0.1146, 0.2104, 0.1453]) -Greedy action tensor([ 0.8511, -0.6549, -0.1327, -0.3721]) tensor([0.5291, 0.1173, 0.1978, 0.1557]) -Greedy action tensor([ 0.2984, 0.2937, -0.1387, -0.1066]) tensor([0.3023, 0.3009, 0.1953, 0.2016]) -Greedy action tensor([ 0.1571, -0.9843, -0.2130, 1.4924]) tensor([0.1721, 0.0550, 0.1189, 0.6541]) -Greedy action tensor([ 0.1115, -0.3310, -0.7473, -0.4964]) tensor([0.3831, 0.2461, 0.1623, 0.2086]) -Greedy action tensor([ 1.0355, 0.8224, 0.8953, -0.5232]) tensor([0.3463, 0.2798, 0.3010, 0.0729]) -Greedy action tensor([-0.4906, 0.9542, -0.4763, -1.0664]) tensor([0.1467, 0.6221, 0.1488, 0.0825]) -Greedy action tensor([-0.7680, -0.5560, -0.7177, -0.4210]) tensor([0.2127, 0.2629, 0.2236, 0.3008]) -Greedy action tensor([ 0.2479, -0.9896, -0.1915, 0.0441]) tensor([0.3636, 0.1055, 0.2343, 0.2966]) -Greedy action tensor([0.0733, 0.1075, 0.1233, 0.3506]) tensor([0.2270, 0.2349, 0.2386, 0.2995]) -Greedy action tensor([ 0.5405, -1.0800, 0.6736, 0.5582]) tensor([0.2978, 0.0589, 0.3402, 0.3031]) -Greedy action tensor([-1.7806, -0.5580, 0.9588, 0.2817]) tensor([0.0361, 0.1224, 0.5580, 0.2835]) -Greedy action tensor([-0.8871, 0.5732, 0.1953, -0.0687]) tensor([0.0950, 0.4092, 0.2804, 0.2154]) -Greedy action tensor([-1.8824, -0.3875, 0.6327, -0.1376]) tensor([0.0425, 0.1893, 0.5251, 0.2431]) -Greedy action tensor([-1.5040, 0.3622, 0.3551, 0.0393]) tensor([0.0539, 0.3482, 0.3458, 0.2521]) -Greedy action tensor([-1.7271, -0.4566, 0.6839, 0.0860]) tensor([0.0458, 0.1632, 0.5104, 0.2807]) -Greedy action tensor([-1.9019, -0.4404, 0.6480, -0.1555]) tensor([0.0419, 0.1808, 0.5369, 0.2404]) -Greedy action tensor([-1.8431, -0.4567, 0.8099, 0.1604]) tensor([0.0376, 0.1503, 0.5335, 0.2786]) -Greedy action tensor([-1.8542, -0.4709, 0.6213, -0.1492]) tensor([0.0447, 0.1782, 0.5312, 0.2459]) -Greedy action tensor([-1.7993, -0.3919, 0.5885, -0.0960]) tensor([0.0466, 0.1903, 0.5073, 0.2558]) -Greedy action tensor([-1.9425, -0.4522, 0.6696, -0.1771]) tensor([0.0401, 0.1782, 0.5471, 0.2346]) -Greedy action tensor([-1.8968, -0.4108, 0.6556, -0.1448]) tensor([0.0416, 0.1840, 0.5344, 0.2400]) -Greedy action tensor([-1.9342, -0.4433, 0.6638, -0.1742]) tensor([0.0405, 0.1799, 0.5442, 0.2354]) -Greedy action tensor([-1.9390, -0.4459, 0.6657, -0.1774]) tensor([0.0403, 0.1795, 0.5455, 0.2347]) -Greedy action tensor([-1.8333, -0.4665, 0.6184, -0.1262]) tensor([0.0454, 0.1780, 0.5266, 0.2501]) -Greedy action tensor([-1.4364, -0.1340, 0.6513, -0.7113]) tensor([0.0675, 0.2484, 0.5447, 0.1394]) -Greedy action tensor([-1.7033, -0.1848, 0.5201, -0.1004]) tensor([0.0506, 0.2309, 0.4673, 0.2512]) -Greedy action tensor([-1.5141, 0.0630, 0.3592, 0.0395]) tensor([0.0586, 0.2834, 0.3812, 0.2769]) -Greedy action tensor([-1.8929, -0.4126, 0.6359, -0.1614]) tensor([0.0424, 0.1863, 0.5317, 0.2395]) -Greedy action tensor([-1.9429, -0.4469, 0.6669, -0.1779]) tensor([0.0402, 0.1793, 0.5460, 0.2346]) -Greedy action tensor([-1.8457, -0.4379, 0.6147, -0.1313]) tensor([0.0447, 0.1829, 0.5239, 0.2485]) -Greedy action tensor([-1.7912, 0.2031, 0.5335, -0.1876]) tensor([0.0425, 0.3121, 0.4343, 0.2111]) -Greedy action tensor([-1.9095, -0.4399, 0.6701, -0.1522]) tensor([0.0411, 0.1786, 0.5421, 0.2382]) -Greedy action tensor([-1.6734, 0.2293, 0.4269, 0.0192]) tensor([0.0469, 0.3146, 0.3834, 0.2550]) -Greedy action tensor([-1.9276, -0.3752, 0.6450, -0.1754]) tensor([0.0407, 0.1921, 0.5327, 0.2345]) -Greedy action tensor([-1.2081, -0.5952, 0.5216, 0.5606]) tensor([0.0697, 0.1287, 0.3930, 0.4086]) -Greedy action tensor([-1.8959, -0.3542, 0.6379, -0.1371]) tensor([0.0415, 0.1941, 0.5233, 0.2411]) -Greedy action tensor([-1.4449, -0.6421, 0.4834, 0.1127]) tensor([0.0673, 0.1502, 0.4629, 0.3196]) -Greedy action tensor([-1.9381, -0.4516, 0.6631, -0.1758]) tensor([0.0404, 0.1788, 0.5451, 0.2356]) -Greedy action tensor([-0.6229, 0.5865, 0.0959, 0.2787]) tensor([0.1128, 0.3780, 0.2314, 0.2778]) -Greedy action tensor([-1.8873, -0.3896, 0.6268, -0.1504]) tensor([0.0425, 0.1902, 0.5256, 0.2416]) -Greedy action tensor([-1.0654, -0.6508, 0.4391, 0.0579]) tensor([0.0991, 0.1500, 0.4461, 0.3047]) -Greedy action tensor([-1.9119, -0.4501, 0.6553, -0.1634]) tensor([0.0415, 0.1791, 0.5409, 0.2385]) -Greedy action tensor([-1.4775, -0.2478, 0.3968, 0.0344]) tensor([0.0646, 0.2211, 0.4212, 0.2931]) -Greedy action tensor([-1.2516, 0.1660, 0.4553, 0.6838]) tensor([0.0569, 0.2350, 0.3138, 0.3943]) -Greedy action tensor([-1.8079, -0.2839, 0.5754, -0.1141]) tensor([0.0457, 0.2099, 0.4956, 0.2487]) -Greedy action tensor([-1.9091, -0.4584, 0.6661, -0.1262]) tensor([0.0411, 0.1752, 0.5394, 0.2443]) -Greedy action tensor([-1.8635, -0.3269, 0.6240, -0.1334]) tensor([0.0429, 0.1993, 0.5159, 0.2419]) -Greedy action tensor([-1.9014, -0.4408, 0.6417, -0.1587]) tensor([0.0421, 0.1815, 0.5357, 0.2406]) -Greedy action tensor([-1.5803, -0.1372, 0.4533, -0.0379]) tensor([0.0570, 0.2412, 0.4354, 0.2664]) -Greedy action tensor([-1.7414, -0.2929, 0.5806, -0.0359]) tensor([0.0477, 0.2031, 0.4865, 0.2626]) -Greedy action tensor([-1.9235, -0.4413, 0.6570, -0.1707]) tensor([0.0410, 0.1806, 0.5416, 0.2367]) -Greedy action tensor([-1.2517, -0.3226, 0.2969, 0.2596]) tensor([0.0783, 0.1983, 0.3685, 0.3549]) -Greedy action tensor([-1.9146, -0.4402, 0.6524, -0.1639]) tensor([0.0414, 0.1809, 0.5393, 0.2384]) -Greedy action tensor([-1.9298, -0.4343, 0.6599, -0.1718]) tensor([0.0407, 0.1815, 0.5420, 0.2359]) -Greedy action tensor([-0.9154, -0.1943, 0.1764, 0.3605]) tensor([0.1040, 0.2138, 0.3098, 0.3724]) -Greedy action tensor([-1.9074, -0.4175, 0.6401, -0.1573]) tensor([0.0417, 0.1851, 0.5330, 0.2401]) -Greedy action tensor([-1.8368, -0.4613, 0.6952, 0.0146]) tensor([0.0418, 0.1655, 0.5262, 0.2664]) -Greedy action tensor([-1.6900, -0.3228, 0.5925, 0.0111]) tensor([0.0495, 0.1942, 0.4851, 0.2712]) -Greedy action tensor([-1.5745, -0.5804, 0.5147, 0.0341]) tensor([0.0596, 0.1611, 0.4815, 0.2978]) -Greedy action tensor([-1.8486, -0.2530, 0.6081, -0.1164]) tensor([0.0430, 0.2121, 0.5018, 0.2431]) -Greedy action tensor([-1.4395, -0.5831, 0.4327, 0.0693]) tensor([0.0695, 0.1638, 0.4523, 0.3144]) -Greedy action tensor([-1.9420, -0.4542, 0.6745, -0.1764]) tensor([0.0401, 0.1774, 0.5484, 0.2342]) -Greedy action tensor([-1.6544, -0.5527, 0.4552, -0.0706]) tensor([0.0584, 0.1757, 0.4814, 0.2845]) -Greedy action tensor([-1.9136, -0.3880, 0.6441, -0.1581]) tensor([0.0412, 0.1893, 0.5313, 0.2382]) -Greedy action tensor([-1.7815, -0.2611, 0.5699, -0.0581]) tensor([0.0461, 0.2110, 0.4844, 0.2585]) -Greedy action tensor([-1.5652, -0.2732, 0.5911, 0.0577]) tensor([0.0545, 0.1984, 0.4709, 0.2762]) -Greedy action tensor([-1.9234, -0.4444, 0.6593, -0.1679]) tensor([0.0410, 0.1798, 0.5421, 0.2371]) -Greedy action tensor([-1.7165, -0.3830, 0.5657, -0.1968]) tensor([0.0522, 0.1980, 0.5113, 0.2385]) -Greedy action tensor([-1.5739, -0.2419, 0.6189, 0.1307]) tensor([0.0520, 0.1968, 0.4655, 0.2857]) -Greedy action tensor([-1.8495, -0.4335, 0.6196, -0.1381]) tensor([0.0445, 0.1834, 0.5257, 0.2464]) -Greedy action tensor([-1.8815, -0.3608, 0.6343, -0.1409]) tensor([0.0423, 0.1934, 0.5233, 0.2410]) -Greedy action tensor([-1.9249, -0.4652, 0.6582, -0.1690]) tensor([0.0411, 0.1769, 0.5441, 0.2379]) -Greedy action tensor([-1.6415, -0.5329, 0.5073, 0.0051]) tensor([0.0562, 0.1703, 0.4819, 0.2916]) -Greedy action tensor([-1.6174, -0.2986, 0.6659, 0.0697]) tensor([0.0501, 0.1874, 0.4916, 0.2708]) -Greedy action tensor([-1.8915, -0.3614, 0.6310, -0.1529]) tensor([0.0421, 0.1943, 0.5242, 0.2394]) -Greedy action tensor([-1.8652, -0.3264, 0.6078, -0.1362]) tensor([0.0432, 0.2012, 0.5122, 0.2434]) -Greedy action tensor([-1.9437, -0.4529, 0.6685, -0.1786]) tensor([0.0401, 0.1783, 0.5471, 0.2345]) -Greedy action tensor([-1.6682, -0.4204, 0.7136, 0.2241]) tensor([0.0456, 0.1587, 0.4933, 0.3024]) -Greedy action tensor([-1.8664, -0.4536, 0.6335, -0.1366]) tensor([0.0436, 0.1791, 0.5313, 0.2460]) -Greedy action tensor([-1.9093, -0.4519, 0.6863, -0.1395]) tensor([0.0407, 0.1748, 0.5456, 0.2389]) -Greedy action tensor([-1.6478, -0.3826, 0.6663, 0.1321]) tensor([0.0486, 0.1721, 0.4913, 0.2880]) -Greedy action tensor([-1.8148, -0.3302, 0.6431, -0.0872]) tensor([0.0440, 0.1942, 0.5141, 0.2477]) -Greedy action tensor([-1.8445, -0.3657, 0.6096, -0.1227]) tensor([0.0442, 0.1940, 0.5144, 0.2474]) -Greedy action tensor([-1.9285, -0.4330, 0.6613, -0.1719]) tensor([0.0407, 0.1815, 0.5422, 0.2357]) -Greedy action tensor([-1.9092, -0.4233, 0.6538, -0.1595]) tensor([0.0414, 0.1830, 0.5373, 0.2382]) -Greedy action tensor([-1.7417, -0.4404, 0.5746, -0.1092]) tensor([0.0502, 0.1844, 0.5087, 0.2567]) -Greedy action tensor([-1.9194, -0.4279, 0.6592, -0.1689]) tensor([0.0410, 0.1823, 0.5405, 0.2362]) -Greedy action tensor([-1.9074, -0.4540, 0.6552, -0.1596]) tensor([0.0417, 0.1783, 0.5406, 0.2394]) -Greedy action tensor([-1.5495, -0.4832, 0.5212, 0.0901]) tensor([0.0589, 0.1710, 0.4668, 0.3033]) -Greedy action tensor([-1.7019, -0.1322, 0.5142, -0.1126]) tensor([0.0503, 0.2417, 0.4614, 0.2465]) -Greedy action tensor([-1.8086, -0.2895, 0.5914, -0.1227]) tensor([0.0455, 0.2077, 0.5013, 0.2455]) -Greedy action tensor([ 0.7823, -0.7555, -0.0616, -0.3190]) tensor([0.5057, 0.1087, 0.2175, 0.1681]) -Greedy action tensor([ 0.7798, -0.3802, -0.0177, -0.2186]) tensor([0.4689, 0.1470, 0.2112, 0.1728]) -Greedy action tensor([ 1.2536, -0.9857, -0.0410, -0.8625]) tensor([0.6662, 0.0710, 0.1825, 0.0803]) -Greedy action tensor([ 0.7757, -0.4827, 0.0123, -0.3910]) tensor([0.4851, 0.1378, 0.2261, 0.1510]) -Greedy action tensor([ 0.8362, -0.4779, -0.0201, -0.3537]) tensor([0.5006, 0.1345, 0.2126, 0.1523]) -Greedy action tensor([ 0.2891, 0.0493, -0.0736, -0.0346]) tensor([0.3119, 0.2454, 0.2170, 0.2256]) -Greedy action tensor([ 0.6781, -0.3517, -0.0459, -0.1999]) tensor([0.4430, 0.1582, 0.2148, 0.1841]) -Greedy action tensor([ 0.5857, -0.2942, -0.0038, -0.6690]) tensor([0.4435, 0.1840, 0.2460, 0.1265]) -Greedy action tensor([ 0.7434, -0.4379, -0.0663, -0.4291]) tensor([0.4851, 0.1489, 0.2159, 0.1502]) -Greedy action tensor([ 0.5160, 0.0383, -0.1067, 0.0262]) tensor([0.3611, 0.2239, 0.1937, 0.2213]) -Greedy action tensor([ 0.7420, -0.2914, 0.0121, -0.5792]) tensor([0.4752, 0.1691, 0.2290, 0.1268]) -Greedy action tensor([ 0.1721, 0.0865, -0.1062, -0.2436]) tensor([0.2999, 0.2753, 0.2270, 0.1979]) -Greedy action tensor([ 0.4686, -0.1485, 0.0610, -0.2400]) tensor([0.3708, 0.2000, 0.2467, 0.1825]) -Greedy action tensor([ 0.3750, -0.2309, 0.0021, -0.3159]) tensor([0.3656, 0.1995, 0.2518, 0.1832]) -Greedy action tensor([ 0.6381, -0.1506, -0.0625, -0.1394]) tensor([0.4149, 0.1885, 0.2059, 0.1907]) -Greedy action tensor([ 0.5090, -0.4969, -0.0614, -0.2684]) tensor([0.4183, 0.1530, 0.2365, 0.1923]) -Greedy action tensor([ 0.5108, 0.0303, 0.2128, -0.1264]) tensor([0.3461, 0.2140, 0.2569, 0.1830]) -Greedy action tensor([ 0.8427, -1.0949, 0.1596, -0.7428]) tensor([0.5394, 0.0777, 0.2724, 0.1105]) -Greedy action tensor([ 0.6080, -0.4753, -0.0820, -0.2213]) tensor([0.4393, 0.1487, 0.2203, 0.1917]) -Greedy action tensor([ 0.8348, -0.6503, 0.0669, -0.3816]) tensor([0.5033, 0.1140, 0.2335, 0.1491]) -Greedy action tensor([ 0.5021, -0.0057, -0.1563, -0.1376]) tensor([0.3778, 0.2274, 0.1956, 0.1993]) -Greedy action tensor([ 0.6679, -0.5886, -0.1540, -0.2191]) tensor([0.4681, 0.1333, 0.2058, 0.1928]) -Greedy action tensor([ 0.6494, -0.1957, 0.0888, -0.3560]) tensor([0.4226, 0.1815, 0.2412, 0.1546]) -Greedy action tensor([ 1.1943, -0.5567, -0.0512, -0.2789]) tensor([0.5915, 0.1027, 0.1702, 0.1356]) -Greedy action tensor([ 1.0258, -0.6535, -0.1193, -0.3204]) tensor([0.5666, 0.1057, 0.1803, 0.1474]) -Greedy action tensor([ 0.6097, -0.4333, -0.0965, -0.2307]) tensor([0.4391, 0.1547, 0.2167, 0.1895]) -Greedy action tensor([ 0.8258, -0.5786, -0.1530, -0.3524]) tensor([0.5184, 0.1273, 0.1948, 0.1596]) -Greedy action tensor([ 0.6459, -0.5244, -0.2194, -0.3814]) tensor([0.4787, 0.1485, 0.2015, 0.1714]) -Greedy action tensor([ 0.8224, -0.7123, -0.1241, -0.3233]) tensor([0.5204, 0.1122, 0.2020, 0.1655]) -Greedy action tensor([ 0.7068, -0.5932, -0.2072, -0.3643]) tensor([0.4960, 0.1352, 0.1989, 0.1699]) -Greedy action tensor([ 0.7612, -0.5015, 0.0200, -0.1549]) tensor([0.4631, 0.1310, 0.2207, 0.1853]) -Greedy action tensor([ 0.7199, -0.4991, 0.0159, -0.1226]) tensor([0.4503, 0.1331, 0.2227, 0.1939]) -Greedy action tensor([ 1.0006, -0.6047, -0.2160, -0.5220]) tensor([0.5830, 0.1171, 0.1727, 0.1272]) -Greedy action tensor([ 0.8154, -0.5346, 0.0998, -0.7179]) tensor([0.5092, 0.1320, 0.2489, 0.1099]) -Greedy action tensor([ 0.4753, -0.1375, -0.0160, -0.1499]) tensor([0.3719, 0.2015, 0.2275, 0.1990]) -Greedy action tensor([ 1.0017, -0.7641, 0.0093, -0.4228]) tensor([0.5611, 0.0960, 0.2080, 0.1350]) -Greedy action tensor([ 0.7720, -0.1857, -0.0320, -0.0310]) tensor([0.4387, 0.1684, 0.1963, 0.1965]) -Greedy action tensor([ 0.6447, -0.5070, -0.1279, -0.2715]) tensor([0.4591, 0.1451, 0.2120, 0.1837]) -Greedy action tensor([ 1.0378, -0.8158, 0.0404, -0.5120]) tensor([0.5754, 0.0902, 0.2122, 0.1222]) -Greedy action tensor([ 0.5739, -0.4817, -0.0999, -0.2682]) tensor([0.4370, 0.1521, 0.2227, 0.1883]) -Greedy action tensor([ 0.8344, -0.5674, -0.0540, -0.6250]) tensor([0.5291, 0.1303, 0.2176, 0.1230]) -Greedy action tensor([ 0.9697, -0.7344, 0.0481, -0.3824]) tensor([0.5439, 0.0990, 0.2164, 0.1407]) -Greedy action tensor([ 0.5943, -0.1406, -0.0293, -0.2806]) tensor([0.4111, 0.1971, 0.2203, 0.1714]) -Greedy action tensor([ 1.2346, -0.8280, 0.0290, -0.7076]) tensor([0.6369, 0.0810, 0.1908, 0.0913]) -Greedy action tensor([ 0.2883, -0.0565, -0.0086, -0.0354]) tensor([0.3150, 0.2231, 0.2341, 0.2279]) -Greedy action tensor([ 0.9631, -0.5153, -0.1794, -0.2957]) tensor([0.5461, 0.1245, 0.1742, 0.1551]) -Greedy action tensor([ 0.8101, -0.1761, -0.0863, -0.0909]) tensor([0.4572, 0.1705, 0.1866, 0.1857]) -Greedy action tensor([ 0.6994, -0.3894, 0.0278, -0.3368]) tensor([0.4541, 0.1528, 0.2320, 0.1611]) -Greedy action tensor([ 1.3293, -0.6919, -0.1607, -0.6783]) tensor([0.6702, 0.0888, 0.1510, 0.0900]) -Greedy action tensor([ 0.2972, 0.2071, -0.0398, -0.3198]) tensor([0.3157, 0.2885, 0.2254, 0.1704]) -Greedy action tensor([ 0.7152, -0.4233, -0.1248, -0.1290]) tensor([0.4583, 0.1468, 0.1979, 0.1970]) -Greedy action tensor([ 0.8076, -0.6189, -0.0862, -0.3496]) tensor([0.5093, 0.1223, 0.2083, 0.1601]) -Greedy action tensor([ 0.5988, -0.4283, -0.0650, -0.4991]) tensor([0.4532, 0.1623, 0.2333, 0.1512]) -Greedy action tensor([ 0.6915, -0.1513, 0.0674, -0.0600]) tensor([0.4102, 0.1766, 0.2197, 0.1935]) -Greedy action tensor([ 0.7289, -0.4436, 0.0080, -0.4218]) tensor([0.4734, 0.1466, 0.2302, 0.1498]) -Greedy action tensor([ 0.7732, -0.5311, -0.1601, -0.2939]) tensor([0.4979, 0.1351, 0.1958, 0.1713]) -Greedy action tensor([ 0.9702, -0.8986, -0.0027, -0.3565]) tensor([0.5563, 0.0858, 0.2103, 0.1476]) -Greedy action tensor([ 1.0720e+00, -7.9706e-01, -8.6707e-04, -8.0424e-01]) tensor([0.6063, 0.0935, 0.2074, 0.0929]) -Greedy action tensor([ 0.6628, -0.4267, -0.2118, -0.4123]) tensor([0.4774, 0.1606, 0.1991, 0.1629]) -Greedy action tensor([ 0.7965, -0.3750, 0.0852, -0.3009]) tensor([0.4684, 0.1452, 0.2300, 0.1563]) -Greedy action tensor([ 0.7031, -0.3545, -0.0924, -0.0462]) tensor([0.4403, 0.1529, 0.1987, 0.2081]) -Greedy action tensor([ 0.8319, -0.4106, -0.1369, -0.3877]) tensor([0.5093, 0.1470, 0.1933, 0.1504]) -Greedy action tensor([ 0.6524, -0.4120, -0.0725, -0.3974]) tensor([0.4588, 0.1583, 0.2223, 0.1606]) -Greedy action tensor([ 0.8174, -0.6200, -0.0324, -0.4430]) tensor([0.5132, 0.1219, 0.2194, 0.1455]) -Greedy action tensor([ 0.7113, -0.2244, 0.0278, -0.2141]) tensor([0.4360, 0.1711, 0.2201, 0.1728]) -Greedy action tensor([ 0.8746, -0.3949, -0.0779, -0.3044]) tensor([0.5065, 0.1423, 0.1954, 0.1558]) -Greedy action tensor([ 0.7323, 0.1852, -0.1274, -0.1898]) tensor([0.4167, 0.2411, 0.1764, 0.1657]) -Greedy action tensor([0.2836, 0.1551, 0.2047, 0.1711]) tensor([0.2705, 0.2379, 0.2500, 0.2417]) -Greedy action tensor([ 0.8262, -0.3363, -0.0507, -0.3127]) tensor([0.4881, 0.1526, 0.2031, 0.1563]) -Greedy action tensor([ 1.0326, -0.8510, 0.1688, -0.5475]) tensor([0.5619, 0.0854, 0.2369, 0.1157]) -Greedy action tensor([ 0.6761, -0.2757, 0.0217, -0.1310]) tensor([0.4252, 0.1641, 0.2210, 0.1897]) -Greedy action tensor([ 0.5395, -0.4279, -0.1155, -0.0359]) tensor([0.4062, 0.1544, 0.2110, 0.2285]) -Greedy action tensor([ 0.6398, -0.5950, -0.0876, -0.2417]) tensor([0.4570, 0.1329, 0.2208, 0.1893]) -Greedy action tensor([ 0.7976, -0.5876, -0.0375, -0.1881]) tensor([0.4861, 0.1217, 0.2109, 0.1814]) -Greedy action tensor([ 0.4833, -0.3464, 0.0472, -0.0832]) tensor([0.3773, 0.1646, 0.2440, 0.2141]) -Greedy action tensor([ 1.0683, -0.4634, 0.0095, -0.2476]) tensor([0.5461, 0.1180, 0.1894, 0.1465]) -Greedy action tensor([ 0.8569, -0.6088, 0.1327, -0.4216]) tensor([0.5015, 0.1158, 0.2431, 0.1396]) -Greedy action tensor([ 0.5281, -0.0813, 0.0142, -0.0804]) tensor([0.3723, 0.2024, 0.2227, 0.2026]) -Greedy action tensor([ 0.6002, -0.4833, -0.1492, -0.3057]) tensor([0.4514, 0.1528, 0.2134, 0.1825]) -Greedy action tensor([ 0.9403, -0.5608, -0.0353, -0.2344]) tensor([0.5239, 0.1168, 0.1975, 0.1619]) -Greedy action tensor([ 0.0930, 0.2936, -0.1807, -0.4693]) tensor([0.2815, 0.3440, 0.2141, 0.1604]) -Greedy action tensor([ 1.5097, -0.5691, -0.3358, 0.4470]) tensor([0.6140, 0.0768, 0.0970, 0.2122]) -Greedy action tensor([ 1.2200, -0.5471, -0.5393, 0.6902]) tensor([0.5177, 0.0884, 0.0891, 0.3048]) -Greedy action tensor([ 1.0365, -0.3760, -0.4462, 0.2353]) tensor([0.5210, 0.1269, 0.1183, 0.2338]) -Greedy action tensor([ 1.5059, -0.2724, -0.0555, 0.2096]) tensor([0.6052, 0.1022, 0.1270, 0.1656]) -Greedy action tensor([ 1.7241, -0.2467, -0.4455, 0.5739]) tensor([0.6369, 0.0888, 0.0727, 0.2016]) -Greedy action tensor([ 1.6493, -0.2606, -0.5441, 0.0162]) tensor([0.6873, 0.1018, 0.0767, 0.1342]) -Greedy action tensor([ 2.4485, -0.4584, -0.0601, 0.4554]) tensor([0.7860, 0.0430, 0.0640, 0.1071]) -Greedy action tensor([ 2.1091, -0.9360, -0.4169, 0.8704]) tensor([0.7055, 0.0336, 0.0564, 0.2044]) -Greedy action tensor([ 1.9988, -0.6643, -0.3637, 0.1603]) tensor([0.7559, 0.0527, 0.0712, 0.1202]) -Greedy action tensor([ 1.2127, -0.2105, -0.2446, 0.5279]) tensor([0.5056, 0.1218, 0.1177, 0.2549]) -Greedy action tensor([ 1.6169, -0.6741, -0.3295, 0.3042]) tensor([0.6609, 0.0669, 0.0944, 0.1778]) -Greedy action tensor([ 1.1384, -0.3660, -0.1485, 0.1621]) tensor([0.5333, 0.1185, 0.1473, 0.2009]) -Greedy action tensor([ 1.7421, -0.1834, -1.0406, 0.2149]) tensor([0.7019, 0.1023, 0.0434, 0.1524]) -Greedy action tensor([ 1.6688, -0.6687, -0.5121, 0.5511]) tensor([0.6508, 0.0628, 0.0735, 0.2128]) -Greedy action tensor([ 1.5067, -0.4091, -0.5709, 0.4561]) tensor([0.6165, 0.0908, 0.0772, 0.2156]) -Greedy action tensor([ 1.8822, -0.6100, -0.6647, 0.1857]) tensor([0.7438, 0.0615, 0.0583, 0.1364]) -Greedy action tensor([ 1.5025, -0.4951, -0.2468, -0.1281]) tensor([0.6643, 0.0901, 0.1155, 0.1301]) -Greedy action tensor([ 1.7044, -0.5743, -0.0587, 0.2475]) tensor([0.6636, 0.0680, 0.1138, 0.1546]) -Greedy action tensor([ 2.0518, -0.4869, -0.2561, 0.1784]) tensor([0.7507, 0.0593, 0.0747, 0.1153]) -Greedy action tensor([ 1.1860, -0.4747, -0.5302, 0.8300]) tensor([0.4831, 0.0918, 0.0868, 0.3383]) -Greedy action tensor([ 0.7842, 0.1308, -0.1116, 0.1802]) tensor([0.4040, 0.2102, 0.1649, 0.2208]) -Greedy action tensor([ 2.2100, -0.6436, -0.9928, 0.1874]) tensor([0.8126, 0.0468, 0.0330, 0.1075]) -Greedy action tensor([ 0.7518, -0.1210, -0.1642, 0.1365]) tensor([0.4240, 0.1771, 0.1697, 0.2292]) -Greedy action tensor([ 1.5521, -0.3638, -0.3499, 0.3722]) tensor([0.6235, 0.0918, 0.0931, 0.1916]) -Greedy action tensor([ 1.4597, -0.7168, -0.4582, 0.8928]) tensor([0.5472, 0.0621, 0.0804, 0.3104]) -Greedy action tensor([ 1.0931, 0.1431, -0.4352, 0.5389]) tensor([0.4591, 0.1775, 0.0996, 0.2638]) -Greedy action tensor([ 0.9668, 0.0128, -0.6817, 0.2556]) tensor([0.4834, 0.1862, 0.0930, 0.2374]) -Greedy action tensor([ 1.6621, -1.2536, -0.0774, 0.5028]) tensor([0.6479, 0.0351, 0.1138, 0.2032]) -Greedy action tensor([ 1.3316, -0.4829, -0.2551, 0.3268]) tensor([0.5768, 0.0940, 0.1180, 0.2112]) -Greedy action tensor([ 1.2224, -0.4055, -0.0392, 0.1526]) tensor([0.5487, 0.1077, 0.1554, 0.1882]) -Greedy action tensor([ 1.5894, 0.1415, -0.9096, 0.4931]) tensor([0.6056, 0.1423, 0.0498, 0.2023]) -Greedy action tensor([ 1.6680, -0.4594, -0.2201, 0.0850]) tensor([0.6776, 0.0807, 0.1026, 0.1391]) -Greedy action tensor([ 1.2386, -0.3417, -0.3530, 0.1945]) tensor([0.5677, 0.1169, 0.1156, 0.1998]) -Greedy action tensor([ 0.8767, 0.1180, -0.0598, -0.0614]) tensor([0.4441, 0.2080, 0.1741, 0.1738]) -Greedy action tensor([ 1.6266, -0.7320, 0.1022, 0.2960]) tensor([0.6343, 0.0600, 0.1381, 0.1677]) -Greedy action tensor([ 1.5447, -0.3733, -0.4286, 0.4602]) tensor([0.6158, 0.0905, 0.0856, 0.2082]) -Greedy action tensor([ 0.6277, -0.0605, -0.0716, 0.2322]) tensor([0.3741, 0.1880, 0.1859, 0.2519]) -Greedy action tensor([ 1.6378, -0.6643, -0.5272, 0.3901]) tensor([0.6658, 0.0666, 0.0764, 0.1912]) -Greedy action tensor([ 1.7674, -0.5410, -0.9510, 0.2635]) tensor([0.7206, 0.0716, 0.0475, 0.1602]) -Greedy action tensor([ 1.6881, -0.7488, -0.0183, 0.4218]) tensor([0.6448, 0.0564, 0.1170, 0.1817]) -Greedy action tensor([ 1.2712, -0.7400, -0.1736, 0.3678]) tensor([0.5634, 0.0754, 0.1329, 0.2283]) -Greedy action tensor([ 1.4352, -0.7521, -0.1589, 0.2707]) tensor([0.6145, 0.0690, 0.1248, 0.1918]) -Greedy action tensor([ 1.5656, -0.2197, -0.7612, 0.4987]) tensor([0.6213, 0.1042, 0.0606, 0.2138]) -Greedy action tensor([ 1.1519, -0.5078, 0.0501, 0.2365]) tensor([0.5201, 0.0989, 0.1728, 0.2082]) -Greedy action tensor([ 1.2271, -0.2967, -0.2691, 0.1699]) tensor([0.5589, 0.1218, 0.1252, 0.1942]) -Greedy action tensor([ 1.4067, -0.2686, -0.8028, 0.3811]) tensor([0.6040, 0.1131, 0.0663, 0.2166]) -Greedy action tensor([ 1.9446, -0.8092, -0.5554, 0.1573]) tensor([0.7615, 0.0485, 0.0625, 0.1275]) -Greedy action tensor([ 1.7199, -0.2856, -0.4144, 0.5613]) tensor([0.6382, 0.0859, 0.0755, 0.2004]) -Greedy action tensor([ 1.8216, -0.9349, -0.4846, 0.2107]) tensor([0.7338, 0.0466, 0.0731, 0.1465]) -Greedy action tensor([ 2.0239, -1.0948, -0.1272, 0.2632]) tensor([0.7505, 0.0332, 0.0873, 0.1290]) -Greedy action tensor([ 1.9201, 0.1564, -0.2404, 0.5171]) tensor([0.6525, 0.1118, 0.0752, 0.1604]) -Greedy action tensor([ 1.1188, -0.1027, -0.0365, 0.4792]) tensor([0.4679, 0.1379, 0.1474, 0.2468]) -Greedy action tensor([ 1.5644, 0.2306, -0.2879, 0.2557]) tensor([0.5915, 0.1559, 0.0928, 0.1598]) -Greedy action tensor([ 1.7412, -0.6926, -0.4277, 0.4852]) tensor([0.6726, 0.0590, 0.0769, 0.1916]) -Greedy action tensor([ 1.3460, -0.0980, -0.0571, 0.2402]) tensor([0.5516, 0.1302, 0.1356, 0.1826]) -Greedy action tensor([ 0.9068, -0.1539, 0.1579, -0.0957]) tensor([0.4574, 0.1584, 0.2163, 0.1679]) -Greedy action tensor([ 1.0866, -0.4015, -0.1631, 0.2540]) tensor([0.5135, 0.1160, 0.1472, 0.2233]) -Greedy action tensor([ 1.4014, -0.4386, -0.6450, 0.5618]) tensor([0.5814, 0.0923, 0.0751, 0.2511]) -Greedy action tensor([ 1.3735, -0.5545, -0.1202, 0.3369]) tensor([0.5798, 0.0843, 0.1302, 0.2056]) -Greedy action tensor([ 1.9923, -0.8985, -0.1898, 0.4072]) tensor([0.7282, 0.0404, 0.0821, 0.1492]) -Greedy action tensor([ 1.8075, -0.6402, 0.1160, -0.1156]) tensor([0.7058, 0.0610, 0.1300, 0.1031]) -Greedy action tensor([ 1.3344, -0.4133, -0.6534, 0.1911]) tensor([0.6135, 0.1069, 0.0841, 0.1956]) -Greedy action tensor([ 0.4852, -0.4332, -0.0034, 0.2099]) tensor([0.3608, 0.1440, 0.2213, 0.2739]) -Greedy action tensor([ 1.9172, -0.9227, -0.0965, 0.2655]) tensor([0.7227, 0.0422, 0.0965, 0.1386]) -Greedy action tensor([ 1.5809, -0.3562, 0.0586, 0.7254]) tensor([0.5595, 0.0806, 0.1221, 0.2378]) -Greedy action tensor([ 1.1612, -0.2629, -0.1045, 0.2701]) tensor([0.5174, 0.1245, 0.1459, 0.2122]) -Greedy action tensor([ 1.7820, -0.6641, -0.3435, 0.5847]) tensor([0.6631, 0.0574, 0.0792, 0.2003]) -Greedy action tensor([ 0.6776, -0.0981, -0.1165, 0.1970]) tensor([0.3951, 0.1819, 0.1786, 0.2444]) -Greedy action tensor([ 1.5218, -0.8199, -0.0216, 0.2942]) tensor([0.6239, 0.0600, 0.1333, 0.1828]) -Greedy action tensor([ 1.1564, -0.4054, -0.2642, 0.7231]) tensor([0.4762, 0.0999, 0.1151, 0.3088]) -Greedy action tensor([ 1.1957, -0.2605, -0.3819, 0.4380]) tensor([0.5240, 0.1222, 0.1082, 0.2456]) -Greedy action tensor([ 1.3690, -0.7389, -0.2066, 0.0064]) tensor([0.6312, 0.0767, 0.1306, 0.1616]) -Greedy action tensor([ 1.9354, -0.6684, -0.3510, 0.1460]) tensor([0.7448, 0.0551, 0.0757, 0.1244]) -Greedy action tensor([ 1.5107, -0.9649, 0.1237, 0.4483]) tensor([0.5954, 0.0501, 0.1487, 0.2058]) -Greedy action tensor([ 0.5043, -0.2043, -0.0300, 0.1146]) tensor([0.3629, 0.1787, 0.2127, 0.2458]) -Greedy action tensor([ 1.2437, -0.6119, -0.1803, 0.2443]) tensor([0.5665, 0.0886, 0.1364, 0.2085]) -Greedy action tensor([ 1.6529, -0.8782, -0.0252, 0.3210]) tensor([0.6535, 0.0520, 0.1220, 0.1725]) -Greedy action tensor([ 2.0750, -0.4466, -0.4866, 0.9131]) tensor([0.6801, 0.0546, 0.0525, 0.2128]) -Greedy action tensor([ 0.9331, -0.6879, 0.0140, 0.4020]) tensor([0.4578, 0.0905, 0.1826, 0.2691]) -Greedy action tensor([ 1.6074, -0.3700, -0.2101, 0.3844]) tensor([0.6269, 0.0868, 0.1018, 0.1845]) -Greedy action tensor([ 1.4761, -0.1470, -0.7434, 0.5073]) tensor([0.5933, 0.1171, 0.0645, 0.2252]) -Greedy action tensor([ 0.1196, -1.2090, 0.0820, -0.1782]) tensor([0.3367, 0.0892, 0.3242, 0.2500]) -Greedy action tensor([ 0.2409, -1.3898, -0.1329, 0.1955]) tensor([0.3522, 0.0690, 0.2423, 0.3365]) -Greedy action tensor([-0.5513, -1.2136, -0.1003, 0.2318]) tensor([0.1896, 0.0978, 0.2977, 0.4149]) -Greedy action tensor([-0.3200, -1.0955, 0.2017, -0.6232]) tensor([0.2575, 0.1186, 0.4338, 0.1901]) -Greedy action tensor([ 0.5981, -0.4602, -0.2825, 0.3532]) tensor([0.3930, 0.1364, 0.1629, 0.3077]) -Greedy action tensor([ 0.8781, -1.6513, -0.0520, -0.1452]) tensor([0.5454, 0.0435, 0.2151, 0.1960]) -Greedy action tensor([-0.1198, 0.8383, 0.5908, 0.0386]) tensor([0.1468, 0.3826, 0.2987, 0.1720]) -Greedy action tensor([ 1.3548, -1.0204, 0.1043, 0.7616]) tensor([0.5176, 0.0481, 0.1482, 0.2860]) -Greedy action tensor([-0.9525, -0.2667, 0.1210, -1.2015]) tensor([0.1495, 0.2967, 0.4373, 0.1165]) -Greedy action tensor([-0.1161, -1.1583, -0.0996, 0.0617]) tensor([0.2806, 0.0990, 0.2853, 0.3352]) -Greedy action tensor([ 0.0337, -0.8585, 0.2662, -0.3211]) tensor([0.2965, 0.1215, 0.3741, 0.2079]) -Greedy action tensor([-0.9683, -1.0161, 0.1454, -0.8235]) tensor([0.1625, 0.1549, 0.4948, 0.1878]) -Greedy action tensor([-0.0496, -0.2618, 0.1258, -0.5619]) tensor([0.2778, 0.2247, 0.3311, 0.1664]) -Greedy action tensor([-1.0589, -2.3515, -0.3578, 0.1161]) tensor([0.1532, 0.0421, 0.3088, 0.4960]) -Greedy action tensor([ 1.0071, -1.7823, 0.4805, 0.0533]) tensor([0.4908, 0.0302, 0.2899, 0.1891]) -Greedy action tensor([-1.2908, -0.4458, 0.8441, -0.4317]) tensor([0.0707, 0.1646, 0.5978, 0.1669]) -Greedy action tensor([-0.6165, -0.6265, 1.2159, -0.1546]) tensor([0.1018, 0.1008, 0.6360, 0.1615]) -Greedy action tensor([ 0.2771, -0.9815, -0.7718, 0.6347]) tensor([0.3263, 0.0927, 0.1143, 0.4666]) -Greedy action tensor([ 0.9078, -0.3257, -0.0382, 0.4869]) tensor([0.4281, 0.1247, 0.1662, 0.2810]) -Greedy action tensor([-0.8874, -0.4110, -1.0637, -0.2452]) tensor([0.1869, 0.3010, 0.1567, 0.3553]) -Greedy action tensor([-0.7920, -0.5215, -0.5441, -0.0864]) tensor([0.1780, 0.2333, 0.2281, 0.3605]) -Greedy action tensor([ 0.0438, -0.6668, -0.8445, 0.6624]) tensor([0.2660, 0.1307, 0.1094, 0.4938]) -Greedy action tensor([ 0.1645, 0.5472, 1.3700, -0.3125]) tensor([0.1556, 0.2282, 0.5196, 0.0966]) -Greedy action tensor([-0.1212, -0.5397, -0.4753, -0.1751]) tensor([0.3024, 0.1989, 0.2122, 0.2865]) -Greedy action tensor([-0.0196, -0.2278, 1.1024, 0.2317]) tensor([0.1621, 0.1316, 0.4978, 0.2084]) -Greedy action tensor([0.8343, 0.5265, 0.7336, 0.5384]) tensor([0.2956, 0.2173, 0.2673, 0.2199]) -Greedy action tensor([-0.2838, -0.6026, 1.1854, -0.6110]) tensor([0.1472, 0.1070, 0.6397, 0.1061]) -Greedy action tensor([-0.3769, -0.7556, -0.3757, -0.3656]) tensor([0.2705, 0.1852, 0.2708, 0.2735]) -Greedy action tensor([-0.9974, -0.7093, 0.4973, -1.6513]) tensor([0.1368, 0.1824, 0.6097, 0.0711]) -Greedy action tensor([1.4862, 0.9916, 0.3607, 1.0114]) tensor([0.3912, 0.2386, 0.1269, 0.2433]) -Greedy action tensor([-0.4299, -0.1140, -0.4122, -1.3910]) tensor([0.2651, 0.3636, 0.2699, 0.1014]) -Greedy action tensor([ 0.8801, -1.0429, -0.4054, 0.3887]) tensor([0.4915, 0.0718, 0.1359, 0.3007]) -Greedy action tensor([-0.9792, -1.3310, 0.7664, 0.1059]) tensor([0.0962, 0.0677, 0.5513, 0.2848]) -Greedy action tensor([-0.2376, -0.1209, -1.1844, 0.0480]) tensor([0.2603, 0.2925, 0.1010, 0.3463]) -Greedy action tensor([-0.8626, -2.5258, -0.3656, 0.4255]) tensor([0.1548, 0.0293, 0.2545, 0.5614]) -Greedy action tensor([-0.5766, 0.0443, -0.6824, -0.3376]) tensor([0.1988, 0.3699, 0.1788, 0.2525]) -Greedy action tensor([-0.2761, -0.3452, -0.1213, -0.7475]) tensor([0.2685, 0.2505, 0.3134, 0.1676]) -Greedy action tensor([-0.4287, -1.0285, -0.9584, 0.1916]) tensor([0.2502, 0.1373, 0.1473, 0.4652]) -Greedy action tensor([-0.3118, -0.5915, 0.0060, -0.4555]) tensor([0.2502, 0.1892, 0.3439, 0.2167]) -Greedy action tensor([ 0.8922, -2.2051, -0.0224, 1.2485]) tensor([0.3480, 0.0157, 0.1394, 0.4969]) -Greedy action tensor([ 0.3665, 0.0246, -0.5420, -1.1344]) tensor([0.4280, 0.3040, 0.1725, 0.0954]) -Greedy action tensor([ 0.7658, -0.6557, -0.1848, 0.0742]) tensor([0.4698, 0.1134, 0.1816, 0.2353]) -Greedy action tensor([ 0.5643, -0.2007, -0.0492, 0.6565]) tensor([0.3222, 0.1499, 0.1745, 0.3534]) -Greedy action tensor([ 0.4740, -1.5459, 0.0766, 0.7752]) tensor([0.3168, 0.0420, 0.2129, 0.4282]) -Greedy action tensor([0.2161, 0.2684, 0.5677, 0.0723]) tensor([0.2304, 0.2427, 0.3274, 0.1995]) -Greedy action tensor([ 0.4766, -0.4313, 0.4724, 1.0754]) tensor([0.2370, 0.0956, 0.2360, 0.4314]) -Greedy action tensor([-0.4551, -0.8542, 0.6653, -1.1600]) tensor([0.1912, 0.1283, 0.5861, 0.0945]) -Greedy action tensor([ 0.5125, -1.8973, 0.3136, -0.2074]) tensor([0.4173, 0.0375, 0.3420, 0.2032]) -Greedy action tensor([-0.5156, -0.7542, 0.1885, -0.3033]) tensor([0.1982, 0.1561, 0.4007, 0.2450]) -Greedy action tensor([ 0.3974, -0.0182, -0.0256, 0.1607]) tensor([0.3221, 0.2126, 0.2110, 0.2542]) -Greedy action tensor([-0.1173, 0.1415, 0.6551, -0.8772]) tensor([0.2029, 0.2629, 0.4393, 0.0949]) -Greedy action tensor([-1.7181, -0.2385, 0.4117, -1.3447]) tensor([0.0655, 0.2878, 0.5514, 0.0952]) -Greedy action tensor([-0.1989, -1.2922, 0.9268, 0.5756]) tensor([0.1518, 0.0509, 0.4680, 0.3294]) -Greedy action tensor([-0.4545, 0.3224, 0.5407, 0.1069]) tensor([0.1310, 0.2849, 0.3544, 0.2297]) -Greedy action tensor([ 0.9755, 0.0655, -0.0360, -0.4030]) tensor([0.4955, 0.1995, 0.1802, 0.1248]) -Greedy action tensor([1.5305, 0.6275, 1.0049, 0.1488]) tensor([0.4449, 0.1803, 0.2630, 0.1117]) -Greedy action tensor([ 0.2811, 0.5315, 0.6135, -0.1296]) tensor([0.2303, 0.2958, 0.3211, 0.1527]) -Greedy action tensor([-1.2507, -2.0491, 0.0192, 0.7240]) tensor([0.0819, 0.0368, 0.2915, 0.5898]) -Greedy action tensor([-0.3301, 0.0749, 2.1177, -1.1875]) tensor([0.0690, 0.1035, 0.7982, 0.0293]) -Greedy action tensor([ 0.8743, -0.2794, 1.5042, 0.1325]) tensor([0.2725, 0.0860, 0.5117, 0.1298]) -Greedy action tensor([-0.5706, -1.1107, -0.5441, 1.1642]) tensor([0.1208, 0.0704, 0.1241, 0.6847]) -Greedy action tensor([ 0.1582, -1.1620, 0.3234, 0.4613]) tensor([0.2631, 0.0703, 0.3104, 0.3563]) -Greedy action tensor([ 1.2015, -0.7270, -0.5602, -0.5846]) tensor([0.6735, 0.0979, 0.1157, 0.1129]) -Greedy action tensor([-0.5646, 0.4960, -0.3844, -0.5114]) tensor([0.1629, 0.4704, 0.1950, 0.1718]) -Greedy action tensor([ 0.7206, -2.3364, -0.2332, 0.2686]) tensor([0.4834, 0.0227, 0.1862, 0.3076]) -Greedy action tensor([ 1.5510, 0.6214, -0.2505, 1.3124]) tensor([0.4260, 0.1681, 0.0703, 0.3356]) -Greedy action tensor([ 1.0384, -1.5526, 0.2368, 0.1064]) tensor([0.5216, 0.0391, 0.2340, 0.2054]) -Greedy action tensor([ 0.2930, -0.6542, -1.0012, 0.4934]) tensor([0.3468, 0.1345, 0.0951, 0.4237]) -Greedy action tensor([-0.1723, -1.6414, 0.1589, -0.2067]) tensor([0.2786, 0.0641, 0.3880, 0.2692]) -Greedy action tensor([ 0.3930, -1.1246, 0.2522, -0.3918]) tensor([0.3931, 0.0862, 0.3414, 0.1793]) -Greedy action tensor([ 1.1067, -0.0318, 0.8960, -0.3815]) tensor([0.4244, 0.1359, 0.3438, 0.0958]) -Greedy action tensor([-0.2713, -0.5712, -0.3024, -0.2146]) tensor([0.2653, 0.1966, 0.2572, 0.2808]) -Greedy action tensor([ 1.3478, -0.2530, 0.0444, 0.0222]) tensor([0.5751, 0.1160, 0.1562, 0.1528]) -Greedy action tensor([ 0.1477, -1.0688, -0.8287, 1.5991]) tensor([0.1683, 0.0499, 0.0634, 0.7185]) -Greedy action tensor([ 1.0059, -0.4963, 0.2815, -0.1964]) tensor([0.4981, 0.1109, 0.2414, 0.1497]) -Greedy action tensor([ 0.7430, -1.2393, -0.0477, 1.2804]) tensor([0.3028, 0.0417, 0.1373, 0.5182]) -Greedy action tensor([0.8929, 0.0012, 0.0671, 0.4463]) tensor([0.4020, 0.1648, 0.1760, 0.2572]) -Greedy action tensor([-0.4602, -1.8847, 0.1451, 0.2126]) tensor([0.1987, 0.0478, 0.3640, 0.3895]) -Greedy action tensor([-1.3039, -0.0126, -0.2892, 0.1255]) tensor([0.0864, 0.3143, 0.2384, 0.3609]) -Greedy action tensor([ 2.0224, -0.6016, 1.4227, 1.5419]) tensor([0.4464, 0.0324, 0.2451, 0.2761]) -Greedy action tensor([ 1.7826, -0.2007, 0.0341, 0.0922]) tensor([0.6684, 0.0920, 0.1163, 0.1233]) -Greedy action tensor([ 0.0983, -1.1682, -0.1556, -1.1219]) tensor([0.4250, 0.1198, 0.3297, 0.1255]) -Greedy action tensor([ 0.5657, -0.3268, -0.1257, -0.1521]) tensor([0.4169, 0.1708, 0.2088, 0.2034]) -Greedy action tensor([ 0.3774, -0.1405, -0.0100, -0.1623]) tensor([0.3499, 0.2085, 0.2376, 0.2040]) -Greedy action tensor([ 0.7335, -0.3405, -0.0058, -0.1535]) tensor([0.4482, 0.1531, 0.2140, 0.1846]) -Greedy action tensor([ 0.3238, -0.0364, -0.0591, -0.1052]) tensor([0.3300, 0.2302, 0.2250, 0.2149]) -Greedy action tensor([ 0.5199, 0.2635, -0.1840, 0.1674]) tensor([0.3365, 0.2604, 0.1665, 0.2366]) -Greedy action tensor([ 0.9745, -0.5852, -0.1853, -0.3491]) tensor([0.5587, 0.1174, 0.1752, 0.1487]) -Greedy action tensor([ 0.7538, -0.5434, -0.0241, -0.3403]) tensor([0.4837, 0.1322, 0.2222, 0.1620]) -Greedy action tensor([ 0.3601, -0.0697, 0.0253, -0.1072]) tensor([0.3341, 0.2174, 0.2391, 0.2094]) -Greedy action tensor([ 0.8616, -1.0285, 0.0333, -0.5498]) tensor([0.5460, 0.0825, 0.2385, 0.1331]) -Greedy action tensor([ 0.3342, 0.0188, -0.1683, -0.1636]) tensor([0.3399, 0.2479, 0.2056, 0.2066]) -Greedy action tensor([ 0.4164, -0.0182, -0.1282, -0.0821]) tensor([0.3527, 0.2284, 0.2046, 0.2143]) -Greedy action tensor([ 0.9517, -0.6762, -0.0293, -0.3941]) tensor([0.5460, 0.1072, 0.2047, 0.1421]) -Greedy action tensor([ 0.6028, -0.3743, -0.0793, -0.0355]) tensor([0.4149, 0.1562, 0.2098, 0.2192]) -Greedy action tensor([ 0.7657, -0.5385, -0.0813, -0.4148]) tensor([0.4982, 0.1352, 0.2136, 0.1530]) -Greedy action tensor([ 0.7841, -0.5932, 0.0478, -0.5434]) tensor([0.5009, 0.1264, 0.2399, 0.1328]) -Greedy action tensor([ 0.6870, -0.4967, -0.1200, -0.3302]) tensor([0.4730, 0.1448, 0.2111, 0.1711]) -Greedy action tensor([ 0.8206, -0.7299, -0.1127, -0.1912]) tensor([0.5079, 0.1077, 0.1997, 0.1846]) -Greedy action tensor([ 0.8170, -0.4831, -0.0412, -0.2471]) tensor([0.4899, 0.1335, 0.2077, 0.1690]) -Greedy action tensor([ 0.3936, 0.5156, -0.1815, 0.0012]) tensor([0.2969, 0.3355, 0.1671, 0.2005]) -Greedy action tensor([ 0.5939, -0.7802, -0.0719, -0.5750]) tensor([0.4813, 0.1218, 0.2473, 0.1496]) -Greedy action tensor([ 0.8256, -0.6112, 0.1052, -0.4644]) tensor([0.5001, 0.1189, 0.2433, 0.1377]) -Greedy action tensor([ 0.6608, -0.1799, 0.0613, -0.4029]) tensor([0.4300, 0.1855, 0.2361, 0.1484]) -Greedy action tensor([ 0.8307, -0.6652, 0.0797, -0.3609]) tensor([0.5001, 0.1120, 0.2360, 0.1519]) -Greedy action tensor([ 0.7771, -0.2718, -0.0733, 0.0442]) tensor([0.4429, 0.1551, 0.1892, 0.2128]) -Greedy action tensor([ 0.5055, -0.2465, -0.0597, -0.1393]) tensor([0.3900, 0.1838, 0.2216, 0.2046]) -Greedy action tensor([ 0.5482, -0.3884, -0.0473, -0.0693]) tensor([0.4028, 0.1579, 0.2221, 0.2172]) -Greedy action tensor([ 0.2000, -0.2292, -0.1006, -0.2370]) tensor([0.3292, 0.2143, 0.2438, 0.2127]) -Greedy action tensor([ 1.1378, -0.6818, -0.0508, -0.3664]) tensor([0.5921, 0.0960, 0.1804, 0.1316]) -Greedy action tensor([ 0.9520, -0.6254, -0.0807, -0.4634]) tensor([0.5539, 0.1144, 0.1972, 0.1345]) -Greedy action tensor([ 0.6148, -0.3681, -0.0540, -0.1487]) tensor([0.4251, 0.1591, 0.2178, 0.1981]) -Greedy action tensor([ 0.5942, -0.0634, -0.0515, 0.0651]) tensor([0.3800, 0.1969, 0.1992, 0.2239]) -Greedy action tensor([ 0.6016, -0.3366, 0.1058, -0.5592]) tensor([0.4322, 0.1691, 0.2633, 0.1354]) -Greedy action tensor([ 0.4515, -0.3490, 0.0172, -0.3036]) tensor([0.3896, 0.1750, 0.2523, 0.1831]) -Greedy action tensor([ 1.0225, -0.6636, -0.0742, -0.4352]) tensor([0.5708, 0.1057, 0.1906, 0.1329]) -Greedy action tensor([ 0.6530, -0.4623, 0.2390, -0.2863]) tensor([0.4202, 0.1378, 0.2778, 0.1643]) -Greedy action tensor([ 0.6746, -0.6315, -0.0708, -0.3302]) tensor([0.4736, 0.1283, 0.2247, 0.1734]) -Greedy action tensor([ 0.6566, -0.5159, 0.1128, -0.1706]) tensor([0.4297, 0.1330, 0.2494, 0.1879]) -Greedy action tensor([ 0.5942, -0.5001, -0.0724, -0.1721]) tensor([0.4323, 0.1447, 0.2220, 0.2009]) -Greedy action tensor([ 0.8213, -0.4105, -0.1179, -0.1753]) tensor([0.4874, 0.1422, 0.1905, 0.1799]) -Greedy action tensor([ 0.5802, -0.1346, -0.0938, -0.0529]) tensor([0.3953, 0.1934, 0.2015, 0.2099]) -Greedy action tensor([ 0.1769, 0.2600, -0.2758, 0.0953]) tensor([0.2744, 0.2982, 0.1745, 0.2529]) -Greedy action tensor([ 0.5931, -0.4584, -0.0887, -0.0772]) tensor([0.4225, 0.1476, 0.2137, 0.2161]) -Greedy action tensor([ 0.4669, -0.3348, -0.0392, -0.1236]) tensor([0.3838, 0.1722, 0.2314, 0.2127]) -Greedy action tensor([ 0.6049, 0.0377, -0.1774, -0.0452]) tensor([0.3927, 0.2227, 0.1796, 0.2050]) -Greedy action tensor([ 0.7269, -0.5868, -0.0771, -0.2379]) tensor([0.4768, 0.1282, 0.2134, 0.1817]) -Greedy action tensor([ 1.1319, -0.9364, 0.0587, -0.7006]) tensor([0.6141, 0.0776, 0.2100, 0.0983]) -Greedy action tensor([ 0.9796, -0.8131, 0.1423, -0.5411]) tensor([0.5501, 0.0916, 0.2381, 0.1202]) -Greedy action tensor([ 0.5828, -0.3167, -0.1526, -0.1329]) tensor([0.4211, 0.1713, 0.2018, 0.2058]) -Greedy action tensor([ 0.4856, -0.0958, -0.0328, -0.3514]) tensor([0.3865, 0.2161, 0.2301, 0.1673]) -Greedy action tensor([ 0.3590, -0.1165, -0.0410, -0.4112]) tensor([0.3630, 0.2256, 0.2433, 0.1680]) -Greedy action tensor([ 0.8935, -0.2216, -0.0403, -0.0622]) tensor([0.4750, 0.1557, 0.1867, 0.1826]) -Greedy action tensor([ 1.0488, -0.4380, -0.1548, -0.3648]) tensor([0.5651, 0.1278, 0.1696, 0.1375]) -Greedy action tensor([ 1.1618, -0.6771, -0.0033, -0.5362]) tensor([0.6046, 0.0961, 0.1886, 0.1107]) -Greedy action tensor([ 5.0239e-01, 1.3927e-04, -2.6669e-02, -6.9436e-02]) tensor([0.3625, 0.2194, 0.2136, 0.2046]) -Greedy action tensor([ 0.8173, -0.5600, -0.2415, -0.4315]) tensor([0.5302, 0.1338, 0.1839, 0.1521]) -Greedy action tensor([ 0.8558, -0.7205, 0.0542, -0.4714]) tensor([0.5207, 0.1076, 0.2336, 0.1381]) -Greedy action tensor([ 0.8282, -0.3587, -0.0537, -0.1781]) tensor([0.4797, 0.1464, 0.1986, 0.1754]) -Greedy action tensor([ 0.9444, -0.8828, 0.1052, -0.5637]) tensor([0.5512, 0.0887, 0.2381, 0.1220]) -Greedy action tensor([ 0.4237, 0.2330, -0.1770, -0.1263]) tensor([0.3388, 0.2800, 0.1858, 0.1955]) -Greedy action tensor([ 0.3874, -0.0513, 0.2219, -0.5615]) tensor([0.3473, 0.2240, 0.2943, 0.1345]) -Greedy action tensor([ 0.6226, -0.4621, -0.1332, -0.1525]) tensor([0.4409, 0.1490, 0.2070, 0.2031]) -Greedy action tensor([ 1.1186, -1.1153, 0.1539, -0.6640]) tensor([0.6037, 0.0647, 0.2301, 0.1015]) -Greedy action tensor([ 0.5134, -0.1210, 0.0443, -0.2007]) tensor([0.3780, 0.2004, 0.2365, 0.1851]) -Greedy action tensor([ 0.6891, -0.2561, -0.0508, -0.2652]) tensor([0.4443, 0.1726, 0.2120, 0.1711]) -Greedy action tensor([ 0.4964, -0.2374, -0.0354, -0.3711]) tensor([0.4020, 0.1930, 0.2362, 0.1688]) -Greedy action tensor([ 0.8910, -0.6051, -0.0082, -0.4652]) tensor([0.5295, 0.1186, 0.2155, 0.1364]) -Greedy action tensor([ 0.9186, -0.8376, -0.0171, -0.5309]) tensor([0.5557, 0.0960, 0.2180, 0.1304]) -Greedy action tensor([ 0.8820, -0.6979, -0.1788, -0.3152]) tensor([0.5393, 0.1111, 0.1867, 0.1629]) -Greedy action tensor([ 1.2663, -0.9649, 0.1351, -0.7293]) tensor([0.6386, 0.0686, 0.2060, 0.0868]) -Greedy action tensor([ 0.5335, -0.2992, -0.0540, -0.1628]) tensor([0.4018, 0.1747, 0.2233, 0.2002]) -Greedy action tensor([ 0.4397, -0.7375, -0.2269, -0.2836]) tensor([0.4335, 0.1336, 0.2226, 0.2103]) -Greedy action tensor([ 0.0776, -0.1624, -0.0510, -0.2938]) tensor([0.2980, 0.2344, 0.2620, 0.2056]) -Greedy action tensor([ 0.8904, -0.8340, -0.2085, -0.6939]) tensor([0.5826, 0.1039, 0.1941, 0.1195]) -Greedy action tensor([0.2062, 0.1743, 0.0346, 0.1948]) tensor([0.2632, 0.2549, 0.2217, 0.2602]) -Greedy action tensor([ 0.3113, 0.0659, -0.1823, -0.1453]) tensor([0.3304, 0.2585, 0.2017, 0.2093]) -Greedy action tensor([ 0.6042, -0.3260, 0.0092, -0.1917]) tensor([0.4171, 0.1646, 0.2301, 0.1882]) -Greedy action tensor([ 0.7257, -0.3645, 0.0026, -0.1718]) tensor([0.4486, 0.1508, 0.2177, 0.1829]) -Greedy action tensor([ 0.6554, -0.5259, 0.0901, -0.3123]) tensor([0.4435, 0.1361, 0.2520, 0.1685]) -Greedy action tensor([ 0.6530, -0.4976, -0.0141, -0.5231]) tensor([0.4677, 0.1480, 0.2400, 0.1443]) -Greedy action tensor([ 0.6302, -0.4501, 0.0750, -0.5250]) tensor([0.4488, 0.1523, 0.2575, 0.1414]) -Greedy action tensor([ 0.4809, 0.0232, -0.0983, -0.4315]) tensor([0.3854, 0.2439, 0.2160, 0.1548]) -Greedy action tensor([-1.4492, -0.5077, 0.8864, 0.6401]) tensor([0.0455, 0.1166, 0.4703, 0.3676]) -Greedy action tensor([-1.6661, -0.1797, 0.5082, -0.1052]) tensor([0.0527, 0.2329, 0.4634, 0.2510]) -Greedy action tensor([-0.5204, 0.3730, 0.1167, -0.0724]) tensor([0.1449, 0.3541, 0.2741, 0.2269]) -Greedy action tensor([-1.9212, -0.3864, 0.6409, -0.1810]) tensor([0.0411, 0.1909, 0.5334, 0.2345]) -Greedy action tensor([-1.8647, -0.3286, 0.6419, -0.0789]) tensor([0.0419, 0.1946, 0.5137, 0.2498]) -Greedy action tensor([-1.7077, -0.3729, 0.6217, -0.0874]) tensor([0.0497, 0.1888, 0.5104, 0.2512]) -Greedy action tensor([-1.7788, -0.4107, 0.5790, -0.0812]) tensor([0.0477, 0.1874, 0.5043, 0.2606]) -Greedy action tensor([-1.6537, -0.4633, 0.5564, 0.0052]) tensor([0.0536, 0.1762, 0.4886, 0.2816]) -Greedy action tensor([-1.7222, 0.1859, 0.4549, -0.0171]) tensor([0.0453, 0.3055, 0.3998, 0.2494]) -Greedy action tensor([-1.1559, 0.3335, 0.4178, 0.2630]) tensor([0.0695, 0.3081, 0.3352, 0.2872]) -Greedy action tensor([-1.9395, -0.4441, 0.6642, -0.1775]) tensor([0.0403, 0.1799, 0.5449, 0.2349]) -Greedy action tensor([-1.9078, -0.4037, 0.6452, -0.1549]) tensor([0.0415, 0.1866, 0.5327, 0.2393]) -Greedy action tensor([-1.9019, -0.4055, 0.6456, -0.1570]) tensor([0.0417, 0.1863, 0.5331, 0.2389]) -Greedy action tensor([-1.9329, -0.4580, 0.6669, -0.1750]) tensor([0.0406, 0.1774, 0.5465, 0.2355]) -Greedy action tensor([-1.6713, 0.1897, 0.4692, 0.0579]) tensor([0.0464, 0.2981, 0.3942, 0.2613]) -Greedy action tensor([-1.8980, -0.4201, 0.6396, -0.1595]) tensor([0.0422, 0.1848, 0.5332, 0.2398]) -Greedy action tensor([-1.8861, -0.3830, 0.6413, -0.1496]) tensor([0.0422, 0.1897, 0.5284, 0.2396]) -Greedy action tensor([-1.8321, -0.3725, 0.6062, -0.1043]) tensor([0.0447, 0.1923, 0.5116, 0.2514]) -Greedy action tensor([-0.9835, 0.2643, 0.1877, -0.0685]) tensor([0.0980, 0.3413, 0.3161, 0.2446]) -Greedy action tensor([-1.9235, -0.4072, 0.6480, -0.1689]) tensor([0.0409, 0.1865, 0.5358, 0.2367]) -Greedy action tensor([-1.2291, -0.5334, 0.3355, -0.0210]) tensor([0.0898, 0.1801, 0.4294, 0.3006]) -Greedy action tensor([-1.6947, 0.2346, 0.4550, -0.0138]) tensor([0.0458, 0.3153, 0.3930, 0.2459]) -Greedy action tensor([-0.5129, -0.4724, -0.2970, -0.1337]) tensor([0.2108, 0.2195, 0.2616, 0.3080]) -Greedy action tensor([-0.6648, -0.3537, 1.2018, 1.5432]) tensor([0.0558, 0.0761, 0.3607, 0.5074]) -Greedy action tensor([-1.8799, -0.3732, 0.6311, -0.1369]) tensor([0.0425, 0.1916, 0.5232, 0.2427]) -Greedy action tensor([-1.8599, -0.4550, 0.6293, -0.1391]) tensor([0.0440, 0.1794, 0.5305, 0.2461]) -Greedy action tensor([-1.7880, -0.1600, 0.5809, -0.0712]) tensor([0.0448, 0.2280, 0.4782, 0.2491]) -Greedy action tensor([-1.8936, -0.4146, 0.6521, -0.1395]) tensor([0.0418, 0.1835, 0.5331, 0.2416]) -Greedy action tensor([-1.7283, -0.5245, 0.8492, 0.2987]) tensor([0.0399, 0.1328, 0.5247, 0.3026]) -Greedy action tensor([-1.3652, -0.4512, 0.5046, 0.4120]) tensor([0.0629, 0.1569, 0.4081, 0.3720]) -Greedy action tensor([-0.3499, -0.7078, 0.4894, 0.3596]) tensor([0.1654, 0.1156, 0.3828, 0.3362]) -Greedy action tensor([-1.7209e+00, -5.0116e-01, 5.6933e-01, -4.6444e-04]) tensor([0.0504, 0.1706, 0.4976, 0.2815]) -Greedy action tensor([-1.7397, -0.5187, 0.5629, -0.0719]) tensor([0.0508, 0.1722, 0.5078, 0.2692]) -Greedy action tensor([0.0317, 1.1720, 0.0369, 0.1772]) tensor([0.1590, 0.4973, 0.1598, 0.1839]) -Greedy action tensor([-1.9375, -0.4409, 0.6648, -0.1741]) tensor([0.0403, 0.1802, 0.5443, 0.2352]) -Greedy action tensor([-1.6419, -0.4607, 0.6134, 0.1251]) tensor([0.0509, 0.1658, 0.4854, 0.2979]) -Greedy action tensor([-1.9096, -0.4194, 0.6600, -0.1575]) tensor([0.0412, 0.1829, 0.5382, 0.2377]) -Greedy action tensor([-1.9315, -0.4398, 0.6605, -0.1735]) tensor([0.0406, 0.1807, 0.5429, 0.2358]) -Greedy action tensor([-1.9202, -0.4430, 0.6537, -0.1680]) tensor([0.0412, 0.1805, 0.5406, 0.2377]) -Greedy action tensor([-0.8306, 0.8820, 0.1209, 0.0713]) tensor([0.0862, 0.4780, 0.2233, 0.2125]) -Greedy action tensor([-1.2223, 0.3536, 0.3362, -0.1278]) tensor([0.0737, 0.3562, 0.3500, 0.2201]) -Greedy action tensor([-1.5340, -0.4363, 0.4662, 0.0495]) tensor([0.0615, 0.1843, 0.4545, 0.2996]) -Greedy action tensor([-1.8873, -0.4526, 0.6415, -0.1518]) tensor([0.0427, 0.1794, 0.5356, 0.2423]) -Greedy action tensor([-1.8348, -0.2988, 0.5988, -0.1384]) tensor([0.0444, 0.2065, 0.5067, 0.2424]) -Greedy action tensor([-0.8794, 0.9768, 0.1322, 0.2482]) tensor([0.0755, 0.4834, 0.2077, 0.2333]) -Greedy action tensor([-1.5412, -0.3896, 0.5173, 0.1085]) tensor([0.0581, 0.1839, 0.4554, 0.3026]) -Greedy action tensor([-1.4997, 0.5630, 0.4426, -0.5037]) tensor([0.0539, 0.4241, 0.3760, 0.1460]) -Greedy action tensor([-1.9211, -0.4595, 0.7002, -0.1364]) tensor([0.0400, 0.1724, 0.5496, 0.2381]) -Greedy action tensor([-1.9361, -0.4571, 0.6768, -0.1691]) tensor([0.0402, 0.1764, 0.5482, 0.2352]) -Greedy action tensor([0.4767, 1.2516, 0.1369, 0.8732]) tensor([0.1863, 0.4042, 0.1326, 0.2769]) -Greedy action tensor([-1.6716, -0.3391, 0.6877, 0.0437]) tensor([0.0478, 0.1811, 0.5056, 0.2655]) -Greedy action tensor([-1.5187, 0.5075, 0.3196, 0.0952]) tensor([0.0503, 0.3813, 0.3160, 0.2525]) -Greedy action tensor([-1.7591, -0.4619, 0.6513, 0.0283]) tensor([0.0459, 0.1681, 0.5116, 0.2744]) -Greedy action tensor([-1.8987, -0.3629, 0.6402, -0.1533]) tensor([0.0416, 0.1932, 0.5269, 0.2383]) -Greedy action tensor([-1.8990, -0.4049, 0.6436, -0.1446]) tensor([0.0418, 0.1860, 0.5308, 0.2414]) -Greedy action tensor([-1.9237, -0.4293, 0.6564, -0.1659]) tensor([0.0409, 0.1822, 0.5397, 0.2372]) -Greedy action tensor([-1.8051, -0.3740, 0.6497, -0.1033]) tensor([0.0448, 0.1875, 0.5219, 0.2458]) -Greedy action tensor([-0.8153, -0.4126, 0.4495, 0.8932]) tensor([0.0865, 0.1294, 0.3065, 0.4776]) -Greedy action tensor([-1.8129, -0.4686, 0.6059, -0.1555]) tensor([0.0469, 0.1800, 0.5270, 0.2461]) -Greedy action tensor([-1.3350, -0.4883, 0.4026, 0.1677]) tensor([0.0740, 0.1726, 0.4207, 0.3326]) -Greedy action tensor([-1.5735, 0.1689, 0.4954, 0.1369]) tensor([0.0496, 0.2833, 0.3927, 0.2744]) -Greedy action tensor([-1.8330, -0.3644, 0.6169, -0.1042]) tensor([0.0443, 0.1925, 0.5135, 0.2497]) -Greedy action tensor([-1.0034, -0.6597, 0.1992, 0.3018]) tensor([0.1061, 0.1496, 0.3531, 0.3913]) -Greedy action tensor([-1.5810, -0.1636, 0.5666, 0.0649]) tensor([0.0530, 0.2186, 0.4537, 0.2747]) -Greedy action tensor([-1.7785, -0.2111, 0.6068, -0.0626]) tensor([0.0450, 0.2158, 0.4889, 0.2503]) -Greedy action tensor([-1.8169, -0.3916, 0.5966, -0.0999]) tensor([0.0457, 0.1899, 0.5102, 0.2542]) -Greedy action tensor([-1.8483, -0.4552, 0.6233, -0.1296]) tensor([0.0446, 0.1794, 0.5276, 0.2485]) -Greedy action tensor([-1.7893, -0.3979, 0.6047, -0.0757]) tensor([0.0465, 0.1868, 0.5090, 0.2578]) -Greedy action tensor([-1.8898, -0.4599, 0.6337, -0.1570]) tensor([0.0429, 0.1793, 0.5351, 0.2427]) -Greedy action tensor([-1.9020, -0.4639, 0.6458, -0.1737]) tensor([0.0423, 0.1783, 0.5410, 0.2384]) -Greedy action tensor([-1.9264, -0.4811, 0.7825, -0.0622]) tensor([0.0374, 0.1589, 0.5621, 0.2415]) -Greedy action tensor([-1.7304, -0.5719, 0.9285, 0.1579]) tensor([0.0399, 0.1270, 0.5695, 0.2635]) -Greedy action tensor([-1.9206, -0.4153, 0.6527, -0.1648]) tensor([0.0410, 0.1846, 0.5372, 0.2372]) -Greedy action tensor([-1.8003, -0.4410, 0.6837, 0.0031]) tensor([0.0436, 0.1696, 0.5223, 0.2645]) -Greedy action tensor([-1.9157, -0.4466, 0.6514, -0.1663]) tensor([0.0415, 0.1801, 0.5400, 0.2384]) -Greedy action tensor([-1.3873, 0.4631, 0.2771, 0.0159]) tensor([0.0598, 0.3807, 0.3161, 0.2434]) -Greedy action tensor([-1.8287, -0.3101, 0.5906, -0.1389]) tensor([0.0450, 0.2055, 0.5057, 0.2438]) -Greedy action tensor([-1.7607, -0.4821, 0.5795, -0.1236]) tensor([0.0497, 0.1786, 0.5162, 0.2555]) -Greedy action tensor([-1.5852, -0.8236, 0.5638, -0.1598]) tensor([0.0630, 0.1349, 0.5401, 0.2620]) -Greedy action tensor([-1.2809, -0.1692, 0.3892, -0.1047]) tensor([0.0794, 0.2413, 0.4218, 0.2574]) -Greedy action tensor([-1.8150, -0.3956, 0.6085, -0.1395]) tensor([0.0460, 0.1900, 0.5186, 0.2454]) -Greedy action tensor([ 1.8312, -0.5824, -0.4133, 0.6995]) tensor([0.6588, 0.0590, 0.0698, 0.2125]) -Greedy action tensor([ 1.3379, -0.8326, 0.3092, 0.1839]) tensor([0.5596, 0.0639, 0.2001, 0.1765]) -Greedy action tensor([ 1.4241, -0.4338, -0.0109, -0.0466]) tensor([0.6158, 0.0961, 0.1466, 0.1415]) -Greedy action tensor([ 1.1681, -0.2836, -0.7559, 0.3884]) tensor([0.5438, 0.1274, 0.0794, 0.2494]) -Greedy action tensor([ 1.6215, -0.8805, -0.1253, -0.0445]) tensor([0.6919, 0.0567, 0.1206, 0.1308]) -Greedy action tensor([ 1.4761, -0.4105, -0.5900, 0.2360]) tensor([0.6379, 0.0967, 0.0808, 0.1846]) -Greedy action tensor([ 0.5364, -0.2733, -0.0414, 0.1885]) tensor([0.3687, 0.1641, 0.2069, 0.2604]) -Greedy action tensor([ 1.6415, -0.4512, -0.2770, 0.2560]) tensor([0.6577, 0.0811, 0.0966, 0.1646]) -Greedy action tensor([ 1.6111, -0.5002, -0.7013, 0.4213]) tensor([0.6560, 0.0794, 0.0650, 0.1996]) -Greedy action tensor([ 1.2208, -0.4513, -0.4663, 0.2065]) tensor([0.5762, 0.1082, 0.1066, 0.2089]) -Greedy action tensor([ 1.0244, -0.3218, -0.0127, -0.1819]) tensor([0.5225, 0.1360, 0.1852, 0.1564]) -Greedy action tensor([ 2.0377, -1.1269, -0.6579, 1.2199]) tensor([0.6447, 0.0272, 0.0435, 0.2846]) -Greedy action tensor([ 1.3721, -0.4805, 0.1301, 0.4309]) tensor([0.5447, 0.0854, 0.1573, 0.2125]) -Greedy action tensor([ 1.5367, -0.0652, -0.6361, 0.6097]) tensor([0.5844, 0.1178, 0.0665, 0.2313]) -Greedy action tensor([ 1.8317, -0.8995, -0.3355, 0.8342]) tensor([0.6458, 0.0421, 0.0739, 0.2382]) -Greedy action tensor([ 1.5444, -0.5910, -0.4172, 0.1701]) tensor([0.6615, 0.0782, 0.0930, 0.1674]) -Greedy action tensor([ 1.2657, -0.0811, -0.7193, 0.2981]) tensor([0.5626, 0.1463, 0.0773, 0.2138]) -Greedy action tensor([ 2.6733, 0.5341, -0.0648, 0.1737]) tensor([0.7908, 0.0931, 0.0512, 0.0649]) -Greedy action tensor([ 1.4365, -0.2519, 0.0119, 0.1473]) tensor([0.5879, 0.1087, 0.1415, 0.1620]) -Greedy action tensor([ 1.5621, -0.3353, -0.4928, 0.1646]) tensor([0.6556, 0.0983, 0.0840, 0.1621]) -Greedy action tensor([ 1.8982, -0.4390, -0.1330, 0.4494]) tensor([0.6837, 0.0660, 0.0897, 0.1606]) -Greedy action tensor([ 1.8864, -0.5380, -0.3486, 0.5427]) tensor([0.6866, 0.0608, 0.0735, 0.1791]) -Greedy action tensor([ 1.5092, -0.6015, -0.4680, 0.0042]) tensor([0.6749, 0.0818, 0.0934, 0.1498]) -Greedy action tensor([ 1.5483, -0.4278, 0.1516, -0.2190]) tensor([0.6423, 0.0890, 0.1589, 0.1097]) -Greedy action tensor([ 1.3750, -0.7577, -0.2958, 0.3309]) tensor([0.6029, 0.0715, 0.1134, 0.2122]) -Greedy action tensor([ 1.4622, -0.4649, -0.2910, 0.3607]) tensor([0.6056, 0.0882, 0.1049, 0.2013]) -Greedy action tensor([ 1.1294, -0.1591, -0.5640, 0.4514]) tensor([0.5083, 0.1401, 0.0935, 0.2580]) -Greedy action tensor([ 1.1671, -0.6372, -0.1621, 0.2988]) tensor([0.5409, 0.0890, 0.1432, 0.2270]) -Greedy action tensor([ 2.3865, -0.5307, -0.4836, 0.5250]) tensor([0.7897, 0.0427, 0.0448, 0.1228]) -Greedy action tensor([ 1.1363, -0.0396, -0.2845, -0.3781]) tensor([0.5650, 0.1743, 0.1364, 0.1243]) -Greedy action tensor([ 1.3487, -0.1909, -0.2569, 0.4107]) tensor([0.5535, 0.1187, 0.1111, 0.2166]) -Greedy action tensor([ 0.9747, -0.6552, 0.1599, -0.0577]) tensor([0.5013, 0.0982, 0.2219, 0.1785]) -Greedy action tensor([ 1.9961, -0.5808, -0.4356, 0.7839]) tensor([0.6843, 0.0520, 0.0601, 0.2036]) -Greedy action tensor([ 1.5436, -0.0755, -0.9416, 0.6140]) tensor([0.5966, 0.1182, 0.0497, 0.2355]) -Greedy action tensor([ 1.4722, -0.4195, -0.6771, 0.6056]) tensor([0.5925, 0.0894, 0.0691, 0.2491]) -Greedy action tensor([ 1.3653, 0.1828, -0.5593, 0.4605]) tensor([0.5385, 0.1650, 0.0786, 0.2179]) -Greedy action tensor([ 1.4411, -0.0875, -0.1875, 0.1879]) tensor([0.5887, 0.1277, 0.1155, 0.1681]) -Greedy action tensor([ 1.4142, -0.7642, -0.2870, 0.3412]) tensor([0.6106, 0.0691, 0.1114, 0.2088]) -Greedy action tensor([ 1.4432, -0.5698, -0.2130, 0.3923]) tensor([0.5973, 0.0798, 0.1140, 0.2088]) -Greedy action tensor([ 1.1168e+00, -7.4995e-01, 4.6080e-04, 1.7278e-01]) tensor([0.5344, 0.0826, 0.1750, 0.2079]) -Greedy action tensor([ 1.2291, 0.0247, -1.0081, 0.2304]) tensor([0.5634, 0.1689, 0.0601, 0.2075]) -Greedy action tensor([ 1.5647, 0.1856, -0.9763, 0.0319]) tensor([0.6466, 0.1628, 0.0509, 0.1396]) -Greedy action tensor([ 2.5086, -1.1938, -0.4142, 1.0528]) tensor([0.7624, 0.0188, 0.0410, 0.1778]) -Greedy action tensor([ 1.6892, -0.0034, -1.0393, 0.5857]) tensor([0.6325, 0.1164, 0.0413, 0.2098]) -Greedy action tensor([ 1.0990, -0.7929, 0.1361, -0.0498]) tensor([0.5407, 0.0815, 0.2064, 0.1714]) -Greedy action tensor([ 1.7276, -0.4768, -0.4629, 0.3810]) tensor([0.6746, 0.0744, 0.0755, 0.1755]) -Greedy action tensor([ 1.4332, 0.0911, -0.9371, 0.7372]) tensor([0.5396, 0.1410, 0.0504, 0.2690]) -Greedy action tensor([ 0.8664, -0.0255, -0.0922, 0.0865]) tensor([0.4441, 0.1820, 0.1703, 0.2036]) -Greedy action tensor([ 1.5086, -1.1645, -0.1621, 0.2325]) tensor([0.6509, 0.0449, 0.1224, 0.1817]) -Greedy action tensor([ 1.6460, -0.5636, -0.1882, 0.2962]) tensor([0.6541, 0.0718, 0.1045, 0.1696]) -Greedy action tensor([ 1.6940, -0.8023, -0.0720, 0.2170]) tensor([0.6749, 0.0556, 0.1154, 0.1541]) -Greedy action tensor([ 1.6474, -0.5466, -0.6332, 0.4025]) tensor([0.6659, 0.0742, 0.0681, 0.1918]) -Greedy action tensor([ 1.3533, -0.5451, -0.3299, -0.1098]) tensor([0.6381, 0.0956, 0.1185, 0.1477]) -Greedy action tensor([ 0.9205, -0.3066, -0.1092, 0.2546]) tensor([0.4621, 0.1355, 0.1650, 0.2374]) -Greedy action tensor([ 0.5607, -0.2596, -0.0494, 0.5331]) tensor([0.3383, 0.1489, 0.1838, 0.3290]) -Greedy action tensor([ 2.3198, -1.7844, -0.4118, 0.1846]) tensor([0.8334, 0.0138, 0.0543, 0.0985]) -Greedy action tensor([ 0.8995, -0.2180, -0.0750, -0.0932]) tensor([0.4819, 0.1576, 0.1819, 0.1786]) -Greedy action tensor([ 1.3169, 0.1983, -0.7236, 0.2226]) tensor([0.5582, 0.1824, 0.0725, 0.1869]) -Greedy action tensor([ 2.1041, -1.2639, -0.4130, 0.5299]) tensor([0.7562, 0.0261, 0.0610, 0.1567]) -Greedy action tensor([ 2.0821, -0.7911, -0.3409, 0.7063]) tensor([0.7154, 0.0404, 0.0634, 0.1807]) -Greedy action tensor([ 1.1466e+00, -7.0460e-01, -5.5532e-02, 6.2922e-04]) tensor([0.5632, 0.0885, 0.1693, 0.1791]) -Greedy action tensor([ 0.9220, -0.2696, -0.1882, 0.2441]) tensor([0.4671, 0.1419, 0.1539, 0.2371]) -Greedy action tensor([ 1.9173, -0.8007, -0.4769, 0.4063]) tensor([0.7257, 0.0479, 0.0662, 0.1602]) -Greedy action tensor([ 2.1173, -0.6734, -0.2559, 0.8741]) tensor([0.6930, 0.0425, 0.0646, 0.1999]) -Greedy action tensor([ 2.0037, -1.0000, -0.2096, 0.6342]) tensor([0.7076, 0.0351, 0.0774, 0.1799]) -Greedy action tensor([ 1.8424, -0.9181, -0.4788, 0.7186]) tensor([0.6727, 0.0426, 0.0660, 0.2187]) -Greedy action tensor([ 0.8762, -0.5846, 0.0562, 0.1961]) tensor([0.4589, 0.1065, 0.2021, 0.2325]) -Greedy action tensor([ 2.2248, -1.4060, -0.0243, 0.7218]) tensor([0.7383, 0.0196, 0.0779, 0.1642]) -Greedy action tensor([ 1.6036, -0.6058, -0.1755, 0.4562]) tensor([0.6266, 0.0688, 0.1058, 0.1989]) -Greedy action tensor([ 1.9348, -0.7719, -0.1506, 0.3214]) tensor([0.7193, 0.0480, 0.0894, 0.1433]) -Greedy action tensor([ 1.4360, -0.2786, -0.2400, 0.4278]) tensor([0.5773, 0.1039, 0.1080, 0.2107]) -Greedy action tensor([ 1.2801, -0.3027, -0.4290, 0.2913]) tensor([0.5687, 0.1168, 0.1030, 0.2116]) -Greedy action tensor([ 1.2500, -0.5156, -0.2231, 0.0646]) tensor([0.5862, 0.1003, 0.1344, 0.1792]) -Greedy action tensor([ 1.4463, -0.0954, -0.1954, 0.1289]) tensor([0.5968, 0.1277, 0.1156, 0.1599]) -Greedy action tensor([ 1.0320, -0.5662, -0.2813, 0.4515]) tensor([0.4924, 0.0996, 0.1324, 0.2756]) -Greedy action tensor([ 1.1638, -0.1310, -0.2454, 0.0413]) tensor([0.5424, 0.1486, 0.1325, 0.1765]) -Greedy action tensor([ 1.0477, -0.1935, -0.1774, 0.2142]) tensor([0.4957, 0.1433, 0.1456, 0.2154]) -Greedy action tensor([ 1.1923, -0.8365, -0.0997, 0.6857]) tensor([0.4978, 0.0655, 0.1368, 0.3000]) -Greedy action tensor([ 1.7287, -1.1426, 0.1975, 0.6957]) tensor([0.6139, 0.0348, 0.1328, 0.2185]) -Greedy action tensor([ 1.4519, -0.2714, -0.3952, 0.3414]) tensor([0.6004, 0.1072, 0.0947, 0.1978]) -Greedy action tensor([ 1.6655, -0.5629, -0.5161, 0.5417]) tensor([0.6470, 0.0697, 0.0730, 0.2103]) -Greedy action tensor([-0.0054, 0.3615, 0.3990, 0.7137]) tensor([0.1668, 0.2408, 0.2500, 0.3424]) -Greedy action tensor([ 0.8151, 0.9768, -0.3658, -0.2866]) tensor([0.3552, 0.4176, 0.1091, 0.1181]) -Greedy action tensor([ 1.5643, -0.2915, 0.4024, 1.2969]) tensor([0.4475, 0.0700, 0.1400, 0.3425]) -Greedy action tensor([-0.7727, -1.0320, -0.5874, 0.1832]) tensor([0.1793, 0.1384, 0.2158, 0.4664]) -Greedy action tensor([ 0.5227, -0.7980, 0.8096, 0.5074]) tensor([0.2790, 0.0745, 0.3717, 0.2748]) -Greedy action tensor([ 0.1637, -0.6737, 0.9829, 0.5480]) tensor([0.1934, 0.0837, 0.4388, 0.2840]) -Greedy action tensor([ 0.3890, -1.2853, -0.8698, -0.5751]) tensor([0.5397, 0.1012, 0.1533, 0.2058]) -Greedy action tensor([ 1.2545, -1.4323, 0.4992, 0.7796]) tensor([0.4630, 0.0315, 0.2175, 0.2880]) -Greedy action tensor([-0.5309, -0.4772, 0.2884, 0.2915]) tensor([0.1515, 0.1599, 0.3438, 0.3448]) -Greedy action tensor([ 0.8988, -0.1228, -0.0590, 0.4784]) tensor([0.4166, 0.1500, 0.1599, 0.2736]) -Greedy action tensor([ 0.4405, 0.0585, -0.2108, -0.4095]) tensor([0.3800, 0.2594, 0.1981, 0.1624]) -Greedy action tensor([ 0.2334, -0.6019, -0.0839, 0.2190]) tensor([0.3177, 0.1378, 0.2313, 0.3132]) -Greedy action tensor([-0.3414, -0.1878, -0.5085, -0.4097]) tensor([0.2534, 0.2955, 0.2144, 0.2367]) -Greedy action tensor([ 1.1245, -1.4220, 1.8427, -0.3079]) tensor([0.2969, 0.0233, 0.6089, 0.0709]) -Greedy action tensor([ 0.1127, -0.5641, 0.4916, -0.2948]) tensor([0.2752, 0.1398, 0.4019, 0.1831]) -Greedy action tensor([ 0.4048, -1.3571, 0.4429, -0.3757]) tensor([0.3747, 0.0643, 0.3893, 0.1717]) -Greedy action tensor([-0.6338, -0.5530, 1.0753, -0.8023]) tensor([0.1183, 0.1283, 0.6535, 0.1000]) -Greedy action tensor([-0.5509, -0.0936, -0.6482, -0.3542]) tensor([0.2126, 0.3358, 0.1929, 0.2588]) -Greedy action tensor([-0.3015, 0.5950, 0.1875, 0.3518]) tensor([0.1428, 0.3500, 0.2328, 0.2744]) -Greedy action tensor([ 0.0411, 0.6264, 0.1533, -1.4134]) tensor([0.2411, 0.4329, 0.2697, 0.0563]) -Greedy action tensor([-0.3019, -1.0256, -0.9857, 0.2553]) tensor([0.2677, 0.1298, 0.1351, 0.4673]) -Greedy action tensor([ 0.4443, -1.2943, 0.0820, -0.3594]) tensor([0.4311, 0.0758, 0.3001, 0.1930]) -Greedy action tensor([ 0.2392, -1.5207, 0.6885, 0.1741]) tensor([0.2720, 0.0468, 0.4263, 0.2549]) -Greedy action tensor([ 0.3192, -1.2445, -0.2347, 0.9074]) tensor([0.2789, 0.0584, 0.1603, 0.5023]) -Greedy action tensor([ 1.2786, -0.5821, 0.1076, 1.4639]) tensor([0.3746, 0.0583, 0.1162, 0.4509]) -Greedy action tensor([-0.0870, -2.2382, 0.1797, -0.0547]) tensor([0.2894, 0.0337, 0.3779, 0.2990]) -Greedy action tensor([-1.3905, -1.3335, 0.4963, -0.6191]) tensor([0.0924, 0.0978, 0.6098, 0.1999]) -Greedy action tensor([ 0.0163, -0.5588, 0.3138, -0.0502]) tensor([0.2601, 0.1463, 0.3502, 0.2434]) -Greedy action tensor([-0.0244, -0.7875, -0.2681, -0.1970]) tensor([0.3235, 0.1508, 0.2535, 0.2722]) -Greedy action tensor([-1.1217, 0.1804, 1.0387, -0.4674]) tensor([0.0655, 0.2407, 0.5679, 0.1259]) -Greedy action tensor([-0.1694, -2.2075, 1.0638, -1.4492]) tensor([0.2066, 0.0269, 0.7090, 0.0575]) -Greedy action tensor([ 0.0394, -1.5977, 0.6857, 0.0611]) tensor([0.2424, 0.0472, 0.4627, 0.2477]) -Greedy action tensor([-0.1515, -1.2998, 0.7964, -0.2953]) tensor([0.2099, 0.0666, 0.5417, 0.1818]) -Greedy action tensor([ 0.0379, -0.6013, -0.8875, 0.0117]) tensor([0.3450, 0.1821, 0.1368, 0.3361]) -Greedy action tensor([-0.5628, -1.5246, 1.1147, -0.8751]) tensor([0.1339, 0.0512, 0.7169, 0.0980]) -Greedy action tensor([ 0.5467, -1.4019, -0.6704, 0.8824]) tensor([0.3524, 0.0502, 0.1043, 0.4930]) -Greedy action tensor([ 1.6040, -0.7018, 1.3815, 0.7163]) tensor([0.4326, 0.0431, 0.3463, 0.1780]) -Greedy action tensor([-0.0995, -0.4296, 0.3074, 0.8105]) tensor([0.1753, 0.1260, 0.2633, 0.4354]) -Greedy action tensor([ 0.1015, 0.0359, 0.3039, -0.4500]) tensor([0.2676, 0.2506, 0.3276, 0.1542]) -Greedy action tensor([ 0.8440, 0.5123, -0.3023, 0.3585]) tensor([0.3772, 0.2707, 0.1199, 0.2321]) -Greedy action tensor([-0.4544, -0.1561, 1.1060, -0.0398]) tensor([0.1160, 0.1563, 0.5522, 0.1756]) -Greedy action tensor([ 0.6776, -0.2371, 0.1063, -0.4142]) tensor([0.4346, 0.1741, 0.2455, 0.1459]) -Greedy action tensor([ 0.1904, 0.7741, -0.4315, 0.1044]) tensor([0.2355, 0.4221, 0.1264, 0.2160]) -Greedy action tensor([ 0.4797, -0.9061, -0.0463, 0.0237]) tensor([0.4041, 0.1011, 0.2388, 0.2561]) -Greedy action tensor([-0.2406, -1.1707, -0.5552, -0.5882]) tensor([0.3532, 0.1394, 0.2579, 0.2495]) -Greedy action tensor([-1.0148, -0.4063, 0.2710, -0.8598]) tensor([0.1312, 0.2411, 0.4746, 0.1532]) -Greedy action tensor([ 0.4027, -1.8953, 0.3291, 0.8238]) tensor([0.2814, 0.0283, 0.2615, 0.4288]) -Greedy action tensor([ 0.9242, -0.8770, 0.9854, 0.3958]) tensor([0.3549, 0.0586, 0.3773, 0.2092]) -Greedy action tensor([ 0.2702, -0.7053, 0.2206, 1.1676]) tensor([0.2091, 0.0788, 0.1990, 0.5130]) -Greedy action tensor([ 0.1470, -1.5647, -0.2111, 0.5541]) tensor([0.2957, 0.0534, 0.2067, 0.4442]) -Greedy action tensor([ 0.4773, -0.6838, -0.4652, 0.7265]) tensor([0.3349, 0.1049, 0.1305, 0.4297]) -Greedy action tensor([ 0.0630, -0.9667, 0.0935, 0.8880]) tensor([0.2141, 0.0765, 0.2208, 0.4886]) -Greedy action tensor([ 1.1534, -1.3905, 1.1994, 0.1322]) tensor([0.4023, 0.0316, 0.4212, 0.1449]) -Greedy action tensor([ 0.6294, -1.7384, 0.3912, 0.9843]) tensor([0.3023, 0.0283, 0.2382, 0.4311]) -Greedy action tensor([-0.4251, -2.2518, -0.0619, 0.2729]) tensor([0.2170, 0.0349, 0.3120, 0.4361]) -Greedy action tensor([ 0.3698, 0.1709, -0.6397, -0.8594]) tensor([0.4038, 0.3309, 0.1471, 0.1181]) -Greedy action tensor([ 1.3606, 0.0636, 2.3055, -0.6273]) tensor([0.2511, 0.0686, 0.6459, 0.0344]) -Greedy action tensor([ 0.0187, -1.1312, 0.7556, 0.0957]) tensor([0.2229, 0.0706, 0.4658, 0.2408]) -Greedy action tensor([-0.0340, -0.9931, 1.7483, -0.1065]) tensor([0.1211, 0.0464, 0.7198, 0.1126]) -Greedy action tensor([-0.5607, -0.0770, -0.3713, -0.3175]) tensor([0.1959, 0.3177, 0.2367, 0.2498]) -Greedy action tensor([-1.3315, -0.2458, 0.0669, -1.2094]) tensor([0.1094, 0.3240, 0.4429, 0.1236]) -Greedy action tensor([ 0.4600, 0.2973, 0.4804, -0.2155]) tensor([0.2959, 0.2515, 0.3020, 0.1506]) -Greedy action tensor([ 0.2783, -0.5150, -0.8274, -0.3618]) tensor([0.4328, 0.1958, 0.1433, 0.2282]) -Greedy action tensor([-1.5846, -0.6073, 0.4992, -0.0792]) tensor([0.0617, 0.1641, 0.4960, 0.2782]) -Greedy action tensor([ 0.2720, -0.3184, 0.4849, -0.0525]) tensor([0.2846, 0.1577, 0.3521, 0.2057]) -Greedy action tensor([ 0.4468, -0.8399, 1.7584, 0.2989]) tensor([0.1709, 0.0472, 0.6345, 0.1474]) -Greedy action tensor([-0.0533, -1.2149, -0.5356, -0.8166]) tensor([0.4173, 0.1306, 0.2576, 0.1945]) -Greedy action tensor([-0.3632, -1.6217, 0.9766, -0.8814]) tensor([0.1755, 0.0499, 0.6701, 0.1045]) -Greedy action tensor([ 1.0419, -0.6238, 0.3799, 0.5803]) tensor([0.4282, 0.0810, 0.2209, 0.2699]) -Greedy action tensor([-0.0102, -0.8778, -1.0319, 0.0871]) tensor([0.3470, 0.1457, 0.1249, 0.3824]) -Greedy action tensor([-0.3635, 0.2042, -1.1279, -0.4507]) tensor([0.2412, 0.4255, 0.1123, 0.2210]) -Greedy action tensor([1.5412, 0.6339, 0.9007, 0.0344]) tensor([0.4646, 0.1875, 0.2449, 0.1030]) -Greedy action tensor([-0.2970, -2.1509, 1.6938, -0.2017]) tensor([0.1044, 0.0164, 0.7644, 0.1148]) -Greedy action tensor([-0.9160, -0.0071, 0.3618, -0.6872]) tensor([0.1201, 0.2980, 0.4310, 0.1510]) -Greedy action tensor([0.2103, 0.9981, 0.0740, 0.0665]) tensor([0.2025, 0.4453, 0.1767, 0.1754]) -Greedy action tensor([-0.6709, 0.3217, -0.2064, -1.5219]) tensor([0.1749, 0.4720, 0.2784, 0.0747]) -Greedy action tensor([ 0.6731, -0.5040, 0.6301, 1.1207]) tensor([0.2611, 0.0804, 0.2501, 0.4084]) -Greedy action tensor([ 0.4113, 0.7188, -0.3212, 1.7301]) tensor([0.1520, 0.2067, 0.0731, 0.5683]) -Greedy action tensor([-0.0087, -0.8366, -0.4585, -0.8965]) tensor([0.4022, 0.1758, 0.2565, 0.1655]) -Greedy action tensor([-0.6343, -0.1631, 1.4477, -0.5310]) tensor([0.0852, 0.1365, 0.6837, 0.0945]) -Greedy action tensor([-0.7606, -1.2005, 0.7030, -0.5747]) tensor([0.1395, 0.0898, 0.6027, 0.1680]) -Greedy action tensor([-0.7797, -0.7534, 1.8366, 0.0652]) tensor([0.0554, 0.0569, 0.7586, 0.1290]) -Greedy action tensor([ 0.6555, -0.2166, 0.0685, -0.3471]) tensor([0.4272, 0.1786, 0.2375, 0.1567]) -Greedy action tensor([ 0.5651, -0.5669, -0.0847, -0.0822]) tensor([0.4223, 0.1361, 0.2205, 0.2210]) -Greedy action tensor([ 1.0049, -0.7111, -0.0063, -0.4163]) tensor([0.5602, 0.1007, 0.2038, 0.1353]) -Greedy action tensor([ 0.9978, -0.7565, 0.0410, -0.6132]) tensor([0.5692, 0.0985, 0.2186, 0.1137]) -Greedy action tensor([ 1.0141, -0.3991, -0.2817, -0.2889]) tensor([0.5591, 0.1361, 0.1530, 0.1519]) -Greedy action tensor([ 0.4667, -0.3271, 0.0013, -0.2359]) tensor([0.3883, 0.1756, 0.2438, 0.1923]) -Greedy action tensor([ 0.9411, -0.8260, -0.0583, -0.3460]) tensor([0.5510, 0.0941, 0.2028, 0.1521]) -Greedy action tensor([ 0.5873, -0.2327, -0.0807, -0.1391]) tensor([0.4104, 0.1807, 0.2104, 0.1985]) -Greedy action tensor([ 0.7393, -0.4625, -0.0956, -0.3680]) tensor([0.4843, 0.1456, 0.2101, 0.1600]) -Greedy action tensor([ 0.9311, 0.0727, -0.2660, 0.1477]) tensor([0.4581, 0.1942, 0.1384, 0.2093]) -Greedy action tensor([ 0.5276, -0.3853, 0.0198, -0.1698]) tensor([0.3998, 0.1605, 0.2406, 0.1991]) -Greedy action tensor([ 0.6996, -0.6694, -0.1635, -0.2601]) tensor([0.4856, 0.1235, 0.2049, 0.1860]) -Greedy action tensor([ 0.7022, -0.5953, -0.0186, -0.1641]) tensor([0.4587, 0.1253, 0.2231, 0.1929]) -Greedy action tensor([ 0.8435, -1.0382, -0.0154, -0.4371]) tensor([0.5394, 0.0822, 0.2285, 0.1499]) -Greedy action tensor([ 0.5939, -0.1565, -0.0959, -0.0377]) tensor([0.3991, 0.1885, 0.2002, 0.2122]) -Greedy action tensor([ 0.8101, -0.3250, 0.0752, -0.3034]) tensor([0.4696, 0.1509, 0.2252, 0.1542]) -Greedy action tensor([ 0.9097, -0.6323, 0.4109, -0.7817]) tensor([0.4986, 0.1067, 0.3028, 0.0919]) -Greedy action tensor([ 0.7468, -0.7148, 0.1148, -0.2758]) tensor([0.4710, 0.1092, 0.2504, 0.1694]) -Greedy action tensor([ 0.5450, -0.3310, -0.1775, -0.3195]) tensor([0.4304, 0.1792, 0.2090, 0.1813]) -Greedy action tensor([ 0.5564, -0.4557, -0.1642, -0.1524]) tensor([0.4269, 0.1552, 0.2077, 0.2102]) -Greedy action tensor([ 0.4744, 0.0282, -0.0820, -0.0895]) tensor([0.3594, 0.2300, 0.2060, 0.2045]) -Greedy action tensor([ 1.1397, -0.9218, 0.0852, -0.4390]) tensor([0.5946, 0.0757, 0.2071, 0.1226]) -Greedy action tensor([ 0.7349, -0.3097, 0.0770, -0.1250]) tensor([0.4361, 0.1534, 0.2259, 0.1846]) -Greedy action tensor([ 0.6304, -0.2697, 0.0280, -0.0672]) tensor([0.4079, 0.1658, 0.2233, 0.2030]) -Greedy action tensor([ 0.6824, -0.4251, -0.0306, -0.4277]) tensor([0.4651, 0.1537, 0.2280, 0.1533]) -Greedy action tensor([ 0.4016, 0.0149, -0.2782, -0.7494]) tensor([0.3996, 0.2715, 0.2025, 0.1264]) -Greedy action tensor([ 1.2045, -0.6477, 0.0970, -0.3828]) tensor([0.5911, 0.0927, 0.1953, 0.1209]) -Greedy action tensor([ 1.0249, -0.6357, -0.1349, -0.5139]) tensor([0.5820, 0.1106, 0.1825, 0.1249]) -Greedy action tensor([ 0.7295, -0.0588, -0.0213, -0.2482]) tensor([0.4343, 0.1974, 0.2050, 0.1634]) -Greedy action tensor([ 0.5320, -0.2345, -0.0299, -0.3441]) tensor([0.4080, 0.1896, 0.2326, 0.1699]) -Greedy action tensor([ 0.9603, -0.5229, 0.0065, -0.4200]) tensor([0.5366, 0.1218, 0.2067, 0.1349]) -Greedy action tensor([ 0.6856, -0.2623, -0.0447, -0.1465]) tensor([0.4339, 0.1682, 0.2091, 0.1888]) -Greedy action tensor([ 0.5962, -0.3261, -0.0733, -0.1465]) tensor([0.4192, 0.1667, 0.2146, 0.1995]) -Greedy action tensor([ 0.8493, -0.3222, 0.0980, -0.4023]) tensor([0.4836, 0.1499, 0.2282, 0.1383]) -Greedy action tensor([ 0.1089, 0.0279, -0.2619, -0.2947]) tensor([0.3049, 0.2811, 0.2104, 0.2036]) -Greedy action tensor([ 0.8157, -0.7090, 0.0707, -0.2924]) tensor([0.4944, 0.1076, 0.2347, 0.1632]) -Greedy action tensor([ 0.8330, -0.4162, -0.0282, -0.4329]) tensor([0.5022, 0.1440, 0.2122, 0.1416]) -Greedy action tensor([ 0.8689, -0.2325, -0.2073, -0.5415]) tensor([0.5215, 0.1734, 0.1778, 0.1273]) -Greedy action tensor([ 0.8386, -0.3102, -0.0573, -0.3486]) tensor([0.4925, 0.1561, 0.2011, 0.1503]) -Greedy action tensor([ 0.9271, -0.0156, -0.0158, -0.3937]) tensor([0.4888, 0.1904, 0.1904, 0.1305]) -Greedy action tensor([ 0.5056, -0.0627, -0.0539, -0.0133]) tensor([0.3659, 0.2073, 0.2091, 0.2178]) -Greedy action tensor([ 1.0836, -0.5733, -0.0581, -0.4575]) tensor([0.5800, 0.1106, 0.1852, 0.1242]) -Greedy action tensor([ 0.3475, 0.0592, -0.0606, -0.0551]) tensor([0.3243, 0.2431, 0.2157, 0.2169]) -Greedy action tensor([ 0.9035, -0.5794, -0.0254, -0.3504]) tensor([0.5243, 0.1190, 0.2071, 0.1496]) -Greedy action tensor([ 1.0565, -0.3748, -0.0460, -0.3128]) tensor([0.5479, 0.1309, 0.1819, 0.1393]) -Greedy action tensor([ 0.7727, -0.8554, 0.0764, -0.5689]) tensor([0.5112, 0.1004, 0.2548, 0.1336]) -Greedy action tensor([ 0.6610, -0.2508, -0.0154, 0.0198]) tensor([0.4104, 0.1649, 0.2086, 0.2161]) -Greedy action tensor([ 0.4496, -0.3694, 0.0142, -0.1158]) tensor([0.3765, 0.1660, 0.2436, 0.2139]) -Greedy action tensor([ 0.6364, -0.1694, 0.1388, -0.1144]) tensor([0.3958, 0.1768, 0.2406, 0.1868]) -Greedy action tensor([ 0.8650, -0.6160, -0.0758, -0.6699]) tensor([0.5455, 0.1240, 0.2129, 0.1175]) -Greedy action tensor([ 0.3874, -0.4433, -0.1569, -0.1203]) tensor([0.3820, 0.1665, 0.2217, 0.2299]) -Greedy action tensor([ 0.3507, 0.0183, 0.1272, -0.2408]) tensor([0.3257, 0.2336, 0.2605, 0.1803]) -Greedy action tensor([ 0.9355, -1.0726, 0.1645, -0.5140]) tensor([0.5460, 0.0733, 0.2525, 0.1281]) -Greedy action tensor([ 0.6349, -0.1944, -0.1025, -0.1564]) tensor([0.4223, 0.1843, 0.2020, 0.1914]) -Greedy action tensor([ 1.0875, -0.8029, 0.0956, -0.6742]) tensor([0.5904, 0.0892, 0.2190, 0.1014]) -Greedy action tensor([ 0.5990, -0.1316, 0.0709, -0.0740]) tensor([0.3874, 0.1866, 0.2284, 0.1976]) -Greedy action tensor([ 0.3818, -0.1724, -0.0563, -0.1850]) tensor([0.3588, 0.2061, 0.2315, 0.2036]) -Greedy action tensor([ 0.4266, 0.0030, 0.0097, -0.0640]) tensor([0.3418, 0.2237, 0.2253, 0.2092]) -Greedy action tensor([ 0.8031, -1.2080, -0.0676, -0.5032]) tensor([0.5485, 0.0734, 0.2296, 0.1485]) -Greedy action tensor([ 0.7451, -0.2757, 0.1032, -0.6692]) tensor([0.4696, 0.1692, 0.2471, 0.1141]) -Greedy action tensor([ 0.9252, -0.3349, -0.1113, -0.3845]) tensor([0.5240, 0.1486, 0.1859, 0.1414]) -Greedy action tensor([ 0.8537, -0.6812, -0.0049, -0.6936]) tensor([0.5399, 0.1163, 0.2288, 0.1149]) -Greedy action tensor([ 0.6794, -0.1689, -0.0019, -0.0869]) tensor([0.4169, 0.1785, 0.2109, 0.1937]) -Greedy action tensor([ 0.6484, -0.1838, -0.2409, -0.4597]) tensor([0.4595, 0.1999, 0.1888, 0.1517]) -Greedy action tensor([ 1.0081, -0.7812, -0.0625, -0.3580]) tensor([0.5666, 0.0947, 0.1942, 0.1445]) -Greedy action tensor([ 0.6004, -0.5073, -0.0956, -0.0457]) tensor([0.4250, 0.1404, 0.2119, 0.2227]) -Greedy action tensor([ 0.4053, 0.0241, -0.2161, 0.0031]) tensor([0.3461, 0.2364, 0.1859, 0.2315]) -Greedy action tensor([ 0.8819, -0.7212, 0.0394, -0.4582]) tensor([0.5280, 0.1063, 0.2274, 0.1383]) -Greedy action tensor([ 0.6200, -0.5630, -0.1534, -0.3086]) tensor([0.4624, 0.1416, 0.2133, 0.1827]) -Greedy action tensor([ 0.5673, -0.1412, 0.2763, -0.5095]) tensor([0.3875, 0.1908, 0.2897, 0.1320]) -Greedy action tensor([ 0.3849, -0.1642, -0.1356, -0.2081]) tensor([0.3671, 0.2120, 0.2181, 0.2029]) -Greedy action tensor([ 0.3418, 0.2595, -0.1137, -0.0922]) tensor([0.3122, 0.2875, 0.1980, 0.2023]) -Greedy action tensor([ 0.4991, -0.2175, -0.0706, -0.0536]) tensor([0.3803, 0.1857, 0.2151, 0.2188]) -Greedy action tensor([ 0.7587, -0.4597, 0.0351, -0.5005]) tensor([0.4844, 0.1432, 0.2349, 0.1375]) -Greedy action tensor([ 0.3132, -0.0323, 0.0082, -0.2957]) tensor([0.3346, 0.2368, 0.2466, 0.1820]) -Greedy action tensor([ 0.2861, 0.2987, -0.1993, 0.0969]) tensor([0.2894, 0.2930, 0.1781, 0.2395]) -Greedy action tensor([ 1.4326, -0.8368, -0.2189, -0.2873]) tensor([0.6783, 0.0701, 0.1301, 0.1215]) -Greedy action tensor([ 0.3624, -0.4620, -0.3232, -0.8117]) tensor([0.4442, 0.1948, 0.2238, 0.1373]) -Greedy action tensor([ 0.4704, -0.5992, -0.2660, -0.3567]) tensor([0.4426, 0.1519, 0.2119, 0.1936]) -Greedy action tensor([ 1.0005, -0.3225, -0.0885, -0.2180]) tensor([0.5267, 0.1403, 0.1773, 0.1557]) -Greedy action tensor([ 0.7004, -0.4050, -0.1040, -0.1843]) tensor([0.4564, 0.1511, 0.2042, 0.1884]) -Greedy action tensor([-1.5902, -0.5305, 0.4956, 0.0984]) tensor([0.0576, 0.1663, 0.4641, 0.3119]) -Greedy action tensor([-1.9367, -0.4346, 0.6622, -0.1742]) tensor([0.0404, 0.1813, 0.5430, 0.2353]) -Greedy action tensor([-1.3474, 0.2892, 0.5731, -0.5666]) tensor([0.0660, 0.3392, 0.4506, 0.1442]) -Greedy action tensor([-1.9022, -0.4530, 0.6516, -0.1565]) tensor([0.0419, 0.1786, 0.5391, 0.2403]) -Greedy action tensor([-1.9283, -0.4467, 0.6556, -0.1745]) tensor([0.0409, 0.1801, 0.5424, 0.2365]) -Greedy action tensor([-1.9167, -0.2906, 0.6278, -0.1646]) tensor([0.0407, 0.2068, 0.5180, 0.2345]) -Greedy action tensor([-1.8901, -0.3472, 0.6233, -0.1470]) tensor([0.0421, 0.1970, 0.5201, 0.2407]) -Greedy action tensor([-1.8770, -0.4500, 0.6416, -0.1273]) tensor([0.0429, 0.1786, 0.5320, 0.2466]) -Greedy action tensor([-1.9246, -0.4492, 0.6574, -0.1624]) tensor([0.0409, 0.1791, 0.5415, 0.2385]) -Greedy action tensor([-0.8067, 0.9747, 0.1127, 0.2119]) tensor([0.0819, 0.4861, 0.2053, 0.2267]) -Greedy action tensor([-1.9294, -0.4312, 0.6590, -0.1710]) tensor([0.0407, 0.1820, 0.5413, 0.2360]) -Greedy action tensor([-1.5773, -0.3199, 0.4610, 0.0469]) tensor([0.0579, 0.2036, 0.4446, 0.2939]) -Greedy action tensor([-1.8793, -0.4558, 0.6813, -0.0990]) tensor([0.0416, 0.1728, 0.5387, 0.2469]) -Greedy action tensor([-1.9054, -0.2451, 0.6126, -0.1528]) tensor([0.0409, 0.2153, 0.5076, 0.2361]) -Greedy action tensor([-1.7672, -0.4918, 0.5789, -0.1180]) tensor([0.0494, 0.1770, 0.5164, 0.2572]) -Greedy action tensor([-1.6689, -0.1534, 0.5918, -0.0386]) tensor([0.0494, 0.2248, 0.4736, 0.2522]) -Greedy action tensor([-1.9272, -0.4156, 0.6567, -0.1703]) tensor([0.0407, 0.1845, 0.5391, 0.2358]) -Greedy action tensor([-1.8987, -0.4160, 0.6464, -0.1585]) tensor([0.0419, 0.1847, 0.5344, 0.2389]) -Greedy action tensor([-1.9024, -0.4425, 0.6424, -0.1610]) tensor([0.0421, 0.1813, 0.5364, 0.2402]) -Greedy action tensor([-1.7112, -0.3118, 0.6056, 0.0359]) tensor([0.0478, 0.1936, 0.4845, 0.2741]) -Greedy action tensor([-1.9255, -0.4229, 0.6568, -0.1703]) tensor([0.0408, 0.1834, 0.5398, 0.2361]) -Greedy action tensor([-1.9406, -0.4539, 0.6693, -0.1763]) tensor([0.0402, 0.1779, 0.5470, 0.2348]) -Greedy action tensor([-1.8827, -0.3688, 0.6391, -0.1509]) tensor([0.0423, 0.1922, 0.5266, 0.2390]) -Greedy action tensor([-1.4367, 0.3636, 0.3336, 0.3439]) tensor([0.0530, 0.3209, 0.3114, 0.3146]) -Greedy action tensor([-1.6532, -0.1835, 0.4917, 0.0277]) tensor([0.0519, 0.2258, 0.4435, 0.2788]) -Greedy action tensor([-1.9325, -0.4353, 0.6642, -0.1649]) tensor([0.0404, 0.1806, 0.5423, 0.2367]) -Greedy action tensor([-1.7440, 0.2827, 0.4979, -0.1655]) tensor([0.0438, 0.3321, 0.4119, 0.2122]) -Greedy action tensor([-1.9136, -0.4380, 0.6409, -0.1613]) tensor([0.0417, 0.1822, 0.5359, 0.2403]) -Greedy action tensor([-1.8594, -0.4038, 0.6388, -0.1123]) tensor([0.0431, 0.1849, 0.5245, 0.2475]) -Greedy action tensor([-0.9576, -0.2640, 0.5112, -0.4406]) tensor([0.1108, 0.2218, 0.4815, 0.1859]) -Greedy action tensor([-1.8666, -0.4095, 0.6249, -0.1472]) tensor([0.0436, 0.1870, 0.5262, 0.2432]) -Greedy action tensor([-1.8422, -0.4591, 0.6300, -0.1265]) tensor([0.0446, 0.1780, 0.5290, 0.2483]) -Greedy action tensor([-1.7142, -0.4887, 0.5689, -0.0473]) tensor([0.0513, 0.1746, 0.5027, 0.2714]) -Greedy action tensor([-0.8044, 0.3538, 0.5883, 0.6138]) tensor([0.0810, 0.2581, 0.3262, 0.3347]) -Greedy action tensor([-1.7413, -0.5148, 0.5651, -0.0994]) tensor([0.0510, 0.1738, 0.5118, 0.2634]) -Greedy action tensor([-1.9358, -0.4446, 0.6605, -0.1779]) tensor([0.0406, 0.1802, 0.5440, 0.2352]) -Greedy action tensor([-1.8904, -0.9123, 0.6754, -0.0691]) tensor([0.0438, 0.1164, 0.5694, 0.2704]) -Greedy action tensor([-1.3648, -0.5140, 0.4655, 0.3478]) tensor([0.0661, 0.1549, 0.4124, 0.3666]) -Greedy action tensor([-1.4049, -0.5854, 0.3884, 0.1282]) tensor([0.0719, 0.1631, 0.4320, 0.3330]) -Greedy action tensor([-1.9398, -0.4549, 0.6694, -0.1795]) tensor([0.0403, 0.1779, 0.5475, 0.2343]) -Greedy action tensor([-1.9058, -0.3926, 0.6459, -0.1485]) tensor([0.0414, 0.1879, 0.5308, 0.2399]) -Greedy action tensor([-1.9387, -0.4399, 0.6647, -0.1757]) tensor([0.0403, 0.1804, 0.5444, 0.2349]) -Greedy action tensor([-0.5185, 0.7098, 0.3172, 0.5577]) tensor([0.1036, 0.3537, 0.2389, 0.3038]) -Greedy action tensor([-1.8962, -0.4360, 0.6494, -0.1534]) tensor([0.0421, 0.1812, 0.5364, 0.2403]) -Greedy action tensor([-1.8673, -0.4080, 0.6475, -0.1245]) tensor([0.0428, 0.1840, 0.5288, 0.2444]) -Greedy action tensor([-1.8185, -0.3202, 0.6096, -0.0629]) tensor([0.0443, 0.1980, 0.5017, 0.2561]) -Greedy action tensor([-1.5499, -0.4506, 0.5490, 0.1935]) tensor([0.0559, 0.1679, 0.4563, 0.3198]) -Greedy action tensor([-1.8326, -0.3262, 0.6266, -0.1142]) tensor([0.0439, 0.1980, 0.5134, 0.2447]) -Greedy action tensor([-1.9363, -0.4302, 0.6606, -0.1747]) tensor([0.0404, 0.1822, 0.5422, 0.2352]) -Greedy action tensor([-1.7735, -0.2950, 0.5864, -0.0502]) tensor([0.0463, 0.2033, 0.4907, 0.2597]) -Greedy action tensor([-0.4790, 1.0063, 0.0377, 0.2360]) tensor([0.1094, 0.4833, 0.1835, 0.2237]) -Greedy action tensor([-1.8740, -0.3451, 0.6156, -0.1160]) tensor([0.0426, 0.1966, 0.5137, 0.2472]) -Greedy action tensor([-1.9201, -0.4197, 0.6618, -0.1457]) tensor([0.0406, 0.1822, 0.5374, 0.2397]) -Greedy action tensor([-1.7026, 0.2150, 0.4497, -0.0609]) tensor([0.0464, 0.3154, 0.3989, 0.2394]) -Greedy action tensor([-1.7566, -0.1898, 0.5226, -0.0870]) tensor([0.0479, 0.2296, 0.4681, 0.2544]) -Greedy action tensor([-0.5663, -0.4340, 0.2727, 0.3761]) tensor([0.1424, 0.1626, 0.3295, 0.3655]) -Greedy action tensor([-1.8370, -0.4804, 0.6077, -0.1262]) tensor([0.0456, 0.1770, 0.5253, 0.2522]) -Greedy action tensor([-0.7971, 0.9187, 0.0388, 0.3171]) tensor([0.0839, 0.4667, 0.1936, 0.2557]) -Greedy action tensor([-1.8551, -0.4498, 0.6126, -0.1432]) tensor([0.0446, 0.1819, 0.5263, 0.2472]) -Greedy action tensor([-1.9369, -0.4514, 0.6665, -0.1753]) tensor([0.0404, 0.1785, 0.5459, 0.2352]) -Greedy action tensor([-1.7973, -0.4668, 0.5907, -0.1098]) tensor([0.0474, 0.1795, 0.5167, 0.2564]) -Greedy action tensor([-1.8931, -0.3819, 0.6417, -0.1506]) tensor([0.0419, 0.1900, 0.5287, 0.2394]) -Greedy action tensor([-1.2384, -0.3269, 0.3271, 0.2353]) tensor([0.0791, 0.1969, 0.3786, 0.3454]) -Greedy action tensor([-1.8174, -0.3705, 0.5937, -0.1301]) tensor([0.0459, 0.1949, 0.5113, 0.2479]) -Greedy action tensor([-1.8810, -0.3547, 0.6204, -0.1436]) tensor([0.0426, 0.1959, 0.5195, 0.2420]) -Greedy action tensor([-1.8663, -0.4163, 0.6209, -0.1465]) tensor([0.0437, 0.1864, 0.5258, 0.2441]) -Greedy action tensor([ 0.6656, 0.0371, 0.1050, -0.0880]) tensor([0.3884, 0.2071, 0.2217, 0.1828]) -Greedy action tensor([-1.8642, -0.4353, 0.6532, -0.0938]) tensor([0.0427, 0.1780, 0.5288, 0.2505]) -Greedy action tensor([-1.6927, -0.2053, 0.5441, -0.0077]) tensor([0.0496, 0.2193, 0.4640, 0.2672]) -Greedy action tensor([-1.8081, -0.4629, 0.6010, -0.1107]) tensor([0.0467, 0.1792, 0.5193, 0.2548]) -Greedy action tensor([-1.9138, -0.4629, 0.6496, -0.1657]) tensor([0.0417, 0.1779, 0.5410, 0.2394]) -Greedy action tensor([-1.7081, -0.4399, 0.5431, -0.0652]) tensor([0.0520, 0.1849, 0.4941, 0.2689]) -Greedy action tensor([-1.1556, 0.0839, 0.4543, 0.3790]) tensor([0.0709, 0.2450, 0.3549, 0.3292]) -Greedy action tensor([-1.7929, -0.3644, 0.6557, -0.0263]) tensor([0.0443, 0.1847, 0.5122, 0.2589]) -Greedy action tensor([-1.0505, -0.5300, 0.5753, -0.3742]) tensor([0.1028, 0.1729, 0.5222, 0.2021]) -Greedy action tensor([-1.8587, -0.3760, 0.6146, -0.1419]) tensor([0.0438, 0.1929, 0.5195, 0.2438]) -Greedy action tensor([-1.9456, -0.4496, 0.6675, -0.1815]) tensor([0.0401, 0.1790, 0.5469, 0.2340]) -Greedy action tensor([-1.8926, -0.4143, 0.6536, -0.1189]) tensor([0.0416, 0.1824, 0.5308, 0.2452]) -Greedy action tensor([-1.2015, -0.5542, 0.3321, 0.2049]) tensor([0.0860, 0.1643, 0.3986, 0.3510]) -Greedy action tensor([-1.6630, -0.5048, 0.5761, 0.0762]) tensor([0.0519, 0.1653, 0.4872, 0.2956]) -Greedy action tensor([-1.6650, -0.1499, 0.5268, 0.1570]) tensor([0.0483, 0.2200, 0.4327, 0.2990]) -Greedy action tensor([ 1.7515, -0.7184, -0.7150, 0.4881]) tensor([0.6886, 0.0583, 0.0585, 0.1947]) -Greedy action tensor([ 1.6675, -0.5935, -0.3954, 0.1297]) tensor([0.6915, 0.0721, 0.0879, 0.1486]) -Greedy action tensor([ 1.2674, -0.3994, -0.4797, 0.7792]) tensor([0.5059, 0.0955, 0.0882, 0.3105]) -Greedy action tensor([ 1.9916, -0.4298, -0.4051, 0.6122]) tensor([0.6986, 0.0620, 0.0636, 0.1758]) -Greedy action tensor([ 1.2705, -0.2476, -0.8052, 0.5371]) tensor([0.5480, 0.1201, 0.0688, 0.2632]) -Greedy action tensor([ 1.3977, -0.3553, 0.0091, 0.1293]) tensor([0.5869, 0.1017, 0.1464, 0.1651]) -Greedy action tensor([ 1.6674, -0.1374, -0.6243, 0.1437]) tensor([0.6741, 0.1109, 0.0682, 0.1469]) -Greedy action tensor([ 1.1402, -0.5196, -0.1176, 0.1380]) tensor([0.5430, 0.1033, 0.1544, 0.1993]) -Greedy action tensor([ 1.4843, -0.6530, -0.3379, 0.6287]) tensor([0.5866, 0.0692, 0.0948, 0.2493]) -Greedy action tensor([ 1.9396, -0.6520, -0.4280, 0.4352]) tensor([0.7190, 0.0539, 0.0674, 0.1597]) -Greedy action tensor([ 1.4806, -0.6963, -0.3601, 0.0415]) tensor([0.6626, 0.0751, 0.1052, 0.1571]) -Greedy action tensor([ 1.0874, -0.3787, -0.0446, 0.2245]) tensor([0.5063, 0.1169, 0.1632, 0.2136]) -Greedy action tensor([ 1.0577, -0.3859, 0.1296, 0.2259]) tensor([0.4839, 0.1142, 0.1913, 0.2106]) -Greedy action tensor([ 0.5238, 0.0589, 0.1470, -0.1141]) tensor([0.3518, 0.2210, 0.2413, 0.1859]) -Greedy action tensor([ 1.5331, -0.3672, -0.6387, 0.5393]) tensor([0.6121, 0.0915, 0.0698, 0.2266]) -Greedy action tensor([ 1.2015, -0.1279, -0.3357, 0.5295]) tensor([0.5024, 0.1330, 0.1080, 0.2566]) -Greedy action tensor([ 1.6628, -0.7228, -0.1745, 0.3282]) tensor([0.6603, 0.0608, 0.1051, 0.1738]) -Greedy action tensor([ 1.3109, -0.2332, -0.2355, 0.6968]) tensor([0.5082, 0.1085, 0.1083, 0.2750]) -Greedy action tensor([ 1.5533, -0.2893, -0.6524, 0.5039]) tensor([0.6178, 0.0979, 0.0681, 0.2163]) -Greedy action tensor([ 1.2337, -0.6168, -0.3130, 0.5522]) tensor([0.5331, 0.0838, 0.1135, 0.2697]) -Greedy action tensor([ 2.0462, -0.6792, -0.4456, 0.7300]) tensor([0.7060, 0.0463, 0.0584, 0.1893]) -Greedy action tensor([ 1.5839, -0.6569, -0.7347, 0.6503]) tensor([0.6258, 0.0666, 0.0616, 0.2460]) -Greedy action tensor([ 1.3272, 0.0589, -0.6454, 0.0563]) tensor([0.5879, 0.1654, 0.0818, 0.1649]) -Greedy action tensor([ 1.3235, -0.2703, -0.1013, 0.4538]) tensor([0.5368, 0.1091, 0.1291, 0.2250]) -Greedy action tensor([ 1.2288, -0.9001, -0.0832, 0.2571]) tensor([0.5660, 0.0673, 0.1524, 0.2142]) -Greedy action tensor([ 2.1068, -0.1372, -0.4547, -0.0169]) tensor([0.7676, 0.0814, 0.0592, 0.0918]) -Greedy action tensor([ 1.4461, 0.1064, -0.9293, 0.3556]) tensor([0.5914, 0.1549, 0.0550, 0.1987]) -Greedy action tensor([ 2.4089, -0.6906, -0.3416, 0.1355]) tensor([0.8251, 0.0372, 0.0527, 0.0850]) -Greedy action tensor([ 1.2783, -0.3251, -0.7937, 0.1328]) tensor([0.6078, 0.1223, 0.0765, 0.1933]) -Greedy action tensor([ 1.7218, -0.5675, -0.3485, 0.3233]) tensor([0.6782, 0.0687, 0.0856, 0.1675]) -Greedy action tensor([ 1.2938, -0.0477, -0.6529, 0.6518]) tensor([0.5180, 0.1354, 0.0739, 0.2726]) -Greedy action tensor([ 2.0843, -0.5405, -0.2186, 0.0941]) tensor([0.7639, 0.0553, 0.0764, 0.1044]) -Greedy action tensor([ 1.3431, -0.4940, -0.4672, 0.3999]) tensor([0.5840, 0.0930, 0.0956, 0.2274]) -Greedy action tensor([ 1.4020, -0.2540, -0.0827, 0.1596]) tensor([0.5861, 0.1119, 0.1328, 0.1692]) -Greedy action tensor([ 1.7721, -0.4903, -0.3310, 0.6626]) tensor([0.6427, 0.0669, 0.0785, 0.2119]) -Greedy action tensor([ 1.3941, -0.3248, -0.6878, 0.3471]) tensor([0.6043, 0.1083, 0.0753, 0.2121]) -Greedy action tensor([ 1.5631, -0.8236, -0.1452, 0.9931]) tensor([0.5439, 0.0500, 0.0985, 0.3076]) -Greedy action tensor([ 1.1896, -0.4506, 0.0531, 0.0948]) tensor([0.5407, 0.1049, 0.1735, 0.1809]) -Greedy action tensor([ 1.4142, -0.3238, -0.5113, 0.3314]) tensor([0.6023, 0.1059, 0.0878, 0.2040]) -Greedy action tensor([ 0.8855, -0.4253, -0.0120, -0.1984]) tensor([0.4962, 0.1338, 0.2022, 0.1678]) -Greedy action tensor([ 1.5567, -0.7234, -0.3075, 0.9873]) tensor([0.5485, 0.0561, 0.0850, 0.3104]) -Greedy action tensor([ 1.6711, -0.9573, -0.1183, 0.1217]) tensor([0.6889, 0.0497, 0.1151, 0.1463]) -Greedy action tensor([ 1.7595, -0.3338, -0.4665, 0.1070]) tensor([0.7028, 0.0866, 0.0759, 0.1346]) -Greedy action tensor([ 1.8998, -0.3392, -0.6392, 0.4961]) tensor([0.6987, 0.0745, 0.0552, 0.1717]) -Greedy action tensor([ 1.4104, -0.3024, -0.1826, 0.2753]) tensor([0.5865, 0.1058, 0.1192, 0.1885]) -Greedy action tensor([ 1.9438, -0.4327, -0.7736, 0.3509]) tensor([0.7341, 0.0682, 0.0485, 0.1493]) -Greedy action tensor([ 1.5818, -0.4992, -0.3628, 0.1646]) tensor([0.6621, 0.0826, 0.0947, 0.1605]) -Greedy action tensor([ 1.4990, -0.4531, -0.5843, 0.1468]) tensor([0.6557, 0.0931, 0.0816, 0.1696]) -Greedy action tensor([ 0.5854, -0.1767, 0.0141, -0.0525]) tensor([0.3906, 0.1823, 0.2206, 0.2064]) -Greedy action tensor([ 2.0046, -0.7866, -0.2537, 0.2950]) tensor([0.7425, 0.0456, 0.0776, 0.1343]) -Greedy action tensor([ 0.9846, -0.4876, -0.0957, 0.3535]) tensor([0.4760, 0.1092, 0.1616, 0.2532]) -Greedy action tensor([ 1.8197, 0.4721, -0.5973, 0.2707]) tensor([0.6404, 0.1664, 0.0571, 0.1361]) -Greedy action tensor([ 1.5948, -0.4349, -0.2542, 0.1822]) tensor([0.6526, 0.0857, 0.1027, 0.1589]) -Greedy action tensor([ 1.4985, -0.7584, -0.1469, 0.5545]) tensor([0.5929, 0.0621, 0.1144, 0.2307]) -Greedy action tensor([ 1.2667, -0.7203, -0.2606, 0.1770]) tensor([0.5915, 0.0811, 0.1284, 0.1989]) -Greedy action tensor([ 1.8765, -1.0193, -0.3985, 0.5940]) tensor([0.6967, 0.0385, 0.0716, 0.1932]) -Greedy action tensor([ 1.3254, -0.0545, -0.8556, 0.2172]) tensor([0.5901, 0.1485, 0.0666, 0.1948]) -Greedy action tensor([ 1.3236, -0.3652, -0.1343, 0.0521]) tensor([0.5890, 0.1088, 0.1371, 0.1651]) -Greedy action tensor([ 2.2272, 0.4616, -0.1209, 0.2562]) tensor([0.7113, 0.1217, 0.0680, 0.0991]) -Greedy action tensor([ 2.5061, -1.2615, -0.2782, 1.2270]) tensor([0.7336, 0.0170, 0.0453, 0.2041]) -Greedy action tensor([ 1.5262, -0.4567, -0.5402, 0.2722]) tensor([0.6453, 0.0888, 0.0817, 0.1841]) -Greedy action tensor([ 1.1690, 0.1642, -0.4624, 0.3207]) tensor([0.5025, 0.1840, 0.0983, 0.2152]) -Greedy action tensor([ 2.0949, -1.0953, -0.3327, 0.5638]) tensor([0.7431, 0.0306, 0.0656, 0.1607]) -Greedy action tensor([ 1.1831, -0.2737, -0.0132, -0.0071]) tensor([0.5436, 0.1267, 0.1644, 0.1653]) -Greedy action tensor([ 1.9902, -1.0728, -0.3342, 0.4730]) tensor([0.7332, 0.0343, 0.0717, 0.1608]) -Greedy action tensor([ 1.2207, -0.3254, -0.0808, 0.1507]) tensor([0.5470, 0.1165, 0.1488, 0.1876]) -Greedy action tensor([ 1.2232, -0.5258, -0.4307, 0.2906]) tensor([0.5686, 0.0989, 0.1088, 0.2238]) -Greedy action tensor([ 1.2162, -0.5169, -0.3135, 0.5030]) tensor([0.5309, 0.0938, 0.1150, 0.2602]) -Greedy action tensor([ 2.2265, -1.2145, -0.3693, 0.7196]) tensor([0.7529, 0.0241, 0.0562, 0.1668]) -Greedy action tensor([ 0.8802, 0.0563, -0.3073, 0.5641]) tensor([0.4044, 0.1774, 0.1233, 0.2948]) -Greedy action tensor([ 2.0318, -0.5974, -0.1599, 0.7001]) tensor([0.6907, 0.0498, 0.0772, 0.1824]) -Greedy action tensor([ 1.7055, -0.6495, -0.5678, 0.8438]) tensor([0.6172, 0.0586, 0.0636, 0.2607]) -Greedy action tensor([ 1.3252, 0.1565, -0.0355, 0.0641]) tensor([0.5404, 0.1679, 0.1386, 0.1531]) -Greedy action tensor([ 1.1740, -0.1479, -0.0437, -0.2071]) tensor([0.5513, 0.1470, 0.1631, 0.1385]) -Greedy action tensor([ 0.7948, -0.1237, -0.4708, 0.5803]) tensor([0.4019, 0.1604, 0.1134, 0.3243]) -Greedy action tensor([ 1.6640, -0.3865, -0.7816, 0.2228]) tensor([0.6887, 0.0886, 0.0597, 0.1630]) -Greedy action tensor([ 1.6714, -0.3122, 0.2503, 0.6294]) tensor([0.5774, 0.0794, 0.1394, 0.2037]) -Greedy action tensor([ 1.2197, -0.5012, -0.2204, 0.2678]) tensor([0.5550, 0.0993, 0.1315, 0.2142]) -Greedy action tensor([ 0.4599, -0.2191, -0.2911, 0.5616]) tensor([0.3240, 0.1643, 0.1529, 0.3587]) -Greedy action tensor([ 1.7453, -0.6043, -0.4175, 0.3253]) tensor([0.6887, 0.0657, 0.0792, 0.1665]) -Greedy action tensor([ 2.8592, -1.9052, -0.2737, 0.9598]) tensor([0.8321, 0.0071, 0.0363, 0.1245]) -Greedy action tensor([ 0.7302, -0.4761, 0.1420, -0.3058]) tensor([0.4526, 0.1355, 0.2513, 0.1606]) -Greedy action tensor([ 1.0665, -0.6277, 0.0584, -0.3954]) tensor([0.5617, 0.1032, 0.2050, 0.1302]) -Greedy action tensor([ 0.8003, -0.9330, -0.0728, -0.4406]) tensor([0.5309, 0.0938, 0.2217, 0.1535]) -Greedy action tensor([ 0.7459, -0.1996, -0.2147, -0.0269]) tensor([0.4479, 0.1740, 0.1714, 0.2068]) -Greedy action tensor([ 0.8609, -0.6637, -0.0035, -0.5919]) tensor([0.5339, 0.1162, 0.2249, 0.1249]) -Greedy action tensor([ 1.0651, -0.8144, 0.0678, -0.5893]) tensor([0.5839, 0.0891, 0.2154, 0.1116]) -Greedy action tensor([ 0.0388, 0.0614, -0.0090, -0.2335]) tensor([0.2675, 0.2737, 0.2551, 0.2038]) -Greedy action tensor([ 0.4655, -0.0874, 0.0290, -0.1703]) tensor([0.3635, 0.2091, 0.2349, 0.1925]) -Greedy action tensor([ 0.8361, -0.2840, 0.0394, -0.2867]) tensor([0.4757, 0.1552, 0.2144, 0.1548]) -Greedy action tensor([ 0.6073, -0.5326, 0.0394, -0.3490]) tensor([0.4404, 0.1408, 0.2496, 0.1692]) -Greedy action tensor([ 1.0754, -0.8222, -0.1208, -0.6595]) tensor([0.6140, 0.0921, 0.1856, 0.1083]) -Greedy action tensor([ 0.6855, -0.5175, -0.0385, -0.3434]) tensor([0.4667, 0.1402, 0.2263, 0.1668]) -Greedy action tensor([ 1.0007, -0.5348, -0.1263, -0.1755]) tensor([0.5412, 0.1165, 0.1753, 0.1669]) -Greedy action tensor([ 0.3692, -0.0210, -0.1419, -0.0061]) tensor([0.3374, 0.2284, 0.2024, 0.2318]) -Greedy action tensor([ 0.5193, -0.0672, 0.1328, -0.2911]) tensor([0.3731, 0.2075, 0.2535, 0.1659]) -Greedy action tensor([ 0.7045, -0.7786, 0.0562, -0.3382]) tensor([0.4757, 0.1079, 0.2487, 0.1677]) -Greedy action tensor([ 0.7581, -0.5227, -0.0486, -0.2907]) tensor([0.4820, 0.1339, 0.2152, 0.1689]) -Greedy action tensor([ 1.0183, -0.7035, 0.0598, -0.5870]) tensor([0.5672, 0.1014, 0.2175, 0.1139]) -Greedy action tensor([ 1.0772, -0.4367, -0.0829, -0.2066]) tensor([0.5523, 0.1215, 0.1731, 0.1530]) -Greedy action tensor([ 0.5408, -0.3000, 0.0027, -0.2592]) tensor([0.4058, 0.1750, 0.2369, 0.1823]) -Greedy action tensor([ 1.4668, -0.8870, -0.0539, -0.6607]) tensor([0.6980, 0.0663, 0.1526, 0.0832]) -Greedy action tensor([ 0.6414, -0.4159, -0.1076, -0.1439]) tensor([0.4393, 0.1526, 0.2077, 0.2003]) -Greedy action tensor([ 0.7341, -0.7769, -0.0776, -0.2383]) tensor([0.4895, 0.1080, 0.2174, 0.1851]) -Greedy action tensor([ 0.7050, -0.4176, 0.0455, -0.1933]) tensor([0.4445, 0.1446, 0.2299, 0.1810]) -Greedy action tensor([ 1.1230, -0.6869, 0.1969, -0.2968]) tensor([0.5551, 0.0909, 0.2199, 0.1342]) -Greedy action tensor([ 0.9277, -0.0165, 0.1097, -0.3001]) tensor([0.4710, 0.1832, 0.2078, 0.1380]) -Greedy action tensor([ 0.7355, -0.3420, -0.0968, -0.2340]) tensor([0.4641, 0.1580, 0.2019, 0.1760]) -Greedy action tensor([ 0.8188, -0.6589, 0.0321, -0.3396]) tensor([0.5006, 0.1142, 0.2280, 0.1572]) -Greedy action tensor([ 0.2918, -0.1712, -0.0287, -0.2907]) tensor([0.3432, 0.2160, 0.2491, 0.1917]) -Greedy action tensor([ 0.4472, 0.4644, -0.2396, 0.1386]) tensor([0.3072, 0.3125, 0.1546, 0.2256]) -Greedy action tensor([ 0.8365, -0.4033, -0.1373, -0.2442]) tensor([0.4984, 0.1443, 0.1882, 0.1691]) -Greedy action tensor([ 0.7441, -0.6185, 0.1134, -0.2783]) tensor([0.4656, 0.1192, 0.2478, 0.1675]) -Greedy action tensor([ 0.5659, -0.2721, -0.0317, -0.2777]) tensor([0.4144, 0.1793, 0.2280, 0.1783]) -Greedy action tensor([ 1.1484, -0.7886, -0.0310, -0.4625]) tensor([0.6056, 0.0873, 0.1862, 0.1209]) -Greedy action tensor([ 0.5447, -0.4270, -0.1069, -0.1710]) tensor([0.4187, 0.1584, 0.2182, 0.2047]) -Greedy action tensor([ 0.2578, 0.2108, -0.2084, -0.1218]) tensor([0.3062, 0.2922, 0.1921, 0.2095]) -Greedy action tensor([ 0.3464, -0.1101, -0.1233, -0.3216]) tensor([0.3608, 0.2286, 0.2256, 0.1850]) -Greedy action tensor([ 0.5249, 0.1779, -0.1579, 0.0479]) tensor([0.3530, 0.2495, 0.1784, 0.2191]) -Greedy action tensor([ 0.8894, -0.4759, -0.0508, -0.5690]) tensor([0.5323, 0.1359, 0.2079, 0.1238]) -Greedy action tensor([ 1.1393, -0.8370, -0.1422, -0.5600]) tensor([0.6254, 0.0867, 0.1736, 0.1143]) -Greedy action tensor([ 0.8582, -0.3749, -0.0900, -0.5601]) tensor([0.5206, 0.1517, 0.2017, 0.1260]) -Greedy action tensor([ 1.2682, -0.7433, -0.1218, -0.4127]) tensor([0.6373, 0.0853, 0.1587, 0.1187]) -Greedy action tensor([ 0.5933, -0.4983, -0.0186, -0.5715]) tensor([0.4566, 0.1533, 0.2476, 0.1425]) -Greedy action tensor([ 0.8904, -0.5710, 0.1036, -0.2299]) tensor([0.4967, 0.1152, 0.2261, 0.1620]) -Greedy action tensor([ 0.3999, -0.2780, -0.0047, -0.1362]) tensor([0.3623, 0.1839, 0.2418, 0.2120]) -Greedy action tensor([ 0.7562, -0.6175, 0.0834, -0.3953]) tensor([0.4809, 0.1217, 0.2454, 0.1520]) -Greedy action tensor([ 0.5769, -0.2479, -0.2432, -0.2828]) tensor([0.4344, 0.1904, 0.1913, 0.1839]) -Greedy action tensor([ 0.2861, -0.0827, -0.1378, -0.2101]) tensor([0.3384, 0.2340, 0.2215, 0.2060]) -Greedy action tensor([ 0.6279, -0.4708, 0.2665, -0.5891]) tensor([0.4299, 0.1433, 0.2995, 0.1273]) -Greedy action tensor([ 0.8561, -0.5001, -0.0328, -0.2763]) tensor([0.5023, 0.1294, 0.2065, 0.1619]) -Greedy action tensor([ 0.6048, -0.4049, -0.1308, -0.1551]) tensor([0.4327, 0.1576, 0.2073, 0.2024]) -Greedy action tensor([ 0.7587, -0.3932, -0.1106, -0.2646]) tensor([0.4774, 0.1509, 0.2001, 0.1716]) -Greedy action tensor([ 0.9488, -0.4857, -0.0175, -0.3621]) tensor([0.5296, 0.1262, 0.2015, 0.1428]) -Greedy action tensor([ 0.5014, -0.3663, -0.0300, -0.2295]) tensor([0.4017, 0.1687, 0.2361, 0.1934]) -Greedy action tensor([ 1.1202, -0.3689, -0.2325, -0.1551]) tensor([0.5671, 0.1279, 0.1466, 0.1584]) -Greedy action tensor([ 0.3828, -0.2353, -0.0423, -0.1866]) tensor([0.3625, 0.1954, 0.2370, 0.2051]) -Greedy action tensor([ 0.6420, -0.1721, -0.0152, -0.1316]) tensor([0.4128, 0.1829, 0.2139, 0.1904]) -Greedy action tensor([ 0.3728, 0.1671, -0.2890, -0.1455]) tensor([0.3418, 0.2783, 0.1764, 0.2036]) -Greedy action tensor([ 0.0545, -0.1500, -0.1421, -0.0721]) tensor([0.2843, 0.2317, 0.2336, 0.2505]) -Greedy action tensor([ 0.9527, -0.7138, 0.2134, -0.3188]) tensor([0.5137, 0.0970, 0.2452, 0.1440]) -Greedy action tensor([ 0.5823, -0.3206, -0.0707, -0.3209]) tensor([0.4290, 0.1739, 0.2233, 0.1739]) -Greedy action tensor([ 0.9671, -0.7731, -0.1479, -0.5304]) tensor([0.5790, 0.1016, 0.1899, 0.1295]) -Greedy action tensor([ 0.3395, -0.0125, -0.0503, -0.1353]) tensor([0.3331, 0.2342, 0.2255, 0.2072]) -Greedy action tensor([ 0.3963, 0.0740, -0.0720, -0.3955]) tensor([0.3567, 0.2584, 0.2233, 0.1616]) -Greedy action tensor([ 0.8547, -0.6200, -0.2698, -0.2828]) tensor([0.5335, 0.1221, 0.1733, 0.1711]) -Greedy action tensor([ 1.0429, -0.5075, -0.1015, -0.4112]) tensor([0.5668, 0.1203, 0.1805, 0.1324]) -Greedy action tensor([ 0.5725, -0.4695, -0.1292, -0.0352]) tensor([0.4179, 0.1474, 0.2072, 0.2276]) -Greedy action tensor([ 0.8611, -0.4431, -0.1128, -0.1805]) tensor([0.4995, 0.1356, 0.1886, 0.1763]) -Greedy action tensor([ 0.9605, -0.5493, -0.0636, -0.2410]) tensor([0.5317, 0.1175, 0.1909, 0.1599]) -Greedy action tensor([ 0.9957, -0.6786, 0.0684, -0.7227]) tensor([0.5674, 0.1064, 0.2245, 0.1018]) -Greedy action tensor([ 0.8871, -0.5109, -0.0624, -0.2728]) tensor([0.5135, 0.1269, 0.1987, 0.1610]) -Greedy action tensor([ 0.6243, -0.4518, -0.1663, 0.0374]) tensor([0.4255, 0.1450, 0.1930, 0.2366]) -Greedy action tensor([ 0.7745, -0.4881, -0.0282, -0.5724]) tensor([0.5022, 0.1421, 0.2251, 0.1306]) -Greedy action tensor([ 0.5776, -0.2238, 0.0619, -0.0783]) tensor([0.3899, 0.1749, 0.2328, 0.2024]) -Greedy action tensor([ 8.2749e-01, -5.0883e-02, -3.6952e-04, -9.2596e-02]) tensor([0.4443, 0.1846, 0.1941, 0.1770]) -Greedy action tensor([ 0.6614, -0.4486, -0.0366, -0.3431]) tensor([0.4559, 0.1502, 0.2269, 0.1670]) -Greedy action tensor([ 0.8610, -0.4917, 0.0454, -0.6190]) tensor([0.5185, 0.1341, 0.2294, 0.1180]) -Greedy action tensor([ 0.9228, -0.3302, -0.0699, -0.3116]) tensor([0.5136, 0.1467, 0.1903, 0.1494]) -Greedy action tensor([ 0.6637, -0.5694, 0.0718, -0.6516]) tensor([0.4732, 0.1379, 0.2618, 0.1270]) -Greedy action tensor([ 0.7896, -0.3654, 0.0318, -0.3568]) tensor([0.4758, 0.1499, 0.2230, 0.1512]) -Greedy action tensor([ 0.8327, -0.6679, -0.0158, -0.4833]) tensor([0.5210, 0.1162, 0.2230, 0.1397]) -Greedy action tensor([-0.0439, -0.4411, 0.9213, 0.0535]) tensor([0.1852, 0.1245, 0.4862, 0.2041]) -Greedy action tensor([ 1.1536, -0.9287, 1.1561, 0.9777]) tensor([0.3372, 0.0420, 0.3380, 0.2828]) -Greedy action tensor([-0.1065, 0.7307, -0.1637, 0.2654]) tensor([0.1753, 0.4049, 0.1655, 0.2543]) -Greedy action tensor([-0.6734, -0.4659, -0.0966, -0.3179]) tensor([0.1839, 0.2263, 0.3274, 0.2624]) -Greedy action tensor([-0.1831, -0.4130, 0.4629, -0.5497]) tensor([0.2275, 0.1808, 0.4341, 0.1577]) -Greedy action tensor([0.7132, 0.2496, 0.0605, 0.3583]) tensor([0.3508, 0.2206, 0.1826, 0.2460]) -Greedy action tensor([ 0.1867, -0.8090, 0.1971, -0.0540]) tensor([0.3158, 0.1167, 0.3192, 0.2483]) -Greedy action tensor([ 0.5945, -0.0773, -0.0786, 0.9153]) tensor([0.2942, 0.1503, 0.1501, 0.4055]) -Greedy action tensor([ 0.2477, -1.1991, -0.4839, 0.3757]) tensor([0.3505, 0.0825, 0.1686, 0.3984]) -Greedy action tensor([ 0.1287, -0.8949, -0.3769, 0.7523]) tensor([0.2612, 0.0939, 0.1576, 0.4874]) -Greedy action tensor([ 0.3649, -1.0767, 1.1441, -0.4053]) tensor([0.2578, 0.0610, 0.5619, 0.1193]) -Greedy action tensor([ 0.9922, -1.0948, 0.2687, 0.7246]) tensor([0.4212, 0.0523, 0.2043, 0.3223]) -Greedy action tensor([-1.4433, -0.8812, -0.8678, 0.1607]) tensor([0.1052, 0.1846, 0.1871, 0.5232]) -Greedy action tensor([ 0.2938, -1.4560, -0.0062, -0.6803]) tensor([0.4363, 0.0758, 0.3232, 0.1647]) -Greedy action tensor([-0.7983, -0.5221, -0.1144, 0.3003]) tensor([0.1370, 0.1806, 0.2715, 0.4110]) -Greedy action tensor([-0.5762, -0.4527, 0.6465, -0.2798]) tensor([0.1455, 0.1646, 0.4942, 0.1957]) -Greedy action tensor([ 1.1545, -1.6564, 2.0950, 0.2051]) tensor([0.2495, 0.0150, 0.6390, 0.0965]) -Greedy action tensor([-0.6956, -1.9456, -0.0476, 1.0994]) tensor([0.1085, 0.0311, 0.2074, 0.6530]) -Greedy action tensor([0.6934, 0.0774, 0.1627, 0.0777]) tensor([0.3747, 0.2024, 0.2204, 0.2025]) -Greedy action tensor([ 0.7610, -1.0597, 0.4920, 0.0915]) tensor([0.4102, 0.0664, 0.3134, 0.2100]) -Greedy action tensor([ 0.6305, -0.5042, 0.2391, 0.6940]) tensor([0.3265, 0.1050, 0.2207, 0.3478]) -Greedy action tensor([ 0.1604, -0.5936, 0.3132, 0.1841]) tensor([0.2732, 0.1286, 0.3184, 0.2798]) -Greedy action tensor([1.4754, 0.2319, 0.4071, 0.8668]) tensor([0.4595, 0.1325, 0.1579, 0.2500]) -Greedy action tensor([-0.1807, 0.4938, -0.6686, -0.0548]) tensor([0.2123, 0.4167, 0.1303, 0.2407]) -Greedy action tensor([-0.3306, -2.1524, 0.1311, -0.2319]) tensor([0.2596, 0.0420, 0.4119, 0.2865]) -Greedy action tensor([-0.2795, -0.4862, 0.1296, 0.0631]) tensor([0.2115, 0.1720, 0.3185, 0.2980]) -Greedy action tensor([ 1.7573, -0.3269, -0.9452, 0.3759]) tensor([0.6932, 0.0862, 0.0465, 0.1741]) -Greedy action tensor([ 0.2411, -0.1071, 0.3073, -0.1421]) tensor([0.2893, 0.2043, 0.3091, 0.1972]) -Greedy action tensor([-0.7306, -0.7418, 1.9272, -0.8924]) tensor([0.0585, 0.0578, 0.8340, 0.0497]) -Greedy action tensor([-0.2242, 0.6429, -0.2026, 0.4743]) tensor([0.1559, 0.3712, 0.1593, 0.3136]) -Greedy action tensor([0.3876, 0.1550, 0.1705, 0.0424]) tensor([0.3025, 0.2398, 0.2435, 0.2142]) -Greedy action tensor([ 0.0704, -0.4900, 0.9434, -0.3560]) tensor([0.2165, 0.1236, 0.5184, 0.1414]) -Greedy action tensor([ 0.7633, -1.4870, 1.6452, 0.7843]) tensor([0.2202, 0.0232, 0.5318, 0.2248]) -Greedy action tensor([ 0.5784, -0.0180, 0.5041, 1.5520]) tensor([0.1951, 0.1074, 0.1811, 0.5164]) -Greedy action tensor([ 1.2882, -1.2912, -0.5545, 0.9854]) tensor([0.5069, 0.0384, 0.0803, 0.3744]) -Greedy action tensor([ 1.2587, -2.2854, 0.2403, 0.5684]) tensor([0.5287, 0.0153, 0.1909, 0.2651]) -Greedy action tensor([-0.0878, -0.7676, 0.4123, 0.6165]) tensor([0.1931, 0.0979, 0.3184, 0.3906]) -Greedy action tensor([-0.5783, -0.2340, -1.5947, 0.9091]) tensor([0.1389, 0.1960, 0.0503, 0.6148]) -Greedy action tensor([-0.1006, -1.1561, -0.0084, -1.2133]) tensor([0.3606, 0.1255, 0.3954, 0.1185]) -Greedy action tensor([-0.0233, 0.0452, 0.9987, 0.0146]) tensor([0.1698, 0.1819, 0.4719, 0.1764]) -Greedy action tensor([ 1.0273, -1.8688, 0.0891, 0.9106]) tensor([0.4280, 0.0236, 0.1675, 0.3808]) -Greedy action tensor([ 0.2705, -1.1435, -0.4594, 0.8531]) tensor([0.2844, 0.0692, 0.1371, 0.5093]) -Greedy action tensor([-0.3055, -0.3799, 0.1901, -0.5014]) tensor([0.2277, 0.2114, 0.3738, 0.1872]) -Greedy action tensor([0.1003, 0.1133, 0.3576, 0.8882]) tensor([0.1816, 0.1840, 0.2349, 0.3994]) -Greedy action tensor([ 0.4074, -1.3382, 0.5599, 0.0036]) tensor([0.3325, 0.0580, 0.3873, 0.2221]) -Greedy action tensor([ 0.6034, -2.3250, 0.1242, -0.0055]) tensor([0.4511, 0.0241, 0.2794, 0.2454]) -Greedy action tensor([-0.0054, -0.8933, 0.0268, -0.6598]) tensor([0.3374, 0.1388, 0.3484, 0.1754]) -Greedy action tensor([-0.9354, -0.9775, 0.8688, -0.7938]) tensor([0.1089, 0.1044, 0.6614, 0.1254]) -Greedy action tensor([ 1.1896, -0.6759, 1.0053, 0.5076]) tensor([0.4013, 0.0621, 0.3337, 0.2029]) -Greedy action tensor([-0.0894, -2.2776, 1.6095, 0.0121]) tensor([0.1301, 0.0146, 0.7113, 0.1440]) -Greedy action tensor([ 0.8616, 0.1634, -0.5426, -0.4121]) tensor([0.4944, 0.2459, 0.1214, 0.1383]) -Greedy action tensor([ 0.1605, -1.0671, 0.4576, -1.2226]) tensor([0.3460, 0.1014, 0.4658, 0.0868]) -Greedy action tensor([ 0.6148, -1.0150, -0.7202, -0.3506]) tensor([0.5435, 0.1065, 0.1430, 0.2070]) -Greedy action tensor([-0.3210, 0.5185, 0.3594, -0.3014]) tensor([0.1585, 0.3669, 0.3130, 0.1616]) -Greedy action tensor([-0.4212, -1.0361, 0.5102, 0.7677]) tensor([0.1358, 0.0734, 0.3447, 0.4460]) -Greedy action tensor([-0.2546, -0.1329, 0.4711, -0.0700]) tensor([0.1852, 0.2092, 0.3827, 0.2228]) -Greedy action tensor([ 0.5675, -1.0876, 0.4454, 1.1488]) tensor([0.2588, 0.0494, 0.2290, 0.4628]) -Greedy action tensor([ 0.0803, -1.2426, 1.0245, 0.2104]) tensor([0.2010, 0.0535, 0.5166, 0.2289]) -Greedy action tensor([ 0.1722, -0.6902, 1.0055, 0.2607]) tensor([0.2077, 0.0877, 0.4778, 0.2269]) -Greedy action tensor([ 0.6692, -0.3201, -0.1517, 0.0876]) tensor([0.4218, 0.1568, 0.1856, 0.2358]) -Greedy action tensor([-0.2300, -0.8126, 0.6314, -0.3504]) tensor([0.2078, 0.1161, 0.4918, 0.1843]) -Greedy action tensor([ 0.4017, -0.2644, -0.0304, 1.6038]) tensor([0.1822, 0.0936, 0.1182, 0.6060]) -Greedy action tensor([ 0.5744, -1.3835, -0.0825, 0.1665]) tensor([0.4302, 0.0607, 0.2230, 0.2861]) -Greedy action tensor([-0.4885, -0.9958, 0.6530, -0.2996]) tensor([0.1683, 0.1013, 0.5270, 0.2033]) -Greedy action tensor([-0.9061, -0.6954, 0.7923, -1.0856]) tensor([0.1172, 0.1446, 0.6403, 0.0979]) -Greedy action tensor([ 0.7148, -1.4345, -0.5310, 1.0185]) tensor([0.3624, 0.0422, 0.1043, 0.4911]) -Greedy action tensor([-1.5521, -0.9800, 0.3860, -0.9979]) tensor([0.0873, 0.1547, 0.6062, 0.1519]) -Greedy action tensor([ 0.2014, -0.9060, -0.8275, 1.3150]) tensor([0.2113, 0.0698, 0.0755, 0.6434]) -Greedy action tensor([-0.5712, -1.4343, -0.4931, 0.2882]) tensor([0.2056, 0.0867, 0.2223, 0.4855]) -Greedy action tensor([ 0.2836, -0.8943, 0.5445, -0.5300]) tensor([0.3280, 0.1010, 0.4257, 0.1454]) -Greedy action tensor([ 1.6336, -1.2244, 1.4897, 0.8812]) tensor([0.4176, 0.0240, 0.3616, 0.1968]) -Greedy action tensor([-0.9625, -0.2229, 1.5316, -0.4224]) tensor([0.0591, 0.1238, 0.7157, 0.1014]) -Greedy action tensor([-1.6536, -0.0380, -1.2904, 0.1507]) tensor([0.0738, 0.3714, 0.1062, 0.4486]) -Greedy action tensor([ 0.8720, -0.5960, -0.1532, 0.9556]) tensor([0.3737, 0.0861, 0.1340, 0.4062]) -Greedy action tensor([1.3655, 0.5280, 0.7571, 0.5611]) tensor([0.4125, 0.1785, 0.2245, 0.1845]) -Greedy action tensor([-0.0875, -0.1179, -0.3885, 0.7009]) tensor([0.2037, 0.1976, 0.1507, 0.4480]) -Greedy action tensor([-0.0264, 0.3063, -0.3482, -0.2640]) tensor([0.2559, 0.3569, 0.1855, 0.2018]) -Greedy action tensor([ 0.8140, -1.7212, -0.0504, 1.1763]) tensor([0.3405, 0.0270, 0.1434, 0.4891]) -Greedy action tensor([ 0.5164, -1.0839, 0.1384, -0.0515]) tensor([0.4075, 0.0823, 0.2793, 0.2310]) -Greedy action tensor([ 0.3169, -1.6665, -0.5375, -0.2223]) tensor([0.4659, 0.0641, 0.1983, 0.2717]) -Greedy action tensor([ 0.0634, -1.7396, -0.4331, 0.3101]) tensor([0.3275, 0.0540, 0.1993, 0.4192]) -Greedy action tensor([ 1.0730, -0.6912, -0.4987, -0.2883]) tensor([0.6115, 0.1048, 0.1270, 0.1567]) -Greedy action tensor([-1.8999, -0.4075, 0.6404, -0.1434]) tensor([0.0418, 0.1859, 0.5302, 0.2421]) -Greedy action tensor([-0.6904, -0.2402, -0.4000, -0.2493]) tensor([0.1831, 0.2873, 0.2449, 0.2847]) -Greedy action tensor([-1.4760, 0.5970, 0.3052, 0.2320]) tensor([0.0490, 0.3896, 0.2910, 0.2704]) -Greedy action tensor([-1.8738, -0.4669, 0.6359, -0.1448]) tensor([0.0434, 0.1774, 0.5344, 0.2448]) -Greedy action tensor([-1.5839, -0.5061, 0.6365, 0.1488]) tensor([0.0532, 0.1563, 0.4898, 0.3008]) -Greedy action tensor([-1.4481, 0.0431, 0.4991, 0.1079]) tensor([0.0582, 0.2584, 0.4077, 0.2757]) -Greedy action tensor([-1.7449, -0.3748, 0.6049, 0.0031]) tensor([0.0473, 0.1860, 0.4954, 0.2714]) -Greedy action tensor([-1.8879, -0.4576, 0.6449, -0.1502]) tensor([0.0426, 0.1782, 0.5368, 0.2424]) -Greedy action tensor([-1.8948, -0.3369, 0.6300, -0.1571]) tensor([0.0418, 0.1985, 0.5220, 0.2376]) -Greedy action tensor([-1.1627, 0.0084, 0.1906, 0.1490]) tensor([0.0847, 0.2732, 0.3278, 0.3144]) -Greedy action tensor([-1.9179, -0.3238, 0.6301, -0.1542]) tensor([0.0407, 0.2006, 0.5209, 0.2377]) -Greedy action tensor([-1.9101, -0.4591, 0.6542, -0.1611]) tensor([0.0417, 0.1777, 0.5411, 0.2395]) -Greedy action tensor([-0.5830, -0.0327, -0.1354, 0.1151]) tensor([0.1585, 0.2748, 0.2480, 0.3186]) -Greedy action tensor([-1.0441, -0.4307, 0.5438, 0.8402]) tensor([0.0698, 0.1289, 0.3417, 0.4596]) -Greedy action tensor([-0.5585, -0.2254, 0.1898, -0.0461]) tensor([0.1619, 0.2258, 0.3421, 0.2702]) -Greedy action tensor([-1.3772, -0.6084, 0.3678, 0.1559]) tensor([0.0740, 0.1596, 0.4237, 0.3427]) -Greedy action tensor([-1.9063, -0.4561, 0.6535, -0.1589]) tensor([0.0418, 0.1781, 0.5403, 0.2398]) -Greedy action tensor([-1.9417, -0.4486, 0.6662, -0.1777]) tensor([0.0402, 0.1791, 0.5459, 0.2348]) -Greedy action tensor([-1.9051, -0.4134, 0.6442, -0.1616]) tensor([0.0417, 0.1855, 0.5341, 0.2386]) -Greedy action tensor([-1.8724, -0.4188, 0.6289, -0.1477]) tensor([0.0433, 0.1853, 0.5283, 0.2430]) -Greedy action tensor([-1.9390, -0.4441, 0.6669, -0.1752]) tensor([0.0403, 0.1795, 0.5453, 0.2349]) -Greedy action tensor([-1.6482, -0.5053, 0.4727, -0.0211]) tensor([0.0569, 0.1785, 0.4748, 0.2898]) -Greedy action tensor([-1.9385, -0.4505, 0.6662, -0.1757]) tensor([0.0403, 0.1787, 0.5458, 0.2352]) -Greedy action tensor([-1.8440, -0.3713, 0.6082, -0.1314]) tensor([0.0444, 0.1937, 0.5158, 0.2462]) -Greedy action tensor([-1.9403, -0.4446, 0.6637, -0.1778]) tensor([0.0403, 0.1799, 0.5449, 0.2349]) -Greedy action tensor([-1.1404, -0.6347, 0.3220, 0.0020]) tensor([0.0989, 0.1640, 0.4270, 0.3101]) -Greedy action tensor([-1.6776, -0.2080, 0.4981, -0.0162]) tensor([0.0515, 0.2238, 0.4535, 0.2712]) -Greedy action tensor([-1.8178, -0.0725, 0.5625, -0.0717]) tensor([0.0430, 0.2462, 0.4645, 0.2464]) -Greedy action tensor([-1.7552, -0.4802, 0.6124, 0.0311]) tensor([0.0471, 0.1687, 0.5030, 0.2812]) -Greedy action tensor([-0.8962, -0.7264, 0.3092, 0.3205]) tensor([0.1124, 0.1332, 0.3751, 0.3794]) -Greedy action tensor([-1.8189, -0.4548, 0.6120, -0.1148]) tensor([0.0459, 0.1796, 0.5221, 0.2524]) -Greedy action tensor([-1.3851, -0.5180, 0.7452, 0.6508]) tensor([0.0514, 0.1223, 0.4326, 0.3937]) -Greedy action tensor([-1.5965, -0.0073, 0.5561, 0.1355]) tensor([0.0496, 0.2431, 0.4270, 0.2804]) -Greedy action tensor([-1.8891, -0.4279, 0.6310, -0.1489]) tensor([0.0427, 0.1839, 0.5303, 0.2431]) -Greedy action tensor([-1.9459, -0.4495, 0.6678, -0.1811]) tensor([0.0401, 0.1789, 0.5470, 0.2340]) -Greedy action tensor([-1.2096, 0.2382, 0.2668, 0.3348]) tensor([0.0699, 0.2971, 0.3057, 0.3273]) -Greedy action tensor([-1.8926, -0.4693, 0.6396, -0.1592]) tensor([0.0427, 0.1774, 0.5379, 0.2419]) -Greedy action tensor([-1.6935, -0.3057, 0.5750, 0.0080]) tensor([0.0496, 0.1988, 0.4796, 0.2720]) -Greedy action tensor([-1.8498, -0.3995, 0.6420, -0.0770]) tensor([0.0430, 0.1835, 0.5200, 0.2534]) -Greedy action tensor([-1.5239, -0.5448, 0.5158, 0.1003]) tensor([0.0609, 0.1621, 0.4681, 0.3090]) -Greedy action tensor([-1.8354, -0.4478, 0.6148, -0.1238]) tensor([0.0452, 0.1810, 0.5237, 0.2502]) -Greedy action tensor([-1.6626, -0.0412, 0.5505, -0.0219]) tensor([0.0491, 0.2485, 0.4490, 0.2533]) -Greedy action tensor([-1.8698, -0.3016, 0.6113, -0.1635]) tensor([0.0430, 0.2063, 0.5139, 0.2368]) -Greedy action tensor([-1.9459, -0.4516, 0.6690, -0.1803]) tensor([0.0401, 0.1785, 0.5473, 0.2341]) -Greedy action tensor([-1.4574, -0.5666, 0.4435, 0.1989]) tensor([0.0651, 0.1586, 0.4354, 0.3410]) -Greedy action tensor([-1.9472, -0.4506, 0.6682, -0.1821]) tensor([0.0400, 0.1788, 0.5473, 0.2339]) -Greedy action tensor([-1.9327, -0.4501, 0.6688, -0.1741]) tensor([0.0405, 0.1784, 0.5461, 0.2351]) -Greedy action tensor([-1.9224, -0.4264, 0.6526, -0.1699]) tensor([0.0410, 0.1832, 0.5390, 0.2368]) -Greedy action tensor([-1.0690, -0.4515, -0.1783, -0.2865]) tensor([0.1337, 0.2480, 0.3259, 0.2925]) -Greedy action tensor([-1.7488, 0.0620, 0.4899, -0.0267]) tensor([0.0453, 0.2768, 0.4246, 0.2533]) -Greedy action tensor([-1.8632, -0.3024, 0.6098, -0.1172]) tensor([0.0428, 0.2040, 0.5078, 0.2454]) -Greedy action tensor([-1.1181, 0.1002, 0.1605, 0.2168]) tensor([0.0849, 0.2872, 0.3051, 0.3228]) -Greedy action tensor([-1.9430, -0.4500, 0.6671, -0.1790]) tensor([0.0402, 0.1788, 0.5465, 0.2345]) -Greedy action tensor([-1.9361, -0.4471, 0.6660, -0.1735]) tensor([0.0404, 0.1791, 0.5451, 0.2354]) -Greedy action tensor([-1.9362, -0.4466, 0.6636, -0.1757]) tensor([0.0405, 0.1795, 0.5447, 0.2353]) -Greedy action tensor([-1.7350, -0.2616, 0.5393, -0.0996]) tensor([0.0495, 0.2159, 0.4809, 0.2538]) -Greedy action tensor([-1.7661, -0.4603, 0.5886, -0.0743]) tensor([0.0484, 0.1787, 0.5100, 0.2629]) -Greedy action tensor([-1.8173, -0.4295, 0.5966, -0.1024]) tensor([0.0460, 0.1843, 0.5141, 0.2556]) -Greedy action tensor([-1.8705, -0.4557, 0.6373, -0.1398]) tensor([0.0434, 0.1787, 0.5329, 0.2450]) -Greedy action tensor([-1.9167, -0.4418, 0.6552, -0.1640]) tensor([0.0413, 0.1804, 0.5402, 0.2381]) -Greedy action tensor([-1.8363, -0.4095, 0.6695, -0.0782]) tensor([0.0431, 0.1794, 0.5277, 0.2498]) -Greedy action tensor([-1.3982, -0.5073, 0.4224, -0.0571]) tensor([0.0744, 0.1814, 0.4596, 0.2846]) -Greedy action tensor([-1.6661, -0.3023, 0.5105, -0.0108]) tensor([0.0527, 0.2063, 0.4650, 0.2761]) -Greedy action tensor([-1.8939, -0.4217, 0.6368, -0.1526]) tensor([0.0423, 0.1845, 0.5317, 0.2415]) -Greedy action tensor([-1.8930, -0.3651, 0.6246, -0.1486]) tensor([0.0421, 0.1942, 0.5225, 0.2412]) -Greedy action tensor([-1.9294, -0.4454, 0.6610, -0.1684]) tensor([0.0407, 0.1796, 0.5429, 0.2369]) -Greedy action tensor([-1.9140, -0.4473, 0.6506, -0.1638]) tensor([0.0415, 0.1800, 0.5395, 0.2390]) -Greedy action tensor([-1.8306, -0.4788, 0.6157, -0.0900]) tensor([0.0452, 0.1748, 0.5222, 0.2578]) -Greedy action tensor([-1.8996, -0.4367, 0.6450, -0.1555]) tensor([0.0421, 0.1816, 0.5357, 0.2406]) -Greedy action tensor([-1.9028, -0.3569, 0.6312, -0.1552]) tensor([0.0416, 0.1952, 0.5244, 0.2388]) -Greedy action tensor([-1.1592, -0.4358, 0.3546, 0.0089]) tensor([0.0924, 0.1905, 0.4199, 0.2972]) -Greedy action tensor([-1.8857, -0.3983, 0.6446, -0.1460]) tensor([0.0422, 0.1869, 0.5303, 0.2405]) -Greedy action tensor([-1.1012, -0.2981, 0.3230, -0.0452]) tensor([0.0974, 0.2175, 0.4048, 0.2802]) -Greedy action tensor([-1.7629, 0.0965, 0.4956, -0.0159]) tensor([0.0440, 0.2825, 0.4210, 0.2525]) -Greedy action tensor([-1.8753, -0.4298, 0.6318, -0.1430]) tensor([0.0432, 0.1832, 0.5296, 0.2440]) -Greedy action tensor([-1.9094, -0.3499, 0.6371, -0.1476]) tensor([0.0411, 0.1954, 0.5243, 0.2392]) -Greedy action tensor([-1.8051, 0.0328, 0.5268, -0.0497]) tensor([0.0428, 0.2689, 0.4407, 0.2476]) -Greedy action tensor([-1.8712, -0.4415, 0.6781, -0.1174]) tensor([0.0421, 0.1759, 0.5388, 0.2432]) -Greedy action tensor([-1.6306, -0.1943, 0.5819, 0.2167]) tensor([0.0483, 0.2033, 0.4418, 0.3066]) -Greedy action tensor([-1.8614, -0.3748, 0.6285, -0.1295]) tensor([0.0432, 0.1911, 0.5213, 0.2443]) -Greedy action tensor([-1.8830, -0.4553, 0.6403, -0.1495]) tensor([0.0429, 0.1789, 0.5352, 0.2429]) -Greedy action tensor([ 1.4637, -0.2081, -0.7510, 0.5103]) tensor([0.5943, 0.1117, 0.0649, 0.2291]) -Greedy action tensor([ 1.8345, 0.4241, 0.2969, -0.3732]) tensor([0.6374, 0.1555, 0.1370, 0.0701]) -Greedy action tensor([ 0.8611, -0.2954, -0.3808, -0.0163]) tensor([0.4952, 0.1558, 0.1430, 0.2059]) -Greedy action tensor([ 1.1553, -0.5690, -0.6417, 0.6878]) tensor([0.5074, 0.0905, 0.0841, 0.3180]) -Greedy action tensor([ 1.1412, 0.0182, -0.5143, 0.3372]) tensor([0.5092, 0.1656, 0.0973, 0.2279]) -Greedy action tensor([ 1.3051, -0.5834, 0.0625, 0.3593]) tensor([0.5469, 0.0828, 0.1579, 0.2124]) -Greedy action tensor([ 1.8395, -0.7215, -0.0955, 0.2617]) tensor([0.7002, 0.0541, 0.1011, 0.1446]) -Greedy action tensor([ 1.5002, -0.5159, -0.7552, 0.5437]) tensor([0.6164, 0.0821, 0.0646, 0.2369]) -Greedy action tensor([ 1.8541, -1.0338, -0.7003, 1.3784]) tensor([0.5698, 0.0317, 0.0443, 0.3541]) -Greedy action tensor([ 0.9540, -0.6121, -0.2387, 0.3688]) tensor([0.4833, 0.1009, 0.1466, 0.2692]) -Greedy action tensor([ 1.4953, -0.4518, -0.4312, 0.2766]) tensor([0.6313, 0.0901, 0.0920, 0.1866]) -Greedy action tensor([ 1.1217, -0.2248, -0.0882, 0.1139]) tensor([0.5199, 0.1353, 0.1550, 0.1898]) -Greedy action tensor([ 1.7232, 0.1909, 0.0261, -0.0661]) tensor([0.6384, 0.1379, 0.1170, 0.1067]) -Greedy action tensor([ 0.9079, -0.3597, -0.3119, 0.1950]) tensor([0.4838, 0.1362, 0.1429, 0.2372]) -Greedy action tensor([ 2.3039, -0.6110, -0.3899, 0.7890]) tensor([0.7453, 0.0404, 0.0504, 0.1639]) -Greedy action tensor([ 1.1585, -0.3601, -0.3008, 0.2565]) tensor([0.5384, 0.1179, 0.1251, 0.2185]) -Greedy action tensor([ 1.3581, -0.3298, -0.5782, 0.2391]) tensor([0.6040, 0.1117, 0.0871, 0.1973]) -Greedy action tensor([ 1.9899, 0.2906, -0.5448, 0.2953]) tensor([0.6917, 0.1264, 0.0548, 0.1270]) -Greedy action tensor([ 1.7682, -0.4101, -0.6466, 0.3812]) tensor([0.6885, 0.0780, 0.0615, 0.1720]) -Greedy action tensor([ 1.5030, 0.1898, -0.3376, 0.4115]) tensor([0.5671, 0.1525, 0.0900, 0.1904]) -Greedy action tensor([ 1.2687, -0.2541, -0.2518, 0.0662]) tensor([0.5756, 0.1256, 0.1258, 0.1730]) -Greedy action tensor([ 1.0075, -0.4622, -0.1676, 0.1708]) tensor([0.5071, 0.1166, 0.1566, 0.2196]) -Greedy action tensor([ 1.4332, -0.8644, -0.2430, 0.8236]) tensor([0.5461, 0.0549, 0.1022, 0.2968]) -Greedy action tensor([ 1.5787, -0.2033, -0.3494, 0.1698]) tensor([0.6418, 0.1080, 0.0933, 0.1569]) -Greedy action tensor([ 1.6016, -0.6525, -0.5696, 0.3586]) tensor([0.6633, 0.0696, 0.0756, 0.1914]) -Greedy action tensor([ 1.8425, -0.7352, -0.0757, 0.7355]) tensor([0.6438, 0.0489, 0.0945, 0.2128]) -Greedy action tensor([ 1.8428, -0.5815, 0.0030, 0.3217]) tensor([0.6822, 0.0604, 0.1084, 0.1490]) -Greedy action tensor([ 1.9336, -0.9230, -0.1442, 0.3052]) tensor([0.7252, 0.0417, 0.0908, 0.1423]) -Greedy action tensor([ 1.4675, -0.3525, -0.2989, 0.3808]) tensor([0.5987, 0.0970, 0.1023, 0.2020]) -Greedy action tensor([ 1.6697, -0.5399, -0.7617, 0.3896]) tensor([0.6777, 0.0744, 0.0596, 0.1884]) -Greedy action tensor([ 1.1446, -0.2275, -0.2936, 0.4056]) tensor([0.5080, 0.1288, 0.1206, 0.2426]) -Greedy action tensor([ 1.5430, -0.8023, 0.1449, -0.1010]) tensor([0.6510, 0.0624, 0.1608, 0.1258]) -Greedy action tensor([ 1.4718, -0.4215, -0.2906, 0.4581]) tensor([0.5934, 0.0894, 0.1019, 0.2153]) -Greedy action tensor([ 1.5343, 0.0188, -0.6182, 0.3372]) tensor([0.6105, 0.1341, 0.0709, 0.1844]) -Greedy action tensor([ 1.9758, -0.5147, -0.6521, 0.3993]) tensor([0.7343, 0.0609, 0.0530, 0.1518]) -Greedy action tensor([ 1.7658, -0.9191, -0.3059, 0.0455]) tensor([0.7282, 0.0497, 0.0917, 0.1304]) -Greedy action tensor([ 1.6292, -0.0140, -0.4784, 0.0295]) tensor([0.6593, 0.1275, 0.0801, 0.1331]) -Greedy action tensor([ 1.6669, -0.6398, -0.3197, 0.4546]) tensor([0.6518, 0.0649, 0.0894, 0.1939]) -Greedy action tensor([ 1.5085, -0.2437, -0.8321, 0.4965]) tensor([0.6123, 0.1062, 0.0589, 0.2226]) -Greedy action tensor([ 1.1744, -0.5636, -0.3074, 0.4900]) tensor([0.5242, 0.0922, 0.1191, 0.2644]) -Greedy action tensor([ 1.2513, -0.6351, -0.1466, 0.1308]) tensor([0.5798, 0.0879, 0.1433, 0.1891]) -Greedy action tensor([ 1.7203, -0.4246, -0.7474, 0.3735]) tensor([0.6840, 0.0801, 0.0580, 0.1779]) -Greedy action tensor([ 1.2176, -0.5576, -0.2788, 0.6616]) tensor([0.5084, 0.0862, 0.1139, 0.2916]) -Greedy action tensor([ 1.5852, -0.6971, -0.1767, 0.4607]) tensor([0.6256, 0.0638, 0.1074, 0.2032]) -Greedy action tensor([ 1.5521, -0.1421, -0.5410, 0.3019]) tensor([0.6275, 0.1153, 0.0774, 0.1798]) -Greedy action tensor([ 1.7556, -0.5605, -0.5524, 0.4962]) tensor([0.6748, 0.0666, 0.0671, 0.1915]) -Greedy action tensor([ 1.3812, -0.2258, -0.1164, 0.3608]) tensor([0.5604, 0.1123, 0.1253, 0.2020]) -Greedy action tensor([ 0.8904, -0.5986, 0.1504, -0.1590]) tensor([0.4871, 0.1099, 0.2324, 0.1706]) -Greedy action tensor([ 2.3395, -0.7371, -0.4748, 0.8245]) tensor([0.7542, 0.0348, 0.0452, 0.1658]) -Greedy action tensor([ 1.3040, -0.8108, 0.0294, 0.2268]) tensor([0.5745, 0.0693, 0.1606, 0.1956]) -Greedy action tensor([ 0.6309, -0.1388, 0.0183, -0.1881]) tensor([0.4088, 0.1894, 0.2216, 0.1802]) -Greedy action tensor([ 1.5469, -0.8049, -0.3384, 0.4007]) tensor([0.6390, 0.0608, 0.0970, 0.2031]) -Greedy action tensor([ 1.3054, 0.1724, -0.7602, 0.4322]) tensor([0.5358, 0.1726, 0.0679, 0.2238]) -Greedy action tensor([ 1.1962, -0.3016, -0.7148, 0.4857]) tensor([0.5368, 0.1200, 0.0794, 0.2638]) -Greedy action tensor([ 1.1943, -0.4623, -0.3341, 0.2621]) tensor([0.5551, 0.1059, 0.1204, 0.2185]) -Greedy action tensor([ 1.4800, -0.6428, -0.2625, -0.0225]) tensor([0.6590, 0.0789, 0.1154, 0.1467]) -Greedy action tensor([ 1.5425, -0.3646, -0.4316, 0.3612]) tensor([0.6273, 0.0931, 0.0871, 0.1925]) -Greedy action tensor([ 1.4095, -0.4776, -0.4064, 0.6737]) tensor([0.5576, 0.0845, 0.0907, 0.2672]) -Greedy action tensor([ 1.8272, -0.2281, -0.1703, 0.4229]) tensor([0.6626, 0.0848, 0.0899, 0.1627]) -Greedy action tensor([ 0.7846, -0.0034, -0.5611, 0.1306]) tensor([0.4474, 0.2035, 0.1165, 0.2326]) -Greedy action tensor([ 0.6865, -0.2607, -0.0580, 0.3350]) tensor([0.3896, 0.1511, 0.1851, 0.2742]) -Greedy action tensor([ 0.7587, -0.3509, -0.3718, 0.4999]) tensor([0.4124, 0.1360, 0.1332, 0.3184]) -Greedy action tensor([ 1.1083, -0.7820, 0.2803, -0.0073]) tensor([0.5220, 0.0788, 0.2281, 0.1711]) -Greedy action tensor([ 1.7050, -0.8269, -0.0511, 0.2029]) tensor([0.6780, 0.0539, 0.1171, 0.1510]) -Greedy action tensor([ 1.3361, -0.4935, -0.1983, 0.4243]) tensor([0.5625, 0.0903, 0.1213, 0.2260]) -Greedy action tensor([ 1.3993, -0.4999, -0.4046, 0.8375]) tensor([0.5307, 0.0794, 0.0874, 0.3025]) -Greedy action tensor([ 1.5369, -0.4188, -0.4809, 0.2605]) tensor([0.6437, 0.0911, 0.0856, 0.1796]) -Greedy action tensor([ 1.0799, -0.6191, -0.1924, 0.3101]) tensor([0.5192, 0.0949, 0.1455, 0.2404]) -Greedy action tensor([ 1.0279, -0.3742, 0.0676, 0.1211]) tensor([0.4920, 0.1211, 0.1883, 0.1987]) -Greedy action tensor([ 1.1474, 0.0788, -0.3614, -0.2863]) tensor([0.5546, 0.1905, 0.1227, 0.1322]) -Greedy action tensor([ 1.9477, 0.1331, -0.5711, 0.3126]) tensor([0.6952, 0.1133, 0.0560, 0.1355]) -Greedy action tensor([ 0.7090, -0.2829, 0.0277, 0.2585]) tensor([0.3977, 0.1475, 0.2012, 0.2535]) -Greedy action tensor([ 1.2962, -0.3202, -0.2120, 0.1664]) tensor([0.5737, 0.1140, 0.1270, 0.1854]) -Greedy action tensor([ 1.0846, -0.2109, -0.2683, 0.3031]) tensor([0.5025, 0.1376, 0.1299, 0.2300]) -Greedy action tensor([ 1.4113, -0.2961, -1.2219, 0.7170]) tensor([0.5706, 0.1035, 0.0410, 0.2850]) -Greedy action tensor([ 1.3455, -0.9467, 0.0173, 0.2576]) tensor([0.5872, 0.0593, 0.1556, 0.1979]) -Greedy action tensor([ 1.1423, -0.4435, -0.4389, 0.5891]) tensor([0.5036, 0.1031, 0.1036, 0.2896]) -Greedy action tensor([ 1.8134, -0.8527, -0.7277, 0.5576]) tensor([0.6978, 0.0485, 0.0550, 0.1987]) -Greedy action tensor([ 1.4322, 0.2633, -0.0156, 0.3384]) tensor([0.5317, 0.1652, 0.1250, 0.1781]) -Greedy action tensor([ 1.3253, 0.1005, -0.3410, -0.1928]) tensor([0.5876, 0.1726, 0.1110, 0.1288]) -Greedy action tensor([ 1.5733, 0.4919, -0.6413, 0.4250]) tensor([0.5664, 0.1921, 0.0619, 0.1796]) -Greedy action tensor([ 0.8501, -0.7029, 0.0412, -0.2830]) tensor([0.5053, 0.1069, 0.2250, 0.1627]) -Greedy action tensor([ 0.4109, -0.1219, 0.0259, -0.2023]) tensor([0.3560, 0.2089, 0.2422, 0.1928]) -Greedy action tensor([ 0.8348, -0.6296, -0.0449, -0.3698]) tensor([0.5139, 0.1188, 0.2132, 0.1541]) -Greedy action tensor([ 0.7184, -0.3501, -0.0248, -0.1713]) tensor([0.4484, 0.1541, 0.2133, 0.1842]) -Greedy action tensor([ 0.2834, -0.1141, -0.0843, -0.1842]) tensor([0.3344, 0.2247, 0.2315, 0.2095]) -Greedy action tensor([ 0.8438, -0.4645, 0.0225, -0.3327]) tensor([0.4954, 0.1339, 0.2179, 0.1528]) -Greedy action tensor([ 0.8377, -0.4627, -0.1025, -0.0920]) tensor([0.4860, 0.1324, 0.1898, 0.1918]) -Greedy action tensor([ 0.8478, -0.8237, -0.0289, -0.4211]) tensor([0.5304, 0.0997, 0.2207, 0.1491]) -Greedy action tensor([ 0.9954, -0.5476, -0.0504, -0.8226]) tensor([0.5789, 0.1237, 0.2034, 0.0940]) -Greedy action tensor([ 0.4603, -0.3166, -0.1041, -0.1862]) tensor([0.3918, 0.1802, 0.2228, 0.2052]) -Greedy action tensor([ 0.5501, -0.4093, 0.2044, -0.5416]) tensor([0.4121, 0.1579, 0.2917, 0.1383]) -Greedy action tensor([ 0.7636, -0.4626, 0.0060, -0.4410]) tensor([0.4850, 0.1423, 0.2273, 0.1454]) -Greedy action tensor([ 0.6113, -0.3080, -0.0603, -0.2465]) tensor([0.4285, 0.1709, 0.2189, 0.1817]) -Greedy action tensor([ 0.5473, -0.3559, -0.1598, -0.2477]) tensor([0.4256, 0.1725, 0.2098, 0.1922]) -Greedy action tensor([ 0.5977, -0.4476, -0.0780, -0.1352]) tensor([0.4272, 0.1502, 0.2174, 0.2053]) -Greedy action tensor([ 0.5239, 0.1891, -0.0758, 0.0068]) tensor([0.3496, 0.2501, 0.1919, 0.2084]) -Greedy action tensor([ 0.5611, -0.1722, -0.0414, -0.0461]) tensor([0.3887, 0.1867, 0.2128, 0.2118]) -Greedy action tensor([ 0.5099, 0.0876, -0.1522, -0.1291]) tensor([0.3705, 0.2429, 0.1911, 0.1955]) -Greedy action tensor([ 1.0959, -0.7087, -0.0705, -0.4757]) tensor([0.5939, 0.0977, 0.1850, 0.1234]) -Greedy action tensor([ 0.4447, -0.2114, 0.0025, -0.1697]) tensor([0.3700, 0.1920, 0.2378, 0.2002]) -Greedy action tensor([ 0.8476, -0.5839, -0.0264, -0.2415]) tensor([0.5018, 0.1199, 0.2094, 0.1689]) -Greedy action tensor([ 1.0884, -0.7395, -0.2400, -0.3844]) tensor([0.6043, 0.0971, 0.1601, 0.1385]) -Greedy action tensor([ 0.3147, 0.4678, -0.1203, 0.0944]) tensor([0.2766, 0.3224, 0.1790, 0.2219]) -Greedy action tensor([ 0.8139, -0.4084, -0.0906, -0.3664]) tensor([0.4984, 0.1468, 0.2017, 0.1531]) -Greedy action tensor([ 0.5773, -0.1763, -0.0501, -0.2570]) tensor([0.4100, 0.1930, 0.2190, 0.1780]) -Greedy action tensor([ 0.8524, -0.5331, -0.1051, -0.2905]) tensor([0.5120, 0.1281, 0.1965, 0.1633]) -Greedy action tensor([ 0.8306, -0.6025, 0.0160, -0.5052]) tensor([0.5143, 0.1227, 0.2277, 0.1352]) -Greedy action tensor([ 0.8193, -0.4978, 0.0219, -0.1119]) tensor([0.4734, 0.1268, 0.2133, 0.1865]) -Greedy action tensor([ 0.8149, -0.4535, -0.1682, -0.2933]) tensor([0.5036, 0.1417, 0.1884, 0.1663]) -Greedy action tensor([ 0.6820, -0.5396, -0.0765, -0.1740]) tensor([0.4570, 0.1347, 0.2141, 0.1942]) -Greedy action tensor([ 0.4796, -0.1244, -0.1057, -0.1150]) tensor([0.3766, 0.2059, 0.2097, 0.2078]) -Greedy action tensor([ 0.5141, -0.2323, -0.0926, -0.1912]) tensor([0.3979, 0.1886, 0.2169, 0.1965]) -Greedy action tensor([ 0.7547, -0.3868, -0.0457, -0.2972]) tensor([0.4722, 0.1508, 0.2121, 0.1649]) -Greedy action tensor([ 0.6372, -0.3568, 0.0435, -0.1974]) tensor([0.4244, 0.1571, 0.2344, 0.1842]) -Greedy action tensor([ 0.7025, -0.4826, -0.1006, -0.4402]) tensor([0.4825, 0.1475, 0.2161, 0.1539]) -Greedy action tensor([ 0.9166, -0.4494, -0.2578, -0.5620]) tensor([0.5580, 0.1424, 0.1724, 0.1272]) -Greedy action tensor([ 0.4952, 0.0893, -0.1632, -0.1750]) tensor([0.3710, 0.2472, 0.1920, 0.1898]) -Greedy action tensor([ 1.0632, -0.8796, 0.1500, -0.6862]) tensor([0.5819, 0.0834, 0.2335, 0.1012]) -Greedy action tensor([ 0.7645, -0.4275, -0.0010, -0.2583]) tensor([0.4699, 0.1427, 0.2185, 0.1690]) -Greedy action tensor([ 0.8193, -0.3961, -0.1649, -0.4527]) tensor([0.5126, 0.1521, 0.1916, 0.1437]) -Greedy action tensor([ 0.8090, -0.7563, -0.0953, -0.2525]) tensor([0.5103, 0.1067, 0.2066, 0.1765]) -Greedy action tensor([ 0.6253, -0.3003, -0.0073, -0.1722]) tensor([0.4205, 0.1667, 0.2234, 0.1894]) -Greedy action tensor([ 0.8088, -0.6974, -0.1060, -0.5066]) tensor([0.5289, 0.1173, 0.2119, 0.1419]) -Greedy action tensor([ 0.6956, -0.5480, -0.0519, -0.3797]) tensor([0.4755, 0.1371, 0.2252, 0.1622]) -Greedy action tensor([ 0.7660, -0.5117, 0.0411, -0.3298]) tensor([0.4768, 0.1329, 0.2309, 0.1594]) -Greedy action tensor([ 0.8692, -0.9150, 0.4977, -0.6771]) tensor([0.4829, 0.0811, 0.3331, 0.1029]) -Greedy action tensor([ 0.8558, -0.4307, -0.1472, -0.2925]) tensor([0.5102, 0.1409, 0.1871, 0.1618]) -Greedy action tensor([ 0.4846, -0.4562, -0.1820, -0.1635]) tensor([0.4121, 0.1608, 0.2116, 0.2155]) -Greedy action tensor([ 0.8808, -0.3601, -0.0888, -0.1190]) tensor([0.4911, 0.1420, 0.1862, 0.1807]) -Greedy action tensor([ 0.5455, -0.2259, -0.1815, -0.2553]) tensor([0.4176, 0.1931, 0.2018, 0.1875]) -Greedy action tensor([ 0.7126, -0.5755, -0.0879, -0.3988]) tensor([0.4869, 0.1343, 0.2186, 0.1602]) -Greedy action tensor([ 0.7721, -0.5959, -0.0458, -0.3565]) tensor([0.4952, 0.1261, 0.2185, 0.1602]) -Greedy action tensor([ 4.5672e-01, -2.4063e-01, -3.6350e-04, -4.0499e-01]) tensor([0.3916, 0.1950, 0.2479, 0.1654]) -Greedy action tensor([ 0.9975, -0.5303, -0.0984, -0.3323]) tensor([0.5507, 0.1195, 0.1841, 0.1457]) -Greedy action tensor([ 1.0661, -0.5257, -0.1314, -0.4761]) tensor([0.5816, 0.1184, 0.1756, 0.1244]) -Greedy action tensor([ 0.7583, -0.2740, 0.0709, -0.3364]) tensor([0.4558, 0.1624, 0.2292, 0.1525]) -Greedy action tensor([ 1.0560, -0.9739, 0.0311, -0.6046]) tensor([0.5952, 0.0782, 0.2136, 0.1131]) -Greedy action tensor([ 0.9608, -0.6148, -0.0692, -0.4026]) tensor([0.5495, 0.1137, 0.1962, 0.1406]) -Greedy action tensor([ 1.1665, -0.7680, -0.1286, -0.5203]) tensor([0.6236, 0.0901, 0.1708, 0.1154]) -Greedy action tensor([ 0.3463, -0.1329, -0.0671, -0.2369]) tensor([0.3523, 0.2181, 0.2330, 0.1966]) -Greedy action tensor([ 0.9299, -0.4733, 0.0780, -0.5348]) tensor([0.5253, 0.1291, 0.2241, 0.1214]) -Greedy action tensor([ 0.5616, -0.0231, -0.1168, -0.0303]) tensor([0.3820, 0.2129, 0.1938, 0.2113]) -Greedy action tensor([ 0.6493, -0.0880, -0.0823, -0.1583]) tensor([0.4157, 0.1989, 0.2000, 0.1854]) -Greedy action tensor([ 1.0473, -0.8493, -0.0886, -0.4281]) tensor([0.5883, 0.0883, 0.1889, 0.1345]) -Greedy action tensor([ 0.3996, -0.2570, 0.1350, -0.1995]) tensor([0.3527, 0.1829, 0.2707, 0.1937]) -Greedy action tensor([ 0.7089, -0.6231, -0.1636, -0.6243]) tensor([0.5140, 0.1357, 0.2148, 0.1355]) -Greedy action tensor([ 0.6143, -0.5604, -0.1004, -0.0734]) tensor([0.4346, 0.1343, 0.2127, 0.2185]) -Greedy action tensor([ 0.5170, -0.4259, -0.0580, -0.3306]) tensor([0.4201, 0.1636, 0.2364, 0.1800]) -Greedy action tensor([ 0.7064, -0.4395, 0.1114, -0.5912]) tensor([0.4667, 0.1484, 0.2574, 0.1275]) -Greedy action tensor([ 0.6568, 0.0020, -0.4577, -0.3909]) tensor([0.4549, 0.2363, 0.1492, 0.1595]) -Greedy action tensor([ 0.8461, -0.4431, -0.1451, -0.2644]) tensor([0.5061, 0.1394, 0.1878, 0.1667]) -Greedy action tensor([ 0.5305, -0.1145, -0.1487, -0.0836]) tensor([0.3887, 0.2039, 0.1971, 0.2103]) -Greedy action tensor([ 0.7334, -0.1988, -0.2195, -0.3991]) tensor([0.4759, 0.1873, 0.1835, 0.1533]) -Greedy action tensor([ 0.6016, -0.3216, -0.0591, -0.0544]) tensor([0.4111, 0.1633, 0.2123, 0.2133]) -Greedy action tensor([ 0.7566, -0.3308, -0.0052, -0.4175]) tensor([0.4733, 0.1595, 0.2209, 0.1463]) -Greedy action tensor([ 0.3256, -0.0639, -0.0767, -0.0702]) tensor([0.3312, 0.2244, 0.2215, 0.2229]) -Greedy action tensor([ 1.2491, -0.6622, 0.0233, -0.3233]) tensor([0.6064, 0.0897, 0.1780, 0.1259]) -Greedy action tensor([ 1.1057, -0.7662, 0.0228, -0.5353]) tensor([0.5930, 0.0912, 0.2008, 0.1149]) -Greedy action tensor([ 0.6959, -0.0685, -0.0676, -0.4162]) tensor([0.4424, 0.2060, 0.2062, 0.1455]) -Greedy action tensor([ 0.5748, -0.2130, -0.0717, -0.2225]) tensor([0.4117, 0.1872, 0.2157, 0.1855]) -Greedy action tensor([ 0.9634, -0.5921, -0.0763, -0.3917]) tensor([0.5487, 0.1158, 0.1940, 0.1415]) -Greedy action tensor([-0.6116, -2.2883, -0.3316, 0.7464]) tensor([0.1563, 0.0292, 0.2068, 0.6077]) -Greedy action tensor([ 0.5209, -1.9471, -0.1850, 0.4689]) tensor([0.3956, 0.0335, 0.1953, 0.3756]) -Greedy action tensor([-0.1338, -1.3005, 0.6632, 0.4663]) tensor([0.1868, 0.0582, 0.4146, 0.3404]) -Greedy action tensor([-0.0675, -0.8873, 0.1290, 1.7048]) tensor([0.1171, 0.0516, 0.1425, 0.6889]) -Greedy action tensor([ 0.7051, -1.0485, 0.5767, 0.6529]) tensor([0.3331, 0.0577, 0.2930, 0.3162]) -Greedy action tensor([ 1.4678, -0.4271, 1.0183, 0.1572]) tensor([0.4859, 0.0730, 0.3100, 0.1310]) -Greedy action tensor([ 1.3023, 0.2588, -0.2682, -0.2193]) tensor([0.5623, 0.1980, 0.1169, 0.1228]) -Greedy action tensor([ 0.5372, -1.5255, 1.9709, -0.4719]) tensor([0.1759, 0.0224, 0.7376, 0.0641]) -Greedy action tensor([-0.0242, -0.3426, -0.0699, -0.2186]) tensor([0.2852, 0.2075, 0.2725, 0.2348]) -Greedy action tensor([ 0.5161, 0.2886, -0.6058, -0.4718]) tensor([0.4009, 0.3193, 0.1305, 0.1493]) -Greedy action tensor([ 0.2128, 0.3437, 1.1552, -0.8970]) tensor([0.1986, 0.2264, 0.5096, 0.0655]) -Greedy action tensor([ 0.5238, -0.9616, 0.2346, -0.2927]) tensor([0.4137, 0.0937, 0.3098, 0.1828]) -Greedy action tensor([ 1.4744, -0.3905, -0.0472, 0.7836]) tensor([0.5335, 0.0826, 0.1165, 0.2674]) -Greedy action tensor([-0.0666, 0.3927, 0.2883, -0.1361]) tensor([0.2024, 0.3203, 0.2886, 0.1888]) -Greedy action tensor([6.6060e-04, 2.3244e-01, 6.0986e-02, 8.1850e-01]) tensor([0.1789, 0.2256, 0.1901, 0.4054]) -Greedy action tensor([ 0.7860, -1.7407, 0.6597, 0.8215]) tensor([0.3336, 0.0267, 0.2940, 0.3457]) -Greedy action tensor([ 0.2545, -1.6143, -0.1467, 1.2618]) tensor([0.2192, 0.0338, 0.1468, 0.6002]) -Greedy action tensor([ 0.9963, 0.0448, 1.1742, -0.0332]) tensor([0.3404, 0.1314, 0.4066, 0.1216]) -Greedy action tensor([ 1.5470, -0.9520, -0.3127, -0.5000]) tensor([0.7315, 0.0601, 0.1139, 0.0945]) -Greedy action tensor([ 1.2034, -0.0356, -0.3278, 0.4749]) tensor([0.5029, 0.1457, 0.1088, 0.2427]) -Greedy action tensor([ 0.1257, -1.6341, 0.1875, 0.2161]) tensor([0.3003, 0.0517, 0.3194, 0.3287]) -Greedy action tensor([0.6878, 0.2876, 0.2007, 0.0502]) tensor([0.3555, 0.2382, 0.2184, 0.1879]) -Greedy action tensor([-0.1231, -1.6849, -0.2382, 0.5056]) tensor([0.2515, 0.0528, 0.2241, 0.4716]) -Greedy action tensor([ 0.2991, -0.8390, -0.7359, 0.4645]) tensor([0.3502, 0.1122, 0.1244, 0.4132]) -Greedy action tensor([-0.1545, -1.5667, -0.1457, -0.4943]) tensor([0.3373, 0.0822, 0.3403, 0.2402]) -Greedy action tensor([-0.7711, -1.0358, -0.8276, -0.2593]) tensor([0.2283, 0.1752, 0.2157, 0.3808]) -Greedy action tensor([-1.1259, -0.5163, 1.2663, -0.9642]) tensor([0.0669, 0.1230, 0.7315, 0.0786]) -Greedy action tensor([ 0.3590, -0.5963, -0.5606, 0.7999]) tensor([0.2996, 0.1153, 0.1195, 0.4657]) -Greedy action tensor([-0.8212, -0.3022, -0.3762, 0.2518]) tensor([0.1396, 0.2345, 0.2178, 0.4081]) -Greedy action tensor([ 0.0504, -0.7052, 0.9003, 1.0820]) tensor([0.1512, 0.0710, 0.3537, 0.4241]) -Greedy action tensor([ 0.0642, -0.1943, 0.8667, -0.6286]) tensor([0.2220, 0.1715, 0.4954, 0.1111]) -Greedy action tensor([ 0.6912, -0.8904, -0.4611, 1.2525]) tensor([0.3054, 0.0628, 0.0965, 0.5353]) -Greedy action tensor([-0.4591, -0.3175, -1.3447, -0.5715]) tensor([0.2892, 0.3332, 0.1193, 0.2584]) -Greedy action tensor([ 1.5457, 1.2490, 1.2284, -0.2093]) tensor([0.3782, 0.2811, 0.2754, 0.0654]) -Greedy action tensor([ 0.0603, -0.0273, -0.1068, -0.5927]) tensor([0.3046, 0.2791, 0.2577, 0.1585]) -Greedy action tensor([ 0.3464, -1.3215, -0.1321, 0.2302]) tensor([0.3706, 0.0699, 0.2296, 0.3299]) -Greedy action tensor([-0.1817, -1.8022, -0.5133, 1.3031]) tensor([0.1580, 0.0312, 0.1134, 0.6974]) -Greedy action tensor([-0.2346, 0.4221, 0.5576, -0.6257]) tensor([0.1720, 0.3318, 0.3799, 0.1163]) -Greedy action tensor([ 0.1073, 0.3740, 0.0052, -0.6224]) tensor([0.2710, 0.3538, 0.2447, 0.1306]) -Greedy action tensor([-1.2258, -0.3844, 0.4378, -0.6172]) tensor([0.0958, 0.2223, 0.5058, 0.1761]) -Greedy action tensor([ 0.5486, -1.7635, -0.4675, 0.0787]) tensor([0.4794, 0.0475, 0.1735, 0.2996]) -Greedy action tensor([-0.4796, -0.8599, 0.2864, 0.8407]) tensor([0.1319, 0.0902, 0.2838, 0.4940]) -Greedy action tensor([-0.9115, -0.8491, -0.1216, -0.0841]) tensor([0.1526, 0.1624, 0.3361, 0.3490]) -Greedy action tensor([ 0.5990, 0.0514, 0.4668, -0.0705]) tensor([0.3371, 0.1950, 0.2953, 0.1726]) -Greedy action tensor([-0.0915, -0.1863, 1.7890, -0.6858]) tensor([0.1109, 0.1009, 0.7271, 0.0612]) -Greedy action tensor([ 0.1887, -1.0087, 0.1690, -0.1379]) tensor([0.3329, 0.1005, 0.3264, 0.2402]) -Greedy action tensor([ 0.7084, -0.2289, 0.4715, 0.5301]) tensor([0.3314, 0.1298, 0.2615, 0.2773]) -Greedy action tensor([ 0.2307, 0.3186, -0.5272, 0.2198]) tensor([0.2817, 0.3076, 0.1320, 0.2787]) -Greedy action tensor([-0.2529, -0.4152, -0.5161, 0.1923]) tensor([0.2393, 0.2034, 0.1839, 0.3734]) -Greedy action tensor([ 1.7486, -1.1502, 0.8028, 1.0314]) tensor([0.5177, 0.0285, 0.2011, 0.2527]) -Greedy action tensor([ 0.2081, -0.8271, 0.3731, -1.3101]) tensor([0.3631, 0.1290, 0.4283, 0.0796]) -Greedy action tensor([-0.6767, -0.4889, 0.8144, -1.1945]) tensor([0.1380, 0.1665, 0.6132, 0.0822]) -Greedy action tensor([-0.4478, -0.2649, -0.3783, -0.5243]) tensor([0.2382, 0.2859, 0.2553, 0.2206]) -Greedy action tensor([ 0.6344, 0.1391, -0.5201, 0.9850]) tensor([0.2990, 0.1822, 0.0942, 0.4246]) -Greedy action tensor([ 0.4469, -0.1012, 0.6733, 0.0635]) tensor([0.2846, 0.1645, 0.3569, 0.1940]) -Greedy action tensor([ 1.5352, -1.3481, -0.1647, 0.9294]) tensor([0.5605, 0.0314, 0.1024, 0.3058]) -Greedy action tensor([ 0.1710, -1.2406, 0.6109, 0.0553]) tensor([0.2712, 0.0661, 0.4211, 0.2416]) -Greedy action tensor([ 1.6102, -0.1866, -0.1484, 0.4251]) tensor([0.6083, 0.1009, 0.1048, 0.1860]) -Greedy action tensor([0.1866, 0.2701, 1.0316, 0.1525]) tensor([0.1858, 0.2020, 0.4326, 0.1796]) -Greedy action tensor([ 0.2943, -0.6727, 1.9457, -0.9083]) tensor([0.1450, 0.0551, 0.7562, 0.0436]) -Greedy action tensor([-0.0715, -1.4604, 1.1973, 0.8515]) tensor([0.1366, 0.0341, 0.4857, 0.3437]) -Greedy action tensor([-0.3480, -0.6262, -0.1280, 1.6706]) tensor([0.0950, 0.0719, 0.1183, 0.7148]) -Greedy action tensor([ 0.3486, 0.3269, -0.4221, -0.7355]) tensor([0.3598, 0.3521, 0.1665, 0.1217]) -Greedy action tensor([-1.2582, -1.2753, 0.3229, -0.6552]) tensor([0.1153, 0.1134, 0.5605, 0.2108]) -Greedy action tensor([-0.4185, 0.1059, -0.0582, 0.1176]) tensor([0.1714, 0.2897, 0.2458, 0.2931]) -Greedy action tensor([ 0.6208, -1.2543, 1.7951, 0.5538]) tensor([0.1878, 0.0288, 0.6078, 0.1756]) -Greedy action tensor([ 0.2508, -0.1423, 1.0631, -0.1226]) tensor([0.2166, 0.1462, 0.4881, 0.1491]) -Greedy action tensor([-0.5010, -0.4263, -1.5891, 0.0522]) tensor([0.2408, 0.2594, 0.0811, 0.4187]) -Greedy action tensor([-0.0634, -1.6034, -0.3493, -0.3474]) tensor([0.3679, 0.0789, 0.2764, 0.2769]) -Greedy action tensor([ 0.0635, -0.4754, 0.1749, 1.4801]) tensor([0.1465, 0.0855, 0.1638, 0.6042]) -Greedy action tensor([ 0.4929, 0.1802, 0.2321, -0.6871]) tensor([0.3560, 0.2604, 0.2743, 0.1094]) -Greedy action tensor([ 0.3449, -1.1428, -0.1507, 0.7096]) tensor([0.3053, 0.0690, 0.1860, 0.4397]) -Greedy action tensor([-0.7536, -0.1332, -0.0688, -1.0788]) tensor([0.1797, 0.3341, 0.3564, 0.1298]) -Greedy action tensor([-0.0073, -1.3005, 2.4122, -0.6896]) tensor([0.0768, 0.0211, 0.8633, 0.0388]) -Greedy action tensor([-0.2802, -0.2545, -0.6337, -0.5191]) tensor([0.2844, 0.2918, 0.1997, 0.2240]) -Greedy action tensor([-0.9010, -1.2226, 0.1206, -0.5056]) tensor([0.1670, 0.1211, 0.4639, 0.2480]) -Greedy action tensor([ 1.0524, -0.4922, 1.2976, 0.2361]) tensor([0.3409, 0.0727, 0.4356, 0.1507]) -Greedy action tensor([ 0.1889, -0.3750, -0.3159, -0.0996]) tensor([0.3422, 0.1947, 0.2066, 0.2565]) -Greedy action tensor([ 1.3494, -1.9906, 0.9283, 0.9033]) tensor([0.4288, 0.0152, 0.2815, 0.2745]) -Greedy action tensor([-0.9202, -0.2700, 0.3991, -0.3884]) tensor([0.1196, 0.2292, 0.4475, 0.2036]) -Greedy action tensor([-0.0114, -0.4324, 0.8835, -0.1193]) tensor([0.1999, 0.1312, 0.4893, 0.1795]) -Greedy action tensor([-1.6980, -0.4731, 0.5839, 0.0386]) tensor([0.0503, 0.1712, 0.4928, 0.2857]) -Greedy action tensor([-1.7960, -0.3738, 0.6600, -0.0919]) tensor([0.0448, 0.1859, 0.5228, 0.2465]) -Greedy action tensor([-1.6509, -0.0836, 0.4538, -0.0224]) tensor([0.0524, 0.2511, 0.4297, 0.2669]) -Greedy action tensor([-1.6741, -0.3457, 0.6132, -0.0145]) tensor([0.0503, 0.1899, 0.4954, 0.2644]) -Greedy action tensor([-1.4236, -0.5463, 0.4639, 0.1260]) tensor([0.0679, 0.1634, 0.4487, 0.3200]) -Greedy action tensor([-1.7207, -0.2841, 0.5628, -0.0215]) tensor([0.0488, 0.2053, 0.4789, 0.2670]) -Greedy action tensor([-1.6181, -0.4723, 0.5140, -0.0295]) tensor([0.0572, 0.1800, 0.4826, 0.2802]) -Greedy action tensor([-1.9152, -0.4374, 0.6538, -0.1554]) tensor([0.0412, 0.1808, 0.5383, 0.2397]) -Greedy action tensor([-1.1359, -0.1627, 0.5373, -0.0052]) tensor([0.0828, 0.2192, 0.4414, 0.2566]) -Greedy action tensor([-1.8947, -0.4459, 0.6437, -0.1571]) tensor([0.0424, 0.1804, 0.5364, 0.2408]) -Greedy action tensor([-1.4303, -0.0145, 0.4422, 0.4454]) tensor([0.0551, 0.2270, 0.3584, 0.3595]) -Greedy action tensor([-1.7347, -0.4265, 0.5609, -0.0596]) tensor([0.0501, 0.1853, 0.4973, 0.2674]) -Greedy action tensor([-1.9192, -0.4722, 0.6640, -0.1644]) tensor([0.0412, 0.1751, 0.5455, 0.2382]) -Greedy action tensor([-1.9448, -0.4544, 0.6656, -0.1810]) tensor([0.0402, 0.1784, 0.5468, 0.2345]) -Greedy action tensor([-1.9325, -0.4571, 0.6601, -0.1740]) tensor([0.0408, 0.1782, 0.5446, 0.2365]) -Greedy action tensor([-1.5706, -0.5485, 0.4875, -0.0601]) tensor([0.0620, 0.1722, 0.4852, 0.2806]) -Greedy action tensor([-1.9190, -0.4385, 0.6575, -0.1600]) tensor([0.0411, 0.1805, 0.5400, 0.2384]) -Greedy action tensor([-1.8243, -0.4101, 0.6040, -0.1524]) tensor([0.0459, 0.1889, 0.5207, 0.2444]) -Greedy action tensor([-1.8124, -0.4000, 0.6562, -0.0310]) tensor([0.0438, 0.1797, 0.5167, 0.2599]) -Greedy action tensor([-0.4446, 0.6925, 0.0204, 0.2147]) tensor([0.1308, 0.4079, 0.2083, 0.2530]) -Greedy action tensor([-1.7093, -0.3909, 0.6131, 0.0196]) tensor([0.0486, 0.1817, 0.4958, 0.2739]) -Greedy action tensor([-1.9118, -0.4340, 0.6525, -0.1608]) tensor([0.0414, 0.1816, 0.5383, 0.2387]) -Greedy action tensor([-0.5240, 0.4636, 0.1049, 0.0909]) tensor([0.1350, 0.3623, 0.2531, 0.2496]) -Greedy action tensor([-1.7844, -0.2104, 0.5564, -0.0952]) tensor([0.0462, 0.2231, 0.4803, 0.2504]) -Greedy action tensor([-1.5974, -0.4642, 0.7023, 0.3170]) tensor([0.0479, 0.1489, 0.4780, 0.3252]) -Greedy action tensor([-1.6254, -0.5536, 0.5109, -0.0600]) tensor([0.0582, 0.1701, 0.4931, 0.2786]) -Greedy action tensor([-1.8979, -0.4384, 0.6408, -0.1570]) tensor([0.0422, 0.1818, 0.5350, 0.2409]) -Greedy action tensor([-1.7293, -0.4861, 0.5893, -0.0408]) tensor([0.0499, 0.1730, 0.5071, 0.2700]) -Greedy action tensor([-1.7588, -0.4441, 0.5781, -0.0852]) tensor([0.0490, 0.1825, 0.5072, 0.2613]) -Greedy action tensor([-1.8113, -0.4453, 0.6493, -0.0410]) tensor([0.0444, 0.1742, 0.5204, 0.2610]) -Greedy action tensor([-1.8479, -0.4411, 0.6160, -0.1438]) tensor([0.0448, 0.1828, 0.5262, 0.2462]) -Greedy action tensor([-1.9006, -0.4309, 0.6470, -0.1591]) tensor([0.0420, 0.1825, 0.5361, 0.2394]) -Greedy action tensor([-1.6812, -0.4298, 0.5244, -0.0499]) tensor([0.0535, 0.1871, 0.4858, 0.2736]) -Greedy action tensor([-1.8958, -0.4019, 0.6098, -0.1752]) tensor([0.0429, 0.1912, 0.5259, 0.2399]) -Greedy action tensor([-1.9139, -0.4413, 0.6772, -0.1414]) tensor([0.0407, 0.1773, 0.5427, 0.2393]) -Greedy action tensor([-0.1273, -0.2401, 0.1819, 0.1822]) tensor([0.2165, 0.1934, 0.2950, 0.2951]) -Greedy action tensor([-1.8326, -0.1860, 0.5775, -0.1388]) tensor([0.0439, 0.2280, 0.4891, 0.2390]) -Greedy action tensor([-1.9068, -0.4534, 0.6523, -0.1632]) tensor([0.0418, 0.1788, 0.5403, 0.2391]) -Greedy action tensor([-1.9158, -0.4376, 0.6482, -0.1588]) tensor([0.0414, 0.1814, 0.5374, 0.2398]) -Greedy action tensor([-1.8933, -0.2844, 0.6163, -0.1374]) tensor([0.0415, 0.2075, 0.5107, 0.2403]) -Greedy action tensor([-1.6223, -0.2310, 0.7060, 0.1698]) tensor([0.0470, 0.1889, 0.4821, 0.2820]) -Greedy action tensor([-1.0724, -0.0093, 0.1823, 0.3549]) tensor([0.0864, 0.2502, 0.3031, 0.3602]) -Greedy action tensor([-1.7580, -0.4345, 0.5793, -0.0768]) tensor([0.0488, 0.1834, 0.5055, 0.2623]) -Greedy action tensor([-1.8444, -0.4590, 0.6105, -0.1228]) tensor([0.0450, 0.1797, 0.5237, 0.2516]) -Greedy action tensor([-1.6751, -0.4343, 0.5330, -0.0382]) tensor([0.0535, 0.1850, 0.4866, 0.2749]) -Greedy action tensor([-1.7388, 0.0594, 0.5318, -0.0382]) tensor([0.0450, 0.2720, 0.4363, 0.2467]) -Greedy action tensor([-1.9036, -0.3305, 0.6339, -0.1586]) tensor([0.0413, 0.1993, 0.5227, 0.2367]) -Greedy action tensor([-1.6482, -0.4837, 0.0911, -0.5013]) tensor([0.0767, 0.2456, 0.4364, 0.2413]) -Greedy action tensor([-1.8626, -0.4181, 0.6251, -0.1390]) tensor([0.0437, 0.1853, 0.5260, 0.2450]) -Greedy action tensor([-1.6131, -0.4335, 0.5093, 0.0478]) tensor([0.0560, 0.1821, 0.4674, 0.2946]) -Greedy action tensor([-1.8576, -0.4120, 0.6200, -0.1394]) tensor([0.0440, 0.1867, 0.5241, 0.2452]) -Greedy action tensor([-1.8070, -0.5611, 1.4324, 0.9201]) tensor([0.0221, 0.0768, 0.5635, 0.3376]) -Greedy action tensor([-1.8574, -0.4162, 0.6315, -0.1193]) tensor([0.0436, 0.1840, 0.5247, 0.2477]) -Greedy action tensor([-1.7758, -0.4668, 0.5838, -0.1505]) tensor([0.0491, 0.1818, 0.5197, 0.2494]) -Greedy action tensor([-1.7392, 0.0728, 0.4836, -0.0555]) tensor([0.0460, 0.2816, 0.4247, 0.2477]) -Greedy action tensor([-1.8739, -0.7057, 0.9400, -0.1568]) tensor([0.0378, 0.1216, 0.6302, 0.2105]) -Greedy action tensor([-0.8689, 0.2003, -0.1894, -0.0944]) tensor([0.1241, 0.3616, 0.2449, 0.2693]) -Greedy action tensor([-1.9027, -0.4547, 0.6500, -0.1611]) tensor([0.0420, 0.1788, 0.5395, 0.2397]) -Greedy action tensor([-1.5454, -0.1257, 0.4010, -0.0094]) tensor([0.0596, 0.2464, 0.4172, 0.2768]) -Greedy action tensor([-1.0791, -0.3478, 0.3629, 0.3914]) tensor([0.0858, 0.1782, 0.3628, 0.3732]) -Greedy action tensor([-1.6925, -0.4585, 0.5357, -0.0566]) tensor([0.0530, 0.1822, 0.4924, 0.2723]) -Greedy action tensor([-1.5988, -0.3020, 0.6067, -0.0080]) tensor([0.0536, 0.1962, 0.4868, 0.2633]) -Greedy action tensor([-1.9217, -0.4437, 0.6518, -0.1670]) tensor([0.0412, 0.1806, 0.5401, 0.2381]) -Greedy action tensor([-1.8814, -0.3904, 0.6506, -0.1286]) tensor([0.0420, 0.1867, 0.5287, 0.2426]) -Greedy action tensor([-1.9432, -0.4426, 0.6649, -0.1795]) tensor([0.0402, 0.1801, 0.5453, 0.2344]) -Greedy action tensor([-1.6804, 0.0207, 0.4641, -0.0128]) tensor([0.0492, 0.2697, 0.4202, 0.2608]) -Greedy action tensor([-1.9089, -0.4094, 0.6506, -0.1536]) tensor([0.0413, 0.1851, 0.5344, 0.2391]) -Greedy action tensor([-1.4058, -0.3896, 0.5573, -0.3378]) tensor([0.0725, 0.2003, 0.5163, 0.2109]) -Greedy action tensor([-0.4912, 0.3706, 0.5589, 1.3005]) tensor([0.0818, 0.1937, 0.2338, 0.4908]) -Greedy action tensor([-1.6389, 0.3934, 0.4191, 0.0040]) tensor([0.0462, 0.3528, 0.3620, 0.2390]) -Greedy action tensor([-1.9037, -0.4021, 0.6576, -0.1526]) tensor([0.0413, 0.1855, 0.5352, 0.2380]) -Greedy action tensor([-1.8311, -0.4657, 0.6067, -0.1250]) tensor([0.0457, 0.1791, 0.5234, 0.2518]) -Greedy action tensor([-1.8784, -0.4097, 0.6625, -0.1339]) tensor([0.0421, 0.1828, 0.5342, 0.2409]) -Greedy action tensor([-1.9221, -0.3951, 0.6451, -0.1726]) tensor([0.0410, 0.1888, 0.5343, 0.2359]) -Greedy action tensor([-1.4560, 0.0549, 0.4787, 0.0910]) tensor([0.0583, 0.2642, 0.4036, 0.2739]) -Greedy action tensor([-1.6127, 0.1703, 0.4457, -0.1159]) tensor([0.0520, 0.3090, 0.4069, 0.2321]) -Greedy action tensor([-1.9343, -0.4398, 0.6626, -0.1745]) tensor([0.0405, 0.1805, 0.5436, 0.2354]) -Greedy action tensor([-1.9445, -0.4544, 0.6694, -0.1797]) tensor([0.0401, 0.1780, 0.5476, 0.2343]) -Greedy action tensor([-1.8990, -0.4138, 0.6401, -0.1594]) tensor([0.0421, 0.1857, 0.5328, 0.2395]) -Greedy action tensor([-1.9157, -0.4508, 0.6563, -0.1655]) tensor([0.0414, 0.1790, 0.5415, 0.2381]) -Greedy action tensor([-1.9236, -0.4513, 0.6579, -0.1694]) tensor([0.0411, 0.1790, 0.5427, 0.2373]) -Greedy action tensor([ 1.2123, -0.4048, -0.7377, 0.4805]) tensor([0.5489, 0.1089, 0.0781, 0.2640]) -Greedy action tensor([ 1.7195, -0.7712, -0.2633, 0.2086]) tensor([0.6938, 0.0575, 0.0955, 0.1531]) -Greedy action tensor([ 1.3289, -0.3946, -0.3755, 0.2030]) tensor([0.5936, 0.1059, 0.1080, 0.1925]) -Greedy action tensor([ 1.8877, -0.7323, 0.0456, 0.5879]) tensor([0.6649, 0.0484, 0.1054, 0.1813]) -Greedy action tensor([ 1.6119, -0.4165, -0.4277, 0.7489]) tensor([0.5940, 0.0781, 0.0773, 0.2506]) -Greedy action tensor([ 1.2711, -0.0327, -0.4429, 0.4600]) tensor([0.5274, 0.1432, 0.0950, 0.2344]) -Greedy action tensor([ 2.3587, -0.8969, -0.6342, 0.6731]) tensor([0.7849, 0.0303, 0.0394, 0.1455]) -Greedy action tensor([ 0.9448, 0.0143, -0.4608, 0.3354]) tensor([0.4580, 0.1806, 0.1123, 0.2490]) -Greedy action tensor([ 1.2597, -0.8122, -0.1818, 0.5284]) tensor([0.5424, 0.0683, 0.1283, 0.2610]) -Greedy action tensor([ 0.4577, -0.4193, -0.2618, 0.1718]) tensor([0.3767, 0.1567, 0.1835, 0.2831]) -Greedy action tensor([ 1.1709, -0.7143, -0.0633, 0.0863]) tensor([0.5615, 0.0852, 0.1634, 0.1898]) -Greedy action tensor([ 1.6446, -0.4827, -0.1649, -0.0062]) tensor([0.6781, 0.0808, 0.1110, 0.1301]) -Greedy action tensor([ 1.7931, -0.4125, -0.5174, 0.3021]) tensor([0.6971, 0.0768, 0.0692, 0.1569]) -Greedy action tensor([ 1.8324, -0.8977, -0.0852, 0.6284]) tensor([0.6613, 0.0431, 0.0972, 0.1984]) -Greedy action tensor([ 1.7187, -0.9092, 0.0696, 0.1930]) tensor([0.6748, 0.0487, 0.1297, 0.1467]) -Greedy action tensor([ 1.6741, -0.7370, -0.4709, -0.0303]) tensor([0.7201, 0.0646, 0.0843, 0.1310]) -Greedy action tensor([ 1.1722, -0.2211, -0.1421, -0.0735]) tensor([0.5541, 0.1376, 0.1489, 0.1594]) -Greedy action tensor([ 1.4937, -0.2612, -0.4401, 0.4995]) tensor([0.5926, 0.1025, 0.0857, 0.2193]) -Greedy action tensor([ 1.6136, -0.7829, -0.3321, 0.4569]) tensor([0.6458, 0.0588, 0.0923, 0.2031]) -Greedy action tensor([ 0.7924, -0.1851, -0.4617, 0.1905]) tensor([0.4526, 0.1703, 0.1291, 0.2479]) -Greedy action tensor([ 2.0087, -0.8066, -0.7172, 0.6058]) tensor([0.7293, 0.0437, 0.0478, 0.1793]) -Greedy action tensor([ 1.2967, -0.4420, -0.4011, 0.4398]) tensor([0.5607, 0.0986, 0.1027, 0.2380]) -Greedy action tensor([ 0.8875, -0.3524, -0.1475, 0.1741]) tensor([0.4685, 0.1356, 0.1664, 0.2295]) -Greedy action tensor([ 1.6303, -0.0661, -0.2977, -0.0966]) tensor([0.6637, 0.1217, 0.0965, 0.1180]) -Greedy action tensor([ 1.0814, -0.6110, -0.1523, 0.1689]) tensor([0.5328, 0.0981, 0.1552, 0.2139]) -Greedy action tensor([ 2.3342, -0.9834, -0.6027, 0.4713]) tensor([0.8035, 0.0291, 0.0426, 0.1247]) -Greedy action tensor([ 1.3017, -0.2520, -0.6760, 0.3182]) tensor([0.5801, 0.1227, 0.0803, 0.2170]) -Greedy action tensor([ 1.7047, -0.7220, 0.0747, 0.2634]) tensor([0.6575, 0.0581, 0.1288, 0.1556]) -Greedy action tensor([ 1.1971, -0.1273, -0.8244, 0.4680]) tensor([0.5317, 0.1414, 0.0704, 0.2565]) -Greedy action tensor([ 1.9381, -0.8400, 0.1625, 0.7528]) tensor([0.6505, 0.0404, 0.1102, 0.1988]) -Greedy action tensor([ 1.4270, -0.1207, -0.4029, 0.3799]) tensor([0.5800, 0.1234, 0.0931, 0.2036]) -Greedy action tensor([ 1.7944, -0.3340, -0.5993, 0.4130]) tensor([0.6842, 0.0814, 0.0625, 0.1719]) -Greedy action tensor([ 1.3036, -0.3623, -0.8542, 0.3169]) tensor([0.5962, 0.1127, 0.0689, 0.2223]) -Greedy action tensor([ 0.7128, -0.1231, -0.1303, 0.0727]) tensor([0.4182, 0.1813, 0.1800, 0.2205]) -Greedy action tensor([ 1.1444, -0.2987, -1.0659, 0.4690]) tensor([0.5392, 0.1273, 0.0591, 0.2744]) -Greedy action tensor([ 1.3289, -0.4805, -0.4977, 0.1621]) tensor([0.6112, 0.1001, 0.0984, 0.1903]) -Greedy action tensor([ 1.9942, -0.7251, -0.1889, 0.8004]) tensor([0.6749, 0.0445, 0.0761, 0.2045]) -Greedy action tensor([ 1.5480, -0.3589, -0.0423, 0.7739]) tensor([0.5514, 0.0819, 0.1124, 0.2543]) -Greedy action tensor([ 1.9726, -0.6128, -0.4346, 0.3692]) tensor([0.7317, 0.0551, 0.0659, 0.1472]) -Greedy action tensor([ 1.3371, -0.7330, -0.2708, -0.0513]) tensor([0.6345, 0.0801, 0.1271, 0.1583]) -Greedy action tensor([ 1.3219, -0.7206, -0.2605, 0.6394]) tensor([0.5433, 0.0705, 0.1116, 0.2746]) -Greedy action tensor([ 2.0860, -1.0968, -0.1832, 0.5131]) tensor([0.7395, 0.0307, 0.0765, 0.1534]) -Greedy action tensor([ 0.7353, -0.1086, 0.1205, -0.0608]) tensor([0.4129, 0.1776, 0.2233, 0.1863]) -Greedy action tensor([ 1.7408, -0.4005, -0.1734, 0.5901]) tensor([0.6324, 0.0743, 0.0933, 0.2001]) -Greedy action tensor([ 1.2777, -0.6748, -0.5822, 0.5678]) tensor([0.5589, 0.0793, 0.0870, 0.2748]) -Greedy action tensor([ 1.1369, -0.4624, -0.2066, 0.5120]) tensor([0.5004, 0.1011, 0.1306, 0.2679]) -Greedy action tensor([ 1.0866, -0.1717, 0.1910, 0.4017]) tensor([0.4552, 0.1293, 0.1859, 0.2295]) -Greedy action tensor([ 1.1425, -0.3295, -0.1763, 0.1757]) tensor([0.5327, 0.1222, 0.1425, 0.2026]) -Greedy action tensor([ 1.7856, -0.5660, -0.7781, 0.9816]) tensor([0.6174, 0.0588, 0.0475, 0.2763]) -Greedy action tensor([ 1.3544, -0.4920, -0.4657, 0.4460]) tensor([0.5804, 0.0916, 0.0940, 0.2340]) -Greedy action tensor([ 0.8840, 0.1466, 0.0152, -0.5298]) tensor([0.4671, 0.2234, 0.1959, 0.1136]) -Greedy action tensor([ 0.9108, -0.3355, 0.0317, -0.0772]) tensor([0.4819, 0.1386, 0.2001, 0.1794]) -Greedy action tensor([ 1.1078, -0.6385, -0.0622, 0.3342]) tensor([0.5138, 0.0896, 0.1595, 0.2371]) -Greedy action tensor([ 1.2202, -0.4914, -0.2409, 0.0748]) tensor([0.5778, 0.1043, 0.1340, 0.1838]) -Greedy action tensor([ 1.4526, 0.4887, -0.2973, 0.3859]) tensor([0.5265, 0.2008, 0.0915, 0.1812]) -Greedy action tensor([ 2.3163, -0.7392, -0.1701, 0.4767]) tensor([0.7757, 0.0365, 0.0645, 0.1232]) -Greedy action tensor([ 1.1561, -0.3309, -0.4422, 0.3082]) tensor([0.5386, 0.1218, 0.1089, 0.2307]) -Greedy action tensor([ 0.8670, -0.1496, 0.0859, 0.0495]) tensor([0.4422, 0.1600, 0.2025, 0.1953]) -Greedy action tensor([ 0.9655, -0.3324, -0.2682, 0.0092]) tensor([0.5132, 0.1401, 0.1494, 0.1972]) -Greedy action tensor([ 1.5778, 0.4146, -0.1551, 0.3548]) tensor([0.5607, 0.1752, 0.0991, 0.1650]) -Greedy action tensor([ 2.1086, 0.5704, 0.4631, -0.3113]) tensor([0.6682, 0.1435, 0.1289, 0.0594]) -Greedy action tensor([ 1.2532, 0.1094, -0.5835, 0.5151]) tensor([0.5113, 0.1629, 0.0815, 0.2444]) -Greedy action tensor([ 1.5772, -0.0565, -0.6489, 0.4463]) tensor([0.6150, 0.1201, 0.0664, 0.1985]) -Greedy action tensor([ 1.1343, -0.0509, -0.5035, 0.5950]) tensor([0.4800, 0.1467, 0.0933, 0.2799]) -Greedy action tensor([ 1.7054, -0.0469, -0.5321, 0.2557]) tensor([0.6602, 0.1145, 0.0705, 0.1549]) -Greedy action tensor([ 2.2035, -1.2874, -0.2555, 0.6597]) tensor([0.7521, 0.0229, 0.0643, 0.1606]) -Greedy action tensor([ 1.2963, 0.0500, -0.5738, 0.3204]) tensor([0.5499, 0.1581, 0.0847, 0.2072]) -Greedy action tensor([ 1.0519, -0.3721, -0.0220, -0.2621]) tensor([0.5402, 0.1301, 0.1846, 0.1452]) -Greedy action tensor([ 1.5356, -0.5915, -0.3169, 0.0659]) tensor([0.6640, 0.0791, 0.1041, 0.1527]) -Greedy action tensor([ 1.3825, -0.5324, -0.3746, 0.5696]) tensor([0.5671, 0.0836, 0.0978, 0.2515]) -Greedy action tensor([ 1.4407, -0.2026, -0.2505, 0.4969]) tensor([0.5660, 0.1094, 0.1043, 0.2203]) -Greedy action tensor([ 1.3983, -0.5460, -0.2970, 0.3548]) tensor([0.5956, 0.0852, 0.1093, 0.2098]) -Greedy action tensor([ 1.4637, -0.1246, -0.4760, 0.0557]) tensor([0.6279, 0.1283, 0.0903, 0.1536]) -Greedy action tensor([ 1.4916, 0.2435, -0.1629, -0.1183]) tensor([0.5959, 0.1711, 0.1139, 0.1191]) -Greedy action tensor([ 1.3486, -0.2614, -0.1472, 0.1249]) tensor([0.5820, 0.1163, 0.1304, 0.1712]) -Greedy action tensor([ 1.8108, 0.2458, -0.5645, 0.3982]) tensor([0.6470, 0.1353, 0.0602, 0.1575]) -Greedy action tensor([ 1.6727, -0.8791, -0.2642, -0.0695]) tensor([0.7157, 0.0558, 0.1032, 0.1253]) -Greedy action tensor([ 0.6151, -0.2539, -0.3044, 0.5128]) tensor([0.3675, 0.1541, 0.1465, 0.3318]) -Greedy action tensor([2.0085, 0.0759, 0.1621, 0.4900]) tensor([0.6572, 0.0951, 0.1037, 0.1439]) -Greedy action tensor([ 1.7604, -0.4672, -0.3803, 0.4518]) tensor([0.6687, 0.0721, 0.0786, 0.1807]) -Greedy action tensor([ 2.1601, -0.7197, -0.2111, 0.8901]) tensor([0.6991, 0.0393, 0.0653, 0.1963]) -Greedy action tensor([ 0.3312, -0.0362, -0.0485, -0.3342]) tensor([0.3460, 0.2396, 0.2366, 0.1778]) -Greedy action tensor([ 0.9003, -0.4156, -0.1028, -0.7064]) tensor([0.5448, 0.1461, 0.1998, 0.1093]) -Greedy action tensor([ 1.0099, -0.5385, -0.0850, -0.6473]) tensor([0.5754, 0.1223, 0.1925, 0.1097]) -Greedy action tensor([ 0.5373, -0.0327, 0.0915, 0.0162]) tensor([0.3572, 0.2020, 0.2287, 0.2121]) -Greedy action tensor([ 0.7783, -0.4281, -0.1067, -0.1774]) tensor([0.4770, 0.1427, 0.1968, 0.1834]) -Greedy action tensor([ 0.6248, -0.4670, -0.0248, -0.5315]) tensor([0.4603, 0.1545, 0.2404, 0.1448]) -Greedy action tensor([ 8.5006e-01, -8.3466e-01, -2.4304e-04, -4.7488e-01]) tensor([0.5323, 0.0987, 0.2274, 0.1415]) -Greedy action tensor([ 0.7158, -0.6969, -0.0649, -0.5737]) tensor([0.5058, 0.1232, 0.2317, 0.1393]) -Greedy action tensor([ 0.7765, -0.5306, -0.1796, -0.2737]) tensor([0.4988, 0.1350, 0.1917, 0.1745]) -Greedy action tensor([ 0.4962, -0.2967, -0.0497, -0.1815]) tensor([0.3937, 0.1782, 0.2281, 0.1999]) -Greedy action tensor([ 0.6485, -0.5307, -0.0439, -0.3632]) tensor([0.4605, 0.1416, 0.2304, 0.1674]) -Greedy action tensor([ 0.8798, -0.4719, 0.0191, -0.2103]) tensor([0.4956, 0.1283, 0.2096, 0.1666]) -Greedy action tensor([ 0.6148, -0.6071, 0.0131, -0.2025]) tensor([0.4378, 0.1290, 0.2398, 0.1933]) -Greedy action tensor([ 9.3914e-01, -2.2259e-01, -3.6617e-04, -4.8458e-01]) tensor([0.5142, 0.1609, 0.2010, 0.1238]) -Greedy action tensor([ 0.4641, 0.0211, -0.0444, 0.0978]) tensor([0.3405, 0.2186, 0.2048, 0.2361]) -Greedy action tensor([ 0.4241, -0.1354, -0.1001, -0.3133]) tensor([0.3785, 0.2163, 0.2241, 0.1811]) -Greedy action tensor([ 0.5831, -0.1864, -0.1494, -0.0975]) tensor([0.4081, 0.1891, 0.1962, 0.2066]) -Greedy action tensor([ 0.6886, -0.5322, -0.0739, -0.1957]) tensor([0.4599, 0.1357, 0.2145, 0.1899]) -Greedy action tensor([ 0.3969, -0.2734, -0.0375, -0.3071]) tensor([0.3768, 0.1928, 0.2440, 0.1864]) -Greedy action tensor([ 0.6564, -0.3639, 0.0733, -0.6825]) tensor([0.4586, 0.1653, 0.2559, 0.1202]) -Greedy action tensor([ 0.7055, -0.5560, -0.1392, -0.0597]) tensor([0.4591, 0.1300, 0.1973, 0.2136]) -Greedy action tensor([ 0.8967, -1.1383, 0.1147, -0.5942]) tensor([0.5515, 0.0721, 0.2523, 0.1242]) -Greedy action tensor([ 0.3706, -0.0427, 0.0062, -0.1622]) tensor([0.3398, 0.2248, 0.2360, 0.1994]) -Greedy action tensor([ 0.9273, -0.8850, 0.1108, -0.4103]) tensor([0.5354, 0.0874, 0.2366, 0.1405]) -Greedy action tensor([ 0.9385, -0.4191, -0.2137, -0.3161]) tensor([0.5381, 0.1384, 0.1700, 0.1535]) -Greedy action tensor([ 0.5588, -0.3080, -0.1084, -0.1931]) tensor([0.4158, 0.1748, 0.2134, 0.1960]) -Greedy action tensor([ 0.6965, -0.4544, -0.1697, -0.3475]) tensor([0.4787, 0.1514, 0.2013, 0.1685]) -Greedy action tensor([ 0.6061, -0.4680, -0.0669, -0.2479]) tensor([0.4391, 0.1500, 0.2240, 0.1869]) -Greedy action tensor([ 0.7326, -0.7915, 0.1416, -0.5962]) tensor([0.4911, 0.1070, 0.2719, 0.1300]) -Greedy action tensor([ 0.3376, -0.0295, -0.0860, -0.1615]) tensor([0.3385, 0.2345, 0.2216, 0.2055]) -Greedy action tensor([ 0.3189, -0.1353, 0.3027, -0.3775]) tensor([0.3208, 0.2037, 0.3156, 0.1599]) -Greedy action tensor([ 0.1926, -0.2058, -0.1007, -0.1853]) tensor([0.3223, 0.2164, 0.2404, 0.2209]) -Greedy action tensor([ 1.2183, -1.0144, 0.0977, -0.5065]) tensor([0.6205, 0.0665, 0.2023, 0.1106]) -Greedy action tensor([ 0.4920, -0.3382, -0.1358, -0.2244]) tensor([0.4068, 0.1774, 0.2171, 0.1987]) -Greedy action tensor([ 0.5939, -0.5159, 0.0023, -0.2543]) tensor([0.4327, 0.1426, 0.2394, 0.1853]) -Greedy action tensor([ 0.7486, -0.1558, 0.0616, -0.3481]) tensor([0.4461, 0.1806, 0.2244, 0.1490]) -Greedy action tensor([ 1.0854, -0.7252, -0.1190, -0.3629]) tensor([0.5888, 0.0963, 0.1766, 0.1383]) -Greedy action tensor([ 0.8207, -0.6957, -0.0306, -0.4910]) tensor([0.5220, 0.1146, 0.2228, 0.1406]) -Greedy action tensor([ 1.0372, -0.5016, 0.0191, -0.6162]) tensor([0.5658, 0.1215, 0.2044, 0.1083]) -Greedy action tensor([ 0.8897, -0.6483, 0.0834, -0.5695]) tensor([0.5281, 0.1134, 0.2358, 0.1227]) -Greedy action tensor([ 0.4559, -0.5044, -0.0460, -0.1530]) tensor([0.3949, 0.1512, 0.2391, 0.2148]) -Greedy action tensor([ 0.2407, -0.0920, -0.0165, -0.2707]) tensor([0.3236, 0.2321, 0.2502, 0.1941]) -Greedy action tensor([ 0.3334, -0.3922, -0.1204, -0.1890]) tensor([0.3687, 0.1785, 0.2342, 0.2187]) -Greedy action tensor([ 0.6976, -0.5353, -0.0152, -0.1182]) tensor([0.4497, 0.1310, 0.2204, 0.1989]) -Greedy action tensor([ 0.3332, 0.0871, -0.2156, 0.0718]) tensor([0.3195, 0.2498, 0.1846, 0.2460]) -Greedy action tensor([ 0.7071, -0.3452, -0.1326, -0.1957]) tensor([0.4574, 0.1597, 0.1975, 0.1854]) -Greedy action tensor([ 0.5654, -0.4146, -0.1236, -0.1364]) tensor([0.4214, 0.1582, 0.2116, 0.2089]) -Greedy action tensor([ 0.4913, -0.0934, -0.0321, -0.1773]) tensor([0.3756, 0.2093, 0.2226, 0.1925]) -Greedy action tensor([ 0.5998, -0.3316, -0.0401, -0.3253]) tensor([0.4314, 0.1700, 0.2275, 0.1711]) -Greedy action tensor([ 0.9590, -0.4647, -0.0342, -0.1890]) tensor([0.5185, 0.1249, 0.1921, 0.1645]) -Greedy action tensor([ 0.6564, -0.5446, -0.0789, -0.3971]) tensor([0.4697, 0.1413, 0.2252, 0.1638]) -Greedy action tensor([ 0.0847, 0.3294, -0.1640, -0.6541]) tensor([0.2829, 0.3613, 0.2206, 0.1351]) -Greedy action tensor([ 1.0348, -0.7489, -0.0758, -0.4050]) tensor([0.5766, 0.0969, 0.1899, 0.1366]) -Greedy action tensor([ 0.9205, -0.2254, -0.4732, -0.4909]) tensor([0.5525, 0.1757, 0.1371, 0.1347]) -Greedy action tensor([ 1.1021, -0.6481, 0.0582, -0.6480]) tensor([0.5884, 0.1022, 0.2072, 0.1022]) -Greedy action tensor([ 1.0646, -0.7484, -0.1127, -0.4576]) tensor([0.5919, 0.0966, 0.1824, 0.1292]) -Greedy action tensor([ 0.8547, -0.6939, 0.0893, -0.6849]) tensor([0.5285, 0.1123, 0.2458, 0.1133]) -Greedy action tensor([ 0.3998, -0.2090, -0.0498, -0.2098]) tensor([0.3669, 0.1996, 0.2340, 0.1994]) -Greedy action tensor([ 1.0893, -0.6804, -0.0070, -0.4444]) tensor([0.5813, 0.0990, 0.1942, 0.1254]) -Greedy action tensor([ 0.7442, 0.0972, 0.1126, -0.4674]) tensor([0.4250, 0.2225, 0.2260, 0.1265]) -Greedy action tensor([ 0.4644, 0.1492, -0.1562, 0.0519]) tensor([0.3414, 0.2491, 0.1835, 0.2260]) -Greedy action tensor([ 1.6766, -0.9395, -0.1840, -0.6276]) tensor([0.7527, 0.0550, 0.1171, 0.0751]) -Greedy action tensor([ 0.6953, -0.3667, -0.0308, -0.2234]) tensor([0.4487, 0.1551, 0.2171, 0.1791]) -Greedy action tensor([ 0.6014, 0.0482, -0.0490, -0.4336]) tensor([0.4078, 0.2345, 0.2128, 0.1449]) -Greedy action tensor([ 4.9381e-01, -3.3202e-01, -2.0050e-04, -3.4317e-01]) tensor([0.4031, 0.1765, 0.2459, 0.1745]) -Greedy action tensor([ 0.7266, -0.2587, -0.1687, -0.0714]) tensor([0.4480, 0.1673, 0.1830, 0.2017]) -Greedy action tensor([ 0.7896, -0.3739, -0.0049, -0.2591]) tensor([0.4729, 0.1477, 0.2137, 0.1657]) -Greedy action tensor([ 0.5874, -0.3986, -0.0516, -0.1799]) tensor([0.4228, 0.1577, 0.2232, 0.1963]) -Greedy action tensor([ 1.0762, -0.6988, -0.1180, -0.5887]) tensor([0.6018, 0.1020, 0.1823, 0.1139]) -Greedy action tensor([ 0.7001, -0.4652, -0.1114, -0.2360]) tensor([0.4655, 0.1452, 0.2068, 0.1826]) -Greedy action tensor([ 0.7553, -0.2255, -0.0691, -0.1443]) tensor([0.4504, 0.1689, 0.1975, 0.1832]) -Greedy action tensor([ 0.8729, -0.6788, 0.0075, -0.3618]) tensor([0.5198, 0.1101, 0.2188, 0.1512]) -Greedy action tensor([ 0.6394, -0.6303, -0.0549, -0.3020]) tensor([0.4607, 0.1294, 0.2301, 0.1797]) -Greedy action tensor([ 0.5933, -0.4537, -0.0942, -0.3379]) tensor([0.4448, 0.1561, 0.2237, 0.1753]) -Greedy action tensor([ 0.9044, -0.2602, -0.2451, -0.4153]) tensor([0.5274, 0.1646, 0.1671, 0.1409]) -Greedy action tensor([ 1.2541, -0.7548, 0.1753, -0.7841]) tensor([0.6233, 0.0836, 0.2119, 0.0812]) -Greedy action tensor([ 0.6486, -0.3844, 0.0154, -0.1343]) tensor([0.4266, 0.1519, 0.2265, 0.1950]) -Greedy action tensor([ 0.6325, -0.2351, -0.0603, -0.1804]) tensor([0.4231, 0.1777, 0.2116, 0.1877]) -Greedy action tensor([ 0.7529, -0.8628, 0.0341, -0.3378]) tensor([0.4945, 0.0983, 0.2410, 0.1662]) -Greedy action tensor([ 0.6364, -0.3776, -0.0364, -0.1184]) tensor([0.4268, 0.1548, 0.2178, 0.2006]) -Greedy action tensor([ 0.3347, -0.0984, -0.1066, -0.1590]) tensor([0.3446, 0.2235, 0.2216, 0.2103]) -Greedy action tensor([ 0.5459, 0.0836, 0.0278, -0.0224]) tensor([0.3582, 0.2256, 0.2134, 0.2029]) -Greedy action tensor([ 0.2006, 0.4818, 0.5640, -0.1271]) tensor([0.2230, 0.2955, 0.3208, 0.1607]) -Greedy action tensor([ 0.6972, -1.1111, -0.3861, -0.0778]) tensor([0.5094, 0.0835, 0.1724, 0.2347]) -Greedy action tensor([ 0.6533, -1.3378, -0.1801, 0.7506]) tensor([0.3741, 0.0511, 0.1626, 0.4123]) -Greedy action tensor([-0.2927, -0.7848, -0.6996, -0.2445]) tensor([0.3006, 0.1838, 0.2001, 0.3155]) -Greedy action tensor([-0.5135, -0.5995, 0.0798, -0.2054]) tensor([0.1965, 0.1803, 0.3557, 0.2674]) -Greedy action tensor([ 0.4952, -0.5719, 0.4982, -0.5061]) tensor([0.3684, 0.1267, 0.3695, 0.1354]) -Greedy action tensor([-0.4603, -0.6211, 1.0629, -1.2010]) tensor([0.1446, 0.1231, 0.6633, 0.0689]) -Greedy action tensor([ 0.2732, 0.2174, 0.4077, -0.3846]) tensor([0.2772, 0.2621, 0.3171, 0.1436]) -Greedy action tensor([ 0.5578, -0.0125, 1.1919, 0.8562]) tensor([0.2084, 0.1178, 0.3929, 0.2809]) -Greedy action tensor([ 1.2097, -0.0420, 0.4053, 0.8340]) tensor([0.4132, 0.1182, 0.1849, 0.2838]) -Greedy action tensor([ 0.9380, -1.0277, 0.6469, 1.6508]) tensor([0.2546, 0.0357, 0.1903, 0.5194]) -Greedy action tensor([-0.2562, -1.0866, 0.3203, 0.0718]) tensor([0.2172, 0.0947, 0.3866, 0.3015]) -Greedy action tensor([-0.2062, -2.5024, 0.5522, 0.3816]) tensor([0.1986, 0.0200, 0.4240, 0.3575]) -Greedy action tensor([1.0913, 0.3445, 0.9557, 1.0379]) tensor([0.3035, 0.1438, 0.2650, 0.2877]) -Greedy action tensor([ 0.0653, 0.1833, 1.0947, -1.2103]) tensor([0.1922, 0.2162, 0.5379, 0.0537]) -Greedy action tensor([ 0.8053, -1.8602, -0.2835, 0.4868]) tensor([0.4687, 0.0326, 0.1578, 0.3409]) -Greedy action tensor([-0.2423, 0.2250, -0.0465, -0.4657]) tensor([0.2168, 0.3460, 0.2637, 0.1734]) -Greedy action tensor([ 0.1288, -0.2032, 0.6382, -0.8545]) tensor([0.2663, 0.1910, 0.4431, 0.0996]) -Greedy action tensor([1.5094, 0.1421, 1.6794, 0.3287]) tensor([0.3640, 0.0927, 0.4314, 0.1118]) -Greedy action tensor([-0.2579, 0.0889, -0.7481, -0.3539]) tensor([0.2541, 0.3594, 0.1556, 0.2308]) -Greedy action tensor([-0.5907, -1.9899, 0.4183, 0.2709]) tensor([0.1573, 0.0388, 0.4315, 0.3724]) -Greedy action tensor([ 0.1042, 0.5864, 0.5688, -0.4598]) tensor([0.2092, 0.3388, 0.3329, 0.1190]) -Greedy action tensor([-0.6993, 0.3741, 0.4974, -0.8319]) tensor([0.1233, 0.3607, 0.4080, 0.1080]) -Greedy action tensor([-0.1060, -1.2037, 0.6723, 0.9142]) tensor([0.1591, 0.0531, 0.3465, 0.4413]) -Greedy action tensor([ 0.9754, -0.2890, -0.0628, 0.3645]) tensor([0.4588, 0.1296, 0.1625, 0.2491]) -Greedy action tensor([ 0.1270, -1.7160, 0.6556, -0.2102]) tensor([0.2802, 0.0444, 0.4754, 0.2000]) -Greedy action tensor([ 1.2079, -0.2739, -0.5296, 0.9688]) tensor([0.4565, 0.1037, 0.0803, 0.3594]) -Greedy action tensor([ 0.6144, -1.2271, 1.1223, 1.0388]) tensor([0.2299, 0.0365, 0.3821, 0.3515]) -Greedy action tensor([-0.6643, -0.5181, -1.2225, 0.4278]) tensor([0.1751, 0.2027, 0.1002, 0.5220]) -Greedy action tensor([ 0.1611, 0.7400, 0.4294, -0.2064]) tensor([0.2090, 0.3729, 0.2733, 0.1447]) -Greedy action tensor([-0.3231, -0.7349, -1.1913, -0.1935]) tensor([0.3105, 0.2057, 0.1303, 0.3535]) -Greedy action tensor([-0.3680, -0.8923, 2.0892, -1.1235]) tensor([0.0728, 0.0431, 0.8499, 0.0342]) -Greedy action tensor([-1.5353, -0.8035, -1.2670, 0.8868]) tensor([0.0639, 0.1328, 0.0835, 0.7198]) -Greedy action tensor([ 1.6979, -1.0176, 0.3956, 0.5998]) tensor([0.5982, 0.0396, 0.1627, 0.1995]) -Greedy action tensor([ 0.8116, -0.9926, -0.1102, 1.1557]) tensor([0.3363, 0.0554, 0.1338, 0.4745]) -Greedy action tensor([-0.7621, 0.1242, 0.3847, -0.6755]) tensor([0.1305, 0.3165, 0.4107, 0.1423]) -Greedy action tensor([-0.2500, -1.7361, 1.0438, -0.0891]) tensor([0.1654, 0.0374, 0.6030, 0.1942]) -Greedy action tensor([ 1.0459, -0.0517, 1.1478, -0.3552]) tensor([0.3721, 0.1242, 0.4120, 0.0917]) -Greedy action tensor([-0.2825, -1.0723, -0.3497, 0.1982]) tensor([0.2496, 0.1133, 0.2334, 0.4037]) -Greedy action tensor([ 1.1653, -0.4220, -0.3571, 0.7102]) tensor([0.4861, 0.0994, 0.1061, 0.3084]) -Greedy action tensor([-0.0974, -0.9578, 0.3820, -0.5815]) tensor([0.2736, 0.1157, 0.4420, 0.1686]) -Greedy action tensor([0.8578, 0.4448, 0.6223, 0.0685]) tensor([0.3441, 0.2277, 0.2719, 0.1563]) -Greedy action tensor([ 1.4113, -0.3366, 1.0850, 0.7769]) tensor([0.4122, 0.0718, 0.2974, 0.2186]) -Greedy action tensor([-0.6251, -2.0666, 0.0459, -0.4103]) tensor([0.2256, 0.0534, 0.4413, 0.2797]) -Greedy action tensor([ 1.3373, -0.0870, 0.8750, -0.1692]) tensor([0.4780, 0.1150, 0.3010, 0.1060]) -Greedy action tensor([ 0.2653, 0.1365, 1.0872, -0.1612]) tensor([0.2080, 0.1829, 0.4733, 0.1358]) -Greedy action tensor([ 0.4968, -1.8621, -0.2130, 1.2338]) tensor([0.2721, 0.0257, 0.1338, 0.5685]) -Greedy action tensor([ 0.1682, -0.6150, -0.1050, 0.0361]) tensor([0.3232, 0.1477, 0.2459, 0.2832]) -Greedy action tensor([ 0.2548, -0.8276, 0.2547, -0.1010]) tensor([0.3290, 0.1115, 0.3290, 0.2305]) -Greedy action tensor([ 0.5334, -0.5716, 0.9923, 0.2593]) tensor([0.2722, 0.0902, 0.4307, 0.2069]) -Greedy action tensor([-0.8463, 0.6425, -0.6331, -1.1940]) tensor([0.1356, 0.6009, 0.1678, 0.0958]) -Greedy action tensor([ 0.6539, -0.6470, 0.3052, -0.3352]) tensor([0.4256, 0.1159, 0.3003, 0.1583]) -Greedy action tensor([-0.2859, -0.2249, -0.3844, -0.3730]) tensor([0.2574, 0.2735, 0.2332, 0.2359]) -Greedy action tensor([ 0.6260, -1.6119, -0.0957, 0.3409]) tensor([0.4265, 0.0455, 0.2073, 0.3207]) -Greedy action tensor([ 0.1154, -1.0365, -1.2596, -0.0175]) tensor([0.4091, 0.1293, 0.1034, 0.3582]) -Greedy action tensor([-0.6373, -0.4141, 0.8408, -1.0981]) tensor([0.1376, 0.1721, 0.6035, 0.0868]) -Greedy action tensor([ 0.5424, -1.1318, 0.2125, 0.1302]) tensor([0.3893, 0.0730, 0.2799, 0.2578]) -Greedy action tensor([ 0.5238, -1.3397, 0.9101, -0.5983]) tensor([0.3387, 0.0525, 0.4984, 0.1103]) -Greedy action tensor([ 1.1921, 0.0022, -0.4419, 0.9428]) tensor([0.4388, 0.1335, 0.0856, 0.3420]) -Greedy action tensor([-0.6654, -0.3024, 0.8418, -0.8598]) tensor([0.1286, 0.1849, 0.5806, 0.1059]) -Greedy action tensor([ 0.2488, -0.7972, 0.0509, -0.1513]) tensor([0.3519, 0.1236, 0.2887, 0.2358]) -Greedy action tensor([0.5780, 0.5143, 0.6683, 0.0358]) tensor([0.2767, 0.2596, 0.3028, 0.1609]) -Greedy action tensor([ 0.4242, -0.1106, 1.5656, -0.0661]) tensor([0.1876, 0.1099, 0.5875, 0.1149]) -Greedy action tensor([ 0.2410, -1.1849, -0.1009, 0.8024]) tensor([0.2700, 0.0649, 0.1918, 0.4733]) -Greedy action tensor([ 0.1695, -2.3509, 0.0239, 0.3211]) tensor([0.3217, 0.0259, 0.2781, 0.3743]) -Greedy action tensor([-0.5911, -0.4682, -1.3997, 0.5449]) tensor([0.1757, 0.1987, 0.0783, 0.5473]) -Greedy action tensor([ 1.1714, -1.0832, -0.5098, 1.0971]) tensor([0.4506, 0.0473, 0.0839, 0.4183]) -Greedy action tensor([ 1.5543, -0.0117, -0.2870, 0.8339]) tensor([0.5394, 0.1127, 0.0855, 0.2624]) -Greedy action tensor([ 0.3421, -0.2013, 0.3276, 0.8653]) tensor([0.2351, 0.1365, 0.2317, 0.3967]) -Greedy action tensor([ 0.4776, -0.6204, 1.3947, -0.9830]) tensor([0.2458, 0.0820, 0.6151, 0.0571]) -Greedy action tensor([-0.0117, 0.2661, 0.4997, -0.0554]) tensor([0.2022, 0.2670, 0.3372, 0.1936]) -Greedy action tensor([-0.2538, -0.7022, -0.3399, -0.1715]) tensor([0.2746, 0.1754, 0.2519, 0.2981]) -Greedy action tensor([ 0.8681, 0.1522, 1.2677, -0.4795]) tensor([0.3086, 0.1509, 0.4603, 0.0802]) -Greedy action tensor([ 0.2618, 0.2764, -0.0093, -1.0394]) tensor([0.3279, 0.3327, 0.2501, 0.0893]) -Greedy action tensor([-0.2429, -2.1861, -0.0601, -0.4895]) tensor([0.3200, 0.0458, 0.3842, 0.2500]) -Greedy action tensor([-0.1353, -0.7518, -0.5466, 0.2193]) tensor([0.2756, 0.1488, 0.1827, 0.3929]) -Greedy action tensor([-0.1377, -0.6110, -0.5794, -0.6989]) tensor([0.3526, 0.2196, 0.2267, 0.2011]) -Greedy action tensor([ 0.3709, -1.2696, -0.1804, 0.4613]) tensor([0.3491, 0.0677, 0.2011, 0.3821]) -Greedy action tensor([-0.3376, 0.0518, 0.6970, -0.0733]) tensor([0.1517, 0.2239, 0.4268, 0.1976]) -Greedy action tensor([ 0.0541, 0.0290, -0.4141, 0.6944]) tensor([0.2223, 0.2168, 0.1392, 0.4217]) -Greedy action tensor([ 0.5770, -0.3982, 0.2229, 0.0429]) tensor([0.3752, 0.1415, 0.2633, 0.2200]) -Greedy action tensor([-1.9324, -0.4387, 0.6621, -0.1732]) tensor([0.0406, 0.1807, 0.5432, 0.2356]) -Greedy action tensor([-1.9259, -0.4424, 0.6554, -0.1723]) tensor([0.0410, 0.1807, 0.5416, 0.2367]) -Greedy action tensor([-1.9352, -0.4445, 0.6618, -0.1759]) tensor([0.0405, 0.1800, 0.5441, 0.2354]) -Greedy action tensor([-1.9258, -0.4358, 0.6542, -0.1727]) tensor([0.0410, 0.1818, 0.5407, 0.2365]) -Greedy action tensor([-1.8352, -0.4402, 0.6076, -0.1245]) tensor([0.0453, 0.1828, 0.5212, 0.2507]) -Greedy action tensor([-1.8138, -0.2140, 0.6161, -0.0763]) tensor([0.0435, 0.2154, 0.4940, 0.2472]) -Greedy action tensor([-1.9342, -0.4288, 0.6595, -0.1732]) tensor([0.0405, 0.1824, 0.5416, 0.2355]) -Greedy action tensor([-1.9276, -0.4487, 0.6601, -0.1683]) tensor([0.0408, 0.1791, 0.5429, 0.2371]) -Greedy action tensor([-1.7231, -0.3804, 0.5498, -0.1055]) tensor([0.0511, 0.1956, 0.4958, 0.2575]) -Greedy action tensor([-1.8814, -0.4384, 0.6351, -0.1395]) tensor([0.0429, 0.1815, 0.5309, 0.2447]) -Greedy action tensor([-1.8692, -0.4249, 0.6204, -0.1401]) tensor([0.0436, 0.1849, 0.5258, 0.2458]) -Greedy action tensor([-1.8331, -0.3352, 0.5987, -0.1414]) tensor([0.0449, 0.2007, 0.5107, 0.2437]) -Greedy action tensor([-1.8996, -0.3890, 0.6475, -0.1499]) tensor([0.0416, 0.1883, 0.5309, 0.2392]) -Greedy action tensor([-1.9356, -0.4427, 0.6639, -0.1754]) tensor([0.0405, 0.1800, 0.5444, 0.2352]) -Greedy action tensor([-1.1242, 0.2081, 0.4406, 0.7656]) tensor([0.0618, 0.2341, 0.2954, 0.4088]) -Greedy action tensor([-1.0353, 0.6096, 0.2778, -0.1842]) tensor([0.0817, 0.4232, 0.3037, 0.1914]) -Greedy action tensor([-0.4466, 0.8542, -0.1672, -0.2262]) tensor([0.1381, 0.5071, 0.1826, 0.1722]) -Greedy action tensor([-1.8463, -0.3213, 0.6173, -0.1094]) tensor([0.0434, 0.1996, 0.5103, 0.2467]) -Greedy action tensor([-0.9792, 0.6776, 0.1457, 0.0421]) tensor([0.0826, 0.4333, 0.2546, 0.2295]) -Greedy action tensor([-1.5042, -0.4385, 0.4614, 0.0736]) tensor([0.0629, 0.1827, 0.4494, 0.3049]) -Greedy action tensor([-1.9035, -0.4547, 0.6507, -0.1607]) tensor([0.0420, 0.1787, 0.5397, 0.2397]) -Greedy action tensor([-1.8952, -0.4578, 0.6478, -0.1555]) tensor([0.0423, 0.1782, 0.5383, 0.2411]) -Greedy action tensor([-1.7208, -0.2731, 0.5318, -0.1002]) tensor([0.0504, 0.2146, 0.4799, 0.2551]) -Greedy action tensor([-1.9031, -0.3669, 0.6345, -0.1543]) tensor([0.0416, 0.1933, 0.5261, 0.2391]) -Greedy action tensor([-1.5086, -0.4485, 1.1500, 0.5549]) tensor([0.0384, 0.1109, 0.5483, 0.3024]) -Greedy action tensor([-1.9070, -0.4407, 0.6713, -0.1581]) tensor([0.0412, 0.1786, 0.5432, 0.2370]) -Greedy action tensor([-1.9286, -0.4360, 0.6612, -0.1708]) tensor([0.0407, 0.1810, 0.5423, 0.2360]) -Greedy action tensor([-1.7713, -0.4387, 0.5932, -0.0708]) tensor([0.0478, 0.1813, 0.5089, 0.2620]) -Greedy action tensor([-1.9199, -0.4347, 0.6529, -0.1684]) tensor([0.0412, 0.1819, 0.5396, 0.2374]) -Greedy action tensor([-1.6990, -0.3473, 0.6665, -0.0522]) tensor([0.0483, 0.1866, 0.5144, 0.2507]) -Greedy action tensor([-1.7723, -0.4496, 0.5810, -0.0945]) tensor([0.0485, 0.1820, 0.5100, 0.2596]) -Greedy action tensor([-1.9223, -0.4262, 0.6501, -0.1694]) tensor([0.0411, 0.1835, 0.5382, 0.2372]) -Greedy action tensor([-1.9401, -0.4457, 0.6657, -0.1765]) tensor([0.0403, 0.1795, 0.5453, 0.2349]) -Greedy action tensor([-1.8329, -0.3546, 0.6214, -0.0462]) tensor([0.0435, 0.1907, 0.5062, 0.2596]) -Greedy action tensor([-0.9732, -0.0708, -0.3164, -0.1781]) tensor([0.1314, 0.3240, 0.2535, 0.2911]) -Greedy action tensor([-1.9262, -0.4466, 0.6545, -0.1693]) tensor([0.0410, 0.1800, 0.5414, 0.2376]) -Greedy action tensor([-1.9373, -0.4435, 0.6640, -0.1764]) tensor([0.0404, 0.1799, 0.5446, 0.2350]) -Greedy action tensor([-1.9108, -0.4521, 0.6523, -0.1629]) tensor([0.0416, 0.1790, 0.5402, 0.2391]) -Greedy action tensor([-1.8409, -0.4593, 0.6199, -0.1270]) tensor([0.0450, 0.1790, 0.5266, 0.2495]) -Greedy action tensor([-0.7744, -0.1218, 0.2725, -0.2444]) tensor([0.1339, 0.2571, 0.3815, 0.2275]) -Greedy action tensor([-1.8525, -0.2025, 0.5998, -0.0930]) tensor([0.0423, 0.2203, 0.4915, 0.2458]) -Greedy action tensor([-1.8878, -0.4518, 0.6427, -0.1497]) tensor([0.0426, 0.1793, 0.5356, 0.2425]) -Greedy action tensor([-1.0008, -0.5472, 0.4192, -0.2727]) tensor([0.1139, 0.1792, 0.4711, 0.2358]) -Greedy action tensor([-0.8959, -0.5080, 0.4723, 1.0921]) tensor([0.0730, 0.1076, 0.2867, 0.5328]) -Greedy action tensor([-1.9358, -0.4448, 0.6669, -0.1731]) tensor([0.0404, 0.1793, 0.5450, 0.2353]) -Greedy action tensor([-1.8755, -0.3654, 0.6243, -0.1461]) tensor([0.0428, 0.1939, 0.5218, 0.2415]) -Greedy action tensor([-1.6759, -0.5129, 0.5997, 0.0914]) tensor([0.0505, 0.1617, 0.4919, 0.2959]) -Greedy action tensor([-1.9318, -0.4225, 0.6594, -0.1744]) tensor([0.0405, 0.1834, 0.5411, 0.2350]) -Greedy action tensor([-1.8392, -0.4483, 0.6456, -0.0546]) tensor([0.0435, 0.1749, 0.5223, 0.2593]) -Greedy action tensor([-1.7901, -0.4133, 0.6736, 0.0112]) tensor([0.0439, 0.1740, 0.5160, 0.2661]) -Greedy action tensor([-1.9049, -0.3208, 0.6182, -0.1467]) tensor([0.0414, 0.2019, 0.5164, 0.2403]) -Greedy action tensor([-1.7916, -0.9353, 0.3479, -0.3052]) tensor([0.0615, 0.1447, 0.5221, 0.2717]) -Greedy action tensor([-1.8642, -0.4301, 0.6278, -0.1418]) tensor([0.0437, 0.1834, 0.5282, 0.2447]) -Greedy action tensor([-1.2136, 0.4558, 0.2040, 0.3684]) tensor([0.0654, 0.3470, 0.2697, 0.3179]) -Greedy action tensor([-1.1280, -0.8846, 0.9293, 1.2608]) tensor([0.0476, 0.0607, 0.3726, 0.5190]) -Greedy action tensor([-1.0940, -0.3374, 0.2958, 0.2613]) tensor([0.0907, 0.1933, 0.3641, 0.3518]) -Greedy action tensor([-1.8860, -0.7170, 0.8783, 0.2088]) tensor([0.0354, 0.1141, 0.5625, 0.2880]) -Greedy action tensor([-1.9229, -0.4250, 0.6641, -0.1553]) tensor([0.0406, 0.1817, 0.5398, 0.2379]) -Greedy action tensor([-1.5640, -0.5623, 0.4848, 0.0148]) tensor([0.0612, 0.1667, 0.4751, 0.2969]) -Greedy action tensor([-0.2882, -0.1857, 0.1708, 0.1873]) tensor([0.1887, 0.2091, 0.2986, 0.3036]) -Greedy action tensor([-1.3457, -0.2919, 1.3381, 1.1016]) tensor([0.0333, 0.0954, 0.4869, 0.3844]) -Greedy action tensor([-1.5825, -0.4266, 0.6571, 0.2650]) tensor([0.0502, 0.1596, 0.4716, 0.3186]) -Greedy action tensor([-1.7723, -0.2879, 0.5574, -0.1195]) tensor([0.0478, 0.2110, 0.4914, 0.2497]) -Greedy action tensor([-1.8623, -0.4441, 0.6734, -0.1307]) tensor([0.0427, 0.1765, 0.5394, 0.2414]) -Greedy action tensor([-1.6933, -0.1405, 0.5131, -0.1016]) tensor([0.0507, 0.2396, 0.4606, 0.2491]) -Greedy action tensor([-1.6594, -0.2201, 0.7086, 0.1320]) tensor([0.0457, 0.1927, 0.4877, 0.2740]) -Greedy action tensor([-1.8832, -0.4790, 0.6301, -0.1684]) tensor([0.0435, 0.1773, 0.5374, 0.2418]) -Greedy action tensor([-0.9619, 0.2471, 0.1894, -0.0420]) tensor([0.0998, 0.3343, 0.3156, 0.2504]) -Greedy action tensor([-1.8603, -0.4349, 0.6260, -0.1417]) tensor([0.0439, 0.1828, 0.5281, 0.2451]) -Greedy action tensor([-0.9960, -0.6381, 0.1510, 0.6600]) tensor([0.0924, 0.1322, 0.2911, 0.4842]) -Greedy action tensor([-1.6107, -0.0184, 0.5585, 0.0968]) tensor([0.0496, 0.2435, 0.4336, 0.2733]) -Greedy action tensor([-1.9049, -0.4378, 0.6437, -0.1610]) tensor([0.0419, 0.1819, 0.5364, 0.2399]) -Greedy action tensor([-1.6151, -0.5117, 0.5219, -0.0151]) tensor([0.0573, 0.1728, 0.4859, 0.2840]) -Greedy action tensor([-1.8994, -0.4574, 0.6481, -0.1463]) tensor([0.0421, 0.1779, 0.5373, 0.2428]) -Greedy action tensor([-1.9395, -0.4404, 0.6628, -0.1775]) tensor([0.0403, 0.1806, 0.5442, 0.2349]) -Greedy action tensor([-1.6864, -0.1593, 0.5227, 0.0021]) tensor([0.0497, 0.2288, 0.4526, 0.2689]) -Greedy action tensor([-1.3642, -0.4611, 0.3750, 0.1438]) tensor([0.0731, 0.1804, 0.4162, 0.3303]) -Greedy action tensor([-1.8028, -0.2554, 0.6332, -0.3040]) tensor([0.0463, 0.2175, 0.5290, 0.2072]) -Greedy action tensor([-1.5724, -0.5068, 0.4861, 0.1473]) tensor([0.0577, 0.1676, 0.4523, 0.3223]) -Greedy action tensor([-1.8913, -0.4523, 0.6943, -0.1113]) tensor([0.0410, 0.1727, 0.5435, 0.2429]) -Greedy action tensor([-0.3265, -0.6644, 0.3378, -0.1685]) tensor([0.2072, 0.1478, 0.4025, 0.2426]) -Greedy action tensor([ 1.4519, -0.5510, -0.5074, 0.3495]) tensor([0.6219, 0.0839, 0.0877, 0.2065]) -Greedy action tensor([ 1.4202, -0.1592, -0.9806, 0.0570]) tensor([0.6441, 0.1328, 0.0584, 0.1648]) -Greedy action tensor([ 0.9114, -0.1519, -0.5473, 0.1403]) tensor([0.4901, 0.1692, 0.1140, 0.2267]) -Greedy action tensor([ 1.4054, 0.0545, -0.2102, 0.2456]) tensor([0.5645, 0.1462, 0.1122, 0.1770]) -Greedy action tensor([ 1.3264, 0.0403, -0.7362, 0.5710]) tensor([0.5338, 0.1475, 0.0679, 0.2508]) -Greedy action tensor([ 1.2760, -0.4018, -0.3924, 0.2332]) tensor([0.5788, 0.1081, 0.1091, 0.2040]) -Greedy action tensor([ 1.3845, -0.4811, -0.2759, 0.4535]) tensor([0.5750, 0.0890, 0.1093, 0.2267]) -Greedy action tensor([ 1.9144, -0.4430, -0.6285, 0.4804]) tensor([0.7084, 0.0671, 0.0557, 0.1688]) -Greedy action tensor([ 1.4154, -0.7314, -0.4745, 0.5086]) tensor([0.5982, 0.0699, 0.0904, 0.2416]) -Greedy action tensor([ 1.9228, -0.2645, -0.8604, 0.2858]) tensor([0.7307, 0.0820, 0.0452, 0.1422]) -Greedy action tensor([ 0.5500, -0.1704, 0.1520, -0.0382]) tensor([0.3685, 0.1793, 0.2475, 0.2047]) -Greedy action tensor([ 1.1274, -0.2485, -0.1128, -0.0159]) tensor([0.5374, 0.1358, 0.1555, 0.1713]) -Greedy action tensor([ 0.7773, -0.1905, 0.0309, -0.2520]) tensor([0.4522, 0.1718, 0.2144, 0.1616]) -Greedy action tensor([ 1.4491, -0.6408, -0.5102, 0.2293]) tensor([0.6411, 0.0793, 0.0904, 0.1893]) -Greedy action tensor([ 1.1750, -0.2385, -0.9687, 0.1023]) tensor([0.5873, 0.1429, 0.0688, 0.2009]) -Greedy action tensor([ 1.0618, -0.3509, -0.1144, 0.3076]) tensor([0.4945, 0.1204, 0.1525, 0.2326]) -Greedy action tensor([ 1.0015, -0.2058, -0.6241, 0.1206]) tensor([0.5235, 0.1565, 0.1030, 0.2169]) -Greedy action tensor([ 1.5385, -0.7509, -0.2536, 0.2640]) tensor([0.6462, 0.0655, 0.1077, 0.1807]) -Greedy action tensor([ 1.5713, -0.0760, -0.6262, 0.4941]) tensor([0.6082, 0.1171, 0.0676, 0.2071]) -Greedy action tensor([ 1.8138, -0.9324, 0.0688, 0.6426]) tensor([0.6457, 0.0414, 0.1128, 0.2001]) -Greedy action tensor([ 1.2531, -0.3537, -0.2693, 0.2418]) tensor([0.5610, 0.1125, 0.1224, 0.2041]) -Greedy action tensor([ 1.2567, -0.1492, -0.2074, 0.2050]) tensor([0.5477, 0.1343, 0.1267, 0.1913]) -Greedy action tensor([ 2.3186, -1.3586, -0.4323, 0.3391]) tensor([0.8148, 0.0206, 0.0520, 0.1126]) -Greedy action tensor([ 1.2843, -0.4285, -0.1234, 0.2790]) tensor([0.5584, 0.1007, 0.1366, 0.2043]) -Greedy action tensor([ 1.8906, 0.0331, -0.3700, 0.1158]) tensor([0.6994, 0.1091, 0.0729, 0.1186]) -Greedy action tensor([ 0.6882, -0.4619, -0.0087, 0.2158]) tensor([0.4101, 0.1298, 0.2043, 0.2557]) -Greedy action tensor([ 0.8524, -0.0030, -0.0152, 0.3521]) tensor([0.4079, 0.1734, 0.1713, 0.2473]) -Greedy action tensor([ 0.9179, -0.4646, -0.0449, 0.1538]) tensor([0.4765, 0.1196, 0.1820, 0.2219]) -Greedy action tensor([ 1.2042, -0.4292, -0.0206, 0.1120]) tensor([0.5481, 0.1070, 0.1610, 0.1839]) -Greedy action tensor([ 1.4774, -0.4848, -0.2352, 0.4161]) tensor([0.5999, 0.0843, 0.1082, 0.2076]) -Greedy action tensor([ 2.7912, -0.8836, 0.0265, 0.7110]) tensor([0.8242, 0.0209, 0.0519, 0.1030]) -Greedy action tensor([ 1.4567, -0.6615, -0.4705, 0.1822]) tensor([0.6471, 0.0778, 0.0942, 0.1809]) -Greedy action tensor([ 1.7581, -0.2512, -0.4751, 0.2718]) tensor([0.6814, 0.0914, 0.0730, 0.1542]) -Greedy action tensor([ 1.9013, -0.9095, -0.1132, 0.4698]) tensor([0.6981, 0.0420, 0.0931, 0.1668]) -Greedy action tensor([ 0.6970, -0.5095, -0.0710, 0.2040]) tensor([0.4212, 0.1261, 0.1954, 0.2573]) -Greedy action tensor([ 1.5001, -0.2526, -0.5649, 0.4986]) tensor([0.5997, 0.1039, 0.0761, 0.2203]) -Greedy action tensor([ 1.7270, -0.3938, -0.4715, 0.9781]) tensor([0.5869, 0.0704, 0.0651, 0.2776]) -Greedy action tensor([ 1.9164, -0.6999, -0.1830, 0.3955]) tensor([0.7072, 0.0517, 0.0867, 0.1545]) -Greedy action tensor([ 2.1460, -1.1092, -0.2716, 1.0530]) tensor([0.6836, 0.0264, 0.0609, 0.2291]) -Greedy action tensor([ 1.1864, -0.5738, 0.2274, 0.2700]) tensor([0.5115, 0.0880, 0.1960, 0.2046]) -Greedy action tensor([ 1.5369, -0.7340, -0.3253, 0.4774]) tensor([0.6230, 0.0643, 0.0968, 0.2159]) -Greedy action tensor([ 1.3329, -0.0726, -0.0394, 0.1213]) tensor([0.5566, 0.1365, 0.1411, 0.1657]) -Greedy action tensor([ 1.8423, -0.9299, -0.6246, 0.3815]) tensor([0.7249, 0.0453, 0.0615, 0.1682]) -Greedy action tensor([ 1.2290, -0.2029, -0.2973, 0.0833]) tensor([0.5636, 0.1346, 0.1225, 0.1792]) -Greedy action tensor([ 1.8343, -0.0939, -0.1664, 0.7634]) tensor([0.6160, 0.0896, 0.0833, 0.2111]) -Greedy action tensor([ 1.6571, 0.1220, -1.2746, 0.5798]) tensor([0.6214, 0.1339, 0.0331, 0.2116]) -Greedy action tensor([ 1.5063, -0.1399, -0.5649, 0.0761]) tensor([0.6418, 0.1237, 0.0809, 0.1536]) -Greedy action tensor([ 1.0985, -0.1298, -0.8035, 0.3750]) tensor([0.5189, 0.1519, 0.0775, 0.2517]) -Greedy action tensor([ 1.4809, -0.4719, -0.4391, 0.1939]) tensor([0.6392, 0.0907, 0.0937, 0.1765]) -Greedy action tensor([ 1.3724, -0.3100, -0.7539, 0.6823]) tensor([0.5535, 0.1029, 0.0660, 0.2776]) -Greedy action tensor([ 1.0694, -0.4065, -0.5443, -0.0491]) tensor([0.5700, 0.1303, 0.1135, 0.1863]) -Greedy action tensor([ 1.2214, -0.2380, -0.6356, -0.0394]) tensor([0.5981, 0.1390, 0.0934, 0.1695]) -Greedy action tensor([ 2.4099, -1.2764, -0.3886, 0.4002]) tensor([0.8197, 0.0205, 0.0499, 0.1099]) -Greedy action tensor([ 1.0997, -0.2412, -0.1896, -0.0189]) tensor([0.5365, 0.1404, 0.1478, 0.1753]) -Greedy action tensor([ 1.6312, -0.7453, -0.3598, 0.4687]) tensor([0.6484, 0.0602, 0.0886, 0.2028]) -Greedy action tensor([ 1.2776, -0.2020, -0.9595, 0.6862]) tensor([0.5296, 0.1206, 0.0565, 0.2932]) -Greedy action tensor([ 1.1055, -0.1255, -0.5487, 0.2135]) tensor([0.5282, 0.1542, 0.1010, 0.2165]) -Greedy action tensor([ 1.7221, -0.4763, -0.4338, 0.2702]) tensor([0.6845, 0.0760, 0.0793, 0.1603]) -Greedy action tensor([ 1.5875, -0.6673, -0.4173, -0.0560]) tensor([0.6979, 0.0732, 0.0940, 0.1349]) -Greedy action tensor([ 1.1539, -0.0863, -0.4062, 0.3973]) tensor([0.5080, 0.1470, 0.1067, 0.2384]) -Greedy action tensor([ 1.1502, -0.2319, -0.2822, 0.4433]) tensor([0.5043, 0.1266, 0.1204, 0.2487]) -Greedy action tensor([ 1.2561, 0.3869, -0.7973, 0.2292]) tensor([0.5247, 0.2200, 0.0673, 0.1879]) -Greedy action tensor([ 1.8697, -0.5829, 0.1032, 0.3641]) tensor([0.6762, 0.0582, 0.1156, 0.1500]) -Greedy action tensor([ 1.3490, -0.7351, -0.1412, -0.0193]) tensor([0.6233, 0.0776, 0.1405, 0.1587]) -Greedy action tensor([ 1.4434, -0.6701, -0.1518, 0.2057]) tensor([0.6197, 0.0749, 0.1257, 0.1797]) -Greedy action tensor([ 1.7614, -0.6453, -0.0978, 0.4957]) tensor([0.6545, 0.0590, 0.1020, 0.1846]) -Greedy action tensor([ 1.7073, -0.4967, -1.1706, 0.2229]) tensor([0.7177, 0.0792, 0.0404, 0.1627]) -Greedy action tensor([ 1.4259, -0.5420, -0.3695, 0.5390]) tensor([0.5822, 0.0814, 0.0967, 0.2398]) -Greedy action tensor([ 1.7158, -0.4315, -0.7641, 0.4049]) tensor([0.6802, 0.0794, 0.0570, 0.1834]) -Greedy action tensor([ 1.3435, -0.3366, -0.3308, 0.2284]) tensor([0.5877, 0.1095, 0.1101, 0.1927]) -Greedy action tensor([ 1.5519, -0.4780, -0.2173, 0.3014]) tensor([0.6296, 0.0827, 0.1073, 0.1803]) -Greedy action tensor([ 1.4014, -0.6839, -0.1819, 0.1823]) tensor([0.6154, 0.0765, 0.1263, 0.1818]) -Greedy action tensor([ 1.7136, -0.2953, -0.2263, -0.0529]) tensor([0.6902, 0.0926, 0.0992, 0.1180]) -Greedy action tensor([ 1.5670, -0.6159, -0.3135, 0.1967]) tensor([0.6582, 0.0742, 0.1004, 0.1672]) -Greedy action tensor([ 1.1069, -0.4018, -0.4473, -0.1070]) tensor([0.5782, 0.1279, 0.1222, 0.1717]) -Greedy action tensor([ 1.0738, -0.0436, -0.2811, 0.6495]) tensor([0.4466, 0.1461, 0.1152, 0.2922]) -Greedy action tensor([ 1.9845, -0.6860, -0.4383, 0.6303]) tensor([0.7062, 0.0489, 0.0626, 0.1823]) -Greedy action tensor([ 1.7691e+00, -4.3656e-01, -4.8350e-04, 5.4279e-01]) tensor([0.6354, 0.0700, 0.1083, 0.1864]) -Greedy action tensor([ 1.6828, -0.4737, -0.5270, 0.2545]) tensor([0.6825, 0.0790, 0.0749, 0.1636]) -Greedy action tensor([ 1.7643, -0.8866, -0.3319, 0.4870]) tensor([0.6792, 0.0479, 0.0835, 0.1894]) -Greedy action tensor([ 1.3804, -0.1277, -1.0300, 0.6177]) tensor([0.5626, 0.1245, 0.0505, 0.2624]) -Greedy action tensor([ 0.5330, -0.2099, -0.0151, 0.0021]) tensor([0.3785, 0.1801, 0.2188, 0.2226]) -Greedy action tensor([ 1.1712, -0.8414, -0.1958, -0.4768]) tensor([0.6325, 0.0845, 0.1612, 0.1217]) -Greedy action tensor([ 0.6939, -0.4524, -0.0695, -0.2907]) tensor([0.4635, 0.1473, 0.2160, 0.1732]) -Greedy action tensor([ 0.3682, 0.1215, 0.1610, -0.3513]) tensor([0.3245, 0.2536, 0.2638, 0.1580]) -Greedy action tensor([ 0.8768, -0.6020, 0.0966, -0.4514]) tensor([0.5125, 0.1168, 0.2349, 0.1358]) -Greedy action tensor([ 0.7200, -0.4568, -0.0844, -0.2567]) tensor([0.4690, 0.1446, 0.2098, 0.1766]) -Greedy action tensor([ 0.7948, -0.3317, -0.3292, -0.4394]) tensor([0.5154, 0.1671, 0.1675, 0.1500]) -Greedy action tensor([ 0.7821, -0.5958, -0.1637, -0.4396]) tensor([0.5168, 0.1303, 0.2007, 0.1523]) -Greedy action tensor([ 1.1774, -0.8328, -0.0786, -0.5148]) tensor([0.6239, 0.0836, 0.1777, 0.1149]) -Greedy action tensor([ 0.8989, -0.7267, 0.1133, -0.6075]) tensor([0.5335, 0.1050, 0.2432, 0.1183]) -Greedy action tensor([ 0.7551, -0.0172, 0.0731, -0.6483]) tensor([0.4518, 0.2087, 0.2284, 0.1110]) -Greedy action tensor([ 0.8248, -0.3748, 0.1879, -0.6360]) tensor([0.4849, 0.1461, 0.2565, 0.1125]) -Greedy action tensor([ 1.0406, -0.4697, -0.3129, -0.4125]) tensor([0.5838, 0.1289, 0.1508, 0.1365]) -Greedy action tensor([ 0.3810, -0.2155, -0.1002, -0.2211]) tensor([0.3681, 0.2027, 0.2275, 0.2016]) -Greedy action tensor([ 0.5929, -0.7363, -0.1884, -0.2829]) tensor([0.4675, 0.1237, 0.2140, 0.1947]) -Greedy action tensor([ 0.7166, -0.4398, -0.0800, -0.0156]) tensor([0.4452, 0.1401, 0.2007, 0.2141]) -Greedy action tensor([ 0.5851, 0.4263, -0.0684, -0.2081]) tensor([0.3539, 0.3019, 0.1841, 0.1601]) -Greedy action tensor([ 0.3113, -0.0852, -0.1405, -0.4391]) tensor([0.3595, 0.2419, 0.2288, 0.1698]) -Greedy action tensor([ 0.2938, -0.0260, -0.1481, -0.1089]) tensor([0.3292, 0.2391, 0.2116, 0.2201]) -Greedy action tensor([ 0.9593, -1.0538, 0.0737, -0.5336]) tensor([0.5647, 0.0754, 0.2329, 0.1269]) -Greedy action tensor([ 0.3300, 0.1578, -0.0037, 0.1071]) tensor([0.2978, 0.2507, 0.2133, 0.2383]) -Greedy action tensor([ 0.7013, -0.4631, -0.0427, -0.4723]) tensor([0.4770, 0.1489, 0.2267, 0.1475]) -Greedy action tensor([ 0.9540, -0.7284, 0.0937, -0.3286]) tensor([0.5302, 0.0986, 0.2243, 0.1470]) -Greedy action tensor([ 0.3615, 0.0244, -0.0036, -0.7566]) tensor([0.3656, 0.2610, 0.2538, 0.1195]) -Greedy action tensor([ 0.9072, -0.6560, -0.0838, -0.3387]) tensor([0.5352, 0.1121, 0.1987, 0.1540]) -Greedy action tensor([ 0.9388, -0.5111, 0.0540, -0.5585]) tensor([0.5344, 0.1254, 0.2206, 0.1196]) -Greedy action tensor([ 0.9734, -0.7151, -0.3433, -0.6491]) tensor([0.6060, 0.1120, 0.1624, 0.1196]) -Greedy action tensor([ 0.9460, -0.7654, 0.0653, -0.7837]) tensor([0.5642, 0.1019, 0.2339, 0.1001]) -Greedy action tensor([ 0.8944, -0.5097, 0.0138, -0.4599]) tensor([0.5213, 0.1280, 0.2161, 0.1346]) -Greedy action tensor([ 0.7617, -0.3254, 0.0163, -0.1856]) tensor([0.4546, 0.1533, 0.2158, 0.1763]) -Greedy action tensor([ 0.4754, 0.3551, -0.1809, 0.2440]) tensor([0.3126, 0.2772, 0.1622, 0.2480]) -Greedy action tensor([ 0.5673, -0.3076, -0.0159, -0.0773]) tensor([0.4000, 0.1668, 0.2232, 0.2100]) -Greedy action tensor([ 0.7846, -0.3266, 0.0685, -0.3497]) tensor([0.4674, 0.1539, 0.2284, 0.1503]) -Greedy action tensor([ 0.6096, -0.1204, -0.0594, -0.3164]) tensor([0.4184, 0.2016, 0.2143, 0.1657]) -Greedy action tensor([ 0.8870, -0.6449, 0.0218, -0.2598]) tensor([0.5116, 0.1106, 0.2154, 0.1625]) -Greedy action tensor([ 0.9279, -0.9868, -0.1040, -0.8887]) tensor([0.6001, 0.0885, 0.2138, 0.0976]) -Greedy action tensor([ 0.5780, -0.2689, -0.1477, -0.3559]) tensor([0.4337, 0.1859, 0.2099, 0.1705]) -Greedy action tensor([ 0.7836, -0.3578, -0.0825, -0.2688]) tensor([0.4787, 0.1529, 0.2013, 0.1671]) -Greedy action tensor([ 0.8850, -0.1020, -0.0350, -0.0637]) tensor([0.4633, 0.1727, 0.1846, 0.1794]) -Greedy action tensor([ 0.6668, -0.0816, -0.0271, 0.0049]) tensor([0.4018, 0.1901, 0.2008, 0.2073]) -Greedy action tensor([ 0.7445, -0.4790, 0.0832, -0.3668]) tensor([0.4674, 0.1375, 0.2413, 0.1538]) -Greedy action tensor([ 0.7227, -0.6339, -0.0437, -0.2085]) tensor([0.4725, 0.1217, 0.2196, 0.1862]) -Greedy action tensor([ 0.5047, -0.1572, -0.0012, -0.0739]) tensor([0.3732, 0.1925, 0.2250, 0.2092]) -Greedy action tensor([ 0.7383, -0.9561, 0.0431, -0.4750]) tensor([0.5051, 0.0928, 0.2520, 0.1501]) -Greedy action tensor([ 0.9870, -0.5298, 0.0083, -0.3900]) tensor([0.5413, 0.1188, 0.2034, 0.1366]) -Greedy action tensor([ 0.6903, -0.4083, -0.0740, -0.4097]) tensor([0.4691, 0.1564, 0.2184, 0.1561]) -Greedy action tensor([ 0.7019, -0.5339, 0.0556, -0.1517]) tensor([0.4463, 0.1297, 0.2339, 0.1901]) -Greedy action tensor([ 0.8796, 0.4019, 0.1590, -0.5149]) tensor([0.4247, 0.2634, 0.2066, 0.1053]) -Greedy action tensor([ 0.9041, -0.2540, -0.0602, -0.1052]) tensor([0.4855, 0.1525, 0.1851, 0.1770]) -Greedy action tensor([ 0.6540, -0.4701, 0.0074, -0.4840]) tensor([0.4610, 0.1498, 0.2415, 0.1477]) -Greedy action tensor([ 0.6510, -0.4443, 0.0105, -0.2660]) tensor([0.4422, 0.1479, 0.2331, 0.1768]) -Greedy action tensor([ 0.2497, -0.1234, -0.0099, -0.1145]) tensor([0.3170, 0.2183, 0.2445, 0.2202]) -Greedy action tensor([ 0.8032, -0.2862, -0.0560, -0.1203]) tensor([0.4636, 0.1560, 0.1963, 0.1841]) -Greedy action tensor([ 0.6975, -0.2843, -0.0971, -0.1269]) tensor([0.4415, 0.1654, 0.1995, 0.1936]) -Greedy action tensor([ 1.1332, -0.7734, 0.0351, -0.6773]) tensor([0.6077, 0.0903, 0.2027, 0.0994]) -Greedy action tensor([ 0.2517, 0.6750, -0.2938, -0.2111]) tensor([0.2677, 0.4087, 0.1551, 0.1685]) -Greedy action tensor([ 0.5938, -0.4565, -0.1361, -0.3091]) tensor([0.4470, 0.1564, 0.2154, 0.1812]) -Greedy action tensor([ 0.3443, -0.2429, 0.0270, -0.1097]) tensor([0.3426, 0.1904, 0.2494, 0.2176]) -Greedy action tensor([ 0.7800, -0.5747, -0.0511, -0.4287]) tensor([0.5020, 0.1295, 0.2186, 0.1499]) -Greedy action tensor([ 0.7627, -0.4716, -0.0632, -0.4291]) tensor([0.4920, 0.1432, 0.2154, 0.1494]) -Greedy action tensor([ 0.8034, -0.6975, 0.0885, -0.5519]) tensor([0.5076, 0.1132, 0.2483, 0.1309]) -Greedy action tensor([ 0.5949, -0.2743, -0.0851, -0.2891]) tensor([0.4275, 0.1793, 0.2166, 0.1766]) -Greedy action tensor([ 0.6034, -0.5194, 0.0156, -0.1806]) tensor([0.4278, 0.1392, 0.2377, 0.1953]) -Greedy action tensor([ 0.5049, 0.3024, -0.1940, 0.1661]) tensor([0.3304, 0.2699, 0.1643, 0.2355]) -Greedy action tensor([ 0.2743, -0.1379, -0.0712, -0.2146]) tensor([0.3352, 0.2220, 0.2373, 0.2056]) -Greedy action tensor([ 0.3017, -0.2228, -0.0898, -0.2846]) tensor([0.3541, 0.2096, 0.2394, 0.1970]) -Greedy action tensor([ 0.6821, -0.6435, -0.1719, -0.2983]) tensor([0.4839, 0.1285, 0.2060, 0.1815]) -Greedy action tensor([ 0.4053, 0.0793, -0.0574, -0.2539]) tensor([0.3486, 0.2516, 0.2195, 0.1803]) -Greedy action tensor([ 0.9790, -0.5825, -0.2630, -0.2441]) tensor([0.5577, 0.1170, 0.1611, 0.1642]) -Greedy action tensor([ 0.4587, -0.2199, -0.0682, -0.0879]) tensor([0.3736, 0.1895, 0.2206, 0.2163]) -Greedy action tensor([ 0.5976, -0.1757, -0.0869, -0.3190]) tensor([0.4227, 0.1951, 0.2132, 0.1690]) -Greedy action tensor([ 0.7246, -0.5326, -0.0829, -0.3009]) tensor([0.4787, 0.1362, 0.2135, 0.1717]) -Greedy action tensor([ 0.6473, -0.2562, -0.0331, -0.0776]) tensor([0.4174, 0.1691, 0.2114, 0.2022]) -Greedy action tensor([ 0.7079, -0.2831, -0.0449, -0.2338]) tensor([0.4480, 0.1663, 0.2110, 0.1747]) -Greedy action tensor([ 0.5521, 0.0629, -0.1092, -0.4304]) tensor([0.3994, 0.2449, 0.2062, 0.1495]) -Greedy action tensor([ 0.5815, -0.2968, -0.1292, -0.2584]) tensor([0.4276, 0.1777, 0.2101, 0.1846]) -Greedy action tensor([ 0.4186, -0.0058, -0.1923, 0.0036]) tensor([0.3500, 0.2289, 0.1900, 0.2311]) -Greedy action tensor([ 0.8921, -0.7835, -0.0532, -0.4354]) tensor([0.5432, 0.1017, 0.2111, 0.1440]) -Greedy action tensor([ 0.3651, 0.0252, -0.1332, -0.0401]) tensor([0.3349, 0.2384, 0.2034, 0.2233]) -Greedy action tensor([ 0.7351, -0.4863, -0.1391, -0.4018]) tensor([0.4919, 0.1450, 0.2052, 0.1578]) -Greedy action tensor([ 0.8123, -0.6038, 0.1730, -0.6978]) tensor([0.5022, 0.1219, 0.2650, 0.1109]) -Greedy action tensor([ 1.0296, -0.7074, 0.2207, 1.1472]) tensor([0.3642, 0.0641, 0.1622, 0.4096]) -Greedy action tensor([-0.9388, 0.6335, 0.5156, -0.9935]) tensor([0.0905, 0.4361, 0.3876, 0.0857]) -Greedy action tensor([ 0.9528, 0.3312, -0.1697, -0.0415]) tensor([0.4479, 0.2406, 0.1458, 0.1657]) -Greedy action tensor([ 0.4820, -0.0380, -0.5396, -0.4815]) tensor([0.4281, 0.2545, 0.1541, 0.1633]) -Greedy action tensor([ 1.0742, -1.8498, 0.2328, 0.3759]) tensor([0.5045, 0.0271, 0.2175, 0.2509]) -Greedy action tensor([ 1.1238, -1.6677, 0.4313, 0.6160]) tensor([0.4622, 0.0283, 0.2312, 0.2782]) -Greedy action tensor([-0.0151, -1.4554, -0.3812, -0.2595]) tensor([0.3685, 0.0873, 0.2556, 0.2886]) -Greedy action tensor([ 0.5043, -0.0097, -0.5064, 0.9077]) tensor([0.2891, 0.1729, 0.1052, 0.4328]) -Greedy action tensor([-0.4720, 0.7015, -0.0333, -0.7923]) tensor([0.1536, 0.4967, 0.2382, 0.1115]) -Greedy action tensor([ 0.7088, 0.3216, 1.7610, -0.8999]) tensor([0.2108, 0.1431, 0.6038, 0.0422]) -Greedy action tensor([ 0.1091, -2.2698, 0.3181, -0.2610]) tensor([0.3316, 0.0307, 0.4087, 0.2290]) -Greedy action tensor([-0.6897, 0.7445, 0.7204, -0.4201]) tensor([0.0943, 0.3958, 0.3864, 0.1235]) -Greedy action tensor([ 0.3462, -1.0889, 1.1208, -0.7129]) tensor([0.2663, 0.0634, 0.5779, 0.0924]) -Greedy action tensor([-0.6463, -0.8634, 0.0725, -0.6224]) tensor([0.2049, 0.1649, 0.4204, 0.2098]) -Greedy action tensor([-0.2084, -1.3600, 0.1606, -0.8103]) tensor([0.3021, 0.0955, 0.4369, 0.1655]) -Greedy action tensor([-0.9778, -0.7931, 1.8738, 0.4381]) tensor([0.0423, 0.0509, 0.7325, 0.1743]) -Greedy action tensor([ 0.3572, -0.2927, -0.2956, 0.1495]) tensor([0.3502, 0.1829, 0.1823, 0.2846]) -Greedy action tensor([-0.4726, -0.0909, -0.1812, -0.9566]) tensor([0.2263, 0.3314, 0.3028, 0.1395]) -Greedy action tensor([ 0.0613, -1.2297, 1.1596, -0.4049]) tensor([0.2040, 0.0561, 0.6119, 0.1280]) -Greedy action tensor([ 0.4559, -0.0264, -0.2866, -0.5087]) tensor([0.4041, 0.2495, 0.1923, 0.1540]) -Greedy action tensor([ 0.9782, -0.5325, 0.3013, -0.0965]) tensor([0.4830, 0.1066, 0.2455, 0.1649]) -Greedy action tensor([-0.3694, 0.8067, -0.5306, 0.0773]) tensor([0.1502, 0.4870, 0.1279, 0.2348]) -Greedy action tensor([-1.6307, -1.1545, 0.2860, 0.1303]) tensor([0.0657, 0.1057, 0.4465, 0.3821]) -Greedy action tensor([ 0.2103, -1.1981, -0.8939, 1.4862]) tensor([0.1939, 0.0474, 0.0643, 0.6944]) -Greedy action tensor([0.4089, 0.0622, 0.0256, 1.9831]) tensor([0.1386, 0.0980, 0.0945, 0.6690]) -Greedy action tensor([ 0.4559, -1.3367, 1.5571, 0.6677]) tensor([0.1848, 0.0308, 0.5559, 0.2284]) -Greedy action tensor([-0.1292, -0.1533, 0.9952, -0.6439]) tensor([0.1769, 0.1727, 0.5446, 0.1057]) -Greedy action tensor([-0.6127, -0.7864, 0.8434, -0.3513]) tensor([0.1346, 0.1132, 0.5774, 0.1748]) -Greedy action tensor([-1.0114, -0.7547, -1.3621, 0.0637]) tensor([0.1687, 0.2181, 0.1188, 0.4944]) -Greedy action tensor([-0.1913, 0.5939, 0.4954, -0.4806]) tensor([0.1687, 0.3699, 0.3352, 0.1263]) -Greedy action tensor([ 0.3495, -1.0024, 0.0259, -0.4409]) tensor([0.4105, 0.1062, 0.2970, 0.1862]) -Greedy action tensor([1.4529, 1.1078, 0.2578, 0.2296]) tensor([0.4338, 0.3072, 0.1313, 0.1277]) -Greedy action tensor([ 0.8620, -1.5408, 0.7038, 0.4628]) tensor([0.3824, 0.0346, 0.3265, 0.2565]) -Greedy action tensor([ 1.5962, -0.0323, 1.2677, 0.6405]) tensor([0.4346, 0.0853, 0.3129, 0.1671]) -Greedy action tensor([-1.3595, -0.7209, 0.7301, -0.3982]) tensor([0.0736, 0.1394, 0.5947, 0.1924]) -Greedy action tensor([-0.2437, 0.7976, 0.4306, -1.1774]) tensor([0.1616, 0.4577, 0.3171, 0.0635]) -Greedy action tensor([-0.9492, 0.1200, 0.1392, -0.6331]) tensor([0.1211, 0.3529, 0.3598, 0.1662]) -Greedy action tensor([1.1631, 0.4309, 0.2450, 0.3044]) tensor([0.4341, 0.2087, 0.1733, 0.1839]) -Greedy action tensor([ 1.3382, -0.9359, 0.1874, 0.1748]) tensor([0.5775, 0.0594, 0.1827, 0.1804]) -Greedy action tensor([ 0.4468, 0.0767, 0.9862, -0.5933]) tensor([0.2660, 0.1837, 0.4562, 0.0940]) -Greedy action tensor([ 0.4232, -1.2593, 0.4173, 0.0016]) tensor([0.3526, 0.0656, 0.3505, 0.2313]) -Greedy action tensor([ 0.0363, 0.0388, 0.8141, -0.9104]) tensor([0.2190, 0.2195, 0.4766, 0.0850]) -Greedy action tensor([ 0.8997, -0.3111, 0.6272, 1.0037]) tensor([0.3156, 0.0940, 0.2403, 0.3501]) -Greedy action tensor([ 0.5977, -0.3307, 0.4459, -1.0231]) tensor([0.4078, 0.1612, 0.3504, 0.0806]) -Greedy action tensor([-1.3438, -0.0404, -1.4111, 0.0426]) tensor([0.1040, 0.3828, 0.0972, 0.4160]) -Greedy action tensor([-0.5946, -0.6590, 0.3033, -0.0526]) tensor([0.1636, 0.1534, 0.4016, 0.2814]) -Greedy action tensor([ 0.7893, -1.1615, 1.4968, 0.3230]) tensor([0.2633, 0.0374, 0.5341, 0.1652]) -Greedy action tensor([ 1.8537, -1.3219, 0.7737, 1.3425]) tensor([0.5047, 0.0211, 0.1714, 0.3028]) -Greedy action tensor([-0.8266, 0.1741, 1.4277, -1.4192]) tensor([0.0725, 0.1971, 0.6904, 0.0401]) -Greedy action tensor([0.5870, 0.1623, 0.4339, 0.3877]) tensor([0.3002, 0.1963, 0.2576, 0.2459]) -Greedy action tensor([ 0.5865, -1.2812, 0.6316, 0.1764]) tensor([0.3491, 0.0539, 0.3653, 0.2317]) -Greedy action tensor([ 0.3851, -0.3349, -0.0917, -0.3009]) tensor([0.3830, 0.1864, 0.2377, 0.1929]) -Greedy action tensor([ 1.0355, -1.0270, 0.6489, 0.9183]) tensor([0.3709, 0.0472, 0.2520, 0.3299]) -Greedy action tensor([ 0.1600, 0.1018, -0.4389, 0.6476]) tensor([0.2426, 0.2289, 0.1333, 0.3951]) -Greedy action tensor([ 0.2147, -1.5999, 0.8348, -0.4088]) tensor([0.2810, 0.0458, 0.5225, 0.1507]) -Greedy action tensor([-2.4027, -0.5589, 1.1962, -1.0311]) tensor([0.0209, 0.1322, 0.7645, 0.0824]) -Greedy action tensor([ 0.7518, -0.1889, 0.2509, -0.5542]) tensor([0.4411, 0.1722, 0.2673, 0.1195]) -Greedy action tensor([ 0.6897, -0.5274, 0.7211, -0.2763]) tensor([0.3692, 0.1093, 0.3810, 0.1405]) -Greedy action tensor([ 1.3764, -0.0190, 0.9372, 1.0688]) tensor([0.3806, 0.0943, 0.2453, 0.2798]) -Greedy action tensor([ 0.6018, -1.5023, 0.4522, -0.2707]) tensor([0.4165, 0.0508, 0.3586, 0.1741]) -Greedy action tensor([ 0.2081, -0.4467, 0.2714, -0.6126]) tensor([0.3306, 0.1717, 0.3522, 0.1455]) -Greedy action tensor([ 0.0962, -0.2676, -0.3256, -0.2309]) tensor([0.3255, 0.2263, 0.2135, 0.2347]) -Greedy action tensor([ 0.1079, -0.9245, 0.0370, -0.3741]) tensor([0.3442, 0.1226, 0.3207, 0.2126]) -Greedy action tensor([ 0.1115, 0.6189, -0.0180, 0.7492]) tensor([0.1841, 0.3058, 0.1617, 0.3484]) -Greedy action tensor([-0.0396, -0.7245, -0.2931, 1.7172]) tensor([0.1238, 0.0624, 0.0961, 0.7176]) -Greedy action tensor([ 0.0301, -1.0293, -0.7657, -0.3171]) tensor([0.3993, 0.1384, 0.1802, 0.2822]) -Greedy action tensor([ 1.0418, -0.3443, -0.0379, -0.0165]) tensor([0.5163, 0.1291, 0.1754, 0.1792]) -Greedy action tensor([1.0760, 0.0813, 0.1192, 0.2265]) tensor([0.4584, 0.1695, 0.1761, 0.1960]) -Greedy action tensor([-0.2476, -1.4812, -0.1487, 1.5596]) tensor([0.1178, 0.0343, 0.1301, 0.7178]) -Greedy action tensor([-0.2470, -0.5221, 0.2423, 0.2671]) tensor([0.1975, 0.1500, 0.3222, 0.3303]) -Greedy action tensor([-0.4971, -0.0999, -0.4811, -0.4289]) tensor([0.2186, 0.3252, 0.2221, 0.2340]) -Greedy action tensor([-0.5978, -0.1600, 0.5626, -0.5370]) tensor([0.1470, 0.2277, 0.4691, 0.1562]) -Greedy action tensor([-0.4327, -1.1331, 0.0815, 0.3485]) tensor([0.1868, 0.0927, 0.3124, 0.4080]) -Greedy action tensor([ 0.2011, 0.6009, -0.0597, -0.8146]) tensor([0.2759, 0.4116, 0.2126, 0.0999]) -Greedy action tensor([-0.6501, -0.7537, 0.5162, -0.9913]) tensor([0.1717, 0.1548, 0.5513, 0.1221]) -Greedy action tensor([ 0.3271, -0.8050, -0.6695, -0.0913]) tensor([0.4256, 0.1372, 0.1571, 0.2801]) -Greedy action tensor([ 0.6195, -0.6256, 0.1923, 0.3499]) tensor([0.3698, 0.1065, 0.2413, 0.2824]) -Greedy action tensor([ 0.6177, -0.5970, -0.2702, 1.1012]) tensor([0.3003, 0.0891, 0.1236, 0.4870]) -Greedy action tensor([-1.2378, -0.6452, -0.9246, -0.3636]) tensor([0.1521, 0.2751, 0.2081, 0.3647]) -Greedy action tensor([-0.6117, -1.6266, 1.3971, -0.0367]) tensor([0.0944, 0.0342, 0.7037, 0.1677]) -Greedy action tensor([ 1.2721, -1.0523, 0.3980, -0.3998]) tensor([0.5872, 0.0575, 0.2450, 0.1103]) -Greedy action tensor([ 0.2920, -1.4518, 0.0373, -0.5602]) tensor([0.4208, 0.0736, 0.3262, 0.1795]) -Greedy action tensor([-1.9097, -0.4374, 0.6554, -0.1615]) tensor([0.0415, 0.1809, 0.5394, 0.2383]) -Greedy action tensor([-1.8817, -0.3556, 0.6180, -0.1477]) tensor([0.0427, 0.1962, 0.5195, 0.2416]) -Greedy action tensor([-1.7863, -0.5069, 0.5831, -0.0882]) tensor([0.0482, 0.1732, 0.5152, 0.2633]) -Greedy action tensor([-1.9046, -0.4445, 0.6465, -0.1601]) tensor([0.0419, 0.1806, 0.5376, 0.2400]) -Greedy action tensor([-1.4583, 0.0255, 0.3693, 0.1487]) tensor([0.0602, 0.2654, 0.3743, 0.3002]) -Greedy action tensor([-0.7690, -0.4715, 1.1400, 1.4919]) tensor([0.0535, 0.0721, 0.3610, 0.5134]) -Greedy action tensor([-1.8422, -0.2092, 0.5756, -0.1119]) tensor([0.0435, 0.2227, 0.4882, 0.2455]) -Greedy action tensor([-1.9150, -0.4305, 0.6488, -0.1717]) tensor([0.0415, 0.1830, 0.5385, 0.2370]) -Greedy action tensor([-1.3690, -0.5506, 0.4206, 0.1703]) tensor([0.0719, 0.1629, 0.4302, 0.3350]) -Greedy action tensor([-1.8762, -0.4239, 0.6309, -0.1651]) tensor([0.0433, 0.1852, 0.5317, 0.2398]) -Greedy action tensor([-1.9057, -0.4491, 0.6524, -0.1583]) tensor([0.0418, 0.1792, 0.5392, 0.2397]) -Greedy action tensor([0.2724, 0.6318, 0.3108, 0.8586]) tensor([0.1898, 0.2719, 0.1972, 0.3411]) -Greedy action tensor([-1.8719, -0.4280, 0.6302, -0.1454]) tensor([0.0434, 0.1837, 0.5293, 0.2437]) -Greedy action tensor([-1.9028, -0.3102, 0.6312, -0.1573]) tensor([0.0412, 0.2027, 0.5198, 0.2362]) -Greedy action tensor([-1.9160, -0.4280, 0.6494, -0.1608]) tensor([0.0413, 0.1829, 0.5370, 0.2388]) -Greedy action tensor([-1.7696, -0.4503, 0.5945, -0.1575]) tensor([0.0490, 0.1835, 0.5216, 0.2459]) -Greedy action tensor([-1.8622, -0.4509, 0.6309, -0.1460]) tensor([0.0439, 0.1802, 0.5315, 0.2444]) -Greedy action tensor([-1.5283, 0.4684, 0.3248, 0.2062]) tensor([0.0490, 0.3608, 0.3126, 0.2776]) -Greedy action tensor([-1.7850, -0.3040, 0.6068, -0.0737]) tensor([0.0457, 0.2011, 0.5000, 0.2532]) -Greedy action tensor([-1.9351, -0.4351, 0.6623, -0.1756]) tensor([0.0405, 0.1813, 0.5432, 0.2350]) -Greedy action tensor([-1.2862, -0.0996, 0.4128, 0.3523]) tensor([0.0671, 0.2200, 0.3672, 0.3457]) -Greedy action tensor([-1.8874, -0.3942, 0.6371, -0.1477]) tensor([0.0423, 0.1884, 0.5283, 0.2410]) -Greedy action tensor([-1.9362, -0.4383, 0.6632, -0.1755]) tensor([0.0404, 0.1807, 0.5438, 0.2351]) -Greedy action tensor([-1.7363, -0.4509, 0.5738, -0.0629]) tensor([0.0499, 0.1806, 0.5032, 0.2662]) -Greedy action tensor([-1.8108, -0.3042, 0.5898, -0.0830]) tensor([0.0451, 0.2035, 0.4975, 0.2539]) -Greedy action tensor([-1.8975, -0.3582, 0.6333, -0.1448]) tensor([0.0417, 0.1943, 0.5236, 0.2405]) -Greedy action tensor([-1.6208, -0.5075, 0.8139, -0.4977]) tensor([0.0540, 0.1643, 0.6159, 0.1659]) -Greedy action tensor([-1.9453, -0.4476, 0.6672, -0.1805]) tensor([0.0401, 0.1792, 0.5465, 0.2341]) -Greedy action tensor([-1.9319, -0.4323, 0.6614, -0.1681]) tensor([0.0405, 0.1815, 0.5417, 0.2363]) -Greedy action tensor([-1.9264, -0.3932, 0.6469, -0.1704]) tensor([0.0408, 0.1889, 0.5344, 0.2360]) -Greedy action tensor([-1.8189, -0.3443, 0.6126, -0.0973]) tensor([0.0448, 0.1956, 0.5093, 0.2504]) -Greedy action tensor([-1.9378, -0.4459, 0.6674, -0.1752]) tensor([0.0403, 0.1792, 0.5456, 0.2349]) -Greedy action tensor([-1.7074, 0.2932, 0.4397, -0.0497]) tensor([0.0450, 0.3330, 0.3856, 0.2364]) -Greedy action tensor([-1.0372, -0.5888, 0.2675, 0.1181]) tensor([0.1061, 0.1661, 0.3911, 0.3368]) -Greedy action tensor([-1.3330, -0.4825, 0.4929, 0.3252]) tensor([0.0676, 0.1582, 0.4195, 0.3547]) -Greedy action tensor([-1.7200, -0.0032, 0.4960, -0.1057]) tensor([0.0482, 0.2681, 0.4417, 0.2420]) -Greedy action tensor([-1.7866, -0.3227, 0.5732, -0.0645]) tensor([0.0465, 0.2010, 0.4923, 0.2602]) -Greedy action tensor([-1.7249, 0.0574, 0.5115, 0.0640]) tensor([0.0449, 0.2667, 0.4200, 0.2685]) -Greedy action tensor([-1.9265, -0.4397, 0.6580, -0.1704]) tensor([0.0409, 0.1807, 0.5418, 0.2366]) -Greedy action tensor([-1.9284, -0.4433, 0.6606, -0.1711]) tensor([0.0408, 0.1800, 0.5429, 0.2363]) -Greedy action tensor([-1.9032, -0.4448, 0.6518, -0.1572]) tensor([0.0418, 0.1799, 0.5385, 0.2398]) -Greedy action tensor([-1.6315, -0.1623, 0.6072, 0.0355]) tensor([0.0499, 0.2170, 0.4685, 0.2645]) -Greedy action tensor([-1.7691, -0.0078, 0.5151, -0.0576]) tensor([0.0451, 0.2625, 0.4427, 0.2497]) -Greedy action tensor([-1.7620, -0.4116, 0.6816, -0.0375]) tensor([0.0455, 0.1755, 0.5238, 0.2552]) -Greedy action tensor([-0.7258, 0.2671, 0.2996, 0.7643]) tensor([0.0915, 0.2471, 0.2552, 0.4062]) -Greedy action tensor([-0.8447, 0.9411, 0.0839, 0.2693]) tensor([0.0797, 0.4756, 0.2018, 0.2429]) -Greedy action tensor([-1.6120, -0.2504, 0.5312, 0.0574]) tensor([0.0534, 0.2083, 0.4550, 0.2833]) -Greedy action tensor([-1.7249, -0.3722, 0.5533, -0.0799]) tensor([0.0505, 0.1953, 0.4927, 0.2616]) -Greedy action tensor([-1.1060, -0.0347, 0.5705, 0.7815]) tensor([0.0630, 0.1840, 0.3369, 0.4161]) -Greedy action tensor([-1.7247, -0.1675, 0.5402, 0.0106]) tensor([0.0475, 0.2255, 0.4576, 0.2694]) -Greedy action tensor([-1.7196, -0.4134, 0.5541, -0.0480]) tensor([0.0507, 0.1872, 0.4925, 0.2697]) -Greedy action tensor([-1.4194, -0.4303, 0.4876, -0.3251]) tensor([0.0746, 0.2005, 0.5021, 0.2228]) -Greedy action tensor([-1.6331, 0.0205, 0.4712, 0.0559]) tensor([0.0504, 0.2634, 0.4133, 0.2729]) -Greedy action tensor([-0.8904, 0.7634, 0.1633, -0.0085]) tensor([0.0869, 0.4541, 0.2492, 0.2099]) -Greedy action tensor([-1.3830, -0.2798, 0.5648, 0.1700]) tensor([0.0635, 0.1913, 0.4452, 0.3000]) -Greedy action tensor([-1.5524, 0.5329, 0.3383, 0.0383]) tensor([0.0486, 0.3910, 0.3219, 0.2385]) -Greedy action tensor([-1.8115, -0.4667, 0.5883, -0.1005]) tensor([0.0467, 0.1794, 0.5152, 0.2587]) -Greedy action tensor([-1.9397, -0.4578, 0.6650, -0.1782]) tensor([0.0404, 0.1778, 0.5466, 0.2352]) -Greedy action tensor([-1.8605, -0.3403, 0.6383, -0.0979]) tensor([0.0424, 0.1940, 0.5163, 0.2473]) -Greedy action tensor([-1.8028, -0.3199, 0.6218, -0.0485]) tensor([0.0445, 0.1960, 0.5025, 0.2571]) -Greedy action tensor([-1.8467, -0.4489, 0.6175, -0.1512]) tensor([0.0449, 0.1819, 0.5283, 0.2449]) -Greedy action tensor([-1.1463, 0.7367, 0.2566, -0.0578]) tensor([0.0684, 0.4499, 0.2784, 0.2033]) -Greedy action tensor([-1.8832, -0.3774, 0.6315, -0.1396]) tensor([0.0424, 0.1911, 0.5241, 0.2424]) -Greedy action tensor([-1.6191, 0.4894, 0.3963, -0.0422]) tensor([0.0463, 0.3817, 0.3477, 0.2243]) -Greedy action tensor([-1.8671, -0.3691, 0.6293, -0.1164]) tensor([0.0428, 0.1914, 0.5194, 0.2464]) -Greedy action tensor([-1.8853, -0.2045, 0.6010, -0.1363]) tensor([0.0414, 0.2225, 0.4979, 0.2382]) -Greedy action tensor([-1.8824, -0.4394, 0.6370, -0.1503]) tensor([0.0429, 0.1816, 0.5329, 0.2425]) -Greedy action tensor([-1.4617, -0.4978, 0.5148, 0.2138]) tensor([0.0618, 0.1620, 0.4461, 0.3301]) -Greedy action tensor([-1.4874, -0.4403, 0.4155, 0.0661]) tensor([0.0654, 0.1864, 0.4387, 0.3094]) -Greedy action tensor([-1.8534, -0.4676, 0.6278, -0.1323]) tensor([0.0444, 0.1773, 0.5303, 0.2480]) -Greedy action tensor([-0.8481, 0.7110, 0.2699, 0.5325]) tensor([0.0782, 0.3717, 0.2391, 0.3109]) -Greedy action tensor([-1.8715, -0.3778, 0.6347, -0.1383]) tensor([0.0428, 0.1906, 0.5245, 0.2421]) -Greedy action tensor([-1.9357, -0.4267, 0.6576, -0.1754]) tensor([0.0405, 0.1830, 0.5412, 0.2353]) -Greedy action tensor([-1.7925, -0.3707, 0.5810, -0.0913]) tensor([0.0468, 0.1940, 0.5026, 0.2566]) -Greedy action tensor([-1.6076, 0.0339, 0.5382, 0.1068]) tensor([0.0493, 0.2548, 0.4218, 0.2740]) -Greedy action tensor([-1.8629, -0.3222, 0.6169, -0.1226]) tensor([0.0429, 0.2003, 0.5123, 0.2445]) -Greedy action tensor([-1.5313, -0.5459, 0.4721, 0.0178]) tensor([0.0633, 0.1695, 0.4693, 0.2979]) -Greedy action tensor([-1.6324, -0.6451, 0.6413, 0.1162]) tensor([0.0522, 0.1402, 0.5074, 0.3001]) -Greedy action tensor([-1.9439, -0.4490, 0.6672, -0.1804]) tensor([0.0402, 0.1790, 0.5466, 0.2342]) -Greedy action tensor([-1.7633, -0.3780, 0.5669, -0.1159]) tensor([0.0489, 0.1952, 0.5022, 0.2537]) -Greedy action tensor([-1.8645, -0.4116, 0.6267, -0.1446]) tensor([0.0436, 0.1864, 0.5265, 0.2435]) -Greedy action tensor([ 1.4733, -0.2149, -0.3996, 0.3159]) tensor([0.6050, 0.1118, 0.0930, 0.1901]) -Greedy action tensor([ 1.6363, -0.6675, -0.3794, 0.1412]) tensor([0.6862, 0.0685, 0.0914, 0.1539]) -Greedy action tensor([ 1.7173, -0.6032, -0.0930, 0.6572]) tensor([0.6218, 0.0611, 0.1017, 0.2154]) -Greedy action tensor([ 2.1472, -1.2841, -0.3951, 0.4901]) tensor([0.7682, 0.0248, 0.0604, 0.1465]) -Greedy action tensor([ 1.7020, -0.4114, -0.1654, 0.3059]) tensor([0.6566, 0.0793, 0.1015, 0.1626]) -Greedy action tensor([ 0.9261, -0.3403, -0.0558, 0.2448]) tensor([0.4624, 0.1303, 0.1732, 0.2340]) -Greedy action tensor([ 1.1995, -0.5477, -0.5977, 0.3983]) tensor([0.5590, 0.0974, 0.0927, 0.2509]) -Greedy action tensor([ 1.0463, -0.0660, -0.2964, 0.3334]) tensor([0.4807, 0.1581, 0.1255, 0.2357]) -Greedy action tensor([ 1.7946, -0.9540, -0.1396, 0.2583]) tensor([0.7024, 0.0450, 0.1015, 0.1511]) -Greedy action tensor([ 1.3156, 0.0798, -0.4388, 0.5286]) tensor([0.5212, 0.1514, 0.0902, 0.2372]) -Greedy action tensor([ 1.7357, -0.5264, -0.2919, 0.5428]) tensor([0.6497, 0.0677, 0.0855, 0.1971]) -Greedy action tensor([ 1.3440, -0.2201, -0.4234, 0.1347]) tensor([0.5958, 0.1247, 0.1017, 0.1778]) -Greedy action tensor([ 0.7663, -0.0806, -0.4179, 0.2187]) tensor([0.4323, 0.1854, 0.1323, 0.2500]) -Greedy action tensor([ 1.2232, -0.2466, -0.6591, 0.0997]) tensor([0.5857, 0.1347, 0.0892, 0.1904]) -Greedy action tensor([ 1.3749, -0.3672, -0.0093, 0.1835]) tensor([0.5782, 0.1013, 0.1449, 0.1757]) -Greedy action tensor([ 1.4420, -0.7274, -0.2984, 0.3256]) tensor([0.6184, 0.0706, 0.1085, 0.2025]) -Greedy action tensor([ 1.4376, -0.2621, -0.6907, 0.5063]) tensor([0.5897, 0.1078, 0.0702, 0.2323]) -Greedy action tensor([ 1.2142, -0.4445, -0.2855, 0.4768]) tensor([0.5286, 0.1006, 0.1180, 0.2528]) -Greedy action tensor([ 2.2978, -0.3944, 0.0122, 0.5180]) tensor([0.7473, 0.0506, 0.0760, 0.1261]) -Greedy action tensor([ 1.4608, -0.6720, -0.6913, 0.2468]) tensor([0.6528, 0.0774, 0.0759, 0.1939]) -Greedy action tensor([ 0.9059, -0.5417, -0.3453, 0.3777]) tensor([0.4737, 0.1114, 0.1356, 0.2793]) -Greedy action tensor([ 1.7620, 0.1151, -0.2380, -0.2367]) tensor([0.6833, 0.1316, 0.0925, 0.0926]) -Greedy action tensor([ 2.5422, -1.3594, 0.0701, 0.9684]) tensor([0.7623, 0.0154, 0.0643, 0.1580]) -Greedy action tensor([ 2.1115, -0.8833, -0.4927, 0.8789]) tensor([0.7064, 0.0354, 0.0522, 0.2060]) -Greedy action tensor([ 1.5128, -0.2324, -0.3077, 0.4854]) tensor([0.5902, 0.1030, 0.0956, 0.2112]) -Greedy action tensor([ 1.4591, -0.5971, -0.0054, 0.7509]) tensor([0.5401, 0.0691, 0.1249, 0.2660]) -Greedy action tensor([ 1.0552, -0.1833, -0.4368, 0.5237]) tensor([0.4756, 0.1378, 0.1070, 0.2795]) -Greedy action tensor([ 1.2804, -0.3391, -0.1563, -0.0259]) tensor([0.5860, 0.1160, 0.1393, 0.1587]) -Greedy action tensor([ 1.7984, -0.1180, -0.1079, 0.2962]) tensor([0.6586, 0.0969, 0.0979, 0.1466]) -Greedy action tensor([ 1.0732, -0.5690, -0.3437, 0.7098]) tensor([0.4692, 0.0908, 0.1138, 0.3262]) -Greedy action tensor([ 1.4925, 0.0328, -1.0736, 0.2466]) tensor([0.6263, 0.1455, 0.0481, 0.1802]) -Greedy action tensor([ 1.4383, -0.5106, -0.5870, 0.3390]) tensor([0.6221, 0.0886, 0.0821, 0.2072]) -Greedy action tensor([ 2.3691, -1.0778, 0.0111, 0.7254]) tensor([0.7577, 0.0241, 0.0717, 0.1464]) -Greedy action tensor([ 1.0713, -0.0321, -0.4399, 0.1263]) tensor([0.5152, 0.1709, 0.1137, 0.2002]) -Greedy action tensor([ 0.9763, -0.1874, -0.1364, 0.1771]) tensor([0.4783, 0.1494, 0.1572, 0.2151]) -Greedy action tensor([1.9962, 0.8592, 0.4110, 0.0376]) tensor([0.6000, 0.1925, 0.1229, 0.0846]) -Greedy action tensor([ 2.4441, -0.8513, -0.5687, 0.6782]) tensor([0.7954, 0.0295, 0.0391, 0.1360]) -Greedy action tensor([ 1.6535, -0.5286, -0.1115, 0.4256]) tensor([0.6341, 0.0715, 0.1086, 0.1857]) -Greedy action tensor([ 1.7551, -0.3843, -0.1924, 0.4455]) tensor([0.6535, 0.0769, 0.0932, 0.1764]) -Greedy action tensor([ 1.3201, -0.6111, -0.0940, 0.3558]) tensor([0.5652, 0.0819, 0.1374, 0.2155]) -Greedy action tensor([ 1.3261, -0.2969, -0.2725, 0.1571]) tensor([0.5847, 0.1154, 0.1182, 0.1817]) -Greedy action tensor([ 1.3182, -0.0408, -0.0418, 0.0543]) tensor([0.5568, 0.1430, 0.1429, 0.1573]) -Greedy action tensor([ 1.5704, -0.2336, -0.0584, 0.6381]) tensor([0.5700, 0.0938, 0.1118, 0.2244]) -Greedy action tensor([ 1.4558, -0.8205, -0.0272, 0.1690]) tensor([0.6227, 0.0639, 0.1413, 0.1720]) -Greedy action tensor([ 1.6167, -0.4973, -0.4112, 0.2454]) tensor([0.6640, 0.0802, 0.0874, 0.1685]) -Greedy action tensor([ 1.4856, -0.0644, -0.5376, -0.0285]) tensor([0.6392, 0.1357, 0.0845, 0.1406]) -Greedy action tensor([ 1.3297, -0.8489, -0.0713, 0.5126]) tensor([0.5552, 0.0628, 0.1368, 0.2452]) -Greedy action tensor([ 2.0402, -1.2765, -0.2742, 0.6124]) tensor([0.7273, 0.0264, 0.0719, 0.1744]) -Greedy action tensor([ 1.1263, -0.0044, -0.5697, 0.2755]) tensor([0.5172, 0.1670, 0.0949, 0.2209]) -Greedy action tensor([ 1.2750, -0.5658, -0.0086, 0.2713]) tensor([0.5549, 0.0881, 0.1537, 0.2034]) -Greedy action tensor([ 1.5029, -0.5504, -0.2688, 0.3591]) tensor([0.6184, 0.0793, 0.1052, 0.1970]) -Greedy action tensor([ 1.8118, -1.0688, 0.0898, 0.4372]) tensor([0.6721, 0.0377, 0.1201, 0.1700]) -Greedy action tensor([ 1.8107, -0.7663, -0.1346, 0.5660]) tensor([0.6636, 0.0504, 0.0949, 0.1911]) -Greedy action tensor([ 0.5616, -0.2830, -0.7024, 0.8498]) tensor([0.3283, 0.1411, 0.0927, 0.4379]) -Greedy action tensor([ 1.5773, -0.6150, -0.5503, 0.7485]) tensor([0.5998, 0.0670, 0.0714, 0.2618]) -Greedy action tensor([ 2.4147, -1.3526, 0.0998, 0.5552]) tensor([0.7827, 0.0181, 0.0773, 0.1219]) -Greedy action tensor([ 1.3834, -0.3715, -0.2765, 0.6802]) tensor([0.5382, 0.0931, 0.1023, 0.2664]) -Greedy action tensor([ 1.2429, -0.1930, -0.3995, 0.3828]) tensor([0.5392, 0.1283, 0.1044, 0.2282]) -Greedy action tensor([ 1.7604, -0.1389, -0.4950, 0.5801]) tensor([0.6403, 0.0958, 0.0671, 0.1967]) -Greedy action tensor([ 1.2750, -0.5768, -0.4220, 0.0324]) tensor([0.6139, 0.0964, 0.1125, 0.1772]) -Greedy action tensor([ 1.5225, 0.1901, -0.1178, 0.3256]) tensor([0.5682, 0.1499, 0.1102, 0.1717]) -Greedy action tensor([ 1.4655, -0.8289, -0.1329, 0.6010]) tensor([0.5799, 0.0585, 0.1173, 0.2443]) -Greedy action tensor([ 1.6458, -0.3363, -0.7113, 0.4772]) tensor([0.6480, 0.0893, 0.0614, 0.2014]) -Greedy action tensor([ 1.4882, 0.0913, -0.5548, 0.7910]) tensor([0.5333, 0.1319, 0.0691, 0.2656]) -Greedy action tensor([ 1.2593, -0.2788, -0.3744, 0.3779]) tensor([0.5482, 0.1177, 0.1070, 0.2271]) -Greedy action tensor([ 1.5699, 0.4404, -0.1214, 0.0032]) tensor([0.5827, 0.1883, 0.1074, 0.1216]) -Greedy action tensor([ 1.8502, -1.1054, 0.0429, 0.4825]) tensor([0.6799, 0.0354, 0.1116, 0.1732]) -Greedy action tensor([ 1.4291, -0.7030, -0.2285, -0.0255]) tensor([0.6482, 0.0769, 0.1235, 0.1514]) -Greedy action tensor([ 1.5029, -0.6244, -0.1298, 0.3003]) tensor([0.6192, 0.0738, 0.1210, 0.1860]) -Greedy action tensor([ 1.1181, -0.8098, 0.2292, 0.1740]) tensor([0.5140, 0.0748, 0.2113, 0.2000]) -Greedy action tensor([ 1.2817, -0.4772, -0.4491, 0.5743]) tensor([0.5428, 0.0935, 0.0961, 0.2676]) -Greedy action tensor([ 1.3631, -0.4741, -0.1085, 0.3797]) tensor([0.5673, 0.0903, 0.1302, 0.2122]) -Greedy action tensor([ 1.2629, -0.4779, 0.0190, 0.0305]) tensor([0.5697, 0.0999, 0.1642, 0.1661]) -Greedy action tensor([ 1.3070, -0.1205, -0.5025, 0.3861]) tensor([0.5550, 0.1331, 0.0909, 0.2210]) -Greedy action tensor([ 2.0143, -0.8781, -0.4228, 0.9082]) tensor([0.6786, 0.0376, 0.0593, 0.2245]) -Greedy action tensor([ 1.1906, -0.3845, -0.2175, 0.0633]) tensor([0.5632, 0.1166, 0.1378, 0.1824]) -Greedy action tensor([ 0.8507, -0.4203, -0.2283, 0.3086]) tensor([0.4541, 0.1274, 0.1544, 0.2641]) -Greedy action tensor([ 1.1320, -0.0660, -0.7728, 0.1726]) tensor([0.5453, 0.1646, 0.0812, 0.2089]) -Greedy action tensor([ 1.2476, -0.1659, -0.5056, 0.4318]) tensor([0.5380, 0.1309, 0.0932, 0.2379]) -Greedy action tensor([ 1.6377, 0.1820, -0.1886, 0.2395]) tensor([0.6093, 0.1421, 0.0981, 0.1505]) -Greedy action tensor([ 1.7142, -0.1047, -0.5932, 0.1680]) tensor([0.6781, 0.1100, 0.0675, 0.1445]) -Greedy action tensor([ 0.5753, -0.0669, -0.0979, -0.0096]) tensor([0.3856, 0.2029, 0.1967, 0.2149]) -Greedy action tensor([ 0.7724, -0.4844, -0.0445, -0.3757]) tensor([0.4893, 0.1393, 0.2162, 0.1552]) -Greedy action tensor([ 0.8610, -0.5192, -0.0127, -0.4841]) tensor([0.5183, 0.1304, 0.2163, 0.1350]) -Greedy action tensor([ 0.8796, -0.4014, -0.0406, -0.3989]) tensor([0.5116, 0.1421, 0.2038, 0.1425]) -Greedy action tensor([ 0.7351, -0.4333, 0.0149, -0.3382]) tensor([0.4674, 0.1453, 0.2275, 0.1598]) -Greedy action tensor([ 0.1314, -0.0772, -0.2029, 0.0293]) tensor([0.2915, 0.2366, 0.2087, 0.2632]) -Greedy action tensor([ 0.4899, -0.0960, -0.0954, -0.1270]) tensor([0.3769, 0.2098, 0.2099, 0.2034]) -Greedy action tensor([ 0.7839, -0.1014, -0.0446, -0.1405]) tensor([0.4452, 0.1837, 0.1944, 0.1766]) -Greedy action tensor([ 8.9013e-01, 5.6288e-04, -8.6666e-02, 3.6429e-02]) tensor([0.4518, 0.1856, 0.1701, 0.1924]) -Greedy action tensor([ 0.9954, -0.2070, -0.0988, -0.5684]) tensor([0.5421, 0.1629, 0.1815, 0.1135]) -Greedy action tensor([ 1.1702, -0.6718, -0.2703, -0.7991]) tensor([0.6515, 0.1033, 0.1543, 0.0909]) -Greedy action tensor([ 1.2314, -0.7010, -0.1662, -0.3504]) tensor([0.6259, 0.0906, 0.1547, 0.1287]) -Greedy action tensor([ 1.0758, -0.8923, 0.1145, -0.7829]) tensor([0.5959, 0.0833, 0.2279, 0.0929]) -Greedy action tensor([ 0.9915, -0.6360, -0.0870, -0.4904]) tensor([0.5670, 0.1114, 0.1928, 0.1288]) -Greedy action tensor([ 0.9324, 0.0933, -0.1136, -0.2621]) tensor([0.4793, 0.2071, 0.1684, 0.1452]) -Greedy action tensor([ 0.2465, 0.0292, 0.0426, -0.3990]) tensor([0.3180, 0.2559, 0.2593, 0.1668]) -Greedy action tensor([ 1.1613, -1.1320, 0.0632, -0.6229]) tensor([0.6241, 0.0630, 0.2081, 0.1048]) -Greedy action tensor([ 0.7221, -0.5857, -0.0352, -0.3264]) tensor([0.4785, 0.1294, 0.2244, 0.1677]) -Greedy action tensor([ 0.8692, -0.1467, -0.2324, -0.4803]) tensor([0.5118, 0.1853, 0.1701, 0.1328]) -Greedy action tensor([ 0.7801, -0.3883, -0.1256, -0.2170]) tensor([0.4798, 0.1492, 0.1940, 0.1770]) -Greedy action tensor([ 0.8275, -0.3357, 0.0898, -0.8615]) tensor([0.5062, 0.1582, 0.2421, 0.0935]) -Greedy action tensor([ 0.7506, -0.4797, -0.1306, -0.4623]) tensor([0.4990, 0.1458, 0.2068, 0.1484]) -Greedy action tensor([ 0.5698, -0.3955, -0.1336, -0.1406]) tensor([0.4224, 0.1609, 0.2091, 0.2076]) -Greedy action tensor([ 0.4576, -0.0965, -0.0155, -0.0114]) tensor([0.3542, 0.2035, 0.2207, 0.2216]) -Greedy action tensor([ 0.5794, -0.3538, 0.0259, -0.2333]) tensor([0.4146, 0.1631, 0.2384, 0.1840]) -Greedy action tensor([ 0.7413, -0.1402, -0.0329, -0.0733]) tensor([0.4314, 0.1787, 0.1989, 0.1910]) -Greedy action tensor([ 0.5217, -0.0833, 0.0269, -0.6074]) tensor([0.4034, 0.2203, 0.2459, 0.1304]) -Greedy action tensor([ 0.7593, -0.3245, -0.2086, -0.2878]) tensor([0.4833, 0.1635, 0.1836, 0.1696]) -Greedy action tensor([ 0.8933, -0.3770, -0.0547, -0.3659]) tensor([0.5123, 0.1438, 0.1985, 0.1454]) -Greedy action tensor([ 0.6371, -0.3903, 0.1451, -0.3395]) tensor([0.4263, 0.1526, 0.2606, 0.1605]) -Greedy action tensor([ 0.9828, -0.6738, -0.4091, -0.3530]) tensor([0.5874, 0.1121, 0.1460, 0.1545]) -Greedy action tensor([ 0.7548, -0.6113, 0.0125, -0.4274]) tensor([0.4908, 0.1252, 0.2336, 0.1505]) -Greedy action tensor([ 0.7023, -0.7189, -0.0759, -0.1103]) tensor([0.4663, 0.1126, 0.2142, 0.2069]) -Greedy action tensor([ 0.1723, 0.1281, -0.1698, -0.5210]) tensor([0.3158, 0.3021, 0.2243, 0.1579]) -Greedy action tensor([ 0.7359, -0.5409, -0.1419, -0.4231]) tensor([0.4979, 0.1389, 0.2070, 0.1562]) -Greedy action tensor([ 0.8553, -0.3915, -0.0602, -0.5085]) tensor([0.5146, 0.1479, 0.2060, 0.1316]) -Greedy action tensor([ 0.8477, -0.1533, -0.0896, -0.1255]) tensor([0.4679, 0.1720, 0.1833, 0.1768]) -Greedy action tensor([ 0.8585, -0.7417, -0.0400, -0.5228]) tensor([0.5376, 0.1085, 0.2189, 0.1351]) -Greedy action tensor([ 0.8223, -0.4083, -0.0117, -0.4803]) tensor([0.5004, 0.1462, 0.2174, 0.1360]) -Greedy action tensor([ 1.0199, -0.6437, -0.1429, -0.4431]) tensor([0.5768, 0.1093, 0.1803, 0.1336]) -Greedy action tensor([ 0.8596, -1.1456, 0.1129, -0.4706]) tensor([0.5339, 0.0719, 0.2530, 0.1412]) -Greedy action tensor([ 0.4680, 0.1420, -0.0857, 0.0424]) tensor([0.3390, 0.2447, 0.1949, 0.2215]) -Greedy action tensor([ 0.6560, -0.5230, 0.0446, -0.2638]) tensor([0.4447, 0.1368, 0.2413, 0.1773]) -Greedy action tensor([ 1.0697, -0.5093, -0.1609, -0.2725]) tensor([0.5683, 0.1172, 0.1660, 0.1485]) -Greedy action tensor([ 0.7413, -0.3960, -0.0548, -0.3798]) tensor([0.4767, 0.1529, 0.2150, 0.1554]) -Greedy action tensor([ 0.6561, -0.2563, -0.0782, -0.1452]) tensor([0.4292, 0.1723, 0.2059, 0.1926]) -Greedy action tensor([ 1.2100, -0.3713, -0.1164, -0.2756]) tensor([0.5891, 0.1212, 0.1564, 0.1334]) -Greedy action tensor([0.2519, 0.0748, 0.1674, 0.2924]) tensor([0.2633, 0.2206, 0.2420, 0.2742]) -Greedy action tensor([ 1.0654, -0.9291, -0.0039, -0.5935]) tensor([0.5989, 0.0815, 0.2056, 0.1140]) -Greedy action tensor([ 0.4996, -0.5263, -0.1448, -0.1169]) tensor([0.4127, 0.1479, 0.2166, 0.2228]) -Greedy action tensor([ 0.3417, -0.0539, -0.2383, -0.1594]) tensor([0.3522, 0.2372, 0.1972, 0.2134]) -Greedy action tensor([ 0.4652, -0.2103, -0.0352, -0.1049]) tensor([0.3730, 0.1898, 0.2262, 0.2109]) -Greedy action tensor([ 0.6147, -0.2141, 0.0364, -0.1181]) tensor([0.4036, 0.1762, 0.2263, 0.1939]) -Greedy action tensor([ 0.3860, 0.0974, -0.2189, -0.0185]) tensor([0.3375, 0.2529, 0.1843, 0.2252]) -Greedy action tensor([ 0.9788, -0.5233, -0.1395, -0.4337]) tensor([0.5577, 0.1242, 0.1823, 0.1358]) -Greedy action tensor([ 0.6475, -0.4231, -0.0577, -0.4404]) tensor([0.4600, 0.1577, 0.2273, 0.1550]) -Greedy action tensor([ 0.7907, -0.5858, -0.0612, -0.3631]) tensor([0.5014, 0.1266, 0.2139, 0.1582]) -Greedy action tensor([ 0.8601, -0.5808, -0.0910, -0.1394]) tensor([0.5022, 0.1189, 0.1940, 0.1848]) -Greedy action tensor([ 0.6567, -0.2184, -0.0900, -0.1215]) tensor([0.4255, 0.1774, 0.2017, 0.1954]) -Greedy action tensor([ 0.5509, -0.2594, -0.1344, -0.1637]) tensor([0.4101, 0.1824, 0.2067, 0.2007]) -Greedy action tensor([ 1.0193, -0.5730, -0.1445, -0.2797]) tensor([0.5591, 0.1138, 0.1746, 0.1525]) -Greedy action tensor([ 0.6161, -0.4559, 0.1721, -0.6247]) tensor([0.4400, 0.1506, 0.2822, 0.1272]) -Greedy action tensor([ 0.8876, -0.4945, 0.0626, -0.1826]) tensor([0.4921, 0.1235, 0.2156, 0.1687]) -Greedy action tensor([ 5.8288e-01, -2.9997e-01, -4.5452e-02, -5.4570e-04]) tensor([0.3992, 0.1651, 0.2130, 0.2227]) -Greedy action tensor([ 0.8525, -0.5058, -0.0710, -0.4213]) tensor([0.5171, 0.1329, 0.2053, 0.1447]) -Greedy action tensor([ 0.7521, -0.3608, 0.0008, -0.2726]) tensor([0.4631, 0.1522, 0.2185, 0.1662]) -Greedy action tensor([ 0.8080, -0.3336, -0.0345, -0.1377]) tensor([0.4676, 0.1493, 0.2014, 0.1816]) -Greedy action tensor([ 0.3869, -0.2917, -0.0881, -0.2050]) tensor([0.3728, 0.1891, 0.2318, 0.2063]) -Greedy action tensor([ 0.9762, -0.6310, -0.0019, -0.4446]) tensor([0.5501, 0.1103, 0.2068, 0.1328]) -Greedy action tensor([ 0.3993, 0.1166, -0.0230, -0.5230]) tensor([0.3563, 0.2685, 0.2335, 0.1416]) -Greedy action tensor([ 1.1218, -0.9721, 0.1093, -0.5940]) tensor([0.6001, 0.0739, 0.2180, 0.1079]) -Greedy action tensor([ 0.8221, -0.7389, 0.1104, -0.6951]) tensor([0.5208, 0.1093, 0.2556, 0.1142]) -Greedy action tensor([ 0.7650, -0.4817, -0.0853, -0.4102]) tensor([0.4942, 0.1421, 0.2112, 0.1526]) -Greedy action tensor([ 0.9827, -0.8792, 0.0934, -0.4025]) tensor([0.5505, 0.0855, 0.2262, 0.1378]) -Greedy action tensor([ 0.7282, -0.3719, -0.0312, -0.1986]) tensor([0.4553, 0.1515, 0.2130, 0.1802]) -Greedy action tensor([ 0.8264, -0.5190, -0.1976, -0.1427]) tensor([0.5002, 0.1303, 0.1797, 0.1898]) -Greedy action tensor([ 0.9521, -0.7747, 0.1919, -0.3953]) tensor([0.5248, 0.0933, 0.2454, 0.1364]) -Greedy action tensor([ 0.8698, -0.6252, -0.0118, -0.6274]) tensor([0.5370, 0.1204, 0.2224, 0.1202]) -Greedy action tensor([ 0.5735, -0.3191, 0.0348, -0.1250]) tensor([0.4015, 0.1645, 0.2343, 0.1997]) -Greedy action tensor([ 0.8573, -0.5121, 0.0953, -0.3375]) tensor([0.4941, 0.1256, 0.2306, 0.1496]) -Greedy action tensor([ 0.8753, -0.7297, 0.0197, -0.2909]) tensor([0.5161, 0.1037, 0.2194, 0.1608]) -Greedy action tensor([-0.5736, -0.7106, 0.9339, -1.3893]) tensor([0.1464, 0.1277, 0.6612, 0.0648]) -Greedy action tensor([ 0.0131, 0.2799, -0.0597, -0.1641]) tensor([0.2455, 0.3206, 0.2283, 0.2056]) -Greedy action tensor([ 1.5678, -0.9006, 0.7792, 0.7559]) tensor([0.5042, 0.0427, 0.2292, 0.2239]) -Greedy action tensor([ 0.0492, -0.3575, 0.2184, -0.8309]) tensor([0.3063, 0.2039, 0.3628, 0.1270]) -Greedy action tensor([ 0.0372, -2.1400, 0.2956, 0.4803]) tensor([0.2522, 0.0286, 0.3265, 0.3927]) -Greedy action tensor([-1.6987, 0.6953, -0.6765, -0.3993]) tensor([0.0543, 0.5954, 0.1510, 0.1993]) -Greedy action tensor([ 1.7805, -0.5177, -0.5677, 0.5067]) tensor([0.6776, 0.0681, 0.0647, 0.1896]) -Greedy action tensor([ 0.0224, -1.1864, -0.3456, 0.1851]) tensor([0.3157, 0.0943, 0.2185, 0.3715]) -Greedy action tensor([ 0.1295, -1.2539, -0.6868, 0.7793]) tensor([0.2772, 0.0695, 0.1225, 0.5308]) -Greedy action tensor([-0.7067, -1.1534, 0.6599, -0.9527]) tensor([0.1576, 0.1008, 0.6183, 0.1233]) -Greedy action tensor([0.7123, 0.0487, 0.2946, 0.4308]) tensor([0.3415, 0.1759, 0.2249, 0.2577]) -Greedy action tensor([ 2.0051, -0.6423, 0.3284, 0.7170]) tensor([0.6521, 0.0462, 0.1219, 0.1798]) -Greedy action tensor([ 1.0404, -0.6054, 0.3750, 0.5911]) tensor([0.4265, 0.0822, 0.2192, 0.2721]) -Greedy action tensor([ 0.6386, -1.2737, 0.7656, 0.9751]) tensor([0.2715, 0.0401, 0.3083, 0.3801]) -Greedy action tensor([-0.5650, -0.5255, 0.5527, -0.0455]) tensor([0.1475, 0.1535, 0.4510, 0.2480]) -Greedy action tensor([ 0.6580, -0.8690, 0.1262, -0.0104]) tensor([0.4315, 0.0937, 0.2536, 0.2212]) -Greedy action tensor([ 0.7007, -0.0087, -0.5528, 0.2871]) tensor([0.4101, 0.2017, 0.1171, 0.2712]) -Greedy action tensor([ 0.9501, -1.2594, 1.6815, -0.2950]) tensor([0.2877, 0.0316, 0.5979, 0.0828]) -Greedy action tensor([ 0.5476, -1.0675, 0.7300, 1.3439]) tensor([0.2166, 0.0431, 0.2600, 0.4803]) -Greedy action tensor([ 0.8997, -1.6709, -0.1057, 1.1166]) tensor([0.3725, 0.0285, 0.1363, 0.4627]) -Greedy action tensor([-0.7213, -0.4286, 0.9251, 0.3439]) tensor([0.0959, 0.1285, 0.4975, 0.2782]) -Greedy action tensor([ 0.7679, 0.7624, -0.0474, -0.4656]) tensor([0.3665, 0.3645, 0.1622, 0.1068]) -Greedy action tensor([-0.3662, -0.3308, 2.3019, -0.0623]) tensor([0.0562, 0.0582, 0.8095, 0.0761]) -Greedy action tensor([ 0.5480, 0.2696, 0.4166, -0.0616]) tensor([0.3147, 0.2382, 0.2760, 0.1711]) -Greedy action tensor([-0.9448, -0.9686, 1.4632, -0.0348]) tensor([0.0642, 0.0627, 0.7135, 0.1595]) -Greedy action tensor([ 0.4380, -0.2208, 0.5635, 1.0816]) tensor([0.2196, 0.1136, 0.2489, 0.4179]) -Greedy action tensor([-0.9053, -1.5996, -0.5581, 0.3191]) tensor([0.1583, 0.0791, 0.2240, 0.5386]) -Greedy action tensor([ 0.5312, -0.0217, -0.2398, -0.1108]) tensor([0.3900, 0.2244, 0.1804, 0.2052]) -Greedy action tensor([ 1.8439, -1.9666, 0.4396, 0.8155]) tensor([0.6153, 0.0136, 0.1511, 0.2200]) -Greedy action tensor([ 0.2631, -1.3485, 0.4298, -0.3493]) tensor([0.3421, 0.0683, 0.4042, 0.1854]) -Greedy action tensor([-0.5633, 0.0774, 0.1050, -0.7475]) tensor([0.1760, 0.3341, 0.3434, 0.1464]) -Greedy action tensor([ 0.3522, -0.9148, -0.2173, 0.2522]) tensor([0.3633, 0.1023, 0.2056, 0.3288]) -Greedy action tensor([-0.0239, -1.0964, 0.0912, 0.7767]) tensor([0.2132, 0.0729, 0.2392, 0.4747]) -Greedy action tensor([1.0902, 0.1682, 1.1375, 0.3428]) tensor([0.3425, 0.1362, 0.3591, 0.1622]) -Greedy action tensor([ 1.2760, -1.0093, 0.0018, -0.5139]) tensor([0.6459, 0.0657, 0.1806, 0.1078]) -Greedy action tensor([ 0.6005, 0.1936, -0.1063, 0.0136]) tensor([0.3683, 0.2452, 0.1817, 0.2048]) -Greedy action tensor([-1.2321, 0.5703, -0.6583, 0.0932]) tensor([0.0793, 0.4812, 0.1408, 0.2986]) -Greedy action tensor([-0.3069, -1.4294, 0.4339, -0.5126]) tensor([0.2360, 0.0768, 0.4951, 0.1921]) -Greedy action tensor([ 1.6297, -0.0510, 0.1064, 0.2905]) tensor([0.6001, 0.1118, 0.1308, 0.1573]) -Greedy action tensor([ 0.8375, -1.2506, 1.2099, 0.4046]) tensor([0.3102, 0.0384, 0.4502, 0.2012]) -Greedy action tensor([ 0.1051, 0.7169, 0.5564, -0.5665]) tensor([0.2030, 0.3744, 0.3189, 0.1037]) -Greedy action tensor([ 1.5617, -1.9182, 0.3416, 0.6090]) tensor([0.5842, 0.0180, 0.1725, 0.2253]) -Greedy action tensor([ 1.5463, -1.0858, 1.3888, 0.2952]) tensor([0.4520, 0.0325, 0.3861, 0.1294]) -Greedy action tensor([ 0.6545, -1.2521, 0.1082, 0.5097]) tensor([0.3857, 0.0573, 0.2233, 0.3337]) -Greedy action tensor([ 0.6619, -0.3519, 0.1576, 0.4550]) tensor([0.3597, 0.1305, 0.2173, 0.2925]) -Greedy action tensor([ 0.5701, -0.0735, 0.3046, 0.7194]) tensor([0.2896, 0.1521, 0.2221, 0.3362]) -Greedy action tensor([ 0.1369, -0.3315, -0.4685, -0.2449]) tensor([0.3503, 0.2193, 0.1912, 0.2392]) -Greedy action tensor([ 0.1564, 0.4200, -0.1030, -0.2241]) tensor([0.2662, 0.3465, 0.2054, 0.1820]) -Greedy action tensor([-0.3090, -0.7526, 0.3856, -0.8819]) tensor([0.2376, 0.1525, 0.4759, 0.1340]) -Greedy action tensor([-0.7471, -2.5751, -0.4605, 0.4941]) tensor([0.1680, 0.0270, 0.2238, 0.5812]) -Greedy action tensor([-0.1183, -1.0036, -0.0986, 0.6126]) tensor([0.2218, 0.0915, 0.2262, 0.4606]) -Greedy action tensor([-0.6239, 0.2986, 0.2604, -0.6591]) tensor([0.1449, 0.3645, 0.3508, 0.1399]) -Greedy action tensor([ 0.8658, -0.0310, -0.3581, 0.5521]) tensor([0.4111, 0.1677, 0.1209, 0.3004]) -Greedy action tensor([ 1.0215, -1.2844, 0.0225, 1.0948]) tensor([0.3931, 0.0392, 0.1448, 0.4230]) -Greedy action tensor([-0.7363, -1.2690, 1.4111, -1.1071]) tensor([0.0923, 0.0542, 0.7899, 0.0637]) -Greedy action tensor([ 1.8811, -0.5586, 0.3472, 0.6623]) tensor([0.6256, 0.0545, 0.1349, 0.1849]) -Greedy action tensor([-0.7247, 0.9184, 0.0943, -0.2401]) tensor([0.0994, 0.5139, 0.2254, 0.1613]) -Greedy action tensor([ 0.9237, -0.0799, 0.8134, 0.6459]) tensor([0.3312, 0.1214, 0.2966, 0.2508]) -Greedy action tensor([ 1.3526, 0.0784, 0.1586, -0.7476]) tensor([0.5865, 0.1640, 0.1777, 0.0718]) -Greedy action tensor([-0.1709, 0.0150, -0.0481, -0.0360]) tensor([0.2233, 0.2688, 0.2524, 0.2555]) -Greedy action tensor([ 1.3644e-01, -1.3436e-03, 1.7773e+00, 5.4161e-01]) tensor([0.1172, 0.1021, 0.6049, 0.1758]) -Greedy action tensor([ 1.2068, -0.5345, 1.6131, -1.0534]) tensor([0.3596, 0.0630, 0.5398, 0.0375]) -Greedy action tensor([-0.2748, -0.3261, -0.8165, 0.1534]) tensor([0.2459, 0.2336, 0.1431, 0.3774]) -Greedy action tensor([ 0.8675, 0.3337, 0.5998, -0.2672]) tensor([0.3741, 0.2194, 0.2862, 0.1203]) -Greedy action tensor([2.2132, 0.0968, 0.2518, 0.5011]) tensor([0.6937, 0.0836, 0.0976, 0.1252]) -Greedy action tensor([-1.0422, -0.3790, -1.0088, -0.1466]) tensor([0.1557, 0.3022, 0.1610, 0.3812]) -Greedy action tensor([0.6835, 0.3220, 0.9165, 0.2027]) tensor([0.2795, 0.1947, 0.3529, 0.1728]) -Greedy action tensor([-0.6920, -0.0345, -0.1763, 0.5084]) tensor([0.1262, 0.2435, 0.2113, 0.4191]) -Greedy action tensor([ 0.0040, -1.3011, -0.0067, -0.3444]) tensor([0.3371, 0.0914, 0.3335, 0.2380]) -Greedy action tensor([-0.7480, -1.2588, 0.2954, -0.8849]) tensor([0.1883, 0.1130, 0.5345, 0.1642]) -Greedy action tensor([ 1.0913, -1.1260, 1.3407, -0.3095]) tensor([0.3790, 0.0413, 0.4863, 0.0934]) -Greedy action tensor([-0.0746, 0.5554, 1.4137, -0.9594]) tensor([0.1295, 0.2432, 0.5738, 0.0535]) -Greedy action tensor([ 0.0294, -2.2497, -0.4284, 0.1575]) tensor([0.3482, 0.0357, 0.2203, 0.3958]) -Greedy action tensor([ 0.7267, -1.7299, -0.3979, -0.8469]) tensor([0.6181, 0.0530, 0.2008, 0.1281]) -Greedy action tensor([ 0.4981, -0.8407, 0.3924, 1.0362]) tensor([0.2581, 0.0677, 0.2322, 0.4420]) -Greedy action tensor([-1.3042, 0.8039, 0.2495, 0.1504]) tensor([0.0548, 0.4512, 0.2592, 0.2347]) -Greedy action tensor([-0.2498, -0.7443, -0.2126, 0.1758]) tensor([0.2393, 0.1460, 0.2484, 0.3663]) -Greedy action tensor([ 0.9157, -0.3410, 0.9661, 0.3730]) tensor([0.3428, 0.0976, 0.3605, 0.1992]) -Greedy action tensor([ 0.0057, -1.6258, 0.9471, 0.9895]) tensor([0.1554, 0.0304, 0.3985, 0.4157]) -Greedy action tensor([ 0.6108, -0.1086, -0.4450, 0.7605]) tensor([0.3337, 0.1625, 0.1161, 0.3876]) -Greedy action tensor([ 0.7964, -0.0616, -0.2666, -0.3958]) tensor([0.4824, 0.2045, 0.1666, 0.1464]) -Greedy action tensor([ 0.8318, -0.5042, 0.0540, -0.5192]) tensor([0.5047, 0.1327, 0.2319, 0.1307]) -Greedy action tensor([ 0.7271, -0.3079, -0.0302, -0.2608]) tensor([0.4553, 0.1617, 0.2135, 0.1695]) -Greedy action tensor([ 0.4996, -0.3487, -0.0105, -0.2898]) tensor([0.4028, 0.1725, 0.2418, 0.1829]) -Greedy action tensor([ 1.0221, -0.4842, -0.1665, -0.2186]) tensor([0.5508, 0.1221, 0.1678, 0.1593]) -Greedy action tensor([ 0.7356, -0.6981, 0.3824, -0.5969]) tensor([0.4536, 0.1081, 0.3186, 0.1197]) -Greedy action tensor([ 0.5612, -0.5907, -0.0637, -0.2004]) tensor([0.4314, 0.1363, 0.2309, 0.2014]) -Greedy action tensor([ 0.7785, -0.7869, -0.0398, -0.2522]) tensor([0.4983, 0.1041, 0.2198, 0.1778]) -Greedy action tensor([ 0.5738, -0.3157, 0.0303, -0.1377]) tensor([0.4028, 0.1655, 0.2339, 0.1978]) -Greedy action tensor([ 0.8096, -0.5049, 0.1061, -0.5241]) tensor([0.4933, 0.1325, 0.2441, 0.1300]) -Greedy action tensor([ 1.0271, -0.8589, 0.0162, -0.3479]) tensor([0.5655, 0.0858, 0.2058, 0.1430]) -Greedy action tensor([ 0.4820, -0.1913, 0.2144, -0.3085]) tensor([0.3665, 0.1869, 0.2804, 0.1662]) -Greedy action tensor([ 1.0705, -0.4820, 0.0728, -0.7065]) tensor([0.5716, 0.1210, 0.2108, 0.0967]) -Greedy action tensor([ 0.8989, -0.7466, 0.0404, -0.3850]) tensor([0.5281, 0.1019, 0.2238, 0.1463]) -Greedy action tensor([ 0.6159, -0.0536, -0.0161, -0.0688]) tensor([0.3925, 0.2010, 0.2086, 0.1979]) -Greedy action tensor([ 0.5480, -0.4625, -0.0166, -0.3249]) tensor([0.4255, 0.1549, 0.2419, 0.1777]) -Greedy action tensor([ 0.8072, -0.5967, 0.0283, -0.4076]) tensor([0.4997, 0.1227, 0.2293, 0.1483]) -Greedy action tensor([ 1.1575, -0.3332, -0.3317, -0.3576]) tensor([0.5986, 0.1348, 0.1350, 0.1316]) -Greedy action tensor([ 1.0053, -0.6538, -0.0036, -0.3904]) tensor([0.5548, 0.1056, 0.2023, 0.1374]) -Greedy action tensor([ 0.5818, -0.2712, -0.0852, -0.2263]) tensor([0.4193, 0.1787, 0.2152, 0.1869]) -Greedy action tensor([ 0.3588, 0.1223, -0.1537, -0.4322]) tensor([0.3519, 0.2778, 0.2108, 0.1596]) -Greedy action tensor([ 1.1622, -0.6882, 0.0594, -0.6108]) tensor([0.6028, 0.0947, 0.2001, 0.1024]) -Greedy action tensor([ 0.4754, 0.0705, -0.0748, -0.0797]) tensor([0.3549, 0.2367, 0.2047, 0.2037]) -Greedy action tensor([ 0.4612, -0.3332, 0.0267, -0.2172]) tensor([0.3836, 0.1733, 0.2484, 0.1946]) -Greedy action tensor([ 0.7449, -0.2434, 0.0316, -0.1542]) tensor([0.4407, 0.1640, 0.2159, 0.1793]) -Greedy action tensor([ 0.6519, -0.4055, -0.1419, -0.2230]) tensor([0.4512, 0.1567, 0.2040, 0.1881]) -Greedy action tensor([ 0.4095, 0.2328, -0.1417, -0.2450]) tensor([0.3408, 0.2856, 0.1964, 0.1771]) -Greedy action tensor([ 0.5055, -0.1357, -0.0195, -0.2168]) tensor([0.3841, 0.2023, 0.2272, 0.1865]) -Greedy action tensor([ 0.4675, -0.0567, 0.0024, -0.2128]) tensor([0.3668, 0.2171, 0.2304, 0.1857]) -Greedy action tensor([ 0.5907, -0.3484, 0.0077, -0.1034]) tensor([0.4084, 0.1597, 0.2280, 0.2040]) -Greedy action tensor([ 0.2165, 0.1205, -0.1993, -0.2939]) tensor([0.3156, 0.2867, 0.2082, 0.1894]) -Greedy action tensor([ 0.7601, -0.4783, -0.0557, -0.3439]) tensor([0.4846, 0.1405, 0.2143, 0.1607]) -Greedy action tensor([ 0.3525, -0.2247, -0.0118, -0.2036]) tensor([0.3534, 0.1984, 0.2455, 0.2027]) -Greedy action tensor([ 0.5952, -0.6947, 0.0162, -0.2373]) tensor([0.4404, 0.1212, 0.2468, 0.1915]) -Greedy action tensor([ 0.8771, -0.5880, -0.1106, -0.2864]) tensor([0.5220, 0.1206, 0.1944, 0.1630]) -Greedy action tensor([ 0.7089, 0.2466, -0.2434, -0.0321]) tensor([0.4012, 0.2527, 0.1548, 0.1912]) -Greedy action tensor([ 0.2786, -0.3032, -0.0737, -0.1311]) tensor([0.3418, 0.1910, 0.2403, 0.2269]) -Greedy action tensor([ 0.7446, -0.2314, -0.0750, -0.0618]) tensor([0.4417, 0.1664, 0.1946, 0.1972]) -Greedy action tensor([ 0.9855, -0.7829, -0.0172, -0.5958]) tensor([0.5737, 0.0979, 0.2105, 0.1180]) -Greedy action tensor([ 0.6479, -0.4492, -0.1016, -0.4333]) tensor([0.4661, 0.1556, 0.2203, 0.1581]) -Greedy action tensor([ 0.4025, -0.2386, -0.1010, -0.1700]) tensor([0.3710, 0.1954, 0.2242, 0.2093]) -Greedy action tensor([ 0.7074, -0.1928, -0.0705, -0.0815]) tensor([0.4310, 0.1752, 0.1980, 0.1958]) -Greedy action tensor([ 7.9318e-01, -2.9017e-01, 6.1758e-04, -2.0077e-01]) tensor([0.4627, 0.1566, 0.2095, 0.1712]) -Greedy action tensor([ 0.6655, -0.3692, -0.2152, -0.2360]) tensor([0.4596, 0.1633, 0.1905, 0.1866]) -Greedy action tensor([ 0.7985, -0.4815, 0.0407, -0.4070]) tensor([0.4887, 0.1359, 0.2290, 0.1464]) -Greedy action tensor([ 0.3999, 0.0197, -0.0712, 0.1455]) tensor([0.3243, 0.2218, 0.2025, 0.2515]) -Greedy action tensor([ 0.7877, -0.4205, -0.1637, -0.4538]) tensor([0.5066, 0.1513, 0.1957, 0.1464]) -Greedy action tensor([ 0.6893, -0.5865, -0.1666, -0.1112]) tensor([0.4644, 0.1297, 0.1973, 0.2086]) -Greedy action tensor([ 0.6442, -0.5928, 0.0361, -0.1393]) tensor([0.4364, 0.1267, 0.2376, 0.1994]) -Greedy action tensor([ 0.5578, -0.0461, -0.0649, -0.0248]) tensor([0.3786, 0.2069, 0.2031, 0.2114]) -Greedy action tensor([ 0.4696, -0.2951, -0.1987, -0.1171]) tensor([0.3946, 0.1837, 0.2023, 0.2195]) -Greedy action tensor([ 0.5936, -0.4682, -0.1035, -0.1669]) tensor([0.4327, 0.1496, 0.2155, 0.2022]) -Greedy action tensor([ 0.4237, -0.3279, -0.0873, -0.4264]) tensor([0.4002, 0.1887, 0.2401, 0.1710]) -Greedy action tensor([ 0.2014, 0.3117, -0.1172, -0.2185]) tensor([0.2856, 0.3190, 0.2077, 0.1877]) -Greedy action tensor([ 0.9626, -0.3944, 0.1042, -0.2005]) tensor([0.5015, 0.1291, 0.2126, 0.1568]) -Greedy action tensor([ 1.0042, -0.1106, -0.0110, -0.4614]) tensor([0.5205, 0.1707, 0.1886, 0.1202]) -Greedy action tensor([ 0.7702, -0.5439, -0.0528, -0.4237]) tensor([0.4973, 0.1336, 0.2184, 0.1507]) -Greedy action tensor([ 1.0407, -0.6418, -0.0255, -0.5241]) tensor([0.5749, 0.1069, 0.1980, 0.1202]) -Greedy action tensor([ 0.3326, -0.3115, -0.0057, -0.2212]) tensor([0.3555, 0.1867, 0.2535, 0.2043]) -Greedy action tensor([ 0.5290, -0.3463, 0.0246, -0.4024]) tensor([0.4141, 0.1726, 0.2501, 0.1632]) -Greedy action tensor([ 0.7219, -0.0512, 0.0951, -0.4838]) tensor([0.4357, 0.2011, 0.2328, 0.1305]) -Greedy action tensor([ 0.8257, -0.6645, 0.0122, -0.4667]) tensor([0.5146, 0.1160, 0.2281, 0.1413]) -Greedy action tensor([ 0.2461, -0.1011, -0.1283, -0.2367]) tensor([0.3321, 0.2347, 0.2284, 0.2049]) -Greedy action tensor([ 0.6165, -0.7084, -0.0058, -0.2051]) tensor([0.4460, 0.1186, 0.2394, 0.1961]) -Greedy action tensor([ 0.5284, -0.3211, -0.0135, -0.2887]) tensor([0.4080, 0.1745, 0.2373, 0.1802]) -Greedy action tensor([ 1.0517, -0.7800, -0.1479, -0.4414]) tensor([0.5931, 0.0950, 0.1787, 0.1332]) -Greedy action tensor([ 8.3708e-01, -4.7465e-01, 4.9667e-04, -2.9437e-01]) tensor([0.4938, 0.1330, 0.2139, 0.1593]) -Greedy action tensor([ 1.1562, -0.7834, -0.0778, -0.4821]) tensor([0.6138, 0.0882, 0.1787, 0.1193]) -Greedy action tensor([ 0.7688, -0.2076, -0.3551, -0.4336]) tensor([0.4995, 0.1881, 0.1623, 0.1501]) -Greedy action tensor([ 0.6430, -0.4014, -0.0355, -0.3265]) tensor([0.4467, 0.1572, 0.2267, 0.1694]) -Greedy action tensor([ 0.6654, -0.1104, 0.1730, -0.2745]) tensor([0.4061, 0.1870, 0.2482, 0.1587]) -Greedy action tensor([ 1.3290, -0.8982, 0.1663, -0.9328]) tensor([0.6559, 0.0707, 0.2051, 0.0683]) -Greedy action tensor([ 0.3692, -0.2827, -0.0626, -0.0659]) tensor([0.3549, 0.1849, 0.2305, 0.2297]) -Greedy action tensor([ 0.5031, -0.3876, -0.1065, -0.2721]) tensor([0.4142, 0.1700, 0.2251, 0.1908]) -Greedy action tensor([ 0.5401, 0.0379, -0.0468, 0.0902]) tensor([0.3573, 0.2162, 0.1987, 0.2278]) -Greedy action tensor([ 0.2839, -0.1685, -0.0463, -0.0327]) tensor([0.3243, 0.2063, 0.2331, 0.2363]) -Greedy action tensor([ 0.8338, -0.0971, -0.0065, -0.3500]) tensor([0.4691, 0.1849, 0.2024, 0.1436]) -Greedy action tensor([ 0.3222, -0.2195, -0.1316, -0.2989]) tensor([0.3631, 0.2112, 0.2306, 0.1951]) -Greedy action tensor([ 0.9348, -0.5324, -0.0553, -0.3890]) tensor([0.5353, 0.1234, 0.1989, 0.1425]) -Greedy action tensor([ 0.4740, 0.0059, 0.0265, -0.2471]) tensor([0.3634, 0.2276, 0.2323, 0.1767]) -Greedy action tensor([ 0.7053, -0.6359, -0.0800, -0.2422]) tensor([0.4750, 0.1242, 0.2166, 0.1842]) -Greedy action tensor([ 1.2469, -0.1613, -0.1337, -0.3095]) tensor([0.5859, 0.1433, 0.1473, 0.1236]) -Greedy action tensor([ 0.7793, -0.5432, -0.3101, 0.3580]) tensor([0.4427, 0.1180, 0.1489, 0.2905]) -Greedy action tensor([ 1.4338, -0.6495, -0.1481, 0.1877]) tensor([0.6182, 0.0770, 0.1271, 0.1778]) -Greedy action tensor([ 1.4650, -0.5891, -0.1136, 0.2466]) tensor([0.6134, 0.0786, 0.1265, 0.1814]) -Greedy action tensor([ 1.3593, -0.1146, -0.6134, 0.3381]) tensor([0.5786, 0.1325, 0.0805, 0.2084]) -Greedy action tensor([ 1.3765, -0.0526, -0.6389, 0.3181]) tensor([0.5815, 0.1393, 0.0775, 0.2018]) -Greedy action tensor([ 0.8426, -0.3234, 0.1621, 0.0087]) tensor([0.4440, 0.1384, 0.2248, 0.1928]) -Greedy action tensor([ 1.3613, -0.5729, -0.1472, 0.1574]) tensor([0.6003, 0.0868, 0.1328, 0.1801]) -Greedy action tensor([ 1.6526, -0.6648, -0.3564, 0.2864]) tensor([0.6722, 0.0662, 0.0901, 0.1714]) -Greedy action tensor([ 1.5297, -0.3944, -0.3978, 0.4223]) tensor([0.6166, 0.0900, 0.0897, 0.2037]) -Greedy action tensor([ 1.5298, -0.5034, -0.3852, 0.0397]) tensor([0.6651, 0.0871, 0.0980, 0.1499]) -Greedy action tensor([ 1.4436, -0.7165, -0.4588, 0.5337]) tensor([0.5999, 0.0692, 0.0895, 0.2415]) -Greedy action tensor([ 0.4635, -0.1412, 0.0355, 0.1305]) tensor([0.3431, 0.1874, 0.2236, 0.2459]) -Greedy action tensor([ 0.7685, -0.2234, -0.6692, 0.5111]) tensor([0.4199, 0.1557, 0.0997, 0.3246]) -Greedy action tensor([ 1.7375, -0.2647, -0.4031, 0.3081]) tensor([0.6702, 0.0905, 0.0788, 0.1605]) -Greedy action tensor([ 1.4320, -0.2730, -0.3925, 0.4787]) tensor([0.5785, 0.1052, 0.0933, 0.2230]) -Greedy action tensor([ 1.6696, -0.4227, -0.6611, 0.0277]) tensor([0.7071, 0.0873, 0.0687, 0.1369]) -Greedy action tensor([ 1.2889, 0.1361, -0.4494, 0.4350]) tensor([0.5216, 0.1647, 0.0917, 0.2221]) -Greedy action tensor([ 2.4066, -1.1242, -0.4066, 0.5667]) tensor([0.8012, 0.0235, 0.0481, 0.1273]) -Greedy action tensor([ 1.1835, -0.2207, -0.4461, -0.0057]) tensor([0.5727, 0.1406, 0.1123, 0.1744]) -Greedy action tensor([ 1.8044, -0.2793, -0.2046, 0.0214]) tensor([0.7009, 0.0872, 0.0940, 0.1178]) -Greedy action tensor([ 1.5520, -0.1308, -0.6537, -0.0916]) tensor([0.6715, 0.1248, 0.0740, 0.1298]) -Greedy action tensor([ 1.6407, -0.2997, -1.0321, 0.3658]) tensor([0.6702, 0.0963, 0.0463, 0.1873]) -Greedy action tensor([ 0.9555, -0.3955, -0.2448, 0.2722]) tensor([0.4842, 0.1254, 0.1458, 0.2445]) -Greedy action tensor([ 1.8214, -1.2635, -0.0892, 0.8254]) tensor([0.6398, 0.0293, 0.0947, 0.2363]) -Greedy action tensor([ 2.6268, 0.7125, 0.3852, -0.0319]) tensor([0.7554, 0.1114, 0.0803, 0.0529]) -Greedy action tensor([ 1.7639, -0.2026, -0.1938, 0.3848]) tensor([0.6523, 0.0913, 0.0921, 0.1643]) -Greedy action tensor([ 1.5140, -0.7920, -0.3652, 0.3098]) tensor([0.6442, 0.0642, 0.0984, 0.1932]) -Greedy action tensor([ 1.3879, -0.8636, -0.2499, 0.2363]) tensor([0.6189, 0.0651, 0.1203, 0.1957]) -Greedy action tensor([ 1.2500, -0.2456, -0.9023, 0.2094]) tensor([0.5905, 0.1323, 0.0686, 0.2086]) -Greedy action tensor([ 1.5801, -0.7868, -0.4672, 0.6468]) tensor([0.6188, 0.0580, 0.0799, 0.2433]) -Greedy action tensor([ 1.4092, -0.4408, -0.3632, 0.8708]) tensor([0.5233, 0.0823, 0.0889, 0.3055]) -Greedy action tensor([ 1.6068, -0.5318, -0.8247, 0.4403]) tensor([0.6591, 0.0777, 0.0579, 0.2053]) -Greedy action tensor([ 1.3031, -0.2273, -0.4567, 0.4121]) tensor([0.5559, 0.1203, 0.0957, 0.2281]) -Greedy action tensor([ 1.3625, -0.0117, -0.7985, 0.3172]) tensor([0.5815, 0.1471, 0.0670, 0.2044]) -Greedy action tensor([ 0.5212, -0.4471, 0.2915, 0.2991]) tensor([0.3361, 0.1276, 0.2671, 0.2692]) -Greedy action tensor([ 1.3419, -0.5977, -0.3796, 0.0537]) tensor([0.6256, 0.0899, 0.1119, 0.1725]) -Greedy action tensor([ 1.3996, -0.8993, -0.3287, 0.2556]) tensor([0.6264, 0.0629, 0.1112, 0.1995]) -Greedy action tensor([ 1.1602, -0.5012, -0.4770, 0.1933]) tensor([0.5667, 0.1076, 0.1102, 0.2155]) -Greedy action tensor([ 1.0233, -0.3575, -0.3496, 0.0091]) tensor([0.5355, 0.1346, 0.1357, 0.1942]) -Greedy action tensor([ 1.3046, -0.1846, -0.2131, 0.0419]) tensor([0.5788, 0.1305, 0.1269, 0.1637]) -Greedy action tensor([ 1.4676, -0.3249, -0.6672, 0.3760]) tensor([0.6171, 0.1028, 0.0730, 0.2071]) -Greedy action tensor([ 1.1959, 0.0327, -0.5770, 0.3674]) tensor([0.5211, 0.1628, 0.0885, 0.2276]) -Greedy action tensor([ 1.8835, -0.4998, -0.6186, 0.6148]) tensor([0.6871, 0.0634, 0.0563, 0.1932]) -Greedy action tensor([ 1.1382, 0.0270, -0.6949, 0.0795]) tensor([0.5447, 0.1793, 0.0871, 0.1889]) -Greedy action tensor([ 1.2705, -0.0461, -0.4649, 0.3043]) tensor([0.5480, 0.1469, 0.0966, 0.2085]) -Greedy action tensor([ 1.3730, 0.0045, -0.4810, 0.3667]) tensor([0.5628, 0.1432, 0.0882, 0.2058]) -Greedy action tensor([ 2.1480, -1.4111, 0.0515, 0.5419]) tensor([0.7396, 0.0211, 0.0909, 0.1484]) -Greedy action tensor([ 0.7812, -0.2022, -0.0875, 0.4641]) tensor([0.3965, 0.1483, 0.1663, 0.2888]) -Greedy action tensor([ 1.9892, -1.1339, -0.2345, 0.5295]) tensor([0.7223, 0.0318, 0.0782, 0.1678]) -Greedy action tensor([ 1.2681, -0.5762, -0.2778, 0.7836]) tensor([0.5032, 0.0796, 0.1072, 0.3100]) -Greedy action tensor([ 1.2567, -0.7534, -0.1681, 0.1822]) tensor([0.5827, 0.0781, 0.1402, 0.1990]) -Greedy action tensor([ 0.4149, -0.1925, -0.0935, 0.2884]) tensor([0.3303, 0.1799, 0.1987, 0.2911]) -Greedy action tensor([ 1.6632, -0.0863, -0.2293, 0.8451]) tensor([0.5663, 0.0985, 0.0853, 0.2499]) -Greedy action tensor([ 2.2008, -0.9735, -0.2417, 0.7685]) tensor([0.7312, 0.0306, 0.0636, 0.1746]) -Greedy action tensor([ 1.5445, -0.6381, -0.6393, -0.0584]) tensor([0.7009, 0.0790, 0.0789, 0.1411]) -Greedy action tensor([ 1.2273, 0.1377, -0.4654, -0.1594]) tensor([0.5649, 0.1900, 0.1039, 0.1412]) -Greedy action tensor([ 1.3709, 0.0997, -0.3436, 0.3208]) tensor([0.5524, 0.1549, 0.0995, 0.1933]) -Greedy action tensor([ 1.1738, -0.5165, -0.2106, 0.5988]) tensor([0.5006, 0.0923, 0.1254, 0.2817]) -Greedy action tensor([ 1.7524, -0.8049, -0.0600, 0.6420]) tensor([0.6369, 0.0494, 0.1040, 0.2098]) -Greedy action tensor([ 1.8318, -0.8841, 0.1390, 0.4724]) tensor([0.6636, 0.0439, 0.1221, 0.1704]) -Greedy action tensor([ 1.5975, -0.4565, -0.0919, 0.5747]) tensor([0.5979, 0.0767, 0.1104, 0.2150]) -Greedy action tensor([ 1.7489, -0.7858, -0.4464, 0.3644]) tensor([0.6939, 0.0550, 0.0773, 0.1738]) -Greedy action tensor([ 1.1964, -0.2434, -0.3095, 0.2299]) tensor([0.5437, 0.1289, 0.1206, 0.2068]) -Greedy action tensor([ 1.5532, 0.0515, -0.3611, 0.3075]) tensor([0.6031, 0.1344, 0.0889, 0.1736]) -Greedy action tensor([ 0.9582, -0.3096, -0.2295, 0.1666]) tensor([0.4903, 0.1380, 0.1495, 0.2222]) -Greedy action tensor([ 1.3080, -0.0469, 0.6278, 0.0678]) tensor([0.4869, 0.1256, 0.2466, 0.1409]) -Greedy action tensor([ 1.9014, -0.5149, -0.7373, 0.9468]) tensor([0.6470, 0.0577, 0.0462, 0.2491]) -Greedy action tensor([ 2.2984, -1.0523, -0.4714, 0.4505]) tensor([0.7966, 0.0279, 0.0499, 0.1255]) -Greedy action tensor([ 1.2538, -0.4188, -0.2066, 0.1314]) tensor([0.5729, 0.1076, 0.1330, 0.1865]) -Greedy action tensor([ 1.8170, -0.7651, -0.3400, 0.3252]) tensor([0.7061, 0.0534, 0.0817, 0.1588]) -Greedy action tensor([ 1.4307, 0.1725, -0.5110, 0.2595]) tensor([0.5755, 0.1635, 0.0826, 0.1784]) -Greedy action tensor([ 1.4708, 0.0378, -0.5654, 0.6162]) tensor([0.5572, 0.1330, 0.0727, 0.2371]) -Greedy action tensor([ 1.3970, -0.8246, -0.2965, 0.5292]) tensor([0.5840, 0.0633, 0.1074, 0.2452]) -Greedy action tensor([ 1.1227, -0.4184, 0.2743, 0.0929]) tensor([0.5002, 0.1071, 0.2141, 0.1786]) -Greedy action tensor([ 1.4553, -0.1807, -0.6028, 0.4732]) tensor([0.5893, 0.1148, 0.0752, 0.2207]) -Greedy action tensor([ 1.2940, -0.1187, 0.0304, 0.3833]) tensor([0.5186, 0.1263, 0.1466, 0.2086]) -Greedy action tensor([ 1.6186, -0.3107, -0.7795, 0.1951]) tensor([0.6771, 0.0983, 0.0615, 0.1631]) -Greedy action tensor([ 1.9541, -1.0657, -0.0359, 0.7411]) tensor([0.6744, 0.0329, 0.0922, 0.2005]) -Greedy action tensor([ 1.2808, -0.5627, -0.2247, 0.5544]) tensor([0.5365, 0.0849, 0.1191, 0.2595]) -Greedy action tensor([ 1.1792, -0.2348, -0.5037, 0.2823]) tensor([0.5444, 0.1324, 0.1012, 0.2220]) -Greedy action tensor([ 1.7064, -0.5553, -0.4353, 0.4487]) tensor([0.6640, 0.0692, 0.0780, 0.1888]) -Greedy action tensor([-0.3278, -0.3314, 0.2379, 0.3164]) tensor([0.1766, 0.1760, 0.3110, 0.3364]) -Greedy action tensor([-0.7171, 0.2869, 0.4119, 0.7628]) tensor([0.0892, 0.2434, 0.2758, 0.3917]) -Greedy action tensor([-1.9445, -0.4482, 0.6677, -0.1802]) tensor([0.0401, 0.1791, 0.5467, 0.2341]) -Greedy action tensor([-1.8464, -0.4463, 0.6151, -0.1344]) tensor([0.0448, 0.1817, 0.5252, 0.2482]) -Greedy action tensor([-1.9307, -0.4286, 0.6610, -0.1728]) tensor([0.0406, 0.1822, 0.5418, 0.2354]) -Greedy action tensor([-1.7927, -0.4621, 0.5665, -0.0892]) tensor([0.0479, 0.1814, 0.5073, 0.2634]) -Greedy action tensor([-1.5320, -0.5780, 0.4106, 0.0579]) tensor([0.0646, 0.1677, 0.4508, 0.3168]) -Greedy action tensor([-1.8989, -0.4462, 0.6470, -0.1593]) tensor([0.0422, 0.1802, 0.5376, 0.2401]) -Greedy action tensor([-1.8638, -0.5462, -0.1280, -0.4321]) tensor([0.0685, 0.2559, 0.3888, 0.2868]) -Greedy action tensor([-0.8947, 0.8110, 0.0342, 0.3070]) tensor([0.0809, 0.4453, 0.2048, 0.2690]) -Greedy action tensor([-1.8934, -0.4102, 0.6446, -0.1515]) tensor([0.0421, 0.1854, 0.5324, 0.2402]) -Greedy action tensor([-1.9237, -0.4426, 0.6546, -0.1705]) tensor([0.0411, 0.1806, 0.5411, 0.2371]) -Greedy action tensor([-1.9089, -0.4342, 0.6511, -0.1590]) tensor([0.0416, 0.1816, 0.5376, 0.2392]) -Greedy action tensor([-1.9332, -0.4375, 0.6627, -0.1741]) tensor([0.0405, 0.1808, 0.5433, 0.2353]) -Greedy action tensor([-1.5057, 0.5220, 0.3159, 0.1320]) tensor([0.0502, 0.3813, 0.3103, 0.2582]) -Greedy action tensor([ 0.3476, 1.2040, -0.0098, 0.2144]) tensor([0.2029, 0.4777, 0.1419, 0.1776]) -Greedy action tensor([-0.9000, 0.9636, 0.0656, 0.4244]) tensor([0.0723, 0.4661, 0.1899, 0.2718]) -Greedy action tensor([-0.5011, 0.8814, 0.0386, 0.0313]) tensor([0.1190, 0.4742, 0.2041, 0.2027]) -Greedy action tensor([-1.8394, -0.4498, 0.6297, -0.1045]) tensor([0.0445, 0.1784, 0.5251, 0.2520]) -Greedy action tensor([-1.7158, -0.0383, 0.6370, -0.3748]) tensor([0.0483, 0.2587, 0.5082, 0.1848]) -Greedy action tensor([-1.6002e+00, -5.2221e-01, 4.8668e-01, 3.1692e-04]) tensor([0.0590, 0.1733, 0.4754, 0.2923]) -Greedy action tensor([-1.9466, -0.4512, 0.6683, -0.1815]) tensor([0.0401, 0.1787, 0.5473, 0.2340]) -Greedy action tensor([-1.7786, -0.4412, 0.5928, -0.0887]) tensor([0.0478, 0.1819, 0.5116, 0.2588]) -Greedy action tensor([-1.8832, -0.3974, 0.6344, -0.1363]) tensor([0.0425, 0.1876, 0.5264, 0.2436]) -Greedy action tensor([-0.4612, 0.4307, 0.4544, 1.4448]) tensor([0.0790, 0.1926, 0.1973, 0.5311]) -Greedy action tensor([-1.8795, -0.4304, 0.6323, -0.1512]) tensor([0.0431, 0.1835, 0.5309, 0.2425]) -Greedy action tensor([-1.5576, 0.1674, 0.4438, 0.3030]) tensor([0.0489, 0.2746, 0.3620, 0.3145]) -Greedy action tensor([-1.5474, -0.3010, 0.4676, -0.1021]) tensor([0.0616, 0.2144, 0.4624, 0.2616]) -Greedy action tensor([-1.8142, -0.4780, 0.6077, -0.1083]) tensor([0.0463, 0.1763, 0.5221, 0.2552]) -Greedy action tensor([-1.8924, -0.3330, 0.6240, -0.1422]) tensor([0.0418, 0.1990, 0.5183, 0.2409]) -Greedy action tensor([-1.9358, -0.4435, 0.6641, -0.1756]) tensor([0.0404, 0.1799, 0.5445, 0.2352]) -Greedy action tensor([-1.8533, -0.3773, 0.6561, -0.0533]) tensor([0.0422, 0.1844, 0.5184, 0.2550]) -Greedy action tensor([-1.5888, 0.2936, 0.4454, 0.0730]) tensor([0.0488, 0.3207, 0.3733, 0.2572]) -Greedy action tensor([-1.9272, -0.4034, 0.6485, -0.1743]) tensor([0.0408, 0.1873, 0.5363, 0.2356]) -Greedy action tensor([-1.9175, -0.3532, 0.6433, -0.1565]) tensor([0.0407, 0.1947, 0.5275, 0.2370]) -Greedy action tensor([-1.4538, -0.3772, 0.4481, 0.1270]) tensor([0.0645, 0.1894, 0.4324, 0.3136]) -Greedy action tensor([-1.8580, -0.5156, 0.6206, -0.1385]) tensor([0.0448, 0.1714, 0.5339, 0.2499]) -Greedy action tensor([-1.7324, -0.4219, 0.5976, -0.0538]) tensor([0.0492, 0.1823, 0.5052, 0.2634]) -Greedy action tensor([-1.8604, -0.3728, 0.6238, -0.1248]) tensor([0.0433, 0.1917, 0.5193, 0.2457]) -Greedy action tensor([-1.6651, -0.4470, 0.5403, 0.0474]) tensor([0.0526, 0.1780, 0.4776, 0.2918]) -Greedy action tensor([-1.2988, -0.4352, 0.3316, 0.1250]) tensor([0.0792, 0.1878, 0.4042, 0.3288]) -Greedy action tensor([-0.4537, 1.0027, -0.0380, 0.5109]) tensor([0.1060, 0.4550, 0.1607, 0.2782]) -Greedy action tensor([-1.9463, -0.4512, 0.6683, -0.1815]) tensor([0.0401, 0.1787, 0.5473, 0.2340]) -Greedy action tensor([-1.6328, -0.4390, 0.5890, 0.0908]) tensor([0.0523, 0.1725, 0.4822, 0.2930]) -Greedy action tensor([-1.8347, -0.4861, 0.6250, -0.1051]) tensor([0.0451, 0.1736, 0.5273, 0.2541]) -Greedy action tensor([-1.2214, -0.6795, 0.1363, -0.1859]) tensor([0.1061, 0.1825, 0.4125, 0.2989]) -Greedy action tensor([-1.9073, -0.3290, 0.6307, -0.1551]) tensor([0.0412, 0.1997, 0.5214, 0.2377]) -Greedy action tensor([-1.8276, -0.3667, 0.6121, -0.0660]) tensor([0.0442, 0.1907, 0.5075, 0.2576]) -Greedy action tensor([-1.4568, 0.4324, 0.3270, -0.0425]) tensor([0.0566, 0.3741, 0.3367, 0.2327]) -Greedy action tensor([-1.9370, -0.4376, 0.6617, -0.1757]) tensor([0.0404, 0.1810, 0.5434, 0.2352]) -Greedy action tensor([-1.8739, 0.0645, 0.5142, -0.1388]) tensor([0.0408, 0.2835, 0.4444, 0.2313]) -Greedy action tensor([-1.7922, 0.0231, 0.5256, -0.0526]) tensor([0.0435, 0.2672, 0.4416, 0.2477]) -Greedy action tensor([-1.8669, -0.4328, 0.6261, -0.1379]) tensor([0.0436, 0.1830, 0.5276, 0.2458]) -Greedy action tensor([-1.6275, -0.2953, 0.6249, -0.0382]) tensor([0.0521, 0.1974, 0.4954, 0.2552]) -Greedy action tensor([-1.4456, -0.1174, 0.4322, 0.1677]) tensor([0.0612, 0.2311, 0.4004, 0.3073]) -Greedy action tensor([-1.6165, 0.3840, 0.3741, 0.0717]) tensor([0.0473, 0.3500, 0.3465, 0.2561]) -Greedy action tensor([-1.0681, 0.5766, 0.3162, -0.6228]) tensor([0.0852, 0.4415, 0.3403, 0.1330]) -Greedy action tensor([-1.9409, -0.4489, 0.6652, -0.1786]) tensor([0.0403, 0.1791, 0.5458, 0.2347]) -Greedy action tensor([-1.7249, -0.3631, 0.5762, -0.0222]) tensor([0.0491, 0.1915, 0.4900, 0.2694]) -Greedy action tensor([-1.7200, -0.2244, 0.5627, -0.0176]) tensor([0.0482, 0.2150, 0.4724, 0.2644]) -Greedy action tensor([0.7630, 1.3007, 0.1826, 0.9051]) tensor([0.2260, 0.3870, 0.1265, 0.2605]) -Greedy action tensor([-1.9130, -0.4553, 0.6569, -0.1636]) tensor([0.0415, 0.1782, 0.5418, 0.2385]) -Greedy action tensor([-1.9337, -0.4515, 0.6642, -0.1733]) tensor([0.0406, 0.1786, 0.5450, 0.2359]) -Greedy action tensor([-1.8720, -0.4046, 0.6321, -0.1370]) tensor([0.0430, 0.1867, 0.5264, 0.2439]) -Greedy action tensor([-1.8467, -0.2281, 0.6022, -0.1214]) tensor([0.0430, 0.2172, 0.4982, 0.2416]) -Greedy action tensor([ 0.8363, 1.1479, -0.0759, 0.3537]) tensor([0.2955, 0.4035, 0.1187, 0.1824]) -Greedy action tensor([-1.8824, -0.4289, 0.6344, -0.1516]) tensor([0.0429, 0.1835, 0.5314, 0.2422]) -Greedy action tensor([-1.8456, -0.3901, 0.6211, -0.1152]) tensor([0.0440, 0.1887, 0.5188, 0.2484]) -Greedy action tensor([-1.8438, -0.3930, 0.6070, -0.1377]) tensor([0.0447, 0.1907, 0.5184, 0.2462]) -Greedy action tensor([-1.8397, -0.2892, 0.5915, -0.1121]) tensor([0.0440, 0.2075, 0.5007, 0.2477]) -Greedy action tensor([-1.8285, -0.3856, 0.6001, -0.1429]) tensor([0.0455, 0.1927, 0.5163, 0.2456]) -Greedy action tensor([-1.9220, -0.4171, 0.6572, -0.1583]) tensor([0.0408, 0.1836, 0.5377, 0.2379]) -Greedy action tensor([-1.7445, 0.1738, 0.4749, -0.0208]) tensor([0.0442, 0.3011, 0.4069, 0.2478]) -Greedy action tensor([-1.7651, -0.0592, 0.5116, -0.1044]) tensor([0.0465, 0.2559, 0.4530, 0.2446]) -Greedy action tensor([-1.5864, -0.5638, 0.4882, -0.0327]) tensor([0.0607, 0.1688, 0.4834, 0.2871]) -Greedy action tensor([-1.8115, -0.4332, 0.6077, -0.0899]) tensor([0.0459, 0.1820, 0.5155, 0.2566]) -Greedy action tensor([-1.6779, -0.3466, 0.6460, -0.0131]) tensor([0.0493, 0.1866, 0.5036, 0.2605]) -Greedy action tensor([-1.9230, -0.4509, 0.6594, -0.1706]) tensor([0.0411, 0.1789, 0.5432, 0.2368]) -Greedy action tensor([-0.6205, -0.5390, 0.3468, 0.4924]) tensor([0.1289, 0.1398, 0.3391, 0.3922]) -Greedy action tensor([-1.5058, -0.5409, 0.4183, 0.1030]) tensor([0.0646, 0.1696, 0.4427, 0.3230]) -Greedy action tensor([-1.0909, -0.6749, 0.2674, 0.3301]) tensor([0.0948, 0.1437, 0.3688, 0.3927]) -Greedy action tensor([-0.1030, -0.5426, 1.7497, 0.9883]) tensor([0.0909, 0.0586, 0.5798, 0.2708]) -Greedy action tensor([-0.6218, -1.1614, 0.1579, -0.3840]) tensor([0.1987, 0.1159, 0.4334, 0.2521]) -Greedy action tensor([ 0.8626, -0.1730, -0.0616, 1.5678]) tensor([0.2648, 0.0940, 0.1051, 0.5361]) -Greedy action tensor([ 0.0654, -1.8106, -0.2060, 1.1041]) tensor([0.2109, 0.0323, 0.1608, 0.5960]) -Greedy action tensor([-0.0886, -0.5601, 0.4997, 0.1467]) tensor([0.2132, 0.1331, 0.3840, 0.2698]) -Greedy action tensor([ 1.0245, -0.5640, 0.9817, 0.8553]) tensor([0.3326, 0.0679, 0.3187, 0.2808]) -Greedy action tensor([-0.1688, -1.1048, 0.6741, -0.0053]) tensor([0.2044, 0.0802, 0.4748, 0.2407]) -Greedy action tensor([ 0.7094, -1.6629, 0.0820, 0.9493]) tensor([0.3450, 0.0322, 0.1842, 0.4386]) -Greedy action tensor([-0.2494, -1.0019, -0.1340, 0.1711]) tensor([0.2429, 0.1145, 0.2727, 0.3699]) -Greedy action tensor([0.8970, 0.4674, 0.1821, 0.0342]) tensor([0.3903, 0.2540, 0.1910, 0.1647]) -Greedy action tensor([ 1.3698, -0.5858, 1.1535, 0.9233]) tensor([0.3866, 0.0547, 0.3114, 0.2474]) -Greedy action tensor([ 0.3988, -0.0241, 0.2149, -0.5198]) tensor([0.3465, 0.2270, 0.2883, 0.1383]) -Greedy action tensor([ 1.2374, -0.2712, 0.5792, 0.3807]) tensor([0.4622, 0.1022, 0.2393, 0.1962]) -Greedy action tensor([ 0.6549, -1.0225, -0.0678, 0.2793]) tensor([0.4239, 0.0792, 0.2058, 0.2912]) -Greedy action tensor([ 0.6796, -2.2214, 0.1010, 1.1618]) tensor([0.3091, 0.0170, 0.1733, 0.5006]) -Greedy action tensor([ 0.8597, -0.2421, 0.4029, -0.0911]) tensor([0.4252, 0.1413, 0.2693, 0.1643]) -Greedy action tensor([-0.3587, -1.5615, 0.5965, 1.3934]) tensor([0.1035, 0.0311, 0.2689, 0.5966]) -Greedy action tensor([ 0.4036, -0.7509, 0.5435, -0.1433]) tensor([0.3285, 0.1035, 0.3778, 0.1901]) -Greedy action tensor([-0.5702, -0.9762, 0.3794, -0.3374]) tensor([0.1814, 0.1209, 0.4688, 0.2289]) -Greedy action tensor([-0.5768, -0.0490, 0.4297, -0.2434]) tensor([0.1465, 0.2483, 0.4008, 0.2044]) -Greedy action tensor([-0.2783, -0.0887, 1.2823, -1.0260]) tensor([0.1343, 0.1624, 0.6397, 0.0636]) -Greedy action tensor([0.0398, 0.2425, 0.7804, 0.5276]) tensor([0.1681, 0.2058, 0.3524, 0.2737]) -Greedy action tensor([ 0.9163, -0.2961, -0.0863, -0.1215]) tensor([0.4954, 0.1474, 0.1818, 0.1755]) -Greedy action tensor([-0.3290, -1.0163, 1.1888, -0.4242]) tensor([0.1434, 0.0721, 0.6541, 0.1304]) -Greedy action tensor([ 1.8579, -1.6973, 0.0171, 0.9691]) tensor([0.6256, 0.0179, 0.0993, 0.2572]) -Greedy action tensor([-0.4460, -1.4111, -0.1832, 2.3034]) tensor([0.0546, 0.0208, 0.0710, 0.8536]) -Greedy action tensor([-0.9330, -0.4553, 0.4950, -0.6672]) tensor([0.1237, 0.1994, 0.5157, 0.1613]) -Greedy action tensor([ 1.0140, -1.1285, 0.6858, 0.5892]) tensor([0.4014, 0.0471, 0.2891, 0.2625]) -Greedy action tensor([-0.3340, -0.8290, 0.5107, -0.7355]) tensor([0.2171, 0.1323, 0.5052, 0.1453]) -Greedy action tensor([-0.7301, 0.1383, -0.5701, 0.1376]) tensor([0.1441, 0.3435, 0.1691, 0.3432]) -Greedy action tensor([0.8850, 0.0257, 0.2139, 0.7029]) tensor([0.3612, 0.1530, 0.1847, 0.3011]) -Greedy action tensor([ 0.6544, -0.4083, 0.7176, 0.2402]) tensor([0.3256, 0.1125, 0.3468, 0.2152]) -Greedy action tensor([ 1.0550, -0.3653, 0.6100, 0.3282]) tensor([0.4227, 0.1021, 0.2709, 0.2043]) -Greedy action tensor([-0.7975, -0.2328, 0.2974, -0.9739]) tensor([0.1518, 0.2671, 0.4538, 0.1273]) -Greedy action tensor([-0.9886, 0.0469, 0.6208, -0.5917]) tensor([0.0971, 0.2734, 0.4852, 0.1443]) -Greedy action tensor([ 0.1883, 0.1123, 0.3912, -0.4924]) tensor([0.2734, 0.2534, 0.3349, 0.1384]) -Greedy action tensor([ 0.6567, 0.8022, -0.5976, 0.3846]) tensor([0.3121, 0.3610, 0.0890, 0.2378]) -Greedy action tensor([-0.6968, 0.3912, 0.1964, 0.0102]) tensor([0.1185, 0.3517, 0.2895, 0.2403]) -Greedy action tensor([-0.1502, -2.0379, 0.4004, 0.4097]) tensor([0.2157, 0.0327, 0.3741, 0.3776]) -Greedy action tensor([ 0.0595, -0.6378, -0.5950, -0.2358]) tensor([0.3621, 0.1803, 0.1882, 0.2695]) -Greedy action tensor([-0.3804, -0.2875, -0.3655, -0.3471]) tensor([0.2412, 0.2647, 0.2448, 0.2494]) -Greedy action tensor([ 1.3779, 0.3828, -0.7319, 1.0432]) tensor([0.4532, 0.1675, 0.0550, 0.3243]) -Greedy action tensor([ 0.2481, -0.1736, 0.9714, -0.2319]) tensor([0.2306, 0.1513, 0.4754, 0.1427]) -Greedy action tensor([ 0.1385, -0.0615, -0.1428, 0.6200]) tensor([0.2386, 0.1953, 0.1801, 0.3861]) -Greedy action tensor([-1.0354, 0.7501, 1.6093, -0.7165]) tensor([0.0446, 0.2660, 0.6280, 0.0614]) -Greedy action tensor([-0.2041, -0.7084, -0.2500, 1.4985]) tensor([0.1243, 0.0750, 0.1187, 0.6820]) -Greedy action tensor([ 0.6930, -0.1180, 1.8487, -0.1583]) tensor([0.1981, 0.0880, 0.6293, 0.0846]) -Greedy action tensor([-0.6106, -0.7358, 0.4782, 0.0441]) tensor([0.1475, 0.1302, 0.4383, 0.2840]) -Greedy action tensor([-0.3111, -0.4198, 0.9781, -0.6491]) tensor([0.1603, 0.1437, 0.5817, 0.1143]) -Greedy action tensor([-0.1094, -0.7441, 0.5820, 0.5756]) tensor([0.1815, 0.0962, 0.3623, 0.3600]) -Greedy action tensor([ 0.5494, -0.9670, 0.1754, 0.6595]) tensor([0.3307, 0.0726, 0.2275, 0.3692]) -Greedy action tensor([-0.5944, -0.5785, -0.0447, 0.2890]) tensor([0.1621, 0.1647, 0.2809, 0.3922]) -Greedy action tensor([-0.4766, -0.9345, -0.0025, 0.1925]) tensor([0.1926, 0.1218, 0.3094, 0.3761]) -Greedy action tensor([-3.7846e-04, -1.8555e+00, 3.0078e-01, 2.8418e-01]) tensor([0.2606, 0.0408, 0.3522, 0.3464]) -Greedy action tensor([-0.9412, -0.7893, -0.2695, -0.2792]) tensor([0.1650, 0.1921, 0.3230, 0.3199]) -Greedy action tensor([ 0.1842, 0.0189, -0.5999, 1.4799]) tensor([0.1678, 0.1423, 0.0766, 0.6133]) -Greedy action tensor([ 0.6984, -1.3282, 0.8678, -0.3518]) tensor([0.3751, 0.0494, 0.4443, 0.1312]) -Greedy action tensor([-0.5263, 0.4624, -0.1803, -0.9174]) tensor([0.1731, 0.4652, 0.2446, 0.1171]) -Greedy action tensor([ 0.1400, -1.5293, 0.9483, 0.9549]) tensor([0.1757, 0.0331, 0.3943, 0.3969]) -Greedy action tensor([ 0.4306, -0.8251, 0.5308, -0.2865]) tensor([0.3474, 0.0990, 0.3840, 0.1696]) -Greedy action tensor([-0.3570, -0.5974, -0.3276, -0.3129]) tensor([0.2590, 0.2036, 0.2667, 0.2707]) -Greedy action tensor([-0.0263, 0.2015, -0.1525, 0.0488]) tensor([0.2372, 0.2979, 0.2091, 0.2557]) -Greedy action tensor([-0.8674, -0.5132, -0.0729, -0.1617]) tensor([0.1501, 0.2139, 0.3321, 0.3039]) -Greedy action tensor([-0.2636, -0.2607, -0.2685, -0.6661]) tensor([0.2727, 0.2735, 0.2714, 0.1824]) -Greedy action tensor([0.4478, 0.1220, 0.7032, 1.0378]) tensor([0.2076, 0.1499, 0.2680, 0.3745]) -Greedy action tensor([-0.3411, -0.2178, -0.2850, -0.5626]) tensor([0.2506, 0.2835, 0.2651, 0.2008]) -Greedy action tensor([-0.6241, 0.1111, 0.1490, -0.8691]) tensor([0.1657, 0.3456, 0.3590, 0.1297]) -Greedy action tensor([-0.8441, 0.2145, 0.4321, -1.1218]) tensor([0.1216, 0.3505, 0.4357, 0.0921]) -Greedy action tensor([-0.8933, -0.5970, 0.9121, -0.7802]) tensor([0.1047, 0.1409, 0.6371, 0.1173]) -Greedy action tensor([-0.2879, -1.1364, 0.1406, -0.4823]) tensor([0.2641, 0.1131, 0.4054, 0.2174]) -Greedy action tensor([-0.8290, 0.1492, -0.7766, 0.1775]) tensor([0.1342, 0.3570, 0.1415, 0.3673]) -Greedy action tensor([0.6716, 0.1852, 0.6453, 0.4653]) tensor([0.2939, 0.1807, 0.2863, 0.2391]) -Greedy action tensor([ 0.3736, -1.9730, -0.0496, 0.0116]) tensor([0.4087, 0.0391, 0.2677, 0.2846]) -Greedy action tensor([-0.3136, -1.3954, 0.3816, -0.3430]) tensor([0.2318, 0.0786, 0.4645, 0.2251]) -Greedy action tensor([-0.5631, -1.0265, 0.4144, -0.3582]) tensor([0.1813, 0.1141, 0.4820, 0.2226]) -Greedy action tensor([ 0.2038, -1.0016, 0.5854, -1.5375]) tensor([0.3402, 0.1019, 0.4983, 0.0596]) -Greedy action tensor([-0.8808, 0.3668, -0.0316, 0.4197]) tensor([0.0953, 0.3319, 0.2228, 0.3499]) -Greedy action tensor([ 0.4416, 0.4488, 0.4518, -0.1707]) tensor([0.2809, 0.2830, 0.2838, 0.1523]) -Greedy action tensor([-0.6672, -1.3033, -0.0804, -0.5320]) tensor([0.2236, 0.1184, 0.4021, 0.2560]) -Greedy action tensor([ 0.2175, -0.8981, -0.3058, 0.2473]) tensor([0.3389, 0.1111, 0.2008, 0.3492]) -Greedy action tensor([ 0.1108, -0.9219, -0.6283, 0.1677]) tensor([0.3458, 0.1231, 0.1651, 0.3660]) -Greedy action tensor([ 0.6653, -0.2721, 0.0858, -0.0824]) tensor([0.4123, 0.1615, 0.2310, 0.1952]) -Greedy action tensor([ 1.0988, -0.2232, -0.2136, 0.1697]) tensor([0.5179, 0.1381, 0.1394, 0.2046]) -Greedy action tensor([ 2.3187, 0.8030, 0.1699, -0.0850]) tensor([0.7009, 0.1540, 0.0818, 0.0634]) -Greedy action tensor([ 2.1351, -0.9038, -0.2346, 0.8190]) tensor([0.7094, 0.0340, 0.0663, 0.1903]) -Greedy action tensor([ 0.9579, -0.8566, 0.1349, -0.0425]) tensor([0.5077, 0.0827, 0.2229, 0.1867]) -Greedy action tensor([ 0.9670, -0.1503, -0.3792, 0.2563]) tensor([0.4811, 0.1574, 0.1252, 0.2364]) -Greedy action tensor([ 1.0720, -0.4973, -0.1566, 0.3491]) tensor([0.5035, 0.1048, 0.1474, 0.2443]) -Greedy action tensor([ 1.3869, 0.2362, -0.2894, 0.3709]) tensor([0.5361, 0.1696, 0.1003, 0.1941]) -Greedy action tensor([ 1.8843, -0.6576, -0.2242, 0.3005]) tensor([0.7116, 0.0560, 0.0864, 0.1460]) -Greedy action tensor([ 1.1589, -0.4603, -0.1517, -0.0657]) tensor([0.5677, 0.1124, 0.1531, 0.1668]) -Greedy action tensor([ 1.8264, -0.4013, -0.1070, 0.4066]) tensor([0.6693, 0.0721, 0.0968, 0.1618]) -Greedy action tensor([ 1.0946, -0.3391, -0.2369, -0.2631]) tensor([0.5682, 0.1355, 0.1501, 0.1462]) -Greedy action tensor([ 1.8230, -0.5912, -0.3619, 0.5534]) tensor([0.6744, 0.0603, 0.0759, 0.1895]) -Greedy action tensor([ 2.0886, -0.4606, -0.7224, 0.3443]) tensor([0.7616, 0.0595, 0.0458, 0.1331]) -Greedy action tensor([ 2.2991, -1.1674, -0.4314, 0.4738]) tensor([0.7952, 0.0248, 0.0518, 0.1282]) -Greedy action tensor([ 0.5402, -0.3337, -0.1260, 0.1749]) tensor([0.3810, 0.1590, 0.1957, 0.2644]) -Greedy action tensor([ 1.5609, -0.5836, -0.3010, 0.0842]) tensor([0.6663, 0.0780, 0.1035, 0.1522]) -Greedy action tensor([ 2.1589, -0.8036, -0.1833, 0.1623]) tensor([0.7791, 0.0403, 0.0749, 0.1058]) -Greedy action tensor([ 1.5601, -0.5263, -0.0688, 0.0844]) tensor([0.6456, 0.0801, 0.1266, 0.1476]) -Greedy action tensor([ 1.4381, -0.5885, -0.4922, 0.6097]) tensor([0.5836, 0.0769, 0.0847, 0.2549]) -Greedy action tensor([ 1.1712, -0.3484, -0.4350, -0.1305]) tensor([0.5912, 0.1294, 0.1186, 0.1608]) -Greedy action tensor([ 1.5850, 0.1751, -0.8381, 0.1201]) tensor([0.6394, 0.1561, 0.0567, 0.1478]) -Greedy action tensor([ 1.9760, -0.1824, -0.5908, 0.3350]) tensor([0.7215, 0.0833, 0.0554, 0.1398]) -Greedy action tensor([ 1.7027, 0.1034, -0.4847, 0.6467]) tensor([0.6016, 0.1216, 0.0675, 0.2093]) -Greedy action tensor([ 2.0352, -0.5149, -1.0289, 0.2300]) tensor([0.7757, 0.0606, 0.0362, 0.1276]) -Greedy action tensor([ 2.0698, -0.7411, -0.7843, 1.2864]) tensor([0.6351, 0.0382, 0.0366, 0.2901]) -Greedy action tensor([ 1.2823, -0.1957, -0.2791, 0.2526]) tensor([0.5571, 0.1271, 0.1169, 0.1989]) -Greedy action tensor([ 1.1279, -0.1896, -0.1863, -0.1369]) tensor([0.5498, 0.1472, 0.1477, 0.1552]) -Greedy action tensor([ 2.4998, -0.4933, 0.0734, 0.2582]) tensor([0.8034, 0.0403, 0.0710, 0.0854]) -Greedy action tensor([0.7493, 0.3529, 0.2339, 0.0828]) tensor([0.3593, 0.2417, 0.2146, 0.1845]) -Greedy action tensor([ 1.4291, -0.4511, -0.2664, 0.3226]) tensor([0.6000, 0.0915, 0.1101, 0.1984]) -Greedy action tensor([ 1.6830, -0.7360, -0.4422, 0.6099]) tensor([0.6450, 0.0574, 0.0770, 0.2206]) -Greedy action tensor([ 1.2200, -0.3783, -0.4889, 0.3612]) tensor([0.5534, 0.1119, 0.1002, 0.2345]) -Greedy action tensor([ 0.8320, -0.3638, -0.0634, 0.0158]) tensor([0.4645, 0.1405, 0.1897, 0.2053]) -Greedy action tensor([ 2.0897, -0.7143, -0.4298, 0.8826]) tensor([0.6944, 0.0421, 0.0559, 0.2077]) -Greedy action tensor([ 1.0206, -0.4675, 0.1137, 0.2953]) tensor([0.4731, 0.1068, 0.1910, 0.2291]) -Greedy action tensor([ 1.6646, -0.6813, -0.0530, 0.3818]) tensor([0.6441, 0.0617, 0.1156, 0.1786]) -Greedy action tensor([ 1.8432, -0.4964, -0.6807, 0.9986]) tensor([0.6226, 0.0600, 0.0499, 0.2675]) -Greedy action tensor([ 1.3464, -0.4633, -0.7431, 0.5361]) tensor([0.5773, 0.0945, 0.0714, 0.2567]) -Greedy action tensor([ 1.0756, -0.4684, -0.5623, 0.4615]) tensor([0.5131, 0.1096, 0.0997, 0.2776]) -Greedy action tensor([ 1.1781, -0.5822, -0.0333, 0.2396]) tensor([0.5373, 0.0924, 0.1600, 0.2102]) -Greedy action tensor([ 1.1851, -0.3047, -0.5357, 0.6658]) tensor([0.5002, 0.1127, 0.0895, 0.2976]) -Greedy action tensor([ 1.0247, -0.4407, 0.0176, 0.0512]) tensor([0.5066, 0.1170, 0.1850, 0.1914]) -Greedy action tensor([ 1.1737, -0.1073, -0.8832, 0.2945]) tensor([0.5492, 0.1526, 0.0702, 0.2280]) -Greedy action tensor([ 1.5605, -0.5091, -0.2334, 0.5411]) tensor([0.6048, 0.0763, 0.1006, 0.2182]) -Greedy action tensor([ 1.2606, -0.1157, -0.5620, 0.6570]) tensor([0.5100, 0.1288, 0.0824, 0.2789]) -Greedy action tensor([ 1.7665, -0.5312, -0.8617, 0.3009]) tensor([0.7124, 0.0716, 0.0514, 0.1645]) -Greedy action tensor([ 1.6196, -0.3561, -0.2674, 0.3735]) tensor([0.6338, 0.0879, 0.0960, 0.1823]) -Greedy action tensor([ 1.7237, -1.2381, -0.0927, 0.0450]) tensor([0.7138, 0.0369, 0.1161, 0.1332]) -Greedy action tensor([ 1.1885, -0.5072, -0.2607, 0.2615]) tensor([0.5513, 0.1011, 0.1294, 0.2182]) -Greedy action tensor([ 1.4304, -0.4749, -0.1277, 0.1746]) tensor([0.6082, 0.0905, 0.1281, 0.1732]) -Greedy action tensor([ 1.5920, -0.3917, -0.2867, 0.3472]) tensor([0.6336, 0.0872, 0.0968, 0.1825]) -Greedy action tensor([ 1.4255, -0.3224, -0.0890, 0.5213]) tensor([0.5559, 0.0968, 0.1222, 0.2251]) -Greedy action tensor([ 2.4519, -1.0694, -0.0829, 0.7244]) tensor([0.7773, 0.0230, 0.0616, 0.1381]) -Greedy action tensor([ 0.9883, -0.4870, -0.4093, 0.3100]) tensor([0.5042, 0.1153, 0.1246, 0.2559]) -Greedy action tensor([ 2.3980, -1.7254, -0.0197, 0.4692]) tensor([0.7996, 0.0129, 0.0713, 0.1162]) -Greedy action tensor([ 0.8093, -0.3116, -0.0781, 0.1600]) tensor([0.4424, 0.1442, 0.1822, 0.2311]) -Greedy action tensor([ 1.4474, 0.4234, -1.0917, 0.6232]) tensor([0.5329, 0.1914, 0.0421, 0.2337]) -Greedy action tensor([ 1.6670, -0.6322, -0.3574, 0.5199]) tensor([0.6452, 0.0647, 0.0852, 0.2049]) -Greedy action tensor([ 1.9352, -1.0485, -0.0566, 0.2167]) tensor([0.7319, 0.0370, 0.0999, 0.1312]) -Greedy action tensor([ 1.8669, 0.0353, -0.2336, 0.4683]) tensor([0.6538, 0.1047, 0.0800, 0.1615]) -Greedy action tensor([ 1.4169, -0.4943, -0.0438, 0.5606]) tensor([0.5541, 0.0820, 0.1286, 0.2354]) -Greedy action tensor([ 1.2626, -0.7480, 0.0861, 0.3684]) tensor([0.5402, 0.0723, 0.1666, 0.2209]) -Greedy action tensor([ 1.1237, -0.4160, -0.1790, 0.5219]) tensor([0.4916, 0.1054, 0.1336, 0.2693]) -Greedy action tensor([ 1.5327, -0.3365, -0.2890, 0.3469]) tensor([0.6167, 0.0951, 0.0998, 0.1884]) -Greedy action tensor([ 0.7121, -0.4587, 0.1521, -0.0641]) tensor([0.4271, 0.1324, 0.2440, 0.1965]) -Greedy action tensor([ 2.1598, -1.2514, 0.0262, 0.8390]) tensor([0.7051, 0.0233, 0.0835, 0.1882]) -Greedy action tensor([ 1.0934, -0.1689, -0.2755, 0.0937]) tensor([0.5248, 0.1485, 0.1335, 0.1931]) -Greedy action tensor([ 2.0620, -0.7571, 0.1627, 0.2453]) tensor([0.7289, 0.0435, 0.1091, 0.1185]) -Greedy action tensor([ 1.2403, -0.1713, -0.4243, 0.3262]) tensor([0.5453, 0.1329, 0.1032, 0.2186]) -Greedy action tensor([ 1.6271, -0.2552, -0.8567, 0.1814]) tensor([0.6797, 0.1035, 0.0567, 0.1601]) -Greedy action tensor([ 2.0115, -1.3394, -0.2024, 0.3156]) tensor([0.7532, 0.0264, 0.0823, 0.1381]) -Greedy action tensor([ 1.6602, 0.0227, -0.6708, 0.0508]) tensor([0.6704, 0.1304, 0.0652, 0.1341]) -Greedy action tensor([ 2.1079, -0.6638, -0.2663, 0.5423]) tensor([0.7328, 0.0458, 0.0682, 0.1531]) -Greedy action tensor([ 1.6432, -0.4561, -0.3871, 0.5330]) tensor([0.6316, 0.0774, 0.0829, 0.2081]) -Greedy action tensor([ 1.5154, -0.5873, -0.2209, 0.0120]) tensor([0.6576, 0.0803, 0.1158, 0.1462]) -Greedy action tensor([ 0.9150, 0.0807, -0.3816, -0.0037]) tensor([0.4747, 0.2061, 0.1298, 0.1894]) -Greedy action tensor([ 1.6114, -0.2903, -0.5734, 0.8229]) tensor([0.5826, 0.0870, 0.0655, 0.2648]) -Greedy action tensor([ 1.7872, -0.2985, -0.7229, 0.2287]) tensor([0.7062, 0.0877, 0.0574, 0.1486]) -Greedy action tensor([ 1.8612, -0.1817, -0.5933, 0.1893]) tensor([0.7125, 0.0924, 0.0612, 0.1339]) -Greedy action tensor([ 1.4554, -0.6547, -0.5120, 0.4327]) tensor([0.6170, 0.0748, 0.0863, 0.2219]) -Greedy action tensor([ 0.7939, -0.7958, 0.0891, -0.4299]) tensor([0.5019, 0.1024, 0.2481, 0.1476]) -Greedy action tensor([ 0.6670, -0.4585, -0.1365, -0.3832]) tensor([0.4712, 0.1529, 0.2110, 0.1649]) -Greedy action tensor([ 1.1039, -0.5053, -0.0619, -0.5196]) tensor([0.5852, 0.1171, 0.1824, 0.1154]) -Greedy action tensor([ 0.6453, -0.4003, 0.0113, -0.1846]) tensor([0.4314, 0.1516, 0.2288, 0.1881]) -Greedy action tensor([ 0.8295, -0.4924, -0.0998, -0.3012]) tensor([0.5040, 0.1344, 0.1990, 0.1627]) -Greedy action tensor([ 0.8972, -0.7604, 0.0195, -0.7365]) tensor([0.5551, 0.1058, 0.2308, 0.1084]) -Greedy action tensor([ 0.2933, -0.5635, -0.2782, -0.3236]) tensor([0.3954, 0.1679, 0.2233, 0.2134]) -Greedy action tensor([ 0.8383, -0.4524, -0.0251, -0.3200]) tensor([0.4973, 0.1368, 0.2097, 0.1562]) -Greedy action tensor([ 0.5250, -0.3151, -0.0919, -0.1335]) tensor([0.4018, 0.1734, 0.2168, 0.2080]) -Greedy action tensor([ 0.6634, -0.7146, -0.1630, -0.2231]) tensor([0.4758, 0.1199, 0.2082, 0.1961]) -Greedy action tensor([ 1.1641, -0.2772, -0.2260, -0.3884]) tensor([0.5891, 0.1394, 0.1467, 0.1247]) -Greedy action tensor([ 0.8767, -0.6984, 0.0254, -0.4438]) tensor([0.5261, 0.1089, 0.2246, 0.1405]) -Greedy action tensor([ 0.9055, -0.2479, -0.0116, -0.7042]) tensor([0.5221, 0.1648, 0.2087, 0.1044]) -Greedy action tensor([ 0.4115, 0.0867, -0.2619, 0.1199]) tensor([0.3356, 0.2425, 0.1712, 0.2507]) -Greedy action tensor([ 1.1019, -0.6017, -0.1233, -0.3602]) tensor([0.5857, 0.1066, 0.1720, 0.1357]) -Greedy action tensor([ 0.7807, -0.8929, -0.0694, -0.4628]) tensor([0.5254, 0.0986, 0.2245, 0.1515]) -Greedy action tensor([ 0.9138, -0.6562, -0.0702, -0.3657]) tensor([0.5376, 0.1119, 0.2010, 0.1496]) -Greedy action tensor([ 1.1122, -1.0190, 0.1035, -0.5752]) tensor([0.5994, 0.0711, 0.2186, 0.1109]) -Greedy action tensor([ 0.8416, -0.6728, -0.0803, -0.3743]) tensor([0.5224, 0.1149, 0.2078, 0.1549]) -Greedy action tensor([ 0.4964, 0.0074, 0.0070, -0.1921]) tensor([0.3665, 0.2247, 0.2247, 0.1841]) -Greedy action tensor([ 1.0815, -0.5165, -0.1272, -0.5666]) tensor([0.5906, 0.1195, 0.1763, 0.1136]) -Greedy action tensor([ 1.0557, -0.9220, 0.0039, -0.4785]) tensor([0.5871, 0.0812, 0.2051, 0.1266]) -Greedy action tensor([ 0.7071, -0.4630, -0.0869, -0.1079]) tensor([0.4535, 0.1407, 0.2050, 0.2008]) -Greedy action tensor([ 0.8901, -0.5195, -0.0596, -0.2405]) tensor([0.5118, 0.1250, 0.1980, 0.1652]) -Greedy action tensor([ 0.5289, -0.0192, 0.0073, -0.0076]) tensor([0.3628, 0.2097, 0.2153, 0.2122]) -Greedy action tensor([ 0.6526, -0.3452, -0.0128, -0.1894]) tensor([0.4322, 0.1594, 0.2222, 0.1862]) -Greedy action tensor([ 0.6990, -0.6174, -0.1530, -0.1748]) tensor([0.4735, 0.1269, 0.2020, 0.1976]) -Greedy action tensor([ 0.3948, -0.2039, -0.0529, -0.0927]) tensor([0.3568, 0.1961, 0.2280, 0.2191]) -Greedy action tensor([ 0.8921, -0.3263, -0.0423, -0.3500]) tensor([0.5057, 0.1496, 0.1987, 0.1460]) -Greedy action tensor([ 0.4369, -0.2530, -0.1070, -0.0132]) tensor([0.3677, 0.1844, 0.2134, 0.2344]) -Greedy action tensor([ 0.7265, -0.1450, -0.0566, -0.0280]) tensor([0.4263, 0.1784, 0.1948, 0.2005]) -Greedy action tensor([ 0.6482, -0.4453, 0.1514, -0.2713]) tensor([0.4270, 0.1430, 0.2598, 0.1702]) -Greedy action tensor([ 0.6407, -0.4000, -0.0648, -0.4149]) tensor([0.4556, 0.1609, 0.2250, 0.1585]) -Greedy action tensor([ 0.7366, -0.3270, 0.1647, -0.8241]) tensor([0.4718, 0.1629, 0.2663, 0.0991]) -Greedy action tensor([ 1.0821, -0.6515, -0.0806, -0.5312]) tensor([0.5922, 0.1046, 0.1852, 0.1180]) -Greedy action tensor([ 0.8157, -0.5563, -0.1449, -0.1931]) tensor([0.4998, 0.1267, 0.1912, 0.1822]) -Greedy action tensor([ 0.8328, -0.4722, 0.0056, -0.3518]) tensor([0.4965, 0.1346, 0.2171, 0.1519]) -Greedy action tensor([ 0.1787, 0.0290, -0.1966, -0.2753]) tensor([0.3142, 0.2705, 0.2159, 0.1995]) -Greedy action tensor([ 0.3744, 0.0197, -0.1466, -0.0225]) tensor([0.3370, 0.2363, 0.2001, 0.2266]) -Greedy action tensor([ 0.7287, -0.3321, -0.0455, -0.1829]) tensor([0.4527, 0.1567, 0.2087, 0.1819]) -Greedy action tensor([ 0.6441, -0.2092, -0.1013, -0.1168]) tensor([0.4223, 0.1799, 0.2004, 0.1973]) -Greedy action tensor([ 0.8170, -0.2135, -0.0782, -0.0455]) tensor([0.4572, 0.1631, 0.1868, 0.1930]) -Greedy action tensor([ 0.5513, 0.1395, -0.2318, -0.0349]) tensor([0.3737, 0.2476, 0.1708, 0.2079]) -Greedy action tensor([ 0.3847, 0.0131, -0.1145, -0.1557]) tensor([0.3473, 0.2395, 0.2108, 0.2023]) -Greedy action tensor([ 0.7293, -0.3287, -0.0035, -0.1424]) tensor([0.4453, 0.1546, 0.2140, 0.1862]) -Greedy action tensor([ 0.4305, -0.0739, 0.0039, -0.1272]) tensor([0.3535, 0.2135, 0.2307, 0.2024]) -Greedy action tensor([ 0.8502, -0.3911, 0.0284, -0.2283]) tensor([0.4834, 0.1397, 0.2125, 0.1644]) -Greedy action tensor([ 0.5192, 0.1109, -0.0427, 0.1208]) tensor([0.3441, 0.2287, 0.1962, 0.2310]) -Greedy action tensor([ 0.5287, -0.3081, 0.0584, -0.2717]) tensor([0.3989, 0.1728, 0.2492, 0.1791]) -Greedy action tensor([ 0.4919, -0.4273, -0.1308, -0.3816]) tensor([0.4250, 0.1695, 0.2280, 0.1774]) -Greedy action tensor([ 0.3794, -0.1425, -0.0990, -0.0645]) tensor([0.3503, 0.2079, 0.2171, 0.2247]) -Greedy action tensor([ 0.6809, -0.4044, -0.0128, -0.1427]) tensor([0.4393, 0.1484, 0.2195, 0.1928]) -Greedy action tensor([ 0.4083, -0.0767, -0.1374, -0.0439]) tensor([0.3532, 0.2175, 0.2046, 0.2247]) -Greedy action tensor([ 0.6041, -0.7942, 0.2897, -0.4812]) tensor([0.4320, 0.1067, 0.3154, 0.1459]) -Greedy action tensor([ 1.2926, -0.6829, -0.3215, -0.5912]) tensor([0.6712, 0.0931, 0.1336, 0.1020]) -Greedy action tensor([ 1.0157, -0.7540, 0.1147, -0.4802]) tensor([0.5554, 0.0946, 0.2256, 0.1244]) -Greedy action tensor([ 0.5656, -0.5921, -0.0032, -0.4059]) tensor([0.4427, 0.1391, 0.2507, 0.1676]) -Greedy action tensor([ 0.6042, -0.4061, -0.1943, -0.0782]) tensor([0.4311, 0.1570, 0.1940, 0.2179]) -Greedy action tensor([ 0.6596, -0.3074, -0.2107, -0.3445]) tensor([0.4618, 0.1756, 0.1934, 0.1692]) -Greedy action tensor([ 0.6755, 0.1040, -0.0994, 0.1247]) tensor([0.3843, 0.2170, 0.1771, 0.2216]) -Greedy action tensor([ 0.6282, -0.4285, -0.0922, -0.1282]) tensor([0.4341, 0.1509, 0.2112, 0.2038]) -Greedy action tensor([ 0.8006, -0.5918, 0.0600, -0.2779]) tensor([0.4842, 0.1203, 0.2309, 0.1647]) -Greedy action tensor([ 0.6255, -0.2277, 0.0027, -0.3197]) tensor([0.4253, 0.1812, 0.2282, 0.1653]) -Greedy action tensor([ 0.2608, 0.1308, -0.1868, -0.2664]) tensor([0.3218, 0.2826, 0.2057, 0.1899]) -Greedy action tensor([ 0.7502, -0.4150, -0.1008, -0.5132]) tensor([0.4947, 0.1543, 0.2112, 0.1398]) -Greedy action tensor([ 1.1704, -0.9186, 0.0197, -0.4930]) tensor([0.6136, 0.0760, 0.1942, 0.1163]) -Greedy action tensor([ 0.7806, -0.8464, -0.0737, -0.4490]) tensor([0.5223, 0.1026, 0.2223, 0.1527]) -Greedy action tensor([ 0.1926, 0.0277, -0.0405, -0.1124]) tensor([0.2961, 0.2511, 0.2345, 0.2183]) -Greedy action tensor([ 0.8154, -0.4808, -0.0300, -0.2149]) tensor([0.4855, 0.1328, 0.2085, 0.1733]) -Greedy action tensor([ 0.6074, -0.3506, 0.0388, -0.2620]) tensor([0.4221, 0.1619, 0.2390, 0.1769]) -Greedy action tensor([ 0.5948, -0.2767, -0.0747, -0.1746]) tensor([0.4178, 0.1748, 0.2139, 0.1936]) -Greedy action tensor([ 0.9122, -0.9224, -0.0699, -0.5153]) tensor([0.5637, 0.0900, 0.2111, 0.1352]) -Greedy action tensor([ 0.8216, -0.5502, 0.0074, -0.3898]) tensor([0.5014, 0.1272, 0.2221, 0.1493]) -Greedy action tensor([ 0.5387, -0.2750, 0.0219, -0.2372]) tensor([0.4000, 0.1773, 0.2386, 0.1841]) -Greedy action tensor([ 1.1634, -0.4776, 0.1943, -0.4109]) tensor([0.5617, 0.1088, 0.2131, 0.1164]) -Greedy action tensor([ 0.8516, -0.4773, 0.1382, -0.0931]) tensor([0.4665, 0.1235, 0.2286, 0.1814]) -Greedy action tensor([ 0.7677, -0.6151, -0.0383, -0.4016]) tensor([0.4980, 0.1249, 0.2224, 0.1547]) -Greedy action tensor([ 0.6458, -0.7298, -0.0631, -0.2080]) tensor([0.4607, 0.1164, 0.2267, 0.1962]) -Greedy action tensor([ 0.7833, 0.2040, -0.1220, -0.3779]) tensor([0.4390, 0.2460, 0.1775, 0.1375]) -Greedy action tensor([ 0.1001, 0.4730, -0.1256, -0.5521]) tensor([0.2652, 0.3850, 0.2116, 0.1381]) -Greedy action tensor([ 0.4195, 0.1668, -0.1757, 0.0115]) tensor([0.3341, 0.2595, 0.1842, 0.2222]) -Greedy action tensor([-1.5021, 0.5013, 0.3009, 0.1161]) tensor([0.0512, 0.3797, 0.3108, 0.2583]) -Greedy action tensor([-1.6501, -0.3207, 0.5742, 0.0324]) tensor([0.0515, 0.1947, 0.4765, 0.2772]) -Greedy action tensor([-1.7556, -0.3691, 0.6229, -0.0554]) tensor([0.0470, 0.1881, 0.5074, 0.2575]) -Greedy action tensor([-1.6004, -0.6040, 0.7782, 0.0786]) tensor([0.0504, 0.1364, 0.5433, 0.2699]) -Greedy action tensor([-1.8595, -0.2743, 0.6226, -0.0928]) tensor([0.0422, 0.2059, 0.5050, 0.2469]) -Greedy action tensor([-1.8495, -0.3905, 0.6088, -0.1323]) tensor([0.0443, 0.1907, 0.5180, 0.2469]) -Greedy action tensor([-0.6400, -0.4903, 0.2283, 0.3525]) tensor([0.1381, 0.1604, 0.3290, 0.3725]) -Greedy action tensor([-0.8424, 0.5844, 0.1707, -0.0389]) tensor([0.0985, 0.4102, 0.2713, 0.2200]) -Greedy action tensor([-0.1722, 0.3803, 0.2198, 0.4703]) tensor([0.1634, 0.2840, 0.2419, 0.3107]) -Greedy action tensor([-1.0538, -0.5545, 0.4330, 0.9500]) tensor([0.0690, 0.1137, 0.3053, 0.5120]) -Greedy action tensor([-1.6093, -0.4552, 0.5854, 0.1302]) tensor([0.0531, 0.1683, 0.4764, 0.3022]) -Greedy action tensor([-1.7542, -0.1788, 0.6175, -0.0075]) tensor([0.0449, 0.2169, 0.4809, 0.2574]) -Greedy action tensor([-1.5365, -0.4317, 0.5272, 0.1440]) tensor([0.0579, 0.1749, 0.4562, 0.3110]) -Greedy action tensor([-1.3580, -0.4919, 0.4879, 0.4340]) tensor([0.0636, 0.1513, 0.4031, 0.3819]) -Greedy action tensor([-1.6964, -0.4410, 0.7006, 0.1396]) tensor([0.0459, 0.1612, 0.5048, 0.2881]) -Greedy action tensor([-1.2868, 0.7125, 0.2064, 0.1509]) tensor([0.0587, 0.4331, 0.2611, 0.2470]) -Greedy action tensor([-1.9397, -0.4469, 0.6665, -0.1774]) tensor([0.0403, 0.1793, 0.5458, 0.2347]) -Greedy action tensor([-0.7186, -0.3867, 0.1999, 0.1208]) tensor([0.1386, 0.1932, 0.3473, 0.3209]) -Greedy action tensor([-1.9134, -0.3887, 0.6356, -0.1639]) tensor([0.0414, 0.1903, 0.5300, 0.2383]) -Greedy action tensor([-1.8172, -0.2776, 0.6283, -0.1045]) tensor([0.0440, 0.2050, 0.5073, 0.2438]) -Greedy action tensor([-1.9377, -0.4586, 0.6790, -0.1682]) tensor([0.0401, 0.1759, 0.5488, 0.2352]) -Greedy action tensor([-0.9649, 0.2074, 0.2279, 0.0010]) tensor([0.0985, 0.3181, 0.3247, 0.2588]) -Greedy action tensor([-1.9320, -0.4425, 0.6601, -0.1736]) tensor([0.0407, 0.1803, 0.5431, 0.2360]) -Greedy action tensor([-1.9005, -0.4366, 0.6467, -0.1574]) tensor([0.0420, 0.1816, 0.5364, 0.2400]) -Greedy action tensor([-1.9243, -0.3760, 0.6455, -0.1638]) tensor([0.0407, 0.1913, 0.5314, 0.2366]) -Greedy action tensor([-1.1423, -0.5972, 0.2640, 0.2342]) tensor([0.0929, 0.1602, 0.3790, 0.3679]) -Greedy action tensor([-1.7558, -0.3736, 0.5395, -0.1969]) tensor([0.0509, 0.2026, 0.5048, 0.2417]) -Greedy action tensor([-1.3090, 0.6125, 0.1972, 0.1336]) tensor([0.0603, 0.4122, 0.2721, 0.2553]) -Greedy action tensor([-1.9251, -0.4576, 0.6728, -0.1684]) tensor([0.0407, 0.1766, 0.5469, 0.2358]) -Greedy action tensor([-1.7973, -0.4804, 0.7813, 0.0762]) tensor([0.0409, 0.1528, 0.5396, 0.2666]) -Greedy action tensor([-1.8567, -0.3751, 0.6450, -0.0952]) tensor([0.0427, 0.1878, 0.5210, 0.2485]) -Greedy action tensor([-1.7640, -0.4771, 0.5861, -0.0764]) tensor([0.0487, 0.1765, 0.5112, 0.2635]) -Greedy action tensor([-1.7868, -0.1193, 0.5690, -0.0292]) tensor([0.0442, 0.2340, 0.4658, 0.2561]) -Greedy action tensor([-1.2437, -0.4565, 0.8430, 0.5700]) tensor([0.0575, 0.1264, 0.4634, 0.3527]) -Greedy action tensor([-1.7735, -0.4891, 0.5970, -0.0234]) tensor([0.0475, 0.1714, 0.5080, 0.2731]) -Greedy action tensor([-1.9446, -0.4491, 0.6678, -0.1800]) tensor([0.0401, 0.1789, 0.5467, 0.2342]) -Greedy action tensor([-0.9963, -0.5266, 0.3264, 0.3747]) tensor([0.0972, 0.1554, 0.3647, 0.3827]) -Greedy action tensor([-1.8635, -0.4226, 0.6686, -0.0945]) tensor([0.0422, 0.1785, 0.5315, 0.2478]) -Greedy action tensor([-1.7493, -0.3155, 0.6511, -0.0519]) tensor([0.0461, 0.1935, 0.5086, 0.2518]) -Greedy action tensor([-1.9208, -0.4429, 0.6543, -0.1687]) tensor([0.0412, 0.1805, 0.5408, 0.2375]) -Greedy action tensor([-1.8802, -0.2914, 0.6172, -0.1558]) tensor([0.0423, 0.2070, 0.5136, 0.2371]) -Greedy action tensor([-0.6183, -0.0988, 0.7013, 1.4663]) tensor([0.0691, 0.1162, 0.2587, 0.5560]) -Greedy action tensor([-1.8672, -0.3905, 0.6192, -0.1488]) tensor([0.0435, 0.1906, 0.5232, 0.2427]) -Greedy action tensor([-1.4104, -0.6060, 0.5240, -0.0193]) tensor([0.0705, 0.1577, 0.4882, 0.2836]) -Greedy action tensor([-1.8294, -0.4132, 0.6066, -0.1066]) tensor([0.0451, 0.1861, 0.5159, 0.2528]) -Greedy action tensor([-1.0741, -0.6688, 0.3548, 0.2006]) tensor([0.0975, 0.1463, 0.4072, 0.3490]) -Greedy action tensor([-1.3797, 0.4965, 0.2992, -0.0147]) tensor([0.0595, 0.3885, 0.3189, 0.2330]) -Greedy action tensor([-1.1638, -0.1582, 0.8680, 0.9546]) tensor([0.0508, 0.1389, 0.3876, 0.4227]) -Greedy action tensor([-1.8787, -0.4229, 0.6348, -0.1407]) tensor([0.0429, 0.1839, 0.5295, 0.2438]) -Greedy action tensor([-1.8635, -0.3447, 0.6081, -0.1663]) tensor([0.0437, 0.1997, 0.5179, 0.2387]) -Greedy action tensor([-1.8797, -0.4190, 0.6306, -0.1521]) tensor([0.0430, 0.1854, 0.5295, 0.2421]) -Greedy action tensor([-1.6424, -0.2261, 0.5429, -0.3149]) tensor([0.0562, 0.2317, 0.5000, 0.2120]) -Greedy action tensor([-1.9349, -0.4401, 0.6624, -0.1751]) tensor([0.0405, 0.1805, 0.5437, 0.2353]) -Greedy action tensor([-1.9045, -0.3900, 0.6471, -0.1462]) tensor([0.0414, 0.1881, 0.5306, 0.2400]) -Greedy action tensor([-1.8321, -0.4054, 0.6033, -0.1224]) tensor([0.0452, 0.1883, 0.5165, 0.2500]) -Greedy action tensor([-1.8824, -0.4296, 0.6340, -0.1528]) tensor([0.0429, 0.1835, 0.5316, 0.2420]) -Greedy action tensor([-1.5490, -0.4472, 0.5159, 0.0616]) tensor([0.0592, 0.1781, 0.4665, 0.2962]) -Greedy action tensor([-1.9090, -0.4390, 0.6510, -0.1597]) tensor([0.0416, 0.1809, 0.5382, 0.2393]) -Greedy action tensor([-0.7017, 0.9907, 0.0759, 0.2659]) tensor([0.0890, 0.4833, 0.1936, 0.2341]) -Greedy action tensor([-1.9054, -0.4117, 0.6468, -0.1561]) tensor([0.0416, 0.1853, 0.5339, 0.2392]) -Greedy action tensor([-1.7353, -0.3048, 0.5491, -0.1220]) tensor([0.0500, 0.2088, 0.4905, 0.2507]) -Greedy action tensor([-1.8875, -0.4307, 0.6341, -0.1417]) tensor([0.0426, 0.1829, 0.5304, 0.2442]) -Greedy action tensor([-1.9178, -0.4251, 0.6550, -0.1643]) tensor([0.0411, 0.1829, 0.5386, 0.2374]) -Greedy action tensor([-1.1456, -0.1890, 0.3029, 0.4934]) tensor([0.0769, 0.2001, 0.3272, 0.3959]) -Greedy action tensor([-1.4566, 0.2055, 0.4346, 0.1842]) tensor([0.0554, 0.2919, 0.3670, 0.2857]) -Greedy action tensor([-1.8528, -0.4460, 0.6238, -0.1349]) tensor([0.0443, 0.1810, 0.5276, 0.2471]) -Greedy action tensor([-1.9253, -0.4472, 0.6615, -0.1669]) tensor([0.0409, 0.1792, 0.5429, 0.2371]) -Greedy action tensor([-1.9039, -0.4103, 0.6464, -0.1558]) tensor([0.0417, 0.1855, 0.5336, 0.2392]) -Greedy action tensor([-1.8925, -0.3233, 0.6387, -0.1321]) tensor([0.0413, 0.1986, 0.5197, 0.2404]) -Greedy action tensor([-1.7828, -0.3758, 0.6471, -0.0389]) tensor([0.0451, 0.1843, 0.5125, 0.2581]) -Greedy action tensor([-1.8494, -0.3681, 0.6082, -0.1465]) tensor([0.0443, 0.1949, 0.5175, 0.2433]) -Greedy action tensor([-1.1809, 0.1127, -0.2596, -0.9357]) tensor([0.1185, 0.4322, 0.2978, 0.1515]) -Greedy action tensor([-1.9048, -0.4255, 0.6434, -0.1642]) tensor([0.0419, 0.1839, 0.5355, 0.2388]) -Greedy action tensor([-1.6207, -0.5685, 0.5008, -0.0238]) tensor([0.0583, 0.1670, 0.4866, 0.2880]) -Greedy action tensor([-1.8351, -0.2623, 0.5908, -0.1206]) tensor([0.0441, 0.2125, 0.4986, 0.2448]) -Greedy action tensor([-1.8237, -0.4689, 0.5962, -0.1335]) tensor([0.0464, 0.1799, 0.5220, 0.2516]) -Greedy action tensor([-0.9286, -0.6074, 0.2740, -0.0475]) tensor([0.1231, 0.1698, 0.4099, 0.2972]) -Greedy action tensor([-1.0458, -0.3496, 0.5459, 0.8514]) tensor([0.0686, 0.1375, 0.3368, 0.4571]) -Greedy action tensor([-0.5897, -0.4866, 0.2024, 0.0114]) tensor([0.1628, 0.1805, 0.3596, 0.2970]) -Greedy action tensor([-1.8898, -0.3830, 0.6293, -0.1484]) tensor([0.0423, 0.1909, 0.5254, 0.2414]) -Greedy action tensor([-1.8064, -0.3361, 0.5911, -0.1483]) tensor([0.0463, 0.2014, 0.5092, 0.2431]) -Greedy action tensor([ 1.2600, -0.4635, 0.0776, 0.3365]) tensor([0.5313, 0.0948, 0.1629, 0.2110]) -Greedy action tensor([ 1.2045, -0.5505, -0.0812, -0.3297]) tensor([0.6006, 0.1039, 0.1660, 0.1295]) -Greedy action tensor([ 1.0587, -0.3428, -0.1957, 0.2602]) tensor([0.5047, 0.1243, 0.1440, 0.2271]) -Greedy action tensor([ 1.0952, -0.2184, -0.1414, 0.0158]) tensor([0.5266, 0.1416, 0.1529, 0.1789]) -Greedy action tensor([ 1.8983, -0.7969, -0.2337, 0.2562]) tensor([0.7248, 0.0489, 0.0860, 0.1403]) -Greedy action tensor([ 0.6374, -0.3813, -0.3114, 0.4978]) tensor([0.3820, 0.1379, 0.1479, 0.3322]) -Greedy action tensor([ 1.2415, -0.8235, -0.1059, 0.0116]) tensor([0.5956, 0.0755, 0.1548, 0.1741]) -Greedy action tensor([ 1.3211, -0.2522, -0.7221, 0.3562]) tensor([0.5821, 0.1207, 0.0754, 0.2218]) -Greedy action tensor([ 1.3417, 0.2000, -0.7519, -0.0256]) tensor([0.5892, 0.1881, 0.0726, 0.1501]) -Greedy action tensor([ 1.3386, -0.1203, 0.0014, 0.2677]) tensor([0.5441, 0.1265, 0.1429, 0.1865]) -Greedy action tensor([ 1.9930, -0.8994, -0.2475, 1.0390]) tensor([0.6464, 0.0358, 0.0688, 0.2490]) -Greedy action tensor([ 2.1835, -1.5657, -0.4181, 1.1143]) tensor([0.6940, 0.0163, 0.0515, 0.2382]) -Greedy action tensor([ 1.3890, -0.1310, -0.4661, 0.4841]) tensor([0.5619, 0.1229, 0.0879, 0.2273]) -Greedy action tensor([ 1.1803e+00, -6.0874e-01, -3.0268e-01, 2.2179e-04]) tensor([0.5878, 0.0982, 0.1334, 0.1806]) -Greedy action tensor([ 1.2082, -0.3402, -0.4595, 0.1521]) tensor([0.5717, 0.1215, 0.1079, 0.1989]) -Greedy action tensor([ 1.9310, -0.3317, -0.6484, 0.5444]) tensor([0.6994, 0.0728, 0.0530, 0.1748]) -Greedy action tensor([ 2.6187, -1.3684, 0.2567, 1.3504]) tensor([0.7173, 0.0133, 0.0676, 0.2018]) -Greedy action tensor([ 1.8473, -0.2790, -0.3560, 0.2336]) tensor([0.6999, 0.0835, 0.0773, 0.1394]) -Greedy action tensor([ 1.5137, -0.0421, -0.3633, 0.3529]) tensor([0.5962, 0.1258, 0.0912, 0.1867]) -Greedy action tensor([ 1.6008, -0.6259, -0.2661, 0.4774]) tensor([0.6299, 0.0679, 0.0974, 0.2048]) -Greedy action tensor([ 1.7664, -0.9974, -0.2756, -0.0851]) tensor([0.7408, 0.0467, 0.0961, 0.1163]) -Greedy action tensor([ 2.1376, -0.1268, -0.6618, 0.5133]) tensor([0.7343, 0.0763, 0.0447, 0.1447]) -Greedy action tensor([ 0.6484, -0.1915, -0.0436, -0.1265]) tensor([0.4179, 0.1804, 0.2092, 0.1925]) -Greedy action tensor([ 1.7737, 0.2570, -0.3921, 0.2414]) tensor([0.6451, 0.1416, 0.0740, 0.1394]) -Greedy action tensor([ 1.9523, -0.7073, -0.4350, 0.6822]) tensor([0.6932, 0.0485, 0.0637, 0.1947]) -Greedy action tensor([ 2.2878, -0.4129, -0.2994, 0.5010]) tensor([0.7634, 0.0513, 0.0574, 0.1279]) -Greedy action tensor([ 2.0911, -0.9471, -0.1310, 0.4705]) tensor([0.7385, 0.0354, 0.0800, 0.1461]) -Greedy action tensor([ 1.8479, -0.9817, -0.3404, 0.4970]) tensor([0.6992, 0.0413, 0.0784, 0.1811]) -Greedy action tensor([ 1.2730, -0.4399, 0.0056, 0.4040]) tensor([0.5316, 0.0959, 0.1497, 0.2229]) -Greedy action tensor([ 1.5278, 0.1710, -0.4668, 0.3357]) tensor([0.5892, 0.1517, 0.0802, 0.1789]) -Greedy action tensor([ 1.6492, 0.4471, -0.5655, 0.2804]) tensor([0.6009, 0.1806, 0.0656, 0.1529]) -Greedy action tensor([ 1.5628, -0.5937, -0.2028, 0.6104]) tensor([0.5978, 0.0692, 0.1023, 0.2307]) -Greedy action tensor([ 2.0066, -0.6515, -0.2625, 0.8031]) tensor([0.6786, 0.0476, 0.0702, 0.2037]) -Greedy action tensor([ 1.5804, -0.5577, -0.2885, 0.3399]) tensor([0.6405, 0.0755, 0.0988, 0.1852]) -Greedy action tensor([ 1.9118, -0.7516, -0.3681, 0.7237]) tensor([0.6771, 0.0472, 0.0693, 0.2064]) -Greedy action tensor([ 1.6923, 0.1800, -0.1702, 0.0621]) tensor([0.6363, 0.1403, 0.0988, 0.1246]) -Greedy action tensor([ 1.4296, -0.7280, -0.4974, 0.7552]) tensor([0.5648, 0.0653, 0.0822, 0.2877]) -Greedy action tensor([ 1.5788, -0.3786, -0.1022, 0.0383]) tensor([0.6486, 0.0916, 0.1208, 0.1390]) -Greedy action tensor([ 1.8474, -1.0639, -0.0357, 0.6472]) tensor([0.6633, 0.0361, 0.1009, 0.1997]) -Greedy action tensor([ 1.3213, -0.0123, -0.7108, 0.4784]) tensor([0.5479, 0.1444, 0.0718, 0.2359]) -Greedy action tensor([ 1.5884, -0.1652, -0.2103, 0.2304]) tensor([0.6266, 0.1085, 0.1037, 0.1612]) -Greedy action tensor([ 1.4814, 0.0069, -0.1205, 0.1165]) tensor([0.5932, 0.1358, 0.1195, 0.1515]) -Greedy action tensor([ 2.1314, -0.4070, -0.8572, 0.5911]) tensor([0.7442, 0.0588, 0.0375, 0.1595]) -Greedy action tensor([ 0.6266, -0.5515, 0.0183, 0.2008]) tensor([0.3991, 0.1229, 0.2172, 0.2607]) -Greedy action tensor([ 1.5830, -1.0296, 0.0748, 0.9711]) tensor([0.5444, 0.0399, 0.1205, 0.2952]) -Greedy action tensor([ 1.2668, -0.1247, -0.2307, 0.0433]) tensor([0.5661, 0.1408, 0.1266, 0.1665]) -Greedy action tensor([ 1.5691, -0.7907, -0.3350, 0.4277]) tensor([0.6399, 0.0604, 0.0953, 0.2044]) -Greedy action tensor([ 1.9195, -1.4972, 0.0914, 0.1062]) tensor([0.7371, 0.0242, 0.1185, 0.1202]) -Greedy action tensor([ 1.4165, -0.5368, -0.5082, 0.1627]) tensor([0.6357, 0.0901, 0.0928, 0.1814]) -Greedy action tensor([ 1.1838, -0.3301, -0.3701, 0.3961]) tensor([0.5301, 0.1166, 0.1121, 0.2412]) -Greedy action tensor([ 1.4890, -0.5350, 0.1090, 0.4167]) tensor([0.5794, 0.0766, 0.1458, 0.1983]) -Greedy action tensor([ 1.5643, 0.3738, 0.2005, -0.3069]) tensor([0.5835, 0.1774, 0.1492, 0.0898]) -Greedy action tensor([ 1.2510, -0.8213, -0.0848, 0.2378]) tensor([0.5708, 0.0719, 0.1501, 0.2072]) -Greedy action tensor([ 1.4637, -0.7951, -0.0318, 0.0932]) tensor([0.6319, 0.0660, 0.1416, 0.1605]) -Greedy action tensor([ 1.3164, -0.0520, -0.4270, 0.4400]) tensor([0.5418, 0.1379, 0.0948, 0.2255]) -Greedy action tensor([ 2.0921, -1.1075, -0.0747, 0.3812]) tensor([0.7485, 0.0305, 0.0857, 0.1353]) -Greedy action tensor([ 1.5678, -0.8713, -0.2819, 0.3150]) tensor([0.6535, 0.0570, 0.1028, 0.1867]) -Greedy action tensor([ 1.5039, -0.3901, 0.0036, 0.4255]) tensor([0.5835, 0.0878, 0.1302, 0.1985]) -Greedy action tensor([ 1.0311, 0.0586, -0.7596, 0.4194]) tensor([0.4791, 0.1812, 0.0799, 0.2598]) -Greedy action tensor([ 1.4689, -0.2260, -0.4457, 0.3801]) tensor([0.5996, 0.1101, 0.0884, 0.2019]) -Greedy action tensor([ 0.9729, -0.4023, -0.0330, 0.1639]) tensor([0.4846, 0.1225, 0.1772, 0.2158]) -Greedy action tensor([ 1.5103, -0.6396, -0.4833, 0.0135]) tensor([0.6772, 0.0789, 0.0922, 0.1516]) -Greedy action tensor([0.6206, 0.0125, 0.1139, 0.1028]) tensor([0.3646, 0.1985, 0.2197, 0.2173]) -Greedy action tensor([ 1.9224, -1.3097, -0.2209, 0.4349]) tensor([0.7232, 0.0285, 0.0848, 0.1634]) -Greedy action tensor([ 1.6415, -0.3654, -1.1322, 0.7000]) tensor([0.6302, 0.0847, 0.0393, 0.2458]) -Greedy action tensor([ 1.7401, -0.2159, -0.1182, 0.8818]) tensor([0.5810, 0.0822, 0.0906, 0.2463]) -Greedy action tensor([ 1.9113, -0.3625, 0.0076, -0.1209]) tensor([0.7231, 0.0744, 0.1078, 0.0948]) -Greedy action tensor([ 2.5241, -1.1089, -0.1558, 0.9091]) tensor([0.7729, 0.0204, 0.0530, 0.1537]) -Greedy action tensor([ 1.8815, -0.1629, -0.5588, 0.4892]) tensor([0.6826, 0.0884, 0.0595, 0.1696]) -Greedy action tensor([ 1.3823, -0.2966, -0.7845, 0.1854]) tensor([0.6237, 0.1164, 0.0714, 0.1884]) -Greedy action tensor([ 0.4208, -0.3068, 0.0862, 0.0341]) tensor([0.3475, 0.1679, 0.2487, 0.2360]) -Greedy action tensor([ 1.0163, -0.5418, -0.0304, 0.2122]) tensor([0.4977, 0.1048, 0.1747, 0.2227]) -Greedy action tensor([ 0.9744, -0.1568, -0.0230, 0.0211]) tensor([0.4815, 0.1553, 0.1776, 0.1856]) -Greedy action tensor([ 1.8630, 0.6747, 0.2044, -0.4000]) tensor([0.6253, 0.1906, 0.1191, 0.0651]) -Greedy action tensor([ 1.3731, -0.4563, -0.4151, 0.3551]) tensor([0.5920, 0.0950, 0.0990, 0.2139]) -Greedy action tensor([ 1.2092, -0.0990, -0.4121, 0.6691]) tensor([0.4877, 0.1318, 0.0964, 0.2841]) -Greedy action tensor([ 1.0452, -0.0013, -0.4563, 0.5430]) tensor([0.4589, 0.1611, 0.1022, 0.2777]) -Greedy action tensor([ 1.8030, 0.0477, -0.5855, 0.7859]) tensor([0.6149, 0.1063, 0.0564, 0.2224]) -Greedy action tensor([ 1.7055, -0.3681, -0.0530, 0.8825]) tensor([0.5757, 0.0724, 0.0992, 0.2528]) -Greedy action tensor([ 1.5826, -0.6335, -0.3134, 0.6184]) tensor([0.6096, 0.0665, 0.0915, 0.2324]) -Greedy action tensor([ 1.3413, -0.4743, -0.5781, 0.0867]) tensor([0.6271, 0.1020, 0.0920, 0.1788]) -Greedy action tensor([ 0.6335, -0.4275, -0.0842, -0.2902]) tensor([0.4482, 0.1551, 0.2187, 0.1780]) -Greedy action tensor([ 7.9320e-01, -5.2581e-01, -2.2405e-04, -3.4965e-01]) tensor([0.4905, 0.1312, 0.2219, 0.1564]) -Greedy action tensor([ 0.9252, -0.9177, 0.0229, -0.5278]) tensor([0.5562, 0.0881, 0.2256, 0.1301]) -Greedy action tensor([ 0.8935, -0.5600, 0.0458, -0.4735]) tensor([0.5217, 0.1219, 0.2235, 0.1329]) -Greedy action tensor([ 1.0739, -0.5687, -0.2251, -0.1815]) tensor([0.5710, 0.1105, 0.1558, 0.1627]) -Greedy action tensor([ 0.4779, -0.0638, 0.1035, -0.3891]) tensor([0.3718, 0.2163, 0.2557, 0.1562]) -Greedy action tensor([ 0.5519, -0.4577, -0.0495, -0.3515]) tensor([0.4315, 0.1572, 0.2365, 0.1748]) -Greedy action tensor([ 0.7862, -0.1736, -0.0794, -0.2253]) tensor([0.4614, 0.1767, 0.1941, 0.1678]) -Greedy action tensor([ 0.4371, -0.2450, -0.0636, -0.1255]) tensor([0.3729, 0.1885, 0.2260, 0.2125]) -Greedy action tensor([ 0.7875, -0.5488, -0.0086, -0.2649]) tensor([0.4847, 0.1274, 0.2187, 0.1692]) -Greedy action tensor([ 0.4580, -0.1880, -0.0213, -0.1218]) tensor([0.3699, 0.1939, 0.2291, 0.2071]) -Greedy action tensor([ 0.7020, -0.3626, -0.1260, -0.1647]) tensor([0.4541, 0.1566, 0.1984, 0.1909]) -Greedy action tensor([ 0.4052, -0.3392, -0.0953, -0.3506]) tensor([0.3920, 0.1862, 0.2376, 0.1841]) -Greedy action tensor([ 0.5863, -0.1397, -0.1316, -0.0849]) tensor([0.4028, 0.1949, 0.1965, 0.2058]) -Greedy action tensor([ 0.3913, -0.2359, 0.0134, -0.3467]) tensor([0.3707, 0.1980, 0.2541, 0.1772]) -Greedy action tensor([ 0.8758, -0.2507, -0.1020, -0.3851]) tensor([0.5041, 0.1634, 0.1896, 0.1429]) -Greedy action tensor([ 0.3936, 0.0629, -0.1144, -0.2264]) tensor([0.3499, 0.2514, 0.2105, 0.1882]) -Greedy action tensor([ 1.1131, -1.1273, 0.0396, -0.4709]) tensor([0.6048, 0.0644, 0.2067, 0.1241]) -Greedy action tensor([ 0.5170, -0.4586, -0.1219, -0.0499]) tensor([0.4045, 0.1525, 0.2135, 0.2295]) -Greedy action tensor([ 0.6594, -0.2381, -0.1619, 0.2095]) tensor([0.4024, 0.1640, 0.1770, 0.2566]) -Greedy action tensor([ 0.6529, -0.7108, -0.0011, -0.2704]) tensor([0.4602, 0.1177, 0.2393, 0.1828]) -Greedy action tensor([ 0.5760, -0.3580, -0.1599, -0.4227]) tensor([0.4464, 0.1754, 0.2138, 0.1644]) -Greedy action tensor([ 0.5685, -0.2422, -0.2083, -0.3956]) tensor([0.4375, 0.1945, 0.2012, 0.1668]) -Greedy action tensor([ 0.7220, -0.5444, 0.0040, -0.4671]) tensor([0.4822, 0.1359, 0.2351, 0.1468]) -Greedy action tensor([ 0.8009, -0.5573, 0.0937, -0.4846]) tensor([0.4934, 0.1269, 0.2433, 0.1364]) -Greedy action tensor([ 0.6176, -0.4449, -0.0743, -0.3731]) tensor([0.4510, 0.1558, 0.2258, 0.1674]) -Greedy action tensor([ 1.0723, -0.6988, -0.1061, -0.5403]) tensor([0.5962, 0.1014, 0.1835, 0.1189]) -Greedy action tensor([ 0.3643, -0.1444, -0.0348, -0.0608]) tensor([0.3418, 0.2055, 0.2293, 0.2234]) -Greedy action tensor([ 0.5117, 0.0608, -0.0913, -0.1827]) tensor([0.3726, 0.2374, 0.2039, 0.1861]) -Greedy action tensor([ 0.5022, -0.4665, -0.1092, -0.2017]) tensor([0.4138, 0.1571, 0.2245, 0.2047]) -Greedy action tensor([ 0.6279, -0.2131, -0.0170, -0.0209]) tensor([0.4034, 0.1740, 0.2117, 0.2109]) -Greedy action tensor([ 1.0580, -0.6799, -0.0567, -0.4327]) tensor([0.5783, 0.1017, 0.1897, 0.1302]) -Greedy action tensor([ 0.5716, 0.1242, -0.0748, -0.0601]) tensor([0.3711, 0.2372, 0.1944, 0.1973]) -Greedy action tensor([ 1.1151, -1.1774, 0.1306, -0.6601]) tensor([0.6082, 0.0614, 0.2273, 0.1031]) -Greedy action tensor([ 1.1859, -0.6878, -0.1537, -0.4683]) tensor([0.6224, 0.0956, 0.1630, 0.1190]) -Greedy action tensor([ 0.3746, -0.1582, -0.0618, -0.1896]) tensor([0.3569, 0.2095, 0.2307, 0.2030]) -Greedy action tensor([ 0.8794, -0.5092, -0.0469, -0.2971]) tensor([0.5118, 0.1277, 0.2027, 0.1578]) -Greedy action tensor([ 0.6254, -0.6640, -0.0099, -0.2117]) tensor([0.4468, 0.1231, 0.2367, 0.1934]) -Greedy action tensor([ 0.9388, -0.8034, -0.0351, -0.8192]) tensor([0.5797, 0.1015, 0.2189, 0.0999]) -Greedy action tensor([ 0.8695, -0.5074, -0.0842, -0.3129]) tensor([0.5143, 0.1298, 0.1982, 0.1577]) -Greedy action tensor([ 1.0670, -0.7185, -0.0740, -0.4486]) tensor([0.5859, 0.0982, 0.1872, 0.1287]) -Greedy action tensor([ 0.9179, -0.5205, -0.1807, -0.2460]) tensor([0.5311, 0.1260, 0.1770, 0.1658]) -Greedy action tensor([ 0.5484, -0.2100, -0.0448, -0.5124]) tensor([0.4224, 0.1979, 0.2334, 0.1462]) -Greedy action tensor([ 0.6451, 0.0519, -0.0883, -0.3670]) tensor([0.4173, 0.2306, 0.2004, 0.1517]) -Greedy action tensor([ 0.5932, -0.3333, 0.2348, -0.3054]) tensor([0.3997, 0.1583, 0.2793, 0.1627]) -Greedy action tensor([ 0.4385, -0.0833, -0.0640, -0.5266]) tensor([0.3877, 0.2301, 0.2346, 0.1477]) -Greedy action tensor([ 0.5885, -0.3730, 0.0093, -0.3037]) tensor([0.4251, 0.1625, 0.2382, 0.1742]) -Greedy action tensor([ 0.5470, -0.3113, 0.0397, -0.1766]) tensor([0.3983, 0.1688, 0.2398, 0.1932]) -Greedy action tensor([ 0.9231, -0.7470, 0.0265, -0.5170]) tensor([0.5455, 0.1027, 0.2225, 0.1292]) -Greedy action tensor([ 0.9275, -0.5563, 0.0125, -0.2865]) tensor([0.5197, 0.1178, 0.2081, 0.1543]) -Greedy action tensor([ 0.2443, -0.0487, -0.1601, 0.0059]) tensor([0.3124, 0.2330, 0.2085, 0.2461]) -Greedy action tensor([ 0.4532, -0.1744, 0.1888, -0.2281]) tensor([0.3562, 0.1902, 0.2734, 0.1802]) -Greedy action tensor([ 0.8282, -0.4264, 0.0313, -0.6856]) tensor([0.5113, 0.1458, 0.2304, 0.1125]) -Greedy action tensor([ 0.4970, -0.1108, 0.0102, -0.0550]) tensor([0.3656, 0.1991, 0.2247, 0.2105]) -Greedy action tensor([ 0.4886, -0.2041, -0.0083, -0.0430]) tensor([0.3709, 0.1855, 0.2256, 0.2180]) -Greedy action tensor([ 0.7470, -0.4866, 0.0054, -0.2413]) tensor([0.4673, 0.1361, 0.2226, 0.1740]) -Greedy action tensor([ 0.5018, -0.0356, -0.0715, 0.0121]) tensor([0.3622, 0.2116, 0.2042, 0.2220]) -Greedy action tensor([ 0.8734, -0.2479, -0.0537, -0.4798]) tensor([0.5051, 0.1646, 0.1999, 0.1305]) -Greedy action tensor([ 0.5068, -0.2775, -0.0478, -0.1268]) tensor([0.3904, 0.1782, 0.2242, 0.2072]) -Greedy action tensor([ 0.3842, -0.1656, 0.0046, -0.2164]) tensor([0.3559, 0.2054, 0.2435, 0.1952]) -Greedy action tensor([ 0.4207, -0.3344, -0.0208, -0.3008]) tensor([0.3848, 0.1808, 0.2474, 0.1870]) -Greedy action tensor([ 0.6423, -0.3962, 0.0258, -0.2410]) tensor([0.4334, 0.1534, 0.2340, 0.1792]) -Greedy action tensor([ 0.6947, -0.3977, -0.0344, -0.3286]) tensor([0.4593, 0.1541, 0.2215, 0.1651]) -Greedy action tensor([ 0.8296, -0.5742, -0.1127, -0.5095]) tensor([0.5270, 0.1295, 0.2054, 0.1381]) -Greedy action tensor([ 0.6369, -0.4697, -0.0145, -0.1288]) tensor([0.4316, 0.1427, 0.2250, 0.2007]) -Greedy action tensor([ 0.9747, -0.7444, -0.0725, -0.4007]) tensor([0.5609, 0.1005, 0.1968, 0.1418]) -Greedy action tensor([ 0.2014, -0.0995, -0.1647, -0.3157]) tensor([0.3300, 0.2443, 0.2289, 0.1968]) -Greedy action tensor([ 0.6734, -0.6097, -0.0579, -0.4302]) tensor([0.4784, 0.1326, 0.2303, 0.1587]) -Greedy action tensor([ 0.4646, -0.0718, 0.1010, -0.2438]) tensor([0.3607, 0.2110, 0.2507, 0.1776]) -Greedy action tensor([ 0.1642, -0.2189, -0.2361, -0.1996]) tensor([0.3282, 0.2238, 0.2199, 0.2281]) -Greedy action tensor([ 0.7912, -0.4500, -0.0372, -0.2006]) tensor([0.4769, 0.1379, 0.2083, 0.1769]) -Greedy action tensor([ 0.7752, -0.0606, -0.1173, -0.3954]) tensor([0.4644, 0.2013, 0.1902, 0.1440]) -Greedy action tensor([ 0.9762, -0.5360, -0.1990, -0.4072]) tensor([0.5618, 0.1238, 0.1735, 0.1409]) -Greedy action tensor([ 0.8495, -0.7001, -0.1087, -0.4456]) tensor([0.5348, 0.1136, 0.2052, 0.1465]) -Greedy action tensor([ 0.9087, -0.6312, -0.0160, -0.5164]) tensor([0.5401, 0.1158, 0.2142, 0.1299]) -Greedy action tensor([ 0.9622, -0.8305, 0.1053, -0.4925]) tensor([0.5481, 0.0913, 0.2327, 0.1280]) -Greedy action tensor([ 0.7645, 0.1323, -0.1274, 0.1898]) tensor([0.3993, 0.2122, 0.1637, 0.2248]) -Greedy action tensor([ 1.0711, -0.5098, -0.2163, -0.4542]) tensor([0.5885, 0.1211, 0.1624, 0.1280]) -Greedy action tensor([ 0.4225, -0.4205, -0.0449, -0.1762]) tensor([0.3836, 0.1651, 0.2404, 0.2108]) -Greedy action tensor([ 0.6705, -0.7272, -0.1582, -0.2941]) tensor([0.4843, 0.1197, 0.2114, 0.1846]) -Greedy action tensor([ 0.6432, -0.3760, -0.1798, -0.4541]) tensor([0.4686, 0.1691, 0.2058, 0.1564]) -Greedy action tensor([-0.7204, -0.7013, 0.5361, 0.4160]) tensor([0.1156, 0.1179, 0.4062, 0.3603]) -Greedy action tensor([ 0.1683, 0.2885, 0.2085, -0.0519]) tensor([0.2518, 0.2840, 0.2622, 0.2021]) -Greedy action tensor([-0.1993, -1.1614, 0.4256, 0.9301]) tensor([0.1576, 0.0602, 0.2945, 0.4877]) -Greedy action tensor([-0.5773, -0.8290, 0.4440, -1.3182]) tensor([0.1988, 0.1545, 0.5520, 0.0947]) -Greedy action tensor([-0.1629, 0.7999, -0.2186, 0.0396]) tensor([0.1727, 0.4524, 0.1634, 0.2115]) -Greedy action tensor([-0.6556, 0.0196, 0.7199, -0.4852]) tensor([0.1233, 0.2423, 0.4881, 0.1463]) -Greedy action tensor([-0.0597, 0.0972, -0.1158, 0.2475]) tensor([0.2235, 0.2614, 0.2113, 0.3038]) -Greedy action tensor([-1.8331, -0.6989, 1.1896, -1.2829]) tensor([0.0379, 0.1178, 0.7786, 0.0657]) -Greedy action tensor([ 0.4712, 0.2895, 1.0757, -0.3552]) tensor([0.2438, 0.2033, 0.4462, 0.1067]) -Greedy action tensor([ 0.2134, 0.0916, 0.4892, -0.2286]) tensor([0.2600, 0.2302, 0.3426, 0.1671]) -Greedy action tensor([0.7845, 0.4156, 0.8216, 0.2695]) tensor([0.3006, 0.2079, 0.3120, 0.1796]) -Greedy action tensor([-1.0667, -1.9354, -0.2049, 0.0932]) tensor([0.1433, 0.0601, 0.3393, 0.4572]) -Greedy action tensor([ 1.1529, -1.0289, 1.2134, 0.3839]) tensor([0.3790, 0.0428, 0.4026, 0.1757]) -Greedy action tensor([ 0.3806, -1.8971, 0.7460, 0.3619]) tensor([0.2837, 0.0291, 0.4088, 0.2784]) -Greedy action tensor([-0.6227, -0.3970, -0.7262, 0.4796]) tensor([0.1622, 0.2033, 0.1462, 0.4883]) -Greedy action tensor([-0.1915, -0.0320, -0.2495, -0.4245]) tensor([0.2558, 0.3001, 0.2414, 0.2027]) -Greedy action tensor([ 0.6506, -0.1440, 0.6938, -1.2857]) tensor([0.3788, 0.1711, 0.3955, 0.0546]) -Greedy action tensor([ 0.4217, 0.2708, -0.5149, -0.0450]) tensor([0.3473, 0.2987, 0.1361, 0.2178]) -Greedy action tensor([-4.3073e-04, -8.9406e-01, -3.6069e-01, -1.0890e-01]) tensor([0.3329, 0.1362, 0.2322, 0.2987]) -Greedy action tensor([ 0.3945, -1.3277, 0.8764, -0.3276]) tensor([0.3045, 0.0544, 0.4931, 0.1479]) -Greedy action tensor([-0.7416, -0.9431, -0.4676, 0.1541]) tensor([0.1792, 0.1465, 0.2356, 0.4388]) -Greedy action tensor([ 1.4005, -0.2511, 0.8179, 0.8099]) tensor([0.4340, 0.0832, 0.2424, 0.2404]) -Greedy action tensor([ 0.3803, -0.2982, -0.0758, -0.2138]) tensor([0.3713, 0.1884, 0.2353, 0.2050]) -Greedy action tensor([ 0.1917, -0.7505, 0.0709, 0.7764]) tensor([0.2457, 0.0958, 0.2177, 0.4408]) -Greedy action tensor([-0.3300, -0.9919, -0.3992, 0.5235]) tensor([0.2085, 0.1075, 0.1945, 0.4894]) -Greedy action tensor([-0.4531, -0.8017, 1.8477, -0.9435]) tensor([0.0813, 0.0574, 0.8115, 0.0498]) -Greedy action tensor([ 0.9923, -0.1817, 0.5402, 0.0555]) tensor([0.4278, 0.1323, 0.2722, 0.1677]) -Greedy action tensor([ 0.0805, -0.2362, 0.0419, -0.7576]) tensor([0.3202, 0.2333, 0.3081, 0.1385]) -Greedy action tensor([0.8100, 0.1043, 1.4776, 0.8218]) tensor([0.2245, 0.1108, 0.4376, 0.2271]) -Greedy action tensor([-0.2638, -0.2524, 0.9761, 0.0044]) tensor([0.1476, 0.1493, 0.5101, 0.1930]) -Greedy action tensor([ 0.5917, -0.9221, -0.1828, 0.0308]) tensor([0.4441, 0.0977, 0.2047, 0.2535]) -Greedy action tensor([-1.2543, -0.9656, 0.5658, -0.0596]) tensor([0.0847, 0.1130, 0.5226, 0.2797]) -Greedy action tensor([ 0.1411, -0.1334, -0.8774, 1.2479]) tensor([0.1943, 0.1477, 0.0702, 0.5878]) -Greedy action tensor([-0.1558, -0.7672, 1.0135, -0.3940]) tensor([0.1802, 0.0978, 0.5801, 0.1420]) -Greedy action tensor([ 0.1574, 1.0560, -0.0157, 0.9000]) tensor([0.1563, 0.3839, 0.1314, 0.3284]) -Greedy action tensor([-0.4808, -2.0885, 0.1480, 0.7851]) tensor([0.1510, 0.0303, 0.2832, 0.5355]) -Greedy action tensor([ 0.1713, -1.5899, 0.2736, 0.2578]) tensor([0.2967, 0.0510, 0.3287, 0.3235]) -Greedy action tensor([ 0.0541, 0.2733, 0.0804, -0.6516]) tensor([0.2656, 0.3306, 0.2726, 0.1311]) -Greedy action tensor([ 1.0128, -0.4053, 0.1485, 0.3977]) tensor([0.4537, 0.1099, 0.1912, 0.2453]) -Greedy action tensor([-0.2098, -0.5594, 0.7835, -0.9837]) tensor([0.2055, 0.1449, 0.5549, 0.0948]) -Greedy action tensor([-0.1667, 0.6972, 0.1852, -0.3795]) tensor([0.1785, 0.4235, 0.2538, 0.1443]) -Greedy action tensor([-0.3764, -2.4539, -0.0349, 0.9022]) tensor([0.1633, 0.0205, 0.2298, 0.5865]) -Greedy action tensor([ 0.8554, -0.8191, -0.5762, -0.0261]) tensor([0.5433, 0.1018, 0.1298, 0.2250]) -Greedy action tensor([ 0.4348, -0.5752, -0.7517, 1.6641]) tensor([0.1965, 0.0716, 0.0600, 0.6719]) -Greedy action tensor([ 0.8300, -0.3363, 0.1768, 0.3958]) tensor([0.4033, 0.1256, 0.2099, 0.2612]) -Greedy action tensor([-0.4151, -0.7190, -0.0491, 0.4233]) tensor([0.1821, 0.1344, 0.2625, 0.4210]) -Greedy action tensor([ 0.2520, -1.9295, 1.4539, -0.0838]) tensor([0.1940, 0.0219, 0.6454, 0.1387]) -Greedy action tensor([ 0.1795, 0.7437, 1.3724, -0.4657]) tensor([0.1520, 0.2672, 0.5011, 0.0797]) -Greedy action tensor([ 1.4776, -0.7091, -0.5573, 0.9842]) tensor([0.5395, 0.0606, 0.0705, 0.3294]) -Greedy action tensor([ 0.8521, -0.7464, 1.1877, 0.1171]) tensor([0.3246, 0.0656, 0.4541, 0.1557]) -Greedy action tensor([-0.4638, -1.2734, 0.0702, 0.5488]) tensor([0.1694, 0.0754, 0.2889, 0.4663]) -Greedy action tensor([ 1.1184e+00, -1.7679e-01, -1.5336e-01, 2.4182e-04]) tensor([0.5316, 0.1456, 0.1490, 0.1738]) -Greedy action tensor([ 1.0387, 0.1184, 0.0160, -0.2932]) tensor([0.4946, 0.1970, 0.1779, 0.1306]) -Greedy action tensor([-0.6496, -1.5132, -0.4257, 0.4903]) tensor([0.1725, 0.0727, 0.2157, 0.5391]) -Greedy action tensor([-1.6247, -0.3906, -0.6128, 0.4951]) tensor([0.0645, 0.2214, 0.1773, 0.5368]) -Greedy action tensor([ 0.4288, -0.8313, 1.5832, 0.4875]) tensor([0.1813, 0.0514, 0.5751, 0.1922]) -Greedy action tensor([ 0.3104, -1.5866, -0.4189, -0.5178]) tensor([0.4833, 0.0725, 0.2331, 0.2111]) -Greedy action tensor([ 0.7205, -0.8044, -0.4109, 0.5196]) tensor([0.4241, 0.0923, 0.1368, 0.3469]) -Greedy action tensor([-0.5585, 0.3506, 0.7522, -0.9897]) tensor([0.1275, 0.3166, 0.4730, 0.0829]) -Greedy action tensor([-0.7486, -1.3251, 1.2675, -0.1687]) tensor([0.0921, 0.0518, 0.6916, 0.1645]) -Greedy action tensor([-0.0246, -0.8675, -0.7587, 0.3163]) tensor([0.3015, 0.1298, 0.1447, 0.4240]) -Greedy action tensor([ 0.8578, -1.0880, 0.9359, -0.4009]) tensor([0.3987, 0.0570, 0.4311, 0.1132]) -Greedy action tensor([ 0.6694, -0.2627, -0.5757, 0.4440]) tensor([0.4032, 0.1588, 0.1161, 0.3219]) -Greedy action tensor([ 0.5224, -1.0262, -0.1390, -0.1803]) tensor([0.4497, 0.0956, 0.2321, 0.2227]) -Greedy action tensor([-0.6765, -0.4627, -1.0021, 0.1232]) tensor([0.1928, 0.2388, 0.1393, 0.4291]) -Greedy action tensor([-1.6632, -0.3325, -0.0959, -0.5504]) tensor([0.0792, 0.2998, 0.3798, 0.2411]) -Greedy action tensor([ 0.2880, 0.4314, -0.9398, -0.4277]) tensor([0.3406, 0.3931, 0.0998, 0.1665]) -Greedy action tensor([ 0.7243, -0.7983, 0.0581, 0.0347]) tensor([0.4477, 0.0977, 0.2300, 0.2247]) -Greedy action tensor([0.5308, 0.8767, 0.5336, 0.6261]) tensor([0.2214, 0.3129, 0.2220, 0.2436]) -Greedy action tensor([-0.4827, -0.9128, -0.3671, -0.6819]) tensor([0.2784, 0.1811, 0.3125, 0.2281]) -Greedy action tensor([-0.5949, 1.0584, -0.5746, -0.6667]) tensor([0.1223, 0.6390, 0.1248, 0.1138]) -Greedy action tensor([ 0.4588, -0.7137, 0.1005, -0.4101]) tensor([0.4119, 0.1275, 0.2879, 0.1727]) -Greedy action tensor([ 0.2669, -0.6041, 0.4815, -0.8220]) tensor([0.3339, 0.1398, 0.4139, 0.1124]) -Greedy action tensor([ 1.6312, -0.1668, 0.7360, 0.9774]) tensor([0.4775, 0.0791, 0.1951, 0.2483]) -Greedy action tensor([ 0.9805, -1.3168, -0.1841, 0.2390]) tensor([0.5294, 0.0532, 0.1652, 0.2522]) -Greedy action tensor([-0.8542, -1.3566, 0.9473, -0.1436]) tensor([0.1031, 0.0624, 0.6247, 0.2098]) -Greedy action tensor([ 0.1016, -1.6552, -0.3044, -0.8695]) tensor([0.4510, 0.0778, 0.3005, 0.1708]) -Greedy action tensor([ 0.7689, -0.8420, 1.0796, 0.8846]) tensor([0.2713, 0.0542, 0.3701, 0.3045]) -Greedy action tensor([-0.3737, -2.1064, 0.2134, -0.2669]) tensor([0.2446, 0.0432, 0.4400, 0.2722]) -Greedy action tensor([ 0.2800, -1.7225, 0.4639, 0.7349]) tensor([0.2556, 0.0345, 0.3071, 0.4028]) -Greedy action tensor([-0.5626, 0.0807, -0.3926, 0.7385]) tensor([0.1288, 0.2451, 0.1527, 0.4733]) -Greedy action tensor([-1.6824, 0.1606, 0.4387, -0.0039]) tensor([0.0476, 0.3005, 0.3969, 0.2550]) -Greedy action tensor([-1.9262, -0.4301, 0.6598, -0.1693]) tensor([0.0408, 0.1819, 0.5411, 0.2362]) -Greedy action tensor([-1.7191, -0.5063, 0.5353, -0.0480]) tensor([0.0521, 0.1751, 0.4961, 0.2768]) -Greedy action tensor([-1.8443, -0.4324, 0.6212, -0.1211]) tensor([0.0445, 0.1826, 0.5236, 0.2493]) -Greedy action tensor([-1.5917, -0.5417, 0.6574, -0.0685]) tensor([0.0558, 0.1594, 0.5289, 0.2559]) -Greedy action tensor([-1.5583, -0.3692, 0.5330, -0.1854]) tensor([0.0613, 0.2012, 0.4959, 0.2417]) -Greedy action tensor([-1.2231, 0.1508, 0.5048, 0.2436]) tensor([0.0670, 0.2649, 0.3774, 0.2906]) -Greedy action tensor([-1.4221, 0.5097, 0.2299, 0.0346]) tensor([0.0574, 0.3964, 0.2997, 0.2465]) -Greedy action tensor([-1.1958, -0.2042, 0.5356, 0.2689]) tensor([0.0732, 0.1972, 0.4132, 0.3165]) -Greedy action tensor([-1.9196, -0.4335, 0.6543, -0.1667]) tensor([0.0411, 0.1818, 0.5396, 0.2374]) -Greedy action tensor([-1.6740, 0.0340, 0.6287, -0.4697]) tensor([0.0504, 0.2779, 0.5038, 0.1679]) -Greedy action tensor([-1.8297, -0.3702, 0.6069, -0.1149]) tensor([0.0449, 0.1931, 0.5129, 0.2492]) -Greedy action tensor([-1.6700, 0.1913, 0.4238, -0.0128]) tensor([0.0481, 0.3093, 0.3903, 0.2523]) -Greedy action tensor([-1.8132, -0.4880, 0.7721, 0.1329]) tensor([0.0399, 0.1503, 0.5300, 0.2797]) -Greedy action tensor([-1.6026, -0.4157, 0.6535, 0.2965]) tensor([0.0488, 0.1598, 0.4656, 0.3258]) -Greedy action tensor([-1.8027, -0.3182, 0.6340, -0.0594]) tensor([0.0443, 0.1956, 0.5068, 0.2533]) -Greedy action tensor([-1.9148, -0.3878, 0.6492, -0.1631]) tensor([0.0411, 0.1890, 0.5332, 0.2367]) -Greedy action tensor([-1.8006, -0.0238, 0.5282, -0.0666]) tensor([0.0438, 0.2588, 0.4495, 0.2480]) -Greedy action tensor([-1.9205, -0.4526, 0.6577, -0.1703]) tensor([0.0412, 0.1788, 0.5428, 0.2372]) -Greedy action tensor([-1.9139, -0.4562, 0.6643, -0.1599]) tensor([0.0412, 0.1772, 0.5433, 0.2383]) -Greedy action tensor([-1.7623, -0.4251, 0.5694, -0.0831]) tensor([0.0489, 0.1861, 0.5031, 0.2620]) -Greedy action tensor([-1.9350, -0.4454, 0.6685, -0.1731]) tensor([0.0404, 0.1791, 0.5455, 0.2351]) -Greedy action tensor([-1.5952, -0.4435, 0.5541, 0.1471]) tensor([0.0542, 0.1714, 0.4649, 0.3095]) -Greedy action tensor([-1.8555, -0.4855, 0.6360, -0.1170]) tensor([0.0440, 0.1733, 0.5321, 0.2506]) -Greedy action tensor([-1.8881, -0.4096, 0.6553, -0.1449]) tensor([0.0420, 0.1841, 0.5340, 0.2399]) -Greedy action tensor([-1.7460, -0.4625, 0.5521, -0.1241]) tensor([0.0510, 0.1839, 0.5072, 0.2579]) -Greedy action tensor([-1.7932, -0.4659, 0.5974, -0.0948]) tensor([0.0473, 0.1782, 0.5162, 0.2583]) -Greedy action tensor([-1.9137, -0.4308, 0.6433, -0.1604]) tensor([0.0415, 0.1830, 0.5357, 0.2398]) -Greedy action tensor([-2.0326, -0.7998, 0.5968, -0.1017]) tensor([0.0397, 0.1362, 0.5504, 0.2737]) -Greedy action tensor([-1.6521, -0.4360, 0.4973, -0.0131]) tensor([0.0552, 0.1864, 0.4739, 0.2845]) -Greedy action tensor([-1.7540, -0.3595, 0.5724, -0.0627]) tensor([0.0483, 0.1948, 0.4947, 0.2621]) -Greedy action tensor([-1.8815, -0.3630, 0.6353, -0.1460]) tensor([0.0423, 0.1932, 0.5244, 0.2401]) -Greedy action tensor([-1.8278, -0.3031, 0.5704, -0.1016]) tensor([0.0450, 0.2068, 0.4953, 0.2529]) -Greedy action tensor([-1.8635, -0.4612, 0.6287, -0.1383]) tensor([0.0439, 0.1785, 0.5309, 0.2466]) -Greedy action tensor([-1.8491, -0.4631, 0.6297, -0.1261]) tensor([0.0444, 0.1775, 0.5294, 0.2486]) -Greedy action tensor([-1.7767, -0.4406, 0.6393, -0.0116]) tensor([0.0458, 0.1741, 0.5127, 0.2674]) -Greedy action tensor([-1.4143, -0.5915, 0.3796, 0.0466]) tensor([0.0735, 0.1674, 0.4421, 0.3169]) -Greedy action tensor([-1.1535, -0.5627, 0.3052, 0.0149]) tensor([0.0969, 0.1749, 0.4166, 0.3116]) -Greedy action tensor([-1.8324, -0.2028, 0.5897, -0.1110]) tensor([0.0435, 0.2222, 0.4908, 0.2435]) -Greedy action tensor([-1.6854, -0.2953, 0.5214, -0.0870]) tensor([0.0525, 0.2108, 0.4771, 0.2596]) -Greedy action tensor([-1.9373, -0.4476, 0.6657, -0.1755]) tensor([0.0404, 0.1791, 0.5453, 0.2351]) -Greedy action tensor([-1.8673, -0.3477, 0.6119, -0.1554]) tensor([0.0434, 0.1984, 0.5178, 0.2404]) -Greedy action tensor([-1.2352, 0.5653, 0.2468, 0.0761]) tensor([0.0659, 0.3991, 0.2903, 0.2447]) -Greedy action tensor([-0.4640, -0.5082, 0.3202, 0.4376]) tensor([0.1513, 0.1447, 0.3314, 0.3726]) -Greedy action tensor([-1.9285, -0.4653, 0.6613, -0.1713]) tensor([0.0409, 0.1767, 0.5452, 0.2371]) -Greedy action tensor([-1.2747, 0.7868, 0.2486, 0.1651]) tensor([0.0566, 0.4448, 0.2597, 0.2389]) -Greedy action tensor([-0.6563, -0.1939, 0.2562, 0.4817]) tensor([0.1220, 0.1937, 0.3038, 0.3806]) -Greedy action tensor([-1.9090, -0.3910, 0.6407, -0.1540]) tensor([0.0414, 0.1890, 0.5301, 0.2395]) -Greedy action tensor([-1.6611, -0.3210, 0.6088, 0.0058]) tensor([0.0505, 0.1930, 0.4890, 0.2676]) -Greedy action tensor([-1.5670, -0.4747, 0.4866, 0.0356]) tensor([0.0597, 0.1781, 0.4656, 0.2966]) -Greedy action tensor([-1.8042, -0.3857, 0.5897, -0.1108]) tensor([0.0465, 0.1919, 0.5090, 0.2526]) -Greedy action tensor([-1.2854, 0.7148, 0.2055, 0.1738]) tensor([0.0584, 0.4313, 0.2592, 0.2511]) -Greedy action tensor([-1.8330, -0.4209, 0.6085, -0.1337]) tensor([0.0453, 0.1860, 0.5207, 0.2479]) -Greedy action tensor([-1.6387, 0.3801, 0.3841, 0.0558]) tensor([0.0464, 0.3497, 0.3511, 0.2528]) -Greedy action tensor([-1.8575, -0.3545, 0.6336, -0.1167]) tensor([0.0430, 0.1932, 0.5189, 0.2450]) -Greedy action tensor([-1.9287, -0.4318, 0.6596, -0.1715]) tensor([0.0407, 0.1818, 0.5416, 0.2359]) -Greedy action tensor([-1.8956, -0.4410, 0.6449, -0.1534]) tensor([0.0422, 0.1809, 0.5358, 0.2411]) -Greedy action tensor([-0.8573, 0.8937, 0.0777, 0.2386]) tensor([0.0813, 0.4683, 0.2071, 0.2432]) -Greedy action tensor([-1.6543, -0.3457, 0.5056, -0.0510]) tensor([0.0545, 0.2018, 0.4727, 0.2710]) -Greedy action tensor([-1.6451, -0.5629, 0.5480, 0.0528]) tensor([0.0544, 0.1606, 0.4877, 0.2973]) -Greedy action tensor([-1.6604, -0.1732, 0.5915, 0.0223]) tensor([0.0492, 0.2178, 0.4680, 0.2649]) -Greedy action tensor([-1.7702, -0.4621, 0.5880, -0.0902]) tensor([0.0485, 0.1793, 0.5123, 0.2600]) -Greedy action tensor([-1.8697, -0.4075, 0.6207, -0.1353]) tensor([0.0434, 0.1872, 0.5235, 0.2458]) -Greedy action tensor([-1.8115, -0.4341, 0.6013, -0.0990]) tensor([0.0461, 0.1829, 0.5152, 0.2557]) -Greedy action tensor([-1.7453, -0.0670, 0.5325, -0.0915]) tensor([0.0469, 0.2510, 0.4572, 0.2450]) -Greedy action tensor([-1.8079, -0.4250, 0.5972, -0.1272]) tensor([0.0467, 0.1860, 0.5169, 0.2505]) -Greedy action tensor([-1.5971, -0.3659, 0.5773, -0.2325]) tensor([0.0584, 0.1999, 0.5133, 0.2284]) -Greedy action tensor([-1.9144, -0.3482, 0.6343, -0.1692]) tensor([0.0411, 0.1970, 0.5262, 0.2356]) -Greedy action tensor([-1.8382, -0.3932, 0.6010, -0.1266]) tensor([0.0450, 0.1907, 0.5154, 0.2490]) -Greedy action tensor([-0.4687, 0.4280, 0.4075, 0.6923]) tensor([0.1105, 0.2710, 0.2655, 0.3530]) -Greedy action tensor([-1.6210, -0.3363, 0.6825, 0.1444]) tensor([0.0489, 0.1766, 0.4891, 0.2855]) -Greedy action tensor([-1.3753, -0.5517, 0.3357, 0.1291]) tensor([0.0751, 0.1711, 0.4157, 0.3381]) -Greedy action tensor([-1.1211, -0.4963, 0.3292, 0.0277]) tensor([0.0972, 0.1816, 0.4146, 0.3066]) -Greedy action tensor([-1.2006, 0.8295, 0.1851, -0.6487]) tensor([0.0697, 0.5307, 0.2786, 0.1210]) -Greedy action tensor([-1.3686, -0.4638, 0.5038, 0.2939]) tensor([0.0656, 0.1621, 0.4265, 0.3458]) -Greedy action tensor([-1.2189, -0.6258, 0.2762, 0.3213]) tensor([0.0838, 0.1516, 0.3737, 0.3909]) -Greedy action tensor([-1.5080, -0.1255, 0.4078, -0.0322]) tensor([0.0619, 0.2467, 0.4205, 0.2708]) -Greedy action tensor([-1.8865, -0.4600, 0.6380, -0.1411]) tensor([0.0428, 0.1781, 0.5341, 0.2450]) -Greedy action tensor([-1.9414, -0.4554, 0.6671, -0.1785]) tensor([0.0403, 0.1780, 0.5469, 0.2348]) -Greedy action tensor([-1.7386, -0.4504, 0.5643, -0.0772]) tensor([0.0503, 0.1823, 0.5028, 0.2647]) -Greedy action tensor([-1.9192, -0.3588, 0.6446, -0.1576]) tensor([0.0407, 0.1938, 0.5285, 0.2370]) -Greedy action tensor([ 1.7191, -0.2700, -0.6323, 0.2260]) tensor([0.6865, 0.0939, 0.0654, 0.1542]) -Greedy action tensor([ 1.4069, -0.6801, -0.3832, 0.3547]) tensor([0.6097, 0.0756, 0.1018, 0.2129]) -Greedy action tensor([ 1.4554, 0.0592, -1.0224, 0.3797]) tensor([0.5979, 0.1480, 0.0502, 0.2039]) -Greedy action tensor([ 1.1571, -0.3423, -0.5095, 0.0409]) tensor([0.5748, 0.1283, 0.1086, 0.1883]) -Greedy action tensor([ 1.1491, -0.2644, -0.2106, 0.4276]) tensor([0.5035, 0.1225, 0.1293, 0.2447]) -Greedy action tensor([ 0.8414, -0.3127, -0.0461, 0.0641]) tensor([0.4573, 0.1442, 0.1883, 0.2102]) -Greedy action tensor([ 1.2207, -0.6069, -0.4867, 0.9344]) tensor([0.4777, 0.0768, 0.0866, 0.3588]) -Greedy action tensor([ 1.6346, -0.7840, -0.0669, 0.4195]) tensor([0.6377, 0.0568, 0.1163, 0.1892]) -Greedy action tensor([ 1.8954, 0.0256, -0.0799, 0.4065]) tensor([0.6585, 0.1015, 0.0914, 0.1486]) -Greedy action tensor([ 1.1787, -0.1334, -1.2377, 0.3877]) tensor([0.5519, 0.1486, 0.0493, 0.2502]) -Greedy action tensor([ 2.2222, -1.6197, -0.5148, 0.5029]) tensor([0.7903, 0.0170, 0.0512, 0.1416]) -Greedy action tensor([ 1.6505, -0.4557, -0.6922, 0.2998]) tensor([0.6771, 0.0824, 0.0651, 0.1754]) -Greedy action tensor([ 1.5073, -0.1577, -0.5567, 0.4661]) tensor([0.5991, 0.1133, 0.0761, 0.2115]) -Greedy action tensor([ 1.0953, -0.4659, -0.1610, 0.4440]) tensor([0.4961, 0.1041, 0.1412, 0.2586]) -Greedy action tensor([ 1.3119, -0.2228, -0.6585, 0.3679]) tensor([0.5734, 0.1236, 0.0799, 0.2231]) -Greedy action tensor([ 1.1362, -0.1528, -0.8105, 0.7146]) tensor([0.4821, 0.1328, 0.0688, 0.3163]) -Greedy action tensor([ 1.3904, 0.1196, -0.4405, -0.1279]) tensor([0.6024, 0.1690, 0.0966, 0.1320]) -Greedy action tensor([ 1.5115, -0.0267, -0.4223, 0.7312]) tensor([0.5502, 0.1182, 0.0796, 0.2521]) -Greedy action tensor([ 1.5529, -0.6108, -0.5226, 0.2152]) tensor([0.6654, 0.0765, 0.0835, 0.1746]) -Greedy action tensor([ 1.5790, -0.9718, -0.2360, 0.6651]) tensor([0.6091, 0.0475, 0.0992, 0.2442]) -Greedy action tensor([ 1.2215, -0.1918, -0.3202, 0.4728]) tensor([0.5180, 0.1261, 0.1109, 0.2450]) -Greedy action tensor([ 1.0278, -0.3779, 0.1711, -0.2420]) tensor([0.5127, 0.1257, 0.2176, 0.1440]) -Greedy action tensor([ 1.8880, -0.8674, -0.1000, 0.1609]) tensor([0.7255, 0.0461, 0.0994, 0.1290]) -Greedy action tensor([ 1.9101, -0.0756, -0.3404, 0.5033]) tensor([0.6722, 0.0923, 0.0708, 0.1647]) -Greedy action tensor([ 1.2476, -0.1488, -0.1276, 0.7121]) tensor([0.4795, 0.1187, 0.1212, 0.2807]) -Greedy action tensor([ 1.8129, -0.6162, -0.6720, 0.4967]) tensor([0.6946, 0.0612, 0.0579, 0.1863]) -Greedy action tensor([ 1.8835, -0.5795, -0.3805, 0.3594]) tensor([0.7108, 0.0605, 0.0739, 0.1548]) -Greedy action tensor([ 1.2911, -0.2294, -0.0196, -0.5054]) tensor([0.6046, 0.1322, 0.1630, 0.1003]) -Greedy action tensor([ 0.9546, -0.2130, -0.0194, 0.2399]) tensor([0.4591, 0.1428, 0.1734, 0.2247]) -Greedy action tensor([ 1.6201, -0.4383, -0.5192, 0.1871]) tensor([0.6739, 0.0860, 0.0793, 0.1608]) -Greedy action tensor([ 2.0206, -0.4104, -0.8666, 0.7208]) tensor([0.7061, 0.0621, 0.0394, 0.1925]) -Greedy action tensor([ 1.3576, 0.5068, -0.0021, 0.1863]) tensor([0.5016, 0.2142, 0.1288, 0.1555]) -Greedy action tensor([ 2.0723, -0.9337, -0.0523, 0.2945]) tensor([0.7474, 0.0370, 0.0893, 0.1263]) -Greedy action tensor([ 2.0597, -0.0205, -0.0435, 0.1683]) tensor([0.7154, 0.0894, 0.0873, 0.1079]) -Greedy action tensor([ 1.4180, -0.9250, -0.4148, 0.5771]) tensor([0.5926, 0.0569, 0.0948, 0.2556]) -Greedy action tensor([ 1.5777, -0.4731, -0.1619, 0.4888]) tensor([0.6095, 0.0784, 0.1070, 0.2051]) -Greedy action tensor([ 0.7142, 0.1648, 0.1025, -0.1350]) tensor([0.3926, 0.2266, 0.2129, 0.1679]) -Greedy action tensor([ 1.3005, -0.1499, -0.6880, 0.2743]) tensor([0.5781, 0.1356, 0.0791, 0.2072]) -Greedy action tensor([ 1.6319, -0.6307, -0.5057, 0.4505]) tensor([0.6541, 0.0681, 0.0771, 0.2007]) -Greedy action tensor([ 1.5263, -0.4568, -0.4044, 0.4703]) tensor([0.6133, 0.0844, 0.0890, 0.2133]) -Greedy action tensor([ 1.0976, -0.3359, -0.3619, 0.1685]) tensor([0.5360, 0.1278, 0.1245, 0.2117]) -Greedy action tensor([ 2.4924, -1.4119, 0.1191, 1.0035]) tensor([0.7469, 0.0151, 0.0696, 0.1685]) -Greedy action tensor([ 2.1644, 0.9754, 0.9589, -1.1302]) tensor([0.6093, 0.1856, 0.1825, 0.0226]) -Greedy action tensor([ 1.6295, -0.4247, -0.1867, 0.4741]) tensor([0.6227, 0.0798, 0.1013, 0.1961]) -Greedy action tensor([ 1.0284, -0.2446, -0.1606, 0.3293]) tensor([0.4804, 0.1345, 0.1463, 0.2388]) -Greedy action tensor([ 1.3734, -0.3552, 0.0841, 0.6059]) tensor([0.5216, 0.0926, 0.1437, 0.2421]) -Greedy action tensor([ 1.6314, -0.4663, -0.0785, 0.3844]) tensor([0.6286, 0.0771, 0.1137, 0.1806]) -Greedy action tensor([ 1.9559, -0.2987, -0.5744, 0.0197]) tensor([0.7526, 0.0790, 0.0599, 0.1086]) -Greedy action tensor([ 1.2983, -0.0705, -0.7827, 0.2550]) tensor([0.5775, 0.1469, 0.0721, 0.2035]) -Greedy action tensor([ 1.4048, 0.0477, -0.5822, 0.6366]) tensor([0.5381, 0.1385, 0.0738, 0.2496]) -Greedy action tensor([ 1.6330, -0.8306, -0.7044, 0.2174]) tensor([0.7020, 0.0598, 0.0678, 0.1704]) -Greedy action tensor([ 1.3326, -0.4435, -0.3793, 0.2528]) tensor([0.5919, 0.1002, 0.1068, 0.2010]) -Greedy action tensor([ 1.6166, -0.1996, -0.4588, 0.4183]) tensor([0.6290, 0.1023, 0.0789, 0.1898]) -Greedy action tensor([ 1.3056, -1.0096, 0.0141, 0.1988]) tensor([0.5868, 0.0579, 0.1613, 0.1940]) -Greedy action tensor([ 1.2484, -0.0448, -0.5544, 0.1722]) tensor([0.5618, 0.1541, 0.0926, 0.1915]) -Greedy action tensor([ 1.6740, -0.3138, -0.6950, -0.1800]) tensor([0.7209, 0.0988, 0.0675, 0.1129]) -Greedy action tensor([ 1.4736, -0.6575, -0.3450, 0.1199]) tensor([0.6497, 0.0771, 0.1054, 0.1678]) -Greedy action tensor([ 0.9559, -0.3433, -0.1726, 0.3652]) tensor([0.4651, 0.1268, 0.1505, 0.2576]) -Greedy action tensor([ 1.6146, -0.7099, -0.3537, 0.3742]) tensor([0.6550, 0.0641, 0.0915, 0.1895]) -Greedy action tensor([ 1.8874, -0.4566, -0.8904, 0.4322]) tensor([0.7187, 0.0690, 0.0447, 0.1677]) -Greedy action tensor([ 0.6127, -0.1051, -0.0862, 0.5441]) tensor([0.3426, 0.1671, 0.1703, 0.3199]) -Greedy action tensor([ 2.2805, -1.0605, -0.2877, 0.9702]) tensor([0.7237, 0.0256, 0.0555, 0.1952]) -Greedy action tensor([ 1.2146, -0.2367, -0.6626, 0.1038]) tensor([0.5826, 0.1365, 0.0891, 0.1918]) -Greedy action tensor([ 1.5975, -0.8177, -0.1733, 0.3523]) tensor([0.6462, 0.0577, 0.1100, 0.1860]) -Greedy action tensor([ 0.9568, -0.0943, 0.0343, -0.6198]) tensor([0.5118, 0.1789, 0.2035, 0.1058]) -Greedy action tensor([ 2.1240, -1.1244, -0.2191, 0.8163]) tensor([0.7116, 0.0276, 0.0683, 0.1924]) -Greedy action tensor([ 1.5396, -0.6258, -0.1361, 0.5589]) tensor([0.5963, 0.0684, 0.1116, 0.2237]) -Greedy action tensor([ 1.8906, -1.0485, -0.5680, 1.0385]) tensor([0.6390, 0.0338, 0.0547, 0.2725]) -Greedy action tensor([ 0.9956, -0.4216, -0.3112, 0.3437]) tensor([0.4916, 0.1192, 0.1331, 0.2562]) -Greedy action tensor([ 1.4226, -0.1774, -0.3266, -0.1350]) tensor([0.6303, 0.1273, 0.1096, 0.1328]) -Greedy action tensor([ 1.6091, -0.4453, -0.3470, 0.5189]) tensor([0.6228, 0.0798, 0.0881, 0.2093]) -Greedy action tensor([ 1.4088, -0.8236, -0.3876, -0.0387]) tensor([0.6630, 0.0711, 0.1100, 0.1559]) -Greedy action tensor([ 2.0464, -0.5011, -0.3215, 0.5297]) tensor([0.7187, 0.0563, 0.0673, 0.1577]) -Greedy action tensor([ 1.3256, -0.5132, -0.4249, 0.7141]) tensor([0.5333, 0.0848, 0.0926, 0.2893]) -Greedy action tensor([ 2.0693, 0.0498, -0.5228, 0.2767]) tensor([0.7277, 0.0966, 0.0545, 0.1212]) -Greedy action tensor([ 1.5950, -0.0781, -0.3292, 0.3109]) tensor([0.6209, 0.1165, 0.0907, 0.1719]) -Greedy action tensor([ 1.5982, -0.4809, 0.1603, 0.1132]) tensor([0.6293, 0.0787, 0.1494, 0.1425]) -Greedy action tensor([ 1.6255, -0.5214, -0.3737, 0.1426]) tensor([0.6760, 0.0790, 0.0916, 0.1534]) -Greedy action tensor([ 1.3809, -0.3310, -0.4154, 0.1376]) tensor([0.6117, 0.1104, 0.1015, 0.1764]) -Greedy action tensor([ 1.7278, -0.3660, -0.1454, 0.1565]) tensor([0.6736, 0.0830, 0.1035, 0.1400]) -Greedy action tensor([ 1.1492, -1.2325, -1.2463, 1.3882]) tensor([0.4076, 0.0377, 0.0371, 0.5176]) -Greedy action tensor([ 0.3099, -0.1200, -0.0417, -0.2062]) tensor([0.3389, 0.2205, 0.2384, 0.2023]) -Greedy action tensor([ 0.7268, -0.5709, -0.0869, -0.4243]) tensor([0.4920, 0.1344, 0.2180, 0.1556]) -Greedy action tensor([ 0.6945, -0.5187, -0.0553, -0.3618]) tensor([0.4723, 0.1404, 0.2231, 0.1642]) -Greedy action tensor([ 0.5161, -0.6366, 0.0064, -0.2474]) tensor([0.4197, 0.1325, 0.2521, 0.1956]) -Greedy action tensor([ 0.6859, -0.4530, -0.0078, -0.2743]) tensor([0.4540, 0.1454, 0.2269, 0.1738]) -Greedy action tensor([ 0.6736, -0.4262, -0.1553, -0.2709]) tensor([0.4633, 0.1543, 0.2022, 0.1802]) -Greedy action tensor([ 0.9613, -0.5375, -0.1462, -0.3949]) tensor([0.5521, 0.1233, 0.1824, 0.1422]) -Greedy action tensor([ 0.4516, -0.0359, -0.1390, -0.2357]) tensor([0.3744, 0.2299, 0.2074, 0.1883]) -Greedy action tensor([ 0.2646, 0.1905, 0.0405, -0.0246]) tensor([0.2876, 0.2671, 0.2299, 0.2154]) -Greedy action tensor([ 0.5489, -0.0768, -0.0720, 0.0105]) tensor([0.3765, 0.2014, 0.2024, 0.2198]) -Greedy action tensor([ 0.1469, -0.0500, -0.0607, 0.1013]) tensor([0.2786, 0.2288, 0.2264, 0.2662]) -Greedy action tensor([ 0.3440, -0.0241, 0.0275, -0.2989]) tensor([0.3394, 0.2349, 0.2473, 0.1784]) -Greedy action tensor([ 0.7073, -0.5918, -0.0493, -0.4739]) tensor([0.4881, 0.1331, 0.2290, 0.1498]) -Greedy action tensor([ 0.5231, -0.1526, 0.0276, -0.1140]) tensor([0.3778, 0.1922, 0.2302, 0.1998]) -Greedy action tensor([ 0.6145, -0.2007, -0.0325, -0.2795]) tensor([0.4210, 0.1863, 0.2204, 0.1722]) -Greedy action tensor([ 0.6335, 0.0437, -0.1480, 0.1381]) tensor([0.3815, 0.2115, 0.1746, 0.2324]) -Greedy action tensor([ 0.6893, -0.6763, 0.0292, -0.6494]) tensor([0.4916, 0.1255, 0.2541, 0.1289]) -Greedy action tensor([ 0.6487, -0.2921, 0.1424, -0.3919]) tensor([0.4262, 0.1664, 0.2569, 0.1506]) -Greedy action tensor([ 0.4615, 0.0287, -0.1061, 0.0217]) tensor([0.3497, 0.2268, 0.1982, 0.2253]) -Greedy action tensor([ 0.8128, -0.4670, 0.0422, -0.4707]) tensor([0.4956, 0.1378, 0.2293, 0.1373]) -Greedy action tensor([ 0.5874, -0.2678, -0.1067, -0.4566]) tensor([0.4392, 0.1868, 0.2194, 0.1546]) -Greedy action tensor([ 0.8067, -0.6724, -0.0746, -0.2599]) tensor([0.5035, 0.1147, 0.2086, 0.1733]) -Greedy action tensor([ 1.1564, -0.5431, -0.0728, -0.3075]) tensor([0.5860, 0.1071, 0.1714, 0.1355]) -Greedy action tensor([ 0.5904, -0.2262, -0.0836, -0.0772]) tensor([0.4058, 0.1793, 0.2068, 0.2081]) -Greedy action tensor([ 0.8109, -0.5964, -0.0263, -0.5012]) tensor([0.5136, 0.1257, 0.2224, 0.1383]) -Greedy action tensor([ 0.7670, -0.2321, -0.1813, -0.2092]) tensor([0.4690, 0.1727, 0.1817, 0.1767]) -Greedy action tensor([ 0.6478, -0.4492, 0.0064, -0.3670]) tensor([0.4499, 0.1502, 0.2369, 0.1631]) -Greedy action tensor([ 0.8669, -0.1326, -0.0026, -0.4561]) tensor([0.4870, 0.1792, 0.2041, 0.1297]) -Greedy action tensor([ 0.6684, -0.2803, -0.0430, -0.1836]) tensor([0.4339, 0.1680, 0.2130, 0.1851]) -Greedy action tensor([ 0.4905, -0.0336, -0.0569, -0.3633]) tensor([0.3851, 0.2280, 0.2228, 0.1640]) -Greedy action tensor([ 0.9406, -0.1871, 0.1108, -0.0708]) tensor([0.4709, 0.1525, 0.2054, 0.1713]) -Greedy action tensor([ 0.6112, -0.2278, -0.0183, -0.3080]) tensor([0.4230, 0.1828, 0.2254, 0.1687]) -Greedy action tensor([ 0.5582, 0.0752, -0.1241, -0.2306]) tensor([0.3881, 0.2394, 0.1962, 0.1763]) -Greedy action tensor([ 1.2088, -1.1513, 0.0900, -0.6366]) tensor([0.6333, 0.0598, 0.2069, 0.1000]) -Greedy action tensor([ 0.5211, -0.1457, 0.0037, -0.4519]) tensor([0.4020, 0.2064, 0.2396, 0.1519]) -Greedy action tensor([ 0.9571, -0.4933, -0.0750, -0.5018]) tensor([0.5485, 0.1286, 0.1954, 0.1275]) -Greedy action tensor([ 0.7403, -0.6150, 0.1508, -0.5553]) tensor([0.4793, 0.1236, 0.2658, 0.1312]) -Greedy action tensor([ 0.0038, 0.4789, -0.1362, -0.0680]) tensor([0.2268, 0.3648, 0.1972, 0.2111]) -Greedy action tensor([ 1.0444, -0.7751, 0.1333, -0.3644]) tensor([0.5529, 0.0896, 0.2223, 0.1351]) -Greedy action tensor([ 0.8429, -0.4210, -0.1198, -0.1264]) tensor([0.4893, 0.1383, 0.1868, 0.1856]) -Greedy action tensor([ 0.7734, -0.7854, -0.0575, -0.2606]) tensor([0.4996, 0.1051, 0.2176, 0.1776]) -Greedy action tensor([ 0.6745, -0.3822, -0.1802, -0.4297]) tensor([0.4752, 0.1652, 0.2021, 0.1575]) -Greedy action tensor([ 0.7940, -0.5495, -0.0140, -0.5386]) tensor([0.5075, 0.1324, 0.2262, 0.1339]) -Greedy action tensor([ 0.5885, -0.7105, -0.0105, -0.4951]) tensor([0.4628, 0.1263, 0.2543, 0.1566]) -Greedy action tensor([ 0.7273, -0.2225, 0.1010, -0.2014]) tensor([0.4317, 0.1670, 0.2308, 0.1706]) -Greedy action tensor([ 0.6210, -0.2852, -0.0951, -0.2012]) tensor([0.4288, 0.1732, 0.2095, 0.1884]) -Greedy action tensor([ 0.9014, -0.4064, -0.1369, -0.2739]) tensor([0.5173, 0.1399, 0.1831, 0.1597]) -Greedy action tensor([ 0.5405, 0.2856, -0.2635, 0.2408]) tensor([0.3374, 0.2615, 0.1510, 0.2500]) -Greedy action tensor([ 0.6499, 0.2265, -0.1574, -0.3286]) tensor([0.4037, 0.2644, 0.1801, 0.1518]) -Greedy action tensor([ 0.8074, -0.6702, -0.1035, -0.5004]) tensor([0.5261, 0.1201, 0.2116, 0.1423]) -Greedy action tensor([ 1.2712, -1.0147, 0.1647, -0.7558]) tensor([0.6393, 0.0650, 0.2114, 0.0842]) -Greedy action tensor([ 0.7406, -0.0482, -0.1195, -0.2498]) tensor([0.4447, 0.2020, 0.1881, 0.1652]) -Greedy action tensor([ 0.6773, -0.3095, 0.0476, -0.2509]) tensor([0.4346, 0.1620, 0.2315, 0.1718]) -Greedy action tensor([ 0.4411, 0.0086, -0.0952, -0.2211]) tensor([0.3637, 0.2360, 0.2127, 0.1876]) -Greedy action tensor([ 0.7976, -0.4318, -0.0941, -0.5814]) tensor([0.5117, 0.1497, 0.2098, 0.1289]) -Greedy action tensor([ 0.6269, -0.5907, -0.0859, -0.3894]) tensor([0.4655, 0.1378, 0.2282, 0.1685]) -Greedy action tensor([ 1.1596, -0.5157, 0.0781, -0.1890]) tensor([0.5599, 0.1048, 0.1899, 0.1454]) -Greedy action tensor([ 0.4587, -0.4412, -0.1491, -0.1297]) tensor([0.3990, 0.1622, 0.2173, 0.2215]) -Greedy action tensor([ 0.6911, -0.2760, -0.0858, -0.2474]) tensor([0.4482, 0.1704, 0.2061, 0.1753]) -Greedy action tensor([ 0.8120, -0.6486, 0.0535, -0.3922]) tensor([0.4999, 0.1160, 0.2341, 0.1499]) -Greedy action tensor([ 1.0413, -0.7684, 0.0393, -0.5301]) tensor([0.5752, 0.0942, 0.2112, 0.1195]) -Greedy action tensor([ 0.7263, 0.1689, -0.1162, 0.1254]) tensor([0.3919, 0.2244, 0.1688, 0.2149]) -Greedy action tensor([ 0.2225, -0.0217, -0.1061, -0.3675]) tensor([0.3271, 0.2562, 0.2355, 0.1813]) -Greedy action tensor([ 0.1566, -0.0559, -0.1947, 0.0615]) tensor([0.2923, 0.2363, 0.2057, 0.2657]) -Greedy action tensor([ 0.7766, -0.5282, 0.1087, -0.4126]) tensor([0.4788, 0.1299, 0.2455, 0.1458]) -Greedy action tensor([ 0.9066, -0.7496, -0.0072, -0.5728]) tensor([0.5496, 0.1049, 0.2204, 0.1252]) -Greedy action tensor([ 0.7842, -0.1819, 0.0069, -0.0296]) tensor([0.4379, 0.1667, 0.2013, 0.1941]) -Greedy action tensor([ 0.6622, -0.4120, 0.1702, -0.5762]) tensor([0.4459, 0.1523, 0.2726, 0.1292]) -Greedy action tensor([ 0.7188, -0.5495, -0.0183, -0.2844]) tensor([0.4702, 0.1323, 0.2250, 0.1725]) -Greedy action tensor([ 0.5121, -0.1516, -0.0291, -0.0375]) tensor([0.3740, 0.1926, 0.2177, 0.2158]) -Greedy action tensor([ 0.8914, -0.6305, -0.0043, -0.5914]) tensor([0.5395, 0.1178, 0.2203, 0.1225]) -Greedy action tensor([ 0.1740, 0.5376, -0.3773, -0.2567]) tensor([0.2729, 0.3925, 0.1572, 0.1774]) -Greedy action tensor([ 0.8134, -0.4280, -0.1512, -0.3520]) tensor([0.5046, 0.1458, 0.1923, 0.1573]) -Greedy action tensor([ 0.8127, -0.3945, 0.0368, -0.1747]) tensor([0.4691, 0.1403, 0.2159, 0.1747]) -Greedy action tensor([ 0.5118, -0.1193, -0.0454, -0.2582]) tensor([0.3894, 0.2072, 0.2231, 0.1803]) -Greedy action tensor([ 0.9586, -0.5838, 0.0160, -0.3163]) tensor([0.5311, 0.1136, 0.2069, 0.1484]) -Greedy action tensor([ 0.1826, 0.0532, -0.0562, -0.2783]) tensor([0.3033, 0.2665, 0.2389, 0.1913]) -Greedy action tensor([ 0.7141, -0.5064, 0.0014, -0.2739]) tensor([0.4635, 0.1368, 0.2272, 0.1725]) -Greedy action tensor([ 0.4754, -0.2362, 0.0117, -0.1776]) tensor([0.3788, 0.1859, 0.2382, 0.1971]) -Greedy action tensor([ 0.8026, -0.4271, 0.1371, -0.3875]) tensor([0.4738, 0.1385, 0.2435, 0.1441]) -Greedy action tensor([ 0.8428, -0.2958, -0.0071, -0.2748]) tensor([0.4820, 0.1544, 0.2060, 0.1576]) -Greedy action tensor([-1.6466, -0.0581, 0.7487, -0.2929]) tensor([0.0482, 0.2361, 0.5290, 0.1867]) -Greedy action tensor([ 0.7433, -0.3800, 0.0316, 0.7114]) tensor([0.3591, 0.1168, 0.1763, 0.3478]) -Greedy action tensor([ 1.4102, -0.9778, -0.5133, 0.6195]) tensor([0.5912, 0.0543, 0.0864, 0.2681]) -Greedy action tensor([-1.4630, -0.0959, -0.1778, 0.2003]) tensor([0.0724, 0.2840, 0.2617, 0.3819]) -Greedy action tensor([ 0.2898, 0.2105, 0.2033, -0.3802]) tensor([0.2983, 0.2755, 0.2735, 0.1526]) -Greedy action tensor([ 0.5552, -1.4770, 0.1918, 0.2970]) tensor([0.3848, 0.0504, 0.2676, 0.2972]) -Greedy action tensor([ 1.3924, -0.9818, 0.7472, 1.2407]) tensor([0.4037, 0.0376, 0.2118, 0.3469]) -Greedy action tensor([-0.9128, -1.2044, 0.3446, -0.9243]) tensor([0.1600, 0.1195, 0.5624, 0.1581]) -Greedy action tensor([-0.0020, -0.1503, 0.3193, -0.1000]) tensor([0.2411, 0.2079, 0.3325, 0.2186]) -Greedy action tensor([ 0.0909, -2.0878, -0.4076, -0.0028]) tensor([0.3801, 0.0430, 0.2309, 0.3461]) -Greedy action tensor([-1.7306, 0.0900, -0.1269, 0.1276]) tensor([0.0539, 0.3327, 0.2679, 0.3455]) -Greedy action tensor([0.6782, 0.1033, 0.4354, 0.0554]) tensor([0.3468, 0.1952, 0.2720, 0.1860]) -Greedy action tensor([ 0.2743, -1.2022, -0.6914, 0.1385]) tensor([0.4029, 0.0920, 0.1534, 0.3517]) -Greedy action tensor([-0.0054, 0.2021, -0.0938, -0.7102]) tensor([0.2747, 0.3381, 0.2515, 0.1358]) -Greedy action tensor([ 0.0917, -1.4998, 0.2964, -0.4673]) tensor([0.3331, 0.0678, 0.4087, 0.1904]) -Greedy action tensor([ 0.7751, -0.9097, 0.2622, 1.5091]) tensor([0.2586, 0.0480, 0.1548, 0.5387]) -Greedy action tensor([ 0.6235, -0.8354, 0.4870, -0.4630]) tensor([0.4095, 0.0952, 0.3572, 0.1381]) -Greedy action tensor([-0.1947, 0.5212, 1.3345, -0.4384]) tensor([0.1184, 0.2423, 0.5465, 0.0928]) -Greedy action tensor([ 0.7688, -1.2457, -0.0327, 0.5450]) tensor([0.4199, 0.0560, 0.1884, 0.3357]) -Greedy action tensor([ 0.2688, -2.3630, 0.4886, 0.3706]) tensor([0.2920, 0.0210, 0.3638, 0.3233]) -Greedy action tensor([ 0.2945, -0.1521, 1.3789, 0.6069]) tensor([0.1677, 0.1073, 0.4959, 0.2292]) -Greedy action tensor([ 0.6015, 0.3921, -1.3056, 0.7443]) tensor([0.3212, 0.2605, 0.0477, 0.3705]) -Greedy action tensor([ 0.8408, -0.6481, 0.8389, 1.8142]) tensor([0.2053, 0.0463, 0.2049, 0.5434]) -Greedy action tensor([-1.2119, -0.7201, -0.0153, -1.5968]) tensor([0.1510, 0.2469, 0.4995, 0.1027]) -Greedy action tensor([ 1.3823, -1.0699, -0.2376, 0.9242]) tensor([0.5218, 0.0449, 0.1033, 0.3300]) -Greedy action tensor([ 0.4753, -0.6823, -0.0916, 0.4214]) tensor([0.3535, 0.1111, 0.2005, 0.3349]) -Greedy action tensor([ 0.0650, -2.1679, -0.5157, -0.7358]) tensor([0.4727, 0.0507, 0.2645, 0.2122]) -Greedy action tensor([-0.8806, -1.1327, 0.0421, -0.3765]) tensor([0.1681, 0.1306, 0.4230, 0.2783]) -Greedy action tensor([ 0.0525, -1.5134, -0.1453, 1.2729]) tensor([0.1846, 0.0386, 0.1514, 0.6254]) -Greedy action tensor([ 0.7132, -0.2913, -0.3068, -0.1548]) tensor([0.4659, 0.1706, 0.1680, 0.1955]) -Greedy action tensor([-0.8759, -1.4481, -0.5678, 0.2648]) tensor([0.1652, 0.0932, 0.2248, 0.5168]) -Greedy action tensor([ 0.8373, 0.6598, -0.2260, 0.1760]) tensor([0.3705, 0.3103, 0.1279, 0.1913]) -Greedy action tensor([-0.1507, -2.2935, 0.0702, 0.3377]) tensor([0.2504, 0.0294, 0.3122, 0.4080]) -Greedy action tensor([-0.5030, 0.6428, -0.1743, -0.4149]) tensor([0.1509, 0.4746, 0.2097, 0.1648]) -Greedy action tensor([-0.6968, -0.8843, 0.6313, -0.9799]) tensor([0.1573, 0.1304, 0.5937, 0.1185]) -Greedy action tensor([-0.3029, -0.3926, -1.1912, 0.3955]) tensor([0.2306, 0.2108, 0.0949, 0.4637]) -Greedy action tensor([ 0.7512, -0.8421, 0.4689, 0.9674]) tensor([0.3126, 0.0635, 0.2357, 0.3881]) -Greedy action tensor([-0.8220, -0.9736, 0.4653, -0.2792]) tensor([0.1388, 0.1193, 0.5030, 0.2389]) -Greedy action tensor([-0.2110, -0.0668, 0.3997, -1.0449]) tensor([0.2257, 0.2607, 0.4156, 0.0980]) -Greedy action tensor([ 0.7510, -1.8966, 0.3846, -0.5960]) tensor([0.4941, 0.0350, 0.3425, 0.1285]) -Greedy action tensor([ 0.3232, -1.2232, 0.1748, -0.7474]) tensor([0.4136, 0.0881, 0.3565, 0.1418]) -Greedy action tensor([-0.1991, 0.1210, 0.2304, -0.8016]) tensor([0.2242, 0.3087, 0.3444, 0.1227]) -Greedy action tensor([-0.1220, 0.4941, 0.2707, -0.3442]) tensor([0.1948, 0.3607, 0.2885, 0.1560]) -Greedy action tensor([ 0.0271, 0.1063, 0.7091, -0.6399]) tensor([0.2186, 0.2367, 0.4325, 0.1122]) -Greedy action tensor([ 0.1486, -1.1246, 0.0720, 1.3550]) tensor([0.1803, 0.0505, 0.1670, 0.6023]) -Greedy action tensor([ 0.1687, 0.0809, -0.8691, 0.0489]) tensor([0.3167, 0.2901, 0.1122, 0.2810]) -Greedy action tensor([-0.0587, -0.2048, 0.1841, 0.2234]) tensor([0.2240, 0.1935, 0.2855, 0.2970]) -Greedy action tensor([-0.1100, -0.0369, 0.3988, 0.7097]) tensor([0.1664, 0.1790, 0.2768, 0.3777]) -Greedy action tensor([-0.7763, -0.4307, 0.7491, -0.6824]) tensor([0.1233, 0.1742, 0.5669, 0.1355]) -Greedy action tensor([0.7057, 0.1124, 1.5390, 0.2141]) tensor([0.2240, 0.1237, 0.5153, 0.1370]) -Greedy action tensor([-0.5472, -0.2940, 0.3694, -0.0849]) tensor([0.1568, 0.2020, 0.3922, 0.2490]) -Greedy action tensor([ 1.1829, -1.0816, 0.8635, 2.0092]) tensor([0.2430, 0.0252, 0.1766, 0.5552]) -Greedy action tensor([ 1.1562, -0.5082, -0.0607, 0.9705]) tensor([0.4318, 0.0817, 0.1279, 0.3586]) -Greedy action tensor([ 1.3752, -0.2519, 0.7314, 0.2738]) tensor([0.4868, 0.0957, 0.2557, 0.1618]) -Greedy action tensor([ 0.1965, -0.9088, -0.7262, -0.2612]) tensor([0.4235, 0.1402, 0.1683, 0.2680]) -Greedy action tensor([ 1.2101, -1.0969, 1.3216, 0.4274]) tensor([0.3739, 0.0372, 0.4180, 0.1709]) -Greedy action tensor([-0.4611, -2.3296, -0.2423, 0.9788]) tensor([0.1511, 0.0233, 0.1880, 0.6376]) -Greedy action tensor([ 1.1008, -0.3484, -0.7802, 0.6285]) tensor([0.4973, 0.1168, 0.0758, 0.3101]) -Greedy action tensor([ 1.2116, -1.7818, -0.2741, -0.2553]) tensor([0.6635, 0.0333, 0.1502, 0.1530]) -Greedy action tensor([-0.0080, -1.1461, 0.4657, -1.1515]) tensor([0.3082, 0.0987, 0.4949, 0.0982]) -Greedy action tensor([ 0.3227, -0.8878, 1.3101, 0.0235]) tensor([0.2117, 0.0631, 0.5682, 0.1570]) -Greedy action tensor([ 0.0859, -0.5999, -0.2718, 0.8647]) tensor([0.2282, 0.1150, 0.1596, 0.4972]) -Greedy action tensor([ 1.3806, -0.6172, -0.2302, -0.0768]) tensor([0.6377, 0.0865, 0.1274, 0.1485]) -Greedy action tensor([ 0.0890, -0.0626, 1.7753, -0.6649]) tensor([0.1294, 0.1112, 0.6986, 0.0609]) -Greedy action tensor([-0.8240, -1.7040, 0.1426, -0.3615]) tensor([0.1776, 0.0737, 0.4668, 0.2820]) -Greedy action tensor([-0.0292, -0.4508, 0.1522, 0.0308]) tensor([0.2553, 0.1675, 0.3061, 0.2711]) -Greedy action tensor([ 0.6231, -0.7101, 1.3625, 0.0411]) tensor([0.2553, 0.0673, 0.5347, 0.1427]) -Greedy action tensor([ 0.9066, 0.4031, 0.1999, -0.5922]) tensor([0.4308, 0.2604, 0.2125, 0.0962]) -Greedy action tensor([ 0.2610, -0.6434, 0.4569, -0.3247]) tensor([0.3147, 0.1274, 0.3828, 0.1752]) -Greedy action tensor([ 0.9570, 0.1892, 0.7569, -0.5897]) tensor([0.4007, 0.1859, 0.3280, 0.0853]) -Greedy action tensor([-0.1986, -0.4810, -0.3641, 0.5567]) tensor([0.2114, 0.1594, 0.1792, 0.4500]) -Greedy action tensor([ 1.4231, -0.0830, 0.0680, -0.4326]) tensor([0.6112, 0.1356, 0.1577, 0.0956]) -Greedy action tensor([ 0.9628, -0.3195, -0.3997, -0.6159]) tensor([0.5748, 0.1595, 0.1472, 0.1185]) -Greedy action tensor([1.0312, 0.3622, 0.2407, 0.9308]) tensor([0.3484, 0.1784, 0.1580, 0.3151]) -Greedy action tensor([ 0.4514, -1.4822, -0.0599, 0.1961]) tensor([0.3970, 0.0574, 0.2381, 0.3075]) -Greedy action tensor([-0.3727, -0.3960, -0.1878, -0.9848]) tensor([0.2686, 0.2625, 0.3232, 0.1457]) -Greedy action tensor([ 0.9187, -0.5711, -0.6448, 1.7302]) tensor([0.2713, 0.0612, 0.0568, 0.6108]) -Greedy action tensor([-1.4944, -0.4632, 1.0112, -1.5425]) tensor([0.0588, 0.1649, 0.7203, 0.0560]) -Greedy action tensor([-0.0368, -0.9936, -0.0583, -0.3784]) tensor([0.3254, 0.1250, 0.3184, 0.2312]) -Greedy action tensor([ 0.6465, -1.0156, -1.1104, 0.4163]) tensor([0.4637, 0.0880, 0.0800, 0.3683]) -Greedy action tensor([-0.1239, 0.3436, 1.3368, -0.3628]) tensor([0.1300, 0.2075, 0.5601, 0.1024]) -Greedy action tensor([ 2.0763, -0.8623, 1.7429, 1.3673]) tensor([0.4422, 0.0234, 0.3168, 0.2176]) -Greedy action tensor([-1.8663, -0.3918, 0.6310, -0.1411]) tensor([0.0432, 0.1889, 0.5252, 0.2427]) -Greedy action tensor([-1.0274, 0.1450, 0.1786, 0.0696]) tensor([0.0947, 0.3057, 0.3161, 0.2835]) -Greedy action tensor([-1.5624, -0.0913, 0.4467, -0.0662]) tensor([0.0579, 0.2520, 0.4316, 0.2585]) -Greedy action tensor([-1.3745, -0.4167, 0.3504, 0.0673]) tensor([0.0744, 0.1938, 0.4174, 0.3145]) -Greedy action tensor([-1.4389, -0.5820, 0.6354, 0.2313]) tensor([0.0601, 0.1417, 0.4787, 0.3195]) -Greedy action tensor([-1.8020, -0.3525, 0.5884, -0.1157]) tensor([0.0463, 0.1975, 0.5060, 0.2502]) -Greedy action tensor([-0.7016, 0.6358, -0.0516, 0.4260]) tensor([0.1019, 0.3882, 0.1952, 0.3147]) -Greedy action tensor([-0.9250, 0.3975, 0.2169, 0.4776]) tensor([0.0837, 0.3140, 0.2621, 0.3402]) -Greedy action tensor([-1.3712, 0.6900, 0.2311, 0.2487]) tensor([0.0530, 0.4163, 0.2630, 0.2677]) -Greedy action tensor([-1.4279, -0.1846, 0.5429, 0.1975]) tensor([0.0598, 0.2073, 0.4291, 0.3038]) -Greedy action tensor([-1.8761, -0.2914, 0.6170, -0.1382]) tensor([0.0423, 0.2062, 0.5113, 0.2403]) -Greedy action tensor([-1.6914, -0.3667, 0.6318, -0.2819]) tensor([0.0525, 0.1973, 0.5355, 0.2148]) -Greedy action tensor([-0.9806, -0.6156, 0.2635, 0.0766]) tensor([0.1138, 0.1639, 0.3948, 0.3275]) -Greedy action tensor([-1.8307, -0.2017, 0.5938, -0.0421]) tensor([0.0428, 0.2181, 0.4832, 0.2559]) -Greedy action tensor([-1.6755, 0.2966, 0.4680, 0.0432]) tensor([0.0449, 0.3224, 0.3826, 0.2502]) -Greedy action tensor([-1.9405, -0.4371, 0.6632, -0.1777]) tensor([0.0403, 0.1810, 0.5441, 0.2346]) -Greedy action tensor([-1.8907, -0.4538, 0.6408, -0.1520]) tensor([0.0426, 0.1793, 0.5357, 0.2424]) -Greedy action tensor([0.7826, 0.1390, 0.6880, 1.5585]) tensor([0.2170, 0.1140, 0.1974, 0.4715]) -Greedy action tensor([-1.8961, -0.4518, 0.6450, -0.1592]) tensor([0.0424, 0.1795, 0.5376, 0.2405]) -Greedy action tensor([-1.9245, -0.4437, 0.6595, -0.1687]) tensor([0.0409, 0.1799, 0.5423, 0.2369]) -Greedy action tensor([-1.8416, -0.3726, 0.5825, -0.1121]) tensor([0.0449, 0.1951, 0.5069, 0.2531]) -Greedy action tensor([-1.7824, -0.6354, 0.8697, -0.0318]) tensor([0.0415, 0.1307, 0.5888, 0.2390]) -Greedy action tensor([-1.3165, 0.6383, 0.2257, 0.1171]) tensor([0.0591, 0.4171, 0.2761, 0.2477]) -Greedy action tensor([-1.8618, -0.2853, 0.6076, -0.1410]) tensor([0.0430, 0.2082, 0.5084, 0.2405]) -Greedy action tensor([-1.7815, -0.3375, 0.5695, -0.0739]) tensor([0.0471, 0.1994, 0.4939, 0.2596]) -Greedy action tensor([-1.7842, -0.4220, 0.6711, -0.0912]) tensor([0.0455, 0.1776, 0.5298, 0.2472]) -Greedy action tensor([-1.8894, -0.4644, 0.6323, -0.1477]) tensor([0.0429, 0.1783, 0.5340, 0.2448]) -Greedy action tensor([-1.8201, -0.4889, 0.5987, -0.1120]) tensor([0.0464, 0.1758, 0.5216, 0.2562]) -Greedy action tensor([-1.2834, -0.1118, 0.4478, 0.3419]) tensor([0.0669, 0.2158, 0.3776, 0.3397]) -Greedy action tensor([-1.4504, -0.1590, 0.5820, 0.1236]) tensor([0.0585, 0.2128, 0.4464, 0.2823]) -Greedy action tensor([-1.8177, -0.1872, 0.5660, -0.1272]) tensor([0.0447, 0.2282, 0.4847, 0.2424]) -Greedy action tensor([-1.8467, -0.3985, 0.6311, -0.1200]) tensor([0.0439, 0.1867, 0.5227, 0.2467]) -Greedy action tensor([-1.8766, -0.3862, 0.6365, -0.1265]) tensor([0.0425, 0.1886, 0.5244, 0.2445]) -Greedy action tensor([-0.7918, -0.5785, 0.1548, 0.3684]) tensor([0.1249, 0.1546, 0.3219, 0.3986]) -Greedy action tensor([-1.6790, -0.1243, 0.6169, 0.0434]) tensor([0.0470, 0.2226, 0.4671, 0.2632]) -Greedy action tensor([-0.9601, -0.0395, 0.4406, 0.5138]) tensor([0.0838, 0.2104, 0.3400, 0.3658]) -Greedy action tensor([-1.6153, 0.0137, 0.4307, -0.0273]) tensor([0.0534, 0.2722, 0.4131, 0.2613]) -Greedy action tensor([-1.8997, -0.3651, 0.6368, -0.1425]) tensor([0.0415, 0.1927, 0.5249, 0.2408]) -Greedy action tensor([-1.9090, -0.3768, 0.6413, -0.1537]) tensor([0.0413, 0.1911, 0.5288, 0.2388]) -Greedy action tensor([-1.8911, -0.3960, 0.6347, -0.1476]) tensor([0.0422, 0.1883, 0.5280, 0.2415]) -Greedy action tensor([-1.8580, -0.4312, 0.6225, -0.1404]) tensor([0.0441, 0.1836, 0.5267, 0.2456]) -Greedy action tensor([-1.7773, -0.4184, 0.5752, -0.0928]) tensor([0.0481, 0.1872, 0.5056, 0.2592]) -Greedy action tensor([-1.8977, -0.4771, 0.6478, -0.1527]) tensor([0.0423, 0.1753, 0.5399, 0.2425]) -Greedy action tensor([-1.9349, -0.4458, 0.6601, -0.1771]) tensor([0.0406, 0.1800, 0.5439, 0.2355]) -Greedy action tensor([-1.7968, -0.2271, 0.5324, -0.0779]) tensor([0.0462, 0.2219, 0.4743, 0.2576]) -Greedy action tensor([-1.7906, -0.1887, 0.5576, -0.1068]) tensor([0.0458, 0.2275, 0.4798, 0.2469]) -Greedy action tensor([-1.9298, -0.4494, 0.6894, -0.1347]) tensor([0.0398, 0.1748, 0.5460, 0.2395]) -Greedy action tensor([-1.3950, -0.2203, 0.5906, 0.1568]) tensor([0.0616, 0.1993, 0.4485, 0.2906]) -Greedy action tensor([-1.7286, -0.3468, 0.6667, -0.0132]) tensor([0.0465, 0.1851, 0.5100, 0.2584]) -Greedy action tensor([-1.8517, -0.1881, 0.5745, -0.1292]) tensor([0.0431, 0.2276, 0.4879, 0.2414]) -Greedy action tensor([-1.7058, -0.4915, 0.5421, -0.0081]) tensor([0.0518, 0.1745, 0.4906, 0.2830]) -Greedy action tensor([-1.7516, -0.1180, 0.5736, -0.0655]) tensor([0.0460, 0.2355, 0.4703, 0.2482]) -Greedy action tensor([-1.8962, -0.2072, 0.6062, -0.1560]) tensor([0.0411, 0.2226, 0.5021, 0.2343]) -Greedy action tensor([-1.9238, -0.4468, 0.6538, -0.1663]) tensor([0.0411, 0.1799, 0.5408, 0.2382]) -Greedy action tensor([-1.6940, -0.1689, 0.5925, 0.0055]) tensor([0.0478, 0.2198, 0.4707, 0.2617]) -Greedy action tensor([-1.9431, -0.4484, 0.6666, -0.1796]) tensor([0.0402, 0.1791, 0.5463, 0.2344]) -Greedy action tensor([-0.4484, -0.4303, 0.2255, 0.2718]) tensor([0.1657, 0.1687, 0.3251, 0.3405]) -Greedy action tensor([-1.6502, 0.0094, 0.4726, 0.0450]) tensor([0.0499, 0.2621, 0.4165, 0.2716]) -Greedy action tensor([-1.6035, -0.3267, 0.4270, 0.0493]) tensor([0.0574, 0.2058, 0.4372, 0.2997]) -Greedy action tensor([-1.9423, -0.4457, 0.6660, -0.1794]) tensor([0.0402, 0.1796, 0.5458, 0.2344]) -Greedy action tensor([-1.9067, -0.3896, 0.6483, -0.1583]) tensor([0.0414, 0.1886, 0.5324, 0.2377]) -Greedy action tensor([-1.9114, -0.3616, 0.6447, -0.1575]) tensor([0.0410, 0.1933, 0.5287, 0.2370]) -Greedy action tensor([-1.8306, -0.3356, 0.6086, -0.1066]) tensor([0.0444, 0.1979, 0.5088, 0.2489]) -Greedy action tensor([-1.4490, 0.5791, 0.2801, 0.1224]) tensor([0.0525, 0.3990, 0.2958, 0.2527]) -Greedy action tensor([-1.9097, -0.4235, 0.6487, -0.1613]) tensor([0.0415, 0.1836, 0.5363, 0.2386]) -Greedy action tensor([-1.9124, -0.4683, 0.6482, -0.1645]) tensor([0.0418, 0.1771, 0.5410, 0.2400]) -Greedy action tensor([-1.7944, -0.3539, 0.6018, -0.0750]) tensor([0.0459, 0.1938, 0.5041, 0.2562]) -Greedy action tensor([-1.5296, -0.4751, 0.5084, 0.1842]) tensor([0.0585, 0.1679, 0.4490, 0.3246]) -Greedy action tensor([-1.8175, -0.4558, 0.6339, -0.0345]) tensor([0.0445, 0.1738, 0.5168, 0.2649]) -Greedy action tensor([-1.8066, -0.4244, 0.5968, -0.1025]) tensor([0.0464, 0.1849, 0.5135, 0.2552]) -Greedy action tensor([-1.9313, -0.4518, 0.6607, -0.1771]) tensor([0.0408, 0.1790, 0.5446, 0.2356]) -Greedy action tensor([-0.9581, 0.9200, 0.1447, 0.3833]) tensor([0.0696, 0.4549, 0.2095, 0.2660]) -Greedy action tensor([-1.9085, -0.4339, 0.6492, -0.1528]) tensor([0.0416, 0.1816, 0.5364, 0.2405]) -Greedy action tensor([-1.8664, -0.3671, 0.6314, -0.1353]) tensor([0.0430, 0.1924, 0.5221, 0.2426]) -Greedy action tensor([-1.9434, -0.4499, 0.6686, -0.1799]) tensor([0.0401, 0.1787, 0.5470, 0.2341]) -Greedy action tensor([-1.2115, -0.0898, 0.4442, 0.6763]) tensor([0.0629, 0.1929, 0.3291, 0.4151]) -Greedy action tensor([-1.2424, 0.0227, 0.3132, 0.0231]) tensor([0.0780, 0.2763, 0.3694, 0.2764]) -Greedy action tensor([-1.6901, -0.1023, 0.4861, -0.0679]) tensor([0.0506, 0.2475, 0.4458, 0.2562]) -Greedy action tensor([-1.7907, -0.4183, 0.6076, -0.0894]) tensor([0.0467, 0.1841, 0.5135, 0.2558]) -Greedy action tensor([-1.9063, -0.3176, 0.6186, -0.1484]) tensor([0.0413, 0.2025, 0.5164, 0.2398]) -Greedy action tensor([-1.7270, -0.1241, 0.5533, -0.0206]) tensor([0.0470, 0.2337, 0.4601, 0.2592]) -Greedy action tensor([ 1.1769, -0.3801, -0.4277, 0.1984]) tensor([0.5594, 0.1179, 0.1124, 0.2103]) -Greedy action tensor([ 1.5721, -0.4635, -0.4912, -0.0469]) tensor([0.6869, 0.0897, 0.0873, 0.1361]) -Greedy action tensor([ 1.4886, -0.1540, -0.4001, 0.2126]) tensor([0.6158, 0.1191, 0.0931, 0.1719]) -Greedy action tensor([ 1.7387, -0.3149, -0.5868, 0.4633]) tensor([0.6643, 0.0852, 0.0649, 0.1856]) -Greedy action tensor([ 1.5673, -0.6293, -0.5193, 0.8676]) tensor([0.5773, 0.0642, 0.0717, 0.2868]) -Greedy action tensor([ 1.5520, -0.6916, -0.4260, 0.9174]) tensor([0.5635, 0.0598, 0.0780, 0.2988]) -Greedy action tensor([ 1.7593, -0.4786, -0.1702, -0.0929]) tensor([0.7098, 0.0757, 0.1031, 0.1114]) -Greedy action tensor([ 1.4379, -0.5727, -0.2773, 0.1983]) tensor([0.6237, 0.0835, 0.1122, 0.1806]) -Greedy action tensor([ 1.4185, -0.2472, -0.4687, 0.3097]) tensor([0.5986, 0.1132, 0.0907, 0.1975]) -Greedy action tensor([ 1.3317, 0.0103, -1.1217, 0.6155]) tensor([0.5431, 0.1449, 0.0467, 0.2653]) -Greedy action tensor([ 1.5125, -0.1715, -0.6362, 0.5362]) tensor([0.5956, 0.1106, 0.0695, 0.2244]) -Greedy action tensor([ 1.4279, 0.3584, -1.4033, 0.6032]) tensor([0.5433, 0.1865, 0.0320, 0.2382]) -Greedy action tensor([ 1.5436, -0.3572, 0.0709, 0.3545]) tensor([0.5941, 0.0888, 0.1362, 0.1809]) -Greedy action tensor([ 1.6880, -0.5080, -0.5958, 0.1706]) tensor([0.6981, 0.0777, 0.0711, 0.1531]) -Greedy action tensor([ 2.0475, -0.7083, -0.2290, 0.5370]) tensor([0.7210, 0.0458, 0.0740, 0.1592]) -Greedy action tensor([ 1.8031, -1.2682, -0.1962, 0.7886]) tensor([0.6475, 0.0300, 0.0877, 0.2348]) -Greedy action tensor([ 2.0493, 0.0948, -0.2276, 0.3100]) tensor([0.7043, 0.0998, 0.0723, 0.1237]) -Greedy action tensor([ 1.4884, -0.6328, -0.2909, 0.2944]) tensor([0.6283, 0.0753, 0.1060, 0.1904]) -Greedy action tensor([ 1.6589, -0.9908, -0.1943, 0.6597]) tensor([0.6267, 0.0443, 0.0982, 0.2307]) -Greedy action tensor([ 1.3692, -0.1905, -0.2666, -0.0474]) tensor([0.6070, 0.1276, 0.1182, 0.1472]) -Greedy action tensor([ 2.0699, -0.3788, -0.4130, 0.4901]) tensor([0.7268, 0.0628, 0.0607, 0.1497]) -Greedy action tensor([ 0.8825, -0.6211, -0.0442, -0.1310]) tensor([0.5048, 0.1122, 0.1998, 0.1832]) -Greedy action tensor([ 1.7209, -0.1807, -0.3891, 0.4662]) tensor([0.6428, 0.0960, 0.0779, 0.1833]) -Greedy action tensor([ 1.5108, -0.7605, -0.4029, 0.5116]) tensor([0.6177, 0.0637, 0.0911, 0.2274]) -Greedy action tensor([ 1.0925, 0.0893, -1.1652, 0.1215]) tensor([0.5405, 0.1982, 0.0565, 0.2047]) -Greedy action tensor([ 1.4309, -0.5693, -0.4384, 0.6569]) tensor([0.5712, 0.0773, 0.0881, 0.2634]) -Greedy action tensor([ 1.7221, -0.5285, -0.3232, 0.3248]) tensor([0.6748, 0.0711, 0.0873, 0.1668]) -Greedy action tensor([ 1.1776, -0.3890, 0.1317, 0.0424]) tensor([0.5315, 0.1109, 0.1868, 0.1708]) -Greedy action tensor([ 1.1469, -0.3111, -0.2910, 0.5676]) tensor([0.4925, 0.1146, 0.1169, 0.2760]) -Greedy action tensor([ 0.9395, -0.6048, -0.0430, 0.2696]) tensor([0.4763, 0.1017, 0.1783, 0.2437]) -Greedy action tensor([ 1.2324, -0.0440, -0.2467, -0.0986]) tensor([0.5646, 0.1576, 0.1286, 0.1492]) -Greedy action tensor([ 1.2429, 0.0606, -0.0856, 0.2975]) tensor([0.5102, 0.1564, 0.1351, 0.1982]) -Greedy action tensor([ 1.7292, -0.3853, -0.4974, 0.3853]) tensor([0.6714, 0.0810, 0.0724, 0.1751]) -Greedy action tensor([ 1.3554, -0.7439, -0.0603, 0.6655]) tensor([0.5356, 0.0656, 0.1300, 0.2687]) -Greedy action tensor([ 1.6539, -0.6754, -0.1881, 0.2924]) tensor([0.6613, 0.0644, 0.1048, 0.1695]) -Greedy action tensor([ 1.4332, -1.0761, -0.0835, -0.0880]) tensor([0.6582, 0.0535, 0.1444, 0.1438]) -Greedy action tensor([ 1.0732, -0.1485, -0.6020, 0.2666]) tensor([0.5186, 0.1528, 0.0971, 0.2315]) -Greedy action tensor([ 1.1269, -0.0599, -0.5194, 0.0483]) tensor([0.5440, 0.1661, 0.1049, 0.1850]) -Greedy action tensor([ 1.2194, -0.5965, 0.0175, 0.3427]) tensor([0.5321, 0.0866, 0.1599, 0.2214]) -Greedy action tensor([ 1.4289, -0.2847, -0.4878, -0.0633]) tensor([0.6443, 0.1161, 0.0948, 0.1449]) -Greedy action tensor([ 1.4595, -0.4826, -0.1907, 0.2934]) tensor([0.6072, 0.0871, 0.1166, 0.1892]) -Greedy action tensor([ 1.7263, -0.8117, -0.5886, -0.0933]) tensor([0.7463, 0.0590, 0.0737, 0.1210]) -Greedy action tensor([ 1.6728, -0.9842, -0.7565, -0.2100]) tensor([0.7631, 0.0535, 0.0672, 0.1161]) -Greedy action tensor([ 1.4723, -0.0015, -0.6749, 0.5064]) tensor([0.5792, 0.1327, 0.0677, 0.2205]) -Greedy action tensor([ 2.4356, -1.4134, -0.2968, 1.1018]) tensor([0.7408, 0.0158, 0.0482, 0.1952]) -Greedy action tensor([ 1.3046, -1.0126, -0.2892, 0.4044]) tensor([0.5854, 0.0577, 0.1189, 0.2380]) -Greedy action tensor([ 1.2701, -0.4521, -0.3441, 0.4259]) tensor([0.5532, 0.0988, 0.1101, 0.2378]) -Greedy action tensor([ 1.2620, -0.3083, -0.3867, 0.1401]) tensor([0.5794, 0.1205, 0.1114, 0.1887]) -Greedy action tensor([ 2.3432, -1.0198, -0.3998, 0.5187]) tensor([0.7935, 0.0275, 0.0511, 0.1280]) -Greedy action tensor([ 1.1277, -0.2993, -0.1527, -0.0794]) tensor([0.5503, 0.1321, 0.1530, 0.1646]) -Greedy action tensor([ 1.3471, -0.4168, -0.3873, 0.4309]) tensor([0.5721, 0.0980, 0.1010, 0.2289]) -Greedy action tensor([ 1.4772, -0.6717, 0.1827, 0.4714]) tensor([0.5693, 0.0664, 0.1560, 0.2082]) -Greedy action tensor([ 1.2283, -0.2976, -0.2966, 0.1653]) tensor([0.5617, 0.1221, 0.1222, 0.1940]) -Greedy action tensor([ 1.1776, -0.0413, -0.2272, 0.1718]) tensor([0.5245, 0.1550, 0.1287, 0.1918]) -Greedy action tensor([ 1.9178, -0.9479, -0.4105, 0.8192]) tensor([0.6722, 0.0383, 0.0655, 0.2241]) -Greedy action tensor([ 1.4251, -0.4659, -0.0420, 0.0376]) tensor([0.6130, 0.0925, 0.1414, 0.1531]) -Greedy action tensor([ 1.8464, -0.7954, -0.3601, 0.6830]) tensor([0.6695, 0.0477, 0.0737, 0.2091]) -Greedy action tensor([ 1.6386, 0.1716, -0.3139, 0.4977]) tensor([0.5910, 0.1363, 0.0839, 0.1888]) -Greedy action tensor([ 3.1437, -2.1875, -0.1913, 0.5693]) tensor([0.8955, 0.0043, 0.0319, 0.0682]) -Greedy action tensor([ 1.0518, -0.4531, -0.1768, 0.1951]) tensor([0.5157, 0.1145, 0.1509, 0.2189]) -Greedy action tensor([ 1.5120, -0.6576, -0.4020, 0.2876]) tensor([0.6428, 0.0734, 0.0948, 0.1889]) -Greedy action tensor([ 2.1853, -1.0540, 0.2306, 0.9716]) tensor([0.6766, 0.0265, 0.0958, 0.2010]) -Greedy action tensor([ 1.7680, -0.2196, -0.4057, 0.3526]) tensor([0.6695, 0.0917, 0.0762, 0.1626]) -Greedy action tensor([ 0.8831, -0.4295, -0.2259, 0.2998]) tensor([0.4636, 0.1248, 0.1529, 0.2587]) -Greedy action tensor([ 1.1154, -0.0924, -0.5681, 0.3464]) tensor([0.5133, 0.1534, 0.0953, 0.2379]) -Greedy action tensor([ 1.7165, 0.0038, -0.0559, -0.5635]) tensor([0.6884, 0.1242, 0.1170, 0.0704]) -Greedy action tensor([ 1.1962, -0.0172, -0.6198, 0.3737]) tensor([0.5265, 0.1565, 0.0857, 0.2313]) -Greedy action tensor([ 1.2815, -0.2977, -0.2603, 0.0497]) tensor([0.5842, 0.1204, 0.1250, 0.1704]) -Greedy action tensor([ 2.3443, -1.3189, -0.0024, 1.0013]) tensor([0.7234, 0.0186, 0.0692, 0.1888]) -Greedy action tensor([ 0.9038, -0.2997, -0.4345, 0.5548]) tensor([0.4410, 0.1324, 0.1157, 0.3110]) -Greedy action tensor([ 1.6393, -0.8079, -0.2607, 0.6478]) tensor([0.6222, 0.0538, 0.0931, 0.2309]) -Greedy action tensor([ 0.8574, -0.1304, -0.3518, 0.5162]) tensor([0.4199, 0.1564, 0.1253, 0.2985]) -Greedy action tensor([ 1.6783, -1.3247, -0.1011, -0.1070]) tensor([0.7214, 0.0358, 0.1217, 0.1210]) -Greedy action tensor([ 0.3992, 0.0859, -0.5036, 0.8037]) tensor([0.2751, 0.2011, 0.1115, 0.4123]) -Greedy action tensor([ 2.5268, 0.4848, -0.0907, 0.4634]) tensor([0.7520, 0.0976, 0.0549, 0.0955]) -Greedy action tensor([ 1.1751, -0.9232, -0.0827, 0.1311]) tensor([0.5685, 0.0697, 0.1616, 0.2001]) -Greedy action tensor([ 1.2295, -0.0402, -0.4179, 0.2155]) tensor([0.5446, 0.1530, 0.1049, 0.1976]) -Greedy action tensor([ 1.5234, -1.0205, -0.3470, 0.1965]) tensor([0.6676, 0.0524, 0.1028, 0.1771]) -Greedy action tensor([ 1.5700, -0.3168, -0.5900, 0.4303]) tensor([0.6302, 0.0955, 0.0727, 0.2016]) -Greedy action tensor([ 1.9049, -0.7826, -0.5962, 0.5538]) tensor([0.7097, 0.0483, 0.0582, 0.1838]) -Greedy action tensor([ 1.2818, -0.7651, -0.1845, 0.3787]) tensor([0.5665, 0.0732, 0.1307, 0.2296]) -Greedy action tensor([ 0.6788, -0.4106, -0.1410, -0.3704]) tensor([0.4701, 0.1582, 0.2071, 0.1646]) -Greedy action tensor([ 0.7353, -0.2930, 0.0368, -0.4608]) tensor([0.4635, 0.1658, 0.2305, 0.1402]) -Greedy action tensor([ 0.9468, -0.6610, 0.1952, -0.6196]) tensor([0.5317, 0.1065, 0.2508, 0.1110]) -Greedy action tensor([ 1.1416, -0.5591, -0.1826, -0.3687]) tensor([0.5990, 0.1094, 0.1593, 0.1323]) -Greedy action tensor([ 0.7423, -0.6016, 0.1050, -0.6182]) tensor([0.4887, 0.1275, 0.2584, 0.1254]) -Greedy action tensor([ 0.6776, -0.2951, -0.0416, -0.1347]) tensor([0.4331, 0.1637, 0.2110, 0.1922]) -Greedy action tensor([ 0.4716, -0.3032, 0.0946, -0.3992]) tensor([0.3898, 0.1796, 0.2674, 0.1632]) -Greedy action tensor([ 0.5720, -0.0395, 0.2899, -0.2161]) tensor([0.3634, 0.1972, 0.2741, 0.1653]) -Greedy action tensor([ 0.5725, -0.0379, -0.1402, -0.1802]) tensor([0.3993, 0.2169, 0.1958, 0.1881]) -Greedy action tensor([ 0.4707, -0.0653, 0.2434, -0.4020]) tensor([0.3572, 0.2090, 0.2846, 0.1492]) -Greedy action tensor([ 0.6751, -0.6516, -0.1777, -0.1301]) tensor([0.4676, 0.1241, 0.1993, 0.2090]) -Greedy action tensor([ 0.6075, -0.5445, -0.1312, -0.3958]) tensor([0.4629, 0.1463, 0.2211, 0.1697]) -Greedy action tensor([ 0.5833, -0.4185, -0.1695, -0.4481]) tensor([0.4556, 0.1673, 0.2146, 0.1624]) -Greedy action tensor([ 0.5168, -0.1589, 0.0611, -0.2611]) tensor([0.3843, 0.1955, 0.2436, 0.1765]) -Greedy action tensor([ 0.5188, -0.3694, 0.0638, -0.1971]) tensor([0.3946, 0.1623, 0.2503, 0.1928]) -Greedy action tensor([ 0.5029, 0.0252, -0.1468, 0.0549]) tensor([0.3595, 0.2230, 0.1878, 0.2297]) -Greedy action tensor([ 0.6091, -0.2274, -0.0582, -0.4168]) tensor([0.4339, 0.1880, 0.2226, 0.1555]) -Greedy action tensor([ 0.3311, 0.1848, -0.2664, 0.1083]) tensor([0.3111, 0.2688, 0.1712, 0.2490]) -Greedy action tensor([ 1.0604, -0.6741, 0.0642, -0.7262]) tensor([0.5837, 0.1030, 0.2155, 0.0978]) -Greedy action tensor([ 0.6778, -0.6415, 0.0913, -0.3834]) tensor([0.4609, 0.1232, 0.2564, 0.1595]) -Greedy action tensor([ 0.6346, -0.4295, -0.0342, -0.3015]) tensor([0.4445, 0.1534, 0.2277, 0.1743]) -Greedy action tensor([ 0.7568, -0.2156, -0.0583, -0.0743]) tensor([0.4432, 0.1676, 0.1962, 0.1930]) -Greedy action tensor([ 1.4875, -0.9376, -0.0952, -0.5011]) tensor([0.6989, 0.0618, 0.1436, 0.0957]) -Greedy action tensor([ 0.2395, -0.1415, -0.0602, -0.3534]) tensor([0.3359, 0.2295, 0.2489, 0.1857]) -Greedy action tensor([ 1.1543, -0.4466, -0.1361, -0.1910]) tensor([0.5756, 0.1161, 0.1584, 0.1499]) -Greedy action tensor([ 1.0508, -0.2222, 0.0747, -0.1073]) tensor([0.5074, 0.1421, 0.1912, 0.1594]) -Greedy action tensor([ 0.7094, -0.1817, -0.0627, -0.1360]) tensor([0.4345, 0.1782, 0.2007, 0.1866]) -Greedy action tensor([ 1.0031, -0.4171, 0.2214, -0.5657]) tensor([0.5242, 0.1267, 0.2399, 0.1092]) -Greedy action tensor([ 0.4237, -0.0223, -0.0814, 0.0619]) tensor([0.3401, 0.2177, 0.2052, 0.2369]) -Greedy action tensor([ 0.7984, -0.8695, 0.0993, -0.3880]) tensor([0.5023, 0.0947, 0.2496, 0.1533]) -Greedy action tensor([ 1.1279, -1.0388, 0.0400, -0.5291]) tensor([0.6089, 0.0698, 0.2052, 0.1161]) -Greedy action tensor([ 0.2773, -0.2600, -0.1546, -0.0577]) tensor([0.3391, 0.1982, 0.2202, 0.2426]) -Greedy action tensor([ 0.1115, 0.3104, -0.2846, -0.4036]) tensor([0.2865, 0.3495, 0.1928, 0.1712]) -Greedy action tensor([ 0.8630, -0.3968, -0.1207, -0.4658]) tensor([0.5202, 0.1476, 0.1945, 0.1377]) -Greedy action tensor([ 0.7784, -0.4396, -0.1516, -0.2455]) tensor([0.4879, 0.1443, 0.1925, 0.1752]) -Greedy action tensor([ 0.3492, 0.3496, -0.2713, 0.0165]) tensor([0.3072, 0.3073, 0.1652, 0.2203]) -Greedy action tensor([ 0.5415, -0.2732, -0.1084, -0.1517]) tensor([0.4057, 0.1796, 0.2118, 0.2028]) -Greedy action tensor([ 0.4588, -0.2510, 0.0697, -0.3822]) tensor([0.3845, 0.1891, 0.2606, 0.1658]) -Greedy action tensor([ 0.6860, -0.8962, -0.0428, -0.3001]) tensor([0.4852, 0.0997, 0.2341, 0.1810]) -Greedy action tensor([ 0.7777, -0.1721, -0.0759, -0.0803]) tensor([0.4471, 0.1729, 0.1904, 0.1896]) -Greedy action tensor([ 0.5875, 0.2777, -0.1198, -0.0142]) tensor([0.3604, 0.2644, 0.1777, 0.1975]) -Greedy action tensor([ 0.4731, 0.1589, 0.0750, -0.1349]) tensor([0.3394, 0.2479, 0.2279, 0.1848]) -Greedy action tensor([ 1.0521, -0.7502, -0.0276, -0.5817]) tensor([0.5883, 0.0970, 0.1998, 0.1148]) -Greedy action tensor([ 0.4393, -0.6162, -0.2069, -0.0880]) tensor([0.4061, 0.1413, 0.2128, 0.2397]) -Greedy action tensor([ 0.6560, -0.3376, 0.0318, -0.1326]) tensor([0.4236, 0.1569, 0.2270, 0.1925]) -Greedy action tensor([ 1.0199, -0.6860, 0.0399, -0.4152]) tensor([0.5571, 0.1012, 0.2091, 0.1326]) -Greedy action tensor([ 0.5650, -0.1640, -0.0550, -0.0385]) tensor([0.3895, 0.1879, 0.2095, 0.2130]) -Greedy action tensor([ 0.7491, -0.4556, 0.0251, -0.3576]) tensor([0.4728, 0.1417, 0.2292, 0.1563]) -Greedy action tensor([ 0.2282, -0.0672, 0.0112, -0.3157]) tensor([0.3195, 0.2378, 0.2572, 0.1855]) -Greedy action tensor([ 0.5567, -0.4630, -0.0627, -0.1899]) tensor([0.4214, 0.1520, 0.2268, 0.1997]) -Greedy action tensor([ 0.8712, -0.3753, -0.0307, -0.1267]) tensor([0.4850, 0.1394, 0.1968, 0.1788]) -Greedy action tensor([ 1.2143, -0.9676, -0.0660, -0.8349]) tensor([0.6581, 0.0742, 0.1829, 0.0848]) -Greedy action tensor([ 0.3951, -0.2106, -0.0338, -0.1660]) tensor([0.3613, 0.1972, 0.2353, 0.2062]) -Greedy action tensor([ 0.7279, -0.5139, 0.0215, -0.0545]) tensor([0.4465, 0.1290, 0.2203, 0.2042]) -Greedy action tensor([ 0.8537, -0.7805, 0.0765, -0.8785]) tensor([0.5460, 0.1065, 0.2510, 0.0966]) -Greedy action tensor([ 0.4633, -0.2902, 0.0091, -0.1375]) tensor([0.3768, 0.1774, 0.2392, 0.2066]) -Greedy action tensor([ 1.0494, -0.5934, 0.0232, -0.8610]) tensor([0.5883, 0.1138, 0.2108, 0.0871]) -Greedy action tensor([ 0.8756, -0.2831, -0.1139, -0.4103]) tensor([0.5097, 0.1600, 0.1895, 0.1409]) -Greedy action tensor([ 1.2620, -0.8874, -0.0847, -0.7214]) tensor([0.6604, 0.0770, 0.1718, 0.0909]) -Greedy action tensor([ 0.9589, -0.5991, -0.2238, -0.4829]) tensor([0.5703, 0.1201, 0.1748, 0.1349]) -Greedy action tensor([ 0.4497, 0.2122, -0.1498, 0.0096]) tensor([0.3354, 0.2645, 0.1842, 0.2160]) -Greedy action tensor([ 1.0570, -0.2995, -0.0808, -0.4344]) tensor([0.5546, 0.1428, 0.1778, 0.1248]) -Greedy action tensor([ 0.5625, -0.0502, 0.0622, -0.2892]) tensor([0.3884, 0.2105, 0.2355, 0.1657]) -Greedy action tensor([ 0.6073, -0.0603, -0.4802, -0.3639]) tensor([0.4487, 0.2302, 0.1512, 0.1699]) -Greedy action tensor([ 0.6124, -0.3045, -0.0948, -0.3117]) tensor([0.4367, 0.1746, 0.2153, 0.1733]) -Greedy action tensor([ 1.3107, -1.0746, 0.1002, -0.6588]) tensor([0.6538, 0.0602, 0.1948, 0.0912]) -Greedy action tensor([ 0.3282, -0.1521, -0.1368, -0.2724]) tensor([0.3577, 0.2213, 0.2247, 0.1962]) -Greedy action tensor([ 0.6725, -0.5444, -0.0509, -0.3947]) tensor([0.4705, 0.1393, 0.2283, 0.1618]) -Greedy action tensor([ 0.6766, -0.0642, -0.1286, -0.5968]) tensor([0.4538, 0.2163, 0.2028, 0.1270]) -Greedy action tensor([ 0.6984, -0.5212, -0.1025, -0.1281]) tensor([0.4583, 0.1354, 0.2057, 0.2006]) -Greedy action tensor([ 0.6813, -0.3343, -0.1539, -0.4688]) tensor([0.4734, 0.1714, 0.2053, 0.1499]) -Greedy action tensor([ 1.1865, -0.6914, -0.1716, -0.6537]) tensor([0.6374, 0.0975, 0.1639, 0.1012]) -Greedy action tensor([ 0.5231, -0.2890, -0.1248, -0.3064]) tensor([0.4161, 0.1847, 0.2177, 0.1815]) -Greedy action tensor([ 0.8170, -0.4882, 0.0220, -0.6223]) tensor([0.5103, 0.1383, 0.2304, 0.1210]) -Greedy action tensor([ 0.9717, -0.4031, 0.0550, -0.2969]) tensor([0.5171, 0.1308, 0.2068, 0.1454]) -Greedy action tensor([ 0.3363, -0.0414, 0.2295, 0.0523]) tensor([0.2997, 0.2054, 0.2693, 0.2256]) -Greedy action tensor([ 0.8053, -0.4205, -0.0402, -0.2667]) tensor([0.4842, 0.1421, 0.2079, 0.1658]) -Greedy action tensor([ 1.0271, -0.5043, 0.0078, -0.3165]) tensor([0.5441, 0.1176, 0.1963, 0.1419]) -Greedy action tensor([ 0.4890, -0.3245, -0.0375, -0.1883]) tensor([0.3934, 0.1744, 0.2324, 0.1998]) -Greedy action tensor([ 0.5965, -0.0630, 0.0185, -0.0087]) tensor([0.3811, 0.1971, 0.2138, 0.2081]) -Greedy action tensor([ 1.3028, -0.7764, -0.0065, -0.6539]) tensor([0.6509, 0.0814, 0.1757, 0.0920]) -Greedy action tensor([-0.0351, -0.1141, 0.9432, 0.2506]) tensor([0.1691, 0.1562, 0.4497, 0.2250]) -Greedy action tensor([ 1.4511, 0.6007, 0.4860, -0.2579]) tensor([0.5027, 0.2148, 0.1915, 0.0910]) -Greedy action tensor([ 1.4071, -0.8507, 0.2472, 0.3754]) tensor([0.5635, 0.0589, 0.1767, 0.2008]) -Greedy action tensor([ 0.1394, -1.5655, -0.4333, 0.9174]) tensor([0.2549, 0.0463, 0.1438, 0.5550]) -Greedy action tensor([ 0.9433, -0.4772, 1.1143, -0.5640]) tensor([0.3774, 0.0912, 0.4478, 0.0836]) -Greedy action tensor([-0.0990, -1.1337, -0.4685, -0.0630]) tensor([0.3243, 0.1152, 0.2242, 0.3362]) -Greedy action tensor([ 0.5673, -0.5774, 1.6909, 0.4799]) tensor([0.1883, 0.0599, 0.5792, 0.1725]) -Greedy action tensor([ 0.5771, -0.9008, 0.0113, 1.2428]) tensor([0.2672, 0.0610, 0.1518, 0.5200]) -Greedy action tensor([-0.1627, 0.5522, -1.2920, 0.3526]) tensor([0.1984, 0.4055, 0.0641, 0.3321]) -Greedy action tensor([-0.1579, 0.2304, -0.8096, -0.4783]) tensor([0.2687, 0.3962, 0.1400, 0.1950]) -Greedy action tensor([ 0.3455, -0.9176, 0.1778, -0.1462]) tensor([0.3650, 0.1032, 0.3086, 0.2232]) -Greedy action tensor([-0.5061, 0.2599, 0.0430, -1.1918]) tensor([0.1856, 0.3994, 0.3215, 0.0935]) -Greedy action tensor([-0.3266, -0.0208, -0.3042, -1.4259]) tensor([0.2693, 0.3656, 0.2754, 0.0897]) -Greedy action tensor([ 0.3072, -0.1034, 0.3344, -0.2001]) tensor([0.3037, 0.2014, 0.3120, 0.1829]) -Greedy action tensor([-1.3508, 0.0646, 0.5466, 0.1292]) tensor([0.0618, 0.2545, 0.4122, 0.2715]) -Greedy action tensor([0.9464, 0.6924, 0.3277, 0.0696]) tensor([0.3662, 0.2841, 0.1973, 0.1524]) -Greedy action tensor([ 0.6265, -0.2949, 0.4469, -1.1357]) tensor([0.4158, 0.1655, 0.3474, 0.0714]) -Greedy action tensor([ 0.2237, -0.8171, -0.2306, -0.1509]) tensor([0.3737, 0.1320, 0.2373, 0.2570]) -Greedy action tensor([-0.2366, -1.2310, -0.8154, 0.5809]) tensor([0.2384, 0.0882, 0.1336, 0.5399]) -Greedy action tensor([-0.0967, -1.0895, 1.3179, 0.1579]) tensor([0.1476, 0.0547, 0.6073, 0.1904]) -Greedy action tensor([ 0.7828, 0.0721, -0.0404, -0.2534]) tensor([0.4376, 0.2150, 0.1921, 0.1553]) -Greedy action tensor([-0.3084, -2.0915, -0.1552, 0.7040]) tensor([0.1966, 0.0331, 0.2292, 0.5411]) -Greedy action tensor([ 0.6572, -0.0579, -0.0390, 0.0773]) tensor([0.3925, 0.1920, 0.1957, 0.2198]) -Greedy action tensor([ 0.1230, 0.0470, 0.4847, -0.0786]) tensor([0.2392, 0.2217, 0.3435, 0.1956]) -Greedy action tensor([ 0.9968, -0.4064, -0.1064, 0.8735]) tensor([0.4062, 0.0999, 0.1348, 0.3591]) -Greedy action tensor([-1.2438, 1.0888, -0.1522, 0.0347]) tensor([0.0559, 0.5765, 0.1667, 0.2009]) -Greedy action tensor([ 0.0259, -0.3951, 0.0695, -0.0475]) tensor([0.2755, 0.1808, 0.2878, 0.2560]) -Greedy action tensor([ 0.4429, -1.4198, -0.5111, -0.2057]) tensor([0.4847, 0.0752, 0.1867, 0.2534]) -Greedy action tensor([-1.2093, -0.8084, 1.3317, -0.8456]) tensor([0.0602, 0.0898, 0.7635, 0.0865]) -Greedy action tensor([-0.2414, -1.5285, 0.7002, 0.0545]) tensor([0.1929, 0.0532, 0.4946, 0.2593]) -Greedy action tensor([-0.5365, 0.0029, 0.3226, -0.4709]) tensor([0.1628, 0.2791, 0.3843, 0.1738]) -Greedy action tensor([ 0.4468, -0.5415, 0.6324, -0.2534]) tensor([0.3255, 0.1211, 0.3918, 0.1616]) -Greedy action tensor([ 1.4076, -1.7895, -0.4368, 1.1809]) tensor([0.5010, 0.0205, 0.0792, 0.3993]) -Greedy action tensor([-0.0412, -1.0112, 0.6648, -0.6761]) tensor([0.2541, 0.0963, 0.5148, 0.1347]) -Greedy action tensor([ 0.6906, -1.2226, -0.8704, 0.4659]) tensor([0.4638, 0.0685, 0.0974, 0.3704]) -Greedy action tensor([-0.2997, -1.6910, 1.0792, -0.2354]) tensor([0.1591, 0.0396, 0.6317, 0.1696]) -Greedy action tensor([-0.4514, -1.4932, 1.2728, -0.4373]) tensor([0.1254, 0.0442, 0.7032, 0.1272]) -Greedy action tensor([-0.4113, 0.2110, 0.4295, -0.8028]) tensor([0.1707, 0.3181, 0.3958, 0.1154]) -Greedy action tensor([-1.4449, 0.6112, -0.3092, -0.2935]) tensor([0.0663, 0.5179, 0.2063, 0.2096]) -Greedy action tensor([ 0.1973, -0.5849, -0.3706, 0.1103]) tensor([0.3400, 0.1555, 0.1927, 0.3117]) -Greedy action tensor([ 0.8601, -2.4408, -0.5612, -0.3682]) tensor([0.6365, 0.0235, 0.1537, 0.1864]) -Greedy action tensor([-0.6144, -0.1669, -0.5111, -0.5177]) tensor([0.2094, 0.3277, 0.2322, 0.2307]) -Greedy action tensor([-0.3076, 0.2333, 0.6539, -0.5247]) tensor([0.1629, 0.2798, 0.4261, 0.1311]) -Greedy action tensor([ 0.1993, 0.0096, 0.8159, -0.2129]) tensor([0.2303, 0.1905, 0.4267, 0.1525]) -Greedy action tensor([ 1.5505, -0.7204, 0.8170, 1.0972]) tensor([0.4506, 0.0465, 0.2164, 0.2864]) -Greedy action tensor([-0.5079, -0.1271, 1.0235, -0.2025]) tensor([0.1184, 0.1733, 0.5476, 0.1607]) -Greedy action tensor([0.1418, 0.4315, 0.0245, 0.8311]) tensor([0.1917, 0.2560, 0.1704, 0.3818]) -Greedy action tensor([1.0890, 0.1393, 1.1747, 0.0300]) tensor([0.3542, 0.1370, 0.3859, 0.1228]) -Greedy action tensor([ 1.5315, -0.4572, -0.3037, 0.1521]) tensor([0.6459, 0.0884, 0.1031, 0.1626]) -Greedy action tensor([ 0.3179, -1.1411, -0.7886, 0.1627]) tensor([0.4133, 0.0961, 0.1367, 0.3539]) -Greedy action tensor([ 0.2240, -0.3192, -0.8714, 0.4012]) tensor([0.3216, 0.1868, 0.1076, 0.3840]) -Greedy action tensor([ 0.3812, 0.0852, -0.5002, 0.0973]) tensor([0.3435, 0.2555, 0.1423, 0.2586]) -Greedy action tensor([-0.3408, -0.2957, -0.9596, -0.3580]) tensor([0.2803, 0.2932, 0.1510, 0.2755]) -Greedy action tensor([ 0.5131, -0.2041, 0.6819, -0.1727]) tensor([0.3149, 0.1537, 0.3728, 0.1586]) -Greedy action tensor([ 0.3465, -0.0963, 0.3474, -0.0912]) tensor([0.3041, 0.1953, 0.3044, 0.1963]) -Greedy action tensor([ 1.3485, -0.2649, 0.7529, 0.5791]) tensor([0.4517, 0.0900, 0.2490, 0.2093]) -Greedy action tensor([-0.4911, -0.1183, 0.5113, -0.1435]) tensor([0.1517, 0.2202, 0.4133, 0.2148]) -Greedy action tensor([ 0.3521, -1.2558, 0.3737, 0.5142]) tensor([0.2943, 0.0589, 0.3007, 0.3461]) -Greedy action tensor([-0.4590, 0.0431, -1.3161, 0.4615]) tensor([0.1790, 0.2957, 0.0760, 0.4494]) -Greedy action tensor([ 1.4988, -0.0947, 0.1705, 0.5837]) tensor([0.5352, 0.1087, 0.1418, 0.2143]) -Greedy action tensor([-0.8166, -0.9438, 0.2993, -0.9857]) tensor([0.1731, 0.1524, 0.5283, 0.1462]) -Greedy action tensor([1.2820, 0.4812, 0.1275, 0.0504]) tensor([0.4864, 0.2184, 0.1533, 0.1419]) -Greedy action tensor([ 0.2441, -1.1627, -0.4955, -0.2677]) tensor([0.4307, 0.1055, 0.2056, 0.2582]) -Greedy action tensor([ 0.9882, -0.6007, 0.8922, 0.2943]) tensor([0.3828, 0.0781, 0.3478, 0.1913]) -Greedy action tensor([ 0.0299, 0.1389, -0.1259, -0.5143]) tensor([0.2816, 0.3140, 0.2410, 0.1634]) -Greedy action tensor([ 0.6441, -0.7452, -0.0846, -0.0751]) tensor([0.4507, 0.1123, 0.2175, 0.2195]) -Greedy action tensor([-0.0143, -0.5543, 0.3823, -0.4692]) tensor([0.2700, 0.1573, 0.4014, 0.1713]) -Greedy action tensor([1.5152, 0.4386, 0.5121, 1.4390]) tensor([0.3796, 0.1294, 0.1392, 0.3518]) -Greedy action tensor([ 1.0266, -0.3425, -0.1880, 1.0660]) tensor([0.3859, 0.0981, 0.1145, 0.4014]) -Greedy action tensor([-0.0270, -0.3374, -0.5359, 0.1766]) tensor([0.2809, 0.2059, 0.1689, 0.3443]) -Greedy action tensor([-0.6236, -1.5501, 0.5745, 0.7946]) tensor([0.1131, 0.0448, 0.3749, 0.4672]) -Greedy action tensor([ 0.4599, -1.0055, -0.3028, 0.8835]) tensor([0.3101, 0.0716, 0.1446, 0.4737]) -Greedy action tensor([ 1.3327, -0.5006, 0.5549, 0.4577]) tensor([0.4911, 0.0785, 0.2256, 0.2047]) -Greedy action tensor([ 2.0007, -0.7936, 1.1896, 1.2959]) tensor([0.5001, 0.0306, 0.2222, 0.2471]) -Greedy action tensor([-0.8844, -0.7897, 0.0205, -0.1736]) tensor([0.1514, 0.1664, 0.3741, 0.3081]) -Greedy action tensor([ 0.9113, -2.0965, 0.2637, 0.1759]) tensor([0.4873, 0.0241, 0.2550, 0.2336]) -Greedy action tensor([-0.8545, -1.2517, -0.0725, 0.7961]) tensor([0.1103, 0.0741, 0.2410, 0.5746]) -Greedy action tensor([ 0.0350, 0.3735, -0.7489, -0.9130]) tensor([0.3080, 0.4321, 0.1406, 0.1193]) -Greedy action tensor([-0.2449, -1.2195, 0.8392, 0.8671]) tensor([0.1356, 0.0512, 0.4010, 0.4123]) -Greedy action tensor([ 1.0541, 1.1897, -0.2038, -0.1381]) tensor([0.3659, 0.4190, 0.1040, 0.1111]) -Greedy action tensor([ 0.6241, -0.4007, 0.1308, 0.0583]) tensor([0.3941, 0.1414, 0.2406, 0.2238]) -Greedy action tensor([ 0.2994, -1.8199, -0.5308, 0.6448]) tensor([0.3368, 0.0405, 0.1469, 0.4758]) -Greedy action tensor([-1.8916, -0.4353, 0.6375, -0.1547]) tensor([0.0425, 0.1825, 0.5335, 0.2416]) -Greedy action tensor([-1.7228, -0.4051, 0.5680, -0.0440]) tensor([0.0501, 0.1870, 0.4947, 0.2683]) -Greedy action tensor([-1.4210, 0.1872, 0.2836, 0.0698]) tensor([0.0628, 0.3134, 0.3451, 0.2787]) -Greedy action tensor([-1.9203, -0.4153, 0.6491, -0.1628]) tensor([0.0411, 0.1849, 0.5360, 0.2380]) -Greedy action tensor([-1.8479, -0.3291, 0.5881, -0.1139]) tensor([0.0441, 0.2016, 0.5043, 0.2500]) -Greedy action tensor([-1.1366, -0.4573, 0.3362, -0.0677]) tensor([0.0976, 0.1925, 0.4257, 0.2842]) -Greedy action tensor([-1.9028, -0.4478, 0.6456, -0.1559]) tensor([0.0420, 0.1800, 0.5371, 0.2410]) -Greedy action tensor([-1.9477, -0.4511, 0.6674, -0.1829]) tensor([0.0400, 0.1788, 0.5473, 0.2338]) -Greedy action tensor([-1.5859, 0.4244, 0.3681, 0.1108]) tensor([0.0477, 0.3559, 0.3364, 0.2601]) -Greedy action tensor([-1.6859, -0.3664, 0.5809, 0.0115]) tensor([0.0504, 0.1885, 0.4861, 0.2750]) -Greedy action tensor([-1.7725, -0.2705, 0.5531, -0.0576]) tensor([0.0470, 0.2110, 0.4809, 0.2611]) -Greedy action tensor([-1.7939, -0.4637, 0.5996, -0.1052]) tensor([0.0473, 0.1788, 0.5179, 0.2559]) -Greedy action tensor([-1.9351, -0.4295, 0.6611, -0.1733]) tensor([0.0404, 0.1821, 0.5421, 0.2353]) -Greedy action tensor([-1.7925, -0.4150, 0.6670, -0.0925]) tensor([0.0452, 0.1791, 0.5285, 0.2473]) -Greedy action tensor([-1.9121, -0.4259, 0.6515, -0.1619]) tensor([0.0414, 0.1830, 0.5374, 0.2382]) -Greedy action tensor([-1.5830, -0.5520, 0.4872, -0.0292]) tensor([0.0608, 0.1703, 0.4816, 0.2873]) -Greedy action tensor([-1.4837, 0.3353, 0.3310, 0.0494]) tensor([0.0558, 0.3437, 0.3422, 0.2583]) -Greedy action tensor([-1.9375, -0.4476, 0.6617, -0.1781]) tensor([0.0405, 0.1796, 0.5447, 0.2352]) -Greedy action tensor([-1.8684, -0.4373, 0.6294, -0.1439]) tensor([0.0436, 0.1823, 0.5297, 0.2445]) -Greedy action tensor([-0.4556, 0.4665, 0.0795, 0.1731]) tensor([0.1409, 0.3543, 0.2406, 0.2642]) -Greedy action tensor([-1.8662, -0.3629, 0.6200, -0.1292]) tensor([0.0431, 0.1939, 0.5181, 0.2449]) -Greedy action tensor([-1.8997, -0.4106, 0.6445, -0.1518]) tensor([0.0418, 0.1854, 0.5326, 0.2402]) -Greedy action tensor([-1.9331, -0.4415, 0.6609, -0.1718]) tensor([0.0406, 0.1803, 0.5430, 0.2361]) -Greedy action tensor([-1.3219, 0.4017, 0.2936, -0.0555]) tensor([0.0659, 0.3691, 0.3313, 0.2337]) -Greedy action tensor([-1.4777, 0.6023, 0.3038, 0.2050]) tensor([0.0492, 0.3939, 0.2922, 0.2647]) -Greedy action tensor([-0.9197, -0.6604, 0.1547, 0.3967]) tensor([0.1117, 0.1447, 0.3270, 0.4165]) -Greedy action tensor([-1.9444, -0.4490, 0.6667, -0.1803]) tensor([0.0401, 0.1791, 0.5465, 0.2343]) -Greedy action tensor([-1.6830, -0.3984, 0.5668, -0.0219]) tensor([0.0516, 0.1866, 0.4899, 0.2719]) -Greedy action tensor([-0.7083, 0.7921, 0.0649, 0.4038]) tensor([0.0935, 0.4194, 0.2027, 0.2844]) -Greedy action tensor([-1.9189, -0.4529, 0.6588, -0.1681]) tensor([0.0412, 0.1786, 0.5428, 0.2374]) -Greedy action tensor([-0.9223, -0.0817, 0.1673, 0.0090]) tensor([0.1133, 0.2625, 0.3368, 0.2875]) -Greedy action tensor([-1.6431, -0.1666, 0.5455, -0.1918]) tensor([0.0539, 0.2358, 0.4805, 0.2299]) -Greedy action tensor([-1.9324, -0.4402, 0.6627, -0.1732]) tensor([0.0406, 0.1804, 0.5435, 0.2356]) -Greedy action tensor([-1.8859, -0.3423, 0.6337, -0.1321]) tensor([0.0419, 0.1960, 0.5202, 0.2419]) -Greedy action tensor([-1.8520, -0.4420, 0.6584, -0.0903]) tensor([0.0430, 0.1763, 0.5300, 0.2506]) -Greedy action tensor([-1.6414, -0.5711, 0.5300, -0.0443]) tensor([0.0567, 0.1655, 0.4976, 0.2802]) -Greedy action tensor([-1.8982, -0.4517, 0.6472, -0.1597]) tensor([0.0422, 0.1794, 0.5382, 0.2402]) -Greedy action tensor([-1.7655, -0.4471, 0.5746, -0.0905]) tensor([0.0489, 0.1827, 0.5075, 0.2609]) -Greedy action tensor([-1.4985, -0.0129, 0.4186, -0.0648]) tensor([0.0609, 0.2692, 0.4144, 0.2555]) -Greedy action tensor([-1.7479, -0.3655, 0.6021, -0.1998]) tensor([0.0496, 0.1975, 0.5198, 0.2331]) -Greedy action tensor([-1.7485, -0.4995, 0.5708, -0.0332]) tensor([0.0495, 0.1725, 0.5030, 0.2750]) -Greedy action tensor([-1.9225, -0.4589, 0.6732, -0.1525]) tensor([0.0407, 0.1757, 0.5450, 0.2387]) -Greedy action tensor([-1.2436, -0.4818, 0.4210, 0.3198]) tensor([0.0758, 0.1623, 0.4003, 0.3617]) -Greedy action tensor([-1.9217, -0.4220, 0.6522, -0.1659]) tensor([0.0410, 0.1837, 0.5379, 0.2374]) -Greedy action tensor([-1.1745, -0.1447, 0.6855, 1.0110]) tensor([0.0523, 0.1465, 0.3360, 0.4653]) -Greedy action tensor([-0.1039, -0.2473, 0.0474, 0.2662]) tensor([0.2233, 0.1935, 0.2598, 0.3233]) -Greedy action tensor([-1.7768, -0.3714, 0.6085, -0.0690]) tensor([0.0466, 0.1900, 0.5063, 0.2571]) -Greedy action tensor([-1.8634, -0.4648, 0.6390, -0.1424]) tensor([0.0438, 0.1772, 0.5344, 0.2446]) -Greedy action tensor([-1.8682, -0.4634, 0.6257, -0.1443]) tensor([0.0439, 0.1788, 0.5313, 0.2460]) -Greedy action tensor([-1.8865, -0.4544, 0.6443, -0.1473]) tensor([0.0427, 0.1786, 0.5359, 0.2428]) -Greedy action tensor([-1.4728, 0.2891, 0.3484, 0.1262]) tensor([0.0557, 0.3244, 0.3442, 0.2756]) -Greedy action tensor([-1.9466, -0.4515, 0.6692, -0.1813]) tensor([0.0400, 0.1785, 0.5475, 0.2339]) -Greedy action tensor([-1.1760, 0.6584, 0.0992, 0.3128]) tensor([0.0655, 0.4100, 0.2344, 0.2902]) -Greedy action tensor([-1.6981, -0.1892, 0.4960, -0.0553]) tensor([0.0509, 0.2300, 0.4563, 0.2629]) -Greedy action tensor([-1.7873, -0.4043, 0.5890, -0.1208]) tensor([0.0475, 0.1894, 0.5115, 0.2515]) -Greedy action tensor([-1.9435, -0.4486, 0.6671, -0.1800]) tensor([0.0402, 0.1791, 0.5465, 0.2343]) -Greedy action tensor([-1.9432, -0.4514, 0.6676, -0.1807]) tensor([0.0402, 0.1786, 0.5470, 0.2342]) -Greedy action tensor([-1.8442, -0.4540, 0.6215, -0.1259]) tensor([0.0447, 0.1796, 0.5264, 0.2493]) -Greedy action tensor([-1.9369, -0.4414, 0.6668, -0.1721]) tensor([0.0403, 0.1798, 0.5446, 0.2353]) -Greedy action tensor([-1.4261, -0.4492, 0.5498, 0.3076]) tensor([0.0605, 0.1607, 0.4364, 0.3425]) -Greedy action tensor([-1.7825, -0.4788, 0.5878, -0.1294]) tensor([0.0485, 0.1787, 0.5193, 0.2535]) -Greedy action tensor([-0.6277, -0.5669, 0.7878, 1.5021]) tensor([0.0685, 0.0728, 0.2822, 0.5765]) -Greedy action tensor([-1.8072, -0.2804, 0.5711, -0.1025]) tensor([0.0457, 0.2103, 0.4928, 0.2513]) -Greedy action tensor([-1.8769, -0.2935, 0.6075, -0.1388]) tensor([0.0425, 0.2068, 0.5093, 0.2414]) -Greedy action tensor([-1.8416, -0.1771, 0.5734, -0.0962]) tensor([0.0431, 0.2277, 0.4823, 0.2469]) -Greedy action tensor([-1.6999, -0.4885, 0.6639, 0.0679]) tensor([0.0480, 0.1611, 0.5099, 0.2810]) -Greedy action tensor([-1.5273, -0.5615, 0.4784, 0.0460]) tensor([0.0630, 0.1654, 0.4679, 0.3037]) -Greedy action tensor([-1.6536, -0.2677, 0.5700, 0.0211]) tensor([0.0511, 0.2043, 0.4720, 0.2726]) -Greedy action tensor([-0.9829, -0.5680, 0.2270, 0.3095]) tensor([0.1052, 0.1592, 0.3526, 0.3830]) -Greedy action tensor([-1.5222, -0.4591, 0.5280, 0.1425]) tensor([0.0590, 0.1708, 0.4584, 0.3118]) -Greedy action tensor([-1.9176, -0.4028, 0.6509, -0.1592]) tensor([0.0410, 0.1864, 0.5347, 0.2378]) -Greedy action tensor([-1.3202, -0.2484, 0.2900, 0.1982]) tensor([0.0741, 0.2165, 0.3709, 0.3384]) -Greedy action tensor([1.2063, 1.3044, 0.1344, 0.8154]) tensor([0.3203, 0.3533, 0.1097, 0.2167]) -Greedy action tensor([-1.4320, 0.5611, 0.2851, 0.0427]) tensor([0.0547, 0.4015, 0.3047, 0.2391]) -Greedy action tensor([-0.8155, -0.3540, 0.2893, 0.6377]) tensor([0.1012, 0.1605, 0.3055, 0.4328]) -Greedy action tensor([-1.8706, -0.3239, 0.5827, -0.1773]) tensor([0.0439, 0.2063, 0.5108, 0.2389]) -Greedy action tensor([-1.6135, 0.1759, 0.5176, 0.0305]) tensor([0.0486, 0.2908, 0.4092, 0.2514]) -Greedy action tensor([-1.5534, -0.5492, 0.4561, 0.1144]) tensor([0.0606, 0.1655, 0.4524, 0.3214]) -Greedy action tensor([ 1.1728, 0.6286, -0.0764, 0.5606]) tensor([0.4151, 0.2409, 0.1190, 0.2250]) -Greedy action tensor([-1.8859, -0.4548, 0.6458, -0.1482]) tensor([0.0427, 0.1784, 0.5364, 0.2425]) -Greedy action tensor([-1.8942, -0.4516, 0.6399, -0.1544]) tensor([0.0425, 0.1798, 0.5356, 0.2420]) -Greedy action tensor([ 1.3931e+00, -5.5632e-01, 6.0514e-04, 1.3923e-01]) tensor([0.5966, 0.0849, 0.1482, 0.1703]) -Greedy action tensor([ 1.0612, -0.6096, -0.4791, 0.4014]) tensor([0.5210, 0.0980, 0.1117, 0.2693]) -Greedy action tensor([ 1.4033, -0.5268, -0.1663, 0.3922]) tensor([0.5824, 0.0845, 0.1212, 0.2119]) -Greedy action tensor([ 0.8596, -0.1332, 0.1467, -0.2271]) tensor([0.4549, 0.1686, 0.2230, 0.1535]) -Greedy action tensor([ 1.3062, -0.1094, -0.9592, 0.2991]) tensor([0.5841, 0.1418, 0.0606, 0.2134]) -Greedy action tensor([ 1.4232, -0.7098, -0.2244, 0.5624]) tensor([0.5768, 0.0683, 0.1110, 0.2439]) -Greedy action tensor([ 1.3321, -0.3824, -0.6911, 0.7589]) tensor([0.5331, 0.0960, 0.0705, 0.3005]) -Greedy action tensor([1.7083, 0.5615, 0.0370, 0.1615]) tensor([0.5819, 0.1848, 0.1094, 0.1239]) -Greedy action tensor([ 1.0590, -0.1536, -0.5390, -0.0027]) tensor([0.5418, 0.1611, 0.1096, 0.1874]) -Greedy action tensor([ 1.5679, -0.5852, -1.1717, 0.2488]) tensor([0.6906, 0.0802, 0.0446, 0.1846]) -Greedy action tensor([ 1.0982, 0.0311, -0.6776, 0.3659]) tensor([0.5015, 0.1725, 0.0849, 0.2411]) -Greedy action tensor([ 1.4354, -0.1939, -0.3770, 0.0421]) tensor([0.6221, 0.1220, 0.1016, 0.1544]) -Greedy action tensor([ 0.8742, -0.2169, -0.0082, 0.3891]) tensor([0.4228, 0.1420, 0.1749, 0.2603]) -Greedy action tensor([ 1.6399, -0.1632, -0.6168, -0.1572]) tensor([0.6967, 0.1148, 0.0730, 0.1155]) -Greedy action tensor([ 1.0499, -0.0957, -0.5716, 0.3092]) tensor([0.5019, 0.1596, 0.0992, 0.2393]) -Greedy action tensor([ 1.0836, -0.4280, -0.5079, 0.0829]) tensor([0.5581, 0.1231, 0.1136, 0.2052]) -Greedy action tensor([ 1.3519, -0.1107, -0.6657, -0.2858]) tensor([0.6414, 0.1486, 0.0853, 0.1247]) -Greedy action tensor([ 1.7059, -0.8838, -0.3033, 0.5116]) tensor([0.6613, 0.0496, 0.0887, 0.2003]) -Greedy action tensor([ 2.0676, -0.8525, -0.3525, 0.5036]) tensor([0.7396, 0.0399, 0.0658, 0.1548]) -Greedy action tensor([ 1.6014, -0.5437, -0.6568, 0.8481]) tensor([0.5909, 0.0692, 0.0618, 0.2782]) -Greedy action tensor([ 1.6758, -0.7609, -0.2839, 0.5732]) tensor([0.6409, 0.0560, 0.0903, 0.2128]) -Greedy action tensor([ 1.0929, -0.1982, -0.0293, 0.0051]) tensor([0.5161, 0.1419, 0.1680, 0.1739]) -Greedy action tensor([ 1.6599, -0.2203, -0.3683, 0.1817]) tensor([0.6613, 0.1009, 0.0870, 0.1508]) -Greedy action tensor([ 1.9200, -0.8717, -0.4475, 0.6353]) tensor([0.6984, 0.0428, 0.0655, 0.1933]) -Greedy action tensor([ 0.9076, -0.3712, 0.0424, 0.1424]) tensor([0.4620, 0.1286, 0.1945, 0.2149]) -Greedy action tensor([ 1.3770, -0.8995, -0.0551, 0.1765]) tensor([0.6088, 0.0625, 0.1454, 0.1833]) -Greedy action tensor([ 1.5290, -0.8189, -0.1580, 0.3621]) tensor([0.6282, 0.0600, 0.1163, 0.1956]) -Greedy action tensor([ 1.6614, -0.6154, -0.4441, 0.6721]) tensor([0.6265, 0.0643, 0.0763, 0.2330]) -Greedy action tensor([ 0.9212, -0.3848, -0.4400, 0.6196]) tensor([0.4411, 0.1195, 0.1131, 0.3263]) -Greedy action tensor([ 1.5713, -0.5264, -0.1693, 0.2403]) tensor([0.6400, 0.0786, 0.1123, 0.1691]) -Greedy action tensor([ 1.3709, -0.3075, -0.7273, 0.7727]) tensor([0.5379, 0.1004, 0.0660, 0.2957]) -Greedy action tensor([ 1.2164, -0.2538, -0.2071, 0.4794]) tensor([0.5130, 0.1179, 0.1236, 0.2455]) -Greedy action tensor([ 1.4893, -0.9472, -0.1777, 0.1681]) tensor([0.6481, 0.0567, 0.1224, 0.1729]) -Greedy action tensor([ 1.3744, -0.1350, -0.1236, -0.0576]) tensor([0.5940, 0.1313, 0.1328, 0.1419]) -Greedy action tensor([ 1.6271, -0.6153, -0.5623, 0.2228]) tensor([0.6832, 0.0726, 0.0765, 0.1678]) -Greedy action tensor([ 1.7156, -0.4183, -0.4961, 0.5900]) tensor([0.6442, 0.0763, 0.0705, 0.2090]) -Greedy action tensor([ 1.2515, -0.2038, -0.8183, 0.5705]) tensor([0.5360, 0.1251, 0.0677, 0.2713]) -Greedy action tensor([ 1.4551, -0.2791, -0.3758, 0.1630]) tensor([0.6205, 0.1096, 0.0995, 0.1705]) -Greedy action tensor([1.6730, 0.1021, 0.0822, 0.2574]) tensor([0.6045, 0.1256, 0.1232, 0.1467]) -Greedy action tensor([ 1.7233, -0.3177, -0.3195, 0.2235]) tensor([0.6744, 0.0876, 0.0874, 0.1505]) -Greedy action tensor([ 1.2586, -0.3846, -0.7517, 0.3066]) tensor([0.5837, 0.1129, 0.0782, 0.2253]) -Greedy action tensor([ 1.1095, -0.0141, -0.5416, 0.3773]) tensor([0.5006, 0.1627, 0.0960, 0.2407]) -Greedy action tensor([ 1.5814, -0.6576, -0.3260, 0.3707]) tensor([0.6439, 0.0686, 0.0956, 0.1919]) -Greedy action tensor([ 1.4131, -0.1491, -0.5228, 0.2197]) tensor([0.6034, 0.1265, 0.0871, 0.1830]) -Greedy action tensor([ 1.3839, -0.2005, -0.7888, 0.0516]) tensor([0.6318, 0.1296, 0.0719, 0.1667]) -Greedy action tensor([ 1.6006, -0.6082, -0.4706, 0.3772]) tensor([0.6536, 0.0718, 0.0824, 0.1923]) -Greedy action tensor([ 1.6458, -0.7502, -0.5794, 0.7242]) tensor([0.6262, 0.0570, 0.0677, 0.2491]) -Greedy action tensor([ 1.5012, -0.7857, -0.1597, 0.4541]) tensor([0.6088, 0.0618, 0.1157, 0.2137]) -Greedy action tensor([ 1.5188, -0.1422, -0.4243, 0.3163]) tensor([0.6121, 0.1163, 0.0877, 0.1839]) -Greedy action tensor([ 1.0860, -0.3309, -0.6532, 0.4630]) tensor([0.5117, 0.1241, 0.0899, 0.2744]) -Greedy action tensor([ 1.9067, -0.4927, -0.1288, 0.3588]) tensor([0.6973, 0.0633, 0.0911, 0.1483]) -Greedy action tensor([ 0.7678, -0.2581, -0.1003, 0.1607]) tensor([0.4304, 0.1543, 0.1807, 0.2346]) -Greedy action tensor([ 1.5330, 0.5617, -0.1257, -0.0855]) tensor([0.5659, 0.2142, 0.1077, 0.1122]) -Greedy action tensor([ 0.4156, -0.0917, -0.2296, 0.3307]) tensor([0.3284, 0.1977, 0.1723, 0.3016]) -Greedy action tensor([ 1.3361, -0.4887, -0.4775, 0.2448]) tensor([0.6024, 0.0971, 0.0982, 0.2023]) -Greedy action tensor([ 1.7834, -1.5189, 0.0983, -0.0361]) tensor([0.7224, 0.0266, 0.1340, 0.1171]) -Greedy action tensor([ 1.9320, -1.0963, -0.2569, 0.4699]) tensor([0.7183, 0.0348, 0.0805, 0.1665]) -Greedy action tensor([ 2.1319, -0.5406, -0.5407, 0.4111]) tensor([0.7593, 0.0525, 0.0524, 0.1359]) -Greedy action tensor([ 1.4382, -0.7849, -0.1880, -0.0734]) tensor([0.6555, 0.0710, 0.1289, 0.1446]) -Greedy action tensor([ 1.7167, -1.0474, 0.2922, 0.1097]) tensor([0.6648, 0.0419, 0.1600, 0.1333]) -Greedy action tensor([ 1.4350, -0.6820, -0.3345, 0.4578]) tensor([0.5998, 0.0722, 0.1022, 0.2257]) -Greedy action tensor([ 1.4853, -0.7103, 0.0809, 0.0462]) tensor([0.6274, 0.0698, 0.1540, 0.1488]) -Greedy action tensor([ 1.3842, -0.1920, -0.2215, 0.4172]) tensor([0.5594, 0.1157, 0.1123, 0.2127]) -Greedy action tensor([ 0.6324, -0.5314, -0.1369, 0.4566]) tensor([0.3825, 0.1195, 0.1772, 0.3208]) -Greedy action tensor([ 1.6970, -0.1844, -0.5122, 0.4579]) tensor([0.6444, 0.0982, 0.0707, 0.1867]) -Greedy action tensor([ 1.1313, -0.8768, -0.0940, 0.6839]) tensor([0.4838, 0.0649, 0.1421, 0.3092]) -Greedy action tensor([ 1.7460, -0.3931, -0.2616, 0.4487]) tensor([0.6556, 0.0772, 0.0881, 0.1792]) -Greedy action tensor([ 1.9609, -1.0319, -0.4192, 0.6498]) tensor([0.7081, 0.0355, 0.0655, 0.1908]) -Greedy action tensor([ 1.4371, -0.7669, 0.1651, -0.0866]) tensor([0.6217, 0.0686, 0.1742, 0.1355]) -Greedy action tensor([ 1.0832, -0.1914, -0.4225, 0.2322]) tensor([0.5186, 0.1450, 0.1151, 0.2214]) -Greedy action tensor([ 1.4499, -0.5164, -0.3514, 0.2571]) tensor([0.6217, 0.0870, 0.1026, 0.1886]) -Greedy action tensor([ 0.9279, -0.2454, -0.0817, -0.1131]) tensor([0.4934, 0.1526, 0.1798, 0.1742]) -Greedy action tensor([ 0.8410, -0.1439, 0.0782, 0.1337]) tensor([0.4287, 0.1601, 0.1999, 0.2113]) -Greedy action tensor([ 1.8027, -0.9456, -0.1273, 0.1335]) tensor([0.7155, 0.0458, 0.1039, 0.1348]) -Greedy action tensor([ 1.8171, -0.4937, -0.6496, 0.6920]) tensor([0.6628, 0.0657, 0.0563, 0.2152]) -Greedy action tensor([ 1.7215, -0.5185, -0.1409, 0.2335]) tensor([0.6722, 0.0716, 0.1044, 0.1518]) -Greedy action tensor([ 0.6524, -0.1259, -0.1432, 0.1837]) tensor([0.3943, 0.1810, 0.1779, 0.2467]) -Greedy action tensor([ 1.5139, -0.7471, -0.6849, 0.2007]) tensor([0.6738, 0.0702, 0.0748, 0.1812]) -Greedy action tensor([ 1.9496, -0.3198, -0.3647, 0.7836]) tensor([0.6606, 0.0683, 0.0653, 0.2059]) -Greedy action tensor([ 1.2582, -0.4429, -0.6149, 0.5157]) tensor([0.5519, 0.1007, 0.0848, 0.2626]) -Greedy action tensor([ 1.2378, -0.5455, -0.1979, -0.0864]) tensor([0.5981, 0.1005, 0.1423, 0.1591]) -Greedy action tensor([ 0.3320, -0.1288, -0.1345, -0.0699]) tensor([0.3416, 0.2155, 0.2143, 0.2286]) -Greedy action tensor([ 0.7460, -0.6018, -0.0688, -0.2052]) tensor([0.4787, 0.1244, 0.2119, 0.1849]) -Greedy action tensor([ 0.9106, -0.6698, 0.1006, -0.4114]) tensor([0.5216, 0.1074, 0.2320, 0.1390]) -Greedy action tensor([ 0.5268, -0.2608, -0.0099, -0.2500]) tensor([0.4001, 0.1820, 0.2339, 0.1840]) -Greedy action tensor([ 0.6749, -0.4078, -0.1070, -0.2685]) tensor([0.4575, 0.1550, 0.2094, 0.1781]) -Greedy action tensor([ 0.5250, -0.3591, -0.0869, -0.0027]) tensor([0.3929, 0.1623, 0.2131, 0.2318]) -Greedy action tensor([ 0.9585, -0.7335, 0.1853, -0.4428]) tensor([0.5286, 0.0973, 0.2439, 0.1302]) -Greedy action tensor([ 0.5821, -0.3032, -0.0855, -0.1677]) tensor([0.4170, 0.1721, 0.2139, 0.1970]) -Greedy action tensor([ 0.7680, -0.6345, -0.0991, -0.0192]) tensor([0.4714, 0.1160, 0.1981, 0.2145]) -Greedy action tensor([ 0.1888, -0.1003, -0.0396, -0.1754]) tensor([0.3087, 0.2312, 0.2457, 0.2145]) -Greedy action tensor([ 0.6966, -0.2335, -0.0170, -0.1447]) tensor([0.4319, 0.1704, 0.2116, 0.1862]) -Greedy action tensor([ 0.8230, -0.1533, -0.0110, -0.3966]) tensor([0.4747, 0.1788, 0.2062, 0.1402]) -Greedy action tensor([ 1.0165, -0.5769, -0.1352, -0.3332]) tensor([0.5622, 0.1143, 0.1777, 0.1458]) -Greedy action tensor([ 0.3528, 0.0365, 0.0135, -0.4269]) tensor([0.3449, 0.2514, 0.2456, 0.1581]) -Greedy action tensor([ 1.2236, -1.0062, 0.2654, -0.6596]) tensor([0.6086, 0.0654, 0.2334, 0.0926]) -Greedy action tensor([ 1.0926, -0.6518, -0.1018, -0.4234]) tensor([0.5892, 0.1030, 0.1785, 0.1294]) -Greedy action tensor([ 1.2077, -0.4660, -0.1329, -0.7277]) tensor([0.6275, 0.1177, 0.1642, 0.0906]) -Greedy action tensor([ 0.9144, -0.3894, -0.0596, -0.2473]) tensor([0.5097, 0.1384, 0.1924, 0.1595]) -Greedy action tensor([ 0.9709, -1.2439, 0.0145, -0.5327]) tensor([0.5828, 0.0636, 0.2240, 0.1296]) -Greedy action tensor([ 0.4870, -0.0507, -0.0553, 0.0549]) tensor([0.3553, 0.2075, 0.2066, 0.2306]) -Greedy action tensor([ 0.8961, -1.0094, -0.0087, -0.3849]) tensor([0.5461, 0.0812, 0.2210, 0.1517]) -Greedy action tensor([ 0.9009, -0.6095, -0.0220, -0.4875]) tensor([0.5354, 0.1182, 0.2128, 0.1336]) -Greedy action tensor([ 1.1088, -0.7950, -0.0498, -0.5776]) tensor([0.6067, 0.0904, 0.1905, 0.1124]) -Greedy action tensor([ 0.6308, -0.5478, -0.0174, -0.1796]) tensor([0.4395, 0.1352, 0.2298, 0.1954]) -Greedy action tensor([ 0.6896, -0.3965, -0.0436, -0.4559]) tensor([0.4682, 0.1580, 0.2249, 0.1489]) -Greedy action tensor([ 0.8142, -0.8334, -0.0644, -0.3463]) tensor([0.5205, 0.1002, 0.2162, 0.1631]) -Greedy action tensor([ 0.6414, -0.2576, 0.0237, -0.0897]) tensor([0.4119, 0.1676, 0.2221, 0.1983]) -Greedy action tensor([ 6.1419e-01, -3.2291e-03, -2.3931e-04, -1.9911e-01]) tensor([0.3962, 0.2137, 0.2143, 0.1757]) -Greedy action tensor([ 0.5872, -0.5300, -0.1449, -0.0369]) tensor([0.4266, 0.1396, 0.2052, 0.2286]) -Greedy action tensor([ 0.6981, -0.5437, -0.0570, -0.3031]) tensor([0.4703, 0.1359, 0.2210, 0.1728]) -Greedy action tensor([ 0.2496, -0.1032, -0.1338, -0.1663]) tensor([0.3285, 0.2308, 0.2239, 0.2167]) -Greedy action tensor([ 0.8822, -0.4756, -0.0692, -0.3344]) tensor([0.5156, 0.1326, 0.1991, 0.1527]) -Greedy action tensor([ 0.7428, -0.3896, 0.0767, -0.1704]) tensor([0.4470, 0.1440, 0.2296, 0.1794]) -Greedy action tensor([ 0.8089, -0.4850, -0.1123, -0.6377]) tensor([0.5242, 0.1437, 0.2087, 0.1234]) -Greedy action tensor([ 0.7301, -0.2826, -0.0531, -0.3787]) tensor([0.4651, 0.1689, 0.2125, 0.1535]) -Greedy action tensor([ 0.6909, -0.4884, 0.0429, -0.5813]) tensor([0.4738, 0.1457, 0.2478, 0.1327]) -Greedy action tensor([ 0.7618, -0.4483, -0.1409, -0.2599]) tensor([0.4846, 0.1445, 0.1965, 0.1744]) -Greedy action tensor([ 0.5885, -0.2294, -0.1691, -0.3893]) tensor([0.4374, 0.1931, 0.2050, 0.1645]) -Greedy action tensor([ 0.8101, -0.5329, -0.1760, -0.2952]) tensor([0.5089, 0.1328, 0.1898, 0.1685]) -Greedy action tensor([ 0.8359, -0.5322, -0.0512, -0.4511]) tensor([0.5148, 0.1311, 0.2120, 0.1421]) -Greedy action tensor([ 0.1912, 0.0700, -0.3040, -0.3432]) tensor([0.3245, 0.2875, 0.1978, 0.1902]) -Greedy action tensor([ 1.2382, -0.6763, -0.0145, -0.8440]) tensor([0.6419, 0.0946, 0.1834, 0.0800]) -Greedy action tensor([ 0.5289, -0.2284, -0.0935, -0.1956]) tensor([0.4016, 0.1883, 0.2155, 0.1946]) -Greedy action tensor([ 0.6824, -0.3343, 0.0079, -0.3286]) tensor([0.4474, 0.1619, 0.2279, 0.1628]) -Greedy action tensor([ 0.8244, -0.5126, -0.1332, -0.1805]) tensor([0.4969, 0.1305, 0.1907, 0.1819]) -Greedy action tensor([ 0.6366, -0.2356, -0.0064, -0.4261]) tensor([0.4368, 0.1826, 0.2296, 0.1509]) -Greedy action tensor([ 0.5101, -0.1722, -0.1744, -0.3781]) tensor([0.4130, 0.2088, 0.2083, 0.1699]) -Greedy action tensor([ 0.4233, -0.0984, 0.1043, -0.2914]) tensor([0.3559, 0.2112, 0.2587, 0.1742]) -Greedy action tensor([ 0.7976, -0.2655, -0.1188, -0.3359]) tensor([0.4837, 0.1671, 0.1935, 0.1557]) -Greedy action tensor([ 1.0418, -0.3275, -0.1880, -0.1541]) tensor([0.5408, 0.1375, 0.1581, 0.1636]) -Greedy action tensor([ 0.3878, -0.4450, -0.0385, -0.3845]) tensor([0.3922, 0.1705, 0.2561, 0.1812]) -Greedy action tensor([ 0.6712, -0.0570, -0.3042, -0.3458]) tensor([0.4501, 0.2173, 0.1697, 0.1628]) -Greedy action tensor([ 0.3484, -0.2439, -0.2072, -0.1547]) tensor([0.3661, 0.2025, 0.2100, 0.2214]) -Greedy action tensor([ 0.6453, -0.3654, 0.1266, -0.4502]) tensor([0.4360, 0.1587, 0.2595, 0.1458]) -Greedy action tensor([ 0.6080, -0.2320, -0.0746, -0.1364]) tensor([0.4146, 0.1790, 0.2095, 0.1969]) -Greedy action tensor([ 0.5498, -0.2159, -0.1800, -0.3768]) tensor([0.4268, 0.1985, 0.2057, 0.1690]) -Greedy action tensor([ 0.9876, -0.5318, -0.0811, -0.6433]) tensor([0.5688, 0.1245, 0.1954, 0.1113]) -Greedy action tensor([ 0.6480, -0.5668, -0.0194, -0.3706]) tensor([0.4606, 0.1367, 0.2363, 0.1663]) -Greedy action tensor([ 0.8682, -0.3601, 0.1559, -0.2327]) tensor([0.4726, 0.1384, 0.2318, 0.1572]) -Greedy action tensor([ 0.7960, -0.7972, 0.0037, -0.4008]) tensor([0.5107, 0.1038, 0.2312, 0.1543]) -Greedy action tensor([ 0.8740, -0.5762, 0.0775, -0.4588]) tensor([0.5130, 0.1203, 0.2313, 0.1353]) -Greedy action tensor([ 0.6629, -0.5443, 0.0232, -0.5955]) tensor([0.4738, 0.1417, 0.2499, 0.1346]) -Greedy action tensor([ 0.4599, -0.1175, -0.1861, -0.2643]) tensor([0.3891, 0.2184, 0.2039, 0.1886]) -Greedy action tensor([ 0.6813, -0.5748, -0.1918, -0.0942]) tensor([0.4623, 0.1317, 0.1931, 0.2129]) -Greedy action tensor([ 0.4359, 0.1601, 0.0156, -0.0638]) tensor([0.3308, 0.2511, 0.2173, 0.2007]) -Greedy action tensor([ 1.3835, -1.1406, 0.0446, -0.7161]) tensor([0.6827, 0.0547, 0.1790, 0.0836]) -Greedy action tensor([ 0.6060, -0.1516, -0.0875, -0.2100]) tensor([0.4148, 0.1944, 0.2073, 0.1834]) -Greedy action tensor([ 0.9506, -0.0786, -0.3213, -0.2755]) tensor([0.5179, 0.1850, 0.1452, 0.1520]) -Greedy action tensor([ 0.6930, -0.3912, -0.0308, -0.2938]) tensor([0.4554, 0.1540, 0.2208, 0.1698]) -Greedy action tensor([ 0.8283, -0.7599, 0.2466, -0.7391]) tensor([0.5071, 0.1036, 0.2835, 0.1058]) -Greedy action tensor([ 0.6670, -0.3751, -0.0257, -0.2161]) tensor([0.4412, 0.1556, 0.2207, 0.1824]) -Greedy action tensor([ 0.4393, -0.2611, -0.0653, -0.5231]) tensor([0.4029, 0.2000, 0.2432, 0.1539]) -Greedy action tensor([ 0.9326, -1.0381, 0.0635, -0.6812]) tensor([0.5689, 0.0793, 0.2386, 0.1133]) -Greedy action tensor([ 0.8856, -0.4726, 0.1558, -0.2286]) tensor([0.4837, 0.1244, 0.2332, 0.1588]) -Greedy action tensor([ 0.5191, -0.4131, -0.0227, -0.1971]) tensor([0.4058, 0.1598, 0.2361, 0.1983]) -Greedy action tensor([ 0.7973, -0.1650, 0.1535, -0.3098]) tensor([0.4469, 0.1707, 0.2347, 0.1477]) -Greedy action tensor([ 0.5460, -0.4440, -0.0481, -0.3273]) tensor([0.4271, 0.1587, 0.2358, 0.1784]) -Greedy action tensor([ 1.0550, -0.8634, 0.0183, -0.4722]) tensor([0.5819, 0.0854, 0.2063, 0.1263]) -Greedy action tensor([ 0.6401, -0.1252, -0.0909, -0.1039]) tensor([0.4129, 0.1921, 0.1988, 0.1962]) -Greedy action tensor([ 1.0077, -0.9258, 0.0622, -0.5072]) tensor([0.5705, 0.0825, 0.2216, 0.1254]) -Greedy action tensor([ 0.4503, -0.1672, -0.1312, -0.2656]) tensor([0.3865, 0.2085, 0.2161, 0.1889]) -Greedy action tensor([ 0.1123, -0.6504, -0.8052, -1.4568]) tensor([0.4821, 0.2249, 0.1926, 0.1004]) -Greedy action tensor([ 0.7151, -1.7719, 0.5039, -0.3131]) tensor([0.4444, 0.0370, 0.3598, 0.1589]) -Greedy action tensor([-0.3497, -0.3699, -0.8891, 0.5606]) tensor([0.1981, 0.1941, 0.1155, 0.4923]) -Greedy action tensor([-0.3750, -1.4997, 0.9748, -0.9149]) tensor([0.1735, 0.0563, 0.6691, 0.1011]) -Greedy action tensor([ 1.6318, -0.4336, 0.0933, 0.0806]) tensor([0.6437, 0.0816, 0.1382, 0.1365]) -Greedy action tensor([-0.4764, -0.3841, 0.6502, -0.9792]) tensor([0.1728, 0.1895, 0.5332, 0.1045]) -Greedy action tensor([ 0.4587, -1.3320, -0.0299, 0.1753]) tensor([0.3947, 0.0659, 0.2422, 0.2973]) -Greedy action tensor([-0.8119, -0.5765, 0.5608, -0.1673]) tensor([0.1232, 0.1559, 0.4862, 0.2347]) -Greedy action tensor([ 1.0439, -2.2313, 0.1060, 0.5126]) tensor([0.4958, 0.0187, 0.1941, 0.2914]) -Greedy action tensor([ 1.7723, 0.3804, -0.7514, 1.0317]) tensor([0.5538, 0.1377, 0.0444, 0.2641]) -Greedy action tensor([ 0.0050, 0.6987, 1.2560, -0.7275]) tensor([0.1434, 0.2869, 0.5009, 0.0689]) -Greedy action tensor([-0.1143, 0.5933, -0.1972, -0.0483]) tensor([0.1993, 0.4044, 0.1834, 0.2129]) -Greedy action tensor([-0.4411, -1.5149, 0.6739, 0.2606]) tensor([0.1561, 0.0533, 0.4759, 0.3148]) -Greedy action tensor([ 0.6287, -1.1566, -0.6297, 0.3171]) tensor([0.4579, 0.0768, 0.1301, 0.3353]) -Greedy action tensor([ 0.4540, -1.3419, 0.0341, 0.8320]) tensor([0.3047, 0.0506, 0.2002, 0.4446]) -Greedy action tensor([ 0.9440, 0.6361, -0.1230, -0.2003]) tensor([0.4171, 0.3066, 0.1435, 0.1328]) -Greedy action tensor([ 1.8408, -0.1708, 0.8740, 0.7887]) tensor([0.5367, 0.0718, 0.2041, 0.1874]) -Greedy action tensor([ 0.2630, 0.4699, 1.3041, -0.0537]) tensor([0.1727, 0.2124, 0.4891, 0.1258]) -Greedy action tensor([ 0.7317, 0.3611, 0.6961, -0.5810]) tensor([0.3419, 0.2361, 0.3300, 0.0920]) -Greedy action tensor([-0.6270, -1.2347, 0.9385, -0.8740]) tensor([0.1406, 0.0766, 0.6729, 0.1099]) -Greedy action tensor([-0.1172, -0.0013, -0.4909, 0.8366]) tensor([0.1850, 0.2077, 0.1273, 0.4801]) -Greedy action tensor([-0.1709, -0.7983, -0.2999, -0.3116]) tensor([0.3047, 0.1627, 0.2678, 0.2647]) -Greedy action tensor([ 0.6742, -0.0540, 0.3752, -0.7601]) tensor([0.4061, 0.1960, 0.3011, 0.0968]) -Greedy action tensor([ 1.1301, -0.4823, -0.5198, -0.1263]) tensor([0.5966, 0.1190, 0.1146, 0.1698]) -Greedy action tensor([-0.4270, -0.5089, -0.3302, 0.0310]) tensor([0.2172, 0.2001, 0.2393, 0.3434]) -Greedy action tensor([-0.8125, -1.0657, -0.4825, -0.3407]) tensor([0.2096, 0.1627, 0.2916, 0.3360]) -Greedy action tensor([ 1.3191, -0.1844, 0.4202, 0.8498]) tensor([0.4435, 0.0986, 0.1805, 0.2774]) -Greedy action tensor([-0.6320, -0.6888, -0.3029, -0.3851]) tensor([0.2167, 0.2047, 0.3012, 0.2774]) -Greedy action tensor([ 1.2514, -0.5199, 0.2615, 0.7387]) tensor([0.4672, 0.0795, 0.1736, 0.2798]) -Greedy action tensor([0.4981, 0.5061, 0.1629, 0.4690]) tensor([0.2707, 0.2728, 0.1936, 0.2629]) -Greedy action tensor([ 0.6827, -0.3996, 0.7135, -0.6120]) tensor([0.3782, 0.1281, 0.3900, 0.1036]) -Greedy action tensor([-0.8790, -0.7714, 0.5075, -0.6027]) tensor([0.1345, 0.1498, 0.5383, 0.1774]) -Greedy action tensor([-0.6448, -0.7615, -0.7695, -0.5785]) tensor([0.2603, 0.2317, 0.2298, 0.2782]) -Greedy action tensor([-0.3055, -0.7347, 0.0047, -0.2094]) tensor([0.2430, 0.1582, 0.3313, 0.2675]) -Greedy action tensor([-0.1984, -1.2199, -0.4931, -0.5637]) tensor([0.3573, 0.1286, 0.2661, 0.2480]) -Greedy action tensor([-0.5133, -0.6225, 0.1945, -1.2094]) tensor([0.2260, 0.2026, 0.4587, 0.1127]) -Greedy action tensor([ 0.5585, -0.5553, -0.3360, 0.6347]) tensor([0.3551, 0.1166, 0.1452, 0.3832]) -Greedy action tensor([ 1.2675, -1.8047, 0.1798, -0.2254]) tensor([0.6219, 0.0288, 0.2096, 0.1397]) -Greedy action tensor([ 0.2578, 0.9117, 0.2700, -0.3596]) tensor([0.2235, 0.4297, 0.2262, 0.1205]) -Greedy action tensor([1.2705, 0.4218, 0.1409, 0.3380]) tensor([0.4663, 0.1995, 0.1507, 0.1835]) -Greedy action tensor([ 1.4946, -0.9567, 0.2142, 0.6431]) tensor([0.5584, 0.0481, 0.1552, 0.2383]) -Greedy action tensor([ 1.7793, -1.0345, 1.9996, 0.6250]) tensor([0.3814, 0.0229, 0.4754, 0.1203]) -Greedy action tensor([0.4535, 0.0334, 0.3437, 0.4919]) tensor([0.2784, 0.1829, 0.2494, 0.2893]) -Greedy action tensor([ 0.6499, -1.4435, 0.6814, 0.4520]) tensor([0.3361, 0.0414, 0.3468, 0.2757]) -Greedy action tensor([ 0.8650, -0.5009, 1.9192, 1.4411]) tensor([0.1694, 0.0432, 0.4861, 0.3013]) -Greedy action tensor([-0.7490, -0.0861, 0.6412, -0.5210]) tensor([0.1218, 0.2363, 0.4890, 0.1530]) -Greedy action tensor([ 0.0768, -0.8382, 0.7939, -0.0496]) tensor([0.2309, 0.0925, 0.4731, 0.2035]) -Greedy action tensor([ 0.4197, 0.6352, 1.1254, -0.2965]) tensor([0.2103, 0.2609, 0.4260, 0.1028]) -Greedy action tensor([0.6476, 0.2804, 1.1238, 0.5385]) tensor([0.2381, 0.1650, 0.3834, 0.2135]) -Greedy action tensor([-0.6943, -1.4720, -0.6260, -0.6158]) tensor([0.2769, 0.1272, 0.2964, 0.2995]) -Greedy action tensor([-0.0056, -0.4291, 0.4532, 0.2190]) tensor([0.2228, 0.1459, 0.3525, 0.2789]) -Greedy action tensor([ 0.5099, -0.5954, -0.1157, -0.1018]) tensor([0.4152, 0.1375, 0.2221, 0.2252]) -Greedy action tensor([ 0.2700, -0.7139, -0.6161, 0.4690]) tensor([0.3326, 0.1244, 0.1371, 0.4059]) -Greedy action tensor([-0.0823, 0.2225, 0.7076, 0.9816]) tensor([0.1341, 0.1819, 0.2954, 0.3886]) -Greedy action tensor([-0.0048, -0.7671, 0.6576, -0.2132]) tensor([0.2371, 0.1106, 0.4598, 0.1925]) -Greedy action tensor([ 0.0731, -0.5152, 0.5552, -0.1969]) tensor([0.2539, 0.1410, 0.4112, 0.1938]) -Greedy action tensor([-0.5205, -0.3825, 0.2148, 0.4148]) tensor([0.1475, 0.1693, 0.3076, 0.3757]) -Greedy action tensor([ 0.4069, -0.5393, 0.4596, -0.3111]) tensor([0.3413, 0.1325, 0.3598, 0.1665]) -Greedy action tensor([ 1.0511, -0.9136, 1.2466, 0.5488]) tensor([0.3377, 0.0473, 0.4106, 0.2043]) -Greedy action tensor([-0.2293, -1.3705, -0.8049, 0.3430]) tensor([0.2737, 0.0874, 0.1539, 0.4850]) -Greedy action tensor([-1.2462, -0.7203, 0.9397, -0.4196]) tensor([0.0721, 0.1219, 0.6413, 0.1647]) -Greedy action tensor([ 0.7806, -1.1932, 0.0077, 0.5438]) tensor([0.4184, 0.0581, 0.1932, 0.3302]) -Greedy action tensor([-0.4917, -0.8768, 0.6764, -0.2210]) tensor([0.1611, 0.1096, 0.5181, 0.2112]) -Greedy action tensor([ 0.2204, -1.6956, 0.0739, 1.0285]) tensor([0.2350, 0.0346, 0.2030, 0.5274]) -Greedy action tensor([-0.9374, -1.0673, -0.8449, -0.8169]) tensor([0.2437, 0.2140, 0.2673, 0.2749]) -Greedy action tensor([-0.4645, -0.0372, -0.0104, -1.0277]) tensor([0.2138, 0.3278, 0.3367, 0.1217]) -Greedy action tensor([ 1.5995, 0.3957, 0.1735, -0.2166]) tensor([0.5872, 0.1762, 0.1411, 0.0955]) -Greedy action tensor([ 0.5809, -0.7330, -0.2760, 1.0796]) tensor([0.2994, 0.0805, 0.1271, 0.4930]) -Greedy action tensor([-0.4231, -0.1283, -0.7901, -0.2750]) tensor([0.2384, 0.3201, 0.1651, 0.2764]) -Greedy action tensor([-0.3197, -0.5360, 0.7110, 0.2311]) tensor([0.1576, 0.1270, 0.4419, 0.2735]) -Greedy action tensor([-1.0511, -1.7183, 1.2449, 0.6584]) tensor([0.0589, 0.0302, 0.5853, 0.3256]) -Greedy action tensor([ 0.8640, -0.2698, 0.8405, -0.3948]) tensor([0.3872, 0.1246, 0.3782, 0.1100]) -Greedy action tensor([-0.1751, -1.5511, 0.2182, 0.7126]) tensor([0.1936, 0.0489, 0.2870, 0.4705]) -Greedy action tensor([ 0.3766, -0.1677, -0.5610, -0.1126]) tensor([0.3869, 0.2245, 0.1515, 0.2372]) -Greedy action tensor([ 0.3656, -1.6503, -0.3715, 0.6943]) tensor([0.3332, 0.0444, 0.1595, 0.4629]) -Greedy action tensor([-0.3890, -0.5034, 0.5486, -0.8765]) tensor([0.1976, 0.1763, 0.5047, 0.1214]) -Greedy action tensor([-0.5988, -0.0243, 0.9987, -0.8276]) tensor([0.1175, 0.2087, 0.5804, 0.0934]) -Greedy action tensor([-0.4771, -0.0283, -1.0590, -0.2894]) tensor([0.2309, 0.3616, 0.1290, 0.2785]) -Greedy action tensor([ 0.6444, 0.3723, 0.2673, -0.8416]) tensor([0.3740, 0.2849, 0.2565, 0.0846]) -Greedy action tensor([ 0.7318, 1.2577, 0.1465, -0.3616]) tensor([0.2790, 0.4721, 0.1554, 0.0935]) -Greedy action tensor([-0.1944, -0.8794, -0.4051, 0.8835]) tensor([0.1904, 0.0960, 0.1542, 0.5594]) -Greedy action tensor([-0.2084, 0.1771, -0.2837, -0.0776]) tensor([0.2204, 0.3240, 0.2044, 0.2512]) -Greedy action tensor([-1.8457, -0.4594, 0.6261, -0.1260]) tensor([0.0446, 0.1784, 0.5281, 0.2489]) -Greedy action tensor([-1.9016, -0.4049, 0.6416, -0.1581]) tensor([0.0418, 0.1869, 0.5321, 0.2392]) -Greedy action tensor([-1.9395, -0.4509, 0.6690, -0.1748]) tensor([0.0402, 0.1783, 0.5465, 0.2350]) -Greedy action tensor([-1.2933, 0.2612, 0.5504, 0.4129]) tensor([0.0569, 0.2695, 0.3599, 0.3137]) -Greedy action tensor([-1.9422, -0.9645, 0.7080, -0.0946]) tensor([0.0414, 0.1100, 0.5860, 0.2626]) -Greedy action tensor([-1.6639, -0.2715, 0.5748, -0.0181]) tensor([0.0510, 0.2054, 0.4789, 0.2647]) -Greedy action tensor([-1.8454, -0.4055, 0.6191, -0.1297]) tensor([0.0444, 0.1873, 0.5217, 0.2467]) -Greedy action tensor([-1.8232, -0.2308, 0.5922, -0.0913]) tensor([0.0439, 0.2160, 0.4918, 0.2483]) -Greedy action tensor([-1.3055, -0.4664, 0.4402, 0.4017]) tensor([0.0687, 0.1590, 0.3936, 0.3787]) -Greedy action tensor([-1.8999, -0.4485, 0.6476, -0.1591]) tensor([0.0421, 0.1798, 0.5380, 0.2401]) -Greedy action tensor([-1.8432, -0.4276, 0.6400, -0.1015]) tensor([0.0439, 0.1806, 0.5253, 0.2503]) -Greedy action tensor([-1.1723, -0.0897, 0.2166, 0.3326]) tensor([0.0802, 0.2368, 0.3217, 0.3613]) -Greedy action tensor([-1.7408, -0.5176, 0.7183, 0.1233]) tensor([0.0444, 0.1507, 0.5188, 0.2861]) -Greedy action tensor([-1.9002, -0.3749, 0.6424, -0.1604]) tensor([0.0417, 0.1915, 0.5296, 0.2373]) -Greedy action tensor([-0.6455, 0.7111, 0.3183, 0.7256]) tensor([0.0874, 0.3393, 0.2291, 0.3442]) -Greedy action tensor([-1.9127, -0.3724, 0.6483, -0.1424]) tensor([0.0408, 0.1905, 0.5288, 0.2398]) -Greedy action tensor([-1.9176, -0.4523, 0.6494, -0.1708]) tensor([0.0415, 0.1797, 0.5407, 0.2381]) -Greedy action tensor([-1.5574, 0.1132, 0.5593, -0.6112]) tensor([0.0582, 0.3091, 0.4829, 0.1498]) -Greedy action tensor([-1.8867, -0.4495, 0.6384, -0.1557]) tensor([0.0428, 0.1803, 0.5350, 0.2419]) -Greedy action tensor([-1.2852, -0.5043, 0.4217, 0.3481]) tensor([0.0724, 0.1580, 0.3990, 0.3706]) -Greedy action tensor([-0.5758, -0.5523, 0.1605, 0.2968]) tensor([0.1537, 0.1574, 0.3210, 0.3679]) -Greedy action tensor([-1.9077, -0.4139, 0.6472, -0.1604]) tensor([0.0416, 0.1851, 0.5348, 0.2385]) -Greedy action tensor([-1.8638, -0.4544, 0.6671, -0.1251]) tensor([0.0428, 0.1753, 0.5382, 0.2437]) -Greedy action tensor([-1.5463, 0.1411, 0.5077, 0.1362]) tensor([0.0511, 0.2760, 0.3982, 0.2747]) -Greedy action tensor([-1.0979, -0.6329, 0.2331, 0.2793]) tensor([0.0967, 0.1540, 0.3660, 0.3833]) -Greedy action tensor([-1.7943, -0.4455, 0.6312, -0.0205]) tensor([0.0453, 0.1747, 0.5127, 0.2672]) -Greedy action tensor([-0.0591, 0.9137, 0.0739, 0.6570]) tensor([0.1463, 0.3871, 0.1671, 0.2995]) -Greedy action tensor([-0.5132, 0.0637, 0.9331, 1.4804]) tensor([0.0696, 0.1239, 0.2956, 0.5109]) -Greedy action tensor([-1.9421, -0.4454, 0.6657, -0.1793]) tensor([0.0402, 0.1796, 0.5457, 0.2344]) -Greedy action tensor([-1.8991, -0.3531, 0.6378, -0.1534]) tensor([0.0416, 0.1950, 0.5253, 0.2381]) -Greedy action tensor([-1.7807, -0.3633, 0.6430, -0.0807]) tensor([0.0457, 0.1885, 0.5157, 0.2501]) -Greedy action tensor([-1.6143, -0.5814, 0.5143, 0.0199]) tensor([0.0577, 0.1620, 0.4847, 0.2956]) -Greedy action tensor([-1.8908, -0.4503, 0.6429, -0.1487]) tensor([0.0425, 0.1794, 0.5355, 0.2426]) -Greedy action tensor([-1.9424, -0.4532, 0.6710, -0.1770]) tensor([0.0401, 0.1779, 0.5475, 0.2345]) -Greedy action tensor([-1.3979, 0.0681, 0.5494, 0.1719]) tensor([0.0583, 0.2526, 0.4088, 0.2803]) -Greedy action tensor([-1.9332, -0.4459, 0.6686, -0.1698]) tensor([0.0404, 0.1788, 0.5451, 0.2357]) -Greedy action tensor([-1.9198, -0.4491, 0.6561, -0.1668]) tensor([0.0412, 0.1793, 0.5416, 0.2379]) -Greedy action tensor([-1.3601, 0.1113, 0.5751, 0.4265]) tensor([0.0548, 0.2386, 0.3795, 0.3271]) -Greedy action tensor([-1.8690, -0.4061, 0.6290, -0.1341]) tensor([0.0432, 0.1866, 0.5253, 0.2449]) -Greedy action tensor([-1.8398, -0.3312, 0.6144, -0.1133]) tensor([0.0439, 0.1985, 0.5109, 0.2468]) -Greedy action tensor([-1.9046, -0.4153, 0.6455, -0.1580]) tensor([0.0417, 0.1849, 0.5342, 0.2392]) -Greedy action tensor([-0.7189, 0.1163, 0.3105, 0.4704]) tensor([0.1065, 0.2455, 0.2981, 0.3498]) -Greedy action tensor([-1.1544, -0.5634, 0.2716, 0.2242]) tensor([0.0914, 0.1651, 0.3805, 0.3629]) -Greedy action tensor([-1.7255, 0.2833, 0.4422, -0.0037]) tensor([0.0439, 0.3271, 0.3835, 0.2455]) -Greedy action tensor([-1.8901, -0.4317, 0.6394, -0.1408]) tensor([0.0424, 0.1822, 0.5317, 0.2437]) -Greedy action tensor([-0.6430, -0.7404, 0.9604, 1.5538]) tensor([0.0630, 0.0572, 0.3131, 0.5667]) -Greedy action tensor([-1.5363, -0.4840, 0.5011, 0.2269]) tensor([0.0576, 0.1649, 0.4417, 0.3358]) -Greedy action tensor([-1.8637, -0.1211, 0.5755, -0.1159]) tensor([0.0418, 0.2388, 0.4793, 0.2401]) -Greedy action tensor([-1.8255, -0.4082, 0.6086, -0.1136]) tensor([0.0453, 0.1869, 0.5168, 0.2510]) -Greedy action tensor([-1.6626, -0.1645, 0.4764, -0.0495]) tensor([0.0527, 0.2357, 0.4473, 0.2644]) -Greedy action tensor([-1.7292, -0.3889, 0.5673, -0.0406]) tensor([0.0496, 0.1894, 0.4928, 0.2683]) -Greedy action tensor([-1.4008, -0.2592, 0.5523, 0.4764]) tensor([0.0564, 0.1768, 0.3979, 0.3689]) -Greedy action tensor([-1.6993, 0.0435, 0.4910, 0.0129]) tensor([0.0472, 0.2696, 0.4217, 0.2615]) -Greedy action tensor([-1.5512, -0.4419, 0.6412, 0.3698]) tensor([0.0505, 0.1530, 0.4520, 0.3446]) -Greedy action tensor([-1.9045, -0.4176, 0.6439, -0.1493]) tensor([0.0417, 0.1844, 0.5329, 0.2411]) -Greedy action tensor([-1.6473, -0.4919, 0.5636, 0.1487]) tensor([0.0517, 0.1643, 0.4722, 0.3118]) -Greedy action tensor([-1.9148, -0.3666, 0.6448, -0.1486]) tensor([0.0408, 0.1921, 0.5282, 0.2389]) -Greedy action tensor([-1.7304, 0.1233, 0.5108, -0.0371]) tensor([0.0450, 0.2872, 0.4231, 0.2446]) -Greedy action tensor([-1.6001, -0.4472, 0.5684, -0.2133]) tensor([0.0591, 0.1873, 0.5170, 0.2366]) -Greedy action tensor([-1.7551, -0.4505, 0.6720, 0.1705]) tensor([0.0437, 0.1612, 0.4952, 0.2999]) -Greedy action tensor([-1.8940, -0.4386, 0.6427, -0.1537]) tensor([0.0423, 0.1814, 0.5350, 0.2413]) -Greedy action tensor([-0.7386, 0.2142, 0.3616, 0.6152]) tensor([0.0955, 0.2477, 0.2870, 0.3698]) -Greedy action tensor([-1.9370, -0.4430, 0.6648, -0.1753]) tensor([0.0404, 0.1799, 0.5446, 0.2351]) -Greedy action tensor([-1.9436, -0.4476, 0.6682, -0.1779]) tensor([0.0401, 0.1790, 0.5464, 0.2345]) -Greedy action tensor([-1.9380, -0.4529, 0.6653, -0.1764]) tensor([0.0404, 0.1784, 0.5459, 0.2353]) -Greedy action tensor([-1.3631, -0.2233, 0.4144, 0.3002]) tensor([0.0653, 0.2041, 0.3862, 0.3445]) -Greedy action tensor([-1.8897, -0.4028, 0.6557, -0.1356]) tensor([0.0418, 0.1847, 0.5323, 0.2413]) -Greedy action tensor([-1.7209, -0.3880, 0.5565, -0.0723]) tensor([0.0507, 0.1921, 0.4939, 0.2634]) -Greedy action tensor([-1.9373, -0.4496, 0.6642, -0.1740]) tensor([0.0404, 0.1789, 0.5450, 0.2357]) -Greedy action tensor([-1.8682, -0.3654, 0.6315, -0.1325]) tensor([0.0428, 0.1925, 0.5217, 0.2430]) -Greedy action tensor([-1.1536, -0.5823, 0.2202, 0.2951]) tensor([0.0911, 0.1613, 0.3598, 0.3878]) -Greedy action tensor([-1.8897, -0.4289, 0.6402, -0.1508]) tensor([0.0425, 0.1830, 0.5330, 0.2416]) -Greedy action tensor([-1.9339, -0.4453, 0.6595, -0.1759]) tensor([0.0406, 0.1801, 0.5436, 0.2357]) -Greedy action tensor([-1.9221, -0.4279, 0.6539, -0.1688]) tensor([0.0410, 0.1828, 0.5393, 0.2369]) -Greedy action tensor([-1.1241, -0.7158, 0.9814, 1.0817]) tensor([0.0505, 0.0760, 0.4148, 0.4586]) -Greedy action tensor([-1.7331, -0.3456, 0.6100, 0.0272]) tensor([0.0471, 0.1886, 0.4904, 0.2738]) -Greedy action tensor([-0.7631, 0.6987, 0.1215, -0.0550]) tensor([0.1024, 0.4417, 0.2480, 0.2079]) -Greedy action tensor([-1.3038, -0.4212, 0.5962, 0.7033]) tensor([0.0570, 0.1378, 0.3811, 0.4242]) -Greedy action tensor([-1.8353, -0.2354, 0.6098, -0.0791]) tensor([0.0430, 0.2128, 0.4954, 0.2488]) -Greedy action tensor([-1.7930, -0.0375, 0.5587, -0.0480]) tensor([0.0435, 0.2514, 0.4564, 0.2488]) -Greedy action tensor([-1.7015, -0.0050, 0.4606, -0.0086]) tensor([0.0486, 0.2651, 0.4222, 0.2641]) -Greedy action tensor([ 0.5239, -0.3929, 0.0858, 0.4435]) tensor([0.3369, 0.1347, 0.2174, 0.3109]) -Greedy action tensor([ 2.0075, -0.8393, -0.6251, 0.7350]) tensor([0.7092, 0.0412, 0.0510, 0.1987]) -Greedy action tensor([ 2.5043, -1.2309, -0.2460, 0.5160]) tensor([0.8165, 0.0195, 0.0522, 0.1118]) -Greedy action tensor([ 1.6170, -0.4792, -0.4799, 0.5506]) tensor([0.6289, 0.0773, 0.0773, 0.2165]) -Greedy action tensor([ 1.2887, -0.5819, -0.4080, 0.2013]) tensor([0.5972, 0.0920, 0.1095, 0.2013]) -Greedy action tensor([ 1.6707, -0.3883, -0.7533, 0.4501]) tensor([0.6617, 0.0844, 0.0586, 0.1952]) -Greedy action tensor([ 1.2797, -0.3755, -0.1387, -0.0820]) tensor([0.5919, 0.1131, 0.1433, 0.1517]) -Greedy action tensor([ 1.7159, -0.1794, -0.8231, 0.0719]) tensor([0.7030, 0.1056, 0.0555, 0.1358]) -Greedy action tensor([ 1.7282, -0.6445, -0.3755, 0.2513]) tensor([0.6927, 0.0646, 0.0845, 0.1582]) -Greedy action tensor([ 1.3066, -0.4987, -0.3733, 0.5939]) tensor([0.5431, 0.0893, 0.1012, 0.2663]) -Greedy action tensor([ 1.6437, -0.4331, -0.6525, 0.2800]) tensor([0.6749, 0.0846, 0.0679, 0.1726]) -Greedy action tensor([ 1.4682, -0.4677, -0.0747, -0.0258]) tensor([0.6319, 0.0912, 0.1351, 0.1418]) -Greedy action tensor([ 1.4074, -0.6582, -0.4477, 0.4975]) tensor([0.5932, 0.0752, 0.0928, 0.2388]) -Greedy action tensor([ 1.7435, -0.6899, -0.7153, 0.2626]) tensor([0.7139, 0.0626, 0.0611, 0.1624]) -Greedy action tensor([ 1.4923, -0.5197, -0.4974, 0.0061]) tensor([0.6681, 0.0893, 0.0914, 0.1512]) -Greedy action tensor([ 1.2529, -0.6772, -0.3036, 0.2601]) tensor([0.5792, 0.0841, 0.1221, 0.2146]) -Greedy action tensor([ 1.0151, -0.6743, -0.4548, 0.2533]) tensor([0.5315, 0.0981, 0.1222, 0.2481]) -Greedy action tensor([ 1.2765, -0.2769, -0.1972, 0.1454]) tensor([0.5671, 0.1200, 0.1299, 0.1830]) -Greedy action tensor([ 1.7085, -0.5173, -0.7405, 0.1499]) tensor([0.7119, 0.0769, 0.0615, 0.1498]) -Greedy action tensor([ 1.3992, -0.8380, -0.3652, 0.4119]) tensor([0.6058, 0.0647, 0.1038, 0.2257]) -Greedy action tensor([ 1.1970, -0.1949, -0.2021, 0.3906]) tensor([0.5150, 0.1280, 0.1271, 0.2299]) -Greedy action tensor([ 1.7959, -0.7193, -0.3541, 0.7805]) tensor([0.6412, 0.0518, 0.0747, 0.2323]) -Greedy action tensor([ 2.0852, -1.2999, 0.1583, 0.5987]) tensor([0.7114, 0.0241, 0.1036, 0.1609]) -Greedy action tensor([ 1.6823, 0.0711, -0.2423, 0.7026]) tensor([0.5811, 0.1160, 0.0848, 0.2181]) -Greedy action tensor([ 1.4756, -0.2850, -0.4627, 0.3627]) tensor([0.6081, 0.1046, 0.0875, 0.1998]) -Greedy action tensor([ 2.2207, -1.1983, -0.1642, 0.7711]) tensor([0.7356, 0.0241, 0.0677, 0.1726]) -Greedy action tensor([ 0.8245, -0.7127, -0.2696, 0.2792]) tensor([0.4696, 0.1010, 0.1572, 0.2722]) -Greedy action tensor([ 0.9135, -0.0229, -0.3874, 0.5468]) tensor([0.4242, 0.1663, 0.1155, 0.2940]) -Greedy action tensor([ 1.7254, -0.1460, -0.1092, 0.7060]) tensor([0.5972, 0.0919, 0.0954, 0.2155]) -Greedy action tensor([ 1.1627, -0.3428, -0.2961, 0.2844]) tensor([0.5348, 0.1187, 0.1243, 0.2222]) -Greedy action tensor([ 2.0103, -1.2462, -0.2147, 0.7860]) tensor([0.6942, 0.0267, 0.0750, 0.2041]) -Greedy action tensor([ 1.8221, -0.2932, -0.9272, 0.5496]) tensor([0.6827, 0.0823, 0.0437, 0.1912]) -Greedy action tensor([ 1.2237, -0.1311, -0.4456, 0.2997]) tensor([0.5425, 0.1400, 0.1022, 0.2153]) -Greedy action tensor([ 1.4236, -0.4742, -0.1238, 0.6459]) tensor([0.5488, 0.0823, 0.1168, 0.2521]) -Greedy action tensor([ 2.3066, -1.2175, -0.3657, 0.5114]) tensor([0.7907, 0.0233, 0.0546, 0.1313]) -Greedy action tensor([ 1.0737, -0.3896, -0.2989, 0.4711]) tensor([0.4921, 0.1139, 0.1247, 0.2693]) -Greedy action tensor([ 0.8804, -0.2124, -0.4633, 0.3742]) tensor([0.4548, 0.1525, 0.1186, 0.2741]) -Greedy action tensor([ 1.1586, -0.6038, -0.1463, 0.6196]) tensor([0.4936, 0.0847, 0.1338, 0.2879]) -Greedy action tensor([ 1.5615, -0.8461, -0.0437, 0.3279]) tensor([0.6321, 0.0569, 0.1270, 0.1841]) -Greedy action tensor([ 1.6694, -0.9998, -0.4618, 0.6340]) tensor([0.6480, 0.0449, 0.0769, 0.2301]) -Greedy action tensor([ 0.9141, -0.1535, -0.1752, 0.1564]) tensor([0.4653, 0.1600, 0.1566, 0.2181]) -Greedy action tensor([ 0.8392, -0.6204, -0.3582, 0.2527]) tensor([0.4783, 0.1111, 0.1445, 0.2661]) -Greedy action tensor([ 1.4443, -0.6618, -0.4293, -0.0638]) tensor([0.6682, 0.0813, 0.1026, 0.1479]) -Greedy action tensor([ 1.8212, -0.9310, -0.2641, 0.6928]) tensor([0.6615, 0.0422, 0.0822, 0.2141]) -Greedy action tensor([ 1.7551, -1.0167, -0.2191, 0.2310]) tensor([0.7046, 0.0441, 0.0979, 0.1535]) -Greedy action tensor([ 1.1645, 0.0569, -0.5082, 0.5061]) tensor([0.4912, 0.1623, 0.0922, 0.2543]) -Greedy action tensor([ 1.9986, 0.1077, -0.1031, 0.1431]) tensor([0.6995, 0.1056, 0.0855, 0.1094]) -Greedy action tensor([ 1.2626, -0.0608, -0.6083, 0.3046]) tensor([0.5544, 0.1476, 0.0854, 0.2127]) -Greedy action tensor([ 1.2737, -0.4421, -0.5167, 0.3478]) tensor([0.5738, 0.1032, 0.0958, 0.2273]) -Greedy action tensor([ 1.2586, -0.5431, -0.5090, 0.1667]) tensor([0.5983, 0.0987, 0.1022, 0.2008]) -Greedy action tensor([ 1.1549, -0.3819, -0.2861, -0.0638]) tensor([0.5723, 0.1231, 0.1355, 0.1692]) -Greedy action tensor([ 2.1410, -1.0428, -0.4491, 0.4070]) tensor([0.7734, 0.0320, 0.0580, 0.1366]) -Greedy action tensor([ 2.2751, -0.6827, -0.8595, 0.5330]) tensor([0.7870, 0.0409, 0.0342, 0.1378]) -Greedy action tensor([ 1.3272, -0.8348, -0.0463, 0.2512]) tensor([0.5851, 0.0673, 0.1481, 0.1995]) -Greedy action tensor([ 1.9536, -0.3565, -0.5073, 0.1905]) tensor([0.7374, 0.0732, 0.0629, 0.1265]) -Greedy action tensor([ 1.7378, -0.3056, -0.2994, 0.3828]) tensor([0.6588, 0.0854, 0.0859, 0.1699]) -Greedy action tensor([ 1.3642, 0.1737, -0.2812, 0.3167]) tensor([0.5412, 0.1646, 0.1044, 0.1899]) -Greedy action tensor([ 1.0036, -0.1129, -0.2824, 0.1836]) tensor([0.4892, 0.1602, 0.1352, 0.2155]) -Greedy action tensor([ 1.7811, 0.2146, -0.1611, 0.2830]) tensor([0.6346, 0.1325, 0.0910, 0.1419]) -Greedy action tensor([ 1.4901, -0.4113, -0.7959, 0.6795]) tensor([0.5898, 0.0881, 0.0600, 0.2622]) -Greedy action tensor([ 1.5595, -0.7672, -0.3995, 0.6839]) tensor([0.6041, 0.0590, 0.0852, 0.2517]) -Greedy action tensor([ 1.2059, -0.3934, -0.1913, 0.1202]) tensor([0.5596, 0.1131, 0.1384, 0.1890]) -Greedy action tensor([ 1.0706, -0.5347, -0.2268, 0.4036]) tensor([0.5032, 0.1011, 0.1375, 0.2583]) -Greedy action tensor([ 1.1358, 0.2131, -0.8482, 0.2763]) tensor([0.5107, 0.2029, 0.0702, 0.2162]) -Greedy action tensor([ 1.9921, -1.1827, -0.0678, 0.7065]) tensor([0.6917, 0.0289, 0.0882, 0.1912]) -Greedy action tensor([ 1.9471, 0.5122, -0.2004, 0.3052]) tensor([0.6458, 0.1538, 0.0754, 0.1250]) -Greedy action tensor([ 1.3646, -0.6609, -0.3608, 0.5376]) tensor([0.5723, 0.0755, 0.1019, 0.2503]) -Greedy action tensor([ 1.6481, -0.6772, -0.2235, 0.9048]) tensor([0.5790, 0.0566, 0.0891, 0.2753]) -Greedy action tensor([ 1.5131, -0.1448, -0.5220, 0.3689]) tensor([0.6099, 0.1162, 0.0797, 0.1942]) -Greedy action tensor([ 2.3885, -1.1954, -0.1804, 0.9425]) tensor([0.7463, 0.0207, 0.0572, 0.1758]) -Greedy action tensor([ 1.4191, -0.7715, -0.2923, 0.4593]) tensor([0.5969, 0.0668, 0.1078, 0.2286]) -Greedy action tensor([ 1.6465, -0.5850, -0.1427, 0.2812]) tensor([0.6537, 0.0702, 0.1092, 0.1669]) -Greedy action tensor([ 1.1740, -0.5304, 0.0046, 0.3603]) tensor([0.5166, 0.0940, 0.1604, 0.2290]) -Greedy action tensor([ 2.0061, -0.8414, -0.4157, 0.2420]) tensor([0.7587, 0.0440, 0.0673, 0.1300]) -Greedy action tensor([ 1.4855, -0.7794, -0.1721, 0.1264]) tensor([0.6446, 0.0669, 0.1229, 0.1656]) -Greedy action tensor([ 1.4450, 0.1547, -0.0671, 0.6895]) tensor([0.5088, 0.1400, 0.1122, 0.2390]) -Greedy action tensor([ 1.7701, -0.2783, -0.6764, 0.1840]) tensor([0.7041, 0.0908, 0.0610, 0.1441]) -Greedy action tensor([ 2.0294, -0.2876, -0.1810, 0.3609]) tensor([0.7160, 0.0706, 0.0785, 0.1350]) -Greedy action tensor([ 1.6846, -0.7732, -0.3953, 0.4509]) tensor([0.6659, 0.0570, 0.0832, 0.1939]) -Greedy action tensor([ 1.3303, -0.6155, -0.1266, 0.1253]) tensor([0.5968, 0.0853, 0.1390, 0.1789]) -Greedy action tensor([ 1.6249, -0.7111, -0.3135, 0.3851]) tensor([0.6536, 0.0632, 0.0941, 0.1892]) -Greedy action tensor([ 0.4038, -0.0224, -0.3940, -0.2251]) tensor([0.3793, 0.2477, 0.1708, 0.2022]) -Greedy action tensor([ 0.4048, -0.0146, -0.2293, -0.2476]) tensor([0.3692, 0.2427, 0.1958, 0.1923]) -Greedy action tensor([ 0.4228, -0.0363, -0.0680, -0.0337]) tensor([0.3475, 0.2196, 0.2127, 0.2202]) -Greedy action tensor([ 0.4423, -0.1203, -0.0340, -0.1355]) tensor([0.3634, 0.2070, 0.2257, 0.2039]) -Greedy action tensor([ 0.4550, -0.3079, -0.0146, -0.1902]) tensor([0.3822, 0.1782, 0.2390, 0.2005]) -Greedy action tensor([ 0.5397, -0.0481, -0.0178, -0.1001]) tensor([0.3766, 0.2092, 0.2156, 0.1986]) -Greedy action tensor([ 0.8678, -0.6473, -0.1207, -0.2509]) tensor([0.5212, 0.1146, 0.1939, 0.1703]) -Greedy action tensor([ 0.7817, -0.2794, -0.0875, -0.1773]) tensor([0.4654, 0.1611, 0.1951, 0.1784]) -Greedy action tensor([ 0.7647, -0.1672, 0.0731, -0.2866]) tensor([0.4456, 0.1755, 0.2232, 0.1557]) -Greedy action tensor([ 0.3462, -0.2356, -0.0876, -0.3103]) tensor([0.3669, 0.2051, 0.2378, 0.1903]) -Greedy action tensor([ 0.6833, -0.5157, -0.0176, -0.6026]) tensor([0.4821, 0.1454, 0.2392, 0.1333]) -Greedy action tensor([ 0.9834, -0.8520, 0.2347, -0.7160]) tensor([0.5509, 0.0879, 0.2605, 0.1007]) -Greedy action tensor([ 0.6434, -0.1283, 0.0518, -0.0209]) tensor([0.3952, 0.1827, 0.2187, 0.2034]) -Greedy action tensor([ 0.2913, -0.1012, -0.1182, -0.1613]) tensor([0.3361, 0.2270, 0.2232, 0.2138]) -Greedy action tensor([ 0.9695, -0.5986, -0.0285, -0.3428]) tensor([0.5416, 0.1129, 0.1997, 0.1458]) -Greedy action tensor([ 0.6785, -0.6707, -0.1415, -0.3276]) tensor([0.4841, 0.1256, 0.2132, 0.1770]) -Greedy action tensor([ 0.2444, 0.0296, -0.1116, -0.2621]) tensor([0.3216, 0.2594, 0.2252, 0.1938]) -Greedy action tensor([ 0.2237, 0.0977, -0.0274, -0.0029]) tensor([0.2893, 0.2550, 0.2251, 0.2306]) -Greedy action tensor([ 1.4344, -0.7238, 0.3453, -0.5480]) tensor([0.6290, 0.0727, 0.2117, 0.0866]) -Greedy action tensor([ 0.9145, -0.3563, 0.0242, -0.4207]) tensor([0.5117, 0.1436, 0.2101, 0.1346]) -Greedy action tensor([ 1.1055, -0.5953, -0.0385, -0.5764]) tensor([0.5927, 0.1082, 0.1888, 0.1103]) -Greedy action tensor([ 0.6681, -0.4722, 0.0533, -0.4189]) tensor([0.4550, 0.1455, 0.2461, 0.1534]) -Greedy action tensor([ 0.6785, -0.3793, -0.2235, -0.0713]) tensor([0.4494, 0.1560, 0.1823, 0.2123]) -Greedy action tensor([ 0.7326, -0.6016, -0.1290, -0.6644]) tensor([0.5173, 0.1362, 0.2185, 0.1279]) -Greedy action tensor([ 0.9549, -0.5873, 0.1687, -0.5069]) tensor([0.5260, 0.1125, 0.2396, 0.1219]) -Greedy action tensor([ 1.0200, -0.6620, -0.0087, -0.6568]) tensor([0.5779, 0.1075, 0.2066, 0.1080]) -Greedy action tensor([ 0.5604, -0.2113, -0.0336, -0.1382]) tensor([0.3981, 0.1840, 0.2198, 0.1980]) -Greedy action tensor([ 0.3874, 0.2119, -0.0294, -0.2251]) tensor([0.3289, 0.2760, 0.2168, 0.1783]) -Greedy action tensor([ 0.7452, -0.4974, 0.1062, -0.3610]) tensor([0.4657, 0.1344, 0.2458, 0.1541]) -Greedy action tensor([ 0.5186, 0.0828, -0.0402, -0.0774]) tensor([0.3611, 0.2335, 0.2065, 0.1989]) -Greedy action tensor([ 0.5683, -0.2804, -0.0999, -0.0462]) tensor([0.4030, 0.1725, 0.2066, 0.2180]) -Greedy action tensor([ 0.5750, -0.5371, -0.0713, -0.1082]) tensor([0.4241, 0.1395, 0.2222, 0.2142]) -Greedy action tensor([ 0.3718, 0.2060, -0.1527, -0.3249]) tensor([0.3404, 0.2884, 0.2015, 0.1696]) -Greedy action tensor([ 0.4959, -0.0185, 0.0080, -0.2371]) tensor([0.3714, 0.2221, 0.2280, 0.1785]) -Greedy action tensor([ 0.6045, -0.2565, 0.0234, -0.4219]) tensor([0.4273, 0.1806, 0.2390, 0.1531]) -Greedy action tensor([ 0.5435, -0.5021, -0.1141, -0.1793]) tensor([0.4246, 0.1492, 0.2200, 0.2061]) -Greedy action tensor([ 0.5306, -0.2008, -0.0482, -0.0374]) tensor([0.3834, 0.1845, 0.2149, 0.2172]) -Greedy action tensor([ 0.9484, -0.3151, -0.0305, -0.2393]) tensor([0.5093, 0.1440, 0.1914, 0.1553]) -Greedy action tensor([ 0.8167, -0.3032, -0.2758, -0.1655]) tensor([0.4911, 0.1603, 0.1647, 0.1839]) -Greedy action tensor([ 1.4360, -0.6381, -0.1847, -0.8069]) tensor([0.6995, 0.0879, 0.1383, 0.0743]) -Greedy action tensor([ 0.9324, -0.5831, -0.0082, -0.3106]) tensor([0.5267, 0.1157, 0.2056, 0.1520]) -Greedy action tensor([ 0.6219, 0.1214, -0.1648, -0.2594]) tensor([0.4039, 0.2449, 0.1839, 0.1673]) -Greedy action tensor([ 1.1581, -0.5284, 0.0859, -0.3088]) tensor([0.5688, 0.1053, 0.1947, 0.1312]) -Greedy action tensor([ 0.2960, 0.0376, -0.1283, -0.0601]) tensor([0.3198, 0.2470, 0.2092, 0.2240]) -Greedy action tensor([ 0.5044, -0.1547, 0.0467, -0.0730]) tensor([0.3688, 0.1908, 0.2334, 0.2070]) -Greedy action tensor([ 0.5096, -0.1500, 0.0552, -0.0218]) tensor([0.3650, 0.1887, 0.2317, 0.2145]) -Greedy action tensor([ 0.5688, -0.4261, -0.1350, -0.0164]) tensor([0.4130, 0.1527, 0.2043, 0.2300]) -Greedy action tensor([ 0.5026, 0.0073, -0.1762, -0.0412]) tensor([0.3708, 0.2259, 0.1881, 0.2152]) -Greedy action tensor([ 0.7071, -0.7381, -0.0613, -0.1291]) tensor([0.4689, 0.1105, 0.2174, 0.2032]) -Greedy action tensor([ 0.5162, -0.4618, -0.1281, -0.4268]) tensor([0.4366, 0.1642, 0.2292, 0.1700]) -Greedy action tensor([ 0.6954, -0.0702, -0.2085, -0.1873]) tensor([0.4379, 0.2036, 0.1773, 0.1811]) -Greedy action tensor([ 0.9412, -0.2058, -0.1021, -0.3442]) tensor([0.5138, 0.1632, 0.1810, 0.1421]) -Greedy action tensor([ 0.8217, -0.5777, 0.1266, -0.6896]) tensor([0.5085, 0.1255, 0.2538, 0.1122]) -Greedy action tensor([ 0.7316, -0.1534, -0.3142, 0.1158]) tensor([0.4340, 0.1791, 0.1525, 0.2344]) -Greedy action tensor([-0.0870, -0.1383, -0.1495, -0.2586]) tensor([0.2680, 0.2546, 0.2517, 0.2257]) -Greedy action tensor([ 0.0974, 0.5553, -0.1187, -0.3762]) tensor([0.2494, 0.3943, 0.2009, 0.1553]) -Greedy action tensor([ 0.4713, 0.0518, -0.0499, -0.0326]) tensor([0.3502, 0.2302, 0.2080, 0.2116]) -Greedy action tensor([ 0.5606, -0.3767, -0.0467, -0.0364]) tensor([0.4021, 0.1575, 0.2191, 0.2213]) -Greedy action tensor([ 0.3745, 0.2296, -0.0560, -0.1467]) tensor([0.3216, 0.2783, 0.2091, 0.1910]) -Greedy action tensor([ 0.4935, -0.0674, -0.0227, -0.1672]) tensor([0.3726, 0.2126, 0.2224, 0.1924]) -Greedy action tensor([ 0.5477, -0.4543, -0.0291, -0.3803]) tensor([0.4302, 0.1580, 0.2417, 0.1701]) -Greedy action tensor([ 0.4196, -0.2458, -0.0646, -0.3034]) tensor([0.3823, 0.1965, 0.2356, 0.1855]) -Greedy action tensor([ 0.7337, -0.5634, 0.0232, -0.2634]) tensor([0.4687, 0.1281, 0.2303, 0.1729]) -Greedy action tensor([ 0.7272, -0.5950, -0.0436, -0.4033]) tensor([0.4873, 0.1299, 0.2255, 0.1573]) -Greedy action tensor([ 0.3219, -0.1083, -0.0408, -0.0193]) tensor([0.3271, 0.2127, 0.2276, 0.2325]) -Greedy action tensor([ 0.6460, -0.6600, -0.1285, -0.2375]) tensor([0.4662, 0.1263, 0.2149, 0.1927]) -Greedy action tensor([ 0.6314, -0.3042, -0.1721, -0.1309]) tensor([0.4335, 0.1701, 0.1941, 0.2023]) -Greedy action tensor([ 0.7479, -0.3200, -0.0349, -0.0039]) tensor([0.4401, 0.1513, 0.2012, 0.2075]) -Greedy action tensor([ 0.7615, -0.7249, 0.0237, -0.2672]) tensor([0.4850, 0.1097, 0.2319, 0.1734]) -Greedy action tensor([ 0.9098, -0.6476, -0.0719, -0.7065]) tensor([0.5605, 0.1181, 0.2100, 0.1113]) -Greedy action tensor([ 0.5149, -0.3185, -0.0925, -0.2577]) tensor([0.4097, 0.1780, 0.2232, 0.1892]) -Greedy action tensor([ 0.7821, -0.6119, -0.1384, -0.3320]) tensor([0.5064, 0.1256, 0.2017, 0.1662]) -Greedy action tensor([ 0.9096, -0.5701, -0.0309, -0.2999]) tensor([0.5218, 0.1188, 0.2037, 0.1557]) -Greedy action tensor([ 0.3649, -0.0492, 0.2242, 0.2438]) tensor([0.2928, 0.1935, 0.2543, 0.2594]) -Greedy action tensor([ 0.5115, 0.0071, -0.2762, -0.2185]) tensor([0.3936, 0.2377, 0.1790, 0.1897]) -Greedy action tensor([ 0.7214, -0.4273, 0.0626, -0.1937]) tensor([0.4474, 0.1418, 0.2315, 0.1792]) -Greedy action tensor([ 1.1489, -0.6287, -0.1109, -0.6561]) tensor([0.6183, 0.1045, 0.1754, 0.1017]) -Greedy action tensor([ 0.4240, -0.0906, -0.0766, -0.4466]) tensor([0.3813, 0.2279, 0.2311, 0.1597]) -Greedy action tensor([ 1.1065, -0.4722, 0.0092, -0.4075]) tensor([0.5681, 0.1172, 0.1896, 0.1250]) -Greedy action tensor([ 0.9259, -0.8144, -0.1098, -0.5335]) tensor([0.5673, 0.0995, 0.2014, 0.1318]) -Greedy action tensor([ 0.6625, -0.3594, 0.3547, -0.2061]) tensor([0.3977, 0.1431, 0.2923, 0.1668]) -Greedy action tensor([ 0.4132, -0.1999, 0.1803, -0.1043]) tensor([0.3413, 0.1849, 0.2704, 0.2034]) -Greedy action tensor([-0.6713, -0.6951, 1.4000, -0.4122]) tensor([0.0892, 0.0871, 0.7080, 0.1156]) -Greedy action tensor([-0.5510, -0.8625, -0.0208, 0.2892]) tensor([0.1740, 0.1274, 0.2956, 0.4030]) -Greedy action tensor([-1.1648, -1.5432, -0.5994, 0.4658]) tensor([0.1169, 0.0801, 0.2058, 0.5972]) -Greedy action tensor([-0.4501, -1.1182, -0.3317, 0.7989]) tensor([0.1633, 0.0837, 0.1838, 0.5693]) -Greedy action tensor([ 0.2803, -0.7331, -0.5866, 0.6549]) tensor([0.3089, 0.1121, 0.1298, 0.4492]) -Greedy action tensor([ 1.5701, -0.4108, 0.3067, 0.5620]) tensor([0.5601, 0.0773, 0.1583, 0.2044]) -Greedy action tensor([-0.0393, -0.0544, -0.2216, 0.7594]) tensor([0.1984, 0.1954, 0.1653, 0.4409]) -Greedy action tensor([-0.3089, -0.8385, 1.4129, -0.2955]) tensor([0.1220, 0.0718, 0.6825, 0.1236]) -Greedy action tensor([-0.0287, -0.3256, 0.4062, -0.0719]) tensor([0.2355, 0.1750, 0.3639, 0.2256]) -Greedy action tensor([ 0.5976, -1.9491, 0.3645, 0.0807]) tensor([0.4054, 0.0318, 0.3211, 0.2418]) -Greedy action tensor([-0.1540, -0.4402, 0.4624, -0.1309]) tensor([0.2161, 0.1623, 0.4003, 0.2212]) -Greedy action tensor([0.6134, 0.1311, 0.0747, 0.1826]) tensor([0.3508, 0.2166, 0.2047, 0.2280]) -Greedy action tensor([-1.1919, -1.5994, -0.0161, -1.3364]) tensor([0.1733, 0.1153, 0.5615, 0.1500]) -Greedy action tensor([ 0.9399, 0.5490, -0.2442, 0.0825]) tensor([0.4155, 0.2811, 0.1272, 0.1763]) -Greedy action tensor([ 1.4097, 0.5772, -0.7221, 0.1531]) tensor([0.5440, 0.2366, 0.0645, 0.1548]) -Greedy action tensor([ 0.8198, -0.0763, 0.6771, -0.4101]) tensor([0.3895, 0.1590, 0.3377, 0.1139]) -Greedy action tensor([-0.5021, -0.7288, -0.8274, -0.3701]) tensor([0.2732, 0.2178, 0.1973, 0.3117]) -Greedy action tensor([ 1.0496, -0.5012, 0.3131, 0.7835]) tensor([0.4070, 0.0863, 0.1948, 0.3119]) -Greedy action tensor([0.3261, 0.1742, 0.6612, 0.7970]) tensor([0.2058, 0.1768, 0.2878, 0.3296]) -Greedy action tensor([ 0.6525, -0.1414, 0.8497, 0.0963]) tensor([0.3083, 0.1394, 0.3755, 0.1768]) -Greedy action tensor([-0.0508, 0.2272, -0.9329, 0.0376]) tensor([0.2613, 0.3451, 0.1082, 0.2855]) -Greedy action tensor([ 0.6336, -0.3099, 0.7340, -0.0197]) tensor([0.3317, 0.1291, 0.3667, 0.1726]) -Greedy action tensor([-0.3544, -1.3251, 2.2891, -0.8228]) tensor([0.0622, 0.0236, 0.8752, 0.0390]) -Greedy action tensor([ 0.5398, -0.7875, 0.8070, 0.4331]) tensor([0.2882, 0.0764, 0.3764, 0.2590]) -Greedy action tensor([ 0.8932, -1.1203, -0.5272, 0.8542]) tensor([0.4279, 0.0571, 0.1034, 0.4116]) -Greedy action tensor([-0.3156, -0.0893, 0.3606, -0.1189]) tensor([0.1839, 0.2306, 0.3616, 0.2239]) -Greedy action tensor([-0.2054, 0.6887, -0.6344, -0.3503]) tensor([0.2016, 0.4928, 0.1312, 0.1744]) -Greedy action tensor([0.9107, 0.2655, 0.8886, 0.1941]) tensor([0.3343, 0.1754, 0.3270, 0.1633]) -Greedy action tensor([-0.0604, -1.4692, 0.9901, 1.0802]) tensor([0.1383, 0.0338, 0.3953, 0.4326]) -Greedy action tensor([-1.0078, -0.4220, 0.5717, -1.1007]) tensor([0.1168, 0.2099, 0.5669, 0.1065]) -Greedy action tensor([-0.0061, -1.3625, -0.0292, 0.1966]) tensor([0.2891, 0.0745, 0.2825, 0.3540]) -Greedy action tensor([ 0.0343, -0.6805, -0.0209, 0.8084]) tensor([0.2172, 0.1063, 0.2055, 0.4710]) -Greedy action tensor([ 1.2825, -0.7056, 1.0902, 0.6847]) tensor([0.3981, 0.0545, 0.3284, 0.2190]) -Greedy action tensor([ 0.1173, -1.3505, -0.1608, 0.1594]) tensor([0.3300, 0.0760, 0.2498, 0.3441]) -Greedy action tensor([ 0.5156, -0.0916, 0.9348, 0.3812]) tensor([0.2538, 0.1383, 0.3860, 0.2219]) -Greedy action tensor([-0.5057, -1.2102, 0.3635, -0.4200]) tensor([0.2013, 0.0995, 0.4800, 0.2193]) -Greedy action tensor([-0.3188, -0.6105, -1.2096, -0.2079]) tensor([0.3054, 0.2281, 0.1253, 0.3412]) -Greedy action tensor([ 0.0230, 0.7895, -0.0645, -0.2046]) tensor([0.2056, 0.4424, 0.1883, 0.1637]) -Greedy action tensor([-0.5023, -2.1865, 0.2937, 0.6725]) tensor([0.1506, 0.0280, 0.3339, 0.4876]) -Greedy action tensor([ 0.5182, 0.7875, 0.2745, -0.7534]) tensor([0.2965, 0.3881, 0.2323, 0.0831]) -Greedy action tensor([-0.8511, -1.1798, -0.4540, -0.1103]) tensor([0.1885, 0.1357, 0.2804, 0.3954]) -Greedy action tensor([ 0.5106, -1.5624, 0.1741, -0.0929]) tensor([0.4189, 0.0527, 0.2992, 0.2291]) -Greedy action tensor([ 0.8976, -0.6049, 1.1507, -0.4611]) tensor([0.3613, 0.0804, 0.4654, 0.0929]) -Greedy action tensor([ 1.5630, -0.0159, 1.1096, 0.4169]) tensor([0.4631, 0.0955, 0.2943, 0.1472]) -Greedy action tensor([ 1.0491, -0.5334, 1.2082, 1.1481]) tensor([0.2872, 0.0590, 0.3367, 0.3171]) -Greedy action tensor([-0.3006, -0.9718, 0.3831, -0.0924]) tensor([0.2117, 0.1082, 0.4194, 0.2607]) -Greedy action tensor([ 0.3135, -0.3382, 0.1629, 0.3928]) tensor([0.2887, 0.1505, 0.2483, 0.3125]) -Greedy action tensor([ 0.7639, 0.3968, -0.0509, -0.6282]) tensor([0.4195, 0.2906, 0.1857, 0.1043]) -Greedy action tensor([-0.3459, -0.1446, -0.8378, 0.2294]) tensor([0.2168, 0.2652, 0.1326, 0.3854]) -Greedy action tensor([-0.8551, -0.3201, 0.4153, -0.0736]) tensor([0.1183, 0.2020, 0.4213, 0.2584]) -Greedy action tensor([-0.0148, -0.2453, 0.1659, 0.2041]) tensor([0.2360, 0.1874, 0.2828, 0.2938]) -Greedy action tensor([ 0.1517, -0.5713, 0.4502, 0.0875]) tensor([0.2652, 0.1287, 0.3574, 0.2487]) -Greedy action tensor([ 0.2656, -0.9130, 0.6204, 0.6949]) tensor([0.2342, 0.0721, 0.3340, 0.3598]) -Greedy action tensor([-0.4166, -0.5403, -0.4641, -0.3476]) tensor([0.2558, 0.2261, 0.2440, 0.2741]) -Greedy action tensor([-0.0334, -1.3142, 1.6620, 0.5517]) tensor([0.1173, 0.0326, 0.6394, 0.2106]) -Greedy action tensor([ 0.8815, -1.4578, 1.5256, 0.9644]) tensor([0.2447, 0.0236, 0.4659, 0.2658]) -Greedy action tensor([-0.8274, -2.2562, -0.4510, 1.0183]) tensor([0.1107, 0.0265, 0.1614, 0.7014]) -Greedy action tensor([-0.2697, -1.6111, -0.1830, -0.9196]) tensor([0.3479, 0.0910, 0.3794, 0.1817]) -Greedy action tensor([ 0.3631, -0.0286, -0.0455, 0.7030]) tensor([0.2670, 0.1805, 0.1774, 0.3751]) -Greedy action tensor([-0.3527, -0.8374, -0.4245, 0.5720]) tensor([0.1973, 0.1215, 0.1837, 0.4975]) -Greedy action tensor([ 0.3642, 0.7201, 0.1569, -1.3362]) tensor([0.2921, 0.4171, 0.2375, 0.0534]) -Greedy action tensor([-0.4600, -0.8547, -0.4346, -0.1729]) tensor([0.2480, 0.1671, 0.2544, 0.3305]) -Greedy action tensor([ 0.4658, -1.0111, 1.1725, 0.4279]) tensor([0.2370, 0.0541, 0.4806, 0.2282]) -Greedy action tensor([ 0.4859, -0.2518, 0.3768, 0.4158]) tensor([0.3024, 0.1446, 0.2711, 0.2819]) -Greedy action tensor([ 0.8152, -1.5118, -0.2307, 0.1537]) tensor([0.5089, 0.0497, 0.1788, 0.2626]) -Greedy action tensor([-0.3941, 0.8875, 0.1632, -1.1230]) tensor([0.1464, 0.5274, 0.2556, 0.0706]) -Greedy action tensor([ 0.7761, -1.8067, 0.3032, 0.4895]) tensor([0.4082, 0.0308, 0.2544, 0.3065]) -Greedy action tensor([ 0.5880, -0.9190, 0.5287, -0.2032]) tensor([0.3821, 0.0847, 0.3601, 0.1732]) -Greedy action tensor([-0.9387, -0.5917, -0.0576, -1.6063]) tensor([0.1872, 0.2649, 0.4519, 0.0960]) -Greedy action tensor([1.3736, 0.4193, 0.4163, 1.2426]) tensor([0.3779, 0.1455, 0.1451, 0.3315]) -Greedy action tensor([-0.2492, -2.3990, 0.0903, 0.4459]) tensor([0.2210, 0.0257, 0.3104, 0.4429]) -Greedy action tensor([ 2.0120, -0.3214, 0.9394, -0.7475]) tensor([0.6656, 0.0645, 0.2277, 0.0421]) -Greedy action tensor([ 0.6138, -0.3437, -0.8239, 1.3414]) tensor([0.2709, 0.1040, 0.0643, 0.5608]) -Greedy action tensor([-0.0746, 0.0644, 1.5562, -0.6681]) tensor([0.1280, 0.1471, 0.6541, 0.0707]) -Greedy action tensor([ 0.6992, -0.2680, 0.6585, 1.3764]) tensor([0.2321, 0.0882, 0.2228, 0.4568]) -Greedy action tensor([ 1.5747, -1.0016, 0.6968, 1.0395]) tensor([0.4814, 0.0366, 0.2001, 0.2819]) -Greedy action tensor([ 0.1551, -2.0889, 0.0418, 0.4688]) tensor([0.2970, 0.0315, 0.2652, 0.4064]) -Greedy action tensor([ 1.2362, -0.6312, 1.0341, 0.1904]) tensor([0.4305, 0.0665, 0.3517, 0.1513]) -Greedy action tensor([ 0.5431, -0.6984, 0.3740, 0.6500]) tensor([0.3081, 0.0890, 0.2601, 0.3428]) -Greedy action tensor([ 0.9090, -1.6466, -0.6489, 0.9782]) tensor([0.4238, 0.0329, 0.0892, 0.4541]) -Greedy action tensor([ 0.0497, 0.7804, 0.8632, -1.1400]) tensor([0.1774, 0.3684, 0.4002, 0.0540]) -Greedy action tensor([-0.4974, 0.5837, 0.4392, 0.3980]) tensor([0.1118, 0.3295, 0.2851, 0.2736]) -Greedy action tensor([-1.6427, 0.1100, 0.4725, 0.0783]) tensor([0.0484, 0.2794, 0.4015, 0.2707]) -Greedy action tensor([-1.9062, -0.4632, 0.7247, -0.1315]) tensor([0.0400, 0.1692, 0.5550, 0.2358]) -Greedy action tensor([-1.9294, -0.4210, 0.6584, -0.1726]) tensor([0.0406, 0.1836, 0.5404, 0.2354]) -Greedy action tensor([-1.6664, -0.5239, 0.5353, -0.1054]) tensor([0.0557, 0.1747, 0.5040, 0.2655]) -Greedy action tensor([-0.5699, -0.6776, 0.0607, 0.5559]) tensor([0.1458, 0.1309, 0.2739, 0.4494]) -Greedy action tensor([-1.7554, -0.4133, 0.6369, 0.0142]) tensor([0.0462, 0.1769, 0.5056, 0.2713]) -Greedy action tensor([-1.9133, -0.4453, 0.6532, -0.1647]) tensor([0.0415, 0.1801, 0.5401, 0.2384]) -Greedy action tensor([-1.4657e+00, 1.1967e-03, 3.8241e-01, 2.0781e-01]) tensor([0.0588, 0.2548, 0.3731, 0.3133]) -Greedy action tensor([-1.0536, -0.1991, 0.3102, -0.1050]) tensor([0.1016, 0.2388, 0.3973, 0.2623]) -Greedy action tensor([-1.4413, -0.1671, 0.5762, 0.1231]) tensor([0.0593, 0.2119, 0.4456, 0.2833]) -Greedy action tensor([-1.2842, -0.5370, 0.4060, 0.0158]) tensor([0.0820, 0.1730, 0.4443, 0.3007]) -Greedy action tensor([-1.9170, -0.4229, 0.6555, -0.1623]) tensor([0.0411, 0.1831, 0.5382, 0.2376]) -Greedy action tensor([-1.7825, -0.4288, 0.6092, -0.0864]) tensor([0.0470, 0.1821, 0.5143, 0.2565]) -Greedy action tensor([-1.5090, -0.5447, 0.4734, 0.1352]) tensor([0.0623, 0.1633, 0.4520, 0.3224]) -Greedy action tensor([-1.9470, -0.4523, 0.6689, -0.1821]) tensor([0.0400, 0.1785, 0.5476, 0.2338]) -Greedy action tensor([-1.8130, -0.1556, 0.5571, -0.0881]) tensor([0.0443, 0.2326, 0.4743, 0.2488]) -Greedy action tensor([-1.9375, -0.4516, 0.6631, -0.1753]) tensor([0.0405, 0.1788, 0.5451, 0.2357]) -Greedy action tensor([-1.3879, 0.7246, 0.2346, 0.1719]) tensor([0.0524, 0.4331, 0.2653, 0.2492]) -Greedy action tensor([-1.8015, -0.3783, 0.6036, -0.1031]) tensor([0.0461, 0.1913, 0.5107, 0.2519]) -Greedy action tensor([-1.3916, 0.1424, 0.4570, 0.1721]) tensor([0.0597, 0.2766, 0.3788, 0.2849]) -Greedy action tensor([-1.1452, 0.7836, 0.1485, 0.3767]) tensor([0.0621, 0.4272, 0.2264, 0.2844]) -Greedy action tensor([-1.8914, -0.4436, 0.6408, -0.1548]) tensor([0.0425, 0.1809, 0.5351, 0.2415]) -Greedy action tensor([-1.9217, -0.4403, 0.6545, -0.1699]) tensor([0.0411, 0.1809, 0.5408, 0.2371]) -Greedy action tensor([-1.8883, -0.3864, 0.6343, -0.1429]) tensor([0.0422, 0.1896, 0.5262, 0.2419]) -Greedy action tensor([-1.5258, -0.4749, 0.5953, -0.3452]) tensor([0.0647, 0.1850, 0.5396, 0.2107]) -Greedy action tensor([-1.7891, -0.4209, 0.5874, -0.1433]) tensor([0.0479, 0.1881, 0.5156, 0.2483]) -Greedy action tensor([-1.8799, -0.3414, 0.6213, -0.1444]) tensor([0.0425, 0.1980, 0.5185, 0.2411]) -Greedy action tensor([-1.0701, 0.1375, 0.2405, 0.4012]) tensor([0.0806, 0.2696, 0.2988, 0.3509]) -Greedy action tensor([-1.8908, -0.4249, 0.6373, -0.1568]) tensor([0.0425, 0.1841, 0.5326, 0.2407]) -Greedy action tensor([-1.8766, -0.4093, 0.6332, -0.1445]) tensor([0.0429, 0.1862, 0.5282, 0.2427]) -Greedy action tensor([-1.3077, -0.5080, 0.3871, -0.0299]) tensor([0.0816, 0.1815, 0.4442, 0.2927]) -Greedy action tensor([-1.9379, -0.4403, 0.6639, -0.1748]) tensor([0.0403, 0.1804, 0.5441, 0.2352]) -Greedy action tensor([-1.9422, -0.4524, 0.6716, -0.1797]) tensor([0.0401, 0.1781, 0.5479, 0.2339]) -Greedy action tensor([-1.7702, -0.4768, 0.5948, -0.0931]) tensor([0.0484, 0.1766, 0.5157, 0.2592]) -Greedy action tensor([-1.8861, -0.3210, 0.6325, -0.1250]) tensor([0.0416, 0.1992, 0.5169, 0.2423]) -Greedy action tensor([-1.6083, -0.5350, 0.5117, -0.1608]) tensor([0.0606, 0.1772, 0.5047, 0.2576]) -Greedy action tensor([-1.0064, -0.7497, 0.5319, 0.3606]) tensor([0.0920, 0.1189, 0.4283, 0.3609]) -Greedy action tensor([-1.9328, -0.4266, 0.6577, -0.1715]) tensor([0.0405, 0.1828, 0.5407, 0.2360]) -Greedy action tensor([-1.7991, -0.5223, 0.6201, -0.0717]) tensor([0.0466, 0.1672, 0.5239, 0.2623]) -Greedy action tensor([-1.7696, -0.2388, 0.4653, -0.4296]) tensor([0.0532, 0.2460, 0.4975, 0.2033]) -Greedy action tensor([-1.8891, -0.4453, 0.6377, -0.1578]) tensor([0.0427, 0.1811, 0.5348, 0.2414]) -Greedy action tensor([-1.8595, -0.3697, 0.6024, -0.1230]) tensor([0.0438, 0.1942, 0.5134, 0.2486]) -Greedy action tensor([-1.7235, -0.5172, 0.5500, -0.0700]) tensor([0.0519, 0.1733, 0.5038, 0.2710]) -Greedy action tensor([1.2178, 1.3404, 0.1996, 0.8604]) tensor([0.3134, 0.3542, 0.1132, 0.2192]) -Greedy action tensor([-1.6741, -0.4244, 0.5459, -0.0084]) tensor([0.0527, 0.1838, 0.4849, 0.2786]) -Greedy action tensor([-1.9389, -0.4492, 0.6702, -0.1736]) tensor([0.0402, 0.1784, 0.5464, 0.2350]) -Greedy action tensor([-1.8310, -0.3957, 0.6305, -0.1060]) tensor([0.0444, 0.1864, 0.5202, 0.2491]) -Greedy action tensor([0.2020, 0.9449, 0.2057, 0.8702]) tensor([0.1651, 0.3471, 0.1657, 0.3221]) -Greedy action tensor([-1.7859, -0.0061, 0.5221, -0.1216]) tensor([0.0449, 0.2663, 0.4516, 0.2372]) -Greedy action tensor([-1.9434, -0.4480, 0.6670, -0.1799]) tensor([0.0402, 0.1792, 0.5464, 0.2343]) -Greedy action tensor([-1.7890, -0.3561, 0.6576, -0.0523]) tensor([0.0446, 0.1869, 0.5152, 0.2533]) -Greedy action tensor([-1.9105, -0.4226, 0.6501, -0.1581]) tensor([0.0414, 0.1834, 0.5362, 0.2390]) -Greedy action tensor([-1.5983e+00, 8.0115e-04, 4.2486e-01, 7.4232e-04]) tensor([0.0542, 0.2681, 0.4097, 0.2681]) -Greedy action tensor([-1.8982, -0.3716, 0.6432, -0.1474]) tensor([0.0416, 0.1913, 0.5278, 0.2394]) -Greedy action tensor([-1.9384, -0.4448, 0.6647, -0.1770]) tensor([0.0404, 0.1797, 0.5450, 0.2349]) -Greedy action tensor([-1.7953, -0.2325, 0.5477, -0.1450]) tensor([0.0467, 0.2231, 0.4867, 0.2435]) -Greedy action tensor([-1.9038, -0.3868, 0.6395, -0.1630]) tensor([0.0417, 0.1901, 0.5305, 0.2377]) -Greedy action tensor([-1.9168, -0.4554, 0.6541, -0.1624]) tensor([0.0414, 0.1784, 0.5411, 0.2391]) -Greedy action tensor([-1.8702, -0.2662, 0.6077, -0.1491]) tensor([0.0426, 0.2118, 0.5075, 0.2381]) -Greedy action tensor([-1.8720, -0.4165, 0.6263, -0.1522]) tensor([0.0434, 0.1861, 0.5280, 0.2424]) -Greedy action tensor([-1.9146, -0.4412, 0.6534, -0.1632]) tensor([0.0414, 0.1806, 0.5396, 0.2385]) -Greedy action tensor([-1.6456, -0.2061, 0.5808, 0.0515]) tensor([0.0501, 0.2115, 0.4646, 0.2737]) -Greedy action tensor([-1.8752, -0.4058, 0.6281, -0.1287]) tensor([0.0429, 0.1865, 0.5245, 0.2461]) -Greedy action tensor([-1.8832, -0.3255, 0.6322, -0.1492]) tensor([0.0420, 0.1996, 0.5202, 0.2381]) -Greedy action tensor([-1.7950, -0.4593, 0.6261, 0.0107]) tensor([0.0452, 0.1717, 0.5084, 0.2747]) -Greedy action tensor([-1.2360, 0.7021, 0.1569, 0.1889]) tensor([0.0620, 0.4306, 0.2496, 0.2578]) -Greedy action tensor([-1.6484, -0.5153, 0.5349, 0.0683]) tensor([0.0539, 0.1674, 0.4785, 0.3001]) -Greedy action tensor([-1.5076, 0.5136, 0.3094, 0.1530]) tensor([0.0501, 0.3781, 0.3082, 0.2636]) -Greedy action tensor([-1.8538, -0.4515, 0.6229, -0.1383]) tensor([0.0444, 0.1804, 0.5284, 0.2468]) -Greedy action tensor([-0.8881, 0.1074, 0.5094, 1.0645]) tensor([0.0676, 0.1829, 0.2733, 0.4762]) -Greedy action tensor([-0.6887, 0.9785, 0.1169, 0.1417]) tensor([0.0923, 0.4892, 0.2067, 0.2118]) -Greedy action tensor([-1.8015, -0.5013, 0.5961, -0.1039]) tensor([0.0473, 0.1737, 0.5205, 0.2585]) -Greedy action tensor([-1.8131, -0.4536, 0.5988, -0.1203]) tensor([0.0465, 0.1813, 0.5192, 0.2530]) -Greedy action tensor([-1.9209, -0.4463, 0.6573, -0.1694]) tensor([0.0411, 0.1797, 0.5420, 0.2371]) -Greedy action tensor([-1.9359, -0.4545, 0.6653, -0.1731]) tensor([0.0405, 0.1780, 0.5456, 0.2359]) -Greedy action tensor([-0.6731, -0.5980, 0.1219, 0.4532]) tensor([0.1356, 0.1461, 0.3002, 0.4181]) -Greedy action tensor([-0.3329, 1.0200, -0.0062, 0.2679]) tensor([0.1238, 0.4789, 0.1716, 0.2257]) -Greedy action tensor([-1.7725, -0.4229, 0.6520, -0.1075]) tensor([0.0466, 0.1799, 0.5269, 0.2466]) -Greedy action tensor([-1.1532, 0.8957, 0.1390, 0.3623]) tensor([0.0590, 0.4577, 0.2148, 0.2685]) -Greedy action tensor([-1.7569, -0.5017, 0.6076, -0.0891]) tensor([0.0489, 0.1716, 0.5203, 0.2592]) -Greedy action tensor([ 0.0096, 0.4424, -0.2389, -0.4676]) tensor([0.2537, 0.3910, 0.1979, 0.1574]) -Greedy action tensor([ 0.6451, -0.0512, 0.0644, -0.1836]) tensor([0.4009, 0.1998, 0.2243, 0.1750]) -Greedy action tensor([ 0.9022, -0.4868, -0.0232, -0.4995]) tensor([0.5286, 0.1318, 0.2095, 0.1301]) -Greedy action tensor([ 0.5205, -0.3200, -0.1341, -0.3451]) tensor([0.4216, 0.1819, 0.2191, 0.1774]) -Greedy action tensor([ 0.3939, -0.2539, -0.0651, -0.4067]) tensor([0.3840, 0.2009, 0.2427, 0.1724]) -Greedy action tensor([ 0.7541, -0.2321, -0.0497, -0.1265]) tensor([0.4474, 0.1669, 0.2003, 0.1855]) -Greedy action tensor([ 0.8431, -0.0750, -0.2931, -0.4910]) tensor([0.5041, 0.2013, 0.1618, 0.1328]) -Greedy action tensor([ 0.5835, -0.3405, 0.0335, -0.1899]) tensor([0.4106, 0.1630, 0.2369, 0.1895]) -Greedy action tensor([ 0.5153, -0.1781, -0.0134, -0.1006]) tensor([0.3803, 0.1901, 0.2241, 0.2054]) -Greedy action tensor([ 0.6856, -0.3433, -0.0325, -0.1619]) tensor([0.4398, 0.1572, 0.2145, 0.1885]) -Greedy action tensor([ 0.7707, -0.2171, -0.1408, -0.0756]) tensor([0.4539, 0.1690, 0.1824, 0.1947]) -Greedy action tensor([ 0.7739, -0.4856, -0.0111, -0.3526]) tensor([0.4845, 0.1375, 0.2210, 0.1570]) -Greedy action tensor([ 1.0390, -0.5179, 0.0057, -0.3733]) tensor([0.5524, 0.1165, 0.1966, 0.1346]) -Greedy action tensor([0.2786, 0.1797, 0.0213, 0.1383]) tensor([0.2818, 0.2553, 0.2179, 0.2449]) -Greedy action tensor([ 0.8735, -0.9631, 0.0501, -0.3588]) tensor([0.5291, 0.0843, 0.2322, 0.1543]) -Greedy action tensor([ 0.5325, -0.1082, 0.0171, -0.1360]) tensor([0.3793, 0.1998, 0.2265, 0.1944]) -Greedy action tensor([ 0.7309, -0.4671, -0.1556, -0.0830]) tensor([0.4636, 0.1399, 0.1911, 0.2054]) -Greedy action tensor([ 1.1117, -0.5915, -0.1558, -0.4293]) tensor([0.5960, 0.1085, 0.1678, 0.1276]) -Greedy action tensor([ 1.0427, -0.4635, -0.1723, -0.2808]) tensor([0.5603, 0.1243, 0.1663, 0.1492]) -Greedy action tensor([ 0.6168, -0.2885, 0.0466, -0.0991]) tensor([0.4067, 0.1645, 0.2300, 0.1988]) -Greedy action tensor([ 0.5676, -0.2595, -0.0142, -0.3509]) tensor([0.4175, 0.1826, 0.2333, 0.1666]) -Greedy action tensor([ 0.6289, -0.3168, -0.0513, -0.2896]) tensor([0.4359, 0.1693, 0.2208, 0.1740]) -Greedy action tensor([ 0.7827, -0.2654, -0.0915, -0.1490]) tensor([0.4626, 0.1622, 0.1930, 0.1822]) -Greedy action tensor([ 0.8645, -0.2333, 0.0228, -0.4367]) tensor([0.4910, 0.1638, 0.2116, 0.1336]) -Greedy action tensor([ 1.1268, -0.1776, 0.2553, -0.4689]) tensor([0.5284, 0.1434, 0.2210, 0.1071]) -Greedy action tensor([ 0.7257, -0.6202, 0.0939, -0.6087]) tensor([0.4866, 0.1267, 0.2587, 0.1281]) -Greedy action tensor([ 0.7225, -0.3588, -0.0641, -0.0694]) tensor([0.4449, 0.1509, 0.2026, 0.2015]) -Greedy action tensor([ 0.5294, -0.4697, -0.0391, -0.2745]) tensor([0.4198, 0.1546, 0.2378, 0.1879]) -Greedy action tensor([ 0.4657, -0.0019, -0.1739, -0.1579]) tensor([0.3718, 0.2329, 0.1961, 0.1993]) -Greedy action tensor([ 0.9199, -0.8657, -0.0664, -0.4498]) tensor([0.5572, 0.0934, 0.2078, 0.1416]) -Greedy action tensor([ 0.6947, 0.0292, 0.0966, -0.1588]) tensor([0.4016, 0.2065, 0.2209, 0.1711]) -Greedy action tensor([ 0.8661, -0.9056, -0.1068, -0.3777]) tensor([0.5446, 0.0926, 0.2058, 0.1570]) -Greedy action tensor([ 0.8891, -0.6165, -0.1256, -0.2927]) tensor([0.5288, 0.1173, 0.1917, 0.1622]) -Greedy action tensor([ 0.6226, -0.2988, -0.0517, -0.1315]) tensor([0.4205, 0.1674, 0.2143, 0.1978]) -Greedy action tensor([ 0.5203, -0.1076, -0.0645, -0.0438]) tensor([0.3760, 0.2007, 0.2095, 0.2139]) -Greedy action tensor([ 0.7136, -0.2127, 0.0139, -0.1336]) tensor([0.4308, 0.1706, 0.2140, 0.1846]) -Greedy action tensor([ 0.6562, -0.5137, -0.0190, -0.2260]) tensor([0.4478, 0.1390, 0.2279, 0.1853]) -Greedy action tensor([ 0.8876, -0.6119, -0.0011, -0.3473]) tensor([0.5194, 0.1160, 0.2136, 0.1511]) -Greedy action tensor([ 0.6617, -0.4334, -0.1033, -0.4924]) tensor([0.4728, 0.1581, 0.2200, 0.1491]) -Greedy action tensor([ 0.4396, -0.0740, -0.0464, 0.0723]) tensor([0.3441, 0.2059, 0.2117, 0.2383]) -Greedy action tensor([ 0.5833, 0.2185, -0.2231, -0.0835]) tensor([0.3768, 0.2616, 0.1682, 0.1934]) -Greedy action tensor([ 0.3122, 0.1421, 0.0492, -0.3096]) tensor([0.3175, 0.2679, 0.2441, 0.1705]) -Greedy action tensor([ 0.5650, -0.1853, -0.1414, -0.6642]) tensor([0.4428, 0.2091, 0.2185, 0.1295]) -Greedy action tensor([ 0.6770, -0.2820, -0.0618, -0.5406]) tensor([0.4636, 0.1777, 0.2215, 0.1372]) -Greedy action tensor([ 0.4519, -0.2778, -0.0621, -0.5224]) tensor([0.4069, 0.1962, 0.2434, 0.1536]) -Greedy action tensor([ 0.5609, -0.3796, -0.0065, -0.2398]) tensor([0.4156, 0.1622, 0.2356, 0.1866]) -Greedy action tensor([ 1.0127, -0.4126, 0.0046, -0.3300]) tensor([0.5358, 0.1288, 0.1955, 0.1399]) -Greedy action tensor([ 0.8914, -1.1671, -0.0208, -1.1226]) tensor([0.6014, 0.0768, 0.2415, 0.0803]) -Greedy action tensor([ 0.5399, -0.5370, -0.0624, -0.1881]) tensor([0.4217, 0.1437, 0.2309, 0.2037]) -Greedy action tensor([ 0.7088, -0.4610, -0.1341, -0.1022]) tensor([0.4576, 0.1421, 0.1970, 0.2034]) -Greedy action tensor([ 0.5184, 0.2668, -0.1071, -0.3581]) tensor([0.3665, 0.2849, 0.1960, 0.1525]) -Greedy action tensor([ 0.9029, -0.5891, 0.0482, -0.4190]) tensor([0.5217, 0.1173, 0.2219, 0.1391]) -Greedy action tensor([ 0.8428, -0.0116, 0.2292, -0.1983]) tensor([0.4310, 0.1834, 0.2334, 0.1522]) -Greedy action tensor([ 0.5911, -0.4378, -0.1379, -0.2143]) tensor([0.4373, 0.1563, 0.2110, 0.1954]) -Greedy action tensor([ 1.5951, -0.9550, -0.1294, -0.4081]) tensor([0.7188, 0.0561, 0.1281, 0.0970]) -Greedy action tensor([ 0.5278, -0.0606, -0.0380, 0.0613]) tensor([0.3636, 0.2019, 0.2065, 0.2280]) -Greedy action tensor([ 0.6698, -0.2748, -0.1395, -0.2016]) tensor([0.4440, 0.1726, 0.1976, 0.1858]) -Greedy action tensor([ 0.7255, -0.6194, -0.1631, -0.3470]) tensor([0.4966, 0.1294, 0.2042, 0.1699]) -Greedy action tensor([ 0.3681, 0.4615, -0.0849, -0.2532]) tensor([0.3057, 0.3356, 0.1944, 0.1642]) -Greedy action tensor([ 0.5086, -0.2026, 0.0625, -0.0628]) tensor([0.3709, 0.1822, 0.2374, 0.2095]) -Greedy action tensor([ 0.4735, -0.1888, 0.0032, -0.0953]) tensor([0.3695, 0.1905, 0.2308, 0.2092]) -Greedy action tensor([ 0.4535, -0.0704, -0.2115, -0.1541]) tensor([0.3772, 0.2234, 0.1940, 0.2054]) -Greedy action tensor([ 0.6083, -0.2435, -0.1289, -0.1471]) tensor([0.4211, 0.1796, 0.2015, 0.1978]) -Greedy action tensor([ 0.7581, -0.6067, -0.1800, -0.1317]) tensor([0.4860, 0.1241, 0.1902, 0.1996]) -Greedy action tensor([ 0.7856, -0.5006, -0.0105, -0.3268]) tensor([0.4863, 0.1344, 0.2194, 0.1599]) -Greedy action tensor([ 0.3646, -0.1483, -0.0626, -0.3336]) tensor([0.3638, 0.2178, 0.2373, 0.1810]) -Greedy action tensor([ 0.7687, 0.2960, -0.1404, -0.6513]) tensor([0.4409, 0.2748, 0.1776, 0.1066]) -Greedy action tensor([ 0.6476, -0.5540, 0.0634, -0.3183]) tensor([0.4466, 0.1343, 0.2490, 0.1700]) -Greedy action tensor([ 0.8083, -0.3785, 0.0109, -0.3651]) tensor([0.4843, 0.1478, 0.2182, 0.1498]) -Greedy action tensor([ 0.9555, -0.7606, -0.0468, -0.3038]) tensor([0.5463, 0.0982, 0.2005, 0.1550]) -Greedy action tensor([ 0.5424, -0.4214, 0.0285, -0.3237]) tensor([0.4166, 0.1589, 0.2492, 0.1752]) -Greedy action tensor([ 0.7471, -0.4554, -0.0428, -0.4949]) tensor([0.4894, 0.1470, 0.2222, 0.1414]) -Greedy action tensor([ 0.3228, -0.1335, 0.0216, -0.1616]) tensor([0.3345, 0.2119, 0.2475, 0.2061]) -Greedy action tensor([ 0.5915, -0.1536, 0.0147, -0.0709]) tensor([0.3919, 0.1860, 0.2201, 0.2020]) -Greedy action tensor([ 0.4619, -0.1817, -0.0453, -0.2761]) tensor([0.3838, 0.2016, 0.2311, 0.1835]) -Greedy action tensor([ 0.7511, -0.5296, 0.0554, -0.2533]) tensor([0.4667, 0.1297, 0.2327, 0.1709]) -Greedy action tensor([ 0.8362, -0.9808, -0.0473, -0.2888]) tensor([0.5262, 0.0855, 0.2175, 0.1708]) -Greedy action tensor([ 0.6132, -0.3195, -0.0259, -0.2394]) tensor([0.4260, 0.1676, 0.2248, 0.1816]) -Greedy action tensor([ 0.7640, -0.3474, -0.0454, -0.1325]) tensor([0.4582, 0.1508, 0.2040, 0.1870]) -Greedy action tensor([ 1.1934, -1.0649, 0.1513, -0.6533]) tensor([0.6192, 0.0647, 0.2184, 0.0977]) -Greedy action tensor([ 0.5253, -0.3795, 0.0909, -0.5222]) tensor([0.4161, 0.1684, 0.2695, 0.1460]) -Greedy action tensor([ 1.5214, 0.1960, -0.2613, -0.2153]) tensor([0.6211, 0.1650, 0.1045, 0.1094]) -Greedy action tensor([ 1.6204, -0.3273, -0.3698, 0.0940]) tensor([0.6682, 0.0953, 0.0913, 0.1452]) -Greedy action tensor([ 1.4653, -0.6184, -0.0790, 0.2232]) tensor([0.6147, 0.0765, 0.1312, 0.1775]) -Greedy action tensor([ 1.1010, -0.0268, 0.2846, -0.0241]) tensor([0.4784, 0.1549, 0.2115, 0.1553]) -Greedy action tensor([ 1.2454, -0.5279, -0.0694, 0.4268]) tensor([0.5321, 0.0903, 0.1429, 0.2347]) -Greedy action tensor([ 1.3371, -0.5712, -0.1029, 0.2712]) tensor([0.5781, 0.0858, 0.1370, 0.1991]) -Greedy action tensor([ 1.9329, -0.6307, -0.6354, 0.5315]) tensor([0.7143, 0.0550, 0.0548, 0.1759]) -Greedy action tensor([ 2.4979, -1.4225, 0.2652, 1.0002]) tensor([0.7403, 0.0147, 0.0794, 0.1656]) -Greedy action tensor([ 1.7024, -0.4661, -0.4731, 0.5629]) tensor([0.6460, 0.0739, 0.0734, 0.2067]) -Greedy action tensor([ 0.3499, -0.5351, 0.0701, -0.0837]) tensor([0.3550, 0.1465, 0.2684, 0.2301]) -Greedy action tensor([ 1.7287, -0.5824, -0.5327, 0.6823]) tensor([0.6433, 0.0638, 0.0670, 0.2259]) -Greedy action tensor([ 1.5159, 0.3226, -0.9365, 0.1907]) tensor([0.6042, 0.1832, 0.0520, 0.1606]) -Greedy action tensor([ 1.6490, -0.8178, -0.2279, 0.1234]) tensor([0.6871, 0.0583, 0.1052, 0.1494]) -Greedy action tensor([ 0.9185, -0.4778, -0.2207, 0.2962]) tensor([0.4752, 0.1176, 0.1521, 0.2551]) -Greedy action tensor([ 2.0457, -0.5905, -0.5485, 0.5148]) tensor([0.7339, 0.0526, 0.0548, 0.1588]) -Greedy action tensor([ 1.4933, 0.2711, 0.0708, -0.6951]) tensor([0.6069, 0.1788, 0.1463, 0.0680]) -Greedy action tensor([ 0.7528, -0.1165, -0.0906, -0.0884]) tensor([0.4385, 0.1838, 0.1887, 0.1891]) -Greedy action tensor([ 1.7872, -0.6672, -0.6945, 0.7665]) tensor([0.6537, 0.0562, 0.0546, 0.2355]) -Greedy action tensor([ 1.4698, -0.5977, -0.9302, 0.3882]) tensor([0.6426, 0.0813, 0.0583, 0.2179]) -Greedy action tensor([ 1.7265, -0.7203, -0.9077, 0.1341]) tensor([0.7343, 0.0636, 0.0527, 0.1494]) -Greedy action tensor([ 1.6880, -0.4907, -0.4626, 0.2980]) tensor([0.6763, 0.0765, 0.0787, 0.1684]) -Greedy action tensor([ 1.3031, -0.2044, -0.9884, 0.6347]) tensor([0.5449, 0.1207, 0.0551, 0.2793]) -Greedy action tensor([ 1.4921, -0.3543, -0.1309, 0.3360]) tensor([0.5989, 0.0945, 0.1182, 0.1885]) -Greedy action tensor([ 1.7525, -0.0479, -0.8403, 0.7994]) tensor([0.6152, 0.1016, 0.0460, 0.2372]) -Greedy action tensor([ 1.4883, -0.5267, -0.3586, 0.2760]) tensor([0.6295, 0.0839, 0.0993, 0.1873]) -Greedy action tensor([ 1.5729, -0.3207, -0.3448, 0.2990]) tensor([0.6340, 0.0954, 0.0932, 0.1774]) -Greedy action tensor([ 1.5670, -0.7136, -0.4595, 0.3103]) tensor([0.6585, 0.0673, 0.0868, 0.1874]) -Greedy action tensor([ 1.2449, -0.4617, -0.0182, -0.0224]) tensor([0.5728, 0.1039, 0.1620, 0.1613]) -Greedy action tensor([ 2.0646, -1.3063, -0.0035, 0.2978]) tensor([0.7509, 0.0258, 0.0949, 0.1283]) -Greedy action tensor([ 1.2274, -0.7625, -0.0535, -0.0450]) tensor([0.5901, 0.0807, 0.1639, 0.1653]) -Greedy action tensor([ 1.9118, -0.7619, -0.6322, 0.4791]) tensor([0.7214, 0.0498, 0.0567, 0.1722]) -Greedy action tensor([ 1.5558, -0.6340, -0.3345, 0.0968]) tensor([0.6687, 0.0749, 0.1010, 0.1554]) -Greedy action tensor([ 1.5889, -0.4245, -0.7140, 0.8802]) tensor([0.5794, 0.0774, 0.0579, 0.2853]) -Greedy action tensor([ 0.9890, 0.0409, -0.3884, 0.2578]) tensor([0.4715, 0.1827, 0.1189, 0.2269]) -Greedy action tensor([ 1.3573, -0.4823, -0.3061, 0.4406]) tensor([0.5720, 0.0909, 0.1084, 0.2287]) -Greedy action tensor([ 1.5122, -0.5303, -0.1181, 0.1095]) tensor([0.6363, 0.0825, 0.1246, 0.1565]) -Greedy action tensor([ 1.8569, -0.7361, -0.4003, 0.3848]) tensor([0.7098, 0.0531, 0.0743, 0.1629]) -Greedy action tensor([ 1.0126, -0.2921, -0.9821, 0.3579]) tensor([0.5190, 0.1408, 0.0706, 0.2697]) -Greedy action tensor([ 1.9437, -1.1985, 0.1850, 0.5589]) tensor([0.6822, 0.0295, 0.1175, 0.1708]) -Greedy action tensor([ 1.1987, -0.4918, -0.2101, -0.3198]) tensor([0.6068, 0.1119, 0.1483, 0.1329]) -Greedy action tensor([ 1.0850, -0.0380, -0.1713, 0.3847]) tensor([0.4747, 0.1544, 0.1352, 0.2357]) -Greedy action tensor([ 1.8768, -0.3854, -0.7596, 0.4320]) tensor([0.7084, 0.0738, 0.0507, 0.1671]) -Greedy action tensor([ 1.3308, -1.0094, -0.0670, 0.3861]) tensor([0.5773, 0.0556, 0.1427, 0.2244]) -Greedy action tensor([ 1.8165, -0.6288, -0.6332, 0.3895]) tensor([0.7077, 0.0614, 0.0611, 0.1699]) -Greedy action tensor([ 1.3415, -0.3717, -0.0996, 0.1063]) tensor([0.5856, 0.1056, 0.1386, 0.1703]) -Greedy action tensor([ 1.7257, -0.8168, -0.1660, 0.2431]) tensor([0.6866, 0.0540, 0.1035, 0.1559]) -Greedy action tensor([ 1.9234, -0.3734, -1.0933, 0.7128]) tensor([0.6908, 0.0695, 0.0338, 0.2059]) -Greedy action tensor([ 2.5179, -1.1423, -0.1868, 0.5138]) tensor([0.8147, 0.0210, 0.0545, 0.1098]) -Greedy action tensor([ 1.3599, -0.8029, -0.3965, 0.1120]) tensor([0.6350, 0.0730, 0.1096, 0.1823]) -Greedy action tensor([ 1.1059, -0.4720, -0.1341, 0.5680]) tensor([0.4808, 0.0992, 0.1391, 0.2808]) -Greedy action tensor([ 1.4319, -0.2090, -0.6377, 0.2154]) tensor([0.6187, 0.1199, 0.0781, 0.1833]) -Greedy action tensor([ 1.1604, -0.5683, 0.0665, 0.3199]) tensor([0.5144, 0.0913, 0.1723, 0.2220]) -Greedy action tensor([ 2.0710, -1.2241, 0.2184, 0.8736]) tensor([0.6685, 0.0248, 0.1048, 0.2019]) -Greedy action tensor([ 1.8378, -0.7736, -0.4350, 0.2698]) tensor([0.7221, 0.0530, 0.0744, 0.1505]) -Greedy action tensor([ 0.8910, 0.0388, 0.0776, -0.1672]) tensor([0.4511, 0.1924, 0.2000, 0.1566]) -Greedy action tensor([ 1.4241, -0.2933, -0.4993, 0.2179]) tensor([0.6154, 0.1105, 0.0899, 0.1842]) -Greedy action tensor([ 1.3206, -0.4471, -0.1428, -0.2615]) tensor([0.6220, 0.1062, 0.1440, 0.1278]) -Greedy action tensor([ 1.5897, -1.2020, -0.1999, -0.2846]) tensor([0.7237, 0.0444, 0.1209, 0.1111]) -Greedy action tensor([ 1.9052, -0.8218, 0.0022, 0.7606]) tensor([0.6524, 0.0427, 0.0973, 0.2077]) -Greedy action tensor([ 1.7737, -0.6704, -0.3929, 0.6879]) tensor([0.6498, 0.0564, 0.0744, 0.2194]) -Greedy action tensor([ 1.5739, -0.7569, -0.2659, 0.3398]) tensor([0.6463, 0.0628, 0.1027, 0.1882]) -Greedy action tensor([ 1.7048, -0.7848, -1.0411, 0.0890]) tensor([0.7430, 0.0616, 0.0477, 0.1477]) -Greedy action tensor([1.4357, 0.1733, 0.0073, 0.3211]) tensor([0.5403, 0.1529, 0.1295, 0.1773]) -Greedy action tensor([ 1.2955, -0.1802, -0.0671, -0.0649]) tensor([0.5743, 0.1313, 0.1470, 0.1474]) -Greedy action tensor([ 2.2149, -0.9242, -0.4642, 0.9024]) tensor([0.7241, 0.0314, 0.0497, 0.1949]) -Greedy action tensor([ 1.4417, -0.5480, -0.5096, 0.6282]) tensor([0.5807, 0.0794, 0.0825, 0.2574]) -Greedy action tensor([ 1.6209, -0.6851, -0.1420, 0.3308]) tensor([0.6467, 0.0644, 0.1109, 0.1780]) -Greedy action tensor([ 1.1212, -0.0828, -0.5026, -0.2651]) tensor([0.5724, 0.1717, 0.1128, 0.1431]) -Greedy action tensor([ 1.9134, -0.8068, -0.4249, 0.1872]) tensor([0.7461, 0.0491, 0.0720, 0.1328]) -Greedy action tensor([ 1.2732, -0.5744, -0.4246, 0.7321]) tensor([0.5201, 0.0820, 0.0952, 0.3027]) -Greedy action tensor([ 1.1984, 0.0059, -0.8427, 0.3812]) tensor([0.5333, 0.1619, 0.0693, 0.2356]) -Greedy action tensor([ 1.4516, -0.5493, -0.4403, 0.5654]) tensor([0.5888, 0.0796, 0.0888, 0.2427]) -Greedy action tensor([ 2.3214, -1.2842, -0.1200, 0.7130]) tensor([0.7608, 0.0207, 0.0662, 0.1523]) -Greedy action tensor([ 0.8966, -0.2625, 0.0228, 0.3921]) tensor([0.4283, 0.1344, 0.1788, 0.2586]) -Greedy action tensor([ 1.5883, -0.6279, -0.4426, 0.2266]) tensor([0.6682, 0.0729, 0.0877, 0.1712]) -Greedy action tensor([ 1.6605, -0.4495, -0.2835, 0.3971]) tensor([0.6464, 0.0784, 0.0925, 0.1827]) -Greedy action tensor([ 1.4820, -0.0690, -1.0324, -0.1035]) tensor([0.6677, 0.1416, 0.0540, 0.1368]) -Greedy action tensor([ 1.2880, -0.8974, -0.0669, 0.3583]) tensor([0.5665, 0.0637, 0.1462, 0.2236]) -Greedy action tensor([ 1.0031, -0.4268, -0.2276, 0.0720]) tensor([0.5193, 0.1243, 0.1517, 0.2047]) -Greedy action tensor([ 1.3588, -0.5137, -0.4454, 0.1735]) tensor([0.6158, 0.0947, 0.1014, 0.1882]) -Greedy action tensor([ 0.8283, -0.6376, -0.4868, 0.5252]) tensor([0.4469, 0.1032, 0.1200, 0.3300]) -Greedy action tensor([-0.9311, -0.3528, 0.1026, -1.0945]) tensor([0.1552, 0.2767, 0.4363, 0.1318]) -Greedy action tensor([ 2.4866, -0.0046, -0.4301, 0.0120]) tensor([0.8189, 0.0678, 0.0443, 0.0690]) -Greedy action tensor([ 1.0138, 0.8338, -0.5916, 0.5538]) tensor([0.3749, 0.3131, 0.0753, 0.2367]) -Greedy action tensor([ 0.5469, 0.1945, 0.1705, -1.0750]) tensor([0.3866, 0.2718, 0.2653, 0.0764]) -Greedy action tensor([-0.4828, -0.1369, 0.7557, -0.4789]) tensor([0.1456, 0.2058, 0.5024, 0.1462]) -Greedy action tensor([-0.7449, -1.5101, 0.1146, 0.5582]) tensor([0.1332, 0.0620, 0.3146, 0.4902]) -Greedy action tensor([-0.1051, -0.1652, 0.6900, -0.9737]) tensor([0.2185, 0.2058, 0.4840, 0.0917]) -Greedy action tensor([-0.5782, -1.4425, -0.0301, -0.1146]) tensor([0.2109, 0.0889, 0.3649, 0.3353]) -Greedy action tensor([-0.8533, -1.4472, 0.0870, -0.6903]) tensor([0.1890, 0.1044, 0.4841, 0.2225]) -Greedy action tensor([-0.3315, -0.2086, 0.0542, -0.9790]) tensor([0.2424, 0.2741, 0.3565, 0.1269]) -Greedy action tensor([-0.1364, -1.1022, -0.5838, 0.3237]) tensor([0.2775, 0.1056, 0.1774, 0.4396]) -Greedy action tensor([ 0.5421, 0.3235, 0.4325, -1.0443]) tensor([0.3443, 0.2767, 0.3086, 0.0705]) -Greedy action tensor([-0.7399, -0.5999, -0.2579, 0.4887]) tensor([0.1392, 0.1601, 0.2253, 0.4754]) -Greedy action tensor([ 1.5158, -0.4420, 0.8745, 0.8181]) tensor([0.4618, 0.0652, 0.2432, 0.2298]) -Greedy action tensor([-0.2714, 0.2016, -0.4558, -0.1978]) tensor([0.2216, 0.3556, 0.1843, 0.2385]) -Greedy action tensor([ 0.8321, 0.1685, 1.0032, -0.8084]) tensor([0.3454, 0.1779, 0.4098, 0.0670]) -Greedy action tensor([ 1.3072, -0.8293, 1.6227, 0.2234]) tensor([0.3537, 0.0418, 0.4849, 0.1197]) -Greedy action tensor([ 0.4763, -0.7902, 0.1144, -0.2689]) tensor([0.4077, 0.1149, 0.2839, 0.1935]) -Greedy action tensor([-0.3733, -0.6856, 0.4336, -0.3392]) tensor([0.1997, 0.1461, 0.4475, 0.2066]) -Greedy action tensor([ 0.6307, -1.5902, 0.4172, 0.8444]) tensor([0.3170, 0.0344, 0.2561, 0.3925]) -Greedy action tensor([ 0.5607, 0.5973, -0.3278, 0.2659]) tensor([0.3132, 0.3248, 0.1288, 0.2332]) -Greedy action tensor([-0.0093, -0.4096, 0.8758, 1.1112]) tensor([0.1397, 0.0936, 0.3385, 0.4283]) -Greedy action tensor([ 0.2852, -0.5141, 0.4302, -0.3744]) tensor([0.3202, 0.1440, 0.3702, 0.1656]) -Greedy action tensor([ 0.0771, 0.0118, 0.8726, -0.4502]) tensor([0.2109, 0.1975, 0.4672, 0.1244]) -Greedy action tensor([-0.0923, 0.2338, -0.1503, -0.5328]) tensor([0.2517, 0.3488, 0.2375, 0.1620]) -Greedy action tensor([ 0.1034, -1.9078, -0.6722, -0.3413]) tensor([0.4474, 0.0599, 0.2060, 0.2868]) -Greedy action tensor([-0.8619, -1.0517, 1.4747, -0.2048]) tensor([0.0709, 0.0587, 0.7336, 0.1368]) -Greedy action tensor([-0.8323, -0.2257, 0.6432, -0.4323]) tensor([0.1149, 0.2108, 0.5027, 0.1715]) -Greedy action tensor([-0.6935, -0.6781, 2.0125, -0.4275]) tensor([0.0547, 0.0555, 0.8185, 0.0713]) -Greedy action tensor([0.8392, 0.2103, 0.7544, 0.6587]) tensor([0.3043, 0.1622, 0.2795, 0.2540]) -Greedy action tensor([ 1.1815, -0.9538, 1.3645, 0.0035]) tensor([0.3807, 0.0450, 0.4571, 0.1172]) -Greedy action tensor([ 0.6885, -0.6956, 0.0092, 0.6309]) tensor([0.3702, 0.0927, 0.1877, 0.3494]) -Greedy action tensor([-1.3897, -0.7059, 0.2080, -0.1180]) tensor([0.0870, 0.1724, 0.4301, 0.3104]) -Greedy action tensor([ 0.5190, 0.3284, -0.1859, 1.1660]) tensor([0.2364, 0.1954, 0.1168, 0.4514]) -Greedy action tensor([ 0.8617, -0.9957, -1.1621, 0.4949]) tensor([0.5048, 0.0788, 0.0667, 0.3498]) -Greedy action tensor([-0.4613, 0.2755, -0.4246, -0.6327]) tensor([0.2012, 0.4205, 0.2088, 0.1695]) -Greedy action tensor([ 0.4690, -0.8023, 1.0299, -1.1025]) tensor([0.3086, 0.0866, 0.5407, 0.0641]) -Greedy action tensor([ 0.7511, -0.0623, 0.4366, 0.1750]) tensor([0.3655, 0.1621, 0.2669, 0.2055]) -Greedy action tensor([-0.5901, -0.4133, -0.7428, 0.0610]) tensor([0.2012, 0.2402, 0.1727, 0.3859]) -Greedy action tensor([ 0.2551, -0.9896, 0.1836, -0.4054]) tensor([0.3656, 0.1053, 0.3403, 0.1888]) -Greedy action tensor([ 0.0140, -0.4910, -0.5238, 0.8135]) tensor([0.2267, 0.1368, 0.1324, 0.5042]) -Greedy action tensor([ 0.1424, -0.1579, -0.3024, -0.9146]) tensor([0.3664, 0.2714, 0.2349, 0.1273]) -Greedy action tensor([ 0.4827, -0.1525, 0.0554, 0.3034]) tensor([0.3314, 0.1756, 0.2161, 0.2770]) -Greedy action tensor([-1.1548, -0.9555, -0.0981, -0.3215]) tensor([0.1352, 0.1650, 0.3889, 0.3110]) -Greedy action tensor([-0.7048, -0.7942, -1.4853, -0.1888]) tensor([0.2470, 0.2259, 0.1132, 0.4139]) -Greedy action tensor([-0.3074, -0.4874, 1.0475, -0.1036]) tensor([0.1441, 0.1204, 0.5587, 0.1767]) -Greedy action tensor([-0.1582, 0.4126, -0.0376, -0.9033]) tensor([0.2287, 0.4047, 0.2580, 0.1086]) -Greedy action tensor([ 0.5767, 0.3700, -0.0268, -1.1004]) tensor([0.3926, 0.3193, 0.2147, 0.0734]) -Greedy action tensor([ 0.7978, 0.0553, -0.2467, 1.3182]) tensor([0.2849, 0.1356, 0.1002, 0.4793]) -Greedy action tensor([ 1.3406, -0.5004, 0.4548, 0.9017]) tensor([0.4513, 0.0716, 0.1861, 0.2910]) -Greedy action tensor([ 1.4605, -0.7533, 0.6663, 0.7873]) tensor([0.4828, 0.0528, 0.2182, 0.2463]) -Greedy action tensor([ 0.7578, 0.4755, 1.2578, -0.1250]) tensor([0.2620, 0.1976, 0.4320, 0.1084]) -Greedy action tensor([ 0.6270, -1.4738, 0.0844, 0.7732]) tensor([0.3495, 0.0428, 0.2032, 0.4046]) -Greedy action tensor([ 0.1855, -0.2389, -0.2082, -0.1641]) tensor([0.3296, 0.2156, 0.2224, 0.2324]) -Greedy action tensor([ 0.2836, -1.5057, 0.1790, 0.4169]) tensor([0.3115, 0.0520, 0.2806, 0.3559]) -Greedy action tensor([ 1.1171, -0.2419, 0.3700, 0.1084]) tensor([0.4772, 0.1226, 0.2261, 0.1741]) -Greedy action tensor([-0.5774, 0.4434, 0.1527, -0.3501]) tensor([0.1407, 0.3906, 0.2921, 0.1766]) -Greedy action tensor([ 1.0328, -0.4102, 0.2177, 1.4707]) tensor([0.3098, 0.0732, 0.1371, 0.4800]) -Greedy action tensor([1.3661, 0.1633, 0.6734, 0.2262]) tensor([0.4716, 0.1416, 0.2359, 0.1508]) -Greedy action tensor([ 0.1299, 0.1059, 1.5110, -0.5606]) tensor([0.1549, 0.1512, 0.6163, 0.0776]) -Greedy action tensor([-0.4116, -1.1538, -1.0252, 1.2874]) tensor([0.1336, 0.0636, 0.0723, 0.7305]) -Greedy action tensor([-1.0617, -0.9108, 1.1608, -1.3061]) tensor([0.0821, 0.0955, 0.7580, 0.0643]) -Greedy action tensor([-0.9663, -0.4507, 1.4440, -0.4786]) tensor([0.0648, 0.1085, 0.7213, 0.1055]) -Greedy action tensor([-0.6274, -0.9610, -0.5530, -0.5817]) tensor([0.2604, 0.1865, 0.2805, 0.2726]) -Greedy action tensor([ 0.5202, -1.2513, 0.1732, 0.2786]) tensor([0.3756, 0.0639, 0.2655, 0.2950]) -Greedy action tensor([ 0.0553, 0.1628, -0.0138, -0.2104]) tensor([0.2622, 0.2920, 0.2447, 0.2010]) -Greedy action tensor([ 0.0743, -0.3971, -0.3051, -0.1058]) tensor([0.3181, 0.1985, 0.2177, 0.2657]) -Greedy action tensor([ 0.3466, -0.9196, 0.6841, 0.1129]) tensor([0.2878, 0.0811, 0.4033, 0.2278]) -Greedy action tensor([ 0.1460, -1.1345, -0.6728, 0.4242]) tensor([0.3290, 0.0914, 0.1451, 0.4345]) -Greedy action tensor([ 1.0198, -1.3843, 0.4161, 0.1297]) tensor([0.4883, 0.0441, 0.2670, 0.2005]) -Greedy action tensor([ 1.4318, -0.7673, 0.8881, -0.1038]) tensor([0.5244, 0.0582, 0.3045, 0.1129]) -Greedy action tensor([ 1.0427, -1.3405, 0.1129, 0.9298]) tensor([0.4202, 0.0388, 0.1658, 0.3753]) -Greedy action tensor([-0.9123, -0.0981, 0.4291, -0.7850]) tensor([0.1217, 0.2747, 0.4654, 0.1382]) -Greedy action tensor([ 0.8443, -0.6250, -0.1544, 0.5037]) tensor([0.4329, 0.0996, 0.1595, 0.3080]) -Greedy action tensor([ 0.6256, -1.8180, -0.0797, 0.7421]) tensor([0.3698, 0.0321, 0.1827, 0.4155]) -Greedy action tensor([ 0.0545, -1.5036, -0.2567, 0.1103]) tensor([0.3333, 0.0702, 0.2442, 0.3524]) -Greedy action tensor([ 0.9348, -0.3009, 0.4090, -0.7349]) tensor([0.4831, 0.1404, 0.2855, 0.0910]) -Greedy action tensor([ 0.0273, -0.9335, 0.5565, -0.1879]) tensor([0.2573, 0.0984, 0.4368, 0.2075]) -Greedy action tensor([ 0.8845, 1.2167, 0.3126, -0.0121]) tensor([0.2970, 0.4141, 0.1677, 0.1212]) -Greedy action tensor([ 1.1507, -1.5815, 1.4866, 1.0258]) tensor([0.2988, 0.0194, 0.4181, 0.2637]) -Greedy action tensor([-0.2580, -0.2098, -0.7913, -0.5114]) tensor([0.2931, 0.3075, 0.1719, 0.2275]) -Greedy action tensor([0.9127, 0.0490, 1.3416, 0.9966]) tensor([0.2472, 0.1042, 0.3796, 0.2689]) -Greedy action tensor([-1.8813, -0.4186, 0.6560, -0.1515]) tensor([0.0424, 0.1829, 0.5358, 0.2389]) -Greedy action tensor([-1.8957, -0.4349, 0.6418, -0.1526]) tensor([0.0422, 0.1820, 0.5343, 0.2414]) -Greedy action tensor([-1.7527, -0.4582, 0.5802, -0.0870]) tensor([0.0494, 0.1802, 0.5091, 0.2613]) -Greedy action tensor([-1.9482, -0.4508, 0.6681, -0.1830]) tensor([0.0400, 0.1788, 0.5474, 0.2337]) -Greedy action tensor([-1.8743, -0.3836, 0.6349, -0.1363]) tensor([0.0427, 0.1896, 0.5249, 0.2428]) -Greedy action tensor([-1.9212, -0.4157, 0.6557, -0.1675]) tensor([0.0409, 0.1844, 0.5383, 0.2363]) -Greedy action tensor([-1.9005, -0.4527, 0.6495, -0.1497]) tensor([0.0420, 0.1786, 0.5377, 0.2418]) -Greedy action tensor([-1.0491, -0.3378, 0.2275, 0.4089]) tensor([0.0916, 0.1865, 0.3283, 0.3936]) -Greedy action tensor([-1.4254, 0.5978, 0.2769, 0.0735]) tensor([0.0540, 0.4082, 0.2962, 0.2417]) -Greedy action tensor([-1.8688, -0.2900, 0.6136, -0.1377]) tensor([0.0426, 0.2066, 0.5101, 0.2406]) -Greedy action tensor([-1.4889, -0.4028, 0.4963, -0.1467]) tensor([0.0664, 0.1966, 0.4831, 0.2540]) -Greedy action tensor([-1.7469, -0.3141, 0.5467, -0.0476]) tensor([0.0486, 0.2037, 0.4818, 0.2659]) -Greedy action tensor([-1.9264, -0.4443, 0.6580, -0.1717]) tensor([0.0409, 0.1801, 0.5424, 0.2366]) -Greedy action tensor([-1.7805, -0.3886, 0.5763, -0.0969]) tensor([0.0477, 0.1919, 0.5036, 0.2569]) -Greedy action tensor([-1.8868, -0.3372, 0.6271, -0.1299]) tensor([0.0419, 0.1974, 0.5178, 0.2429]) -Greedy action tensor([-1.7859, -0.4943, 0.6078, -0.0424]) tensor([0.0469, 0.1707, 0.5140, 0.2683]) -Greedy action tensor([-1.9209, -0.4137, 0.6463, -0.1640]) tensor([0.0411, 0.1855, 0.5354, 0.2381]) -Greedy action tensor([-1.8603, -0.4066, 0.6714, -0.0479]) tensor([0.0417, 0.1784, 0.5244, 0.2554]) -Greedy action tensor([-1.8987, -0.4363, 0.6429, -0.1597]) tensor([0.0422, 0.1821, 0.5357, 0.2401]) -Greedy action tensor([-1.0897, 0.3433, 0.2139, 0.3063]) tensor([0.0774, 0.3246, 0.2852, 0.3128]) -Greedy action tensor([-1.7825, -0.5064, 0.5870, -0.0855]) tensor([0.0482, 0.1728, 0.5157, 0.2632]) -Greedy action tensor([-1.7593, -0.2611, 0.5998, -0.0538]) tensor([0.0464, 0.2075, 0.4908, 0.2553]) -Greedy action tensor([-1.6459, -0.3874, 0.4890, -0.0283]) tensor([0.0555, 0.1954, 0.4693, 0.2798]) -Greedy action tensor([-1.8860, -0.4602, 0.6367, -0.1477]) tensor([0.0429, 0.1785, 0.5346, 0.2440]) -Greedy action tensor([-1.9334, -0.4536, 0.6735, -0.1728]) tensor([0.0404, 0.1774, 0.5474, 0.2348]) -Greedy action tensor([-1.9252, -0.4368, 0.6585, -0.1694]) tensor([0.0409, 0.1811, 0.5414, 0.2366]) -Greedy action tensor([-1.8996, -0.4420, 0.6719, -0.1203]) tensor([0.0411, 0.1767, 0.5384, 0.2438]) -Greedy action tensor([-1.7963, -0.1191, 0.5554, -0.1115]) tensor([0.0450, 0.2405, 0.4722, 0.2424]) -Greedy action tensor([-1.9114, -0.4214, 0.6521, -0.1606]) tensor([0.0414, 0.1835, 0.5369, 0.2382]) -Greedy action tensor([-1.5102, -0.0180, 0.4756, -0.1680]) tensor([0.0604, 0.2685, 0.4399, 0.2311]) -Greedy action tensor([-1.7767, -0.3269, 0.5981, -0.0600]) tensor([0.0463, 0.1975, 0.4982, 0.2580]) -Greedy action tensor([-0.5497, 0.9089, 0.0585, 0.0487]) tensor([0.1117, 0.4801, 0.2051, 0.2031]) -Greedy action tensor([-1.5293, 0.1744, 0.3421, 0.0241]) tensor([0.0564, 0.3101, 0.3667, 0.2668]) -Greedy action tensor([-1.5498, -0.4419, 0.5784, 0.3798]) tensor([0.0518, 0.1568, 0.4349, 0.3566]) -Greedy action tensor([-1.4528, 0.1062, 0.3013, 0.0465]) tensor([0.0625, 0.2969, 0.3609, 0.2797]) -Greedy action tensor([-1.6437, -0.4148, 0.5024, -0.0172]) tensor([0.0554, 0.1893, 0.4736, 0.2817]) -Greedy action tensor([-1.8500, -0.4739, 0.6197, -0.1047]) tensor([0.0444, 0.1759, 0.5251, 0.2545]) -Greedy action tensor([-1.9353, -0.4565, 0.6811, -0.1693]) tensor([0.0401, 0.1760, 0.5492, 0.2346]) -Greedy action tensor([-1.9135, -0.4157, 0.6585, -0.1546]) tensor([0.0410, 0.1835, 0.5372, 0.2382]) -Greedy action tensor([-1.5668, -0.4516, 0.4898, -0.0991]) tensor([0.0617, 0.1882, 0.4824, 0.2677]) -Greedy action tensor([-1.8747, -0.4353, 0.6368, -0.1312]) tensor([0.0430, 0.1814, 0.5299, 0.2458]) -Greedy action tensor([-1.8214, -0.3584, 0.6367, -0.1090]) tensor([0.0444, 0.1916, 0.5182, 0.2458]) -Greedy action tensor([-1.6190, -0.2082, 0.4794, -0.0717]) tensor([0.0557, 0.2284, 0.4542, 0.2618]) -Greedy action tensor([-1.7689, -0.4639, 0.6159, -0.0256]) tensor([0.0470, 0.1734, 0.5106, 0.2689]) -Greedy action tensor([-1.6523, -0.5663, 0.5402, -0.0171]) tensor([0.0554, 0.1641, 0.4962, 0.2842]) -Greedy action tensor([ 0.3450, 1.0026, -0.0653, 0.5124]) tensor([0.2094, 0.4041, 0.1389, 0.2475]) -Greedy action tensor([-1.9252, -0.4202, 0.6583, -0.1701]) tensor([0.0408, 0.1836, 0.5399, 0.2358]) -Greedy action tensor([-1.9134, -0.4471, 0.6526, -0.1641]) tensor([0.0415, 0.1798, 0.5400, 0.2386]) -Greedy action tensor([-1.9135, -0.4538, 0.6763, -0.1583]) tensor([0.0410, 0.1763, 0.5458, 0.2369]) -Greedy action tensor([-1.9160, -0.4184, 0.6496, -0.1677]) tensor([0.0413, 0.1846, 0.5370, 0.2372]) -Greedy action tensor([-1.8632, -0.4088, 0.6238, -0.1374]) tensor([0.0436, 0.1868, 0.5246, 0.2450]) -Greedy action tensor([-1.8078, -0.3549, 0.6220, -0.1159]) tensor([0.0453, 0.1938, 0.5148, 0.2461]) -Greedy action tensor([-0.4621, 1.0992, 0.0188, 0.5187]) tensor([0.0995, 0.4742, 0.1610, 0.2654]) -Greedy action tensor([-1.4049, 0.3713, 0.3150, 0.0524]) tensor([0.0596, 0.3519, 0.3326, 0.2558]) -Greedy action tensor([-1.7917, -0.3459, 0.5833, -0.1412]) tensor([0.0472, 0.2002, 0.5070, 0.2457]) -Greedy action tensor([-1.5517, -0.5353, 0.4854, 0.0137]) tensor([0.0617, 0.1704, 0.4729, 0.2950]) -Greedy action tensor([1.2693, 1.2794, 0.0122, 0.7793]) tensor([0.3440, 0.3475, 0.0978, 0.2107]) -Greedy action tensor([-1.6904, 0.2291, 0.4384, -0.0703]) tensor([0.0470, 0.3204, 0.3950, 0.2375]) -Greedy action tensor([-1.9211, -0.3567, 0.6405, -0.1624]) tensor([0.0407, 0.1948, 0.5280, 0.2365]) -Greedy action tensor([-1.8219, -0.3748, 0.5957, -0.1358]) tensor([0.0457, 0.1944, 0.5130, 0.2469]) -Greedy action tensor([-1.8992, -0.4049, 0.6427, -0.1518]) tensor([0.0418, 0.1865, 0.5315, 0.2402]) -Greedy action tensor([-1.9055, -0.4391, 0.6427, -0.1587]) tensor([0.0419, 0.1817, 0.5359, 0.2405]) -Greedy action tensor([-1.1705, 0.8925, 0.1717, 0.3139]) tensor([0.0584, 0.4600, 0.2237, 0.2579]) -Greedy action tensor([-1.6217, -0.5125, 0.4918, -0.0217]) tensor([0.0579, 0.1756, 0.4795, 0.2869]) -Greedy action tensor([-1.7083, -0.4412, 0.6936, 0.0545]) tensor([0.0467, 0.1657, 0.5155, 0.2721]) -Greedy action tensor([-1.9057, -0.3967, 0.6459, -0.1577]) tensor([0.0415, 0.1877, 0.5324, 0.2384]) -Greedy action tensor([0.3032, 0.6790, 0.4412, 1.1215]) tensor([0.1703, 0.2480, 0.1955, 0.3861]) -Greedy action tensor([-1.9023, -0.4222, 0.6555, -0.1592]) tensor([0.0416, 0.1829, 0.5375, 0.2380]) -Greedy action tensor([-0.3809, -0.4089, 0.2064, 0.2336]) tensor([0.1779, 0.1730, 0.3201, 0.3289]) -Greedy action tensor([-1.1319, -0.4357, 0.6701, 0.6444]) tensor([0.0668, 0.1340, 0.4048, 0.3945]) -Greedy action tensor([-1.8524, -0.3634, 0.6091, -0.1430]) tensor([0.0441, 0.1954, 0.5169, 0.2436]) -Greedy action tensor([-1.7925, -0.2755, 0.5904, -0.0900]) tensor([0.0457, 0.2083, 0.4952, 0.2508]) -Greedy action tensor([-1.9450, -0.4560, 0.6762, -0.1790]) tensor([0.0399, 0.1771, 0.5494, 0.2336]) -Greedy action tensor([-1.9208, -0.4440, 0.6532, -0.1694]) tensor([0.0412, 0.1805, 0.5407, 0.2375]) -Greedy action tensor([-1.9003, -0.4233, 0.6276, -0.1532]) tensor([0.0423, 0.1852, 0.5298, 0.2427]) -Greedy action tensor([-1.8852, -0.3611, 0.6379, -0.1427]) tensor([0.0421, 0.1932, 0.5245, 0.2403]) -Greedy action tensor([-1.8236, -0.3851, 0.6208, -0.0960]) tensor([0.0447, 0.1884, 0.5152, 0.2516]) -Greedy action tensor([-1.8551, -0.2485, 0.6056, -0.1769]) tensor([0.0434, 0.2163, 0.5081, 0.2323]) -Greedy action tensor([-1.3524, 0.1565, 0.4654, 0.2205]) tensor([0.0606, 0.2740, 0.3732, 0.2921]) -Greedy action tensor([-1.4467, 0.5564, 0.2917, 0.0757]) tensor([0.0535, 0.3967, 0.3044, 0.2453]) -Greedy action tensor([-0.6995, -0.5424, 0.1604, 0.1863]) tensor([0.1437, 0.1682, 0.3396, 0.3485]) -Greedy action tensor([-0.4507, 0.3919, 0.5697, 1.4415]) tensor([0.0786, 0.1824, 0.2179, 0.5211]) -Greedy action tensor([ 0.8091, -0.4893, -0.0666, -0.3432]) tensor([0.4987, 0.1361, 0.2077, 0.1575]) -Greedy action tensor([ 1.1898, -0.6294, -0.1318, -0.3431]) tensor([0.6080, 0.0986, 0.1622, 0.1313]) -Greedy action tensor([ 0.8360, -0.4849, 0.0615, -0.3666]) tensor([0.4930, 0.1316, 0.2273, 0.1481]) -Greedy action tensor([ 0.4820, -0.2148, -0.0733, -0.2177]) tensor([0.3893, 0.1939, 0.2234, 0.1934]) -Greedy action tensor([ 0.9733, -0.0657, 0.1544, -0.3692]) tensor([0.4864, 0.1721, 0.2145, 0.1270]) -Greedy action tensor([ 0.7410, -0.5471, -0.0552, -0.1522]) tensor([0.4681, 0.1291, 0.2111, 0.1916]) -Greedy action tensor([ 0.5234, -0.0398, 0.0778, -0.0917]) tensor([0.3636, 0.2070, 0.2328, 0.1965]) -Greedy action tensor([ 0.6574, -0.5750, -0.0719, -0.1079]) tensor([0.4466, 0.1302, 0.2154, 0.2078]) -Greedy action tensor([ 0.6956, 0.1776, -0.1695, 0.1314]) tensor([0.3868, 0.2304, 0.1628, 0.2200]) -Greedy action tensor([ 0.6847, -0.5219, -0.1962, -0.5772]) tensor([0.5008, 0.1499, 0.2075, 0.1418]) -Greedy action tensor([ 0.8054, -0.5495, 0.1216, -0.4578]) tensor([0.4889, 0.1261, 0.2467, 0.1382]) -Greedy action tensor([0.2196, 0.2339, 0.0049, 0.1184]) tensor([0.2685, 0.2723, 0.2166, 0.2426]) -Greedy action tensor([ 0.7172, -0.4029, -0.1257, -0.2410]) tensor([0.4672, 0.1524, 0.2011, 0.1792]) -Greedy action tensor([ 0.6605, -0.0145, 0.0499, -0.0241]) tensor([0.3912, 0.1992, 0.2124, 0.1973]) -Greedy action tensor([ 0.5663, 0.3730, -0.2449, -0.2726]) tensor([0.3703, 0.3052, 0.1645, 0.1600]) -Greedy action tensor([ 1.0221, -0.8241, -0.0499, -0.4645]) tensor([0.5793, 0.0914, 0.1983, 0.1310]) -Greedy action tensor([ 0.6465, -0.4549, -0.0779, -0.1446]) tensor([0.4405, 0.1464, 0.2135, 0.1997]) -Greedy action tensor([ 0.7607, -0.5088, -0.1192, -0.1778]) tensor([0.4791, 0.1346, 0.1988, 0.1874]) -Greedy action tensor([ 0.6847, -0.4383, -0.1030, 0.0296]) tensor([0.4349, 0.1415, 0.1978, 0.2259]) -Greedy action tensor([ 0.6042, -0.3623, -0.0157, -0.1360]) tensor([0.4175, 0.1588, 0.2246, 0.1991]) -Greedy action tensor([ 0.4494, -0.3225, -0.0980, -0.1484]) tensor([0.3860, 0.1784, 0.2233, 0.2123]) -Greedy action tensor([ 1.0661, -0.3745, 0.0030, -0.2926]) tensor([0.5437, 0.1287, 0.1878, 0.1397]) -Greedy action tensor([ 0.5539, 0.2731, -0.1459, 0.1929]) tensor([0.3391, 0.2561, 0.1684, 0.2363]) -Greedy action tensor([ 0.9324, -1.3120, 0.0067, -0.6734]) tensor([0.5872, 0.0622, 0.2327, 0.1179]) -Greedy action tensor([ 0.8403, -0.5297, 0.0368, -0.2739]) tensor([0.4926, 0.1252, 0.2206, 0.1617]) -Greedy action tensor([0.3538, 0.0287, 0.1298, 0.1128]) tensor([0.3023, 0.2184, 0.2417, 0.2376]) -Greedy action tensor([ 0.8668, -0.5984, -0.0709, -0.3281]) tensor([0.5194, 0.1200, 0.2034, 0.1572]) -Greedy action tensor([ 0.6676, -0.0927, 0.2628, -0.5061]) tensor([0.4092, 0.1913, 0.2730, 0.1265]) -Greedy action tensor([ 1.1466, -0.9695, 0.0988, -0.4164]) tensor([0.5950, 0.0717, 0.2087, 0.1247]) -Greedy action tensor([ 0.9824, -0.7605, 0.0149, -0.5814]) tensor([0.5668, 0.0992, 0.2154, 0.1186]) -Greedy action tensor([ 0.4335, -0.3421, -0.0604, -0.2847]) tensor([0.3909, 0.1800, 0.2385, 0.1906]) -Greedy action tensor([ 0.4521, 0.2628, -0.1599, 0.1062]) tensor([0.3250, 0.2689, 0.1762, 0.2299]) -Greedy action tensor([ 0.4957, 0.2508, -0.1228, -0.0323]) tensor([0.3435, 0.2689, 0.1851, 0.2026]) -Greedy action tensor([ 0.8815, -0.2679, -0.0346, -0.0235]) tensor([0.4714, 0.1494, 0.1886, 0.1907]) -Greedy action tensor([ 0.6760, -0.1024, -0.0128, -0.7027]) tensor([0.4518, 0.2075, 0.2269, 0.1138]) -Greedy action tensor([ 0.8045, -0.5902, -0.0832, -0.3202]) tensor([0.5040, 0.1249, 0.2074, 0.1637]) -Greedy action tensor([ 0.4498, 0.0449, -0.2357, -0.2990]) tensor([0.3782, 0.2523, 0.1906, 0.1789]) -Greedy action tensor([ 0.7257, -0.3017, -0.1277, -0.2960]) tensor([0.4665, 0.1670, 0.1987, 0.1679]) -Greedy action tensor([ 0.2591, -0.1074, -0.1379, -0.1765]) tensor([0.3320, 0.2301, 0.2232, 0.2148]) -Greedy action tensor([ 1.2704, -0.4790, 0.0811, -0.5585]) tensor([0.6102, 0.1061, 0.1857, 0.0980]) -Greedy action tensor([ 0.6233, -0.5322, -0.0501, -0.3577]) tensor([0.4546, 0.1431, 0.2318, 0.1704]) -Greedy action tensor([ 0.4997, -0.5165, -0.1059, -0.2116]) tensor([0.4169, 0.1509, 0.2275, 0.2047]) -Greedy action tensor([ 1.2548, -0.8872, -0.2141, -0.5592]) tensor([0.6620, 0.0777, 0.1524, 0.1079]) -Greedy action tensor([ 0.6201, -0.4941, 0.0050, -0.2496]) tensor([0.4371, 0.1434, 0.2363, 0.1832]) -Greedy action tensor([ 0.6275, -0.2412, -0.0385, -0.4492]) tensor([0.4398, 0.1845, 0.2259, 0.1498]) -Greedy action tensor([ 0.7983, -0.9497, -0.1410, -0.3762]) tensor([0.5336, 0.0929, 0.2086, 0.1649]) -Greedy action tensor([ 0.8932, -0.5076, -0.0753, -0.4795]) tensor([0.5321, 0.1311, 0.2020, 0.1348]) -Greedy action tensor([ 0.4979, -0.2917, -0.1470, -0.1670]) tensor([0.4011, 0.1821, 0.2105, 0.2063]) -Greedy action tensor([ 0.8608, -0.5848, 0.1298, -0.5764]) tensor([0.5116, 0.1205, 0.2463, 0.1216]) -Greedy action tensor([ 0.3668, -0.0524, -0.0235, -0.2661]) tensor([0.3490, 0.2295, 0.2362, 0.1853]) -Greedy action tensor([ 1.0535, -0.9231, 0.0725, -0.4919]) tensor([0.5791, 0.0802, 0.2171, 0.1235]) -Greedy action tensor([ 0.9610, -0.5179, -0.1640, -0.3837]) tensor([0.5515, 0.1257, 0.1791, 0.1437]) -Greedy action tensor([ 1.0365, -0.5683, -0.0911, -0.2510]) tensor([0.5553, 0.1116, 0.1798, 0.1533]) -Greedy action tensor([ 0.6301, -0.3852, -0.1560, -0.4628]) tensor([0.4644, 0.1683, 0.2116, 0.1557]) -Greedy action tensor([ 0.8180, -0.7270, 0.0058, -0.3158]) tensor([0.5053, 0.1078, 0.2243, 0.1626]) -Greedy action tensor([ 0.2945, -0.2946, -0.0332, -0.3712]) tensor([0.3585, 0.1989, 0.2583, 0.1842]) -Greedy action tensor([ 0.8195, -0.6273, -0.0904, -0.2689]) tensor([0.5064, 0.1192, 0.2039, 0.1705]) -Greedy action tensor([ 0.8965, -0.6760, 0.1434, -0.6029]) tensor([0.5258, 0.1091, 0.2476, 0.1174]) -Greedy action tensor([ 0.7368, -0.2302, -0.0851, -0.1076]) tensor([0.4445, 0.1690, 0.1954, 0.1911]) -Greedy action tensor([ 0.9529, -1.1901, -0.1228, -0.5449]) tensor([0.5945, 0.0697, 0.2028, 0.1330]) -Greedy action tensor([ 0.7780, -0.2776, 0.0717, -0.2437]) tensor([0.4543, 0.1581, 0.2242, 0.1635]) -Greedy action tensor([ 0.8853, -0.6015, -0.0910, -0.3900]) tensor([0.5313, 0.1201, 0.2001, 0.1484]) -Greedy action tensor([ 0.7214, -0.4369, -0.0452, -0.4244]) tensor([0.4770, 0.1498, 0.2216, 0.1517]) -Greedy action tensor([ 0.8021, -0.4736, -0.0989, -0.2937]) tensor([0.4951, 0.1383, 0.2011, 0.1655]) -Greedy action tensor([ 0.7670, -0.2309, -0.1264, -0.6193]) tensor([0.4931, 0.1818, 0.2018, 0.1233]) -Greedy action tensor([ 0.7264, 0.0951, 0.0862, -0.0337]) tensor([0.3958, 0.2105, 0.2086, 0.1851]) -Greedy action tensor([ 0.5019, -0.1148, -0.0631, -0.2473]) tensor([0.3875, 0.2091, 0.2202, 0.1832]) -Greedy action tensor([ 1.0076, -0.6197, -0.0195, -0.4171]) tensor([0.5571, 0.1094, 0.1995, 0.1340]) -Greedy action tensor([ 1.1117, -1.1069, 0.1255, -0.4663]) tensor([0.5924, 0.0644, 0.2209, 0.1223]) -Greedy action tensor([ 0.4875, -0.2638, -0.1629, -0.2950]) tensor([0.4080, 0.1925, 0.2129, 0.1866]) -Greedy action tensor([ 0.6937, -0.5554, -0.1254, -0.2916]) tensor([0.4760, 0.1365, 0.2098, 0.1777]) -Greedy action tensor([ 0.7823, -0.3753, -0.1441, -0.2759]) tensor([0.4861, 0.1527, 0.1925, 0.1687]) -Greedy action tensor([ 0.6178, -0.3265, -0.0216, -0.3040]) tensor([0.4321, 0.1681, 0.2280, 0.1719]) -Greedy action tensor([ 0.8753, -0.2836, 0.0344, -0.3289]) tensor([0.4890, 0.1535, 0.2109, 0.1467]) -Greedy action tensor([ 0.9298, -0.4369, -0.0246, -0.4262]) tensor([0.5270, 0.1344, 0.2029, 0.1358]) -Greedy action tensor([ 0.5890, -0.3767, 0.1519, -0.2601]) tensor([0.4074, 0.1551, 0.2632, 0.1743]) -Greedy action tensor([ 0.5590, -0.4967, -0.1106, -0.2139]) tensor([0.4308, 0.1499, 0.2205, 0.1989]) -Greedy action tensor([ 0.9941, -0.5607, -0.0172, -0.6086]) tensor([0.5630, 0.1189, 0.2048, 0.1134]) -Greedy action tensor([ 0.5606, 0.3615, -0.2177, -0.4152]) tensor([0.3766, 0.3086, 0.1729, 0.1419]) -Greedy action tensor([ 0.7431, -0.7778, -0.0488, -0.3632]) tensor([0.4994, 0.1091, 0.2262, 0.1652]) -Greedy action tensor([ 0.4336, -0.0047, 0.0071, -0.2531]) tensor([0.3570, 0.2303, 0.2330, 0.1796]) -Greedy action tensor([ 1.0621, -0.7973, -0.4421, -0.8737]) tensor([0.6569, 0.1023, 0.1460, 0.0948]) -Greedy action tensor([ 1.6870, -0.4535, -0.5061, 0.1228]) tensor([0.6952, 0.0818, 0.0776, 0.1455]) -Greedy action tensor([ 1.4245, -0.4223, -0.5942, 0.5989]) tensor([0.5785, 0.0913, 0.0768, 0.2534]) -Greedy action tensor([ 1.6205, -0.7200, -0.6337, 0.6988]) tensor([0.6254, 0.0602, 0.0656, 0.2488]) -Greedy action tensor([ 1.6396, -0.7364, -0.4248, 0.8183]) tensor([0.6025, 0.0560, 0.0765, 0.2650]) -Greedy action tensor([ 1.2353, -0.6560, -0.1668, 0.2563]) tensor([0.5641, 0.0851, 0.1388, 0.2119]) -Greedy action tensor([ 0.6714, -0.3688, -0.0381, -0.0511]) tensor([0.4290, 0.1516, 0.2110, 0.2083]) -Greedy action tensor([ 1.6251, -0.3908, -0.3967, 0.1874]) tensor([0.6653, 0.0886, 0.0881, 0.1580]) -Greedy action tensor([ 1.2720, -0.7415, -0.2247, 0.6572]) tensor([0.5268, 0.0703, 0.1179, 0.2849]) -Greedy action tensor([ 1.9981, -1.0965, -0.1817, 0.3584]) tensor([0.7394, 0.0335, 0.0836, 0.1435]) -Greedy action tensor([ 2.0440, -0.9635, -0.0290, 0.4731]) tensor([0.7230, 0.0357, 0.0910, 0.1503]) -Greedy action tensor([ 1.2693, 0.0734, -0.8958, 0.4534]) tensor([0.5378, 0.1627, 0.0617, 0.2378]) -Greedy action tensor([ 1.3909, -0.2217, -0.6036, 0.3081]) tensor([0.5973, 0.1191, 0.0813, 0.2023]) -Greedy action tensor([1.5431, 0.2090, 0.0990, 0.1575]) tensor([0.5716, 0.1506, 0.1349, 0.1430]) -Greedy action tensor([ 1.7173, -0.3185, -0.5245, 0.2279]) tensor([0.6838, 0.0893, 0.0727, 0.1542]) -Greedy action tensor([ 1.3499, -0.4170, -0.5007, 0.1741]) tensor([0.6110, 0.1044, 0.0960, 0.1885]) -Greedy action tensor([ 3.2397, -1.5752, -0.2904, 1.2180]) tensor([0.8548, 0.0069, 0.0250, 0.1132]) -Greedy action tensor([ 1.2653, -0.4761, -0.4982, 0.4098]) tensor([0.5644, 0.0989, 0.0968, 0.2399]) -Greedy action tensor([ 1.1391, -0.1518, -0.1756, -0.2025]) tensor([0.5540, 0.1524, 0.1488, 0.1448]) -Greedy action tensor([ 1.4226, -0.2559, -1.2099, 0.3329]) tensor([0.6270, 0.1170, 0.0451, 0.2109]) -Greedy action tensor([ 1.8984, -0.2763, -0.6540, 0.8005]) tensor([0.6557, 0.0745, 0.0511, 0.2187]) -Greedy action tensor([ 0.8879, -0.3393, -0.3102, 0.4603]) tensor([0.4451, 0.1305, 0.1343, 0.2902]) -Greedy action tensor([ 1.3510, -0.5925, -0.0025, 0.7096]) tensor([0.5187, 0.0743, 0.1340, 0.2731]) -Greedy action tensor([ 0.8020, -0.7817, 0.0426, 0.3306]) tensor([0.4353, 0.0893, 0.2037, 0.2717]) -Greedy action tensor([ 1.3144, -0.3068, -0.4143, 0.4296]) tensor([0.5593, 0.1106, 0.0993, 0.2309]) -Greedy action tensor([ 1.9080, -1.2772, -0.2763, 0.5349]) tensor([0.7106, 0.0294, 0.0800, 0.1800]) -Greedy action tensor([ 1.3331, -0.7429, -0.1908, 0.0827]) tensor([0.6136, 0.0770, 0.1337, 0.1757]) -Greedy action tensor([ 1.6799, -0.6838, -0.3542, 0.4432]) tensor([0.6600, 0.0621, 0.0863, 0.1916]) -Greedy action tensor([ 1.2886, -0.4100, -0.5685, 0.0429]) tensor([0.6147, 0.1125, 0.0960, 0.1769]) -Greedy action tensor([ 1.1312, 0.0333, -0.3636, 0.1772]) tensor([0.5146, 0.1717, 0.1154, 0.1983]) -Greedy action tensor([ 1.9908, -0.7297, -0.6213, 0.5683]) tensor([0.7245, 0.0477, 0.0532, 0.1747]) -Greedy action tensor([ 1.7672, -0.6022, -0.5341, 0.5085]) tensor([0.6767, 0.0633, 0.0678, 0.1922]) -Greedy action tensor([ 1.3652, -0.4009, -0.4504, 0.2075]) tensor([0.6068, 0.1038, 0.0988, 0.1907]) -Greedy action tensor([ 1.9437, -0.2416, -0.2439, 0.4751]) tensor([0.6873, 0.0773, 0.0771, 0.1583]) -Greedy action tensor([ 1.4528, -0.9922, -0.0287, 0.2651]) tensor([0.6177, 0.0536, 0.1404, 0.1884]) -Greedy action tensor([ 1.6438, -0.6880, -0.2945, 0.2748]) tensor([0.6687, 0.0649, 0.0963, 0.1701]) -Greedy action tensor([ 1.3585, -0.5793, -0.2719, 0.3492]) tensor([0.5867, 0.0845, 0.1149, 0.2138]) -Greedy action tensor([ 1.2547, -0.6202, -0.2045, -0.2069]) tensor([0.6182, 0.0948, 0.1437, 0.1433]) -Greedy action tensor([ 1.5067, -0.6617, -0.3976, -0.0801]) tensor([0.6812, 0.0779, 0.1015, 0.1394]) -Greedy action tensor([ 1.7600, -0.5494, -0.4919, 0.5325]) tensor([0.6677, 0.0663, 0.0702, 0.1957]) -Greedy action tensor([ 1.5620, -0.3987, -0.5248, 0.1758]) tensor([0.6601, 0.0929, 0.0819, 0.1650]) -Greedy action tensor([ 2.2870, -1.2292, 0.0629, 0.7297]) tensor([0.7415, 0.0220, 0.0802, 0.1562]) -Greedy action tensor([ 1.1923, -0.4481, -0.4013, 0.3529]) tensor([0.5467, 0.1060, 0.1111, 0.2362]) -Greedy action tensor([ 1.6552, 0.1506, -0.6064, 0.2433]) tensor([0.6370, 0.1415, 0.0664, 0.1552]) -Greedy action tensor([ 1.3823, 0.0554, -0.2943, -0.1026]) tensor([0.5956, 0.1580, 0.1114, 0.1349]) -Greedy action tensor([ 1.6970, -0.3766, -0.5259, 0.4665]) tensor([0.6552, 0.0824, 0.0710, 0.1914]) -Greedy action tensor([ 1.3913, -0.7321, 0.0630, 0.3853]) tensor([0.5714, 0.0683, 0.1514, 0.2089]) -Greedy action tensor([ 1.2673, -0.1496, -0.0233, 0.5514]) tensor([0.4984, 0.1209, 0.1371, 0.2436]) -Greedy action tensor([ 1.7924, -0.5678, -0.2198, 0.1168]) tensor([0.7066, 0.0667, 0.0945, 0.1323]) -Greedy action tensor([ 1.4202, -0.5318, -0.4659, 0.3702]) tensor([0.6084, 0.0864, 0.0923, 0.2129]) -Greedy action tensor([ 1.0132, -0.2240, -1.0207, 0.5608]) tensor([0.4861, 0.1411, 0.0636, 0.3092]) -Greedy action tensor([ 2.4116, -0.8896, 0.1784, 0.4518]) tensor([0.7783, 0.0287, 0.0834, 0.1096]) -Greedy action tensor([ 2.0221, -1.0350, -0.4949, 0.5153]) tensor([0.7411, 0.0349, 0.0598, 0.1642]) -Greedy action tensor([ 0.8299, -0.4680, -0.9615, 0.4944]) tensor([0.4641, 0.1267, 0.0774, 0.3318]) -Greedy action tensor([ 1.9747, 0.1160, -0.7957, 0.1155]) tensor([0.7276, 0.1134, 0.0456, 0.1134]) -Greedy action tensor([ 1.1686, -0.1800, -0.4640, 0.1249]) tensor([0.5533, 0.1437, 0.1081, 0.1949]) -Greedy action tensor([ 1.8845, -0.5641, -0.3004, 0.4665]) tensor([0.6939, 0.0600, 0.0781, 0.1681]) -Greedy action tensor([ 1.1281, 0.1961, -0.2723, -0.0053]) tensor([0.5096, 0.2007, 0.1256, 0.1641]) -Greedy action tensor([ 1.5431, -0.6855, -0.1699, 0.3124]) tensor([0.6329, 0.0681, 0.1141, 0.1848]) -Greedy action tensor([ 1.4091, -0.3713, -0.1114, -0.2076]) tensor([0.6306, 0.1063, 0.1378, 0.1252]) -Greedy action tensor([ 2.5512, -1.0091, -0.4886, 0.8180]) tensor([0.7981, 0.0227, 0.0382, 0.1410]) -Greedy action tensor([ 1.4082, -0.9533, 0.2281, 0.1971]) tensor([0.5884, 0.0555, 0.1808, 0.1753]) -Greedy action tensor([ 1.0590, -0.9590, -0.3672, -0.2068]) tensor([0.6042, 0.0803, 0.1451, 0.1704]) -Greedy action tensor([ 1.5802, -0.3755, -1.2417, 0.2783]) tensor([0.6789, 0.0960, 0.0404, 0.1847]) -Greedy action tensor([ 1.1718, -0.6277, -0.0352, 0.1572]) tensor([0.5473, 0.0905, 0.1637, 0.1984]) -Greedy action tensor([ 1.6830, -0.8629, -0.3701, 0.5057]) tensor([0.6601, 0.0518, 0.0847, 0.2034]) -Greedy action tensor([ 2.1578, -0.4241, -0.3546, 0.5096]) tensor([0.7412, 0.0561, 0.0601, 0.1426]) -Greedy action tensor([ 1.6375, -0.4567, 0.0155, 0.7587]) tensor([0.5761, 0.0710, 0.1138, 0.2392]) -Greedy action tensor([ 1.6398, -1.0104, -0.1221, 0.1529]) tensor([0.6810, 0.0481, 0.1169, 0.1540]) -Greedy action tensor([ 1.3041, -0.8205, 0.0165, 0.3518]) tensor([0.5614, 0.0671, 0.1549, 0.2166]) -Greedy action tensor([1.8129, 0.4300, 0.2727, 0.2088]) tensor([0.6001, 0.1505, 0.1286, 0.1207]) -Greedy action tensor([ 2.0450, -0.7328, -0.2836, 0.6211]) tensor([0.7141, 0.0444, 0.0696, 0.1719]) -Greedy action tensor([ 1.9761, -0.5569, -0.0612, 0.6221]) tensor([0.6812, 0.0541, 0.0888, 0.1759]) -Greedy action tensor([ 1.4547, -0.5532, -0.2554, 0.2933]) tensor([0.6142, 0.0825, 0.1111, 0.1923]) -Greedy action tensor([ 1.3763, -0.3395, -0.4563, 0.6450]) tensor([0.5491, 0.0987, 0.0879, 0.2643]) -Greedy action tensor([ 1.2400, -0.2280, -0.8962, 0.1233]) tensor([0.5967, 0.1375, 0.0705, 0.1953]) -Greedy action tensor([ 1.4277, -0.5122, -0.2147, 0.1865]) tensor([0.6149, 0.0884, 0.1190, 0.1777]) -Greedy action tensor([ 2.3438, -0.8969, -0.3719, 0.2826]) tensor([0.8113, 0.0318, 0.0537, 0.1033]) -Greedy action tensor([ 1.5724, -1.0388, -0.4581, 0.4215]) tensor([0.6574, 0.0483, 0.0863, 0.2080]) -Greedy action tensor([ 1.7997, -0.2498, -0.4264, 0.5308]) tensor([0.6588, 0.0849, 0.0711, 0.1852]) -Greedy action tensor([ 1.5918, -0.4867, -0.1921, 0.6503]) tensor([0.5941, 0.0743, 0.0998, 0.2317]) -Greedy action tensor([ 1.2970, -0.2787, -0.3379, -0.0254]) tensor([0.5994, 0.1240, 0.1169, 0.1597]) -Greedy action tensor([ 1.2051, -0.1717, -0.7970, 0.4361]) tensor([0.5403, 0.1364, 0.0730, 0.2504]) -Greedy action tensor([ 0.4623, -1.5467, 0.9904, 0.2528]) tensor([0.2747, 0.0368, 0.4657, 0.2228]) -Greedy action tensor([-1.0372, -0.5978, 0.3710, -1.1476]) tensor([0.1327, 0.2059, 0.5426, 0.1188]) -Greedy action tensor([ 0.0931, 0.1670, 0.3769, -0.2941]) tensor([0.2449, 0.2637, 0.3252, 0.1663]) -Greedy action tensor([ 0.6954, 0.5721, -1.5429, 0.6466]) tensor([0.3398, 0.3004, 0.0362, 0.3236]) -Greedy action tensor([ 0.5137, -1.3046, 0.5352, 0.2889]) tensor([0.3353, 0.0544, 0.3426, 0.2678]) -Greedy action tensor([ 1.3150, 0.2324, 0.5002, -0.5243]) tensor([0.5154, 0.1746, 0.2282, 0.0819]) -Greedy action tensor([-0.3374, 0.0626, -0.4890, 0.8128]) tensor([0.1536, 0.2291, 0.1320, 0.4852]) -Greedy action tensor([ 2.0858, -0.4475, 0.3171, 0.0601]) tensor([0.7237, 0.0575, 0.1234, 0.0955]) -Greedy action tensor([-0.4998, -0.0961, 1.0061, -0.0993]) tensor([0.1177, 0.1762, 0.5305, 0.1756]) -Greedy action tensor([-0.3628, -0.2257, -0.8189, 0.6281]) tensor([0.1827, 0.2095, 0.1158, 0.4920]) -Greedy action tensor([ 0.1907, 0.0861, -0.2784, -0.5036]) tensor([0.3305, 0.2977, 0.2067, 0.1651]) -Greedy action tensor([-0.4264, -0.8973, -0.0140, 0.8807]) tensor([0.1464, 0.0914, 0.2211, 0.5410]) -Greedy action tensor([ 1.5405, -1.1588, 0.9750, 0.8439]) tensor([0.4687, 0.0315, 0.2663, 0.2335]) -Greedy action tensor([-0.4308, -2.1384, 0.3839, 0.7696]) tensor([0.1479, 0.0268, 0.3340, 0.4912]) -Greedy action tensor([ 0.9526, -0.2309, -0.2292, 0.3941]) tensor([0.4577, 0.1401, 0.1404, 0.2618]) -Greedy action tensor([0.6763, 0.0684, 0.8110, 0.2612]) tensor([0.2986, 0.1626, 0.3416, 0.1972]) -Greedy action tensor([-0.0062, 0.1675, 0.1923, -0.8784]) tensor([0.2613, 0.3108, 0.3187, 0.1092]) -Greedy action tensor([-0.8507, -0.9422, 1.6042, -0.5815]) tensor([0.0673, 0.0614, 0.7833, 0.0880]) -Greedy action tensor([-0.9902, 0.6957, 0.9045, -0.2697]) tensor([0.0662, 0.3574, 0.4403, 0.1361]) -Greedy action tensor([ 0.3816, -0.8498, 0.9257, -0.9127]) tensor([0.3040, 0.0887, 0.5239, 0.0833]) -Greedy action tensor([ 0.2931, 0.8203, -0.3505, 0.2737]) tensor([0.2381, 0.4033, 0.1251, 0.2335]) -Greedy action tensor([ 0.0180, -0.2118, 0.6995, -0.3230]) tensor([0.2231, 0.1773, 0.4410, 0.1586]) -Greedy action tensor([ 1.0003, -0.6482, 1.3185, 0.1335]) tensor([0.3347, 0.0644, 0.4602, 0.1407]) -Greedy action tensor([-3.0364e-01, -1.1367e+00, 4.9205e-01, -4.9651e-04]) tensor([0.1998, 0.0869, 0.4428, 0.2706]) -Greedy action tensor([ 0.2373, -0.9296, 0.2444, -1.3426]) tensor([0.3961, 0.1233, 0.3989, 0.0816]) -Greedy action tensor([ 0.7112, -1.1332, 0.4986, -0.3530]) tensor([0.4326, 0.0684, 0.3498, 0.1492]) -Greedy action tensor([-0.0827, -1.7054, -0.2649, 1.1237]) tensor([0.1861, 0.0367, 0.1551, 0.6220]) -Greedy action tensor([ 0.7177, -0.6143, 0.0813, -0.0190]) tensor([0.4402, 0.1162, 0.2329, 0.2107]) -Greedy action tensor([ 0.0757, -0.4732, 0.1274, 0.5802]) tensor([0.2333, 0.1347, 0.2457, 0.3863]) -Greedy action tensor([-4.9634e-01, -1.8367e+00, 6.7545e-01, -5.0884e-04]) tensor([0.1631, 0.0427, 0.5264, 0.2678]) -Greedy action tensor([-0.0449, -0.9936, 0.4237, -0.3677]) tensor([0.2696, 0.1044, 0.4308, 0.1952]) -Greedy action tensor([ 0.1996, -1.1362, 0.2374, 0.3764]) tensor([0.2861, 0.0752, 0.2972, 0.3415]) -Greedy action tensor([0.5762, 1.5344, 0.5439, 0.1145]) tensor([0.1921, 0.5008, 0.1860, 0.1211]) -Greedy action tensor([-0.0741, 0.2884, 0.7604, 0.0338]) tensor([0.1708, 0.2454, 0.3935, 0.1903]) -Greedy action tensor([ 0.6248, -1.1635, -0.9300, -0.9801]) tensor([0.6332, 0.1059, 0.1337, 0.1272]) -Greedy action tensor([-0.4460, 0.2895, -0.3772, 0.4610]) tensor([0.1507, 0.3145, 0.1615, 0.3733]) -Greedy action tensor([ 1.2442, -1.8505, 1.1757, 0.6720]) tensor([0.3932, 0.0178, 0.3671, 0.2219]) -Greedy action tensor([-1.0169, -1.2504, 0.1737, -0.3162]) tensor([0.1409, 0.1116, 0.4635, 0.2840]) -Greedy action tensor([ 0.6238, -0.0127, 1.2675, -0.2254]) tensor([0.2590, 0.1371, 0.4931, 0.1108]) -Greedy action tensor([-0.3805, -0.5997, -0.5319, -0.1954]) tensor([0.2587, 0.2078, 0.2223, 0.3112]) -Greedy action tensor([ 0.0532, -1.1015, 0.4686, 1.1004]) tensor([0.1761, 0.0555, 0.2667, 0.5017]) -Greedy action tensor([ 0.9844, -0.8662, 0.2766, 0.0189]) tensor([0.4925, 0.0774, 0.2426, 0.1875]) -Greedy action tensor([-1.1074, -1.1276, -0.7693, 0.2055]) tensor([0.1409, 0.1380, 0.1975, 0.5236]) -Greedy action tensor([-0.2854, -0.1626, -1.0239, 0.0044]) tensor([0.2535, 0.2866, 0.1211, 0.3387]) -Greedy action tensor([ 0.0218, -0.0210, 2.2140, -0.7657]) tensor([0.0880, 0.0843, 0.7877, 0.0400]) -Greedy action tensor([ 0.1275, -0.5158, -0.2440, 0.0075]) tensor([0.3224, 0.1694, 0.2223, 0.2859]) -Greedy action tensor([ 0.2551, -1.1241, -0.1894, -0.2733]) tensor([0.4028, 0.1014, 0.2583, 0.2375]) -Greedy action tensor([-1.1631, -0.1289, -0.1571, 0.6365]) tensor([0.0794, 0.2233, 0.2171, 0.4801]) -Greedy action tensor([ 0.0534, -0.2542, 1.1354, -0.2784]) tensor([0.1851, 0.1361, 0.5461, 0.1328]) -Greedy action tensor([-0.1746, -0.9749, 0.5200, -0.1301]) tensor([0.2223, 0.0999, 0.4453, 0.2325]) -Greedy action tensor([ 1.1205, -0.8448, 1.3059, 0.0200]) tensor([0.3736, 0.0524, 0.4497, 0.1243]) -Greedy action tensor([-0.2193, -2.0755, 0.2129, -0.3795]) tensor([0.2818, 0.0440, 0.4341, 0.2401]) -Greedy action tensor([-0.2109, -0.2814, -0.3137, 0.3000]) tensor([0.2222, 0.2070, 0.2005, 0.3703]) -Greedy action tensor([-1.7052, -1.0944, 0.2002, -0.8451]) tensor([0.0838, 0.1544, 0.5636, 0.1981]) -Greedy action tensor([ 1.2074, -0.6997, 0.5963, 1.1942]) tensor([0.3734, 0.0555, 0.2027, 0.3685]) -Greedy action tensor([-1.5475, -0.6433, 0.8814, -0.7090]) tensor([0.0584, 0.1442, 0.6624, 0.1350]) -Greedy action tensor([ 0.8745, -0.2053, 0.4879, 1.5801]) tensor([0.2473, 0.0840, 0.1680, 0.5008]) -Greedy action tensor([ 0.1698, -1.1951, -0.7520, 0.6231]) tensor([0.3099, 0.0792, 0.1233, 0.4876]) -Greedy action tensor([ 0.6020, -0.2081, -0.0644, -0.4032]) tensor([0.4302, 0.1914, 0.2210, 0.1575]) -Greedy action tensor([ 0.2678, 0.0394, 0.8754, -0.3040]) tensor([0.2383, 0.1896, 0.4375, 0.1345]) -Greedy action tensor([-0.7424, -0.6688, 0.9434, -1.3368]) tensor([0.1246, 0.1341, 0.6725, 0.0688]) -Greedy action tensor([ 0.4993, -0.1891, -0.1197, -0.2696]) tensor([0.3993, 0.2006, 0.2150, 0.1851]) -Greedy action tensor([ 0.1701, -0.2412, 1.0606, 0.0184]) tensor([0.2017, 0.1337, 0.4914, 0.1733]) -Greedy action tensor([ 0.2735, -1.7800, 1.0387, -0.0501]) tensor([0.2499, 0.0321, 0.5372, 0.1808]) -Greedy action tensor([-0.0404, -2.0144, 0.7885, -0.4721]) tensor([0.2452, 0.0341, 0.5616, 0.1592]) -Greedy action tensor([ 0.4846, -0.1155, 0.7664, -0.4906]) tensor([0.3076, 0.1688, 0.4077, 0.1160]) -Greedy action tensor([ 1.3101, -0.6251, -0.2976, 0.8803]) tensor([0.5012, 0.0724, 0.1004, 0.3261]) -Greedy action tensor([ 0.4135, -0.1289, 1.1253, 0.3360]) tensor([0.2200, 0.1279, 0.4484, 0.2036]) -Greedy action tensor([-1.1686, 0.4900, 1.1650, -1.4297]) tensor([0.0577, 0.3029, 0.5950, 0.0444]) -Greedy action tensor([ 1.1661, -1.3192, 0.2374, 0.3561]) tensor([0.5200, 0.0433, 0.2054, 0.2313]) -Greedy action tensor([ 1.0081, -1.6046, -0.0040, 1.1114]) tensor([0.3928, 0.0288, 0.1428, 0.4356]) -Greedy action tensor([-0.4462, -0.4366, -0.2408, -0.2077]) tensor([0.2219, 0.2240, 0.2725, 0.2816]) -Greedy action tensor([-0.8495, -0.5884, 0.8445, -0.2058]) tensor([0.1037, 0.1346, 0.5643, 0.1974]) -Greedy action tensor([-0.0978, -1.2109, -0.6586, 0.5682]) tensor([0.2600, 0.0854, 0.1484, 0.5061]) -Greedy action tensor([ 0.5529, 0.3309, -0.2024, 0.0518]) tensor([0.3476, 0.2784, 0.1633, 0.2106]) -Greedy action tensor([-0.1027, -1.0283, 1.2440, -0.2079]) tensor([0.1628, 0.0645, 0.6261, 0.1466]) -Greedy action tensor([-0.7221, 0.0603, -1.0835, -0.4682]) tensor([0.1933, 0.4228, 0.1347, 0.2492]) -Greedy action tensor([ 0.0559, 0.2842, 0.1731, -0.7553]) tensor([0.2614, 0.3285, 0.2939, 0.1162]) -Greedy action tensor([ 0.4208, -0.6129, -0.1913, -0.4021]) tensor([0.4279, 0.1522, 0.2320, 0.1879]) -Greedy action tensor([-0.2160, -1.0314, -0.0731, -0.7153]) tensor([0.3122, 0.1381, 0.3602, 0.1895]) -Greedy action tensor([ 0.3347, -0.9847, 0.1750, 0.8625]) tensor([0.2621, 0.0701, 0.2234, 0.4444]) -Greedy action tensor([ 0.8177, -0.3962, -0.1233, -0.2898]) tensor([0.4956, 0.1472, 0.1934, 0.1637]) -Greedy action tensor([0.3942, 0.2440, 0.0408, 0.0667]) tensor([0.3045, 0.2621, 0.2139, 0.2195]) -Greedy action tensor([ 0.3638, -0.0531, -0.0309, -0.3077]) tensor([0.3516, 0.2318, 0.2369, 0.1797]) -Greedy action tensor([ 0.8765, -0.5164, 0.0658, -0.3685]) tensor([0.5048, 0.1254, 0.2244, 0.1454]) -Greedy action tensor([ 0.5149, -0.0715, -0.0510, -0.3454]) tensor([0.3926, 0.2184, 0.2229, 0.1661]) -Greedy action tensor([ 1.0385, -0.6611, 0.1172, -0.5844]) tensor([0.5624, 0.1028, 0.2238, 0.1110]) -Greedy action tensor([ 0.7516, 0.0642, -0.3127, -0.0408]) tensor([0.4347, 0.2186, 0.1499, 0.1968]) -Greedy action tensor([ 1.1113, -0.4441, -0.1227, -0.2860]) tensor([0.5716, 0.1207, 0.1664, 0.1413]) -Greedy action tensor([ 1.0424, -0.5020, -0.1035, -0.2727]) tensor([0.5556, 0.1186, 0.1766, 0.1491]) -Greedy action tensor([ 0.9145, -0.2477, 0.0426, -0.4327]) tensor([0.5023, 0.1571, 0.2100, 0.1306]) -Greedy action tensor([ 0.5069, -0.2687, -0.0365, -0.1778]) tensor([0.3929, 0.1809, 0.2282, 0.1981]) -Greedy action tensor([ 0.5932, -0.1091, -0.0814, 0.0956]) tensor([0.3827, 0.1896, 0.1949, 0.2327]) -Greedy action tensor([ 0.9719, -0.7689, 0.0509, -0.6619]) tensor([0.5654, 0.0992, 0.2251, 0.1104]) -Greedy action tensor([ 0.7205, 0.2879, -0.1859, 0.2047]) tensor([0.3774, 0.2449, 0.1525, 0.2253]) -Greedy action tensor([ 0.3537, -0.2105, -0.0697, -0.4118]) tensor([0.3719, 0.2115, 0.2435, 0.1730]) -Greedy action tensor([ 0.2936, -0.0014, -0.1236, -0.0757]) tensor([0.3231, 0.2406, 0.2129, 0.2234]) -Greedy action tensor([ 0.5975, -0.1452, 0.0564, -0.2037]) tensor([0.3989, 0.1898, 0.2322, 0.1790]) -Greedy action tensor([ 1.1263, -0.6916, 0.0060, -0.5080]) tensor([0.5939, 0.0964, 0.1937, 0.1159]) -Greedy action tensor([ 1.0362, -0.6622, 0.1726, -0.7406]) tensor([0.5638, 0.1032, 0.2377, 0.0954]) -Greedy action tensor([ 0.5196, -0.2981, -0.1665, -0.2216]) tensor([0.4130, 0.1823, 0.2079, 0.1968]) -Greedy action tensor([ 7.0108e-01, -2.3188e-01, -4.9693e-02, 3.6746e-04]) tensor([0.4234, 0.1666, 0.1999, 0.2101]) -Greedy action tensor([ 0.8366, -0.7104, 0.0571, -0.5804]) tensor([0.5225, 0.1112, 0.2396, 0.1267]) -Greedy action tensor([ 0.3267, -0.0684, 0.0496, -0.0576]) tensor([0.3213, 0.2164, 0.2435, 0.2188]) -Greedy action tensor([ 1.0158, -0.6235, -0.1113, -0.4223]) tensor([0.5697, 0.1106, 0.1845, 0.1352]) -Greedy action tensor([ 1.2450, -0.9129, 0.0271, -0.5855]) tensor([0.6362, 0.0735, 0.1882, 0.1020]) -Greedy action tensor([ 0.8099, -0.7816, 0.0101, -0.2253]) tensor([0.4980, 0.1014, 0.2238, 0.1769]) -Greedy action tensor([ 0.5582, -0.4816, -0.1452, -0.1771]) tensor([0.4296, 0.1519, 0.2126, 0.2059]) -Greedy action tensor([ 0.6011, 0.0376, -0.2557, 0.0340]) tensor([0.3905, 0.2223, 0.1658, 0.2215]) -Greedy action tensor([ 0.6772, -0.6324, -0.1048, -0.3034]) tensor([0.4756, 0.1284, 0.2176, 0.1784]) -Greedy action tensor([ 0.7998, -0.5811, -0.0597, -0.4490]) tensor([0.5098, 0.1281, 0.2158, 0.1462]) -Greedy action tensor([ 0.5235, -0.0755, -0.2065, -0.1029]) tensor([0.3897, 0.2141, 0.1878, 0.2083]) -Greedy action tensor([ 0.7844, -0.4520, -0.1286, -0.1326]) tensor([0.4781, 0.1389, 0.1919, 0.1911]) -Greedy action tensor([ 0.4994, -0.0132, -0.0705, -0.0172]) tensor([0.3622, 0.2169, 0.2048, 0.2161]) -Greedy action tensor([ 0.7237, -0.3969, -0.1211, -0.4326]) tensor([0.4830, 0.1575, 0.2075, 0.1520]) -Greedy action tensor([ 0.4385, -0.5198, -0.1339, -0.1193]) tensor([0.3968, 0.1522, 0.2239, 0.2271]) -Greedy action tensor([ 0.7856, -0.1242, -0.3517, -0.2376]) tensor([0.4801, 0.1933, 0.1540, 0.1726]) -Greedy action tensor([ 0.9162, -0.4902, -0.1323, -0.3791]) tensor([0.5350, 0.1311, 0.1875, 0.1465]) -Greedy action tensor([ 0.5963, -0.5613, -0.0678, -0.2297]) tensor([0.4412, 0.1386, 0.2271, 0.1931]) -Greedy action tensor([ 0.6867, 0.3718, -0.1468, 0.0019]) tensor([0.3747, 0.2735, 0.1628, 0.1889]) -Greedy action tensor([ 0.3908, -0.0050, -0.0217, -0.0257]) tensor([0.3339, 0.2248, 0.2211, 0.2202]) -Greedy action tensor([ 0.9573, -0.6558, 0.0632, -0.7594]) tensor([0.5593, 0.1115, 0.2287, 0.1005]) -Greedy action tensor([ 0.6597, -0.1014, -0.0167, -0.0073]) tensor([0.4018, 0.1877, 0.2043, 0.2062]) -Greedy action tensor([ 0.8033, -0.3738, -0.0705, -0.1351]) tensor([0.4724, 0.1456, 0.1972, 0.1848]) -Greedy action tensor([ 0.5642, -0.2309, 0.0871, -0.3176]) tensor([0.4022, 0.1816, 0.2496, 0.1665]) -Greedy action tensor([ 0.4007, -0.3357, 0.0791, -0.2503]) tensor([0.3669, 0.1757, 0.2660, 0.1914]) -Greedy action tensor([ 0.5866, -0.0677, 0.0184, -0.2143]) tensor([0.3944, 0.2050, 0.2235, 0.1771]) -Greedy action tensor([ 1.0145, -0.6960, 0.0590, -0.7119]) tensor([0.5736, 0.1037, 0.2206, 0.1021]) -Greedy action tensor([ 0.9119, -0.7514, -0.0126, -0.2829]) tensor([0.5294, 0.1003, 0.2100, 0.1603]) -Greedy action tensor([ 1.0649, -0.6131, 0.0750, -0.4781]) tensor([0.5643, 0.1054, 0.2097, 0.1206]) -Greedy action tensor([ 0.9035, -0.2716, -0.0062, -0.1322]) tensor([0.4839, 0.1494, 0.1949, 0.1718]) -Greedy action tensor([ 0.8384, -0.4342, -0.1210, -0.1069]) tensor([0.4874, 0.1365, 0.1867, 0.1894]) -Greedy action tensor([ 0.4133, -0.4880, -0.1499, -0.2223]) tensor([0.3992, 0.1621, 0.2273, 0.2114]) -Greedy action tensor([ 0.7070, -0.6938, 0.0231, -0.2785]) tensor([0.4707, 0.1160, 0.2376, 0.1757]) -Greedy action tensor([ 0.8822, -0.6534, 0.0472, -0.3513]) tensor([0.5153, 0.1110, 0.2236, 0.1501]) -Greedy action tensor([ 0.8371, -0.5648, 0.0210, -0.4505]) tensor([0.5091, 0.1253, 0.2251, 0.1405]) -Greedy action tensor([ 0.4271, -0.1949, -0.0528, -0.0185]) tensor([0.3577, 0.1920, 0.2213, 0.2290]) -Greedy action tensor([ 0.5626, -0.4113, -0.2702, -0.1463]) tensor([0.4339, 0.1638, 0.1887, 0.2136]) -Greedy action tensor([ 0.7200, -0.5941, -0.1967, -0.8109]) tensor([0.5305, 0.1426, 0.2121, 0.1148]) -Greedy action tensor([ 0.6773, -0.3048, -0.0775, -0.5115]) tensor([0.4653, 0.1743, 0.2187, 0.1417]) -Greedy action tensor([ 0.8378, -0.2392, -0.0032, -0.0768]) tensor([0.4603, 0.1568, 0.1985, 0.1844]) -Greedy action tensor([ 0.9182, -0.1653, 0.0669, -0.7242]) tensor([0.5105, 0.1728, 0.2179, 0.0988]) -Greedy action tensor([ 0.7027, -0.3141, -0.0697, -0.3823]) tensor([0.4626, 0.1674, 0.2137, 0.1563]) -Greedy action tensor([ 0.9545, -0.7212, -0.1258, -0.2659]) tensor([0.5489, 0.1027, 0.1863, 0.1620]) -Greedy action tensor([ 0.6403, 0.1580, -0.0869, -0.0222]) tensor([0.3822, 0.2360, 0.1847, 0.1971]) -Greedy action tensor([ 1.0345, -0.6807, -0.0110, -0.4183]) tensor([0.5665, 0.1019, 0.1991, 0.1325]) -Greedy action tensor([ 0.7263, 0.0155, -0.1478, -0.1369]) tensor([0.4291, 0.2108, 0.1790, 0.1810]) -Greedy action tensor([ 1.0491, -0.3512, -0.1097, -0.3320]) tensor([0.5520, 0.1361, 0.1732, 0.1387]) -Greedy action tensor([ 0.6240, -0.5553, -0.0623, -0.1131]) tensor([0.4368, 0.1343, 0.2199, 0.2090]) -Greedy action tensor([ 0.8636, -0.4593, -0.0299, -0.1990]) tensor([0.4948, 0.1318, 0.2025, 0.1710]) -Greedy action tensor([ 1.0322, -0.8395, 0.0145, -0.8192]) tensor([0.5980, 0.0920, 0.2161, 0.0939]) -Greedy action tensor([ 0.6153, -0.1798, -0.1434, -0.1380]) tensor([0.4183, 0.1889, 0.1959, 0.1969]) -Greedy action tensor([ 0.8261, -0.5587, -0.0202, -0.3807]) tensor([0.5054, 0.1265, 0.2168, 0.1512]) -Greedy action tensor([ 0.9668, -0.6087, 0.1288, -0.3122]) tensor([0.5214, 0.1079, 0.2256, 0.1451]) -Greedy action tensor([ 0.6302, -0.1869, 0.0287, -0.2722]) tensor([0.4175, 0.1844, 0.2288, 0.1693]) -Greedy action tensor([ 0.7793, -0.5994, -0.1561, -0.3305]) tensor([0.5066, 0.1276, 0.1988, 0.1670]) -Greedy action tensor([ 1.0030, -0.8395, -0.1092, -0.4492]) tensor([0.5810, 0.0920, 0.1910, 0.1360]) -Greedy action tensor([ 0.9937, -0.1918, -0.0976, -0.0364]) tensor([0.5004, 0.1529, 0.1680, 0.1786]) -Greedy action tensor([ 0.9880, -1.1864, 0.1959, -0.8790]) tensor([0.5810, 0.0660, 0.2631, 0.0898]) -Greedy action tensor([ 0.8732, -0.3356, -0.0046, -0.4915]) tensor([0.5077, 0.1516, 0.2110, 0.1297]) -Greedy action tensor([ 0.8602, -0.5210, -0.0696, -0.3300]) tensor([0.5128, 0.1289, 0.2024, 0.1560]) -Greedy action tensor([ 0.9595, -0.7300, -0.0387, -0.5153]) tensor([0.5612, 0.1036, 0.2068, 0.1284]) -Greedy action tensor([ 2.4407, -1.1541, -0.3958, 0.5080]) tensor([0.8124, 0.0223, 0.0476, 0.1176]) -Greedy action tensor([ 1.8789, -0.3204, -0.4608, 0.6619]) tensor([0.6652, 0.0738, 0.0641, 0.1970]) -Greedy action tensor([ 1.5679, -0.7955, -0.3934, 0.9748]) tensor([0.5595, 0.0526, 0.0787, 0.3092]) -Greedy action tensor([ 1.4109, -0.3291, -0.4898, 0.1856]) tensor([0.6178, 0.1084, 0.0923, 0.1814]) -Greedy action tensor([ 1.4076, -0.8012, -0.1890, 0.5321]) tensor([0.5783, 0.0635, 0.1172, 0.2410]) -Greedy action tensor([ 0.9216, -0.4693, -0.1135, 0.5035]) tensor([0.4420, 0.1100, 0.1570, 0.2910]) -Greedy action tensor([ 1.1858, -0.5641, -0.0037, 0.4841]) tensor([0.5066, 0.0880, 0.1542, 0.2511]) -Greedy action tensor([ 1.6840, -0.5868, -0.8815, 0.6712]) tensor([0.6480, 0.0669, 0.0498, 0.2353]) -Greedy action tensor([ 1.4391, -0.7756, -0.1753, 0.4328]) tensor([0.5975, 0.0652, 0.1189, 0.2184]) -Greedy action tensor([ 1.5584, -0.6378, -0.7233, 0.4184]) tensor([0.6523, 0.0725, 0.0666, 0.2086]) -Greedy action tensor([ 1.6569, -0.2198, -0.2799, 0.5171]) tensor([0.6184, 0.0947, 0.0892, 0.1978]) -Greedy action tensor([ 1.6720, -0.6205, -0.4486, 0.4748]) tensor([0.6566, 0.0663, 0.0788, 0.1983]) -Greedy action tensor([ 1.9870, -0.4154, 0.2269, 0.2600]) tensor([0.6943, 0.0628, 0.1194, 0.1235]) -Greedy action tensor([ 1.7326, 0.2358, -0.6331, 0.5289]) tensor([0.6181, 0.1384, 0.0580, 0.1855]) -Greedy action tensor([ 1.8891, -0.7315, -0.5599, 1.0558]) tensor([0.6274, 0.0457, 0.0542, 0.2727]) -Greedy action tensor([ 1.6880, -0.7840, -0.0626, 0.3342]) tensor([0.6595, 0.0557, 0.1145, 0.1703]) -Greedy action tensor([ 1.1925, -0.1840, -0.0181, 0.3094]) tensor([0.5092, 0.1285, 0.1517, 0.2105]) -Greedy action tensor([ 1.6065, -0.1211, -0.6685, 0.4910]) tensor([0.6218, 0.1105, 0.0639, 0.2038]) -Greedy action tensor([ 1.3042, -0.6185, -0.1767, 0.3940]) tensor([0.5630, 0.0823, 0.1280, 0.2266]) -Greedy action tensor([ 2.4715, -1.0576, 0.5863, -0.3440]) tensor([0.8058, 0.0236, 0.1223, 0.0482]) -Greedy action tensor([ 1.2575, -0.6142, -0.1025, 0.1767]) tensor([0.5715, 0.0879, 0.1467, 0.1939]) -Greedy action tensor([ 1.6470, -0.7352, -0.0395, 0.5288]) tensor([0.6233, 0.0576, 0.1154, 0.2037]) -Greedy action tensor([ 1.4679, -0.2572, -0.6005, 0.1160]) tensor([0.6397, 0.1140, 0.0809, 0.1655]) -Greedy action tensor([ 1.7832, -0.9231, -0.2365, 0.6139]) tensor([0.6622, 0.0442, 0.0879, 0.2057]) -Greedy action tensor([ 1.7097e+00, -7.6063e-01, -2.4386e-01, -5.6496e-04]) tensor([0.7107, 0.0601, 0.1007, 0.1285]) -Greedy action tensor([ 2.0302, -1.3594, 0.1723, 0.3493]) tensor([0.7268, 0.0245, 0.1134, 0.1353]) -Greedy action tensor([ 1.8446, -0.1229, -0.5066, 0.4233]) tensor([0.6773, 0.0947, 0.0645, 0.1635]) -Greedy action tensor([ 1.4837, -0.9329, -0.2765, 0.7499]) tensor([0.5743, 0.0512, 0.0988, 0.2757]) -Greedy action tensor([ 1.6226, -0.6175, -0.3292, 0.5488]) tensor([0.6289, 0.0669, 0.0893, 0.2149]) -Greedy action tensor([ 1.4426, 0.0611, -0.1259, 0.4424]) tensor([0.5472, 0.1375, 0.1140, 0.2013]) -Greedy action tensor([ 2.0849, 0.7355, -0.1750, 0.1405]) tensor([0.6636, 0.1721, 0.0693, 0.0949]) -Greedy action tensor([ 1.6641, -0.7987, -0.3151, 0.0206]) tensor([0.7059, 0.0601, 0.0975, 0.1364]) -Greedy action tensor([ 1.3203, -0.5755, -0.2031, 0.3441]) tensor([0.5731, 0.0861, 0.1249, 0.2159]) -Greedy action tensor([ 1.3372, -0.4537, -0.4450, 0.2056]) tensor([0.6033, 0.1006, 0.1015, 0.1946]) -Greedy action tensor([ 1.0797, -0.2057, 0.3181, -0.4815]) tensor([0.5119, 0.1416, 0.2390, 0.1075]) -Greedy action tensor([ 1.3411, -0.4203, -0.4669, 0.3266]) tensor([0.5888, 0.1012, 0.0965, 0.2135]) -Greedy action tensor([ 2.0661, -0.5430, -0.4288, 0.4598]) tensor([0.7371, 0.0543, 0.0608, 0.1479]) -Greedy action tensor([ 2.4411, -1.1790, -0.3108, 0.8725]) tensor([0.7699, 0.0206, 0.0491, 0.1604]) -Greedy action tensor([ 1.9117, -0.5126, -0.6029, 0.4265]) tensor([0.7164, 0.0634, 0.0580, 0.1622]) -Greedy action tensor([1.9846, 0.4357, 0.0435, 0.2211]) tensor([0.6547, 0.1391, 0.0940, 0.1122]) -Greedy action tensor([ 1.4859, -0.6623, -0.3954, 0.7516]) tensor([0.5718, 0.0667, 0.0871, 0.2744]) -Greedy action tensor([ 2.2099, -0.8057, -0.1695, 0.1966]) tensor([0.7842, 0.0384, 0.0726, 0.1047]) -Greedy action tensor([ 1.5279, -0.3834, -0.8542, 0.0162]) tensor([0.6846, 0.1012, 0.0632, 0.1510]) -Greedy action tensor([ 1.4744, -0.4656, -0.4151, 0.3192]) tensor([0.6212, 0.0893, 0.0939, 0.1957]) -Greedy action tensor([ 1.6101, -0.4916, -0.2782, 0.5630]) tensor([0.6156, 0.0753, 0.0932, 0.2160]) -Greedy action tensor([ 1.1878, -0.4389, -0.7793, 0.3167]) tensor([0.5698, 0.1120, 0.0797, 0.2385]) -Greedy action tensor([ 1.5023, -0.6407, -0.2590, 0.3931]) tensor([0.6177, 0.0725, 0.1061, 0.2037]) -Greedy action tensor([ 2.1396, -0.7976, -0.3662, 0.5315]) tensor([0.7491, 0.0397, 0.0611, 0.1500]) -Greedy action tensor([ 1.7271, -0.2090, -0.0685, 0.1222]) tensor([0.6617, 0.0955, 0.1099, 0.1329]) -Greedy action tensor([ 1.1029, -0.3847, -0.0580, 0.2112]) tensor([0.5131, 0.1159, 0.1607, 0.2103]) -Greedy action tensor([ 1.6306, 0.4552, 0.1775, -0.0299]) tensor([0.5772, 0.1782, 0.1350, 0.1097]) -Greedy action tensor([ 1.6267, 0.1662, -1.0564, 0.6081]) tensor([0.6018, 0.1397, 0.0411, 0.2173]) -Greedy action tensor([ 1.3372, -0.2440, -0.5944, 0.2824]) tensor([0.5886, 0.1211, 0.0853, 0.2050]) -Greedy action tensor([ 1.8367, -0.3436, -0.2712, 0.8430]) tensor([0.6232, 0.0704, 0.0757, 0.2307]) -Greedy action tensor([ 1.1379, -0.4720, -0.0070, 0.2439]) tensor([0.5189, 0.1037, 0.1651, 0.2122]) -Greedy action tensor([ 1.3892, -0.5407, -0.2529, 0.4788]) tensor([0.5744, 0.0834, 0.1112, 0.2311]) -Greedy action tensor([ 1.7420, -0.4111, -0.8173, 0.4078]) tensor([0.6864, 0.0797, 0.0531, 0.1808]) -Greedy action tensor([ 1.7237, -0.6876, -0.3446, 0.3923]) tensor([0.6756, 0.0606, 0.0854, 0.1784]) -Greedy action tensor([ 1.6753, -0.6141, -0.5974, 0.9230]) tensor([0.5968, 0.0605, 0.0615, 0.2813]) -Greedy action tensor([ 2.2980, -0.6044, -0.7365, 0.4937]) tensor([0.7889, 0.0433, 0.0379, 0.1298]) -Greedy action tensor([ 1.3490, -0.6331, -0.4002, 0.3718]) tensor([0.5924, 0.0816, 0.1030, 0.2230]) -Greedy action tensor([ 1.2025, -0.0760, -0.9544, 0.4717]) tensor([0.5331, 0.1485, 0.0617, 0.2567]) -Greedy action tensor([ 2.1466, -0.5882, -0.2838, 0.1199]) tensor([0.7784, 0.0505, 0.0685, 0.1026]) -Greedy action tensor([ 1.8012, -0.7421, -0.5428, 0.4141]) tensor([0.7021, 0.0552, 0.0674, 0.1754]) -Greedy action tensor([ 1.0713, 0.0928, -0.0211, 0.5853]) tensor([0.4298, 0.1616, 0.1442, 0.2644]) -Greedy action tensor([ 1.1878, -0.0554, -0.2324, 0.3386]) tensor([0.5108, 0.1473, 0.1234, 0.2185]) -Greedy action tensor([ 1.7495, -0.8143, -0.4035, 0.9824]) tensor([0.6033, 0.0465, 0.0701, 0.2802]) -Greedy action tensor([ 1.0752, -0.4415, -0.7156, 0.5178]) tensor([0.5105, 0.1120, 0.0852, 0.2924]) -Greedy action tensor([ 1.3413, -0.2474, -0.8047, 0.0372]) tensor([0.6279, 0.1282, 0.0734, 0.1704]) -Greedy action tensor([ 0.6453, -0.2829, -0.1124, -0.0782]) tensor([0.4257, 0.1683, 0.1996, 0.2065]) -Greedy action tensor([ 1.0373, -0.4699, -0.2178, 0.3377]) tensor([0.4991, 0.1106, 0.1423, 0.2480]) -Greedy action tensor([ 1.0891, -0.1958, -0.4113, -0.0208]) tensor([0.5466, 0.1512, 0.1219, 0.1802]) -Greedy action tensor([ 0.9004, -0.4647, -0.1466, 0.1139]) tensor([0.4850, 0.1238, 0.1702, 0.2209]) -Greedy action tensor([ 1.2433, 0.0768, -0.1427, 0.4135]) tensor([0.5006, 0.1559, 0.1252, 0.2183]) -Greedy action tensor([ 2.0524, -0.1896, -0.0181, 0.2700]) tensor([0.7140, 0.0759, 0.0901, 0.1201]) -Greedy action tensor([ 1.3717, -0.0433, -0.3239, 0.2924]) tensor([0.5662, 0.1375, 0.1039, 0.1924]) -Greedy action tensor([ 2.0151, -0.7241, -0.2561, 0.5603]) tensor([0.7136, 0.0461, 0.0736, 0.1666]) -Greedy action tensor([ 1.6377, -0.8495, 0.1761, 0.8074]) tensor([0.5711, 0.0475, 0.1324, 0.2490]) -Greedy action tensor([ 1.3168, -0.4674, -0.4045, 0.3402]) tensor([0.5803, 0.0974, 0.1038, 0.2185]) -Greedy action tensor([ 1.4864, -0.6665, -0.1639, 0.0183]) tensor([0.6500, 0.0755, 0.1248, 0.1497]) -Greedy action tensor([ 1.8780, -0.8585, -0.3841, 0.4652]) tensor([0.7080, 0.0459, 0.0737, 0.1724]) -Greedy action tensor([-1.8228, -0.4583, 0.6559, -0.0979]) tensor([0.0445, 0.1743, 0.5312, 0.2500]) -Greedy action tensor([-1.7763, -0.3494, -0.0712, -0.5727]) tensor([0.0714, 0.2975, 0.3930, 0.2380]) -Greedy action tensor([-1.8814, -0.4620, 0.6424, -0.1473]) tensor([0.0430, 0.1776, 0.5360, 0.2434]) -Greedy action tensor([-0.8309, 0.9292, 0.1259, 0.4520]) tensor([0.0768, 0.4464, 0.1999, 0.2770]) -Greedy action tensor([-1.8402, -0.4408, 0.6142, -0.1298]) tensor([0.0450, 0.1824, 0.5237, 0.2489]) -Greedy action tensor([-1.7319, 0.8817, 0.1222, 0.1392]) tensor([0.0363, 0.4958, 0.2320, 0.2359]) -Greedy action tensor([-1.8415, -0.3934, 0.6350, -0.1193]) tensor([0.0439, 0.1870, 0.5230, 0.2460]) -Greedy action tensor([-1.8642, -0.3680, 0.6121, -0.1308]) tensor([0.0434, 0.1939, 0.5168, 0.2458]) -Greedy action tensor([-1.9236, -0.4407, 0.6544, -0.1658]) tensor([0.0410, 0.1807, 0.5403, 0.2379]) -Greedy action tensor([-1.9253, -0.4497, 0.6600, -0.1700]) tensor([0.0409, 0.1791, 0.5432, 0.2369]) -Greedy action tensor([-1.1869, -0.4098, 0.3187, 0.3249]) tensor([0.0819, 0.1781, 0.3689, 0.3712]) -Greedy action tensor([-1.7577, -0.4095, 0.5635, -0.0640]) tensor([0.0488, 0.1880, 0.4975, 0.2656]) -Greedy action tensor([-1.8728, -0.4248, 0.6212, -0.1579]) tensor([0.0436, 0.1856, 0.5283, 0.2424]) -Greedy action tensor([-1.9124, -0.4383, 0.6521, -0.1639]) tensor([0.0415, 0.1812, 0.5390, 0.2384]) -Greedy action tensor([-1.6316, -0.2811, 0.4880, -0.0698]) tensor([0.0557, 0.2150, 0.4638, 0.2655]) -Greedy action tensor([-1.3650, -0.5888, 0.4757, -0.3064]) tensor([0.0809, 0.1759, 0.5099, 0.2333]) -Greedy action tensor([-1.5671, -0.5193, 0.4995, -0.0789]) tensor([0.0618, 0.1762, 0.4882, 0.2738]) -Greedy action tensor([-1.7283, -0.4618, 0.5580, -0.0755]) tensor([0.0510, 0.1810, 0.5018, 0.2663]) -Greedy action tensor([-1.8393, -0.4455, 0.6148, -0.1241]) tensor([0.0450, 0.1813, 0.5236, 0.2501]) -Greedy action tensor([-1.8605, -0.3817, 0.6196, -0.1229]) tensor([0.0435, 0.1906, 0.5189, 0.2470]) -Greedy action tensor([-1.7761, -0.1199, 0.5308, -0.0738]) tensor([0.0459, 0.2407, 0.4613, 0.2520]) -Greedy action tensor([-1.7770, -0.3965, 0.5736, -0.0757]) tensor([0.0477, 0.1898, 0.5008, 0.2616]) -Greedy action tensor([-1.9293, -0.4214, 0.6554, -0.1700]) tensor([0.0407, 0.1837, 0.5393, 0.2363]) -Greedy action tensor([-1.9062, -0.4408, 0.6451, -0.1632]) tensor([0.0419, 0.1814, 0.5373, 0.2394]) -Greedy action tensor([-1.8693, -0.2986, 0.6022, -0.1528]) tensor([0.0431, 0.2072, 0.5100, 0.2397]) -Greedy action tensor([-1.4690, 0.0855, 0.5743, 0.1611]) tensor([0.0539, 0.2551, 0.4159, 0.2751]) -Greedy action tensor([-1.9101, -0.4481, 0.6466, -0.1594]) tensor([0.0417, 0.1800, 0.5380, 0.2403]) -Greedy action tensor([-1.9360, -0.4281, 0.6598, -0.1744]) tensor([0.0404, 0.1825, 0.5418, 0.2353]) -Greedy action tensor([-1.9101, -0.3359, 0.6364, -0.1624]) tensor([0.0411, 0.1984, 0.5245, 0.2360]) -Greedy action tensor([-1.9068, -0.4497, 0.6505, -0.1642]) tensor([0.0418, 0.1796, 0.5396, 0.2389]) -Greedy action tensor([-1.9026, -0.4297, 0.6467, -0.1574]) tensor([0.0419, 0.1826, 0.5358, 0.2398]) -Greedy action tensor([-1.4786, -0.6044, 1.0385, 0.8379]) tensor([0.0386, 0.0924, 0.4779, 0.3911]) -Greedy action tensor([-1.4297, -0.1507, 0.3871, -0.0255]) tensor([0.0675, 0.2425, 0.4152, 0.2748]) -Greedy action tensor([-1.7392, 0.1257, 0.4826, -0.0114]) tensor([0.0448, 0.2894, 0.4135, 0.2523]) -Greedy action tensor([-1.8745, -0.2544, 0.6092, -0.0983]) tensor([0.0418, 0.2110, 0.5005, 0.2467]) -Greedy action tensor([-1.7303, -0.2757, 0.5379, -0.0980]) tensor([0.0498, 0.2135, 0.4817, 0.2550]) -Greedy action tensor([-1.8734, -0.4322, 0.6733, -0.1096]) tensor([0.0420, 0.1774, 0.5358, 0.2449]) -Greedy action tensor([-1.9427, -0.4446, 0.6650, -0.1796]) tensor([0.0402, 0.1799, 0.5455, 0.2344]) -Greedy action tensor([-1.8461, -0.4562, 0.6238, -0.1326]) tensor([0.0447, 0.1793, 0.5281, 0.2479]) -Greedy action tensor([-1.9018, -0.4557, 0.6516, -0.1594]) tensor([0.0420, 0.1784, 0.5398, 0.2399]) -Greedy action tensor([-1.4885, -0.5539, 0.4594, 0.2247]) tensor([0.0621, 0.1581, 0.4355, 0.3444]) -Greedy action tensor([-1.9246, -0.3934, 0.6460, -0.1721]) tensor([0.0409, 0.1890, 0.5343, 0.2358]) -Greedy action tensor([-1.9283, -0.4081, 0.6576, -0.1614]) tensor([0.0405, 0.1851, 0.5374, 0.2369]) -Greedy action tensor([-1.2534, -0.2989, 0.6828, 0.7283]) tensor([0.0562, 0.1460, 0.3898, 0.4079]) -Greedy action tensor([-1.9235, -0.4510, 0.6726, -0.1594]) tensor([0.0406, 0.1772, 0.5450, 0.2372]) -Greedy action tensor([-0.6597, 0.9099, 0.1087, -0.0331]) tensor([0.1017, 0.4887, 0.2193, 0.1903]) -Greedy action tensor([-1.7517, -0.4865, 0.6507, 0.0839]) tensor([0.0457, 0.1621, 0.5054, 0.2867]) -Greedy action tensor([-1.3157, -0.1733, 0.7109, 0.6389]) tensor([0.0532, 0.1669, 0.4040, 0.3759]) -Greedy action tensor([-1.8729, -0.4558, 0.6366, -0.1462]) tensor([0.0434, 0.1790, 0.5337, 0.2439]) -Greedy action tensor([-1.9469, -0.4518, 0.6686, -0.1820]) tensor([0.0400, 0.1786, 0.5475, 0.2339]) -Greedy action tensor([-1.5337, -0.1399, 0.5783, 0.1025]) tensor([0.0543, 0.2187, 0.4484, 0.2786]) -Greedy action tensor([-1.9440, -0.4732, 0.5695, -0.2156]) tensor([0.0429, 0.1866, 0.5292, 0.2414]) -Greedy action tensor([-1.9042, -0.4140, 0.6405, -0.1618]) tensor([0.0419, 0.1858, 0.5333, 0.2391]) -Greedy action tensor([-0.7752, 0.6387, 0.0067, -0.0351]) tensor([0.1065, 0.4377, 0.2327, 0.2231]) -Greedy action tensor([-1.7305, -0.4164, 0.6429, -0.0159]) tensor([0.0476, 0.1771, 0.5109, 0.2644]) -Greedy action tensor([-1.8673, -0.4196, 0.6245, -0.1492]) tensor([0.0437, 0.1857, 0.5274, 0.2433]) -Greedy action tensor([-0.7854, -0.2709, 0.9714, 1.0686]) tensor([0.0673, 0.1126, 0.3901, 0.4299]) -Greedy action tensor([-1.4979, -0.2474, 0.4214, -0.0175]) tensor([0.0637, 0.2224, 0.4341, 0.2799]) -Greedy action tensor([-1.8333, -0.3801, 0.6816, -0.0508]) tensor([0.0424, 0.1813, 0.5242, 0.2520]) -Greedy action tensor([-1.7771, -0.4135, 0.6069, -0.0659]) tensor([0.0470, 0.1836, 0.5095, 0.2600]) -Greedy action tensor([-1.7826, -0.4290, 0.5891, -0.0979]) tensor([0.0477, 0.1846, 0.5108, 0.2570]) -Greedy action tensor([-1.5771, 0.1965, 0.3980, -0.0675]) tensor([0.0537, 0.3164, 0.3870, 0.2430]) -Greedy action tensor([-1.9287, -0.4623, 0.6862, -0.1626]) tensor([0.0402, 0.1744, 0.5500, 0.2354]) -Greedy action tensor([-0.5922, -0.0703, 0.8529, 1.5531]) tensor([0.0646, 0.1089, 0.2742, 0.5523]) -Greedy action tensor([-1.9299, -0.4362, 0.6608, -0.1713]) tensor([0.0407, 0.1811, 0.5423, 0.2360]) -Greedy action tensor([-1.8221, -0.4693, 0.6140, -0.1201]) tensor([0.0459, 0.1776, 0.5247, 0.2518]) -Greedy action tensor([-1.9334, -0.4326, 0.6620, -0.1708]) tensor([0.0405, 0.1815, 0.5423, 0.2358]) -Greedy action tensor([-1.7187, -0.4312, 0.5619, -0.1724]) tensor([0.0524, 0.1897, 0.5122, 0.2458]) -Greedy action tensor([-1.2719, -0.5661, 0.3135, 0.1905]) tensor([0.0818, 0.1657, 0.3993, 0.3531]) -Greedy action tensor([-1.9299, -0.3846, 0.6477, -0.1735]) tensor([0.0406, 0.1903, 0.5342, 0.2350]) -Greedy action tensor([-0.7091, 0.4040, 0.0778, -0.0072]) tensor([0.1211, 0.3686, 0.2660, 0.2443]) -Greedy action tensor([-1.8391, -0.4455, 0.6141, -0.1247]) tensor([0.0450, 0.1814, 0.5235, 0.2501]) -Greedy action tensor([-1.6273, -0.4086, 0.4873, -0.0609]) tensor([0.0573, 0.1938, 0.4746, 0.2743]) -Greedy action tensor([-1.8550, -0.3372, 0.6208, -0.1009]) tensor([0.0430, 0.1964, 0.5119, 0.2487]) -Greedy action tensor([-1.8794, -0.3205, 0.6192, -0.1358]) tensor([0.0423, 0.2011, 0.5147, 0.2419]) -Greedy action tensor([-1.8214, -0.1983, 0.5901, -0.1234]) tensor([0.0441, 0.2235, 0.4916, 0.2408]) -Greedy action tensor([-1.8388, -0.3406, 0.5875, -0.1257]) tensor([0.0448, 0.2003, 0.5067, 0.2483]) -Greedy action tensor([-1.9247, -0.4138, 0.6566, -0.1600]) tensor([0.0407, 0.1843, 0.5375, 0.2375]) -Greedy action tensor([-0.9376, -0.4811, -0.0124, 0.6557]) tensor([0.0998, 0.1575, 0.2517, 0.4910]) -Greedy action tensor([-1.5170, -0.5039, 0.4472, 0.0154]) tensor([0.0645, 0.1775, 0.4596, 0.2984]) -Greedy action tensor([-1.7877, 0.0680, 0.5114, -0.0772]) tensor([0.0437, 0.2794, 0.4353, 0.2416]) -Greedy action tensor([-0.8316, -0.5145, -1.5633, 0.9361]) tensor([0.1148, 0.1576, 0.0552, 0.6724]) -Greedy action tensor([ 1.9002, -0.9641, 0.0746, 1.1237]) tensor([0.5959, 0.0340, 0.0960, 0.2741]) -Greedy action tensor([ 0.2634, 0.0844, -0.6704, -0.1210]) tensor([0.3436, 0.2873, 0.1351, 0.2340]) -Greedy action tensor([-0.9742, -0.5389, 0.8414, -0.1798]) tensor([0.0917, 0.1417, 0.5636, 0.2030]) -Greedy action tensor([ 0.0132, -1.1074, 0.7951, -0.1941]) tensor([0.2313, 0.0754, 0.5054, 0.1880]) -Greedy action tensor([ 1.4132, -0.4565, 0.2644, 0.7799]) tensor([0.4995, 0.0770, 0.1583, 0.2652]) -Greedy action tensor([ 0.8074, -0.9928, -0.8493, 0.5495]) tensor([0.4698, 0.0776, 0.0896, 0.3630]) -Greedy action tensor([-0.6384, -0.6836, 0.0671, -1.0739]) tensor([0.2161, 0.2065, 0.4376, 0.1398]) -Greedy action tensor([ 1.2319, -1.5523, -0.3469, 0.6294]) tensor([0.5508, 0.0340, 0.1136, 0.3015]) -Greedy action tensor([ 1.0018, -0.0168, 0.3935, 0.1621]) tensor([0.4279, 0.1545, 0.2329, 0.1848]) -Greedy action tensor([-0.2246, -0.4545, -0.2794, 0.1757]) tensor([0.2362, 0.1877, 0.2236, 0.3525]) -Greedy action tensor([-0.5116, -1.4574, -0.5140, 0.1615]) tensor([0.2301, 0.0894, 0.2295, 0.4510]) -Greedy action tensor([ 0.7252, 0.0169, 1.3788, -0.6839]) tensor([0.2733, 0.1346, 0.5254, 0.0668]) -Greedy action tensor([-0.0587, -1.6210, -0.4158, 0.7164]) tensor([0.2451, 0.0514, 0.1715, 0.5320]) -Greedy action tensor([ 0.0186, -1.3227, 0.2682, -0.2119]) tensor([0.2995, 0.0783, 0.3844, 0.2378]) -Greedy action tensor([ 0.6582, -1.0142, 0.5797, 1.0849]) tensor([0.2744, 0.0515, 0.2537, 0.4204]) -Greedy action tensor([ 0.0633, -0.4848, -0.8508, -0.9692]) tensor([0.4282, 0.2476, 0.1717, 0.1525]) -Greedy action tensor([ 0.9754, -0.2004, 0.3571, 0.7241]) tensor([0.3809, 0.1175, 0.2053, 0.2963]) -Greedy action tensor([ 0.5242, -0.4254, -0.7083, 1.1138]) tensor([0.2872, 0.1111, 0.0837, 0.5179]) -Greedy action tensor([-0.2072, -1.0101, 0.8106, -0.5935]) tensor([0.2043, 0.0915, 0.5653, 0.1388]) -Greedy action tensor([-0.4055, -1.3285, 0.7765, -0.7532]) tensor([0.1864, 0.0741, 0.6079, 0.1317]) -Greedy action tensor([-0.0834, -0.3063, 1.5921, 0.1913]) tensor([0.1182, 0.0946, 0.6315, 0.1556]) -Greedy action tensor([-0.2576, -0.5205, 0.5498, 0.5679]) tensor([0.1589, 0.1222, 0.3562, 0.3627]) -Greedy action tensor([-0.0535, 0.1812, -0.9736, 0.5559]) tensor([0.2221, 0.2808, 0.0885, 0.4085]) -Greedy action tensor([-0.1805, -1.2972, -0.4785, -0.1461]) tensor([0.3221, 0.1054, 0.2391, 0.3334]) -Greedy action tensor([ 1.5922, -0.4583, 0.8719, 0.5534]) tensor([0.5078, 0.0653, 0.2471, 0.1797]) -Greedy action tensor([ 0.5264, -1.7580, 0.1487, 0.4013]) tensor([0.3746, 0.0381, 0.2567, 0.3305]) -Greedy action tensor([-0.3518, -2.3362, 0.7209, 0.6214]) tensor([0.1491, 0.0205, 0.4358, 0.3946]) -Greedy action tensor([ 0.4769, -1.8884, -0.2033, 0.1078]) tensor([0.4363, 0.0410, 0.2210, 0.3017]) -Greedy action tensor([ 0.2863, -0.2468, 1.0195, 0.0574]) tensor([0.2240, 0.1315, 0.4663, 0.1782]) -Greedy action tensor([-0.0830, -0.8452, 0.2212, -1.1834]) tensor([0.3170, 0.1479, 0.4297, 0.1055]) -Greedy action tensor([ 0.9751, -1.0777, 0.7394, 0.2805]) tensor([0.4136, 0.0531, 0.3268, 0.2065]) -Greedy action tensor([-0.0020, -0.0739, 0.6642, -0.3941]) tensor([0.2196, 0.2044, 0.4276, 0.1484]) -Greedy action tensor([-0.3579, -0.0612, -0.7769, 0.2091]) tensor([0.2098, 0.2823, 0.1380, 0.3699]) -Greedy action tensor([ 0.0191, -0.0062, 0.5746, 0.0283]) tensor([0.2116, 0.2063, 0.3687, 0.2135]) -Greedy action tensor([ 0.0612, 0.3409, 1.9023, -1.1420]) tensor([0.1120, 0.1482, 0.7062, 0.0336]) -Greedy action tensor([ 0.1443, -1.2144, 1.2883, -0.0887]) tensor([0.1927, 0.0495, 0.6050, 0.1527]) -Greedy action tensor([ 0.8252, -0.6793, -0.1931, 0.1884]) tensor([0.4734, 0.1052, 0.1710, 0.2504]) -Greedy action tensor([ 0.1287, 0.1948, -0.2490, -0.4613]) tensor([0.3023, 0.3229, 0.2072, 0.1676]) -Greedy action tensor([ 0.8986, -1.6745, 1.0842, 0.0445]) tensor([0.3696, 0.0282, 0.4449, 0.1573]) -Greedy action tensor([ 0.8982, 0.4230, 0.9780, -0.0880]) tensor([0.3249, 0.2020, 0.3519, 0.1212]) -Greedy action tensor([-0.9223, 0.3936, 0.4206, -0.6169]) tensor([0.1008, 0.3760, 0.3863, 0.1369]) -Greedy action tensor([-0.1617, -0.2368, 0.6640, -1.2341]) tensor([0.2196, 0.2037, 0.5015, 0.0751]) -Greedy action tensor([ 0.9364, -0.0601, 0.8162, -0.8408]) tensor([0.4124, 0.1522, 0.3657, 0.0697]) -Greedy action tensor([-0.2464, -1.0856, -0.4464, 1.3042]) tensor([0.1436, 0.0620, 0.1175, 0.6769]) -Greedy action tensor([ 0.0247, -1.1440, 0.4068, 0.8406]) tensor([0.1985, 0.0617, 0.2909, 0.4489]) -Greedy action tensor([ 0.3067, 0.1134, -0.3452, 0.1845]) tensor([0.3096, 0.2552, 0.1613, 0.2740]) -Greedy action tensor([-1.0493, -0.2357, -1.0172, -0.4691]) tensor([0.1646, 0.3714, 0.1700, 0.2941]) -Greedy action tensor([-0.3962, -0.0410, -0.7725, 0.7344]) tensor([0.1610, 0.2297, 0.1105, 0.4988]) -Greedy action tensor([ 0.3060, -0.9919, 0.5252, -1.4730]) tensor([0.3722, 0.1016, 0.4634, 0.0628]) -Greedy action tensor([ 1.3488, -1.0050, -0.0491, 1.2058]) tensor([0.4527, 0.0430, 0.1119, 0.3924]) -Greedy action tensor([-0.0264, -0.9326, -0.3177, 2.5141]) tensor([0.0674, 0.0272, 0.0504, 0.8550]) -Greedy action tensor([-0.2502, -0.8792, 0.8019, -0.4551]) tensor([0.1919, 0.1023, 0.5495, 0.1563]) -Greedy action tensor([ 1.1169, -1.4360, 1.0426, 0.9493]) tensor([0.3506, 0.0273, 0.3255, 0.2965]) -Greedy action tensor([ 1.0260, -0.3726, 0.0845, -0.3440]) tensor([0.5288, 0.1306, 0.2063, 0.1344]) -Greedy action tensor([ 0.3734, 0.1150, 1.7934, -0.2767]) tensor([0.1555, 0.1201, 0.6433, 0.0812]) -Greedy action tensor([ 2.0073, -0.8414, 1.3138, 1.1171]) tensor([0.5081, 0.0294, 0.2539, 0.2086]) -Greedy action tensor([ 0.5641, -0.2995, -0.0855, -0.5007]) tensor([0.4369, 0.1842, 0.2282, 0.1506]) -Greedy action tensor([ 0.9722, -1.2986, -0.4309, 1.5213]) tensor([0.3246, 0.0335, 0.0798, 0.5621]) -Greedy action tensor([-1.1709, -1.0865, -0.2532, 0.4999]) tensor([0.1009, 0.1098, 0.2527, 0.5366]) -Greedy action tensor([ 0.0515, -1.6156, -1.0645, 1.2338]) tensor([0.2093, 0.0395, 0.0686, 0.6826]) -Greedy action tensor([-0.1668, 0.0673, -0.8595, -0.1668]) tensor([0.2657, 0.3357, 0.1329, 0.2657]) -Greedy action tensor([-0.9500, -0.7051, -0.5072, 1.1064]) tensor([0.0858, 0.1096, 0.1336, 0.6709]) -Greedy action tensor([-0.6459, -0.9865, 1.8944, -0.6630]) tensor([0.0650, 0.0463, 0.8248, 0.0639]) -Greedy action tensor([-0.4010, -1.0504, 1.4533, -0.7561]) tensor([0.1161, 0.0607, 0.7418, 0.0814]) -Greedy action tensor([ 1.3564, 0.5232, 0.5604, -0.5136]) tensor([0.4902, 0.2131, 0.2212, 0.0756]) -Greedy action tensor([ 0.3600, -0.1763, -0.1673, 0.9780]) tensor([0.2481, 0.1451, 0.1464, 0.4603]) -Greedy action tensor([-0.4851, -1.0617, 0.4393, -1.0748]) tensor([0.2157, 0.1212, 0.5436, 0.1196]) -Greedy action tensor([ 1.3966, -1.0282, 0.1589, 1.2018]) tensor([0.4542, 0.0402, 0.1317, 0.3738]) -Greedy action tensor([ 0.2856, -1.2156, -0.6766, -0.2893]) tensor([0.4613, 0.1028, 0.1763, 0.2596]) -Greedy action tensor([-0.3267, -0.5375, 0.4953, -0.2572]) tensor([0.1939, 0.1570, 0.4412, 0.2079]) -Greedy action tensor([ 0.6653, -1.1467, 0.2906, 0.0995]) tensor([0.4134, 0.0675, 0.2842, 0.2348]) -Greedy action tensor([ 1.5029, -1.3107, -0.3035, 0.1638]) tensor([0.6728, 0.0404, 0.1105, 0.1763]) -Greedy action tensor([ 0.3338, -1.1676, 1.8568, 0.3050]) tensor([0.1475, 0.0329, 0.6763, 0.1433]) -Greedy action tensor([ 0.1911, -0.6420, 0.1015, 0.3059]) tensor([0.2881, 0.1252, 0.2634, 0.3232]) -Greedy action tensor([-0.2379, 0.6891, -0.7949, -0.5350]) tensor([0.2065, 0.5218, 0.1183, 0.1534]) -Greedy action tensor([ 0.4144, 0.3868, -0.4752, 0.3025]) tensor([0.3051, 0.2968, 0.1253, 0.2728]) -Greedy action tensor([-1.2069, 0.1339, -1.3370, -1.0757]) tensor([0.1462, 0.5588, 0.1284, 0.1667]) -Greedy action tensor([ 0.9178, -1.3494, -0.1117, 1.2699]) tensor([0.3469, 0.0359, 0.1239, 0.4933]) -Greedy action tensor([-0.3171, -1.4840, -0.0096, 0.0849]) tensor([0.2400, 0.0747, 0.3265, 0.3588]) -Greedy action tensor([-0.7879, -0.8767, -1.1309, 0.0809]) tensor([0.1997, 0.1827, 0.1417, 0.4760]) -Greedy action tensor([ 0.4238, 0.1422, 0.0291, -0.1006]) tensor([0.3311, 0.2498, 0.2231, 0.1960]) -Greedy action tensor([ 0.3980, 0.0503, -0.0783, -0.3340]) tensor([0.3561, 0.2515, 0.2212, 0.1713]) -Greedy action tensor([ 0.6127, -0.2128, 0.0107, -0.2090]) tensor([0.4123, 0.1806, 0.2258, 0.1813]) -Greedy action tensor([ 0.7613, -0.5568, 0.1460, -0.3850]) tensor([0.4704, 0.1259, 0.2542, 0.1495]) -Greedy action tensor([ 0.7841, -0.5071, 0.0051, -0.4847]) tensor([0.4963, 0.1364, 0.2277, 0.1395]) -Greedy action tensor([ 0.8692, -0.8481, 0.1110, -0.4330]) tensor([0.5208, 0.0935, 0.2440, 0.1416]) -Greedy action tensor([ 0.8188, -0.2850, -0.1184, -0.2079]) tensor([0.4804, 0.1593, 0.1882, 0.1721]) -Greedy action tensor([ 0.5533, -0.0898, -0.0784, -0.0122]) tensor([0.3809, 0.2002, 0.2025, 0.2164]) -Greedy action tensor([ 0.7888, -0.3668, -0.0062, -0.1954]) tensor([0.4672, 0.1471, 0.2110, 0.1746]) -Greedy action tensor([ 0.4325, 0.1101, -0.0448, 0.0606]) tensor([0.3296, 0.2387, 0.2045, 0.2272]) -Greedy action tensor([ 0.7319, -0.3353, -0.1496, -0.3089]) tensor([0.4736, 0.1629, 0.1962, 0.1673]) -Greedy action tensor([ 0.5993, -0.6345, -0.1133, -0.2498]) tensor([0.4526, 0.1318, 0.2220, 0.1936]) -Greedy action tensor([ 0.5831, 0.1011, -0.0749, 0.1077]) tensor([0.3627, 0.2240, 0.1878, 0.2255]) -Greedy action tensor([ 0.9526, -0.5528, -0.1805, -0.2315]) tensor([0.5405, 0.1200, 0.1741, 0.1654]) -Greedy action tensor([ 0.8540, -0.5806, -0.1505, -0.5907]) tensor([0.5434, 0.1294, 0.1990, 0.1282]) -Greedy action tensor([ 1.0045, -0.9280, 0.1573, -0.6079]) tensor([0.5641, 0.0817, 0.2418, 0.1125]) -Greedy action tensor([ 0.7506, -0.3172, -0.0305, -0.1753]) tensor([0.4550, 0.1564, 0.2083, 0.1803]) -Greedy action tensor([ 0.5812, -0.3103, -0.0971, -0.1922]) tensor([0.4204, 0.1724, 0.2133, 0.1940]) -Greedy action tensor([ 0.5122, -0.1610, -0.0536, -0.1719]) tensor([0.3872, 0.1975, 0.2199, 0.1954]) -Greedy action tensor([ 0.4608, -0.2068, 0.0230, -0.3718]) tensor([0.3856, 0.1978, 0.2489, 0.1677]) -Greedy action tensor([ 0.8156, -0.3775, 0.0152, -0.2963]) tensor([0.4805, 0.1457, 0.2158, 0.1580]) -Greedy action tensor([ 0.6381, -0.2575, -0.0500, -0.2241]) tensor([0.4286, 0.1750, 0.2154, 0.1810]) -Greedy action tensor([ 0.8103, -0.7032, -0.0381, -0.3350]) tensor([0.5085, 0.1120, 0.2177, 0.1618]) -Greedy action tensor([ 0.9191, -0.3422, 0.0536, -0.3149]) tensor([0.5012, 0.1420, 0.2109, 0.1459]) -Greedy action tensor([ 0.9357, -0.6169, 0.0922, -0.6351]) tensor([0.5406, 0.1144, 0.2326, 0.1124]) -Greedy action tensor([ 0.7466, -0.5591, 0.0061, -0.5098]) tensor([0.4920, 0.1333, 0.2346, 0.1401]) -Greedy action tensor([ 0.8057, -0.5212, 0.0926, -0.3411]) tensor([0.4824, 0.1280, 0.2364, 0.1532]) -Greedy action tensor([ 0.8125, -0.5392, -0.1891, -0.1834]) tensor([0.5011, 0.1297, 0.1841, 0.1851]) -Greedy action tensor([ 0.2566, 0.0169, 0.0268, -0.5457]) tensor([0.3301, 0.2597, 0.2623, 0.1480]) -Greedy action tensor([ 0.6747, -0.4822, -0.1266, -0.3161]) tensor([0.4685, 0.1473, 0.2102, 0.1740]) -Greedy action tensor([ 0.6266, -0.1999, -0.0567, 0.0085]) tensor([0.4030, 0.1763, 0.2035, 0.2172]) -Greedy action tensor([ 0.9661, -0.3362, -0.0713, -0.6151]) tensor([0.5459, 0.1484, 0.1934, 0.1123]) -Greedy action tensor([ 0.5172, -0.2259, -0.0546, -0.1738]) tensor([0.3935, 0.1872, 0.2221, 0.1972]) -Greedy action tensor([ 1.0095, -0.3384, 0.1383, -0.4409]) tensor([0.5228, 0.1358, 0.2188, 0.1226]) -Greedy action tensor([ 0.7685, -0.5579, 0.0322, -0.5522]) tensor([0.4972, 0.1320, 0.2381, 0.1327]) -Greedy action tensor([ 0.5763, -0.6146, -0.1728, -0.1790]) tensor([0.4451, 0.1353, 0.2104, 0.2092]) -Greedy action tensor([ 0.5759, -0.6321, -0.1382, -0.1086]) tensor([0.4362, 0.1303, 0.2136, 0.2200]) -Greedy action tensor([ 1.1935, -0.5655, 0.0817, -0.2861]) tensor([0.5784, 0.0996, 0.1903, 0.1317]) -Greedy action tensor([ 0.6535, -0.1317, -0.0408, 0.0796]) tensor([0.3970, 0.1811, 0.1983, 0.2237]) -Greedy action tensor([ 0.8739, -0.5753, -0.0910, -0.2651]) tensor([0.5166, 0.1213, 0.1968, 0.1654]) -Greedy action tensor([ 0.8352, -0.4435, 0.1331, -0.2849]) tensor([0.4762, 0.1326, 0.2360, 0.1553]) -Greedy action tensor([ 0.7710, -0.4358, -0.0108, -0.5319]) tensor([0.4930, 0.1475, 0.2256, 0.1340]) -Greedy action tensor([ 0.3694, -0.3273, -0.0499, -0.1413]) tensor([0.3629, 0.1808, 0.2386, 0.2177]) -Greedy action tensor([ 0.8591, -0.7748, -0.0018, -0.4349]) tensor([0.5285, 0.1031, 0.2234, 0.1449]) -Greedy action tensor([ 0.4339, -0.2799, -0.0763, -0.3035]) tensor([0.3893, 0.1907, 0.2337, 0.1862]) -Greedy action tensor([ 1.1110, -0.5770, 0.1718, -0.6556]) tensor([0.5725, 0.1058, 0.2238, 0.0978]) -Greedy action tensor([ 0.8395, -0.6451, -0.0197, -0.6740]) tensor([0.5347, 0.1212, 0.2264, 0.1177]) -Greedy action tensor([ 0.6814, 0.1224, -0.0846, -0.1244]) tensor([0.4027, 0.2302, 0.1872, 0.1799]) -Greedy action tensor([ 0.8599, -0.6358, 0.0404, -0.3239]) tensor([0.5074, 0.1137, 0.2236, 0.1553]) -Greedy action tensor([ 0.9608, -0.4130, -0.0149, -0.5777]) tensor([0.5421, 0.1372, 0.2043, 0.1164]) -Greedy action tensor([ 0.8361, -0.4679, -0.0462, -0.5900]) tensor([0.5193, 0.1410, 0.2149, 0.1248]) -Greedy action tensor([ 1.0755, -0.6806, -0.1262, -0.4150]) tensor([0.5887, 0.1017, 0.1770, 0.1326]) -Greedy action tensor([ 0.5851, -0.4097, 0.0503, -0.2308]) tensor([0.4170, 0.1542, 0.2443, 0.1844]) -Greedy action tensor([ 0.8548, -0.4271, 0.0210, -0.1328]) tensor([0.4798, 0.1331, 0.2084, 0.1787]) -Greedy action tensor([ 0.6772, -0.6507, -0.0178, -0.3434]) tensor([0.4707, 0.1247, 0.2349, 0.1696]) -Greedy action tensor([ 0.8583, -0.1408, -0.1295, -0.1862]) tensor([0.4779, 0.1760, 0.1780, 0.1682]) -Greedy action tensor([ 0.3740, -0.2176, -0.0351, -0.2147]) tensor([0.3607, 0.1996, 0.2396, 0.2002]) -Greedy action tensor([ 0.8993, -0.7942, -0.0581, -0.2450]) tensor([0.5302, 0.0975, 0.2035, 0.1688]) -Greedy action tensor([ 0.5463, -0.8323, -0.3115, -0.1703]) tensor([0.4620, 0.1164, 0.1959, 0.2256]) -Greedy action tensor([ 0.5800, -0.3120, -0.0325, -0.1692]) tensor([0.4124, 0.1690, 0.2236, 0.1950]) -Greedy action tensor([ 0.8722, -0.8887, 0.0342, -0.5286]) tensor([0.5403, 0.0929, 0.2337, 0.1331]) -Greedy action tensor([ 0.9475, -0.5141, -0.2008, -0.1599]) tensor([0.5321, 0.1234, 0.1688, 0.1758]) -Greedy action tensor([ 0.8398, -0.7351, 0.0654, -0.6369]) tensor([0.5273, 0.1092, 0.2431, 0.1204]) -Greedy action tensor([ 1.0828, -0.6022, 0.0863, -0.4389]) tensor([0.5640, 0.1046, 0.2082, 0.1232]) -Greedy action tensor([ 0.7343, -0.5897, -0.1384, -0.4565]) tensor([0.5031, 0.1338, 0.2102, 0.1529]) -Greedy action tensor([ 0.5165, -0.4158, 0.0427, -0.4131]) tensor([0.4148, 0.1633, 0.2582, 0.1637]) -Greedy action tensor([ 0.9959, -0.4169, -0.1166, -0.3487]) tensor([0.5456, 0.1328, 0.1794, 0.1422]) -Greedy action tensor([ 0.3383, 0.1238, -0.0391, -0.2515]) tensor([0.3282, 0.2648, 0.2250, 0.1820]) -Greedy action tensor([ 0.6376, -0.5121, -0.1360, -0.1165]) tensor([0.4447, 0.1409, 0.2052, 0.2092]) -Greedy action tensor([ 0.7482, -0.3202, -0.0334, -0.3954]) tensor([0.4717, 0.1621, 0.2159, 0.1503]) -Greedy action tensor([ 0.7925, -1.0021, 0.0200, -0.4319]) tensor([0.5203, 0.0865, 0.2403, 0.1529]) -Greedy action tensor([ 7.8417e-01, 1.0161e-01, -2.6658e-04, -2.3209e-01]) tensor([0.4304, 0.2175, 0.1964, 0.1558]) -Greedy action tensor([ 0.6159, -0.3107, -0.1202, -0.1567]) tensor([0.4279, 0.1694, 0.2050, 0.1976]) -Greedy action tensor([ 0.8801, -0.6911, 0.1074, -0.3803]) tensor([0.5120, 0.1064, 0.2364, 0.1452]) -Greedy action tensor([ 0.9729, -0.6248, 0.1017, -0.5608]) tensor([0.5445, 0.1102, 0.2278, 0.1175]) -Greedy action tensor([ 0.4787, 0.1012, -0.0453, -0.3219]) tensor([0.3667, 0.2514, 0.2172, 0.1647]) -Greedy action tensor([ 0.5750, -0.0654, -0.0208, -0.0893]) tensor([0.3857, 0.2033, 0.2126, 0.1985]) -Greedy action tensor([ 0.6892, 0.1687, 0.0062, -0.0179]) tensor([0.3857, 0.2292, 0.1948, 0.1902]) -Greedy action tensor([ 0.4706, -0.5893, -0.1834, -0.2751]) tensor([0.4272, 0.1480, 0.2221, 0.2027]) -Greedy action tensor([ 0.4951, 0.0267, -0.1306, -0.0671]) tensor([0.3662, 0.2292, 0.1959, 0.2087]) -Greedy action tensor([ 0.7719, 0.1578, -0.1582, -0.3456]) tensor([0.4419, 0.2392, 0.1744, 0.1446]) -Greedy action tensor([ 2.0189, -0.6102, -0.5172, 0.1672]) tensor([0.7644, 0.0551, 0.0605, 0.1200]) -Greedy action tensor([ 1.6259, -0.0459, -0.7407, 0.6824]) tensor([0.5984, 0.1125, 0.0561, 0.2330]) -Greedy action tensor([ 1.3284, -0.4456, -0.1174, 0.6313]) tensor([0.5254, 0.0891, 0.1238, 0.2617]) -Greedy action tensor([ 1.4826, -0.7799, -0.2451, 0.4556]) tensor([0.6098, 0.0635, 0.1084, 0.2184]) -Greedy action tensor([ 1.5783, 0.2384, -0.6394, 0.5372]) tensor([0.5801, 0.1519, 0.0631, 0.2048]) -Greedy action tensor([ 1.7457, -0.2977, -0.9107, 0.0868]) tensor([0.7193, 0.0932, 0.0505, 0.1369]) -Greedy action tensor([ 1.5457, 0.4159, -0.0027, 0.2697]) tensor([0.5510, 0.1780, 0.1171, 0.1538]) -Greedy action tensor([ 2.2103, -0.9810, -0.3910, 0.6018]) tensor([0.7602, 0.0313, 0.0564, 0.1522]) -Greedy action tensor([ 1.4122, -0.5489, -0.3527, -0.0975]) tensor([0.6524, 0.0918, 0.1117, 0.1442]) -Greedy action tensor([ 1.9253, -0.6882, -0.3732, 0.2092]) tensor([0.7389, 0.0541, 0.0742, 0.1328]) -Greedy action tensor([ 1.1977, -0.1029, -0.0145, 0.4573]) tensor([0.4886, 0.1331, 0.1454, 0.2330]) -Greedy action tensor([ 1.4506, -0.0024, -0.5165, 0.5888]) tensor([0.5568, 0.1302, 0.0779, 0.2352]) -Greedy action tensor([ 1.1811, -0.2429, -0.5309, 0.1838]) tensor([0.5586, 0.1345, 0.1008, 0.2061]) -Greedy action tensor([ 1.1919, -0.8619, 0.0746, 0.2813]) tensor([0.5383, 0.0690, 0.1761, 0.2165]) -Greedy action tensor([ 1.1112, -0.6557, -0.1264, 0.0526]) tensor([0.5531, 0.0945, 0.1605, 0.1919]) -Greedy action tensor([ 1.3360, -0.6802, -0.3880, 0.3123]) tensor([0.5985, 0.0797, 0.1067, 0.2150]) -Greedy action tensor([ 1.1686, -0.3556, -0.4661, 0.1168]) tensor([0.5675, 0.1236, 0.1107, 0.1982]) -Greedy action tensor([ 1.3558, -0.3665, -0.3091, 0.4370]) tensor([0.5660, 0.1011, 0.1071, 0.2258]) -Greedy action tensor([ 1.5598, -0.2866, -0.3809, 0.1513]) tensor([0.6469, 0.1021, 0.0929, 0.1582]) -Greedy action tensor([ 1.8239, 0.5257, -0.1420, 0.2692]) tensor([0.6157, 0.1681, 0.0862, 0.1300]) -Greedy action tensor([ 2.2593, -1.0487, -0.1927, 0.5265]) tensor([0.7695, 0.0282, 0.0663, 0.1360]) -Greedy action tensor([ 1.4768, -0.6410, -0.2718, 0.2681]) tensor([0.6278, 0.0755, 0.1093, 0.1875]) -Greedy action tensor([ 1.9873, -0.5542, -0.7842, 0.1711]) tensor([0.7669, 0.0604, 0.0480, 0.1247]) -Greedy action tensor([ 1.9837, -0.8042, -0.5425, 0.5985]) tensor([0.7185, 0.0442, 0.0575, 0.1798]) -Greedy action tensor([ 2.6181, -1.4708, -0.3609, 0.2949]) tensor([0.8580, 0.0144, 0.0436, 0.0840]) -Greedy action tensor([ 1.2603, -0.3291, -0.5255, 0.2968]) tensor([0.5704, 0.1164, 0.0956, 0.2176]) -Greedy action tensor([ 1.0752, 0.1101, -0.1780, 0.3656]) tensor([0.4633, 0.1765, 0.1323, 0.2279]) -Greedy action tensor([ 1.5516, -0.6048, -0.4842, 0.3061]) tensor([0.6518, 0.0754, 0.0851, 0.1876]) -Greedy action tensor([ 1.8473, -0.6200, -0.2729, 0.1797]) tensor([0.7176, 0.0609, 0.0861, 0.1354]) -Greedy action tensor([ 1.3583, 0.0394, -1.0851, 0.4468]) tensor([0.5694, 0.1523, 0.0495, 0.2288]) -Greedy action tensor([ 1.2277, -0.6236, -0.1044, 0.2649]) tensor([0.5547, 0.0871, 0.1464, 0.2118]) -Greedy action tensor([ 1.5089, -0.5949, -0.6221, 0.4914]) tensor([0.6241, 0.0761, 0.0741, 0.2256]) -Greedy action tensor([ 2.1229, -1.2545, 0.1042, 0.9190]) tensor([0.6817, 0.0233, 0.0905, 0.2045]) -Greedy action tensor([ 1.3737, -0.5962, -0.2697, 0.3149]) tensor([0.5954, 0.0830, 0.1151, 0.2065]) -Greedy action tensor([ 0.8986, -0.1609, -0.2451, -0.0208]) tensor([0.4845, 0.1679, 0.1544, 0.1932]) -Greedy action tensor([ 1.2442, 0.0987, -0.7884, 0.2822]) tensor([0.5461, 0.1737, 0.0715, 0.2087]) -Greedy action tensor([ 2.1271, -0.7253, -0.5190, 0.5338]) tensor([0.7508, 0.0433, 0.0533, 0.1526]) -Greedy action tensor([ 1.2587, -0.6047, -0.2473, 0.3563]) tensor([0.5610, 0.0870, 0.1244, 0.2275]) -Greedy action tensor([ 1.7607, -0.8611, 0.0413, -0.0840]) tensor([0.7093, 0.0515, 0.1271, 0.1121]) -Greedy action tensor([ 1.3823, -0.4570, 0.0269, 0.0475]) tensor([0.5952, 0.0946, 0.1535, 0.1567]) -Greedy action tensor([ 1.2365, -0.5674, -0.3122, 0.0035]) tensor([0.5993, 0.0987, 0.1274, 0.1746]) -Greedy action tensor([ 1.6050, 0.0532, -1.4525, 0.0197]) tensor([0.6832, 0.1447, 0.0321, 0.1400]) -Greedy action tensor([ 1.4964, -0.9570, -0.4262, -0.0725]) tensor([0.6942, 0.0597, 0.1015, 0.1446]) -Greedy action tensor([ 1.8757, -0.4012, -0.1922, 0.2138]) tensor([0.7048, 0.0723, 0.0891, 0.1338]) -Greedy action tensor([ 1.3648, -0.6812, -0.2352, 0.4586]) tensor([0.5763, 0.0745, 0.1164, 0.2329]) -Greedy action tensor([ 1.0702, -0.3408, -0.2789, 0.0350]) tensor([0.5381, 0.1312, 0.1396, 0.1911]) -Greedy action tensor([2.1323, 0.5657, 0.4591, 0.2812]) tensor([0.6437, 0.1344, 0.1208, 0.1011]) -Greedy action tensor([ 1.0007, -0.5540, 0.0861, 0.0837]) tensor([0.4971, 0.1050, 0.1992, 0.1987]) -Greedy action tensor([ 0.7416, -0.1533, -0.4025, 0.3593]) tensor([0.4150, 0.1696, 0.1322, 0.2832]) -Greedy action tensor([ 1.0438, -0.3407, -0.1594, 0.4044]) tensor([0.4812, 0.1205, 0.1445, 0.2539]) -Greedy action tensor([ 1.2449, -0.3377, -0.2951, 0.2511]) tensor([0.5587, 0.1148, 0.1198, 0.2068]) -Greedy action tensor([ 1.6282, -0.7511, -0.4953, 0.4261]) tensor([0.6610, 0.0612, 0.0791, 0.1987]) -Greedy action tensor([ 1.0724, 0.1980, -0.6411, 0.2666]) tensor([0.4892, 0.2041, 0.0882, 0.2186]) -Greedy action tensor([ 1.6734, -0.4215, -0.4450, 0.4976]) tensor([0.6444, 0.0793, 0.0775, 0.1988]) -Greedy action tensor([ 1.5335, 0.3092, -0.1023, 0.1166]) tensor([0.5776, 0.1698, 0.1125, 0.1400]) -Greedy action tensor([ 1.4262, -0.4592, -0.6290, 0.4077]) tensor([0.6094, 0.0925, 0.0780, 0.2201]) -Greedy action tensor([ 1.2972, -0.3206, -0.5075, 0.4089]) tensor([0.5636, 0.1118, 0.0927, 0.2319]) -Greedy action tensor([ 2.4227, -0.7605, -0.0686, 0.6045]) tensor([0.7773, 0.0322, 0.0644, 0.1262]) -Greedy action tensor([ 2.3476, -0.7497, -0.1932, 0.6723]) tensor([0.7626, 0.0345, 0.0601, 0.1428]) -Greedy action tensor([ 1.4424, -0.5416, -0.3972, 0.2028]) tensor([0.6306, 0.0867, 0.1002, 0.1825]) -Greedy action tensor([ 1.1836, -0.7705, -0.0084, 0.5139]) tensor([0.5109, 0.0724, 0.1551, 0.2615]) -Greedy action tensor([ 2.0013, -0.9457, -0.3497, 0.5531]) tensor([0.7232, 0.0380, 0.0689, 0.1700]) -Greedy action tensor([ 1.0353, -0.3279, 0.3066, -0.5506]) tensor([0.5146, 0.1317, 0.2483, 0.1054]) -Greedy action tensor([ 2.1565, -0.2192, -0.7403, 0.6206]) tensor([0.7335, 0.0682, 0.0405, 0.1579]) -Greedy action tensor([ 1.8490, -0.9620, -0.0745, -0.3109]) tensor([0.7567, 0.0455, 0.1105, 0.0873]) -Greedy action tensor([ 1.2851, -0.3476, -0.5066, -0.0652]) tensor([0.6168, 0.1205, 0.1028, 0.1598]) -Greedy action tensor([ 1.6219, -1.0831, 0.0673, 0.4965]) tensor([0.6239, 0.0417, 0.1318, 0.2025]) -Greedy action tensor([ 1.7260, -0.1761, -0.5782, 0.5697]) tensor([0.6395, 0.0955, 0.0638, 0.2012]) -Greedy action tensor([ 1.3341, -0.4350, -0.1480, 0.2873]) tensor([0.5718, 0.0975, 0.1299, 0.2008]) -Greedy action tensor([ 1.1972, -0.1584, -0.5390, 0.1664]) tensor([0.5584, 0.1440, 0.0984, 0.1992]) -Greedy action tensor([ 1.7868, -1.3384, -0.0387, 0.3103]) tensor([0.6976, 0.0306, 0.1124, 0.1594]) -Greedy action tensor([ 1.3531, -0.4330, -0.4641, 0.2921]) tensor([0.5966, 0.1000, 0.0969, 0.2065]) -Greedy action tensor([ 1.1379, -0.0707, -0.1742, 0.1487]) tensor([0.5155, 0.1539, 0.1388, 0.1917]) -Greedy action tensor([ 1.9062, 0.0045, -0.5099, 0.0324]) tensor([0.7183, 0.1073, 0.0641, 0.1103]) -Greedy action tensor([ 2.2528, -0.5892, -0.2067, 0.7426]) tensor([0.7328, 0.0427, 0.0626, 0.1619]) -Greedy action tensor([ 1.5483, -0.3569, -0.6852, 0.3179]) tensor([0.6459, 0.0961, 0.0692, 0.1887]) -Greedy action tensor([ 1.0645, -0.4558, -0.0657, 0.2319]) tensor([0.5059, 0.1106, 0.1634, 0.2200]) -Greedy action tensor([ 1.4635, -0.1706, -0.7677, 0.4835]) tensor([0.5960, 0.1163, 0.0640, 0.2237]) -Greedy action tensor([ 2.4308, -0.9353, -0.5242, 0.9553]) tensor([0.7603, 0.0262, 0.0396, 0.1739]) -Greedy action tensor([ 1.7393, -0.6294, -0.1861, 0.2164]) tensor([0.6861, 0.0642, 0.1000, 0.1496]) -Greedy action tensor([ 1.2567, -0.7761, -0.1525, 0.1134]) tensor([0.5903, 0.0773, 0.1442, 0.1882]) -Greedy action tensor([-1.9085, -0.4525, 0.6803, -0.1387]) tensor([0.0409, 0.1753, 0.5440, 0.2399]) -Greedy action tensor([-1.9129, -0.3895, 0.6462, -0.1628]) tensor([0.0412, 0.1891, 0.5326, 0.2372]) -Greedy action tensor([-1.8469, -0.4932, 0.6298, -0.1143]) tensor([0.0446, 0.1726, 0.5307, 0.2521]) -Greedy action tensor([-1.8735, -0.4346, 0.6589, -0.1232]) tensor([0.0425, 0.1790, 0.5342, 0.2444]) -Greedy action tensor([-1.8762, -0.4598, 0.6396, -0.1467]) tensor([0.0432, 0.1782, 0.5349, 0.2437]) -Greedy action tensor([-1.7298, -1.0572, 0.3921, -0.4490]) tensor([0.0671, 0.1314, 0.5600, 0.2415]) -Greedy action tensor([-1.8612, -0.3927, 0.6191, -0.1303]) tensor([0.0436, 0.1894, 0.5209, 0.2462]) -Greedy action tensor([-1.5507, -0.0933, 0.5942, 0.0153]) tensor([0.0537, 0.2306, 0.4586, 0.2571]) -Greedy action tensor([-1.6297, -0.3236, 0.6730, 0.0788]) tensor([0.0495, 0.1826, 0.4948, 0.2731]) -Greedy action tensor([-0.9415, 0.2275, 0.5032, 0.8078]) tensor([0.0704, 0.2265, 0.2984, 0.4047]) -Greedy action tensor([-1.9355, -0.4575, 0.6769, -0.1623]) tensor([0.0402, 0.1760, 0.5473, 0.2365]) -Greedy action tensor([-1.9444, -0.4468, 0.6671, -0.1807]) tensor([0.0401, 0.1794, 0.5464, 0.2341]) -Greedy action tensor([-1.8847, -0.4498, 0.6413, -0.1488]) tensor([0.0428, 0.1796, 0.5349, 0.2427]) -Greedy action tensor([-1.2415, 0.3019, 0.3472, -0.6878]) tensor([0.0812, 0.3800, 0.3976, 0.1412]) -Greedy action tensor([-1.9139, -0.3827, 0.6516, -0.1550]) tensor([0.0409, 0.1892, 0.5323, 0.2376]) -Greedy action tensor([-1.8768, -0.4359, 0.6449, -0.1155]) tensor([0.0426, 0.1798, 0.5299, 0.2477]) -Greedy action tensor([-1.9211, -0.4312, 0.6554, -0.1668]) tensor([0.0410, 0.1821, 0.5397, 0.2372]) -Greedy action tensor([-1.9246, -0.4114, 0.6519, -0.1648]) tensor([0.0408, 0.1853, 0.5367, 0.2372]) -Greedy action tensor([-1.9432, -0.4483, 0.6670, -0.1797]) tensor([0.0402, 0.1791, 0.5464, 0.2343]) -Greedy action tensor([-1.6407, -0.4313, 0.7105, 0.3217]) tensor([0.0455, 0.1526, 0.4779, 0.3240]) -Greedy action tensor([-1.8321, -0.1642, 0.6420, -0.3007]) tensor([0.0439, 0.2325, 0.5207, 0.2029]) -Greedy action tensor([-1.9348, -0.4483, 0.6666, -0.1745]) tensor([0.0405, 0.1789, 0.5454, 0.2352]) -Greedy action tensor([-1.8939, -0.4305, 0.6428, -0.1524]) tensor([0.0423, 0.1826, 0.5341, 0.2411]) -Greedy action tensor([-1.5288, -0.3668, 0.4268, 0.0347]) tensor([0.0623, 0.1993, 0.4407, 0.2977]) -Greedy action tensor([-1.8292, -0.4404, 0.6101, -0.1188]) tensor([0.0454, 0.1822, 0.5210, 0.2513]) -Greedy action tensor([-1.6805, -0.2942, 0.6436, 0.0678]) tensor([0.0477, 0.1908, 0.4874, 0.2741]) -Greedy action tensor([-1.8597, -0.4617, 0.6315, -0.1410]) tensor([0.0441, 0.1783, 0.5320, 0.2457]) -Greedy action tensor([-1.8317, -0.4325, 0.6066, -0.1199]) tensor([0.0454, 0.1838, 0.5196, 0.2513]) -Greedy action tensor([-1.8820, -0.4480, 0.6632, -0.1116]) tensor([0.0420, 0.1762, 0.5352, 0.2466]) -Greedy action tensor([-1.9413, -0.4591, 0.6639, -0.1815]) tensor([0.0404, 0.1779, 0.5469, 0.2348]) -Greedy action tensor([-0.9227, -0.2698, 0.2304, 0.5284]) tensor([0.0966, 0.1855, 0.3059, 0.4121]) -Greedy action tensor([-1.9155, -0.4312, 0.6488, -0.1681]) tensor([0.0414, 0.1827, 0.5381, 0.2377]) -Greedy action tensor([-0.8008, -0.0983, 0.7510, 1.3941]) tensor([0.0598, 0.1208, 0.2823, 0.5371]) -Greedy action tensor([-1.9205, -0.4443, 0.6554, -0.1676]) tensor([0.0412, 0.1802, 0.5411, 0.2376]) -Greedy action tensor([-1.2283, 0.3107, 0.2214, 0.1005]) tensor([0.0730, 0.3402, 0.3111, 0.2757]) -Greedy action tensor([-1.7343, -0.5217, 0.5620, -0.1242]) tensor([0.0518, 0.1742, 0.5148, 0.2592]) -Greedy action tensor([-1.8866, -0.4482, 0.6434, -0.1494]) tensor([0.0426, 0.1797, 0.5354, 0.2423]) -Greedy action tensor([-0.9126, 0.3395, 0.1704, -0.0360]) tensor([0.1015, 0.3550, 0.2997, 0.2438]) -Greedy action tensor([-1.9342, -0.4260, 0.6593, -0.1734]) tensor([0.0405, 0.1829, 0.5413, 0.2354]) -Greedy action tensor([-1.8840, -0.2071, 0.6025, -0.1572]) tensor([0.0417, 0.2230, 0.5010, 0.2344]) -Greedy action tensor([-1.6883, -0.4442, 0.5481, -0.0218]) tensor([0.0523, 0.1815, 0.4894, 0.2768]) -Greedy action tensor([-1.6340, -0.4855, 0.5101, -0.0027]) tensor([0.0562, 0.1772, 0.4795, 0.2871]) -Greedy action tensor([-1.9340, -0.4499, 0.6721, -0.1727]) tensor([0.0404, 0.1780, 0.5467, 0.2349]) -Greedy action tensor([-1.5523, 0.0381, 0.4097, -0.0236]) tensor([0.0567, 0.2782, 0.4034, 0.2616]) -Greedy action tensor([-1.8913, -0.4644, 0.6342, -0.1492]) tensor([0.0428, 0.1782, 0.5347, 0.2443]) -Greedy action tensor([-1.8900, -0.4220, 0.6393, -0.1422]) tensor([0.0423, 0.1837, 0.5310, 0.2430]) -Greedy action tensor([-1.5969, -0.4071, 0.5140, -0.1188]) tensor([0.0591, 0.1942, 0.4877, 0.2590]) -Greedy action tensor([-1.2730, 0.2681, 0.5490, 0.4564]) tensor([0.0572, 0.2670, 0.3536, 0.3223]) -Greedy action tensor([-1.9066, -0.4482, 0.6503, -0.1617]) tensor([0.0418, 0.1797, 0.5391, 0.2394]) -Greedy action tensor([-1.9083, -0.4527, 0.6503, -0.1504]) tensor([0.0417, 0.1786, 0.5381, 0.2416]) -Greedy action tensor([-1.9447, -0.4503, 0.6648, -0.1813]) tensor([0.0402, 0.1791, 0.5463, 0.2344]) -Greedy action tensor([-1.9302, -0.3971, 0.6515, -0.1689]) tensor([0.0405, 0.1878, 0.5358, 0.2359]) -Greedy action tensor([-1.6099, -0.4681, 0.5861, 0.2306]) tensor([0.0515, 0.1613, 0.4629, 0.3244]) -Greedy action tensor([-1.1904, -0.4493, 0.6667, 0.9617]) tensor([0.0552, 0.1159, 0.3537, 0.4751]) -Greedy action tensor([-1.7200, -0.4773, 0.5511, -0.1100]) tensor([0.0522, 0.1809, 0.5058, 0.2611]) -Greedy action tensor([-1.8966, -0.4197, 0.6417, -0.1541]) tensor([0.0421, 0.1844, 0.5330, 0.2405]) -Greedy action tensor([-1.7802, 0.0563, 0.5375, -0.0715]) tensor([0.0436, 0.2734, 0.4424, 0.2406]) -Greedy action tensor([-1.8659, -0.4365, 0.6298, -0.1377]) tensor([0.0436, 0.1821, 0.5289, 0.2455]) -Greedy action tensor([-1.6788, 0.1357, 0.4500, 0.0272]) tensor([0.0475, 0.2916, 0.3993, 0.2616]) -Greedy action tensor([-0.6216, 0.4091, 0.2552, 0.5940]) tensor([0.1044, 0.2926, 0.2509, 0.3521]) -Greedy action tensor([-1.9013, -0.4333, 0.6426, -0.1599]) tensor([0.0421, 0.1826, 0.5354, 0.2400]) -Greedy action tensor([-1.4201, 0.4980, 0.2258, 0.1008]) tensor([0.0569, 0.3875, 0.2951, 0.2605]) -Greedy action tensor([-1.4814, -0.3433, 1.1270, 0.9495]) tensor([0.0344, 0.1074, 0.4671, 0.3911]) -Greedy action tensor([-1.6760, -0.2557, 0.5794, -0.0537]) tensor([0.0507, 0.2096, 0.4832, 0.2565]) -Greedy action tensor([-1.0981, -0.9027, 0.8541, 0.1093]) tensor([0.0793, 0.0965, 0.5589, 0.2653]) -Greedy action tensor([-1.9463, -0.4535, 0.6675, -0.1818]) tensor([0.0401, 0.1784, 0.5474, 0.2341]) -Greedy action tensor([-0.7657, 1.0341, 0.0585, 0.4502]) tensor([0.0787, 0.4762, 0.1795, 0.2656]) -Greedy action tensor([-1.7716, -0.3270, 0.5644, -0.1167]) tensor([0.0481, 0.2037, 0.4968, 0.2514]) -Greedy action tensor([-1.1982, 0.8513, 0.1425, 0.3297]) tensor([0.0582, 0.4515, 0.2223, 0.2680]) -Greedy action tensor([-1.9357, -0.4477, 0.6644, -0.1753]) tensor([0.0405, 0.1792, 0.5450, 0.2353]) -Greedy action tensor([-1.8567, -0.3177, 0.6072, -0.1399]) tensor([0.0435, 0.2028, 0.5114, 0.2423]) -Greedy action tensor([-1.9141, -0.4358, 0.6512, -0.1655]) tensor([0.0414, 0.1817, 0.5388, 0.2381]) -Greedy action tensor([-1.8424, -0.4795, 0.6120, -0.1269]) tensor([0.0452, 0.1768, 0.5265, 0.2515]) -Greedy action tensor([-1.9341, -0.4124, 0.6531, -0.1779]) tensor([0.0405, 0.1857, 0.5390, 0.2348]) -Greedy action tensor([-1.8994, -0.4026, 0.6461, -0.1509]) tensor([0.0417, 0.1864, 0.5321, 0.2398]) -Greedy action tensor([-1.9197, -0.4334, 0.6519, -0.1691]) tensor([0.0412, 0.1822, 0.5393, 0.2373]) -Greedy action tensor([-0.8097, 0.1909, 0.0543, -0.0429]) tensor([0.1213, 0.3299, 0.2878, 0.2611]) -Greedy action tensor([-1.6620, -0.3908, 0.6732, 0.1432]) tensor([0.0477, 0.1699, 0.4925, 0.2899]) -Greedy action tensor([-1.5721, -0.3187, 0.4968, -0.0372]) tensor([0.0586, 0.2053, 0.4640, 0.2720]) -Greedy action tensor([-1.8794, -0.3997, -0.0992, -0.2433]) tensor([0.0608, 0.2668, 0.3604, 0.3120]) -Greedy action tensor([-1.9018, -0.4324, 0.6401, -0.1596]) tensor([0.0421, 0.1829, 0.5347, 0.2403]) -Greedy action tensor([-1.1242, -1.1676, 0.0389, -1.0043]) tensor([0.1591, 0.1524, 0.5092, 0.1794]) -Greedy action tensor([ 0.0325, -0.4652, 0.6244, 0.5144]) tensor([0.1986, 0.1207, 0.3590, 0.3216]) -Greedy action tensor([1.2443, 0.4062, 0.3819, 0.1413]) tensor([0.4573, 0.1978, 0.1931, 0.1518]) -Greedy action tensor([ 0.9583, 0.5420, 0.8802, -0.5926]) tensor([0.3576, 0.2358, 0.3307, 0.0758]) -Greedy action tensor([ 0.3038, -1.2539, -0.7290, -0.0123]) tensor([0.4356, 0.0917, 0.1551, 0.3176]) -Greedy action tensor([ 0.5611, -1.3890, -0.4866, 0.7553]) tensor([0.3693, 0.0525, 0.1296, 0.4485]) -Greedy action tensor([-0.2898, -0.8654, 0.1005, -0.7719]) tensor([0.2734, 0.1538, 0.4040, 0.1688]) -Greedy action tensor([-0.0142, -0.6382, -0.2738, -0.2901]) tensor([0.3262, 0.1747, 0.2516, 0.2475]) -Greedy action tensor([ 1.4829, -0.5878, 1.4310, 1.3547]) tensor([0.3384, 0.0427, 0.3213, 0.2977]) -Greedy action tensor([ 0.1880, -0.8958, -0.7638, 0.8680]) tensor([0.2704, 0.0915, 0.1044, 0.5337]) -Greedy action tensor([-0.5538, -0.6535, 0.2734, -1.1789]) tensor([0.2115, 0.1915, 0.4838, 0.1132]) -Greedy action tensor([ 0.9506, -1.1471, 1.5161, -0.5048]) tensor([0.3209, 0.0394, 0.5649, 0.0749]) -Greedy action tensor([ 1.4546, -0.3149, 0.2551, 0.4904]) tensor([0.5397, 0.0920, 0.1626, 0.2058]) -Greedy action tensor([-1.1078, -0.9054, 1.6318, -0.2806]) tensor([0.0500, 0.0612, 0.7743, 0.1144]) -Greedy action tensor([-0.1111, -1.2483, -0.0987, 0.0494]) tensor([0.2851, 0.0914, 0.2887, 0.3348]) -Greedy action tensor([ 1.1605, 0.1305, -0.0209, 0.5670]) tensor([0.4512, 0.1611, 0.1385, 0.2493]) -Greedy action tensor([-0.4201, -0.3672, -0.1462, -0.7002]) tensor([0.2424, 0.2556, 0.3188, 0.1832]) -Greedy action tensor([ 0.2461, -0.7541, 0.4745, -0.2243]) tensor([0.3078, 0.1132, 0.3868, 0.1923]) -Greedy action tensor([0.2795, 0.3579, 0.7756, 0.0691]) tensor([0.2206, 0.2385, 0.3622, 0.1787]) -Greedy action tensor([ 0.2740, -0.1756, 0.2537, -0.2572]) tensor([0.3120, 0.1990, 0.3057, 0.1834]) -Greedy action tensor([-0.1399, -0.5251, 1.8362, 0.2846]) tensor([0.0959, 0.0653, 0.6921, 0.1467]) -Greedy action tensor([ 0.2000, -1.3733, -0.1381, 0.7171]) tensor([0.2779, 0.0576, 0.1982, 0.4662]) -Greedy action tensor([-0.4225, -1.1280, 0.6160, -0.2292]) tensor([0.1808, 0.0893, 0.5107, 0.2193]) -Greedy action tensor([ 1.0957, -1.6516, 0.6409, 1.1629]) tensor([0.3613, 0.0232, 0.2292, 0.3864]) -Greedy action tensor([ 1.3040, -0.8598, 0.6204, 1.2302]) tensor([0.3924, 0.0451, 0.1981, 0.3645]) -Greedy action tensor([-0.1934, -1.9656, 0.7093, -0.2637]) tensor([0.2189, 0.0372, 0.5399, 0.2040]) -Greedy action tensor([ 0.0869, 0.8978, 0.4432, -0.4689]) tensor([0.1904, 0.4284, 0.2719, 0.1092]) -Greedy action tensor([-0.1968, -1.1225, -0.1031, -0.8224]) tensor([0.3301, 0.1308, 0.3625, 0.1766]) -Greedy action tensor([-0.8131, -0.4571, -0.6902, 0.2543]) tensor([0.1547, 0.2208, 0.1749, 0.4497]) -Greedy action tensor([ 0.1273, 0.9859, -0.2239, -0.8326]) tensor([0.2249, 0.5307, 0.1583, 0.0861]) -Greedy action tensor([ 0.3328, -1.3331, -0.3851, -0.7809]) tensor([0.4987, 0.0943, 0.2433, 0.1637]) -Greedy action tensor([ 0.1615, -0.0803, 0.6019, -0.6546]) tensor([0.2645, 0.2077, 0.4109, 0.1169]) -Greedy action tensor([ 0.2011, -0.7469, 0.5460, 0.2743]) tensor([0.2581, 0.1000, 0.3643, 0.2776]) -Greedy action tensor([ 0.6484, 0.0193, 1.2090, -0.8246]) tensor([0.2846, 0.1517, 0.4985, 0.0652]) -Greedy action tensor([-0.9567, -1.1712, -1.0109, 0.9587]) tensor([0.1048, 0.0846, 0.0993, 0.7114]) -Greedy action tensor([ 1.3159, -1.8783, 0.7609, 1.2823]) tensor([0.3873, 0.0159, 0.2223, 0.3745]) -Greedy action tensor([ 1.0893, -1.6745, 0.3252, 1.0646]) tensor([0.3993, 0.0252, 0.1860, 0.3895]) -Greedy action tensor([ 0.3473, -0.1772, 0.0466, 1.3223]) tensor([0.2007, 0.1188, 0.1486, 0.5320]) -Greedy action tensor([-0.4139, -1.8789, 0.6741, -0.1165]) tensor([0.1803, 0.0417, 0.5352, 0.2428]) -Greedy action tensor([-1.3876, -1.0411, 0.2281, -1.1230]) tensor([0.1143, 0.1616, 0.5751, 0.1489]) -Greedy action tensor([1.7934, 0.1630, 1.1776, 2.1561]) tensor([0.3151, 0.0617, 0.1702, 0.4529]) -Greedy action tensor([ 0.3658, -1.3251, 0.4469, 0.6985]) tensor([0.2730, 0.0503, 0.2960, 0.3807]) -Greedy action tensor([-0.2073, -1.2286, -0.2763, 0.2203]) tensor([0.2613, 0.0941, 0.2439, 0.4007]) -Greedy action tensor([ 1.1286, -0.5760, -0.4590, 0.9861]) tensor([0.4438, 0.0807, 0.0907, 0.3848]) -Greedy action tensor([ 1.5076, -1.1551, 0.9345, -0.0927]) tensor([0.5449, 0.0380, 0.3072, 0.1100]) -Greedy action tensor([-0.4095, 0.6588, -0.5005, -0.0256]) tensor([0.1589, 0.4626, 0.1451, 0.2333]) -Greedy action tensor([ 0.4780, 0.0654, -0.8249, 0.4762]) tensor([0.3411, 0.2258, 0.0927, 0.3405]) -Greedy action tensor([ 1.5264, 0.7975, 0.3318, -0.2722]) tensor([0.5126, 0.2473, 0.1552, 0.0848]) -Greedy action tensor([ 0.0768, -0.6606, -0.3381, -0.6082]) tensor([0.3784, 0.1810, 0.2499, 0.1907]) -Greedy action tensor([ 0.2390, 0.5414, 0.9297, -0.2772]) tensor([0.2022, 0.2736, 0.4034, 0.1207]) -Greedy action tensor([-0.6083, -0.4123, 1.1276, -0.7891]) tensor([0.1146, 0.1394, 0.6503, 0.0957]) -Greedy action tensor([ 1.0708, -0.5761, 0.2417, 0.5765]) tensor([0.4466, 0.0860, 0.1949, 0.2724]) -Greedy action tensor([ 0.4955, -1.2036, -0.0707, 0.8207]) tensor([0.3190, 0.0583, 0.1811, 0.4416]) -Greedy action tensor([-0.3432, -1.5645, -0.2170, -0.3481]) tensor([0.2920, 0.0861, 0.3313, 0.2906]) -Greedy action tensor([ 0.5209, -1.4762, -0.1809, -0.3475]) tensor([0.4876, 0.0662, 0.2417, 0.2046]) -Greedy action tensor([ 0.5713, -0.2785, -0.2731, 0.2160]) tensor([0.3909, 0.1671, 0.1680, 0.2740]) -Greedy action tensor([-0.3215, 0.4206, 0.5407, -0.6348]) tensor([0.1613, 0.3388, 0.3820, 0.1179]) -Greedy action tensor([ 1.0878, -0.1916, -0.1288, 0.2158]) tensor([0.5019, 0.1396, 0.1487, 0.2098]) -Greedy action tensor([ 0.4697, 0.2160, -0.2628, -0.3920]) tensor([0.3733, 0.2896, 0.1794, 0.1577]) -Greedy action tensor([-1.1519, -0.9981, 1.4453, -0.6298]) tensor([0.0579, 0.0675, 0.7771, 0.0976]) -Greedy action tensor([-0.5335, -0.2015, -0.4458, -0.1375]) tensor([0.2011, 0.2804, 0.2196, 0.2989]) -Greedy action tensor([-0.7787, 0.1478, -0.2842, -0.6933]) tensor([0.1599, 0.4038, 0.2622, 0.1741]) -Greedy action tensor([-0.5420, -1.3860, -0.3303, 0.6875]) tensor([0.1643, 0.0707, 0.2031, 0.5619]) -Greedy action tensor([-1.5215, -0.5848, -0.3304, -0.5095]) tensor([0.1042, 0.2660, 0.3430, 0.2868]) -Greedy action tensor([ 0.4400, -1.1122, 0.2160, 1.2838]) tensor([0.2306, 0.0488, 0.1843, 0.5362]) -Greedy action tensor([1.2578, 0.0993, 0.3344, 0.5676]) tensor([0.4520, 0.1419, 0.1795, 0.2266]) -Greedy action tensor([-0.0944, -0.0902, -0.4229, -0.1770]) tensor([0.2744, 0.2755, 0.1975, 0.2526]) -Greedy action tensor([ 0.3360, -0.7899, 0.4912, 0.4325]) tensor([0.2783, 0.0903, 0.3250, 0.3065]) -Greedy action tensor([ 1.1419, -0.6973, 0.1884, 0.5153]) tensor([0.4811, 0.0765, 0.1854, 0.2571]) -Greedy action tensor([-0.0925, 0.4743, 0.0769, -0.5134]) tensor([0.2172, 0.3829, 0.2573, 0.1426]) -Greedy action tensor([1.3911, 0.1709, 1.6469, 0.5009]) tensor([0.3336, 0.0985, 0.4309, 0.1370]) -Greedy action tensor([ 0.2676, -0.1987, -0.6094, 0.8168]) tensor([0.2649, 0.1662, 0.1102, 0.4587]) -Greedy action tensor([ 1.0597, 0.7052, 0.0230, -0.8638]) tensor([0.4541, 0.3185, 0.1610, 0.0663]) -Greedy action tensor([ 0.7051, -0.3803, 0.1892, 1.2115]) tensor([0.2782, 0.0940, 0.1661, 0.4617]) -Greedy action tensor([ 0.3099, -0.8301, -0.7925, 0.0022]) tensor([0.4189, 0.1340, 0.1391, 0.3080]) -Greedy action tensor([ 0.4438, -0.0330, -0.5897, 0.9063]) tensor([0.2805, 0.1741, 0.0998, 0.4455]) -Greedy action tensor([-0.4107, 0.7677, 0.2089, -0.0465]) tensor([0.1325, 0.4305, 0.2462, 0.1907]) -Greedy action tensor([-1.2539, -0.6510, -0.7344, 0.2152]) tensor([0.1129, 0.2064, 0.1899, 0.4908]) -Greedy action tensor([-0.8643, -0.4638, -0.6797, 0.5434]) tensor([0.1285, 0.1918, 0.1546, 0.5251]) -Greedy action tensor([-0.9509, -1.0697, 0.5978, -0.2995]) tensor([0.1175, 0.1043, 0.5528, 0.2254]) -Greedy action tensor([ 0.0914, -0.5440, -0.1160, 0.7266]) tensor([0.2364, 0.1252, 0.1921, 0.4462]) -Greedy action tensor([ 0.6676, -0.5547, -0.2502, 0.1901]) tensor([0.4321, 0.1273, 0.1726, 0.2681]) -Greedy action tensor([ 1.0027, 0.5593, -0.1660, -0.4053]) tensor([0.4551, 0.2921, 0.1414, 0.1113]) -Greedy action tensor([ 0.9606, -0.7337, 0.0545, -0.2962]) tensor([0.5341, 0.0981, 0.2158, 0.1520]) -Greedy action tensor([ 0.7909, -0.2960, -0.0472, -0.1351]) tensor([0.4617, 0.1557, 0.1997, 0.1829]) -Greedy action tensor([ 0.6887, -0.3026, -0.0054, -0.0462]) tensor([0.4255, 0.1579, 0.2125, 0.2041]) -Greedy action tensor([ 0.9467, -0.2977, 0.0522, -0.4702]) tensor([0.5156, 0.1486, 0.2108, 0.1250]) -Greedy action tensor([ 0.8459, -0.2161, 0.0061, -0.1184]) tensor([0.4632, 0.1602, 0.2000, 0.1766]) -Greedy action tensor([ 0.7779, -0.3871, -0.0352, -0.2181]) tensor([0.4706, 0.1468, 0.2087, 0.1738]) -Greedy action tensor([ 0.9447, -0.5490, 0.0783, -0.1642]) tensor([0.5064, 0.1137, 0.2129, 0.1670]) -Greedy action tensor([ 1.0417, -0.7280, -0.0196, -0.4313]) tensor([0.5729, 0.0976, 0.1982, 0.1313]) -Greedy action tensor([ 0.5582, -0.4329, 0.0098, -0.4492]) tensor([0.4321, 0.1604, 0.2497, 0.1578]) -Greedy action tensor([ 0.7996, -0.5213, -0.0856, -0.4358]) tensor([0.5075, 0.1355, 0.2094, 0.1476]) -Greedy action tensor([ 0.3852, -0.0375, -0.0447, -0.1006]) tensor([0.3424, 0.2243, 0.2227, 0.2106]) -Greedy action tensor([ 0.9388, -0.5535, 0.1424, -0.4797]) tensor([0.5214, 0.1172, 0.2351, 0.1262]) -Greedy action tensor([ 0.7235, -0.3141, 0.0997, -0.2702]) tensor([0.4424, 0.1567, 0.2371, 0.1638]) -Greedy action tensor([ 0.4543, -0.1397, -0.1253, -0.2932]) tensor([0.3867, 0.2135, 0.2166, 0.1831]) -Greedy action tensor([ 0.3045, -0.1340, -0.0832, -0.1343]) tensor([0.3369, 0.2173, 0.2286, 0.2172]) -Greedy action tensor([ 0.7950, -0.4290, 0.0029, -0.3668]) tensor([0.4855, 0.1428, 0.2199, 0.1519]) -Greedy action tensor([ 0.5690, -0.1546, -0.0384, -0.0083]) tensor([0.3859, 0.1872, 0.2102, 0.2167]) -Greedy action tensor([ 0.2989, 0.0192, -0.1311, 0.0062]) tensor([0.3172, 0.2398, 0.2063, 0.2367]) -Greedy action tensor([ 0.4952, 0.1733, -0.0712, -0.2415]) tensor([0.3609, 0.2616, 0.2048, 0.1727]) -Greedy action tensor([ 0.6337, -0.4267, 0.0204, -0.2942]) tensor([0.4380, 0.1517, 0.2372, 0.1732]) -Greedy action tensor([ 0.7967, -0.7385, 0.0509, -0.3333]) tensor([0.4968, 0.1070, 0.2357, 0.1605]) -Greedy action tensor([ 0.6875, -0.5582, -0.0486, -0.2240]) tensor([0.4611, 0.1327, 0.2209, 0.1853]) -Greedy action tensor([ 0.8429, -0.4669, -0.3033, -0.3843]) tensor([0.5317, 0.1435, 0.1690, 0.1558]) -Greedy action tensor([ 0.5934, -0.5515, -0.1045, -0.2368]) tensor([0.4441, 0.1413, 0.2210, 0.1936]) -Greedy action tensor([ 0.7493, 0.3293, -0.0969, 0.0881]) tensor([0.3843, 0.2525, 0.1649, 0.1984]) -Greedy action tensor([ 1.0214, -0.7882, 0.0250, -0.5378]) tensor([0.5737, 0.0939, 0.2118, 0.1206]) -Greedy action tensor([ 0.9910, -0.5817, -0.3889, -0.6048]) tensor([0.6017, 0.1248, 0.1514, 0.1220]) -Greedy action tensor([ 0.5822, -0.2549, 0.0464, -0.2341]) tensor([0.4065, 0.1760, 0.2379, 0.1797]) -Greedy action tensor([ 0.6074, -0.0560, -0.1183, 0.0030]) tensor([0.3928, 0.2024, 0.1901, 0.2147]) -Greedy action tensor([ 0.4437, 0.2266, -0.1963, 0.0161]) tensor([0.3351, 0.2697, 0.1767, 0.2185]) -Greedy action tensor([ 0.7940, -0.4793, -0.0134, -0.1130]) tensor([0.4695, 0.1314, 0.2094, 0.1896]) -Greedy action tensor([ 1.0412, -0.4562, -0.2491, -0.3511]) tensor([0.5723, 0.1280, 0.1575, 0.1422]) -Greedy action tensor([ 0.3148, -0.0576, 0.0051, -0.0014]) tensor([0.3173, 0.2187, 0.2328, 0.2313]) -Greedy action tensor([ 0.5683, -0.4427, -0.1932, 0.0462]) tensor([0.4125, 0.1501, 0.1926, 0.2447]) -Greedy action tensor([ 0.8660, -0.7055, -0.0186, -0.3836]) tensor([0.5243, 0.1089, 0.2165, 0.1503]) -Greedy action tensor([ 0.6842, -0.1468, -0.0713, -0.1646]) tensor([0.4286, 0.1867, 0.2013, 0.1834]) -Greedy action tensor([ 0.7352, -0.4608, -0.1620, -0.1765]) tensor([0.4735, 0.1432, 0.1930, 0.1903]) -Greedy action tensor([ 0.6273, -0.3789, 0.0372, -0.1778]) tensor([0.4225, 0.1545, 0.2342, 0.1889]) -Greedy action tensor([ 1.0534, -0.6905, -0.0731, -0.3920]) tensor([0.5765, 0.1008, 0.1869, 0.1358]) -Greedy action tensor([ 0.4533, -0.2741, -0.1253, -0.4207]) tensor([0.4063, 0.1963, 0.2278, 0.1695]) -Greedy action tensor([ 0.5624, -0.4145, 0.0440, -0.4560]) tensor([0.4286, 0.1614, 0.2552, 0.1548]) -Greedy action tensor([ 0.3398, 0.0322, -0.0453, -0.1243]) tensor([0.3285, 0.2415, 0.2235, 0.2065]) -Greedy action tensor([ 0.6796, -0.3803, -0.0437, -0.0559]) tensor([0.4327, 0.1499, 0.2099, 0.2074]) -Greedy action tensor([ 0.7039, -0.2180, -0.0163, -0.0411]) tensor([0.4239, 0.1686, 0.2063, 0.2012]) -Greedy action tensor([ 0.7002, -0.1246, 0.0166, -0.2028]) tensor([0.4258, 0.1866, 0.2150, 0.1726]) -Greedy action tensor([ 1.1651, -0.7035, -0.1208, -0.5125]) tensor([0.6182, 0.0954, 0.1709, 0.1155]) -Greedy action tensor([ 0.7141, 0.0093, -0.0841, -0.2895]) tensor([0.4327, 0.2139, 0.1948, 0.1586]) -Greedy action tensor([ 0.6146, -0.3585, 0.0232, -0.3712]) tensor([0.4339, 0.1640, 0.2402, 0.1619]) -Greedy action tensor([ 0.4023, -0.3831, -0.1860, -0.0621]) tensor([0.3788, 0.1727, 0.2104, 0.2381]) -Greedy action tensor([ 0.8912, -0.4691, -0.0639, -0.4942]) tensor([0.5287, 0.1356, 0.2034, 0.1323]) -Greedy action tensor([ 0.5588, -0.2084, -0.0151, -0.0484]) tensor([0.3887, 0.1805, 0.2190, 0.2118]) -Greedy action tensor([ 0.8160, 0.2038, -0.0447, -0.1115]) tensor([0.4236, 0.2297, 0.1791, 0.1676]) -Greedy action tensor([ 0.7044, -0.4493, -0.1030, -0.3260]) tensor([0.4721, 0.1489, 0.2106, 0.1685]) -Greedy action tensor([ 0.6606, -0.3330, -0.1118, -0.0215]) tensor([0.4278, 0.1584, 0.1976, 0.2163]) -Greedy action tensor([ 0.8323, -0.8956, -0.1355, -0.7507]) tensor([0.5672, 0.1008, 0.2155, 0.1165]) -Greedy action tensor([ 0.7455, -0.4980, -0.0861, -0.2397]) tensor([0.4768, 0.1375, 0.2076, 0.1780]) -Greedy action tensor([ 0.5318, -0.5070, 0.0446, -0.3771]) tensor([0.4217, 0.1492, 0.2591, 0.1699]) -Greedy action tensor([ 0.3805, -0.4334, -0.2573, -0.4689]) tensor([0.4168, 0.1847, 0.2203, 0.1783]) -Greedy action tensor([ 1.0009, -0.7441, -0.1602, -0.2477]) tensor([0.5635, 0.0984, 0.1764, 0.1617]) -Greedy action tensor([ 0.7979, -0.4516, -0.0394, -0.0737]) tensor([0.4678, 0.1341, 0.2025, 0.1957]) -Greedy action tensor([ 0.5820, -0.4306, -0.1471, -0.4556]) tensor([0.4546, 0.1651, 0.2193, 0.1611]) -Greedy action tensor([ 0.6513, -0.0038, -0.3738, -0.1192]) tensor([0.4272, 0.2219, 0.1533, 0.1977]) -Greedy action tensor([ 0.7163, -0.0870, 0.0256, -0.1551]) tensor([0.4224, 0.1892, 0.2117, 0.1767]) -Greedy action tensor([ 0.3784, -0.0420, 0.0469, 0.0372]) tensor([0.3241, 0.2129, 0.2326, 0.2304]) -Greedy action tensor([ 0.9949, -0.6705, -0.0292, -0.4926]) tensor([0.5636, 0.1066, 0.2024, 0.1273]) -Greedy action tensor([ 0.7746, -0.3174, -0.1436, -0.4263]) tensor([0.4912, 0.1648, 0.1961, 0.1478]) -Greedy action tensor([ 0.6949, -0.4683, -0.1170, -0.2520]) tensor([0.4663, 0.1457, 0.2071, 0.1809]) -Greedy action tensor([ 0.9062, 0.1390, -0.1678, -0.2668]) tensor([0.4727, 0.2195, 0.1615, 0.1463]) -Greedy action tensor([ 0.6702, -0.2062, 0.1075, -0.5409]) tensor([0.4379, 0.1823, 0.2494, 0.1304]) -Greedy action tensor([ 0.4305, -0.2209, -0.0649, -0.1090]) tensor([0.3685, 0.1921, 0.2245, 0.2148]) -Greedy action tensor([ 0.5351, -0.0328, -0.1247, -0.0241]) tensor([0.3766, 0.2134, 0.1947, 0.2153]) -Greedy action tensor([ 0.3601, -0.2873, -0.1111, -0.3119]) tensor([0.3762, 0.1969, 0.2348, 0.1921]) -Greedy action tensor([ 0.4282, 0.0925, -0.0818, -0.3652]) tensor([0.3613, 0.2583, 0.2170, 0.1634]) -Greedy action tensor([ 0.7420, 0.2303, 0.0220, -0.2113]) tensor([0.4046, 0.2425, 0.1969, 0.1560]) -Greedy action tensor([ 0.7977, -0.2798, -0.0452, -0.1251]) tensor([0.4612, 0.1570, 0.1985, 0.1833]) -Greedy action tensor([ 0.7042, -0.4327, -0.0538, -0.1999]) tensor([0.4557, 0.1462, 0.2135, 0.1845]) -Greedy action tensor([ 0.7461, -0.3164, -0.0913, -0.0497]) tensor([0.4485, 0.1550, 0.1941, 0.2024]) -Greedy action tensor([ 0.8029, -0.6955, -0.0407, -0.5632]) tensor([0.5239, 0.1171, 0.2254, 0.1337]) -Greedy action tensor([ 0.5022, 0.0973, -0.1086, -0.0071]) tensor([0.3558, 0.2373, 0.1931, 0.2138]) -Greedy action tensor([ 0.8350, -0.5497, 0.0830, -0.3244]) tensor([0.4913, 0.1230, 0.2316, 0.1541]) -Greedy action tensor([ 1.9128, -1.1389, -0.1208, 0.3376]) tensor([0.7220, 0.0341, 0.0945, 0.1494]) -Greedy action tensor([ 0.9810, -0.6360, -0.3124, 0.2867]) tensor([0.5070, 0.1006, 0.1391, 0.2532]) -Greedy action tensor([ 0.8013, -0.2727, -0.2672, 0.2092]) tensor([0.4468, 0.1526, 0.1535, 0.2471]) -Greedy action tensor([ 1.3890, -0.4810, -0.3804, 0.3598]) tensor([0.5946, 0.0916, 0.1013, 0.2124]) -Greedy action tensor([ 1.7062, -0.0996, -0.4508, 0.3312]) tensor([0.6524, 0.1072, 0.0755, 0.1649]) -Greedy action tensor([ 1.7162, -0.5902, -0.3346, 0.3225]) tensor([0.6773, 0.0675, 0.0871, 0.1681]) -Greedy action tensor([ 2.0856, -0.3446, -0.6177, 0.7618]) tensor([0.7037, 0.0619, 0.0471, 0.1873]) -Greedy action tensor([ 1.4694, -0.3415, -0.5145, 0.5816]) tensor([0.5839, 0.0955, 0.0803, 0.2403]) -Greedy action tensor([ 2.4790, -1.1184, -0.4739, 0.4414]) tensor([0.8265, 0.0226, 0.0431, 0.1077]) -Greedy action tensor([ 1.4383, -0.3474, -0.2732, 0.4044]) tensor([0.5869, 0.0984, 0.1060, 0.2087]) -Greedy action tensor([ 1.5244, -0.7955, -0.4051, 0.7009]) tensor([0.5944, 0.0584, 0.0863, 0.2609]) -Greedy action tensor([ 1.2080, -0.0540, -0.2906, 0.4862]) tensor([0.5019, 0.1421, 0.1121, 0.2439]) -Greedy action tensor([ 1.2052, -0.4603, -0.8245, 0.4222]) tensor([0.5626, 0.1064, 0.0739, 0.2571]) -Greedy action tensor([ 1.5001, -0.3026, -0.4728, 0.4010]) tensor([0.6108, 0.1007, 0.0849, 0.2035]) -Greedy action tensor([ 1.2691, -0.6235, -0.4165, 0.3918]) tensor([0.5708, 0.0860, 0.1058, 0.2374]) -Greedy action tensor([ 1.6573, 0.0453, -0.7637, 0.3148]) tensor([0.6454, 0.1287, 0.0573, 0.1686]) -Greedy action tensor([ 1.3448, -0.0986, -0.7576, -0.1798]) tensor([0.6345, 0.1498, 0.0775, 0.1381]) -Greedy action tensor([ 1.3663, -0.0995, -0.4435, 0.0252]) tensor([0.6038, 0.1394, 0.0988, 0.1579]) -Greedy action tensor([ 0.9522, -0.6639, 0.1259, 0.2743]) tensor([0.4664, 0.0927, 0.2041, 0.2368]) -Greedy action tensor([ 1.2328, -0.4051, -0.5763, 0.6382]) tensor([0.5236, 0.1018, 0.0858, 0.2889]) -Greedy action tensor([ 2.1394, -0.5454, -0.7512, 0.5942]) tensor([0.7479, 0.0510, 0.0415, 0.1595]) -Greedy action tensor([ 1.2328, 0.1533, -0.3544, 0.2943]) tensor([0.5167, 0.1755, 0.1057, 0.2021]) -Greedy action tensor([ 1.7265, -0.7260, -0.1413, 0.2453]) tensor([0.6812, 0.0586, 0.1052, 0.1549]) -Greedy action tensor([ 1.2868, -0.6252, -0.2456, 0.4076]) tensor([0.5621, 0.0831, 0.1214, 0.2334]) -Greedy action tensor([ 1.4676, -0.3800, -0.5319, 0.4804]) tensor([0.6004, 0.0946, 0.0813, 0.2237]) -Greedy action tensor([ 1.7200, -0.6028, -0.0322, 0.3142]) tensor([0.6594, 0.0646, 0.1143, 0.1617]) -Greedy action tensor([ 1.1417, -0.1303, -0.5205, 0.4859]) tensor([0.5028, 0.1409, 0.0954, 0.2609]) -Greedy action tensor([ 1.4425, -1.0312, -0.2309, 0.3566]) tensor([0.6213, 0.0524, 0.1166, 0.2098]) -Greedy action tensor([ 2.3140, -0.8971, -0.3410, 0.2520]) tensor([0.8079, 0.0326, 0.0568, 0.1028]) -Greedy action tensor([ 1.8885, -0.5929, -0.4573, 0.4445]) tensor([0.7065, 0.0591, 0.0677, 0.1667]) -Greedy action tensor([ 1.6561, -0.9281, -0.2850, 0.0885]) tensor([0.7005, 0.0529, 0.1006, 0.1461]) -Greedy action tensor([ 1.6782, -0.7012, -0.3415, 0.5470]) tensor([0.6460, 0.0598, 0.0857, 0.2084]) -Greedy action tensor([ 1.8734, -1.2070, -0.2682, 0.3407]) tensor([0.7250, 0.0333, 0.0852, 0.1566]) -Greedy action tensor([ 1.0277, 0.3662, -0.5236, 0.1449]) tensor([0.4669, 0.2410, 0.0990, 0.1931]) -Greedy action tensor([ 1.3199, -0.8803, -0.0949, 0.1466]) tensor([0.6013, 0.0666, 0.1461, 0.1860]) -Greedy action tensor([ 0.9319, -0.1657, -0.9934, 0.5367]) tensor([0.4645, 0.1550, 0.0677, 0.3128]) -Greedy action tensor([ 1.2775, -0.5074, -0.2824, 0.7229]) tensor([0.5122, 0.0860, 0.1076, 0.2942]) -Greedy action tensor([ 1.8485, -0.6511, -0.2495, 0.4428]) tensor([0.6896, 0.0566, 0.0846, 0.1691]) -Greedy action tensor([ 1.6648, -0.0873, -0.8667, 0.3576]) tensor([0.6564, 0.1138, 0.0522, 0.1776]) -Greedy action tensor([ 2.0464, -0.9147, -0.5196, 0.8178]) tensor([0.7036, 0.0364, 0.0541, 0.2059]) -Greedy action tensor([ 0.7608, -0.3963, 0.1288, 0.1701]) tensor([0.4167, 0.1310, 0.2215, 0.2308]) -Greedy action tensor([ 1.5750, -0.1721, -0.3437, 0.3606]) tensor([0.6181, 0.1077, 0.0907, 0.1835]) -Greedy action tensor([ 1.2582, -0.6685, -0.4647, 0.2749]) tensor([0.5888, 0.0858, 0.1051, 0.2203]) -Greedy action tensor([ 1.3185, -0.4206, -0.7907, 0.0602]) tensor([0.6325, 0.1111, 0.0767, 0.1797]) -Greedy action tensor([ 1.5175, -0.3176, -0.3152, 0.7017]) tensor([0.5676, 0.0906, 0.0908, 0.2510]) -Greedy action tensor([ 1.9001, -0.2541, -0.5491, 0.4917]) tensor([0.6911, 0.0802, 0.0597, 0.1690]) -Greedy action tensor([ 1.6734, -0.0285, -0.2823, -0.1145]) tensor([0.6706, 0.1223, 0.0949, 0.1122]) -Greedy action tensor([ 2.1894, -0.8516, -0.1134, 0.7604]) tensor([0.7208, 0.0344, 0.0721, 0.1727]) -Greedy action tensor([ 1.5197, -0.9283, 0.1670, -0.1320]) tensor([0.6507, 0.0563, 0.1682, 0.1248]) -Greedy action tensor([ 1.5623, -0.0146, -0.9769, 0.7599]) tensor([0.5768, 0.1192, 0.0455, 0.2585]) -Greedy action tensor([ 1.4623, -0.3843, -0.1428, 0.2750]) tensor([0.6011, 0.0948, 0.1207, 0.1834]) -Greedy action tensor([ 1.7822, -0.8639, -0.2508, 0.2435]) tensor([0.7059, 0.0501, 0.0924, 0.1515]) -Greedy action tensor([ 1.5017, -0.4355, -0.4152, 0.7307]) tensor([0.5702, 0.0822, 0.0839, 0.2638]) -Greedy action tensor([ 1.9013, -0.2455, -0.5976, 0.3111]) tensor([0.7128, 0.0833, 0.0586, 0.1453]) -Greedy action tensor([ 1.7279, -0.1944, -0.0846, 0.3087]) tensor([0.6446, 0.0943, 0.1052, 0.1559]) -Greedy action tensor([ 1.2960, -0.6912, -0.2184, 0.3601]) tensor([0.5717, 0.0784, 0.1257, 0.2242]) -Greedy action tensor([ 1.0487, -0.6075, -0.2071, -0.0379]) tensor([0.5515, 0.1053, 0.1571, 0.1861]) -Greedy action tensor([ 1.3728, -0.3510, -0.5859, 0.5099]) tensor([0.5743, 0.1024, 0.0810, 0.2423]) -Greedy action tensor([ 0.9471, -0.2483, -0.0231, 0.1587]) tensor([0.4681, 0.1417, 0.1774, 0.2128]) -Greedy action tensor([ 1.1798, -0.1850, -0.6192, 0.3531]) tensor([0.5381, 0.1375, 0.0890, 0.2354]) -Greedy action tensor([ 1.7120, 0.0739, -0.3365, 0.5343]) tensor([0.6130, 0.1191, 0.0790, 0.1888]) -Greedy action tensor([ 1.2726, -0.4744, -0.5855, -0.1661]) tensor([0.6379, 0.1112, 0.0995, 0.1514]) -Greedy action tensor([ 1.9097, -1.4153, 0.0292, 0.1454]) tensor([0.7354, 0.0265, 0.1122, 0.1260]) -Greedy action tensor([ 1.4327, -0.5539, -0.2458, 0.3160]) tensor([0.6056, 0.0831, 0.1131, 0.1983]) -Greedy action tensor([ 1.1253, -0.0214, -0.4483, -0.0506]) tensor([0.5454, 0.1733, 0.1131, 0.1683]) -Greedy action tensor([ 2.1033, 0.6383, 0.5256, -0.3224]) tensor([0.6553, 0.1514, 0.1353, 0.0579]) -Greedy action tensor([ 1.6961, -0.2881, -0.5323, 0.4190]) tensor([0.6561, 0.0902, 0.0707, 0.1830]) -Greedy action tensor([ 1.0340, -0.4876, 0.0514, -0.0261]) tensor([0.5157, 0.1126, 0.1930, 0.1786]) -Greedy action tensor([ 1.4561, -0.3789, -0.3002, 0.2148]) tensor([0.6168, 0.0984, 0.1065, 0.1783]) -Greedy action tensor([ 1.8861, 0.1638, -0.1553, 0.6397]) tensor([0.6266, 0.1119, 0.0814, 0.1801]) -Greedy action tensor([ 1.6706, -0.7521, -0.2112, 0.5076]) tensor([0.6437, 0.0571, 0.0980, 0.2012]) -Greedy action tensor([ 1.7360, -0.5595, -0.1425, 0.2997]) tensor([0.6705, 0.0675, 0.1025, 0.1595]) -Greedy action tensor([ 1.3682, -0.8643, -0.0802, 0.4036]) tensor([0.5803, 0.0622, 0.1363, 0.2212]) -Greedy action tensor([ 1.3159, -0.1192, -0.8343, 0.3473]) tensor([0.5766, 0.1373, 0.0672, 0.2189]) -Greedy action tensor([ 1.2972, -0.3489, -0.7556, 0.3841]) tensor([0.5806, 0.1119, 0.0745, 0.2330]) -Greedy action tensor([ 1.3992, 0.1690, -0.3011, -0.3863]) tensor([0.6088, 0.1779, 0.1112, 0.1021]) -Greedy action tensor([ 1.1565, -0.5154, -0.7556, 0.5316]) tensor([0.5345, 0.1004, 0.0790, 0.2861]) -Greedy action tensor([ 0.8042, -0.3122, 0.1525, -0.3261]) tensor([0.4605, 0.1508, 0.2400, 0.1487]) -Greedy action tensor([ 1.4182, -0.2618, -0.4855, 0.1655]) tensor([0.6169, 0.1150, 0.0919, 0.1763]) -Greedy action tensor([ 1.8249, -0.6411, -0.3654, 0.5941]) tensor([0.6716, 0.0570, 0.0751, 0.1962]) -Greedy action tensor([ 1.4018, 0.0072, -0.8632, 0.3706]) tensor([0.5854, 0.1451, 0.0608, 0.2087]) -Greedy action tensor([-0.8920, 0.3163, 0.1403, 0.0365]) tensor([0.1032, 0.3456, 0.2899, 0.2613]) -Greedy action tensor([-1.6339, -0.3390, 0.6976, 0.1549]) tensor([0.0478, 0.1745, 0.4919, 0.2859]) -Greedy action tensor([-1.8204, -0.3834, 0.5845, -0.1183]) tensor([0.0459, 0.1933, 0.5088, 0.2520]) -Greedy action tensor([-1.0294, 0.1434, 0.3594, 0.2616]) tensor([0.0842, 0.2720, 0.3376, 0.3062]) -Greedy action tensor([-1.9004, -0.3217, 0.6288, -0.1403]) tensor([0.0413, 0.2003, 0.5182, 0.2402]) -Greedy action tensor([-1.7790, -0.4817, 0.5758, -0.0882]) tensor([0.0485, 0.1775, 0.5110, 0.2630]) -Greedy action tensor([-1.7781, -0.2918, 0.6052, -0.0668]) tensor([0.0459, 0.2028, 0.4973, 0.2540]) -Greedy action tensor([-1.8168, -0.4380, 0.6437, -0.0904]) tensor([0.0448, 0.1780, 0.5251, 0.2520]) -Greedy action tensor([-0.8133, 0.5102, -0.1456, -0.5074]) tensor([0.1240, 0.4658, 0.2418, 0.1684]) -Greedy action tensor([-1.9168, -0.4600, 0.6602, -0.1515]) tensor([0.0412, 0.1767, 0.5416, 0.2405]) -Greedy action tensor([-1.7334, -0.3890, 0.5747, -0.0424]) tensor([0.0492, 0.1888, 0.4950, 0.2670]) -Greedy action tensor([-1.7261, -0.2576, 0.5300, -0.0806]) tensor([0.0498, 0.2164, 0.4756, 0.2583]) -Greedy action tensor([-1.4793, 0.5450, 0.3037, 0.1851]) tensor([0.0505, 0.3823, 0.3004, 0.2668]) -Greedy action tensor([-1.9407, -0.4482, 0.6669, -0.1770]) tensor([0.0402, 0.1790, 0.5460, 0.2348]) -Greedy action tensor([-1.8253, -0.4002, 0.6037, -0.1453]) tensor([0.0457, 0.1901, 0.5188, 0.2453]) -Greedy action tensor([-1.8038, -0.4620, 0.5941, -0.1131]) tensor([0.0471, 0.1800, 0.5177, 0.2552]) -Greedy action tensor([-0.4089, 0.6876, 0.1191, 0.5110]) tensor([0.1220, 0.3652, 0.2068, 0.3061]) -Greedy action tensor([-1.9062, -0.4354, 0.6476, -0.1612]) tensor([0.0418, 0.1819, 0.5371, 0.2392]) -Greedy action tensor([-1.9263, -0.4332, 0.6592, -0.1652]) tensor([0.0408, 0.1814, 0.5407, 0.2371]) -Greedy action tensor([-0.8183, 0.2661, -0.1385, -0.0371]) tensor([0.1232, 0.3645, 0.2432, 0.2691]) -Greedy action tensor([-1.9078, -0.4443, 0.6503, -0.1618]) tensor([0.0417, 0.1803, 0.5388, 0.2392]) -Greedy action tensor([-1.8067, -0.3615, 0.6440, -0.0882]) tensor([0.0446, 0.1893, 0.5173, 0.2488]) -Greedy action tensor([-1.2202, 0.4384, 0.3423, -0.1776]) tensor([0.0722, 0.3789, 0.3442, 0.2047]) -Greedy action tensor([-1.9277, -0.4476, 0.6584, -0.1724]) tensor([0.0409, 0.1796, 0.5429, 0.2366]) -Greedy action tensor([-1.8848, -0.3679, 0.6261, -0.1573]) tensor([0.0426, 0.1940, 0.5241, 0.2394]) -Greedy action tensor([-1.8574, -0.3430, 0.6200, -0.1058]) tensor([0.0431, 0.1958, 0.5129, 0.2482]) -Greedy action tensor([-1.9223, -0.4301, 0.6527, -0.1707]) tensor([0.0411, 0.1827, 0.5394, 0.2368]) -Greedy action tensor([-1.3381, -0.1223, 0.6937, 0.6306]) tensor([0.0522, 0.1760, 0.3981, 0.3737]) -Greedy action tensor([-1.5803, -0.3079, 0.4520, -0.0025]) tensor([0.0587, 0.2094, 0.4477, 0.2842]) -Greedy action tensor([-1.8927, -0.4053, 0.6414, -0.1512]) tensor([0.0421, 0.1864, 0.5310, 0.2404]) -Greedy action tensor([-1.9132, -0.4408, 0.6507, -0.1638]) tensor([0.0415, 0.1809, 0.5389, 0.2387]) -Greedy action tensor([-1.7078, -0.4768, 0.6810, 0.1673]) tensor([0.0458, 0.1568, 0.4990, 0.2985]) -Greedy action tensor([-0.1826, -0.2715, 0.9494, 1.5888]) tensor([0.0918, 0.0840, 0.2847, 0.5396]) -Greedy action tensor([-1.5462, -0.5459, 0.4689, -0.0178]) tensor([0.0632, 0.1718, 0.4738, 0.2913]) -Greedy action tensor([-1.9140, -0.4064, 0.6494, -0.1606]) tensor([0.0412, 0.1861, 0.5348, 0.2379]) -Greedy action tensor([-1.8411, -0.4019, 0.6428, -0.1075]) tensor([0.0437, 0.1844, 0.5243, 0.2476]) -Greedy action tensor([-1.7588, 0.0691, 0.4988, -0.0773]) tensor([0.0451, 0.2808, 0.4315, 0.2425]) -Greedy action tensor([-1.7342, -0.3907, 0.5698, -0.0495]) tensor([0.0494, 0.1894, 0.4948, 0.2664]) -Greedy action tensor([-1.8025, -0.4733, 0.6491, -0.1425]) tensor([0.0462, 0.1745, 0.5363, 0.2430]) -Greedy action tensor([-1.2925, -0.4876, 0.2738, 0.2844]) tensor([0.0777, 0.1738, 0.3722, 0.3762]) -Greedy action tensor([-1.3545, -0.5158, 0.4432, 0.3955]) tensor([0.0662, 0.1532, 0.3996, 0.3810]) -Greedy action tensor([-1.5306, 0.0353, 0.4254, -0.0190]) tensor([0.0575, 0.2752, 0.4066, 0.2607]) -Greedy action tensor([-1.8322, -0.3252, 0.5979, -0.1218]) tensor([0.0446, 0.2014, 0.5071, 0.2469]) -Greedy action tensor([-1.8721, -0.4364, 0.6423, -0.1141]) tensor([0.0428, 0.1799, 0.5290, 0.2483]) -Greedy action tensor([-1.8082, -0.2358, 0.5724, -0.1085]) tensor([0.0452, 0.2180, 0.4892, 0.2476]) -Greedy action tensor([-1.7584, -0.3135, 0.5607, -0.1089]) tensor([0.0485, 0.2058, 0.4932, 0.2525]) -Greedy action tensor([-1.8059, -0.0371, 0.5339, -0.0654]) tensor([0.0436, 0.2556, 0.4524, 0.2484]) -Greedy action tensor([-1.9085, -0.4203, 0.6443, -0.1642]) tensor([0.0417, 0.1846, 0.5353, 0.2385]) -Greedy action tensor([-0.7234, -0.0705, 0.2215, -0.1387]) tensor([0.1372, 0.2636, 0.3530, 0.2462]) -Greedy action tensor([-1.9137, -0.4389, 0.6497, -0.1660]) tensor([0.0415, 0.1814, 0.5388, 0.2383]) -Greedy action tensor([-1.8496, -0.3626, 0.6409, -0.1034]) tensor([0.0431, 0.1905, 0.5196, 0.2469]) -Greedy action tensor([-1.9163, -0.4455, 0.6539, -0.1640]) tensor([0.0413, 0.1799, 0.5403, 0.2385]) -Greedy action tensor([-1.7830, -0.3402, 0.5972, -0.0760]) tensor([0.0464, 0.1964, 0.5014, 0.2558]) -Greedy action tensor([-1.6835, -0.4915, 0.5459, -0.0092]) tensor([0.0528, 0.1741, 0.4912, 0.2819]) -Greedy action tensor([-1.8966, -0.3687, 0.6401, -0.1591]) tensor([0.0418, 0.1926, 0.5281, 0.2375]) -Greedy action tensor([-1.8422, -0.3800, 0.6032, -0.1288]) tensor([0.0446, 0.1927, 0.5150, 0.2477]) -Greedy action tensor([-1.9036, -0.4514, 0.6616, -0.1520]) tensor([0.0416, 0.1777, 0.5409, 0.2398]) -Greedy action tensor([-1.9089, -0.3990, 0.6501, -0.1596]) tensor([0.0413, 0.1870, 0.5340, 0.2376]) -Greedy action tensor([-1.9418, -0.4563, 0.6658, -0.1787]) tensor([0.0403, 0.1780, 0.5467, 0.2350]) -Greedy action tensor([-1.5220, -0.3945, 0.4263, 0.0532]) tensor([0.0628, 0.1938, 0.4403, 0.3032]) -Greedy action tensor([-1.9378, -0.4472, 0.6690, -0.1758]) tensor([0.0403, 0.1789, 0.5462, 0.2347]) -Greedy action tensor([-1.6223, -0.5354, 0.4911, -0.0657]) tensor([0.0589, 0.1746, 0.4873, 0.2792]) -Greedy action tensor([-1.5998, -0.3254, 0.4708, -0.0607]) tensor([0.0583, 0.2083, 0.4619, 0.2715]) -Greedy action tensor([-1.2211, -0.4063, 0.2401, 0.1975]) tensor([0.0855, 0.1930, 0.3684, 0.3531]) -Greedy action tensor([-1.6160, -0.4638, 0.5666, 0.0820]) tensor([0.0541, 0.1711, 0.4795, 0.2953]) -Greedy action tensor([-1.3845, -0.1298, 0.4060, -0.1004]) tensor([0.0709, 0.2485, 0.4247, 0.2559]) -Greedy action tensor([-1.3235, 0.0625, 0.3406, -0.0366]) tensor([0.0719, 0.2877, 0.3799, 0.2605]) -Greedy action tensor([-1.8446, -0.4582, 0.6214, -0.1354]) tensor([0.0448, 0.1794, 0.5280, 0.2477]) -Greedy action tensor([-1.9446, -0.4527, 0.6712, -0.1799]) tensor([0.0401, 0.1781, 0.5479, 0.2339]) -Greedy action tensor([-1.5285, 0.5851, 0.3401, -0.1101]) tensor([0.0503, 0.4162, 0.3258, 0.2077]) -Greedy action tensor([-1.9172, -0.4148, 0.6502, -0.1651]) tensor([0.0412, 0.1850, 0.5365, 0.2374]) -Greedy action tensor([-1.9215, -0.4068, 0.6562, -0.1551]) tensor([0.0407, 0.1851, 0.5360, 0.2381]) -Greedy action tensor([-0.5685, -0.3395, 0.2097, -0.0516]) tensor([0.1636, 0.2057, 0.3563, 0.2744]) -Greedy action tensor([-1.9308, -0.3851, 0.6494, -0.1707]) tensor([0.0405, 0.1899, 0.5343, 0.2353]) -Greedy action tensor([-1.9260, -0.4435, 0.6580, -0.1699]) tensor([0.0409, 0.1802, 0.5421, 0.2369]) -Greedy action tensor([-0.5980, 0.2038, -0.0438, 0.0546]) tensor([0.1451, 0.3236, 0.2526, 0.2787]) -Greedy action tensor([-1.9120, -0.4241, 0.6510, -0.1576]) tensor([0.0414, 0.1831, 0.5365, 0.2390]) -Greedy action tensor([-1.6075, -0.3585, 0.5743, 0.0834]) tensor([0.0533, 0.1857, 0.4721, 0.2889]) -Greedy action tensor([-1.8831, -0.4903, 0.8787, 0.0687]) tensor([0.0358, 0.1443, 0.5674, 0.2524]) -Greedy action tensor([-1.9444, -0.4529, 0.6702, -0.1783]) tensor([0.0401, 0.1781, 0.5475, 0.2343]) -Greedy action tensor([-1.9009, -0.4522, 0.6483, -0.1523]) tensor([0.0420, 0.1789, 0.5377, 0.2414]) -Greedy action tensor([ 0.8531, -0.8483, 0.1041, -0.7717]) tensor([0.5399, 0.0985, 0.2553, 0.1063]) -Greedy action tensor([ 1.0497, -1.0336, 0.0726, -0.6705]) tensor([0.5953, 0.0741, 0.2241, 0.1066]) -Greedy action tensor([ 0.2926, 0.1184, -0.0440, -0.3059]) tensor([0.3222, 0.2707, 0.2301, 0.1771]) -Greedy action tensor([ 0.9701, -0.7653, 0.0348, -0.6269]) tensor([0.5646, 0.0995, 0.2216, 0.1143]) -Greedy action tensor([ 0.5907, -0.3858, -0.0436, -0.2900]) tensor([0.4308, 0.1622, 0.2284, 0.1786]) -Greedy action tensor([ 0.6008, -0.3940, 0.1102, -0.3260]) tensor([0.4205, 0.1555, 0.2575, 0.1665]) -Greedy action tensor([ 0.6477, 0.1026, -0.1278, -0.1414]) tensor([0.4009, 0.2324, 0.1846, 0.1821]) -Greedy action tensor([ 0.9556, -0.5355, -0.1238, -0.7081]) tensor([0.5700, 0.1283, 0.1937, 0.1080]) -Greedy action tensor([ 0.9809, -0.2461, -0.2005, -0.3724]) tensor([0.5381, 0.1578, 0.1651, 0.1390]) -Greedy action tensor([ 0.9219, -0.7358, -0.1556, -0.3839]) tensor([0.5550, 0.1058, 0.1889, 0.1504]) -Greedy action tensor([ 0.7340, -0.5672, -0.0606, -0.5295]) tensor([0.4983, 0.1357, 0.2251, 0.1409]) -Greedy action tensor([ 1.0189, -0.4729, -0.2020, -0.4857]) tensor([0.5740, 0.1291, 0.1693, 0.1275]) -Greedy action tensor([ 1.2744, -0.6794, -0.0403, -0.4112]) tensor([0.6267, 0.0888, 0.1683, 0.1161]) -Greedy action tensor([ 1.2099, -0.4921, 0.0162, -0.3928]) tensor([0.5928, 0.1081, 0.1797, 0.1194]) -Greedy action tensor([ 0.6960, -0.5839, -0.0427, -0.0714]) tensor([0.4505, 0.1253, 0.2152, 0.2091]) -Greedy action tensor([ 0.8651, -0.3985, -0.1562, -0.2567]) tensor([0.5080, 0.1436, 0.1829, 0.1655]) -Greedy action tensor([ 0.6418, -0.1471, 0.0380, -0.0586]) tensor([0.4004, 0.1819, 0.2189, 0.1987]) -Greedy action tensor([ 1.0027, -0.8810, 0.0456, -0.5577]) tensor([0.5727, 0.0871, 0.2199, 0.1203]) -Greedy action tensor([ 0.4382, 0.1280, -0.1330, -0.0005]) tensor([0.3398, 0.2492, 0.1919, 0.2191]) -Greedy action tensor([ 1.1859, -0.6586, 0.0233, -0.7590]) tensor([0.6197, 0.0980, 0.1937, 0.0886]) -Greedy action tensor([ 0.4040, 0.0208, 0.0310, -0.0479]) tensor([0.3326, 0.2267, 0.2290, 0.2117]) -Greedy action tensor([ 0.8363, -0.3614, -0.1529, -0.5794]) tensor([0.5218, 0.1575, 0.1940, 0.1267]) -Greedy action tensor([ 0.7788, -0.2769, 0.0011, -0.2532]) tensor([0.4622, 0.1608, 0.2123, 0.1647]) -Greedy action tensor([ 1.2286, -0.6651, 0.0641, -0.6817]) tensor([0.6209, 0.0934, 0.1938, 0.0919]) -Greedy action tensor([ 0.7517, -1.0968, -0.0742, -0.5857]) tensor([0.5383, 0.0848, 0.2357, 0.1413]) -Greedy action tensor([ 6.5804e-01, -2.3705e-01, 2.8390e-04, -1.8429e-01]) tensor([0.4242, 0.1733, 0.2197, 0.1827]) -Greedy action tensor([ 8.6038e-01, -6.2026e-01, 5.2017e-04, -4.9306e-01]) tensor([0.5238, 0.1192, 0.2217, 0.1353]) -Greedy action tensor([ 0.8511, -0.5153, -0.0709, -0.3873]) tensor([0.5148, 0.1313, 0.2047, 0.1492]) -Greedy action tensor([ 0.2947, 0.3733, -0.1425, -0.0612]) tensor([0.2917, 0.3156, 0.1884, 0.2043]) -Greedy action tensor([ 1.1059, -0.5489, -0.1078, -0.4720]) tensor([0.5901, 0.1128, 0.1753, 0.1218]) -Greedy action tensor([ 0.9913, -0.6123, -0.0792, -0.8283]) tensor([0.5861, 0.1179, 0.2009, 0.0950]) -Greedy action tensor([ 0.7871, -0.4146, -0.1046, -0.1664]) tensor([0.4771, 0.1434, 0.1956, 0.1839]) -Greedy action tensor([ 0.8759, -0.2796, -0.1426, -0.2125]) tensor([0.4968, 0.1564, 0.1794, 0.1673]) -Greedy action tensor([ 0.5672, -0.2302, -0.1303, -0.1348]) tensor([0.4092, 0.1843, 0.2037, 0.2028]) -Greedy action tensor([ 0.5965, -0.2352, -0.1681, -0.5479]) tensor([0.4506, 0.1962, 0.2098, 0.1435]) -Greedy action tensor([ 0.5909, -0.6032, -0.1369, -0.0426]) tensor([0.4317, 0.1308, 0.2085, 0.2291]) -Greedy action tensor([ 0.8766, -0.6429, 0.0057, -0.5849]) tensor([0.5350, 0.1171, 0.2239, 0.1241]) -Greedy action tensor([ 0.7973, -0.6141, -0.0609, -0.3908]) tensor([0.5070, 0.1236, 0.2149, 0.1545]) -Greedy action tensor([ 0.7472, -0.3764, 0.0025, -0.2986]) tensor([0.4648, 0.1511, 0.2207, 0.1633]) -Greedy action tensor([ 0.5321, -0.3373, -0.2396, -0.0610]) tensor([0.4108, 0.1722, 0.1899, 0.2270]) -Greedy action tensor([ 0.5638, -0.4879, -0.0952, -0.3399]) tensor([0.4402, 0.1538, 0.2277, 0.1783]) -Greedy action tensor([ 0.4296, -0.1291, -0.0289, -0.2599]) tensor([0.3695, 0.2114, 0.2336, 0.1854]) -Greedy action tensor([ 0.3628, -0.2744, -0.0614, -0.1386]) tensor([0.3586, 0.1896, 0.2346, 0.2172]) -Greedy action tensor([ 0.6607, -0.5932, -0.1741, -0.2149]) tensor([0.4682, 0.1336, 0.2032, 0.1950]) -Greedy action tensor([ 0.7680, -0.3302, -0.1176, -0.0966]) tensor([0.4614, 0.1539, 0.1903, 0.1944]) -Greedy action tensor([ 0.7018, -0.4365, -0.0435, -0.2635]) tensor([0.4596, 0.1472, 0.2181, 0.1750]) -Greedy action tensor([ 0.7045, -0.5952, -0.0057, -0.2343]) tensor([0.4640, 0.1265, 0.2281, 0.1815]) -Greedy action tensor([ 0.7988, -0.5007, -0.0973, -0.6972]) tensor([0.5250, 0.1431, 0.2143, 0.1176]) -Greedy action tensor([ 0.6142, -0.4893, -0.0407, -0.5757]) tensor([0.4639, 0.1539, 0.2410, 0.1412]) -Greedy action tensor([ 1.1232, -0.7642, -0.1790, -0.6913]) tensor([0.6304, 0.0955, 0.1714, 0.1027]) -Greedy action tensor([ 0.6182, -0.5493, -0.1318, -0.1302]) tensor([0.4431, 0.1379, 0.2093, 0.2097]) -Greedy action tensor([ 0.5890, -0.4973, -0.1993, -0.2094]) tensor([0.4460, 0.1505, 0.2028, 0.2007]) -Greedy action tensor([ 1.0589, -0.2874, 0.0695, 0.0079]) tensor([0.5046, 0.1313, 0.1876, 0.1764]) -Greedy action tensor([ 1.0297, -0.6448, -0.0319, -0.3481]) tensor([0.5601, 0.1050, 0.1937, 0.1412]) -Greedy action tensor([ 0.8480, -0.7072, -0.0769, -0.3695]) tensor([0.5253, 0.1109, 0.2083, 0.1555]) -Greedy action tensor([ 0.7152, -0.3525, 0.0694, -0.3446]) tensor([0.4516, 0.1552, 0.2367, 0.1565]) -Greedy action tensor([ 0.6995, -0.3030, -0.0290, -0.1576]) tensor([0.4398, 0.1614, 0.2122, 0.1866]) -Greedy action tensor([ 0.2642, 0.0510, -0.0373, -0.0987]) tensor([0.3083, 0.2491, 0.2281, 0.2145]) -Greedy action tensor([ 0.6250, -0.1553, 0.0197, -0.2605]) tensor([0.4138, 0.1896, 0.2259, 0.1707]) -Greedy action tensor([ 0.5558, -0.7377, -0.1901, -0.2063]) tensor([0.4514, 0.1238, 0.2141, 0.2107]) -Greedy action tensor([ 0.9650, -0.2934, -0.1504, -0.3179]) tensor([0.5293, 0.1504, 0.1735, 0.1468]) -Greedy action tensor([ 0.7843, -0.5466, -0.0776, -0.4091]) tensor([0.5026, 0.1328, 0.2123, 0.1524]) -Greedy action tensor([ 0.9945, -0.6284, 0.1043, -0.5669]) tensor([0.5501, 0.1086, 0.2259, 0.1154]) -Greedy action tensor([ 0.6917, -0.4420, -0.1886, -0.1617]) tensor([0.4624, 0.1488, 0.1918, 0.1970]) -Greedy action tensor([ 0.7850, -0.1828, 0.1008, -0.3151]) tensor([0.4510, 0.1713, 0.2275, 0.1501]) -Greedy action tensor([ 0.7310, -0.6482, -0.1382, -0.5025]) tensor([0.5096, 0.1283, 0.2137, 0.1484]) -Greedy action tensor([ 0.6281, -0.4779, 0.0367, -0.4746]) tensor([0.4512, 0.1493, 0.2498, 0.1498]) -Greedy action tensor([ 0.4457, -0.0790, -0.0461, -0.3243]) tensor([0.3751, 0.2219, 0.2294, 0.1737]) -Greedy action tensor([ 0.7889, -0.1448, -0.1129, 0.0080]) tensor([0.4431, 0.1742, 0.1798, 0.2029]) -Greedy action tensor([ 0.4819, -0.0627, -0.0141, -0.2308]) tensor([0.3732, 0.2165, 0.2273, 0.1830]) -Greedy action tensor([ 0.6038, -0.2406, -0.0055, -0.1689]) tensor([0.4106, 0.1765, 0.2233, 0.1896]) -Greedy action tensor([ 0.2817, -0.0632, 0.0243, -0.3448]) tensor([0.3316, 0.2349, 0.2563, 0.1772]) -Greedy action tensor([ 0.4010, -0.1084, -0.1405, 0.0420]) tensor([0.3471, 0.2086, 0.2020, 0.2424]) -Greedy action tensor([ 0.8694, -0.4022, -0.1383, -0.3120]) tensor([0.5122, 0.1436, 0.1870, 0.1572]) -Greedy action tensor([ 0.5626, -0.6482, -0.0335, -0.2538]) tensor([0.4365, 0.1301, 0.2405, 0.1929]) -Greedy action tensor([ 0.6250, -0.2866, -0.0132, -0.1070]) tensor([0.4147, 0.1667, 0.2191, 0.1995]) -Greedy action tensor([ 0.5127, -0.3308, 0.0649, -0.1115]) tensor([0.3839, 0.1651, 0.2453, 0.2057]) -Greedy action tensor([ 1.4247, -0.8422, -0.0025, -1.2203]) tensor([0.7069, 0.0733, 0.1696, 0.0502]) -Greedy action tensor([ 0.9564, -0.7394, 0.0082, -0.6206]) tensor([0.5626, 0.1032, 0.2180, 0.1162]) -Greedy action tensor([ 0.5595, -0.0275, -0.0329, -0.1008]) tensor([0.3808, 0.2118, 0.2106, 0.1968]) -Greedy action tensor([ 0.8165, -0.5148, -0.0818, -0.2614]) tensor([0.4971, 0.1313, 0.2025, 0.1692]) -Greedy action tensor([ 0.2797, -1.6960, 0.0324, 0.3618]) tensor([0.3328, 0.0461, 0.2599, 0.3612]) -Greedy action tensor([-0.2372, -0.7944, 0.3282, 1.9265]) tensor([0.0831, 0.0476, 0.1462, 0.7231]) -Greedy action tensor([ 1.4432, 0.0293, 0.1870, -0.7959]) tensor([0.6118, 0.1488, 0.1742, 0.0652]) -Greedy action tensor([-0.2506, -0.5244, 1.0782, -0.1215]) tensor([0.1498, 0.1139, 0.5658, 0.1705]) -Greedy action tensor([-0.8938, -0.3977, -0.5127, -0.4311]) tensor([0.1756, 0.2884, 0.2571, 0.2789]) -Greedy action tensor([-0.3387, -0.4219, -0.4519, 0.1266]) tensor([0.2270, 0.2089, 0.2027, 0.3615]) -Greedy action tensor([-1.0756, -0.7986, 0.5176, -1.2319]) tensor([0.1236, 0.1630, 0.6078, 0.1057]) -Greedy action tensor([-0.3192, -0.7308, 0.4623, -0.5103]) tensor([0.2140, 0.1418, 0.4675, 0.1768]) -Greedy action tensor([ 0.8868, -1.7432, 0.4535, 0.6320]) tensor([0.4007, 0.0289, 0.2598, 0.3106]) -Greedy action tensor([-1.5806, -1.3102, 1.1468, 0.2400]) tensor([0.0421, 0.0551, 0.6431, 0.2597]) -Greedy action tensor([ 0.3318, 0.1434, -0.5515, 0.3584]) tensor([0.3059, 0.2534, 0.1265, 0.3142]) -Greedy action tensor([ 0.8413, -0.4555, -0.2314, 0.7850]) tensor([0.3905, 0.1068, 0.1336, 0.3691]) -Greedy action tensor([-0.6705, -0.5174, -0.3783, -0.8687]) tensor([0.2312, 0.2695, 0.3097, 0.1896]) -Greedy action tensor([-0.7647, -0.9634, -0.3393, -0.5877]) tensor([0.2201, 0.1804, 0.3368, 0.2627]) -Greedy action tensor([ 1.0894, -1.1654, -0.1416, 1.2579]) tensor([0.3875, 0.0407, 0.1132, 0.4587]) -Greedy action tensor([-0.7031, -0.3030, 0.1095, -0.7519]) tensor([0.1755, 0.2618, 0.3955, 0.1671]) -Greedy action tensor([ 0.7347, 0.5280, 0.9043, -0.4046]) tensor([0.3014, 0.2451, 0.3571, 0.0965]) -Greedy action tensor([-0.8286, 0.4790, -1.6172, 0.6443]) tensor([0.1051, 0.3886, 0.0478, 0.4585]) -Greedy action tensor([-0.5265, -0.0481, 0.3057, -0.4797]) tensor([0.1678, 0.2707, 0.3856, 0.1758]) -Greedy action tensor([-1.2981, -1.4126, -0.2225, -0.1637]) tensor([0.1261, 0.1124, 0.3696, 0.3920]) -Greedy action tensor([-0.3987, -0.2407, 0.4311, 0.3720]) tensor([0.1509, 0.1768, 0.3461, 0.3262]) -Greedy action tensor([-0.7361, -0.3643, 0.2134, -0.7615]) tensor([0.1664, 0.2413, 0.4300, 0.1622]) -Greedy action tensor([-0.0326, -0.5725, -0.7538, 1.3116]) tensor([0.1694, 0.0987, 0.0823, 0.6496]) -Greedy action tensor([ 1.2388, -1.5389, 1.4711, -0.1119]) tensor([0.3872, 0.0241, 0.4884, 0.1003]) -Greedy action tensor([ 0.0715, 0.0141, 0.1906, -0.3291]) tensor([0.2673, 0.2524, 0.3011, 0.1791]) -Greedy action tensor([ 0.5962, -1.2365, -0.2689, 0.4837]) tensor([0.4041, 0.0647, 0.1701, 0.3611]) -Greedy action tensor([ 0.3121, 0.0102, 1.1886, -0.0751]) tensor([0.2074, 0.1534, 0.4984, 0.1408]) -Greedy action tensor([-0.5554, -0.8227, 0.6510, -0.9083]) tensor([0.1721, 0.1318, 0.5752, 0.1209]) -Greedy action tensor([-0.6623, 0.6120, -0.9126, -0.1096]) tensor([0.1410, 0.5042, 0.1098, 0.2450]) -Greedy action tensor([0.0587, 0.1174, 0.8147, 0.0903]) tensor([0.1915, 0.2031, 0.4078, 0.1976]) -Greedy action tensor([ 0.5465, 0.2031, -0.3798, 0.1431]) tensor([0.3606, 0.2558, 0.1428, 0.2409]) -Greedy action tensor([ 1.0735, -0.4555, -0.3127, 0.1539]) tensor([0.5361, 0.1162, 0.1340, 0.2137]) -Greedy action tensor([ 0.2918, -0.9507, 0.7436, 1.1654]) tensor([0.1903, 0.0549, 0.2990, 0.4558]) -Greedy action tensor([ 0.2923, -0.1578, -0.5254, -0.1619]) tensor([0.3685, 0.2349, 0.1627, 0.2340]) -Greedy action tensor([-0.3117, -1.2843, 0.6894, 0.3690]) tensor([0.1646, 0.0622, 0.4480, 0.3252]) -Greedy action tensor([ 0.1419, -1.5744, 0.1757, 1.0838]) tensor([0.2093, 0.0376, 0.2164, 0.5367]) -Greedy action tensor([ 1.6893, -0.4821, 0.9791, 0.3657]) tensor([0.5343, 0.0609, 0.2626, 0.1422]) -Greedy action tensor([-0.3302, -0.9496, 1.4850, -1.1477]) tensor([0.1231, 0.0663, 0.7562, 0.0544]) -Greedy action tensor([ 0.3760, 0.1149, 0.3106, -0.2006]) tensor([0.3059, 0.2356, 0.2866, 0.1719]) -Greedy action tensor([-0.2959, 0.8313, -0.0013, -0.6216]) tensor([0.1626, 0.5018, 0.2183, 0.1174]) -Greedy action tensor([1.1610, 0.2143, 0.1616, 0.1302]) tensor([0.4733, 0.1837, 0.1742, 0.1688]) -Greedy action tensor([ 0.1125, -0.2004, 0.7669, -0.7229]) tensor([0.2446, 0.1788, 0.4705, 0.1061]) -Greedy action tensor([-0.1694, -1.6650, 0.4786, -0.5663]) tensor([0.2626, 0.0588, 0.5020, 0.1766]) -Greedy action tensor([-0.7069, -1.1371, 0.4603, -0.6222]) tensor([0.1680, 0.1093, 0.5398, 0.1829]) -Greedy action tensor([-0.8789, -0.8272, -0.1886, -1.2676]) tensor([0.2116, 0.2228, 0.4221, 0.1435]) -Greedy action tensor([-0.4273, -1.0862, -0.1627, 0.2596]) tensor([0.2080, 0.1076, 0.2710, 0.4134]) -Greedy action tensor([ 0.7218, 0.8871, 0.6244, -0.1699]) tensor([0.2860, 0.3374, 0.2594, 0.1172]) -Greedy action tensor([-0.7636, -0.9637, 0.4700, -0.8656]) tensor([0.1625, 0.1330, 0.5578, 0.1467]) -Greedy action tensor([ 0.1686, -0.6233, 1.0649, 0.2855]) tensor([0.1989, 0.0901, 0.4874, 0.2236]) -Greedy action tensor([-0.2922, -0.6041, 0.3329, -1.1881]) tensor([0.2495, 0.1826, 0.4661, 0.1018]) -Greedy action tensor([-0.3468, -0.8964, 1.4107, -0.5554]) tensor([0.1221, 0.0705, 0.7082, 0.0992]) -Greedy action tensor([ 0.2369, -0.7078, 0.0039, -0.3593]) tensor([0.3661, 0.1423, 0.2900, 0.2017]) -Greedy action tensor([-0.0770, -0.8807, -0.7246, 0.0079]) tensor([0.3268, 0.1463, 0.1710, 0.3558]) -Greedy action tensor([ 0.4546, -0.9113, -0.8300, 0.0761]) tensor([0.4511, 0.1151, 0.1249, 0.3090]) -Greedy action tensor([-0.2025, 0.2481, 0.4500, -0.0956]) tensor([0.1785, 0.2801, 0.3428, 0.1986]) -Greedy action tensor([-0.8618, -0.0658, 0.5101, 0.0115]) tensor([0.1047, 0.2320, 0.4127, 0.2506]) -Greedy action tensor([ 1.2417, -1.2531, 1.2956, -0.3493]) tensor([0.4270, 0.0352, 0.4507, 0.0870]) -Greedy action tensor([ 0.4645, -1.3191, -0.0520, -0.1400]) tensor([0.4327, 0.0727, 0.2581, 0.2364]) -Greedy action tensor([ 0.5141, -0.7527, 0.2140, 0.1967]) tensor([0.3636, 0.1024, 0.2693, 0.2647]) -Greedy action tensor([ 1.2985, -1.1078, 0.9722, -0.8394]) tensor([0.5182, 0.0467, 0.3739, 0.0611]) -Greedy action tensor([-0.4289, -0.5473, -0.0874, -0.5376]) tensor([0.2385, 0.2119, 0.3356, 0.2140]) -Greedy action tensor([ 0.8795, -1.8084, -0.2392, 0.0824]) tensor([0.5419, 0.0369, 0.1770, 0.2442]) -Greedy action tensor([ 1.6289, -0.1399, 0.6213, 0.9401]) tensor([0.4907, 0.0837, 0.1792, 0.2464]) -Greedy action tensor([1.4092, 0.8933, 0.3510, 1.0146]) tensor([0.3820, 0.2280, 0.1326, 0.2574]) -Greedy action tensor([ 0.2773, 0.4019, 0.4287, -0.1558]) tensor([0.2535, 0.2872, 0.2950, 0.1644]) -Greedy action tensor([-0.2646, -0.7553, 0.0958, -0.3687]) tensor([0.2534, 0.1551, 0.3633, 0.2283]) -Greedy action tensor([-0.2330, -0.6582, -0.6820, -0.2560]) tensor([0.3059, 0.1999, 0.1952, 0.2989]) -Greedy action tensor([ 0.5555, -0.5915, -0.3913, 0.3388]) tensor([0.3983, 0.1265, 0.1545, 0.3207]) -Greedy action tensor([-1.0079, -1.1124, 0.2278, 0.0823]) tensor([0.1202, 0.1083, 0.4137, 0.3577]) -Greedy action tensor([ 0.4516, -1.3504, 0.7714, -0.6229]) tensor([0.3468, 0.0572, 0.4775, 0.1184]) -Greedy action tensor([ 0.0102, -1.2383, -0.6740, 0.4368]) tensor([0.3009, 0.0863, 0.1518, 0.4610]) -Greedy action tensor([-0.4215, -0.8593, -0.5599, 0.3010]) tensor([0.2185, 0.1411, 0.1903, 0.4501]) -Greedy action tensor([ 0.3075, -1.3307, 0.4180, -0.1842]) tensor([0.3421, 0.0665, 0.3821, 0.2092]) -Greedy action tensor([ 0.0947, -1.0146, 0.9039, 0.4008]) tensor([0.2027, 0.0668, 0.4552, 0.2753]) -Greedy action tensor([ 0.7852, -0.2097, 0.4306, -0.1620]) tensor([0.4067, 0.1504, 0.2853, 0.1577]) -Greedy action tensor([-2.2958, -1.1008, 1.3512, -0.7349]) tensor([0.0211, 0.0697, 0.8088, 0.1004]) -Greedy action tensor([-0.7511, -0.3739, 1.3513, 0.1692]) tensor([0.0760, 0.1109, 0.6223, 0.1908]) -Greedy action tensor([ 0.4848, 0.4888, -0.5355, 1.5139]) tensor([0.1937, 0.1945, 0.0698, 0.5420]) -Greedy action tensor([1.4751, 1.2912, 1.0349, 0.0870]) tensor([0.3669, 0.3053, 0.2363, 0.0916]) -Greedy action tensor([-0.0035, -0.1884, 1.5395, -0.5096]) tensor([0.1406, 0.1169, 0.6578, 0.0848]) -Greedy action tensor([ 0.8179, -0.4709, 0.1488, 0.3571]) tensor([0.4135, 0.1140, 0.2118, 0.2608]) -Greedy action tensor([-0.3040, -1.0087, -0.2574, 0.2980]) tensor([0.2290, 0.1132, 0.2399, 0.4180]) -Greedy action tensor([ 2.2582, -0.0183, -0.1699, 0.5315]) tensor([0.7306, 0.0750, 0.0644, 0.1300]) -Greedy action tensor([ 1.5874, 0.5168, -0.1989, 0.4222]) tensor([0.5488, 0.1881, 0.0920, 0.1711]) -Greedy action tensor([ 1.1497, -0.4452, -0.5341, 0.6982]) tensor([0.4938, 0.1002, 0.0917, 0.3144]) -Greedy action tensor([ 2.3257, -0.1636, -0.3082, 0.4208]) tensor([0.7671, 0.0636, 0.0551, 0.1142]) -Greedy action tensor([ 1.5355, -0.6988, -0.2750, 0.3279]) tensor([0.6371, 0.0682, 0.1042, 0.1905]) -Greedy action tensor([ 1.4204, -0.3879, -0.5943, 0.2237]) tensor([0.6252, 0.1025, 0.0834, 0.1889]) -Greedy action tensor([ 1.5082, -0.5742, -0.0497, 0.2950]) tensor([0.6126, 0.0763, 0.1290, 0.1821]) -Greedy action tensor([1.5549, 0.7572, 0.4525, 0.2869]) tensor([0.4845, 0.2182, 0.1609, 0.1363]) -Greedy action tensor([ 1.6193, -0.7581, -0.1671, 0.8800]) tensor([0.5754, 0.0534, 0.0964, 0.2748]) -Greedy action tensor([ 1.6882, -0.3268, -0.3582, 0.4208]) tensor([0.6476, 0.0863, 0.0837, 0.1824]) -Greedy action tensor([ 1.6497, -0.6411, -0.7280, 0.8096]) tensor([0.6151, 0.0622, 0.0571, 0.2655]) -Greedy action tensor([ 1.7901, -0.6056, -0.7670, 0.6694]) tensor([0.6690, 0.0610, 0.0519, 0.2181]) -Greedy action tensor([ 1.6597, -0.3019, -0.8575, 0.3815]) tensor([0.6667, 0.0938, 0.0538, 0.1857]) -Greedy action tensor([ 1.1680, -0.5511, -0.2734, 0.0355]) tensor([0.5754, 0.1031, 0.1361, 0.1854]) -Greedy action tensor([ 1.1501, -0.2689, 0.1328, -0.1350]) tensor([0.5319, 0.1287, 0.1923, 0.1471]) -Greedy action tensor([ 1.6929, -0.6524, -0.4251, 0.3779]) tensor([0.6736, 0.0645, 0.0810, 0.1808]) -Greedy action tensor([ 1.2963, -0.4988, -0.8363, 0.3014]) tensor([0.6044, 0.1004, 0.0716, 0.2235]) -Greedy action tensor([ 1.5938, -0.8525, -0.2315, 0.4344]) tensor([0.6404, 0.0555, 0.1032, 0.2009]) -Greedy action tensor([ 1.0169, 0.0290, -0.4428, 0.4647]) tensor([0.4587, 0.1708, 0.1065, 0.2640]) -Greedy action tensor([ 1.3647, -0.7517, -0.4920, 0.7417]) tensor([0.5516, 0.0664, 0.0861, 0.2958]) -Greedy action tensor([ 1.4382, -0.6043, -0.1153, 0.5875]) tensor([0.5655, 0.0733, 0.1196, 0.2415]) -Greedy action tensor([ 1.2830, -0.3844, -0.2836, -0.1123]) tensor([0.6078, 0.1147, 0.1269, 0.1506]) -Greedy action tensor([ 1.7209, -0.5331, -0.5039, 0.2975]) tensor([0.6878, 0.0722, 0.0743, 0.1657]) -Greedy action tensor([ 2.0613, -0.8166, -0.4257, 0.9272]) tensor([0.6844, 0.0385, 0.0569, 0.2202]) -Greedy action tensor([ 0.7960, -0.1785, 0.0917, 0.1364]) tensor([0.4186, 0.1580, 0.2070, 0.2164]) -Greedy action tensor([ 0.6983, -0.4371, -0.0445, 0.0210]) tensor([0.4338, 0.1394, 0.2064, 0.2204]) -Greedy action tensor([ 1.3798, -0.3878, -0.3073, -0.0455]) tensor([0.6265, 0.1070, 0.1159, 0.1506]) -Greedy action tensor([ 1.7479, -0.2063, -0.5856, 0.7617]) tensor([0.6205, 0.0879, 0.0602, 0.2314]) -Greedy action tensor([ 1.6190, 0.2696, -0.6330, 0.3835]) tensor([0.6041, 0.1567, 0.0635, 0.1756]) -Greedy action tensor([ 1.6011, -0.6711, -0.3965, 0.3557]) tensor([0.6551, 0.0675, 0.0889, 0.1885]) -Greedy action tensor([ 2.0398, -0.6972, -1.1555, 0.5488]) tensor([0.7514, 0.0487, 0.0308, 0.1692]) -Greedy action tensor([ 1.2159, -0.0904, -0.0915, 0.2227]) tensor([0.5231, 0.1417, 0.1415, 0.1937]) -Greedy action tensor([ 2.3598, -1.4059, -0.3015, 0.4121]) tensor([0.8093, 0.0187, 0.0565, 0.1154]) -Greedy action tensor([ 1.3811, -0.5365, -0.5001, 0.3048]) tensor([0.6097, 0.0896, 0.0929, 0.2078]) -Greedy action tensor([ 2.2068, -0.5934, -0.2283, 0.3624]) tensor([0.7654, 0.0465, 0.0670, 0.1210]) -Greedy action tensor([ 1.4611, -0.4792, -0.3726, -0.1265]) tensor([0.6632, 0.0953, 0.1060, 0.1356]) -Greedy action tensor([ 1.0755, 0.3044, -0.6216, 0.2258]) tensor([0.4823, 0.2231, 0.0884, 0.2062]) -Greedy action tensor([ 1.7296, -0.5750, -0.4073, 0.3181]) tensor([0.6842, 0.0683, 0.0807, 0.1668]) -Greedy action tensor([ 1.5084, -0.5980, -0.2498, 0.1528]) tensor([0.6444, 0.0784, 0.1111, 0.1661]) -Greedy action tensor([ 1.7444, -0.7052, -0.3050, 0.2609]) tensor([0.6935, 0.0599, 0.0893, 0.1573]) -Greedy action tensor([ 1.7831, -0.7153, -0.1716, 0.8236]) tensor([0.6223, 0.0512, 0.0881, 0.2384]) -Greedy action tensor([ 1.3552, -0.4011, -0.6915, 0.3318]) tensor([0.6020, 0.1040, 0.0778, 0.2163]) -Greedy action tensor([ 1.8668, -0.8266, -0.2936, 0.1270]) tensor([0.7361, 0.0498, 0.0849, 0.1292]) -Greedy action tensor([ 1.7360, -0.5364, -0.4921, 0.2613]) tensor([0.6946, 0.0716, 0.0748, 0.1590]) -Greedy action tensor([ 1.9264, -0.6368, -0.2793, 0.5962]) tensor([0.6889, 0.0531, 0.0759, 0.1821]) -Greedy action tensor([ 1.3692, 0.2962, 0.2272, -0.1455]) tensor([0.5316, 0.1818, 0.1697, 0.1169]) -Greedy action tensor([ 1.7393, -0.6030, -0.5544, 0.6337]) tensor([0.6544, 0.0629, 0.0660, 0.2166]) -Greedy action tensor([ 2.1450, -0.8606, -0.3322, 0.5410]) tensor([0.7493, 0.0371, 0.0629, 0.1507]) -Greedy action tensor([ 1.0291, -0.3237, -0.4076, -0.1277]) tensor([0.5523, 0.1428, 0.1313, 0.1737]) -Greedy action tensor([ 2.1096, -0.4774, -0.1875, 0.8967]) tensor([0.6788, 0.0511, 0.0683, 0.2018]) -Greedy action tensor([ 1.7100, -0.8083, -0.3021, 0.0474]) tensor([0.7123, 0.0574, 0.0952, 0.1351]) -Greedy action tensor([2.6825, 0.7708, 0.1156, 0.0339]) tensor([0.7720, 0.1141, 0.0593, 0.0546]) -Greedy action tensor([ 0.9782, 0.0037, -0.6256, 0.0286]) tensor([0.5088, 0.1920, 0.1023, 0.1968]) -Greedy action tensor([ 0.9245, -0.2578, -0.3184, -0.2452]) tensor([0.5248, 0.1609, 0.1514, 0.1629]) -Greedy action tensor([ 0.6776, -0.3574, 0.1280, -0.1428]) tensor([0.4215, 0.1497, 0.2433, 0.1856]) -Greedy action tensor([ 1.1123, -0.4419, 0.0393, 0.0740]) tensor([0.5243, 0.1108, 0.1793, 0.1856]) -Greedy action tensor([ 1.7917, -0.5443, 0.0399, 0.2211]) tensor([0.6765, 0.0654, 0.1174, 0.1407]) -Greedy action tensor([ 1.5374, -0.0258, -0.5691, 0.1788]) tensor([0.6297, 0.1319, 0.0766, 0.1618]) -Greedy action tensor([ 1.1822, -0.3717, -0.8966, 0.4065]) tensor([0.5565, 0.1177, 0.0696, 0.2562]) -Greedy action tensor([ 0.9177, -0.2457, 0.0434, 0.1983]) tensor([0.4511, 0.1409, 0.1882, 0.2197]) -Greedy action tensor([ 1.2518, -0.5438, -0.0971, 0.0831]) tensor([0.5759, 0.0956, 0.1495, 0.1790]) -Greedy action tensor([ 1.6975, -0.5776, -0.2877, 0.3940]) tensor([0.6615, 0.0680, 0.0909, 0.1797]) -Greedy action tensor([ 1.6873, -0.5630, -0.4978, 0.3903]) tensor([0.6706, 0.0707, 0.0754, 0.1833]) -Greedy action tensor([ 1.8855, -0.7073, -0.3307, 0.9687]) tensor([0.6315, 0.0472, 0.0688, 0.2525]) -Greedy action tensor([ 1.5864, -0.3946, -0.4730, 0.3127]) tensor([0.6472, 0.0893, 0.0825, 0.1811]) -Greedy action tensor([ 1.0204, -0.1146, -0.2674, 0.1958]) tensor([0.4912, 0.1579, 0.1355, 0.2154]) -Greedy action tensor([ 1.1207, -0.4892, -0.2036, 0.1492]) tensor([0.5422, 0.1084, 0.1442, 0.2052]) -Greedy action tensor([ 0.9128, -0.7287, 0.0661, 0.2317]) tensor([0.4698, 0.0910, 0.2015, 0.2377]) -Greedy action tensor([ 1.0249, -0.2350, 0.1067, 0.1123]) tensor([0.4798, 0.1361, 0.1915, 0.1926]) -Greedy action tensor([ 1.1855, -0.5408, -0.1938, 0.3814]) tensor([0.5327, 0.0948, 0.1341, 0.2384]) -Greedy action tensor([ 1.3517, -0.6829, -0.0030, 0.2163]) tensor([0.5848, 0.0765, 0.1509, 0.1879]) -Greedy action tensor([ 1.5318, -0.3978, -0.4553, 0.9246]) tensor([0.5473, 0.0795, 0.0750, 0.2982]) -Greedy action tensor([ 1.7394, -0.4251, -0.6251, 0.7111]) tensor([0.6384, 0.0733, 0.0600, 0.2283]) -Greedy action tensor([ 2.0447, -0.3739, -0.5093, 0.3387]) tensor([0.7416, 0.0660, 0.0577, 0.1347]) -Greedy action tensor([ 1.1297, -0.1829, -0.7684, 0.0729]) tensor([0.5661, 0.1523, 0.0848, 0.1967]) -Greedy action tensor([ 1.9650, -0.6967, -0.2649, 0.5288]) tensor([0.7066, 0.0493, 0.0760, 0.1681]) -Greedy action tensor([ 2.2627, -1.3211, 0.0530, 0.9781]) tensor([0.7071, 0.0196, 0.0776, 0.1957]) -Greedy action tensor([ 1.5672, -0.9816, -0.2864, 0.6132]) tensor([0.6173, 0.0483, 0.0967, 0.2378]) -Greedy action tensor([ 1.1389, -0.3715, -0.4870, 0.5460]) tensor([0.5075, 0.1121, 0.0999, 0.2805]) -Greedy action tensor([ 1.2356, -0.1660, -0.3550, -0.1331]) tensor([0.5867, 0.1445, 0.1196, 0.1493]) -Greedy action tensor([ 1.1449, -0.4788, -0.5834, -0.3886]) tensor([0.6287, 0.1240, 0.1117, 0.1357]) -Greedy action tensor([ 1.7788, -0.6934, -0.6552, 1.1903]) tensor([0.5790, 0.0489, 0.0508, 0.3214]) -Greedy action tensor([-1.9206, -0.4259, 0.6563, -0.1674]) tensor([0.0410, 0.1828, 0.5395, 0.2367]) -Greedy action tensor([-1.6688, -0.2112, 0.5799, 0.0277]) tensor([0.0494, 0.2124, 0.4685, 0.2697]) -Greedy action tensor([-1.4550, 0.5540, 0.3192, 0.0605]) tensor([0.0529, 0.3944, 0.3119, 0.2408]) -Greedy action tensor([-1.8486, -0.4967, 0.6490, -0.0838]) tensor([0.0437, 0.1691, 0.5317, 0.2555]) -Greedy action tensor([-0.6323, 0.9431, -0.0115, 0.6908]) tensor([0.0873, 0.4221, 0.1625, 0.3280]) -Greedy action tensor([-1.6840, -0.3287, 0.6311, 0.0089]) tensor([0.0489, 0.1897, 0.4954, 0.2659]) -Greedy action tensor([-1.9154, -0.4317, 0.6640, -0.1645]) tensor([0.0411, 0.1810, 0.5415, 0.2365]) -Greedy action tensor([-1.0536, -0.5383, 0.2835, 0.1208]) tensor([0.1029, 0.1723, 0.3918, 0.3330]) -Greedy action tensor([-1.4814, -0.1237, 0.5439, 0.0437]) tensor([0.0586, 0.2278, 0.4442, 0.2694]) -Greedy action tensor([-1.7642, -0.4641, 0.5689, -0.0480]) tensor([0.0487, 0.1786, 0.5019, 0.2708]) -Greedy action tensor([-1.8888, -0.3783, 0.6329, -0.1469]) tensor([0.0422, 0.1912, 0.5256, 0.2410]) -Greedy action tensor([-1.3656, -0.5952, 0.4493, 0.2980]) tensor([0.0686, 0.1482, 0.4212, 0.3620]) -Greedy action tensor([-1.8460, -0.4629, 0.6581, -0.0918]) tensor([0.0435, 0.1734, 0.5319, 0.2513]) -Greedy action tensor([-1.5621, -0.5412, 0.4678, -0.0034]) tensor([0.0620, 0.1720, 0.4717, 0.2944]) -Greedy action tensor([-1.8200, -0.0336, 0.5472, -0.0793]) tensor([0.0429, 0.2557, 0.4571, 0.2443]) -Greedy action tensor([-1.9024, -0.4536, 0.6515, -0.1580]) tensor([0.0420, 0.1786, 0.5394, 0.2401]) -Greedy action tensor([-1.8090, -0.3038, 0.5779, -0.1114]) tensor([0.0458, 0.2062, 0.4980, 0.2500]) -Greedy action tensor([-0.7265, -0.4615, 0.3635, 0.5329]) tensor([0.1136, 0.1481, 0.3380, 0.4003]) -Greedy action tensor([-1.7099, -0.4628, 0.1843, -0.2735]) tensor([0.0652, 0.2270, 0.4335, 0.2743]) -Greedy action tensor([-1.9109, -0.4386, 0.6512, -0.1630]) tensor([0.0416, 0.1811, 0.5387, 0.2386]) -Greedy action tensor([-1.1997, 0.7403, 0.3466, -0.2824]) tensor([0.0660, 0.4592, 0.3097, 0.1651]) -Greedy action tensor([-1.8562, -0.4009, 0.6191, -0.1412]) tensor([0.0440, 0.1886, 0.5229, 0.2445]) -Greedy action tensor([-1.3032, -0.5504, 0.6546, -0.2631]) tensor([0.0767, 0.1629, 0.5434, 0.2171]) -Greedy action tensor([-1.5988, 0.4151, 0.3622, 0.0641]) tensor([0.0479, 0.3589, 0.3405, 0.2527]) -Greedy action tensor([-1.9126, -0.4676, 0.6915, -0.1304]) tensor([0.0405, 0.1717, 0.5472, 0.2406]) -Greedy action tensor([-1.7619, -0.4897, 0.5584, -0.0684]) tensor([0.0495, 0.1768, 0.5043, 0.2694]) -Greedy action tensor([-1.7471, -0.4398, 0.5878, -0.0472]) tensor([0.0488, 0.1803, 0.5039, 0.2670]) -Greedy action tensor([-1.4630, 0.4740, 0.3432, -0.0666]) tensor([0.0554, 0.3840, 0.3370, 0.2237]) -Greedy action tensor([-1.2738, 0.7826, 0.1988, 0.1732]) tensor([0.0574, 0.4486, 0.2502, 0.2439]) -Greedy action tensor([-0.8772, -0.5677, 0.2133, 0.2619]) tensor([0.1182, 0.1610, 0.3516, 0.3691]) -Greedy action tensor([-1.8646, -0.4450, 0.6256, -0.1406]) tensor([0.0438, 0.1813, 0.5290, 0.2459]) -Greedy action tensor([-1.7109, -0.6719, 0.1398, -0.2792]) tensor([0.0696, 0.1966, 0.4427, 0.2912]) -Greedy action tensor([-1.6370, -0.3300, 0.6765, 0.1364]) tensor([0.0483, 0.1785, 0.4885, 0.2846]) -Greedy action tensor([-1.7307, -0.0742, 0.4830, -0.0355]) tensor([0.0480, 0.2515, 0.4391, 0.2614]) -Greedy action tensor([-1.7958, -0.4224, 0.5781, -0.0908]) tensor([0.0472, 0.1864, 0.5068, 0.2596]) -Greedy action tensor([-1.0354, -0.3607, 0.1610, 0.0502]) tensor([0.1083, 0.2127, 0.3583, 0.3207]) -Greedy action tensor([-0.7720, -0.0253, 0.2150, -0.1112]) tensor([0.1294, 0.2730, 0.3471, 0.2505]) -Greedy action tensor([-1.8188, -0.4772, 0.6039, -0.0717]) tensor([0.0458, 0.1751, 0.5163, 0.2627]) -Greedy action tensor([-1.8336, -0.4990, 0.7997, 0.1198]) tensor([0.0388, 0.1474, 0.5401, 0.2737]) -Greedy action tensor([-1.6602, 0.1468, 0.4901, 0.0438]) tensor([0.0472, 0.2877, 0.4055, 0.2595]) -Greedy action tensor([-1.8269, -0.2752, 0.5843, -0.1056]) tensor([0.0445, 0.2101, 0.4963, 0.2490]) -Greedy action tensor([-1.0693, 0.0602, 0.5585, 0.1913]) tensor([0.0787, 0.2434, 0.4005, 0.2774]) -Greedy action tensor([-1.9371, -0.4437, 0.6657, -0.1748]) tensor([0.0404, 0.1797, 0.5449, 0.2351]) -Greedy action tensor([-1.8862, -0.4293, 0.6449, -0.1465]) tensor([0.0424, 0.1822, 0.5335, 0.2418]) -Greedy action tensor([-1.7235, -0.3573, 0.5450, -0.0690]) tensor([0.0505, 0.1978, 0.4877, 0.2640]) -Greedy action tensor([-1.9050, -0.2983, 0.6226, -0.1592]) tensor([0.0413, 0.2057, 0.5166, 0.2364]) -Greedy action tensor([-1.9123, -0.4392, 0.6478, -0.1664]) tensor([0.0416, 0.1816, 0.5383, 0.2385]) -Greedy action tensor([-0.8493, 0.6977, 0.1426, -0.0463]) tensor([0.0941, 0.4421, 0.2538, 0.2101]) -Greedy action tensor([-1.8702, -0.3753, 0.6235, -0.1352]) tensor([0.0430, 0.1919, 0.5210, 0.2440]) -Greedy action tensor([-0.1572, 1.0562, -0.1179, 0.0651]) tensor([0.1503, 0.5057, 0.1563, 0.1877]) -Greedy action tensor([-1.2749, -0.4016, 0.5848, 0.6779]) tensor([0.0593, 0.1420, 0.3808, 0.4179]) -Greedy action tensor([-1.3696, 0.5262, 0.3078, 0.0269]) tensor([0.0586, 0.3905, 0.3139, 0.2370]) -Greedy action tensor([-1.6345, -0.2525, 0.4997, -0.0911]) tensor([0.0552, 0.2199, 0.4665, 0.2584]) -Greedy action tensor([-1.6924, -0.3823, 0.6712, 0.0115]) tensor([0.0480, 0.1779, 0.5102, 0.2638]) -Greedy action tensor([-1.9077, -0.4072, 0.6461, -0.1585]) tensor([0.0415, 0.1861, 0.5337, 0.2387]) -Greedy action tensor([-1.9077, -0.4368, 0.6702, -0.1493]) tensor([0.0411, 0.1789, 0.5414, 0.2386]) -Greedy action tensor([-1.7210, -0.5024, 0.5471, -0.0749]) tensor([0.0520, 0.1759, 0.5024, 0.2697]) -Greedy action tensor([-1.8614, -0.0908, 0.5612, -0.1420]) tensor([0.0421, 0.2475, 0.4751, 0.2352]) -Greedy action tensor([-1.8421, -0.3233, 0.6002, -0.1368]) tensor([0.0443, 0.2023, 0.5095, 0.2438]) -Greedy action tensor([-1.8741, -0.3992, 0.6316, -0.1365]) tensor([0.0429, 0.1875, 0.5257, 0.2439]) -Greedy action tensor([-1.8617, -0.2458, 0.6092, -0.1452]) tensor([0.0427, 0.2148, 0.5050, 0.2375]) -Greedy action tensor([-1.7889, -0.2870, 0.6370, -0.0350]) tensor([0.0443, 0.1989, 0.5010, 0.2559]) -Greedy action tensor([-1.0970, -0.3111, 0.8609, 0.8360]) tensor([0.0582, 0.1277, 0.4122, 0.4020]) -Greedy action tensor([-1.9319, -0.4315, 0.6601, -0.1708]) tensor([0.0406, 0.1818, 0.5417, 0.2360]) -Greedy action tensor([-0.1518, -0.5608, 0.2561, 0.4494]) tensor([0.2003, 0.1331, 0.3012, 0.3654]) -Greedy action tensor([-1.8785, -0.2785, 0.6178, -0.1462]) tensor([0.0421, 0.2086, 0.5112, 0.2381]) -Greedy action tensor([-1.2792, -0.1116, 0.5638, 0.1516]) tensor([0.0680, 0.2185, 0.4293, 0.2843]) -Greedy action tensor([-0.7277, 0.5124, 0.1265, 0.3856]) tensor([0.1015, 0.3509, 0.2385, 0.3091]) -Greedy action tensor([-1.1892, -0.4612, 0.3245, 0.0046]) tensor([0.0916, 0.1897, 0.4163, 0.3023]) -Greedy action tensor([-1.8841, -0.4306, 0.6317, -0.1510]) tensor([0.0429, 0.1835, 0.5309, 0.2427]) -Greedy action tensor([-1.9325, -0.4375, 0.6613, -0.1736]) tensor([0.0406, 0.1809, 0.5429, 0.2356]) -Greedy action tensor([-0.4934, -0.4532, 0.1584, -0.0141]) tensor([0.1794, 0.1867, 0.3442, 0.2897]) -Greedy action tensor([-1.8559, -0.4182, 0.6249, -0.1330]) tensor([0.0439, 0.1850, 0.5250, 0.2460]) -Greedy action tensor([-0.0854, 1.2089, 0.0755, 0.2801]) tensor([0.1377, 0.5023, 0.1617, 0.1984]) -Greedy action tensor([-1.8474, -0.1725, 0.5944, -0.1256]) tensor([0.0427, 0.2279, 0.4906, 0.2388]) -Greedy action tensor([-1.5114, -0.4819, 0.4802, 0.0330]) tensor([0.0632, 0.1771, 0.4634, 0.2963]) -Greedy action tensor([-1.8869, -0.2478, 0.5784, -0.1405]) tensor([0.0423, 0.2178, 0.4975, 0.2424]) -Greedy action tensor([-1.8476, -0.3222, 0.6178, -0.1244]) tensor([0.0435, 0.2001, 0.5124, 0.2439]) -Greedy action tensor([-1.9296, -0.4386, 0.6610, -0.1726]) tensor([0.0407, 0.1807, 0.5428, 0.2358]) -Greedy action tensor([-1.4246, -0.4745, 0.4831, -0.1718]) tensor([0.0723, 0.1871, 0.4874, 0.2532]) -Greedy action tensor([-1.3198, 0.7192, 0.2052, 0.0521]) tensor([0.0581, 0.4461, 0.2668, 0.2289]) -Greedy action tensor([ 0.4763, -0.2000, -0.1075, -0.1007]) tensor([0.3805, 0.1935, 0.2123, 0.2137]) -Greedy action tensor([ 0.7217, -0.3902, -0.1050, -0.2350]) tensor([0.4650, 0.1529, 0.2034, 0.1786]) -Greedy action tensor([ 0.8108, -0.6233, 0.0271, -0.3064]) tensor([0.4945, 0.1179, 0.2259, 0.1618]) -Greedy action tensor([ 0.8649, -0.4098, 0.2933, -0.3781]) tensor([0.4689, 0.1311, 0.2647, 0.1353]) -Greedy action tensor([ 1.2801, -0.7505, -0.0912, -0.7397]) tensor([0.6589, 0.0865, 0.1672, 0.0874]) -Greedy action tensor([ 0.7519, -0.2622, -0.1190, -0.1290]) tensor([0.4554, 0.1652, 0.1906, 0.1887]) -Greedy action tensor([ 0.5656, -0.1159, 0.1942, -0.2555]) tensor([0.3794, 0.1919, 0.2617, 0.1669]) -Greedy action tensor([ 1.0719, -0.8677, 0.0651, -0.6002]) tensor([0.5893, 0.0847, 0.2153, 0.1107]) -Greedy action tensor([ 0.9121, -0.8185, 0.1158, -0.3820]) tensor([0.5257, 0.0931, 0.2371, 0.1441]) -Greedy action tensor([ 0.8913, -0.4618, -0.0696, -0.2398]) tensor([0.5093, 0.1316, 0.1948, 0.1643]) -Greedy action tensor([ 0.8491, -0.6937, -0.1181, -0.1953]) tensor([0.5139, 0.1099, 0.1954, 0.1808]) -Greedy action tensor([ 0.4803, 0.0081, -0.0255, 0.0144]) tensor([0.3503, 0.2185, 0.2113, 0.2199]) -Greedy action tensor([ 0.8627, -0.7233, 0.0562, -0.4064]) tensor([0.5175, 0.1060, 0.2310, 0.1455]) -Greedy action tensor([ 0.3137, -0.2191, -0.0906, -0.2705]) tensor([0.3556, 0.2087, 0.2373, 0.1983]) -Greedy action tensor([ 0.4706, -0.1838, 0.0347, -0.2562]) tensor([0.3774, 0.1961, 0.2440, 0.1824]) -Greedy action tensor([ 0.7172, -0.5440, -0.0772, -0.1461]) tensor([0.4636, 0.1313, 0.2095, 0.1955]) -Greedy action tensor([ 0.2814, -0.0638, -0.1328, -0.3344]) tensor([0.3437, 0.2434, 0.2272, 0.1857]) -Greedy action tensor([ 0.0305, 0.3595, -0.1960, -0.6550]) tensor([0.2709, 0.3765, 0.2160, 0.1365]) -Greedy action tensor([ 0.7341, -0.3978, -0.1214, -0.3915]) tensor([0.4826, 0.1556, 0.2052, 0.1566]) -Greedy action tensor([ 0.1988, -0.0348, -0.2005, -0.4153]) tensor([0.3329, 0.2636, 0.2233, 0.1802]) -Greedy action tensor([ 0.3729, -0.2644, -0.0031, -0.1035]) tensor([0.3526, 0.1864, 0.2421, 0.2190]) -Greedy action tensor([ 0.7299, -0.1863, -0.1281, 0.0784]) tensor([0.4264, 0.1706, 0.1808, 0.2223]) -Greedy action tensor([ 0.5864, -0.4855, -0.0717, -0.1313]) tensor([0.4259, 0.1458, 0.2205, 0.2078]) -Greedy action tensor([ 0.6104, -0.4188, -0.0145, -0.2134]) tensor([0.4289, 0.1533, 0.2296, 0.1882]) -Greedy action tensor([ 0.6853, -0.2528, -0.0877, -0.2989]) tensor([0.4491, 0.1758, 0.2073, 0.1678]) -Greedy action tensor([ 0.9326, -0.5101, -0.1792, -0.4701]) tensor([0.5521, 0.1305, 0.1816, 0.1358]) -Greedy action tensor([ 0.6675, -0.4082, -0.0502, -0.1601]) tensor([0.4413, 0.1505, 0.2153, 0.1929]) -Greedy action tensor([ 1.0157, -0.6543, -0.0652, -0.3903]) tensor([0.5641, 0.1062, 0.1914, 0.1383]) -Greedy action tensor([ 0.2085, -0.0562, 0.0511, -0.1814]) tensor([0.3031, 0.2326, 0.2590, 0.2053]) -Greedy action tensor([ 0.8070, -0.5804, -0.1800, -0.6195]) tensor([0.5369, 0.1341, 0.2001, 0.1289]) -Greedy action tensor([ 0.5332, -0.5977, -0.1638, -0.4592]) tensor([0.4563, 0.1473, 0.2273, 0.1692]) -Greedy action tensor([ 1.1417, -0.5767, 0.1983, -0.5318]) tensor([0.5694, 0.1021, 0.2217, 0.1068]) -Greedy action tensor([ 0.9199, -0.5289, -0.1285, -0.1852]) tensor([0.5218, 0.1225, 0.1829, 0.1728]) -Greedy action tensor([ 0.5318, -0.5213, -0.1098, -0.1963]) tensor([0.4241, 0.1479, 0.2232, 0.2048]) -Greedy action tensor([ 0.5690, -0.1054, -0.0590, 0.0228]) tensor([0.3814, 0.1943, 0.2035, 0.2209]) -Greedy action tensor([ 0.5721, -0.4831, 0.0920, -0.4400]) tensor([0.4291, 0.1494, 0.2655, 0.1560]) -Greedy action tensor([ 0.6938, -0.3924, -0.0929, -0.1342]) tensor([0.4485, 0.1514, 0.2042, 0.1960]) -Greedy action tensor([ 0.7255, -0.3755, -0.0116, -0.2178]) tensor([0.4545, 0.1511, 0.2175, 0.1769]) -Greedy action tensor([ 0.9155, -1.2106, 0.0918, -0.8230]) tensor([0.5767, 0.0688, 0.2531, 0.1014]) -Greedy action tensor([0.4236, 0.2686, 0.2250, 0.4117]) tensor([0.2729, 0.2337, 0.2237, 0.2697]) -Greedy action tensor([ 0.6768, -0.4739, -0.1213, -0.5550]) tensor([0.4858, 0.1537, 0.2187, 0.1417]) -Greedy action tensor([ 0.6371, -0.2435, 0.0299, -0.2972]) tensor([0.4251, 0.1762, 0.2316, 0.1670]) -Greedy action tensor([ 0.4980, -0.0573, -0.1413, -0.0671]) tensor([0.3745, 0.2150, 0.1976, 0.2129]) -Greedy action tensor([ 0.8859, -0.4318, -0.2721, -0.4509]) tensor([0.5421, 0.1452, 0.1703, 0.1424]) -Greedy action tensor([ 1.1377, -0.6246, 0.0056, -0.5118]) tensor([0.5931, 0.1018, 0.1912, 0.1140]) -Greedy action tensor([ 1.2158, -0.4365, -0.1518, -0.3822]) tensor([0.6066, 0.1162, 0.1545, 0.1227]) -Greedy action tensor([ 0.5845, -0.0654, 0.0505, -0.1132]) tensor([0.3837, 0.2003, 0.2250, 0.1910]) -Greedy action tensor([ 0.6783, -0.0914, 0.0334, -0.0478]) tensor([0.4046, 0.1874, 0.2123, 0.1957]) -Greedy action tensor([ 0.6982, -0.0345, 0.0274, -0.9049]) tensor([0.4560, 0.2191, 0.2331, 0.0918]) -Greedy action tensor([ 0.4149, -0.1609, -0.1006, -0.0602]) tensor([0.3595, 0.2022, 0.2147, 0.2236]) -Greedy action tensor([ 0.6686, -0.4181, -0.1516, -0.1077]) tensor([0.4469, 0.1507, 0.1968, 0.2056]) -Greedy action tensor([ 0.7111, 0.2629, -0.0124, 0.0391]) tensor([0.3796, 0.2425, 0.1841, 0.1938]) -Greedy action tensor([ 0.7773, -0.3635, -0.0453, -0.3197]) tensor([0.4779, 0.1527, 0.2099, 0.1595]) -Greedy action tensor([ 0.2388, -0.0555, -0.1036, -0.2830]) tensor([0.3280, 0.2444, 0.2329, 0.1947]) -Greedy action tensor([ 4.3189e-01, -2.4205e-04, -1.2122e-01, -5.5592e-02]) tensor([0.3523, 0.2287, 0.2026, 0.2164]) -Greedy action tensor([ 0.5207, -0.2713, -0.2104, -0.1898]) tensor([0.4122, 0.1867, 0.1985, 0.2026]) -Greedy action tensor([ 0.7013, -0.3404, -0.1177, -0.0845]) tensor([0.4445, 0.1569, 0.1960, 0.2026]) -Greedy action tensor([ 0.8178, -0.5892, -0.0240, -0.3102]) tensor([0.5001, 0.1225, 0.2155, 0.1619]) -Greedy action tensor([ 0.6778, -0.2173, -0.1465, -0.4679]) tensor([0.4619, 0.1887, 0.2025, 0.1469]) -Greedy action tensor([ 0.7324, -0.5547, -0.1696, -0.4665]) tensor([0.5042, 0.1392, 0.2046, 0.1520]) -Greedy action tensor([ 0.8411, -0.6549, 0.2367, -0.1481]) tensor([0.4668, 0.1046, 0.2551, 0.1736]) -Greedy action tensor([ 0.7254, -0.3496, -0.1577, -0.3773]) tensor([0.4792, 0.1636, 0.1982, 0.1591]) -Greedy action tensor([ 0.3646, -0.0590, -0.0613, -0.1249]) tensor([0.3424, 0.2241, 0.2236, 0.2098]) -Greedy action tensor([ 0.8828, -0.5228, 0.0492, -0.3391]) tensor([0.5065, 0.1242, 0.2201, 0.1493]) -Greedy action tensor([ 0.9007, -0.7369, 0.0843, -0.4216]) tensor([0.5255, 0.1022, 0.2323, 0.1401]) -Greedy action tensor([ 0.4281, -0.0515, -0.0335, -0.0215]) tensor([0.3464, 0.2144, 0.2183, 0.2209]) -Greedy action tensor([ 0.6983, -0.6813, -0.0279, -0.2375]) tensor([0.4700, 0.1183, 0.2274, 0.1844]) -Greedy action tensor([ 0.8463, -0.5402, -0.0588, -0.4132]) tensor([0.5159, 0.1290, 0.2087, 0.1464]) -Greedy action tensor([ 0.5621, -0.3286, -0.1472, -0.2363]) tensor([0.4251, 0.1745, 0.2091, 0.1913]) -Greedy action tensor([ 0.7613, -0.7344, -0.1244, -0.1830]) tensor([0.4937, 0.1106, 0.2036, 0.1920]) -Greedy action tensor([ 0.5637, -0.0250, -0.0934, -0.1802]) tensor([0.3924, 0.2178, 0.2034, 0.1865]) -Greedy action tensor([ 0.7595, -0.4591, 0.0175, -0.3734]) tensor([0.4776, 0.1412, 0.2274, 0.1538]) -Greedy action tensor([ 0.5432, -0.0411, -0.1659, -0.2124]) tensor([0.3970, 0.2213, 0.1953, 0.1864]) -Greedy action tensor([ 0.7163, -0.3702, -0.0533, -0.3164]) tensor([0.4637, 0.1564, 0.2148, 0.1651]) -Greedy action tensor([ 0.7614, -0.3630, 0.0809, -0.3125]) tensor([0.4602, 0.1495, 0.2330, 0.1572]) -Greedy action tensor([ 0.3360, 0.2719, -0.1587, -0.0414]) tensor([0.3093, 0.2901, 0.1886, 0.2121]) -Greedy action tensor([ 0.7351, -0.6903, -0.0131, -0.4590]) tensor([0.4959, 0.1192, 0.2347, 0.1502]) -Greedy action tensor([ 1.0450, -0.4366, -0.0472, -0.2190]) tensor([0.5419, 0.1232, 0.1818, 0.1531]) -Greedy action tensor([ 0.9846, -0.9288, 0.0801, -0.5004]) tensor([0.5622, 0.0830, 0.2275, 0.1273]) -Greedy action tensor([ 0.9756, -0.6841, 0.0290, -0.6499]) tensor([0.5634, 0.1072, 0.2186, 0.1109]) -Greedy action tensor([ 0.8430, -0.3997, -0.1259, -0.2537]) tensor([0.4995, 0.1442, 0.1895, 0.1668]) -Greedy action tensor([ 1.2837, -0.7015, -0.0950, 0.2984]) tensor([0.5673, 0.0779, 0.1429, 0.2118]) -Greedy action tensor([ 1.2957, -0.4464, -0.4627, 0.1251]) tensor([0.6033, 0.1057, 0.1040, 0.1871]) -Greedy action tensor([ 1.4606, -0.7097, -0.3338, 0.4611]) tensor([0.6066, 0.0692, 0.1008, 0.2233]) -Greedy action tensor([ 1.5138, -0.1639, -0.4944, 0.4810]) tensor([0.5963, 0.1114, 0.0800, 0.2123]) -Greedy action tensor([ 1.7101, -0.1538, -0.5131, -0.1031]) tensor([0.7010, 0.1087, 0.0759, 0.1144]) -Greedy action tensor([ 1.5164, -0.2933, -0.4479, 0.0606]) tensor([0.6505, 0.1065, 0.0912, 0.1517]) -Greedy action tensor([ 1.6963, 0.0979, -0.8861, -0.0214]) tensor([0.6862, 0.1388, 0.0519, 0.1232]) -Greedy action tensor([ 1.9755, -1.0283, -0.3807, -0.0584]) tensor([0.7842, 0.0389, 0.0743, 0.1026]) -Greedy action tensor([ 1.5739, -0.0461, -0.1724, 0.4592]) tensor([0.5881, 0.1164, 0.1026, 0.1929]) -Greedy action tensor([ 1.5289, -0.1833, -0.2119, 0.5490]) tensor([0.5776, 0.1042, 0.1013, 0.2168]) -Greedy action tensor([ 0.9355, -0.5597, 0.0076, 0.5155]) tensor([0.4393, 0.0985, 0.1737, 0.2886]) -Greedy action tensor([ 1.0684, 0.2559, 0.4079, -0.5639]) tensor([0.4638, 0.2058, 0.2396, 0.0907]) -Greedy action tensor([ 1.2091, -0.2122, -0.3924, 0.4365]) tensor([0.5250, 0.1267, 0.1058, 0.2424]) -Greedy action tensor([ 1.2530, -0.4489, -0.2502, 0.0778]) tensor([0.5836, 0.1064, 0.1298, 0.1802]) -Greedy action tensor([ 1.2456, -0.5848, -0.1419, 0.2490]) tensor([0.5621, 0.0901, 0.1403, 0.2075]) -Greedy action tensor([ 1.3873, 0.1673, -0.2722, 0.3227]) tensor([0.5463, 0.1613, 0.1039, 0.1884]) -Greedy action tensor([ 1.8908, -0.6277, 0.1272, 0.5831]) tensor([0.6568, 0.0529, 0.1126, 0.1776]) -Greedy action tensor([ 1.7836, 0.5219, -0.1578, 0.4120]) tensor([0.5951, 0.1685, 0.0854, 0.1510]) -Greedy action tensor([ 0.9191, -0.3249, -0.0704, -0.1636]) tensor([0.5003, 0.1442, 0.1860, 0.1695]) -Greedy action tensor([ 1.9683, -0.5944, 0.0599, 0.6625]) tensor([0.6683, 0.0515, 0.0991, 0.1811]) -Greedy action tensor([ 1.4885, -0.6779, -0.0618, 0.1951]) tensor([0.6246, 0.0716, 0.1325, 0.1713]) -Greedy action tensor([ 2.0143, -1.2012, -0.2561, 0.7740]) tensor([0.6980, 0.0280, 0.0721, 0.2019]) -Greedy action tensor([ 1.3809, -0.6747, -0.8014, 0.0782]) tensor([0.6611, 0.0846, 0.0746, 0.1797]) -Greedy action tensor([ 1.4131, -0.8685, -0.0852, 0.3819]) tensor([0.5945, 0.0607, 0.1329, 0.2120]) -Greedy action tensor([ 1.1845, -0.1577, -0.2435, 0.3042]) tensor([0.5220, 0.1364, 0.1252, 0.2165]) -Greedy action tensor([ 1.8594, -0.7990, -0.2922, 0.4915]) tensor([0.6940, 0.0486, 0.0807, 0.1767]) -Greedy action tensor([ 2.2111, -0.4995, -0.2521, 0.4625]) tensor([0.7543, 0.0502, 0.0642, 0.1313]) -Greedy action tensor([ 2.2513, -1.1570, 0.0485, 0.9371]) tensor([0.7081, 0.0234, 0.0782, 0.1903]) -Greedy action tensor([ 1.1208, -0.3748, -0.7276, 0.0920]) tensor([0.5750, 0.1289, 0.0906, 0.2055]) -Greedy action tensor([ 1.9586, -1.4203, -0.4304, 0.2467]) tensor([0.7655, 0.0261, 0.0702, 0.1382]) -Greedy action tensor([ 1.3895, -0.2991, -0.4701, 0.1449]) tensor([0.6140, 0.1135, 0.0956, 0.1769]) -Greedy action tensor([ 1.3298, -0.3363, -0.5971, 0.2249]) tensor([0.6003, 0.1134, 0.0874, 0.1988]) -Greedy action tensor([ 1.2076, -0.4008, -0.1129, 0.5707]) tensor([0.5010, 0.1003, 0.1338, 0.2650]) -Greedy action tensor([ 1.8729, 0.0465, -0.8309, 0.4003]) tensor([0.6862, 0.1105, 0.0459, 0.1574]) -Greedy action tensor([ 2.1114, 0.1216, -0.0932, 0.4697]) tensor([0.6941, 0.0949, 0.0766, 0.1344]) -Greedy action tensor([ 1.4637, -0.3502, -0.3849, 0.1863]) tensor([0.6253, 0.1019, 0.0985, 0.1743]) -Greedy action tensor([ 1.1241, -0.0152, -0.6526, -0.0916]) tensor([0.5600, 0.1792, 0.0947, 0.1660]) -Greedy action tensor([ 1.2146, 0.1592, 0.2811, -0.1876]) tensor([0.5032, 0.1751, 0.1978, 0.1238]) -Greedy action tensor([ 2.2370, -0.5357, -0.5374, 0.2315]) tensor([0.7940, 0.0496, 0.0495, 0.1069]) -Greedy action tensor([ 2.0294, -0.6105, -0.7567, 0.7613]) tensor([0.7070, 0.0505, 0.0436, 0.1989]) -Greedy action tensor([ 1.6714, -0.1706, -0.5951, -0.2518]) tensor([0.7101, 0.1126, 0.0736, 0.1038]) -Greedy action tensor([ 1.6474, -0.6206, -0.3990, 0.3342]) tensor([0.6659, 0.0689, 0.0860, 0.1791]) -Greedy action tensor([ 1.6593, -0.4164, -1.0600, 0.1889]) tensor([0.7036, 0.0883, 0.0464, 0.1617]) -Greedy action tensor([ 1.7316, -0.0719, -0.2454, 0.6413]) tensor([0.6100, 0.1005, 0.0845, 0.2050]) -Greedy action tensor([ 1.4383, -0.1868, -0.3484, 0.3135]) tensor([0.5920, 0.1166, 0.0992, 0.1922]) -Greedy action tensor([ 1.2925, -0.2037, -1.0782, 0.3934]) tensor([0.5799, 0.1299, 0.0542, 0.2360]) -Greedy action tensor([ 1.4852, -0.0875, -0.0557, 0.7672]) tensor([0.5237, 0.1087, 0.1122, 0.2554]) -Greedy action tensor([ 2.0553, -0.9562, -0.5858, 0.7202]) tensor([0.7227, 0.0356, 0.0515, 0.1902]) -Greedy action tensor([ 1.3912, -0.2757, -0.7988, 0.3352]) tensor([0.6066, 0.1145, 0.0679, 0.2110]) -Greedy action tensor([ 1.8546, -1.0607, -0.2100, 0.2028]) tensor([0.7285, 0.0395, 0.0924, 0.1396]) -Greedy action tensor([ 1.5790, -0.4380, -0.2654, 0.4294]) tensor([0.6219, 0.0827, 0.0983, 0.1970]) -Greedy action tensor([ 1.5643, -0.3603, -0.3097, 0.2108]) tensor([0.6419, 0.0937, 0.0985, 0.1658]) -Greedy action tensor([ 2.2707, -0.9949, -0.2620, 0.8931]) tensor([0.7300, 0.0279, 0.0580, 0.1841]) -Greedy action tensor([ 1.2346, -0.5517, -0.1513, 0.2150]) tensor([0.5623, 0.0942, 0.1406, 0.2028]) -Greedy action tensor([ 1.4986, -0.2594, -0.5280, 0.1136]) tensor([0.6433, 0.1109, 0.0848, 0.1610]) -Greedy action tensor([ 1.3942e+00, -9.4346e-01, 1.2356e-03, 5.0491e-01]) tensor([0.5695, 0.0550, 0.1414, 0.2340]) -Greedy action tensor([ 1.4448, -0.1199, -0.0821, 0.5876]) tensor([0.5403, 0.1130, 0.1174, 0.2293]) -Greedy action tensor([ 2.0158, -0.8755, -0.6102, 0.2487]) tensor([0.7700, 0.0427, 0.0557, 0.1315]) -Greedy action tensor([ 1.1702, 0.1509, -0.4782, 0.2271]) tensor([0.5148, 0.1857, 0.0990, 0.2005]) -Greedy action tensor([ 1.1852, -0.2134, -0.2422, 0.4728]) tensor([0.5057, 0.1249, 0.1213, 0.2480]) -Greedy action tensor([ 1.6809, 0.7134, -0.0534, 0.3169]) tensor([0.5518, 0.2097, 0.0974, 0.1411]) -Greedy action tensor([ 1.8529, -0.5449, -0.6569, 1.1445]) tensor([0.6007, 0.0546, 0.0488, 0.2958]) -Greedy action tensor([ 1.4936, -0.3791, 0.0390, 0.5780]) tensor([0.5595, 0.0860, 0.1306, 0.2239]) -Greedy action tensor([ 1.7468, -0.7419, -0.4306, 0.4758]) tensor([0.6771, 0.0562, 0.0767, 0.1900]) -Greedy action tensor([ 1.2615, -0.6758, -0.1820, 0.3694]) tensor([0.5587, 0.0805, 0.1319, 0.2289]) -Greedy action tensor([ 1.6074, -0.5350, -0.2570, 0.1046]) tensor([0.6690, 0.0785, 0.1037, 0.1488]) -Greedy action tensor([ 1.2810, -0.0526, -0.7109, 0.2312]) tensor([0.5714, 0.1506, 0.0780, 0.2000]) -Greedy action tensor([ 1.3216, -0.3210, -0.3427, -0.0380]) tensor([0.6099, 0.1180, 0.1155, 0.1566]) -Greedy action tensor([ 2.0817, 0.1187, -0.7110, 0.5031]) tensor([0.7103, 0.0997, 0.0435, 0.1465]) -Greedy action tensor([ 2.4594, -1.4930, -0.0284, 0.4335]) tensor([0.8103, 0.0156, 0.0673, 0.1069]) -Greedy action tensor([ 1.1298, -0.3020, -0.0563, 0.2599]) tensor([0.5094, 0.1217, 0.1556, 0.2134]) -Greedy action tensor([ 1.2432, -0.5355, -0.8236, 0.3009]) tensor([0.5934, 0.1002, 0.0751, 0.2313]) -Greedy action tensor([ 0.3278, -0.2856, -0.1767, 0.1200]) tensor([0.3381, 0.1831, 0.2041, 0.2747]) -Greedy action tensor([ 2.4250, -0.0161, -0.0590, 0.5026]) tensor([0.7595, 0.0661, 0.0633, 0.1111]) -Greedy action tensor([ 1.3588, -0.1200, -0.8824, 0.1201]) tensor([0.6158, 0.1403, 0.0655, 0.1784]) -Greedy action tensor([ 1.5987, -0.3090, -0.3204, -0.0470]) tensor([0.6720, 0.0997, 0.0986, 0.1296]) -Greedy action tensor([ 1.4558, -0.3837, -0.8562, 0.5362]) tensor([0.6036, 0.0959, 0.0598, 0.2407]) -Greedy action tensor([ 1.5323, -0.9007, -0.3075, 0.1231]) tensor([0.6707, 0.0589, 0.1065, 0.1639]) -Greedy action tensor([ 0.7651, -0.6377, -0.0206, 0.1374]) tensor([0.4473, 0.1100, 0.2039, 0.2388]) -Greedy action tensor([ 1.3331, -0.1902, -0.2587, 0.1524]) tensor([0.5785, 0.1261, 0.1178, 0.1776]) -Greedy action tensor([ 1.3519, -0.6130, -0.1081, 0.1618]) tensor([0.5964, 0.0836, 0.1385, 0.1814]) -Greedy action tensor([ 0.5027, -0.0257, -0.4060, -0.9564]) tensor([0.4494, 0.2650, 0.1811, 0.1045]) -Greedy action tensor([ 0.0478, -1.6717, 1.3000, -0.9117]) tensor([0.1976, 0.0354, 0.6913, 0.0757]) -Greedy action tensor([ 0.1558, -0.7423, 0.3708, -0.2898]) tensor([0.3042, 0.1239, 0.3771, 0.1948]) -Greedy action tensor([ 0.6073, -1.5675, 0.8365, -0.1056]) tensor([0.3495, 0.0397, 0.4395, 0.1713]) -Greedy action tensor([ 0.1552, -1.1788, 0.0016, 0.2940]) tensor([0.3058, 0.0806, 0.2623, 0.3514]) -Greedy action tensor([-0.9561, -0.0928, 0.4926, -0.8270]) tensor([0.1141, 0.2705, 0.4857, 0.1298]) -Greedy action tensor([ 0.3023, -0.3594, 0.1267, 0.4599]) tensor([0.2836, 0.1464, 0.2380, 0.3321]) -Greedy action tensor([-0.1776, -0.9150, 1.2084, 0.2624]) tensor([0.1423, 0.0680, 0.5688, 0.2209]) -Greedy action tensor([-0.1895, -0.1730, -0.0143, -0.0682]) tensor([0.2306, 0.2344, 0.2747, 0.2603]) -Greedy action tensor([-0.5484, -0.9313, 0.0749, -0.5501]) tensor([0.2200, 0.1500, 0.4103, 0.2196]) -Greedy action tensor([ 0.2772, -1.3386, -0.3614, 1.3862]) tensor([0.2102, 0.0418, 0.1110, 0.6371]) -Greedy action tensor([ 0.7797, -0.0913, -0.2300, -0.2664]) tensor([0.4686, 0.1961, 0.1707, 0.1646]) -Greedy action tensor([1.0322, 0.8842, 0.4104, 0.5153]) tensor([0.3338, 0.2879, 0.1792, 0.1991]) -Greedy action tensor([ 0.1370, 0.6707, 0.1166, -0.2313]) tensor([0.2285, 0.3896, 0.2238, 0.1581]) -Greedy action tensor([ 0.7477, -0.9244, 0.8064, 0.3465]) tensor([0.3427, 0.0644, 0.3634, 0.2295]) -Greedy action tensor([ 1.8949, -1.2337, 1.8238, 0.1902]) tensor([0.4636, 0.0203, 0.4318, 0.0843]) -Greedy action tensor([ 0.4656, -0.4376, 0.8414, -0.4764]) tensor([0.3076, 0.1246, 0.4479, 0.1199]) -Greedy action tensor([ 0.8432, -1.3503, 0.0152, 1.0345]) tensor([0.3624, 0.0404, 0.1583, 0.4388]) -Greedy action tensor([ 1.1133, -0.6004, -0.5048, 0.9776]) tensor([0.4441, 0.0800, 0.0881, 0.3878]) -Greedy action tensor([ 0.9471, -0.3313, 1.2701, 1.1610]) tensor([0.2565, 0.0714, 0.3543, 0.3177]) -Greedy action tensor([ 0.0836, -1.0481, 0.0978, -0.1234]) tensor([0.3175, 0.1024, 0.3220, 0.2581]) -Greedy action tensor([-0.9804, 0.0493, -0.5639, 0.5233]) tensor([0.1019, 0.2853, 0.1545, 0.4583]) -Greedy action tensor([-0.6833, -0.6436, -1.0679, 0.7240]) tensor([0.1469, 0.1529, 0.1000, 0.6002]) -Greedy action tensor([ 0.2000, -0.2632, 1.2041, -0.3551]) tensor([0.2027, 0.1276, 0.5533, 0.1164]) -Greedy action tensor([ 0.2204, 0.9372, -0.6255, -0.2626]) tensor([0.2443, 0.5002, 0.1048, 0.1507]) -Greedy action tensor([-0.1796, -0.6804, -0.8834, -0.2484]) tensor([0.3296, 0.1997, 0.1630, 0.3076]) -Greedy action tensor([-1.1358, -0.2457, -1.7171, -0.5319]) tensor([0.1717, 0.4182, 0.0960, 0.3141]) -Greedy action tensor([-0.1324, -0.0624, 0.9756, 0.1944]) tensor([0.1541, 0.1653, 0.4668, 0.2137]) -Greedy action tensor([ 0.5650, 0.3903, -0.1973, -0.7223]) tensor([0.3872, 0.3252, 0.1807, 0.1069]) -Greedy action tensor([ 0.3120, -1.3452, 0.3685, 0.1743]) tensor([0.3205, 0.0611, 0.3391, 0.2793]) -Greedy action tensor([-0.2617, -0.8924, -0.5260, 0.9022]) tensor([0.1817, 0.0967, 0.1395, 0.5820]) -Greedy action tensor([-0.7580, -2.2547, -0.2025, 0.6741]) tensor([0.1398, 0.0313, 0.2436, 0.5853]) -Greedy action tensor([-0.6611, 0.0140, -0.0241, 0.0179]) tensor([0.1465, 0.2877, 0.2770, 0.2888]) -Greedy action tensor([ 0.6308, -0.4826, -0.8303, -0.1977]) tensor([0.5007, 0.1645, 0.1162, 0.2187]) -Greedy action tensor([-0.0640, 0.1241, -0.3800, 0.0647]) tensor([0.2455, 0.2963, 0.1790, 0.2792]) -Greedy action tensor([ 0.3568, -1.6518, 0.0148, -0.0950]) tensor([0.4031, 0.0541, 0.2863, 0.2565]) -Greedy action tensor([-1.4610, -0.5134, 1.0430, 0.1331]) tensor([0.0482, 0.1244, 0.5899, 0.2375]) -Greedy action tensor([ 1.5081, 0.4139, -1.0018, 0.5432]) tensor([0.5564, 0.1863, 0.0452, 0.2120]) -Greedy action tensor([-0.2259, -0.9468, -0.1908, -0.2545]) tensor([0.2862, 0.1392, 0.2964, 0.2782]) -Greedy action tensor([ 0.7931, -0.2887, 0.6096, 0.6877]) tensor([0.3256, 0.1104, 0.2710, 0.2930]) -Greedy action tensor([-0.1920, -1.5306, -0.2600, 0.0021]) tensor([0.2932, 0.0769, 0.2739, 0.3560]) -Greedy action tensor([ 0.7875, -0.1946, 1.3403, 0.8220]) tensor([0.2411, 0.0903, 0.4191, 0.2495]) -Greedy action tensor([ 0.1884, -0.0599, -0.8885, 0.3109]) tensor([0.3076, 0.2400, 0.1048, 0.3477]) -Greedy action tensor([ 0.8214, -1.1821, -1.0932, 0.5144]) tensor([0.4956, 0.0668, 0.0730, 0.3646]) -Greedy action tensor([ 0.5744, -0.5245, 1.4880, 1.5815]) tensor([0.1523, 0.0508, 0.3798, 0.4171]) -Greedy action tensor([ 0.5868, -0.6518, -0.6674, 0.6192]) tensor([0.3834, 0.1111, 0.1094, 0.3961]) -Greedy action tensor([ 1.2607, -0.6242, 1.0753, 1.3136]) tensor([0.3293, 0.0500, 0.2736, 0.3472]) -Greedy action tensor([-0.3249, 0.5302, 1.5012, -0.6446]) tensor([0.0972, 0.2286, 0.6036, 0.0706]) -Greedy action tensor([-0.2837, 0.3338, -0.2044, -1.2323]) tensor([0.2313, 0.4288, 0.2503, 0.0896]) -Greedy action tensor([-0.0498, -0.2094, -1.6391, 0.3557]) tensor([0.2812, 0.2397, 0.0574, 0.4218]) -Greedy action tensor([ 1.3566, 0.6281, 0.4501, -0.7574]) tensor([0.4982, 0.2404, 0.2012, 0.0602]) -Greedy action tensor([ 0.6451, -0.7110, 0.6438, -0.2714]) tensor([0.3765, 0.0970, 0.3760, 0.1506]) -Greedy action tensor([ 0.4648, -1.8582, 0.2843, 0.7465]) tensor([0.3069, 0.0301, 0.2562, 0.4068]) -Greedy action tensor([-0.5428, 0.2409, -0.3567, -0.1831]) tensor([0.1716, 0.3758, 0.2067, 0.2459]) -Greedy action tensor([-0.4614, -0.6894, -0.2356, 0.3329]) tensor([0.1900, 0.1513, 0.2382, 0.4205]) -Greedy action tensor([ 0.8616, -0.4469, 0.6829, 0.7700]) tensor([0.3312, 0.0895, 0.2770, 0.3022]) -Greedy action tensor([ 0.1690, -0.7347, -0.4879, -0.4694]) tensor([0.4079, 0.1652, 0.2115, 0.2154]) -Greedy action tensor([1.2627, 0.7949, 0.1007, 0.4691]) tensor([0.4182, 0.2619, 0.1308, 0.1891]) -Greedy action tensor([ 0.4357, -0.4848, 0.2070, 0.5126]) tensor([0.3054, 0.1217, 0.2430, 0.3299]) -Greedy action tensor([ 0.8439, -0.6602, -0.5175, -0.0574]) tensor([0.5306, 0.1179, 0.1360, 0.2155]) -Greedy action tensor([-0.1415, 0.6521, 1.1592, -0.3458]) tensor([0.1299, 0.2873, 0.4770, 0.1059]) -Greedy action tensor([ 0.6075, -1.3322, 0.2083, -0.0176]) tensor([0.4256, 0.0612, 0.2855, 0.2278]) -Greedy action tensor([ 0.1529, -0.2990, 0.6189, -1.2259]) tensor([0.2872, 0.1828, 0.4577, 0.0723]) -Greedy action tensor([ 0.7230, -0.1855, -0.3705, 0.5638]) tensor([0.3860, 0.1556, 0.1293, 0.3292]) -Greedy action tensor([ 0.0848, -0.7215, 0.0096, -0.0024]) tensor([0.3039, 0.1357, 0.2819, 0.2785]) -Greedy action tensor([-0.2520, -0.2961, -0.0617, -0.5584]) tensor([0.2563, 0.2452, 0.3099, 0.1886]) -Greedy action tensor([-0.6800, -0.2666, 0.6347, -0.3668]) tensor([0.1315, 0.1989, 0.4897, 0.1799]) -Greedy action tensor([ 0.1159, -0.4071, -0.7711, -0.0327]) tensor([0.3488, 0.2068, 0.1437, 0.3007]) -Greedy action tensor([ 0.9379, -1.0892, 0.6594, 1.2201]) tensor([0.3111, 0.0410, 0.2355, 0.4125]) -Greedy action tensor([-0.2108, 0.0496, 1.0321, 0.1774]) tensor([0.1382, 0.1793, 0.4789, 0.2037]) -Greedy action tensor([ 0.8345, 0.1594, 1.1248, -0.1142]) tensor([0.3093, 0.1575, 0.4135, 0.1198]) -Greedy action tensor([-0.1060, -1.1390, 0.2621, -0.4696]) tensor([0.2860, 0.1018, 0.4133, 0.1988]) -Greedy action tensor([ 0.7201, -0.0553, 0.7938, 1.0091]) tensor([0.2583, 0.1189, 0.2780, 0.3448]) -Greedy action tensor([-0.4638, -0.9359, 0.5582, -0.4171]) tensor([0.1835, 0.1144, 0.5098, 0.1922]) -Greedy action tensor([0.6151, 1.1164, 0.5453, 0.0532]) tensor([0.2407, 0.3975, 0.2245, 0.1373]) -Greedy action tensor([-0.1498, -0.7730, 1.4690, -0.5609]) tensor([0.1380, 0.0740, 0.6965, 0.0915]) -Greedy action tensor([-0.5270, -1.4859, -0.4916, 0.2057]) tensor([0.2222, 0.0852, 0.2302, 0.4624]) -Greedy action tensor([0.8277, 0.2532, 0.1550, 0.0184]) tensor([0.3971, 0.2235, 0.2026, 0.1768]) -Greedy action tensor([ 0.6600, -1.5199, 1.5960, 0.0988]) tensor([0.2362, 0.0267, 0.6023, 0.1348]) -Greedy action tensor([ 0.3069, -0.5884, -0.1721, 0.0100]) tensor([0.3609, 0.1474, 0.2235, 0.2682]) -Greedy action tensor([ 0.2614, -1.7976, 0.4695, 0.3287]) tensor([0.2917, 0.0372, 0.3592, 0.3120]) -Greedy action tensor([-0.8693, -0.0083, -0.1234, 0.6034]) tensor([0.1017, 0.2405, 0.2144, 0.4434]) -Greedy action tensor([-1.4494, 0.0881, 0.3355, 0.1579]) tensor([0.0602, 0.2803, 0.3589, 0.3006]) -Greedy action tensor([-0.9555, -0.4967, 0.5076, 0.8544]) tensor([0.0769, 0.1216, 0.3320, 0.4696]) -Greedy action tensor([-1.9185, -0.3452, 0.6401, -0.1534]) tensor([0.0407, 0.1962, 0.5255, 0.2377]) -Greedy action tensor([-1.5377, 0.4993, 0.3654, -0.0302]) tensor([0.0503, 0.3855, 0.3372, 0.2270]) -Greedy action tensor([-1.0636, 0.2809, 0.3486, 0.2913]) tensor([0.0780, 0.2993, 0.3203, 0.3024]) -Greedy action tensor([-1.8316, -0.4469, 0.6117, -0.1211]) tensor([0.0454, 0.1812, 0.5224, 0.2510]) -Greedy action tensor([-1.2404, -0.3324, 0.7832, 0.8000]) tensor([0.0534, 0.1323, 0.4038, 0.4106]) -Greedy action tensor([-1.7716, -0.4025, 0.5864, -0.0751]) tensor([0.0477, 0.1876, 0.5044, 0.2603]) -Greedy action tensor([-1.9092, -0.4032, 0.6512, -0.1609]) tensor([0.0413, 0.1863, 0.5349, 0.2374]) -Greedy action tensor([-1.8011, -0.4919, 0.5966, -0.0850]) tensor([0.0470, 0.1742, 0.5172, 0.2616]) -Greedy action tensor([-1.8748, -0.3263, 0.6240, -0.1196]) tensor([0.0423, 0.1988, 0.5144, 0.2445]) -Greedy action tensor([-1.9438, -0.4407, 0.6635, -0.1797]) tensor([0.0402, 0.1806, 0.5448, 0.2344]) -Greedy action tensor([-1.8780, -0.4444, 0.6690, -0.1306]) tensor([0.0422, 0.1769, 0.5387, 0.2422]) -Greedy action tensor([-1.1740, 0.4277, 0.3017, -0.1131]) tensor([0.0756, 0.3752, 0.3308, 0.2185]) -Greedy action tensor([-1.8257, -0.4012, 0.6194, -0.1112]) tensor([0.0450, 0.1869, 0.5185, 0.2497]) -Greedy action tensor([-1.6984, -0.5475, 0.9823, 0.5322]) tensor([0.0356, 0.1126, 0.5201, 0.3316]) -Greedy action tensor([-1.8259, -0.4489, 0.6314, -0.0521]) tensor([0.0444, 0.1759, 0.5181, 0.2616]) -Greedy action tensor([-1.8903, -0.3908, 0.6399, -0.1487]) tensor([0.0421, 0.1887, 0.5289, 0.2403]) -Greedy action tensor([-1.8128, -0.4814, 0.5982, -0.1292]) tensor([0.0469, 0.1776, 0.5229, 0.2526]) -Greedy action tensor([-1.8318, -0.4131, 0.6080, -0.1143]) tensor([0.0451, 0.1863, 0.5173, 0.2512]) -Greedy action tensor([-1.8731, -0.4474, 0.6346, -0.1445]) tensor([0.0433, 0.1803, 0.5321, 0.2442]) -Greedy action tensor([-0.2452, 1.0486, 0.0494, 0.1903]) tensor([0.1327, 0.4839, 0.1782, 0.2052]) -Greedy action tensor([-1.5405, 0.2177, 0.5613, -0.5827]) tensor([0.0569, 0.3299, 0.4651, 0.1482]) -Greedy action tensor([-1.8872, -0.4216, 0.6347, -0.1561]) tensor([0.0427, 0.1848, 0.5315, 0.2410]) -Greedy action tensor([-1.3048, -0.1439, 0.5163, 0.2823]) tensor([0.0655, 0.2092, 0.4049, 0.3204]) -Greedy action tensor([-1.9082, -0.4477, 0.6530, -0.1572]) tensor([0.0416, 0.1794, 0.5392, 0.2398]) -Greedy action tensor([-1.8679, -0.4305, 0.6287, -0.1457]) tensor([0.0436, 0.1835, 0.5291, 0.2439]) -Greedy action tensor([-1.2737, 0.0223, 0.4539, -0.7168]) tensor([0.0831, 0.3039, 0.4679, 0.1451]) -Greedy action tensor([-1.9349, -0.4211, 0.6560, -0.1740]) tensor([0.0405, 0.1839, 0.5401, 0.2355]) -Greedy action tensor([-0.8523, 0.3572, 0.2068, -0.2627]) tensor([0.1106, 0.3708, 0.3190, 0.1995]) -Greedy action tensor([-1.6988, -0.3039, 0.6095, -0.0580]) tensor([0.0494, 0.1992, 0.4966, 0.2548]) -Greedy action tensor([-1.6525, -0.2390, 0.6456, 0.0532]) tensor([0.0486, 0.1998, 0.4839, 0.2676]) -Greedy action tensor([-1.8938, -0.4277, 0.6370, -0.1603]) tensor([0.0424, 0.1839, 0.5333, 0.2403]) -Greedy action tensor([-1.0131, -0.5770, 0.2377, 0.2691]) tensor([0.1037, 0.1604, 0.3622, 0.3737]) -Greedy action tensor([-1.7847, -0.4070, 0.8392, 0.2679]) tensor([0.0377, 0.1494, 0.5195, 0.2934]) -Greedy action tensor([-1.3780, -0.0772, 0.2401, 0.1226]) tensor([0.0704, 0.2586, 0.3552, 0.3158]) -Greedy action tensor([-0.1882, 0.0155, 0.5768, 0.5694]) tensor([0.1537, 0.1884, 0.3302, 0.3278]) -Greedy action tensor([-0.0221, -0.4740, 0.9956, 1.5778]) tensor([0.1069, 0.0680, 0.2957, 0.5294]) -Greedy action tensor([-1.7468, -0.4888, 0.7849, 0.2334]) tensor([0.0411, 0.1446, 0.5167, 0.2976]) -Greedy action tensor([-1.9410, -0.4358, 0.6629, -0.1780]) tensor([0.0402, 0.1813, 0.5439, 0.2346]) -Greedy action tensor([-1.8402, -0.4988, 0.6116, -0.1278]) tensor([0.0455, 0.1740, 0.5283, 0.2522]) -Greedy action tensor([-1.8851, -0.4356, 0.6342, -0.1391]) tensor([0.0427, 0.1820, 0.5305, 0.2448]) -Greedy action tensor([-1.9317, -0.4534, 0.6728, -0.1710]) tensor([0.0404, 0.1774, 0.5470, 0.2352]) -Greedy action tensor([-0.3999, 0.8578, -0.0117, 0.2388]) tensor([0.1268, 0.4460, 0.1870, 0.2402]) -Greedy action tensor([-1.3306, 0.5780, 0.2053, -0.0014]) tensor([0.0619, 0.4171, 0.2873, 0.2337]) -Greedy action tensor([-1.9428, -0.4510, 0.6675, -0.1801]) tensor([0.0402, 0.1787, 0.5468, 0.2343]) -Greedy action tensor([-1.7450, -0.4820, 0.5732, -0.1325]) tensor([0.0507, 0.1794, 0.5154, 0.2545]) -Greedy action tensor([-1.7516, -0.4711, 0.6239, -0.2821]) tensor([0.0508, 0.1826, 0.5460, 0.2206]) -Greedy action tensor([-1.7539, -0.4598, 0.5892, -0.1477]) tensor([0.0499, 0.1820, 0.5195, 0.2486]) -Greedy action tensor([-1.9145, -0.4327, 0.6530, -0.1616]) tensor([0.0413, 0.1818, 0.5384, 0.2384]) -Greedy action tensor([-1.8379, -0.3624, 0.6508, -0.1005]) tensor([0.0433, 0.1893, 0.5214, 0.2460]) -Greedy action tensor([-1.8946, -0.4419, 0.6423, -0.1463]) tensor([0.0423, 0.1807, 0.5342, 0.2428]) -Greedy action tensor([-1.8150, -0.3827, 0.6521, -0.0823]) tensor([0.0442, 0.1851, 0.5209, 0.2499]) -Greedy action tensor([-1.8734, -0.3799, 0.6068, -0.1284]) tensor([0.0432, 0.1926, 0.5165, 0.2476]) -Greedy action tensor([-1.6468, -0.2929, 0.5452, 0.0410]) tensor([0.0520, 0.2014, 0.4655, 0.2812]) -Greedy action tensor([-1.8705, -0.4337, 0.6363, -0.1165]) tensor([0.0430, 0.1809, 0.5275, 0.2485]) -Greedy action tensor([-1.8360, -0.1396, 0.5632, -0.1002]) tensor([0.0432, 0.2357, 0.4759, 0.2452]) -Greedy action tensor([-1.9373, -0.4555, 0.6631, -0.1774]) tensor([0.0405, 0.1783, 0.5457, 0.2355]) -Greedy action tensor([-1.4939, -0.9115, 1.3234, 0.9638]) tensor([0.0321, 0.0574, 0.5363, 0.3743]) -Greedy action tensor([-1.8901, -0.4428, 0.6408, -0.1530]) tensor([0.0426, 0.1809, 0.5348, 0.2418]) -Greedy action tensor([-0.7154, 0.1805, 0.6229, 0.7275]) tensor([0.0870, 0.2131, 0.3317, 0.3682]) -Greedy action tensor([-1.4947, 0.6156, 0.3053, 0.1821]) tensor([0.0484, 0.3996, 0.2930, 0.2590]) -Greedy action tensor([-0.6138, 0.5341, 0.1243, -0.0211]) tensor([0.1242, 0.3914, 0.2598, 0.2246]) -Greedy action tensor([-1.8994, -0.4426, 0.6420, -0.1627]) tensor([0.0423, 0.1813, 0.5365, 0.2399]) -Greedy action tensor([-1.8511, -0.3033, 0.6018, -0.1462]) tensor([0.0438, 0.2060, 0.5092, 0.2410]) -Greedy action tensor([-1.9006, -0.4452, 0.6553, -0.1439]) tensor([0.0417, 0.1789, 0.5376, 0.2418]) -Greedy action tensor([-1.2860, -0.5763, 0.3049, 0.2390]) tensor([0.0798, 0.1622, 0.3915, 0.3666]) -Greedy action tensor([-0.8817, -0.1988, 0.2111, -0.0419]) tensor([0.1208, 0.2391, 0.3603, 0.2798]) -Greedy action tensor([-0.5111, 0.2642, 0.6124, 1.4204]) tensor([0.0761, 0.1652, 0.2339, 0.5248]) -Greedy action tensor([-1.6173, -0.3374, 0.4751, 0.0055]) tensor([0.0563, 0.2024, 0.4561, 0.2852]) -Greedy action tensor([-1.9209, -0.4542, 0.6751, -0.1597]) tensor([0.0407, 0.1765, 0.5459, 0.2369]) -Greedy action tensor([-1.7562, -0.2252, 0.5426, -0.1025]) tensor([0.0481, 0.2221, 0.4787, 0.2511]) -Greedy action tensor([-1.7658, -0.4672, 0.6297, 0.0145]) tensor([0.0464, 0.1699, 0.5088, 0.2750]) -Greedy action tensor([-1.7795, -0.4595, 0.5864, -0.1317]) tensor([0.0486, 0.1818, 0.5173, 0.2523]) -Greedy action tensor([-1.8465, -0.3670, 0.6114, -0.1115]) tensor([0.0440, 0.1931, 0.5136, 0.2493]) -Greedy action tensor([-1.6215, -0.1635, 0.5771, 0.0277]) tensor([0.0513, 0.2202, 0.4619, 0.2666]) -Greedy action tensor([-1.9217, -0.3560, 0.6432, -0.1609]) tensor([0.0406, 0.1945, 0.5284, 0.2365]) -Greedy action tensor([-1.8834, -0.2461, 0.6055, -0.1173]) tensor([0.0416, 0.2139, 0.5012, 0.2433]) -Greedy action tensor([-1.7568, -0.3974, 0.7627, 0.1267]) tensor([0.0419, 0.1630, 0.5199, 0.2753]) -Greedy action tensor([-1.9310, -0.4432, 0.6601, -0.1706]) tensor([0.0407, 0.1801, 0.5428, 0.2365]) -Greedy action tensor([-1.8830, -0.4343, 0.6374, -0.1427]) tensor([0.0428, 0.1820, 0.5316, 0.2437]) -Greedy action tensor([ 1.5392, -0.7171, 0.0436, -0.6807]) tensor([0.6957, 0.0729, 0.1559, 0.0756]) -Greedy action tensor([ 0.8157, -0.4670, 0.0401, -0.4572]) tensor([0.4956, 0.1374, 0.2282, 0.1388]) -Greedy action tensor([ 0.3352, -0.0766, -0.1207, -0.2417]) tensor([0.3499, 0.2318, 0.2218, 0.1965]) -Greedy action tensor([ 0.7894, -0.7393, 0.0796, -0.8404]) tensor([0.5251, 0.1138, 0.2582, 0.1029]) -Greedy action tensor([ 0.7602, -0.4798, -0.0299, -0.1986]) tensor([0.4702, 0.1361, 0.2134, 0.1803]) -Greedy action tensor([ 0.6527, -0.1679, -0.0939, 0.0383]) tensor([0.4073, 0.1793, 0.1931, 0.2203]) -Greedy action tensor([ 0.9409, -0.7082, 0.0973, -0.4677]) tensor([0.5357, 0.1030, 0.2304, 0.1310]) -Greedy action tensor([ 0.5856, -0.2009, -0.0471, -0.6221]) tensor([0.4375, 0.1993, 0.2324, 0.1308]) -Greedy action tensor([ 0.6409, -0.3136, -0.1642, -0.3399]) tensor([0.4531, 0.1744, 0.2026, 0.1699]) -Greedy action tensor([ 0.0541, -0.1816, -0.1250, -0.4622]) tensor([0.3103, 0.2451, 0.2594, 0.1852]) -Greedy action tensor([ 0.9730, -0.9187, -0.0974, -0.3765]) tensor([0.5704, 0.0860, 0.1956, 0.1479]) -Greedy action tensor([ 0.8194, -0.5448, 0.0740, -0.2808]) tensor([0.4848, 0.1239, 0.2300, 0.1613]) -Greedy action tensor([ 1.0645, -1.0149, 0.1480, -0.7543]) tensor([0.5927, 0.0741, 0.2370, 0.0962]) -Greedy action tensor([ 0.5419, 0.0325, -0.0474, 0.0798]) tensor([0.3590, 0.2157, 0.1992, 0.2262]) -Greedy action tensor([ 0.6704, -0.2616, -0.0069, -0.1578]) tensor([0.4276, 0.1684, 0.2172, 0.1868]) -Greedy action tensor([ 0.7308, -0.5248, -0.0704, -0.2946]) tensor([0.4779, 0.1362, 0.2145, 0.1714]) -Greedy action tensor([ 0.8769, -0.3043, -0.0654, -0.3278]) tensor([0.5009, 0.1537, 0.1952, 0.1502]) -Greedy action tensor([ 0.2905, 0.4956, -0.3250, 0.0433]) tensor([0.2818, 0.3459, 0.1523, 0.2201]) -Greedy action tensor([ 0.6342, -0.5205, 0.0799, -0.2028]) tensor([0.4306, 0.1357, 0.2473, 0.1864]) -Greedy action tensor([ 0.4190, -0.3298, 0.2561, -0.5450]) tensor([0.3698, 0.1749, 0.3142, 0.1410]) -Greedy action tensor([ 0.2385, 0.1013, 0.1227, -0.6881]) tensor([0.3166, 0.2760, 0.2820, 0.1253]) -Greedy action tensor([ 0.5847, -0.8468, -0.1217, -0.8279]) tensor([0.5061, 0.1209, 0.2497, 0.1232]) -Greedy action tensor([ 0.3286, 0.2506, -0.1057, -0.0005]) tensor([0.3037, 0.2810, 0.1967, 0.2186]) -Greedy action tensor([ 0.7319, -0.4936, -0.1000, -0.3031]) tensor([0.4798, 0.1409, 0.2088, 0.1705]) -Greedy action tensor([ 0.9393, -0.7255, 0.1965, -0.5934]) tensor([0.5316, 0.1006, 0.2529, 0.1148]) -Greedy action tensor([ 1.0858, -1.3075, -0.1313, -0.6338]) tensor([0.6383, 0.0583, 0.1890, 0.1144]) -Greedy action tensor([ 0.6558, -0.0909, -0.0663, -0.4680]) tensor([0.4377, 0.2074, 0.2126, 0.1423]) -Greedy action tensor([ 0.6657, -0.3535, -0.0207, -0.0786]) tensor([0.4275, 0.1543, 0.2152, 0.2031]) -Greedy action tensor([ 0.4915, -0.3527, -0.3291, -0.1573]) tensor([0.4179, 0.1797, 0.1840, 0.2184]) -Greedy action tensor([ 0.5700, -0.3479, -0.1157, -0.2517]) tensor([0.4268, 0.1705, 0.2150, 0.1877]) -Greedy action tensor([ 0.8383, -0.7296, 0.0480, -0.4707]) tensor([0.5175, 0.1079, 0.2348, 0.1398]) -Greedy action tensor([ 0.5786, -0.5097, -0.0624, -0.4844]) tensor([0.4527, 0.1525, 0.2385, 0.1564]) -Greedy action tensor([ 0.8422, -0.4130, 0.1441, -0.3966]) tensor([0.4826, 0.1375, 0.2401, 0.1398]) -Greedy action tensor([ 0.6496, -0.5000, 0.1357, -0.1935]) tensor([0.4264, 0.1351, 0.2550, 0.1835]) -Greedy action tensor([ 0.6141, -0.4640, -0.2167, -0.3154]) tensor([0.4607, 0.1568, 0.2007, 0.1819]) -Greedy action tensor([ 0.6268, -0.4317, -0.0832, -0.2852]) tensor([0.4464, 0.1549, 0.2194, 0.1793]) -Greedy action tensor([ 0.2524, 0.3067, -0.1020, 0.0955]) tensor([0.2768, 0.2923, 0.1942, 0.2366]) -Greedy action tensor([ 0.8842, -0.5177, -0.1511, -0.3562]) tensor([0.5289, 0.1302, 0.1878, 0.1530]) -Greedy action tensor([ 0.9306, -0.4348, 0.0974, -0.3486]) tensor([0.5081, 0.1297, 0.2208, 0.1414]) -Greedy action tensor([ 0.5306, -0.1152, 0.0075, -0.0931]) tensor([0.3769, 0.1976, 0.2234, 0.2020]) -Greedy action tensor([ 0.9304, -0.6539, -0.0417, -0.3172]) tensor([0.5346, 0.1096, 0.2022, 0.1535]) -Greedy action tensor([ 1.3676, -0.5636, -0.1553, -0.5610]) tensor([0.6629, 0.0961, 0.1446, 0.0964]) -Greedy action tensor([ 0.4508, 0.2382, 0.0741, -0.2129]) tensor([0.3323, 0.2686, 0.2280, 0.1711]) -Greedy action tensor([ 0.4251, -0.2249, 0.0747, -0.2190]) tensor([0.3634, 0.1897, 0.2560, 0.1909]) -Greedy action tensor([ 0.5926, -0.3342, -0.1235, -0.1114]) tensor([0.4203, 0.1664, 0.2054, 0.2079]) -Greedy action tensor([ 0.7516, -0.2137, -0.1076, -0.2759]) tensor([0.4625, 0.1761, 0.1959, 0.1655]) -Greedy action tensor([ 0.6803, -0.5680, 0.0190, -0.2780]) tensor([0.4573, 0.1312, 0.2360, 0.1754]) -Greedy action tensor([ 0.5884, -0.5533, -0.1247, -0.2040]) tensor([0.4421, 0.1411, 0.2167, 0.2001]) -Greedy action tensor([ 0.5659, -0.3311, -0.2822, -0.0777]) tensor([0.4235, 0.1727, 0.1813, 0.2225]) -Greedy action tensor([ 0.5835, -0.1592, -0.0123, -0.1984]) tensor([0.4025, 0.1915, 0.2218, 0.1842]) -Greedy action tensor([ 0.5548, -0.3216, 0.0189, -0.1296]) tensor([0.3991, 0.1661, 0.2335, 0.2013]) -Greedy action tensor([ 0.8680, -1.0521, 0.3121, -0.4468]) tensor([0.5028, 0.0737, 0.2884, 0.1350]) -Greedy action tensor([ 0.8136, -0.4885, 0.0009, -0.4182]) tensor([0.4982, 0.1355, 0.2210, 0.1453]) -Greedy action tensor([ 0.4597, -0.3712, -0.1346, 0.0109]) tensor([0.3808, 0.1659, 0.2102, 0.2431]) -Greedy action tensor([ 1.1322, -0.6730, 0.1483, -0.3755]) tensor([0.5683, 0.0934, 0.2124, 0.1258]) -Greedy action tensor([ 0.6246, -0.3685, -0.2274, -0.2988]) tensor([0.4557, 0.1688, 0.1944, 0.1810]) -Greedy action tensor([ 0.7760, -0.4918, 0.0508, -0.3326]) tensor([0.4772, 0.1343, 0.2311, 0.1575]) -Greedy action tensor([ 0.6597, -0.1392, -0.2395, -0.2319]) tensor([0.4412, 0.1984, 0.1795, 0.1809]) -Greedy action tensor([ 0.7264, -0.2685, -0.0512, -0.0644]) tensor([0.4381, 0.1620, 0.2013, 0.1987]) -Greedy action tensor([ 0.5790, -0.9538, -0.0923, -0.3574]) tensor([0.4719, 0.1019, 0.2412, 0.1850]) -Greedy action tensor([ 0.9060, -0.8314, 0.1416, -0.3133]) tensor([0.5162, 0.0908, 0.2404, 0.1525]) -Greedy action tensor([ 0.8565, -0.6813, 0.1452, -0.4149]) tensor([0.5034, 0.1082, 0.2472, 0.1412]) -Greedy action tensor([ 0.1953, -0.1051, -0.0192, -0.3003]) tensor([0.3168, 0.2346, 0.2556, 0.1930]) -Greedy action tensor([ 0.8571, -0.2501, -0.0215, -0.3511]) tensor([0.4891, 0.1616, 0.2032, 0.1461]) -Greedy action tensor([ 0.2289, -0.1410, -0.0296, -0.0617]) tensor([0.3115, 0.2151, 0.2405, 0.2329]) -Greedy action tensor([ 0.5351, -0.3721, 0.0129, -0.2063]) tensor([0.4043, 0.1632, 0.2399, 0.1926]) -Greedy action tensor([ 0.4353, -0.5190, -0.1771, -0.1580]) tensor([0.4033, 0.1553, 0.2186, 0.2228]) -Greedy action tensor([ 0.5984, -0.4368, 0.0249, -0.3597]) tensor([0.4344, 0.1543, 0.2448, 0.1666]) -Greedy action tensor([ 0.2864, -0.0794, -0.0946, -0.1579]) tensor([0.3313, 0.2298, 0.2264, 0.2125]) -Greedy action tensor([ 0.1857, -0.1702, 0.1819, -0.3561]) tensor([0.3050, 0.2137, 0.3039, 0.1774]) -Greedy action tensor([ 1.1082, -0.5992, -0.0296, -0.3493]) tensor([0.5765, 0.1045, 0.1848, 0.1342]) -Greedy action tensor([ 0.6806, -0.4616, -0.0799, -0.3345]) tensor([0.4653, 0.1485, 0.2175, 0.1686]) -Greedy action tensor([ 0.9248, -0.7431, -0.0037, -0.5454]) tensor([0.5514, 0.1040, 0.2179, 0.1267]) -Greedy action tensor([ 0.5691, -0.2860, 0.0073, -0.3621]) tensor([0.4185, 0.1780, 0.2386, 0.1649]) -Greedy action tensor([ 0.4591, 0.2301, -0.0623, 0.1010]) tensor([0.3238, 0.2575, 0.1922, 0.2264]) -Greedy action tensor([ 0.9429, -0.8334, -0.0161, -0.4102]) tensor([0.5522, 0.0935, 0.2116, 0.1427]) -Greedy action tensor([ 0.8319, -0.5869, -0.1276, -0.2808]) tensor([0.5118, 0.1239, 0.1961, 0.1682]) -Greedy action tensor([ 0.8644, -0.2675, 0.0068, -0.4924]) tensor([0.4990, 0.1609, 0.2117, 0.1285]) -Greedy action tensor([ 0.3938, -0.4842, 0.0702, -0.4654]) tensor([0.3902, 0.1622, 0.2823, 0.1653]) -Greedy action tensor([ 0.8041, -0.3264, 0.2645, -0.2934]) tensor([0.4465, 0.1442, 0.2603, 0.1490]) -Greedy action tensor([ 0.4044, -0.2034, 0.0085, -0.1388]) tensor([0.3573, 0.1946, 0.2405, 0.2076]) -Greedy action tensor([ 1.2972, -0.4860, -0.2441, 0.3538]) tensor([0.5645, 0.0949, 0.1209, 0.2197]) -Greedy action tensor([ 1.1992, 0.1628, -0.4052, 0.4331]) tensor([0.4949, 0.1756, 0.0995, 0.2300]) -Greedy action tensor([ 1.5714, -0.7914, -0.1723, 0.1338]) tensor([0.6638, 0.0625, 0.1161, 0.1577]) -Greedy action tensor([ 1.0115, -0.3645, -0.1718, 0.3364]) tensor([0.4836, 0.1221, 0.1481, 0.2462]) -Greedy action tensor([ 1.9375, -0.9283, -0.2821, 0.7435]) tensor([0.6809, 0.0388, 0.0740, 0.2063]) -Greedy action tensor([ 1.2646, -0.2824, -0.3244, 0.4288]) tensor([0.5404, 0.1150, 0.1103, 0.2343]) -Greedy action tensor([ 1.7247, -0.4902, -0.8422, 0.3005]) tensor([0.7010, 0.0765, 0.0538, 0.1687]) -Greedy action tensor([ 1.3403, -0.0307, -0.8267, 0.4138]) tensor([0.5668, 0.1439, 0.0649, 0.2244]) -Greedy action tensor([ 1.5272, -0.4161, -0.6236, 0.1331]) tensor([0.6633, 0.0950, 0.0772, 0.1645]) -Greedy action tensor([ 1.9542, -0.8164, -0.6590, 1.2533]) tensor([0.6127, 0.0384, 0.0449, 0.3040]) -Greedy action tensor([ 1.3048, -0.4528, -0.6515, 0.6918]) tensor([0.5389, 0.0929, 0.0762, 0.2920]) -Greedy action tensor([ 1.3635, -0.3946, -0.5015, 0.3317]) tensor([0.5940, 0.1024, 0.0920, 0.2117]) -Greedy action tensor([ 1.9339, -0.8774, -0.3563, 0.3325]) tensor([0.7337, 0.0441, 0.0743, 0.1479]) -Greedy action tensor([ 1.5922, -0.7781, -0.6528, 0.5498]) tensor([0.6443, 0.0602, 0.0683, 0.2272]) -Greedy action tensor([ 1.4566, -0.3356, -0.2745, 0.4549]) tensor([0.5845, 0.0974, 0.1035, 0.2146]) -Greedy action tensor([ 1.2681, -0.0817, -0.2223, 0.0369]) tensor([0.5629, 0.1459, 0.1268, 0.1643]) -Greedy action tensor([ 1.3466, -0.2414, -0.4095, 0.5390]) tensor([0.5485, 0.1121, 0.0947, 0.2446]) -Greedy action tensor([ 1.4276, -0.2229, -0.3115, 0.0373]) tensor([0.6186, 0.1187, 0.1087, 0.1540]) -Greedy action tensor([ 1.2621, -0.2672, 0.0950, 0.0831]) tensor([0.5448, 0.1180, 0.1696, 0.1676]) -Greedy action tensor([ 1.8972, -0.5163, -0.4010, 0.4900]) tensor([0.6970, 0.0624, 0.0700, 0.1706]) -Greedy action tensor([ 1.2350, -0.2518, -0.0896, 0.0021]) tensor([0.5607, 0.1268, 0.1491, 0.1634]) -Greedy action tensor([1.9424, 0.1373, 0.0858, 0.1691]) tensor([0.6710, 0.1103, 0.1048, 0.1139]) -Greedy action tensor([ 1.7750, -0.3169, -0.3907, 0.3119]) tensor([0.6804, 0.0840, 0.0780, 0.1575]) -Greedy action tensor([ 1.0369, -0.6508, -0.0203, 0.1694]) tensor([0.5122, 0.0947, 0.1780, 0.2151]) -Greedy action tensor([ 1.9863, -0.9641, -0.3762, 0.2843]) tensor([0.7526, 0.0394, 0.0709, 0.1372]) -Greedy action tensor([ 1.4321, 0.3124, -0.3960, -0.0653]) tensor([0.5845, 0.1908, 0.0939, 0.1308]) -Greedy action tensor([ 1.1915, -0.5137, -0.1466, 0.0582]) tensor([0.5662, 0.1029, 0.1486, 0.1823]) -Greedy action tensor([ 1.5482, -0.9145, -0.3652, -0.0600]) tensor([0.6978, 0.0595, 0.1030, 0.1397]) -Greedy action tensor([ 1.3702, -0.1368, -0.2547, -0.0406]) tensor([0.6015, 0.1333, 0.1185, 0.1467]) -Greedy action tensor([ 1.6967, -0.8313, -0.6580, 0.9295]) tensor([0.6101, 0.0487, 0.0579, 0.2833]) -Greedy action tensor([ 1.2963, -0.5834, 0.1549, -0.2755]) tensor([0.5954, 0.0909, 0.1901, 0.1236]) -Greedy action tensor([ 0.8437, -0.2628, -0.3499, 0.2503]) tensor([0.4574, 0.1513, 0.1386, 0.2527]) -Greedy action tensor([ 1.2893, -0.2066, -0.1505, 0.1817]) tensor([0.5582, 0.1251, 0.1323, 0.1844]) -Greedy action tensor([ 1.1861, -0.4392, -0.9111, 0.2839]) tensor([0.5796, 0.1141, 0.0712, 0.2351]) -Greedy action tensor([ 1.6860, -0.1648, -0.5102, 0.3190]) tensor([0.6565, 0.1031, 0.0730, 0.1673]) -Greedy action tensor([ 1.3323, -0.2743, -0.8095, 0.0689]) tensor([0.6247, 0.1253, 0.0734, 0.1766]) -Greedy action tensor([ 1.3991, -0.5323, -0.4477, 0.3441]) tensor([0.6057, 0.0878, 0.0956, 0.2109]) -Greedy action tensor([ 1.2207, -0.5733, -0.0930, 0.5781]) tensor([0.5099, 0.0848, 0.1371, 0.2682]) -Greedy action tensor([ 1.6905, -0.8351, -0.3111, 0.2158]) tensor([0.6925, 0.0554, 0.0936, 0.1585]) -Greedy action tensor([ 1.7447, -1.5840, 0.0151, -0.0194]) tensor([0.7223, 0.0259, 0.1281, 0.1238]) -Greedy action tensor([ 1.6677, -0.5313, -0.3438, 0.3329]) tensor([0.6632, 0.0736, 0.0887, 0.1745]) -Greedy action tensor([ 1.2023, -0.3407, -0.3827, 0.2504]) tensor([0.5541, 0.1184, 0.1136, 0.2139]) -Greedy action tensor([ 1.9452, -1.2296, -0.0696, 0.6380]) tensor([0.6917, 0.0289, 0.0922, 0.1872]) -Greedy action tensor([ 1.9056, -0.9387, 0.0365, 0.4829]) tensor([0.6880, 0.0400, 0.1061, 0.1658]) -Greedy action tensor([ 1.8933, -0.8950, -0.4040, 0.2191]) tensor([0.7410, 0.0456, 0.0745, 0.1389]) -Greedy action tensor([ 1.3781, -0.1809, -0.6072, 0.0905]) tensor([0.6159, 0.1296, 0.0846, 0.1700]) -Greedy action tensor([ 0.5158, -0.0062, 0.1043, 0.0669]) tensor([0.3455, 0.2050, 0.2290, 0.2205]) -Greedy action tensor([ 2.0769, -0.7251, -0.6087, 0.7885]) tensor([0.7120, 0.0432, 0.0485, 0.1963]) -Greedy action tensor([ 1.8422, -1.2118, -0.2187, 1.0365]) tensor([0.6168, 0.0291, 0.0785, 0.2756]) -Greedy action tensor([ 1.0253, -0.1385, -1.0518, 0.4540]) tensor([0.4994, 0.1560, 0.0626, 0.2821]) -Greedy action tensor([ 1.2865, -0.1718, -0.3444, 0.0140]) tensor([0.5853, 0.1362, 0.1146, 0.1640]) -Greedy action tensor([ 1.9068, -0.3015, -0.4527, 0.3225]) tensor([0.7095, 0.0780, 0.0670, 0.1455]) -Greedy action tensor([ 1.7421, -0.7308, -0.2440, 0.3213]) tensor([0.6835, 0.0576, 0.0938, 0.1651]) -Greedy action tensor([ 1.5143, -0.4405, -0.4903, 0.0290]) tensor([0.6654, 0.0942, 0.0897, 0.1507]) -Greedy action tensor([ 1.7680, -0.7194, -0.4576, 0.4587]) tensor([0.6844, 0.0569, 0.0739, 0.1848]) -Greedy action tensor([ 2.1254, -0.2455, -0.6214, 0.6901]) tensor([0.7165, 0.0669, 0.0460, 0.1706]) -Greedy action tensor([ 1.3289, -0.3699, -0.9216, 0.5690]) tensor([0.5695, 0.1042, 0.0600, 0.2664]) -Greedy action tensor([ 1.6554, -0.1759, -0.2925, 0.0059]) tensor([0.6689, 0.1072, 0.0954, 0.1285]) -Greedy action tensor([ 1.3146, -0.1247, -0.3583, 0.0508]) tensor([0.5857, 0.1389, 0.1099, 0.1655]) -Greedy action tensor([ 1.4134, -0.4110, -0.4425, 0.2657]) tensor([0.6116, 0.0987, 0.0956, 0.1941]) -Greedy action tensor([ 1.5848, -0.5702, -0.4004, 0.2963]) tensor([0.6540, 0.0758, 0.0898, 0.1803]) -Greedy action tensor([ 0.9741, -0.5102, -0.2967, 0.5910]) tensor([0.4568, 0.1035, 0.1282, 0.3114]) -Greedy action tensor([ 1.4980, -0.0887, -0.5102, 0.4607]) tensor([0.5906, 0.1208, 0.0793, 0.2093]) -Greedy action tensor([ 1.2189, -0.7188, -0.0449, 0.0903]) tensor([0.5714, 0.0823, 0.1615, 0.1848]) -Greedy action tensor([ 1.3282, -0.2791, -0.5432, 0.3903]) tensor([0.5728, 0.1148, 0.0882, 0.2242]) -Greedy action tensor([ 1.2052, -0.0297, -0.6933, -0.2134]) tensor([0.5943, 0.1729, 0.0890, 0.1438]) -Greedy action tensor([ 1.3494, -0.5222, -0.3503, 0.1692]) tensor([0.6083, 0.0936, 0.1112, 0.1869]) -Greedy action tensor([ 2.0598, -0.8577, -0.1314, 0.6850]) tensor([0.7048, 0.0381, 0.0788, 0.1783]) -Greedy action tensor([ 1.6918, -0.9646, -0.7751, 0.1070]) tensor([0.7353, 0.0516, 0.0624, 0.1507]) -Greedy action tensor([ 1.1615, -0.6954, -0.1621, 0.0173]) tensor([0.5744, 0.0897, 0.1529, 0.1829]) -Greedy action tensor([ 1.2338, -0.4907, -0.3508, 0.1891]) tensor([0.5763, 0.1027, 0.1182, 0.2027]) -Greedy action tensor([ 1.8237, -1.1080, -0.2552, 0.5277]) tensor([0.6887, 0.0367, 0.0861, 0.1884]) -Greedy action tensor([ 0.7551, -0.0049, -0.1558, -0.3032]) tensor([0.4511, 0.2109, 0.1814, 0.1565]) -Greedy action tensor([ 1.8509, -1.1594, -0.3652, 0.6485]) tensor([0.6855, 0.0338, 0.0747, 0.2060]) -Greedy action tensor([ 1.8808, -0.9399, -0.2729, 0.5187]) tensor([0.6985, 0.0416, 0.0811, 0.1789]) -Greedy action tensor([ 1.6123, -0.3092, -0.4087, 0.4911]) tensor([0.6231, 0.0912, 0.0826, 0.2031]) -Greedy action tensor([ 1.3868, -0.5917, -0.0226, -0.0272]) tensor([0.6151, 0.0851, 0.1503, 0.1496]) -Greedy action tensor([ 1.4057, -0.6115, -0.2005, 0.1389]) tensor([0.6190, 0.0823, 0.1242, 0.1744]) -Greedy action tensor([ 2.0720, -0.7577, -0.5019, 0.4068]) tensor([0.7550, 0.0446, 0.0576, 0.1428]) -Greedy action tensor([ 0.7542, -0.1138, -0.5302, 0.7157]) tensor([0.3761, 0.1579, 0.1041, 0.3619]) -Greedy action tensor([ 2.2090, -0.5325, -0.3701, 0.4392]) tensor([0.7630, 0.0492, 0.0579, 0.1300]) -Greedy action tensor([-0.5001, -0.1812, 1.3193, -0.9066]) tensor([0.1086, 0.1494, 0.6698, 0.0723]) -Greedy action tensor([ 0.3240, -0.4081, -1.1044, -0.3273]) tensor([0.4460, 0.2145, 0.1069, 0.2326]) -Greedy action tensor([ 1.9608, -0.3657, 1.2297, 1.2076]) tensor([0.4878, 0.0476, 0.2348, 0.2297]) -Greedy action tensor([-1.7749, -0.4548, 0.5942, 0.0861]) tensor([0.0457, 0.1713, 0.4889, 0.2941]) -Greedy action tensor([ 0.2838, -1.1088, 1.3658, -0.2808]) tensor([0.2097, 0.0521, 0.6189, 0.1193]) -Greedy action tensor([ 1.4443, -0.3449, 1.5669, 0.9080]) tensor([0.3469, 0.0580, 0.3922, 0.2029]) -Greedy action tensor([ 0.7975, -0.0159, -0.0945, 0.7622]) tensor([0.3548, 0.1573, 0.1454, 0.3425]) -Greedy action tensor([-0.7436, -0.5847, 0.9640, -0.4286]) tensor([0.1104, 0.1294, 0.6089, 0.1513]) -Greedy action tensor([-0.5795, -0.2751, 0.6235, -0.8491]) tensor([0.1551, 0.2102, 0.5163, 0.1184]) -Greedy action tensor([ 0.8115, -1.3010, 2.3234, 0.0357]) tensor([0.1635, 0.0198, 0.7415, 0.0753]) -Greedy action tensor([ 0.5516, -0.8729, -0.0730, 0.2678]) tensor([0.3954, 0.0951, 0.2117, 0.2977]) -Greedy action tensor([0.6111, 0.1737, 0.8693, 0.2528]) tensor([0.2748, 0.1774, 0.3557, 0.1920]) -Greedy action tensor([ 0.3458, -2.2836, 0.6621, 1.3119]) tensor([0.1972, 0.0142, 0.2705, 0.5181]) -Greedy action tensor([ 0.0238, -0.2338, 0.1128, -0.3791]) tensor([0.2829, 0.2187, 0.3093, 0.1891]) -Greedy action tensor([ 0.1521, -0.5259, 0.1646, -0.6691]) tensor([0.3378, 0.1715, 0.3421, 0.1486]) -Greedy action tensor([-1.3870, -1.1546, -0.6217, 0.7639]) tensor([0.0769, 0.0970, 0.1653, 0.6608]) -Greedy action tensor([ 0.3562, 0.9211, -0.0066, -0.0813]) tensor([0.2439, 0.4290, 0.1697, 0.1574]) -Greedy action tensor([ 0.3662, 0.4981, -0.4540, -0.2987]) tensor([0.3230, 0.3686, 0.1422, 0.1661]) -Greedy action tensor([-0.0880, -1.8369, -0.3720, -0.1593]) tensor([0.3499, 0.0609, 0.2634, 0.3258]) -Greedy action tensor([-0.8202, -1.0895, 0.3812, -0.5475]) tensor([0.1562, 0.1193, 0.5193, 0.2052]) -Greedy action tensor([-0.3589, -0.6942, -0.6011, -0.2723]) tensor([0.2785, 0.1992, 0.2186, 0.3037]) -Greedy action tensor([ 0.4349, -1.5726, -0.6859, -0.5763]) tensor([0.5482, 0.0736, 0.1787, 0.1994]) -Greedy action tensor([-0.1211, 0.5784, 0.0306, -0.3762]) tensor([0.2020, 0.4065, 0.2350, 0.1565]) -Greedy action tensor([ 0.9379, 0.7039, 0.7777, -0.6982]) tensor([0.3524, 0.2788, 0.3002, 0.0686]) -Greedy action tensor([ 0.8092, -0.3645, -0.2451, -0.0128]) tensor([0.4768, 0.1474, 0.1661, 0.2096]) -Greedy action tensor([ 0.7425, -0.7149, -0.2017, 0.3383]) tensor([0.4368, 0.1017, 0.1699, 0.2916]) -Greedy action tensor([1.2147, 0.2666, 0.4178, 0.0629]) tensor([0.4642, 0.1799, 0.2092, 0.1467]) -Greedy action tensor([ 0.4508, -1.4362, -0.7587, -0.1466]) tensor([0.5000, 0.0758, 0.1492, 0.2751]) -Greedy action tensor([-0.1555, -0.5035, 1.2438, -0.5323]) tensor([0.1552, 0.1096, 0.6288, 0.1065]) -Greedy action tensor([-0.7060, -0.4289, 0.5895, -0.9092]) tensor([0.1473, 0.1944, 0.5381, 0.1202]) -Greedy action tensor([ 0.7505, -0.5815, 0.4245, -0.1621]) tensor([0.4189, 0.1106, 0.3024, 0.1682]) -Greedy action tensor([-0.3647, -1.7655, 0.9001, 1.0732]) tensor([0.1111, 0.0274, 0.3936, 0.4680]) -Greedy action tensor([-0.5418, -0.8720, 0.6470, -0.6019]) tensor([0.1683, 0.1209, 0.5524, 0.1584]) -Greedy action tensor([-0.8373, -0.5953, 0.8685, -0.7609]) tensor([0.1129, 0.1438, 0.6215, 0.1218]) -Greedy action tensor([ 0.2542, -0.4634, 1.2249, -0.6627]) tensor([0.2209, 0.1078, 0.5830, 0.0883]) -Greedy action tensor([ 0.8627, -0.2220, 0.1072, 0.3393]) tensor([0.4166, 0.1408, 0.1957, 0.2469]) -Greedy action tensor([-1.1780, -0.1775, 0.6587, -0.2445]) tensor([0.0798, 0.2169, 0.5005, 0.2028]) -Greedy action tensor([1.7068, 0.5436, 0.7124, 0.8970]) tensor([0.4701, 0.1469, 0.1739, 0.2092]) -Greedy action tensor([-0.0587, 0.1434, 0.5413, -0.6126]) tensor([0.2164, 0.2649, 0.3943, 0.1244]) -Greedy action tensor([ 0.2139, -0.5841, -0.9724, -0.2951]) tensor([0.4243, 0.1910, 0.1296, 0.2551]) -Greedy action tensor([ 0.7703, -1.0442, 1.0505, -0.4826]) tensor([0.3607, 0.0588, 0.4774, 0.1031]) -Greedy action tensor([-0.2911, 0.7090, 0.3539, -0.0941]) tensor([0.1461, 0.3973, 0.2786, 0.1780]) -Greedy action tensor([ 0.4325, -0.3753, 0.5993, 0.6478]) tensor([0.2586, 0.1153, 0.3055, 0.3207]) -Greedy action tensor([-0.8432, -0.6803, -0.0796, 0.4471]) tensor([0.1257, 0.1479, 0.2697, 0.4567]) -Greedy action tensor([-0.5841, -0.3299, -1.0590, -0.8353]) tensor([0.2711, 0.3495, 0.1686, 0.2109]) -Greedy action tensor([ 0.1385, -1.8645, 0.2510, 0.1765]) tensor([0.3037, 0.0410, 0.3398, 0.3155]) -Greedy action tensor([-0.4652, 0.1464, -0.1521, -0.3371]) tensor([0.1870, 0.3447, 0.2557, 0.2126]) -Greedy action tensor([-0.4273, 0.0082, 2.0481, -0.4412]) tensor([0.0649, 0.1003, 0.7709, 0.0640]) -Greedy action tensor([-0.0889, -0.0942, 0.5648, -0.7495]) tensor([0.2255, 0.2243, 0.4336, 0.1165]) -Greedy action tensor([-0.1399, 0.1562, 1.3845, -0.7014]) tensor([0.1332, 0.1791, 0.6117, 0.0760]) -Greedy action tensor([ 1.2733, -0.8095, 2.5692, 0.0326]) tensor([0.1973, 0.0246, 0.7210, 0.0571]) -Greedy action tensor([ 0.3740, -0.0159, 0.1951, -0.6431]) tensor([0.3478, 0.2355, 0.2908, 0.1258]) -Greedy action tensor([-0.0043, -0.4199, 0.7626, -1.0774]) tensor([0.2407, 0.1588, 0.5182, 0.0823]) -Greedy action tensor([1.2617, 0.0174, 0.0606, 0.2825]) tensor([0.5090, 0.1467, 0.1531, 0.1912]) -Greedy action tensor([-1.6480, -0.1244, 0.3677, -1.2828]) tensor([0.0688, 0.3157, 0.5164, 0.0991]) -Greedy action tensor([ 0.4645, -0.3155, -0.1405, -0.3089]) tensor([0.4055, 0.1859, 0.2215, 0.1871]) -Greedy action tensor([ 0.9345, -0.6977, 0.4648, 0.5491]) tensor([0.3999, 0.0782, 0.2500, 0.2720]) -Greedy action tensor([ 0.7831, -0.2847, -0.3931, -0.6003]) tensor([0.5255, 0.1806, 0.1621, 0.1317]) -Greedy action tensor([-1.2580, -1.0483, 0.2550, -0.7641]) tensor([0.1189, 0.1466, 0.5397, 0.1948]) -Greedy action tensor([ 0.0392, -0.4013, -1.0613, -0.1857]) tensor([0.3604, 0.2320, 0.1199, 0.2878]) -Greedy action tensor([-0.7354, -1.0732, 1.3293, 0.3591]) tensor([0.0795, 0.0567, 0.6264, 0.2374]) -Greedy action tensor([-0.4123, 0.5359, -0.2913, 0.0023]) tensor([0.1607, 0.4147, 0.1813, 0.2432]) -Greedy action tensor([-0.1992, -0.3016, -0.5485, 0.4522]) tensor([0.2209, 0.1994, 0.1558, 0.4238]) -Greedy action tensor([-0.6531, 0.6495, 0.0639, -0.2752]) tensor([0.1222, 0.4494, 0.2502, 0.1783]) -Greedy action tensor([ 0.7239, 0.2883, 0.7465, -0.2382]) tensor([0.3277, 0.2120, 0.3352, 0.1252]) -Greedy action tensor([-0.8188, -1.0430, -0.3702, -0.2412]) tensor([0.1943, 0.1553, 0.3043, 0.3462]) -Greedy action tensor([0.5698, 1.0460, 2.2852, 0.5713]) tensor([0.1090, 0.1756, 0.6062, 0.1092]) -Greedy action tensor([ 0.7390, 0.7654, -0.5549, -0.1215]) tensor([0.3671, 0.3769, 0.1007, 0.1553]) -Greedy action tensor([ 1.7248, -0.2355, -0.1619, 0.9465]) tensor([0.5709, 0.0804, 0.0865, 0.2622]) -Greedy action tensor([-0.0053, -1.1485, 1.7378, -0.7874]) tensor([0.1335, 0.0426, 0.7629, 0.0611]) -Greedy action tensor([ 0.1954, 0.3421, 0.6110, -0.6415]) tensor([0.2435, 0.2820, 0.3690, 0.1055]) -Greedy action tensor([ 0.6247, -1.7695, -0.2927, 0.1963]) tensor([0.4668, 0.0426, 0.1865, 0.3041]) -Greedy action tensor([-0.6522, 0.1810, 0.8275, -1.2635]) tensor([0.1214, 0.2794, 0.5333, 0.0659]) -Greedy action tensor([ 0.6730, -0.4372, -0.3959, 0.4367]) tensor([0.4061, 0.1338, 0.1394, 0.3206]) -Greedy action tensor([-0.8662, -0.5984, -0.6882, 0.6355]) tensor([0.1251, 0.1636, 0.1495, 0.5618]) -Greedy action tensor([-1.2858, -0.6868, 0.6897, -1.2375]) tensor([0.0902, 0.1643, 0.6507, 0.0947]) -Greedy action tensor([1.4837, 0.0551, 1.1029, 0.4489]) tensor([0.4389, 0.1052, 0.2999, 0.1559]) -Greedy action tensor([-0.2264, -1.3602, -0.1623, 0.9792]) tensor([0.1746, 0.0562, 0.1862, 0.5830]) -Greedy action tensor([ 1.0990, -1.0993, 0.8572, -0.9126]) tensor([0.4926, 0.0547, 0.3868, 0.0659]) -Greedy action tensor([ 0.0126, -1.4643, -0.2472, -0.4388]) tensor([0.3793, 0.0866, 0.2925, 0.2415]) -Greedy action tensor([ 0.2635, -0.1340, -0.3129, -0.0724]) tensor([0.3391, 0.2279, 0.1906, 0.2424]) -Greedy action tensor([ 1.1089, 0.5872, 1.1755, -0.1257]) tensor([0.3386, 0.2010, 0.3619, 0.0985]) -Greedy action tensor([ 0.7577, -0.5069, -0.0338, -0.3613]) tensor([0.4849, 0.1369, 0.2198, 0.1584]) -Greedy action tensor([ 0.8261, -0.4149, 0.0444, -0.3726]) tensor([0.4882, 0.1411, 0.2234, 0.1472]) -Greedy action tensor([ 0.9127, -0.8735, -0.0142, -0.4252]) tensor([0.5477, 0.0918, 0.2168, 0.1437]) -Greedy action tensor([ 0.7186, -0.1837, -0.1298, -0.1680]) tensor([0.4453, 0.1806, 0.1906, 0.1835]) -Greedy action tensor([ 0.7595, -0.4031, -0.0588, -0.1638]) tensor([0.4649, 0.1454, 0.2051, 0.1847]) -Greedy action tensor([ 0.9302, -0.5130, -0.1203, -0.2894]) tensor([0.5316, 0.1255, 0.1859, 0.1570]) -Greedy action tensor([ 0.7533, -0.6895, -0.0278, -0.1301]) tensor([0.4745, 0.1121, 0.2173, 0.1961]) -Greedy action tensor([ 0.8256, -0.2746, -0.0398, -0.2182]) tensor([0.4749, 0.1580, 0.1999, 0.1672]) -Greedy action tensor([ 0.7507, -0.3326, -0.0294, -0.1375]) tensor([0.4529, 0.1533, 0.2076, 0.1863]) -Greedy action tensor([ 0.7873, -0.6439, -0.0669, -0.5431]) tensor([0.5184, 0.1239, 0.2206, 0.1370]) -Greedy action tensor([ 0.8147, -0.2467, 0.0869, -0.2927]) tensor([0.4631, 0.1602, 0.2237, 0.1530]) -Greedy action tensor([ 0.4919, 0.1723, -0.0935, -0.0378]) tensor([0.3482, 0.2529, 0.1939, 0.2050]) -Greedy action tensor([ 0.6207, -0.2590, -0.1386, -0.0486]) tensor([0.4175, 0.1732, 0.1954, 0.2138]) -Greedy action tensor([ 0.5492, -0.4613, -0.0548, -0.1346]) tensor([0.4140, 0.1507, 0.2263, 0.2089]) -Greedy action tensor([ 0.3449, -0.3526, -0.1342, -0.3458]) tensor([0.3819, 0.1901, 0.2365, 0.1914]) -Greedy action tensor([ 1.0054, -0.2790, -0.1433, -0.4171]) tensor([0.5450, 0.1509, 0.1728, 0.1314]) -Greedy action tensor([ 0.2482, -0.1281, -0.0006, -0.0983]) tensor([0.3151, 0.2163, 0.2457, 0.2229]) -Greedy action tensor([ 1.0516, -0.3223, -0.0039, -0.2609]) tensor([0.5347, 0.1353, 0.1861, 0.1439]) -Greedy action tensor([ 0.3048, -0.2485, -0.1886, 0.0276]) tensor([0.3397, 0.1954, 0.2074, 0.2575]) -Greedy action tensor([ 0.4319, -0.3029, 0.0038, -0.4285]) tensor([0.3915, 0.1878, 0.2551, 0.1656]) -Greedy action tensor([ 0.9118, -0.5032, -0.1682, -0.8495]) tensor([0.5700, 0.1385, 0.1936, 0.0979]) -Greedy action tensor([ 0.6154, -0.3204, 0.0014, -0.0971]) tensor([0.4126, 0.1618, 0.2233, 0.2023]) -Greedy action tensor([ 0.5160, -0.4354, -0.2326, -0.0167]) tensor([0.4088, 0.1579, 0.1934, 0.2400]) -Greedy action tensor([ 0.6839, -0.2066, 0.0642, -0.1258]) tensor([0.4178, 0.1715, 0.2248, 0.1859]) -Greedy action tensor([ 0.6943, -0.5571, -0.1079, -0.2993]) tensor([0.4751, 0.1359, 0.2130, 0.1759]) -Greedy action tensor([ 0.5620, 0.2049, -0.1500, 0.1684]) tensor([0.3491, 0.2442, 0.1713, 0.2355]) -Greedy action tensor([ 0.8630, -0.8811, 0.0287, -0.2236]) tensor([0.5138, 0.0898, 0.2231, 0.1733]) -Greedy action tensor([ 0.8798, -0.6332, 0.0020, -0.6524]) tensor([0.5399, 0.1189, 0.2245, 0.1167]) -Greedy action tensor([ 0.8982, -0.4534, 0.0709, -0.1487]) tensor([0.4885, 0.1264, 0.2136, 0.1715]) -Greedy action tensor([ 1.0193, -0.8099, -0.0528, -0.5929]) tensor([0.5875, 0.0943, 0.2011, 0.1172]) -Greedy action tensor([ 0.7522, -0.8189, -0.0466, -0.4701]) tensor([0.5122, 0.1064, 0.2304, 0.1509]) -Greedy action tensor([ 0.5068, -0.1797, -0.0884, -0.2130]) tensor([0.3934, 0.1980, 0.2170, 0.1915]) -Greedy action tensor([ 0.9447, -0.5116, -0.0352, -0.4947]) tensor([0.5419, 0.1263, 0.2034, 0.1285]) -Greedy action tensor([ 0.2806, 0.2790, -0.0714, -0.0643]) tensor([0.2932, 0.2928, 0.2062, 0.2077]) -Greedy action tensor([ 0.4293, -0.2496, -0.2626, -0.7868]) tensor([0.4340, 0.2201, 0.2173, 0.1286]) -Greedy action tensor([ 0.8277, -0.1013, 0.0695, -0.1441]) tensor([0.4461, 0.1762, 0.2090, 0.1688]) -Greedy action tensor([ 0.6914, -0.4359, -0.0654, -0.3060]) tensor([0.4626, 0.1498, 0.2170, 0.1706]) -Greedy action tensor([ 1.1822, -0.8785, 0.0813, -0.6291]) tensor([0.6160, 0.0785, 0.2049, 0.1007]) -Greedy action tensor([ 1.4770, -0.8834, 0.0168, -0.4073]) tensor([0.6764, 0.0638, 0.1570, 0.1028]) -Greedy action tensor([ 0.1892, 0.1205, -0.0312, 0.0980]) tensor([0.2741, 0.2559, 0.2199, 0.2502]) -Greedy action tensor([ 0.5644, -0.3726, -0.0413, -0.1554]) tensor([0.4125, 0.1616, 0.2251, 0.2008]) -Greedy action tensor([ 0.6708, -0.3873, -0.1675, -0.4659]) tensor([0.4761, 0.1653, 0.2059, 0.1528]) -Greedy action tensor([ 0.5625, 0.3602, -0.1814, 0.2274]) tensor([0.3325, 0.2716, 0.1580, 0.2378]) -Greedy action tensor([ 0.4281, -0.1446, -0.0894, -0.1996]) tensor([0.3712, 0.2094, 0.2213, 0.1982]) -Greedy action tensor([ 0.9362, -0.7434, 0.1106, -0.8862]) tensor([0.5599, 0.1044, 0.2452, 0.0905]) -Greedy action tensor([ 0.6198, -0.4727, 0.1020, -0.0293]) tensor([0.4076, 0.1367, 0.2428, 0.2129]) -Greedy action tensor([ 1.0593, -0.4990, -0.2392, -0.4761]) tensor([0.5887, 0.1239, 0.1607, 0.1268]) -Greedy action tensor([ 0.7567, -0.4921, -0.0944, -0.2874]) tensor([0.4841, 0.1389, 0.2067, 0.1704]) -Greedy action tensor([ 0.9698, -0.5527, -0.1958, -0.3565]) tensor([0.5570, 0.1215, 0.1736, 0.1479]) -Greedy action tensor([ 0.5411, -0.6870, -0.2173, -0.2705]) tensor([0.4534, 0.1328, 0.2124, 0.2014]) -Greedy action tensor([ 0.8040, -0.0517, -0.0033, 0.0190]) tensor([0.4297, 0.1826, 0.1917, 0.1960]) -Greedy action tensor([ 0.8645, -0.3829, 0.0056, -0.1920]) tensor([0.4858, 0.1395, 0.2058, 0.1689]) -Greedy action tensor([ 0.7070, -0.7850, 0.0436, -0.9737]) tensor([0.5191, 0.1168, 0.2674, 0.0967]) -Greedy action tensor([ 0.8791, -0.2016, 0.0180, -0.2805]) tensor([0.4818, 0.1635, 0.2036, 0.1511]) -Greedy action tensor([ 0.6077, -0.2448, 0.1820, -0.0700]) tensor([0.3865, 0.1648, 0.2525, 0.1963]) -Greedy action tensor([ 0.8121, -0.7113, -0.0532, -0.1537]) tensor([0.4951, 0.1079, 0.2084, 0.1885]) -Greedy action tensor([ 0.7348, -0.4092, -0.0009, -0.3402]) tensor([0.4675, 0.1489, 0.2240, 0.1596]) -Greedy action tensor([ 0.7047, -0.3214, 0.0421, -0.2452]) tensor([0.4423, 0.1585, 0.2280, 0.1711]) -Greedy action tensor([ 0.8287, -0.3783, 0.1364, -0.2686]) tensor([0.4688, 0.1402, 0.2346, 0.1565]) -Greedy action tensor([ 0.8460, -0.5677, -0.0732, -0.4485]) tensor([0.5219, 0.1269, 0.2081, 0.1430]) -Greedy action tensor([ 0.9780, -0.3927, -0.2036, -0.3446]) tensor([0.5473, 0.1390, 0.1679, 0.1458]) -Greedy action tensor([ 0.9206, -0.5217, 0.0703, -0.4919]) tensor([0.5243, 0.1239, 0.2240, 0.1277]) -Greedy action tensor([ 0.7033, -0.4215, -0.0536, -0.3052]) tensor([0.4633, 0.1504, 0.2173, 0.1690]) -Greedy action tensor([ 0.8720, -0.6128, -0.0492, -0.3751]) tensor([0.5230, 0.1185, 0.2082, 0.1503]) -Greedy action tensor([ 0.8226, -0.6464, 0.0294, -0.4245]) tensor([0.5076, 0.1168, 0.2297, 0.1459]) -Greedy action tensor([ 0.9156, -0.8985, -0.0946, -0.8003]) tensor([0.5859, 0.0955, 0.2133, 0.1053]) -Greedy action tensor([ 0.2793, 0.0735, -0.1589, -0.0540]) tensor([0.3149, 0.2563, 0.2032, 0.2256]) -Greedy action tensor([ 0.5796, -0.1506, -0.1235, -0.1912]) tensor([0.4099, 0.1975, 0.2029, 0.1896]) -Greedy action tensor([ 0.4571, -0.3309, -0.1537, -0.3485]) tensor([0.4091, 0.1860, 0.2221, 0.1828]) -Greedy action tensor([ 0.6029, -0.3109, -0.0710, -0.5301]) tensor([0.4479, 0.1796, 0.2283, 0.1443]) -Greedy action tensor([ 1.2298, -0.7341, 0.0281, -0.4582]) tensor([0.6150, 0.0863, 0.1849, 0.1137]) -Greedy action tensor([ 0.7451, -0.5670, 0.0654, -0.3375]) tensor([0.4729, 0.1273, 0.2396, 0.1602]) -Greedy action tensor([ 0.9654, -0.5393, 0.0035, -0.3130]) tensor([0.5311, 0.1180, 0.2030, 0.1479]) -Greedy action tensor([ 0.6804, -0.3572, 0.0350, -0.1448]) tensor([0.4316, 0.1529, 0.2264, 0.1891]) -Greedy action tensor([ 0.5299, -0.2611, -0.1229, -0.1622]) tensor([0.4041, 0.1832, 0.2104, 0.2023]) -Greedy action tensor([ 0.7841, -0.4221, -0.0963, -0.2918]) tensor([0.4866, 0.1457, 0.2018, 0.1659]) -Greedy action tensor([ 0.8456, -0.2150, -0.4256, -0.4844]) tensor([0.5288, 0.1831, 0.1483, 0.1398]) -Greedy action tensor([ 0.6410, -0.4205, -0.2017, 0.0086]) tensor([0.4333, 0.1499, 0.1866, 0.2302]) -Greedy action tensor([ 0.3634, 0.1887, -0.1995, -0.0983]) tensor([0.3290, 0.2763, 0.1874, 0.2073]) -Greedy action tensor([ 0.6954, -0.6047, -0.1095, -0.2838]) tensor([0.4773, 0.1301, 0.2134, 0.1793]) -Greedy action tensor([ 1.2718, -0.5272, 0.0024, -0.3971]) tensor([0.6116, 0.1012, 0.1719, 0.1153]) -Greedy action tensor([-1.9267, -0.4314, 0.6573, -0.1692]) tensor([0.0408, 0.1820, 0.5406, 0.2366]) -Greedy action tensor([-1.9227, -0.4100, 0.6587, -0.1652]) tensor([0.0407, 0.1849, 0.5382, 0.2362]) -Greedy action tensor([-1.8426, -0.3553, 0.6344, -0.1083]) tensor([0.0435, 0.1924, 0.5177, 0.2463]) -Greedy action tensor([-1.8554, -0.4743, 0.6229, -0.1525]) tensor([0.0447, 0.1777, 0.5324, 0.2452]) -Greedy action tensor([-1.5087, -0.4765, 0.7725, -0.6092]) tensor([0.0623, 0.1749, 0.6097, 0.1531]) -Greedy action tensor([-1.8784, -0.4407, 0.6337, -0.1511]) tensor([0.0432, 0.1818, 0.5323, 0.2428]) -Greedy action tensor([-0.3380, 0.9919, 0.0112, 0.1024]) tensor([0.1290, 0.4877, 0.1829, 0.2004]) -Greedy action tensor([-1.5406, -0.0129, 0.4245, -0.0680]) tensor([0.0585, 0.2694, 0.4172, 0.2549]) -Greedy action tensor([-0.8481, -0.6317, 0.2908, -0.1130]) tensor([0.1342, 0.1666, 0.4192, 0.2799]) -Greedy action tensor([-1.9301, -0.3644, 0.6447, -0.1723]) tensor([0.0405, 0.1937, 0.5312, 0.2347]) -Greedy action tensor([-0.5556, 0.7246, 0.0273, 0.0199]) tensor([0.1225, 0.4405, 0.2193, 0.2177]) -Greedy action tensor([-1.9120, -0.3927, 0.6454, -0.1678]) tensor([0.0413, 0.1889, 0.5333, 0.2365]) -Greedy action tensor([-1.9111, -0.3607, 0.6397, -0.1495]) tensor([0.0411, 0.1936, 0.5263, 0.2391]) -Greedy action tensor([-1.8807, -0.4264, 0.6387, -0.1261]) tensor([0.0426, 0.1823, 0.5289, 0.2462]) -Greedy action tensor([-1.8375, -0.4450, 0.6188, -0.1237]) tensor([0.0450, 0.1810, 0.5244, 0.2496]) -Greedy action tensor([-1.7922, -0.3808, 0.6303, -0.1019]) tensor([0.0459, 0.1882, 0.5172, 0.2487]) -Greedy action tensor([-1.7790, -0.3537, 0.5937, -0.0160]) tensor([0.0461, 0.1915, 0.4940, 0.2685]) -Greedy action tensor([-1.8119, -0.4976, 0.5965, -0.1178]) tensor([0.0470, 0.1749, 0.5224, 0.2557]) -Greedy action tensor([-1.7029, -0.5127, 0.5390, -0.0684]) tensor([0.0531, 0.1746, 0.4999, 0.2723]) -Greedy action tensor([-0.7957, 1.0055, 0.1382, 0.1125]) tensor([0.0828, 0.5013, 0.2106, 0.2053]) -Greedy action tensor([-1.9455, -0.4487, 0.6673, -0.1814]) tensor([0.0401, 0.1791, 0.5468, 0.2340]) -Greedy action tensor([-1.8957, -0.2721, 0.6229, -0.1576]) tensor([0.0414, 0.2098, 0.5135, 0.2353]) -Greedy action tensor([-0.8554, -0.1829, 0.2535, -0.0848]) tensor([0.1227, 0.2404, 0.3718, 0.2651]) -Greedy action tensor([-1.6523, 0.1149, 0.4718, 0.0817]) tensor([0.0479, 0.2803, 0.4006, 0.2712]) -Greedy action tensor([-1.8935, -0.4405, 0.6387, -0.1460]) tensor([0.0424, 0.1812, 0.5332, 0.2433]) -Greedy action tensor([-1.6330, -0.3195, 0.5425, 0.0580]) tensor([0.0528, 0.1963, 0.4647, 0.2863]) -Greedy action tensor([-1.8272, -0.4362, 0.6016, -0.1188]) tensor([0.0457, 0.1836, 0.5184, 0.2522]) -Greedy action tensor([-1.9164, -0.4435, 0.6528, -0.1616]) tensor([0.0413, 0.1802, 0.5395, 0.2389]) -Greedy action tensor([-1.9355, -0.4453, 0.6655, -0.1751]) tensor([0.0404, 0.1795, 0.5450, 0.2351]) -Greedy action tensor([-1.3808, -0.1899, 0.6073, 0.0880]) tensor([0.0628, 0.2064, 0.4582, 0.2726]) -Greedy action tensor([-1.8278, -0.4388, 0.5942, -0.1027]) tensor([0.0457, 0.1832, 0.5147, 0.2564]) -Greedy action tensor([-0.7380, 1.0071, 0.0629, 0.3377]) tensor([0.0841, 0.4818, 0.1874, 0.2467]) -Greedy action tensor([-1.1147, -0.6451, 0.2113, 0.3641]) tensor([0.0930, 0.1487, 0.3502, 0.4080]) -Greedy action tensor([-1.8853, -0.4134, 0.6527, -0.1493]) tensor([0.0422, 0.1840, 0.5342, 0.2396]) -Greedy action tensor([-1.6524, 0.3791, 0.4383, -0.0434]) tensor([0.0461, 0.3512, 0.3726, 0.2302]) -Greedy action tensor([-1.7148, -0.1434, 0.5960, -0.0033]) tensor([0.0467, 0.2246, 0.4704, 0.2584]) -Greedy action tensor([-1.9254, -0.4251, 0.6612, -0.1673]) tensor([0.0407, 0.1825, 0.5407, 0.2361]) -Greedy action tensor([-1.7094, -0.3525, 0.5209, -0.0302]) tensor([0.0512, 0.1987, 0.4759, 0.2743]) -Greedy action tensor([-1.1613, -0.5164, 0.4798, 0.5112]) tensor([0.0747, 0.1423, 0.3854, 0.3977]) -Greedy action tensor([-1.8339, -0.4203, 0.6098, -0.1272]) tensor([0.0452, 0.1857, 0.5202, 0.2489]) -Greedy action tensor([-1.5212, -0.4407, 0.5630, 0.2550]) tensor([0.0559, 0.1647, 0.4493, 0.3302]) -Greedy action tensor([-1.5744, -0.5394, 0.8237, 0.5275]) tensor([0.0435, 0.1224, 0.4784, 0.3558]) -Greedy action tensor([-1.8863, -0.4436, 0.6394, -0.1490]) tensor([0.0427, 0.1807, 0.5339, 0.2427]) -Greedy action tensor([-1.9379, -0.4382, 0.6625, -0.1772]) tensor([0.0404, 0.1809, 0.5438, 0.2349]) -Greedy action tensor([-1.6112, -0.2484, 0.5974, 0.0424]) tensor([0.0520, 0.2031, 0.4732, 0.2717]) -Greedy action tensor([-1.9165, -0.4447, 0.6702, -0.1578]) tensor([0.0409, 0.1782, 0.5434, 0.2374]) -Greedy action tensor([-0.9131, -0.0775, 0.5077, 1.1543]) tensor([0.0651, 0.1502, 0.2697, 0.5149]) -Greedy action tensor([-1.9360, -0.3957, 0.6524, -0.1767]) tensor([0.0403, 0.1883, 0.5370, 0.2344]) -Greedy action tensor([-0.7846, -0.3770, 0.6088, 0.5870]) tensor([0.0955, 0.1435, 0.3847, 0.3763]) -Greedy action tensor([-1.5277, -0.3505, 0.4434, 0.0308]) tensor([0.0618, 0.2006, 0.4438, 0.2938]) -Greedy action tensor([-1.4893, -0.4664, 0.4975, 0.1880]) tensor([0.0609, 0.1693, 0.4440, 0.3258]) -Greedy action tensor([-1.9064, -0.4366, 0.6480, -0.1534]) tensor([0.0417, 0.1813, 0.5364, 0.2407]) -Greedy action tensor([-1.8470, -0.4086, 0.6620, -0.0970]) tensor([0.0430, 0.1811, 0.5285, 0.2474]) -Greedy action tensor([-1.8738, -0.4261, 0.6295, -0.1356]) tensor([0.0432, 0.1836, 0.5277, 0.2455]) -Greedy action tensor([-1.8259, -0.4894, 0.6036, -0.1240]) tensor([0.0462, 0.1758, 0.5245, 0.2534]) -Greedy action tensor([-1.9046, -0.4432, 0.6466, -0.1608]) tensor([0.0419, 0.1808, 0.5375, 0.2398]) -Greedy action tensor([-1.4828, -0.5933, 0.4528, -0.0399]) tensor([0.0685, 0.1668, 0.4747, 0.2900]) -Greedy action tensor([-1.9005, -0.4565, 0.6504, -0.1416]) tensor([0.0419, 0.1776, 0.5372, 0.2433]) -Greedy action tensor([-1.8840, -0.3835, 0.6483, -0.1331]) tensor([0.0420, 0.1882, 0.5281, 0.2417]) -Greedy action tensor([-1.9050, -0.3938, 0.6417, -0.1566]) tensor([0.0416, 0.1885, 0.5309, 0.2390]) -Greedy action tensor([-1.1143, -0.1842, 0.2293, 0.0828]) tensor([0.0936, 0.2374, 0.3590, 0.3100]) -Greedy action tensor([-1.8610, -0.3082, 0.6092, -0.1375]) tensor([0.0432, 0.2041, 0.5107, 0.2420]) -Greedy action tensor([-1.8540, -0.4173, 0.6311, -0.1270]) tensor([0.0438, 0.1842, 0.5257, 0.2463]) -Greedy action tensor([-0.9743, 0.3559, 0.1642, 0.0275]) tensor([0.0941, 0.3559, 0.2938, 0.2563]) -Greedy action tensor([-1.2553, -0.2396, 0.5057, 0.1820]) tensor([0.0725, 0.2003, 0.4219, 0.3053]) -Greedy action tensor([-1.9445, -0.4475, 0.6669, -0.1797]) tensor([0.0401, 0.1792, 0.5463, 0.2343]) -Greedy action tensor([-0.9331, -0.2871, 0.3477, 0.6206]) tensor([0.0890, 0.1698, 0.3204, 0.4208]) -Greedy action tensor([-1.9171, -0.3608, 0.6433, -0.1663]) tensor([0.0409, 0.1940, 0.5295, 0.2356]) -Greedy action tensor([-1.8343, -0.3558, 0.5994, -0.1071]) tensor([0.0446, 0.1957, 0.5087, 0.2510]) -Greedy action tensor([-1.7939, -0.5219, 0.6012, -0.1151]) tensor([0.0479, 0.1707, 0.5249, 0.2565]) -Greedy action tensor([-1.9470, -0.4513, 0.6663, -0.1821]) tensor([0.0401, 0.1789, 0.5469, 0.2341]) -Greedy action tensor([-1.7959, -0.2052, 0.5392, -0.0909]) tensor([0.0460, 0.2257, 0.4752, 0.2531]) -Greedy action tensor([-1.5630, -0.0691, 0.5014, 0.0760]) tensor([0.0541, 0.2410, 0.4263, 0.2786]) -Greedy action tensor([-1.6685, -0.3788, 0.6109, 0.0555]) tensor([0.0500, 0.1815, 0.4883, 0.2802]) -Greedy action tensor([-1.5777, 0.2378, 0.4236, 0.1267]) tensor([0.0499, 0.3066, 0.3692, 0.2743]) -Greedy action tensor([-1.5155, 0.0282, 0.5495, -0.6128]) tensor([0.0624, 0.2920, 0.4918, 0.1538]) -Greedy action tensor([-1.7339, 0.0568, 0.4829, -0.0593]) tensor([0.0465, 0.2787, 0.4267, 0.2481]) -Greedy action tensor([-1.8831, -0.3823, 0.6251, -0.1349]) tensor([0.0425, 0.1908, 0.5224, 0.2443]) -Greedy action tensor([-1.8840, -0.3247, 0.6313, -0.1295]) tensor([0.0418, 0.1989, 0.5174, 0.2418]) -Greedy action tensor([-1.5762, -0.3762, 0.4497, 0.1040]) tensor([0.0579, 0.1922, 0.4391, 0.3108]) -Greedy action tensor([-1.8911, -0.3885, 0.6409, -0.1438]) tensor([0.0420, 0.1887, 0.5283, 0.2410]) -Greedy action tensor([ 1.7415, -0.7393, -0.4921, 0.1449]) tensor([0.7177, 0.0601, 0.0769, 0.1454]) -Greedy action tensor([ 1.1219, -0.5887, -0.2794, 0.3844]) tensor([0.5248, 0.0949, 0.1293, 0.2510]) -Greedy action tensor([ 1.2703, -0.1371, -0.4575, 0.0178]) tensor([0.5854, 0.1433, 0.1040, 0.1673]) -Greedy action tensor([ 1.6543, -0.4543, -0.6617, 0.3329]) tensor([0.6726, 0.0817, 0.0664, 0.1794]) -Greedy action tensor([ 1.9358, -0.5711, -0.0842, 0.7783]) tensor([0.6543, 0.0533, 0.0868, 0.2056]) -Greedy action tensor([ 1.1522, -0.2127, -0.1628, 0.0889]) tensor([0.5350, 0.1366, 0.1436, 0.1847]) -Greedy action tensor([ 1.5917, -0.1676, -0.3748, 0.0932]) tensor([0.6512, 0.1121, 0.0911, 0.1455]) -Greedy action tensor([ 1.7370, -0.7232, -0.0901, 0.2773]) tensor([0.6763, 0.0578, 0.1088, 0.1571]) -Greedy action tensor([ 0.7520, -0.5339, 0.0725, -0.0778]) tensor([0.4506, 0.1245, 0.2284, 0.1965]) -Greedy action tensor([ 0.8156, -0.2454, -0.2513, 0.4434]) tensor([0.4203, 0.1455, 0.1446, 0.2897]) -Greedy action tensor([ 1.0570e+00, -5.0110e-04, -5.0087e-01, 5.3459e-01]) tensor([0.4649, 0.1615, 0.0979, 0.2757]) -Greedy action tensor([ 1.4938, 0.0022, -0.6121, 0.4368]) tensor([0.5902, 0.1328, 0.0719, 0.2051]) -Greedy action tensor([ 2.4835, -1.5064, -0.1988, 0.6784]) tensor([0.7991, 0.0148, 0.0547, 0.1314]) -Greedy action tensor([ 1.4582, -0.2919, -0.5726, 0.3188]) tensor([0.6154, 0.1069, 0.0808, 0.1969]) -Greedy action tensor([ 1.4812, -0.6474, -0.3372, 0.1056]) tensor([0.6519, 0.0776, 0.1058, 0.1647]) -Greedy action tensor([ 1.3539, -0.7313, 0.3777, 0.1569]) tensor([0.5546, 0.0689, 0.2089, 0.1675]) -Greedy action tensor([ 1.3439, -0.4323, -0.8593, 0.1781]) tensor([0.6284, 0.1064, 0.0694, 0.1959]) -Greedy action tensor([ 1.1753, 0.1076, -0.5174, -0.0156]) tensor([0.5459, 0.1877, 0.1005, 0.1659]) -Greedy action tensor([ 1.7349, -0.5671, -0.4763, 0.1862]) tensor([0.7032, 0.0704, 0.0770, 0.1494]) -Greedy action tensor([ 1.2928, -0.1527, -0.4113, 0.3175]) tensor([0.5572, 0.1313, 0.1014, 0.2101]) -Greedy action tensor([ 1.1884, -0.4551, -0.1389, 0.2648]) tensor([0.5389, 0.1042, 0.1429, 0.2140]) -Greedy action tensor([ 1.6825, -0.5643, -0.3691, 0.2133]) tensor([0.6829, 0.0722, 0.0878, 0.1571]) -Greedy action tensor([ 1.6120, -1.0337, -0.3616, 0.3135]) tensor([0.6744, 0.0478, 0.0937, 0.1841]) -Greedy action tensor([ 1.9222, -0.4283, -0.2563, 0.6303]) tensor([0.6742, 0.0643, 0.0763, 0.1852]) -Greedy action tensor([ 2.2877, 0.0299, -0.0702, 0.0965]) tensor([0.7628, 0.0798, 0.0722, 0.0853]) -Greedy action tensor([ 1.6147, -0.8570, -0.1567, 0.5588]) tensor([0.6240, 0.0527, 0.1062, 0.2171]) -Greedy action tensor([ 1.3581, -0.1885, -0.2453, 0.1636]) tensor([0.5824, 0.1240, 0.1172, 0.1764]) -Greedy action tensor([ 0.9506, -0.6666, -0.0535, 0.1171]) tensor([0.5002, 0.0993, 0.1832, 0.2173]) -Greedy action tensor([ 1.2649, -0.5977, -0.5385, 1.2553]) tensor([0.4328, 0.0672, 0.0713, 0.4287]) -Greedy action tensor([2.2377, 0.5720, 0.1958, 0.2507]) tensor([0.6868, 0.1299, 0.0891, 0.0942]) -Greedy action tensor([ 1.9115, -1.2998, -0.1823, 0.3252]) tensor([0.7309, 0.0295, 0.0901, 0.1496]) -Greedy action tensor([ 2.0492, -1.1161, -0.0466, 1.2629]) tensor([0.6170, 0.0260, 0.0759, 0.2811]) -Greedy action tensor([ 1.6204, -0.6963, -0.5220, 0.7374]) tensor([0.6137, 0.0605, 0.0720, 0.2538]) -Greedy action tensor([ 1.3333, -0.2865, -0.0751, 0.1473]) tensor([0.5721, 0.1132, 0.1399, 0.1747]) -Greedy action tensor([ 1.6056, -0.2407, -0.0907, 0.2514]) tensor([0.6253, 0.0987, 0.1146, 0.1614]) -Greedy action tensor([ 1.7072, -0.3359, -1.1463, 0.2108]) tensor([0.7086, 0.0919, 0.0408, 0.1587]) -Greedy action tensor([ 1.5906, 0.1255, -0.1976, 0.5835]) tensor([0.5670, 0.1310, 0.0948, 0.2071]) -Greedy action tensor([ 1.3728, -0.4828, -0.3937, 0.2505]) tensor([0.6050, 0.0946, 0.1034, 0.1970]) -Greedy action tensor([ 1.1587, -0.1578, -0.7802, 0.2371]) tensor([0.5525, 0.1481, 0.0795, 0.2198]) -Greedy action tensor([ 2.0990, -0.7553, -0.1028, 0.6279]) tensor([0.7154, 0.0412, 0.0791, 0.1643]) -Greedy action tensor([ 1.6127, -0.4649, -0.1634, 0.0694]) tensor([0.6630, 0.0830, 0.1122, 0.1417]) -Greedy action tensor([ 1.6969, 0.0991, -0.9060, 0.1701]) tensor([0.6695, 0.1355, 0.0496, 0.1454]) -Greedy action tensor([ 1.6625, -0.8205, -0.2175, 0.2194]) tensor([0.6792, 0.0567, 0.1036, 0.1604]) -Greedy action tensor([ 1.1203, -0.5486, -0.3707, 0.3119]) tensor([0.5379, 0.1014, 0.1211, 0.2397]) -Greedy action tensor([ 1.3114, 0.1263, -0.7334, 0.2015]) tensor([0.5667, 0.1732, 0.0733, 0.1868]) -Greedy action tensor([ 1.2879, 0.4843, 0.1848, -0.1786]) tensor([0.4975, 0.2227, 0.1651, 0.1148]) -Greedy action tensor([ 1.2739, -0.2523, -0.3224, 0.3817]) tensor([0.5465, 0.1188, 0.1108, 0.2239]) -Greedy action tensor([ 1.3341, -0.6890, -0.4130, 0.3561]) tensor([0.5943, 0.0786, 0.1036, 0.2235]) -Greedy action tensor([ 1.1246, -0.0762, -0.2293, 0.0316]) tensor([0.5279, 0.1589, 0.1363, 0.1769]) -Greedy action tensor([ 1.4510, -0.2009, -0.4018, 0.2642]) tensor([0.6047, 0.1159, 0.0948, 0.1846]) -Greedy action tensor([ 1.7178, -0.9317, 0.2206, 0.6599]) tensor([0.6091, 0.0431, 0.1363, 0.2115]) -Greedy action tensor([ 2.0665, -0.8348, -0.5734, 0.4639]) tensor([0.7532, 0.0414, 0.0538, 0.1517]) -Greedy action tensor([ 2.0254, -0.9515, -0.3410, 0.3843]) tensor([0.7471, 0.0381, 0.0701, 0.1448]) -Greedy action tensor([ 1.1765, -0.0966, -0.9302, 0.3920]) tensor([0.5382, 0.1507, 0.0655, 0.2456]) -Greedy action tensor([ 1.2046, 0.0504, -0.6088, 0.4810]) tensor([0.5093, 0.1606, 0.0831, 0.2470]) -Greedy action tensor([ 1.2294, 0.2697, -0.8022, 0.3394]) tensor([0.5196, 0.1990, 0.0681, 0.2133]) -Greedy action tensor([ 1.3455, -0.2220, -0.4543, 0.4211]) tensor([0.5648, 0.1178, 0.0934, 0.2241]) -Greedy action tensor([ 1.7890, -0.4010, -0.2733, 0.7137]) tensor([0.6328, 0.0708, 0.0805, 0.2159]) -Greedy action tensor([ 1.6768, -0.4230, -0.0806, 0.2328]) tensor([0.6532, 0.0800, 0.1127, 0.1541]) -Greedy action tensor([ 0.9331, -0.2577, -0.1945, 0.0868]) tensor([0.4862, 0.1478, 0.1574, 0.2086]) -Greedy action tensor([ 1.6065, -0.4792, -0.3408, 0.1534]) tensor([0.6663, 0.0828, 0.0951, 0.1558]) -Greedy action tensor([ 0.8483, -0.2025, 0.0233, -0.0853]) tensor([0.4585, 0.1603, 0.2009, 0.1802]) -Greedy action tensor([ 1.8041, -0.7440, -0.1535, 0.5866]) tensor([0.6599, 0.0516, 0.0932, 0.1953]) -Greedy action tensor([ 2.5414, -1.1832, -0.3235, 1.1045]) tensor([0.7583, 0.0183, 0.0432, 0.1802]) -Greedy action tensor([ 1.7222, -0.6071, -0.3296, 0.4421]) tensor([0.6649, 0.0647, 0.0854, 0.1849]) -Greedy action tensor([ 1.7584, 0.1849, -0.1342, -0.2788]) tensor([0.6719, 0.1393, 0.1012, 0.0876]) -Greedy action tensor([ 1.7109, -0.7883, -0.1250, 0.3780]) tensor([0.6643, 0.0546, 0.1059, 0.1752]) -Greedy action tensor([ 1.1577, -0.4018, -0.3520, 1.0298]) tensor([0.4327, 0.0910, 0.0956, 0.3807]) -Greedy action tensor([ 1.0149, -0.3861, -0.1407, 0.0892]) tensor([0.5109, 0.1259, 0.1609, 0.2024]) -Greedy action tensor([ 1.2669, -0.5415, -0.1894, 0.3931]) tensor([0.5512, 0.0903, 0.1285, 0.2300]) -Greedy action tensor([ 1.6576, -0.4999, -0.3576, 0.2432]) tensor([0.6703, 0.0775, 0.0893, 0.1629]) -Greedy action tensor([ 2.5813, -0.4976, -0.0355, 0.6393]) tensor([0.7921, 0.0364, 0.0579, 0.1136]) -Greedy action tensor([ 1.9999, -0.5920, -0.3240, 0.5729]) tensor([0.7078, 0.0530, 0.0693, 0.1699]) -Greedy action tensor([ 1.5358e+00, -1.4132e-03, -5.6201e-01, 1.7324e-01]) tensor([0.6275, 0.1349, 0.0770, 0.1606]) -Greedy action tensor([ 1.8020, -0.6344, 0.1497, 0.4223]) tensor([0.6533, 0.0571, 0.1252, 0.1644]) -Greedy action tensor([ 1.4589, -0.1967, -0.6929, -0.2273]) tensor([0.6700, 0.1280, 0.0779, 0.1241]) -Greedy action tensor([ 1.3178, -0.0102, 0.4908, -0.1149]) tensor([0.5152, 0.1365, 0.2253, 0.1229]) -Greedy action tensor([ 0.8357, -0.3684, -0.0737, 0.3696]) tensor([0.4291, 0.1287, 0.1729, 0.2693]) -Greedy action tensor([ 2.1367, -0.7849, -0.6647, 0.6081]) tensor([0.7511, 0.0404, 0.0456, 0.1629]) -Greedy action tensor([ 1.7334, -1.1087, -0.1909, 0.1373]) tensor([0.7108, 0.0414, 0.1037, 0.1441]) -Greedy action tensor([ 2.0716, -0.5376, -0.3827, 0.1113]) tensor([0.7690, 0.0566, 0.0661, 0.1083]) -Greedy action tensor([-0.6094, 0.3372, 0.2739, -0.1178]) tensor([0.1310, 0.3377, 0.3170, 0.2143]) -Greedy action tensor([ 1.4423, -0.3076, 0.5578, -0.2150]) tensor([0.5626, 0.0978, 0.2323, 0.1073]) -Greedy action tensor([-1.1779, -1.0874, 0.0481, -1.1437]) tensor([0.1530, 0.1675, 0.5213, 0.1583]) -Greedy action tensor([ 0.2389, -1.5908, 1.0330, 0.2153]) tensor([0.2299, 0.0369, 0.5087, 0.2245]) -Greedy action tensor([-0.0539, -1.7086, -0.4600, 0.7515]) tensor([0.2442, 0.0467, 0.1627, 0.5464]) -Greedy action tensor([ 1.5589, 0.4692, -0.3501, -0.3817]) tensor([0.6142, 0.2066, 0.0910, 0.0882]) -Greedy action tensor([ 0.3514, -0.4724, -1.0942, 0.0187]) tensor([0.4182, 0.1835, 0.0985, 0.2998]) -Greedy action tensor([ 0.3035, -0.6218, 1.4425, 0.8140]) tensor([0.1617, 0.0641, 0.5049, 0.2693]) -Greedy action tensor([ 0.9554, -0.3836, 0.2253, -0.1888]) tensor([0.4849, 0.1271, 0.2336, 0.1544]) -Greedy action tensor([ 0.1416, -0.1041, 0.1412, -0.5382]) tensor([0.3041, 0.2378, 0.3040, 0.1541]) -Greedy action tensor([ 0.3533, -0.3047, 0.5127, -0.2073]) tensor([0.3066, 0.1588, 0.3596, 0.1750]) -Greedy action tensor([-0.8683, 0.3574, 0.0485, -0.3509]) tensor([0.1165, 0.3968, 0.2913, 0.1954]) -Greedy action tensor([ 1.7308, -1.0570, 1.2317, -0.0516]) tensor([0.5444, 0.0335, 0.3305, 0.0916]) -Greedy action tensor([ 0.7862, -0.0743, 0.2700, 0.6883]) tensor([0.3417, 0.1445, 0.2039, 0.3098]) -Greedy action tensor([ 0.3324, -0.4898, 1.1152, -0.5698]) tensor([0.2480, 0.1090, 0.5425, 0.1006]) -Greedy action tensor([-0.1562, 0.2776, -0.0921, -0.0026]) tensor([0.2094, 0.3231, 0.2233, 0.2442]) -Greedy action tensor([-0.9476, -1.0025, 0.2679, -0.8339]) tensor([0.1553, 0.1470, 0.5237, 0.1740]) -Greedy action tensor([ 0.3047, 0.2981, -0.0616, -0.1501]) tensor([0.3011, 0.2991, 0.2087, 0.1911]) -Greedy action tensor([ 0.2911, 0.1202, 0.1500, -0.6023]) tensor([0.3205, 0.2701, 0.2783, 0.1311]) -Greedy action tensor([ 0.6755, -1.1808, 1.2263, -0.4714]) tensor([0.3117, 0.0487, 0.5406, 0.0990]) -Greedy action tensor([ 0.2820, 0.0905, -0.0730, 0.3737]) tensor([0.2760, 0.2279, 0.1935, 0.3025]) -Greedy action tensor([-0.2845, -0.3954, 0.4487, -1.2138]) tensor([0.2287, 0.2047, 0.4762, 0.0903]) -Greedy action tensor([ 0.1544, -0.6374, -0.0302, 0.6748]) tensor([0.2521, 0.1142, 0.2096, 0.4242]) -Greedy action tensor([ 1.0399, -1.2804, 0.0793, 0.6154]) tensor([0.4684, 0.0460, 0.1792, 0.3064]) -Greedy action tensor([0.4743, 0.0711, 0.1642, 0.1692]) tensor([0.3186, 0.2129, 0.2337, 0.2348]) -Greedy action tensor([ 1.0397, -1.0001, 0.2939, 0.3466]) tensor([0.4752, 0.0618, 0.2254, 0.2376]) -Greedy action tensor([1.3241, 0.0768, 1.3226, 0.6436]) tensor([0.3581, 0.1029, 0.3576, 0.1814]) -Greedy action tensor([0.2731, 1.0105, 0.4314, 0.4848]) tensor([0.1819, 0.3802, 0.2131, 0.2248]) -Greedy action tensor([-0.8146, -0.8607, 1.0180, -2.0081]) tensor([0.1175, 0.1122, 0.7346, 0.0356]) -Greedy action tensor([ 0.3395, 0.2120, 0.5024, -0.0828]) tensor([0.2694, 0.2371, 0.3170, 0.1766]) -Greedy action tensor([-0.1617, -2.0535, -0.2191, 0.4201]) tensor([0.2575, 0.0388, 0.2431, 0.4606]) -Greedy action tensor([-1.2369, -0.7036, 1.0182, -1.3772]) tensor([0.0763, 0.1300, 0.7274, 0.0663]) -Greedy action tensor([-0.4100, -0.6030, -0.6900, -0.1612]) tensor([0.2589, 0.2134, 0.1957, 0.3320]) -Greedy action tensor([ 0.5858, -0.8173, -0.1218, 0.5994]) tensor([0.3633, 0.0893, 0.1791, 0.3683]) -Greedy action tensor([-0.2699, -0.6003, 0.9762, -0.9601]) tensor([0.1755, 0.1261, 0.6103, 0.0880]) -Greedy action tensor([ 1.1009, -1.6378, 0.5452, -0.2371]) tensor([0.5261, 0.0340, 0.3018, 0.1380]) -Greedy action tensor([-0.8458, -0.1999, 1.2258, -0.8372]) tensor([0.0844, 0.1609, 0.6696, 0.0851]) -Greedy action tensor([ 0.8350, -1.2788, 1.3736, 0.9588]) tensor([0.2521, 0.0305, 0.4320, 0.2854]) -Greedy action tensor([-0.7630, -0.6096, -0.4787, -0.1683]) tensor([0.1884, 0.2197, 0.2504, 0.3415]) -Greedy action tensor([ 1.7710, -1.2023, -0.0196, 0.4879]) tensor([0.6688, 0.0342, 0.1116, 0.1854]) -Greedy action tensor([-0.2803, 0.2945, 0.6507, -1.1865]) tensor([0.1749, 0.3107, 0.4437, 0.0707]) -Greedy action tensor([-0.3277, -0.5254, -0.5818, -0.6255]) tensor([0.2995, 0.2458, 0.2323, 0.2224]) -Greedy action tensor([-1.0354, -1.4079, 0.1813, -0.5128]) tensor([0.1481, 0.1021, 0.5000, 0.2498]) -Greedy action tensor([-0.2442, 0.5871, -0.4094, -0.6346]) tensor([0.2074, 0.4763, 0.1758, 0.1404]) -Greedy action tensor([ 0.6117, -0.8908, -0.6135, -0.1905]) tensor([0.5090, 0.1133, 0.1495, 0.2282]) -Greedy action tensor([ 0.3685, -0.6058, 0.0875, -0.4418]) tensor([0.3880, 0.1465, 0.2930, 0.1726]) -Greedy action tensor([ 0.1859, -0.4638, 0.8761, 0.1733]) tensor([0.2220, 0.1159, 0.4428, 0.2192]) -Greedy action tensor([-0.5083, 0.3536, 0.4741, -0.9545]) tensor([0.1497, 0.3545, 0.3999, 0.0958]) -Greedy action tensor([ 0.5231, -0.6449, -0.4459, 1.0002]) tensor([0.3029, 0.0942, 0.1149, 0.4880]) -Greedy action tensor([-0.3642, -0.9649, -0.4269, -0.1004]) tensor([0.2639, 0.1447, 0.2479, 0.3436]) -Greedy action tensor([-0.3311, 0.1090, -0.1132, -1.1786]) tensor([0.2367, 0.3676, 0.2943, 0.1014]) -Greedy action tensor([ 0.1982, -0.4466, -0.5317, -0.4570]) tensor([0.3959, 0.2077, 0.1908, 0.2056]) -Greedy action tensor([-0.6349, -0.0552, -0.5031, 0.7060]) tensor([0.1290, 0.2304, 0.1472, 0.4933]) -Greedy action tensor([ 0.0568, -1.4602, -0.2494, -0.0039]) tensor([0.3452, 0.0757, 0.2542, 0.3249]) -Greedy action tensor([ 0.7997, -1.4142, 0.1601, 0.4572]) tensor([0.4261, 0.0466, 0.2248, 0.3025]) -Greedy action tensor([ 0.1670, -1.1400, -0.6302, 0.8619]) tensor([0.2685, 0.0727, 0.1210, 0.5379]) -Greedy action tensor([ 0.1151, -0.7469, 0.1710, -0.5714]) tensor([0.3352, 0.1416, 0.3545, 0.1687]) -Greedy action tensor([ 0.2607, -1.2780, 0.4291, -0.1362]) tensor([0.3257, 0.0699, 0.3854, 0.2190]) -Greedy action tensor([ 0.2410, -1.3277, 0.1010, -0.2280]) tensor([0.3699, 0.0771, 0.3216, 0.2314]) -Greedy action tensor([ 0.9814, -0.1805, -1.0699, 1.2003]) tensor([0.3723, 0.1165, 0.0479, 0.4634]) -Greedy action tensor([-1.4326e-01, -5.5353e-04, 7.5470e-01, 2.4261e-01]) tensor([0.1645, 0.1897, 0.4038, 0.2420]) -Greedy action tensor([-0.3506, -0.5704, -0.0234, 0.1741]) tensor([0.2049, 0.1645, 0.2842, 0.3463]) -Greedy action tensor([ 0.6279, 0.0085, -0.1597, 0.9185]) tensor([0.3003, 0.1616, 0.1366, 0.4015]) -Greedy action tensor([-0.0139, -0.9657, -0.5258, -0.1653]) tensor([0.3515, 0.1357, 0.2107, 0.3021]) -Greedy action tensor([ 0.1203, -0.8696, 0.8553, 0.0224]) tensor([0.2292, 0.0852, 0.4779, 0.2078]) -Greedy action tensor([ 1.1764, 0.9214, -0.1100, -1.0004]) tensor([0.4620, 0.3580, 0.1276, 0.0524]) -Greedy action tensor([ 0.6323, -0.5289, -0.0568, -0.5714]) tensor([0.4728, 0.1480, 0.2373, 0.1419]) -Greedy action tensor([ 0.8887, -1.4165, 0.3702, 1.2731]) tensor([0.3161, 0.0315, 0.1882, 0.4642]) -Greedy action tensor([-0.0557, -0.0493, 0.9340, 0.0098]) tensor([0.1735, 0.1746, 0.4667, 0.1852]) -Greedy action tensor([0.2242, 0.5358, 0.5494, 0.4813]) tensor([0.1983, 0.2708, 0.2745, 0.2564]) -Greedy action tensor([ 1.4602, -0.4632, 1.7767, 1.1992]) tensor([0.3041, 0.0444, 0.4173, 0.2342]) -Greedy action tensor([ 0.3286, -0.0453, 0.1836, -0.3056]) tensor([0.3243, 0.2231, 0.2805, 0.1720]) -Greedy action tensor([-0.1704, -2.1264, -1.0630, 1.5725]) tensor([0.1376, 0.0195, 0.0564, 0.7865]) -Greedy action tensor([ 0.0374, -1.0096, 1.1745, -0.2234]) tensor([0.1909, 0.0670, 0.5951, 0.1471]) -Greedy action tensor([-0.1103, -1.4996, -0.9419, 0.5619]) tensor([0.2745, 0.0684, 0.1195, 0.5376]) -Greedy action tensor([ 5.2358e-04, -4.2632e-01, 6.5742e-01, -2.9791e-01]) tensor([0.2313, 0.1509, 0.4461, 0.1716]) -Greedy action tensor([ 0.5684, -0.0992, -1.1118, -1.1997]) tensor([0.5348, 0.2743, 0.0996, 0.0913]) -Greedy action tensor([-0.4697, -0.5457, 0.2581, -1.1614]) tensor([0.2223, 0.2060, 0.4603, 0.1113]) -Greedy action tensor([0.4392, 0.3522, 0.1963, 0.8870]) tensor([0.2344, 0.2149, 0.1839, 0.3668]) -Greedy action tensor([ 0.4580, -1.2840, 0.5238, -0.0324]) tensor([0.3502, 0.0613, 0.3740, 0.2145]) -Greedy action tensor([ 0.8007, -0.8618, 0.7327, 1.1077]) tensor([0.2871, 0.0545, 0.2682, 0.3902]) -Greedy action tensor([ 0.6598, -0.6712, 0.0213, -0.4400]) tensor([0.4705, 0.1243, 0.2485, 0.1567]) -Greedy action tensor([ 0.2958, -0.1634, -0.1686, 0.1005]) tensor([0.3244, 0.2049, 0.2039, 0.2668]) -Greedy action tensor([ 0.4788, -0.1875, 0.0076, 0.0088]) tensor([0.3620, 0.1859, 0.2259, 0.2262]) -Greedy action tensor([ 0.7723, -0.4538, -0.0922, -0.3590]) tensor([0.4908, 0.1440, 0.2068, 0.1584]) -Greedy action tensor([ 0.7267, -0.3770, -0.2311, -0.6330]) tensor([0.5071, 0.1682, 0.1946, 0.1302]) -Greedy action tensor([ 0.8913, -1.0583, 0.0680, -0.6782]) tensor([0.5588, 0.0795, 0.2453, 0.1163]) -Greedy action tensor([ 0.7068, -0.4860, 0.0154, -0.3994]) tensor([0.4684, 0.1421, 0.2346, 0.1549]) -Greedy action tensor([ 0.7125, -0.4452, -0.0438, -0.1922]) tensor([0.4570, 0.1436, 0.2145, 0.1849]) -Greedy action tensor([ 0.5273, -0.4549, -0.1244, -0.0778]) tensor([0.4095, 0.1534, 0.2134, 0.2236]) -Greedy action tensor([ 1.0704, -0.7197, -0.0047, -0.5932]) tensor([0.5891, 0.0983, 0.2010, 0.1116]) -Greedy action tensor([ 1.2316, -1.0784, -0.0198, -0.6282]) tensor([0.6489, 0.0644, 0.1856, 0.1010]) -Greedy action tensor([ 0.9075, -0.5625, -0.2021, -0.3974]) tensor([0.5462, 0.1256, 0.1801, 0.1481]) -Greedy action tensor([ 0.7745, -0.7095, 0.0519, -0.4039]) tensor([0.4951, 0.1122, 0.2403, 0.1524]) -Greedy action tensor([ 0.7434, -0.7099, 0.0840, -0.7452]) tensor([0.5059, 0.1183, 0.2616, 0.1142]) -Greedy action tensor([ 0.4150, -0.3758, 0.1583, -0.3388]) tensor([0.3707, 0.1681, 0.2868, 0.1744]) -Greedy action tensor([ 0.6249, -0.4022, -0.0088, -0.3987]) tensor([0.4448, 0.1593, 0.2361, 0.1598]) -Greedy action tensor([ 0.4303, -0.1579, -0.0291, 0.0018]) tensor([0.3523, 0.1956, 0.2225, 0.2295]) -Greedy action tensor([ 0.7238, -0.2658, -0.0143, -0.1548]) tensor([0.4415, 0.1641, 0.2110, 0.1834]) -Greedy action tensor([ 1.2529, -0.6396, -0.0127, -0.9230]) tensor([0.6467, 0.0975, 0.1824, 0.0734]) -Greedy action tensor([ 1.1738, -0.6654, -0.1233, -0.5744]) tensor([0.6225, 0.0989, 0.1701, 0.1084]) -Greedy action tensor([ 0.8361, -0.8353, -0.1180, -0.3532]) tensor([0.5326, 0.1001, 0.2051, 0.1621]) -Greedy action tensor([ 0.7655, -0.5431, 0.0639, -0.3046]) tensor([0.4742, 0.1281, 0.2351, 0.1626]) -Greedy action tensor([ 0.5817, -0.3583, 0.0255, -0.0631]) tensor([0.4018, 0.1570, 0.2304, 0.2109]) -Greedy action tensor([ 1.1644, -1.0084, -0.1627, -0.8017]) tensor([0.6583, 0.0750, 0.1746, 0.0922]) -Greedy action tensor([ 0.6436, -0.6948, -0.1666, -0.2078]) tensor([0.4686, 0.1229, 0.2084, 0.2000]) -Greedy action tensor([ 0.3423, -0.0363, 0.3214, -0.0940]) tensor([0.3021, 0.2069, 0.2958, 0.1953]) -Greedy action tensor([ 0.5801, -0.3057, -0.1281, -0.1649]) tensor([0.4202, 0.1733, 0.2070, 0.1995]) -Greedy action tensor([ 0.7857, -0.3866, -0.0667, -0.0888]) tensor([0.4645, 0.1438, 0.1980, 0.1937]) -Greedy action tensor([ 0.4206, -0.0463, -0.0496, -0.0413]) tensor([0.3470, 0.2175, 0.2168, 0.2186]) -Greedy action tensor([ 0.9690, -0.5802, -0.0306, -0.2860]) tensor([0.5361, 0.1139, 0.1973, 0.1528]) -Greedy action tensor([ 1.0618, -0.7694, -0.0702, -0.3598]) tensor([0.5801, 0.0929, 0.1870, 0.1400]) -Greedy action tensor([ 0.8246, -0.2899, 0.0585, -0.1497]) tensor([0.4608, 0.1512, 0.2142, 0.1739]) -Greedy action tensor([ 0.6982, -0.0843, -0.0501, -0.0344]) tensor([0.4147, 0.1896, 0.1963, 0.1994]) -Greedy action tensor([ 0.4498, 0.0133, -0.0288, -0.0161]) tensor([0.3456, 0.2234, 0.2142, 0.2169]) -Greedy action tensor([ 0.5633, -0.5674, -0.2615, -0.1571]) tensor([0.4449, 0.1436, 0.1950, 0.2165]) -Greedy action tensor([ 0.5155, -0.4127, -0.0539, -0.2061]) tensor([0.4086, 0.1615, 0.2312, 0.1986]) -Greedy action tensor([ 1.0959, -0.6946, -0.1586, -0.5222]) tensor([0.6059, 0.1011, 0.1728, 0.1201]) -Greedy action tensor([ 0.8288, -0.5030, -0.1215, -0.4945]) tensor([0.5217, 0.1377, 0.2017, 0.1389]) -Greedy action tensor([ 0.7061, -0.3406, 0.0618, -0.1862]) tensor([0.4375, 0.1536, 0.2297, 0.1792]) -Greedy action tensor([ 0.6063, 0.2153, -0.1500, 0.1627]) tensor([0.3587, 0.2427, 0.1684, 0.2302]) -Greedy action tensor([ 0.5037, -0.1743, -0.0374, -0.3946]) tensor([0.4005, 0.2033, 0.2331, 0.1631]) -Greedy action tensor([ 0.4348, -0.1535, -0.0957, -0.2188]) tensor([0.3754, 0.2085, 0.2209, 0.1953]) -Greedy action tensor([ 1.0017, -0.6086, 0.0460, -0.4310]) tensor([0.5485, 0.1096, 0.2109, 0.1309]) -Greedy action tensor([ 0.0848, 0.4439, -0.0703, -0.5325]) tensor([0.2612, 0.3741, 0.2237, 0.1409]) -Greedy action tensor([ 0.7393, -0.8591, 0.1642, -0.6451]) tensor([0.4962, 0.1003, 0.2792, 0.1243]) -Greedy action tensor([ 0.7048, -0.3272, 0.0296, -0.3484]) tensor([0.4516, 0.1609, 0.2299, 0.1575]) -Greedy action tensor([ 0.8320, -0.6678, -0.1349, -0.2802]) tensor([0.5175, 0.1155, 0.1968, 0.1702]) -Greedy action tensor([ 0.4407, 0.0904, 0.0559, -0.2957]) tensor([0.3492, 0.2460, 0.2376, 0.1672]) -Greedy action tensor([ 0.7234, -0.4404, -0.0272, -0.2089]) tensor([0.4591, 0.1434, 0.2167, 0.1807]) -Greedy action tensor([ 0.5589, -0.4899, -0.0381, -0.2411]) tensor([0.4255, 0.1491, 0.2342, 0.1912]) -Greedy action tensor([ 0.8750, -0.6964, 0.1500, -0.5247]) tensor([0.5158, 0.1072, 0.2498, 0.1272]) -Greedy action tensor([ 0.6130, -0.1124, -0.1761, 0.0452]) tensor([0.3992, 0.1933, 0.1813, 0.2262]) -Greedy action tensor([ 0.8553, -0.3680, 0.0823, -0.3119]) tensor([0.4838, 0.1423, 0.2233, 0.1506]) -Greedy action tensor([ 0.6888, -0.6196, -0.0299, -0.3137]) tensor([0.4707, 0.1272, 0.2294, 0.1727]) -Greedy action tensor([ 0.7179, -0.1287, -0.0306, -0.1394]) tensor([0.4299, 0.1844, 0.2034, 0.1824]) -Greedy action tensor([ 0.6954, -0.4553, -0.0490, -0.2565]) tensor([0.4592, 0.1453, 0.2182, 0.1773]) -Greedy action tensor([ 0.4113, -0.0512, -0.0418, -0.2404]) tensor([0.3589, 0.2260, 0.2281, 0.1870]) -Greedy action tensor([ 0.7150, -0.4185, -0.1475, -0.1091]) tensor([0.4582, 0.1475, 0.1934, 0.2010]) -Greedy action tensor([ 0.9218, -0.6636, -0.0368, -0.3453]) tensor([0.5348, 0.1096, 0.2051, 0.1506]) -Greedy action tensor([ 1.2384, -0.9207, 0.1353, -0.4267]) tensor([0.6111, 0.0705, 0.2028, 0.1156]) -Greedy action tensor([ 0.5682, -0.4207, -0.1105, -0.2809]) tensor([0.4334, 0.1612, 0.2199, 0.1854]) -Greedy action tensor([ 0.6770, 0.1768, 0.1271, -0.1713]) tensor([0.3829, 0.2322, 0.2209, 0.1639]) -Greedy action tensor([ 0.3361, 0.2072, -0.1108, -0.2195]) tensor([0.3234, 0.2843, 0.2068, 0.1855]) -Greedy action tensor([ 1.3322, -0.8984, 0.0087, -0.7855]) tensor([0.6694, 0.0719, 0.1782, 0.0805]) -Greedy action tensor([ 0.5731, -0.1945, 0.1379, -0.3529]) tensor([0.3988, 0.1851, 0.2581, 0.1580]) -Greedy action tensor([ 0.6623, -0.4609, -0.1084, -0.2939]) tensor([0.4603, 0.1497, 0.2130, 0.1769]) -Greedy action tensor([ 0.4560, -0.3204, -0.1655, -0.0741]) tensor([0.3867, 0.1779, 0.2077, 0.2276]) -Greedy action tensor([ 0.6050, -0.3043, 0.0426, -0.2489]) tensor([0.4169, 0.1680, 0.2376, 0.1775]) -Greedy action tensor([ 1.0238, -0.7727, -0.0357, -0.7019]) tensor([0.5915, 0.0981, 0.2050, 0.1053]) -Greedy action tensor([ 0.1919, -0.1039, -0.1081, -0.5188]) tensor([0.3360, 0.2500, 0.2489, 0.1651]) -Greedy action tensor([ 0.6285, -0.6703, -0.1823, -0.3643]) tensor([0.4790, 0.1307, 0.2129, 0.1775]) -Greedy action tensor([ 0.9624, -0.6538, 0.0138, -0.4857]) tensor([0.5492, 0.1091, 0.2127, 0.1291]) -Greedy action tensor([ 0.3307, -0.2553, -0.1326, -0.2962]) tensor([0.3677, 0.2046, 0.2313, 0.1964]) -Greedy action tensor([ 1.0531, -1.0058, -0.0575, -0.3451]) tensor([0.5869, 0.0749, 0.1933, 0.1450]) -Greedy action tensor([ 0.5109, -0.5184, -0.2035, -0.4816]) tensor([0.4510, 0.1611, 0.2208, 0.1671]) -Greedy action tensor([ 0.2607, 0.0826, -0.0805, -0.2318]) tensor([0.3166, 0.2649, 0.2251, 0.1935]) -Greedy action tensor([ 0.6476, -0.2049, -0.0812, -0.2477]) tensor([0.4315, 0.1840, 0.2082, 0.1763]) -Greedy action tensor([ 0.6948, 0.1045, -0.3713, -0.1545]) tensor([0.4299, 0.2382, 0.1480, 0.1839]) -Greedy action tensor([ 1.2040, -0.6663, -0.0223, -0.3768]) tensor([0.6049, 0.0932, 0.1774, 0.1245]) -Greedy action tensor([ 0.9538, -0.8101, 0.1294, -0.5692]) tensor([0.5471, 0.0938, 0.2399, 0.1193]) -Greedy action tensor([ 0.8022, -0.3384, 0.1127, -0.1341]) tensor([0.4518, 0.1444, 0.2267, 0.1771]) -Greedy action tensor([-1.4371, -0.4559, 0.5428, 0.2039]) tensor([0.0622, 0.1660, 0.4507, 0.3211]) -Greedy action tensor([-1.8157, -0.4645, 0.6041, -0.1152]) tensor([0.0463, 0.1789, 0.5210, 0.2538]) -Greedy action tensor([-1.8152, -0.3791, 0.5979, -0.1212]) tensor([0.0458, 0.1927, 0.5120, 0.2494]) -Greedy action tensor([-1.7374, -0.3721, 0.6339, 0.0552]) tensor([0.0462, 0.1811, 0.4951, 0.2776]) -Greedy action tensor([-1.9464, -0.4533, 0.6707, -0.1816]) tensor([0.0400, 0.1781, 0.5481, 0.2337]) -Greedy action tensor([-1.7681, -0.3148, 0.5603, -0.0989]) tensor([0.0480, 0.2052, 0.4922, 0.2546]) -Greedy action tensor([-1.8623, -0.3443, 0.6181, -0.1316]) tensor([0.0432, 0.1971, 0.5159, 0.2438]) -Greedy action tensor([-1.6385, 0.0853, 0.4822, -0.1431]) tensor([0.0515, 0.2889, 0.4297, 0.2299]) -Greedy action tensor([-1.3395e+00, 9.9296e-03, 3.2783e-01, 8.4174e-04]) tensor([0.0716, 0.2759, 0.3791, 0.2734]) -Greedy action tensor([-1.2017, -0.1877, 0.6185, 0.9056]) tensor([0.0551, 0.1518, 0.3400, 0.4531]) -Greedy action tensor([-0.6061, 0.7500, 0.0579, 0.0162]) tensor([0.1151, 0.4468, 0.2236, 0.2145]) -Greedy action tensor([-0.3065, 0.9656, -0.0281, 0.1313]) tensor([0.1344, 0.4797, 0.1776, 0.2083]) -Greedy action tensor([-1.8879, -0.4579, 0.6559, -0.1220]) tensor([0.0421, 0.1759, 0.5358, 0.2462]) -Greedy action tensor([-1.2978, 0.7041, 0.2069, 0.1744]) tensor([0.0579, 0.4288, 0.2608, 0.2525]) -Greedy action tensor([-1.8812, -0.4545, 0.6405, -0.1487]) tensor([0.0430, 0.1790, 0.5350, 0.2430]) -Greedy action tensor([-1.9415, -0.4584, 0.6718, -0.1771]) tensor([0.0402, 0.1771, 0.5482, 0.2346]) -Greedy action tensor([-1.7559, -0.5151, 0.5714, -0.1006]) tensor([0.0501, 0.1734, 0.5140, 0.2625]) -Greedy action tensor([-1.9011, -0.3938, 0.6362, -0.1506]) tensor([0.0418, 0.1888, 0.5287, 0.2407]) -Greedy action tensor([-1.3716, 0.6850, 0.2344, 0.2276]) tensor([0.0533, 0.4170, 0.2657, 0.2639]) -Greedy action tensor([-1.7979, -0.5129, 0.6037, -0.0942]) tensor([0.0473, 0.1709, 0.5220, 0.2598]) -Greedy action tensor([-1.7889, -0.4631, 0.5727, -0.1285]) tensor([0.0485, 0.1825, 0.5141, 0.2550]) -Greedy action tensor([-1.8013, -0.3023, 0.5737, -0.0895]) tensor([0.0459, 0.2057, 0.4939, 0.2545]) -Greedy action tensor([-1.4836, 0.5504, 0.3656, -0.0473]) tensor([0.0521, 0.3981, 0.3309, 0.2190]) -Greedy action tensor([-1.8448, -0.2536, 0.5877, -0.1317]) tensor([0.0438, 0.2149, 0.4985, 0.2428]) -Greedy action tensor([-1.8661, -0.2129, 0.5993, -0.1160]) tensor([0.0421, 0.2200, 0.4956, 0.2423]) -Greedy action tensor([-1.9273, -0.3828, 0.6490, -0.1673]) tensor([0.0406, 0.1901, 0.5335, 0.2358]) -Greedy action tensor([-1.9058, -0.3642, 0.6413, -0.1587]) tensor([0.0414, 0.1932, 0.5281, 0.2373]) -Greedy action tensor([-1.5666, 0.1807, 0.4538, 0.1032]) tensor([0.0510, 0.2929, 0.3849, 0.2711]) -Greedy action tensor([-1.9264, -0.4698, 0.7168, -0.1505]) tensor([0.0396, 0.1699, 0.5566, 0.2338]) -Greedy action tensor([-1.9056, -0.4441, 0.6416, -0.1567]) tensor([0.0420, 0.1810, 0.5359, 0.2412]) -Greedy action tensor([-1.9348, -0.4040, 0.6551, -0.1731]) tensor([0.0404, 0.1866, 0.5380, 0.2350]) -Greedy action tensor([-1.9010, -0.4250, 0.6452, -0.1555]) tensor([0.0419, 0.1834, 0.5347, 0.2401]) -Greedy action tensor([-1.8959, -0.3044, 0.6272, -0.1384]) tensor([0.0414, 0.2031, 0.5157, 0.2398]) -Greedy action tensor([-0.4279, 1.0697, 0.0485, 0.2836]) tensor([0.1097, 0.4903, 0.1766, 0.2234]) -Greedy action tensor([-1.8849, -0.4392, 0.6412, -0.1393]) tensor([0.0426, 0.1808, 0.5326, 0.2440]) -Greedy action tensor([-1.8833, -0.4069, 0.6260, -0.1434]) tensor([0.0428, 0.1873, 0.5262, 0.2438]) -Greedy action tensor([-1.1018, 0.4915, 0.0896, 0.2731]) tensor([0.0760, 0.3737, 0.2500, 0.3004]) -Greedy action tensor([-1.8882, -0.4556, 0.6424, -0.1552]) tensor([0.0427, 0.1790, 0.5366, 0.2417]) -Greedy action tensor([-1.9265, -0.4180, 0.6571, -0.1669]) tensor([0.0407, 0.1839, 0.5389, 0.2364]) -Greedy action tensor([-1.6331, -0.4094, 0.5406, -0.1294]) tensor([0.0565, 0.1922, 0.4970, 0.2543]) -Greedy action tensor([-1.8767, -0.4522, 0.6604, -0.0960]) tensor([0.0421, 0.1751, 0.5327, 0.2500]) -Greedy action tensor([-1.7300, -0.4443, 0.5523, -0.0593]) tensor([0.0507, 0.1833, 0.4966, 0.2694]) -Greedy action tensor([-1.8805, -0.3497, 0.6314, -0.1424]) tensor([0.0423, 0.1955, 0.5216, 0.2406]) -Greedy action tensor([-1.2545, -0.5954, 0.2912, 0.2985]) tensor([0.0810, 0.1565, 0.3799, 0.3826]) -Greedy action tensor([-1.7055, -0.3642, 0.5700, -0.0111]) tensor([0.0500, 0.1912, 0.4866, 0.2722]) -Greedy action tensor([-1.8459, -0.4484, 0.6371, -0.0936]) tensor([0.0439, 0.1775, 0.5255, 0.2531]) -Greedy action tensor([-1.8676, -0.3959, 0.6228, -0.1438]) tensor([0.0434, 0.1892, 0.5240, 0.2434]) -Greedy action tensor([-1.7995, -0.2165, 0.5670, -0.1139]) tensor([0.0456, 0.2221, 0.4862, 0.2461]) -Greedy action tensor([-1.7656, -0.5076, 0.5694, -0.0677]) tensor([0.0492, 0.1732, 0.5086, 0.2690]) -Greedy action tensor([-1.9238, -0.4185, 0.6548, -0.1684]) tensor([0.0409, 0.1841, 0.5386, 0.2365]) -Greedy action tensor([-1.9386, -0.4449, 0.6641, -0.1769]) tensor([0.0404, 0.1798, 0.5449, 0.2350]) -Greedy action tensor([-1.8216, -0.3878, 0.6389, -0.0815]) tensor([0.0442, 0.1856, 0.5181, 0.2521]) -Greedy action tensor([-1.8222, -0.4217, 0.6088, -0.1110]) tensor([0.0455, 0.1847, 0.5177, 0.2520]) -Greedy action tensor([-1.7114, -0.4901, 0.5541, -0.0960]) tensor([0.0525, 0.1780, 0.5056, 0.2639]) -Greedy action tensor([-1.2409, 0.5184, 0.1645, 0.1483]) tensor([0.0671, 0.3899, 0.2737, 0.2693]) -Greedy action tensor([-1.7655, -0.5148, 0.6308, -0.0303]) tensor([0.0473, 0.1652, 0.5194, 0.2682]) -Greedy action tensor([-1.4654, -0.5963, 0.4185, 0.1116]) tensor([0.0675, 0.1611, 0.4444, 0.3270]) -Greedy action tensor([-1.8167, -0.3131, 0.6037, -0.0720]) tensor([0.0445, 0.2002, 0.5006, 0.2547]) -Greedy action tensor([-1.8568, -0.4437, 0.6251, -0.1423]) tensor([0.0442, 0.1816, 0.5288, 0.2455]) -Greedy action tensor([-1.9458, -0.4506, 0.6688, -0.1800]) tensor([0.0401, 0.1786, 0.5472, 0.2341]) -Greedy action tensor([-1.8105, -0.4753, 0.5935, -0.1126]) tensor([0.0469, 0.1782, 0.5188, 0.2561]) -Greedy action tensor([-1.6933, -0.2449, 0.5075, -0.0052]) tensor([0.0508, 0.2161, 0.4585, 0.2746]) -Greedy action tensor([-1.8183, -0.2985, 0.6108, -0.0837]) tensor([0.0443, 0.2024, 0.5025, 0.2509]) -Greedy action tensor([-0.5650, -0.4736, 0.3047, -0.4585]) tensor([0.1788, 0.1959, 0.4265, 0.1988]) -Greedy action tensor([-1.8821, -0.3604, 0.6386, -0.1419]) tensor([0.0422, 0.1931, 0.5244, 0.2403]) -Greedy action tensor([-1.6820, -0.3489, 0.5187, -0.0862]) tensor([0.0533, 0.2022, 0.4815, 0.2630]) -Greedy action tensor([-1.7755, -0.4468, 0.5901, -0.0823]) tensor([0.0479, 0.1810, 0.5105, 0.2606]) -Greedy action tensor([-1.6940, -0.2727, 0.4820, -0.0390]) tensor([0.0521, 0.2159, 0.4592, 0.2727]) -Greedy action tensor([-1.9310, -0.4614, 0.6625, -0.1721]) tensor([0.0408, 0.1772, 0.5453, 0.2367]) -Greedy action tensor([-1.7785, -0.2036, 0.6049, -0.0643]) tensor([0.0450, 0.2173, 0.4878, 0.2498]) -Greedy action tensor([-1.7411, -0.4597, 0.5776, -0.0706]) tensor([0.0498, 0.1794, 0.5061, 0.2647]) -Greedy action tensor([-1.2270, -0.6152, 0.2804, 0.2403]) tensor([0.0855, 0.1576, 0.3860, 0.3708]) -Greedy action tensor([-1.7327, -0.3911, 0.6542, 0.0029]) tensor([0.0468, 0.1789, 0.5089, 0.2653]) -Greedy action tensor([-1.8491, -0.2763, 0.6090, -0.1161]) tensor([0.0432, 0.2081, 0.5044, 0.2443]) -Greedy action tensor([-1.2283, -0.1198, 0.5444, 0.3182]) tensor([0.0684, 0.2074, 0.4029, 0.3213]) -Greedy action tensor([-1.8253, -0.2465, 0.5929, -0.1003]) tensor([0.0441, 0.2137, 0.4948, 0.2474]) -Greedy action tensor([-0.7821, 0.2867, 0.0748, 0.1245]) tensor([0.1144, 0.3330, 0.2694, 0.2832]) -Greedy action tensor([-1.9055, -0.4698, 0.6510, -0.1604]) tensor([0.0420, 0.1764, 0.5412, 0.2404]) -Greedy action tensor([-1.7731, 0.0926, 0.4976, -0.0622]) tensor([0.0441, 0.2849, 0.4270, 0.2440]) -Greedy action tensor([-1.8753, -0.4518, 0.6367, -0.1485]) tensor([0.0433, 0.1797, 0.5337, 0.2434]) -Greedy action tensor([-1.9355, -0.4379, 0.6616, -0.1755]) tensor([0.0405, 0.1810, 0.5433, 0.2352]) -Greedy action tensor([ 0.9292, -0.2688, -0.3998, 0.1773]) tensor([0.4907, 0.1481, 0.1299, 0.2313]) -Greedy action tensor([ 0.3611, -0.2221, 0.1100, 0.1753]) tensor([0.3158, 0.1763, 0.2457, 0.2623]) -Greedy action tensor([ 1.5431, -0.4473, -0.7753, -0.0171]) tensor([0.6920, 0.0945, 0.0681, 0.1454]) -Greedy action tensor([ 1.4892, 0.0257, -0.1384, 0.4793]) tensor([0.5580, 0.1291, 0.1096, 0.2033]) -Greedy action tensor([ 2.4826, -1.3661, 0.0419, 0.7950]) tensor([0.7732, 0.0165, 0.0673, 0.1430]) -Greedy action tensor([ 1.2718, -0.0558, -0.8170, 0.2943]) tensor([0.5665, 0.1502, 0.0702, 0.2132]) -Greedy action tensor([ 1.2353, -0.3979, -0.2727, 0.3087]) tensor([0.5517, 0.1078, 0.1221, 0.2184]) -Greedy action tensor([ 1.8687, -0.8812, -0.1720, 0.5111]) tensor([0.6891, 0.0441, 0.0895, 0.1773]) -Greedy action tensor([ 1.1323, -0.4746, -0.3921, 0.3072]) tensor([0.5387, 0.1080, 0.1173, 0.2360]) -Greedy action tensor([ 1.4439, -0.7124, -0.1634, 0.4494]) tensor([0.5931, 0.0687, 0.1189, 0.2194]) -Greedy action tensor([ 1.0799, -0.4393, -0.1142, 0.2416]) tensor([0.5117, 0.1120, 0.1550, 0.2213]) -Greedy action tensor([ 0.8814, 0.0085, -0.6472, 0.1825]) tensor([0.4691, 0.1960, 0.1017, 0.2332]) -Greedy action tensor([ 1.2767, 0.3699, -0.4765, -0.2733]) tensor([0.5589, 0.2257, 0.0968, 0.1186]) -Greedy action tensor([ 1.7801, -0.5334, -0.5664, 0.3738]) tensor([0.6946, 0.0687, 0.0665, 0.1702]) -Greedy action tensor([ 1.5131, -0.4357, -0.7680, 0.1892]) tensor([0.6619, 0.0943, 0.0676, 0.1761]) -Greedy action tensor([ 1.5742, -0.7029, -0.3141, 0.3183]) tensor([0.6499, 0.0667, 0.0983, 0.1851]) -Greedy action tensor([ 1.6007, -0.5889, -0.2563, 0.4447]) tensor([0.6318, 0.0707, 0.0986, 0.1988]) -Greedy action tensor([ 1.3035, -0.0040, -0.2378, 0.2801]) tensor([0.5423, 0.1467, 0.1161, 0.1949]) -Greedy action tensor([ 1.8943, 0.2279, -0.2733, 0.2261]) tensor([0.6702, 0.1266, 0.0767, 0.1264]) -Greedy action tensor([ 1.2715, 0.1067, -0.4909, 0.3635]) tensor([0.5300, 0.1653, 0.0910, 0.2137]) -Greedy action tensor([ 1.5250, -0.1887, -0.1781, 0.2756]) tensor([0.6064, 0.1093, 0.1104, 0.1738]) -Greedy action tensor([ 1.1933, -0.4788, -0.6715, 0.3229]) tensor([0.5677, 0.1066, 0.0879, 0.2377]) -Greedy action tensor([ 1.7555, -0.8412, -0.3853, 0.4574]) tensor([0.6825, 0.0509, 0.0802, 0.1864]) -Greedy action tensor([ 2.0078, -1.0354, -0.6667, 0.5754]) tensor([0.7378, 0.0352, 0.0509, 0.1761]) -Greedy action tensor([ 0.8179, -0.0037, 0.1258, -0.5221]) tensor([0.4541, 0.1997, 0.2273, 0.1189]) -Greedy action tensor([ 1.6403, -0.2609, -0.4802, 0.2482]) tensor([0.6588, 0.0984, 0.0790, 0.1637]) -Greedy action tensor([ 1.2591, -0.5573, -0.1559, 0.1653]) tensor([0.5746, 0.0934, 0.1396, 0.1924]) -Greedy action tensor([ 2.2074, -1.0140, -0.3432, 0.4245]) tensor([0.7776, 0.0310, 0.0607, 0.1307]) -Greedy action tensor([ 1.4137, -0.3305, -0.3592, 0.3155]) tensor([0.5959, 0.1042, 0.1012, 0.1987]) -Greedy action tensor([ 0.9506, -0.5047, -0.0524, 0.1753]) tensor([0.4853, 0.1132, 0.1780, 0.2235]) -Greedy action tensor([ 1.3165, -0.1524, -0.6397, 0.8176]) tensor([0.5054, 0.1163, 0.0715, 0.3069]) -Greedy action tensor([ 1.3911, -0.7508, -0.4307, 0.4743]) tensor([0.5956, 0.0699, 0.0963, 0.2381]) -Greedy action tensor([ 1.0056, -0.2208, 0.0091, -0.0556]) tensor([0.4979, 0.1461, 0.1838, 0.1723]) -Greedy action tensor([ 1.3826, -0.4878, -0.3050, 0.0454]) tensor([0.6244, 0.0962, 0.1155, 0.1640]) -Greedy action tensor([ 0.9738, -0.2945, -0.6986, 0.5497]) tensor([0.4709, 0.1325, 0.0884, 0.3081]) -Greedy action tensor([ 1.6608, -0.5429, -0.2497, 0.3053]) tensor([0.6595, 0.0728, 0.0976, 0.1700]) -Greedy action tensor([ 1.5767, -0.3821, -0.5470, 0.0429]) tensor([0.6774, 0.0955, 0.0810, 0.1461]) -Greedy action tensor([ 1.2055, -0.4429, -0.5658, 0.5430]) tensor([0.5325, 0.1024, 0.0906, 0.2745]) -Greedy action tensor([ 1.2947, -0.4498, -0.1546, 0.3597]) tensor([0.5549, 0.0970, 0.1303, 0.2179]) -Greedy action tensor([ 1.4279, -0.1061, -0.8178, 0.1283]) tensor([0.6273, 0.1353, 0.0664, 0.1710]) -Greedy action tensor([ 2.3889, -0.8846, -0.2975, 0.8027]) tensor([0.7629, 0.0289, 0.0520, 0.1562]) -Greedy action tensor([ 1.3426, 0.2406, -0.5066, -0.5525]) tensor([0.6098, 0.2026, 0.0960, 0.0917]) -Greedy action tensor([ 1.4857, 0.2856, -0.3717, 0.5846]) tensor([0.5367, 0.1616, 0.0838, 0.2179]) -Greedy action tensor([ 1.0200, -0.1481, -0.3851, -0.1810]) tensor([0.5384, 0.1674, 0.1321, 0.1620]) -Greedy action tensor([ 1.9433, -0.4429, -0.5781, 0.6214]) tensor([0.6949, 0.0639, 0.0558, 0.1853]) -Greedy action tensor([ 1.5112, -0.2613, -0.1922, -0.5657]) tensor([0.6769, 0.1150, 0.1232, 0.0848]) -Greedy action tensor([ 1.9076, -0.0266, -0.6269, 0.2012]) tensor([0.7116, 0.1029, 0.0564, 0.1292]) -Greedy action tensor([ 1.3842, -0.4650, -0.5080, 0.3343]) tensor([0.6031, 0.0949, 0.0909, 0.2111]) -Greedy action tensor([ 1.9377, -0.6390, -0.5543, 0.3605]) tensor([0.7324, 0.0557, 0.0606, 0.1513]) -Greedy action tensor([ 2.4075, -1.1706, -0.1387, 0.4787]) tensor([0.7990, 0.0223, 0.0626, 0.1161]) -Greedy action tensor([ 1.9567, -0.5049, -0.5475, 0.2595]) tensor([0.7406, 0.0632, 0.0605, 0.1357]) -Greedy action tensor([ 2.6370, -1.1555, -0.4370, 0.5306]) tensor([0.8400, 0.0189, 0.0388, 0.1022]) -Greedy action tensor([ 1.0215, -0.4347, -0.0810, 0.3666]) tensor([0.4797, 0.1118, 0.1593, 0.2492]) -Greedy action tensor([ 1.7028, -0.7198, -0.4185, 0.5281]) tensor([0.6590, 0.0584, 0.0790, 0.2036]) -Greedy action tensor([ 1.3036, -0.0645, -0.5500, 0.2881]) tensor([0.5638, 0.1436, 0.0883, 0.2042]) -Greedy action tensor([ 1.4620, -0.5930, -0.2289, 0.3207]) tensor([0.6128, 0.0785, 0.1130, 0.1957]) -Greedy action tensor([ 1.1282, -0.0430, -0.3939, 0.2658]) tensor([0.5127, 0.1589, 0.1119, 0.2164]) -Greedy action tensor([ 1.6785, 0.1653, -0.1596, 0.4119]) tensor([0.6020, 0.1326, 0.0958, 0.1696]) -Greedy action tensor([ 1.6107, -0.3434, -0.4890, 0.2543]) tensor([0.6571, 0.0931, 0.0805, 0.1693]) -Greedy action tensor([ 1.6258, -0.6986, -0.2155, 0.2976]) tensor([0.6573, 0.0643, 0.1043, 0.1742]) -Greedy action tensor([ 1.6480, -0.8916, -0.3776, 0.3952]) tensor([0.6682, 0.0527, 0.0882, 0.1909]) -Greedy action tensor([ 2.3317, -1.1945, -0.2098, 0.8245]) tensor([0.7521, 0.0221, 0.0592, 0.1666]) -Greedy action tensor([ 1.2984, -0.0123, -0.6206, 0.1623]) tensor([0.5755, 0.1552, 0.0845, 0.1848]) -Greedy action tensor([ 1.3528, -0.3667, -0.3963, 0.2514]) tensor([0.5933, 0.1063, 0.1032, 0.1972]) -Greedy action tensor([ 1.6905, -0.5503, -0.3070, 0.5377]) tensor([0.6419, 0.0683, 0.0871, 0.2027]) -Greedy action tensor([ 1.2448, -0.6486, 0.0600, 0.4103]) tensor([0.5290, 0.0796, 0.1618, 0.2296]) -Greedy action tensor([ 1.3930, 0.1292, -0.5850, -0.4383]) tensor([0.6325, 0.1787, 0.0875, 0.1013]) -Greedy action tensor([ 1.3560, -0.4574, -0.7313, 0.4935]) tensor([0.5851, 0.0954, 0.0726, 0.2470]) -Greedy action tensor([ 1.6867, -0.4616, -0.5534, 0.1416]) tensor([0.6962, 0.0812, 0.0741, 0.1485]) -Greedy action tensor([ 1.6560, -0.6041, -0.2667, 0.3601]) tensor([0.6561, 0.0685, 0.0959, 0.1795]) -Greedy action tensor([ 1.1836, -0.6473, 0.1138, 0.1334]) tensor([0.5396, 0.0865, 0.1851, 0.1888]) -Greedy action tensor([ 1.6302, -0.7808, -0.2731, 0.4627]) tensor([0.6452, 0.0579, 0.0962, 0.2007]) -Greedy action tensor([ 1.4960, -0.3765, -0.4602, 0.6301]) tensor([0.5828, 0.0896, 0.0824, 0.2452]) -Greedy action tensor([ 1.5141, -0.3604, -0.5821, 0.4090]) tensor([0.6221, 0.0954, 0.0765, 0.2060]) -Greedy action tensor([ 1.7750, -1.1432, -0.2057, 0.4879]) tensor([0.6812, 0.0368, 0.0940, 0.1880]) -Greedy action tensor([ 1.3504, -0.6691, -0.8173, 0.1320]) tensor([0.6481, 0.0860, 0.0742, 0.1917]) -Greedy action tensor([ 2.1651, -0.5210, -0.4531, 0.9208]) tensor([0.6997, 0.0477, 0.0510, 0.2016]) -Greedy action tensor([ 1.3290, -0.1368, -0.9081, 0.4718]) tensor([0.5675, 0.1310, 0.0606, 0.2408]) -Greedy action tensor([ 1.4019, -0.2181, -0.4456, 0.8596]) tensor([0.5163, 0.1022, 0.0814, 0.3002]) -Greedy action tensor([ 1.1343, -0.5984, -0.1574, 0.1119]) tensor([0.5521, 0.0976, 0.1517, 0.1986]) -Greedy action tensor([ 1.4089, -0.3313, -0.5818, 0.6476]) tensor([0.5621, 0.0986, 0.0768, 0.2625]) -Greedy action tensor([-0.3453, -0.1919, -0.3336, -0.6820]) tensor([0.2570, 0.2996, 0.2600, 0.1835]) -Greedy action tensor([-0.9878, -0.7144, -0.1806, 0.8628]) tensor([0.0916, 0.1204, 0.2053, 0.5828]) -Greedy action tensor([-1.0703, -1.9263, -0.2722, 0.2875]) tensor([0.1327, 0.0564, 0.2949, 0.5160]) -Greedy action tensor([ 0.2928, -1.1776, 1.3496, 0.2936]) tensor([0.1958, 0.0450, 0.5633, 0.1959]) -Greedy action tensor([-0.4510, -1.4477, 1.5975, 0.1212]) tensor([0.0918, 0.0339, 0.7117, 0.1626]) -Greedy action tensor([ 0.2752, -0.0862, 0.4491, -0.0863]) tensor([0.2791, 0.1944, 0.3321, 0.1944]) -Greedy action tensor([ 0.5126, 0.0969, -0.4416, -0.5568]) tensor([0.4187, 0.2763, 0.1613, 0.1437]) -Greedy action tensor([ 1.3354, -0.4411, 0.6417, 0.0596]) tensor([0.5133, 0.0869, 0.2565, 0.1433]) -Greedy action tensor([-0.1777, -1.1200, -0.6194, -0.5595]) tensor([0.3683, 0.1435, 0.2368, 0.2514]) -Greedy action tensor([ 1.3060, -0.0070, -0.0635, -0.4384]) tensor([0.5889, 0.1584, 0.1497, 0.1029]) -Greedy action tensor([ 1.4158, -1.7115, 0.6667, 0.6548]) tensor([0.5041, 0.0221, 0.2383, 0.2355]) -Greedy action tensor([ 0.3508, -0.8809, 0.7637, -0.5275]) tensor([0.3107, 0.0907, 0.4695, 0.1291]) -Greedy action tensor([-0.2952, -1.3014, 0.6645, -0.1571]) tensor([0.1951, 0.0713, 0.5095, 0.2240]) -Greedy action tensor([-0.8498, -0.2614, -0.0272, -1.7726]) tensor([0.1827, 0.3290, 0.4158, 0.0726]) -Greedy action tensor([ 1.1092, -0.9401, -0.2443, 0.9495]) tensor([0.4465, 0.0575, 0.1153, 0.3806]) -Greedy action tensor([ 1.0213, -0.1010, 0.0743, 0.6130]) tensor([0.4205, 0.1369, 0.1631, 0.2795]) -Greedy action tensor([ 1.1314, -0.8494, 0.0358, 0.0199]) tensor([0.5551, 0.0766, 0.1856, 0.1827]) -Greedy action tensor([-0.9571, -1.1532, 0.3741, -1.0816]) tensor([0.1541, 0.1266, 0.5832, 0.1360]) -Greedy action tensor([ 0.0321, -0.5255, 1.2669, -0.2274]) tensor([0.1730, 0.0990, 0.5946, 0.1334]) -Greedy action tensor([-0.3895, 0.1396, 0.1941, -0.4683]) tensor([0.1847, 0.3135, 0.3311, 0.1707]) -Greedy action tensor([ 0.5250, 0.6949, 0.0297, -0.1483]) tensor([0.3026, 0.3586, 0.1844, 0.1543]) -Greedy action tensor([ 1.3920, -0.0235, 0.3379, -0.3406]) tensor([0.5656, 0.1373, 0.1971, 0.1000]) -Greedy action tensor([-0.7560, -0.4035, -0.9973, -0.5189]) tensor([0.2234, 0.3179, 0.1755, 0.2832]) -Greedy action tensor([-0.5463, -0.4956, 0.4428, -0.3236]) tensor([0.1669, 0.1756, 0.4489, 0.2086]) -Greedy action tensor([-0.1396, -0.8975, 0.1545, -0.1768]) tensor([0.2650, 0.1242, 0.3556, 0.2553]) -Greedy action tensor([-1.3872, -1.7066, -0.0632, 0.4964]) tensor([0.0829, 0.0602, 0.3116, 0.5453]) -Greedy action tensor([ 0.3409, -0.7082, -0.4526, 0.7715]) tensor([0.2993, 0.1048, 0.1354, 0.4604]) -Greedy action tensor([ 0.5811, -0.0982, 1.1896, 0.1164]) tensor([0.2517, 0.1276, 0.4625, 0.1581]) -Greedy action tensor([ 0.5946, -1.1381, -0.4136, -0.4461]) tensor([0.5277, 0.0933, 0.1926, 0.1864]) -Greedy action tensor([-0.0546, -1.1276, -0.6544, 0.1234]) tensor([0.3241, 0.1108, 0.1779, 0.3872]) -Greedy action tensor([ 0.9479, 0.1669, 0.1188, -0.3074]) tensor([0.4588, 0.2101, 0.2003, 0.1308]) -Greedy action tensor([ 0.2020, -1.5575, 0.4365, 1.2115]) tensor([0.1930, 0.0332, 0.2440, 0.5297]) -Greedy action tensor([-0.6500, -0.3269, 0.1084, -0.5662]) tensor([0.1785, 0.2465, 0.3810, 0.1941]) -Greedy action tensor([-0.8182, 0.0978, 0.0225, -0.7071]) tensor([0.1442, 0.3604, 0.3343, 0.1611]) -Greedy action tensor([ 0.5327, -1.5522, 0.2099, 0.1681]) tensor([0.3932, 0.0489, 0.2848, 0.2731]) -Greedy action tensor([-0.4435, 0.6079, -0.1445, 0.9133]) tensor([0.1100, 0.3147, 0.1483, 0.4271]) -Greedy action tensor([ 1.6179, -1.6357, 2.3238, 1.1815]) tensor([0.2695, 0.0104, 0.5459, 0.1742]) -Greedy action tensor([ 0.7693, -0.7862, -0.1801, -0.1655]) tensor([0.5023, 0.1060, 0.1944, 0.1973]) -Greedy action tensor([ 0.1870, -0.1588, 0.0928, -0.3970]) tensor([0.3149, 0.2228, 0.2866, 0.1756]) -Greedy action tensor([-0.0932, -0.0055, -0.1289, 0.1591]) tensor([0.2302, 0.2513, 0.2222, 0.2963]) -Greedy action tensor([ 0.7288, -0.1065, 0.0901, 0.8551]) tensor([0.3230, 0.1401, 0.1705, 0.3665]) -Greedy action tensor([ 0.4954, 0.9610, 0.2675, -0.1405]) tensor([0.2552, 0.4065, 0.2032, 0.1351]) -Greedy action tensor([ 0.6379, -0.9251, 2.5719, -0.2433]) tensor([0.1171, 0.0245, 0.8099, 0.0485]) -Greedy action tensor([-0.3099, -1.6928, -0.8388, 0.3879]) tensor([0.2598, 0.0652, 0.1531, 0.5220]) -Greedy action tensor([ 0.1307, -0.8432, 0.6366, 0.1942]) tensor([0.2438, 0.0921, 0.4044, 0.2598]) -Greedy action tensor([ 1.3539, 0.5636, 0.6088, -0.3655]) tensor([0.4745, 0.2153, 0.2252, 0.0850]) -Greedy action tensor([-0.2258, 1.0372, -1.2299, -0.7181]) tensor([0.1814, 0.6413, 0.0665, 0.1109]) -Greedy action tensor([-0.6804, 0.0070, -0.6739, 0.3930]) tensor([0.1445, 0.2873, 0.1454, 0.4227]) -Greedy action tensor([-1.0854, -0.3988, -0.9679, -0.3496]) tensor([0.1613, 0.3205, 0.1814, 0.3367]) -Greedy action tensor([ 0.5462, -0.0626, -0.7464, 0.0078]) tensor([0.4163, 0.2264, 0.1143, 0.2430]) -Greedy action tensor([-0.3841, -1.0933, 0.4847, -0.9609]) tensor([0.2253, 0.1109, 0.5372, 0.1266]) -Greedy action tensor([ 0.5004, -1.6025, 0.3649, -0.2425]) tensor([0.4047, 0.0494, 0.3534, 0.1925]) -Greedy action tensor([ 0.0507, -0.2625, -0.2162, 0.9930]) tensor([0.1975, 0.1444, 0.1513, 0.5068]) -Greedy action tensor([ 0.1330, -0.2364, 0.9456, 0.0533]) tensor([0.2054, 0.1420, 0.4630, 0.1897]) -Greedy action tensor([-1.1118, -0.9971, -0.2832, 0.3323]) tensor([0.1156, 0.1297, 0.2648, 0.4900]) -Greedy action tensor([-1.2648, -0.5508, 0.2099, -0.9735]) tensor([0.1143, 0.2334, 0.4994, 0.1529]) -Greedy action tensor([ 0.3271, -1.6678, 0.1452, 0.6957]) tensor([0.2928, 0.0398, 0.2441, 0.4233]) -Greedy action tensor([-0.3576, -0.6508, -0.1578, 0.0221]) tensor([0.2258, 0.1684, 0.2757, 0.3301]) -Greedy action tensor([ 1.0108, -1.1929, 0.3609, -0.5692]) tensor([0.5439, 0.0600, 0.2840, 0.1120]) -Greedy action tensor([ 0.2531, -0.6607, -0.9452, -0.3853]) tensor([0.4483, 0.1797, 0.1352, 0.2367]) -Greedy action tensor([ 0.6653, -1.1734, 0.1228, 0.0834]) tensor([0.4349, 0.0692, 0.2528, 0.2431]) -Greedy action tensor([ 1.7420, -1.1809, 1.4174, 0.5487]) tensor([0.4808, 0.0259, 0.3475, 0.1458]) -Greedy action tensor([0.6019, 0.0495, 0.9449, 0.7513]) tensor([0.2412, 0.1388, 0.3399, 0.2801]) -Greedy action tensor([ 0.5595, -1.2720, 1.2976, 0.3416]) tensor([0.2465, 0.0395, 0.5157, 0.1983]) -Greedy action tensor([-1.2629, -1.2435, 0.0957, -0.9558]) tensor([0.1376, 0.1403, 0.5352, 0.1870]) -Greedy action tensor([ 0.7765, -0.5733, 0.3790, 0.9067]) tensor([0.3257, 0.0845, 0.2189, 0.3710]) -Greedy action tensor([ 0.4163, 0.2413, -0.0599, 0.6901]) tensor([0.2649, 0.2223, 0.1645, 0.3483]) -Greedy action tensor([ 1.0867, -1.2182, 0.5116, 0.3721]) tensor([0.4647, 0.0464, 0.2615, 0.2274]) -Greedy action tensor([ 0.5305, -1.0195, -0.3549, 1.3551]) tensor([0.2560, 0.0543, 0.1056, 0.5840]) -Greedy action tensor([1.6565, 0.2620, 0.4312, 0.2059]) tensor([0.5631, 0.1396, 0.1653, 0.1320]) -Greedy action tensor([ 0.7355, -0.2724, 0.7712, 0.5817]) tensor([0.3069, 0.1120, 0.3180, 0.2631]) -Greedy action tensor([ 0.3260, -0.1113, 0.0335, -0.2968]) tensor([0.3415, 0.2205, 0.2549, 0.1832]) -Greedy action tensor([-0.0019, 0.0387, 0.3684, 1.4328]) tensor([0.1301, 0.1355, 0.1884, 0.5461]) -Greedy action tensor([-0.3110, -0.6242, 0.4191, 0.1479]) tensor([0.1856, 0.1357, 0.3851, 0.2936]) -Greedy action tensor([ 0.1252, -1.8810, -0.0106, 0.5402]) tensor([0.2839, 0.0382, 0.2479, 0.4300]) -Greedy action tensor([ 0.5316, -1.0586, 0.1499, -0.0824]) tensor([0.4119, 0.0840, 0.2812, 0.2229]) -Greedy action tensor([ 0.3456, -0.5328, 0.5804, 0.0220]) tensor([0.2938, 0.1221, 0.3716, 0.2126]) -Greedy action tensor([0.8605, 0.2297, 0.2014, 1.9698]) tensor([0.1968, 0.1047, 0.1018, 0.5967]) -Greedy action tensor([-0.5064, 0.9580, 0.4251, -0.9984]) tensor([0.1180, 0.5103, 0.2995, 0.0721]) -Greedy action tensor([0.3545, 0.5120, 0.2309, 0.6845]) tensor([0.2250, 0.2633, 0.1988, 0.3129]) -Greedy action tensor([ 0.8279, -1.0038, -0.4143, 1.1153]) tensor([0.3595, 0.0576, 0.1038, 0.4792]) -Greedy action tensor([-0.4024, -0.4753, 0.0702, -0.5489]) tensor([0.2274, 0.2114, 0.3648, 0.1964]) -Greedy action tensor([ 0.5794, -0.3987, -0.0355, -0.2723]) tensor([0.4267, 0.1605, 0.2307, 0.1821]) -Greedy action tensor([ 1.0380, -0.6033, -0.0198, -0.3938]) tensor([0.5618, 0.1089, 0.1951, 0.1342]) -Greedy action tensor([ 0.9784, -0.6199, -0.0999, -0.4153]) tensor([0.5585, 0.1129, 0.1900, 0.1386]) -Greedy action tensor([ 0.6300, -0.0204, 0.1106, -0.1658]) tensor([0.3894, 0.2032, 0.2317, 0.1757]) -Greedy action tensor([ 0.7086, -0.5413, -0.1418, -0.4032]) tensor([0.4895, 0.1403, 0.2092, 0.1610]) -Greedy action tensor([ 1.0471, -0.7774, -0.0914, -0.5961]) tensor([0.5970, 0.0963, 0.1912, 0.1154]) -Greedy action tensor([ 0.6657, -0.0926, -0.0555, -0.0199]) tensor([0.4068, 0.1906, 0.1978, 0.2049]) -Greedy action tensor([ 0.2546, -0.1441, -0.0800, -0.4677]) tensor([0.3481, 0.2337, 0.2491, 0.1691]) -Greedy action tensor([ 0.5170, -0.3155, 0.1667, -0.5240]) tensor([0.4012, 0.1745, 0.2826, 0.1417]) -Greedy action tensor([ 0.7658, -0.5549, -0.0967, -0.3211]) tensor([0.4935, 0.1317, 0.2083, 0.1664]) -Greedy action tensor([ 0.8823, -0.5388, -0.0281, -0.3747]) tensor([0.5186, 0.1252, 0.2087, 0.1476]) -Greedy action tensor([ 1.0168, -0.9790, -0.0386, -0.5239]) tensor([0.5889, 0.0800, 0.2050, 0.1261]) -Greedy action tensor([ 0.7971, -0.6025, -0.1538, -0.4612]) tensor([0.5216, 0.1287, 0.2015, 0.1482]) -Greedy action tensor([ 0.8221, -0.7067, -0.0832, -0.3990]) tensor([0.5219, 0.1131, 0.2111, 0.1539]) -Greedy action tensor([ 0.9122, -0.5772, -0.1618, -0.6953]) tensor([0.5658, 0.1276, 0.1933, 0.1134]) -Greedy action tensor([ 0.8593, -0.4230, -0.1862, -0.1984]) tensor([0.5060, 0.1404, 0.1779, 0.1757]) -Greedy action tensor([ 1.2624, -0.9549, 0.1172, -0.8651]) tensor([0.6467, 0.0704, 0.2058, 0.0771]) -Greedy action tensor([ 0.7178, -0.1565, -0.0167, -0.3393]) tensor([0.4456, 0.1859, 0.2137, 0.1548]) -Greedy action tensor([ 0.9742, -0.6881, 0.0033, -0.4502]) tensor([0.5528, 0.1049, 0.2094, 0.1330]) -Greedy action tensor([ 0.5935, -0.2939, -0.1126, -0.3091]) tensor([0.4328, 0.1782, 0.2136, 0.1755]) -Greedy action tensor([ 0.5174, 0.1040, 0.0672, -0.2096]) tensor([0.3594, 0.2377, 0.2291, 0.1737]) -Greedy action tensor([ 0.8489, -0.1693, -0.0142, -0.2526]) tensor([0.4727, 0.1708, 0.1994, 0.1571]) -Greedy action tensor([ 0.6421, -0.3429, -0.0587, -0.0828]) tensor([0.4248, 0.1586, 0.2108, 0.2058]) -Greedy action tensor([ 0.6635, -0.2494, -0.0369, -0.0562]) tensor([0.4194, 0.1683, 0.2081, 0.2042]) -Greedy action tensor([ 0.1536, 0.0502, -0.1262, 0.0765]) tensor([0.2791, 0.2517, 0.2109, 0.2583]) -Greedy action tensor([ 1.0425, -0.9954, 0.1182, -0.5462]) tensor([0.5776, 0.0753, 0.2292, 0.1179]) -Greedy action tensor([ 0.7397, -0.4235, -0.1063, -0.3538]) tensor([0.4816, 0.1505, 0.2066, 0.1613]) -Greedy action tensor([ 0.3510, -0.1070, -0.0956, -0.0869]) tensor([0.3427, 0.2168, 0.2193, 0.2212]) -Greedy action tensor([ 0.5687, -0.2932, -0.0332, -0.1385]) tensor([0.4060, 0.1715, 0.2224, 0.2002]) -Greedy action tensor([ 0.7525, -0.2899, -0.0059, 0.0097]) tensor([0.4354, 0.1535, 0.2039, 0.2072]) -Greedy action tensor([ 3.4658e-01, -2.1677e-01, -1.8835e-04, -2.9743e-01]) tensor([0.3570, 0.2032, 0.2524, 0.1875]) -Greedy action tensor([ 0.5300, -0.2416, -0.0562, -0.1188]) tensor([0.3935, 0.1819, 0.2190, 0.2057]) -Greedy action tensor([ 0.4372, -0.3376, -0.1739, 0.0020]) tensor([0.3773, 0.1738, 0.2048, 0.2441]) -Greedy action tensor([ 0.5822, -0.1121, -0.0921, -0.3489]) tensor([0.4161, 0.2078, 0.2120, 0.1640]) -Greedy action tensor([ 1.1810, -1.0429, 0.1477, -0.8066]) tensor([0.6246, 0.0676, 0.2223, 0.0856]) -Greedy action tensor([ 0.7404, -0.6058, -0.1412, -0.2735]) tensor([0.4909, 0.1277, 0.2033, 0.1781]) -Greedy action tensor([ 0.9140, -0.7536, -0.0315, -0.4298]) tensor([0.5441, 0.1027, 0.2114, 0.1419]) -Greedy action tensor([ 0.8922, -0.5824, 0.1693, -0.5836]) tensor([0.5147, 0.1178, 0.2498, 0.1177]) -Greedy action tensor([ 0.8544, -0.7087, 0.0589, -0.6425]) tensor([0.5306, 0.1112, 0.2395, 0.1188]) -Greedy action tensor([ 1.0376, -0.8353, 0.1455, -0.6557]) tensor([0.5723, 0.0880, 0.2345, 0.1053]) -Greedy action tensor([ 0.4591, 0.0410, 0.0919, -0.0119]) tensor([0.3361, 0.2213, 0.2328, 0.2098]) -Greedy action tensor([ 0.8236, -0.4868, 0.0315, -1.0104]) tensor([0.5312, 0.1433, 0.2406, 0.0849]) -Greedy action tensor([ 0.5321, -0.5105, -0.0896, -0.3045]) tensor([0.4305, 0.1518, 0.2312, 0.1865]) -Greedy action tensor([ 0.6406, -0.3472, -0.0687, -0.3377]) tensor([0.4464, 0.1662, 0.2196, 0.1678]) -Greedy action tensor([ 0.4423, -0.1465, -0.0776, -0.2600]) tensor([0.3781, 0.2098, 0.2248, 0.1873]) -Greedy action tensor([ 0.8852, -0.3079, -0.0448, -0.3538]) tensor([0.5031, 0.1526, 0.1985, 0.1457]) -Greedy action tensor([ 0.1005, -0.1008, -0.2071, 0.0161]) tensor([0.2880, 0.2355, 0.2118, 0.2647]) -Greedy action tensor([ 0.5128, -0.4003, 0.0757, -0.3756]) tensor([0.4068, 0.1632, 0.2627, 0.1673]) -Greedy action tensor([ 0.9682, -0.7239, 0.0164, -0.4817]) tensor([0.5541, 0.1020, 0.2139, 0.1300]) -Greedy action tensor([ 0.8284, -0.6507, -0.0680, -0.2550]) tensor([0.5065, 0.1154, 0.2067, 0.1714]) -Greedy action tensor([ 0.6535, -0.2378, 0.0264, -0.2830]) tensor([0.4280, 0.1755, 0.2286, 0.1678]) -Greedy action tensor([ 1.0008, -0.7996, -0.0192, -0.3408]) tensor([0.5595, 0.0925, 0.2018, 0.1463]) -Greedy action tensor([ 1.3071, -1.1604, 0.0998, -0.8170]) tensor([0.6652, 0.0564, 0.1989, 0.0795]) -Greedy action tensor([ 0.6721, -0.1504, 0.0440, -0.1113]) tensor([0.4116, 0.1808, 0.2196, 0.1880]) -Greedy action tensor([ 0.2896, 0.0818, -0.0375, -0.4705]) tensor([0.3332, 0.2707, 0.2403, 0.1558]) -Greedy action tensor([ 0.6575, -0.2161, -0.0994, -0.1456]) tensor([0.4284, 0.1788, 0.2010, 0.1919]) -Greedy action tensor([ 0.0491, 0.0092, -0.2226, 0.1423]) tensor([0.2617, 0.2515, 0.1995, 0.2873]) -Greedy action tensor([ 0.5635, -0.3780, -0.1301, -0.2172]) tensor([0.4259, 0.1661, 0.2129, 0.1951]) -Greedy action tensor([ 0.9399, -0.0403, 0.0696, -0.2554]) tensor([0.4769, 0.1790, 0.1998, 0.1443]) -Greedy action tensor([ 0.6461, -0.2428, 0.0629, -0.3054]) tensor([0.4246, 0.1745, 0.2370, 0.1640]) -Greedy action tensor([ 1.1482, -0.7724, 0.0988, -0.5719]) tensor([0.5968, 0.0874, 0.2090, 0.1069]) -Greedy action tensor([ 0.3246, -0.0456, -0.1942, 0.0140]) tensor([0.3313, 0.2288, 0.1972, 0.2428]) -Greedy action tensor([ 0.3206, 0.1507, -0.0514, -0.3469]) tensor([0.3283, 0.2770, 0.2263, 0.1684]) -Greedy action tensor([ 0.4654, -0.2138, 0.0725, -0.5536]) tensor([0.3932, 0.1994, 0.2655, 0.1419]) -Greedy action tensor([ 0.8159, -0.3054, -0.0677, -0.1106]) tensor([0.4684, 0.1526, 0.1936, 0.1855]) -Greedy action tensor([ 0.2872, -0.0901, 0.0025, -0.3084]) tensor([0.3345, 0.2294, 0.2517, 0.1844]) -Greedy action tensor([ 1.3774, -1.1002, -0.1518, -0.4576]) tensor([0.6848, 0.0575, 0.1484, 0.1093]) -Greedy action tensor([ 0.5920, -0.1612, -0.0204, 0.0398]) tensor([0.3863, 0.1819, 0.2094, 0.2224]) -Greedy action tensor([ 0.6311, -0.3888, 0.0628, -0.4634]) tensor([0.4421, 0.1594, 0.2505, 0.1480]) -Greedy action tensor([ 0.3862, -0.0212, -0.1335, -0.0751]) tensor([0.3460, 0.2302, 0.2057, 0.2181]) -Greedy action tensor([ 0.4033, 0.0887, -0.1213, 0.0763]) tensor([0.3286, 0.2399, 0.1945, 0.2370]) -Greedy action tensor([ 0.8212, -0.4373, -0.1581, -0.3418]) tensor([0.5070, 0.1440, 0.1904, 0.1585]) -Greedy action tensor([ 0.5833, -0.6473, -0.1195, -0.1816]) tensor([0.4439, 0.1297, 0.2198, 0.2066]) -Greedy action tensor([ 0.5409, -0.0038, -0.2662, -0.0690]) tensor([0.3892, 0.2257, 0.1736, 0.2115]) -Greedy action tensor([ 0.7449, -0.2135, -0.0928, -0.1627]) tensor([0.4505, 0.1728, 0.1949, 0.1818]) -Greedy action tensor([ 0.7249, -0.7185, -0.1907, -0.1846]) tensor([0.4904, 0.1158, 0.1963, 0.1975]) -Greedy action tensor([ 0.6825, -0.5188, -0.0454, -0.2694]) tensor([0.4609, 0.1386, 0.2226, 0.1779]) -Greedy action tensor([ 0.7410, -0.4381, 0.0413, -0.3400]) tensor([0.4665, 0.1435, 0.2317, 0.1583]) -Greedy action tensor([ 0.3602, 0.1187, -0.0210, 0.2020]) tensor([0.3010, 0.2364, 0.2056, 0.2570]) -Greedy action tensor([ 0.5011, -0.0160, -0.0412, -0.1265]) tensor([0.3688, 0.2199, 0.2144, 0.1969]) -Greedy action tensor([ 1.4177, -0.7913, -0.0085, -0.5005]) tensor([0.6681, 0.0734, 0.1605, 0.0981]) -Greedy action tensor([-1.6816, -0.5214, 0.5555, 0.0402]) tensor([0.0522, 0.1666, 0.4891, 0.2921]) -Greedy action tensor([-1.7405, -0.3831, 0.6428, -0.0179]) tensor([0.0469, 0.1822, 0.5083, 0.2625]) -Greedy action tensor([-1.9053, -0.4423, 0.6464, -0.1617]) tensor([0.0419, 0.1810, 0.5375, 0.2396]) -Greedy action tensor([-1.6697, -0.0790, 0.5959, 0.0133]) tensor([0.0478, 0.2345, 0.4605, 0.2572]) -Greedy action tensor([-1.6161, 0.2311, 0.4318, 0.0270]) tensor([0.0493, 0.3129, 0.3825, 0.2552]) -Greedy action tensor([-1.3912, -0.2957, 0.4888, 0.2238]) tensor([0.0642, 0.1921, 0.4208, 0.3229]) -Greedy action tensor([-1.9226, -0.4416, 0.6558, -0.1687]) tensor([0.0411, 0.1806, 0.5411, 0.2372]) -Greedy action tensor([-1.8963, -0.4547, 0.6481, -0.1579]) tensor([0.0423, 0.1787, 0.5385, 0.2405]) -Greedy action tensor([-1.2573, 0.1426, 0.2551, 0.0286]) tensor([0.0757, 0.3069, 0.3435, 0.2739]) -Greedy action tensor([-1.1066, -0.3777, 0.2694, 0.0590]) tensor([0.0977, 0.2024, 0.3866, 0.3133]) -Greedy action tensor([-1.8540, -0.3888, 0.6154, -0.1438]) tensor([0.0441, 0.1909, 0.5211, 0.2439]) -Greedy action tensor([-1.8762, -0.4239, 0.6415, -0.1335]) tensor([0.0428, 0.1827, 0.5302, 0.2443]) -Greedy action tensor([-1.8500, -0.4334, 0.6141, -0.1478]) tensor([0.0447, 0.1844, 0.5256, 0.2453]) -Greedy action tensor([-1.9458, -0.4509, 0.6682, -0.1811]) tensor([0.0401, 0.1787, 0.5472, 0.2340]) -Greedy action tensor([-1.5180, -0.1949, 0.4953, 0.0376]) tensor([0.0589, 0.2211, 0.4410, 0.2790]) -Greedy action tensor([-1.8620, -0.4421, 0.6540, -0.0811]) tensor([0.0426, 0.1764, 0.5279, 0.2531]) -Greedy action tensor([-1.0279, -0.4473, 0.5343, 0.8779]) tensor([0.0700, 0.1251, 0.3340, 0.4709]) -Greedy action tensor([-1.8960, -0.4374, 0.6494, -0.1526]) tensor([0.0421, 0.1809, 0.5364, 0.2406]) -Greedy action tensor([-0.7793, -0.4988, 0.8459, 0.8278]) tensor([0.0807, 0.1068, 0.4099, 0.4026]) -Greedy action tensor([-0.7098, -0.6402, 0.3559, 0.4914]) tensor([0.1205, 0.1292, 0.3498, 0.4005]) -Greedy action tensor([-1.9426, -0.4511, 0.6677, -0.1785]) tensor([0.0402, 0.1786, 0.5467, 0.2345]) -Greedy action tensor([-1.8803, -0.4575, 0.6374, -0.1518]) tensor([0.0431, 0.1790, 0.5349, 0.2430]) -Greedy action tensor([-1.5926, 0.4428, 0.3677, -0.0776]) tensor([0.0492, 0.3770, 0.3497, 0.2240]) -Greedy action tensor([-1.8461, -0.4092, 0.6036, -0.1246]) tensor([0.0447, 0.1880, 0.5175, 0.2498]) -Greedy action tensor([-0.0526, 0.4065, 0.4508, 1.0965]) tensor([0.1353, 0.2141, 0.2238, 0.4268]) -Greedy action tensor([-1.9272, -0.4294, 0.6591, -0.1706]) tensor([0.0407, 0.1822, 0.5411, 0.2360]) -Greedy action tensor([-1.6804, 0.1021, 0.4895, -0.0100]) tensor([0.0476, 0.2829, 0.4167, 0.2529]) -Greedy action tensor([-1.0526, -0.4980, 0.8234, 1.0348]) tensor([0.0577, 0.1005, 0.3766, 0.4652]) -Greedy action tensor([-1.7212, -0.4982, 0.5495, -0.0079]) tensor([0.0509, 0.1731, 0.4934, 0.2826]) -Greedy action tensor([-1.9163, -0.4567, 0.6591, -0.1640]) tensor([0.0413, 0.1778, 0.5426, 0.2383]) -Greedy action tensor([-1.9444, -0.4540, 0.6707, -0.1787]) tensor([0.0401, 0.1779, 0.5478, 0.2343]) -Greedy action tensor([-1.8901, -0.4227, 0.6413, -0.1737]) tensor([0.0426, 0.1848, 0.5355, 0.2371]) -Greedy action tensor([-1.9352, -0.4401, 0.6614, -0.1736]) tensor([0.0405, 0.1806, 0.5432, 0.2357]) -Greedy action tensor([-0.9409, 0.8825, 0.0778, -0.4095]) tensor([0.0857, 0.5310, 0.2374, 0.1459]) -Greedy action tensor([-1.9297, -0.4248, 0.6574, -0.1708]) tensor([0.0406, 0.1831, 0.5403, 0.2360]) -Greedy action tensor([-1.7006, -0.5072, 0.5474, 0.0053]) tensor([0.0519, 0.1711, 0.4913, 0.2857]) -Greedy action tensor([-1.4184, 0.6916, 0.2469, 0.2167]) tensor([0.0508, 0.4194, 0.2689, 0.2609]) -Greedy action tensor([-1.7121, -0.3363, 0.6006, 0.0024]) tensor([0.0485, 0.1920, 0.4900, 0.2694]) -Greedy action tensor([-1.9417, -0.4447, 0.6651, -0.1783]) tensor([0.0402, 0.1798, 0.5454, 0.2346]) -Greedy action tensor([-1.9169, -0.4294, 0.6565, -0.1538]) tensor([0.0410, 0.1816, 0.5381, 0.2393]) -Greedy action tensor([-1.1362, -0.5761, 0.2672, 0.3680]) tensor([0.0883, 0.1547, 0.3594, 0.3976]) -Greedy action tensor([-1.8628, -0.3271, 0.6133, -0.1352]) tensor([0.0432, 0.2005, 0.5135, 0.2429]) -Greedy action tensor([-0.2890, -0.4361, 1.0115, 1.5799]) tensor([0.0832, 0.0718, 0.3055, 0.5394]) -Greedy action tensor([-1.9104, -0.4271, 0.6467, -0.1661]) tensor([0.0416, 0.1834, 0.5368, 0.2381]) -Greedy action tensor([-1.6831, -0.2342, 0.6528, 0.0406]) tensor([0.0472, 0.2008, 0.4876, 0.2644]) -Greedy action tensor([-1.8922, -0.4415, 0.6386, -0.1513]) tensor([0.0425, 0.1813, 0.5339, 0.2423]) -Greedy action tensor([-1.5137, -0.4252, 0.4752, 0.2435]) tensor([0.0586, 0.1740, 0.4280, 0.3395]) -Greedy action tensor([-1.8958, -0.4374, 0.6409, -0.1601]) tensor([0.0424, 0.1821, 0.5353, 0.2403]) -Greedy action tensor([-1.9120, -0.4484, 0.6482, -0.1636]) tensor([0.0417, 0.1800, 0.5390, 0.2393]) -Greedy action tensor([-1.9384, -0.4412, 0.6655, -0.1732]) tensor([0.0403, 0.1800, 0.5444, 0.2353]) -Greedy action tensor([-0.8394, 0.0583, 0.1512, -0.0120]) tensor([0.1186, 0.2910, 0.3193, 0.2712]) -Greedy action tensor([-0.6272, 0.5772, 0.0755, -0.0656]) tensor([0.1233, 0.4113, 0.2491, 0.2163]) -Greedy action tensor([-1.9177, -0.4342, 0.6555, -0.1646]) tensor([0.0412, 0.1815, 0.5397, 0.2377]) -Greedy action tensor([-0.4779, 1.0641, 0.0127, 0.5631]) tensor([0.0986, 0.4610, 0.1611, 0.2793]) -Greedy action tensor([-1.8747, -0.4492, 0.6386, -0.1430]) tensor([0.0432, 0.1796, 0.5332, 0.2440]) -Greedy action tensor([-1.9324, -0.4393, 0.6629, -0.1696]) tensor([0.0405, 0.1803, 0.5430, 0.2362]) -Greedy action tensor([-1.8766, -0.4440, 0.6368, -0.1437]) tensor([0.0431, 0.1806, 0.5323, 0.2439]) -Greedy action tensor([-1.7048, -0.4438, 0.5727, -0.0182]) tensor([0.0508, 0.1793, 0.4955, 0.2744]) -Greedy action tensor([-1.8273, -0.1545, 0.5711, -0.1190]) tensor([0.0438, 0.2331, 0.4816, 0.2415]) -Greedy action tensor([-1.1147, 0.6324, 0.1998, -0.1404]) tensor([0.0763, 0.4377, 0.2840, 0.2021]) -Greedy action tensor([-1.8998, -0.4321, 0.6458, -0.1569]) tensor([0.0420, 0.1823, 0.5357, 0.2401]) -Greedy action tensor([-1.9317, -0.4473, 0.6630, -0.1714]) tensor([0.0406, 0.1792, 0.5440, 0.2362]) -Greedy action tensor([-1.7813, -0.3423, 0.5782, -0.1304]) tensor([0.0476, 0.2007, 0.5038, 0.2480]) -Greedy action tensor([-1.9439, -0.4543, 0.6704, -0.1784]) tensor([0.0401, 0.1779, 0.5477, 0.2344]) -Greedy action tensor([0.1860, 0.9320, 0.1233, 0.8470]) tensor([0.1671, 0.3523, 0.1569, 0.3236]) -Greedy action tensor([-1.8134, -0.1865, 0.5814, -0.0918]) tensor([0.0442, 0.2247, 0.4842, 0.2470]) -Greedy action tensor([-1.8030, -0.4234, 0.6047, -0.0856]) tensor([0.0462, 0.1835, 0.5130, 0.2573]) -Greedy action tensor([-1.6401e+00, -2.3937e-01, 4.9593e-01, -1.5534e-03]) tensor([0.0536, 0.2173, 0.4534, 0.2757]) -Greedy action tensor([-1.9418, -0.4673, 0.6959, -0.1728]) tensor([0.0397, 0.1733, 0.5545, 0.2326]) -Greedy action tensor([-1.8677, -0.4589, 0.6368, -0.1379]) tensor([0.0435, 0.1781, 0.5328, 0.2455]) -Greedy action tensor([-1.8869, -0.4027, 0.6411, -0.1478]) tensor([0.0423, 0.1867, 0.5301, 0.2409]) -Greedy action tensor([-1.6804, -0.4335, 0.6445, 0.0963]) tensor([0.0485, 0.1688, 0.4960, 0.2867]) -Greedy action tensor([-1.2139, 0.7802, 0.1459, 0.3190]) tensor([0.0593, 0.4354, 0.2309, 0.2745]) -Greedy action tensor([-1.8915, -0.4199, 0.6351, -0.1574]) tensor([0.0425, 0.1851, 0.5317, 0.2407]) -Greedy action tensor([-1.9359, -0.4412, 0.6662, -0.1737]) tensor([0.0404, 0.1799, 0.5446, 0.2351]) -Greedy action tensor([-1.6953, -0.1610, 0.5100, -0.1090]) tensor([0.0510, 0.2367, 0.4630, 0.2493]) -Greedy action tensor([-0.9642, -0.5526, 0.4152, -0.3281]) tensor([0.1195, 0.1803, 0.4746, 0.2257]) -Greedy action tensor([-1.8830, -0.2566, 0.6081, -0.1329]) tensor([0.0418, 0.2126, 0.5049, 0.2407]) -Greedy action tensor([-1.8844, -0.4792, 0.7179, -0.0403]) tensor([0.0402, 0.1637, 0.5421, 0.2540]) -Greedy action tensor([-1.9399, -0.4398, 0.6649, -0.1753]) tensor([0.0402, 0.1804, 0.5444, 0.2350]) -Greedy action tensor([ 0.0265, -0.2541, 0.1084, 0.2353]) tensor([0.2455, 0.1854, 0.2665, 0.3025]) -Greedy action tensor([ 1.3303, -0.4237, -0.6964, 0.7310]) tensor([0.5394, 0.0933, 0.0711, 0.2962]) -Greedy action tensor([ 1.3755, -0.0404, -0.9344, 0.2370]) tensor([0.6016, 0.1460, 0.0597, 0.1927]) -Greedy action tensor([ 1.5443, -0.8069, -0.6200, 0.3537]) tensor([0.6604, 0.0629, 0.0758, 0.2008]) -Greedy action tensor([ 1.5155, -0.4849, -0.5831, 0.2829]) tensor([0.6454, 0.0873, 0.0791, 0.1882]) -Greedy action tensor([ 1.6850, -0.1833, -0.9673, 0.5158]) tensor([0.6513, 0.1005, 0.0459, 0.2023]) -Greedy action tensor([ 1.4126, -0.6566, 0.0240, -0.0411]) tensor([0.6214, 0.0785, 0.1550, 0.1452]) -Greedy action tensor([ 1.9015, -0.7462, -0.3024, 0.4226]) tensor([0.7097, 0.0503, 0.0783, 0.1617]) -Greedy action tensor([ 1.6320, -0.5807, -0.6984, 0.4187]) tensor([0.6650, 0.0727, 0.0647, 0.1976]) -Greedy action tensor([ 1.1003, -0.3106, -0.2519, 0.1156]) tensor([0.5330, 0.1300, 0.1379, 0.1991]) -Greedy action tensor([ 1.5984, -0.1264, -0.9310, 0.2399]) tensor([0.6601, 0.1176, 0.0526, 0.1697]) -Greedy action tensor([ 1.7330, -0.4620, -0.1376, 0.1009]) tensor([0.6845, 0.0762, 0.1054, 0.1338]) -Greedy action tensor([ 2.2927, -0.9212, -0.3702, 0.5431]) tensor([0.7789, 0.0313, 0.0543, 0.1354]) -Greedy action tensor([ 1.6197, 0.4069, -0.7465, 0.5275]) tensor([0.5791, 0.1722, 0.0543, 0.1943]) -Greedy action tensor([ 1.3886, -0.7438, -0.0769, 0.1646]) tensor([0.6084, 0.0721, 0.1405, 0.1789]) -Greedy action tensor([ 1.5220, -0.3498, -0.4875, 0.3794]) tensor([0.6223, 0.0957, 0.0834, 0.1985]) -Greedy action tensor([ 1.4429, -0.1046, -0.3479, 0.2466]) tensor([0.5946, 0.1265, 0.0992, 0.1797]) -Greedy action tensor([ 1.5858, -0.2818, -0.3433, 0.2318]) tensor([0.6419, 0.0992, 0.0932, 0.1657]) -Greedy action tensor([ 1.3393, -0.5311, 0.0078, 0.0408]) tensor([0.5913, 0.0911, 0.1562, 0.1614]) -Greedy action tensor([ 1.1494, -0.2675, 0.0173, 0.1819]) tensor([0.5142, 0.1247, 0.1657, 0.1954]) -Greedy action tensor([ 1.2584, -0.6736, 0.0109, 0.3534]) tensor([0.5445, 0.0789, 0.1564, 0.2203]) -Greedy action tensor([ 1.0364, -0.4564, -0.3511, 0.2944]) tensor([0.5127, 0.1152, 0.1280, 0.2441]) -Greedy action tensor([ 1.8366, -0.9060, -0.2018, 0.3112]) tensor([0.7081, 0.0456, 0.0922, 0.1541]) -Greedy action tensor([ 1.7708, -0.9490, -0.1820, 0.6405]) tensor([0.6533, 0.0430, 0.0927, 0.2110]) -Greedy action tensor([ 1.8110, -1.4539, 0.3230, 0.0655]) tensor([0.6951, 0.0266, 0.1570, 0.1213]) -Greedy action tensor([ 1.2099, -0.3282, -0.1580, 0.1334]) tensor([0.5524, 0.1187, 0.1407, 0.1883]) -Greedy action tensor([ 1.5304, -0.7158, -0.0943, 0.1426]) tensor([0.6442, 0.0681, 0.1269, 0.1608]) -Greedy action tensor([ 1.8978, -0.3019, -0.4409, 0.0892]) tensor([0.7293, 0.0808, 0.0703, 0.1195]) -Greedy action tensor([ 1.9413, -0.7365, -0.1580, 0.4033]) tensor([0.7112, 0.0489, 0.0872, 0.1528]) -Greedy action tensor([ 2.2456, -0.7044, -0.6778, 1.0117]) tensor([0.7157, 0.0375, 0.0385, 0.2084]) -Greedy action tensor([ 1.7500, -0.0977, -0.5909, 0.3854]) tensor([0.6625, 0.1044, 0.0638, 0.1693]) -Greedy action tensor([ 1.7862, -0.6912, -0.1910, 0.2122]) tensor([0.6995, 0.0587, 0.0969, 0.1450]) -Greedy action tensor([ 1.0774, 0.1723, -1.0257, 0.2533]) tensor([0.5088, 0.2058, 0.0621, 0.2232]) -Greedy action tensor([1.5497, 0.5362, 0.1027, 0.1878]) tensor([0.5393, 0.1957, 0.1269, 0.1381]) -Greedy action tensor([ 1.9747, -0.7043, -0.5580, 0.6383]) tensor([0.7088, 0.0486, 0.0563, 0.1863]) -Greedy action tensor([ 1.8288, -0.5809, -0.6105, 0.6672]) tensor([0.6711, 0.0603, 0.0585, 0.2101]) -Greedy action tensor([ 1.4735, -0.4960, -0.0905, 0.1361]) tensor([0.6206, 0.0866, 0.1299, 0.1629]) -Greedy action tensor([ 2.1668, -0.5217, -0.2948, 0.5456]) tensor([0.7402, 0.0503, 0.0631, 0.1463]) -Greedy action tensor([ 1.4616, -0.7101, -0.3758, 0.6634]) tensor([0.5803, 0.0661, 0.0924, 0.2612]) -Greedy action tensor([ 1.2825, -0.3174, -0.4736, 0.1633]) tensor([0.5878, 0.1187, 0.1015, 0.1919]) -Greedy action tensor([ 1.3040, -0.6455, 0.0806, -0.0173]) tensor([0.5871, 0.0836, 0.1727, 0.1566]) -Greedy action tensor([ 1.5315, -0.4865, -0.1712, 0.0370]) tensor([0.6496, 0.0863, 0.1183, 0.1457]) -Greedy action tensor([ 2.0135, -0.5202, -0.4451, 0.6100]) tensor([0.7089, 0.0563, 0.0607, 0.1742]) -Greedy action tensor([ 1.5073, 0.0035, -0.7377, 0.7582]) tensor([0.5553, 0.1234, 0.0588, 0.2625]) -Greedy action tensor([ 2.4737, -0.6950, -0.8973, 0.6596]) tensor([0.8068, 0.0339, 0.0277, 0.1315]) -Greedy action tensor([ 1.2925, -0.2873, 0.0018, 0.0436]) tensor([0.5656, 0.1165, 0.1556, 0.1622]) -Greedy action tensor([ 1.2792, -0.6469, -0.6198, 0.4498]) tensor([0.5774, 0.0841, 0.0865, 0.2520]) -Greedy action tensor([ 1.0486, -0.4661, -0.5360, 0.5174]) tensor([0.4968, 0.1092, 0.1019, 0.2921]) -Greedy action tensor([ 1.1475, -0.8713, 0.1268, 0.2429]) tensor([0.5269, 0.0700, 0.1899, 0.2132]) -Greedy action tensor([ 1.3649, 0.2432, -0.1798, 0.0806]) tensor([0.5507, 0.1794, 0.1175, 0.1525]) -Greedy action tensor([1.6918, 0.3475, 0.0538, 0.2132]) tensor([0.5942, 0.1549, 0.1155, 0.1354]) -Greedy action tensor([ 1.8283, -0.9091, 0.1803, 0.2110]) tensor([0.6870, 0.0445, 0.1322, 0.1363]) -Greedy action tensor([ 1.1598, -0.2418, -0.5077, -0.2498]) tensor([0.5956, 0.1466, 0.1124, 0.1454]) -Greedy action tensor([ 1.4779, -0.3902, -0.2658, 0.3955]) tensor([0.5995, 0.0926, 0.1048, 0.2031]) -Greedy action tensor([ 1.4909, -0.5139, -0.6422, 0.1811]) tensor([0.6566, 0.0884, 0.0778, 0.1772]) -Greedy action tensor([ 1.6163, -0.2815, -0.9214, 0.5452]) tensor([0.6363, 0.0954, 0.0503, 0.2180]) -Greedy action tensor([ 1.5161, 0.3522, -0.5557, 0.6054]) tensor([0.5433, 0.1697, 0.0684, 0.2186]) -Greedy action tensor([ 1.4863, 0.0418, 0.1580, -0.0378]) tensor([0.5819, 0.1372, 0.1542, 0.1267]) -Greedy action tensor([ 1.2294, -0.0776, -0.6121, 0.0938]) tensor([0.5713, 0.1546, 0.0906, 0.1835]) -Greedy action tensor([ 1.3144, -0.3713, -0.9978, 0.4848]) tensor([0.5812, 0.1077, 0.0576, 0.2535]) -Greedy action tensor([ 1.2629, -0.4070, 0.0021, -0.0331]) tensor([0.5730, 0.1079, 0.1624, 0.1568]) -Greedy action tensor([ 0.8961, -0.8295, 0.1093, 0.2836]) tensor([0.4597, 0.0819, 0.2093, 0.2492]) -Greedy action tensor([ 1.2420, -0.3984, -0.6635, 0.0221]) tensor([0.6105, 0.1184, 0.0908, 0.1803]) -Greedy action tensor([ 1.0299, -0.4895, 0.0861, 0.0319]) tensor([0.5059, 0.1107, 0.1969, 0.1865]) -Greedy action tensor([ 1.1024, -0.6776, -0.6900, 1.1068]) tensor([0.4274, 0.0721, 0.0712, 0.4293]) -Greedy action tensor([ 1.3011, -0.1527, 0.2425, 0.1345]) tensor([0.5285, 0.1235, 0.1834, 0.1646]) -Greedy action tensor([ 1.7044, -0.3135, -1.0253, 0.1922]) tensor([0.7049, 0.0937, 0.0460, 0.1554]) -Greedy action tensor([ 1.2041, -0.1918, -0.2528, 0.4532]) tensor([0.5122, 0.1268, 0.1193, 0.2417]) -Greedy action tensor([ 1.3664, -0.4480, -0.6089, -0.0152]) tensor([0.6440, 0.1049, 0.0893, 0.1618]) -Greedy action tensor([ 1.6025, -0.2616, -0.3957, 0.1778]) tensor([0.6531, 0.1013, 0.0885, 0.1571]) -Greedy action tensor([ 1.5209, -0.4493, -0.3490, 0.1094]) tensor([0.6505, 0.0907, 0.1003, 0.1586]) -Greedy action tensor([ 1.4642, 0.0333, -1.0876, 0.6414]) tensor([0.5694, 0.1361, 0.0444, 0.2501]) -Greedy action tensor([2.0186, 0.2478, 0.1658, 0.1176]) tensor([0.6773, 0.1153, 0.1062, 0.1012]) -Greedy action tensor([ 1.3059, -0.7242, -0.0110, 0.1549]) tensor([0.5829, 0.0765, 0.1562, 0.1844]) -Greedy action tensor([ 1.2695, -0.4730, -0.3374, 0.2483]) tensor([0.5761, 0.1009, 0.1155, 0.2075]) -Greedy action tensor([ 1.3148, -0.3373, -0.4538, 0.4432]) tensor([0.5617, 0.1076, 0.0958, 0.2349]) -Greedy action tensor([ 1.6664, -1.0752, -0.0408, 0.5021]) tensor([0.6419, 0.0414, 0.1164, 0.2003]) -Greedy action tensor([ 1.8100, -0.7011, -0.4614, 0.0389]) tensor([0.7383, 0.0599, 0.0762, 0.1256]) -Greedy action tensor([ 1.5314, -0.5922, -0.3748, 0.2895]) tensor([0.6422, 0.0768, 0.0955, 0.1855]) -Greedy action tensor([ 1.8502, -0.0887, -0.4141, 0.1811]) tensor([0.6963, 0.1002, 0.0723, 0.1312]) -Greedy action tensor([ 1.3901, -0.5207, -0.1851, 0.4469]) tensor([0.5733, 0.0848, 0.1187, 0.2232]) -Greedy action tensor([ 1.3132, -0.1383, -0.5689, 0.6644]) tensor([0.5238, 0.1227, 0.0798, 0.2738]) -Greedy action tensor([ 1.2662, -0.1967, -0.7525, 0.3316]) tensor([0.5691, 0.1318, 0.0756, 0.2235]) -Greedy action tensor([ 0.5347, -0.5379, -0.1678, -0.2419]) tensor([0.4353, 0.1489, 0.2156, 0.2002]) -Greedy action tensor([ 1.0128, -0.8051, 0.1964, -0.6446]) tensor([0.5571, 0.0905, 0.2462, 0.1062]) -Greedy action tensor([ 0.8771, -0.5534, -0.1085, -0.2004]) tensor([0.5121, 0.1225, 0.1911, 0.1743]) -Greedy action tensor([ 0.9639, -0.8817, 0.1188, -0.4019]) tensor([0.5427, 0.0857, 0.2331, 0.1385]) -Greedy action tensor([ 0.8148, -0.4752, -0.0762, -0.4265]) tensor([0.5065, 0.1394, 0.2078, 0.1464]) -Greedy action tensor([ 0.3833, 0.0023, 0.0138, -0.1601]) tensor([0.3384, 0.2312, 0.2339, 0.1965]) -Greedy action tensor([ 0.4812, -0.0931, -0.0042, -0.0652]) tensor([0.3626, 0.2042, 0.2232, 0.2100]) -Greedy action tensor([ 1.3617, -0.7316, 0.0769, -0.4873]) tensor([0.6421, 0.0792, 0.1777, 0.1011]) -Greedy action tensor([ 1.2842, -0.7124, 0.0426, -0.7600]) tensor([0.6434, 0.0874, 0.1859, 0.0833]) -Greedy action tensor([ 0.9371, -0.1572, -0.0829, -0.0270]) tensor([0.4815, 0.1612, 0.1736, 0.1836]) -Greedy action tensor([ 0.2332, -0.6164, -0.1846, -0.3451]) tensor([0.3778, 0.1615, 0.2488, 0.2119]) -Greedy action tensor([ 0.6242, -0.1592, 0.0197, -0.4211]) tensor([0.4247, 0.1940, 0.2320, 0.1493]) -Greedy action tensor([ 1.1643, -0.7541, 0.1097, -0.4259]) tensor([0.5886, 0.0864, 0.2050, 0.1200]) -Greedy action tensor([ 0.4999, -0.1442, -0.0584, -0.1970]) tensor([0.3853, 0.2023, 0.2205, 0.1919]) -Greedy action tensor([ 0.7204, -0.4285, -0.1361, -0.4413]) tensor([0.4867, 0.1543, 0.2067, 0.1523]) -Greedy action tensor([ 0.5506, -0.2779, -0.0488, -0.2498]) tensor([0.4107, 0.1793, 0.2255, 0.1844]) -Greedy action tensor([ 0.7498, -0.3353, 0.1828, -0.1776]) tensor([0.4347, 0.1469, 0.2465, 0.1719]) -Greedy action tensor([ 1.0605, -0.6523, -0.2030, -0.6039]) tensor([0.6052, 0.1092, 0.1711, 0.1146]) -Greedy action tensor([ 1.1759, -0.5703, -0.0860, -0.7805]) tensor([0.6254, 0.1091, 0.1771, 0.0884]) -Greedy action tensor([ 0.7502, -0.4462, -0.0786, -0.1839]) tensor([0.4691, 0.1418, 0.2048, 0.1843]) -Greedy action tensor([ 1.1226, -0.4263, -0.2433, -0.5204]) tensor([0.6020, 0.1279, 0.1536, 0.1164]) -Greedy action tensor([ 0.5015, 0.2429, -0.1530, 0.1537]) tensor([0.3336, 0.2575, 0.1733, 0.2356]) -Greedy action tensor([ 0.7503, -0.3855, -0.1761, -0.1227]) tensor([0.4684, 0.1504, 0.1855, 0.1957]) -Greedy action tensor([ 0.4503, -0.0854, -0.1269, -0.1807]) tensor([0.3733, 0.2185, 0.2096, 0.1986]) -Greedy action tensor([ 0.7546, -0.5145, 0.1152, -0.4998]) tensor([0.4776, 0.1342, 0.2520, 0.1362]) -Greedy action tensor([ 0.7045, -0.3649, -0.0398, -0.1693]) tensor([0.4473, 0.1535, 0.2125, 0.1867]) -Greedy action tensor([ 0.3913, -0.2371, -0.1418, -0.1820]) tensor([0.3726, 0.1988, 0.2186, 0.2100]) -Greedy action tensor([ 0.9221, -0.4715, -0.0773, -0.6258]) tensor([0.5467, 0.1357, 0.2013, 0.1163]) -Greedy action tensor([ 1.0482, -0.4084, -0.1445, -0.3325]) tensor([0.5593, 0.1303, 0.1697, 0.1406]) -Greedy action tensor([ 0.6665, -0.3677, 0.0578, -0.2188]) tensor([0.4325, 0.1538, 0.2353, 0.1784]) -Greedy action tensor([ 0.9316, -0.7195, 0.1089, -0.5357]) tensor([0.5372, 0.1030, 0.2360, 0.1238]) -Greedy action tensor([ 0.5151, -0.5896, 0.1127, -0.4670]) tensor([0.4211, 0.1395, 0.2816, 0.1577]) -Greedy action tensor([ 0.9332, -0.6773, -0.1268, -0.2630]) tensor([0.5409, 0.1081, 0.1874, 0.1636]) -Greedy action tensor([ 0.4731, -0.0920, -0.0469, -0.0596]) tensor([0.3637, 0.2067, 0.2162, 0.2135]) -Greedy action tensor([ 0.8011, -0.1357, -0.0552, -0.0273]) tensor([0.4438, 0.1739, 0.1885, 0.1938]) -Greedy action tensor([ 0.6888, -0.7940, 0.1271, -0.5494]) tensor([0.4791, 0.1088, 0.2732, 0.1389]) -Greedy action tensor([ 0.6858, -0.3096, -0.1283, -0.4651]) tensor([0.4697, 0.1736, 0.2081, 0.1486]) -Greedy action tensor([ 0.6300, -0.1399, 0.1313, -0.3960]) tensor([0.4117, 0.1907, 0.2501, 0.1476]) -Greedy action tensor([ 0.8759, -0.2605, -0.1567, -0.1652]) tensor([0.4926, 0.1581, 0.1754, 0.1739]) -Greedy action tensor([ 0.9786, -0.3343, -0.1482, -0.2093]) tensor([0.5269, 0.1418, 0.1707, 0.1606]) -Greedy action tensor([ 0.6663, -0.3350, -0.0590, -0.3227]) tensor([0.4497, 0.1652, 0.2178, 0.1673]) -Greedy action tensor([ 1.0110, -0.7598, -0.1465, -0.1963]) tensor([0.5607, 0.0954, 0.1762, 0.1677]) -Greedy action tensor([ 0.2794, 0.2609, -0.1541, -0.3317]) tensor([0.3152, 0.3094, 0.2043, 0.1711]) -Greedy action tensor([ 0.8987, -0.3283, -0.0772, -0.1967]) tensor([0.4989, 0.1463, 0.1880, 0.1668]) -Greedy action tensor([ 0.9138, -0.7810, 0.0732, -0.5055]) tensor([0.5385, 0.0989, 0.2323, 0.1303]) -Greedy action tensor([ 0.9136, -0.5992, -0.0874, -0.4407]) tensor([0.5417, 0.1193, 0.1991, 0.1398]) -Greedy action tensor([ 0.4470, -0.1238, -0.0392, -0.2869]) tensor([0.3759, 0.2124, 0.2312, 0.1805]) -Greedy action tensor([ 0.1468, 0.7778, -0.1320, 0.0369]) tensor([0.2207, 0.4147, 0.1670, 0.1977]) -Greedy action tensor([ 0.8381, -0.6817, 0.1249, -0.4100]) tensor([0.5010, 0.1096, 0.2455, 0.1438]) -Greedy action tensor([ 1.2983, -1.1158, 0.1783, -0.5769]) tensor([0.6373, 0.0570, 0.2079, 0.0977]) -Greedy action tensor([ 0.5553, 0.0327, -0.0839, -0.4138]) tensor([0.4000, 0.2372, 0.2111, 0.1518]) -Greedy action tensor([ 0.5154, -0.6278, -0.1823, -0.2651]) tensor([0.4396, 0.1402, 0.2188, 0.2014]) -Greedy action tensor([ 0.4942, -0.0816, -0.0510, -0.1476]) tensor([0.3748, 0.2107, 0.2173, 0.1973]) -Greedy action tensor([ 0.8146, -0.5455, 0.0869, -0.3835]) tensor([0.4899, 0.1257, 0.2366, 0.1478]) -Greedy action tensor([ 0.8751, -0.6960, 0.0036, -0.6503]) tensor([0.5424, 0.1127, 0.2269, 0.1180]) -Greedy action tensor([ 0.5445, -0.1169, 0.0646, -0.4637]) tensor([0.4000, 0.2065, 0.2476, 0.1460]) -Greedy action tensor([ 0.8232, -0.6282, -0.1481, -0.2614]) tensor([0.5126, 0.1201, 0.1941, 0.1733]) -Greedy action tensor([ 0.9558, -1.0033, 0.1148, -0.6027]) tensor([0.5609, 0.0791, 0.2419, 0.1181]) -Greedy action tensor([ 0.7332, -0.4242, -0.1120, -0.4193]) tensor([0.4855, 0.1526, 0.2085, 0.1534]) -Greedy action tensor([ 0.9867, -0.6413, 0.2240, -0.2812]) tensor([0.5144, 0.1010, 0.2399, 0.1448]) -Greedy action tensor([ 0.5442, -0.1219, 0.0262, -0.1788]) tensor([0.3854, 0.1980, 0.2296, 0.1870]) -Greedy action tensor([ 0.4636, -0.1622, -0.1706, -0.3229]) tensor([0.3967, 0.2122, 0.2104, 0.1807]) -Greedy action tensor([ 0.5930, -0.3685, -0.1269, -0.1608]) tensor([0.4274, 0.1634, 0.2081, 0.2011]) -Greedy action tensor([ 0.8569, -0.5369, 0.1668, -0.5021]) tensor([0.4984, 0.1237, 0.2499, 0.1280]) -Greedy action tensor([ 0.7891, -0.5559, -0.0470, -0.4910]) tensor([0.5071, 0.1321, 0.2198, 0.1410]) -Greedy action tensor([ 0.6382, -0.3805, -0.0441, -0.0784]) tensor([0.4246, 0.1533, 0.2146, 0.2074]) -Greedy action tensor([ 0.2691, 0.1652, -0.1060, -0.0683]) tensor([0.3028, 0.2729, 0.2081, 0.2161]) -Greedy action tensor([ 0.8466, -0.6614, 0.0613, -0.4800]) tensor([0.5147, 0.1139, 0.2347, 0.1366]) -Greedy action tensor([ 0.7766, -0.5726, 0.0466, -0.4138]) tensor([0.4889, 0.1268, 0.2356, 0.1487]) -Greedy action tensor([ 0.6950, -0.4865, -0.0638, -0.4434]) tensor([0.4772, 0.1464, 0.2235, 0.1529]) -Greedy action tensor([ 0.4893, -0.1141, 0.1010, -0.4614]) tensor([0.3829, 0.2094, 0.2597, 0.1480]) -Greedy action tensor([ 0.7921, 0.0781, -0.0707, 0.0248]) tensor([0.4209, 0.2061, 0.1776, 0.1954]) -Greedy action tensor([ 0.8031, -0.4974, -0.0633, -0.3935]) tensor([0.5012, 0.1365, 0.2107, 0.1515]) -Greedy action tensor([ 0.8193, -0.7337, 0.0627, -0.2848]) tensor([0.4969, 0.1052, 0.2332, 0.1647]) -Greedy action tensor([ 0.2258, 0.4266, -0.0629, 0.0140]) tensor([0.2645, 0.3233, 0.1982, 0.2140]) -Greedy action tensor([ 0.4880, -0.2178, -0.1047, -0.0186]) tensor([0.3775, 0.1864, 0.2087, 0.2275]) -Greedy action tensor([ 0.6811, -0.3162, -0.0476, -0.1069]) tensor([0.4336, 0.1599, 0.2092, 0.1972]) -Greedy action tensor([ 0.8842, -0.5727, 0.0933, -0.2444]) tensor([0.4975, 0.1159, 0.2256, 0.1609]) -Greedy action tensor([ 0.6095, -0.2060, 0.2235, 0.0765]) tensor([0.3691, 0.1633, 0.2509, 0.2166]) -Greedy action tensor([ 1.0476, -0.4879, 0.0483, -0.2438]) tensor([0.5381, 0.1159, 0.1981, 0.1479]) -Greedy action tensor([ 0.9245, -1.0296, -0.0269, -0.4646]) tensor([0.5627, 0.0797, 0.2173, 0.1403]) -Greedy action tensor([ 1.2359, -0.2164, 0.5699, 1.1124]) tensor([0.3800, 0.0889, 0.1952, 0.3359]) -Greedy action tensor([ 1.1592, 0.2335, -0.3779, -0.1747]) tensor([0.5334, 0.2114, 0.1147, 0.1405]) -Greedy action tensor([-1.1006, -1.5753, 0.7034, 0.4280]) tensor([0.0813, 0.0505, 0.4935, 0.3747]) -Greedy action tensor([ 0.1598, -1.1169, 0.0113, -0.4804]) tensor([0.3748, 0.1046, 0.3231, 0.1976]) -Greedy action tensor([-0.7740, -1.1899, -0.0364, -1.1324]) tensor([0.2247, 0.1483, 0.4699, 0.1571]) -Greedy action tensor([ 1.5375, -0.5878, -0.0028, 0.9464]) tensor([0.5298, 0.0633, 0.1135, 0.2934]) -Greedy action tensor([ 0.6460, -0.1798, 0.7518, 0.7201]) tensor([0.2758, 0.1207, 0.3065, 0.2970]) -Greedy action tensor([ 1.3440, -0.3875, 0.0503, 0.5301]) tensor([0.5279, 0.0934, 0.1448, 0.2339]) -Greedy action tensor([ 0.0508, -1.7305, 0.1237, -0.6185]) tensor([0.3628, 0.0611, 0.3903, 0.1858]) -Greedy action tensor([-1.0381, -1.8708, 1.1283, 0.0658]) tensor([0.0759, 0.0330, 0.6622, 0.2289]) -Greedy action tensor([ 0.1270, -1.2923, -0.5754, 1.0341]) tensor([0.2373, 0.0574, 0.1176, 0.5878]) -Greedy action tensor([ 0.5510, -0.5800, 0.1379, -0.4760]) tensor([0.4269, 0.1378, 0.2825, 0.1529]) -Greedy action tensor([ 0.3090, -0.1978, 0.5241, -0.8338]) tensor([0.3163, 0.1905, 0.3922, 0.1009]) -Greedy action tensor([ 0.5504, -0.2024, 0.7833, -0.2666]) tensor([0.3150, 0.1484, 0.3976, 0.1391]) -Greedy action tensor([-0.8678, -0.3382, -0.1174, 0.0111]) tensor([0.1384, 0.2351, 0.2931, 0.3333]) -Greedy action tensor([ 0.3473, -0.7446, -0.6124, -0.4451]) tensor([0.4606, 0.1545, 0.1764, 0.2085]) -Greedy action tensor([-0.9533, -0.6961, 0.9808, -1.0262]) tensor([0.0986, 0.1275, 0.6822, 0.0917]) -Greedy action tensor([ 0.2677, -1.1143, -0.7868, -0.0623]) tensor([0.4313, 0.1083, 0.1503, 0.3101]) -Greedy action tensor([-0.2792, -0.9886, 0.4871, -0.8601]) tensor([0.2379, 0.1170, 0.5119, 0.1331]) -Greedy action tensor([-0.6283, -1.4912, 0.1887, 0.1401]) tensor([0.1712, 0.0722, 0.3875, 0.3691]) -Greedy action tensor([ 1.2533, -0.0351, -0.4793, 0.3881]) tensor([0.5338, 0.1472, 0.0944, 0.2247]) -Greedy action tensor([-0.7331, -1.0278, 0.9124, 0.1416]) tensor([0.1072, 0.0799, 0.5558, 0.2571]) -Greedy action tensor([-0.6473, -0.3539, -0.3721, -0.2026]) tensor([0.1916, 0.2570, 0.2524, 0.2990]) -Greedy action tensor([ 0.5359, 0.7231, 0.6821, -0.4920]) tensor([0.2687, 0.3241, 0.3111, 0.0961]) -Greedy action tensor([-0.7499, -0.6869, 0.3534, -0.6938]) tensor([0.1630, 0.1736, 0.4911, 0.1723]) -Greedy action tensor([-0.1328, 0.2340, 0.4934, -0.6968]) tensor([0.2048, 0.2956, 0.3831, 0.1165]) -Greedy action tensor([-0.9880, -0.5976, 1.5361, -1.0502]) tensor([0.0629, 0.0929, 0.7850, 0.0591]) -Greedy action tensor([0.4964, 0.5541, 0.0461, 0.4739]) tensor([0.2721, 0.2883, 0.1735, 0.2661]) -Greedy action tensor([-0.4681, -0.9425, -0.1551, -1.1332]) tensor([0.2854, 0.1776, 0.3903, 0.1468]) -Greedy action tensor([-0.5831, -0.1550, 0.8058, 0.1890]) tensor([0.1148, 0.1762, 0.4605, 0.2485]) -Greedy action tensor([0.6759, 0.2967, 0.6060, 0.4236]) tensor([0.2947, 0.2017, 0.2747, 0.2289]) -Greedy action tensor([ 0.6074, 0.1770, 0.3025, -0.6072]) tensor([0.3725, 0.2422, 0.2746, 0.1106]) -Greedy action tensor([ 0.2109, -1.6513, -0.5173, -0.7879]) tensor([0.4984, 0.0774, 0.2406, 0.1836]) -Greedy action tensor([ 1.7890, -1.2528, 1.8957, -0.2114]) tensor([0.4356, 0.0208, 0.4847, 0.0589]) -Greedy action tensor([-0.1301, 0.1155, -0.4254, -0.1659]) tensor([0.2508, 0.3206, 0.1867, 0.2419]) -Greedy action tensor([-0.0205, -0.3257, -0.3959, 0.4499]) tensor([0.2485, 0.1831, 0.1707, 0.3977]) -Greedy action tensor([ 0.9444, -0.2710, 0.3234, -0.0106]) tensor([0.4507, 0.1337, 0.2422, 0.1734]) -Greedy action tensor([ 0.7343, 0.8596, -0.1559, -0.6728]) tensor([0.3586, 0.4064, 0.1472, 0.0878]) -Greedy action tensor([-1.0770, -0.0727, -0.5463, -0.6047]) tensor([0.1422, 0.3881, 0.2417, 0.2280]) -Greedy action tensor([-0.9289, -0.5263, -1.1169, -0.4307]) tensor([0.2012, 0.3010, 0.1667, 0.3311]) -Greedy action tensor([-0.4307, -1.6262, 0.0034, -0.3953]) tensor([0.2576, 0.0779, 0.3976, 0.2669]) -Greedy action tensor([ 1.0477, -0.0468, 1.5474, -1.2980]) tensor([0.3248, 0.1087, 0.5354, 0.0311]) -Greedy action tensor([ 0.2635, -0.0272, -0.0553, -0.4147]) tensor([0.3353, 0.2507, 0.2438, 0.1702]) -Greedy action tensor([-0.2168, 0.1556, 0.0995, -0.3126]) tensor([0.2113, 0.3067, 0.2900, 0.1920]) -Greedy action tensor([-0.7144, -0.8603, -0.2722, 0.0736]) tensor([0.1780, 0.1538, 0.2769, 0.3913]) -Greedy action tensor([ 0.4969, -0.9971, -0.0277, -0.2300]) tensor([0.4348, 0.0976, 0.2573, 0.2102]) -Greedy action tensor([ 0.4683, -0.0036, -0.2194, -0.6551]) tensor([0.4079, 0.2545, 0.2051, 0.1326]) -Greedy action tensor([ 0.4357, -1.7289, 0.9009, -0.3108]) tensor([0.3144, 0.0361, 0.5005, 0.1490]) -Greedy action tensor([ 0.9904, -1.0680, -0.8405, 0.9529]) tensor([0.4442, 0.0567, 0.0712, 0.4279]) -Greedy action tensor([-0.3797, -0.1136, 1.0692, -0.7161]) tensor([0.1374, 0.1793, 0.5851, 0.0982]) -Greedy action tensor([ 1.1588, -0.1495, -0.2021, 0.1047]) tensor([0.5333, 0.1441, 0.1367, 0.1859]) -Greedy action tensor([ 1.6524, -0.3881, 0.3553, 0.8980]) tensor([0.5337, 0.0694, 0.1459, 0.2510]) -Greedy action tensor([-0.8558, -1.1241, 2.2424, -0.9426]) tensor([0.0403, 0.0308, 0.8920, 0.0369]) -Greedy action tensor([ 1.1103, -1.6936, 0.4447, 0.9036]) tensor([0.4188, 0.0254, 0.2152, 0.3406]) -Greedy action tensor([-0.7430, -0.9809, -0.9257, -0.2106]) tensor([0.2312, 0.1823, 0.1926, 0.3938]) -Greedy action tensor([ 0.5547, -0.9726, 0.6628, 1.5450]) tensor([0.1991, 0.0432, 0.2218, 0.5359]) -Greedy action tensor([-0.1834, -0.0034, -0.8009, -0.3139]) tensor([0.2767, 0.3312, 0.1492, 0.2428]) -Greedy action tensor([ 0.8521, -0.7307, 1.2727, -0.4224]) tensor([0.3325, 0.0683, 0.5063, 0.0929]) -Greedy action tensor([ 0.9802, -0.7601, -0.5002, 0.9708]) tensor([0.4178, 0.0733, 0.0951, 0.4139]) -Greedy action tensor([-0.1877, -0.1794, 0.5582, -0.3999]) tensor([0.2030, 0.2047, 0.4281, 0.1642]) -Greedy action tensor([ 1.2444, 0.0487, -0.8210, -0.1916]) tensor([0.5998, 0.1815, 0.0760, 0.1427]) -Greedy action tensor([ 0.5880, 0.0548, -0.5563, -0.2029]) tensor([0.4240, 0.2488, 0.1350, 0.1923]) -Greedy action tensor([-0.0490, -0.0926, -1.0398, -0.7636]) tensor([0.3549, 0.3397, 0.1318, 0.1737]) -Greedy action tensor([ 0.2669, -0.3600, 0.6642, -0.1737]) tensor([0.2728, 0.1457, 0.4059, 0.1756]) -Greedy action tensor([-1.2795, -1.0685, -0.6985, 1.0024]) tensor([0.0724, 0.0894, 0.1294, 0.7089]) -Greedy action tensor([-0.0581, -0.5830, -1.1808, -0.0208]) tensor([0.3384, 0.2002, 0.1101, 0.3513]) -Greedy action tensor([ 0.0801, -0.1337, 0.2802, 0.2253]) tensor([0.2389, 0.1929, 0.2919, 0.2763]) -Greedy action tensor([ 0.4133, -1.0700, 1.0912, 0.0137]) tensor([0.2586, 0.0587, 0.5093, 0.1734]) -Greedy action tensor([-0.0414, -0.5514, -0.4004, -0.9517]) tensor([0.3702, 0.2223, 0.2585, 0.1490]) -Greedy action tensor([ 0.1588, 0.4505, 0.4261, -0.1130]) tensor([0.2269, 0.3037, 0.2964, 0.1729]) -Greedy action tensor([ 0.2623, 0.2871, -0.8164, 1.3183]) tensor([0.1908, 0.1956, 0.0649, 0.5486]) -Greedy action tensor([ 0.4245, -0.8886, 1.2154, -0.7231]) tensor([0.2637, 0.0709, 0.5816, 0.0837]) -Greedy action tensor([ 0.4989, 0.6272, -0.1974, 0.6520]) tensor([0.2631, 0.2991, 0.1311, 0.3066]) -Greedy action tensor([ 0.4920, -0.1606, 0.4516, -0.1831]) tensor([0.3344, 0.1741, 0.3212, 0.1703]) -Greedy action tensor([ 0.8255, 0.1946, 0.0617, -0.1720]) tensor([0.4225, 0.2248, 0.1968, 0.1558]) -Greedy action tensor([ 1.8345, -1.1606, 0.3189, 1.0103]) tensor([0.5854, 0.0293, 0.1286, 0.2567]) -Greedy action tensor([ 1.9584, -1.3826, 0.6218, 0.5804]) tensor([0.6451, 0.0228, 0.1695, 0.1626]) -Greedy action tensor([-0.1064, -1.6666, 0.9446, -0.3182]) tensor([0.2049, 0.0431, 0.5862, 0.1658]) -Greedy action tensor([ 0.1522, -0.5581, -1.2340, 0.4618]) tensor([0.3221, 0.1583, 0.0805, 0.4390]) -Greedy action tensor([ 0.5424, -0.8404, 0.3886, 0.6635]) tensor([0.3089, 0.0775, 0.2649, 0.3487]) -Greedy action tensor([-0.0212, -1.1191, 0.3765, -0.7399]) tensor([0.3022, 0.1008, 0.4497, 0.1473]) -Greedy action tensor([ 0.0836, -0.6889, 0.8771, -0.7051]) tensor([0.2423, 0.1119, 0.5357, 0.1101]) -Greedy action tensor([ 1.3361, -0.0091, -0.3957, 0.3498]) tensor([0.5523, 0.1439, 0.0977, 0.2060]) -Greedy action tensor([ 1.3013, -0.1107, -0.3250, 0.3249]) tensor([0.5504, 0.1341, 0.1082, 0.2073]) -Greedy action tensor([ 1.7490, -0.5163, -0.6793, 0.3425]) tensor([0.6959, 0.0722, 0.0614, 0.1705]) -Greedy action tensor([ 1.4023, -0.6407, -0.5229, 0.4738]) tensor([0.5986, 0.0776, 0.0873, 0.2365]) -Greedy action tensor([ 1.5071, -0.2340, -0.4802, 0.5503]) tensor([0.5894, 0.1033, 0.0808, 0.2264]) -Greedy action tensor([ 1.1454, -0.1116, -0.7046, 0.1470]) tensor([0.5524, 0.1572, 0.0869, 0.2036]) -Greedy action tensor([ 1.2115, -0.2944, -0.0473, 0.2990]) tensor([0.5243, 0.1163, 0.1489, 0.2105]) -Greedy action tensor([ 1.3690, -0.2325, -0.6819, 0.2752]) tensor([0.6005, 0.1211, 0.0772, 0.2012]) -Greedy action tensor([ 1.7289, -0.2253, -0.8193, 0.3510]) tensor([0.6794, 0.0962, 0.0531, 0.1713]) -Greedy action tensor([ 1.5765, -0.9711, -0.3273, 0.8496]) tensor([0.5846, 0.0458, 0.0871, 0.2826]) -Greedy action tensor([ 1.0244, -0.0129, -1.1685, 0.2615]) tensor([0.5175, 0.1834, 0.0578, 0.2413]) -Greedy action tensor([ 1.6895, -0.4920, -0.8190, 0.1408]) tensor([0.7108, 0.0802, 0.0579, 0.1511]) -Greedy action tensor([ 2.0223, -0.8622, -0.6393, 0.3153]) tensor([0.7650, 0.0428, 0.0534, 0.1388]) -Greedy action tensor([ 1.1137, 0.2237, -0.3416, 0.6354]) tensor([0.4417, 0.1814, 0.1031, 0.2738]) -Greedy action tensor([ 1.5963, -0.7944, -0.4976, 0.3793]) tensor([0.6619, 0.0606, 0.0815, 0.1960]) -Greedy action tensor([ 1.5346, -0.7399, -0.4178, 0.3081]) tensor([0.6502, 0.0669, 0.0923, 0.1907]) -Greedy action tensor([ 1.4345, 0.0283, -0.6350, 0.2411]) tensor([0.5972, 0.1464, 0.0754, 0.1811]) -Greedy action tensor([ 1.1212, 0.0735, -0.5054, 0.2224]) tensor([0.5117, 0.1795, 0.1006, 0.2083]) -Greedy action tensor([ 1.4107, -0.0244, -0.1848, 0.3665]) tensor([0.5578, 0.1328, 0.1131, 0.1963]) -Greedy action tensor([ 1.2749, -0.3757, -0.2992, 0.2236]) tensor([0.5719, 0.1098, 0.1185, 0.1999]) -Greedy action tensor([ 1.7544, -0.6531, -0.8899, 0.1255]) tensor([0.7368, 0.0663, 0.0524, 0.1445]) -Greedy action tensor([ 1.6537, -0.6257, -0.6412, 0.3670]) tensor([0.6760, 0.0692, 0.0681, 0.1867]) -Greedy action tensor([ 0.7358, -0.2674, 0.0149, -0.1969]) tensor([0.4451, 0.1632, 0.2165, 0.1752]) -Greedy action tensor([ 1.4727, -0.7553, -0.3427, 0.3647]) tensor([0.6247, 0.0673, 0.1017, 0.2063]) -Greedy action tensor([ 1.5200, -0.1503, -0.8716, 0.5655]) tensor([0.6007, 0.1130, 0.0550, 0.2313]) -Greedy action tensor([ 1.1558, -0.5708, -0.3547, 0.5080]) tensor([0.5203, 0.0926, 0.1149, 0.2722]) -Greedy action tensor([ 2.0406, 0.0883, -0.4393, 0.5378]) tensor([0.6905, 0.0980, 0.0578, 0.1536]) -Greedy action tensor([ 2.2744, -0.0512, -0.1049, 0.1972]) tensor([0.7601, 0.0743, 0.0704, 0.0952]) -Greedy action tensor([ 1.7298, -0.1480, -0.2483, 0.5613]) tensor([0.6242, 0.0955, 0.0863, 0.1940]) -Greedy action tensor([ 1.4423, -0.2779, -0.7721, 0.3083]) tensor([0.6211, 0.1112, 0.0678, 0.1998]) -Greedy action tensor([ 0.9879, -0.2280, -0.2925, 0.8056]) tensor([0.4153, 0.1231, 0.1154, 0.3461]) -Greedy action tensor([ 1.3881, -0.0562, -0.2031, 0.1336]) tensor([0.5798, 0.1368, 0.1181, 0.1654]) -Greedy action tensor([ 1.0245, -0.4737, -0.3650, 0.2555]) tensor([0.5165, 0.1154, 0.1287, 0.2394]) -Greedy action tensor([ 1.7872, -0.3545, -0.5983, 0.2986]) tensor([0.6968, 0.0818, 0.0641, 0.1573]) -Greedy action tensor([ 1.4845, -0.7594, -0.1404, 0.0565]) tensor([0.6482, 0.0687, 0.1276, 0.1554]) -Greedy action tensor([ 1.3786, -0.3142, -0.2839, -0.0219]) tensor([0.6172, 0.1136, 0.1171, 0.1521]) -Greedy action tensor([ 1.5292, -0.1821, -0.2072, 0.3432]) tensor([0.6016, 0.1087, 0.1060, 0.1837]) -Greedy action tensor([ 1.1127, -0.4931, 0.0621, 0.1667]) tensor([0.5158, 0.1035, 0.1804, 0.2003]) -Greedy action tensor([ 1.0670, 0.0535, -0.3835, 0.2468]) tensor([0.4907, 0.1781, 0.1151, 0.2161]) -Greedy action tensor([ 2.1749, -1.1606, -0.1624, 0.7145]) tensor([0.7330, 0.0261, 0.0708, 0.1702]) -Greedy action tensor([ 1.5532, -0.3439, -0.0301, 0.4820]) tensor([0.5890, 0.0883, 0.1209, 0.2018]) -Greedy action tensor([ 2.0290, -0.8727, -0.3327, 0.2991]) tensor([0.7539, 0.0414, 0.0711, 0.1337]) -Greedy action tensor([ 1.3076, -0.3760, -0.5129, 0.1578]) tensor([0.6008, 0.1116, 0.0973, 0.1903]) -Greedy action tensor([ 1.6630, -0.8489, 0.0436, 0.2110]) tensor([0.6608, 0.0536, 0.1309, 0.1547]) -Greedy action tensor([ 1.5546, -0.8699, -0.3664, 0.4568]) tensor([0.6375, 0.0564, 0.0934, 0.2127]) -Greedy action tensor([ 1.6435, -0.6281, -0.7280, 0.8467]) tensor([0.6071, 0.0626, 0.0567, 0.2736]) -Greedy action tensor([ 1.8181, -0.6826, 0.0113, 0.2770]) tensor([0.6848, 0.0562, 0.1124, 0.1466]) -Greedy action tensor([ 1.1552, -0.0404, -0.7988, 0.2287]) tensor([0.5434, 0.1644, 0.0770, 0.2152]) -Greedy action tensor([ 1.3434, -0.2029, -0.1348, 0.3881]) tensor([0.5477, 0.1167, 0.1249, 0.2107]) -Greedy action tensor([ 1.2227, -0.2778, 0.0090, 0.1288]) tensor([0.5391, 0.1202, 0.1602, 0.1805]) -Greedy action tensor([ 0.9251, 0.2174, -0.9844, 0.0524]) tensor([0.4857, 0.2394, 0.0720, 0.2029]) -Greedy action tensor([ 1.5400, -0.3927, -0.4112, 0.4915]) tensor([0.6108, 0.0884, 0.0868, 0.2140]) -Greedy action tensor([ 1.3400, -0.6898, -0.1120, 0.1493]) tensor([0.5990, 0.0787, 0.1402, 0.1821]) -Greedy action tensor([ 1.5659, -0.0169, -0.4578, 0.1991]) tensor([0.6279, 0.1290, 0.0830, 0.1601]) -Greedy action tensor([ 1.5612, -0.4744, 0.1478, 0.2814]) tensor([0.6053, 0.0791, 0.1473, 0.1683]) -Greedy action tensor([ 1.4432, 0.0741, -0.3814, 0.3522]) tensor([0.5709, 0.1452, 0.0921, 0.1918]) -Greedy action tensor([ 0.5997, -0.1953, 0.0426, 0.2287]) tensor([0.3684, 0.1664, 0.2110, 0.2542]) -Greedy action tensor([ 1.6620, -0.1447, -0.6960, 0.4165]) tensor([0.6466, 0.1062, 0.0612, 0.1861]) -Greedy action tensor([ 1.4244, -0.7884, -0.0233, 0.1654]) tensor([0.6141, 0.0672, 0.1444, 0.1744]) -Greedy action tensor([ 1.4368, -0.6072, -0.4901, 0.0735]) tensor([0.6532, 0.0846, 0.0951, 0.1671]) -Greedy action tensor([ 1.8827, -0.7442, -0.3573, 0.7940]) tensor([0.6599, 0.0477, 0.0703, 0.2221]) -Greedy action tensor([ 1.0894, -0.2707, -0.2714, 0.6675]) tensor([0.4611, 0.1183, 0.1182, 0.3024]) -Greedy action tensor([ 1.5866, -0.2781, -0.4997, -0.0058]) tensor([0.6745, 0.1045, 0.0837, 0.1372]) -Greedy action tensor([ 1.9284, -0.4935, -0.1207, 0.7059]) tensor([0.6613, 0.0587, 0.0852, 0.1948]) -Greedy action tensor([ 1.9442, -1.0723, -0.5797, 0.9041]) tensor([0.6745, 0.0330, 0.0541, 0.2384]) -Greedy action tensor([ 1.6468, -0.8177, -0.2374, 0.5748]) tensor([0.6332, 0.0539, 0.0962, 0.2168]) -Greedy action tensor([ 1.9157, -0.6317, -0.4890, -0.2258]) tensor([0.7776, 0.0609, 0.0702, 0.0913]) -Greedy action tensor([ 1.9837, -0.6582, -0.5197, 1.1673]) tensor([0.6269, 0.0447, 0.0513, 0.2771]) -Greedy action tensor([ 1.4427, -0.1800, -0.7912, 0.1846]) tensor([0.6295, 0.1242, 0.0674, 0.1789]) -Greedy action tensor([ 1.1636, -0.0369, -0.5040, 0.2104]) tensor([0.5333, 0.1605, 0.1006, 0.2056]) -Greedy action tensor([ 1.3118, -0.2575, -0.5324, 0.4112]) tensor([0.5641, 0.1174, 0.0892, 0.2292]) -Greedy action tensor([ 1.9963, -0.8813, -0.1158, 0.0658]) tensor([0.7562, 0.0426, 0.0915, 0.1097]) -Greedy action tensor([ 0.8607, -0.3299, -0.1388, 0.2450]) tensor([0.4520, 0.1374, 0.1664, 0.2442]) -Greedy action tensor([ 1.2249, -0.3739, -0.1753, 0.6631]) tensor([0.4953, 0.1001, 0.1221, 0.2824]) -Greedy action tensor([ 1.2506, -0.1390, -0.6384, 0.1992]) tensor([0.5715, 0.1424, 0.0864, 0.1997]) -Greedy action tensor([ 1.1554, -0.4805, -0.2119, -0.0728]) tensor([0.5739, 0.1118, 0.1462, 0.1680]) -Greedy action tensor([ 2.2821, -0.6570, -0.5856, 0.2014]) tensor([0.8100, 0.0429, 0.0460, 0.1011]) -Greedy action tensor([ 1.4563, 0.0189, -0.4166, 0.1227]) tensor([0.6043, 0.1436, 0.0929, 0.1593]) -Greedy action tensor([ 1.3931, -0.5874, -0.7068, 0.7079]) tensor([0.5667, 0.0782, 0.0694, 0.2856]) -Greedy action tensor([ 1.7748, -0.1231, -0.8857, 0.5454]) tensor([0.6613, 0.0991, 0.0462, 0.1934]) -Greedy action tensor([ 0.8908, -0.5148, -0.2332, 0.1435]) tensor([0.4893, 0.1200, 0.1590, 0.2317]) -Greedy action tensor([-1.8466, -0.4059, 0.6193, -0.1380]) tensor([0.0444, 0.1876, 0.5229, 0.2452]) -Greedy action tensor([-1.8813, -0.4428, 0.6350, -0.1537]) tensor([0.0431, 0.1815, 0.5332, 0.2423]) -Greedy action tensor([-1.8364, -0.3104, 0.5937, -0.1210]) tensor([0.0444, 0.2043, 0.5045, 0.2469]) -Greedy action tensor([-1.1228, 0.5786, -0.0284, -0.4059]) tensor([0.0868, 0.4760, 0.2594, 0.1778]) -Greedy action tensor([-1.9304, -0.4487, 0.6754, -0.1708]) tensor([0.0404, 0.1778, 0.5471, 0.2347]) -Greedy action tensor([-1.8527, -0.3612, 0.6060, -0.1344]) tensor([0.0440, 0.1957, 0.5148, 0.2455]) -Greedy action tensor([-1.9005, -0.4264, 0.6427, -0.1591]) tensor([0.0420, 0.1835, 0.5346, 0.2398]) -Greedy action tensor([-1.2630, -0.5171, 0.2932, 0.1833]) tensor([0.0827, 0.1743, 0.3919, 0.3511]) -Greedy action tensor([-1.8574, -0.3330, 0.6413, -0.1291]) tensor([0.0428, 0.1963, 0.5202, 0.2407]) -Greedy action tensor([-1.5787, -0.4730, 0.4887, 0.0203]) tensor([0.0593, 0.1791, 0.4684, 0.2932]) -Greedy action tensor([-1.9091, -0.4536, 0.6556, -0.1609]) tensor([0.0416, 0.1784, 0.5409, 0.2391]) -Greedy action tensor([-1.9187, -0.4517, 0.6581, -0.1685]) tensor([0.0412, 0.1788, 0.5425, 0.2374]) -Greedy action tensor([-0.9853, 0.5888, 0.0731, -0.5398]) tensor([0.0974, 0.4700, 0.2806, 0.1520]) -Greedy action tensor([-1.8961, -0.4644, 0.6445, -0.1573]) tensor([0.0424, 0.1776, 0.5384, 0.2415]) -Greedy action tensor([-1.7451, -0.2266, 0.5766, -0.0358]) tensor([0.0470, 0.2145, 0.4789, 0.2596]) -Greedy action tensor([-1.7764, -0.1351, 0.5932, -0.0403]) tensor([0.0444, 0.2291, 0.4746, 0.2519]) -Greedy action tensor([-1.8109, -0.5593, 0.7156, -0.1086]) tensor([0.0445, 0.1554, 0.5562, 0.2439]) -Greedy action tensor([-1.9150, -0.3825, 0.6439, -0.1627]) tensor([0.0411, 0.1904, 0.5313, 0.2372]) -Greedy action tensor([-1.9257, -0.4429, 0.6571, -0.1712]) tensor([0.0409, 0.1804, 0.5419, 0.2367]) -Greedy action tensor([-1.7280, -0.0659, 0.5056, -0.1267]) tensor([0.0486, 0.2563, 0.4539, 0.2412]) -Greedy action tensor([-1.8900, -0.3304, 0.6351, -0.1423]) tensor([0.0417, 0.1983, 0.5207, 0.2393]) -Greedy action tensor([-1.3529, -0.6098, 0.4045, -0.0178]) tensor([0.0787, 0.1656, 0.4565, 0.2992]) -Greedy action tensor([-1.8824, -0.2922, 0.6133, -0.1311]) tensor([0.0420, 0.2061, 0.5097, 0.2421]) -Greedy action tensor([-1.6832, -0.4863, 0.5379, -0.0833]) tensor([0.0541, 0.1791, 0.4988, 0.2680]) -Greedy action tensor([-1.7863, -0.4457, 0.7213, 0.0464]) tensor([0.0428, 0.1637, 0.5258, 0.2677]) -Greedy action tensor([0.1526, 0.3347, 0.9076, 1.7068]) tensor([0.1104, 0.1324, 0.2349, 0.5223]) -Greedy action tensor([-1.4699, -0.5165, 0.4153, -0.0727]) tensor([0.0703, 0.1824, 0.4631, 0.2843]) -Greedy action tensor([-1.4345, -0.3681, 0.4127, 0.1009]) tensor([0.0672, 0.1951, 0.4259, 0.3118]) -Greedy action tensor([-1.8641, -0.4205, 0.6219, -0.1410]) tensor([0.0438, 0.1854, 0.5257, 0.2451]) -Greedy action tensor([-1.8569, -0.3635, 0.6384, -0.1189]) tensor([0.0430, 0.1914, 0.5212, 0.2444]) -Greedy action tensor([-1.9191, -0.4029, 0.6497, -0.1612]) tensor([0.0410, 0.1866, 0.5347, 0.2377]) -Greedy action tensor([-1.8812, -0.4199, 0.6335, -0.1460]) tensor([0.0428, 0.1847, 0.5296, 0.2429]) -Greedy action tensor([-1.4873, -0.4581, 0.6376, 0.3974]) tensor([0.0533, 0.1492, 0.4464, 0.3511]) -Greedy action tensor([-1.9074, -0.4366, 0.6478, -0.1609]) tensor([0.0417, 0.1817, 0.5373, 0.2393]) -Greedy action tensor([-0.8504, 0.8788, 0.0363, 0.2925]) tensor([0.0820, 0.4620, 0.1990, 0.2571]) -Greedy action tensor([-1.8463, -0.4001, 0.6575, -0.1162]) tensor([0.0433, 0.1837, 0.5290, 0.2440]) -Greedy action tensor([-0.4942, -0.3490, 0.1965, 0.2371]) tensor([0.1605, 0.1856, 0.3203, 0.3336]) -Greedy action tensor([-1.8817, -0.3984, 0.6491, -0.1322]) tensor([0.0422, 0.1858, 0.5296, 0.2425]) -Greedy action tensor([-1.9048, -0.4387, 0.6478, -0.1603]) tensor([0.0418, 0.1813, 0.5374, 0.2395]) -Greedy action tensor([-1.9049, -0.4258, 0.6480, -0.1583]) tensor([0.0417, 0.1831, 0.5359, 0.2393]) -Greedy action tensor([-1.9008, -0.4142, 0.6427, -0.1511]) tensor([0.0418, 0.1850, 0.5324, 0.2407]) -Greedy action tensor([-1.9068, -0.4320, 0.6492, -0.1595]) tensor([0.0417, 0.1821, 0.5370, 0.2392]) -Greedy action tensor([-1.8883, -0.2569, 0.6168, -0.1292]) tensor([0.0414, 0.2115, 0.5068, 0.2403]) -Greedy action tensor([-1.9246, -0.4390, 0.6573, -0.1701]) tensor([0.0409, 0.1809, 0.5415, 0.2367]) -Greedy action tensor([-1.6583, -0.3041, 0.5414, 0.0228]) tensor([0.0519, 0.2011, 0.4683, 0.2788]) -Greedy action tensor([-1.2876, -0.4274, 0.5800, -0.4979]) tensor([0.0831, 0.1963, 0.5376, 0.1830]) -Greedy action tensor([-1.6145, -0.3313, 0.5333, 0.0270]) tensor([0.0545, 0.1968, 0.4671, 0.2816]) -Greedy action tensor([-1.8154, -0.2947, 0.5852, -0.0781]) tensor([0.0449, 0.2053, 0.4949, 0.2549]) -Greedy action tensor([-1.6777, -0.4583, 0.5270, -0.0538]) tensor([0.0540, 0.1827, 0.4895, 0.2738]) -Greedy action tensor([-1.9428, -0.4484, 0.6673, -0.1793]) tensor([0.0402, 0.1791, 0.5464, 0.2343]) -Greedy action tensor([-1.8192, -0.4602, 0.6212, -0.0277]) tensor([0.0447, 0.1740, 0.5131, 0.2682]) -Greedy action tensor([-1.6061, -0.5328, 0.4982, 0.0381]) tensor([0.0578, 0.1690, 0.4740, 0.2992]) -Greedy action tensor([-1.9176, -0.4176, 0.6554, -0.1658]) tensor([0.0411, 0.1840, 0.5381, 0.2367]) -Greedy action tensor([-1.9078, -0.4158, 0.6447, -0.1628]) tensor([0.0416, 0.1852, 0.5347, 0.2385]) -Greedy action tensor([-1.9165, -0.4595, 0.6555, -0.1684]) tensor([0.0414, 0.1779, 0.5426, 0.2381]) -Greedy action tensor([-1.8699, -0.3371, 0.6161, -0.1466]) tensor([0.0430, 0.1992, 0.5168, 0.2410]) -Greedy action tensor([-1.7650, -0.3402, 0.5864, -0.0568]) tensor([0.0472, 0.1963, 0.4959, 0.2606]) -Greedy action tensor([-1.8070, -0.4509, 0.5999, -0.1101]) tensor([0.0466, 0.1810, 0.5177, 0.2546]) -Greedy action tensor([-1.7830, -0.4563, 0.5842, -0.0962]) tensor([0.0480, 0.1808, 0.5119, 0.2592]) -Greedy action tensor([-1.7096, 0.0285, 0.6168, -0.4850]) tensor([0.0492, 0.2797, 0.5037, 0.1674]) -Greedy action tensor([-1.8428, -0.4551, 0.6778, -0.1080]) tensor([0.0433, 0.1733, 0.5381, 0.2453]) -Greedy action tensor([-1.7898, -0.3869, 0.5794, -0.0971]) tensor([0.0472, 0.1919, 0.5044, 0.2564]) -Greedy action tensor([-1.7932, -0.4197, 0.5994, -0.0881]) tensor([0.0467, 0.1846, 0.5115, 0.2572]) -Greedy action tensor([-1.1600, 0.6452, 0.2976, 0.2089]) tensor([0.0653, 0.3973, 0.2806, 0.2568]) -Greedy action tensor([-1.9092, -0.4007, 0.6435, -0.1642]) tensor([0.0415, 0.1877, 0.5331, 0.2377]) -Greedy action tensor([-1.9322, -0.4385, 0.6601, -0.1743]) tensor([0.0406, 0.1809, 0.5428, 0.2356]) -Greedy action tensor([-1.9047, -0.4364, 0.6507, -0.1596]) tensor([0.0418, 0.1813, 0.5377, 0.2392]) -Greedy action tensor([ 0.2370, -0.4068, 1.1997, 1.6172]) tensor([0.1232, 0.0647, 0.3225, 0.4896]) -Greedy action tensor([-1.4852, -0.3095, 0.4363, -0.0434]) tensor([0.0654, 0.2118, 0.4465, 0.2764]) -Greedy action tensor([-1.0891, 0.0528, -1.0546, -1.1663]) tensor([0.1641, 0.5141, 0.1699, 0.1519]) -Greedy action tensor([-1.6669, -0.3725, 0.5222, -0.0258]) tensor([0.0534, 0.1947, 0.4765, 0.2754]) -Greedy action tensor([-1.0389, -0.1618, 0.1333, 0.1019]) tensor([0.1024, 0.2463, 0.3308, 0.3205]) -Greedy action tensor([-1.6487, -0.3096, 0.5284, 0.0238]) tensor([0.0527, 0.2012, 0.4652, 0.2809]) -Greedy action tensor([-1.9231, -0.3985, 0.6519, -0.1688]) tensor([0.0408, 0.1875, 0.5359, 0.2359]) -Greedy action tensor([-0.9585, 0.9896, 0.0785, 0.5064]) tensor([0.0659, 0.4627, 0.1860, 0.2854]) -Greedy action tensor([-1.8684, -0.4037, 0.6396, -0.1217]) tensor([0.0428, 0.1853, 0.5261, 0.2457]) -Greedy action tensor([-1.1334, 0.3459, 0.2616, -0.0620]) tensor([0.0810, 0.3556, 0.3269, 0.2365]) -Greedy action tensor([-0.8801, -0.5162, 0.2224, 0.3076]) tensor([0.1145, 0.1648, 0.3450, 0.3757]) -Greedy action tensor([-1.6740, -0.2372, 0.6199, 0.0555]) tensor([0.0482, 0.2027, 0.4776, 0.2716]) -Greedy action tensor([-1.7747, -0.2188, 0.5963, 0.0310]) tensor([0.0444, 0.2103, 0.4752, 0.2700]) -Greedy action tensor([-1.2953, -0.5075, 0.4579, 0.3854]) tensor([0.0697, 0.1533, 0.4025, 0.3744]) -Greedy action tensor([ 0.8018, -0.5108, -0.1143, -0.4207]) tensor([0.5093, 0.1370, 0.2037, 0.1500]) -Greedy action tensor([ 0.7169, -0.5468, 0.0119, -0.2475]) tensor([0.4634, 0.1310, 0.2290, 0.1767]) -Greedy action tensor([ 0.8226, -0.7782, 0.0459, -0.8307]) tensor([0.5396, 0.1089, 0.2482, 0.1033]) -Greedy action tensor([ 0.7331, -0.2193, -0.0707, 0.0318]) tensor([0.4293, 0.1656, 0.1922, 0.2129]) -Greedy action tensor([ 0.6536, -0.4170, -0.1154, -0.1576]) tensor([0.4443, 0.1523, 0.2059, 0.1974]) -Greedy action tensor([ 0.9800, -0.5630, -0.0419, -0.5298]) tensor([0.5572, 0.1191, 0.2005, 0.1231]) -Greedy action tensor([ 0.8670, -0.5436, -0.2932, -0.4674]) tensor([0.5492, 0.1340, 0.1721, 0.1446]) -Greedy action tensor([ 0.4276, 0.2033, 0.1457, -0.0188]) tensor([0.3132, 0.2502, 0.2362, 0.2004]) -Greedy action tensor([ 0.2343, -0.1327, -0.0808, -0.2455]) tensor([0.3288, 0.2278, 0.2399, 0.2035]) -Greedy action tensor([ 0.7943, -0.2350, -0.1805, -0.1996]) tensor([0.4751, 0.1697, 0.1793, 0.1759]) -Greedy action tensor([ 1.0377, -0.4990, -0.0707, -0.2584]) tensor([0.5498, 0.1183, 0.1815, 0.1504]) -Greedy action tensor([ 0.8487, -0.6361, -0.0532, -0.4259]) tensor([0.5230, 0.1185, 0.2123, 0.1462]) -Greedy action tensor([ 0.9801, -0.3194, -0.0052, -0.6042]) tensor([0.5402, 0.1473, 0.2017, 0.1108]) -Greedy action tensor([ 0.1370, -0.1025, -0.2127, -0.3025]) tensor([0.3188, 0.2509, 0.2248, 0.2055]) -Greedy action tensor([ 0.6923, -0.2115, 0.0339, -0.6326]) tensor([0.4569, 0.1851, 0.2365, 0.1215]) -Greedy action tensor([ 1.0116, -0.8973, 0.0961, -0.5365]) tensor([0.5678, 0.0842, 0.2273, 0.1207]) -Greedy action tensor([ 0.6073, -0.3563, -0.0281, -0.1755]) tensor([0.4222, 0.1611, 0.2237, 0.1930]) -Greedy action tensor([ 0.2526, 0.2722, -0.0769, 0.1788]) tensor([0.2726, 0.2780, 0.1961, 0.2532]) -Greedy action tensor([ 8.7181e-01, -5.4644e-01, 1.5073e-04, -3.4647e-01]) tensor([0.5112, 0.1238, 0.2138, 0.1512]) -Greedy action tensor([ 1.1550, -1.0601, 0.2365, -0.7343]) tensor([0.6026, 0.0658, 0.2405, 0.0911]) -Greedy action tensor([ 0.7859, -0.4703, 0.0050, -0.4001]) tensor([0.4882, 0.1390, 0.2236, 0.1491]) -Greedy action tensor([ 0.7504, -0.2814, -0.0684, -0.2990]) tensor([0.4657, 0.1659, 0.2053, 0.1630]) -Greedy action tensor([ 0.7910, -0.7227, -0.0068, -0.2896]) tensor([0.4976, 0.1095, 0.2241, 0.1689]) -Greedy action tensor([ 1.1359, -0.9392, 0.1081, -0.7000]) tensor([0.6087, 0.0764, 0.2178, 0.0971]) -Greedy action tensor([ 0.6322, -0.1739, -0.0792, -0.0627]) tensor([0.4104, 0.1833, 0.2015, 0.2048]) -Greedy action tensor([ 0.9989, -0.4624, 0.0653, -0.3766]) tensor([0.5326, 0.1235, 0.2094, 0.1346]) -Greedy action tensor([ 0.3881, 0.0014, -0.1319, -0.3474]) tensor([0.3632, 0.2467, 0.2160, 0.1741]) -Greedy action tensor([ 0.5831, 0.0978, -0.4283, -0.1006]) tensor([0.4026, 0.2478, 0.1464, 0.2032]) -Greedy action tensor([ 0.6040, -0.2704, 0.0267, -0.1575]) tensor([0.4089, 0.1706, 0.2296, 0.1910]) -Greedy action tensor([ 0.9475, -0.7535, 0.1648, -0.4319]) tensor([0.5287, 0.0965, 0.2417, 0.1331]) -Greedy action tensor([ 0.7913, -0.3261, 0.0173, -0.3672]) tensor([0.4757, 0.1556, 0.2194, 0.1493]) -Greedy action tensor([ 0.4018, -0.3070, -0.1270, -0.1947]) tensor([0.3799, 0.1870, 0.2239, 0.2092]) -Greedy action tensor([ 0.7732, -0.6298, -0.0014, -0.2421]) tensor([0.4833, 0.1188, 0.2227, 0.1751]) -Greedy action tensor([ 0.7334, -0.4707, -0.1102, -0.0448]) tensor([0.4568, 0.1370, 0.1965, 0.2098]) -Greedy action tensor([ 1.0278, -0.5625, 0.1877, -0.7319]) tensor([0.5532, 0.1128, 0.2388, 0.0952]) -Greedy action tensor([ 0.7652, -0.1334, -0.0569, -0.3872]) tensor([0.4624, 0.1883, 0.2032, 0.1461]) -Greedy action tensor([ 0.9158, -0.5678, -0.0596, -0.4681]) tensor([0.5392, 0.1223, 0.2033, 0.1351]) -Greedy action tensor([ 0.6421, -0.2786, -0.0070, -0.2678]) tensor([0.4304, 0.1714, 0.2249, 0.1733]) -Greedy action tensor([ 0.9509, -0.7877, -0.1145, -0.5216]) tensor([0.5715, 0.1005, 0.1969, 0.1311]) -Greedy action tensor([ 1.1075, -0.7421, -0.1023, -0.2591]) tensor([0.5846, 0.0920, 0.1744, 0.1491]) -Greedy action tensor([ 1.0542, -0.4736, -0.0202, -0.0548]) tensor([0.5295, 0.1149, 0.1808, 0.1747]) -Greedy action tensor([ 0.9631, 0.2288, 0.0361, -0.6342]) tensor([0.4812, 0.2309, 0.1904, 0.0974]) -Greedy action tensor([ 0.6947, -0.2971, -0.0524, -0.2152]) tensor([0.4450, 0.1651, 0.2108, 0.1791]) -Greedy action tensor([ 0.4955, -0.4259, 0.1786, -0.5445]) tensor([0.4033, 0.1605, 0.2937, 0.1425]) -Greedy action tensor([ 0.7259, -0.3075, -0.1709, -0.2597]) tensor([0.4680, 0.1665, 0.1909, 0.1747]) -Greedy action tensor([ 0.5418, 0.2303, -0.1242, 0.0328]) tensor([0.3512, 0.2572, 0.1804, 0.2111]) -Greedy action tensor([ 0.8250, -0.4998, -0.1521, -0.1984]) tensor([0.4996, 0.1328, 0.1880, 0.1795]) -Greedy action tensor([ 0.6655, -0.1723, -0.0758, -0.0826]) tensor([0.4197, 0.1816, 0.2000, 0.1987]) -Greedy action tensor([ 0.9635, -0.6148, 0.1517, -0.4466]) tensor([0.5278, 0.1089, 0.2344, 0.1289]) -Greedy action tensor([ 0.9321, -0.4648, 0.0893, -0.1190]) tensor([0.4932, 0.1220, 0.2123, 0.1724]) -Greedy action tensor([ 0.9774, -0.8892, -0.1762, -0.4230]) tensor([0.5825, 0.0901, 0.1838, 0.1436]) -Greedy action tensor([ 0.8646, -0.3172, 0.1067, -0.4950]) tensor([0.4921, 0.1509, 0.2306, 0.1264]) -Greedy action tensor([ 0.7974, -0.6614, -0.2630, -0.3883]) tensor([0.5307, 0.1234, 0.1838, 0.1621]) -Greedy action tensor([ 1.0073, -0.6144, -0.0291, -0.6106]) tensor([0.5712, 0.1128, 0.2026, 0.1133]) -Greedy action tensor([ 0.5183, 0.0120, -0.0367, -0.1132]) tensor([0.3692, 0.2225, 0.2120, 0.1963]) -Greedy action tensor([ 0.3428, -0.3514, -0.0653, -0.2260]) tensor([0.3662, 0.1829, 0.2435, 0.2074]) -Greedy action tensor([ 0.9289, -0.7379, 0.1378, -0.5366]) tensor([0.5338, 0.1008, 0.2420, 0.1233]) -Greedy action tensor([ 0.6631, -0.5148, -0.0416, -0.3856]) tensor([0.4646, 0.1431, 0.2296, 0.1628]) -Greedy action tensor([ 1.2352, -0.9626, 0.1264, -0.7099]) tensor([0.6313, 0.0701, 0.2083, 0.0903]) -Greedy action tensor([ 0.8503, -0.5744, 0.0174, -0.2936]) tensor([0.5015, 0.1207, 0.2180, 0.1598]) -Greedy action tensor([ 0.4276, -0.1347, -0.1159, -0.0604]) tensor([0.3617, 0.2061, 0.2101, 0.2220]) -Greedy action tensor([ 0.2870, -0.2746, -0.1029, -0.1885]) tensor([0.3486, 0.1988, 0.2360, 0.2166]) -Greedy action tensor([ 0.4956, -0.2265, -0.0228, -0.2929]) tensor([0.3944, 0.1916, 0.2348, 0.1793]) -Greedy action tensor([ 0.5357, -0.2209, -0.0754, -0.0713]) tensor([0.3911, 0.1835, 0.2123, 0.2131]) -Greedy action tensor([ 0.5705, 0.0890, -0.0530, 0.0024]) tensor([0.3676, 0.2271, 0.1970, 0.2083]) -Greedy action tensor([ 0.6867, -0.6210, -0.0543, -0.3500]) tensor([0.4758, 0.1287, 0.2268, 0.1687]) -Greedy action tensor([ 0.9419, -0.5183, 0.0425, -0.3174]) tensor([0.5201, 0.1208, 0.2116, 0.1476]) -Greedy action tensor([ 0.6447, -0.4269, 0.0120, -0.5163]) tensor([0.4573, 0.1566, 0.2429, 0.1432]) -Greedy action tensor([ 1.0207, -0.7384, -0.1016, -0.2986]) tensor([0.5666, 0.0976, 0.1844, 0.1515]) -Greedy action tensor([ 0.7124, -0.3925, 0.0324, -0.2692]) tensor([0.4520, 0.1497, 0.2290, 0.1694]) -Greedy action tensor([ 0.7504, -0.1960, 0.0032, -0.1089]) tensor([0.4376, 0.1698, 0.2073, 0.1853]) -Greedy action tensor([ 1.1704, -0.7251, -0.0771, -0.4574]) tensor([0.6121, 0.0920, 0.1758, 0.1202]) -Greedy action tensor([ 0.7529, 0.1513, -0.1473, -0.0838]) tensor([0.4188, 0.2295, 0.1703, 0.1814]) -Greedy action tensor([ 0.7365, -0.4522, -0.0010, -0.5612]) tensor([0.4864, 0.1482, 0.2326, 0.1329]) -Greedy action tensor([ 0.9063, -0.5032, -0.1725, -0.4387]) tensor([0.5421, 0.1324, 0.1843, 0.1412]) -Greedy action tensor([ 9.0258e-01, -4.8113e-01, -7.1442e-04, -1.5963e-01]) tensor([0.4996, 0.1252, 0.2025, 0.1727]) -Greedy action tensor([ 0.7255, -0.5711, -0.0621, -0.3236]) tensor([0.4811, 0.1316, 0.2189, 0.1685]) -Greedy action tensor([ 0.8548, -0.4602, -0.0702, -0.3022]) tensor([0.5052, 0.1356, 0.2003, 0.1588]) -Greedy action tensor([ 0.8948, -0.7456, -0.1287, -0.4717]) tensor([0.5530, 0.1072, 0.1987, 0.1410]) -Greedy action tensor([ 0.7421, -0.0811, -0.0196, -0.3502]) tensor([0.4462, 0.1959, 0.2083, 0.1497]) -Greedy action tensor([ 0.7563, -0.1344, -0.0573, -0.0405]) tensor([0.4339, 0.1781, 0.1924, 0.1956]) -Greedy action tensor([-0.1617, -0.6818, 1.4214, -0.0857]) tensor([0.1326, 0.0788, 0.6456, 0.1430]) -Greedy action tensor([0.5747, 0.5136, 0.0920, 1.5511]) tensor([0.1918, 0.1805, 0.1184, 0.5093]) -Greedy action tensor([-0.2650, -0.1620, -0.2359, 0.9047]) tensor([0.1573, 0.1743, 0.1619, 0.5065]) -Greedy action tensor([-1.4437, 0.1227, 0.3614, -1.1010]) tensor([0.0753, 0.3607, 0.4579, 0.1061]) -Greedy action tensor([ 1.1894, -0.1913, 0.3694, 0.7821]) tensor([0.4242, 0.1067, 0.1868, 0.2823]) -Greedy action tensor([ 1.5103, -0.7021, -0.1551, 1.5154]) tensor([0.4341, 0.0475, 0.0821, 0.4363]) -Greedy action tensor([ 1.5229, 0.4383, 0.6623, -0.3166]) tensor([0.5209, 0.1761, 0.2203, 0.0828]) -Greedy action tensor([-0.4317, -0.1786, 1.4178, -1.2287]) tensor([0.1099, 0.1416, 0.6989, 0.0496]) -Greedy action tensor([ 0.1642, 0.3353, -0.5219, -0.7303]) tensor([0.3227, 0.3829, 0.1625, 0.1319]) -Greedy action tensor([1.7403, 0.8493, 0.5563, 0.5900]) tensor([0.4919, 0.2018, 0.1506, 0.1557]) -Greedy action tensor([-0.2029, -0.7569, -0.1252, 0.4335]) tensor([0.2200, 0.1264, 0.2378, 0.4158]) -Greedy action tensor([ 0.8096, -1.8884, 0.5071, 0.2085]) tensor([0.4247, 0.0286, 0.3139, 0.2328]) -Greedy action tensor([ 0.1089, -1.8578, 0.1688, 1.0783]) tensor([0.2067, 0.0289, 0.2195, 0.5449]) -Greedy action tensor([ 0.3984, -0.6909, -0.3293, 0.3277]) tensor([0.3635, 0.1223, 0.1756, 0.3387]) -Greedy action tensor([ 0.5653, -1.3393, 0.1853, 0.2595]) tensor([0.3892, 0.0579, 0.2662, 0.2867]) -Greedy action tensor([ 1.0934, -0.4459, 0.6921, 0.6302]) tensor([0.3979, 0.0854, 0.2664, 0.2504]) -Greedy action tensor([ 0.1932, -0.0904, -0.2061, 0.1741]) tensor([0.2937, 0.2212, 0.1970, 0.2881]) -Greedy action tensor([ 0.1640, -0.4316, -0.2588, -0.2849]) tensor([0.3515, 0.1938, 0.2303, 0.2244]) -Greedy action tensor([-1.1843, 0.4162, -0.8864, -0.2707]) tensor([0.1021, 0.5059, 0.1375, 0.2545]) -Greedy action tensor([-0.6591, -0.0650, -0.5154, -0.2680]) tensor([0.1837, 0.3327, 0.2121, 0.2716]) -Greedy action tensor([ 0.6431, 0.5723, 0.8509, -0.8284]) tensor([0.2948, 0.2747, 0.3629, 0.0677]) -Greedy action tensor([-1.6855, -0.2660, -0.0796, -0.8273]) tensor([0.0802, 0.3314, 0.3994, 0.1891]) -Greedy action tensor([ 0.8218, -1.1741, 0.2962, -0.5037]) tensor([0.5018, 0.0682, 0.2967, 0.1333]) -Greedy action tensor([ 0.4244, -0.7258, -0.1783, -0.0957]) tensor([0.4068, 0.1288, 0.2226, 0.2418]) -Greedy action tensor([-0.0230, -0.7044, 0.2742, -0.8656]) tensor([0.3046, 0.1541, 0.4101, 0.1312]) -Greedy action tensor([ 0.3133, -0.0720, 0.2915, 0.1146]) tensor([0.2875, 0.1956, 0.2813, 0.2357]) -Greedy action tensor([ 0.2538, -0.7665, 0.1203, 0.3600]) tensor([0.2987, 0.1077, 0.2614, 0.3322]) -Greedy action tensor([ 1.2848, -0.8376, 0.4680, 1.1760]) tensor([0.4068, 0.0487, 0.1797, 0.3648]) -Greedy action tensor([ 0.0599, -1.0937, 0.3549, -0.2387]) tensor([0.2941, 0.0928, 0.3950, 0.2182]) -Greedy action tensor([ 1.1362, -1.2183, -0.1766, 0.3088]) tensor([0.5552, 0.0527, 0.1494, 0.2427]) -Greedy action tensor([-0.0684, 0.2387, 0.4435, -0.9196]) tensor([0.2245, 0.3052, 0.3745, 0.0958]) -Greedy action tensor([-0.3465, -1.5518, -0.0612, -0.4180]) tensor([0.2809, 0.0841, 0.3736, 0.2614]) -Greedy action tensor([ 0.2775, -0.6576, 0.2817, -0.0779]) tensor([0.3228, 0.1267, 0.3242, 0.2263]) -Greedy action tensor([-1.2893, -0.0981, -0.5520, -0.2835]) tensor([0.1097, 0.3610, 0.2293, 0.2999]) -Greedy action tensor([-0.0210, -0.5306, 1.3173, -0.2220]) tensor([0.1605, 0.0964, 0.6118, 0.1313]) -Greedy action tensor([ 0.4451, -2.2243, -0.3915, 0.0788]) tensor([0.4554, 0.0316, 0.1973, 0.3157]) -Greedy action tensor([-0.2430, -1.8563, 1.6862, -0.7719]) tensor([0.1153, 0.0230, 0.7938, 0.0679]) -Greedy action tensor([-0.9270, -0.4754, 1.2902, -0.1114]) tensor([0.0714, 0.1121, 0.6552, 0.1613]) -Greedy action tensor([ 0.0268, -0.3910, 0.2183, -0.3310]) tensor([0.2802, 0.1845, 0.3394, 0.1959]) -Greedy action tensor([ 0.6827, -1.9053, 0.5370, 0.5073]) tensor([0.3599, 0.0271, 0.3111, 0.3020]) -Greedy action tensor([-0.8737, -0.6089, 0.1930, 0.0522]) tensor([0.1293, 0.1685, 0.3758, 0.3264]) -Greedy action tensor([ 0.0959, -0.7152, -1.1015, -0.9336]) tensor([0.4754, 0.2113, 0.1436, 0.1698]) -Greedy action tensor([-0.5349, 0.7972, 0.1524, -0.4945]) tensor([0.1279, 0.4846, 0.2543, 0.1332]) -Greedy action tensor([ 0.7900, -1.5340, -0.0284, -0.0876]) tensor([0.5116, 0.0501, 0.2257, 0.2127]) -Greedy action tensor([-0.3200, -0.7597, -0.6501, -0.2518]) tensor([0.2912, 0.1876, 0.2094, 0.3118]) -Greedy action tensor([-0.2201, -0.3859, 0.3942, -0.5383]) tensor([0.2261, 0.1916, 0.4179, 0.1645]) -Greedy action tensor([ 1.5424, -0.8705, 0.6526, 0.4764]) tensor([0.5421, 0.0485, 0.2227, 0.1867]) -Greedy action tensor([-0.2676, 0.2121, -0.9341, -0.2859]) tensor([0.2433, 0.3930, 0.1249, 0.2388]) -Greedy action tensor([-0.4499, 0.0875, 0.2363, -0.0218]) tensor([0.1605, 0.2746, 0.3187, 0.2462]) -Greedy action tensor([ 0.3096, -0.8501, 0.3379, -0.4398]) tensor([0.3552, 0.1114, 0.3654, 0.1679]) -Greedy action tensor([ 0.7429, -0.4414, 0.8945, -0.2122]) tensor([0.3503, 0.1072, 0.4077, 0.1348]) -Greedy action tensor([-0.1742, -0.7792, -0.5141, 0.5159]) tensor([0.2352, 0.1284, 0.1674, 0.4690]) -Greedy action tensor([ 1.0924, -0.3712, 0.3434, -0.4416]) tensor([0.5209, 0.1205, 0.2463, 0.1123]) -Greedy action tensor([ 1.5047, -0.3008, -0.2191, 0.1770]) tensor([0.6219, 0.1022, 0.1110, 0.1649]) -Greedy action tensor([ 0.0625, -0.4187, 0.1946, -0.4333]) tensor([0.2969, 0.1835, 0.3388, 0.1808]) -Greedy action tensor([ 0.6026, 0.4095, 0.2370, -0.2179]) tensor([0.3380, 0.2787, 0.2345, 0.1488]) -Greedy action tensor([-0.4085, -0.7579, -0.3590, 0.7022]) tensor([0.1726, 0.1217, 0.1814, 0.5242]) -Greedy action tensor([ 1.5814, -0.3327, -0.1337, 1.1276]) tensor([0.5095, 0.0751, 0.0917, 0.3237]) -Greedy action tensor([ 0.7542, 0.0531, -0.1276, 0.4921]) tensor([0.3732, 0.1851, 0.1545, 0.2872]) -Greedy action tensor([ 0.1468, -2.0791, 0.4870, 0.7491]) tensor([0.2304, 0.0249, 0.3238, 0.4209]) -Greedy action tensor([ 0.5287, 0.4833, -0.0430, -0.6647]) tensor([0.3542, 0.3385, 0.2000, 0.1074]) -Greedy action tensor([-1.4427, -0.9547, 0.6941, -0.3177]) tensor([0.0705, 0.1149, 0.5974, 0.2172]) -Greedy action tensor([-1.2831, -1.4844, 1.7370, -0.5529]) tensor([0.0410, 0.0335, 0.8404, 0.0851]) -Greedy action tensor([ 0.3289, -0.7492, 0.8790, -0.5767]) tensor([0.2875, 0.0978, 0.4984, 0.1163]) -Greedy action tensor([-0.4527, -0.4560, 0.4567, 0.5246]) tensor([0.1401, 0.1397, 0.3479, 0.3723]) -Greedy action tensor([ 0.5278, -0.3025, -0.0842, 0.6877]) tensor([0.3173, 0.1383, 0.1721, 0.3723]) -Greedy action tensor([ 0.8541, -0.8812, 0.0946, 0.9203]) tensor([0.3686, 0.0650, 0.1725, 0.3939]) -Greedy action tensor([-0.3017, 0.1871, 0.3460, 0.2837]) tensor([0.1578, 0.2573, 0.3016, 0.2833]) -Greedy action tensor([-0.5652, -0.3526, 0.4012, 0.5938]) tensor([0.1242, 0.1536, 0.3264, 0.3958]) -Greedy action tensor([-0.2141, -0.3109, -0.1948, -0.5734]) tensor([0.2758, 0.2504, 0.2812, 0.1926]) -Greedy action tensor([ 0.6868, -0.6318, 1.3101, 0.1117]) tensor([0.2706, 0.0724, 0.5047, 0.1523]) -Greedy action tensor([-0.2243, -0.3126, -0.1941, -0.2047]) tensor([0.2521, 0.2308, 0.2599, 0.2571]) -Greedy action tensor([ 0.7299, 0.0125, 0.4273, -0.5195]) tensor([0.3978, 0.1942, 0.2940, 0.1141]) -Greedy action tensor([-0.4908, -0.4688, 2.1175, -0.1480]) tensor([0.0588, 0.0601, 0.7983, 0.0828]) -Greedy action tensor([ 0.1854, -0.1435, -0.1043, -0.5981]) tensor([0.3419, 0.2460, 0.2559, 0.1562]) -Greedy action tensor([-0.7976, -0.5977, -0.5505, 0.0869]) tensor([0.1688, 0.2062, 0.2161, 0.4089]) -Greedy action tensor([-0.2155, -0.1207, 0.6307, -1.0721]) tensor([0.2060, 0.2265, 0.4801, 0.0875]) -Greedy action tensor([-0.2739, -0.9392, -0.3114, 0.6107]) tensor([0.2041, 0.1049, 0.1966, 0.4944]) -Greedy action tensor([ 1.1798, 0.5355, 0.7086, -0.2637]) tensor([0.4192, 0.2201, 0.2617, 0.0990]) -Greedy action tensor([ 0.5625, -1.4311, 0.3408, 0.2405]) tensor([0.3756, 0.0512, 0.3010, 0.2722]) -Greedy action tensor([ 0.0889, -0.2456, 0.2009, -0.7545]) tensor([0.3063, 0.2192, 0.3426, 0.1318]) -Greedy action tensor([ 0.1826, 0.8917, 0.7667, -0.4310]) tensor([0.1863, 0.3786, 0.3342, 0.1009]) -Greedy action tensor([ 1.3577, -0.5744, -0.2077, 0.6471]) tensor([0.5419, 0.0785, 0.1133, 0.2663]) -Greedy action tensor([ 1.5575, -0.5116, -0.4649, 0.4540]) tensor([0.6288, 0.0794, 0.0832, 0.2086]) -Greedy action tensor([ 1.7524, -0.3979, -0.5403, 1.1350]) tensor([0.5692, 0.0663, 0.0575, 0.3070]) -Greedy action tensor([ 1.6709, 0.0054, -0.8523, 0.2671]) tensor([0.6601, 0.1248, 0.0529, 0.1622]) -Greedy action tensor([ 2.0192, -1.1482, 0.0180, 0.5260]) tensor([0.7133, 0.0300, 0.0964, 0.1602]) -Greedy action tensor([ 1.7831, -0.4550, -0.5741, 0.1248]) tensor([0.7185, 0.0766, 0.0680, 0.1368]) -Greedy action tensor([ 2.3077, -1.8439, -0.1883, 0.8419]) tensor([0.7524, 0.0118, 0.0620, 0.1737]) -Greedy action tensor([ 1.7965, -0.4475, -0.7491, 0.1981]) tensor([0.7212, 0.0765, 0.0566, 0.1458]) -Greedy action tensor([ 1.5276, -0.4966, -0.1671, 0.4361]) tensor([0.6055, 0.0800, 0.1112, 0.2033]) -Greedy action tensor([ 1.4644, -0.0328, -0.1203, -0.1078]) tensor([0.6111, 0.1367, 0.1253, 0.1269]) -Greedy action tensor([ 1.1667, -0.1276, -0.8444, 0.4923]) tensor([0.5215, 0.1429, 0.0698, 0.2657]) -Greedy action tensor([ 1.0235, -0.4917, -0.2382, 0.2632]) tensor([0.5075, 0.1115, 0.1437, 0.2373]) -Greedy action tensor([ 2.3576, -1.3598, -0.3282, 0.6676]) tensor([0.7831, 0.0190, 0.0534, 0.1445]) -Greedy action tensor([ 2.0592, -0.6694, -0.4892, 0.4550]) tensor([0.7437, 0.0486, 0.0582, 0.1495]) -Greedy action tensor([ 1.7203, -1.1298, -0.5097, 0.6166]) tensor([0.6680, 0.0386, 0.0718, 0.2215]) -Greedy action tensor([ 0.6516, -0.3153, -0.0455, 0.1507]) tensor([0.4025, 0.1531, 0.2005, 0.2439]) -Greedy action tensor([ 1.4478, 0.3414, -0.3810, 0.0104]) tensor([0.5784, 0.1913, 0.0929, 0.1374]) -Greedy action tensor([ 1.5268, -0.1244, -0.7251, 0.6486]) tensor([0.5839, 0.1120, 0.0614, 0.2426]) -Greedy action tensor([ 2.0077, -0.2405, -0.6817, 0.4127]) tensor([0.7265, 0.0767, 0.0493, 0.1474]) -Greedy action tensor([ 1.3044, -0.6465, -0.2843, -0.1796]) tensor([0.6357, 0.0904, 0.1298, 0.1441]) -Greedy action tensor([ 1.3858, -0.6301, -0.8672, 0.2075]) tensor([0.6468, 0.0862, 0.0680, 0.1991]) -Greedy action tensor([ 1.0407, -0.4160, -0.0063, 0.3487]) tensor([0.4797, 0.1118, 0.1684, 0.2401]) -Greedy action tensor([ 1.3386, -0.4015, -0.7753, 0.0620]) tensor([0.6348, 0.1114, 0.0767, 0.1771]) -Greedy action tensor([ 2.6578, -0.2694, -0.8904, 0.5743]) tensor([0.8286, 0.0444, 0.0238, 0.1032]) -Greedy action tensor([ 2.0579, -0.5072, -0.5205, 0.0748]) tensor([0.7749, 0.0596, 0.0588, 0.1067]) -Greedy action tensor([ 1.5010, -0.5843, -0.0112, 0.2098]) tensor([0.6174, 0.0767, 0.1361, 0.1698]) -Greedy action tensor([1.8775, 0.0855, 0.3478, 0.1929]) tensor([0.6375, 0.1062, 0.1381, 0.1183]) -Greedy action tensor([ 1.3983, -0.6449, -0.5289, 0.2554]) tensor([0.6273, 0.0813, 0.0913, 0.2000]) -Greedy action tensor([ 2.1717, -0.6335, -0.4230, 0.6707]) tensor([0.7363, 0.0445, 0.0550, 0.1641]) -Greedy action tensor([ 1.7135, -0.4831, -0.2891, 0.6654]) tensor([0.6263, 0.0696, 0.0845, 0.2196]) -Greedy action tensor([ 1.0479, -0.5383, -0.5147, 0.6035]) tensor([0.4865, 0.0996, 0.1020, 0.3119]) -Greedy action tensor([ 1.8840, -0.4371, -0.2864, 0.7123]) tensor([0.6570, 0.0645, 0.0750, 0.2036]) -Greedy action tensor([ 2.1395, -1.5419, -0.1714, 0.3636]) tensor([0.7730, 0.0195, 0.0767, 0.1309]) -Greedy action tensor([ 1.3353, -0.0835, -0.4716, -0.2037]) tensor([0.6170, 0.1493, 0.1013, 0.1324]) -Greedy action tensor([ 2.0100, -0.3712, -0.1891, 0.3145]) tensor([0.7211, 0.0667, 0.0800, 0.1323]) -Greedy action tensor([ 1.3693, -0.4696, -0.0672, -0.0459]) tensor([0.6099, 0.0970, 0.1450, 0.1481]) -Greedy action tensor([ 1.4909, -0.4396, -0.7489, 0.4137]) tensor([0.6281, 0.0911, 0.0669, 0.2139]) -Greedy action tensor([ 1.7686, 0.0512, -0.5365, -0.0033]) tensor([0.6900, 0.1239, 0.0688, 0.1173]) -Greedy action tensor([ 1.3412, -0.0083, -0.8791, 0.1711]) tensor([0.5958, 0.1545, 0.0647, 0.1849]) -Greedy action tensor([ 1.5857, -0.6710, -0.2557, 0.5987]) tensor([0.6112, 0.0640, 0.0969, 0.2278]) -Greedy action tensor([ 2.0799, -0.0970, -0.6882, 0.4490]) tensor([0.7289, 0.0827, 0.0458, 0.1427]) -Greedy action tensor([ 1.5518, -0.6823, -0.7241, 0.8128]) tensor([0.5926, 0.0635, 0.0609, 0.2830]) -Greedy action tensor([ 1.4561, -0.4746, -0.1477, 0.1688]) tensor([0.6165, 0.0894, 0.1240, 0.1701]) -Greedy action tensor([ 1.6025, -0.3561, -0.6349, 0.2145]) tensor([0.6678, 0.0942, 0.0713, 0.1667]) -Greedy action tensor([ 1.4488, -0.9681, -0.0914, -0.1660]) tensor([0.6656, 0.0594, 0.1427, 0.1324]) -Greedy action tensor([ 1.3881, -0.2612, 0.0652, 0.3619]) tensor([0.5504, 0.1058, 0.1466, 0.1972]) -Greedy action tensor([ 1.4241, -0.6052, -0.2435, 0.4023]) tensor([0.5952, 0.0782, 0.1123, 0.2143]) -Greedy action tensor([ 1.5996, 0.2571, -0.0398, 0.0031]) tensor([0.6032, 0.1575, 0.1171, 0.1222]) -Greedy action tensor([ 1.2211, -0.5814, -0.3275, 0.5843]) tensor([0.5246, 0.0865, 0.1115, 0.2775]) -Greedy action tensor([ 1.3549, -0.7294, -0.1678, 0.1510]) tensor([0.6088, 0.0757, 0.1328, 0.1827]) -Greedy action tensor([ 1.1450, -0.6346, -0.5096, 0.5861]) tensor([0.5177, 0.0873, 0.0990, 0.2960]) -Greedy action tensor([ 0.5096, -0.1958, -0.3789, 0.0998]) tensor([0.3893, 0.1923, 0.1601, 0.2584]) -Greedy action tensor([ 1.4551, -0.7341, -0.6354, 0.9926]) tensor([0.5361, 0.0600, 0.0663, 0.3376]) -Greedy action tensor([ 1.2641, -0.1646, -0.4951, 0.0317]) tensor([0.5871, 0.1407, 0.1011, 0.1712]) -Greedy action tensor([ 2.0404, -0.1957, -0.3753, 0.4146]) tensor([0.7179, 0.0767, 0.0641, 0.1412]) -Greedy action tensor([ 1.4547, -0.2276, -0.3261, 0.2582]) tensor([0.6036, 0.1122, 0.1017, 0.1824]) -Greedy action tensor([ 1.0473, -0.2642, -0.4410, 0.2101]) tensor([0.5187, 0.1397, 0.1171, 0.2245]) -Greedy action tensor([ 2.0604, -0.9768, -0.2749, 0.6605]) tensor([0.7187, 0.0345, 0.0696, 0.1773]) -Greedy action tensor([ 2.3815, -0.1407, -0.7763, 0.4962]) tensor([0.7846, 0.0630, 0.0334, 0.1191]) -Greedy action tensor([ 1.4272, -0.3343, -0.2554, 0.2788]) tensor([0.5971, 0.1026, 0.1110, 0.1894]) -Greedy action tensor([ 1.5447, -0.7717, -0.1694, 0.3511]) tensor([0.6322, 0.0624, 0.1139, 0.1916]) -Greedy action tensor([ 2.1375, -1.1663, -0.1928, 0.2044]) tensor([0.7820, 0.0287, 0.0761, 0.1132]) -Greedy action tensor([ 1.4104, 0.0448, -0.7467, 0.5046]) tensor([0.5634, 0.1438, 0.0652, 0.2277]) -Greedy action tensor([ 1.7042, -0.4505, -0.2085, 0.5292]) tensor([0.6359, 0.0737, 0.0939, 0.1964]) -Greedy action tensor([ 1.1632, -0.4745, -0.3072, -0.0170]) tensor([0.5775, 0.1123, 0.1327, 0.1774]) -Greedy action tensor([ 1.4692, -0.6528, -0.2877, 0.4647]) tensor([0.6029, 0.0722, 0.1041, 0.2208]) -Greedy action tensor([ 1.6736, -1.1318, -0.1637, 0.3939]) tensor([0.6676, 0.0404, 0.1063, 0.1857]) -Greedy action tensor([ 1.4716, -0.1020, 0.0043, 0.6310]) tensor([0.5349, 0.1109, 0.1233, 0.2308]) -Greedy action tensor([ 1.9529, -1.1097, -0.4940, 0.2520]) tensor([0.7600, 0.0355, 0.0658, 0.1387]) -Greedy action tensor([ 1.6957, -0.4454, -0.5143, 0.8611]) tensor([0.6019, 0.0707, 0.0660, 0.2613]) -Greedy action tensor([ 1.8753, -0.6421, -0.8193, 0.5181]) tensor([0.7114, 0.0574, 0.0481, 0.1831]) -Greedy action tensor([ 2.1189, -1.0014, -0.3677, 0.5559]) tensor([0.7480, 0.0330, 0.0622, 0.1567]) -Greedy action tensor([ 1.8815, 0.1457, -0.6662, 0.6097]) tensor([0.6515, 0.1148, 0.0510, 0.1826]) -Greedy action tensor([ 1.1801, -0.9430, -0.2050, -0.1618]) tensor([0.6130, 0.0734, 0.1534, 0.1602]) -Greedy action tensor([ 1.3983, -0.4635, -0.3720, 0.2044]) tensor([0.6140, 0.0954, 0.1045, 0.1861]) -Greedy action tensor([ 2.1693, -0.5633, -0.4373, 0.6649]) tensor([0.7348, 0.0478, 0.0542, 0.1632]) -Greedy action tensor([ 2.6848, -1.1105, -0.3444, 1.0751]) tensor([0.7869, 0.0177, 0.0381, 0.1573]) -Greedy action tensor([ 1.3342, -0.0679, -0.5906, 0.3538]) tensor([0.5659, 0.1392, 0.0826, 0.2123]) -Greedy action tensor([ 1.2368, -0.4792, -0.5355, 0.1219]) tensor([0.5961, 0.1072, 0.1013, 0.1955]) -Greedy action tensor([ 1.4501, -0.4223, -0.5267, 0.0313]) tensor([0.6518, 0.1002, 0.0903, 0.1577]) -Greedy action tensor([ 1.2789, -0.9864, -0.1070, 0.6369]) tensor([0.5319, 0.0552, 0.1330, 0.2799]) -Greedy action tensor([-1.6018, -0.3754, 0.5446, -0.0185]) tensor([0.0561, 0.1911, 0.4797, 0.2731]) -Greedy action tensor([-1.9144, -0.4573, 0.6471, -0.1656]) tensor([0.0417, 0.1789, 0.5399, 0.2395]) -Greedy action tensor([-1.7087, -0.3959, 0.6829, 0.1734]) tensor([0.0450, 0.1673, 0.4920, 0.2956]) -Greedy action tensor([-1.8424, -0.4739, 0.6417, -0.1026]) tensor([0.0442, 0.1738, 0.5302, 0.2519]) -Greedy action tensor([-1.9378, -0.4435, 0.6659, -0.1763]) tensor([0.0403, 0.1797, 0.5451, 0.2348]) -Greedy action tensor([-1.9404, -0.4536, 0.6715, -0.1759]) tensor([0.0402, 0.1777, 0.5475, 0.2346]) -Greedy action tensor([-1.7859, -0.4445, 0.5968, -0.0838]) tensor([0.0473, 0.1809, 0.5124, 0.2594]) -Greedy action tensor([-1.7699, -0.5282, 0.5800, -0.1182]) tensor([0.0496, 0.1717, 0.5200, 0.2587]) -Greedy action tensor([-1.9221, -0.4403, 0.6587, -0.1686]) tensor([0.0410, 0.1805, 0.5417, 0.2368]) -Greedy action tensor([ 0.0533, -0.0478, 0.8265, 1.5768]) tensor([0.1155, 0.1044, 0.2502, 0.5299]) -Greedy action tensor([-1.5783, 0.3466, 0.6018, -0.6403]) tensor([0.0519, 0.3560, 0.4594, 0.1327]) -Greedy action tensor([-1.7281, -0.4676, 0.5642, -0.0735]) tensor([0.0509, 0.1794, 0.5035, 0.2661]) -Greedy action tensor([-1.0086, -0.5738, 0.2114, 0.3002]) tensor([0.1038, 0.1603, 0.3516, 0.3843]) -Greedy action tensor([-0.8998, -0.5422, 0.2319, 0.2825]) tensor([0.1137, 0.1626, 0.3527, 0.3710]) -Greedy action tensor([-1.9071, -0.4603, 0.6457, -0.1594]) tensor([0.0420, 0.1783, 0.5389, 0.2409]) -Greedy action tensor([-1.8637, -0.4463, 0.6314, -0.1373]) tensor([0.0437, 0.1804, 0.5301, 0.2458]) -Greedy action tensor([-1.7263, -0.0462, 0.5295, -0.0528]) tensor([0.0471, 0.2526, 0.4493, 0.2510]) -Greedy action tensor([-1.8845, -0.4300, 0.6423, -0.1828]) tensor([0.0430, 0.1840, 0.5375, 0.2355]) -Greedy action tensor([-0.3735, 1.0787, 0.0336, 0.3066]) tensor([0.1143, 0.4883, 0.1717, 0.2256]) -Greedy action tensor([-1.8775, -0.3681, 0.6253, -0.1512]) tensor([0.0428, 0.1937, 0.5230, 0.2406]) -Greedy action tensor([-1.9396, -0.4514, 0.6663, -0.1765]) tensor([0.0403, 0.1786, 0.5461, 0.2351]) -Greedy action tensor([-1.8655, -0.4334, 0.6234, -0.1408]) tensor([0.0438, 0.1833, 0.5273, 0.2456]) -Greedy action tensor([-1.9000, -0.4714, 0.7047, -0.0770]) tensor([0.0402, 0.1676, 0.5435, 0.2487]) -Greedy action tensor([-1.8766, -0.3748, 0.6270, -0.1468]) tensor([0.0428, 0.1922, 0.5235, 0.2415]) -Greedy action tensor([-1.6474, -0.4486, 0.6243, 0.1468]) tensor([0.0499, 0.1656, 0.4841, 0.3003]) -Greedy action tensor([-1.5782, -0.6257, 0.6556, -0.0141]) tensor([0.0565, 0.1464, 0.5273, 0.2699]) -Greedy action tensor([-1.8668, -0.4364, 0.6265, -0.1383]) tensor([0.0436, 0.1824, 0.5281, 0.2458]) -Greedy action tensor([-1.4899, 0.6778, 0.3450, -0.0428]) tensor([0.0494, 0.4314, 0.3093, 0.2099]) -Greedy action tensor([-1.4380, -0.2865, 0.3571, 0.1010]) tensor([0.0674, 0.2131, 0.4056, 0.3139]) -Greedy action tensor([-1.8961, -0.4523, 0.6460, -0.1523]) tensor([0.0423, 0.1791, 0.5370, 0.2417]) -Greedy action tensor([-0.7190, 0.7003, 0.1055, -0.0408]) tensor([0.1065, 0.4405, 0.2430, 0.2099]) -Greedy action tensor([-1.2742, -0.0997, 0.9518, 0.9317]) tensor([0.0443, 0.1433, 0.4103, 0.4021]) -Greedy action tensor([-1.9119, -0.4535, 0.6586, -0.1612]) tensor([0.0414, 0.1782, 0.5417, 0.2387]) -Greedy action tensor([-1.7712, -0.4479, 0.5983, -0.0810]) tensor([0.0479, 0.1800, 0.5123, 0.2597]) -Greedy action tensor([-1.7552, -0.4406, 0.5708, -0.1193]) tensor([0.0498, 0.1853, 0.5094, 0.2555]) -Greedy action tensor([-1.9047, -0.3840, 0.6469, -0.1566]) tensor([0.0414, 0.1895, 0.5312, 0.2379]) -Greedy action tensor([-1.6370, 0.2048, 0.4075, -0.0123]) tensor([0.0497, 0.3137, 0.3842, 0.2525]) -Greedy action tensor([-1.8510, -0.4540, 0.6403, -0.1308]) tensor([0.0440, 0.1781, 0.5319, 0.2460]) -Greedy action tensor([-1.8722, -0.4251, 0.6299, -0.1270]) tensor([0.0431, 0.1833, 0.5265, 0.2470]) -Greedy action tensor([-1.8989, -0.4528, 0.6413, -0.1681]) tensor([0.0424, 0.1801, 0.5380, 0.2395]) -Greedy action tensor([-1.8839, -0.4268, 0.6359, -0.1513]) tensor([0.0428, 0.1837, 0.5316, 0.2419]) -Greedy action tensor([-1.9157, -0.4263, 0.6556, -0.1629]) tensor([0.0412, 0.1826, 0.5386, 0.2376]) -Greedy action tensor([-1.4252, 0.3021, 0.4357, 0.3302]) tensor([0.0531, 0.2986, 0.3413, 0.3071]) -Greedy action tensor([-1.8552, -0.4419, 0.6244, -0.1391]) tensor([0.0442, 0.1818, 0.5280, 0.2460]) -Greedy action tensor([-1.7889, -0.4704, 0.5964, -0.1011]) tensor([0.0476, 0.1779, 0.5171, 0.2574]) -Greedy action tensor([-0.5057, -0.5978, 0.1419, 0.6708]) tensor([0.1415, 0.1291, 0.2705, 0.4589]) -Greedy action tensor([-0.8516, 0.7658, 0.1943, 0.4577]) tensor([0.0794, 0.4003, 0.2260, 0.2942]) -Greedy action tensor([-1.2885, -0.3003, 0.5701, 0.1046]) tensor([0.0708, 0.1901, 0.4540, 0.2851]) -Greedy action tensor([-1.6082, 0.2709, 0.4197, -0.1098]) tensor([0.0510, 0.3337, 0.3873, 0.2281]) -Greedy action tensor([-0.2722, -0.0270, 0.6161, 0.8521]) tensor([0.1284, 0.1641, 0.3122, 0.3953]) -Greedy action tensor([-1.9162, -0.4232, 0.6605, -0.1560]) tensor([0.0410, 0.1823, 0.5387, 0.2381]) -Greedy action tensor([-1.7087, -0.3347, 0.5715, -0.1680]) tensor([0.0516, 0.2037, 0.5041, 0.2407]) -Greedy action tensor([-1.6022, -0.1388, 0.6353, 0.0611]) tensor([0.0501, 0.2164, 0.4693, 0.2643]) -Greedy action tensor([-1.7668, -0.4403, 0.6393, -0.0086]) tensor([0.0462, 0.1740, 0.5120, 0.2679]) -Greedy action tensor([-1.7291, -0.0997, 0.5006, -0.0341]) tensor([0.0480, 0.2447, 0.4460, 0.2613]) -Greedy action tensor([-1.7890, -0.3153, 0.6104, -0.0686]) tensor([0.0455, 0.1987, 0.5015, 0.2543]) -Greedy action tensor([-1.7332, -0.2392, 0.5787, -0.0388]) tensor([0.0476, 0.2122, 0.4808, 0.2593]) -Greedy action tensor([-1.4632, -0.4472, 0.4335, -0.0160]) tensor([0.0681, 0.1882, 0.4540, 0.2896]) -Greedy action tensor([-0.9358, -0.5582, 0.2603, 0.0418]) tensor([0.1187, 0.1732, 0.3926, 0.3155]) -Greedy action tensor([-0.8009, -0.3300, 1.0699, 1.4118]) tensor([0.0548, 0.0878, 0.3561, 0.5013]) -Greedy action tensor([-1.7638, -0.4598, 0.6963, 0.0660]) tensor([0.0442, 0.1629, 0.5174, 0.2755]) -Greedy action tensor([-1.8718, -0.4300, 0.6261, -0.1542]) tensor([0.0436, 0.1842, 0.5296, 0.2427]) -Greedy action tensor([-1.7957, -0.3362, 0.6033, -0.0742]) tensor([0.0456, 0.1964, 0.5026, 0.2553]) -Greedy action tensor([-1.7157, -0.5013, 0.5538, -0.0732]) tensor([0.0521, 0.1753, 0.5036, 0.2690]) -Greedy action tensor([-1.9156, -0.4477, 0.6586, -0.1632]) tensor([0.0413, 0.1791, 0.5415, 0.2381]) -Greedy action tensor([-1.9211, -0.4496, 0.6580, -0.1666]) tensor([0.0411, 0.1791, 0.5421, 0.2377]) -Greedy action tensor([-1.6287, -0.5496, 0.5200, -0.1177]) tensor([0.0587, 0.1726, 0.5030, 0.2658]) -Greedy action tensor([-1.9092, -0.4436, 0.6509, -0.1612]) tensor([0.0416, 0.1803, 0.5388, 0.2392]) -Greedy action tensor([-1.9202, -0.4423, 0.6509, -0.1681]) tensor([0.0413, 0.1809, 0.5398, 0.2380]) -Greedy action tensor([-1.9038, -0.4390, 0.6493, -0.1593]) tensor([0.0418, 0.1810, 0.5376, 0.2395]) -Greedy action tensor([-1.7831, -0.3749, 0.6537, -0.0572]) tensor([0.0452, 0.1847, 0.5165, 0.2537]) -Greedy action tensor([-1.4596, 0.3388, 0.4238, 0.0884]) tensor([0.0546, 0.3297, 0.3590, 0.2567]) -Greedy action tensor([-1.6364, -0.3386, 0.5009, -0.0768]) tensor([0.0559, 0.2046, 0.4737, 0.2658]) -Greedy action tensor([-1.3786, -0.4521, 0.3827, 0.0828]) tensor([0.0732, 0.1849, 0.4261, 0.3157]) -Greedy action tensor([-1.7622, -0.3617, 0.6454, -0.0562]) tensor([0.0461, 0.1872, 0.5125, 0.2541]) -Greedy action tensor([-1.9260, -0.4529, 0.6624, -0.1682]) tensor([0.0409, 0.1783, 0.5438, 0.2370]) -Greedy action tensor([-1.8765, -0.3559, 0.6206, -0.1410]) tensor([0.0427, 0.1956, 0.5192, 0.2424]) -Greedy action tensor([-1.7724, -0.3190, 0.5600, -0.1075]) tensor([0.0479, 0.2050, 0.4938, 0.2533]) -Greedy action tensor([-1.5355, -0.4838, 0.5703, 0.0431]) tensor([0.0591, 0.1691, 0.4853, 0.2865]) -Greedy action tensor([-1.8660, -0.4723, 0.6624, -0.1058]) tensor([0.0428, 0.1724, 0.5361, 0.2487]) -Greedy action tensor([-1.8715, -0.1934, 0.5957, -0.1639]) tensor([0.0423, 0.2263, 0.4983, 0.2331]) -Greedy action tensor([ 0.5664, -0.5002, 0.0061, -0.4441]) tensor([0.4388, 0.1510, 0.2505, 0.1597]) -Greedy action tensor([ 0.7185, -0.2151, -0.0548, -0.5017]) tensor([0.4652, 0.1829, 0.2147, 0.1373]) -Greedy action tensor([ 0.3205, -0.2163, -0.0371, -0.0392]) tensor([0.3354, 0.1961, 0.2345, 0.2340]) -Greedy action tensor([ 0.6418, -0.0537, 0.0281, 0.0643]) tensor([0.3844, 0.1917, 0.2081, 0.2158]) -Greedy action tensor([ 0.1358, 0.5470, -0.1557, -0.1970]) tensor([0.2517, 0.3798, 0.1881, 0.1805]) -Greedy action tensor([ 0.9202, -1.0978, 0.0138, -0.2819]) tensor([0.5442, 0.0723, 0.2199, 0.1636]) -Greedy action tensor([ 0.6644, -0.7281, 0.0567, -0.3788]) tensor([0.4661, 0.1158, 0.2538, 0.1642]) -Greedy action tensor([ 0.9396, -0.3768, -0.1435, -0.3272]) tensor([0.5296, 0.1420, 0.1793, 0.1492]) -Greedy action tensor([ 1.1759, -0.9761, 0.0661, -0.6859]) tensor([0.6245, 0.0726, 0.2059, 0.0970]) -Greedy action tensor([ 0.4185, -0.2538, -0.0135, -0.0907]) tensor([0.3622, 0.1849, 0.2352, 0.2177]) -Greedy action tensor([ 0.6685, -0.3021, -0.0332, -0.0510]) tensor([0.4234, 0.1604, 0.2099, 0.2062]) -Greedy action tensor([ 0.9263, -0.2495, -0.1549, -0.2309]) tensor([0.5096, 0.1573, 0.1729, 0.1602]) -Greedy action tensor([ 0.6193, -0.4181, -0.1943, -0.3391]) tensor([0.4585, 0.1625, 0.2032, 0.1758]) -Greedy action tensor([ 0.2768, 0.0590, -0.0461, -0.1066]) tensor([0.3115, 0.2506, 0.2256, 0.2123]) -Greedy action tensor([ 0.5147, -0.6400, -0.0708, -0.5700]) tensor([0.4525, 0.1426, 0.2520, 0.1529]) -Greedy action tensor([ 0.6271, -0.2558, -0.0768, -0.1626]) tensor([0.4233, 0.1751, 0.2094, 0.1922]) -Greedy action tensor([ 0.9660, -0.9005, 0.0255, -0.3001]) tensor([0.5473, 0.0847, 0.2137, 0.1543]) -Greedy action tensor([ 0.9391, -0.5164, 0.0654, -0.5264]) tensor([0.5315, 0.1240, 0.2218, 0.1227]) -Greedy action tensor([ 0.7584, -0.4217, -0.0719, -0.6214]) tensor([0.5013, 0.1540, 0.2185, 0.1261]) -Greedy action tensor([ 0.7240, -0.4960, 0.0713, -0.4477]) tensor([0.4704, 0.1389, 0.2449, 0.1458]) -Greedy action tensor([ 0.2618, -0.0522, -0.0301, -0.1348]) tensor([0.3175, 0.2319, 0.2371, 0.2135]) -Greedy action tensor([ 0.6980, -0.2115, 0.0047, -0.2089]) tensor([0.4336, 0.1746, 0.2167, 0.1751]) -Greedy action tensor([ 1.2679, -0.8967, -0.0521, -0.5787]) tensor([0.6495, 0.0746, 0.1735, 0.1025]) -Greedy action tensor([ 0.7873, -0.1856, 0.2326, -0.4157]) tensor([0.4439, 0.1678, 0.2549, 0.1333]) -Greedy action tensor([ 0.9415, -0.7347, 0.0520, -0.4819]) tensor([0.5438, 0.1017, 0.2234, 0.1310]) -Greedy action tensor([ 0.8591, -0.4022, -0.0713, -0.1693]) tensor([0.4913, 0.1392, 0.1938, 0.1757]) -Greedy action tensor([ 0.6119, -0.2668, 0.0249, -0.2240]) tensor([0.4158, 0.1727, 0.2312, 0.1803]) -Greedy action tensor([ 0.7053, -0.4076, -0.0968, -0.0566]) tensor([0.4457, 0.1465, 0.1998, 0.2080]) -Greedy action tensor([ 0.8218, -0.5707, 0.1426, -0.2176]) tensor([0.4741, 0.1178, 0.2404, 0.1677]) -Greedy action tensor([ 0.6663, -0.3599, -0.0314, -0.2442]) tensor([0.4428, 0.1587, 0.2204, 0.1781]) -Greedy action tensor([ 0.6904, -0.3503, -0.0267, -0.5177]) tensor([0.4673, 0.1650, 0.2281, 0.1396]) -Greedy action tensor([ 0.6201, -0.0488, 0.0113, -0.5465]) tensor([0.4224, 0.2164, 0.2298, 0.1315]) -Greedy action tensor([ 0.8827, -0.8288, -0.0603, -0.3573]) tensor([0.5378, 0.0971, 0.2095, 0.1556]) -Greedy action tensor([ 1.2421, -0.7344, -0.2261, -0.4196]) tensor([0.6416, 0.0889, 0.1478, 0.1218]) -Greedy action tensor([ 1.2458, -0.8056, -0.1384, -0.5350]) tensor([0.6462, 0.0831, 0.1619, 0.1089]) -Greedy action tensor([ 0.3008, 0.5077, -0.2399, 0.1266]) tensor([0.2738, 0.3367, 0.1594, 0.2300]) -Greedy action tensor([ 0.6959, -0.4428, -0.0362, -0.4023]) tensor([0.4685, 0.1500, 0.2253, 0.1562]) -Greedy action tensor([ 0.5426, -0.3507, -0.2004, -0.1541]) tensor([0.4196, 0.1717, 0.1996, 0.2091]) -Greedy action tensor([ 0.2589, 0.0843, -0.1973, -0.2519]) tensor([0.3254, 0.2732, 0.2062, 0.1952]) -Greedy action tensor([ 0.9451, -0.6413, -0.0089, -0.3218]) tensor([0.5343, 0.1093, 0.2058, 0.1505]) -Greedy action tensor([ 1.1513, -0.6961, 0.1364, -0.4566]) tensor([0.5813, 0.0916, 0.2107, 0.1164]) -Greedy action tensor([ 0.6614, -0.5281, -0.0298, -0.2617]) tensor([0.4540, 0.1382, 0.2274, 0.1804]) -Greedy action tensor([ 0.6810, -0.1438, -0.0008, -0.1485]) tensor([0.4201, 0.1841, 0.2125, 0.1833]) -Greedy action tensor([ 0.9500, -0.3024, -0.1969, -0.1739]) tensor([0.5186, 0.1482, 0.1647, 0.1685]) -Greedy action tensor([ 0.5918, -0.6955, 0.0340, -0.5169]) tensor([0.4590, 0.1267, 0.2628, 0.1515]) -Greedy action tensor([ 0.8673, -0.4623, -0.0590, -0.5737]) tensor([0.5271, 0.1395, 0.2087, 0.1247]) -Greedy action tensor([ 0.7401, -0.5601, -0.1110, -0.1646]) tensor([0.4753, 0.1295, 0.2029, 0.1923]) -Greedy action tensor([ 0.6423, -0.3282, -0.0240, -0.1789]) tensor([0.4287, 0.1624, 0.2202, 0.1886]) -Greedy action tensor([ 0.4411, -0.4762, -0.1869, -0.0857]) tensor([0.3962, 0.1583, 0.2115, 0.2340]) -Greedy action tensor([ 0.9227, -0.2148, 0.0383, -0.4336]) tensor([0.5022, 0.1610, 0.2074, 0.1294]) -Greedy action tensor([ 0.7859, -0.3487, -0.1306, -0.2522]) tensor([0.4818, 0.1549, 0.1927, 0.1706]) -Greedy action tensor([ 0.2513, -0.1113, -0.0437, -0.4264]) tensor([0.3392, 0.2360, 0.2525, 0.1722]) -Greedy action tensor([ 0.4296, -0.1507, -0.0417, -0.0934]) tensor([0.3601, 0.2016, 0.2248, 0.2135]) -Greedy action tensor([ 1.3499, -0.5248, 0.1408, -0.3975]) tensor([0.6150, 0.0943, 0.1835, 0.1071]) -Greedy action tensor([ 0.6348, -0.4225, -0.1081, -0.0931]) tensor([0.4336, 0.1506, 0.2063, 0.2094]) -Greedy action tensor([ 0.9220, -0.6275, -0.1550, -0.5509]) tensor([0.5611, 0.1192, 0.1911, 0.1286]) -Greedy action tensor([ 0.7042, -0.1060, -0.0671, -0.0814]) tensor([0.4232, 0.1882, 0.1957, 0.1929]) -Greedy action tensor([ 0.4941, -0.0890, -0.0601, -0.1827]) tensor([0.3787, 0.2113, 0.2176, 0.1924]) -Greedy action tensor([ 0.6028, -0.3539, -0.0837, -0.1124]) tensor([0.4208, 0.1616, 0.2118, 0.2058]) -Greedy action tensor([ 0.5301, -0.3965, -0.0012, -0.7158]) tensor([0.4402, 0.1743, 0.2588, 0.1267]) -Greedy action tensor([ 0.8284, -0.4812, 0.0357, -0.2657]) tensor([0.4861, 0.1312, 0.2200, 0.1628]) -Greedy action tensor([ 0.6788, -0.1891, -0.0925, -0.2677]) tensor([0.4405, 0.1849, 0.2037, 0.1709]) -Greedy action tensor([ 0.6819, 0.3491, -0.3823, -0.0956]) tensor([0.3966, 0.2843, 0.1368, 0.1822]) -Greedy action tensor([ 0.5625, 0.0187, -0.0713, -0.3709]) tensor([0.3993, 0.2318, 0.2119, 0.1570]) -Greedy action tensor([ 0.8078, -0.5885, -0.0360, -0.4860]) tensor([0.5123, 0.1268, 0.2203, 0.1405]) -Greedy action tensor([ 1.1228, -1.0813, 0.0018, -0.5019]) tensor([0.6123, 0.0676, 0.1996, 0.1206]) -Greedy action tensor([ 0.8938, -0.9261, -0.0458, -0.4997]) tensor([0.5553, 0.0900, 0.2170, 0.1378]) -Greedy action tensor([ 0.8659, -0.5564, -0.0276, -0.2469]) tensor([0.5053, 0.1219, 0.2068, 0.1661]) -Greedy action tensor([ 0.7653, -0.2503, 0.1232, -0.3905]) tensor([0.4539, 0.1644, 0.2388, 0.1429]) -Greedy action tensor([ 0.7851, -0.3549, -0.0731, -0.1526]) tensor([0.4683, 0.1498, 0.1985, 0.1834]) -Greedy action tensor([ 0.6058, -0.3894, -0.0041, -0.3019]) tensor([0.4317, 0.1596, 0.2346, 0.1742]) -Greedy action tensor([ 0.5772, -0.4365, -0.3141, -0.1605]) tensor([0.4442, 0.1612, 0.1822, 0.2124]) -Greedy action tensor([ 0.9097, -0.5154, -0.0372, -0.3217]) tensor([0.5207, 0.1252, 0.2020, 0.1520]) -Greedy action tensor([ 0.5455, -0.0491, 0.0146, -0.0258]) tensor([0.3697, 0.2040, 0.2174, 0.2088]) -Greedy action tensor([ 0.7716, -0.4466, 0.1135, -0.4559]) tensor([0.4747, 0.1404, 0.2458, 0.1391]) -Greedy action tensor([ 0.9336, -0.5214, 0.0392, -0.3619]) tensor([0.5219, 0.1218, 0.2134, 0.1429]) -Greedy action tensor([ 0.8222, -0.5016, -0.0854, -0.5824]) tensor([0.5222, 0.1390, 0.2107, 0.1282]) -Greedy action tensor([ 0.6147, -0.4036, -0.1701, -0.1611]) tensor([0.4390, 0.1586, 0.2003, 0.2021]) -Greedy action tensor([ 0.4177, -0.1518, 0.0176, -0.2622]) tensor([0.3646, 0.2063, 0.2444, 0.1847]) -Greedy action tensor([ 0.2278, 0.1204, -0.1534, -0.4394]) tensor([0.3232, 0.2903, 0.2207, 0.1658]) -Greedy action tensor([ 0.6863, -0.4843, 0.0536, -0.2485]) tensor([0.4476, 0.1388, 0.2378, 0.1758]) -Greedy action tensor([ 0.4521, -0.8363, 0.3829, -0.5847]) tensor([0.3901, 0.1076, 0.3640, 0.1383]) -Greedy action tensor([ 1.0379, -0.3193, 0.5443, -0.0088]) tensor([0.4507, 0.1160, 0.2751, 0.1582]) -Greedy action tensor([ 1.2605, -0.5758, 1.1315, 0.9567]) tensor([0.3602, 0.0574, 0.3166, 0.2658]) -Greedy action tensor([ 0.0324, -0.2443, -0.4885, 0.6362]) tensor([0.2392, 0.1814, 0.1421, 0.4374]) -Greedy action tensor([-0.1443, -0.7111, -0.8467, 0.7101]) tensor([0.2266, 0.1286, 0.1123, 0.5325]) -Greedy action tensor([0.0690, 1.2717, 0.4866, 0.1979]) tensor([0.1432, 0.4766, 0.2174, 0.1629]) -Greedy action tensor([-0.0488, -0.8719, 0.8004, -0.6907]) tensor([0.2324, 0.1020, 0.5433, 0.1223]) -Greedy action tensor([ 0.1939, -0.8712, -0.5305, -0.0828]) tensor([0.3865, 0.1332, 0.1873, 0.2931]) -Greedy action tensor([ 0.0841, -1.3519, -0.6548, -0.1616]) tensor([0.4004, 0.0952, 0.1912, 0.3132]) -Greedy action tensor([-0.2827, 0.4285, -0.0634, -0.2295]) tensor([0.1874, 0.3816, 0.2333, 0.1976]) -Greedy action tensor([-1.2172, -0.0177, 0.6043, -0.3620]) tensor([0.0778, 0.2582, 0.4810, 0.1830]) -Greedy action tensor([-0.3205, 0.0550, 0.4589, 0.2315]) tensor([0.1569, 0.2284, 0.3421, 0.2725]) -Greedy action tensor([-0.3243, 0.0273, -0.5866, -0.8582]) tensor([0.2648, 0.3763, 0.2037, 0.1552]) -Greedy action tensor([ 0.5581, -0.8183, 1.3265, 0.2653]) tensor([0.2407, 0.0608, 0.5190, 0.1796]) -Greedy action tensor([0.7363, 0.1784, 0.2872, 0.2538]) tensor([0.3536, 0.2024, 0.2257, 0.2183]) -Greedy action tensor([-0.3058, -0.7603, 0.6389, 0.3386]) tensor([0.1636, 0.1039, 0.4209, 0.3117]) -Greedy action tensor([-0.3117, -0.2505, -1.1094, -0.4539]) tensor([0.2958, 0.3144, 0.1332, 0.2566]) -Greedy action tensor([-0.0146, -1.1950, 0.0403, 0.2459]) tensor([0.2731, 0.0839, 0.2886, 0.3544]) -Greedy action tensor([-0.3202, -1.4126, 0.4572, 0.0302]) tensor([0.2028, 0.0680, 0.4413, 0.2879]) -Greedy action tensor([ 0.2869, -0.6643, 0.3648, 0.7773]) tensor([0.2439, 0.0942, 0.2636, 0.3983]) -Greedy action tensor([-1.0919, -0.2044, -0.4806, 0.1998]) tensor([0.1122, 0.2726, 0.2068, 0.4084]) -Greedy action tensor([ 0.5942, -0.8871, 0.3368, 0.1217]) tensor([0.3811, 0.0866, 0.2946, 0.2376]) -Greedy action tensor([-0.6617, 0.4016, -0.6779, -0.7570]) tensor([0.1727, 0.5002, 0.1700, 0.1570]) -Greedy action tensor([ 0.4221, -0.1395, -1.5751, 0.2494]) tensor([0.3926, 0.2239, 0.0533, 0.3303]) -Greedy action tensor([ 0.3110, -0.4797, 0.0609, -0.6491]) tensor([0.3824, 0.1734, 0.2978, 0.1464]) -Greedy action tensor([-0.4034, 0.5821, 0.5519, -0.8041]) tensor([0.1439, 0.3856, 0.3741, 0.0964]) -Greedy action tensor([ 0.3494, 0.4653, -0.9010, 0.1030]) tensor([0.3134, 0.3519, 0.0898, 0.2449]) -Greedy action tensor([-0.3940, 0.6208, -0.3404, -0.4413]) tensor([0.1734, 0.4783, 0.1829, 0.1654]) -Greedy action tensor([ 0.8033, -1.2003, -0.6176, -0.3744]) tensor([0.5937, 0.0801, 0.1434, 0.1828]) -Greedy action tensor([ 0.8402, -1.8644, 0.3928, 0.6116]) tensor([0.3997, 0.0267, 0.2555, 0.3180]) -Greedy action tensor([ 0.1168, -0.1661, 0.4305, -0.2666]) tensor([0.2629, 0.1981, 0.3598, 0.1792]) -Greedy action tensor([ 1.1409, -0.1615, 0.4420, -0.5976]) tensor([0.5142, 0.1398, 0.2556, 0.0904]) -Greedy action tensor([-0.9987, -1.0192, -0.0067, -0.6820]) tensor([0.1653, 0.1620, 0.4458, 0.2269]) -Greedy action tensor([ 0.5253, 0.2861, 0.8344, -0.6093]) tensor([0.2881, 0.2268, 0.3924, 0.0926]) -Greedy action tensor([-0.0213, 0.1841, 0.0053, -0.0826]) tensor([0.2384, 0.2927, 0.2448, 0.2242]) -Greedy action tensor([ 0.9692, -1.6139, -0.1129, 0.7597]) tensor([0.4493, 0.0339, 0.1523, 0.3644]) -Greedy action tensor([ 0.5216, -0.7882, 1.0815, -0.7589]) tensor([0.3032, 0.0818, 0.5307, 0.0843]) -Greedy action tensor([-0.4760, -0.4386, 0.9041, -0.3506]) tensor([0.1399, 0.1452, 0.5562, 0.1586]) -Greedy action tensor([ 0.0450, 0.6546, -0.6326, -0.2096]) tensor([0.2426, 0.4462, 0.1232, 0.1880]) -Greedy action tensor([ 0.1567, -0.3684, -0.0269, -0.1448]) tensor([0.3161, 0.1870, 0.2631, 0.2338]) -Greedy action tensor([ 1.1613, -0.7322, 1.3939, 0.0431]) tensor([0.3650, 0.0550, 0.4607, 0.1193]) -Greedy action tensor([ 0.4557, -1.9924, 1.0858, -0.2664]) tensor([0.2899, 0.0251, 0.5443, 0.1408]) -Greedy action tensor([ 0.4086, -0.2729, 0.4766, 0.8281]) tensor([0.2440, 0.1235, 0.2612, 0.3713]) -Greedy action tensor([ 0.2019, 0.5016, 0.1097, -0.4828]) tensor([0.2656, 0.3584, 0.2422, 0.1339]) -Greedy action tensor([-0.3153, -0.4594, 0.6456, -0.4505]) tensor([0.1868, 0.1617, 0.4883, 0.1632]) -Greedy action tensor([-1.0647, -0.3416, -0.0353, 0.3690]) tensor([0.0995, 0.2050, 0.2784, 0.4172]) -Greedy action tensor([ 0.2363, -1.1433, -0.7107, 0.1849]) tensor([0.3862, 0.0972, 0.1498, 0.3668]) -Greedy action tensor([ 0.7890, -0.6291, 1.0386, 0.6732]) tensor([0.2927, 0.0709, 0.3757, 0.2607]) -Greedy action tensor([-0.7589, -0.2396, -0.3623, 0.3740]) tensor([0.1375, 0.2311, 0.2044, 0.4269]) -Greedy action tensor([-0.2707, -1.0225, -1.0498, -0.1523]) tensor([0.3272, 0.1543, 0.1501, 0.3684]) -Greedy action tensor([ 0.4725, -1.7500, -0.6312, 1.0881]) tensor([0.3039, 0.0329, 0.1008, 0.5624]) -Greedy action tensor([-0.2221, 0.0650, -0.4244, -0.1014]) tensor([0.2338, 0.3115, 0.1909, 0.2638]) -Greedy action tensor([ 0.7089, -0.6620, 0.1771, 0.1984]) tensor([0.4096, 0.1040, 0.2406, 0.2458]) -Greedy action tensor([ 1.1242, -1.7877, 0.1135, 0.9224]) tensor([0.4473, 0.0243, 0.1628, 0.3656]) -Greedy action tensor([ 0.6423, -0.5427, -0.1006, -0.2880]) tensor([0.4596, 0.1405, 0.2186, 0.1813]) -Greedy action tensor([-1.2703, -0.4334, 0.4870, -0.7443]) tensor([0.0926, 0.2138, 0.5368, 0.1567]) -Greedy action tensor([1.3423, 0.4027, 1.0242, 0.2676]) tensor([0.4066, 0.1589, 0.2958, 0.1388]) -Greedy action tensor([ 0.6799, -0.4694, 0.7305, -0.2485]) tensor([0.3618, 0.1146, 0.3806, 0.1430]) -Greedy action tensor([ 0.6012, -0.9045, -0.6867, -0.2139]) tensor([0.5154, 0.1143, 0.1422, 0.2281]) -Greedy action tensor([ 0.6622, -0.8141, 0.7874, 1.9398]) tensor([0.1681, 0.0384, 0.1905, 0.6030]) -Greedy action tensor([ 0.7606, -1.6221, -0.0890, 0.8980]) tensor([0.3749, 0.0346, 0.1603, 0.4302]) -Greedy action tensor([1.0015, 0.0270, 0.1545, 0.8065]) tensor([0.3804, 0.1436, 0.1631, 0.3130]) -Greedy action tensor([ 0.3193, -0.2507, 0.9436, 0.4336]) tensor([0.2196, 0.1242, 0.4100, 0.2462]) -Greedy action tensor([ 0.2619, -1.9547, -0.2875, 0.4301]) tensor([0.3485, 0.0380, 0.2012, 0.4123]) -Greedy action tensor([ 0.2642, -0.5291, -0.3062, 0.7694]) tensor([0.2721, 0.1231, 0.1538, 0.4510]) -Greedy action tensor([ 0.0449, 0.7879, -1.3632, 0.8447]) tensor([0.1795, 0.3773, 0.0439, 0.3993]) -Greedy action tensor([-0.6632, -0.4099, 0.2679, -0.9457]) tensor([0.1792, 0.2309, 0.4548, 0.1351]) -Greedy action tensor([-0.5097, -0.3050, 0.0513, 0.5601]) tensor([0.1451, 0.1780, 0.2542, 0.4228]) -Greedy action tensor([ 1.0596, -0.4079, 0.8105, -0.2974]) tensor([0.4410, 0.1017, 0.3438, 0.1135]) -Greedy action tensor([ 0.1698, -1.5219, 0.2631, 0.2069]) tensor([0.3012, 0.0555, 0.3307, 0.3126]) -Greedy action tensor([ 0.3065, 1.0541, 0.4239, -0.6559]) tensor([0.2165, 0.4573, 0.2435, 0.0827]) -Greedy action tensor([-0.9313, -0.5512, 0.7540, 0.0963]) tensor([0.0939, 0.1373, 0.5064, 0.2624]) -Greedy action tensor([-0.8302, 0.2964, 0.2572, -0.9920]) tensor([0.1265, 0.3904, 0.3754, 0.1076]) -Greedy action tensor([ 0.7609, -1.4652, 0.4148, 1.4002]) tensor([0.2695, 0.0291, 0.1907, 0.5107]) -Greedy action tensor([ 0.2298, -0.4157, -0.7088, -0.2067]) tensor([0.3903, 0.2047, 0.1527, 0.2523]) -Greedy action tensor([ 1.1819, -0.9003, 0.0720, 0.0850]) tensor([0.5592, 0.0697, 0.1843, 0.1867]) -Greedy action tensor([-0.4847, -1.3760, 0.2406, -0.0034]) tensor([0.1963, 0.0805, 0.4055, 0.3177]) -Greedy action tensor([-1.1419, -1.5413, -0.3529, 0.6170]) tensor([0.1033, 0.0693, 0.2275, 0.5999]) -Greedy action tensor([-0.1427, 0.4757, -0.2979, 0.5899]) tensor([0.1726, 0.3204, 0.1478, 0.3592]) -Greedy action tensor([-0.2218, -0.2525, 0.2969, -0.4745]) tensor([0.2259, 0.2191, 0.3795, 0.1755]) -Greedy action tensor([ 1.0360, -0.8939, 0.7747, 0.6742]) tensor([0.3829, 0.0556, 0.2949, 0.2667]) -Greedy action tensor([ 0.7914, -0.6389, 0.2118, 0.1652]) tensor([0.4285, 0.1025, 0.2400, 0.2291]) -Greedy action tensor([ 1.6004, -0.0119, -0.5467, 0.3687]) tensor([0.6219, 0.1240, 0.0726, 0.1815]) -Greedy action tensor([ 1.5976, -0.6564, -0.1714, 0.1312]) tensor([0.6639, 0.0697, 0.1132, 0.1532]) -Greedy action tensor([ 1.2670, -0.5499, -0.3061, 0.4844]) tensor([0.5473, 0.0890, 0.1135, 0.2502]) -Greedy action tensor([ 1.8823, -0.8440, -0.2211, 0.5174]) tensor([0.6930, 0.0454, 0.0846, 0.1770]) -Greedy action tensor([ 0.9626, -0.5926, -0.4012, 0.3786]) tensor([0.4940, 0.1043, 0.1263, 0.2755]) -Greedy action tensor([ 1.0525, -0.2149, -0.0826, -0.0473]) tensor([0.5165, 0.1455, 0.1660, 0.1720]) -Greedy action tensor([ 1.0490, -0.2659, -0.2150, 0.1783]) tensor([0.5077, 0.1363, 0.1434, 0.2126]) -Greedy action tensor([ 1.3782, -0.2395, -0.6153, 0.4002]) tensor([0.5846, 0.1160, 0.0796, 0.2198]) -Greedy action tensor([ 1.0258, 0.0507, -0.5288, 0.1202]) tensor([0.5018, 0.1893, 0.1060, 0.2029]) -Greedy action tensor([ 1.4944, -0.2039, -0.4550, 0.6530]) tensor([0.5693, 0.1042, 0.0810, 0.2454]) -Greedy action tensor([ 1.3514, -0.3461, -0.2867, 0.5499]) tensor([0.5476, 0.1003, 0.1064, 0.2457]) -Greedy action tensor([ 1.5019, -1.0553, -0.0922, 0.4891]) tensor([0.6083, 0.0472, 0.1235, 0.2210]) -Greedy action tensor([ 0.8814, -0.2783, -0.3311, 0.4489]) tensor([0.4425, 0.1388, 0.1316, 0.2871]) -Greedy action tensor([ 1.5790, -1.1622, -0.1033, 0.2878]) tensor([0.6556, 0.0423, 0.1219, 0.1802]) -Greedy action tensor([ 1.2916, -0.4051, 0.0460, 0.0919]) tensor([0.5642, 0.1034, 0.1624, 0.1700]) -Greedy action tensor([ 1.6411, -0.7484, -0.1248, 0.3501]) tensor([0.6503, 0.0596, 0.1112, 0.1788]) -Greedy action tensor([ 1.5104, -0.4711, -0.7661, 0.3198]) tensor([0.6475, 0.0893, 0.0665, 0.1968]) -Greedy action tensor([ 1.3571, -0.1628, -0.4849, 0.3938]) tensor([0.5685, 0.1244, 0.0901, 0.2170]) -Greedy action tensor([ 1.5927, -0.4482, -0.6984, 0.1753]) tensor([0.6787, 0.0882, 0.0687, 0.1645]) -Greedy action tensor([ 1.1239, -0.0794, -0.2799, 0.1693]) tensor([0.5179, 0.1555, 0.1272, 0.1994]) -Greedy action tensor([ 1.6619, -0.0694, -1.1805, 0.4939]) tensor([0.6467, 0.1145, 0.0377, 0.2011]) -Greedy action tensor([ 0.9936, -0.3782, -0.5704, 0.1625]) tensor([0.5267, 0.1336, 0.1102, 0.2294]) -Greedy action tensor([ 1.3587, -0.4980, -0.4692, 0.1598]) tensor([0.6179, 0.0965, 0.0993, 0.1863]) -Greedy action tensor([1.9430, 0.6000, 0.0221, 0.2936]) tensor([0.6251, 0.1632, 0.0916, 0.1201]) -Greedy action tensor([ 1.6213, -0.6646, -0.3749, 0.7092]) tensor([0.6101, 0.0620, 0.0829, 0.2450]) -Greedy action tensor([ 1.4621, -0.5170, -0.2128, 0.3674]) tensor([0.6024, 0.0832, 0.1128, 0.2016]) -Greedy action tensor([ 1.4163, -0.9875, -0.0952, -0.0215]) tensor([0.6458, 0.0584, 0.1425, 0.1533]) -Greedy action tensor([ 1.4078, -0.4507, -0.4838, 0.1320]) tensor([0.6305, 0.0983, 0.0951, 0.1760]) -Greedy action tensor([ 1.9310, -0.1525, -1.0434, 0.2000]) tensor([0.7393, 0.0920, 0.0378, 0.1309]) -Greedy action tensor([ 1.6914, -1.4599, 0.0421, 0.2730]) tensor([0.6770, 0.0290, 0.1301, 0.1639]) -Greedy action tensor([ 1.7416, -0.2696, -0.6255, 0.2853]) tensor([0.6846, 0.0916, 0.0642, 0.1596]) -Greedy action tensor([ 2.0177, -0.5623, -0.0327, 0.4984]) tensor([0.7026, 0.0532, 0.0904, 0.1538]) -Greedy action tensor([ 1.3110, -0.5329, -0.3892, 0.4333]) tensor([0.5693, 0.0901, 0.1040, 0.2367]) -Greedy action tensor([ 1.4844, -0.5065, -0.6966, 0.2819]) tensor([0.6452, 0.0881, 0.0729, 0.1938]) -Greedy action tensor([ 1.0902, -0.0848, -0.3993, 0.6563]) tensor([0.4582, 0.1415, 0.1033, 0.2969]) -Greedy action tensor([ 1.2146, -0.5612, -0.1222, 0.4650]) tensor([0.5250, 0.0889, 0.1379, 0.2481]) -Greedy action tensor([ 1.3938, 0.0327, -0.6624, 0.2900]) tensor([0.5828, 0.1494, 0.0746, 0.1933]) -Greedy action tensor([ 1.2380, -0.2038, -0.3244, -0.0173]) tensor([0.5777, 0.1366, 0.1211, 0.1646]) -Greedy action tensor([ 1.9185, -0.8282, -0.4328, 0.1907]) tensor([0.7479, 0.0480, 0.0712, 0.1329]) -Greedy action tensor([ 1.5342, -0.7429, -0.5221, 0.4898]) tensor([0.6320, 0.0648, 0.0808, 0.2224]) -Greedy action tensor([ 0.6521, -0.3488, -0.0137, -0.0760]) tensor([0.4230, 0.1555, 0.2174, 0.2042]) -Greedy action tensor([ 1.4821, -0.5284, -0.5768, 1.0387]) tensor([0.5254, 0.0704, 0.0670, 0.3372]) -Greedy action tensor([ 1.1070, -0.4032, 0.0974, 0.0549]) tensor([0.5169, 0.1142, 0.1884, 0.1805]) -Greedy action tensor([ 1.7512, -0.5168, -0.1911, 0.2694]) tensor([0.6784, 0.0702, 0.0973, 0.1541]) -Greedy action tensor([ 1.7876, -0.5972, -0.4479, 0.1908]) tensor([0.7135, 0.0657, 0.0763, 0.1445]) -Greedy action tensor([ 1.4477, -0.0674, -0.6309, 0.2891]) tensor([0.6029, 0.1325, 0.0754, 0.1892]) -Greedy action tensor([ 1.6486, -0.4094, -0.4666, 0.4918]) tensor([0.6399, 0.0817, 0.0772, 0.2012]) -Greedy action tensor([ 1.5997, -0.3063, -0.8417, 0.0865]) tensor([0.6869, 0.1021, 0.0598, 0.1512]) -Greedy action tensor([ 1.5933, -0.7185, -0.2230, 0.1504]) tensor([0.6676, 0.0661, 0.1086, 0.1577]) -Greedy action tensor([ 1.8525, -1.3849, -0.6725, 0.8571]) tensor([0.6716, 0.0264, 0.0538, 0.2482]) -Greedy action tensor([ 1.2665, -0.7475, -0.0377, 0.2130]) tensor([0.5703, 0.0761, 0.1548, 0.1989]) -Greedy action tensor([ 1.3944, -0.6156, -0.1065, 0.1025]) tensor([0.6129, 0.0821, 0.1366, 0.1684]) -Greedy action tensor([ 1.5331, -0.2583, -0.3672, 0.2019]) tensor([0.6328, 0.1055, 0.0946, 0.1671]) -Greedy action tensor([ 1.5726, -0.3222, -0.6364, 0.3491]) tensor([0.6434, 0.0967, 0.0706, 0.1893]) -Greedy action tensor([ 1.5015, -0.4603, -0.2099, 0.2540]) tensor([0.6217, 0.0874, 0.1123, 0.1786]) -Greedy action tensor([ 2.0004, -0.5535, -0.3221, 0.5575]) tensor([0.7082, 0.0551, 0.0694, 0.1673]) -Greedy action tensor([ 1.8909, 0.2564, -0.2568, 0.1039]) tensor([0.6760, 0.1319, 0.0789, 0.1132]) -Greedy action tensor([ 0.9911, -0.2589, -0.8873, 0.3317]) tensor([0.5111, 0.1464, 0.0781, 0.2643]) -Greedy action tensor([ 1.5820, -0.3800, -0.9940, 0.4311]) tensor([0.6523, 0.0917, 0.0496, 0.2064]) -Greedy action tensor([ 1.9397, 0.4093, -0.2698, 0.3914]) tensor([0.6498, 0.1407, 0.0713, 0.1382]) -Greedy action tensor([ 1.1959, -0.1344, -0.5395, 0.4022]) tensor([0.5283, 0.1397, 0.0932, 0.2389]) -Greedy action tensor([ 1.8424, -0.7170, -0.3834, 0.2558]) tensor([0.7195, 0.0556, 0.0777, 0.1472]) -Greedy action tensor([ 2.3810, -1.3566, -0.0871, 0.3034]) tensor([0.8105, 0.0193, 0.0687, 0.1015]) -Greedy action tensor([ 1.9372, -0.5328, -0.5439, 0.4831]) tensor([0.7133, 0.0603, 0.0597, 0.1667]) -Greedy action tensor([ 1.5907, -0.2956, -0.8760, 0.0044]) tensor([0.6939, 0.1052, 0.0589, 0.1420]) -Greedy action tensor([ 1.3466, 0.0932, -0.4328, 0.3839]) tensor([0.5446, 0.1555, 0.0919, 0.2080]) -Greedy action tensor([ 1.5247, -0.3155, -0.4292, 0.4118]) tensor([0.6138, 0.0975, 0.0870, 0.2017]) -Greedy action tensor([ 1.1838, 0.0779, -0.5783, 0.4219]) tensor([0.5078, 0.1680, 0.0872, 0.2370]) -Greedy action tensor([ 1.6093, -0.2170, -0.5718, 0.4444]) tensor([0.6306, 0.1015, 0.0712, 0.1967]) -Greedy action tensor([ 1.2053, 0.0650, -0.1547, 0.3129]) tensor([0.5035, 0.1610, 0.1292, 0.2063]) -Greedy action tensor([ 1.4049, -0.1421, -0.7178, 0.3637]) tensor([0.5933, 0.1263, 0.0710, 0.2094]) -Greedy action tensor([ 1.4418, -0.3695, -0.3722, 0.3644]) tensor([0.5999, 0.0980, 0.0978, 0.2043]) -Greedy action tensor([ 2.0450, -0.6701, -0.3289, 0.5260]) tensor([0.7256, 0.0480, 0.0676, 0.1588]) -Greedy action tensor([ 1.5947, -0.6631, -0.1391, 0.0477]) tensor([0.6693, 0.0700, 0.1182, 0.1425]) -Greedy action tensor([ 1.5917, -0.2889, -0.0122, 0.5907]) tensor([0.5810, 0.0886, 0.1168, 0.2135]) -Greedy action tensor([ 1.1340, -0.1176, -0.5832, 0.6100]) tensor([0.4860, 0.1390, 0.0873, 0.2878]) -Greedy action tensor([ 1.6112, -0.1819, -0.4467, 0.3522]) tensor([0.6337, 0.1055, 0.0809, 0.1799]) -Greedy action tensor([ 1.2961, -0.4201, -0.5040, 0.0019]) tensor([0.6176, 0.1110, 0.1021, 0.1693]) -Greedy action tensor([ 1.5783, -0.5130, -0.2759, 0.2411]) tensor([0.6482, 0.0801, 0.1015, 0.1702]) -Greedy action tensor([ 1.0766, -0.1844, -0.3226, 0.2713]) tensor([0.5058, 0.1433, 0.1248, 0.2261]) -Greedy action tensor([ 2.0621, -1.9104, 0.0778, -0.2132]) tensor([0.7942, 0.0150, 0.1092, 0.0816]) -Greedy action tensor([ 1.0440, -1.1474, 0.1401, -0.4681]) tensor([0.5756, 0.0643, 0.2331, 0.1269]) -Greedy action tensor([ 0.9895, -0.3646, -0.1932, -0.2314]) tensor([0.5377, 0.1388, 0.1648, 0.1586]) -Greedy action tensor([ 0.6263, -0.4183, -0.0182, -0.3940]) tensor([0.4470, 0.1573, 0.2346, 0.1611]) -Greedy action tensor([ 0.8275, -0.6204, -0.0022, -0.4421]) tensor([0.5122, 0.1204, 0.2234, 0.1439]) -Greedy action tensor([ 0.5646, 0.0619, -0.1795, -0.1016]) tensor([0.3856, 0.2332, 0.1832, 0.1980]) -Greedy action tensor([ 0.4848, 0.0782, 0.2333, -0.2445]) tensor([0.3418, 0.2276, 0.2658, 0.1648]) -Greedy action tensor([ 0.5443, -0.1666, -0.1244, -0.1253]) tensor([0.3975, 0.1953, 0.2037, 0.2035]) -Greedy action tensor([ 0.4566, -0.5063, -0.1072, -0.0466]) tensor([0.3913, 0.1494, 0.2227, 0.2366]) -Greedy action tensor([ 0.6940, -0.3727, -0.0847, 0.0274]) tensor([0.4317, 0.1486, 0.1981, 0.2216]) -Greedy action tensor([ 1.0347, -0.6277, -0.0994, -0.3618]) tensor([0.5685, 0.1078, 0.1829, 0.1407]) -Greedy action tensor([ 0.8629, -0.3921, 0.1760, -0.1891]) tensor([0.4678, 0.1334, 0.2354, 0.1634]) -Greedy action tensor([ 0.6172, -0.3132, -0.0749, -0.2570]) tensor([0.4325, 0.1706, 0.2165, 0.1804]) -Greedy action tensor([ 0.8656, -0.6608, -0.0698, -0.3788]) tensor([0.5269, 0.1145, 0.2068, 0.1518]) -Greedy action tensor([ 0.9197, -0.8493, 0.0178, -0.5223]) tensor([0.5517, 0.0941, 0.2239, 0.1304]) -Greedy action tensor([ 0.6250, -0.5677, -0.0455, -0.2266]) tensor([0.4461, 0.1353, 0.2282, 0.1904]) -Greedy action tensor([ 0.8367, -0.4398, 0.0537, -0.4027]) tensor([0.4937, 0.1377, 0.2256, 0.1429]) -Greedy action tensor([ 0.5912, -0.4820, 0.1224, -0.6148]) tensor([0.4411, 0.1508, 0.2760, 0.1321]) -Greedy action tensor([ 0.3586, 0.1216, -0.1592, -0.2091]) tensor([0.3388, 0.2673, 0.2019, 0.1920]) -Greedy action tensor([ 0.8375, -0.7979, 0.2555, -0.6735]) tensor([0.5065, 0.0987, 0.2830, 0.1118]) -Greedy action tensor([ 0.3555, -0.0139, -0.0396, 0.0053]) tensor([0.3258, 0.2252, 0.2195, 0.2295]) -Greedy action tensor([ 0.8063, -0.5236, -0.0503, -0.2055]) tensor([0.4872, 0.1289, 0.2069, 0.1771]) -Greedy action tensor([ 0.2550, -0.1956, -0.1565, -0.0941]) tensor([0.3328, 0.2120, 0.2205, 0.2347]) -Greedy action tensor([ 0.5283, -0.4269, -0.0289, -0.1688]) tensor([0.4072, 0.1567, 0.2333, 0.2028]) -Greedy action tensor([ 0.5624, -0.4372, 0.0101, -0.3510]) tensor([0.4265, 0.1570, 0.2455, 0.1711]) -Greedy action tensor([ 0.3635, -0.3071, -0.0651, -0.8706]) tensor([0.4075, 0.2084, 0.2655, 0.1186]) -Greedy action tensor([ 1.0279, -0.4503, -0.0505, -0.1306]) tensor([0.5313, 0.1212, 0.1807, 0.1668]) -Greedy action tensor([ 0.4166, 0.0021, -0.0196, -0.0603]) tensor([0.3416, 0.2256, 0.2208, 0.2120]) -Greedy action tensor([ 0.5276, -0.2947, -0.0048, -0.2083]) tensor([0.3991, 0.1754, 0.2343, 0.1912]) -Greedy action tensor([ 0.3099, -0.4489, -0.1987, -0.1246]) tensor([0.3680, 0.1723, 0.2213, 0.2383]) -Greedy action tensor([ 0.5924, -0.5271, 0.0033, -0.4571]) tensor([0.4482, 0.1463, 0.2487, 0.1569]) -Greedy action tensor([ 0.7528, -0.6487, -0.1357, -0.2134]) tensor([0.4907, 0.1208, 0.2018, 0.1867]) -Greedy action tensor([ 1.1205, -0.4244, -0.0328, -0.6676]) tensor([0.5895, 0.1258, 0.1861, 0.0986]) -Greedy action tensor([ 0.5356, -0.5901, 0.0072, -0.2739]) tensor([0.4239, 0.1375, 0.2499, 0.1887]) -Greedy action tensor([ 0.1615, -0.2382, -0.2525, -0.0092]) tensor([0.3150, 0.2112, 0.2082, 0.2656]) -Greedy action tensor([ 0.9317, -0.3962, -0.0690, -0.3491]) tensor([0.5234, 0.1387, 0.1924, 0.1454]) -Greedy action tensor([ 1.0903, -0.4168, -0.4351, -0.6097]) tensor([0.6166, 0.1366, 0.1341, 0.1126]) -Greedy action tensor([ 0.8939, -0.8225, -0.0833, -0.4667]) tensor([0.5517, 0.0991, 0.2076, 0.1415]) -Greedy action tensor([ 0.9046, -0.7907, 0.0530, -0.4259]) tensor([0.5334, 0.0979, 0.2276, 0.1410]) -Greedy action tensor([ 0.7006, -0.5289, 0.0092, -0.2984]) tensor([0.4626, 0.1353, 0.2317, 0.1704]) -Greedy action tensor([ 0.6134, -0.5217, 0.0128, -0.1429]) tensor([0.4275, 0.1374, 0.2345, 0.2007]) -Greedy action tensor([ 0.9562, -0.6689, -0.0485, -0.5292]) tensor([0.5588, 0.1100, 0.2046, 0.1265]) -Greedy action tensor([ 0.6543, -0.5198, -0.1490, -0.3984]) tensor([0.4748, 0.1468, 0.2127, 0.1657]) -Greedy action tensor([ 0.6666, -0.5991, -0.1447, -0.4871]) tensor([0.4898, 0.1381, 0.2176, 0.1545]) -Greedy action tensor([ 0.6736, -0.4724, -0.1238, -0.2963]) tensor([0.4657, 0.1480, 0.2098, 0.1765]) -Greedy action tensor([ 0.8214, -0.1511, -0.1216, -0.5791]) tensor([0.4965, 0.1878, 0.1934, 0.1224]) -Greedy action tensor([ 0.5575, -0.3076, -0.0718, -0.5134]) tensor([0.4354, 0.1833, 0.2321, 0.1492]) -Greedy action tensor([ 0.7099, -0.7398, -0.0694, -0.2837]) tensor([0.4846, 0.1137, 0.2223, 0.1794]) -Greedy action tensor([ 0.7324, -0.3473, -0.0360, -0.3343]) tensor([0.4656, 0.1582, 0.2159, 0.1602]) -Greedy action tensor([ 1.0703, -0.7088, -0.0848, -0.4262]) tensor([0.5856, 0.0988, 0.1845, 0.1311]) -Greedy action tensor([ 0.8162, -0.8299, 0.0831, -0.4351]) tensor([0.5104, 0.0984, 0.2452, 0.1460]) -Greedy action tensor([ 1.0239, -0.5079, -0.0353, -0.2337]) tensor([0.5414, 0.1170, 0.1877, 0.1539]) -Greedy action tensor([ 0.5091, 0.0651, -0.0348, -0.1666]) tensor([0.3662, 0.2349, 0.2126, 0.1863]) -Greedy action tensor([ 0.6858, -0.6112, 0.2034, -0.4651]) tensor([0.4531, 0.1239, 0.2797, 0.1433]) -Greedy action tensor([ 0.8979, -0.0616, 0.2262, -1.0814]) tensor([0.4921, 0.1885, 0.2514, 0.0680]) -Greedy action tensor([ 0.1781, 0.5651, -0.1538, 0.0562]) tensor([0.2454, 0.3613, 0.1761, 0.2172]) -Greedy action tensor([ 0.4215, -0.3987, -0.0549, -0.2500]) tensor([0.3888, 0.1712, 0.2414, 0.1986]) -Greedy action tensor([ 0.6020, -0.3757, -0.1156, -0.2228]) tensor([0.4343, 0.1634, 0.2119, 0.1904]) -Greedy action tensor([ 0.6808, -0.3941, -0.0999, -0.1859]) tensor([0.4505, 0.1538, 0.2064, 0.1894]) -Greedy action tensor([ 0.3188, 0.1910, -0.0885, -0.1177]) tensor([0.3133, 0.2757, 0.2085, 0.2025]) -Greedy action tensor([ 1.1001, -0.7003, -0.1342, -0.6367]) tensor([0.6126, 0.1012, 0.1783, 0.1079]) -Greedy action tensor([ 0.8394, -0.8780, 0.1416, -0.6297]) tensor([0.5243, 0.0941, 0.2609, 0.1206]) -Greedy action tensor([ 0.3984, 0.0156, 0.0577, -0.3196]) tensor([0.3471, 0.2367, 0.2469, 0.1693]) -Greedy action tensor([ 0.7130, -0.3874, -0.0290, -0.1171]) tensor([0.4454, 0.1482, 0.2121, 0.1942]) -Greedy action tensor([ 0.5840, -0.5873, -0.1049, -0.2027]) tensor([0.4410, 0.1367, 0.2214, 0.2008]) -Greedy action tensor([ 0.8512, -0.4073, 0.0547, -0.2743]) tensor([0.4856, 0.1379, 0.2189, 0.1576]) -Greedy action tensor([ 0.7547, -0.5479, 0.0498, -0.1718]) tensor([0.4626, 0.1257, 0.2286, 0.1831]) -Greedy action tensor([ 0.5228, -0.3134, -0.0957, -0.0896]) tensor([0.3977, 0.1724, 0.2143, 0.2156]) -Greedy action tensor([ 1.0696, -0.8110, 0.1043, -0.5650]) tensor([0.5786, 0.0882, 0.2204, 0.1128]) -Greedy action tensor([ 0.7537, -0.5460, -0.1416, -0.2610]) tensor([0.4893, 0.1334, 0.1999, 0.1774]) -Greedy action tensor([ 0.8312, -0.7021, 0.1869, -0.2441]) tensor([0.4803, 0.1037, 0.2522, 0.1639]) -Greedy action tensor([ 0.8730, -0.5797, -0.0407, -0.5527]) tensor([0.5332, 0.1247, 0.2138, 0.1282]) -Greedy action tensor([ 0.6461, -0.4479, -0.1362, -0.5627]) tensor([0.4783, 0.1602, 0.2187, 0.1428]) -Greedy action tensor([ 0.5163, -0.1638, -0.0614, -0.2206]) tensor([0.3927, 0.1989, 0.2204, 0.1880]) -Greedy action tensor([ 0.8751, -0.4556, 0.0454, -0.2944]) tensor([0.4973, 0.1314, 0.2169, 0.1544]) -Greedy action tensor([ 0.5202, -0.0862, 0.1852, -0.3817]) tensor([0.3750, 0.2045, 0.2683, 0.1522]) -Greedy action tensor([ 0.5295, -0.0834, -0.0156, -0.2582]) tensor([0.3881, 0.2103, 0.2250, 0.1766]) -Greedy action tensor([ 1.0820, -0.6351, -0.0649, -0.3575]) tensor([0.5766, 0.1036, 0.1831, 0.1367]) -Greedy action tensor([ 0.9025, -0.8571, 0.0155, -0.2166]) tensor([0.5234, 0.0901, 0.2156, 0.1709]) -Greedy action tensor([ 0.4395, -0.2725, -0.2526, -0.0196]) tensor([0.3812, 0.1871, 0.1908, 0.2409]) -Greedy action tensor([ 1.0613, -0.5808, -0.0419, -0.2754]) tensor([0.5593, 0.1083, 0.1856, 0.1469]) -Greedy action tensor([ 0.4697, -0.1369, -0.0103, -0.1317]) tensor([0.3687, 0.2010, 0.2282, 0.2021]) -Greedy action tensor([-1.8483, -0.4293, 0.6173, -0.1342]) tensor([0.0445, 0.1840, 0.5242, 0.2472]) -Greedy action tensor([-1.2863, 0.7113, 0.1981, 0.2048]) tensor([0.0581, 0.4279, 0.2562, 0.2579]) -Greedy action tensor([-1.3341, 0.6869, 0.3084, 0.2004]) tensor([0.0545, 0.4112, 0.2816, 0.2528]) -Greedy action tensor([-1.5842, -0.5752, 0.4942, 0.0059]) tensor([0.0601, 0.1648, 0.4803, 0.2947]) -Greedy action tensor([-1.5411, -0.5188, 0.8644, 0.4790]) tensor([0.0446, 0.1241, 0.4948, 0.3365]) -Greedy action tensor([-1.7328, -0.2507, 0.5976, -0.0210]) tensor([0.0471, 0.2074, 0.4845, 0.2610]) -Greedy action tensor([-1.8020, -0.4624, 0.6033, -0.1061]) tensor([0.0468, 0.1788, 0.5190, 0.2553]) -Greedy action tensor([-0.9368, 0.3617, 0.1671, 0.0467]) tensor([0.0966, 0.3539, 0.2913, 0.2582]) -Greedy action tensor([-1.9331, -0.4374, 0.6622, -0.1729]) tensor([0.0405, 0.1808, 0.5430, 0.2356]) -Greedy action tensor([-1.7680, -0.4488, 0.6882, 0.0152]) tensor([0.0447, 0.1674, 0.5217, 0.2662]) -Greedy action tensor([-0.8100, 0.0963, 0.1282, -0.0419]) tensor([0.1222, 0.3024, 0.3122, 0.2633]) -Greedy action tensor([-1.9228, -0.4079, 0.6505, -0.1662]) tensor([0.0409, 0.1860, 0.5361, 0.2369]) -Greedy action tensor([-1.6978e+00, -2.2301e-01, 6.3481e-01, 5.8907e-04]) tensor([0.0473, 0.2067, 0.4875, 0.2585]) -Greedy action tensor([-1.8803, -0.4673, 0.7079, -0.0484]) tensor([0.0406, 0.1666, 0.5396, 0.2533]) -Greedy action tensor([-1.1843, 0.8442, 0.1618, 0.2691]) tensor([0.0598, 0.4546, 0.2298, 0.2558]) -Greedy action tensor([-1.6782, -0.4688, 0.5368, -0.0461]) tensor([0.0537, 0.1799, 0.4918, 0.2746]) -Greedy action tensor([-1.8939, -0.4025, 0.6504, -0.1287]) tensor([0.0416, 0.1850, 0.5301, 0.2432]) -Greedy action tensor([-0.7798, 0.2458, 0.1128, 0.0023]) tensor([0.1188, 0.3314, 0.2901, 0.2597]) -Greedy action tensor([-1.0869, 0.5490, -0.0765, 0.2424]) tensor([0.0790, 0.4056, 0.2170, 0.2985]) -Greedy action tensor([-1.5489, -0.2047, 0.5724, 0.3367]) tensor([0.0506, 0.1940, 0.4220, 0.3334]) -Greedy action tensor([-1.0580, 0.3213, 0.2644, 0.4495]) tensor([0.0755, 0.3000, 0.2834, 0.3410]) -Greedy action tensor([-1.9007, -0.3816, 0.6497, -0.1435]) tensor([0.0414, 0.1889, 0.5300, 0.2397]) -Greedy action tensor([-0.8615, 0.1114, 0.3156, -0.3510]) tensor([0.1169, 0.3092, 0.3792, 0.1947]) -Greedy action tensor([-1.8683, -0.3901, 0.6235, -0.1504]) tensor([0.0434, 0.1903, 0.5244, 0.2419]) -Greedy action tensor([-1.8961, -0.4218, 0.6624, -0.1302]) tensor([0.0414, 0.1810, 0.5352, 0.2423]) -Greedy action tensor([-1.9226, -0.4098, 0.6558, -0.1681]) tensor([0.0408, 0.1853, 0.5379, 0.2360]) -Greedy action tensor([-1.9153, -0.3964, 0.6473, -0.1575]) tensor([0.0411, 0.1877, 0.5329, 0.2383]) -Greedy action tensor([-1.8096, -0.2671, 0.5735, -0.1149]) tensor([0.0455, 0.2129, 0.4936, 0.2480]) -Greedy action tensor([-1.9339, -0.4517, 0.6687, -0.1707]) tensor([0.0404, 0.1780, 0.5458, 0.2358]) -Greedy action tensor([-1.5630, 0.2690, 0.4418, 0.0779]) tensor([0.0504, 0.3150, 0.3744, 0.2602]) -Greedy action tensor([-1.3782, 0.2344, 0.3250, 0.1670]) tensor([0.0617, 0.3097, 0.3391, 0.2895]) -Greedy action tensor([-1.8540, -0.4383, 0.6916, -0.0867]) tensor([0.0421, 0.1736, 0.5374, 0.2468]) -Greedy action tensor([-1.7063, 0.0381, 0.5338, 0.0511]) tensor([0.0456, 0.2611, 0.4287, 0.2645]) -Greedy action tensor([-1.8759, -0.3602, 0.6315, -0.1350]) tensor([0.0425, 0.1935, 0.5216, 0.2424]) -Greedy action tensor([-1.6799, -0.0564, 0.5286, 0.1531]) tensor([0.0467, 0.2367, 0.4248, 0.2918]) -Greedy action tensor([-1.1473, -0.4834, 0.5940, 0.9750]) tensor([0.0588, 0.1143, 0.3356, 0.4913]) -Greedy action tensor([-1.9092, -0.4448, 0.6501, -0.1686]) tensor([0.0417, 0.1806, 0.5397, 0.2380]) -Greedy action tensor([-1.9295, -0.4173, 0.6588, -0.1721]) tensor([0.0406, 0.1841, 0.5400, 0.2353]) -Greedy action tensor([-0.9559, -0.6285, 0.1775, 0.3576]) tensor([0.1085, 0.1506, 0.3372, 0.4037]) -Greedy action tensor([-1.9388, -0.4396, 0.6627, -0.1763]) tensor([0.0403, 0.1807, 0.5439, 0.2351]) -Greedy action tensor([-1.1994, -0.4409, 0.3905, -0.0463]) tensor([0.0892, 0.1905, 0.4375, 0.2827]) -Greedy action tensor([-1.8715, -0.4520, 0.6343, -0.1391]) tensor([0.0434, 0.1795, 0.5318, 0.2454]) -Greedy action tensor([-1.9114, -0.4495, 0.6514, -0.1671]) tensor([0.0417, 0.1797, 0.5403, 0.2383]) -Greedy action tensor([-1.7889, -0.4895, 0.6072, -0.1027]) tensor([0.0475, 0.1742, 0.5217, 0.2565]) -Greedy action tensor([-1.1820, -0.2412, 0.1752, 0.3754]) tensor([0.0820, 0.2101, 0.3186, 0.3893]) -Greedy action tensor([-1.8345, -0.3622, 0.6159, -0.1071]) tensor([0.0443, 0.1931, 0.5135, 0.2492]) -Greedy action tensor([-1.7741, -0.1018, 0.5738, -0.0630]) tensor([0.0448, 0.2385, 0.4687, 0.2480]) -Greedy action tensor([-1.6044, -0.3812, 0.5352, -0.0426]) tensor([0.0566, 0.1924, 0.4810, 0.2699]) -Greedy action tensor([-1.7153, -0.5346, 0.5755, -0.0305]) tensor([0.0512, 0.1667, 0.5060, 0.2760]) -Greedy action tensor([-1.5408, 0.1660, 0.3591, 0.0552]) tensor([0.0552, 0.3040, 0.3687, 0.2721]) -Greedy action tensor([-1.8810, -0.4001, 0.6293, -0.1516]) tensor([0.0428, 0.1884, 0.5273, 0.2415]) -Greedy action tensor([-1.3716, -0.2286, 0.5778, 0.1278]) tensor([0.0639, 0.2005, 0.4492, 0.2864]) -Greedy action tensor([-1.9363, -0.4144, 0.6575, -0.1740]) tensor([0.0403, 0.1848, 0.5398, 0.2350]) -Greedy action tensor([-1.7108, -0.4941, 0.5762, -0.1563]) tensor([0.0528, 0.1781, 0.5194, 0.2497]) -Greedy action tensor([-1.9068, -0.3716, 0.6402, -0.1535]) tensor([0.0413, 0.1919, 0.5280, 0.2387]) -Greedy action tensor([-1.9089, -0.4390, 0.6685, -0.1565]) tensor([0.0412, 0.1791, 0.5421, 0.2376]) -Greedy action tensor([-1.8908, -0.4520, 0.6428, -0.1553]) tensor([0.0426, 0.1795, 0.5364, 0.2415]) -Greedy action tensor([-1.7800, -0.3404, 0.5695, -0.0807]) tensor([0.0472, 0.1993, 0.4951, 0.2584]) -Greedy action tensor([-1.8231, -0.0756, 0.5476, -0.0950]) tensor([0.0433, 0.2488, 0.4639, 0.2440]) -Greedy action tensor([-1.9059, -0.4097, 0.6498, -0.1577]) tensor([0.0415, 0.1853, 0.5347, 0.2385]) -Greedy action tensor([-1.9096, -0.4257, 0.6487, -0.1616]) tensor([0.0415, 0.1832, 0.5366, 0.2386]) -Greedy action tensor([-0.9593, 0.3634, 0.1903, -0.0051]) tensor([0.0952, 0.3572, 0.3005, 0.2471]) -Greedy action tensor([-1.7491, -0.2120, 0.5491, -0.1156]) tensor([0.0482, 0.2244, 0.4803, 0.2471]) -Greedy action tensor([-1.9433, -0.4528, 0.6644, -0.1796]) tensor([0.0403, 0.1787, 0.5462, 0.2348]) -Greedy action tensor([-1.2490, -0.3873, 1.0104, 1.1024]) tensor([0.0427, 0.1010, 0.4085, 0.4479]) -Greedy action tensor([-1.9311, -0.4441, 0.6599, -0.1739]) tensor([0.0407, 0.1801, 0.5432, 0.2360]) -Greedy action tensor([-1.8005, -0.4136, 0.6266, -0.0785]) tensor([0.0456, 0.1826, 0.5166, 0.2552]) -Greedy action tensor([-1.9060, -0.4534, 0.6529, -0.1588]) tensor([0.0418, 0.1786, 0.5399, 0.2398]) -Greedy action tensor([-1.9125, -0.4137, 0.6546, -0.1604]) tensor([0.0412, 0.1844, 0.5368, 0.2376]) -Greedy action tensor([-1.6350e+00, -4.4064e-01, 6.1067e-01, -3.7509e-04]) tensor([0.0530, 0.1749, 0.5005, 0.2716]) -Greedy action tensor([-1.7370, -0.4968, 0.5590, -0.0275]) tensor([0.0502, 0.1735, 0.4988, 0.2775]) -Greedy action tensor([-1.4791, 0.0234, 0.4042, -0.1057]) tensor([0.0624, 0.2805, 0.4105, 0.2465]) -Greedy action tensor([-1.9199, -0.3915, 0.6513, -0.1557]) tensor([0.0408, 0.1880, 0.5333, 0.2380]) -Greedy action tensor([-1.2812, 0.2861, 0.2553, 0.0154]) tensor([0.0709, 0.3400, 0.3297, 0.2594]) -Greedy action tensor([-1.3027, 0.5489, 0.2674, -0.0444]) tensor([0.0637, 0.4058, 0.3063, 0.2242]) -Greedy action tensor([-1.6322, -0.0903, 0.5844, 0.0608]) tensor([0.0493, 0.2304, 0.4524, 0.2680]) -Greedy action tensor([-1.8616, -0.4325, 0.6123, -0.1266]) tensor([0.0440, 0.1838, 0.5226, 0.2496]) -Greedy action tensor([-0.0046, -0.1618, 0.9358, 1.6441]) tensor([0.1040, 0.0889, 0.2663, 0.5408]) -Greedy action tensor([-1.4511, -0.3246, 0.5413, 0.1814]) tensor([0.0605, 0.1866, 0.4435, 0.3094]) -Greedy action tensor([-1.8107, -0.3345, 0.6766, -0.0654]) tensor([0.0432, 0.1892, 0.5200, 0.2476]) -Greedy action tensor([-0.4584, 0.9407, 0.0367, 0.0585]) tensor([0.1195, 0.4841, 0.1960, 0.2004]) -Greedy action tensor([ 0.4013, -1.6354, 1.3239, -0.1750]) tensor([0.2376, 0.0310, 0.5978, 0.1335]) -Greedy action tensor([-0.4729, -0.8836, -1.0041, 0.2943]) tensor([0.2270, 0.1506, 0.1335, 0.4890]) -Greedy action tensor([-0.2484, -0.8093, 0.1454, -0.6070]) tensor([0.2665, 0.1521, 0.3952, 0.1862]) -Greedy action tensor([-0.0094, -0.5406, -0.4633, -0.2742]) tensor([0.3344, 0.1966, 0.2124, 0.2566]) -Greedy action tensor([ 0.4050, -1.0140, 0.0635, 0.4028]) tensor([0.3389, 0.0820, 0.2409, 0.3382]) -Greedy action tensor([ 1.4445, -0.1401, 0.1265, 0.3516]) tensor([0.5531, 0.1134, 0.1481, 0.1854]) -Greedy action tensor([-0.3514, -0.2602, -0.9856, -0.5494]) tensor([0.2902, 0.3179, 0.1539, 0.2380]) -Greedy action tensor([-1.2844, -0.4680, -0.8551, -0.4948]) tensor([0.1428, 0.3232, 0.2194, 0.3146]) -Greedy action tensor([-1.6618, -0.8490, 0.6332, -1.0729]) tensor([0.0668, 0.1505, 0.6625, 0.1203]) -Greedy action tensor([0.2596, 0.7389, 0.0451, 0.9114]) tensor([0.1872, 0.3024, 0.1511, 0.3593]) -Greedy action tensor([ 0.4142, -0.3087, -0.0374, 0.0603]) tensor([0.3541, 0.1719, 0.2254, 0.2486]) -Greedy action tensor([-0.6505, -0.5526, -0.6531, 0.6141]) tensor([0.1506, 0.1660, 0.1502, 0.5332]) -Greedy action tensor([-1.0470, -1.2140, -0.0253, -1.4402]) tensor([0.1887, 0.1597, 0.5242, 0.1274]) -Greedy action tensor([-0.4258, -0.5199, -0.3141, -0.4644]) tensor([0.2506, 0.2281, 0.2802, 0.2411]) -Greedy action tensor([ 0.5697, -0.5112, -0.3172, -0.3204]) tensor([0.4626, 0.1569, 0.1906, 0.1899]) -Greedy action tensor([ 0.2813, 0.1096, -0.7904, 0.0462]) tensor([0.3361, 0.2831, 0.1151, 0.2657]) -Greedy action tensor([ 0.7241, -0.5525, 0.1088, 0.8929]) tensor([0.3330, 0.0929, 0.1800, 0.3942]) -Greedy action tensor([ 0.6764, -1.3733, 0.2577, 0.6345]) tensor([0.3642, 0.0469, 0.2396, 0.3493]) -Greedy action tensor([-0.1365, 0.4536, -0.1205, -0.7975]) tensor([0.2306, 0.4160, 0.2343, 0.1191]) -Greedy action tensor([-0.3763, -1.2921, 0.1370, 0.5462]) tensor([0.1790, 0.0716, 0.2991, 0.4503]) -Greedy action tensor([ 0.0217, -0.5772, -0.8854, 0.4250]) tensor([0.2899, 0.1593, 0.1170, 0.4339]) -Greedy action tensor([-0.2877, -0.9411, 1.6742, -0.1220]) tensor([0.1019, 0.0530, 0.7248, 0.1203]) -Greedy action tensor([ 0.7701, -0.9995, 0.3330, 1.0442]) tensor([0.3193, 0.0544, 0.2062, 0.4200]) -Greedy action tensor([ 0.3946, 0.2066, -0.0755, 0.0964]) tensor([0.3129, 0.2593, 0.1955, 0.2322]) -Greedy action tensor([ 0.0923, -1.5916, -0.1949, 0.3182]) tensor([0.3135, 0.0582, 0.2353, 0.3930]) -Greedy action tensor([ 0.7144, 0.0380, -0.1195, 0.5002]) tensor([0.3637, 0.1849, 0.1579, 0.2935]) -Greedy action tensor([-0.5100, -0.7359, -0.3684, -0.2010]) tensor([0.2319, 0.1850, 0.2672, 0.3159]) -Greedy action tensor([-0.0222, -0.8493, 0.1643, -0.3560]) tensor([0.2977, 0.1302, 0.3588, 0.2132]) -Greedy action tensor([-0.2430, -0.1800, -0.5076, -0.3533]) tensor([0.2682, 0.2857, 0.2059, 0.2402]) -Greedy action tensor([ 0.5737, 0.2875, -0.1811, 0.6847]) tensor([0.2995, 0.2250, 0.1408, 0.3347]) -Greedy action tensor([ 1.0915, -0.7187, 0.7047, 0.3851]) tensor([0.4280, 0.0700, 0.2907, 0.2112]) -Greedy action tensor([-1.0376, -2.0303, -0.0800, 0.9543]) tensor([0.0885, 0.0328, 0.2305, 0.6483]) -Greedy action tensor([0.5926, 0.3937, 0.9529, 0.0623]) tensor([0.2603, 0.2133, 0.3732, 0.1532]) -Greedy action tensor([ 0.1969, -0.7661, -0.0242, 0.0069]) tensor([0.3322, 0.1268, 0.2663, 0.2747]) -Greedy action tensor([-0.1524, -0.7816, 0.8969, -0.5811]) tensor([0.1984, 0.1058, 0.5666, 0.1292]) -Greedy action tensor([-0.2520, -1.6765, 0.3458, -0.1955]) tensor([0.2429, 0.0585, 0.4416, 0.2570]) -Greedy action tensor([ 0.1075, -0.2537, 0.1024, -0.2820]) tensor([0.2968, 0.2068, 0.2953, 0.2011]) -Greedy action tensor([ 0.0077, 0.5080, -0.3083, -0.4829]) tensor([0.2506, 0.4133, 0.1827, 0.1534]) -Greedy action tensor([ 1.7359, -0.8331, 0.8821, -0.0874]) tensor([0.6010, 0.0460, 0.2559, 0.0971]) -Greedy action tensor([ 0.6579, -1.7202, -0.0021, 0.4470]) tensor([0.4133, 0.0383, 0.2136, 0.3347]) -Greedy action tensor([ 2.1113, -0.3338, 0.5302, 0.7228]) tensor([0.6485, 0.0562, 0.1334, 0.1618]) -Greedy action tensor([-0.2553, -0.2199, 0.3978, -0.0542]) tensor([0.1930, 0.2000, 0.3709, 0.2360]) -Greedy action tensor([-0.1359, -0.5644, 0.3611, -0.1167]) tensor([0.2318, 0.1510, 0.3810, 0.2363]) -Greedy action tensor([-0.4650, -0.8931, 0.1511, -0.0803]) tensor([0.2011, 0.1311, 0.3724, 0.2955]) -Greedy action tensor([ 0.7618, 0.3078, 0.9547, -0.5826]) tensor([0.3217, 0.2043, 0.3901, 0.0839]) -Greedy action tensor([ 0.8474, -0.3302, -0.0640, 0.1890]) tensor([0.4489, 0.1383, 0.1804, 0.2324]) -Greedy action tensor([-0.7212, -0.2654, 0.3805, -0.7268]) tensor([0.1520, 0.2397, 0.4573, 0.1511]) -Greedy action tensor([ 0.3392, 0.3726, -0.5877, 0.6910]) tensor([0.2596, 0.2685, 0.1028, 0.3691]) -Greedy action tensor([1.3817, 0.6371, 0.1894, 0.8669]) tensor([0.4209, 0.1999, 0.1277, 0.2515]) -Greedy action tensor([ 0.9176, -0.6082, -0.6443, 0.3655]) tensor([0.4993, 0.1086, 0.1047, 0.2874]) -Greedy action tensor([ 0.9827, -0.1146, 0.1401, 1.0818]) tensor([0.3486, 0.1164, 0.1501, 0.3849]) -Greedy action tensor([ 0.7009, 0.2061, 0.5070, -0.7077]) tensor([0.3734, 0.2277, 0.3076, 0.0913]) -Greedy action tensor([ 0.6739, -1.2864, 0.1651, 0.8390]) tensor([0.3423, 0.0482, 0.2058, 0.4037]) -Greedy action tensor([ 1.0502, -0.0936, 1.0042, -0.3886]) tensor([0.3983, 0.1269, 0.3804, 0.0945]) -Greedy action tensor([-0.1183, 0.0512, 0.3143, -0.1039]) tensor([0.2109, 0.2499, 0.3251, 0.2140]) -Greedy action tensor([ 0.1951, -1.7429, 1.5729, -0.3533]) tensor([0.1758, 0.0253, 0.6973, 0.1016]) -Greedy action tensor([ 0.5485, -1.6792, 0.5372, 0.1416]) tensor([0.3620, 0.0390, 0.3580, 0.2410]) -Greedy action tensor([ 0.2732, -1.0124, 2.0188, 0.6241]) tensor([0.1187, 0.0328, 0.6799, 0.1686]) -Greedy action tensor([ 1.1589, -1.1530, 0.2313, 0.8459]) tensor([0.4493, 0.0445, 0.1777, 0.3285]) -Greedy action tensor([-0.3797, -0.2686, -0.5208, -0.8740]) tensor([0.2781, 0.3108, 0.2415, 0.1696]) -Greedy action tensor([-0.2241, 0.7547, -0.4489, -0.7696]) tensor([0.1984, 0.5281, 0.1585, 0.1150]) -Greedy action tensor([-1.1631, 0.9039, -0.2130, -0.7809]) tensor([0.0772, 0.6100, 0.1996, 0.1131]) -Greedy action tensor([-1.5097, -1.3487, 0.4982, -0.7736]) tensor([0.0854, 0.1003, 0.6360, 0.1783]) -Greedy action tensor([-1.8342, -0.3850, -0.3978, 0.4851]) tensor([0.0509, 0.2170, 0.2142, 0.5179]) -Greedy action tensor([-0.1527, 0.1954, -0.8277, -0.3777]) tensor([0.2685, 0.3803, 0.1367, 0.2144]) -Greedy action tensor([-0.3116, -0.2377, -1.0527, -0.0944]) tensor([0.2634, 0.2837, 0.1256, 0.3274]) -Greedy action tensor([-1.5589, 0.3034, 0.0970, -0.1684]) tensor([0.0599, 0.3857, 0.3138, 0.2406]) -Greedy action tensor([ 0.0020, -0.2112, 0.3306, -0.3934]) tensor([0.2584, 0.2088, 0.3589, 0.1740]) -Greedy action tensor([ 1.2328, -0.0732, 0.1924, -0.7091]) tensor([0.5657, 0.1533, 0.1999, 0.0811]) -Greedy action tensor([-1.5700, -0.3879, -0.7435, 0.0943]) tensor([0.0845, 0.2757, 0.1932, 0.4465]) -Greedy action tensor([ 0.6154, -1.5034, 0.0502, 0.6758]) tensor([0.3636, 0.0437, 0.2066, 0.3862]) -Greedy action tensor([-1.1496, -1.5167, 0.1803, -0.7260]) tensor([0.1428, 0.0989, 0.5400, 0.2182]) -Greedy action tensor([ 0.3168, -0.0375, -0.7894, -0.6322]) tensor([0.4133, 0.2900, 0.1367, 0.1600]) -Greedy action tensor([-0.0498, -0.0503, 0.7088, -0.4083]) tensor([0.2069, 0.2068, 0.4418, 0.1446]) -Greedy action tensor([ 0.7506, -0.6107, -0.1082, 0.4211]) tensor([0.4168, 0.1068, 0.1766, 0.2998]) -Greedy action tensor([-0.1034, -1.6166, -0.5259, 1.1310]) tensor([0.1883, 0.0415, 0.1234, 0.6469]) -Greedy action tensor([ 0.7080, -0.4168, 0.5323, -0.7400]) tensor([0.4169, 0.1354, 0.3497, 0.0980]) -Greedy action tensor([ 0.2680, -0.6195, -1.4196, 0.2633]) tensor([0.3858, 0.1588, 0.0714, 0.3840]) -Greedy action tensor([0.0567, 0.8084, 0.8142, 0.0512]) tensor([0.1600, 0.3394, 0.3414, 0.1592]) -Greedy action tensor([ 1.0040, -1.2099, -0.1610, 0.2792]) tensor([0.5248, 0.0573, 0.1637, 0.2542]) -Greedy action tensor([-1.2157, -1.6234, 1.4612, -1.4871]) tensor([0.0589, 0.0392, 0.8569, 0.0449]) -Greedy action tensor([ 0.0926, 0.1575, -0.1282, 0.1489]) tensor([0.2547, 0.2717, 0.2042, 0.2694]) -Greedy action tensor([ 1.6732, 0.2502, -0.1889, -0.1139]) tensor([0.6395, 0.1541, 0.0993, 0.1071]) -Greedy action tensor([ 1.7637, -0.5429, -0.3412, 0.4357]) tensor([0.6727, 0.0670, 0.0820, 0.1783]) -Greedy action tensor([ 0.9602, -0.7068, 0.0199, -0.0608]) tensor([0.5156, 0.0974, 0.2013, 0.1857]) -Greedy action tensor([ 2.1808, -0.9620, -0.1284, 0.8067]) tensor([0.7165, 0.0309, 0.0712, 0.1813]) -Greedy action tensor([ 1.5491, -0.4491, -0.1691, 0.3007]) tensor([0.6242, 0.0846, 0.1120, 0.1791]) -Greedy action tensor([ 1.6136, -0.3639, -0.1139, 0.2769]) tensor([0.6334, 0.0877, 0.1126, 0.1664]) -Greedy action tensor([ 1.7317, -1.3551, 0.1311, 0.1500]) tensor([0.6882, 0.0314, 0.1389, 0.1415]) -Greedy action tensor([ 1.1874, -0.4093, -0.1346, -0.0759]) tensor([0.5708, 0.1156, 0.1522, 0.1614]) -Greedy action tensor([ 1.1773, 0.0329, -0.6426, 0.5363]) tensor([0.4982, 0.1586, 0.0807, 0.2624]) -Greedy action tensor([ 1.0014, 0.0536, -0.7241, 0.1621]) tensor([0.5006, 0.1940, 0.0891, 0.2163]) -Greedy action tensor([ 1.1475, 0.1184, -0.4008, -0.1329]) tensor([0.5412, 0.1934, 0.1151, 0.1504]) -Greedy action tensor([ 1.6796, -0.7294, -0.7748, 0.2873]) tensor([0.7021, 0.0631, 0.0603, 0.1745]) -Greedy action tensor([ 1.3658, -0.0828, -0.1847, 0.3912]) tensor([0.5481, 0.1288, 0.1163, 0.2068]) -Greedy action tensor([ 1.7426, -0.3711, -0.4523, 0.0413]) tensor([0.7069, 0.0854, 0.0787, 0.1290]) -Greedy action tensor([ 0.8707, -0.2035, -0.4468, 0.5270]) tensor([0.4313, 0.1473, 0.1155, 0.3059]) -Greedy action tensor([ 1.4371, -0.4247, -0.5764, 0.0531]) tensor([0.6496, 0.1009, 0.0867, 0.1628]) -Greedy action tensor([ 1.4114, -0.3028, -0.2934, 0.3291]) tensor([0.5880, 0.1059, 0.1069, 0.1992]) -Greedy action tensor([ 1.1263, -0.1940, -0.4583, 0.4420]) tensor([0.5059, 0.1351, 0.1037, 0.2552]) -Greedy action tensor([ 1.2425, -0.2612, -0.0203, 0.0976]) tensor([0.5484, 0.1219, 0.1551, 0.1745]) -Greedy action tensor([ 1.6616, -0.5837, -0.0937, 0.2955]) tensor([0.6520, 0.0690, 0.1127, 0.1663]) -Greedy action tensor([ 1.4723, -0.0863, -0.2693, 0.5778]) tensor([0.5573, 0.1173, 0.0977, 0.2278]) -Greedy action tensor([ 1.3833, -0.3660, -0.7035, 0.4057]) tensor([0.5973, 0.1039, 0.0741, 0.2247]) -Greedy action tensor([ 1.3215, -0.2075, -0.1173, 0.1370]) tensor([0.5682, 0.1232, 0.1348, 0.1738]) -Greedy action tensor([ 1.0714, -0.4961, -0.0998, 0.2682]) tensor([0.5085, 0.1061, 0.1576, 0.2278]) -Greedy action tensor([ 1.0477, -0.8316, -0.0273, 0.2138]) tensor([0.5186, 0.0792, 0.1770, 0.2253]) -Greedy action tensor([ 1.8751, -0.8496, -0.1588, -0.0877]) tensor([0.7480, 0.0490, 0.0979, 0.1051]) -Greedy action tensor([ 2.6592, -1.3589, -0.2677, 0.6657]) tensor([0.8280, 0.0149, 0.0443, 0.1128]) -Greedy action tensor([ 1.8211, -0.3864, -0.3480, 0.4504]) tensor([0.6765, 0.0744, 0.0773, 0.1718]) -Greedy action tensor([ 1.5016, -1.0337, 0.0595, 0.3445]) tensor([0.6135, 0.0486, 0.1450, 0.1929]) -Greedy action tensor([ 1.5552, -0.6473, -0.5444, 0.6558]) tensor([0.6098, 0.0674, 0.0747, 0.2481]) -Greedy action tensor([ 1.4885, -0.5139, -0.4617, 0.3246]) tensor([0.6291, 0.0849, 0.0895, 0.1965]) -Greedy action tensor([ 1.5631, -0.9792, -0.5012, 0.7079]) tensor([0.6132, 0.0482, 0.0778, 0.2607]) -Greedy action tensor([ 1.8623, -0.3628, -0.7365, 0.7010]) tensor([0.6687, 0.0723, 0.0497, 0.2093]) -Greedy action tensor([ 1.6382, -0.0960, -0.7711, 0.1651]) tensor([0.6686, 0.1180, 0.0601, 0.1533]) -Greedy action tensor([ 1.2606, -0.6913, -0.0259, 0.0728]) tensor([0.5803, 0.0824, 0.1603, 0.1769]) -Greedy action tensor([ 1.5280, -0.4260, -0.0598, 0.1753]) tensor([0.6232, 0.0883, 0.1274, 0.1611]) -Greedy action tensor([ 1.4576, -0.2079, -0.5162, 1.0213]) tensor([0.5065, 0.0958, 0.0704, 0.3274]) -Greedy action tensor([ 2.0553, -0.6392, -0.3386, 0.1879]) tensor([0.7614, 0.0515, 0.0695, 0.1177]) -Greedy action tensor([ 2.0662e+00, -1.1843e+00, -1.2370e-03, 7.3738e-01]) tensor([0.6993, 0.0271, 0.0885, 0.1852]) -Greedy action tensor([ 1.5447, -0.6268, -0.0633, 0.5687]) tensor([0.5913, 0.0674, 0.1184, 0.2228]) -Greedy action tensor([ 2.5970, -0.8050, -0.3367, 0.6383]) tensor([0.8146, 0.0271, 0.0433, 0.1149]) -Greedy action tensor([ 0.5309, -0.2140, -0.1314, 0.1336]) tensor([0.3756, 0.1783, 0.1937, 0.2524]) -Greedy action tensor([ 1.4268, 0.1805, -0.4252, 0.5075]) tensor([0.5425, 0.1560, 0.0851, 0.2164]) -Greedy action tensor([ 1.6251, -0.0418, -0.2150, 0.1713]) tensor([0.6324, 0.1194, 0.1004, 0.1478]) -Greedy action tensor([ 0.4308, -0.1000, -0.4048, 0.5363]) tensor([0.3192, 0.1877, 0.1384, 0.3547]) -Greedy action tensor([ 1.3146, -0.3902, -0.4349, 0.3532]) tensor([0.5754, 0.1046, 0.1000, 0.2200]) -Greedy action tensor([ 1.4403, 0.0080, -0.7056, 0.5956]) tensor([0.5601, 0.1337, 0.0655, 0.2407]) -Greedy action tensor([ 1.1528, 0.0795, -0.1479, 0.0552]) tensor([0.5134, 0.1755, 0.1398, 0.1713]) -Greedy action tensor([ 1.4149, -0.3176, -0.5134, 0.0884]) tensor([0.6299, 0.1114, 0.0916, 0.1672]) -Greedy action tensor([ 1.7063, -0.1580, -1.3160, 0.4303]) tensor([0.6744, 0.1045, 0.0328, 0.1883]) -Greedy action tensor([ 1.5942, -0.5209, 0.0976, 0.5441]) tensor([0.5902, 0.0712, 0.1321, 0.2065]) -Greedy action tensor([ 1.5151, -0.0387, -0.6267, 0.3245]) tensor([0.6124, 0.1295, 0.0719, 0.1862]) -Greedy action tensor([ 1.9548, -0.6014, -0.2528, 0.6349]) tensor([0.6874, 0.0533, 0.0756, 0.1836]) -Greedy action tensor([ 1.3334, -0.4954, -0.5329, 0.6696]) tensor([0.5464, 0.0878, 0.0845, 0.2813]) -Greedy action tensor([ 1.1253, -0.3320, -0.1115, 0.0674]) tensor([0.5347, 0.1245, 0.1552, 0.1856]) -Greedy action tensor([ 1.6757, 0.1802, 0.1040, -0.1254]) tensor([0.6262, 0.1404, 0.1301, 0.1034]) -Greedy action tensor([ 1.6914, 0.0039, -0.7104, 0.1397]) tensor([0.6723, 0.1244, 0.0609, 0.1424]) -Greedy action tensor([ 2.1995, -0.8005, -0.3739, 0.6973]) tensor([0.7414, 0.0369, 0.0566, 0.1651]) -Greedy action tensor([ 2.1393, -0.3329, -0.6653, 0.1810]) tensor([0.7776, 0.0656, 0.0471, 0.1097]) -Greedy action tensor([ 1.5687, -0.5015, -0.9952, 0.6637]) tensor([0.6220, 0.0785, 0.0479, 0.2516]) -Greedy action tensor([ 1.2797, -0.6970, -0.2144, 0.5578]) tensor([0.5409, 0.0749, 0.1214, 0.2628]) -Greedy action tensor([ 1.5627, -0.6680, -0.0427, 0.3558]) tensor([0.6221, 0.0668, 0.1249, 0.1861]) -Greedy action tensor([ 1.5543, -1.0307, -0.2346, 0.6328]) tensor([0.6096, 0.0460, 0.1019, 0.2426]) -Greedy action tensor([1.6310, 0.5496, 0.0635, 0.3904]) tensor([0.5444, 0.1846, 0.1135, 0.1574]) -Greedy action tensor([ 1.3689, -0.1928, -0.2892, 0.1182]) tensor([0.5929, 0.1244, 0.1130, 0.1698]) -Greedy action tensor([ 1.5716, -0.6151, -0.3670, 0.1487]) tensor([0.6679, 0.0750, 0.0961, 0.1610]) -Greedy action tensor([ 2.1098, -0.7457, -0.8068, 0.6811]) tensor([0.7401, 0.0426, 0.0400, 0.1773]) -Greedy action tensor([ 1.2384, -0.1021, -0.3099, 0.1116]) tensor([0.5561, 0.1455, 0.1182, 0.1802]) -Greedy action tensor([ 1.8580, -0.0262, -0.1534, 0.2552]) tensor([0.6725, 0.1022, 0.0900, 0.1354]) -Greedy action tensor([ 1.6003, -0.3622, -0.6851, 0.2987]) tensor([0.6604, 0.0928, 0.0672, 0.1797]) -Greedy action tensor([ 1.2586, -0.5841, -0.1682, 0.0891]) tensor([0.5852, 0.0927, 0.1405, 0.1817]) -Greedy action tensor([ 1.1281, -0.1025, -0.3377, 0.2823]) tensor([0.5122, 0.1496, 0.1183, 0.2199]) -Greedy action tensor([ 1.3377, -0.7421, -0.1924, -0.0411]) tensor([0.6276, 0.0784, 0.1359, 0.1581]) -Greedy action tensor([ 1.2849, -0.5090, -0.5262, 0.2983]) tensor([0.5873, 0.0977, 0.0960, 0.2190]) -Greedy action tensor([ 2.2216, -1.5431, 0.0314, 0.3073]) tensor([0.7797, 0.0181, 0.0872, 0.1150]) -Greedy action tensor([ 1.6759, -0.2246, -0.4887, 0.0154]) tensor([0.6876, 0.1028, 0.0789, 0.1307]) -Greedy action tensor([ 1.7404, -0.5927, -0.3625, 0.4356]) tensor([0.6710, 0.0651, 0.0819, 0.1820]) -Greedy action tensor([ 1.7878, -0.5551, -0.2621, 0.3207]) tensor([0.6871, 0.0660, 0.0885, 0.1584]) -Greedy action tensor([ 2.2980, -0.4021, -0.7869, 0.2276]) tensor([0.8071, 0.0542, 0.0369, 0.1018]) -Greedy action tensor([ 1.5318, -0.7161, -0.4422, -0.0488]) tensor([0.6895, 0.0728, 0.0958, 0.1419]) -Greedy action tensor([ 1.5404, 0.1817, -0.2821, 0.2074]) tensor([0.5944, 0.1528, 0.0961, 0.1567]) -Greedy action tensor([ 0.5899, -0.2807, 0.0420, -0.1327]) tensor([0.4029, 0.1687, 0.2329, 0.1956]) -Greedy action tensor([ 0.1675, 0.0754, -0.1263, -0.5033]) tensor([0.3156, 0.2878, 0.2352, 0.1614]) -Greedy action tensor([ 0.4529, -0.3579, -0.0103, -0.1305]) tensor([0.3800, 0.1689, 0.2391, 0.2120]) -Greedy action tensor([ 0.6274, 0.2003, 0.0122, -0.1036]) tensor([0.3739, 0.2440, 0.2021, 0.1800]) -Greedy action tensor([ 1.0797, -0.5269, -0.0979, -0.3572]) tensor([0.5727, 0.1149, 0.1764, 0.1361]) -Greedy action tensor([ 0.7065, -0.4881, -0.0386, -0.4918]) tensor([0.4809, 0.1456, 0.2283, 0.1451]) -Greedy action tensor([ 0.7725, -0.7647, -0.1214, -0.1277]) tensor([0.4925, 0.1059, 0.2014, 0.2002]) -Greedy action tensor([ 0.7972, -0.6398, -0.0919, -0.4400]) tensor([0.5158, 0.1226, 0.2120, 0.1497]) -Greedy action tensor([ 0.6844, -0.5820, -0.1181, -0.2543]) tensor([0.4714, 0.1329, 0.2113, 0.1844]) -Greedy action tensor([ 0.6023, -0.4842, -0.0893, -0.2657]) tensor([0.4429, 0.1494, 0.2218, 0.1859]) -Greedy action tensor([ 0.8053, -0.4591, -0.1150, -0.4319]) tensor([0.5074, 0.1433, 0.2021, 0.1472]) -Greedy action tensor([ 0.4973, -0.2853, -0.1105, -0.1246]) tensor([0.3939, 0.1801, 0.2145, 0.2115]) -Greedy action tensor([ 0.8452, -0.6275, -0.0739, -0.2978]) tensor([0.5136, 0.1178, 0.2049, 0.1638]) -Greedy action tensor([ 1.2957, -0.3019, -0.3569, -0.3647]) tensor([0.6313, 0.1278, 0.1209, 0.1200]) -Greedy action tensor([ 1.1372, -1.0145, 0.1594, -0.5999]) tensor([0.5993, 0.0697, 0.2255, 0.1055]) -Greedy action tensor([ 0.8206, -0.0330, -0.1588, -0.0492]) tensor([0.4503, 0.1918, 0.1691, 0.1887]) -Greedy action tensor([ 0.6300, -0.0945, -0.0197, 0.0370]) tensor([0.3907, 0.1893, 0.2040, 0.2159]) -Greedy action tensor([ 0.7313, -0.3513, 0.0150, -0.2817]) tensor([0.4565, 0.1546, 0.2230, 0.1658]) -Greedy action tensor([ 0.5883, -0.2503, 0.0128, -0.0313]) tensor([0.3948, 0.1707, 0.2220, 0.2125]) -Greedy action tensor([ 0.5005, 0.0121, -0.0545, 0.0542]) tensor([0.3536, 0.2170, 0.2030, 0.2263]) -Greedy action tensor([ 0.7996, -0.4315, -0.0438, -0.2870]) tensor([0.4855, 0.1418, 0.2089, 0.1638]) -Greedy action tensor([ 0.2652, 0.6352, -0.1654, -0.1656]) tensor([0.2668, 0.3863, 0.1735, 0.1734]) -Greedy action tensor([ 0.7676, -0.0854, -0.1010, 0.0878]) tensor([0.4251, 0.1812, 0.1783, 0.2154]) -Greedy action tensor([ 0.8722, -0.5723, 0.0777, -0.2050]) tensor([0.4931, 0.1163, 0.2227, 0.1679]) -Greedy action tensor([0.2493, 0.2170, 0.1715, 0.3531]) tensor([0.2498, 0.2419, 0.2311, 0.2772]) -Greedy action tensor([ 0.9549, -0.3946, 0.1243, -0.6715]) tensor([0.5286, 0.1371, 0.2304, 0.1039]) -Greedy action tensor([ 0.3033, -0.3506, -0.0523, -0.1571]) tensor([0.3506, 0.1823, 0.2457, 0.2213]) -Greedy action tensor([ 0.7764, -0.3027, -0.0224, -0.0745]) tensor([0.4511, 0.1533, 0.2029, 0.1926]) -Greedy action tensor([ 0.7368, -0.1280, -0.2268, -0.0053]) tensor([0.4388, 0.1848, 0.1674, 0.2089]) -Greedy action tensor([ 0.7468, -0.4105, -0.1865, -0.2864]) tensor([0.4846, 0.1523, 0.1906, 0.1725]) -Greedy action tensor([ 0.9632, -0.7838, 0.0232, -0.5157]) tensor([0.5578, 0.0972, 0.2179, 0.1271]) -Greedy action tensor([ 0.8209, -0.6496, 0.0411, -0.3777]) tensor([0.5025, 0.1155, 0.2304, 0.1516]) -Greedy action tensor([ 0.5442, -0.1136, -0.1281, 0.0545]) tensor([0.3786, 0.1961, 0.1933, 0.2320]) -Greedy action tensor([ 0.5531, 0.1235, -0.0327, 0.1008]) tensor([0.3517, 0.2288, 0.1958, 0.2237]) -Greedy action tensor([ 0.8403, -0.4864, 0.0450, -0.2086]) tensor([0.4838, 0.1284, 0.2184, 0.1695]) -Greedy action tensor([ 0.8672, -0.8573, 0.0608, -0.3627]) tensor([0.5216, 0.0930, 0.2329, 0.1525]) -Greedy action tensor([ 0.3627, -0.1754, -0.0401, -0.2939]) tensor([0.3609, 0.2107, 0.2412, 0.1872]) -Greedy action tensor([ 0.9794, -0.3338, -0.0259, -0.6529]) tensor([0.5463, 0.1469, 0.1999, 0.1068]) -Greedy action tensor([ 0.4699, -0.3354, -0.3785, -0.6306]) tensor([0.4529, 0.2025, 0.1939, 0.1507]) -Greedy action tensor([ 0.4441, -0.1038, -0.1254, -0.2879]) tensor([0.3810, 0.2203, 0.2155, 0.1832]) -Greedy action tensor([ 0.6098, -0.3339, -0.0223, -0.3753]) tensor([0.4359, 0.1697, 0.2317, 0.1628]) -Greedy action tensor([ 0.5721, -0.4288, -0.0727, -0.4664]) tensor([0.4452, 0.1636, 0.2336, 0.1576]) -Greedy action tensor([ 0.5847, -0.4964, -0.1608, -0.0579]) tensor([0.4274, 0.1450, 0.2028, 0.2248]) -Greedy action tensor([ 0.6510, -0.5240, -0.0499, -0.1490]) tensor([0.4436, 0.1370, 0.2201, 0.1993]) -Greedy action tensor([ 0.9972, -0.6418, 0.0573, -0.4691]) tensor([0.5508, 0.1069, 0.2152, 0.1271]) -Greedy action tensor([ 0.4161, -0.0868, -0.0813, -0.2294]) tensor([0.3653, 0.2209, 0.2222, 0.1916]) -Greedy action tensor([ 0.5429, -0.2868, -0.1025, -0.1958]) tensor([0.4101, 0.1789, 0.2151, 0.1959]) -Greedy action tensor([ 1.3441, -1.0264, 0.1553, -0.7679]) tensor([0.6583, 0.0615, 0.2005, 0.0797]) -Greedy action tensor([ 0.6579, -0.5773, 0.0163, -0.3696]) tensor([0.4597, 0.1337, 0.2420, 0.1645]) -Greedy action tensor([ 0.5349, -0.0597, -0.1377, 0.0290]) tensor([0.3752, 0.2070, 0.1915, 0.2262]) -Greedy action tensor([ 0.7991, -0.3656, -0.0211, -0.1854]) tensor([0.4704, 0.1468, 0.2071, 0.1757]) -Greedy action tensor([ 0.6428, -0.3825, -0.0947, -0.2285]) tensor([0.4434, 0.1590, 0.2121, 0.1855]) -Greedy action tensor([ 0.2166, -0.2688, 0.0290, -0.3535]) tensor([0.3322, 0.2045, 0.2754, 0.1879]) -Greedy action tensor([ 0.1888, -0.1711, -0.0564, -0.1114]) tensor([0.3105, 0.2166, 0.2430, 0.2300]) -Greedy action tensor([ 0.7252, -0.2340, -0.2204, -0.3594]) tensor([0.4740, 0.1816, 0.1841, 0.1602]) -Greedy action tensor([ 1.0197, -0.5776, -0.1479, -0.3116]) tensor([0.5625, 0.1139, 0.1750, 0.1486]) -Greedy action tensor([ 0.9218, -0.4929, -0.2078, -0.4108]) tensor([0.5465, 0.1328, 0.1766, 0.1441]) -Greedy action tensor([ 0.9842, -0.6934, -0.1301, -0.3436]) tensor([0.5618, 0.1050, 0.1844, 0.1489]) -Greedy action tensor([ 0.5941, -0.3264, -0.1112, -0.0465]) tensor([0.4133, 0.1646, 0.2042, 0.2178]) -Greedy action tensor([ 0.9491, -0.8843, 0.0976, -0.4529]) tensor([0.5456, 0.0872, 0.2329, 0.1343]) -Greedy action tensor([ 0.5165, -0.5459, -0.1540, -0.0615]) tensor([0.4135, 0.1429, 0.2115, 0.2320]) -Greedy action tensor([ 1.0597, -0.6437, -0.1707, -0.3820]) tensor([0.5845, 0.1064, 0.1708, 0.1383]) -Greedy action tensor([ 0.4832, -0.1118, -0.1220, -0.0599]) tensor([0.3733, 0.2059, 0.2038, 0.2169]) -Greedy action tensor([ 0.1572, 0.2382, -0.1780, -0.0076]) tensor([0.2742, 0.2973, 0.1961, 0.2325]) -Greedy action tensor([ 0.7386, -0.3660, 0.0206, -0.3330]) tensor([0.4626, 0.1533, 0.2256, 0.1584]) -Greedy action tensor([ 0.5219, 0.2386, 0.2308, -0.4144]) tensor([0.3457, 0.2604, 0.2584, 0.1355]) -Greedy action tensor([0.0777, 0.3319, 0.0586, 0.0128]) tensor([0.2377, 0.3064, 0.2332, 0.2227]) -Greedy action tensor([ 0.4763, -0.1443, -0.0533, -0.3031]) tensor([0.3868, 0.2080, 0.2278, 0.1774]) -Greedy action tensor([ 0.9099, 0.0135, -0.2833, -0.4660]) tensor([0.5092, 0.2078, 0.1544, 0.1286]) -Greedy action tensor([ 0.9993, -0.5483, 0.0936, -0.3087]) tensor([0.5298, 0.1127, 0.2142, 0.1432]) -Greedy action tensor([ 0.6309, -0.4591, -0.1378, -0.1124]) tensor([0.4395, 0.1478, 0.2038, 0.2090]) -Greedy action tensor([ 1.5472, -0.8628, -0.1168, -0.6554]) tensor([0.7196, 0.0646, 0.1363, 0.0795]) -Greedy action tensor([ 0.8800, -0.5231, -0.0626, -0.5752]) tensor([0.5351, 0.1315, 0.2085, 0.1249]) -Greedy action tensor([ 0.4736, -0.1095, -0.1148, -0.3785]) tensor([0.3937, 0.2198, 0.2186, 0.1679]) -Greedy action tensor([ 0.9641, -0.8398, 0.0635, -0.5710]) tensor([0.5598, 0.0922, 0.2275, 0.1206]) -Greedy action tensor([ 0.8918, -0.5825, -0.1289, -0.3832]) tensor([0.5351, 0.1225, 0.1928, 0.1495]) -Greedy action tensor([ 1.0469, -0.7607, -0.0454, -0.5369]) tensor([0.5866, 0.0962, 0.1968, 0.1204]) -Greedy action tensor([ 1.2805, -0.7667, 0.1748, -0.9192]) tensor([0.6366, 0.0822, 0.2107, 0.0706]) -Greedy action tensor([ 0.8127, -0.2711, -0.0103, -0.1047]) tensor([0.4594, 0.1554, 0.2017, 0.1835]) -Greedy action tensor([ 0.6780, -0.3922, -0.1002, -0.2135]) tensor([0.4520, 0.1550, 0.2076, 0.1853]) -Greedy action tensor([ 0.5788, -0.4176, 0.1979, -0.4986]) tensor([0.4179, 0.1543, 0.2855, 0.1423]) -Greedy action tensor([ 0.8415, -0.9509, -0.0394, -0.3771]) tensor([0.5329, 0.0888, 0.2208, 0.1575]) -Greedy action tensor([-1.8969, -0.4524, 0.6631, -0.1284]) tensor([0.0416, 0.1764, 0.5381, 0.2439]) -Greedy action tensor([-1.6803, -0.5266, 0.7283, 0.3145]) tensor([0.0442, 0.1400, 0.4911, 0.3247]) -Greedy action tensor([-1.9283, -0.4522, 0.6543, -0.1785]) tensor([0.0410, 0.1796, 0.5431, 0.2362]) -Greedy action tensor([-1.9083, -0.4050, 0.6495, -0.1586]) tensor([0.0414, 0.1861, 0.5343, 0.2381]) -Greedy action tensor([-1.9261, -0.4065, 0.6528, -0.1673]) tensor([0.0407, 0.1861, 0.5368, 0.2364]) -Greedy action tensor([-1.6177, 0.1178, 0.4114, 0.0502]) tensor([0.0511, 0.2897, 0.3885, 0.2707]) -Greedy action tensor([-1.9348, -0.4463, 0.6666, -0.1732]) tensor([0.0404, 0.1791, 0.5451, 0.2354]) -Greedy action tensor([-1.6084, -0.6511, 0.1746, -0.1822]) tensor([0.0729, 0.1899, 0.4337, 0.3035]) -Greedy action tensor([-1.9203, -0.3900, 0.6495, -0.1671]) tensor([0.0409, 0.1889, 0.5341, 0.2361]) -Greedy action tensor([-0.7426, 0.3129, 0.2633, 0.5628]) tensor([0.0971, 0.2791, 0.2655, 0.3583]) -Greedy action tensor([-1.9340, -0.4329, 0.6600, -0.1750]) tensor([0.0405, 0.1818, 0.5423, 0.2353]) -Greedy action tensor([-1.6138, 0.3035, 0.4004, 0.0425]) tensor([0.0487, 0.3312, 0.3649, 0.2551]) -Greedy action tensor([-1.7450, -0.3909, 0.5762, -0.0550]) tensor([0.0488, 0.1891, 0.4974, 0.2646]) -Greedy action tensor([-1.9168, -0.2870, 0.6281, -0.1652]) tensor([0.0406, 0.2074, 0.5178, 0.2342]) -Greedy action tensor([-1.9066, -0.4221, 0.6460, -0.1565]) tensor([0.0417, 0.1838, 0.5348, 0.2397]) -Greedy action tensor([-1.8598, -0.4187, 0.6296, -0.1298]) tensor([0.0436, 0.1844, 0.5259, 0.2461]) -Greedy action tensor([-1.7132, -0.3430, 0.5353, -0.0947]) tensor([0.0514, 0.2023, 0.4869, 0.2593]) -Greedy action tensor([-1.8045, -0.4495, 0.5728, -0.1018]) tensor([0.0473, 0.1834, 0.5097, 0.2596]) -Greedy action tensor([-1.7306, -0.4349, 0.5567, -0.0836]) tensor([0.0508, 0.1855, 0.5001, 0.2636]) -Greedy action tensor([-0.2036, -0.2449, 0.9795, 1.5627]) tensor([0.0903, 0.0867, 0.2948, 0.5282]) -Greedy action tensor([-1.1989, -0.6132, 0.5043, 0.6894]) tensor([0.0671, 0.1206, 0.3686, 0.4436]) -Greedy action tensor([-1.8799, -0.3772, 0.6455, -0.1264]) tensor([0.0421, 0.1891, 0.5258, 0.2430]) -Greedy action tensor([0.0178, 0.9731, 0.3959, 0.7763]) tensor([0.1390, 0.3613, 0.2029, 0.2968]) -Greedy action tensor([-1.8355, -0.3531, 0.6243, -0.1156]) tensor([0.0441, 0.1941, 0.5158, 0.2461]) -Greedy action tensor([-1.4764, -0.5405, 0.2679, -0.1180]) tensor([0.0760, 0.1937, 0.4347, 0.2956]) -Greedy action tensor([-1.8432, -0.4143, 0.6081, -0.1216]) tensor([0.0447, 0.1866, 0.5187, 0.2500]) -Greedy action tensor([-1.7176, -0.2549, 0.6622, -0.3791]) tensor([0.0502, 0.2166, 0.5419, 0.1913]) -Greedy action tensor([-1.8281, -0.3699, 0.6776, -0.0720]) tensor([0.0428, 0.1842, 0.5249, 0.2481]) -Greedy action tensor([-1.9051, -0.3655, 0.6420, -0.1581]) tensor([0.0414, 0.1929, 0.5283, 0.2374]) -Greedy action tensor([-1.9050, -0.4467, 0.6505, -0.1585]) tensor([0.0418, 0.1798, 0.5386, 0.2398]) -Greedy action tensor([-1.8252, -0.2781, 0.6348, -0.0936]) tensor([0.0434, 0.2038, 0.5078, 0.2451]) -Greedy action tensor([-1.8091, 0.1742, 0.4999, -0.0356]) tensor([0.0413, 0.3000, 0.4155, 0.2432]) -Greedy action tensor([-1.7871, -0.3509, 0.5658, -0.0502]) tensor([0.0467, 0.1965, 0.4914, 0.2654]) -Greedy action tensor([-1.3861, 0.2643, 0.2624, 0.0805]) tensor([0.0635, 0.3309, 0.3303, 0.2753]) -Greedy action tensor([-1.8379, -0.3239, 0.6015, -0.1212]) tensor([0.0443, 0.2013, 0.5079, 0.2465]) -Greedy action tensor([-1.4503, 0.2497, 0.4576, 0.2116]) tensor([0.0541, 0.2962, 0.3646, 0.2851]) -Greedy action tensor([-1.8404, -0.4176, 0.6557, -0.0821]) tensor([0.0433, 0.1797, 0.5256, 0.2513]) -Greedy action tensor([-1.8898, -0.4546, 0.6468, -0.1422]) tensor([0.0424, 0.1782, 0.5360, 0.2435]) -Greedy action tensor([-1.8723, -0.4184, 0.6354, -0.1350]) tensor([0.0430, 0.1842, 0.5283, 0.2445]) -Greedy action tensor([-1.8403, -0.4394, 0.6123, -0.1335]) tensor([0.0451, 0.1829, 0.5236, 0.2484]) -Greedy action tensor([-1.9358, -0.4426, 0.6653, -0.1755]) tensor([0.0404, 0.1799, 0.5447, 0.2350]) -Greedy action tensor([-1.4735, 0.5898, 0.3018, 0.0403]) tensor([0.0518, 0.4075, 0.3055, 0.2352]) -Greedy action tensor([-1.5945, -0.3696, 0.5045, 0.0522]) tensor([0.0563, 0.1918, 0.4596, 0.2923]) -Greedy action tensor([-1.3983, 0.0645, 0.5505, 0.1650]) tensor([0.0584, 0.2523, 0.4102, 0.2790]) -Greedy action tensor([-1.1910, 0.8016, 0.1508, 0.2841]) tensor([0.0605, 0.4436, 0.2314, 0.2644]) -Greedy action tensor([-1.8493, -0.3624, 0.6214, -0.1166]) tensor([0.0436, 0.1931, 0.5164, 0.2469]) -Greedy action tensor([-1.7446, 0.0134, 0.4925, -0.0299]) tensor([0.0460, 0.2670, 0.4312, 0.2557]) -Greedy action tensor([-1.9262, -0.3816, 0.6450, -0.1743]) tensor([0.0408, 0.1910, 0.5332, 0.2350]) -Greedy action tensor([-1.8669, -0.5295, 0.6274, -0.1388]) tensor([0.0443, 0.1689, 0.5371, 0.2496]) -Greedy action tensor([-1.9349, -0.4249, 0.6596, -0.1758]) tensor([0.0404, 0.1831, 0.5416, 0.2349]) -Greedy action tensor([-1.8306, -0.3791, 0.5943, -0.1240]) tensor([0.0453, 0.1934, 0.5118, 0.2495]) -Greedy action tensor([-1.9374, -0.4435, 0.6647, -0.1750]) tensor([0.0404, 0.1798, 0.5446, 0.2352]) -Greedy action tensor([-1.9183, -0.4437, 0.6544, -0.1670]) tensor([0.0413, 0.1803, 0.5406, 0.2378]) -Greedy action tensor([-1.7510, -0.4098, 0.5628, -0.0701]) tensor([0.0492, 0.1883, 0.4980, 0.2645]) -Greedy action tensor([-1.4550, -0.1187, 0.8311, 0.4651]) tensor([0.0466, 0.1773, 0.4583, 0.3178]) -Greedy action tensor([-1.9244, -0.4599, 0.6867, -0.1392]) tensor([0.0402, 0.1737, 0.5467, 0.2394]) -Greedy action tensor([-1.8473, -0.3478, 0.6233, -0.1151]) tensor([0.0436, 0.1951, 0.5152, 0.2462]) -Greedy action tensor([-1.5135, -0.1934, 0.5773, 0.0959]) tensor([0.0561, 0.2099, 0.4537, 0.2803]) -Greedy action tensor([-1.9090, -0.4324, 0.6600, -0.1614]) tensor([0.0414, 0.1811, 0.5400, 0.2375]) -Greedy action tensor([-1.8337, -0.4135, 0.6105, -0.1255]) tensor([0.0451, 0.1866, 0.5195, 0.2488]) -Greedy action tensor([-1.8364, -0.4355, 0.6191, -0.1169]) tensor([0.0449, 0.1821, 0.5227, 0.2504]) -Greedy action tensor([-1.8993, -0.4514, 0.6486, -0.1554]) tensor([0.0421, 0.1791, 0.5380, 0.2408]) -Greedy action tensor([-0.6680, -0.5388, 0.1602, 0.2792]) tensor([0.1427, 0.1624, 0.3268, 0.3681]) -Greedy action tensor([-1.9024, -0.4342, 0.6529, -0.1570]) tensor([0.0418, 0.1813, 0.5377, 0.2392]) -Greedy action tensor([-1.9422, -0.4406, 0.6642, -0.1786]) tensor([0.0402, 0.1805, 0.5448, 0.2345]) -Greedy action tensor([-1.9098, -0.4469, 0.6481, -0.1595]) tensor([0.0417, 0.1801, 0.5382, 0.2400]) -Greedy action tensor([-0.1131, -0.3693, 0.1931, 0.2782]) tensor([0.2169, 0.1678, 0.2946, 0.3207]) -Greedy action tensor([-1.8654, -0.4437, 0.6465, -0.1069]) tensor([0.0430, 0.1780, 0.5297, 0.2493]) -Greedy action tensor([-1.9235, -0.4518, 0.6853, -0.1537]) tensor([0.0403, 0.1756, 0.5475, 0.2366]) -Greedy action tensor([-1.8386, -0.2717, 0.5768, -0.1058]) tensor([0.0442, 0.2116, 0.4944, 0.2498]) -Greedy action tensor([-1.6281, -0.2400, 0.5784, -0.0226]) tensor([0.0524, 0.2101, 0.4763, 0.2611]) -Greedy action tensor([-1.8935, -0.3551, 0.6249, -0.1615]) tensor([0.0422, 0.1963, 0.5232, 0.2383]) -Greedy action tensor([-1.9243, -0.4330, 0.6544, -0.1713]) tensor([0.0410, 0.1821, 0.5403, 0.2366]) -Greedy action tensor([-1.5796, -0.5783, 0.7675, 0.0779]) tensor([0.0515, 0.1401, 0.5383, 0.2701]) -Greedy action tensor([-1.8159, -0.4326, 0.6668, -0.0165]) tensor([0.0435, 0.1733, 0.5204, 0.2628]) -Greedy action tensor([-1.5902, -0.3475, 0.4971, -0.0169]) tensor([0.0576, 0.1997, 0.4647, 0.2779]) -Greedy action tensor([-1.8733, -0.3942, 0.6264, -0.1494]) tensor([0.0432, 0.1894, 0.5255, 0.2419]) -Greedy action tensor([-0.4325, -0.2988, 0.0804, -0.1472]) tensor([0.1944, 0.2222, 0.3247, 0.2586]) -Greedy action tensor([-1.9096, -0.3487, 0.6356, -0.1743]) tensor([0.0414, 0.1970, 0.5271, 0.2345]) -Greedy action tensor([-1.5517, -0.2350, 0.6040, 0.0839]) tensor([0.0541, 0.2017, 0.4668, 0.2775]) -Greedy action tensor([-1.8938, -0.4529, 0.6472, -0.1560]) tensor([0.0424, 0.1790, 0.5378, 0.2409]) -Greedy action tensor([ 0.1351, -1.0277, -0.0130, -0.8376]) tensor([0.3917, 0.1224, 0.3378, 0.1481]) -Greedy action tensor([ 0.0391, -0.3499, -0.5126, -0.0462]) tensor([0.3153, 0.2137, 0.1816, 0.2895]) -Greedy action tensor([0.3327, 0.1421, 1.0905, 0.2714]) tensor([0.2041, 0.1687, 0.4354, 0.1919]) -Greedy action tensor([ 0.3302, -0.6015, -0.2433, 2.3321]) tensor([0.1068, 0.0421, 0.0602, 0.7909]) -Greedy action tensor([-1.0169, -0.3567, 1.5773, -1.2433]) tensor([0.0584, 0.1130, 0.7820, 0.0466]) -Greedy action tensor([-0.6853, 0.0668, -0.4278, -0.4008]) tensor([0.1741, 0.3693, 0.2252, 0.2314]) -Greedy action tensor([ 0.4913, -0.3609, -0.5139, -0.5787]) tensor([0.4683, 0.1997, 0.1714, 0.1606]) -Greedy action tensor([-0.2359, -1.6722, 0.1061, 0.5654]) tensor([0.2052, 0.0488, 0.2888, 0.4572]) -Greedy action tensor([-0.9872, -0.9844, 0.5753, -0.9558]) tensor([0.1281, 0.1285, 0.6112, 0.1322]) -Greedy action tensor([-0.8046, -0.8613, 0.7371, -0.2339]) tensor([0.1192, 0.1127, 0.5571, 0.2110]) -Greedy action tensor([-0.7470, -1.8343, -0.1117, 0.4690]) tensor([0.1516, 0.0511, 0.2861, 0.5113]) -Greedy action tensor([ 0.0874, -0.5905, -0.9638, -0.1724]) tensor([0.3805, 0.1931, 0.1330, 0.2934]) -Greedy action tensor([ 1.6278, -1.6446, 1.2738, 1.1169]) tensor([0.4274, 0.0162, 0.3000, 0.2564]) -Greedy action tensor([-0.4956, -2.5239, -0.6047, 0.4733]) tensor([0.2144, 0.0282, 0.1923, 0.5651]) -Greedy action tensor([ 0.1061, -0.6039, -0.2068, 0.4935]) tensor([0.2705, 0.1330, 0.1979, 0.3986]) -Greedy action tensor([ 0.3681, -0.0407, -0.6477, -0.4937]) tensor([0.4083, 0.2713, 0.1479, 0.1725]) -Greedy action tensor([ 0.5948, 0.7155, 0.0770, -0.1956]) tensor([0.3147, 0.3551, 0.1875, 0.1428]) -Greedy action tensor([ 0.4394, 0.2926, 0.3687, -0.2216]) tensor([0.3020, 0.2608, 0.2814, 0.1559]) -Greedy action tensor([-0.0595, 0.2322, 0.8947, 0.6996]) tensor([0.1414, 0.1893, 0.3672, 0.3021]) -Greedy action tensor([-1.0732, -0.0577, 0.3974, -0.8322]) tensor([0.1066, 0.2942, 0.4637, 0.1356]) -Greedy action tensor([-1.1144, 0.0460, 0.0906, -1.0600]) tensor([0.1165, 0.3718, 0.3887, 0.1230]) -Greedy action tensor([0.4865, 0.5359, 0.2803, 0.1435]) tensor([0.2798, 0.2940, 0.2277, 0.1986]) -Greedy action tensor([-1.5322, -0.2934, -0.9831, 0.0160]) tensor([0.0919, 0.3171, 0.1591, 0.4320]) -Greedy action tensor([-0.1024, 0.0802, 0.2811, -0.2485]) tensor([0.2207, 0.2649, 0.3238, 0.1907]) -Greedy action tensor([-1.0332, -0.3700, 0.5755, -0.6847]) tensor([0.1069, 0.2075, 0.5341, 0.1515]) -Greedy action tensor([ 0.1482, -0.3178, 0.3186, -0.6280]) tensor([0.3055, 0.1917, 0.3623, 0.1406]) -Greedy action tensor([ 0.1987, 0.2278, 1.6303, -0.7362]) tensor([0.1513, 0.1558, 0.6334, 0.0594]) -Greedy action tensor([-0.6100, -1.8203, -0.6315, 0.9945]) tensor([0.1379, 0.0411, 0.1350, 0.6860]) -Greedy action tensor([ 1.9207, -0.8369, 1.3090, 0.5917]) tensor([0.5346, 0.0339, 0.2900, 0.1415]) -Greedy action tensor([-0.8601, 1.1486, -0.4544, -0.7184]) tensor([0.0900, 0.6711, 0.1351, 0.1038]) -Greedy action tensor([-0.2270, -0.2480, -0.4885, 0.1875]) tensor([0.2346, 0.2297, 0.1806, 0.3551]) -Greedy action tensor([ 0.9194, 0.3903, -1.2838, -0.1738]) tensor([0.4915, 0.2896, 0.0543, 0.1647]) -Greedy action tensor([ 0.8807, -0.3150, 1.0584, 0.3405]) tensor([0.3247, 0.0982, 0.3879, 0.1892]) -Greedy action tensor([ 1.2778, -0.2329, -0.0339, 1.0782]) tensor([0.4331, 0.0956, 0.1167, 0.3547]) -Greedy action tensor([-0.8700, -0.9659, 1.5686, -0.9515]) tensor([0.0700, 0.0636, 0.8019, 0.0645]) -Greedy action tensor([-0.2225, -0.8635, 0.9050, -0.7279]) tensor([0.1916, 0.1010, 0.5918, 0.1156]) -Greedy action tensor([ 0.9704, -0.9079, -0.3465, 0.3997]) tensor([0.5035, 0.0770, 0.1349, 0.2846]) -Greedy action tensor([-0.1989, -2.6517, -0.4665, -0.1932]) tensor([0.3500, 0.0301, 0.2678, 0.3520]) -Greedy action tensor([-0.3613, -1.0143, 0.5614, -0.1727]) tensor([0.1907, 0.0993, 0.4798, 0.2303]) -Greedy action tensor([-0.3118, -0.0566, -0.0104, -1.0747]) tensor([0.2434, 0.3142, 0.3290, 0.1135]) -Greedy action tensor([1.0009, 0.0068, 0.1199, 0.3653]) tensor([0.4321, 0.1599, 0.1791, 0.2289]) -Greedy action tensor([-0.2526, -0.8244, 0.2198, -0.6399]) tensor([0.2599, 0.1467, 0.4169, 0.1765]) -Greedy action tensor([ 0.9099, 0.0406, -1.0853, 0.2473]) tensor([0.4829, 0.2025, 0.0657, 0.2489]) -Greedy action tensor([-0.2467, -0.2654, -0.5445, -0.2099]) tensor([0.2659, 0.2609, 0.1974, 0.2758]) -Greedy action tensor([ 1.0532, -0.2451, 0.2741, -0.0183]) tensor([0.4821, 0.1316, 0.2212, 0.1651]) -Greedy action tensor([-1.2644, -1.8723, 1.7179, -0.7388]) tensor([0.0435, 0.0237, 0.8591, 0.0736]) -Greedy action tensor([-0.1129, -0.2119, -0.1920, -1.1499]) tensor([0.3140, 0.2845, 0.2902, 0.1113]) -Greedy action tensor([-0.1252, 0.2327, 0.8350, -0.0740]) tensor([0.1641, 0.2347, 0.4286, 0.1727]) -Greedy action tensor([ 0.4996, -1.3646, -0.1144, 0.0308]) tensor([0.4307, 0.0668, 0.2331, 0.2695]) -Greedy action tensor([ 0.1014, -0.1994, 0.4187, -0.7808]) tensor([0.2835, 0.2098, 0.3893, 0.1173]) -Greedy action tensor([-0.2823, 0.1357, 0.5080, -0.1720]) tensor([0.1712, 0.2601, 0.3774, 0.1912]) -Greedy action tensor([-0.5547, -1.2061, 0.1396, 1.2931]) tensor([0.1013, 0.0528, 0.2029, 0.6430]) -Greedy action tensor([-0.0378, -0.2469, -0.7810, -0.0822]) tensor([0.3083, 0.2501, 0.1466, 0.2949]) -Greedy action tensor([-0.5824, -0.0037, 0.8112, -1.0010]) tensor([0.1338, 0.2387, 0.5393, 0.0881]) -Greedy action tensor([ 0.6984, -1.5106, 0.4188, 0.9072]) tensor([0.3228, 0.0354, 0.2441, 0.3977]) -Greedy action tensor([-0.2555, -0.8120, 1.9430, -0.2890]) tensor([0.0866, 0.0496, 0.7801, 0.0837]) -Greedy action tensor([-0.6170, -0.6351, 0.6012, -0.9978]) tensor([0.1654, 0.1624, 0.5592, 0.1130]) -Greedy action tensor([1.7089, 0.1384, 0.6444, 0.1705]) tensor([0.5658, 0.1176, 0.1951, 0.1215]) -Greedy action tensor([-1.0616, -1.4998, 0.9591, -0.0765]) tensor([0.0843, 0.0544, 0.6357, 0.2257]) -Greedy action tensor([0.3769, 0.1503, 0.8829, 0.0074]) tensor([0.2411, 0.1922, 0.4000, 0.1666]) -Greedy action tensor([0.4030, 0.6068, 0.0228, 0.3070]) tensor([0.2619, 0.3211, 0.1791, 0.2379]) -Greedy action tensor([ 1.4404, -0.5472, 1.4367, 0.9876]) tensor([0.3611, 0.0495, 0.3598, 0.2296]) -Greedy action tensor([-1.5418, -0.4977, -0.3886, 0.0698]) tensor([0.0832, 0.2363, 0.2636, 0.4169]) -Greedy action tensor([-0.9747, -0.3223, 0.9002, -0.5858]) tensor([0.0916, 0.1759, 0.5973, 0.1352]) -Greedy action tensor([ 0.2224, -0.1711, -0.5067, 1.6115]) tensor([0.1621, 0.1094, 0.0782, 0.6503]) -Greedy action tensor([ 0.4573, -0.8200, -0.8197, 0.2678]) tensor([0.4193, 0.1169, 0.1169, 0.3469]) -Greedy action tensor([1.2230, 0.2042, 0.3663, 0.5901]) tensor([0.4317, 0.1558, 0.1833, 0.2292]) -Greedy action tensor([ 0.1445, -1.3282, 1.0857, 0.4748]) tensor([0.1929, 0.0442, 0.4945, 0.2684]) -Greedy action tensor([-1.7139, -0.7310, -0.7877, 0.6497]) tensor([0.0594, 0.1588, 0.1501, 0.6317]) -Greedy action tensor([-0.6868, -1.1561, -0.2798, -0.1706]) tensor([0.2082, 0.1302, 0.3128, 0.3489]) -Greedy action tensor([ 0.9249, -0.6913, 0.1391, -0.0357]) tensor([0.4909, 0.0975, 0.2237, 0.1879]) -Greedy action tensor([-0.0382, -0.8027, 0.3743, 0.1678]) tensor([0.2378, 0.1107, 0.3592, 0.2922]) -Greedy action tensor([ 0.7389, -0.8770, 1.2267, 0.3107]) tensor([0.2874, 0.0571, 0.4681, 0.1873]) -Greedy action tensor([-0.9527, 0.1149, -0.2537, 0.5490]) tensor([0.0961, 0.2794, 0.1933, 0.4313]) -Greedy action tensor([ 0.1766, 0.3206, -0.0163, 0.3004]) tensor([0.2432, 0.2809, 0.2006, 0.2753]) -Greedy action tensor([-0.3942, -1.1997, -0.2296, 0.2593]) tensor([0.2199, 0.0983, 0.2592, 0.4227]) -Greedy action tensor([-0.6309, -0.5967, 0.9152, -1.2190]) tensor([0.1373, 0.1421, 0.6444, 0.0763]) -Greedy action tensor([ 0.5468, -0.6827, 1.0002, 0.0560]) tensor([0.2875, 0.0841, 0.4524, 0.1760]) -Greedy action tensor([ 0.4782, -0.2203, 1.7258, -0.7893]) tensor([0.1901, 0.0945, 0.6619, 0.0535]) -Greedy action tensor([-0.3789, -0.5866, 0.2972, -0.8347]) tensor([0.2266, 0.1841, 0.4456, 0.1437]) -Greedy action tensor([-0.7577, -0.2382, -0.4906, -0.4567]) tensor([0.1873, 0.3149, 0.2447, 0.2531]) -Greedy action tensor([ 0.4252, -0.9143, 0.6715, 0.8367]) tensor([0.2469, 0.0647, 0.3158, 0.3726]) -Greedy action tensor([ 1.6971, -0.3719, -0.4346, 0.4229]) tensor([0.6559, 0.0829, 0.0778, 0.1834]) -Greedy action tensor([ 1.7629, -0.1127, -0.1354, -0.1589]) tensor([0.6899, 0.1057, 0.1034, 0.1010]) -Greedy action tensor([ 1.5116, -0.6958, -0.2880, 0.3689]) tensor([0.6272, 0.0690, 0.1037, 0.2001]) -Greedy action tensor([ 0.8723, 0.2537, -0.0408, -0.1505]) tensor([0.4349, 0.2343, 0.1745, 0.1564]) -Greedy action tensor([ 1.1980, -0.3164, -0.3032, 0.1705]) tensor([0.5553, 0.1221, 0.1238, 0.1988]) -Greedy action tensor([ 1.7351, -0.0907, -1.0887, -0.3800]) tensor([0.7457, 0.1201, 0.0443, 0.0899]) -Greedy action tensor([ 1.1199, -0.1723, -0.0940, 0.3456]) tensor([0.4919, 0.1351, 0.1461, 0.2268]) -Greedy action tensor([ 2.0327, -0.9118, -0.3592, 0.4362]) tensor([0.7426, 0.0391, 0.0679, 0.1504]) -Greedy action tensor([ 1.8969, -0.0394, -0.1754, 0.4650]) tensor([0.6627, 0.0956, 0.0834, 0.1583]) -Greedy action tensor([ 1.0988, -0.2905, -0.5775, 0.0825]) tensor([0.5561, 0.1386, 0.1040, 0.2013]) -Greedy action tensor([ 1.8909, -0.5250, -0.2911, 0.4792]) tensor([0.6916, 0.0618, 0.0780, 0.1686]) -Greedy action tensor([ 1.1628, -0.1233, 0.1963, -0.3798]) tensor([0.5346, 0.1477, 0.2034, 0.1143]) -Greedy action tensor([ 1.4720, 0.0475, -0.1642, -0.1802]) tensor([0.6146, 0.1479, 0.1197, 0.1178]) -Greedy action tensor([ 1.2371, -0.4355, -0.2609, 0.1078]) tensor([0.5765, 0.1082, 0.1289, 0.1864]) -Greedy action tensor([ 1.4518, -0.7652, -0.0039, -0.0856]) tensor([0.6422, 0.0700, 0.1498, 0.1380]) -Greedy action tensor([ 1.0757, -0.0656, 0.0019, 0.1065]) tensor([0.4901, 0.1565, 0.1675, 0.1859]) -Greedy action tensor([ 1.2703, -0.5772, -0.4535, 0.0991]) tensor([0.6075, 0.0958, 0.1084, 0.1883]) -Greedy action tensor([ 2.2547, -1.2811, -0.0749, 0.5102]) tensor([0.7685, 0.0224, 0.0748, 0.1343]) -Greedy action tensor([ 1.6072, -0.5357, -0.1453, 0.6149]) tensor([0.6019, 0.0706, 0.1043, 0.2231]) -Greedy action tensor([ 1.5761, -0.1460, -0.5429, 0.7433]) tensor([0.5768, 0.1031, 0.0693, 0.2508]) -Greedy action tensor([ 1.6303, -1.3182, -0.1499, 0.2651]) tensor([0.6773, 0.0355, 0.1142, 0.1730]) -Greedy action tensor([ 1.3812, -0.7611, -0.1224, 0.1359]) tensor([0.6144, 0.0721, 0.1366, 0.1769]) -Greedy action tensor([ 1.1323, -0.2541, -0.4404, 0.5170]) tensor([0.5005, 0.1251, 0.1039, 0.2705]) -Greedy action tensor([ 1.6093, -0.3090, -0.2130, 0.4850]) tensor([0.6122, 0.0899, 0.0990, 0.1989]) -Greedy action tensor([ 1.7169, -0.7331, -0.2392, 0.5371]) tensor([0.6515, 0.0562, 0.0921, 0.2002]) -Greedy action tensor([ 1.7922, -0.5859, -0.5571, 0.3434]) tensor([0.7027, 0.0652, 0.0671, 0.1650]) -Greedy action tensor([ 1.6968, -0.3404, -0.5610, 0.1582]) tensor([0.6898, 0.0899, 0.0721, 0.1481]) -Greedy action tensor([ 1.4571, -0.8978, -0.1034, 0.4716]) tensor([0.5959, 0.0565, 0.1251, 0.2224]) -Greedy action tensor([ 1.4651, -0.4116, -0.4426, 0.1886]) tensor([0.6327, 0.0969, 0.0939, 0.1765]) -Greedy action tensor([ 1.4451, -0.5616, -0.0436, 0.1465]) tensor([0.6124, 0.0823, 0.1382, 0.1671]) -Greedy action tensor([ 1.5022, -0.2983, -0.0662, 0.6772]) tensor([0.5519, 0.0912, 0.1150, 0.2419]) -Greedy action tensor([ 1.2379, -0.3143, -0.6988, 0.3352]) tensor([0.5677, 0.1202, 0.0819, 0.2302]) -Greedy action tensor([ 1.5003, -0.8011, -0.3649, 0.0618]) tensor([0.6701, 0.0671, 0.1038, 0.1590]) -Greedy action tensor([ 2.0995, -0.3736, -0.6496, 0.4782]) tensor([0.7430, 0.0626, 0.0475, 0.1468]) -Greedy action tensor([ 1.4198, -0.2371, -0.5119, 0.5740]) tensor([0.5666, 0.1081, 0.0821, 0.2432]) -Greedy action tensor([ 1.1972, -0.2661, -0.3690, -0.1078]) tensor([0.5843, 0.1352, 0.1220, 0.1584]) -Greedy action tensor([ 1.2418, -0.2402, -0.2835, 0.1108]) tensor([0.5658, 0.1285, 0.1231, 0.1826]) -Greedy action tensor([ 1.6642, -0.5317, -0.3816, 0.0823]) tensor([0.6915, 0.0769, 0.0894, 0.1422]) -Greedy action tensor([ 1.7364, -0.5533, -0.3452, 0.3453]) tensor([0.6780, 0.0687, 0.0846, 0.1687]) -Greedy action tensor([ 1.3721, -0.3610, -0.4192, 0.4984]) tensor([0.5679, 0.1004, 0.0947, 0.2370]) -Greedy action tensor([ 1.5543, -0.2056, -0.5588, 0.6299]) tensor([0.5918, 0.1018, 0.0715, 0.2348]) -Greedy action tensor([ 1.9275, -0.9174, -0.2559, 0.3291]) tensor([0.7283, 0.0423, 0.0821, 0.1473]) -Greedy action tensor([ 1.4264, -0.5177, -0.9261, 0.6861]) tensor([0.5830, 0.0834, 0.0555, 0.2781]) -Greedy action tensor([ 1.6577, -0.5880, -0.3064, 0.1580]) tensor([0.6806, 0.0720, 0.0955, 0.1519]) -Greedy action tensor([ 1.6356, -0.6815, -0.5453, 0.2989]) tensor([0.6783, 0.0669, 0.0766, 0.1782]) -Greedy action tensor([ 1.4765, -0.3969, -0.5970, 0.2391]) tensor([0.6371, 0.0979, 0.0801, 0.1849]) -Greedy action tensor([ 1.1926, -0.5202, 0.1052, 0.1603]) tensor([0.5337, 0.0963, 0.1799, 0.1901]) -Greedy action tensor([ 1.4563, -0.2569, -0.4660, 0.3542]) tensor([0.6029, 0.1087, 0.0882, 0.2003]) -Greedy action tensor([ 1.1955, -0.7080, -0.1008, 0.1591]) tensor([0.5626, 0.0839, 0.1539, 0.1996]) -Greedy action tensor([ 1.4167, -0.0890, -0.8609, 0.4668]) tensor([0.5844, 0.1296, 0.0599, 0.2260]) -Greedy action tensor([ 1.3166, -0.3106, -0.0960, 0.6148]) tensor([0.5166, 0.1015, 0.1258, 0.2561]) -Greedy action tensor([ 1.8306, -0.5708, -0.8638, 0.4011]) tensor([0.7155, 0.0648, 0.0484, 0.1713]) -Greedy action tensor([ 1.2836, -0.6569, -0.6817, 0.7331]) tensor([0.5375, 0.0772, 0.0753, 0.3100]) -Greedy action tensor([ 1.1800, -0.3752, -0.2573, 0.2483]) tensor([0.5427, 0.1146, 0.1289, 0.2138]) -Greedy action tensor([ 0.4949, -0.2967, -0.1638, 0.4554]) tensor([0.3411, 0.1546, 0.1765, 0.3279]) -Greedy action tensor([ 1.7266, -1.0191, 0.1353, 0.3796]) tensor([0.6545, 0.0420, 0.1333, 0.1702]) -Greedy action tensor([ 1.3957, -0.0315, -0.6829, 0.2153]) tensor([0.5980, 0.1435, 0.0748, 0.1837]) -Greedy action tensor([0.8493, 0.2430, 0.0449, 0.1251]) tensor([0.4037, 0.2201, 0.1806, 0.1957]) -Greedy action tensor([ 1.5035, -0.3128, -0.5757, 0.2928]) tensor([0.6307, 0.1026, 0.0789, 0.1879]) -Greedy action tensor([ 2.1949, -0.6227, 0.3711, 0.2938]) tensor([0.7296, 0.0436, 0.1178, 0.1090]) -Greedy action tensor([ 1.7563, -0.6000, -0.2810, 0.5938]) tensor([0.6503, 0.0616, 0.0848, 0.2033]) -Greedy action tensor([ 1.2873, -0.2375, -0.5006, 0.0027]) tensor([0.6018, 0.1310, 0.1007, 0.1666]) -Greedy action tensor([ 1.4769, -0.5914, -0.4982, 0.3120]) tensor([0.6341, 0.0802, 0.0880, 0.1978]) -Greedy action tensor([ 1.4003, -0.4163, -0.3008, 0.2947]) tensor([0.5966, 0.0970, 0.1089, 0.1975]) -Greedy action tensor([ 1.7770, -0.9532, -0.2963, 0.9209]) tensor([0.6189, 0.0404, 0.0778, 0.2629]) -Greedy action tensor([ 1.5253, -0.2512, -0.7330, 0.1014]) tensor([0.6603, 0.1117, 0.0690, 0.1590]) -Greedy action tensor([ 1.0939, -0.0656, -0.8087, 0.1649]) tensor([0.5383, 0.1688, 0.0803, 0.2126]) -Greedy action tensor([ 1.3179, -0.3817, -0.7415, 0.5632]) tensor([0.5616, 0.1026, 0.0716, 0.2641]) -Greedy action tensor([ 2.8966, -1.1059, 0.0035, 0.9692]) tensor([0.8202, 0.0150, 0.0454, 0.1194]) -Greedy action tensor([ 1.6292, -0.6374, -0.0572, 0.0716]) tensor([0.6669, 0.0691, 0.1235, 0.1405]) -Greedy action tensor([ 1.7429, -0.7330, -0.2675, 0.0783]) tensor([0.7106, 0.0598, 0.0952, 0.1345]) -Greedy action tensor([ 1.4551, -0.0646, 0.0495, 0.4605]) tensor([0.5453, 0.1193, 0.1337, 0.2017]) -Greedy action tensor([ 1.3882, -0.6797, -0.2427, 0.4940]) tensor([0.5776, 0.0730, 0.1131, 0.2362]) -Greedy action tensor([ 1.7617, -0.8273, -0.5249, 0.1694]) tensor([0.7246, 0.0544, 0.0736, 0.1474]) -Greedy action tensor([ 1.4784, 0.0283, -0.1600, 0.6075]) tensor([0.5413, 0.1270, 0.1052, 0.2266]) -Greedy action tensor([ 1.4763, 0.1127, -0.2591, 0.3651]) tensor([0.5678, 0.1452, 0.1001, 0.1869]) -Greedy action tensor([ 1.4404, -0.0064, -0.0120, 0.6505]) tensor([0.5200, 0.1224, 0.1217, 0.2360]) -Greedy action tensor([ 1.7063, -0.5167, -0.1594, 0.3824]) tensor([0.6539, 0.0708, 0.1012, 0.1740]) -Greedy action tensor([ 1.9881, -0.8576, -0.2529, 0.4717]) tensor([0.7226, 0.0420, 0.0768, 0.1586]) -Greedy action tensor([ 1.7772, -0.0230, -0.6424, 0.4823]) tensor([0.6544, 0.1082, 0.0582, 0.1793]) -Greedy action tensor([ 1.7763, -0.7717, -0.1489, 0.6901]) tensor([0.6404, 0.0501, 0.0934, 0.2161]) -Greedy action tensor([ 0.9049, -0.4652, -0.1681, -0.2675]) tensor([0.5247, 0.1333, 0.1794, 0.1625]) -Greedy action tensor([ 1.0901, -0.4613, 0.0439, -0.2712]) tensor([0.5496, 0.1165, 0.1931, 0.1409]) -Greedy action tensor([ 1.1953, -0.8151, 0.1505, -0.5705]) tensor([0.6036, 0.0808, 0.2123, 0.1032]) -Greedy action tensor([ 0.2908, 0.0491, -0.1478, -0.2189]) tensor([0.3299, 0.2591, 0.2128, 0.1982]) -Greedy action tensor([-0.0142, 0.4160, -0.3085, -0.4383]) tensor([0.2540, 0.3906, 0.1892, 0.1662]) -Greedy action tensor([ 1.1025, -0.7141, -0.1665, -0.5447]) tensor([0.6111, 0.0994, 0.1718, 0.1177]) -Greedy action tensor([ 0.3539, 0.0430, -0.0913, -0.0420]) tensor([0.3282, 0.2405, 0.2103, 0.2209]) -Greedy action tensor([ 0.7923, -0.0688, -0.0650, -0.2366]) tensor([0.4536, 0.1918, 0.1925, 0.1621]) -Greedy action tensor([ 1.2298, -0.6178, 0.0113, -0.4740]) tensor([0.6115, 0.0964, 0.1808, 0.1113]) -Greedy action tensor([ 0.6783, -0.5764, -0.0562, -0.2115]) tensor([0.4596, 0.1311, 0.2205, 0.1888]) -Greedy action tensor([ 0.9076, -0.6316, -0.1710, -0.2329]) tensor([0.5336, 0.1145, 0.1814, 0.1705]) -Greedy action tensor([ 0.6768, -0.1553, -0.2540, -0.1071]) tensor([0.4374, 0.1904, 0.1725, 0.1997]) -Greedy action tensor([ 1.0265, -0.2782, -0.2261, -0.1389]) tensor([0.5351, 0.1452, 0.1529, 0.1668]) -Greedy action tensor([ 0.5272, -0.5194, -0.1873, -0.1057]) tensor([0.4216, 0.1480, 0.2064, 0.2239]) -Greedy action tensor([ 1.1412, -0.8809, 0.2120, -0.5002]) tensor([0.5811, 0.0769, 0.2294, 0.1126]) -Greedy action tensor([ 0.4550, -0.2806, -0.1969, -0.2252]) tensor([0.3989, 0.1912, 0.2079, 0.2021]) -Greedy action tensor([ 0.9389, -0.7205, 0.0730, -0.3518]) tensor([0.5302, 0.1009, 0.2230, 0.1459]) -Greedy action tensor([ 0.4155, -0.0953, -0.1007, -0.1459]) tensor([0.3614, 0.2168, 0.2157, 0.2061]) -Greedy action tensor([ 0.8037, -0.4745, -0.1053, -0.3437]) tensor([0.5003, 0.1393, 0.2016, 0.1588]) -Greedy action tensor([ 0.9520, -0.7853, 0.0208, -0.4244]) tensor([0.5487, 0.0966, 0.2162, 0.1385]) -Greedy action tensor([ 0.8848, -0.4811, -0.1124, -0.2782]) tensor([0.5164, 0.1318, 0.1905, 0.1614]) -Greedy action tensor([ 0.5579, 0.1832, -0.1318, 0.1398]) tensor([0.3512, 0.2414, 0.1762, 0.2312]) -Greedy action tensor([ 1.1218, -0.5373, -0.0893, -0.4215]) tensor([0.5876, 0.1118, 0.1750, 0.1255]) -Greedy action tensor([ 0.6715, -0.5010, -0.0611, -0.3195]) tensor([0.4627, 0.1432, 0.2224, 0.1717]) -Greedy action tensor([ 1.1027, -0.6405, -0.1590, -0.3911]) tensor([0.5943, 0.1040, 0.1683, 0.1334]) -Greedy action tensor([ 0.6196, -0.0269, -0.0253, -0.2079]) tensor([0.4023, 0.2108, 0.2111, 0.1759]) -Greedy action tensor([ 0.9072, -0.4645, -0.1055, -0.3028]) tensor([0.5222, 0.1325, 0.1897, 0.1557]) -Greedy action tensor([ 0.5081, -0.2169, -0.0562, -0.3127]) tensor([0.4011, 0.1943, 0.2281, 0.1765]) -Greedy action tensor([ 0.5040, -0.2780, 0.0756, -0.3697]) tensor([0.3958, 0.1811, 0.2579, 0.1652]) -Greedy action tensor([ 0.6321, -0.1179, 0.0024, -0.0011]) tensor([0.3943, 0.1863, 0.2101, 0.2093]) -Greedy action tensor([ 0.5938, -0.4988, -0.0092, -0.3771]) tensor([0.4422, 0.1483, 0.2420, 0.1675]) -Greedy action tensor([ 0.3833, 0.2403, -0.2117, 0.2065]) tensor([0.3071, 0.2662, 0.1694, 0.2573]) -Greedy action tensor([ 0.4650, -0.1539, -0.0666, -0.1374]) tensor([0.3740, 0.2014, 0.2198, 0.2048]) -Greedy action tensor([ 0.8153, -0.6252, 0.0761, -0.4165]) tensor([0.4985, 0.1181, 0.2380, 0.1454]) -Greedy action tensor([ 0.7183, -0.8774, 0.0208, -0.6112]) tensor([0.5088, 0.1032, 0.2533, 0.1347]) -Greedy action tensor([ 0.7439, -0.4091, -0.1241, -0.2176]) tensor([0.4722, 0.1491, 0.1982, 0.1805]) -Greedy action tensor([ 1.2125, -1.1735, 0.1316, -0.6770]) tensor([0.6319, 0.0581, 0.2144, 0.0955]) -Greedy action tensor([ 0.7811, -0.8170, 0.1086, -0.7726]) tensor([0.5197, 0.1051, 0.2653, 0.1099]) -Greedy action tensor([ 0.8353, -0.3784, -0.1331, -0.2429]) tensor([0.4958, 0.1473, 0.1882, 0.1687]) -Greedy action tensor([ 0.7291, -0.1493, -0.0349, -0.0243]) tensor([0.4252, 0.1766, 0.1980, 0.2002]) -Greedy action tensor([ 0.7040, 0.2068, -0.2362, -0.2855]) tensor([0.4219, 0.2566, 0.1647, 0.1568]) -Greedy action tensor([ 1.0157, -0.6589, 0.1471, -0.4879]) tensor([0.5467, 0.1024, 0.2293, 0.1215]) -Greedy action tensor([ 0.6844, -0.0096, -0.0990, 0.1028]) tensor([0.3975, 0.1986, 0.1816, 0.2222]) -Greedy action tensor([ 0.8742, -0.8403, 0.0736, -0.5319]) tensor([0.5336, 0.0961, 0.2396, 0.1308]) -Greedy action tensor([ 0.5162, -0.3788, -0.0811, -0.2385]) tensor([0.4117, 0.1682, 0.2265, 0.1936]) -Greedy action tensor([ 0.9541, -0.7090, 0.0012, -0.4815]) tensor([0.5515, 0.1045, 0.2127, 0.1312]) -Greedy action tensor([ 0.4786, -0.0073, -0.0623, -0.4132]) tensor([0.3835, 0.2359, 0.2233, 0.1572]) -Greedy action tensor([ 0.4832, -0.5009, -0.1663, -0.2267]) tensor([0.4188, 0.1565, 0.2187, 0.2059]) -Greedy action tensor([ 0.3958, -0.3166, -0.0381, -0.1264]) tensor([0.3661, 0.1795, 0.2372, 0.2172]) -Greedy action tensor([ 0.0863, 0.2105, -0.0515, -0.1145]) tensor([0.2617, 0.2963, 0.2280, 0.2141]) -Greedy action tensor([ 0.4414, -0.1368, -0.0389, -0.1316]) tensor([0.3645, 0.2045, 0.2255, 0.2055]) -Greedy action tensor([ 0.5792, -0.2133, 0.0608, -0.2320]) tensor([0.4012, 0.1816, 0.2389, 0.1783]) -Greedy action tensor([ 0.5298, -0.3189, -0.0623, -0.2385]) tensor([0.4090, 0.1750, 0.2262, 0.1897]) -Greedy action tensor([ 0.7041, -0.5692, 0.0851, -0.3693]) tensor([0.4629, 0.1296, 0.2493, 0.1582]) -Greedy action tensor([ 0.4051, -0.0072, -0.2419, -0.0329]) tensor([0.3532, 0.2339, 0.1849, 0.2279]) -Greedy action tensor([ 0.4642, -0.3167, -0.0141, -0.1980]) tensor([0.3856, 0.1766, 0.2390, 0.1988]) -Greedy action tensor([ 0.6151, -0.1489, -0.0674, -0.2778]) tensor([0.4200, 0.1957, 0.2123, 0.1720]) -Greedy action tensor([ 0.5552, -0.5520, 0.1411, -0.4658]) tensor([0.4252, 0.1405, 0.2811, 0.1532]) -Greedy action tensor([ 0.9096, -0.4347, 0.0678, -0.1529]) tensor([0.4908, 0.1280, 0.2115, 0.1696]) -Greedy action tensor([ 0.8466, -0.0687, -0.1521, -0.1081]) tensor([0.4643, 0.1859, 0.1710, 0.1787]) -Greedy action tensor([ 0.5559, -0.4149, -0.0590, -0.3423]) tensor([0.4298, 0.1628, 0.2324, 0.1751]) -Greedy action tensor([ 0.5261, -0.2125, -0.0536, -0.3502]) tensor([0.4075, 0.1947, 0.2282, 0.1696]) -Greedy action tensor([ 1.0239, -0.5223, -0.0492, -0.5698]) tensor([0.5688, 0.1212, 0.1945, 0.1156]) -Greedy action tensor([ 0.3681, 0.0631, -0.1115, -0.1236]) tensor([0.3370, 0.2484, 0.2086, 0.2061]) -Greedy action tensor([ 0.4669, -0.3784, 0.0328, -0.4056]) tensor([0.4008, 0.1721, 0.2596, 0.1675]) -Greedy action tensor([0.7102, 0.0255, 0.0706, 0.0237]) tensor([0.3945, 0.1989, 0.2081, 0.1985]) -Greedy action tensor([ 0.7966, -0.6033, -0.0401, -0.4567]) tensor([0.5088, 0.1255, 0.2204, 0.1453]) -Greedy action tensor([ 0.4427, -0.3417, -0.0602, -0.0296]) tensor([0.3725, 0.1700, 0.2253, 0.2323]) -Greedy action tensor([ 0.5093, -0.2694, -0.1153, -0.1256]) tensor([0.3961, 0.1818, 0.2121, 0.2099]) -Greedy action tensor([ 0.7550, -0.3964, 0.1285, -0.4708]) tensor([0.4664, 0.1475, 0.2493, 0.1369]) -Greedy action tensor([ 0.9019, -0.4836, -0.1519, -0.3520]) tensor([0.5307, 0.1328, 0.1850, 0.1515]) -Greedy action tensor([ 0.4597, -0.1988, -0.0579, -0.1486]) tensor([0.3762, 0.1948, 0.2242, 0.2048]) -Greedy action tensor([ 0.7104, -0.5795, 0.1047, -0.3888]) tensor([0.4642, 0.1278, 0.2533, 0.1547]) -Greedy action tensor([ 0.9145, -0.9935, 0.1024, -0.3092]) tensor([0.5301, 0.0787, 0.2353, 0.1559]) -Greedy action tensor([ 0.9784, -0.7570, 0.1675, -0.6112]) tensor([0.5480, 0.0966, 0.2436, 0.1118]) -Greedy action tensor([ 0.7336, -0.5181, -0.0709, -0.2778]) tensor([0.4769, 0.1364, 0.2133, 0.1734]) -Greedy action tensor([ 0.7574, -0.4941, 0.0704, -0.3852]) tensor([0.4744, 0.1357, 0.2386, 0.1513]) -Greedy action tensor([ 0.7064, -0.2893, -0.0867, -0.5169]) tensor([0.4725, 0.1746, 0.2138, 0.1391]) -Greedy action tensor([ 0.4555, -0.3649, -0.2584, -0.0255]) tensor([0.3924, 0.1728, 0.1922, 0.2426]) -Greedy action tensor([ 0.8681, -0.5830, -0.0639, -0.3616]) tensor([0.5207, 0.1220, 0.2050, 0.1522]) -Greedy action tensor([ 1.0370, -0.4156, -0.1640, -0.3909]) tensor([0.5635, 0.1318, 0.1696, 0.1351]) -Greedy action tensor([-0.5548, 0.7205, 0.2824, 0.4552]) tensor([0.1038, 0.3715, 0.2397, 0.2850]) -Greedy action tensor([-1.8574, -0.3740, 0.6465, -0.1312]) tensor([0.0430, 0.1895, 0.5259, 0.2416]) -Greedy action tensor([-1.3777, -0.2055, 0.4645, 0.2662]) tensor([0.0636, 0.2055, 0.4016, 0.3293]) -Greedy action tensor([-1.6450, -0.0278, 0.5457, 0.0577]) tensor([0.0489, 0.2462, 0.4368, 0.2682]) -Greedy action tensor([-1.8997, -0.4042, 0.6537, -0.1432]) tensor([0.0415, 0.1851, 0.5331, 0.2403]) -Greedy action tensor([-1.0002, 0.1556, 0.5020, 0.8938]) tensor([0.0653, 0.2074, 0.2933, 0.4340]) -Greedy action tensor([-1.7929, -0.2114, 0.5568, -0.0542]) tensor([0.0454, 0.2207, 0.4757, 0.2582]) -Greedy action tensor([-1.9018, -0.4525, 0.6507, -0.1591]) tensor([0.0420, 0.1789, 0.5392, 0.2399]) -Greedy action tensor([-1.8443, -0.3523, 0.5884, -0.1065]) tensor([0.0444, 0.1974, 0.5057, 0.2524]) -Greedy action tensor([-1.7515, 0.0803, 0.4841, -0.0399]) tensor([0.0452, 0.2821, 0.4225, 0.2502]) -Greedy action tensor([-1.8249, -0.0904, 0.5760, -0.1245]) tensor([0.0431, 0.2445, 0.4761, 0.2363]) -Greedy action tensor([-1.6036, 0.2667, 0.3798, 0.0235]) tensor([0.0504, 0.3270, 0.3662, 0.2564]) -Greedy action tensor([-1.6684, -0.5661, 0.4577, -0.0812]) tensor([0.0579, 0.1742, 0.4850, 0.2829]) -Greedy action tensor([-1.8874, -0.4337, 0.6173, -0.1496]) tensor([0.0431, 0.1844, 0.5275, 0.2450]) -Greedy action tensor([-1.8806, -0.4817, 0.6152, -0.1516]) tensor([0.0438, 0.1775, 0.5317, 0.2470]) -Greedy action tensor([-1.1189, 0.3520, 0.1625, 0.0946]) tensor([0.0812, 0.3533, 0.2923, 0.2731]) -Greedy action tensor([-1.8994, -0.4093, 0.6483, -0.1534]) tensor([0.0418, 0.1853, 0.5336, 0.2394]) -Greedy action tensor([-0.9098, 0.7632, 0.0698, 0.1710]) tensor([0.0838, 0.4463, 0.2231, 0.2469]) -Greedy action tensor([-1.7699, 0.1503, 0.5130, -0.2301]) tensor([0.0449, 0.3060, 0.4399, 0.2092]) -Greedy action tensor([-1.9176, -0.4457, 0.6532, -0.1668]) tensor([0.0413, 0.1801, 0.5405, 0.2381]) -Greedy action tensor([-1.9046, -0.4388, 0.6447, -0.1616]) tensor([0.0419, 0.1816, 0.5368, 0.2397]) -Greedy action tensor([-1.7840, -0.4851, 0.5845, -0.0603]) tensor([0.0477, 0.1749, 0.5098, 0.2675]) -Greedy action tensor([-1.9422, -0.4485, 0.6671, -0.1787]) tensor([0.0402, 0.1790, 0.5463, 0.2345]) -Greedy action tensor([-1.8090, -0.3530, 0.6203, -0.0909]) tensor([0.0450, 0.1931, 0.5110, 0.2509]) -Greedy action tensor([-1.9097, -0.3832, 0.6495, -0.1427]) tensor([0.0410, 0.1888, 0.5301, 0.2401]) -Greedy action tensor([-1.7865, -0.4436, 0.6429, 0.0339]) tensor([0.0447, 0.1713, 0.5078, 0.2762]) -Greedy action tensor([-1.8966, -0.4962, 0.2793, -0.3046]) tensor([0.0532, 0.2160, 0.4691, 0.2616]) -Greedy action tensor([-1.5574, -0.4522, 0.4810, 0.0547]) tensor([0.0598, 0.1807, 0.4594, 0.3000]) -Greedy action tensor([-1.9026, -0.3387, 0.6249, -0.1519]) tensor([0.0416, 0.1986, 0.5205, 0.2394]) -Greedy action tensor([-1.9231, -0.4445, 0.6567, -0.1672]) tensor([0.0410, 0.1800, 0.5414, 0.2375]) -Greedy action tensor([-1.9193, -0.4675, 0.7585, -0.0495]) tensor([0.0380, 0.1623, 0.5531, 0.2466]) -Greedy action tensor([-1.7073, -0.4048, 0.5621, -0.0462]) tensor([0.0510, 0.1875, 0.4931, 0.2684]) -Greedy action tensor([-1.7399, 0.0667, 0.4981, -0.0704]) tensor([0.0459, 0.2797, 0.4305, 0.2439]) -Greedy action tensor([-0.0984, 0.3685, 0.4656, 1.0577]) tensor([0.1328, 0.2118, 0.2334, 0.4220]) -Greedy action tensor([-1.9391, -0.4412, 0.6648, -0.1776]) tensor([0.0403, 0.1803, 0.5448, 0.2346]) -Greedy action tensor([-1.6160, -0.2502, 0.5571, 0.0481]) tensor([0.0527, 0.2064, 0.4627, 0.2782]) -Greedy action tensor([-1.7140, -0.3279, 0.5664, -0.0196]) tensor([0.0494, 0.1977, 0.4836, 0.2692]) -Greedy action tensor([-1.9139, -0.3708, 0.6438, -0.1628]) tensor([0.0411, 0.1922, 0.5301, 0.2366]) -Greedy action tensor([-1.9273, -0.4201, 0.6588, -0.1636]) tensor([0.0406, 0.1833, 0.5392, 0.2369]) -Greedy action tensor([-1.4554, -0.0225, -0.6276, -0.9683]) tensor([0.1098, 0.4602, 0.2513, 0.1787]) -Greedy action tensor([-1.9062, -0.4563, 0.6544, -0.1596]) tensor([0.0418, 0.1781, 0.5406, 0.2396]) -Greedy action tensor([-1.8435, -0.1485, 0.5766, -0.1945]) tensor([0.0437, 0.2379, 0.4912, 0.2272]) -Greedy action tensor([-1.7853, -0.3764, 0.6797, -0.0658]) tensor([0.0446, 0.1824, 0.5243, 0.2488]) -Greedy action tensor([-1.8729, -0.4374, 0.6355, -0.1248]) tensor([0.0430, 0.1809, 0.5288, 0.2472]) -Greedy action tensor([-1.9197, -0.4305, 0.6528, -0.1685]) tensor([0.0412, 0.1825, 0.5392, 0.2372]) -Greedy action tensor([-1.9207, -0.4532, 0.6519, -0.1644]) tensor([0.0413, 0.1790, 0.5407, 0.2390]) -Greedy action tensor([-1.8742, -0.4554, 0.6388, -0.1451]) tensor([0.0433, 0.1788, 0.5341, 0.2439]) -Greedy action tensor([-1.0800, 0.1336, 0.2385, 0.0279]) tensor([0.0898, 0.3024, 0.3358, 0.2720]) -Greedy action tensor([-1.8888, -0.3695, 0.6433, -0.1450]) tensor([0.0419, 0.1914, 0.5271, 0.2396]) -Greedy action tensor([-0.8740, 0.9134, 0.1642, 0.0540]) tensor([0.0811, 0.4846, 0.2291, 0.2052]) -Greedy action tensor([-1.6860, -0.1559, 0.4694, -0.0855]) tensor([0.0521, 0.2405, 0.4494, 0.2580]) -Greedy action tensor([-1.9362, -0.4342, 0.6609, -0.1754]) tensor([0.0404, 0.1816, 0.5428, 0.2352]) -Greedy action tensor([-1.4103, 0.2098, 0.4531, 0.2318]) tensor([0.0566, 0.2861, 0.3649, 0.2924]) -Greedy action tensor([1.2069, 1.3191, 0.1621, 0.8544]) tensor([0.3151, 0.3525, 0.1109, 0.2215]) -Greedy action tensor([-1.7476, -0.4970, 0.5717, -0.0122]) tensor([0.0492, 0.1718, 0.5001, 0.2789]) -Greedy action tensor([-1.5518, -0.5498, 0.4992, 0.1085]) tensor([0.0597, 0.1625, 0.4639, 0.3139]) -Greedy action tensor([-1.9324, -0.4064, 0.6562, -0.1700]) tensor([0.0404, 0.1859, 0.5381, 0.2355]) -Greedy action tensor([-1.8441, -0.4287, 0.6213, -0.1254]) tensor([0.0445, 0.1833, 0.5239, 0.2483]) -Greedy action tensor([-1.9132, -0.4238, 0.6630, -0.1539]) tensor([0.0410, 0.1818, 0.5390, 0.2382]) -Greedy action tensor([-1.9202, -0.4467, 0.6571, -0.1671]) tensor([0.0412, 0.1796, 0.5417, 0.2376]) -Greedy action tensor([-1.9385, -0.4386, 0.6637, -0.1772]) tensor([0.0403, 0.1807, 0.5442, 0.2347]) -Greedy action tensor([-1.8857, -0.4182, 0.6328, -0.1546]) tensor([0.0427, 0.1854, 0.5304, 0.2414]) -Greedy action tensor([-1.8918, -0.4579, 0.6490, -0.1507]) tensor([0.0424, 0.1778, 0.5380, 0.2418]) -Greedy action tensor([-1.6446, -0.2615, 0.5879, 0.0751]) tensor([0.0503, 0.2004, 0.4687, 0.2806]) -Greedy action tensor([-0.5967, 1.0113, 0.0508, 0.2872]) tensor([0.0969, 0.4836, 0.1851, 0.2344]) -Greedy action tensor([-1.9153, -0.4355, 0.6527, -0.1648]) tensor([0.0413, 0.1816, 0.5391, 0.2380]) -Greedy action tensor([-1.8734, -0.3779, 0.6339, -0.1395]) tensor([0.0427, 0.1907, 0.5245, 0.2420]) -Greedy action tensor([-1.9158, -0.3524, 0.6426, -0.1498]) tensor([0.0408, 0.1946, 0.5263, 0.2383]) -Greedy action tensor([-1.9143, -0.4157, 0.6540, -0.1631]) tensor([0.0412, 0.1843, 0.5372, 0.2373]) -Greedy action tensor([-1.5444, 0.1897, 0.3893, 0.1188]) tensor([0.0530, 0.3004, 0.3667, 0.2798]) -Greedy action tensor([-1.6029, -0.5676, 0.4946, -0.0713]) tensor([0.0603, 0.1698, 0.4911, 0.2789]) -Greedy action tensor([-1.7787, -0.5023, 0.6008, -0.0931]) tensor([0.0481, 0.1725, 0.5198, 0.2597]) -Greedy action tensor([-1.8947, -0.4608, 0.6407, -0.1625]) tensor([0.0426, 0.1787, 0.5378, 0.2409]) -Greedy action tensor([ 0.5003, -0.3249, 0.6167, 0.7080]) tensor([0.2637, 0.1155, 0.2962, 0.3246]) -Greedy action tensor([-1.8079, -0.2800, 0.6359, -0.0732]) tensor([0.0439, 0.2022, 0.5053, 0.2486]) -Greedy action tensor([-1.7246, -0.4286, 0.5960, -0.0452]) tensor([0.0495, 0.1809, 0.5041, 0.2655]) -Greedy action tensor([-1.7710, -0.4779, 0.5885, -0.0815]) tensor([0.0484, 0.1765, 0.5127, 0.2624]) -Greedy action tensor([-1.9300, -0.4454, 0.6622, -0.1718]) tensor([0.0407, 0.1796, 0.5436, 0.2361]) -Greedy action tensor([-0.8971, 0.9124, 0.1322, -0.3435]) tensor([0.0859, 0.5244, 0.2403, 0.1494]) -Greedy action tensor([-1.8588, -0.2009, 0.5862, -0.1116]) tensor([0.0425, 0.2232, 0.4903, 0.2440]) -Greedy action tensor([-1.8338, -0.4611, 0.6135, -0.1223]) tensor([0.0454, 0.1790, 0.5244, 0.2512]) -Greedy action tensor([ 0.7202, -0.2860, -0.1495, 0.9981]) tensor([0.3221, 0.1177, 0.1350, 0.4252]) -Greedy action tensor([ 0.7105, -0.4514, 0.5806, 0.1417]) tensor([0.3627, 0.1135, 0.3185, 0.2053]) -Greedy action tensor([ 1.3377, -0.1159, -0.0757, 0.6430]) tensor([0.5060, 0.1183, 0.1231, 0.2526]) -Greedy action tensor([ 0.1646, -0.0954, 0.9858, -0.2578]) tensor([0.2128, 0.1641, 0.4837, 0.1395]) -Greedy action tensor([-0.6318, -0.6314, 1.4129, -1.6589]) tensor([0.0992, 0.0992, 0.7662, 0.0355]) -Greedy action tensor([ 0.1154, -0.7293, 0.3171, 1.3819]) tensor([0.1612, 0.0693, 0.1973, 0.5722]) -Greedy action tensor([ 0.2214, -2.1649, -0.3543, -0.0079]) tensor([0.4083, 0.0375, 0.2296, 0.3246]) -Greedy action tensor([-0.5164, 0.0582, 0.2926, -0.0137]) tensor([0.1498, 0.2661, 0.3364, 0.2476]) -Greedy action tensor([-0.2276, 0.8898, -0.0515, -1.0093]) tensor([0.1752, 0.5356, 0.2090, 0.0802]) -Greedy action tensor([-6.8769e-05, -1.0623e+00, 9.9613e-01, 1.4939e-01]) tensor([0.1918, 0.0663, 0.5193, 0.2227]) -Greedy action tensor([-0.6872, -1.0514, -0.9903, 0.1132]) tensor([0.2146, 0.1491, 0.1585, 0.4778]) -Greedy action tensor([-0.4249, -1.2433, -0.0522, -0.9219]) tensor([0.2856, 0.1260, 0.4146, 0.1738]) -Greedy action tensor([ 0.8940, -0.2669, -0.0591, 1.2128]) tensor([0.3253, 0.1019, 0.1254, 0.4474]) -Greedy action tensor([ 0.8081, -0.1764, -0.3439, 0.0233]) tensor([0.4660, 0.1741, 0.1473, 0.2126]) -Greedy action tensor([ 1.0094, -1.8744, 0.9676, -0.0126]) tensor([0.4211, 0.0235, 0.4039, 0.1515]) -Greedy action tensor([-0.0667, 1.1161, 0.1987, -0.5373]) tensor([0.1615, 0.5270, 0.2106, 0.1009]) -Greedy action tensor([ 0.5123, -0.4416, 0.1539, 0.2510]) tensor([0.3504, 0.1350, 0.2448, 0.2698]) -Greedy action tensor([ 0.4014, -1.1887, -0.5589, 0.5326]) tensor([0.3667, 0.0748, 0.1404, 0.4181]) -Greedy action tensor([-0.7839, -1.3986, 0.7364, 0.7149]) tensor([0.0944, 0.0511, 0.4318, 0.4227]) -Greedy action tensor([ 1.1189, -0.7803, 1.5107, -0.4654]) tensor([0.3528, 0.0528, 0.5220, 0.0724]) -Greedy action tensor([-0.5361, 0.6581, 1.2732, -0.4139]) tensor([0.0867, 0.2861, 0.5293, 0.0979]) -Greedy action tensor([-0.2227, -0.7100, 0.2163, -0.1599]) tensor([0.2364, 0.1452, 0.3667, 0.2517]) -Greedy action tensor([-0.2433, -0.2817, -0.8180, 0.2887]) tensor([0.2365, 0.2276, 0.1331, 0.4027]) -Greedy action tensor([-0.1261, -0.8590, 0.1927, -0.8849]) tensor([0.3008, 0.1446, 0.4138, 0.1409]) -Greedy action tensor([ 0.3655, -0.3074, 1.8110, -0.3983]) tensor([0.1608, 0.0820, 0.6823, 0.0749]) -Greedy action tensor([-0.5164, -0.9044, -0.6202, -0.2378]) tensor([0.2563, 0.1739, 0.2311, 0.3387]) -Greedy action tensor([-0.4879, -1.1911, 1.1615, -0.4863]) tensor([0.1299, 0.0643, 0.6758, 0.1301]) -Greedy action tensor([-0.5585, -0.6898, -0.1284, -0.2358]) tensor([0.2085, 0.1829, 0.3206, 0.2880]) -Greedy action tensor([-0.4599, -0.8908, -0.7692, 0.5989]) tensor([0.1899, 0.1234, 0.1394, 0.5474]) -Greedy action tensor([-0.2972, 0.8154, -0.1984, -0.2837]) tensor([0.1624, 0.4939, 0.1792, 0.1645]) -Greedy action tensor([ 0.1340, -1.0687, -0.0171, 1.2924]) tensor([0.1871, 0.0562, 0.1609, 0.5959]) -Greedy action tensor([-0.0596, 0.1510, -0.8355, 0.7102]) tensor([0.2060, 0.2543, 0.0948, 0.4448]) -Greedy action tensor([ 0.0480, -0.5978, 1.0168, -0.8386]) tensor([0.2188, 0.1147, 0.5764, 0.0901]) -Greedy action tensor([-0.1722, -1.2546, 0.3896, -0.0070]) tensor([0.2341, 0.0793, 0.4105, 0.2761]) -Greedy action tensor([ 0.3356, -0.3667, -0.5273, 0.0332]) tensor([0.3764, 0.1865, 0.1588, 0.2782]) -Greedy action tensor([ 0.2422, -1.5078, 0.6910, -0.3531]) tensor([0.3038, 0.0528, 0.4759, 0.1675]) -Greedy action tensor([-0.2026, -0.0748, 0.9664, -0.6725]) tensor([0.1672, 0.1900, 0.5382, 0.1045]) -Greedy action tensor([-0.9670, -0.2337, -0.5371, 0.0140]) tensor([0.1372, 0.2857, 0.2110, 0.3661]) -Greedy action tensor([-0.4098, -0.6206, 1.9121, 0.0366]) tensor([0.0737, 0.0597, 0.7514, 0.1152]) -Greedy action tensor([-0.0121, -1.8555, -0.5805, 0.3928]) tensor([0.3102, 0.0491, 0.1757, 0.4650]) -Greedy action tensor([ 0.1673, -1.7324, -0.0149, 0.1985]) tensor([0.3317, 0.0496, 0.2765, 0.3422]) -Greedy action tensor([ 0.2924, -1.7154, -0.5562, -0.7222]) tensor([0.5195, 0.0698, 0.2224, 0.1884]) -Greedy action tensor([ 0.4477, -0.1519, -0.2873, 0.6596]) tensor([0.3063, 0.1682, 0.1469, 0.3786]) -Greedy action tensor([-0.2315, -1.1556, -1.0608, 0.4002]) tensor([0.2692, 0.1069, 0.1175, 0.5064]) -Greedy action tensor([ 0.3734, -0.0348, -0.3953, 0.3114]) tensor([0.3259, 0.2167, 0.1511, 0.3063]) -Greedy action tensor([-0.6380, -0.1465, -0.3080, -0.3069]) tensor([0.1846, 0.3017, 0.2567, 0.2570]) -Greedy action tensor([ 0.1575, -0.6385, 0.1287, -0.5963]) tensor([0.3456, 0.1559, 0.3358, 0.1627]) -Greedy action tensor([ 0.8146, -0.0412, 0.7244, 0.4084]) tensor([0.3328, 0.1414, 0.3041, 0.2217]) -Greedy action tensor([ 1.3651, -0.5772, -0.1966, 0.1471]) tensor([0.6064, 0.0869, 0.1272, 0.1794]) -Greedy action tensor([-0.2072, 0.3205, -1.2608, 0.0656]) tensor([0.2295, 0.3890, 0.0800, 0.3015]) -Greedy action tensor([-0.5098, -0.9465, -0.7671, 0.4780]) tensor([0.1959, 0.1266, 0.1515, 0.5261]) -Greedy action tensor([-0.7444, -2.1152, 0.1745, 0.8719]) tensor([0.1137, 0.0289, 0.2850, 0.5724]) -Greedy action tensor([-0.5678, -1.6677, 1.5161, -1.3782]) tensor([0.1019, 0.0339, 0.8189, 0.0453]) -Greedy action tensor([-0.8609, -0.3024, -0.3725, -0.7958]) tensor([0.1837, 0.3210, 0.2993, 0.1960]) -Greedy action tensor([-0.1666, -0.5520, 0.8156, -0.8346]) tensor([0.2056, 0.1399, 0.5491, 0.1054]) -Greedy action tensor([1.2291, 0.2513, 0.1432, 0.0809]) tensor([0.4924, 0.1852, 0.1662, 0.1562]) -Greedy action tensor([-0.4434, 0.1177, 1.6971, -1.0639]) tensor([0.0848, 0.1486, 0.7210, 0.0456]) -Greedy action tensor([-0.0235, 0.0372, 0.3959, -0.7190]) tensor([0.2450, 0.2603, 0.3726, 0.1222]) -Greedy action tensor([-0.5726, -0.6320, 0.5450, -1.1063]) tensor([0.1790, 0.1687, 0.5473, 0.1050]) -Greedy action tensor([-0.0312, -1.4636, -0.9057, -0.6527]) tensor([0.4560, 0.1089, 0.1902, 0.2449]) -Greedy action tensor([-0.5772, -0.0561, 0.7059, -0.4613]) tensor([0.1349, 0.2271, 0.4866, 0.1514]) -Greedy action tensor([ 1.2970, -1.6961, 0.8431, 0.2184]) tensor([0.4937, 0.0248, 0.3136, 0.1679]) -Greedy action tensor([ 0.1166, -1.1207, 0.2876, 0.6196]) tensor([0.2421, 0.0703, 0.2873, 0.4004]) -Greedy action tensor([ 0.6257, -0.5970, 0.7378, -0.5757]) tensor([0.3685, 0.1085, 0.4122, 0.1108]) -Greedy action tensor([ 0.7584, -1.1446, 0.0646, 0.3364]) tensor([0.4339, 0.0647, 0.2168, 0.2845]) -Greedy action tensor([ 0.8442, -1.2759, 2.6249, 0.0849]) tensor([0.1329, 0.0160, 0.7889, 0.0622]) -Greedy action tensor([-0.3781, 0.5708, 0.1485, -0.4291]) tensor([0.1606, 0.4148, 0.2719, 0.1526]) -Greedy action tensor([-0.2423, -0.5736, 1.3801, -0.3998]) tensor([0.1309, 0.0940, 0.6632, 0.1119]) -Greedy action tensor([-0.7220, -0.0425, 0.1067, 0.4402]) tensor([0.1182, 0.2332, 0.2707, 0.3779]) -Greedy action tensor([-1.4122, -0.1599, -0.8323, 1.3234]) tensor([0.0461, 0.1612, 0.0823, 0.7105]) -Greedy action tensor([-0.2024, 0.5126, -0.2481, -0.8512]) tensor([0.2211, 0.4520, 0.2113, 0.1156]) -Greedy action tensor([-1.5178, -0.8315, -1.2991, 0.3860]) tensor([0.0914, 0.1815, 0.1137, 0.6133]) -Greedy action tensor([-0.4120, 0.4542, -1.3356, 0.0664]) tensor([0.1856, 0.4413, 0.0737, 0.2994]) -Greedy action tensor([-0.6959, -0.2948, 1.0051, 1.2197]) tensor([0.0677, 0.1012, 0.3711, 0.4600]) -Greedy action tensor([ 0.2231, -0.5837, 0.1774, 0.1990]) tensor([0.2961, 0.1321, 0.2828, 0.2890]) -Greedy action tensor([ 0.3924, -0.2589, 0.3674, -0.8521]) tensor([0.3591, 0.1872, 0.3502, 0.1034]) -Greedy action tensor([ 0.6880, -0.1989, 0.1494, -0.1659]) tensor([0.4130, 0.1701, 0.2410, 0.1758]) -Greedy action tensor([-1.0484, -0.9197, -0.2319, -0.2783]) tensor([0.1524, 0.1734, 0.3449, 0.3293]) -Greedy action tensor([-0.9962, 0.2134, -0.1484, -1.1432]) tensor([0.1325, 0.4440, 0.3092, 0.1143]) -Greedy action tensor([ 0.3455, -0.0900, -0.2765, 0.0826]) tensor([0.3387, 0.2191, 0.1818, 0.2604]) -Greedy action tensor([-0.4458, -0.7489, -0.8970, 0.0412]) tensor([0.2498, 0.1845, 0.1591, 0.4066]) -Greedy action tensor([ 1.2704, 0.0563, -0.8011, 0.5907]) tensor([0.5182, 0.1539, 0.0653, 0.2626]) -Greedy action tensor([ 1.7399, -0.2404, -1.1227, 0.1652]) tensor([0.7132, 0.0984, 0.0407, 0.1477]) -Greedy action tensor([ 1.2403, -0.0212, 0.0217, -0.3379]) tensor([0.5602, 0.1586, 0.1656, 0.1156]) -Greedy action tensor([ 1.5863, -0.3305, -0.8648, 0.6827]) tensor([0.6104, 0.0898, 0.0526, 0.2473]) -Greedy action tensor([ 1.4318, -0.4920, -0.2696, 0.1859]) tensor([0.6188, 0.0904, 0.1129, 0.1780]) -Greedy action tensor([ 1.4988, -0.3949, -0.8848, 0.5029]) tensor([0.6203, 0.0934, 0.0572, 0.2291]) -Greedy action tensor([ 1.2771, -0.6408, -0.5407, 0.6130]) tensor([0.5482, 0.0805, 0.0890, 0.2822]) -Greedy action tensor([ 1.5549, -0.6859, -0.5569, 0.4879]) tensor([0.6364, 0.0677, 0.0770, 0.2189]) -Greedy action tensor([ 2.0266, -0.5970, -0.5319, 0.5258]) tensor([0.7284, 0.0528, 0.0564, 0.1624]) -Greedy action tensor([ 1.8387, -0.7100, 0.0044, 0.0888]) tensor([0.7084, 0.0554, 0.1131, 0.1231]) -Greedy action tensor([ 1.3047, -0.5325, -0.2322, 0.2614]) tensor([0.5792, 0.0922, 0.1245, 0.2040]) -Greedy action tensor([ 1.6399, -0.1379, -0.9344, 0.3868]) tensor([0.6532, 0.1104, 0.0498, 0.1866]) -Greedy action tensor([ 1.3536, -0.4757, -0.2351, 0.5855]) tensor([0.5469, 0.0878, 0.1117, 0.2537]) -Greedy action tensor([ 1.2306, -0.2930, -0.4874, 0.0994]) tensor([0.5814, 0.1267, 0.1043, 0.1876]) -Greedy action tensor([ 2.0964, -0.8881, -0.0233, 0.5826]) tensor([0.7191, 0.0364, 0.0863, 0.1582]) -Greedy action tensor([ 1.5623, -0.5553, -0.6421, 0.4119]) tensor([0.6464, 0.0778, 0.0713, 0.2046]) -Greedy action tensor([ 1.3621, -0.3256, -0.6551, 0.5025]) tensor([0.5743, 0.1062, 0.0764, 0.2431]) -Greedy action tensor([ 1.7154, -0.4777, -0.4945, 0.3359]) tensor([0.6789, 0.0757, 0.0745, 0.1709]) -Greedy action tensor([ 2.8191, -1.8433, 0.1849, 0.3677]) tensor([0.8566, 0.0081, 0.0615, 0.0738]) -Greedy action tensor([ 1.2730, -0.4504, -0.4293, 0.4853]) tensor([0.5508, 0.0983, 0.1004, 0.2505]) -Greedy action tensor([ 0.7766, -0.1085, -0.3161, -0.6778]) tensor([0.5046, 0.2083, 0.1692, 0.1179]) -Greedy action tensor([ 1.5554, -0.5043, -0.9524, 0.5853]) tensor([0.6297, 0.0803, 0.0513, 0.2387]) -Greedy action tensor([ 1.3800, -0.7433, -0.0798, 0.1864]) tensor([0.6042, 0.0723, 0.1403, 0.1832]) -Greedy action tensor([ 1.4185, -0.2961, -0.8050, 0.4940]) tensor([0.5935, 0.1068, 0.0642, 0.2354]) -Greedy action tensor([ 1.2503, -0.8884, -0.3717, 0.3228]) tensor([0.5845, 0.0689, 0.1154, 0.2312]) -Greedy action tensor([ 1.2348, -0.3219, -0.9258, 0.4032]) tensor([0.5677, 0.1197, 0.0654, 0.2472]) -Greedy action tensor([ 2.2480, -1.0793, -0.2587, 0.6838]) tensor([0.7538, 0.0271, 0.0615, 0.1577]) -Greedy action tensor([ 1.6198, -0.8314, 0.0687, 0.1194]) tensor([0.6574, 0.0567, 0.1394, 0.1466]) -Greedy action tensor([ 1.2404, -0.5936, -0.3166, 0.2964]) tensor([0.5683, 0.0908, 0.1198, 0.2211]) -Greedy action tensor([ 0.7735, 0.0873, -0.1261, -0.0509]) tensor([0.4258, 0.2144, 0.1732, 0.1867]) -Greedy action tensor([ 1.2941, 0.0220, -0.6525, 0.3676]) tensor([0.5498, 0.1541, 0.0785, 0.2177]) -Greedy action tensor([ 1.0791, -0.4187, -0.6660, 0.7014]) tensor([0.4799, 0.1073, 0.0838, 0.3290]) -Greedy action tensor([ 2.1174, -0.0321, -0.1564, -0.0441]) tensor([0.7493, 0.0873, 0.0771, 0.0863]) -Greedy action tensor([ 1.3178, -0.3629, -0.0210, 0.2613]) tensor([0.5568, 0.1037, 0.1460, 0.1936]) -Greedy action tensor([ 1.7424, -0.1308, -0.7739, 0.4785]) tensor([0.6592, 0.1013, 0.0532, 0.1863]) -Greedy action tensor([ 1.2765, -0.2381, -1.0570, 0.1686]) tensor([0.6071, 0.1335, 0.0589, 0.2005]) -Greedy action tensor([ 1.7546, 0.2685, -0.3665, 0.3594]) tensor([0.6274, 0.1419, 0.0752, 0.1555]) -Greedy action tensor([ 1.3891, -0.8625, -0.2532, 0.3908]) tensor([0.5998, 0.0631, 0.1161, 0.2210]) -Greedy action tensor([ 1.1846, -0.2653, -0.8929, 0.4646]) tensor([0.5415, 0.1270, 0.0678, 0.2636]) -Greedy action tensor([ 1.9376, -0.4199, -0.2174, 0.6705]) tensor([0.6701, 0.0634, 0.0777, 0.1887]) -Greedy action tensor([ 1.3090, -0.1615, -0.6629, 0.3613]) tensor([0.5693, 0.1308, 0.0792, 0.2207]) -Greedy action tensor([ 1.4559, -0.6490, -0.6827, 0.6988]) tensor([0.5852, 0.0713, 0.0690, 0.2745]) -Greedy action tensor([ 1.4817, -1.0283, -0.1925, 0.4850]) tensor([0.6106, 0.0496, 0.1145, 0.2253]) -Greedy action tensor([ 1.1327, -0.0901, -0.5722, 0.6482]) tensor([0.4780, 0.1407, 0.0869, 0.2944]) -Greedy action tensor([ 2.7154, -1.4476, -0.1171, 0.5847]) tensor([0.8381, 0.0130, 0.0493, 0.0995]) -Greedy action tensor([ 3.2966, -1.7456, 0.4902, 0.1006]) tensor([0.9027, 0.0058, 0.0545, 0.0369]) -Greedy action tensor([ 0.5640, -0.2847, -0.1195, 0.0451]) tensor([0.3956, 0.1693, 0.1997, 0.2354]) -Greedy action tensor([ 1.4189, -0.7245, 0.1024, -0.0633]) tensor([0.6202, 0.0727, 0.1663, 0.1409]) -Greedy action tensor([ 0.8486, -0.1619, -0.0057, -0.0171]) tensor([0.4524, 0.1647, 0.1925, 0.1903]) -Greedy action tensor([ 2.0533, -1.1299, -0.2343, 0.9548]) tensor([0.6773, 0.0281, 0.0688, 0.2258]) -Greedy action tensor([ 1.2099, -0.6164, -0.0333, 0.0745]) tensor([0.5647, 0.0909, 0.1629, 0.1814]) -Greedy action tensor([ 1.5660, -0.8919, -0.2972, 0.7642]) tensor([0.5920, 0.0507, 0.0919, 0.2655]) -Greedy action tensor([ 1.2102, -0.3895, -0.0085, 0.3633]) tensor([0.5191, 0.1048, 0.1535, 0.2226]) -Greedy action tensor([ 1.5063, -0.6788, -0.0881, -0.0606]) tensor([0.6561, 0.0738, 0.1332, 0.1369]) -Greedy action tensor([ 1.3462, -0.5232, -0.2882, 0.3912]) tensor([0.5767, 0.0889, 0.1125, 0.2219]) -Greedy action tensor([ 1.1739, -0.4417, -0.1826, 0.0348]) tensor([0.5629, 0.1119, 0.1450, 0.1802]) -Greedy action tensor([ 1.2363, 0.0536, -0.3423, 0.1870]) tensor([0.5368, 0.1645, 0.1107, 0.1880]) -Greedy action tensor([ 2.4822, -1.2474, -0.4216, 0.5159]) tensor([0.8205, 0.0197, 0.0450, 0.1149]) -Greedy action tensor([ 1.6498, -0.9481, -0.3583, 0.5797]) tensor([0.6445, 0.0480, 0.0865, 0.2210]) -Greedy action tensor([ 1.8918, -0.2205, -0.4844, -0.1055]) tensor([0.7410, 0.0896, 0.0688, 0.1005]) -Greedy action tensor([ 2.2315, -0.8611, -0.3721, 0.2758]) tensor([0.7931, 0.0360, 0.0587, 0.1122]) -Greedy action tensor([ 1.3766, 0.5095, -0.0724, -1.0044]) tensor([0.5723, 0.2404, 0.1344, 0.0529]) -Greedy action tensor([ 1.2673, -0.3317, -0.4412, 0.2734]) tensor([0.5703, 0.1153, 0.1033, 0.2111]) -Greedy action tensor([ 1.2304, 0.1869, 0.4457, -0.7930]) tensor([0.5153, 0.1815, 0.2351, 0.0681]) -Greedy action tensor([ 1.2157, -0.3966, -0.6024, 0.2931]) tensor([0.5684, 0.1134, 0.0923, 0.2259]) -Greedy action tensor([ 1.2810, 0.2789, -0.2321, -0.4792]) tensor([0.5684, 0.2087, 0.1252, 0.0978]) -Greedy action tensor([ 1.0294, -0.1681, -0.7236, 0.3181]) tensor([0.5086, 0.1536, 0.0881, 0.2497]) -Greedy action tensor([ 1.6953, -0.3870, -0.2937, 0.3691]) tensor([0.6549, 0.0816, 0.0896, 0.1739]) -Greedy action tensor([ 2.3266, -0.4183, -0.3596, 0.4411]) tensor([0.7787, 0.0500, 0.0531, 0.1182]) -Greedy action tensor([ 1.4511, -0.4965, -0.3791, 0.4838]) tensor([0.5941, 0.0847, 0.0953, 0.2258]) -Greedy action tensor([ 1.2535, -0.6359, -0.2320, 0.3607]) tensor([0.5596, 0.0846, 0.1267, 0.2292]) -Greedy action tensor([ 1.5350, -0.0120, 0.0598, 0.6245]) tensor([0.5423, 0.1155, 0.1240, 0.2182]) -Greedy action tensor([ 1.4041, -0.6269, -0.4009, 0.4850]) tensor([0.5901, 0.0774, 0.0971, 0.2354]) -Greedy action tensor([ 1.7308, -0.5542, -0.3982, 0.4649]) tensor([0.6655, 0.0677, 0.0792, 0.1876]) -Greedy action tensor([ 6.1809e-01, -1.4902e-01, -4.3605e-04, -1.8082e-01]) tensor([0.4077, 0.1893, 0.2196, 0.1834]) -Greedy action tensor([ 1.4093, -0.4419, -0.5022, 0.3094]) tensor([0.6106, 0.0959, 0.0903, 0.2033]) -Greedy action tensor([ 1.8743, -1.2430, -0.0633, 0.6372]) tensor([0.6763, 0.0299, 0.0974, 0.1963]) -Greedy action tensor([ 2.0362, -1.0484, 0.1221, 1.0196]) tensor([0.6431, 0.0294, 0.0948, 0.2327]) -Greedy action tensor([ 1.8400, -0.9207, -0.4374, 0.3810]) tensor([0.7152, 0.0452, 0.0733, 0.1663]) -Greedy action tensor([ 1.4287, -0.4456, -0.1159, 0.3682]) tensor([0.5837, 0.0896, 0.1246, 0.2021]) -Greedy action tensor([ 1.4175, -0.4416, -0.3486, 0.1952]) tensor([0.6167, 0.0961, 0.1055, 0.1817]) -Greedy action tensor([ 0.9114, -0.7003, -0.0199, -0.4367]) tensor([0.5396, 0.1077, 0.2126, 0.1401]) -Greedy action tensor([ 0.6432, 0.1911, -0.2223, -0.0473]) tensor([0.3909, 0.2487, 0.1645, 0.1959]) -Greedy action tensor([ 0.5751, -0.2856, -0.0972, -0.1584]) tensor([0.4143, 0.1752, 0.2115, 0.1990]) -Greedy action tensor([ 0.6963, -0.2607, -0.0612, -0.1924]) tensor([0.4417, 0.1696, 0.2071, 0.1816]) -Greedy action tensor([ 0.8483, -0.4426, -0.0502, -0.3065]) tensor([0.5007, 0.1377, 0.2039, 0.1578]) -Greedy action tensor([ 0.5126, -0.3899, -0.0611, -0.1064]) tensor([0.3988, 0.1617, 0.2247, 0.2148]) -Greedy action tensor([ 0.5469, -0.3168, -0.0645, -0.4312]) tensor([0.4273, 0.1802, 0.2319, 0.1607]) -Greedy action tensor([ 0.4856, -0.1709, -0.2535, -0.3947]) tensor([0.4148, 0.2151, 0.1981, 0.1720]) -Greedy action tensor([ 0.8874, -0.6204, 0.0500, -0.2849]) tensor([0.5092, 0.1127, 0.2204, 0.1577]) -Greedy action tensor([ 0.4355, -0.1634, -0.2834, -0.4825]) tensor([0.4105, 0.2255, 0.2000, 0.1639]) -Greedy action tensor([ 0.8167, -0.3411, 0.0577, -0.2331]) tensor([0.4690, 0.1473, 0.2195, 0.1641]) -Greedy action tensor([ 0.5929, -0.3353, -0.0806, -0.0915]) tensor([0.4150, 0.1640, 0.2116, 0.2093]) -Greedy action tensor([ 0.4868, -0.2303, -0.0283, -0.4550]) tensor([0.4040, 0.1972, 0.2413, 0.1575]) -Greedy action tensor([ 0.6137, -0.3072, -0.0542, -0.0478]) tensor([0.4120, 0.1641, 0.2113, 0.2126]) -Greedy action tensor([ 0.8217, -0.4938, -0.0244, -0.4281]) tensor([0.5040, 0.1353, 0.2163, 0.1444]) -Greedy action tensor([ 0.3707, -0.1974, 0.2178, -0.4841]) tensor([0.3508, 0.1988, 0.3011, 0.1492]) -Greedy action tensor([ 0.5970, -0.1436, -0.0908, -0.1667]) tensor([0.4089, 0.1950, 0.2056, 0.1905]) -Greedy action tensor([ 0.2990, -0.1968, -0.0211, -0.2601]) tensor([0.3440, 0.2095, 0.2498, 0.1967]) -Greedy action tensor([ 0.5447, -0.1529, -0.0225, -0.0279]) tensor([0.3804, 0.1893, 0.2157, 0.2146]) -Greedy action tensor([ 0.5433, -0.2914, -0.0647, -0.2606]) tensor([0.4122, 0.1789, 0.2244, 0.1845]) -Greedy action tensor([ 0.7086, -0.5912, 0.0117, -0.1862]) tensor([0.4588, 0.1251, 0.2286, 0.1875]) -Greedy action tensor([ 0.6779, -0.4832, -0.1498, -0.1327]) tensor([0.4556, 0.1427, 0.1991, 0.2026]) -Greedy action tensor([ 0.5854, -0.3775, -0.1139, -0.1935]) tensor([0.4278, 0.1633, 0.2126, 0.1963]) -Greedy action tensor([ 1.2578, -0.9387, 0.1304, -0.7890]) tensor([0.6393, 0.0711, 0.2071, 0.0826]) -Greedy action tensor([ 0.7699, -0.5022, 0.0277, -0.3916]) tensor([0.4832, 0.1354, 0.2301, 0.1513]) -Greedy action tensor([ 0.6057, -0.6768, 0.0640, -0.2578]) tensor([0.4385, 0.1216, 0.2551, 0.1849]) -Greedy action tensor([ 0.8969, -0.0404, 0.0540, -0.1796]) tensor([0.4623, 0.1811, 0.1990, 0.1576]) -Greedy action tensor([ 0.1256, 0.0677, 0.0569, -0.4186]) tensor([0.2892, 0.2729, 0.2700, 0.1678]) -Greedy action tensor([ 0.6764, -0.4074, -0.1265, -0.1371]) tensor([0.4485, 0.1517, 0.2009, 0.1988]) -Greedy action tensor([ 0.8825, -0.5821, -0.1577, -0.6478]) tensor([0.5552, 0.1284, 0.1962, 0.1202]) -Greedy action tensor([ 0.7074, -0.2368, 0.0231, -0.3257]) tensor([0.4446, 0.1729, 0.2243, 0.1582]) -Greedy action tensor([ 0.5124, -0.1534, 0.0841, -0.3732]) tensor([0.3879, 0.1993, 0.2528, 0.1600]) -Greedy action tensor([ 0.2984, -0.0086, 0.0118, -0.4270]) tensor([0.3366, 0.2476, 0.2527, 0.1630]) -Greedy action tensor([ 0.6779, -0.3728, -0.2036, -0.1662]) tensor([0.4558, 0.1594, 0.1888, 0.1960]) -Greedy action tensor([ 0.5511, -0.2884, -0.0649, -0.1974]) tensor([0.4090, 0.1767, 0.2209, 0.1935]) -Greedy action tensor([ 1.0503, -0.6358, -0.0947, -0.3754]) tensor([0.5735, 0.1062, 0.1825, 0.1378]) -Greedy action tensor([ 0.9341, -0.2820, 0.0955, -0.3133]) tensor([0.4960, 0.1470, 0.2144, 0.1425]) -Greedy action tensor([ 0.7799, -0.7668, 0.1615, -0.6802]) tensor([0.5041, 0.1073, 0.2716, 0.1170]) -Greedy action tensor([ 0.9888, -0.6064, -0.1967, -0.4249]) tensor([0.5709, 0.1158, 0.1745, 0.1389]) -Greedy action tensor([ 0.4754, -0.3615, -0.0682, -0.2705]) tensor([0.4019, 0.1741, 0.2334, 0.1906]) -Greedy action tensor([ 0.7306, -0.5532, 0.0800, -0.5653]) tensor([0.4825, 0.1337, 0.2518, 0.1320]) -Greedy action tensor([ 0.4213, -0.1008, 0.0065, -0.2712]) tensor([0.3631, 0.2154, 0.2398, 0.1817]) -Greedy action tensor([ 0.7936, -0.3936, 0.0389, -0.7644]) tensor([0.5036, 0.1536, 0.2368, 0.1060]) -Greedy action tensor([ 0.2780, -0.0716, -0.0246, -0.4103]) tensor([0.3394, 0.2393, 0.2508, 0.1705]) -Greedy action tensor([ 0.4804, -0.0818, -0.0266, 0.0048]) tensor([0.3579, 0.2040, 0.2156, 0.2225]) -Greedy action tensor([ 1.1912, -0.7715, -0.0422, -0.6789]) tensor([0.6306, 0.0886, 0.1837, 0.0972]) -Greedy action tensor([ 1.0236, -0.7788, 0.2811, -0.5794]) tensor([0.5429, 0.0895, 0.2584, 0.1093]) -Greedy action tensor([ 0.8973, -0.4656, -0.1775, -0.2661]) tensor([0.5236, 0.1340, 0.1788, 0.1636]) -Greedy action tensor([ 0.7876, -0.3726, 0.0488, -0.3276]) tensor([0.4719, 0.1479, 0.2254, 0.1547]) -Greedy action tensor([ 0.5916, -0.3374, -0.0249, -0.1972]) tensor([0.4186, 0.1653, 0.2259, 0.1902]) -Greedy action tensor([ 0.9595, -0.5878, 0.0962, -0.5151]) tensor([0.5366, 0.1142, 0.2263, 0.1228]) -Greedy action tensor([ 0.6487, -0.5107, -0.0989, -0.0745]) tensor([0.4401, 0.1380, 0.2084, 0.2135]) -Greedy action tensor([ 0.4671, -0.2237, -0.1659, -0.0089]) tensor([0.3769, 0.1889, 0.2001, 0.2341]) -Greedy action tensor([ 0.7946, -0.3572, -0.0754, -0.1628]) tensor([0.4719, 0.1492, 0.1977, 0.1812]) -Greedy action tensor([ 0.9388, -0.6933, -0.0191, -0.4197]) tensor([0.5446, 0.1065, 0.2090, 0.1400]) -Greedy action tensor([ 0.8787, -0.6161, -0.0519, -0.3427]) tensor([0.5226, 0.1172, 0.2061, 0.1541]) -Greedy action tensor([ 0.7008, -0.3389, -0.1102, -0.0902]) tensor([0.4442, 0.1570, 0.1974, 0.2014]) -Greedy action tensor([ 0.6537, -0.3654, -0.1503, -0.1933]) tensor([0.4470, 0.1613, 0.2000, 0.1916]) -Greedy action tensor([ 0.8606, -0.6265, -0.1274, -0.2092]) tensor([0.5151, 0.1164, 0.1918, 0.1767]) -Greedy action tensor([ 0.4619, -0.1173, -0.0567, 0.0223]) tensor([0.3572, 0.2001, 0.2126, 0.2301]) -Greedy action tensor([ 0.7147, -0.4184, -0.1406, -0.3855]) tensor([0.4808, 0.1548, 0.2044, 0.1600]) -Greedy action tensor([ 0.8520, -0.7779, -0.0640, -0.2555]) tensor([0.5191, 0.1017, 0.2077, 0.1715]) -Greedy action tensor([ 1.1345, -0.5221, -0.2493, -0.7641]) tensor([0.6285, 0.1199, 0.1575, 0.0941]) -Greedy action tensor([ 1.0504, -1.0455, 0.0506, -0.4950]) tensor([0.5868, 0.0722, 0.2159, 0.1251]) -Greedy action tensor([ 0.3835, -0.3369, -0.0714, -0.1750]) tensor([0.3713, 0.1807, 0.2356, 0.2124]) -Greedy action tensor([ 0.7238, -0.5326, 0.1755, -0.1559]) tensor([0.4391, 0.1250, 0.2538, 0.1822]) -Greedy action tensor([ 0.5898, -0.2772, -0.0048, -0.3117]) tensor([0.4205, 0.1767, 0.2320, 0.1707]) -Greedy action tensor([ 0.7718, -0.6041, -0.0941, -0.3212]) tensor([0.4979, 0.1258, 0.2094, 0.1669]) -Greedy action tensor([ 0.5645, -0.6754, -0.1546, -0.4512]) tensor([0.4676, 0.1353, 0.2278, 0.1693]) -Greedy action tensor([ 1.4374, -0.7582, -0.2625, -0.4586]) tensor([0.6924, 0.0771, 0.1265, 0.1040]) -Greedy action tensor([ 0.5688, -0.2516, -0.1153, -0.1702]) tensor([0.4128, 0.1818, 0.2083, 0.1972]) -Greedy action tensor([ 1.2535, -0.6075, -0.1721, -0.4136]) tensor([0.6311, 0.0981, 0.1517, 0.1191]) -Greedy action tensor([ 0.5171, -0.3033, -0.1458, -0.0363]) tensor([0.3952, 0.1740, 0.2036, 0.2272]) -Greedy action tensor([ 0.6166, -0.0717, -0.1704, -0.4596]) tensor([0.4351, 0.2186, 0.1980, 0.1483]) -Greedy action tensor([ 0.9055, -0.6717, -0.1366, -0.3550]) tensor([0.5427, 0.1121, 0.1914, 0.1538]) -Greedy action tensor([ 0.8759, -0.7912, 0.1858, -0.3296]) tensor([0.5026, 0.0949, 0.2520, 0.1505]) -Greedy action tensor([ 0.6002, -0.4231, 0.0537, -0.2778]) tensor([0.4248, 0.1527, 0.2460, 0.1766]) -Greedy action tensor([ 0.9755, -0.4738, -0.1312, -0.2999]) tensor([0.5421, 0.1272, 0.1792, 0.1514]) -Greedy action tensor([ 0.8757, -0.4528, 0.0027, -0.2730]) tensor([0.5001, 0.1325, 0.2089, 0.1586]) -Greedy action tensor([ 0.8549, -0.3719, -0.1246, -0.0868]) tensor([0.4857, 0.1424, 0.1824, 0.1894]) -Greedy action tensor([ 0.8353, -0.5206, -0.1638, -0.3913]) tensor([0.5210, 0.1343, 0.1919, 0.1528]) -Greedy action tensor([-1.7722, -0.4067, 0.5958, -0.0772]) tensor([0.0475, 0.1862, 0.5074, 0.2589]) -Greedy action tensor([-1.8224, -0.4417, 0.6273, -0.0923]) tensor([0.0450, 0.1791, 0.5217, 0.2541]) -Greedy action tensor([-1.6886, -0.2431, 0.5893, -0.0177]) tensor([0.0492, 0.2089, 0.4802, 0.2617]) -Greedy action tensor([-1.5788, -0.3605, 0.4671, -0.0265]) tensor([0.0594, 0.2008, 0.4594, 0.2804]) -Greedy action tensor([-1.8253, -0.3227, 0.5927, -0.1237]) tensor([0.0450, 0.2024, 0.5056, 0.2470]) -Greedy action tensor([-1.3946, -0.4620, 0.5978, 0.3163]) tensor([0.0609, 0.1549, 0.4469, 0.3373]) -Greedy action tensor([-1.9209, -0.4551, 0.6565, -0.1747]) tensor([0.0413, 0.1788, 0.5433, 0.2366]) -Greedy action tensor([-1.7797, -0.3814, 0.6159, -0.0574]) tensor([0.0463, 0.1872, 0.5076, 0.2589]) -Greedy action tensor([-1.9447, -0.4509, 0.6682, -0.1803]) tensor([0.0401, 0.1787, 0.5471, 0.2342]) -Greedy action tensor([-1.9235, -0.4417, 0.6606, -0.1644]) tensor([0.0409, 0.1799, 0.5418, 0.2374]) -Greedy action tensor([-1.1279, 0.3466, 0.2574, 0.4940]) tensor([0.0693, 0.3028, 0.2770, 0.3509]) -Greedy action tensor([-1.9305, -0.4445, 0.6621, -0.1722]) tensor([0.0407, 0.1798, 0.5436, 0.2360]) -Greedy action tensor([-1.8804, -0.3640, 0.6369, -0.1321]) tensor([0.0422, 0.1923, 0.5231, 0.2424]) -Greedy action tensor([-1.7565, -0.3188, 0.6040, -0.0423]) tensor([0.0468, 0.1972, 0.4961, 0.2599]) -Greedy action tensor([-0.6870, -0.2090, 0.1301, 0.3256]) tensor([0.1311, 0.2114, 0.2967, 0.3608]) -Greedy action tensor([-0.5393, -0.4730, 0.1748, 0.0169]) tensor([0.1708, 0.1825, 0.3488, 0.2979]) -Greedy action tensor([-1.3498, -0.3519, 0.4957, 0.2837]) tensor([0.0659, 0.1789, 0.4175, 0.3377]) -Greedy action tensor([-1.8302, -0.4691, 0.6162, -0.1230]) tensor([0.0455, 0.1776, 0.5258, 0.2511]) -Greedy action tensor([-1.6050, 0.3229, 0.3961, 0.0303]) tensor([0.0490, 0.3370, 0.3626, 0.2515]) -Greedy action tensor([-1.5831, -0.4421, 0.5259, -0.1657]) tensor([0.0606, 0.1897, 0.4995, 0.2501]) -Greedy action tensor([-1.3845, -0.4567, 0.4851, 0.4207]) tensor([0.0621, 0.1571, 0.4030, 0.3778]) -Greedy action tensor([-1.9289, -0.4179, 0.6576, -0.1651]) tensor([0.0406, 0.1838, 0.5389, 0.2367]) -Greedy action tensor([-1.8015, -0.3053, 0.6145, -0.0874]) tensor([0.0450, 0.2010, 0.5042, 0.2499]) -Greedy action tensor([-1.9240, -0.4428, 0.6662, -0.1587]) tensor([0.0407, 0.1790, 0.5425, 0.2378]) -Greedy action tensor([-1.8824, -0.4528, 0.6397, -0.1500]) tensor([0.0429, 0.1794, 0.5349, 0.2428]) -Greedy action tensor([-1.4591, -0.5223, 0.8643, -0.0219]) tensor([0.0556, 0.1420, 0.5682, 0.2342]) -Greedy action tensor([-1.8371, -0.5002, 0.6135, -0.1388]) tensor([0.0457, 0.1741, 0.5303, 0.2499]) -Greedy action tensor([-1.4091, 0.6088, 0.2490, 0.2127]) tensor([0.0531, 0.3994, 0.2787, 0.2688]) -Greedy action tensor([-1.8918, -0.4809, 0.6478, -0.1469]) tensor([0.0426, 0.1745, 0.5394, 0.2436]) -Greedy action tensor([-1.8964, -0.4348, 0.6467, -0.1600]) tensor([0.0422, 0.1819, 0.5365, 0.2394]) -Greedy action tensor([-1.6860, -0.4202, 0.6297, 0.0459]) tensor([0.0492, 0.1744, 0.4984, 0.2780]) -Greedy action tensor([-1.3468, 0.5272, 0.2774, -0.0178]) tensor([0.0611, 0.3980, 0.3100, 0.2308]) -Greedy action tensor([-1.9294, -0.4495, 0.6724, -0.1669]) tensor([0.0405, 0.1778, 0.5459, 0.2358]) -Greedy action tensor([-1.8742, -0.1610, 0.5858, -0.1352]) tensor([0.0418, 0.2316, 0.4889, 0.2377]) -Greedy action tensor([-0.8912, 0.1345, 0.2256, 0.0284]) tensor([0.1069, 0.2982, 0.3267, 0.2682]) -Greedy action tensor([-1.6382, -0.4341, 0.5880, 0.1054]) tensor([0.0518, 0.1726, 0.4796, 0.2960]) -Greedy action tensor([-1.8391, -0.4426, 0.6087, -0.1236]) tensor([0.0451, 0.1823, 0.5217, 0.2508]) -Greedy action tensor([-1.2402, -0.2464, 0.8800, 1.0206]) tensor([0.0462, 0.1249, 0.3853, 0.4435]) -Greedy action tensor([-1.8928, -0.4405, 0.6467, -0.1503]) tensor([0.0423, 0.1806, 0.5357, 0.2414]) -Greedy action tensor([-1.0816, 0.0591, 0.3888, 0.3806]) tensor([0.0782, 0.2445, 0.3400, 0.3373]) -Greedy action tensor([-1.8969, -0.4409, 0.6455, -0.1547]) tensor([0.0422, 0.1809, 0.5361, 0.2408]) -Greedy action tensor([-1.0877, -0.0035, 0.1848, 0.0369]) tensor([0.0943, 0.2788, 0.3366, 0.2903]) -Greedy action tensor([-1.4805, 0.7843, 0.4812, -0.5770]) tensor([0.0495, 0.4765, 0.3519, 0.1221]) -Greedy action tensor([-1.8174, -0.3933, 0.5990, -0.1303]) tensor([0.0459, 0.1909, 0.5149, 0.2483]) -Greedy action tensor([-1.4836, -0.5701, 0.4593, -0.0420]) tensor([0.0680, 0.1696, 0.4748, 0.2876]) -Greedy action tensor([-1.6100, -0.4805, 0.5199, 0.0172]) tensor([0.0568, 0.1758, 0.4781, 0.2892]) -Greedy action tensor([-1.8027, -0.4697, 0.6210, -0.0523]) tensor([0.0458, 0.1737, 0.5169, 0.2636]) -Greedy action tensor([-1.4307, -0.5176, 0.3980, 0.0747]) tensor([0.0703, 0.1752, 0.4377, 0.3168]) -Greedy action tensor([-1.8737, -0.4577, 0.6364, -0.1481]) tensor([0.0434, 0.1788, 0.5341, 0.2437]) -Greedy action tensor([-1.9150, -0.4493, 0.6518, -0.1678]) tensor([0.0415, 0.1797, 0.5406, 0.2382]) -Greedy action tensor([-1.9252, -0.4271, 0.6568, -0.1693]) tensor([0.0408, 0.1827, 0.5401, 0.2364]) -Greedy action tensor([-1.8847, -0.4495, 0.6405, -0.1509]) tensor([0.0428, 0.1798, 0.5349, 0.2424]) -Greedy action tensor([-1.8430, -0.1916, 0.5732, -0.1414]) tensor([0.0437, 0.2277, 0.4892, 0.2394]) -Greedy action tensor([-1.9330, -0.4453, 0.6702, -0.1698]) tensor([0.0404, 0.1788, 0.5454, 0.2355]) -Greedy action tensor([-1.9069, -0.4374, 0.6727, -0.1535]) tensor([0.0411, 0.1788, 0.5426, 0.2375]) -Greedy action tensor([-1.8950, -0.4374, 0.6408, -0.1469]) tensor([0.0423, 0.1815, 0.5335, 0.2427]) -Greedy action tensor([-1.8179, -0.0904, 0.5476, -0.0797]) tensor([0.0435, 0.2450, 0.4637, 0.2477]) -Greedy action tensor([-1.8940, -0.2998, 0.6153, -0.1562]) tensor([0.0418, 0.2060, 0.5144, 0.2378]) -Greedy action tensor([-1.8794, -0.4683, 0.6408, -0.1336]) tensor([0.0430, 0.1763, 0.5344, 0.2463]) -Greedy action tensor([-1.9037, -0.3943, 0.6427, -0.1567]) tensor([0.0416, 0.1883, 0.5312, 0.2388]) -Greedy action tensor([-1.9432, -0.4475, 0.6672, -0.1787]) tensor([0.0402, 0.1792, 0.5462, 0.2344]) -Greedy action tensor([-0.4045, 1.0816, 0.0167, 0.4251]) tensor([0.1083, 0.4785, 0.1650, 0.2482]) -Greedy action tensor([-1.8145, -0.2004, 0.5561, -0.1157]) tensor([0.0451, 0.2263, 0.4823, 0.2463]) -Greedy action tensor([-1.6781, -0.5147, 0.5532, -0.0065]) tensor([0.0531, 0.1700, 0.4944, 0.2825]) -Greedy action tensor([-1.6284, -0.5556, 0.5055, -0.0327]) tensor([0.0578, 0.1690, 0.4882, 0.2850]) -Greedy action tensor([-1.9303, -0.4408, 0.6637, -0.1725]) tensor([0.0406, 0.1801, 0.5437, 0.2356]) -Greedy action tensor([-1.9348, -0.4487, 0.6620, -0.1770]) tensor([0.0406, 0.1794, 0.5447, 0.2354]) -Greedy action tensor([-1.8531, -0.4559, 0.6422, -0.1132]) tensor([0.0437, 0.1768, 0.5303, 0.2491]) -Greedy action tensor([-1.8214, -0.4052, 0.6548, -0.1049]) tensor([0.0443, 0.1825, 0.5268, 0.2464]) -Greedy action tensor([-1.4911, -0.1695, 0.5959, 0.3159]) tensor([0.0529, 0.1984, 0.4264, 0.3223]) -Greedy action tensor([-1.9191, -0.4085, 0.6512, -0.1618]) tensor([0.0410, 0.1857, 0.5357, 0.2376]) -Greedy action tensor([-1.9340, -0.4392, 0.6637, -0.1724]) tensor([0.0405, 0.1804, 0.5435, 0.2356]) -Greedy action tensor([-1.9157, -0.4423, 0.6554, -0.1654]) tensor([0.0413, 0.1803, 0.5405, 0.2379]) -Greedy action tensor([-1.4671, -0.2962, 0.4944, 0.1789]) tensor([0.0605, 0.1952, 0.4303, 0.3139]) -Greedy action tensor([-0.2120, 0.9001, 0.2385, 0.5411]) tensor([0.1293, 0.3932, 0.2029, 0.2746]) -Greedy action tensor([-0.9831, 0.8667, 0.1018, 0.2406]) tensor([0.0729, 0.4635, 0.2157, 0.2478]) -Greedy action tensor([-0.8846, -0.6799, 0.8571, 0.9303]) tensor([0.0710, 0.0872, 0.4055, 0.4363]) -Greedy action tensor([-1.0478, -0.3666, 0.2156, 0.3720]) tensor([0.0939, 0.1856, 0.3322, 0.3884]) -Greedy action tensor([-1.8540, -0.4325, 0.6187, -0.1324]) tensor([0.0443, 0.1834, 0.5247, 0.2476]) -Greedy action tensor([-1.5951, -0.5137, 0.5019, -0.2324]) tensor([0.0625, 0.1843, 0.5089, 0.2442]) -Greedy action tensor([-1.9039, -0.4761, 0.6728, -0.1511]) tensor([0.0415, 0.1730, 0.5459, 0.2395]) -Greedy action tensor([ 0.6861, -0.0879, 0.6792, 0.4160]) tensor([0.3108, 0.1433, 0.3087, 0.2372]) -Greedy action tensor([ 0.3648, -1.8257, 1.1083, 0.6050]) tensor([0.2229, 0.0249, 0.4688, 0.2834]) -Greedy action tensor([ 0.2164, -1.0255, 0.1083, 0.0563]) tensor([0.3291, 0.0951, 0.2954, 0.2804]) -Greedy action tensor([ 0.4593, -0.2416, 0.4252, -0.2903]) tensor([0.3407, 0.1690, 0.3293, 0.1610]) -Greedy action tensor([-0.6276, -0.8743, 0.4180, -0.6461]) tensor([0.1783, 0.1393, 0.5073, 0.1750]) -Greedy action tensor([-0.1969, -0.1342, 0.0526, -0.9044]) tensor([0.2603, 0.2772, 0.3341, 0.1283]) -Greedy action tensor([ 1.0847, -0.2253, 0.7882, 0.2383]) tensor([0.4095, 0.1105, 0.3044, 0.1756]) -Greedy action tensor([-0.6052, -0.6845, 0.5648, 0.2817]) tensor([0.1320, 0.1220, 0.4254, 0.3205]) -Greedy action tensor([ 0.6878, -1.0482, 0.9819, 0.2792]) tensor([0.3142, 0.0554, 0.4216, 0.2088]) -Greedy action tensor([ 1.1372, -1.8357, 0.0428, 0.5269]) tensor([0.5184, 0.0265, 0.1735, 0.2816]) -Greedy action tensor([ 1.6701, -0.9117, 1.3304, 0.6823]) tensor([0.4630, 0.0350, 0.3296, 0.1724]) -Greedy action tensor([-0.1164, 0.6222, 0.9922, -0.0952]) tensor([0.1400, 0.2929, 0.4241, 0.1430]) -Greedy action tensor([-0.8938, -0.7224, 0.8701, -0.8970]) tensor([0.1109, 0.1316, 0.6470, 0.1105]) -Greedy action tensor([-0.4537, -0.7554, -0.4451, -0.3501]) tensor([0.2593, 0.1917, 0.2615, 0.2875]) -Greedy action tensor([ 0.6642, -1.9191, 0.1893, 0.7804]) tensor([0.3545, 0.0268, 0.2205, 0.3982]) -Greedy action tensor([ 0.5388, -0.7513, 0.4671, 1.4962]) tensor([0.2079, 0.0572, 0.1935, 0.5414]) -Greedy action tensor([-0.9702, -1.0041, 0.1827, -0.4873]) tensor([0.1480, 0.1431, 0.4689, 0.2400]) -Greedy action tensor([-0.5450, -1.9839, 0.0184, -0.3795]) tensor([0.2396, 0.0568, 0.4209, 0.2827]) -Greedy action tensor([ 0.8685, -0.2910, -0.2287, 0.7062]) tensor([0.4004, 0.1256, 0.1336, 0.3404]) -Greedy action tensor([ 0.7289, -0.3276, -0.2458, 0.0291]) tensor([0.4501, 0.1565, 0.1698, 0.2236]) -Greedy action tensor([ 0.4322, 0.0800, -0.3506, 0.0802]) tensor([0.3492, 0.2455, 0.1596, 0.2456]) -Greedy action tensor([-0.2883, -0.8938, -0.4218, 0.4367]) tensor([0.2229, 0.1217, 0.1951, 0.4603]) -Greedy action tensor([-0.4240, -1.0543, 0.6356, -0.6984]) tensor([0.1931, 0.1028, 0.5572, 0.1468]) -Greedy action tensor([ 0.3608, -0.0527, 0.3990, -0.1260]) tensor([0.3017, 0.1995, 0.3134, 0.1854]) -Greedy action tensor([ 0.8280, -0.3342, -0.1252, 0.9642]) tensor([0.3516, 0.1100, 0.1355, 0.4029]) -Greedy action tensor([ 0.2299, -1.0544, 0.1539, 0.4238]) tensor([0.2926, 0.0810, 0.2712, 0.3552]) -Greedy action tensor([-0.6269, -1.6299, -0.4228, -0.4761]) tensor([0.2662, 0.0977, 0.3265, 0.3096]) -Greedy action tensor([-1.2084, -0.1907, -0.3877, 0.3472]) tensor([0.0928, 0.2567, 0.2108, 0.4396]) -Greedy action tensor([ 0.1738, -2.0076, 0.5648, -0.4644]) tensor([0.3206, 0.0362, 0.4739, 0.1693]) -Greedy action tensor([ 0.1472, -0.4616, 0.9643, -0.1486]) tensor([0.2197, 0.1195, 0.4974, 0.1634]) -Greedy action tensor([ 0.4003, 1.1454, -0.7002, -0.1842]) tensor([0.2502, 0.5271, 0.0832, 0.1395]) -Greedy action tensor([-0.2346, -0.7224, 0.0137, -0.8831]) tensor([0.2925, 0.1796, 0.3749, 0.1529]) -Greedy action tensor([ 1.6599, -1.3153, 1.1282, 1.0715]) tensor([0.4558, 0.0233, 0.2678, 0.2531]) -Greedy action tensor([-0.3425, -1.5337, 0.4926, -0.5502]) tensor([0.2262, 0.0687, 0.5213, 0.1838]) -Greedy action tensor([-1.0970, -0.8591, 0.1842, -0.8743]) tensor([0.1405, 0.1782, 0.5058, 0.1755]) -Greedy action tensor([ 1.0964, -0.0215, -0.2429, -0.6994]) tensor([0.5698, 0.1863, 0.1493, 0.0946]) -Greedy action tensor([-0.9416, -0.5759, 1.3780, -1.0897]) tensor([0.0742, 0.1070, 0.7548, 0.0640]) -Greedy action tensor([ 0.2360, -0.3443, -0.5902, -0.2146]) tensor([0.3796, 0.2124, 0.1661, 0.2419]) -Greedy action tensor([-0.1651, 1.3581, 0.4419, -0.6360]) tensor([0.1243, 0.5701, 0.2280, 0.0776]) -Greedy action tensor([-0.4973, -1.3584, -0.9472, -0.3123]) tensor([0.3064, 0.1295, 0.1954, 0.3687]) -Greedy action tensor([ 1.9448, -0.0540, 1.1695, -0.1301]) tensor([0.5809, 0.0787, 0.2675, 0.0729]) -Greedy action tensor([ 0.1240, -0.3234, -0.7103, 0.7497]) tensor([0.2536, 0.1621, 0.1101, 0.4742]) -Greedy action tensor([-0.5140, -0.8847, -0.5598, -0.3156]) tensor([0.2587, 0.1786, 0.2471, 0.3155]) -Greedy action tensor([ 0.5796, -0.2602, 0.4426, -0.0764]) tensor([0.3543, 0.1530, 0.3089, 0.1838]) -Greedy action tensor([ 2.0508, -0.3231, 0.3475, 0.8032]) tensor([0.6400, 0.0596, 0.1165, 0.1838]) -Greedy action tensor([ 0.6913, -1.5139, 0.9944, -0.6829]) tensor([0.3680, 0.0406, 0.4983, 0.0931]) -Greedy action tensor([ 0.2314, -0.4003, 0.1807, -0.2448]) tensor([0.3222, 0.1713, 0.3063, 0.2002]) -Greedy action tensor([ 0.8267, -1.1281, 0.2932, 0.8821]) tensor([0.3590, 0.0508, 0.2106, 0.3795]) -Greedy action tensor([ 0.7196, -0.3180, 0.3344, 0.3001]) tensor([0.3715, 0.1316, 0.2527, 0.2442]) -Greedy action tensor([ 0.4852, -0.1833, 0.8968, -0.5230]) tensor([0.2953, 0.1513, 0.4457, 0.1077]) -Greedy action tensor([-0.2036, -0.8035, -0.0520, 0.6199]) tensor([0.2004, 0.1100, 0.2331, 0.4565]) -Greedy action tensor([ 0.9233, -0.2536, 0.2502, 0.1712]) tensor([0.4367, 0.1346, 0.2228, 0.2059]) -Greedy action tensor([-0.0047, -0.3056, 0.3250, -0.3040]) tensor([0.2583, 0.1912, 0.3591, 0.1915]) -Greedy action tensor([ 0.2350, -0.9969, -0.2130, 0.4349]) tensor([0.3173, 0.0926, 0.2027, 0.3875]) -Greedy action tensor([-0.6445, -1.0767, -1.1600, 0.6747]) tensor([0.1670, 0.1084, 0.0998, 0.6248]) -Greedy action tensor([0.5470, 0.9159, 0.1029, 0.1382]) tensor([0.2665, 0.3854, 0.1709, 0.1771]) -Greedy action tensor([ 0.1858, -0.9479, 1.4417, -0.5147]) tensor([0.1877, 0.0604, 0.6588, 0.0931]) -Greedy action tensor([-0.0781, -1.3357, -0.3409, 0.9652]) tensor([0.2044, 0.0581, 0.1572, 0.5803]) -Greedy action tensor([ 0.2402, -0.6978, 0.4262, 0.0866]) tensor([0.2896, 0.1133, 0.3488, 0.2483]) -Greedy action tensor([-0.3553, -1.2733, 0.1242, 0.0330]) tensor([0.2228, 0.0890, 0.3598, 0.3285]) -Greedy action tensor([-1.0448, -1.4026, -0.0144, 0.5171]) tensor([0.1079, 0.0754, 0.3023, 0.5144]) -Greedy action tensor([0.9574, 0.0300, 0.1760, 0.5137]) tensor([0.4008, 0.1585, 0.1835, 0.2572]) -Greedy action tensor([ 0.1650, -0.3000, -0.6503, 0.4257]) tensor([0.2969, 0.1865, 0.1314, 0.3853]) -Greedy action tensor([-0.6054, -0.6303, 0.1685, -0.1788]) tensor([0.1762, 0.1719, 0.3820, 0.2699]) -Greedy action tensor([-0.3690, -0.4228, -0.0605, -0.1407]) tensor([0.2190, 0.2076, 0.2982, 0.2752]) -Greedy action tensor([-0.4734, -0.2927, -0.4021, -0.6906]) tensor([0.2453, 0.2939, 0.2634, 0.1974]) -Greedy action tensor([ 0.1157, -0.4097, 0.0665, 0.2951]) tensor([0.2674, 0.1581, 0.2546, 0.3199]) -Greedy action tensor([ 0.8587, -0.0649, 0.2783, -0.1286]) tensor([0.4293, 0.1705, 0.2403, 0.1600]) -Greedy action tensor([-1.4474, -0.9590, 0.3497, 0.0394]) tensor([0.0764, 0.1246, 0.4610, 0.3380]) -Greedy action tensor([-0.1647, -1.2002, 0.8235, -0.3901]) tensor([0.2066, 0.0734, 0.5551, 0.1649]) -Greedy action tensor([ 0.5286, -1.1391, -0.2463, -0.4376]) tensor([0.4926, 0.0929, 0.2270, 0.1874]) -Greedy action tensor([ 0.8081, -1.0588, 0.4322, 0.1065]) tensor([0.4279, 0.0662, 0.2938, 0.2121]) -Greedy action tensor([ 0.5762, 0.7418, 0.2757, -0.2623]) tensor([0.2983, 0.3520, 0.2208, 0.1289]) -Greedy action tensor([ 0.2602, -1.1079, 0.2249, -0.4659]) tensor([0.3698, 0.0942, 0.3570, 0.1789]) -Greedy action tensor([ 0.8193, -1.0065, -0.2012, 0.1821]) tensor([0.4877, 0.0786, 0.1758, 0.2579]) -Greedy action tensor([ 1.3916, -1.0089, 0.4369, 0.3515]) tensor([0.5467, 0.0496, 0.2105, 0.1932]) -Greedy action tensor([ 0.3114, 0.5386, 0.7243, -0.2381]) tensor([0.2302, 0.2890, 0.3479, 0.1329]) -Greedy action tensor([-0.3514, -0.4144, 0.0222, -0.5212]) tensor([0.2361, 0.2217, 0.3430, 0.1992]) -Greedy action tensor([ 0.4234, -1.0475, -0.0211, 0.0384]) tensor([0.3920, 0.0900, 0.2513, 0.2667]) -Greedy action tensor([ 0.2859, -1.3297, 0.6514, 0.0678]) tensor([0.2904, 0.0577, 0.4185, 0.2334]) -Greedy action tensor([ 0.6633, -1.4376, -0.3865, 0.1804]) tensor([0.4786, 0.0586, 0.1675, 0.2953]) -Greedy action tensor([ 0.6272, -2.0555, 0.9596, 0.1254]) tensor([0.3259, 0.0223, 0.4545, 0.1973]) -Greedy action tensor([ 1.4740, -0.3893, -0.5005, 0.6971]) tensor([0.5702, 0.0885, 0.0792, 0.2622]) -Greedy action tensor([ 1.6732, -0.7960, -0.4208, 0.4965]) tensor([0.6596, 0.0558, 0.0813, 0.2033]) -Greedy action tensor([ 0.4940, -0.3595, 0.2617, -0.1325]) tensor([0.3632, 0.1547, 0.2879, 0.1941]) -Greedy action tensor([ 1.4959, -0.4477, -0.2301, 0.4334]) tensor([0.6000, 0.0859, 0.1068, 0.2073]) -Greedy action tensor([ 2.1011, -1.0278, -0.3782, 0.5999]) tensor([0.7405, 0.0324, 0.0621, 0.1650]) -Greedy action tensor([ 1.5920, -0.1540, 0.1179, 0.1049]) tensor([0.6137, 0.1071, 0.1405, 0.1387]) -Greedy action tensor([ 1.2990, -0.2699, -0.6192, -0.1480]) tensor([0.6288, 0.1309, 0.0923, 0.1479]) -Greedy action tensor([ 1.7301, -0.4430, -0.4739, 0.6572]) tensor([0.6385, 0.0727, 0.0705, 0.2184]) -Greedy action tensor([ 1.6900, -0.7600, -0.2851, 0.3596]) tensor([0.6714, 0.0579, 0.0932, 0.1775]) -Greedy action tensor([ 1.3286, -0.6461, -0.2405, 0.3962]) tensor([0.5745, 0.0797, 0.1196, 0.2261]) -Greedy action tensor([ 1.1815, -0.2093, -0.0415, 0.1023]) tensor([0.5310, 0.1322, 0.1563, 0.1805]) -Greedy action tensor([ 1.8514, -0.6054, -0.7619, 0.4162]) tensor([0.7158, 0.0613, 0.0525, 0.1704]) -Greedy action tensor([ 1.3523, -0.4291, -0.2706, 0.1142]) tensor([0.6040, 0.1017, 0.1192, 0.1751]) -Greedy action tensor([ 1.9879, -0.7021, -0.3920, 0.5923]) tensor([0.7102, 0.0482, 0.0657, 0.1759]) -Greedy action tensor([ 1.2676, -0.3978, -0.2091, 0.2628]) tensor([0.5606, 0.1060, 0.1280, 0.2053]) -Greedy action tensor([ 1.9520, -0.6717, -0.6578, 0.2751]) tensor([0.7502, 0.0544, 0.0552, 0.1402]) -Greedy action tensor([ 1.1735, -0.0468, -0.5900, 0.4425]) tensor([0.5133, 0.1515, 0.0880, 0.2471]) -Greedy action tensor([ 1.6659, -0.2756, -0.6311, 0.4827]) tensor([0.6450, 0.0925, 0.0649, 0.1976]) -Greedy action tensor([ 1.1591, -0.3382, -0.1431, 0.3025]) tensor([0.5208, 0.1165, 0.1416, 0.2211]) -Greedy action tensor([ 0.5482, -0.4263, 0.0030, 0.1907]) tensor([0.3764, 0.1421, 0.2182, 0.2633]) -Greedy action tensor([ 0.9534, -0.1747, -0.4256, 0.2208]) tensor([0.4864, 0.1574, 0.1225, 0.2338]) -Greedy action tensor([ 2.3621, -1.1974, -0.2627, 0.8759]) tensor([0.7535, 0.0214, 0.0546, 0.1705]) -Greedy action tensor([ 1.4003, -0.3644, -0.3628, 0.4244]) tensor([0.5815, 0.0996, 0.0997, 0.2192]) -Greedy action tensor([ 1.8099, -0.8359, -0.1941, 0.4296]) tensor([0.6862, 0.0487, 0.0925, 0.1726]) -Greedy action tensor([ 1.6529, -0.3384, 0.0040, -0.0586]) tensor([0.6625, 0.0904, 0.1274, 0.1196]) -Greedy action tensor([ 0.2802, -0.2391, -0.1638, 0.1538]) tensor([0.3208, 0.1908, 0.2058, 0.2827]) -Greedy action tensor([ 1.4752, -0.4052, -0.6203, 0.4780]) tensor([0.6081, 0.0928, 0.0748, 0.2243]) -Greedy action tensor([ 1.9234, -1.0626, -0.0812, 0.1225]) tensor([0.7405, 0.0374, 0.0998, 0.1223]) -Greedy action tensor([ 1.7031, -1.0236, -0.2782, 0.2011]) tensor([0.7013, 0.0459, 0.0967, 0.1561]) -Greedy action tensor([ 1.9514, -0.6571, -0.7351, 0.2642]) tensor([0.7537, 0.0555, 0.0513, 0.1395]) -Greedy action tensor([ 1.3647, -0.5362, -0.0951, -0.0666]) tensor([0.6170, 0.0922, 0.1433, 0.1475]) -Greedy action tensor([ 1.7242, -0.5571, -0.7809, 0.3781]) tensor([0.6925, 0.0707, 0.0566, 0.1802]) -Greedy action tensor([ 1.6948, -0.7892, -0.4044, 0.2118]) tensor([0.6979, 0.0582, 0.0855, 0.1584]) -Greedy action tensor([ 1.7543, -0.3862, -0.8993, 0.6688]) tensor([0.6554, 0.0771, 0.0461, 0.2214]) -Greedy action tensor([ 1.5430, -0.5735, -0.3586, 0.1086]) tensor([0.6631, 0.0799, 0.0990, 0.1580]) -Greedy action tensor([ 1.3283, -0.2228, -0.4194, 0.3298]) tensor([0.5699, 0.1208, 0.0993, 0.2100]) -Greedy action tensor([ 1.5474, -0.5186, -0.4294, 0.2715]) tensor([0.6475, 0.0820, 0.0897, 0.1808]) -Greedy action tensor([ 1.9471, 0.3172, -0.2530, 0.1771]) tensor([0.6770, 0.1327, 0.0750, 0.1153]) -Greedy action tensor([ 1.8487, -0.5438, -0.2698, 0.5428]) tensor([0.6745, 0.0617, 0.0811, 0.1827]) -Greedy action tensor([ 1.9264, -0.7364, -0.6521, 0.8595]) tensor([0.6713, 0.0468, 0.0509, 0.2310]) -Greedy action tensor([ 1.2597, -0.2597, -0.6143, 0.5084]) tensor([0.5423, 0.1187, 0.0832, 0.2558]) -Greedy action tensor([ 1.7359, -0.5780, -0.4345, 0.1400]) tensor([0.7063, 0.0698, 0.0806, 0.1432]) -Greedy action tensor([ 1.5324, -0.5604, -0.6174, 0.3946]) tensor([0.6409, 0.0790, 0.0747, 0.2054]) -Greedy action tensor([ 1.7169, 0.4110, -0.4403, 0.1716]) tensor([0.6251, 0.1694, 0.0723, 0.1333]) -Greedy action tensor([ 1.4218, -0.2777, -0.4847, 0.5140]) tensor([0.5764, 0.1054, 0.0857, 0.2325]) -Greedy action tensor([ 1.5307, 0.3687, -0.5100, 0.0461]) tensor([0.5990, 0.1874, 0.0778, 0.1357]) -Greedy action tensor([ 1.0764, -0.1938, -0.7771, 0.0918]) tensor([0.5522, 0.1550, 0.0865, 0.2063]) -Greedy action tensor([ 1.1064, -0.1710, -0.6124, 0.2414]) tensor([0.5322, 0.1483, 0.0954, 0.2241]) -Greedy action tensor([ 1.6905, 0.1211, -0.5900, 0.5744]) tensor([0.6105, 0.1271, 0.0624, 0.2000]) -Greedy action tensor([ 2.6772, -1.2457, 0.0350, 1.0083]) tensor([0.7816, 0.0155, 0.0557, 0.1473]) -Greedy action tensor([ 1.4006, -0.5606, -0.3853, 0.3604]) tensor([0.6018, 0.0847, 0.1009, 0.2127]) -Greedy action tensor([ 1.7695, -0.0962, -0.3595, -0.1007]) tensor([0.7004, 0.1084, 0.0833, 0.1079]) -Greedy action tensor([ 1.7221, -0.7891, -0.3280, 0.6885]) tensor([0.6387, 0.0518, 0.0822, 0.2272]) -Greedy action tensor([ 1.4712, 0.0654, -0.3716, 0.4970]) tensor([0.5615, 0.1377, 0.0889, 0.2120]) -Greedy action tensor([ 1.3386, -0.1981, -0.4526, 0.1129]) tensor([0.5969, 0.1284, 0.0995, 0.1752]) -Greedy action tensor([ 1.9430, -0.7423, -0.1948, 0.5416]) tensor([0.6981, 0.0476, 0.0823, 0.1719]) -Greedy action tensor([ 1.8845, -0.2557, -0.4632, 0.5215]) tensor([0.6807, 0.0801, 0.0651, 0.1742]) -Greedy action tensor([ 1.4330, -0.0927, -0.5820, 0.3602]) tensor([0.5907, 0.1285, 0.0788, 0.2020]) -Greedy action tensor([ 1.2602, -0.4648, -0.7344, 0.3945]) tensor([0.5764, 0.1027, 0.0784, 0.2425]) -Greedy action tensor([ 2.6853, 0.7576, -0.0487, 0.3939]) tensor([0.7624, 0.1109, 0.0495, 0.0771]) -Greedy action tensor([ 1.4477, -0.5223, -0.1410, 0.5046]) tensor([0.5770, 0.0805, 0.1178, 0.2247]) -Greedy action tensor([ 1.5240, -0.4050, -0.3738, 0.6533]) tensor([0.5835, 0.0848, 0.0875, 0.2443]) -Greedy action tensor([ 1.8796, -0.7547, -0.3434, 0.3107]) tensor([0.7203, 0.0517, 0.0780, 0.1500]) -Greedy action tensor([ 1.2809, -0.4862, -0.1753, 0.3421]) tensor([0.5571, 0.0952, 0.1299, 0.2179]) -Greedy action tensor([ 1.0911, -0.2697, -0.3333, 0.1705]) tensor([0.5276, 0.1353, 0.1270, 0.2101]) -Greedy action tensor([ 1.2105, -0.3465, -0.5939, 0.0190]) tensor([0.5956, 0.1255, 0.0980, 0.1809]) -Greedy action tensor([ 1.1964, -0.7779, -0.3258, 0.2759]) tensor([0.5696, 0.0791, 0.1243, 0.2269]) -Greedy action tensor([ 1.7470, -0.6750, -0.3371, 0.1009]) tensor([0.7113, 0.0631, 0.0885, 0.1371]) -Greedy action tensor([ 1.6728, -0.6905, -0.6230, 0.2746]) tensor([0.6936, 0.0653, 0.0698, 0.1713]) -Greedy action tensor([ 0.7968, -0.2802, -0.4000, 0.4321]) tensor([0.4279, 0.1457, 0.1293, 0.2971]) -Greedy action tensor([ 0.8482, -0.1769, 0.0649, -1.1892]) tensor([0.5139, 0.1844, 0.2348, 0.0670]) -Greedy action tensor([ 1.3183, -0.6540, -0.2163, 0.5915]) tensor([0.5440, 0.0757, 0.1173, 0.2630]) -Greedy action tensor([ 2.2912, -0.9358, -0.4582, 0.3842]) tensor([0.7986, 0.0317, 0.0511, 0.1186]) -Greedy action tensor([ 1.8900, -0.6595, 0.0287, -0.1398]) tensor([0.7326, 0.0572, 0.1139, 0.0962]) -Greedy action tensor([ 1.4097, -0.4357, -0.1537, 0.3621]) tensor([0.5820, 0.0919, 0.1219, 0.2042]) -Greedy action tensor([ 1.8546, -0.9672, -0.3565, 0.4878]) tensor([0.7023, 0.0418, 0.0770, 0.1790]) -Greedy action tensor([ 2.0311, -1.0406, -0.1449, 0.6036]) tensor([0.7144, 0.0331, 0.0811, 0.1714]) -Greedy action tensor([ 1.5731, -0.4673, -0.2317, 0.5153]) tensor([0.6091, 0.0792, 0.1002, 0.2115]) -Greedy action tensor([ 1.6926, -0.4654, -0.3987, 0.6618]) tensor([0.6266, 0.0724, 0.0774, 0.2235]) -Greedy action tensor([ 1.1260, -0.1437, -0.9478, 0.4514]) tensor([0.5219, 0.1466, 0.0656, 0.2658]) -Greedy action tensor([ 2.0409, -1.1639, -0.2646, 0.7604]) tensor([0.7051, 0.0286, 0.0703, 0.1960]) -Greedy action tensor([ 0.6416, -0.4282, -0.1693, -0.1685]) tensor([0.4480, 0.1537, 0.1991, 0.1993]) -Greedy action tensor([ 1.1207, -0.4059, 0.2032, -0.5368]) tensor([0.5533, 0.1202, 0.2210, 0.1055]) -Greedy action tensor([ 0.6408, -0.5781, -0.0168, -0.0961]) tensor([0.4362, 0.1289, 0.2260, 0.2088]) -Greedy action tensor([ 0.5488, -0.0078, -0.0999, -0.1475]) tensor([0.3855, 0.2209, 0.2015, 0.1921]) -Greedy action tensor([ 0.8236, -0.2007, -0.0500, -0.0344]) tensor([0.4544, 0.1632, 0.1897, 0.1927]) -Greedy action tensor([ 0.4275, -0.2034, -0.0906, 0.0928]) tensor([0.3517, 0.1871, 0.2095, 0.2517]) -Greedy action tensor([ 0.7209, -0.4190, -0.2098, -0.3229]) tensor([0.4840, 0.1548, 0.1908, 0.1704]) -Greedy action tensor([ 0.9666, -0.7221, -0.0395, -0.5105]) tensor([0.5622, 0.1039, 0.2056, 0.1284]) -Greedy action tensor([ 0.8683, -0.3212, -0.2640, -0.4882]) tensor([0.5307, 0.1615, 0.1710, 0.1367]) -Greedy action tensor([ 1.0110, -0.2972, -0.0012, -0.3504]) tensor([0.5291, 0.1430, 0.1923, 0.1356]) -Greedy action tensor([ 0.7612, -0.3510, -0.1211, -0.0731]) tensor([0.4594, 0.1511, 0.1901, 0.1995]) -Greedy action tensor([ 0.9696, -0.5202, 0.0995, -0.2373]) tensor([0.5145, 0.1160, 0.2156, 0.1539]) -Greedy action tensor([ 0.6167, -0.6173, 0.0378, -0.1709]) tensor([0.4336, 0.1262, 0.2430, 0.1972]) -Greedy action tensor([ 0.5533, -0.0615, -0.0222, -0.3798]) tensor([0.4006, 0.2166, 0.2253, 0.1576]) -Greedy action tensor([ 0.4399, -0.1207, -0.0028, -0.4014]) tensor([0.3782, 0.2159, 0.2429, 0.1630]) -Greedy action tensor([ 0.4248, 0.0211, 0.3204, -0.1549]) tensor([0.3196, 0.2134, 0.2879, 0.1790]) -Greedy action tensor([ 0.4915, -0.1877, -0.0274, -0.4528]) tensor([0.4014, 0.2035, 0.2389, 0.1561]) -Greedy action tensor([ 0.6720, -0.2926, -0.0769, -0.2841]) tensor([0.4467, 0.1703, 0.2113, 0.1717]) -Greedy action tensor([ 0.7944, -0.4673, -0.1458, -0.2298]) tensor([0.4919, 0.1393, 0.1921, 0.1766]) -Greedy action tensor([ 0.7195, -0.2298, 0.0876, -0.4310]) tensor([0.4474, 0.1732, 0.2378, 0.1416]) -Greedy action tensor([ 0.5066, -0.1980, 0.1122, -0.3803]) tensor([0.3875, 0.1916, 0.2612, 0.1596]) -Greedy action tensor([ 0.8544, -0.8197, -0.0299, -0.4912]) tensor([0.5374, 0.1007, 0.2219, 0.1399]) -Greedy action tensor([ 0.7274, -0.9170, -0.0716, -0.4384]) tensor([0.5116, 0.0988, 0.2301, 0.1595]) -Greedy action tensor([ 0.7840, -0.1468, -0.0267, -0.1504]) tensor([0.4481, 0.1767, 0.1992, 0.1760]) -Greedy action tensor([ 1.0014, 0.1829, -0.0529, -0.1487]) tensor([0.4748, 0.2094, 0.1654, 0.1503]) -Greedy action tensor([ 0.6132, -0.2602, -0.0134, -0.2329]) tensor([0.4200, 0.1753, 0.2244, 0.1802]) -Greedy action tensor([ 0.5147, -0.2592, -0.0017, -0.0599]) tensor([0.3816, 0.1760, 0.2277, 0.2148]) -Greedy action tensor([ 0.8512, -0.7178, -0.1392, -0.2935]) tensor([0.5269, 0.1097, 0.1957, 0.1677]) -Greedy action tensor([ 1.0024, -0.8952, -0.0032, -0.4539]) tensor([0.5718, 0.0857, 0.2092, 0.1333]) -Greedy action tensor([ 0.9777, -0.6530, -0.0511, -0.3169]) tensor([0.5473, 0.1072, 0.1956, 0.1500]) -Greedy action tensor([ 1.0218, -1.2402, 0.0823, -0.6670]) tensor([0.5954, 0.0620, 0.2327, 0.1100]) -Greedy action tensor([ 0.5380, 0.0273, -0.0607, 0.0241]) tensor([0.3639, 0.2184, 0.2000, 0.2177]) -Greedy action tensor([ 0.7053, -0.4480, 0.0039, -0.2934]) tensor([0.4588, 0.1448, 0.2275, 0.1690]) -Greedy action tensor([ 0.7951, -0.5209, -0.0924, -0.2937]) tensor([0.4959, 0.1330, 0.2042, 0.1669]) -Greedy action tensor([ 0.1930, -0.0706, -0.1019, -0.2082]) tensor([0.3142, 0.2414, 0.2340, 0.2104]) -Greedy action tensor([ 0.4413, -0.3612, -0.3443, -0.0098]) tensor([0.3936, 0.1764, 0.1794, 0.2507]) -Greedy action tensor([ 0.5388, -0.2746, 0.1290, -0.4082]) tensor([0.4008, 0.1777, 0.2660, 0.1555]) -Greedy action tensor([ 0.9763, -0.3742, -0.0505, -0.5561]) tensor([0.5455, 0.1413, 0.1954, 0.1178]) -Greedy action tensor([ 0.5074, 0.0966, 0.0033, -0.1534]) tensor([0.3592, 0.2382, 0.2170, 0.1855]) -Greedy action tensor([ 1.0652, -0.7309, -0.0609, -0.6817]) tensor([0.6008, 0.0997, 0.1948, 0.1047]) -Greedy action tensor([ 0.7442, -0.6441, 0.0410, -0.4793]) tensor([0.4905, 0.1224, 0.2428, 0.1443]) -Greedy action tensor([ 1.0849, -0.7748, 0.1084, -0.6501]) tensor([0.5852, 0.0911, 0.2204, 0.1032]) -Greedy action tensor([ 0.4011, 0.0652, 0.0677, -0.3839]) tensor([0.3463, 0.2475, 0.2482, 0.1580]) -Greedy action tensor([ 0.8325, -0.5031, 0.0426, -0.5182]) tensor([0.5061, 0.1331, 0.2297, 0.1311]) -Greedy action tensor([ 1.1582, -0.8908, 0.1999, -0.7382]) tensor([0.6015, 0.0775, 0.2307, 0.0903]) -Greedy action tensor([ 0.7181, -0.2388, -0.0498, -0.1128]) tensor([0.4379, 0.1682, 0.2032, 0.1908]) -Greedy action tensor([ 1.1428, -0.5993, -0.1922, -0.3187]) tensor([0.5987, 0.1049, 0.1576, 0.1388]) -Greedy action tensor([ 0.7577, -0.5390, -0.0976, -0.5042]) tensor([0.5046, 0.1380, 0.2145, 0.1429]) -Greedy action tensor([ 0.2412, -0.0211, -0.0542, -0.3371]) tensor([0.3253, 0.2502, 0.2421, 0.1824]) -Greedy action tensor([ 0.6954, -0.6810, 0.0187, -0.2908]) tensor([0.4687, 0.1183, 0.2382, 0.1748]) -Greedy action tensor([ 0.8358, -0.5103, -0.0362, -0.4003]) tensor([0.5079, 0.1322, 0.2124, 0.1476]) -Greedy action tensor([ 0.7150, -0.3381, -0.0081, -0.1098]) tensor([0.4401, 0.1535, 0.2135, 0.1929]) -Greedy action tensor([ 0.6729, -0.2402, 0.0655, -0.4188]) tensor([0.4383, 0.1759, 0.2388, 0.1471]) -Greedy action tensor([ 0.7740, -0.4587, -0.0582, -0.1072]) tensor([0.4671, 0.1362, 0.2032, 0.1935]) -Greedy action tensor([ 0.7545, -0.2016, 0.0219, -0.4764]) tensor([0.4636, 0.1782, 0.2228, 0.1354]) -Greedy action tensor([ 0.7273, -0.6230, 0.0355, -0.2673]) tensor([0.4696, 0.1217, 0.2351, 0.1737]) -Greedy action tensor([ 0.6289, -0.4248, 0.0854, -0.4633]) tensor([0.4415, 0.1539, 0.2564, 0.1481]) -Greedy action tensor([ 0.3473, -0.0393, -0.0684, -0.1157]) tensor([0.3369, 0.2289, 0.2223, 0.2120]) -Greedy action tensor([ 0.6419, -0.3812, 0.0249, -0.3288]) tensor([0.4390, 0.1578, 0.2369, 0.1663]) -Greedy action tensor([ 0.8632, -0.2384, -0.1102, -0.1113]) tensor([0.4790, 0.1592, 0.1810, 0.1808]) -Greedy action tensor([ 0.2776, 0.2073, -0.2471, 0.0695]) tensor([0.2998, 0.2794, 0.1774, 0.2434]) -Greedy action tensor([ 0.6611, -0.1692, 0.0190, -0.3856]) tensor([0.4323, 0.1884, 0.2275, 0.1518]) -Greedy action tensor([ 0.6277, 0.0544, 0.2330, -0.4384]) tensor([0.3873, 0.2183, 0.2610, 0.1334]) -Greedy action tensor([ 1.2083, -1.0643, 0.0938, -0.4783]) tensor([0.6187, 0.0638, 0.2030, 0.1146]) -Greedy action tensor([ 1.1125, -0.6449, -0.0884, -0.3166]) tensor([0.5838, 0.1007, 0.1757, 0.1398]) -Greedy action tensor([ 0.8912, -0.3434, -0.1413, -0.5118]) tensor([0.5283, 0.1537, 0.1881, 0.1299]) -Greedy action tensor([ 0.2834, -0.1711, -0.0647, -0.0060]) tensor([0.3237, 0.2055, 0.2285, 0.2423]) -Greedy action tensor([ 0.8953, -0.4890, 0.1589, -0.2229]) tensor([0.4863, 0.1218, 0.2329, 0.1590]) -Greedy action tensor([ 0.3170, 0.1593, -0.0175, -0.0535]) tensor([0.3067, 0.2620, 0.2195, 0.2118]) -Greedy action tensor([ 1.0295, 0.2541, 0.0117, -0.2558]) tensor([0.4765, 0.2194, 0.1722, 0.1318]) -Greedy action tensor([ 1.1583, -0.5112, -0.2049, -0.2889]) tensor([0.5954, 0.1121, 0.1523, 0.1401]) -Greedy action tensor([ 0.6396, -0.6278, -0.0185, -0.2230]) tensor([0.4501, 0.1268, 0.2331, 0.1900]) -Greedy action tensor([ 0.5157, -0.3720, -0.1239, -0.1859]) tensor([0.4107, 0.1690, 0.2166, 0.2036]) -Greedy action tensor([ 0.8463, -0.4630, 0.0770, -0.5217]) tensor([0.5030, 0.1358, 0.2331, 0.1281]) -Greedy action tensor([ 0.5357, -0.5494, -0.0596, -0.1549]) tensor([0.4183, 0.1413, 0.2307, 0.2097]) -Greedy action tensor([ 0.5200, -0.1801, 0.0209, -0.1428]) tensor([0.3818, 0.1896, 0.2318, 0.1968]) -Greedy action tensor([ 0.3336, -0.3950, 0.1309, -0.4550]) tensor([0.3632, 0.1753, 0.2965, 0.1651]) -Greedy action tensor([ 0.4701, -0.3752, -0.0771, -0.1712]) tensor([0.3945, 0.1694, 0.2283, 0.2078]) -Greedy action tensor([ 0.7759, -0.3002, -0.0368, -0.3653]) tensor([0.4753, 0.1620, 0.2109, 0.1518]) -Greedy action tensor([ 0.8016, -0.4656, 0.0436, -0.3587]) tensor([0.4846, 0.1365, 0.2271, 0.1519]) -Greedy action tensor([ 0.6529, -0.6031, -0.0952, -0.1368]) tensor([0.4521, 0.1288, 0.2140, 0.2052]) -Greedy action tensor([-1.7815, -0.4270, 0.5850, -0.1035]) tensor([0.0479, 0.1855, 0.5103, 0.2563]) -Greedy action tensor([-1.9257, -0.4510, 0.6593, -0.1745]) tensor([0.0410, 0.1791, 0.5437, 0.2362]) -Greedy action tensor([-1.8847, -0.3804, 0.6360, -0.1464]) tensor([0.0423, 0.1905, 0.5264, 0.2407]) -Greedy action tensor([-1.3828, -0.4182, 0.4264, 0.1466]) tensor([0.0697, 0.1829, 0.4256, 0.3217]) -Greedy action tensor([-1.9322, -0.4059, 0.6551, -0.1683]) tensor([0.0404, 0.1861, 0.5376, 0.2359]) -Greedy action tensor([-1.9136, -0.4743, 0.6662, -0.1637]) tensor([0.0414, 0.1745, 0.5460, 0.2381]) -Greedy action tensor([-1.9155, -0.3958, 0.6406, -0.1642]) tensor([0.0413, 0.1887, 0.5321, 0.2379]) -Greedy action tensor([-1.5638, -0.2619, 0.6003, 0.0065]) tensor([0.0550, 0.2021, 0.4786, 0.2643]) -Greedy action tensor([-1.6923, -0.4210, 0.5477, -0.0228]) tensor([0.0519, 0.1850, 0.4875, 0.2756]) -Greedy action tensor([-1.4701, -0.4465, 0.4925, 0.1530]) tensor([0.0626, 0.1743, 0.4457, 0.3174]) -Greedy action tensor([-1.9313, -0.4336, 0.6627, -0.1726]) tensor([0.0406, 0.1813, 0.5427, 0.2354]) -Greedy action tensor([-1.8515, -0.2045, 0.5838, -0.1169]) tensor([0.0430, 0.2230, 0.4906, 0.2434]) -Greedy action tensor([-1.8905, -0.4494, 0.6782, -0.1362]) tensor([0.0416, 0.1757, 0.5425, 0.2403]) -Greedy action tensor([-1.0919, -0.3150, 0.5674, 0.6487]) tensor([0.0708, 0.1539, 0.3719, 0.4034]) -Greedy action tensor([-0.8985, 0.2051, 0.1435, -0.0185]) tensor([0.1080, 0.3256, 0.3061, 0.2603]) -Greedy action tensor([-1.8920, -0.4526, 0.6464, -0.1535]) tensor([0.0424, 0.1790, 0.5372, 0.2414]) -Greedy action tensor([-1.4053, 0.0673, 0.4807, -0.6891]) tensor([0.0714, 0.3115, 0.4709, 0.1462]) -Greedy action tensor([-1.9338, -0.4335, 0.6558, -0.1781]) tensor([0.0407, 0.1823, 0.5418, 0.2353]) -Greedy action tensor([-1.7555, -0.3170, 0.6469, -0.0702]) tensor([0.0462, 0.1946, 0.5102, 0.2491]) -Greedy action tensor([-1.8870, -0.4063, 0.6368, -0.1482]) tensor([0.0424, 0.1866, 0.5295, 0.2415]) -Greedy action tensor([-1.9468, -0.4509, 0.6682, -0.1819]) tensor([0.0400, 0.1787, 0.5473, 0.2339]) -Greedy action tensor([-1.5772, -0.2805, 0.4908, -0.1046]) tensor([0.0591, 0.2161, 0.4672, 0.2576]) -Greedy action tensor([-1.9406, -0.4555, 0.6803, -0.1713]) tensor([0.0400, 0.1764, 0.5493, 0.2344]) -Greedy action tensor([-1.4185, -0.4841, 0.5067, 0.3527]) tensor([0.0614, 0.1564, 0.4212, 0.3611]) -Greedy action tensor([-1.7857, -0.3993, 0.6358, -0.1086]) tensor([0.0463, 0.1851, 0.5211, 0.2475]) -Greedy action tensor([-1.5237, -0.0110, 0.4074, -0.0703]) tensor([0.0598, 0.2716, 0.4127, 0.2559]) -Greedy action tensor([-1.8259, -0.3172, 0.6091, -0.0927]) tensor([0.0443, 0.2001, 0.5052, 0.2504]) -Greedy action tensor([-1.2317, 0.1295, -0.4646, -0.5191]) tensor([0.1100, 0.4290, 0.2368, 0.2243]) -Greedy action tensor([-1.5958, -0.4731, 0.4961, 0.0129]) tensor([0.0582, 0.1790, 0.4718, 0.2910]) -Greedy action tensor([-1.4482, -0.5702, 0.4034, 0.0951]) tensor([0.0692, 0.1664, 0.4406, 0.3237]) -Greedy action tensor([-1.6291, -0.3990, 0.6767, 0.2566]) tensor([0.0475, 0.1626, 0.4767, 0.3132]) -Greedy action tensor([-1.6478, -0.5117, 0.5103, 0.0468]) tensor([0.0549, 0.1710, 0.4752, 0.2989]) -Greedy action tensor([-0.7551, 0.1893, 0.1467, -0.0346]) tensor([0.1236, 0.3178, 0.3046, 0.2540]) -Greedy action tensor([-1.9300, -0.3963, 0.6538, -0.1680]) tensor([0.0405, 0.1876, 0.5362, 0.2357]) -Greedy action tensor([-1.9247, -0.4192, 0.6560, -0.1641]) tensor([0.0408, 0.1837, 0.5384, 0.2371]) -Greedy action tensor([-1.8833, -0.4563, 0.6401, -0.1514]) tensor([0.0429, 0.1789, 0.5355, 0.2427]) -Greedy action tensor([-1.6925, -0.3329, 0.6942, 0.0576]) tensor([0.0465, 0.1809, 0.5053, 0.2673]) -Greedy action tensor([-1.8129, -0.2442, 0.6089, -0.0981]) tensor([0.0442, 0.2122, 0.4980, 0.2456]) -Greedy action tensor([-1.7067, -0.5401, 0.6848, 0.0976]) tensor([0.0471, 0.1513, 0.5152, 0.2864]) -Greedy action tensor([-1.8433, -0.2142, 0.5725, -0.1019]) tensor([0.0435, 0.2217, 0.4868, 0.2480]) -Greedy action tensor([-1.1380, -0.2116, 0.2855, 0.2651]) tensor([0.0851, 0.2150, 0.3535, 0.3464]) -Greedy action tensor([-1.8895, -0.2080, 0.5996, -0.1326]) tensor([0.0413, 0.2219, 0.4976, 0.2393]) -Greedy action tensor([-0.6681, 0.3226, 0.1553, -0.3121]) tensor([0.1352, 0.3640, 0.3079, 0.1929]) -Greedy action tensor([-1.9407, -0.4429, 0.6640, -0.1784]) tensor([0.0403, 0.1801, 0.5449, 0.2347]) -Greedy action tensor([-1.7923, -0.3632, 0.5715, -0.1023]) tensor([0.0471, 0.1967, 0.5009, 0.2553]) -Greedy action tensor([-1.9231, -0.4261, 0.6618, -0.1595]) tensor([0.0407, 0.1819, 0.5399, 0.2375]) -Greedy action tensor([-0.8988, -0.2531, 0.3563, 0.9683]) tensor([0.0776, 0.1480, 0.2723, 0.5021]) -Greedy action tensor([-1.0430, -0.1128, 0.4910, 0.4412]) tensor([0.0795, 0.2015, 0.3685, 0.3506]) -Greedy action tensor([-1.8170, -0.3780, 0.6261, -0.1024]) tensor([0.0449, 0.1892, 0.5166, 0.2493]) -Greedy action tensor([-0.7878, 0.7797, -0.0599, -0.1042]) tensor([0.1016, 0.4869, 0.2103, 0.2012]) -Greedy action tensor([-1.1416, -0.3402, 1.0262, 1.1879]) tensor([0.0450, 0.1002, 0.3929, 0.4619]) -Greedy action tensor([-1.5143, -0.4030, 0.4353, -0.0213]) tensor([0.0645, 0.1958, 0.4528, 0.2869]) -Greedy action tensor([-1.9347, -0.4377, 0.6661, -0.1696]) tensor([0.0403, 0.1803, 0.5437, 0.2357]) -Greedy action tensor([-1.8898, -0.3281, 0.6231, -0.1697]) tensor([0.0422, 0.2012, 0.5209, 0.2357]) -Greedy action tensor([-1.6847, -0.3276, 0.5861, 0.0141]) tensor([0.0499, 0.1939, 0.4834, 0.2728]) -Greedy action tensor([-1.9117, -0.4093, 0.6549, -0.1409]) tensor([0.0410, 0.1842, 0.5339, 0.2409]) -Greedy action tensor([-1.2815, -0.3568, 0.3513, 0.1350]) tensor([0.0784, 0.1976, 0.4011, 0.3230]) -Greedy action tensor([-1.3594, -0.5194, 0.4121, 0.1580]) tensor([0.0727, 0.1684, 0.4274, 0.3315]) -Greedy action tensor([-1.7439, -0.1808, 0.5159, -0.0990]) tensor([0.0487, 0.2325, 0.4666, 0.2523]) -Greedy action tensor([-1.7018, -0.3335, 0.5295, -0.0916]) tensor([0.0520, 0.2041, 0.4839, 0.2600]) -Greedy action tensor([-1.8565, -0.4411, 0.6406, -0.0926]) tensor([0.0433, 0.1783, 0.5258, 0.2526]) -Greedy action tensor([-1.8822, -0.3522, 0.6432, -0.1423]) tensor([0.0420, 0.1940, 0.5248, 0.2393]) -Greedy action tensor([-0.7115, -0.5977, 0.1870, 0.1646]) tensor([0.1433, 0.1606, 0.3519, 0.3442]) -Greedy action tensor([-1.3927, 0.5353, 0.2878, -0.0139]) tensor([0.0581, 0.3994, 0.3118, 0.2306]) -Greedy action tensor([-1.8607, -0.4488, 0.6317, -0.1380]) tensor([0.0439, 0.1800, 0.5304, 0.2457]) -Greedy action tensor([-0.2303, -0.2588, 1.1785, 1.4670]) tensor([0.0868, 0.0844, 0.3551, 0.4738]) -Greedy action tensor([-1.7390, -0.2389, 0.5081, -0.0421]) tensor([0.0490, 0.2197, 0.4638, 0.2675]) -Greedy action tensor([-1.7973, 0.0408, 0.5434, -0.1831]) tensor([0.0441, 0.2769, 0.4577, 0.2213]) -Greedy action tensor([-1.8521, -0.2910, 0.6039, -0.1049]) tensor([0.0432, 0.2057, 0.5034, 0.2478]) -Greedy action tensor([-1.7396, -0.4561, 0.5708, -0.1409]) tensor([0.0509, 0.1838, 0.5133, 0.2519]) -Greedy action tensor([-1.5082, -0.2601, 0.6224, 0.0555]) tensor([0.0566, 0.1970, 0.4763, 0.2702]) -Greedy action tensor([-1.6684, -0.2412, 0.5924, -0.3380]) tensor([0.0539, 0.2248, 0.5173, 0.2040]) -Greedy action tensor([-1.9375, -0.4438, 0.6645, -0.1761]) tensor([0.0404, 0.1798, 0.5448, 0.2350]) -Greedy action tensor([-1.8553, -0.4124, 0.6858, -0.0475]) tensor([0.0416, 0.1762, 0.5284, 0.2538]) -Greedy action tensor([-1.9044, -0.4598, 0.6511, -0.1589]) tensor([0.0419, 0.1778, 0.5400, 0.2402]) -Greedy action tensor([-1.9133, -0.3167, 0.6041, -0.1617]) tensor([0.0415, 0.2049, 0.5144, 0.2392]) -Greedy action tensor([-1.8520, -0.4351, 0.6268, -0.1294]) tensor([0.0441, 0.1821, 0.5266, 0.2472]) -Greedy action tensor([-1.5599, -0.4151, 0.6957, 0.4616]) tensor([0.0471, 0.1480, 0.4494, 0.3556]) -Greedy action tensor([0.4497, 1.1346, 0.0112, 0.7064]) tensor([0.2032, 0.4031, 0.1311, 0.2627]) -Greedy action tensor([-1.5651, 0.0732, 0.4110, 0.0605]) tensor([0.0542, 0.2790, 0.3912, 0.2755]) -Greedy action tensor([-0.5783, 0.9411, 0.2386, 0.6157]) tensor([0.0898, 0.4104, 0.2033, 0.2964]) -Greedy action tensor([ 1.1428, 0.1416, -0.6541, -0.1931]) tensor([0.5567, 0.2046, 0.0923, 0.1464]) -Greedy action tensor([ 1.4725, -0.4839, -0.5785, 0.3821]) tensor([0.6226, 0.0880, 0.0801, 0.2093]) -Greedy action tensor([ 1.4151, -0.9051, -0.2208, 0.3194]) tensor([0.6145, 0.0604, 0.1197, 0.2054]) -Greedy action tensor([ 2.5052, -1.7173, 0.1203, 1.1936]) tensor([0.7267, 0.0107, 0.0669, 0.1958]) -Greedy action tensor([ 1.5710, -0.2990, -0.5917, 0.4190]) tensor([0.6309, 0.0972, 0.0726, 0.1993]) -Greedy action tensor([ 1.6158, -0.4062, -0.1366, 0.1846]) tensor([0.6473, 0.0857, 0.1122, 0.1547]) -Greedy action tensor([ 1.1361, -0.3495, -0.5818, 0.1829]) tensor([0.5583, 0.1264, 0.1002, 0.2152]) -Greedy action tensor([ 1.7501, -0.9727, -0.4108, 0.9657]) tensor([0.6108, 0.0401, 0.0704, 0.2787]) -Greedy action tensor([ 1.7040, -1.5897, -0.1099, -0.3261]) tensor([0.7511, 0.0279, 0.1224, 0.0986]) -Greedy action tensor([ 1.4496, -0.2227, -0.3829, 0.5928]) tensor([0.5642, 0.1060, 0.0903, 0.2395]) -Greedy action tensor([ 1.9852, -1.0575, -0.4198, 0.1959]) tensor([0.7663, 0.0366, 0.0692, 0.1280]) -Greedy action tensor([ 1.5469, 0.0081, -0.6479, 0.6794]) tensor([0.5727, 0.1229, 0.0638, 0.2405]) -Greedy action tensor([ 1.3140, -0.4317, -0.0413, 0.4246]) tensor([0.5425, 0.0947, 0.1399, 0.2229]) -Greedy action tensor([ 1.6724, -0.2518, -0.4585, 0.6001]) tensor([0.6223, 0.0909, 0.0739, 0.2130]) -Greedy action tensor([ 1.4020, -0.2375, -0.4217, 0.1546]) tensor([0.6087, 0.1181, 0.0983, 0.1749]) -Greedy action tensor([ 1.1918, 0.0412, -0.6110, 0.2516]) tensor([0.5342, 0.1691, 0.0881, 0.2086]) -Greedy action tensor([ 1.6354, -0.8626, -0.2593, 0.3998]) tensor([0.6565, 0.0540, 0.0987, 0.1908]) -Greedy action tensor([ 2.1242, -0.9769, -0.2482, 0.6759]) tensor([0.7282, 0.0328, 0.0679, 0.1711]) -Greedy action tensor([ 2.0871, -0.3053, -0.6858, 0.5011]) tensor([0.7360, 0.0673, 0.0460, 0.1507]) -Greedy action tensor([ 1.2822, -0.3826, -0.1713, 0.2102]) tensor([0.5665, 0.1072, 0.1324, 0.1939]) -Greedy action tensor([ 1.7959, -0.9154, -0.2398, 0.6148]) tensor([0.6649, 0.0442, 0.0868, 0.2041]) -Greedy action tensor([ 1.3293, -0.4113, -0.3283, 0.3104]) tensor([0.5790, 0.1016, 0.1104, 0.2090]) -Greedy action tensor([ 0.6085, -0.5049, -0.1801, 0.2152]) tensor([0.4069, 0.1336, 0.1849, 0.2746]) -Greedy action tensor([ 1.2572, -0.6065, -0.2688, 0.0829]) tensor([0.5947, 0.0922, 0.1293, 0.1838]) -Greedy action tensor([ 1.1573, -0.5884, -0.2064, -0.5032]) tensor([0.6172, 0.1077, 0.1578, 0.1173]) -Greedy action tensor([ 1.5450, -0.4438, -0.6867, 0.1463]) tensor([0.6706, 0.0918, 0.0720, 0.1656]) -Greedy action tensor([ 1.4327, -0.5716, -0.1968, 0.0872]) tensor([0.6284, 0.0847, 0.1232, 0.1637]) -Greedy action tensor([ 1.9356, -0.6926, -0.9390, 0.2862]) tensor([0.7571, 0.0547, 0.0427, 0.1455]) -Greedy action tensor([ 1.3076, -0.5345, -0.1472, 0.2396]) tensor([0.5761, 0.0913, 0.1345, 0.1980]) -Greedy action tensor([ 1.4644, -0.5104, -0.6754, 0.4876]) tensor([0.6124, 0.0850, 0.0721, 0.2306]) -Greedy action tensor([ 1.6745, -0.6115, -0.5003, -0.0416]) tensor([0.7168, 0.0729, 0.0815, 0.1289]) -Greedy action tensor([ 1.1550, 0.0683, -0.9541, 0.0848]) tensor([0.5551, 0.1872, 0.0674, 0.1904]) -Greedy action tensor([ 1.2872, -0.2287, -0.4270, -0.1423]) tensor([0.6101, 0.1340, 0.1099, 0.1461]) -Greedy action tensor([ 1.5141, -0.6406, -0.0443, 0.5089]) tensor([0.5909, 0.0685, 0.1244, 0.2162]) -Greedy action tensor([ 1.7048, -0.4914, -0.6109, 0.1293]) tensor([0.7058, 0.0785, 0.0697, 0.1460]) -Greedy action tensor([ 1.8432, -0.5017, -0.6913, 0.3575]) tensor([0.7135, 0.0684, 0.0566, 0.1615]) -Greedy action tensor([ 0.7745, -0.2220, -0.0297, -0.0637]) tensor([0.4446, 0.1641, 0.1989, 0.1923]) -Greedy action tensor([ 1.6910, 0.1912, -0.2736, 0.5058]) tensor([0.5991, 0.1337, 0.0840, 0.1831]) -Greedy action tensor([ 1.3336, -0.1366, -0.5019, 0.1923]) tensor([0.5852, 0.1345, 0.0934, 0.1869]) -Greedy action tensor([ 1.6305, -0.2242, -0.3877, 0.5017]) tensor([0.6200, 0.0970, 0.0824, 0.2005]) -Greedy action tensor([ 2.4367, -1.5747, 0.0797, 1.0179]) tensor([0.7381, 0.0134, 0.0699, 0.1786]) -Greedy action tensor([ 0.8412, -0.1786, -0.2052, 0.3353]) tensor([0.4320, 0.1558, 0.1517, 0.2605]) -Greedy action tensor([ 1.6094, -0.7191, -0.4663, 0.5436]) tensor([0.6380, 0.0622, 0.0800, 0.2198]) -Greedy action tensor([ 1.6313, 0.0516, -0.6012, 0.4146]) tensor([0.6213, 0.1280, 0.0666, 0.1840]) -Greedy action tensor([ 1.1037, -0.1518, 0.0099, -0.0117]) tensor([0.5134, 0.1463, 0.1720, 0.1683]) -Greedy action tensor([ 1.9244, 0.1625, -0.1478, 0.1501]) tensor([0.6816, 0.1170, 0.0858, 0.1156]) -Greedy action tensor([ 1.4320, -0.6089, -0.2932, 0.0886]) tensor([0.6373, 0.0828, 0.1135, 0.1663]) -Greedy action tensor([ 1.5553, -0.4272, -0.3558, 0.0784]) tensor([0.6605, 0.0910, 0.0977, 0.1508]) -Greedy action tensor([ 2.4171, -0.2022, -0.5190, 0.2878]) tensor([0.8033, 0.0585, 0.0426, 0.0955]) -Greedy action tensor([ 1.3365, -0.4237, -0.0884, 0.1440]) tensor([0.5828, 0.1002, 0.1402, 0.1768]) -Greedy action tensor([ 0.7871, -0.0900, 0.1127, 0.0831]) tensor([0.4132, 0.1719, 0.2105, 0.2044]) -Greedy action tensor([ 1.5155, -0.6553, -0.2878, 0.4431]) tensor([0.6169, 0.0704, 0.1016, 0.2111]) -Greedy action tensor([ 1.3540, -0.5968, -0.4193, 0.2897]) tensor([0.6036, 0.0858, 0.1025, 0.2082]) -Greedy action tensor([ 1.7896, -0.5427, -0.1347, 0.1843]) tensor([0.6926, 0.0672, 0.1011, 0.1391]) -Greedy action tensor([ 1.9699, -1.2985, 0.0525, 0.8281]) tensor([0.6648, 0.0253, 0.0977, 0.2122]) -Greedy action tensor([ 0.9424, -0.2001, -0.5428, 0.5520]) tensor([0.4500, 0.1436, 0.1019, 0.3046]) -Greedy action tensor([ 1.2430, -0.2079, -0.5310, 0.1280]) tensor([0.5774, 0.1353, 0.0980, 0.1893]) -Greedy action tensor([ 2.0171, -0.8313, -0.2659, 0.1806]) tensor([0.7580, 0.0439, 0.0773, 0.1208]) -Greedy action tensor([ 1.3478, -1.0553, 0.2438, 0.3732]) tensor([0.5558, 0.0503, 0.1843, 0.2097]) -Greedy action tensor([ 1.2505, -0.2352, -0.0893, -0.0240]) tensor([0.5657, 0.1280, 0.1482, 0.1581]) -Greedy action tensor([ 1.3828, -0.2828, -0.6452, 0.2546]) tensor([0.6082, 0.1150, 0.0800, 0.1968]) -Greedy action tensor([ 1.6807, -0.8846, -0.3036, 0.3305]) tensor([0.6786, 0.0522, 0.0933, 0.1759]) -Greedy action tensor([ 1.7104, -0.1741, -0.5146, 0.4954]) tensor([0.6424, 0.0976, 0.0694, 0.1906]) -Greedy action tensor([ 0.9506, -0.2116, -0.3159, 0.3341]) tensor([0.4685, 0.1465, 0.1320, 0.2529]) -Greedy action tensor([ 1.6661, -0.2848, -0.6224, 0.6480]) tensor([0.6231, 0.0886, 0.0632, 0.2251]) -Greedy action tensor([ 2.0603, -0.1165, -0.6331, 0.4981]) tensor([0.7190, 0.0815, 0.0486, 0.1508]) -Greedy action tensor([ 1.4807, -0.4676, -0.1113, 0.5063]) tensor([0.5802, 0.0827, 0.1181, 0.2190]) -Greedy action tensor([ 0.9750, -0.4350, -0.0919, 0.0024]) tensor([0.5086, 0.1242, 0.1750, 0.1923]) -Greedy action tensor([ 1.2931, -0.4859, -0.4401, 0.0987]) tensor([0.6067, 0.1024, 0.1072, 0.1837]) -Greedy action tensor([ 1.1315, -0.1449, -1.1967, 0.2695]) tensor([0.5559, 0.1551, 0.0542, 0.2348]) -Greedy action tensor([ 1.1507, -0.5084, -0.4550, 0.5772]) tensor([0.5116, 0.0974, 0.1027, 0.2883]) -Greedy action tensor([ 1.0551, 0.0524, -0.4217, 0.1553]) tensor([0.4995, 0.1833, 0.1141, 0.2031]) -Greedy action tensor([ 1.2779, -0.6082, -0.1907, 0.3844]) tensor([0.5583, 0.0847, 0.1285, 0.2285]) -Greedy action tensor([ 2.3755, -0.9611, -0.4376, 0.3654]) tensor([0.8133, 0.0289, 0.0488, 0.1090]) -Greedy action tensor([ 1.5735, -0.6974, -0.1479, 0.2435]) tensor([0.6466, 0.0667, 0.1156, 0.1710]) -Greedy action tensor([ 1.1411, -0.6194, -0.2409, 0.3450]) tensor([0.5336, 0.0918, 0.1340, 0.2407]) -Greedy action tensor([ 1.4744, 0.0570, -0.6081, 0.0477]) tensor([0.6223, 0.1508, 0.0775, 0.1494]) -Greedy action tensor([ 2.2330, -1.4246, -0.2795, 0.6040]) tensor([0.7675, 0.0198, 0.0622, 0.1505]) -Greedy action tensor([ 1.5739, -0.1170, -0.3814, 0.3425]) tensor([0.6182, 0.1140, 0.0875, 0.1804]) -Greedy action tensor([ 2.3714, -1.2677, -0.2367, 0.5444]) tensor([0.7931, 0.0208, 0.0584, 0.1276]) -Greedy action tensor([ 2.1635, -0.7080, -0.2614, 0.7594]) tensor([0.7191, 0.0407, 0.0636, 0.1766]) -Greedy action tensor([ 0.8329, -0.5501, -0.0322, -0.2388]) tensor([0.4965, 0.1245, 0.2090, 0.1700]) -Greedy action tensor([ 0.7989, -0.4273, -0.0362, -0.2182]) tensor([0.4787, 0.1405, 0.2077, 0.1731]) -Greedy action tensor([ 0.7341, -0.3122, -0.1720, -0.3931]) tensor([0.4809, 0.1689, 0.1943, 0.1558]) -Greedy action tensor([ 0.4789, 0.0956, -0.0029, 0.0104]) tensor([0.3419, 0.2330, 0.2112, 0.2140]) -Greedy action tensor([ 0.8317, -0.6651, -0.0113, -0.4092]) tensor([0.5146, 0.1152, 0.2215, 0.1488]) -Greedy action tensor([ 1.0955, -0.5403, -0.1645, -0.3848]) tensor([0.5862, 0.1142, 0.1663, 0.1334]) -Greedy action tensor([ 0.7636, -0.4796, -0.1061, -0.1865]) tensor([0.4775, 0.1377, 0.2001, 0.1846]) -Greedy action tensor([ 0.8960, -0.3605, 0.0234, -0.7141]) tensor([0.5257, 0.1496, 0.2196, 0.1051]) -Greedy action tensor([ 0.8882, -0.5953, 0.1125, -0.3616]) tensor([0.5067, 0.1149, 0.2332, 0.1452]) -Greedy action tensor([ 1.2454, -1.0043, 0.1288, -0.8142]) tensor([0.6409, 0.0676, 0.2098, 0.0817]) -Greedy action tensor([ 0.5535, -0.4365, 0.0748, -0.3477]) tensor([0.4171, 0.1550, 0.2585, 0.1694]) -Greedy action tensor([ 0.9005, -0.5344, -0.1533, -0.2521]) tensor([0.5256, 0.1252, 0.1832, 0.1660]) -Greedy action tensor([ 0.6209, -0.3464, -0.0314, -0.3745]) tensor([0.4404, 0.1674, 0.2294, 0.1628]) -Greedy action tensor([ 0.7933, -0.4208, -0.0699, -0.2990]) tensor([0.4868, 0.1446, 0.2053, 0.1633]) -Greedy action tensor([ 0.2228, -0.2464, -0.0710, -0.1664]) tensor([0.3280, 0.2052, 0.2445, 0.2223]) -Greedy action tensor([ 0.9440, -0.4140, 0.1937, -0.4234]) tensor([0.5040, 0.1296, 0.2380, 0.1284]) -Greedy action tensor([ 0.5926, -0.4654, -0.1552, -0.3955]) tensor([0.4560, 0.1583, 0.2159, 0.1698]) -Greedy action tensor([ 0.8060, -0.5517, -0.1177, -0.3254]) tensor([0.5059, 0.1301, 0.2009, 0.1632]) -Greedy action tensor([ 0.3333, -0.1273, -0.0651, -0.1378]) tensor([0.3417, 0.2156, 0.2294, 0.2133]) -Greedy action tensor([ 1.0203, -0.9447, -0.0464, -0.8029]) tensor([0.6076, 0.0852, 0.2091, 0.0981]) -Greedy action tensor([ 0.9202, -1.1596, -0.0308, -0.4945]) tensor([0.5700, 0.0712, 0.2202, 0.1385]) -Greedy action tensor([ 0.8172, -0.3093, 0.0741, -0.2815]) tensor([0.4688, 0.1520, 0.2230, 0.1562]) -Greedy action tensor([ 0.8943, -0.5021, -0.1737, -0.2075]) tensor([0.5199, 0.1287, 0.1787, 0.1727]) -Greedy action tensor([ 0.3655, -0.4350, -0.0412, -0.1075]) tensor([0.3652, 0.1640, 0.2432, 0.2276]) -Greedy action tensor([ 0.4440, -0.1467, 0.0266, -0.6979]) tensor([0.3950, 0.2188, 0.2602, 0.1261]) -Greedy action tensor([ 0.6865, -0.5395, -0.1453, -0.1313]) tensor([0.4608, 0.1352, 0.2006, 0.2034]) -Greedy action tensor([ 0.4719, -0.1546, -0.0284, -0.1242]) tensor([0.3715, 0.1985, 0.2253, 0.2047]) -Greedy action tensor([ 0.7601, -0.3866, -0.0786, -0.1559]) tensor([0.4651, 0.1478, 0.2010, 0.1861]) -Greedy action tensor([ 0.4889, 0.0325, -0.0546, 0.0826]) tensor([0.3472, 0.2199, 0.2016, 0.2313]) -Greedy action tensor([ 0.7768, -0.2507, 0.2107, -0.5986]) tensor([0.4591, 0.1643, 0.2606, 0.1160]) -Greedy action tensor([ 1.0187, -0.7729, 0.1386, -0.5304]) tensor([0.5574, 0.0929, 0.2312, 0.1184]) -Greedy action tensor([ 0.5024, -0.1521, 0.0879, -0.2073]) tensor([0.3742, 0.1945, 0.2473, 0.1840]) -Greedy action tensor([ 0.9702, -0.6513, -0.1170, -0.3365]) tensor([0.5539, 0.1094, 0.1867, 0.1499]) -Greedy action tensor([ 0.5125, -0.2477, -0.1326, 0.0042]) tensor([0.3855, 0.1803, 0.2023, 0.2319]) -Greedy action tensor([ 0.3692, 0.0438, -0.0999, -0.3789]) tensor([0.3545, 0.2560, 0.2217, 0.1678]) -Greedy action tensor([ 0.7649, -0.5686, -0.1525, -0.4759]) tensor([0.5122, 0.1350, 0.2047, 0.1481]) -Greedy action tensor([ 0.8602, -0.5061, -0.2577, -0.3660]) tensor([0.5332, 0.1360, 0.1743, 0.1564]) -Greedy action tensor([ 0.8127, -0.1796, 0.0551, -0.0450]) tensor([0.4418, 0.1638, 0.2071, 0.1874]) -Greedy action tensor([ 0.7433, -0.2968, -0.2114, -0.1991]) tensor([0.4699, 0.1661, 0.1809, 0.1831]) -Greedy action tensor([ 0.8463, -0.5897, -0.1710, -0.4603]) tensor([0.5347, 0.1272, 0.1933, 0.1448]) -Greedy action tensor([ 0.2482, 0.1492, -0.2149, 0.0699]) tensor([0.2966, 0.2686, 0.1866, 0.2481]) -Greedy action tensor([ 1.0910, -0.8716, 0.1080, -0.6568]) tensor([0.5921, 0.0832, 0.2216, 0.1031]) -Greedy action tensor([ 0.7852, -0.6612, -0.0597, -0.2906]) tensor([0.4985, 0.1174, 0.2142, 0.1700]) -Greedy action tensor([ 1.0775, -0.7356, 0.0061, -0.4564]) tensor([0.5809, 0.0948, 0.1990, 0.1253]) -Greedy action tensor([ 1.0574, -0.6920, 0.2146, -0.3889]) tensor([0.5435, 0.0945, 0.2340, 0.1280]) -Greedy action tensor([ 0.8498, -0.8910, -0.1136, -0.3264]) tensor([0.5361, 0.0940, 0.2046, 0.1654]) -Greedy action tensor([ 0.8539, -0.4240, -0.1993, -0.2511]) tensor([0.5105, 0.1423, 0.1781, 0.1691]) -Greedy action tensor([ 0.6944, -0.4857, 0.0038, -0.2979]) tensor([0.4589, 0.1410, 0.2300, 0.1701]) -Greedy action tensor([-0.0098, 0.3394, -0.2834, -0.3935]) tensor([0.2591, 0.3673, 0.1971, 0.1765]) -Greedy action tensor([ 0.9903, -0.5728, -0.2459, -0.5281]) tensor([0.5817, 0.1219, 0.1690, 0.1274]) -Greedy action tensor([ 1.0123, -0.7167, 0.1539, -0.4994]) tensor([0.5489, 0.0974, 0.2326, 0.1210]) -Greedy action tensor([ 0.6646, -0.3979, 0.1008, -0.5428]) tensor([0.4518, 0.1561, 0.2571, 0.1351]) -Greedy action tensor([ 0.4311, 0.0113, -0.0743, -0.1005]) tensor([0.3511, 0.2307, 0.2118, 0.2063]) -Greedy action tensor([ 0.6471, -0.1090, 0.1043, -0.1233]) tensor([0.3979, 0.1868, 0.2312, 0.1841]) -Greedy action tensor([ 0.7525, -0.2374, -0.0817, -0.2493]) tensor([0.4602, 0.1710, 0.1998, 0.1690]) -Greedy action tensor([ 0.7901, -0.3269, -0.0392, -0.1256]) tensor([0.4621, 0.1512, 0.2017, 0.1850]) -Greedy action tensor([ 0.3263, -0.0784, -0.1225, 0.0174]) tensor([0.3290, 0.2195, 0.2100, 0.2416]) -Greedy action tensor([ 0.5348, -0.4785, -0.0925, -0.0927]) tensor([0.4114, 0.1493, 0.2197, 0.2196]) -Greedy action tensor([ 1.0129, -0.9117, 0.1213, -0.4349]) tensor([0.5584, 0.0815, 0.2289, 0.1313]) -Greedy action tensor([ 0.5537, -0.3719, -0.0368, -0.0513]) tensor([0.4006, 0.1587, 0.2219, 0.2187]) -Greedy action tensor([ 1.2379, -0.6484, 0.2118, -0.4393]) tensor([0.5893, 0.0894, 0.2112, 0.1101]) -Greedy action tensor([ 0.9805, -0.3170, 0.0965, -0.3108]) tensor([0.5099, 0.1393, 0.2107, 0.1402]) -Greedy action tensor([ 0.6883, -0.3730, -0.0560, -0.2879]) tensor([0.4550, 0.1574, 0.2162, 0.1714]) -Greedy action tensor([ 0.6630, -0.4278, -0.2332, -0.2676]) tensor([0.4676, 0.1571, 0.1909, 0.1844]) -Greedy action tensor([ 0.2779, 0.2608, -0.0694, -0.4688]) tensor([0.3161, 0.3107, 0.2234, 0.1498]) -Greedy action tensor([ 1.1063, -0.9651, 0.2463, -0.7637]) tensor([0.5871, 0.0740, 0.2484, 0.0905]) -Greedy action tensor([ 0.6135, -0.2511, -0.1045, -0.1074]) tensor([0.4175, 0.1759, 0.2036, 0.2030]) -Greedy action tensor([ 0.4358, -0.1782, -0.1230, 0.0005]) tensor([0.3623, 0.1961, 0.2072, 0.2344]) -Greedy action tensor([ 0.7475, -0.3607, 0.0043, -0.2513]) tensor([0.4600, 0.1519, 0.2188, 0.1694]) -Greedy action tensor([ 0.4809, -0.1083, 0.0006, -0.2679]) tensor([0.3779, 0.2096, 0.2338, 0.1787]) -Greedy action tensor([ 0.7708, -0.4115, 0.0405, -0.3776]) tensor([0.4749, 0.1456, 0.2288, 0.1506]) -Greedy action tensor([ 4.8306e-01, -8.8954e-02, -1.2815e-05, -1.3080e-02]) tensor([0.3584, 0.2023, 0.2211, 0.2182]) -Greedy action tensor([ 0.8332, -0.5264, 0.0030, -0.5940]) tensor([0.5174, 0.1329, 0.2256, 0.1242]) -Greedy action tensor([ 0.6732, 0.0307, 0.1096, -0.0420]) tensor([0.3870, 0.2035, 0.2202, 0.1893]) -Greedy action tensor([ 0.2902, -0.1698, -0.0120, -0.4313]) tensor([0.3501, 0.2210, 0.2588, 0.1701]) -Greedy action tensor([ 0.7890, -0.4577, -0.0029, -0.4525]) tensor([0.4927, 0.1417, 0.2232, 0.1424]) -Greedy action tensor([ 1.1697, -0.2437, 0.2880, -0.3232]) tensor([0.5313, 0.1293, 0.2200, 0.1194]) -Greedy action tensor([ 0.8471, -0.5423, -0.4246, -0.3725]) tensor([0.5480, 0.1366, 0.1536, 0.1618]) -Greedy action tensor([ 0.3071, -0.0071, -0.0185, -0.1470]) tensor([0.3239, 0.2366, 0.2339, 0.2057]) -Greedy action tensor([ 0.0695, 0.6128, -0.3744, -0.3956]) tensor([0.2505, 0.4314, 0.1607, 0.1574]) -Greedy action tensor([ 0.5993, -0.2204, -0.3132, -0.3329]) tensor([0.4473, 0.1970, 0.1796, 0.1761]) -Greedy action tensor([-1.3690, -1.8394, 1.4032, -1.2581]) tensor([0.0534, 0.0333, 0.8536, 0.0596]) -Greedy action tensor([-0.4980, -2.6583, 0.0066, 0.2275]) tensor([0.2067, 0.0238, 0.3424, 0.4270]) -Greedy action tensor([-1.0496, -2.3994, 0.6093, -0.4798]) tensor([0.1208, 0.0313, 0.6344, 0.2135]) -Greedy action tensor([-0.0952, -1.6593, -0.1201, -0.1637]) tensor([0.3207, 0.0671, 0.3128, 0.2994]) -Greedy action tensor([ 0.0700, -0.5102, 0.3108, -0.5693]) tensor([0.2977, 0.1666, 0.3787, 0.1571]) -Greedy action tensor([0.8808, 0.2787, 0.7539, 1.0432]) tensor([0.2774, 0.1519, 0.2443, 0.3263]) -Greedy action tensor([0.8887, 0.3489, 1.5533, 0.6252]) tensor([0.2328, 0.1357, 0.4526, 0.1789]) -Greedy action tensor([ 1.0542, -1.6723, 0.3089, 1.3181]) tensor([0.3518, 0.0230, 0.1670, 0.4581]) -Greedy action tensor([-0.9019, -0.3453, -0.8289, 0.2699]) tensor([0.1419, 0.2475, 0.1526, 0.4580]) -Greedy action tensor([ 0.3408, -0.4810, 0.3049, -0.1546]) tensor([0.3318, 0.1459, 0.3201, 0.2022]) -Greedy action tensor([-1.8639, 0.4515, 0.3584, -1.1389]) tensor([0.0446, 0.4517, 0.4116, 0.0921]) -Greedy action tensor([-0.4404, -0.7794, 0.4547, 0.3103]) tensor([0.1593, 0.1135, 0.3898, 0.3374]) -Greedy action tensor([-0.8599, -0.6665, -0.2572, -0.3282]) tensor([0.1741, 0.2113, 0.3182, 0.2964]) -Greedy action tensor([-0.1006, -0.9416, -0.3338, -0.7909]) tensor([0.3670, 0.1583, 0.2907, 0.1840]) -Greedy action tensor([ 0.2615, -0.1254, 0.0770, -1.3839]) tensor([0.3699, 0.2512, 0.3076, 0.0714]) -Greedy action tensor([0.9481, 0.2952, 0.9874, 0.4083]) tensor([0.3181, 0.1656, 0.3309, 0.1854]) -Greedy action tensor([-0.5022, 0.1042, 0.7471, -1.3929]) tensor([0.1485, 0.2724, 0.5181, 0.0610]) -Greedy action tensor([-0.6695, -0.7708, 0.8879, -0.7013]) tensor([0.1312, 0.1186, 0.6230, 0.1271]) -Greedy action tensor([-0.9860, -0.1408, -0.3573, -0.6481]) tensor([0.1514, 0.3525, 0.2839, 0.2122]) -Greedy action tensor([-0.8505, -2.3447, -0.1997, 0.1586]) tensor([0.1699, 0.0381, 0.3258, 0.4661]) -Greedy action tensor([ 0.0268, -0.8277, 0.1814, -0.4369]) tensor([0.3104, 0.1321, 0.3623, 0.1952]) -Greedy action tensor([-0.7103, -1.3212, -0.6744, -0.2880]) tensor([0.2436, 0.1322, 0.2525, 0.3716]) -Greedy action tensor([ 0.4153, -0.9618, -0.2140, 0.1277]) tensor([0.3944, 0.0995, 0.2102, 0.2958]) -Greedy action tensor([-1.1732, -0.3125, -1.0286, 0.4570]) tensor([0.1039, 0.2457, 0.1201, 0.5304]) -Greedy action tensor([-0.9483, -1.7612, -0.1801, 0.4264]) tensor([0.1324, 0.0587, 0.2854, 0.5235]) -Greedy action tensor([-0.0791, -0.5199, -0.6415, -0.4690]) tensor([0.3460, 0.2226, 0.1972, 0.2343]) -Greedy action tensor([ 0.3501, -0.4368, 0.6138, -0.3834]) tensor([0.3089, 0.1406, 0.4021, 0.1484]) -Greedy action tensor([-1.2634, -0.0140, -0.7068, -0.6274]) tensor([0.1231, 0.4295, 0.2148, 0.2326]) -Greedy action tensor([ 0.9230, -0.6278, 0.4720, -0.4781]) tensor([0.4772, 0.1012, 0.3040, 0.1176]) -Greedy action tensor([ 0.8803, -0.2960, -0.4111, 0.4260]) tensor([0.4508, 0.1390, 0.1239, 0.2862]) -Greedy action tensor([ 0.2889, -0.8994, 0.5842, -0.6210]) tensor([0.3278, 0.0999, 0.4404, 0.1320]) -Greedy action tensor([-0.9469, -0.4447, 0.2171, -0.5652]) tensor([0.1366, 0.2257, 0.4375, 0.2001]) -Greedy action tensor([ 0.5405, -0.0442, 0.5578, 0.1108]) tensor([0.3100, 0.1728, 0.3154, 0.2017]) -Greedy action tensor([-1.1964, -0.6408, 1.0473, -1.4062]) tensor([0.0770, 0.1343, 0.7263, 0.0625]) -Greedy action tensor([-0.5995, -0.4453, 0.9989, -0.6509]) tensor([0.1240, 0.1447, 0.6134, 0.1178]) -Greedy action tensor([-0.1069, -0.4005, -0.5822, -0.4393]) tensor([0.3242, 0.2417, 0.2016, 0.2325]) -Greedy action tensor([ 0.8126, -1.5986, -0.2777, 0.6327]) tensor([0.4422, 0.0397, 0.1486, 0.3694]) -Greedy action tensor([-0.0024, 0.3188, 0.5545, -1.1837]) tensor([0.2257, 0.3112, 0.3939, 0.0693]) -Greedy action tensor([-0.4309, 0.3741, -0.9716, -0.5403]) tensor([0.2121, 0.4743, 0.1235, 0.1901]) -Greedy action tensor([-0.0405, -0.9322, 0.4849, -0.5259]) tensor([0.2691, 0.1103, 0.4550, 0.1656]) -Greedy action tensor([-0.4793, 0.9717, -0.0811, -0.3814]) tensor([0.1272, 0.5430, 0.1895, 0.1403]) -Greedy action tensor([-0.0375, -1.4717, 0.7856, -0.4026]) tensor([0.2375, 0.0566, 0.5410, 0.1649]) -Greedy action tensor([ 0.4070, -0.0303, 1.2640, 0.3449]) tensor([0.2024, 0.1307, 0.4768, 0.1902]) -Greedy action tensor([2.1446, 0.4047, 0.4752, 0.4332]) tensor([0.6475, 0.1136, 0.1220, 0.1169]) -Greedy action tensor([-0.9568, 0.0180, 1.6947, -0.4649]) tensor([0.0514, 0.1362, 0.7284, 0.0840]) -Greedy action tensor([ 0.5872, -0.0580, -0.2246, 0.0293]) tensor([0.3935, 0.2064, 0.1748, 0.2253]) -Greedy action tensor([ 0.0432, -0.8839, -0.5786, 0.1209]) tensor([0.3318, 0.1313, 0.1782, 0.3587]) -Greedy action tensor([ 1.0980, -0.4597, 1.1818, -0.4187]) tensor([0.3972, 0.0837, 0.4319, 0.0872]) -Greedy action tensor([ 0.9872, -0.9834, 1.1548, -0.0269]) tensor([0.3725, 0.0519, 0.4405, 0.1351]) -Greedy action tensor([-1.0777, -1.3672, 0.7935, -1.9252]) tensor([0.1153, 0.0863, 0.7490, 0.0494]) -Greedy action tensor([ 0.3306, -0.8259, 0.6114, -0.0149]) tensor([0.2988, 0.0940, 0.3957, 0.2115]) -Greedy action tensor([-0.6284, -0.5223, 0.3669, -1.1097]) tensor([0.1840, 0.2046, 0.4978, 0.1137]) -Greedy action tensor([ 0.1103, 0.2468, 0.0657, -0.4853]) tensor([0.2737, 0.3137, 0.2617, 0.1509]) -Greedy action tensor([ 1.4610, -1.6678, 2.0809, 1.0527]) tensor([0.2803, 0.0123, 0.5210, 0.1864]) -Greedy action tensor([-0.7826, 0.1560, 0.3855, -0.7143]) tensor([0.1275, 0.3259, 0.4100, 0.1365]) -Greedy action tensor([-0.5927, -2.0404, 0.0193, 0.2415]) tensor([0.1858, 0.0437, 0.3426, 0.4279]) -Greedy action tensor([ 0.3862, -1.6580, -0.2726, 0.2865]) tensor([0.3918, 0.0507, 0.2028, 0.3547]) -Greedy action tensor([ 0.8418, 0.0342, -0.0016, 0.5009]) tensor([0.3865, 0.1724, 0.1663, 0.2749]) -Greedy action tensor([-0.1468, -1.5556, 0.9783, -0.2888]) tensor([0.1926, 0.0471, 0.5933, 0.1671]) -Greedy action tensor([ 0.6828, -1.2265, -0.1758, 1.0264]) tensor([0.3354, 0.0497, 0.1421, 0.4728]) -Greedy action tensor([ 0.1134, -1.4595, -0.5634, 0.2716]) tensor([0.3464, 0.0719, 0.1760, 0.4057]) -Greedy action tensor([ 0.0478, -0.2142, 0.1589, -0.7567]) tensor([0.2999, 0.2308, 0.3352, 0.1341]) -Greedy action tensor([ 0.7278, -0.9620, 1.0973, 2.5684]) tensor([0.1120, 0.0207, 0.1620, 0.7054]) -Greedy action tensor([-0.1977, 0.4122, 0.1624, -0.8185]) tensor([0.2078, 0.3825, 0.2979, 0.1117]) -Greedy action tensor([ 0.4452, -0.6748, 0.5490, -0.5021]) tensor([0.3542, 0.1156, 0.3929, 0.1373]) -Greedy action tensor([ 0.8365, 0.2146, -0.5592, -0.0222]) tensor([0.4528, 0.2431, 0.1121, 0.1919]) -Greedy action tensor([-0.8540, -1.0101, 1.3134, -0.7207]) tensor([0.0852, 0.0729, 0.7445, 0.0974]) -Greedy action tensor([ 0.7825, -0.5398, 0.8125, 0.3023]) tensor([0.3430, 0.0914, 0.3534, 0.2122]) -Greedy action tensor([-1.0264, -0.5253, 0.3068, -0.6758]) tensor([0.1272, 0.2099, 0.4824, 0.1806]) -Greedy action tensor([ 1.7687, -0.9377, 0.4821, 0.7350]) tensor([0.5887, 0.0393, 0.1626, 0.2094]) -Greedy action tensor([ 0.7079, -0.3931, 0.4364, 0.0512]) tensor([0.3827, 0.1272, 0.2917, 0.1984]) -Greedy action tensor([ 0.4160, 0.3853, -0.1803, 0.2175]) tensor([0.2993, 0.2903, 0.1649, 0.2454]) -Greedy action tensor([-0.5436, 0.1856, -0.4262, -0.1378]) tensor([0.1755, 0.3639, 0.1973, 0.2633]) -Greedy action tensor([ 0.2355, 0.2224, -0.9015, 0.4254]) tensor([0.2843, 0.2806, 0.0912, 0.3438]) -Greedy action tensor([0.1431, 0.2873, 0.7750, 0.9256]) tensor([0.1607, 0.1856, 0.3023, 0.3514]) -Greedy action tensor([ 0.6791, -0.6170, 0.2285, -0.3700]) tensor([0.4423, 0.1210, 0.2818, 0.1549]) -Greedy action tensor([ 0.2114, -1.8377, 1.1780, 0.8614]) tensor([0.1763, 0.0227, 0.4634, 0.3376]) -Greedy action tensor([-2.1057, -0.7244, 0.2412, -0.7078]) tensor([0.0513, 0.2043, 0.5366, 0.2077]) -Greedy action tensor([-0.2514, -0.0336, 0.1631, 0.0784]) tensor([0.1943, 0.2415, 0.2940, 0.2702]) -Greedy action tensor([ 0.4325, -0.7289, 0.1015, 1.6887]) tensor([0.1804, 0.0565, 0.1296, 0.6336]) -Greedy action tensor([ 0.1278, -0.0262, -0.0725, -0.6934]) tensor([0.3210, 0.2752, 0.2627, 0.1412]) -Greedy action tensor([ 0.0651, -1.0246, -0.9142, 0.3904]) tensor([0.3230, 0.1086, 0.1213, 0.4471]) -Greedy action tensor([ 1.9116, -0.3077, -0.4177, 0.5617]) tensor([0.6825, 0.0742, 0.0664, 0.1769]) -Greedy action tensor([ 1.8262, -1.0603, -0.2378, 0.5404]) tensor([0.6853, 0.0382, 0.0870, 0.1894]) -Greedy action tensor([ 1.1725, 0.0780, -0.8828, 0.3332]) tensor([0.5278, 0.1766, 0.0676, 0.2280]) -Greedy action tensor([ 1.7835, -0.0738, -0.7374, 0.2055]) tensor([0.6931, 0.1082, 0.0557, 0.1430]) -Greedy action tensor([ 1.2173, -0.3157, -0.1080, 0.1307]) tensor([0.5498, 0.1187, 0.1461, 0.1855]) -Greedy action tensor([ 1.2317, -0.2151, -0.8495, 0.2427]) tensor([0.5773, 0.1359, 0.0720, 0.2148]) -Greedy action tensor([ 1.7725, -0.5025, -0.3689, 0.6385]) tensor([0.6485, 0.0667, 0.0762, 0.2086]) -Greedy action tensor([ 1.5587, 0.0681, -0.5579, 0.2331]) tensor([0.6206, 0.1398, 0.0747, 0.1649]) -Greedy action tensor([ 1.1538, -0.2640, -0.4832, 0.1847]) tensor([0.5506, 0.1334, 0.1071, 0.2089]) -Greedy action tensor([ 2.2862, -0.7138, -0.4430, 0.6852]) tensor([0.7594, 0.0378, 0.0496, 0.1532]) -Greedy action tensor([ 2.1114, -1.2733, 0.0343, 0.8934]) tensor([0.6873, 0.0233, 0.0861, 0.2033]) -Greedy action tensor([ 1.3335, -0.5070, -0.3272, 0.0603]) tensor([0.6140, 0.0975, 0.1167, 0.1719]) -Greedy action tensor([ 1.1664, -0.1616, -0.4964, -0.1956]) tensor([0.5845, 0.1549, 0.1108, 0.1497]) -Greedy action tensor([ 1.4378, -0.2283, -0.7769, 0.1363]) tensor([0.6368, 0.1204, 0.0695, 0.1733]) -Greedy action tensor([ 1.4193, -0.6161, -0.4211, 0.1318]) tensor([0.6388, 0.0835, 0.1014, 0.1763]) -Greedy action tensor([ 1.2501, -0.1740, -0.7915, 0.3349]) tensor([0.5647, 0.1359, 0.0733, 0.2261]) -Greedy action tensor([ 2.3097, -1.0366, -0.1688, 0.0830]) tensor([0.8150, 0.0287, 0.0683, 0.0879]) -Greedy action tensor([ 0.2050, -0.3882, -0.1113, 0.1379]) tensor([0.3109, 0.1718, 0.2266, 0.2907]) -Greedy action tensor([ 2.0005, -0.4472, -0.3699, 0.2106]) tensor([0.7424, 0.0642, 0.0694, 0.1240]) -Greedy action tensor([ 2.5182, -0.3549, 0.1523, 0.3781]) tensor([0.7886, 0.0446, 0.0740, 0.0928]) -Greedy action tensor([ 1.1876, -0.0313, -0.3465, 0.7182]) tensor([0.4680, 0.1383, 0.1009, 0.2927]) -Greedy action tensor([ 1.5945, -0.9339, -0.4481, 0.6408]) tensor([0.6270, 0.0500, 0.0813, 0.2416]) -Greedy action tensor([ 1.6766, -0.7602, -0.2939, 0.2977]) tensor([0.6763, 0.0591, 0.0943, 0.1703]) -Greedy action tensor([ 2.3682, -1.3925, -0.1095, 0.8783]) tensor([0.7504, 0.0175, 0.0630, 0.1691]) -Greedy action tensor([ 1.6947, -0.7725, -0.1710, 0.4222]) tensor([0.6580, 0.0558, 0.1019, 0.1843]) -Greedy action tensor([ 1.9481, -0.2333, -0.7168, 0.8871]) tensor([0.6542, 0.0738, 0.0455, 0.2264]) -Greedy action tensor([ 1.9945, -1.3560, -0.2146, 0.1471]) tensor([0.7677, 0.0269, 0.0843, 0.1210]) -Greedy action tensor([ 0.7725, -0.4165, -0.0128, -0.0174]) tensor([0.4516, 0.1375, 0.2059, 0.2050]) -Greedy action tensor([ 1.5163, 0.2759, -0.2605, -0.1038]) tensor([0.6037, 0.1746, 0.1021, 0.1195]) -Greedy action tensor([ 1.2026, -0.3485, -0.4192, -0.0986]) tensor([0.5946, 0.1261, 0.1175, 0.1618]) -Greedy action tensor([ 1.7933, -0.7001, -0.3950, 0.5055]) tensor([0.6800, 0.0562, 0.0762, 0.1876]) -Greedy action tensor([ 1.4967, -0.5793, -0.5466, 0.2871]) tensor([0.6438, 0.0808, 0.0834, 0.1920]) -Greedy action tensor([ 1.3501, -0.2799, -0.7812, 0.3214]) tensor([0.5981, 0.1172, 0.0710, 0.2138]) -Greedy action tensor([ 1.3890, -0.3043, -0.5607, 0.3209]) tensor([0.5988, 0.1101, 0.0852, 0.2058]) -Greedy action tensor([ 1.3769, -0.3465, -0.5611, 0.5115]) tensor([0.5736, 0.1024, 0.0826, 0.2414]) -Greedy action tensor([ 1.2228, -0.7263, 0.0983, 0.0679]) tensor([0.5611, 0.0799, 0.1822, 0.1768]) -Greedy action tensor([ 1.8449, -0.4826, -0.8884, 0.1756]) tensor([0.7402, 0.0722, 0.0481, 0.1394]) -Greedy action tensor([ 1.1985, -0.2979, -0.2199, 0.0627]) tensor([0.5595, 0.1253, 0.1355, 0.1797]) -Greedy action tensor([ 1.7423, -0.5870, 0.0761, -0.0776]) tensor([0.6904, 0.0672, 0.1305, 0.1119]) -Greedy action tensor([ 1.4809, -0.4668, -0.2381, 0.1816]) tensor([0.6271, 0.0894, 0.1124, 0.1710]) -Greedy action tensor([ 0.7366, -0.0441, -0.4916, 0.2383]) tensor([0.4240, 0.1942, 0.1242, 0.2576]) -Greedy action tensor([ 1.0256, -0.3420, -0.4662, 0.0749]) tensor([0.5359, 0.1365, 0.1206, 0.2071]) -Greedy action tensor([ 1.0362, -0.2251, -0.3215, 0.1911]) tensor([0.5076, 0.1438, 0.1306, 0.2180]) -Greedy action tensor([ 1.2218, -0.5908, -0.4670, 0.0944]) tensor([0.5981, 0.0976, 0.1105, 0.1937]) -Greedy action tensor([ 1.5329, -0.6080, -0.2618, 0.1768]) tensor([0.6488, 0.0763, 0.1078, 0.1672]) -Greedy action tensor([ 1.5993, -0.2758, -0.4220, 0.2836]) tensor([0.6435, 0.0987, 0.0852, 0.1726]) -Greedy action tensor([ 1.2237, -0.3538, -0.6145, 0.2151]) tensor([0.5779, 0.1193, 0.0920, 0.2108]) -Greedy action tensor([ 1.9668, 0.2246, -0.4298, 0.7346]) tensor([0.6419, 0.1124, 0.0584, 0.1872]) -Greedy action tensor([ 1.9233, -0.2111, -0.7979, 0.3883]) tensor([0.7145, 0.0845, 0.0470, 0.1540]) -Greedy action tensor([ 1.6248, 0.3049, -0.3459, 0.4684]) tensor([0.5810, 0.1552, 0.0810, 0.1828]) -Greedy action tensor([ 1.1655, 0.2626, -0.0027, 0.2051]) tensor([0.4764, 0.1931, 0.1481, 0.1823]) -Greedy action tensor([ 1.9846, -0.7301, -0.6155, 0.7003]) tensor([0.7056, 0.0467, 0.0524, 0.1953]) -Greedy action tensor([ 1.6986, -0.6172, -0.3764, 0.4913]) tensor([0.6565, 0.0648, 0.0824, 0.1963]) -Greedy action tensor([ 0.8716, -0.4075, -0.0903, -0.0675]) tensor([0.4875, 0.1357, 0.1863, 0.1906]) -Greedy action tensor([ 1.7018, -0.9199, -0.4403, 0.7243]) tensor([0.6384, 0.0464, 0.0750, 0.2402]) -Greedy action tensor([ 0.7905, -0.3184, -0.2629, 0.2689]) tensor([0.4401, 0.1452, 0.1535, 0.2612]) -Greedy action tensor([ 2.0856, -0.5368, -0.4761, 0.5174]) tensor([0.7363, 0.0535, 0.0568, 0.1535]) -Greedy action tensor([ 1.9714, 0.6472, -0.0378, 0.0205]) tensor([0.6484, 0.1725, 0.0869, 0.0922]) -Greedy action tensor([ 1.3308, -0.3545, -0.3262, 0.1116]) tensor([0.5982, 0.1109, 0.1141, 0.1768]) -Greedy action tensor([ 1.3905, -0.3403, -0.4255, 0.0159]) tensor([0.6278, 0.1112, 0.1021, 0.1588]) -Greedy action tensor([ 1.5056, -0.4774, -0.5887, 0.2604]) tensor([0.6457, 0.0889, 0.0795, 0.1859]) -Greedy action tensor([ 1.8984, -0.5697, -0.4149, 0.3900]) tensor([0.7118, 0.0603, 0.0704, 0.1575]) -Greedy action tensor([ 0.9481, -0.1790, -0.2249, 0.0467]) tensor([0.4903, 0.1589, 0.1517, 0.1991]) -Greedy action tensor([ 1.4750, -0.1240, -0.1679, -0.3961]) tensor([0.6454, 0.1304, 0.1248, 0.0994]) -Greedy action tensor([ 1.3701, -0.6808, 0.0516, 0.1919]) tensor([0.5869, 0.0755, 0.1570, 0.1807]) -Greedy action tensor([ 1.5537, -0.0607, -0.1991, 0.3482]) tensor([0.5982, 0.1190, 0.1037, 0.1792]) -Greedy action tensor([ 1.7169, 0.3071, -0.2210, 0.0730]) tensor([0.6324, 0.1544, 0.0911, 0.1222]) -Greedy action tensor([ 1.1356, -0.4164, -0.1709, 0.2846]) tensor([0.5237, 0.1109, 0.1418, 0.2236]) -Greedy action tensor([ 2.1236, -0.7490, -0.8361, 0.2489]) tensor([0.7925, 0.0448, 0.0411, 0.1216]) -Greedy action tensor([ 1.8143, -0.9029, 0.0541, 0.0109]) tensor([0.7129, 0.0471, 0.1226, 0.1174]) -Greedy action tensor([ 0.8958, 0.0080, -0.0432, 0.4379]) tensor([0.4106, 0.1690, 0.1606, 0.2598]) -Greedy action tensor([ 1.2936, -0.1791, -0.5884, 0.5298]) tensor([0.5413, 0.1241, 0.0824, 0.2522]) -Greedy action tensor([ 1.5041, -0.3553, -0.1904, 0.8501]) tensor([0.5378, 0.0838, 0.0988, 0.2796]) -Greedy action tensor([ 1.9034, -0.8857, -0.1023, 0.1400]) tensor([0.7313, 0.0450, 0.0984, 0.1254]) -Greedy action tensor([ 2.7561, -1.2855, -0.1963, 0.7554]) tensor([0.8299, 0.0146, 0.0433, 0.1122]) -Greedy action tensor([ 2.0913, -0.7438, -0.4750, 0.5739]) tensor([0.7381, 0.0433, 0.0567, 0.1619]) -Greedy action tensor([ 1.1395, -0.9822, -0.0643, -0.1348]) tensor([0.5884, 0.0705, 0.1766, 0.1645]) -Greedy action tensor([ 1.2471, -0.4842, -0.1356, 0.1766]) tensor([0.5647, 0.1000, 0.1417, 0.1936]) -Greedy action tensor([ 1.7033, -0.3512, -1.3060, 0.5227]) tensor([0.6736, 0.0863, 0.0332, 0.2069]) -Greedy action tensor([ 1.7361, -0.4279, -1.0092, 0.7135]) tensor([0.6499, 0.0746, 0.0417, 0.2337]) -Greedy action tensor([ 2.6060, 0.0844, -0.4345, 0.2248]) tensor([0.8193, 0.0658, 0.0392, 0.0757]) -Greedy action tensor([-1.9117, -0.4361, 0.6539, -0.1614]) tensor([0.0414, 0.1812, 0.5389, 0.2385]) -Greedy action tensor([-1.9046, -0.4348, 0.6431, -0.1611]) tensor([0.0419, 0.1824, 0.5359, 0.2398]) -Greedy action tensor([-1.6731, -0.4289, 0.5502, -0.1204]) tensor([0.0543, 0.1883, 0.5012, 0.2563]) -Greedy action tensor([-1.7814, -0.4485, 0.5860, -0.1012]) tensor([0.0480, 0.1821, 0.5123, 0.2576]) -Greedy action tensor([-0.3453, 1.0783, 0.0190, 0.4380]) tensor([0.1139, 0.4729, 0.1639, 0.2493]) -Greedy action tensor([-1.8955, -0.4475, 0.6289, -0.1853]) tensor([0.0430, 0.1828, 0.5365, 0.2377]) -Greedy action tensor([-1.2048, 0.1531, 0.2746, 0.0698]) tensor([0.0778, 0.3024, 0.3415, 0.2783]) -Greedy action tensor([-1.8156, -0.3480, 0.6014, -0.0826]) tensor([0.0450, 0.1954, 0.5049, 0.2547]) -Greedy action tensor([-1.8898, -0.3614, 0.6304, -0.1520]) tensor([0.0421, 0.1943, 0.5239, 0.2396]) -Greedy action tensor([-1.6916, -0.4262, 0.6373, 0.1111]) tensor([0.0479, 0.1698, 0.4918, 0.2906]) -Greedy action tensor([-1.8962, -0.4396, 0.6424, -0.1571]) tensor([0.0423, 0.1815, 0.5355, 0.2407]) -Greedy action tensor([-1.9328, -0.4317, 0.6601, -0.1740]) tensor([0.0406, 0.1819, 0.5421, 0.2354]) -Greedy action tensor([-1.6750, -0.3978, 0.5176, -0.0562]) tensor([0.0538, 0.1929, 0.4818, 0.2715]) -Greedy action tensor([-1.8707, -0.4426, 0.6359, -0.1406]) tensor([0.0433, 0.1807, 0.5315, 0.2445]) -Greedy action tensor([-1.6354, -0.4073, 0.5218, -0.0529]) tensor([0.0558, 0.1905, 0.4823, 0.2715]) -Greedy action tensor([-1.2905, -0.3970, 0.5669, 0.2980]) tensor([0.0678, 0.1657, 0.4345, 0.3320]) -Greedy action tensor([-1.5152, 0.0815, 0.4153, -0.0671]) tensor([0.0585, 0.2890, 0.4035, 0.2490]) -Greedy action tensor([-0.9556, 0.9366, -0.1097, -0.4539]) tensor([0.0861, 0.5711, 0.2006, 0.1422]) -Greedy action tensor([-1.8204, -0.2834, 0.5814, -0.1099]) tensor([0.0450, 0.2092, 0.4969, 0.2489]) -Greedy action tensor([-1.9348, -0.4298, 0.6600, -0.1751]) tensor([0.0405, 0.1823, 0.5421, 0.2352]) -Greedy action tensor([-1.0450, -0.3300, 0.4488, -0.1683]) tensor([0.1010, 0.2065, 0.4498, 0.2427]) -Greedy action tensor([-1.8836, -0.4178, 0.6338, -0.1612]) tensor([0.0429, 0.1857, 0.5315, 0.2400]) -Greedy action tensor([-1.3757, 0.4652, 0.3752, 0.2842]) tensor([0.0546, 0.3440, 0.3144, 0.2870]) -Greedy action tensor([-1.9102, -0.4780, 0.6444, -0.1646]) tensor([0.0420, 0.1761, 0.5410, 0.2409]) -Greedy action tensor([-1.8066, -0.4137, 0.5920, -0.1113]) tensor([0.0466, 0.1874, 0.5124, 0.2536]) -Greedy action tensor([ 0.6527, 1.0289, -0.3016, 0.5517]) tensor([0.2670, 0.3889, 0.1028, 0.2413]) -Greedy action tensor([-1.8959, -0.4312, 0.6450, -0.1558]) tensor([0.0422, 0.1824, 0.5352, 0.2403]) -Greedy action tensor([-0.3870, 0.9988, -0.0022, 0.2974]) tensor([0.1183, 0.4732, 0.1739, 0.2346]) -Greedy action tensor([-1.9440, -0.4459, 0.6655, -0.1803]) tensor([0.0402, 0.1797, 0.5459, 0.2343]) -Greedy action tensor([-1.7565, -0.2861, 0.5879, -0.0611]) tensor([0.0471, 0.2050, 0.4912, 0.2567]) -Greedy action tensor([-1.7435, -0.3240, 0.6089, -0.0351]) tensor([0.0472, 0.1954, 0.4966, 0.2608]) -Greedy action tensor([-1.9184, -0.4499, 0.6585, -0.1669]) tensor([0.0412, 0.1790, 0.5423, 0.2375]) -Greedy action tensor([-1.7492, -0.4684, 0.5783, -0.0685]) tensor([0.0495, 0.1780, 0.5070, 0.2655]) -Greedy action tensor([-1.3386, -0.2255, 0.8732, 0.6536]) tensor([0.0488, 0.1484, 0.4453, 0.3575]) -Greedy action tensor([-1.9033, -0.4292, 0.6444, -0.1612]) tensor([0.0419, 0.1831, 0.5357, 0.2393]) -Greedy action tensor([-1.9171, -0.4180, 0.6596, -0.1631]) tensor([0.0410, 0.1834, 0.5389, 0.2367]) -Greedy action tensor([-1.4900, 0.5534, 0.2947, 0.1388]) tensor([0.0506, 0.3903, 0.3013, 0.2578]) -Greedy action tensor([-1.9120, -0.4301, 0.6532, -0.1631]) tensor([0.0414, 0.1822, 0.5384, 0.2380]) -Greedy action tensor([-1.8761, -0.3780, 0.6286, -0.1492]) tensor([0.0429, 0.1917, 0.5245, 0.2410]) -Greedy action tensor([-0.5379, -0.6241, 0.8367, 0.6843]) tensor([0.1079, 0.0990, 0.4267, 0.3664]) -Greedy action tensor([-1.6380, -0.2887, 0.6552, 0.0666]) tensor([0.0494, 0.1903, 0.4890, 0.2714]) -Greedy action tensor([-1.9118, -0.4184, 0.6462, -0.1603]) tensor([0.0414, 0.1845, 0.5351, 0.2389]) -Greedy action tensor([-1.8214, -0.3156, 0.5932, -0.0993]) tensor([0.0449, 0.2022, 0.5018, 0.2511]) -Greedy action tensor([-1.9411, -0.4419, 0.6651, -0.1770]) tensor([0.0402, 0.1801, 0.5449, 0.2347]) -Greedy action tensor([-1.3832, 0.6735, 0.2584, 0.0445]) tensor([0.0551, 0.4308, 0.2845, 0.2297]) -Greedy action tensor([-1.3152, 0.6688, 0.2203, 0.1359]) tensor([0.0582, 0.4232, 0.2702, 0.2484]) -Greedy action tensor([-0.5755, -0.0227, 0.0900, 0.0105]) tensor([0.1543, 0.2682, 0.3002, 0.2773]) -Greedy action tensor([-1.9089, -0.4531, 0.6547, -0.1600]) tensor([0.0416, 0.1785, 0.5405, 0.2393]) -Greedy action tensor([-1.9229, -0.4216, 0.6565, -0.1622]) tensor([0.0408, 0.1832, 0.5385, 0.2375]) -Greedy action tensor([-1.8371, -0.4333, 0.6115, -0.1313]) tensor([0.0452, 0.1838, 0.5225, 0.2486]) -Greedy action tensor([-1.8738, -0.3691, 0.6341, -0.1400]) tensor([0.0427, 0.1921, 0.5238, 0.2415]) -Greedy action tensor([-1.4010, -0.0166, 0.3870, -0.7429]) tensor([0.0775, 0.3095, 0.4633, 0.1497]) -Greedy action tensor([-0.3725, 0.3454, -0.0260, 0.1085]) tensor([0.1644, 0.3371, 0.2325, 0.2660]) -Greedy action tensor([-1.7703, -0.5013, 0.5998, -0.0687]) tensor([0.0482, 0.1715, 0.5159, 0.2644]) -Greedy action tensor([-1.6359, -0.4762, 0.5205, -0.0018]) tensor([0.0557, 0.1776, 0.4812, 0.2854]) -Greedy action tensor([-1.0131, 0.5912, 0.1252, -0.0547]) tensor([0.0854, 0.4250, 0.2667, 0.2228]) -Greedy action tensor([-1.8364, -0.4697, 0.6079, -0.1306]) tensor([0.0456, 0.1787, 0.5249, 0.2508]) -Greedy action tensor([-1.3336, 0.0507, 0.4616, 0.2362]) tensor([0.0632, 0.2524, 0.3806, 0.3038]) -Greedy action tensor([-1.0352, 0.1770, 0.4605, 0.3970]) tensor([0.0769, 0.2583, 0.3430, 0.3219]) -Greedy action tensor([-1.9444, -0.4501, 0.6681, -0.1800]) tensor([0.0401, 0.1788, 0.5469, 0.2342]) -Greedy action tensor([-1.2795, -0.3837, 0.5270, 0.5128]) tensor([0.0643, 0.1576, 0.3918, 0.3863]) -Greedy action tensor([-1.9197, -0.4277, 0.6562, -0.1580]) tensor([0.0410, 0.1821, 0.5384, 0.2385]) -Greedy action tensor([-1.7665, -0.3885, 0.6083, -0.0513]) tensor([0.0470, 0.1865, 0.5053, 0.2613]) -Greedy action tensor([-1.9322, -0.4595, 0.6701, -0.1656]) tensor([0.0405, 0.1765, 0.5462, 0.2368]) -Greedy action tensor([-1.3296, 0.6752, 0.1982, 0.1624]) tensor([0.0572, 0.4248, 0.2636, 0.2544]) -Greedy action tensor([-1.8959, -0.4484, 0.6468, -0.1587]) tensor([0.0423, 0.1798, 0.5376, 0.2403]) -Greedy action tensor([-1.8720, -0.4505, 0.6473, -0.1091]) tensor([0.0427, 0.1771, 0.5309, 0.2492]) -Greedy action tensor([-0.7178, 0.6980, 0.1015, -0.0294]) tensor([0.1066, 0.4392, 0.2419, 0.2122]) -Greedy action tensor([0.3560, 1.0771, 0.1635, 0.9283]) tensor([0.1769, 0.3638, 0.1459, 0.3135]) -Greedy action tensor([-1.8720, -0.2545, 0.5976, -0.1467]) tensor([0.0426, 0.2147, 0.5035, 0.2392]) -Greedy action tensor([-1.5433, 0.6299, 0.3654, -0.0618]) tensor([0.0478, 0.4198, 0.3222, 0.2102]) -Greedy action tensor([-0.7368, 0.9603, 0.0515, 0.2950]) tensor([0.0872, 0.4761, 0.1919, 0.2448]) -Greedy action tensor([-1.8881, -0.4417, 0.6362, -0.1501]) tensor([0.0427, 0.1814, 0.5331, 0.2428]) -Greedy action tensor([-1.3192, 0.7807, 0.2232, 0.2601]) tensor([0.0535, 0.4368, 0.2501, 0.2595]) -Greedy action tensor([-1.9413, -0.4556, 0.6662, -0.1780]) tensor([0.0403, 0.1781, 0.5466, 0.2350]) -Greedy action tensor([-1.8383, -0.3536, 0.6137, -0.0852]) tensor([0.0439, 0.1936, 0.5093, 0.2532]) -Greedy action tensor([-1.7931, -0.3096, 0.6404, 0.0146]) tensor([0.0437, 0.1925, 0.4977, 0.2662]) -Greedy action tensor([-1.9015, -0.3798, 0.6297, -0.1681]) tensor([0.0420, 0.1924, 0.5279, 0.2377]) -Greedy action tensor([-1.5864, -0.3160, 0.6411, -0.0050]) tensor([0.0535, 0.1905, 0.4961, 0.2600]) -Greedy action tensor([-1.5628, -0.4483, 0.4928, 0.0440]) tensor([0.0594, 0.1809, 0.4637, 0.2960]) -Greedy action tensor([-1.5685, -0.3390, 0.5525, 0.2582]) tensor([0.0527, 0.1802, 0.4396, 0.3275]) -Greedy action tensor([ 0.6993, -0.1459, -0.0185, -0.3816]) tensor([0.4432, 0.1903, 0.2162, 0.1503]) -Greedy action tensor([ 0.8932, -0.9736, 0.0606, -0.4798]) tensor([0.5426, 0.0839, 0.2360, 0.1375]) -Greedy action tensor([ 0.6926, -0.8139, -0.2938, -0.9811]) tensor([0.5611, 0.1244, 0.2093, 0.1052]) -Greedy action tensor([ 0.5488, -0.1567, -0.1244, -0.1382]) tensor([0.3989, 0.1970, 0.2035, 0.2007]) -Greedy action tensor([ 0.6151, -0.4288, -0.0031, -0.3449]) tensor([0.4398, 0.1548, 0.2370, 0.1684]) -Greedy action tensor([ 1.2153, -0.6984, -0.1009, -0.2687]) tensor([0.6089, 0.0898, 0.1633, 0.1380]) -Greedy action tensor([ 0.2837, -0.1208, -0.1329, -0.2371]) tensor([0.3424, 0.2285, 0.2257, 0.2034]) -Greedy action tensor([ 0.5372, -0.4355, -0.1109, -0.1279]) tensor([0.4140, 0.1565, 0.2165, 0.2129]) -Greedy action tensor([ 0.7018, -0.4391, 0.0452, -0.2332]) tensor([0.4483, 0.1432, 0.2325, 0.1760]) -Greedy action tensor([ 0.3912, 0.0335, -0.0568, -0.1207]) tensor([0.3404, 0.2381, 0.2175, 0.2040]) -Greedy action tensor([ 0.7790, -0.4287, -0.0607, -0.2565]) tensor([0.4794, 0.1433, 0.2070, 0.1702]) -Greedy action tensor([ 1.2898, -0.6768, -0.2059, -0.6473]) tensor([0.6631, 0.0928, 0.1486, 0.0956]) -Greedy action tensor([ 1.0954, -0.5668, -0.2203, -0.6348]) tensor([0.6115, 0.1160, 0.1641, 0.1084]) -Greedy action tensor([ 0.5200, -0.1818, 0.0654, -0.1029]) tensor([0.3750, 0.1859, 0.2380, 0.2011]) -Greedy action tensor([ 0.6790, -0.3288, -0.1020, -0.1521]) tensor([0.4428, 0.1616, 0.2028, 0.1929]) -Greedy action tensor([ 0.6850, -0.2246, 0.0621, -0.2163]) tensor([0.4264, 0.1717, 0.2287, 0.1731]) -Greedy action tensor([ 0.8667, -0.3144, 0.1356, -0.3526]) tensor([0.4799, 0.1473, 0.2310, 0.1418]) -Greedy action tensor([ 0.8494, 0.1896, -0.0527, -0.2234]) tensor([0.4416, 0.2283, 0.1791, 0.1510]) -Greedy action tensor([ 0.7633, -0.5844, -0.0043, -0.4431]) tensor([0.4943, 0.1284, 0.2294, 0.1479]) -Greedy action tensor([ 0.3010, 0.0686, -0.0107, -0.0099]) tensor([0.3070, 0.2433, 0.2248, 0.2249]) -Greedy action tensor([ 0.5673, 0.0701, -0.0458, -0.3520]) tensor([0.3924, 0.2386, 0.2125, 0.1565]) -Greedy action tensor([ 1.1780, -0.2487, -0.1324, -0.1631]) tensor([0.5645, 0.1355, 0.1523, 0.1477]) -Greedy action tensor([ 0.7699, -0.4428, -0.1685, -0.1790]) tensor([0.4817, 0.1433, 0.1885, 0.1865]) -Greedy action tensor([ 0.8092, -0.5659, 0.0406, -0.6883]) tensor([0.5154, 0.1303, 0.2390, 0.1153]) -Greedy action tensor([ 1.1316e+00, -5.3327e-01, -6.8936e-04, -4.9238e-01]) tensor([0.5853, 0.1107, 0.1886, 0.1154]) -Greedy action tensor([ 0.8683, -0.5124, -0.1293, -0.2194]) tensor([0.5110, 0.1284, 0.1884, 0.1722]) -Greedy action tensor([ 0.5509, -0.4857, -0.1845, -0.3656]) tensor([0.4477, 0.1588, 0.2146, 0.1790]) -Greedy action tensor([ 0.5764, -0.1963, 0.0716, -0.0877]) tensor([0.3876, 0.1790, 0.2340, 0.1995]) -Greedy action tensor([ 1.0483, -0.7346, 0.0908, -0.6443]) tensor([0.5760, 0.0969, 0.2211, 0.1060]) -Greedy action tensor([ 1.1391, -0.6500, -0.0848, -0.5909]) tensor([0.6103, 0.1020, 0.1795, 0.1082]) -Greedy action tensor([ 0.4441, -0.1352, 0.1188, -0.1662]) tensor([0.3539, 0.1983, 0.2556, 0.1922]) -Greedy action tensor([ 0.5889, -0.3816, -0.2022, -0.0680]) tensor([0.4254, 0.1612, 0.1929, 0.2206]) -Greedy action tensor([ 0.6383, -0.2639, -0.0110, -0.1756]) tensor([0.4217, 0.1711, 0.2203, 0.1869]) -Greedy action tensor([ 0.9541, -0.4265, 0.0233, -0.3430]) tensor([0.5211, 0.1310, 0.2054, 0.1424]) -Greedy action tensor([ 0.6766, -0.2945, -0.1852, -0.2167]) tensor([0.4524, 0.1713, 0.1911, 0.1852]) -Greedy action tensor([ 0.7713, -0.6545, -0.0742, -0.6572]) tensor([0.5238, 0.1259, 0.2249, 0.1255]) -Greedy action tensor([ 0.6121, -0.4132, 0.2331, -0.2408]) tensor([0.4050, 0.1453, 0.2772, 0.1726]) -Greedy action tensor([ 0.7925, -0.5294, -0.0125, -0.1790]) tensor([0.4780, 0.1274, 0.2137, 0.1809]) -Greedy action tensor([ 0.4241, 0.0428, 0.0158, -0.1749]) tensor([0.3452, 0.2357, 0.2295, 0.1896]) -Greedy action tensor([ 0.9149, -0.5672, 0.0862, -0.7921]) tensor([0.5419, 0.1231, 0.2366, 0.0983]) -Greedy action tensor([ 0.7022, -0.4104, -0.1026, -0.3226]) tensor([0.4685, 0.1540, 0.2095, 0.1681]) -Greedy action tensor([ 0.6538, -0.4856, -0.0122, -0.3977]) tensor([0.4580, 0.1466, 0.2353, 0.1601]) -Greedy action tensor([ 1.2341, -1.0477, 0.0106, -0.6754]) tensor([0.6475, 0.0661, 0.1905, 0.0959]) -Greedy action tensor([ 0.5161, -0.1083, 0.0701, -0.5038]) tensor([0.3943, 0.2112, 0.2524, 0.1422]) -Greedy action tensor([ 0.9621, -0.9044, 0.0619, -0.5275]) tensor([0.5597, 0.0866, 0.2275, 0.1262]) -Greedy action tensor([ 1.0478, -0.4224, 0.0277, -0.2280]) tensor([0.5349, 0.1230, 0.1928, 0.1493]) -Greedy action tensor([ 0.7029, -0.3140, -0.1200, -0.2533]) tensor([0.4576, 0.1655, 0.2010, 0.1759]) -Greedy action tensor([ 0.9258, -0.5337, 0.0848, -0.4831]) tensor([0.5241, 0.1218, 0.2260, 0.1281]) -Greedy action tensor([ 0.4568, -0.5692, 0.2080, -0.3270]) tensor([0.3854, 0.1381, 0.3005, 0.1760]) -Greedy action tensor([ 0.8912, -0.5333, -0.1110, -0.2451]) tensor([0.5185, 0.1248, 0.1903, 0.1664]) -Greedy action tensor([ 0.2570, -0.1969, -0.0439, -0.1263]) tensor([0.3271, 0.2078, 0.2421, 0.2230]) -Greedy action tensor([ 0.8178, -0.4930, 0.1183, -0.6415]) tensor([0.5003, 0.1349, 0.2486, 0.1163]) -Greedy action tensor([ 0.7476, -0.7971, 0.1999, -0.7730]) tensor([0.4974, 0.1061, 0.2877, 0.1087]) -Greedy action tensor([ 0.3409, -0.4426, -0.1607, -0.2039]) tensor([0.3785, 0.1729, 0.2292, 0.2195]) -Greedy action tensor([ 1.0559, -0.6843, -0.1414, -0.8190]) tensor([0.6132, 0.1076, 0.1852, 0.0940]) -Greedy action tensor([ 1.0463, -0.5788, -0.0690, -0.5105]) tensor([0.5762, 0.1134, 0.1889, 0.1215]) -Greedy action tensor([ 0.6571, -0.4224, -0.2242, -0.0482]) tensor([0.4448, 0.1512, 0.1843, 0.2197]) -Greedy action tensor([ 0.2319, 0.2196, -0.1943, -0.0862]) tensor([0.2969, 0.2933, 0.1939, 0.2160]) -Greedy action tensor([ 0.6920, -0.3522, -0.1453, -0.1141]) tensor([0.4481, 0.1577, 0.1940, 0.2001]) -Greedy action tensor([ 0.4722, -0.1524, -0.0590, -0.0551]) tensor([0.3685, 0.1973, 0.2166, 0.2175]) -Greedy action tensor([ 0.8190, -0.5900, -0.0361, -0.2166]) tensor([0.4939, 0.1207, 0.2100, 0.1753]) -Greedy action tensor([ 1.0378, -0.4944, -0.0889, -0.3131]) tensor([0.5558, 0.1201, 0.1801, 0.1440]) -Greedy action tensor([ 0.5094, -0.3017, 0.0072, -0.3464]) tensor([0.4041, 0.1796, 0.2446, 0.1717]) -Greedy action tensor([ 0.9009, -0.6363, 0.0936, -0.3873]) tensor([0.5163, 0.1110, 0.2303, 0.1424]) -Greedy action tensor([ 0.7611, -0.3960, -0.0514, -0.4234]) tensor([0.4845, 0.1523, 0.2150, 0.1482]) -Greedy action tensor([ 0.8553, -0.4892, -0.0543, -0.3854]) tensor([0.5121, 0.1335, 0.2062, 0.1481]) -Greedy action tensor([ 0.4868, -0.2739, -0.0160, -0.1797]) tensor([0.3868, 0.1807, 0.2339, 0.1986]) -Greedy action tensor([ 0.4226, -0.3338, -0.1254, 0.0063]) tensor([0.3694, 0.1734, 0.2136, 0.2436]) -Greedy action tensor([ 1.0224, -0.6509, -0.1562, -0.4384]) tensor([0.5789, 0.1086, 0.1781, 0.1343]) -Greedy action tensor([ 0.7567, -0.3960, -0.0333, -0.2539]) tensor([0.4687, 0.1480, 0.2127, 0.1706]) -Greedy action tensor([ 0.7235, -0.0327, -0.0122, -0.0510]) tensor([0.4150, 0.1948, 0.1989, 0.1913]) -Greedy action tensor([ 0.7365, -0.6560, -0.1062, -0.3815]) tensor([0.4985, 0.1239, 0.2146, 0.1630]) -Greedy action tensor([ 0.7794, 0.0213, -0.0673, 0.0521]) tensor([0.4201, 0.1968, 0.1801, 0.2030]) -Greedy action tensor([ 0.7840, -0.6008, -0.0147, -0.6281]) tensor([0.5144, 0.1288, 0.2315, 0.1253]) -Greedy action tensor([ 0.3900, -0.0498, -0.0969, -0.6001]) tensor([0.3802, 0.2449, 0.2337, 0.1413]) -Greedy action tensor([ 0.6773, -0.3744, -0.0065, -0.1514]) tensor([0.4366, 0.1525, 0.2203, 0.1906]) -Greedy action tensor([ 0.6465, -0.5775, 0.0159, -0.5848]) tensor([0.4721, 0.1388, 0.2513, 0.1378]) -Greedy action tensor([ 0.7101, -0.7011, -0.1224, -0.2972]) tensor([0.4892, 0.1193, 0.2128, 0.1787]) -Greedy action tensor([ 0.6672, -0.4739, -0.0760, -0.7509]) tensor([0.4909, 0.1568, 0.2334, 0.1189]) -Greedy action tensor([ 0.6670, -0.2534, -0.0167, -0.1898]) tensor([0.4296, 0.1712, 0.2168, 0.1824]) -Greedy action tensor([ 0.5398, -0.4269, -0.1088, -0.2403]) tensor([0.4235, 0.1611, 0.2214, 0.1941]) -Greedy action tensor([ 1.6325, -0.9207, 0.5674, 1.3653]) tensor([0.4570, 0.0356, 0.1575, 0.3499]) -Greedy action tensor([ 0.6981, 0.0402, -0.3135, 0.5178]) tensor([0.3681, 0.1907, 0.1339, 0.3074]) -Greedy action tensor([ 0.5601, 0.5182, 0.0716, -0.5564]) tensor([0.3448, 0.3307, 0.2116, 0.1129]) -Greedy action tensor([ 0.7317, -0.5547, 0.7137, 1.1578]) tensor([0.2639, 0.0729, 0.2592, 0.4041]) -Greedy action tensor([-0.5096, -0.9686, -0.0839, -0.6454]) tensor([0.2478, 0.1566, 0.3793, 0.2163]) -Greedy action tensor([-1.5283, -0.2573, -0.4359, -0.2276]) tensor([0.0891, 0.3177, 0.2658, 0.3273]) -Greedy action tensor([-0.2929, -0.9841, -0.5457, 0.1343]) tensor([0.2624, 0.1315, 0.2038, 0.4023]) -Greedy action tensor([ 0.8084, 0.4898, -0.0096, 0.0453]) tensor([0.3795, 0.2760, 0.1675, 0.1770]) -Greedy action tensor([-0.2537, 0.0030, -0.2065, 0.3025]) tensor([0.1966, 0.2542, 0.2062, 0.3430]) -Greedy action tensor([-0.6627, -0.0399, -1.1828, -1.2122]) tensor([0.2478, 0.4619, 0.1473, 0.1430]) -Greedy action tensor([-0.1138, -0.2055, 1.2072, 0.0471]) tensor([0.1463, 0.1335, 0.5483, 0.1719]) -Greedy action tensor([-0.0359, -0.6935, 0.0094, 1.5464]) tensor([0.1346, 0.0697, 0.1408, 0.6549]) -Greedy action tensor([-0.2039, -1.3480, 0.1243, -0.0429]) tensor([0.2576, 0.0821, 0.3577, 0.3026]) -Greedy action tensor([0.6537, 0.4115, 1.0344, 0.4195]) tensor([0.2476, 0.1943, 0.3623, 0.1959]) -Greedy action tensor([ 1.1677, -1.0779, 1.1357, 0.5283]) tensor([0.3843, 0.0407, 0.3722, 0.2028]) -Greedy action tensor([0.7259, 0.0368, 0.9020, 0.2138]) tensor([0.3036, 0.1524, 0.3621, 0.1819]) -Greedy action tensor([ 0.2741, -0.7710, 0.1186, -0.5505]) tensor([0.3779, 0.1329, 0.3235, 0.1657]) -Greedy action tensor([-1.3452, -0.6457, -0.2055, 0.5206]) tensor([0.0794, 0.1597, 0.2481, 0.5128]) -Greedy action tensor([ 0.2552, -1.9315, -0.4322, -0.7069]) tensor([0.5007, 0.0562, 0.2518, 0.1913]) -Greedy action tensor([ 0.9806, 0.1369, 1.5881, -0.0639]) tensor([0.2764, 0.1189, 0.5074, 0.0973]) -Greedy action tensor([ 0.9350, -1.6498, -0.2990, -0.3360]) tensor([0.6071, 0.0458, 0.1768, 0.1703]) -Greedy action tensor([ 1.6164e+00, -2.7181e-01, 7.1050e-04, 1.2282e+00]) tensor([0.4930, 0.0746, 0.0980, 0.3344]) -Greedy action tensor([ 0.0431, -1.0869, 0.3629, -0.1112]) tensor([0.2811, 0.0908, 0.3871, 0.2409]) -Greedy action tensor([ 0.5121, -0.9840, 0.4729, -0.6739]) tensor([0.4014, 0.0899, 0.3860, 0.1226]) -Greedy action tensor([ 0.1119, -0.0863, 0.4541, -0.2270]) tensor([0.2537, 0.2081, 0.3573, 0.1808]) -Greedy action tensor([-0.4674, -0.8718, 0.2711, -0.1100]) tensor([0.1927, 0.1286, 0.4032, 0.2755]) -Greedy action tensor([ 0.1959, -1.0068, -0.7751, -0.5997]) tensor([0.4694, 0.1410, 0.1778, 0.2118]) -Greedy action tensor([-0.0896, -0.4748, 0.0526, -0.2190]) tensor([0.2694, 0.1833, 0.3106, 0.2367]) -Greedy action tensor([-0.3649, -0.7204, -0.2046, -0.2406]) tensor([0.2496, 0.1749, 0.2929, 0.2826]) -Greedy action tensor([-1.4832, -0.4114, -0.4883, 0.3061]) tensor([0.0793, 0.2316, 0.2145, 0.4746]) -Greedy action tensor([ 0.3611, -0.2068, -0.4164, -0.5622]) tensor([0.4126, 0.2338, 0.1896, 0.1639]) -Greedy action tensor([-0.2311, -0.3939, -0.9985, 0.0436]) tensor([0.2755, 0.2341, 0.1279, 0.3626]) -Greedy action tensor([ 0.6794, -0.4486, -1.1893, 0.0196]) tensor([0.5012, 0.1623, 0.0774, 0.2591]) -Greedy action tensor([-0.9699, 0.6605, 0.7169, -0.6690]) tensor([0.0778, 0.3970, 0.4201, 0.1051]) -Greedy action tensor([ 1.0584, -0.1670, -0.3910, 1.1865]) tensor([0.3752, 0.1102, 0.0881, 0.4265]) -Greedy action tensor([0.8473, 0.5803, 0.7247, 0.0158]) tensor([0.3241, 0.2481, 0.2867, 0.1411]) -Greedy action tensor([ 0.9647, -1.7108, 0.0927, 1.3274]) tensor([0.3420, 0.0236, 0.1430, 0.4915]) -Greedy action tensor([-0.1815, -0.2955, -0.6364, 0.5053]) tensor([0.2215, 0.1977, 0.1406, 0.4403]) -Greedy action tensor([-0.3139, -1.5080, -0.2922, -0.1089]) tensor([0.2815, 0.0853, 0.2877, 0.3456]) -Greedy action tensor([-0.4515, -0.0754, -0.5388, -0.0677]) tensor([0.2066, 0.3009, 0.1893, 0.3032]) -Greedy action tensor([ 0.3560, -0.8251, -0.4567, -0.1611]) tensor([0.4261, 0.1308, 0.1890, 0.2541]) -Greedy action tensor([-0.2311, -0.2784, 0.2648, -0.0731]) tensor([0.2098, 0.2001, 0.3444, 0.2457]) -Greedy action tensor([-0.4890, -0.2571, -0.4618, 0.0731]) tensor([0.1983, 0.2501, 0.2038, 0.3479]) -Greedy action tensor([ 0.7649, -0.2116, -0.6625, 0.0907]) tensor([0.4703, 0.1771, 0.1128, 0.2397]) -Greedy action tensor([-1.2582, -0.6450, 0.9714, -0.9195]) tensor([0.0738, 0.1363, 0.6863, 0.1036]) -Greedy action tensor([-0.3243, -1.8239, 0.5168, -0.4360]) tensor([0.2254, 0.0503, 0.5227, 0.2016]) -Greedy action tensor([-0.1595, 0.7308, -0.7648, -0.7774]) tensor([0.2212, 0.5388, 0.1208, 0.1192]) -Greedy action tensor([-0.3052, -0.3094, 0.0470, -0.2273]) tensor([0.2223, 0.2213, 0.3161, 0.2403]) -Greedy action tensor([ 0.7673, -1.6605, 0.5399, 0.2496]) tensor([0.4031, 0.0356, 0.3211, 0.2402]) -Greedy action tensor([ 0.2596, 0.4073, 1.4722, -0.5312]) tensor([0.1674, 0.1940, 0.5627, 0.0759]) -Greedy action tensor([-1.2411, -0.8562, -1.2150, 0.1941]) tensor([0.1299, 0.1909, 0.1334, 0.5458]) -Greedy action tensor([ 0.4202, -0.6539, -0.4830, 0.5054]) tensor([0.3526, 0.1205, 0.1429, 0.3840]) -Greedy action tensor([0.2260, 0.1137, 0.7456, 0.5177]) tensor([0.2035, 0.1819, 0.3422, 0.2724]) -Greedy action tensor([-0.3704, -1.2448, -0.5876, 0.1907]) tensor([0.2516, 0.1050, 0.2025, 0.4410]) -Greedy action tensor([-1.0465, -1.1841, 1.5581, -0.6679]) tensor([0.0593, 0.0517, 0.8024, 0.0866]) -Greedy action tensor([ 0.2898, -1.3115, 1.2902, -0.1760]) tensor([0.2198, 0.0443, 0.5978, 0.1380]) -Greedy action tensor([ 0.6918, -0.5256, 0.5937, 0.7582]) tensor([0.3057, 0.0905, 0.2771, 0.3267]) -Greedy action tensor([-0.2123, -1.0882, -0.9527, -1.0332]) tensor([0.4286, 0.1785, 0.2044, 0.1886]) -Greedy action tensor([-0.1551, -1.0750, 0.2378, -0.3627]) tensor([0.2708, 0.1079, 0.4012, 0.2201]) -Greedy action tensor([-0.0719, -1.2569, 0.2417, -0.2313]) tensor([0.2835, 0.0867, 0.3880, 0.2418]) -Greedy action tensor([ 5.1054e-04, -1.6369e+00, -6.0483e-01, 3.9083e-01]) tensor([0.3108, 0.0604, 0.1696, 0.4591]) -Greedy action tensor([-0.3718, -0.5199, -0.3800, 0.7799]) tensor([0.1662, 0.1433, 0.1648, 0.5257]) -Greedy action tensor([-1.1956, -0.3044, -0.4140, 0.7273]) tensor([0.0802, 0.1956, 0.1753, 0.5489]) -Greedy action tensor([ 0.0467, 0.5295, 0.2703, -0.1909]) tensor([0.2146, 0.3478, 0.2684, 0.1692]) -Greedy action tensor([ 0.2683, -1.5134, 0.1020, 0.0662]) tensor([0.3531, 0.0594, 0.2990, 0.2885]) -Greedy action tensor([-0.2989, 0.1632, 0.2707, -0.7179]) tensor([0.1995, 0.3167, 0.3526, 0.1312]) -Greedy action tensor([ 0.6507, 0.2127, 0.5767, -0.3672]) tensor([0.3407, 0.2199, 0.3164, 0.1231]) -Greedy action tensor([ 0.1701, 0.1709, 0.0924, -0.3516]) tensor([0.2841, 0.2844, 0.2629, 0.1686]) -Greedy action tensor([-1.3870, 0.2736, -0.2253, -1.1025]) tensor([0.0927, 0.4879, 0.2962, 0.1232]) -Greedy action tensor([ 0.7111, -1.1672, 0.1186, 1.2119]) tensor([0.2980, 0.0455, 0.1648, 0.4917]) -Greedy action tensor([-0.2485, 0.1228, 0.4994, -0.7474]) tensor([0.1934, 0.2804, 0.4087, 0.1175]) -Greedy action tensor([-0.9452, 0.3001, 0.2077, 0.1298]) tensor([0.0946, 0.3286, 0.2996, 0.2772]) -Greedy action tensor([-0.6615, -1.2308, -0.2047, -0.0517]) tensor([0.2006, 0.1135, 0.3167, 0.3691]) -Greedy action tensor([ 0.4977, -0.9264, 0.3904, -0.3330]) tensor([0.3884, 0.0935, 0.3489, 0.1692]) -Greedy action tensor([ 0.1887, -0.9910, 1.0223, -0.0019]) tensor([0.2255, 0.0693, 0.5189, 0.1863]) -Greedy action tensor([ 0.2661, 0.8578, 1.0255, -0.6359]) tensor([0.1869, 0.3378, 0.3995, 0.0758]) -Greedy action tensor([-0.7667, -0.8333, 0.6014, -0.6162]) tensor([0.1423, 0.1332, 0.5591, 0.1655]) -Greedy action tensor([ 0.7795, -0.2459, -0.3699, 0.6245]) tensor([0.3950, 0.1417, 0.1251, 0.3383]) -Greedy action tensor([ 0.0296, -0.4225, 0.4680, -1.1969]) tensor([0.2874, 0.1828, 0.4455, 0.0843]) -Greedy action tensor([-1.0549, -1.0489, -0.0421, -0.5190]) tensor([0.1546, 0.1555, 0.4257, 0.2642]) -Greedy action tensor([ 1.3946, -0.9792, -0.4945, 1.3041]) tensor([0.4634, 0.0432, 0.0701, 0.4233]) -Greedy action tensor([ 1.7533, -0.2878, -0.4391, 0.3645]) tensor([0.6707, 0.0871, 0.0749, 0.1673]) -Greedy action tensor([ 1.6269, -0.6189, -0.5619, 0.4088]) tensor([0.6606, 0.0699, 0.0740, 0.1954]) -Greedy action tensor([ 2.5191, 0.0150, -0.0477, 0.6095]) tensor([0.7653, 0.0626, 0.0588, 0.1134]) -Greedy action tensor([ 1.4473, 0.0053, -0.6259, 0.3299]) tensor([0.5919, 0.1400, 0.0745, 0.1936]) -Greedy action tensor([ 1.0814, 0.1884, -0.0603, 0.0864]) tensor([0.4765, 0.1951, 0.1522, 0.1762]) -Greedy action tensor([ 1.3462, -0.6490, -0.1869, 0.0891]) tensor([0.6111, 0.0831, 0.1319, 0.1738]) -Greedy action tensor([ 1.2302, -0.1180, -0.4748, 0.1551]) tensor([0.5609, 0.1457, 0.1020, 0.1914]) -Greedy action tensor([ 1.2394, -0.4287, -0.1804, 0.4969]) tensor([0.5246, 0.0989, 0.1268, 0.2497]) -Greedy action tensor([ 1.8678, -0.8177, -0.0096, 1.1330]) tensor([0.5880, 0.0401, 0.0900, 0.2820]) -Greedy action tensor([ 1.8409, -0.2222, -0.4949, 0.3264]) tensor([0.6927, 0.0880, 0.0670, 0.1523]) -Greedy action tensor([ 1.3859, -0.5724, 0.0415, 0.0439]) tensor([0.6013, 0.0848, 0.1567, 0.1571]) -Greedy action tensor([ 1.5165, -0.6351, -0.1163, 0.3962]) tensor([0.6105, 0.0710, 0.1193, 0.1992]) -Greedy action tensor([ 1.3298, -0.7948, -0.2798, 0.3534]) tensor([0.5896, 0.0704, 0.1179, 0.2221]) -Greedy action tensor([ 1.8212, -0.8191, -0.1514, 0.1054]) tensor([0.7193, 0.0513, 0.1001, 0.1293]) -Greedy action tensor([ 1.8814, -0.8156, -0.3518, 0.2412]) tensor([0.7307, 0.0493, 0.0783, 0.1417]) -Greedy action tensor([ 1.3821, -0.3671, -0.1327, 0.3794]) tensor([0.5680, 0.0988, 0.1249, 0.2084]) -Greedy action tensor([ 1.2720, -0.3583, -0.7045, 0.2432]) tensor([0.5911, 0.1158, 0.0819, 0.2113]) -Greedy action tensor([ 1.6391, 0.1471, -0.0248, 0.7848]) tensor([0.5435, 0.1222, 0.1029, 0.2313]) -Greedy action tensor([ 1.9702, -1.0953, -0.1810, 0.4192]) tensor([0.7273, 0.0339, 0.0846, 0.1542]) -Greedy action tensor([ 1.5175, -0.2214, -0.3130, 0.1793]) tensor([0.6256, 0.1099, 0.1003, 0.1641]) -Greedy action tensor([ 1.2913, -0.4174, -0.2614, 0.3818]) tensor([0.5570, 0.1009, 0.1179, 0.2243]) -Greedy action tensor([ 1.3555, 0.0654, -0.7244, 0.0797]) tensor([0.5955, 0.1639, 0.0744, 0.1663]) -Greedy action tensor([ 2.1505, -1.5345, -0.1322, 0.4103]) tensor([0.7677, 0.0193, 0.0783, 0.1347]) -Greedy action tensor([ 1.4168, -0.6157, -0.4324, 0.5145]) tensor([0.5903, 0.0773, 0.0929, 0.2395]) -Greedy action tensor([ 0.8493, -0.6374, -0.1860, 0.2134]) tensor([0.4738, 0.1071, 0.1682, 0.2508]) -Greedy action tensor([ 1.5093, -0.0776, -0.0782, 0.2224]) tensor([0.5934, 0.1214, 0.1213, 0.1639]) -Greedy action tensor([ 1.1600, -0.0873, -0.0777, -0.3242]) tensor([0.5543, 0.1593, 0.1608, 0.1257]) -Greedy action tensor([ 1.9113, -1.3603, 0.0745, 0.5936]) tensor([0.6826, 0.0259, 0.1088, 0.1828]) -Greedy action tensor([ 1.3570, -0.6336, -0.4670, 0.6186]) tensor([0.5631, 0.0769, 0.0909, 0.2691]) -Greedy action tensor([ 1.1034, -0.4085, -0.4314, 0.5176]) tensor([0.5019, 0.1107, 0.1081, 0.2793]) -Greedy action tensor([ 1.4467, -0.5489, -0.3593, 0.5062]) tensor([0.5915, 0.0804, 0.0972, 0.2309]) -Greedy action tensor([ 1.3153, -0.1574, -0.6628, 0.1580]) tensor([0.5946, 0.1363, 0.0822, 0.1869]) -Greedy action tensor([ 1.2727, -0.1889, -1.0177, 0.3212]) tensor([0.5816, 0.1349, 0.0589, 0.2246]) -Greedy action tensor([ 3.3237, -1.4035, -0.0375, 1.3208]) tensor([0.8485, 0.0075, 0.0294, 0.1145]) -Greedy action tensor([ 1.7992, -0.5378, -0.2213, 0.4871]) tensor([0.6673, 0.0645, 0.0885, 0.1797]) -Greedy action tensor([ 1.8828, -0.1671, -0.5966, 0.2712]) tensor([0.7082, 0.0912, 0.0593, 0.1413]) -Greedy action tensor([ 1.5626, -0.0679, -0.3432, -0.0342]) tensor([0.6464, 0.1266, 0.0961, 0.1309]) -Greedy action tensor([ 1.0719, -0.2922, -0.3154, -0.0433]) tensor([0.5455, 0.1394, 0.1362, 0.1788]) -Greedy action tensor([ 1.4994, -0.4131, -0.2806, 0.1620]) tensor([0.6334, 0.0936, 0.1068, 0.1663]) -Greedy action tensor([ 1.6972, -0.4850, -0.6319, 0.7997]) tensor([0.6181, 0.0697, 0.0602, 0.2519]) -Greedy action tensor([ 0.8091, -0.1552, -0.0931, 0.2322]) tensor([0.4258, 0.1623, 0.1727, 0.2391]) -Greedy action tensor([ 1.7485, -0.2859, -0.5055, 0.2834]) tensor([0.6818, 0.0891, 0.0716, 0.1575]) -Greedy action tensor([ 1.6265, -0.3142, -0.7921, 0.1865]) tensor([0.6805, 0.0977, 0.0606, 0.1612]) -Greedy action tensor([ 2.8186, -1.7176, 0.0323, 0.4696]) tensor([0.8563, 0.0092, 0.0528, 0.0817]) -Greedy action tensor([ 1.5954, -1.2041, -0.0367, 0.0989]) tensor([0.6755, 0.0411, 0.1321, 0.1513]) -Greedy action tensor([ 2.1364, -0.0167, -0.5851, 0.4453]) tensor([0.7320, 0.0850, 0.0481, 0.1349]) -Greedy action tensor([ 1.3578, -0.3961, -0.4326, 0.1115]) tensor([0.6144, 0.1064, 0.1025, 0.1767]) -Greedy action tensor([ 1.0882, -0.5762, -0.0706, 0.1462]) tensor([0.5283, 0.1000, 0.1658, 0.2059]) -Greedy action tensor([ 2.2495, -1.1505, -0.2454, 0.5947]) tensor([0.7651, 0.0255, 0.0631, 0.1462]) -Greedy action tensor([ 1.8429, -1.1983, -0.0211, -0.0281]) tensor([0.7370, 0.0352, 0.1143, 0.1135]) -Greedy action tensor([ 1.8638, -0.3452, -0.7492, 0.7765]) tensor([0.6578, 0.0722, 0.0482, 0.2218]) -Greedy action tensor([ 1.4333, 0.1099, -0.4502, 0.3795]) tensor([0.5660, 0.1507, 0.0861, 0.1973]) -Greedy action tensor([ 1.3403, -0.4248, -0.5438, -0.2253]) tensor([0.6527, 0.1117, 0.0992, 0.1364]) -Greedy action tensor([ 1.4317, -0.0658, -0.7924, 0.2333]) tensor([0.6122, 0.1369, 0.0662, 0.1847]) -Greedy action tensor([ 2.2035, -0.9188, -0.4238, 0.6013]) tensor([0.7588, 0.0334, 0.0548, 0.1529]) -Greedy action tensor([ 1.8628, -0.7271, -0.3410, 0.5879]) tensor([0.6826, 0.0512, 0.0754, 0.1908]) -Greedy action tensor([ 2.5666, -1.2934, -0.3387, 0.7902]) tensor([0.8032, 0.0169, 0.0440, 0.1359]) -Greedy action tensor([ 1.7090, -0.6692, -0.8230, 0.1968]) tensor([0.7181, 0.0666, 0.0571, 0.1583]) -Greedy action tensor([ 1.6926, -0.0202, -0.4413, 0.6665]) tensor([0.6035, 0.1088, 0.0714, 0.2163]) -Greedy action tensor([ 1.7113, -0.5554, -0.6499, 0.1819]) tensor([0.7069, 0.0733, 0.0667, 0.1532]) -Greedy action tensor([ 1.5198, -0.1704, -0.9373, 0.4974]) tensor([0.6135, 0.1132, 0.0526, 0.2207]) -Greedy action tensor([ 0.8210, -0.2150, -0.0878, 0.1423]) tensor([0.4415, 0.1567, 0.1779, 0.2240]) -Greedy action tensor([ 1.1133, -0.3693, -0.2433, 0.3825]) tensor([0.5086, 0.1155, 0.1310, 0.2449]) -Greedy action tensor([ 1.2137, -0.4502, -0.6294, 0.1369]) tensor([0.5923, 0.1122, 0.0938, 0.2018]) -Greedy action tensor([ 1.9244, -0.4064, -0.4182, 0.3357]) tensor([0.7156, 0.0696, 0.0687, 0.1461]) -Greedy action tensor([ 1.6077, -0.5028, -0.3088, 0.2797]) tensor([0.6522, 0.0790, 0.0959, 0.1728]) -Greedy action tensor([ 1.3179, -0.0160, -0.0793, 0.0629]) tensor([0.5569, 0.1467, 0.1377, 0.1588]) -Greedy action tensor([ 1.7053, -1.2712, -0.1337, 0.6329]) tensor([0.6443, 0.0328, 0.1024, 0.2205]) -Greedy action tensor([ 2.8428, -1.5500, -0.2652, 0.7665]) tensor([0.8457, 0.0105, 0.0378, 0.1060]) -Greedy action tensor([ 1.5062, -0.6351, -0.4147, 0.2016]) tensor([0.6513, 0.0765, 0.0954, 0.1767]) -Greedy action tensor([ 1.0127, 0.1389, -0.0929, 0.4663]) tensor([0.4297, 0.1793, 0.1422, 0.2488]) -Greedy action tensor([ 2.1322, -1.0263, -0.1693, 0.2865]) tensor([0.7689, 0.0327, 0.0770, 0.1214]) -Greedy action tensor([ 1.5540, -0.6593, -0.3790, 0.3226]) tensor([0.6469, 0.0707, 0.0936, 0.1888]) -Greedy action tensor([ 1.5167, -0.1794, -0.2142, 0.4285]) tensor([0.5891, 0.1081, 0.1044, 0.1984]) -Greedy action tensor([ 2.3104, -0.8862, -0.1292, 0.7984]) tensor([0.7415, 0.0303, 0.0647, 0.1635]) -Greedy action tensor([ 9.3236e-01, 4.0562e-02, 1.2012e-01, -6.1333e-05]) tensor([0.4450, 0.1824, 0.1975, 0.1751]) -Greedy action tensor([ 2.3974, -1.3216, -0.1677, 0.7031]) tensor([0.7783, 0.0189, 0.0599, 0.1430]) -Greedy action tensor([ 1.2726, -0.2590, -0.3347, 0.0148]) tensor([0.5879, 0.1271, 0.1178, 0.1671]) -Greedy action tensor([ 1.6167, -1.0277, -0.4289, 0.5694]) tensor([0.6446, 0.0458, 0.0834, 0.2262]) -Greedy action tensor([ 2.7405, -1.4120, -0.1824, 0.9164]) tensor([0.8124, 0.0128, 0.0437, 0.1311]) -Greedy action tensor([ 1.8868, -0.9830, -0.2888, 0.2223]) tensor([0.7355, 0.0417, 0.0835, 0.1392]) -Greedy action tensor([ 0.8971, -0.4739, -0.1318, -0.4000]) tensor([0.5306, 0.1347, 0.1896, 0.1450]) -Greedy action tensor([ 0.5142, -0.2273, -0.0939, -0.1571]) tensor([0.3950, 0.1882, 0.2150, 0.2019]) -Greedy action tensor([ 0.5868, -0.1995, -0.0991, -0.2520]) tensor([0.4182, 0.1905, 0.2106, 0.1807]) -Greedy action tensor([ 0.7879, -0.4484, -0.0253, -0.2737]) tensor([0.4808, 0.1397, 0.2132, 0.1663]) -Greedy action tensor([ 0.9431, -0.7241, -0.0392, -0.3265]) tensor([0.5423, 0.1024, 0.2030, 0.1523]) -Greedy action tensor([ 0.8569, -0.4253, -0.1430, -0.3716]) tensor([0.5160, 0.1432, 0.1898, 0.1510]) -Greedy action tensor([ 0.4689, -0.1706, 0.0200, -0.2848]) tensor([0.3793, 0.2001, 0.2421, 0.1785]) -Greedy action tensor([ 0.3683, -0.2901, -0.1763, -0.0688]) tensor([0.3645, 0.1887, 0.2114, 0.2354]) -Greedy action tensor([ 0.8965, -0.6130, -0.0718, -0.3461]) tensor([0.5293, 0.1170, 0.2010, 0.1528]) -Greedy action tensor([ 0.8678, -0.6005, 0.0840, -0.4521]) tensor([0.5117, 0.1179, 0.2337, 0.1367]) -Greedy action tensor([ 0.5644, -0.0359, 0.1870, -0.2069]) tensor([0.3708, 0.2034, 0.2543, 0.1715]) -Greedy action tensor([ 0.8441, -0.3032, 0.0046, -0.5378]) tensor([0.4999, 0.1587, 0.2159, 0.1255]) -Greedy action tensor([ 0.8095, -0.7505, 0.1251, -0.3858]) tensor([0.4958, 0.1042, 0.2501, 0.1500]) -Greedy action tensor([ 0.6151, -0.2049, -0.0155, -0.2789]) tensor([0.4199, 0.1849, 0.2235, 0.1717]) -Greedy action tensor([ 0.9125, -0.5037, 0.0862, -0.2350]) tensor([0.5006, 0.1215, 0.2191, 0.1589]) -Greedy action tensor([ 0.6178, 0.1975, -0.1979, -0.2267]) tensor([0.3954, 0.2597, 0.1749, 0.1699]) -Greedy action tensor([ 0.9757, -0.7750, 0.0562, -0.2549]) tensor([0.5363, 0.0931, 0.2139, 0.1567]) -Greedy action tensor([ 0.9530, -0.5538, -0.0011, -0.6557]) tensor([0.5534, 0.1226, 0.2132, 0.1108]) -Greedy action tensor([ 0.9347, -0.6439, -0.0121, -0.7345]) tensor([0.5610, 0.1157, 0.2176, 0.1057]) -Greedy action tensor([ 0.7159, -0.6031, -0.0055, -0.1970]) tensor([0.4641, 0.1241, 0.2256, 0.1863]) -Greedy action tensor([ 0.6114, -0.2808, -0.0269, -0.1216]) tensor([0.4135, 0.1694, 0.2184, 0.1987]) -Greedy action tensor([ 1.1844, -0.6283, 0.0076, -0.5274]) tensor([0.6053, 0.0988, 0.1866, 0.1093]) -Greedy action tensor([ 0.9571, -0.9003, 0.0629, -0.5458]) tensor([0.5594, 0.0873, 0.2288, 0.1245]) -Greedy action tensor([ 0.5766, -0.1947, 0.1033, -0.1021]) tensor([0.3857, 0.1784, 0.2403, 0.1957]) -Greedy action tensor([ 1.0820, -0.4987, -0.0807, -0.2152]) tensor([0.5581, 0.1149, 0.1745, 0.1525]) -Greedy action tensor([ 0.7123, -0.6337, 0.0357, -0.1816]) tensor([0.4592, 0.1195, 0.2334, 0.1878]) -Greedy action tensor([ 0.3297, -0.0975, -0.1264, -0.1326]) tensor([0.3430, 0.2237, 0.2173, 0.2160]) -Greedy action tensor([ 0.8087, -0.5464, -0.1565, -0.4669]) tensor([0.5214, 0.1345, 0.1986, 0.1456]) -Greedy action tensor([ 0.8907, -0.9885, 0.0234, -0.3110]) tensor([0.5338, 0.0815, 0.2242, 0.1605]) -Greedy action tensor([ 0.5015, -0.0056, -0.0137, -0.0281]) tensor([0.3586, 0.2160, 0.2142, 0.2112]) -Greedy action tensor([ 0.8530, -0.9981, -0.1275, -0.3799]) tensor([0.5483, 0.0861, 0.2057, 0.1598]) -Greedy action tensor([0.1915, 0.1729, 0.0301, 0.0418]) tensor([0.2707, 0.2658, 0.2304, 0.2331]) -Greedy action tensor([ 0.6516, -0.3364, -0.0668, 0.0025]) tensor([0.4198, 0.1563, 0.2046, 0.2193]) -Greedy action tensor([ 0.5575, -0.1197, 0.0631, -0.4471]) tensor([0.4026, 0.2045, 0.2455, 0.1474]) -Greedy action tensor([ 0.4121, -0.1382, 0.1999, -0.3268]) tensor([0.3492, 0.2014, 0.2825, 0.1668]) -Greedy action tensor([ 0.5872, -0.4686, -0.0896, -0.1935]) tensor([0.4321, 0.1503, 0.2196, 0.1980]) -Greedy action tensor([ 0.6445, -0.5479, 0.2360, -0.5636]) tensor([0.4411, 0.1339, 0.2932, 0.1318]) -Greedy action tensor([ 1.0830, -0.6100, -0.2196, -0.3723]) tensor([0.5920, 0.1089, 0.1609, 0.1381]) -Greedy action tensor([ 0.7571, -0.5000, -0.0467, -0.4745]) tensor([0.4941, 0.1406, 0.2212, 0.1442]) -Greedy action tensor([ 0.3571, 0.0573, -0.0859, -0.1287]) tensor([0.3335, 0.2471, 0.2142, 0.2052]) -Greedy action tensor([ 0.4273, -0.3885, -0.1108, -0.2063]) tensor([0.3911, 0.1730, 0.2284, 0.2076]) -Greedy action tensor([ 0.6129, -0.3700, 0.0389, -0.1680]) tensor([0.4175, 0.1562, 0.2351, 0.1912]) -Greedy action tensor([ 0.5321, -0.3364, -0.0049, -0.3327]) tensor([0.4123, 0.1730, 0.2410, 0.1736]) -Greedy action tensor([ 1.3378, -0.8100, 0.0099, -0.4318]) tensor([0.6443, 0.0752, 0.1708, 0.1098]) -Greedy action tensor([ 1.0846, -0.5171, -0.1535, -0.5106]) tensor([0.5902, 0.1189, 0.1711, 0.1197]) -Greedy action tensor([ 0.3675, -0.3277, -0.2278, -0.2285]) tensor([0.3844, 0.1918, 0.2120, 0.2118]) -Greedy action tensor([ 0.5111, 0.0445, -0.0117, -0.0650]) tensor([0.3595, 0.2254, 0.2131, 0.2020]) -Greedy action tensor([ 1.2975, -0.9074, -0.0483, -0.6140]) tensor([0.6586, 0.0726, 0.1714, 0.0974]) -Greedy action tensor([ 0.5478, -0.3681, -0.1237, -0.0484]) tensor([0.4062, 0.1625, 0.2075, 0.2238]) -Greedy action tensor([ 0.4815, -0.1829, -0.0401, -0.1087]) tensor([0.3756, 0.1933, 0.2230, 0.2082]) -Greedy action tensor([ 0.7622, -0.3715, 0.0037, -0.2319]) tensor([0.4629, 0.1490, 0.2168, 0.1713]) -Greedy action tensor([ 1.0647, -0.5904, -0.3270, -0.1909]) tensor([0.5798, 0.1108, 0.1442, 0.1652]) -Greedy action tensor([ 0.8421, -0.3557, 0.0941, -0.2373]) tensor([0.4728, 0.1427, 0.2238, 0.1607]) -Greedy action tensor([ 0.9599, -0.6576, -0.0719, -0.5081]) tensor([0.5602, 0.1111, 0.1996, 0.1291]) -Greedy action tensor([ 0.5376, -0.4274, -0.2993, -0.4470]) tensor([0.4571, 0.1742, 0.1980, 0.1708]) -Greedy action tensor([ 0.7822, -0.4897, 0.0285, -0.4214]) tensor([0.4875, 0.1367, 0.2295, 0.1463]) -Greedy action tensor([ 0.6214, -0.4898, -0.1800, -0.2148]) tensor([0.4523, 0.1489, 0.2029, 0.1960]) -Greedy action tensor([ 1.0428, -0.5455, 0.0259, -0.4540]) tensor([0.5587, 0.1141, 0.2021, 0.1251]) -Greedy action tensor([ 0.8904, -0.3833, 0.1456, -0.3777]) tensor([0.4912, 0.1374, 0.2332, 0.1382]) -Greedy action tensor([ 0.8274, -0.5010, -0.0702, -0.4432]) tensor([0.5120, 0.1356, 0.2087, 0.1437]) -Greedy action tensor([ 1.1409, -0.5499, 0.0551, -0.8676]) tensor([0.6038, 0.1113, 0.2039, 0.0810]) -Greedy action tensor([ 0.8948, -0.5279, -0.0920, -0.2512]) tensor([0.5177, 0.1248, 0.1930, 0.1646]) -Greedy action tensor([ 1.0202, -0.6462, -0.0074, -0.2235]) tensor([0.5449, 0.1030, 0.1950, 0.1571]) -Greedy action tensor([ 0.1936, -0.0423, -0.0774, -0.3843]) tensor([0.3212, 0.2537, 0.2449, 0.1802]) -Greedy action tensor([ 1.0869, -0.7335, -0.0118, -0.7369]) tensor([0.6036, 0.0978, 0.2012, 0.0974]) -Greedy action tensor([ 0.4397, -0.2934, -0.0307, -0.0616]) tensor([0.3689, 0.1772, 0.2305, 0.2234]) -Greedy action tensor([ 0.9189, -0.5015, -0.0111, -0.3684]) tensor([0.5230, 0.1264, 0.2063, 0.1443]) -Greedy action tensor([ 0.3580, -0.0222, -0.0887, -0.2274]) tensor([0.3472, 0.2374, 0.2221, 0.1933]) -Greedy action tensor([ 1.1664, -0.7522, 0.0113, -0.7433]) tensor([0.6211, 0.0912, 0.1957, 0.0920]) -Greedy action tensor([ 0.3418, -0.1097, -0.1752, -0.2890]) tensor([0.3617, 0.2302, 0.2157, 0.1924]) -Greedy action tensor([ 0.8806, -0.4521, -0.0369, -0.2698]) tensor([0.5051, 0.1332, 0.2018, 0.1599]) -Greedy action tensor([ 0.4694, -0.4309, 0.0188, -0.1922]) tensor([0.3907, 0.1588, 0.2490, 0.2016]) -Greedy action tensor([ 0.9846, -0.4930, -0.1107, -0.5928]) tensor([0.5652, 0.1290, 0.1890, 0.1167]) -Greedy action tensor([ 0.8913, -0.5904, 0.0472, -0.3345]) tensor([0.5126, 0.1165, 0.2204, 0.1505]) -Greedy action tensor([ 0.6432, 0.0251, -0.0560, -0.3795]) tensor([0.4174, 0.2250, 0.2075, 0.1501]) -Greedy action tensor([ 0.2636, -0.3061, -0.2167, -0.0712]) tensor([0.3449, 0.1951, 0.2133, 0.2467]) -Greedy action tensor([ 0.6701, -0.2903, 0.0229, -0.0775]) tensor([0.4202, 0.1608, 0.2200, 0.1990]) -Greedy action tensor([ 0.9307, -0.6853, -0.0318, -0.5686]) tensor([0.5543, 0.1101, 0.2117, 0.1238]) -Greedy action tensor([ 0.7791, -0.5682, -0.1289, -0.2904]) tensor([0.4984, 0.1296, 0.2010, 0.1710]) -Greedy action tensor([ 0.5781, -0.3845, -0.0444, -0.2211]) tensor([0.4223, 0.1613, 0.2266, 0.1899]) -Greedy action tensor([ 0.5188, -0.1514, -0.0970, -0.0555]) tensor([0.3824, 0.1956, 0.2066, 0.2153]) -Greedy action tensor([-1.8069, -0.4140, 0.6061, -0.0954]) tensor([0.0460, 0.1853, 0.5139, 0.2548]) -Greedy action tensor([-1.8493, -0.4367, 0.6177, -0.1348]) tensor([0.0445, 0.1829, 0.5251, 0.2474]) -Greedy action tensor([-1.8901, -0.4531, 0.6421, -0.1566]) tensor([0.0426, 0.1795, 0.5365, 0.2414]) -Greedy action tensor([-1.9450, -0.4475, 0.6670, -0.1802]) tensor([0.0401, 0.1793, 0.5464, 0.2342]) -Greedy action tensor([-1.8335, -0.2850, 0.5830, -0.1131]) tensor([0.0444, 0.2091, 0.4981, 0.2483]) -Greedy action tensor([-1.8363, -0.3347, 0.6132, -0.1057]) tensor([0.0440, 0.1976, 0.5099, 0.2485]) -Greedy action tensor([-1.9413, -0.4514, 0.6654, -0.1784]) tensor([0.0403, 0.1788, 0.5461, 0.2349]) -Greedy action tensor([-1.8235, -0.3860, 0.6533, -0.1018]) tensor([0.0440, 0.1854, 0.5242, 0.2464]) -Greedy action tensor([-1.9192, -0.3587, 0.6399, -0.1650]) tensor([0.0409, 0.1946, 0.5283, 0.2362]) -Greedy action tensor([-1.8143, -0.4213, 0.6057, -0.1094]) tensor([0.0459, 0.1849, 0.5165, 0.2526]) -Greedy action tensor([-1.7497, -0.4321, 0.5388, -0.0559]) tensor([0.0499, 0.1864, 0.4921, 0.2715]) -Greedy action tensor([-1.6284, -0.4430, 0.5064, -0.0778]) tensor([0.0573, 0.1876, 0.4848, 0.2703]) -Greedy action tensor([-1.7585, -0.4587, 0.5840, -0.1323]) tensor([0.0496, 0.1820, 0.5162, 0.2522]) -Greedy action tensor([-1.8021, -0.4693, 0.5915, -0.1131]) tensor([0.0473, 0.1792, 0.5177, 0.2559]) -Greedy action tensor([-1.8656, -0.4515, 0.6297, -0.1456]) tensor([0.0438, 0.1802, 0.5313, 0.2447]) -Greedy action tensor([-1.6805, -0.4762, 0.5087, -0.0125]) tensor([0.0539, 0.1796, 0.4809, 0.2856]) -Greedy action tensor([-1.5679, -0.5193, 0.5011, -0.0403]) tensor([0.0611, 0.1742, 0.4834, 0.2813]) -Greedy action tensor([-1.7857, -0.1654, 0.5567, -0.0618]) tensor([0.0453, 0.2291, 0.4716, 0.2540]) -Greedy action tensor([-1.9042, -0.4716, 0.6513, -0.1626]) tensor([0.0421, 0.1762, 0.5417, 0.2400]) -Greedy action tensor([-1.9249, -0.4516, 0.6608, -0.1693]) tensor([0.0409, 0.1787, 0.5435, 0.2369]) -Greedy action tensor([-1.9193, -0.4534, 0.6580, -0.1694]) tensor([0.0412, 0.1786, 0.5428, 0.2373]) -Greedy action tensor([-1.7624, -0.4598, 0.5775, -0.0865]) tensor([0.0490, 0.1803, 0.5088, 0.2619]) -Greedy action tensor([-1.5310, -0.4996, 0.4422, 0.0394]) tensor([0.0633, 0.1774, 0.4551, 0.3042]) -Greedy action tensor([-1.8926, -0.3470, 0.6416, -0.1496]) tensor([0.0416, 0.1953, 0.5250, 0.2380]) -Greedy action tensor([-1.8702, -0.3724, 0.6195, -0.1473]) tensor([0.0432, 0.1933, 0.5213, 0.2421]) -Greedy action tensor([-1.9065, -0.4551, 0.6546, -0.1592]) tensor([0.0417, 0.1782, 0.5405, 0.2395]) -Greedy action tensor([-1.7480, -0.2766, 0.6626, 0.0217]) tensor([0.0447, 0.1947, 0.4981, 0.2624]) -Greedy action tensor([-1.8962, -0.2381, 0.6080, -0.1449]) tensor([0.0412, 0.2165, 0.5046, 0.2377]) -Greedy action tensor([-1.9155, -0.3927, 0.6488, -0.1638]) tensor([0.0411, 0.1884, 0.5337, 0.2368]) -Greedy action tensor([-0.3103, 0.4090, 0.7405, 1.4100]) tensor([0.0870, 0.1785, 0.2487, 0.4858]) -Greedy action tensor([-1.8642, -0.4645, 0.6362, -0.1024]) tensor([0.0434, 0.1758, 0.5284, 0.2525]) -Greedy action tensor([-0.9406, 0.8723, 0.1926, 0.0063]) tensor([0.0781, 0.4783, 0.2424, 0.2012]) -Greedy action tensor([-1.9009, -0.4233, 0.6440, -0.1571]) tensor([0.0419, 0.1838, 0.5344, 0.2399]) -Greedy action tensor([-1.7748, -0.4682, 0.6542, -0.0232]) tensor([0.0459, 0.1694, 0.5204, 0.2643]) -Greedy action tensor([-1.8134, -0.4330, 0.5975, -0.1394]) tensor([0.0466, 0.1853, 0.5194, 0.2486]) -Greedy action tensor([-1.1119, 0.7442, 0.1546, 0.3058]) tensor([0.0663, 0.4245, 0.2354, 0.2738]) -Greedy action tensor([-1.8054, -0.3230, 0.5762, -0.0758]) tensor([0.0457, 0.2014, 0.4950, 0.2579]) -Greedy action tensor([-0.8217, 0.1342, 0.1979, 0.0719]) tensor([0.1134, 0.2950, 0.3144, 0.2772]) -Greedy action tensor([-1.5990, 0.2497, 0.3767, 0.0722]) tensor([0.0503, 0.3195, 0.3627, 0.2675]) -Greedy action tensor([-1.9035, -0.3129, 0.6241, -0.1650]) tensor([0.0415, 0.2034, 0.5192, 0.2359]) -Greedy action tensor([-1.7368, -0.4701, 0.5680, -0.0773]) tensor([0.0504, 0.1790, 0.5055, 0.2651]) -Greedy action tensor([-1.9009, -0.3812, 0.6398, -0.1492]) tensor([0.0416, 0.1903, 0.5282, 0.2399]) -Greedy action tensor([-1.7752, -0.0973, 0.5367, -0.1566]) tensor([0.0465, 0.2491, 0.4696, 0.2348]) -Greedy action tensor([-1.9216, -0.4381, 0.6563, -0.1675]) tensor([0.0411, 0.1810, 0.5407, 0.2372]) -Greedy action tensor([-1.4664, -0.5614, 0.4309, -0.0097]) tensor([0.0693, 0.1713, 0.4621, 0.2974]) -Greedy action tensor([-0.8320, 0.0137, 0.4614, 1.2372]) tensor([0.0671, 0.1564, 0.2447, 0.5317]) -Greedy action tensor([0.3952, 0.9032, 0.4079, 1.0984]) tensor([0.1756, 0.2918, 0.1778, 0.3547]) -Greedy action tensor([-1.9314, -0.4378, 0.6552, -0.1724]) tensor([0.0407, 0.1814, 0.5412, 0.2366]) -Greedy action tensor([-1.9272, -0.4277, 0.6594, -0.1703]) tensor([0.0407, 0.1824, 0.5409, 0.2359]) -Greedy action tensor([-1.8984, -0.4465, 0.6523, -0.1488]) tensor([0.0419, 0.1792, 0.5376, 0.2413]) -Greedy action tensor([-1.9194, -0.4177, 0.6527, -0.1662]) tensor([0.0411, 0.1843, 0.5376, 0.2370]) -Greedy action tensor([-1.7899, -0.3317, 0.6059, -0.0764]) tensor([0.0458, 0.1970, 0.5030, 0.2542]) -Greedy action tensor([-1.9198, -0.3208, 0.6334, -0.1563]) tensor([0.0406, 0.2009, 0.5217, 0.2368]) -Greedy action tensor([-1.8100, -0.4368, 0.5965, -0.1084]) tensor([0.0465, 0.1834, 0.5154, 0.2547]) -Greedy action tensor([-0.2891, 0.0264, 0.7864, 1.3303]) tensor([0.0966, 0.1324, 0.2832, 0.4878]) -Greedy action tensor([-1.8491, -0.4352, 0.6177, -0.1294]) tensor([0.0445, 0.1829, 0.5242, 0.2484]) -Greedy action tensor([-1.6873, -0.4894, 0.7532, 0.1551]) tensor([0.0452, 0.1499, 0.5193, 0.2856]) -Greedy action tensor([-1.8908, -0.4443, 0.6397, -0.1537]) tensor([0.0426, 0.1809, 0.5347, 0.2419]) -Greedy action tensor([-1.6770, -0.3050, 0.5635, 0.0739]) tensor([0.0497, 0.1962, 0.4675, 0.2865]) -Greedy action tensor([-1.7916, -0.4059, 0.5975, -0.0870]) tensor([0.0467, 0.1868, 0.5095, 0.2570]) -Greedy action tensor([-1.2848, -0.2581, 0.7194, 0.4371]) tensor([0.0595, 0.1661, 0.4415, 0.3329]) -Greedy action tensor([-1.8730, -0.4318, 0.6302, -0.1379]) tensor([0.0433, 0.1828, 0.5287, 0.2453]) -Greedy action tensor([-1.1624, 0.4620, 0.1847, -0.6841]) tensor([0.0867, 0.4400, 0.3334, 0.1399]) -Greedy action tensor([-1.0265, 0.1479, 0.1215, 0.3592]) tensor([0.0878, 0.2842, 0.2768, 0.3511]) -Greedy action tensor([-0.9954, -0.0799, -0.0977, -0.1269]) tensor([0.1200, 0.2997, 0.2944, 0.2859]) -Greedy action tensor([-1.2826, -0.4985, 0.3412, -0.0134]) tensor([0.0846, 0.1853, 0.4291, 0.3010]) -Greedy action tensor([-1.6349, -0.4213, 0.5043, -0.0295]) tensor([0.0561, 0.1887, 0.4761, 0.2792]) -Greedy action tensor([ 0.4282, -0.1117, 0.1155, 0.1835]) tensor([0.3229, 0.1882, 0.2362, 0.2528]) -Greedy action tensor([-1.4897, 0.0749, 0.3164, 0.0061]) tensor([0.0612, 0.2928, 0.3727, 0.2733]) -Greedy action tensor([-1.9195, -0.4507, 0.6490, -0.1652]) tensor([0.0414, 0.1797, 0.5398, 0.2391]) -Greedy action tensor([-1.8010, -0.3743, 0.6191, -0.0672]) tensor([0.0453, 0.1887, 0.5095, 0.2565]) -Greedy action tensor([-0.7834, 0.4543, 0.2551, 0.3028]) tensor([0.0977, 0.3368, 0.2760, 0.2895]) -Greedy action tensor([-1.6893, -0.5304, 0.5462, -0.0473]) tensor([0.0535, 0.1704, 0.5000, 0.2762]) -Greedy action tensor([-1.6777, -0.4615, 0.5373, -0.0488]) tensor([0.0537, 0.1811, 0.4916, 0.2736]) -Greedy action tensor([-1.9116, -0.4566, 0.6625, -0.1533]) tensor([0.0413, 0.1770, 0.5420, 0.2397]) -Greedy action tensor([-1.5538, -0.4133, 0.6315, 0.0571]) tensor([0.0555, 0.1735, 0.4933, 0.2777]) -Greedy action tensor([-1.9110, -0.4540, 0.6554, -0.1646]) tensor([0.0416, 0.1785, 0.5414, 0.2385]) -Greedy action tensor([-1.2491, -0.5125, 0.3059, 0.1881]) tensor([0.0831, 0.1736, 0.3935, 0.3498]) -Greedy action tensor([-1.9105, -0.3955, 0.6478, -0.1602]) tensor([0.0413, 0.1878, 0.5332, 0.2377]) -Greedy action tensor([-1.2540, 0.7247, 0.1778, 0.2439]) tensor([0.0592, 0.4282, 0.2478, 0.2648]) -Greedy action tensor([-1.8380, -0.3219, 0.6229, -0.1106]) tensor([0.0437, 0.1989, 0.5117, 0.2457]) -Greedy action tensor([-0.1405, -0.2519, -0.1251, -0.1357]) tensor([0.2554, 0.2285, 0.2594, 0.2567]) -Greedy action tensor([ 1.2743, -0.8748, 0.2303, 0.8666]) tensor([0.4686, 0.0546, 0.1650, 0.3117]) -Greedy action tensor([-0.2797, 0.4870, -0.9511, -0.2319]) tensor([0.2122, 0.4568, 0.1084, 0.2226]) -Greedy action tensor([ 0.0259, -0.7138, -0.3184, -0.2524]) tensor([0.3398, 0.1622, 0.2408, 0.2572]) -Greedy action tensor([ 0.5079, -1.0838, 0.1386, 1.0540]) tensor([0.2761, 0.0562, 0.1909, 0.4768]) -Greedy action tensor([ 0.5982, -0.0861, -0.7511, -0.7393]) tensor([0.4935, 0.2490, 0.1280, 0.1295]) -Greedy action tensor([ 1.6782, -1.8298, 0.2111, -0.5435]) tensor([0.7305, 0.0219, 0.1684, 0.0792]) -Greedy action tensor([ 0.1495, -0.1442, 0.9673, 0.5974]) tensor([0.1793, 0.1337, 0.4063, 0.2807]) -Greedy action tensor([ 0.4938, 0.0673, -0.1035, 0.0781]) tensor([0.3493, 0.2280, 0.1922, 0.2305]) -Greedy action tensor([-0.5616, 0.0134, 0.2082, -0.2473]) tensor([0.1586, 0.2818, 0.3425, 0.2171]) -Greedy action tensor([-0.2860, -0.6677, 0.3861, 0.1480]) tensor([0.1929, 0.1317, 0.3777, 0.2977]) -Greedy action tensor([ 0.1342, -0.6978, -0.1825, 0.8218]) tensor([0.2408, 0.1048, 0.1754, 0.4789]) -Greedy action tensor([-0.7244, 1.1175, -0.2956, -0.9804]) tensor([0.1040, 0.6559, 0.1596, 0.0805]) -Greedy action tensor([-0.3341, -0.8505, 0.1655, -0.9188]) tensor([0.2630, 0.1569, 0.4335, 0.1466]) -Greedy action tensor([-0.0859, -1.1320, 1.7009, -0.6690]) tensor([0.1269, 0.0446, 0.7577, 0.0708]) -Greedy action tensor([ 1.2272, -0.9016, 1.3283, 0.6206]) tensor([0.3609, 0.0429, 0.3993, 0.1968]) -Greedy action tensor([ 0.4393, -0.1321, 0.6412, -1.2870]) tensor([0.3371, 0.1904, 0.4125, 0.0600]) -Greedy action tensor([ 1.4059, -0.9270, 0.3926, -0.1094]) tensor([0.5953, 0.0578, 0.2161, 0.1308]) -Greedy action tensor([ 0.9784, -0.2540, 0.0694, 0.4268]) tensor([0.4404, 0.1284, 0.1775, 0.2537]) -Greedy action tensor([-0.7645, -0.7476, 0.0445, -0.8401]) tensor([0.1927, 0.1960, 0.4327, 0.1787]) -Greedy action tensor([-1.0263, -1.3163, -0.3315, 0.2903]) tensor([0.1337, 0.1000, 0.2677, 0.4986]) -Greedy action tensor([ 0.4664, 0.3668, 0.8727, -0.6856]) tensor([0.2686, 0.2432, 0.4033, 0.0849]) -Greedy action tensor([ 0.2017, -0.3967, 1.4009, -0.4180]) tensor([0.1850, 0.1017, 0.6137, 0.0996]) -Greedy action tensor([ 0.1244, 0.9751, -0.9592, 0.9096]) tensor([0.1703, 0.3987, 0.0576, 0.3734]) -Greedy action tensor([ 0.4581, 0.3142, -0.5365, 0.1250]) tensor([0.3387, 0.2933, 0.1253, 0.2427]) -Greedy action tensor([-0.3312, -1.2273, 0.1722, -0.1470]) tensor([0.2345, 0.0957, 0.3879, 0.2819]) -Greedy action tensor([ 0.6267, -0.5338, -0.2400, 0.4255]) tensor([0.3919, 0.1228, 0.1647, 0.3205]) -Greedy action tensor([ 1.8402, 0.2226, -0.1034, -0.4242]) tensor([0.6918, 0.1372, 0.0991, 0.0719]) -Greedy action tensor([ 0.9639, -0.5414, 0.0034, -0.1298]) tensor([0.5156, 0.1144, 0.1973, 0.1727]) -Greedy action tensor([-0.6793, -1.6838, -0.6433, 0.5122]) tensor([0.1756, 0.0643, 0.1820, 0.5781]) -Greedy action tensor([-1.5098, -0.9326, -0.4346, 0.3385]) tensor([0.0829, 0.1477, 0.2430, 0.5264]) -Greedy action tensor([-0.2239, -1.4954, 0.5118, -0.6242]) tensor([0.2477, 0.0694, 0.5169, 0.1660]) -Greedy action tensor([ 1.2272, 0.5091, 0.4335, -0.0032]) tensor([0.4480, 0.2185, 0.2026, 0.1309]) -Greedy action tensor([-1.4253, -0.3944, 0.4741, -0.1580]) tensor([0.0712, 0.1997, 0.4760, 0.2530]) -Greedy action tensor([ 1.4196, -0.3754, 0.0467, 0.4006]) tensor([0.5617, 0.0933, 0.1423, 0.2027]) -Greedy action tensor([-0.0404, 0.0727, -0.0492, -0.2256]) tensor([0.2537, 0.2841, 0.2515, 0.2108]) -Greedy action tensor([ 0.0551, -0.7472, 0.8968, 1.0892]) tensor([0.1519, 0.0681, 0.3526, 0.4274]) -Greedy action tensor([ 0.7093, -0.2988, -0.1816, 0.5728]) tensor([0.3777, 0.1378, 0.1550, 0.3295]) -Greedy action tensor([-0.5313, -0.0383, 0.8915, -0.4436]) tensor([0.1269, 0.2078, 0.5267, 0.1386]) -Greedy action tensor([-1.0871, 0.5558, 0.1451, -0.5588]) tensor([0.0885, 0.4577, 0.3036, 0.1502]) -Greedy action tensor([-0.6440, -0.5674, -1.2652, 0.4459]) tensor([0.1789, 0.1931, 0.0961, 0.5319]) -Greedy action tensor([ 1.6376, 0.0544, 0.0376, -0.1311]) tensor([0.6338, 0.1301, 0.1280, 0.1081]) -Greedy action tensor([-0.6962, 0.1095, -0.5894, -0.0938]) tensor([0.1619, 0.3623, 0.1801, 0.2957]) -Greedy action tensor([-0.1083, -1.3945, 0.5371, 0.9888]) tensor([0.1619, 0.0447, 0.3086, 0.4848]) -Greedy action tensor([-0.2433, -0.9626, -0.5773, -0.4724]) tensor([0.3335, 0.1625, 0.2388, 0.2652]) -Greedy action tensor([ 0.4655, -1.1101, -0.3009, -0.2970]) tensor([0.4677, 0.0968, 0.2173, 0.2182]) -Greedy action tensor([ 1.4560, -0.7697, 0.2678, 1.4959]) tensor([0.4076, 0.0440, 0.1242, 0.4242]) -Greedy action tensor([-0.7601, -0.9845, -0.7945, -0.0279]) tensor([0.2064, 0.1649, 0.1994, 0.4292]) -Greedy action tensor([ 0.2608, -0.3930, 0.2365, -0.3917]) tensor([0.3315, 0.1724, 0.3235, 0.1726]) -Greedy action tensor([0.3283, 0.0334, 0.8668, 0.1494]) tensor([0.2329, 0.1734, 0.3990, 0.1947]) -Greedy action tensor([ 0.2635, -0.9656, -0.8142, 0.1727]) tensor([0.3927, 0.1149, 0.1337, 0.3587]) -Greedy action tensor([-1.3473, -0.0967, 0.7953, -0.9871]) tensor([0.0692, 0.2417, 0.5898, 0.0992]) -Greedy action tensor([ 0.1891, -0.6665, 0.5463, -0.0837]) tensor([0.2766, 0.1175, 0.3953, 0.2106]) -Greedy action tensor([ 0.3184, -0.0179, 1.4253, 0.7144]) tensor([0.1606, 0.1148, 0.4859, 0.2387]) -Greedy action tensor([ 0.1713, -0.9449, -0.6259, 1.2728]) tensor([0.2089, 0.0684, 0.0941, 0.6285]) -Greedy action tensor([ 0.6756, -0.3028, 0.0760, -0.4750]) tensor([0.4462, 0.1677, 0.2450, 0.1412]) -Greedy action tensor([ 0.8158, -1.2179, -0.6741, 0.4839]) tensor([0.4822, 0.0631, 0.1087, 0.3460]) -Greedy action tensor([-1.1658, -1.2599, -0.6076, -0.0078]) tensor([0.1462, 0.1330, 0.2554, 0.4653]) -Greedy action tensor([-0.8002, 0.0281, 0.7943, -0.6694]) tensor([0.1069, 0.2447, 0.5265, 0.1218]) -Greedy action tensor([-0.1623, -0.4393, 0.4242, -0.4293]) tensor([0.2314, 0.1754, 0.4160, 0.1772]) -Greedy action tensor([ 0.5305, -0.8606, 0.7657, -1.1346]) tensor([0.3699, 0.0920, 0.4680, 0.0700]) -Greedy action tensor([ 0.0981, 0.0892, 1.0868, -0.9515]) tensor([0.1988, 0.1971, 0.5345, 0.0696]) -Greedy action tensor([ 0.1101, 0.2316, -0.5669, -0.6327]) tensor([0.3212, 0.3627, 0.1632, 0.1528]) -Greedy action tensor([-0.3902, -1.3496, -0.2451, 0.4384]) tensor([0.2071, 0.0793, 0.2394, 0.4742]) -Greedy action tensor([-0.7068, -0.2329, 0.6317, -0.4591]) tensor([0.1299, 0.2086, 0.4952, 0.1664]) -Greedy action tensor([-0.3502, -1.5458, -0.6292, 0.4337]) tensor([0.2354, 0.0712, 0.1781, 0.5154]) -Greedy action tensor([-0.1238, -0.7908, -0.2184, 0.1605]) tensor([0.2665, 0.1368, 0.2425, 0.3542]) -Greedy action tensor([ 1.9395, -0.8334, 0.7908, 1.2324]) tensor([0.5340, 0.0334, 0.1693, 0.2633]) -Greedy action tensor([ 0.7443, 0.4881, 0.4382, -0.4448]) tensor([0.3553, 0.2750, 0.2616, 0.1082]) -Greedy action tensor([ 0.8178, -0.6002, 0.2649, 0.3743]) tensor([0.4066, 0.0985, 0.2339, 0.2610]) -Greedy action tensor([ 0.8124, 0.0295, -0.4235, 1.4502]) tensor([0.2747, 0.1256, 0.0798, 0.5199]) -Greedy action tensor([ 0.3320, -0.8683, 1.7127, 0.2502]) tensor([0.1613, 0.0486, 0.6415, 0.1486]) -Greedy action tensor([ 0.3524, -0.6087, 0.6458, -0.4030]) tensor([0.3132, 0.1198, 0.4199, 0.1471]) -Greedy action tensor([ 0.1300, -1.2405, 0.9138, -0.8758]) tensor([0.2625, 0.0667, 0.5748, 0.0960]) -Greedy action tensor([-0.9511, -1.0495, -0.4931, -0.2646]) tensor([0.1827, 0.1656, 0.2888, 0.3629]) -Greedy action tensor([ 0.9592, -0.6692, -0.4997, -0.7548]) tensor([0.6216, 0.1220, 0.1445, 0.1120]) -Greedy action tensor([1.0100, 0.7305, 0.2486, 0.0740]) tensor([0.3824, 0.2891, 0.1786, 0.1500]) -Greedy action tensor([-0.1595, 0.1448, 0.8354, -0.5121]) tensor([0.1735, 0.2352, 0.4693, 0.1220]) -Greedy action tensor([ 1.5205, -0.3208, 0.2884, 0.8710]) tensor([0.5069, 0.0804, 0.1479, 0.2648]) -Greedy action tensor([ 0.3509, -1.3781, -0.3818, 0.1861]) tensor([0.3990, 0.0708, 0.1918, 0.3384]) -Greedy action tensor([ 0.4911, -1.1997, -0.9325, 0.2066]) tensor([0.4592, 0.0847, 0.1106, 0.3455]) -Greedy action tensor([ 1.8362, -0.8733, 0.6877, 1.0421]) tensor([0.5448, 0.0363, 0.1728, 0.2462]) -Greedy action tensor([ 1.0936, -0.5171, -0.8033, 0.6391]) tensor([0.5039, 0.1006, 0.0756, 0.3199]) -Greedy action tensor([ 1.1250, -0.2257, -0.5669, 0.3504]) tensor([0.5252, 0.1360, 0.0967, 0.2420]) -Greedy action tensor([ 1.3179, 0.0573, -0.9814, 0.4253]) tensor([0.5576, 0.1581, 0.0559, 0.2284]) -Greedy action tensor([ 1.2597, -0.4967, -0.3134, 0.4778]) tensor([0.5442, 0.0940, 0.1129, 0.2490]) -Greedy action tensor([ 1.2157, -0.2546, -0.6931, 0.0368]) tensor([0.5932, 0.1364, 0.0879, 0.1825]) -Greedy action tensor([ 1.3666, -0.1295, -0.3121, -0.0703]) tensor([0.6067, 0.1359, 0.1132, 0.1442]) -Greedy action tensor([ 1.5600, -0.4085, -0.5109, 0.2920]) tensor([0.6464, 0.0903, 0.0815, 0.1819]) -Greedy action tensor([ 1.9522, -0.2881, -0.6697, 0.0025]) tensor([0.7568, 0.0805, 0.0550, 0.1077]) -Greedy action tensor([ 0.9112, -0.3226, -0.1884, 0.2397]) tensor([0.4684, 0.1364, 0.1560, 0.2393]) -Greedy action tensor([ 1.6074, -1.4202, -0.1714, 0.3445]) tensor([0.6666, 0.0323, 0.1126, 0.1886]) -Greedy action tensor([ 2.0375, -0.5243, -0.2354, 0.3529]) tensor([0.7322, 0.0565, 0.0754, 0.1358]) -Greedy action tensor([ 2.2432, -1.7219, 0.1720, 0.9654]) tensor([0.7024, 0.0133, 0.0885, 0.1957]) -Greedy action tensor([ 0.8796, 0.0850, -0.5538, -0.2008]) tensor([0.4927, 0.2226, 0.1175, 0.1673]) -Greedy action tensor([ 1.1694, 0.1128, -0.5311, 0.0657]) tensor([0.5371, 0.1867, 0.0981, 0.1781]) -Greedy action tensor([ 2.0962, -1.0084, -0.0052, 0.5434]) tensor([0.7253, 0.0325, 0.0887, 0.1535]) -Greedy action tensor([ 2.3210, -1.0799, -0.2041, 0.6042]) tensor([0.7734, 0.0258, 0.0619, 0.1389]) -Greedy action tensor([ 1.3082, -0.4264, -0.4595, 0.3586]) tensor([0.5767, 0.1018, 0.0985, 0.2231]) -Greedy action tensor([ 1.7083, -0.8342, -0.3793, 0.2451]) tensor([0.6973, 0.0549, 0.0864, 0.1614]) -Greedy action tensor([ 1.5544, -0.6872, -0.6952, 0.1830]) tensor([0.6824, 0.0725, 0.0720, 0.1732]) -Greedy action tensor([ 1.4538, -0.6654, -0.0917, 0.4636]) tensor([0.5866, 0.0705, 0.1251, 0.2179]) -Greedy action tensor([ 0.4022, -0.4614, -0.0205, -0.0600]) tensor([0.3694, 0.1558, 0.2421, 0.2327]) -Greedy action tensor([ 1.6708, 0.3040, 0.0229, -0.5965]) tensor([0.6448, 0.1644, 0.1241, 0.0668]) -Greedy action tensor([ 1.3466, -0.2015, -0.7571, 0.2058]) tensor([0.6045, 0.1286, 0.0738, 0.1932]) -Greedy action tensor([ 1.4512, -0.5086, -0.1754, 0.4288]) tensor([0.5892, 0.0830, 0.1158, 0.2119]) -Greedy action tensor([ 1.7438, -0.3358, -0.7877, 0.5692]) tensor([0.6607, 0.0826, 0.0526, 0.2041]) -Greedy action tensor([ 1.3675, -0.8000, 0.0352, 0.1298]) tensor([0.5994, 0.0686, 0.1582, 0.1739]) -Greedy action tensor([ 1.2982, -0.3139, -0.7943, 0.4247]) tensor([0.5746, 0.1146, 0.0709, 0.2399]) -Greedy action tensor([ 1.3407, -0.2314, -0.0477, 0.1872]) tensor([0.5641, 0.1171, 0.1407, 0.1780]) -Greedy action tensor([ 1.2673, -0.5028, -0.3847, 0.4750]) tensor([0.5510, 0.0938, 0.1056, 0.2495]) -Greedy action tensor([ 1.5185, -0.7035, -0.2202, 0.2709]) tensor([0.6364, 0.0690, 0.1118, 0.1828]) -Greedy action tensor([ 1.9473, -0.5267, -0.7058, 0.3145]) tensor([0.7407, 0.0624, 0.0522, 0.1447]) -Greedy action tensor([ 2.0462, -0.0802, -0.4184, 0.3258]) tensor([0.7229, 0.0862, 0.0615, 0.1294]) -Greedy action tensor([ 0.8859, -0.1916, -0.1811, -1.4077]) tensor([0.5601, 0.1907, 0.1927, 0.0565]) -Greedy action tensor([ 1.8536, -1.1726, -0.4218, 0.5972]) tensor([0.6964, 0.0338, 0.0716, 0.1982]) -Greedy action tensor([ 1.3710, 0.2049, -0.1328, 0.2069]) tensor([0.5417, 0.1688, 0.1204, 0.1691]) -Greedy action tensor([ 2.0573, -0.4073, 0.6519, -0.4279]) tensor([0.7074, 0.0602, 0.1735, 0.0589]) -Greedy action tensor([ 2.3041, -1.0269, -0.1673, 0.2828]) tensor([0.7983, 0.0285, 0.0674, 0.1058]) -Greedy action tensor([ 1.7250, -0.3751, -0.5077, 0.0456]) tensor([0.7061, 0.0865, 0.0757, 0.1317]) -Greedy action tensor([ 1.9445, -0.5044, -0.3254, -0.0806]) tensor([0.7566, 0.0654, 0.0782, 0.0999]) -Greedy action tensor([ 1.1897, 0.0621, -0.8485, 0.2072]) tensor([0.5469, 0.1771, 0.0712, 0.2047]) -Greedy action tensor([ 1.1868, -0.3870, -0.3002, 0.1646]) tensor([0.5577, 0.1156, 0.1261, 0.2007]) -Greedy action tensor([ 1.0006, -0.2252, -0.2009, 0.2325]) tensor([0.4859, 0.1426, 0.1461, 0.2254]) -Greedy action tensor([ 2.1966, -0.5063, -0.7098, 0.2084]) tensor([0.7945, 0.0532, 0.0434, 0.1088]) -Greedy action tensor([ 1.1925, -0.4094, -0.3487, 0.3167]) tensor([0.5458, 0.1100, 0.1169, 0.2273]) -Greedy action tensor([ 1.2302, -0.4814, -0.1711, 0.2304]) tensor([0.5572, 0.1006, 0.1372, 0.2050]) -Greedy action tensor([ 1.5660, -0.6490, -0.6884, 0.3574]) tensor([0.6611, 0.0722, 0.0694, 0.1974]) -Greedy action tensor([ 0.8815, -0.4084, 0.0438, -0.3422]) tensor([0.4995, 0.1375, 0.2161, 0.1469]) -Greedy action tensor([ 1.5971, -0.3713, -0.5033, 0.4742]) tensor([0.6299, 0.0880, 0.0771, 0.2050]) -Greedy action tensor([ 2.2359, -1.4506, -0.0567, 0.2040]) tensor([0.7955, 0.0199, 0.0803, 0.1043]) -Greedy action tensor([ 0.6562, -0.3422, -0.3274, 0.3023]) tensor([0.4091, 0.1507, 0.1530, 0.2872]) -Greedy action tensor([ 1.1799, -0.4196, -0.2169, 0.1344]) tensor([0.5553, 0.1122, 0.1374, 0.1952]) -Greedy action tensor([ 1.2277, -0.0573, -0.3768, -0.3504]) tensor([0.5938, 0.1643, 0.1193, 0.1225]) -Greedy action tensor([ 0.8599, -0.2905, -0.3917, 0.7036]) tensor([0.4069, 0.1288, 0.1164, 0.3480]) -Greedy action tensor([ 1.4947, -0.5752, -0.2341, 0.2036]) tensor([0.6335, 0.0799, 0.1124, 0.1742]) -Greedy action tensor([ 1.7696, -0.5208, -0.1904, 0.1830]) tensor([0.6912, 0.0700, 0.0974, 0.1414]) -Greedy action tensor([ 1.7461, -0.7559, -0.4126, 0.8833]) tensor([0.6175, 0.0506, 0.0713, 0.2606]) -Greedy action tensor([ 1.5912, -0.5308, -0.1873, 0.3859]) tensor([0.6296, 0.0754, 0.1063, 0.1886]) -Greedy action tensor([ 2.2148, -0.9803, -0.4897, 0.6995]) tensor([0.7532, 0.0309, 0.0504, 0.1655]) -Greedy action tensor([ 1.5801, -0.4981, -0.3510, -0.0569]) tensor([0.6827, 0.0854, 0.0990, 0.1328]) -Greedy action tensor([ 1.9332, -0.3724, -0.7569, 0.3701]) tensor([0.7262, 0.0724, 0.0493, 0.1521]) -Greedy action tensor([ 1.0930, -0.3732, -0.2976, 0.5806]) tensor([0.4811, 0.1110, 0.1197, 0.2882]) -Greedy action tensor([ 1.7732, -0.9727, 0.0542, 0.5023]) tensor([0.6562, 0.0421, 0.1176, 0.1841]) -Greedy action tensor([ 1.9997, -1.1606, -0.1155, 0.4144]) tensor([0.7310, 0.0310, 0.0882, 0.1498]) -Greedy action tensor([ 1.3643, -0.1075, -0.0921, 0.1511]) tensor([0.5682, 0.1304, 0.1324, 0.1689]) -Greedy action tensor([ 0.9555, -0.0021, -0.1145, 0.0149]) tensor([0.4723, 0.1813, 0.1620, 0.1844]) -Greedy action tensor([ 1.9370, -0.4997, -0.2978, 0.6784]) tensor([0.6764, 0.0591, 0.0724, 0.1921]) -Greedy action tensor([ 1.5966, -0.0294, -0.0609, 0.1213]) tensor([0.6188, 0.1217, 0.1179, 0.1415]) -Greedy action tensor([ 1.8190, -0.6286, -0.2815, 0.9509]) tensor([0.6140, 0.0531, 0.0752, 0.2577]) -Greedy action tensor([ 1.9335, -0.4084, -0.4986, 0.6641]) tensor([0.6826, 0.0656, 0.0600, 0.1918]) -Greedy action tensor([ 1.6729, -1.0261, -0.2700, 0.4452]) tensor([0.6651, 0.0447, 0.0953, 0.1949]) -Greedy action tensor([ 1.9116, 0.2248, -0.5215, 0.0903]) tensor([0.6970, 0.1290, 0.0612, 0.1128]) -Greedy action tensor([ 0.9918, -0.3670, -0.0956, 0.4660]) tensor([0.4576, 0.1176, 0.1543, 0.2705]) -Greedy action tensor([ 2.0599, -0.1483, -0.5458, -0.1006]) tensor([0.7698, 0.0846, 0.0569, 0.0887]) -Greedy action tensor([ 1.7006, -0.6462, -0.2590, 0.4945]) tensor([0.6511, 0.0623, 0.0917, 0.1949]) -Greedy action tensor([ 1.3945, -0.3474, -0.2629, 0.4668]) tensor([0.5678, 0.0995, 0.1082, 0.2245]) -Greedy action tensor([ 1.2455, -0.5156, 0.0448, 0.4536]) tensor([0.5192, 0.0892, 0.1563, 0.2352]) -Greedy action tensor([ 1.2119, -0.3047, -0.1516, 0.2186]) tensor([0.5418, 0.1189, 0.1386, 0.2007]) -Greedy action tensor([ 1.5373, -0.2682, -0.1680, 0.3903]) tensor([0.6011, 0.0988, 0.1092, 0.1909]) -Greedy action tensor([ 2.3342, -0.8044, -0.3840, 0.9916]) tensor([0.7297, 0.0316, 0.0482, 0.1906]) -Greedy action tensor([ 1.2254, -0.4069, -0.8578, 0.1582]) tensor([0.6010, 0.1175, 0.0748, 0.2067]) -Greedy action tensor([ 2.1530, -0.3778, -0.5852, 0.4992]) tensor([0.7487, 0.0596, 0.0484, 0.1432]) -Greedy action tensor([ 0.5496, -0.1018, -0.2076, -0.5952]) tensor([0.4332, 0.2258, 0.2031, 0.1379]) -Greedy action tensor([ 0.9161, -0.3093, -0.2115, -0.0318]) tensor([0.4988, 0.1465, 0.1615, 0.1933]) -Greedy action tensor([ 0.9589, -0.0618, -0.6723, -0.7878]) tensor([0.5779, 0.2082, 0.1131, 0.1008]) -Greedy action tensor([ 0.9109, -0.4877, -0.0107, -0.5686]) tensor([0.5340, 0.1319, 0.2125, 0.1216]) -Greedy action tensor([ 0.3910, 0.0870, 0.0973, -0.2929]) tensor([0.3347, 0.2469, 0.2495, 0.1689]) -Greedy action tensor([ 0.3265, 0.1301, -0.1607, 0.0155]) tensor([0.3156, 0.2593, 0.1939, 0.2312]) -Greedy action tensor([ 1.0167, -0.4917, -0.0435, -0.3308]) tensor([0.5472, 0.1211, 0.1895, 0.1422]) -Greedy action tensor([ 0.8548, -0.2007, -0.1605, -0.1681]) tensor([0.4831, 0.1681, 0.1750, 0.1737]) -Greedy action tensor([ 1.1762, -0.8027, 0.0702, -0.5479]) tensor([0.6070, 0.0839, 0.2009, 0.1082]) -Greedy action tensor([ 0.5600, 0.0487, -0.6500, -0.2160]) tensor([0.4241, 0.2543, 0.1265, 0.1952]) -Greedy action tensor([ 0.7712, -0.6379, -0.1370, -0.2476]) tensor([0.4979, 0.1217, 0.2007, 0.1797]) -Greedy action tensor([ 0.9119, -0.6200, -0.0246, -0.4023]) tensor([0.5328, 0.1152, 0.2089, 0.1432]) -Greedy action tensor([ 0.5915, -0.3949, -0.1286, -0.2300]) tensor([0.4349, 0.1622, 0.2117, 0.1913]) -Greedy action tensor([ 1.2110, -0.8316, 0.0100, -0.7165]) tensor([0.6345, 0.0823, 0.1909, 0.0923]) -Greedy action tensor([ 0.7407, -0.4136, 0.0102, -0.5146]) tensor([0.4803, 0.1514, 0.2314, 0.1369]) -Greedy action tensor([ 1.0072, -0.4771, -0.2071, -0.5370]) tensor([0.5757, 0.1305, 0.1709, 0.1229]) -Greedy action tensor([ 0.5914, -0.5446, 0.1165, -0.5730]) tensor([0.4434, 0.1424, 0.2758, 0.1384]) -Greedy action tensor([ 0.5579, -0.5625, -0.0376, -0.1273]) tensor([0.4199, 0.1370, 0.2315, 0.2116]) -Greedy action tensor([ 1.1749, -0.8942, 0.1760, -0.6718]) tensor([0.6052, 0.0764, 0.2229, 0.0955]) -Greedy action tensor([ 1.0675, -0.5537, -0.0803, -0.2584]) tensor([0.5616, 0.1110, 0.1782, 0.1491]) -Greedy action tensor([ 0.6494, -0.1444, 0.4135, -0.7428]) tensor([0.4015, 0.1815, 0.3172, 0.0998]) -Greedy action tensor([ 0.3499, 0.0340, -0.0704, 0.0783]) tensor([0.3177, 0.2316, 0.2086, 0.2421]) -Greedy action tensor([ 0.6608, -0.3794, -0.1133, -0.0993]) tensor([0.4382, 0.1549, 0.2021, 0.2049]) -Greedy action tensor([ 0.9764, -0.5785, 0.1860, -0.4412]) tensor([0.5243, 0.1107, 0.2379, 0.1271]) -Greedy action tensor([ 0.6812, -0.4412, -0.0830, -0.1887]) tensor([0.4524, 0.1473, 0.2107, 0.1896]) -Greedy action tensor([ 0.4576, -0.3231, -0.0349, -0.1330]) tensor([0.3812, 0.1746, 0.2330, 0.2112]) -Greedy action tensor([ 0.8242, -0.3727, -0.2168, -0.4064]) tensor([0.5135, 0.1551, 0.1813, 0.1500]) -Greedy action tensor([ 0.2925, -0.0456, -0.0291, -0.1085]) tensor([0.3218, 0.2295, 0.2333, 0.2155]) -Greedy action tensor([ 0.8177, -0.6332, 0.1131, -0.6430]) tensor([0.5100, 0.1195, 0.2521, 0.1184]) -Greedy action tensor([ 0.3762, 0.0306, -0.1442, 0.0163]) tensor([0.3334, 0.2359, 0.1981, 0.2326]) -Greedy action tensor([ 0.9488, -0.5371, -0.1603, -0.3321]) tensor([0.5453, 0.1234, 0.1799, 0.1515]) -Greedy action tensor([ 0.4113, -0.3883, -0.2042, -0.0855]) tensor([0.3849, 0.1730, 0.2080, 0.2342]) -Greedy action tensor([ 0.5229, -0.2349, -0.0560, -0.1363]) tensor([0.3927, 0.1841, 0.2201, 0.2031]) -Greedy action tensor([ 0.7340, -0.3751, 0.0034, -0.1044]) tensor([0.4457, 0.1470, 0.2146, 0.1927]) -Greedy action tensor([ 0.7281, -0.4443, -0.0604, -0.2333]) tensor([0.4659, 0.1442, 0.2118, 0.1781]) -Greedy action tensor([ 0.5232, 0.1676, -0.2743, -0.1451]) tensor([0.3754, 0.2631, 0.1691, 0.1924]) -Greedy action tensor([ 1.0152, -0.9126, 0.1022, -0.5470]) tensor([0.5693, 0.0828, 0.2285, 0.1194]) -Greedy action tensor([ 0.9453, -1.1119, 0.2501, -0.4779]) tensor([0.5354, 0.0684, 0.2672, 0.1290]) -Greedy action tensor([ 0.8052, -0.4415, -0.1101, -0.4358]) tensor([0.5058, 0.1454, 0.2025, 0.1462]) -Greedy action tensor([ 0.4835, 0.0854, -0.0597, -0.1199]) tensor([0.3572, 0.2399, 0.2075, 0.1954]) -Greedy action tensor([ 1.0161, -0.2611, -0.0195, -0.0715]) tensor([0.5074, 0.1415, 0.1801, 0.1710]) -Greedy action tensor([ 1.1527, -0.7995, -0.1228, -0.4897]) tensor([0.6193, 0.0879, 0.1730, 0.1198]) -Greedy action tensor([ 1.3620, -0.6681, -0.0980, -0.4986]) tensor([0.6583, 0.0864, 0.1529, 0.1024]) -Greedy action tensor([ 0.7523, -0.4720, -0.0366, -0.2532]) tensor([0.4730, 0.1391, 0.2149, 0.1731]) -Greedy action tensor([ 1.2040, -0.5603, -0.2014, -0.6011]) tensor([0.6325, 0.1084, 0.1551, 0.1040]) -Greedy action tensor([ 0.3473, -0.1703, 0.0093, -0.4270]) tensor([0.3610, 0.2151, 0.2575, 0.1664]) -Greedy action tensor([ 0.3690, -0.1275, 0.0659, -0.1853]) tensor([0.3423, 0.2083, 0.2528, 0.1966]) -Greedy action tensor([ 0.4770, -0.0954, 0.2225, -0.1865]) tensor([0.3503, 0.1976, 0.2716, 0.1804]) -Greedy action tensor([ 0.5648, -0.1313, 0.1364, -0.1914]) tensor([0.3817, 0.1903, 0.2487, 0.1792]) -Greedy action tensor([ 0.8781, -0.5028, -0.1452, -0.1430]) tensor([0.5074, 0.1275, 0.1824, 0.1828]) -Greedy action tensor([ 0.8928, -0.2130, -0.0807, -0.1938]) tensor([0.4887, 0.1618, 0.1846, 0.1649]) -Greedy action tensor([ 0.4430, -0.2144, -0.0179, -0.2490]) tensor([0.3774, 0.1956, 0.2381, 0.1889]) -Greedy action tensor([ 0.8793, -0.3432, -0.1331, -0.1759]) tensor([0.4985, 0.1468, 0.1811, 0.1735]) -Greedy action tensor([ 0.7135, -0.2576, 0.1643, -0.3874]) tensor([0.4369, 0.1655, 0.2523, 0.1453]) -Greedy action tensor([ 0.7919, -0.4916, -0.1366, -0.2234]) tensor([0.4915, 0.1362, 0.1942, 0.1781]) -Greedy action tensor([ 0.7894, -0.3899, -0.0989, -0.2634]) tensor([0.4836, 0.1487, 0.1989, 0.1688]) -Greedy action tensor([ 0.4810, -0.2534, -0.0567, -0.1225]) tensor([0.3830, 0.1838, 0.2237, 0.2095]) -Greedy action tensor([ 1.1066, -0.7781, -0.0609, -0.3143]) tensor([0.5867, 0.0891, 0.1825, 0.1417]) -Greedy action tensor([ 0.9336, -0.5327, -0.0706, -0.2971]) tensor([0.5293, 0.1222, 0.1939, 0.1546]) -Greedy action tensor([ 1.1100, -0.8169, -0.1132, -0.4496]) tensor([0.6060, 0.0882, 0.1783, 0.1274]) -Greedy action tensor([ 0.2460, -0.0043, -0.0814, -0.1067]) tensor([0.3123, 0.2432, 0.2251, 0.2195]) -Greedy action tensor([ 0.7690, 0.1577, -0.4212, -0.6422]) tensor([0.4783, 0.2596, 0.1455, 0.1166]) -Greedy action tensor([ 0.7997, -0.2059, -0.0590, -0.0584]) tensor([0.4518, 0.1653, 0.1914, 0.1915]) -Greedy action tensor([ 0.9110, -0.7140, 0.1161, -0.6124]) tensor([0.5358, 0.1055, 0.2419, 0.1168]) -Greedy action tensor([ 1.1470, -0.1588, -0.0404, -0.4199]) tensor([0.5603, 0.1518, 0.1709, 0.1169]) -Greedy action tensor([ 0.5378, -0.4377, -0.0948, -0.0528]) tensor([0.4061, 0.1531, 0.2157, 0.2250]) -Greedy action tensor([ 0.5447, -0.4045, 0.0014, -0.5332]) tensor([0.4332, 0.1677, 0.2516, 0.1474]) -Greedy action tensor([ 0.7296, -0.2721, 0.0122, -0.2121]) tensor([0.4454, 0.1636, 0.2174, 0.1737]) -Greedy action tensor([ 0.4591, -0.3745, -0.0976, -0.1094]) tensor([0.3885, 0.1688, 0.2227, 0.2200]) -Greedy action tensor([ 0.9193, -0.6570, 0.0595, -0.3787]) tensor([0.5255, 0.1086, 0.2224, 0.1435]) -Greedy action tensor([ 0.7632, -0.5559, -0.0305, -0.3656]) tensor([0.4895, 0.1309, 0.2213, 0.1583]) -Greedy action tensor([ 0.9237, -0.4847, -0.1519, -0.2124]) tensor([0.5245, 0.1282, 0.1789, 0.1684]) -Greedy action tensor([ 0.4661, 0.2573, -0.1285, 0.0872]) tensor([0.3281, 0.2663, 0.1810, 0.2246]) -Greedy action tensor([ 0.9041, -0.5022, -0.1282, -0.4916]) tensor([0.5409, 0.1325, 0.1926, 0.1339]) -Greedy action tensor([ 0.6643, -0.4202, -0.0282, -0.0346]) tensor([0.4282, 0.1448, 0.2142, 0.2128]) -Greedy action tensor([ 0.5148, -0.1350, -0.0817, -0.2034]) tensor([0.3905, 0.2039, 0.2151, 0.1904]) -Greedy action tensor([ 0.9688, -0.6442, -0.1108, -0.2886]) tensor([0.5484, 0.1093, 0.1863, 0.1560]) -Greedy action tensor([ 0.9126, -0.7594, -0.0624, -0.4072]) tensor([0.5458, 0.1025, 0.2059, 0.1458]) -Greedy action tensor([ 0.8234, -0.5534, 0.0138, -0.4371]) tensor([0.5048, 0.1274, 0.2247, 0.1431]) -Greedy action tensor([ 0.7112, -0.5651, -0.0008, -0.3354]) tensor([0.4715, 0.1316, 0.2314, 0.1656]) -Greedy action tensor([ 0.5615, -0.3350, 0.0412, -0.2735]) tensor([0.4105, 0.1675, 0.2440, 0.1781]) -Greedy action tensor([ 0.1556, 0.5778, -0.4210, -0.2743]) tensor([0.2675, 0.4081, 0.1503, 0.1741]) -Greedy action tensor([-0.2549, -0.4271, -0.1060, -0.6486]) tensor([0.2720, 0.2289, 0.3156, 0.1835]) -Greedy action tensor([ 0.0795, -1.2074, -0.0320, 0.6099]) tensor([0.2584, 0.0713, 0.2311, 0.4391]) -Greedy action tensor([-0.2928, -0.3586, -0.4236, -1.3340]) tensor([0.3158, 0.2957, 0.2771, 0.1115]) -Greedy action tensor([ 0.2911, -0.9534, 0.7658, 0.5121]) tensor([0.2414, 0.0695, 0.3880, 0.3011]) -Greedy action tensor([ 0.1345, 0.0682, -0.1103, 0.0369]) tensor([0.2758, 0.2581, 0.2159, 0.2501]) -Greedy action tensor([ 0.3434, -1.8639, 0.4656, 0.4288]) tensor([0.3004, 0.0330, 0.3394, 0.3272]) -Greedy action tensor([ 0.6624, -0.8644, 0.1096, 1.0082]) tensor([0.3120, 0.0678, 0.1795, 0.4408]) -Greedy action tensor([ 0.2552, -1.7708, 1.1857, -0.3292]) tensor([0.2367, 0.0312, 0.6002, 0.1319]) -Greedy action tensor([0.7593, 0.3367, 0.8892, 0.7164]) tensor([0.2665, 0.1747, 0.3035, 0.2553]) -Greedy action tensor([-1.4158, -0.3066, 0.5557, -0.5141]) tensor([0.0731, 0.2217, 0.5251, 0.1801]) -Greedy action tensor([-0.5255, 0.0906, 0.4405, 0.6718]) tensor([0.1138, 0.2106, 0.2989, 0.3767]) -Greedy action tensor([ 0.7317, -1.3701, 0.6544, 0.2013]) tensor([0.3793, 0.0464, 0.3511, 0.2232]) -Greedy action tensor([-0.6145, -0.6313, 0.4257, -0.3954]) tensor([0.1651, 0.1623, 0.4671, 0.2055]) -Greedy action tensor([ 0.8488, 0.2578, 0.9570, -0.7171]) tensor([0.3476, 0.1925, 0.3873, 0.0726]) -Greedy action tensor([-0.1478, -0.8533, 0.1328, -0.7356]) tensor([0.2964, 0.1464, 0.3925, 0.1647]) -Greedy action tensor([ 1.6638, -0.9859, 0.5576, 1.1895]) tensor([0.4941, 0.0349, 0.1635, 0.3075]) -Greedy action tensor([-1.1339, -2.1117, -0.4558, -1.0110]) tensor([0.2234, 0.0840, 0.4400, 0.2526]) -Greedy action tensor([ 0.2374, -1.0554, -1.2718, 0.7187]) tensor([0.3212, 0.0882, 0.0710, 0.5197]) -Greedy action tensor([0.8132, 0.3363, 0.6959, 0.2753]) tensor([0.3232, 0.2006, 0.2874, 0.1887]) -Greedy action tensor([-0.5570, -0.5478, 1.4286, -0.7251]) tensor([0.0986, 0.0996, 0.7184, 0.0834]) -Greedy action tensor([-0.5541, -1.6404, 0.3708, 0.2441]) tensor([0.1645, 0.0555, 0.4147, 0.3654]) -Greedy action tensor([ 0.5494, -0.0546, 0.5767, -0.2295]) tensor([0.3297, 0.1802, 0.3388, 0.1513]) -Greedy action tensor([-0.3981, -0.6261, -0.5836, 0.1314]) tensor([0.2312, 0.1841, 0.1921, 0.3926]) -Greedy action tensor([0.6938, 0.2611, 0.3668, 0.0665]) tensor([0.3444, 0.2234, 0.2483, 0.1839]) -Greedy action tensor([-1.0949, -0.8209, -0.1016, -1.1736]) tensor([0.1684, 0.2214, 0.4546, 0.1556]) -Greedy action tensor([-0.7340, -1.0042, -0.2795, -0.7064]) tensor([0.2290, 0.1748, 0.3608, 0.2354]) -Greedy action tensor([-0.5112, -0.8047, 0.7173, -0.2071]) tensor([0.1534, 0.1144, 0.5242, 0.2080]) -Greedy action tensor([-0.1114, -1.0162, 0.4951, 1.7643]) tensor([0.1024, 0.0414, 0.1878, 0.6683]) -Greedy action tensor([-0.1158, -1.4959, -0.1643, 0.1456]) tensor([0.2855, 0.0718, 0.2720, 0.3707]) -Greedy action tensor([ 0.4511, -0.2195, -0.4186, 0.5015]) tensor([0.3353, 0.1715, 0.1405, 0.3527]) -Greedy action tensor([ 0.3849, 0.1102, -0.9920, -0.4702]) tensor([0.4103, 0.3117, 0.1035, 0.1745]) -Greedy action tensor([ 1.1093, 0.3158, 0.2789, -0.2481]) tensor([0.4661, 0.2108, 0.2032, 0.1199]) -Greedy action tensor([-0.0173, -1.1969, 0.2689, 0.2132]) tensor([0.2566, 0.0789, 0.3415, 0.3230]) -Greedy action tensor([-1.2678, -0.6190, 1.0647, -1.0377]) tensor([0.0691, 0.1322, 0.7118, 0.0870]) -Greedy action tensor([ 0.3514, -1.2726, -0.7194, 0.8810]) tensor([0.3088, 0.0609, 0.1058, 0.5245]) -Greedy action tensor([-0.3652, -0.5833, 0.3689, -0.7800]) tensor([0.2199, 0.1768, 0.4581, 0.1452]) -Greedy action tensor([ 0.4912, 0.5725, 1.3107, -0.1457]) tensor([0.2048, 0.2221, 0.4648, 0.1083]) -Greedy action tensor([ 0.4352, -1.3186, -0.4465, -0.3751]) tensor([0.4921, 0.0852, 0.2038, 0.2189]) -Greedy action tensor([ 0.5384, -1.3581, 0.7284, -0.9983]) tensor([0.3884, 0.0583, 0.4697, 0.0835]) -Greedy action tensor([ 1.4641, -0.8067, -0.8407, 0.3723]) tensor([0.6499, 0.0671, 0.0649, 0.2181]) -Greedy action tensor([ 0.0369, -1.7410, -0.1385, -0.3162]) tensor([0.3689, 0.0623, 0.3096, 0.2592]) -Greedy action tensor([-0.7740, -1.2633, -0.3020, 0.3026]) tensor([0.1626, 0.0997, 0.2607, 0.4771]) -Greedy action tensor([ 0.5914, -1.9359, 0.4117, -0.6964]) tensor([0.4564, 0.0365, 0.3813, 0.1259]) -Greedy action tensor([ 1.8055, -1.1965, 0.8885, -0.0355]) tensor([0.6219, 0.0309, 0.2486, 0.0987]) -Greedy action tensor([ 0.2520, -0.3813, -1.1819, 0.1926]) tensor([0.3688, 0.1958, 0.0879, 0.3475]) -Greedy action tensor([ 0.5277, -1.5391, -0.0512, -0.7488]) tensor([0.5086, 0.0644, 0.2851, 0.1419]) -Greedy action tensor([0.9643, 0.5010, 0.1410, 0.1766]) tensor([0.3963, 0.2494, 0.1740, 0.1803]) -Greedy action tensor([-0.9848, -1.2219, 1.0577, -1.1055]) tensor([0.0963, 0.0760, 0.7424, 0.0853]) -Greedy action tensor([-0.1978, -1.7596, -0.4724, -0.5212]) tensor([0.3713, 0.0779, 0.2821, 0.2687]) -Greedy action tensor([ 0.6974, 0.2020, -0.1549, 0.2738]) tensor([0.3717, 0.2265, 0.1585, 0.2433]) -Greedy action tensor([ 0.9240, -0.3333, 0.2712, 0.5803]) tensor([0.3977, 0.1131, 0.2071, 0.2821]) -Greedy action tensor([ 1.0084, -0.7078, 0.4200, 1.0685]) tensor([0.3575, 0.0643, 0.1985, 0.3797]) -Greedy action tensor([-0.1750, -0.1652, -0.0756, -1.0232]) tensor([0.2823, 0.2851, 0.3118, 0.1209]) -Greedy action tensor([-1.4671, 0.1024, -0.6901, 0.5758]) tensor([0.0637, 0.3062, 0.1386, 0.4915]) -Greedy action tensor([ 0.5612, -0.2139, 0.7825, -0.6432]) tensor([0.3324, 0.1531, 0.4148, 0.0997]) -Greedy action tensor([-0.6633, 0.5553, 0.3912, -0.4936]) tensor([0.1185, 0.4008, 0.3402, 0.1404]) -Greedy action tensor([-0.5235, -1.4266, 0.3670, -0.2941]) tensor([0.1961, 0.0795, 0.4778, 0.2467]) -Greedy action tensor([ 0.1150, -0.7638, -0.0658, -0.0804]) tensor([0.3255, 0.1352, 0.2716, 0.2677]) -Greedy action tensor([ 0.5953, -1.1121, 0.6185, -0.1106]) tensor([0.3706, 0.0672, 0.3793, 0.1829]) -Greedy action tensor([-0.0974, -0.2020, 0.6804, -1.1151]) tensor([0.2253, 0.2029, 0.4904, 0.0814]) -Greedy action tensor([-0.2027, 0.8284, 0.4574, 0.6777]) tensor([0.1227, 0.3440, 0.2374, 0.2959]) -Greedy action tensor([ 1.1613, -0.2829, 0.0569, -0.4765]) tensor([0.5676, 0.1339, 0.1881, 0.1103]) -Greedy action tensor([ 1.3128, -1.3590, 1.1307, 2.0676]) tensor([0.2481, 0.0172, 0.2068, 0.5279]) -Greedy action tensor([ 0.1730, -1.1628, 0.8088, 0.2142]) tensor([0.2385, 0.0627, 0.4503, 0.2485]) -Greedy action tensor([-1.2660, -0.8615, -1.2113, -0.0288]) tensor([0.1428, 0.2141, 0.1509, 0.4922]) -Greedy action tensor([-0.0875, 0.4639, -0.8768, -0.2357]) tensor([0.2468, 0.4284, 0.1121, 0.2128]) -Greedy action tensor([-0.2318, 0.0769, -0.2627, -0.6596]) tensor([0.2511, 0.3419, 0.2434, 0.1637]) -Greedy action tensor([ 1.0185, 0.2076, 0.5716, -1.0090]) tensor([0.4513, 0.2006, 0.2887, 0.0594]) -Greedy action tensor([-0.4542, -0.8538, 0.0943, -0.0245]) tensor([0.2025, 0.1358, 0.3505, 0.3112]) -Greedy action tensor([ 0.4930, -1.6988, -0.2529, 0.0330]) tensor([0.4510, 0.0504, 0.2139, 0.2847]) -Greedy action tensor([ 0.7259, -0.2037, 1.0037, 0.0563]) tensor([0.3099, 0.1223, 0.4091, 0.1586]) -Greedy action tensor([-0.8657, -0.7722, 0.9343, -0.7029]) tensor([0.1073, 0.1178, 0.6488, 0.1262]) -Greedy action tensor([-0.6200, -0.8389, 0.1853, 0.1149]) tensor([0.1632, 0.1311, 0.3652, 0.3404]) -Greedy action tensor([ 0.6794, 0.7689, 1.9234, -0.7085]) tensor([0.1720, 0.1882, 0.5969, 0.0429]) -Greedy action tensor([ 1.1454, -0.7726, -0.1541, 0.7835]) tensor([0.4726, 0.0694, 0.1289, 0.3291]) -Greedy action tensor([ 0.3622, -0.7637, 0.5175, 0.0546]) tensor([0.3098, 0.1005, 0.3619, 0.2278]) -Greedy action tensor([-0.1067, -0.2698, -0.6903, -0.5236]) tensor([0.3261, 0.2770, 0.1819, 0.2149]) -Greedy action tensor([-0.2353, 0.8407, -0.2533, -1.0779]) tensor([0.1871, 0.5487, 0.1837, 0.0805]) -Greedy action tensor([-0.0846, -1.4031, -0.5504, -0.3549]) tensor([0.3762, 0.1006, 0.2361, 0.2871]) -Greedy action tensor([ 0.3337, -0.8350, 0.2305, -0.3747]) tensor([0.3697, 0.1149, 0.3334, 0.1820]) -Greedy action tensor([ 0.3737, -1.6796, 0.2014, 0.1503]) tensor([0.3610, 0.0463, 0.3039, 0.2887]) -Greedy action tensor([-0.5667, 0.2225, 0.3983, 0.5958]) tensor([0.1108, 0.2440, 0.2909, 0.3544]) -Greedy action tensor([-1.8924, -0.4547, 0.6323, -0.1462]) tensor([0.0427, 0.1797, 0.5329, 0.2447]) -Greedy action tensor([-1.9342, -0.4225, 0.6586, -0.1721]) tensor([0.0404, 0.1834, 0.5406, 0.2356]) -Greedy action tensor([-1.8333, -0.4370, 0.7944, 0.1631]) tensor([0.0381, 0.1539, 0.5274, 0.2805]) -Greedy action tensor([-0.9963, -0.4017, 0.2383, 0.0017]) tensor([0.1116, 0.2022, 0.3835, 0.3027]) -Greedy action tensor([-1.0224, -0.0026, 0.5015, 0.3971]) tensor([0.0800, 0.2218, 0.3673, 0.3309]) -Greedy action tensor([-1.1003, 0.7836, 0.2954, -0.2471]) tensor([0.0716, 0.4711, 0.2892, 0.1681]) -Greedy action tensor([-1.9047, -0.4206, 0.6491, -0.1588]) tensor([0.0417, 0.1838, 0.5357, 0.2388]) -Greedy action tensor([-1.4626, -0.5496, 0.4370, 0.0164]) tensor([0.0687, 0.1711, 0.4589, 0.3013]) -Greedy action tensor([-0.8983, -0.2289, 0.2897, 0.3396]) tensor([0.1033, 0.2017, 0.3388, 0.3562]) -Greedy action tensor([-1.3356, 0.3565, 0.3609, -0.1235]) tensor([0.0656, 0.3562, 0.3578, 0.2204]) -Greedy action tensor([-1.8216, -0.4231, 0.6847, -0.0791]) tensor([0.0434, 0.1759, 0.5326, 0.2481]) -Greedy action tensor([-1.9093, -0.4301, 0.6496, -0.1621]) tensor([0.0416, 0.1825, 0.5373, 0.2386]) -Greedy action tensor([-1.9054, -0.3547, 0.6380, -0.1806]) tensor([0.0416, 0.1960, 0.5290, 0.2333]) -Greedy action tensor([-1.8830, -0.4301, 0.6297, -0.1492]) tensor([0.0430, 0.1837, 0.5301, 0.2433]) -Greedy action tensor([-1.8429, -0.4333, 0.6201, -0.1256]) tensor([0.0446, 0.1828, 0.5240, 0.2486]) -Greedy action tensor([0.2565, 0.7423, 0.3440, 1.0656]) tensor([0.1677, 0.2726, 0.1830, 0.3767]) -Greedy action tensor([-1.9014, -0.1529, 0.1498, -0.2986]) tensor([0.0513, 0.2948, 0.3990, 0.2548]) -Greedy action tensor([-1.4729, -0.4887, 0.7240, 0.5218]) tensor([0.0499, 0.1336, 0.4494, 0.3671]) -Greedy action tensor([-1.7775, -0.0853, 0.5576, -0.0644]) tensor([0.0448, 0.2435, 0.4631, 0.2486]) -Greedy action tensor([-1.6884, -0.2550, 0.4958, -0.0580]) tensor([0.0521, 0.2186, 0.4631, 0.2662]) -Greedy action tensor([-1.8358, -0.4376, 0.6121, -0.1299]) tensor([0.0452, 0.1830, 0.5228, 0.2490]) -Greedy action tensor([-1.9119, -0.4418, 0.6467, -0.1628]) tensor([0.0416, 0.1811, 0.5379, 0.2394]) -Greedy action tensor([-1.9112, -0.4516, 0.6552, -0.1619]) tensor([0.0415, 0.1788, 0.5408, 0.2389]) -Greedy action tensor([-1.7851, -0.3169, 0.6576, -0.0768]) tensor([0.0447, 0.1941, 0.5144, 0.2468]) -Greedy action tensor([-1.8844, -0.4629, 0.6435, -0.1423]) tensor([0.0428, 0.1772, 0.5358, 0.2442]) -Greedy action tensor([-1.8743, -0.4322, 0.6308, -0.1468]) tensor([0.0433, 0.1831, 0.5301, 0.2436]) -Greedy action tensor([-0.6382, 0.8749, 0.0867, 0.0172]) tensor([0.1049, 0.4764, 0.2166, 0.2021]) -Greedy action tensor([-1.8033, -0.1373, 0.5479, -0.1030]) tensor([0.0449, 0.2376, 0.4715, 0.2459]) -Greedy action tensor([-1.8584, -0.4170, 0.6218, -0.1319]) tensor([0.0439, 0.1854, 0.5241, 0.2466]) -Greedy action tensor([-1.7216, -0.4420, 0.3938, -0.3759]) tensor([0.0598, 0.2149, 0.4957, 0.2296]) -Greedy action tensor([-1.9023, -0.4458, 0.6748, -0.1423]) tensor([0.0412, 0.1769, 0.5424, 0.2396]) -Greedy action tensor([-1.4457, -0.1470, 0.5629, 0.1685]) tensor([0.0583, 0.2138, 0.4348, 0.2931]) -Greedy action tensor([-1.9236, -0.4107, 0.6563, -0.1602]) tensor([0.0407, 0.1848, 0.5371, 0.2374]) -Greedy action tensor([-1.9356, -0.4223, 0.6590, -0.1760]) tensor([0.0404, 0.1835, 0.5412, 0.2348]) -Greedy action tensor([-1.0501, 0.6675, -0.7806, -0.8403]) tensor([0.1097, 0.6113, 0.1437, 0.1353]) -Greedy action tensor([-1.9278, -0.4479, 0.6558, -0.1705]) tensor([0.0409, 0.1798, 0.5421, 0.2373]) -Greedy action tensor([-1.8951, -0.3753, 0.6375, -0.1423]) tensor([0.0418, 0.1910, 0.5260, 0.2412]) -Greedy action tensor([-1.3261, 0.1549, 0.5426, 0.4477]) tensor([0.0563, 0.2475, 0.3647, 0.3316]) -Greedy action tensor([-0.8398, 1.0459, 0.1326, 0.4407]) tensor([0.0723, 0.4765, 0.1911, 0.2601]) -Greedy action tensor([-1.7541, -0.4477, 0.5693, -0.0800]) tensor([0.0494, 0.1825, 0.5045, 0.2636]) -Greedy action tensor([-1.3262, 0.2665, 0.2483, 0.0631]) tensor([0.0678, 0.3332, 0.3272, 0.2719]) -Greedy action tensor([-1.8965, -0.4431, 0.6457, -0.1558]) tensor([0.0422, 0.1806, 0.5365, 0.2407]) -Greedy action tensor([-1.8918, -0.4274, 0.6432, -0.1478]) tensor([0.0423, 0.1828, 0.5332, 0.2417]) -Greedy action tensor([-1.8613, -0.5964, 0.9703, 0.0115]) tensor([0.0357, 0.1264, 0.6057, 0.2322]) -Greedy action tensor([-1.9197, -0.4028, 0.6452, -0.1744]) tensor([0.0412, 0.1877, 0.5353, 0.2359]) -Greedy action tensor([-0.5916, 0.2969, 0.8194, 1.1513]) tensor([0.0755, 0.1836, 0.3095, 0.4314]) -Greedy action tensor([-1.9018, -0.4410, 0.6455, -0.1631]) tensor([0.0421, 0.1813, 0.5373, 0.2394]) -Greedy action tensor([-1.6750, -0.0969, 0.4984, -0.0843]) tensor([0.0512, 0.2480, 0.4497, 0.2511]) -Greedy action tensor([-1.8336, -0.3629, 0.6560, -0.0682]) tensor([0.0430, 0.1872, 0.5185, 0.2513]) -Greedy action tensor([-1.9263, -0.4298, 0.6599, -0.1675]) tensor([0.0407, 0.1819, 0.5409, 0.2365]) -Greedy action tensor([-1.9410, -0.4447, 0.6622, -0.1783]) tensor([0.0403, 0.1800, 0.5446, 0.2350]) -Greedy action tensor([-1.6548, -0.4856, 0.5518, -0.0035]) tensor([0.0540, 0.1739, 0.4906, 0.2815]) -Greedy action tensor([-1.5799, -0.4272, 0.4790, 0.0304]) tensor([0.0588, 0.1862, 0.4608, 0.2942]) -Greedy action tensor([-1.9379, -0.4375, 0.6622, -0.1772]) tensor([0.0404, 0.1810, 0.5437, 0.2349]) -Greedy action tensor([-1.9343, -0.4186, 0.6580, -0.1695]) tensor([0.0404, 0.1839, 0.5397, 0.2359]) -Greedy action tensor([-1.8586, -0.2259, 0.5998, -0.1033]) tensor([0.0424, 0.2169, 0.4954, 0.2453]) -Greedy action tensor([-1.9069, -0.4242, 0.6431, -0.1613]) tensor([0.0418, 0.1840, 0.5349, 0.2393]) -Greedy action tensor([-1.1976, -0.5875, 0.3255, 0.3373]) tensor([0.0829, 0.1525, 0.3800, 0.3846]) -Greedy action tensor([-1.9280, -0.4502, 0.6582, -0.1758]) tensor([0.0409, 0.1794, 0.5436, 0.2361]) -Greedy action tensor([-1.8924, -0.4553, 0.6430, -0.1566]) tensor([0.0425, 0.1791, 0.5370, 0.2414]) -Greedy action tensor([-1.6033, -0.5301, 0.4844, -0.0172]) tensor([0.0593, 0.1733, 0.4780, 0.2894]) -Greedy action tensor([-1.8451, -0.3607, 0.6237, -0.1167]) tensor([0.0438, 0.1931, 0.5167, 0.2464]) -Greedy action tensor([-1.6495, -0.5712, 0.5109, -0.0260]) tensor([0.0565, 0.1662, 0.4905, 0.2867]) -Greedy action tensor([-1.9220, -0.4246, 0.6593, -0.1580]) tensor([0.0408, 0.1823, 0.5389, 0.2380]) -Greedy action tensor([-1.3267, 0.1385, 0.3410, -0.0448]) tensor([0.0703, 0.3041, 0.3724, 0.2532]) -Greedy action tensor([-1.4270, -0.4708, 0.4118, 0.0558]) tensor([0.0699, 0.1820, 0.4399, 0.3082]) -Greedy action tensor([-1.9230, -0.4349, 0.6577, -0.1657]) tensor([0.0409, 0.1813, 0.5405, 0.2373]) -Greedy action tensor([-1.7808, -0.3175, 0.6797, -0.0238]) tensor([0.0438, 0.1893, 0.5131, 0.2539]) -Greedy action tensor([-1.8862, -0.4511, 0.6415, -0.1508]) tensor([0.0427, 0.1795, 0.5353, 0.2424]) -Greedy action tensor([-1.8600, -0.4767, 0.6285, -0.1281]) tensor([0.0441, 0.1758, 0.5310, 0.2491]) -Greedy action tensor([0.1831, 1.1219, 0.0086, 0.5240]) tensor([0.1723, 0.4406, 0.1447, 0.2423]) -Greedy action tensor([-1.6711, -0.4995, 0.5749, 0.0112]) tensor([0.0525, 0.1694, 0.4959, 0.2822]) -Greedy action tensor([-1.7510, -0.2966, 0.5774, -0.2453]) tensor([0.0499, 0.2135, 0.5118, 0.2248]) -Greedy action tensor([-0.7401, 0.3512, 0.1189, -0.0344]) tensor([0.1196, 0.3560, 0.2823, 0.2421]) -Greedy action tensor([-1.8863, -0.2521, 0.6065, -0.1409]) tensor([0.0418, 0.2140, 0.5050, 0.2392]) -Greedy action tensor([-1.9267, -0.4076, 0.6520, -0.1670]) tensor([0.0407, 0.1860, 0.5367, 0.2366]) -Greedy action tensor([-1.9046, -0.4396, 0.6464, -0.1590]) tensor([0.0419, 0.1812, 0.5369, 0.2399]) -Greedy action tensor([-1.6110, -0.2310, 0.6104, 0.0529]) tensor([0.0513, 0.2041, 0.4734, 0.2711]) -Greedy action tensor([-1.3405, 0.6906, 0.2427, 0.2299]) tensor([0.0546, 0.4165, 0.2661, 0.2627]) -Greedy action tensor([-1.6184, -0.4322, 0.5143, 0.0232]) tensor([0.0559, 0.1832, 0.4720, 0.2889]) -Greedy action tensor([-1.9312, -0.4281, 0.6599, -0.1689]) tensor([0.0405, 0.1823, 0.5410, 0.2362]) -Greedy action tensor([ 2.0086, -0.9248, -0.3285, 0.5066]) tensor([0.7286, 0.0388, 0.0704, 0.1622]) -Greedy action tensor([ 1.1846, -0.4760, -0.4024, 0.3108]) tensor([0.5519, 0.1049, 0.1129, 0.2304]) -Greedy action tensor([ 1.1310, -0.4404, -0.3870, 0.7441]) tensor([0.4748, 0.0986, 0.1041, 0.3225]) -Greedy action tensor([ 1.5777, -0.4614, -0.4084, 0.4571]) tensor([0.6276, 0.0817, 0.0861, 0.2046]) -Greedy action tensor([ 1.1929, -0.2310, -0.7474, 0.1075]) tensor([0.5807, 0.1398, 0.0834, 0.1961]) -Greedy action tensor([ 1.6601, -0.4475, -0.9665, 0.4943]) tensor([0.6642, 0.0807, 0.0480, 0.2070]) -Greedy action tensor([ 1.1896, -0.4066, -0.3684, 0.2601]) tensor([0.5531, 0.1121, 0.1165, 0.2183]) -Greedy action tensor([ 1.5857, -0.4203, -0.4713, 0.3541]) tensor([0.6434, 0.0866, 0.0823, 0.1878]) -Greedy action tensor([ 1.3946, -0.9976, -0.1096, 1.1441]) tensor([0.4780, 0.0437, 0.1062, 0.3721]) -Greedy action tensor([ 1.6816, -0.0690, -0.6972, 0.3159]) tensor([0.6572, 0.1141, 0.0609, 0.1677]) -Greedy action tensor([ 2.0622, 0.1988, -0.0832, 0.4565]) tensor([0.6789, 0.1053, 0.0794, 0.1363]) -Greedy action tensor([ 0.6046, -0.1791, -0.0398, -0.0411]) tensor([0.3991, 0.1822, 0.2095, 0.2092]) -Greedy action tensor([ 1.0250, -0.1565, -0.4715, 0.6629]) tensor([0.4490, 0.1378, 0.1005, 0.3127]) -Greedy action tensor([ 1.4352, -0.5991, -0.4420, 0.5021]) tensor([0.5963, 0.0780, 0.0912, 0.2345]) -Greedy action tensor([ 1.3987, -0.5569, -0.6262, 0.4943]) tensor([0.5959, 0.0843, 0.0787, 0.2412]) -Greedy action tensor([ 1.2716, -0.3265, -0.1533, 0.8798]) tensor([0.4720, 0.0955, 0.1135, 0.3190]) -Greedy action tensor([ 0.9355, -0.7009, 0.1607, 0.1699]) tensor([0.4716, 0.0918, 0.2173, 0.2193]) -Greedy action tensor([ 1.4657, -0.6856, -0.2524, 0.4089]) tensor([0.6085, 0.0708, 0.1092, 0.2115]) -Greedy action tensor([ 1.8020, -1.3643, 0.2514, 0.5376]) tensor([0.6507, 0.0274, 0.1380, 0.1838]) -Greedy action tensor([ 1.5429, -1.2512, -0.3263, -0.3220]) tensor([0.7298, 0.0446, 0.1126, 0.1130]) -Greedy action tensor([ 1.5382, -0.2793, -0.9126, 0.0172]) tensor([0.6816, 0.1107, 0.0588, 0.1489]) -Greedy action tensor([ 1.2206, -0.4592, -0.1695, 0.3148]) tensor([0.5436, 0.1013, 0.1354, 0.2197]) -Greedy action tensor([ 2.0017, -0.6763, -0.6257, 0.7385]) tensor([0.7024, 0.0483, 0.0508, 0.1986]) -Greedy action tensor([ 2.6845, -1.5711, -0.1935, 0.8700]) tensor([0.8108, 0.0115, 0.0456, 0.1321]) -Greedy action tensor([ 1.3019, -0.4634, -0.4733, 0.3054]) tensor([0.5849, 0.1001, 0.0991, 0.2159]) -Greedy action tensor([ 1.4309, -0.3960, -0.2528, 0.1826]) tensor([0.6122, 0.0985, 0.1137, 0.1757]) -Greedy action tensor([ 1.5270, -0.5788, -0.2495, 0.3312]) tensor([0.6276, 0.0764, 0.1062, 0.1898]) -Greedy action tensor([ 1.7997, -0.5091, -0.5983, 0.5934]) tensor([0.6713, 0.0667, 0.0610, 0.2009]) -Greedy action tensor([ 1.7756, -0.8611, -0.3801, 0.5410]) tensor([0.6764, 0.0484, 0.0783, 0.1968]) -Greedy action tensor([ 1.3116, -0.3211, -0.1938, -0.1753]) tensor([0.6085, 0.1189, 0.1350, 0.1376]) -Greedy action tensor([ 1.4893, -0.1480, -0.0626, 0.4427]) tensor([0.5690, 0.1107, 0.1205, 0.1998]) -Greedy action tensor([ 1.4806, -0.0280, -0.3646, 0.0994]) tensor([0.6133, 0.1357, 0.0969, 0.1541]) -Greedy action tensor([ 1.0728, -0.5011, -0.2080, 0.5136]) tensor([0.4862, 0.1008, 0.1351, 0.2780]) -Greedy action tensor([ 1.3022, -0.6631, -0.1132, -0.1362]) tensor([0.6172, 0.0865, 0.1499, 0.1465]) -Greedy action tensor([ 1.9822, -0.4314, -1.0748, 0.3131]) tensor([0.7547, 0.0675, 0.0355, 0.1422]) -Greedy action tensor([ 2.0387, 0.2580, -0.0765, 0.5709]) tensor([0.6581, 0.1109, 0.0794, 0.1516]) -Greedy action tensor([ 1.8832, -1.0306, 0.0204, 0.5305]) tensor([0.6812, 0.0370, 0.1057, 0.1761]) -Greedy action tensor([ 1.4623, -0.1689, -0.3562, 0.1727]) tensor([0.6123, 0.1198, 0.0993, 0.1686]) -Greedy action tensor([ 1.2675, -0.4818, -0.3321, 0.5011]) tensor([0.5433, 0.0945, 0.1097, 0.2525]) -Greedy action tensor([ 0.9452, -0.1622, -0.1815, -0.0112]) tensor([0.4905, 0.1621, 0.1590, 0.1885]) -Greedy action tensor([ 0.7355, -0.2243, -0.0430, 0.0251]) tensor([0.4285, 0.1641, 0.1967, 0.2106]) -Greedy action tensor([ 1.7526, -0.8028, -0.2389, 0.2992]) tensor([0.6906, 0.0536, 0.0943, 0.1615]) -Greedy action tensor([ 1.4403, -0.4542, -0.0054, 0.4372]) tensor([0.5705, 0.0858, 0.1344, 0.2092]) -Greedy action tensor([ 1.4662, -0.3706, -1.1535, 0.2525]) tensor([0.6539, 0.1042, 0.0476, 0.1943]) -Greedy action tensor([ 1.9606, -0.8382, -0.4832, 0.7052]) tensor([0.6980, 0.0425, 0.0606, 0.1989]) -Greedy action tensor([ 1.5378, -0.4053, -0.4146, 0.5463]) tensor([0.6038, 0.0865, 0.0857, 0.2240]) -Greedy action tensor([ 1.8305, -0.7542, -0.4596, 0.5155]) tensor([0.6920, 0.0522, 0.0701, 0.1858]) -Greedy action tensor([ 2.0384, -1.2076, -0.0834, 0.8778]) tensor([0.6793, 0.0264, 0.0814, 0.2128]) -Greedy action tensor([1.6962, 0.1292, 0.3514, 0.6277]) tensor([0.5516, 0.1151, 0.1437, 0.1895]) -Greedy action tensor([ 1.4467, -0.5379, 0.0079, 0.1166]) tensor([0.6101, 0.0839, 0.1447, 0.1613]) -Greedy action tensor([ 1.4005, -0.5079, -0.4285, 0.4150]) tensor([0.5945, 0.0882, 0.0955, 0.2219]) -Greedy action tensor([ 1.3543, -0.2324, -1.1973, 0.5158]) tensor([0.5831, 0.1193, 0.0455, 0.2521]) -Greedy action tensor([ 1.9621, -1.2110, -0.1324, 0.5784]) tensor([0.7064, 0.0296, 0.0870, 0.1771]) -Greedy action tensor([ 1.4349, -0.5750, -0.3824, 0.4033]) tensor([0.6050, 0.0811, 0.0983, 0.2156]) -Greedy action tensor([ 2.1263, -0.9736, -0.3521, 0.4230]) tensor([0.7628, 0.0344, 0.0640, 0.1389]) -Greedy action tensor([ 1.1907, -0.3636, -0.4774, 0.4524]) tensor([0.5325, 0.1125, 0.1004, 0.2545]) -Greedy action tensor([ 1.0937, -0.0323, -0.3552, 0.1176]) tensor([0.5165, 0.1675, 0.1213, 0.1946]) -Greedy action tensor([ 1.4776, 0.3427, 0.0300, -0.2869]) tensor([0.5788, 0.1860, 0.1361, 0.0991]) -Greedy action tensor([ 1.5108, -0.9002, -0.0672, 0.9130]) tensor([0.5417, 0.0486, 0.1118, 0.2979]) -Greedy action tensor([ 1.8081, -0.2224, -0.4229, -0.1230]) tensor([0.7227, 0.0949, 0.0776, 0.1048]) -Greedy action tensor([ 0.7863, -0.3944, -0.1424, 0.0679]) tensor([0.4567, 0.1402, 0.1804, 0.2226]) -Greedy action tensor([ 1.2843, -0.5241, -0.2924, 0.5606]) tensor([0.5389, 0.0883, 0.1114, 0.2614]) -Greedy action tensor([ 1.3850, -0.0701, -0.3321, 0.6829]) tensor([0.5240, 0.1223, 0.0941, 0.2596]) -Greedy action tensor([ 1.0361, -0.2357, -0.1163, -0.0115]) tensor([0.5136, 0.1440, 0.1622, 0.1802]) -Greedy action tensor([ 1.4462, -0.4070, -0.7521, 0.2616]) tensor([0.6355, 0.0996, 0.0705, 0.1944]) -Greedy action tensor([ 1.7733, -0.7688, -0.4307, 0.4678]) tensor([0.6849, 0.0539, 0.0756, 0.1856]) -Greedy action tensor([ 1.3140, -0.7182, -0.3653, 0.6555]) tensor([0.5449, 0.0714, 0.1016, 0.2821]) -Greedy action tensor([ 1.7550, -0.4936, -0.4760, 0.0302]) tensor([0.7188, 0.0759, 0.0772, 0.1281]) -Greedy action tensor([ 2.2202, 0.3237, -0.2396, -0.0694]) tensor([0.7480, 0.1123, 0.0639, 0.0758]) -Greedy action tensor([ 1.9902, -0.2474, -0.1559, 0.2722]) tensor([0.7127, 0.0761, 0.0833, 0.1279]) -Greedy action tensor([ 1.3602, -0.2085, -0.5636, 0.1731]) tensor([0.6026, 0.1255, 0.0880, 0.1838]) -Greedy action tensor([ 1.2329, 0.1268, -0.6223, -0.0599]) tensor([0.5676, 0.1878, 0.0888, 0.1558]) -Greedy action tensor([ 1.0243, -0.4536, 0.1201, -0.2472]) tensor([0.5226, 0.1192, 0.2116, 0.1466]) -Greedy action tensor([ 1.8684, -0.7198, -0.2272, 0.0559]) tensor([0.7345, 0.0552, 0.0903, 0.1199]) -Greedy action tensor([ 2.1518, -0.5530, -0.1972, 0.4881]) tensor([0.7398, 0.0495, 0.0706, 0.1401]) -Greedy action tensor([ 1.4627, -0.3739, -0.3512, 0.1120]) tensor([0.6323, 0.1008, 0.1031, 0.1638]) -Greedy action tensor([ 1.7086, -0.3795, -0.3541, 0.4127]) tensor([0.6559, 0.0813, 0.0834, 0.1795]) -Greedy action tensor([ 1.2885, -0.1258, -0.6752, 0.3389]) tensor([0.5649, 0.1373, 0.0793, 0.2185]) -Greedy action tensor([ 1.5393, -0.7268, -0.5447, 0.7029]) tensor([0.6019, 0.0624, 0.0749, 0.2608]) -Greedy action tensor([ 1.2929, -0.1925, -0.7085, 0.3637]) tensor([0.5693, 0.1289, 0.0769, 0.2248]) -Greedy action tensor([ 1.6099, 0.7257, -0.2728, -0.2589]) tensor([0.5815, 0.2402, 0.0885, 0.0897]) -Greedy action tensor([ 1.0348, -0.5132, 0.1341, -0.3927]) tensor([0.5380, 0.1144, 0.2186, 0.1291]) -Greedy action tensor([ 0.8157, -0.3928, -0.1156, -0.2400]) tensor([0.4900, 0.1464, 0.1931, 0.1705]) -Greedy action tensor([ 0.6163, -0.2682, -0.1006, -0.3714]) tensor([0.4398, 0.1816, 0.2147, 0.1638]) -Greedy action tensor([ 0.7501, -0.4557, -0.1961, -0.0064]) tensor([0.4636, 0.1388, 0.1800, 0.2176]) -Greedy action tensor([ 0.7135, -0.4694, -0.0387, 0.0134]) tensor([0.4397, 0.1347, 0.2072, 0.2183]) -Greedy action tensor([ 1.4267, -1.1021, -0.3929, -0.6735]) tensor([0.7330, 0.0585, 0.1188, 0.0897]) -Greedy action tensor([ 0.7480, -0.6679, -0.1305, -0.2828]) tensor([0.4963, 0.1205, 0.2062, 0.1770]) -Greedy action tensor([ 0.6923, -0.3162, -0.1512, -0.3603]) tensor([0.4664, 0.1701, 0.2007, 0.1628]) -Greedy action tensor([ 0.8066, -0.4661, -0.0144, -0.2214]) tensor([0.4813, 0.1348, 0.2118, 0.1722]) -Greedy action tensor([ 0.7950, -0.2730, -0.0264, -0.1213]) tensor([0.4580, 0.1574, 0.2014, 0.1832]) -Greedy action tensor([ 0.7972, -0.4454, -0.0116, -0.5366]) tensor([0.5006, 0.1445, 0.2230, 0.1319]) -Greedy action tensor([ 0.8230, -0.4495, 0.0954, -0.2477]) tensor([0.4748, 0.1330, 0.2294, 0.1628]) -Greedy action tensor([ 0.8737, -0.7501, 0.0065, -0.4708]) tensor([0.5325, 0.1050, 0.2237, 0.1388]) -Greedy action tensor([ 0.8181, 0.0087, 0.1761, -0.1153]) tensor([0.4229, 0.1882, 0.2226, 0.1663]) -Greedy action tensor([ 0.8171, -0.5524, 0.0962, -0.2108]) tensor([0.4766, 0.1212, 0.2318, 0.1705]) -Greedy action tensor([ 0.9615, -0.7655, 0.1836, -0.5425]) tensor([0.5378, 0.0956, 0.2470, 0.1195]) -Greedy action tensor([ 0.6828, -0.6061, -0.0842, -0.1352]) tensor([0.4584, 0.1263, 0.2129, 0.2023]) -Greedy action tensor([ 0.2899, -0.7603, -0.1806, -0.4960]) tensor([0.4115, 0.1440, 0.2571, 0.1875]) -Greedy action tensor([ 0.7215, -0.1771, -0.0801, -0.0110]) tensor([0.4280, 0.1743, 0.1920, 0.2057]) -Greedy action tensor([ 0.7383, -0.5023, -0.1169, -0.1595]) tensor([0.4713, 0.1363, 0.2004, 0.1920]) -Greedy action tensor([ 0.7272, -0.4469, -0.1025, -0.2652]) tensor([0.4726, 0.1461, 0.2061, 0.1752]) -Greedy action tensor([ 0.6087, -0.3432, -0.1625, -0.1221]) tensor([0.4292, 0.1657, 0.1985, 0.2067]) -Greedy action tensor([ 0.6579, 0.1142, -0.0398, 0.0574]) tensor([0.3807, 0.2210, 0.1895, 0.2088]) -Greedy action tensor([ 0.7564, -0.2756, -0.0943, -0.1159]) tensor([0.4543, 0.1618, 0.1940, 0.1899]) -Greedy action tensor([ 0.3548, -0.3978, 0.0356, -0.1523]) tensor([0.3571, 0.1683, 0.2595, 0.2151]) -Greedy action tensor([ 0.2703, 0.1291, 0.1583, -0.2274]) tensor([0.2967, 0.2576, 0.2653, 0.1804]) -Greedy action tensor([ 0.5921, -0.1512, 0.0113, -0.1490]) tensor([0.3982, 0.1893, 0.2227, 0.1898]) -Greedy action tensor([ 0.9437, -0.4002, -0.2560, -0.4063]) tensor([0.5490, 0.1432, 0.1654, 0.1423]) -Greedy action tensor([ 0.8146, -0.2532, -0.0417, -0.1158]) tensor([0.4623, 0.1589, 0.1964, 0.1823]) -Greedy action tensor([ 0.9107, -0.6588, 0.0414, -0.1830]) tensor([0.5096, 0.1061, 0.2136, 0.1707]) -Greedy action tensor([ 0.8129, -0.5150, -0.0297, -0.3659]) tensor([0.4992, 0.1323, 0.2149, 0.1536]) -Greedy action tensor([ 0.9005, -0.7098, -0.0015, -0.4963]) tensor([0.5397, 0.1078, 0.2190, 0.1335]) -Greedy action tensor([ 0.9878, -0.5859, -0.0077, -0.4041]) tensor([0.5478, 0.1136, 0.2024, 0.1362]) -Greedy action tensor([ 0.7182, -0.5224, 0.0243, -0.1521]) tensor([0.4530, 0.1310, 0.2263, 0.1897]) -Greedy action tensor([ 0.2993, -0.3655, -0.1103, -0.0911]) tensor([0.3502, 0.1802, 0.2325, 0.2370]) -Greedy action tensor([ 0.5420, -0.0943, 0.1059, -0.0500]) tensor([0.3664, 0.1939, 0.2369, 0.2027]) -Greedy action tensor([ 0.7480, -0.4622, -0.0743, -0.0416]) tensor([0.4563, 0.1360, 0.2005, 0.2072]) -Greedy action tensor([ 0.7517, -0.7493, -0.0020, -0.2577]) tensor([0.4859, 0.1083, 0.2287, 0.1771]) -Greedy action tensor([ 0.3423, -0.1632, 0.0312, -0.4019]) tensor([0.3558, 0.2146, 0.2606, 0.1690]) -Greedy action tensor([ 0.7488, -0.4996, -0.0583, -0.1983]) tensor([0.4715, 0.1353, 0.2103, 0.1829]) -Greedy action tensor([ 0.2083, 0.0952, -0.1359, -0.2280]) tensor([0.3079, 0.2749, 0.2182, 0.1990]) -Greedy action tensor([ 0.7964, -0.4413, -0.1946, -0.2598]) tensor([0.4978, 0.1444, 0.1848, 0.1731]) -Greedy action tensor([ 0.6619, -0.4238, -0.1000, -0.2889]) tensor([0.4564, 0.1541, 0.2131, 0.1764]) -Greedy action tensor([ 1.0868, -0.6421, -0.0254, -0.5032]) tensor([0.5847, 0.1038, 0.1923, 0.1192]) -Greedy action tensor([ 0.8091, -0.3318, 0.0450, -0.2512]) tensor([0.4691, 0.1499, 0.2185, 0.1625]) -Greedy action tensor([ 0.2407, -0.1909, -0.0789, -0.2329]) tensor([0.3335, 0.2166, 0.2423, 0.2077]) -Greedy action tensor([ 0.6191, -0.7842, -0.1439, -0.1798]) tensor([0.4626, 0.1137, 0.2157, 0.2081]) -Greedy action tensor([ 0.4960, 0.0457, 0.0174, -0.5760]) tensor([0.3847, 0.2452, 0.2384, 0.1317]) -Greedy action tensor([ 0.8057, -0.3994, -0.1300, -0.5485]) tensor([0.5128, 0.1537, 0.2012, 0.1324]) -Greedy action tensor([ 0.8764, -0.2383, -0.0369, -0.0646]) tensor([0.4718, 0.1548, 0.1893, 0.1841]) -Greedy action tensor([ 0.8277, -1.1114, -0.0811, -0.5227]) tensor([0.5537, 0.0796, 0.2231, 0.1435]) -Greedy action tensor([ 0.7232, -0.8869, -0.0490, -0.3310]) tensor([0.4974, 0.0994, 0.2298, 0.1733]) -Greedy action tensor([ 0.7994, -0.2473, -0.0713, -0.0435]) tensor([0.4545, 0.1596, 0.1903, 0.1956]) -Greedy action tensor([ 0.3255, -0.0506, -0.1215, 0.1545]) tensor([0.3156, 0.2166, 0.2018, 0.2660]) -Greedy action tensor([ 0.8901, -0.3612, -0.0679, -0.2090]) tensor([0.4993, 0.1428, 0.1915, 0.1663]) -Greedy action tensor([ 0.6296, -0.3109, -0.1689, -0.2281]) tensor([0.4416, 0.1724, 0.1987, 0.1873]) -Greedy action tensor([ 0.9480, -0.5896, -0.0325, -0.2971]) tensor([0.5325, 0.1144, 0.1997, 0.1533]) -Greedy action tensor([ 0.2136, 0.3430, -0.1895, 0.0887]) tensor([0.2711, 0.3085, 0.1812, 0.2393]) -Greedy action tensor([ 1.1706, -0.6826, -0.1107, -0.2507]) tensor([0.5967, 0.0935, 0.1657, 0.1441]) -Greedy action tensor([ 0.6809, 0.3096, 0.0433, -0.2852]) tensor([0.3848, 0.2654, 0.2034, 0.1464]) -Greedy action tensor([ 0.4061, 0.5452, -0.2498, 0.3738]) tensor([0.2750, 0.3160, 0.1427, 0.2663]) -Greedy action tensor([ 0.9422, -0.3409, -0.1062, -0.1772]) tensor([0.5117, 0.1418, 0.1794, 0.1671]) -Greedy action tensor([ 0.3195, 0.0977, -0.1635, -0.2843]) tensor([0.3373, 0.2702, 0.2081, 0.1844]) -Greedy action tensor([ 0.5760, -0.2956, 0.0319, -0.2433]) tensor([0.4099, 0.1715, 0.2379, 0.1807]) -Greedy action tensor([ 0.9467, -0.4205, -0.2701, -0.2143]) tensor([0.5364, 0.1367, 0.1589, 0.1680]) -Greedy action tensor([ 0.6704, -0.2625, -0.0115, -0.2021]) tensor([0.4316, 0.1698, 0.2182, 0.1804]) -Greedy action tensor([ 0.5518, 0.3760, -0.2246, 0.2289]) tensor([0.3308, 0.2775, 0.1522, 0.2395]) -Greedy action tensor([ 0.8378, -0.6217, 0.0764, -0.1714]) tensor([0.4845, 0.1126, 0.2263, 0.1766]) -Greedy action tensor([ 0.7001, -0.4413, -0.1063, -0.2046]) tensor([0.4607, 0.1471, 0.2057, 0.1864]) -Greedy action tensor([ 0.5624, 0.1271, -0.0250, -0.1193]) tensor([0.3692, 0.2389, 0.2052, 0.1867]) -Greedy action tensor([ 0.3466, 0.4068, -0.1473, 0.2353]) tensor([0.2804, 0.2977, 0.1711, 0.2508]) -Greedy action tensor([ 0.9891, -0.9154, 0.1812, -0.4706]) tensor([0.5474, 0.0815, 0.2440, 0.1272]) -Greedy action tensor([ 0.4312, 0.3886, 0.0261, -0.0386]) tensor([0.3077, 0.2948, 0.2052, 0.1923]) -Greedy action tensor([ 0.8470, -0.5865, 0.0399, -0.3678]) tensor([0.5047, 0.1204, 0.2252, 0.1498]) -Greedy action tensor([ 0.4546, -0.0906, -0.1281, -0.3436]) tensor([0.3864, 0.2240, 0.2158, 0.1739]) -Greedy action tensor([ 0.7476, -0.1971, 0.0535, -0.0828]) tensor([0.4303, 0.1673, 0.2149, 0.1875]) -Greedy action tensor([ 0.3620, -0.3282, 0.0634, -0.5738]) tensor([0.3794, 0.1903, 0.2815, 0.1488]) -Greedy action tensor([ 0.9066, -0.6310, -0.1161, -0.4180]) tensor([0.5433, 0.1168, 0.1954, 0.1445]) -Greedy action tensor([ 0.9317, -0.6905, 0.1096, -0.3110]) tensor([0.5193, 0.1025, 0.2282, 0.1499]) -Greedy action tensor([ 0.3589, -0.2551, 0.1142, -0.3613]) tensor([0.3558, 0.1925, 0.2786, 0.1731]) -Greedy action tensor([ 0.1137, 0.3009, -0.1300, 0.0592]) tensor([0.2540, 0.3063, 0.1991, 0.2406]) -Greedy action tensor([0.2699, 0.4038, 1.1146, 0.3796]) tensor([0.1790, 0.2047, 0.4166, 0.1998]) -Greedy action tensor([-0.3675, 0.0099, -0.9342, 0.5732]) tensor([0.1790, 0.2610, 0.1015, 0.4585]) -Greedy action tensor([-0.1397, -0.9984, 0.7172, -1.1583]) tensor([0.2415, 0.1023, 0.5690, 0.0872]) -Greedy action tensor([ 0.2917, -0.4002, -0.1013, 0.4740]) tensor([0.2963, 0.1483, 0.2000, 0.3555]) -Greedy action tensor([ 1.8037, -1.2369, 0.7608, 0.5152]) tensor([0.5967, 0.0285, 0.2103, 0.1645]) -Greedy action tensor([-0.1081, -2.2797, -0.4072, 0.3048]) tensor([0.2970, 0.0339, 0.2202, 0.4489]) -Greedy action tensor([ 0.6173, 0.1467, -1.6051, 0.5881]) tensor([0.3698, 0.2310, 0.0401, 0.3591]) -Greedy action tensor([ 0.2502, -0.7455, 0.5767, 0.1182]) tensor([0.2753, 0.1017, 0.3816, 0.2413]) -Greedy action tensor([ 0.0568, 0.0303, -0.4123, -0.4603]) tensor([0.3129, 0.3047, 0.1958, 0.1866]) -Greedy action tensor([-0.1882, 0.5556, -0.4041, -1.1912]) tensor([0.2338, 0.4920, 0.1884, 0.0858]) -Greedy action tensor([-0.0452, -1.5120, 0.4682, -0.0734]) tensor([0.2581, 0.0595, 0.4313, 0.2510]) -Greedy action tensor([-0.6980, -0.2959, -0.4546, -1.1070]) tensor([0.2255, 0.3371, 0.2876, 0.1498]) -Greedy action tensor([ 0.3416, 0.3779, 1.2863, -0.7291]) tensor([0.2019, 0.2094, 0.5194, 0.0692]) -Greedy action tensor([ 0.1782, -0.1371, -0.2128, -0.2924]) tensor([0.3300, 0.2407, 0.2232, 0.2061]) -Greedy action tensor([-0.1984, -1.0256, -0.4156, 0.6808]) tensor([0.2150, 0.0940, 0.1730, 0.5180]) -Greedy action tensor([-0.1004, -0.5357, 0.1436, -0.4068]) tensor([0.2733, 0.1768, 0.3488, 0.2011]) -Greedy action tensor([ 0.3881, -0.5108, -0.9467, -0.5944]) tensor([0.4891, 0.1991, 0.1287, 0.1831]) -Greedy action tensor([-0.1099, 0.3391, 0.4057, -0.4386]) tensor([0.2016, 0.3158, 0.3376, 0.1451]) -Greedy action tensor([-0.4323, -0.6020, -0.6455, -0.1163]) tensor([0.2485, 0.2097, 0.2008, 0.3409]) -Greedy action tensor([ 0.1968, -0.6792, -0.4764, -0.5990]) tensor([0.4206, 0.1751, 0.2145, 0.1898]) -Greedy action tensor([ 1.3758, -0.2954, -0.0121, 0.2754]) tensor([0.5649, 0.1062, 0.1410, 0.1879]) -Greedy action tensor([-0.3726, -0.8955, -0.1665, 0.5994]) tensor([0.1830, 0.1085, 0.2249, 0.4837]) -Greedy action tensor([-0.2062, 0.7649, -0.7956, -0.2601]) tensor([0.1944, 0.5135, 0.1078, 0.1842]) -Greedy action tensor([ 1.7573, -1.8163, 0.4107, 0.1655]) tensor([0.6704, 0.0188, 0.1744, 0.1365]) -Greedy action tensor([-0.1258, -0.8729, 0.0239, -0.9947]) tensor([0.3274, 0.1551, 0.3802, 0.1373]) -Greedy action tensor([-0.6058, -0.2861, 0.3291, -0.3886]) tensor([0.1622, 0.2233, 0.4130, 0.2015]) -Greedy action tensor([-0.4861, -0.0295, 0.5571, -0.9815]) tensor([0.1659, 0.2620, 0.4710, 0.1011]) -Greedy action tensor([ 0.0513, 0.0823, -0.1622, -0.2496]) tensor([0.2794, 0.2882, 0.2257, 0.2068]) -Greedy action tensor([1.4526, 0.3866, 0.7449, 0.4826]) tensor([0.4512, 0.1554, 0.2224, 0.1710]) -Greedy action tensor([-0.8935, -0.4284, 0.4313, -0.9292]) tensor([0.1366, 0.2176, 0.5140, 0.1318]) -Greedy action tensor([ 0.1544, 0.2060, 0.0822, -0.7757]) tensor([0.2961, 0.3117, 0.2754, 0.1168]) -Greedy action tensor([-0.6780, -1.6173, 1.6274, -0.9628]) tensor([0.0822, 0.0321, 0.8239, 0.0618]) -Greedy action tensor([ 0.2514, -0.3862, -0.2437, 0.5662]) tensor([0.2851, 0.1507, 0.1737, 0.3905]) -Greedy action tensor([-0.6108, -0.0234, 1.3081, -0.7777]) tensor([0.0956, 0.1720, 0.6514, 0.0809]) -Greedy action tensor([ 0.1892, -0.4880, -0.0842, -0.4241]) tensor([0.3558, 0.1808, 0.2707, 0.1927]) -Greedy action tensor([-0.3470, -1.3740, -0.2014, -0.3625]) tensor([0.2858, 0.1023, 0.3305, 0.2814]) -Greedy action tensor([-0.3556, -0.7194, 0.0328, -0.3971]) tensor([0.2422, 0.1683, 0.3571, 0.2324]) -Greedy action tensor([-0.2250, 0.0679, 0.6272, -0.8189]) tensor([0.1909, 0.2559, 0.4477, 0.1054]) -Greedy action tensor([0.7812, 0.0625, 0.1740, 0.0781]) tensor([0.3957, 0.1929, 0.2156, 0.1959]) -Greedy action tensor([-1.3335, -0.5220, -0.6536, 0.7739]) tensor([0.0743, 0.1674, 0.1467, 0.6116]) -Greedy action tensor([1.0135, 0.3061, 0.0388, 0.0439]) tensor([0.4445, 0.2191, 0.1677, 0.1686]) -Greedy action tensor([ 0.2998, -0.5946, 1.5921, -0.7939]) tensor([0.1857, 0.0759, 0.6762, 0.0622]) -Greedy action tensor([-0.6733, -0.7583, -0.3027, -0.5694]) tensor([0.2234, 0.2052, 0.3236, 0.2479]) -Greedy action tensor([-1.1775, -1.2821, 0.5473, -0.4708]) tensor([0.1048, 0.0944, 0.5882, 0.2125]) -Greedy action tensor([ 0.5410, 0.4274, -0.3625, 0.3226]) tensor([0.3224, 0.2878, 0.1306, 0.2592]) -Greedy action tensor([-0.0493, 1.0030, 0.3987, 0.0431]) tensor([0.1532, 0.4389, 0.2398, 0.1681]) -Greedy action tensor([ 1.2285, -0.8213, 1.0699, 0.6293]) tensor([0.3951, 0.0509, 0.3371, 0.2170]) -Greedy action tensor([-1.1924, -0.1587, 1.3582, 0.0847]) tensor([0.0495, 0.1391, 0.6340, 0.1774]) -Greedy action tensor([-0.8832, -0.2107, 0.3083, -0.8497]) tensor([0.1373, 0.2689, 0.4519, 0.1419]) -Greedy action tensor([-0.2089, 0.2911, 0.4509, -0.3305]) tensor([0.1829, 0.3015, 0.3537, 0.1619]) -Greedy action tensor([ 0.5100, -0.2225, 0.6868, -0.2514]) tensor([0.3184, 0.1530, 0.3799, 0.1487]) -Greedy action tensor([ 0.3006, 0.1445, 0.0895, -0.3607]) tensor([0.3143, 0.2689, 0.2545, 0.1622]) -Greedy action tensor([ 0.3830, -0.3959, -0.3511, -1.4030]) tensor([0.4747, 0.2179, 0.2278, 0.0796]) -Greedy action tensor([ 0.5581, -0.1881, 0.1739, 0.2447]) tensor([0.3465, 0.1643, 0.2360, 0.2533]) -Greedy action tensor([ 0.1184, -1.3049, -0.4202, 0.2604]) tensor([0.3359, 0.0809, 0.1960, 0.3872]) -Greedy action tensor([ 0.3042, -0.7985, 0.2926, -0.5579]) tensor([0.3646, 0.1210, 0.3604, 0.1540]) -Greedy action tensor([-0.8795, -0.9309, -0.7375, -0.6529]) tensor([0.2295, 0.2180, 0.2645, 0.2879]) -Greedy action tensor([ 0.5887, -0.6789, 1.1860, -0.5068]) tensor([0.2913, 0.0820, 0.5293, 0.0974]) -Greedy action tensor([-1.5919, 0.4636, 0.2516, -0.4127]) tensor([0.0544, 0.4249, 0.3438, 0.1769]) -Greedy action tensor([-0.7043, -1.2185, 1.1043, -0.4230]) tensor([0.1108, 0.0663, 0.6761, 0.1468]) -Greedy action tensor([-0.8938, -1.0015, 1.4842, -1.1710]) tensor([0.0744, 0.0668, 0.8024, 0.0564]) -Greedy action tensor([ 1.3055, -0.9588, 0.4372, 0.5794]) tensor([0.4982, 0.0518, 0.2091, 0.2410]) -Greedy action tensor([ 0.3877, -0.6303, -0.2484, 0.1787]) tensor([0.3701, 0.1337, 0.1959, 0.3003]) -Greedy action tensor([-1.0103, -0.3046, 1.4536, -0.9402]) tensor([0.0631, 0.1278, 0.7414, 0.0677]) -Greedy action tensor([-1.0689, -0.2929, 0.2994, -1.0002]) tensor([0.1224, 0.2659, 0.4807, 0.1311]) -Greedy action tensor([-0.2170, 0.6206, 0.0754, -1.3719]) tensor([0.2014, 0.4654, 0.2698, 0.0635]) -Greedy action tensor([ 0.2424, 0.4912, -0.6074, 0.0146]) tensor([0.2852, 0.3658, 0.1219, 0.2271]) -Greedy action tensor([-0.7262, -0.8891, 0.1361, -0.6919]) tensor([0.1904, 0.1617, 0.4509, 0.1970]) -Greedy action tensor([-1.3817, 0.0793, -0.2510, -0.3247]) tensor([0.0886, 0.3819, 0.2745, 0.2550]) -Greedy action tensor([-0.1116, 0.5664, 0.7572, -0.3048]) tensor([0.1619, 0.3188, 0.3859, 0.1334]) -Greedy action tensor([ 0.4491, 0.0778, 0.5408, -0.8072]) tensor([0.3257, 0.2247, 0.3569, 0.0927]) -Greedy action tensor([ 0.1796, 0.2286, 0.0900, -0.5202]) tensor([0.2889, 0.3034, 0.2642, 0.1435]) -Greedy action tensor([1.1771, 0.5994, 0.1805, 0.1115]) tensor([0.4396, 0.2467, 0.1623, 0.1514]) -Greedy action tensor([-0.5004, -1.1830, -1.1045, -1.1431]) tensor([0.3879, 0.1960, 0.2120, 0.2040]) -Greedy action tensor([-0.2892, -1.8487, -1.2937, 0.7295]) tensor([0.2301, 0.0484, 0.0843, 0.6373]) -Greedy action tensor([ 0.1074, -1.2009, 1.0070, -0.4829]) tensor([0.2335, 0.0631, 0.5740, 0.1294]) -Greedy action tensor([-1.0616, -1.5924, -0.9459, 0.7669]) tensor([0.1119, 0.0658, 0.1256, 0.6966]) -Greedy action tensor([ 0.7687, -0.3900, -0.0473, 0.4518]) tensor([0.4025, 0.1263, 0.1780, 0.2932]) -Greedy action tensor([ 1.6866, -0.6822, 0.6210, 0.5672]) tensor([0.5667, 0.0530, 0.1952, 0.1850]) -Greedy action tensor([-0.6853, -0.9056, -0.1977, -1.5187]) tensor([0.2587, 0.2076, 0.4213, 0.1124]) -Greedy action tensor([-0.0805, -0.4506, -0.1579, -0.5248]) tensor([0.3070, 0.2120, 0.2841, 0.1969]) -Greedy action tensor([ 0.3759, -0.7984, 0.3907, 0.3786]) tensor([0.3006, 0.0929, 0.3051, 0.3014]) -Greedy action tensor([-1.6074, -0.5112, 0.7784, 0.2729]) tensor([0.0467, 0.1398, 0.5075, 0.3061]) -Greedy action tensor([-1.7978, -0.2858, 0.5792, -0.0823]) tensor([0.0457, 0.2074, 0.4926, 0.2542]) -Greedy action tensor([-0.9267, 0.6193, 0.1646, -0.0511]) tensor([0.0903, 0.4239, 0.2690, 0.2168]) -Greedy action tensor([-1.8064, -0.3535, 0.6083, -0.0933]) tensor([0.0454, 0.1943, 0.5083, 0.2520]) -Greedy action tensor([-1.8761, -0.4126, 0.6496, -0.1079]) tensor([0.0422, 0.1825, 0.5278, 0.2475]) -Greedy action tensor([-1.9183, -0.4395, 0.6507, -0.1703]) tensor([0.0414, 0.1814, 0.5397, 0.2375]) -Greedy action tensor([-1.9383, -0.4242, 0.6599, -0.1753]) tensor([0.0403, 0.1832, 0.5416, 0.2349]) -Greedy action tensor([-1.8231, -0.3328, 0.6222, -0.0992]) tensor([0.0443, 0.1966, 0.5108, 0.2483]) -Greedy action tensor([-1.8943, -0.4294, 0.6372, -0.1577]) tensor([0.0424, 0.1835, 0.5332, 0.2408]) -Greedy action tensor([-1.3775, 0.1982, 0.3047, 0.1660]) tensor([0.0629, 0.3042, 0.3384, 0.2945]) -Greedy action tensor([-1.9185, -0.3211, 0.6332, -0.1730]) tensor([0.0408, 0.2017, 0.5237, 0.2338]) -Greedy action tensor([-1.7716, -0.4661, 0.8357, 0.2685]) tensor([0.0385, 0.1422, 0.5228, 0.2965]) -Greedy action tensor([-1.8495, -0.2991, 0.5923, -0.1484]) tensor([0.0441, 0.2078, 0.5066, 0.2415]) -Greedy action tensor([-1.6746, -0.1182, 0.5808, -0.0368]) tensor([0.0490, 0.2322, 0.4670, 0.2518]) -Greedy action tensor([-1.9395, -0.4463, 0.6657, -0.1777]) tensor([0.0403, 0.1794, 0.5455, 0.2347]) -Greedy action tensor([-1.6825, 0.1021, 0.4492, -0.0468]) tensor([0.0487, 0.2903, 0.4108, 0.2502]) -Greedy action tensor([-1.8672, -0.4568, 0.6338, -0.1396]) tensor([0.0436, 0.1788, 0.5321, 0.2455]) -Greedy action tensor([-0.9365, 0.2771, 0.7436, 0.7830]) tensor([0.0653, 0.2198, 0.3504, 0.3645]) -Greedy action tensor([-1.9220, -0.4503, 0.6597, -0.1683]) tensor([0.0411, 0.1789, 0.5429, 0.2372]) -Greedy action tensor([-1.9002, -0.4523, 0.6577, -0.1495]) tensor([0.0418, 0.1778, 0.5396, 0.2407]) -Greedy action tensor([-1.8405, -0.3208, 0.6432, -0.1158]) tensor([0.0432, 0.1973, 0.5174, 0.2422]) -Greedy action tensor([-1.8069, -0.4805, 0.6090, -0.1042]) tensor([0.0466, 0.1756, 0.5220, 0.2558]) -Greedy action tensor([-1.8267, -0.4589, 0.6109, -0.1182]) tensor([0.0457, 0.1794, 0.5228, 0.2522]) -Greedy action tensor([-1.9323, -0.2729, 0.6316, -0.1749]) tensor([0.0399, 0.2099, 0.5186, 0.2315]) -Greedy action tensor([-1.9268, -0.4480, 0.6550, -0.1700]) tensor([0.0410, 0.1798, 0.5418, 0.2374]) -Greedy action tensor([-1.8634, -0.4047, 0.6438, -0.1039]) tensor([0.0428, 0.1839, 0.5248, 0.2485]) -Greedy action tensor([-1.9194, -0.4281, 0.6532, -0.1658]) tensor([0.0411, 0.1827, 0.5387, 0.2375]) -Greedy action tensor([-1.9148, -0.4188, 0.6519, -0.1627]) tensor([0.0412, 0.1841, 0.5369, 0.2378]) -Greedy action tensor([-1.0052, 0.8367, 0.0875, 0.3341]) tensor([0.0709, 0.4472, 0.2114, 0.2705]) -Greedy action tensor([-1.9429, -0.4459, 0.6660, -0.1791]) tensor([0.0402, 0.1795, 0.5458, 0.2344]) -Greedy action tensor([-1.5872, -0.3199, 0.5053, 0.0847]) tensor([0.0556, 0.1975, 0.4508, 0.2960]) -Greedy action tensor([-1.5611, 0.2403, 0.4557, 0.1288]) tensor([0.0500, 0.3030, 0.3759, 0.2711]) -Greedy action tensor([-1.3528, 0.3285, 0.3259, -0.0572]) tensor([0.0650, 0.3492, 0.3483, 0.2375]) -Greedy action tensor([-1.8153, -0.4494, 0.6065, -0.1129]) tensor([0.0461, 0.1808, 0.5198, 0.2532]) -Greedy action tensor([-1.7052, -0.2103, 0.5269, -0.1136]) tensor([0.0508, 0.2265, 0.4733, 0.2494]) -Greedy action tensor([-1.4666, -0.3589, 0.6558, 0.2005]) tensor([0.0566, 0.1713, 0.4725, 0.2997]) -Greedy action tensor([-1.8883, -0.3940, 0.6301, -0.1546]) tensor([0.0425, 0.1894, 0.5274, 0.2406]) -Greedy action tensor([-1.8755, -0.4853, 0.6324, -0.1397]) tensor([0.0435, 0.1748, 0.5346, 0.2470]) -Greedy action tensor([-1.4859, -0.5728, 0.7727, 0.4969]) tensor([0.0492, 0.1226, 0.4708, 0.3574]) -Greedy action tensor([-1.6030, -0.5434, 0.4867, -0.0117]) tensor([0.0592, 0.1709, 0.4789, 0.2909]) -Greedy action tensor([-1.8378, -0.2896, 0.6032, -0.1187]) tensor([0.0439, 0.2066, 0.5044, 0.2451]) -Greedy action tensor([-1.8214, -0.4427, 0.6043, -0.1168]) tensor([0.0459, 0.1823, 0.5193, 0.2525]) -Greedy action tensor([-1.9426, -0.4484, 0.6671, -0.1793]) tensor([0.0402, 0.1791, 0.5464, 0.2344]) -Greedy action tensor([-1.3803, 0.3769, 0.2861, 0.0089]) tensor([0.0621, 0.3600, 0.3287, 0.2492]) -Greedy action tensor([-1.5891, 0.4691, -0.1158, -0.8090]) tensor([0.0650, 0.5093, 0.2838, 0.1419]) -Greedy action tensor([-1.8799, -0.4409, 0.6782, -0.1130]) tensor([0.0417, 0.1758, 0.5384, 0.2441]) -Greedy action tensor([-1.8944, -0.4366, 0.6714, -0.1337]) tensor([0.0415, 0.1781, 0.5393, 0.2411]) -Greedy action tensor([-1.8757, -0.4579, 0.6403, -0.1442]) tensor([0.0432, 0.1783, 0.5346, 0.2439]) -Greedy action tensor([-1.5766, -0.4180, 0.5196, 0.0728]) tensor([0.0571, 0.1818, 0.4642, 0.2970]) -Greedy action tensor([-1.9300, -0.3412, 0.6401, -0.1672]) tensor([0.0403, 0.1976, 0.5270, 0.2351]) -Greedy action tensor([-1.9171, -0.3841, 0.6453, -0.1604]) tensor([0.0410, 0.1899, 0.5316, 0.2375]) -Greedy action tensor([-1.2718, -0.3511, 0.2918, 0.1931]) tensor([0.0793, 0.1991, 0.3786, 0.3430]) -Greedy action tensor([-1.6993, -0.4770, 0.5950, -0.0383]) tensor([0.0511, 0.1734, 0.5066, 0.2689]) -Greedy action tensor([-1.7479, -0.4626, 0.5645, -0.1259]) tensor([0.0506, 0.1828, 0.5106, 0.2560]) -Greedy action tensor([-1.6897, -0.3122, 0.5678, -0.0169]) tensor([0.0504, 0.1997, 0.4815, 0.2683]) -Greedy action tensor([-1.3698, -0.4927, 0.9538, 0.6257]) tensor([0.0477, 0.1146, 0.4869, 0.3507]) -Greedy action tensor([-1.8913, -0.4573, 0.6368, -0.1501]) tensor([0.0427, 0.1791, 0.5348, 0.2435]) -Greedy action tensor([-1.4794, -0.2666, 0.4045, -0.0277]) tensor([0.0657, 0.2211, 0.4325, 0.2807]) -Greedy action tensor([-1.8705, -0.4470, 0.6298, -0.1300]) tensor([0.0434, 0.1802, 0.5290, 0.2474]) -Greedy action tensor([-1.9409, -0.4468, 0.6673, -0.1764]) tensor([0.0402, 0.1792, 0.5459, 0.2348]) -Greedy action tensor([-1.6748, -0.3987, 0.5292, -0.0866]) tensor([0.0539, 0.1932, 0.4888, 0.2640]) -Greedy action tensor([-1.7895, -0.1642, 0.5443, -0.0559]) tensor([0.0453, 0.2303, 0.4677, 0.2566]) -Greedy action tensor([-1.7813, -0.4172, 0.5804, -0.1247]) tensor([0.0482, 0.1884, 0.5110, 0.2524]) -Greedy action tensor([-1.8789, -0.3472, 0.6289, -0.1091]) tensor([0.0421, 0.1946, 0.5165, 0.2469]) -Greedy action tensor([-0.9472, -0.2788, 0.1849, 0.0010]) tensor([0.1158, 0.2260, 0.3593, 0.2989]) -Greedy action tensor([-1.8721, -0.4376, 0.6269, -0.1434]) tensor([0.0435, 0.1825, 0.5291, 0.2449]) -Greedy action tensor([-1.1951, -0.0183, 0.2608, 0.0188]) tensor([0.0840, 0.2726, 0.3604, 0.2829]) -Greedy action tensor([-1.0990, 0.2181, 0.0469, 0.2597]) tensor([0.0850, 0.3172, 0.2672, 0.3306]) -Greedy action tensor([-1.8749, -0.4516, 0.6372, -0.1457]) tensor([0.0433, 0.1796, 0.5334, 0.2438]) -Greedy action tensor([-0.7352, 0.2304, 0.1016, -0.0605]) tensor([0.1266, 0.3325, 0.2923, 0.2486]) -Greedy action tensor([-1.7686, -0.3271, 0.5745, -0.1311]) tensor([0.0481, 0.2034, 0.5011, 0.2474]) -Greedy action tensor([-1.9269, -0.3646, 0.6469, -0.1646]) tensor([0.0405, 0.1930, 0.5308, 0.2358]) -Greedy action tensor([-0.5734, -0.3950, 0.7365, 1.0298]) tensor([0.0920, 0.1100, 0.3409, 0.4571]) -Greedy action tensor([-1.4736, -0.4828, 0.4194, 0.0796]) tensor([0.0664, 0.1789, 0.4409, 0.3139]) -Greedy action tensor([-1.2467, 0.2759, 0.2482, -0.0019]) tensor([0.0740, 0.3392, 0.3299, 0.2569]) -Greedy action tensor([-1.9048, -0.3210, 0.6284, -0.1534]) tensor([0.0413, 0.2011, 0.5198, 0.2378]) -Greedy action tensor([-1.8514, -0.3669, 0.6249, -0.1277]) tensor([0.0436, 0.1926, 0.5192, 0.2446]) -Greedy action tensor([-1.9158, -0.4220, 0.6491, -0.1670]) tensor([0.0413, 0.1840, 0.5372, 0.2375]) -Greedy action tensor([-1.6816, -0.1788, 0.4920, 0.0065]) tensor([0.0508, 0.2282, 0.4463, 0.2747]) -Greedy action tensor([-1.5312, -0.5691, 0.4468, 0.0711]) tensor([0.0633, 0.1655, 0.4572, 0.3140]) -Greedy action tensor([-1.2626, -0.5531, 0.3235, 0.3485]) tensor([0.0774, 0.1573, 0.3779, 0.3875]) -Greedy action tensor([ 0.8327, -0.1858, -0.1608, 0.2328]) tensor([0.4385, 0.1584, 0.1624, 0.2407]) -Greedy action tensor([ 1.4224, -0.1152, -0.2987, 0.3229]) tensor([0.5791, 0.1244, 0.1036, 0.1929]) -Greedy action tensor([ 1.4379, -0.3382, -0.7919, 0.5722]) tensor([0.5891, 0.0997, 0.0634, 0.2479]) -Greedy action tensor([ 1.3209, -0.7852, -0.1194, 0.2595]) tensor([0.5867, 0.0714, 0.1390, 0.2030]) -Greedy action tensor([ 0.9033, -0.3789, -0.0971, 0.4925]) tensor([0.4332, 0.1202, 0.1593, 0.2873]) -Greedy action tensor([ 1.2547, -0.2048, -0.6714, 0.0127]) tensor([0.5999, 0.1394, 0.0874, 0.1733]) -Greedy action tensor([ 1.3138, -0.1671, -0.2667, 0.0826]) tensor([0.5796, 0.1318, 0.1193, 0.1692]) -Greedy action tensor([ 0.8421, -0.4912, -0.0123, 0.3236]) tensor([0.4377, 0.1154, 0.1863, 0.2606]) -Greedy action tensor([ 0.5884, -0.7218, 0.1864, 0.0294]) tensor([0.3983, 0.1075, 0.2665, 0.2277]) -Greedy action tensor([ 0.8226, -0.1681, -0.1249, 0.1601]) tensor([0.4396, 0.1632, 0.1704, 0.2267]) -Greedy action tensor([ 1.1781, -0.4628, -0.5249, 0.1767]) tensor([0.5736, 0.1112, 0.1045, 0.2107]) -Greedy action tensor([ 1.4158, -0.5402, -0.4572, 0.2728]) tensor([0.6196, 0.0876, 0.0952, 0.1976]) -Greedy action tensor([ 2.0673, -0.3550, -0.6842, 0.5248]) tensor([0.7319, 0.0649, 0.0467, 0.1565]) -Greedy action tensor([ 1.5993, -0.6060, -0.0748, 0.3665]) tensor([0.6293, 0.0694, 0.1180, 0.1834]) -Greedy action tensor([ 1.5978, -0.4249, -0.6903, 0.5231]) tensor([0.6349, 0.0840, 0.0644, 0.2167]) -Greedy action tensor([ 1.3582e+00, -1.2754e-03, -3.7472e-01, 5.1499e-01]) tensor([0.5365, 0.1378, 0.0948, 0.2309]) -Greedy action tensor([2.0026, 0.2590, 0.1713, 0.2453]) tensor([0.6633, 0.1160, 0.1063, 0.1144]) -Greedy action tensor([ 2.2109, -1.0039, -0.3773, 0.6219]) tensor([0.7579, 0.0304, 0.0570, 0.1547]) -Greedy action tensor([ 1.3815, 0.3740, -0.2369, 0.4363]) tensor([0.5123, 0.1871, 0.1015, 0.1991]) -Greedy action tensor([ 1.4474, -0.5268, -0.2551, 0.4965]) tensor([0.5857, 0.0813, 0.1067, 0.2263]) -Greedy action tensor([ 1.1210, -0.2811, -0.0775, 0.1264]) tensor([0.5215, 0.1283, 0.1573, 0.1929]) -Greedy action tensor([ 1.3769, -0.4392, -0.2728, 0.0703]) tensor([0.6152, 0.1001, 0.1182, 0.1666]) -Greedy action tensor([ 1.7052, -1.0737, -0.1425, 0.5541]) tensor([0.6511, 0.0404, 0.1026, 0.2059]) -Greedy action tensor([ 1.0388, -0.4305, -0.2291, 0.1861]) tensor([0.5161, 0.1187, 0.1452, 0.2200]) -Greedy action tensor([ 1.3387, -0.5716, -0.4370, 0.6793]) tensor([0.5451, 0.0807, 0.0923, 0.2819]) -Greedy action tensor([ 1.8668, -0.4932, -0.3046, 0.6567]) tensor([0.6637, 0.0627, 0.0757, 0.1979]) -Greedy action tensor([ 1.0640, -0.4456, -0.4827, 0.4966]) tensor([0.4998, 0.1104, 0.1064, 0.2834]) -Greedy action tensor([ 2.0166, 0.4388, -0.2099, -0.2608]) tensor([0.7058, 0.1457, 0.0762, 0.0724]) -Greedy action tensor([ 0.2445, -0.2229, -0.1421, 0.1750]) tensor([0.3088, 0.1935, 0.2097, 0.2880]) -Greedy action tensor([ 1.8170, -0.5732, -0.6475, 0.5091]) tensor([0.6910, 0.0633, 0.0588, 0.1869]) -Greedy action tensor([ 1.6897, -0.7215, -0.2175, 0.3858]) tensor([0.6624, 0.0594, 0.0984, 0.1798]) -Greedy action tensor([ 1.7028, -0.5232, -0.3620, 0.4128]) tensor([0.6622, 0.0715, 0.0840, 0.1823]) -Greedy action tensor([ 1.4733, -0.2411, -0.5540, 0.2113]) tensor([0.6270, 0.1129, 0.0826, 0.1775]) -Greedy action tensor([ 1.7022, -0.6648, -0.1339, 0.5854]) tensor([0.6327, 0.0593, 0.1009, 0.2071]) -Greedy action tensor([ 1.8332, -0.8294, -0.2124, 0.1693]) tensor([0.7202, 0.0503, 0.0931, 0.1364]) -Greedy action tensor([ 1.1154, -0.3252, 0.1537, 0.0353]) tensor([0.5106, 0.1209, 0.1952, 0.1734]) -Greedy action tensor([ 2.1581, -0.5000, -0.4262, 0.0158]) tensor([0.7918, 0.0555, 0.0597, 0.0929]) -Greedy action tensor([ 1.8449, -0.3418, -0.3099, 0.4340]) tensor([0.6793, 0.0763, 0.0787, 0.1657]) -Greedy action tensor([ 1.3859, 0.0054, -0.1520, 0.5220]) tensor([0.5297, 0.1332, 0.1138, 0.2233]) -Greedy action tensor([ 1.3720, -0.3329, 0.0548, 0.5809]) tensor([0.5255, 0.0955, 0.1408, 0.2382]) -Greedy action tensor([ 1.3865, 0.4103, -0.3847, 0.3410]) tensor([0.5268, 0.1985, 0.0896, 0.1852]) -Greedy action tensor([ 1.3687, -0.5519, -0.2804, 0.3592]) tensor([0.5872, 0.0860, 0.1129, 0.2139]) -Greedy action tensor([ 1.8628, -0.7720, -0.2417, 0.3735]) tensor([0.7046, 0.0505, 0.0859, 0.1589]) -Greedy action tensor([ 1.5960, -0.5381, -0.7407, 0.5043]) tensor([0.6449, 0.0763, 0.0623, 0.2165]) -Greedy action tensor([ 1.0831, -0.5557, 0.1675, 0.4692]) tensor([0.4682, 0.0909, 0.1874, 0.2534]) -Greedy action tensor([ 1.4961, -0.4528, -0.3542, 0.2387]) tensor([0.6313, 0.0899, 0.0992, 0.1795]) -Greedy action tensor([ 1.8913, -0.5050, -0.3061, 0.5376]) tensor([0.6847, 0.0624, 0.0761, 0.1769]) -Greedy action tensor([ 1.3522, -0.1619, -0.6747, 0.0815]) tensor([0.6126, 0.1348, 0.0807, 0.1719]) -Greedy action tensor([ 1.4382, -0.2850, -0.2739, 0.2224]) tensor([0.6041, 0.1078, 0.1090, 0.1791]) -Greedy action tensor([ 2.3924, -1.0556, 0.3890, 0.5009]) tensor([0.7590, 0.0241, 0.1024, 0.1145]) -Greedy action tensor([ 1.5089, -0.7139, -0.1912, 0.3644]) tensor([0.6214, 0.0673, 0.1135, 0.1978]) -Greedy action tensor([ 1.2823, -0.5068, -0.1868, 0.1928]) tensor([0.5768, 0.0964, 0.1327, 0.1940]) -Greedy action tensor([ 1.7325, -0.3912, -0.5818, 0.5121]) tensor([0.6607, 0.0790, 0.0653, 0.1950]) -Greedy action tensor([ 0.7238, 0.0223, -0.2519, -0.0358]) tensor([0.4272, 0.2118, 0.1610, 0.1999]) -Greedy action tensor([ 1.3824, -0.5229, -0.4125, 0.0090]) tensor([0.6377, 0.0949, 0.1060, 0.1615]) -Greedy action tensor([ 1.2946, -0.3192, -0.3866, 0.4071]) tensor([0.5565, 0.1108, 0.1036, 0.2291]) -Greedy action tensor([ 2.5028, -0.4779, -0.2974, 0.6297]) tensor([0.7904, 0.0401, 0.0481, 0.1214]) -Greedy action tensor([ 1.5217, 0.0739, -0.4535, 0.2161]) tensor([0.6080, 0.1429, 0.0843, 0.1648]) -Greedy action tensor([ 1.1063, -0.0464, -0.8157, 0.5586]) tensor([0.4901, 0.1548, 0.0717, 0.2834]) -Greedy action tensor([ 0.8406, -0.1640, -0.0530, -0.2883]) tensor([0.4765, 0.1745, 0.1950, 0.1541]) -Greedy action tensor([ 0.6735, -0.1091, -0.1039, 0.3308]) tensor([0.3807, 0.1741, 0.1750, 0.2703]) -Greedy action tensor([ 1.8502, -1.1574, -0.0505, 0.8109]) tensor([0.6441, 0.0318, 0.0963, 0.2278]) -Greedy action tensor([ 1.5730, -0.5631, -0.6736, 0.2675]) tensor([0.6689, 0.0790, 0.0707, 0.1813]) -Greedy action tensor([ 1.7012, -0.5366, -0.3958, 0.2011]) tensor([0.6884, 0.0734, 0.0846, 0.1536]) -Greedy action tensor([ 1.8601, -0.9300, -0.0292, 0.2749]) tensor([0.7055, 0.0433, 0.1067, 0.1446]) -Greedy action tensor([ 1.3120, -0.2940, -0.9201, 0.4370]) tensor([0.5798, 0.1163, 0.0622, 0.2417]) -Greedy action tensor([ 1.1385, 0.1417, -0.8331, 0.4279]) tensor([0.5001, 0.1846, 0.0696, 0.2457]) -Greedy action tensor([ 1.4287, -0.3886, -0.5099, 0.2669]) tensor([0.6176, 0.1003, 0.0889, 0.1932]) -Greedy action tensor([ 1.4975, -0.5992, -0.6838, -0.0980]) tensor([0.6951, 0.0854, 0.0785, 0.1410]) -Greedy action tensor([ 2.1167, -1.2852, -0.1789, 0.3038]) tensor([0.7709, 0.0257, 0.0776, 0.1258]) -Greedy action tensor([ 0.7878, -0.1681, -0.1554, 0.2366]) tensor([0.4255, 0.1636, 0.1657, 0.2452]) -Greedy action tensor([ 1.3504, -0.6391, -0.9261, 0.3235]) tensor([0.6260, 0.0856, 0.0643, 0.2242]) -Greedy action tensor([ 1.2406, -0.6078, -0.3342, 0.3461]) tensor([0.5639, 0.0888, 0.1168, 0.2305]) -Greedy action tensor([ 1.7548, -0.2499, -1.1709, 0.2849]) tensor([0.7051, 0.0950, 0.0378, 0.1621]) -Greedy action tensor([ 1.7785, -0.5712, -0.1695, 0.2880]) tensor([0.6834, 0.0652, 0.0974, 0.1540]) -Greedy action tensor([ 2.4953, 0.0926, 0.2152, -0.0412]) tensor([0.7862, 0.0711, 0.0804, 0.0622]) -Greedy action tensor([ 1.5338, 0.0699, -0.0676, 0.3368]) tensor([0.5763, 0.1333, 0.1162, 0.1741]) -Greedy action tensor([ 1.3155, -0.2257, -0.2491, -0.0225]) tensor([0.5932, 0.1270, 0.1241, 0.1557]) -Greedy action tensor([ 2.0026, -0.7038, -0.3088, 0.2563]) tensor([0.7461, 0.0498, 0.0740, 0.1301]) -Greedy action tensor([ 2.5782, -1.5172, -0.2064, 0.5462]) tensor([0.8268, 0.0138, 0.0511, 0.1084]) -Greedy action tensor([ 1.5100, -0.8184, -0.4800, 0.1128]) tensor([0.6750, 0.0658, 0.0923, 0.1669]) -Greedy action tensor([ 0.6558, -0.0688, -0.0790, -0.2661]) tensor([0.4234, 0.2051, 0.2031, 0.1684]) -Greedy action tensor([ 0.6040, -0.3839, -0.1356, -0.0886]) tensor([0.4255, 0.1585, 0.2031, 0.2129]) -Greedy action tensor([ 0.8117, -0.9038, 0.0536, -0.2884]) tensor([0.5047, 0.0908, 0.2365, 0.1680]) -Greedy action tensor([ 0.5547, -0.1776, -0.0084, -0.1030]) tensor([0.3894, 0.1872, 0.2217, 0.2017]) -Greedy action tensor([ 0.8583, -0.3270, 0.0165, -0.1905]) tensor([0.4792, 0.1465, 0.2065, 0.1679]) -Greedy action tensor([ 0.7291, -0.3602, -0.2354, -0.2866]) tensor([0.4808, 0.1618, 0.1833, 0.1741]) -Greedy action tensor([ 0.8786, -0.5735, -0.0791, -0.2935]) tensor([0.5188, 0.1214, 0.1991, 0.1607]) -Greedy action tensor([ 0.7504, -0.4788, -0.1234, -0.6798]) tensor([0.5130, 0.1501, 0.2141, 0.1228]) -Greedy action tensor([ 0.6851, -0.3300, -0.0984, -0.1868]) tensor([0.4470, 0.1620, 0.2042, 0.1869]) -Greedy action tensor([0.6761, 0.0441, 0.0081, 0.0700]) tensor([0.3861, 0.2052, 0.1980, 0.2106]) -Greedy action tensor([ 0.3836, -0.1846, -0.1096, -0.4261]) tensor([0.3814, 0.2160, 0.2329, 0.1697]) -Greedy action tensor([ 0.6697, -0.5208, -0.0099, -0.3864]) tensor([0.4632, 0.1409, 0.2348, 0.1611]) -Greedy action tensor([ 0.6039, -0.1340, -0.0007, -0.1226]) tensor([0.3987, 0.1906, 0.2178, 0.1928]) -Greedy action tensor([ 0.3726, -0.0730, -0.1713, -0.0434]) tensor([0.3472, 0.2223, 0.2015, 0.2290]) -Greedy action tensor([ 0.4451, -0.2111, -0.0161, -0.2751]) tensor([0.3794, 0.1968, 0.2392, 0.1846]) -Greedy action tensor([ 0.6662, -0.1522, -0.0942, -0.3006]) tensor([0.4369, 0.1927, 0.2042, 0.1661]) -Greedy action tensor([ 0.6031, -0.2611, -0.0514, -0.0927]) tensor([0.4099, 0.1727, 0.2130, 0.2044]) -Greedy action tensor([ 1.0568, -0.6420, -0.1736, -0.3251]) tensor([0.5793, 0.1060, 0.1693, 0.1455]) -Greedy action tensor([ 0.4641, -0.3516, -0.1408, -0.2683]) tensor([0.4050, 0.1791, 0.2212, 0.1947]) -Greedy action tensor([ 0.8647, -0.4581, 0.1189, -0.5295]) tensor([0.5028, 0.1339, 0.2385, 0.1247]) -Greedy action tensor([ 0.7026, -0.4028, -0.0366, -0.2660]) tensor([0.4570, 0.1513, 0.2182, 0.1735]) -Greedy action tensor([ 0.3985, -0.1774, -0.0920, -0.1611]) tensor([0.3642, 0.2047, 0.2230, 0.2081]) -Greedy action tensor([ 0.9220, -0.7626, 0.0300, -0.3825]) tensor([0.5357, 0.0994, 0.2196, 0.1453]) -Greedy action tensor([ 0.4031, 0.0285, -0.0763, -0.3696]) tensor([0.3612, 0.2484, 0.2236, 0.1668]) -Greedy action tensor([ 0.6276, -0.2494, -0.0318, -0.5458]) tensor([0.4459, 0.1855, 0.2306, 0.1379]) -Greedy action tensor([ 0.3786, -0.1719, -0.1001, -0.0334]) tensor([0.3498, 0.2017, 0.2167, 0.2317]) -Greedy action tensor([ 0.8897, -0.5498, -0.1077, -0.5616]) tensor([0.5434, 0.1288, 0.2004, 0.1273]) -Greedy action tensor([ 0.3534, -0.2279, -0.1948, -0.6875]) tensor([0.4016, 0.2245, 0.2321, 0.1418]) -Greedy action tensor([ 0.7182, -0.2292, -0.1153, -0.1570]) tensor([0.4466, 0.1732, 0.1941, 0.1861]) -Greedy action tensor([ 0.8087, -0.4159, 0.1307, -0.1442]) tensor([0.4572, 0.1344, 0.2321, 0.1763]) -Greedy action tensor([ 0.1970, -0.0562, -0.1419, -0.0047]) tensor([0.3024, 0.2348, 0.2155, 0.2472]) -Greedy action tensor([ 1.0340, -0.6436, -0.0651, -0.4598]) tensor([0.5732, 0.1071, 0.1910, 0.1287]) -Greedy action tensor([ 0.7595, -0.6761, 0.2306, -0.5351]) tensor([0.4759, 0.1133, 0.2804, 0.1304]) -Greedy action tensor([ 0.8467, -0.3886, -0.0516, -0.3222]) tensor([0.4978, 0.1447, 0.2028, 0.1547]) -Greedy action tensor([ 1.1185, -0.0949, -0.1597, 0.0040]) tensor([0.5253, 0.1561, 0.1463, 0.1723]) -Greedy action tensor([ 0.8202, -0.2969, 0.0126, -0.1055]) tensor([0.4610, 0.1508, 0.2055, 0.1827]) -Greedy action tensor([ 0.8011, -0.8181, 0.0990, -0.8743]) tensor([0.5317, 0.1053, 0.2635, 0.0995]) -Greedy action tensor([ 7.3613e-01, -7.5521e-01, 5.0914e-04, -3.2455e-01]) tensor([0.4877, 0.1098, 0.2337, 0.1688]) -Greedy action tensor([ 0.7917, -0.2802, -0.0958, -0.3092]) tensor([0.4793, 0.1641, 0.1973, 0.1594]) -Greedy action tensor([ 0.7583, -0.6654, 0.0452, -0.2095]) tensor([0.4737, 0.1141, 0.2322, 0.1800]) -Greedy action tensor([ 0.4812, -0.0357, -0.0148, -0.3168]) tensor([0.3766, 0.2246, 0.2293, 0.1695]) -Greedy action tensor([ 0.4983, -0.4893, -0.0431, -0.1000]) tensor([0.3993, 0.1487, 0.2324, 0.2195]) -Greedy action tensor([ 0.9044, -0.4616, 0.0125, -0.2813]) tensor([0.5075, 0.1295, 0.2080, 0.1551]) -Greedy action tensor([ 0.6966, -0.4862, -0.0155, -0.3143]) tensor([0.4627, 0.1418, 0.2270, 0.1684]) -Greedy action tensor([ 1.0808, -1.0308, 0.2041, -0.5601]) tensor([0.5777, 0.0699, 0.2404, 0.1120]) -Greedy action tensor([ 0.6088, -0.5002, -0.0163, -0.3408]) tensor([0.4440, 0.1465, 0.2377, 0.1718]) -Greedy action tensor([ 0.1816, 0.0798, -0.1549, -0.2586]) tensor([0.3066, 0.2769, 0.2190, 0.1974]) -Greedy action tensor([ 0.8641, -0.6553, -0.0421, -0.2440]) tensor([0.5120, 0.1121, 0.2069, 0.1691]) -Greedy action tensor([ 0.4805, -0.1962, -0.0184, -0.1027]) tensor([0.3740, 0.1901, 0.2271, 0.2088]) -Greedy action tensor([ 0.6001, -0.1185, -0.0826, 0.0479]) tensor([0.3894, 0.1898, 0.1967, 0.2241]) -Greedy action tensor([ 1.0697, -0.7843, 0.2160, -0.6616]) tensor([0.5683, 0.0890, 0.2420, 0.1006]) -Greedy action tensor([ 0.5716, -0.0429, -0.0895, -0.0291]) tensor([0.3838, 0.2076, 0.1981, 0.2105]) -Greedy action tensor([ 0.7194, -0.0053, -0.1958, -0.1796]) tensor([0.4363, 0.2114, 0.1747, 0.1776]) -Greedy action tensor([ 1.1068, -0.5201, -0.0287, -0.2583]) tensor([0.5640, 0.1108, 0.1812, 0.1440]) -Greedy action tensor([ 1.1206, -1.0922, 0.0082, -0.4969]) tensor([0.6110, 0.0668, 0.2009, 0.1212]) -Greedy action tensor([ 0.3060, 0.1110, 0.0619, -0.7085]) tensor([0.3368, 0.2772, 0.2639, 0.1221]) -Greedy action tensor([ 0.4269, -0.0537, -0.0457, -0.0559]) tensor([0.3498, 0.2163, 0.2180, 0.2158]) -Greedy action tensor([ 1.1396, 0.6151, -0.1458, -0.1590]) tensor([0.4670, 0.2764, 0.1291, 0.1275]) -Greedy action tensor([ 0.6295, -0.5500, -0.0085, -0.0944]) tensor([0.4309, 0.1325, 0.2277, 0.2089]) -Greedy action tensor([ 0.6581, -0.4671, -0.1096, -0.1861]) tensor([0.4507, 0.1463, 0.2092, 0.1938]) -Greedy action tensor([ 0.3355, 0.1862, -0.0790, -0.4675]) tensor([0.3367, 0.2900, 0.2225, 0.1508]) -Greedy action tensor([ 0.5172, -0.1507, -0.0427, -0.1457]) tensor([0.3847, 0.1973, 0.2198, 0.1983]) -Greedy action tensor([ 0.4356, -0.1300, -0.0401, -0.0968]) tensor([0.3601, 0.2046, 0.2238, 0.2115]) -Greedy action tensor([ 0.7572, -0.4383, -0.1219, -0.2483]) tensor([0.4799, 0.1452, 0.1993, 0.1756]) -Greedy action tensor([ 0.5861, -0.0978, -0.0609, -0.3172]) tensor([0.4109, 0.2074, 0.2152, 0.1665]) -Greedy action tensor([ 0.8584, -0.3998, -0.1128, -0.2319]) tensor([0.5003, 0.1422, 0.1894, 0.1682]) -Greedy action tensor([ 0.5170, 0.0250, -0.2087, -0.3240]) tensor([0.3958, 0.2420, 0.1916, 0.1707]) -Greedy action tensor([ 0.7325, -0.3965, -0.0932, -0.1842]) tensor([0.4627, 0.1496, 0.2026, 0.1850]) -Greedy action tensor([ 0.8323, -0.6576, 0.0559, -0.5256]) tensor([0.5148, 0.1160, 0.2368, 0.1324]) -Greedy action tensor([ 1.0523, -0.5643, 0.0171, -0.2889]) tensor([0.5509, 0.1094, 0.1957, 0.1441]) -Greedy action tensor([ 0.9317, -0.3303, -0.0754, -0.2594]) tensor([0.5122, 0.1450, 0.1871, 0.1557]) -Greedy action tensor([ 0.7416, -0.0845, -0.0877, -0.1792]) tensor([0.4401, 0.1926, 0.1920, 0.1752]) -Greedy action tensor([ 1.0611, -0.5895, -0.1209, -0.3159]) tensor([0.5711, 0.1096, 0.1751, 0.1441]) -Greedy action tensor([ 0.4042, 0.0080, -0.1230, -0.0263]) tensor([0.3432, 0.2310, 0.2026, 0.2232]) -Greedy action tensor([ 0.5217, 0.0788, 0.0020, -0.2021]) tensor([0.3674, 0.2359, 0.2185, 0.1782]) -Greedy action tensor([ 0.6694, 0.0954, -0.3277, -0.2010]) tensor([0.4253, 0.2396, 0.1569, 0.1781]) -Greedy action tensor([ 1.0483, -0.9461, 0.0898, -0.5461]) tensor([0.5805, 0.0790, 0.2226, 0.1179]) -Greedy action tensor([ 0.8687, -0.6309, -0.0621, -0.4867]) tensor([0.5332, 0.1190, 0.2102, 0.1375]) -Greedy action tensor([ 0.9668, -1.0139, -0.0228, -0.4369]) tensor([0.5697, 0.0786, 0.2118, 0.1400]) -Greedy action tensor([ 0.6339, -0.7571, 0.0217, -0.1981]) tensor([0.4492, 0.1118, 0.2435, 0.1955]) -Greedy action tensor([ 0.9310, -0.4600, 0.0555, -0.4492]) tensor([0.5216, 0.1298, 0.2174, 0.1312]) -Greedy action tensor([-1.7728, -0.3103, 0.6083, -0.0645]) tensor([0.0462, 0.1993, 0.4996, 0.2549]) -Greedy action tensor([-1.8938, -0.4066, 0.6415, -0.1432]) tensor([0.0420, 0.1859, 0.5302, 0.2419]) -Greedy action tensor([-1.9351, -0.4368, 0.6617, -0.1731]) tensor([0.0405, 0.1810, 0.5429, 0.2356]) -Greedy action tensor([-1.5768, -0.4047, 0.6732, 0.4106]) tensor([0.0476, 0.1537, 0.4515, 0.3472]) -Greedy action tensor([-1.8782, -0.5038, 0.6333, -0.1492]) tensor([0.0436, 0.1725, 0.5379, 0.2459]) -Greedy action tensor([-1.2419, 0.8930, 0.1579, 0.4239]) tensor([0.0532, 0.4498, 0.2157, 0.2814]) -Greedy action tensor([-1.7928, -0.3083, 0.6813, -0.0117]) tensor([0.0431, 0.1900, 0.5112, 0.2557]) -Greedy action tensor([-1.9030, -0.3260, 0.6304, -0.1435]) tensor([0.0412, 0.1996, 0.5195, 0.2396]) -Greedy action tensor([-1.8714, -0.3427, 0.6170, -0.1276]) tensor([0.0428, 0.1973, 0.5152, 0.2447]) -Greedy action tensor([-1.6733, -0.1542, 0.5505, 0.1355]) tensor([0.0478, 0.2184, 0.4419, 0.2918]) -Greedy action tensor([-1.9288, -0.4079, 0.6545, -0.1714]) tensor([0.0406, 0.1859, 0.5379, 0.2355]) -Greedy action tensor([-1.9097, -0.4365, 0.6523, -0.1596]) tensor([0.0415, 0.1812, 0.5383, 0.2390]) -Greedy action tensor([-1.9418, -0.4373, 0.6632, -0.1786]) tensor([0.0402, 0.1811, 0.5442, 0.2345]) -Greedy action tensor([-1.7820, -0.3999, 0.6012, -0.0780]) tensor([0.0469, 0.1868, 0.5084, 0.2578]) -Greedy action tensor([-1.9351, -0.4292, 0.6616, -0.1722]) tensor([0.0404, 0.1821, 0.5421, 0.2355]) -Greedy action tensor([-1.6377, -0.4556, 0.6209, 0.1861]) tensor([0.0499, 0.1628, 0.4779, 0.3094]) -Greedy action tensor([-0.9829, -0.6083, 0.1590, 0.6225]) tensor([0.0946, 0.1376, 0.2965, 0.4713]) -Greedy action tensor([-1.9315, -0.4492, 0.6641, -0.1710]) tensor([0.0406, 0.1788, 0.5444, 0.2362]) -Greedy action tensor([-1.9234, -0.4516, 0.6586, -0.1709]) tensor([0.0411, 0.1789, 0.5431, 0.2369]) -Greedy action tensor([-1.9381, -0.4494, 0.6649, -0.1768]) tensor([0.0404, 0.1790, 0.5455, 0.2351]) -Greedy action tensor([-1.9398, -0.4372, 0.6632, -0.1769]) tensor([0.0403, 0.1810, 0.5439, 0.2348]) -Greedy action tensor([-1.7068, -0.3423, 0.5521, -0.0338]) tensor([0.0505, 0.1975, 0.4831, 0.2689]) -Greedy action tensor([-0.9957, 0.3050, 0.1419, 0.4962]) tensor([0.0817, 0.3001, 0.2549, 0.3633]) -Greedy action tensor([-1.7105, 0.1138, 0.5004, 0.0142]) tensor([0.0456, 0.2826, 0.4160, 0.2558]) -Greedy action tensor([-1.8959, -0.4537, 0.6471, -0.1560]) tensor([0.0423, 0.1789, 0.5379, 0.2409]) -Greedy action tensor([-1.9208, -0.4448, 0.6549, -0.1690]) tensor([0.0412, 0.1802, 0.5412, 0.2374]) -Greedy action tensor([-1.8898, -0.3492, 0.6111, -0.1592]) tensor([0.0425, 0.1986, 0.5188, 0.2401]) -Greedy action tensor([-1.7371, -0.4433, 0.6024, -0.0415]) tensor([0.0488, 0.1781, 0.5068, 0.2662]) -Greedy action tensor([-1.9340, -0.4091, 0.6562, -0.1722]) tensor([0.0404, 0.1857, 0.5387, 0.2353]) -Greedy action tensor([-1.8927, -0.3364, 0.6260, -0.1458]) tensor([0.0419, 0.1985, 0.5196, 0.2401]) -Greedy action tensor([-1.5899, -0.1634, 0.5538, 0.0729]) tensor([0.0527, 0.2195, 0.4497, 0.2780]) -Greedy action tensor([-1.7684, 0.0241, 0.5257, -0.1083]) tensor([0.0451, 0.2707, 0.4471, 0.2371]) -Greedy action tensor([-1.9106, -0.4442, 0.6490, -0.1638]) tensor([0.0417, 0.1806, 0.5388, 0.2390]) -Greedy action tensor([-1.2986, -0.4773, 0.4359, 0.2388]) tensor([0.0736, 0.1673, 0.4169, 0.3423]) -Greedy action tensor([-1.5451, 0.0762, 0.4010, -0.0511]) tensor([0.0571, 0.2889, 0.3997, 0.2543]) -Greedy action tensor([-1.8406, -0.4426, 0.6522, -0.0988]) tensor([0.0438, 0.1771, 0.5293, 0.2498]) -Greedy action tensor([-0.9328, 0.0898, 0.1685, 0.6978]) tensor([0.0841, 0.2337, 0.2529, 0.4293]) -Greedy action tensor([-1.9068, -0.4199, 0.6642, -0.1267]) tensor([0.0409, 0.1810, 0.5353, 0.2427]) -Greedy action tensor([ 0.6094, 1.0921, -0.1484, 0.4622]) tensor([0.2530, 0.4100, 0.1186, 0.2184]) -Greedy action tensor([-1.8672, -0.3814, 0.6137, -0.1320]) tensor([0.0434, 0.1918, 0.5187, 0.2461]) -Greedy action tensor([-1.6887, -0.3707, 0.8128, 0.4084]) tensor([0.0399, 0.1490, 0.4865, 0.3247]) -Greedy action tensor([-1.7620, -0.3315, 0.5594, -0.1010]) tensor([0.0485, 0.2026, 0.4938, 0.2551]) -Greedy action tensor([-1.6758, -0.3528, 0.6025, 0.0168]) tensor([0.0501, 0.1882, 0.4893, 0.2724]) -Greedy action tensor([-1.7759, -0.3548, 0.5957, -0.0704]) tensor([0.0468, 0.1939, 0.5016, 0.2577]) -Greedy action tensor([-1.8175, -0.3836, 0.6032, -0.1075]) tensor([0.0455, 0.1909, 0.5120, 0.2516]) -Greedy action tensor([-1.9228, -0.4323, 0.6578, -0.1675]) tensor([0.0409, 0.1817, 0.5405, 0.2368]) -Greedy action tensor([-1.8454, 0.0042, 0.0473, -0.2399]) tensor([0.0527, 0.3350, 0.3498, 0.2625]) -Greedy action tensor([-1.7992, -0.3840, 0.6464, -0.1086]) tensor([0.0453, 0.1865, 0.5226, 0.2456]) -Greedy action tensor([-1.9225, -0.3932, 0.6542, -0.1620]) tensor([0.0407, 0.1877, 0.5350, 0.2366]) -Greedy action tensor([-1.3850, 0.3079, 0.5540, -0.7312]) tensor([0.0653, 0.3550, 0.4541, 0.1256]) -Greedy action tensor([-1.8801, -0.3845, 0.6263, -0.1318]) tensor([0.0426, 0.1901, 0.5224, 0.2448]) -Greedy action tensor([-1.9233, -0.4395, 0.6548, -0.1701]) tensor([0.0411, 0.1811, 0.5408, 0.2370]) -Greedy action tensor([-1.4290, -0.4317, 0.4248, 0.0429]) tensor([0.0692, 0.1876, 0.4417, 0.3015]) -Greedy action tensor([-1.6517, -0.2807, 0.4725, 0.0295]) tensor([0.0535, 0.2109, 0.4479, 0.2876]) -Greedy action tensor([-1.9290, -0.4101, 0.6582, -0.1683]) tensor([0.0405, 0.1851, 0.5387, 0.2357]) -Greedy action tensor([-1.7896, -0.4988, 0.5919, -0.1353]) tensor([0.0483, 0.1758, 0.5231, 0.2528]) -Greedy action tensor([-1.9338, -0.4460, 0.6636, -0.1742]) tensor([0.0405, 0.1795, 0.5444, 0.2356]) -Greedy action tensor([-1.8972, -0.4466, 0.6474, -0.1534]) tensor([0.0422, 0.1798, 0.5370, 0.2411]) -Greedy action tensor([-1.5216, -0.4259, 0.5512, -0.3331]) tensor([0.0657, 0.1965, 0.5221, 0.2156]) -Greedy action tensor([-1.1830, -0.0964, 0.4521, 0.3708]) tensor([0.0723, 0.2144, 0.3711, 0.3421]) -Greedy action tensor([-1.9271, -0.4429, 0.6577, -0.1718]) tensor([0.0409, 0.1804, 0.5422, 0.2366]) -Greedy action tensor([-1.5639, -0.4011, 0.6708, 0.0502]) tensor([0.0539, 0.1723, 0.5033, 0.2706]) -Greedy action tensor([-1.9142, -0.4655, 0.7950, 0.0115]) tensor([0.0369, 0.1569, 0.5534, 0.2528]) -Greedy action tensor([-1.7985, -0.4149, 0.6180, -0.0764]) tensor([0.0459, 0.1831, 0.5143, 0.2568]) -Greedy action tensor([-1.8704, -0.4478, 0.6271, -0.1485]) tensor([0.0437, 0.1812, 0.5308, 0.2444]) -Greedy action tensor([-1.8326, -0.3237, 0.6129, -0.1026]) tensor([0.0441, 0.1992, 0.5082, 0.2485]) -Greedy action tensor([-1.0963, 0.4951, 0.3173, -0.2619]) tensor([0.0811, 0.3984, 0.3335, 0.1869]) -Greedy action tensor([-1.8782, -0.3796, 0.6263, -0.1472]) tensor([0.0428, 0.1916, 0.5239, 0.2417]) -Greedy action tensor([-1.3216, -0.3917, 0.3102, 0.0714]) tensor([0.0789, 0.1999, 0.4034, 0.3177]) -Greedy action tensor([-1.9411, -0.4459, 0.6659, -0.1786]) tensor([0.0403, 0.1795, 0.5457, 0.2345]) -Greedy action tensor([-1.8539, -0.1518, 0.5891, -0.1354]) tensor([0.0424, 0.2327, 0.4882, 0.2366]) -Greedy action tensor([-1.8531, -0.1904, 0.5953, -0.1768]) tensor([0.0431, 0.2274, 0.4989, 0.2305]) -Greedy action tensor([-1.9058, -0.3959, 0.6405, -0.1632]) tensor([0.0417, 0.1886, 0.5317, 0.2380]) -Greedy action tensor([-1.9415, -0.4451, 0.6662, -0.1790]) tensor([0.0402, 0.1796, 0.5458, 0.2344]) -Greedy action tensor([-1.6462, -0.3727, 0.6514, 0.1520]) tensor([0.0486, 0.1738, 0.4839, 0.2937]) -Greedy action tensor([-1.8093, -0.4089, 0.5941, -0.1304]) tensor([0.0466, 0.1889, 0.5150, 0.2495]) -Greedy action tensor([-1.7606, -0.2403, 0.5827, -0.0109]) tensor([0.0460, 0.2104, 0.4790, 0.2646]) -Greedy action tensor([-1.9233, -0.4333, 0.6577, -0.1682]) tensor([0.0409, 0.1816, 0.5407, 0.2368]) -Greedy action tensor([-1.8518, -0.4503, 0.6251, -0.1367]) tensor([0.0444, 0.1803, 0.5286, 0.2467]) -Greedy action tensor([-1.9234, -0.3658, 0.6447, -0.1781]) tensor([0.0408, 0.1936, 0.5319, 0.2336]) -Greedy action tensor([-1.4210, 0.4611, 0.4081, 0.0756]) tensor([0.0548, 0.3596, 0.3411, 0.2446]) -Greedy action tensor([-0.6741, -0.2801, -0.4918, -0.1054]) tensor([0.1835, 0.2721, 0.2202, 0.3241]) -Greedy action tensor([ 0.2687, -1.2098, 0.5716, -0.2022]) tensor([0.3119, 0.0711, 0.4223, 0.1948]) -Greedy action tensor([ 0.0451, -1.0795, 1.1536, 0.9349]) tensor([0.1473, 0.0478, 0.4463, 0.3586]) -Greedy action tensor([-0.2007, -1.0393, 0.0534, 0.4249]) tensor([0.2178, 0.0942, 0.2808, 0.4072]) -Greedy action tensor([-0.6736, -0.5084, 0.5313, -0.8426]) tensor([0.1572, 0.1855, 0.5245, 0.1328]) -Greedy action tensor([ 0.2847, -0.6422, 0.2907, -0.6491]) tensor([0.3578, 0.1416, 0.3599, 0.1406]) -Greedy action tensor([ 1.5624, -0.0997, -0.5076, 0.5786]) tensor([0.5918, 0.1123, 0.0747, 0.2213]) -Greedy action tensor([-1.1188, -0.8545, 1.7326, -0.6488]) tensor([0.0471, 0.0614, 0.8160, 0.0754]) -Greedy action tensor([ 0.8332, -0.0918, 0.8909, 0.9834]) tensor([0.2764, 0.1096, 0.2928, 0.3212]) -Greedy action tensor([ 0.7313, 0.4079, -0.5318, -0.5971]) tensor([0.4403, 0.3186, 0.1245, 0.1166]) -Greedy action tensor([ 0.5026, -1.2565, -0.5231, 0.5372]) tensor([0.3897, 0.0671, 0.1397, 0.4034]) -Greedy action tensor([ 0.9200, -0.2555, 0.7241, -0.3092]) tensor([0.4127, 0.1274, 0.3392, 0.1207]) -Greedy action tensor([ 0.6111, -0.8988, -1.2039, 0.1722]) tensor([0.4930, 0.1089, 0.0803, 0.3178]) -Greedy action tensor([-0.0904, -1.2620, 2.2783, -1.0929]) tensor([0.0809, 0.0251, 0.8643, 0.0297]) -Greedy action tensor([-0.9054, -1.3901, -0.5089, 0.2463]) tensor([0.1596, 0.0983, 0.2372, 0.5049]) -Greedy action tensor([-0.7867, 0.5455, -0.1166, -0.1083]) tensor([0.1148, 0.4348, 0.2243, 0.2261]) -Greedy action tensor([-0.2247, -1.0470, 0.5931, -1.3169]) tensor([0.2475, 0.1088, 0.5607, 0.0830]) -Greedy action tensor([-0.7846, 0.1497, -0.1848, -0.3342]) tensor([0.1442, 0.3670, 0.2626, 0.2262]) -Greedy action tensor([-0.3337, 0.5687, -0.3689, 0.1498]) tensor([0.1652, 0.4073, 0.1595, 0.2679]) -Greedy action tensor([-1.1641, -0.8134, 0.4738, -1.1957]) tensor([0.1172, 0.1664, 0.6029, 0.1135]) -Greedy action tensor([-0.3115, 0.6981, -0.4078, -0.8786]) tensor([0.1916, 0.5258, 0.1740, 0.1087]) -Greedy action tensor([ 0.8705, -1.7590, -0.0480, 1.4455]) tensor([0.3079, 0.0222, 0.1229, 0.5471]) -Greedy action tensor([ 0.2359, -0.7769, 2.6378, -1.1036]) tensor([0.0789, 0.0287, 0.8717, 0.0207]) -Greedy action tensor([ 0.7057, 0.5129, -0.8722, 1.0094]) tensor([0.2953, 0.2435, 0.0610, 0.4002]) -Greedy action tensor([ 0.3807, -1.5291, 0.5249, 0.3440]) tensor([0.3061, 0.0453, 0.3535, 0.2950]) -Greedy action tensor([-0.8292, -0.1897, -0.3784, -0.5486]) tensor([0.1727, 0.3274, 0.2711, 0.2287]) -Greedy action tensor([ 0.6240, -1.0280, 0.1197, -0.4972]) tensor([0.4714, 0.0903, 0.2847, 0.1536]) -Greedy action tensor([ 1.9409, -0.6139, 0.9209, 1.7681]) tensor([0.4387, 0.0341, 0.1582, 0.3691]) -Greedy action tensor([ 1.0965, -1.2884, 0.0744, 0.5218]) tensor([0.4963, 0.0457, 0.1786, 0.2794]) -Greedy action tensor([-0.1040, -1.5528, 0.1426, 0.0570]) tensor([0.2711, 0.0637, 0.3469, 0.3184]) -Greedy action tensor([ 0.2964, 0.9830, 0.5269, -0.6759]) tensor([0.2162, 0.4297, 0.2723, 0.0818]) -Greedy action tensor([-0.5536, 0.1925, -0.7028, 0.4834]) tensor([0.1473, 0.3105, 0.1269, 0.4154]) -Greedy action tensor([-0.4144, -0.4553, 0.6844, -0.6193]) tensor([0.1732, 0.1662, 0.5196, 0.1411]) -Greedy action tensor([ 1.2520, -0.3988, -0.0349, 0.4680]) tensor([0.5196, 0.0997, 0.1435, 0.2372]) -Greedy action tensor([ 0.2225, -0.0152, 0.3895, 0.7528]) tensor([0.2142, 0.1688, 0.2531, 0.3639]) -Greedy action tensor([-0.1133, -0.7555, -0.8790, 0.6079]) tensor([0.2470, 0.1300, 0.1149, 0.5081]) -Greedy action tensor([ 0.4238, -1.3729, -0.3182, 0.9596]) tensor([0.2984, 0.0495, 0.1421, 0.5100]) -Greedy action tensor([ 0.0997, 0.1103, 0.0138, -1.0086]) tensor([0.3069, 0.3102, 0.2816, 0.1013]) -Greedy action tensor([-1.0123, -0.5952, -0.6997, -0.0224]) tensor([0.1521, 0.2308, 0.2079, 0.4092]) -Greedy action tensor([ 1.1975, -0.8371, 1.1677, 0.6502]) tensor([0.3731, 0.0488, 0.3622, 0.2159]) -Greedy action tensor([-1.0925, -0.2896, -1.0940, -0.8004]) tensor([0.1795, 0.4007, 0.1793, 0.2405]) -Greedy action tensor([ 0.3199, -0.9320, 0.3564, 0.3659]) tensor([0.2967, 0.0848, 0.3078, 0.3107]) -Greedy action tensor([ 0.4871, -0.2532, 0.0732, -0.5481]) tensor([0.4011, 0.1913, 0.2651, 0.1425]) -Greedy action tensor([-0.2023, -0.4191, -0.1876, -0.8399]) tensor([0.2986, 0.2404, 0.3031, 0.1579]) -Greedy action tensor([0.4669, 0.0402, 0.7907, 0.3569]) tensor([0.2544, 0.1660, 0.3517, 0.2279]) -Greedy action tensor([-0.0824, -0.1094, -0.5016, -0.7051]) tensor([0.3157, 0.3073, 0.2076, 0.1694]) -Greedy action tensor([ 0.3651, -0.0481, -1.0159, -0.0778]) tensor([0.3914, 0.2589, 0.0984, 0.2513]) -Greedy action tensor([-0.3177, -0.4594, -0.8744, -0.9394]) tensor([0.3358, 0.2914, 0.1925, 0.1803]) -Greedy action tensor([ 0.8698, -0.7370, -0.0268, 0.1022]) tensor([0.4825, 0.0968, 0.1968, 0.2239]) -Greedy action tensor([-0.1782, -0.3231, 0.4093, 0.2363]) tensor([0.1931, 0.1671, 0.3475, 0.2923]) -Greedy action tensor([-0.3940, -1.4267, -0.1566, 0.7363]) tensor([0.1748, 0.0622, 0.2217, 0.5413]) -Greedy action tensor([ 0.3845, -0.3313, 0.5863, -0.5064]) tensor([0.3202, 0.1565, 0.3918, 0.1314]) -Greedy action tensor([-0.0038, 0.1349, 0.4931, -0.3277]) tensor([0.2215, 0.2544, 0.3640, 0.1602]) -Greedy action tensor([-0.1885, -0.2265, 0.0444, -0.7056]) tensor([0.2617, 0.2519, 0.3303, 0.1560]) -Greedy action tensor([ 0.0072, -0.2538, 0.4326, -0.5793]) tensor([0.2593, 0.1997, 0.3968, 0.1442]) -Greedy action tensor([ 0.1532, -0.9973, 0.4290, -0.7936]) tensor([0.3309, 0.1047, 0.4360, 0.1284]) -Greedy action tensor([-0.2264, -0.3928, -0.1809, -0.1912]) tensor([0.2545, 0.2155, 0.2663, 0.2636]) -Greedy action tensor([-0.1098, -0.5324, -0.5599, -0.9497]) tensor([0.3670, 0.2405, 0.2340, 0.1585]) -Greedy action tensor([-0.2417, -1.5266, -0.2916, -0.3442]) tensor([0.3194, 0.0884, 0.3039, 0.2883]) -Greedy action tensor([ 1.1408, 0.5733, 0.2218, -0.2357]) tensor([0.4508, 0.2556, 0.1798, 0.1138]) -Greedy action tensor([-0.7401, -0.2957, -0.2089, -0.3596]) tensor([0.1747, 0.2725, 0.2972, 0.2556]) -Greedy action tensor([-1.1302, -0.7873, -1.3275, -0.1428]) tensor([0.1691, 0.2382, 0.1388, 0.4539]) -Greedy action tensor([-1.2141, -1.0922, 0.7239, -0.6414]) tensor([0.0922, 0.1041, 0.6402, 0.1635]) -Greedy action tensor([ 0.0363, 0.9767, 0.8053, -0.7766]) tensor([0.1623, 0.4156, 0.3501, 0.0720]) -Greedy action tensor([ 1.1306, -1.1800, 0.5817, 0.6744]) tensor([0.4328, 0.0429, 0.2500, 0.2743]) -Greedy action tensor([ 0.2008, -0.0961, -0.6687, 0.2417]) tensor([0.3121, 0.2319, 0.1308, 0.3251]) -Greedy action tensor([ 0.9581, -1.7258, 1.1366, 1.1877]) tensor([0.2839, 0.0194, 0.3394, 0.3572]) -Greedy action tensor([-0.5333, -0.6406, 1.2103, -0.4403]) tensor([0.1148, 0.1031, 0.6562, 0.1259]) -Greedy action tensor([-0.9665, 0.0991, 1.5048, -1.2112]) tensor([0.0605, 0.1757, 0.7164, 0.0474]) -Greedy action tensor([-0.4735, -0.1163, 0.8663, -0.4254]) tensor([0.1370, 0.1959, 0.5233, 0.1438]) -Greedy action tensor([ 0.5208, -1.0072, -0.3881, -0.2277]) tensor([0.4778, 0.1037, 0.1925, 0.2260]) -Greedy action tensor([-0.5188, 0.3373, 1.2571, -1.3416]) tensor([0.1031, 0.2427, 0.6089, 0.0453]) -Greedy action tensor([-1.2560, -0.4037, 0.1851, 0.3181]) tensor([0.0807, 0.1892, 0.3408, 0.3893]) -Greedy action tensor([-0.3702, -0.4355, 0.1657, -1.4144]) tensor([0.2501, 0.2343, 0.4275, 0.0880]) -Greedy action tensor([ 0.9822, -0.2736, 0.7707, 1.0605]) tensor([0.3149, 0.0897, 0.2549, 0.3405]) -Greedy action tensor([1.6763, 0.9550, 0.7713, 0.5642]) tensor([0.4506, 0.2190, 0.1823, 0.1482]) -Greedy action tensor([-0.7043, -0.5116, 0.2756, -1.1052]) tensor([0.1803, 0.2186, 0.4803, 0.1207]) -Greedy action tensor([0.1681, 0.0228, 0.8149, 0.5569]) tensor([0.1905, 0.1647, 0.3637, 0.2810]) -Greedy action tensor([-1.0287, -0.7622, -0.4545, 0.5107]) tensor([0.1144, 0.1493, 0.2031, 0.5332]) -Greedy action tensor([ 1.1538, -0.2538, 0.6252, 0.9935]) tensor([0.3723, 0.0911, 0.2194, 0.3171]) -Greedy action tensor([ 0.3429, -1.5473, -0.2619, 0.8141]) tensor([0.3031, 0.0458, 0.1656, 0.4855]) -Greedy action tensor([-0.8496, -0.4872, 0.0205, -0.7482]) tensor([0.1686, 0.2423, 0.4025, 0.1866]) -Greedy action tensor([ 1.5237, -0.2534, -0.3920, 0.1891]) tensor([0.6331, 0.1071, 0.0932, 0.1667]) -Greedy action tensor([ 2.0210, -1.0518, -0.0549, 0.7283]) tensor([0.6914, 0.0320, 0.0867, 0.1898]) -Greedy action tensor([ 1.3721, -0.0904, -0.1677, 0.1788]) tensor([0.5717, 0.1324, 0.1226, 0.1733]) -Greedy action tensor([ 1.2099, 0.0151, -0.2820, 0.3924]) tensor([0.5078, 0.1537, 0.1142, 0.2242]) -Greedy action tensor([ 1.2202, -0.1656, -0.6381, 0.3399]) tensor([0.5492, 0.1374, 0.0856, 0.2277]) -Greedy action tensor([ 1.2073, -0.3447, -0.2512, 0.4109]) tensor([0.5276, 0.1118, 0.1227, 0.2379]) -Greedy action tensor([ 1.2119, -0.3878, -0.2095, 0.3041]) tensor([0.5415, 0.1094, 0.1307, 0.2184]) -Greedy action tensor([ 0.6711, -0.2202, -0.1481, 0.0531]) tensor([0.4184, 0.1716, 0.1844, 0.2255]) -Greedy action tensor([ 0.8357, -0.3878, 0.2529, -0.1515]) tensor([0.4494, 0.1322, 0.2509, 0.1675]) -Greedy action tensor([ 1.8294, -0.7125, -0.4444, 0.4327]) tensor([0.6998, 0.0551, 0.0720, 0.1731]) -Greedy action tensor([ 1.4209, -0.6422, -0.7674, 0.5967]) tensor([0.5960, 0.0757, 0.0668, 0.2614]) -Greedy action tensor([ 1.2027, -0.1505, -0.7487, 0.2739]) tensor([0.5569, 0.1439, 0.0791, 0.2200]) -Greedy action tensor([ 1.4999, -0.3315, -0.4141, 0.4191]) tensor([0.6071, 0.0973, 0.0896, 0.2060]) -Greedy action tensor([ 1.4140, 0.1896, -0.3891, 0.2338]) tensor([0.5663, 0.1665, 0.0933, 0.1740]) -Greedy action tensor([ 1.4641, -0.6494, -0.2269, 0.4521]) tensor([0.5993, 0.0724, 0.1105, 0.2178]) -Greedy action tensor([ 1.6984, -1.4589, 0.2091, 0.0143]) tensor([0.6879, 0.0293, 0.1551, 0.1277]) -Greedy action tensor([ 1.6137, -0.3225, -0.2957, 0.1775]) tensor([0.6535, 0.0943, 0.0968, 0.1554]) -Greedy action tensor([ 1.7165, -0.7809, -0.4195, 0.6140]) tensor([0.6525, 0.0537, 0.0771, 0.2167]) -Greedy action tensor([ 2.2368, -1.2254, -0.1338, 0.1149]) tensor([0.8035, 0.0252, 0.0751, 0.0963]) -Greedy action tensor([ 1.7871, -0.5541, -0.3739, 0.2319]) tensor([0.7030, 0.0676, 0.0810, 0.1484]) -Greedy action tensor([ 1.4391, -0.5977, -0.2140, 0.5776]) tensor([0.5733, 0.0748, 0.1098, 0.2422]) -Greedy action tensor([ 1.1442, -0.4466, -0.2500, 0.2449]) tensor([0.5380, 0.1096, 0.1335, 0.2189]) -Greedy action tensor([ 1.2351, -0.4526, -0.0842, 0.0501]) tensor([0.5688, 0.1052, 0.1521, 0.1739]) -Greedy action tensor([ 2.0617, -0.9746, -0.4466, 0.1540]) tensor([0.7826, 0.0376, 0.0637, 0.1162]) -Greedy action tensor([ 1.7427, 0.0788, -0.6340, -0.1658]) tensor([0.6990, 0.1324, 0.0649, 0.1037]) -Greedy action tensor([ 1.5530, -0.4711, -0.3874, 0.2317]) tensor([0.6483, 0.0856, 0.0931, 0.1729]) -Greedy action tensor([ 0.7854, -0.3470, -0.0838, 0.1826]) tensor([0.4369, 0.1408, 0.1832, 0.2391]) -Greedy action tensor([ 1.2298, -0.5529, -0.2332, -0.0117]) tensor([0.5922, 0.0996, 0.1371, 0.1711]) -Greedy action tensor([ 1.4016, -0.8108, 0.0212, 0.1220]) tensor([0.6101, 0.0668, 0.1534, 0.1697]) -Greedy action tensor([ 1.0950, -0.3398, -0.5961, 0.4523]) tensor([0.5133, 0.1222, 0.0946, 0.2699]) -Greedy action tensor([ 1.3951, 0.0374, -0.6664, -0.3160]) tensor([0.6389, 0.1644, 0.0813, 0.1154]) -Greedy action tensor([ 2.2526, -0.9598, -0.2828, 0.6758]) tensor([0.7541, 0.0304, 0.0597, 0.1558]) -Greedy action tensor([ 1.4291, -0.2807, -0.5754, 0.6500]) tensor([0.5636, 0.1019, 0.0759, 0.2586]) -Greedy action tensor([ 1.2050, 0.2597, -0.0914, -0.3590]) tensor([0.5344, 0.2076, 0.1462, 0.1118]) -Greedy action tensor([ 1.7067, -0.2240, -0.6815, 0.5103]) tensor([0.6497, 0.0942, 0.0596, 0.1964]) -Greedy action tensor([ 1.5554, -0.4253, -0.5484, 0.0385]) tensor([0.6760, 0.0933, 0.0825, 0.1483]) -Greedy action tensor([ 1.3577, -0.3492, -0.3226, 0.6283]) tensor([0.5406, 0.0981, 0.1007, 0.2606]) -Greedy action tensor([ 1.3052, -0.1947, -0.5127, 0.1665]) tensor([0.5862, 0.1308, 0.0952, 0.1877]) -Greedy action tensor([ 1.2095, -0.4623, -0.6204, 0.1528]) tensor([0.5896, 0.1108, 0.0946, 0.2050]) -Greedy action tensor([ 1.6552, -0.7780, -0.1219, 0.3976]) tensor([0.6488, 0.0569, 0.1097, 0.1845]) -Greedy action tensor([ 1.4550, -0.4048, -0.3058, 0.1847]) tensor([0.6217, 0.0968, 0.1069, 0.1746]) -Greedy action tensor([ 2.3704, -0.0775, -0.1513, 0.2187]) tensor([0.7794, 0.0674, 0.0626, 0.0906]) -Greedy action tensor([ 1.6006, -0.3197, -0.7773, 0.8594]) tensor([0.5828, 0.0854, 0.0541, 0.2777]) -Greedy action tensor([ 1.8292, -0.6088, -0.4410, 0.3786]) tensor([0.7017, 0.0613, 0.0725, 0.1645]) -Greedy action tensor([ 1.1509, -0.3766, -0.1812, 0.3789]) tensor([0.5146, 0.1117, 0.1358, 0.2378]) -Greedy action tensor([ 1.9209, -0.5524, -0.5093, 0.3317]) tensor([0.7265, 0.0612, 0.0639, 0.1483]) -Greedy action tensor([ 1.9242, -0.2662, -0.2918, 0.3172]) tensor([0.7035, 0.0787, 0.0767, 0.1410]) -Greedy action tensor([ 1.6441, -1.0103, -0.6312, -0.5498]) tensor([0.7785, 0.0548, 0.0800, 0.0868]) -Greedy action tensor([ 1.4565, -0.4050, -0.3623, 0.2194]) tensor([0.6219, 0.0967, 0.1009, 0.1805]) -Greedy action tensor([ 2.2640, -0.8679, -0.4228, 0.9994]) tensor([0.7173, 0.0313, 0.0488, 0.2025]) -Greedy action tensor([ 2.5147, 0.1338, -0.7259, -0.1031]) tensor([0.8302, 0.0768, 0.0325, 0.0606]) -Greedy action tensor([ 1.4708, -0.2038, 0.0167, 0.4171]) tensor([0.5651, 0.1059, 0.1320, 0.1970]) -Greedy action tensor([ 2.0390, -1.1937, -0.0056, 0.4183]) tensor([0.7317, 0.0289, 0.0947, 0.1447]) -Greedy action tensor([ 1.4786, -0.9359, -0.3240, 0.5727]) tensor([0.6030, 0.0539, 0.0994, 0.2437]) -Greedy action tensor([ 1.2971, -0.4119, -0.7166, 0.2385]) tensor([0.6019, 0.1090, 0.0803, 0.2088]) -Greedy action tensor([ 1.6610, -0.8375, -0.3009, 0.6135]) tensor([0.6355, 0.0522, 0.0893, 0.2229]) -Greedy action tensor([ 1.2303, -0.4897, -0.3909, 0.1451]) tensor([0.5832, 0.1044, 0.1153, 0.1970]) -Greedy action tensor([ 1.8130, -0.0147, -0.6195, 0.4727]) tensor([0.6621, 0.1065, 0.0581, 0.1733]) -Greedy action tensor([ 2.0114, -0.7093, -0.3269, 0.7954]) tensor([0.6855, 0.0451, 0.0661, 0.2032]) -Greedy action tensor([ 1.1555, -0.1419, -0.5245, 0.2558]) tensor([0.5358, 0.1464, 0.0999, 0.2179]) -Greedy action tensor([ 0.6389, -0.4714, 0.1266, 0.0257]) tensor([0.4048, 0.1334, 0.2425, 0.2193]) -Greedy action tensor([ 2.0769, -0.9439, -0.1156, 0.6068]) tensor([0.7193, 0.0351, 0.0803, 0.1654]) -Greedy action tensor([ 1.7598, -0.1884, -0.2086, 0.5037]) tensor([0.6382, 0.0910, 0.0891, 0.1817]) -Greedy action tensor([ 1.4670, -0.3694, -0.5330, 0.2391]) tensor([0.6299, 0.1004, 0.0852, 0.1845]) -Greedy action tensor([ 1.5974, -0.3994, -0.6456, 0.2171]) tensor([0.6696, 0.0909, 0.0711, 0.1684]) -Greedy action tensor([ 2.1270, -0.4596, -0.7912, 0.7627]) tensor([0.7221, 0.0544, 0.0390, 0.1845]) -Greedy action tensor([ 1.2712, -0.0552, -0.5928, 0.4788]) tensor([0.5338, 0.1417, 0.0828, 0.2417]) -Greedy action tensor([ 1.3856, -0.6767, -0.2043, 0.3139]) tensor([0.5975, 0.0760, 0.1219, 0.2046]) -Greedy action tensor([ 1.3092, -0.3632, -0.8858, 0.1986]) tensor([0.6141, 0.1153, 0.0684, 0.2022]) -Greedy action tensor([ 1.4297, -0.2316, -0.6706, 0.1544]) tensor([0.6283, 0.1193, 0.0769, 0.1755]) -Greedy action tensor([ 1.6134, -0.3284, -0.8477, 0.6089]) tensor([0.6270, 0.0899, 0.0535, 0.2296]) -Greedy action tensor([ 1.0896, -1.2513, -0.2286, 0.0248]) tensor([0.5853, 0.0563, 0.1566, 0.2018]) -Greedy action tensor([ 1.7961, 0.4297, -0.3755, 0.0502]) tensor([0.6479, 0.1652, 0.0739, 0.1130]) -Greedy action tensor([ 1.6669, -0.6496, -0.1740, 0.1497]) tensor([0.6772, 0.0668, 0.1075, 0.1485]) -Greedy action tensor([ 1.2881, -0.3350, -0.3437, 0.3944]) tensor([0.5549, 0.1095, 0.1085, 0.2271]) -Greedy action tensor([ 1.5623, -0.7595, -0.2293, 0.4833]) tensor([0.6232, 0.0611, 0.1039, 0.2118]) -Greedy action tensor([ 1.1603, -0.3690, -0.0489, -0.1156]) tensor([0.5573, 0.1208, 0.1663, 0.1556]) -Greedy action tensor([ 0.9717, -0.2372, -0.1189, 0.1532]) tensor([0.4818, 0.1438, 0.1619, 0.2125]) -Greedy action tensor([ 1.3750, -0.7206, -0.5654, 0.5585]) tensor([0.5853, 0.0720, 0.0841, 0.2587]) -Greedy action tensor([ 1.7464, -0.4155, -0.6816, 0.2503]) tensor([0.7006, 0.0806, 0.0618, 0.1569]) -Greedy action tensor([ 2.3411, -1.1229, 0.0294, 0.7671]) tensor([0.7476, 0.0234, 0.0741, 0.1549]) -Greedy action tensor([ 1.0949, -0.6669, -0.2026, -0.2422]) tensor([0.5856, 0.1006, 0.1600, 0.1538]) -Greedy action tensor([ 0.6269, -0.2440, 0.0361, -0.3106]) tensor([0.4230, 0.1771, 0.2343, 0.1656]) -Greedy action tensor([ 1.2567, -0.0736, 0.0427, -0.0816]) tensor([0.5483, 0.1450, 0.1629, 0.1438]) -Greedy action tensor([ 0.8873, -0.7528, 0.0494, -0.4460]) tensor([0.5290, 0.1026, 0.2289, 0.1395]) -Greedy action tensor([ 0.9174, -0.5316, -0.0060, -0.4521]) tensor([0.5302, 0.1245, 0.2106, 0.1348]) -Greedy action tensor([ 0.4681, -0.1555, -0.0360, -0.0263]) tensor([0.3636, 0.1949, 0.2197, 0.2218]) -Greedy action tensor([ 0.8192, -0.2220, -0.0546, -0.1778]) tensor([0.4674, 0.1650, 0.1951, 0.1725]) -Greedy action tensor([ 0.5242, 0.2257, -0.1698, 0.0663]) tensor([0.3479, 0.2581, 0.1738, 0.2201]) -Greedy action tensor([ 0.5889, -0.4913, -0.1104, -0.1096]) tensor([0.4285, 0.1455, 0.2129, 0.2131]) -Greedy action tensor([ 1.2093, -0.8404, -0.0086, -0.5696]) tensor([0.6276, 0.0808, 0.1857, 0.1060]) -Greedy action tensor([ 0.5929, -0.4140, 0.0006, -0.1049]) tensor([0.4139, 0.1512, 0.2289, 0.2060]) -Greedy action tensor([ 0.8086, -0.6561, 0.0191, -0.4370]) tensor([0.5068, 0.1172, 0.2301, 0.1459]) -Greedy action tensor([ 0.3494, -0.1011, -0.0157, -0.0765]) tensor([0.3351, 0.2135, 0.2326, 0.2188]) -Greedy action tensor([ 0.5163, -0.3304, 0.0603, -0.7160]) tensor([0.4248, 0.1821, 0.2692, 0.1239]) -Greedy action tensor([ 0.8638, -0.5238, 0.0739, -0.2086]) tensor([0.4888, 0.1220, 0.2219, 0.1673]) -Greedy action tensor([ 0.3638, 0.0078, -0.0270, -0.2431]) tensor([0.3422, 0.2397, 0.2315, 0.1865]) -Greedy action tensor([ 0.5685, 0.0113, -0.0165, -0.0423]) tensor([0.3741, 0.2143, 0.2084, 0.2031]) -Greedy action tensor([ 0.8465, -0.4113, -0.0028, -0.1837]) tensor([0.4833, 0.1374, 0.2067, 0.1725]) -Greedy action tensor([ 0.7134, -0.1534, -0.0774, -0.0452]) tensor([0.4270, 0.1794, 0.1936, 0.2000]) -Greedy action tensor([ 0.9071, -0.8725, 0.0436, -0.4893]) tensor([0.5441, 0.0918, 0.2294, 0.1347]) -Greedy action tensor([ 4.9013e-01, -2.3124e-01, 2.3335e-05, -2.0006e-01]) tensor([0.3846, 0.1869, 0.2356, 0.1929]) -Greedy action tensor([ 0.8344, 0.0137, 0.0374, -0.3548]) tensor([0.4555, 0.2005, 0.2053, 0.1387]) -Greedy action tensor([ 0.8253, -0.8983, 0.0962, -0.3899]) tensor([0.5109, 0.0912, 0.2464, 0.1516]) -Greedy action tensor([ 0.9915, -0.8723, 0.0018, -0.2328]) tensor([0.5492, 0.0852, 0.2041, 0.1615]) -Greedy action tensor([ 0.3690, -0.0478, -0.0567, -0.0642]) tensor([0.3377, 0.2226, 0.2206, 0.2190]) -Greedy action tensor([ 0.9835, -0.5920, 0.0157, -0.3759]) tensor([0.5424, 0.1122, 0.2061, 0.1393]) -Greedy action tensor([ 0.7760, -0.2933, 0.0152, -0.0906]) tensor([0.4482, 0.1539, 0.2095, 0.1884]) -Greedy action tensor([ 0.5905, -0.4305, 0.0045, -0.4187]) tensor([0.4384, 0.1579, 0.2440, 0.1598]) -Greedy action tensor([ 0.7755, -0.2776, -0.1253, -0.2124]) tensor([0.4701, 0.1640, 0.1909, 0.1750]) -Greedy action tensor([ 0.7036, -0.5078, -0.3327, -0.8216]) tensor([0.5347, 0.1592, 0.1897, 0.1163]) -Greedy action tensor([ 0.3854, -0.2316, -0.0076, -0.4389]) tensor([0.3769, 0.2034, 0.2544, 0.1653]) -Greedy action tensor([ 0.3618, 0.5492, -0.0646, -0.0008]) tensor([0.2813, 0.3393, 0.1837, 0.1957]) -Greedy action tensor([ 0.4785, -0.2376, 0.0122, -0.5960]) tensor([0.4069, 0.1989, 0.2553, 0.1390]) -Greedy action tensor([ 0.6828, -0.7675, -0.0984, -0.4673]) tensor([0.4978, 0.1167, 0.2279, 0.1576]) -Greedy action tensor([ 0.7849, -0.4026, -0.0331, -0.3865]) tensor([0.4863, 0.1483, 0.2146, 0.1507]) -Greedy action tensor([ 0.6226, -0.4435, -0.1160, -0.1774]) tensor([0.4403, 0.1516, 0.2103, 0.1978]) -Greedy action tensor([ 0.6062, -0.1875, -0.0274, -0.3017]) tensor([0.4191, 0.1895, 0.2224, 0.1690]) -Greedy action tensor([ 0.2180, -0.0552, -0.0348, -0.1402]) tensor([0.3090, 0.2351, 0.2400, 0.2160]) -Greedy action tensor([ 0.5520, -0.0588, -0.0819, -0.0158]) tensor([0.3787, 0.2056, 0.2010, 0.2147]) -Greedy action tensor([ 0.4312, -0.5254, -0.1529, -0.2569]) tensor([0.4091, 0.1572, 0.2281, 0.2056]) -Greedy action tensor([ 0.8139, -0.6522, -0.0362, -0.3862]) tensor([0.5104, 0.1178, 0.2181, 0.1537]) -Greedy action tensor([ 0.6448, -0.2973, -0.1039, -0.1731]) tensor([0.4340, 0.1692, 0.2053, 0.1915]) -Greedy action tensor([ 1.1265, -0.2395, -0.1625, -0.1331]) tensor([0.5511, 0.1406, 0.1519, 0.1564]) -Greedy action tensor([ 0.5074, -0.1311, 0.0106, -0.1383]) tensor([0.3758, 0.1985, 0.2287, 0.1970]) -Greedy action tensor([ 0.8694, -0.4596, -0.0072, -0.2534]) tensor([0.4984, 0.1320, 0.2074, 0.1622]) -Greedy action tensor([ 0.7701, -0.4900, -0.1017, -0.1056]) tensor([0.4720, 0.1339, 0.1974, 0.1966]) -Greedy action tensor([ 0.9061, -0.4168, -0.0631, -0.2435]) tensor([0.5095, 0.1357, 0.1933, 0.1614]) -Greedy action tensor([ 1.0897, -0.9847, 0.1432, -0.7346]) tensor([0.5970, 0.0750, 0.2317, 0.0963]) -Greedy action tensor([ 0.9156, -0.7173, 0.0290, -0.4614]) tensor([0.5377, 0.1050, 0.2216, 0.1357]) -Greedy action tensor([ 1.1170, -0.7032, -0.0226, -0.3339]) tensor([0.5827, 0.0944, 0.1864, 0.1365]) -Greedy action tensor([ 0.8565, -0.5307, -0.0158, -0.4414]) tensor([0.5152, 0.1287, 0.2154, 0.1407]) -Greedy action tensor([ 0.9729, -0.8892, 0.0236, -0.4525]) tensor([0.5609, 0.0871, 0.2171, 0.1349]) -Greedy action tensor([ 0.7997, -0.5930, -0.0259, -0.4379]) tensor([0.5060, 0.1257, 0.2216, 0.1468]) -Greedy action tensor([ 0.8952, -0.6749, 0.0139, -0.4117]) tensor([0.5283, 0.1099, 0.2188, 0.1430]) -Greedy action tensor([ 0.7378, -0.5272, -0.1508, -0.2805]) tensor([0.4867, 0.1374, 0.2002, 0.1758]) -Greedy action tensor([ 0.8237, -0.4081, -0.0424, -0.1448]) tensor([0.4780, 0.1395, 0.2010, 0.1815]) -Greedy action tensor([ 1.0415, -0.4707, -0.2344, -0.2893]) tensor([0.5669, 0.1250, 0.1583, 0.1498]) -Greedy action tensor([ 1.0784, -0.9452, 0.3198, -0.6230]) tensor([0.5609, 0.0741, 0.2627, 0.1023]) -Greedy action tensor([ 1.0113, -0.6895, 0.0407, -0.5483]) tensor([0.5645, 0.1030, 0.2138, 0.1187]) -Greedy action tensor([ 1.0704, -0.6369, -0.1134, -0.3301]) tensor([0.5767, 0.1046, 0.1765, 0.1421]) -Greedy action tensor([ 0.6887, -0.1296, -0.0350, -0.0552]) tensor([0.4164, 0.1837, 0.2020, 0.1979]) -Greedy action tensor([ 0.7278, -0.3578, -0.0774, -0.1155]) tensor([0.4515, 0.1525, 0.2018, 0.1943]) -Greedy action tensor([ 0.4661, -0.3627, -0.1095, -0.2679]) tensor([0.4034, 0.1761, 0.2269, 0.1936]) -Greedy action tensor([ 1.0138, -0.8213, 0.1509, -0.3112]) tensor([0.5413, 0.0864, 0.2284, 0.1439]) -Greedy action tensor([ 0.9532, -0.5318, -0.1277, -0.3451]) tensor([0.5438, 0.1232, 0.1845, 0.1485]) -Greedy action tensor([ 0.5686, -0.1157, -0.0607, -0.5244]) tensor([0.4215, 0.2126, 0.2246, 0.1413]) -Greedy action tensor([ 0.6897, -0.4348, -0.0536, -0.2233]) tensor([0.4542, 0.1475, 0.2160, 0.1823]) -Greedy action tensor([ 0.8770, -0.4618, -0.1611, -0.2280]) tensor([0.5135, 0.1346, 0.1818, 0.1701]) -Greedy action tensor([ 0.9130, -0.5900, -0.2221, -0.5118]) tensor([0.5604, 0.1247, 0.1801, 0.1348]) -Greedy action tensor([ 0.6717, -0.3372, -0.0479, -0.0365]) tensor([0.4266, 0.1556, 0.2077, 0.2101]) -Greedy action tensor([ 0.5879, -0.3989, -0.1338, 0.0157]) tensor([0.4127, 0.1538, 0.2006, 0.2329]) -Greedy action tensor([ 0.6547, -0.2315, -0.0718, -0.0686]) tensor([0.4200, 0.1731, 0.2031, 0.2038]) -Greedy action tensor([ 0.7074, -0.2557, -0.0300, -0.1643]) tensor([0.4389, 0.1675, 0.2100, 0.1836]) -Greedy action tensor([ 0.7283, -0.4697, -0.0298, -0.3816]) tensor([0.4762, 0.1437, 0.2231, 0.1569]) -Greedy action tensor([ 0.9657, -0.9203, -0.1135, -0.4226]) tensor([0.5744, 0.0871, 0.1952, 0.1433]) -Greedy action tensor([ 0.7014, -0.0932, -0.0414, -0.0208]) tensor([0.4144, 0.1872, 0.1971, 0.2013]) -Greedy action tensor([ 1.1759, -1.0657, 0.0487, -0.6927]) tensor([0.6311, 0.0671, 0.2044, 0.0974]) -Greedy action tensor([ 0.5719, -0.2137, 0.0077, -0.0172]) tensor([0.3877, 0.1767, 0.2205, 0.2151]) -Greedy action tensor([ 0.7587, -0.3076, 0.0712, -0.4315]) tensor([0.4648, 0.1600, 0.2337, 0.1414]) -Greedy action tensor([ 0.3227, 0.3794, -0.1852, 0.0569]) tensor([0.2918, 0.3089, 0.1756, 0.2237]) -Greedy action tensor([ 0.4707, 0.2802, -0.1925, 0.1039]) tensor([0.3295, 0.2724, 0.1698, 0.2283]) -Greedy action tensor([-1.9357, -0.4421, 0.6647, -0.1753]) tensor([0.0404, 0.1800, 0.5445, 0.2351]) -Greedy action tensor([-1.8124, -0.0725, 0.5450, -0.0933]) tensor([0.0438, 0.2494, 0.4625, 0.2443]) -Greedy action tensor([-1.7811, -0.3738, 0.6661, -0.0411]) tensor([0.0448, 0.1829, 0.5173, 0.2551]) -Greedy action tensor([-1.8773, -0.4652, 0.6953, -0.0359]) tensor([0.0408, 0.1675, 0.5345, 0.2573]) -Greedy action tensor([-1.8276, -0.4120, 0.6097, -0.1166]) tensor([0.0453, 0.1864, 0.5178, 0.2505]) -Greedy action tensor([-0.5754, 0.8635, 0.0372, 0.2175]) tensor([0.1079, 0.4547, 0.1990, 0.2384]) -Greedy action tensor([-1.7849, -0.4348, 0.5559, -0.0833]) tensor([0.0482, 0.1861, 0.5012, 0.2645]) -Greedy action tensor([-0.9862, -0.1867, 0.3014, -0.1239]) tensor([0.1085, 0.2413, 0.3932, 0.2570]) -Greedy action tensor([-1.9389, -0.4417, 0.6644, -0.1752]) tensor([0.0403, 0.1801, 0.5445, 0.2351]) -Greedy action tensor([-1.8649, -0.4448, 0.6279, -0.1394]) tensor([0.0438, 0.1811, 0.5294, 0.2458]) -Greedy action tensor([-1.5671, -0.0708, 0.4214, -0.0254]) tensor([0.0573, 0.2560, 0.4188, 0.2679]) -Greedy action tensor([-0.6754, 0.9481, 0.1053, 0.0409]) tensor([0.0971, 0.4923, 0.2119, 0.1987]) -Greedy action tensor([-1.6894, -0.4498, 0.5397, -0.0880]) tensor([0.0535, 0.1847, 0.4967, 0.2652]) -Greedy action tensor([-1.9258, -0.4497, 0.6613, -0.1693]) tensor([0.0409, 0.1789, 0.5434, 0.2368]) -Greedy action tensor([-1.7500, -0.2944, 0.6222, 0.0066]) tensor([0.0459, 0.1967, 0.4918, 0.2657]) -Greedy action tensor([-1.8336, -0.4270, 0.6662, -0.0557]) tensor([0.0431, 0.1761, 0.5255, 0.2553]) -Greedy action tensor([-1.7544, -0.3407, 0.5599, -0.1984]) tensor([0.0501, 0.2059, 0.5067, 0.2374]) -Greedy action tensor([-1.8586, -0.4141, 0.6243, -0.1191]) tensor([0.0437, 0.1851, 0.5227, 0.2486]) -Greedy action tensor([-1.4967, 0.3549, 0.3437, 0.2083]) tensor([0.0522, 0.3323, 0.3286, 0.2870]) -Greedy action tensor([-1.7652, -0.4703, 0.5810, -0.0922]) tensor([0.0490, 0.1787, 0.5114, 0.2609]) -Greedy action tensor([-1.9342, -0.4356, 0.6632, -0.1747]) tensor([0.0405, 0.1811, 0.5434, 0.2351]) -Greedy action tensor([-1.8536, -0.4816, 0.6155, -0.1315]) tensor([0.0447, 0.1764, 0.5285, 0.2504]) -Greedy action tensor([-1.7878, -0.3960, 0.6533, -0.0825]) tensor([0.0454, 0.1827, 0.5218, 0.2500]) -Greedy action tensor([-0.6437, -0.1826, 0.6745, 1.2046]) tensor([0.0789, 0.1252, 0.2949, 0.5010]) -Greedy action tensor([-1.6762, 0.0302, 0.4671, -0.0393]) tensor([0.0496, 0.2731, 0.4227, 0.2547]) -Greedy action tensor([-1.0209, 0.0748, 0.5922, 0.2936]) tensor([0.0785, 0.2349, 0.3941, 0.2924]) -Greedy action tensor([-1.3777, -0.2881, 0.4699, -0.1753]) tensor([0.0733, 0.2179, 0.4649, 0.2439]) -Greedy action tensor([-1.9035, -0.4431, 0.6704, -0.1509]) tensor([0.0413, 0.1781, 0.5421, 0.2385]) -Greedy action tensor([-1.9397, -0.4453, 0.6658, -0.1775]) tensor([0.0403, 0.1796, 0.5455, 0.2347]) -Greedy action tensor([-1.3728, 0.6601, 0.2298, 0.1895]) tensor([0.0544, 0.4157, 0.2703, 0.2596]) -Greedy action tensor([-1.9233, -0.4229, 0.6482, -0.1715]) tensor([0.0411, 0.1843, 0.5377, 0.2369]) -Greedy action tensor([-1.9096, -0.3861, 0.6397, -0.1577]) tensor([0.0414, 0.1900, 0.5299, 0.2387]) -Greedy action tensor([-1.3571, 0.6677, 0.2037, 0.2121]) tensor([0.0551, 0.4176, 0.2625, 0.2648]) -Greedy action tensor([-1.1783, 0.2424, 0.1903, 0.1350]) tensor([0.0782, 0.3237, 0.3073, 0.2908]) -Greedy action tensor([-1.6713, -0.5263, 0.5384, -0.0963]) tensor([0.0553, 0.1737, 0.5039, 0.2671]) -Greedy action tensor([-1.8605, -0.2842, 0.6004, -0.1348]) tensor([0.0432, 0.2088, 0.5056, 0.2424]) -Greedy action tensor([-1.9139, -0.4313, 0.6537, -0.1623]) tensor([0.0413, 0.1820, 0.5386, 0.2381]) -Greedy action tensor([-1.7939e+00, 1.3489e-04, 5.2110e-01, -7.6580e-02]) tensor([0.0440, 0.2648, 0.4459, 0.2453]) -Greedy action tensor([-1.8718, -0.3396, 0.6160, -0.1324]) tensor([0.0428, 0.1982, 0.5152, 0.2438]) -Greedy action tensor([-1.6877, -0.3099, 0.6228, 0.0085]) tensor([0.0488, 0.1935, 0.4917, 0.2660]) -Greedy action tensor([-1.8048, -0.4253, 0.6036, -0.1018]) tensor([0.0463, 0.1841, 0.5151, 0.2544]) -Greedy action tensor([-1.8857, -0.3861, 0.6240, -0.1445]) tensor([0.0426, 0.1907, 0.5238, 0.2429]) -Greedy action tensor([-1.9385, -0.4463, 0.6626, -0.1766]) tensor([0.0404, 0.1797, 0.5446, 0.2353]) -Greedy action tensor([-1.3939, 0.6881, 0.2559, 0.2001]) tensor([0.0522, 0.4188, 0.2718, 0.2571]) -Greedy action tensor([-1.9230, -0.3926, 0.6512, -0.1611]) tensor([0.0407, 0.1881, 0.5341, 0.2371]) -Greedy action tensor([-1.8185, -0.2992, 0.5854, -0.1311]) tensor([0.0454, 0.2073, 0.5021, 0.2452]) -Greedy action tensor([-1.7331, -0.4876, 0.6123, -0.0703]) tensor([0.0495, 0.1721, 0.5171, 0.2613]) -Greedy action tensor([-1.6517, 0.1544, 0.4188, -0.0363]) tensor([0.0499, 0.3036, 0.3955, 0.2509]) -Greedy action tensor([-1.9239, -0.3942, 0.6460, -0.1696]) tensor([0.0409, 0.1887, 0.5341, 0.2363]) -Greedy action tensor([-1.6567, -0.3124, 0.5558, 0.0321]) tensor([0.0516, 0.1978, 0.4714, 0.2792]) -Greedy action tensor([-1.8749, -0.3540, 0.6306, -0.1402]) tensor([0.0426, 0.1948, 0.5214, 0.2412]) -Greedy action tensor([-1.4577, -0.5374, 0.4426, -0.0498]) tensor([0.0700, 0.1757, 0.4682, 0.2861]) -Greedy action tensor([-1.0324, -0.0771, 0.2513, -0.0533]) tensor([0.1013, 0.2633, 0.3657, 0.2697]) -Greedy action tensor([-1.9165, -0.4161, 0.6503, -0.1640]) tensor([0.0412, 0.1847, 0.5365, 0.2376]) -Greedy action tensor([-1.8970, -0.4378, 0.6396, -0.1567]) tensor([0.0423, 0.1820, 0.5346, 0.2411]) -Greedy action tensor([-1.5238, -0.5401, 0.5343, 0.2712]) tensor([0.0571, 0.1526, 0.4469, 0.3435]) -Greedy action tensor([-1.8582, -0.4534, 0.6310, -0.1375]) tensor([0.0440, 0.1794, 0.5306, 0.2460]) -Greedy action tensor([-1.6175, -0.4474, 0.5095, -0.0130]) tensor([0.0569, 0.1832, 0.4770, 0.2829]) -Greedy action tensor([-1.2044, -0.3960, 0.4501, -0.3192]) tensor([0.0918, 0.2059, 0.4799, 0.2224]) -Greedy action tensor([-1.6317, -0.5185, 0.7633, 0.4147]) tensor([0.0439, 0.1338, 0.4821, 0.3402]) -Greedy action tensor([-1.7117, 0.3242, 0.5299, -0.3133]) tensor([0.0452, 0.3463, 0.4254, 0.1831]) -Greedy action tensor([-1.4702, -0.5802, 0.4184, 0.0550]) tensor([0.0683, 0.1663, 0.4515, 0.3139]) -Greedy action tensor([-1.8988, -0.4394, 0.6395, -0.1575]) tensor([0.0423, 0.1818, 0.5349, 0.2411]) -Greedy action tensor([-1.9179, -0.4051, 0.6524, -0.1649]) tensor([0.0410, 0.1862, 0.5361, 0.2367]) -Greedy action tensor([-1.8773, -0.4729, 0.6304, -0.1485]) tensor([0.0435, 0.1772, 0.5341, 0.2451]) -Greedy action tensor([-1.9395, -0.4454, 0.6657, -0.1773]) tensor([0.0403, 0.1795, 0.5454, 0.2347]) -Greedy action tensor([-1.3220, 0.4259, 0.3848, -0.2373]) tensor([0.0657, 0.3775, 0.3623, 0.1945]) -Greedy action tensor([-1.0918, -0.1389, 0.5278, 0.4487]) tensor([0.0751, 0.1948, 0.3795, 0.3506]) -Greedy action tensor([-1.7058, -0.2203, 0.5156, -0.0871]) tensor([0.0508, 0.2244, 0.4684, 0.2564]) -Greedy action tensor([-1.6409, 0.2035, 0.4781, 0.0964]) tensor([0.0469, 0.2965, 0.3902, 0.2664]) -Greedy action tensor([-0.7564, -0.5660, 0.2290, 0.7673]) tensor([0.1055, 0.1276, 0.2826, 0.4842]) -Greedy action tensor([-1.7829, -0.4562, 0.6445, -0.0251]) tensor([0.0457, 0.1721, 0.5174, 0.2648]) -Greedy action tensor([-1.9081, -0.4501, 0.6542, -0.1605]) tensor([0.0417, 0.1790, 0.5401, 0.2392]) -Greedy action tensor([-1.4419, -0.1745, 0.6097, 0.0675]) tensor([0.0593, 0.2107, 0.4616, 0.2684]) -Greedy action tensor([-1.9099, -0.4385, 0.6445, -0.1573]) tensor([0.0417, 0.1816, 0.5362, 0.2405]) -Greedy action tensor([-1.7851, -0.4887, 0.5743, -0.0948]) tensor([0.0484, 0.1769, 0.5123, 0.2624]) -Greedy action tensor([-1.9232, -0.4624, 0.6842, -0.1502]) tensor([0.0404, 0.1740, 0.5478, 0.2378]) -Greedy action tensor([-1.8474, -0.3543, 0.6209, -0.1195]) tensor([0.0437, 0.1945, 0.5158, 0.2460]) -Greedy action tensor([-1.8866, -0.4418, 0.6436, -0.1518]) tensor([0.0426, 0.1807, 0.5351, 0.2415]) -Greedy action tensor([-0.2993, 1.0574, -0.0206, 0.5266]) tensor([0.1178, 0.4575, 0.1557, 0.2691]) -Greedy action tensor([-1.1987, 0.4495, 0.2782, -0.0984]) tensor([0.0736, 0.3827, 0.3224, 0.2213]) -Greedy action tensor([ 0.7652, -1.4086, 0.4287, 0.6119]) tensor([0.3723, 0.0423, 0.2659, 0.3194]) -Greedy action tensor([-0.1417, 0.1971, -0.1265, -0.6427]) tensor([0.2485, 0.3487, 0.2523, 0.1506]) -Greedy action tensor([-0.8571, -0.5535, -0.0621, -0.8835]) tensor([0.1804, 0.2444, 0.3995, 0.1757]) -Greedy action tensor([-0.1189, -0.9756, -0.1766, -0.1029]) tensor([0.2955, 0.1254, 0.2789, 0.3002]) -Greedy action tensor([-0.2563, -0.4367, -1.0834, -0.0178]) tensor([0.2824, 0.2358, 0.1235, 0.3584]) -Greedy action tensor([-0.3127, -0.5503, 0.8939, -0.1897]) tensor([0.1597, 0.1259, 0.5338, 0.1806]) -Greedy action tensor([-0.3238, -0.4652, -1.6434, -0.0505]) tensor([0.2899, 0.2517, 0.0775, 0.3810]) -Greedy action tensor([-1.2960, -0.6271, -0.0117, 0.1638]) tensor([0.0920, 0.1796, 0.3323, 0.3961]) -Greedy action tensor([-0.6759, 1.1773, -0.4587, -0.8787]) tensor([0.1059, 0.6759, 0.1316, 0.0865]) -Greedy action tensor([-0.8176, -0.3382, -0.2323, -0.6380]) tensor([0.1783, 0.2880, 0.3202, 0.2134]) -Greedy action tensor([ 0.2219, 0.1064, 0.6523, -0.2244]) tensor([0.2458, 0.2190, 0.3780, 0.1573]) -Greedy action tensor([ 0.8913, -0.8679, 0.6398, 0.8284]) tensor([0.3462, 0.0596, 0.2692, 0.3250]) -Greedy action tensor([-0.5452, -0.5677, -1.6168, -0.1601]) tensor([0.2639, 0.2580, 0.0904, 0.3878]) -Greedy action tensor([ 0.0271, -1.2498, 0.1853, -0.3255]) tensor([0.3171, 0.0885, 0.3715, 0.2229]) -Greedy action tensor([-0.2044, 0.4016, 0.8153, -1.0889]) tensor([0.1662, 0.3046, 0.4606, 0.0686]) -Greedy action tensor([-1.4259, -0.8403, -0.4108, 0.0597]) tensor([0.1003, 0.1801, 0.2767, 0.4430]) -Greedy action tensor([ 0.5240, 0.8557, -0.5511, -0.1957]) tensor([0.3104, 0.4325, 0.1059, 0.1511]) -Greedy action tensor([-1.0119, -0.1405, -1.4081, -0.3822]) tensor([0.1684, 0.4024, 0.1133, 0.3160]) -Greedy action tensor([-0.1105, -1.3605, -0.0474, -0.2126]) tensor([0.3073, 0.0880, 0.3273, 0.2774]) -Greedy action tensor([ 0.0708, -1.4279, -0.2581, 1.0239]) tensor([0.2204, 0.0492, 0.1586, 0.5717]) -Greedy action tensor([ 0.5298, -0.6893, 0.1832, 0.5341]) tensor([0.3326, 0.0983, 0.2352, 0.3340]) -Greedy action tensor([-0.3162, -0.6536, 0.5633, -0.6619]) tensor([0.2070, 0.1477, 0.4988, 0.1465]) -Greedy action tensor([ 1.0206, -0.1044, 0.2101, 0.2538]) tensor([0.4477, 0.1453, 0.1990, 0.2079]) -Greedy action tensor([ 0.5204, -2.1956, -0.2771, 0.4502]) tensor([0.4084, 0.0270, 0.1839, 0.3807]) -Greedy action tensor([ 1.0469, -0.5904, 1.9944, 0.7509]) tensor([0.2214, 0.0431, 0.5710, 0.1646]) -Greedy action tensor([ 0.2833, -1.4974, 0.0445, 0.5510]) tensor([0.3065, 0.0516, 0.2414, 0.4005]) -Greedy action tensor([ 1.5822, -0.6670, 0.6612, 0.2719]) tensor([0.5639, 0.0595, 0.2245, 0.1521]) -Greedy action tensor([-0.9843, -0.3414, 1.0461, -0.8048]) tensor([0.0854, 0.1623, 0.6502, 0.1021]) -Greedy action tensor([-0.4051, -1.5594, 1.5488, -0.1875]) tensor([0.1040, 0.0328, 0.7339, 0.1293]) -Greedy action tensor([-0.3083, 0.3492, -0.2590, 0.2345]) tensor([0.1754, 0.3385, 0.1843, 0.3018]) -Greedy action tensor([-1.4067, -0.4727, 0.1859, -0.3507]) tensor([0.0882, 0.2245, 0.4337, 0.2536]) -Greedy action tensor([-0.5454, -1.2271, 0.9161, -0.4891]) tensor([0.1454, 0.0736, 0.6272, 0.1539]) -Greedy action tensor([-0.0534, 0.6504, 0.5685, -0.1930]) tensor([0.1738, 0.3513, 0.3237, 0.1512]) -Greedy action tensor([-0.9278, -0.7959, -0.7313, -0.0646]) tensor([0.1746, 0.1992, 0.2125, 0.4138]) -Greedy action tensor([-0.3873, 0.2614, -0.3445, -1.0328]) tensor([0.2231, 0.4269, 0.2329, 0.1170]) -Greedy action tensor([ 0.5162, -0.5062, -0.0471, 0.8028]) tensor([0.3067, 0.1103, 0.1746, 0.4084]) -Greedy action tensor([-0.4083, -0.3957, 0.2301, 0.2784]) tensor([0.1697, 0.1718, 0.3213, 0.3372]) -Greedy action tensor([ 0.3781, -0.5159, 0.9504, -0.2754]) tensor([0.2702, 0.1105, 0.4788, 0.1405]) -Greedy action tensor([-0.2712, -2.1123, -0.5202, 0.8029]) tensor([0.2055, 0.0326, 0.1602, 0.6016]) -Greedy action tensor([ 1.0627, -0.8522, 0.8144, 0.0276]) tensor([0.4381, 0.0646, 0.3418, 0.1556]) -Greedy action tensor([ 0.6759, -2.0156, -0.2072, 0.0057]) tensor([0.5018, 0.0340, 0.2075, 0.2567]) -Greedy action tensor([ 0.7253, 0.0629, -0.3961, 0.3916]) tensor([0.3910, 0.2016, 0.1274, 0.2800]) -Greedy action tensor([ 0.8858, -1.0074, 0.2202, -0.0624]) tensor([0.4873, 0.0734, 0.2505, 0.1888]) -Greedy action tensor([-0.6472, -0.3806, 0.7420, -0.2091]) tensor([0.1271, 0.1659, 0.5099, 0.1970]) -Greedy action tensor([-0.2004, -0.5855, -0.7511, 0.0548]) tensor([0.2819, 0.1918, 0.1625, 0.3638]) -Greedy action tensor([-0.4444, -1.4192, 0.2290, 0.0129]) tensor([0.2033, 0.0767, 0.3987, 0.3212]) -Greedy action tensor([-0.6926, -0.1945, -0.1187, 0.2939]) tensor([0.1408, 0.2317, 0.2499, 0.3776]) -Greedy action tensor([ 0.5536, -0.7750, 0.1649, -0.0860]) tensor([0.4048, 0.1072, 0.2744, 0.2135]) -Greedy action tensor([-1.1823, -1.0754, -0.1051, -0.0067]) tensor([0.1206, 0.1342, 0.3542, 0.3909]) -Greedy action tensor([ 0.8620, -0.7457, 0.0639, -0.4628]) tensor([0.5218, 0.1045, 0.2349, 0.1387]) -Greedy action tensor([-1.1153, -0.0162, -0.0973, -0.6947]) tensor([0.1206, 0.3620, 0.3338, 0.1837]) -Greedy action tensor([-0.1967, -0.6647, -0.6046, -0.3143]) tensor([0.3144, 0.1969, 0.2091, 0.2796]) -Greedy action tensor([ 0.8746, -0.0213, -0.3730, 0.1407]) tensor([0.4597, 0.1877, 0.1320, 0.2207]) -Greedy action tensor([ 0.2034, -0.0572, -0.9904, -0.5965]) tensor([0.3963, 0.3054, 0.1201, 0.1781]) -Greedy action tensor([ 1.4775, -0.1825, 1.6635, -0.4219]) tensor([0.3930, 0.0747, 0.4734, 0.0588]) -Greedy action tensor([-0.3732, -0.0706, 0.0265, 0.0782]) tensor([0.1847, 0.2499, 0.2754, 0.2900]) -Greedy action tensor([-0.2141, -0.2797, 0.2333, -0.9734]) tensor([0.2520, 0.2360, 0.3941, 0.1179]) -Greedy action tensor([-0.4135, -0.2497, -0.1664, -0.1188]) tensor([0.2083, 0.2454, 0.2667, 0.2797]) -Greedy action tensor([-0.6621, -0.2966, -0.1147, 0.0986]) tensor([0.1585, 0.2284, 0.2740, 0.3391]) -Greedy action tensor([ 0.0017, -0.4039, 0.7890, -1.3010]) tensor([0.2418, 0.1612, 0.5313, 0.0657]) -Greedy action tensor([-0.6501, -0.9319, 0.3637, -0.1449]) tensor([0.1621, 0.1223, 0.4468, 0.2687]) -Greedy action tensor([ 0.8656, -1.1150, 1.5063, 0.9859]) tensor([0.2402, 0.0331, 0.4558, 0.2709]) -Greedy action tensor([ 0.9915, -0.2844, -0.2097, -0.2332]) tensor([0.5337, 0.1490, 0.1605, 0.1568]) -Greedy action tensor([ 0.4931, 0.0812, -0.7166, -0.3001]) tensor([0.4144, 0.2745, 0.1236, 0.1875]) -Greedy action tensor([-1.2647, -0.1201, -0.6760, -0.6039]) tensor([0.1269, 0.3987, 0.2287, 0.2457]) -Greedy action tensor([ 0.6737, -0.2721, -0.1443, 0.8764]) tensor([0.3274, 0.1272, 0.1445, 0.4009]) -Greedy action tensor([-1.5034, -1.1448, 1.3829, -1.5732]) tensor([0.0470, 0.0672, 0.8420, 0.0438]) -Greedy action tensor([ 0.4899, -1.2681, -0.1215, 0.8502]) tensor([0.3176, 0.0548, 0.1723, 0.4553]) -Greedy action tensor([ 0.9998, -0.1973, 0.0107, 0.1657]) tensor([0.4743, 0.1433, 0.1764, 0.2060]) -Greedy action tensor([-0.3048, -0.4096, -0.6078, -0.0528]) tensor([0.2547, 0.2294, 0.1882, 0.3277]) -Greedy action tensor([ 0.8816, -0.0826, -0.3224, 0.5182]) tensor([0.4208, 0.1604, 0.1262, 0.2926]) -Greedy action tensor([ 0.8600, -1.3010, -0.6149, 0.4254]) tensor([0.5021, 0.0579, 0.1149, 0.3251]) -Greedy action tensor([0.4888, 0.1300, 0.0063, 0.5242]) tensor([0.2983, 0.2084, 0.1841, 0.3091]) -Greedy action tensor([-1.2592, -0.8259, -0.4719, 0.7439]) tensor([0.0823, 0.1269, 0.1808, 0.6100]) -Greedy action tensor([ 0.1568, -0.0895, -0.0484, -0.1731]) tensor([0.3017, 0.2358, 0.2457, 0.2169]) -Greedy action tensor([1.3190, 0.0704, 0.5455, 1.4139]) tensor([0.3511, 0.1007, 0.1620, 0.3861]) -Greedy action tensor([-0.2746, -1.4901, -0.3159, 1.2965]) tensor([0.1415, 0.0420, 0.1358, 0.6808]) -Greedy action tensor([ 1.0644, -0.3360, -0.3596, 0.1979]) tensor([0.5242, 0.1292, 0.1262, 0.2204]) -Greedy action tensor([ 1.1195, -0.1246, -0.7105, -0.4938]) tensor([0.6069, 0.1749, 0.0973, 0.1209]) -Greedy action tensor([-0.6144, -1.8394, 0.3921, 0.3443]) tensor([0.1506, 0.0443, 0.4122, 0.3929]) -Greedy action tensor([-0.4527, -0.8048, 0.9878, -0.8808]) tensor([0.1520, 0.1069, 0.6420, 0.0991]) -Greedy action tensor([ 0.1404, 0.6264, 0.7098, -0.9943]) tensor([0.2121, 0.3448, 0.3749, 0.0682]) -Greedy action tensor([ 2.1424, -0.4223, -0.8185, 0.2432]) tensor([0.7822, 0.0602, 0.0405, 0.1171]) -Greedy action tensor([ 1.9084, -0.1524, -0.1209, 0.4371]) tensor([0.6719, 0.0856, 0.0883, 0.1543]) -Greedy action tensor([ 2.2977, -1.0054, -0.0434, 0.4990]) tensor([0.7701, 0.0283, 0.0741, 0.1275]) -Greedy action tensor([ 1.3693, -0.4053, -0.8891, 0.1619]) tensor([0.6357, 0.1078, 0.0664, 0.1901]) -Greedy action tensor([ 1.9605, 0.2944, -0.6386, 0.1241]) tensor([0.7029, 0.1328, 0.0523, 0.1120]) -Greedy action tensor([ 1.3562, -0.0551, -0.8974, 0.2368]) tensor([0.5969, 0.1455, 0.0627, 0.1949]) -Greedy action tensor([ 1.3034, -0.7304, -0.1817, 0.3339]) tensor([0.5759, 0.0753, 0.1304, 0.2184]) -Greedy action tensor([ 1.8492, -0.1725, -0.8595, 0.4793]) tensor([0.6881, 0.0911, 0.0458, 0.1749]) -Greedy action tensor([ 1.5015, 0.3091, -0.9644, 0.4137]) tensor([0.5796, 0.1759, 0.0492, 0.1953]) -Greedy action tensor([ 0.9055, -0.2070, -0.0124, -0.0675]) tensor([0.4748, 0.1561, 0.1896, 0.1795]) -Greedy action tensor([ 1.8173, -0.2924, -0.2831, 0.4858]) tensor([0.6632, 0.0804, 0.0812, 0.1752]) -Greedy action tensor([ 0.7079, -0.1971, 0.0698, 0.1136]) tensor([0.4025, 0.1628, 0.2126, 0.2221]) -Greedy action tensor([ 1.2371, -0.0610, -0.0305, -0.1371]) tensor([0.5532, 0.1511, 0.1557, 0.1400]) -Greedy action tensor([ 1.7278, -0.2751, 0.1638, 0.4044]) tensor([0.6209, 0.0838, 0.1300, 0.1653]) -Greedy action tensor([ 1.3623, 0.1661, -0.8219, 0.3035]) tensor([0.5676, 0.1716, 0.0639, 0.1969]) -Greedy action tensor([ 1.7893, -0.5137, -0.9783, 0.3558]) tensor([0.7137, 0.0713, 0.0448, 0.1702]) -Greedy action tensor([ 1.7664, -0.9269, -0.2948, 0.0522]) tensor([0.7272, 0.0492, 0.0926, 0.1310]) -Greedy action tensor([ 0.9633, -0.0991, -0.6671, 0.2033]) tensor([0.4977, 0.1720, 0.0975, 0.2328]) -Greedy action tensor([ 1.5989, -0.4518, -0.6685, 0.2360]) tensor([0.6720, 0.0865, 0.0696, 0.1720]) -Greedy action tensor([ 1.9942, -0.7851, -0.2258, 0.6935]) tensor([0.6930, 0.0430, 0.0753, 0.1887]) -Greedy action tensor([ 1.0300, -0.2464, -0.3145, 0.3639]) tensor([0.4870, 0.1359, 0.1269, 0.2502]) -Greedy action tensor([ 1.5658, -0.5682, -0.0122, 0.5647]) tensor([0.5909, 0.0699, 0.1220, 0.2171]) -Greedy action tensor([ 2.0388, -0.2846, -0.4874, 0.4442]) tensor([0.7242, 0.0709, 0.0579, 0.1470]) -Greedy action tensor([ 1.3799, -0.5804, -0.2228, 0.2395]) tensor([0.6017, 0.0847, 0.1212, 0.1924]) -Greedy action tensor([ 2.0982, -0.6646, -1.0603, 0.5505]) tensor([0.7585, 0.0479, 0.0322, 0.1614]) -Greedy action tensor([ 2.2548, -1.1139, -0.2748, 0.7874]) tensor([0.7437, 0.0256, 0.0593, 0.1714]) -Greedy action tensor([ 1.2524, -0.2365, -0.3174, 0.0935]) tensor([0.5722, 0.1291, 0.1191, 0.1796]) -Greedy action tensor([ 0.9906, -0.0585, -0.7179, 0.1634]) tensor([0.5080, 0.1779, 0.0920, 0.2221]) -Greedy action tensor([ 1.4680, -0.2263, -0.2876, 0.2572]) tensor([0.6044, 0.1110, 0.1044, 0.1801]) -Greedy action tensor([ 1.3200, -0.2176, -0.6242, 0.2835]) tensor([0.5839, 0.1255, 0.0836, 0.2071]) -Greedy action tensor([ 1.3280, -0.9970, 0.0344, -0.4192]) tensor([0.6467, 0.0632, 0.1774, 0.1127]) -Greedy action tensor([ 1.7598, -0.8495, -0.3979, 0.0627]) tensor([0.7287, 0.0536, 0.0842, 0.1335]) -Greedy action tensor([ 0.9597, -0.0503, -0.1646, -0.0476]) tensor([0.4868, 0.1773, 0.1581, 0.1778]) -Greedy action tensor([ 1.6839, -0.9023, -0.0830, 0.3601]) tensor([0.6612, 0.0498, 0.1130, 0.1760]) -Greedy action tensor([ 1.6709, -0.5054, -0.9094, 0.2080]) tensor([0.7038, 0.0799, 0.0533, 0.1630]) -Greedy action tensor([ 1.2611, -0.0554, -0.7848, 0.3367]) tensor([0.5574, 0.1494, 0.0721, 0.2212]) -Greedy action tensor([ 1.8755, -0.9892, -0.0606, 0.6931]) tensor([0.6632, 0.0378, 0.0957, 0.2033]) -Greedy action tensor([ 1.7391, -0.6895, -0.2850, 0.5918]) tensor([0.6503, 0.0573, 0.0859, 0.2065]) -Greedy action tensor([ 1.0761, -0.3770, -0.7032, 0.4325]) tensor([0.5187, 0.1213, 0.0875, 0.2725]) -Greedy action tensor([ 1.4850, 0.1840, -0.6750, 0.2194]) tensor([0.5989, 0.1631, 0.0691, 0.1689]) -Greedy action tensor([ 0.9485, -0.4534, -0.1020, 0.3715]) tensor([0.4635, 0.1141, 0.1621, 0.2603]) -Greedy action tensor([ 2.0756, -0.5820, 0.0559, 0.5172]) tensor([0.7076, 0.0496, 0.0939, 0.1489]) -Greedy action tensor([ 1.6964, -0.2766, -0.6432, 0.2815]) tensor([0.6764, 0.0940, 0.0652, 0.1643]) -Greedy action tensor([ 2.1573, -0.5316, -0.2274, 0.5930]) tensor([0.7303, 0.0496, 0.0673, 0.1528]) -Greedy action tensor([ 1.4406, 0.0754, -0.3504, 0.3199]) tensor([0.5720, 0.1461, 0.0954, 0.1865]) -Greedy action tensor([ 1.7402, -0.4927, -0.6148, 0.3855]) tensor([0.6849, 0.0734, 0.0650, 0.1767]) -Greedy action tensor([ 1.1403, -0.3373, -0.4700, 0.0211]) tensor([0.5700, 0.1301, 0.1139, 0.1861]) -Greedy action tensor([ 1.6872, -0.3176, 0.0092, 0.3950]) tensor([0.6265, 0.0844, 0.1170, 0.1721]) -Greedy action tensor([ 1.1012, -0.4339, -0.8009, 0.5162]) tensor([0.5203, 0.1121, 0.0777, 0.2899]) -Greedy action tensor([ 2.0678, -0.6273, -0.3419, 0.3778]) tensor([0.7452, 0.0503, 0.0670, 0.1375]) -Greedy action tensor([ 1.9503, -0.4274, -0.7034, -0.0946]) tensor([0.7737, 0.0718, 0.0545, 0.1001]) -Greedy action tensor([ 1.7965, -1.1208, -0.0075, 0.7594]) tensor([0.6356, 0.0344, 0.1047, 0.2253]) -Greedy action tensor([ 2.1492, -0.8823, -0.0354, 0.7780]) tensor([0.7069, 0.0341, 0.0795, 0.1794]) -Greedy action tensor([ 1.2018, -0.0997, -0.6994, 0.4452]) tensor([0.5289, 0.1439, 0.0790, 0.2482]) -Greedy action tensor([2.1078, 0.5687, 0.2000, 0.0286]) tensor([0.6720, 0.1442, 0.0997, 0.0840]) -Greedy action tensor([ 1.8963, -1.0315, -0.2477, 0.7387]) tensor([0.6734, 0.0360, 0.0789, 0.2116]) -Greedy action tensor([ 1.2441, -0.2309, -0.4214, 0.2790]) tensor([0.5559, 0.1272, 0.1051, 0.2118]) -Greedy action tensor([ 2.3065, -0.8047, -0.0306, 0.0086]) tensor([0.8054, 0.0359, 0.0778, 0.0809]) -Greedy action tensor([ 1.3491, -0.1787, -0.4908, 0.1290]) tensor([0.5984, 0.1299, 0.0950, 0.1767]) -Greedy action tensor([ 1.5872, -0.8055, -0.3262, 0.4496]) tensor([0.6412, 0.0586, 0.0946, 0.2056]) -Greedy action tensor([ 1.5492, -0.4410, -0.0101, 0.2685]) tensor([0.6155, 0.0841, 0.1294, 0.1710]) -Greedy action tensor([ 1.2448, -0.1717, -0.6243, 0.2870]) tensor([0.5616, 0.1362, 0.0866, 0.2155]) -Greedy action tensor([ 1.4607, -0.2439, -0.7793, 0.5867]) tensor([0.5863, 0.1066, 0.0624, 0.2447]) -Greedy action tensor([ 1.5580, -0.9430, -0.2131, 0.1768]) tensor([0.6651, 0.0545, 0.1132, 0.1671]) -Greedy action tensor([ 1.2628, -0.0579, -0.8448, 0.2847]) tensor([0.5667, 0.1513, 0.0689, 0.2131]) -Greedy action tensor([ 1.8194, -0.6318, -0.2851, 0.0719]) tensor([0.7234, 0.0624, 0.0882, 0.1260]) -Greedy action tensor([ 1.1181, -0.4418, -0.3113, 0.0812]) tensor([0.5543, 0.1165, 0.1327, 0.1965]) -Greedy action tensor([ 1.2126, -0.7163, -0.3536, 0.4360]) tensor([0.5512, 0.0801, 0.1151, 0.2536]) -Greedy action tensor([ 1.1589, -0.6872, -0.0685, 0.2838]) tensor([0.5354, 0.0845, 0.1569, 0.2232]) -Greedy action tensor([ 2.3322, -0.9898, -0.1557, 0.5680]) tensor([0.7749, 0.0280, 0.0644, 0.1328]) -Greedy action tensor([ 1.0608, -0.5024, -0.0732, 0.4302]) tensor([0.4846, 0.1015, 0.1559, 0.2580]) -Greedy action tensor([ 2.7506, -1.3983, 0.2682, -0.0639]) tensor([0.8626, 0.0136, 0.0721, 0.0517]) -Greedy action tensor([ 1.1951, -0.1185, -0.4847, 0.3542]) tensor([0.5301, 0.1425, 0.0988, 0.2286]) -Greedy action tensor([ 0.7485, -0.5619, 0.0524, 0.2958]) tensor([0.4159, 0.1122, 0.2074, 0.2645]) -Greedy action tensor([ 2.0210, -0.9587, -0.6063, 0.2491]) tensor([0.7734, 0.0393, 0.0559, 0.1315]) -Greedy action tensor([ 1.6863, -0.4964, -0.3604, 0.1534]) tensor([0.6860, 0.0773, 0.0886, 0.1481]) -Greedy action tensor([ 1.6483, -0.8384, -0.4407, 0.6668]) tensor([0.6322, 0.0526, 0.0783, 0.2369]) -Greedy action tensor([ 1.3749, -0.3171, -0.8584, 0.5044]) tensor([0.5848, 0.1077, 0.0627, 0.2449]) -Greedy action tensor([ 2.0246, -0.9919, 0.0092, 0.7601]) tensor([0.6828, 0.0334, 0.0910, 0.1928]) -Greedy action tensor([ 1.2724, -0.6017, -0.7761, -0.2827]) tensor([0.6695, 0.1028, 0.0863, 0.1414]) -Greedy action tensor([ 1.2270, -0.6083, -0.5013, 0.4696]) tensor([0.5537, 0.0884, 0.0983, 0.2596]) -Greedy action tensor([ 0.6892, -0.1247, -0.2009, -0.2168]) tensor([0.4429, 0.1963, 0.1819, 0.1790]) -Greedy action tensor([ 0.6599, -0.2890, -0.0505, -0.2203]) tensor([0.4361, 0.1688, 0.2143, 0.1808]) -Greedy action tensor([ 0.8622, -0.5172, 0.0494, -0.4685]) tensor([0.5103, 0.1285, 0.2264, 0.1349]) -Greedy action tensor([ 0.4214, -0.2116, -0.1988, -0.1696]) tensor([0.3813, 0.2025, 0.2051, 0.2112]) -Greedy action tensor([ 0.6122, -0.2104, -0.0422, -0.2237]) tensor([0.4180, 0.1836, 0.2172, 0.1812]) -Greedy action tensor([ 0.7984, -0.7110, -0.1066, -0.3165]) tensor([0.5119, 0.1131, 0.2071, 0.1679]) -Greedy action tensor([ 0.7639, -0.4649, -0.1627, -0.2801]) tensor([0.4901, 0.1434, 0.1940, 0.1725]) -Greedy action tensor([ 0.5749, -0.2983, 0.3424, -0.6181]) tensor([0.3979, 0.1662, 0.3153, 0.1207]) -Greedy action tensor([ 0.7708, -0.1014, -0.0688, 0.0370]) tensor([0.4292, 0.1794, 0.1854, 0.2061]) -Greedy action tensor([ 0.5632, -0.4094, -0.0494, -0.3425]) tensor([0.4302, 0.1627, 0.2332, 0.1739]) -Greedy action tensor([ 0.7265, -0.4510, -0.0357, -0.3457]) tensor([0.4724, 0.1455, 0.2204, 0.1617]) -Greedy action tensor([ 1.0027, -0.4705, -0.2420, -0.5130]) tensor([0.5758, 0.1320, 0.1658, 0.1265]) -Greedy action tensor([ 0.4579, -0.2403, 0.0670, -0.4071]) tensor([0.3854, 0.1917, 0.2607, 0.1623]) -Greedy action tensor([ 0.8902, -0.3194, 0.0858, -0.3770]) tensor([0.4933, 0.1472, 0.2207, 0.1389]) -Greedy action tensor([ 1.0330, -0.8865, -0.0127, -0.4759]) tensor([0.5816, 0.0853, 0.2044, 0.1286]) -Greedy action tensor([ 0.4895, -0.1012, -0.0092, -0.0668]) tensor([0.3657, 0.2026, 0.2221, 0.2097]) -Greedy action tensor([ 0.1937, -0.1206, -0.0796, 0.0115]) tensor([0.3008, 0.2197, 0.2289, 0.2507]) -Greedy action tensor([ 0.4492, -0.2529, -0.0231, -0.2356]) tensor([0.3812, 0.1889, 0.2377, 0.1922]) -Greedy action tensor([0.3563, 0.6844, 0.0404, 0.1272]) tensor([0.2556, 0.3548, 0.1863, 0.2032]) -Greedy action tensor([ 0.7126, -0.6072, -0.0235, -0.1702]) tensor([0.4630, 0.1237, 0.2218, 0.1915]) -Greedy action tensor([ 0.6803, -0.4030, -0.0710, -0.4422]) tensor([0.4682, 0.1585, 0.2209, 0.1524]) -Greedy action tensor([ 0.8863, -0.7651, 0.0459, -0.3354]) tensor([0.5214, 0.1000, 0.2250, 0.1537]) -Greedy action tensor([ 0.6100, -0.4236, -0.0289, -0.1806]) tensor([0.4279, 0.1522, 0.2259, 0.1941]) -Greedy action tensor([ 0.7156, -0.8133, -0.2160, -0.1660]) tensor([0.4939, 0.1071, 0.1945, 0.2045]) -Greedy action tensor([ 0.9082, -0.6424, -0.0211, -0.4350]) tensor([0.5354, 0.1136, 0.2114, 0.1397]) -Greedy action tensor([ 0.7153, -0.6471, -0.0757, -0.1581]) tensor([0.4701, 0.1204, 0.2132, 0.1963]) -Greedy action tensor([ 0.8472, -0.3062, 0.0341, -0.2731]) tensor([0.4796, 0.1513, 0.2127, 0.1564]) -Greedy action tensor([ 0.1849, 0.1833, -0.0110, -0.0191]) tensor([0.2750, 0.2746, 0.2261, 0.2243]) -Greedy action tensor([ 0.7893, -0.4578, -0.2007, -0.2262]) tensor([0.4948, 0.1422, 0.1838, 0.1792]) -Greedy action tensor([ 0.9416, -0.7585, 0.1395, -0.4676]) tensor([0.5332, 0.0974, 0.2391, 0.1303]) -Greedy action tensor([ 0.7487, -0.1986, -0.0364, -0.1029]) tensor([0.4404, 0.1708, 0.2009, 0.1879]) -Greedy action tensor([ 1.0322, -0.6417, -0.1610, -0.3715]) tensor([0.5759, 0.1080, 0.1746, 0.1415]) -Greedy action tensor([ 0.9972, -0.5829, -0.0577, -0.4292]) tensor([0.5573, 0.1148, 0.1941, 0.1338]) -Greedy action tensor([ 0.5629, -0.1885, -0.1619, -0.2919]) tensor([0.4199, 0.1981, 0.2034, 0.1786]) -Greedy action tensor([ 0.7874, -0.4269, -0.0527, -0.3894]) tensor([0.4909, 0.1458, 0.2119, 0.1513]) -Greedy action tensor([ 0.4902, -0.0714, -0.0065, -0.0169]) tensor([0.3596, 0.2051, 0.2188, 0.2165]) -Greedy action tensor([0.3462, 0.0728, 0.1215, 0.1030]) tensor([0.2991, 0.2275, 0.2389, 0.2345]) -Greedy action tensor([ 0.5082, -0.1974, -0.0151, -0.3599]) tensor([0.3990, 0.1970, 0.2364, 0.1675]) -Greedy action tensor([ 0.2694, -0.0032, -0.0645, -0.0504]) tensor([0.3121, 0.2376, 0.2235, 0.2267]) -Greedy action tensor([ 0.8763, -0.2666, -0.2019, -0.2687]) tensor([0.5057, 0.1613, 0.1721, 0.1609]) -Greedy action tensor([ 0.5201, -0.0183, -0.0174, -0.2590]) tensor([0.3807, 0.2222, 0.2224, 0.1747]) -Greedy action tensor([ 0.8686, -0.6856, 0.0794, -0.4848]) tensor([0.5198, 0.1099, 0.2361, 0.1343]) -Greedy action tensor([ 0.9262, -0.2802, -0.0183, -0.2414]) tensor([0.5002, 0.1497, 0.1945, 0.1556]) -Greedy action tensor([ 0.8038, -0.3593, -0.1837, -0.6610]) tensor([0.5219, 0.1631, 0.1944, 0.1206]) -Greedy action tensor([ 1.0417, -0.3863, -0.0230, -0.5123]) tensor([0.5568, 0.1335, 0.1920, 0.1177]) -Greedy action tensor([ 0.3470, -0.0821, -0.0407, -0.0488]) tensor([0.3330, 0.2168, 0.2260, 0.2242]) -Greedy action tensor([ 0.7901, -0.4584, 0.0149, -0.3584]) tensor([0.4843, 0.1390, 0.2231, 0.1536]) -Greedy action tensor([ 0.6797, -0.4099, -0.0910, -0.0727]) tensor([0.4405, 0.1482, 0.2038, 0.2076]) -Greedy action tensor([ 0.5792, -0.2794, -0.0477, -0.1050]) tensor([0.4061, 0.1721, 0.2170, 0.2049]) -Greedy action tensor([ 0.6632, 0.1395, -0.1180, 0.2066]) tensor([0.3726, 0.2207, 0.1706, 0.2360]) -Greedy action tensor([ 0.9147, -0.9422, -0.0048, -0.3430]) tensor([0.5437, 0.0849, 0.2168, 0.1546]) -Greedy action tensor([ 0.6081, -0.5838, 0.3857, -0.5147]) tensor([0.4116, 0.1250, 0.3295, 0.1339]) -Greedy action tensor([ 0.8246, -0.2600, 0.0520, -0.3678]) tensor([0.4754, 0.1607, 0.2196, 0.1443]) -Greedy action tensor([ 0.8292, -0.6827, 0.1100, -0.4576]) tensor([0.5041, 0.1111, 0.2456, 0.1392]) -Greedy action tensor([ 0.8655, -0.3712, -0.0908, -0.3702]) tensor([0.5088, 0.1477, 0.1956, 0.1479]) -Greedy action tensor([ 0.9647, -0.5346, -0.1366, -0.3498]) tensor([0.5481, 0.1224, 0.1822, 0.1472]) -Greedy action tensor([ 0.6101, -0.2697, 0.0480, -0.3368]) tensor([0.4214, 0.1748, 0.2402, 0.1635]) -Greedy action tensor([ 0.4677, -0.1328, -0.1264, -0.0649]) tensor([0.3721, 0.2041, 0.2054, 0.2184]) -Greedy action tensor([ 0.2284, 0.4448, -0.0708, 0.1159]) tensor([0.2580, 0.3203, 0.1913, 0.2305]) -Greedy action tensor([ 0.6774, -0.4819, -0.1000, -0.0936]) tensor([0.4473, 0.1403, 0.2056, 0.2069]) -Greedy action tensor([ 1.1918, -0.5211, -0.1866, -0.3455]) tensor([0.6070, 0.1095, 0.1530, 0.1305]) -Greedy action tensor([ 0.2239, 0.0507, -0.1734, -0.1858]) tensor([0.3148, 0.2647, 0.2116, 0.2090]) -Greedy action tensor([ 0.8826, -0.6592, -0.1637, -0.2967]) tensor([0.5340, 0.1143, 0.1875, 0.1642]) -Greedy action tensor([ 0.8301, -0.3702, 0.1198, -0.2274]) tensor([0.4673, 0.1407, 0.2297, 0.1623]) -Greedy action tensor([ 0.8371, -0.4661, -0.0631, -0.3800]) tensor([0.5065, 0.1376, 0.2059, 0.1500]) -Greedy action tensor([ 1.0645, -0.6212, -0.0201, -0.3485]) tensor([0.5660, 0.1049, 0.1913, 0.1378]) -Greedy action tensor([ 1.3448, -1.3157, 0.0560, -0.6694]) tensor([0.6761, 0.0473, 0.1864, 0.0902]) -Greedy action tensor([ 0.8272, 0.5938, -0.0406, 0.1783]) tensor([0.3657, 0.2896, 0.1536, 0.1911]) -Greedy action tensor([ 0.5734, -0.3687, -0.0033, -0.2469]) tensor([0.4181, 0.1630, 0.2348, 0.1841]) -Greedy action tensor([ 0.7970, -0.7877, 0.0130, -0.3684]) tensor([0.5067, 0.1039, 0.2314, 0.1580]) -Greedy action tensor([ 0.6948, -0.5481, 0.0177, -0.2383]) tensor([0.4566, 0.1318, 0.2320, 0.1796]) -Greedy action tensor([ 0.7987, -0.8245, -0.1018, -0.3450]) tensor([0.5202, 0.1026, 0.2114, 0.1658]) -Greedy action tensor([ 0.7805, -0.3904, 0.0383, -0.4699]) tensor([0.4825, 0.1496, 0.2297, 0.1382]) -Greedy action tensor([ 1.2671, -1.1078, 0.0986, -0.7142]) tensor([0.6486, 0.0603, 0.2016, 0.0894]) -Greedy action tensor([ 0.1504, -0.0185, -0.0876, -0.4761]) tensor([0.3157, 0.2667, 0.2489, 0.1687]) -Greedy action tensor([ 0.6768, -0.2175, -0.0853, -0.1526]) tensor([0.4325, 0.1769, 0.2019, 0.1887]) -Greedy action tensor([ 0.5592, -0.3996, 0.0277, -0.1447]) tensor([0.4056, 0.1555, 0.2384, 0.2006]) -Greedy action tensor([ 0.8321, -0.4327, -0.1005, -0.1553]) tensor([0.4882, 0.1378, 0.1921, 0.1819]) -Greedy action tensor([ 0.6084, -0.1123, -0.0346, -0.2742]) tensor([0.4122, 0.2005, 0.2167, 0.1705]) -Greedy action tensor([ 0.8239, -0.3785, -0.1634, -0.1411]) tensor([0.4869, 0.1463, 0.1814, 0.1855]) -Greedy action tensor([ 0.3507, -0.4457, -0.2300, -0.2469]) tensor([0.3905, 0.1761, 0.2185, 0.2149]) -Greedy action tensor([ 0.7483, -0.5598, -0.0013, -0.1077]) tensor([0.4613, 0.1247, 0.2180, 0.1960]) -Greedy action tensor([-0.7596, 0.0925, 0.1648, -0.0716]) tensor([0.1273, 0.2985, 0.3209, 0.2533]) -Greedy action tensor([-1.9419, -0.4504, 0.6703, -0.1780]) tensor([0.0401, 0.1784, 0.5472, 0.2343]) -Greedy action tensor([-1.8877, -0.4383, 0.6405, -0.1523]) tensor([0.0426, 0.1816, 0.5341, 0.2417]) -Greedy action tensor([-1.8797, -0.4082, 0.6482, -0.1327]) tensor([0.0423, 0.1844, 0.5304, 0.2429]) -Greedy action tensor([-1.4765, -0.1084, 0.5394, 0.1505]) tensor([0.0571, 0.2241, 0.4284, 0.2904]) -Greedy action tensor([-1.5995, -0.5785, 0.5333, 0.0050]) tensor([0.0582, 0.1615, 0.4909, 0.2894]) -Greedy action tensor([-1.4455, -0.4941, 0.5087, 0.3212]) tensor([0.0606, 0.1569, 0.4278, 0.3547]) -Greedy action tensor([-1.9116, -0.3998, 0.6512, -0.1618]) tensor([0.0412, 0.1869, 0.5347, 0.2372]) -Greedy action tensor([-1.2848, -0.2702, 0.3428, 0.2412]) tensor([0.0744, 0.2051, 0.3786, 0.3420]) -Greedy action tensor([-0.4634, 1.0868, 0.0252, 0.4044]) tensor([0.1028, 0.4846, 0.1676, 0.2449]) -Greedy action tensor([-1.8075, -0.4383, 0.6274, -0.0415]) tensor([0.0451, 0.1772, 0.5143, 0.2635]) -Greedy action tensor([-1.3683, 0.7195, 0.2438, 0.2499]) tensor([0.0523, 0.4218, 0.2621, 0.2637]) -Greedy action tensor([-1.8175, -0.3534, 0.5875, -0.1101]) tensor([0.0456, 0.1973, 0.5055, 0.2516]) -Greedy action tensor([-1.9385, -0.4413, 0.6634, -0.1750]) tensor([0.0403, 0.1803, 0.5441, 0.2353]) -Greedy action tensor([-1.7829, -0.3561, 0.6038, -0.0736]) tensor([0.0464, 0.1931, 0.5043, 0.2562]) -Greedy action tensor([-1.8376, -0.4929, 0.6169, -0.1344]) tensor([0.0455, 0.1747, 0.5299, 0.2500]) -Greedy action tensor([-0.5855, -0.7360, 0.2549, 0.4342]) tensor([0.1439, 0.1238, 0.3334, 0.3989]) -Greedy action tensor([-1.8988, -0.3679, 0.6431, -0.1545]) tensor([0.0416, 0.1922, 0.5283, 0.2379]) -Greedy action tensor([-1.0525, -0.5440, 0.2607, 0.1636]) tensor([0.1025, 0.1705, 0.3812, 0.3459]) -Greedy action tensor([-1.1499, 0.9510, 0.2947, 0.0935]) tensor([0.0592, 0.4842, 0.2512, 0.2054]) -Greedy action tensor([-1.8501, -0.4772, 0.6170, -0.1358]) tensor([0.0449, 0.1771, 0.5289, 0.2491]) -Greedy action tensor([-1.6157, 0.3629, 0.4319, -0.1521]) tensor([0.0493, 0.3562, 0.3817, 0.2128]) -Greedy action tensor([-1.8176, -0.4644, 0.6470, -0.0502]) tensor([0.0445, 0.1721, 0.5230, 0.2604]) -Greedy action tensor([-1.7766, -0.3196, 0.6127, -0.1050]) tensor([0.0465, 0.1995, 0.5068, 0.2472]) -Greedy action tensor([-1.6254, -0.3064, 0.6070, -0.0363]) tensor([0.0527, 0.1972, 0.4917, 0.2584]) -Greedy action tensor([-1.9090, -0.4631, 0.6471, -0.1677]) tensor([0.0420, 0.1781, 0.5406, 0.2393]) -Greedy action tensor([-1.9372, -0.4380, 0.6618, -0.1761]) tensor([0.0404, 0.1809, 0.5435, 0.2351]) -Greedy action tensor([-1.9344, -0.4502, 0.6619, -0.1735]) tensor([0.0406, 0.1790, 0.5443, 0.2361]) -Greedy action tensor([-1.8866, -0.4478, 0.6417, -0.1509]) tensor([0.0427, 0.1800, 0.5351, 0.2422]) -Greedy action tensor([-1.9145, -0.4325, 0.6494, -0.1661]) tensor([0.0414, 0.1824, 0.5381, 0.2381]) -Greedy action tensor([-1.8834, -0.3414, 0.6228, -0.1502]) tensor([0.0424, 0.1981, 0.5196, 0.2399]) -Greedy action tensor([-1.1767, -0.0608, 0.5113, -0.7075]) tensor([0.0904, 0.2760, 0.4890, 0.1446]) -Greedy action tensor([-1.1617, -0.5238, 0.3896, 0.5142]) tensor([0.0772, 0.1461, 0.3642, 0.4125]) -Greedy action tensor([-1.8497, -0.5280, 0.8246, -0.0535]) tensor([0.0396, 0.1483, 0.5737, 0.2384]) -Greedy action tensor([-1.6155, -0.4669, 0.4948, 0.0510]) tensor([0.0565, 0.1782, 0.4662, 0.2991]) -Greedy action tensor([-1.6696, -0.4652, 0.5439, -0.0278]) tensor([0.0536, 0.1788, 0.4906, 0.2770]) -Greedy action tensor([-1.9345, -0.4419, 0.6612, -0.1730]) tensor([0.0405, 0.1803, 0.5433, 0.2359]) -Greedy action tensor([-1.9002, -0.4459, 0.6466, -0.1588]) tensor([0.0421, 0.1803, 0.5374, 0.2402]) -Greedy action tensor([-1.8312, -0.2203, 0.5778, -0.0963]) tensor([0.0439, 0.2196, 0.4879, 0.2486]) -Greedy action tensor([-0.7085, 0.8583, 0.2061, 0.3918]) tensor([0.0886, 0.4243, 0.2210, 0.2661]) -Greedy action tensor([-1.8434, -0.4618, 0.6165, -0.1364]) tensor([0.0451, 0.1794, 0.5273, 0.2483]) -Greedy action tensor([-1.6159, -0.5279, 0.4512, -0.0736]) tensor([0.0604, 0.1794, 0.4776, 0.2826]) -Greedy action tensor([-1.8876, -0.3590, 0.6385, -0.1500]) tensor([0.0420, 0.1938, 0.5254, 0.2388]) -Greedy action tensor([-1.7724, -0.3403, 0.5735, -0.1354]) tensor([0.0481, 0.2016, 0.5028, 0.2475]) -Greedy action tensor([-1.9308, -0.4391, 0.6616, -0.1723]) tensor([0.0406, 0.1806, 0.5429, 0.2358]) -Greedy action tensor([-0.1114, -0.4666, 0.9962, 1.6536]) tensor([0.0946, 0.0663, 0.2864, 0.5527]) -Greedy action tensor([-1.6848, -0.3600, 0.6189, -0.0432]) tensor([0.0502, 0.1887, 0.5022, 0.2590]) -Greedy action tensor([-1.4903, -0.7149, 0.9075, 0.1069]) tensor([0.0523, 0.1136, 0.5756, 0.2585]) -Greedy action tensor([-1.7247, -0.5139, 0.5290, -0.0571]) tensor([0.0521, 0.1750, 0.4966, 0.2763]) -Greedy action tensor([-1.8842, -0.3583, 0.6304, -0.1347]) tensor([0.0422, 0.1940, 0.5213, 0.2426]) -Greedy action tensor([-1.8768, -0.4816, 0.6291, -0.1474]) tensor([0.0436, 0.1760, 0.5345, 0.2459]) -Greedy action tensor([-1.8591, -0.4630, 0.6330, -0.1334]) tensor([0.0440, 0.1776, 0.5315, 0.2470]) -Greedy action tensor([-1.9299, -0.3991, 0.6517, -0.1707]) tensor([0.0406, 0.1875, 0.5363, 0.2356]) -Greedy action tensor([-1.9413, -0.4408, 0.6646, -0.1788]) tensor([0.0402, 0.1804, 0.5449, 0.2344]) -Greedy action tensor([-1.8204, -0.3936, 0.5862, -0.1131]) tensor([0.0459, 0.1913, 0.5096, 0.2532]) -Greedy action tensor([-0.8953, 0.7590, 0.1650, -0.0326]) tensor([0.0871, 0.4553, 0.2514, 0.2063]) -Greedy action tensor([-1.8235, -0.4612, 0.6076, -0.1159]) tensor([0.0459, 0.1792, 0.5218, 0.2531]) -Greedy action tensor([-1.8498, -0.4736, 0.6400, -0.1360]) tensor([0.0443, 0.1754, 0.5343, 0.2459]) -Greedy action tensor([-1.8245, -0.3550, 0.6122, -0.0937]) tensor([0.0446, 0.1938, 0.5099, 0.2517]) -Greedy action tensor([-1.4049, 0.7107, 0.2423, 0.2413]) tensor([0.0508, 0.4216, 0.2639, 0.2637]) -Greedy action tensor([-1.0082, 0.3348, 0.1060, 0.3424]) tensor([0.0852, 0.3264, 0.2596, 0.3288]) -Greedy action tensor([-1.3024, -0.5051, 0.2947, 0.2594]) tensor([0.0774, 0.1717, 0.3821, 0.3688]) -Greedy action tensor([-1.2115, -0.2345, 0.3044, -0.2955]) tensor([0.0934, 0.2481, 0.4252, 0.2334]) -Greedy action tensor([-1.6919, -0.5940, 0.6511, -0.1489]) tensor([0.0524, 0.1571, 0.5455, 0.2451]) -Greedy action tensor([-1.9118, -0.4485, 0.6542, -0.1676]) tensor([0.0416, 0.1796, 0.5410, 0.2378]) -Greedy action tensor([-1.8929, -0.3935, 0.6404, -0.1539]) tensor([0.0421, 0.1885, 0.5300, 0.2395]) -Greedy action tensor([-1.9364, -0.4319, 0.6618, -0.1735]) tensor([0.0404, 0.1817, 0.5426, 0.2353]) -Greedy action tensor([-1.8939, -0.3886, 0.6475, -0.1461]) tensor([0.0418, 0.1882, 0.5303, 0.2398]) -Greedy action tensor([-1.8228, -0.4467, 0.6101, -0.1162]) tensor([0.0457, 0.1811, 0.5211, 0.2520]) -Greedy action tensor([-1.9228, -0.4145, 0.6494, -0.1677]) tensor([0.0410, 0.1852, 0.5367, 0.2371]) -Greedy action tensor([-1.8310, -0.3427, 0.6330, -0.0788]) tensor([0.0436, 0.1930, 0.5121, 0.2513]) -Greedy action tensor([-1.9447, -0.4488, 0.6681, -0.1799]) tensor([0.0401, 0.1790, 0.5468, 0.2342]) -Greedy action tensor([-1.9195, -0.4076, 0.6506, -0.1641]) tensor([0.0410, 0.1860, 0.5358, 0.2372]) -Greedy action tensor([-0.4191, 0.7936, 0.0633, 0.4980]) tensor([0.1179, 0.3963, 0.1909, 0.2949]) -Greedy action tensor([-1.8669, -0.2667, 0.6054, -0.1212]) tensor([0.0425, 0.2105, 0.5035, 0.2435]) -Greedy action tensor([-1.9445, -0.4470, 0.6675, -0.1809]) tensor([0.0401, 0.1793, 0.5466, 0.2340]) -Greedy action tensor([-0.9971, -0.4738, 0.2641, 0.2874]) tensor([0.1017, 0.1717, 0.3591, 0.3675]) -Greedy action tensor([-1.0330, -0.4244, 0.4162, 0.5294]) tensor([0.0843, 0.1549, 0.3589, 0.4019]) -Greedy action tensor([-1.9206, -0.4481, 0.6580, -0.1705]) tensor([0.0412, 0.1795, 0.5425, 0.2369]) -Greedy action tensor([-1.8615, -0.1012, 0.5861, -0.1730]) tensor([0.0420, 0.2444, 0.4860, 0.2275]) -Greedy action tensor([-1.1157, 0.3018, 0.1806, 0.1221]) tensor([0.0818, 0.3374, 0.2989, 0.2819]) -Greedy action tensor([ 1.3148, -0.2690, -0.9169, 0.5237]) tensor([0.5663, 0.1162, 0.0608, 0.2567]) -Greedy action tensor([ 2.0687, -0.9784, -0.1459, 0.6274]) tensor([0.7177, 0.0341, 0.0784, 0.1698]) -Greedy action tensor([ 1.2800, -0.0464, -0.7656, 0.1408]) tensor([0.5831, 0.1548, 0.0754, 0.1867]) -Greedy action tensor([ 1.3536, 0.1130, -0.8963, 0.6018]) tensor([0.5359, 0.1550, 0.0565, 0.2527]) -Greedy action tensor([ 1.7522, -0.6399, -0.0156, 1.0640]) tensor([0.5667, 0.0518, 0.0967, 0.2848]) -Greedy action tensor([ 1.2317, -0.3814, -0.2092, 0.1471]) tensor([0.5637, 0.1123, 0.1334, 0.1905]) -Greedy action tensor([ 1.1543, 0.0705, -0.9430, 0.6431]) tensor([0.4852, 0.1642, 0.0596, 0.2910]) -Greedy action tensor([ 1.3608, -0.0034, -0.6267, 0.6641]) tensor([0.5289, 0.1352, 0.0725, 0.2635]) -Greedy action tensor([ 1.4016, -0.4068, -0.5574, 0.2714]) tensor([0.6143, 0.1007, 0.0866, 0.1984]) -Greedy action tensor([ 2.5943, -1.2492, -0.3166, 1.0816]) tensor([0.7715, 0.0165, 0.0420, 0.1700]) -Greedy action tensor([ 1.2981e+00, 1.7871e-01, -2.1496e-01, -3.9837e-04]) tensor([0.5495, 0.1794, 0.1210, 0.1500]) -Greedy action tensor([ 1.7457, -0.9744, -0.1893, 0.5614]) tensor([0.6595, 0.0434, 0.0953, 0.2018]) -Greedy action tensor([ 2.0114, -0.4744, -0.2743, 0.3337]) tensor([0.7290, 0.0607, 0.0741, 0.1362]) -Greedy action tensor([ 1.7109, -0.2908, 0.0076, 0.2441]) tensor([0.6461, 0.0873, 0.1176, 0.1490]) -Greedy action tensor([ 1.7340, -0.1740, -0.8632, 0.0999]) tensor([0.7052, 0.1046, 0.0525, 0.1376]) -Greedy action tensor([ 1.0712, -0.2377, -0.4019, 0.1149]) tensor([0.5309, 0.1434, 0.1217, 0.2040]) -Greedy action tensor([ 1.0445, -0.6002, -0.1680, 0.1972]) tensor([0.5211, 0.1006, 0.1550, 0.2233]) -Greedy action tensor([ 0.7021, -0.0739, -0.1400, 0.1453]) tensor([0.4058, 0.1868, 0.1748, 0.2326]) -Greedy action tensor([ 1.6839, -0.7948, -0.1048, 0.1231]) tensor([0.6845, 0.0574, 0.1144, 0.1437]) -Greedy action tensor([ 1.8891, -1.0942, -0.1367, 0.5638]) tensor([0.6905, 0.0350, 0.0911, 0.1835]) -Greedy action tensor([0.8662, 0.1100, 0.0642, 0.2883]) tensor([0.4034, 0.1894, 0.1809, 0.2263]) -Greedy action tensor([ 1.1141, -0.3225, -0.0457, 0.0915]) tensor([0.5233, 0.1244, 0.1641, 0.1882]) -Greedy action tensor([ 1.5389, -0.7010, -0.1165, 0.0856]) tensor([0.6530, 0.0695, 0.1247, 0.1527]) -Greedy action tensor([ 1.8196, -1.2179, 0.1113, 0.1611]) tensor([0.7045, 0.0338, 0.1276, 0.1341]) -Greedy action tensor([ 1.4675, -0.7269, -0.5122, 0.3387]) tensor([0.6357, 0.0708, 0.0878, 0.2056]) -Greedy action tensor([ 1.6981, -0.4452, -0.1423, 0.2752]) tensor([0.6592, 0.0773, 0.1046, 0.1589]) -Greedy action tensor([ 2.0919, -1.2900, 0.0383, 0.8362]) tensor([0.6910, 0.0235, 0.0886, 0.1969]) -Greedy action tensor([ 1.6492, -0.7939, -0.3824, 0.3807]) tensor([0.6670, 0.0580, 0.0875, 0.1876]) -Greedy action tensor([ 0.7835, -0.0834, -0.1943, 0.1718]) tensor([0.4276, 0.1797, 0.1608, 0.2319]) -Greedy action tensor([ 1.0391, -0.1389, -0.1103, -0.1253]) tensor([0.5163, 0.1590, 0.1636, 0.1611]) -Greedy action tensor([ 0.9347, -0.8135, -0.3732, -0.3276]) tensor([0.5789, 0.1008, 0.1565, 0.1638]) -Greedy action tensor([ 0.7312, -0.5688, -0.2061, 0.2152]) tensor([0.4423, 0.1205, 0.1732, 0.2640]) -Greedy action tensor([ 2.9652, -2.0077, 0.2737, -0.1481]) tensor([0.8935, 0.0062, 0.0606, 0.0397]) -Greedy action tensor([ 1.8481, -1.2139, -0.0870, 0.9417]) tensor([0.6269, 0.0293, 0.0905, 0.2533]) -Greedy action tensor([ 1.4829, -0.2979, -0.6455, 0.0823]) tensor([0.6519, 0.1098, 0.0776, 0.1607]) -Greedy action tensor([ 1.6063, -0.2594, -0.8174, -0.2013]) tensor([0.7105, 0.1100, 0.0629, 0.1166]) -Greedy action tensor([ 1.5866, 0.0139, -0.7605, 0.8961]) tensor([0.5542, 0.1150, 0.0530, 0.2778]) -Greedy action tensor([ 1.1774, -0.7767, 0.0969, 0.0580]) tensor([0.5532, 0.0784, 0.1878, 0.1806]) -Greedy action tensor([ 1.3073, -0.3490, -0.7043, 0.1867]) tensor([0.6058, 0.1156, 0.0810, 0.1975]) -Greedy action tensor([ 2.2670, -1.1404, -0.1222, 0.3704]) tensor([0.7844, 0.0260, 0.0719, 0.1177]) -Greedy action tensor([ 1.6177, -0.5050, -0.7501, 0.4069]) tensor([0.6617, 0.0792, 0.0620, 0.1971]) -Greedy action tensor([ 1.3763, -0.4553, -0.4600, 0.4029]) tensor([0.5892, 0.0944, 0.0939, 0.2226]) -Greedy action tensor([ 1.0126, -0.6793, 0.0554, -0.0351]) tensor([0.5211, 0.0960, 0.2001, 0.1828]) -Greedy action tensor([ 1.4418, 0.0340, -0.1587, 0.4992]) tensor([0.5446, 0.1333, 0.1099, 0.2122]) -Greedy action tensor([ 1.5043, -0.4252, -0.3977, 0.3120]) tensor([0.6258, 0.0909, 0.0934, 0.1899]) -Greedy action tensor([ 1.8752, 0.4557, -0.2837, 0.3244]) tensor([0.6372, 0.1541, 0.0736, 0.1351]) -Greedy action tensor([ 1.5625, -0.5972, -0.2283, 0.2627]) tensor([0.6432, 0.0742, 0.1073, 0.1753]) -Greedy action tensor([ 1.9966, 0.4405, -0.1339, 0.4406]) tensor([0.6491, 0.1369, 0.0771, 0.1369]) -Greedy action tensor([ 1.4297, -0.5521, -0.6025, 0.1503]) tensor([0.6464, 0.0891, 0.0847, 0.1798]) -Greedy action tensor([ 1.5861, -0.4270, -0.4316, 0.0026]) tensor([0.6795, 0.0908, 0.0903, 0.1395]) -Greedy action tensor([ 2.0113, -0.3573, -0.9519, 0.5019]) tensor([0.7319, 0.0685, 0.0378, 0.1618]) -Greedy action tensor([ 0.8918, -0.4423, 0.2502, 0.0319]) tensor([0.4519, 0.1190, 0.2379, 0.1912]) -Greedy action tensor([ 2.4198, -1.6881, -0.2913, 0.3808]) tensor([0.8244, 0.0136, 0.0548, 0.1073]) -Greedy action tensor([ 1.5973, -0.4947, -0.6053, 0.3705]) tensor([0.6548, 0.0808, 0.0724, 0.1920]) -Greedy action tensor([ 1.7650, -0.6599, -0.6126, 0.4511]) tensor([0.6896, 0.0610, 0.0640, 0.1854]) -Greedy action tensor([1.4567, 0.5437, 0.0290, 0.3383]) tensor([0.5081, 0.2039, 0.1219, 0.1661]) -Greedy action tensor([ 1.6702, -0.3694, -0.2589, 0.3839]) tensor([0.6445, 0.0838, 0.0936, 0.1781]) -Greedy action tensor([ 1.2162, -0.2453, -0.6238, 0.4915]) tensor([0.5333, 0.1237, 0.0847, 0.2584]) -Greedy action tensor([ 1.3436, -0.5171, -0.2920, 0.3601]) tensor([0.5799, 0.0902, 0.1130, 0.2169]) -Greedy action tensor([ 1.9579, -0.6454, -0.0725, 0.4068]) tensor([0.7055, 0.0522, 0.0926, 0.1496]) -Greedy action tensor([ 1.6529, -0.4502, -0.9566, 0.2463]) tensor([0.6941, 0.0847, 0.0511, 0.1701]) -Greedy action tensor([ 1.5052, -0.6222, -0.3635, 0.7073]) tensor([0.5801, 0.0691, 0.0895, 0.2612]) -Greedy action tensor([ 0.9839, -0.2281, -0.3076, 0.1485]) tensor([0.4984, 0.1483, 0.1370, 0.2162]) -Greedy action tensor([ 1.3303, -0.6674, -0.1565, 0.0414]) tensor([0.6107, 0.0828, 0.1381, 0.1683]) -Greedy action tensor([ 1.2357, -0.3363, -0.8476, 0.2800]) tensor([0.5825, 0.1209, 0.0725, 0.2240]) -Greedy action tensor([ 1.3143, -0.3082, -0.4712, 0.4723]) tensor([0.5568, 0.1099, 0.0934, 0.2399]) -Greedy action tensor([ 1.4449, -0.0350, -0.8617, 0.7011]) tensor([0.5548, 0.1263, 0.0553, 0.2637]) -Greedy action tensor([ 1.4253, -0.7760, -0.3008, 0.3391]) tensor([0.6150, 0.0681, 0.1095, 0.2075]) -Greedy action tensor([ 1.3194, -0.0069, -0.5908, 0.3364]) tensor([0.5594, 0.1485, 0.0828, 0.2093]) -Greedy action tensor([ 1.9687, -0.6079, -0.2862, 0.1426]) tensor([0.7452, 0.0567, 0.0782, 0.1200]) -Greedy action tensor([ 1.6158, -0.6412, -0.6588, 0.2016]) tensor([0.6894, 0.0722, 0.0709, 0.1676]) -Greedy action tensor([ 1.2145, -0.7739, 0.0360, 0.3116]) tensor([0.5405, 0.0740, 0.1663, 0.2191]) -Greedy action tensor([ 1.6002, -0.0781, -0.4839, 0.0122]) tensor([0.6599, 0.1232, 0.0821, 0.1348]) -Greedy action tensor([ 1.5205, 0.0510, -0.4860, 0.1416]) tensor([0.6187, 0.1423, 0.0832, 0.1558]) -Greedy action tensor([ 1.3294, -0.4724, -0.6173, 0.5589]) tensor([0.5648, 0.0932, 0.0806, 0.2614]) -Greedy action tensor([ 1.6043, -0.2801, -0.6251, 0.3241]) tensor([0.6504, 0.0988, 0.0700, 0.1808]) -Greedy action tensor([ 1.6051, -0.8183, -0.1168, 0.3641]) tensor([0.6425, 0.0569, 0.1148, 0.1857]) -Greedy action tensor([ 1.5406, -0.4256, -0.4784, 0.4186]) tensor([0.6256, 0.0876, 0.0831, 0.2037]) -Greedy action tensor([ 1.7021, 0.1152, -0.6270, 0.2134]) tensor([0.6546, 0.1339, 0.0637, 0.1477]) -Greedy action tensor([ 1.9030, -0.5722, -0.1087, 0.5187]) tensor([0.6810, 0.0573, 0.0911, 0.1706]) -Greedy action tensor([ 1.5521, -0.2179, -0.6655, 0.3420]) tensor([0.6340, 0.1080, 0.0690, 0.1890]) -Greedy action tensor([ 0.6640, -0.2107, -0.6105, 0.3000]) tensor([0.4182, 0.1744, 0.1169, 0.2906]) -Greedy action tensor([ 0.1422, -1.4766, 0.5060, 0.1661]) tensor([0.2731, 0.0541, 0.3930, 0.2797]) -Greedy action tensor([-0.4474, -0.1483, 0.0863, -0.0329]) tensor([0.1796, 0.2422, 0.3063, 0.2719]) -Greedy action tensor([-0.6716, -0.3034, 0.3203, -1.3611]) tensor([0.1772, 0.2561, 0.4778, 0.0889]) -Greedy action tensor([ 0.0223, -0.1888, -0.2224, 0.9702]) tensor([0.1933, 0.1565, 0.1514, 0.4988]) -Greedy action tensor([-0.5898, 0.0839, 0.8292, -1.0431]) tensor([0.1294, 0.2538, 0.5347, 0.0822]) -Greedy action tensor([ 1.3613, -0.7037, -0.3500, 0.4966]) tensor([0.5785, 0.0734, 0.1045, 0.2436]) -Greedy action tensor([-0.9861, -0.6687, -0.9907, 0.5179]) tensor([0.1271, 0.1746, 0.1265, 0.5718]) -Greedy action tensor([ 0.9937, -0.4828, 0.1217, 0.4024]) tensor([0.4545, 0.1038, 0.1900, 0.2516]) -Greedy action tensor([ 0.5762, -0.0062, 1.1187, -0.7644]) tensor([0.2824, 0.1578, 0.4859, 0.0739]) -Greedy action tensor([-0.4290, -0.7620, -0.2177, -0.1874]) tensor([0.2367, 0.1696, 0.2924, 0.3013]) -Greedy action tensor([-0.1044, -0.1514, 1.0334, -0.6487]) tensor([0.1769, 0.1687, 0.5518, 0.1026]) -Greedy action tensor([-0.3677, 0.0597, 1.1950, -0.4287]) tensor([0.1213, 0.1859, 0.5787, 0.1141]) -Greedy action tensor([ 0.2304, -1.2118, 0.2663, 0.5334]) tensor([0.2757, 0.0652, 0.2858, 0.3733]) -Greedy action tensor([-0.1307, -0.9963, 0.8065, -0.0829]) tensor([0.1991, 0.0838, 0.5083, 0.2088]) -Greedy action tensor([ 0.3686, -1.2494, 0.2969, -0.8824]) tensor([0.4140, 0.0821, 0.3854, 0.1185]) -Greedy action tensor([ 0.5307, -0.6478, 0.4068, -0.1639]) tensor([0.3717, 0.1144, 0.3284, 0.1856]) -Greedy action tensor([-1.3382, -0.7007, 0.6580, -0.6149]) tensor([0.0812, 0.1536, 0.5978, 0.1674]) -Greedy action tensor([ 1.4825, -1.0780, -0.0619, 0.3163]) tensor([0.6241, 0.0482, 0.1332, 0.1944]) -Greedy action tensor([-0.6919, -0.4629, -0.7253, -0.0414]) tensor([0.1945, 0.2446, 0.1881, 0.3728]) -Greedy action tensor([-0.1552, -1.8810, 0.8746, 0.3926]) tensor([0.1752, 0.0312, 0.4906, 0.3030]) -Greedy action tensor([ 0.2874, -0.1677, -0.1227, -0.9750]) tensor([0.3874, 0.2458, 0.2571, 0.1096]) -Greedy action tensor([ 0.3090, -0.2140, -0.6500, -0.0805]) tensor([0.3769, 0.2234, 0.1444, 0.2553]) -Greedy action tensor([ 1.4637, -0.3460, 0.2569, 0.4967]) tensor([0.5426, 0.0888, 0.1623, 0.2063]) -Greedy action tensor([ 0.5097, -1.6259, -1.1203, 0.4310]) tensor([0.4467, 0.0528, 0.0875, 0.4130]) -Greedy action tensor([-0.3300, -0.8888, 0.7446, -0.4823]) tensor([0.1866, 0.1067, 0.5465, 0.1602]) -Greedy action tensor([ 0.5180, 0.3276, -0.6203, -0.1913]) tensor([0.3789, 0.3132, 0.1214, 0.1864]) -Greedy action tensor([-1.2543, -0.8441, 0.2736, -0.6389]) tensor([0.1115, 0.1681, 0.5140, 0.2064]) -Greedy action tensor([-0.0888, -0.6711, 0.1202, -0.2805]) tensor([0.2765, 0.1545, 0.3408, 0.2283]) -Greedy action tensor([1.4471, 0.1892, 0.1347, 0.0824]) tensor([0.5528, 0.1571, 0.1488, 0.1412]) -Greedy action tensor([-0.4485, -0.6248, -0.8839, -0.0548]) tensor([0.2520, 0.2113, 0.1631, 0.3736]) -Greedy action tensor([-0.1794, -1.2398, -0.0404, 0.0651]) tensor([0.2651, 0.0918, 0.3046, 0.3385]) -Greedy action tensor([ 0.7085, -0.4638, 0.1260, 0.2964]) tensor([0.3952, 0.1224, 0.2207, 0.2617]) -Greedy action tensor([ 0.2232, -1.2027, 0.2526, 0.6033]) tensor([0.2679, 0.0644, 0.2759, 0.3918]) -Greedy action tensor([-0.6675, -0.0107, -0.1745, -0.2536]) tensor([0.1645, 0.3173, 0.2694, 0.2489]) -Greedy action tensor([-1.4703, -0.5074, 0.6505, 0.0445]) tensor([0.0606, 0.1587, 0.5052, 0.2756]) -Greedy action tensor([-0.1409, -1.2280, 0.4790, -1.1074]) tensor([0.2796, 0.0943, 0.5197, 0.1064]) -Greedy action tensor([-0.1712, -0.6686, 0.3568, -0.3547]) tensor([0.2418, 0.1470, 0.4099, 0.2012]) -Greedy action tensor([-0.2250, -0.2026, 0.4590, 0.3774]) tensor([0.1715, 0.1754, 0.3399, 0.3133]) -Greedy action tensor([ 0.1110, -0.4881, -0.3747, 0.5391]) tensor([0.2704, 0.1485, 0.1663, 0.4148]) -Greedy action tensor([-0.3890, -0.8740, -0.3691, -0.0694]) tensor([0.2492, 0.1534, 0.2542, 0.3431]) -Greedy action tensor([ 1.0519, -1.0477, 0.8073, 0.5210]) tensor([0.4010, 0.0491, 0.3140, 0.2358]) -Greedy action tensor([-0.3846, 1.0101, -0.5523, -0.5882]) tensor([0.1494, 0.6025, 0.1263, 0.1218]) -Greedy action tensor([ 0.0238, -0.5034, 1.1995, -0.4138]) tensor([0.1826, 0.1078, 0.5917, 0.1179]) -Greedy action tensor([-1.0632, -1.1242, 1.1921, -0.7874]) tensor([0.0781, 0.0735, 0.7454, 0.1030]) -Greedy action tensor([-1.1112e+00, -1.2614e+00, -9.6418e-05, -2.7215e-01]) tensor([0.1387, 0.1193, 0.4212, 0.3209]) -Greedy action tensor([ 0.8115, -0.1990, 0.3478, 0.6572]) tensor([0.3509, 0.1277, 0.2207, 0.3007]) -Greedy action tensor([ 0.3074, -0.4832, 0.0302, 0.0139]) tensor([0.3382, 0.1534, 0.2563, 0.2522]) -Greedy action tensor([-0.4878, -0.4644, 0.4784, 0.5137]) tensor([0.1356, 0.1388, 0.3564, 0.3692]) -Greedy action tensor([ 0.3351, -1.1702, 0.0677, 0.2507]) tensor([0.3441, 0.0764, 0.2634, 0.3162]) -Greedy action tensor([-0.2357, -1.0457, 0.0445, 0.4516]) tensor([0.2102, 0.0935, 0.2782, 0.4180]) -Greedy action tensor([ 0.6012, -0.0202, 0.2719, 1.4753]) tensor([0.2149, 0.1154, 0.1546, 0.5151]) -Greedy action tensor([ 0.7785, -0.0994, 0.2615, -0.2682]) tensor([0.4232, 0.1759, 0.2523, 0.1486]) -Greedy action tensor([-0.0675, 0.0257, 0.0332, -0.0249]) tensor([0.2355, 0.2585, 0.2604, 0.2457]) -Greedy action tensor([-0.4221, -0.3166, 0.9336, -0.5127]) tensor([0.1448, 0.1610, 0.5619, 0.1323]) -Greedy action tensor([-0.1380, 0.1340, -0.8666, 0.0262]) tensor([0.2517, 0.3303, 0.1215, 0.2966]) -Greedy action tensor([ 0.1542, -0.2561, 0.5036, -0.3670]) tensor([0.2721, 0.1805, 0.3858, 0.1616]) -Greedy action tensor([-0.2923, -0.3304, 1.2279, -1.1317]) tensor([0.1435, 0.1381, 0.6563, 0.0620]) -Greedy action tensor([ 0.0385, -1.0178, 0.2846, -0.2660]) tensor([0.2972, 0.1034, 0.3802, 0.2192]) -Greedy action tensor([ 0.7150, -0.4733, -0.3881, -0.4285]) tensor([0.5114, 0.1559, 0.1697, 0.1630]) -Greedy action tensor([-0.0593, 0.1174, -0.3979, 0.6846]) tensor([0.1996, 0.2382, 0.1423, 0.4200]) -Greedy action tensor([-0.0176, -0.2162, 0.5305, 0.0753]) tensor([0.2152, 0.1764, 0.3723, 0.2361]) -Greedy action tensor([ 0.5987, -1.6161, -0.4104, 1.2126]) tensor([0.3011, 0.0329, 0.1098, 0.5563]) -Greedy action tensor([ 0.0334, -1.1465, -0.0496, 0.6004]) tensor([0.2506, 0.0770, 0.2306, 0.4418]) -Greedy action tensor([ 0.0396, 0.0043, -0.1852, -0.3778]) tensor([0.2922, 0.2820, 0.2333, 0.1925]) -Greedy action tensor([ 0.9336, -0.8047, 1.0659, 0.4252]) tensor([0.3426, 0.0602, 0.3911, 0.2061]) -Greedy action tensor([ 0.8888, -1.4834, -0.0108, -0.3279]) tensor([0.5567, 0.0519, 0.2264, 0.1649]) -Greedy action tensor([ 0.3682, -0.7197, -0.6000, 0.2784]) tensor([0.3801, 0.1281, 0.1444, 0.3475]) -Greedy action tensor([ 0.0786, -1.2782, -0.6301, 0.7157]) tensor([0.2747, 0.0707, 0.1352, 0.5194]) -Greedy action tensor([-0.2016, -1.3162, -0.8908, 0.6024]) tensor([0.2460, 0.0807, 0.1235, 0.5498]) -Greedy action tensor([ 0.1576, -0.9446, 0.0174, -0.0939]) tensor([0.3357, 0.1115, 0.2918, 0.2610]) -Greedy action tensor([ 0.9844, -1.1766, -0.0383, 0.0875]) tensor([0.5312, 0.0612, 0.1910, 0.2166]) -Greedy action tensor([-0.2538, -1.6782, -0.4792, -0.1352]) tensor([0.3160, 0.0760, 0.2522, 0.3558]) -Greedy action tensor([-1.0014, -0.7272, 1.0311, -0.3131]) tensor([0.0838, 0.1102, 0.6394, 0.1667]) -Greedy action tensor([ 0.7582, -0.7465, 0.0437, 0.8240]) tensor([0.3598, 0.0799, 0.1761, 0.3842]) -Greedy action tensor([-0.3444, -0.3870, -0.8690, -0.5643]) tensor([0.2983, 0.2858, 0.1765, 0.2394]) -Greedy action tensor([-0.6892, -0.7815, 0.1342, -0.5979]) tensor([0.1892, 0.1725, 0.4310, 0.2073]) -Greedy action tensor([ 0.0870, 0.6055, -0.9758, -1.0162]) tensor([0.2979, 0.5003, 0.1029, 0.0988]) -Greedy action tensor([-1.0226, -0.6501, 0.3138, -0.8621]) tensor([0.1346, 0.1953, 0.5121, 0.1580]) -Greedy action tensor([-0.0164, -0.7505, -0.1382, -0.1975]) tensor([0.3125, 0.1500, 0.2767, 0.2608]) -Greedy action tensor([ 0.5993, -0.2427, -0.2601, 0.2112]) tensor([0.3948, 0.1701, 0.1672, 0.2679]) -Greedy action tensor([ 0.5234, 0.1259, -0.1280, -0.2607]) tensor([0.3774, 0.2536, 0.1967, 0.1723]) -Greedy action tensor([ 0.8671, -0.4770, -0.0951, -0.3849]) tensor([0.5185, 0.1352, 0.1981, 0.1482]) -Greedy action tensor([ 0.8356, -0.7126, 0.0985, -0.8234]) tensor([0.5315, 0.1130, 0.2543, 0.1012]) -Greedy action tensor([ 0.8709, -0.4901, -0.0515, -0.2414]) tensor([0.5043, 0.1293, 0.2005, 0.1658]) -Greedy action tensor([ 0.7055, -0.2522, -0.0447, -0.1635]) tensor([0.4395, 0.1687, 0.2076, 0.1843]) -Greedy action tensor([ 0.6439, -0.4040, -0.1050, -0.3945]) tensor([0.4592, 0.1610, 0.2172, 0.1626]) -Greedy action tensor([ 0.8450, -0.7340, 0.0528, -0.5517]) tensor([0.5245, 0.1081, 0.2375, 0.1298]) -Greedy action tensor([ 0.7303, -0.0548, -0.2205, -0.4070]) tensor([0.4623, 0.2108, 0.1786, 0.1482]) -Greedy action tensor([ 1.0093, -0.7665, 0.1981, -0.5530]) tensor([0.5484, 0.0929, 0.2437, 0.1150]) -Greedy action tensor([ 1.1695, -0.7241, -0.0860, -0.4323]) tensor([0.6109, 0.0920, 0.1741, 0.1231]) -Greedy action tensor([ 0.7305, -0.4542, -0.0288, -0.3675]) tensor([0.4745, 0.1451, 0.2221, 0.1583]) -Greedy action tensor([ 0.8618, -0.4136, 0.0953, -0.4450]) tensor([0.4964, 0.1386, 0.2306, 0.1344]) -Greedy action tensor([ 0.7742, -0.2285, -0.0414, -0.5224]) tensor([0.4801, 0.1762, 0.2124, 0.1313]) -Greedy action tensor([ 0.5860, -0.3329, 0.0308, -0.1424]) tensor([0.4072, 0.1625, 0.2337, 0.1966]) -Greedy action tensor([ 0.7641, -0.4171, -0.0320, -0.2986]) tensor([0.4754, 0.1459, 0.2144, 0.1643]) -Greedy action tensor([ 0.8814, -0.8202, 0.1121, -0.4120]) tensor([0.5208, 0.0950, 0.2413, 0.1429]) -Greedy action tensor([ 0.5101, -0.1274, -0.1481, 0.1679]) tensor([0.3628, 0.1918, 0.1878, 0.2576]) -Greedy action tensor([ 0.8753, -0.6486, 0.0274, -0.3574]) tensor([0.5161, 0.1124, 0.2210, 0.1504]) -Greedy action tensor([ 0.8221, -0.1851, -0.0124, -0.4171]) tensor([0.4787, 0.1748, 0.2078, 0.1386]) -Greedy action tensor([ 0.7323, -0.5669, -0.0668, -0.4958]) tensor([0.4962, 0.1353, 0.2231, 0.1453]) -Greedy action tensor([ 0.8216, -0.3704, 0.0107, -0.3995]) tensor([0.4895, 0.1486, 0.2176, 0.1444]) -Greedy action tensor([ 0.1374, 0.5524, -0.1960, 0.1184]) tensor([0.2374, 0.3595, 0.1701, 0.2330]) -Greedy action tensor([ 0.6472, -0.4312, -0.0554, -0.2481]) tensor([0.4456, 0.1516, 0.2207, 0.1820]) -Greedy action tensor([ 0.6125, -0.4385, -0.1584, -0.4234]) tensor([0.4615, 0.1613, 0.2135, 0.1638]) -Greedy action tensor([ 0.4776, -0.3970, -0.0785, -0.1400]) tensor([0.3953, 0.1649, 0.2267, 0.2132]) -Greedy action tensor([ 0.8082, -0.2492, -0.0714, -0.1300]) tensor([0.4643, 0.1613, 0.1927, 0.1817]) -Greedy action tensor([ 1.1752, -0.7308, -0.1725, -0.5612]) tensor([0.6311, 0.0938, 0.1640, 0.1112]) -Greedy action tensor([ 0.8095, -0.6481, -0.0600, -0.1470]) tensor([0.4911, 0.1143, 0.2059, 0.1887]) -Greedy action tensor([ 0.5701, -0.3365, -0.0454, -0.1974]) tensor([0.4152, 0.1677, 0.2244, 0.1927]) -Greedy action tensor([ 0.6493, 0.0401, -0.0563, -0.2148]) tensor([0.4067, 0.2211, 0.2008, 0.1714]) -Greedy action tensor([ 1.2055, -0.7950, 0.0248, -0.3945]) tensor([0.6082, 0.0823, 0.1868, 0.1228]) -Greedy action tensor([ 0.6978, -0.5668, 0.0799, -0.3296]) tensor([0.4589, 0.1296, 0.2474, 0.1642]) -Greedy action tensor([ 0.7643, -0.5659, -0.0743, -0.3621]) tensor([0.4948, 0.1308, 0.2139, 0.1604]) -Greedy action tensor([ 0.6372, -0.4810, -0.0701, -0.4770]) tensor([0.4655, 0.1522, 0.2295, 0.1528]) -Greedy action tensor([ 0.7250, -0.3532, -0.1734, -0.5047]) tensor([0.4903, 0.1668, 0.1996, 0.1433]) -Greedy action tensor([ 0.5204, -0.6200, -0.0732, -0.1248]) tensor([0.4173, 0.1334, 0.2305, 0.2189]) -Greedy action tensor([ 0.8633, -0.5256, 0.2704, -0.5762]) tensor([0.4904, 0.1223, 0.2711, 0.1162]) -Greedy action tensor([ 0.6427, -0.3973, -0.1347, -0.1014]) tensor([0.4370, 0.1545, 0.2008, 0.2077]) -Greedy action tensor([ 1.1062, -0.4593, -0.0982, -0.5040]) tensor([0.5852, 0.1223, 0.1755, 0.1170]) -Greedy action tensor([ 0.7449, -0.5595, -0.0196, -0.1835]) tensor([0.4690, 0.1273, 0.2184, 0.1853]) -Greedy action tensor([ 0.7587, -0.7285, -0.0624, -0.3995]) tensor([0.5050, 0.1141, 0.2222, 0.1586]) -Greedy action tensor([ 0.6866, -0.4509, -0.0946, -0.0568]) tensor([0.4437, 0.1423, 0.2031, 0.2109]) -Greedy action tensor([ 0.7990, -0.7358, 0.0911, -0.5898]) tensor([0.5108, 0.1101, 0.2517, 0.1274]) -Greedy action tensor([ 0.7176, -0.5823, 0.2421, -0.5794]) tensor([0.4614, 0.1258, 0.2868, 0.1261]) -Greedy action tensor([ 0.5967, -0.3928, -0.1099, -0.4064]) tensor([0.4481, 0.1666, 0.2210, 0.1643]) -Greedy action tensor([ 0.4342, -0.1971, -0.0468, -0.0824]) tensor([0.3641, 0.1937, 0.2251, 0.2172]) -Greedy action tensor([ 0.2927, 0.1445, -0.0272, -0.3945]) tensor([0.3235, 0.2789, 0.2349, 0.1627]) -Greedy action tensor([ 0.5744, -0.2388, -0.0275, -0.1585]) tensor([0.4046, 0.1794, 0.2216, 0.1944]) -Greedy action tensor([ 0.5336, 0.0464, -0.0764, -0.1674]) tensor([0.3768, 0.2315, 0.2047, 0.1869]) -Greedy action tensor([ 0.5394, -0.1134, 0.0213, 0.0121]) tensor([0.3695, 0.1924, 0.2201, 0.2181]) -Greedy action tensor([ 0.5617, -0.1228, -0.0194, -0.0376]) tensor([0.3827, 0.1930, 0.2141, 0.2102]) -Greedy action tensor([ 0.8667, -0.6790, -0.1318, -0.3296]) tensor([0.5308, 0.1132, 0.1956, 0.1605]) -Greedy action tensor([ 0.6398, -0.1636, -0.1541, -0.0412]) tensor([0.4156, 0.1861, 0.1879, 0.2103]) -Greedy action tensor([ 0.8242, -0.1249, -0.1502, -0.1628]) tensor([0.4679, 0.1811, 0.1766, 0.1744]) -Greedy action tensor([ 1.0826, -0.6001, 0.0439, -0.4795]) tensor([0.5716, 0.1062, 0.2023, 0.1199]) -Greedy action tensor([ 1.0582, -0.4212, -0.1531, -0.4172]) tensor([0.5700, 0.1298, 0.1698, 0.1304]) -Greedy action tensor([ 0.6505, -0.2156, -0.0960, 0.0526]) tensor([0.4091, 0.1720, 0.1939, 0.2250]) -Greedy action tensor([ 0.8422, -0.5402, 0.0604, -0.3627]) tensor([0.4979, 0.1250, 0.2279, 0.1492]) -Greedy action tensor([ 0.3155, 0.3316, -0.1661, 0.1867]) tensor([0.2846, 0.2893, 0.1758, 0.2502]) -Greedy action tensor([ 0.7216, -0.6568, 0.2381, -0.3355]) tensor([0.4513, 0.1137, 0.2783, 0.1568]) -Greedy action tensor([ 0.7407, -0.4615, 0.0479, -0.5196]) tensor([0.4798, 0.1442, 0.2400, 0.1360]) -Greedy action tensor([ 0.8629, -0.5847, -0.1828, -0.2712]) tensor([0.5240, 0.1232, 0.1842, 0.1686]) -Greedy action tensor([ 0.5429, -0.6767, -0.1123, -0.3645]) tensor([0.4508, 0.1331, 0.2341, 0.1819]) -Greedy action tensor([ 0.3476, -0.0105, 0.0077, -0.0289]) tensor([0.3229, 0.2257, 0.2298, 0.2216]) -Greedy action tensor([ 0.7565, -0.2627, 0.0298, -0.3633]) tensor([0.4607, 0.1662, 0.2227, 0.1503]) -Greedy action tensor([ 0.3682, 0.2611, 0.0024, -0.0844]) tensor([0.3098, 0.2783, 0.2149, 0.1970]) -Greedy action tensor([ 0.7739, -0.5066, 0.0095, -0.0458]) tensor([0.4579, 0.1272, 0.2132, 0.2017]) -Greedy action tensor([ 0.4235, -0.0153, -0.0013, -0.0019]) tensor([0.3387, 0.2184, 0.2215, 0.2214]) -Greedy action tensor([ 1.0159, 0.0442, -0.1176, -0.2180]) tensor([0.5021, 0.1900, 0.1616, 0.1462]) -Greedy action tensor([ 1.1195, -0.7140, 0.0190, -0.6260]) tensor([0.5998, 0.0959, 0.1996, 0.1047]) -Greedy action tensor([ 0.7429, -0.2310, 0.0130, -0.5502]) tensor([0.4686, 0.1770, 0.2259, 0.1286]) -Greedy action tensor([ 0.6562, -0.2872, -0.1025, -0.3178]) tensor([0.4474, 0.1742, 0.2095, 0.1689]) -Greedy action tensor([ 0.6386, -0.4054, -0.0472, -0.3103]) tensor([0.4459, 0.1570, 0.2246, 0.1726]) -Greedy action tensor([ 0.8176, -0.3117, 0.1152, -0.3492]) tensor([0.4695, 0.1518, 0.2326, 0.1462]) -Greedy action tensor([ 0.6532, -0.5250, -0.0934, -0.2960]) tensor([0.4611, 0.1419, 0.2185, 0.1785]) -Greedy action tensor([ 0.5214, -0.3961, -0.0040, -0.1804]) tensor([0.4022, 0.1607, 0.2378, 0.1993]) -Greedy action tensor([ 0.5173, -0.0208, -0.2381, -0.0688]) tensor([0.3831, 0.2237, 0.1800, 0.2132]) -Greedy action tensor([ 0.8257, -0.2598, -0.0479, -0.1263]) tensor([0.4670, 0.1577, 0.1950, 0.1803]) -Greedy action tensor([ 0.3463, 0.1387, -0.0932, -0.0915]) tensor([0.3223, 0.2619, 0.2077, 0.2080]) -Greedy action tensor([ 0.9705, -0.6113, -0.1385, -0.5423]) tensor([0.5695, 0.1171, 0.1879, 0.1255]) -Greedy action tensor([ 0.6186, -0.5218, 0.2150, -0.5177]) tensor([0.4332, 0.1385, 0.2893, 0.1390]) -Greedy action tensor([-1.7400, 0.3315, 0.5938, -0.3881]) tensor([0.0433, 0.3433, 0.4463, 0.1672]) -Greedy action tensor([-1.7423, 0.2098, 0.4979, -0.0896]) tensor([0.0441, 0.3108, 0.4146, 0.2304]) -Greedy action tensor([-1.9154, -0.4623, 0.6535, -0.1672]) tensor([0.0415, 0.1776, 0.5422, 0.2386]) -Greedy action tensor([-1.6410, 0.0645, 0.4611, -0.0797]) tensor([0.0514, 0.2830, 0.4207, 0.2449]) -Greedy action tensor([-1.9075, -0.3637, 0.6373, -0.1551]) tensor([0.0413, 0.1936, 0.5267, 0.2384]) -Greedy action tensor([-1.8988, -0.4261, 0.6415, -0.1562]) tensor([0.0421, 0.1836, 0.5339, 0.2404]) -Greedy action tensor([-1.4081, -0.4645, 0.4389, -0.3414]) tensor([0.0780, 0.2005, 0.4948, 0.2267]) -Greedy action tensor([-1.7355, -0.5237, 0.7710, 0.1284]) tensor([0.0433, 0.1456, 0.5315, 0.2795]) -Greedy action tensor([-1.6406, -0.1175, 0.4730, -0.0730]) tensor([0.0536, 0.2458, 0.4436, 0.2570]) -Greedy action tensor([-1.4691, -0.2777, 0.5763, 0.1414]) tensor([0.0587, 0.1933, 0.4541, 0.2939]) -Greedy action tensor([-0.9816, -0.3863, 0.4418, 0.3229]) tensor([0.0939, 0.1703, 0.3898, 0.3461]) -Greedy action tensor([-1.9431, -0.4484, 0.6664, -0.1796]) tensor([0.0402, 0.1792, 0.5462, 0.2344]) -Greedy action tensor([-1.8996, -0.3587, 0.6348, -0.1515]) tensor([0.0416, 0.1944, 0.5249, 0.2391]) -Greedy action tensor([-1.3536, 0.4315, 0.2600, 0.2224]) tensor([0.0595, 0.3544, 0.2986, 0.2875]) -Greedy action tensor([-1.9240, -0.4494, 0.6602, -0.1682]) tensor([0.0410, 0.1790, 0.5429, 0.2371]) -Greedy action tensor([-1.7075, -0.2511, 0.5125, -0.0890]) tensor([0.0512, 0.2195, 0.4711, 0.2582]) -Greedy action tensor([-1.9153, -0.3877, 0.6495, -0.1638]) tensor([0.0410, 0.1891, 0.5334, 0.2365]) -Greedy action tensor([-1.3531, -0.3253, 0.3696, 0.2036]) tensor([0.0707, 0.1977, 0.3961, 0.3355]) -Greedy action tensor([-1.8638, -0.4344, 0.7738, 0.0462]) tensor([0.0386, 0.1612, 0.5396, 0.2606]) -Greedy action tensor([-1.4041, -0.5572, 0.4182, 0.0798]) tensor([0.0718, 0.1675, 0.4441, 0.3166]) -Greedy action tensor([-1.9180, -0.4559, 0.6586, -0.1650]) tensor([0.0413, 0.1780, 0.5426, 0.2381]) -Greedy action tensor([-1.7931, -0.3796, 0.6598, -0.0448]) tensor([0.0445, 0.1829, 0.5171, 0.2556]) -Greedy action tensor([-1.9404, -0.4447, 0.6659, -0.1772]) tensor([0.0403, 0.1796, 0.5454, 0.2347]) -Greedy action tensor([-1.6105, -0.1876, 0.4837, -0.0774]) tensor([0.0559, 0.2318, 0.4536, 0.2588]) -Greedy action tensor([0.1559, 0.4985, 0.5275, 0.9421]) tensor([0.1652, 0.2327, 0.2395, 0.3626]) -Greedy action tensor([-1.6895, -0.5277, 0.5543, -0.1363]) tensor([0.0545, 0.1741, 0.5138, 0.2575]) -Greedy action tensor([-1.9450, -0.4504, 0.6682, -0.1801]) tensor([0.0401, 0.1787, 0.5470, 0.2342]) -Greedy action tensor([-1.7534, -0.4885, 0.7644, 0.1790]) tensor([0.0419, 0.1485, 0.5200, 0.2895]) -Greedy action tensor([-1.2497, 0.6977, 0.1596, 0.1758]) tensor([0.0615, 0.4311, 0.2517, 0.2558]) -Greedy action tensor([-0.8413, -0.4707, 0.4210, -0.3093]) tensor([0.1301, 0.1885, 0.4598, 0.2215]) -Greedy action tensor([-1.6151, -0.1832, 0.4596, -0.0292]) tensor([0.0555, 0.2322, 0.4415, 0.2708]) -Greedy action tensor([-1.2479, 0.8385, 0.2349, -0.6685]) tensor([0.0656, 0.5284, 0.2889, 0.1171]) -Greedy action tensor([-1.6796, 0.1132, 0.4764, 0.0085]) tensor([0.0475, 0.2853, 0.4103, 0.2569]) -Greedy action tensor([-1.8843, -0.4648, 0.6983, -0.0902]) tensor([0.0410, 0.1696, 0.5427, 0.2467]) -Greedy action tensor([-0.9467, 0.6278, 0.1367, 0.0453]) tensor([0.0871, 0.4206, 0.2574, 0.2349]) -Greedy action tensor([-0.8872, -0.5408, 0.9158, 0.9946]) tensor([0.0665, 0.0940, 0.4032, 0.4363]) -Greedy action tensor([-1.7860, -0.1380, 0.5580, -0.1537]) tensor([0.0460, 0.2391, 0.4795, 0.2354]) -Greedy action tensor([-1.7967, -0.4014, 0.6103, -0.0890]) tensor([0.0462, 0.1864, 0.5127, 0.2547]) -Greedy action tensor([-1.8812, -0.4373, 0.6316, -0.1592]) tensor([0.0432, 0.1828, 0.5325, 0.2415]) -Greedy action tensor([-1.8017, -0.4832, 0.7660, 0.1634]) tensor([0.0401, 0.1501, 0.5233, 0.2865]) -Greedy action tensor([0.5797, 0.3710, 0.6035, 1.2761]) tensor([0.2065, 0.1676, 0.2115, 0.4144]) -Greedy action tensor([-1.8869, -0.4044, 0.6320, -0.1583]) tensor([0.0426, 0.1878, 0.5294, 0.2402]) -Greedy action tensor([-1.9309, -0.4217, 0.6588, -0.1733]) tensor([0.0406, 0.1835, 0.5406, 0.2353]) -Greedy action tensor([-1.9034, -0.4113, 0.6427, -0.1568]) tensor([0.0418, 0.1857, 0.5329, 0.2396]) -Greedy action tensor([-1.7833, -0.2320, 0.5594, -0.1226]) tensor([0.0468, 0.2206, 0.4866, 0.2460]) -Greedy action tensor([-1.9426, -0.4484, 0.6678, -0.1783]) tensor([0.0402, 0.1790, 0.5464, 0.2344]) -Greedy action tensor([-1.4809, -0.3655, 0.5215, 0.1619]) tensor([0.0601, 0.1835, 0.4455, 0.3109]) -Greedy action tensor([-1.7228, 0.1957, 0.4663, -0.0856]) tensor([0.0457, 0.3113, 0.4080, 0.2350]) -Greedy action tensor([-1.7565, -0.4832, 0.5787, -0.1315]) tensor([0.0500, 0.1788, 0.5170, 0.2541]) -Greedy action tensor([-1.9371, -0.4443, 0.6664, -0.1744]) tensor([0.0403, 0.1795, 0.5451, 0.2351]) -Greedy action tensor([-1.5690, -0.5640, 0.4640, 0.0631]) tensor([0.0607, 0.1657, 0.4633, 0.3103]) -Greedy action tensor([-1.6654, -0.3173, 0.6021, -0.0613]) tensor([0.0513, 0.1977, 0.4957, 0.2553]) -Greedy action tensor([-1.8946, -0.4347, 0.6414, -0.1542]) tensor([0.0423, 0.1822, 0.5343, 0.2412]) -Greedy action tensor([-1.7933, -0.4872, 0.5950, -0.1366]) tensor([0.0480, 0.1772, 0.5231, 0.2517]) -Greedy action tensor([-1.7136, -0.4992, 0.7595, 0.0837]) tensor([0.0449, 0.1513, 0.5327, 0.2710]) -Greedy action tensor([-0.8070, 0.8606, 0.0930, 0.1235]) tensor([0.0885, 0.4692, 0.2178, 0.2245]) -Greedy action tensor([-1.8706, -0.7520, 0.4758, -0.3625]) tensor([0.0526, 0.1609, 0.5491, 0.2375]) -Greedy action tensor([-1.8883, -0.4561, 0.6418, -0.1557]) tensor([0.0427, 0.1790, 0.5366, 0.2417]) -Greedy action tensor([-1.9340, -0.4347, 0.6617, -0.1749]) tensor([0.0405, 0.1814, 0.5429, 0.2352]) -Greedy action tensor([-1.8547, -0.4318, 0.6189, -0.1364]) tensor([0.0443, 0.1837, 0.5253, 0.2468]) -Greedy action tensor([-1.9222, -0.4303, 0.6573, -0.1673]) tensor([0.0410, 0.1820, 0.5402, 0.2368]) -Greedy action tensor([-1.8194, -0.4545, 0.6089, -0.1482]) tensor([0.0464, 0.1815, 0.5256, 0.2465]) -Greedy action tensor([-1.8292, -0.4401, 0.6199, -0.1161]) tensor([0.0452, 0.1812, 0.5231, 0.2506]) -Greedy action tensor([-1.8850, -0.4535, 0.6452, -0.1468]) tensor([0.0427, 0.1786, 0.5359, 0.2427]) -Greedy action tensor([-1.9257, -0.4452, 0.6698, -0.1655]) tensor([0.0406, 0.1786, 0.5446, 0.2362]) -Greedy action tensor([-1.6993, -0.2835, 0.5284, -0.1185]) tensor([0.0519, 0.2139, 0.4818, 0.2523]) -Greedy action tensor([-1.5903e+00, -3.2821e-01, 4.6718e-01, 1.1622e-03]) tensor([0.0579, 0.2046, 0.4532, 0.2844]) -Greedy action tensor([-1.2536, -0.6674, 0.3146, 0.2156]) tensor([0.0837, 0.1505, 0.4018, 0.3639]) -Greedy action tensor([-1.8500, -0.2879, 0.6211, -0.1055]) tensor([0.0429, 0.2044, 0.5073, 0.2453]) -Greedy action tensor([-1.9129, -0.4560, 0.6761, -0.1438]) tensor([0.0409, 0.1754, 0.5441, 0.2397]) -Greedy action tensor([-0.5876, 0.2654, 0.0348, 0.1774]) tensor([0.1359, 0.3189, 0.2532, 0.2920]) -Greedy action tensor([-1.3496, -0.1697, 0.3856, -0.0616]) tensor([0.0738, 0.2402, 0.4185, 0.2676]) -Greedy action tensor([ 0.3004, -0.0177, 0.7420, 1.4388]) tensor([0.1561, 0.1136, 0.2428, 0.4874]) -Greedy action tensor([-1.4507, 0.5242, 0.2821, 0.0697]) tensor([0.0542, 0.3909, 0.3068, 0.2481]) -Greedy action tensor([-1.9474, -0.4516, 0.6689, -0.1822]) tensor([0.0400, 0.1786, 0.5476, 0.2338]) -Greedy action tensor([-1.9372, -0.4395, 0.6631, -0.1749]) tensor([0.0404, 0.1805, 0.5438, 0.2352]) -Greedy action tensor([-1.8316, -0.4650, 0.5819, -0.1504]) tensor([0.0466, 0.1827, 0.5205, 0.2502]) -Greedy action tensor([-1.9181, -0.4675, 0.6753, -0.1532]) tensor([0.0408, 0.1742, 0.5463, 0.2386]) -Greedy action tensor([-1.7155, -0.5021, 0.5606, -0.1132]) tensor([0.0524, 0.1765, 0.5107, 0.2604]) -Greedy action tensor([-1.2034, 0.7973, 0.1626, 0.2794]) tensor([0.0598, 0.4422, 0.2345, 0.2635]) -Greedy action tensor([-1.9228, -0.3680, 0.6445, -0.1604]) tensor([0.0407, 0.1925, 0.5299, 0.2369]) -Greedy action tensor([ 1.8962, 0.1049, -0.5991, -0.2119]) tensor([0.7296, 0.1217, 0.0602, 0.0886]) -Greedy action tensor([ 1.3000, -0.6479, -0.3653, 0.0511]) tensor([0.6178, 0.0881, 0.1169, 0.1772]) -Greedy action tensor([ 1.4689, -0.3868, -0.3779, 0.4616]) tensor([0.5955, 0.0931, 0.0939, 0.2175]) -Greedy action tensor([ 1.3532, -0.3177, 0.0153, 0.7318]) tensor([0.5031, 0.0946, 0.1320, 0.2703]) -Greedy action tensor([ 1.7208, -0.2240, -0.4027, 0.2958]) tensor([0.6653, 0.0951, 0.0796, 0.1600]) -Greedy action tensor([ 1.9262, 0.6030, -0.2572, 0.2033]) tensor([0.6421, 0.1710, 0.0723, 0.1146]) -Greedy action tensor([ 1.5278, -0.4600, -0.6578, 0.5285]) tensor([0.6182, 0.0847, 0.0695, 0.2276]) -Greedy action tensor([ 0.8053, -0.4235, 0.0035, 0.0890]) tensor([0.4485, 0.1312, 0.2012, 0.2191]) -Greedy action tensor([ 1.1352, -0.2799, -0.4768, 0.1778]) tensor([0.5476, 0.1330, 0.1092, 0.2102]) -Greedy action tensor([ 1.5403, 0.0270, -0.1716, 0.2948]) tensor([0.5922, 0.1304, 0.1069, 0.1704]) -Greedy action tensor([ 1.2897, 0.1444, -1.2814, 0.0257]) tensor([0.5963, 0.1897, 0.0456, 0.1685]) -Greedy action tensor([ 1.5104, -0.0305, -0.9470, 0.2577]) tensor([0.6307, 0.1351, 0.0540, 0.1802]) -Greedy action tensor([ 1.5629, -0.3630, -0.2924, 0.4243]) tensor([0.6164, 0.0898, 0.0964, 0.1974]) -Greedy action tensor([ 1.5986, -0.3031, -0.0737, 0.1310]) tensor([0.6379, 0.0953, 0.1198, 0.1470]) -Greedy action tensor([ 1.9223, -0.6141, -0.4141, 0.7375]) tensor([0.6749, 0.0534, 0.0652, 0.2064]) -Greedy action tensor([ 1.1049, -0.1547, -0.2131, -0.3194]) tensor([0.5580, 0.1584, 0.1494, 0.1343]) -Greedy action tensor([ 1.1528, -0.6830, -0.4161, 0.1571]) tensor([0.5756, 0.0918, 0.1199, 0.2127]) -Greedy action tensor([ 1.5033, -0.6500, -0.3160, 0.1751]) tensor([0.6480, 0.0752, 0.1051, 0.1717]) -Greedy action tensor([ 1.5347, -1.1823, -0.3032, 0.2359]) tensor([0.6675, 0.0441, 0.1062, 0.1821]) -Greedy action tensor([ 1.2962, -0.1897, -0.7653, -0.1919]) tensor([0.6332, 0.1433, 0.0806, 0.1430]) -Greedy action tensor([ 1.4377, -0.2742, -1.0319, 0.6797]) tensor([0.5768, 0.1041, 0.0488, 0.2703]) -Greedy action tensor([ 1.3738, -0.5035, -0.4580, -0.1024]) tensor([0.6487, 0.0992, 0.1039, 0.1482]) -Greedy action tensor([ 1.0108, -0.4816, 0.1196, 0.3868]) tensor([0.4606, 0.1036, 0.1890, 0.2468]) -Greedy action tensor([ 1.7953, -0.9287, -0.4813, 0.2036]) tensor([0.7290, 0.0478, 0.0748, 0.1484]) -Greedy action tensor([ 0.7653, -0.3448, 0.1018, 0.2082]) tensor([0.4137, 0.1363, 0.2131, 0.2370]) -Greedy action tensor([ 1.2353, -0.6028, -0.5560, 0.2344]) tensor([0.5905, 0.0940, 0.0985, 0.2170]) -Greedy action tensor([ 1.3413, -0.2143, -0.4877, 0.2501]) tensor([0.5857, 0.1236, 0.0940, 0.1967]) -Greedy action tensor([ 1.6925, -0.5489, 0.2093, 0.7290]) tensor([0.5832, 0.0620, 0.1323, 0.2225]) -Greedy action tensor([ 1.8732, -0.6322, -0.5134, 0.6102]) tensor([0.6866, 0.0561, 0.0631, 0.1942]) -Greedy action tensor([ 1.3869, -0.1615, -0.2629, 0.0889]) tensor([0.5960, 0.1267, 0.1145, 0.1628]) -Greedy action tensor([ 1.8428, -1.1609, 0.0203, 0.2214]) tensor([0.7098, 0.0352, 0.1147, 0.1403]) -Greedy action tensor([ 2.0252, -0.8422, -0.1101, 0.6183]) tensor([0.7042, 0.0400, 0.0832, 0.1725]) -Greedy action tensor([ 1.6783, 0.3668, -0.4538, 0.3078]) tensor([0.6090, 0.1641, 0.0722, 0.1547]) -Greedy action tensor([ 1.7287, -1.0154, -0.1012, 0.4243]) tensor([0.6684, 0.0430, 0.1072, 0.1814]) -Greedy action tensor([ 1.3710, -0.6391, -0.3628, 0.0738]) tensor([0.6314, 0.0846, 0.1115, 0.1725]) -Greedy action tensor([ 1.3438, -0.3141, -0.2825, 0.2636]) tensor([0.5791, 0.1103, 0.1139, 0.1966]) -Greedy action tensor([ 1.3573, -0.3960, -0.9447, 0.1313]) tensor([0.6383, 0.1106, 0.0639, 0.1873]) -Greedy action tensor([ 2.5047, 0.8985, -0.1348, 0.1395]) tensor([0.7321, 0.1469, 0.0523, 0.0688]) -Greedy action tensor([ 2.1884, -1.0367, -0.0108, 0.4829]) tensor([0.7506, 0.0298, 0.0832, 0.1364]) -Greedy action tensor([ 1.1159, -0.2013, -0.3490, 0.3994]) tensor([0.5032, 0.1348, 0.1163, 0.2458]) -Greedy action tensor([ 1.4526, -0.4075, -0.2901, 0.3663]) tensor([0.5994, 0.0933, 0.1049, 0.2023]) -Greedy action tensor([ 1.0916, -0.4755, -0.0621, -0.1445]) tensor([0.5511, 0.1150, 0.1738, 0.1601]) -Greedy action tensor([ 1.4879, -0.5516, -0.6382, 0.4140]) tensor([0.6285, 0.0818, 0.0750, 0.2148]) -Greedy action tensor([ 1.4177e+00, -7.4115e-04, -2.9275e-01, 3.3348e-01]) tensor([0.5678, 0.1375, 0.1027, 0.1920]) -Greedy action tensor([ 1.7799, 0.1819, -0.7206, 0.3005]) tensor([0.6613, 0.1338, 0.0543, 0.1506]) -Greedy action tensor([ 1.5117, -0.5859, -0.2304, 0.1064]) tensor([0.6480, 0.0795, 0.1135, 0.1590]) -Greedy action tensor([ 1.1278, -0.3271, 0.2516, 0.3964]) tensor([0.4693, 0.1095, 0.1954, 0.2258]) -Greedy action tensor([ 1.9606, -0.5622, -0.6849, 0.5682]) tensor([0.7144, 0.0573, 0.0507, 0.1775]) -Greedy action tensor([ 1.4671, -0.2205, -0.4719, 0.3293]) tensor([0.6063, 0.1122, 0.0872, 0.1943]) -Greedy action tensor([ 2.3943, -1.2145, -0.1841, 0.5715]) tensor([0.7908, 0.0214, 0.0600, 0.1278]) -Greedy action tensor([ 1.5235, -0.7841, -0.1015, 0.0369]) tensor([0.6568, 0.0653, 0.1293, 0.1485]) -Greedy action tensor([ 1.9712, -0.7189, -0.6061, 0.7271]) tensor([0.6983, 0.0474, 0.0531, 0.2012]) -Greedy action tensor([ 1.2123, -0.1367, -1.0199, 0.2353]) tensor([0.5737, 0.1489, 0.0615, 0.2159]) -Greedy action tensor([ 1.5605, -0.0875, -0.3986, 0.7619]) tensor([0.5607, 0.1079, 0.0791, 0.2523]) -Greedy action tensor([ 0.9071, 0.1085, -0.6208, 0.5542]) tensor([0.4220, 0.1899, 0.0916, 0.2965]) -Greedy action tensor([ 1.1989, -0.4014, -0.4542, 0.2524]) tensor([0.5614, 0.1133, 0.1075, 0.2179]) -Greedy action tensor([ 1.6114, -0.5596, -0.1199, 0.0386]) tensor([0.6673, 0.0761, 0.1181, 0.1384]) -Greedy action tensor([ 1.4647, -0.6663, -0.4294, -0.0135]) tensor([0.6679, 0.0793, 0.1005, 0.1523]) -Greedy action tensor([ 1.3708, -0.1130, -0.5732, -0.1031]) tensor([0.6254, 0.1418, 0.0895, 0.1432]) -Greedy action tensor([ 2.0899, -0.9637, -0.0384, 0.1462]) tensor([0.7637, 0.0360, 0.0909, 0.1093]) -Greedy action tensor([ 1.6790, -0.1693, -0.4625, 0.2501]) tensor([0.6603, 0.1040, 0.0776, 0.1582]) -Greedy action tensor([ 2.7524, -0.9338, -0.3262, 0.6644]) tensor([0.8368, 0.0210, 0.0385, 0.1037]) -Greedy action tensor([ 1.8410, -0.4012, -0.9088, 0.7697]) tensor([0.6611, 0.0702, 0.0423, 0.2264]) -Greedy action tensor([ 1.4646, -0.3940, -0.1288, 0.4659]) tensor([0.5789, 0.0902, 0.1176, 0.2132]) -Greedy action tensor([ 1.9831, -0.7500, -0.6506, 0.8868]) tensor([0.6799, 0.0442, 0.0488, 0.2271]) -Greedy action tensor([ 1.3217, -0.1264, -1.0806, 0.0847]) tensor([0.6189, 0.1454, 0.0560, 0.1796]) -Greedy action tensor([ 1.7814, 0.0032, -0.4464, 0.8424]) tensor([0.5996, 0.1013, 0.0646, 0.2345]) -Greedy action tensor([ 2.0767, -0.5632, -0.5419, 0.2920]) tensor([0.7621, 0.0544, 0.0556, 0.1279]) -Greedy action tensor([ 1.1804, -0.0700, 0.0734, -0.0434]) tensor([0.5233, 0.1499, 0.1730, 0.1539]) -Greedy action tensor([ 1.6240, -0.4478, -0.4840, 0.1197]) tensor([0.6804, 0.0857, 0.0827, 0.1512]) -Greedy action tensor([ 1.8354, -0.2384, -1.1314, 0.3684]) tensor([0.7103, 0.0893, 0.0366, 0.1638]) -Greedy action tensor([ 1.7264, -0.7225, -0.4325, 0.6464]) tensor([0.6487, 0.0560, 0.0749, 0.2203]) -Greedy action tensor([1.6314, 0.1362, 0.1266, 0.3071]) tensor([0.5840, 0.1309, 0.1297, 0.1553]) -Greedy action tensor([1.8723, 0.5389, 0.1667, 0.4356]) tensor([0.5942, 0.1566, 0.1079, 0.1413]) -Greedy action tensor([ 1.1654, -0.1611, -0.2077, 0.6866]) tensor([0.4677, 0.1241, 0.1185, 0.2897]) -Greedy action tensor([ 1.8846, -0.7551, -0.1363, 0.1392]) tensor([0.7254, 0.0518, 0.0961, 0.1266]) -Greedy action tensor([ 1.3613, -0.3791, -1.0213, 0.2222]) tensor([0.6298, 0.1105, 0.0581, 0.2016]) -Greedy action tensor([ 1.4604, -0.7354, 0.0938, 0.1580]) tensor([0.6104, 0.0679, 0.1557, 0.1660]) -Greedy action tensor([ 2.5348, -0.8058, -0.5851, 0.2776]) tensor([0.8444, 0.0299, 0.0373, 0.0884]) -Greedy action tensor([ 1.4167, -0.8402, -0.1559, 0.6068]) tensor([0.5691, 0.0596, 0.1181, 0.2532]) -Greedy action tensor([ 1.7560, -0.3583, -0.5825, 0.2421]) tensor([0.6958, 0.0840, 0.0671, 0.1531]) -Greedy action tensor([-1.1029, -1.5784, 0.3380, -0.6011]) tensor([0.1334, 0.0829, 0.5634, 0.2203]) -Greedy action tensor([ 0.3592, -0.4785, -0.2048, -0.2128]) tensor([0.3897, 0.1686, 0.2217, 0.2199]) -Greedy action tensor([ 0.5188, -0.6161, 0.1014, 0.3011]) tensor([0.3591, 0.1154, 0.2366, 0.2889]) -Greedy action tensor([-0.8878, -0.3797, 0.8419, -0.6256]) tensor([0.1042, 0.1731, 0.5873, 0.1354]) -Greedy action tensor([-0.0834, -0.5064, 0.3950, -0.2944]) tensor([0.2452, 0.1606, 0.3956, 0.1986]) -Greedy action tensor([ 0.5942, -0.0596, 0.4217, 0.6681]) tensor([0.2908, 0.1513, 0.2448, 0.3132]) -Greedy action tensor([-1.0201, -0.8460, 0.6764, -0.2584]) tensor([0.1022, 0.1216, 0.5574, 0.2189]) -Greedy action tensor([ 0.1909, -1.3306, -0.0654, -0.0989]) tensor([0.3649, 0.0797, 0.2824, 0.2731]) -Greedy action tensor([ 0.0918, -1.1504, 0.1898, 0.1883]) tensor([0.2863, 0.0827, 0.3158, 0.3153]) -Greedy action tensor([-1.3605, 0.1201, -0.0731, 0.1591]) tensor([0.0736, 0.3235, 0.2666, 0.3363]) -Greedy action tensor([ 0.0517, -0.4433, -0.9146, 0.9671]) tensor([0.2228, 0.1358, 0.0848, 0.5566]) -Greedy action tensor([-0.2797, 0.0055, -0.0573, -0.3551]) tensor([0.2219, 0.2951, 0.2772, 0.2058]) -Greedy action tensor([ 0.8025, -0.5971, 0.5390, 0.3826]) tensor([0.3742, 0.0923, 0.2875, 0.2459]) -Greedy action tensor([-0.1335, 0.0509, -0.8605, -0.0958]) tensor([0.2685, 0.3229, 0.1298, 0.2788]) -Greedy action tensor([-0.5394, -0.3122, 0.7385, -0.8726]) tensor([0.1524, 0.1913, 0.5471, 0.1092]) -Greedy action tensor([-0.1097, -0.2991, 0.4609, -0.2506]) tensor([0.2239, 0.1853, 0.3962, 0.1945]) -Greedy action tensor([ 1.5659, -0.0057, 0.1435, 0.4642]) tensor([0.5615, 0.1166, 0.1354, 0.1866]) -Greedy action tensor([-0.5159, 0.0344, 0.3961, -0.3547]) tensor([0.1563, 0.2710, 0.3891, 0.1836]) -Greedy action tensor([ 1.4006, -0.1609, -0.1252, 1.2391]) tensor([0.4389, 0.0921, 0.0955, 0.3735]) -Greedy action tensor([ 1.3920, -1.2125, 0.9531, 0.6055]) tensor([0.4599, 0.0340, 0.2966, 0.2095]) -Greedy action tensor([ 1.6417, -0.8325, 0.2549, 0.8523]) tensor([0.5592, 0.0471, 0.1397, 0.2539]) -Greedy action tensor([-1.2141, 0.4566, -0.1719, -1.6904]) tensor([0.1023, 0.5439, 0.2902, 0.0636]) -Greedy action tensor([ 1.0558, -1.2641, -0.3875, -0.0355]) tensor([0.5987, 0.0588, 0.1414, 0.2010]) -Greedy action tensor([-9.2784e-01, -3.4082e-04, -1.0288e+00, 1.5245e-01]) tensor([0.1355, 0.3427, 0.1225, 0.3992]) -Greedy action tensor([-1.2107, -0.5809, 0.4790, -1.0599]) tensor([0.1057, 0.1985, 0.5728, 0.1229]) -Greedy action tensor([-0.5418, -1.0572, 0.0742, -0.2536]) tensor([0.2091, 0.1249, 0.3871, 0.2789]) -Greedy action tensor([-0.4063, -2.1103, 0.1886, -0.5565]) tensor([0.2594, 0.0472, 0.4702, 0.2232]) -Greedy action tensor([1.1126, 0.5432, 0.2273, 0.0113]) tensor([0.4327, 0.2449, 0.1785, 0.1439]) -Greedy action tensor([ 1.0079, -0.6425, 0.8962, 0.6699]) tensor([0.3572, 0.0686, 0.3194, 0.2548]) -Greedy action tensor([-0.4535, -0.3774, 0.1044, -0.5125]) tensor([0.2097, 0.2263, 0.3663, 0.1977]) -Greedy action tensor([ 0.4314, -1.1981, 0.0420, 0.3113]) tensor([0.3623, 0.0710, 0.2454, 0.3213]) -Greedy action tensor([-0.4617, -0.6423, 1.4191, -1.1919]) tensor([0.1127, 0.0941, 0.7390, 0.0543]) -Greedy action tensor([ 0.5269, -0.5535, -0.4901, 0.3215]) tensor([0.3975, 0.1349, 0.1438, 0.3237]) -Greedy action tensor([ 0.4867, -1.6106, -0.6093, 0.3878]) tensor([0.4232, 0.0520, 0.1414, 0.3834]) -Greedy action tensor([-1.6793e-01, -7.7493e-01, -4.2611e-04, -3.7357e-01]) tensor([0.2824, 0.1539, 0.3339, 0.2299]) -Greedy action tensor([ 0.5442, 0.0492, -0.2033, 0.5517]) tensor([0.3236, 0.1972, 0.1532, 0.3260]) -Greedy action tensor([1.2704, 0.4084, 0.3019, 0.2464]) tensor([0.4627, 0.1954, 0.1757, 0.1662]) -Greedy action tensor([ 1.0488, -0.7573, 0.6872, -0.5305]) tensor([0.4838, 0.0795, 0.3370, 0.0997]) -Greedy action tensor([-0.4521, -0.7888, 0.0388, 0.1781]) tensor([0.1913, 0.1367, 0.3126, 0.3594]) -Greedy action tensor([-0.3274, -0.0238, 1.0596, -0.4073]) tensor([0.1374, 0.1861, 0.5498, 0.1268]) -Greedy action tensor([ 0.1926, -1.0714, 1.1081, 0.1928]) tensor([0.2092, 0.0591, 0.5225, 0.2092]) -Greedy action tensor([-1.2568, -1.0870, 1.8288, -0.9739]) tensor([0.0394, 0.0467, 0.8617, 0.0523]) -Greedy action tensor([ 0.7997, -0.8970, 0.8474, 0.4557]) tensor([0.3400, 0.0623, 0.3566, 0.2410]) -Greedy action tensor([-0.2905, -1.3243, 0.5318, -0.4762]) tensor([0.2241, 0.0797, 0.5100, 0.1861]) -Greedy action tensor([ 0.0760, -1.2476, 0.6304, -0.4763]) tensor([0.2791, 0.0743, 0.4859, 0.1607]) -Greedy action tensor([ 0.3676, 0.1492, -0.5679, -0.5740]) tensor([0.3866, 0.3108, 0.1517, 0.1508]) -Greedy action tensor([ 0.4040, 0.6196, -0.2311, -0.5929]) tensor([0.3185, 0.3952, 0.1688, 0.1175]) -Greedy action tensor([ 0.5454, -0.4665, -0.0462, -0.0856]) tensor([0.4083, 0.1484, 0.2260, 0.2173]) -Greedy action tensor([ 0.2236, -1.4368, 0.3936, -0.4912]) tensor([0.3491, 0.0663, 0.4138, 0.1708]) -Greedy action tensor([-0.8469, -0.3760, 0.0176, -0.7919]) tensor([0.1658, 0.2655, 0.3935, 0.1752]) -Greedy action tensor([-0.5939, -1.0383, 0.5756, -1.1502]) tensor([0.1840, 0.1180, 0.5925, 0.1055]) -Greedy action tensor([ 0.4683, -1.0572, 0.4867, 0.2822]) tensor([0.3261, 0.0709, 0.3322, 0.2707]) -Greedy action tensor([-1.3212, -1.0661, 0.0334, -0.9559]) tensor([0.1315, 0.1697, 0.5094, 0.1894]) -Greedy action tensor([ 0.0981, -1.2252, 1.2083, 0.5687]) tensor([0.1694, 0.0451, 0.5142, 0.2712]) -Greedy action tensor([-0.9045, -1.1299, 0.8618, -1.0367]) tensor([0.1173, 0.0936, 0.6862, 0.1028]) -Greedy action tensor([ 0.3595, -0.6218, 0.1713, 0.3843]) tensor([0.3097, 0.1161, 0.2566, 0.3175]) -Greedy action tensor([-0.2512, -0.5189, -0.2801, 0.5612]) tensor([0.2004, 0.1533, 0.1947, 0.4516]) -Greedy action tensor([-0.1754, -0.2483, 0.2478, -1.2128]) tensor([0.2624, 0.2440, 0.4006, 0.0930]) -Greedy action tensor([1.1723, 0.8464, 0.3161, 0.0359]) tensor([0.4053, 0.2925, 0.1721, 0.1301]) -Greedy action tensor([ 0.1568, -0.4345, 0.3709, -1.0645]) tensor([0.3239, 0.1793, 0.4013, 0.0955]) -Greedy action tensor([-0.4135, -2.1826, -0.2645, -0.5856]) tensor([0.3152, 0.0537, 0.3658, 0.2653]) -Greedy action tensor([-0.0897, 0.2118, -0.6328, -0.0571]) tensor([0.2521, 0.3409, 0.1465, 0.2605]) -Greedy action tensor([-0.9100, -0.2848, 1.0506, -0.7040]) tensor([0.0893, 0.1668, 0.6342, 0.1097]) -Greedy action tensor([ 0.0146, -0.9655, -0.5027, -0.2197]) tensor([0.3620, 0.1358, 0.2158, 0.2864]) -Greedy action tensor([ 0.3786, -1.4420, 0.0199, 0.6424]) tensor([0.3162, 0.0512, 0.2209, 0.4117]) -Greedy action tensor([-0.3215, 0.2404, -0.1858, -0.6583]) tensor([0.2168, 0.3802, 0.2483, 0.1548]) -Greedy action tensor([ 0.5130, -2.5889, -0.1431, 0.5348]) tensor([0.3867, 0.0174, 0.2007, 0.3952]) -Greedy action tensor([ 1.0018, -1.0106, 0.4505, 0.7714]) tensor([0.3993, 0.0534, 0.2301, 0.3172]) -Greedy action tensor([-0.2452, -0.3779, -0.2563, -1.1704]) tensor([0.3066, 0.2685, 0.3033, 0.1216]) -Greedy action tensor([-0.0619, -0.5980, -0.3270, -0.1503]) tensor([0.3060, 0.1790, 0.2348, 0.2801]) -Greedy action tensor([ 0.5184, -0.0729, 1.0094, 0.0396]) tensor([0.2627, 0.1454, 0.4292, 0.1627]) -Greedy action tensor([-0.7785, -0.3492, 0.4797, -0.1117]) tensor([0.1250, 0.1919, 0.4397, 0.2434]) -Greedy action tensor([-0.2034, -1.0656, 0.9205, -0.4214]) tensor([0.1886, 0.0796, 0.5802, 0.1516]) -Greedy action tensor([ 2.0579e-05, -3.4914e-01, -7.3596e-01, 1.8013e-01]) tensor([0.2957, 0.2086, 0.1417, 0.3541]) -Greedy action tensor([-1.5936, -0.7489, -0.0486, -0.2830]) tensor([0.0853, 0.1985, 0.3999, 0.3163]) -Greedy action tensor([ 0.7330, 0.0720, -0.1326, -0.1788]) tensor([0.4276, 0.2208, 0.1799, 0.1718]) -Greedy action tensor([-0.0404, 0.0533, 0.2773, -1.2195]) tensor([0.2646, 0.2906, 0.3635, 0.0814]) -Greedy action tensor([1.2279, 0.0073, 0.3852, 1.3037]) tensor([0.3566, 0.1052, 0.1535, 0.3847]) -Greedy action tensor([ 0.2748, -0.0352, -0.1060, 0.4325]) tensor([0.2787, 0.2044, 0.1905, 0.3264]) -Greedy action tensor([-1.3003, -0.7868, -0.0759, 0.2208]) tensor([0.0939, 0.1569, 0.3194, 0.4298]) -Greedy action tensor([ 0.3134, -0.1860, -0.7623, -0.1860]) tensor([0.3914, 0.2375, 0.1335, 0.2375]) -Greedy action tensor([ 0.7231, -0.6697, -0.0979, -0.0751]) tensor([0.4676, 0.1161, 0.2058, 0.2105]) -Greedy action tensor([ 0.7740, -0.4088, -0.1255, -0.2471]) tensor([0.4823, 0.1478, 0.1962, 0.1737]) -Greedy action tensor([ 0.4861, -0.1825, -0.0724, -0.0474]) tensor([0.3744, 0.1918, 0.2142, 0.2196]) -Greedy action tensor([ 0.7505, -0.7249, 0.1716, -0.2728]) tensor([0.4654, 0.1064, 0.2609, 0.1673]) -Greedy action tensor([ 0.8479, -0.6101, 0.0009, -0.4639]) tensor([0.5179, 0.1205, 0.2220, 0.1395]) -Greedy action tensor([ 0.7613, -0.7232, -0.3154, -0.8150]) tensor([0.5637, 0.1277, 0.1921, 0.1165]) -Greedy action tensor([ 1.0712, -0.7352, -0.0512, -0.5389]) tensor([0.5918, 0.0972, 0.1926, 0.1183]) -Greedy action tensor([ 0.8839, -0.6517, -0.1669, -0.5267]) tensor([0.5528, 0.1190, 0.1933, 0.1349]) -Greedy action tensor([ 0.7884, -0.3776, 0.0008, -0.4323]) tensor([0.4851, 0.1512, 0.2207, 0.1431]) -Greedy action tensor([ 0.9526, -0.6981, 0.0779, -0.3506]) tensor([0.5318, 0.1021, 0.2217, 0.1445]) -Greedy action tensor([ 0.8795, -0.6702, 0.0081, -0.4539]) tensor([0.5279, 0.1121, 0.2209, 0.1391]) -Greedy action tensor([ 0.7200, -0.5310, 0.1317, -0.4148]) tensor([0.4623, 0.1323, 0.2567, 0.1486]) -Greedy action tensor([ 0.7336, -0.6489, 0.0039, -0.5586]) tensor([0.4981, 0.1250, 0.2401, 0.1368]) -Greedy action tensor([ 0.5048, -0.0469, -0.1072, -0.1410]) tensor([0.3784, 0.2180, 0.2052, 0.1984]) -Greedy action tensor([ 0.8895, -0.5814, -0.0198, -0.3956]) tensor([0.5238, 0.1203, 0.2110, 0.1449]) -Greedy action tensor([ 0.8179, -0.5441, -0.1174, -0.2465]) tensor([0.5016, 0.1285, 0.1969, 0.1730]) -Greedy action tensor([ 0.9480, -0.4917, 0.0082, -0.3506]) tensor([0.5261, 0.1247, 0.2056, 0.1436]) -Greedy action tensor([ 1.1228, -0.4243, -0.2036, -0.3515]) tensor([0.5857, 0.1247, 0.1555, 0.1341]) -Greedy action tensor([ 0.7991, -0.8378, -0.0893, -0.3613]) tensor([0.5210, 0.1014, 0.2143, 0.1633]) -Greedy action tensor([ 1.0666, -0.5516, -0.0731, -0.3629]) tensor([0.5690, 0.1128, 0.1820, 0.1362]) -Greedy action tensor([ 0.6274, -0.4722, -0.0459, -0.5792]) tensor([0.4668, 0.1555, 0.2381, 0.1397]) -Greedy action tensor([ 0.6098, -0.2467, 0.0394, -0.0482]) tensor([0.3988, 0.1693, 0.2254, 0.2065]) -Greedy action tensor([ 0.4966, -0.3927, 0.0634, -0.3802]) tensor([0.4040, 0.1660, 0.2619, 0.1681]) -Greedy action tensor([ 0.9070, -0.5603, -0.1935, -0.2985]) tensor([0.5368, 0.1238, 0.1786, 0.1608]) -Greedy action tensor([ 0.6303, 0.3007, -0.0220, -0.2568]) tensor([0.3771, 0.2712, 0.1964, 0.1553]) -Greedy action tensor([ 0.5934, -0.4680, -0.0465, -0.1875]) tensor([0.4289, 0.1484, 0.2262, 0.1965]) -Greedy action tensor([ 0.8710, -0.8498, 0.4134, -0.4940]) tensor([0.4838, 0.0866, 0.3061, 0.1235]) -Greedy action tensor([ 1.2759, -0.8377, 0.0159, -0.8437]) tensor([0.6559, 0.0792, 0.1861, 0.0788]) -Greedy action tensor([ 0.5819, -0.3010, -0.0578, -0.1088]) tensor([0.4095, 0.1693, 0.2160, 0.2052]) -Greedy action tensor([ 0.1417, 0.1724, -0.0059, -0.1551]) tensor([0.2749, 0.2835, 0.2372, 0.2043]) -Greedy action tensor([ 0.2423, 0.1458, 0.2594, -0.0545]) tensor([0.2726, 0.2475, 0.2773, 0.2026]) -Greedy action tensor([ 0.8463, -0.5789, -0.0598, -0.2937]) tensor([0.5091, 0.1224, 0.2057, 0.1628]) -Greedy action tensor([ 0.6807, -0.6202, -0.0331, -0.2859]) tensor([0.4668, 0.1271, 0.2286, 0.1775]) -Greedy action tensor([ 0.8429, -0.6557, -0.0448, -0.3104]) tensor([0.5127, 0.1145, 0.2110, 0.1618]) -Greedy action tensor([ 0.2182, 0.1780, -0.1446, -0.0120]) tensor([0.2898, 0.2784, 0.2016, 0.2302]) -Greedy action tensor([ 0.5925, -0.4977, 0.1434, -0.3936]) tensor([0.4260, 0.1432, 0.2719, 0.1589]) -Greedy action tensor([ 0.7829, -0.3884, 0.0173, -0.2744]) tensor([0.4712, 0.1460, 0.2191, 0.1637]) -Greedy action tensor([ 0.7360, -0.6981, -0.0040, -0.2416]) tensor([0.4781, 0.1139, 0.2281, 0.1799]) -Greedy action tensor([ 0.8288, -0.4123, 0.0120, -0.4031]) tensor([0.4944, 0.1429, 0.2185, 0.1442]) -Greedy action tensor([ 0.5852, -0.0120, -0.2549, -0.1902]) tensor([0.4094, 0.2253, 0.1767, 0.1885]) -Greedy action tensor([ 0.9472, -0.0385, 0.1235, -0.2259]) tensor([0.4714, 0.1759, 0.2069, 0.1459]) -Greedy action tensor([ 0.9181, -0.8247, -0.0397, -0.3475]) tensor([0.5432, 0.0951, 0.2085, 0.1532]) -Greedy action tensor([ 0.7239, -0.6513, -0.0173, -0.2963]) tensor([0.4785, 0.1210, 0.2280, 0.1725]) -Greedy action tensor([ 0.8478, -0.8907, -0.0837, -0.0820]) tensor([0.5091, 0.0895, 0.2005, 0.2009]) -Greedy action tensor([ 0.2327, -0.1287, 0.0511, -0.1696]) tensor([0.3126, 0.2178, 0.2606, 0.2090]) -Greedy action tensor([ 0.9296, -0.7525, 0.0717, -0.3375]) tensor([0.5286, 0.0983, 0.2242, 0.1489]) -Greedy action tensor([ 1.2288, -0.9156, -0.1226, -0.3696]) tensor([0.6336, 0.0742, 0.1640, 0.1281]) -Greedy action tensor([ 0.8405, -0.8890, 0.2092, -0.3239]) tensor([0.4947, 0.0877, 0.2631, 0.1544]) -Greedy action tensor([ 0.4718, -0.0168, -0.1053, -0.0663]) tensor([0.3625, 0.2224, 0.2035, 0.2116]) -Greedy action tensor([ 0.6331, -0.4637, -0.0482, -0.0742]) tensor([0.4287, 0.1431, 0.2169, 0.2113]) -Greedy action tensor([ 0.6297, -0.2878, -0.0989, -0.1481]) tensor([0.4271, 0.1706, 0.2061, 0.1962]) -Greedy action tensor([ 1.0864, -0.6257, 0.0171, -0.4299]) tensor([0.5736, 0.1035, 0.1969, 0.1259]) -Greedy action tensor([ 0.8982, -0.6009, -0.0437, -0.4647]) tensor([0.5350, 0.1195, 0.2086, 0.1369]) -Greedy action tensor([ 0.8076, -0.4484, 0.0365, -0.3853]) tensor([0.4877, 0.1389, 0.2255, 0.1479]) -Greedy action tensor([ 0.8057, -0.7060, 0.1311, -0.7036]) tensor([0.5126, 0.1130, 0.2611, 0.1133]) -Greedy action tensor([ 0.5963, -0.2949, -0.0328, -0.2520]) tensor([0.4217, 0.1730, 0.2248, 0.1805]) -Greedy action tensor([ 0.9335, -0.6705, -0.1148, -0.3073]) tensor([0.5432, 0.1092, 0.1904, 0.1571]) -Greedy action tensor([ 1.1709, -0.5484, -0.0101, -0.5175]) tensor([0.5984, 0.1072, 0.1837, 0.1106]) -Greedy action tensor([ 0.6759, -0.3687, -0.1149, -0.1852]) tensor([0.4488, 0.1579, 0.2035, 0.1897]) -Greedy action tensor([ 0.6116, -0.5008, 0.3454, -0.5448]) tensor([0.4150, 0.1364, 0.3180, 0.1306]) -Greedy action tensor([ 0.8926, -0.1762, -0.1107, -0.1727]) tensor([0.4867, 0.1671, 0.1785, 0.1677]) -Greedy action tensor([ 0.8429, -0.6493, 0.1246, -0.2924]) tensor([0.4917, 0.1106, 0.2397, 0.1580]) -Greedy action tensor([ 0.5251, -0.3340, -0.0762, -0.1141]) tensor([0.4001, 0.1695, 0.2193, 0.2111]) -Greedy action tensor([ 0.5563, -0.2206, -0.1005, -0.3449]) tensor([0.4194, 0.1928, 0.2175, 0.1703]) -Greedy action tensor([ 0.9407, -0.3724, 0.0138, -0.7470]) tensor([0.5406, 0.1454, 0.2140, 0.1000]) -Greedy action tensor([ 0.4026, -0.2052, -0.0918, -0.1171]) tensor([0.3637, 0.1981, 0.2219, 0.2163]) -Greedy action tensor([ 1.1025, -0.3634, 0.1673, -0.2257]) tensor([0.5296, 0.1223, 0.2078, 0.1403]) -Greedy action tensor([ 0.6705, -0.4139, 0.0014, -0.2208]) tensor([0.4424, 0.1496, 0.2266, 0.1814]) -Greedy action tensor([ 0.9651, -0.4094, -0.1297, -0.3047]) tensor([0.5352, 0.1354, 0.1791, 0.1503]) -Greedy action tensor([ 1.2772, -0.7639, 0.1389, -0.7457]) tensor([0.6319, 0.0821, 0.2024, 0.0836]) -Greedy action tensor([ 0.8617, -0.8380, 0.1765, -0.5525]) tensor([0.5182, 0.0947, 0.2611, 0.1260]) -Greedy action tensor([ 0.4710, -0.0710, 0.0149, -0.3884]) tensor([0.3790, 0.2204, 0.2402, 0.1605]) -Greedy action tensor([ 0.8055, 0.0493, -0.1319, -0.0169]) tensor([0.4347, 0.2041, 0.1702, 0.1910]) -Greedy action tensor([ 0.7130, -0.9530, 0.1565, -0.3684]) tensor([0.4759, 0.0899, 0.2728, 0.1614]) -Greedy action tensor([ 0.8718, -0.6654, 0.0166, -0.6164]) tensor([0.5359, 0.1152, 0.2279, 0.1210]) -Greedy action tensor([ 0.5798, -0.2022, 0.0430, -0.1346]) tensor([0.3950, 0.1807, 0.2309, 0.1934]) -Greedy action tensor([ 0.7078, -0.3539, -0.0861, -0.1804]) tensor([0.4526, 0.1565, 0.2046, 0.1862]) -Greedy action tensor([ 0.4682, -0.2711, 0.1836, -0.4133]) tensor([0.3782, 0.1806, 0.2845, 0.1567]) -Greedy action tensor([ 0.4544, -0.0831, -0.0120, -0.4269]) tensor([0.3808, 0.2225, 0.2389, 0.1578]) -Greedy action tensor([ 1.3105, -1.2046, 0.1920, -0.7269]) tensor([0.6502, 0.0526, 0.2125, 0.0848]) -Greedy action tensor([ 1.1472, -0.4632, -0.0659, -0.3230]) tensor([0.5791, 0.1157, 0.1721, 0.1331]) -Greedy action tensor([ 1.4722, -0.5431, -0.3641, 0.4167]) tensor([0.6095, 0.0812, 0.0972, 0.2121]) -Greedy action tensor([ 1.3406, -0.3243, -0.3523, 0.3813]) tensor([0.5694, 0.1077, 0.1048, 0.2182]) -Greedy action tensor([ 1.8702, -0.8183, -0.3303, 0.4572]) tensor([0.7032, 0.0478, 0.0779, 0.1711]) -Greedy action tensor([ 1.2905, -0.7238, 0.0658, 0.3823]) tensor([0.5463, 0.0729, 0.1605, 0.2203]) -Greedy action tensor([ 1.9873, -0.9948, -0.0178, 0.4488]) tensor([0.7143, 0.0362, 0.0962, 0.1534]) -Greedy action tensor([ 1.5333, -0.4053, -0.1029, 0.5275]) tensor([0.5867, 0.0844, 0.1142, 0.2146]) -Greedy action tensor([ 1.1929, -0.4713, -0.0991, 0.1795]) tensor([0.5473, 0.1036, 0.1504, 0.1987]) -Greedy action tensor([ 2.1545, -0.8343, 0.0297, 0.5300]) tensor([0.7316, 0.0368, 0.0874, 0.1441]) -Greedy action tensor([ 1.4977, -0.3661, -0.1964, 0.0558]) tensor([0.6348, 0.0984, 0.1167, 0.1501]) -Greedy action tensor([ 1.1496, -0.8458, 0.0667, 0.2111]) tensor([0.5360, 0.0729, 0.1815, 0.2097]) -Greedy action tensor([ 1.4343, -0.1595, -0.1340, 0.3918]) tensor([0.5669, 0.1152, 0.1181, 0.1999]) -Greedy action tensor([ 2.4205, -0.9630, -0.2442, 0.7926]) tensor([0.7693, 0.0261, 0.0536, 0.1510]) -Greedy action tensor([ 1.1958, -0.3637, -0.3230, 0.4728]) tensor([0.5223, 0.1098, 0.1144, 0.2535]) -Greedy action tensor([ 1.3382, -0.3053, -0.4886, 0.4441]) tensor([0.5672, 0.1096, 0.0913, 0.2319]) -Greedy action tensor([ 2.1849, -0.6998, -0.4370, 0.7030]) tensor([0.7376, 0.0412, 0.0536, 0.1676]) -Greedy action tensor([ 1.8752, -0.9669, -0.6670, 0.4570]) tensor([0.7251, 0.0423, 0.0571, 0.1756]) -Greedy action tensor([ 1.4721, -0.4536, -0.8994, 0.6400]) tensor([0.5973, 0.0871, 0.0558, 0.2599]) -Greedy action tensor([ 1.2214, -0.5051, -0.4467, 0.4345]) tensor([0.5489, 0.0977, 0.1035, 0.2499]) -Greedy action tensor([ 1.4178, -0.6659, -0.2383, 0.8196]) tensor([0.5362, 0.0667, 0.1023, 0.2948]) -Greedy action tensor([ 1.5280, 0.1671, -0.1818, 0.6574]) tensor([0.5388, 0.1382, 0.0975, 0.2256]) -Greedy action tensor([ 1.5402, 0.1753, -0.9609, 0.3238]) tensor([0.6121, 0.1563, 0.0502, 0.1814]) -Greedy action tensor([ 1.9793, -0.4743, -0.4724, 0.6392]) tensor([0.6974, 0.0600, 0.0601, 0.1826]) -Greedy action tensor([ 1.4065, -0.3420, -0.3214, 0.0908]) tensor([0.6173, 0.1074, 0.1097, 0.1656]) -Greedy action tensor([ 0.8341, -0.3917, -0.1129, 0.3438]) tensor([0.4359, 0.1280, 0.1691, 0.2670]) -Greedy action tensor([ 1.5422, -0.7399, 0.0091, 0.6267]) tensor([0.5820, 0.0594, 0.1256, 0.2330]) -Greedy action tensor([ 1.5983, -0.7636, -0.1607, 0.5419]) tensor([0.6195, 0.0584, 0.1067, 0.2154]) -Greedy action tensor([ 1.7663, -0.8615, -0.5531, 0.7369]) tensor([0.6545, 0.0473, 0.0644, 0.2338]) -Greedy action tensor([ 1.4778, -0.0221, -0.0669, 0.3807]) tensor([0.5648, 0.1261, 0.1205, 0.1886]) -Greedy action tensor([ 1.1002, -0.3937, -0.5437, 0.4897]) tensor([0.5100, 0.1145, 0.0985, 0.2770]) -Greedy action tensor([ 1.6867, -0.4339, -0.5577, 0.3816]) tensor([0.6680, 0.0801, 0.0708, 0.1811]) -Greedy action tensor([ 1.7414, -0.4509, -0.4295, 0.4060]) tensor([0.6717, 0.0750, 0.0766, 0.1767]) -Greedy action tensor([ 1.6988, -0.3127, -0.8541, 0.1229]) tensor([0.7050, 0.0943, 0.0549, 0.1458]) -Greedy action tensor([ 0.9635, -0.7273, -0.2226, -0.0959]) tensor([0.5445, 0.1004, 0.1663, 0.1888]) -Greedy action tensor([ 1.8031, -0.7395, -0.7236, 0.1541]) tensor([0.7403, 0.0582, 0.0592, 0.1423]) -Greedy action tensor([ 1.3707, -0.4582, -0.3901, 0.2857]) tensor([0.5986, 0.0961, 0.1029, 0.2023]) -Greedy action tensor([ 1.9950, -0.5389, -0.4361, 0.7767]) tensor([0.6835, 0.0542, 0.0601, 0.2021]) -Greedy action tensor([ 0.9605, -0.3550, -0.1212, 0.3986]) tensor([0.4593, 0.1232, 0.1557, 0.2618]) -Greedy action tensor([ 1.2222, -0.7457, -0.3867, 0.5554]) tensor([0.5396, 0.0754, 0.1080, 0.2770]) -Greedy action tensor([ 1.5549, -0.1065, -0.3667, 0.3911]) tensor([0.6066, 0.1152, 0.0888, 0.1894]) -Greedy action tensor([ 1.7954, 0.1132, -0.1897, 0.4785]) tensor([0.6284, 0.1169, 0.0863, 0.1684]) -Greedy action tensor([ 1.9736, -1.2430, -0.1311, 0.7218]) tensor([0.6906, 0.0277, 0.0842, 0.1975]) -Greedy action tensor([ 1.2227, -0.0689, -0.9763, 0.2151]) tensor([0.5712, 0.1570, 0.0633, 0.2085]) -Greedy action tensor([ 1.4326, -0.7735, -0.3758, 0.1869]) tensor([0.6403, 0.0705, 0.1049, 0.1842]) -Greedy action tensor([ 1.4966, -0.3220, -0.6338, 0.2873]) tensor([0.6331, 0.1027, 0.0752, 0.1889]) -Greedy action tensor([ 1.7150, -0.4414, -0.2231, 0.2960]) tensor([0.6659, 0.0771, 0.0959, 0.1611]) -Greedy action tensor([ 1.1887, 0.1477, -0.7313, 0.6961]) tensor([0.4738, 0.1673, 0.0695, 0.2895]) -Greedy action tensor([ 1.4031, -0.4142, -0.9289, 0.1377]) tensor([0.6486, 0.1054, 0.0630, 0.1830]) -Greedy action tensor([ 1.4009, -0.4280, -0.5128, 0.1351]) tensor([0.6289, 0.1010, 0.0928, 0.1773]) -Greedy action tensor([ 1.7493, -1.1049, -0.0567, 0.7581]) tensor([0.6277, 0.0362, 0.1031, 0.2330]) -Greedy action tensor([ 0.9181, -0.4645, 0.1427, -0.0923]) tensor([0.4818, 0.1209, 0.2219, 0.1754]) -Greedy action tensor([ 1.5449, -0.7637, -0.1780, 0.5910]) tensor([0.6013, 0.0598, 0.1074, 0.2316]) -Greedy action tensor([ 0.9286, -0.5835, -0.3224, 0.6140]) tensor([0.4471, 0.0986, 0.1280, 0.3264]) -Greedy action tensor([ 1.5419, -0.7262, -0.1845, 0.3732]) tensor([0.6281, 0.0650, 0.1118, 0.1952]) -Greedy action tensor([ 2.1887, -0.6311, -0.5806, 0.5143]) tensor([0.7635, 0.0455, 0.0479, 0.1431]) -Greedy action tensor([ 2.1385, -0.8207, -0.3161, 0.6155]) tensor([0.7376, 0.0383, 0.0634, 0.1608]) -Greedy action tensor([ 1.3111, -0.3167, -0.7221, 0.3556]) tensor([0.5841, 0.1147, 0.0765, 0.2247]) -Greedy action tensor([ 1.7053, -0.9258, 0.0594, 0.2916]) tensor([0.6631, 0.0477, 0.1279, 0.1613]) -Greedy action tensor([ 2.8059, 0.6826, -0.0465, 0.1811]) tensor([0.8001, 0.0957, 0.0462, 0.0580]) -Greedy action tensor([ 1.0522, -0.5821, -0.8044, 0.6778]) tensor([0.4905, 0.0957, 0.0766, 0.3373]) -Greedy action tensor([ 2.6790, -0.6317, -0.1432, 0.8775]) tensor([0.7930, 0.0289, 0.0472, 0.1309]) -Greedy action tensor([ 1.4327, -0.7246, -0.3344, 0.6741]) tensor([0.5699, 0.0659, 0.0974, 0.2669]) -Greedy action tensor([ 1.0090, -0.1332, -0.6952, 0.1507]) tensor([0.5195, 0.1658, 0.0945, 0.2202]) -Greedy action tensor([ 1.4754, -0.9087, -0.5496, 0.5090]) tensor([0.6232, 0.0574, 0.0823, 0.2371]) -Greedy action tensor([ 1.6637, -0.7591, -0.2112, 0.4064]) tensor([0.6551, 0.0581, 0.1005, 0.1863]) -Greedy action tensor([ 2.0425, -1.1962, -0.1991, -0.1480]) tensor([0.7953, 0.0312, 0.0845, 0.0890]) -Greedy action tensor([ 2.4408, -1.1183, -0.4576, 0.5508]) tensor([0.8099, 0.0231, 0.0446, 0.1224]) -Greedy action tensor([ 1.5306, -0.2562, -0.5057, 0.7481]) tensor([0.5697, 0.0954, 0.0744, 0.2605]) -Greedy action tensor([ 1.9166, -0.2004, -0.9837, 0.3446]) tensor([0.7231, 0.0870, 0.0398, 0.1501]) -Greedy action tensor([ 1.8710, -0.6367, -0.2776, 0.7112]) tensor([0.6615, 0.0539, 0.0772, 0.2074]) -Greedy action tensor([ 1.5005, -0.4133, -0.0858, 0.0880]) tensor([0.6267, 0.0924, 0.1283, 0.1526]) -Greedy action tensor([ 1.2148, -0.4372, -0.2991, 0.4340]) tensor([0.5348, 0.1025, 0.1177, 0.2450]) -Greedy action tensor([ 1.4835, -0.8009, -0.2493, 0.2285]) tensor([0.6395, 0.0651, 0.1131, 0.1823]) -Greedy action tensor([ 1.1026, -0.2705, -0.5360, 0.2474]) tensor([0.5340, 0.1353, 0.1037, 0.2270]) -Greedy action tensor([ 1.5312, -0.9269, 0.1942, 0.5024]) tensor([0.5863, 0.0502, 0.1540, 0.2096]) -Greedy action tensor([ 1.7673, -0.6675, -0.1134, 0.6166]) tensor([0.6425, 0.0563, 0.0980, 0.2033]) -Greedy action tensor([ 1.1224, -0.0776, -0.6385, 0.2383]) tensor([0.5302, 0.1597, 0.0911, 0.2190]) -Greedy action tensor([ 1.4615, 0.0720, -0.5537, -0.1112]) tensor([0.6289, 0.1567, 0.0838, 0.1305]) -Greedy action tensor([ 1.3695, -0.1506, -0.3343, 0.1090]) tensor([0.5937, 0.1299, 0.1081, 0.1683]) -Greedy action tensor([ 1.6229, -0.3051, -0.7975, 0.8019]) tensor([0.5973, 0.0869, 0.0531, 0.2628]) -Greedy action tensor([ 1.3883, -0.4473, -0.5019, 0.0754]) tensor([0.6331, 0.1010, 0.0956, 0.1703]) -Greedy action tensor([ 1.2481, 0.0082, -0.6848, 0.3391]) tensor([0.5443, 0.1576, 0.0788, 0.2193]) -Greedy action tensor([-1.7178, -0.4428, 0.5433, -0.0550]) tensor([0.0514, 0.1840, 0.4933, 0.2712]) -Greedy action tensor([-1.8896, -0.4531, 0.6812, -0.1026]) tensor([0.0412, 0.1734, 0.5391, 0.2462]) -Greedy action tensor([-1.8829, -0.3859, 0.6391, -0.1387]) tensor([0.0423, 0.1890, 0.5267, 0.2420]) -Greedy action tensor([-1.9448, -0.4495, 0.6676, -0.1807]) tensor([0.0401, 0.1789, 0.5468, 0.2341]) -Greedy action tensor([-1.2234, -0.5413, 0.3782, 0.1090]) tensor([0.0853, 0.1687, 0.4229, 0.3231]) -Greedy action tensor([-1.9222, -0.3845, 0.6439, -0.1769]) tensor([0.0410, 0.1908, 0.5335, 0.2348]) -Greedy action tensor([-1.4666, 0.0313, 0.4732, 0.0851]) tensor([0.0583, 0.2608, 0.4057, 0.2752]) -Greedy action tensor([-1.8992, -0.4467, 0.6414, -0.1584]) tensor([0.0423, 0.1806, 0.5362, 0.2410]) -Greedy action tensor([-8.8521e-01, 3.6255e-04, 2.0342e-01, -6.1130e-02]) tensor([0.1153, 0.2795, 0.3424, 0.2628]) -Greedy action tensor([-1.8363, -0.4502, 0.6211, -0.1251]) tensor([0.0450, 0.1801, 0.5256, 0.2492]) -Greedy action tensor([-1.9104, -0.4363, 0.6452, -0.1640]) tensor([0.0417, 0.1821, 0.5371, 0.2391]) -Greedy action tensor([-1.2723, 0.7219, 0.3711, -0.2792]) tensor([0.0617, 0.4530, 0.3189, 0.1665]) -Greedy action tensor([-1.9232, -0.4284, 0.6533, -0.1691]) tensor([0.0410, 0.1828, 0.5392, 0.2369]) -Greedy action tensor([-1.6803, -0.0895, 0.5602, 0.0103]) tensor([0.0482, 0.2368, 0.4534, 0.2616]) -Greedy action tensor([-0.6286, 0.4329, 0.1000, -0.1480]) tensor([0.1319, 0.3814, 0.2734, 0.2133]) -Greedy action tensor([-1.9370, -0.4497, 0.6620, -0.1753]) tensor([0.0405, 0.1792, 0.5446, 0.2357]) -Greedy action tensor([-1.8021, -0.4787, 0.6065, -0.1017]) tensor([0.0468, 0.1759, 0.5207, 0.2565]) -Greedy action tensor([-1.7303, -0.4686, 0.5591, -0.0748]) tensor([0.0509, 0.1798, 0.5026, 0.2666]) -Greedy action tensor([-1.7822, -0.2775, 0.5790, -0.0619]) tensor([0.0461, 0.2076, 0.4888, 0.2575]) -Greedy action tensor([-1.9125, -0.4528, 0.6560, -0.1625]) tensor([0.0415, 0.1786, 0.5412, 0.2387]) -Greedy action tensor([-1.7595, -0.3536, 0.5615, -0.1213]) tensor([0.0490, 0.1998, 0.4990, 0.2521]) -Greedy action tensor([-1.8916, -0.4277, 0.6405, -0.1422]) tensor([0.0423, 0.1828, 0.5318, 0.2431]) -Greedy action tensor([-1.8086, -0.5092, 0.5966, -0.1352]) tensor([0.0474, 0.1740, 0.5257, 0.2529]) -Greedy action tensor([-1.6367, -0.4667, 0.6635, 0.1527]) tensor([0.0495, 0.1596, 0.4942, 0.2966]) -Greedy action tensor([-0.8596, -0.0723, 0.1955, -0.0228]) tensor([0.1194, 0.2623, 0.3428, 0.2756]) -Greedy action tensor([-1.8956, -0.4562, 0.6404, -0.1419]) tensor([0.0423, 0.1786, 0.5346, 0.2445]) -Greedy action tensor([-1.2195, 0.3132, 0.3102, -0.0812]) tensor([0.0748, 0.3464, 0.3453, 0.2335]) -Greedy action tensor([-1.6147, -0.2898, 0.6411, 0.1387]) tensor([0.0498, 0.1873, 0.4753, 0.2876]) -Greedy action tensor([-1.2782, -0.2840, 0.2914, 0.2220]) tensor([0.0770, 0.2080, 0.3699, 0.3451]) -Greedy action tensor([-1.4351, 0.6553, 0.2629, 0.2230]) tensor([0.0505, 0.4085, 0.2759, 0.2651]) -Greedy action tensor([-1.5173, -0.1945, 0.4422, 0.0378]) tensor([0.0603, 0.2263, 0.4278, 0.2855]) -Greedy action tensor([-1.9351, -0.4492, 0.6695, -0.1724]) tensor([0.0404, 0.1784, 0.5460, 0.2353]) -Greedy action tensor([-1.1345, 0.0834, 0.2536, 0.0195]) tensor([0.0865, 0.2924, 0.3467, 0.2743]) -Greedy action tensor([-0.6870, 0.7554, 0.2511, 0.4051]) tensor([0.0929, 0.3930, 0.2373, 0.2768]) -Greedy action tensor([-1.4820, -0.0278, 0.3888, -0.0163]) tensor([0.0621, 0.2658, 0.4032, 0.2689]) -Greedy action tensor([-0.0982, 0.3682, 0.4076, 0.7057]) tensor([0.1542, 0.2458, 0.2556, 0.3444]) -Greedy action tensor([-1.4682, -0.0686, 0.4732, 0.2021]) tensor([0.0577, 0.2338, 0.4020, 0.3065]) -Greedy action tensor([-1.9484, -0.4535, 0.6700, -0.1829]) tensor([0.0400, 0.1782, 0.5482, 0.2336]) -Greedy action tensor([-1.8934, -0.2852, 0.6178, -0.1480]) tensor([0.0416, 0.2077, 0.5124, 0.2383]) -Greedy action tensor([-1.9130, -0.4388, 0.6455, -0.1611]) tensor([0.0416, 0.1816, 0.5371, 0.2397]) -Greedy action tensor([-1.8815, -0.3369, 0.6030, -0.1311]) tensor([0.0427, 0.1999, 0.5118, 0.2456]) -Greedy action tensor([-1.9198, -0.4482, 0.6584, -0.1665]) tensor([0.0411, 0.1792, 0.5421, 0.2376]) -Greedy action tensor([-1.8599, -0.4081, 0.6208, -0.1303]) tensor([0.0437, 0.1868, 0.5228, 0.2467]) -Greedy action tensor([-1.8998, -0.4093, 0.6582, -0.1446]) tensor([0.0414, 0.1840, 0.5349, 0.2397]) -Greedy action tensor([-1.9082, -0.4423, 0.6513, -0.1489]) tensor([0.0415, 0.1800, 0.5372, 0.2413]) -Greedy action tensor([-1.8882, -0.4050, 0.6352, -0.1535]) tensor([0.0425, 0.1872, 0.5297, 0.2407]) -Greedy action tensor([-1.3911, -0.4262, 0.3625, 0.1162]) tensor([0.0719, 0.1886, 0.4151, 0.3245]) -Greedy action tensor([-1.0095, 0.5976, 0.0862, 0.1711]) tensor([0.0817, 0.4077, 0.2445, 0.2661]) -Greedy action tensor([-1.8665, -0.4416, 0.6286, -0.1428]) tensor([0.0437, 0.1817, 0.5297, 0.2449]) -Greedy action tensor([-1.8821, -0.4425, 0.6358, -0.1509]) tensor([0.0430, 0.1813, 0.5330, 0.2427]) -Greedy action tensor([-1.8258, -0.4742, 0.6199, -0.0810]) tensor([0.0452, 0.1746, 0.5215, 0.2587]) -Greedy action tensor([-0.5111, 0.8454, 0.0489, 0.0657]) tensor([0.1189, 0.4615, 0.2081, 0.2116]) -Greedy action tensor([-1.9052, -0.4146, 0.6418, -0.1567]) tensor([0.0417, 0.1853, 0.5330, 0.2399]) -Greedy action tensor([-1.7307, -0.4036, 0.6156, -0.0448]) tensor([0.0485, 0.1829, 0.5068, 0.2618]) -Greedy action tensor([-1.5499, -0.4500, 0.5194, 0.0750]) tensor([0.0588, 0.1767, 0.4658, 0.2987]) -Greedy action tensor([-1.7585, -0.2054, 0.6122, -0.3494]) tensor([0.0487, 0.2303, 0.5216, 0.1994]) -Greedy action tensor([-1.5843, -0.1887, 0.5080, 0.0560]) tensor([0.0546, 0.2207, 0.4429, 0.2818]) -Greedy action tensor([-1.8954, -0.3732, 0.6405, -0.1543]) tensor([0.0418, 0.1916, 0.5281, 0.2385]) -Greedy action tensor([-1.4469, -0.6204, 0.4025, 0.1029]) tensor([0.0697, 0.1592, 0.4429, 0.3282]) -Greedy action tensor([-1.7830, -0.3566, 0.6287, -0.0896]) tensor([0.0460, 0.1914, 0.5127, 0.2500]) -Greedy action tensor([-1.9376, -0.4296, 0.6601, -0.1760]) tensor([0.0404, 0.1824, 0.5422, 0.2350]) -Greedy action tensor([-0.3939, 0.9864, 0.0605, 0.0499]) tensor([0.1233, 0.4903, 0.1942, 0.1922]) -Greedy action tensor([-1.9025, -0.4685, 0.6393, -0.1595]) tensor([0.0423, 0.1777, 0.5380, 0.2420]) -Greedy action tensor([-1.1683, 0.3814, 0.2129, -0.0013]) tensor([0.0775, 0.3651, 0.3085, 0.2490]) -Greedy action tensor([-1.7767, -0.4516, 0.5841, -0.0972]) tensor([0.0482, 0.1816, 0.5114, 0.2588]) -Greedy action tensor([-1.9273, -0.4344, 0.6566, -0.1713]) tensor([0.0408, 0.1817, 0.5410, 0.2364]) -Greedy action tensor([-1.8094, -0.2611, 0.5775, -0.1313]) tensor([0.0456, 0.2144, 0.4959, 0.2441]) -Greedy action tensor([-1.9264, -0.4560, 0.6627, -0.1700]) tensor([0.0409, 0.1779, 0.5445, 0.2368]) -Greedy action tensor([-1.7536, -0.4533, 0.5791, -0.0790]) tensor([0.0492, 0.1807, 0.5073, 0.2627]) -Greedy action tensor([ 0.6379, -0.1146, 0.9796, 1.7771]) tensor([0.1666, 0.0785, 0.2345, 0.5205]) -Greedy action tensor([-1.7330, -0.3450, 0.5447, -0.0984]) tensor([0.0503, 0.2015, 0.4905, 0.2578]) -Greedy action tensor([-1.5669, -0.5418, 0.8431, 0.5777]) tensor([0.0426, 0.1188, 0.4746, 0.3640]) -Greedy action tensor([-1.8708, -0.3367, 0.6269, -0.1371]) tensor([0.0426, 0.1977, 0.5182, 0.2414]) -Greedy action tensor([-1.9138, -0.3766, 0.6355, -0.1564]) tensor([0.0412, 0.1918, 0.5278, 0.2391]) -Greedy action tensor([-1.8517, -0.3413, 0.6559, -0.0647]) tensor([0.0421, 0.1905, 0.5163, 0.2512]) -Greedy action tensor([-1.9480, -0.4522, 0.6688, -0.1825]) tensor([0.0400, 0.1785, 0.5477, 0.2338]) -Greedy action tensor([-2.4842e-01, -4.5681e-04, 4.2058e-01, 9.5183e-01]) tensor([0.1324, 0.1696, 0.2584, 0.4396]) -Greedy action tensor([-1.6320, -0.5047, 0.5044, -0.0196]) tensor([0.0569, 0.1757, 0.4820, 0.2854]) -Greedy action tensor([-1.5902, -0.5063, 0.4927, -0.0040]) tensor([0.0593, 0.1752, 0.4759, 0.2896]) -Greedy action tensor([-1.7331, -0.0905, 0.5314, -0.1422]) tensor([0.0483, 0.2497, 0.4650, 0.2371]) -Greedy action tensor([-1.4936, 0.4543, 0.3067, 0.0803]) tensor([0.0529, 0.3713, 0.3203, 0.2554]) -Greedy action tensor([ 0.1964, -0.1627, -0.0388, -0.3141]) tensor([0.3237, 0.2261, 0.2559, 0.1943]) -Greedy action tensor([ 0.6059, -0.3307, 0.1608, -0.5366]) tensor([0.4252, 0.1667, 0.2725, 0.1357]) -Greedy action tensor([ 0.4227, -0.3911, 0.0333, -0.2285]) tensor([0.3785, 0.1677, 0.2564, 0.1974]) -Greedy action tensor([ 0.7599, -0.5317, -0.1151, -0.3417]) tensor([0.4941, 0.1358, 0.2060, 0.1642]) -Greedy action tensor([ 0.6445, -0.1407, -0.0502, -0.0158]) tensor([0.4045, 0.1845, 0.2020, 0.2090]) -Greedy action tensor([ 0.7507, -0.7786, 0.1296, -0.2800]) tensor([0.4738, 0.1027, 0.2546, 0.1690]) -Greedy action tensor([ 0.5942, 0.1381, -0.0832, 0.1231]) tensor([0.3615, 0.2291, 0.1836, 0.2257]) -Greedy action tensor([ 1.1474, -0.5878, -0.0269, -0.6633]) tensor([0.6065, 0.1070, 0.1874, 0.0992]) -Greedy action tensor([ 0.4761, -0.3712, -0.1267, -0.0701]) tensor([0.3914, 0.1677, 0.2142, 0.2267]) -Greedy action tensor([ 0.8155, -0.8914, -0.0293, -0.4221]) tensor([0.5260, 0.0954, 0.2260, 0.1526]) -Greedy action tensor([ 0.7005, -0.4162, -0.0758, -0.3557]) tensor([0.4683, 0.1533, 0.2155, 0.1629]) -Greedy action tensor([ 0.9332, -0.5820, -0.0453, -0.2447]) tensor([0.5253, 0.1154, 0.1975, 0.1618]) -Greedy action tensor([ 0.3482, -0.1186, -0.1307, -0.1568]) tensor([0.3509, 0.2200, 0.2174, 0.2118]) -Greedy action tensor([ 1.1555, -0.4620, -0.0504, -0.3234]) tensor([0.5795, 0.1150, 0.1735, 0.1321]) -Greedy action tensor([ 0.5269, -0.3277, -0.1882, -0.0339]) tensor([0.4024, 0.1712, 0.1968, 0.2297]) -Greedy action tensor([ 0.5166, 0.0128, 0.0072, -0.2695]) tensor([0.3759, 0.2271, 0.2258, 0.1712]) -Greedy action tensor([ 0.6240, -0.4093, 0.0790, -0.2020]) tensor([0.4213, 0.1499, 0.2443, 0.1845]) -Greedy action tensor([ 0.6744, -0.5217, -0.1049, -0.2272]) tensor([0.4615, 0.1395, 0.2117, 0.1873]) -Greedy action tensor([ 0.4002, 0.1557, -0.1297, 0.0331]) tensor([0.3263, 0.2555, 0.1921, 0.2261]) -Greedy action tensor([ 0.8496, -0.6519, 0.0863, -0.2365]) tensor([0.4935, 0.1099, 0.2300, 0.1666]) -Greedy action tensor([ 0.8704, -0.6296, 0.1460, -0.4130]) tensor([0.5038, 0.1124, 0.2442, 0.1396]) -Greedy action tensor([ 0.6988, -0.4911, -0.1218, -0.2174]) tensor([0.4663, 0.1419, 0.2053, 0.1865]) -Greedy action tensor([ 0.6122, -0.3584, 0.0312, -0.6841]) tensor([0.4521, 0.1713, 0.2529, 0.1237]) -Greedy action tensor([ 0.8791, -0.6713, 0.0028, -0.4326]) tensor([0.5269, 0.1118, 0.2194, 0.1419]) -Greedy action tensor([ 0.8591, -0.6184, 0.2129, -0.3893]) tensor([0.4904, 0.1119, 0.2570, 0.1407]) -Greedy action tensor([ 0.4419, -0.2411, -0.0908, -0.2728]) tensor([0.3874, 0.1957, 0.2274, 0.1896]) -Greedy action tensor([ 0.8054, -0.1923, -0.0466, -0.1844]) tensor([0.4615, 0.1702, 0.1969, 0.1715]) -Greedy action tensor([ 1.0443, -0.5197, -0.0899, -0.3923]) tensor([0.5654, 0.1183, 0.1819, 0.1344]) -Greedy action tensor([ 0.8374, -0.4960, -0.0743, -0.2002]) tensor([0.4951, 0.1305, 0.1990, 0.1754]) -Greedy action tensor([ 0.6082, -0.5239, -0.1325, -0.2425]) tensor([0.4492, 0.1448, 0.2142, 0.1919]) -Greedy action tensor([ 0.6275, -0.1359, -0.0158, -0.1225]) tensor([0.4059, 0.1892, 0.2133, 0.1917]) -Greedy action tensor([ 1.1166, -0.6463, 0.2123, -0.8392]) tensor([0.5821, 0.0999, 0.2357, 0.0823]) -Greedy action tensor([ 1.0556, -0.9123, 0.1572, -0.6092]) tensor([0.5760, 0.0805, 0.2345, 0.1090]) -Greedy action tensor([ 0.6057, -0.2952, -0.0684, -0.1904]) tensor([0.4225, 0.1716, 0.2153, 0.1906]) -Greedy action tensor([ 5.5556e-01, -1.5020e-01, -1.4860e-04, -1.5997e-01]) tensor([0.3912, 0.1931, 0.2244, 0.1913]) -Greedy action tensor([ 0.8341, -0.5860, 0.0009, -0.4065]) tensor([0.5088, 0.1230, 0.2211, 0.1471]) -Greedy action tensor([ 0.5114, -0.2753, -0.0675, -0.4732]) tensor([0.4185, 0.1906, 0.2346, 0.1563]) -Greedy action tensor([ 1.0092, -0.9742, 0.0998, -0.2426]) tensor([0.5475, 0.0753, 0.2205, 0.1566]) -Greedy action tensor([ 0.5939, 0.0849, -0.1191, -0.0697]) tensor([0.3837, 0.2306, 0.1881, 0.1976]) -Greedy action tensor([ 0.7361, -0.6549, -0.0934, -0.3744]) tensor([0.4964, 0.1235, 0.2166, 0.1635]) -Greedy action tensor([ 0.5060, -0.3196, -0.0949, -0.2508]) tensor([0.4073, 0.1784, 0.2233, 0.1911]) -Greedy action tensor([ 0.5211, -0.2265, -0.1674, -0.1324]) tensor([0.4006, 0.1897, 0.2012, 0.2084]) -Greedy action tensor([ 0.7153, -0.3562, -0.0115, -0.1028]) tensor([0.4411, 0.1511, 0.2132, 0.1946]) -Greedy action tensor([ 0.8529, -0.5037, -0.0851, -0.3089]) tensor([0.5097, 0.1313, 0.1995, 0.1595]) -Greedy action tensor([ 0.7108, -0.1018, -0.0391, -0.3062]) tensor([0.4390, 0.1948, 0.2074, 0.1588]) -Greedy action tensor([ 0.8269, -0.5613, -0.0063, -0.3551]) tensor([0.5023, 0.1253, 0.2183, 0.1540]) -Greedy action tensor([ 0.6551, -0.2213, 0.0758, -0.2094]) tensor([0.4170, 0.1736, 0.2337, 0.1757]) -Greedy action tensor([ 0.0659, 0.5726, -0.0462, -0.0934]) tensor([0.2269, 0.3767, 0.2029, 0.1935]) -Greedy action tensor([ 0.5667, 0.0840, -0.0984, 0.0717]) tensor([0.3649, 0.2252, 0.1876, 0.2224]) -Greedy action tensor([ 0.3707, -0.1403, -0.1224, -0.3118]) tensor([0.3682, 0.2209, 0.2249, 0.1861]) -Greedy action tensor([ 0.4936, -0.0568, -0.1247, -0.4846]) tensor([0.4013, 0.2315, 0.2163, 0.1509]) -Greedy action tensor([ 0.5905, -0.4353, -0.1085, -0.1729]) tensor([0.4307, 0.1544, 0.2141, 0.2007]) -Greedy action tensor([ 0.6513, -0.0764, -0.0650, -0.0757]) tensor([0.4073, 0.1968, 0.1990, 0.1969]) -Greedy action tensor([ 1.1299, -0.4594, -0.1628, -0.3251]) tensor([0.5841, 0.1192, 0.1604, 0.1363]) -Greedy action tensor([ 0.6902, -0.1706, -0.1281, -0.0234]) tensor([0.4248, 0.1796, 0.1874, 0.2081]) -Greedy action tensor([ 0.5706, -0.1032, -0.0964, -0.0623]) tensor([0.3915, 0.1996, 0.2010, 0.2079]) -Greedy action tensor([ 0.6335, -0.3708, -0.0349, -0.2695]) tensor([0.4378, 0.1604, 0.2244, 0.1775]) -Greedy action tensor([ 0.6478, -0.2028, -0.0792, -0.0074]) tensor([0.4115, 0.1758, 0.1989, 0.2137]) -Greedy action tensor([ 1.0860, -0.4289, -0.1029, -0.3615]) tensor([0.5683, 0.1249, 0.1731, 0.1336]) -Greedy action tensor([ 1.0696, -0.8495, 0.0452, -0.6130]) tensor([0.5911, 0.0867, 0.2122, 0.1099]) -Greedy action tensor([ 0.6484, -0.5455, 0.1242, -0.4098]) tensor([0.4460, 0.1352, 0.2640, 0.1548]) -Greedy action tensor([ 1.0134, -0.4491, -0.0986, -0.5210]) tensor([0.5630, 0.1304, 0.1852, 0.1214]) -Greedy action tensor([ 0.7369, -0.3247, -0.0654, -0.0739]) tensor([0.4467, 0.1545, 0.2002, 0.1986]) -Greedy action tensor([ 0.6518, 0.0837, -0.0247, -0.0173]) tensor([0.3865, 0.2190, 0.1965, 0.1980]) -Greedy action tensor([ 0.6527, -0.2816, -0.0281, -0.1343]) tensor([0.4247, 0.1669, 0.2150, 0.1934]) -Greedy action tensor([ 0.6409, -0.4599, -0.0747, -0.0796]) tensor([0.4333, 0.1441, 0.2118, 0.2108]) -Greedy action tensor([ 0.7933, -0.3581, -0.0225, -0.0358]) tensor([0.4556, 0.1441, 0.2015, 0.1988]) -Greedy action tensor([ 0.5209, -0.4313, -0.2459, -0.2022]) tensor([0.4281, 0.1652, 0.1989, 0.2078]) -Greedy action tensor([ 1.0520, -0.4174, 0.0972, -0.5839]) tensor([0.5526, 0.1271, 0.2127, 0.1076]) -Greedy action tensor([ 1.2625, -0.3005, -0.1376, -0.2116]) tensor([0.5935, 0.1243, 0.1463, 0.1359]) -Greedy action tensor([ 0.9812, -1.0844, 0.0576, -0.4233]) tensor([0.5652, 0.0716, 0.2244, 0.1387]) -Greedy action tensor([ 0.8155, -0.3087, -0.2002, -0.1450]) tensor([0.4831, 0.1570, 0.1750, 0.1849]) -Greedy action tensor([ 0.4185, -0.2473, 0.1015, -0.3257]) tensor([0.3680, 0.1891, 0.2680, 0.1749]) -Greedy action tensor([ 0.9315, -0.8147, 0.0724, -0.7311]) tensor([0.5594, 0.0976, 0.2369, 0.1061]) -Greedy action tensor([ 0.2420, 0.0855, -0.1772, 0.0392]) tensor([0.3004, 0.2569, 0.1975, 0.2452]) -Greedy action tensor([ 1.0545, -0.6955, 0.1613, -0.6883]) tensor([0.5688, 0.0988, 0.2328, 0.0996]) -Greedy action tensor([ 1.1714, -0.6373, 0.1921, -0.4866]) tensor([0.5780, 0.0947, 0.2171, 0.1101]) -Greedy action tensor([ 0.5794, 0.1358, -0.0599, 0.0990]) tensor([0.3587, 0.2302, 0.1893, 0.2219]) -Greedy action tensor([ 0.7319, -0.0432, 0.1659, -0.0467]) tensor([0.4020, 0.1852, 0.2283, 0.1845]) -Greedy action tensor([ 0.5981, -0.5819, 0.2076, -0.7206]) tensor([0.4441, 0.1365, 0.3006, 0.1188]) -Greedy action tensor([ 7.5657e-01, -5.3700e-01, -2.3468e-04, -4.6853e-01]) tensor([0.4909, 0.1346, 0.2303, 0.1442]) -Greedy action tensor([-0.7796, -0.2067, 0.7109, -0.3635]) tensor([0.1146, 0.2032, 0.5086, 0.1737]) -Greedy action tensor([ 0.0106, -0.5652, 0.7270, -0.2733]) tensor([0.2292, 0.1289, 0.4693, 0.1726]) -Greedy action tensor([-0.0027, -0.2823, -1.0571, -0.1894]) tensor([0.3408, 0.2577, 0.1187, 0.2828]) -Greedy action tensor([ 0.1473, -1.0093, 0.8988, -0.1404]) tensor([0.2390, 0.0752, 0.5066, 0.1792]) -Greedy action tensor([-1.2147, 0.5878, -0.9751, -0.6339]) tensor([0.0988, 0.5991, 0.1255, 0.1766]) -Greedy action tensor([ 0.8201, -0.0269, 0.0148, 1.2019]) tensor([0.2994, 0.1283, 0.1338, 0.4385]) -Greedy action tensor([-0.7371, -0.9699, -0.2758, -0.5585]) tensor([0.2186, 0.1732, 0.3468, 0.2614]) -Greedy action tensor([-0.6747, -0.1128, -0.4887, -0.3189]) tensor([0.1857, 0.3257, 0.2236, 0.2650]) -Greedy action tensor([-0.7731, -0.5223, -0.1839, 0.8868]) tensor([0.1070, 0.1375, 0.1929, 0.5626]) -Greedy action tensor([-1.3142, -0.3527, 0.2846, -1.1825]) tensor([0.1031, 0.2696, 0.5098, 0.1176]) -Greedy action tensor([ 0.4608, -1.0394, -0.1533, 0.5627]) tensor([0.3482, 0.0777, 0.1884, 0.3856]) -Greedy action tensor([-0.4787, -0.2176, -1.0492, 0.1071]) tensor([0.2146, 0.2786, 0.1213, 0.3855]) -Greedy action tensor([-1.4730e-01, 5.6699e-04, 1.4297e+00, 2.3648e-01]) tensor([0.1181, 0.1369, 0.5716, 0.1733]) -Greedy action tensor([0.6786, 0.0844, 0.0690, 0.3581]) tensor([0.3544, 0.1957, 0.1927, 0.2572]) -Greedy action tensor([ 1.3401, 0.1795, -0.2488, -0.5630]) tensor([0.6000, 0.1880, 0.1225, 0.0895]) -Greedy action tensor([-1.0097, -0.5930, 0.8473, -1.1379]) tensor([0.1020, 0.1548, 0.6535, 0.0898]) -Greedy action tensor([-0.3005, -1.0673, -0.1901, 0.3199]) tensor([0.2252, 0.1046, 0.2515, 0.4187]) -Greedy action tensor([-0.5095, -1.4361, 0.6959, -0.6532]) tensor([0.1786, 0.0707, 0.5961, 0.1547]) -Greedy action tensor([ 1.3108, -1.3479, 1.3513, 2.3171]) tensor([0.2063, 0.0145, 0.2148, 0.5644]) -Greedy action tensor([-0.2491, -0.5321, 0.5183, -0.3289]) tensor([0.2070, 0.1560, 0.4459, 0.1911]) -Greedy action tensor([ 0.5967, -1.0077, -0.6818, 0.4029]) tensor([0.4342, 0.0873, 0.1209, 0.3577]) -Greedy action tensor([-0.9413, 0.6382, 0.2595, -1.0474]) tensor([0.0993, 0.4817, 0.3298, 0.0893]) -Greedy action tensor([-1.0082, -0.5257, 0.2786, -0.7128]) tensor([0.1318, 0.2136, 0.4774, 0.1771]) -Greedy action tensor([ 1.1291, -0.6169, 0.5355, -0.8418]) tensor([0.5359, 0.0935, 0.2960, 0.0747]) -Greedy action tensor([-0.2320, -0.4297, -0.4696, -0.3486]) tensor([0.2858, 0.2345, 0.2253, 0.2543]) -Greedy action tensor([ 0.0525, -0.1599, 0.9917, -0.3684]) tensor([0.1991, 0.1610, 0.5092, 0.1307]) -Greedy action tensor([-0.4939, -0.5759, 0.0632, -0.9894]) tensor([0.2339, 0.2154, 0.4082, 0.1425]) -Greedy action tensor([ 0.1594, -0.8078, 0.9138, -0.9048]) tensor([0.2596, 0.0987, 0.5521, 0.0896]) -Greedy action tensor([ 1.2891, -0.8001, -0.5685, 1.4817]) tensor([0.4012, 0.0497, 0.0626, 0.4865]) -Greedy action tensor([-0.1492, 0.1414, -0.7581, -0.1780]) tensor([0.2596, 0.3471, 0.1412, 0.2522]) -Greedy action tensor([-0.2911, -1.0411, 0.6479, 0.7588]) tensor([0.1452, 0.0686, 0.3713, 0.4149]) -Greedy action tensor([-0.1418, -0.7380, 1.1424, -0.1665]) tensor([0.1629, 0.0897, 0.5884, 0.1589]) -Greedy action tensor([-0.1679, 0.2082, 0.3409, -0.2247]) tensor([0.1974, 0.2876, 0.3284, 0.1865]) -Greedy action tensor([ 1.5949, -1.2924, -0.7000, 1.2110]) tensor([0.5442, 0.0303, 0.0548, 0.3707]) -Greedy action tensor([-0.4948, -0.3330, 0.9728, 0.1433]) tensor([0.1189, 0.1398, 0.5161, 0.2252]) -Greedy action tensor([-0.5710, -0.1738, -0.7478, 0.5092]) tensor([0.1595, 0.2372, 0.1336, 0.4697]) -Greedy action tensor([ 0.8064, -0.8420, 0.0068, 1.0555]) tensor([0.3419, 0.0658, 0.1537, 0.4386]) -Greedy action tensor([ 1.5731, 0.6051, 0.6087, -0.3420]) tensor([0.5240, 0.1990, 0.1998, 0.0772]) -Greedy action tensor([-0.1811, -0.8393, -0.3154, -0.3384]) tensor([0.3080, 0.1595, 0.2693, 0.2632]) -Greedy action tensor([-0.3004, -1.5912, 1.3937, 1.0341]) tensor([0.0951, 0.0262, 0.5175, 0.3612]) -Greedy action tensor([ 0.8232, -1.1410, 0.9170, 0.0842]) tensor([0.3682, 0.0516, 0.4044, 0.1758]) -Greedy action tensor([ 0.2073, -1.4236, 0.1761, 0.4735]) tensor([0.2882, 0.0564, 0.2793, 0.3761]) -Greedy action tensor([ 0.2292, -0.4769, -0.6373, 0.3554]) tensor([0.3280, 0.1619, 0.1379, 0.3722]) -Greedy action tensor([ 1.4605, -0.4935, 0.2842, 0.9442]) tensor([0.4886, 0.0692, 0.1507, 0.2915]) -Greedy action tensor([ 0.9920, -0.1348, -0.1381, 0.2213]) tensor([0.4740, 0.1536, 0.1531, 0.2193]) -Greedy action tensor([-0.5743, 0.6325, 0.5409, -0.6819]) tensor([0.1206, 0.4032, 0.3679, 0.1083]) -Greedy action tensor([ 0.8350, -0.3310, 0.2769, 0.8596]) tensor([0.3438, 0.1071, 0.1967, 0.3523]) -Greedy action tensor([ 0.6916, -1.0533, 1.0437, 0.6870]) tensor([0.2784, 0.0486, 0.3959, 0.2771]) -Greedy action tensor([-0.4775, 0.0171, 0.7423, -0.4831]) tensor([0.1424, 0.2336, 0.4824, 0.1416]) -Greedy action tensor([ 0.5985, 0.1660, 0.4987, -0.0875]) tensor([0.3271, 0.2122, 0.2960, 0.1647]) -Greedy action tensor([ 0.1125, -1.5848, 0.8236, 0.6132]) tensor([0.2054, 0.0376, 0.4182, 0.3388]) -Greedy action tensor([-1.1701, -0.0089, -0.5959, -0.5679]) tensor([0.1283, 0.4097, 0.2278, 0.2342]) -Greedy action tensor([ 0.0278, -0.9311, -0.9426, 0.0384]) tensor([0.3606, 0.1382, 0.1367, 0.3645]) -Greedy action tensor([ 0.4548, -1.1733, 0.2550, -0.8288]) tensor([0.4362, 0.0856, 0.3573, 0.1209]) -Greedy action tensor([-0.3673, -0.5642, 0.3485, -0.1602]) tensor([0.1962, 0.1611, 0.4014, 0.2413]) -Greedy action tensor([0.5473, 0.0379, 0.6761, 0.3597]) tensor([0.2803, 0.1684, 0.3189, 0.2324]) -Greedy action tensor([ 0.3021, -0.8310, 1.5151, -0.4568]) tensor([0.1940, 0.0625, 0.6526, 0.0908]) -Greedy action tensor([ 0.0367, -0.3109, -0.4013, -0.2821]) tensor([0.3248, 0.2295, 0.2096, 0.2361]) -Greedy action tensor([ 0.8478, -0.0781, -0.8217, 0.7215]) tensor([0.4055, 0.1607, 0.0764, 0.3574]) -Greedy action tensor([-0.4078, -0.4820, -0.1176, -0.1328]) tensor([0.2183, 0.2026, 0.2917, 0.2873]) -Greedy action tensor([-0.5666, -2.4185, 0.1452, 0.5665]) tensor([0.1587, 0.0249, 0.3234, 0.4929]) -Greedy action tensor([-0.2556, -0.6725, -0.0164, 0.6271]) tensor([0.1870, 0.1233, 0.2376, 0.4521]) -Greedy action tensor([ 0.7934, 0.6939, 0.2152, -0.3677]) tensor([0.3598, 0.3257, 0.2018, 0.1127]) -Greedy action tensor([-0.7994, 0.1333, -0.3887, -0.9627]) tensor([0.1695, 0.4309, 0.2556, 0.1440]) -Greedy action tensor([ 1.0747, -0.9742, -0.1074, -0.7576]) tensor([0.6267, 0.0808, 0.1922, 0.1003]) -Greedy action tensor([-0.2232, -1.7930, 0.2231, -0.8258]) tensor([0.3014, 0.0627, 0.4709, 0.1650]) -Greedy action tensor([-0.3905, 0.0848, 0.1374, -0.5547]) tensor([0.1941, 0.3122, 0.3290, 0.1647]) -Greedy action tensor([ 0.0969, -0.8468, 0.6166, 0.4287]) tensor([0.2240, 0.0872, 0.3767, 0.3121]) -Greedy action tensor([0.2301, 0.6027, 0.8024, 0.7205]) tensor([0.1707, 0.2478, 0.3026, 0.2788]) -Greedy action tensor([ 1.4452, -1.6814, 0.0527, 0.5023]) tensor([0.5946, 0.0261, 0.1477, 0.2316]) -Greedy action tensor([-0.9664, -2.2937, 0.6756, -0.0865]) tensor([0.1131, 0.0300, 0.5842, 0.2727]) -Greedy action tensor([ 0.3768, -0.2525, 0.1612, 0.2667]) tensor([0.3092, 0.1648, 0.2492, 0.2769]) -Greedy action tensor([ 1.7312, -1.3150, 0.7157, -0.1464]) tensor([0.6399, 0.0304, 0.2318, 0.0979]) -Greedy action tensor([-0.4120, 0.4946, 0.3862, -0.2896]) tensor([0.1465, 0.3626, 0.3254, 0.1655]) -Greedy action tensor([-1.0789, -0.0158, -0.3313, -0.2418]) tensor([0.1202, 0.3481, 0.2539, 0.2777]) -Greedy action tensor([ 0.1883, -0.5417, 0.0619, -0.6333]) tensor([0.3568, 0.1719, 0.3144, 0.1569]) -Greedy action tensor([ 0.6006, -1.3267, 0.2907, -0.1008]) tensor([0.4211, 0.0613, 0.3089, 0.2088]) -Greedy action tensor([ 4.1208e-01, -1.0645e-02, -1.0490e+00, 7.0405e-04]) tensor([0.3922, 0.2570, 0.0910, 0.2599]) -Greedy action tensor([-1.0027, -0.4628, -0.3448, -1.3163]) tensor([0.1860, 0.3191, 0.3590, 0.1359]) -Greedy action tensor([ 0.3177, 0.9708, -0.3123, -0.4723]) tensor([0.2559, 0.4917, 0.1363, 0.1161]) -Greedy action tensor([-0.8842, -0.2216, -0.5750, 0.2706]) tensor([0.1338, 0.2595, 0.1822, 0.4245]) -Greedy action tensor([ 1.9285, -0.2552, -0.1913, 0.3083]) tensor([0.6990, 0.0787, 0.0839, 0.1383]) -Greedy action tensor([ 1.8087, -0.9692, -0.3562, 0.6531]) tensor([0.6703, 0.0417, 0.0769, 0.2111]) -Greedy action tensor([ 1.6324, -0.3894, -0.2052, 0.3434]) tensor([0.6381, 0.0845, 0.1016, 0.1758]) -Greedy action tensor([ 1.1224, -0.5843, -0.0660, 0.4766]) tensor([0.4974, 0.0903, 0.1516, 0.2608]) -Greedy action tensor([ 1.3817, -0.0978, -0.3435, -0.7339]) tensor([0.6551, 0.1492, 0.1167, 0.0790]) -Greedy action tensor([ 1.4864, -0.1752, -0.3730, 0.5251]) tensor([0.5787, 0.1099, 0.0901, 0.2213]) -Greedy action tensor([ 0.8983, -0.0170, 0.1153, -0.6023]) tensor([0.4807, 0.1925, 0.2197, 0.1072]) -Greedy action tensor([ 0.9919, -0.3765, -0.1936, 0.0699]) tensor([0.5108, 0.1300, 0.1561, 0.2031]) -Greedy action tensor([ 1.3997, -0.4073, -0.3521, 0.7458]) tensor([0.5383, 0.0884, 0.0934, 0.2799]) -Greedy action tensor([ 1.1167, -0.1614, -0.6105, 0.1341]) tensor([0.5462, 0.1522, 0.0971, 0.2045]) -Greedy action tensor([ 1.2686, -0.3994, -0.1796, 0.4392]) tensor([0.5376, 0.1014, 0.1263, 0.2346]) -Greedy action tensor([ 0.8564, -0.4428, -0.1459, 0.1772]) tensor([0.4658, 0.1271, 0.1710, 0.2362]) -Greedy action tensor([ 0.8691, -0.0546, 0.2211, -0.2992]) tensor([0.4482, 0.1780, 0.2345, 0.1393]) -Greedy action tensor([ 1.5340, -0.4000, -0.1459, 0.2804]) tensor([0.6186, 0.0894, 0.1153, 0.1766]) -Greedy action tensor([ 1.8802, -0.7109, 0.0802, 0.0666]) tensor([0.7126, 0.0534, 0.1178, 0.1162]) -Greedy action tensor([ 1.9210, -1.1021, -0.3076, 0.7405]) tensor([0.6833, 0.0332, 0.0736, 0.2099]) -Greedy action tensor([ 2.0122, -0.6537, -0.4199, 0.3028]) tensor([0.7472, 0.0520, 0.0656, 0.1352]) -Greedy action tensor([ 1.5049, -0.4024, -0.3662, 0.4433]) tensor([0.6067, 0.0901, 0.0934, 0.2098]) -Greedy action tensor([ 1.4662, 0.0831, -0.3921, 0.6222]) tensor([0.5444, 0.1365, 0.0849, 0.2341]) -Greedy action tensor([ 2.1365, -1.5106, -0.2167, 0.7579]) tensor([0.7283, 0.0190, 0.0692, 0.1835]) -Greedy action tensor([ 1.4169, -0.2820, -0.2832, 0.2280]) tensor([0.5988, 0.1095, 0.1094, 0.1824]) -Greedy action tensor([ 1.6592, -0.4802, -0.5705, 0.5155]) tensor([0.6477, 0.0763, 0.0697, 0.2064]) -Greedy action tensor([ 1.8759, -1.0453, -0.6171, 0.2518]) tensor([0.7498, 0.0404, 0.0620, 0.1478]) -Greedy action tensor([ 1.6729, -0.7706, -0.4630, 0.2804]) tensor([0.6880, 0.0598, 0.0813, 0.1709]) -Greedy action tensor([ 1.9017, -1.1557, -0.1755, 0.4084]) tensor([0.7159, 0.0337, 0.0897, 0.1608]) -Greedy action tensor([ 1.1339, -0.4509, -0.3406, 0.4778]) tensor([0.5121, 0.1050, 0.1172, 0.2657]) -Greedy action tensor([ 1.2340, -0.3045, 0.0869, 0.4636]) tensor([0.5012, 0.1076, 0.1592, 0.2320]) -Greedy action tensor([ 1.7162, -0.2292, -0.5173, 0.6928]) tensor([0.6213, 0.0888, 0.0666, 0.2233]) -Greedy action tensor([ 1.6130, -0.1434, -0.3636, 0.4059]) tensor([0.6210, 0.1072, 0.0860, 0.1857]) -Greedy action tensor([ 1.2069, -0.2841, -0.3724, 0.1437]) tensor([0.5629, 0.1267, 0.1160, 0.1944]) -Greedy action tensor([ 1.3962, -0.0749, -0.7144, 0.3738]) tensor([0.5846, 0.1343, 0.0708, 0.2103]) -Greedy action tensor([ 1.7940, -0.8373, -0.3407, 0.4763]) tensor([0.6859, 0.0494, 0.0811, 0.1836]) -Greedy action tensor([ 1.8790, -0.9865, -0.3801, 0.5056]) tensor([0.7069, 0.0403, 0.0738, 0.1790]) -Greedy action tensor([ 1.2051, -0.0893, -0.3236, 0.5426]) tensor([0.4984, 0.1366, 0.1081, 0.2570]) -Greedy action tensor([ 1.0209, -0.4240, -0.1269, 0.3789]) tensor([0.4809, 0.1134, 0.1526, 0.2531]) -Greedy action tensor([ 1.6291, -0.4118, -0.4323, 0.2727]) tensor([0.6602, 0.0858, 0.0840, 0.1700]) -Greedy action tensor([ 1.9109, -0.5928, -0.5815, 0.3932]) tensor([0.7227, 0.0591, 0.0598, 0.1584]) -Greedy action tensor([ 1.7308, -0.7375, -0.3511, 0.4120]) tensor([0.6771, 0.0574, 0.0844, 0.1811]) -Greedy action tensor([ 1.0425, -0.5512, -0.1195, 0.1141]) tensor([0.5232, 0.1063, 0.1637, 0.2068]) -Greedy action tensor([ 1.3441, -0.1035, -0.1307, 0.3633]) tensor([0.5438, 0.1279, 0.1244, 0.2039]) -Greedy action tensor([ 1.2435, -0.5057, -0.1904, 0.2098]) tensor([0.5656, 0.0984, 0.1348, 0.2012]) -Greedy action tensor([ 0.9884, -0.3363, -0.2001, -0.1560]) tensor([0.5294, 0.1408, 0.1613, 0.1686]) -Greedy action tensor([ 1.0409, -0.4832, -0.3721, 0.1578]) tensor([0.5334, 0.1162, 0.1298, 0.2206]) -Greedy action tensor([ 2.2053, -0.8877, -0.5358, 0.4573]) tensor([0.7788, 0.0353, 0.0502, 0.1356]) -Greedy action tensor([ 1.7331, -0.0961, -0.1955, 0.3564]) tensor([0.6417, 0.1030, 0.0933, 0.1620]) -Greedy action tensor([ 1.7280, -0.5325, -0.4580, 0.5940]) tensor([0.6500, 0.0678, 0.0730, 0.2091]) -Greedy action tensor([ 1.7792, -0.9461, -0.5234, -0.1019]) tensor([0.7588, 0.0497, 0.0759, 0.1156]) -Greedy action tensor([ 1.0507, -0.3660, -0.7574, 0.2865]) tensor([0.5341, 0.1295, 0.0876, 0.2488]) -Greedy action tensor([ 1.5121, -1.0004, -0.1574, 0.2751]) tensor([0.6412, 0.0520, 0.1208, 0.1861]) -Greedy action tensor([ 1.7280, -0.4377, -0.5791, 0.0839]) tensor([0.7105, 0.0815, 0.0707, 0.1373]) -Greedy action tensor([ 1.2367, 0.0801, -0.4987, 0.3605]) tensor([0.5243, 0.1649, 0.0925, 0.2183]) -Greedy action tensor([ 1.9754, -0.1514, -0.6058, -0.0187]) tensor([0.7513, 0.0896, 0.0569, 0.1023]) -Greedy action tensor([ 1.7523, -0.8291, -0.3313, 0.4448]) tensor([0.6800, 0.0515, 0.0846, 0.1839]) -Greedy action tensor([ 0.9497, -0.2195, -0.2575, 0.0872]) tensor([0.4922, 0.1529, 0.1472, 0.2078]) -Greedy action tensor([ 1.4922, -0.6411, -0.7577, 0.2736]) tensor([0.6581, 0.0780, 0.0694, 0.1946]) -Greedy action tensor([ 1.5797, -0.5681, -0.5965, 0.3385]) tensor([0.6582, 0.0768, 0.0747, 0.1902]) -Greedy action tensor([ 1.4176, -0.5780, -0.1770, 0.3013]) tensor([0.6001, 0.0816, 0.1218, 0.1965]) -Greedy action tensor([ 1.5802, -0.2605, -0.7359, 0.2223]) tensor([0.6602, 0.1048, 0.0651, 0.1698]) -Greedy action tensor([ 1.3986, -0.4124, -0.0731, -0.2673]) tensor([0.6321, 0.1033, 0.1451, 0.1195]) -Greedy action tensor([ 1.8869, -0.8645, -0.3040, 0.4191]) tensor([0.7112, 0.0454, 0.0795, 0.1639]) -Greedy action tensor([ 1.5995, 0.1087, -0.6863, 0.5169]) tensor([0.6004, 0.1352, 0.0611, 0.2034]) -Greedy action tensor([ 1.4644, -0.5214, -0.2698, 0.0979]) tensor([0.6374, 0.0875, 0.1125, 0.1625]) -Greedy action tensor([ 1.5566, -0.9412, 0.1182, 0.6114]) tensor([0.5854, 0.0482, 0.1389, 0.2275]) -Greedy action tensor([ 2.1324, -0.7414, -0.4615, 0.3564]) tensor([0.7689, 0.0434, 0.0575, 0.1302]) -Greedy action tensor([ 1.2346, 0.1643, -0.7931, 0.5801]) tensor([0.5014, 0.1719, 0.0660, 0.2606]) -Greedy action tensor([ 1.6182, -0.3703, -0.1589, 0.4034]) tensor([0.6239, 0.0854, 0.1055, 0.1852]) -Greedy action tensor([ 1.1175, -0.7348, 0.2524, 0.2604]) tensor([0.4994, 0.0783, 0.2103, 0.2119]) -Greedy action tensor([ 1.6533, 0.2886, -0.2364, 0.6643]) tensor([0.5623, 0.1436, 0.0850, 0.2091]) -Greedy action tensor([ 0.7940, -0.5010, -0.2054, 0.3028]) tensor([0.4437, 0.1215, 0.1633, 0.2715]) -Greedy action tensor([ 1.8034, 0.0251, -0.5009, 0.2455]) tensor([0.6760, 0.1142, 0.0675, 0.1424]) -Greedy action tensor([ 1.4973, -0.7402, -0.3659, 0.4387]) tensor([0.6216, 0.0663, 0.0964, 0.2156]) -Greedy action tensor([ 1.5777, -0.7366, 0.0951, 0.2083]) tensor([0.6329, 0.0625, 0.1437, 0.1609]) -Greedy action tensor([ 1.2930, -0.9210, -0.3126, 0.3104]) tensor([0.5937, 0.0649, 0.1192, 0.2222]) -Greedy action tensor([ 1.4732, -0.6677, -0.2046, 0.1240]) tensor([0.6395, 0.0752, 0.1194, 0.1659]) -Greedy action tensor([ 1.9376, -0.9563, -0.3691, 0.5841]) tensor([0.7076, 0.0392, 0.0705, 0.1828]) -Greedy action tensor([ 1.9750, -0.6676, -1.0106, 0.3393]) tensor([0.7596, 0.0541, 0.0384, 0.1480]) -Greedy action tensor([ 1.4336, 0.1690, -0.3773, 0.4077]) tensor([0.5542, 0.1565, 0.0906, 0.1987]) -Greedy action tensor([ 2.1666, -0.5624, -0.5934, 0.7984]) tensor([0.7230, 0.0472, 0.0458, 0.1840]) -Greedy action tensor([ 1.4965, -0.6722, -0.5212, -0.0195]) tensor([0.6817, 0.0779, 0.0906, 0.1497]) -Greedy action tensor([ 1.6830, -0.6917, -0.1960, 0.4381]) tensor([0.6520, 0.0607, 0.0996, 0.1878]) -Greedy action tensor([ 1.2600, -0.3359, -0.2621, 0.4926]) tensor([0.5304, 0.1075, 0.1158, 0.2462]) -Greedy action tensor([ 0.8438, -0.4164, -0.1015, -0.2986]) tensor([0.5022, 0.1424, 0.1951, 0.1602]) -Greedy action tensor([ 0.2865, -0.0599, -0.0241, -0.0970]) tensor([0.3203, 0.2265, 0.2348, 0.2183]) -Greedy action tensor([ 0.5533, -0.0345, 0.1993, -0.2824]) tensor([0.3716, 0.2064, 0.2608, 0.1611]) -Greedy action tensor([ 0.9759, -1.1121, 0.1567, -0.5575]) tensor([0.5616, 0.0696, 0.2476, 0.1212]) -Greedy action tensor([ 0.8775, -0.0495, 0.0102, 0.0557]) tensor([0.4434, 0.1755, 0.1862, 0.1949]) -Greedy action tensor([ 0.9393, -0.4843, 0.1464, -0.2201]) tensor([0.4983, 0.1200, 0.2255, 0.1563]) -Greedy action tensor([ 0.8052, -0.9465, -0.1708, -0.3601]) tensor([0.5370, 0.0932, 0.2024, 0.1675]) -Greedy action tensor([ 0.6180, -0.4118, -0.1504, -0.2063]) tensor([0.4426, 0.1580, 0.2053, 0.1941]) -Greedy action tensor([ 0.6413, -0.0876, -0.1429, -0.3874]) tensor([0.4355, 0.2101, 0.1988, 0.1557]) -Greedy action tensor([ 0.9366, -0.5331, -0.1338, -0.5357]) tensor([0.5548, 0.1276, 0.1903, 0.1273]) -Greedy action tensor([ 0.6905, -0.4657, 0.0261, -0.7984]) tensor([0.4867, 0.1531, 0.2504, 0.1098]) -Greedy action tensor([ 0.7760, -0.4475, -0.1325, -0.2684]) tensor([0.4880, 0.1436, 0.1967, 0.1717]) -Greedy action tensor([ 0.4221, -0.1189, -0.0279, -0.1435]) tensor([0.3587, 0.2088, 0.2287, 0.2037]) -Greedy action tensor([ 1.0375, -0.5196, -0.0173, -0.4822]) tensor([0.5625, 0.1185, 0.1959, 0.1231]) -Greedy action tensor([ 0.3220, -0.0635, -0.0778, -0.1740]) tensor([0.3379, 0.2298, 0.2265, 0.2058]) -Greedy action tensor([ 0.3174, 0.0052, -0.1286, -0.1036]) tensor([0.3302, 0.2417, 0.2114, 0.2167]) -Greedy action tensor([ 0.7044, -0.6360, 0.0972, -0.3608]) tensor([0.4648, 0.1217, 0.2533, 0.1602]) -Greedy action tensor([ 1.1414, -0.8577, -0.0016, -0.6147]) tensor([0.6146, 0.0833, 0.1960, 0.1062]) -Greedy action tensor([ 0.7601, -0.2101, -0.0294, -0.3262]) tensor([0.4607, 0.1746, 0.2092, 0.1555]) -Greedy action tensor([ 0.9115, -0.6792, -0.1181, -0.3868]) tensor([0.5453, 0.1111, 0.1947, 0.1489]) -Greedy action tensor([ 0.5985, -0.2066, -0.0324, -0.2348]) tensor([0.4143, 0.1852, 0.2205, 0.1801]) -Greedy action tensor([ 0.4972, 0.6906, -0.4101, 0.3496]) tensor([0.2874, 0.3487, 0.1160, 0.2480]) -Greedy action tensor([ 0.7817, -0.3506, 0.0046, -0.2227]) tensor([0.4655, 0.1500, 0.2140, 0.1705]) -Greedy action tensor([ 0.5984, -0.3482, -0.1183, -0.3159]) tensor([0.4391, 0.1704, 0.2145, 0.1760]) -Greedy action tensor([ 1.0869, -0.8934, 0.0074, -0.5598]) tensor([0.5986, 0.0826, 0.2034, 0.1154]) -Greedy action tensor([ 0.8724, -0.7691, -0.0883, -0.5999]) tensor([0.5538, 0.1073, 0.2119, 0.1270]) -Greedy action tensor([ 1.0705, -1.1128, 0.0038, -0.5127]) tensor([0.6016, 0.0678, 0.2071, 0.1235]) -Greedy action tensor([ 0.7026, -0.2523, 0.0283, -0.0968]) tensor([0.4266, 0.1642, 0.2174, 0.1918]) -Greedy action tensor([ 1.2010, -0.6069, 0.1661, -0.5275]) tensor([0.5893, 0.0967, 0.2094, 0.1046]) -Greedy action tensor([ 0.8770, -0.5542, -0.0822, -0.1532]) tensor([0.5053, 0.1208, 0.1936, 0.1804]) -Greedy action tensor([ 0.3166, -0.1266, -0.0574, -0.1503]) tensor([0.3382, 0.2171, 0.2327, 0.2120]) -Greedy action tensor([ 0.4664, -0.3100, -0.1082, -0.0421]) tensor([0.3810, 0.1753, 0.2145, 0.2292]) -Greedy action tensor([ 0.6895, -0.5962, 0.0417, -0.4274]) tensor([0.4702, 0.1300, 0.2460, 0.1539]) -Greedy action tensor([ 0.7673, -0.7051, -0.1256, -0.0884]) tensor([0.4845, 0.1111, 0.1984, 0.2059]) -Greedy action tensor([ 0.4324, 0.1864, -0.1124, 0.1360]) tensor([0.3220, 0.2518, 0.1868, 0.2394]) -Greedy action tensor([ 0.3711, -0.1369, -0.0408, -0.2518]) tensor([0.3571, 0.2148, 0.2365, 0.1915]) -Greedy action tensor([ 0.4145, -0.0907, -0.0688, -0.0459]) tensor([0.3507, 0.2116, 0.2163, 0.2213]) -Greedy action tensor([ 0.4609, 0.3673, -0.1762, 0.1293]) tensor([0.3167, 0.2884, 0.1675, 0.2273]) -Greedy action tensor([ 0.7867, -0.5040, -0.0325, -0.2759]) tensor([0.4851, 0.1334, 0.2138, 0.1676]) -Greedy action tensor([ 0.5289, -0.3870, -0.2128, -0.1435]) tensor([0.4189, 0.1676, 0.1995, 0.2139]) -Greedy action tensor([ 0.3844, 0.1064, -0.0741, -0.1132]) tensor([0.3336, 0.2526, 0.2109, 0.2028]) -Greedy action tensor([ 0.5471, 0.0697, -0.1306, -0.2927]) tensor([0.3906, 0.2423, 0.1984, 0.1687]) -Greedy action tensor([ 0.8018, -0.0058, -0.0255, 0.0653]) tensor([0.4234, 0.1888, 0.1851, 0.2027]) -Greedy action tensor([ 0.9331, -0.4822, -0.0557, -0.2380]) tensor([0.5195, 0.1262, 0.1933, 0.1611]) -Greedy action tensor([ 0.7646, -0.4406, 0.0824, -0.1176]) tensor([0.4507, 0.1350, 0.2278, 0.1865]) -Greedy action tensor([ 1.2219, -0.6979, -0.1688, -0.6190]) tensor([0.6434, 0.0943, 0.1601, 0.1021]) -Greedy action tensor([ 1.0579, -0.9235, -0.0118, -0.9226]) tensor([0.6177, 0.0852, 0.2119, 0.0852]) -Greedy action tensor([ 0.6652, -0.5550, 0.0929, -0.2029]) tensor([0.4388, 0.1295, 0.2476, 0.1842]) -Greedy action tensor([ 0.7267, -0.2409, -0.0610, -0.0269]) tensor([0.4337, 0.1648, 0.1973, 0.2041]) -Greedy action tensor([ 0.6387, -0.2705, -0.0030, -0.2466]) tensor([0.4270, 0.1720, 0.2248, 0.1762]) -Greedy action tensor([ 1.0778, -0.9140, 0.1859, -0.5721]) tensor([0.5752, 0.0785, 0.2358, 0.1105]) -Greedy action tensor([ 0.7716, -0.5348, 0.0619, -0.4848]) tensor([0.4884, 0.1323, 0.2402, 0.1391]) -Greedy action tensor([ 0.9226, -0.5252, 0.0118, -0.6378]) tensor([0.5413, 0.1273, 0.2177, 0.1137]) -Greedy action tensor([ 0.6067, -0.0434, -0.1341, -0.0357]) tensor([0.3961, 0.2068, 0.1888, 0.2084]) -Greedy action tensor([ 0.6349, -0.0898, 0.1414, -0.1816]) tensor([0.3942, 0.1910, 0.2406, 0.1742]) -Greedy action tensor([ 0.6535, -0.3836, -0.1863, -0.1546]) tensor([0.4480, 0.1588, 0.1935, 0.1997]) -Greedy action tensor([ 0.4355, -0.5555, 0.0297, -0.2501]) tensor([0.3935, 0.1461, 0.2622, 0.1982]) -Greedy action tensor([ 0.6767, -0.3060, -0.0430, -0.1834]) tensor([0.4378, 0.1639, 0.2131, 0.1852]) -Greedy action tensor([ 0.7393, -0.2060, -0.1150, -0.4170]) tensor([0.4697, 0.1825, 0.1999, 0.1478]) -Greedy action tensor([ 0.5819, -0.6004, -0.1572, -0.2083]) tensor([0.4469, 0.1370, 0.2134, 0.2028]) -Greedy action tensor([ 0.4799, -0.0695, -0.1326, -0.1588]) tensor([0.3777, 0.2181, 0.2047, 0.1995]) -Greedy action tensor([ 0.5674, -0.4610, -0.0807, -0.0895]) tensor([0.4168, 0.1490, 0.2180, 0.2161]) -Greedy action tensor([ 0.8861, -0.9099, 0.1107, -0.5424]) tensor([0.5359, 0.0889, 0.2468, 0.1284]) -Greedy action tensor([ 0.8155, -0.4249, -0.1892, -0.3643]) tensor([0.5095, 0.1474, 0.1866, 0.1566]) -Greedy action tensor([ 0.5628, 0.0567, -0.0982, -0.3392]) tensor([0.3960, 0.2388, 0.2045, 0.1607]) -Greedy action tensor([ 0.6791, -0.2623, -0.0400, -0.4223]) tensor([0.4526, 0.1765, 0.2205, 0.1504]) -Greedy action tensor([ 0.7092, -0.3817, -0.0852, 0.0069]) tensor([0.4380, 0.1471, 0.1979, 0.2170]) -Greedy action tensor([ 0.3446, -0.1218, -0.3200, 0.1740]) tensor([0.3350, 0.2101, 0.1724, 0.2825]) -Greedy action tensor([ 0.9934, -0.5988, 0.0802, -0.4672]) tensor([0.5444, 0.1108, 0.2184, 0.1264]) -Greedy action tensor([ 0.9955, -0.6415, -0.0811, -0.4824]) tensor([0.5671, 0.1103, 0.1932, 0.1294]) -Greedy action tensor([ 0.7478, -0.1519, 0.0223, -0.0157]) tensor([0.4243, 0.1726, 0.2054, 0.1977]) -Greedy action tensor([ 0.6099, -0.3977, -0.2484, -0.4170]) tensor([0.4657, 0.1700, 0.1974, 0.1668]) -Greedy action tensor([ 0.9519, -0.8170, 0.2068, -0.4841]) tensor([0.5310, 0.0906, 0.2521, 0.1263]) -Greedy action tensor([ 0.9487, -0.5749, -0.0913, -0.2780]) tensor([0.5363, 0.1169, 0.1896, 0.1573]) -Greedy action tensor([ 0.7864, -0.6728, 0.0740, -0.1918]) tensor([0.4764, 0.1107, 0.2337, 0.1791]) -Greedy action tensor([ 0.2521, 0.1980, -0.0994, 0.0738]) tensor([0.2867, 0.2716, 0.2017, 0.2399]) -Greedy action tensor([ 0.9339, -0.4402, -0.1590, -0.6037]) tensor([0.5546, 0.1403, 0.1859, 0.1192]) -Greedy action tensor([ 0.3972, -0.5269, -0.1970, -0.1702]) tensor([0.3975, 0.1577, 0.2194, 0.2254]) -Greedy action tensor([ 0.8295, -0.3540, -0.0401, -0.3366]) tensor([0.4909, 0.1503, 0.2058, 0.1530]) -Greedy action tensor([ 0.9315, -0.7407, -0.2040, -0.4630]) tensor([0.5691, 0.1069, 0.1828, 0.1411]) -Greedy action tensor([ 0.9080, -0.6440, 0.1009, -0.6110]) tensor([0.5328, 0.1129, 0.2377, 0.1167]) -Greedy action tensor([-1.7607, -0.4511, 0.5771, -0.1075]) tensor([0.0493, 0.1826, 0.5106, 0.2575]) -Greedy action tensor([-1.4696, -0.2234, 0.6640, 0.1508]) tensor([0.0556, 0.1934, 0.4698, 0.2812]) -Greedy action tensor([-1.9357, -0.4302, 0.6602, -0.1723]) tensor([0.0404, 0.1821, 0.5418, 0.2357]) -Greedy action tensor([-1.8904, -0.4304, 0.6444, -0.1525]) tensor([0.0424, 0.1824, 0.5344, 0.2409]) -Greedy action tensor([-1.8567, -0.3491, 0.6132, -0.1222]) tensor([0.0435, 0.1963, 0.5139, 0.2463]) -Greedy action tensor([-1.9236, -0.4131, 0.6512, -0.1672]) tensor([0.0409, 0.1852, 0.5370, 0.2369]) -Greedy action tensor([-1.3665, 0.0676, 0.5033, 0.1599]) tensor([0.0614, 0.2576, 0.3984, 0.2826]) -Greedy action tensor([-1.7399, -0.2334, 0.5306, -0.0941]) tensor([0.0491, 0.2213, 0.4752, 0.2544]) -Greedy action tensor([-1.8857, -0.4400, 0.6299, -0.1412]) tensor([0.0428, 0.1818, 0.5301, 0.2452]) -Greedy action tensor([-0.6840, 0.1043, 0.8105, 1.4701]) tensor([0.0614, 0.1351, 0.2738, 0.5296]) -Greedy action tensor([-1.7853, -0.3373, 0.5753, -0.0860]) tensor([0.0469, 0.1995, 0.4970, 0.2566]) -Greedy action tensor([-1.9401, -0.4604, 0.6844, -0.1772]) tensor([0.0400, 0.1755, 0.5515, 0.2330]) -Greedy action tensor([-1.8573, -0.4575, 0.6464, -0.1091]) tensor([0.0434, 0.1761, 0.5310, 0.2495]) -Greedy action tensor([-1.9280, -0.4538, 0.6549, -0.1720]) tensor([0.0410, 0.1791, 0.5426, 0.2373]) -Greedy action tensor([-1.6384, -0.4794, 0.5248, -0.0283]) tensor([0.0559, 0.1781, 0.4863, 0.2797]) -Greedy action tensor([-1.3217, -0.0766, 0.5399, 0.1979]) tensor([0.0646, 0.2244, 0.4157, 0.2953]) -Greedy action tensor([-1.1051, 0.9070, 0.1664, 0.2873]) tensor([0.0622, 0.4654, 0.2219, 0.2504]) -Greedy action tensor([-1.8911, -0.4426, 0.6450, -0.1507]) tensor([0.0424, 0.1805, 0.5355, 0.2416]) -Greedy action tensor([-0.5647, 0.6235, 0.3802, 0.4793]) tensor([0.1032, 0.3385, 0.2654, 0.2930]) -Greedy action tensor([-1.3189, 0.0575, 0.3470, -0.0284]) tensor([0.0720, 0.2852, 0.3810, 0.2617]) -Greedy action tensor([-1.8988, -0.4114, 0.6312, -0.1625]) tensor([0.0423, 0.1871, 0.5307, 0.2400]) -Greedy action tensor([-0.8381, -0.5374, 0.2338, 0.2000]) tensor([0.1235, 0.1669, 0.3608, 0.3488]) -Greedy action tensor([-1.9414, -0.4450, 0.6650, -0.1791]) tensor([0.0403, 0.1798, 0.5455, 0.2345]) -Greedy action tensor([-1.9396, -0.4444, 0.6664, -0.1751]) tensor([0.0403, 0.1795, 0.5452, 0.2350]) -Greedy action tensor([-1.6260, -0.3199, 0.6014, 0.0940]) tensor([0.0511, 0.1888, 0.4744, 0.2856]) -Greedy action tensor([-1.2699, 0.4690, 0.0352, -0.0799]) tensor([0.0732, 0.4164, 0.2699, 0.2405]) -Greedy action tensor([-1.7646, -0.4835, 0.5760, -0.1091]) tensor([0.0494, 0.1780, 0.5136, 0.2589]) -Greedy action tensor([-1.6589, -0.4528, 0.5327, -0.0288]) tensor([0.0544, 0.1816, 0.4865, 0.2775]) -Greedy action tensor([-1.9216, -0.4159, 0.6528, -0.1661]) tensor([0.0410, 0.1846, 0.5375, 0.2370]) -Greedy action tensor([-1.8160, -0.3894, 0.6084, -0.0985]) tensor([0.0454, 0.1890, 0.5127, 0.2529]) -Greedy action tensor([-1.5246, -0.1098, 0.5104, 0.0953]) tensor([0.0561, 0.2309, 0.4294, 0.2835]) -Greedy action tensor([-1.2424, -0.1917, 0.6268, 0.0994]) tensor([0.0706, 0.2018, 0.4576, 0.2700]) -Greedy action tensor([-0.6371, 0.9488, 0.1735, -0.1410]) tensor([0.1023, 0.4996, 0.2301, 0.1680]) -Greedy action tensor([-1.9373, -0.4446, 0.6642, -0.1748]) tensor([0.0404, 0.1797, 0.5446, 0.2353]) -Greedy action tensor([-1.8086, -0.2939, 0.5864, -0.1308]) tensor([0.0457, 0.2080, 0.5015, 0.2448]) -Greedy action tensor([-1.6266, 0.4729, 0.3808, -0.0921]) tensor([0.0471, 0.3842, 0.3504, 0.2184]) -Greedy action tensor([-1.8689, -0.2899, 0.6035, -0.1281]) tensor([0.0427, 0.2072, 0.5064, 0.2436]) -Greedy action tensor([-1.8729, -0.4490, 0.7226, -0.0475]) tensor([0.0404, 0.1677, 0.5413, 0.2506]) -Greedy action tensor([-1.7745, -0.4634, 0.6102, -0.0719]) tensor([0.0475, 0.1762, 0.5156, 0.2607]) -Greedy action tensor([-1.8969, -0.4243, 0.6638, -0.1467]) tensor([0.0416, 0.1812, 0.5380, 0.2392]) -Greedy action tensor([-1.8855, -0.4488, 0.6416, -0.1501]) tensor([0.0427, 0.1798, 0.5350, 0.2424]) -Greedy action tensor([-1.9365, -0.4437, 0.6655, -0.1725]) tensor([0.0404, 0.1796, 0.5445, 0.2355]) -Greedy action tensor([-1.8873, -0.4436, 0.6326, -0.1558]) tensor([0.0429, 0.1817, 0.5331, 0.2423]) -Greedy action tensor([-1.9024, -0.3760, 0.6522, -0.1520]) tensor([0.0413, 0.1900, 0.5311, 0.2377]) -Greedy action tensor([-1.6074, -0.5510, 0.5224, -0.1050]) tensor([0.0596, 0.1714, 0.5013, 0.2677]) -Greedy action tensor([-1.8542, -0.4192, 0.6183, -0.1414]) tensor([0.0443, 0.1859, 0.5245, 0.2454]) -Greedy action tensor([-0.0418, 1.0285, 0.0400, 0.6618]) tensor([0.1424, 0.4153, 0.1545, 0.2878]) -Greedy action tensor([-1.8623, -0.4811, 0.6418, -0.1308]) tensor([0.0437, 0.1741, 0.5351, 0.2471]) -Greedy action tensor([-1.8968, -0.4497, 0.6553, -0.1308]) tensor([0.0418, 0.1776, 0.5363, 0.2443]) -Greedy action tensor([-1.6471, -0.5442, 0.5025, -0.0059]) tensor([0.0563, 0.1697, 0.4833, 0.2907]) -Greedy action tensor([-1.6252, -0.4797, 0.5099, -0.0232]) tensor([0.0569, 0.1790, 0.4815, 0.2826]) -Greedy action tensor([-1.8508, -0.3547, 0.6230, -0.1185]) tensor([0.0435, 0.1942, 0.5163, 0.2460]) -Greedy action tensor([-0.8979, -0.0255, 0.4467, 0.5286]) tensor([0.0878, 0.2100, 0.3368, 0.3655]) -Greedy action tensor([-1.8784, -0.4605, 0.6319, -0.1526]) tensor([0.0434, 0.1791, 0.5339, 0.2436]) -Greedy action tensor([-1.9220, -0.4210, 0.6576, -0.1639]) tensor([0.0408, 0.1833, 0.5389, 0.2370]) -Greedy action tensor([-1.9060, -0.4471, 0.6748, -0.1522]) tensor([0.0412, 0.1771, 0.5439, 0.2379]) -Greedy action tensor([-1.8249, -0.4864, 0.6043, -0.1200]) tensor([0.0462, 0.1760, 0.5239, 0.2539]) -Greedy action tensor([-1.6630, -0.4621, 0.5360, -0.0900]) tensor([0.0551, 0.1830, 0.4965, 0.2655]) -Greedy action tensor([-1.8961, -0.4116, 0.6352, -0.1707]) tensor([0.0424, 0.1870, 0.5327, 0.2379]) -Greedy action tensor([-1.9073, -0.4336, 0.6451, -0.1630]) tensor([0.0418, 0.1825, 0.5366, 0.2391]) -Greedy action tensor([-1.6137, -0.1336, 0.4602, -0.0727]) tensor([0.0555, 0.2438, 0.4415, 0.2591]) -Greedy action tensor([-0.7878, 0.7998, 0.0492, 0.5407]) tensor([0.0835, 0.4085, 0.1928, 0.3152]) -Greedy action tensor([-0.5769, 1.0225, 0.0566, 0.2762]) tensor([0.0982, 0.4862, 0.1851, 0.2305]) -Greedy action tensor([-1.2328, -0.1476, 0.1753, 0.1421]) tensor([0.0833, 0.2466, 0.3406, 0.3295]) -Greedy action tensor([-1.3805, -0.4754, 0.5123, -0.2928]) tensor([0.0765, 0.1890, 0.5076, 0.2269]) -Greedy action tensor([-0.7320, 0.1788, 0.5538, 0.3749]) tensor([0.0987, 0.2455, 0.3571, 0.2987]) -Greedy action tensor([-1.5580, -0.3056, 0.4665, -0.0556]) tensor([0.0604, 0.2112, 0.4572, 0.2712]) -Greedy action tensor([-1.6759, -0.4164, 0.5199, -0.1115]) tensor([0.0547, 0.1927, 0.4914, 0.2613]) -Greedy action tensor([-1.8978, -0.4444, 0.6465, -0.1584]) tensor([0.0422, 0.1804, 0.5372, 0.2402]) -Greedy action tensor([-1.3700, -0.4195, 0.4048, 0.1977]) tensor([0.0700, 0.1811, 0.4131, 0.3358]) -Greedy action tensor([-1.7565, -0.4865, 0.5700, -0.0979]) tensor([0.0499, 0.1776, 0.5107, 0.2619]) -Greedy action tensor([-1.8651, -0.4203, 0.6496, -0.1366]) tensor([0.0430, 0.1825, 0.5320, 0.2424]) -Greedy action tensor([-1.9435, -0.4473, 0.6672, -0.1801]) tensor([0.0402, 0.1793, 0.5464, 0.2342]) -Greedy action tensor([-1.8877, -0.3941, 0.6389, -0.1471]) tensor([0.0423, 0.1882, 0.5287, 0.2409]) -Greedy action tensor([-1.1739, 0.1422, 0.3072, -0.1008]) tensor([0.0830, 0.3094, 0.3649, 0.2427]) -Greedy action tensor([-1.7799, -0.3925, 0.6237, -0.0572]) tensor([0.0462, 0.1848, 0.5106, 0.2584]) -Greedy action tensor([-0.9853, 0.8789, 0.1686, 0.0855]) tensor([0.0739, 0.4764, 0.2342, 0.2155]) -Greedy action tensor([-1.8097, -0.4841, 0.5943, -0.0821]) tensor([0.0466, 0.1754, 0.5158, 0.2622]) -Greedy action tensor([-0.9420, -0.5232, 0.9121, 1.1449]) tensor([0.0589, 0.0896, 0.3764, 0.4751]) -Greedy action tensor([-1.8384, -0.4490, 0.6147, -0.1310]) tensor([0.0451, 0.1811, 0.5248, 0.2490]) -Greedy action tensor([-1.8672, -0.4636, 0.6991, -0.0795]) tensor([0.0416, 0.1691, 0.5410, 0.2483]) -Greedy action tensor([-0.1303, -0.1211, 1.3423, -0.1597]) tensor([0.1362, 0.1375, 0.5940, 0.1323]) -Greedy action tensor([ 0.3733, -0.8506, 0.4592, 0.2894]) tensor([0.3027, 0.0890, 0.3299, 0.2784]) -Greedy action tensor([ 0.9025, -0.1986, 0.0269, -0.0466]) tensor([0.4681, 0.1557, 0.1950, 0.1812]) -Greedy action tensor([-0.5945, -0.9868, -0.2356, 0.1799]) tensor([0.1895, 0.1280, 0.2713, 0.4111]) -Greedy action tensor([-0.4662, -0.2475, 0.9697, -0.7558]) tensor([0.1390, 0.1729, 0.5841, 0.1040]) -Greedy action tensor([ 0.4226, -0.5444, 0.0754, -0.5030]) tensor([0.4027, 0.1531, 0.2846, 0.1596]) -Greedy action tensor([ 1.2209, -0.0769, 0.9249, 0.5268]) tensor([0.3974, 0.1085, 0.2956, 0.1985]) -Greedy action tensor([-0.4314, -1.1303, 0.0062, 0.5248]) tensor([0.1771, 0.0880, 0.2743, 0.4606]) -Greedy action tensor([-0.0855, 0.0940, -0.1432, 0.2925]) tensor([0.2174, 0.2601, 0.2052, 0.3173]) -Greedy action tensor([ 0.5532, -0.0628, 0.4500, -0.5307]) tensor([0.3597, 0.1943, 0.3244, 0.1217]) -Greedy action tensor([ 1.0582, 0.0964, 0.3131, -0.4070]) tensor([0.4790, 0.1830, 0.2273, 0.1106]) -Greedy action tensor([-0.9312, -0.7227, -0.4319, 0.1737]) tensor([0.1450, 0.1786, 0.2388, 0.4376]) -Greedy action tensor([ 0.8668, 0.4415, 0.1158, -0.2502]) tensor([0.4077, 0.2665, 0.1924, 0.1334]) -Greedy action tensor([-1.2533, -0.8831, -1.0634, 0.0315]) tensor([0.1375, 0.1992, 0.1663, 0.4970]) -Greedy action tensor([ 0.4716, -0.5340, 1.3094, 0.3326]) tensor([0.2199, 0.0804, 0.5083, 0.1914]) -Greedy action tensor([ 0.5534, -1.0804, -0.3205, -0.2107]) tensor([0.4812, 0.0939, 0.2008, 0.2241]) -Greedy action tensor([ 0.6290, 0.1260, -0.2392, 0.1130]) tensor([0.3815, 0.2307, 0.1601, 0.2277]) -Greedy action tensor([0.4983, 0.4212, 0.2773, 0.0497]) tensor([0.2971, 0.2750, 0.2382, 0.1897]) -Greedy action tensor([ 0.7830, -1.4051, 0.7852, -0.8781]) tensor([0.4340, 0.0487, 0.4349, 0.0824]) -Greedy action tensor([ 0.0727, -0.1947, -0.4786, -0.8808]) tensor([0.3667, 0.2807, 0.2113, 0.1413]) -Greedy action tensor([ 0.0645, 0.2211, -0.6605, 0.2137]) tensor([0.2621, 0.3066, 0.1270, 0.3043]) -Greedy action tensor([ 0.4301, -1.7865, -0.4494, 1.0699]) tensor([0.2924, 0.0319, 0.1213, 0.5544]) -Greedy action tensor([-0.4055, -1.0469, 0.7977, 0.8377]) tensor([0.1201, 0.0633, 0.4001, 0.4165]) -Greedy action tensor([ 1.0391, -0.8858, 0.1916, 0.7032]) tensor([0.4368, 0.0637, 0.1872, 0.3122]) -Greedy action tensor([ 1.0543, -0.1419, 0.9648, 0.5184]) tensor([0.3569, 0.1079, 0.3263, 0.2088]) -Greedy action tensor([ 0.6143, -0.1599, -1.0632, 0.0259]) tensor([0.4539, 0.2093, 0.0848, 0.2520]) -Greedy action tensor([-0.8843, -0.6688, 0.2272, -0.6394]) tensor([0.1525, 0.1892, 0.4635, 0.1948]) -Greedy action tensor([-0.1505, -0.8983, 0.1084, 0.5762]) tensor([0.2067, 0.0979, 0.2678, 0.4276]) -Greedy action tensor([-1.8078e-04, 2.6067e-01, -5.7167e-01, 2.0971e-01]) tensor([0.2441, 0.3169, 0.1379, 0.3011]) -Greedy action tensor([ 0.4782, -0.2098, 1.0202, -0.3432]) tensor([0.2731, 0.1373, 0.4696, 0.1201]) -Greedy action tensor([0.3574, 0.5016, 0.2226, 0.8045]) tensor([0.2177, 0.2515, 0.1903, 0.3405]) -Greedy action tensor([ 1.1443, -0.8877, 0.9915, 0.1325]) tensor([0.4250, 0.0557, 0.3648, 0.1545]) -Greedy action tensor([ 1.4719, -1.4906, -0.2685, 1.1356]) tensor([0.5150, 0.0266, 0.0904, 0.3680]) -Greedy action tensor([ 0.7110, 0.6349, 0.1383, -0.3816]) tensor([0.3538, 0.3279, 0.1996, 0.1187]) -Greedy action tensor([ 0.0761, -0.1047, 0.2121, 0.2123]) tensor([0.2424, 0.2023, 0.2777, 0.2777]) -Greedy action tensor([ 1.4358, -0.8405, 1.1886, 0.6785]) tensor([0.4251, 0.0436, 0.3320, 0.1993]) -Greedy action tensor([ 0.7913, -1.1723, -0.2954, -0.5064]) tensor([0.5711, 0.0802, 0.1927, 0.1560]) -Greedy action tensor([-1.8634, -0.6892, -0.7445, 0.2169]) tensor([0.0653, 0.2114, 0.2000, 0.5232]) -Greedy action tensor([-0.0182, -1.7545, -0.1938, 0.5729]) tensor([0.2617, 0.0461, 0.2195, 0.4726]) -Greedy action tensor([-0.3060, -1.9628, -0.2309, 0.7843]) tensor([0.1907, 0.0364, 0.2056, 0.5674]) -Greedy action tensor([ 0.1453, -0.5827, -0.0764, -0.5129]) tensor([0.3569, 0.1723, 0.2859, 0.1848]) -Greedy action tensor([ 0.0488, -0.5477, -0.0417, -0.2289]) tensor([0.3104, 0.1709, 0.2835, 0.2351]) -Greedy action tensor([ 0.2939, 0.5341, -0.4825, -0.3027]) tensor([0.3047, 0.3874, 0.1402, 0.1678]) -Greedy action tensor([ 1.0910, -0.0036, 0.1387, 0.3297]) tensor([0.4571, 0.1530, 0.1764, 0.2135]) -Greedy action tensor([-0.5250, -1.0659, -1.0501, 0.0226]) tensor([0.2562, 0.1492, 0.1516, 0.4430]) -Greedy action tensor([-0.9050, -1.2879, 0.7496, -0.2019]) tensor([0.1119, 0.0763, 0.5856, 0.2261]) -Greedy action tensor([ 2.0379, -1.1918, 0.2335, 0.6693]) tensor([0.6856, 0.0271, 0.1128, 0.1745]) -Greedy action tensor([ 0.6659, 0.0444, -0.2043, -0.3641]) tensor([0.4323, 0.2322, 0.1811, 0.1543]) -Greedy action tensor([ 0.8639, -0.9330, -0.0959, -0.2906]) tensor([0.5365, 0.0890, 0.2055, 0.1691]) -Greedy action tensor([-0.1660, -0.2173, -0.7239, 0.5876]) tensor([0.2152, 0.2044, 0.1232, 0.4572]) -Greedy action tensor([-0.0204, -0.7417, 0.4576, -0.7179]) tensor([0.2780, 0.1352, 0.4484, 0.1384]) -Greedy action tensor([-0.4040, -1.1180, 1.4263, -0.6002]) tensor([0.1170, 0.0573, 0.7296, 0.0962]) -Greedy action tensor([-0.1119, 0.0336, -0.7305, -0.2062]) tensor([0.2774, 0.3208, 0.1494, 0.2524]) -Greedy action tensor([ 0.2770, -0.1720, -0.4760, 0.1851]) tensor([0.3310, 0.2112, 0.1559, 0.3019]) -Greedy action tensor([ 0.6325, -1.3111, 0.3415, -0.2861]) tensor([0.4367, 0.0625, 0.3265, 0.1743]) -Greedy action tensor([-0.6498, 0.4115, 0.3915, -0.4565]) tensor([0.1260, 0.3642, 0.3570, 0.1529]) -Greedy action tensor([ 0.7351, -1.3433, 0.7150, -0.1562]) tensor([0.3976, 0.0497, 0.3896, 0.1630]) -Greedy action tensor([-0.5594, -0.9155, 0.2077, -0.7609]) tensor([0.2141, 0.1499, 0.4610, 0.1750]) -Greedy action tensor([ 0.2891, -0.7355, -0.1193, 0.1125]) tensor([0.3494, 0.1254, 0.2323, 0.2929]) -Greedy action tensor([-0.6283, -1.0963, 0.8758, -0.4022]) tensor([0.1355, 0.0849, 0.6098, 0.1699]) -Greedy action tensor([ 0.1593, 0.2205, 0.6629, -0.2443]) tensor([0.2280, 0.2424, 0.3773, 0.1523]) -Greedy action tensor([-1.0838, -0.9973, -0.0792, -0.2268]) tensor([0.1393, 0.1519, 0.3805, 0.3283]) -Greedy action tensor([-0.3247, 0.4104, 0.2901, -0.3013]) tensor([0.1678, 0.3500, 0.3104, 0.1718]) -Greedy action tensor([ 0.7477, -0.1603, -0.0906, 0.6582]) tensor([0.3636, 0.1467, 0.1572, 0.3325]) -Greedy action tensor([-0.9916, 0.0171, -0.3792, -0.5001]) tensor([0.1385, 0.3797, 0.2555, 0.2264]) -Greedy action tensor([-0.0155, 0.0480, -0.1664, -1.2480]) tensor([0.3108, 0.3312, 0.2673, 0.0906]) -Greedy action tensor([-0.1945, -0.9686, 0.1272, -0.2014]) tensor([0.2608, 0.1203, 0.3598, 0.2590]) -Greedy action tensor([ 0.2955, -1.0184, 1.0381, -0.5471]) tensor([0.2631, 0.0707, 0.5529, 0.1133]) -Greedy action tensor([ 0.2240, 0.2131, 0.0503, -0.3308]) tensor([0.2938, 0.2906, 0.2469, 0.1687]) -Greedy action tensor([-0.3449, -0.2320, -1.0855, 0.4302]) tensor([0.2098, 0.2348, 0.1000, 0.4554]) -Greedy action tensor([ 0.1549, -0.2190, 1.0399, -0.8299]) tensor([0.2230, 0.1534, 0.5403, 0.0833]) -Greedy action tensor([ 0.4240, -0.9929, -0.0738, 0.7595]) tensor([0.3078, 0.0746, 0.1871, 0.4305]) -Greedy action tensor([-0.8511, -1.3094, 0.3082, -0.5810]) tensor([0.1631, 0.1032, 0.5200, 0.2137]) -Greedy action tensor([ 0.7341, -0.3054, 1.1159, 0.4286]) tensor([0.2813, 0.0995, 0.4120, 0.2072]) -Greedy action tensor([ 0.0192, 0.0194, -0.2746, 0.7869]) tensor([0.2041, 0.2041, 0.1521, 0.4397]) -Greedy action tensor([-0.4110, -2.4041, 0.4251, 0.6443]) tensor([0.1583, 0.0216, 0.3653, 0.4548]) -Greedy action tensor([ 0.9332, -0.6144, 1.7368, 0.4197]) tensor([0.2472, 0.0526, 0.5522, 0.1479]) -Greedy action tensor([-0.5017, -1.0169, -0.2911, 0.6538]) tensor([0.1665, 0.0994, 0.2055, 0.5286]) -Greedy action tensor([-0.5313, -0.0152, -0.5670, 0.1547]) tensor([0.1777, 0.2978, 0.1715, 0.3530]) -Greedy action tensor([-0.1888, -0.0543, -0.6361, -0.3874]) tensor([0.2775, 0.3175, 0.1774, 0.2275]) -Greedy action tensor([-0.2099, -0.0838, -0.2975, -0.6507]) tensor([0.2707, 0.3071, 0.2480, 0.1742]) -Greedy action tensor([ 1.9311, -0.8539, -0.4829, 0.4119]) tensor([0.7299, 0.0451, 0.0653, 0.1598]) -Greedy action tensor([ 1.6390, -0.4885, -0.2849, 0.2643]) tensor([0.6587, 0.0785, 0.0962, 0.1666]) -Greedy action tensor([ 1.6865, -0.5433, -0.4947, 0.5554]) tensor([0.6480, 0.0697, 0.0732, 0.2091]) -Greedy action tensor([ 1.2017, -0.1948, -0.4297, 0.2362]) tensor([0.5483, 0.1357, 0.1073, 0.2088]) -Greedy action tensor([ 1.4148, -0.8697, -0.3427, 0.3343]) tensor([0.6197, 0.0631, 0.1069, 0.2103]) -Greedy action tensor([ 2.1412, -1.2309, -0.2876, 0.5845]) tensor([0.7500, 0.0257, 0.0661, 0.1581]) -Greedy action tensor([ 1.2088, -0.5391, -0.2896, 0.3341]) tensor([0.5511, 0.0960, 0.1232, 0.2298]) -Greedy action tensor([ 1.2322, -0.5676, -0.1818, 0.0220]) tensor([0.5860, 0.0969, 0.1425, 0.1747]) -Greedy action tensor([ 0.6139, -0.1090, -0.1589, 0.0487]) tensor([0.3976, 0.1929, 0.1836, 0.2259]) -Greedy action tensor([ 1.5825, -1.0648, -0.4685, 0.3096]) tensor([0.6759, 0.0479, 0.0869, 0.1893]) -Greedy action tensor([ 2.0673, -0.7386, 0.4115, 1.0817]) tensor([0.6155, 0.0372, 0.1175, 0.2297]) -Greedy action tensor([ 1.9060, -0.9512, -0.2654, 0.4392]) tensor([0.7132, 0.0410, 0.0813, 0.1645]) -Greedy action tensor([ 0.9785, -0.3783, -0.1825, 0.1593]) tensor([0.4972, 0.1280, 0.1557, 0.2191]) -Greedy action tensor([ 2.0523, -0.6832, -0.2986, 0.3533]) tensor([0.7446, 0.0483, 0.0709, 0.1362]) -Greedy action tensor([ 1.6004, -0.4333, -0.5919, 0.6293]) tensor([0.6168, 0.0807, 0.0689, 0.2336]) -Greedy action tensor([ 1.2933, -0.3307, -0.2660, 0.0661]) tensor([0.5881, 0.1159, 0.1237, 0.1724]) -Greedy action tensor([ 1.7298, -0.1724, -0.5961, 0.6819]) tensor([0.6259, 0.0934, 0.0611, 0.2195]) -Greedy action tensor([ 1.4281, -0.8123, -0.2595, 0.4195]) tensor([0.6038, 0.0643, 0.1117, 0.2202]) -Greedy action tensor([ 1.9721, -0.5983, -0.0473, 0.5867]) tensor([0.6852, 0.0524, 0.0909, 0.1715]) -Greedy action tensor([ 1.6276, -0.2902, -0.6198, 0.2429]) tensor([0.6653, 0.0978, 0.0703, 0.1666]) -Greedy action tensor([ 1.5413, -0.5758, -0.2151, 0.3158]) tensor([0.6302, 0.0759, 0.1088, 0.1851]) -Greedy action tensor([ 0.7006, -0.5005, -0.1614, 0.4702]) tensor([0.3972, 0.1195, 0.1678, 0.3155]) -Greedy action tensor([ 1.1081, 0.0493, -0.0617, 0.0838]) tensor([0.4960, 0.1720, 0.1540, 0.1781]) -Greedy action tensor([ 2.2817, 0.6796, -0.1198, 0.1961]) tensor([0.7061, 0.1423, 0.0640, 0.0877]) -Greedy action tensor([ 1.5105, -0.5659, -0.0237, 0.0200]) tensor([0.6385, 0.0800, 0.1377, 0.1438]) -Greedy action tensor([ 1.5177, -0.4562, -0.3753, 0.6783]) tensor([0.5809, 0.0807, 0.0875, 0.2509]) -Greedy action tensor([ 0.9438, -0.2781, -0.3099, -0.3078]) tensor([0.5359, 0.1579, 0.1530, 0.1533]) -Greedy action tensor([ 1.2238e+00, -1.7117e-01, -4.6891e-01, 1.2117e-03]) tensor([0.5793, 0.1436, 0.1066, 0.1706]) -Greedy action tensor([ 2.0412, -1.2957, -0.2632, 0.3339]) tensor([0.7595, 0.0270, 0.0758, 0.1377]) -Greedy action tensor([ 2.5018, -0.4071, -1.1082, 0.3860]) tensor([0.8319, 0.0454, 0.0225, 0.1003]) -Greedy action tensor([ 1.3750, -0.5501, -0.4285, 0.2626]) tensor([0.6100, 0.0890, 0.1005, 0.2006]) -Greedy action tensor([ 1.4881, -0.7335, -0.0532, 0.1562]) tensor([0.6303, 0.0683, 0.1349, 0.1664]) -Greedy action tensor([ 1.3697, -0.4707, -0.0471, 0.1675]) tensor([0.5876, 0.0933, 0.1425, 0.1766]) -Greedy action tensor([ 1.8634, -1.0159, 0.0558, 0.4483]) tensor([0.6835, 0.0384, 0.1121, 0.1660]) -Greedy action tensor([ 1.1341, -0.3831, -0.0116, 0.1089]) tensor([0.5274, 0.1157, 0.1677, 0.1892]) -Greedy action tensor([ 1.4893, -0.5943, -0.4437, 0.0592]) tensor([0.6629, 0.0825, 0.0959, 0.1586]) -Greedy action tensor([ 2.2974, -0.8963, -0.5279, 0.8055]) tensor([0.7546, 0.0310, 0.0447, 0.1697]) -Greedy action tensor([ 1.7481, -0.8877, -0.4911, 0.4751]) tensor([0.6858, 0.0491, 0.0731, 0.1920]) -Greedy action tensor([ 1.6652, -1.1520, -0.4377, 0.6549]) tensor([0.6468, 0.0387, 0.0790, 0.2355]) -Greedy action tensor([ 1.6587, -0.2148, -0.4219, 0.1480]) tensor([0.6670, 0.1024, 0.0833, 0.1472]) -Greedy action tensor([ 1.0754, -0.2684, -0.6140, 0.0817]) tensor([0.5508, 0.1437, 0.1017, 0.2039]) -Greedy action tensor([ 1.6737, -0.5379, -0.4329, 0.3942]) tensor([0.6625, 0.0726, 0.0806, 0.1843]) -Greedy action tensor([ 1.2592, -0.5293, -0.4003, 0.1460]) tensor([0.5931, 0.0992, 0.1128, 0.1948]) -Greedy action tensor([ 1.6209, -0.7200, -0.3545, 0.4991]) tensor([0.6408, 0.0617, 0.0889, 0.2087]) -Greedy action tensor([ 1.6747, -1.3437, -0.1182, 0.2549]) tensor([0.6863, 0.0335, 0.1143, 0.1659]) -Greedy action tensor([ 1.2401, 0.0468, -0.7111, 0.7187]) tensor([0.4904, 0.1487, 0.0697, 0.2912]) -Greedy action tensor([ 2.2687, -1.1386, -0.0882, 0.4393]) tensor([0.7762, 0.0257, 0.0735, 0.1246]) -Greedy action tensor([ 1.6026, -0.2889, -0.3728, -0.2318]) tensor([0.6900, 0.1041, 0.0957, 0.1102]) -Greedy action tensor([ 1.6356, -0.3191, -0.3843, 0.1423]) tensor([0.6671, 0.0945, 0.0885, 0.1499]) -Greedy action tensor([ 2.7917, -0.8632, 0.1463, 0.1769]) tensor([0.8547, 0.0221, 0.0607, 0.0625]) -Greedy action tensor([ 0.8422, -0.4520, 0.0478, 0.0362]) tensor([0.4603, 0.1262, 0.2080, 0.2056]) -Greedy action tensor([ 1.6021, -0.3106, -0.6365, 0.4987]) tensor([0.6305, 0.0931, 0.0672, 0.2092]) -Greedy action tensor([ 1.4241, -0.0165, -0.4616, -0.0094]) tensor([0.6146, 0.1455, 0.0933, 0.1466]) -Greedy action tensor([ 0.6980, -0.2651, 0.1804, 0.6599]) tensor([0.3401, 0.1298, 0.2027, 0.3274]) -Greedy action tensor([ 1.3159, -0.3042, -0.0853, 0.0127]) tensor([0.5828, 0.1153, 0.1435, 0.1583]) -Greedy action tensor([ 1.1765, -0.1103, -0.4676, 0.1384]) tensor([0.5484, 0.1514, 0.1059, 0.1942]) -Greedy action tensor([ 1.6622, -0.7608, -0.3648, 0.3319]) tensor([0.6735, 0.0597, 0.0887, 0.1781]) -Greedy action tensor([ 1.6300, -0.4033, -0.5094, 0.3553]) tensor([0.6544, 0.0857, 0.0770, 0.1829]) -Greedy action tensor([ 2.0358, -1.0333, -0.0300, 0.2839]) tensor([0.7426, 0.0345, 0.0941, 0.1288]) -Greedy action tensor([ 1.7559, -0.2844, -0.3916, 0.3925]) tensor([0.6655, 0.0865, 0.0777, 0.1702]) -Greedy action tensor([ 0.9300, -0.2676, -0.5430, 0.3421]) tensor([0.4792, 0.1447, 0.1099, 0.2662]) -Greedy action tensor([ 1.6897, -0.7328, -0.3341, 0.1890]) tensor([0.6926, 0.0614, 0.0915, 0.1544]) -Greedy action tensor([ 2.0930, -0.7303, -0.4813, 0.5297]) tensor([0.7435, 0.0442, 0.0567, 0.1557]) -Greedy action tensor([ 1.8802, -0.5736, -0.4723, 0.3826]) tensor([0.7119, 0.0612, 0.0677, 0.1592]) -Greedy action tensor([ 0.9298, -0.3970, -0.2110, 0.2087]) tensor([0.4829, 0.1281, 0.1543, 0.2348]) -Greedy action tensor([ 0.9419, -0.2125, -0.0609, -0.0084]) tensor([0.4834, 0.1524, 0.1773, 0.1869]) -Greedy action tensor([ 1.2102, -0.3478, -0.4303, 0.5039]) tensor([0.5269, 0.1109, 0.1022, 0.2600]) -Greedy action tensor([ 1.2944, -0.2697, -1.0454, 0.2638]) tensor([0.6015, 0.1259, 0.0580, 0.2146]) -Greedy action tensor([ 1.6255, -0.0846, -0.5804, 0.1241]) tensor([0.6606, 0.1195, 0.0728, 0.1472]) -Greedy action tensor([ 0.8672, -0.3389, -0.5189, 0.1001]) tensor([0.4966, 0.1487, 0.1242, 0.2306]) -Greedy action tensor([ 1.9673, -0.4462, -0.4888, 0.2823]) tensor([0.7349, 0.0658, 0.0630, 0.1363]) -Greedy action tensor([ 2.0393, -0.7143, -0.6057, 0.6951]) tensor([0.7166, 0.0456, 0.0509, 0.1869]) -Greedy action tensor([ 1.2156, -0.1792, 0.2101, -0.1890]) tensor([0.5379, 0.1333, 0.1968, 0.1320]) -Greedy action tensor([ 1.2423, -0.4569, -0.3357, 0.4701]) tensor([0.5402, 0.0988, 0.1115, 0.2496]) -Greedy action tensor([ 1.1392, -0.6430, -0.1019, 0.0828]) tensor([0.5540, 0.0932, 0.1601, 0.1926]) -Greedy action tensor([ 2.0718, -0.7352, -0.4711, 0.1289]) tensor([0.7798, 0.0471, 0.0613, 0.1117]) -Greedy action tensor([ 2.0693, -1.2671, -0.2059, 0.5291]) tensor([0.7393, 0.0263, 0.0760, 0.1585]) -Greedy action tensor([ 1.5667, -0.5936, -0.0900, 0.2949]) tensor([0.6304, 0.0727, 0.1203, 0.1767]) -Greedy action tensor([ 1.2131, -0.2099, -0.4603, 0.3759]) tensor([0.5372, 0.1295, 0.1008, 0.2326]) -Greedy action tensor([ 1.4802, -0.1061, -0.5497, -0.2903]) tensor([0.6639, 0.1359, 0.0872, 0.1130]) -Greedy action tensor([ 1.1702, -0.1838, 0.0547, -0.1739]) tensor([0.5415, 0.1398, 0.1775, 0.1412]) -Greedy action tensor([ 0.3939, 0.0527, 0.0781, -0.1372]) tensor([0.3302, 0.2348, 0.2408, 0.1942]) -Greedy action tensor([ 0.4372, 0.3331, -0.1340, 0.2126]) tensor([0.3063, 0.2760, 0.1730, 0.2447]) -Greedy action tensor([ 0.7680, -0.4443, 0.0023, -0.1744]) tensor([0.4646, 0.1382, 0.2161, 0.1811]) -Greedy action tensor([ 1.0167, -0.4614, -0.4521, -0.4511]) tensor([0.5922, 0.1351, 0.1363, 0.1365]) -Greedy action tensor([ 1.0479, -0.6391, -0.1195, -0.3723]) tensor([0.5754, 0.1065, 0.1791, 0.1390]) -Greedy action tensor([ 1.1775, -0.5701, -0.0391, -0.3411]) tensor([0.5919, 0.1031, 0.1753, 0.1296]) -Greedy action tensor([ 0.4634, -0.3545, 0.0460, -0.7882]) tensor([0.4191, 0.1850, 0.2761, 0.1199]) -Greedy action tensor([ 0.6729, -0.2344, -0.0737, -0.0765]) tensor([0.4255, 0.1717, 0.2017, 0.2011]) -Greedy action tensor([ 0.7428, -0.3701, -0.0458, -0.1732]) tensor([0.4581, 0.1505, 0.2082, 0.1833]) -Greedy action tensor([ 0.8966, -0.4997, 0.2506, -0.5020]) tensor([0.4954, 0.1226, 0.2597, 0.1223]) -Greedy action tensor([ 0.2969, 0.0904, -0.1743, -0.8068]) tensor([0.3611, 0.2937, 0.2254, 0.1198]) -Greedy action tensor([ 0.8110, -0.5784, -0.0611, -0.1552]) tensor([0.4883, 0.1217, 0.2042, 0.1858]) -Greedy action tensor([ 0.5192, -0.5740, -0.0037, -0.2402]) tensor([0.4174, 0.1399, 0.2474, 0.1953]) -Greedy action tensor([ 0.8374, -0.6573, 0.0014, -0.3208]) tensor([0.5071, 0.1138, 0.2198, 0.1593]) -Greedy action tensor([ 0.7271, -0.4231, -0.0578, -0.2382]) tensor([0.4643, 0.1470, 0.2118, 0.1769]) -Greedy action tensor([ 1.2426, -0.8614, -0.0460, -0.4897]) tensor([0.6351, 0.0775, 0.1751, 0.1123]) -Greedy action tensor([ 0.9523, -0.2895, -0.1596, -0.2764]) tensor([0.5234, 0.1512, 0.1722, 0.1532]) -Greedy action tensor([ 0.9070, -0.4208, -0.0285, -0.3001]) tensor([0.5111, 0.1355, 0.2005, 0.1528]) -Greedy action tensor([ 0.3427, -0.2072, -0.0281, -0.1631]) tensor([0.3484, 0.2010, 0.2405, 0.2101]) -Greedy action tensor([ 0.4804, 0.1074, -0.0152, -0.1262]) tensor([0.3517, 0.2422, 0.2143, 0.1918]) -Greedy action tensor([ 1.0290, -0.8541, -0.1478, -0.3889]) tensor([0.5873, 0.0893, 0.1811, 0.1423]) -Greedy action tensor([ 0.4108, -0.2901, 0.0284, -0.2596]) tensor([0.3718, 0.1844, 0.2536, 0.1902]) -Greedy action tensor([ 1.1952, -0.4462, -0.0517, -0.6587]) tensor([0.6106, 0.1183, 0.1755, 0.0956]) -Greedy action tensor([ 0.7901, -0.2564, -0.0483, -0.5806]) tensor([0.4908, 0.1724, 0.2122, 0.1246]) -Greedy action tensor([ 0.9704, -0.6352, -0.0155, -0.3328]) tensor([0.5418, 0.1088, 0.2022, 0.1472]) -Greedy action tensor([ 0.7023, -0.4250, 0.0244, -0.0525]) tensor([0.4345, 0.1407, 0.2206, 0.2042]) -Greedy action tensor([ 0.4158, -0.2554, 0.0329, -0.0246]) tensor([0.3525, 0.1802, 0.2404, 0.2269]) -Greedy action tensor([ 1.0037, -0.9582, 0.0134, -0.5040]) tensor([0.5769, 0.0811, 0.2143, 0.1277]) -Greedy action tensor([ 0.7271, -0.5241, -0.0659, -0.4980]) tensor([0.4920, 0.1408, 0.2226, 0.1445]) -Greedy action tensor([ 0.9682, -0.6833, 0.0557, -0.4797]) tensor([0.5469, 0.1049, 0.2196, 0.1286]) -Greedy action tensor([ 1.1208, -0.6051, 0.0469, -0.6653]) tensor([0.5927, 0.1055, 0.2025, 0.0993]) -Greedy action tensor([ 0.5062, -0.4266, 0.0091, -0.2048]) tensor([0.4011, 0.1578, 0.2440, 0.1970]) -Greedy action tensor([ 0.4630, -0.2300, -0.1095, -0.2174]) tensor([0.3890, 0.1945, 0.2194, 0.1970]) -Greedy action tensor([ 0.4443, -0.5378, 0.0433, -0.6338]) tensor([0.4194, 0.1571, 0.2808, 0.1427]) -Greedy action tensor([ 0.6380, -0.3814, 0.0977, -0.3700]) tensor([0.4332, 0.1563, 0.2524, 0.1581]) -Greedy action tensor([ 1.1864, -0.7890, 0.0489, -0.6562]) tensor([0.6181, 0.0857, 0.1982, 0.0979]) -Greedy action tensor([ 0.5683, -0.1365, -0.0876, -0.0252]) tensor([0.3898, 0.1926, 0.2023, 0.2153]) -Greedy action tensor([ 0.8957, -0.6552, 0.1234, -0.5038]) tensor([0.5206, 0.1104, 0.2405, 0.1285]) -Greedy action tensor([ 0.8385, -0.4494, 0.0900, -0.3211]) tensor([0.4848, 0.1337, 0.2294, 0.1521]) -Greedy action tensor([ 0.9805, -0.2504, -0.1712, -0.3138]) tensor([0.5313, 0.1551, 0.1679, 0.1456]) -Greedy action tensor([ 0.5726, -0.3528, 0.0637, -0.2691]) tensor([0.4118, 0.1632, 0.2475, 0.1775]) -Greedy action tensor([ 0.3849, -0.6212, -0.1720, -0.2616]) tensor([0.4061, 0.1485, 0.2327, 0.2127]) -Greedy action tensor([ 0.6098, -0.3018, -0.1304, -0.2917]) tensor([0.4376, 0.1759, 0.2088, 0.1777]) -Greedy action tensor([ 0.9629, -0.6494, -0.0395, -0.3749]) tensor([0.5468, 0.1090, 0.2007, 0.1435]) -Greedy action tensor([ 0.9588, -0.7086, 0.0291, -0.5417]) tensor([0.5536, 0.1045, 0.2185, 0.1235]) -Greedy action tensor([ 0.2837, 0.1100, -0.1339, -0.1048]) tensor([0.3147, 0.2646, 0.2073, 0.2134]) -Greedy action tensor([ 0.4804, -0.4143, -0.1567, -0.0480]) tensor([0.3957, 0.1617, 0.2093, 0.2333]) -Greedy action tensor([ 0.6714, -0.3138, -0.1425, -0.2652]) tensor([0.4528, 0.1691, 0.2006, 0.1775]) -Greedy action tensor([ 1.2082, -0.3337, -0.3072, -0.4070]) tensor([0.6125, 0.1311, 0.1346, 0.1218]) -Greedy action tensor([ 0.5803, -0.1540, 0.0096, -0.1115]) tensor([0.3928, 0.1885, 0.2220, 0.1967]) -Greedy action tensor([ 0.3744, -0.0428, -0.0343, -0.1339]) tensor([0.3419, 0.2253, 0.2272, 0.2057]) -Greedy action tensor([ 0.7684, -0.5258, -0.0836, -0.7214]) tensor([0.5192, 0.1423, 0.2215, 0.1170]) -Greedy action tensor([ 1.0564, -0.3422, -0.2543, -0.3807]) tensor([0.5701, 0.1408, 0.1537, 0.1355]) -Greedy action tensor([ 1.0291, -0.9442, 0.0374, -0.6603]) tensor([0.5901, 0.0820, 0.2189, 0.1090]) -Greedy action tensor([ 0.5350, -0.1486, -0.0421, -0.0923]) tensor([0.3846, 0.1941, 0.2159, 0.2054]) -Greedy action tensor([ 0.7698, -0.6259, 0.0698, -0.8523]) tensor([0.5150, 0.1276, 0.2558, 0.1017]) -Greedy action tensor([ 0.6447, 0.1385, -0.0953, 0.1475]) tensor([0.3720, 0.2242, 0.1775, 0.2263]) -Greedy action tensor([ 1.1363, -0.7055, -0.0558, -0.5625]) tensor([0.6079, 0.0964, 0.1846, 0.1112]) -Greedy action tensor([ 0.5524, -0.1341, -0.0267, -0.0675]) tensor([0.3844, 0.1935, 0.2154, 0.2068]) -Greedy action tensor([ 0.7392, -0.6142, -0.0128, -0.1345]) tensor([0.4657, 0.1203, 0.2195, 0.1944]) -Greedy action tensor([ 0.7968, -0.2678, -0.0394, -0.4431]) tensor([0.4836, 0.1668, 0.2096, 0.1400]) -Greedy action tensor([ 0.6934, -0.7977, 0.0732, -0.5068]) tensor([0.4845, 0.1091, 0.2606, 0.1459]) -Greedy action tensor([ 0.4688, -0.0415, -0.0133, -0.0680]) tensor([0.3568, 0.2142, 0.2203, 0.2086]) -Greedy action tensor([ 0.6878, 0.1799, -0.0527, -0.3767]) tensor([0.4126, 0.2483, 0.1968, 0.1423]) -Greedy action tensor([ 0.6851, -0.6661, -0.3499, -0.9903]) tensor([0.5551, 0.1437, 0.1972, 0.1039]) -Greedy action tensor([ 0.9964, -0.8133, 0.2059, -0.5764]) tensor([0.5480, 0.0897, 0.2486, 0.1137]) -Greedy action tensor([ 0.2693, 0.1631, -0.0492, -0.3631]) tensor([0.3167, 0.2848, 0.2303, 0.1682]) -Greedy action tensor([ 0.4109, -0.1210, -0.0827, -0.0393]) tensor([0.3527, 0.2072, 0.2153, 0.2248]) -Greedy action tensor([ 0.7176, -0.0956, -0.0555, -0.0024]) tensor([0.4181, 0.1854, 0.1930, 0.2035]) -Greedy action tensor([ 0.3387, 0.1211, 0.0251, -0.3889]) tensor([0.3313, 0.2665, 0.2421, 0.1600]) -Greedy action tensor([ 0.8959, -0.4912, -0.0218, -0.5169]) tensor([0.5284, 0.1320, 0.2110, 0.1286]) -Greedy action tensor([ 0.7628, -0.6125, -0.0015, -0.5122]) tensor([0.5005, 0.1265, 0.2331, 0.1399]) -Greedy action tensor([ 0.9462, -0.6699, 0.0901, -0.4387]) tensor([0.5337, 0.1060, 0.2267, 0.1336]) -Greedy action tensor([ 0.6151, -0.3696, -0.0388, -0.2494]) tensor([0.4320, 0.1614, 0.2246, 0.1820]) -Greedy action tensor([ 1.0110, -0.4249, 0.0246, -0.3710]) tensor([0.5371, 0.1278, 0.2003, 0.1349]) -Greedy action tensor([ 0.7513, -0.6131, -0.0240, -0.2475]) tensor([0.4798, 0.1226, 0.2209, 0.1767]) -Greedy action tensor([ 0.5214, -0.3653, -0.0806, -0.2932]) tensor([0.4162, 0.1715, 0.2280, 0.1843]) -Greedy action tensor([ 1.0956, -0.7787, -0.0115, -0.5059]) tensor([0.5933, 0.0910, 0.1961, 0.1196]) -Greedy action tensor([ 0.5088, -0.1782, -0.1389, 0.2285]) tensor([0.3595, 0.1809, 0.1881, 0.2716]) -Greedy action tensor([ 0.8342, -0.6850, -0.1029, -0.2913]) tensor([0.5168, 0.1131, 0.2024, 0.1677]) -Greedy action tensor([ 0.8873, -0.2607, -0.0526, -0.1026]) tensor([0.4809, 0.1526, 0.1879, 0.1787]) -Greedy action tensor([-1.2236, -0.4277, 0.2440, 0.3022]) tensor([0.0823, 0.1824, 0.3570, 0.3784]) -Greedy action tensor([-1.8556, -0.3251, 0.6371, -0.1287]) tensor([0.0428, 0.1980, 0.5182, 0.2410]) -Greedy action tensor([-1.7804, -0.4776, 0.5861, -0.1332]) tensor([0.0487, 0.1792, 0.5192, 0.2529]) -Greedy action tensor([-1.8439, -0.3506, 0.6280, -0.1208]) tensor([0.0437, 0.1944, 0.5173, 0.2446]) -Greedy action tensor([-1.3882, 0.3320, 0.3074, -0.0465]) tensor([0.0630, 0.3522, 0.3436, 0.2412]) -Greedy action tensor([-1.8785, -0.4769, 0.5948, -0.1919]) tensor([0.0448, 0.1819, 0.5313, 0.2419]) -Greedy action tensor([-1.8480, -0.4053, 0.6362, -0.1243]) tensor([0.0438, 0.1854, 0.5253, 0.2455]) -Greedy action tensor([-1.6970, -0.4476, 0.5470, -0.0822]) tensor([0.0528, 0.1841, 0.4978, 0.2653]) -Greedy action tensor([-1.9373, -0.4171, 0.6589, -0.1751]) tensor([0.0403, 0.1843, 0.5406, 0.2348]) -Greedy action tensor([-1.4786, -0.4590, 0.4315, 0.1949]) tensor([0.0631, 0.1748, 0.4259, 0.3362]) -Greedy action tensor([-1.9361, -0.4540, 0.6669, -0.1735]) tensor([0.0404, 0.1780, 0.5460, 0.2356]) -Greedy action tensor([-1.9178, -0.3247, 0.6340, -0.1632]) tensor([0.0408, 0.2005, 0.5230, 0.2357]) -Greedy action tensor([-1.7343, -0.4608, 0.5587, -0.0419]) tensor([0.0502, 0.1795, 0.4975, 0.2729]) -Greedy action tensor([-1.7809, -0.1745, 0.5210, -0.0615]) tensor([0.0464, 0.2312, 0.4635, 0.2589]) -Greedy action tensor([-0.7307, 0.2047, 0.1039, -0.1643]) tensor([0.1313, 0.3347, 0.3026, 0.2314]) -Greedy action tensor([-1.8728, -0.3474, 0.6252, -0.1367]) tensor([0.0427, 0.1962, 0.5189, 0.2422]) -Greedy action tensor([-1.8519, -0.2761, 0.5952, -0.1131]) tensor([0.0433, 0.2095, 0.5006, 0.2466]) -Greedy action tensor([-1.7626, -0.3922, 0.6105, -0.0459]) tensor([0.0471, 0.1854, 0.5054, 0.2621]) -Greedy action tensor([ 0.1238, 1.1269, -0.0463, 0.3767]) tensor([0.1707, 0.4655, 0.1440, 0.2198]) -Greedy action tensor([-1.6273, -0.1664, 0.4821, 0.0112]) tensor([0.0535, 0.2305, 0.4408, 0.2752]) -Greedy action tensor([-0.9703, 0.5271, 0.3236, 0.3099]) tensor([0.0786, 0.3516, 0.2868, 0.2829]) -Greedy action tensor([-1.7848, 0.0920, 0.5281, -0.1138]) tensor([0.0436, 0.2846, 0.4402, 0.2317]) -Greedy action tensor([-1.9471, -0.4510, 0.6685, -0.1822]) tensor([0.0400, 0.1787, 0.5475, 0.2338]) -Greedy action tensor([-1.2251, -0.4710, 0.4111, -0.1599]) tensor([0.0896, 0.1904, 0.4601, 0.2599]) -Greedy action tensor([-1.8458, -0.4469, 0.6187, -0.1361]) tensor([0.0448, 0.1814, 0.5264, 0.2475]) -Greedy action tensor([-1.7849, -0.2418, 0.5564, -0.0969]) tensor([0.0465, 0.2178, 0.4839, 0.2518]) -Greedy action tensor([-1.8913, -0.4509, 0.6396, -0.1625]) tensor([0.0427, 0.1803, 0.5365, 0.2405]) -Greedy action tensor([-1.5863, -0.4819, 0.5198, 0.0272]) tensor([0.0580, 0.1749, 0.4762, 0.2910]) -Greedy action tensor([-1.8665, -0.4445, 0.6597, -0.1124]) tensor([0.0427, 0.1769, 0.5338, 0.2466]) -Greedy action tensor([-0.5819, 0.1346, 0.5066, 0.4198]) tensor([0.1144, 0.2342, 0.3398, 0.3115]) -Greedy action tensor([-1.8699, -0.3959, 0.6284, -0.1748]) tensor([0.0435, 0.1901, 0.5293, 0.2371]) -Greedy action tensor([-0.6738, -0.5694, 0.1401, 0.2703]) tensor([0.1441, 0.1600, 0.3253, 0.3705]) -Greedy action tensor([-1.9012, -0.3303, 0.6314, -0.1519]) tensor([0.0414, 0.1992, 0.5212, 0.2381]) -Greedy action tensor([-1.9214, -0.4512, 0.6535, -0.1682]) tensor([0.0412, 0.1794, 0.5414, 0.2380]) -Greedy action tensor([-0.4653, 0.1437, 0.6542, 0.4809]) tensor([0.1180, 0.2169, 0.3613, 0.3038]) -Greedy action tensor([-1.9256, -0.4270, 0.6587, -0.1694]) tensor([0.0408, 0.1825, 0.5406, 0.2361]) -Greedy action tensor([-1.7350, -0.1920, 0.5362, -0.1210]) tensor([0.0490, 0.2294, 0.4752, 0.2463]) -Greedy action tensor([-1.9319, -0.4446, 0.6551, -0.1738]) tensor([0.0408, 0.1805, 0.5421, 0.2366]) -Greedy action tensor([-1.8903, -0.1870, 0.5984, -0.1476]) tensor([0.0412, 0.2265, 0.4967, 0.2356]) -Greedy action tensor([-1.9247, -0.4387, 0.6605, -0.1691]) tensor([0.0409, 0.1806, 0.5421, 0.2365]) -Greedy action tensor([-1.8980, -0.4460, 0.6459, -0.1582]) tensor([0.0422, 0.1803, 0.5372, 0.2404]) -Greedy action tensor([-1.6932, -0.1058, 0.5342, 0.0109]) tensor([0.0484, 0.2367, 0.4489, 0.2660]) -Greedy action tensor([-1.8157, -0.4125, 0.6617, -0.1017]) tensor([0.0444, 0.1806, 0.5286, 0.2464]) -Greedy action tensor([-1.8357, -0.4555, 0.7431, -0.0416]) tensor([0.0414, 0.1645, 0.5453, 0.2488]) -Greedy action tensor([-1.4027, 0.5070, 0.2675, 0.0677]) tensor([0.0574, 0.3877, 0.3051, 0.2498]) -Greedy action tensor([-1.8745, -0.4541, 0.6368, -0.1470]) tensor([0.0433, 0.1793, 0.5337, 0.2437]) -Greedy action tensor([-1.7380, -0.1477, 0.5050, -0.0776]) tensor([0.0486, 0.2382, 0.4576, 0.2555]) -Greedy action tensor([-1.7306, -0.3209, 0.5928, -0.0391]) tensor([0.0482, 0.1975, 0.4925, 0.2618]) -Greedy action tensor([-1.9254, -0.4174, 0.6572, -0.1698]) tensor([0.0408, 0.1841, 0.5393, 0.2359]) -Greedy action tensor([-1.3643, -0.6356, 0.3814, 0.2373]) tensor([0.0727, 0.1506, 0.4163, 0.3604]) -Greedy action tensor([-1.6756, -0.5449, 0.5286, -0.0953]) tensor([0.0555, 0.1719, 0.5030, 0.2696]) -Greedy action tensor([-1.8786, -0.4224, 0.6294, -0.1514]) tensor([0.0431, 0.1849, 0.5294, 0.2425]) -Greedy action tensor([-0.0631, 0.6320, 0.4679, 0.8734]) tensor([0.1378, 0.2762, 0.2344, 0.3516]) -Greedy action tensor([0.0786, 0.9259, 0.4136, 0.8431]) tensor([0.1454, 0.3392, 0.2032, 0.3122]) -Greedy action tensor([-1.7303, -0.3227, 0.5452, -0.0831]) tensor([0.0500, 0.2042, 0.4864, 0.2595]) -Greedy action tensor([-1.5634, -0.4545, 0.4695, 0.0114]) tensor([0.0606, 0.1837, 0.4629, 0.2928]) -Greedy action tensor([-0.5474, 1.0250, 0.0683, 0.2217]) tensor([0.1018, 0.4903, 0.1884, 0.2196]) -Greedy action tensor([-1.8719, -0.4567, 0.6294, -0.1218]) tensor([0.0433, 0.1785, 0.5287, 0.2495]) -Greedy action tensor([-1.0733, 0.3423, 0.4522, 0.3099]) tensor([0.0730, 0.3006, 0.3355, 0.2910]) -Greedy action tensor([-1.0698, -0.4722, 0.3855, 0.4446]) tensor([0.0858, 0.1560, 0.3679, 0.3903]) -Greedy action tensor([-1.7662, -0.2322, 0.5897, -0.0610]) tensor([0.0461, 0.2138, 0.4863, 0.2537]) -Greedy action tensor([-1.8803, -0.3458, 0.6275, -0.1470]) tensor([0.0424, 0.1968, 0.5208, 0.2400]) -Greedy action tensor([-1.8956, -0.4456, 0.6400, -0.1552]) tensor([0.0424, 0.1808, 0.5352, 0.2417]) -Greedy action tensor([-1.8831, -0.4561, 0.6425, -0.1495]) tensor([0.0429, 0.1786, 0.5358, 0.2427]) -Greedy action tensor([-1.9175, -0.4359, 0.6531, -0.1671]) tensor([0.0413, 0.1816, 0.5395, 0.2376]) -Greedy action tensor([-1.8873, -0.4174, 0.6349, -0.1497]) tensor([0.0426, 0.1852, 0.5303, 0.2420]) -Greedy action tensor([-1.8025, -0.3242, 0.5737, -0.0996]) tensor([0.0462, 0.2027, 0.4974, 0.2537]) -Greedy action tensor([-1.3027, 0.4373, 0.3783, -0.1930]) tensor([0.0662, 0.3773, 0.3556, 0.2009]) -Greedy action tensor([-1.5419, -0.2177, 0.4509, -0.0856]) tensor([0.0610, 0.2294, 0.4477, 0.2618]) -Greedy action tensor([-1.9030, -0.3831, 0.6363, -0.1537]) tensor([0.0417, 0.1905, 0.5281, 0.2397]) -Greedy action tensor([-1.5095, -0.5272, 0.4243, 0.0454]) tensor([0.0653, 0.1743, 0.4514, 0.3090]) -Greedy action tensor([-0.0414, 0.8056, 0.3567, 0.7861]) tensor([0.1407, 0.3281, 0.2094, 0.3218]) -Greedy action tensor([-1.8467, -0.4117, 0.6148, -0.1238]) tensor([0.0444, 0.1865, 0.5205, 0.2487]) -Greedy action tensor([-1.2129, 0.0294, 0.7000, 0.5495]) tensor([0.0586, 0.2030, 0.3969, 0.3415]) -Greedy action tensor([-0.7909, -0.5611, 1.0234, 1.3805]) tensor([0.0583, 0.0733, 0.3575, 0.5109]) -Greedy action tensor([-1.6944, -0.4541, 0.5420, -0.1087]) tensor([0.0535, 0.1849, 0.5005, 0.2611]) -Greedy action tensor([-1.8951, -0.4061, 0.6319, -0.1535]) tensor([0.0423, 0.1874, 0.5291, 0.2412]) -Greedy action tensor([-1.8231, -0.3748, 0.6612, -0.0602]) tensor([0.0433, 0.1844, 0.5197, 0.2526]) -Greedy action tensor([-1.9214, -0.3744, 0.6468, -0.1618]) tensor([0.0407, 0.1913, 0.5313, 0.2367]) -Greedy action tensor([-1.3437, -0.3797, 0.4335, -0.1993]) tensor([0.0789, 0.2069, 0.4665, 0.2478]) -Greedy action tensor([-1.8817, -0.1112, 0.5961, -0.1455]) tensor([0.0409, 0.2401, 0.4870, 0.2320]) -Greedy action tensor([ 0.7683, -0.5506, 1.4349, -0.0430]) tensor([0.2733, 0.0731, 0.5322, 0.1214]) -Greedy action tensor([ 0.0278, -1.1183, -1.0242, 0.1217]) tensor([0.3616, 0.1149, 0.1263, 0.3972]) -Greedy action tensor([ 0.5947, 0.3611, 1.7254, -0.2757]) tensor([0.1884, 0.1491, 0.5836, 0.0789]) -Greedy action tensor([ 0.4595, -1.7428, 0.3541, 0.4034]) tensor([0.3383, 0.0374, 0.3044, 0.3199]) -Greedy action tensor([ 0.1344, 0.3184, -0.5639, 0.2593]) tensor([0.2609, 0.3136, 0.1298, 0.2956]) -Greedy action tensor([-0.9566, -0.6291, 0.3655, 0.1350]) tensor([0.1097, 0.1522, 0.4114, 0.3267]) -Greedy action tensor([ 0.0213, -2.1416, -0.7123, 0.4091]) tensor([0.3259, 0.0375, 0.1565, 0.4802]) -Greedy action tensor([ 0.4809, -1.7216, 0.6120, -0.0799]) tensor([0.3544, 0.0392, 0.4041, 0.2023]) -Greedy action tensor([-0.6121, 0.2106, -0.5544, 0.1238]) tensor([0.1557, 0.3544, 0.1649, 0.3249]) -Greedy action tensor([-0.3787, 0.7932, -0.4875, 0.1728]) tensor([0.1458, 0.4705, 0.1307, 0.2530]) -Greedy action tensor([-0.6108, -1.6549, 0.1816, -0.6300]) tensor([0.2202, 0.0775, 0.4863, 0.2160]) -Greedy action tensor([ 0.2793, -2.0761, 0.9738, 0.4868]) tensor([0.2310, 0.0219, 0.4627, 0.2843]) -Greedy action tensor([ 0.1153, -0.6980, 1.9611, -0.2890]) tensor([0.1184, 0.0525, 0.7500, 0.0790]) -Greedy action tensor([ 0.3119, -0.3520, -0.5431, 0.5567]) tensor([0.3108, 0.1600, 0.1322, 0.3970]) -Greedy action tensor([ 0.6186, -0.8055, -0.7656, 1.0955]) tensor([0.3223, 0.0776, 0.0808, 0.5193]) -Greedy action tensor([ 0.8311, -0.5254, -0.6010, 0.8678]) tensor([0.3947, 0.1017, 0.0942, 0.4094]) -Greedy action tensor([-1.2866, -0.7377, 0.6084, -1.1340]) tensor([0.0948, 0.1641, 0.6306, 0.1104]) -Greedy action tensor([-0.2433, -0.0496, -0.2745, -1.4402]) tensor([0.2869, 0.3483, 0.2781, 0.0867]) -Greedy action tensor([ 1.5983, -0.2320, 1.0015, 0.3824]) tensor([0.4982, 0.0799, 0.2743, 0.1477]) -Greedy action tensor([-0.0674, 1.0002, -0.8941, -0.2968]) tensor([0.1945, 0.5657, 0.0851, 0.1546]) -Greedy action tensor([-1.1058, -0.4449, 1.1545, -0.8172]) tensor([0.0722, 0.1398, 0.6918, 0.0963]) -Greedy action tensor([ 0.4264, -0.9710, -0.7060, -0.0954]) tensor([0.4623, 0.1143, 0.1490, 0.2744]) -Greedy action tensor([-0.0664, -0.5733, -0.0687, -0.0153]) tensor([0.2738, 0.1649, 0.2732, 0.2881]) -Greedy action tensor([ 0.4149, -2.0175, 1.0594, -0.3914]) tensor([0.2907, 0.0255, 0.5539, 0.1298]) -Greedy action tensor([ 1.0470, -0.1735, -0.0538, 0.4708]) tensor([0.4567, 0.1348, 0.1519, 0.2567]) -Greedy action tensor([-1.3888, 0.3764, 0.6643, -0.9527]) tensor([0.0618, 0.3611, 0.4815, 0.0956]) -Greedy action tensor([-1.0003, 0.3786, -0.1237, -0.7931]) tensor([0.1162, 0.4615, 0.2793, 0.1430]) -Greedy action tensor([ 0.6702, -1.0039, -0.3865, 1.7668]) tensor([0.2208, 0.0414, 0.0767, 0.6611]) -Greedy action tensor([ 0.0883, 0.3754, -0.2258, -0.7386]) tensor([0.2857, 0.3807, 0.2087, 0.1250]) -Greedy action tensor([ 0.8552, -0.3121, 0.4084, -0.1454]) tensor([0.4313, 0.1342, 0.2759, 0.1586]) -Greedy action tensor([1.2308, 0.6155, 0.7952, 0.0775]) tensor([0.3995, 0.2159, 0.2584, 0.1261]) -Greedy action tensor([ 0.1177, -0.3663, 0.0052, -0.0702]) tensor([0.2995, 0.1846, 0.2677, 0.2482]) -Greedy action tensor([-0.9533, 0.6340, 0.0886, -0.9642]) tensor([0.1029, 0.5034, 0.2918, 0.1018]) -Greedy action tensor([ 0.7871, -1.0741, -0.2519, 1.1077]) tensor([0.3464, 0.0539, 0.1225, 0.4773]) -Greedy action tensor([ 1.2810, -1.1985, 0.5136, 1.4625]) tensor([0.3640, 0.0305, 0.1690, 0.4365]) -Greedy action tensor([ 0.2415, -0.3387, 0.3186, -0.3040]) tensor([0.3106, 0.1739, 0.3355, 0.1800]) -Greedy action tensor([-1.0252, -0.9106, 0.9837, -1.5116]) tensor([0.0981, 0.1100, 0.7315, 0.0603]) -Greedy action tensor([-0.5824, -0.5827, 0.6291, -0.4228]) tensor([0.1531, 0.1531, 0.5142, 0.1796]) -Greedy action tensor([ 0.0511, -0.7262, 1.1236, 0.7200]) tensor([0.1579, 0.0726, 0.4614, 0.3082]) -Greedy action tensor([-0.2061, 0.1467, -0.6433, -0.1642]) tensor([0.2432, 0.3461, 0.1571, 0.2536]) -Greedy action tensor([ 0.3391, -2.5200, 0.0099, 0.1005]) tensor([0.3899, 0.0223, 0.2806, 0.3072]) -Greedy action tensor([ 0.4814, 0.5944, -0.6175, -0.7678]) tensor([0.3650, 0.4087, 0.1216, 0.1047]) -Greedy action tensor([-0.4341, -0.0585, 1.2600, 0.1438]) tensor([0.1033, 0.1504, 0.5622, 0.1841]) -Greedy action tensor([-0.8219, -0.1043, -0.4622, -0.1444]) tensor([0.1550, 0.3177, 0.2221, 0.3052]) -Greedy action tensor([ 0.4611, -1.0266, -0.0255, 1.4015]) tensor([0.2272, 0.0513, 0.1397, 0.5818]) -Greedy action tensor([ 0.3226, -0.3607, 0.4446, -0.0397]) tensor([0.3002, 0.1516, 0.3392, 0.2090]) -Greedy action tensor([ 0.3152, -1.2232, -0.4449, 0.3375]) tensor([0.3697, 0.0794, 0.1729, 0.3780]) -Greedy action tensor([1.1897, 0.2871, 0.7267, 0.5765]) tensor([0.3881, 0.1574, 0.2443, 0.2102]) -Greedy action tensor([ 0.6029, -1.8497, -0.2540, 0.3498]) tensor([0.4373, 0.0376, 0.1856, 0.3395]) -Greedy action tensor([ 0.5131, -0.4656, -0.4222, -0.3590]) tensor([0.4574, 0.1719, 0.1795, 0.1912]) -Greedy action tensor([-0.5824, -0.0888, 0.3402, -0.2833]) tensor([0.1538, 0.2519, 0.3869, 0.2074]) -Greedy action tensor([ 0.1981, 0.5207, 1.2363, -0.6250]) tensor([0.1772, 0.2446, 0.5004, 0.0778]) -Greedy action tensor([ 0.0077, -0.2218, -0.4691, 0.4896]) tensor([0.2478, 0.1970, 0.1539, 0.4013]) -Greedy action tensor([-1.0742, -0.9258, 0.4281, -0.9333]) tensor([0.1281, 0.1487, 0.5756, 0.1475]) -Greedy action tensor([-1.8086, -0.0341, 0.5808, -1.2192]) tensor([0.0510, 0.3008, 0.5563, 0.0920]) -Greedy action tensor([ 0.1827, -0.6353, 0.2461, 0.2877]) tensor([0.2764, 0.1220, 0.2945, 0.3070]) -Greedy action tensor([ 0.2598, -0.0489, 0.0219, -0.2320]) tensor([0.3191, 0.2343, 0.2515, 0.1951]) -Greedy action tensor([ 1.2371, -0.4633, -0.1448, 0.4899]) tensor([0.5243, 0.0957, 0.1316, 0.2483]) -Greedy action tensor([ 0.0693, -0.5040, 0.2909, -0.1730]) tensor([0.2780, 0.1567, 0.3470, 0.2182]) -Greedy action tensor([-0.3440, -0.1129, 0.1072, 0.5284]) tensor([0.1607, 0.2025, 0.2523, 0.3845]) -Greedy action tensor([ 1.6678, -0.2693, 0.5382, 1.4703]) tensor([0.4371, 0.0630, 0.1412, 0.3587]) -Greedy action tensor([-1.0266, -0.7145, 1.1902, -0.8603]) tensor([0.0786, 0.1074, 0.7212, 0.0928]) -Greedy action tensor([ 0.1922, -0.1434, -0.5124, -0.8326]) tensor([0.3894, 0.2784, 0.1925, 0.1397]) -Greedy action tensor([ 1.1620, -0.3768, 0.7105, 1.0091]) tensor([0.3691, 0.0792, 0.2350, 0.3167]) -Greedy action tensor([-1.0742, 1.0992, 0.7526, -0.4377]) tensor([0.0559, 0.4912, 0.3473, 0.1056]) -Greedy action tensor([0.9650, 0.1060, 0.3345, 0.5669]) tensor([0.3806, 0.1612, 0.2026, 0.2556]) -Greedy action tensor([-1.2490, -1.1959, -0.2494, -1.0070]) tensor([0.1654, 0.1744, 0.4495, 0.2107]) -Greedy action tensor([-0.4296, 0.2686, 1.3638, -1.5876]) tensor([0.1071, 0.2154, 0.6439, 0.0336]) -Greedy action tensor([ 0.1206, -0.0946, 0.2474, -0.7355]) tensor([0.2971, 0.2395, 0.3372, 0.1262]) -Greedy action tensor([-0.1166, 0.0120, -0.4388, 0.1203]) tensor([0.2422, 0.2754, 0.1755, 0.3069]) -Greedy action tensor([-0.7050, -0.4345, -0.8946, -0.5242]) tensor([0.2306, 0.3023, 0.1908, 0.2763]) -Greedy action tensor([ 1.6895, -0.0023, 1.6797, 0.6753]) tensor([0.3941, 0.0726, 0.3903, 0.1430]) -Greedy action tensor([0.5699, 0.2888, 0.8824, 0.7330]) tensor([0.2326, 0.1756, 0.3179, 0.2738]) -Greedy action tensor([ 0.5211, -0.5939, 0.4452, 0.9175]) tensor([0.2673, 0.0876, 0.2478, 0.3973]) -Greedy action tensor([-1.0036, -1.6885, 0.1205, -0.3631]) tensor([0.1543, 0.0778, 0.4750, 0.2929]) -Greedy action tensor([-0.1949, -1.3612, 0.8686, -0.5911]) tensor([0.2049, 0.0638, 0.5934, 0.1379]) -Greedy action tensor([-0.0272, 0.3853, 0.4231, 0.1020]) tensor([0.1917, 0.2895, 0.3007, 0.2181]) -Greedy action tensor([-1.0642, -2.6072, 0.5935, -0.3288]) tensor([0.1170, 0.0250, 0.6139, 0.2441]) -Greedy action tensor([ 1.0442, -0.9485, -0.5708, 0.4570]) tensor([0.5288, 0.0721, 0.1052, 0.2939]) -Greedy action tensor([ 1.1338, -0.8037, 1.1250, -0.4500]) tensor([0.4273, 0.0616, 0.4235, 0.0877]) -Greedy action tensor([-0.6693, -0.4457, 1.4776, 0.8956]) tensor([0.0641, 0.0802, 0.5489, 0.3067]) -Greedy action tensor([-1.5090, -1.1123, 0.3837, -0.5903]) tensor([0.0860, 0.1278, 0.5707, 0.2155]) -Greedy action tensor([ 1.6231, -0.7710, -0.0394, 0.3688]) tensor([0.6385, 0.0583, 0.1211, 0.1822]) -Greedy action tensor([ 1.5380, -0.3172, -0.5459, 0.4201]) tensor([0.6220, 0.0973, 0.0774, 0.2034]) -Greedy action tensor([ 1.7412, -0.9693, -0.0405, 0.5552]) tensor([0.6492, 0.0432, 0.1093, 0.1983]) -Greedy action tensor([ 1.8500, -1.4438, -0.2142, 0.2132]) tensor([0.7360, 0.0273, 0.0934, 0.1432]) -Greedy action tensor([ 1.2179, 0.1176, -0.7239, 0.0668]) tensor([0.5579, 0.1857, 0.0800, 0.1765]) -Greedy action tensor([ 1.2839, -0.2599, -0.5878, 0.3471]) tensor([0.5684, 0.1214, 0.0875, 0.2228]) -Greedy action tensor([ 1.3610, -0.6739, -0.3680, 0.4869]) tensor([0.5796, 0.0757, 0.1029, 0.2418]) -Greedy action tensor([ 1.8179, -0.2764, -0.9310, -0.0111]) tensor([0.7420, 0.0914, 0.0475, 0.1191]) -Greedy action tensor([ 1.5525, -0.6480, -0.0666, 0.2256]) tensor([0.6353, 0.0704, 0.1258, 0.1685]) -Greedy action tensor([ 2.0780, -1.2908, 0.1181, -0.0074]) tensor([0.7695, 0.0265, 0.1084, 0.0956]) -Greedy action tensor([ 1.5765, -0.5808, -0.1954, 0.2471]) tensor([0.6450, 0.0746, 0.1097, 0.1707]) -Greedy action tensor([ 1.6338, -0.8836, -0.2281, 0.1169]) tensor([0.6871, 0.0554, 0.1067, 0.1507]) -Greedy action tensor([ 1.7234, 0.0063, -0.7633, 0.7908]) tensor([0.6038, 0.1084, 0.0502, 0.2376]) -Greedy action tensor([ 1.6952, -0.7729, -0.1672, 0.3832]) tensor([0.6625, 0.0561, 0.1029, 0.1784]) -Greedy action tensor([ 0.5507, 0.0882, 0.0642, -0.0901]) tensor([0.3608, 0.2272, 0.2218, 0.1901]) -Greedy action tensor([1.7456, 0.2234, 0.0880, 0.2799]) tensor([0.6099, 0.1331, 0.1162, 0.1408]) -Greedy action tensor([ 1.6707, -0.9061, -0.4427, 0.4876]) tensor([0.6653, 0.0506, 0.0804, 0.2038]) -Greedy action tensor([ 0.9874, 0.0368, -0.7284, 0.2375]) tensor([0.4905, 0.1896, 0.0882, 0.2317]) -Greedy action tensor([ 1.4710, -0.8285, -0.4048, 0.2632]) tensor([0.6442, 0.0646, 0.0987, 0.1925]) -Greedy action tensor([ 1.0338, -0.0493, -0.5718, 0.2612]) tensor([0.4997, 0.1692, 0.1003, 0.2308]) -Greedy action tensor([ 1.9083, -0.9691, -0.3503, 0.7579]) tensor([0.6769, 0.0381, 0.0707, 0.2142]) -Greedy action tensor([ 1.1746, -0.5825, -0.4925, -0.1798]) tensor([0.6175, 0.1065, 0.1166, 0.1594]) -Greedy action tensor([ 1.4756, -0.7626, -0.1418, 0.1040]) tensor([0.6415, 0.0684, 0.1273, 0.1628]) -Greedy action tensor([ 1.2389, -0.0887, -0.4949, -0.2711]) tensor([0.6015, 0.1595, 0.1062, 0.1329]) -Greedy action tensor([ 2.0883, -1.0855, 0.0425, 0.2788]) tensor([0.7491, 0.0313, 0.0969, 0.1227]) -Greedy action tensor([ 1.4581, -0.1356, -0.2584, 0.5513]) tensor([0.5597, 0.1137, 0.1006, 0.2260]) -Greedy action tensor([ 1.2144, 0.0410, -0.7025, 0.1449]) tensor([0.5557, 0.1719, 0.0817, 0.1907]) -Greedy action tensor([1.1812, 0.3304, 0.1196, 0.4363]) tensor([0.4449, 0.1900, 0.1539, 0.2112]) -Greedy action tensor([ 1.5917e+00, -6.6146e-01, -2.2851e-01, -5.6314e-04]) tensor([0.6800, 0.0714, 0.1102, 0.1384]) -Greedy action tensor([ 1.4345, -0.6076, 0.0746, 0.1789]) tensor([0.5983, 0.0776, 0.1536, 0.1705]) -Greedy action tensor([ 1.4457, -0.5450, -0.9562, -0.0289]) tensor([0.6868, 0.0938, 0.0622, 0.1572]) -Greedy action tensor([ 1.6907, -0.2201, 0.3381, 0.1299]) tensor([0.6186, 0.0915, 0.1600, 0.1299]) -Greedy action tensor([ 1.2894, -0.3605, -0.4236, 0.0461]) tensor([0.6021, 0.1156, 0.1086, 0.1737]) -Greedy action tensor([ 1.9804, -1.1885, -0.1268, 0.5122]) tensor([0.7174, 0.0302, 0.0872, 0.1652]) -Greedy action tensor([ 1.0935, -0.4619, -0.2988, 0.1912]) tensor([0.5361, 0.1132, 0.1332, 0.2175]) -Greedy action tensor([ 1.3029, -0.6230, -0.3071, -0.0030]) tensor([0.6186, 0.0902, 0.1237, 0.1676]) -Greedy action tensor([ 1.3143, -0.4490, -0.1706, 0.2078]) tensor([0.5785, 0.0992, 0.1310, 0.1913]) -Greedy action tensor([ 1.4264, -0.4457, -0.1810, 0.5121]) tensor([0.5698, 0.0876, 0.1142, 0.2284]) -Greedy action tensor([ 2.6112, -1.0931, -0.4244, 1.2384]) tensor([0.7541, 0.0186, 0.0362, 0.1911]) -Greedy action tensor([1.7125, 0.0531, 0.2592, 0.4378]) tensor([0.5870, 0.1117, 0.1372, 0.1641]) -Greedy action tensor([ 2.0707, -0.3646, -0.5790, 0.5397]) tensor([0.7275, 0.0637, 0.0514, 0.1574]) -Greedy action tensor([ 1.5542, -0.3015, -0.1482, -0.1711]) tensor([0.6593, 0.1031, 0.1202, 0.1174]) -Greedy action tensor([ 1.7657, -0.5574, -0.7111, 0.3764]) tensor([0.6987, 0.0685, 0.0587, 0.1741]) -Greedy action tensor([ 1.3426, -1.0255, -0.0658, 0.6229]) tensor([0.5479, 0.0513, 0.1340, 0.2668]) -Greedy action tensor([ 1.4263, -0.7504, -0.3552, -0.0681]) tensor([0.6639, 0.0753, 0.1118, 0.1490]) -Greedy action tensor([ 0.8639, -0.2709, -0.0071, 0.0347]) tensor([0.4595, 0.1477, 0.1923, 0.2005]) -Greedy action tensor([ 2.3054, -0.9433, -0.5168, 0.7675]) tensor([0.7615, 0.0296, 0.0453, 0.1636]) -Greedy action tensor([ 2.1090, -0.8917, -0.3007, 0.6055]) tensor([0.7342, 0.0365, 0.0660, 0.1633]) -Greedy action tensor([ 1.7594, -0.7426, -0.8304, 0.3521]) tensor([0.7134, 0.0584, 0.0535, 0.1746]) -Greedy action tensor([ 1.2868, -0.4482, -0.0126, -0.1564]) tensor([0.5934, 0.1047, 0.1618, 0.1401]) -Greedy action tensor([ 2.2182, -0.9655, -0.2443, 0.2114]) tensor([0.7930, 0.0329, 0.0676, 0.1066]) -Greedy action tensor([ 2.5153, -1.2123, 0.2598, 1.2348]) tensor([0.7108, 0.0171, 0.0745, 0.1976]) -Greedy action tensor([ 1.4387, -0.3406, -0.4387, 0.1911]) tensor([0.6215, 0.1049, 0.0951, 0.1785]) -Greedy action tensor([ 1.2408, -0.5898, -0.2992, 0.2842]) tensor([0.5685, 0.0911, 0.1219, 0.2184]) -Greedy action tensor([ 1.6942, -0.7173, -0.2980, 0.4182]) tensor([0.6643, 0.0596, 0.0906, 0.1855]) -Greedy action tensor([ 1.5824, -0.4155, -0.2616, 0.1850]) tensor([0.6489, 0.0880, 0.1027, 0.1604]) -Greedy action tensor([ 2.1235, -0.5033, -1.2709, 0.5908]) tensor([0.7565, 0.0547, 0.0254, 0.1634]) -Greedy action tensor([ 1.3234, -0.5319, -0.4846, 0.4246]) tensor([0.5789, 0.0905, 0.0949, 0.2356]) -Greedy action tensor([ 1.7084, -0.0937, -0.5550, -0.0934]) tensor([0.6974, 0.1150, 0.0725, 0.1151]) -Greedy action tensor([ 1.4472, -0.1289, -0.7063, 0.5185]) tensor([0.5821, 0.1204, 0.0676, 0.2300]) -Greedy action tensor([ 1.9203, -1.0343, -0.4078, 0.5921]) tensor([0.7069, 0.0368, 0.0689, 0.1873]) -Greedy action tensor([ 1.5670, 0.1922, -0.0849, -0.1133]) tensor([0.6132, 0.1551, 0.1175, 0.1142]) -Greedy action tensor([ 1.8479, -0.5184, -0.2744, 0.1526]) tensor([0.7158, 0.0672, 0.0857, 0.1314]) -Greedy action tensor([ 1.2730, -0.3827, -0.5736, 0.2589]) tensor([0.5843, 0.1116, 0.0922, 0.2119]) -Greedy action tensor([ 2.2780, -0.0850, -0.0109, 0.3495]) tensor([0.7458, 0.0702, 0.0756, 0.1084]) -Greedy action tensor([ 2.3577, -0.8046, -0.4281, 0.1428]) tensor([0.8243, 0.0349, 0.0508, 0.0900]) -Greedy action tensor([ 1.2102, -0.3770, -0.0285, -0.0940]) tensor([0.5664, 0.1158, 0.1641, 0.1537]) -Greedy action tensor([ 1.9106, -0.8952, 0.2122, 0.1118]) tensor([0.7098, 0.0429, 0.1299, 0.1175]) -Greedy action tensor([ 1.2344, -0.1714, -0.6276, 0.1780]) tensor([0.5720, 0.1402, 0.0889, 0.1989]) -Greedy action tensor([ 1.6568, -0.0394, -0.5368, 0.1651]) tensor([0.6579, 0.1206, 0.0734, 0.1480]) -Greedy action tensor([ 1.6871, -0.2365, -0.3974, 0.5326]) tensor([0.6306, 0.0921, 0.0784, 0.1988]) -Greedy action tensor([1.8304, 0.0995, 0.3098, 0.0203]) tensor([0.6413, 0.1136, 0.1402, 0.1049]) -Greedy action tensor([ 1.3922, -0.0074, -0.2703, 0.5533]) tensor([0.5352, 0.1320, 0.1015, 0.2313]) -Greedy action tensor([ 1.5644, -0.7544, -0.6093, 0.4731]) tensor([0.6460, 0.0636, 0.0735, 0.2169]) -Greedy action tensor([ 1.7414, -0.4243, -0.2433, 0.1368]) tensor([0.6882, 0.0789, 0.0946, 0.1383]) -Greedy action tensor([ 0.9764, -0.2693, -0.0081, 0.0832]) tensor([0.4829, 0.1390, 0.1804, 0.1977]) -Greedy action tensor([ 1.4257, -0.1661, -1.1010, 0.5057]) tensor([0.5945, 0.1210, 0.0475, 0.2369]) -Greedy action tensor([ 1.3148, -0.8846, -0.1974, 0.0691]) tensor([0.6177, 0.0685, 0.1362, 0.1777]) -Greedy action tensor([ 2.1540, -1.2545, -0.1911, 0.4363]) tensor([0.7643, 0.0253, 0.0732, 0.1372]) -Greedy action tensor([ 1.3971, -0.6754, -0.2288, 0.4608]) tensor([0.5832, 0.0734, 0.1147, 0.2287]) -Greedy action tensor([ 2.1538, -1.2503, -0.1684, 0.6434]) tensor([0.7396, 0.0246, 0.0725, 0.1633]) -Greedy action tensor([ 0.7371, -0.4261, -0.1152, -0.3306]) tensor([0.4801, 0.1500, 0.2048, 0.1651]) -Greedy action tensor([ 0.9041, -0.6166, -0.0494, -0.5043]) tensor([0.5410, 0.1182, 0.2085, 0.1323]) -Greedy action tensor([ 0.7620, -0.4302, -0.0067, -0.2698]) tensor([0.4709, 0.1429, 0.2183, 0.1678]) -Greedy action tensor([ 0.4185, -0.1291, 0.0452, -0.2461]) tensor([0.3596, 0.2079, 0.2475, 0.1850]) -Greedy action tensor([ 0.8050, -0.4813, -0.0603, -0.4351]) tensor([0.5034, 0.1391, 0.2119, 0.1457]) -Greedy action tensor([ 0.5185, -0.1442, -0.0948, -0.1429]) tensor([0.3886, 0.2003, 0.2105, 0.2006]) -Greedy action tensor([ 0.8546, -0.4786, -0.2988, -0.4824]) tensor([0.5429, 0.1431, 0.1713, 0.1426]) -Greedy action tensor([ 1.1054, -0.7098, -0.0320, -0.6650]) tensor([0.6047, 0.0984, 0.1939, 0.1030]) -Greedy action tensor([ 0.6983, -0.6178, -0.0928, -0.3040]) tensor([0.4788, 0.1284, 0.2171, 0.1757]) -Greedy action tensor([ 0.3518, 0.2385, -0.0328, 0.0566]) tensor([0.3014, 0.2691, 0.2052, 0.2244]) -Greedy action tensor([ 0.2566, 0.0707, 0.0316, -0.1571]) tensor([0.3040, 0.2524, 0.2427, 0.2010]) -Greedy action tensor([ 0.2145, 0.0690, -0.1324, -0.0834]) tensor([0.3018, 0.2609, 0.2133, 0.2240]) -Greedy action tensor([ 0.7547, 0.0027, 0.0121, -0.3325]) tensor([0.4377, 0.2064, 0.2083, 0.1476]) -Greedy action tensor([ 0.8818, -0.6301, 0.0519, -0.2224]) tensor([0.5030, 0.1109, 0.2194, 0.1667]) -Greedy action tensor([ 0.9054, -0.8685, 0.1756, -0.8726]) tensor([0.5493, 0.0932, 0.2647, 0.0928]) -Greedy action tensor([ 1.1409, -0.7633, -0.1233, -0.4130]) tensor([0.6087, 0.0907, 0.1719, 0.1287]) -Greedy action tensor([ 0.8649, -0.5952, -0.0999, -0.4267]) tensor([0.5296, 0.1230, 0.2018, 0.1456]) -Greedy action tensor([ 0.6133, -0.5841, -0.1879, -0.7400]) tensor([0.4977, 0.1503, 0.2234, 0.1286]) -Greedy action tensor([ 1.0211, -0.5241, -0.1689, -0.0925]) tensor([0.5418, 0.1155, 0.1648, 0.1779]) -Greedy action tensor([ 0.3593, 0.0012, -0.1155, 0.0161]) tensor([0.3300, 0.2306, 0.2053, 0.2341]) -Greedy action tensor([ 0.3003, -0.1280, -0.2541, -0.5607]) tensor([0.3775, 0.2460, 0.2169, 0.1596]) -Greedy action tensor([ 1.1916, -0.7467, -0.1590, -0.3724]) tensor([0.6202, 0.0893, 0.1607, 0.1298]) -Greedy action tensor([ 0.6343, -0.4884, -0.1040, -0.3058]) tensor([0.4558, 0.1483, 0.2178, 0.1780]) -Greedy action tensor([ 0.7713, -0.4540, -0.1681, -0.2746]) tensor([0.4912, 0.1442, 0.1920, 0.1726]) -Greedy action tensor([ 0.7959, -0.6822, 0.1713, -0.3105]) tensor([0.4775, 0.1089, 0.2557, 0.1579]) -Greedy action tensor([ 0.6051, -0.0111, -0.0920, 0.0222]) tensor([0.3852, 0.2080, 0.1918, 0.2150]) -Greedy action tensor([ 0.4698, -0.2969, -0.1478, -0.3197]) tensor([0.4069, 0.1890, 0.2194, 0.1847]) -Greedy action tensor([ 0.7906, -0.2095, -0.2050, -0.1898]) tensor([0.4734, 0.1741, 0.1749, 0.1776]) -Greedy action tensor([ 0.7876, -0.4008, 0.0274, -0.3851]) tensor([0.4803, 0.1464, 0.2246, 0.1487]) -Greedy action tensor([ 0.5347, -0.1788, -0.0113, -0.1161]) tensor([0.3860, 0.1891, 0.2236, 0.2013]) -Greedy action tensor([ 0.7216, -0.7037, -0.0764, -0.6419]) tensor([0.5138, 0.1235, 0.2313, 0.1314]) -Greedy action tensor([ 0.6924, -0.5207, -0.0412, -0.3297]) tensor([0.4679, 0.1391, 0.2247, 0.1684]) -Greedy action tensor([ 0.9543, -0.3972, -0.0319, -0.3863]) tensor([0.5281, 0.1367, 0.1970, 0.1382]) -Greedy action tensor([ 0.6192, -0.0401, -0.0064, -0.2629]) tensor([0.4055, 0.2097, 0.2169, 0.1678]) -Greedy action tensor([ 0.6192, 0.1892, -0.1492, -0.0690]) tensor([0.3822, 0.2486, 0.1772, 0.1920]) -Greedy action tensor([ 0.8705, -0.5023, -0.1488, -0.2303]) tensor([0.5136, 0.1302, 0.1853, 0.1708]) -Greedy action tensor([ 1.0061, -0.6820, 0.0210, -0.5358]) tensor([0.5642, 0.1043, 0.2107, 0.1207]) -Greedy action tensor([ 0.5832, 0.0309, -0.0212, 0.0944]) tensor([0.3656, 0.2104, 0.1997, 0.2242]) -Greedy action tensor([ 0.7860, -0.3021, -0.1562, -0.1570]) tensor([0.4726, 0.1592, 0.1842, 0.1841]) -Greedy action tensor([ 1.0398, -0.8451, 0.1235, -0.6652]) tensor([0.5768, 0.0876, 0.2307, 0.1049]) -Greedy action tensor([ 0.3833, 0.0089, -0.0550, -0.1280]) tensor([0.3410, 0.2345, 0.2200, 0.2045]) -Greedy action tensor([ 0.8109, -0.3261, -0.1300, -0.1911]) tensor([0.4812, 0.1544, 0.1878, 0.1767]) -Greedy action tensor([ 0.4664, -0.1481, 0.0701, -0.0995]) tensor([0.3595, 0.1945, 0.2419, 0.2042]) -Greedy action tensor([ 0.8056, -0.5258, -0.1129, -0.2135]) tensor([0.4940, 0.1305, 0.1972, 0.1783]) -Greedy action tensor([ 0.7463, -0.2433, -0.2053, -0.1629]) tensor([0.4628, 0.1720, 0.1787, 0.1864]) -Greedy action tensor([ 0.9328, -0.3697, -0.1462, -0.5149]) tensor([0.5415, 0.1472, 0.1841, 0.1273]) -Greedy action tensor([ 0.8181, -0.2979, 0.0459, -0.4032]) tensor([0.4798, 0.1572, 0.2216, 0.1414]) -Greedy action tensor([ 1.0369, -0.8320, 0.0412, -0.6768]) tensor([0.5869, 0.0905, 0.2168, 0.1057]) -Greedy action tensor([ 0.8021, -0.4585, 0.0333, -0.1363]) tensor([0.4677, 0.1326, 0.2168, 0.1830]) -Greedy action tensor([ 0.8182, -0.9235, -0.0896, -0.3680]) tensor([0.5308, 0.0930, 0.2141, 0.1621]) -Greedy action tensor([ 0.6123, -0.5662, 0.0093, -0.1965]) tensor([0.4347, 0.1338, 0.2379, 0.1936]) -Greedy action tensor([ 0.7435, -0.4769, 0.0437, -0.2627]) tensor([0.4635, 0.1368, 0.2302, 0.1695]) -Greedy action tensor([ 0.5348, -0.3380, -0.0475, -0.2898]) tensor([0.4141, 0.1730, 0.2313, 0.1815]) -Greedy action tensor([ 0.7887, -0.4147, -0.0271, -0.3786]) tensor([0.4869, 0.1462, 0.2154, 0.1515]) -Greedy action tensor([ 0.5108, -0.3338, -0.1496, 0.0134]) tensor([0.3915, 0.1682, 0.2023, 0.2380]) -Greedy action tensor([ 0.6077, -0.2069, -0.1412, -0.0508]) tensor([0.4110, 0.1820, 0.1943, 0.2127]) -Greedy action tensor([ 0.0998, 0.1285, -0.2049, -0.1778]) tensor([0.2838, 0.2920, 0.2092, 0.2150]) -Greedy action tensor([ 0.7546, -0.0652, -0.0310, 0.0261]) tensor([0.4203, 0.1852, 0.1916, 0.2029]) -Greedy action tensor([ 0.7491, -0.6752, -0.1696, -0.1353]) tensor([0.4872, 0.1172, 0.1944, 0.2012]) -Greedy action tensor([ 1.0945, -0.7583, 0.0456, -0.5055]) tensor([0.5851, 0.0917, 0.2050, 0.1181]) -Greedy action tensor([ 0.9380, -0.4876, -0.1114, -0.3379]) tensor([0.5348, 0.1286, 0.1873, 0.1493]) -Greedy action tensor([ 0.6986, -0.4841, -0.1099, -0.1656]) tensor([0.4601, 0.1410, 0.2050, 0.1939]) -Greedy action tensor([ 0.4531, -0.1755, 0.0650, -0.1328]) tensor([0.3612, 0.1927, 0.2450, 0.2011]) -Greedy action tensor([ 0.8339, -0.4081, -0.0844, -0.2976]) tensor([0.4974, 0.1436, 0.1986, 0.1604]) -Greedy action tensor([ 0.6115, -0.1587, -0.0506, -0.4727]) tensor([0.4316, 0.1998, 0.2226, 0.1460]) -Greedy action tensor([ 0.9796, -0.5863, -0.1442, -0.2535]) tensor([0.5478, 0.1144, 0.1781, 0.1596]) -Greedy action tensor([ 0.6727, -0.2230, -0.2145, -0.3719]) tensor([0.4604, 0.1880, 0.1896, 0.1620]) -Greedy action tensor([ 0.8827, -0.5881, 0.1068, -0.3273]) tensor([0.5030, 0.1156, 0.2315, 0.1500]) -Greedy action tensor([ 0.6349, -0.0599, -0.0031, 0.0024]) tensor([0.3908, 0.1951, 0.2065, 0.2076]) -Greedy action tensor([ 0.6827, -0.2561, -0.1769, -0.1908]) tensor([0.4480, 0.1752, 0.1897, 0.1871]) -Greedy action tensor([ 0.6198, -0.4192, -0.0711, -0.2465]) tensor([0.4395, 0.1555, 0.2202, 0.1848]) -Greedy action tensor([ 1.0962, -0.7409, -0.1073, -0.4332]) tensor([0.5966, 0.0950, 0.1791, 0.1293]) -Greedy action tensor([ 0.6157, -0.5316, -0.1310, -0.1502]) tensor([0.4432, 0.1407, 0.2100, 0.2060]) -Greedy action tensor([ 0.8956, -0.7858, 0.1084, -0.3723]) tensor([0.5201, 0.0968, 0.2367, 0.1464]) -Greedy action tensor([ 0.7648, -0.6070, 0.0756, -0.3033]) tensor([0.4764, 0.1208, 0.2391, 0.1637]) -Greedy action tensor([ 0.7644, -0.5555, 0.0484, -0.6500]) tensor([0.5003, 0.1337, 0.2445, 0.1216]) -Greedy action tensor([ 0.4225, -0.1726, -0.0195, -0.2405]) tensor([0.3691, 0.2035, 0.2372, 0.1902]) -Greedy action tensor([ 0.5154, -0.5104, -0.1732, -0.2181]) tensor([0.4272, 0.1531, 0.2146, 0.2051]) -Greedy action tensor([ 1.1181, -0.6823, 0.0260, -0.7057]) tensor([0.6016, 0.0994, 0.2019, 0.0971]) -Greedy action tensor([ 1.0219, -0.4663, -0.1252, -0.3926]) tensor([0.5598, 0.1264, 0.1778, 0.1360]) -Greedy action tensor([ 1.0068, -0.5912, 0.0315, -0.4971]) tensor([0.5550, 0.1123, 0.2093, 0.1234]) -Greedy action tensor([-1.9202, -0.4378, 0.6669, -0.1522]) tensor([0.0407, 0.1793, 0.5413, 0.2386]) -Greedy action tensor([-1.8928, -0.4253, 0.6367, -0.1359]) tensor([0.0422, 0.1832, 0.5299, 0.2447]) -Greedy action tensor([-0.7212, 0.6099, 0.0481, 0.0607]) tensor([0.1095, 0.4146, 0.2364, 0.2394]) -Greedy action tensor([-1.8991, -0.3836, 0.6348, -0.1533]) tensor([0.0419, 0.1906, 0.5276, 0.2399]) -Greedy action tensor([-1.2872, -0.4556, 0.3241, 0.1920]) tensor([0.0788, 0.1809, 0.3946, 0.3457]) -Greedy action tensor([-1.9044, -0.3740, 0.6391, -0.1607]) tensor([0.0416, 0.1920, 0.5288, 0.2377]) -Greedy action tensor([-1.9189, -0.4150, 0.6537, -0.1652]) tensor([0.0410, 0.1846, 0.5374, 0.2370]) -Greedy action tensor([-0.8122, 0.7584, 0.2165, 0.4308]) tensor([0.0828, 0.3984, 0.2317, 0.2871]) -Greedy action tensor([-1.9382, -0.4435, 0.6640, -0.1766]) tensor([0.0404, 0.1800, 0.5447, 0.2350]) -Greedy action tensor([-1.9193, -0.4387, 0.6518, -0.1688]) tensor([0.0413, 0.1814, 0.5398, 0.2376]) -Greedy action tensor([-1.8630, -0.3819, 0.5962, -0.1733]) tensor([0.0444, 0.1954, 0.5195, 0.2407]) -Greedy action tensor([-1.9067, -0.4349, 0.6469, -0.1631]) tensor([0.0418, 0.1821, 0.5372, 0.2390]) -Greedy action tensor([-1.6792, 0.0418, 0.5116, -0.5277]) tensor([0.0535, 0.2990, 0.4783, 0.1692]) -Greedy action tensor([-1.8886, -0.3473, 0.6290, -0.1510]) tensor([0.0421, 0.1966, 0.5220, 0.2393]) -Greedy action tensor([-1.5626, -0.1831, 0.5357, 0.0982]) tensor([0.0544, 0.2160, 0.4433, 0.2862]) -Greedy action tensor([-1.8269, -0.3911, 0.5998, -0.1197]) tensor([0.0454, 0.1907, 0.5137, 0.2502]) -Greedy action tensor([-1.6475, -0.2194, 0.5684, 0.0355]) tensor([0.0507, 0.2115, 0.4649, 0.2729]) -Greedy action tensor([-1.5829, -0.5354, 0.4954, 0.0691]) tensor([0.0586, 0.1671, 0.4684, 0.3058]) -Greedy action tensor([-1.7874, -0.2622, 0.5619, -0.1125]) tensor([0.0467, 0.2146, 0.4893, 0.2493]) -Greedy action tensor([-1.9441, -0.4531, 0.6739, -0.1752]) tensor([0.0400, 0.1776, 0.5480, 0.2344]) -Greedy action tensor([-1.7610, -0.2815, 0.6272, -0.0310]) tensor([0.0456, 0.2003, 0.4969, 0.2573]) -Greedy action tensor([-1.9006, -0.4562, 0.6501, -0.1605]) tensor([0.0421, 0.1785, 0.5395, 0.2399]) -Greedy action tensor([-1.8979, -0.4517, 0.6492, -0.1558]) tensor([0.0421, 0.1790, 0.5382, 0.2406]) -Greedy action tensor([-1.3614, -0.6280, 0.3620, 0.1755]) tensor([0.0750, 0.1561, 0.4202, 0.3487]) -Greedy action tensor([-1.6028, -0.3497, 0.5453, -0.2051]) tensor([0.0584, 0.2046, 0.5006, 0.2364]) -Greedy action tensor([-1.8857, -0.4548, 0.6453, -0.1478]) tensor([0.0427, 0.1785, 0.5362, 0.2426]) -Greedy action tensor([-1.7549, -0.2041, 0.5615, -0.0676]) tensor([0.0470, 0.2218, 0.4769, 0.2542]) -Greedy action tensor([-1.6430, -0.5098, 0.5178, 0.0216]) tensor([0.0553, 0.1719, 0.4803, 0.2924]) -Greedy action tensor([-1.7418, -0.3508, 0.7508, 0.1770]) tensor([0.0418, 0.1680, 0.5054, 0.2848]) -Greedy action tensor([-1.9389, -0.4413, 0.6658, -0.1760]) tensor([0.0403, 0.1801, 0.5448, 0.2348]) -Greedy action tensor([-1.7244, -0.2393, 0.6158, -0.0624]) tensor([0.0475, 0.2096, 0.4928, 0.2501]) -Greedy action tensor([-1.9125, -0.3980, 0.6502, -0.1553]) tensor([0.0411, 0.1870, 0.5335, 0.2384]) -Greedy action tensor([-1.8014, -0.4799, 0.6470, 0.0144]) tensor([0.0445, 0.1669, 0.5150, 0.2736]) -Greedy action tensor([-0.0273, 1.0588, -0.0648, 0.3337]) tensor([0.1572, 0.4658, 0.1514, 0.2256]) -Greedy action tensor([-1.7645, -0.4590, 0.5924, -0.0695]) tensor([0.0483, 0.1783, 0.5102, 0.2632]) -Greedy action tensor([-1.9215, -0.3657, 0.6437, -0.1612]) tensor([0.0407, 0.1930, 0.5295, 0.2368]) -Greedy action tensor([-1.9219, -0.4544, 0.6602, -0.1703]) tensor([0.0411, 0.1783, 0.5436, 0.2369]) -Greedy action tensor([-1.7915, -0.2984, 0.6091, -0.0510]) tensor([0.0451, 0.2007, 0.4973, 0.2570]) -Greedy action tensor([-0.9686, 0.8936, 0.1205, 0.2319]) tensor([0.0728, 0.4689, 0.2164, 0.2419]) -Greedy action tensor([-1.8694, -0.2547, 0.6051, -0.1246]) tensor([0.0423, 0.2127, 0.5026, 0.2423]) -Greedy action tensor([-1.8700, -0.4383, 0.6368, -0.1384]) tensor([0.0433, 0.1812, 0.5310, 0.2446]) -Greedy action tensor([-1.4513, 0.1384, 0.4234, -0.1396]) tensor([0.0620, 0.3039, 0.4041, 0.2301]) -Greedy action tensor([-1.0103, -0.0231, 0.0445, 0.3638]) tensor([0.0952, 0.2554, 0.2733, 0.3761]) -Greedy action tensor([-1.8931, -0.4150, 0.6391, -0.1460]) tensor([0.0422, 0.1850, 0.5308, 0.2421]) -Greedy action tensor([-1.7648, -0.3641, 0.5835, -0.0628]) tensor([0.0476, 0.1931, 0.4982, 0.2611]) -Greedy action tensor([-1.8165, -0.4411, 0.6270, -0.0730]) tensor([0.0451, 0.1783, 0.5189, 0.2577]) -Greedy action tensor([-1.9286, -0.4092, 0.6500, -0.1687]) tensor([0.0407, 0.1861, 0.5366, 0.2366]) -Greedy action tensor([-1.8709, -0.4509, 0.6342, -0.1404]) tensor([0.0434, 0.1797, 0.5318, 0.2451]) -Greedy action tensor([-1.6714, -0.3131, 0.5581, -0.1761]) tensor([0.0536, 0.2086, 0.4985, 0.2392]) -Greedy action tensor([-1.2895, -0.5453, 0.4144, -0.1058]) tensor([0.0843, 0.1774, 0.4631, 0.2753]) -Greedy action tensor([-1.8969, -0.3944, 0.6344, -0.1499]) tensor([0.0420, 0.1888, 0.5281, 0.2411]) -Greedy action tensor([-1.9352, -0.4437, 0.6645, -0.1736]) tensor([0.0404, 0.1797, 0.5444, 0.2355]) -Greedy action tensor([-1.8746, -0.2783, 0.6221, -0.1397]) tensor([0.0421, 0.2078, 0.5114, 0.2387]) -Greedy action tensor([-1.7905, -0.4881, 0.5865, -0.1083]) tensor([0.0480, 0.1766, 0.5172, 0.2582]) -Greedy action tensor([-0.7306, 0.1225, 0.1785, -0.0929]) tensor([0.1295, 0.3040, 0.3215, 0.2451]) -Greedy action tensor([-1.5253, 0.0391, 0.4090, -0.0515]) tensor([0.0586, 0.2801, 0.4055, 0.2558]) -Greedy action tensor([-1.7730, -0.2531, 0.6253, -0.1063]) tensor([0.0457, 0.2090, 0.5031, 0.2421]) -Greedy action tensor([-1.5693, -0.1528, 0.4247, 0.0273]) tensor([0.0575, 0.2369, 0.4220, 0.2836]) -Greedy action tensor([-1.4420, -0.5862, 0.4064, 0.0644]) tensor([0.0704, 0.1656, 0.4467, 0.3173]) -Greedy action tensor([-1.4552, -0.6189, 0.5918, 0.3456]) tensor([0.0585, 0.1349, 0.4527, 0.3539]) -Greedy action tensor([-1.8592, -0.4642, 0.6326, -0.1339]) tensor([0.0440, 0.1775, 0.5315, 0.2470]) -Greedy action tensor([-1.8417, -0.4988, 0.6113, -0.1441]) tensor([0.0456, 0.1748, 0.5304, 0.2492]) -Greedy action tensor([-1.8638, -0.4186, 0.6231, -0.1328]) tensor([0.0436, 0.1852, 0.5248, 0.2464]) -Greedy action tensor([-1.5891, -0.4721, 0.4751, 0.0034]) tensor([0.0593, 0.1813, 0.4676, 0.2917]) -Greedy action tensor([-1.9105, -0.4069, 0.6409, -0.1577]) tensor([0.0415, 0.1867, 0.5323, 0.2395]) -Greedy action tensor([-1.9280, -0.4493, 0.6622, -0.1704]) tensor([0.0408, 0.1789, 0.5438, 0.2365]) -Greedy action tensor([-1.7722, 0.0620, 0.5126, -0.0451]) tensor([0.0440, 0.2757, 0.4326, 0.2477]) -Greedy action tensor([-1.9320, -0.4150, 0.6580, -0.1677]) tensor([0.0404, 0.1844, 0.5391, 0.2361]) -Greedy action tensor([-1.8361, -0.3676, 0.6076, -0.1264]) tensor([0.0447, 0.1940, 0.5144, 0.2469]) -Greedy action tensor([-1.5662, -0.1781, 0.4696, -0.0956]) tensor([0.0588, 0.2355, 0.4500, 0.2557]) -Greedy action tensor([-1.9237, -0.4135, 0.6547, -0.1642]) tensor([0.0408, 0.1847, 0.5375, 0.2370]) -Greedy action tensor([-1.8068, -0.0979, 0.5304, -0.0794]) tensor([0.0444, 0.2455, 0.4601, 0.2500]) -Greedy action tensor([-0.3178, 0.4989, -0.0754, 0.6655]) tensor([0.1387, 0.3138, 0.1767, 0.3707]) -Greedy action tensor([-1.9356, -0.4414, 0.6626, -0.1749]) tensor([0.0405, 0.1803, 0.5438, 0.2354]) -Greedy action tensor([-1.8858, -0.2514, 0.6095, -0.1343]) tensor([0.0416, 0.2135, 0.5049, 0.2400]) -Greedy action tensor([-1.8491, -0.2815, 0.6101, -0.0791]) tensor([0.0428, 0.2053, 0.5006, 0.2513]) -Greedy action tensor([-1.9401, -0.3815, 0.6531, -0.1796]) tensor([0.0401, 0.1905, 0.5362, 0.2332]) -Greedy action tensor([-1.8483, -0.3532, 0.5942, -0.1223]) tensor([0.0443, 0.1975, 0.5094, 0.2488]) -Greedy action tensor([-1.8166, -0.4233, 0.5993, -0.1219]) tensor([0.0461, 0.1859, 0.5168, 0.2512]) -Greedy action tensor([-1.8668, -0.4454, 0.6270, -0.1405]) tensor([0.0437, 0.1812, 0.5294, 0.2457]) -Greedy action tensor([-1.9334, -0.4491, 0.6593, -0.1771]) tensor([0.0407, 0.1796, 0.5440, 0.2357]) -Greedy action tensor([-0.3247, 0.0039, 0.3071, -0.8445]) tensor([0.2056, 0.2855, 0.3867, 0.1222]) -Greedy action tensor([ 0.0072, 0.1508, -0.1179, -0.8769]) tensor([0.2899, 0.3346, 0.2558, 0.1197]) -Greedy action tensor([ 0.4917, -1.1094, 2.3433, -0.3131]) tensor([0.1247, 0.0252, 0.7944, 0.0558]) -Greedy action tensor([ 1.1370, -0.5379, 0.3057, 0.8452]) tensor([0.4220, 0.0791, 0.1838, 0.3152]) -Greedy action tensor([ 1.2329, -0.8059, 0.2868, 0.4205]) tensor([0.5096, 0.0663, 0.1979, 0.2262]) -Greedy action tensor([ 0.9066, 0.4349, -0.3548, -0.6939]) tensor([0.4742, 0.2958, 0.1343, 0.0957]) -Greedy action tensor([ 0.2356, -0.5608, -0.2996, 0.5291]) tensor([0.2961, 0.1335, 0.1734, 0.3971]) -Greedy action tensor([-0.2580, 0.0677, 0.0715, -0.2135]) tensor([0.2074, 0.2873, 0.2884, 0.2169]) -Greedy action tensor([ 0.3272, -0.6221, 0.2620, -0.1652]) tensor([0.3407, 0.1319, 0.3192, 0.2082]) -Greedy action tensor([-0.2276, 0.2836, -0.9094, -1.0365]) tensor([0.2764, 0.4608, 0.1398, 0.1231]) -Greedy action tensor([-0.5164, -0.4926, 0.5279, -1.1336]) tensor([0.1850, 0.1895, 0.5257, 0.0998]) -Greedy action tensor([-0.1178, -0.4183, 1.1934, -0.2190]) tensor([0.1574, 0.1165, 0.5839, 0.1422]) -Greedy action tensor([-0.9745, 0.4133, 0.0563, -0.5300]) tensor([0.1067, 0.4276, 0.2992, 0.1665]) -Greedy action tensor([-0.9783, 0.4674, -0.3093, -0.7957]) tensor([0.1191, 0.5055, 0.2325, 0.1429]) -Greedy action tensor([ 1.2509, -0.1365, 0.0885, 0.0559]) tensor([0.5362, 0.1339, 0.1677, 0.1623]) -Greedy action tensor([-0.2876, -0.9147, -1.6840, 0.4022]) tensor([0.2649, 0.1415, 0.0656, 0.5280]) -Greedy action tensor([-0.8923, -0.2070, 0.5025, -1.5116]) tensor([0.1323, 0.2626, 0.5338, 0.0712]) -Greedy action tensor([-0.0211, -0.6679, 0.0225, -0.6260]) tensor([0.3211, 0.1682, 0.3354, 0.1754]) -Greedy action tensor([-0.0030, -0.3071, -0.5738, -0.6355]) tensor([0.3528, 0.2603, 0.1994, 0.1875]) -Greedy action tensor([-0.5204, -0.6798, -0.2562, -0.0642]) tensor([0.2113, 0.1801, 0.2752, 0.3334]) -Greedy action tensor([ 0.9607, -0.4428, -0.2638, 0.6440]) tensor([0.4409, 0.1083, 0.1296, 0.3212]) -Greedy action tensor([ 0.1933, -0.4220, -0.7582, 0.3266]) tensor([0.3258, 0.1761, 0.1258, 0.3723]) -Greedy action tensor([ 0.8374, -0.5630, 0.8125, -0.2358]) tensor([0.3900, 0.0961, 0.3805, 0.1334]) -Greedy action tensor([ 0.1607, -0.7654, 0.9641, 1.0679]) tensor([0.1638, 0.0649, 0.3657, 0.4057]) -Greedy action tensor([ 1.4063, -0.5928, 0.8876, -0.0680]) tensor([0.5103, 0.0691, 0.3038, 0.1168]) -Greedy action tensor([-1.0107, -1.3232, -0.0662, -0.6150]) tensor([0.1728, 0.1264, 0.4442, 0.2566]) -Greedy action tensor([-1.4388, -1.5404, -0.0980, -0.9511]) tensor([0.1360, 0.1228, 0.5197, 0.2215]) -Greedy action tensor([ 0.5682, -0.3410, -0.4611, -0.6156]) tensor([0.4840, 0.1950, 0.1729, 0.1482]) -Greedy action tensor([-0.6158, -1.2269, 0.3954, 0.1370]) tensor([0.1559, 0.0846, 0.4285, 0.3309]) -Greedy action tensor([-0.6820, -1.6038, -0.2064, 1.2430]) tensor([0.1014, 0.0403, 0.1632, 0.6951]) -Greedy action tensor([-0.4107, -0.0156, 0.1197, -0.5593]) tensor([0.1982, 0.2942, 0.3368, 0.1708]) -Greedy action tensor([ 0.5908, -1.1443, 0.5232, 0.7891]) tensor([0.3003, 0.0530, 0.2806, 0.3661]) -Greedy action tensor([-1.4792, 0.1860, 0.9021, -0.9748]) tensor([0.0533, 0.2818, 0.5767, 0.0883]) -Greedy action tensor([ 1.0775, 0.0130, -0.6702, 0.1980]) tensor([0.5171, 0.1783, 0.0901, 0.2146]) -Greedy action tensor([ 1.0751, -0.8828, 0.3323, 0.1691]) tensor([0.4948, 0.0698, 0.2354, 0.2000]) -Greedy action tensor([ 1.4937, -0.0567, 0.9407, 1.5286]) tensor([0.3542, 0.0752, 0.2038, 0.3668]) -Greedy action tensor([-0.4547, -0.8479, 0.2310, -0.8964]) tensor([0.2324, 0.1568, 0.4613, 0.1494]) -Greedy action tensor([-0.9389, 0.0793, 1.1029, -0.8855]) tensor([0.0798, 0.2210, 0.6150, 0.0842]) -Greedy action tensor([-0.6608, -0.3815, 0.6481, -1.4862]) tensor([0.1547, 0.2046, 0.5729, 0.0678]) -Greedy action tensor([1.3983, 0.6448, 0.6048, 1.6740]) tensor([0.3086, 0.1453, 0.1396, 0.4066]) -Greedy action tensor([-1.0883, -0.7113, 0.0265, -0.3772]) tensor([0.1326, 0.1933, 0.4042, 0.2700]) -Greedy action tensor([ 0.5815, -1.0428, -0.3591, 0.3573]) tensor([0.4190, 0.0826, 0.1636, 0.3349]) -Greedy action tensor([0.5864, 0.0303, 0.6282, 0.5407]) tensor([0.2800, 0.1606, 0.2919, 0.2675]) -Greedy action tensor([ 0.7551, -0.4946, 0.1979, 0.9941]) tensor([0.3196, 0.0916, 0.1830, 0.4058]) -Greedy action tensor([-0.6869, 0.3406, 0.5471, -0.1821]) tensor([0.1125, 0.3144, 0.3866, 0.1864]) -Greedy action tensor([ 1.3969, -0.0078, 0.7940, 0.0143]) tensor([0.4893, 0.1201, 0.2678, 0.1228]) -Greedy action tensor([-1.4815, 0.5852, -0.6604, -0.4199]) tensor([0.0711, 0.5617, 0.1616, 0.2056]) -Greedy action tensor([-0.6231, -1.3797, 0.4909, -1.0259]) tensor([0.1929, 0.0905, 0.5877, 0.1289]) -Greedy action tensor([-0.5732, -0.7000, 0.2554, 0.8058]) tensor([0.1228, 0.1082, 0.2813, 0.4877]) -Greedy action tensor([1.1907, 0.5860, 0.7766, 0.5527]) tensor([0.3656, 0.1997, 0.2416, 0.1932]) -Greedy action tensor([-0.4095, -0.2715, -0.4821, -0.0501]) tensor([0.2217, 0.2545, 0.2062, 0.3176]) -Greedy action tensor([ 0.8047, -0.4344, -0.0314, 1.1802]) tensor([0.3146, 0.0911, 0.1364, 0.4579]) -Greedy action tensor([-0.8798, -0.3692, -0.0777, -0.6123]) tensor([0.1612, 0.2686, 0.3595, 0.2107]) -Greedy action tensor([-0.2348, -0.3697, 1.3813, -0.0426]) tensor([0.1232, 0.1076, 0.6199, 0.1493]) -Greedy action tensor([ 0.2320, 0.3626, -0.1190, -0.5088]) tensor([0.3012, 0.3432, 0.2120, 0.1436]) -Greedy action tensor([ 1.6405, -0.8959, 0.9591, 0.3074]) tensor([0.5409, 0.0428, 0.2736, 0.1426]) -Greedy action tensor([-0.1641, -0.8305, 0.1638, 0.0492]) tensor([0.2416, 0.1241, 0.3353, 0.2990]) -Greedy action tensor([-0.3097, -1.4128, -0.5627, -0.0076]) tensor([0.2889, 0.0959, 0.2243, 0.3908]) -Greedy action tensor([ 0.1174, -0.1952, -0.1802, 1.0239]) tensor([0.2020, 0.1478, 0.1500, 0.5002]) -Greedy action tensor([-0.0349, 0.8649, -0.6509, -0.6381]) tensor([0.2199, 0.5409, 0.1188, 0.1203]) -Greedy action tensor([-0.2300, 0.0794, -0.6569, -0.3843]) tensor([0.2583, 0.3519, 0.1685, 0.2213]) -Greedy action tensor([ 1.0819, -0.7439, -0.0159, 0.3715]) tensor([0.5035, 0.0811, 0.1680, 0.2474]) -Greedy action tensor([ 0.6504, -0.3308, 0.9130, -0.1727]) tensor([0.3211, 0.1204, 0.4175, 0.1410]) -Greedy action tensor([ 1.1989, -0.8842, -0.2152, 1.0140]) tensor([0.4548, 0.0566, 0.1106, 0.3780]) -Greedy action tensor([ 0.0332, -0.3541, 0.2597, 0.8193]) tensor([0.1950, 0.1324, 0.2446, 0.4280]) -Greedy action tensor([ 0.8678, -0.2602, -0.1067, -0.3540]) tensor([0.5011, 0.1622, 0.1891, 0.1477]) -Greedy action tensor([ 0.5805, -0.0534, 0.6140, 1.7666]) tensor([0.1713, 0.0909, 0.1771, 0.5608]) -Greedy action tensor([ 1.2828, -0.4196, 0.7831, 0.5517]) tensor([0.4405, 0.0803, 0.2672, 0.2120]) -Greedy action tensor([ 0.6890, -1.2349, 0.5195, -1.0274]) tensor([0.4609, 0.0673, 0.3890, 0.0828]) -Greedy action tensor([-0.4764, 0.5238, -0.9086, -0.4352]) tensor([0.1848, 0.5026, 0.1200, 0.1926]) -Greedy action tensor([-0.2245, -1.8813, -0.4454, 1.0573]) tensor([0.1787, 0.0341, 0.1433, 0.6439]) -Greedy action tensor([ 0.3396, -1.0014, -1.0081, 0.3717]) tensor([0.3915, 0.1024, 0.1017, 0.4043]) -Greedy action tensor([-0.2209, 0.2202, 1.0307, -0.4272]) tensor([0.1457, 0.2265, 0.5093, 0.1185]) -Greedy action tensor([ 0.3216, -0.0103, -0.1788, -0.4455]) tensor([0.3587, 0.2574, 0.2174, 0.1665]) -Greedy action tensor([ 0.6594, -1.1266, -0.3340, 0.1876]) tensor([0.4626, 0.0775, 0.1713, 0.2886]) -Greedy action tensor([ 0.4446, -0.1515, 0.7066, 0.5740]) tensor([0.2507, 0.1381, 0.3258, 0.2853]) -Greedy action tensor([0.8991, 0.3165, 0.1997, 0.4656]) tensor([0.3699, 0.2066, 0.1838, 0.2398]) -Greedy action tensor([ 1.0017, 0.5378, 0.9727, -0.3440]) tensor([0.3496, 0.2198, 0.3396, 0.0910]) -Greedy action tensor([ 0.2567, 0.1253, 0.3500, -0.7362]) tensor([0.2989, 0.2621, 0.3282, 0.1108]) -Greedy action tensor([ 0.1354, -0.1863, 0.5755, -0.3237]) tensor([0.2558, 0.1854, 0.3972, 0.1616]) -Greedy action tensor([ 1.7578e+00, -2.4534e-01, -7.3236e-01, 2.8670e-05]) tensor([0.7193, 0.0970, 0.0596, 0.1240]) -Greedy action tensor([ 1.2769, -0.6000, -0.4206, 0.4454]) tensor([0.5645, 0.0864, 0.1034, 0.2458]) -Greedy action tensor([ 0.2226, -0.2833, -0.2346, 0.0507]) tensor([0.3249, 0.1959, 0.2057, 0.2736]) -Greedy action tensor([ 1.5457, -0.4814, -0.4267, 0.2820]) tensor([0.6437, 0.0848, 0.0896, 0.1819]) -Greedy action tensor([ 1.7070, -0.4372, -0.4623, 0.6162]) tensor([0.6380, 0.0747, 0.0729, 0.2143]) -Greedy action tensor([ 0.8612, -0.4441, 0.0108, -0.1308]) tensor([0.4833, 0.1310, 0.2065, 0.1792]) -Greedy action tensor([ 2.1059, -0.5339, -0.2722, 0.3793]) tensor([0.7451, 0.0532, 0.0691, 0.1326]) -Greedy action tensor([ 1.4289, 0.0337, -0.8428, 0.2855]) tensor([0.5989, 0.1484, 0.0618, 0.1909]) -Greedy action tensor([ 1.9846, 0.4477, -0.1796, 0.3085]) tensor([0.6592, 0.1418, 0.0757, 0.1233]) -Greedy action tensor([ 1.3093, 0.1062, -0.4830, 0.2921]) tensor([0.5469, 0.1642, 0.0911, 0.1978]) -Greedy action tensor([ 1.2452, -0.5143, -0.2383, 0.0644]) tensor([0.5862, 0.1009, 0.1330, 0.1800]) -Greedy action tensor([ 1.9991, -0.4710, -0.4256, 0.5272]) tensor([0.7130, 0.0603, 0.0631, 0.1636]) -Greedy action tensor([ 1.1611, -0.5778, -0.1778, 0.5844]) tensor([0.5001, 0.0879, 0.1311, 0.2809]) -Greedy action tensor([1.8713, 0.7130, 0.0058, 0.1592]) tensor([0.6063, 0.1904, 0.0939, 0.1094]) -Greedy action tensor([ 1.2614, 0.0103, -0.5018, 0.1941]) tensor([0.5551, 0.1588, 0.0952, 0.1909]) -Greedy action tensor([ 3.2294, -1.3484, -0.3310, 0.5788]) tensor([0.9015, 0.0093, 0.0256, 0.0637]) -Greedy action tensor([ 1.9931, -0.8772, -0.4511, 0.5659]) tensor([0.7228, 0.0410, 0.0627, 0.1735]) -Greedy action tensor([ 1.2250, -0.3848, -0.2239, 0.1302]) tensor([0.5652, 0.1130, 0.1327, 0.1891]) -Greedy action tensor([ 1.2206, -0.5096, -0.5222, 0.4730]) tensor([0.5477, 0.0971, 0.0959, 0.2593]) -Greedy action tensor([ 1.1913, -0.4544, -0.2509, 0.2369]) tensor([0.5512, 0.1063, 0.1303, 0.2122]) -Greedy action tensor([ 2.2399, -0.7720, -0.6353, 0.1939]) tensor([0.8098, 0.0398, 0.0457, 0.1047]) -Greedy action tensor([ 1.4754, -1.0669, -0.3230, 0.3093]) tensor([0.6427, 0.0506, 0.1064, 0.2003]) -Greedy action tensor([ 1.7251, -0.2076, -0.3903, 0.5096]) tensor([0.6402, 0.0927, 0.0772, 0.1899]) -Greedy action tensor([ 1.5410, -0.7217, -0.5220, 0.4864]) tensor([0.6331, 0.0659, 0.0805, 0.2205]) -Greedy action tensor([ 1.7583, -0.5552, -0.2414, 0.3823]) tensor([0.6725, 0.0665, 0.0910, 0.1699]) -Greedy action tensor([ 1.3833, -0.2396, -0.6560, 0.2692]) tensor([0.6040, 0.1192, 0.0786, 0.1982]) -Greedy action tensor([ 1.1904, -0.5110, 0.0309, 0.5782]) tensor([0.4906, 0.0895, 0.1539, 0.2660]) -Greedy action tensor([ 1.3947, -0.0690, -0.3866, 0.5784]) tensor([0.5429, 0.1256, 0.0914, 0.2400]) -Greedy action tensor([ 1.4738, -0.5328, -0.4098, 0.0995]) tensor([0.6496, 0.0873, 0.0988, 0.1643]) -Greedy action tensor([ 1.6708, -0.9261, -0.2539, 0.2109]) tensor([0.6884, 0.0513, 0.1004, 0.1599]) -Greedy action tensor([ 1.1546, -0.7435, -0.1792, 0.4561]) tensor([0.5234, 0.0784, 0.1379, 0.2603]) -Greedy action tensor([ 1.0888, -0.6684, -0.8360, 0.4025]) tensor([0.5489, 0.0947, 0.0801, 0.2763]) -Greedy action tensor([ 1.9405, 0.3881, -0.2000, -0.0118]) tensor([0.6797, 0.1439, 0.0799, 0.0965]) -Greedy action tensor([ 1.5066, -0.3353, -0.4964, 0.2504]) tensor([0.6337, 0.1004, 0.0855, 0.1804]) -Greedy action tensor([ 1.4772, -0.2018, -0.3747, 0.3050]) tensor([0.6049, 0.1128, 0.0949, 0.1873]) -Greedy action tensor([ 1.4539, -0.3885, -0.6242, -0.0410]) tensor([0.6632, 0.1051, 0.0830, 0.1487]) -Greedy action tensor([ 1.2286, -0.7176, -0.1748, 0.3240]) tensor([0.5576, 0.0796, 0.1370, 0.2257]) -Greedy action tensor([ 1.7854, -0.8099, -0.3947, 0.3396]) tensor([0.7026, 0.0524, 0.0794, 0.1655]) -Greedy action tensor([ 1.1821, -0.3753, -0.4882, 0.0982]) tensor([0.5756, 0.1213, 0.1083, 0.1947]) -Greedy action tensor([ 1.5127, -0.2221, -0.5633, 0.2252]) tensor([0.6338, 0.1118, 0.0795, 0.1749]) -Greedy action tensor([ 1.3315, -0.0301, -0.5181, 0.3957]) tensor([0.5538, 0.1419, 0.0871, 0.2172]) -Greedy action tensor([ 1.9751, -1.2027, -0.2745, 0.4411]) tensor([0.7338, 0.0306, 0.0774, 0.1583]) -Greedy action tensor([ 1.4427, 0.2882, -0.8094, 0.5797]) tensor([0.5428, 0.1711, 0.0571, 0.2290]) -Greedy action tensor([ 1.5206, -0.4006, -0.2262, 0.4555]) tensor([0.6004, 0.0879, 0.1047, 0.2070]) -Greedy action tensor([ 1.0815, -0.3342, -0.1141, -0.0530]) tensor([0.5357, 0.1300, 0.1620, 0.1723]) -Greedy action tensor([ 1.0720, -0.1079, -0.5863, 0.1863]) tensor([0.5235, 0.1609, 0.0997, 0.2159]) -Greedy action tensor([ 0.8606, -0.2178, -0.2134, 0.1268]) tensor([0.4626, 0.1573, 0.1580, 0.2221]) -Greedy action tensor([ 1.3356, -0.2833, -0.3376, 0.0113]) tensor([0.6054, 0.1199, 0.1136, 0.1610]) -Greedy action tensor([ 1.9358, -0.5660, -0.4820, 0.5777]) tensor([0.7002, 0.0574, 0.0624, 0.1800]) -Greedy action tensor([ 1.7180, -0.2079, -0.2440, -0.5511]) tensor([0.7196, 0.1049, 0.1011, 0.0744]) -Greedy action tensor([ 2.0304, -0.6820, -0.3804, 0.6237]) tensor([0.7137, 0.0474, 0.0641, 0.1748]) -Greedy action tensor([ 1.4234, -0.3799, -0.3757, -0.1554]) tensor([0.6509, 0.1072, 0.1077, 0.1342]) -Greedy action tensor([ 1.7438, -0.8648, -0.5978, 0.4459]) tensor([0.6930, 0.0510, 0.0667, 0.1893]) -Greedy action tensor([ 1.1984, -0.6232, -0.2329, 0.2440]) tensor([0.5600, 0.0906, 0.1338, 0.2156]) -Greedy action tensor([ 3.1464, -2.0253, -0.3262, 0.7733]) tensor([0.8850, 0.0050, 0.0275, 0.0825]) -Greedy action tensor([ 1.0327, -0.1340, -0.6983, 0.1458]) tensor([0.5262, 0.1639, 0.0932, 0.2167]) -Greedy action tensor([ 0.9481, -0.6543, -0.1547, -0.2086]) tensor([0.5412, 0.1090, 0.1796, 0.1702]) -Greedy action tensor([ 1.1776, 0.0228, -1.0200, 0.1941]) tensor([0.5555, 0.1750, 0.0617, 0.2078]) -Greedy action tensor([ 1.0843, -0.0484, -0.7629, 0.0698]) tensor([0.5427, 0.1749, 0.0856, 0.1968]) -Greedy action tensor([ 1.3155, -0.1918, -0.5086, 0.1531]) tensor([0.5897, 0.1306, 0.0952, 0.1844]) -Greedy action tensor([ 1.4794, -0.4024, -0.0782, 0.0517]) tensor([0.6239, 0.0950, 0.1314, 0.1496]) -Greedy action tensor([ 1.5547, -0.4216, -0.1747, 0.5205]) tensor([0.5983, 0.0829, 0.1061, 0.2127]) -Greedy action tensor([ 1.2530, 0.0354, 0.0828, -0.3047]) tensor([0.5504, 0.1629, 0.1708, 0.1159]) -Greedy action tensor([ 1.6137, -0.4502, -0.7619, 0.3925]) tensor([0.6602, 0.0838, 0.0614, 0.1947]) -Greedy action tensor([ 1.2787, -0.6510, -0.4647, 0.2269]) tensor([0.5990, 0.0870, 0.1048, 0.2092]) -Greedy action tensor([ 1.4134, -0.2204, -0.2819, -0.0426]) tensor([0.6204, 0.1211, 0.1139, 0.1447]) -Greedy action tensor([ 0.9723, 0.4459, -1.1753, 0.2157]) tensor([0.4594, 0.2714, 0.0536, 0.2156]) -Greedy action tensor([ 1.5698, -0.3916, -0.6772, 0.7478]) tensor([0.5932, 0.0834, 0.0627, 0.2607]) -Greedy action tensor([ 1.5764, -0.1611, -0.5264, -0.1270]) tensor([0.6756, 0.1189, 0.0825, 0.1230]) -Greedy action tensor([ 1.5917, 0.3477, -0.0144, 0.2178]) tensor([0.5740, 0.1655, 0.1152, 0.1453]) -Greedy action tensor([ 1.3019, 0.3775, -0.4691, 0.4334]) tensor([0.5034, 0.1997, 0.0857, 0.2112]) -Greedy action tensor([ 1.5788, -1.1070, 0.0104, 0.0902]) tensor([0.6657, 0.0454, 0.1387, 0.1502]) -Greedy action tensor([ 1.1702, 0.1371, -0.4352, 0.2851]) tensor([0.5078, 0.1807, 0.1020, 0.2096]) -Greedy action tensor([ 2.1492, 0.3709, -0.0212, 0.3057]) tensor([0.6938, 0.1172, 0.0792, 0.1098]) -Greedy action tensor([ 1.6273, -0.4751, -1.0381, 0.4538]) tensor([0.6662, 0.0814, 0.0463, 0.2060]) -Greedy action tensor([ 1.3578, -0.1335, -0.3203, 0.4167]) tensor([0.5549, 0.1249, 0.1036, 0.2165]) -Greedy action tensor([ 1.5345, 0.1212, -0.8852, 0.5798]) tensor([0.5823, 0.1417, 0.0518, 0.2242]) -Greedy action tensor([ 1.7101, -0.5162, -0.5129, 0.2575]) tensor([0.6896, 0.0744, 0.0747, 0.1613]) -Greedy action tensor([ 1.4842, 0.3718, -0.1750, 0.4057]) tensor([0.5379, 0.1768, 0.1024, 0.1829]) -Greedy action tensor([ 1.3333, 0.1198, -0.5909, 0.8100]) tensor([0.4912, 0.1460, 0.0717, 0.2911]) -Greedy action tensor([ 1.5235, -0.6938, -0.2026, -0.0231]) tensor([0.6667, 0.0726, 0.1187, 0.1420]) -Greedy action tensor([ 1.5040, -0.6582, -0.0999, 0.2975]) tensor([0.6190, 0.0712, 0.1245, 0.1852]) -Greedy action tensor([ 0.6960, -0.3732, -0.0738, -0.1880]) tensor([0.4505, 0.1547, 0.2087, 0.1861]) -Greedy action tensor([ 0.4723, -0.1858, 0.0504, -0.2874]) tensor([0.3786, 0.1960, 0.2483, 0.1771]) -Greedy action tensor([ 0.6005, -0.0507, -0.0863, 0.0939]) tensor([0.3806, 0.1985, 0.1915, 0.2294]) -Greedy action tensor([ 0.4827, 0.0826, -0.0194, -0.0891]) tensor([0.3521, 0.2360, 0.2131, 0.1988]) -Greedy action tensor([ 0.4686, -0.0346, -0.0621, 0.0317]) tensor([0.3523, 0.2130, 0.2072, 0.2276]) -Greedy action tensor([ 0.9603, -0.4216, 0.2666, -0.2516]) tensor([0.4882, 0.1226, 0.2439, 0.1453]) -Greedy action tensor([ 1.0572, -0.6543, 0.1444, -0.4198]) tensor([0.5524, 0.0998, 0.2217, 0.1261]) -Greedy action tensor([ 0.5747, -0.2033, -0.0117, -0.1924]) tensor([0.4032, 0.1852, 0.2243, 0.1872]) -Greedy action tensor([ 0.6913, -0.0064, -0.0010, -0.2886]) tensor([0.4213, 0.2097, 0.2108, 0.1582]) -Greedy action tensor([ 0.4601, -0.2013, 0.0222, -0.2327]) tensor([0.3757, 0.1939, 0.2425, 0.1879]) -Greedy action tensor([ 0.2834, 0.0662, -0.1161, -0.6767]) tensor([0.3499, 0.2816, 0.2346, 0.1339]) -Greedy action tensor([ 0.7874, -0.6362, 0.0791, -0.5975]) tensor([0.5041, 0.1214, 0.2483, 0.1262]) -Greedy action tensor([ 0.5940, -0.3591, 0.0129, -0.1233]) tensor([0.4110, 0.1585, 0.2299, 0.2006]) -Greedy action tensor([ 0.2571, -0.0682, -0.2344, -0.0627]) tensor([0.3268, 0.2360, 0.1999, 0.2373]) -Greedy action tensor([ 0.7181, -0.5325, -0.0597, -0.3753]) tensor([0.4806, 0.1376, 0.2208, 0.1610]) -Greedy action tensor([ 1.0506, -0.4623, 0.0084, -0.4892]) tensor([0.5595, 0.1232, 0.1973, 0.1200]) -Greedy action tensor([ 1.0666e+00, -9.4430e-01, 3.1328e-04, -5.6838e-01]) tensor([0.5977, 0.0800, 0.2058, 0.1165]) -Greedy action tensor([ 0.7298, -0.4694, -0.0943, -0.2464]) tensor([0.4724, 0.1424, 0.2072, 0.1780]) -Greedy action tensor([ 0.7016, -0.7287, 0.0133, -0.3222]) tensor([0.4760, 0.1139, 0.2392, 0.1710]) -Greedy action tensor([ 0.9021, -0.6587, 0.1647, -0.4178]) tensor([0.5114, 0.1074, 0.2446, 0.1366]) -Greedy action tensor([ 0.4445, -0.1620, 0.0387, -0.1708]) tensor([0.3634, 0.1981, 0.2421, 0.1964]) -Greedy action tensor([ 0.9331, -0.6482, 0.1468, -0.6726]) tensor([0.5371, 0.1105, 0.2446, 0.1078]) -Greedy action tensor([ 0.6632, -0.4339, -0.0735, -0.3247]) tensor([0.4577, 0.1528, 0.2191, 0.1704]) -Greedy action tensor([ 0.2892, 0.3330, -0.2301, 0.0568]) tensor([0.2914, 0.3044, 0.1733, 0.2309]) -Greedy action tensor([ 0.5918, 0.0453, -0.2454, 0.0226]) tensor([0.3879, 0.2246, 0.1679, 0.2195]) -Greedy action tensor([ 0.3686, -0.2577, -0.0926, -0.1187]) tensor([0.3598, 0.1923, 0.2269, 0.2210]) -Greedy action tensor([ 0.5330, -0.0706, -0.0840, 0.0039]) tensor([0.3738, 0.2044, 0.2017, 0.2202]) -Greedy action tensor([ 0.8815, -0.5906, 0.2293, -0.3996]) tensor([0.4931, 0.1131, 0.2568, 0.1369]) -Greedy action tensor([ 0.3551, -0.0889, -0.0416, -0.1121]) tensor([0.3401, 0.2181, 0.2287, 0.2131]) -Greedy action tensor([ 0.7793, -0.4306, -0.0414, -0.3581]) tensor([0.4857, 0.1448, 0.2137, 0.1557]) -Greedy action tensor([ 0.5553, -0.1824, -0.1595, -0.4695]) tensor([0.4299, 0.2056, 0.2103, 0.1543]) -Greedy action tensor([ 0.5559, -0.3849, -0.1210, -0.1784]) tensor([0.4205, 0.1641, 0.2137, 0.2018]) -Greedy action tensor([ 0.7859, -0.7558, -0.1354, -0.1933]) tensor([0.5031, 0.1077, 0.2002, 0.1890]) -Greedy action tensor([ 0.2918, -0.3239, -0.3068, 0.0025]) tensor([0.3523, 0.1903, 0.1936, 0.2638]) -Greedy action tensor([ 0.7486, -0.6130, -0.1111, -0.2053]) tensor([0.4843, 0.1241, 0.2050, 0.1866]) -Greedy action tensor([ 0.7045, -0.3427, -0.0600, -0.2247]) tensor([0.4522, 0.1587, 0.2105, 0.1786]) -Greedy action tensor([ 0.9641, -0.5286, -0.1618, -0.3392]) tensor([0.5492, 0.1234, 0.1781, 0.1492]) -Greedy action tensor([ 0.7093, -0.6758, -0.0531, -0.2579]) tensor([0.4769, 0.1194, 0.2225, 0.1813]) -Greedy action tensor([ 0.4415, 0.0008, -0.0159, -0.0521]) tensor([0.3464, 0.2229, 0.2192, 0.2114]) -Greedy action tensor([ 0.8226, -0.3586, 0.0860, -0.1784]) tensor([0.4644, 0.1425, 0.2223, 0.1707]) -Greedy action tensor([ 0.5173, -0.2854, -0.0283, -0.0576]) tensor([0.3860, 0.1730, 0.2237, 0.2172]) -Greedy action tensor([ 0.9650, -0.5514, -0.0421, -0.5169]) tensor([0.5519, 0.1211, 0.2016, 0.1254]) -Greedy action tensor([ 0.8037, -0.3991, 0.0149, -0.4368]) tensor([0.4892, 0.1469, 0.2223, 0.1415]) -Greedy action tensor([ 0.3352, -0.0422, -0.1216, -0.1121]) tensor([0.3380, 0.2318, 0.2141, 0.2161]) -Greedy action tensor([ 0.8006, -0.5884, -0.0417, -0.1835]) tensor([0.4869, 0.1214, 0.2097, 0.1820]) -Greedy action tensor([ 0.9460, -0.0529, -0.0071, 0.0517]) tensor([0.4624, 0.1703, 0.1783, 0.1891]) -Greedy action tensor([ 0.5458, -0.1563, -0.0016, -0.0300]) tensor([0.3793, 0.1880, 0.2194, 0.2133]) -Greedy action tensor([ 0.4470, -0.0866, 0.0192, -0.3547]) tensor([0.3722, 0.2183, 0.2426, 0.1669]) -Greedy action tensor([ 0.6938, -0.2914, -0.1089, -0.1080]) tensor([0.4405, 0.1645, 0.1974, 0.1976]) -Greedy action tensor([ 0.8439, -0.3982, 0.0346, -0.1461]) tensor([0.4749, 0.1372, 0.2114, 0.1765]) -Greedy action tensor([ 0.3683, -0.1512, 0.0064, -0.0698]) tensor([0.3406, 0.2026, 0.2371, 0.2197]) -Greedy action tensor([ 0.7338, -0.4831, -0.1293, -0.1790]) tensor([0.4718, 0.1397, 0.1990, 0.1894]) -Greedy action tensor([ 0.4417, -0.1341, -0.0103, -0.2994]) tensor([0.3738, 0.2102, 0.2379, 0.1782]) -Greedy action tensor([ 0.7550, -0.4194, -0.2196, 0.0069]) tensor([0.4631, 0.1431, 0.1747, 0.2191]) -Greedy action tensor([ 0.6767, -0.4743, -0.1163, -0.1358]) tensor([0.4520, 0.1430, 0.2045, 0.2006]) -Greedy action tensor([ 0.9877, -0.3054, -0.0726, -0.1561]) tensor([0.5156, 0.1415, 0.1786, 0.1643]) -Greedy action tensor([ 0.6721, -0.5038, -0.1108, 0.0195]) tensor([0.4374, 0.1350, 0.1999, 0.2278]) -Greedy action tensor([ 0.6752, -0.5925, -0.2465, -0.3907]) tensor([0.4941, 0.1391, 0.1966, 0.1702]) -Greedy action tensor([ 0.5877, -0.5236, -0.1979, -0.2017]) tensor([0.4466, 0.1470, 0.2036, 0.2028]) -Greedy action tensor([ 0.8962, -0.4986, -0.0923, -0.3376]) tensor([0.5232, 0.1297, 0.1947, 0.1524]) -Greedy action tensor([ 0.8695, -0.5863, 0.0790, -0.5735]) tensor([0.5200, 0.1213, 0.2359, 0.1228]) -Greedy action tensor([ 0.7698, -0.5080, -0.2906, -0.3598]) tensor([0.5133, 0.1430, 0.1778, 0.1659]) -Greedy action tensor([ 4.9330e-01, -2.8136e-01, 1.9783e-04, -2.2156e-02]) tensor([0.3747, 0.1727, 0.2288, 0.2238]) -Greedy action tensor([ 0.2740, 0.4007, -0.0565, -0.2549]) tensor([0.2904, 0.3297, 0.2087, 0.1711]) -Greedy action tensor([ 0.8229, -0.5504, -0.1539, -0.4392]) tensor([0.5228, 0.1324, 0.1968, 0.1480]) -Greedy action tensor([ 0.3693, 0.0079, -0.1011, -0.1592]) tensor([0.3435, 0.2393, 0.2146, 0.2025]) -Greedy action tensor([ 0.5790, 0.0911, -0.0353, 0.1088]) tensor([0.3597, 0.2209, 0.1946, 0.2248]) -Greedy action tensor([ 0.5644, 0.0089, -0.1156, -0.2222]) tensor([0.3943, 0.2263, 0.1998, 0.1796]) -Greedy action tensor([ 0.5068, 0.4063, -0.0362, -0.0522]) tensor([0.3271, 0.2958, 0.1901, 0.1870]) -Greedy action tensor([ 0.4966, -0.3802, -0.1646, -0.4362]) tensor([0.4300, 0.1789, 0.2220, 0.1692]) -Greedy action tensor([ 0.7191, -0.3703, -0.0124, -0.3242]) tensor([0.4608, 0.1550, 0.2218, 0.1624]) -Greedy action tensor([ 1.1352, -0.7680, -0.2392, -0.4779]) tensor([0.6245, 0.0931, 0.1580, 0.1244]) -Greedy action tensor([ 1.2395, -0.7306, -0.1490, -0.3909]) tensor([0.6310, 0.0880, 0.1574, 0.1236]) -Greedy action tensor([ 0.6925, -0.6302, 0.0418, -0.4253]) tensor([0.4728, 0.1260, 0.2466, 0.1546]) -Greedy action tensor([ 1.0516, -0.7339, 0.0602, -0.5718]) tensor([0.5761, 0.0966, 0.2137, 0.1136]) -Greedy action tensor([ 0.6134, -0.3034, -0.1032, -0.1836]) tensor([0.4276, 0.1709, 0.2088, 0.1927]) -Greedy action tensor([ 0.8324, -0.2993, -0.0875, -0.8222]) tensor([0.5230, 0.1686, 0.2084, 0.1000]) -Greedy action tensor([ 1.0735, -0.9305, 0.0145, -0.4559]) tensor([0.5888, 0.0794, 0.2042, 0.1276]) -Greedy action tensor([ 0.9025, -0.4000, 0.0228, -0.1723]) tensor([0.4931, 0.1340, 0.2046, 0.1683]) -Greedy action tensor([ 0.8537, -0.4678, -0.0538, -0.2654]) tensor([0.5008, 0.1336, 0.2021, 0.1635]) -Greedy action tensor([ 0.5201, -0.4213, -0.0986, -0.3451]) tensor([0.4256, 0.1660, 0.2292, 0.1792]) -Greedy action tensor([-1.5504, -0.5422, 0.4931, -0.1029]) tensor([0.0637, 0.1745, 0.4912, 0.2707]) -Greedy action tensor([-1.8837, -0.4719, 0.6696, -0.0871]) tensor([0.0417, 0.1711, 0.5358, 0.2514]) -Greedy action tensor([-1.7081, -0.4074, 0.5465, -0.0699]) tensor([0.0517, 0.1898, 0.4926, 0.2660]) -Greedy action tensor([-1.8179, -0.4749, 0.5981, -0.0994]) tensor([0.0463, 0.1773, 0.5184, 0.2581]) -Greedy action tensor([-0.7708, 0.2382, 0.5871, 1.3499]) tensor([0.0626, 0.1718, 0.2435, 0.5221]) -Greedy action tensor([-0.8296, 0.8171, -0.1359, -0.3004]) tensor([0.1011, 0.5248, 0.2024, 0.1717]) -Greedy action tensor([-1.7724, -0.4612, 0.5946, -0.0556]) tensor([0.0477, 0.1772, 0.5093, 0.2658]) -Greedy action tensor([-1.7903, -0.4451, 0.6181, -0.0698]) tensor([0.0464, 0.1782, 0.5160, 0.2594]) -Greedy action tensor([-1.8231, -0.4992, 0.6179, -0.1049]) tensor([0.0458, 0.1723, 0.5264, 0.2555]) -Greedy action tensor([-1.7753, -0.4224, 0.5707, -0.0884]) tensor([0.0483, 0.1868, 0.5042, 0.2608]) -Greedy action tensor([-1.3103, -0.4227, 0.3603, 0.2157]) tensor([0.0749, 0.1820, 0.3983, 0.3447]) -Greedy action tensor([-1.8895, -0.4574, 0.6449, -0.1518]) tensor([0.0426, 0.1783, 0.5370, 0.2421]) -Greedy action tensor([-1.6695, 0.2349, 0.4458, 0.0413]) tensor([0.0464, 0.3117, 0.3849, 0.2569]) -Greedy action tensor([-1.8450, -0.1351, 0.5592, -0.1009]) tensor([0.0429, 0.2371, 0.4747, 0.2453]) -Greedy action tensor([-1.6611, -0.2431, 0.5888, -0.0806]) tensor([0.0514, 0.2120, 0.4872, 0.2494]) -Greedy action tensor([0.4419, 1.2409, 0.0140, 0.2958]) tensor([0.2110, 0.4691, 0.1375, 0.1823]) -Greedy action tensor([-0.1813, -0.1624, 0.8323, 1.5426]) tensor([0.0963, 0.0982, 0.2654, 0.5401]) -Greedy action tensor([0.4978, 1.2894, 0.0127, 0.4781]) tensor([0.2082, 0.4595, 0.1282, 0.2041]) -Greedy action tensor([-1.6313, -0.4010, 0.5292, 0.0270]) tensor([0.0545, 0.1865, 0.4728, 0.2861]) -Greedy action tensor([-0.7240, -0.7420, 0.3903, 0.3705]) tensor([0.1247, 0.1225, 0.3801, 0.3727]) -Greedy action tensor([-1.9193, -0.4481, 0.6570, -0.1686]) tensor([0.0412, 0.1795, 0.5419, 0.2374]) -Greedy action tensor([-1.7998, -0.4607, 0.5956, -0.1038]) tensor([0.0471, 0.1796, 0.5166, 0.2567]) -Greedy action tensor([-1.7813, -0.4644, 0.5783, -0.1022]) tensor([0.0484, 0.1805, 0.5120, 0.2592]) -Greedy action tensor([-1.8010, -0.4706, 0.6101, -0.0831]) tensor([0.0465, 0.1759, 0.5184, 0.2592]) -Greedy action tensor([-1.0548, -0.0407, 0.9238, 0.8711]) tensor([0.0560, 0.1544, 0.4052, 0.3844]) -Greedy action tensor([-1.7675, -0.0161, 0.5053, -0.0482]) tensor([0.0454, 0.2613, 0.4402, 0.2531]) -Greedy action tensor([-1.4699, 0.0201, 0.3743, -0.0300]) tensor([0.0626, 0.2777, 0.3957, 0.2641]) -Greedy action tensor([-1.9044, -0.4428, 0.6490, -0.1599]) tensor([0.0419, 0.1806, 0.5380, 0.2396]) -Greedy action tensor([-1.8179, -0.4505, 0.6154, -0.1058]) tensor([0.0457, 0.1795, 0.5213, 0.2534]) -Greedy action tensor([-1.6367, -0.4897, 0.5297, -0.0913]) tensor([0.0569, 0.1793, 0.4968, 0.2670]) -Greedy action tensor([-0.5759, 0.7816, -0.0168, 0.1766]) tensor([0.1142, 0.4438, 0.1997, 0.2423]) -Greedy action tensor([-0.4388, 1.0169, 0.0438, 0.1695]) tensor([0.1144, 0.4903, 0.1853, 0.2101]) -Greedy action tensor([-1.8920, -0.3982, 0.6603, -0.1433]) tensor([0.0416, 0.1853, 0.5340, 0.2391]) -Greedy action tensor([-1.9402, -0.4458, 0.6612, -0.1784]) tensor([0.0404, 0.1800, 0.5445, 0.2351]) -Greedy action tensor([-1.9243, -0.4399, 0.6664, -0.1652]) tensor([0.0407, 0.1797, 0.5431, 0.2365]) -Greedy action tensor([-1.9350, -0.4297, 0.6573, -0.1762]) tensor([0.0405, 0.1826, 0.5415, 0.2353]) -Greedy action tensor([-1.6029, 0.3648, 0.3766, 0.0048]) tensor([0.0491, 0.3510, 0.3551, 0.2448]) -Greedy action tensor([-1.6905, -0.2281, 0.4992, 0.0209]) tensor([0.0505, 0.2182, 0.4515, 0.2798]) -Greedy action tensor([-1.9466, -0.4536, 0.6688, -0.1813]) tensor([0.0401, 0.1783, 0.5476, 0.2340]) -Greedy action tensor([-1.9151, -0.4083, 0.6511, -0.1650]) tensor([0.0412, 0.1858, 0.5360, 0.2370]) -Greedy action tensor([-1.1938, 0.7050, 0.1807, 0.0930]) tensor([0.0656, 0.4378, 0.2592, 0.2374]) -Greedy action tensor([-1.8871, -0.4211, 0.6399, -0.1908]) tensor([0.0429, 0.1859, 0.5371, 0.2341]) -Greedy action tensor([-1.8910, -0.3413, 0.6277, -0.1599]) tensor([0.0421, 0.1981, 0.5222, 0.2376]) -Greedy action tensor([-0.9591, 0.7963, 0.1588, 0.0730]) tensor([0.0790, 0.4573, 0.2417, 0.2219]) -Greedy action tensor([-1.8215, -0.2703, 0.5799, -0.1034]) tensor([0.0448, 0.2113, 0.4943, 0.2496]) -Greedy action tensor([-1.8197, -0.3507, 0.6071, -0.1001]) tensor([0.0449, 0.1953, 0.5089, 0.2509]) -Greedy action tensor([-0.6064, 0.9320, -0.0143, 0.4884]) tensor([0.0957, 0.4455, 0.1729, 0.2859]) -Greedy action tensor([-1.8105, -0.4683, 0.6016, -0.1387]) tensor([0.0469, 0.1796, 0.5237, 0.2498]) -Greedy action tensor([-1.6967, -0.4939, 0.5656, -0.0344]) tensor([0.0521, 0.1734, 0.5001, 0.2745]) -Greedy action tensor([-1.9462, -0.4533, 0.6657, -0.1822]) tensor([0.0401, 0.1786, 0.5470, 0.2343]) -Greedy action tensor([-1.8132, -0.2947, 0.6041, -0.0889]) tensor([0.0447, 0.2039, 0.5009, 0.2505]) -Greedy action tensor([-1.5686, -0.4203, 0.7168, 0.3776]) tensor([0.0477, 0.1502, 0.4684, 0.3337]) -Greedy action tensor([-0.3722, 1.0282, 0.0565, 0.1468]) tensor([0.1209, 0.4904, 0.1856, 0.2031]) -Greedy action tensor([-1.7996, -0.8219, 0.9722, 0.0210]) tensor([0.0387, 0.1030, 0.6191, 0.2392]) -Greedy action tensor([-1.8018, -0.1335, 0.5794, 0.0202]) tensor([0.0429, 0.2275, 0.4642, 0.2654]) -Greedy action tensor([-1.6502, -0.4298, 0.5267, -0.1188]) tensor([0.0561, 0.1900, 0.4945, 0.2593]) -Greedy action tensor([-1.8661, -0.3618, 0.6355, -0.1153]) tensor([0.0426, 0.1918, 0.5201, 0.2455]) -Greedy action tensor([-0.6196, 0.7428, -0.3562, -0.1333]) tensor([0.1277, 0.4986, 0.1661, 0.2076]) -Greedy action tensor([-1.9316, -0.4282, 0.6613, -0.1734]) tensor([0.0405, 0.1823, 0.5419, 0.2352]) -Greedy action tensor([-1.9270, -0.4227, 0.6582, -0.1707]) tensor([0.0407, 0.1833, 0.5402, 0.2358]) -Greedy action tensor([-1.8292, -0.3412, 0.6060, -0.1094]) tensor([0.0446, 0.1974, 0.5091, 0.2489]) -Greedy action tensor([-0.4829, 0.2603, 0.4857, 0.8768]) tensor([0.1038, 0.2183, 0.2735, 0.4044]) -Greedy action tensor([-1.8021, -0.3968, 0.6511, -0.0987]) tensor([0.0451, 0.1837, 0.5238, 0.2475]) -Greedy action tensor([-1.6789, -0.1521, 0.5002, -0.0950]) tensor([0.0518, 0.2383, 0.4576, 0.2523]) -Greedy action tensor([-1.9172, -0.4107, 0.6556, -0.1635]) tensor([0.0410, 0.1850, 0.5372, 0.2368]) -Greedy action tensor([-1.5638, -0.2456, 0.6028, 0.0947]) tensor([0.0534, 0.1996, 0.4663, 0.2806]) -Greedy action tensor([-1.8718, -0.4497, 0.6374, -0.1395]) tensor([0.0433, 0.1795, 0.5324, 0.2448]) -Greedy action tensor([-1.5759, -0.3337, 0.5750, 0.1120]) tensor([0.0542, 0.1876, 0.4654, 0.2929]) -Greedy action tensor([-1.4883, 0.6181, 0.3452, 0.1039]) tensor([0.0490, 0.4031, 0.3068, 0.2410]) -Greedy action tensor([-1.7499, -0.1685, 0.5884, 0.0429]) tensor([0.0450, 0.2187, 0.4662, 0.2702]) -Greedy action tensor([-1.7008, -0.5168, 0.5484, -0.0980]) tensor([0.0534, 0.1746, 0.5066, 0.2654]) -Greedy action tensor([-1.5818, -0.0574, 0.3989, 0.1160]) tensor([0.0546, 0.2509, 0.3960, 0.2984]) -Greedy action tensor([-1.6107, -0.2643, 0.4787, -0.0741]) tensor([0.0569, 0.2187, 0.4598, 0.2646]) -Greedy action tensor([-1.8410, -0.3780, 0.6073, -0.1275]) tensor([0.0446, 0.1925, 0.5156, 0.2473]) -Greedy action tensor([-1.8322, -0.3030, 0.5917, -0.1459]) tensor([0.0448, 0.2069, 0.5062, 0.2421]) -Greedy action tensor([-1.5210, -0.2046, 0.5061, 0.0529]) tensor([0.0583, 0.2175, 0.4428, 0.2814]) -Greedy action tensor([-0.8471, -0.0738, 0.2364, -0.1017]) tensor([0.1215, 0.2633, 0.3591, 0.2561]) -Greedy action tensor([-1.1032, 0.9545, 0.1936, 0.2952]) tensor([0.0605, 0.4734, 0.2212, 0.2449]) -Greedy action tensor([-1.6073, -0.5600, 0.4652, 0.0072]) tensor([0.0594, 0.1694, 0.4723, 0.2988]) -Greedy action tensor([-1.5731, -0.4639, 0.6954, 0.1630]) tensor([0.0516, 0.1565, 0.4989, 0.2929]) -Greedy action tensor([-1.6219, -0.5327, 0.5402, -0.0510]) tensor([0.0572, 0.1701, 0.4973, 0.2753]) -Greedy action tensor([-1.7988, -0.2629, 0.6115, -0.0973]) tensor([0.0449, 0.2086, 0.5002, 0.2462]) -Greedy action tensor([ 0.2136, -0.1213, -0.1149, -0.9236]) tensor([0.3628, 0.2596, 0.2612, 0.1164]) -Greedy action tensor([ 0.5672, -0.5750, -0.5622, -0.3805]) tensor([0.4926, 0.1572, 0.1592, 0.1909]) -Greedy action tensor([-0.8508, -0.2330, -0.0969, -0.4007]) tensor([0.1527, 0.2833, 0.3245, 0.2395]) -Greedy action tensor([ 1.0821, 0.2737, 0.5497, -0.4093]) tensor([0.4429, 0.1973, 0.2601, 0.0997]) -Greedy action tensor([-0.2032, -0.3975, -0.2740, 0.1797]) tensor([0.2369, 0.1950, 0.2207, 0.3474]) -Greedy action tensor([1.1547, 0.3825, 0.5173, 0.6814]) tensor([0.3826, 0.1768, 0.2023, 0.2383]) -Greedy action tensor([-0.4371, 1.0290, 0.6183, -0.5062]) tensor([0.1094, 0.4741, 0.3144, 0.1021]) -Greedy action tensor([ 0.6274, 0.4467, -0.3795, 0.3890]) tensor([0.3347, 0.2794, 0.1223, 0.2637]) -Greedy action tensor([ 0.0992, -1.7376, -0.2065, 0.6416]) tensor([0.2765, 0.0441, 0.2037, 0.4757]) -Greedy action tensor([ 0.4891, -1.0406, -0.3177, -0.2633]) tensor([0.4686, 0.1015, 0.2091, 0.2208]) -Greedy action tensor([ 0.6030, 0.5352, 0.3301, -0.5315]) tensor([0.3314, 0.3097, 0.2523, 0.1066]) -Greedy action tensor([ 0.4450, -0.0640, 0.3846, -0.1388]) tensor([0.3226, 0.1939, 0.3036, 0.1799]) -Greedy action tensor([-0.4787, 0.0666, 1.0586, -0.4110]) tensor([0.1184, 0.2042, 0.5507, 0.1267]) -Greedy action tensor([-0.3433, 0.5990, -0.7597, -0.0826]) tensor([0.1811, 0.4646, 0.1194, 0.2350]) -Greedy action tensor([-0.7658, -1.0350, -1.1509, 0.0434]) tensor([0.2132, 0.1629, 0.1451, 0.4789]) -Greedy action tensor([ 1.0850, -1.6912, -0.0741, 0.6701]) tensor([0.4910, 0.0306, 0.1541, 0.3243]) -Greedy action tensor([-0.7091, -1.3557, 0.2496, -0.0744]) tensor([0.1662, 0.0870, 0.4334, 0.3135]) -Greedy action tensor([ 1.5010, -1.5255, 0.4183, 0.8771]) tensor([0.5200, 0.0252, 0.1761, 0.2787]) -Greedy action tensor([-0.6332, -0.3934, -1.7935, -0.5772]) tensor([0.2746, 0.3490, 0.0861, 0.2904]) -Greedy action tensor([ 1.2685, -1.3390, 0.8824, 0.5924]) tensor([0.4421, 0.0326, 0.3005, 0.2248]) -Greedy action tensor([-0.2829, -1.1217, -0.5550, 0.5920]) tensor([0.2177, 0.0941, 0.1659, 0.5223]) -Greedy action tensor([-1.4741, -1.2690, 0.7951, -0.2405]) tensor([0.0652, 0.0801, 0.6308, 0.2239]) -Greedy action tensor([-0.2525, -0.3169, 0.0164, -0.4502]) tensor([0.2459, 0.2306, 0.3218, 0.2018]) -Greedy action tensor([ 1.0296, -1.4826, -0.3203, 1.2589]) tensor([0.3849, 0.0312, 0.0998, 0.4841]) -Greedy action tensor([ 0.7005, -0.1442, -0.0634, -0.1975]) tensor([0.4342, 0.1866, 0.2023, 0.1769]) -Greedy action tensor([-1.0562, -0.0405, -0.4843, -0.5203]) tensor([0.1381, 0.3813, 0.2446, 0.2360]) -Greedy action tensor([ 0.4415, -0.7905, 0.3836, -0.8930]) tensor([0.4002, 0.1167, 0.3777, 0.1054]) -Greedy action tensor([-1.0999, -0.2029, -0.5340, -0.9779]) tensor([0.1577, 0.3866, 0.2776, 0.1781]) -Greedy action tensor([ 0.1646, 0.0592, -0.3925, 0.3141]) tensor([0.2752, 0.2476, 0.1576, 0.3195]) -Greedy action tensor([ 0.1784, -0.4556, 0.4914, -0.1130]) tensor([0.2743, 0.1455, 0.3752, 0.2050]) -Greedy action tensor([-1.1020, -0.7734, 0.0567, 0.7249]) tensor([0.0848, 0.1178, 0.2702, 0.5271]) -Greedy action tensor([-0.5109, -0.2132, -0.9591, -0.0418]) tensor([0.2181, 0.2938, 0.1393, 0.3487]) -Greedy action tensor([-0.8280, -1.0105, 0.0348, 0.4963]) tensor([0.1256, 0.1046, 0.2976, 0.4722]) -Greedy action tensor([ 0.3678, -1.8926, -0.7070, 0.9213]) tensor([0.3140, 0.0328, 0.1072, 0.5461]) -Greedy action tensor([ 0.0299, 0.0008, 0.6445, -0.3079]) tensor([0.2206, 0.2143, 0.4078, 0.1573]) -Greedy action tensor([ 0.2452, -0.3553, -0.0156, -0.7241]) tensor([0.3706, 0.2033, 0.2855, 0.1406]) -Greedy action tensor([-0.0416, -0.0584, 0.3389, -0.1073]) tensor([0.2282, 0.2244, 0.3338, 0.2137]) -Greedy action tensor([ 0.1118, -1.5508, 0.8034, -0.0020]) tensor([0.2452, 0.0465, 0.4896, 0.2188]) -Greedy action tensor([-2.1779, -1.0114, 0.1527, -0.1337]) tensor([0.0450, 0.1445, 0.4629, 0.3476]) -Greedy action tensor([-0.3207, -0.5393, -0.1746, -0.5724]) tensor([0.2675, 0.2150, 0.3096, 0.2080]) -Greedy action tensor([ 0.9253, -0.4377, -0.6149, -0.2322]) tensor([0.5604, 0.1434, 0.1201, 0.1761]) -Greedy action tensor([0.2060, 0.2975, 0.4710, 0.0013]) tensor([0.2373, 0.2600, 0.3093, 0.1934]) -Greedy action tensor([ 0.2528, -0.2627, 0.0984, -0.5806]) tensor([0.3462, 0.2067, 0.2966, 0.1504]) -Greedy action tensor([-0.5092, -1.2522, 0.2252, -0.4490]) tensor([0.2164, 0.1029, 0.4509, 0.2298]) -Greedy action tensor([-1.0535, -1.0292, 0.2538, -0.7594]) tensor([0.1416, 0.1451, 0.5233, 0.1900]) -Greedy action tensor([ 0.2705, -0.6527, 0.6773, 0.4265]) tensor([0.2458, 0.0976, 0.3692, 0.2873]) -Greedy action tensor([ 0.2344, 0.4439, -0.5488, 0.1772]) tensor([0.2751, 0.3393, 0.1257, 0.2599]) -Greedy action tensor([-0.4712, -1.1402, -0.1953, -1.1908]) tensor([0.3015, 0.1544, 0.3973, 0.1468]) -Greedy action tensor([ 0.1800, -0.3338, 0.8017, -0.7530]) tensor([0.2595, 0.1552, 0.4832, 0.1021]) -Greedy action tensor([-0.5206, -0.6583, 0.0394, -0.4844]) tensor([0.2147, 0.1870, 0.3758, 0.2226]) -Greedy action tensor([-0.5029, -0.1500, 0.5023, -0.6636]) tensor([0.1665, 0.2369, 0.4549, 0.1418]) -Greedy action tensor([ 1.1278, -0.9133, -0.8333, 0.3913]) tensor([0.5716, 0.0742, 0.0804, 0.2737]) -Greedy action tensor([-0.3768, -0.3245, 0.5250, -0.5147]) tensor([0.1856, 0.1955, 0.4573, 0.1617]) -Greedy action tensor([-0.6665, -0.3583, -0.1052, -0.7611]) tensor([0.1991, 0.2709, 0.3489, 0.1811]) -Greedy action tensor([ 0.8454, -0.6335, 0.2183, 0.8349]) tensor([0.3634, 0.0828, 0.1941, 0.3596]) -Greedy action tensor([ 0.2742, -0.7262, -0.0459, 0.5099]) tensor([0.2976, 0.1095, 0.2161, 0.3768]) -Greedy action tensor([-0.1322, -0.0485, -0.2036, 0.0760]) tensor([0.2353, 0.2558, 0.2191, 0.2898]) -Greedy action tensor([ 0.3900, -1.0335, 0.0470, 0.1116]) tensor([0.3693, 0.0890, 0.2621, 0.2796]) -Greedy action tensor([-0.2909, 0.3624, -1.0259, -0.1789]) tensor([0.2212, 0.4252, 0.1061, 0.2475]) -Greedy action tensor([ 0.9415, -0.1712, -0.2359, 0.0323]) tensor([0.4903, 0.1611, 0.1510, 0.1975]) -Greedy action tensor([ 0.7361, -0.8195, 0.2997, -0.2194]) tensor([0.4460, 0.0941, 0.2883, 0.1715]) -Greedy action tensor([ 0.6685, -0.6325, 0.6803, 0.2847]) tensor([0.3372, 0.0918, 0.3412, 0.2297]) -Greedy action tensor([ 0.2151, -0.2198, 0.6542, -0.4775]) tensor([0.2704, 0.1750, 0.4194, 0.1352]) -Greedy action tensor([ 0.8140, -0.1376, 0.6415, -0.0749]) tensor([0.3790, 0.1463, 0.3189, 0.1558]) -Greedy action tensor([-0.9579, -0.3367, -0.2828, -0.2797]) tensor([0.1472, 0.2739, 0.2890, 0.2899]) -Greedy action tensor([ 0.5699, 0.1448, 0.0545, -0.2211]) tensor([0.3698, 0.2417, 0.2209, 0.1676]) -Greedy action tensor([ 0.0311, 0.0778, -0.2904, -0.2649]) tensor([0.2844, 0.2980, 0.2062, 0.2115]) -Greedy action tensor([-0.1401, 0.2286, -0.9106, -0.2950]) tensor([0.2656, 0.3840, 0.1229, 0.2275]) -Greedy action tensor([-0.9005, -0.2389, -0.0754, -0.8010]) tensor([0.1581, 0.3064, 0.3608, 0.1747]) -Greedy action tensor([-0.1207, -0.5873, 0.3572, -0.1974]) tensor([0.2400, 0.1505, 0.3871, 0.2223]) -Greedy action tensor([-0.2849, 0.0826, 0.0663, -0.3143]) tensor([0.2068, 0.2986, 0.2938, 0.2008]) -Greedy action tensor([ 0.7046, -1.0622, -0.2370, -0.3054]) tensor([0.5194, 0.0888, 0.2026, 0.1892]) -Greedy action tensor([ 0.6776, -0.1278, 0.4392, -0.0076]) tensor([0.3651, 0.1632, 0.2877, 0.1840]) -Greedy action tensor([ 0.2783, -1.6855, 0.1138, -0.8761]) tensor([0.4341, 0.0609, 0.3682, 0.1368]) -Greedy action tensor([ 0.2832, -1.1942, -0.1815, 0.8514]) tensor([0.2761, 0.0630, 0.1735, 0.4874]) -Greedy action tensor([-0.9670, -0.5763, -1.8179, 0.6137]) tensor([0.1288, 0.1904, 0.0550, 0.6258]) -Greedy action tensor([-0.0485, -0.9173, 0.4732, -1.0038]) tensor([0.2866, 0.1202, 0.4829, 0.1103]) -Greedy action tensor([-0.6040, -0.2847, 0.7762, -0.2835]) tensor([0.1294, 0.1780, 0.5143, 0.1782]) -Greedy action tensor([-0.2137, -0.3240, 0.2774, -0.6548]) tensor([0.2396, 0.2146, 0.3916, 0.1542]) -Greedy action tensor([ 0.1650, -0.2769, -0.1011, 0.3458]) tensor([0.2772, 0.1782, 0.2124, 0.3321]) -Greedy action tensor([ 1.0847, -0.0412, -0.2582, 0.3118]) tensor([0.4885, 0.1584, 0.1275, 0.2255]) -Greedy action tensor([-0.2554, -0.8150, 0.0091, -0.4527]) tensor([0.2706, 0.1546, 0.3526, 0.2222]) -Greedy action tensor([ 0.9465, -0.4460, -0.0699, -0.3251]) tensor([0.5289, 0.1314, 0.1914, 0.1483]) -Greedy action tensor([ 0.8899, -0.3691, 0.0288, -0.2052]) tensor([0.4899, 0.1391, 0.2071, 0.1639]) -Greedy action tensor([ 0.8377, -0.8761, 0.0360, -0.6173]) tensor([0.5370, 0.0968, 0.2409, 0.1253]) -Greedy action tensor([ 0.8997, -0.8450, 0.0972, -0.4807]) tensor([0.5335, 0.0932, 0.2391, 0.1342]) -Greedy action tensor([ 0.7352, -0.3990, 0.0244, -0.2118]) tensor([0.4544, 0.1462, 0.2232, 0.1763]) -Greedy action tensor([ 1.0544, -0.8665, 0.0498, -0.2480]) tensor([0.5604, 0.0821, 0.2052, 0.1523]) -Greedy action tensor([ 0.8397, -0.4166, -0.1059, -0.3137]) tensor([0.5028, 0.1432, 0.1953, 0.1587]) -Greedy action tensor([ 1.0013, -0.4306, -0.1419, -0.3743]) tensor([0.5524, 0.1319, 0.1761, 0.1396]) -Greedy action tensor([ 0.6086, 0.2447, -0.1406, -0.3169]) tensor([0.3900, 0.2710, 0.1844, 0.1546]) -Greedy action tensor([ 1.0497, -0.7180, 0.1315, -0.5149]) tensor([0.5621, 0.0960, 0.2244, 0.1176]) -Greedy action tensor([ 0.7276, -0.3271, 0.0417, -0.2499]) tensor([0.4488, 0.1563, 0.2260, 0.1689]) -Greedy action tensor([ 0.5459, -0.2894, -0.0507, -0.3809]) tensor([0.4201, 0.1822, 0.2313, 0.1663]) -Greedy action tensor([ 1.0078, -0.5715, -0.0666, -0.5244]) tensor([0.5670, 0.1169, 0.1936, 0.1225]) -Greedy action tensor([ 1.1236, -0.3883, 0.0342, -0.2015]) tensor([0.5486, 0.1210, 0.1846, 0.1458]) -Greedy action tensor([ 1.2157, -0.3948, 0.0792, -0.6878]) tensor([0.5989, 0.1197, 0.1922, 0.0893]) -Greedy action tensor([ 0.6401, -0.3685, 0.0099, -0.2265]) tensor([0.4315, 0.1574, 0.2298, 0.1814]) -Greedy action tensor([ 0.7648, -0.5312, -0.0230, -0.4066]) tensor([0.4906, 0.1342, 0.2231, 0.1520]) -Greedy action tensor([ 1.0088, -0.6776, 0.1621, -0.5087]) tensor([0.5455, 0.1010, 0.2339, 0.1196]) -Greedy action tensor([ 0.5174, -0.2517, -0.1033, -0.1238]) tensor([0.3956, 0.1833, 0.2127, 0.2084]) -Greedy action tensor([ 0.9716, -0.9856, 0.0553, -0.7352]) tensor([0.5805, 0.0820, 0.2322, 0.1053]) -Greedy action tensor([ 0.7006, -0.2132, -0.0811, -0.0784]) tensor([0.4315, 0.1730, 0.1975, 0.1980]) -Greedy action tensor([ 1.2439, -0.8069, 0.0754, -0.5123]) tensor([0.6203, 0.0798, 0.1928, 0.1071]) -Greedy action tensor([ 1.0978, -0.8211, -0.0372, -0.8172]) tensor([0.6190, 0.0908, 0.1990, 0.0912]) -Greedy action tensor([ 0.2052, 0.2973, -0.1096, -0.4854]) tensor([0.3005, 0.3295, 0.2194, 0.1506]) -Greedy action tensor([ 0.5958, -0.4618, -0.0985, -0.2185]) tensor([0.4367, 0.1517, 0.2181, 0.1935]) -Greedy action tensor([ 0.6438, -0.5251, -0.0081, -0.2861]) tensor([0.4492, 0.1396, 0.2340, 0.1772]) -Greedy action tensor([ 0.5351, -0.1751, 0.0899, -0.1946]) tensor([0.3825, 0.1880, 0.2451, 0.1844]) -Greedy action tensor([ 0.5096, -0.5527, -0.1065, -0.1715]) tensor([0.4181, 0.1445, 0.2258, 0.2116]) -Greedy action tensor([ 0.7746, -0.6115, 0.0600, -0.1311]) tensor([0.4665, 0.1166, 0.2283, 0.1886]) -Greedy action tensor([ 0.7578, -0.7322, 0.0314, -0.4231]) tensor([0.4960, 0.1118, 0.2399, 0.1523]) -Greedy action tensor([ 0.8173, -0.2360, -0.1793, -0.1810]) tensor([0.4793, 0.1672, 0.1769, 0.1766]) -Greedy action tensor([ 0.3251, -0.1138, 0.0334, -0.1026]) tensor([0.3285, 0.2118, 0.2454, 0.2142]) -Greedy action tensor([ 0.8477, -0.5271, 0.0964, -0.8252]) tensor([0.5229, 0.1322, 0.2467, 0.0982]) -Greedy action tensor([ 0.6185, -0.4941, -0.0414, -0.4366]) tensor([0.4558, 0.1498, 0.2356, 0.1587]) -Greedy action tensor([ 0.6234, 0.1172, -0.3008, -0.0378]) tensor([0.3975, 0.2396, 0.1577, 0.2052]) -Greedy action tensor([ 0.9787, -0.5729, -0.2472, -0.3994]) tensor([0.5690, 0.1206, 0.1670, 0.1434]) -Greedy action tensor([ 0.7404, -0.0896, -0.1461, -0.4861]) tensor([0.4670, 0.2036, 0.1924, 0.1370]) -Greedy action tensor([ 0.5098, -0.2077, -0.0356, 0.0106]) tensor([0.3739, 0.1825, 0.2167, 0.2270]) -Greedy action tensor([ 0.5121, -0.2057, 0.0104, -0.0573]) tensor([0.3761, 0.1834, 0.2277, 0.2128]) -Greedy action tensor([ 0.8562, -0.5380, -0.0956, -0.2092]) tensor([0.5054, 0.1254, 0.1951, 0.1742]) -Greedy action tensor([ 0.7169, -0.3967, -0.1468, -0.4045]) tensor([0.4817, 0.1582, 0.2031, 0.1570]) -Greedy action tensor([ 0.6449, -0.4318, -0.0070, -0.3433]) tensor([0.4476, 0.1525, 0.2332, 0.1666]) -Greedy action tensor([ 0.4189, -0.2906, -0.1599, -0.0685]) tensor([0.3750, 0.1844, 0.2102, 0.2303]) -Greedy action tensor([ 0.5842, -0.5310, -0.1281, -0.1984]) tensor([0.4394, 0.1441, 0.2156, 0.2009]) -Greedy action tensor([ 0.5241, -0.1825, -0.0293, -0.0839]) tensor([0.3827, 0.1888, 0.2201, 0.2084]) -Greedy action tensor([ 0.5158, 0.1174, -0.0086, -0.2787]) tensor([0.3683, 0.2473, 0.2180, 0.1664]) -Greedy action tensor([0.1493, 0.3244, 0.0054, 0.0547]) tensor([0.2521, 0.3003, 0.2183, 0.2293]) -Greedy action tensor([ 0.9416, -0.3603, -0.1963, -0.3187]) tensor([0.5330, 0.1450, 0.1708, 0.1511]) -Greedy action tensor([ 0.7683, -0.3247, -0.0951, -0.3664]) tensor([0.4811, 0.1613, 0.2029, 0.1547]) -Greedy action tensor([ 0.8858, -0.6486, 0.1368, -0.4292]) tensor([0.5110, 0.1102, 0.2416, 0.1372]) -Greedy action tensor([ 0.6786, 0.1033, -0.1304, -0.0906]) tensor([0.4047, 0.2276, 0.1802, 0.1875]) -Greedy action tensor([ 0.6055, -0.5546, -0.1288, -0.2014]) tensor([0.4465, 0.1400, 0.2143, 0.1992]) -Greedy action tensor([ 0.7090, -0.5792, -0.1573, -0.2752]) tensor([0.4831, 0.1332, 0.2032, 0.1805]) -Greedy action tensor([ 0.8733, -0.2489, 0.0494, -0.6541]) tensor([0.5047, 0.1643, 0.2214, 0.1096]) -Greedy action tensor([ 0.8493, -0.5459, -0.0318, -0.5038]) tensor([0.5207, 0.1290, 0.2157, 0.1346]) -Greedy action tensor([ 0.8560, -0.4639, -0.0635, -0.3376]) tensor([0.5079, 0.1357, 0.2025, 0.1540]) -Greedy action tensor([ 1.6492, -0.8330, -0.0455, -0.6859]) tensor([0.7331, 0.0613, 0.1346, 0.0710]) -Greedy action tensor([ 0.8353, -0.3082, -0.0477, 0.0642]) tensor([0.4556, 0.1452, 0.1884, 0.2107]) -Greedy action tensor([ 0.7419, -0.1352, -0.0045, -0.2690]) tensor([0.4437, 0.1846, 0.2103, 0.1614]) -Greedy action tensor([ 0.4037, -0.1811, -0.0568, -0.3730]) tensor([0.3776, 0.2104, 0.2383, 0.1737]) -Greedy action tensor([ 0.5811, -0.0558, -0.1333, 0.0155]) tensor([0.3866, 0.2045, 0.1893, 0.2196]) -Greedy action tensor([ 0.4902, -0.5160, -0.1100, -0.2656]) tensor([0.4195, 0.1534, 0.2302, 0.1970]) -Greedy action tensor([ 0.8321, -0.2046, -0.1960, -0.5948]) tensor([0.5122, 0.1816, 0.1832, 0.1229]) -Greedy action tensor([ 0.7726, -0.5498, -0.2292, -0.6967]) tensor([0.5365, 0.1430, 0.1970, 0.1235]) -Greedy action tensor([ 0.8851, -0.3449, -0.0688, -0.4436]) tensor([0.5148, 0.1505, 0.1983, 0.1363]) -Greedy action tensor([ 1.0000, -0.7738, -0.1011, -0.3701]) tensor([0.5694, 0.0966, 0.1893, 0.1447]) -Greedy action tensor([ 1.1317, -0.5118, 0.0323, -0.4097]) tensor([0.5745, 0.1111, 0.1914, 0.1230]) -Greedy action tensor([ 1.0091, -0.5842, -0.0906, -0.4410]) tensor([0.5647, 0.1148, 0.1880, 0.1325]) -Greedy action tensor([ 0.9163, -0.4260, -0.0198, -0.5845]) tensor([0.5330, 0.1392, 0.2090, 0.1188]) -Greedy action tensor([ 0.1961, 0.0960, -0.1088, -0.3729]) tensor([0.3117, 0.2820, 0.2298, 0.1765]) -Greedy action tensor([ 0.1346, 0.0878, -0.1130, -0.3176]) tensor([0.2966, 0.2831, 0.2316, 0.1887]) -Greedy action tensor([ 0.7407, -0.6628, -0.0660, -0.2695]) tensor([0.4863, 0.1195, 0.2171, 0.1771]) -Greedy action tensor([ 0.7100, -0.5180, -0.0417, -0.2881]) tensor([0.4688, 0.1373, 0.2211, 0.1728]) -Greedy action tensor([ 1.0624, -0.9451, 0.0990, -0.5982]) tensor([0.5862, 0.0787, 0.2237, 0.1114]) -Greedy action tensor([ 0.4719, 0.0960, -0.0176, -0.1263]) tensor([0.3509, 0.2410, 0.2151, 0.1930]) -Greedy action tensor([ 0.4833, -0.1174, -0.1645, -0.2728]) tensor([0.3935, 0.2158, 0.2059, 0.1848]) -Greedy action tensor([ 0.3514, -0.2572, -0.0235, -0.1937]) tensor([0.3557, 0.1935, 0.2445, 0.2062]) -Greedy action tensor([ 0.5899, -0.4341, -0.1240, -0.3314]) tensor([0.4451, 0.1598, 0.2179, 0.1771]) -Greedy action tensor([ 0.4331, -0.3277, -0.1202, -0.0320]) tensor([0.3745, 0.1750, 0.2153, 0.2352]) -Greedy action tensor([ 0.5994, -0.4745, -0.1975, -0.1117]) tensor([0.4379, 0.1496, 0.1974, 0.2151]) -Greedy action tensor([ 0.3741, -0.2852, -0.1013, -0.1376]) tensor([0.3652, 0.1889, 0.2270, 0.2189]) -Greedy action tensor([ 1.8943e+00, -5.0783e-02, -1.8242e-03, -2.1272e-02]) tensor([0.6943, 0.0993, 0.1042, 0.1022]) -Greedy action tensor([ 1.2991, -0.3925, -0.2762, 0.1596]) tensor([0.5844, 0.1077, 0.1209, 0.1870]) -Greedy action tensor([ 1.6720, -0.2002, -0.5615, 0.1343]) tensor([0.6776, 0.1042, 0.0726, 0.1456]) -Greedy action tensor([ 1.0704, -0.5054, -0.1550, 0.5903]) tensor([0.4719, 0.0976, 0.1386, 0.2920]) -Greedy action tensor([ 1.2148, -0.2768, -0.1077, -0.0911]) tensor([0.5674, 0.1277, 0.1512, 0.1537]) -Greedy action tensor([ 0.8452, -0.2675, -0.0595, 0.0975]) tensor([0.4532, 0.1489, 0.1834, 0.2146]) -Greedy action tensor([ 1.5602e+00, -3.8749e-01, -4.3996e-01, 7.9679e-04]) tensor([0.6720, 0.0958, 0.0909, 0.1413]) -Greedy action tensor([ 1.7220, -0.4093, -0.1080, 0.0718]) tensor([0.6798, 0.0807, 0.1090, 0.1305]) -Greedy action tensor([ 1.4488, -0.4243, -0.1169, 0.4856]) tensor([0.5733, 0.0881, 0.1198, 0.2188]) -Greedy action tensor([ 2.0211, -0.2063, -0.6480, 0.5245]) tensor([0.7138, 0.0769, 0.0495, 0.1598]) -Greedy action tensor([ 1.4381, -0.2220, -1.0322, 0.4382]) tensor([0.6088, 0.1157, 0.0515, 0.2240]) -Greedy action tensor([ 1.0549, -0.3099, 0.0908, 0.0859]) tensor([0.4960, 0.1267, 0.1891, 0.1882]) -Greedy action tensor([ 1.2512, -0.7236, -0.4273, 0.6489]) tensor([0.5339, 0.0741, 0.0997, 0.2923]) -Greedy action tensor([ 1.6630, -0.0602, -0.4802, 0.0402]) tensor([0.6697, 0.1196, 0.0785, 0.1322]) -Greedy action tensor([ 1.6759, -0.5404, -0.2513, 0.7639]) tensor([0.6038, 0.0658, 0.0879, 0.2425]) -Greedy action tensor([ 1.3955, -0.5486, -0.3378, 0.5668]) tensor([0.5693, 0.0815, 0.1006, 0.2486]) -Greedy action tensor([ 1.3516, -0.4520, -0.4735, -0.0337]) tensor([0.6345, 0.1045, 0.1023, 0.1588]) -Greedy action tensor([ 1.4484, -0.4274, -0.6211, 0.5001]) tensor([0.5999, 0.0919, 0.0757, 0.2324]) -Greedy action tensor([ 1.2660, -0.3882, -0.3485, 0.4168]) tensor([0.5501, 0.1052, 0.1095, 0.2353]) -Greedy action tensor([ 1.0612, -0.0359, -0.8140, -0.0502]) tensor([0.5506, 0.1838, 0.0844, 0.1812]) -Greedy action tensor([ 1.6721, -0.0607, -0.1725, 0.5879]) tensor([0.5977, 0.1057, 0.0945, 0.2021]) -Greedy action tensor([ 1.3727, -0.7842, -0.2106, 0.0160]) tensor([0.6335, 0.0733, 0.1301, 0.1631]) -Greedy action tensor([ 1.3833, -0.6591, -0.3443, 0.0512]) tensor([0.6364, 0.0825, 0.1131, 0.1680]) -Greedy action tensor([ 1.6877, -0.1171, -0.7994, 0.4670]) tensor([0.6482, 0.1066, 0.0539, 0.1912]) -Greedy action tensor([ 1.3967, 0.1465, -0.7658, 0.3757]) tensor([0.5676, 0.1626, 0.0653, 0.2045]) -Greedy action tensor([ 1.5212, -0.7514, -0.3049, 0.6563]) tensor([0.5934, 0.0611, 0.0956, 0.2499]) -Greedy action tensor([ 1.6069, -0.7048, -0.3371, 0.3660]) tensor([0.6530, 0.0647, 0.0935, 0.1888]) -Greedy action tensor([ 1.9208, -1.2116, -0.1216, 1.0300]) tensor([0.6315, 0.0275, 0.0819, 0.2591]) -Greedy action tensor([ 1.4182, -0.5655, 0.0248, 0.6725]) tensor([0.5376, 0.0739, 0.1334, 0.2550]) -Greedy action tensor([ 1.4217, -0.0875, -0.9617, 0.0139]) tensor([0.6419, 0.1419, 0.0592, 0.1570]) -Greedy action tensor([ 1.8523, -0.3203, -0.8497, 0.5792]) tensor([0.6845, 0.0779, 0.0459, 0.1916]) -Greedy action tensor([ 1.9345, -0.8554, -0.1579, 0.5359]) tensor([0.6984, 0.0429, 0.0862, 0.1725]) -Greedy action tensor([ 1.8491, -0.4480, -0.1361, 0.6249]) tensor([0.6528, 0.0656, 0.0897, 0.1919]) -Greedy action tensor([ 1.4769, -0.5348, -0.6156, 0.2564]) tensor([0.6442, 0.0862, 0.0795, 0.1901]) -Greedy action tensor([ 1.3277, 0.6873, -0.1118, 0.5222]) tensor([0.4523, 0.2384, 0.1072, 0.2021]) -Greedy action tensor([ 1.0853, -0.5226, 0.0717, -0.1984]) tensor([0.5434, 0.1088, 0.1972, 0.1505]) -Greedy action tensor([ 1.8519, 0.0351, -0.4695, 0.4199]) tensor([0.6669, 0.1084, 0.0654, 0.1593]) -Greedy action tensor([ 1.3417, -0.2948, -0.3821, 0.2812]) tensor([0.5816, 0.1132, 0.1038, 0.2014]) -Greedy action tensor([ 1.6041, -0.5108, -0.4880, 0.3824]) tensor([0.6499, 0.0784, 0.0802, 0.1915]) -Greedy action tensor([ 1.3731, -0.0130, -0.6683, 0.3507]) tensor([0.5748, 0.1437, 0.0746, 0.2068]) -Greedy action tensor([ 2.1097, -1.0567, -0.3941, 0.8111]) tensor([0.7159, 0.0302, 0.0585, 0.1954]) -Greedy action tensor([ 1.7568, -1.0053, -0.5594, -0.2168]) tensor([0.7688, 0.0486, 0.0758, 0.1068]) -Greedy action tensor([ 2.1216, -0.3983, -0.6314, 0.3632]) tensor([0.7596, 0.0611, 0.0484, 0.1309]) -Greedy action tensor([ 1.4748, -0.7562, -0.5161, 0.6185]) tensor([0.5993, 0.0644, 0.0818, 0.2545]) -Greedy action tensor([2.2446, 0.4747, 0.1210, 0.2746]) tensor([0.6996, 0.1192, 0.0837, 0.0976]) -Greedy action tensor([ 1.5450, -0.4653, -0.5317, 0.3354]) tensor([0.6420, 0.0860, 0.0805, 0.1915]) -Greedy action tensor([ 1.7392, 0.3821, -0.9270, 0.0682]) tensor([0.6601, 0.1699, 0.0459, 0.1241]) -Greedy action tensor([ 1.3925, -0.2729, -0.4628, 0.3360]) tensor([0.5906, 0.1117, 0.0924, 0.2053]) -Greedy action tensor([ 1.5365, -0.5328, -0.3106, 0.6230]) tensor([0.5934, 0.0749, 0.0936, 0.2380]) -Greedy action tensor([ 1.6647, -0.3213, -0.7487, 0.5584]) tensor([0.6420, 0.0881, 0.0575, 0.2124]) -Greedy action tensor([2.5053, 0.4998, 0.3031, 0.1987]) tensor([0.7436, 0.1001, 0.0822, 0.0741]) -Greedy action tensor([ 1.2503, -0.5002, -0.2422, 0.2133]) tensor([0.5704, 0.0991, 0.1282, 0.2022]) -Greedy action tensor([ 1.6091, 0.4187, -0.4857, 0.3154]) tensor([0.5877, 0.1787, 0.0723, 0.1612]) -Greedy action tensor([ 1.4326, -0.6026, -0.1865, 0.5867]) tensor([0.5689, 0.0743, 0.1127, 0.2441]) -Greedy action tensor([ 1.8237, -0.7318, -0.3344, 0.6754]) tensor([0.6621, 0.0514, 0.0765, 0.2100]) -Greedy action tensor([ 1.8349, -1.0548, -0.2193, 0.6008]) tensor([0.6780, 0.0377, 0.0869, 0.1974]) -Greedy action tensor([ 1.8319, -0.2216, -0.0536, 0.5615]) tensor([0.6407, 0.0822, 0.0972, 0.1799]) -Greedy action tensor([ 1.7826, -0.9791, -0.0821, 0.2361]) tensor([0.6987, 0.0442, 0.1083, 0.1488]) -Greedy action tensor([ 1.5199, -0.3295, -0.6083, 0.5950]) tensor([0.5978, 0.0940, 0.0712, 0.2370]) -Greedy action tensor([ 1.1994, -0.6953, -0.4285, 0.7983]) tensor([0.4960, 0.0746, 0.0974, 0.3321]) -Greedy action tensor([ 1.8483, -0.8137, -0.5059, 0.4560]) tensor([0.7076, 0.0494, 0.0672, 0.1758]) -Greedy action tensor([ 1.2809, -0.3093, -0.5842, 0.1932]) tensor([0.5897, 0.1202, 0.0913, 0.1987]) -Greedy action tensor([ 1.7168, 0.0554, -1.0562, -0.1803]) tensor([0.7131, 0.1354, 0.0445, 0.1070]) -Greedy action tensor([ 0.7173, -0.5407, 0.0321, 0.1046]) tensor([0.4292, 0.1220, 0.2163, 0.2326]) -Greedy action tensor([ 1.3760, -0.4885, -0.4732, 0.1879]) tensor([0.6184, 0.0958, 0.0973, 0.1885]) -Greedy action tensor([2.5619, 0.4987, 0.1772, 0.4288]) tensor([0.7476, 0.0950, 0.0689, 0.0886]) -Greedy action tensor([ 1.7323, 0.0261, -0.6873, 0.6007]) tensor([0.6277, 0.1140, 0.0558, 0.2025]) -Greedy action tensor([ 1.1782, -0.7555, -0.2976, 0.1528]) tensor([0.5774, 0.0835, 0.1320, 0.2071]) -Greedy action tensor([ 1.5044, 0.3762, -0.4611, 0.5001]) tensor([0.5464, 0.1768, 0.0765, 0.2002]) -Greedy action tensor([ 1.5158, -0.5546, 0.0537, 0.5362]) tensor([0.5769, 0.0728, 0.1337, 0.2166]) -Greedy action tensor([ 2.3370, -0.7101, -0.2822, 0.6465]) tensor([0.7664, 0.0364, 0.0558, 0.1414]) -Greedy action tensor([ 1.4026, -0.3448, -0.0475, 0.4118]) tensor([0.5618, 0.0979, 0.1318, 0.2086]) -Greedy action tensor([ 1.5286, -0.3428, -0.7927, 0.3994]) tensor([0.6348, 0.0977, 0.0623, 0.2052]) -Greedy action tensor([ 1.6761, -0.7182, -0.4124, 0.3805]) tensor([0.6717, 0.0613, 0.0832, 0.1839]) -Greedy action tensor([ 1.3965e+00, -2.7842e-01, 3.6055e-04, 6.5813e-01]) tensor([0.5228, 0.0979, 0.1294, 0.2498]) -Greedy action tensor([ 0.8962, -0.4027, -0.5700, 0.2993]) tensor([0.4868, 0.1328, 0.1124, 0.2680]) -Greedy action tensor([-0.0607, -0.3493, -0.2774, 0.2210]) tensor([0.2577, 0.1931, 0.2075, 0.3416]) -Greedy action tensor([ 1.4670, -0.6186, -0.2561, 0.3937]) tensor([0.6081, 0.0755, 0.1085, 0.2079]) -Greedy action tensor([ 1.7032, -0.2575, -0.2589, 0.2423]) tensor([0.6608, 0.0930, 0.0929, 0.1533]) -Greedy action tensor([ 1.8701, -0.4115, -0.3226, 0.5038]) tensor([0.6808, 0.0695, 0.0760, 0.1736]) -Greedy action tensor([ 1.7066, -0.4634, -0.4011, 0.2684]) tensor([0.6789, 0.0775, 0.0825, 0.1611]) -Greedy action tensor([-1.4388, -0.4322, 0.5547, 0.3189]) tensor([0.0593, 0.1621, 0.4350, 0.3436]) -Greedy action tensor([-1.7088, 0.1950, 0.4734, 0.0105]) tensor([0.0451, 0.3029, 0.4001, 0.2519]) -Greedy action tensor([-1.8486, -0.4535, 0.6482, -0.1001]) tensor([0.0436, 0.1760, 0.5297, 0.2506]) -Greedy action tensor([-1.9038, -0.4341, 0.6444, -0.1653]) tensor([0.0420, 0.1825, 0.5367, 0.2388]) -Greedy action tensor([-1.7845, 0.0465, 0.4957, -0.0849]) tensor([0.0445, 0.2774, 0.4348, 0.2433]) -Greedy action tensor([-1.9054, -0.4547, 0.6517, -0.1594]) tensor([0.0418, 0.1785, 0.5398, 0.2399]) -Greedy action tensor([-1.7581, -0.4883, 0.6236, -0.1410]) tensor([0.0490, 0.1743, 0.5300, 0.2467]) -Greedy action tensor([-1.5576, -0.4478, 0.6714, 0.3197]) tensor([0.0504, 0.1527, 0.4678, 0.3291]) -Greedy action tensor([-1.8692, -0.2809, 0.6163, -0.1935]) tensor([0.0430, 0.2106, 0.5166, 0.2298]) -Greedy action tensor([-1.3574, -0.5963, 0.3746, 0.0676]) tensor([0.0772, 0.1653, 0.4364, 0.3211]) -Greedy action tensor([-1.9238, -0.4370, 0.6564, -0.1690]) tensor([0.0410, 0.1812, 0.5409, 0.2369]) -Greedy action tensor([-1.7021, -0.3556, 0.7411, 0.2711]) tensor([0.0425, 0.1632, 0.4888, 0.3055]) -Greedy action tensor([-1.8529, -0.3408, 0.6229, -0.1208]) tensor([0.0433, 0.1965, 0.5152, 0.2449]) -Greedy action tensor([-1.9376, -0.4416, 0.6637, -0.1756]) tensor([0.0404, 0.1802, 0.5443, 0.2351]) -Greedy action tensor([-1.9073, -0.4527, 0.6540, -0.1590]) tensor([0.0417, 0.1786, 0.5401, 0.2396]) -Greedy action tensor([-1.1495, 0.0191, 0.1951, -0.0207]) tensor([0.0897, 0.2887, 0.3442, 0.2774]) -Greedy action tensor([-0.7596, -0.5226, 0.2066, 0.2877]) tensor([0.1291, 0.1636, 0.3393, 0.3680]) -Greedy action tensor([-1.8279, -0.4659, 0.6177, -0.1205]) tensor([0.0455, 0.1778, 0.5255, 0.2512]) -Greedy action tensor([-1.9155, -0.3920, 0.6503, -0.1631]) tensor([0.0410, 0.1883, 0.5340, 0.2367]) -Greedy action tensor([-0.6329, -0.3282, 0.2359, 0.0227]) tensor([0.1500, 0.2034, 0.3576, 0.2889]) -Greedy action tensor([-1.8279, -0.4559, 0.6045, -0.1286]) tensor([0.0459, 0.1809, 0.5223, 0.2509]) -Greedy action tensor([-1.4344, 0.5174, 0.3118, -0.0391]) tensor([0.0561, 0.3954, 0.3219, 0.2266]) -Greedy action tensor([-1.8823, -0.3464, 0.6104, -0.1373]) tensor([0.0426, 0.1980, 0.5154, 0.2440]) -Greedy action tensor([-0.5296, -0.5400, 0.2611, 0.3374]) tensor([0.1521, 0.1505, 0.3354, 0.3620]) -Greedy action tensor([-1.5513, -0.1421, 0.4885, 0.3114]) tensor([0.0520, 0.2129, 0.4000, 0.3351]) -Greedy action tensor([-1.9134, -0.4495, 0.6496, -0.1649]) tensor([0.0416, 0.1798, 0.5396, 0.2390]) -Greedy action tensor([-1.8298, -0.4749, 0.6127, -0.1108]) tensor([0.0455, 0.1765, 0.5238, 0.2541]) -Greedy action tensor([-1.9306, -0.4342, 0.6605, -0.1725]) tensor([0.0406, 0.1814, 0.5422, 0.2357]) -Greedy action tensor([-1.2035, 0.2048, 0.1481, 0.2570]) tensor([0.0754, 0.3084, 0.2914, 0.3249]) -Greedy action tensor([-1.9244, -0.4082, 0.6540, -0.1607]) tensor([0.0407, 0.1854, 0.5364, 0.2375]) -Greedy action tensor([-1.8926, -0.4515, 0.6616, -0.1510]) tensor([0.0420, 0.1776, 0.5406, 0.2398]) -Greedy action tensor([-1.6112, -0.5161, 0.4799, 0.0853]) tensor([0.0570, 0.1705, 0.4615, 0.3110]) -Greedy action tensor([-1.8800, -0.4581, 0.6519, -0.1083]) tensor([0.0424, 0.1756, 0.5329, 0.2492]) -Greedy action tensor([-1.8612, -0.4180, 0.6217, -0.1451]) tensor([0.0439, 0.1859, 0.5259, 0.2443]) -Greedy action tensor([-1.8522, -0.4345, 0.6110, -0.1410]) tensor([0.0446, 0.1842, 0.5241, 0.2471]) -Greedy action tensor([-0.6467, 0.5042, 0.1187, 0.0132]) tensor([0.1213, 0.3834, 0.2607, 0.2346]) -Greedy action tensor([-1.4532, 0.4645, 0.3731, 0.2149]) tensor([0.0518, 0.3523, 0.3215, 0.2745]) -Greedy action tensor([-1.6509, -0.2806, 0.5535, 0.0412]) tensor([0.0515, 0.2026, 0.4665, 0.2795]) -Greedy action tensor([-1.0218, 0.9441, 0.1136, 0.4402]) tensor([0.0642, 0.4587, 0.1999, 0.2771]) -Greedy action tensor([-1.9301, -0.4312, 0.6588, -0.1689]) tensor([0.0406, 0.1819, 0.5410, 0.2365]) -Greedy action tensor([-1.8942, -0.2329, 0.6091, -0.1452]) tensor([0.0413, 0.2173, 0.5043, 0.2372]) -Greedy action tensor([-1.9299, -0.4579, 0.6618, -0.1680]) tensor([0.0408, 0.1776, 0.5442, 0.2374]) -Greedy action tensor([-1.5099, -0.5917, 0.4520, 0.0654]) tensor([0.0647, 0.1621, 0.4604, 0.3128]) -Greedy action tensor([-1.8783, -0.3921, 0.6456, -0.1475]) tensor([0.0425, 0.1878, 0.5300, 0.2398]) -Greedy action tensor([-1.9212, -0.4697, 0.6733, -0.1677]) tensor([0.0409, 0.1747, 0.5480, 0.2363]) -Greedy action tensor([-1.9270, -0.4578, 0.6752, -0.1673]) tensor([0.0406, 0.1763, 0.5474, 0.2357]) -Greedy action tensor([-1.3069, -0.3128, 0.3469, 0.1314]) tensor([0.0761, 0.2056, 0.3977, 0.3206]) -Greedy action tensor([-0.6393, 0.5625, 0.0482, 0.0387]) tensor([0.1207, 0.4015, 0.2400, 0.2378]) -Greedy action tensor([-1.9420, -0.4483, 0.6671, -0.1775]) tensor([0.0402, 0.1790, 0.5461, 0.2347]) -Greedy action tensor([-1.8896, -0.3934, 0.6381, -0.1475]) tensor([0.0422, 0.1884, 0.5285, 0.2409]) -Greedy action tensor([-0.9994, 0.8912, 0.1048, 0.2741]) tensor([0.0704, 0.4660, 0.2122, 0.2514]) -Greedy action tensor([-1.4409, 0.2109, 0.4201, 0.1066]) tensor([0.0577, 0.3007, 0.3707, 0.2709]) -Greedy action tensor([-1.8645, -0.4452, 0.6241, -0.1371]) tensor([0.0439, 0.1813, 0.5282, 0.2467]) -Greedy action tensor([-1.8353, -0.2704, 0.5901, -0.0927]) tensor([0.0439, 0.2097, 0.4959, 0.2505]) -Greedy action tensor([-1.3996, 0.7378, 0.2534, 0.2020]) tensor([0.0509, 0.4312, 0.2656, 0.2523]) -Greedy action tensor([-1.7373, -0.0782, 0.5197, -0.0486]) tensor([0.0471, 0.2476, 0.4502, 0.2550]) -Greedy action tensor([-1.8063, -0.4741, 0.6015, -0.0985]) tensor([0.0467, 0.1770, 0.5187, 0.2576]) -Greedy action tensor([-1.9175, -0.4485, 0.6571, -0.1655]) tensor([0.0413, 0.1793, 0.5416, 0.2379]) -Greedy action tensor([-1.8449, -0.4297, 0.6162, -0.1367]) tensor([0.0447, 0.1842, 0.5242, 0.2469]) -Greedy action tensor([-1.8917, -0.4352, 0.6382, -0.1611]) tensor([0.0426, 0.1827, 0.5344, 0.2403]) -Greedy action tensor([-0.7288, -0.0016, 0.2295, -0.1445]) tensor([0.1339, 0.2770, 0.3490, 0.2401]) -Greedy action tensor([-1.7753, -0.2774, 0.6006, -0.0456]) tensor([0.0457, 0.2045, 0.4920, 0.2578]) -Greedy action tensor([-1.6743, 0.0944, 0.5104, 0.0784]) tensor([0.0465, 0.2724, 0.4130, 0.2681]) -Greedy action tensor([-2.0088, -0.9092, 0.5210, -0.3126]) tensor([0.0454, 0.1365, 0.5703, 0.2478]) -Greedy action tensor([-0.7988, 0.3220, -0.1897, 0.1546]) tensor([0.1176, 0.3608, 0.2163, 0.3052]) -Greedy action tensor([-0.5275, 1.0300, 0.0218, 0.5692]) tensor([0.0955, 0.4532, 0.1654, 0.2859]) -Greedy action tensor([-1.3095, -0.1960, 0.3026, 0.2403]) tensor([0.0726, 0.2212, 0.3641, 0.3421]) -Greedy action tensor([-0.8356, 0.9428, 0.0943, 0.2362]) tensor([0.0808, 0.4784, 0.2048, 0.2360]) -Greedy action tensor([-1.8822, -0.4451, 0.6241, -0.1426]) tensor([0.0432, 0.1817, 0.5293, 0.2459]) -Greedy action tensor([-1.5048, -0.2419, 0.6857, 0.3494]) tensor([0.0503, 0.1780, 0.4501, 0.3216]) -Greedy action tensor([-1.9015, -0.4498, 0.6414, -0.1488]) tensor([0.0421, 0.1798, 0.5353, 0.2429]) -Greedy action tensor([-1.2894, -0.5819, 0.3297, 0.1620]) tensor([0.0810, 0.1643, 0.4089, 0.3458]) -Greedy action tensor([-1.7066, 0.0832, 0.4791, -0.0556]) tensor([0.0474, 0.2838, 0.4217, 0.2471]) -Greedy action tensor([-1.8390, -0.2757, 0.5800, -0.1333]) tensor([0.0444, 0.2121, 0.4990, 0.2445]) -Greedy action tensor([-1.9183, -0.4040, 0.6453, -0.1666]) tensor([0.0412, 0.1871, 0.5344, 0.2373]) -Greedy action tensor([-1.7233, -0.4796, 0.5617, -0.1148]) tensor([0.0518, 0.1798, 0.5094, 0.2590]) -Greedy action tensor([-1.9070, -0.4346, 0.6474, -0.1614]) tensor([0.0417, 0.1820, 0.5370, 0.2392]) -Greedy action tensor([-1.4538, -0.1060, 0.4922, -0.5432]) tensor([0.0698, 0.2685, 0.4884, 0.1734]) -Greedy action tensor([-1.9342, -0.4182, 0.6559, -0.1739]) tensor([0.0405, 0.1844, 0.5397, 0.2354]) -Greedy action tensor([-1.6630, 0.0137, 0.5032, 0.0186]) tensor([0.0489, 0.2616, 0.4267, 0.2628]) -Greedy action tensor([-1.9151, -0.4180, 0.6481, -0.1673]) tensor([0.0413, 0.1847, 0.5365, 0.2374]) -Greedy action tensor([-0.6630, -0.4474, -0.2002, -1.3391]) tensor([0.2305, 0.2860, 0.3662, 0.1172]) -Greedy action tensor([-0.4899, -1.0840, -0.1671, 0.6872]) tensor([0.1619, 0.0894, 0.2235, 0.5252]) -Greedy action tensor([ 0.5218, -0.4182, 1.2476, 0.1121]) tensor([0.2427, 0.0948, 0.5014, 0.1611]) -Greedy action tensor([ 0.2638, -0.1482, -1.0349, 0.2049]) tensor([0.3475, 0.2301, 0.0948, 0.3276]) -Greedy action tensor([ 1.6014, -0.7125, 1.2523, 1.3426]) tensor([0.3882, 0.0384, 0.2738, 0.2997]) -Greedy action tensor([-0.9735, -1.0998, 0.4311, -1.1990]) tensor([0.1481, 0.1305, 0.6033, 0.1182]) -Greedy action tensor([ 0.2587, -0.6497, -0.6113, 0.7230]) tensor([0.2930, 0.1181, 0.1228, 0.4661]) -Greedy action tensor([ 0.1842, 0.4183, -0.1502, -1.0181]) tensor([0.3049, 0.3853, 0.2182, 0.0916]) -Greedy action tensor([ 0.8422, -1.1677, 0.0057, 0.5129]) tensor([0.4373, 0.0586, 0.1895, 0.3146]) -Greedy action tensor([ 1.2839, -0.5459, 0.3491, 0.0353]) tensor([0.5435, 0.0872, 0.2134, 0.1559]) -Greedy action tensor([ 0.0618, -0.1511, 0.4019, -0.6195]) tensor([0.2689, 0.2173, 0.3778, 0.1360]) -Greedy action tensor([-0.0376, -0.6257, -0.0602, -0.9162]) tensor([0.3392, 0.1884, 0.3316, 0.1409]) -Greedy action tensor([-0.2256, -1.1751, 0.1866, -0.0342]) tensor([0.2434, 0.0942, 0.3676, 0.2948]) -Greedy action tensor([-0.5908, -0.8092, 1.1896, -0.9899]) tensor([0.1189, 0.0956, 0.7056, 0.0798]) -Greedy action tensor([-1.2872, -0.0505, 0.0119, -0.8980]) tensor([0.1043, 0.3593, 0.3824, 0.1540]) -Greedy action tensor([-0.4369, -1.8077, -0.1457, -0.9409]) tensor([0.3129, 0.0794, 0.4187, 0.1890]) -Greedy action tensor([ 1.0600, -0.2327, 1.0119, 0.0632]) tensor([0.3851, 0.1057, 0.3670, 0.1421]) -Greedy action tensor([ 0.6344, -0.5278, 0.7506, 0.2673]) tensor([0.3196, 0.1000, 0.3590, 0.2214]) -Greedy action tensor([ 0.5720, -0.5219, -0.1924, 1.1394]) tensor([0.2806, 0.0940, 0.1306, 0.4948]) -Greedy action tensor([ 1.6042, -0.6956, 0.6741, 1.1681]) tensor([0.4670, 0.0468, 0.1842, 0.3019]) -Greedy action tensor([-1.5639, -0.4021, 0.9696, -1.2168]) tensor([0.0549, 0.1755, 0.6919, 0.0777]) -Greedy action tensor([-0.0535, -0.8801, -0.2710, -0.0421]) tensor([0.3074, 0.1345, 0.2473, 0.3109]) -Greedy action tensor([ 0.4086, -0.5421, 1.7420, 0.1433]) tensor([0.1681, 0.0650, 0.6379, 0.1290]) -Greedy action tensor([ 0.2887, -1.0009, -0.9085, -0.0841]) tensor([0.4412, 0.1215, 0.1333, 0.3040]) -Greedy action tensor([-0.8278, -1.4895, -0.6032, 0.3534]) tensor([0.1660, 0.0856, 0.2077, 0.5407]) -Greedy action tensor([-0.3793, 0.3615, -0.6129, 0.0749]) tensor([0.1830, 0.3839, 0.1449, 0.2882]) -Greedy action tensor([ 1.0226, -0.5656, 1.0181, -1.1746]) tensor([0.4327, 0.0884, 0.4308, 0.0481]) -Greedy action tensor([ 0.1262, -1.0436, -0.7300, 0.5915]) tensor([0.3005, 0.0933, 0.1276, 0.4785]) -Greedy action tensor([ 0.6525, 0.3666, 1.2384, -0.1560]) tensor([0.2504, 0.1881, 0.4499, 0.1116]) -Greedy action tensor([ 1.1134, -1.4360, 0.7415, 0.6392]) tensor([0.4184, 0.0327, 0.2885, 0.2604]) -Greedy action tensor([ 0.9369, 0.2347, -0.2604, -0.2890]) tensor([0.4782, 0.2370, 0.1444, 0.1404]) -Greedy action tensor([ 1.2681, -0.7651, 1.1235, -1.0733]) tensor([0.4779, 0.0626, 0.4136, 0.0460]) -Greedy action tensor([-0.9265, -0.7432, -1.1258, 0.1720]) tensor([0.1661, 0.1995, 0.1361, 0.4983]) -Greedy action tensor([-0.3134, -1.0402, 0.1796, -0.3367]) tensor([0.2440, 0.1180, 0.3995, 0.2384]) -Greedy action tensor([-1.1802, -0.4124, 0.1862, -0.4718]) tensor([0.1098, 0.2366, 0.4306, 0.2230]) -Greedy action tensor([-0.8072, -0.8309, 0.6586, -0.3212]) tensor([0.1260, 0.1231, 0.5459, 0.2049]) -Greedy action tensor([-0.1868, -1.1123, -0.0261, -0.0176]) tensor([0.2663, 0.1055, 0.3127, 0.3154]) -Greedy action tensor([ 0.9021, -1.7564, -0.3451, 0.1573]) tensor([0.5458, 0.0382, 0.1568, 0.2592]) -Greedy action tensor([ 0.1236, 0.2502, 0.6607, -0.5407]) tensor([0.2293, 0.2603, 0.3924, 0.1180]) -Greedy action tensor([ 0.9711, -0.3855, -0.6867, -0.3316]) tensor([0.5814, 0.1497, 0.1108, 0.1580]) -Greedy action tensor([-0.2182, -0.8177, -0.5970, 0.4545]) tensor([0.2385, 0.1309, 0.1633, 0.4673]) -Greedy action tensor([0.8512, 0.3288, 0.3101, 0.3833]) tensor([0.3569, 0.2117, 0.2078, 0.2236]) -Greedy action tensor([ 0.4545, -1.0766, 0.3405, 1.0841]) tensor([0.2509, 0.0543, 0.2239, 0.4709]) -Greedy action tensor([ 0.1529, -1.1861, 0.4017, 0.2195]) tensor([0.2768, 0.0725, 0.3549, 0.2958]) -Greedy action tensor([-0.5341, -1.0345, 0.2699, -1.0054]) tensor([0.2240, 0.1358, 0.5005, 0.1398]) -Greedy action tensor([ 0.2409, -0.3539, -0.4277, 0.5602]) tensor([0.2907, 0.1604, 0.1489, 0.4000]) -Greedy action tensor([-0.1667, -1.4894, -0.5461, -0.9152]) tensor([0.4126, 0.1099, 0.2823, 0.1952]) -Greedy action tensor([-1.5689, 0.3891, -0.1947, -0.3666]) tensor([0.0651, 0.4611, 0.2572, 0.2166]) -Greedy action tensor([-0.6638, 0.1984, 0.1673, -0.3926]) tensor([0.1434, 0.3395, 0.3291, 0.1880]) -Greedy action tensor([-0.4689, -0.9635, 0.3198, -0.6518]) tensor([0.2154, 0.1313, 0.4739, 0.1794]) -Greedy action tensor([ 0.1542, -0.4429, 0.4010, -0.7001]) tensor([0.3071, 0.1690, 0.3931, 0.1307]) -Greedy action tensor([-0.8972, -0.2434, 0.2912, -0.8544]) tensor([0.1380, 0.2653, 0.4528, 0.1440]) -Greedy action tensor([ 0.3545, -0.9426, 0.0329, 0.5284]) tensor([0.3137, 0.0857, 0.2274, 0.3732]) -Greedy action tensor([-0.3094, 0.2966, 0.0520, -0.4719]) tensor([0.1954, 0.3581, 0.2804, 0.1661]) -Greedy action tensor([-1.3496, 0.6361, 0.4395, -0.5199]) tensor([0.0604, 0.4398, 0.3613, 0.1384]) -Greedy action tensor([-0.2165, -1.1082, 0.6537, -0.2492]) tensor([0.2099, 0.0860, 0.5010, 0.2031]) -Greedy action tensor([ 1.0936, -1.2190, 0.7104, 0.8518]) tensor([0.3897, 0.0386, 0.2657, 0.3060]) -Greedy action tensor([ 1.6497, -1.0552, 0.7811, 0.3704]) tensor([0.5667, 0.0379, 0.2377, 0.1577]) -Greedy action tensor([-0.4511, -0.0433, -0.5320, -0.7979]) tensor([0.2420, 0.3638, 0.2232, 0.1711]) -Greedy action tensor([-0.2646, -0.8020, 0.7640, -0.3999]) tensor([0.1903, 0.1112, 0.5323, 0.1662]) -Greedy action tensor([ 0.6835, -0.3534, 1.1912, -0.0192]) tensor([0.2848, 0.1010, 0.4732, 0.1410]) -Greedy action tensor([ 0.7234, -0.9564, 0.3238, -0.3928]) tensor([0.4578, 0.0853, 0.3070, 0.1499]) -Greedy action tensor([-0.5758, -0.1121, -0.2995, -0.1574]) tensor([0.1842, 0.2929, 0.2429, 0.2800]) -Greedy action tensor([ 1.0546, -1.5283, 0.6769, 0.6795]) tensor([0.4085, 0.0309, 0.2800, 0.2807]) -Greedy action tensor([ 0.4484, -1.1309, -0.2647, 0.8245]) tensor([0.3172, 0.0654, 0.1555, 0.4620]) -Greedy action tensor([-0.2262, -0.3684, -0.1493, 0.0667]) tensor([0.2332, 0.2023, 0.2519, 0.3126]) -Greedy action tensor([ 0.2058, -0.0263, -0.8741, -0.6202]) tensor([0.3891, 0.3085, 0.1321, 0.1703]) -Greedy action tensor([ 1.2593, -0.3737, 0.2962, 0.8071]) tensor([0.4518, 0.0883, 0.1725, 0.2874]) -Greedy action tensor([ 0.5996, -1.3133, -0.6819, 0.0891]) tensor([0.4937, 0.0729, 0.1371, 0.2963]) -Greedy action tensor([-1.2902, -0.5180, 0.0511, -1.0061]) tensor([0.1202, 0.2602, 0.4598, 0.1597]) -Greedy action tensor([-0.3322, -0.5168, 1.2385, -0.6174]) tensor([0.1353, 0.1125, 0.6506, 0.1017]) -Greedy action tensor([ 0.4000, -0.3527, 0.1878, 0.3673]) tensor([0.3079, 0.1451, 0.2490, 0.2980]) -Greedy action tensor([ 1.1346, 0.1150, -0.0760, -0.0286]) tensor([0.5073, 0.1830, 0.1512, 0.1585]) -Greedy action tensor([-1.3203, -0.0126, 0.8028, -0.8230]) tensor([0.0680, 0.2516, 0.5686, 0.1119]) -Greedy action tensor([-0.7619, -0.5911, 0.1056, -0.7642]) tensor([0.1797, 0.2132, 0.4279, 0.1793]) -Greedy action tensor([ 0.1958, -0.8272, 1.0264, -0.0356]) tensor([0.2248, 0.0808, 0.5159, 0.1784]) -Greedy action tensor([-0.4123, 1.0753, 0.4787, -0.3716]) tensor([0.1123, 0.4970, 0.2737, 0.1170]) -Greedy action tensor([-0.1866, -0.4603, 1.1765, -0.5529]) tensor([0.1572, 0.1195, 0.6143, 0.1090]) -Greedy action tensor([-0.2081, -1.7024, -0.3975, 0.3261]) tensor([0.2661, 0.0597, 0.2202, 0.4540]) -Greedy action tensor([ 0.4734, -0.7523, 1.3014, -0.0780]) tensor([0.2405, 0.0706, 0.5504, 0.1385]) -Greedy action tensor([-0.9097, 0.1551, -0.1990, -0.0241]) tensor([0.1196, 0.3469, 0.2435, 0.2900]) -Greedy action tensor([ 0.4238, -0.1397, -0.1587, -0.2118]) tensor([0.3763, 0.2142, 0.2102, 0.1993]) -Greedy action tensor([ 0.9156, -0.2462, -0.0384, -0.4346]) tensor([0.5109, 0.1599, 0.1968, 0.1324]) -Greedy action tensor([ 0.8409, -0.3587, -0.0638, -0.4920]) tensor([0.5077, 0.1530, 0.2054, 0.1339]) -Greedy action tensor([ 0.4924, -0.0753, 0.0499, -0.0642]) tensor([0.3594, 0.2037, 0.2309, 0.2060]) -Greedy action tensor([ 0.4685, -0.4091, -0.0311, -0.4828]) tensor([0.4152, 0.1726, 0.2519, 0.1603]) -Greedy action tensor([ 0.7726, -0.6116, -0.0476, -0.3170]) tensor([0.4933, 0.1236, 0.2172, 0.1659]) -Greedy action tensor([ 0.8764, -0.3671, 0.0394, -0.1959]) tensor([0.4846, 0.1397, 0.2098, 0.1658]) -Greedy action tensor([ 0.8449, -0.4881, -0.1144, -0.3317]) tensor([0.5115, 0.1349, 0.1960, 0.1577]) -Greedy action tensor([ 0.7751, -0.7027, -0.0628, -0.2804]) tensor([0.4978, 0.1136, 0.2154, 0.1732]) -Greedy action tensor([ 0.5307, -0.6649, -0.1303, -0.1664]) tensor([0.4316, 0.1306, 0.2229, 0.2149]) -Greedy action tensor([ 0.7409, -0.5237, -0.1095, -0.5463]) tensor([0.5036, 0.1422, 0.2152, 0.1390]) -Greedy action tensor([ 0.8274, -0.6196, -0.0699, -0.6472]) tensor([0.5342, 0.1257, 0.2178, 0.1223]) -Greedy action tensor([ 0.3157, 0.2527, -0.2405, 0.1828]) tensor([0.2952, 0.2772, 0.1692, 0.2584]) -Greedy action tensor([ 1.0612, -0.3323, -0.0480, -0.2302]) tensor([0.5397, 0.1340, 0.1780, 0.1484]) -Greedy action tensor([ 0.8946, -0.3170, -0.0901, -0.1345]) tensor([0.4930, 0.1468, 0.1841, 0.1761]) -Greedy action tensor([ 0.5381, -0.2542, 0.0085, -0.2986]) tensor([0.4041, 0.1830, 0.2379, 0.1750]) -Greedy action tensor([ 0.4549, -0.2672, -0.1412, -0.1025]) tensor([0.3832, 0.1861, 0.2111, 0.2195]) -Greedy action tensor([ 0.7780, -1.0913, 0.1511, -0.6056]) tensor([0.5157, 0.0795, 0.2755, 0.1293]) -Greedy action tensor([ 0.9227, -0.7118, 0.0559, -0.3816]) tensor([0.5300, 0.1034, 0.2228, 0.1438]) -Greedy action tensor([ 0.7150, -0.0413, -0.1148, -0.0731]) tensor([0.4237, 0.1989, 0.1848, 0.1926]) -Greedy action tensor([ 0.7766, -0.1647, -0.1306, -0.0743]) tensor([0.4503, 0.1757, 0.1818, 0.1923]) -Greedy action tensor([ 0.6963, -0.1443, -0.0237, -0.2616]) tensor([0.4344, 0.1874, 0.2114, 0.1667]) -Greedy action tensor([ 1.0964, -0.7029, -0.0089, -0.6362]) tensor([0.5976, 0.0989, 0.1979, 0.1057]) -Greedy action tensor([ 0.6269, -0.6824, 0.0265, -0.2146]) tensor([0.4445, 0.1200, 0.2439, 0.1916]) -Greedy action tensor([ 1.0177, -1.0091, 0.1209, -0.5624]) tensor([0.5729, 0.0755, 0.2336, 0.1180]) -Greedy action tensor([ 0.4651, -0.2310, -0.0992, 0.0042]) tensor([0.3707, 0.1848, 0.2108, 0.2338]) -Greedy action tensor([ 0.6982, -0.1889, -0.0541, -0.2123]) tensor([0.4376, 0.1802, 0.2062, 0.1760]) -Greedy action tensor([ 0.5952, -0.1318, -0.0126, -0.0573]) tensor([0.3924, 0.1897, 0.2137, 0.2043]) -Greedy action tensor([ 0.8762, -0.2517, -0.0105, -0.1074]) tensor([0.4740, 0.1534, 0.1953, 0.1773]) -Greedy action tensor([ 0.8971, -0.5484, -0.0901, -0.3588]) tensor([0.5283, 0.1245, 0.1968, 0.1504]) -Greedy action tensor([ 0.6386, -0.1125, -0.0332, -0.3947]) tensor([0.4276, 0.2018, 0.2184, 0.1522]) -Greedy action tensor([ 0.8959, -0.6124, -0.1337, -0.5011]) tensor([0.5477, 0.1212, 0.1956, 0.1355]) -Greedy action tensor([ 0.7410, -0.6988, 0.0594, -0.4717]) tensor([0.4902, 0.1162, 0.2479, 0.1458]) -Greedy action tensor([ 0.8154, -0.2548, -0.1076, -0.3088]) tensor([0.4842, 0.1661, 0.1924, 0.1573]) -Greedy action tensor([ 0.5912, -0.1665, -0.0812, -0.0122]) tensor([0.3959, 0.1856, 0.2021, 0.2165]) -Greedy action tensor([ 0.7666, -0.3341, -0.0097, -0.1679]) tensor([0.4575, 0.1522, 0.2105, 0.1797]) -Greedy action tensor([ 0.5528, -0.4419, 0.0172, -0.2020]) tensor([0.4123, 0.1525, 0.2413, 0.1938]) -Greedy action tensor([ 0.9646, -0.6951, -0.1074, -0.3387]) tensor([0.5543, 0.1054, 0.1897, 0.1506]) -Greedy action tensor([ 0.2596, 0.5147, -0.3453, -0.2006]) tensor([0.2884, 0.3722, 0.1575, 0.1820]) -Greedy action tensor([ 0.9760, -0.9431, 0.1337, -0.6086]) tensor([0.5610, 0.0823, 0.2416, 0.1150]) -Greedy action tensor([ 1.0340, -0.5667, -0.0906, -0.4599]) tensor([0.5711, 0.1152, 0.1855, 0.1282]) -Greedy action tensor([ 0.6817, -0.4424, -0.0946, 0.0047]) tensor([0.4361, 0.1417, 0.2006, 0.2216]) -Greedy action tensor([ 0.5515, -0.5465, -0.1358, -0.3048]) tensor([0.4422, 0.1475, 0.2224, 0.1878]) -Greedy action tensor([ 0.5891, -0.1467, 0.1503, -0.5880]) tensor([0.4112, 0.1970, 0.2651, 0.1267]) -Greedy action tensor([ 0.8041, -0.4347, -0.1733, -0.4062]) tensor([0.5091, 0.1475, 0.1916, 0.1518]) -Greedy action tensor([ 0.8401, -0.4586, -0.1289, -0.2969]) tensor([0.5068, 0.1383, 0.1923, 0.1626]) -Greedy action tensor([ 0.4342, -0.1869, 0.0288, -0.2523]) tensor([0.3694, 0.1985, 0.2463, 0.1859]) -Greedy action tensor([ 1.0859, -0.4181, 0.0502, -0.1574]) tensor([0.5360, 0.1191, 0.1903, 0.1546]) -Greedy action tensor([ 0.9584, -0.3142, -0.0986, -0.1448]) tensor([0.5104, 0.1430, 0.1773, 0.1693]) -Greedy action tensor([ 0.6976, -0.5481, -0.1541, -0.3811]) tensor([0.4867, 0.1401, 0.2077, 0.1655]) -Greedy action tensor([ 0.7918, -0.4167, 0.0096, -0.8325]) tensor([0.5120, 0.1529, 0.2342, 0.1009]) -Greedy action tensor([ 0.9785, -0.2842, 0.1039, -0.1602]) tensor([0.4950, 0.1400, 0.2064, 0.1585]) -Greedy action tensor([ 0.6236, -0.2541, -0.0327, -0.0489]) tensor([0.4090, 0.1700, 0.2122, 0.2088]) -Greedy action tensor([ 0.3768, -0.1480, 0.0126, 0.0618]) tensor([0.3315, 0.1962, 0.2303, 0.2420]) -Greedy action tensor([ 0.9286, -0.5917, -0.0400, -0.2756]) tensor([0.5268, 0.1152, 0.2000, 0.1580]) -Greedy action tensor([ 0.4349, -0.3915, 0.0218, -0.7209]) tensor([0.4143, 0.1813, 0.2741, 0.1304]) -Greedy action tensor([ 0.4263, -0.4986, -0.1942, -0.1325]) tensor([0.3990, 0.1582, 0.2145, 0.2282]) -Greedy action tensor([ 0.7647, 0.3528, 0.1306, -0.2433]) tensor([0.3910, 0.2590, 0.2074, 0.1427]) -Greedy action tensor([ 0.5817, -0.5395, 0.0254, -0.2219]) tensor([0.4261, 0.1389, 0.2443, 0.1908]) -Greedy action tensor([ 1.2328, -0.7147, -0.0953, -0.2717]) tensor([0.6136, 0.0875, 0.1626, 0.1363]) -Greedy action tensor([ 0.7859, -0.5717, 0.0048, -0.5969]) tensor([0.5086, 0.1309, 0.2329, 0.1276]) -Greedy action tensor([ 0.7727, -0.1536, 0.1391, -0.2425]) tensor([0.4369, 0.1730, 0.2318, 0.1583]) -Greedy action tensor([ 1.3622, -0.8317, -0.1425, -0.6041]) tensor([0.6786, 0.0757, 0.1507, 0.0950]) -Greedy action tensor([ 0.2890, -0.3799, -0.2040, -0.1515]) tensor([0.3614, 0.1851, 0.2208, 0.2327]) -Greedy action tensor([ 0.7259, -0.6192, 0.0135, -0.1048]) tensor([0.4573, 0.1191, 0.2243, 0.1993]) -Greedy action tensor([ 0.5752, 0.1539, 0.0228, -0.1453]) tensor([0.3679, 0.2414, 0.2117, 0.1790]) -Greedy action tensor([ 0.4831, -0.2570, -0.0286, -0.1740]) tensor([0.3854, 0.1838, 0.2310, 0.1998]) -Greedy action tensor([ 0.8403, -0.6322, -0.0486, -0.1422]) tensor([0.4963, 0.1138, 0.2040, 0.1858]) -Greedy action tensor([ 1.1424e+00, -4.5627e-01, -5.9211e-04, -7.4408e-01]) tensor([0.5978, 0.1209, 0.1906, 0.0906]) -Greedy action tensor([ 0.8631, -0.4620, -0.0061, -0.3205]) tensor([0.5022, 0.1335, 0.2106, 0.1538]) -Greedy action tensor([ 0.4276, -0.0706, 0.0416, -0.0905]) tensor([0.3469, 0.2107, 0.2358, 0.2066]) -Greedy action tensor([ 0.3946, 0.5846, -0.3502, 0.1394]) tensor([0.2891, 0.3496, 0.1373, 0.2240]) -Greedy action tensor([ 0.4941, -0.1144, -0.0511, -0.1171]) tensor([0.3750, 0.2041, 0.2174, 0.2035]) -Greedy action tensor([ 0.7061, -0.4819, -0.1750, -0.4180]) tensor([0.4892, 0.1491, 0.2027, 0.1590]) -Greedy action tensor([ 0.6376, -0.8070, -0.0208, -0.2540]) tensor([0.4622, 0.1090, 0.2393, 0.1895]) -Greedy action tensor([ 0.4837, -0.5862, -0.2622, -0.1936]) tensor([0.4301, 0.1475, 0.2040, 0.2185]) -Greedy action tensor([ 1.0090, -0.4238, -0.1248, -0.3177]) tensor([0.5477, 0.1307, 0.1763, 0.1453]) -Greedy action tensor([ 0.5930, -0.4087, -0.0607, -0.2675]) tensor([0.4328, 0.1590, 0.2251, 0.1831]) -Greedy action tensor([ 1.0185, -0.5372, -0.0655, -0.2798]) tensor([0.5488, 0.1158, 0.1856, 0.1498]) -Greedy action tensor([ 0.5324, -0.3393, -0.2630, -0.3998]) tensor([0.4418, 0.1848, 0.1994, 0.1739]) -Greedy action tensor([ 0.9824, -0.8559, -0.2296, -0.3621]) tensor([0.5823, 0.0926, 0.1733, 0.1518]) -Greedy action tensor([ 1.7818, -1.0877, -0.0483, 0.5314]) tensor([0.6651, 0.0377, 0.1067, 0.1905]) -Greedy action tensor([ 1.5060, -0.2757, -0.4974, 0.0638]) tensor([0.6495, 0.1093, 0.0876, 0.1535]) -Greedy action tensor([ 1.2777, -0.4086, -0.4217, 0.4012]) tensor([0.5605, 0.1038, 0.1024, 0.2333]) -Greedy action tensor([ 1.5732, -0.8555, -0.8284, 0.9147]) tensor([0.5895, 0.0520, 0.0534, 0.3051]) -Greedy action tensor([ 1.2960, -0.2199, -0.2478, 0.0176]) tensor([0.5842, 0.1283, 0.1248, 0.1627]) -Greedy action tensor([ 2.5881, -1.4630, -0.1602, 0.6338]) tensor([0.8176, 0.0142, 0.0524, 0.1158]) -Greedy action tensor([ 1.5460, -0.4548, -0.3935, 0.1325]) tensor([0.6569, 0.0888, 0.0944, 0.1598]) -Greedy action tensor([ 0.9948, -0.3081, -0.2890, 0.0331]) tensor([0.5179, 0.1407, 0.1434, 0.1980]) -Greedy action tensor([ 1.7913, -0.7660, -0.1958, 0.3363]) tensor([0.6906, 0.0535, 0.0947, 0.1612]) -Greedy action tensor([ 2.1781, -0.0260, -0.6482, 0.4361]) tensor([0.7436, 0.0821, 0.0440, 0.1303]) -Greedy action tensor([ 1.7676, 0.1981, -0.5113, -0.3096]) tensor([0.6965, 0.1450, 0.0713, 0.0872]) -Greedy action tensor([ 1.5956, -0.6220, -0.4332, 0.5330]) tensor([0.6305, 0.0686, 0.0829, 0.2179]) -Greedy action tensor([ 1.0041, -0.2601, -0.4511, 0.3865]) tensor([0.4866, 0.1374, 0.1136, 0.2624]) -Greedy action tensor([ 1.5551, -0.3544, -0.4552, 0.1600]) tensor([0.6536, 0.0968, 0.0876, 0.1620]) -Greedy action tensor([ 1.4846, -0.5337, -0.1609, 0.1579]) tensor([0.6285, 0.0835, 0.1212, 0.1668]) -Greedy action tensor([ 1.6691, -0.7630, -0.2051, 0.2348]) tensor([0.6758, 0.0594, 0.1037, 0.1610]) -Greedy action tensor([ 1.2109, -0.4362, -0.5251, 0.6690]) tensor([0.5127, 0.0987, 0.0904, 0.2982]) -Greedy action tensor([ 1.5072, 0.0046, -0.5461, 0.2565]) tensor([0.6108, 0.1359, 0.0784, 0.1749]) -Greedy action tensor([ 1.9508, -1.3958, -0.3755, 0.4280]) tensor([0.7402, 0.0261, 0.0723, 0.1614]) -Greedy action tensor([ 2.0269, -0.7682, -0.5161, 0.5946]) tensor([0.7254, 0.0443, 0.0570, 0.1732]) -Greedy action tensor([ 1.2683, -0.4388, -0.1605, 0.1175]) tensor([0.5756, 0.1044, 0.1379, 0.1821]) -Greedy action tensor([ 1.7682, -0.7133, -0.3387, 0.1137]) tensor([0.7161, 0.0599, 0.0871, 0.1369]) -Greedy action tensor([ 2.1165, -0.5321, -0.5767, 0.6822]) tensor([0.7264, 0.0514, 0.0491, 0.1731]) -Greedy action tensor([ 1.2887, -0.3107, -0.4589, 0.7405]) tensor([0.5117, 0.1034, 0.0891, 0.2958]) -Greedy action tensor([ 1.5856, -0.0850, -0.8556, 0.2556]) tensor([0.6495, 0.1222, 0.0565, 0.1718]) -Greedy action tensor([ 2.3058, -1.3557, -0.2008, 0.5973]) tensor([0.7762, 0.0199, 0.0633, 0.1406]) -Greedy action tensor([ 1.6746, -0.8176, 0.0837, -0.0771]) tensor([0.6850, 0.0567, 0.1395, 0.1188]) -Greedy action tensor([ 1.5472, -0.8596, -0.2918, 0.4112]) tensor([0.6369, 0.0574, 0.1013, 0.2045]) -Greedy action tensor([ 0.8258, -0.1541, -0.3467, -0.1787]) tensor([0.4875, 0.1830, 0.1509, 0.1785]) -Greedy action tensor([ 1.6019, -0.8819, -0.1805, 0.0106]) tensor([0.6871, 0.0573, 0.1156, 0.1399]) -Greedy action tensor([ 2.2891, -1.5499, 0.0788, 0.4963]) tensor([0.7706, 0.0166, 0.0845, 0.1283]) -Greedy action tensor([ 1.5360, -0.6784, -0.4546, 0.4451]) tensor([0.6322, 0.0691, 0.0864, 0.2124]) -Greedy action tensor([ 1.4376, -0.6136, -0.4388, 0.2020]) tensor([0.6360, 0.0818, 0.0974, 0.1849]) -Greedy action tensor([ 1.5507, -0.5980, -0.0297, -0.2249]) tensor([0.6703, 0.0782, 0.1380, 0.1135]) -Greedy action tensor([ 1.0613, -0.1943, -0.3519, 0.1784]) tensor([0.5150, 0.1467, 0.1253, 0.2130]) -Greedy action tensor([ 2.0764, 0.6939, -0.1131, 0.4165]) tensor([0.6439, 0.1616, 0.0721, 0.1224]) -Greedy action tensor([ 1.4503, -0.7954, -0.0812, 0.3505]) tensor([0.6042, 0.0640, 0.1306, 0.2012]) -Greedy action tensor([ 1.2264, -0.3296, -0.4786, 0.4027]) tensor([0.5460, 0.1152, 0.0992, 0.2396]) -Greedy action tensor([1.8411, 0.3919, 0.3101, 0.0051]) tensor([0.6209, 0.1458, 0.1343, 0.0990]) -Greedy action tensor([ 0.2919, -0.3261, -0.0212, 0.3795]) tensor([0.2975, 0.1603, 0.2175, 0.3247]) -Greedy action tensor([ 1.7811, -0.6176, -0.3683, -0.1024]) tensor([0.7356, 0.0668, 0.0857, 0.1119]) -Greedy action tensor([ 1.5896, -0.3782, -0.4233, 0.0671]) tensor([0.6705, 0.0937, 0.0896, 0.1463]) -Greedy action tensor([ 1.3910, -0.8908, -0.3852, 0.0701]) tensor([0.6501, 0.0664, 0.1101, 0.1735]) -Greedy action tensor([ 1.7149, -0.9756, -0.3267, 0.0783]) tensor([0.7182, 0.0487, 0.0932, 0.1398]) -Greedy action tensor([ 1.2209, -0.3729, -0.5980, 0.0796]) tensor([0.5936, 0.1206, 0.0963, 0.1896]) -Greedy action tensor([ 1.3844, -0.6888, -0.2364, 0.3728]) tensor([0.5927, 0.0746, 0.1172, 0.2155]) -Greedy action tensor([ 1.0227, -0.4810, -0.3017, 0.3825]) tensor([0.4962, 0.1103, 0.1320, 0.2616]) -Greedy action tensor([ 1.4193, -0.6646, -0.3022, 0.2680]) tensor([0.6175, 0.0768, 0.1104, 0.1953]) -Greedy action tensor([ 1.7741, -0.3664, -0.4347, 0.5237]) tensor([0.6606, 0.0777, 0.0726, 0.1892]) -Greedy action tensor([ 1.5322, -0.3630, -0.9457, 0.6050]) tensor([0.6135, 0.0922, 0.0515, 0.2428]) -Greedy action tensor([ 1.5263, -0.5547, -0.5159, 0.3122]) tensor([0.6445, 0.0804, 0.0836, 0.1914]) -Greedy action tensor([ 1.5829, -0.3186, -0.3120, -0.0022]) tensor([0.6646, 0.0993, 0.0999, 0.1362]) -Greedy action tensor([ 1.2173, -0.2746, -0.3882, 0.4149]) tensor([0.5336, 0.1200, 0.1071, 0.2392]) -Greedy action tensor([ 1.8270, -0.7451, -0.3247, 0.1958]) tensor([0.7203, 0.0550, 0.0838, 0.1410]) -Greedy action tensor([ 1.4149, -0.4233, -0.6958, 0.1100]) tensor([0.6446, 0.1026, 0.0781, 0.1748]) -Greedy action tensor([ 2.0061, -1.0079, -0.1992, 0.7191]) tensor([0.6967, 0.0342, 0.0768, 0.1924]) -Greedy action tensor([ 1.5429, -0.3509, -0.3434, 0.3252]) tensor([0.6258, 0.0942, 0.0949, 0.1852]) -Greedy action tensor([ 1.5223, -0.5777, -0.3522, -0.0653]) tensor([0.6755, 0.0827, 0.1036, 0.1381]) -Greedy action tensor([ 1.6938, -0.0329, -0.4719, -0.3352]) tensor([0.7022, 0.1249, 0.0805, 0.0923]) -Greedy action tensor([ 1.1054, -0.4293, -0.2252, 0.2603]) tensor([0.5238, 0.1129, 0.1384, 0.2249]) -Greedy action tensor([ 1.5832, -0.5312, -0.7302, 0.2438]) tensor([0.6749, 0.0815, 0.0668, 0.1768]) -Greedy action tensor([ 1.4235, -0.4549, -0.2941, 0.4356]) tensor([0.5866, 0.0897, 0.1053, 0.2184]) -Greedy action tensor([ 1.5596, -0.5435, -0.1486, 0.1565]) tensor([0.6455, 0.0788, 0.1170, 0.1587]) -Greedy action tensor([ 1.3141, -0.9392, -0.0271, 0.0232]) tensor([0.6092, 0.0640, 0.1593, 0.1675]) -Greedy action tensor([ 2.0251, 0.8663, 0.3455, -0.1709]) tensor([0.6205, 0.1948, 0.1157, 0.0690]) -Greedy action tensor([ 1.8082, -1.0819, -0.2550, 0.3621]) tensor([0.7052, 0.0392, 0.0896, 0.1661]) -Greedy action tensor([ 1.8749, -0.8958, -0.1377, 0.6532]) tensor([0.6707, 0.0420, 0.0896, 0.1977]) -Greedy action tensor([ 0.8394, -0.4157, 0.0642, 0.0332]) tensor([0.4562, 0.1300, 0.2101, 0.2037]) -Greedy action tensor([ 1.5546, -0.6423, -0.7696, 0.3295]) tensor([0.6655, 0.0740, 0.0651, 0.1955]) -Greedy action tensor([ 1.1019, -0.4011, -0.5730, 0.2452]) tensor([0.5452, 0.1213, 0.1021, 0.2314]) -Greedy action tensor([ 1.8453, -0.6773, -0.0789, 0.0722]) tensor([0.7163, 0.0575, 0.1046, 0.1216]) -Greedy action tensor([ 1.6571, -0.2312, -0.1171, 0.1202]) tensor([0.6510, 0.0985, 0.1104, 0.1400]) -Greedy action tensor([ 1.8674, -0.0713, -0.6578, 0.2337]) tensor([0.7047, 0.1014, 0.0564, 0.1375]) -Greedy action tensor([ 1.6199, -0.6379, -0.4547, 0.3805]) tensor([0.6580, 0.0688, 0.0827, 0.1905]) -Greedy action tensor([ 1.6594, 0.0224, -1.0139, 0.7064]) tensor([0.6064, 0.1180, 0.0419, 0.2338]) -Greedy action tensor([ 1.5315, 0.1859, -0.5785, 0.2542]) tensor([0.6023, 0.1568, 0.0730, 0.1679]) -Greedy action tensor([ 2.0146, -0.3478, -0.2875, 0.2341]) tensor([0.7338, 0.0691, 0.0734, 0.1237]) -Greedy action tensor([ 1.0474, -0.4411, -0.3863, 0.6530]) tensor([0.4677, 0.1056, 0.1115, 0.3153]) -Greedy action tensor([ 1.4982, 0.0106, -0.3734, 0.4687]) tensor([0.5757, 0.1301, 0.0886, 0.2056]) -Greedy action tensor([ 1.1875, -0.4498, -0.2319, 0.7042]) tensor([0.4871, 0.0947, 0.1178, 0.3004]) -Greedy action tensor([ 2.0830, -1.2902, -0.3592, 0.2408]) tensor([0.7814, 0.0268, 0.0680, 0.1238]) -Greedy action tensor([ 1.0050, -0.2661, -0.1567, -0.2353]) tensor([0.5311, 0.1490, 0.1662, 0.1536]) -Greedy action tensor([ 0.9663, -0.6586, -0.1377, -0.4702]) tensor([0.5662, 0.1115, 0.1877, 0.1346]) -Greedy action tensor([ 0.8288, -1.0545, 0.1664, -0.3517]) tensor([0.5064, 0.0770, 0.2611, 0.1555]) -Greedy action tensor([ 0.9170, -0.8268, 0.1951, -0.8727]) tensor([0.5471, 0.0957, 0.2658, 0.0914]) -Greedy action tensor([ 0.4342, -0.0419, 0.1478, -0.3378]) tensor([0.3528, 0.2192, 0.2650, 0.1630]) -Greedy action tensor([ 0.7028, -0.4159, -0.0131, -0.2551]) tensor([0.4547, 0.1486, 0.2222, 0.1745]) -Greedy action tensor([ 0.6151, 0.3635, -0.1192, 0.1526]) tensor([0.3464, 0.2693, 0.1662, 0.2181]) -Greedy action tensor([ 0.9749, -0.8940, -0.0640, -0.4150]) tensor([0.5691, 0.0878, 0.2014, 0.1418]) -Greedy action tensor([ 0.5941, -0.5335, -0.0861, -0.2321]) tensor([0.4409, 0.1428, 0.2233, 0.1930]) -Greedy action tensor([ 1.0992, -0.5166, -0.1396, -0.4945]) tensor([0.5911, 0.1175, 0.1713, 0.1201]) -Greedy action tensor([ 0.7655, -0.1798, 0.0835, -0.1617]) tensor([0.4367, 0.1697, 0.2208, 0.1728]) -Greedy action tensor([ 0.5199, -0.3573, -0.1906, -0.0456]) tensor([0.4040, 0.1680, 0.1985, 0.2295]) -Greedy action tensor([ 0.7558, -0.1897, 0.1214, -0.1908]) tensor([0.4335, 0.1684, 0.2299, 0.1682]) -Greedy action tensor([ 0.3433, -0.0668, 0.0662, -0.6781]) tensor([0.3595, 0.2386, 0.2725, 0.1294]) -Greedy action tensor([ 0.8391, -0.5101, 0.0234, -0.3750]) tensor([0.5003, 0.1298, 0.2213, 0.1486]) -Greedy action tensor([0.0777, 0.1817, 0.0097, 0.0334]) tensor([0.2500, 0.2774, 0.2335, 0.2391]) -Greedy action tensor([ 1.1871, -0.5616, -0.1566, -0.5141]) tensor([0.6183, 0.1076, 0.1613, 0.1128]) -Greedy action tensor([ 0.2986, 0.3392, -0.2230, -0.1724]) tensor([0.3068, 0.3195, 0.1821, 0.1916]) -Greedy action tensor([ 0.6972, -0.2934, -0.0582, -0.2112]) tensor([0.4456, 0.1655, 0.2093, 0.1796]) -Greedy action tensor([ 0.8540, -0.4192, -0.1260, -0.4044]) tensor([0.5156, 0.1443, 0.1935, 0.1465]) -Greedy action tensor([ 0.5987, 0.0113, -0.0733, -0.0031]) tensor([0.3825, 0.2126, 0.1953, 0.2096]) -Greedy action tensor([ 0.8678, -0.5652, -0.0190, -0.3791]) tensor([0.5160, 0.1231, 0.2126, 0.1483]) -Greedy action tensor([ 0.3412, -0.2280, -0.0331, -0.0797]) tensor([0.3436, 0.1945, 0.2363, 0.2256]) -Greedy action tensor([ 0.2666, 0.1263, -0.0007, 0.0326]) tensor([0.2919, 0.2537, 0.2234, 0.2310]) -Greedy action tensor([ 0.7379, -0.8555, 0.1916, -0.6840]) tensor([0.4942, 0.1004, 0.2862, 0.1192]) -Greedy action tensor([ 0.5137, -0.1219, 0.2462, -0.6415]) tensor([0.3832, 0.2029, 0.2932, 0.1207]) -Greedy action tensor([ 0.6289, -0.6213, -0.0883, -0.2539]) tensor([0.4570, 0.1309, 0.2231, 0.1890]) -Greedy action tensor([ 0.7873, -0.5111, 0.3267, -0.4926]) tensor([0.4583, 0.1251, 0.2891, 0.1274]) -Greedy action tensor([ 0.9383, -0.6834, -0.0379, -0.4795]) tensor([0.5505, 0.1088, 0.2074, 0.1334]) -Greedy action tensor([ 0.6718, -0.4428, -0.0678, -0.3017]) tensor([0.4581, 0.1503, 0.2186, 0.1730]) -Greedy action tensor([ 0.3100, -0.0681, -0.0565, -0.0429]) tensor([0.3246, 0.2224, 0.2250, 0.2281]) -Greedy action tensor([ 1.0554, -0.6162, 0.0173, -0.4081]) tensor([0.5639, 0.1060, 0.1997, 0.1305]) -Greedy action tensor([ 0.8351, -0.5590, 0.1170, -0.3574]) tensor([0.4904, 0.1216, 0.2391, 0.1488]) -Greedy action tensor([ 0.9884, -0.7153, -0.0495, -0.3169]) tensor([0.5533, 0.1007, 0.1960, 0.1500]) -Greedy action tensor([ 0.2181, 0.2029, -0.2129, -0.1318]) tensor([0.2994, 0.2949, 0.1946, 0.2110]) -Greedy action tensor([ 0.8609, -0.3195, 0.0539, -0.1324]) tensor([0.4709, 0.1446, 0.2101, 0.1744]) -Greedy action tensor([ 0.2838, 0.0407, -0.0102, -0.2346]) tensor([0.3200, 0.2510, 0.2385, 0.1906]) -Greedy action tensor([ 0.8195, -0.4604, 0.0221, -0.3234]) tensor([0.4884, 0.1358, 0.2200, 0.1558]) -Greedy action tensor([ 0.8175, -0.5791, 0.0026, -0.3366]) tensor([0.4987, 0.1234, 0.2207, 0.1572]) -Greedy action tensor([ 0.6640, -0.5908, -0.2289, -0.3171]) tensor([0.4832, 0.1378, 0.1979, 0.1812]) -Greedy action tensor([ 0.7837, -0.7345, 0.0288, -0.8527]) tensor([0.5308, 0.1163, 0.2495, 0.1033]) -Greedy action tensor([ 1.2706, -0.8982, 0.1282, -0.5833]) tensor([0.6289, 0.0719, 0.2007, 0.0985]) -Greedy action tensor([ 0.3838, 0.0040, 0.2088, -0.0814]) tensor([0.3173, 0.2170, 0.2664, 0.1993]) -Greedy action tensor([ 1.2032, -0.8244, 0.0166, -0.6187]) tensor([0.6255, 0.0824, 0.1909, 0.1012]) -Greedy action tensor([ 0.8676, -0.5507, -0.3713, -0.4310]) tensor([0.5541, 0.1342, 0.1605, 0.1512]) -Greedy action tensor([ 0.4810, -0.1501, 0.0656, 0.0676]) tensor([0.3504, 0.1864, 0.2313, 0.2318]) -Greedy action tensor([ 0.7110, -0.4643, -0.1784, -0.3701]) tensor([0.4857, 0.1500, 0.1996, 0.1648]) -Greedy action tensor([ 1.1258, -0.5466, -0.1823, -0.5893]) tensor([0.6105, 0.1147, 0.1650, 0.1099]) -Greedy action tensor([ 0.4316, -0.0938, -0.0622, -0.2379]) tensor([0.3685, 0.2179, 0.2249, 0.1887]) -Greedy action tensor([ 0.7990, -0.4904, -0.0588, -0.3405]) tensor([0.4952, 0.1364, 0.2100, 0.1584]) -Greedy action tensor([ 0.7538, -0.4709, -0.0976, -0.0550]) tensor([0.4617, 0.1357, 0.1971, 0.2056]) -Greedy action tensor([ 0.9922, -0.7239, 0.0153, -0.4500]) tensor([0.5578, 0.1003, 0.2100, 0.1319]) -Greedy action tensor([ 0.7485, -0.4215, -0.1626, -0.3517]) tensor([0.4889, 0.1517, 0.1966, 0.1627]) -Greedy action tensor([ 1.0979, -0.6853, 0.0227, -0.5219]) tensor([0.5857, 0.0985, 0.1999, 0.1159]) -Greedy action tensor([ 0.8677, -0.6476, -0.1354, -0.5390]) tensor([0.5460, 0.1200, 0.2002, 0.1338]) -Greedy action tensor([ 1.2561, -0.6874, -0.1382, -0.5592]) tensor([0.6435, 0.0922, 0.1596, 0.1048]) -Greedy action tensor([ 0.6555, -0.2320, 0.0018, -0.6327]) tensor([0.4530, 0.1865, 0.2356, 0.1249]) -Greedy action tensor([ 1.0669, -0.6035, -0.0563, -0.7684]) tensor([0.5977, 0.1125, 0.1944, 0.0954]) -Greedy action tensor([ 0.3017, -0.2029, -0.0713, -0.1264]) tensor([0.3397, 0.2051, 0.2339, 0.2214]) -Greedy action tensor([ 0.8571, 0.0454, 0.2980, -0.4415]) tensor([0.4369, 0.1940, 0.2498, 0.1192]) -Greedy action tensor([ 0.6908, -0.2238, 0.0187, -0.0484]) tensor([0.4186, 0.1677, 0.2138, 0.1999]) -Greedy action tensor([ 0.7746, -0.5382, -0.1123, -0.2933]) tensor([0.4939, 0.1329, 0.2034, 0.1698]) -Greedy action tensor([ 6.9189e-01, -8.8901e-02, -1.8479e-02, 4.1379e-04]) tensor([0.4081, 0.1869, 0.2006, 0.2044]) -Greedy action tensor([ 0.4827, -0.2890, -0.0171, -0.4871]) tensor([0.4085, 0.1888, 0.2478, 0.1549]) -Greedy action tensor([ 0.7869, -0.3209, 0.0093, -0.1232]) tensor([0.4561, 0.1507, 0.2096, 0.1836]) -Greedy action tensor([ 0.8461, -0.4341, -0.0021, -0.2164]) tensor([0.4874, 0.1355, 0.2087, 0.1684]) -Greedy action tensor([ 0.8028, -0.6119, 0.0461, -0.6988]) tensor([0.5168, 0.1256, 0.2425, 0.1151]) -Greedy action tensor([ 1.0955, -0.4594, -0.0013, -0.6830]) tensor([0.5834, 0.1232, 0.1948, 0.0985]) -Greedy action tensor([ 0.6017, -0.2387, -0.1053, -0.1715]) tensor([0.4191, 0.1808, 0.2067, 0.1934]) -Greedy action tensor([ 0.3739, -0.0588, -0.0284, -0.4364]) tensor([0.3620, 0.2349, 0.2421, 0.1610]) -Greedy action tensor([ 0.3778, -0.1919, -0.0924, -0.1723]) tensor([0.3613, 0.2044, 0.2258, 0.2085]) -Greedy action tensor([ 0.9299, -0.5340, -0.1003, -0.6739]) tensor([0.5589, 0.1293, 0.1995, 0.1124]) -Greedy action tensor([ 0.9961, -0.7363, 0.0763, -0.5057]) tensor([0.5561, 0.0984, 0.2217, 0.1239]) -Greedy action tensor([ 0.6408, -0.3537, -0.1473, -0.2180]) tensor([0.4448, 0.1645, 0.2022, 0.1885]) -Greedy action tensor([ 0.6436, -0.5021, 0.0532, -0.3017]) tensor([0.4423, 0.1407, 0.2451, 0.1719]) -Greedy action tensor([ 0.4705, -0.3373, 0.0626, -0.2596]) tensor([0.3857, 0.1720, 0.2565, 0.1859]) -Greedy action tensor([ 0.7003, -0.4863, -0.0964, -0.3502]) tensor([0.4749, 0.1450, 0.2141, 0.1661]) -Greedy action tensor([ 0.6405, -0.4463, -0.0450, -0.2456]) tensor([0.4438, 0.1497, 0.2236, 0.1830]) -Greedy action tensor([ 0.5493, -0.1048, -0.0502, -0.1156]) tensor([0.3871, 0.2013, 0.2126, 0.1991]) -Greedy action tensor([ 0.8509, -0.5854, -0.1704, -0.3673]) tensor([0.5281, 0.1256, 0.1902, 0.1562]) -Greedy action tensor([ 0.5364, -0.2316, -0.0787, -0.0451]) tensor([0.3901, 0.1810, 0.2109, 0.2181]) -Greedy action tensor([-1.7883, -0.2195, 0.5874, -0.0768]) tensor([0.0453, 0.2173, 0.4869, 0.2506]) -Greedy action tensor([-1.8405, -0.3095, 0.6133, -0.1195]) tensor([0.0438, 0.2023, 0.5092, 0.2447]) -Greedy action tensor([-1.9193, -0.4420, 0.6615, -0.1677]) tensor([0.0411, 0.1799, 0.5423, 0.2367]) -Greedy action tensor([-1.8179, -0.3886, 0.6704, -0.0754]) tensor([0.0436, 0.1821, 0.5252, 0.2491]) -Greedy action tensor([-1.8584, -0.4564, 0.6297, -0.1353]) tensor([0.0440, 0.1790, 0.5302, 0.2467]) -Greedy action tensor([-1.7584, -0.4005, 0.2610, -0.1469]) tensor([0.0574, 0.2230, 0.4322, 0.2874]) -Greedy action tensor([-1.4490, -0.6279, 0.4404, 0.0967]) tensor([0.0686, 0.1559, 0.4538, 0.3218]) -Greedy action tensor([-0.5592, -0.1415, 0.5424, 1.0054]) tensor([0.0970, 0.1473, 0.2919, 0.4638]) -Greedy action tensor([-1.9444, -0.4496, 0.6677, -0.1805]) tensor([0.0401, 0.1789, 0.5468, 0.2341]) -Greedy action tensor([-1.8835, -0.3750, 0.6298, -0.1470]) tensor([0.0425, 0.1920, 0.5244, 0.2412]) -Greedy action tensor([-1.8704, -0.4248, 0.6194, -0.1394]) tensor([0.0436, 0.1849, 0.5255, 0.2460]) -Greedy action tensor([-1.9147, -0.4345, 0.6540, -0.1628]) tensor([0.0413, 0.1815, 0.5390, 0.2382]) -Greedy action tensor([-1.4956, 0.0755, 0.3873, -0.0605]) tensor([0.0603, 0.2902, 0.3963, 0.2532]) -Greedy action tensor([-1.7696, -0.4686, 0.5821, -0.0890]) tensor([0.0487, 0.1788, 0.5112, 0.2613]) -Greedy action tensor([-1.1682, -0.3314, 0.2489, 0.2932]) tensor([0.0851, 0.1966, 0.3512, 0.3671]) -Greedy action tensor([-1.5491, 0.5075, 0.3220, 0.0986]) tensor([0.0488, 0.3813, 0.3167, 0.2533]) -Greedy action tensor([-1.6760, -0.0541, 0.5371, -0.0100]) tensor([0.0488, 0.2470, 0.4461, 0.2581]) -Greedy action tensor([-1.8576, -0.3001, 0.6129, -0.1301]) tensor([0.0431, 0.2046, 0.5098, 0.2425]) -Greedy action tensor([-1.7367, -0.1651, 0.5551, -0.1726]) tensor([0.0488, 0.2350, 0.4829, 0.2333]) -Greedy action tensor([-1.9150, -0.4049, 0.6381, -0.1595]) tensor([0.0414, 0.1874, 0.5317, 0.2395]) -Greedy action tensor([-1.9376, -0.4376, 0.6614, -0.1760]) tensor([0.0404, 0.1810, 0.5434, 0.2352]) -Greedy action tensor([-1.8675, -0.3911, 0.6497, -0.1073]) tensor([0.0424, 0.1856, 0.5255, 0.2465]) -Greedy action tensor([-1.8385, -0.4300, 0.6165, -0.1251]) tensor([0.0449, 0.1835, 0.5226, 0.2490]) -Greedy action tensor([-1.8625, -0.1125, 0.5820, -0.0987]) tensor([0.0415, 0.2387, 0.4779, 0.2419]) -Greedy action tensor([-0.3815, 0.6712, 0.1130, 0.1352]) tensor([0.1392, 0.3990, 0.2283, 0.2334]) -Greedy action tensor([-1.8573, -0.4557, 0.6247, -0.1230]) tensor([0.0441, 0.1790, 0.5273, 0.2496]) -Greedy action tensor([-1.9046, -0.4506, 0.6515, -0.1589]) tensor([0.0418, 0.1791, 0.5393, 0.2398]) -Greedy action tensor([-1.9391, -0.4456, 0.6652, -0.1775]) tensor([0.0403, 0.1796, 0.5453, 0.2348]) -Greedy action tensor([-1.8598, -0.3470, 0.6432, -0.1297]) tensor([0.0427, 0.1940, 0.5222, 0.2411]) -Greedy action tensor([-1.8281, -0.4376, 0.5928, -0.1017]) tensor([0.0457, 0.1835, 0.5141, 0.2567]) -Greedy action tensor([-1.6785, -0.2321, 0.5431, 0.0122]) tensor([0.0503, 0.2135, 0.4636, 0.2726]) -Greedy action tensor([-1.9330, -0.4400, 0.6627, -0.1738]) tensor([0.0405, 0.1804, 0.5435, 0.2355]) -Greedy action tensor([-0.3684, 1.0793, 0.0507, 0.2526]) tensor([0.1158, 0.4926, 0.1761, 0.2155]) -Greedy action tensor([-1.9389, -0.4380, 0.6626, -0.1771]) tensor([0.0403, 0.1809, 0.5439, 0.2349]) -Greedy action tensor([-1.9467, -0.4529, 0.6693, -0.1811]) tensor([0.0400, 0.1783, 0.5477, 0.2340]) -Greedy action tensor([-1.5356, -0.3095, 0.6266, 0.0489]) tensor([0.0556, 0.1896, 0.4834, 0.2713]) -Greedy action tensor([-0.9540, 0.0141, 0.2432, -0.0279]) tensor([0.1056, 0.2781, 0.3497, 0.2666]) -Greedy action tensor([-1.9092, -0.3716, 0.6437, -0.1608]) tensor([0.0412, 0.1920, 0.5298, 0.2370]) -Greedy action tensor([-1.8047, -0.3352, 0.5901, -0.1001]) tensor([0.0458, 0.1993, 0.5027, 0.2521]) -Greedy action tensor([-1.6761, 0.2475, 0.4510, 0.0793]) tensor([0.0454, 0.3109, 0.3810, 0.2627]) -Greedy action tensor([-1.8690, -0.3143, 0.6239, -0.0908]) tensor([0.0421, 0.1993, 0.5093, 0.2492]) -Greedy action tensor([-1.2990, -0.3268, 0.6909, 0.6100]) tensor([0.0565, 0.1493, 0.4132, 0.3810]) -Greedy action tensor([-1.9433, -0.4462, 0.6663, -0.1790]) tensor([0.0402, 0.1795, 0.5459, 0.2344]) -Greedy action tensor([-0.7493, 0.8262, -0.3136, -0.2987]) tensor([0.1117, 0.5401, 0.1728, 0.1754]) -Greedy action tensor([-1.6091, -0.3117, 0.5567, 0.0265]) tensor([0.0540, 0.1977, 0.4711, 0.2772]) -Greedy action tensor([-1.8861, -0.3906, 0.6302, -0.1530]) tensor([0.0425, 0.1898, 0.5269, 0.2407]) -Greedy action tensor([-1.8171, -0.4235, 0.6035, -0.1211]) tensor([0.0460, 0.1854, 0.5177, 0.2509]) -Greedy action tensor([-0.6978, -0.6656, 0.4114, 0.4028]) tensor([0.1239, 0.1280, 0.3757, 0.3725]) -Greedy action tensor([-1.8914, -0.4477, 0.6454, -0.1521]) tensor([0.0424, 0.1797, 0.5363, 0.2416]) -Greedy action tensor([-1.1147, -0.2191, 0.2549, -0.0038]) tensor([0.0960, 0.2350, 0.3775, 0.2915]) -Greedy action tensor([-1.6379, -0.3994, 0.5398, -0.1362]) tensor([0.0563, 0.1942, 0.4968, 0.2527]) -Greedy action tensor([-1.9210, -0.2367, 0.6272, -0.1806]) tensor([0.0402, 0.2167, 0.5140, 0.2292]) -Greedy action tensor([-1.8967, -0.3729, 0.6472, -0.1284]) tensor([0.0414, 0.1898, 0.5264, 0.2424]) -Greedy action tensor([-1.9037, -0.3874, 0.6430, -0.1495]) tensor([0.0415, 0.1890, 0.5297, 0.2398]) -Greedy action tensor([-1.9061, -0.4655, 0.6460, -0.1672]) tensor([0.0421, 0.1778, 0.5404, 0.2396]) -Greedy action tensor([-1.8856, -0.4178, 0.6333, -0.1514]) tensor([0.0427, 0.1853, 0.5301, 0.2419]) -Greedy action tensor([-1.7851, -0.2916, 0.6350, -0.0744]) tensor([0.0450, 0.2003, 0.5059, 0.2489]) -Greedy action tensor([-1.7625, -0.3758, 0.5660, -0.0926]) tensor([0.0486, 0.1945, 0.4988, 0.2582]) -Greedy action tensor([-1.1393, 0.0167, 0.1443, 0.2871]) tensor([0.0837, 0.2658, 0.3021, 0.3484]) -Greedy action tensor([-1.8507, -0.4467, 0.6677, -0.1157]) tensor([0.0432, 0.1759, 0.5360, 0.2449]) -Greedy action tensor([-1.9355, -0.4539, 0.6602, -0.1766]) tensor([0.0406, 0.1788, 0.5447, 0.2359]) -Greedy action tensor([-1.8680, -0.3570, 0.6300, -0.1330]) tensor([0.0428, 0.1940, 0.5205, 0.2427]) -Greedy action tensor([-1.5805, 0.3055, 0.4233, -0.0379]) tensor([0.0508, 0.3349, 0.3768, 0.2376]) -Greedy action tensor([-1.2824, 0.0194, 0.5230, 0.5969]) tensor([0.0578, 0.2124, 0.3514, 0.3784]) -Greedy action tensor([-0.7960, -0.4957, 0.2654, 0.4102]) tensor([0.1165, 0.1573, 0.3368, 0.3893]) -Greedy action tensor([-1.3586, 0.0858, 0.3421, 0.0058]) tensor([0.0683, 0.2898, 0.3744, 0.2675]) -Greedy action tensor([-1.4890, 0.6821, 0.2920, 0.1263]) tensor([0.0482, 0.4229, 0.2863, 0.2426]) -Greedy action tensor([-1.6441, -0.2251, 0.5696, 0.0262]) tensor([0.0510, 0.2109, 0.4669, 0.2712]) -Greedy action tensor([-1.0249, -0.1967, 0.4190, 0.5737]) tensor([0.0802, 0.1835, 0.3397, 0.3966]) -Greedy action tensor([-1.8865, -0.4509, 0.6451, -0.1498]) tensor([0.0426, 0.1792, 0.5361, 0.2421]) -Greedy action tensor([-1.8942, -0.4004, 0.6386, -0.1817]) tensor([0.0424, 0.1888, 0.5338, 0.2350]) -Greedy action tensor([-1.6502, -0.3946, 0.5239, -0.0038]) tensor([0.0541, 0.1898, 0.4756, 0.2806]) -Greedy action tensor([-1.8721, -0.4422, 0.6316, -0.1453]) tensor([0.0434, 0.1814, 0.5310, 0.2442]) -Greedy action tensor([-1.9332, -0.4282, 0.6615, -0.1714]) tensor([0.0405, 0.1822, 0.5418, 0.2356]) -Greedy action tensor([-1.2697, -0.5550, 0.4335, 0.0728]) tensor([0.0809, 0.1653, 0.4442, 0.3097]) -Greedy action tensor([-1.8154, -0.0379, 0.5370, -0.0686]) tensor([0.0432, 0.2554, 0.4538, 0.2477]) -Greedy action tensor([-1.9335, -0.4391, 0.6604, -0.1741]) tensor([0.0406, 0.1808, 0.5429, 0.2357]) -Greedy action tensor([-1.8689, -0.4652, 0.6375, -0.1441]) tensor([0.0436, 0.1774, 0.5344, 0.2446]) -Greedy action tensor([-1.6503, 0.0121, 0.5587, 0.0019]) tensor([0.0486, 0.2560, 0.4421, 0.2534]) -Greedy action tensor([ 0.7186, -0.2672, 0.9284, 1.7944]) tensor([0.1805, 0.0674, 0.2227, 0.5294]) -Greedy action tensor([-1.6522, -0.3397, 0.8524, 0.3089]) tensor([0.0416, 0.1544, 0.5086, 0.2954]) -Greedy action tensor([ 1.8535, -0.7144, -0.5434, 0.2840]) tensor([0.7268, 0.0557, 0.0661, 0.1513]) -Greedy action tensor([ 1.7002, -1.0776, -0.0676, -0.0935]) tensor([0.7147, 0.0444, 0.1220, 0.1189]) -Greedy action tensor([ 1.1541, 0.0244, -0.1288, 0.4141]) tensor([0.4814, 0.1555, 0.1334, 0.2297]) -Greedy action tensor([ 1.3885, -0.5622, -0.1316, -0.0105]) tensor([0.6220, 0.0884, 0.1360, 0.1535]) -Greedy action tensor([ 1.5244, -0.0263, -0.4814, 0.3511]) tensor([0.6039, 0.1281, 0.0813, 0.1868]) -Greedy action tensor([ 1.7201, -0.2496, -0.8264, 1.0389]) tensor([0.5801, 0.0809, 0.0455, 0.2935]) -Greedy action tensor([ 1.7392, -1.0639, 0.0648, 0.2438]) tensor([0.6793, 0.0412, 0.1273, 0.1523]) -Greedy action tensor([ 1.5605, -0.5233, -0.4614, 0.7945]) tensor([0.5808, 0.0723, 0.0769, 0.2700]) -Greedy action tensor([ 1.4746, -0.7157, -0.3491, 0.3857]) tensor([0.6211, 0.0695, 0.1003, 0.2091]) -Greedy action tensor([ 1.7899, -0.7330, -0.3355, 0.5315]) tensor([0.6740, 0.0541, 0.0805, 0.1915]) -Greedy action tensor([ 1.8482, -0.5830, -0.3002, 0.4554]) tensor([0.6882, 0.0605, 0.0803, 0.1709]) -Greedy action tensor([ 1.6015, -0.2126, -0.7933, 0.1690]) tensor([0.6699, 0.1092, 0.0611, 0.1599]) -Greedy action tensor([ 1.1862, -0.3135, -0.2981, 0.2882]) tensor([0.5384, 0.1202, 0.1220, 0.2193]) -Greedy action tensor([ 1.6502, -1.1644, 0.1316, 0.6723]) tensor([0.6042, 0.0362, 0.1323, 0.2272]) -Greedy action tensor([ 0.9415, -0.6231, 0.1897, -0.1347]) tensor([0.4947, 0.1035, 0.2332, 0.1686]) -Greedy action tensor([ 2.0666, -0.8308, -0.1765, 0.6640]) tensor([0.7106, 0.0392, 0.0754, 0.1748]) -Greedy action tensor([ 1.5521, -0.2948, -0.7218, 0.2948]) tensor([0.6472, 0.1021, 0.0666, 0.1841]) -Greedy action tensor([ 0.2567, -0.0967, 0.1111, -0.0690]) tensor([0.3041, 0.2135, 0.2629, 0.2195]) -Greedy action tensor([ 1.9229, -0.4457, -0.0424, 0.3082]) tensor([0.6980, 0.0653, 0.0978, 0.1389]) -Greedy action tensor([ 1.7663, 0.5167, -0.3822, 0.2698]) tensor([0.6146, 0.1761, 0.0717, 0.1376]) -Greedy action tensor([ 1.1511, -0.2853, -0.6431, 0.6235]) tensor([0.5015, 0.1192, 0.0834, 0.2959]) -Greedy action tensor([ 1.0072, -0.6451, -0.4331, 0.4575]) tensor([0.4986, 0.0955, 0.1181, 0.2878]) -Greedy action tensor([ 1.4138, -0.3922, -0.2029, 0.4336]) tensor([0.5753, 0.0945, 0.1142, 0.2159]) -Greedy action tensor([ 1.1439, -0.7340, -0.1068, 0.1869]) tensor([0.5485, 0.0839, 0.1570, 0.2106]) -Greedy action tensor([ 2.4741, -0.6458, -0.4705, 0.2817]) tensor([0.8275, 0.0365, 0.0435, 0.0924]) -Greedy action tensor([ 1.2053, -0.1019, -0.5724, 0.8491]) tensor([0.4673, 0.1264, 0.0790, 0.3273]) -Greedy action tensor([ 1.1155, -0.4764, -0.5359, 0.2184]) tensor([0.5546, 0.1129, 0.1064, 0.2261]) -Greedy action tensor([ 0.8913, -0.2372, -0.1927, -0.0416]) tensor([0.4866, 0.1574, 0.1646, 0.1914]) -Greedy action tensor([ 2.0077, -1.0471, -0.3698, 0.6073]) tensor([0.7213, 0.0340, 0.0669, 0.1778]) -Greedy action tensor([ 1.9372, -0.7543, -0.4715, 0.1563]) tensor([0.7540, 0.0511, 0.0678, 0.1270]) -Greedy action tensor([ 1.2391, -0.0791, -0.7030, 0.3583]) tensor([0.5478, 0.1466, 0.0786, 0.2270]) -Greedy action tensor([ 1.5073, -0.5522, -0.4537, 0.1606]) tensor([0.6543, 0.0834, 0.0921, 0.1702]) -Greedy action tensor([ 1.4648, -0.8192, -0.1990, 0.4276]) tensor([0.6076, 0.0619, 0.1151, 0.2154]) -Greedy action tensor([ 1.8544, -0.9740, -0.5408, 1.2370]) tensor([0.5919, 0.0350, 0.0539, 0.3192]) -Greedy action tensor([ 1.3594, -0.5473, -0.6526, 0.3456]) tensor([0.6079, 0.0903, 0.0813, 0.2206]) -Greedy action tensor([ 1.1125, -0.3123, -0.4147, 0.1732]) tensor([0.5410, 0.1301, 0.1175, 0.2115]) -Greedy action tensor([ 1.4209, -0.6100, -0.3791, 0.5510]) tensor([0.5829, 0.0765, 0.0964, 0.2442]) -Greedy action tensor([ 1.2595, -0.4169, -0.2955, 0.0750]) tensor([0.5868, 0.1098, 0.1239, 0.1795]) -Greedy action tensor([ 1.2248, -0.5973, -0.1583, 0.3408]) tensor([0.5478, 0.0886, 0.1374, 0.2263]) -Greedy action tensor([ 0.9879, -0.0649, -0.2855, -0.0217]) tensor([0.5017, 0.1751, 0.1404, 0.1828]) -Greedy action tensor([ 2.3216, -0.3305, -0.3671, 0.4436]) tensor([0.7744, 0.0546, 0.0526, 0.1184]) -Greedy action tensor([ 1.1149, -0.5459, -0.3325, 0.5223]) tensor([0.5056, 0.0961, 0.1189, 0.2795]) -Greedy action tensor([ 1.7966, -0.5717, -0.5560, 0.6040]) tensor([0.6702, 0.0628, 0.0637, 0.2033]) -Greedy action tensor([ 1.7457, -0.6512, -0.3502, 0.2757]) tensor([0.6926, 0.0630, 0.0852, 0.1592]) -Greedy action tensor([ 1.7044, -0.9553, -0.0259, 0.5296]) tensor([0.6426, 0.0450, 0.1139, 0.1985]) -Greedy action tensor([ 1.2726, -0.3272, -0.5344, 0.3843]) tensor([0.5626, 0.1136, 0.0923, 0.2314]) -Greedy action tensor([ 1.4965, -0.1639, -0.9181, 0.2168]) tensor([0.6420, 0.1220, 0.0574, 0.1786]) -Greedy action tensor([ 1.7498, -0.5875, -0.3854, 0.2214]) tensor([0.6985, 0.0675, 0.0826, 0.1515]) -Greedy action tensor([ 1.5639, 0.4692, -0.0605, -0.4989]) tensor([0.6029, 0.2017, 0.1188, 0.0766]) -Greedy action tensor([ 1.7246, -0.8216, -0.3345, -0.0857]) tensor([0.7302, 0.0572, 0.0931, 0.1195]) -Greedy action tensor([ 2.1311, -1.3745, -0.2053, 0.2210]) tensor([0.7845, 0.0236, 0.0758, 0.1162]) -Greedy action tensor([ 1.3283, -0.4676, -0.4208, 0.4466]) tensor([0.5701, 0.0946, 0.0992, 0.2361]) -Greedy action tensor([ 1.9841, -0.9766, -0.5043, 0.2274]) tensor([0.7649, 0.0396, 0.0635, 0.1320]) -Greedy action tensor([ 1.5898, -0.2903, -0.5355, 0.2655]) tensor([0.6502, 0.0992, 0.0776, 0.1730]) -Greedy action tensor([ 0.6853, -0.3167, 0.0783, 0.2606]) tensor([0.3897, 0.1431, 0.2124, 0.2549]) -Greedy action tensor([ 1.8497, -0.2894, -0.3091, 0.0244]) tensor([0.7172, 0.0845, 0.0828, 0.1156]) -Greedy action tensor([ 1.4765, -0.7212, -0.1433, -0.0225]) tensor([0.6526, 0.0725, 0.1292, 0.1458]) -Greedy action tensor([ 1.5484, -0.6200, -0.1373, 0.2545]) tensor([0.6354, 0.0727, 0.1177, 0.1742]) -Greedy action tensor([ 1.2586, -0.1314, -0.2214, 0.5770]) tensor([0.5044, 0.1256, 0.1148, 0.2551]) -Greedy action tensor([ 1.3176, -0.2303, -0.4061, 0.4293]) tensor([0.5548, 0.1180, 0.0990, 0.2282]) -Greedy action tensor([ 2.0933, -1.1599, -0.0649, 0.5467]) tensor([0.7315, 0.0283, 0.0845, 0.1558]) -Greedy action tensor([ 1.4566e+00, -1.2672e-03, -9.3596e-01, 1.1073e-01]) tensor([0.6311, 0.1469, 0.0577, 0.1643]) -Greedy action tensor([ 2.2800, -1.1360, -0.1367, 0.5075]) tensor([0.7740, 0.0254, 0.0691, 0.1315]) -Greedy action tensor([ 1.3240, -0.5509, -0.1666, 0.2577]) tensor([0.5804, 0.0890, 0.1307, 0.1998]) -Greedy action tensor([ 0.6699, -0.2164, -0.1268, 0.0087]) tensor([0.4203, 0.1732, 0.1895, 0.2170]) -Greedy action tensor([ 1.4404, -0.3273, -0.7935, 0.2734]) tensor([0.6293, 0.1074, 0.0674, 0.1959]) -Greedy action tensor([ 1.5290, 0.2310, -0.4500, 0.5425]) tensor([0.5605, 0.1531, 0.0775, 0.2090]) -Greedy action tensor([ 1.3185, -1.1231, -0.5490, 0.8171]) tensor([0.5414, 0.0471, 0.0836, 0.3279]) -Greedy action tensor([ 1.5222, -0.8838, -0.0743, 0.1800]) tensor([0.6435, 0.0580, 0.1304, 0.1681]) -Greedy action tensor([ 1.9084, -0.5071, -0.7543, 0.8211]) tensor([0.6683, 0.0597, 0.0466, 0.2253]) -Greedy action tensor([ 1.4580, -0.0593, -0.7156, 0.6231]) tensor([0.5659, 0.1241, 0.0644, 0.2456]) -Greedy action tensor([ 1.5290, -0.3357, -0.3571, 0.2776]) tensor([0.6279, 0.0973, 0.0952, 0.1796]) -Greedy action tensor([ 1.8438, -0.9416, -0.2787, 0.4948]) tensor([0.6940, 0.0428, 0.0831, 0.1801]) -Greedy action tensor([ 1.3329, -0.5388, -0.1935, 0.2877]) tensor([0.5804, 0.0893, 0.1261, 0.2041]) -Greedy action tensor([ 1.4156, -0.1603, -0.4094, 0.3147]) tensor([0.5880, 0.1216, 0.0948, 0.1956]) -Greedy action tensor([ 2.2408, -1.3143, -0.1081, 0.5768]) tensor([0.7614, 0.0218, 0.0727, 0.1442]) -Greedy action tensor([ 1.3516, 0.1289, -0.2056, 0.4777]) tensor([0.5202, 0.1532, 0.1096, 0.2171]) -Greedy action tensor([ 1.3274, -0.3469, -0.1112, 0.3365]) tensor([0.5568, 0.1044, 0.1321, 0.2067]) -Greedy action tensor([ 1.4333, -0.8236, -0.1666, 0.6489]) tensor([0.5672, 0.0594, 0.1145, 0.2589]) -Greedy action tensor([ 1.7401, -0.3927, -0.5909, 0.3202]) tensor([0.6861, 0.0813, 0.0667, 0.1659]) -Greedy action tensor([ 1.1214, -0.0796, -0.7060, -0.1164]) tensor([0.5709, 0.1718, 0.0918, 0.1656]) -Greedy action tensor([-0.2457, -1.1802, -0.1344, -1.2959]) tensor([0.3496, 0.1373, 0.3907, 0.1223]) -Greedy action tensor([-0.4270, -0.1360, -0.6937, -0.4166]) tensor([0.2431, 0.3252, 0.1862, 0.2456]) -Greedy action tensor([-0.0050, 0.7104, 0.3476, -0.6878]) tensor([0.2011, 0.4112, 0.2861, 0.1016]) -Greedy action tensor([-0.1581, 0.1636, -0.1076, -0.1530]) tensor([0.2254, 0.3109, 0.2371, 0.2266]) -Greedy action tensor([-0.2028, 0.3995, 0.0994, -0.8947]) tensor([0.2137, 0.3903, 0.2891, 0.1070]) -Greedy action tensor([ 0.7410, -0.6109, 0.0420, -0.2980]) tensor([0.4740, 0.1226, 0.2356, 0.1677]) -Greedy action tensor([ 0.3803, -0.4052, 0.9360, 0.5812]) tensor([0.2262, 0.1031, 0.3942, 0.2765]) -Greedy action tensor([-0.1720, -0.3384, -0.0690, 0.2778]) tensor([0.2211, 0.1872, 0.2451, 0.3466]) -Greedy action tensor([ 0.0802, -0.5435, 0.6794, -0.3519]) tensor([0.2496, 0.1338, 0.4545, 0.1621]) -Greedy action tensor([ 0.7017, -0.5375, 0.4682, 0.0014]) tensor([0.3879, 0.1123, 0.3071, 0.1926]) -Greedy action tensor([-0.3046, -0.8910, -0.5489, -0.1893]) tensor([0.2889, 0.1607, 0.2263, 0.3242]) -Greedy action tensor([ 0.4955, -1.7130, -0.1971, 0.3870]) tensor([0.3988, 0.0438, 0.1995, 0.3578]) -Greedy action tensor([ 0.5117, -1.2712, 0.0492, -0.8569]) tensor([0.4873, 0.0819, 0.3068, 0.1240]) -Greedy action tensor([ 0.4334, 0.4492, 0.2878, -0.0960]) tensor([0.2882, 0.2928, 0.2492, 0.1698]) -Greedy action tensor([-0.5876, -1.7862, -0.5126, 0.9896]) tensor([0.1385, 0.0418, 0.1493, 0.6705]) -Greedy action tensor([ 1.0893, 0.1627, 0.4447, -0.1419]) tensor([0.4519, 0.1789, 0.2372, 0.1319]) -Greedy action tensor([ 0.6879, -1.3993, 1.2325, -0.4448]) tensor([0.3154, 0.0391, 0.5438, 0.1016]) -Greedy action tensor([ 0.6478, -1.4237, 0.4325, -0.4275]) tensor([0.4398, 0.0554, 0.3546, 0.1501]) -Greedy action tensor([ 0.6215, -0.5475, -0.3680, 0.1260]) tensor([0.4364, 0.1356, 0.1622, 0.2659]) -Greedy action tensor([ 0.2866, 1.0726, 0.3464, -0.2426]) tensor([0.2064, 0.4529, 0.2191, 0.1216]) -Greedy action tensor([ 0.3560, -1.1410, 0.1427, 0.4816]) tensor([0.3159, 0.0707, 0.2552, 0.3582]) -Greedy action tensor([ 1.6045, 0.2503, 1.3093, -0.5500]) tensor([0.4720, 0.1219, 0.3514, 0.0547]) -Greedy action tensor([ 0.8351, 0.2863, 0.6042, -0.4953]) tensor([0.3794, 0.2191, 0.3012, 0.1003]) -Greedy action tensor([ 1.1717, -0.4633, -0.4635, -0.1104]) tensor([0.5998, 0.1169, 0.1169, 0.1664]) -Greedy action tensor([-0.0492, -0.1201, -0.5889, -0.1692]) tensor([0.2940, 0.2739, 0.1714, 0.2608]) -Greedy action tensor([ 0.0743, -0.2634, 0.4851, -0.0825]) tensor([0.2453, 0.1750, 0.3699, 0.2097]) -Greedy action tensor([-0.1783, -1.2665, 0.4306, 0.3488]) tensor([0.2054, 0.0692, 0.3776, 0.3479]) -Greedy action tensor([ 0.4058, -0.8706, -0.4820, 0.2172]) tensor([0.3970, 0.1108, 0.1634, 0.3288]) -Greedy action tensor([-0.0456, -0.4423, 0.1156, 0.2371]) tensor([0.2396, 0.1611, 0.2815, 0.3178]) -Greedy action tensor([-0.9660, -0.6897, -0.4528, 0.1838]) tensor([0.1399, 0.1845, 0.2338, 0.4418]) -Greedy action tensor([ 0.2371, -0.8645, 0.5648, -0.2205]) tensor([0.2983, 0.0991, 0.4139, 0.1887]) -Greedy action tensor([ 0.5277, -1.2883, 1.0271, -0.6010]) tensor([0.3191, 0.0519, 0.5258, 0.1032]) -Greedy action tensor([-0.7667, -0.3533, 0.4824, -1.3543]) tensor([0.1526, 0.2307, 0.5320, 0.0848]) -Greedy action tensor([ 1.0833, 0.4274, 0.8233, -0.2901]) tensor([0.3932, 0.2041, 0.3032, 0.0996]) -Greedy action tensor([ 0.2445, -0.2082, 1.3302, 0.3612]) tensor([0.1748, 0.1112, 0.5176, 0.1964]) -Greedy action tensor([ 0.4899, -0.9880, -0.0590, 0.1738]) tensor([0.3945, 0.0900, 0.2279, 0.2876]) -Greedy action tensor([-0.0629, -0.8779, 1.6967, -1.1573]) tensor([0.1318, 0.0583, 0.7657, 0.0441]) -Greedy action tensor([-0.3076, -0.3352, 1.0771, -1.0676]) tensor([0.1554, 0.1512, 0.6207, 0.0727]) -Greedy action tensor([-0.9205, -0.3640, 0.4629, -0.5753]) tensor([0.1228, 0.2142, 0.4897, 0.1734]) -Greedy action tensor([ 0.0643, 0.4243, 0.0898, -0.3685]) tensor([0.2434, 0.3489, 0.2497, 0.1579]) -Greedy action tensor([ 0.9263, -0.1098, -0.8814, -0.4230]) tensor([0.5624, 0.1995, 0.0922, 0.1459]) -Greedy action tensor([ 0.8632, -1.4795, -0.1430, 0.3566]) tensor([0.4844, 0.0465, 0.1771, 0.2919]) -Greedy action tensor([1.0978, 0.2799, 0.1615, 0.1223]) tensor([0.4524, 0.1997, 0.1774, 0.1706]) -Greedy action tensor([ 0.4197, -0.8058, 0.5466, 0.5250]) tensor([0.2825, 0.0829, 0.3207, 0.3138]) -Greedy action tensor([-0.6355, -0.2289, 0.3741, -0.6277]) tensor([0.1599, 0.2401, 0.4388, 0.1611]) -Greedy action tensor([-0.1660, -0.1276, 1.8182, -0.2224]) tensor([0.0975, 0.1013, 0.7091, 0.0921]) -Greedy action tensor([ 0.2403, 0.4898, 0.2557, -0.6678]) tensor([0.2701, 0.3467, 0.2743, 0.1089]) -Greedy action tensor([-1.3619, -0.0830, -0.6222, -0.9266]) tensor([0.1215, 0.4363, 0.2545, 0.1877]) -Greedy action tensor([ 0.1795, 1.0343, 0.3147, -0.4129]) tensor([0.1981, 0.4656, 0.2267, 0.1095]) -Greedy action tensor([ 0.4608, -1.2798, 0.3833, -0.3649]) tensor([0.3939, 0.0691, 0.3645, 0.1725]) -Greedy action tensor([-0.6994, -1.5462, -0.2263, 0.6538]) tensor([0.1448, 0.0621, 0.2325, 0.5606]) -Greedy action tensor([-0.2858, 0.3107, 1.0003, -0.2378]) tensor([0.1336, 0.2426, 0.4836, 0.1402]) -Greedy action tensor([ 0.3911, -0.1457, 0.5041, -0.3508]) tensor([0.3144, 0.1838, 0.3520, 0.1497]) -Greedy action tensor([-3.2663e-05, -8.8573e-01, -1.0651e+00, -1.9766e-01]) tensor([0.3879, 0.1600, 0.1337, 0.3184]) -Greedy action tensor([-0.0779, 0.3436, 0.2201, -0.4213]) tensor([0.2183, 0.3327, 0.2941, 0.1549]) -Greedy action tensor([ 0.0677, -1.0549, -0.5537, 0.1898]) tensor([0.3342, 0.1088, 0.1795, 0.3776]) -Greedy action tensor([-0.3005, -0.2667, 0.9556, -0.1467]) tensor([0.1490, 0.1541, 0.5232, 0.1737]) -Greedy action tensor([ 1.3814, -0.1829, -0.1616, -0.9326]) tensor([0.6571, 0.1375, 0.1404, 0.0650]) -Greedy action tensor([-0.3074, -1.0401, 0.0809, -0.0255]) tensor([0.2336, 0.1123, 0.3444, 0.3097]) -Greedy action tensor([-0.5601, -0.4268, 0.5363, -0.9823]) tensor([0.1727, 0.1973, 0.5169, 0.1132]) -Greedy action tensor([-0.0931, 0.1434, 0.5282, 0.3089]) tensor([0.1778, 0.2253, 0.3310, 0.2658]) -Greedy action tensor([-0.3425, 0.5490, -0.1537, -0.4398]) tensor([0.1801, 0.4391, 0.2175, 0.1634]) -Greedy action tensor([ 0.4335, -0.2084, -0.9721, -0.0151]) tensor([0.4149, 0.2184, 0.1017, 0.2649]) -Greedy action tensor([ 0.3603, -1.3874, -0.5253, 1.5499]) tensor([0.2052, 0.0357, 0.0847, 0.6744]) -Greedy action tensor([ 0.8255, -0.5218, -0.0938, 0.8299]) tensor([0.3755, 0.0976, 0.1498, 0.3771]) -Greedy action tensor([ 0.5087, -0.5205, 0.7813, -0.1148]) tensor([0.3118, 0.1114, 0.4096, 0.1672]) -Greedy action tensor([ 0.1657, -0.8580, 0.1576, 0.3406]) tensor([0.2823, 0.1014, 0.2800, 0.3363]) -Greedy action tensor([-1.5837, 0.4043, -0.7347, -0.5990]) tensor([0.0751, 0.5483, 0.1755, 0.2011]) -Greedy action tensor([ 0.3908, -0.2154, 0.9625, -0.3211]) tensor([0.2627, 0.1433, 0.4652, 0.1289]) -Greedy action tensor([-1.5111, -0.7808, -0.5952, 0.2724]) tensor([0.0868, 0.1801, 0.2168, 0.5163]) -Greedy action tensor([ 0.3293, -0.2437, 0.5991, -0.5859]) tensor([0.3054, 0.1722, 0.4000, 0.1223]) -Greedy action tensor([ 0.7210, -0.8096, -0.2312, -0.3913]) tensor([0.5178, 0.1121, 0.1998, 0.1703]) -Greedy action tensor([ 0.3103, -0.3764, -1.4843, -1.1234]) tensor([0.5241, 0.2638, 0.0871, 0.1250]) -Greedy action tensor([-1.3044, -1.2762, 0.5458, -0.9079]) tensor([0.1013, 0.1041, 0.6441, 0.1505]) -Greedy action tensor([ 0.9132, -0.3951, 0.8598, -0.7615]) tensor([0.4157, 0.1124, 0.3941, 0.0779]) -Greedy action tensor([ 4.0143e-01, 8.9782e-04, 1.1278e+00, -9.2830e-03]) tensor([0.2272, 0.1522, 0.4698, 0.1507]) -Greedy action tensor([ 0.4008, -0.7524, 1.2011, -0.8078]) tensor([0.2604, 0.0822, 0.5797, 0.0778]) -Greedy action tensor([ 0.0581, -0.2008, 1.1504, -0.3495]) tensor([0.1846, 0.1425, 0.5502, 0.1228]) -Greedy action tensor([ 0.5972, 0.3301, -0.2497, 0.6249]) tensor([0.3103, 0.2376, 0.1331, 0.3190]) -Greedy action tensor([ 0.2829, -1.4194, -0.5914, 0.4275]) tensor([0.3630, 0.0662, 0.1514, 0.4194]) -Greedy action tensor([ 2.0243, -1.0373, 1.4037, 0.3528]) tensor([0.5642, 0.0264, 0.3033, 0.1061]) -Greedy action tensor([ 0.6745, -0.4392, -0.0672, -0.1729]) tensor([0.4478, 0.1470, 0.2133, 0.1919]) -Greedy action tensor([ 0.7467, -0.4934, -0.0102, -0.1714]) tensor([0.4634, 0.1341, 0.2174, 0.1850]) -Greedy action tensor([ 5.6480e-01, -4.0522e-04, -7.4842e-02, -2.1685e-01]) tensor([0.3916, 0.2225, 0.2066, 0.1792]) -Greedy action tensor([ 0.8719, -0.4031, -0.1399, -0.2987]) tensor([0.5120, 0.1431, 0.1861, 0.1588]) -Greedy action tensor([ 1.5502, -1.0542, -0.1734, -0.8272]) tensor([0.7434, 0.0550, 0.1326, 0.0690]) -Greedy action tensor([ 0.5311, -0.4038, -0.1995, -0.0527]) tensor([0.4112, 0.1614, 0.1980, 0.2293]) -Greedy action tensor([ 0.9052, -0.3419, -0.0667, -0.1709]) tensor([0.4984, 0.1432, 0.1885, 0.1699]) -Greedy action tensor([ 1.1756, -0.9687, 0.0720, -0.5046]) tensor([0.6116, 0.0716, 0.2028, 0.1140]) -Greedy action tensor([ 0.4522, 0.0144, -0.0243, -0.0382]) tensor([0.3474, 0.2242, 0.2157, 0.2127]) -Greedy action tensor([ 0.7889, -0.5936, -0.0721, -0.3426]) tensor([0.5009, 0.1257, 0.2118, 0.1616]) -Greedy action tensor([ 0.7909, -0.4826, -0.0339, -0.3556]) tensor([0.4912, 0.1375, 0.2153, 0.1561]) -Greedy action tensor([ 0.5737, -0.1431, -0.0468, -0.6274]) tensor([0.4298, 0.2099, 0.2311, 0.1293]) -Greedy action tensor([ 0.5375, -0.4496, 0.0039, -0.4113]) tensor([0.4262, 0.1588, 0.2500, 0.1650]) -Greedy action tensor([ 0.8538, -0.5032, -0.1013, -0.3097]) tensor([0.5116, 0.1317, 0.1969, 0.1598]) -Greedy action tensor([ 0.6237, -0.4025, -0.1142, -0.1867]) tensor([0.4384, 0.1571, 0.2096, 0.1949]) -Greedy action tensor([ 0.6524, -0.3306, 0.0383, -0.5197]) tensor([0.4494, 0.1682, 0.2432, 0.1392]) -Greedy action tensor([ 1.1004, -0.6289, -0.0567, -0.3901]) tensor([0.5824, 0.1033, 0.1831, 0.1312]) -Greedy action tensor([ 0.8616, -0.4134, 0.1158, -0.5107]) tensor([0.4982, 0.1392, 0.2363, 0.1263]) -Greedy action tensor([ 0.7419, -0.5055, -0.0068, -0.7607]) tensor([0.5043, 0.1449, 0.2385, 0.1122]) -Greedy action tensor([ 0.5114, -0.1429, -0.0211, -0.1659]) tensor([0.3824, 0.1988, 0.2245, 0.1943]) -Greedy action tensor([ 0.1890, 0.2093, -0.2415, -0.2108]) tensor([0.2993, 0.3054, 0.1946, 0.2007]) -Greedy action tensor([ 0.7628, -0.7797, -0.0774, -0.3352]) tensor([0.5053, 0.1081, 0.2181, 0.1685]) -Greedy action tensor([ 0.2367, 0.5203, -0.1475, -0.3578]) tensor([0.2808, 0.3729, 0.1912, 0.1550]) -Greedy action tensor([ 0.5084, -0.1079, -0.1676, -0.0504]) tensor([0.3816, 0.2060, 0.1941, 0.2182]) -Greedy action tensor([ 0.8015, -0.2513, 0.0203, -0.4051]) tensor([0.4748, 0.1657, 0.2174, 0.1421]) -Greedy action tensor([ 0.8807, -0.6169, -0.0713, -0.4861]) tensor([0.5363, 0.1200, 0.2070, 0.1367]) -Greedy action tensor([ 0.6281, -0.4955, 0.1249, -0.3599]) tensor([0.4344, 0.1412, 0.2626, 0.1617]) -Greedy action tensor([ 0.7824, -0.3798, 0.0413, -0.3045]) tensor([0.4702, 0.1471, 0.2241, 0.1586]) -Greedy action tensor([ 0.5938, -0.3086, -0.0659, -0.1544]) tensor([0.4174, 0.1693, 0.2158, 0.1975]) -Greedy action tensor([ 0.9271, -0.6059, -0.0635, -0.4783]) tensor([0.5457, 0.1178, 0.2026, 0.1338]) -Greedy action tensor([ 0.8766, -0.8963, 0.1754, -0.3784]) tensor([0.5126, 0.0871, 0.2542, 0.1461]) -Greedy action tensor([ 0.4844, -0.3161, 0.0164, -0.3256]) tensor([0.3968, 0.1782, 0.2485, 0.1765]) -Greedy action tensor([ 0.7891, -0.2104, -0.2145, -0.2493]) tensor([0.4788, 0.1762, 0.1755, 0.1695]) -Greedy action tensor([ 1.1239, -0.6868, 0.0092, -0.4463]) tensor([0.5884, 0.0962, 0.1930, 0.1224]) -Greedy action tensor([ 0.6575, -0.3906, -0.0020, -0.1822]) tensor([0.4349, 0.1525, 0.2249, 0.1878]) -Greedy action tensor([ 0.5420, -0.2372, -0.0728, -0.0529]) tensor([0.3920, 0.1798, 0.2120, 0.2162]) -Greedy action tensor([ 1.0579, -0.8829, 0.0332, -0.7179]) tensor([0.5981, 0.0859, 0.2147, 0.1013]) -Greedy action tensor([ 1.0096, -1.0077, 0.1294, -0.5644]) tensor([0.5698, 0.0758, 0.2363, 0.1181]) -Greedy action tensor([ 0.5799, -0.6824, -0.0240, -0.0951]) tensor([0.4276, 0.1210, 0.2337, 0.2177]) -Greedy action tensor([ 0.6722, -0.4570, 0.0550, -0.3437]) tensor([0.4495, 0.1453, 0.2425, 0.1627]) -Greedy action tensor([ 0.8123, -0.6689, 0.0674, -0.2827]) tensor([0.4910, 0.1116, 0.2331, 0.1643]) -Greedy action tensor([ 0.9676, -0.7676, -0.0105, -0.3515]) tensor([0.5495, 0.0969, 0.2066, 0.1469]) -Greedy action tensor([ 0.5023, -0.1307, 0.0100, -0.3639]) tensor([0.3902, 0.2072, 0.2385, 0.1641]) -Greedy action tensor([ 1.0716, -0.8702, -0.0397, -0.5405]) tensor([0.5981, 0.0858, 0.1968, 0.1193]) -Greedy action tensor([ 0.8792, -0.7881, 0.1110, -0.3263]) tensor([0.5123, 0.0967, 0.2376, 0.1534]) -Greedy action tensor([ 0.7527, -0.5822, -0.0700, -0.5255]) tensor([0.5048, 0.1329, 0.2217, 0.1406]) -Greedy action tensor([ 1.1172, -0.7546, -0.1261, -0.5493]) tensor([0.6131, 0.0943, 0.1768, 0.1158]) -Greedy action tensor([ 0.8893, -0.6488, 0.1295, -0.5420]) tensor([0.5204, 0.1118, 0.2434, 0.1244]) -Greedy action tensor([ 1.2384, -0.8804, 0.0605, -0.7508]) tensor([0.6390, 0.0768, 0.1968, 0.0874]) -Greedy action tensor([ 0.6393, -0.4247, 0.0766, -0.3435]) tensor([0.4369, 0.1508, 0.2489, 0.1635]) -Greedy action tensor([ 0.4300, -0.0862, 0.0303, -0.0154]) tensor([0.3439, 0.2052, 0.2306, 0.2203]) -Greedy action tensor([ 0.6107, -0.1236, -0.1905, -0.2227]) tensor([0.4232, 0.2030, 0.1899, 0.1839]) -Greedy action tensor([ 0.8386, -0.7166, 0.0902, -0.4815]) tensor([0.5125, 0.1082, 0.2425, 0.1369]) -Greedy action tensor([ 0.5065, -0.3175, -0.0871, -0.3357]) tensor([0.4129, 0.1811, 0.2281, 0.1779]) -Greedy action tensor([ 0.8684, -0.5995, 0.0049, -0.3425]) tensor([0.5128, 0.1182, 0.2162, 0.1528]) -Greedy action tensor([ 1.2730, -0.6313, -0.0680, -0.5204]) tensor([0.6342, 0.0944, 0.1659, 0.1055]) -Greedy action tensor([ 1.1719, -0.5539, -0.0130, -0.5379]) tensor([0.6007, 0.1069, 0.1837, 0.1087]) -Greedy action tensor([ 1.0895, -1.0932, 0.0799, -0.3871]) tensor([0.5863, 0.0661, 0.2136, 0.1339]) -Greedy action tensor([ 1.0799, -0.6928, -0.0925, -0.3061]) tensor([0.5782, 0.0982, 0.1790, 0.1446]) -Greedy action tensor([ 1.2388, -0.4964, -0.1750, -0.3113]) tensor([0.6128, 0.1081, 0.1491, 0.1301]) -Greedy action tensor([ 0.5596, -0.2408, -0.0368, -0.1536]) tensor([0.4016, 0.1804, 0.2212, 0.1968]) -Greedy action tensor([0.5958, 0.0437, 0.0079, 0.0476]) tensor([0.3691, 0.2125, 0.2050, 0.2133]) -Greedy action tensor([ 0.6459, -0.0078, -0.0755, -0.3954]) tensor([0.4239, 0.2205, 0.2060, 0.1496]) -Greedy action tensor([ 0.5837, -0.2991, -0.0114, -0.2698]) tensor([0.4182, 0.1730, 0.2306, 0.1781]) -Greedy action tensor([ 0.6784, -0.4656, -0.0347, -0.4984]) tensor([0.4724, 0.1505, 0.2315, 0.1456]) -Greedy action tensor([ 0.3089, -0.0131, -0.0845, -0.3190]) tensor([0.3409, 0.2471, 0.2301, 0.1820]) -Greedy action tensor([ 0.6615, -0.7057, 0.1596, -0.6050]) tensor([0.4668, 0.1190, 0.2826, 0.1316]) -Greedy action tensor([ 0.5341, -0.1471, 0.0010, -0.0522]) tensor([0.3775, 0.1910, 0.2215, 0.2100]) -Greedy action tensor([ 1.2274, -0.3882, -0.0857, -0.4051]) tensor([0.6012, 0.1195, 0.1617, 0.1175]) -Greedy action tensor([ 0.5873, -0.6811, -0.0339, -0.1483]) tensor([0.4352, 0.1224, 0.2338, 0.2086]) -Greedy action tensor([ 0.8579, -0.5830, 0.0200, -0.3787]) tensor([0.5103, 0.1208, 0.2208, 0.1482]) -Greedy action tensor([ 0.3254, 0.0464, -0.0601, -0.1839]) tensor([0.3292, 0.2491, 0.2239, 0.1978]) -Greedy action tensor([ 0.8533, -0.0348, 0.1335, -0.4268]) tensor([0.4595, 0.1891, 0.2237, 0.1277]) -Greedy action tensor([ 0.4764, -0.3549, -0.0242, -0.2272]) tensor([0.3943, 0.1717, 0.2390, 0.1951]) -Greedy action tensor([ 0.4746, -0.3242, 0.0402, -0.4006]) tensor([0.3977, 0.1789, 0.2576, 0.1658]) -Greedy action tensor([ 1.1127, -0.6258, -0.1078, -0.2574]) tensor([0.5797, 0.1019, 0.1711, 0.1473]) -Greedy action tensor([ 0.6708, -0.5385, 0.0659, -0.3875]) tensor([0.4563, 0.1362, 0.2492, 0.1584]) -Greedy action tensor([ 0.2448, 0.1550, -0.2226, 0.0564]) tensor([0.2968, 0.2713, 0.1860, 0.2459]) -Greedy action tensor([ 0.1441, 0.2555, -0.1131, -0.2676]) tensor([0.2814, 0.3146, 0.2176, 0.1864]) -Greedy action tensor([ 0.6763, -0.5313, -0.0045, -0.4952]) tensor([0.4728, 0.1413, 0.2393, 0.1465]) -Greedy action tensor([ 0.8556, -0.4561, 0.0017, -0.7269]) tensor([0.5262, 0.1417, 0.2240, 0.1081]) -Greedy action tensor([ 1.5066, -0.3246, -0.3762, 0.2704]) tensor([0.6239, 0.1000, 0.0949, 0.1812]) -Greedy action tensor([ 1.6406, -0.5367, -0.4296, 0.5588]) tensor([0.6335, 0.0718, 0.0799, 0.2148]) -Greedy action tensor([ 1.6027, 0.5565, -0.2839, 0.2992]) tensor([0.5636, 0.1980, 0.0854, 0.1530]) -Greedy action tensor([ 1.6005, 0.0634, -0.1410, -0.0193]) tensor([0.6296, 0.1354, 0.1104, 0.1246]) -Greedy action tensor([ 2.3831, -0.8529, -0.7518, 0.8330]) tensor([0.7722, 0.0304, 0.0336, 0.1639]) -Greedy action tensor([ 1.8500, -0.6189, -0.1768, 0.0762]) tensor([0.7214, 0.0611, 0.0950, 0.1224]) -Greedy action tensor([ 1.5507, -0.3885, -0.2713, 0.1900]) tensor([0.6402, 0.0921, 0.1035, 0.1642]) -Greedy action tensor([ 1.5604, -0.5565, -0.1797, 0.3028]) tensor([0.6328, 0.0762, 0.1111, 0.1799]) -Greedy action tensor([ 1.3090, -0.3064, -0.4664, 0.5755]) tensor([0.5410, 0.1076, 0.0917, 0.2598]) -Greedy action tensor([ 1.3513, 0.0864, -0.3146, -0.4036]) tensor([0.6082, 0.1717, 0.1150, 0.1052]) -Greedy action tensor([ 2.2569, -0.9888, -0.1363, 0.3525]) tensor([0.7817, 0.0304, 0.0714, 0.1164]) -Greedy action tensor([ 1.5775, 0.3057, -0.8347, 0.6102]) tensor([0.5714, 0.1602, 0.0512, 0.2172]) -Greedy action tensor([ 1.1104, 0.0332, -0.3642, -0.2174]) tensor([0.5451, 0.1856, 0.1248, 0.1445]) -Greedy action tensor([ 1.5706, -0.7318, -0.7006, 0.2920]) tensor([0.6749, 0.0675, 0.0696, 0.1879]) -Greedy action tensor([ 2.6263, -1.8440, -0.4140, 0.6386]) tensor([0.8359, 0.0096, 0.0400, 0.1145]) -Greedy action tensor([ 2.0887, -1.0748, -0.6574, 0.3297]) tensor([0.7821, 0.0331, 0.0502, 0.1347]) -Greedy action tensor([ 1.3141, -0.4813, -0.0749, 0.2176]) tensor([0.5716, 0.0949, 0.1425, 0.1909]) -Greedy action tensor([ 1.7188, -0.6254, -0.2255, 0.1578]) tensor([0.6902, 0.0662, 0.0988, 0.1449]) -Greedy action tensor([ 1.0838, -0.1652, 0.1010, -0.1604]) tensor([0.5130, 0.1471, 0.1920, 0.1478]) -Greedy action tensor([ 1.2779, -0.4990, -0.2388, 0.3887]) tensor([0.5557, 0.0940, 0.1219, 0.2284]) -Greedy action tensor([ 1.3458, -0.2831, -0.6875, 0.4317]) tensor([0.5787, 0.1135, 0.0758, 0.2320]) -Greedy action tensor([ 1.4195, -0.6691, 0.0527, 0.4210]) tensor([0.5723, 0.0709, 0.1459, 0.2109]) -Greedy action tensor([ 1.2586, -0.1684, -0.3743, 0.2053]) tensor([0.5605, 0.1345, 0.1095, 0.1955]) -Greedy action tensor([ 1.4085, -0.2799, -0.0848, -0.1338]) tensor([0.6160, 0.1138, 0.1384, 0.1318]) -Greedy action tensor([ 1.1362, -0.1944, -0.5998, 0.1136]) tensor([0.5555, 0.1468, 0.0979, 0.1998]) -Greedy action tensor([ 2.0165, -1.0273, -0.4677, 0.2275]) tensor([0.7703, 0.0367, 0.0642, 0.1287]) -Greedy action tensor([ 1.1856, -0.0759, -0.6380, 0.1548]) tensor([0.5551, 0.1572, 0.0896, 0.1980]) -Greedy action tensor([ 1.9343, -0.7371, -0.3179, 0.4943]) tensor([0.7086, 0.0490, 0.0745, 0.1679]) -Greedy action tensor([ 1.9279, -0.5444, -0.5443, 1.0917]) tensor([0.6242, 0.0527, 0.0527, 0.2705]) -Greedy action tensor([ 1.6523, -0.5684, -0.2538, 0.3296]) tensor([0.6563, 0.0712, 0.0976, 0.1749]) -Greedy action tensor([ 1.3500, -0.3829, -0.5420, 0.2522]) tensor([0.6020, 0.1064, 0.0908, 0.2008]) -Greedy action tensor([ 1.1411, -0.2026, -0.0641, 0.2893]) tensor([0.5032, 0.1313, 0.1508, 0.2147]) -Greedy action tensor([ 1.0010, -0.7341, -0.0822, 0.3103]) tensor([0.4960, 0.0875, 0.1679, 0.2486]) -Greedy action tensor([ 1.5126, -0.5861, -0.2152, -0.0237]) tensor([0.6599, 0.0809, 0.1172, 0.1420]) -Greedy action tensor([ 1.3819, -0.2283, -0.3302, 0.4950]) tensor([0.5580, 0.1115, 0.1007, 0.2298]) -Greedy action tensor([ 2.0747, -0.2559, -0.6017, 0.2732]) tensor([0.7513, 0.0731, 0.0517, 0.1240]) -Greedy action tensor([ 1.1467, -0.5151, -0.0611, 0.1823]) tensor([0.5348, 0.1015, 0.1598, 0.2039]) -Greedy action tensor([ 1.3022, -0.2205, -0.2779, 0.5337]) tensor([0.5297, 0.1155, 0.1091, 0.2456]) -Greedy action tensor([ 1.3757, -0.2773, -0.8124, 0.2199]) tensor([0.6179, 0.1183, 0.0693, 0.1945]) -Greedy action tensor([ 1.8644, -0.1563, -0.0920, 0.0110]) tensor([0.6990, 0.0927, 0.0988, 0.1095]) -Greedy action tensor([ 1.5265, -0.9109, -0.2860, 0.1421]) tensor([0.6662, 0.0582, 0.1087, 0.1669]) -Greedy action tensor([ 1.9662, 0.4410, -0.3831, 0.4072]) tensor([0.6564, 0.1428, 0.0626, 0.1381]) -Greedy action tensor([ 1.7559, -0.0324, -0.3793, 0.4345]) tensor([0.6442, 0.1077, 0.0762, 0.1719]) -Greedy action tensor([ 2.0173, -0.7455, -0.4909, 0.6499]) tensor([0.7146, 0.0451, 0.0582, 0.1821]) -Greedy action tensor([ 1.3120, -0.3795, -0.2612, 0.2118]) tensor([0.5799, 0.1068, 0.1203, 0.1930]) -Greedy action tensor([ 1.4904, 0.3678, -0.0738, -0.1106]) tensor([0.5759, 0.1874, 0.1205, 0.1162]) -Greedy action tensor([ 1.5040, -0.4773, -0.2873, 0.2146]) tensor([0.6329, 0.0873, 0.1055, 0.1743]) -Greedy action tensor([ 1.8209, -0.8101, -0.3965, 0.4374]) tensor([0.6985, 0.0503, 0.0761, 0.1751]) -Greedy action tensor([ 1.3288, -0.3683, -0.6695, 0.4705]) tensor([0.5738, 0.1051, 0.0778, 0.2432]) -Greedy action tensor([ 1.9172, -0.5825, -0.5019, 0.2684]) tensor([0.7335, 0.0602, 0.0653, 0.1410]) -Greedy action tensor([ 1.7253, -0.6228, -0.5672, 0.6117]) tensor([0.6558, 0.0627, 0.0662, 0.2153]) -Greedy action tensor([ 2.0147, -0.6854, -0.7292, 0.6350]) tensor([0.7230, 0.0486, 0.0465, 0.1819]) -Greedy action tensor([ 1.0927, -0.1952, -0.1561, 0.2150]) tensor([0.5054, 0.1394, 0.1450, 0.2101]) -Greedy action tensor([ 2.5789, -1.4918, 0.2330, 1.1795]) tensor([0.7355, 0.0126, 0.0704, 0.1815]) -Greedy action tensor([ 1.5087, -0.3913, -0.8240, 0.5116]) tensor([0.6190, 0.0926, 0.0601, 0.2284]) -Greedy action tensor([ 1.7682, 0.3457, -0.3002, 0.1389]) tensor([0.6396, 0.1542, 0.0808, 0.1254]) -Greedy action tensor([ 1.2860, -0.6729, -0.1071, 0.4729]) tensor([0.5456, 0.0769, 0.1355, 0.2420]) -Greedy action tensor([ 2.0251, -0.2154, 0.1830, 0.5358]) tensor([0.6709, 0.0714, 0.1063, 0.1513]) -Greedy action tensor([ 2.0822, -0.6079, -0.5314, 0.5177]) tensor([0.7405, 0.0503, 0.0543, 0.1549]) -Greedy action tensor([ 1.0787, -0.4469, -0.2998, 0.5316]) tensor([0.4883, 0.1062, 0.1230, 0.2825]) -Greedy action tensor([ 1.3409, -0.4059, -0.0915, 0.5163]) tensor([0.5401, 0.0942, 0.1289, 0.2368]) -Greedy action tensor([ 1.9578, -0.3255, -0.3382, 0.5301]) tensor([0.6933, 0.0707, 0.0698, 0.1663]) -Greedy action tensor([ 1.3031, -0.2416, -0.3196, 0.5291]) tensor([0.5342, 0.1140, 0.1054, 0.2464]) -Greedy action tensor([ 1.4002, -0.5887, -0.4762, 0.3530]) tensor([0.6094, 0.0834, 0.0933, 0.2139]) -Greedy action tensor([ 1.7789, -0.5405, -0.9996, 0.2609]) tensor([0.7248, 0.0713, 0.0450, 0.1588]) -Greedy action tensor([ 1.5454, -0.9193, -0.2506, 0.2112]) tensor([0.6604, 0.0561, 0.1096, 0.1739]) -Greedy action tensor([ 1.2777, -0.6114, -0.4013, 0.3161]) tensor([0.5814, 0.0879, 0.1085, 0.2222]) -Greedy action tensor([ 1.6141, -0.8590, 0.0327, 0.5809]) tensor([0.6076, 0.0512, 0.1250, 0.2162]) -Greedy action tensor([ 2.2017, -1.1868, -0.1970, 0.8589]) tensor([0.7217, 0.0244, 0.0655, 0.1884]) -Greedy action tensor([ 1.8713, 0.0273, -0.8800, 0.4348]) tensor([0.6850, 0.1084, 0.0437, 0.1629]) -Greedy action tensor([ 1.5481, -1.0433, -0.4106, 0.6229]) tensor([0.6202, 0.0465, 0.0875, 0.2459]) -Greedy action tensor([ 1.9466, -1.0985, -0.3297, 0.6374]) tensor([0.7041, 0.0335, 0.0723, 0.1901]) -Greedy action tensor([ 1.3380, -0.5678, -0.1617, 0.4658]) tensor([0.5587, 0.0831, 0.1247, 0.2335]) -Greedy action tensor([ 1.3712, 0.3649, -0.2648, -0.4573]) tensor([0.5811, 0.2124, 0.1132, 0.0934]) -Greedy action tensor([ 1.3971, -0.7496, -0.2841, 0.3426]) tensor([0.6055, 0.0708, 0.1127, 0.2110]) -Greedy action tensor([ 1.2603, -0.0342, -0.4852, -0.3001]) tensor([0.6029, 0.1652, 0.1052, 0.1266]) -Greedy action tensor([ 1.3187, 0.0643, -0.8896, 0.4543]) tensor([0.5505, 0.1570, 0.0605, 0.2319]) -Greedy action tensor([ 1.0551, -0.5462, -0.4846, 0.2257]) tensor([0.5398, 0.1089, 0.1158, 0.2355]) -Greedy action tensor([ 2.0317, -0.6039, -0.3559, 0.4323]) tensor([0.7323, 0.0525, 0.0673, 0.1479]) -Greedy action tensor([ 1.9560, -0.6907, -0.5217, 0.6272]) tensor([0.7044, 0.0499, 0.0591, 0.1865]) -Greedy action tensor([ 1.2137, -0.6011, -0.5239, 0.5155]) tensor([0.5446, 0.0887, 0.0958, 0.2709]) -Greedy action tensor([ 0.9574, -0.7834, -0.7611, 0.7068]) tensor([0.4688, 0.0822, 0.0841, 0.3649]) -Greedy action tensor([ 0.7146, -0.5129, 0.7091, -0.8840]) tensor([0.4016, 0.1177, 0.3995, 0.0812]) -Greedy action tensor([-0.0062, 0.0329, 0.0124, -0.2911]) tensor([0.2624, 0.2729, 0.2674, 0.1974]) -Greedy action tensor([ 0.3180, -0.8156, -0.8361, -0.2675]) tensor([0.4558, 0.1467, 0.1437, 0.2538]) -Greedy action tensor([ 0.4042, 0.2074, -0.2792, -0.3848]) tensor([0.3596, 0.2954, 0.1816, 0.1634]) -Greedy action tensor([ 0.4190, -0.7073, -0.8651, 0.5801]) tensor([0.3602, 0.1168, 0.0998, 0.4232]) -Greedy action tensor([ 0.6188, -0.1313, 0.4392, -0.0943]) tensor([0.3574, 0.1688, 0.2986, 0.1752]) -Greedy action tensor([ 1.1230, -0.4455, -0.0940, 0.8357]) tensor([0.4435, 0.0924, 0.1313, 0.3328]) -Greedy action tensor([-0.7749, 0.1362, 0.8423, -0.2323]) tensor([0.0976, 0.2427, 0.4918, 0.1679]) -Greedy action tensor([-0.3643, -0.8485, 0.1814, -0.5323]) tensor([0.2388, 0.1472, 0.4121, 0.2019]) -Greedy action tensor([-0.5975, -0.2741, -0.9746, -0.2551]) tensor([0.2234, 0.3087, 0.1532, 0.3146]) -Greedy action tensor([ 0.7987, 0.1185, 0.7820, -0.4893]) tensor([0.3616, 0.1831, 0.3556, 0.0997]) -Greedy action tensor([ 0.8788, -1.2291, -0.0400, -0.4260]) tensor([0.5581, 0.0678, 0.2227, 0.1514]) -Greedy action tensor([-0.7265, -0.7071, -0.6976, -1.2228]) tensor([0.2734, 0.2787, 0.2814, 0.1664]) -Greedy action tensor([-0.2544, -2.0843, -0.4557, 0.1875]) tensor([0.2830, 0.0454, 0.2314, 0.4402]) -Greedy action tensor([ 1.1046, -0.4291, -0.6414, 0.7238]) tensor([0.4823, 0.1040, 0.0841, 0.3295]) -Greedy action tensor([ 0.7468, -0.8965, -0.1254, 0.2468]) tensor([0.4509, 0.0872, 0.1885, 0.2735]) -Greedy action tensor([-0.3919, -0.7859, 0.6707, -0.4982]) tensor([0.1829, 0.1233, 0.5293, 0.1645]) -Greedy action tensor([ 1.6889, -1.7003, 0.5679, 0.1813]) tensor([0.6324, 0.0213, 0.2062, 0.1401]) -Greedy action tensor([-0.3099, -1.0005, 0.1267, -0.1614]) tensor([0.2376, 0.1191, 0.3677, 0.2756]) -Greedy action tensor([ 0.4168, 0.3562, -0.3720, 0.6432]) tensor([0.2740, 0.2579, 0.1245, 0.3436]) -Greedy action tensor([-0.1376, -1.6723, -0.7249, -0.2084]) tensor([0.3699, 0.0797, 0.2056, 0.3447]) -Greedy action tensor([-0.0960, -0.7496, 0.7765, -0.8701]) tensor([0.2286, 0.1189, 0.5471, 0.1054]) -Greedy action tensor([ 0.1312, -0.0149, 0.0504, -0.7903]) tensor([0.3140, 0.2713, 0.2897, 0.1250]) -Greedy action tensor([ 1.0338, -0.7008, 1.2181, -1.0789]) tensor([0.4000, 0.0706, 0.4810, 0.0484]) -Greedy action tensor([-0.5727, -1.1794, 0.5745, -0.1564]) tensor([0.1610, 0.0878, 0.5071, 0.2441]) -Greedy action tensor([ 0.0720, -1.4024, -0.5966, -0.5407]) tensor([0.4380, 0.1003, 0.2244, 0.2373]) -Greedy action tensor([ 0.3034, -1.2897, 0.7995, 0.1375]) tensor([0.2708, 0.0551, 0.4447, 0.2294]) -Greedy action tensor([-1.1631, -0.8771, -0.3180, -0.1392]) tensor([0.1343, 0.1788, 0.3128, 0.3740]) -Greedy action tensor([ 0.8421, 0.2820, 0.3996, -0.0793]) tensor([0.3829, 0.2187, 0.2460, 0.1524]) -Greedy action tensor([ 0.8237, -0.1536, 0.7106, -0.2047]) tensor([0.3807, 0.1433, 0.3400, 0.1361]) -Greedy action tensor([-1.0902, -0.4821, 0.6754, -0.5409]) tensor([0.0960, 0.1764, 0.5613, 0.1663]) -Greedy action tensor([-0.2694, -0.3225, 1.1967, -0.7460]) tensor([0.1449, 0.1374, 0.6277, 0.0900]) -Greedy action tensor([ 0.0534, -1.0497, -1.0945, 0.0886]) tensor([0.3724, 0.1236, 0.1182, 0.3858]) -Greedy action tensor([ 0.5504, -1.2870, 0.9401, 1.8208]) tensor([0.1613, 0.0257, 0.2382, 0.5747]) -Greedy action tensor([ 0.2947, -0.3309, 1.4586, -0.4499]) tensor([0.1919, 0.1026, 0.6144, 0.0911]) -Greedy action tensor([ 0.0985, -0.4412, -0.1605, -0.9316]) tensor([0.3688, 0.2150, 0.2846, 0.1316]) -Greedy action tensor([ 0.9966, -1.4315, 1.5726, 0.1954]) tensor([0.3016, 0.0266, 0.5365, 0.1353]) -Greedy action tensor([ 0.1506, -1.2658, 0.0241, 0.4704]) tensor([0.2857, 0.0693, 0.2517, 0.3933]) -Greedy action tensor([ 0.1438, -1.5387, -0.6305, 0.7802]) tensor([0.2828, 0.0526, 0.1304, 0.5343]) -Greedy action tensor([ 0.1451, -0.6668, -0.7193, -0.3091]) tensor([0.4000, 0.1776, 0.1685, 0.2539]) -Greedy action tensor([ 0.4778, 0.3495, 0.1324, -0.2960]) tensor([0.3280, 0.2885, 0.2322, 0.1513]) -Greedy action tensor([-0.6046, -0.7978, -0.5037, 1.1644]) tensor([0.1137, 0.0937, 0.1258, 0.6668]) -Greedy action tensor([ 0.7188, -0.4906, 0.7426, 0.4150]) tensor([0.3267, 0.0975, 0.3346, 0.2412]) -Greedy action tensor([-0.3342, -0.1158, 0.9215, 0.2033]) tensor([0.1339, 0.1666, 0.4702, 0.2293]) -Greedy action tensor([-0.0909, -0.7754, -0.1872, 0.0869]) tensor([0.2772, 0.1398, 0.2518, 0.3312]) -Greedy action tensor([-0.7127, -0.3029, 0.4676, -0.1342]) tensor([0.1325, 0.1997, 0.4314, 0.2364]) -Greedy action tensor([-0.0568, -2.0621, 0.1157, 0.7444]) tensor([0.2197, 0.0296, 0.2611, 0.4896]) -Greedy action tensor([-0.7168, -1.2067, 0.8315, -0.0995]) tensor([0.1224, 0.0750, 0.5757, 0.2269]) -Greedy action tensor([ 0.1293, 0.5200, 0.3149, -0.3500]) tensor([0.2325, 0.3436, 0.2799, 0.1440]) -Greedy action tensor([-1.0451, -0.4786, 0.9818, -0.5820]) tensor([0.0837, 0.1476, 0.6356, 0.1331]) -Greedy action tensor([ 0.0167, -1.6052, 0.0342, 0.3668]) tensor([0.2751, 0.0543, 0.2800, 0.3905]) -Greedy action tensor([ 1.0100, -0.2617, -0.2129, -0.4786]) tensor([0.5554, 0.1557, 0.1635, 0.1254]) -Greedy action tensor([ 0.7983, -1.1639, -0.2150, 0.0891]) tensor([0.5011, 0.0704, 0.1819, 0.2466]) -Greedy action tensor([-0.7638, -1.4587, 0.5151, -0.6573]) tensor([0.1612, 0.0804, 0.5791, 0.1793]) -Greedy action tensor([-0.7030, -1.9373, 0.3245, -0.0587]) tensor([0.1670, 0.0486, 0.4665, 0.3180]) -Greedy action tensor([-0.1538, -1.6606, 0.5348, -0.1557]) tensor([0.2375, 0.0526, 0.4728, 0.2370]) -Greedy action tensor([-0.1261, -0.6395, 0.0992, 0.0469]) tensor([0.2475, 0.1481, 0.3101, 0.2943]) -Greedy action tensor([-0.0534, -0.1755, -0.3828, 0.1402]) tensor([0.2619, 0.2318, 0.1884, 0.3179]) -Greedy action tensor([-0.9836, -0.9119, -0.2907, -0.2122]) tensor([0.1603, 0.1723, 0.3206, 0.3468]) -Greedy action tensor([-0.0018, 0.6062, -0.2009, 0.3189]) tensor([0.1986, 0.3649, 0.1628, 0.2737]) -Greedy action tensor([-1.0335, -0.0795, 0.6923, -0.5691]) tensor([0.0926, 0.2403, 0.5199, 0.1473]) -Greedy action tensor([ 1.4421, 0.0085, 0.6283, -0.0681]) tensor([0.5256, 0.1253, 0.2329, 0.1161]) -Greedy action tensor([-0.3276, -1.1334, -0.1476, -0.5984]) tensor([0.2935, 0.1311, 0.3514, 0.2239]) -Greedy action tensor([-0.8629, 0.2111, -0.3772, -0.2221]) tensor([0.1342, 0.3929, 0.2181, 0.2548]) -Greedy action tensor([-0.1397, -1.6807, 0.7752, 1.2637]) tensor([0.1285, 0.0275, 0.3209, 0.5230]) -Greedy action tensor([ 1.5152, -1.1171, 1.1350, 1.1383]) tensor([0.4096, 0.0295, 0.2800, 0.2810]) -Greedy action tensor([ 0.2961, -0.6799, -0.0066, 0.1658]) tensor([0.3341, 0.1259, 0.2468, 0.2932]) -Greedy action tensor([ 1.7509, -0.6047, 1.4976, 0.3752]) tensor([0.4709, 0.0447, 0.3655, 0.1190]) -Greedy action tensor([ 1.2120, -0.3625, 1.6256, -0.1038]) tensor([0.3347, 0.0693, 0.5062, 0.0898]) -Greedy action tensor([ 0.7047, -0.7488, -0.8519, 0.7500]) tensor([0.4015, 0.0938, 0.0846, 0.4201]) -Greedy action tensor([-0.2016, -1.3689, 0.4703, -0.0701]) tensor([0.2268, 0.0706, 0.4440, 0.2586]) -Greedy action tensor([ 0.1729, -0.3420, -0.0819, 0.0282]) tensor([0.3088, 0.1846, 0.2394, 0.2672]) -Greedy action tensor([ 0.3041, -1.0734, -0.2401, -0.2951]) tensor([0.4199, 0.1059, 0.2436, 0.2306]) -Greedy action tensor([ 0.3761, -0.2248, 0.1626, 0.3415]) tensor([0.3010, 0.1650, 0.2431, 0.2908]) -Greedy action tensor([-0.2216, -0.7121, -0.2311, 0.1616]) tensor([0.2457, 0.1505, 0.2434, 0.3604]) -Greedy action tensor([-0.8624, -0.9299, -0.9521, -0.4669]) tensor([0.2307, 0.2157, 0.2109, 0.3427]) -Greedy action tensor([-0.3675, -0.0118, -1.0259, -0.6875]) tensor([0.2724, 0.3888, 0.1410, 0.1978]) -Greedy action tensor([-0.5928, -1.5533, -0.4174, -0.1949]) tensor([0.2461, 0.0942, 0.2933, 0.3664]) -Greedy action tensor([ 0.5156, 0.3061, 0.4913, -0.0456]) tensor([0.2978, 0.2415, 0.2907, 0.1699]) -Greedy action tensor([ 0.2687, 0.1829, 0.1208, -0.6848]) tensor([0.3159, 0.2899, 0.2725, 0.1217]) -Greedy action tensor([-1.8502, -0.4426, 0.6174, -0.1298]) tensor([0.0445, 0.1819, 0.5249, 0.2487]) -Greedy action tensor([-0.9214, 0.4210, 0.2085, -0.0344]) tensor([0.0966, 0.3698, 0.2990, 0.2345]) -Greedy action tensor([-1.7633, -0.3781, 0.6585, 0.0052]) tensor([0.0452, 0.1806, 0.5092, 0.2650]) -Greedy action tensor([-1.9369, -0.4382, 0.6640, -0.1764]) tensor([0.0404, 0.1807, 0.5441, 0.2348]) -Greedy action tensor([-1.9140, -0.3990, 0.6519, -0.1567]) tensor([0.0411, 0.1868, 0.5342, 0.2380]) -Greedy action tensor([-1.9390, -0.4487, 0.6673, -0.1788]) tensor([0.0403, 0.1790, 0.5463, 0.2344]) -Greedy action tensor([-1.9276, -0.3969, 0.6526, -0.1665]) tensor([0.0406, 0.1876, 0.5357, 0.2362]) -Greedy action tensor([-1.7890, -0.5096, 0.5851, -0.1000]) tensor([0.0482, 0.1732, 0.5176, 0.2609]) -Greedy action tensor([-1.7294, -0.1754, 0.5962, -0.0149]) tensor([0.0465, 0.2198, 0.4756, 0.2581]) -Greedy action tensor([-0.9741, -0.1953, 0.2412, -0.0231]) tensor([0.1094, 0.2384, 0.3689, 0.2832]) -Greedy action tensor([-1.7961, -0.3938, 0.5944, -0.0916]) tensor([0.0466, 0.1892, 0.5083, 0.2560]) -Greedy action tensor([-1.8480, -0.4314, 0.6853, -0.0834]) tensor([0.0424, 0.1750, 0.5346, 0.2479]) -Greedy action tensor([-1.9108, -0.4427, 0.6528, -0.1624]) tensor([0.0415, 0.1804, 0.5394, 0.2387]) -Greedy action tensor([-1.4534, -0.0120, 0.5184, 0.0541]) tensor([0.0591, 0.2497, 0.4244, 0.2668]) -Greedy action tensor([-1.9122, -0.4370, 0.6683, -0.1644]) tensor([0.0411, 0.1798, 0.5430, 0.2361]) -Greedy action tensor([-1.8066, -0.3945, 0.6029, -0.0961]) tensor([0.0459, 0.1886, 0.5113, 0.2542]) -Greedy action tensor([-1.8157, -0.4328, 0.6064, -0.1029]) tensor([0.0459, 0.1829, 0.5170, 0.2543]) -Greedy action tensor([-1.8833, -0.4449, 0.6380, -0.1534]) tensor([0.0429, 0.1809, 0.5341, 0.2421]) -Greedy action tensor([-1.5555, -0.4690, 0.9030, 0.4290]) tensor([0.0436, 0.1293, 0.5098, 0.3173]) -Greedy action tensor([-1.8419, -0.4246, 0.6060, -0.1165]) tensor([0.0448, 0.1850, 0.5185, 0.2517]) -Greedy action tensor([-0.8727, 0.9470, 0.0798, 0.4244]) tensor([0.0745, 0.4597, 0.1931, 0.2726]) -Greedy action tensor([ 0.2022, -0.1265, 0.6972, 0.7855]) tensor([0.1941, 0.1397, 0.3184, 0.3478]) -Greedy action tensor([-1.9202, -0.4120, 0.6538, -0.1676]) tensor([0.0410, 0.1851, 0.5375, 0.2364]) -Greedy action tensor([-1.7660, -0.2392, 0.5502, -0.1110]) tensor([0.0477, 0.2195, 0.4833, 0.2495]) -Greedy action tensor([-1.8986, -0.4445, 0.6494, -0.1530]) tensor([0.0420, 0.1799, 0.5372, 0.2408]) -Greedy action tensor([-1.8847, -0.4361, 0.6382, -0.1531]) tensor([0.0428, 0.1822, 0.5333, 0.2417]) -Greedy action tensor([-1.9312, -0.4359, 0.6615, -0.1730]) tensor([0.0406, 0.1811, 0.5427, 0.2356]) -Greedy action tensor([-1.9473, -0.4538, 0.6661, -0.1828]) tensor([0.0401, 0.1786, 0.5472, 0.2341]) -Greedy action tensor([-1.8305, -0.4503, 0.6196, -0.1180]) tensor([0.0452, 0.1798, 0.5242, 0.2507]) -Greedy action tensor([-1.8919, -0.3275, 0.6281, -0.1521]) tensor([0.0418, 0.1999, 0.5199, 0.2383]) -Greedy action tensor([-1.8624, -0.3573, 0.6094, -0.1530]) tensor([0.0437, 0.1969, 0.5178, 0.2416]) -Greedy action tensor([-1.1354, 0.2535, 0.3733, 0.1685]) tensor([0.0757, 0.3035, 0.3421, 0.2788]) -Greedy action tensor([-1.8770, -0.1409, 0.5924, -0.1104]) tensor([0.0411, 0.2332, 0.4854, 0.2404]) -Greedy action tensor([-1.6785e+00, -2.9530e-01, 5.3662e-01, -6.0815e-04]) tensor([0.0513, 0.2044, 0.4698, 0.2745]) -Greedy action tensor([-1.0737, -0.6616, 0.3008, 0.1390]) tensor([0.1018, 0.1537, 0.4023, 0.3422]) -Greedy action tensor([-0.6186, 0.4880, 0.2663, 0.3485]) tensor([0.1102, 0.3331, 0.2669, 0.2898]) -Greedy action tensor([-1.9349, -0.4447, 0.6647, -0.1748]) tensor([0.0405, 0.1796, 0.5447, 0.2353]) -Greedy action tensor([-1.8601, -0.4609, 0.6337, -0.1317]) tensor([0.0439, 0.1778, 0.5312, 0.2471]) -Greedy action tensor([-1.8399, -0.4357, 0.6134, -0.1272]) tensor([0.0450, 0.1831, 0.5227, 0.2492]) -Greedy action tensor([-1.8896, -0.4577, 0.6360, -0.1458]) tensor([0.0427, 0.1789, 0.5340, 0.2444]) -Greedy action tensor([-1.8084, -0.4681, 0.6183, -0.0214]) tensor([0.0452, 0.1727, 0.5120, 0.2700]) -Greedy action tensor([-0.3396, 0.5155, 0.4912, 0.6018]) tensor([0.1218, 0.2864, 0.2796, 0.3122]) -Greedy action tensor([-1.7583, -0.5034, 0.5733, -0.1182]) tensor([0.0501, 0.1757, 0.5158, 0.2583]) -Greedy action tensor([-1.8237, -0.6820, 1.1793, 0.1499]) tensor([0.0318, 0.0995, 0.6401, 0.2286]) -Greedy action tensor([-1.8351, -0.3452, 0.6067, -0.1395]) tensor([0.0447, 0.1982, 0.5136, 0.2435]) -Greedy action tensor([-1.8192, -0.4569, 0.6572, -0.0873]) tensor([0.0445, 0.1739, 0.5299, 0.2517]) -Greedy action tensor([-1.9279, -0.3626, 0.6424, -0.1974]) tensor([0.0408, 0.1953, 0.5335, 0.2304]) -Greedy action tensor([-1.8831, -0.3495, 0.6272, -0.1455]) tensor([0.0423, 0.1962, 0.5210, 0.2406]) -Greedy action tensor([-1.5067, -0.5071, 0.5608, 0.1355]) tensor([0.0596, 0.1618, 0.4709, 0.3077]) -Greedy action tensor([-1.4325, -0.0490, 0.5433, 0.0486]) tensor([0.0602, 0.2403, 0.4345, 0.2649]) -Greedy action tensor([-1.8966, -0.4508, 0.6460, -0.1584]) tensor([0.0423, 0.1795, 0.5376, 0.2405]) -Greedy action tensor([-1.7993, -0.4643, 0.6031, -0.1099]) tensor([0.0470, 0.1787, 0.5196, 0.2547]) -Greedy action tensor([-1.9034, -0.4319, 0.6605, -0.1405]) tensor([0.0414, 0.1802, 0.5373, 0.2412]) -Greedy action tensor([-1.8984, -0.4053, 0.6444, -0.1572]) tensor([0.0419, 0.1865, 0.5327, 0.2390]) -Greedy action tensor([-1.9388, -0.4380, 0.6629, -0.1777]) tensor([0.0403, 0.1809, 0.5440, 0.2347]) -Greedy action tensor([-1.9383, -0.4417, 0.6649, -0.1772]) tensor([0.0403, 0.1802, 0.5448, 0.2347]) -Greedy action tensor([-1.7811, -0.2860, 0.6250, -0.0630]) tensor([0.0452, 0.2016, 0.5013, 0.2519]) -Greedy action tensor([-1.8620, -0.3861, 0.6227, -0.1310]) tensor([0.0434, 0.1901, 0.5212, 0.2453]) -Greedy action tensor([-1.7298, -0.5137, 0.5632, -0.1083]) tensor([0.0517, 0.1745, 0.5121, 0.2617]) -Greedy action tensor([-1.9116, -0.4373, 0.6544, -0.1638]) tensor([0.0415, 0.1811, 0.5395, 0.2380]) -Greedy action tensor([-1.9302, -0.4339, 0.6511, -0.1894]) tensor([0.0410, 0.1831, 0.5420, 0.2339]) -Greedy action tensor([-1.8754, -0.4399, 0.6434, -0.1147]) tensor([0.0427, 0.1793, 0.5298, 0.2482]) -Greedy action tensor([-1.6046, -0.2540, 0.5740, 0.0711]) tensor([0.0525, 0.2028, 0.4641, 0.2807]) -Greedy action tensor([-0.9988, 0.3587, 0.1445, 0.0769]) tensor([0.0913, 0.3548, 0.2863, 0.2676]) -Greedy action tensor([-1.8306, -0.3851, 0.6346, -0.1028]) tensor([0.0442, 0.1875, 0.5197, 0.2486]) -Greedy action tensor([-1.7300, -0.1698, 0.5339, -0.1281]) tensor([0.0492, 0.2340, 0.4729, 0.2439]) -Greedy action tensor([-0.9037, -0.6561, 0.2453, 0.2342]) tensor([0.1169, 0.1497, 0.3687, 0.3647]) -Greedy action tensor([-1.7130, -0.2249, 0.5628, -0.0498]) tensor([0.0489, 0.2167, 0.4763, 0.2581]) -Greedy action tensor([-1.6323, -0.4941, 0.6090, 0.1575]) tensor([0.0512, 0.1599, 0.4820, 0.3069]) -Greedy action tensor([-1.9021, -0.4344, 0.6484, -0.1541]) tensor([0.0419, 0.1816, 0.5362, 0.2403]) -Greedy action tensor([-1.9046, -0.4510, 0.6512, -0.1612]) tensor([0.0419, 0.1792, 0.5395, 0.2394]) -Greedy action tensor([-1.2594, -0.1098, 0.5940, 0.3568]) tensor([0.0642, 0.2027, 0.4098, 0.3233]) -Greedy action tensor([-1.8579, -0.4513, 0.6259, -0.1366]) tensor([0.0441, 0.1801, 0.5290, 0.2468]) -Greedy action tensor([-1.9320, -0.4267, 0.6574, -0.1727]) tensor([0.0406, 0.1829, 0.5408, 0.2358]) -Greedy action tensor([-1.4311, -0.2485, 0.4386, -0.1064]) tensor([0.0689, 0.2249, 0.4470, 0.2592]) -Greedy action tensor([-1.6448, 0.1425, 0.4429, -0.0551]) tensor([0.0501, 0.2995, 0.4045, 0.2458]) -Greedy action tensor([-1.7836, -0.5105, 0.5839, -0.1299]) tensor([0.0489, 0.1745, 0.5213, 0.2553]) -Greedy action tensor([-1.2734, -0.1858, 0.5262, -0.3828]) tensor([0.0803, 0.2383, 0.4857, 0.1957]) -Greedy action tensor([-1.9414, -0.4379, 0.6633, -0.1781]) tensor([0.0402, 0.1809, 0.5442, 0.2346]) -Greedy action tensor([-1.8704, -0.4221, 0.6260, -0.1353]) tensor([0.0434, 0.1845, 0.5263, 0.2458]) -Greedy action tensor([-1.7070, -0.4484, 0.5497, -0.0716]) tensor([0.0521, 0.1833, 0.4974, 0.2672]) -Greedy action tensor([ 0.0170, 0.2464, -0.1769, -0.5521]) tensor([0.2742, 0.3448, 0.2258, 0.1552]) -Greedy action tensor([ 0.7291, -0.4507, 0.1589, -0.5031]) tensor([0.4620, 0.1420, 0.2612, 0.1347]) -Greedy action tensor([ 0.9343, -0.7203, 0.0373, -0.2767]) tensor([0.5272, 0.1008, 0.2150, 0.1571]) -Greedy action tensor([ 0.3379, 0.0992, -0.0413, -0.1425]) tensor([0.3236, 0.2549, 0.2214, 0.2001]) -Greedy action tensor([ 0.9885, -0.5797, -0.0338, -0.4822]) tensor([0.5562, 0.1159, 0.2001, 0.1278]) -Greedy action tensor([ 1.1404, -0.8067, 0.1802, -0.5430]) tensor([0.5844, 0.0834, 0.2237, 0.1085]) -Greedy action tensor([ 0.7650, -0.2944, -0.1481, -0.3131]) tensor([0.4789, 0.1660, 0.1922, 0.1629]) -Greedy action tensor([ 0.8068, -0.5306, 0.1697, -0.0672]) tensor([0.4528, 0.1189, 0.2394, 0.1889]) -Greedy action tensor([ 0.6422, -0.2425, -0.0022, -0.1164]) tensor([0.4156, 0.1716, 0.2182, 0.1946]) -Greedy action tensor([ 0.3772, -0.0068, -0.0381, -0.0205]) tensor([0.3319, 0.2261, 0.2191, 0.2230]) -Greedy action tensor([ 1.2740, -0.7607, -0.0668, -0.9241]) tensor([0.6652, 0.0869, 0.1740, 0.0738]) -Greedy action tensor([ 1.2138, -0.7763, 0.0826, -0.5546]) tensor([0.6135, 0.0839, 0.1980, 0.1047]) -Greedy action tensor([ 0.4395, 0.0208, -0.1968, -0.3534]) tensor([0.3788, 0.2492, 0.2005, 0.1714]) -Greedy action tensor([ 0.6954, -0.1699, 0.0547, -0.0195]) tensor([0.4103, 0.1727, 0.2162, 0.2007]) -Greedy action tensor([ 0.9097, -0.3910, -0.1776, -0.1505]) tensor([0.5113, 0.1392, 0.1724, 0.1771]) -Greedy action tensor([ 0.8080, -0.7064, 0.0440, -0.1501]) tensor([0.4832, 0.1063, 0.2251, 0.1854]) -Greedy action tensor([ 0.3541, -0.2404, 0.0050, -0.0075]) tensor([0.3386, 0.1868, 0.2388, 0.2358]) -Greedy action tensor([ 0.6508, -0.1970, -0.1747, -0.2338]) tensor([0.4388, 0.1879, 0.1922, 0.1811]) -Greedy action tensor([ 0.8815, -0.4700, 0.0346, -0.1906]) tensor([0.4926, 0.1275, 0.2112, 0.1686]) -Greedy action tensor([ 0.3693, -0.1782, -0.0627, -0.1411]) tensor([0.3536, 0.2045, 0.2296, 0.2123]) -Greedy action tensor([ 0.4433, -0.1493, 0.0167, -0.1656]) tensor([0.3637, 0.2011, 0.2374, 0.1978]) -Greedy action tensor([ 0.5765, -0.1324, -0.0391, -0.3630]) tensor([0.4127, 0.2031, 0.2230, 0.1613]) -Greedy action tensor([ 0.5209, -0.3272, -0.1725, -0.0516]) tensor([0.4013, 0.1718, 0.2006, 0.2263]) -Greedy action tensor([ 0.4094, -0.0755, -0.1389, -0.0414]) tensor([0.3533, 0.2175, 0.2042, 0.2251]) -Greedy action tensor([ 0.5261, 0.0918, -0.1149, -0.5080]) tensor([0.3952, 0.2560, 0.2082, 0.1405]) -Greedy action tensor([ 0.7363, -0.3035, 0.0309, -0.1839]) tensor([0.4452, 0.1574, 0.2199, 0.1774]) -Greedy action tensor([ 0.7133, -0.4026, 0.0449, -0.0985]) tensor([0.4378, 0.1434, 0.2244, 0.1944]) -Greedy action tensor([ 0.5674, -0.2592, -0.1331, -0.2074]) tensor([0.4176, 0.1827, 0.2073, 0.1924]) -Greedy action tensor([ 1.0174, -0.7509, 0.1337, -0.3625]) tensor([0.5448, 0.0930, 0.2252, 0.1371]) -Greedy action tensor([ 0.2979, -0.2899, -0.1271, -0.1488]) tensor([0.3510, 0.1950, 0.2295, 0.2245]) -Greedy action tensor([ 0.4157, 0.0402, -0.0033, -0.1713]) tensor([0.3447, 0.2368, 0.2268, 0.1917]) -Greedy action tensor([ 1.0866, -0.2492, -0.1026, -0.1887]) tensor([0.5415, 0.1424, 0.1649, 0.1513]) -Greedy action tensor([ 0.7303, -0.1079, 0.0298, -0.0205]) tensor([0.4165, 0.1801, 0.2067, 0.1966]) -Greedy action tensor([ 0.8743, -0.4334, 0.2249, -0.3557]) tensor([0.4796, 0.1297, 0.2505, 0.1402]) -Greedy action tensor([ 0.7741, -0.4949, -0.1250, -0.2970]) tensor([0.4925, 0.1384, 0.2004, 0.1687]) -Greedy action tensor([ 0.7448, -0.2822, -0.0288, -0.2321]) tensor([0.4554, 0.1631, 0.2101, 0.1714]) -Greedy action tensor([ 0.7096, -0.3112, -0.1002, -0.2289]) tensor([0.4553, 0.1640, 0.2026, 0.1781]) -Greedy action tensor([ 1.0864, -0.6947, 0.0653, -0.6033]) tensor([0.5837, 0.0983, 0.2102, 0.1077]) -Greedy action tensor([ 0.5739, -0.3432, -0.0185, -0.1671]) tensor([0.4116, 0.1645, 0.2276, 0.1962]) -Greedy action tensor([ 0.3255, 0.1037, -0.0711, -0.3013]) tensor([0.3324, 0.2663, 0.2236, 0.1776]) -Greedy action tensor([ 0.8777, -0.4923, -0.0652, -0.4313]) tensor([0.5226, 0.1328, 0.2035, 0.1411]) -Greedy action tensor([ 0.6662, -0.2654, -0.0408, -0.1879]) tensor([0.4324, 0.1703, 0.2132, 0.1841]) -Greedy action tensor([ 0.6602, -0.7212, -0.0694, -0.2506]) tensor([0.4683, 0.1176, 0.2258, 0.1883]) -Greedy action tensor([ 0.4313, -0.2004, -0.0417, -0.2157]) tensor([0.3734, 0.1985, 0.2326, 0.1955]) -Greedy action tensor([ 0.8752, -0.4265, -0.0197, -0.2545]) tensor([0.4990, 0.1358, 0.2039, 0.1613]) -Greedy action tensor([ 1.0383, -0.4315, -0.0300, -0.2598]) tensor([0.5415, 0.1245, 0.1861, 0.1479]) -Greedy action tensor([ 0.8725, -0.4392, -0.2231, -0.3196]) tensor([0.5243, 0.1412, 0.1753, 0.1592]) -Greedy action tensor([0.2946, 0.1593, 0.1424, 0.3462]) tensor([0.2642, 0.2307, 0.2269, 0.2782]) -Greedy action tensor([ 0.5651, -0.4745, -0.1362, -0.1836]) tensor([0.4306, 0.1522, 0.2135, 0.2036]) -Greedy action tensor([ 0.5464, -0.2780, -0.0795, -0.3291]) tensor([0.4184, 0.1835, 0.2238, 0.1743]) -Greedy action tensor([ 0.9176, -0.6684, 0.2109, -0.3977]) tensor([0.5085, 0.1041, 0.2509, 0.1365]) -Greedy action tensor([ 0.8500, -0.5316, 0.0209, -0.4647]) tensor([0.5112, 0.1284, 0.2231, 0.1373]) -Greedy action tensor([ 1.3972, -0.9785, -0.0527, -0.6440]) tensor([0.6861, 0.0638, 0.1610, 0.0891]) -Greedy action tensor([ 0.8135, -0.6191, 0.0458, -0.5690]) tensor([0.5119, 0.1222, 0.2375, 0.1284]) -Greedy action tensor([ 0.4154, 0.0011, 0.0294, -0.2031]) tensor([0.3473, 0.2295, 0.2361, 0.1871]) -Greedy action tensor([ 0.5154, -0.3893, 0.0326, -0.2279]) tensor([0.4004, 0.1620, 0.2471, 0.1904]) -Greedy action tensor([ 0.4637, -0.3538, -0.1314, -0.0742]) tensor([0.3881, 0.1713, 0.2140, 0.2266]) -Greedy action tensor([ 0.3201, -0.1803, -0.1126, -0.3480]) tensor([0.3613, 0.2191, 0.2344, 0.1852]) -Greedy action tensor([ 0.5777, -0.4811, 0.1738, -0.6274]) tensor([0.4321, 0.1499, 0.2885, 0.1295]) -Greedy action tensor([ 0.7978, -0.0431, 0.0672, -0.1386]) tensor([0.4338, 0.1871, 0.2090, 0.1701]) -Greedy action tensor([ 0.6691, -0.5153, -0.0468, -0.0675]) tensor([0.4399, 0.1346, 0.2150, 0.2106]) -Greedy action tensor([ 1.0015, -0.7375, 0.1131, -0.4165]) tensor([0.5467, 0.0961, 0.2249, 0.1324]) -Greedy action tensor([ 0.5795, -0.0860, -0.0040, -0.1239]) tensor([0.3896, 0.2003, 0.2174, 0.1928]) -Greedy action tensor([ 0.5140, -0.3523, 0.0073, -0.3339]) tensor([0.4079, 0.1715, 0.2458, 0.1747]) -Greedy action tensor([ 0.9064, -0.6917, -0.1095, -0.2193]) tensor([0.5295, 0.1071, 0.1917, 0.1718]) -Greedy action tensor([ 1.1118, -1.1446, 0.0728, -0.4930]) tensor([0.6026, 0.0631, 0.2132, 0.1211]) -Greedy action tensor([ 6.9943e-01, -4.0216e-01, -3.8031e-04, -1.9486e-01]) tensor([0.4468, 0.1485, 0.2219, 0.1827]) -Greedy action tensor([ 0.7567, -0.1434, -0.1974, -0.2032]) tensor([0.4598, 0.1869, 0.1771, 0.1761]) -Greedy action tensor([ 0.6596, -0.4664, -0.1536, -0.5310]) tensor([0.4827, 0.1565, 0.2140, 0.1468]) -Greedy action tensor([ 0.8871, -0.3820, 0.0022, -0.2228]) tensor([0.4942, 0.1389, 0.2040, 0.1629]) -Greedy action tensor([ 0.9463, -0.3649, -0.1090, -0.2892]) tensor([0.5240, 0.1412, 0.1824, 0.1523]) -Greedy action tensor([ 0.6255, -0.1577, 0.0028, -0.0265]) tensor([0.3977, 0.1817, 0.2134, 0.2072]) -Greedy action tensor([ 0.6555, -0.5484, -0.1051, -0.3689]) tensor([0.4703, 0.1411, 0.2198, 0.1688]) -Greedy action tensor([ 0.4419, -0.3889, -0.0797, -0.3440]) tensor([0.4024, 0.1753, 0.2389, 0.1834]) -Greedy action tensor([ 0.7500, 0.3482, -0.1423, 0.2524]) tensor([0.3722, 0.2490, 0.1525, 0.2263]) -Greedy action tensor([ 0.7223, -0.6792, -0.0226, -0.3290]) tensor([0.4830, 0.1189, 0.2293, 0.1688]) -Greedy action tensor([ 1.3330, -0.7662, 0.1090, -0.7145]) tensor([0.6470, 0.0793, 0.1902, 0.0835]) -Greedy action tensor([ 0.6086, -0.3558, -0.1166, -0.1725]) tensor([0.4304, 0.1641, 0.2084, 0.1971]) -Greedy action tensor([ 0.9298, -0.6327, -0.1137, -0.3295]) tensor([0.5418, 0.1136, 0.1908, 0.1538]) -Greedy action tensor([ 0.7921, -0.4558, 0.1660, -0.6841]) tensor([0.4877, 0.1400, 0.2608, 0.1115]) -Greedy action tensor([ 0.7887, -0.4495, -0.1334, -0.4976]) tensor([0.5092, 0.1476, 0.2025, 0.1407]) -Greedy action tensor([ 2.0321, -0.2955, -0.5762, 0.3007]) tensor([0.7417, 0.0723, 0.0546, 0.1313]) -Greedy action tensor([ 1.3674, -0.2176, -0.6850, 0.2036]) tensor([0.6077, 0.1245, 0.0780, 0.1898]) -Greedy action tensor([ 1.4540, -0.2836, -0.5863, 0.2076]) tensor([0.6276, 0.1104, 0.0816, 0.1805]) -Greedy action tensor([ 2.1014, -0.8060, -0.3899, 0.7558]) tensor([0.7154, 0.0391, 0.0592, 0.1863]) -Greedy action tensor([ 1.7526, -0.9362, -0.1537, 0.2787]) tensor([0.6917, 0.0470, 0.1028, 0.1584]) -Greedy action tensor([1.2877, 0.0525, 0.0258, 0.2897]) tensor([0.5148, 0.1497, 0.1457, 0.1898]) -Greedy action tensor([ 2.5413e+00, -1.5519e-03, -3.7882e-01, 2.9324e-01]) tensor([0.8076, 0.0635, 0.0436, 0.0853]) -Greedy action tensor([ 0.6872, 0.0667, 0.0752, -0.1853]) tensor([0.4003, 0.2153, 0.2171, 0.1673]) -Greedy action tensor([ 2.3669, -1.4003, -0.1579, 1.1162]) tensor([0.7197, 0.0166, 0.0576, 0.2061]) -Greedy action tensor([ 1.2046, -0.4222, -0.4471, 0.2345]) tensor([0.5658, 0.1112, 0.1085, 0.2145]) -Greedy action tensor([ 1.7148, -0.4455, -0.8063, 0.3927]) tensor([0.6839, 0.0788, 0.0550, 0.1823]) -Greedy action tensor([ 1.1144, -0.2901, -0.0748, 0.3865]) tensor([0.4919, 0.1208, 0.1498, 0.2375]) -Greedy action tensor([ 1.9571, -0.8114, -0.3998, 0.7762]) tensor([0.6828, 0.0429, 0.0647, 0.2096]) -Greedy action tensor([ 1.3039, -0.8861, 0.1901, 0.3843]) tensor([0.5438, 0.0609, 0.1785, 0.2168]) -Greedy action tensor([ 0.4101, -0.2325, 0.3872, -0.1032]) tensor([0.3224, 0.1696, 0.3151, 0.1930]) -Greedy action tensor([ 1.3932, -0.0677, -0.5270, 0.2446]) tensor([0.5897, 0.1368, 0.0864, 0.1870]) -Greedy action tensor([ 1.0723, -0.1187, -0.3073, 0.3837]) tensor([0.4859, 0.1477, 0.1223, 0.2441]) -Greedy action tensor([ 2.0255, -1.5559, -0.1808, 0.3225]) tensor([0.7575, 0.0211, 0.0834, 0.1380]) -Greedy action tensor([ 2.0164, -1.2294, -0.1947, 0.4934]) tensor([0.7318, 0.0285, 0.0802, 0.1596]) -Greedy action tensor([ 2.2119, -0.2268, 0.2474, 0.2106]) tensor([0.7339, 0.0640, 0.1029, 0.0992]) -Greedy action tensor([ 1.2704, -0.9074, -0.2905, 0.1894]) tensor([0.6015, 0.0681, 0.1263, 0.2041]) -Greedy action tensor([ 2.0173, -0.6998, -0.4108, 0.4107]) tensor([0.7381, 0.0488, 0.0651, 0.1480]) -Greedy action tensor([ 1.7768, -0.3769, -0.5103, 0.0281]) tensor([0.7186, 0.0834, 0.0730, 0.1250]) -Greedy action tensor([ 1.2318, -0.3888, -1.2842, 0.3354]) tensor([0.5929, 0.1173, 0.0479, 0.2419]) -Greedy action tensor([ 1.6497, 0.1384, -0.5244, 0.3227]) tensor([0.6251, 0.1379, 0.0711, 0.1658]) -Greedy action tensor([ 1.1205, -0.1809, -0.6566, 0.2431]) tensor([0.5385, 0.1465, 0.0911, 0.2239]) -Greedy action tensor([ 1.8255, -0.9225, 0.1256, 0.5574]) tensor([0.6544, 0.0419, 0.1196, 0.1841]) -Greedy action tensor([ 2.1764, -1.5290, -0.0083, 0.5323]) tensor([0.7517, 0.0185, 0.0846, 0.1452]) -Greedy action tensor([ 1.5107, -0.4265, -0.3762, 0.2830]) tensor([0.6295, 0.0907, 0.0954, 0.1844]) -Greedy action tensor([ 1.5983, -0.5447, -0.5963, 0.4567]) tensor([0.6460, 0.0758, 0.0720, 0.2063]) -Greedy action tensor([ 1.2854, -0.6839, -0.1889, 0.4131]) tensor([0.5598, 0.0781, 0.1282, 0.2340]) -Greedy action tensor([ 0.8612, -0.4319, -0.7679, 0.4089]) tensor([0.4747, 0.1303, 0.0931, 0.3020]) -Greedy action tensor([ 1.8711, -0.5011, -0.5006, 0.5343]) tensor([0.6900, 0.0644, 0.0644, 0.1813]) -Greedy action tensor([ 1.4666, -0.4207, -0.3984, -0.1131]) tensor([0.6612, 0.1002, 0.1024, 0.1362]) -Greedy action tensor([ 1.6142, -0.6410, -0.2937, 0.0613]) tensor([0.6826, 0.0716, 0.1013, 0.1445]) -Greedy action tensor([ 1.2804, 0.3233, -1.0014, 0.2849]) tensor([0.5389, 0.2069, 0.0550, 0.1992]) -Greedy action tensor([ 1.2481, -0.5038, -0.2036, 0.8364]) tensor([0.4831, 0.0838, 0.1131, 0.3200]) -Greedy action tensor([ 1.4470, -0.6317, -0.4627, 0.0759]) tensor([0.6549, 0.0819, 0.0970, 0.1662]) -Greedy action tensor([ 1.7832, -0.5501, -0.9823, 0.4007]) tensor([0.7088, 0.0687, 0.0446, 0.1779]) -Greedy action tensor([ 1.3192, -0.5926, -0.1421, 0.4761]) tensor([0.5525, 0.0817, 0.1281, 0.2378]) -Greedy action tensor([ 1.6212, -0.5634, -0.3642, 0.3637]) tensor([0.6518, 0.0733, 0.0895, 0.1853]) -Greedy action tensor([ 1.2894, -0.3253, -0.4454, 0.1099]) tensor([0.5943, 0.1182, 0.1048, 0.1827]) -Greedy action tensor([ 9.3872e-01, 5.7615e-02, -5.3031e-02, 1.5505e-05]) tensor([0.4595, 0.1904, 0.1704, 0.1797]) -Greedy action tensor([ 1.3301, -0.2204, -0.4237, 0.0428]) tensor([0.6019, 0.1277, 0.1042, 0.1662]) -Greedy action tensor([ 1.4678, 0.3799, -0.3923, 0.2489]) tensor([0.5593, 0.1884, 0.0870, 0.1653]) -Greedy action tensor([ 1.1960, -0.4815, -0.1230, 0.1960]) tensor([0.5488, 0.1025, 0.1468, 0.2019]) -Greedy action tensor([ 1.4572, -0.6142, -0.6546, 0.1361]) tensor([0.6606, 0.0832, 0.0799, 0.1763]) -Greedy action tensor([ 1.5927, -1.0099, -0.2649, -0.1206]) tensor([0.7090, 0.0525, 0.1106, 0.1278]) -Greedy action tensor([ 1.1278, -0.6708, -0.2279, 0.4248]) tensor([0.5213, 0.0863, 0.1344, 0.2581]) -Greedy action tensor([ 0.8806, 0.0166, -0.4091, 0.7198]) tensor([0.3924, 0.1654, 0.1080, 0.3341]) -Greedy action tensor([ 1.2926, -0.7576, -0.3590, 0.3650]) tensor([0.5828, 0.0750, 0.1117, 0.2305]) -Greedy action tensor([ 1.2562, -0.5354, -0.4409, 0.4016]) tensor([0.5633, 0.0939, 0.1032, 0.2396]) -Greedy action tensor([ 1.5597, -0.3956, -0.4317, 0.3475]) tensor([0.6347, 0.0898, 0.0866, 0.1888]) -Greedy action tensor([ 2.0321, -0.6439, -0.4645, 0.4746]) tensor([0.7343, 0.0505, 0.0605, 0.1547]) -Greedy action tensor([ 1.0208, -0.6284, 0.2383, -0.1929]) tensor([0.5137, 0.0987, 0.2349, 0.1526]) -Greedy action tensor([ 1.7597, -0.7826, -0.3590, 0.3213]) tensor([0.6963, 0.0548, 0.0837, 0.1652]) -Greedy action tensor([ 1.4168, -0.8778, -0.1557, 0.2353]) tensor([0.6192, 0.0624, 0.1285, 0.1900]) -Greedy action tensor([ 1.2126, -0.5059, -0.2087, 0.5013]) tensor([0.5231, 0.0938, 0.1263, 0.2568]) -Greedy action tensor([ 1.5215, -0.7234, -0.3261, 0.5192]) tensor([0.6133, 0.0650, 0.0967, 0.2251]) -Greedy action tensor([ 1.2971, -0.3677, -0.3595, 0.1316]) tensor([0.5911, 0.1118, 0.1128, 0.1843]) -Greedy action tensor([ 1.4922, 0.1996, -0.9016, 0.4786]) tensor([0.5785, 0.1588, 0.0528, 0.2099]) -Greedy action tensor([ 0.9592, -0.1491, -0.0030, -0.0484]) tensor([0.4814, 0.1589, 0.1839, 0.1758]) -Greedy action tensor([ 0.9412, -0.0143, -0.2008, 0.2519]) tensor([0.4534, 0.1744, 0.1447, 0.2276]) -Greedy action tensor([ 1.3535, -0.2387, -0.6556, 0.3022]) tensor([0.5928, 0.1206, 0.0795, 0.2071]) -Greedy action tensor([ 1.8039, 0.2269, -0.3763, 0.1976]) tensor([0.6578, 0.1359, 0.0743, 0.1320]) -Greedy action tensor([ 1.2231, -0.5345, 0.1509, -0.0558]) tensor([0.5577, 0.0962, 0.1909, 0.1552]) -Greedy action tensor([ 1.4495, -0.5349, -0.6013, 0.2741]) tensor([0.6350, 0.0873, 0.0817, 0.1960]) -Greedy action tensor([ 1.7792, -0.8512, -0.3253, 0.5549]) tensor([0.6721, 0.0484, 0.0819, 0.1976]) -Greedy action tensor([ 1.2708, 0.0496, -0.3435, 0.0749]) tensor([0.5567, 0.1642, 0.1108, 0.1684]) -Greedy action tensor([ 1.5952, -0.5270, -0.8692, 0.1466]) tensor([0.6946, 0.0832, 0.0591, 0.1631]) -Greedy action tensor([ 1.6862, -0.6850, -0.2067, 0.3080]) tensor([0.6684, 0.0624, 0.1007, 0.1685]) -Greedy action tensor([ 1.1595, -0.2885, -1.0567, 0.2505]) tensor([0.5724, 0.1345, 0.0624, 0.2306]) -Greedy action tensor([ 1.1637, -0.1866, -1.0238, 0.4721]) tensor([0.5341, 0.1384, 0.0599, 0.2675]) -Greedy action tensor([ 2.0079, 0.4528, -0.0504, 0.2917]) tensor([0.6585, 0.1391, 0.0841, 0.1184]) -Greedy action tensor([ 1.5117, 0.1786, -0.0336, -0.0212]) tensor([0.5907, 0.1558, 0.1260, 0.1275]) -Greedy action tensor([ 1.9153, -0.4460, -0.2206, 0.4092]) tensor([0.6972, 0.0657, 0.0824, 0.1546]) -Greedy action tensor([ 1.5968, -0.2023, -0.9379, 0.4477]) tensor([0.6403, 0.1059, 0.0508, 0.2029]) -Greedy action tensor([ 1.1679, -0.4322, -0.1557, -0.0504]) tensor([0.5670, 0.1145, 0.1509, 0.1677]) -Greedy action tensor([ 1.1391, -0.5433, -0.3908, 0.2488]) tensor([0.5516, 0.1026, 0.1194, 0.2264]) -Greedy action tensor([ 1.7622, -0.2762, -0.2724, 0.2386]) tensor([0.6762, 0.0881, 0.0884, 0.1474]) -Greedy action tensor([ 1.5159, -0.6295, -0.1436, 0.4427]) tensor([0.6064, 0.0710, 0.1153, 0.2073]) -Greedy action tensor([-0.8593, 0.1630, 0.4148, -0.3633]) tensor([0.1112, 0.3089, 0.3974, 0.1825]) -Greedy action tensor([ 0.9043, -0.5998, 0.9016, 0.8891]) tensor([0.3121, 0.0693, 0.3112, 0.3074]) -Greedy action tensor([-1.0172, -1.0648, 0.6405, -0.7939]) tensor([0.1183, 0.1128, 0.6209, 0.1479]) -Greedy action tensor([ 0.3626, 0.1917, 0.8343, -0.3332]) tensor([0.2535, 0.2137, 0.4063, 0.1264]) -Greedy action tensor([ 1.0330, -1.0342, 0.2232, 1.5863]) tensor([0.3021, 0.0382, 0.1344, 0.5253]) -Greedy action tensor([ 0.6144, -0.0700, 0.3643, -0.8276]) tensor([0.3969, 0.2002, 0.3091, 0.0938]) -Greedy action tensor([-0.3516, -1.2090, 1.1471, -0.3838]) tensor([0.1456, 0.0618, 0.6517, 0.1410]) -Greedy action tensor([ 0.8737, -0.3040, -0.1912, 0.5174]) tensor([0.4250, 0.1309, 0.1465, 0.2976]) -Greedy action tensor([ 1.4732, -1.1647, 0.5149, 0.6633]) tensor([0.5263, 0.0376, 0.2019, 0.2342]) -Greedy action tensor([-0.3993, -1.1362, 0.9712, 0.3535]) tensor([0.1327, 0.0635, 0.5223, 0.2816]) -Greedy action tensor([-0.0018, -0.8425, -0.3472, -0.0296]) tensor([0.3213, 0.1386, 0.2275, 0.3125]) -Greedy action tensor([ 0.4566, -0.0688, -0.2549, 0.1779]) tensor([0.3522, 0.2083, 0.1729, 0.2666]) -Greedy action tensor([ 1.3125, -1.4953, 0.4686, 1.0191]) tensor([0.4472, 0.0270, 0.1923, 0.3335]) -Greedy action tensor([ 0.2728, 0.3126, -0.8755, 0.0083]) tensor([0.3200, 0.3329, 0.1015, 0.2456]) -Greedy action tensor([-0.1464, 0.7704, -0.4259, -0.4561]) tensor([0.2004, 0.5011, 0.1515, 0.1470]) -Greedy action tensor([-0.4694, 0.6583, 0.6706, -0.6157]) tensor([0.1238, 0.3823, 0.3870, 0.1069]) -Greedy action tensor([-0.0246, -0.9466, 0.3457, 1.3171]) tensor([0.1499, 0.0596, 0.2171, 0.5734]) -Greedy action tensor([ 0.2093, -0.6058, 0.2514, -0.4852]) tensor([0.3350, 0.1483, 0.3494, 0.1673]) -Greedy action tensor([-0.1923, -0.9846, 0.5163, 0.8264]) tensor([0.1599, 0.0724, 0.3248, 0.4429]) -Greedy action tensor([ 1.4191, -0.8622, 0.1426, 0.8032]) tensor([0.5205, 0.0532, 0.1452, 0.2811]) -Greedy action tensor([-0.0025, -0.7586, 1.2764, -1.0513]) tensor([0.1847, 0.0867, 0.6638, 0.0647]) -Greedy action tensor([-0.7509, -0.5288, -0.2146, -0.5816]) tensor([0.1944, 0.2428, 0.3324, 0.2303]) -Greedy action tensor([ 0.2331, -1.4068, 0.5215, 0.1242]) tensor([0.2920, 0.0566, 0.3896, 0.2618]) -Greedy action tensor([ 1.6668, -0.0110, 0.0765, 0.4987]) tensor([0.5877, 0.1098, 0.1198, 0.1827]) -Greedy action tensor([-1.0283, -0.6242, -0.1238, -0.5376]) tensor([0.1515, 0.2269, 0.3742, 0.2474]) -Greedy action tensor([-0.9866, -0.5206, -1.8669, -0.2690]) tensor([0.1977, 0.3151, 0.0820, 0.4052]) -Greedy action tensor([-1.0435, -0.8024, 0.2233, -0.8347]) tensor([0.1418, 0.1804, 0.5032, 0.1747]) -Greedy action tensor([-0.0323, -1.4570, -0.2658, 0.6480]) tensor([0.2496, 0.0600, 0.1976, 0.4928]) -Greedy action tensor([ 0.6385, -1.0410, -0.3797, 0.2723]) tensor([0.4462, 0.0832, 0.1612, 0.3094]) -Greedy action tensor([-0.7012, -1.0081, -0.1879, -0.1021]) tensor([0.1913, 0.1408, 0.3196, 0.3483]) -Greedy action tensor([ 0.8655, -0.3395, -0.0710, 0.2603]) tensor([0.4469, 0.1339, 0.1752, 0.2440]) -Greedy action tensor([-0.6189, -0.2000, 0.6930, -0.3843]) tensor([0.1334, 0.2028, 0.4952, 0.1686]) -Greedy action tensor([ 0.0068, -0.0251, 0.7012, 0.1537]) tensor([0.1950, 0.1888, 0.3904, 0.2258]) -Greedy action tensor([ 0.1872, 0.2156, -0.8734, 0.1412]) tensor([0.3003, 0.3089, 0.1040, 0.2868]) -Greedy action tensor([ 0.2138, -1.0028, 0.4607, 0.0817]) tensor([0.2896, 0.0858, 0.3708, 0.2538]) -Greedy action tensor([0.3899, 0.3340, 0.1098, 1.1273]) tensor([0.2087, 0.1973, 0.1577, 0.4363]) -Greedy action tensor([ 0.1528, -0.2552, -0.8689, 1.2800]) tensor([0.1956, 0.1301, 0.0704, 0.6039]) -Greedy action tensor([-0.3078, 0.4061, 0.5011, -1.6256]) tensor([0.1800, 0.3676, 0.4042, 0.0482]) -Greedy action tensor([ 0.6271, -0.2996, 0.6451, -0.7104]) tensor([0.3736, 0.1479, 0.3804, 0.0981]) -Greedy action tensor([ 0.1523, -0.2433, 0.4728, 0.8545]) tensor([0.1973, 0.1328, 0.2718, 0.3981]) -Greedy action tensor([ 0.1836, -0.0847, -0.3174, -0.1186]) tensor([0.3216, 0.2459, 0.1948, 0.2377]) -Greedy action tensor([-0.2516, 0.5331, -0.0077, -0.5035]) tensor([0.1906, 0.4178, 0.2433, 0.1482]) -Greedy action tensor([ 0.3215, -0.2225, -0.5185, 0.5369]) tensor([0.3074, 0.1785, 0.1327, 0.3814]) -Greedy action tensor([ 0.0143, -0.9255, 0.4954, 0.7431]) tensor([0.1968, 0.0769, 0.3184, 0.4079]) -Greedy action tensor([-0.4808, -0.4255, 0.5435, -0.3546]) tensor([0.1673, 0.1768, 0.4660, 0.1898]) -Greedy action tensor([ 0.5005, -1.2521, 1.3141, 0.3145]) tensor([0.2348, 0.0407, 0.5296, 0.1949]) -Greedy action tensor([ 0.0627, 0.0005, -0.2554, 0.1571]) tensor([0.2655, 0.2495, 0.1932, 0.2918]) -Greedy action tensor([ 0.4045, -0.3177, 0.9871, -0.2970]) tensor([0.2651, 0.1288, 0.4747, 0.1314]) -Greedy action tensor([ 0.7603, -1.0904, -0.1191, 0.5961]) tensor([0.4131, 0.0649, 0.1715, 0.3505]) -Greedy action tensor([ 0.3331, -0.8365, 1.0865, 0.7542]) tensor([0.2017, 0.0626, 0.4284, 0.3073]) -Greedy action tensor([-0.7093, -0.4587, -0.7169, -0.5349]) tensor([0.2238, 0.2876, 0.2221, 0.2665]) -Greedy action tensor([-0.9720, -1.1978, -0.2827, -0.7035]) tensor([0.1962, 0.1565, 0.3908, 0.2566]) -Greedy action tensor([ 0.8920, -1.2695, -0.3865, -0.4654]) tensor([0.6057, 0.0697, 0.1687, 0.1559]) -Greedy action tensor([-0.5588, 0.2521, 0.6418, -0.6780]) tensor([0.1341, 0.3016, 0.4454, 0.1190]) -Greedy action tensor([1.1144, 0.5658, 0.3839, 0.5940]) tensor([0.3768, 0.2177, 0.1815, 0.2239]) -Greedy action tensor([ 0.2789, -1.2352, -0.2259, 0.8120]) tensor([0.2835, 0.0624, 0.1711, 0.4831]) -Greedy action tensor([ 1.6074, -0.6862, 1.0517, 1.1602]) tensor([0.4322, 0.0436, 0.2479, 0.2763]) -Greedy action tensor([ 0.2242, 0.7628, -0.7698, 0.6235]) tensor([0.2186, 0.3746, 0.0809, 0.3259]) -Greedy action tensor([-0.6300, -0.1553, 0.4139, -0.8232]) tensor([0.1594, 0.2563, 0.4528, 0.1314]) -Greedy action tensor([ 0.3172, -0.6525, -0.3997, -0.1546]) tensor([0.4014, 0.1522, 0.1960, 0.2504]) -Greedy action tensor([ 0.7313, -0.8110, 0.0526, 0.2069]) tensor([0.4323, 0.0925, 0.2193, 0.2559]) -Greedy action tensor([-0.0515, 0.6559, 0.7454, 0.4101]) tensor([0.1463, 0.2969, 0.3247, 0.2322]) -Greedy action tensor([-2.1147, 0.6662, -1.0078, -0.8621]) tensor([0.0423, 0.6820, 0.1279, 0.1479]) -Greedy action tensor([ 0.5136, -1.1664, -0.1543, -0.5976]) tensor([0.4930, 0.0919, 0.2528, 0.1623]) -Greedy action tensor([-0.9035, -0.4462, -0.5934, -0.3095]) tensor([0.1738, 0.2745, 0.2370, 0.3147]) -Greedy action tensor([ 0.1955, 0.2532, 1.2939, -0.3993]) tensor([0.1782, 0.1888, 0.5346, 0.0983]) -Greedy action tensor([ 0.7721, -0.4167, -0.9967, 1.0359]) tensor([0.3601, 0.1097, 0.0614, 0.4688]) -Greedy action tensor([ 0.2871, 0.6376, -0.6172, -0.5165]) tensor([0.3056, 0.4339, 0.1237, 0.1368]) -Greedy action tensor([ 0.8442, 0.1027, 0.4500, -0.0223]) tensor([0.3890, 0.1853, 0.2622, 0.1635]) -Greedy action tensor([-0.5350, -0.1342, 0.1604, -0.4403]) tensor([0.1787, 0.2668, 0.3581, 0.1964]) -Greedy action tensor([ 0.3893, -1.1138, 0.6467, -0.4503]) tensor([0.3392, 0.0755, 0.4388, 0.1465]) -Greedy action tensor([-0.4085, -1.0687, 0.4167, -0.3533]) tensor([0.2059, 0.1064, 0.4700, 0.2176]) -Greedy action tensor([ 0.8649, -1.9268, 0.0995, 1.3125]) tensor([0.3235, 0.0198, 0.1505, 0.5062]) -Greedy action tensor([-0.3964, 0.2607, 0.0498, -0.2943]) tensor([0.1786, 0.3446, 0.2790, 0.1978]) -Greedy action tensor([-0.2255, -0.8884, -0.4908, -0.7487]) tensor([0.3478, 0.1793, 0.2668, 0.2061]) -Greedy action tensor([ 0.1271, -1.2813, 0.1134, -0.1918]) tensor([0.3381, 0.0827, 0.3335, 0.2458]) -Greedy action tensor([ 1.1042, 0.4681, -0.6109, 0.4682]) tensor([0.4467, 0.2365, 0.0804, 0.2365]) -Greedy action tensor([-0.4173, -1.6844, -0.7124, 0.6102]) tensor([0.2075, 0.0584, 0.1544, 0.5797]) -Greedy action tensor([ 0.2617, -0.8990, -0.3265, 0.1253]) tensor([0.3648, 0.1143, 0.2026, 0.3183]) -Greedy action tensor([ 0.4339, -1.1769, 0.1745, -0.0930]) tensor([0.3904, 0.0780, 0.3012, 0.2305]) -Greedy action tensor([ 0.1343, -0.8720, 1.3565, -0.3099]) tensor([0.1851, 0.0677, 0.6285, 0.1187]) -Greedy action tensor([-0.0590, -0.2196, -0.5070, 0.7562]) tensor([0.2105, 0.1793, 0.1345, 0.4757]) -Greedy action tensor([-1.7563, -0.3223, 0.5327, -0.0654]) tensor([0.0488, 0.2048, 0.4816, 0.2648]) -Greedy action tensor([-1.8872, -0.4417, 0.6389, -0.1528]) tensor([0.0427, 0.1813, 0.5341, 0.2420]) -Greedy action tensor([-1.8457, -0.4347, 0.6146, -0.1353]) tensor([0.0448, 0.1835, 0.5241, 0.2476]) -Greedy action tensor([-1.9299, -0.4347, 0.6605, -0.1717]) tensor([0.0407, 0.1813, 0.5421, 0.2359]) -Greedy action tensor([-1.8374, -0.4827, 0.6746, -0.0264]) tensor([0.0429, 0.1662, 0.5287, 0.2623]) -Greedy action tensor([-1.9131, -0.4514, 0.6565, -0.1620]) tensor([0.0414, 0.1787, 0.5412, 0.2387]) -Greedy action tensor([-1.4309, 0.1753, 0.6215, -0.6304]) tensor([0.0625, 0.3116, 0.4867, 0.1392]) -Greedy action tensor([-1.2334, -0.5231, 0.2856, 0.1936]) tensor([0.0850, 0.1729, 0.3881, 0.3540]) -Greedy action tensor([-1.8297, -0.4421, 0.6072, -0.1155]) tensor([0.0455, 0.1821, 0.5200, 0.2524]) -Greedy action tensor([-1.8262, -0.4648, 0.5957, -0.1107]) tensor([0.0460, 0.1796, 0.5186, 0.2559]) -Greedy action tensor([-1.7431, 0.0348, 0.4756, -0.0578]) tensor([0.0465, 0.2752, 0.4276, 0.2508]) -Greedy action tensor([-0.2576, -0.0472, 0.2321, 0.0145]) tensor([0.1931, 0.2383, 0.3151, 0.2535]) -Greedy action tensor([-1.7548, -0.5271, 0.4934, -0.1546]) tensor([0.0531, 0.1812, 0.5027, 0.2630]) -Greedy action tensor([-1.7482, -0.4020, 0.5878, -0.0562]) tensor([0.0485, 0.1864, 0.5016, 0.2634]) -Greedy action tensor([-1.8339, -0.1680, 0.5700, -0.0920]) tensor([0.0434, 0.2294, 0.4798, 0.2475]) -Greedy action tensor([-1.9026, -0.3928, 0.6491, -0.1563]) tensor([0.0415, 0.1879, 0.5326, 0.2380]) -Greedy action tensor([-0.0659, 1.2300, 0.0552, 0.4625]) tensor([0.1337, 0.4886, 0.1509, 0.2268]) -Greedy action tensor([-1.6583, -0.4878, 0.5445, -0.0235]) tensor([0.0543, 0.1752, 0.4918, 0.2787]) -Greedy action tensor([-1.9185, -0.4037, 0.6518, -0.1660]) tensor([0.0410, 0.1865, 0.5359, 0.2366]) -Greedy action tensor([-0.8233, -0.4135, 0.2253, 0.0894]) tensor([0.1274, 0.1919, 0.3635, 0.3173]) -Greedy action tensor([-1.9226, -0.4310, 0.6574, -0.1656]) tensor([0.0409, 0.1819, 0.5401, 0.2372]) -Greedy action tensor([-1.6524, -0.2307, 0.5029, -0.1064]) tensor([0.0541, 0.2244, 0.4673, 0.2541]) -Greedy action tensor([-1.8993, -0.3924, 0.6421, -0.1510]) tensor([0.0417, 0.1884, 0.5301, 0.2398]) -Greedy action tensor([-0.7339, 0.4707, 0.0986, -0.1161]) tensor([0.1178, 0.3929, 0.2708, 0.2185]) -Greedy action tensor([-0.5582, 0.9485, 0.0669, 0.1077]) tensor([0.1072, 0.4838, 0.2003, 0.2087]) -Greedy action tensor([-1.9012, -0.4562, 0.6521, -0.1574]) tensor([0.0420, 0.1782, 0.5396, 0.2402]) -Greedy action tensor([-1.9117, -0.3136, 0.6268, -0.1619]) tensor([0.0411, 0.2030, 0.5198, 0.2362]) -Greedy action tensor([-1.8684, -0.3352, 0.6319, -0.1422]) tensor([0.0427, 0.1977, 0.5199, 0.2398]) -Greedy action tensor([-1.7256, -0.3780, 0.5917, -0.0297]) tensor([0.0489, 0.1882, 0.4963, 0.2666]) -Greedy action tensor([-1.9093, -0.4030, 0.6497, -0.1617]) tensor([0.0414, 0.1866, 0.5346, 0.2375]) -Greedy action tensor([-1.9010, -0.4383, 0.6410, -0.1546]) tensor([0.0421, 0.1817, 0.5348, 0.2414]) -Greedy action tensor([-1.5831, 0.3281, 0.4131, 0.0455]) tensor([0.0495, 0.3344, 0.3641, 0.2521]) -Greedy action tensor([-0.6797, -0.4859, 1.0465, 1.3866]) tensor([0.0636, 0.0772, 0.3573, 0.5020]) -Greedy action tensor([-1.8915, -0.4513, 0.6419, -0.1580]) tensor([0.0426, 0.1798, 0.5365, 0.2411]) -Greedy action tensor([-1.8044, -0.3580, 0.5731, -0.0949]) tensor([0.0464, 0.1971, 0.5001, 0.2564]) -Greedy action tensor([-1.7924, -0.3860, 0.5961, -0.0863]) tensor([0.0465, 0.1899, 0.5072, 0.2563]) -Greedy action tensor([-1.8466, -0.4549, 0.6218, -0.1344]) tensor([0.0447, 0.1798, 0.5277, 0.2477]) -Greedy action tensor([-1.8889, -0.4563, 0.6459, -0.1505]) tensor([0.0426, 0.1783, 0.5370, 0.2421]) -Greedy action tensor([-1.6125, -0.5410, 0.4999, 0.0685]) tensor([0.0570, 0.1663, 0.4709, 0.3059]) -Greedy action tensor([-1.9071, -0.4286, 0.6461, -0.1635]) tensor([0.0417, 0.1831, 0.5364, 0.2387]) -Greedy action tensor([-1.8037, -0.3985, 0.5878, -0.1198]) tensor([0.0467, 0.1905, 0.5109, 0.2518]) -Greedy action tensor([-1.9120, -0.4181, 0.6469, -0.1616]) tensor([0.0414, 0.1846, 0.5354, 0.2385]) -Greedy action tensor([-1.9224, -0.4262, 0.6565, -0.1691]) tensor([0.0409, 0.1828, 0.5398, 0.2364]) -Greedy action tensor([-1.8375, -0.4493, 0.6128, -0.1241]) tensor([0.0452, 0.1810, 0.5234, 0.2505]) -Greedy action tensor([-1.8584, -0.3953, 0.6179, -0.1330]) tensor([0.0438, 0.1892, 0.5211, 0.2459]) -Greedy action tensor([-1.7579, -0.4276, 0.5893, -0.0725]) tensor([0.0485, 0.1833, 0.5068, 0.2615]) -Greedy action tensor([-1.5750, -0.4035, 0.5193, -0.0292]) tensor([0.0587, 0.1894, 0.4766, 0.2754]) -Greedy action tensor([-1.7853, -0.4563, 0.5982, -0.0575]) tensor([0.0471, 0.1778, 0.5103, 0.2649]) -Greedy action tensor([-0.6421, -0.4738, 0.1974, 0.3176]) tensor([0.1407, 0.1664, 0.3257, 0.3672]) -Greedy action tensor([-1.9318, -0.4443, 0.6625, -0.1729]) tensor([0.0406, 0.1798, 0.5438, 0.2358]) -Greedy action tensor([-1.0781, -0.1677, 0.3937, 0.5078]) tensor([0.0786, 0.1953, 0.3424, 0.3838]) -Greedy action tensor([-1.9265, -0.4701, 0.6572, -0.1665]) tensor([0.0411, 0.1762, 0.5440, 0.2387]) -Greedy action tensor([-1.8035, -0.1209, 0.5633, -0.1301]) tensor([0.0447, 0.2405, 0.4766, 0.2382]) -Greedy action tensor([0.3624, 1.1096, 0.0731, 0.7482]) tensor([0.1876, 0.3960, 0.1405, 0.2759]) -Greedy action tensor([-1.6156, 0.3813, 0.4306, 0.1541]) tensor([0.0455, 0.3352, 0.3522, 0.2671]) -Greedy action tensor([-1.8941, -0.3887, 0.6379, -0.1473]) tensor([0.0420, 0.1892, 0.5281, 0.2408]) -Greedy action tensor([-1.9178, -0.4498, 0.6543, -0.1664]) tensor([0.0413, 0.1794, 0.5411, 0.2382]) -Greedy action tensor([-1.9165, -0.4468, 0.6616, -0.1598]) tensor([0.0411, 0.1788, 0.5418, 0.2383]) -Greedy action tensor([-0.4117, 0.1615, 0.7334, 1.2490]) tensor([0.0894, 0.1587, 0.2811, 0.4708]) -Greedy action tensor([-1.7098, -0.4320, 0.6443, 0.2047]) tensor([0.0457, 0.1639, 0.4807, 0.3097]) -Greedy action tensor([-0.3165, 0.9145, 0.3768, 0.5910]) tensor([0.1123, 0.3847, 0.2247, 0.2783]) -Greedy action tensor([-1.6921, -0.4549, 0.6221, 0.0890]) tensor([0.0488, 0.1681, 0.4935, 0.2896]) -Greedy action tensor([-1.9048, -0.4170, 0.6575, -0.1533]) tensor([0.0414, 0.1833, 0.5367, 0.2386]) -Greedy action tensor([-1.9145, -0.4365, 0.6524, -0.1646]) tensor([0.0414, 0.1814, 0.5391, 0.2381]) -Greedy action tensor([-1.6419, -0.3614, 0.7886, 0.3855]) tensor([0.0424, 0.1528, 0.4824, 0.3224]) -Greedy action tensor([-1.9206, -0.4400, 0.6618, -0.1694]) tensor([0.0410, 0.1803, 0.5425, 0.2363]) -Greedy action tensor([-1.9279, -0.4283, 0.6572, -0.1714]) tensor([0.0408, 0.1826, 0.5406, 0.2361]) -Greedy action tensor([-1.8860, -0.4459, 0.6405, -0.1509]) tensor([0.0427, 0.1804, 0.5346, 0.2423]) -Greedy action tensor([-1.2125, -0.5917, 0.3325, 0.2553]) tensor([0.0841, 0.1565, 0.3943, 0.3651]) -Greedy action tensor([-1.9398, -0.4453, 0.6691, -0.1726]) tensor([0.0402, 0.1790, 0.5457, 0.2352]) -Greedy action tensor([-0.6870, 0.9834, 0.0639, 0.2638]) tensor([0.0907, 0.4822, 0.1923, 0.2348]) -Greedy action tensor([-1.4986, -0.2257, 0.6085, 0.4322]) tensor([0.0508, 0.1814, 0.4177, 0.3502]) -Greedy action tensor([-1.8270, -0.4075, 0.6112, -0.1178]) tensor([0.0452, 0.1870, 0.5179, 0.2498]) -Greedy action tensor([-0.6966, 0.9846, 0.0586, 0.2981]) tensor([0.0893, 0.4795, 0.1899, 0.2413]) -Greedy action tensor([-0.7447, 0.9579, 0.0552, 0.3150]) tensor([0.0862, 0.4732, 0.1919, 0.2488]) -Greedy action tensor([-1.8799, -0.4334, 0.6349, -0.1486]) tensor([0.0430, 0.1826, 0.5315, 0.2428]) -Greedy action tensor([-1.9394, -0.4438, 0.6653, -0.1776]) tensor([0.0403, 0.1798, 0.5452, 0.2347]) -Greedy action tensor([-0.8172, 0.2890, 0.3874, 0.5633]) tensor([0.0882, 0.2667, 0.2943, 0.3508]) -Greedy action tensor([-1.1160, 0.8064, 0.2158, -0.0272]) tensor([0.0685, 0.4685, 0.2595, 0.2035]) -Greedy action tensor([-1.3592, 0.2417, 0.3123, -0.0328]) tensor([0.0665, 0.3295, 0.3536, 0.2504]) -Greedy action tensor([-1.8176, -0.4424, 0.6039, -0.1134]) tensor([0.0461, 0.1822, 0.5186, 0.2531]) -Greedy action tensor([ 1.1675, -0.7005, -0.0772, -0.4884]) tensor([0.6122, 0.0945, 0.1763, 0.1169]) -Greedy action tensor([ 0.5986, -0.2707, -0.0153, -0.2419]) tensor([0.4181, 0.1753, 0.2263, 0.1804]) -Greedy action tensor([ 0.4453, -0.4765, -0.1424, -0.2039]) tensor([0.4039, 0.1607, 0.2244, 0.2110]) -Greedy action tensor([ 0.6274, -0.0054, -0.1687, 0.0441]) tensor([0.3937, 0.2091, 0.1776, 0.2197]) -Greedy action tensor([ 0.7404, -0.4345, -0.1593, -0.1398]) tensor([0.4694, 0.1450, 0.1909, 0.1947]) -Greedy action tensor([ 0.8412, -0.6202, -0.0147, -0.3776]) tensor([0.5122, 0.1188, 0.2176, 0.1514]) -Greedy action tensor([ 0.4605, -0.1343, -0.0486, -0.0461]) tensor([0.3629, 0.2002, 0.2182, 0.2187]) -Greedy action tensor([ 0.4694, -0.1902, -0.0616, -0.1019]) tensor([0.3746, 0.1937, 0.2202, 0.2115]) -Greedy action tensor([ 0.7741, -0.3574, -0.1307, -0.1341]) tensor([0.4694, 0.1514, 0.1899, 0.1893]) -Greedy action tensor([ 0.5060, -0.5424, -0.2176, -0.4696]) tensor([0.4520, 0.1584, 0.2192, 0.1704]) -Greedy action tensor([ 0.9781, -0.6842, 0.0232, -0.3434]) tensor([0.5431, 0.1030, 0.2090, 0.1449]) -Greedy action tensor([ 0.9504, -0.4469, 0.0017, -0.3091]) tensor([0.5213, 0.1289, 0.2019, 0.1479]) -Greedy action tensor([ 0.9936, -0.4144, -0.0103, -0.1753]) tensor([0.5203, 0.1273, 0.1907, 0.1617]) -Greedy action tensor([ 0.7844, -0.2121, -0.3153, -0.1163]) tensor([0.4743, 0.1751, 0.1579, 0.1927]) -Greedy action tensor([ 0.8098, 0.0985, -0.0252, -0.3366]) tensor([0.4459, 0.2189, 0.1935, 0.1417]) -Greedy action tensor([ 0.8931, -0.5915, -0.0053, -0.3477]) tensor([0.5200, 0.1178, 0.2118, 0.1504]) -Greedy action tensor([ 6.6576e-01, -3.3281e-01, 4.9295e-04, -7.1952e-02]) tensor([0.4236, 0.1561, 0.2178, 0.2026]) -Greedy action tensor([ 0.8868, -0.1022, 0.0243, -0.0947]) tensor([0.4611, 0.1715, 0.1946, 0.1728]) -Greedy action tensor([ 0.7631, -0.4654, -0.0725, -0.2476]) tensor([0.4784, 0.1400, 0.2074, 0.1741]) -Greedy action tensor([ 0.9710, -0.2864, 0.0173, -0.2500]) tensor([0.5090, 0.1448, 0.1961, 0.1501]) -Greedy action tensor([ 0.8793, -0.3033, -0.0387, -0.0762]) tensor([0.4784, 0.1466, 0.1910, 0.1840]) -Greedy action tensor([ 0.9322, -0.0786, 0.0125, -0.0212]) tensor([0.4655, 0.1694, 0.1856, 0.1794]) -Greedy action tensor([ 0.7649, -0.3394, 0.0372, -0.2843]) tensor([0.4620, 0.1531, 0.2231, 0.1618]) -Greedy action tensor([ 0.8651, -0.4837, -0.0727, -0.3102]) tensor([0.5103, 0.1324, 0.1998, 0.1575]) -Greedy action tensor([ 0.6341, -0.2562, -0.1540, 0.0212]) tensor([0.4154, 0.1706, 0.1889, 0.2251]) -Greedy action tensor([ 0.8027, -0.8823, 0.0032, -0.3096]) tensor([0.5092, 0.0944, 0.2289, 0.1674]) -Greedy action tensor([ 0.7799, 0.3157, -0.1703, -0.3161]) tensor([0.4256, 0.2676, 0.1646, 0.1422]) -Greedy action tensor([ 0.8016, -0.4579, 0.1200, -0.1827]) tensor([0.4622, 0.1312, 0.2338, 0.1727]) -Greedy action tensor([ 0.8255, -0.5547, 0.0120, -0.4553]) tensor([0.5069, 0.1275, 0.2247, 0.1408]) -Greedy action tensor([ 0.6244, -0.3894, -0.1124, -0.2208]) tensor([0.4403, 0.1598, 0.2108, 0.1891]) -Greedy action tensor([ 0.4721, -0.0422, -0.0096, -0.0485]) tensor([0.3559, 0.2128, 0.2198, 0.2115]) -Greedy action tensor([ 1.1618, -0.6473, -0.0803, -0.4083]) tensor([0.6022, 0.0986, 0.1739, 0.1253]) -Greedy action tensor([ 1.1512, -0.6889, -0.0915, -0.4362]) tensor([0.6054, 0.0961, 0.1747, 0.1238]) -Greedy action tensor([ 0.9516, -0.4821, -0.1447, -0.2765]) tensor([0.5361, 0.1278, 0.1791, 0.1570]) -Greedy action tensor([ 0.5079, -0.0682, 0.0018, -0.0007]) tensor([0.3615, 0.2032, 0.2179, 0.2174]) -Greedy action tensor([ 0.7908, -0.1402, -0.0459, -0.0963]) tensor([0.4466, 0.1760, 0.1934, 0.1839]) -Greedy action tensor([ 1.0045, -0.5646, -0.0524, -0.3347]) tensor([0.5501, 0.1145, 0.1912, 0.1442]) -Greedy action tensor([ 0.8108, -0.6230, 0.1133, -0.3970]) tensor([0.4914, 0.1172, 0.2446, 0.1469]) -Greedy action tensor([ 0.4991, -0.3426, 0.0326, -0.1245]) tensor([0.3855, 0.1661, 0.2418, 0.2066]) -Greedy action tensor([ 0.2222, -0.3598, 0.0282, -0.3388]) tensor([0.3386, 0.1892, 0.2789, 0.1932]) -Greedy action tensor([ 0.9093, -0.5196, 0.0551, -0.6296]) tensor([0.5320, 0.1275, 0.2264, 0.1142]) -Greedy action tensor([ 1.0576, -0.9827, 0.1452, -0.5051]) tensor([0.5743, 0.0747, 0.2306, 0.1204]) -Greedy action tensor([ 0.6542, -0.4954, -0.0810, -0.2529]) tensor([0.4546, 0.1440, 0.2179, 0.1835]) -Greedy action tensor([ 0.3295, -0.0242, -0.0866, -0.1268]) tensor([0.3338, 0.2344, 0.2202, 0.2115]) -Greedy action tensor([ 1.0332, -0.5871, 0.1144, -0.5296]) tensor([0.5536, 0.1095, 0.2209, 0.1160]) -Greedy action tensor([ 0.5131, -0.0649, -0.0925, -0.0073]) tensor([0.3702, 0.2077, 0.2021, 0.2200]) -Greedy action tensor([ 0.7478, -0.9722, 0.0430, -0.5849]) tensor([0.5163, 0.0924, 0.2551, 0.1362]) -Greedy action tensor([ 0.6360, -0.4657, -0.0787, -0.7552]) tensor([0.4830, 0.1605, 0.2363, 0.1202]) -Greedy action tensor([ 0.7187, -0.5459, -0.1602, -0.2535]) tensor([0.4817, 0.1360, 0.2000, 0.1822]) -Greedy action tensor([ 0.7753, -0.4196, 0.0489, -0.2616]) tensor([0.4671, 0.1414, 0.2259, 0.1656]) -Greedy action tensor([ 0.5217, -0.2206, 0.0995, -0.3096]) tensor([0.3896, 0.1854, 0.2554, 0.1696]) -Greedy action tensor([ 0.7512, -0.3713, 0.1119, -0.2889]) tensor([0.4532, 0.1475, 0.2391, 0.1602]) -Greedy action tensor([ 0.9872, -0.9327, 0.1147, -0.5061]) tensor([0.5589, 0.0819, 0.2336, 0.1256]) -Greedy action tensor([ 0.5970, -0.3490, -0.1251, -0.0448]) tensor([0.4166, 0.1618, 0.2024, 0.2193]) -Greedy action tensor([ 1.0495, -1.3489, -0.0635, -0.9179]) tensor([0.6413, 0.0583, 0.2107, 0.0897]) -Greedy action tensor([ 0.9911, -0.5223, 0.0896, -0.3768]) tensor([0.5317, 0.1171, 0.2159, 0.1354]) -Greedy action tensor([ 0.7793, -0.1007, -0.0151, -0.1935]) tensor([0.4455, 0.1848, 0.2013, 0.1684]) -Greedy action tensor([ 1.0724, -0.6795, -0.1588, -0.8838]) tensor([0.6224, 0.1079, 0.1817, 0.0880]) -Greedy action tensor([ 0.7564, -0.3909, -0.0884, -0.4773]) tensor([0.4906, 0.1558, 0.2108, 0.1429]) -Greedy action tensor([ 1.0029, -0.8245, 0.0453, -0.5326]) tensor([0.5682, 0.0914, 0.2181, 0.1224]) -Greedy action tensor([ 0.8668, -0.3792, -0.0600, -0.3063]) tensor([0.5018, 0.1443, 0.1986, 0.1553]) -Greedy action tensor([ 0.6345, -0.5437, -0.0832, -0.4082]) tensor([0.4655, 0.1433, 0.2271, 0.1641]) -Greedy action tensor([ 0.9695, -0.9416, 0.1594, -0.5486]) tensor([0.5519, 0.0816, 0.2455, 0.1209]) -Greedy action tensor([ 0.2346, -0.0275, -0.1003, -0.0930]) tensor([0.3120, 0.2400, 0.2232, 0.2248]) -Greedy action tensor([ 0.7745, -0.1533, 0.0148, -0.0210]) tensor([0.4320, 0.1708, 0.2021, 0.1950]) -Greedy action tensor([ 0.4877, -0.4192, -0.2201, 0.0128]) tensor([0.3971, 0.1603, 0.1957, 0.2469]) -Greedy action tensor([ 1.0276, -0.4215, -0.1795, -0.2367]) tensor([0.5506, 0.1293, 0.1646, 0.1555]) -Greedy action tensor([ 0.9840, -0.6855, -0.0685, -0.3922]) tensor([0.5587, 0.1052, 0.1950, 0.1411]) -Greedy action tensor([ 0.7599, -0.1591, 0.1526, -0.1495]) tensor([0.4262, 0.1700, 0.2322, 0.1716]) -Greedy action tensor([ 0.5657, -0.3556, 0.0232, -0.1087]) tensor([0.4018, 0.1599, 0.2336, 0.2047]) -Greedy action tensor([ 0.4116, 0.0103, -0.1899, -0.0082]) tensor([0.3479, 0.2329, 0.1906, 0.2286]) -Greedy action tensor([ 0.9175, -0.7167, 0.0406, -0.4750]) tensor([0.5377, 0.1049, 0.2237, 0.1336]) -Greedy action tensor([ 0.5343, -0.2000, -0.0169, -0.0874]) tensor([0.3856, 0.1850, 0.2222, 0.2071]) -Greedy action tensor([ 0.5429, -0.4079, -0.0588, -0.0801]) tensor([0.4047, 0.1564, 0.2218, 0.2171]) -Greedy action tensor([ 0.8697, -0.4207, -0.1604, -0.1836]) tensor([0.5048, 0.1389, 0.1802, 0.1761]) -Greedy action tensor([ 0.4730, 0.0474, -0.1481, -0.1720]) tensor([0.3683, 0.2406, 0.1979, 0.1932]) -Greedy action tensor([ 0.8632, -0.3927, -0.0667, -0.4113]) tensor([0.5105, 0.1454, 0.2014, 0.1427]) -Greedy action tensor([ 1.1905, -0.6076, -0.1649, -0.4078]) tensor([0.6151, 0.1019, 0.1586, 0.1244]) -Greedy action tensor([ 0.8007, -0.4488, -0.1943, -0.5759]) tensor([0.5239, 0.1502, 0.1937, 0.1323]) -Greedy action tensor([ 0.6019, 0.0043, -0.1835, -0.3749]) tensor([0.4197, 0.2309, 0.1914, 0.1580]) -Greedy action tensor([ 0.9120, -0.6064, -0.0849, -0.3670]) tensor([0.5358, 0.1174, 0.1977, 0.1491]) -Greedy action tensor([ 1.7080, 0.3825, -0.2153, 0.4149]) tensor([0.5930, 0.1576, 0.0867, 0.1627]) -Greedy action tensor([ 1.2571, -0.2385, -0.0461, 0.0201]) tensor([0.5599, 0.1255, 0.1521, 0.1625]) -Greedy action tensor([ 1.8275, 0.1777, -0.1925, 0.5351]) tensor([0.6252, 0.1201, 0.0829, 0.1717]) -Greedy action tensor([ 3.0058, -1.0549, -0.2087, 0.9559]) tensor([0.8431, 0.0145, 0.0339, 0.1085]) -Greedy action tensor([ 2.0001, -0.9384, -0.2829, 0.5588]) tensor([0.7186, 0.0380, 0.0733, 0.1700]) -Greedy action tensor([ 1.8119, -1.0148, -0.3352, 0.4329]) tensor([0.7004, 0.0415, 0.0818, 0.1764]) -Greedy action tensor([ 1.4524, -0.0709, -0.9834, 0.2907]) tensor([0.6179, 0.1347, 0.0541, 0.1934]) -Greedy action tensor([ 1.5039, -0.2164, -0.4738, 0.1387]) tensor([0.6359, 0.1138, 0.0880, 0.1623]) -Greedy action tensor([ 1.0471, -0.3582, -0.0418, 0.4007]) tensor([0.4749, 0.1165, 0.1598, 0.2488]) -Greedy action tensor([ 1.6406, -0.5762, -0.8092, 0.5868]) tensor([0.6477, 0.0706, 0.0559, 0.2258]) -Greedy action tensor([ 1.4914, -0.4399, -0.6840, 0.0453]) tensor([0.6693, 0.0970, 0.0760, 0.1576]) -Greedy action tensor([ 1.1893, -0.2330, -0.1720, 0.1463]) tensor([0.5406, 0.1304, 0.1386, 0.1905]) -Greedy action tensor([ 1.7022, -1.0019, -0.2807, 0.5531]) tensor([0.6572, 0.0440, 0.0905, 0.2083]) -Greedy action tensor([ 1.3055, -0.2050, -1.1478, 0.3200]) tensor([0.5952, 0.1314, 0.0512, 0.2222]) -Greedy action tensor([ 1.6465, 0.4284, -0.4432, 0.3162]) tensor([0.5938, 0.1757, 0.0735, 0.1570]) -Greedy action tensor([ 0.7016, -0.4706, -0.2584, 0.3824]) tensor([0.4133, 0.1280, 0.1583, 0.3004]) -Greedy action tensor([ 2.2530, -0.8579, 0.1973, -0.0395]) tensor([0.7852, 0.0350, 0.1005, 0.0793]) -Greedy action tensor([ 1.4346, -0.0808, -0.1620, 0.2385]) tensor([0.5798, 0.1274, 0.1175, 0.1753]) -Greedy action tensor([ 1.1184, -0.5343, -0.6585, 0.4257]) tensor([0.5374, 0.1029, 0.0909, 0.2688]) -Greedy action tensor([ 1.5915, -0.3816, -0.4025, 0.2715]) tensor([0.6484, 0.0901, 0.0883, 0.1732]) -Greedy action tensor([ 1.6163, -0.9774, -0.4963, 0.3622]) tensor([0.6752, 0.0505, 0.0816, 0.1927]) -Greedy action tensor([ 1.1341, -0.0635, -0.8041, 0.4164]) tensor([0.5171, 0.1561, 0.0744, 0.2523]) -Greedy action tensor([ 1.7459, -1.0799, 0.1028, 0.4715]) tensor([0.6526, 0.0387, 0.1262, 0.1825]) -Greedy action tensor([ 0.7480, -0.2493, -0.0544, 0.2876]) tensor([0.4085, 0.1507, 0.1831, 0.2578]) -Greedy action tensor([ 1.3830, -0.5557, -0.2235, 0.7883]) tensor([0.5274, 0.0759, 0.1058, 0.2910]) -Greedy action tensor([ 1.0900, -0.4575, -0.1943, 0.0350]) tensor([0.5441, 0.1158, 0.1506, 0.1895]) -Greedy action tensor([ 1.5135, -0.4818, -0.5502, 0.5465]) tensor([0.6086, 0.0827, 0.0773, 0.2314]) -Greedy action tensor([ 1.5007, -0.0353, -0.2831, 0.6389]) tensor([0.5538, 0.1192, 0.0930, 0.2339]) -Greedy action tensor([ 1.5080, -0.4733, -0.9120, 0.3850]) tensor([0.6443, 0.0888, 0.0573, 0.2096]) -Greedy action tensor([ 1.6767, -0.6579, -0.6874, 0.1839]) tensor([0.7064, 0.0684, 0.0664, 0.1588]) -Greedy action tensor([ 1.3073, -0.5441, -0.4699, 0.3722]) tensor([0.5819, 0.0914, 0.0984, 0.2284]) -Greedy action tensor([ 1.6141, -0.5227, -0.7939, 0.0990]) tensor([0.7004, 0.0827, 0.0630, 0.1539]) -Greedy action tensor([ 1.3892, -0.7262, -0.0951, 0.4157]) tensor([0.5797, 0.0699, 0.1314, 0.2190]) -Greedy action tensor([ 1.4309, -0.2477, -0.3509, 0.5652]) tensor([0.5632, 0.1051, 0.0948, 0.2369]) -Greedy action tensor([ 1.6184, -0.7040, -0.4629, 0.5806]) tensor([0.6341, 0.0622, 0.0791, 0.2246]) -Greedy action tensor([ 2.4498, -0.0234, 0.4426, 0.1626]) tensor([0.7574, 0.0639, 0.1018, 0.0769]) -Greedy action tensor([ 1.6306, -0.6191, -0.3649, 0.2850]) tensor([0.6659, 0.0702, 0.0905, 0.1734]) -Greedy action tensor([ 1.5012, 0.1830, -0.9316, -0.2027]) tensor([0.6505, 0.1741, 0.0571, 0.1184]) -Greedy action tensor([ 0.8585, -0.5370, -0.2191, 0.1593]) tensor([0.4796, 0.1188, 0.1633, 0.2383]) -Greedy action tensor([ 1.6805, -0.3534, -0.4058, 0.3415]) tensor([0.6592, 0.0862, 0.0818, 0.1728]) -Greedy action tensor([ 2.3810, -0.4153, -0.6490, 0.4320]) tensor([0.7989, 0.0488, 0.0386, 0.1138]) -Greedy action tensor([ 1.8960, -0.4479, -0.2934, -0.0702]) tensor([0.7419, 0.0712, 0.0831, 0.1039]) -Greedy action tensor([ 1.2768, -0.6295, -0.2865, 0.2539]) tensor([0.5822, 0.0865, 0.1219, 0.2093]) -Greedy action tensor([ 1.4857, -0.6622, 0.0094, 0.1640]) tensor([0.6204, 0.0724, 0.1417, 0.1654]) -Greedy action tensor([ 1.7041, 0.0938, -0.6777, 0.5360]) tensor([0.6238, 0.1246, 0.0576, 0.1940]) -Greedy action tensor([ 1.4352, 0.2129, -0.1961, 0.3277]) tensor([0.5493, 0.1618, 0.1075, 0.1815]) -Greedy action tensor([ 1.5209, -0.2405, -0.9638, 0.6464]) tensor([0.5980, 0.1027, 0.0498, 0.2494]) -Greedy action tensor([ 1.6296, -0.8838, -0.8039, 0.5506]) tensor([0.6629, 0.0537, 0.0581, 0.2253]) -Greedy action tensor([ 1.5997, 0.0721, -0.1892, 0.4174]) tensor([0.5914, 0.1284, 0.0989, 0.1813]) -Greedy action tensor([ 2.1548, -0.5209, -0.7081, 0.5237]) tensor([0.7566, 0.0521, 0.0432, 0.1481]) -Greedy action tensor([ 1.0108, -0.8144, -0.5383, 1.1220]) tensor([0.4014, 0.0647, 0.0853, 0.4486]) -Greedy action tensor([ 2.1593, -0.2393, -0.5692, 0.5362]) tensor([0.7388, 0.0671, 0.0483, 0.1458]) -Greedy action tensor([ 1.5782, -0.8255, -0.4182, 0.4125]) tensor([0.6502, 0.0588, 0.0883, 0.2027]) -Greedy action tensor([ 1.3109, -0.3025, -0.2834, 0.5545]) tensor([0.5343, 0.1064, 0.1085, 0.2508]) -Greedy action tensor([ 1.9632, -0.6806, -0.1585, 0.7591]) tensor([0.6707, 0.0477, 0.0804, 0.2012]) -Greedy action tensor([ 1.2392, -0.4637, -0.2937, 0.5138]) tensor([0.5313, 0.0968, 0.1147, 0.2572]) -Greedy action tensor([ 1.3914, -0.5714, -0.1357, 0.2311]) tensor([0.5984, 0.0841, 0.1300, 0.1875]) -Greedy action tensor([ 1.4816, 0.4456, -0.6101, 0.3428]) tensor([0.5560, 0.1973, 0.0687, 0.1780]) -Greedy action tensor([ 1.6639, -0.6431, -0.4595, 0.2783]) tensor([0.6806, 0.0678, 0.0814, 0.1703]) -Greedy action tensor([ 1.6603, -0.7912, -0.2581, 0.4153]) tensor([0.6575, 0.0567, 0.0965, 0.1893]) -Greedy action tensor([ 1.2504, -0.4984, -0.3202, 0.1991]) tensor([0.5776, 0.1005, 0.1201, 0.2018]) -Greedy action tensor([ 2.6837, -1.8949, 0.1477, 0.1015]) tensor([0.8583, 0.0088, 0.0680, 0.0649]) -Greedy action tensor([ 1.2719, -0.4929, -0.4029, 0.7453]) tensor([0.5130, 0.0878, 0.0961, 0.3030]) -Greedy action tensor([ 1.6981, -0.9924, -0.0890, 0.4113]) tensor([0.6616, 0.0449, 0.1108, 0.1827]) -Greedy action tensor([ 1.6939, -0.7883, -0.0855, 0.7718]) tensor([0.6061, 0.0506, 0.1023, 0.2410]) -Greedy action tensor([ 1.7335, -0.7480, -0.3449, 0.5371]) tensor([0.6618, 0.0553, 0.0828, 0.2000]) -Greedy action tensor([ 2.1829, -0.9440, -0.4920, 0.6192]) tensor([0.7564, 0.0332, 0.0521, 0.1584]) -Greedy action tensor([ 2.2227, 0.2215, -0.0414, 0.5448]) tensor([0.7013, 0.0948, 0.0729, 0.1310]) -Greedy action tensor([ 1.2310, -0.5679, -0.3512, 0.7230]) tensor([0.5069, 0.0839, 0.1042, 0.3050]) -Greedy action tensor([ 2.1366, -0.5621, -0.4729, 0.4613]) tensor([0.7529, 0.0507, 0.0554, 0.1410]) -Greedy action tensor([ 1.7077, -0.7398, 0.0216, 0.5154]) tensor([0.6348, 0.0549, 0.1176, 0.1927]) -Greedy action tensor([ 1.4973, -0.8214, -0.5600, 0.5194]) tensor([0.6241, 0.0614, 0.0798, 0.2347]) -Greedy action tensor([ 1.4133, -0.7151, -0.6453, 0.4208]) tensor([0.6183, 0.0736, 0.0789, 0.2292]) -Greedy action tensor([ 1.7772, -0.2029, -0.3563, -0.0322]) tensor([0.7041, 0.0972, 0.0834, 0.1153]) -Greedy action tensor([ 2.0257, -0.2390, -0.1435, 0.4678]) tensor([0.6999, 0.0727, 0.0800, 0.1474]) -Greedy action tensor([ 1.3750, -0.0414, -0.3164, 0.3028]) tensor([0.5653, 0.1371, 0.1042, 0.1935]) -Greedy action tensor([ 1.0990, -0.3047, -0.3545, 0.4790]) tensor([0.4957, 0.1218, 0.1159, 0.2667]) -Greedy action tensor([ 0.8797, 0.0418, 0.0215, -0.1337]) tensor([0.4506, 0.1949, 0.1910, 0.1635]) -Greedy action tensor([ 1.2575e+00, -4.3070e-01, -1.8721e-01, -1.9908e-04]) tensor([0.5865, 0.1084, 0.1383, 0.1668]) -Greedy action tensor([ 1.4415, -0.4550, 0.0859, 0.2164]) tensor([0.5877, 0.0882, 0.1515, 0.1726]) -Greedy action tensor([ 1.0440, -0.4180, -0.2789, -0.3145]) tensor([0.5697, 0.1321, 0.1518, 0.1464]) -Greedy action tensor([-0.4833, -1.0203, 0.9763, -0.9788]) tensor([0.1539, 0.0900, 0.6624, 0.0938]) -Greedy action tensor([ 0.4731, -1.1687, 0.6539, -0.2058]) tensor([0.3450, 0.0668, 0.4133, 0.1749]) -Greedy action tensor([-1.8548, -1.5141, 1.7344, -1.2425]) tensor([0.0247, 0.0348, 0.8949, 0.0456]) -Greedy action tensor([-0.5801, -0.0718, 0.7130, 0.3781]) tensor([0.1122, 0.1865, 0.4088, 0.2925]) -Greedy action tensor([ 0.0596, 0.5211, 0.4326, -0.3190]) tensor([0.2117, 0.3359, 0.3074, 0.1450]) -Greedy action tensor([-0.3034, 0.6459, -0.0698, -0.3756]) tensor([0.1731, 0.4473, 0.2186, 0.1610]) -Greedy action tensor([-0.1747, -0.1404, -0.7577, -0.0428]) tensor([0.2678, 0.2772, 0.1495, 0.3056]) -Greedy action tensor([-0.0918, -1.1231, 0.5441, 0.1598]) tensor([0.2207, 0.0787, 0.4168, 0.2838]) -Greedy action tensor([ 0.1621, -0.9970, -0.0582, 0.3430]) tensor([0.3017, 0.0947, 0.2421, 0.3615]) -Greedy action tensor([-0.3832, 0.5545, 0.1429, 0.1406]) tensor([0.1442, 0.3683, 0.2440, 0.2435]) -Greedy action tensor([ 0.3572, -1.1667, 0.3622, 0.5462]) tensor([0.2915, 0.0635, 0.2929, 0.3521]) -Greedy action tensor([ 0.0440, -0.5908, -0.7378, 0.3219]) tensor([0.3023, 0.1602, 0.1383, 0.3991]) -Greedy action tensor([ 0.8853, -0.9990, -0.1572, 1.1777]) tensor([0.3516, 0.0534, 0.1240, 0.4710]) -Greedy action tensor([0.0215, 0.0876, 0.0763, 0.0172]) tensor([0.2427, 0.2593, 0.2564, 0.2416]) -Greedy action tensor([0.2334, 0.0048, 0.7466, 0.4263]) tensor([0.2137, 0.1700, 0.3570, 0.2592]) -Greedy action tensor([-0.2049, 0.3482, 0.0549, -0.4220]) tensor([0.2066, 0.3592, 0.2679, 0.1663]) -Greedy action tensor([ 0.0978, -0.2738, 0.1370, -0.6112]) tensor([0.3104, 0.2141, 0.3228, 0.1528]) -Greedy action tensor([-0.1595, -1.1054, -0.3028, 0.1912]) tensor([0.2721, 0.1057, 0.2358, 0.3864]) -Greedy action tensor([ 0.3168, -0.1990, -0.2141, -0.0620]) tensor([0.3485, 0.2080, 0.2049, 0.2386]) -Greedy action tensor([ 0.4745, -0.2619, -0.6189, 0.2428]) tensor([0.3836, 0.1837, 0.1285, 0.3042]) -Greedy action tensor([ 1.0382, -1.0195, 0.0113, 0.9811]) tensor([0.4115, 0.0526, 0.1474, 0.3886]) -Greedy action tensor([ 0.2254, -0.0769, 0.8670, -0.5548]) tensor([0.2441, 0.1804, 0.4636, 0.1119]) -Greedy action tensor([ 0.4119, -0.3888, 0.0116, 0.5395]) tensor([0.3072, 0.1379, 0.2059, 0.3490]) -Greedy action tensor([ 0.6119, -0.9692, 1.6069, -0.0608]) tensor([0.2262, 0.0465, 0.6118, 0.1154]) -Greedy action tensor([ 0.1979, -1.3923, -0.5575, 0.3561]) tensor([0.3515, 0.0717, 0.1651, 0.4117]) -Greedy action tensor([ 0.5454, -2.1325, 0.6721, -0.1472]) tensor([0.3698, 0.0254, 0.4198, 0.1850]) -Greedy action tensor([-0.4407, -0.0212, 0.7545, 0.2126]) tensor([0.1291, 0.1963, 0.4265, 0.2481]) -Greedy action tensor([-0.4575, -0.8161, 0.2971, -0.4104]) tensor([0.2052, 0.1434, 0.4364, 0.2151]) -Greedy action tensor([ 0.8029, -0.2638, 1.0782, 0.6910]) tensor([0.2813, 0.0968, 0.3704, 0.2515]) -Greedy action tensor([-0.0497, -0.7519, -0.0948, -0.4963]) tensor([0.3235, 0.1603, 0.3092, 0.2070]) -Greedy action tensor([-0.6566, -0.5098, 0.9386, -0.8105]) tensor([0.1259, 0.1458, 0.6204, 0.1079]) -Greedy action tensor([-1.2493, -1.0030, -0.9500, 0.2446]) tensor([0.1237, 0.1583, 0.1669, 0.5511]) -Greedy action tensor([ 0.1160, -0.4157, -0.3676, 0.0051]) tensor([0.3227, 0.1896, 0.1989, 0.2888]) -Greedy action tensor([ 0.6856, -0.5702, 0.7553, 0.2719]) tensor([0.3313, 0.0944, 0.3552, 0.2191]) -Greedy action tensor([-0.0103, -0.7513, 1.5091, 0.3787]) tensor([0.1329, 0.0634, 0.6075, 0.1962]) -Greedy action tensor([ 0.1292, -0.0446, 0.4309, 0.2550]) tensor([0.2311, 0.1943, 0.3125, 0.2621]) -Greedy action tensor([ 0.5662, -0.6578, 0.1258, -0.4519]) tensor([0.4350, 0.1279, 0.2800, 0.1571]) -Greedy action tensor([ 0.5159, 0.1890, -0.6088, -0.3385]) tensor([0.4046, 0.2918, 0.1314, 0.1722]) -Greedy action tensor([-0.9497, -0.0042, -0.5792, 0.0380]) tensor([0.1297, 0.3340, 0.1879, 0.3484]) -Greedy action tensor([-1.5977, -0.9816, -0.1156, -0.6008]) tensor([0.1004, 0.1858, 0.4418, 0.2720]) -Greedy action tensor([0.7754, 0.3328, 0.2970, 0.7858]) tensor([0.3056, 0.1963, 0.1894, 0.3088]) -Greedy action tensor([ 1.0290, -1.0928, -0.0810, 1.0200]) tensor([0.4098, 0.0491, 0.1350, 0.4061]) -Greedy action tensor([-0.9657, -1.8698, -0.3538, 0.7118]) tensor([0.1163, 0.0471, 0.2144, 0.6223]) -Greedy action tensor([ 1.2330, -0.7493, -0.5823, 0.5804]) tensor([0.5491, 0.0756, 0.0894, 0.2859]) -Greedy action tensor([-0.3145, -1.4418, -0.1207, -0.2469]) tensor([0.2772, 0.0898, 0.3365, 0.2966]) -Greedy action tensor([ 0.2618, -0.5086, -0.6135, -0.5592]) tensor([0.4311, 0.1995, 0.1797, 0.1897]) -Greedy action tensor([-0.1572, -2.1031, -0.6188, 0.2604]) tensor([0.3038, 0.0434, 0.1915, 0.4613]) -Greedy action tensor([ 0.1127, 0.3496, 0.5764, -0.7335]) tensor([0.2333, 0.2957, 0.3709, 0.1001]) -Greedy action tensor([ 1.1106, -0.6674, -0.5164, 0.8708]) tensor([0.4646, 0.0785, 0.0913, 0.3656]) -Greedy action tensor([-0.1644, -0.8337, 0.4294, -1.2590]) tensor([0.2734, 0.1400, 0.4951, 0.0915]) -Greedy action tensor([ 0.1056, 0.5107, 0.1066, -0.8252]) tensor([0.2568, 0.3850, 0.2570, 0.1012]) -Greedy action tensor([ 0.0358, -1.3177, -0.5884, -1.0031]) tensor([0.4656, 0.1203, 0.2494, 0.1647]) -Greedy action tensor([-0.1175, -1.0661, -0.3271, -0.7347]) tensor([0.3653, 0.1415, 0.2962, 0.1970]) -Greedy action tensor([ 0.4287, -1.4801, 0.0328, 0.2318]) tensor([0.3784, 0.0561, 0.2547, 0.3108]) -Greedy action tensor([ 0.1102, -0.6735, 0.0863, -0.1288]) tensor([0.3105, 0.1418, 0.3032, 0.2445]) -Greedy action tensor([ 0.2198, 0.2033, 0.0070, -0.3393]) tensor([0.2973, 0.2924, 0.2403, 0.1700]) -Greedy action tensor([-0.5714, -1.4142, 0.1509, 0.1742]) tensor([0.1787, 0.0769, 0.3679, 0.3766]) -Greedy action tensor([ 0.2190, 0.6348, 0.2932, -0.1395]) tensor([0.2330, 0.3532, 0.2510, 0.1628]) -Greedy action tensor([ 1.3459, -0.7010, 1.0618, 1.1210]) tensor([0.3731, 0.0482, 0.2808, 0.2979]) -Greedy action tensor([ 0.2197, -0.2467, -0.9000, -0.3328]) tensor([0.3954, 0.2480, 0.1290, 0.2276]) -Greedy action tensor([ 0.0350, 0.0015, -1.0605, -0.3522]) tensor([0.3355, 0.3245, 0.1122, 0.2278]) -Greedy action tensor([ 0.0358, -0.1943, 0.2859, 0.2124]) tensor([0.2341, 0.1860, 0.3006, 0.2793]) -Greedy action tensor([ 0.9265, -0.9767, 0.7557, -0.5588]) tensor([0.4507, 0.0672, 0.3800, 0.1021]) -Greedy action tensor([-0.5733, -0.1024, 0.3490, -0.4578]) tensor([0.1603, 0.2567, 0.4031, 0.1799]) -Greedy action tensor([-0.2592, -1.2475, 1.3237, -0.4869]) tensor([0.1421, 0.0529, 0.6919, 0.1132]) -Greedy action tensor([-1.4951, -0.3824, 0.8793, -1.0911]) tensor([0.0614, 0.1868, 0.6598, 0.0920]) -Greedy action tensor([-0.0505, 0.1225, -0.5863, 0.0912]) tensor([0.2547, 0.3028, 0.1490, 0.2935]) -Greedy action tensor([-0.2504, -0.6544, -0.0051, -0.1444]) tensor([0.2465, 0.1645, 0.3150, 0.2740]) -Greedy action tensor([ 0.3581, -0.5697, 0.3128, 0.5717]) tensor([0.2786, 0.1102, 0.2663, 0.3450]) -Greedy action tensor([ 0.8333, -0.4232, -0.1949, -0.4668]) tensor([0.5223, 0.1487, 0.1868, 0.1423]) -Greedy action tensor([-0.3719, -1.0199, 0.2395, -0.4442]) tensor([0.2328, 0.1217, 0.4290, 0.2165]) -Greedy action tensor([ 0.2944, -0.4600, 0.1506, 0.2848]) tensor([0.3006, 0.1414, 0.2603, 0.2977]) -Greedy action tensor([-1.2604, 0.3040, -0.5853, -0.2450]) tensor([0.0952, 0.4550, 0.1870, 0.2628]) -Greedy action tensor([-0.5699, -1.6044, 0.6699, 0.1754]) tensor([0.1446, 0.0514, 0.4995, 0.3046]) -Greedy action tensor([ 0.6232, 0.5837, 0.6130, -0.4852]) tensor([0.3048, 0.2930, 0.3017, 0.1006]) -Greedy action tensor([9.6396e-01, 5.1254e-04, 1.4305e+00, 1.4535e+00]) tensor([0.2170, 0.0828, 0.3461, 0.3541]) -Greedy action tensor([ 0.5311, -0.3893, 0.0866, 0.9113]) tensor([0.2855, 0.1137, 0.1831, 0.4176]) -Greedy action tensor([-0.5712, -1.0407, 0.0323, -0.9148]) tensor([0.2402, 0.1502, 0.4392, 0.1704]) -Greedy action tensor([-0.3279, -0.7930, 0.3134, -0.1649]) tensor([0.2126, 0.1335, 0.4037, 0.2502]) -Greedy action tensor([-0.4509, 0.0372, -0.1749, -0.0134]) tensor([0.1819, 0.2965, 0.2398, 0.2818]) -Greedy action tensor([-0.0218, -0.3545, 0.5115, -0.2579]) tensor([0.2375, 0.1703, 0.4048, 0.1875]) -Greedy action tensor([-1.9178, -0.4324, 0.6515, -0.1689]) tensor([0.0413, 0.1823, 0.5391, 0.2373]) -Greedy action tensor([-1.8363, -0.2026, 0.5521, -0.0995]) tensor([0.0441, 0.2257, 0.4800, 0.2502]) -Greedy action tensor([-0.9622, -0.8822, 0.5850, -0.2325]) tensor([0.1129, 0.1223, 0.5305, 0.2343]) -Greedy action tensor([-1.4845, -0.1543, 0.4157, -0.0155]) tensor([0.0632, 0.2391, 0.4229, 0.2748]) -Greedy action tensor([-1.9112, -0.4225, 0.6533, -0.1611]) tensor([0.0414, 0.1833, 0.5374, 0.2380]) -Greedy action tensor([-1.3307, 0.5960, 0.1850, 0.2451]) tensor([0.0580, 0.3980, 0.2638, 0.2802]) -Greedy action tensor([-1.7241, -0.2061, 0.5264, -0.1067]) tensor([0.0498, 0.2271, 0.4724, 0.2508]) -Greedy action tensor([-0.4791, 0.0932, 0.1091, -0.0078]) tensor([0.1619, 0.2870, 0.2916, 0.2594]) -Greedy action tensor([-1.9124, -0.4302, 0.6489, -0.1662]) tensor([0.0415, 0.1828, 0.5377, 0.2380]) -Greedy action tensor([-1.9159, -0.4545, 0.6588, -0.1664]) tensor([0.0413, 0.1782, 0.5426, 0.2378]) -Greedy action tensor([-0.8717, 0.4248, 0.4843, 0.7179]) tensor([0.0744, 0.2721, 0.2888, 0.3647]) -Greedy action tensor([-1.9423, -0.4500, 0.6650, -0.1796]) tensor([0.0403, 0.1791, 0.5460, 0.2346]) -Greedy action tensor([-1.7079, -0.5925, 0.6936, -0.0442]) tensor([0.0491, 0.1498, 0.5420, 0.2592]) -Greedy action tensor([-1.9173, -0.4329, 0.6535, -0.1654]) tensor([0.0412, 0.1819, 0.5391, 0.2377]) -Greedy action tensor([-1.8915, -0.3487, 0.6362, -0.1316]) tensor([0.0416, 0.1948, 0.5216, 0.2420]) -Greedy action tensor([-1.7762, -0.4467, 0.5811, -0.0820]) tensor([0.0481, 0.1818, 0.5082, 0.2618]) -Greedy action tensor([-1.8157, -0.1145, 0.5800, -0.0297]) tensor([0.0427, 0.2340, 0.4686, 0.2547]) -Greedy action tensor([-1.8823, -0.4243, 0.6387, -0.1457]) tensor([0.0427, 0.1835, 0.5313, 0.2425]) -Greedy action tensor([-1.9353, -0.4435, 0.6600, -0.1771]) tensor([0.0406, 0.1803, 0.5437, 0.2354]) -Greedy action tensor([-1.9202, -0.4465, 0.6592, -0.1665]) tensor([0.0411, 0.1794, 0.5421, 0.2374]) -Greedy action tensor([-1.7090, -0.3676, 0.6472, 0.1562]) tensor([0.0458, 0.1752, 0.4833, 0.2958]) -Greedy action tensor([-1.6338, 0.3203, 0.4278, -0.0744]) tensor([0.0484, 0.3414, 0.3801, 0.2301]) -Greedy action tensor([-1.7897, -0.3961, 0.5802, -0.1122]) tensor([0.0474, 0.1912, 0.5075, 0.2539]) -Greedy action tensor([-0.7809, 0.0725, 0.1488, -0.0483]) tensor([0.1256, 0.2949, 0.3182, 0.2613]) -Greedy action tensor([-1.8495, -0.4008, 0.6506, -0.1325]) tensor([0.0435, 0.1850, 0.5295, 0.2420]) -Greedy action tensor([-1.6717, 0.3594, 0.4502, 0.0246]) tensor([0.0446, 0.3399, 0.3723, 0.2432]) -Greedy action tensor([-1.4445, -0.4048, 0.6264, 0.3327]) tensor([0.0566, 0.1600, 0.4488, 0.3346]) -Greedy action tensor([-1.9384, -0.4430, 0.6649, -0.1770]) tensor([0.0403, 0.1800, 0.5449, 0.2348]) -Greedy action tensor([-1.8875, -0.2650, 0.6304, -0.1974]) tensor([0.0419, 0.2121, 0.5192, 0.2269]) -Greedy action tensor([-1.8956, -0.4368, 0.6384, -0.1575]) tensor([0.0424, 0.1823, 0.5343, 0.2411]) -Greedy action tensor([-1.7431, -0.4795, 0.5432, -0.0375]) tensor([0.0503, 0.1780, 0.4949, 0.2769]) -Greedy action tensor([-1.8016, -0.4274, 0.5918, -0.1008]) tensor([0.0468, 0.1848, 0.5122, 0.2562]) -Greedy action tensor([-1.7970, 0.0386, 0.5517, -0.0863]) tensor([0.0430, 0.2694, 0.4500, 0.2377]) -Greedy action tensor([-1.7256, -0.3925, 0.5304, -0.0555]) tensor([0.0509, 0.1930, 0.4857, 0.2704]) -Greedy action tensor([-1.6029, -0.0226, 0.4518, -0.2732]) tensor([0.0573, 0.2784, 0.4475, 0.2167]) -Greedy action tensor([-1.9033, -0.3665, 0.6340, -0.1539]) tensor([0.0416, 0.1934, 0.5259, 0.2392]) -Greedy action tensor([-1.7627, -0.4908, 0.5591, -0.0707]) tensor([0.0495, 0.1767, 0.5048, 0.2689]) -Greedy action tensor([-0.5080, 1.0283, 0.0661, 0.2114]) tensor([0.1055, 0.4904, 0.1874, 0.2167]) -Greedy action tensor([-1.8387, -0.3707, 0.6474, -0.1187]) tensor([0.0436, 0.1892, 0.5237, 0.2435]) -Greedy action tensor([-1.2153, -0.1629, 0.2521, 0.2272]) tensor([0.0804, 0.2304, 0.3489, 0.3403]) -Greedy action tensor([-1.9127, -0.4484, 0.6574, -0.1636]) tensor([0.0414, 0.1791, 0.5413, 0.2382]) -Greedy action tensor([-1.9332, -0.4503, 0.6638, -0.1738]) tensor([0.0406, 0.1788, 0.5448, 0.2358]) -Greedy action tensor([-1.9381, -0.4461, 0.6651, -0.1768]) tensor([0.0404, 0.1795, 0.5452, 0.2349]) -Greedy action tensor([-1.9136, -0.3404, 0.6325, -0.1616]) tensor([0.0411, 0.1981, 0.5240, 0.2369]) -Greedy action tensor([-1.4922, -0.4154, 0.4395, 0.0732]) tensor([0.0640, 0.1879, 0.4418, 0.3063]) -Greedy action tensor([-1.7679, -0.4409, 0.6186, -0.0463]) tensor([0.0471, 0.1775, 0.5120, 0.2634]) -Greedy action tensor([-1.5178, -0.4596, 0.6513, 0.4929]) tensor([0.0497, 0.1433, 0.4353, 0.3716]) -Greedy action tensor([-1.7576, -0.4204, 0.5859, -0.0420]) tensor([0.0481, 0.1832, 0.5012, 0.2675]) -Greedy action tensor([-1.7145, -0.2027, 0.5440, -0.0127]) tensor([0.0486, 0.2203, 0.4648, 0.2664]) -Greedy action tensor([-1.8395, -0.4267, 0.6426, -0.0844]) tensor([0.0437, 0.1797, 0.5235, 0.2531]) -Greedy action tensor([-1.8465, -0.6398, 0.8444, 0.0649]) tensor([0.0387, 0.1293, 0.5704, 0.2616]) -Greedy action tensor([-1.9465, -0.4506, 0.6678, -0.1821]) tensor([0.0401, 0.1788, 0.5472, 0.2339]) -Greedy action tensor([-1.8160, -0.4436, 0.6074, -0.1110]) tensor([0.0460, 0.1815, 0.5193, 0.2532]) -Greedy action tensor([-1.7742, -0.4023, 0.6725, -0.0409]) tensor([0.0451, 0.1780, 0.5214, 0.2555]) -Greedy action tensor([-1.4377, -0.4651, 0.4270, -0.0303]) tensor([0.0705, 0.1865, 0.4550, 0.2880]) -Greedy action tensor([-1.9269, -0.4291, 0.6595, -0.1707]) tensor([0.0407, 0.1822, 0.5411, 0.2359]) -Greedy action tensor([-1.8815, 0.0019, 0.5694, -0.1768]) tensor([0.0405, 0.2665, 0.4701, 0.2229]) -Greedy action tensor([-1.8719, -0.2955, 0.6051, -0.1360]) tensor([0.0427, 0.2066, 0.5084, 0.2423]) -Greedy action tensor([-1.9348, -0.4553, 0.6707, -0.1703]) tensor([0.0404, 0.1773, 0.5466, 0.2357]) -Greedy action tensor([-1.9122, -0.4481, 0.6551, -0.1623]) tensor([0.0415, 0.1793, 0.5405, 0.2387]) -Greedy action tensor([-1.8882, -0.6807, 0.4003, -0.4205]) tensor([0.0539, 0.1804, 0.5317, 0.2340]) -Greedy action tensor([-1.4257, -0.3617, 0.3966, 0.0369]) tensor([0.0694, 0.2012, 0.4296, 0.2998]) -Greedy action tensor([-1.4250, -0.4622, 0.6921, 0.6505]) tensor([0.0503, 0.1316, 0.4176, 0.4005]) -Greedy action tensor([-1.9012, -0.3339, 0.6182, -0.1469]) tensor([0.0417, 0.1998, 0.5177, 0.2409]) -Greedy action tensor([-1.3792, 0.0646, 0.3553, -0.0823]) tensor([0.0687, 0.2910, 0.3891, 0.2512]) -Greedy action tensor([-1.8592, -0.3901, 0.6273, -0.1267]) tensor([0.0434, 0.1888, 0.5221, 0.2456]) -Greedy action tensor([-1.9426, -0.4469, 0.6660, -0.1797]) tensor([0.0402, 0.1794, 0.5460, 0.2344]) -Greedy action tensor([-1.6067, -0.5094, 0.5192, 0.0162]) tensor([0.0573, 0.1718, 0.4804, 0.2905]) -Greedy action tensor([-1.5629, -0.3458, 0.4582, -0.0197]) tensor([0.0602, 0.2034, 0.4545, 0.2818]) -Greedy action tensor([-1.9219, -0.4518, 0.6587, -0.1701]) tensor([0.0411, 0.1789, 0.5430, 0.2370]) -Greedy action tensor([-1.8398, -0.4334, 0.6568, -0.0760]) tensor([0.0434, 0.1770, 0.5266, 0.2531]) -Greedy action tensor([-1.7991, -0.2905, 0.5689, -0.1056]) tensor([0.0462, 0.2089, 0.4935, 0.2514]) -Greedy action tensor([-1.9367, -0.4258, 0.6617, -0.1719]) tensor([0.0403, 0.1826, 0.5417, 0.2354]) -Greedy action tensor([-1.8211, -0.4254, 0.6881, -0.0994]) tensor([0.0436, 0.1761, 0.5363, 0.2440]) -Greedy action tensor([-1.8810, -0.4508, 0.6375, -0.1497]) tensor([0.0430, 0.1799, 0.5341, 0.2431]) -Greedy action tensor([-1.9286, -0.4341, 0.6612, -0.1714]) tensor([0.0407, 0.1813, 0.5422, 0.2358]) -Greedy action tensor([-1.8839, -0.3706, 0.6292, -0.1503]) tensor([0.0425, 0.1929, 0.5242, 0.2404]) -Greedy action tensor([-0.2506, 1.2191, 0.0633, 0.6651]) tensor([0.1085, 0.4718, 0.1485, 0.2711]) -Greedy action tensor([-1.6956, 0.3252, 0.4208, 0.0250]) tensor([0.0446, 0.3363, 0.3700, 0.2491]) -Greedy action tensor([-1.8935, -0.2897, 0.6174, -0.1401]) tensor([0.0416, 0.2066, 0.5118, 0.2400]) -Greedy action tensor([-1.0826, 0.0423, 0.6504, 0.8742]) tensor([0.0595, 0.1832, 0.3365, 0.4209]) -Greedy action tensor([ 0.7862, -0.6773, -0.1912, -0.2449]) tensor([0.5091, 0.1178, 0.1916, 0.1815]) -Greedy action tensor([ 0.2862, 0.0517, -0.0400, -0.0852]) tensor([0.3123, 0.2470, 0.2254, 0.2154]) -Greedy action tensor([ 0.4959, -0.2164, -0.0458, -0.0177]) tensor([0.3744, 0.1837, 0.2178, 0.2240]) -Greedy action tensor([ 0.7352, 0.0643, -0.1906, -0.6081]) tensor([0.4611, 0.2358, 0.1827, 0.1204]) -Greedy action tensor([ 0.8218, -0.2151, -0.1058, -0.0547]) tensor([0.4616, 0.1637, 0.1826, 0.1922]) -Greedy action tensor([ 0.5517, -0.5317, 0.0749, -0.3739]) tensor([0.4245, 0.1437, 0.2636, 0.1682]) -Greedy action tensor([ 0.5618, -0.4250, -0.1835, -0.1635]) tensor([0.4289, 0.1599, 0.2036, 0.2077]) -Greedy action tensor([ 1.2484, -0.6575, -0.1846, -0.2209]) tensor([0.6183, 0.0919, 0.1475, 0.1423]) -Greedy action tensor([ 0.8669, -0.5285, 0.1685, -0.2358]) tensor([0.4814, 0.1193, 0.2395, 0.1598]) -Greedy action tensor([ 0.6468, -0.5013, -0.1280, -0.4215]) tensor([0.4713, 0.1495, 0.2172, 0.1619]) -Greedy action tensor([ 0.6112, -0.3260, -0.0746, -0.2132]) tensor([0.4285, 0.1678, 0.2158, 0.1879]) -Greedy action tensor([ 0.6107, -0.4933, -0.1518, -0.4533]) tensor([0.4666, 0.1547, 0.2177, 0.1610]) -Greedy action tensor([ 0.8058, -0.5645, -0.1068, -0.3874]) tensor([0.5105, 0.1297, 0.2050, 0.1548]) -Greedy action tensor([0.5842, 0.1501, 0.0012, 0.0728]) tensor([0.3564, 0.2309, 0.1990, 0.2137]) -Greedy action tensor([ 0.8849, -0.3366, 0.0612, -0.3163]) tensor([0.4915, 0.1449, 0.2157, 0.1479]) -Greedy action tensor([ 1.0201, -0.5708, -0.1486, -0.4488]) tensor([0.5732, 0.1168, 0.1781, 0.1319]) -Greedy action tensor([ 0.9058, -0.4137, -0.0766, -0.3186]) tensor([0.5166, 0.1381, 0.1934, 0.1519]) -Greedy action tensor([ 0.6368, -0.2760, -0.0550, -0.1361]) tensor([0.4231, 0.1698, 0.2118, 0.1953]) -Greedy action tensor([ 0.9166, -0.5683, 0.0452, -0.2747]) tensor([0.5132, 0.1162, 0.2147, 0.1559]) -Greedy action tensor([ 0.8288, -0.6288, 0.0810, -0.3853]) tensor([0.4992, 0.1162, 0.2363, 0.1483]) -Greedy action tensor([ 0.4881, -0.4601, 0.0640, -0.3568]) tensor([0.4046, 0.1568, 0.2648, 0.1738]) -Greedy action tensor([ 1.0771, -0.3366, 0.2655, -0.4631]) tensor([0.5258, 0.1279, 0.2336, 0.1127]) -Greedy action tensor([ 0.6185, -0.2147, -0.0665, -0.0648]) tensor([0.4092, 0.1779, 0.2063, 0.2066]) -Greedy action tensor([ 0.4975, -0.3844, -0.1749, 0.0024]) tensor([0.3946, 0.1634, 0.2015, 0.2405]) -Greedy action tensor([ 0.8961, -0.6861, 0.0043, -0.6490]) tensor([0.5468, 0.1124, 0.2242, 0.1166]) -Greedy action tensor([ 1.2332, -0.7688, -0.1102, -0.7333]) tensor([0.6511, 0.0879, 0.1699, 0.0911]) -Greedy action tensor([ 0.6339, -0.4029, -0.0628, -0.2602]) tensor([0.4421, 0.1568, 0.2203, 0.1808]) -Greedy action tensor([ 1.1979, -0.2695, -0.0588, -0.6270]) tensor([0.5965, 0.1375, 0.1698, 0.0962]) -Greedy action tensor([ 0.7105, -0.0561, -0.0507, 0.0491]) tensor([0.4085, 0.1898, 0.1908, 0.2109]) -Greedy action tensor([ 1.0136, -0.5336, -0.0250, -0.2373]) tensor([0.5397, 0.1149, 0.1910, 0.1545]) -Greedy action tensor([ 1.0297, -0.5640, -0.0485, -0.4117]) tensor([0.5618, 0.1141, 0.1911, 0.1329]) -Greedy action tensor([ 0.7687, -0.4093, 0.2427, -0.7691]) tensor([0.4731, 0.1457, 0.2796, 0.1017]) -Greedy action tensor([ 0.6445, -0.4659, -0.1766, -0.5605]) tensor([0.4833, 0.1592, 0.2126, 0.1448]) -Greedy action tensor([ 0.3412, -0.0011, -0.0113, -0.0230]) tensor([0.3218, 0.2285, 0.2262, 0.2236]) -Greedy action tensor([ 0.3791, 0.0301, -0.1078, -0.0500]) tensor([0.3366, 0.2374, 0.2068, 0.2191]) -Greedy action tensor([ 0.5876, -0.2728, -0.0218, -0.1047]) tensor([0.4053, 0.1715, 0.2204, 0.2028]) -Greedy action tensor([ 0.8410, -0.6600, -0.0095, -0.5457]) tensor([0.5263, 0.1173, 0.2248, 0.1315]) -Greedy action tensor([ 0.9174, -0.6597, -0.1303, -0.3596]) tensor([0.5446, 0.1125, 0.1910, 0.1519]) -Greedy action tensor([ 1.0242, -0.3975, -0.0853, -0.6014]) tensor([0.5657, 0.1365, 0.1865, 0.1113]) -Greedy action tensor([ 0.7035, -0.4199, -0.1207, -0.1033]) tensor([0.4525, 0.1471, 0.1985, 0.2019]) -Greedy action tensor([ 0.6917, 0.0494, 0.0858, -0.0607]) tensor([0.3933, 0.2069, 0.2145, 0.1853]) -Greedy action tensor([ 0.5822, -0.2160, -0.0030, -0.0802]) tensor([0.3964, 0.1784, 0.2208, 0.2044]) -Greedy action tensor([ 0.7772, -0.4223, -0.2041, -0.4259]) tensor([0.5060, 0.1525, 0.1896, 0.1519]) -Greedy action tensor([ 0.8553, -0.2695, 0.0274, -0.2666]) tensor([0.4791, 0.1556, 0.2093, 0.1560]) -Greedy action tensor([ 0.2049, -0.4319, -0.2084, -0.0790]) tensor([0.3398, 0.1797, 0.2247, 0.2558]) -Greedy action tensor([ 0.7836, -0.6413, -0.1254, -0.2740]) tensor([0.5023, 0.1208, 0.2024, 0.1745]) -Greedy action tensor([ 0.4339, 0.1451, 0.0623, -0.1967]) tensor([0.3366, 0.2521, 0.2321, 0.1792]) -Greedy action tensor([ 0.3055, 0.1907, -0.0883, -0.3081]) tensor([0.3218, 0.2869, 0.2171, 0.1742]) -Greedy action tensor([ 0.4422, 0.0026, -0.0828, 0.0079]) tensor([0.3468, 0.2234, 0.2052, 0.2246]) -Greedy action tensor([ 0.4999, -0.3409, -0.1747, -0.0591]) tensor([0.3980, 0.1717, 0.2027, 0.2276]) -Greedy action tensor([ 0.5041, -0.3742, 0.0836, -0.6647]) tensor([0.4196, 0.1744, 0.2756, 0.1304]) -Greedy action tensor([ 0.7177, -0.2282, -0.0377, -0.1456]) tensor([0.4386, 0.1703, 0.2061, 0.1850]) -Greedy action tensor([ 0.6930, -0.7749, -0.0269, -0.3370]) tensor([0.4821, 0.1111, 0.2347, 0.1721]) -Greedy action tensor([ 0.9958, -0.5506, -0.0991, -0.3331]) tensor([0.5518, 0.1175, 0.1846, 0.1461]) -Greedy action tensor([ 0.8415, -0.2331, 0.1338, -0.5090]) tensor([0.4777, 0.1631, 0.2354, 0.1238]) -Greedy action tensor([ 0.7729, -0.2439, -0.1000, -0.1843]) tensor([0.4622, 0.1672, 0.1931, 0.1775]) -Greedy action tensor([ 0.8800, -0.4583, 0.0871, -0.3604]) tensor([0.4990, 0.1309, 0.2258, 0.1443]) -Greedy action tensor([ 0.7552, 0.0387, -0.3562, -0.5342]) tensor([0.4778, 0.2334, 0.1572, 0.1316]) -Greedy action tensor([ 0.9590, -1.2588, 0.0120, -0.7377]) tensor([0.5952, 0.0648, 0.2309, 0.1091]) -Greedy action tensor([ 1.0671, -0.6326, -0.0162, -0.5551]) tensor([0.5818, 0.1063, 0.1969, 0.1149]) -Greedy action tensor([ 0.3345, -0.4378, -0.2401, -0.0307]) tensor([0.3678, 0.1699, 0.2070, 0.2553]) -Greedy action tensor([ 0.9524, -1.0705, -0.0639, -0.4087]) tensor([0.5712, 0.0756, 0.2067, 0.1465]) -Greedy action tensor([ 0.7176, -0.5879, -0.0795, -0.1676]) tensor([0.4686, 0.1270, 0.2111, 0.1933]) -Greedy action tensor([ 0.5366, -0.1714, -0.0762, -0.2823]) tensor([0.4040, 0.1990, 0.2189, 0.1781]) -Greedy action tensor([ 1.0235, -0.4184, -0.0756, -0.4689]) tensor([0.5573, 0.1318, 0.1857, 0.1253]) -Greedy action tensor([ 1.0507, -1.0467, 0.0340, -0.4715]) tensor([0.5873, 0.0721, 0.2125, 0.1282]) -Greedy action tensor([ 1.1372, -1.0613, 0.0243, -0.7044]) tensor([0.6257, 0.0694, 0.2056, 0.0992]) -Greedy action tensor([ 0.4816, -0.1750, 0.0150, -0.1619]) tensor([0.3744, 0.1942, 0.2348, 0.1967]) -Greedy action tensor([ 0.5539, -0.1642, -0.0375, -0.3174]) tensor([0.4066, 0.1983, 0.2251, 0.1701]) -Greedy action tensor([ 0.5261, -0.4168, -0.0450, -0.3669]) tensor([0.4230, 0.1648, 0.2390, 0.1732]) -Greedy action tensor([ 0.7163, -0.2628, -0.0898, -0.4107]) tensor([0.4659, 0.1750, 0.2081, 0.1510]) -Greedy action tensor([ 0.7803, -0.5939, 0.0728, -0.4727]) tensor([0.4922, 0.1246, 0.2426, 0.1406]) -Greedy action tensor([ 0.6902, -0.3095, -0.0231, -0.3565]) tensor([0.4527, 0.1666, 0.2218, 0.1589]) -Greedy action tensor([ 0.2801, -0.0980, 0.0329, -0.3961]) tensor([0.3362, 0.2303, 0.2625, 0.1710]) -Greedy action tensor([ 0.9441, -0.5996, -0.0461, -0.2750]) tensor([0.5317, 0.1136, 0.1975, 0.1571]) -Greedy action tensor([ 0.5562, -0.2692, -0.1423, -0.0462]) tensor([0.4028, 0.1764, 0.2003, 0.2205]) -Greedy action tensor([ 0.7210, -0.3288, -0.0028, -0.2205]) tensor([0.4495, 0.1573, 0.2179, 0.1753]) -Greedy action tensor([ 0.7044, -0.2817, -0.1362, -0.4168]) tensor([0.4694, 0.1751, 0.2025, 0.1530]) -Greedy action tensor([ 0.6450, -0.5243, 0.0396, -0.1417]) tensor([0.4326, 0.1344, 0.2361, 0.1970]) -Greedy action tensor([ 0.6230, -0.2511, -0.0068, -0.0767]) tensor([0.4087, 0.1705, 0.2177, 0.2030]) -Greedy action tensor([ 0.8660, -0.9041, -0.0666, -0.3169]) tensor([0.5347, 0.0911, 0.2104, 0.1638]) -Greedy action tensor([ 1.4957, -0.4192, -0.6735, 0.0613]) tensor([0.6667, 0.0982, 0.0762, 0.1589]) -Greedy action tensor([ 1.2496, -0.4530, -0.0334, -0.0421]) tensor([0.5766, 0.1051, 0.1598, 0.1585]) -Greedy action tensor([ 1.9474, -0.0769, -0.2846, 0.3781]) tensor([0.6908, 0.0912, 0.0741, 0.1438]) -Greedy action tensor([ 1.2816, -0.4003, -0.4613, 0.4265]) tensor([0.5598, 0.1041, 0.0980, 0.2381]) -Greedy action tensor([ 0.9193, -0.4533, -0.8192, 0.4664]) tensor([0.4843, 0.1227, 0.0851, 0.3079]) -Greedy action tensor([ 2.0721, -0.9944, -0.1284, 0.2852]) tensor([0.7548, 0.0352, 0.0836, 0.1264]) -Greedy action tensor([ 0.9266, -0.4213, -0.3682, 0.3829]) tensor([0.4730, 0.1229, 0.1296, 0.2746]) -Greedy action tensor([ 1.4550, -0.5906, -0.5740, 0.3540]) tensor([0.6276, 0.0812, 0.0825, 0.2087]) -Greedy action tensor([ 1.9798, -1.0074, -0.2357, 0.6826]) tensor([0.6979, 0.0352, 0.0761, 0.1907]) -Greedy action tensor([ 1.1067, -0.6942, -0.2717, 0.6789]) tensor([0.4833, 0.0798, 0.1218, 0.3151]) -Greedy action tensor([ 2.3345, -0.9121, -0.5067, 1.1358]) tensor([0.7149, 0.0278, 0.0417, 0.2156]) -Greedy action tensor([ 1.3665, -0.1245, -0.8077, 0.1044]) tensor([0.6166, 0.1388, 0.0701, 0.1745]) -Greedy action tensor([ 1.1198, -0.6429, -0.3437, 0.6884]) tensor([0.4872, 0.0836, 0.1127, 0.3165]) -Greedy action tensor([ 1.9094, -0.3949, -1.1065, 0.3761]) tensor([0.7328, 0.0731, 0.0359, 0.1582]) -Greedy action tensor([ 2.0049, -1.1915, -0.0519, 0.7446]) tensor([0.6885, 0.0282, 0.0880, 0.1952]) -Greedy action tensor([ 1.3980, -0.0917, -0.1299, -0.1110]) tensor([0.6011, 0.1355, 0.1304, 0.1329]) -Greedy action tensor([ 1.8915, -0.8745, -0.4337, 0.1247]) tensor([0.7510, 0.0472, 0.0734, 0.1283]) -Greedy action tensor([ 1.0306, -0.0523, -0.5702, 0.4601]) tensor([0.4749, 0.1608, 0.0958, 0.2684]) -Greedy action tensor([ 1.3504, -0.6470, 0.1288, 0.8737]) tensor([0.4875, 0.0661, 0.1437, 0.3027]) -Greedy action tensor([ 1.7745, -0.3003, -0.4911, 0.7235]) tensor([0.6333, 0.0795, 0.0657, 0.2214]) -Greedy action tensor([ 1.7500, -0.6982, -0.4328, 0.3628]) tensor([0.6902, 0.0597, 0.0778, 0.1724]) -Greedy action tensor([ 1.7912, -0.6237, -0.3658, 0.3537]) tensor([0.6932, 0.0620, 0.0802, 0.1647]) -Greedy action tensor([ 1.1884, -1.1289, -0.0396, -0.0697]) tensor([0.5968, 0.0588, 0.1748, 0.1696]) -Greedy action tensor([ 2.0821, -1.0327, -0.5943, 0.7295]) tensor([0.7290, 0.0324, 0.0502, 0.1885]) -Greedy action tensor([ 1.3255, -0.8224, -0.3615, 0.4837]) tensor([0.5771, 0.0674, 0.1068, 0.2487]) -Greedy action tensor([ 1.4796, 0.0936, -0.4280, -0.1720]) tensor([0.6288, 0.1573, 0.0933, 0.1206]) -Greedy action tensor([ 1.1390, -0.2237, -0.1513, -0.3370]) tensor([0.5683, 0.1455, 0.1564, 0.1299]) -Greedy action tensor([ 1.3919, -0.5252, -0.4882, 0.1042]) tensor([0.6347, 0.0933, 0.0968, 0.1751]) -Greedy action tensor([ 1.5459, -1.3496, 0.0722, 0.1366]) tensor([0.6542, 0.0362, 0.1498, 0.1598]) -Greedy action tensor([ 2.5040, -1.1518, -0.1033, 0.0687]) tensor([0.8424, 0.0218, 0.0621, 0.0738]) -Greedy action tensor([ 1.5548, -0.8400, -0.2362, 0.1170]) tensor([0.6687, 0.0610, 0.1115, 0.1588]) -Greedy action tensor([ 2.1931, -1.1927, -0.0922, 0.5683]) tensor([0.7504, 0.0254, 0.0764, 0.1478]) -Greedy action tensor([ 1.2507, -0.5177, -0.2384, 0.1289]) tensor([0.5808, 0.0991, 0.1310, 0.1892]) -Greedy action tensor([ 1.3817, -0.3985, -0.7240, 0.4428]) tensor([0.5947, 0.1003, 0.0724, 0.2326]) -Greedy action tensor([ 1.4616, -0.0857, -0.4126, 0.2004]) tensor([0.6062, 0.1290, 0.0930, 0.1717]) -Greedy action tensor([ 1.4426, -0.9051, -0.1750, 0.5732]) tensor([0.5837, 0.0558, 0.1158, 0.2447]) -Greedy action tensor([ 1.2061, 0.0086, 0.2718, -0.0451]) tensor([0.5048, 0.1524, 0.1983, 0.1445]) -Greedy action tensor([ 2.0916, -0.9801, -0.4957, 0.8334]) tensor([0.7114, 0.0330, 0.0535, 0.2022]) -Greedy action tensor([ 1.2918, -0.5400, -0.1854, 0.1700]) tensor([0.5834, 0.0934, 0.1332, 0.1900]) -Greedy action tensor([ 1.7402, -0.2079, -0.7068, 0.6769]) tensor([0.6352, 0.0905, 0.0550, 0.2193]) -Greedy action tensor([ 1.2783, -0.4056, -0.6087, 0.1759]) tensor([0.5991, 0.1112, 0.0908, 0.1989]) -Greedy action tensor([ 1.1655, -0.1040, -0.4726, 0.0488]) tensor([0.5547, 0.1559, 0.1078, 0.1816]) -Greedy action tensor([ 1.3637, -0.6351, -0.3218, 0.4356]) tensor([0.5827, 0.0790, 0.1080, 0.2303]) -Greedy action tensor([ 1.2670, -0.0028, -0.4902, 0.4947]) tensor([0.5221, 0.1467, 0.0901, 0.2412]) -Greedy action tensor([ 1.0031, -0.4059, 0.0305, 0.1788]) tensor([0.4852, 0.1186, 0.1835, 0.2128]) -Greedy action tensor([ 0.7759, -0.2828, -0.0868, -0.0146]) tensor([0.4499, 0.1561, 0.1899, 0.2041]) -Greedy action tensor([ 1.5091, -0.6546, -0.1500, 0.3914]) tensor([0.6126, 0.0704, 0.1166, 0.2004]) -Greedy action tensor([ 1.1861, -0.4287, -0.3053, 0.3758]) tensor([0.5351, 0.1065, 0.1204, 0.2380]) -Greedy action tensor([ 1.0969, 0.1455, -0.3602, 0.6468]) tensor([0.4431, 0.1711, 0.1032, 0.2825]) -Greedy action tensor([ 2.6766, -1.0867, -0.3186, 0.9216]) tensor([0.8025, 0.0186, 0.0401, 0.1388]) -Greedy action tensor([ 1.5761, -0.3963, -0.5165, 0.4239]) tensor([0.6335, 0.0881, 0.0782, 0.2002]) -Greedy action tensor([ 1.4694, -0.2237, -0.2046, 0.7322]) tensor([0.5406, 0.0994, 0.1014, 0.2586]) -Greedy action tensor([ 1.3303, -0.2010, -0.6113, 0.5518]) tensor([0.5498, 0.1189, 0.0789, 0.2524]) -Greedy action tensor([ 1.6374, -0.1669, -0.0947, 0.4151]) tensor([0.6112, 0.1006, 0.1081, 0.1800]) -Greedy action tensor([ 1.3246, -0.6621, 0.0712, 0.0284]) tensor([0.5895, 0.0808, 0.1683, 0.1613]) -Greedy action tensor([ 1.6277, -0.3779, -0.8014, 0.4573]) tensor([0.6523, 0.0878, 0.0575, 0.2024]) -Greedy action tensor([ 1.9665, 0.2104, -0.5052, 0.7315]) tensor([0.6460, 0.1116, 0.0545, 0.1879]) -Greedy action tensor([ 1.5259, -0.2826, -0.1765, 0.2293]) tensor([0.6174, 0.1012, 0.1125, 0.1688]) -Greedy action tensor([ 2.0327, -0.8799, -0.2987, 0.2918]) tensor([0.7537, 0.0410, 0.0732, 0.1322]) -Greedy action tensor([ 1.2205, -0.1820, -0.3797, 0.4483]) tensor([0.5236, 0.1288, 0.1057, 0.2419]) -Greedy action tensor([ 1.6766, -0.3668, -0.5199, 0.3061]) tensor([0.6690, 0.0867, 0.0744, 0.1699]) -Greedy action tensor([ 1.8141, -0.7799, -0.4781, 0.5759]) tensor([0.6823, 0.0510, 0.0689, 0.1978]) -Greedy action tensor([ 1.4935, -0.7894, -0.4095, 0.2498]) tensor([0.6496, 0.0662, 0.0969, 0.1873]) -Greedy action tensor([ 1.5977, -0.7163, -0.3038, 0.2172]) tensor([0.6668, 0.0659, 0.0996, 0.1677]) -Greedy action tensor([ 1.5350, -0.5949, -0.4915, 0.5711]) tensor([0.6127, 0.0728, 0.0807, 0.2337]) -Greedy action tensor([ 1.7111, -0.1455, -0.7238, 0.2946]) tensor([0.6728, 0.1051, 0.0589, 0.1632]) -Greedy action tensor([ 2.0497, -1.0122, -0.1010, 0.8977]) tensor([0.6760, 0.0316, 0.0787, 0.2136]) -Greedy action tensor([ 2.2887, -0.7290, -0.4642, 0.0912]) tensor([0.8172, 0.0400, 0.0521, 0.0908]) -Greedy action tensor([ 1.4537, -0.5578, -0.3837, 0.4140]) tensor([0.6073, 0.0813, 0.0967, 0.2147]) -Greedy action tensor([ 1.4459, -0.2580, -0.6897, 0.5820]) tensor([0.5808, 0.1057, 0.0686, 0.2448]) -Greedy action tensor([ 1.5084, -0.6881, -0.1764, -0.0313]) tensor([0.6618, 0.0736, 0.1227, 0.1419]) -Greedy action tensor([ 1.2295, -0.2593, -0.7385, 0.3056]) tensor([0.5674, 0.1280, 0.0793, 0.2252]) -Greedy action tensor([ 1.6388, -0.5011, -1.0966, 0.2401]) tensor([0.6996, 0.0823, 0.0454, 0.1727]) -Greedy action tensor([ 1.0824, -0.3063, -0.1339, 0.2727]) tensor([0.5023, 0.1253, 0.1488, 0.2235]) -Greedy action tensor([ 1.2335, -0.2849, -0.2000, -0.0161]) tensor([0.5734, 0.1256, 0.1367, 0.1643]) -Greedy action tensor([ 1.1583, -0.4277, -0.3918, 0.1049]) tensor([0.5663, 0.1160, 0.1202, 0.1975]) -Greedy action tensor([ 1.6209, -0.2626, -1.0435, 0.7499]) tensor([0.6097, 0.0927, 0.0425, 0.2552]) -Greedy action tensor([ 1.1967, 0.2779, -1.3852, 0.1212]) tensor([0.5507, 0.2197, 0.0417, 0.1879]) -Greedy action tensor([ 1.9817, -0.8324, -0.5642, 0.4734]) tensor([0.7355, 0.0441, 0.0577, 0.1628]) -Greedy action tensor([ 2.0019, -1.2266, 0.0316, 0.7837]) tensor([0.6781, 0.0269, 0.0945, 0.2005]) -Greedy action tensor([ 1.6183, -0.3387, -0.6853, 0.2077]) tensor([0.6733, 0.0951, 0.0673, 0.1643]) -Greedy action tensor([ 0.0669, -0.9474, 0.5133, -0.2215]) tensor([0.2721, 0.0987, 0.4252, 0.2040]) -Greedy action tensor([-0.1259, -1.4274, -0.9806, 1.0630]) tensor([0.2008, 0.0546, 0.0854, 0.6592]) -Greedy action tensor([ 1.2707, -0.0575, -0.3558, -0.6820]) tensor([0.6237, 0.1652, 0.1226, 0.0885]) -Greedy action tensor([ 0.6626, -0.2025, 1.2692, 0.3336]) tensor([0.2516, 0.1059, 0.4615, 0.1810]) -Greedy action tensor([-0.1264, 0.6683, -0.1699, -1.3505]) tensor([0.2239, 0.4958, 0.2144, 0.0659]) -Greedy action tensor([-1.2344, -2.0219, -0.4870, -0.0760]) tensor([0.1481, 0.0674, 0.3128, 0.4717]) -Greedy action tensor([-1.1905, -1.6788, 0.5318, -0.1863]) tensor([0.1006, 0.0617, 0.5631, 0.2746]) -Greedy action tensor([-0.3498, -0.6498, -1.3034, -0.1123]) tensor([0.2946, 0.2183, 0.1135, 0.3736]) -Greedy action tensor([-0.5412, -0.4020, -0.4870, -0.8639]) tensor([0.2545, 0.2925, 0.2687, 0.1843]) -Greedy action tensor([-0.4872, -0.7816, 0.7186, -0.2646]) tensor([0.1579, 0.1176, 0.5272, 0.1972]) -Greedy action tensor([-0.4135, -1.5654, 0.1957, -0.1393]) tensor([0.2237, 0.0707, 0.4114, 0.2943]) -Greedy action tensor([ 0.5980, -0.7174, -0.3686, -0.0209]) tensor([0.4572, 0.1227, 0.1739, 0.2462]) -Greedy action tensor([ 1.1675, -0.4811, 0.2483, 1.4236]) tensor([0.3468, 0.0667, 0.1383, 0.4481]) -Greedy action tensor([ 0.1436, 0.1159, 0.4927, -0.7723]) tensor([0.2638, 0.2566, 0.3740, 0.1056]) -Greedy action tensor([-0.2400, -0.3295, 1.6997, -0.8127]) tensor([0.1060, 0.0969, 0.7373, 0.0598]) -Greedy action tensor([ 0.3665, 0.3064, -0.5376, -0.7240]) tensor([0.3728, 0.3510, 0.1509, 0.1253]) -Greedy action tensor([-1.1145, -0.7173, -0.9103, 0.2719]) tensor([0.1296, 0.1928, 0.1590, 0.5186]) -Greedy action tensor([ 0.2087, -0.4829, -0.7810, -0.4566]) tensor([0.4190, 0.2098, 0.1557, 0.2154]) -Greedy action tensor([ 0.0498, -0.1896, -0.0961, 0.3638]) tensor([0.2487, 0.1958, 0.2150, 0.3405]) -Greedy action tensor([ 1.2596, -0.7778, 1.2702, 0.3538]) tensor([0.3929, 0.0512, 0.3971, 0.1588]) -Greedy action tensor([ 0.3570, 0.0757, -0.2661, 0.2181]) tensor([0.3163, 0.2387, 0.1696, 0.2753]) -Greedy action tensor([ 0.3444, -1.0875, -0.2112, 0.6020]) tensor([0.3219, 0.0769, 0.1847, 0.4165]) -Greedy action tensor([-1.3607, -0.3842, 0.0113, 0.0015]) tensor([0.0869, 0.2308, 0.3428, 0.3395]) -Greedy action tensor([-0.9751, -0.6036, -0.3942, -0.5586]) tensor([0.1738, 0.2520, 0.3107, 0.2636]) -Greedy action tensor([ 1.4035, -1.0057, 0.6809, 0.2873]) tensor([0.5255, 0.0472, 0.2551, 0.1721]) -Greedy action tensor([-0.5358, -0.5568, -0.6583, -0.7405]) tensor([0.2718, 0.2662, 0.2405, 0.2215]) -Greedy action tensor([ 0.1458, 0.0456, -0.1056, -1.5063]) tensor([0.3479, 0.3148, 0.2706, 0.0667]) -Greedy action tensor([-0.7467, 0.0201, 0.9695, 0.0331]) tensor([0.0918, 0.1976, 0.5105, 0.2002]) -Greedy action tensor([ 0.5664, -0.0468, 0.0412, -0.3858]) tensor([0.3970, 0.2150, 0.2348, 0.1532]) -Greedy action tensor([-0.2634, -1.0834, -0.5237, 0.7939]) tensor([0.1965, 0.0865, 0.1514, 0.5656]) -Greedy action tensor([-0.2530, -0.9975, 0.6703, -0.4558]) tensor([0.2079, 0.0988, 0.5235, 0.1698]) -Greedy action tensor([ 1.3213, -1.0303, 0.4048, 0.6624]) tensor([0.4969, 0.0473, 0.1987, 0.2571]) -Greedy action tensor([-0.2760, -0.3682, -0.2823, 0.0604]) tensor([0.2323, 0.2118, 0.2308, 0.3251]) -Greedy action tensor([-0.4737, -0.9949, -0.1953, -1.5597]) tensor([0.3075, 0.1826, 0.4062, 0.1038]) -Greedy action tensor([ 0.1035, -1.2176, 0.9606, 0.7029]) tensor([0.1837, 0.0490, 0.4328, 0.3345]) -Greedy action tensor([ 1.1377, 0.0496, 0.7873, -0.2167]) tensor([0.4349, 0.1465, 0.3063, 0.1123]) -Greedy action tensor([ 0.1667, -0.2003, 0.0343, -0.2199]) tensor([0.3079, 0.2133, 0.2697, 0.2092]) -Greedy action tensor([ 0.5123, -0.4124, 0.9882, 0.5121]) tensor([0.2496, 0.0990, 0.4018, 0.2496]) -Greedy action tensor([-0.5205, -0.5921, 0.3247, -0.3591]) tensor([0.1840, 0.1713, 0.4285, 0.2162]) -Greedy action tensor([0.2901, 0.8743, 0.3943, 0.0070]) tensor([0.2147, 0.3851, 0.2383, 0.1618]) -Greedy action tensor([ 0.7602, -0.5484, -0.0751, -0.2490]) tensor([0.4835, 0.1306, 0.2097, 0.1762]) -Greedy action tensor([ 0.1594, 0.2331, -0.2306, -0.1549]) tensor([0.2870, 0.3090, 0.1943, 0.2096]) -Greedy action tensor([ 0.2211, -0.2611, 0.9935, 0.0988]) tensor([0.2143, 0.1323, 0.4639, 0.1896]) -Greedy action tensor([ 0.5704, -1.0270, 0.3061, -0.2208]) tensor([0.4126, 0.0835, 0.3168, 0.1870]) -Greedy action tensor([ 0.8099, -0.8654, -0.1035, 0.9792]) tensor([0.3606, 0.0675, 0.1447, 0.4272]) -Greedy action tensor([ 0.9372, -0.0258, 0.3486, 0.2708]) tensor([0.4081, 0.1558, 0.2265, 0.2096]) -Greedy action tensor([ 0.4951, -1.3717, 0.0710, 0.2498]) tensor([0.3859, 0.0597, 0.2525, 0.3019]) -Greedy action tensor([ 0.7850, -0.0245, 0.2848, -0.3987]) tensor([0.4242, 0.1888, 0.2572, 0.1298]) -Greedy action tensor([ 0.1127, -0.4727, 0.3356, 0.4426]) tensor([0.2382, 0.1327, 0.2977, 0.3314]) -Greedy action tensor([ 0.4637, -0.1817, 0.5382, 0.3393]) tensor([0.2870, 0.1505, 0.3091, 0.2534]) -Greedy action tensor([-0.7630, -0.0331, 0.1390, 0.0835]) tensor([0.1271, 0.2636, 0.3131, 0.2962]) -Greedy action tensor([ 0.5683, -0.6472, 1.1229, -0.0044]) tensor([0.2776, 0.0823, 0.4834, 0.1566]) -Greedy action tensor([ 0.0468, -0.1525, -0.4105, -0.1583]) tensor([0.3061, 0.2508, 0.1938, 0.2493]) -Greedy action tensor([-0.8361, -0.0572, 0.4973, 0.1380]) tensor([0.1039, 0.2265, 0.3943, 0.2753]) -Greedy action tensor([-0.7581, -0.7494, -0.4312, 0.2725]) tensor([0.1613, 0.1627, 0.2237, 0.4522]) -Greedy action tensor([-0.5255, -0.7626, -0.6332, -1.4896]) tensor([0.3259, 0.2571, 0.2927, 0.1243]) -Greedy action tensor([-0.0306, -1.8644, 1.8915, -0.4658]) tensor([0.1157, 0.0185, 0.7909, 0.0749]) -Greedy action tensor([ 0.4905, -0.3470, -0.3517, -0.2062]) tensor([0.4234, 0.1833, 0.1824, 0.2110]) -Greedy action tensor([-0.7001, -0.5571, 0.8006, -0.5343]) tensor([0.1279, 0.1476, 0.5736, 0.1510]) -Greedy action tensor([ 0.4617, 0.1669, -0.5210, 0.4248]) tensor([0.3244, 0.2416, 0.1214, 0.3126]) -Greedy action tensor([ 0.9686, -1.1280, 1.1224, 0.2471]) tensor([0.3604, 0.0443, 0.4202, 0.1751]) -Greedy action tensor([ 0.0643, -0.5178, 0.4523, 0.5114]) tensor([0.2176, 0.1216, 0.3207, 0.3402]) -Greedy action tensor([ 1.3158, -0.4936, 0.9804, 0.7480]) tensor([0.4089, 0.0670, 0.2924, 0.2318]) -Greedy action tensor([-0.5030, -0.8318, 0.1940, 0.0325]) tensor([0.1840, 0.1324, 0.3694, 0.3143]) -Greedy action tensor([ 0.8776, -1.3574, 0.5176, 0.6038]) tensor([0.3898, 0.0417, 0.2720, 0.2965]) -Greedy action tensor([ 0.1425, -1.7265, -0.0617, 0.4339]) tensor([0.3023, 0.0466, 0.2465, 0.4046]) -Greedy action tensor([ 0.9082, -0.7253, 0.3671, -0.7286]) tensor([0.5071, 0.0990, 0.2952, 0.0987]) -Greedy action tensor([ 0.0648, -1.3058, 0.7869, 1.4587]) tensor([0.1362, 0.0346, 0.2804, 0.5489]) -Greedy action tensor([ 1.4307, -1.2041, 0.1593, 1.0040]) tensor([0.4988, 0.0358, 0.1399, 0.3255]) -Greedy action tensor([ 1.5875, -0.0490, -0.2828, 1.8448]) tensor([0.3785, 0.0737, 0.0583, 0.4895]) -Greedy action tensor([ 0.6033, -1.1796, 0.2032, 1.1237]) tensor([0.2840, 0.0478, 0.1904, 0.4779]) -Greedy action tensor([ 0.6711, -1.1502, 0.2504, 0.2572]) tensor([0.4033, 0.0653, 0.2648, 0.2666]) -Greedy action tensor([-1.0772, -0.0988, 0.3934, -0.5796]) tensor([0.1036, 0.2755, 0.4507, 0.1703]) -Greedy action tensor([-0.1209, -0.3565, 0.2588, -0.2939]) tensor([0.2443, 0.1930, 0.3571, 0.2055]) -Greedy action tensor([ 0.4991, -0.9957, -0.7253, -0.5308]) tensor([0.5332, 0.1196, 0.1567, 0.1904]) -Greedy action tensor([ 0.2793, 0.6549, 0.8070, -0.2596]) tensor([0.2112, 0.3075, 0.3580, 0.1232]) -Greedy action tensor([ 1.1097, -1.8705, 0.8589, 0.0813]) tensor([0.4574, 0.0232, 0.3559, 0.1635]) -Greedy action tensor([-0.1638, 0.1291, 0.8544, -1.0781]) tensor([0.1815, 0.2433, 0.5025, 0.0727]) -Greedy action tensor([-0.3004, 0.2730, -0.7980, 0.1367]) tensor([0.2028, 0.3599, 0.1233, 0.3140]) -Greedy action tensor([-0.1027, -0.7704, -0.1515, 0.1544]) tensor([0.2661, 0.1365, 0.2534, 0.3441]) -Greedy action tensor([-0.1675, -0.6100, 0.6430, 0.2280]) tensor([0.1860, 0.1195, 0.4183, 0.2762]) -Greedy action tensor([-1.9321, -0.4435, 0.6612, -0.1736]) tensor([0.0406, 0.1800, 0.5435, 0.2358]) -Greedy action tensor([-1.9224, -0.4297, 0.6572, -0.1672]) tensor([0.0409, 0.1822, 0.5401, 0.2368]) -Greedy action tensor([-1.9369, -0.4385, 0.6628, -0.1744]) tensor([0.0404, 0.1807, 0.5436, 0.2353]) -Greedy action tensor([-1.8784, -0.3655, 0.6221, -0.1466]) tensor([0.0428, 0.1942, 0.5213, 0.2417]) -Greedy action tensor([-1.7335, -0.5257, 0.5562, -0.0529]) tensor([0.0511, 0.1708, 0.5040, 0.2741]) -Greedy action tensor([-1.6027, -0.5546, 0.4848, 0.0289]) tensor([0.0587, 0.1675, 0.4736, 0.3002]) -Greedy action tensor([-1.8510, -0.4388, 0.6173, -0.1329]) tensor([0.0445, 0.1826, 0.5250, 0.2479]) -Greedy action tensor([-1.9154, -0.4522, 0.6581, -0.1634]) tensor([0.0413, 0.1785, 0.5419, 0.2383]) -Greedy action tensor([-1.3998, -0.5342, 0.3838, 0.1141]) tensor([0.0721, 0.1713, 0.4290, 0.3276]) -Greedy action tensor([-0.7816, 0.3598, -0.2352, -0.2531]) tensor([0.1324, 0.4145, 0.2286, 0.2246]) -Greedy action tensor([-1.8981, -0.4403, 0.6462, -0.1594]) tensor([0.0422, 0.1811, 0.5368, 0.2399]) -Greedy action tensor([-1.8654, -0.4737, 0.6267, -0.1528]) tensor([0.0441, 0.1775, 0.5336, 0.2447]) -Greedy action tensor([-1.8915, -0.4490, 0.6339, -0.1508]) tensor([0.0427, 0.1806, 0.5333, 0.2434]) -Greedy action tensor([-1.1278, -0.3100, 0.2298, 0.3299]) tensor([0.0874, 0.1979, 0.3395, 0.3752]) -Greedy action tensor([-0.9767, -0.2576, 0.3218, 0.4555]) tensor([0.0917, 0.1882, 0.3360, 0.3841]) -Greedy action tensor([-1.9170, -0.4567, 0.6514, -0.1674]) tensor([0.0415, 0.1787, 0.5412, 0.2387]) -Greedy action tensor([-0.5818, 1.0314, 0.0134, 0.4904]) tensor([0.0930, 0.4667, 0.1686, 0.2717]) -Greedy action tensor([-1.6127, 0.2255, 0.4238, -0.0688]) tensor([0.0509, 0.3202, 0.3904, 0.2385]) -Greedy action tensor([-1.8473, -0.4825, 0.6219, -0.1387]) tensor([0.0449, 0.1760, 0.5309, 0.2482]) -Greedy action tensor([-1.8966, -0.4156, 0.6399, -0.1576]) tensor([0.0422, 0.1854, 0.5326, 0.2399]) -Greedy action tensor([-1.8984, -0.4495, 0.6372, -0.1584]) tensor([0.0424, 0.1806, 0.5354, 0.2416]) -Greedy action tensor([-1.2652, -0.6119, 0.2549, 0.2471]) tensor([0.0831, 0.1597, 0.3801, 0.3771]) -Greedy action tensor([-1.9269, -0.4292, 0.6568, -0.1714]) tensor([0.0408, 0.1825, 0.5406, 0.2361]) -Greedy action tensor([-1.3907, -0.4653, 0.7507, 0.6682]) tensor([0.0503, 0.1270, 0.4283, 0.3944]) -Greedy action tensor([-1.8016, -0.3831, 0.5842, -0.0954]) tensor([0.0465, 0.1921, 0.5053, 0.2561]) -Greedy action tensor([-1.9433, -0.4527, 0.6711, -0.1800]) tensor([0.0401, 0.1781, 0.5479, 0.2339]) -Greedy action tensor([-0.8048, -0.0946, 0.0194, -0.2836]) tensor([0.1429, 0.2907, 0.3258, 0.2406]) -Greedy action tensor([-1.8208, -0.4270, 0.6162, -0.1079]) tensor([0.0454, 0.1831, 0.5196, 0.2519]) -Greedy action tensor([-1.7359, -0.2467, 0.5326, -0.0899]) tensor([0.0493, 0.2186, 0.4765, 0.2557]) -Greedy action tensor([-1.8414, -0.4783, 0.6144, -0.1455]) tensor([0.0454, 0.1775, 0.5294, 0.2476]) -Greedy action tensor([-1.9054, -0.4417, 0.6562, -0.1549]) tensor([0.0416, 0.1798, 0.5391, 0.2395]) -Greedy action tensor([-1.8485, -0.4541, 0.6318, -0.1077]) tensor([0.0441, 0.1778, 0.5267, 0.2514]) -Greedy action tensor([-0.5286, 0.9304, 0.0097, 0.1895]) tensor([0.1103, 0.4745, 0.1890, 0.2262]) -Greedy action tensor([-1.7548, -0.1959, 0.5358, -0.0256]) tensor([0.0470, 0.2235, 0.4645, 0.2650]) -Greedy action tensor([0.5170, 0.6607, 0.4847, 1.2056]) tensor([0.1956, 0.2258, 0.1893, 0.3893]) -Greedy action tensor([-1.9317, -0.4359, 0.6616, -0.1732]) tensor([0.0406, 0.1811, 0.5428, 0.2355]) -Greedy action tensor([-1.8290, -0.2257, 0.5841, -0.1191]) tensor([0.0441, 0.2192, 0.4927, 0.2439]) -Greedy action tensor([-1.8967, -0.4346, 0.6454, -0.1554]) tensor([0.0421, 0.1819, 0.5355, 0.2404]) -Greedy action tensor([-1.6904e+00, -5.0834e-01, 5.6008e-01, 1.1581e-03]) tensor([0.0521, 0.1700, 0.4949, 0.2830]) -Greedy action tensor([-1.5533, -0.1721, 0.5848, 0.0815]) tensor([0.0538, 0.2141, 0.4563, 0.2759]) -Greedy action tensor([-1.3950, -0.4351, 0.3523, 0.1164]) tensor([0.0720, 0.1881, 0.4134, 0.3265]) -Greedy action tensor([-1.9134, -0.4378, 0.6513, -0.1657]) tensor([0.0415, 0.1814, 0.5390, 0.2381]) -Greedy action tensor([-1.4784, 0.1834, 0.3629, -0.0402]) tensor([0.0596, 0.3139, 0.3756, 0.2510]) -Greedy action tensor([-1.1473, -0.3720, 0.4656, 0.4708]) tensor([0.0756, 0.1641, 0.3792, 0.3812]) -Greedy action tensor([-1.8294, -0.4391, 0.6544, -0.0986]) tensor([0.0442, 0.1773, 0.5293, 0.2492]) -Greedy action tensor([-1.8256, -0.4481, 0.6070, -0.1186]) tensor([0.0457, 0.1813, 0.5208, 0.2521]) -Greedy action tensor([-0.9796, -0.0165, 0.0919, -0.0683]) tensor([0.1108, 0.2902, 0.3234, 0.2756]) -Greedy action tensor([-1.8745, -0.3817, 0.6260, -0.1468]) tensor([0.0430, 0.1912, 0.5239, 0.2419]) -Greedy action tensor([-1.1584, 0.7573, 0.1721, 0.1225]) tensor([0.0659, 0.4476, 0.2493, 0.2372]) -Greedy action tensor([-1.8354, -0.4110, 0.6271, -0.0658]) tensor([0.0439, 0.1826, 0.5156, 0.2579]) -Greedy action tensor([-1.4414, -0.1053, 0.6466, 0.3070]) tensor([0.0537, 0.2043, 0.4334, 0.3086]) -Greedy action tensor([-1.8855, -0.4548, 0.6431, -0.1515]) tensor([0.0428, 0.1788, 0.5362, 0.2422]) -Greedy action tensor([-1.5887, -0.1272, 0.3910, -0.0056]) tensor([0.0574, 0.2475, 0.4156, 0.2795]) -Greedy action tensor([-1.4817, -0.3562, 0.3632, 0.1073]) tensor([0.0653, 0.2013, 0.4134, 0.3200]) -Greedy action tensor([-1.8040, -0.1874, 0.5865, -0.0951]) tensor([0.0445, 0.2240, 0.4858, 0.2457]) -Greedy action tensor([-1.7814, -0.5228, 0.6453, -0.1145]) tensor([0.0473, 0.1665, 0.5356, 0.2505]) -Greedy action tensor([-1.8523, -0.4384, 0.6162, -0.1323]) tensor([0.0444, 0.1827, 0.5246, 0.2482]) -Greedy action tensor([-1.7711, -0.4102, 0.5816, -0.0823]) tensor([0.0480, 0.1872, 0.5048, 0.2599]) -Greedy action tensor([-1.1077, 0.0045, 0.0840, -0.3088]) tensor([0.1046, 0.3182, 0.3445, 0.2326]) -Greedy action tensor([-0.6325, 0.7075, 0.0218, 0.0870]) tensor([0.1137, 0.4342, 0.2187, 0.2334]) -Greedy action tensor([-1.7213, -0.2747, 0.5232, -0.0825]) tensor([0.0504, 0.2142, 0.4758, 0.2596]) -Greedy action tensor([-0.8992, 0.8810, 0.1231, 0.1239]) tensor([0.0801, 0.4748, 0.2225, 0.2227]) -Greedy action tensor([-1.9216, -0.4404, 0.6598, -0.1689]) tensor([0.0410, 0.1804, 0.5420, 0.2366]) -Greedy action tensor([-1.8999, -0.4077, 0.6458, -0.1521]) tensor([0.0418, 0.1857, 0.5327, 0.2398]) -Greedy action tensor([-1.9381, -0.4439, 0.6651, -0.1768]) tensor([0.0404, 0.1798, 0.5450, 0.2348]) -Greedy action tensor([-1.5685, -0.5306, 0.4927, -0.0488]) tensor([0.0615, 0.1737, 0.4834, 0.2813]) -Greedy action tensor([-1.9059, -0.4791, 0.6672, -0.1577]) tensor([0.0416, 0.1734, 0.5457, 0.2392]) -Greedy action tensor([-1.9164, -0.4271, 0.6550, -0.1577]) tensor([0.0411, 0.1823, 0.5379, 0.2387]) -Greedy action tensor([-1.4210, 0.4139, 0.2286, 0.0148]) tensor([0.0600, 0.3757, 0.3122, 0.2521]) -Greedy action tensor([-1.7803, -0.4325, 0.5888, -0.1143]) tensor([0.0480, 0.1848, 0.5131, 0.2540]) -Greedy action tensor([-1.8240, -0.4428, 0.6168, -0.1123]) tensor([0.0455, 0.1809, 0.5219, 0.2517]) -Greedy action tensor([-1.8527, -0.3696, 0.6142, -0.1425]) tensor([0.0440, 0.1939, 0.5187, 0.2434]) -Greedy action tensor([-1.8753, -0.4266, 0.6312, -0.1254]) tensor([0.0430, 0.1829, 0.5269, 0.2472]) -Greedy action tensor([-1.8445, -0.3881, 0.6279, -0.1200]) tensor([0.0440, 0.1886, 0.5209, 0.2466]) -Greedy action tensor([-1.8929, -0.4571, 0.6376, -0.1513]) tensor([0.0426, 0.1791, 0.5352, 0.2431]) -Greedy action tensor([-1.8936, -0.3550, 0.6261, -0.1508]) tensor([0.0420, 0.1958, 0.5221, 0.2401]) -Greedy action tensor([-1.8048, -0.0792, 0.5640, -0.0903]) tensor([0.0438, 0.2457, 0.4675, 0.2430]) -Greedy action tensor([-1.9403, -0.4398, 0.6631, -0.1780]) tensor([0.0403, 0.1807, 0.5443, 0.2347]) -Greedy action tensor([-1.8268, -0.4661, 0.6138, -0.1158]) tensor([0.0456, 0.1779, 0.5239, 0.2526]) -Greedy action tensor([-1.8761, -0.3696, 0.5799, -0.1571]) tensor([0.0440, 0.1983, 0.5125, 0.2452]) -Greedy action tensor([-0.4207, 0.6497, 0.2012, 0.5661]) tensor([0.1182, 0.3447, 0.2201, 0.3170]) -Greedy action tensor([ 0.4784, -0.3086, -0.0664, -0.1157]) tensor([0.3865, 0.1759, 0.2242, 0.2134]) -Greedy action tensor([ 0.8046, -0.6170, -0.1723, -0.4495]) tensor([0.5254, 0.1268, 0.1978, 0.1499]) -Greedy action tensor([0.1372, 0.0813, 0.1200, 0.2648]) tensor([0.2460, 0.2327, 0.2418, 0.2795]) -Greedy action tensor([ 0.8584, -0.4712, 0.2477, -0.6491]) tensor([0.4929, 0.1304, 0.2676, 0.1091]) -Greedy action tensor([ 0.8992, -0.1671, -0.1350, -0.2751]) tensor([0.4978, 0.1714, 0.1770, 0.1538]) -Greedy action tensor([ 1.0485, -0.6240, -0.2339, -0.7272]) tensor([0.6118, 0.1149, 0.1697, 0.1036]) -Greedy action tensor([ 0.6121, -0.0194, -0.0700, 0.0112]) tensor([0.3868, 0.2057, 0.1955, 0.2121]) -Greedy action tensor([ 0.4620, -0.1231, -0.0621, -0.4647]) tensor([0.3929, 0.2189, 0.2327, 0.1555]) -Greedy action tensor([ 0.8805, -0.4805, -0.0439, -0.1604]) tensor([0.4984, 0.1278, 0.1978, 0.1760]) -Greedy action tensor([ 0.7130, -0.4985, -0.1405, -0.0891]) tensor([0.4604, 0.1371, 0.1961, 0.2064]) -Greedy action tensor([ 0.8547, -0.4356, -0.1786, -0.4050]) tensor([0.5223, 0.1437, 0.1858, 0.1482]) -Greedy action tensor([ 0.6456, -0.5247, 0.1818, -0.7246]) tensor([0.4560, 0.1415, 0.2867, 0.1158]) -Greedy action tensor([ 0.7638, -0.5582, 0.0261, -0.4213]) tensor([0.4877, 0.1300, 0.2332, 0.1491]) -Greedy action tensor([ 0.9144, -0.9076, 0.0324, -0.3786]) tensor([0.5405, 0.0874, 0.2237, 0.1483]) -Greedy action tensor([ 0.5481, -0.4208, -0.1330, -0.3526]) tensor([0.4363, 0.1656, 0.2208, 0.1773]) -Greedy action tensor([ 0.8515, -0.6161, -0.0324, -0.2656]) tensor([0.5074, 0.1169, 0.2096, 0.1660]) -Greedy action tensor([ 0.6157, 0.3083, -0.1576, 0.2581]) tensor([0.3453, 0.2539, 0.1593, 0.2415]) -Greedy action tensor([ 0.5512, -0.2892, -0.0372, -0.2645]) tensor([0.4117, 0.1777, 0.2286, 0.1821]) -Greedy action tensor([ 0.4206, -0.0887, -0.1073, -0.2774]) tensor([0.3720, 0.2235, 0.2194, 0.1851]) -Greedy action tensor([ 0.8094, -0.0504, -0.0080, -0.0376]) tensor([0.4360, 0.1845, 0.1925, 0.1869]) -Greedy action tensor([ 0.4738, -0.1623, -0.0116, -0.2378]) tensor([0.3794, 0.2008, 0.2335, 0.1862]) -Greedy action tensor([ 1.0998, -0.4975, -0.1482, -0.6339]) tensor([0.6002, 0.1215, 0.1723, 0.1060]) -Greedy action tensor([ 0.9698, -0.5187, 0.1918, -0.1849]) tensor([0.5000, 0.1128, 0.2296, 0.1576]) -Greedy action tensor([ 0.7264, -0.6033, 0.1300, -0.4197]) tensor([0.4688, 0.1240, 0.2582, 0.1490]) -Greedy action tensor([ 0.7612, -0.1948, -0.1176, -0.3461]) tensor([0.4694, 0.1805, 0.1950, 0.1551]) -Greedy action tensor([ 1.1407, -0.8668, 0.0807, -0.8612]) tensor([0.6189, 0.0831, 0.2144, 0.0836]) -Greedy action tensor([ 0.8288, -0.7077, 0.2266, -0.3687]) tensor([0.4843, 0.1042, 0.2652, 0.1462]) -Greedy action tensor([ 0.4723, 0.0379, -0.0053, -0.1069]) tensor([0.3536, 0.2290, 0.2193, 0.1981]) -Greedy action tensor([ 0.7810, -0.4272, -0.1040, -0.3296]) tensor([0.4900, 0.1464, 0.2022, 0.1614]) -Greedy action tensor([ 0.4644, -0.6345, -0.0790, -0.2423]) tensor([0.4154, 0.1384, 0.2413, 0.2049]) -Greedy action tensor([ 1.1682, -0.8168, 0.0318, -0.5318]) tensor([0.6094, 0.0837, 0.1956, 0.1113]) -Greedy action tensor([ 0.6623, -0.0587, -0.1348, -0.3130]) tensor([0.4322, 0.2101, 0.1947, 0.1629]) -Greedy action tensor([ 1.0552, -0.6065, 0.0527, -0.4467]) tensor([0.5620, 0.1067, 0.2062, 0.1251]) -Greedy action tensor([ 0.5710, 0.0420, -0.1027, -0.0234]) tensor([0.3772, 0.2223, 0.1923, 0.2082]) -Greedy action tensor([ 0.6802, 0.0127, -0.0538, -0.1756]) tensor([0.4136, 0.2122, 0.1985, 0.1757]) -Greedy action tensor([ 0.8325, -0.7947, 0.0620, -0.3007]) tensor([0.5047, 0.0992, 0.2336, 0.1625]) -Greedy action tensor([ 0.9577, -0.7052, -0.0043, -0.3713]) tensor([0.5445, 0.1032, 0.2081, 0.1442]) -Greedy action tensor([ 0.7883, -0.5160, -0.0063, -0.2025]) tensor([0.4775, 0.1296, 0.2157, 0.1773]) -Greedy action tensor([ 0.4911, 0.1826, -0.0663, -0.0679]) tensor([0.3473, 0.2551, 0.1989, 0.1986]) -Greedy action tensor([0.4809, 0.1868, 0.3413, 0.4171]) tensor([0.2814, 0.2097, 0.2448, 0.2640]) -Greedy action tensor([ 1.1884, -0.7859, 0.0242, -0.5446]) tensor([0.6143, 0.0853, 0.1918, 0.1086]) -Greedy action tensor([ 0.5271, -0.1482, 0.0269, -0.0317]) tensor([0.3721, 0.1894, 0.2257, 0.2128]) -Greedy action tensor([ 0.5249, -0.1753, 0.0426, -0.7650]) tensor([0.4186, 0.2078, 0.2584, 0.1152]) -Greedy action tensor([ 0.6012, -0.0466, 0.0754, -0.2169]) tensor([0.3913, 0.2047, 0.2313, 0.1727]) -Greedy action tensor([ 1.0185, -0.7957, -0.0746, -0.4644]) tensor([0.5797, 0.0945, 0.1943, 0.1316]) -Greedy action tensor([ 1.0128, -0.4820, -0.1171, -0.3774]) tensor([0.5567, 0.1249, 0.1798, 0.1386]) -Greedy action tensor([ 0.2830, -0.1391, -0.0726, -0.1530]) tensor([0.3330, 0.2183, 0.2333, 0.2153]) -Greedy action tensor([ 0.7975, -0.1893, -0.0668, 0.0595]) tensor([0.4401, 0.1641, 0.1854, 0.2104]) -Greedy action tensor([ 1.0162, -0.8462, 0.0469, -0.4182]) tensor([0.5641, 0.0876, 0.2140, 0.1344]) -Greedy action tensor([ 0.8831, -0.5342, 0.0501, -0.3067]) tensor([0.5047, 0.1223, 0.2194, 0.1536]) -Greedy action tensor([ 0.5303, 0.2106, -0.2211, -0.0703]) tensor([0.3641, 0.2645, 0.1717, 0.1997]) -Greedy action tensor([ 0.7518, -0.7791, -0.0100, -0.3092]) tensor([0.4928, 0.1066, 0.2300, 0.1706]) -Greedy action tensor([ 0.6172, -0.3935, -0.0565, -0.0860]) tensor([0.4222, 0.1537, 0.2152, 0.2090]) -Greedy action tensor([ 0.6175, 0.0144, -0.0052, 0.0670]) tensor([0.3759, 0.2057, 0.2017, 0.2168]) -Greedy action tensor([ 0.9892, -0.6628, -0.1994, -0.8433]) tensor([0.6037, 0.1157, 0.1839, 0.0966]) -Greedy action tensor([ 0.7196, -0.5166, -0.1259, -0.3117]) tensor([0.4816, 0.1399, 0.2068, 0.1717]) -Greedy action tensor([ 0.7743, -0.4371, -0.0088, -0.2757]) tensor([0.4751, 0.1415, 0.2171, 0.1663]) -Greedy action tensor([ 0.9080, -0.6145, -0.1269, -0.2555]) tensor([0.5303, 0.1157, 0.1884, 0.1657]) -Greedy action tensor([ 0.9925, -0.8604, 0.1093, -0.4761]) tensor([0.5554, 0.0871, 0.2296, 0.1279]) -Greedy action tensor([ 0.7601, -0.5182, -0.0500, -0.1658]) tensor([0.4718, 0.1314, 0.2099, 0.1869]) -Greedy action tensor([ 0.6459, -0.1036, 0.1078, -0.0324]) tensor([0.3900, 0.1843, 0.2277, 0.1979]) -Greedy action tensor([ 0.7121, -0.6373, -0.1676, -0.1485]) tensor([0.4768, 0.1237, 0.1978, 0.2017]) -Greedy action tensor([ 0.8190, -0.4096, 0.0914, -0.2857]) tensor([0.4746, 0.1389, 0.2293, 0.1572]) -Greedy action tensor([ 0.6988, -0.6151, -0.0588, -0.2280]) tensor([0.4687, 0.1260, 0.2197, 0.1855]) -Greedy action tensor([ 1.4107, -1.2657, 0.1264, -0.8011]) tensor([0.6872, 0.0473, 0.1903, 0.0753]) -Greedy action tensor([ 0.5842, -0.4604, -0.0431, -0.1863]) tensor([0.4258, 0.1498, 0.2274, 0.1970]) -Greedy action tensor([ 0.4304, -0.4739, -0.0291, -0.1914]) tensor([0.3886, 0.1573, 0.2454, 0.2087]) -Greedy action tensor([ 0.7727, 0.1230, -0.0413, -0.0401]) tensor([0.4151, 0.2168, 0.1839, 0.1842]) -Greedy action tensor([ 0.5189, -0.4552, -0.1169, -0.1940]) tensor([0.4171, 0.1575, 0.2209, 0.2045]) -Greedy action tensor([ 0.4097, -0.1041, 0.1219, -0.1843]) tensor([0.3448, 0.2063, 0.2586, 0.1904]) -Greedy action tensor([ 8.1013e-01, -6.9505e-01, -6.7052e-04, -4.0385e-01]) tensor([0.5093, 0.1131, 0.2264, 0.1513]) -Greedy action tensor([ 8.4777e-01, -4.9281e-01, 1.2270e-04, -2.0886e-01]) tensor([0.4907, 0.1284, 0.2102, 0.1706]) -Greedy action tensor([ 0.6632, -0.2737, -0.0937, -0.3060]) tensor([0.4464, 0.1749, 0.2094, 0.1693]) -Greedy action tensor([ 0.5108, -0.5220, -0.0949, -0.2488]) tensor([0.4220, 0.1502, 0.2303, 0.1974]) -Greedy action tensor([ 0.5409, -0.1940, -0.0938, -0.0586]) tensor([0.3908, 0.1874, 0.2072, 0.2146]) -Greedy action tensor([ 0.8200, -0.1435, -0.0261, -0.1209]) tensor([0.4543, 0.1734, 0.1950, 0.1773]) -Greedy action tensor([ 0.7185, -0.3460, -0.1049, -0.1153]) tensor([0.4508, 0.1555, 0.1979, 0.1958]) -Greedy action tensor([ 0.3906, -0.0300, -0.1145, -0.3535]) tensor([0.3656, 0.2401, 0.2206, 0.1737]) -Greedy action tensor([ 0.8269, -0.5540, -0.1727, -0.3067]) tensor([0.5151, 0.1295, 0.1896, 0.1658]) -Greedy action tensor([ 1.0183, -0.4039, 0.0499, -0.5034]) tensor([0.5437, 0.1311, 0.2064, 0.1187]) -Greedy action tensor([ 0.9612, -0.6732, -0.0776, -0.3954]) tensor([0.5535, 0.1080, 0.1959, 0.1426]) -Greedy action tensor([ 1.5403, -0.5368, -0.2948, 0.5810]) tensor([0.5995, 0.0751, 0.0957, 0.2297]) -Greedy action tensor([ 2.0433, -1.0274, -0.3356, 0.2709]) tensor([0.7640, 0.0354, 0.0708, 0.1298]) -Greedy action tensor([ 1.8159, -0.0168, -1.7088, 0.3376]) tensor([0.7055, 0.1129, 0.0208, 0.1609]) -Greedy action tensor([ 1.2674, -0.3173, -0.3480, 0.5317]) tensor([0.5311, 0.1089, 0.1056, 0.2545]) -Greedy action tensor([ 0.9234, -0.6097, 0.0126, -0.2261]) tensor([0.5168, 0.1116, 0.2079, 0.1637]) -Greedy action tensor([ 0.9561, 0.0631, -0.7916, 0.3588]) tensor([0.4686, 0.1919, 0.0816, 0.2579]) -Greedy action tensor([ 1.6175, -0.5824, -0.7057, 0.2343]) tensor([0.6852, 0.0759, 0.0671, 0.1718]) -Greedy action tensor([ 1.7815, -0.1612, -0.5939, 0.0197]) tensor([0.7102, 0.1018, 0.0660, 0.1220]) -Greedy action tensor([ 1.6765, 0.0111, -0.7050, 0.9141]) tensor([0.5721, 0.1082, 0.0529, 0.2669]) -Greedy action tensor([ 1.5211, -0.4267, -0.2238, 0.3545]) tensor([0.6140, 0.0876, 0.1072, 0.1912]) -Greedy action tensor([ 1.8310, -0.6257, -0.5769, 0.2108]) tensor([0.7280, 0.0624, 0.0655, 0.1440]) -Greedy action tensor([ 1.2808, -0.5336, -0.0505, 0.0213]) tensor([0.5845, 0.0952, 0.1544, 0.1659]) -Greedy action tensor([ 2.0943, -1.0581, 0.0355, 0.8794]) tensor([0.6816, 0.0291, 0.0870, 0.2023]) -Greedy action tensor([ 1.4774, -0.7386, -0.1705, 0.4130]) tensor([0.6074, 0.0662, 0.1169, 0.2095]) -Greedy action tensor([ 1.0196, 0.0211, -0.4772, 0.0797]) tensor([0.5043, 0.1858, 0.1129, 0.1970]) -Greedy action tensor([ 1.4425, -0.4618, -0.3229, 0.2248]) tensor([0.6188, 0.0922, 0.1059, 0.1831]) -Greedy action tensor([ 1.4211, 0.0253, -0.4457, 0.7389]) tensor([0.5242, 0.1298, 0.0810, 0.2650]) -Greedy action tensor([ 1.5958, -0.2373, -0.2715, 0.3888]) tensor([0.6198, 0.0991, 0.0958, 0.1854]) -Greedy action tensor([ 1.1375, 0.1101, -0.2615, 0.5784]) tensor([0.4594, 0.1645, 0.1134, 0.2627]) -Greedy action tensor([ 1.7118, -0.8048, -0.6456, 0.2037]) tensor([0.7159, 0.0578, 0.0678, 0.1585]) -Greedy action tensor([ 1.4324, -0.4069, -0.4025, 0.5765]) tensor([0.5736, 0.0912, 0.0916, 0.2437]) -Greedy action tensor([ 1.7720, -0.5002, -0.5906, 0.7181]) tensor([0.6469, 0.0667, 0.0609, 0.2255]) -Greedy action tensor([ 1.1678, -0.4829, -0.1500, 0.4580]) tensor([0.5124, 0.0984, 0.1372, 0.2520]) -Greedy action tensor([ 1.4419, -0.7170, -0.1170, 0.2824]) tensor([0.6100, 0.0704, 0.1283, 0.1913]) -Greedy action tensor([ 1.2551, -0.3256, -0.1825, 0.5670]) tensor([0.5139, 0.1058, 0.1221, 0.2583]) -Greedy action tensor([ 1.9933, -1.3385, -0.0797, 0.4606]) tensor([0.7260, 0.0259, 0.0913, 0.1568]) -Greedy action tensor([ 1.9433, -0.8844, -0.7186, 0.5255]) tensor([0.7293, 0.0431, 0.0509, 0.1767]) -Greedy action tensor([ 1.0007, 0.0909, -0.7033, 0.3212]) tensor([0.4781, 0.1925, 0.0870, 0.2424]) -Greedy action tensor([ 1.3143, -0.9433, -0.1557, 0.4496]) tensor([0.5696, 0.0596, 0.1310, 0.2399]) -Greedy action tensor([ 2.5094, -1.0179, 0.1158, 1.0871]) tensor([0.7343, 0.0216, 0.0670, 0.1771]) -Greedy action tensor([ 1.1457, 0.1339, -0.4877, 0.5960]) tensor([0.4682, 0.1702, 0.0914, 0.2702]) -Greedy action tensor([ 1.9804, -0.0678, -1.0057, 0.7827]) tensor([0.6751, 0.0871, 0.0341, 0.2038]) -Greedy action tensor([ 1.4135, -0.5582, -0.3295, 0.4508]) tensor([0.5896, 0.0821, 0.1032, 0.2252]) -Greedy action tensor([ 1.2871, -0.0628, -0.8783, 0.4847]) tensor([0.5488, 0.1423, 0.0630, 0.2460]) -Greedy action tensor([ 0.9480, -0.8965, -0.1655, 0.0616]) tensor([0.5267, 0.0833, 0.1730, 0.2171]) -Greedy action tensor([ 1.1833, -0.2820, -0.6481, 0.4233]) tensor([0.5380, 0.1243, 0.0862, 0.2516]) -Greedy action tensor([ 1.6951e+00, -2.4370e-01, -4.7558e-01, -1.4471e-03]) tensor([0.6938, 0.0998, 0.0792, 0.1272]) -Greedy action tensor([ 1.2057, -0.5745, -0.1050, 0.3938]) tensor([0.5313, 0.0896, 0.1433, 0.2359]) -Greedy action tensor([ 1.8611, -0.8788, -0.2397, 0.5186]) tensor([0.6906, 0.0446, 0.0845, 0.1804]) -Greedy action tensor([ 1.6834, -0.7387, -0.2331, 0.3077]) tensor([0.6718, 0.0596, 0.0988, 0.1697]) -Greedy action tensor([ 1.3640, -0.6734, -0.4849, 0.4474]) tensor([0.5925, 0.0772, 0.0933, 0.2369]) -Greedy action tensor([ 1.7612, -0.7110, 0.0682, 0.4671]) tensor([0.6483, 0.0547, 0.1193, 0.1777]) -Greedy action tensor([ 1.6082, -0.3632, -0.3945, 0.7740]) tensor([0.5853, 0.0815, 0.0790, 0.2542]) -Greedy action tensor([ 1.3002, -0.1086, -0.4333, 0.7169]) tensor([0.5053, 0.1235, 0.0893, 0.2820]) -Greedy action tensor([ 1.1767, -0.1215, -0.3228, 0.0295]) tensor([0.5513, 0.1505, 0.1231, 0.1751]) -Greedy action tensor([ 2.4940, -1.5735, 0.1796, 0.8079]) tensor([0.7685, 0.0132, 0.0759, 0.1424]) -Greedy action tensor([ 1.8351, -0.9956, 0.0198, 0.2983]) tensor([0.6960, 0.0410, 0.1133, 0.1497]) -Greedy action tensor([ 1.5646, -0.5435, -0.4419, 0.4035]) tensor([0.6373, 0.0774, 0.0857, 0.1996]) -Greedy action tensor([ 1.8019, -0.2872, -0.1050, 0.0877]) tensor([0.6885, 0.0852, 0.1023, 0.1240]) -Greedy action tensor([ 2.5819, -1.6171, -0.4715, 0.3000]) tensor([0.8589, 0.0129, 0.0405, 0.0877]) -Greedy action tensor([ 1.4926, -0.6665, -0.2096, 0.0763]) tensor([0.6492, 0.0749, 0.1183, 0.1575]) -Greedy action tensor([ 1.9284, -1.2607, 0.1328, 0.9232]) tensor([0.6356, 0.0262, 0.1055, 0.2326]) -Greedy action tensor([ 1.4486, -0.3791, -0.5327, 0.4092]) tensor([0.6052, 0.0973, 0.0835, 0.2140]) -Greedy action tensor([ 2.3848, -1.0226, -0.5291, 0.5887]) tensor([0.7979, 0.0264, 0.0433, 0.1324]) -Greedy action tensor([ 1.3842, -0.8219, -0.3351, 0.6457]) tensor([0.5659, 0.0623, 0.1014, 0.2704]) -Greedy action tensor([ 1.8941, -0.8012, -0.6778, 0.5291]) tensor([0.7146, 0.0483, 0.0546, 0.1825]) -Greedy action tensor([ 1.4659, -0.8000, 0.0436, 0.8588]) tensor([0.5291, 0.0549, 0.1276, 0.2883]) -Greedy action tensor([ 1.1545, -0.6177, -0.2222, 0.2950]) tensor([0.5418, 0.0921, 0.1368, 0.2294]) -Greedy action tensor([ 1.2881, -0.3873, 0.0662, 0.4253]) tensor([0.5252, 0.0983, 0.1548, 0.2216]) -Greedy action tensor([ 1.2166, -0.1356, -0.4788, 0.5764]) tensor([0.5078, 0.1313, 0.0932, 0.2677]) -Greedy action tensor([ 1.2465, 0.0454, -0.7964, 0.2133]) tensor([0.5598, 0.1684, 0.0726, 0.1992]) -Greedy action tensor([ 1.1423, -0.9509, -0.1033, 1.1413]) tensor([0.4149, 0.0512, 0.1194, 0.4145]) -Greedy action tensor([ 0.9905, 0.0900, -0.9025, 0.0361]) tensor([0.5149, 0.2092, 0.0776, 0.1983]) -Greedy action tensor([ 1.2728, -0.5830, -0.6538, 0.3862]) tensor([0.5834, 0.0912, 0.0850, 0.2404]) -Greedy action tensor([ 1.8632, -0.2698, -0.5360, 0.5237]) tensor([0.6797, 0.0805, 0.0617, 0.1781]) -Greedy action tensor([ 1.4256, -0.3280, -0.3688, 0.3861]) tensor([0.5907, 0.1023, 0.0982, 0.2089]) -Greedy action tensor([ 1.1338, -0.1026, -0.9524, 0.2843]) tensor([0.5428, 0.1577, 0.0674, 0.2321]) -Greedy action tensor([ 1.0042, -0.3556, -0.1787, -0.5248]) tensor([0.5618, 0.1442, 0.1721, 0.1218]) -Greedy action tensor([ 1.0821, -0.0022, -0.6136, 0.1159]) tensor([0.5257, 0.1778, 0.0965, 0.2001]) -Greedy action tensor([ 1.6214, -0.5471, -0.6611, 0.7562]) tensor([0.6107, 0.0698, 0.0623, 0.2571]) -Greedy action tensor([ 1.3987, -0.0934, -0.1431, 0.3385]) tensor([0.5601, 0.1260, 0.1199, 0.1940]) -Greedy action tensor([ 1.8911e+00, -7.9247e-01, -4.6255e-01, -4.6322e-04]) tensor([0.7609, 0.0520, 0.0723, 0.1148]) -Greedy action tensor([ 1.0976, -0.4123, 0.1369, 0.1911]) tensor([0.4981, 0.1101, 0.1906, 0.2012]) -Greedy action tensor([ 2.1608, -0.1212, -0.0996, 0.1705]) tensor([0.7446, 0.0760, 0.0777, 0.1017]) -Greedy action tensor([ 1.1926, 0.0456, -1.0785, 0.0680]) tensor([0.5729, 0.1819, 0.0591, 0.1861]) -Greedy action tensor([ 1.5062, -0.2842, -0.1361, 0.4506]) tensor([0.5853, 0.0977, 0.1133, 0.2037]) -Greedy action tensor([ 1.6490, 0.2454, -1.0223, 0.1117]) tensor([0.6537, 0.1606, 0.0452, 0.1405]) -Greedy action tensor([ 1.1426, -0.0671, -0.0939, -0.0433]) tensor([0.5279, 0.1575, 0.1533, 0.1613]) -Greedy action tensor([ 1.8198, -0.5598, -0.3736, 0.3963]) tensor([0.6921, 0.0641, 0.0772, 0.1667]) -Greedy action tensor([ 1.0715, -0.3008, -0.1258, 0.1421]) tensor([0.5127, 0.1300, 0.1548, 0.2024]) -Greedy action tensor([ 1.6333, -0.5443, -0.0976, 0.0923]) tensor([0.6646, 0.0753, 0.1177, 0.1423]) -Greedy action tensor([-0.2705, -1.2586, -0.8431, 0.6261]) tensor([0.2279, 0.0849, 0.1286, 0.5587]) -Greedy action tensor([-0.8852, -0.0637, 0.3971, -0.7000]) tensor([0.1237, 0.2814, 0.4460, 0.1489]) -Greedy action tensor([-0.8730, -0.4803, 0.3002, -0.8594]) tensor([0.1487, 0.2202, 0.4805, 0.1507]) -Greedy action tensor([-0.0020, -0.1483, 0.6745, -1.4134]) tensor([0.2454, 0.2120, 0.4828, 0.0598]) -Greedy action tensor([ 0.1392, -1.2521, -0.3271, 0.3478]) tensor([0.3217, 0.0800, 0.2018, 0.3964]) -Greedy action tensor([-1.0395, 0.0034, 0.7149, -0.6919]) tensor([0.0906, 0.2572, 0.5239, 0.1283]) -Greedy action tensor([ 0.6136, -0.4691, -0.7016, 0.3871]) tensor([0.4159, 0.1409, 0.1116, 0.3316]) -Greedy action tensor([ 1.3957, 0.4088, 1.6091, -0.7392]) tensor([0.3665, 0.1366, 0.4536, 0.0433]) -Greedy action tensor([-0.4228, -0.3757, -0.5080, 0.5313]) tensor([0.1798, 0.1884, 0.1651, 0.4667]) -Greedy action tensor([ 0.1974, -0.0622, 0.1651, -0.0946]) tensor([0.2868, 0.2213, 0.2777, 0.2142]) -Greedy action tensor([ 1.1569, -0.0529, 0.0803, 0.8993]) tensor([0.4146, 0.1237, 0.1413, 0.3204]) -Greedy action tensor([ 0.7792, -1.1484, 0.4005, -0.0292]) tensor([0.4394, 0.0639, 0.3009, 0.1958]) -Greedy action tensor([ 0.5000, -0.5210, 0.2065, -0.7618]) tensor([0.4186, 0.1508, 0.3121, 0.1185]) -Greedy action tensor([-0.3718, -0.7975, 0.5750, -0.6868]) tensor([0.2016, 0.1317, 0.5196, 0.1471]) -Greedy action tensor([ 0.7089, -0.1290, 1.0661, 0.4870]) tensor([0.2730, 0.1181, 0.3902, 0.2187]) -Greedy action tensor([ 0.2160, 0.0233, -0.3510, -0.0812]) tensor([0.3190, 0.2631, 0.1809, 0.2370]) -Greedy action tensor([0.8427, 0.1346, 0.8872, 0.6282]) tensor([0.2990, 0.1473, 0.3126, 0.2412]) -Greedy action tensor([-0.6682, -0.5838, 0.6597, -1.3447]) tensor([0.1570, 0.1708, 0.5924, 0.0798]) -Greedy action tensor([ 0.2646, 0.1476, 1.9623, -0.7117]) tensor([0.1294, 0.1151, 0.7067, 0.0487]) -Greedy action tensor([-0.0447, -0.3077, 0.7146, -0.1091]) tensor([0.2065, 0.1587, 0.4412, 0.1936]) -Greedy action tensor([-0.1375, -0.6406, 0.5410, 0.6540]) tensor([0.1729, 0.1046, 0.3408, 0.3816]) -Greedy action tensor([ 1.3831, -1.3336, 0.2542, 0.5877]) tensor([0.5432, 0.0359, 0.1757, 0.2452]) -Greedy action tensor([-0.0620, -1.5647, 0.5478, 0.7326]) tensor([0.1895, 0.0422, 0.3488, 0.4195]) -Greedy action tensor([ 0.7118, -0.2775, -0.3803, -0.2990]) tensor([0.4828, 0.1795, 0.1620, 0.1757]) -Greedy action tensor([-0.2981, -0.9662, 0.1928, -0.0579]) tensor([0.2264, 0.1160, 0.3698, 0.2878]) -Greedy action tensor([-0.1121, 0.7984, 0.1334, -0.6409]) tensor([0.1868, 0.4643, 0.2388, 0.1101]) -Greedy action tensor([ 0.0454, -0.5397, -0.3237, 0.4266]) tensor([0.2694, 0.1501, 0.1862, 0.3944]) -Greedy action tensor([-0.4896, -1.2131, -0.0427, -0.9316]) tensor([0.2709, 0.1314, 0.4235, 0.1741]) -Greedy action tensor([ 1.5553, -0.5340, 0.6907, 1.0130]) tensor([0.4703, 0.0582, 0.1981, 0.2734]) -Greedy action tensor([ 0.5212, -1.2142, -0.4428, -0.2132]) tensor([0.4908, 0.0865, 0.1872, 0.2355]) -Greedy action tensor([ 0.6482, 0.3040, -0.0949, -0.2836]) tensor([0.3879, 0.2749, 0.1845, 0.1528]) -Greedy action tensor([ 1.6196, 0.0038, -0.0508, 0.7497]) tensor([0.5537, 0.1100, 0.1042, 0.2320]) -Greedy action tensor([-0.1708, 0.9960, 1.1914, -0.2331]) tensor([0.1104, 0.3546, 0.4312, 0.1038]) -Greedy action tensor([ 0.5948, -0.5683, 0.0453, 0.7668]) tensor([0.3249, 0.1015, 0.1876, 0.3859]) -Greedy action tensor([ 0.1443, -1.0181, -0.5030, 0.3055]) tensor([0.3321, 0.1039, 0.1738, 0.3902]) -Greedy action tensor([-0.2113, -0.9720, 0.9773, -0.9211]) tensor([0.1908, 0.0892, 0.6262, 0.0938]) -Greedy action tensor([-0.4916, 0.1197, 0.1098, -0.2928]) tensor([0.1699, 0.3130, 0.3099, 0.2072]) -Greedy action tensor([-0.2038, 0.2748, -0.2114, -0.6132]) tensor([0.2342, 0.3779, 0.2324, 0.1555]) -Greedy action tensor([5.3097e-01, 5.5112e-04, 8.5650e-01, 5.2845e-02]) tensor([0.2783, 0.1637, 0.3854, 0.1725]) -Greedy action tensor([-0.3698, -2.0284, 0.5690, 0.1723]) tensor([0.1829, 0.0348, 0.4677, 0.3145]) -Greedy action tensor([-1.2441, -0.6097, 0.3387, -0.5316]) tensor([0.1021, 0.1926, 0.4971, 0.2082]) -Greedy action tensor([ 0.2192, -0.0677, 0.9298, -0.1196]) tensor([0.2223, 0.1669, 0.4524, 0.1584]) -Greedy action tensor([-0.5489, -0.4686, -0.8931, -0.7237]) tensor([0.2753, 0.2983, 0.1951, 0.2312]) -Greedy action tensor([-0.8767, -0.6223, -0.2402, -0.8619]) tensor([0.1925, 0.2483, 0.3638, 0.1954]) -Greedy action tensor([ 0.0983, 0.1128, -0.0264, -0.5762]) tensor([0.2935, 0.2978, 0.2591, 0.1495]) -Greedy action tensor([ 0.8711, -0.7223, 2.1135, 0.8947]) tensor([0.1757, 0.0357, 0.6087, 0.1799]) -Greedy action tensor([-0.0348, -0.5040, 0.5700, -0.3753]) tensor([0.2399, 0.1501, 0.4393, 0.1707]) -Greedy action tensor([ 0.4436, -0.0255, 0.9182, -0.2714]) tensor([0.2687, 0.1681, 0.4318, 0.1314]) -Greedy action tensor([ 0.8671, -0.4047, -0.5464, 0.6043]) tensor([0.4362, 0.1223, 0.1061, 0.3354]) -Greedy action tensor([ 0.6600, -1.7871, 0.9864, 0.5346]) tensor([0.2981, 0.0258, 0.4131, 0.2630]) -Greedy action tensor([ 0.4347, -0.6163, 0.5351, 0.2371]) tensor([0.3053, 0.1067, 0.3375, 0.2505]) -Greedy action tensor([-0.0631, 0.0264, -0.6172, -0.8102]) tensor([0.3183, 0.3481, 0.1829, 0.1508]) -Greedy action tensor([1.2341, 0.0174, 0.5766, 0.4543]) tensor([0.4400, 0.1303, 0.2280, 0.2017]) -Greedy action tensor([ 0.3812, 0.3215, 0.3226, -0.3219]) tensor([0.2958, 0.2787, 0.2790, 0.1464]) -Greedy action tensor([ 1.3077, -0.9874, 0.0739, 1.2220]) tensor([0.4330, 0.0436, 0.1261, 0.3974]) -Greedy action tensor([-0.0982, -0.6066, 0.2111, -0.1714]) tensor([0.2569, 0.1545, 0.3499, 0.2387]) -Greedy action tensor([ 0.3086, -0.1323, 1.4681, -0.2445]) tensor([0.1849, 0.1190, 0.5897, 0.1064]) -Greedy action tensor([-0.3954, 0.1193, -0.3291, -0.1673]) tensor([0.2001, 0.3348, 0.2138, 0.2514]) -Greedy action tensor([-0.5287, -0.8583, 0.4352, -0.6952]) tensor([0.1928, 0.1386, 0.5054, 0.1632]) -Greedy action tensor([ 0.7834, -1.3094, 0.3196, -0.0205]) tensor([0.4546, 0.0561, 0.2859, 0.2035]) -Greedy action tensor([ 0.0894, 0.0843, -0.4136, -0.4866]) tensor([0.3163, 0.3147, 0.1913, 0.1778]) -Greedy action tensor([ 0.2341, -0.0185, -0.8580, -0.5615]) tensor([0.3901, 0.3030, 0.1309, 0.1761]) -Greedy action tensor([-0.8677, -0.8188, -0.5721, -0.6054]) tensor([0.2130, 0.2237, 0.2863, 0.2769]) -Greedy action tensor([ 0.1896, -1.3637, 0.6826, -0.3506]) tensor([0.2914, 0.0616, 0.4771, 0.1698]) -Greedy action tensor([0.9382, 0.1038, 0.5912, 1.4599]) tensor([0.2614, 0.1135, 0.1848, 0.4404]) -Greedy action tensor([ 1.3031, -0.0331, 0.1818, -0.2635]) tensor([0.5564, 0.1462, 0.1813, 0.1161]) -Greedy action tensor([-0.4162, -0.6625, 0.9630, -0.9451]) tensor([0.1577, 0.1232, 0.6262, 0.0929]) -Greedy action tensor([-0.9605, 0.8307, 0.3054, -0.7000]) tensor([0.0845, 0.5065, 0.2995, 0.1096]) -Greedy action tensor([ 0.3524, -0.5846, 0.3822, 0.1846]) tensor([0.3060, 0.1199, 0.3153, 0.2588]) -Greedy action tensor([-0.8142, -0.2309, -0.0650, -0.6461]) tensor([0.1642, 0.2942, 0.3473, 0.1943]) -Greedy action tensor([ 0.5078, -0.9574, 0.4689, -0.3077]) tensor([0.3795, 0.0877, 0.3650, 0.1679]) -Greedy action tensor([ 0.0924, -0.1458, 1.2104, 0.1706]) tensor([0.1687, 0.1329, 0.5160, 0.1824]) -Greedy action tensor([-1.6062, 0.7989, -0.3550, -1.4746]) tensor([0.0598, 0.6629, 0.2091, 0.0682]) -Greedy action tensor([ 0.2849, 0.3277, -0.8788, -0.8441]) tensor([0.3732, 0.3895, 0.1166, 0.1207]) -Greedy action tensor([-0.4880, -0.6467, 0.1596, -1.1547]) tensor([0.2338, 0.1995, 0.4467, 0.1200]) -Greedy action tensor([-0.2473, -2.2125, -0.1659, 0.1729]) tensor([0.2669, 0.0374, 0.2895, 0.4062]) -Greedy action tensor([ 0.8522, 0.1359, 0.0385, -0.1879]) tensor([0.4376, 0.2138, 0.1939, 0.1547]) -Greedy action tensor([-0.3592, -2.1043, 0.0950, -0.9662]) tensor([0.3035, 0.0530, 0.4781, 0.1654]) -Greedy action tensor([-0.9353, -1.2268, 0.5093, -0.8400]) tensor([0.1411, 0.1054, 0.5983, 0.1552]) -Greedy action tensor([ 0.0293, -0.0545, 1.5417, -0.5414]) tensor([0.1424, 0.1310, 0.6462, 0.0805]) -Greedy action tensor([-0.8022, -1.2190, -0.2161, 0.9738]) tensor([0.1068, 0.0704, 0.1919, 0.6309]) -Greedy action tensor([-1.7854, -0.4761, 0.5939, -0.1034]) tensor([0.0479, 0.1774, 0.5172, 0.2575]) -Greedy action tensor([-1.5608, -0.4452, 0.4939, -0.0109]) tensor([0.0604, 0.1842, 0.4711, 0.2844]) -Greedy action tensor([-1.9300, -0.4426, 0.6603, -0.1749]) tensor([0.0407, 0.1803, 0.5433, 0.2357]) -Greedy action tensor([-1.9149, -0.4424, 0.6531, -0.1658]) tensor([0.0414, 0.1805, 0.5400, 0.2381]) -Greedy action tensor([-1.9296, -0.4370, 0.6654, -0.1695]) tensor([0.0406, 0.1804, 0.5433, 0.2358]) -Greedy action tensor([-1.6741, -0.2144, 0.6251, 0.0413]) tensor([0.0480, 0.2067, 0.4785, 0.2669]) -Greedy action tensor([-1.8075, -0.4585, 0.6023, -0.1155]) tensor([0.0467, 0.1799, 0.5198, 0.2536]) -Greedy action tensor([-1.9005, -0.3528, 0.6397, -0.1496]) tensor([0.0414, 0.1947, 0.5253, 0.2386]) -Greedy action tensor([-1.6787, -0.3276, 0.5313, -0.1079]) tensor([0.0532, 0.2055, 0.4852, 0.2560]) -Greedy action tensor([-1.3851, -0.0717, 0.5265, 0.5425]) tensor([0.0545, 0.2026, 0.3685, 0.3744]) -Greedy action tensor([-1.6522, 0.0591, 0.5110, 0.0796]) tensor([0.0479, 0.2651, 0.4165, 0.2706]) -Greedy action tensor([-0.4641, -0.0521, 0.0805, 0.0706]) tensor([0.1683, 0.2541, 0.2902, 0.2873]) -Greedy action tensor([-1.9113, -0.4174, 0.6467, -0.1657]) tensor([0.0415, 0.1849, 0.5358, 0.2378]) -Greedy action tensor([-1.8576, -0.2461, 0.5879, -0.1277]) tensor([0.0431, 0.2161, 0.4975, 0.2432]) -Greedy action tensor([-0.6342, 0.8347, 0.0068, -0.0567]) tensor([0.1108, 0.4814, 0.2104, 0.1974]) -Greedy action tensor([-1.3750, -0.4922, 0.3498, 0.1220]) tensor([0.0741, 0.1791, 0.4157, 0.3311]) -Greedy action tensor([-1.1400, 0.8346, 0.1330, 0.3230]) tensor([0.0621, 0.4476, 0.2219, 0.2684]) -Greedy action tensor([-1.8665, -0.1592, 0.5970, -0.2181]) tensor([0.0426, 0.2351, 0.5007, 0.2216]) -Greedy action tensor([-1.9409, -0.4425, 0.6643, -0.1784]) tensor([0.0403, 0.1802, 0.5449, 0.2346]) -Greedy action tensor([-1.8686, -0.4523, 0.6287, -0.1475]) tensor([0.0437, 0.1803, 0.5314, 0.2445]) -Greedy action tensor([-1.9252, -0.4074, 0.6541, -0.1694]) tensor([0.0408, 0.1859, 0.5374, 0.2359]) -Greedy action tensor([-0.6131, 0.8213, -0.0217, 0.1404]) tensor([0.1096, 0.4598, 0.1979, 0.2327]) -Greedy action tensor([-0.9696, 0.2400, 0.1037, 0.0821]) tensor([0.0986, 0.3306, 0.2885, 0.2823]) -Greedy action tensor([-1.9427, -0.4437, 0.6661, -0.1779]) tensor([0.0402, 0.1798, 0.5455, 0.2345]) -Greedy action tensor([-1.0557, 0.0223, 0.4703, 0.2268]) tensor([0.0823, 0.2420, 0.3788, 0.2969]) -Greedy action tensor([-1.9116, -0.4666, 0.6483, -0.1644]) tensor([0.0418, 0.1774, 0.5409, 0.2400]) -Greedy action tensor([-1.5731, 0.0293, 0.4392, -0.0277]) tensor([0.0551, 0.2738, 0.4125, 0.2586]) -Greedy action tensor([-1.8938, -0.3896, 0.6403, -0.1502]) tensor([0.0420, 0.1889, 0.5291, 0.2400]) -Greedy action tensor([-1.7702, -0.3232, 0.6026, -0.0864]) tensor([0.0468, 0.1989, 0.5021, 0.2521]) -Greedy action tensor([ 0.5178, 1.1890, -0.0516, 0.5055]) tensor([0.2217, 0.4338, 0.1255, 0.2190]) -Greedy action tensor([-1.9070, -0.4547, 0.6529, -0.1613]) tensor([0.0418, 0.1785, 0.5403, 0.2394]) -Greedy action tensor([-1.8922, -0.3935, 0.6316, -0.1529]) tensor([0.0423, 0.1893, 0.5276, 0.2408]) -Greedy action tensor([-0.9300, -0.1245, 0.7376, 0.9763]) tensor([0.0655, 0.1466, 0.3472, 0.4407]) -Greedy action tensor([-1.8619, -0.2674, 0.6011, -0.1246]) tensor([0.0428, 0.2110, 0.5028, 0.2434]) -Greedy action tensor([-1.6600, 0.0888, 0.4103, 0.0207]) tensor([0.0499, 0.2868, 0.3955, 0.2679]) -Greedy action tensor([-1.0868, 0.7852, 0.1323, 0.2963]) tensor([0.0672, 0.4371, 0.2275, 0.2681]) -Greedy action tensor([-1.7541, -0.2762, 0.6066, -0.0796]) tensor([0.0469, 0.2056, 0.4972, 0.2503]) -Greedy action tensor([-1.4493, -0.5151, 0.5175, 0.3354]) tensor([0.0601, 0.1529, 0.4293, 0.3578]) -Greedy action tensor([-1.8651, -0.4484, 0.7881, 0.1319]) tensor([0.0375, 0.1545, 0.5320, 0.2760]) -Greedy action tensor([-1.9129, -0.4141, 0.6488, -0.1596]) tensor([0.0413, 0.1849, 0.5353, 0.2385]) -Greedy action tensor([-1.9290, -0.4503, 0.6766, -0.1690]) tensor([0.0404, 0.1773, 0.5473, 0.2350]) -Greedy action tensor([-1.8820, -0.4553, 0.6404, -0.1533]) tensor([0.0430, 0.1791, 0.5357, 0.2422]) -Greedy action tensor([-1.7895, -0.4347, 0.6025, -0.0753]) tensor([0.0468, 0.1814, 0.5119, 0.2599]) -Greedy action tensor([-1.7233, -0.2616, 0.5318, -0.0903]) tensor([0.0501, 0.2160, 0.4775, 0.2564]) -Greedy action tensor([-1.8707, -0.4645, 0.6240, -0.1391]) tensor([0.0438, 0.1786, 0.5304, 0.2473]) -Greedy action tensor([-1.8183, -0.2053, 0.5648, -0.1122]) tensor([0.0447, 0.2244, 0.4846, 0.2463]) -Greedy action tensor([-1.7658, -0.5184, 0.5627, -0.1383]) tensor([0.0504, 0.1755, 0.5174, 0.2567]) -Greedy action tensor([-1.9231, -0.4381, 0.6569, -0.1697]) tensor([0.0410, 0.1810, 0.5412, 0.2368]) -Greedy action tensor([-1.9056, -0.4522, 0.6455, -0.1620]) tensor([0.0420, 0.1796, 0.5383, 0.2401]) -Greedy action tensor([-1.8606, -0.4418, 0.6273, -0.1355]) tensor([0.0439, 0.1814, 0.5283, 0.2464]) -Greedy action tensor([-1.9239, -0.4556, 0.6674, -0.1627]) tensor([0.0408, 0.1772, 0.5446, 0.2375]) -Greedy action tensor([-1.8925, -0.4558, 0.6419, -0.1554]) tensor([0.0426, 0.1791, 0.5366, 0.2418]) -Greedy action tensor([-1.8873, -0.4625, 0.6370, -0.1582]) tensor([0.0430, 0.1786, 0.5363, 0.2421]) -Greedy action tensor([-1.6628, 0.2327, 0.4274, -0.0085]) tensor([0.0477, 0.3174, 0.3856, 0.2493]) -Greedy action tensor([-1.9244, -0.4516, 0.6590, -0.1700]) tensor([0.0410, 0.1789, 0.5431, 0.2370]) -Greedy action tensor([-1.8834, -0.3697, 0.6320, -0.1440]) tensor([0.0424, 0.1924, 0.5240, 0.2412]) -Greedy action tensor([-1.8515, -0.4516, 0.6236, -0.1340]) tensor([0.0444, 0.1801, 0.5279, 0.2475]) -Greedy action tensor([-1.9387, -0.4551, 0.6709, -0.1702]) tensor([0.0402, 0.1773, 0.5467, 0.2358]) -Greedy action tensor([-1.9243, -0.4282, 0.6582, -0.1683]) tensor([0.0408, 0.1823, 0.5404, 0.2365]) -Greedy action tensor([-1.2427, 0.7101, 0.2312, 0.0328]) tensor([0.0625, 0.4407, 0.2730, 0.2238]) -Greedy action tensor([-1.8615, -0.4572, 0.6959, -0.0980]) tensor([0.0420, 0.1711, 0.5419, 0.2450]) -Greedy action tensor([-1.4741, 0.4994, 0.2923, 0.1053]) tensor([0.0529, 0.3808, 0.3096, 0.2567]) -Greedy action tensor([-1.7006, -0.3008, 0.5459, -0.0646]) tensor([0.0509, 0.2064, 0.4813, 0.2614]) -Greedy action tensor([-1.8882, -0.3709, 0.6285, -0.1426]) tensor([0.0422, 0.1926, 0.5232, 0.2420]) -Greedy action tensor([-1.6225, -0.3416, 0.4887, -0.0866]) tensor([0.0571, 0.2057, 0.4718, 0.2654]) -Greedy action tensor([-1.4541, -0.3793, 0.4880, -0.1763]) tensor([0.0690, 0.2022, 0.4812, 0.2476]) -Greedy action tensor([-1.8091, -0.4812, 0.5888, -0.1042]) tensor([0.0470, 0.1774, 0.5171, 0.2586]) -Greedy action tensor([-1.7521, -0.3607, 0.6036, -0.0715]) tensor([0.0478, 0.1921, 0.5037, 0.2565]) -Greedy action tensor([-1.3407, 0.2652, 0.3214, 0.1534]) tensor([0.0637, 0.3172, 0.3355, 0.2836]) -Greedy action tensor([-1.7034, -0.2819, 0.5281, -0.0970]) tensor([0.0514, 0.2131, 0.4791, 0.2564]) -Greedy action tensor([-0.7886, -0.3126, 0.5365, 0.7855]) tensor([0.0893, 0.1437, 0.3360, 0.4310]) -Greedy action tensor([-0.9832, -0.3806, 0.3133, -0.0737]) tensor([0.1115, 0.2038, 0.4078, 0.2769]) -Greedy action tensor([-0.6673, 0.2252, -0.0451, 0.0144]) tensor([0.1373, 0.3353, 0.2559, 0.2715]) -Greedy action tensor([-1.7984, -0.3788, 0.6645, 0.0196]) tensor([0.0434, 0.1795, 0.5096, 0.2674]) -Greedy action tensor([-0.5205, 0.9406, 0.0467, 0.1928]) tensor([0.1097, 0.4729, 0.1935, 0.2239]) -Greedy action tensor([-1.4334, -0.0810, 0.3732, 0.0578]) tensor([0.0649, 0.2511, 0.3955, 0.2885]) -Greedy action tensor([-1.7778, -0.5024, 0.8220, 0.1193]) tensor([0.0405, 0.1449, 0.5448, 0.2698]) -Greedy action tensor([-1.8512, -0.5493, 0.9172, -0.0605]) tensor([0.0376, 0.1382, 0.5989, 0.2253]) -Greedy action tensor([-1.8471, -0.4700, 0.6386, -0.1278]) tensor([0.0443, 0.1757, 0.5325, 0.2474]) -Greedy action tensor([-1.6073, -0.4920, 0.5792, 0.1823]) tensor([0.0528, 0.1610, 0.4701, 0.3161]) -Greedy action tensor([-1.9315, -0.4427, 0.6623, -0.1727]) tensor([0.0406, 0.1800, 0.5435, 0.2358]) -Greedy action tensor([ 0.7664, -0.0531, -0.1811, -0.0873]) tensor([0.4436, 0.1955, 0.1720, 0.1889]) -Greedy action tensor([ 0.3697, 0.0785, 0.0466, -0.3408]) tensor([0.3375, 0.2523, 0.2443, 0.1659]) -Greedy action tensor([ 0.7756, -0.4169, -0.1004, -0.2380]) tensor([0.4801, 0.1457, 0.1999, 0.1742]) -Greedy action tensor([ 0.4511, -0.1574, -0.0807, -0.0142]) tensor([0.3624, 0.1972, 0.2129, 0.2276]) -Greedy action tensor([ 0.8406, -0.6999, -0.0575, -0.4229]) tensor([0.5251, 0.1125, 0.2139, 0.1484]) -Greedy action tensor([ 0.7343, -0.4432, -0.0812, -0.3262]) tensor([0.4769, 0.1469, 0.2110, 0.1652]) -Greedy action tensor([ 0.6882, -0.0614, 0.1239, -0.0579]) tensor([0.3975, 0.1879, 0.2261, 0.1885]) -Greedy action tensor([ 0.8472, -0.4664, 0.0892, -0.1746]) tensor([0.4768, 0.1282, 0.2234, 0.1716]) -Greedy action tensor([ 0.9335, -0.6151, 0.1128, -0.3423]) tensor([0.5176, 0.1100, 0.2278, 0.1445]) -Greedy action tensor([ 0.3688, -0.1375, -0.0439, -0.1468]) tensor([0.3494, 0.2106, 0.2313, 0.2087]) -Greedy action tensor([ 0.5420, -0.5442, -0.0328, -0.1923]) tensor([0.4201, 0.1418, 0.2365, 0.2016]) -Greedy action tensor([ 0.6140, -0.2263, -0.0093, -0.2510]) tensor([0.4186, 0.1807, 0.2245, 0.1763]) -Greedy action tensor([ 0.9451, -0.8278, 0.0475, -0.3081]) tensor([0.5368, 0.0912, 0.2188, 0.1533]) -Greedy action tensor([ 0.8642, -0.5892, 0.1012, -0.3598]) tensor([0.5015, 0.1172, 0.2338, 0.1475]) -Greedy action tensor([ 0.5900, -0.1023, -0.0524, -0.1327]) tensor([0.3981, 0.1992, 0.2094, 0.1933]) -Greedy action tensor([ 0.5281, -0.0678, -0.0117, -0.0064]) tensor([0.3677, 0.2026, 0.2143, 0.2154]) -Greedy action tensor([ 0.7833, -0.5043, -0.0148, -0.2284]) tensor([0.4785, 0.1320, 0.2154, 0.1740]) -Greedy action tensor([ 0.1989, 0.0771, -0.1375, -0.2835]) tensor([0.3109, 0.2752, 0.2221, 0.1919]) -Greedy action tensor([ 0.6135, 0.3276, -0.0791, 0.0989]) tensor([0.3510, 0.2637, 0.1756, 0.2098]) -Greedy action tensor([ 0.3708, -0.2343, -0.1414, -0.0706]) tensor([0.3586, 0.1958, 0.2149, 0.2307]) -Greedy action tensor([ 0.4172, -0.5044, -0.1241, -0.1239]) tensor([0.3903, 0.1553, 0.2272, 0.2272]) -Greedy action tensor([ 1.0533, -0.7046, 0.1846, -0.3015]) tensor([0.5406, 0.0932, 0.2268, 0.1395]) -Greedy action tensor([ 0.9481, -0.4256, 0.0813, -0.2557]) tensor([0.5067, 0.1283, 0.2130, 0.1520]) -Greedy action tensor([ 0.6824, -0.4700, 0.0064, -0.4011]) tensor([0.4623, 0.1460, 0.2352, 0.1565]) -Greedy action tensor([ 0.8763, -0.6980, -0.0042, -0.3075]) tensor([0.5187, 0.1074, 0.2150, 0.1588]) -Greedy action tensor([ 0.8293, -0.4262, 0.0336, -0.6065]) tensor([0.5065, 0.1443, 0.2286, 0.1205]) -Greedy action tensor([ 0.3854, -0.0043, -0.0828, 0.0249]) tensor([0.3333, 0.2257, 0.2087, 0.2324]) -Greedy action tensor([ 1.0830, -0.7669, 0.0136, -0.5432]) tensor([0.5892, 0.0927, 0.2022, 0.1159]) -Greedy action tensor([ 0.5815, -0.1484, 0.0434, -0.1629]) tensor([0.3936, 0.1897, 0.2298, 0.1870]) -Greedy action tensor([ 0.3934, -0.0811, -0.0456, -0.4007]) tensor([0.3678, 0.2288, 0.2371, 0.1663]) -Greedy action tensor([ 0.8471, -0.3532, 0.0333, -0.3383]) tensor([0.4878, 0.1469, 0.2162, 0.1491]) -Greedy action tensor([ 0.5698, -0.3901, 0.0913, -0.3542]) tensor([0.4167, 0.1596, 0.2583, 0.1654]) -Greedy action tensor([ 1.1323, -0.9320, 0.0339, -0.7610]) tensor([0.6208, 0.0788, 0.2070, 0.0935]) -Greedy action tensor([ 1.2098, -0.9102, 0.0079, -0.6417]) tensor([0.6338, 0.0761, 0.1906, 0.0995]) -Greedy action tensor([ 0.5202, -0.2515, -0.1051, -0.1839]) tensor([0.4013, 0.1855, 0.2147, 0.1985]) -Greedy action tensor([ 0.2959, -0.1713, -0.0664, -0.1924]) tensor([0.3405, 0.2134, 0.2370, 0.2090]) -Greedy action tensor([ 1.2019, -1.1569, 0.0435, -0.4106]) tensor([0.6219, 0.0588, 0.1953, 0.1240]) -Greedy action tensor([ 0.7776, -0.4024, 0.0156, -0.2557]) tensor([0.4695, 0.1443, 0.2191, 0.1671]) -Greedy action tensor([ 0.6234, -0.5894, 0.0079, -0.3982]) tensor([0.4550, 0.1353, 0.2459, 0.1638]) -Greedy action tensor([ 0.4562, 0.6716, -0.3544, 0.3202]) tensor([0.2811, 0.3486, 0.1250, 0.2453]) -Greedy action tensor([ 0.4128, -0.0107, 0.0260, -0.5042]) tensor([0.3658, 0.2395, 0.2485, 0.1462]) -Greedy action tensor([ 0.8758, -0.6600, 0.0103, -0.4435]) tensor([0.5254, 0.1131, 0.2211, 0.1404]) -Greedy action tensor([ 0.8496, -0.5628, -0.0452, -0.6868]) tensor([0.5355, 0.1304, 0.2189, 0.1152]) -Greedy action tensor([ 1.0694, -0.7238, 0.0957, -0.5351]) tensor([0.5730, 0.0954, 0.2164, 0.1152]) -Greedy action tensor([ 1.1201, -0.7829, 0.2574, -0.5470]) tensor([0.5682, 0.0847, 0.2398, 0.1073]) -Greedy action tensor([ 0.7612, -0.6108, 0.0339, -0.3862]) tensor([0.4868, 0.1234, 0.2352, 0.1545]) -Greedy action tensor([ 0.8618, -0.5887, -0.0203, -0.4420]) tensor([0.5209, 0.1221, 0.2156, 0.1414]) -Greedy action tensor([ 0.9924, -0.6117, 0.1333, -0.7896]) tensor([0.5578, 0.1121, 0.2362, 0.0939]) -Greedy action tensor([ 0.7225, -0.8279, -0.1227, -0.1713]) tensor([0.4876, 0.1035, 0.2094, 0.1995]) -Greedy action tensor([ 0.7774, -0.5232, 0.0549, -0.3810]) tensor([0.4827, 0.1315, 0.2343, 0.1515]) -Greedy action tensor([ 0.4917, -0.3290, -0.1631, -0.0608]) tensor([0.3945, 0.1736, 0.2049, 0.2270]) -Greedy action tensor([ 1.0079, -0.6514, -0.0076, -0.7397]) tensor([0.5792, 0.1102, 0.2098, 0.1009]) -Greedy action tensor([ 1.1597, -0.8173, -0.0080, -0.3286]) tensor([0.5969, 0.0827, 0.1857, 0.1348]) -Greedy action tensor([ 0.5973, -0.4509, -0.1163, -0.2168]) tensor([0.4379, 0.1535, 0.2145, 0.1940]) -Greedy action tensor([ 0.4988, -0.1814, -0.1703, -0.3428]) tensor([0.4082, 0.2068, 0.2091, 0.1760]) -Greedy action tensor([ 0.7720, -0.3475, -0.0297, -0.1569]) tensor([0.4608, 0.1504, 0.2067, 0.1820]) -Greedy action tensor([ 0.5644, -0.4873, 0.1130, -0.3337]) tensor([0.4178, 0.1460, 0.2660, 0.1702]) -Greedy action tensor([ 0.7172, -0.0752, 0.1047, -0.3241]) tensor([0.4259, 0.1928, 0.2309, 0.1504]) -Greedy action tensor([ 0.6291, -0.0913, -0.1014, -0.0089]) tensor([0.4005, 0.1949, 0.1929, 0.2116]) -Greedy action tensor([ 0.7544, -0.6083, 0.0921, -0.4113]) tensor([0.4800, 0.1229, 0.2475, 0.1496]) -Greedy action tensor([ 0.3990, -0.0242, -0.0518, 0.0398]) tensor([0.3344, 0.2190, 0.2131, 0.2335]) -Greedy action tensor([ 0.8743, -0.4096, 0.1246, -0.1684]) tensor([0.4757, 0.1318, 0.2248, 0.1677]) -Greedy action tensor([ 0.6520, -0.4635, 0.3698, 0.0129]) tensor([0.3832, 0.1256, 0.2890, 0.2022]) -Greedy action tensor([ 0.2129, 0.1530, -0.0704, -0.4532]) tensor([0.3116, 0.2935, 0.2348, 0.1601]) -Greedy action tensor([ 0.7501, -0.2928, 0.0320, -0.0173]) tensor([0.4340, 0.1529, 0.2116, 0.2015]) -Greedy action tensor([ 1.0004, -0.6547, -0.1337, -0.5352]) tensor([0.5787, 0.1106, 0.1862, 0.1246]) -Greedy action tensor([ 0.3797, -0.2640, -0.1125, -0.1032]) tensor([0.3632, 0.1908, 0.2220, 0.2241]) -Greedy action tensor([ 1.0755, -0.5878, -0.1358, -0.4329]) tensor([0.5853, 0.1109, 0.1743, 0.1295]) -Greedy action tensor([ 0.5722, 0.1353, -0.0435, 0.0297]) tensor([0.3613, 0.2334, 0.1952, 0.2100]) -Greedy action tensor([ 0.9195, -0.7583, -0.0857, -0.7641]) tensor([0.5752, 0.1074, 0.2105, 0.1068]) -Greedy action tensor([ 0.5862, 0.1812, -0.1268, -0.1752]) tensor([0.3811, 0.2542, 0.1868, 0.1780]) -Greedy action tensor([ 0.5676, -0.6385, -0.2317, -0.2391]) tensor([0.4555, 0.1364, 0.2048, 0.2033]) -Greedy action tensor([ 2.1423e-01, 1.5476e-04, -1.5281e-01, -1.6644e-01]) tensor([0.3141, 0.2536, 0.2176, 0.2147]) -Greedy action tensor([ 0.6530, -0.1706, -0.1238, 0.0074]) tensor([0.4127, 0.1811, 0.1898, 0.2164]) -Greedy action tensor([ 0.5139, -0.4942, -0.0868, -0.2617]) tensor([0.4213, 0.1537, 0.2310, 0.1940]) -Greedy action tensor([ 1.0203, -0.6863, -0.1266, -0.5236]) tensor([0.5839, 0.1060, 0.1855, 0.1247]) -Greedy action tensor([ 1.0562, -0.6413, 0.1505, -0.5069]) tensor([0.5565, 0.1019, 0.2250, 0.1166]) -Greedy action tensor([ 1.1824, -0.6574, 0.0550, -0.7694]) tensor([0.6155, 0.0978, 0.1993, 0.0874]) -Greedy action tensor([ 0.8321, -0.3390, 0.0034, -0.3331]) tensor([0.4858, 0.1506, 0.2121, 0.1515]) -Greedy action tensor([ 0.8130, -0.4508, -0.1835, -0.4670]) tensor([0.5182, 0.1464, 0.1913, 0.1441]) -Greedy action tensor([ 0.8064, -0.6413, 0.0815, -0.4515]) tensor([0.4991, 0.1173, 0.2417, 0.1419]) -Greedy action tensor([ 2.0145, -1.0190, -0.5227, 0.3452]) tensor([0.7601, 0.0366, 0.0601, 0.1432]) -Greedy action tensor([ 1.9044, 0.7389, 0.3019, -0.4684]) tensor([0.6225, 0.1941, 0.1254, 0.0580]) -Greedy action tensor([ 1.6563, -0.3332, -0.8706, -0.0624]) tensor([0.7164, 0.0980, 0.0572, 0.1284]) -Greedy action tensor([ 1.4378, -0.5976, -0.4275, 0.1978]) tensor([0.6350, 0.0829, 0.0983, 0.1838]) -Greedy action tensor([ 1.3144, -0.4263, -0.4922, 0.1199]) tensor([0.6088, 0.1068, 0.1000, 0.1844]) -Greedy action tensor([ 1.3494, -0.5622, -0.1458, 0.3699]) tensor([0.5722, 0.0846, 0.1283, 0.2149]) -Greedy action tensor([ 2.0034, -0.5015, -0.7200, 0.8676]) tensor([0.6810, 0.0556, 0.0447, 0.2187]) -Greedy action tensor([ 1.6942, -1.2495, -0.0745, 0.5549]) tensor([0.6480, 0.0341, 0.1105, 0.2074]) -Greedy action tensor([ 1.9616, -0.6589, -0.4100, 0.6307]) tensor([0.6991, 0.0509, 0.0653, 0.1847]) -Greedy action tensor([ 1.5094, -0.5310, -1.2596, 0.1156]) tensor([0.6941, 0.0902, 0.0435, 0.1722]) -Greedy action tensor([ 1.5125, -0.6959, -0.3797, 0.7015]) tensor([0.5865, 0.0644, 0.0884, 0.2606]) -Greedy action tensor([ 1.3936, -0.8086, -0.2019, 0.2650]) tensor([0.6109, 0.0675, 0.1239, 0.1976]) -Greedy action tensor([ 1.1397, -0.5919, -0.5622, 0.5276]) tensor([0.5259, 0.0931, 0.0959, 0.2851]) -Greedy action tensor([ 1.0881, -0.1912, -0.4784, 0.5518]) tensor([0.4826, 0.1343, 0.1008, 0.2823]) -Greedy action tensor([ 1.6333, -0.2732, -0.2010, 0.2806]) tensor([0.6382, 0.0948, 0.1019, 0.1650]) -Greedy action tensor([ 1.5483, -0.5855, -0.3128, 0.2688]) tensor([0.6443, 0.0763, 0.1002, 0.1792]) -Greedy action tensor([ 1.2856, -0.2785, -0.6029, 0.6178]) tensor([0.5338, 0.1117, 0.0808, 0.2737]) -Greedy action tensor([ 1.8304, -0.9289, -0.2908, 0.5227]) tensor([0.6879, 0.0436, 0.0825, 0.1860]) -Greedy action tensor([ 1.3645, -0.5833, -1.0597, 0.5324]) tensor([0.6002, 0.0856, 0.0531, 0.2611]) -Greedy action tensor([ 1.3710, -0.2560, -0.3890, 0.2054]) tensor([0.5951, 0.1170, 0.1024, 0.1855]) -Greedy action tensor([ 1.1312, -0.0917, -0.6652, 0.2389]) tensor([0.5348, 0.1574, 0.0887, 0.2191]) -Greedy action tensor([ 2.5045, -1.1561, 0.0776, 0.7225]) tensor([0.7798, 0.0201, 0.0689, 0.1312]) -Greedy action tensor([ 1.7755, -0.6748, -0.1284, 0.4972]) tensor([0.6606, 0.0570, 0.0984, 0.1840]) -Greedy action tensor([ 1.6910, -0.0645, -0.5716, 0.3354]) tensor([0.6516, 0.1126, 0.0678, 0.1680]) -Greedy action tensor([ 1.2477, -0.4086, -0.5544, 0.3160]) tensor([0.5715, 0.1091, 0.0943, 0.2251]) -Greedy action tensor([ 2.6282, -1.2044, -0.5140, 0.6625]) tensor([0.8299, 0.0180, 0.0358, 0.1162]) -Greedy action tensor([ 1.5108, -0.2718, -0.7259, -0.0342]) tensor([0.6719, 0.1130, 0.0718, 0.1433]) -Greedy action tensor([ 1.4654, -0.4028, -0.3716, 0.5644]) tensor([0.5815, 0.0898, 0.0926, 0.2362]) -Greedy action tensor([ 1.4640, -0.2163, -0.4479, 0.2876]) tensor([0.6088, 0.1134, 0.0900, 0.1877]) -Greedy action tensor([ 1.3291, -0.5060, -0.3608, 0.3913]) tensor([0.5762, 0.0920, 0.1063, 0.2256]) -Greedy action tensor([ 0.8510, -0.4105, -0.0782, -0.0078]) tensor([0.4758, 0.1348, 0.1879, 0.2016]) -Greedy action tensor([ 1.7898, -0.4344, -0.7991, 0.5925]) tensor([0.6733, 0.0728, 0.0506, 0.2033]) -Greedy action tensor([ 1.6420, -0.3718, -0.8440, 0.2786]) tensor([0.6791, 0.0906, 0.0565, 0.1737]) -Greedy action tensor([ 1.1458, -0.3342, -0.1453, -0.1048]) tensor([0.5590, 0.1272, 0.1537, 0.1601]) -Greedy action tensor([ 2.1124, -1.2113, -0.1941, 0.6265]) tensor([0.7342, 0.0264, 0.0731, 0.1662]) -Greedy action tensor([ 1.4586, -0.6484, -0.5017, 0.6592]) tensor([0.5841, 0.0710, 0.0823, 0.2626]) -Greedy action tensor([ 1.6291, -0.4117, -0.4113, 0.3684]) tensor([0.6479, 0.0842, 0.0842, 0.1837]) -Greedy action tensor([ 1.6048, -0.6137, -0.4381, 0.1429]) tensor([0.6802, 0.0740, 0.0882, 0.1577]) -Greedy action tensor([ 1.3954, -0.3259, -0.3554, 0.2190]) tensor([0.6021, 0.1077, 0.1045, 0.1857]) -Greedy action tensor([ 1.2788, -0.8167, -0.5104, 0.2157]) tensor([0.6114, 0.0752, 0.1022, 0.2112]) -Greedy action tensor([ 1.2532, -0.3189, -0.1522, 0.0926]) tensor([0.5662, 0.1176, 0.1389, 0.1774]) -Greedy action tensor([ 1.4267, -0.4618, -0.3087, 0.6753]) tensor([0.5558, 0.0841, 0.0980, 0.2622]) -Greedy action tensor([ 1.3010, -0.3910, -0.2858, 0.3237]) tensor([0.5666, 0.1043, 0.1159, 0.2132]) -Greedy action tensor([ 1.6461, -0.5374, -0.2889, 0.0976]) tensor([0.6804, 0.0766, 0.0983, 0.1446]) -Greedy action tensor([ 1.4422, -0.6195, -0.3177, 0.3151]) tensor([0.6160, 0.0784, 0.1060, 0.1996]) -Greedy action tensor([ 1.2818, -0.5404, -0.1251, 0.2211]) tensor([0.5705, 0.0922, 0.1397, 0.1975]) -Greedy action tensor([ 1.6466, -0.8400, -0.3257, 0.6676]) tensor([0.6258, 0.0521, 0.0871, 0.2351]) -Greedy action tensor([ 1.7013, -0.6415, -0.2828, 0.0798]) tensor([0.6987, 0.0671, 0.0961, 0.1381]) -Greedy action tensor([ 1.4738, -0.2496, 0.1433, -0.1703]) tensor([0.6113, 0.1091, 0.1616, 0.1181]) -Greedy action tensor([ 1.8887, -0.6295, 0.0304, 0.4957]) tensor([0.6735, 0.0543, 0.1050, 0.1672]) -Greedy action tensor([ 1.3251, -0.2962, -0.6944, -0.0043]) tensor([0.6269, 0.1239, 0.0832, 0.1659]) -Greedy action tensor([ 2.5639, 0.6507, 0.1616, -0.0295]) tensor([0.7617, 0.1124, 0.0689, 0.0569]) -Greedy action tensor([ 0.9314, -0.1412, -0.4126, 0.7728]) tensor([0.4071, 0.1393, 0.1062, 0.3474]) -Greedy action tensor([ 1.4480, 0.0526, -0.4742, 0.1966]) tensor([0.5952, 0.1474, 0.0871, 0.1703]) -Greedy action tensor([ 1.4089, -0.1790, -0.8199, 0.1413]) tensor([0.6275, 0.1282, 0.0676, 0.1767]) -Greedy action tensor([ 0.9912, -0.3849, -0.1955, 0.0066]) tensor([0.5178, 0.1308, 0.1580, 0.1934]) -Greedy action tensor([ 1.6467, -0.8853, -0.8155, 0.5940]) tensor([0.6606, 0.0525, 0.0563, 0.2305]) -Greedy action tensor([ 1.6525, -0.4629, -0.0224, 0.5806]) tensor([0.6060, 0.0731, 0.1135, 0.2075]) -Greedy action tensor([ 1.5910, -0.4056, -0.6256, 0.5756]) tensor([0.6223, 0.0845, 0.0678, 0.2254]) -Greedy action tensor([ 2.0496, -1.2117, -0.3187, 0.3750]) tensor([0.7580, 0.0291, 0.0710, 0.1420]) -Greedy action tensor([ 1.4262, -0.6800, -0.3909, 0.3653]) tensor([0.6134, 0.0746, 0.0997, 0.2123]) -Greedy action tensor([ 1.8070, -1.1595, -0.5535, 0.4727]) tensor([0.7096, 0.0365, 0.0670, 0.1869]) -Greedy action tensor([ 1.9540, -0.7028, -0.2734, 0.3518]) tensor([0.7249, 0.0509, 0.0782, 0.1460]) -Greedy action tensor([ 1.0045, -0.3109, -0.4493, 0.5419]) tensor([0.4691, 0.1259, 0.1096, 0.2954]) -Greedy action tensor([ 0.9230, -0.2870, -0.0337, -0.0382]) tensor([0.4843, 0.1444, 0.1860, 0.1852]) -Greedy action tensor([ 2.3286, -0.7224, -0.5441, 0.7463]) tensor([0.7637, 0.0361, 0.0432, 0.1569]) -Greedy action tensor([ 1.1078, -0.4483, -0.3559, 0.2719]) tensor([0.5331, 0.1125, 0.1233, 0.2311]) -Greedy action tensor([ 1.7482, -0.4891, -0.6075, 0.5579]) tensor([0.6641, 0.0709, 0.0630, 0.2020]) -Greedy action tensor([ 1.1177, -0.2085, -0.1830, 0.1862]) tensor([0.5177, 0.1374, 0.1410, 0.2039]) -Greedy action tensor([ 1.0379, -0.5162, -0.2684, 0.0162]) tensor([0.5428, 0.1147, 0.1470, 0.1954]) -Greedy action tensor([ 1.7002, -0.8209, -0.5734, 0.1277]) tensor([0.7190, 0.0578, 0.0740, 0.1492]) -Greedy action tensor([ 0.8424, -0.1609, -0.5302, 0.1370]) tensor([0.4730, 0.1735, 0.1199, 0.2336]) -Greedy action tensor([ 1.3192, 0.0663, -0.8296, 0.0793]) tensor([0.5911, 0.1689, 0.0689, 0.1711]) -Greedy action tensor([ 0.9719, 0.0730, -0.7190, 0.6019]) tensor([0.4382, 0.1783, 0.0808, 0.3027]) -Greedy action tensor([ 1.0807, -0.6630, -0.0491, 0.0572]) tensor([0.5384, 0.0942, 0.1740, 0.1935]) -Greedy action tensor([ 1.1922, -0.2024, -0.6971, 0.3485]) tensor([0.5467, 0.1355, 0.0826, 0.2351]) -Greedy action tensor([ 1.0425, -0.3664, -0.6219, 0.0600]) tensor([0.5531, 0.1352, 0.1047, 0.2071]) -Greedy action tensor([ 1.3022, -0.4750, -0.1771, 0.3061]) tensor([0.5662, 0.0958, 0.1290, 0.2091]) -Greedy action tensor([ 1.3675, -0.2082, -0.1917, 0.4719]) tensor([0.5478, 0.1133, 0.1152, 0.2237]) -Greedy action tensor([ 1.2908, -0.3272, -1.1799, 0.1574]) tensor([0.6231, 0.1236, 0.0527, 0.2006]) -Greedy action tensor([ 1.8219, -0.5936, -0.3678, 0.1551]) tensor([0.7194, 0.0643, 0.0805, 0.1359]) -Greedy action tensor([ 0.4626, 0.1261, -0.4309, 0.0624]) tensor([0.3580, 0.2557, 0.1465, 0.2399]) -Greedy action tensor([ 0.5674, -1.3747, -0.5999, 1.0979]) tensor([0.3170, 0.0455, 0.0987, 0.5389]) -Greedy action tensor([-0.9948, -0.2210, 0.4408, 1.2887]) tensor([0.0582, 0.1262, 0.2446, 0.5710]) -Greedy action tensor([-0.8534, 0.2798, 0.0457, -1.1565]) tensor([0.1370, 0.4253, 0.3366, 0.1011]) -Greedy action tensor([ 0.8382, -0.6494, -0.4248, 0.4132]) tensor([0.4624, 0.1045, 0.1308, 0.3023]) -Greedy action tensor([-1.2877, -0.4652, 0.1417, -0.8367]) tensor([0.1108, 0.2523, 0.4629, 0.1740]) -Greedy action tensor([ 0.0856, 0.2000, -1.1353, 0.3259]) tensor([0.2712, 0.3040, 0.0800, 0.3448]) -Greedy action tensor([-0.6438, 0.8065, 0.6130, -0.4158]) tensor([0.0997, 0.4250, 0.3502, 0.1252]) -Greedy action tensor([-0.0706, -0.4423, 0.1447, -0.3433]) tensor([0.2709, 0.1868, 0.3360, 0.2063]) -Greedy action tensor([-0.7793, 0.3316, -0.6104, -0.4062]) tensor([0.1499, 0.4551, 0.1774, 0.2176]) -Greedy action tensor([ 0.0975, -0.6534, 0.7684, -0.5739]) tensor([0.2539, 0.1198, 0.4966, 0.1297]) -Greedy action tensor([-1.1135, -0.8038, -0.5257, -0.5416]) tensor([0.1685, 0.2297, 0.3033, 0.2985]) -Greedy action tensor([-0.2454, -0.3698, 0.0961, -1.0518]) tensor([0.2676, 0.2363, 0.3766, 0.1195]) -Greedy action tensor([ 0.6050, -1.1984, -0.7585, 0.3112]) tensor([0.4617, 0.0761, 0.1181, 0.3442]) -Greedy action tensor([-0.4869, -0.4785, -0.1435, -0.0892]) tensor([0.2038, 0.2055, 0.2873, 0.3034]) -Greedy action tensor([-0.9298, -1.0736, 0.7266, 0.2350]) tensor([0.0970, 0.0840, 0.5082, 0.3108]) -Greedy action tensor([ 0.4282, -0.4358, -0.1857, -0.3501]) tensor([0.4129, 0.1740, 0.2235, 0.1896]) -Greedy action tensor([-0.8606, -0.7895, 0.3231, -0.3875]) tensor([0.1440, 0.1546, 0.4703, 0.2311]) -Greedy action tensor([-0.6647, 0.0128, 0.3368, -1.0405]) tensor([0.1568, 0.3087, 0.4268, 0.1077]) -Greedy action tensor([-0.3216, -0.5674, -0.2077, -1.2753]) tensor([0.3041, 0.2379, 0.3408, 0.1172]) -Greedy action tensor([0.4527, 0.4829, 0.7384, 0.4226]) tensor([0.2309, 0.2379, 0.3072, 0.2240]) -Greedy action tensor([-0.1352, 0.1419, -0.8750, -0.6667]) tensor([0.2955, 0.3898, 0.1410, 0.1737]) -Greedy action tensor([ 1.0088, -2.4544, 0.0826, 0.5325]) tensor([0.4882, 0.0153, 0.1933, 0.3032]) -Greedy action tensor([-0.1036, -1.2938, -0.2132, 0.9232]) tensor([0.2003, 0.0609, 0.1795, 0.5593]) -Greedy action tensor([ 0.7377, -0.0103, -0.3018, -0.8700]) tensor([0.4933, 0.2335, 0.1744, 0.0988]) -Greedy action tensor([ 0.0432, -0.8677, 0.3616, -0.1954]) tensor([0.2805, 0.1128, 0.3857, 0.2210]) -Greedy action tensor([ 0.3235, -0.9316, -0.1373, 0.5837]) tensor([0.3112, 0.0887, 0.1963, 0.4037]) -Greedy action tensor([ 1.9780, 0.1511, -0.6714, 0.3212]) tensor([0.7031, 0.1131, 0.0497, 0.1341]) -Greedy action tensor([ 0.7296, -0.3247, -0.1188, -0.3253]) tensor([0.4706, 0.1640, 0.2015, 0.1639]) -Greedy action tensor([-0.7397, -0.7040, -0.3838, 0.5778]) tensor([0.1389, 0.1440, 0.1983, 0.5188]) -Greedy action tensor([-0.0999, -0.3653, 0.5275, -1.0675]) tensor([0.2488, 0.1908, 0.4659, 0.0945]) -Greedy action tensor([ 0.1275, -1.2462, 1.0089, 0.1560]) tensor([0.2129, 0.0539, 0.5141, 0.2191]) -Greedy action tensor([-0.7510, -0.9752, 0.0620, -0.6141]) tensor([0.1923, 0.1537, 0.4335, 0.2205]) -Greedy action tensor([0.9745, 0.0364, 0.6200, 0.6068]) tensor([0.3590, 0.1405, 0.2519, 0.2486]) -Greedy action tensor([ 0.9823, -0.0152, -0.3508, 0.1544]) tensor([0.4832, 0.1782, 0.1274, 0.2111]) -Greedy action tensor([ 1.1319, -0.5819, -0.2253, 0.9318]) tensor([0.4432, 0.0799, 0.1141, 0.3628]) -Greedy action tensor([ 0.2887, -0.2296, 0.8484, -0.4614]) tensor([0.2619, 0.1560, 0.4584, 0.1237]) -Greedy action tensor([ 0.2795, -0.8927, 0.1852, -0.1291]) tensor([0.3467, 0.1074, 0.3155, 0.2304]) -Greedy action tensor([ 0.2202, 0.0115, -0.3420, 0.8078]) tensor([0.2392, 0.1941, 0.1363, 0.4304]) -Greedy action tensor([-0.2023, -1.5596, 0.0590, 0.1698]) tensor([0.2496, 0.0642, 0.3241, 0.3621]) -Greedy action tensor([-0.1461, -0.6772, 0.0266, -0.6328]) tensor([0.2949, 0.1734, 0.3505, 0.1813]) -Greedy action tensor([-0.1073, -0.9537, 1.5228, -0.6272]) tensor([0.1403, 0.0602, 0.7161, 0.0834]) -Greedy action tensor([ 1.0465, -0.7790, 0.5151, 0.1406]) tensor([0.4644, 0.0748, 0.2730, 0.1877]) -Greedy action tensor([ 1.1317, 0.3803, -0.1420, 0.0168]) tensor([0.4809, 0.2268, 0.1346, 0.1577]) -Greedy action tensor([ 0.7084, -2.1981, 0.4817, -0.0918]) tensor([0.4346, 0.0238, 0.3464, 0.1952]) -Greedy action tensor([ 0.6551, -0.5663, 0.1978, -0.2356]) tensor([0.4277, 0.1261, 0.2707, 0.1755]) -Greedy action tensor([ 0.0504, -0.3563, -0.4411, 0.2483]) tensor([0.2860, 0.1904, 0.1750, 0.3486]) -Greedy action tensor([-1.1160, -0.7194, 0.4898, -0.2638]) tensor([0.1019, 0.1515, 0.5077, 0.2389]) -Greedy action tensor([-0.3472, -0.0096, -0.4819, -0.3218]) tensor([0.2325, 0.3259, 0.2032, 0.2385]) -Greedy action tensor([ 1.1603, -0.0397, 0.9352, 0.1470]) tensor([0.4061, 0.1223, 0.3242, 0.1474]) -Greedy action tensor([ 0.3800, -1.9804, 0.1663, 0.2133]) tensor([0.3639, 0.0343, 0.2938, 0.3080]) -Greedy action tensor([-0.7853, -1.4207, 1.0972, -0.9316]) tensor([0.1116, 0.0591, 0.7329, 0.0964]) -Greedy action tensor([ 0.8699, -1.0343, -0.0842, 0.1219]) tensor([0.4982, 0.0742, 0.1919, 0.2358]) -Greedy action tensor([ 0.6687, -0.4380, -0.2790, 0.5150]) tensor([0.3882, 0.1284, 0.1505, 0.3329]) -Greedy action tensor([-0.0160, 0.8262, 0.9707, -0.3261]) tensor([0.1484, 0.3446, 0.3981, 0.1089]) -Greedy action tensor([ 0.2693, -0.3716, -0.1644, 0.7045]) tensor([0.2688, 0.1416, 0.1742, 0.4154]) -Greedy action tensor([-0.8130, -1.6687, -0.3797, -0.0725]) tensor([0.1975, 0.0839, 0.3046, 0.4141]) -Greedy action tensor([ 0.3087, -0.1973, 0.0867, -0.1331]) tensor([0.3282, 0.1979, 0.2629, 0.2110]) -Greedy action tensor([-1.3702, -0.3520, -1.1109, -0.1901]) tensor([0.1202, 0.3328, 0.1558, 0.3912]) -Greedy action tensor([ 0.4566, -1.6426, 0.0966, 0.4600]) tensor([0.3541, 0.0434, 0.2471, 0.3554]) -Greedy action tensor([ 0.3856, -1.4148, 0.5892, 0.9146]) tensor([0.2446, 0.0404, 0.2998, 0.4151]) -Greedy action tensor([ 0.6337, -0.9460, 2.1989, 0.4183]) tensor([0.1471, 0.0303, 0.7039, 0.1186]) -Greedy action tensor([ 0.8899, -0.4587, 1.0190, -0.4127]) tensor([0.3746, 0.0973, 0.4263, 0.1018]) -Greedy action tensor([-0.2589, -1.3575, -0.2569, 0.2990]) tensor([0.2450, 0.0817, 0.2454, 0.4279]) -Greedy action tensor([ 0.7113, -0.5568, -0.5437, 0.2498]) tensor([0.4552, 0.1281, 0.1298, 0.2869]) -Greedy action tensor([-0.6912, -0.7091, -0.4878, -0.1730]) tensor([0.2046, 0.2010, 0.2508, 0.3436]) -Greedy action tensor([-0.2552, -0.3332, 0.1186, 0.1860]) tensor([0.2027, 0.1875, 0.2946, 0.3152]) -Greedy action tensor([-0.3243, -0.6007, 0.5169, -0.5595]) tensor([0.2054, 0.1558, 0.4764, 0.1624]) -Greedy action tensor([-0.2947, -0.3156, 0.7601, -0.4117]) tensor([0.1742, 0.1706, 0.5002, 0.1550]) -Greedy action tensor([ 0.2979, -1.1203, 2.0800, -0.6470]) tensor([0.1320, 0.0320, 0.7847, 0.0513]) -Greedy action tensor([ 0.4440, -0.8480, -1.0339, -0.5737]) tensor([0.5364, 0.1474, 0.1224, 0.1939]) -Greedy action tensor([ 0.5891, -0.6355, 0.6807, 1.0175]) tensor([0.2548, 0.0749, 0.2792, 0.3911]) -Greedy action tensor([ 1.3743, 0.6200, -0.5228, 0.6285]) tensor([0.4774, 0.2245, 0.0716, 0.2265]) -Greedy action tensor([-0.0409, -0.5362, -0.6260, -0.1912]) tensor([0.3304, 0.2013, 0.1840, 0.2843]) -Greedy action tensor([ 0.2934, 0.2120, 0.0292, -0.1595]) tensor([0.3007, 0.2772, 0.2309, 0.1912]) -Greedy action tensor([-0.5502, 0.3545, 0.2974, -1.5003]) tensor([0.1615, 0.3991, 0.3770, 0.0625]) -Greedy action tensor([ 0.0954, -0.1364, -0.1269, 0.6560]) tensor([0.2301, 0.1825, 0.1843, 0.4031]) -Greedy action tensor([ 1.2730, 0.1076, -0.1355, -0.5662]) tensor([0.5830, 0.1818, 0.1426, 0.0927]) -Greedy action tensor([ 0.2929, -1.8639, -0.2135, 0.7594]) tensor([0.3019, 0.0349, 0.1819, 0.4813]) -Greedy action tensor([-0.3982, -0.7373, -1.0175, -0.1607]) tensor([0.2842, 0.2025, 0.1530, 0.3604]) -Greedy action tensor([ 5.6351e-01, -1.8009e+00, -1.2717e-03, -4.4239e-01]) tensor([0.4930, 0.0464, 0.2803, 0.1803]) -Greedy action tensor([-1.9399, -0.4378, 0.6631, -0.1781]) tensor([0.0403, 0.1810, 0.5441, 0.2346]) -Greedy action tensor([-1.6354, -0.2757, 0.4830, 0.0850]) tensor([0.0532, 0.2072, 0.4424, 0.2972]) -Greedy action tensor([-1.8760, -0.4291, 0.6722, -0.1215]) tensor([0.0420, 0.1785, 0.5368, 0.2427]) -Greedy action tensor([-1.4908, -0.5964, 0.4425, 0.0880]) tensor([0.0658, 0.1608, 0.4545, 0.3189]) -Greedy action tensor([-1.4056, 0.1500, 0.3254, -0.6595]) tensor([0.0741, 0.3511, 0.4185, 0.1563]) -Greedy action tensor([-0.1389, 1.1175, 0.0431, 0.1825]) tensor([0.1410, 0.4954, 0.1692, 0.1945]) -Greedy action tensor([-1.5177, -0.2329, 0.6164, 0.0204]) tensor([0.0564, 0.2040, 0.4768, 0.2628]) -Greedy action tensor([-1.1384, 0.7851, 0.1296, 0.2689]) tensor([0.0646, 0.4421, 0.2295, 0.2638]) -Greedy action tensor([-1.1837, 0.7426, 0.1489, 0.2121]) tensor([0.0637, 0.4374, 0.2416, 0.2573]) -Greedy action tensor([-1.9231, -0.4254, 0.6579, -0.1643]) tensor([0.0408, 0.1826, 0.5395, 0.2371]) -Greedy action tensor([-1.8636, -0.2760, 0.5985, -0.1386]) tensor([0.0430, 0.2106, 0.5048, 0.2416]) -Greedy action tensor([-1.7710, -0.4071, 0.6611, -0.0943]) tensor([0.0462, 0.1807, 0.5260, 0.2471]) -Greedy action tensor([-1.7108, -0.2312, 0.5664, -0.0585]) tensor([0.0491, 0.2157, 0.4789, 0.2563]) -Greedy action tensor([-1.8381, -0.3655, 0.6360, -0.1059]) tensor([0.0437, 0.1905, 0.5187, 0.2470]) -Greedy action tensor([-1.8471, -0.0985, 0.5616, -0.1119]) tensor([0.0425, 0.2442, 0.4724, 0.2409]) -Greedy action tensor([-1.9021, -0.4190, 0.6560, -0.1632]) tensor([0.0417, 0.1835, 0.5378, 0.2370]) -Greedy action tensor([-1.7588, -0.2781, 0.5838, -0.0511]) tensor([0.0469, 0.2062, 0.4882, 0.2587]) -Greedy action tensor([-1.9484, -0.4554, 0.6723, -0.1821]) tensor([0.0399, 0.1777, 0.5488, 0.2336]) -Greedy action tensor([-1.4783, -0.6213, 0.4461, 0.0558]) tensor([0.0674, 0.1587, 0.4615, 0.3124]) -Greedy action tensor([-1.5243, -0.5561, 0.4313, 0.0419]) tensor([0.0646, 0.1700, 0.4563, 0.3091]) -Greedy action tensor([-1.8711, -0.3896, 0.6246, -0.1378]) tensor([0.0431, 0.1897, 0.5231, 0.2440]) -Greedy action tensor([-1.7996, -0.3841, 0.5757, -0.1006]) tensor([0.0469, 0.1930, 0.5039, 0.2562]) -Greedy action tensor([-1.5591, -0.5212, 0.4562, 0.0146]) tensor([0.0619, 0.1748, 0.4646, 0.2987]) -Greedy action tensor([-1.9350, -0.4287, 0.6615, -0.1709]) tensor([0.0404, 0.1821, 0.5418, 0.2357]) -Greedy action tensor([-1.8822, -0.0072, 0.5700, -0.1428]) tensor([0.0403, 0.2626, 0.4678, 0.2293]) -Greedy action tensor([-1.6580, -0.4935, 0.5969, 0.2304]) tensor([0.0491, 0.1575, 0.4686, 0.3248]) -Greedy action tensor([-1.9114, -0.3815, 0.6494, -0.1406]) tensor([0.0409, 0.1889, 0.5297, 0.2404]) -Greedy action tensor([-1.9329, -0.4442, 0.6653, -0.1757]) tensor([0.0405, 0.1796, 0.5448, 0.2350]) -Greedy action tensor([-1.5863, -0.0682, 0.5283, 0.2071]) tensor([0.0504, 0.2298, 0.4173, 0.3026]) -Greedy action tensor([-1.9220, -0.4336, 0.6550, -0.1667]) tensor([0.0410, 0.1817, 0.5399, 0.2374]) -Greedy action tensor([-1.1252, -0.2042, 0.4372, 0.0619]) tensor([0.0865, 0.2173, 0.4127, 0.2835]) -Greedy action tensor([-1.8652, -0.4126, 0.6299, -0.1382]) tensor([0.0434, 0.1857, 0.5266, 0.2443]) -Greedy action tensor([-1.6793, -0.1860, 0.4837, -0.0075]) tensor([0.0514, 0.2286, 0.4467, 0.2733]) -Greedy action tensor([-1.8854, -0.4210, 0.6350, -0.1480]) tensor([0.0427, 0.1845, 0.5304, 0.2424]) -Greedy action tensor([-1.3373, -0.0255, 0.2876, 0.2025]) tensor([0.0692, 0.2569, 0.3513, 0.3226]) -Greedy action tensor([-1.6640, -0.3367, 0.6687, 0.1491]) tensor([0.0472, 0.1778, 0.4860, 0.2890]) -Greedy action tensor([-0.7542, -0.2430, 0.2247, -0.0702]) tensor([0.1368, 0.2281, 0.3641, 0.2711]) -Greedy action tensor([-1.8937, -0.4430, 0.6434, -0.1565]) tensor([0.0424, 0.1809, 0.5359, 0.2408]) -Greedy action tensor([-1.8759, -0.4491, 0.6340, -0.1508]) tensor([0.0433, 0.1805, 0.5330, 0.2432]) -Greedy action tensor([-1.5949, -0.6766, 0.3306, -0.0880]) tensor([0.0672, 0.1684, 0.4610, 0.3034]) -Greedy action tensor([-1.9087, -0.4034, 0.6487, -0.1573]) tensor([0.0414, 0.1864, 0.5338, 0.2384]) -Greedy action tensor([-1.8637, -0.4289, 0.6257, -0.1417]) tensor([0.0438, 0.1838, 0.5276, 0.2449]) -Greedy action tensor([-1.8445, -0.3554, 0.6179, -0.1068]) tensor([0.0438, 0.1940, 0.5135, 0.2488]) -Greedy action tensor([-1.7781, -0.4749, 0.5804, -0.1007]) tensor([0.0485, 0.1786, 0.5132, 0.2597]) -Greedy action tensor([-1.5861, 0.1294, 0.4010, -0.0079]) tensor([0.0535, 0.2973, 0.3901, 0.2592]) -Greedy action tensor([-1.8014, -0.4725, 0.5955, -0.1361]) tensor([0.0475, 0.1794, 0.5220, 0.2511]) -Greedy action tensor([-1.9209, -0.4244, 0.6568, -0.1643]) tensor([0.0409, 0.1828, 0.5391, 0.2371]) -Greedy action tensor([-1.9274, -0.4296, 0.6518, -0.1703]) tensor([0.0409, 0.1829, 0.5392, 0.2370]) -Greedy action tensor([-0.4683, 0.9321, 0.0657, 0.0887]) tensor([0.1175, 0.4768, 0.2005, 0.2052]) -Greedy action tensor([-0.8614, 0.1105, 0.6763, 0.9572]) tensor([0.0692, 0.1828, 0.3219, 0.4262]) -Greedy action tensor([-1.8147, -0.2130, 0.6009, -0.0990]) tensor([0.0440, 0.2184, 0.4928, 0.2448]) -Greedy action tensor([-1.4085, -0.0408, 0.4035, -0.0887]) tensor([0.0676, 0.2654, 0.4139, 0.2530]) -Greedy action tensor([-1.1693, 0.2541, 0.2739, -0.0499]) tensor([0.0803, 0.3335, 0.3401, 0.2460]) -Greedy action tensor([-1.8853, -0.3223, 0.6265, -0.1510]) tensor([0.0421, 0.2008, 0.5187, 0.2384]) -Greedy action tensor([-0.9580, 0.3653, 0.5397, 1.0876]) tensor([0.0590, 0.2214, 0.2636, 0.4560]) -Greedy action tensor([-1.9192, -0.4124, 0.6508, -0.1675]) tensor([0.0411, 0.1854, 0.5367, 0.2368]) -Greedy action tensor([-1.9156, -0.4231, 0.6546, -0.1653]) tensor([0.0412, 0.1833, 0.5384, 0.2371]) -Greedy action tensor([-1.8310, -0.2211, 0.6006, -0.1124]) tensor([0.0436, 0.2179, 0.4956, 0.2429]) -Greedy action tensor([-1.9109, -0.4528, 0.6515, -0.1665]) tensor([0.0417, 0.1792, 0.5406, 0.2386]) -Greedy action tensor([-1.4535, -0.3529, 0.4505, -0.0993]) tensor([0.0685, 0.2060, 0.4600, 0.2655]) -Greedy action tensor([-1.9452, -0.4548, 0.6741, -0.1788]) tensor([0.0400, 0.1775, 0.5487, 0.2338]) -Greedy action tensor([-1.5882, -0.1556, 0.4651, -0.0698]) tensor([0.0570, 0.2388, 0.4441, 0.2601]) -Greedy action tensor([-1.3917, -0.5062, 0.4149, 0.3365]) tensor([0.0660, 0.1601, 0.4021, 0.3718]) -Greedy action tensor([-0.7772, -0.5190, 0.2136, 0.0589]) tensor([0.1371, 0.1775, 0.3692, 0.3163]) -Greedy action tensor([-1.9136, -0.3278, 0.6319, -0.1538]) tensor([0.0409, 0.1998, 0.5216, 0.2377]) -Greedy action tensor([-1.9031, -0.4519, 0.6848, -0.1243]) tensor([0.0408, 0.1743, 0.5431, 0.2418]) -Greedy action tensor([-1.9182, -0.3908, 0.6516, -0.1575]) tensor([0.0408, 0.1881, 0.5335, 0.2375]) -Greedy action tensor([-1.6077, -0.0821, 0.4570, -0.0787]) tensor([0.0553, 0.2541, 0.4357, 0.2550]) -Greedy action tensor([-1.9264, -0.4748, 0.7783, -0.0474]) tensor([0.0374, 0.1595, 0.5585, 0.2446]) -Greedy action tensor([-0.8455, 0.5295, 0.1712, -0.1014]) tensor([0.1018, 0.4026, 0.2814, 0.2142]) -Greedy action tensor([-1.7916, -0.2616, 0.5638, -0.0988]) tensor([0.0463, 0.2139, 0.4882, 0.2517]) -Greedy action tensor([-1.8731, -0.4370, 0.6234, -0.1594]) tensor([0.0437, 0.1836, 0.5303, 0.2424]) -Greedy action tensor([-1.8887, -0.3865, 0.6376, -0.1409]) tensor([0.0421, 0.1892, 0.5268, 0.2419]) -Greedy action tensor([-1.0684, -0.3582, 0.1356, -0.1180]) tensor([0.1117, 0.2272, 0.3723, 0.2889]) -Greedy action tensor([-1.7023, -0.4791, 0.5547, -0.0562]) tensor([0.0522, 0.1775, 0.4992, 0.2710]) -Greedy action tensor([-1.0327, 0.0166, 0.3976, -0.4451]) tensor([0.1017, 0.2903, 0.4250, 0.1830]) -Greedy action tensor([-1.4199, -0.3367, 0.4001, 0.0511]) tensor([0.0691, 0.2040, 0.4263, 0.3007]) -Greedy action tensor([-1.6956, -0.1773, 0.4988, -0.0576]) tensor([0.0508, 0.2319, 0.4559, 0.2614]) -Greedy action tensor([-1.7943, -0.3484, 0.5504, -0.1016]) tensor([0.0474, 0.2011, 0.4941, 0.2574]) -Greedy action tensor([-1.8426, -0.4648, 0.6221, -0.1337]) tensor([0.0449, 0.1783, 0.5286, 0.2482]) -Greedy action tensor([-1.7973, -0.4589, 0.6027, -0.1035]) tensor([0.0470, 0.1792, 0.5181, 0.2557]) -Greedy action tensor([ 0.6212, 0.0246, -0.0271, 0.1123]) tensor([0.3739, 0.2059, 0.1955, 0.2248]) -Greedy action tensor([ 0.2916, -0.0870, -0.1136, -0.3321]) tensor([0.3463, 0.2372, 0.2309, 0.1856]) -Greedy action tensor([ 1.0511, 0.3609, -0.0931, -0.3427]) tensor([0.4835, 0.2425, 0.1540, 0.1200]) -Greedy action tensor([ 0.6337, -0.1993, 0.0308, -0.4967]) tensor([0.4339, 0.1886, 0.2374, 0.1401]) -Greedy action tensor([ 0.5261, -0.3506, -0.1538, 0.0025]) tensor([0.3976, 0.1655, 0.2014, 0.2355]) -Greedy action tensor([ 1.3089, -0.8919, -0.0801, -0.7927]) tensor([0.6746, 0.0747, 0.1682, 0.0825]) -Greedy action tensor([ 0.5909, -0.3689, -0.0644, -0.0237]) tensor([0.4093, 0.1568, 0.2126, 0.2214]) -Greedy action tensor([ 1.0586, -0.6252, 0.0200, -0.4448]) tensor([0.5675, 0.1054, 0.2009, 0.1262]) -Greedy action tensor([ 0.9421, -0.3155, -0.0472, -0.1253]) tensor([0.5000, 0.1422, 0.1859, 0.1719]) -Greedy action tensor([ 1.0929, -0.5077, -0.0379, -0.2278]) tensor([0.5582, 0.1126, 0.1802, 0.1490]) -Greedy action tensor([ 0.9768, -0.9472, 0.1296, -0.4581]) tensor([0.5516, 0.0806, 0.2364, 0.1314]) -Greedy action tensor([ 0.8364, -0.5857, 0.0622, -0.6391]) tensor([0.5179, 0.1249, 0.2388, 0.1184]) -Greedy action tensor([ 1.0113, -0.7901, 0.0088, -0.5167]) tensor([0.5718, 0.0944, 0.2098, 0.1241]) -Greedy action tensor([ 0.5390, -0.2260, -0.1330, 0.0112]) tensor([0.3897, 0.1814, 0.1990, 0.2299]) -Greedy action tensor([ 0.8347, -0.5169, -0.1218, -0.1600]) tensor([0.4968, 0.1286, 0.1909, 0.1837]) -Greedy action tensor([ 0.9929, -0.4783, -0.0233, -0.1844]) tensor([0.5264, 0.1209, 0.1905, 0.1622]) -Greedy action tensor([ 0.7061, -0.1616, -0.1633, -0.0190]) tensor([0.4304, 0.1807, 0.1804, 0.2084]) -Greedy action tensor([ 0.4501, -0.2050, -0.0355, -0.1562]) tensor([0.3731, 0.1938, 0.2296, 0.2035]) -Greedy action tensor([ 1.1930, -1.0160, 0.0233, -0.6969]) tensor([0.6364, 0.0699, 0.1976, 0.0962]) -Greedy action tensor([0.4717, 0.0300, 0.0370, 0.0692]) tensor([0.3380, 0.2173, 0.2188, 0.2260]) -Greedy action tensor([ 0.9952, -0.5819, -0.1876, -0.4462]) tensor([0.5716, 0.1181, 0.1751, 0.1352]) -Greedy action tensor([ 0.6351, -0.5501, -0.0544, -0.3496]) tensor([0.4585, 0.1401, 0.2301, 0.1713]) -Greedy action tensor([ 0.8219, -0.1923, 0.1364, -0.1208]) tensor([0.4432, 0.1608, 0.2233, 0.1727]) -Greedy action tensor([ 0.9851, -0.4269, 0.0815, -0.3576]) tensor([0.5236, 0.1276, 0.2121, 0.1367]) -Greedy action tensor([ 0.5747, -0.0996, -0.0964, -0.3227]) tensor([0.4118, 0.2098, 0.2105, 0.1679]) -Greedy action tensor([ 1.0042, -0.8165, 0.0389, -0.4203]) tensor([0.5607, 0.0908, 0.2136, 0.1349]) -Greedy action tensor([ 0.6542, -0.4850, -0.1179, -0.2491]) tensor([0.4572, 0.1463, 0.2112, 0.1853]) -Greedy action tensor([ 0.5108, -0.5037, -0.0495, -0.2815]) tensor([0.4190, 0.1519, 0.2393, 0.1897]) -Greedy action tensor([ 1.0294, -0.5008, 0.0303, -0.3266]) tensor([0.5428, 0.1175, 0.1998, 0.1399]) -Greedy action tensor([ 0.6453, 0.3813, -0.0725, -0.1789]) tensor([0.3711, 0.2850, 0.1811, 0.1628]) -Greedy action tensor([ 0.5274, -0.1844, 0.0845, -0.2736]) tensor([0.3873, 0.1901, 0.2487, 0.1739]) -Greedy action tensor([ 0.5550, -0.0894, -0.1993, -0.3347]) tensor([0.4156, 0.2182, 0.1955, 0.1707]) -Greedy action tensor([ 0.1352, 0.0473, -0.1281, -0.0946]) tensor([0.2874, 0.2633, 0.2209, 0.2284]) -Greedy action tensor([ 0.7269, -0.7075, 0.1498, -0.2638]) tensor([0.4606, 0.1097, 0.2586, 0.1710]) -Greedy action tensor([ 0.7267, -0.3721, 0.2078, -0.5110]) tensor([0.4507, 0.1502, 0.2683, 0.1307]) -Greedy action tensor([ 0.6887, -0.4326, -0.1158, -0.3116]) tensor([0.4671, 0.1522, 0.2089, 0.1718]) -Greedy action tensor([ 0.5233, -0.3414, -0.1442, -0.1053]) tensor([0.4053, 0.1707, 0.2079, 0.2161]) -Greedy action tensor([ 0.5978, -0.4049, -0.0886, -0.3013]) tensor([0.4391, 0.1611, 0.2210, 0.1787]) -Greedy action tensor([ 0.8596, -0.5310, 0.0425, -0.5609]) tensor([0.5175, 0.1288, 0.2286, 0.1250]) -Greedy action tensor([ 0.7811, -0.6627, 0.0673, -0.3770]) tensor([0.4902, 0.1157, 0.2401, 0.1540]) -Greedy action tensor([ 0.5704, -0.2682, -0.0392, -0.3857]) tensor([0.4237, 0.1832, 0.2303, 0.1629]) -Greedy action tensor([ 0.8610, -0.5909, 0.1006, -0.5098]) tensor([0.5114, 0.1197, 0.2391, 0.1298]) -Greedy action tensor([ 0.3265, -0.4238, -0.1489, -0.2401]) tensor([0.3758, 0.1774, 0.2336, 0.2132]) -Greedy action tensor([ 0.8972, -0.7680, -0.1539, -0.3822]) tensor([0.5504, 0.1041, 0.1924, 0.1531]) -Greedy action tensor([ 0.6655, -0.7243, -0.1057, -0.4324]) tensor([0.4890, 0.1218, 0.2261, 0.1631]) -Greedy action tensor([ 0.8542, -0.2558, -0.0209, 0.0083]) tensor([0.4596, 0.1515, 0.1916, 0.1973]) -Greedy action tensor([ 1.0271, -0.6725, -0.0446, -0.3516]) tensor([0.5627, 0.1028, 0.1927, 0.1417]) -Greedy action tensor([ 0.6314, 0.0791, -0.1050, 0.0104]) tensor([0.3858, 0.2221, 0.1848, 0.2073]) -Greedy action tensor([ 0.8222, -0.7896, 0.1694, -0.4063]) tensor([0.4968, 0.0991, 0.2586, 0.1454]) -Greedy action tensor([ 0.6356, -0.2547, -0.1448, -0.0388]) tensor([0.4205, 0.1726, 0.1927, 0.2142]) -Greedy action tensor([ 0.5380, -0.6253, -0.1053, -0.4073]) tensor([0.4491, 0.1403, 0.2360, 0.1745]) -Greedy action tensor([ 0.5339, -0.6768, -0.1151, -0.2961]) tensor([0.4431, 0.1321, 0.2316, 0.1932]) -Greedy action tensor([ 0.8026, -0.5010, -0.0540, -0.1382]) tensor([0.4793, 0.1302, 0.2035, 0.1871]) -Greedy action tensor([ 0.8681, -0.5936, 0.1223, -0.2664]) tensor([0.4931, 0.1143, 0.2339, 0.1586]) -Greedy action tensor([ 0.5947, -0.1734, -0.0775, -0.0970]) tensor([0.4040, 0.1874, 0.2063, 0.2023]) -Greedy action tensor([ 0.5015, -0.1599, -0.0508, -0.0989]) tensor([0.3787, 0.1955, 0.2180, 0.2078]) -Greedy action tensor([ 0.7071, -0.4231, -0.0905, -0.2410]) tensor([0.4628, 0.1495, 0.2084, 0.1793]) -Greedy action tensor([ 0.4954, -0.0537, -0.0666, -0.4756]) tensor([0.3958, 0.2286, 0.2257, 0.1499]) -Greedy action tensor([ 0.9371, -0.9865, 0.0338, -0.5157]) tensor([0.5602, 0.0818, 0.2270, 0.1310]) -Greedy action tensor([ 0.6700, -0.4951, 0.0117, -0.1536]) tensor([0.4408, 0.1375, 0.2282, 0.1935]) -Greedy action tensor([ 0.7391, -0.4519, -0.0720, -0.1255]) tensor([0.4609, 0.1401, 0.2048, 0.1942]) -Greedy action tensor([ 0.6546, -0.4288, 0.0042, -0.0092]) tensor([0.4210, 0.1425, 0.2197, 0.2168]) -Greedy action tensor([ 0.5972, -0.3207, -0.0708, -0.0625]) tensor([0.4117, 0.1644, 0.2111, 0.2128]) -Greedy action tensor([ 0.3219, -0.0837, 0.1754, -0.5982]) tensor([0.3414, 0.2276, 0.2949, 0.1361]) -Greedy action tensor([ 0.5822, -0.2683, -0.1637, -0.0887]) tensor([0.4145, 0.1771, 0.1966, 0.2119]) -Greedy action tensor([ 1.3981, -0.7115, -0.1763, -0.5428]) tensor([0.6793, 0.0824, 0.1407, 0.0975]) -Greedy action tensor([ 0.9227, -0.5465, -0.0543, -0.4047]) tensor([0.5343, 0.1229, 0.2011, 0.1417]) -Greedy action tensor([ 0.6514, -0.4001, -0.1406, -0.2418]) tensor([0.4521, 0.1580, 0.2048, 0.1851]) -Greedy action tensor([ 1.0353, -0.7159, 0.1820, -0.4670]) tensor([0.5488, 0.0953, 0.2338, 0.1222]) -Greedy action tensor([ 0.4612, -0.0611, -0.0349, -0.0500]) tensor([0.3569, 0.2117, 0.2173, 0.2141]) -Greedy action tensor([ 0.8250, -0.6813, 0.0269, -0.5349]) tensor([0.5185, 0.1150, 0.2334, 0.1331]) -Greedy action tensor([ 0.6910, -0.6187, -0.0809, -0.3795]) tensor([0.4820, 0.1301, 0.2227, 0.1652]) -Greedy action tensor([ 1.1022, -0.5708, -0.1483, -0.3230]) tensor([0.5833, 0.1095, 0.1670, 0.1403]) -Greedy action tensor([ 0.6618, -0.1343, -0.0236, -0.2317]) tensor([0.4230, 0.1908, 0.2131, 0.1731]) -Greedy action tensor([ 0.7251, -0.6146, -0.1312, -0.1966]) tensor([0.4797, 0.1257, 0.2038, 0.1909]) -Greedy action tensor([ 0.8170, -0.7620, 0.0738, -0.3059]) tensor([0.4982, 0.1027, 0.2369, 0.1621]) -Greedy action tensor([ 0.2945, 0.0606, -0.0630, -0.1491]) tensor([0.3192, 0.2526, 0.2233, 0.2049]) -Greedy action tensor([ 1.0396, -0.6625, -0.1136, -0.4898]) tensor([0.5832, 0.1063, 0.1841, 0.1264]) -Greedy action tensor([ 0.7633, -0.4297, -0.0379, -0.6512]) tensor([0.5012, 0.1520, 0.2249, 0.1218]) -Greedy action tensor([ 0.8389, -0.6638, -0.0855, -0.2269]) tensor([0.5092, 0.1133, 0.2020, 0.1754]) -Greedy action tensor([ 1.1201, -0.8115, -0.0953, -0.4006]) tensor([0.6024, 0.0873, 0.1787, 0.1317]) -Greedy action tensor([ 2.1977, -0.1831, -0.1897, 0.2777]) tensor([0.7513, 0.0695, 0.0690, 0.1102]) -Greedy action tensor([ 1.9563, -0.8141, -0.1829, 0.1905]) tensor([0.7400, 0.0463, 0.0871, 0.1266]) -Greedy action tensor([ 1.2475, -0.1177, -0.0360, 0.5665]) tensor([0.4905, 0.1253, 0.1359, 0.2483]) -Greedy action tensor([ 1.9362, -0.8672, -0.0831, 0.4964]) tensor([0.6992, 0.0424, 0.0928, 0.1657]) -Greedy action tensor([ 1.5940, -0.7106, -0.4742, 0.2721]) tensor([0.6699, 0.0669, 0.0847, 0.1786]) -Greedy action tensor([ 1.4385, -0.4623, -0.6357, 0.4861]) tensor([0.6021, 0.0900, 0.0757, 0.2323]) -Greedy action tensor([ 1.2606, -0.0340, -0.4145, -0.1233]) tensor([0.5842, 0.1601, 0.1094, 0.1464]) -Greedy action tensor([ 1.4704, -0.3903, -0.2334, 0.2763]) tensor([0.6096, 0.0948, 0.1109, 0.1847]) -Greedy action tensor([ 2.0205, -0.1563, -1.1067, 0.7596]) tensor([0.6941, 0.0787, 0.0304, 0.1967]) -Greedy action tensor([ 1.3722, -0.3848, -0.7120, 0.2469]) tensor([0.6167, 0.1064, 0.0767, 0.2002]) -Greedy action tensor([ 1.3379, -0.4247, -0.4788, 0.5149]) tensor([0.5639, 0.0968, 0.0917, 0.2476]) -Greedy action tensor([ 1.6612, 0.0240, -0.4015, 0.3996]) tensor([0.6231, 0.1212, 0.0792, 0.1765]) -Greedy action tensor([ 1.9639, 0.5327, -0.2390, 0.1594]) tensor([0.6605, 0.1579, 0.0730, 0.1087]) -Greedy action tensor([ 1.5298, -0.5169, -0.5808, 0.2991]) tensor([0.6483, 0.0837, 0.0786, 0.1894]) -Greedy action tensor([ 1.3062, -0.2795, -0.6208, 0.4543]) tensor([0.5628, 0.1153, 0.0819, 0.2401]) -Greedy action tensor([ 2.2584, -1.0607, -0.5947, 1.0445]) tensor([0.7190, 0.0260, 0.0415, 0.2136]) -Greedy action tensor([ 2.1036, -0.4521, -0.2374, 0.3344]) tensor([0.7439, 0.0578, 0.0716, 0.1268]) -Greedy action tensor([ 1.3394, -0.5941, -0.2307, 0.4709]) tensor([0.5642, 0.0816, 0.1174, 0.2368]) -Greedy action tensor([ 1.6135, -0.1383, -0.3433, 0.4520]) tensor([0.6143, 0.1066, 0.0868, 0.1923]) -Greedy action tensor([ 1.7539, -0.3870, -0.4683, 0.2190]) tensor([0.6938, 0.0815, 0.0752, 0.1495]) -Greedy action tensor([ 1.1633, -0.5169, -0.4763, 0.2368]) tensor([0.5630, 0.1049, 0.1092, 0.2229]) -Greedy action tensor([ 1.3378, -0.2542, -0.6596, 0.4039]) tensor([0.5773, 0.1175, 0.0783, 0.2269]) -Greedy action tensor([ 1.7333, -0.2103, -0.4169, 0.4332]) tensor([0.6527, 0.0935, 0.0760, 0.1779]) -Greedy action tensor([ 1.0595, -0.2126, -0.0407, 0.4682]) tensor([0.4615, 0.1294, 0.1536, 0.2555]) -Greedy action tensor([ 1.5028, -0.1574, -0.4589, 0.5094]) tensor([0.5879, 0.1118, 0.0827, 0.2177]) -Greedy action tensor([ 1.3826, 0.0725, -0.3481, -0.3085]) tensor([0.6130, 0.1654, 0.1086, 0.1130]) -Greedy action tensor([ 1.8167, -1.0470, -0.2193, 0.6568]) tensor([0.6662, 0.0380, 0.0870, 0.2089]) -Greedy action tensor([ 1.5466, -0.3463, -0.4358, 0.3213]) tensor([0.6321, 0.0952, 0.0871, 0.1856]) -Greedy action tensor([ 1.7965, -0.8240, -0.4159, 0.5709]) tensor([0.6776, 0.0493, 0.0742, 0.1989]) -Greedy action tensor([ 1.6601, -0.0647, -0.4755, 0.5871]) tensor([0.6104, 0.1088, 0.0721, 0.2087]) -Greedy action tensor([ 1.9094, -0.4361, -0.6289, 0.9119]) tensor([0.6478, 0.0621, 0.0512, 0.2389]) -Greedy action tensor([ 1.5121, -0.8391, -0.3299, 0.3969]) tensor([0.6323, 0.0602, 0.1002, 0.2073]) -Greedy action tensor([ 1.2197, -0.1004, -0.3799, 0.6510]) tensor([0.4913, 0.1312, 0.0992, 0.2782]) -Greedy action tensor([ 1.9453, -0.8042, -0.4582, 0.5540]) tensor([0.7127, 0.0456, 0.0644, 0.1773]) -Greedy action tensor([ 2.2151, -1.3169, -0.2193, 0.6195]) tensor([0.7578, 0.0222, 0.0664, 0.1537]) -Greedy action tensor([ 1.1849, -0.2027, -0.1282, -0.8239]) tensor([0.6050, 0.1511, 0.1628, 0.0812]) -Greedy action tensor([ 1.6103, -0.5402, -0.6175, 0.1239]) tensor([0.6895, 0.0803, 0.0743, 0.1559]) -Greedy action tensor([ 2.1100, -0.0595, -0.7020, 0.3570]) tensor([0.7421, 0.0848, 0.0446, 0.1286]) -Greedy action tensor([ 1.4949, -0.5796, -0.3295, 0.3245]) tensor([0.6261, 0.0787, 0.1010, 0.1942]) -Greedy action tensor([ 1.4807, -0.7910, -0.4207, 0.5197]) tensor([0.6116, 0.0631, 0.0914, 0.2339]) -Greedy action tensor([ 1.2072, -0.1405, -0.6404, 0.3977]) tensor([0.5369, 0.1395, 0.0846, 0.2390]) -Greedy action tensor([ 1.6896, -0.4720, -0.4472, 0.6667]) tensor([0.6279, 0.0723, 0.0741, 0.2257]) -Greedy action tensor([ 2.0091, -1.4352, -0.1978, 1.0151]) tensor([0.6613, 0.0211, 0.0728, 0.2448]) -Greedy action tensor([ 2.0495, -0.9059, -0.7219, 0.2208]) tensor([0.7842, 0.0408, 0.0491, 0.1260]) -Greedy action tensor([ 1.7930, -0.4894, -0.4779, 0.8623]) tensor([0.6252, 0.0638, 0.0645, 0.2465]) -Greedy action tensor([ 1.2150, -0.5070, -0.7434, 0.1305]) tensor([0.6032, 0.1078, 0.0851, 0.2039]) -Greedy action tensor([ 2.0656, -0.7196, -0.4999, 1.0561]) tensor([0.6653, 0.0411, 0.0512, 0.2424]) -Greedy action tensor([ 1.6521, -0.5106, -0.6458, 0.0017]) tensor([0.7105, 0.0817, 0.0714, 0.1364]) -Greedy action tensor([ 2.1354, 0.2765, -0.2145, 0.3000]) tensor([0.7088, 0.1105, 0.0676, 0.1131]) -Greedy action tensor([ 1.8906, 0.0468, -0.6607, 0.0980]) tensor([0.7129, 0.1128, 0.0556, 0.1187]) -Greedy action tensor([ 1.1999, -0.2518, -0.7824, 0.1191]) tensor([0.5844, 0.1368, 0.0805, 0.1983]) -Greedy action tensor([ 1.0547, -0.1639, -1.0820, 0.3162]) tensor([0.5287, 0.1563, 0.0624, 0.2526]) -Greedy action tensor([ 1.9883, -0.6257, -0.4877, 0.7312]) tensor([0.6936, 0.0508, 0.0583, 0.1973]) -Greedy action tensor([ 1.0293, -0.5727, -0.2389, -0.0741]) tensor([0.5511, 0.1110, 0.1550, 0.1828]) -Greedy action tensor([ 1.8625, -0.6771, -0.0834, 0.5350]) tensor([0.6725, 0.0531, 0.0961, 0.1783]) -Greedy action tensor([ 1.3547, -0.5057, -0.6526, 0.5051]) tensor([0.5822, 0.0906, 0.0782, 0.2489]) -Greedy action tensor([ 1.2646, -0.1765, -0.0409, 0.3367]) tensor([0.5255, 0.1244, 0.1424, 0.2078]) -Greedy action tensor([ 0.9035, -0.3017, -0.3568, 0.3610]) tensor([0.4620, 0.1384, 0.1310, 0.2685]) -Greedy action tensor([ 1.6566, -0.4189, -0.7768, 0.3959]) tensor([0.6681, 0.0839, 0.0586, 0.1894]) -Greedy action tensor([ 2.1407, -1.1768, 0.2410, 0.9016]) tensor([0.6777, 0.0246, 0.1014, 0.1963]) -Greedy action tensor([ 2.3824, -1.0894, -0.3043, 0.5985]) tensor([0.7892, 0.0245, 0.0537, 0.1326]) -Greedy action tensor([ 1.5585, -0.3555, -0.7478, 0.3536]) tensor([0.6465, 0.0954, 0.0644, 0.1938]) -Greedy action tensor([ 1.6153, -0.4736, -0.6126, 0.9824]) tensor([0.5673, 0.0702, 0.0611, 0.3013]) -Greedy action tensor([ 1.8107, -0.8201, -0.3134, 0.2528]) tensor([0.7132, 0.0514, 0.0853, 0.1502]) -Greedy action tensor([ 1.3960, -0.4238, -0.1817, 0.5161]) tensor([0.5607, 0.0909, 0.1158, 0.2326]) -Greedy action tensor([ 2.0585, -1.3200, -0.2767, 0.1879]) tensor([0.7783, 0.0265, 0.0753, 0.1199]) -Greedy action tensor([ 1.4532, -0.2179, -0.5750, 0.4147]) tensor([0.5975, 0.1124, 0.0786, 0.2115]) -Greedy action tensor([ 1.3740, -0.1110, -1.0158, 0.4388]) tensor([0.5846, 0.1324, 0.0536, 0.2294]) -Greedy action tensor([ 1.4221, -0.5977, -0.4868, 0.7707]) tensor([0.5549, 0.0736, 0.0823, 0.2893]) -Greedy action tensor([ 0.6443, -0.4891, -0.0280, 0.4022]) tensor([0.3820, 0.1230, 0.1950, 0.2999]) -Greedy action tensor([ 1.3397, -0.1484, -0.6604, 0.3862]) tensor([0.5726, 0.1293, 0.0775, 0.2207]) -Greedy action tensor([ 1.5808, -0.1776, -0.4144, 0.5499]) tensor([0.6006, 0.1035, 0.0817, 0.2142]) -Greedy action tensor([ 1.3866, -0.5226, -0.1056, 0.1981]) tensor([0.5961, 0.0883, 0.1340, 0.1816]) -Greedy action tensor([ 1.4059, -0.4974, -0.0407, 0.3418]) tensor([0.5782, 0.0862, 0.1361, 0.1995]) -Greedy action tensor([ 1.3595, -0.2688, -0.3885, 0.2635]) tensor([0.5867, 0.1151, 0.1021, 0.1961]) -Greedy action tensor([ 1.7269, -0.6721, -0.4931, 0.2499]) tensor([0.7004, 0.0636, 0.0761, 0.1599]) -Greedy action tensor([ 1.7604, -0.9151, -0.3450, 0.2914]) tensor([0.7038, 0.0485, 0.0857, 0.1620]) -Greedy action tensor([ 1.2134, -0.1695, -0.8689, 0.4764]) tensor([0.5394, 0.1353, 0.0672, 0.2581]) -Greedy action tensor([ 1.2390, -0.5913, -0.3255, 0.5305]) tensor([0.5371, 0.0861, 0.1124, 0.2644]) -Greedy action tensor([ 1.2631, -0.2094, -0.5296, 0.2698]) tensor([0.5662, 0.1299, 0.0943, 0.2097]) -Greedy action tensor([ 1.1591, -0.5306, -0.1162, 0.0446]) tensor([0.5580, 0.1030, 0.1559, 0.1831]) -Greedy action tensor([ 1.0092, -0.5171, 0.4721, 0.2477]) tensor([0.4408, 0.0958, 0.2576, 0.2058]) -Greedy action tensor([-0.9407, 0.0441, -1.5151, 0.2819]) tensor([0.1310, 0.3506, 0.0737, 0.4447]) -Greedy action tensor([ 0.3217, -0.8503, 1.0370, 0.6825]) tensor([0.2088, 0.0647, 0.4270, 0.2995]) -Greedy action tensor([ 0.3170, -0.4081, 0.2649, -0.2332]) tensor([0.3322, 0.1609, 0.3153, 0.1916]) -Greedy action tensor([-0.1271, -0.0508, -0.5316, 0.0115]) tensor([0.2567, 0.2771, 0.1713, 0.2949]) -Greedy action tensor([ 0.2224, -1.1449, 0.3274, 0.4511]) tensor([0.2761, 0.0703, 0.3066, 0.3470]) -Greedy action tensor([-0.3823, 0.5533, 0.3408, -0.2312]) tensor([0.1477, 0.3763, 0.3043, 0.1717]) -Greedy action tensor([-1.1736, 0.7775, -0.8584, -0.8573]) tensor([0.0928, 0.6528, 0.1272, 0.1273]) -Greedy action tensor([ 0.3874, 0.3462, -0.9205, -0.5158]) tensor([0.3795, 0.3642, 0.1026, 0.1538]) -Greedy action tensor([-0.4763, 0.3496, -0.3764, -0.3337]) tensor([0.1804, 0.4121, 0.1994, 0.2081]) -Greedy action tensor([ 1.1953, -0.8521, -0.3018, 0.7647]) tensor([0.4992, 0.0644, 0.1117, 0.3246]) -Greedy action tensor([ 0.7696, -2.0696, -0.1595, 0.1991]) tensor([0.4954, 0.0290, 0.1956, 0.2800]) -Greedy action tensor([ 0.3532, -1.1249, -0.6367, 0.4641]) tensor([0.3681, 0.0839, 0.1368, 0.4112]) -Greedy action tensor([ 0.7686, 0.0411, 0.1918, -0.1103]) tensor([0.4065, 0.1964, 0.2283, 0.1688]) -Greedy action tensor([-0.6961, -1.6308, 1.3335, -0.4365]) tensor([0.0971, 0.0381, 0.7389, 0.1259]) -Greedy action tensor([-0.3466, -1.0649, -0.9541, -0.1128]) tensor([0.3034, 0.1479, 0.1653, 0.3834]) -Greedy action tensor([-1.4413, -1.3344, 0.7483, -0.6329]) tensor([0.0753, 0.0837, 0.6721, 0.1689]) -Greedy action tensor([-0.5389, -1.5129, 0.0276, -0.2243]) tensor([0.2218, 0.0837, 0.3908, 0.3037]) -Greedy action tensor([-0.0637, 0.2619, -0.3464, 0.2052]) tensor([0.2249, 0.3114, 0.1695, 0.2942]) -Greedy action tensor([ 0.3408, -0.5306, -0.5565, -0.3057]) tensor([0.4255, 0.1780, 0.1735, 0.2229]) -Greedy action tensor([ 1.6191, 0.4014, -0.1128, 0.7614]) tensor([0.5271, 0.1560, 0.0933, 0.2236]) -Greedy action tensor([ 0.6456, -1.3933, -0.4337, -0.4263]) tensor([0.5518, 0.0718, 0.1875, 0.1889]) -Greedy action tensor([ 0.0221, -0.4951, 0.0622, -0.7831]) tensor([0.3242, 0.1933, 0.3375, 0.1449]) -Greedy action tensor([-0.0788, -0.2480, 0.2013, 0.4987]) tensor([0.2020, 0.1706, 0.2674, 0.3600]) -Greedy action tensor([ 0.0095, -0.5604, -1.2436, 0.2326]) tensor([0.3225, 0.1824, 0.0921, 0.4031]) -Greedy action tensor([ 0.1346, 0.1537, 0.5838, -0.4889]) tensor([0.2426, 0.2472, 0.3801, 0.1300]) -Greedy action tensor([-0.0302, -1.2857, -0.9310, 0.0504]) tensor([0.3604, 0.1027, 0.1464, 0.3906]) -Greedy action tensor([ 0.9848, -0.0815, -0.5711, 0.9959]) tensor([0.3896, 0.1341, 0.0822, 0.3940]) -Greedy action tensor([ 0.6958, -0.5967, 0.7817, 0.4557]) tensor([0.3174, 0.0871, 0.3458, 0.2496]) -Greedy action tensor([ 0.6558, -0.1888, 1.5678, -0.6963]) tensor([0.2394, 0.1029, 0.5958, 0.0619]) -Greedy action tensor([-0.4078, -1.2193, 0.4830, -0.3249]) tensor([0.2013, 0.0894, 0.4906, 0.2187]) -Greedy action tensor([-0.4919, -0.4985, -0.6904, 0.1135]) tensor([0.2153, 0.2139, 0.1765, 0.3944]) -Greedy action tensor([-0.6570, -0.2665, 0.4467, 0.1935]) tensor([0.1277, 0.1886, 0.3849, 0.2988]) -Greedy action tensor([-1.2329, -0.6564, 0.4367, -0.9348]) tensor([0.1060, 0.1886, 0.5627, 0.1428]) -Greedy action tensor([ 0.5732, -1.3417, 0.0319, -0.1073]) tensor([0.4473, 0.0659, 0.2603, 0.2265]) -Greedy action tensor([-1.1781, -1.4182, -0.4529, 0.8593]) tensor([0.0868, 0.0683, 0.1792, 0.6657]) -Greedy action tensor([-0.6241, -0.8745, 0.3133, 0.0551]) tensor([0.1586, 0.1235, 0.4050, 0.3129]) -Greedy action tensor([ 1.6945, -0.9740, 0.4427, 0.2170]) tensor([0.6315, 0.0438, 0.1806, 0.1441]) -Greedy action tensor([-0.8998, -1.0473, 0.5914, 0.4939]) tensor([0.0968, 0.0835, 0.4298, 0.3899]) -Greedy action tensor([ 0.1173, -1.7032, -0.4942, 0.1717]) tensor([0.3623, 0.0587, 0.1965, 0.3825]) -Greedy action tensor([-0.8310, -0.7797, -0.6393, -0.8065]) tensor([0.2332, 0.2454, 0.2824, 0.2390]) -Greedy action tensor([ 0.0508, -0.7132, 0.1530, -0.7745]) tensor([0.3321, 0.1547, 0.3678, 0.1455]) -Greedy action tensor([-0.7438, 0.0630, -0.3707, -0.8766]) tensor([0.1796, 0.4024, 0.2608, 0.1572]) -Greedy action tensor([-0.1669, -0.5523, 2.0933, -0.7266]) tensor([0.0845, 0.0575, 0.8098, 0.0483]) -Greedy action tensor([ 0.2873, 0.3837, -0.4449, -0.9840]) tensor([0.3493, 0.3847, 0.1680, 0.0980]) -Greedy action tensor([ 0.0279, -0.7890, -0.4824, 0.4337]) tensor([0.2823, 0.1247, 0.1695, 0.4236]) -Greedy action tensor([ 0.2882, -0.8470, 2.7089, -0.4728]) tensor([0.0767, 0.0246, 0.8629, 0.0358]) -Greedy action tensor([-1.1948, -0.8657, -0.7221, 0.4739]) tensor([0.1075, 0.1494, 0.1725, 0.5705]) -Greedy action tensor([-0.4256, 0.4810, -0.7930, -1.3357]) tensor([0.2188, 0.5417, 0.1515, 0.0881]) -Greedy action tensor([ 0.3109, -0.4214, -0.0644, 0.5360]) tensor([0.2924, 0.1406, 0.2009, 0.3662]) -Greedy action tensor([-0.5663, -1.6064, -0.4624, 0.0335]) tensor([0.2334, 0.0825, 0.2589, 0.4252]) -Greedy action tensor([-0.3311, -0.9598, -0.2611, 0.0998]) tensor([0.2413, 0.1287, 0.2588, 0.3713]) -Greedy action tensor([ 0.9702, -1.6241, 0.4391, 0.4974]) tensor([0.4375, 0.0327, 0.2572, 0.2726]) -Greedy action tensor([-0.8117, -0.2682, 1.0014, -0.5178]) tensor([0.0981, 0.1689, 0.6013, 0.1316]) -Greedy action tensor([-0.3507, -1.4643, 0.4685, -1.0265]) tensor([0.2435, 0.0800, 0.5526, 0.1239]) -Greedy action tensor([ 0.0687, 0.0780, 1.3457, -0.4866]) tensor([0.1621, 0.1636, 0.5813, 0.0930]) -Greedy action tensor([-0.8030, -1.7318, -0.2869, 1.2446]) tensor([0.0924, 0.0365, 0.1549, 0.7162]) -Greedy action tensor([-0.6830, 0.0908, -0.0359, -1.1188]) tensor([0.1747, 0.3787, 0.3336, 0.1130]) -Greedy action tensor([ 0.2991, -0.7518, -0.6085, 0.1014]) tensor([0.3885, 0.1358, 0.1568, 0.3188]) -Greedy action tensor([ 0.2646, -1.4684, 0.6526, 1.1891]) tensor([0.1934, 0.0342, 0.2850, 0.4874]) -Greedy action tensor([ 0.2895, -1.6932, 0.0197, 0.5153]) tensor([0.3170, 0.0436, 0.2420, 0.3973]) -Greedy action tensor([-0.0462, -1.0295, -0.6251, -0.6900]) tensor([0.4065, 0.1521, 0.2279, 0.2136]) -Greedy action tensor([ 0.4509, 0.5955, -0.1778, 0.5171]) tensor([0.2662, 0.3076, 0.1419, 0.2843]) -Greedy action tensor([-0.2362, -0.2579, -0.5112, -0.6555]) tensor([0.2945, 0.2882, 0.2237, 0.1936]) -Greedy action tensor([ 0.1354, -1.2204, -0.4047, 0.0409]) tensor([0.3636, 0.0937, 0.2119, 0.3308]) -Greedy action tensor([ 0.6090, -0.7713, 0.8732, 0.5548]) tensor([0.2856, 0.0718, 0.3720, 0.2706]) -Greedy action tensor([-0.1766, -1.7151, -0.6753, 1.0546]) tensor([0.1906, 0.0409, 0.1157, 0.6528]) -Greedy action tensor([ 1.1475, -0.0528, 0.0397, 0.1046]) tensor([0.5041, 0.1518, 0.1665, 0.1777]) -Greedy action tensor([ 0.6541, -0.4174, 0.6136, -0.3650]) tensor([0.3754, 0.1286, 0.3605, 0.1355]) -Greedy action tensor([-0.4719, 0.3021, 0.0829, -0.9291]) tensor([0.1804, 0.3912, 0.3142, 0.1142]) -Greedy action tensor([-0.0638, -0.3279, -0.8406, 0.7961]) tensor([0.2178, 0.1673, 0.1002, 0.5147]) -Greedy action tensor([ 0.2851, -0.3203, 0.8499, -0.6827]) tensor([0.2714, 0.1481, 0.4774, 0.1031]) -Greedy action tensor([-0.1661, -0.7319, 0.2172, -0.3068]) tensor([0.2562, 0.1455, 0.3758, 0.2225]) -Greedy action tensor([ 0.1144, -0.5575, -0.5086, 0.1574]) tensor([0.3235, 0.1652, 0.1735, 0.3377]) -Greedy action tensor([ 0.9403, -0.6167, -0.2752, 0.5316]) tensor([0.4604, 0.0970, 0.1366, 0.3060]) -Greedy action tensor([ 0.6323, -1.4117, 0.0250, 0.4986]) tensor([0.3923, 0.0508, 0.2137, 0.3432]) -Greedy action tensor([ 1.9680, -0.4984, -0.0344, 1.5012]) tensor([0.5414, 0.0460, 0.0731, 0.3395]) -Greedy action tensor([ 0.6325, -1.0248, -0.8501, 0.9161]) tensor([0.3642, 0.0694, 0.0827, 0.4836]) -Greedy action tensor([-0.9798, -0.0884, 0.9299, -1.2144]) tensor([0.0911, 0.2221, 0.6148, 0.0720]) -Greedy action tensor([ 0.9251, 0.4938, -0.7757, 0.0076]) tensor([0.4481, 0.2911, 0.0818, 0.1790]) -Greedy action tensor([ 1.3334, -0.8228, 0.2455, 0.6961]) tensor([0.5047, 0.0584, 0.1700, 0.2668]) -Greedy action tensor([ 1.1550, -0.9217, 0.1028, -0.5420]) tensor([0.6032, 0.0756, 0.2106, 0.1105]) -Greedy action tensor([0.3940, 0.0016, 0.0051, 0.0230]) tensor([0.3286, 0.2219, 0.2227, 0.2267]) -Greedy action tensor([ 1.1291, -0.5154, -0.1585, -0.1905]) tensor([0.5759, 0.1112, 0.1589, 0.1539]) -Greedy action tensor([ 0.7641, -0.3966, -0.1040, -0.4059]) tensor([0.4894, 0.1533, 0.2054, 0.1519]) -Greedy action tensor([ 0.7806, -0.5879, 0.0429, -0.2994]) tensor([0.4825, 0.1228, 0.2308, 0.1639]) -Greedy action tensor([ 0.8731, -0.8523, 0.0576, -0.4699]) tensor([0.5315, 0.0947, 0.2351, 0.1387]) -Greedy action tensor([ 0.6037, -0.3915, -0.0739, -0.0568]) tensor([0.4177, 0.1544, 0.2121, 0.2158]) -Greedy action tensor([ 1.2195, -0.9327, 0.0894, -0.6395]) tensor([0.6270, 0.0729, 0.2025, 0.0977]) -Greedy action tensor([ 0.5421, -0.1630, -0.0407, -0.0620]) tensor([0.3848, 0.1901, 0.2148, 0.2103]) -Greedy action tensor([ 0.7608, -0.5767, -0.1181, -0.1773]) tensor([0.4833, 0.1269, 0.2007, 0.1891]) -Greedy action tensor([ 0.7002, -0.6417, 0.0286, -0.3770]) tensor([0.4733, 0.1237, 0.2418, 0.1612]) -Greedy action tensor([ 0.7755, -0.4274, -0.0357, -0.2695]) tensor([0.4770, 0.1433, 0.2119, 0.1678]) -Greedy action tensor([ 1.1436, -0.7746, 0.0416, -0.5203]) tensor([0.5993, 0.0880, 0.1991, 0.1135]) -Greedy action tensor([ 0.5635, -0.0846, 0.1084, -0.2902]) tensor([0.3871, 0.2025, 0.2456, 0.1648]) -Greedy action tensor([ 0.6312, 0.0184, -0.0687, -0.3582]) tensor([0.4149, 0.2248, 0.2060, 0.1543]) -Greedy action tensor([ 0.9502, -0.6019, -0.0712, -0.5235]) tensor([0.5553, 0.1176, 0.1999, 0.1272]) -Greedy action tensor([ 1.2047, -0.8324, -0.1747, -0.5689]) tensor([0.6444, 0.0840, 0.1622, 0.1094]) -Greedy action tensor([ 1.2486, -0.7908, 0.1029, -0.9737]) tensor([0.6425, 0.0836, 0.2043, 0.0696]) -Greedy action tensor([ 0.4871, -0.0120, -0.1235, -0.0768]) tensor([0.3678, 0.2233, 0.1997, 0.2093]) -Greedy action tensor([ 0.8355, -0.2816, 0.0216, -0.1341]) tensor([0.4652, 0.1522, 0.2061, 0.1764]) -Greedy action tensor([ 0.6927, -0.2186, 0.0136, -0.1362]) tensor([0.4263, 0.1714, 0.2162, 0.1861]) -Greedy action tensor([ 0.3306, 0.2203, -0.1089, -0.0818]) tensor([0.3123, 0.2797, 0.2012, 0.2068]) -Greedy action tensor([ 0.3879, -0.0220, 0.0645, -0.1332]) tensor([0.3354, 0.2226, 0.2427, 0.1992]) -Greedy action tensor([ 1.3536, -0.7099, -0.1968, -0.6844]) tensor([0.6805, 0.0864, 0.1444, 0.0887]) -Greedy action tensor([ 0.7787, -0.8199, -0.0113, -0.3943]) tensor([0.5088, 0.1029, 0.2309, 0.1574]) -Greedy action tensor([ 0.9774, -0.5581, -0.0730, -0.4704]) tensor([0.5555, 0.1196, 0.1943, 0.1306]) -Greedy action tensor([ 0.5406, -0.4169, 0.0049, -0.0321]) tensor([0.3948, 0.1515, 0.2310, 0.2226]) -Greedy action tensor([ 0.9302, -0.4976, 0.0275, -0.3581]) tensor([0.5206, 0.1248, 0.2111, 0.1435]) -Greedy action tensor([ 0.5570, -0.6262, 0.0574, -0.4799]) tensor([0.4410, 0.1351, 0.2676, 0.1564]) -Greedy action tensor([ 0.5911, -0.4338, -0.0142, -0.1480]) tensor([0.4198, 0.1506, 0.2292, 0.2005]) -Greedy action tensor([ 0.8935, -0.4573, -0.0910, -0.2935]) tensor([0.5160, 0.1337, 0.1928, 0.1575]) -Greedy action tensor([ 0.6749, -0.7668, -0.0876, -0.2749]) tensor([0.4785, 0.1132, 0.2232, 0.1851]) -Greedy action tensor([ 0.6743, -0.3415, -0.0715, -0.2245]) tensor([0.4457, 0.1614, 0.2114, 0.1814]) -Greedy action tensor([ 0.5297, 0.2661, -0.2384, 0.0926]) tensor([0.3475, 0.2669, 0.1612, 0.2244]) -Greedy action tensor([ 0.6841, -0.0846, -0.0842, -0.1593]) tensor([0.4241, 0.1966, 0.1967, 0.1825]) -Greedy action tensor([ 0.6216, -0.6021, -0.0321, -0.1364]) tensor([0.4380, 0.1288, 0.2278, 0.2053]) -Greedy action tensor([ 0.9069, -0.3784, -0.1005, -0.1758]) tensor([0.5049, 0.1397, 0.1844, 0.1710]) -Greedy action tensor([ 0.4844, 0.1088, -0.1408, 0.1048]) tensor([0.3441, 0.2364, 0.1841, 0.2354]) -Greedy action tensor([ 0.2731, -0.0950, -0.0428, 0.1478]) tensor([0.3027, 0.2095, 0.2207, 0.2671]) -Greedy action tensor([ 0.9835, -0.5015, -0.1205, -0.3293]) tensor([0.5473, 0.1240, 0.1815, 0.1473]) -Greedy action tensor([ 0.6223, -0.3566, -0.1188, -0.1586]) tensor([0.4328, 0.1626, 0.2063, 0.1982]) -Greedy action tensor([ 0.6817, -0.6273, -0.1726, -0.7241]) tensor([0.5152, 0.1392, 0.2193, 0.1263]) -Greedy action tensor([ 0.6707, -0.2396, -0.0819, -0.2082]) tensor([0.4369, 0.1758, 0.2059, 0.1814]) -Greedy action tensor([ 0.5397, -0.1325, -0.0538, -0.0939]) tensor([0.3856, 0.1969, 0.2130, 0.2046]) -Greedy action tensor([ 0.9075, -0.4455, 0.0409, -0.3430]) tensor([0.5089, 0.1315, 0.2139, 0.1457]) -Greedy action tensor([ 1.0919, -0.6729, -0.0788, -0.6861]) tensor([0.6059, 0.1037, 0.1879, 0.1024]) -Greedy action tensor([ 0.6765, -0.5754, -0.1788, -0.4946]) tensor([0.4948, 0.1415, 0.2103, 0.1534]) -Greedy action tensor([ 0.4697, 0.0335, -0.1154, 0.0412]) tensor([0.3503, 0.2264, 0.1951, 0.2282]) -Greedy action tensor([ 1.0446, -0.4066, 0.0161, -0.4137]) tensor([0.5481, 0.1284, 0.1960, 0.1275]) -Greedy action tensor([ 1.3806, -0.6230, -0.0573, -0.3667]) tensor([0.6466, 0.0872, 0.1535, 0.1127]) -Greedy action tensor([ 0.3702, -0.2246, 0.0490, -0.2901]) tensor([0.3579, 0.1975, 0.2596, 0.1850]) -Greedy action tensor([ 0.9384, -0.4212, -0.0971, -0.2803]) tensor([0.5243, 0.1346, 0.1861, 0.1550]) -Greedy action tensor([ 0.5142, -0.1918, -0.1509, -0.0654]) tensor([0.3894, 0.1922, 0.2003, 0.2181]) -Greedy action tensor([ 0.5696, -0.3401, -0.1001, -0.0353]) tensor([0.4064, 0.1636, 0.2080, 0.2219]) -Greedy action tensor([ 0.4821, -0.2028, 0.1255, -0.4322]) tensor([0.3839, 0.1935, 0.2687, 0.1538]) -Greedy action tensor([ 0.6070, -0.1836, -0.1044, -0.0069]) tensor([0.4023, 0.1825, 0.1975, 0.2177]) -Greedy action tensor([ 0.4626, -0.1373, -0.0553, -0.0470]) tensor([0.3642, 0.1999, 0.2170, 0.2188]) -Greedy action tensor([ 0.8641, -0.9277, 0.1370, -0.4328]) tensor([0.5199, 0.0867, 0.2513, 0.1421]) -Greedy action tensor([ 0.7832, -0.1187, -0.0334, -0.0337]) tensor([0.4368, 0.1772, 0.1930, 0.1930]) -Greedy action tensor([ 0.4631, -0.0849, -0.0342, -0.0839]) tensor([0.3617, 0.2091, 0.2200, 0.2093]) -Greedy action tensor([ 0.6516, -0.1661, -0.0055, -0.0082]) tensor([0.4038, 0.1782, 0.2093, 0.2087]) -Greedy action tensor([ 0.9838, -0.2654, -0.0908, -0.0993]) tensor([0.5085, 0.1458, 0.1736, 0.1721]) -Greedy action tensor([ 0.6451, -0.4011, -0.0674, -0.0753]) tensor([0.4295, 0.1509, 0.2106, 0.2090]) -Greedy action tensor([ 0.5754, -0.4481, -0.1493, -0.1110]) tensor([0.4260, 0.1531, 0.2064, 0.2145]) -Greedy action tensor([ 0.3025, -0.1371, -0.0716, -0.1234]) tensor([0.3350, 0.2158, 0.2304, 0.2188]) -Greedy action tensor([ 0.5618, -0.0282, -0.0683, 0.0280]) tensor([0.3741, 0.2074, 0.1992, 0.2193]) -Greedy action tensor([ 0.4966, -0.0334, -0.1055, -0.0104]) tensor([0.3651, 0.2149, 0.2000, 0.2199]) -Greedy action tensor([ 0.6452, -0.1732, 0.0633, -0.1142]) tensor([0.4052, 0.1787, 0.2264, 0.1896]) -Greedy action tensor([ 0.5013, -0.0069, -0.1027, -0.0946]) tensor([0.3705, 0.2229, 0.2025, 0.2042]) -Greedy action tensor([ 0.2709, 0.1176, -0.1614, -0.2371]) tensor([0.3217, 0.2760, 0.2088, 0.1936]) -Greedy action tensor([ 0.6734, -0.3555, 0.2200, -0.3642]) tensor([0.4260, 0.1523, 0.2707, 0.1510]) -Greedy action tensor([ 0.7841, -0.3460, -0.1331, -0.1904]) tensor([0.4762, 0.1538, 0.1903, 0.1797]) -Greedy action tensor([ 0.4934, -0.2580, -0.0129, -0.0295]) tensor([0.3749, 0.1768, 0.2260, 0.2223]) -Greedy action tensor([ 0.6836, -0.3924, -0.3384, -0.2440]) tensor([0.4770, 0.1626, 0.1717, 0.1887]) -Greedy action tensor([ 0.9003, -0.6387, 0.0378, -0.5058]) tensor([0.5314, 0.1140, 0.2243, 0.1302]) -Greedy action tensor([ 0.9325, -0.9713, 0.1350, -0.6973]) tensor([0.5570, 0.0830, 0.2509, 0.1091]) -Greedy action tensor([ 0.5573, -0.3458, -0.0660, -0.2280]) tensor([0.4171, 0.1691, 0.2236, 0.1902]) -Greedy action tensor([ 1.3143, -0.8244, -0.0670, -0.8869]) tensor([0.6758, 0.0796, 0.1698, 0.0748]) -Greedy action tensor([ 0.3068, -0.3867, -0.1186, -0.0247]) tensor([0.3483, 0.1741, 0.2276, 0.2500]) -Greedy action tensor([ 0.6543, -0.6235, -0.1626, -0.2637]) tensor([0.4717, 0.1315, 0.2084, 0.1884]) -Greedy action tensor([ 1.0396, -0.4468, -0.1469, -0.4940]) tensor([0.5723, 0.1295, 0.1747, 0.1235]) -Greedy action tensor([-1.8912, -0.3382, 0.6338, -0.1205]) tensor([0.0415, 0.1962, 0.5185, 0.2439]) -Greedy action tensor([-1.9399, -0.4369, 0.6646, -0.1747]) tensor([0.0402, 0.1808, 0.5440, 0.2350]) -Greedy action tensor([-0.9731, -0.3628, 0.3541, -0.1546]) tensor([0.1126, 0.2073, 0.4247, 0.2553]) -Greedy action tensor([-0.7636, 0.4629, 0.1702, -0.1828]) tensor([0.1144, 0.3900, 0.2911, 0.2045]) -Greedy action tensor([-1.8139, -0.0693, 0.5515, -0.0952]) tensor([0.0436, 0.2494, 0.4640, 0.2430]) -Greedy action tensor([-1.6047, -0.2807, 0.4449, 0.0080]) tensor([0.0570, 0.2143, 0.4427, 0.2860]) -Greedy action tensor([-1.6481, -0.2869, 0.5744, 0.0734]) tensor([0.0507, 0.1978, 0.4680, 0.2836]) -Greedy action tensor([-1.7245, -0.2796, 0.6304, -0.0311]) tensor([0.0471, 0.1999, 0.4967, 0.2563]) -Greedy action tensor([-1.9087, -0.4566, 0.6549, -0.1599]) tensor([0.0417, 0.1780, 0.5409, 0.2395]) -Greedy action tensor([-1.1071, -0.1587, 0.6275, 0.1321]) tensor([0.0787, 0.2033, 0.4461, 0.2719]) -Greedy action tensor([-1.7463, -0.4842, 0.5877, -0.0986]) tensor([0.0499, 0.1762, 0.5147, 0.2591]) -Greedy action tensor([-1.8981, -0.3906, 0.6372, -0.1597]) tensor([0.0420, 0.1895, 0.5297, 0.2388]) -Greedy action tensor([-1.6248, -0.1938, 0.5641, 0.0480]) tensor([0.0514, 0.2152, 0.4592, 0.2741]) -Greedy action tensor([-0.6330, 0.8319, -0.0768, -0.0303]) tensor([0.1124, 0.4863, 0.1960, 0.2053]) -Greedy action tensor([-1.6212, -0.0211, 0.5577, -0.0057]) tensor([0.0505, 0.2499, 0.4458, 0.2538]) -Greedy action tensor([-1.7875, -0.2590, 0.6045, -0.0807]) tensor([0.0453, 0.2091, 0.4958, 0.2498]) -Greedy action tensor([-1.8539, -0.3598, 0.6212, -0.1227]) tensor([0.0435, 0.1938, 0.5170, 0.2457]) -Greedy action tensor([-1.9444, -0.4492, 0.6676, -0.1799]) tensor([0.0401, 0.1789, 0.5467, 0.2342]) -Greedy action tensor([-1.8243, -0.3039, 0.5915, -0.1092]) tensor([0.0448, 0.2048, 0.5015, 0.2489]) -Greedy action tensor([-1.8971, -0.4483, 0.6466, -0.1569]) tensor([0.0422, 0.1798, 0.5374, 0.2406]) -Greedy action tensor([-1.4509, -0.0086, 0.5988, 0.2955]) tensor([0.0534, 0.2259, 0.4146, 0.3061]) -Greedy action tensor([-1.8556, -0.4265, 0.6723, -0.1131]) tensor([0.0427, 0.1783, 0.5350, 0.2439]) -Greedy action tensor([-1.7439, -0.1903, 0.5361, -0.0735]) tensor([0.0480, 0.2271, 0.4696, 0.2553]) -Greedy action tensor([-1.6699, -0.3729, 0.4873, -0.0135]) tensor([0.0539, 0.1973, 0.4663, 0.2826]) -Greedy action tensor([-1.2997, -0.5392, 0.2703, 0.2311]) tensor([0.0796, 0.1702, 0.3824, 0.3678]) -Greedy action tensor([-1.8979, -0.4512, 0.6488, -0.1547]) tensor([0.0421, 0.1791, 0.5379, 0.2409]) -Greedy action tensor([-1.6897, -0.4283, 0.6644, 0.0242]) tensor([0.0485, 0.1713, 0.5108, 0.2693]) -Greedy action tensor([-1.4951, -0.2752, 0.6748, 0.3036]) tensor([0.0521, 0.1765, 0.4564, 0.3149]) -Greedy action tensor([-1.9477, -0.4528, 0.6691, -0.1826]) tensor([0.0400, 0.1784, 0.5478, 0.2338]) -Greedy action tensor([-1.9385, -0.4425, 0.6652, -0.1773]) tensor([0.0403, 0.1800, 0.5450, 0.2347]) -Greedy action tensor([-1.8951, -0.4423, 0.6401, -0.1594]) tensor([0.0424, 0.1814, 0.5354, 0.2407]) -Greedy action tensor([-1.9344, -0.4374, 0.6632, -0.1742]) tensor([0.0405, 0.1808, 0.5435, 0.2353]) -Greedy action tensor([-1.8700, -0.4504, 0.6259, -0.1434]) tensor([0.0437, 0.1807, 0.5300, 0.2456]) -Greedy action tensor([-1.8068, -0.4115, 0.5934, -0.1218]) tensor([0.0466, 0.1881, 0.5139, 0.2514]) -Greedy action tensor([-1.7668, -0.3965, 0.6845, 0.0232]) tensor([0.0444, 0.1747, 0.5150, 0.2659]) -Greedy action tensor([-1.7922, -0.3248, 0.5801, -0.1147]) tensor([0.0467, 0.2026, 0.5007, 0.2500]) -Greedy action tensor([-1.7523, -0.0211, 0.4920, -0.0570]) tensor([0.0464, 0.2623, 0.4382, 0.2531]) -Greedy action tensor([-1.6398, -0.2241, 0.5928, 0.0056]) tensor([0.0510, 0.2099, 0.4751, 0.2641]) -Greedy action tensor([-1.9208, -0.4544, 0.6571, -0.1698]) tensor([0.0412, 0.1786, 0.5428, 0.2374]) -Greedy action tensor([-1.7751, -0.3957, 0.4577, -0.0927]) tensor([0.0508, 0.2019, 0.4739, 0.2734]) -Greedy action tensor([-1.8345, -0.2828, 0.6183, -0.1177]) tensor([0.0437, 0.2060, 0.5073, 0.2430]) -Greedy action tensor([-1.9479, -0.4491, 0.6667, -0.1826]) tensor([0.0400, 0.1792, 0.5469, 0.2339]) -Greedy action tensor([-1.7496, 0.0402, 0.4921, -0.0753]) tensor([0.0460, 0.2756, 0.4329, 0.2455]) -Greedy action tensor([-1.9267, -0.4250, 0.6564, -0.1693]) tensor([0.0408, 0.1830, 0.5398, 0.2364]) -Greedy action tensor([-1.8400, -0.4213, 0.6110, -0.1325]) tensor([0.0450, 0.1857, 0.5214, 0.2479]) -Greedy action tensor([-1.9221, -0.3874, 0.6449, -0.1708]) tensor([0.0409, 0.1899, 0.5333, 0.2359]) -Greedy action tensor([-1.7423, -0.3855, 0.5341, -0.0551]) tensor([0.0499, 0.1939, 0.4864, 0.2698]) -Greedy action tensor([-1.8638, -0.4384, 0.6306, -0.1213]) tensor([0.0435, 0.1810, 0.5270, 0.2485]) -Greedy action tensor([-1.7730, -0.3922, 0.5826, -0.0964]) tensor([0.0479, 0.1906, 0.5052, 0.2562]) -Greedy action tensor([-1.8938, -0.4534, 0.6449, -0.1548]) tensor([0.0424, 0.1791, 0.5371, 0.2414]) -Greedy action tensor([-1.7305, -0.3251, 0.6516, 0.0187]) tensor([0.0462, 0.1883, 0.5000, 0.2655]) -Greedy action tensor([-1.8872, -0.3978, 0.6479, -0.1526]) tensor([0.0422, 0.1870, 0.5320, 0.2389]) -Greedy action tensor([-1.7019, -0.4380, 0.5982, 0.1804]) tensor([0.0474, 0.1679, 0.4731, 0.3115]) -Greedy action tensor([-0.7238, 0.4599, 0.1179, 0.4042]) tensor([0.1033, 0.3376, 0.2398, 0.3193]) -Greedy action tensor([-1.7664, -0.4791, 0.5832, -0.0833]) tensor([0.0488, 0.1768, 0.5116, 0.2627]) -Greedy action tensor([-1.9426, -0.4477, 0.6670, -0.1789]) tensor([0.0402, 0.1792, 0.5462, 0.2344]) -Greedy action tensor([-1.9303, -0.4260, 0.6598, -0.1723]) tensor([0.0406, 0.1827, 0.5412, 0.2355]) -Greedy action tensor([-1.9033, -0.3282, 0.6220, -0.1535]) tensor([0.0415, 0.2006, 0.5189, 0.2389]) -Greedy action tensor([-1.9181, -0.4585, 0.6669, -0.1531]) tensor([0.0410, 0.1763, 0.5434, 0.2393]) -Greedy action tensor([-1.6464, -0.3845, 0.5825, 0.1532]) tensor([0.0503, 0.1778, 0.4675, 0.3044]) -Greedy action tensor([-1.8023, -0.2870, 0.5703, -0.1034]) tensor([0.0460, 0.2093, 0.4933, 0.2515]) -Greedy action tensor([-1.9051, -0.3811, 0.6411, -0.1552]) tensor([0.0415, 0.1905, 0.5293, 0.2387]) -Greedy action tensor([-1.9049, -0.4250, 0.6538, -0.1585]) tensor([0.0416, 0.1827, 0.5373, 0.2385]) -Greedy action tensor([-1.9433, -0.4486, 0.6687, -0.1778]) tensor([0.0401, 0.1788, 0.5466, 0.2344]) -Greedy action tensor([-1.6980, -0.2755, 0.5848, 0.0052]) tensor([0.0489, 0.2029, 0.4796, 0.2686]) -Greedy action tensor([-1.9019, -0.4430, 0.6708, -0.1453]) tensor([0.0413, 0.1778, 0.5415, 0.2394]) -Greedy action tensor([-1.8725, -0.2999, 0.6280, -0.1419]) tensor([0.0423, 0.2038, 0.5153, 0.2386]) -Greedy action tensor([-1.9034, -0.4419, 0.6476, -0.1607]) tensor([0.0419, 0.1808, 0.5376, 0.2396]) -Greedy action tensor([-0.6612, -0.2361, 0.2487, -0.1559]) tensor([0.1499, 0.2293, 0.3724, 0.2485]) -Greedy action tensor([-1.8859, -0.4543, 0.6441, -0.1493]) tensor([0.0427, 0.1787, 0.5361, 0.2425]) -Greedy action tensor([-1.9456, -0.4538, 0.6663, -0.1820]) tensor([0.0402, 0.1785, 0.5471, 0.2342]) -Greedy action tensor([-1.7684, -0.2889, 0.5922, -0.0627]) tensor([0.0465, 0.2043, 0.4931, 0.2561]) -Greedy action tensor([-0.0974, 1.1713, 0.0087, 0.4242]) tensor([0.1360, 0.4836, 0.1512, 0.2291]) -Greedy action tensor([-1.9435, -0.4452, 0.6659, -0.1793]) tensor([0.0402, 0.1797, 0.5458, 0.2344]) -Greedy action tensor([-1.8384, -0.4405, 0.6897, -0.0551]) tensor([0.0425, 0.1720, 0.5326, 0.2529]) -Greedy action tensor([-1.9327, -0.4358, 0.6627, -0.1737]) tensor([0.0405, 0.1811, 0.5431, 0.2353]) -Greedy action tensor([-1.8609, -0.4657, 0.6346, -0.1339]) tensor([0.0439, 0.1771, 0.5322, 0.2468]) -Greedy action tensor([-1.8850, -0.4547, 0.6436, -0.1506]) tensor([0.0428, 0.1788, 0.5362, 0.2423]) -Greedy action tensor([-1.7416, -1.0815, 0.4404, -0.4450]) tensor([0.0647, 0.1252, 0.5735, 0.2366]) -Greedy action tensor([-1.0687, 0.6912, 0.1618, 0.0302]) tensor([0.0756, 0.4391, 0.2586, 0.2267]) -Greedy action tensor([-1.9129, -0.4351, 0.6696, -0.1498]) tensor([0.0409, 0.1793, 0.5412, 0.2385]) -Greedy action tensor([ 1.0690, -0.4086, 0.0509, -0.1656]) tensor([0.5318, 0.1213, 0.1921, 0.1547]) -Greedy action tensor([ 1.7123, -0.3221, -0.7096, 0.3243]) tensor([0.6807, 0.0890, 0.0604, 0.1699]) -Greedy action tensor([ 1.5041, -0.3343, -0.4212, 0.3784]) tensor([0.6137, 0.0976, 0.0895, 0.1991]) -Greedy action tensor([ 1.4840, 0.3332, -0.4573, 0.0542]) tensor([0.5885, 0.1862, 0.0845, 0.1409]) -Greedy action tensor([ 1.6513, -0.5142, -0.4339, 0.3390]) tensor([0.6630, 0.0760, 0.0824, 0.1785]) -Greedy action tensor([ 1.9297, -1.2298, -0.3059, 0.0587]) tensor([0.7673, 0.0326, 0.0820, 0.1181]) -Greedy action tensor([ 1.6241, -0.7369, -0.4215, 0.7653]) tensor([0.6071, 0.0573, 0.0785, 0.2572]) -Greedy action tensor([ 1.1907, -0.2248, -0.2540, 0.3380]) tensor([0.5250, 0.1275, 0.1238, 0.2238]) -Greedy action tensor([ 2.0079, -0.0627, -0.1010, 0.2225]) tensor([0.7066, 0.0891, 0.0858, 0.1185]) -Greedy action tensor([ 2.0239, -1.2548, 0.0559, 0.7163]) tensor([0.6907, 0.0260, 0.0965, 0.1868]) -Greedy action tensor([ 1.3814, -0.5628, -0.1008, 0.3594]) tensor([0.5780, 0.0827, 0.1313, 0.2080]) -Greedy action tensor([ 1.5307, -0.2474, -0.9466, 0.5104]) tensor([0.6198, 0.1047, 0.0520, 0.2234]) -Greedy action tensor([ 1.3061, -0.7031, 0.1222, -0.3168]) tensor([0.6107, 0.0819, 0.1869, 0.1205]) -Greedy action tensor([ 1.2491, -0.4749, -0.2502, 0.1783]) tensor([0.5733, 0.1022, 0.1280, 0.1965]) -Greedy action tensor([ 1.9267, -1.1287, -0.3050, 0.9580]) tensor([0.6519, 0.0307, 0.0700, 0.2474]) -Greedy action tensor([ 2.2557, -1.5020, 0.2447, 0.4172]) tensor([0.7597, 0.0177, 0.1017, 0.1208]) -Greedy action tensor([ 1.4159, -0.0083, -0.5373, 0.6756]) tensor([0.5378, 0.1295, 0.0763, 0.2565]) -Greedy action tensor([ 1.0316, -0.0373, -0.1892, 0.1326]) tensor([0.4889, 0.1679, 0.1442, 0.1990]) -Greedy action tensor([ 1.2141, -0.2657, -0.2274, 0.2628]) tensor([0.5404, 0.1230, 0.1278, 0.2087]) -Greedy action tensor([ 0.4955, -0.2328, 0.0667, -0.0284]) tensor([0.3668, 0.1771, 0.2389, 0.2172]) -Greedy action tensor([ 1.7293, -0.8115, 0.1061, -0.1775]) tensor([0.7019, 0.0553, 0.1385, 0.1043]) -Greedy action tensor([ 1.3127, -0.1576, -0.7173, 0.3228]) tensor([0.5771, 0.1327, 0.0758, 0.2145]) -Greedy action tensor([ 1.5031, -0.6263, -0.0409, 0.3156]) tensor([0.6107, 0.0726, 0.1304, 0.1863]) -Greedy action tensor([ 1.2153, 0.3025, -0.4305, 0.4566]) tensor([0.4848, 0.1946, 0.0935, 0.2271]) -Greedy action tensor([ 1.5570, -0.4948, -0.4279, 0.3407]) tensor([0.6401, 0.0823, 0.0879, 0.1897]) -Greedy action tensor([ 1.5219, -0.4782, -0.3330, 0.3699]) tensor([0.6220, 0.0842, 0.0973, 0.1965]) -Greedy action tensor([ 1.1575, -0.2502, -0.2310, 0.2108]) tensor([0.5313, 0.1300, 0.1325, 0.2062]) -Greedy action tensor([ 1.5415, -0.5886, -0.8611, 0.7194]) tensor([0.6065, 0.0721, 0.0549, 0.2666]) -Greedy action tensor([ 1.1909, -0.2712, -0.4193, 0.1819]) tensor([0.5567, 0.1290, 0.1113, 0.2030]) -Greedy action tensor([ 1.6351, -0.2141, -0.7076, 0.1308]) tensor([0.6777, 0.1066, 0.0651, 0.1506]) -Greedy action tensor([ 0.9887, -0.6243, -0.4978, 0.3977]) tensor([0.5053, 0.1007, 0.1143, 0.2798]) -Greedy action tensor([ 1.2358, 0.1611, 0.0164, -0.4341]) tensor([0.5479, 0.1871, 0.1619, 0.1032]) -Greedy action tensor([ 1.5644, -0.5520, -0.4214, 0.2431]) tensor([0.6559, 0.0790, 0.0900, 0.1750]) -Greedy action tensor([ 1.6119, -0.3851, -1.4274, 0.3032]) tensor([0.6879, 0.0934, 0.0329, 0.1858]) -Greedy action tensor([ 1.0399, -0.6745, -0.2049, 0.2209]) tensor([0.5239, 0.0943, 0.1509, 0.2309]) -Greedy action tensor([ 1.5334, -0.7460, -0.3101, 0.3878]) tensor([0.6334, 0.0648, 0.1003, 0.2015]) -Greedy action tensor([ 2.1724, -0.3742, -0.6726, 0.8428]) tensor([0.7137, 0.0559, 0.0415, 0.1888]) -Greedy action tensor([ 1.7758, -0.0853, -0.6692, 0.3473]) tensor([0.6748, 0.1049, 0.0585, 0.1617]) -Greedy action tensor([ 2.4051, -0.1881, -0.3926, 0.3945]) tensor([0.7876, 0.0589, 0.0480, 0.1055]) -Greedy action tensor([ 1.1071, -0.0958, -0.7072, 0.3215]) tensor([0.5211, 0.1565, 0.0849, 0.2375]) -Greedy action tensor([ 1.8955, -0.8217, -0.2814, 0.3217]) tensor([0.7211, 0.0476, 0.0818, 0.1495]) -Greedy action tensor([ 1.6431, -0.1161, -0.4553, 0.5515]) tensor([0.6133, 0.1056, 0.0752, 0.2059]) -Greedy action tensor([ 1.3469, -0.3613, -0.5266, 0.1810]) tensor([0.6074, 0.1101, 0.0933, 0.1893]) -Greedy action tensor([ 1.9949, -1.2772, -0.3264, 1.0637]) tensor([0.6535, 0.0248, 0.0641, 0.2575]) -Greedy action tensor([ 1.7567, 0.0358, -0.0503, 0.0662]) tensor([0.6547, 0.1171, 0.1075, 0.1207]) -Greedy action tensor([ 1.1862, -0.5020, -0.1287, 0.4482]) tensor([0.5177, 0.0957, 0.1390, 0.2475]) -Greedy action tensor([ 1.3806, -0.5689, -0.1060, 0.0671]) tensor([0.6107, 0.0869, 0.1381, 0.1642]) -Greedy action tensor([ 1.4944, -0.3111, -0.6084, 0.4067]) tensor([0.6160, 0.1013, 0.0752, 0.2076]) -Greedy action tensor([ 2.5345, 0.3590, -0.4528, 0.6261]) tensor([0.7620, 0.0865, 0.0384, 0.1130]) -Greedy action tensor([ 2.0300, -0.5442, -0.2112, 0.5048]) tensor([0.7142, 0.0544, 0.0759, 0.1554]) -Greedy action tensor([ 1.6166, -0.6311, -0.4311, 0.4695]) tensor([0.6442, 0.0681, 0.0831, 0.2046]) -Greedy action tensor([ 1.2837, -0.2212, -0.2708, 0.0733]) tensor([0.5776, 0.1282, 0.1220, 0.1722]) -Greedy action tensor([ 1.4994, -0.5484, -0.2774, 0.1392]) tensor([0.6432, 0.0830, 0.1088, 0.1650]) -Greedy action tensor([ 2.0361, -0.4244, -0.1863, 0.0633]) tensor([0.7503, 0.0641, 0.0813, 0.1043]) -Greedy action tensor([ 1.4183, -0.6318, -0.6033, 0.3462]) tensor([0.6237, 0.0803, 0.0826, 0.2135]) -Greedy action tensor([ 1.6035, -0.7612, -0.0407, 0.2970]) tensor([0.6419, 0.0603, 0.1240, 0.1738]) -Greedy action tensor([ 1.3024, -0.4156, -0.7341, 0.5414]) tensor([0.5627, 0.1010, 0.0734, 0.2629]) -Greedy action tensor([ 0.9364, -0.3194, -0.2303, -0.5655]) tensor([0.5498, 0.1566, 0.1712, 0.1224]) -Greedy action tensor([ 1.5553, -0.1750, -0.0602, -0.0581]) tensor([0.6348, 0.1125, 0.1262, 0.1265]) -Greedy action tensor([ 1.5966, -0.7268, 0.0734, 0.2351]) tensor([0.6361, 0.0623, 0.1387, 0.1630]) -Greedy action tensor([ 1.1504, -0.2516, -0.3869, 0.3424]) tensor([0.5244, 0.1291, 0.1127, 0.2338]) -Greedy action tensor([ 2.3063, -0.8031, -0.5131, 0.4975]) tensor([0.7886, 0.0352, 0.0470, 0.1292]) -Greedy action tensor([ 1.1737, -0.2348, -0.7534, 0.5591]) tensor([0.5179, 0.1266, 0.0754, 0.2801]) -Greedy action tensor([ 1.3389, -0.5128, -0.2350, 0.2136]) tensor([0.5921, 0.0930, 0.1227, 0.1922]) -Greedy action tensor([ 1.6103, -0.5929, -0.1240, 0.3584]) tensor([0.6357, 0.0702, 0.1122, 0.1818]) -Greedy action tensor([ 1.6964, -0.3706, -0.0300, 0.5820]) tensor([0.6125, 0.0775, 0.1090, 0.2010]) -Greedy action tensor([ 1.2779, -0.3596, -0.2691, 0.2779]) tensor([0.5633, 0.1095, 0.1199, 0.2072]) -Greedy action tensor([ 1.1685, -0.4416, -0.3003, 0.1774]) tensor([0.5552, 0.1110, 0.1278, 0.2061]) -Greedy action tensor([ 1.9558, -0.9196, -0.5199, 0.4291]) tensor([0.7365, 0.0415, 0.0619, 0.1600]) -Greedy action tensor([ 1.5505, -0.6642, -0.1629, 0.2955]) tensor([0.6351, 0.0693, 0.1145, 0.1811]) -Greedy action tensor([ 1.0012, -0.1848, 0.0661, 0.1597]) tensor([0.4697, 0.1435, 0.1844, 0.2025]) -Greedy action tensor([ 1.2555, -0.2359, 0.0725, 0.0280]) tensor([0.5481, 0.1234, 0.1679, 0.1606]) -Greedy action tensor([ 2.5566, -1.4925, -0.2357, 0.6825]) tensor([0.8116, 0.0142, 0.0497, 0.1246]) -Greedy action tensor([ 1.2424, -0.3342, -0.3685, 0.3085]) tensor([0.5557, 0.1149, 0.1110, 0.2184]) -Greedy action tensor([ 1.0305, -0.5413, -0.6160, 0.4424]) tensor([0.5113, 0.1062, 0.0985, 0.2840]) -Greedy action tensor([ 1.8558, -0.0489, -0.8691, 0.0564]) tensor([0.7247, 0.1079, 0.0475, 0.1199]) -Greedy action tensor([ 1.0069, -0.3575, -0.2050, 0.2972]) tensor([0.4890, 0.1250, 0.1455, 0.2405]) -Greedy action tensor([ 1.1266, 0.3293, 0.4973, -0.5242]) tensor([0.4597, 0.2071, 0.2450, 0.0882]) -Greedy action tensor([ 1.6314, -1.1159, -0.4691, 0.0054]) tensor([0.7229, 0.0463, 0.0885, 0.1422]) -Greedy action tensor([ 1.5279, -0.7428, -0.0578, 0.6635]) tensor([0.5782, 0.0597, 0.1184, 0.2436]) -Greedy action tensor([ 1.3328, -0.4079, -0.6601, 0.3909]) tensor([0.5877, 0.1031, 0.0801, 0.2291]) -Greedy action tensor([ 1.3434, -0.7163, 0.6803, 0.3741]) tensor([0.4945, 0.0630, 0.2548, 0.1876]) -Greedy action tensor([ 0.4244, -1.0070, 0.6515, 0.1787]) tensor([0.3052, 0.0729, 0.3831, 0.2388]) -Greedy action tensor([-0.8138, -1.3885, -0.0892, -0.0976]) tensor([0.1763, 0.0992, 0.3638, 0.3608]) -Greedy action tensor([ 1.1703, -0.1640, 0.7499, 0.4382]) tensor([0.4165, 0.1097, 0.2736, 0.2003]) -Greedy action tensor([ 0.8334, -0.1612, 0.4557, 0.4550]) tensor([0.3649, 0.1350, 0.2501, 0.2500]) -Greedy action tensor([ 0.4623, -1.2172, -0.7642, -0.2712]) tensor([0.5102, 0.0951, 0.1496, 0.2450]) -Greedy action tensor([-0.3017, -0.1803, -0.2065, 0.2011]) tensor([0.2048, 0.2313, 0.2253, 0.3387]) -Greedy action tensor([ 0.5949, -1.1815, -0.8581, 1.4808]) tensor([0.2612, 0.0442, 0.0611, 0.6335]) -Greedy action tensor([-0.7793, -0.1337, 0.2895, -0.3839]) tensor([0.1369, 0.2611, 0.3987, 0.2033]) -Greedy action tensor([-0.1057, -0.5184, -0.0103, -0.6520]) tensor([0.2993, 0.1981, 0.3293, 0.1733]) -Greedy action tensor([ 1.1943, -1.2873, 0.0958, 1.7932]) tensor([0.3089, 0.0258, 0.1030, 0.5623]) -Greedy action tensor([-0.6120, -0.5975, 0.9438, 0.4901]) tensor([0.1024, 0.1039, 0.4853, 0.3083]) -Greedy action tensor([-0.6048, 0.5335, 0.7963, -1.0302]) tensor([0.1132, 0.3533, 0.4595, 0.0740]) -Greedy action tensor([ 0.7378, -1.6525, 0.8057, -0.0021]) tensor([0.3789, 0.0347, 0.4055, 0.1808]) -Greedy action tensor([ 1.0563, -0.1800, 0.8569, 0.0878]) tensor([0.4017, 0.1167, 0.3291, 0.1525]) -Greedy action tensor([ 0.5835, 0.4571, -0.0907, 0.6777]) tensor([0.2866, 0.2525, 0.1460, 0.3149]) -Greedy action tensor([-0.4366, -1.4115, 0.1201, -0.5064]) tensor([0.2466, 0.0930, 0.4304, 0.2300]) -Greedy action tensor([1.0454, 0.3643, 0.0327, 0.3447]) tensor([0.4227, 0.2139, 0.1536, 0.2098]) -Greedy action tensor([-0.5918, -1.2887, 0.6033, 0.1652]) tensor([0.1442, 0.0718, 0.4765, 0.3075]) -Greedy action tensor([ 0.4981, -1.0779, -0.7398, -0.6063]) tensor([0.5470, 0.1131, 0.1586, 0.1813]) -Greedy action tensor([ 0.0896, 0.6045, -1.1099, -0.5276]) tensor([0.2846, 0.4762, 0.0857, 0.1535]) -Greedy action tensor([-0.9265, 0.2449, 0.4212, -0.8356]) tensor([0.1090, 0.3519, 0.4197, 0.1194]) -Greedy action tensor([-0.0111, -0.5732, 0.0045, 0.0248]) tensor([0.2761, 0.1574, 0.2804, 0.2862]) -Greedy action tensor([-0.2957, -0.2996, 0.2863, -0.2761]) tensor([0.2081, 0.2073, 0.3724, 0.2122]) -Greedy action tensor([ 1.5458, -0.5491, 0.9681, 0.4980]) tensor([0.4914, 0.0605, 0.2758, 0.1724]) -Greedy action tensor([ 0.4426, 0.1660, 0.0671, -0.0636]) tensor([0.3281, 0.2488, 0.2254, 0.1978]) -Greedy action tensor([ 0.1235, -0.6978, 0.1730, 1.5545]) tensor([0.1498, 0.0659, 0.1575, 0.6268]) -Greedy action tensor([-1.0752, -0.6206, 0.2598, -1.0902]) tensor([0.1359, 0.2140, 0.5163, 0.1338]) -Greedy action tensor([ 0.3797, -1.5514, -0.1722, 0.7587]) tensor([0.3143, 0.0456, 0.1810, 0.4591]) -Greedy action tensor([ 0.0219, -1.0454, 1.3694, -0.0616]) tensor([0.1636, 0.0563, 0.6296, 0.1505]) -Greedy action tensor([ 0.0487, 0.9008, 0.3173, -0.4983]) tensor([0.1912, 0.4482, 0.2501, 0.1106]) -Greedy action tensor([ 0.9691, -1.8583, -0.1477, 0.0055]) tensor([0.5656, 0.0335, 0.1851, 0.2158]) -Greedy action tensor([-0.3670, 0.8996, -0.0999, -1.2286]) tensor([0.1593, 0.5653, 0.2081, 0.0673]) -Greedy action tensor([ 0.2658, 0.7131, -0.1389, -0.5119]) tensor([0.2710, 0.4238, 0.1808, 0.1245]) -Greedy action tensor([ 0.2266, 0.6637, 0.1706, -0.1801]) tensor([0.2404, 0.3722, 0.2273, 0.1601]) -Greedy action tensor([-0.5338, -0.3435, -0.0590, -1.0124]) tensor([0.2254, 0.2726, 0.3623, 0.1397]) -Greedy action tensor([ 0.0699, -0.0157, 0.1518, -0.1803]) tensor([0.2644, 0.2427, 0.2870, 0.2059]) -Greedy action tensor([-1.2872, -1.5430, 0.5450, -0.5477]) tensor([0.0988, 0.0765, 0.6176, 0.2071]) -Greedy action tensor([-0.5106, -1.6389, 1.4080, -0.4507]) tensor([0.1087, 0.0352, 0.7406, 0.1154]) -Greedy action tensor([-0.0362, -0.5340, -1.0799, 0.2319]) tensor([0.3060, 0.1860, 0.1078, 0.4002]) -Greedy action tensor([-0.2200, -1.1798, 0.2490, -1.3731]) tensor([0.3033, 0.1162, 0.4848, 0.0957]) -Greedy action tensor([ 0.6444, -1.5200, -0.4025, 1.2210]) tensor([0.3081, 0.0354, 0.1081, 0.5484]) -Greedy action tensor([-0.1045, -1.1648, -0.3068, 0.7162]) tensor([0.2255, 0.0781, 0.1842, 0.5123]) -Greedy action tensor([-0.4064, -0.3894, 0.5172, -1.5435]) tensor([0.2059, 0.2094, 0.5186, 0.0660]) -Greedy action tensor([ 0.7123, -0.2210, -0.1031, -0.0333]) tensor([0.4329, 0.1702, 0.1915, 0.2054]) -Greedy action tensor([-0.5939, 0.1244, 0.5041, -0.9062]) tensor([0.1475, 0.3025, 0.4421, 0.1079]) -Greedy action tensor([ 0.4435, 0.2593, -0.0680, -0.5824]) tensor([0.3584, 0.2981, 0.2149, 0.1285]) -Greedy action tensor([-0.2540, -0.4993, 0.4799, -0.5324]) tensor([0.2163, 0.1693, 0.4507, 0.1638]) -Greedy action tensor([ 1.4415, -0.7062, 1.0414, -0.1702]) tensor([0.5034, 0.0588, 0.3374, 0.1004]) -Greedy action tensor([ 0.1235, -0.5524, 0.1239, 0.4060]) tensor([0.2607, 0.1326, 0.2608, 0.3458]) -Greedy action tensor([-0.0924, 0.0553, 0.3781, -0.6626]) tensor([0.2312, 0.2680, 0.3701, 0.1307]) -Greedy action tensor([-0.0458, -0.1450, -1.1937, -0.6492]) tensor([0.3610, 0.3269, 0.1146, 0.1975]) -Greedy action tensor([ 0.0931, -0.9956, 0.0553, 0.2146]) tensor([0.2916, 0.0982, 0.2808, 0.3293]) -Greedy action tensor([ 0.3204, 0.0640, -0.0341, -0.5528]) tensor([0.3457, 0.2675, 0.2425, 0.1444]) -Greedy action tensor([ 0.6256, -0.0712, -0.2741, 0.8224]) tensor([0.3203, 0.1595, 0.1302, 0.3899]) -Greedy action tensor([-0.8800, -1.7447, -0.3626, -0.7233]) tensor([0.2343, 0.0987, 0.3930, 0.2740]) -Greedy action tensor([ 0.9604, -0.3253, -0.8710, 0.1266]) tensor([0.5345, 0.1478, 0.0856, 0.2322]) -Greedy action tensor([-0.0127, -0.5575, -0.4344, -0.1577]) tensor([0.3225, 0.1870, 0.2115, 0.2790]) -Greedy action tensor([-0.3356, -0.4205, -0.2335, -1.0622]) tensor([0.2849, 0.2617, 0.3156, 0.1378]) -Greedy action tensor([ 0.3600, -1.4872, 0.8129, -0.1634]) tensor([0.3009, 0.0474, 0.4733, 0.1783]) -Greedy action tensor([ 0.2192, -0.6409, -0.5289, 0.9967]) tensor([0.2455, 0.1039, 0.1162, 0.5343]) -Greedy action tensor([-1.0815, -0.7192, 0.0514, -0.8528]) tensor([0.1471, 0.2113, 0.4567, 0.1849]) -Greedy action tensor([ 0.4204, -0.0480, 1.2740, 0.2146]) tensor([0.2088, 0.1307, 0.4904, 0.1700]) -Greedy action tensor([-0.0610, -0.5940, 0.4818, -0.1580]) tensor([0.2372, 0.1392, 0.4082, 0.2153]) -Greedy action tensor([ 0.9092, -1.2974, -0.3724, 0.9830]) tensor([0.4058, 0.0447, 0.1127, 0.4369]) -Greedy action tensor([-0.0892, -1.3363, -0.8673, -0.0428]) tensor([0.3579, 0.1028, 0.1644, 0.3749]) -Greedy action tensor([-0.5494, -0.8131, -0.0153, 0.0787]) tensor([0.1870, 0.1436, 0.3190, 0.3504]) -Greedy action tensor([-0.2567, 0.5466, 0.5205, -0.1392]) tensor([0.1531, 0.3418, 0.3330, 0.1722]) -Greedy action tensor([-0.2007, 0.2135, 0.3779, -0.4104]) tensor([0.1958, 0.2963, 0.3492, 0.1588]) -Greedy action tensor([-1.7553, -0.3991, 1.2095, -1.1284]) tensor([0.0383, 0.1485, 0.7417, 0.0716]) -Greedy action tensor([ 0.0676, -0.8582, -0.6327, -0.2776]) tensor([0.3845, 0.1524, 0.1909, 0.2723]) -Greedy action tensor([-0.0421, 0.0888, 0.4324, -1.2166]) tensor([0.2465, 0.2810, 0.3962, 0.0762]) -Greedy action tensor([ 1.1452, -1.2097, 0.0096, 0.6917]) tensor([0.4874, 0.0463, 0.1566, 0.3097]) -Greedy action tensor([-6.6482e-01, -5.0827e-02, -9.7120e-04, -9.8421e-01]) tensor([0.1813, 0.3349, 0.3521, 0.1317]) -Greedy action tensor([ 0.9773, -1.3137, -0.2981, 1.4531]) tensor([0.3345, 0.0338, 0.0934, 0.5383]) -Greedy action tensor([ 0.9568, -0.6823, -0.3770, 0.3415]) tensor([0.5005, 0.0972, 0.1319, 0.2705]) -Greedy action tensor([-0.2182, -1.6817, -0.7132, 0.1236]) tensor([0.3078, 0.0712, 0.1877, 0.4333]) -Greedy action tensor([-0.5201, -0.8613, 0.4658, 1.1718]) tensor([0.1018, 0.0724, 0.2729, 0.5529]) -Greedy action tensor([ 1.3305, -0.2525, -0.1489, 0.5322]) tensor([0.5310, 0.1090, 0.1210, 0.2390]) -Greedy action tensor([ 0.3239, -0.4613, -0.7434, 0.2033]) tensor([0.3723, 0.1698, 0.1280, 0.3300]) -Greedy action tensor([-0.9394, 1.0303, -0.2801, -0.6909]) tensor([0.0878, 0.6297, 0.1698, 0.1126]) -Greedy action tensor([ 0.7944, -0.5990, 0.0887, -0.4654]) tensor([0.4937, 0.1225, 0.2438, 0.1401]) -Greedy action tensor([ 0.2350, -0.0108, -0.0016, -0.0225]) tensor([0.2990, 0.2339, 0.2360, 0.2311]) -Greedy action tensor([ 0.7747, -0.3923, -0.1817, -0.3464]) tensor([0.4947, 0.1540, 0.1901, 0.1612]) -Greedy action tensor([ 0.6743, -0.5060, -0.1923, -0.0819]) tensor([0.4552, 0.1398, 0.1913, 0.2137]) -Greedy action tensor([ 0.6549, -0.3507, -0.0827, -0.2244]) tensor([0.4426, 0.1619, 0.2117, 0.1837]) -Greedy action tensor([ 1.0136, -0.6647, 0.1009, -0.6497]) tensor([0.5625, 0.1050, 0.2258, 0.1066]) -Greedy action tensor([ 0.8873, -0.5042, -0.0968, -0.1917]) tensor([0.5096, 0.1267, 0.1905, 0.1732]) -Greedy action tensor([ 0.5850, -0.1636, -0.0810, -0.0493]) tensor([0.3973, 0.1879, 0.2041, 0.2107]) -Greedy action tensor([ 0.7847, -0.3671, -0.0865, -0.2163]) tensor([0.4757, 0.1504, 0.1991, 0.1748]) -Greedy action tensor([ 1.2438, -0.6754, -0.0491, -0.8332]) tensor([0.6466, 0.0949, 0.1775, 0.0810]) -Greedy action tensor([ 0.7281, -0.2510, -0.1173, -0.1967]) tensor([0.4542, 0.1706, 0.1950, 0.1801]) -Greedy action tensor([ 0.8668, -0.2617, -0.0597, -0.1348]) tensor([0.4792, 0.1550, 0.1897, 0.1760]) -Greedy action tensor([ 0.8878, -0.3822, -0.0560, -0.1360]) tensor([0.4928, 0.1384, 0.1918, 0.1770]) -Greedy action tensor([ 0.9753, -0.3193, -0.2295, -0.2817]) tensor([0.5381, 0.1475, 0.1613, 0.1531]) -Greedy action tensor([ 0.7094, -0.4946, -0.0680, -0.2828]) tensor([0.4694, 0.1408, 0.2157, 0.1740]) -Greedy action tensor([ 0.6116, -0.2902, 0.1025, -0.3110]) tensor([0.4159, 0.1688, 0.2500, 0.1653]) -Greedy action tensor([ 1.1449, -0.4528, -0.0479, -0.5740]) tensor([0.5935, 0.1201, 0.1800, 0.1064]) -Greedy action tensor([ 0.8057, -0.5274, -0.0180, -0.2248]) tensor([0.4856, 0.1280, 0.2131, 0.1733]) -Greedy action tensor([ 0.5508, 0.1966, -0.0784, 0.0467]) tensor([0.3523, 0.2472, 0.1878, 0.2128]) -Greedy action tensor([ 1.0042, -0.4904, 0.0561, -0.1816]) tensor([0.5216, 0.1170, 0.2021, 0.1593]) -Greedy action tensor([ 0.6983, -0.5528, -0.1249, -0.2007]) tensor([0.4690, 0.1342, 0.2059, 0.1909]) -Greedy action tensor([ 0.6199, -0.1370, -0.0799, -0.1809]) tensor([0.4141, 0.1943, 0.2057, 0.1859]) -Greedy action tensor([ 1.0218, -0.3016, -0.0130, -0.0967]) tensor([0.5133, 0.1366, 0.1824, 0.1677]) -Greedy action tensor([ 0.9653, -0.9540, 0.0089, -0.3649]) tensor([0.5570, 0.0817, 0.2140, 0.1473]) -Greedy action tensor([ 0.6635, -0.3112, -0.0105, -0.2948]) tensor([0.4404, 0.1662, 0.2245, 0.1689]) -Greedy action tensor([ 0.2005, -0.0033, -0.1291, -0.1420]) tensor([0.3082, 0.2514, 0.2217, 0.2188]) -Greedy action tensor([ 0.8411, -0.6502, 0.0742, -0.3454]) tensor([0.5013, 0.1128, 0.2328, 0.1530]) -Greedy action tensor([0.3913, 0.0484, 0.0781, 0.1207]) tensor([0.3121, 0.2215, 0.2282, 0.2381]) -Greedy action tensor([ 1.0396, -0.7908, -0.0551, -0.4796]) tensor([0.5835, 0.0936, 0.1953, 0.1277]) -Greedy action tensor([ 0.7729, -0.3596, 0.0088, -0.5587]) tensor([0.4873, 0.1570, 0.2270, 0.1287]) -Greedy action tensor([ 0.7677, -0.5258, -0.1666, -0.3395]) tensor([0.5006, 0.1373, 0.1967, 0.1654]) -Greedy action tensor([ 0.9414, -0.6015, 0.0838, -0.4734]) tensor([0.5316, 0.1137, 0.2255, 0.1292]) -Greedy action tensor([ 0.8916, -0.6050, -0.1787, -0.3413]) tensor([0.5381, 0.1205, 0.1845, 0.1568]) -Greedy action tensor([ 0.6720, -0.1514, 0.0016, -0.3076]) tensor([0.4299, 0.1887, 0.2199, 0.1614]) -Greedy action tensor([ 0.8480, -0.5379, -0.1103, -0.3279]) tensor([0.5149, 0.1288, 0.1975, 0.1589]) -Greedy action tensor([ 0.7113, -0.4107, -0.0641, -0.1208]) tensor([0.4502, 0.1466, 0.2073, 0.1959]) -Greedy action tensor([ 1.1521, -0.8440, -0.0515, -0.5688]) tensor([0.6193, 0.0841, 0.1858, 0.1108]) -Greedy action tensor([ 0.6409, -0.2468, -0.0124, -0.2918]) tensor([0.4300, 0.1770, 0.2238, 0.1692]) -Greedy action tensor([ 0.7729, -0.9546, 0.2874, -0.7670]) tensor([0.4981, 0.0885, 0.3065, 0.1068]) -Greedy action tensor([ 1.0030, -0.7203, -0.0593, -0.4234]) tensor([0.5668, 0.1012, 0.1959, 0.1361]) -Greedy action tensor([ 0.5037, -0.0914, -0.1113, -0.3125]) tensor([0.3946, 0.2176, 0.2133, 0.1745]) -Greedy action tensor([ 0.3427, -0.1463, -0.0374, -0.0222]) tensor([0.3343, 0.2050, 0.2286, 0.2321]) -Greedy action tensor([ 0.9339, -0.7400, 0.0424, -0.4772]) tensor([0.5430, 0.1018, 0.2227, 0.1324]) -Greedy action tensor([ 1.0522, -0.3741, -0.2053, -0.1656]) tensor([0.5493, 0.1319, 0.1562, 0.1625]) -Greedy action tensor([ 0.7773, -0.2242, -0.1297, -0.0909]) tensor([0.4565, 0.1677, 0.1843, 0.1916]) -Greedy action tensor([ 0.3711, -0.0836, -0.2044, -0.0475]) tensor([0.3503, 0.2223, 0.1970, 0.2305]) -Greedy action tensor([ 0.6474, -0.6543, -0.1119, -0.3509]) tensor([0.4743, 0.1290, 0.2219, 0.1748]) -Greedy action tensor([ 0.5827, -0.3331, -0.0730, -0.5312]) tensor([0.4449, 0.1781, 0.2309, 0.1461]) -Greedy action tensor([ 0.6949, -0.2642, -0.0298, -0.1051]) tensor([0.4316, 0.1654, 0.2091, 0.1939]) -Greedy action tensor([ 0.8855, -0.6590, -0.0337, -0.6004]) tensor([0.5439, 0.1161, 0.2169, 0.1231]) -Greedy action tensor([ 0.6614, -0.5340, -0.1544, -0.1938]) tensor([0.4608, 0.1394, 0.2038, 0.1959]) -Greedy action tensor([ 0.9114, -0.9553, 0.0379, -0.4213]) tensor([0.5447, 0.0842, 0.2274, 0.1437]) -Greedy action tensor([ 0.6781, 0.0142, 0.0898, -0.1384]) tensor([0.3981, 0.2049, 0.2210, 0.1759]) -Greedy action tensor([ 0.6286, -0.3465, -0.0495, -0.1170]) tensor([0.4239, 0.1599, 0.2151, 0.2011]) -Greedy action tensor([ 1.0190, -0.3716, -0.0505, -0.4124]) tensor([0.5461, 0.1359, 0.1874, 0.1305]) -Greedy action tensor([ 0.5146, -0.5874, -0.1814, -0.2546]) tensor([0.4359, 0.1448, 0.2173, 0.2020]) -Greedy action tensor([ 0.7712, -0.3587, -0.1393, -0.1303]) tensor([0.4692, 0.1516, 0.1888, 0.1905]) -Greedy action tensor([ 0.2846, 0.2900, -0.1203, 0.0828]) tensor([0.2865, 0.2881, 0.1911, 0.2342]) -Greedy action tensor([ 0.7826, -0.3580, -0.0508, -0.5224]) tensor([0.4937, 0.1578, 0.2146, 0.1339]) -Greedy action tensor([ 0.7307, -0.4161, -0.0447, -0.1218]) tensor([0.4536, 0.1441, 0.2089, 0.1934]) -Greedy action tensor([ 0.8890, -0.5800, -0.1871, -0.3634]) tensor([0.5385, 0.1239, 0.1836, 0.1539]) -Greedy action tensor([ 1.1852, -0.5369, -0.2703, -0.4648]) tensor([0.6234, 0.1114, 0.1454, 0.1197]) -Greedy action tensor([ 1.1601, -0.9725, 0.0925, -0.5364]) tensor([0.6077, 0.0720, 0.2089, 0.1114]) -Greedy action tensor([ 0.5722, -0.5179, 0.1519, -0.5909]) tensor([0.4337, 0.1458, 0.2849, 0.1356]) -Greedy action tensor([ 0.4932, -0.2844, -0.0167, -0.0461]) tensor([0.3783, 0.1738, 0.2272, 0.2206]) -Greedy action tensor([ 0.7565, -0.5168, -0.1141, -0.4516]) tensor([0.5006, 0.1401, 0.2096, 0.1496]) -Greedy action tensor([ 0.5026, 0.0276, -0.0017, -0.0856]) tensor([0.3596, 0.2236, 0.2171, 0.1997]) -Greedy action tensor([ 0.7976, -0.5731, -0.0398, -0.5625]) tensor([0.5146, 0.1307, 0.2227, 0.1321]) -Greedy action tensor([ 0.7083, -0.4612, -0.1417, -0.1832]) tensor([0.4656, 0.1446, 0.1990, 0.1909]) -Greedy action tensor([ 1.0018, -0.6610, 0.0184, -0.5074]) tensor([0.5603, 0.1062, 0.2096, 0.1239]) -Greedy action tensor([ 0.7217, -0.5455, 0.0586, -0.2443]) tensor([0.4592, 0.1293, 0.2366, 0.1748]) -Greedy action tensor([ 0.8029, -0.1092, -0.0257, -0.1447]) tensor([0.4492, 0.1804, 0.1962, 0.1742]) -Greedy action tensor([ 0.9138, -0.5183, 0.0118, -0.5453]) tensor([0.5328, 0.1272, 0.2162, 0.1238]) -Greedy action tensor([ 0.8641, -0.1929, -0.1182, -0.3909]) tensor([0.4982, 0.1731, 0.1866, 0.1420]) -Greedy action tensor([ 0.6672, -0.6226, 0.0242, -0.2730]) tensor([0.4563, 0.1256, 0.2399, 0.1782]) -Greedy action tensor([ 0.8798, -0.6810, -0.0210, -0.4376]) tensor([0.5308, 0.1114, 0.2156, 0.1422]) -Greedy action tensor([ 0.7214, -0.4392, -0.1099, -0.1848]) tensor([0.4645, 0.1455, 0.2023, 0.1877]) -Greedy action tensor([ 0.6206, -0.5788, -0.0173, -0.2843]) tensor([0.4476, 0.1349, 0.2365, 0.1811]) -Greedy action tensor([ 1.2095, -0.5590, -0.0429, -0.3259]) tensor([0.5982, 0.1020, 0.1710, 0.1288]) -Greedy action tensor([ 0.7600, -0.4197, -0.1927, -0.2838]) tensor([0.4890, 0.1503, 0.1886, 0.1722]) -Greedy action tensor([ 0.8998, -0.5466, 0.0548, -0.3559]) tensor([0.5129, 0.1207, 0.2203, 0.1461]) -Greedy action tensor([-1.8206, -0.4826, 0.5975, -0.1046]) tensor([0.0463, 0.1765, 0.5197, 0.2575]) -Greedy action tensor([-1.9223, -0.4605, 0.6806, -0.1525]) tensor([0.0405, 0.1747, 0.5470, 0.2378]) -Greedy action tensor([-1.9033, -0.4504, 0.6522, -0.1579]) tensor([0.0419, 0.1790, 0.5392, 0.2399]) -Greedy action tensor([-1.8602, -0.4757, 0.6923, -0.0484]) tensor([0.0417, 0.1667, 0.5360, 0.2556]) -Greedy action tensor([-1.9030, -0.4245, 0.6490, -0.1567]) tensor([0.0417, 0.1831, 0.5358, 0.2394]) -Greedy action tensor([-1.9337, -0.4374, 0.6629, -0.1742]) tensor([0.0405, 0.1808, 0.5434, 0.2353]) -Greedy action tensor([-1.1312, -0.3613, 0.1803, 0.0339]) tensor([0.0992, 0.2143, 0.3683, 0.3182]) -Greedy action tensor([-1.0577, 0.3808, 0.1108, -0.4739]) tensor([0.0978, 0.4122, 0.3146, 0.1754]) -Greedy action tensor([-1.8418, -0.4381, 0.6128, -0.1261]) tensor([0.0449, 0.1828, 0.5227, 0.2497]) -Greedy action tensor([-1.7262, -0.4763, 0.5831, -0.0398]) tensor([0.0501, 0.1749, 0.5044, 0.2706]) -Greedy action tensor([-1.9462, -0.4476, 0.6653, -0.1814]) tensor([0.0401, 0.1795, 0.5462, 0.2342]) -Greedy action tensor([-1.7947, -0.2669, 0.5489, -0.0755]) tensor([0.0463, 0.2133, 0.4822, 0.2583]) -Greedy action tensor([-1.6608, -0.5482, 0.5000, -0.0385]) tensor([0.0562, 0.1711, 0.4879, 0.2848]) -Greedy action tensor([-0.9323, 0.9103, 0.0846, 0.3985]) tensor([0.0721, 0.4554, 0.1994, 0.2730]) -Greedy action tensor([-1.7312, -0.0582, 0.5045, -0.0977]) tensor([0.0481, 0.2561, 0.4496, 0.2462]) -Greedy action tensor([-1.6419, -0.1080, 0.6272, 0.0835]) tensor([0.0478, 0.2216, 0.4622, 0.2684]) -Greedy action tensor([-1.9139, -0.4596, 0.7046, -0.1347]) tensor([0.0401, 0.1718, 0.5503, 0.2378]) -Greedy action tensor([-1.7761, -0.4954, 0.6313, 0.0139]) tensor([0.0461, 0.1659, 0.5119, 0.2761]) -Greedy action tensor([-1.8521, -0.4630, 0.6281, -0.1359]) tensor([0.0444, 0.1781, 0.5304, 0.2471]) -Greedy action tensor([-1.8602, -0.3916, 0.6573, -0.1334]) tensor([0.0428, 0.1859, 0.5306, 0.2407]) -Greedy action tensor([-0.7689, 1.0080, 0.0565, 0.4659]) tensor([0.0792, 0.4680, 0.1807, 0.2721]) -Greedy action tensor([-0.9394, 0.9197, 0.1176, 0.2074]) tensor([0.0744, 0.4774, 0.2141, 0.2342]) -Greedy action tensor([-1.6858, -0.2482, 0.5335, -0.0298]) tensor([0.0509, 0.2143, 0.4682, 0.2666]) -Greedy action tensor([-1.9440, -0.4475, 0.6666, -0.1796]) tensor([0.0401, 0.1793, 0.5462, 0.2343]) -Greedy action tensor([-1.7248, 0.0785, 0.4743, -0.0717]) tensor([0.0469, 0.2848, 0.4231, 0.2451]) -Greedy action tensor([-1.0154, 0.1693, -0.6043, -0.5405]) tensor([0.1354, 0.4427, 0.2042, 0.2177]) -Greedy action tensor([-1.9162, -0.4481, 0.6710, -0.1508]) tensor([0.0409, 0.1773, 0.5431, 0.2387]) -Greedy action tensor([-1.9038, -0.4419, 0.6516, -0.1577]) tensor([0.0418, 0.1803, 0.5383, 0.2396]) -Greedy action tensor([-1.1574, -0.1950, 0.3644, -0.1321]) tensor([0.0910, 0.2383, 0.4169, 0.2538]) -Greedy action tensor([-1.7832, -0.4918, 0.7802, 0.0924]) tensor([0.0414, 0.1507, 0.5376, 0.2703]) -Greedy action tensor([-1.9378, -0.4445, 0.6647, -0.1759]) tensor([0.0404, 0.1797, 0.5449, 0.2351]) -Greedy action tensor([-0.9422, -0.1226, 0.7649, 0.8103]) tensor([0.0687, 0.1560, 0.3789, 0.3964]) -Greedy action tensor([-1.7018, -0.2830, 0.6031, -0.0502]) tensor([0.0491, 0.2029, 0.4920, 0.2560]) -Greedy action tensor([-1.8720, -0.4524, 0.6275, -0.1472]) tensor([0.0436, 0.1804, 0.5312, 0.2448]) -Greedy action tensor([-0.7294, -0.1792, 0.4092, 0.8186]) tensor([0.0947, 0.1642, 0.2957, 0.4453]) -Greedy action tensor([-1.6931, -0.3592, 0.5278, -0.0798]) tensor([0.0525, 0.1995, 0.4842, 0.2638]) -Greedy action tensor([-1.8648, -0.4391, 0.6236, -0.1389]) tensor([0.0438, 0.1823, 0.5277, 0.2462]) -Greedy action tensor([-1.7126, -0.4040, 0.7296, 0.1543]) tensor([0.0441, 0.1633, 0.5073, 0.2854]) -Greedy action tensor([-1.8461, -0.1958, 0.5959, -0.1180]) tensor([0.0429, 0.2232, 0.4927, 0.2413]) -Greedy action tensor([-1.7383, -0.4249, 0.6451, 0.1560]) tensor([0.0450, 0.1674, 0.4882, 0.2993]) -Greedy action tensor([-1.0577, -0.4761, 0.3202, -0.0626]) tensor([0.1057, 0.1891, 0.4193, 0.2859]) -Greedy action tensor([-1.7938, -0.3847, 0.6507, 0.0485]) tensor([0.0436, 0.1785, 0.5026, 0.2753]) -Greedy action tensor([-1.5995, -0.5143, 0.5189, -0.0831]) tensor([0.0594, 0.1758, 0.4941, 0.2706]) -Greedy action tensor([-1.7594, 0.0907, 0.5008, -0.0603]) tensor([0.0446, 0.2838, 0.4276, 0.2440]) -Greedy action tensor([-1.9282, -0.4222, 0.6590, -0.1715]) tensor([0.0407, 0.1833, 0.5405, 0.2356]) -Greedy action tensor([-1.9232, -0.4311, 0.6523, -0.1675]) tensor([0.0410, 0.1824, 0.5391, 0.2375]) -Greedy action tensor([-1.8362, -0.4556, 0.6159, -0.1274]) tensor([0.0452, 0.1799, 0.5252, 0.2497]) -Greedy action tensor([-1.9257, -0.4141, 0.6506, -0.1694]) tensor([0.0409, 0.1853, 0.5373, 0.2366]) -Greedy action tensor([-1.9091, -0.4485, 0.6534, -0.1630]) tensor([0.0416, 0.1794, 0.5401, 0.2388]) -Greedy action tensor([-1.8334, -0.3991, 0.5925, -0.1163]) tensor([0.0453, 0.1901, 0.5124, 0.2522]) -Greedy action tensor([-1.7615, -0.4795, 0.5758, -0.1151]) tensor([0.0496, 0.1789, 0.5139, 0.2576]) -Greedy action tensor([-1.9070, -0.4133, 0.6482, -0.1604]) tensor([0.0416, 0.1851, 0.5350, 0.2383]) -Greedy action tensor([-1.9004, -0.3929, 0.6444, -0.1524]) tensor([0.0417, 0.1881, 0.5309, 0.2393]) -Greedy action tensor([-1.1649, -0.2602, 0.3886, -0.1677]) tensor([0.0917, 0.2265, 0.4334, 0.2485]) -Greedy action tensor([-1.8734, -0.3519, 0.6143, -0.1606]) tensor([0.0432, 0.1977, 0.5197, 0.2394]) -Greedy action tensor([ 0.7390, 1.2425, -0.0279, 0.4230]) tensor([0.2599, 0.4300, 0.1207, 0.1895]) -Greedy action tensor([-1.9069, -0.4347, 0.6462, -0.1626]) tensor([0.0418, 0.1822, 0.5369, 0.2391]) -Greedy action tensor([-0.7613, 0.8260, 0.1324, -0.0337]) tensor([0.0961, 0.4700, 0.2349, 0.1990]) -Greedy action tensor([-1.2496, -0.1170, 0.5737, 0.1204]) tensor([0.0703, 0.2181, 0.4351, 0.2765]) -Greedy action tensor([-1.6545, -0.1207, 0.5165, 0.0413]) tensor([0.0504, 0.2335, 0.4416, 0.2746]) -Greedy action tensor([-1.7591, -0.2108, 0.5995, -0.0956]) tensor([0.0464, 0.2182, 0.4906, 0.2448]) -Greedy action tensor([-1.9429, -0.4459, 0.6666, -0.1786]) tensor([0.0402, 0.1795, 0.5459, 0.2344]) -Greedy action tensor([-1.1963, -0.2678, 0.5014, 0.3690]) tensor([0.0726, 0.1837, 0.3964, 0.3473]) -Greedy action tensor([-1.0995, 0.4831, 0.0986, -0.0183]) tensor([0.0824, 0.4013, 0.2732, 0.2431]) -Greedy action tensor([-1.7285, -0.3472, 0.6313, -0.0057]) tensor([0.0472, 0.1880, 0.5002, 0.2645]) -Greedy action tensor([-0.6761, -0.4420, 0.1244, 0.7599]) tensor([0.1150, 0.1454, 0.2561, 0.4835]) -Greedy action tensor([-1.8671, -0.4926, 0.6449, -0.1423]) tensor([0.0437, 0.1727, 0.5386, 0.2451]) -Greedy action tensor([-0.7729, 0.1919, 0.0642, -0.0346]) tensor([0.1246, 0.3270, 0.2878, 0.2607]) -Greedy action tensor([-1.8313, -0.3673, 0.5994, -0.1436]) tensor([0.0453, 0.1956, 0.5144, 0.2447]) -Greedy action tensor([-1.3976, -0.2130, 0.6413, -0.6417]) tensor([0.0710, 0.2322, 0.5456, 0.1512]) -Greedy action tensor([-1.9317, -0.3875, 0.6506, -0.1701]) tensor([0.0404, 0.1894, 0.5348, 0.2354]) -Greedy action tensor([-1.9406, -0.4451, 0.6660, -0.1784]) tensor([0.0403, 0.1796, 0.5456, 0.2345]) -Greedy action tensor([-1.8978, -0.4535, 0.6478, -0.1573]) tensor([0.0422, 0.1789, 0.5382, 0.2406]) -Greedy action tensor([-1.2970, -0.0936, 0.0248, -0.3745]) tensor([0.0944, 0.3144, 0.3539, 0.2374]) -Greedy action tensor([-1.2296, 0.0744, 0.4468, 0.1277]) tensor([0.0719, 0.2647, 0.3842, 0.2792]) -Greedy action tensor([-1.8908, -0.4267, 0.6634, -0.1453]) tensor([0.0418, 0.1808, 0.5378, 0.2396]) -Greedy action tensor([-1.8765, -0.4572, 0.6411, -0.1460]) tensor([0.0431, 0.1784, 0.5350, 0.2435]) -Greedy action tensor([-1.8914, -0.3722, 0.6230, -0.1498]) tensor([0.0423, 0.1933, 0.5229, 0.2415]) -Greedy action tensor([-1.8560, -0.3622, 0.6191, -0.1835]) tensor([0.0441, 0.1965, 0.5243, 0.2350]) -Greedy action tensor([-1.4242, 0.6094, 0.4555, -0.3351]) tensor([0.0551, 0.4207, 0.3607, 0.1636]) -Greedy action tensor([-1.8154, -0.4308, 0.5786, -0.1941]) tensor([0.0476, 0.1901, 0.5215, 0.2408]) -Greedy action tensor([ 1.5869, -0.7118, -0.7264, 0.2743]) tensor([0.6810, 0.0684, 0.0674, 0.1833]) -Greedy action tensor([ 1.0460, -0.5206, -0.5747, 0.2936]) tensor([0.5326, 0.1112, 0.1053, 0.2510]) -Greedy action tensor([ 2.2478, -0.5164, -0.5182, 0.2457]) tensor([0.7930, 0.0500, 0.0499, 0.1071]) -Greedy action tensor([ 0.8449, -0.5469, 0.1709, 0.0709]) tensor([0.4506, 0.1120, 0.2296, 0.2078]) -Greedy action tensor([ 2.2633, -1.6219, -0.2081, 0.2703]) tensor([0.8056, 0.0166, 0.0680, 0.1098]) -Greedy action tensor([ 1.7469, -0.7726, -0.0299, 0.3988]) tensor([0.6625, 0.0533, 0.1121, 0.1721]) -Greedy action tensor([ 1.5961, -0.4931, -0.5922, 0.2586]) tensor([0.6674, 0.0826, 0.0748, 0.1752]) -Greedy action tensor([ 1.5047, -0.7192, -0.1820, 0.4474]) tensor([0.6095, 0.0659, 0.1128, 0.2117]) -Greedy action tensor([ 1.5967, -0.1995, -0.4363, 0.3931]) tensor([0.6262, 0.1039, 0.0820, 0.1879]) -Greedy action tensor([ 2.4110, -0.9130, -0.2331, 0.3982]) tensor([0.8060, 0.0290, 0.0573, 0.1077]) -Greedy action tensor([ 1.6987, -0.5414, -0.4134, 0.5281]) tensor([0.6504, 0.0692, 0.0787, 0.2017]) -Greedy action tensor([ 1.3040, 0.3385, -0.6545, 0.0989]) tensor([0.5490, 0.2090, 0.0774, 0.1645]) -Greedy action tensor([ 1.1764, -0.7437, -0.3407, 0.5331]) tensor([0.5287, 0.0775, 0.1160, 0.2778]) -Greedy action tensor([ 1.6802, -0.4920, -0.0644, 0.5201]) tensor([0.6242, 0.0711, 0.1090, 0.1956]) -Greedy action tensor([ 1.6923, -0.8116, -0.2524, 0.3778]) tensor([0.6696, 0.0548, 0.0958, 0.1799]) -Greedy action tensor([ 1.4773, -0.1801, -0.1873, 0.5026]) tensor([0.5691, 0.1085, 0.1077, 0.2147]) -Greedy action tensor([ 1.6839, 0.3553, -0.1568, -0.0698]) tensor([0.6263, 0.1659, 0.0994, 0.1084]) -Greedy action tensor([ 1.4492, -0.2151, -0.2458, 0.1069]) tensor([0.6119, 0.1159, 0.1124, 0.1599]) -Greedy action tensor([ 1.2422, -0.4172, -0.6330, 0.0101]) tensor([0.6115, 0.1163, 0.0938, 0.1784]) -Greedy action tensor([ 1.2258, -0.7169, -0.2114, 0.7509]) tensor([0.4993, 0.0716, 0.1186, 0.3105]) -Greedy action tensor([ 1.8437, -0.4116, -0.8253, 0.6369]) tensor([0.6787, 0.0712, 0.0471, 0.2031]) -Greedy action tensor([ 1.1725, -0.4172, 0.0302, 0.1379]) tensor([0.5323, 0.1086, 0.1699, 0.1892]) -Greedy action tensor([ 1.8142, -0.7574, -0.4489, 0.4486]) tensor([0.6965, 0.0532, 0.0725, 0.1778]) -Greedy action tensor([ 1.1610, -0.2235, -0.5052, 0.8401]) tensor([0.4619, 0.1157, 0.0873, 0.3351]) -Greedy action tensor([ 0.9304, 0.3728, -0.6296, 0.9415]) tensor([0.3579, 0.2049, 0.0752, 0.3619]) -Greedy action tensor([ 2.3384, -0.8006, -0.0731, 0.6817]) tensor([0.7554, 0.0327, 0.0677, 0.1441]) -Greedy action tensor([ 1.5687, -0.6862, -0.3934, 0.5671]) tensor([0.6201, 0.0650, 0.0872, 0.2277]) -Greedy action tensor([ 1.5545, -0.4114, -0.4724, 0.2071]) tensor([0.6529, 0.0914, 0.0860, 0.1697]) -Greedy action tensor([ 1.7124, 0.2554, -0.5829, 0.6703]) tensor([0.5930, 0.1381, 0.0597, 0.2092]) -Greedy action tensor([ 1.7213, -0.7426, -0.3862, 0.3151]) tensor([0.6888, 0.0586, 0.0837, 0.1688]) -Greedy action tensor([ 2.0398, -0.9171, -0.1900, 0.7783]) tensor([0.6931, 0.0360, 0.0745, 0.1963]) -Greedy action tensor([ 2.2690, -0.7292, -0.4241, 0.1733]) tensor([0.8061, 0.0402, 0.0546, 0.0991]) -Greedy action tensor([ 2.1992, -1.2506, -0.1529, 0.8133]) tensor([0.7262, 0.0231, 0.0691, 0.1816]) -Greedy action tensor([ 1.5955, -0.4071, 0.1501, 0.5898]) tensor([0.5759, 0.0777, 0.1357, 0.2107]) -Greedy action tensor([ 1.7756, -0.8919, -0.4683, 0.6249]) tensor([0.6703, 0.0465, 0.0711, 0.2121]) -Greedy action tensor([ 1.1834, -0.2095, -0.6255, 0.3166]) tensor([0.5457, 0.1355, 0.0894, 0.2294]) -Greedy action tensor([ 1.4510, -0.5790, -0.3814, 0.3898]) tensor([0.6107, 0.0802, 0.0977, 0.2113]) -Greedy action tensor([ 1.7152, -0.3054, -0.7484, 0.3116]) tensor([0.6833, 0.0906, 0.0582, 0.1679]) -Greedy action tensor([ 2.0558, -0.4863, -0.4486, 0.4124]) tensor([0.7387, 0.0581, 0.0604, 0.1428]) -Greedy action tensor([ 1.6122, -0.6169, -0.3099, 0.1378]) tensor([0.6744, 0.0726, 0.0987, 0.1544]) -Greedy action tensor([ 1.8154, -0.6192, -0.3642, 0.5758]) tensor([0.6710, 0.0588, 0.0759, 0.1943]) -Greedy action tensor([ 1.3212, -0.5587, -0.1481, 0.3318]) tensor([0.5700, 0.0870, 0.1311, 0.2119]) -Greedy action tensor([ 0.9152, -0.7025, 0.2461, -0.0187]) tensor([0.4754, 0.0943, 0.2435, 0.1868]) -Greedy action tensor([ 1.7841, -0.0413, -0.7610, 0.1133]) tensor([0.7004, 0.1129, 0.0550, 0.1318]) -Greedy action tensor([ 1.6676, -0.1882, -0.3470, 0.3126]) tensor([0.6461, 0.1010, 0.0862, 0.1667]) -Greedy action tensor([ 1.4058, -0.1246, -0.5621, 0.0997]) tensor([0.6146, 0.1330, 0.0859, 0.1665]) -Greedy action tensor([ 2.3146, -1.0000, -0.3580, 0.4519]) tensor([0.7932, 0.0288, 0.0548, 0.1231]) -Greedy action tensor([ 1.6594, 0.4645, -0.4863, 0.3219]) tensor([0.5945, 0.1800, 0.0695, 0.1560]) -Greedy action tensor([ 1.9828, -0.8700, 0.4044, 0.4460]) tensor([0.6761, 0.0390, 0.1395, 0.1454]) -Greedy action tensor([ 1.4180, -0.9631, -0.3296, 0.7381]) tensor([0.5639, 0.0521, 0.0982, 0.2857]) -Greedy action tensor([ 2.0950, -0.5533, 0.1221, 0.1515]) tensor([0.7391, 0.0523, 0.1028, 0.1058]) -Greedy action tensor([ 1.1598, -0.1364, -0.8526, 0.3963]) tensor([0.5338, 0.1460, 0.0714, 0.2488]) -Greedy action tensor([ 1.7827, -0.5478, -0.5478, 0.7774]) tensor([0.6408, 0.0623, 0.0623, 0.2345]) -Greedy action tensor([ 2.7667, -1.7009, 0.0414, 0.6807]) tensor([0.8325, 0.0096, 0.0546, 0.1034]) -Greedy action tensor([ 1.6004, -0.7142, -0.3251, 0.3475]) tensor([0.6535, 0.0646, 0.0953, 0.1867]) -Greedy action tensor([ 0.9288, -0.2647, -0.0047, 0.2175]) tensor([0.4572, 0.1386, 0.1797, 0.2245]) -Greedy action tensor([ 2.2870, -0.8907, -0.2255, 0.6982]) tensor([0.7536, 0.0314, 0.0611, 0.1539]) -Greedy action tensor([ 1.2038, -0.6301, -0.3232, 0.1287]) tensor([0.5820, 0.0930, 0.1264, 0.1986]) -Greedy action tensor([ 1.3216, -0.9658, -0.4089, 1.0203]) tensor([0.4954, 0.0503, 0.0878, 0.3665]) -Greedy action tensor([ 1.9070, -0.8926, -0.1317, 0.3699]) tensor([0.7112, 0.0433, 0.0926, 0.1529]) -Greedy action tensor([ 2.0786, -0.2818, -0.3329, 0.2183]) tensor([0.7464, 0.0705, 0.0669, 0.1162]) -Greedy action tensor([ 1.3044, -0.2410, -0.5471, 0.2753]) tensor([0.5788, 0.1234, 0.0909, 0.2069]) -Greedy action tensor([ 1.9381, -1.4419, -0.2339, 0.8676]) tensor([0.6708, 0.0228, 0.0764, 0.2300]) -Greedy action tensor([ 1.8037, -0.3809, -0.5630, 0.3009]) tensor([0.6999, 0.0788, 0.0656, 0.1557]) -Greedy action tensor([ 1.5994, -0.5226, -0.4863, 0.6975]) tensor([0.6061, 0.0726, 0.0753, 0.2460]) -Greedy action tensor([ 1.7244, -0.4763, -0.9156, 0.0342]) tensor([0.7318, 0.0810, 0.0522, 0.1350]) -Greedy action tensor([ 1.4156, -0.6055, -0.8089, 0.3967]) tensor([0.6244, 0.0827, 0.0675, 0.2254]) -Greedy action tensor([ 2.0301, -1.0305, -0.3567, 0.3761]) tensor([0.7518, 0.0352, 0.0691, 0.1438]) -Greedy action tensor([ 2.0892, -1.0954, -0.4670, 0.3255]) tensor([0.7750, 0.0321, 0.0601, 0.1328]) -Greedy action tensor([ 1.3013, -0.0297, -0.5443, 0.2765]) tensor([0.5615, 0.1484, 0.0887, 0.2015]) -Greedy action tensor([ 1.2677, -0.4905, -0.0065, 0.2114]) tensor([0.5556, 0.0958, 0.1554, 0.1932]) -Greedy action tensor([ 1.1310, -0.3047, -0.1475, 0.3670]) tensor([0.5045, 0.1200, 0.1405, 0.2350]) -Greedy action tensor([ 1.2998, -0.3308, -0.7225, 0.3789]) tensor([0.5793, 0.1134, 0.0767, 0.2306]) -Greedy action tensor([ 0.9085, -0.2973, -0.1381, 0.2222]) tensor([0.4642, 0.1390, 0.1630, 0.2337]) -Greedy action tensor([ 0.9656, -0.5267, -0.5829, 0.3947]) tensor([0.4994, 0.1123, 0.1062, 0.2822]) -Greedy action tensor([ 1.2779, -0.0493, -0.9484, 0.4344]) tensor([0.5545, 0.1471, 0.0598, 0.2386]) -Greedy action tensor([ 1.5782, -0.0822, -0.2917, 0.1665]) tensor([0.6297, 0.1197, 0.0971, 0.1535]) -Greedy action tensor([ 2.1240, -0.8860, -0.2047, 0.8910]) tensor([0.6953, 0.0343, 0.0677, 0.2026]) -Greedy action tensor([ 1.8468, -0.7517, -0.5241, 0.3173]) tensor([0.7223, 0.0537, 0.0675, 0.1565]) -Greedy action tensor([ 1.1396, -0.1217, -0.6095, 0.1567]) tensor([0.5460, 0.1547, 0.0950, 0.2043]) -Greedy action tensor([ 1.2136, -0.0458, -0.7198, 0.2265]) tensor([0.5552, 0.1576, 0.0803, 0.2069]) -Greedy action tensor([-0.9302, -0.0061, -0.8346, -0.0904]) tensor([0.1442, 0.3633, 0.1586, 0.3339]) -Greedy action tensor([-0.0267, 0.0391, 0.6387, -0.7368]) tensor([0.2220, 0.2371, 0.4318, 0.1091]) -Greedy action tensor([ 1.0413, 0.2034, 1.0418, -0.0584]) tensor([0.3615, 0.1564, 0.3617, 0.1204]) -Greedy action tensor([-1.7549, -0.5864, 1.1809, -1.4903]) tensor([0.0411, 0.1321, 0.7734, 0.0535]) -Greedy action tensor([ 1.0400, -0.4548, 0.6053, 0.4177]) tensor([0.4152, 0.0931, 0.2688, 0.2228]) -Greedy action tensor([-0.4560, -2.0210, -0.3247, -0.5529]) tensor([0.3070, 0.0642, 0.3501, 0.2787]) -Greedy action tensor([ 0.7468, -1.1848, 0.4400, 0.7812]) tensor([0.3430, 0.0497, 0.2524, 0.3550]) -Greedy action tensor([-0.9583, 0.1204, -0.1070, -1.2133]) tensor([0.1417, 0.4166, 0.3319, 0.1098]) -Greedy action tensor([0.9805, 0.9359, 0.7383, 0.8405]) tensor([0.2770, 0.2649, 0.2174, 0.2408]) -Greedy action tensor([-1.5283, -0.1293, -0.0909, -0.1374]) tensor([0.0753, 0.3051, 0.3170, 0.3026]) -Greedy action tensor([-0.2832, -0.4960, 0.6789, -0.6960]) tensor([0.1966, 0.1589, 0.5144, 0.1301]) -Greedy action tensor([ 0.8871, -0.4925, 0.2950, 0.8350]) tensor([0.3631, 0.0914, 0.2008, 0.3447]) -Greedy action tensor([ 0.0296, -1.7469, 0.3001, 0.1870]) tensor([0.2739, 0.0464, 0.3591, 0.3206]) -Greedy action tensor([-0.9886, -0.9079, -0.7223, -0.5941]) tensor([0.2052, 0.2225, 0.2678, 0.3045]) -Greedy action tensor([-0.5638, -1.5185, -0.1871, 0.3272]) tensor([0.1894, 0.0729, 0.2760, 0.4617]) -Greedy action tensor([ 1.4041, -1.2990, 1.2486, 1.1863]) tensor([0.3667, 0.0246, 0.3139, 0.2949]) -Greedy action tensor([ 1.1000, 0.0453, 0.1243, -0.0089]) tensor([0.4866, 0.1695, 0.1834, 0.1605]) -Greedy action tensor([-1.2916, -1.2233, 0.5030, -0.8440]) tensor([0.1036, 0.1109, 0.6234, 0.1621]) -Greedy action tensor([ 1.7002, -0.7840, 0.9589, 1.1261]) tensor([0.4710, 0.0393, 0.2244, 0.2653]) -Greedy action tensor([-0.6448, 1.1244, -0.3345, -1.6879]) tensor([0.1165, 0.6835, 0.1589, 0.0411]) -Greedy action tensor([ 0.9792, -0.6018, 0.8457, 0.0945]) tensor([0.4010, 0.0825, 0.3509, 0.1656]) -Greedy action tensor([-0.0866, -0.8608, 0.4213, -0.6366]) tensor([0.2703, 0.1246, 0.4492, 0.1559]) -Greedy action tensor([-0.7021, 0.1876, -0.6330, -1.3393]) tensor([0.1986, 0.4835, 0.2128, 0.1050]) -Greedy action tensor([-0.1463, -0.2423, -0.1556, 0.0831]) tensor([0.2406, 0.2185, 0.2383, 0.3026]) -Greedy action tensor([-0.7239, -1.4783, -0.4087, -0.7857]) tensor([0.2645, 0.1244, 0.3625, 0.2486]) -Greedy action tensor([ 0.2211, 0.1033, -0.9388, 0.7258]) tensor([0.2591, 0.2303, 0.0812, 0.4293]) -Greedy action tensor([-0.9345, -1.1160, 0.7501, -0.0565]) tensor([0.1038, 0.0866, 0.5597, 0.2498]) -Greedy action tensor([ 0.4595, 0.6212, -0.2775, 0.0069]) tensor([0.3039, 0.3573, 0.1455, 0.1933]) -Greedy action tensor([ 0.3517, 0.1794, -0.4175, -0.1221]) tensor([0.3416, 0.2875, 0.1583, 0.2127]) -Greedy action tensor([ 0.3680, -1.5028, -0.0465, -0.1079]) tensor([0.4105, 0.0632, 0.2712, 0.2551]) -Greedy action tensor([ 0.2780, -1.9543, 0.4704, 0.1079]) tensor([0.3161, 0.0339, 0.3832, 0.2667]) -Greedy action tensor([ 0.5763, -1.1627, -0.1499, 0.4633]) tensor([0.3917, 0.0688, 0.1895, 0.3499]) -Greedy action tensor([-0.2837, -1.4772, -0.1514, -0.1697]) tensor([0.2805, 0.0850, 0.3201, 0.3144]) -Greedy action tensor([-1.7278, 0.8985, -0.4336, 0.1465]) tensor([0.0400, 0.5532, 0.1460, 0.2608]) -Greedy action tensor([-0.7123, -0.6732, 0.0531, 0.0377]) tensor([0.1586, 0.1649, 0.3409, 0.3357]) -Greedy action tensor([ 0.1555, 0.8375, 0.4868, -0.5407]) tensor([0.2054, 0.4062, 0.2861, 0.1024]) -Greedy action tensor([ 0.6107, -0.8227, 0.2982, 0.3360]) tensor([0.3663, 0.0874, 0.2680, 0.2783]) -Greedy action tensor([ 0.4611, -0.4310, -0.0765, -0.4675]) tensor([0.4186, 0.1715, 0.2445, 0.1654]) -Greedy action tensor([-0.2351, -0.5158, -0.4320, -1.0613]) tensor([0.3318, 0.2506, 0.2725, 0.1452]) -Greedy action tensor([ 0.4822, -1.6877, -1.1269, 0.7021]) tensor([0.3906, 0.0446, 0.0781, 0.4867]) -Greedy action tensor([-0.6921, -0.6256, 0.6924, -1.2725]) tensor([0.1510, 0.1614, 0.6030, 0.0845]) -Greedy action tensor([ 0.0598, -0.7801, -0.5394, -0.3796]) tensor([0.3809, 0.1645, 0.2092, 0.2454]) -Greedy action tensor([ 0.4764, -1.8764, 0.0389, 1.0022]) tensor([0.2913, 0.0277, 0.1881, 0.4929]) -Greedy action tensor([-1.4915, -0.3142, -0.9786, 0.0354]) tensor([0.0951, 0.3085, 0.1588, 0.4376]) -Greedy action tensor([ 0.5614, -1.3590, -0.5069, -0.2114]) tensor([0.5123, 0.0751, 0.1760, 0.2366]) -Greedy action tensor([-0.6338, -1.2267, 0.5758, -0.6863]) tensor([0.1708, 0.0944, 0.5727, 0.1621]) -Greedy action tensor([ 1.1508, -1.1871, 1.3372, -0.1517]) tensor([0.3886, 0.0375, 0.4682, 0.1056]) -Greedy action tensor([ 0.0191, -0.0175, 0.6457, -0.2223]) tensor([0.2164, 0.2086, 0.4050, 0.1700]) -Greedy action tensor([ 0.1085, -0.0036, 0.1511, -0.3249]) tensor([0.2789, 0.2493, 0.2910, 0.1808]) -Greedy action tensor([ 0.2300, -0.2897, 0.4525, 0.0582]) tensor([0.2713, 0.1613, 0.3389, 0.2285]) -Greedy action tensor([-1.2038, -1.5846, 0.1009, -0.1453]) tensor([0.1212, 0.0828, 0.4468, 0.3493]) -Greedy action tensor([-0.2871, -1.1405, 0.7687, -0.5602]) tensor([0.1976, 0.0842, 0.5679, 0.1504]) -Greedy action tensor([-0.4032, 0.2240, 0.7850, -1.1112]) tensor([0.1505, 0.2817, 0.4937, 0.0741]) -Greedy action tensor([-0.3387, 0.4015, -1.4549, -0.0220]) tensor([0.2085, 0.4371, 0.0683, 0.2862]) -Greedy action tensor([ 0.1895, 0.8678, 0.0022, -0.9734]) tensor([0.2432, 0.4792, 0.2016, 0.0760]) -Greedy action tensor([-0.0262, -1.7075, -0.6541, 0.7679]) tensor([0.2543, 0.0473, 0.1357, 0.5626]) -Greedy action tensor([-0.9496, -0.2342, -1.0158, -0.3053]) tensor([0.1699, 0.3475, 0.1590, 0.3236]) -Greedy action tensor([-0.6057, -1.1674, 0.5647, -1.0160]) tensor([0.1833, 0.1045, 0.5907, 0.1216]) -Greedy action tensor([ 0.7620, -0.0183, 0.6834, 0.4703]) tensor([0.3195, 0.1464, 0.2954, 0.2387]) -Greedy action tensor([-0.0570, -0.3974, -0.5181, 0.8149]) tensor([0.2113, 0.1503, 0.1332, 0.5052]) -Greedy action tensor([ 0.1471, -1.7682, -0.2787, 0.9526]) tensor([0.2476, 0.0365, 0.1618, 0.5541]) -Greedy action tensor([ 0.1310, -0.8405, -0.2753, -0.1152]) tensor([0.3538, 0.1339, 0.2357, 0.2766]) -Greedy action tensor([-0.4740, -1.6720, 0.2757, -0.7504]) tensor([0.2394, 0.0723, 0.5067, 0.1816]) -Greedy action tensor([ 0.5995, -0.1706, -0.1669, 0.4266]) tensor([0.3612, 0.1672, 0.1678, 0.3038]) -Greedy action tensor([ 0.1778, 0.2612, -0.4445, -0.3357]) tensor([0.3104, 0.3374, 0.1666, 0.1857]) -Greedy action tensor([ 0.1048, -1.1420, 1.1347, -0.6649]) tensor([0.2197, 0.0631, 0.6154, 0.1018]) -Greedy action tensor([-0.6626, -0.2305, 0.2854, -0.6416]) tensor([0.1628, 0.2508, 0.4201, 0.1663]) -Greedy action tensor([-0.1966, -1.2942, 0.7156, -0.6919]) tensor([0.2256, 0.0753, 0.5617, 0.1375]) -Greedy action tensor([ 0.5668, -2.0207, 0.1862, 0.5594]) tensor([0.3635, 0.0273, 0.2484, 0.3608]) -Greedy action tensor([ 1.2404, -0.7812, -0.3624, -0.3949]) tensor([0.6542, 0.0866, 0.1317, 0.1275]) -Greedy action tensor([ 0.0189, -1.1885, 0.2703, 0.5578]) tensor([0.2326, 0.0695, 0.2991, 0.3987]) -Greedy action tensor([1.2531, 0.0663, 0.4142, 0.6634]) tensor([0.4363, 0.1332, 0.1886, 0.2419]) -Greedy action tensor([-0.3562, -2.4176, 0.5607, 0.3660]) tensor([0.1758, 0.0224, 0.4398, 0.3620]) -Greedy action tensor([-1.3855, -0.8851, 0.2783, -0.4655]) tensor([0.0958, 0.1580, 0.5058, 0.2404]) -Greedy action tensor([-0.5835, 0.0188, -0.2941, -0.1754]) tensor([0.1765, 0.3223, 0.2357, 0.2654]) -Greedy action tensor([-0.1242, -1.2091, 0.4913, -0.2452]) tensor([0.2454, 0.0829, 0.4542, 0.2175]) -Greedy action tensor([ 0.5571, -0.9767, -0.1323, -0.3301]) tensor([0.4696, 0.1013, 0.2357, 0.1934]) -Greedy action tensor([-0.2102, -0.3104, 0.4630, 0.7678]) tensor([0.1533, 0.1387, 0.3005, 0.4076]) -Greedy action tensor([-0.4678, -0.4076, -0.2723, 0.4777]) tensor([0.1709, 0.1815, 0.2078, 0.4399]) -Greedy action tensor([-0.9657, -0.9940, -0.0840, -0.6740]) tensor([0.1746, 0.1698, 0.4218, 0.2338]) -Greedy action tensor([-0.0103, 0.0997, -0.3348, 0.2770]) tensor([0.2397, 0.2676, 0.1733, 0.3195]) -Greedy action tensor([ 0.3436, -0.7385, 0.7485, -0.3552]) tensor([0.2998, 0.1016, 0.4495, 0.1491]) -Greedy action tensor([ 0.5868, -0.3443, -0.1308, -0.2207]) tensor([0.4296, 0.1693, 0.2096, 0.1916]) -Greedy action tensor([ 0.8742, -0.4121, 0.0915, -0.4487]) tensor([0.5000, 0.1382, 0.2286, 0.1332]) -Greedy action tensor([ 0.8041, -0.5588, -0.0519, -0.6707]) tensor([0.5237, 0.1340, 0.2225, 0.1198]) -Greedy action tensor([ 1.2185, -0.4158, -0.1930, -0.1850]) tensor([0.5936, 0.1158, 0.1447, 0.1459]) -Greedy action tensor([ 0.8473, -0.4985, -0.0300, -0.4605]) tensor([0.5137, 0.1337, 0.2136, 0.1389]) -Greedy action tensor([ 0.5755, -0.0981, -0.0822, -0.0090]) tensor([0.3868, 0.1972, 0.2004, 0.2156]) -Greedy action tensor([ 0.6869, -0.4505, 0.0450, -0.1512]) tensor([0.4387, 0.1407, 0.2309, 0.1897]) -Greedy action tensor([ 1.1878, -0.5553, -0.0723, -0.4200]) tensor([0.6028, 0.1055, 0.1710, 0.1208]) -Greedy action tensor([ 0.3011, 0.1857, -0.0908, -0.0354]) tensor([0.3048, 0.2716, 0.2060, 0.2177]) -Greedy action tensor([ 0.5509, -0.2839, 0.0278, -0.3204]) tensor([0.4090, 0.1775, 0.2424, 0.1711]) -Greedy action tensor([ 0.9144, -0.4837, 0.0227, -0.3337]) tensor([0.5144, 0.1271, 0.2109, 0.1477]) -Greedy action tensor([ 1.0233, -0.3452, -0.0248, -0.1249]) tensor([0.5202, 0.1324, 0.1824, 0.1650]) -Greedy action tensor([-0.0093, 0.2199, -0.0286, -0.2643]) tensor([0.2492, 0.3133, 0.2444, 0.1931]) -Greedy action tensor([ 0.5669, -0.0932, -0.0559, -0.0117]) tensor([0.3826, 0.1977, 0.2052, 0.2145]) -Greedy action tensor([ 0.7972, -0.7662, -0.0188, -0.3643]) tensor([0.5090, 0.1066, 0.2251, 0.1593]) -Greedy action tensor([ 0.4639, 0.1871, -0.1696, -0.4112]) tensor([0.3696, 0.2802, 0.1961, 0.1540]) -Greedy action tensor([ 0.8115, -0.2734, 0.1936, -0.1605]) tensor([0.4434, 0.1498, 0.2390, 0.1678]) -Greedy action tensor([ 0.8091, -1.1233, 0.0844, -0.4506]) tensor([0.5227, 0.0757, 0.2532, 0.1483]) -Greedy action tensor([ 1.0142, -0.4333, 0.2650, -0.5562]) tensor([0.5220, 0.1227, 0.2468, 0.1085]) -Greedy action tensor([ 0.6836, 0.2947, -0.2045, 0.2899]) tensor([0.3618, 0.2452, 0.1489, 0.2441]) -Greedy action tensor([ 0.5001, -0.1238, 0.0108, -0.0191]) tensor([0.3644, 0.1953, 0.2234, 0.2168]) -Greedy action tensor([ 0.9696, -0.8421, -0.0188, -0.3312]) tensor([0.5531, 0.0904, 0.2059, 0.1506]) -Greedy action tensor([ 0.7652, -0.4832, 0.0767, -0.4338]) tensor([0.4783, 0.1373, 0.2403, 0.1442]) -Greedy action tensor([ 1.0923, -0.5824, -0.0652, -0.4544]) tensor([0.5832, 0.1093, 0.1833, 0.1242]) -Greedy action tensor([ 0.9185, -0.4488, -0.0235, -0.3767]) tensor([0.5212, 0.1328, 0.2032, 0.1427]) -Greedy action tensor([ 0.7453, -0.5322, 0.1544, -0.1970]) tensor([0.4500, 0.1254, 0.2492, 0.1754]) -Greedy action tensor([ 0.5404, -0.3185, 0.0307, -0.0379]) tensor([0.3868, 0.1639, 0.2324, 0.2170]) -Greedy action tensor([ 0.8330, -0.6965, 0.0665, -0.2741]) tensor([0.4971, 0.1077, 0.2310, 0.1643]) -Greedy action tensor([ 0.7213, -0.6677, 0.0451, -0.3333]) tensor([0.4748, 0.1184, 0.2415, 0.1654]) -Greedy action tensor([ 0.2720, -0.0622, -0.0725, -0.1545]) tensor([0.3250, 0.2326, 0.2303, 0.2121]) -Greedy action tensor([ 0.7681, -0.2451, -0.1718, -0.2277]) tensor([0.4710, 0.1710, 0.1840, 0.1740]) -Greedy action tensor([ 0.8991, -0.4899, -0.1333, -0.6835]) tensor([0.5522, 0.1377, 0.1967, 0.1134]) -Greedy action tensor([ 0.8623, -0.1043, 0.0434, -0.5603]) tensor([0.4849, 0.1844, 0.2138, 0.1169]) -Greedy action tensor([ 0.3526, 0.3041, -0.0842, -0.1025]) tensor([0.3093, 0.2947, 0.1998, 0.1962]) -Greedy action tensor([ 0.7945, -0.4249, 0.0337, -0.2752]) tensor([0.4749, 0.1403, 0.2219, 0.1629]) -Greedy action tensor([ 0.8137, -0.4102, -0.0541, -0.3363]) tensor([0.4925, 0.1448, 0.2068, 0.1559]) -Greedy action tensor([ 0.9876, -0.7682, 0.0641, -0.3639]) tensor([0.5468, 0.0945, 0.2172, 0.1415]) -Greedy action tensor([ 1.0626, -0.5963, -0.0273, -0.5902]) tensor([0.5820, 0.1108, 0.1957, 0.1115]) -Greedy action tensor([ 0.7458, -0.6949, -0.1200, -0.2937]) tensor([0.4972, 0.1177, 0.2092, 0.1758]) -Greedy action tensor([ 0.7147, -0.4895, -0.1138, -0.1714]) tensor([0.4653, 0.1396, 0.2032, 0.1918]) -Greedy action tensor([ 0.8782, -0.7422, -0.0893, -0.3165]) tensor([0.5317, 0.1052, 0.2021, 0.1610]) -Greedy action tensor([ 0.3070, 0.1010, -0.1233, -0.0984]) tensor([0.3194, 0.2599, 0.2077, 0.2129]) -Greedy action tensor([ 0.4930, -0.2072, 0.0065, -0.2616]) tensor([0.3874, 0.1923, 0.2381, 0.1821]) -Greedy action tensor([ 0.7237, -0.8336, -0.0574, -0.3726]) tensor([0.4993, 0.1052, 0.2287, 0.1668]) -Greedy action tensor([ 0.8167, -0.4401, -0.1042, -0.2335]) tensor([0.4920, 0.1400, 0.1959, 0.1721]) -Greedy action tensor([ 0.8682, -0.9717, -0.0850, -0.4003]) tensor([0.5478, 0.0870, 0.2112, 0.1541]) -Greedy action tensor([ 0.5388, -0.5014, -0.0791, 0.0037]) tensor([0.4035, 0.1426, 0.2175, 0.2363]) -Greedy action tensor([ 1.0651, -0.6836, -0.1139, -0.4769]) tensor([0.5898, 0.1026, 0.1814, 0.1262]) -Greedy action tensor([ 1.0387, -0.5756, 0.1890, -0.4986]) tensor([0.5430, 0.1081, 0.2322, 0.1167]) -Greedy action tensor([ 0.7797, -0.6185, -0.0336, -0.4952]) tensor([0.5076, 0.1254, 0.2251, 0.1419]) -Greedy action tensor([ 0.6873, -0.4014, 0.0028, -0.3374]) tensor([0.4546, 0.1530, 0.2292, 0.1631]) -Greedy action tensor([ 0.7091, -0.1968, -0.2579, 0.0228]) tensor([0.4371, 0.1767, 0.1662, 0.2201]) -Greedy action tensor([ 0.6824, -0.4277, -0.1056, -0.2082]) tensor([0.4556, 0.1501, 0.2072, 0.1870]) -Greedy action tensor([ 0.7260, -0.6056, 0.0316, -0.3393]) tensor([0.4744, 0.1253, 0.2369, 0.1635]) -Greedy action tensor([ 0.4629, -0.1136, -0.0154, -0.0360]) tensor([0.3586, 0.2015, 0.2222, 0.2177]) -Greedy action tensor([ 9.2616e-01, -4.5133e-01, 3.2824e-04, -5.5619e-01]) tensor([0.5332, 0.1345, 0.2112, 0.1211]) -Greedy action tensor([ 0.7562, -0.3877, 0.0148, -0.1648]) tensor([0.4559, 0.1453, 0.2173, 0.1815]) -Greedy action tensor([ 0.7407, -0.2870, 0.1090, -0.2314]) tensor([0.4410, 0.1578, 0.2344, 0.1668]) -Greedy action tensor([ 0.5233, 0.0185, -0.1547, 0.0433]) tensor([0.3663, 0.2211, 0.1859, 0.2267]) -Greedy action tensor([ 0.9525, -0.8974, -0.2409, -0.8545]) tensor([0.6155, 0.0968, 0.1866, 0.1010]) -Greedy action tensor([ 1.4089, -0.7490, 0.1639, -0.8198]) tensor([0.6617, 0.0765, 0.1905, 0.0712]) -Greedy action tensor([ 0.2888, -0.1891, 0.0912, -0.3237]) tensor([0.3353, 0.2079, 0.2751, 0.1817]) -Greedy action tensor([ 0.8042, -0.1993, -0.1704, -0.2425]) tensor([0.4773, 0.1750, 0.1801, 0.1676]) -Greedy action tensor([ 0.6634, -0.3927, -0.0415, -0.4865]) tensor([0.4633, 0.1611, 0.2289, 0.1467]) -Greedy action tensor([ 1.2932, -1.0280, 0.0757, -0.8402]) tensor([0.6611, 0.0649, 0.1957, 0.0783]) -Greedy action tensor([ 0.9581, -0.5443, -0.0493, -0.4311]) tensor([0.5444, 0.1212, 0.1988, 0.1357]) -Greedy action tensor([ 0.6719, -0.5330, 0.1235, -0.2570]) tensor([0.4400, 0.1319, 0.2543, 0.1738]) -Greedy action tensor([ 0.6215, -0.1011, -0.0471, 0.0102]) tensor([0.3936, 0.1911, 0.2017, 0.2136]) -Greedy action tensor([ 0.6008, 0.1057, -0.0435, 0.0847]) tensor([0.3661, 0.2232, 0.1922, 0.2185]) -Greedy action tensor([ 0.5075, -0.1390, -0.0065, 0.0228]) tensor([0.3652, 0.1913, 0.2185, 0.2250]) -Greedy action tensor([ 1.2077, -0.5183, -0.0320, -0.8935]) tensor([0.6290, 0.1120, 0.1821, 0.0769]) -Greedy action tensor([ 0.8238, -0.3502, -0.1052, -0.3860]) tensor([0.4994, 0.1544, 0.1972, 0.1489]) -Greedy action tensor([ 0.8539, -0.6388, 0.0527, -0.1675]) tensor([0.4917, 0.1105, 0.2207, 0.1771]) -Greedy action tensor([ 0.8956, -0.7427, 0.2517, -0.4176]) tensor([0.5029, 0.0977, 0.2641, 0.1353]) -Greedy action tensor([0.8875, 0.0189, 0.0151, 0.0727]) tensor([0.4385, 0.1840, 0.1833, 0.1942]) -Greedy action tensor([ 0.9844, -0.5029, -0.3322, -0.5531]) tensor([0.5852, 0.1322, 0.1568, 0.1258]) -Greedy action tensor([ 1.0990, -0.4938, -0.1272, -0.3550]) tensor([0.5779, 0.1175, 0.1696, 0.1350]) -Greedy action tensor([ 0.9160, -0.5198, -0.0242, -0.3698]) tensor([0.5250, 0.1249, 0.2050, 0.1451]) -Greedy action tensor([ 0.6579, -0.5171, 0.0343, -0.4287]) tensor([0.4583, 0.1415, 0.2456, 0.1546]) -Greedy action tensor([ 0.2068, -0.0050, 0.0150, 0.1488]) tensor([0.2795, 0.2261, 0.2307, 0.2637]) -Greedy action tensor([ 0.2374, -0.3191, -0.1127, -0.0051]) tensor([0.3265, 0.1872, 0.2301, 0.2562]) -Greedy action tensor([-1.9081, -0.4349, 0.6470, -0.1634]) tensor([0.0417, 0.1821, 0.5373, 0.2389]) -Greedy action tensor([-1.9121, -0.2877, 0.6260, -0.1630]) tensor([0.0408, 0.2073, 0.5170, 0.2349]) -Greedy action tensor([-1.8335, -0.4570, 0.6256, -0.1100]) tensor([0.0449, 0.1779, 0.5254, 0.2518]) -Greedy action tensor([-1.8812, -0.4520, 0.6427, -0.1442]) tensor([0.0429, 0.1789, 0.5347, 0.2435]) -Greedy action tensor([-1.8999, -0.3552, 0.6254, -0.1447]) tensor([0.0417, 0.1956, 0.5213, 0.2414]) -Greedy action tensor([-1.9209, -0.4334, 0.6559, -0.1652]) tensor([0.0410, 0.1816, 0.5398, 0.2375]) -Greedy action tensor([-1.7819, -0.4577, 0.5862, -0.1046]) tensor([0.0481, 0.1808, 0.5136, 0.2574]) -Greedy action tensor([-1.9020, -0.4458, 0.6494, -0.1576]) tensor([0.0420, 0.1800, 0.5380, 0.2401]) -Greedy action tensor([-1.9018, -0.4407, 0.6429, -0.1591]) tensor([0.0421, 0.1814, 0.5361, 0.2404]) -Greedy action tensor([-1.9274, -0.4027, 0.6501, -0.1769]) tensor([0.0408, 0.1874, 0.5370, 0.2349]) -Greedy action tensor([-1.5703, 0.1468, 0.3948, -0.0317]) tensor([0.0545, 0.3033, 0.3886, 0.2537]) -Greedy action tensor([-1.2806, 0.1572, 0.4353, 0.2701]) tensor([0.0646, 0.2719, 0.3591, 0.3044]) -Greedy action tensor([-1.8405, -0.4385, 0.6205, -0.1197]) tensor([0.0447, 0.1817, 0.5238, 0.2499]) -Greedy action tensor([-1.0839, 0.6579, 0.2253, -0.0830]) tensor([0.0762, 0.4346, 0.2820, 0.2072]) -Greedy action tensor([-1.9243, -0.4235, 0.6567, -0.1645]) tensor([0.0408, 0.1830, 0.5390, 0.2371]) -Greedy action tensor([-1.9211, -0.4409, 0.6585, -0.1656]) tensor([0.0410, 0.1803, 0.5413, 0.2374]) -Greedy action tensor([-1.1404, 0.8196, 0.2032, 0.4654]) tensor([0.0591, 0.4197, 0.2266, 0.2945]) -Greedy action tensor([-1.7112, -0.4557, 0.5515, -0.0995]) tensor([0.0523, 0.1835, 0.5023, 0.2620]) -Greedy action tensor([-1.6156, -0.4378, 0.4922, 0.0051]) tensor([0.0570, 0.1852, 0.4694, 0.2884]) -Greedy action tensor([-1.7720, -0.1604, 0.5337, -0.0985]) tensor([0.0468, 0.2344, 0.4694, 0.2494]) -Greedy action tensor([-1.7884, -0.1343, 0.5591, -0.1239]) tensor([0.0455, 0.2380, 0.4761, 0.2405]) -Greedy action tensor([-1.8952, -0.4502, 0.6420, -0.1610]) tensor([0.0425, 0.1801, 0.5369, 0.2405]) -Greedy action tensor([-1.3621, -0.3554, 0.3401, 0.1504]) tensor([0.0727, 0.1989, 0.3987, 0.3298]) -Greedy action tensor([-1.8227, -0.3735, 0.6054, -0.1098]) tensor([0.0452, 0.1924, 0.5120, 0.2504]) -Greedy action tensor([-1.7054, -0.4539, 0.5511, -0.0508]) tensor([0.0519, 0.1813, 0.4954, 0.2714]) -Greedy action tensor([-0.9062, 0.8631, 0.0454, 0.3188]) tensor([0.0778, 0.4562, 0.2014, 0.2647]) -Greedy action tensor([-1.5040, -0.4883, 0.5429, 0.3469]) tensor([0.0560, 0.1545, 0.4333, 0.3562]) -Greedy action tensor([-1.9144, -0.3787, 0.6471, -0.1640]) tensor([0.0411, 0.1907, 0.5319, 0.2364]) -Greedy action tensor([-1.3126, -0.2425, 0.3266, 0.0467]) tensor([0.0772, 0.2250, 0.3975, 0.3004]) -Greedy action tensor([-0.2691, -0.0462, 0.5128, 0.8526]) tensor([0.1332, 0.1665, 0.2912, 0.4091]) -Greedy action tensor([-1.6244, 0.3879, 0.3747, -0.0077]) tensor([0.0478, 0.3579, 0.3532, 0.2410]) -Greedy action tensor([-1.4783, -0.3232, 0.4802, 0.2038]) tensor([0.0601, 0.1908, 0.4260, 0.3231]) -Greedy action tensor([-1.6655, -0.4904, 0.5219, -0.0499]) tensor([0.0550, 0.1781, 0.4902, 0.2767]) -Greedy action tensor([-1.5236, 0.5416, 0.3241, 0.0348]) tensor([0.0500, 0.3947, 0.3175, 0.2378]) -Greedy action tensor([-1.8644, -0.2773, 0.6258, -0.1232]) tensor([0.0423, 0.2067, 0.5099, 0.2411]) -Greedy action tensor([-0.9881, 0.9558, 0.1561, 0.2593]) tensor([0.0685, 0.4783, 0.2150, 0.2383]) -Greedy action tensor([-1.9033, -0.4428, 0.6685, -0.1203]) tensor([0.0411, 0.1770, 0.5377, 0.2443]) -Greedy action tensor([-1.8339, -0.4123, 0.6620, -0.1019]) tensor([0.0436, 0.1807, 0.5291, 0.2465]) -Greedy action tensor([-1.9475, -0.4515, 0.6684, -0.1823]) tensor([0.0400, 0.1786, 0.5475, 0.2338]) -Greedy action tensor([-1.7243, 0.2675, 0.4783, -0.0616]) tensor([0.0442, 0.3236, 0.3995, 0.2328]) -Greedy action tensor([-1.8890, -0.4421, 0.6402, -0.1495]) tensor([0.0426, 0.1809, 0.5341, 0.2424]) -Greedy action tensor([-1.7363, -0.2799, 0.5770, -0.0284]) tensor([0.0478, 0.2051, 0.4833, 0.2638]) -Greedy action tensor([-0.8765, 0.6471, 0.1494, 0.1438]) tensor([0.0897, 0.4115, 0.2501, 0.2487]) -Greedy action tensor([-1.8200, -0.3793, 0.5949, -0.1202]) tensor([0.0457, 0.1930, 0.5112, 0.2501]) -Greedy action tensor([-1.9259, -0.4058, 0.6535, -0.1637]) tensor([0.0407, 0.1860, 0.5364, 0.2369]) -Greedy action tensor([-1.9205, -0.4357, 0.6524, -0.1696]) tensor([0.0412, 0.1818, 0.5397, 0.2372]) -Greedy action tensor([-1.5234, 0.0638, 0.4034, -0.0328]) tensor([0.0581, 0.2843, 0.3993, 0.2582]) -Greedy action tensor([-1.3798, 0.5244, 0.2523, 0.0605]) tensor([0.0586, 0.3938, 0.3000, 0.2476]) -Greedy action tensor([-1.8617, -0.4282, 0.6234, -0.1327]) tensor([0.0438, 0.1837, 0.5257, 0.2468]) -Greedy action tensor([-1.9257, -0.4587, 0.6570, -0.1729]) tensor([0.0411, 0.1782, 0.5437, 0.2371]) -Greedy action tensor([-1.8966, -0.4776, 0.7934, -0.0486]) tensor([0.0382, 0.1577, 0.5620, 0.2421]) -Greedy action tensor([-1.9043, -0.3949, 0.6490, -0.1603]) tensor([0.0415, 0.1878, 0.5333, 0.2374]) -Greedy action tensor([-1.8490, -0.3769, 0.6557, -0.1225]) tensor([0.0431, 0.1877, 0.5271, 0.2421]) -Greedy action tensor([-1.8411, -0.4525, 0.6185, -0.1323]) tensor([0.0450, 0.1803, 0.5263, 0.2484]) -Greedy action tensor([-1.6022, 0.4748, 0.3692, 0.1203]) tensor([0.0460, 0.3668, 0.3300, 0.2573]) -Greedy action tensor([ 0.1232, 1.0835, -0.0062, 0.8049]) tensor([0.1546, 0.4039, 0.1358, 0.3057]) -Greedy action tensor([-1.2429, -0.6301, 0.2847, 0.2541]) tensor([0.0839, 0.1548, 0.3865, 0.3748]) -Greedy action tensor([-1.7044, -0.0494, 0.1603, -0.4860]) tensor([0.0622, 0.3257, 0.4016, 0.2105]) -Greedy action tensor([-1.3475, -0.3681, 0.3206, 0.1500]) tensor([0.0744, 0.1982, 0.3946, 0.3327]) -Greedy action tensor([-1.2502, -0.6026, 0.3511, 0.2772]) tensor([0.0802, 0.1532, 0.3975, 0.3692]) -Greedy action tensor([-1.8905, -0.3421, 0.6332, -0.1494]) tensor([0.0419, 0.1970, 0.5223, 0.2388]) -Greedy action tensor([-1.8994, -0.3029, 0.6184, -0.1440]) tensor([0.0415, 0.2046, 0.5141, 0.2398]) -Greedy action tensor([-1.8893, -0.4024, 0.6657, -0.1171]) tensor([0.0414, 0.1829, 0.5323, 0.2434]) -Greedy action tensor([-1.9085, -0.4092, 0.6510, -0.1599]) tensor([0.0414, 0.1854, 0.5353, 0.2379]) -Greedy action tensor([-1.5955, 0.4025, 0.5187, -0.3233]) tensor([0.0494, 0.3646, 0.4095, 0.1764]) -Greedy action tensor([-1.9037, -0.4394, 0.6454, -0.1547]) tensor([0.0419, 0.1812, 0.5361, 0.2409]) -Greedy action tensor([-1.2529, -0.5913, 0.4393, 0.4056]) tensor([0.0734, 0.1423, 0.3988, 0.3855]) -Greedy action tensor([-1.8737, -0.4515, 0.6355, -0.1438]) tensor([0.0433, 0.1796, 0.5327, 0.2444]) -Greedy action tensor([-1.7482, -0.4425, 0.6333, -0.0924]) tensor([0.0482, 0.1778, 0.5215, 0.2524]) -Greedy action tensor([-1.9336, -0.4480, 0.6585, -0.1757]) tensor([0.0407, 0.1798, 0.5435, 0.2360]) -Greedy action tensor([-1.9132, -0.4399, 0.6490, -0.1662]) tensor([0.0416, 0.1813, 0.5387, 0.2384]) -Greedy action tensor([-1.3779, -0.2255, 0.4130, -0.0944]) tensor([0.0726, 0.2299, 0.4354, 0.2621]) -Greedy action tensor([-1.8586, -0.3081, 0.6401, -0.1131]) tensor([0.0424, 0.1997, 0.5153, 0.2427]) -Greedy action tensor([-1.8348, -0.3794, 0.6290, -0.0616]) tensor([0.0436, 0.1870, 0.5125, 0.2569]) -Greedy action tensor([-1.9178, -0.4466, 0.6580, -0.1648]) tensor([0.0412, 0.1794, 0.5415, 0.2378]) -Greedy action tensor([-1.8063, -0.4503, 0.6320, 0.0203]) tensor([0.0444, 0.1721, 0.5080, 0.2756]) -Greedy action tensor([-1.9137, -0.3851, 0.6463, -0.1631]) tensor([0.0411, 0.1897, 0.5322, 0.2369]) -Greedy action tensor([-1.8377, -0.3947, 0.6464, -0.0663]) tensor([0.0433, 0.1832, 0.5190, 0.2545]) -Greedy action tensor([-1.2886, -0.5373, 0.3604, 0.2048]) tensor([0.0783, 0.1659, 0.4072, 0.3486]) -Greedy action tensor([-1.5682, -0.5466, 0.4569, 0.0163]) tensor([0.0616, 0.1711, 0.4668, 0.3005]) -Greedy action tensor([-1.8034, -0.3189, 0.5822, -0.0850]) tensor([0.0458, 0.2019, 0.4972, 0.2551]) -Greedy action tensor([ 1.6917, 0.1316, -0.4886, 0.2858]) tensor([0.6376, 0.1340, 0.0721, 0.1563]) -Greedy action tensor([ 2.4689, -0.9866, -0.4558, 0.3976]) tensor([0.8256, 0.0261, 0.0443, 0.1040]) -Greedy action tensor([ 1.8210, -0.5407, -0.5664, 0.2860]) tensor([0.7135, 0.0673, 0.0655, 0.1537]) -Greedy action tensor([ 1.4455, -0.5838, -0.1260, 0.5655]) tensor([0.5701, 0.0749, 0.1184, 0.2365]) -Greedy action tensor([ 1.2010, 0.0448, -0.2728, 0.0542]) tensor([0.5372, 0.1691, 0.1231, 0.1707]) -Greedy action tensor([ 1.7019, 0.2925, -0.2376, 0.3341]) tensor([0.6088, 0.1487, 0.0875, 0.1550]) -Greedy action tensor([ 1.6821, 0.1501, -0.5433, 0.2249]) tensor([0.6423, 0.1388, 0.0694, 0.1496]) -Greedy action tensor([ 2.0844, 0.9272, -0.2050, -0.1014]) tensor([0.6544, 0.2057, 0.0663, 0.0735]) -Greedy action tensor([ 1.4797, -0.2369, -0.4126, 0.1269]) tensor([0.6293, 0.1131, 0.0949, 0.1627]) -Greedy action tensor([ 0.8078, -0.4031, 0.0535, -0.1233]) tensor([0.4625, 0.1378, 0.2175, 0.1823]) -Greedy action tensor([ 1.8376, -0.4880, -0.0755, 0.0551]) tensor([0.7074, 0.0691, 0.1044, 0.1190]) -Greedy action tensor([ 1.2203, -0.6932, -0.3173, 0.5699]) tensor([0.5307, 0.0783, 0.1140, 0.2770]) -Greedy action tensor([ 2.2845, -0.8945, -0.7389, 0.8090]) tensor([0.7582, 0.0316, 0.0369, 0.1734]) -Greedy action tensor([ 1.5810, -0.6607, -0.6302, 0.3963]) tensor([0.6572, 0.0698, 0.0720, 0.2010]) -Greedy action tensor([ 0.6681, -0.3680, -0.0579, 0.2394]) tensor([0.4016, 0.1425, 0.1943, 0.2616]) -Greedy action tensor([ 1.7894, -0.4918, -0.3881, 0.0211]) tensor([0.7215, 0.0737, 0.0818, 0.1231]) -Greedy action tensor([ 1.5596, -1.0624, -0.1891, 0.5984]) tensor([0.6138, 0.0446, 0.1068, 0.2348]) -Greedy action tensor([ 1.8834, -0.7750, -0.6552, 0.5997]) tensor([0.7012, 0.0491, 0.0554, 0.1943]) -Greedy action tensor([ 1.5626, -0.6892, -0.3569, 0.5630]) tensor([0.6173, 0.0649, 0.0905, 0.2272]) -Greedy action tensor([ 2.2983, -1.3746, -0.0770, 0.3079]) tensor([0.7968, 0.0202, 0.0741, 0.1089]) -Greedy action tensor([ 1.7410, -0.5689, -0.6261, 0.4092]) tensor([0.6863, 0.0681, 0.0643, 0.1812]) -Greedy action tensor([ 1.7388, -0.4978, -0.5316, 0.3428]) tensor([0.6860, 0.0733, 0.0708, 0.1698]) -Greedy action tensor([ 1.7596, -0.4759, -0.3387, 0.2996]) tensor([0.6841, 0.0732, 0.0839, 0.1589]) -Greedy action tensor([ 1.3414, -0.4983, -0.3947, 0.1513]) tensor([0.6100, 0.0969, 0.1075, 0.1856]) -Greedy action tensor([ 1.5749, -0.5775, -0.2562, 0.3408]) tensor([0.6379, 0.0741, 0.1022, 0.1857]) -Greedy action tensor([ 1.4658, -0.2208, -0.4515, 0.6135]) tensor([0.5686, 0.1053, 0.0836, 0.2425]) -Greedy action tensor([ 1.1728, -0.0622, -0.2251, 0.0053]) tensor([0.5408, 0.1573, 0.1336, 0.1683]) -Greedy action tensor([ 1.4805, -0.5663, -0.1684, 0.2346]) tensor([0.6215, 0.0803, 0.1195, 0.1788]) -Greedy action tensor([ 2.0789, -0.9003, -0.1072, 0.1327]) tensor([0.7657, 0.0389, 0.0860, 0.1094]) -Greedy action tensor([ 1.2614, -0.5417, -0.4734, 0.4517]) tensor([0.5598, 0.0923, 0.0988, 0.2491]) -Greedy action tensor([ 2.0360, -0.6685, -0.6908, 1.4619]) tensor([0.5898, 0.0395, 0.0386, 0.3322]) -Greedy action tensor([ 1.2854, -0.7501, -0.0226, 0.1433]) tensor([0.5814, 0.0759, 0.1572, 0.1855]) -Greedy action tensor([ 1.3113, -1.0022, -0.2326, 0.2481]) tensor([0.6032, 0.0597, 0.1288, 0.2083]) -Greedy action tensor([ 1.6634, -0.7474, -0.0888, 0.2118]) tensor([0.6679, 0.0599, 0.1158, 0.1564]) -Greedy action tensor([ 1.3920, 0.2527, -1.2218, 0.7387]) tensor([0.5226, 0.1672, 0.0383, 0.2719]) -Greedy action tensor([ 1.4616, -0.7214, -0.1202, 0.4130]) tensor([0.5993, 0.0675, 0.1232, 0.2100]) -Greedy action tensor([ 0.9408, -0.5875, -0.0684, -0.0701]) tensor([0.5141, 0.1115, 0.1874, 0.1871]) -Greedy action tensor([ 1.5720, -0.5880, 0.0120, 0.6280]) tensor([0.5833, 0.0673, 0.1226, 0.2269]) -Greedy action tensor([ 1.0336, -0.6362, -0.4706, 0.7323]) tensor([0.4650, 0.0876, 0.1033, 0.3441]) -Greedy action tensor([ 1.4061, -0.2802, -0.0351, 0.2467]) tensor([0.5762, 0.1067, 0.1364, 0.1807]) -Greedy action tensor([ 1.0757, -0.2905, -0.0790, 0.2857]) tensor([0.4941, 0.1260, 0.1557, 0.2242]) -Greedy action tensor([ 1.5076, -0.0615, -1.0638, 0.7057]) tensor([0.5770, 0.1201, 0.0441, 0.2588]) -Greedy action tensor([ 1.7634, -0.4264, -0.4897, 0.2611]) tensor([0.6946, 0.0778, 0.0730, 0.1546]) -Greedy action tensor([ 1.8985, -0.4374, -0.7515, 0.5647]) tensor([0.6989, 0.0676, 0.0494, 0.1841]) -Greedy action tensor([ 2.0610, -1.0691, -0.3492, 0.5953]) tensor([0.7329, 0.0320, 0.0658, 0.1692]) -Greedy action tensor([ 2.7947, 0.8341, 0.2179, -0.3205]) tensor([0.7929, 0.1116, 0.0603, 0.0352]) -Greedy action tensor([ 1.2255, -0.7402, -0.1473, 0.4057]) tensor([0.5453, 0.0764, 0.1382, 0.2402]) -Greedy action tensor([ 1.2295, -0.3032, -0.5808, 0.2194]) tensor([0.5735, 0.1238, 0.0938, 0.2089]) -Greedy action tensor([ 1.8435, -0.5757, -0.2980, 0.6875]) tensor([0.6574, 0.0585, 0.0772, 0.2069]) -Greedy action tensor([ 1.2933, -0.7759, -0.1973, 0.5289]) tensor([0.5503, 0.0695, 0.1240, 0.2562]) -Greedy action tensor([ 2.1505, -0.7599, -0.3864, 0.2546]) tensor([0.7790, 0.0424, 0.0616, 0.1170]) -Greedy action tensor([ 2.0508, -0.9985, -0.3865, 0.6543]) tensor([0.7235, 0.0343, 0.0632, 0.1790]) -Greedy action tensor([ 2.0782, -1.3503, -0.1274, 0.2895]) tensor([0.7635, 0.0248, 0.0841, 0.1276]) -Greedy action tensor([ 1.3629, -0.7136, -0.2796, 0.3727]) tensor([0.5916, 0.0742, 0.1145, 0.2198]) -Greedy action tensor([ 1.8383, -1.3426, -0.1870, 0.1398]) tensor([0.7372, 0.0306, 0.0973, 0.1349]) -Greedy action tensor([ 1.7806, -0.2443, -0.5632, 0.4239]) tensor([0.6732, 0.0889, 0.0646, 0.1734]) -Greedy action tensor([ 1.2323, -0.2995, -0.1313, 0.0807]) tensor([0.5593, 0.1209, 0.1430, 0.1768]) -Greedy action tensor([ 1.1057, -0.0689, -0.6517, 0.2375]) tensor([0.5260, 0.1625, 0.0907, 0.2208]) -Greedy action tensor([ 9.5991e-01, -5.6844e-01, -2.4307e-01, -2.2072e-04]) tensor([0.5263, 0.1142, 0.1580, 0.2015]) -Greedy action tensor([ 1.2008, 0.1298, -0.6185, 0.1285]) tensor([0.5414, 0.1855, 0.0878, 0.1853]) -Greedy action tensor([ 1.3566, 0.0457, -0.5178, 0.5630]) tensor([0.5333, 0.1438, 0.0818, 0.2411]) -Greedy action tensor([ 1.5087, 0.3782, -0.2339, 0.3006]) tensor([0.5566, 0.1797, 0.0974, 0.1663]) -Greedy action tensor([ 1.5000, -0.1158, -0.3438, 0.5458]) tensor([0.5740, 0.1141, 0.0908, 0.2211]) -Greedy action tensor([ 1.5687, -0.7317, -0.4350, -0.0867]) tensor([0.7012, 0.0703, 0.0946, 0.1340]) -Greedy action tensor([ 1.3039, -0.8909, -0.2746, 0.4966]) tensor([0.5670, 0.0631, 0.1170, 0.2529]) -Greedy action tensor([ 1.3981, -0.4272, -0.1530, 0.6906]) tensor([0.5359, 0.0864, 0.1136, 0.2641]) -Greedy action tensor([ 1.9053, -1.3520, 0.2263, 1.0205]) tensor([0.6106, 0.0235, 0.1139, 0.2520]) -Greedy action tensor([ 1.1817, -0.2940, -0.2894, 0.4466]) tensor([0.5161, 0.1180, 0.1185, 0.2474]) -Greedy action tensor([ 1.8084, -0.9129, -0.0292, 0.4603]) tensor([0.6735, 0.0443, 0.1072, 0.1749]) -Greedy action tensor([ 2.0073, -0.2691, -0.8670, 0.7605]) tensor([0.6913, 0.0710, 0.0390, 0.1987]) -Greedy action tensor([ 1.1388, -0.2343, -0.4183, 0.0995]) tensor([0.5501, 0.1394, 0.1159, 0.1946]) -Greedy action tensor([ 1.6260, -0.3549, 0.3015, 0.3850]) tensor([0.5907, 0.0815, 0.1571, 0.1708]) -Greedy action tensor([ 0.7245, 0.0459, -0.0233, -0.0899]) tensor([0.4126, 0.2093, 0.1953, 0.1827]) -Greedy action tensor([ 1.2611, -0.2769, -0.2541, 0.0301]) tensor([0.5792, 0.1244, 0.1273, 0.1691]) -Greedy action tensor([ 1.4347, -0.5777, -0.7671, 0.3225]) tensor([0.6357, 0.0850, 0.0703, 0.2090]) -Greedy action tensor([ 1.2525, -0.1419, -0.9422, 0.5183]) tensor([0.5437, 0.1348, 0.0606, 0.2609]) -Greedy action tensor([ 1.4752, -0.1504, -0.9947, 0.1752]) tensor([0.6435, 0.1266, 0.0544, 0.1754]) -Greedy action tensor([ 1.1117, -0.3621, -0.5142, 0.7286]) tensor([0.4745, 0.1087, 0.0933, 0.3235]) -Greedy action tensor([ 1.5588, -0.1769, -0.5071, 0.6821]) tensor([0.5817, 0.1025, 0.0737, 0.2421]) -Greedy action tensor([ 1.2300, -0.3369, -0.8506, 0.8081]) tensor([0.5027, 0.1049, 0.0628, 0.3297]) -Greedy action tensor([ 1.2313, -0.1052, -0.8564, 0.0812]) tensor([0.5871, 0.1543, 0.0728, 0.1859]) -Greedy action tensor([-0.0652, -1.0982, 0.7848, 0.7343]) tensor([0.1689, 0.0601, 0.3952, 0.3757]) -Greedy action tensor([-0.3197, -0.9815, 0.2499, -0.1303]) tensor([0.2226, 0.1148, 0.3935, 0.2690]) -Greedy action tensor([-0.5889, 0.1118, -0.1731, -0.6508]) tensor([0.1828, 0.3684, 0.2770, 0.1718]) -Greedy action tensor([ 0.3142, -1.3472, 0.4589, 0.0482]) tensor([0.3213, 0.0610, 0.3714, 0.2463]) -Greedy action tensor([ 0.2737, -1.5645, 0.4930, -0.6397]) tensor([0.3564, 0.0567, 0.4439, 0.1430]) -Greedy action tensor([-0.3061, -0.6539, -0.8102, -0.0927]) tensor([0.2818, 0.1990, 0.1702, 0.3489]) -Greedy action tensor([ 0.4120, -0.2293, 0.4987, -0.3791]) tensor([0.3257, 0.1715, 0.3552, 0.1476]) -Greedy action tensor([-0.1793, -0.8464, -0.0425, -0.4435]) tensor([0.2917, 0.1497, 0.3345, 0.2240]) -Greedy action tensor([-0.5451, -0.6526, -0.8543, 0.7317]) tensor([0.1608, 0.1444, 0.1181, 0.5766]) -Greedy action tensor([ 0.3653, -0.4385, -0.0993, -0.8249]) tensor([0.4201, 0.1881, 0.2640, 0.1278]) -Greedy action tensor([-0.1954, -1.3994, 0.1340, 0.7073]) tensor([0.1939, 0.0582, 0.2696, 0.4783]) -Greedy action tensor([-0.4368, -0.8000, -0.3515, -0.4610]) tensor([0.2659, 0.1849, 0.2896, 0.2596]) -Greedy action tensor([-0.0795, 0.0521, 0.9339, -0.6846]) tensor([0.1838, 0.2096, 0.5063, 0.1003]) -Greedy action tensor([ 0.2257, -1.1408, -0.1674, 0.1111]) tensor([0.3544, 0.0904, 0.2392, 0.3160]) -Greedy action tensor([-0.0191, -1.0187, 0.5782, -0.8886]) tensor([0.2774, 0.1021, 0.5042, 0.1163]) -Greedy action tensor([ 1.0086, -0.4001, -0.6186, 1.2589]) tensor([0.3669, 0.0897, 0.0721, 0.4713]) -Greedy action tensor([-0.4035, -0.1322, 1.0289, -0.1758]) tensor([0.1289, 0.1691, 0.5400, 0.1619]) -Greedy action tensor([ 0.2478, 0.0858, 0.7268, -0.7952]) tensor([0.2620, 0.2228, 0.4229, 0.0923]) -Greedy action tensor([-1.1787, -0.7327, -0.7476, 0.5174]) tensor([0.1047, 0.1635, 0.1611, 0.5708]) -Greedy action tensor([-0.2841, -0.2557, 0.3541, 0.0069]) tensor([0.1901, 0.1956, 0.3599, 0.2543]) -Greedy action tensor([ 0.5540, -0.9754, -0.8370, 0.1098]) tensor([0.4746, 0.1028, 0.1181, 0.3044]) -Greedy action tensor([-0.4351, -0.5049, 0.3160, -0.6762]) tensor([0.2067, 0.1928, 0.4381, 0.1624]) -Greedy action tensor([-0.2758, -0.6141, -0.3423, 0.3434]) tensor([0.2219, 0.1582, 0.2077, 0.4122]) -Greedy action tensor([ 0.5640, 0.0208, 0.7476, -0.1012]) tensor([0.3033, 0.1762, 0.3645, 0.1560]) -Greedy action tensor([0.7757, 0.3644, 0.8417, 0.4827]) tensor([0.2876, 0.1906, 0.3072, 0.2146]) -Greedy action tensor([ 0.3200, -0.1448, -0.0640, 0.2963]) tensor([0.3043, 0.1912, 0.2073, 0.2972]) -Greedy action tensor([-1.3876, -0.0910, 0.9170, -1.0396]) tensor([0.0621, 0.2272, 0.6226, 0.0880]) -Greedy action tensor([-0.5746, 0.4984, -0.3659, 0.0398]) tensor([0.1428, 0.4174, 0.1759, 0.2639]) -Greedy action tensor([ 1.2111, 0.1234, -0.0759, 0.0567]) tensor([0.5186, 0.1748, 0.1432, 0.1635]) -Greedy action tensor([-0.2947, -1.1943, -0.7460, -1.0026]) tensor([0.3943, 0.1604, 0.2511, 0.1943]) -Greedy action tensor([-0.8319, -1.5963, 0.8378, -0.0690]) tensor([0.1121, 0.0522, 0.5953, 0.2404]) -Greedy action tensor([0.3678, 0.2449, 1.2822, 0.0068]) tensor([0.1970, 0.1742, 0.4915, 0.1373]) -Greedy action tensor([-0.6703, -0.1192, -1.5963, -0.3066]) tensor([0.2188, 0.3797, 0.0867, 0.3148]) -Greedy action tensor([ 0.3572, -1.8441, 0.3089, 0.1435]) tensor([0.3483, 0.0385, 0.3319, 0.2813]) -Greedy action tensor([ 0.0200, -1.1500, -0.0326, -0.8578]) tensor([0.3738, 0.1160, 0.3547, 0.1554]) -Greedy action tensor([ 0.6823, -0.0905, 0.0322, -0.6038]) tensor([0.4425, 0.2043, 0.2310, 0.1223]) -Greedy action tensor([0.5604, 0.0623, 0.0401, 0.4999]) tensor([0.3181, 0.1933, 0.1891, 0.2995]) -Greedy action tensor([ 0.1608, -0.5639, 0.3693, 0.4197]) tensor([0.2493, 0.1208, 0.3071, 0.3229]) -Greedy action tensor([ 0.2096, 1.0968, -0.4578, -0.7791]) tensor([0.2318, 0.5630, 0.1189, 0.0863]) -Greedy action tensor([ 0.8670, -1.2085, 0.0312, 0.4900]) tensor([0.4454, 0.0559, 0.1931, 0.3055]) -Greedy action tensor([-0.8481, -0.9973, 0.5850, -0.6747]) tensor([0.1381, 0.1189, 0.5788, 0.1642]) -Greedy action tensor([ 0.0362, 0.0581, -0.9422, -0.9970]) tensor([0.3631, 0.3712, 0.1365, 0.1292]) -Greedy action tensor([ 0.0618, -0.7746, 0.6858, -0.3418]) tensor([0.2520, 0.1092, 0.4704, 0.1683]) -Greedy action tensor([-0.9194, 0.2815, -0.2590, -0.1636]) tensor([0.1192, 0.3962, 0.2308, 0.2538]) -Greedy action tensor([ 0.2540, -0.3295, -0.7521, 0.1707]) tensor([0.3517, 0.1962, 0.1286, 0.3236]) -Greedy action tensor([-0.8446, -1.1310, 0.5989, -0.0180]) tensor([0.1209, 0.0908, 0.5120, 0.2763]) -Greedy action tensor([ 0.9652, -0.2184, -0.4868, -0.1970]) tensor([0.5396, 0.1652, 0.1263, 0.1688]) -Greedy action tensor([ 0.2755, -0.2670, -0.4297, -0.2527]) tensor([0.3752, 0.2181, 0.1854, 0.2213]) -Greedy action tensor([-0.7032, -0.8247, 0.2869, -0.9080]) tensor([0.1855, 0.1642, 0.4992, 0.1511]) -Greedy action tensor([-0.6546, -0.9523, 0.3667, 0.4183]) tensor([0.1344, 0.0998, 0.3731, 0.3928]) -Greedy action tensor([-0.8531, 0.1343, 0.8928, -1.2008]) tensor([0.0988, 0.2652, 0.5662, 0.0698]) -Greedy action tensor([-0.9794, -0.5505, 0.7020, 0.0997]) tensor([0.0922, 0.1415, 0.4952, 0.2711]) -Greedy action tensor([-0.5904, -0.8605, 0.3951, -0.5246]) tensor([0.1815, 0.1385, 0.4862, 0.1938]) -Greedy action tensor([ 0.3849, -0.9950, 0.8090, 0.1061]) tensor([0.2828, 0.0711, 0.4321, 0.2140]) -Greedy action tensor([ 1.5022, -0.5704, 1.5189, 0.5927]) tensor([0.3929, 0.0494, 0.3995, 0.1582]) -Greedy action tensor([ 1.2377, -0.5255, 0.6408, 0.5753]) tensor([0.4469, 0.0766, 0.2460, 0.2304]) -Greedy action tensor([ 0.3853, -1.4823, -0.0338, -0.4172]) tensor([0.4424, 0.0683, 0.2909, 0.1983]) -Greedy action tensor([ 1.0152, -0.3620, 0.0525, 0.6025]) tensor([0.4355, 0.1099, 0.1663, 0.2883]) -Greedy action tensor([-1.1030, 0.2454, 0.3036, -0.3649]) tensor([0.0907, 0.3493, 0.3703, 0.1897]) -Greedy action tensor([-0.3913, 0.3072, 0.8265, -0.5403]) tensor([0.1379, 0.2773, 0.4660, 0.1188]) -Greedy action tensor([-0.1392, -1.1632, 0.0671, -0.2745]) tensor([0.2889, 0.1037, 0.3550, 0.2523]) -Greedy action tensor([-0.2715, -0.6173, -0.1283, 0.0654]) tensor([0.2346, 0.1660, 0.2707, 0.3286]) -Greedy action tensor([-1.0366, -0.0500, -0.2273, -0.4770]) tensor([0.1302, 0.3493, 0.2926, 0.2279]) -Greedy action tensor([-0.4405, -0.7319, -0.0860, -1.3069]) tensor([0.2783, 0.2079, 0.3967, 0.1170]) -Greedy action tensor([ 0.4736, -1.5540, -0.1273, 0.4062]) tensor([0.3824, 0.0503, 0.2097, 0.3575]) -Greedy action tensor([ 0.9678, -0.4857, -0.1147, 0.8552]) tensor([0.4055, 0.0948, 0.1374, 0.3623]) -Greedy action tensor([ 0.6736, -0.4069, -0.5004, 0.1916]) tensor([0.4413, 0.1498, 0.1364, 0.2725]) -Greedy action tensor([ 0.1478, -0.5815, -0.2855, 1.0889]) tensor([0.2131, 0.1028, 0.1382, 0.5460]) -Greedy action tensor([ 1.1200, -0.4261, 0.4640, 0.2459]) tensor([0.4653, 0.0991, 0.2414, 0.1941]) -Greedy action tensor([-1.2178, -1.4455, -0.5824, -0.0891]) tensor([0.1476, 0.1175, 0.2786, 0.4563]) -Greedy action tensor([-0.3507, -1.0001, 0.8389, -1.2687]) tensor([0.1920, 0.1003, 0.6310, 0.0767]) -Greedy action tensor([-0.6352, -1.1714, 0.3361, -0.2685]) tensor([0.1764, 0.1032, 0.4659, 0.2545]) -Greedy action tensor([-1.6651, -0.6226, 1.3377, -0.4370]) tensor([0.0365, 0.1035, 0.7353, 0.1247]) -Greedy action tensor([-0.0969, -0.3924, 0.8158, -0.7008]) tensor([0.2091, 0.1556, 0.5209, 0.1143]) -Greedy action tensor([ 0.8935, -0.3690, -0.6442, 0.0609]) tensor([0.5174, 0.1464, 0.1112, 0.2250]) -Greedy action tensor([-0.2816, -1.4924, 0.9155, -0.3001]) tensor([0.1789, 0.0533, 0.5922, 0.1756]) -Greedy action tensor([ 0.9729, -1.7147, 0.0473, 0.8801]) tensor([0.4209, 0.0286, 0.1668, 0.3836]) -Greedy action tensor([ 0.7210, -1.5673, 2.6107, 0.4461]) tensor([0.1179, 0.0120, 0.7805, 0.0896]) -Greedy action tensor([ 0.8367, -0.9119, 0.1022, -0.5174]) tensor([0.5230, 0.0910, 0.2509, 0.1350]) -Greedy action tensor([ 0.2872, -1.2463, -0.9338, -0.5180]) tensor([0.5108, 0.1102, 0.1507, 0.2283]) -Greedy action tensor([-0.4375, -0.2546, 0.0046, 0.6028]) tensor([0.1518, 0.1823, 0.2362, 0.4297]) -Greedy action tensor([ 1.1752, -0.8177, -0.3365, 0.0538]) tensor([0.5943, 0.0810, 0.1311, 0.1936]) -Greedy action tensor([ 0.5488, -0.0195, -0.1407, -0.1143]) tensor([0.3871, 0.2193, 0.1942, 0.1994]) -Greedy action tensor([ 0.9624, -0.7051, 0.0763, -0.4586]) tensor([0.5428, 0.1024, 0.2238, 0.1311]) -Greedy action tensor([ 0.7439, -0.2824, 0.0713, -0.3967]) tensor([0.4570, 0.1638, 0.2332, 0.1461]) -Greedy action tensor([ 1.1454, -0.6883, -0.0312, -0.5650]) tensor([0.6064, 0.0969, 0.1870, 0.1096]) -Greedy action tensor([ 1.0833, -0.4805, -0.2113, -0.4194]) tensor([0.5862, 0.1227, 0.1606, 0.1304]) -Greedy action tensor([ 0.5635, -0.2514, 0.0239, -0.2970]) tensor([0.4084, 0.1808, 0.2381, 0.1727]) -Greedy action tensor([0.4536, 0.0109, 0.0171, 0.0845]) tensor([0.3356, 0.2155, 0.2169, 0.2320]) -Greedy action tensor([ 0.8069, -0.4357, -0.1403, -0.2194]) tensor([0.4915, 0.1419, 0.1906, 0.1761]) -Greedy action tensor([ 0.7719, -0.3217, -0.0476, -0.2423]) tensor([0.4676, 0.1567, 0.2061, 0.1696]) -Greedy action tensor([ 0.5121, -0.5189, -0.0851, -0.1106]) tensor([0.4093, 0.1460, 0.2252, 0.2196]) -Greedy action tensor([ 0.7987, -0.1889, -0.1215, -0.1222]) tensor([0.4610, 0.1717, 0.1837, 0.1836]) -Greedy action tensor([ 1.0532, -0.6390, -0.1332, -0.4303]) tensor([0.5827, 0.1073, 0.1779, 0.1322]) -Greedy action tensor([ 0.6211, -0.2797, -0.1593, 0.0074]) tensor([0.4157, 0.1689, 0.1905, 0.2250]) -Greedy action tensor([ 0.6680, -0.3073, -0.1235, -0.4429]) tensor([0.4631, 0.1746, 0.2098, 0.1525]) -Greedy action tensor([ 0.5983, -0.3009, -0.1132, -0.0855]) tensor([0.4162, 0.1694, 0.2043, 0.2101]) -Greedy action tensor([ 0.4898, 0.1776, -0.1452, 0.0143]) tensor([0.3468, 0.2538, 0.1838, 0.2156]) -Greedy action tensor([ 0.5580, -0.5871, -0.1024, -0.0992]) tensor([0.4250, 0.1352, 0.2196, 0.2203]) -Greedy action tensor([ 0.6960, -0.4773, -0.1361, -0.2718]) tensor([0.4707, 0.1456, 0.2048, 0.1788]) -Greedy action tensor([ 0.9351, -0.8031, -0.0156, -0.5034]) tensor([0.5557, 0.0977, 0.2148, 0.1319]) -Greedy action tensor([ 0.6877, -0.1569, -0.0826, -0.0693]) tensor([0.4234, 0.1820, 0.1960, 0.1986]) -Greedy action tensor([ 0.9064, -0.4719, -0.0217, -0.5016]) tensor([0.5285, 0.1332, 0.2090, 0.1293]) -Greedy action tensor([ 0.9139, -0.8806, 0.0341, -0.4334]) tensor([0.5432, 0.0903, 0.2253, 0.1412]) -Greedy action tensor([ 0.4812, -0.3774, 0.0883, -0.2585]) tensor([0.3882, 0.1645, 0.2621, 0.1853]) -Greedy action tensor([ 0.8409, -0.7538, -0.0690, -0.5599]) tensor([0.5400, 0.1096, 0.2174, 0.1330]) -Greedy action tensor([ 0.8031, -0.4323, -0.0337, -0.3057]) tensor([0.4869, 0.1416, 0.2109, 0.1607]) -Greedy action tensor([ 0.9620, -0.5069, 0.1247, -0.3707]) tensor([0.5190, 0.1195, 0.2247, 0.1369]) -Greedy action tensor([ 0.6007, -0.3247, -0.0192, -0.0967]) tensor([0.4111, 0.1630, 0.2212, 0.2047]) -Greedy action tensor([ 0.6440, -0.3582, 0.0151, -0.1405]) tensor([0.4244, 0.1558, 0.2262, 0.1936]) -Greedy action tensor([ 0.4020, -0.5119, -0.2134, -0.1840]) tensor([0.4003, 0.1605, 0.2163, 0.2228]) -Greedy action tensor([ 0.5888, -0.4434, -0.0753, -0.1968]) tensor([0.4298, 0.1531, 0.2212, 0.1959]) -Greedy action tensor([ 1.5346, -0.7181, 0.1271, -0.5372]) tensor([0.6776, 0.0712, 0.1658, 0.0854]) -Greedy action tensor([ 0.7978, -0.4437, -0.1176, -0.2723]) tensor([0.4921, 0.1422, 0.1970, 0.1688]) -Greedy action tensor([ 0.7327, -0.6307, -0.1232, -0.6144]) tensor([0.5153, 0.1318, 0.2189, 0.1340]) -Greedy action tensor([0.6777, 0.1870, 0.1379, 0.0695]) tensor([0.3651, 0.2235, 0.2128, 0.1987]) -Greedy action tensor([ 1.1945, -0.9235, 0.0101, -0.5674]) tensor([0.6258, 0.0753, 0.1915, 0.1075]) -Greedy action tensor([ 0.8588, -0.3570, -0.0802, -0.0650]) tensor([0.4797, 0.1422, 0.1876, 0.1905]) -Greedy action tensor([ 0.3977, -0.0563, 0.0120, -0.3401]) tensor([0.3580, 0.2274, 0.2434, 0.1712]) -Greedy action tensor([ 0.7930, -0.3954, -0.0786, -0.2125]) tensor([0.4787, 0.1459, 0.2002, 0.1751]) -Greedy action tensor([ 1.1909, -0.8710, -0.0777, -0.4221]) tensor([0.6220, 0.0791, 0.1749, 0.1240]) -Greedy action tensor([ 0.6058, -0.2194, -0.2190, -0.2587]) tensor([0.4352, 0.1907, 0.1908, 0.1833]) -Greedy action tensor([ 0.5157, -0.5437, -0.2971, -0.2208]) tensor([0.4407, 0.1528, 0.1955, 0.2110]) -Greedy action tensor([ 1.0770, -0.8174, -0.0436, -0.5231]) tensor([0.5958, 0.0896, 0.1943, 0.1203]) -Greedy action tensor([ 0.6363, -0.7348, 0.0529, -0.2507]) tensor([0.4497, 0.1141, 0.2509, 0.1852]) -Greedy action tensor([ 0.8703, -0.4219, -0.1426, -0.4069]) tensor([0.5217, 0.1433, 0.1895, 0.1455]) -Greedy action tensor([ 0.6859, -0.5700, -0.0230, -0.2552]) tensor([0.4614, 0.1314, 0.2271, 0.1800]) -Greedy action tensor([ 0.4060, 0.3425, -0.2792, 0.1267]) tensor([0.3126, 0.2934, 0.1575, 0.2364]) -Greedy action tensor([ 0.3086, -0.0153, -0.0927, -0.1671]) tensor([0.3318, 0.2400, 0.2221, 0.2062]) -Greedy action tensor([ 0.9915, -0.7698, -0.0452, -0.3815]) tensor([0.5619, 0.0965, 0.1993, 0.1423]) -Greedy action tensor([ 0.8769, -0.6037, -0.0423, -0.4946]) tensor([0.5319, 0.1210, 0.2122, 0.1350]) -Greedy action tensor([ 0.8695, -0.6974, -0.0349, -0.5295]) tensor([0.5375, 0.1122, 0.2176, 0.1327]) -Greedy action tensor([ 1.0494, -0.6203, 0.1230, -0.4802]) tensor([0.5553, 0.1046, 0.2199, 0.1203]) -Greedy action tensor([ 0.6503, -0.0412, -0.0121, 0.0042]) tensor([0.3936, 0.1971, 0.2030, 0.2063]) -Greedy action tensor([ 0.9037, -0.6471, -0.0256, -0.3719]) tensor([0.5302, 0.1124, 0.2093, 0.1481]) -Greedy action tensor([ 0.6857, -0.5052, -0.0925, -0.4012]) tensor([0.4761, 0.1447, 0.2186, 0.1606]) -Greedy action tensor([ 0.4969, -0.2498, -0.1317, -0.0511]) tensor([0.3868, 0.1833, 0.2063, 0.2236]) -Greedy action tensor([ 0.9989, -0.7674, 0.1065, -0.3582]) tensor([0.5440, 0.0930, 0.2229, 0.1400]) -Greedy action tensor([ 1.0460, -0.4099, -0.2204, -0.4962]) tensor([0.5784, 0.1349, 0.1630, 0.1237]) -Greedy action tensor([ 0.7172, -0.4708, -0.1100, -0.3342]) tensor([0.4781, 0.1457, 0.2091, 0.1671]) -Greedy action tensor([ 0.7768, -0.5281, 0.0346, -0.3872]) tensor([0.4856, 0.1317, 0.2312, 0.1516]) -Greedy action tensor([ 0.9160, -0.5056, -0.0368, -0.1952]) tensor([0.5112, 0.1234, 0.1971, 0.1683]) -Greedy action tensor([ 0.9532, -0.8493, 0.0575, -0.8508]) tensor([0.5754, 0.0949, 0.2350, 0.0947]) -Greedy action tensor([ 0.5057, -0.3151, -0.1091, -0.5047]) tensor([0.4265, 0.1877, 0.2306, 0.1553]) -Greedy action tensor([ 0.4659, -0.0580, -0.0346, -0.0084]) tensor([0.3545, 0.2099, 0.2149, 0.2206]) -Greedy action tensor([ 0.9067, -0.6160, 0.0114, -0.5155]) tensor([0.5354, 0.1168, 0.2187, 0.1291]) -Greedy action tensor([ 0.7689, -0.4819, -0.0426, -0.4578]) tensor([0.4941, 0.1415, 0.2195, 0.1449]) -Greedy action tensor([ 1.0385, -0.9830, 0.0453, -0.5631]) tensor([0.5867, 0.0777, 0.2173, 0.1183]) -Greedy action tensor([ 0.4892, -0.5862, -0.1906, -0.2077]) tensor([0.4263, 0.1454, 0.2160, 0.2123]) -Greedy action tensor([ 0.7641, -0.5451, -0.0662, -0.4117]) tensor([0.4964, 0.1341, 0.2164, 0.1532]) -Greedy action tensor([ 0.8737, -0.7186, 0.1764, -0.3379]) tensor([0.5002, 0.1018, 0.2491, 0.1489]) -Greedy action tensor([ 0.2159, -0.1823, -0.0356, -0.1700]) tensor([0.3196, 0.2146, 0.2485, 0.2173]) -Greedy action tensor([ 0.5883, 0.1925, -0.0846, 0.1628]) tensor([0.3525, 0.2373, 0.1799, 0.2303]) -Greedy action tensor([ 0.8292, -0.1836, -0.0726, -0.0566]) tensor([0.4584, 0.1665, 0.1861, 0.1890]) -Greedy action tensor([0.0908, 0.1768, 0.1317, 0.3257]) tensor([0.2275, 0.2479, 0.2370, 0.2877]) -Greedy action tensor([ 0.6887, -0.5609, -0.1076, -0.4487]) tensor([0.4858, 0.1392, 0.2191, 0.1558]) -Greedy action tensor([ 0.3852, 0.0757, -0.0110, -0.3178]) tensor([0.3446, 0.2529, 0.2319, 0.1706]) -Greedy action tensor([ 0.5042, -0.4425, 0.2530, -0.3035]) tensor([0.3829, 0.1486, 0.2978, 0.1707]) -Greedy action tensor([ 0.7427, -0.7588, 0.0688, -0.2377]) tensor([0.4745, 0.1057, 0.2418, 0.1780]) -Greedy action tensor([ 0.8739, -0.2508, -0.0519, -0.0805]) tensor([0.4748, 0.1542, 0.1881, 0.1828]) -Greedy action tensor([ 0.8047, -0.3767, -0.1416, -0.2547]) tensor([0.4898, 0.1503, 0.1901, 0.1698]) -Greedy action tensor([ 1.0865, -0.4741, -0.0798, -0.1948]) tensor([0.5558, 0.1167, 0.1731, 0.1543]) -Greedy action tensor([ 0.4127, -0.1045, -0.2295, -0.6669]) tensor([0.4062, 0.2421, 0.2137, 0.1380]) -Greedy action tensor([ 0.8546, -0.4887, -0.0811, -0.4347]) tensor([0.5185, 0.1353, 0.2034, 0.1428]) -Greedy action tensor([ 1.5422, -0.3950, -0.7145, 0.0573]) tensor([0.6778, 0.0977, 0.0710, 0.1535]) -Greedy action tensor([ 1.4805, -0.5753, -0.2504, 0.3557]) tensor([0.6136, 0.0785, 0.1087, 0.1992]) -Greedy action tensor([ 1.9850, -0.6932, -0.2034, 0.5439]) tensor([0.7055, 0.0485, 0.0791, 0.1670]) -Greedy action tensor([ 1.6813, -0.6505, -0.3165, 0.8015]) tensor([0.6070, 0.0589, 0.0823, 0.2518]) -Greedy action tensor([ 1.4978, -0.1350, -0.6925, -0.3856]) tensor([0.6852, 0.1339, 0.0767, 0.1042]) -Greedy action tensor([ 2.3605, -0.4619, -0.1854, 0.7519]) tensor([0.7474, 0.0444, 0.0586, 0.1496]) -Greedy action tensor([ 1.3108, -0.4857, -0.2102, 0.4492]) tensor([0.5534, 0.0918, 0.1209, 0.2338]) -Greedy action tensor([ 0.9284, -0.0428, -0.7310, 0.4009]) tensor([0.4632, 0.1754, 0.0881, 0.2733]) -Greedy action tensor([ 2.0188, 0.2606, -0.0290, 0.1619]) tensor([0.6861, 0.1182, 0.0885, 0.1071]) -Greedy action tensor([ 1.5635, -0.1300, -0.4256, 0.2741]) tensor([0.6265, 0.1152, 0.0857, 0.1726]) -Greedy action tensor([ 1.5839, -0.7194, -0.2672, 0.3455]) tensor([0.6465, 0.0646, 0.1015, 0.1874]) -Greedy action tensor([ 1.2743, -0.2941, -0.5028, 0.3786]) tensor([0.5600, 0.1167, 0.0947, 0.2286]) -Greedy action tensor([ 1.4041, -0.5979, -0.3532, 0.4939]) tensor([0.5848, 0.0790, 0.1009, 0.2353]) -Greedy action tensor([ 1.3424, -0.7473, -0.0939, 0.2572]) tensor([0.5884, 0.0728, 0.1399, 0.1988]) -Greedy action tensor([ 1.5751, -0.7325, -0.1649, 0.5198]) tensor([0.6161, 0.0613, 0.1081, 0.2145]) -Greedy action tensor([ 1.3995, -0.5403, -0.2518, 0.1961]) tensor([0.6114, 0.0879, 0.1173, 0.1835]) -Greedy action tensor([ 1.2269, -0.5163, 0.1893, 0.0239]) tensor([0.5466, 0.0956, 0.1937, 0.1641]) -Greedy action tensor([ 2.1795, -0.6654, -0.2068, 0.5863]) tensor([0.7389, 0.0430, 0.0680, 0.1502]) -Greedy action tensor([ 3.1152, -1.1034, -0.4664, 0.7291]) tensor([0.8814, 0.0130, 0.0245, 0.0811]) -Greedy action tensor([ 1.8926, -0.8297, -0.4638, 0.2741]) tensor([0.7360, 0.0484, 0.0697, 0.1459]) -Greedy action tensor([ 1.1295, 0.2045, -0.4840, 0.3366]) tensor([0.4882, 0.1936, 0.0973, 0.2209]) -Greedy action tensor([ 1.0809, -0.6232, -0.0800, 0.3251]) tensor([0.5090, 0.0926, 0.1594, 0.2390]) -Greedy action tensor([ 1.7735, -0.6356, -0.3607, 0.1991]) tensor([0.7065, 0.0635, 0.0836, 0.1463]) -Greedy action tensor([ 1.1822, -0.2989, -0.0914, 0.0669]) tensor([0.5449, 0.1239, 0.1525, 0.1786]) -Greedy action tensor([ 2.6109, 0.8827, 0.0795, -0.1620]) tensor([0.7578, 0.1346, 0.0603, 0.0473]) -Greedy action tensor([ 1.4202, -0.4796, -0.6219, 0.3331]) tensor([0.6186, 0.0925, 0.0803, 0.2086]) -Greedy action tensor([ 1.6658, -0.9566, -0.2436, 0.3600]) tensor([0.6704, 0.0487, 0.0993, 0.1816]) -Greedy action tensor([ 1.3787, -0.2607, -0.6038, 0.3070]) tensor([0.5973, 0.1159, 0.0823, 0.2045]) -Greedy action tensor([ 1.6035, -0.7187, -0.3372, 0.4609]) tensor([0.6408, 0.0628, 0.0920, 0.2044]) -Greedy action tensor([ 1.9866, -0.5870, -0.9275, 0.7439]) tensor([0.7047, 0.0537, 0.0382, 0.2034]) -Greedy action tensor([ 1.4532, -0.4780, -0.3608, -0.2819]) tensor([0.6737, 0.0977, 0.1098, 0.1188]) -Greedy action tensor([ 1.0617, -0.1425, -0.4726, 0.1998]) tensor([0.5160, 0.1548, 0.1113, 0.2180]) -Greedy action tensor([ 1.6626, -0.4926, -0.9573, 0.3090]) tensor([0.6911, 0.0801, 0.0503, 0.1785]) -Greedy action tensor([ 2.0418, -1.1995, 0.0129, 1.0256]) tensor([0.6525, 0.0255, 0.0858, 0.2362]) -Greedy action tensor([ 1.3020, -0.5229, -0.4019, 0.2755]) tensor([0.5877, 0.0948, 0.1069, 0.2106]) -Greedy action tensor([ 1.4968, -0.7153, -0.1979, 0.0727]) tensor([0.6520, 0.0714, 0.1197, 0.1569]) -Greedy action tensor([ 1.4457, -0.5084, -0.5658, 0.4725]) tensor([0.6048, 0.0857, 0.0809, 0.2285]) -Greedy action tensor([ 1.8051, -0.5358, -0.5220, 0.3794]) tensor([0.6973, 0.0671, 0.0680, 0.1676]) -Greedy action tensor([ 1.0768, -0.3597, -0.2048, 0.2309]) tensor([0.5143, 0.1223, 0.1428, 0.2207]) -Greedy action tensor([ 1.1751, -0.3981, -0.3763, 0.1374]) tensor([0.5638, 0.1169, 0.1195, 0.1998]) -Greedy action tensor([ 1.1991, -0.6723, -0.1410, 0.0817]) tensor([0.5738, 0.0883, 0.1502, 0.1877]) -Greedy action tensor([ 1.8885, -0.5673, -0.1721, 0.4727]) tensor([0.6869, 0.0589, 0.0875, 0.1667]) -Greedy action tensor([ 1.0738, -0.4979, -0.1817, 0.4118]) tensor([0.4979, 0.1034, 0.1419, 0.2568]) -Greedy action tensor([ 1.4080, -0.4874, -1.0440, -0.7884]) tensor([0.7421, 0.1115, 0.0639, 0.0825]) -Greedy action tensor([ 1.0854, -0.2103, -0.2622, 0.2636]) tensor([0.5068, 0.1387, 0.1317, 0.2228]) -Greedy action tensor([ 1.2517, -0.5583, -0.2313, 0.5819]) tensor([0.5256, 0.0860, 0.1193, 0.2690]) -Greedy action tensor([ 1.4139, -0.4791, -0.3830, 0.5451]) tensor([0.5761, 0.0868, 0.0955, 0.2416]) -Greedy action tensor([ 3.1256, 0.0499, 0.4334, -0.0389]) tensor([0.8650, 0.0399, 0.0586, 0.0365]) -Greedy action tensor([ 1.7547, -0.5529, -0.3602, 0.1604]) tensor([0.7026, 0.0699, 0.0848, 0.1427]) -Greedy action tensor([ 2.1240, -0.7842, -0.3885, 0.1663]) tensor([0.7832, 0.0427, 0.0635, 0.1106]) -Greedy action tensor([ 2.1189, -1.0269, 0.0642, 0.4861]) tensor([0.7318, 0.0315, 0.0938, 0.1430]) -Greedy action tensor([ 1.5480, -0.5698, -0.3441, 0.3036]) tensor([0.6414, 0.0772, 0.0967, 0.1848]) -Greedy action tensor([ 1.6471, 0.1251, -0.4455, 0.5606]) tensor([0.5956, 0.1300, 0.0735, 0.2009]) -Greedy action tensor([ 1.9079, -0.2380, -0.7784, 0.7971]) tensor([0.6603, 0.0772, 0.0450, 0.2175]) -Greedy action tensor([ 1.9647, -0.6971, -0.2082, 0.7145]) tensor([0.6802, 0.0475, 0.0774, 0.1948]) -Greedy action tensor([2.1308, 0.0284, 0.0064, 0.2741]) tensor([0.7154, 0.0874, 0.0855, 0.1117]) -Greedy action tensor([ 0.5363, -0.2893, -0.1243, 0.1561]) tensor([0.3790, 0.1660, 0.1958, 0.2592]) -Greedy action tensor([ 1.5732, -0.5696, -0.3145, 0.5373]) tensor([0.6159, 0.0723, 0.0933, 0.2186]) -Greedy action tensor([1.3663, 0.1750, 0.0790, 0.4103]) tensor([0.5091, 0.1547, 0.1405, 0.1957]) -Greedy action tensor([ 1.3259, 0.1299, -0.4039, 0.5344]) tensor([0.5174, 0.1564, 0.0917, 0.2345]) -Greedy action tensor([ 1.4907, -0.3889, -0.1785, 0.2173]) tensor([0.6169, 0.0942, 0.1162, 0.1727]) -Greedy action tensor([ 1.6702, -0.5367, 0.0716, 0.6675]) tensor([0.5956, 0.0655, 0.1204, 0.2185]) -Greedy action tensor([1.0003, 0.0197, 0.0929, 0.2774]) tensor([0.4417, 0.1657, 0.1783, 0.2144]) -Greedy action tensor([ 1.3332, -1.0019, -0.0688, 0.3977]) tensor([0.5763, 0.0558, 0.1418, 0.2261]) -Greedy action tensor([ 1.6114, -0.4092, -0.4719, 0.1514]) tensor([0.6714, 0.0890, 0.0836, 0.1559]) -Greedy action tensor([ 1.2688, 0.1218, -0.3004, 0.1195]) tensor([0.5427, 0.1724, 0.1130, 0.1719]) -Greedy action tensor([ 1.1301, 0.0086, -0.3817, 0.1577]) tensor([0.5196, 0.1693, 0.1146, 0.1965]) -Greedy action tensor([ 1.3345, -0.4411, -0.7157, 0.0639]) tensor([0.6334, 0.1073, 0.0815, 0.1778]) -Greedy action tensor([ 1.8443, -1.1293, -0.1172, 0.2401]) tensor([0.7180, 0.0367, 0.1010, 0.1443]) -Greedy action tensor([ 2.4255, -1.0940, -0.6466, 0.8365]) tensor([0.7812, 0.0231, 0.0362, 0.1595]) -Greedy action tensor([ 1.7290, -0.2713, -0.1644, 0.4541]) tensor([0.6388, 0.0864, 0.0962, 0.1785]) -Greedy action tensor([ 1.1077, -0.1149, -0.9157, 0.8104]) tensor([0.4609, 0.1357, 0.0609, 0.3424]) -Greedy action tensor([ 1.6981, -0.8002, -0.6541, 0.0217]) tensor([0.7329, 0.0603, 0.0697, 0.1371]) -Greedy action tensor([ 1.5723, -0.7574, -0.4365, -0.0936]) tensor([0.7040, 0.0685, 0.0944, 0.1331]) -Greedy action tensor([ 1.2684, -0.4248, -0.0418, 0.0866]) tensor([0.5680, 0.1045, 0.1532, 0.1742]) -Greedy action tensor([ 1.2625, -0.6244, -0.5092, 0.5241]) tensor([0.5557, 0.0842, 0.0945, 0.2656]) -Greedy action tensor([ 1.1412, -0.4058, -0.5654, 0.4907]) tensor([0.5219, 0.1111, 0.0947, 0.2723]) -Greedy action tensor([ 1.0438, -0.1498, 0.0062, -0.1764]) tensor([0.5121, 0.1552, 0.1815, 0.1512]) -Greedy action tensor([ 2.0698, -0.4715, -0.5577, 0.4415]) tensor([0.7422, 0.0585, 0.0536, 0.1457]) -Greedy action tensor([ 1.0745, 0.1469, -0.6666, -0.0012]) tensor([0.5231, 0.2069, 0.0917, 0.1784]) -Greedy action tensor([ 1.6940, -0.6143, -0.5981, 0.0020]) tensor([0.7222, 0.0718, 0.0730, 0.1330]) -Greedy action tensor([ 1.5693, -0.4249, -0.2975, 0.1981]) tensor([0.6474, 0.0881, 0.1001, 0.1643]) -Greedy action tensor([-1.2007, 0.3644, 0.4163, 0.4926]) tensor([0.0615, 0.2942, 0.3099, 0.3344]) -Greedy action tensor([-0.9002, 0.0362, 0.1636, 0.0461]) tensor([0.1108, 0.2827, 0.3211, 0.2855]) -Greedy action tensor([-0.8843, -0.7469, 0.3842, 0.4130]) tensor([0.1068, 0.1225, 0.3798, 0.3909]) -Greedy action tensor([-1.8597, -0.2519, 0.6080, -0.1245]) tensor([0.0426, 0.2128, 0.5028, 0.2417]) -Greedy action tensor([-1.9492, -0.4530, 0.6674, -0.1837]) tensor([0.0400, 0.1786, 0.5476, 0.2338]) -Greedy action tensor([-1.3000, -0.6744, 0.3832, 0.0841]) tensor([0.0817, 0.1527, 0.4397, 0.3260]) -Greedy action tensor([-0.9061, -0.5498, 0.5328, -0.0083]) tensor([0.1099, 0.1570, 0.4634, 0.2697]) -Greedy action tensor([-1.8658, -0.4620, 0.6359, -0.1419]) tensor([0.0437, 0.1779, 0.5334, 0.2450]) -Greedy action tensor([-1.5870, -0.4610, 0.4891, -0.0381]) tensor([0.0597, 0.1839, 0.4757, 0.2807]) -Greedy action tensor([-1.8536, -0.4835, 0.6230, -0.1355]) tensor([0.0446, 0.1756, 0.5310, 0.2487]) -Greedy action tensor([-1.8189, -0.4351, 0.6124, -0.1051]) tensor([0.0456, 0.1821, 0.5190, 0.2533]) -Greedy action tensor([-1.1951, 0.9160, 0.2247, 0.2489]) tensor([0.0567, 0.4683, 0.2346, 0.2403]) -Greedy action tensor([-1.8901, -0.4471, 0.6420, -0.1536]) tensor([0.0426, 0.1802, 0.5355, 0.2417]) -Greedy action tensor([-1.2793, -0.6227, 0.3101, 0.2106]) tensor([0.0815, 0.1572, 0.3995, 0.3617]) -Greedy action tensor([-1.2306, -0.0752, 0.1163, -0.3422]) tensor([0.0957, 0.3038, 0.3679, 0.2326]) -Greedy action tensor([-1.4717, -0.3476, 0.3927, 0.0483]) tensor([0.0662, 0.2038, 0.4272, 0.3028]) -Greedy action tensor([-1.9031, -0.4453, 0.6705, -0.1538]) tensor([0.0414, 0.1778, 0.5428, 0.2380]) -Greedy action tensor([-1.0391, 0.9646, 0.1662, 0.3277]) tensor([0.0638, 0.4731, 0.2129, 0.2502]) -Greedy action tensor([-1.9309, -0.4441, 0.6611, -0.1726]) tensor([0.0407, 0.1799, 0.5433, 0.2361]) -Greedy action tensor([-1.9289, -0.4276, 0.6531, -0.1700]) tensor([0.0408, 0.1830, 0.5394, 0.2368]) -Greedy action tensor([-1.6017, -0.5232, 0.4803, -0.0902]) tensor([0.0606, 0.1783, 0.4863, 0.2748]) -Greedy action tensor([-1.3786, 0.3561, 0.4035, 0.1765]) tensor([0.0577, 0.3267, 0.3426, 0.2730]) -Greedy action tensor([-1.9296, -0.4358, 0.6586, -0.1708]) tensor([0.0407, 0.1813, 0.5417, 0.2363]) -Greedy action tensor([-1.9336, -0.4556, 0.6742, -0.1685]) tensor([0.0403, 0.1768, 0.5472, 0.2356]) -Greedy action tensor([-1.1597, -0.3945, 0.4212, 0.2740]) tensor([0.0820, 0.1761, 0.3982, 0.3437]) -Greedy action tensor([-0.6441, -0.1765, 0.5930, 0.7671]) tensor([0.0986, 0.1574, 0.3397, 0.4043]) -Greedy action tensor([-1.7940, -0.4643, 0.6015, -0.0114]) tensor([0.0461, 0.1742, 0.5057, 0.2740]) -Greedy action tensor([-1.9277, -0.3955, 0.6541, -0.1724]) tensor([0.0406, 0.1879, 0.5367, 0.2348]) -Greedy action tensor([-1.9050, -0.4408, 0.6472, -0.1597]) tensor([0.0419, 0.1810, 0.5373, 0.2398]) -Greedy action tensor([-1.2728, -0.5100, 0.5101, 0.4270]) tensor([0.0687, 0.1472, 0.4083, 0.3758]) -Greedy action tensor([-1.9052, -0.4457, 0.6445, -0.1625]) tensor([0.0420, 0.1807, 0.5375, 0.2398]) -Greedy action tensor([-1.4525, -0.5288, 0.4614, 0.1316]) tensor([0.0659, 0.1660, 0.4468, 0.3213]) -Greedy action tensor([-1.2196, 0.5108, 0.2291, 0.1446]) tensor([0.0675, 0.3809, 0.2874, 0.2641]) -Greedy action tensor([-1.8611, -0.4281, 0.6232, -0.1404]) tensor([0.0439, 0.1841, 0.5266, 0.2454]) -Greedy action tensor([-1.8584, -0.4225, 0.6316, -0.1037]) tensor([0.0434, 0.1824, 0.5233, 0.2509]) -Greedy action tensor([-1.9331, -0.4587, 0.6777, -0.1635]) tensor([0.0402, 0.1758, 0.5478, 0.2362]) -Greedy action tensor([-1.5551, 0.5322, 0.3727, 0.0799]) tensor([0.0475, 0.3827, 0.3263, 0.2435]) -Greedy action tensor([-1.7005, -0.5118, 0.5470, -0.0517]) tensor([0.0528, 0.1733, 0.4995, 0.2745]) -Greedy action tensor([-0.9767, -0.5513, 0.2963, -0.0292]) tensor([0.1152, 0.1763, 0.4114, 0.2971]) -Greedy action tensor([-1.5322, -0.2785, 0.5160, -0.1770]) tensor([0.0620, 0.2171, 0.4806, 0.2403]) -Greedy action tensor([-0.7728, -0.1716, 0.2217, -0.0782]) tensor([0.1328, 0.2423, 0.3590, 0.2660]) -Greedy action tensor([-1.0482, 0.1028, 0.5234, 0.5577]) tensor([0.0716, 0.2265, 0.3449, 0.3570]) -Greedy action tensor([-1.3517, -0.4463, 0.6412, 0.3686]) tensor([0.0610, 0.1508, 0.4475, 0.3407]) -Greedy action tensor([-1.9299, -0.3797, 0.6478, -0.1714]) tensor([0.0405, 0.1909, 0.5334, 0.2351]) -Greedy action tensor([-1.9037, -0.4526, 0.6511, -0.1640]) tensor([0.0420, 0.1791, 0.5400, 0.2390]) -Greedy action tensor([-1.9153, -0.4552, 0.6585, -0.1637]) tensor([0.0413, 0.1781, 0.5423, 0.2383]) -Greedy action tensor([-1.3344, 0.6394, -0.0308, -0.8586]) tensor([0.0741, 0.5336, 0.2730, 0.1193]) -Greedy action tensor([-0.9790, -0.0034, -0.4111, -0.8333]) tensor([0.1521, 0.4035, 0.2684, 0.1760]) -Greedy action tensor([-1.9294, -0.4267, 0.6521, -0.1776]) tensor([0.0409, 0.1836, 0.5400, 0.2355]) -Greedy action tensor([-1.8585, -0.3607, 0.6100, -0.1216]) tensor([0.0436, 0.1948, 0.5142, 0.2474]) -Greedy action tensor([-1.7746, -0.3799, 0.5739, -0.0991]) tensor([0.0480, 0.1935, 0.5023, 0.2562]) -Greedy action tensor([-1.7663, -0.5059, 0.5698, -0.1194]) tensor([0.0499, 0.1758, 0.5155, 0.2588]) -Greedy action tensor([-1.8796, -0.4521, 0.6408, -0.1458]) tensor([0.0430, 0.1792, 0.5345, 0.2434]) -Greedy action tensor([-1.5847, -0.5359, 0.5039, -0.0980]) tensor([0.0612, 0.1746, 0.4938, 0.2705]) -Greedy action tensor([-1.7963, -0.3901, 0.6426, -0.0487]) tensor([0.0449, 0.1831, 0.5144, 0.2576]) -Greedy action tensor([-0.6273, 0.9777, 0.0140, 0.3343]) tensor([0.0953, 0.4744, 0.1810, 0.2493]) -Greedy action tensor([-0.6980, -0.0668, 0.1900, -0.0762]) tensor([0.1394, 0.2621, 0.3388, 0.2596]) -Greedy action tensor([-1.8345, -0.4112, 0.6244, -0.1149]) tensor([0.0446, 0.1851, 0.5214, 0.2489]) -Greedy action tensor([-1.9245, -0.4366, 0.6535, -0.1727]) tensor([0.0410, 0.1817, 0.5406, 0.2366]) -Greedy action tensor([-1.7949, -0.4466, 0.5961, -0.1008]) tensor([0.0471, 0.1815, 0.5149, 0.2565]) -Greedy action tensor([-0.8480, 0.9685, 0.0755, 0.3325]) tensor([0.0774, 0.4759, 0.1948, 0.2519]) -Greedy action tensor([-1.4010, -0.6196, 0.3783, 0.1592]) tensor([0.0721, 0.1575, 0.4272, 0.3432]) -Greedy action tensor([-1.2925, -0.2006, 0.5143, 0.2109]) tensor([0.0686, 0.2046, 0.4181, 0.3087]) -Greedy action tensor([-1.7789, -0.4458, 0.5965, -0.0882]) tensor([0.0477, 0.1809, 0.5129, 0.2586]) -Greedy action tensor([-1.3025, 0.4356, 0.3007, -0.0737]) tensor([0.0663, 0.3773, 0.3297, 0.2267]) -Greedy action tensor([-1.9000, -0.3565, 0.6352, -0.1425]) tensor([0.0415, 0.1943, 0.5236, 0.2406]) -Greedy action tensor([-1.9227, -0.4422, 0.6568, -0.1694]) tensor([0.0411, 0.1804, 0.5415, 0.2370]) -Greedy action tensor([-1.9371, -0.4424, 0.6646, -0.1761]) tensor([0.0404, 0.1800, 0.5446, 0.2350]) -Greedy action tensor([-0.5395, 0.0288, -0.2032, -0.5147]) tensor([0.1927, 0.3401, 0.2697, 0.1975]) -Greedy action tensor([-1.9047, -0.4555, 0.6540, -0.1587]) tensor([0.0418, 0.1782, 0.5403, 0.2397]) -Greedy action tensor([-1.9226, -0.3902, 0.6509, -0.1690]) tensor([0.0408, 0.1888, 0.5348, 0.2356]) -Greedy action tensor([-1.9251, -0.3958, 0.6479, -0.1683]) tensor([0.0408, 0.1883, 0.5346, 0.2364]) -Greedy action tensor([-1.9399, -0.4577, 0.6701, -0.1738]) tensor([0.0402, 0.1772, 0.5473, 0.2353]) -Greedy action tensor([-1.8344, -0.3345, 0.6225, -0.1096]) tensor([0.0439, 0.1969, 0.5126, 0.2465]) -Greedy action tensor([-1.9254, -0.4622, 0.6557, -0.1710]) tensor([0.0411, 0.1777, 0.5434, 0.2377]) -Greedy action tensor([-1.8897, -0.4448, 0.6763, -0.1462]) tensor([0.0417, 0.1769, 0.5429, 0.2385]) -Greedy action tensor([-1.8198, -0.4757, 0.6480, -0.0930]) tensor([0.0449, 0.1723, 0.5301, 0.2527]) -Greedy action tensor([-1.8997, -0.3314, 0.6294, -0.1426]) tensor([0.0414, 0.1988, 0.5196, 0.2401]) -Greedy action tensor([-1.8553, -0.3244, 0.6399, -0.1167]) tensor([0.0427, 0.1972, 0.5173, 0.2428]) -Greedy action tensor([-1.9435, -0.4444, 0.6657, -0.1791]) tensor([0.0402, 0.1798, 0.5456, 0.2344]) -Greedy action tensor([-1.8984, -0.4449, 0.6768, -0.1287]) tensor([0.0412, 0.1762, 0.5409, 0.2417]) -Greedy action tensor([ 0.8498, -0.3959, -0.0068, -0.2116]) tensor([0.4858, 0.1398, 0.2063, 0.1681]) -Greedy action tensor([ 0.6685, -0.5564, 0.0056, -0.2117]) tensor([0.4497, 0.1321, 0.2317, 0.1865]) -Greedy action tensor([ 0.5359, -0.3135, -0.0021, -0.0683]) tensor([0.3909, 0.1672, 0.2283, 0.2136]) -Greedy action tensor([ 0.5657, -0.3101, -0.0467, -0.2263]) tensor([0.4147, 0.1727, 0.2248, 0.1878]) -Greedy action tensor([ 0.4866, -0.4546, -0.1211, -0.1865]) tensor([0.4090, 0.1596, 0.2227, 0.2086]) -Greedy action tensor([ 0.9359, -0.1153, -0.0302, -0.5268]) tensor([0.5098, 0.1782, 0.1940, 0.1181]) -Greedy action tensor([ 1.1584, -0.9371, 0.0775, -0.6685]) tensor([0.6161, 0.0758, 0.2090, 0.0991]) -Greedy action tensor([ 0.5540, -0.4429, -0.0389, -0.1032]) tensor([0.4098, 0.1512, 0.2265, 0.2124]) -Greedy action tensor([ 0.7014, -0.3010, 0.0023, -0.6647]) tensor([0.4719, 0.1732, 0.2345, 0.1204]) -Greedy action tensor([ 0.6050, -0.3160, 0.1290, -0.4868]) tensor([0.4246, 0.1691, 0.2638, 0.1425]) -Greedy action tensor([ 0.5318, -0.3106, -0.2871, -0.6035]) tensor([0.4560, 0.1964, 0.2011, 0.1465]) -Greedy action tensor([ 0.4148, -0.1751, 0.0760, -0.6095]) tensor([0.3808, 0.2111, 0.2714, 0.1367]) -Greedy action tensor([ 0.5816, -0.3038, -0.0646, 0.0201]) tensor([0.3989, 0.1646, 0.2090, 0.2275]) -Greedy action tensor([ 0.9014, -0.4436, 0.0849, -0.6107]) tensor([0.5200, 0.1355, 0.2298, 0.1146]) -Greedy action tensor([ 0.7317, -0.3960, 0.0898, -0.2326]) tensor([0.4482, 0.1451, 0.2359, 0.1709]) -Greedy action tensor([ 0.6431, -0.4272, -0.0801, -0.2529]) tensor([0.4471, 0.1533, 0.2170, 0.1825]) -Greedy action tensor([ 1.0360, -0.6567, -0.0147, -0.5034]) tensor([0.5720, 0.1053, 0.2000, 0.1227]) -Greedy action tensor([ 0.5227, -0.4813, -0.1008, -0.2524]) tensor([0.4232, 0.1551, 0.2268, 0.1949]) -Greedy action tensor([ 0.8128, -0.5643, -0.0583, -0.4006]) tensor([0.5081, 0.1282, 0.2126, 0.1510]) -Greedy action tensor([ 0.6340, -0.1014, -0.0594, -0.0847]) tensor([0.4054, 0.1943, 0.2026, 0.1976]) -Greedy action tensor([ 0.3761, -0.1069, -0.0334, -0.1520]) tensor([0.3483, 0.2149, 0.2313, 0.2054]) -Greedy action tensor([ 0.7426, -0.2970, -0.0635, -0.0759]) tensor([0.4462, 0.1578, 0.1993, 0.1968]) -Greedy action tensor([ 0.6394, -0.2368, -0.0566, -0.1641]) tensor([0.4232, 0.1762, 0.2110, 0.1895]) -Greedy action tensor([ 0.3940, -0.1889, -0.0734, -0.1736]) tensor([0.3634, 0.2029, 0.2277, 0.2060]) -Greedy action tensor([ 0.8076, -0.5419, 0.0023, -0.2343]) tensor([0.4856, 0.1260, 0.2171, 0.1713]) -Greedy action tensor([ 1.0722, -0.4712, -0.1142, -0.4570]) tensor([0.5761, 0.1231, 0.1759, 0.1249]) -Greedy action tensor([ 1.0452, -0.7487, 0.0850, -0.6654]) tensor([0.5781, 0.0961, 0.2213, 0.1045]) -Greedy action tensor([ 1.0825, -0.9852, -0.0278, -0.5039]) tensor([0.6022, 0.0762, 0.1984, 0.1232]) -Greedy action tensor([ 1.0872, -0.3794, 0.0683, -0.3032]) tensor([0.5433, 0.1253, 0.1961, 0.1353]) -Greedy action tensor([ 0.4534, -0.1690, -0.0994, -0.3746]) tensor([0.3923, 0.2105, 0.2257, 0.1714]) -Greedy action tensor([ 0.9392, -0.2288, -0.2885, -0.6727]) tensor([0.5545, 0.1724, 0.1625, 0.1106]) -Greedy action tensor([ 0.5170, -0.0537, -0.1686, -0.2984]) tensor([0.3982, 0.2250, 0.2006, 0.1762]) -Greedy action tensor([ 0.5428, -0.5544, 0.0236, -0.6756]) tensor([0.4495, 0.1501, 0.2675, 0.1329]) -Greedy action tensor([ 0.8873, -0.1838, -0.2328, -0.0739]) tensor([0.4875, 0.1670, 0.1590, 0.1864]) -Greedy action tensor([ 0.2931, 0.2267, -0.0836, 0.0563]) tensor([0.2932, 0.2743, 0.2012, 0.2313]) -Greedy action tensor([ 0.4757, -0.7710, -0.1259, -0.2118]) tensor([0.4277, 0.1229, 0.2343, 0.2151]) -Greedy action tensor([ 0.7258, -0.4675, -0.2081, -0.1594]) tensor([0.4742, 0.1438, 0.1864, 0.1957]) -Greedy action tensor([ 1.1950, -0.8236, -0.0359, -0.6241]) tensor([0.6301, 0.0837, 0.1840, 0.1022]) -Greedy action tensor([ 0.7525, -0.5778, 0.0305, -0.4458]) tensor([0.4874, 0.1289, 0.2367, 0.1470]) -Greedy action tensor([ 1.0994, -0.9197, 0.0408, -0.4726]) tensor([0.5927, 0.0787, 0.2056, 0.1231]) -Greedy action tensor([ 0.7792, -0.5384, -0.0218, -0.5489]) tensor([0.5046, 0.1351, 0.2265, 0.1337]) -Greedy action tensor([ 0.8452, -0.2645, -0.1472, -0.6853]) tensor([0.5217, 0.1720, 0.1934, 0.1129]) -Greedy action tensor([ 0.7473, -0.5927, -0.0635, -0.5021]) tensor([0.5017, 0.1314, 0.2230, 0.1438]) -Greedy action tensor([ 0.5574, -0.2158, 0.0199, -0.1533]) tensor([0.3942, 0.1819, 0.2303, 0.1937]) -Greedy action tensor([ 0.3984, 0.0497, -0.0052, -0.0553]) tensor([0.3324, 0.2345, 0.2220, 0.2111]) -Greedy action tensor([ 0.6640, -0.5889, -0.2856, -0.3893]) tensor([0.4947, 0.1413, 0.1914, 0.1725]) -Greedy action tensor([ 0.5226, -0.4716, 0.0214, -0.4128]) tensor([0.4223, 0.1562, 0.2558, 0.1657]) -Greedy action tensor([ 0.5460, -0.4618, 0.0622, -0.2282]) tensor([0.4094, 0.1494, 0.2524, 0.1888]) -Greedy action tensor([ 0.7167, -0.4214, -0.0588, -0.3508]) tensor([0.4706, 0.1508, 0.2167, 0.1618]) -Greedy action tensor([ 0.5627, -0.2202, -0.0753, -0.1638]) tensor([0.4050, 0.1851, 0.2140, 0.1959]) -Greedy action tensor([ 0.7072, -0.5340, -0.0398, -0.4874]) tensor([0.4841, 0.1399, 0.2294, 0.1466]) -Greedy action tensor([ 0.8892, -0.5509, -0.0940, -0.4116]) tensor([0.5310, 0.1258, 0.1986, 0.1446]) -Greedy action tensor([ 0.8121, -0.5905, -0.0239, -0.3845]) tensor([0.5046, 0.1241, 0.2187, 0.1525]) -Greedy action tensor([ 0.2379, 0.2142, -0.1714, -0.1360]) tensor([0.3004, 0.2934, 0.1995, 0.2067]) -Greedy action tensor([ 0.6562, -0.5146, -0.1461, -0.4657]) tensor([0.4798, 0.1488, 0.2151, 0.1563]) -Greedy action tensor([ 1.3472, -0.5604, -0.0022, -0.6013]) tensor([0.6450, 0.0957, 0.1673, 0.0919]) -Greedy action tensor([ 0.6093, -0.5788, -0.0767, -0.2539]) tensor([0.4484, 0.1367, 0.2258, 0.1891]) -Greedy action tensor([ 0.7800, -0.6799, 0.1492, -0.2395]) tensor([0.4705, 0.1093, 0.2504, 0.1698]) -Greedy action tensor([ 0.9740, -0.3353, -0.0747, -0.4521]) tensor([0.5374, 0.1451, 0.1883, 0.1291]) -Greedy action tensor([ 0.3820, -0.0855, 0.0227, -0.2826]) tensor([0.3522, 0.2207, 0.2459, 0.1812]) -Greedy action tensor([ 1.2278, -0.6230, -0.1103, -0.5599]) tensor([0.6302, 0.0990, 0.1653, 0.1055]) -Greedy action tensor([ 0.4052, 0.1005, -0.1246, 0.0968]) tensor([0.3267, 0.2409, 0.1924, 0.2400]) -Greedy action tensor([ 0.6899, -0.5740, 0.0042, -0.2447]) tensor([0.4589, 0.1297, 0.2312, 0.1802]) -Greedy action tensor([ 0.6766, -0.4609, -0.0842, -0.3769]) tensor([0.4680, 0.1501, 0.2187, 0.1632]) -Greedy action tensor([ 0.6130, -0.3857, -0.0571, -0.1969]) tensor([0.4301, 0.1584, 0.2201, 0.1914]) -Greedy action tensor([ 0.7776, 0.0911, -0.0564, 0.0781]) tensor([0.4108, 0.2068, 0.1784, 0.2041]) -Greedy action tensor([ 0.4251, -0.2373, -0.1482, -0.0210]) tensor([0.3677, 0.1896, 0.2073, 0.2354]) -Greedy action tensor([ 0.8510, -0.2604, -0.0429, -0.0992]) tensor([0.4706, 0.1549, 0.1925, 0.1820]) -Greedy action tensor([ 1.0606, -0.5074, 0.0619, -0.5270]) tensor([0.5614, 0.1170, 0.2068, 0.1148]) -Greedy action tensor([ 0.9965, -0.9079, 0.2428, -0.6087]) tensor([0.5493, 0.0818, 0.2585, 0.1103]) -Greedy action tensor([ 0.4819, 0.1121, -0.0403, 0.0206]) tensor([0.3431, 0.2371, 0.2035, 0.2163]) -Greedy action tensor([ 1.1069, -0.7150, -0.0691, -0.4952]) tensor([0.5982, 0.0967, 0.1845, 0.1205]) -Greedy action tensor([ 1.0420, -0.8305, -0.0452, -0.6419]) tensor([0.5965, 0.0917, 0.2011, 0.1107]) -Greedy action tensor([ 0.5052, -0.4636, 0.0521, -0.2530]) tensor([0.4026, 0.1528, 0.2559, 0.1886]) -Greedy action tensor([ 0.4289, 0.4045, -0.1811, 0.1828]) tensor([0.3029, 0.2956, 0.1646, 0.2368]) -Greedy action tensor([ 0.8705, -0.7258, 0.0605, -0.5215]) tensor([0.5274, 0.1069, 0.2346, 0.1311]) -Greedy action tensor([ 0.5671, -0.0309, 0.0591, -0.1148]) tensor([0.3763, 0.2069, 0.2264, 0.1903]) -Greedy action tensor([ 1.0179, -0.3283, -0.2711, -0.1853]) tensor([0.5446, 0.1417, 0.1501, 0.1635]) -Greedy action tensor([ 0.4747, 0.1140, -0.0623, -0.2502]) tensor([0.3615, 0.2521, 0.2113, 0.1751]) -Greedy action tensor([ 0.7092, -0.0866, -0.0251, -0.0219]) tensor([0.4145, 0.1870, 0.1989, 0.1995]) -Greedy action tensor([ 0.8098, -0.4612, -0.1020, -0.3246]) tensor([0.4990, 0.1400, 0.2005, 0.1605]) -Greedy action tensor([ 0.6816, -0.2718, 0.7672, -0.0223]) tensor([0.3368, 0.1298, 0.3668, 0.1666]) -Greedy action tensor([ 1.7481, -1.0616, 0.2705, 0.8938]) tensor([0.5834, 0.0351, 0.1331, 0.2483]) -Greedy action tensor([ 0.6045, -1.3714, -0.3469, 0.4345]) tensor([0.4222, 0.0585, 0.1631, 0.3562]) -Greedy action tensor([1.2099, 0.9331, 0.5739, 0.6610]) tensor([0.3490, 0.2646, 0.1848, 0.2016]) -Greedy action tensor([ 1.2965, -0.6095, 0.3132, -0.5590]) tensor([0.5955, 0.0885, 0.2228, 0.0931]) -Greedy action tensor([ 0.3119, -1.0614, -0.2706, 0.3698]) tensor([0.3483, 0.0882, 0.1945, 0.3690]) -Greedy action tensor([ 0.4617, -0.1439, 1.0099, -0.6241]) tensor([0.2767, 0.1510, 0.4788, 0.0934]) -Greedy action tensor([ 0.8011, 0.8771, -0.3052, 0.3549]) tensor([0.3279, 0.3538, 0.1085, 0.2099]) -Greedy action tensor([-0.7108, -0.2488, 0.6363, -0.1418]) tensor([0.1220, 0.1936, 0.4691, 0.2154]) -Greedy action tensor([-0.3323, -0.3713, -0.2225, -0.4599]) tensor([0.2526, 0.2430, 0.2820, 0.2224]) -Greedy action tensor([ 0.2878, -1.4507, 0.6326, 0.1595]) tensor([0.2884, 0.0507, 0.4072, 0.2537]) -Greedy action tensor([ 1.7490, 0.0952, 1.4196, -0.1173]) tensor([0.4842, 0.0926, 0.3483, 0.0749]) -Greedy action tensor([ 0.8418, -0.0219, -0.2806, 0.6480]) tensor([0.3890, 0.1640, 0.1266, 0.3204]) -Greedy action tensor([-0.2700, -0.1292, -0.6649, -0.4246]) tensor([0.2716, 0.3127, 0.1830, 0.2327]) -Greedy action tensor([-1.0833, 0.8655, -0.0639, -0.4562]) tensor([0.0790, 0.5543, 0.2189, 0.1478]) -Greedy action tensor([-0.4591, 0.2047, -0.3651, -1.2142]) tensor([0.2217, 0.4306, 0.2435, 0.1042]) -Greedy action tensor([ 0.0583, -2.2350, 0.2663, 0.0960]) tensor([0.2967, 0.0299, 0.3653, 0.3081]) -Greedy action tensor([ 0.1407, -1.0966, -0.4197, 1.6120]) tensor([0.1609, 0.0467, 0.0919, 0.7006]) -Greedy action tensor([ 1.1437, -0.9011, 0.8059, 0.2350]) tensor([0.4453, 0.0576, 0.3176, 0.1795]) -Greedy action tensor([-0.0174, 0.2260, -0.0065, -0.0294]) tensor([0.2340, 0.2984, 0.2365, 0.2311]) -Greedy action tensor([-0.4408, -0.3931, -0.9090, -0.5461]) tensor([0.2797, 0.2934, 0.1751, 0.2518]) -Greedy action tensor([ 0.6732, -1.5993, -0.3203, 1.0271]) tensor([0.3451, 0.0356, 0.1278, 0.4916]) -Greedy action tensor([0.8597, 0.0823, 0.8497, 0.4435]) tensor([0.3216, 0.1478, 0.3184, 0.2121]) -Greedy action tensor([ 0.5779, -0.0204, -0.0708, 0.0660]) tensor([0.3743, 0.2058, 0.1956, 0.2243]) -Greedy action tensor([ 0.0526, -0.5087, -0.1391, -0.5176]) tensor([0.3377, 0.1926, 0.2788, 0.1909]) -Greedy action tensor([ 0.3122, -0.4778, -0.8181, -0.5345]) tensor([0.4534, 0.2058, 0.1464, 0.1944]) -Greedy action tensor([ 1.3949, -0.4369, 0.8432, -0.1149]) tensor([0.5110, 0.0818, 0.2943, 0.1129]) -Greedy action tensor([ 1.1932, -0.9031, -0.2405, -0.3743]) tensor([0.6370, 0.0783, 0.1519, 0.1329]) -Greedy action tensor([ 1.2275, -0.3155, 0.3272, 0.9513]) tensor([0.4204, 0.0899, 0.1709, 0.3189]) -Greedy action tensor([ 0.4720, -0.6946, 0.8470, -0.8858]) tensor([0.3307, 0.1030, 0.4812, 0.0851]) -Greedy action tensor([ 0.6866, -0.4264, -0.5014, 0.8298]) tensor([0.3588, 0.1179, 0.1094, 0.4140]) -Greedy action tensor([ 1.3652, -0.3170, 1.3977, 0.2616]) tensor([0.3920, 0.0729, 0.4050, 0.1300]) -Greedy action tensor([ 0.4036, -1.4456, 1.4590, 1.1496]) tensor([0.1629, 0.0256, 0.4680, 0.3435]) -Greedy action tensor([ 0.2721, 0.5593, 0.1552, -0.1384]) tensor([0.2574, 0.3430, 0.2290, 0.1707]) -Greedy action tensor([ 0.6518, -0.1471, -0.4135, -0.7091]) tensor([0.4876, 0.2193, 0.1680, 0.1250]) -Greedy action tensor([ 1.1292, 0.2678, 1.7682, -0.7478]) tensor([0.2882, 0.1218, 0.5460, 0.0441]) -Greedy action tensor([-0.3265, -0.0669, -0.4275, -0.5155]) tensor([0.2482, 0.3219, 0.2244, 0.2055]) -Greedy action tensor([ 0.4657, -0.3457, -0.4282, 0.6265]) tensor([0.3303, 0.1467, 0.1351, 0.3879]) -Greedy action tensor([-0.2849, -0.6675, 0.2052, -0.1698]) tensor([0.2254, 0.1537, 0.3680, 0.2529]) -Greedy action tensor([-0.6685, -0.7742, 0.5291, -0.8467]) tensor([0.1653, 0.1487, 0.5476, 0.1383]) -Greedy action tensor([ 1.2696, -1.1142, 0.7596, 0.5747]) tensor([0.4562, 0.0421, 0.2740, 0.2277]) -Greedy action tensor([ 0.9906, -0.9461, 1.0779, -0.2090]) tensor([0.3942, 0.0568, 0.4302, 0.1188]) -Greedy action tensor([ 0.4872, -0.3465, 0.3234, -0.1586]) tensor([0.3562, 0.1547, 0.3024, 0.1867]) -Greedy action tensor([-0.5278, -1.2719, 0.8660, -1.4711]) tensor([0.1697, 0.0806, 0.6837, 0.0660]) -Greedy action tensor([ 1.0705, -0.4545, 0.0190, -0.7037]) tensor([0.5758, 0.1253, 0.2012, 0.0977]) -Greedy action tensor([-1.2209, -1.5830, -0.9913, 0.5415]) tensor([0.1139, 0.0793, 0.1433, 0.6635]) -Greedy action tensor([ 0.2942, -1.4537, 0.5679, -0.9903]) tensor([0.3616, 0.0630, 0.4754, 0.1001]) -Greedy action tensor([-0.3049, -1.3555, -0.9993, -0.1026]) tensor([0.3254, 0.1138, 0.1625, 0.3983]) -Greedy action tensor([1.5420, 0.0030, 1.6457, 0.4579]) tensor([0.3756, 0.0806, 0.4167, 0.1271]) -Greedy action tensor([ 0.5065, -0.3619, -0.1116, -0.0210]) tensor([0.3924, 0.1647, 0.2115, 0.2315]) -Greedy action tensor([-0.6109, -1.0831, 0.6720, 0.0636]) tensor([0.1390, 0.0867, 0.5014, 0.2729]) -Greedy action tensor([ 0.8096, -0.5326, -0.3921, 0.3734]) tensor([0.4528, 0.1183, 0.1361, 0.2927]) -Greedy action tensor([-1.1634, 0.3055, -0.0482, 0.1072]) tensor([0.0836, 0.3633, 0.2551, 0.2980]) -Greedy action tensor([-0.0546, -0.1748, 0.4479, -0.6316]) tensor([0.2438, 0.2162, 0.4030, 0.1369]) -Greedy action tensor([-1.3308, -0.0452, 0.2689, -0.1703]) tensor([0.0784, 0.2835, 0.3881, 0.2501]) -Greedy action tensor([-0.3304, -0.7494, 0.5054, -0.6755]) tensor([0.2140, 0.1408, 0.4937, 0.1516]) -Greedy action tensor([-0.6548, -0.3348, -0.8436, 0.1416]) tensor([0.1844, 0.2540, 0.1527, 0.4089]) -Greedy action tensor([-0.2614, 1.0553, -1.0421, -1.2185]) tensor([0.1794, 0.6695, 0.0822, 0.0689]) -Greedy action tensor([-0.3323, -0.6237, 0.6278, 0.1081]) tensor([0.1691, 0.1264, 0.4418, 0.2627]) -Greedy action tensor([ 0.1663, -0.3463, -0.5658, -0.1964]) tensor([0.3603, 0.2158, 0.1733, 0.2507]) -Greedy action tensor([ 0.0019, 0.2131, -0.1315, 0.3354]) tensor([0.2219, 0.2741, 0.1942, 0.3098]) -Greedy action tensor([-0.9402, -0.2122, -0.6274, 0.0593]) tensor([0.1398, 0.2894, 0.1911, 0.3797]) -Greedy action tensor([ 1.8389, -0.0120, 0.9518, 0.6328]) tensor([0.5352, 0.0841, 0.2204, 0.1602]) -Greedy action tensor([-0.3033, -0.4567, -0.0520, 0.1060]) tensor([0.2151, 0.1845, 0.2765, 0.3239]) -Greedy action tensor([-0.4100, -1.4349, -0.7227, -0.2357]) tensor([0.3048, 0.1094, 0.2230, 0.3628]) -Greedy action tensor([ 0.8190, -1.1595, 1.6376, 0.4352]) tensor([0.2447, 0.0338, 0.5548, 0.1667]) -Greedy action tensor([-0.2583, -0.9743, -0.7031, 0.2785]) tensor([0.2604, 0.1273, 0.1669, 0.4454]) -Greedy action tensor([ 0.6832, -0.2844, -0.8231, -0.0914]) tensor([0.4848, 0.1842, 0.1075, 0.2235]) -Greedy action tensor([ 0.6605, -0.1426, -1.2203, 0.1697]) tensor([0.4520, 0.2025, 0.0689, 0.2767]) -Greedy action tensor([-0.5965, -0.5668, 0.3329, -0.7993]) tensor([0.1859, 0.1915, 0.4709, 0.1518]) -Greedy action tensor([-0.3203, -0.1891, 0.4986, -1.0025]) tensor([0.2035, 0.2320, 0.4616, 0.1029]) -Greedy action tensor([-0.5009, -0.5304, 0.4279, -0.3253]) tensor([0.1756, 0.1705, 0.4446, 0.2093]) -Greedy action tensor([ 0.0176, -0.6264, 0.7622, 0.4350]) tensor([0.1942, 0.1020, 0.4089, 0.2948]) -Greedy action tensor([1.3814, 0.0062, 0.1539, 0.2289]) tensor([0.5372, 0.1358, 0.1574, 0.1697]) -Greedy action tensor([ 0.7737, -0.8988, -0.1805, 0.5399]) tensor([0.4229, 0.0794, 0.1629, 0.3348]) -Greedy action tensor([ 0.1579, 0.0972, 0.6982, -0.3203]) tensor([0.2338, 0.2200, 0.4013, 0.1449]) -Greedy action tensor([ 0.6234, -0.1512, -0.0471, -0.1301]) tensor([0.4093, 0.1887, 0.2093, 0.1927]) -Greedy action tensor([1.4189, 0.5185, 0.3841, 0.6839]) tensor([0.4462, 0.1813, 0.1585, 0.2139]) -Greedy action tensor([-0.0773, -1.1150, 0.0052, 0.2057]) tensor([0.2654, 0.0940, 0.2883, 0.3523]) -Greedy action tensor([-0.0958, -0.6967, -0.6359, -0.3746]) tensor([0.3463, 0.1899, 0.2018, 0.2620]) -Greedy action tensor([ 1.0049, -0.1412, 1.0249, -0.0277]) tensor([0.3712, 0.1180, 0.3787, 0.1322]) -Greedy action tensor([-1.2218, -0.8760, -0.3272, -0.0495]) tensor([0.1236, 0.1747, 0.3024, 0.3992]) -Greedy action tensor([ 1.7368, -0.6107, -0.2710, 0.3850]) tensor([0.6717, 0.0642, 0.0902, 0.1738]) -Greedy action tensor([ 1.1529, -0.5026, -0.4597, 0.2051]) tensor([0.5624, 0.1074, 0.1121, 0.2180]) -Greedy action tensor([ 1.6536, -0.8487, -0.0909, 0.0946]) tensor([0.6817, 0.0558, 0.1191, 0.1434]) -Greedy action tensor([ 1.2689, 0.1360, -0.9750, 0.4760]) tensor([0.5317, 0.1713, 0.0564, 0.2406]) -Greedy action tensor([ 1.7042, -0.8223, -0.3539, 0.5845]) tensor([0.6519, 0.0521, 0.0832, 0.2128]) -Greedy action tensor([ 2.1612, 0.5113, -0.3003, 0.2652]) tensor([0.7005, 0.1345, 0.0598, 0.1052]) -Greedy action tensor([ 1.6256, -0.8759, -0.6902, 0.4665]) tensor([0.6692, 0.0548, 0.0660, 0.2100]) -Greedy action tensor([ 1.7314, -0.4849, -0.0809, 0.0966]) tensor([0.6815, 0.0743, 0.1113, 0.1329]) -Greedy action tensor([ 1.2843, -0.5626, -0.4871, 0.5534]) tensor([0.5527, 0.0872, 0.0940, 0.2661]) -Greedy action tensor([ 1.8807, -0.0784, -0.7819, 0.1867]) tensor([0.7171, 0.1011, 0.0500, 0.1318]) -Greedy action tensor([ 1.3523, 0.1483, -0.4243, 0.1639]) tensor([0.5637, 0.1691, 0.0954, 0.1718]) -Greedy action tensor([ 2.3509, -1.0902, -0.4931, 0.9796]) tensor([0.7440, 0.0238, 0.0433, 0.1888]) -Greedy action tensor([ 1.1769, -0.2805, -0.3064, 0.3114]) tensor([0.5318, 0.1238, 0.1206, 0.2238]) -Greedy action tensor([ 1.9562, -0.8972, -0.3796, 0.6344]) tensor([0.7037, 0.0406, 0.0681, 0.1876]) -Greedy action tensor([ 1.1613, -0.5428, 0.1907, 0.1611]) tensor([0.5185, 0.0943, 0.1964, 0.1907]) -Greedy action tensor([ 1.0377, -0.2473, -0.3061, -0.0174]) tensor([0.5303, 0.1467, 0.1383, 0.1846]) -Greedy action tensor([ 0.9035, 0.1380, -0.1278, 0.3091]) tensor([0.4213, 0.1960, 0.1502, 0.2325]) -Greedy action tensor([ 1.6853, -0.9169, -0.0379, 0.2680]) tensor([0.6689, 0.0496, 0.1194, 0.1621]) -Greedy action tensor([ 1.5104, -0.3805, -0.1207, 0.5093]) tensor([0.5834, 0.0881, 0.1142, 0.2144]) -Greedy action tensor([ 2.2490, -1.0788, -0.3217, 0.4857]) tensor([0.7789, 0.0279, 0.0596, 0.1336]) -Greedy action tensor([ 1.7654, -0.0492, -0.8694, -0.4385]) tensor([0.7435, 0.1211, 0.0533, 0.0821]) -Greedy action tensor([ 2.3220, -1.3002, -0.3891, 0.7177]) tensor([0.7727, 0.0206, 0.0514, 0.1553]) -Greedy action tensor([ 1.1167, -0.5773, 0.2917, 0.0206]) tensor([0.5112, 0.0940, 0.2240, 0.1708]) -Greedy action tensor([ 2.3143, -0.5242, 0.0053, 0.3703]) tensor([0.7686, 0.0450, 0.0764, 0.1100]) -Greedy action tensor([ 1.6327, -0.4304, -0.4775, 0.5243]) tensor([0.6336, 0.0805, 0.0768, 0.2091]) -Greedy action tensor([ 1.3187, -0.3905, -0.4365, 0.3990]) tensor([0.5706, 0.1033, 0.0986, 0.2275]) -Greedy action tensor([ 1.6323, -0.3811, -0.2062, 0.1362]) tensor([0.6594, 0.0880, 0.1049, 0.1477]) -Greedy action tensor([ 1.5218, -0.9864, -0.1930, 0.6193]) tensor([0.5999, 0.0488, 0.1080, 0.2433]) -Greedy action tensor([ 1.3559, -0.4478, -0.4534, 0.0741]) tensor([0.6227, 0.1025, 0.1020, 0.1728]) -Greedy action tensor([ 1.1110, -0.1941, -0.6477, 0.3629]) tensor([0.5217, 0.1415, 0.0899, 0.2469]) -Greedy action tensor([ 2.0242, 0.3346, -0.8138, 0.4285]) tensor([0.6916, 0.1277, 0.0405, 0.1402]) -Greedy action tensor([ 1.3718, -0.1945, -0.5248, 0.1848]) tensor([0.6010, 0.1255, 0.0902, 0.1834]) -Greedy action tensor([ 2.2955, -0.4864, -0.0298, 0.3933]) tensor([0.7640, 0.0473, 0.0747, 0.1140]) -Greedy action tensor([ 2.1410, -1.4930, -0.4562, 0.5814]) tensor([0.7627, 0.0201, 0.0568, 0.1603]) -Greedy action tensor([ 2.3721, -1.3142, -0.3368, 0.8975]) tensor([0.7573, 0.0190, 0.0504, 0.1733]) -Greedy action tensor([ 1.5897, -0.3255, -0.6158, 0.7217]) tensor([0.5962, 0.0878, 0.0657, 0.2503]) -Greedy action tensor([ 1.4765, -0.0493, -0.4759, 0.2357]) tensor([0.6066, 0.1319, 0.0861, 0.1754]) -Greedy action tensor([ 1.4078, -0.7339, -0.2868, 0.2905]) tensor([0.6141, 0.0721, 0.1128, 0.2009]) -Greedy action tensor([ 1.8964, -0.5446, -0.2733, 0.4652]) tensor([0.6943, 0.0605, 0.0793, 0.1660]) -Greedy action tensor([ 1.6011, -0.6461, -0.2936, 0.8745]) tensor([0.5748, 0.0608, 0.0864, 0.2780]) -Greedy action tensor([ 1.5105, -0.3217, -0.4235, 0.5586]) tensor([0.5915, 0.0947, 0.0855, 0.2283]) -Greedy action tensor([ 1.0012, -0.4829, -0.0307, 0.3723]) tensor([0.4726, 0.1071, 0.1684, 0.2519]) -Greedy action tensor([ 1.4310, -0.3856, -0.4485, -0.0046]) tensor([0.6438, 0.1047, 0.0983, 0.1532]) -Greedy action tensor([ 1.8847, -0.6658, -0.3342, 0.4807]) tensor([0.6981, 0.0545, 0.0759, 0.1715]) -Greedy action tensor([ 1.5204, -0.0772, -0.3012, 0.3278]) tensor([0.5997, 0.1214, 0.0970, 0.1820]) -Greedy action tensor([ 1.6126, -0.6190, -0.5131, 0.3877]) tensor([0.6577, 0.0706, 0.0785, 0.1932]) -Greedy action tensor([ 1.8951, -1.0735, -0.1091, 0.5556]) tensor([0.6906, 0.0355, 0.0931, 0.1809]) -Greedy action tensor([ 1.2407, -0.7379, 0.2016, 0.0394]) tensor([0.5578, 0.0771, 0.1973, 0.1678]) -Greedy action tensor([ 1.5600, -0.8494, 0.0098, 0.1562]) tensor([0.6461, 0.0581, 0.1371, 0.1587]) -Greedy action tensor([ 2.2021, -1.3114, 0.1873, 0.9406]) tensor([0.6914, 0.0206, 0.0922, 0.1958]) -Greedy action tensor([ 1.4407, -0.2191, -0.4196, 0.3502]) tensor([0.5946, 0.1131, 0.0925, 0.1998]) -Greedy action tensor([ 1.8538, -0.8220, -0.3336, 0.4053]) tensor([0.7062, 0.0486, 0.0792, 0.1659]) -Greedy action tensor([ 1.2559, -0.4798, -1.1017, 0.4722]) tensor([0.5788, 0.1020, 0.0548, 0.2643]) -Greedy action tensor([ 1.7290, -0.1436, -0.2827, 0.0884]) tensor([0.6751, 0.1038, 0.0903, 0.1309]) -Greedy action tensor([ 1.1688, -0.4373, 0.0268, 0.1597]) tensor([0.5307, 0.1065, 0.1694, 0.1935]) -Greedy action tensor([ 1.2889, -0.3733, -0.4090, 0.5206]) tensor([0.5445, 0.1033, 0.0997, 0.2525]) -Greedy action tensor([ 1.2486, -0.1941, -0.1824, 0.0613]) tensor([0.5617, 0.1327, 0.1343, 0.1713]) -Greedy action tensor([ 1.6889, -0.4062, -0.1117, 0.3295]) tensor([0.6472, 0.0796, 0.1069, 0.1662]) -Greedy action tensor([ 1.4765, -0.6639, -0.6157, 0.3495]) tensor([0.6390, 0.0751, 0.0789, 0.2070]) -Greedy action tensor([ 1.6331, -0.1109, -0.4823, 0.1026]) tensor([0.6614, 0.1156, 0.0798, 0.1432]) -Greedy action tensor([ 1.6681, -0.6683, -0.2519, 0.1119]) tensor([0.6877, 0.0665, 0.1008, 0.1450]) -Greedy action tensor([ 1.4299, -0.4680, -0.5806, 0.3416]) tensor([0.6171, 0.0925, 0.0826, 0.2078]) -Greedy action tensor([ 1.4387, -0.3346, -0.6998, -0.0417]) tensor([0.6600, 0.1121, 0.0778, 0.1502]) -Greedy action tensor([ 1.9742, -0.6360, -0.1008, 0.6139]) tensor([0.6870, 0.0505, 0.0863, 0.1763]) -Greedy action tensor([ 0.9011, -0.1276, -0.1034, -0.1109]) tensor([0.4791, 0.1713, 0.1755, 0.1741]) -Greedy action tensor([ 1.5940, -0.1582, -0.2304, 0.4235]) tensor([0.6079, 0.1054, 0.0981, 0.1886]) -Greedy action tensor([ 1.0523, 0.1225, -0.5791, 0.1042]) tensor([0.5056, 0.1995, 0.0989, 0.1959]) -Greedy action tensor([ 1.5925, -0.3086, -1.0833, 0.6007]) tensor([0.6293, 0.0940, 0.0433, 0.2334]) -Greedy action tensor([ 0.6595, -0.3642, -0.0642, 0.0121]) tensor([0.4224, 0.1517, 0.2048, 0.2211]) -Greedy action tensor([ 1.8064, -0.2810, -0.5656, 0.3420]) tensor([0.6904, 0.0856, 0.0644, 0.1596]) -Greedy action tensor([ 1.3643, -0.7081, -0.2270, 0.0781]) tensor([0.6227, 0.0784, 0.1268, 0.1721]) -Greedy action tensor([ 1.8886, -0.4630, -0.3459, -0.0062]) tensor([0.7393, 0.0704, 0.0791, 0.1112]) -Greedy action tensor([ 0.9984, -0.4648, -0.0327, 0.4104]) tensor([0.4665, 0.1080, 0.1664, 0.2591]) -Greedy action tensor([ 1.6100, -0.8624, -0.0293, 0.1927]) tensor([0.6575, 0.0555, 0.1276, 0.1594]) -Greedy action tensor([ 1.1762, -0.4296, -0.0456, 0.3601]) tensor([0.5161, 0.1036, 0.1521, 0.2282]) -Greedy action tensor([ 1.4887, -0.5543, -0.5056, 0.3671]) tensor([0.6283, 0.0815, 0.0855, 0.2047]) -Greedy action tensor([ 1.2893, -0.6004, -0.1008, 0.0552]) tensor([0.5913, 0.0893, 0.1473, 0.1721]) -Greedy action tensor([ 1.8644, -0.4307, -0.0866, 0.2900]) tensor([0.6897, 0.0695, 0.0980, 0.1428]) -Greedy action tensor([ 1.6244, -0.3295, -0.7641, 0.4897]) tensor([0.6431, 0.0911, 0.0590, 0.2068]) -Greedy action tensor([ 2.0371, -0.5403, 0.0759, 0.5946]) tensor([0.6882, 0.0523, 0.0968, 0.1627]) -Greedy action tensor([ 1.7642, -0.6405, -0.2666, 0.5857]) tensor([0.6539, 0.0590, 0.0858, 0.2012]) -Greedy action tensor([-1.7271, -0.3792, 0.6673, -0.0084]) tensor([0.0468, 0.1800, 0.5125, 0.2608]) -Greedy action tensor([-0.9270, 0.7488, 0.1009, 0.0125]) tensor([0.0855, 0.4568, 0.2390, 0.2187]) -Greedy action tensor([-1.5426, 0.4242, 0.3523, 0.0067]) tensor([0.0513, 0.3664, 0.3410, 0.2413]) -Greedy action tensor([-0.0172, -0.2807, 0.9318, 1.6684]) tensor([0.1026, 0.0788, 0.2650, 0.5536]) -Greedy action tensor([-1.9117, -0.4439, 0.6513, -0.1632]) tensor([0.0416, 0.1804, 0.5392, 0.2388]) -Greedy action tensor([-1.8280, -0.4488, 0.5958, -0.1542]) tensor([0.0463, 0.1839, 0.5228, 0.2470]) -Greedy action tensor([0.0389, 0.0175, 0.9217, 1.5420]) tensor([0.1125, 0.1101, 0.2719, 0.5056]) -Greedy action tensor([-1.7924, -0.5074, 0.5879, -0.1383]) tensor([0.0484, 0.1750, 0.5234, 0.2532]) -Greedy action tensor([-1.8276, -0.4445, 0.6666, -0.0692]) tensor([0.0437, 0.1741, 0.5289, 0.2534]) -Greedy action tensor([-1.9444, -0.4511, 0.6696, -0.1801]) tensor([0.0401, 0.1785, 0.5474, 0.2340]) -Greedy action tensor([-1.8926, -0.4239, 0.6358, -0.1482]) tensor([0.0424, 0.1841, 0.5311, 0.2425]) -Greedy action tensor([-1.3347, -0.4582, 0.3500, 0.1691]) tensor([0.0752, 0.1808, 0.4056, 0.3385]) -Greedy action tensor([-1.8500, -0.3809, 0.6098, -0.1498]) tensor([0.0444, 0.1929, 0.5196, 0.2431]) -Greedy action tensor([-1.6476, -0.4652, 0.5455, 0.0823]) tensor([0.0530, 0.1729, 0.4751, 0.2990]) -Greedy action tensor([-1.2474, 0.7296, 0.3410, -0.2355]) tensor([0.0630, 0.4551, 0.3085, 0.1734]) -Greedy action tensor([-1.6387, 0.2793, 0.4091, -0.0481]) tensor([0.0489, 0.3326, 0.3787, 0.2398]) -Greedy action tensor([-1.9137, -0.4466, 0.6534, -0.1655]) tensor([0.0415, 0.1799, 0.5404, 0.2383]) -Greedy action tensor([-1.9283, -0.4450, 0.6598, -0.1715]) tensor([0.0408, 0.1799, 0.5429, 0.2364]) -Greedy action tensor([-0.7568, 0.4439, 0.0151, -0.0375]) tensor([0.1171, 0.3891, 0.2534, 0.2404]) -Greedy action tensor([-1.9231, -0.4171, 0.6545, -0.1663]) tensor([0.0409, 0.1843, 0.5381, 0.2368]) -Greedy action tensor([-1.9540, -0.0450, -0.2535, -0.3609]) tensor([0.0551, 0.3719, 0.3019, 0.2711]) -Greedy action tensor([-1.8722, -0.4130, 0.6403, -0.1385]) tensor([0.0429, 0.1847, 0.5294, 0.2430]) -Greedy action tensor([-0.6754, -0.6169, 0.1082, 0.3347]) tensor([0.1429, 0.1516, 0.3130, 0.3925]) -Greedy action tensor([-1.7142, -0.3249, 0.6467, -0.0614]) tensor([0.0480, 0.1926, 0.5088, 0.2506]) -Greedy action tensor([-1.7628, 0.0167, 0.5019, -0.0473]) tensor([0.0452, 0.2680, 0.4354, 0.2514]) -Greedy action tensor([-1.9329, -0.4343, 0.6654, -0.1647]) tensor([0.0404, 0.1806, 0.5425, 0.2365]) -Greedy action tensor([-1.9391, -0.3954, 0.6541, -0.1777]) tensor([0.0402, 0.1882, 0.5376, 0.2340]) -Greedy action tensor([-1.9213, -0.4489, 0.6727, -0.1600]) tensor([0.0407, 0.1775, 0.5448, 0.2369]) -Greedy action tensor([-1.0282, -0.2389, 0.1278, 0.1704]) tensor([0.1032, 0.2271, 0.3277, 0.3420]) -Greedy action tensor([-1.4458, 0.2477, 0.3919, 0.0476]) tensor([0.0582, 0.3167, 0.3658, 0.2593]) -Greedy action tensor([-1.8477, -0.4470, 0.6187, -0.1299]) tensor([0.0446, 0.1811, 0.5257, 0.2486]) -Greedy action tensor([-1.9216, -0.3914, 0.6430, -0.1673]) tensor([0.0410, 0.1893, 0.5328, 0.2369]) -Greedy action tensor([-1.9303, -0.4285, 0.6598, -0.1693]) tensor([0.0406, 0.1822, 0.5411, 0.2361]) -Greedy action tensor([-1.7706, -0.4660, 0.5774, -0.1826]) tensor([0.0499, 0.1839, 0.5221, 0.2441]) -Greedy action tensor([-1.8243, -0.3355, 0.5902, -0.1333]) tensor([0.0454, 0.2011, 0.5074, 0.2461]) -Greedy action tensor([-1.8242, -0.1242, 0.5690, -0.1552]) tensor([0.0440, 0.2408, 0.4817, 0.2335]) -Greedy action tensor([-1.7870, -0.4114, 0.6909, 0.0065]) tensor([0.0437, 0.1729, 0.5207, 0.2627]) -Greedy action tensor([-1.7486, -0.4213, 0.5770, -0.0747]) tensor([0.0492, 0.1854, 0.5032, 0.2622]) -Greedy action tensor([-1.8948, -0.4642, 0.6418, -0.1601]) tensor([0.0426, 0.1780, 0.5381, 0.2413]) -Greedy action tensor([-1.8598, -0.3860, 0.6337, -0.1241]) tensor([0.0432, 0.1887, 0.5230, 0.2451]) -Greedy action tensor([-1.9340, -0.4270, 0.6598, -0.1736]) tensor([0.0405, 0.1827, 0.5415, 0.2353]) -Greedy action tensor([-1.2879, -0.1211, 0.5452, -0.6025]) tensor([0.0803, 0.2580, 0.5023, 0.1594]) -Greedy action tensor([-1.8993, -0.3926, 0.6414, -0.1495]) tensor([0.0417, 0.1883, 0.5297, 0.2402]) -Greedy action tensor([-0.1793, 1.0173, -0.0158, 0.3332]) tensor([0.1397, 0.4624, 0.1646, 0.2333]) -Greedy action tensor([-1.9033, -0.4139, 0.6415, -0.1643]) tensor([0.0419, 0.1858, 0.5338, 0.2385]) -Greedy action tensor([-1.4955, -0.4562, 0.4650, -0.0859]) tensor([0.0666, 0.1882, 0.4727, 0.2725]) -Greedy action tensor([-1.5461, -0.5145, 0.4834, 0.0347]) tensor([0.0614, 0.1724, 0.4676, 0.2986]) -Greedy action tensor([-1.9186, -0.4120, 0.6511, -0.1656]) tensor([0.0411, 0.1853, 0.5365, 0.2371]) -Greedy action tensor([-1.0585, -0.4095, 0.2658, 0.3428]) tensor([0.0932, 0.1783, 0.3502, 0.3783]) -Greedy action tensor([-1.9136, -0.3784, 0.6443, -0.1613]) tensor([0.0411, 0.1909, 0.5308, 0.2372]) -Greedy action tensor([-1.5004, -0.5707, 0.4226, 0.1062]) tensor([0.0651, 0.1649, 0.4454, 0.3246]) -Greedy action tensor([-1.9327, -0.3930, 0.6506, -0.1729]) tensor([0.0405, 0.1887, 0.5357, 0.2351]) -Greedy action tensor([-0.9738, -0.2026, 0.4855, 1.0549]) tensor([0.0664, 0.1435, 0.2855, 0.5046]) -Greedy action tensor([-1.8404, -0.4472, 0.6148, -0.1272]) tensor([0.0450, 0.1812, 0.5242, 0.2496]) -Greedy action tensor([-1.8534, -0.4284, 0.6744, -0.1031]) tensor([0.0427, 0.1774, 0.5344, 0.2456]) -Greedy action tensor([-1.8787, -0.4382, 0.6351, -0.1538]) tensor([0.0431, 0.1821, 0.5327, 0.2420]) -Greedy action tensor([-0.2344, 1.1054, 0.0235, 0.2988]) tensor([0.1279, 0.4885, 0.1656, 0.2180]) -Greedy action tensor([-1.0009, -0.4975, 0.7414, 0.9002]) tensor([0.0664, 0.1099, 0.3792, 0.4445]) -Greedy action tensor([-1.9400, -0.4513, 0.6655, -0.1769]) tensor([0.0403, 0.1787, 0.5459, 0.2351]) -Greedy action tensor([-1.8899, -0.2951, 0.6193, -0.1338]) tensor([0.0416, 0.2052, 0.5120, 0.2411]) -Greedy action tensor([-1.8732, -0.4357, 0.6283, -0.1495]) tensor([0.0434, 0.1829, 0.5301, 0.2435]) -Greedy action tensor([0.2406, 0.8389, 0.2606, 1.0279]) tensor([0.1657, 0.3013, 0.1690, 0.3640]) -Greedy action tensor([0.1972, 1.2189, 0.0352, 0.2970]) tensor([0.1744, 0.4845, 0.1483, 0.1927]) -Greedy action tensor([-1.3769, -0.1721, 0.3517, 0.0102]) tensor([0.0716, 0.2388, 0.4031, 0.2865]) -Greedy action tensor([-1.7768, 0.0271, 0.6132, -0.3300]) tensor([0.0450, 0.2731, 0.4908, 0.1911]) -Greedy action tensor([-1.8978, -0.3886, 0.6435, -0.1522]) tensor([0.0418, 0.1889, 0.5301, 0.2392]) -Greedy action tensor([-1.9232, -0.4413, 0.6588, -0.1680]) tensor([0.0410, 0.1803, 0.5417, 0.2370]) -Greedy action tensor([-1.7861, -0.4486, 0.5854, -0.1319]) tensor([0.0482, 0.1836, 0.5163, 0.2520]) -Greedy action tensor([-1.4449, 0.6472, 0.2921, 0.2848]) tensor([0.0490, 0.3967, 0.2782, 0.2761]) -Greedy action tensor([-1.9361, -0.4460, 0.6639, -0.1754]) tensor([0.0405, 0.1795, 0.5447, 0.2353]) -Greedy action tensor([-1.9215, -0.4530, 0.6595, -0.1681]) tensor([0.0411, 0.1785, 0.5430, 0.2374]) -Greedy action tensor([-1.9343, -0.4540, 0.6632, -0.1737]) tensor([0.0406, 0.1783, 0.5451, 0.2360]) -Greedy action tensor([-1.6268, -0.4402, 0.7789, 0.3225]) tensor([0.0447, 0.1463, 0.4952, 0.3137]) -Greedy action tensor([-1.8463, -0.3357, 0.6055, -0.1313]) tensor([0.0441, 0.1996, 0.5115, 0.2448]) -Greedy action tensor([-1.7701, -0.2616, 0.6349, -0.0628]) tensor([0.0452, 0.2044, 0.5010, 0.2494]) -Greedy action tensor([-1.7756, -0.4439, 0.6027, -0.2082]) tensor([0.0491, 0.1859, 0.5296, 0.2354]) -Greedy action tensor([-1.5589, -0.4450, 0.4880, -0.0964]) tensor([0.0621, 0.1891, 0.4808, 0.2680]) -Greedy action tensor([-1.9094, -0.4512, 0.6531, -0.1626]) tensor([0.0417, 0.1791, 0.5403, 0.2390]) -Greedy action tensor([-1.7441, -0.4158, 0.5898, -0.0538]) tensor([0.0487, 0.1840, 0.5030, 0.2643]) -Greedy action tensor([-1.3321, -0.3105, 0.3173, 0.1424]) tensor([0.0749, 0.2080, 0.3898, 0.3272]) -Greedy action tensor([-1.9226, -0.4183, 0.6546, -0.1668]) tensor([0.0409, 0.1841, 0.5383, 0.2367]) -Greedy action tensor([-1.3922, -0.4173, 0.4502, 0.2383]) tensor([0.0664, 0.1759, 0.4189, 0.3389]) -Greedy action tensor([ 0.9103, -0.6004, -0.0787, -0.2528]) tensor([0.5249, 0.1159, 0.1952, 0.1640]) -Greedy action tensor([ 0.3757, 0.3022, -0.1153, 0.1437]) tensor([0.2999, 0.2787, 0.1836, 0.2378]) -Greedy action tensor([ 0.7353, -0.8917, 0.0725, -0.3465]) tensor([0.4876, 0.0958, 0.2513, 0.1653]) -Greedy action tensor([ 0.7030, -0.0042, -0.0835, -0.1068]) tensor([0.4178, 0.2060, 0.1903, 0.1859]) -Greedy action tensor([ 0.7161, -0.5227, 0.0907, -0.5082]) tensor([0.4720, 0.1367, 0.2525, 0.1387]) -Greedy action tensor([ 0.6589, -0.4424, -0.1506, -0.3151]) tensor([0.4640, 0.1543, 0.2065, 0.1752]) -Greedy action tensor([ 1.0708, -0.7657, -0.0209, -0.3989]) tensor([0.5797, 0.0924, 0.1946, 0.1333]) -Greedy action tensor([ 0.6425, -0.6809, 0.0209, -0.1901]) tensor([0.4468, 0.1189, 0.2400, 0.1943]) -Greedy action tensor([ 4.5772e-01, -4.9408e-02, -7.5459e-05, 5.4731e-02]) tensor([0.3444, 0.2074, 0.2179, 0.2302]) -Greedy action tensor([ 0.5530, -0.2084, 0.0131, -0.1242]) tensor([0.3910, 0.1826, 0.2278, 0.1986]) -Greedy action tensor([ 0.6302, -0.3027, -0.0862, -0.1333]) tensor([0.4259, 0.1676, 0.2081, 0.1985]) -Greedy action tensor([ 0.7088, -0.5735, 0.1274, -0.2954]) tensor([0.4540, 0.1259, 0.2538, 0.1663]) -Greedy action tensor([ 0.9990, -0.8334, 0.1533, -0.6242]) tensor([0.5597, 0.0896, 0.2403, 0.1104]) -Greedy action tensor([ 0.5464, -0.0910, -0.0733, -0.1359]) tensor([0.3888, 0.2055, 0.2092, 0.1965]) -Greedy action tensor([ 0.5908, -0.4412, 0.0213, -0.3728]) tensor([0.4341, 0.1547, 0.2456, 0.1656]) -Greedy action tensor([ 0.8871, -0.1188, -0.1747, -0.2814]) tensor([0.4945, 0.1808, 0.1710, 0.1537]) -Greedy action tensor([ 0.3740, -0.4045, -0.0834, -0.1517]) tensor([0.3727, 0.1711, 0.2359, 0.2203]) -Greedy action tensor([ 0.6089, -0.2365, -0.0256, -0.1244]) tensor([0.4098, 0.1760, 0.2173, 0.1969]) -Greedy action tensor([ 0.7708, -1.0108, -0.1675, -0.2859]) tensor([0.5243, 0.0883, 0.2052, 0.1823]) -Greedy action tensor([ 0.6586, -0.3892, -0.0957, -0.2273]) tensor([0.4477, 0.1570, 0.2106, 0.1846]) -Greedy action tensor([ 9.0425e-01, -6.1748e-01, 8.5032e-04, -2.8222e-01]) tensor([0.5185, 0.1132, 0.2101, 0.1583]) -Greedy action tensor([ 0.4538, -0.3474, -0.1924, -0.0240]) tensor([0.3857, 0.1731, 0.2021, 0.2392]) -Greedy action tensor([ 0.8086, -0.4991, -0.1108, -0.1977]) tensor([0.4915, 0.1329, 0.1960, 0.1797]) -Greedy action tensor([ 0.7115, -0.3174, -0.0207, -0.1160]) tensor([0.4395, 0.1571, 0.2113, 0.1921]) -Greedy action tensor([ 0.8361, -0.5561, 0.0463, -0.6062]) tensor([0.5158, 0.1282, 0.2341, 0.1219]) -Greedy action tensor([ 1.2843, -0.8779, -0.0934, -0.4848]) tensor([0.6503, 0.0748, 0.1640, 0.1109]) -Greedy action tensor([ 0.4902, -0.4798, -0.0711, -0.1896]) tensor([0.4071, 0.1543, 0.2323, 0.2063]) -Greedy action tensor([ 0.5985, -0.5802, 0.0608, -0.5811]) tensor([0.4547, 0.1399, 0.2656, 0.1398]) -Greedy action tensor([ 1.0032, -0.7997, 0.1155, -0.6400]) tensor([0.5651, 0.0931, 0.2326, 0.1093]) -Greedy action tensor([ 0.8637, -0.4600, -0.1346, -0.4160]) tensor([0.5228, 0.1391, 0.1926, 0.1454]) -Greedy action tensor([ 0.4862, -0.2830, -0.0821, -0.3281]) tensor([0.4044, 0.1874, 0.2291, 0.1791]) -Greedy action tensor([ 0.5759, -0.2453, 0.0289, -0.5245]) tensor([0.4253, 0.1871, 0.2461, 0.1415]) -Greedy action tensor([ 0.9054, -0.8195, 0.1110, -0.6499]) tensor([0.5431, 0.0968, 0.2454, 0.1147]) -Greedy action tensor([ 0.6252, -0.2387, -0.1045, -0.4465]) tensor([0.4452, 0.1877, 0.2146, 0.1525]) -Greedy action tensor([ 0.6606, -0.2736, 0.0974, -0.3372]) tensor([0.4290, 0.1685, 0.2443, 0.1582]) -Greedy action tensor([ 0.3726, 0.0570, -0.0290, -0.0034]) tensor([0.3241, 0.2364, 0.2169, 0.2226]) -Greedy action tensor([ 0.2541, 0.2823, 0.0443, -0.2815]) tensor([0.2920, 0.3003, 0.2367, 0.1709]) -Greedy action tensor([ 0.6108, -0.4721, -0.0399, -0.1939]) tensor([0.4334, 0.1467, 0.2261, 0.1938]) -Greedy action tensor([ 0.7575, -0.3838, 0.1367, -0.2775]) tensor([0.4521, 0.1444, 0.2430, 0.1606]) -Greedy action tensor([ 0.7407, -0.0454, -0.2503, -0.0117]) tensor([0.4352, 0.1983, 0.1615, 0.2051]) -Greedy action tensor([ 0.8711, -0.5113, -0.1141, -0.2671]) tensor([0.5142, 0.1290, 0.1920, 0.1648]) -Greedy action tensor([ 0.9880, -0.5520, -0.0487, -0.2525]) tensor([0.5381, 0.1154, 0.1908, 0.1557]) -Greedy action tensor([ 0.6293, -0.2598, -0.0663, -0.2553]) tensor([0.4305, 0.1770, 0.2147, 0.1778]) -Greedy action tensor([ 0.8445, -0.5327, -0.1798, -0.2471]) tensor([0.5136, 0.1296, 0.1844, 0.1724]) -Greedy action tensor([ 0.6610, -0.7754, -0.0279, -0.3312]) tensor([0.4738, 0.1127, 0.2379, 0.1757]) -Greedy action tensor([ 1.3508, -1.1171, -0.0248, -0.7549]) tensor([0.6853, 0.0581, 0.1732, 0.0834]) -Greedy action tensor([ 0.3748, -0.0569, -0.1558, -0.2653]) tensor([0.3617, 0.2349, 0.2128, 0.1907]) -Greedy action tensor([ 0.7840, -0.1793, 0.1782, -0.0497]) tensor([0.4234, 0.1616, 0.2310, 0.1839]) -Greedy action tensor([ 1.1653, -1.1182, 0.0187, -0.4874]) tensor([0.6207, 0.0633, 0.1972, 0.1189]) -Greedy action tensor([ 0.8905, -0.8824, 0.0097, -0.3916]) tensor([0.5371, 0.0912, 0.2226, 0.1490]) -Greedy action tensor([ 0.4161, 0.9798, -0.3578, -0.1849]) tensor([0.2655, 0.4665, 0.1224, 0.1456]) -Greedy action tensor([ 0.7172, -0.5131, 0.0660, -0.2369]) tensor([0.4548, 0.1329, 0.2371, 0.1752]) -Greedy action tensor([ 0.7489, -0.5909, 0.0023, -0.2526]) tensor([0.4755, 0.1245, 0.2254, 0.1747]) -Greedy action tensor([ 0.4603, -0.3666, -0.1121, -0.5610]) tensor([0.4234, 0.1852, 0.2389, 0.1525]) -Greedy action tensor([ 0.7544, -0.1465, 0.0531, -0.5371]) tensor([0.4593, 0.1866, 0.2278, 0.1263]) -Greedy action tensor([ 0.3413, 0.0217, -0.0886, -0.1036]) tensor([0.3314, 0.2407, 0.2156, 0.2124]) -Greedy action tensor([ 1.0859, -0.7789, 0.1554, -0.8185]) tensor([0.5889, 0.0912, 0.2322, 0.0877]) -Greedy action tensor([ 0.2257, 0.0160, -0.0691, 0.0093]) tensor([0.2975, 0.2413, 0.2216, 0.2396]) -Greedy action tensor([ 0.8447, -0.0933, 0.0064, -0.0427]) tensor([0.4473, 0.1751, 0.1934, 0.1842]) -Greedy action tensor([ 0.3756, -0.1699, -0.0576, -0.3089]) tensor([0.3660, 0.2121, 0.2373, 0.1846]) -Greedy action tensor([ 0.6589, -0.5149, -0.0721, -0.2573]) tensor([0.4565, 0.1411, 0.2198, 0.1826]) -Greedy action tensor([ 0.8037, -0.4818, -0.2120, -0.2899]) tensor([0.5067, 0.1401, 0.1835, 0.1697]) -Greedy action tensor([ 0.6833, -0.4167, -0.0761, -0.1748]) tensor([0.4495, 0.1496, 0.2103, 0.1906]) -Greedy action tensor([ 0.7716, -0.2442, -0.0316, -0.0902]) tensor([0.4479, 0.1622, 0.2006, 0.1892]) -Greedy action tensor([ 0.7513, -0.6307, 0.0803, -0.3647]) tensor([0.4785, 0.1201, 0.2446, 0.1568]) -Greedy action tensor([ 0.5221, -0.2967, -0.0665, -0.2216]) tensor([0.4046, 0.1784, 0.2246, 0.1923]) -Greedy action tensor([ 1.0634, -0.9033, -0.0984, -0.4370]) tensor([0.5967, 0.0835, 0.1867, 0.1331]) -Greedy action tensor([ 0.6629, -0.5672, -0.0327, -0.1030]) tensor([0.4433, 0.1296, 0.2211, 0.2061]) -Greedy action tensor([ 0.5701, -0.0233, -0.0504, -0.1989]) tensor([0.3916, 0.2163, 0.2106, 0.1815]) -Greedy action tensor([ 0.7235, -0.9870, 0.2809, -0.9260]) tensor([0.4962, 0.0897, 0.3187, 0.0953]) -Greedy action tensor([ 0.9399, -0.5822, -0.0500, -0.3415]) tensor([0.5355, 0.1169, 0.1990, 0.1487]) -Greedy action tensor([ 1.0175, -0.9658, -0.0963, -0.5883]) tensor([0.6000, 0.0826, 0.1970, 0.1204]) -Greedy action tensor([ 0.8489, -0.5790, -0.0380, -0.2385]) tensor([0.5028, 0.1206, 0.2071, 0.1695]) -Greedy action tensor([ 5.9912e-01, -2.3581e-01, 5.7882e-04, -2.7662e-02]) tensor([0.3972, 0.1723, 0.2183, 0.2122]) -Greedy action tensor([ 1.3654, -0.9642, 0.0498, -0.7746]) tensor([0.6742, 0.0656, 0.1809, 0.0793]) -Greedy action tensor([ 0.4126, 0.2020, -0.1307, 0.0619]) tensor([0.3231, 0.2617, 0.1877, 0.2275]) -Greedy action tensor([ 0.9890, -0.7188, 0.1224, -0.7136]) tensor([0.5606, 0.1016, 0.2356, 0.1021]) -Greedy action tensor([ 0.5393, -0.3298, 0.0323, -0.4712]) tensor([0.4192, 0.1758, 0.2525, 0.1526]) -Greedy action tensor([ 0.9594, -1.0347, 0.0819, -0.6303]) tensor([0.5695, 0.0775, 0.2368, 0.1162]) -Greedy action tensor([ 0.8160, -0.6153, 0.0455, -0.2242]) tensor([0.4866, 0.1163, 0.2252, 0.1719]) -Greedy action tensor([ 0.7940, -0.4716, -0.1508, -0.1524]) tensor([0.4857, 0.1370, 0.1888, 0.1885]) -Greedy action tensor([-0.4379, -0.5750, -0.0835, 0.8595]) tensor([0.1437, 0.1253, 0.2049, 0.5260]) -Greedy action tensor([-0.1637, 1.4554, 0.5353, -0.4201]) tensor([0.1132, 0.5715, 0.2277, 0.0876]) -Greedy action tensor([ 0.1526, 0.4574, -0.8153, -0.1737]) tensor([0.2892, 0.3923, 0.1099, 0.2087]) -Greedy action tensor([-0.4920, 0.4823, 0.1131, -1.0744]) tensor([0.1656, 0.4387, 0.3033, 0.0925]) -Greedy action tensor([-0.1090, -0.3341, -0.0775, 0.2475]) tensor([0.2348, 0.1875, 0.2423, 0.3354]) -Greedy action tensor([-0.5829, -0.1585, 0.6214, -0.5501]) tensor([0.1450, 0.2217, 0.4835, 0.1498]) -Greedy action tensor([ 0.9301, -0.8185, -0.1922, -0.2232]) tensor([0.5509, 0.0959, 0.1793, 0.1739]) -Greedy action tensor([-0.5140, -1.6244, -0.2392, -0.4563]) tensor([0.2699, 0.0889, 0.3553, 0.2859]) -Greedy action tensor([ 0.0445, 0.0271, 0.5359, -0.3784]) tensor([0.2341, 0.2300, 0.3826, 0.1533]) -Greedy action tensor([-1.8006, -1.0964, 1.1867, -0.6750]) tensor([0.0386, 0.0780, 0.7647, 0.1188]) -Greedy action tensor([ 0.2285, -0.3679, -0.1292, -0.2153]) tensor([0.3458, 0.1905, 0.2418, 0.2219]) -Greedy action tensor([ 0.0157, -0.7554, 0.0024, -0.8039]) tensor([0.3460, 0.1600, 0.3415, 0.1525]) -Greedy action tensor([-1.5415, -0.2160, -0.5073, -0.4916]) tensor([0.0958, 0.3607, 0.2696, 0.2738]) -Greedy action tensor([-0.0365, 0.4174, 0.1672, 0.0222]) tensor([0.2057, 0.3239, 0.2522, 0.2182]) -Greedy action tensor([-0.0558, 1.0897, 0.6237, -0.7305]) tensor([0.1509, 0.4745, 0.2977, 0.0769]) -Greedy action tensor([ 0.3490, -0.8905, -0.0636, -0.0346]) tensor([0.3798, 0.1100, 0.2514, 0.2588]) -Greedy action tensor([ 0.3007, 0.1494, -0.2467, -0.6161]) tensor([0.3524, 0.3029, 0.2038, 0.1409]) -Greedy action tensor([-0.8970, -0.4021, -0.9243, -0.5081]) tensor([0.1965, 0.3223, 0.1912, 0.2899]) -Greedy action tensor([-0.6781, 0.3907, 0.6924, -0.7543]) tensor([0.1140, 0.3318, 0.4486, 0.1056]) -Greedy action tensor([ 0.7893, -0.0228, 0.4218, 1.3637]) tensor([0.2556, 0.1135, 0.1770, 0.4540]) -Greedy action tensor([ 1.0588, -0.7566, -0.5024, -0.0186]) tensor([0.5837, 0.0950, 0.1225, 0.1988]) -Greedy action tensor([ 0.0594, -1.0013, 0.2431, -0.1532]) tensor([0.2979, 0.1032, 0.3580, 0.2409]) -Greedy action tensor([ 0.6266, -0.3766, 0.0697, 0.6138]) tensor([0.3417, 0.1253, 0.1958, 0.3373]) -Greedy action tensor([-0.6517, -1.8876, 0.0362, 0.8203]) tensor([0.1309, 0.0380, 0.2605, 0.5706]) -Greedy action tensor([ 0.7554, 0.6739, -0.1112, 0.5248]) tensor([0.3189, 0.2939, 0.1340, 0.2532]) -Greedy action tensor([-0.1426, -0.5530, -0.0705, -0.5063]) tensor([0.2913, 0.1932, 0.3130, 0.2025]) -Greedy action tensor([-0.5431, -0.9737, 1.0490, -0.4057]) tensor([0.1297, 0.0843, 0.6372, 0.1488]) -Greedy action tensor([ 1.0583, -0.4342, -0.6385, 0.1855]) tensor([0.5477, 0.1231, 0.1004, 0.2288]) -Greedy action tensor([ 1.6433, -0.4636, -1.2119, 0.2713]) tensor([0.6980, 0.0849, 0.0402, 0.1770]) -Greedy action tensor([-0.2840, 0.8940, 0.0688, -1.0942]) tensor([0.1635, 0.5311, 0.2327, 0.0727]) -Greedy action tensor([-0.2126, -0.2305, 0.0734, 1.2220]) tensor([0.1331, 0.1308, 0.1772, 0.5589]) -Greedy action tensor([-0.2445, -0.6277, 0.0121, -1.0828]) tensor([0.2935, 0.2001, 0.3794, 0.1269]) -Greedy action tensor([ 0.0679, -0.4151, 0.1043, 0.3959]) tensor([0.2474, 0.1526, 0.2566, 0.3434]) -Greedy action tensor([ 0.8795, -0.1175, 0.4719, -0.4988]) tensor([0.4374, 0.1614, 0.2910, 0.1102]) -Greedy action tensor([ 1.2307, 0.4329, -0.1925, 0.9001]) tensor([0.4150, 0.1869, 0.1000, 0.2981]) -Greedy action tensor([-1.4524, -0.4863, 1.0470, -1.7796]) tensor([0.0605, 0.1590, 0.7368, 0.0436]) -Greedy action tensor([ 0.0269, 0.5709, -0.5190, -0.8298]) tensor([0.2683, 0.4623, 0.1555, 0.1139]) -Greedy action tensor([-0.7041, 0.7714, -0.0494, 0.6231]) tensor([0.0903, 0.3951, 0.1739, 0.3407]) -Greedy action tensor([-0.1607, -0.1054, 0.2714, -0.2860]) tensor([0.2232, 0.2359, 0.3439, 0.1969]) -Greedy action tensor([ 1.7075, -0.9316, 0.0443, 0.8562]) tensor([0.5925, 0.0423, 0.1123, 0.2529]) -Greedy action tensor([0.2584, 0.4521, 0.0294, 0.3907]) tensor([0.2409, 0.2924, 0.1916, 0.2750]) -Greedy action tensor([ 1.1731, -0.6223, 0.3919, 1.0571]) tensor([0.3977, 0.0660, 0.1821, 0.3541]) -Greedy action tensor([ 0.9339, -1.1484, 0.5557, 0.9678]) tensor([0.3516, 0.0438, 0.2409, 0.3637]) -Greedy action tensor([ 0.3338, 0.0228, -0.5661, -0.2260]) tensor([0.3689, 0.2703, 0.1500, 0.2108]) -Greedy action tensor([-0.5180, 0.2510, -1.2036, -0.1665]) tensor([0.1968, 0.4245, 0.0991, 0.2796]) -Greedy action tensor([ 0.3928, -0.8820, -0.0019, 0.3745]) tensor([0.3407, 0.0952, 0.2296, 0.3345]) -Greedy action tensor([ 0.0258, -2.3284, -0.7219, -0.4304]) tensor([0.4541, 0.0431, 0.2150, 0.2878]) -Greedy action tensor([ 0.0527, 0.2194, 0.2882, -0.0664]) tensor([0.2307, 0.2725, 0.2920, 0.2048]) -Greedy action tensor([ 0.1530, -0.4827, -0.7888, 0.7061]) tensor([0.2734, 0.1448, 0.1066, 0.4753]) -Greedy action tensor([-0.6536, -0.8740, -0.7218, 0.3094]) tensor([0.1867, 0.1498, 0.1744, 0.4891]) -Greedy action tensor([-1.3420, -1.5708, 0.0082, -0.1317]) tensor([0.1110, 0.0883, 0.4283, 0.3724]) -Greedy action tensor([-0.4224, 0.8013, 0.5890, -1.1825]) tensor([0.1313, 0.4463, 0.3610, 0.0614]) -Greedy action tensor([-0.8537, 0.6701, -0.0122, -1.2141]) tensor([0.1162, 0.5333, 0.2695, 0.0810]) -Greedy action tensor([ 0.4380, -1.0333, 0.3436, -0.0540]) tensor([0.3635, 0.0835, 0.3308, 0.2223]) -Greedy action tensor([-0.1232, -0.7639, 0.2420, -0.4180]) tensor([0.2694, 0.1419, 0.3881, 0.2006]) -Greedy action tensor([ 0.7624, 0.1079, -0.2846, 0.2756]) tensor([0.4024, 0.2091, 0.1412, 0.2473]) -Greedy action tensor([-0.8497, -0.0833, 0.7555, -1.4115]) tensor([0.1149, 0.2473, 0.5722, 0.0655]) -Greedy action tensor([-1.2898, -0.5792, -0.9205, 0.1047]) tensor([0.1174, 0.2390, 0.1699, 0.4736]) -Greedy action tensor([ 0.3700, -0.6662, 1.5531, -0.3584]) tensor([0.1960, 0.0695, 0.6398, 0.0946]) -Greedy action tensor([ 0.1773, -2.2242, -0.3487, -0.6058]) tensor([0.4676, 0.0424, 0.2763, 0.2137]) -Greedy action tensor([-0.1918, -0.2013, -0.6484, -0.1915]) tensor([0.2759, 0.2733, 0.1748, 0.2760]) -Greedy action tensor([ 0.5409, -0.5631, 0.3642, -0.0600]) tensor([0.3679, 0.1220, 0.3083, 0.2017]) -Greedy action tensor([-0.4375, -0.1231, 0.6013, -0.9104]) tensor([0.1719, 0.2354, 0.4857, 0.1071]) -Greedy action tensor([ 0.2075, -0.3689, -0.6781, -0.3296]) tensor([0.3908, 0.2196, 0.1612, 0.2284]) -Greedy action tensor([ 0.0647, -0.4591, 1.3576, -0.1861]) tensor([0.1663, 0.0985, 0.6058, 0.1294]) -Greedy action tensor([ 1.6765, -1.0186, 0.2364, 0.4601]) tensor([0.6247, 0.0422, 0.1480, 0.1851]) -Greedy action tensor([-1.2063, -0.1992, 0.7764, -1.0529]) tensor([0.0822, 0.2250, 0.5969, 0.0958]) -Greedy action tensor([-0.3255, -1.0636, -0.8459, -1.0148]) tensor([0.3885, 0.1857, 0.2309, 0.1950]) -Greedy action tensor([-1.4723, -0.5402, 0.0335, -0.7945]) tensor([0.0998, 0.2536, 0.4500, 0.1966]) -Greedy action tensor([0.8717, 0.0541, 0.0014, 1.2907]) tensor([0.2958, 0.1306, 0.1239, 0.4497]) -Greedy action tensor([0.3492, 0.1582, 0.0561, 0.2995]) tensor([0.2838, 0.2344, 0.2117, 0.2701]) -Greedy action tensor([-0.1493, 0.6392, 0.1733, -0.5547]) tensor([0.1906, 0.4193, 0.2631, 0.1271]) -Greedy action tensor([-0.4427, -1.1419, 0.9993, -1.2605]) tensor([0.1621, 0.0806, 0.6857, 0.0716]) -Greedy action tensor([ 0.4202, 0.0887, -0.7994, -0.5174]) tensor([0.4158, 0.2985, 0.1228, 0.1628]) -Greedy action tensor([ 0.6494, -1.1968, -1.0673, 0.5519]) tensor([0.4455, 0.0703, 0.0800, 0.4041]) -Greedy action tensor([-0.2720, -0.0104, -0.2181, -0.2797]) tensor([0.2301, 0.2988, 0.2428, 0.2283]) -Greedy action tensor([ 0.3920, -0.0574, -0.4305, 0.7879]) tensor([0.2807, 0.1791, 0.1233, 0.4170]) -Greedy action tensor([ 0.1937, -0.8210, 1.4197, 0.4674]) tensor([0.1643, 0.0596, 0.5600, 0.2161]) -Greedy action tensor([ 0.2638, -0.7185, 0.0753, -0.5840]) tensor([0.3801, 0.1423, 0.3148, 0.1628]) -Greedy action tensor([-1.0011, -1.6844, 0.2669, 0.0064]) tensor([0.1283, 0.0648, 0.4558, 0.3512]) -Greedy action tensor([-0.0914, -0.3524, 0.0680, -0.3870]) tensor([0.2712, 0.2089, 0.3181, 0.2018]) -Greedy action tensor([ 0.4677, -1.1810, -1.5400, 0.5272]) tensor([0.4188, 0.0805, 0.0562, 0.4444]) -Greedy action tensor([ 1.6859, -0.7916, -0.5309, 0.4497]) tensor([0.6741, 0.0566, 0.0735, 0.1958]) -Greedy action tensor([ 1.2678, -0.1060, -0.5771, 0.1673]) tensor([0.5734, 0.1452, 0.0906, 0.1908]) -Greedy action tensor([ 1.2834, 0.0228, -0.6563, 0.3730]) tensor([0.5466, 0.1549, 0.0786, 0.2199]) -Greedy action tensor([ 2.0158, -0.4025, -0.0535, 0.7895]) tensor([0.6628, 0.0590, 0.0837, 0.1945]) -Greedy action tensor([ 0.8460, -0.3674, 0.0939, 0.1072]) tensor([0.4452, 0.1323, 0.2099, 0.2127]) -Greedy action tensor([ 1.7626, -0.2314, -0.0921, -0.2388]) tensor([0.7004, 0.0954, 0.1096, 0.0947]) -Greedy action tensor([ 1.9849, -0.1551, -0.4985, 0.1075]) tensor([0.7385, 0.0869, 0.0616, 0.1130]) -Greedy action tensor([ 1.9945, -0.2786, -0.6749, 0.5094]) tensor([0.7149, 0.0736, 0.0495, 0.1619]) -Greedy action tensor([ 2.0727, 0.0289, -0.3040, 0.3743]) tensor([0.7115, 0.0922, 0.0661, 0.1302]) -Greedy action tensor([ 1.7519, -0.0258, -0.1073, 0.2553]) tensor([0.6457, 0.1091, 0.1006, 0.1446]) -Greedy action tensor([ 1.6188, -0.8615, -0.0923, 0.4457]) tensor([0.6354, 0.0532, 0.1148, 0.1966]) -Greedy action tensor([ 1.3780, -0.7093, -0.4923, 0.3043]) tensor([0.6174, 0.0766, 0.0951, 0.2110]) -Greedy action tensor([ 1.6534, -0.6009, -0.6460, 0.9546]) tensor([0.5874, 0.0616, 0.0589, 0.2921]) -Greedy action tensor([ 1.5588, -0.8120, -0.1636, 0.1401]) tensor([0.6605, 0.0617, 0.1180, 0.1599]) -Greedy action tensor([ 1.5794, -0.6561, -0.6751, 0.0967]) tensor([0.6950, 0.0743, 0.0729, 0.1578]) -Greedy action tensor([ 1.2853, -0.8136, -0.0804, 0.1608]) tensor([0.5873, 0.0720, 0.1499, 0.1908]) -Greedy action tensor([ 2.3694, -0.4827, -0.3561, 0.4607]) tensor([0.7865, 0.0454, 0.0515, 0.1166]) -Greedy action tensor([ 2.5263, -1.4620, -0.0683, 0.5014]) tensor([0.8162, 0.0151, 0.0609, 0.1077]) -Greedy action tensor([ 1.5356, -0.4310, -0.2238, 0.1309]) tensor([0.6420, 0.0898, 0.1105, 0.1576]) -Greedy action tensor([ 1.5651, -0.1212, -0.6180, 0.5940]) tensor([0.5965, 0.1105, 0.0672, 0.2259]) -Greedy action tensor([ 1.2329, -0.2184, -0.5135, 0.2403]) tensor([0.5620, 0.1317, 0.0980, 0.2083]) -Greedy action tensor([ 1.0312, -0.4858, -0.3734, 0.1397]) tensor([0.5334, 0.1170, 0.1309, 0.2187]) -Greedy action tensor([ 1.7510, -0.0417, -0.4186, 0.4760]) tensor([0.6410, 0.1067, 0.0732, 0.1791]) -Greedy action tensor([ 1.0216, -0.2374, -0.1816, 0.1497]) tensor([0.4994, 0.1418, 0.1499, 0.2088]) -Greedy action tensor([ 2.0800, -0.9531, -0.2755, 0.8316]) tensor([0.6993, 0.0337, 0.0663, 0.2007]) -Greedy action tensor([ 1.1504, -0.0989, -0.3001, 0.0242]) tensor([0.5419, 0.1554, 0.1270, 0.1757]) -Greedy action tensor([ 1.3832, -0.3798, -0.5642, 0.3878]) tensor([0.5939, 0.1019, 0.0847, 0.2195]) -Greedy action tensor([ 1.2718, -0.1553, -0.8454, 0.1772]) tensor([0.5900, 0.1416, 0.0710, 0.1974]) -Greedy action tensor([ 1.7205, -0.4032, -0.7633, 0.0312]) tensor([0.7206, 0.0862, 0.0601, 0.1331]) -Greedy action tensor([ 1.6919, -0.7337, -0.6077, -0.0896]) tensor([0.7369, 0.0652, 0.0739, 0.1241]) -Greedy action tensor([ 1.5561, -0.8644, -0.3925, 0.7599]) tensor([0.5944, 0.0528, 0.0847, 0.2681]) -Greedy action tensor([ 2.1053, -0.3925, -1.1656, 0.2572]) tensor([0.7826, 0.0644, 0.0297, 0.1233]) -Greedy action tensor([ 1.5161, -0.5565, -1.3590, 0.2521]) tensor([0.6827, 0.0859, 0.0385, 0.1929]) -Greedy action tensor([ 1.3827, -0.0062, -0.4945, 0.2786]) tensor([0.5767, 0.1438, 0.0883, 0.1912]) -Greedy action tensor([ 1.2063, -0.3311, -0.0680, -0.0264]) tensor([0.5599, 0.1203, 0.1566, 0.1632]) -Greedy action tensor([ 1.7511, -0.7362, -0.1679, 0.4800]) tensor([0.6621, 0.0550, 0.0972, 0.1857]) -Greedy action tensor([ 1.7225, -0.9394, -0.3462, 0.1148]) tensor([0.7161, 0.0500, 0.0905, 0.1435]) -Greedy action tensor([ 0.9970, -0.3974, 0.0257, 0.1599]) tensor([0.4855, 0.1204, 0.1838, 0.2102]) -Greedy action tensor([ 1.4340, -0.5738, -0.3825, -0.1691]) tensor([0.6675, 0.0896, 0.1085, 0.1344]) -Greedy action tensor([ 1.3030, -0.6699, 0.0277, 0.1219]) tensor([0.5796, 0.0806, 0.1619, 0.1779]) -Greedy action tensor([ 1.1612, -0.3553, -0.1537, 0.4571]) tensor([0.5044, 0.1107, 0.1354, 0.2495]) -Greedy action tensor([ 1.1494, -0.5255, -0.3595, 0.2140]) tensor([0.5553, 0.1040, 0.1228, 0.2179]) -Greedy action tensor([ 1.3301, -0.4852, -0.2928, 0.4152]) tensor([0.5680, 0.0925, 0.1121, 0.2275]) -Greedy action tensor([ 1.8550, 0.2066, -0.3364, -0.0086]) tensor([0.6853, 0.1318, 0.0766, 0.1063]) -Greedy action tensor([ 1.8762, -0.4552, -0.3750, 0.2161]) tensor([0.7181, 0.0698, 0.0756, 0.1365]) -Greedy action tensor([ 1.4974, -0.6127, -0.5006, 0.0696]) tensor([0.6682, 0.0810, 0.0906, 0.1602]) -Greedy action tensor([ 1.8431, -0.5170, -0.4549, 0.4767]) tensor([0.6897, 0.0651, 0.0693, 0.1759]) -Greedy action tensor([ 1.8055, -0.1339, -0.6617, 0.4893]) tensor([0.6681, 0.0961, 0.0567, 0.1792]) -Greedy action tensor([ 1.4265, 0.4692, 0.5116, -0.0395]) tensor([0.4962, 0.1905, 0.1988, 0.1145]) -Greedy action tensor([ 1.3351, -0.3718, -0.5887, 0.3646]) tensor([0.5860, 0.1063, 0.0856, 0.2221]) -Greedy action tensor([ 1.3267, -0.9696, -0.5028, 0.7795]) tensor([0.5436, 0.0547, 0.0872, 0.3145]) -Greedy action tensor([ 0.7160, -0.2614, -0.3237, 0.2525]) tensor([0.4239, 0.1595, 0.1499, 0.2667]) -Greedy action tensor([ 0.9103, -0.0196, -1.0023, 0.5951]) tensor([0.4402, 0.1737, 0.0650, 0.3212]) -Greedy action tensor([ 1.2988, -0.6170, -0.4725, -0.0285]) tensor([0.6319, 0.0930, 0.1075, 0.1676]) -Greedy action tensor([ 1.8118, -0.2361, -0.3190, -0.2953]) tensor([0.7303, 0.0942, 0.0867, 0.0888]) -Greedy action tensor([ 2.2422, -1.1626, -0.3303, 0.6012]) tensor([0.7673, 0.0255, 0.0586, 0.1487]) -Greedy action tensor([ 1.5846, -0.5395, -0.4646, 0.4447]) tensor([0.6377, 0.0762, 0.0821, 0.2040]) -Greedy action tensor([ 0.8652, 0.0564, -0.3307, 0.0404]) tensor([0.4574, 0.2037, 0.1383, 0.2005]) -Greedy action tensor([ 1.3710, -0.0562, -0.5883, 0.0899]) tensor([0.6029, 0.1447, 0.0850, 0.1674]) -Greedy action tensor([ 2.2051, -0.9507, -0.3181, 0.4859]) tensor([0.7680, 0.0327, 0.0616, 0.1376]) -Greedy action tensor([ 1.5386, -0.2258, -0.8581, 0.1333]) tensor([0.6633, 0.1136, 0.0604, 0.1627]) -Greedy action tensor([ 1.3699, -0.6128, -0.3696, 0.3658]) tensor([0.5953, 0.0820, 0.1046, 0.2181]) -Greedy action tensor([ 1.3369, -0.7350, -0.3330, 0.5510]) tensor([0.5650, 0.0712, 0.1064, 0.2575]) -Greedy action tensor([ 1.4623, -0.5565, -0.5677, 0.5258]) tensor([0.6038, 0.0802, 0.0793, 0.2367]) -Greedy action tensor([ 1.2953, -0.3591, -0.2310, 0.0346]) tensor([0.5910, 0.1130, 0.1284, 0.1675]) -Greedy action tensor([ 2.0573, -0.2204, 0.1531, -0.0766]) tensor([0.7300, 0.0748, 0.1087, 0.0864]) -Greedy action tensor([ 1.3551, -0.5601, -0.1483, 0.1012]) tensor([0.6042, 0.0890, 0.1344, 0.1724]) -Greedy action tensor([ 1.6671, -0.6362, -0.4643, 0.2054]) tensor([0.6895, 0.0689, 0.0818, 0.1598]) -Greedy action tensor([ 1.7041, -0.5893, -0.6476, 0.2296]) tensor([0.7017, 0.0708, 0.0668, 0.1606]) -Greedy action tensor([ 1.1227, -0.6228, -0.0506, 0.1550]) tensor([0.5365, 0.0937, 0.1660, 0.2039]) -Greedy action tensor([ 1.0722, -0.5700, -0.3424, 0.0876]) tensor([0.5524, 0.1069, 0.1342, 0.2064]) -Greedy action tensor([ 1.1548, -0.2904, -0.1411, 0.1332]) tensor([0.5349, 0.1261, 0.1464, 0.1926]) -Greedy action tensor([ 1.1275, 0.1324, -0.7808, 0.2085]) tensor([0.5217, 0.1929, 0.0774, 0.2081]) -Greedy action tensor([ 1.0086, -0.2292, -0.3312, -0.1031]) tensor([0.5317, 0.1542, 0.1392, 0.1749]) -Greedy action tensor([ 1.6441, -0.2820, -0.2511, 0.2239]) tensor([0.6503, 0.0948, 0.0977, 0.1572]) -Greedy action tensor([ 1.3091, -0.4404, -0.3874, 0.1448]) tensor([0.5990, 0.1041, 0.1098, 0.1870]) -Greedy action tensor([ 1.9758, -0.7319, -0.3360, 0.2014]) tensor([0.7489, 0.0499, 0.0742, 0.1270]) -Greedy action tensor([ 2.0503, -0.7687, -0.6054, 0.5012]) tensor([0.7450, 0.0444, 0.0523, 0.1583]) -Greedy action tensor([ 1.0050, -0.4094, -0.2123, 0.0059]) tensor([0.5243, 0.1274, 0.1552, 0.1930]) -Greedy action tensor([ 1.7826, -0.2611, -0.7328, 0.6261]) tensor([0.6557, 0.0850, 0.0530, 0.2063]) -Greedy action tensor([ 1.5885, -0.5941, -0.3100, 0.3570]) tensor([0.6433, 0.0725, 0.0964, 0.1878]) -Greedy action tensor([ 0.7795, -0.7315, -0.0178, -0.3994]) tensor([0.5053, 0.1115, 0.2277, 0.1555]) -Greedy action tensor([ 0.5396, -0.4017, -0.0275, -0.3778]) tensor([0.4243, 0.1655, 0.2406, 0.1695]) -Greedy action tensor([ 0.3465, 0.0416, -0.2060, 0.0575]) tensor([0.3266, 0.2408, 0.1880, 0.2446]) -Greedy action tensor([ 0.5051, 0.0948, -0.0212, -0.0873]) tensor([0.3562, 0.2363, 0.2104, 0.1970]) -Greedy action tensor([ 0.8415, -0.2297, -0.1549, -0.0933]) tensor([0.4752, 0.1628, 0.1754, 0.1866]) -Greedy action tensor([ 0.7664, -0.3554, -0.0300, -0.1977]) tensor([0.4634, 0.1509, 0.2090, 0.1767]) -Greedy action tensor([ 0.2026, 0.3460, -0.0243, 0.2381]) tensor([0.2508, 0.2895, 0.1999, 0.2599]) -Greedy action tensor([ 0.7667, -0.4106, -0.0527, -0.2032]) tensor([0.4699, 0.1448, 0.2071, 0.1782]) -Greedy action tensor([ 0.9332, -0.3473, 0.0478, -0.3734]) tensor([0.5099, 0.1417, 0.2104, 0.1381]) -Greedy action tensor([ 0.7582, -0.3354, -0.1113, -0.3233]) tensor([0.4777, 0.1600, 0.2002, 0.1620]) -Greedy action tensor([ 0.4425, -0.1442, 0.0495, -0.0869]) tensor([0.3546, 0.1972, 0.2394, 0.2088]) -Greedy action tensor([ 0.9139, -0.9761, 0.0834, -0.5340]) tensor([0.5489, 0.0829, 0.2392, 0.1290]) -Greedy action tensor([ 0.6541, -0.3053, -0.2302, -0.2643]) tensor([0.4555, 0.1745, 0.1881, 0.1818]) -Greedy action tensor([ 0.3465, -0.0212, -0.1291, -0.1062]) tensor([0.3390, 0.2347, 0.2107, 0.2156]) -Greedy action tensor([ 0.4240, -0.1021, -0.0083, -0.0121]) tensor([0.3464, 0.2047, 0.2249, 0.2240]) -Greedy action tensor([ 0.5745, -0.3976, 0.1118, -0.3423]) tensor([0.4154, 0.1571, 0.2615, 0.1661]) -Greedy action tensor([ 0.7923, -0.6283, -0.0539, -0.1663]) tensor([0.4868, 0.1176, 0.2089, 0.1867]) -Greedy action tensor([ 0.8018, -0.6263, -0.0573, -0.6472]) tensor([0.5268, 0.1263, 0.2231, 0.1237]) -Greedy action tensor([ 0.6813, -0.5091, 0.0908, -0.2551]) tensor([0.4444, 0.1351, 0.2462, 0.1742]) -Greedy action tensor([ 1.0360, -0.5252, -0.0019, -0.3619]) tensor([0.5521, 0.1159, 0.1956, 0.1364]) -Greedy action tensor([ 0.4757, 0.1058, -0.0626, 0.0463]) tensor([0.3418, 0.2361, 0.1995, 0.2225]) -Greedy action tensor([ 0.5023, -0.0821, -0.0820, -0.0311]) tensor([0.3701, 0.2064, 0.2064, 0.2171]) -Greedy action tensor([ 0.7029, -0.5836, 0.0124, -0.5306]) tensor([0.4834, 0.1335, 0.2423, 0.1408]) -Greedy action tensor([ 0.9545, -0.6980, -0.0010, -0.3772]) tensor([0.5434, 0.1041, 0.2090, 0.1435]) -Greedy action tensor([ 0.4976, -0.1353, -0.0744, -0.1983]) tensor([0.3855, 0.2047, 0.2176, 0.1922]) -Greedy action tensor([ 0.9007, -0.5426, 0.0523, -0.5800]) tensor([0.5286, 0.1248, 0.2263, 0.1202]) -Greedy action tensor([ 1.1577, -1.1202, 0.0703, -0.7145]) tensor([0.6276, 0.0643, 0.2116, 0.0965]) -Greedy action tensor([ 0.7008, -0.4020, 0.0222, -0.2703]) tensor([0.4509, 0.1497, 0.2287, 0.1707]) -Greedy action tensor([ 0.5544, -0.3607, -0.0859, 0.0063]) tensor([0.3991, 0.1598, 0.2104, 0.2307]) -Greedy action tensor([ 0.7711, -0.6769, 0.0339, -0.2438]) tensor([0.4817, 0.1132, 0.2305, 0.1746]) -Greedy action tensor([ 0.5715, 0.1546, -0.0313, 0.1531]) tensor([0.3491, 0.2301, 0.1911, 0.2297]) -Greedy action tensor([ 0.4235, -0.3417, -0.1563, -0.4249]) tensor([0.4076, 0.1896, 0.2283, 0.1745]) -Greedy action tensor([ 0.8674, -0.6171, 0.0300, -0.3024]) tensor([0.5077, 0.1150, 0.2197, 0.1576]) -Greedy action tensor([ 0.8580, -0.7761, 0.1264, -0.2495]) tensor([0.4983, 0.0972, 0.2398, 0.1646]) -Greedy action tensor([ 0.4395, 0.1095, -0.0013, -0.0038]) tensor([0.3329, 0.2393, 0.2142, 0.2137]) -Greedy action tensor([ 0.4060, -0.1664, -0.0710, -0.1411]) tensor([0.3619, 0.2042, 0.2246, 0.2094]) -Greedy action tensor([ 0.6423, -0.1566, 0.1034, -0.0722]) tensor([0.3964, 0.1783, 0.2313, 0.1940]) -Greedy action tensor([ 0.4355, 0.1083, -0.0725, -0.1502]) tensor([0.3473, 0.2504, 0.2090, 0.1933]) -Greedy action tensor([ 1.5493, -0.8318, -0.1557, -0.6198]) tensor([0.7202, 0.0666, 0.1309, 0.0823]) -Greedy action tensor([ 0.5932, -0.3143, -0.1323, -0.1377]) tensor([0.4221, 0.1703, 0.2043, 0.2032]) -Greedy action tensor([ 0.8274, -0.4330, -0.1192, -0.3057]) tensor([0.5016, 0.1422, 0.1946, 0.1615]) -Greedy action tensor([ 0.7777, -0.1537, -0.0525, 0.0137]) tensor([0.4356, 0.1716, 0.1899, 0.2029]) -Greedy action tensor([ 0.5365, -0.2234, 0.0110, -0.0165]) tensor([0.3796, 0.1776, 0.2245, 0.2184]) -Greedy action tensor([ 0.4593, -0.1845, -0.0735, -0.3704]) tensor([0.3924, 0.2061, 0.2303, 0.1712]) -Greedy action tensor([ 1.3517, -0.9555, -0.1116, -0.7138]) tensor([0.6860, 0.0683, 0.1588, 0.0870]) -Greedy action tensor([ 0.7321, -0.6300, 0.0147, -0.3576]) tensor([0.4807, 0.1231, 0.2346, 0.1616]) -Greedy action tensor([ 1.2429, -0.7643, -0.0186, -0.3913]) tensor([0.6201, 0.0833, 0.1756, 0.1210]) -Greedy action tensor([ 0.8288, -0.6813, -0.1170, -0.3007]) tensor([0.5175, 0.1143, 0.2010, 0.1672]) -Greedy action tensor([ 0.5942, -0.3483, -0.1296, -0.2569]) tensor([0.4345, 0.1693, 0.2107, 0.1855]) -Greedy action tensor([ 0.4413, 0.0159, -0.0074, -0.0114]) tensor([0.3415, 0.2232, 0.2181, 0.2172]) -Greedy action tensor([ 0.4869, -0.0359, -0.1086, -0.1164]) tensor([0.3716, 0.2203, 0.2048, 0.2033]) -Greedy action tensor([ 0.7371, -0.6249, -0.1151, -0.2118]) tensor([0.4832, 0.1238, 0.2060, 0.1871]) -Greedy action tensor([ 1.0948, -0.9699, -0.0681, -0.6441]) tensor([0.6191, 0.0785, 0.1935, 0.1088]) -Greedy action tensor([ 0.0942, 0.4570, -0.1486, -0.0579]) tensor([0.2451, 0.3522, 0.1922, 0.2105]) -Greedy action tensor([ 0.8207, -0.6675, -0.0233, -0.5271]) tensor([0.5220, 0.1179, 0.2245, 0.1356]) -Greedy action tensor([ 0.5562, -0.3489, -0.0641, -0.4102]) tensor([0.4305, 0.1742, 0.2315, 0.1638]) -Greedy action tensor([ 0.7361, -0.3390, 0.0288, -0.4356]) tensor([0.4664, 0.1592, 0.2299, 0.1445]) -Greedy action tensor([ 0.3850, -0.0460, -0.0445, -0.1545]) tensor([0.3468, 0.2254, 0.2257, 0.2022]) -Greedy action tensor([ 0.8856, -0.5641, -0.1673, -0.3321]) tensor([0.5321, 0.1248, 0.1856, 0.1574]) -Greedy action tensor([ 0.8382, -0.3902, -0.1412, -0.1874]) tensor([0.4934, 0.1444, 0.1853, 0.1769]) -Greedy action tensor([ 0.6993, -0.3157, -0.0664, -0.1816]) tensor([0.4461, 0.1617, 0.2074, 0.1849]) -Greedy action tensor([ 0.2618, -0.0438, 0.0102, 0.2180]) tensor([0.2881, 0.2122, 0.2240, 0.2757]) -Greedy action tensor([ 0.9359, -0.7854, 0.2040, -0.3962]) tensor([0.5198, 0.0930, 0.2500, 0.1372]) -Greedy action tensor([ 1.0740, -0.6495, -0.0719, -0.3191]) tensor([0.5732, 0.1023, 0.1822, 0.1423]) -Greedy action tensor([ 0.7992, -0.1728, -0.0542, -0.1079]) tensor([0.4529, 0.1713, 0.1929, 0.1828]) -Greedy action tensor([ 1.0309, -0.6685, -0.0330, -0.2919]) tensor([0.5573, 0.1019, 0.1923, 0.1485]) -Greedy action tensor([ 0.3433, -0.3566, -0.1693, -0.0466]) tensor([0.3607, 0.1791, 0.2160, 0.2442]) -Greedy action tensor([ 0.7380, -0.6833, -0.1083, -0.3762]) tensor([0.5004, 0.1208, 0.2147, 0.1642]) -Greedy action tensor([ 0.3546, -0.0170, -0.0948, -0.2253]) tensor([0.3463, 0.2388, 0.2210, 0.1939]) -Greedy action tensor([ 0.5956, -0.2746, 0.0400, -0.1806]) tensor([0.4077, 0.1708, 0.2339, 0.1876]) -Greedy action tensor([ 0.7136, -0.3684, 0.0059, -0.0936]) tensor([0.4390, 0.1488, 0.2163, 0.1958]) -Greedy action tensor([ 3.5274e-01, -1.9571e-04, -1.0385e-01, 4.0801e-02]) tensor([0.3259, 0.2290, 0.2065, 0.2386]) -Greedy action tensor([ 1.0472, -0.1758, 0.0759, -0.2508]) tensor([0.5139, 0.1513, 0.1945, 0.1403]) -Greedy action tensor([ 1.0656, -0.3929, 0.0585, -0.5754]) tensor([0.5581, 0.1298, 0.2039, 0.1082]) -Greedy action tensor([ 0.7654, -0.0783, -0.2220, -0.3475]) tensor([0.4692, 0.2018, 0.1748, 0.1542]) -Greedy action tensor([ 1.3338, -1.0164, 0.0412, -0.5557]) tensor([0.6574, 0.0627, 0.1805, 0.0994]) -Greedy action tensor([ 0.8215, -0.3849, 0.1320, -0.1469]) tensor([0.4586, 0.1372, 0.2301, 0.1741]) -Greedy action tensor([ 0.9819, -0.4059, -0.0239, -0.1423]) tensor([0.5154, 0.1287, 0.1885, 0.1675]) -Greedy action tensor([ 0.7292, -0.8352, 0.1067, -0.6357]) tensor([0.4997, 0.1045, 0.2681, 0.1276]) -Greedy action tensor([ 0.9150, -0.6693, -0.0732, -0.5105]) tensor([0.5501, 0.1128, 0.2048, 0.1322]) -Greedy action tensor([ 1.3781, -0.5108, -0.0588, -0.4827]) tensor([0.6475, 0.0979, 0.1539, 0.1007]) -Greedy action tensor([-1.8745, -0.4393, 0.6254, -0.1345]) tensor([0.0433, 0.1820, 0.5278, 0.2469]) -Greedy action tensor([-1.9100, -0.4273, 0.6480, -0.1629]) tensor([0.0416, 0.1831, 0.5367, 0.2386]) -Greedy action tensor([-1.7015, -0.4275, 0.5946, 0.0738]) tensor([0.0490, 0.1751, 0.4867, 0.2891]) -Greedy action tensor([-1.9343, -0.4376, 0.6633, -0.1748]) tensor([0.0405, 0.1808, 0.5436, 0.2351]) -Greedy action tensor([-1.7171, -0.2680, 0.5634, -0.0139]) tensor([0.0487, 0.2074, 0.4764, 0.2675]) -Greedy action tensor([-0.8686, 0.9164, 0.1070, 0.1924]) tensor([0.0800, 0.4767, 0.2122, 0.2311]) -Greedy action tensor([-1.8485, -0.4453, 0.6125, -0.1368]) tensor([0.0448, 0.1822, 0.5249, 0.2481]) -Greedy action tensor([-1.9220, -0.4366, 0.6583, -0.1681]) tensor([0.0410, 0.1810, 0.5411, 0.2368]) -Greedy action tensor([-1.9433, -0.4499, 0.6677, -0.1804]) tensor([0.0402, 0.1788, 0.5468, 0.2342]) -Greedy action tensor([-1.8178, -0.4686, 0.6124, -0.1194]) tensor([0.0461, 0.1778, 0.5240, 0.2521]) -Greedy action tensor([-1.9175, -0.4618, 0.6581, -0.1640]) tensor([0.0413, 0.1772, 0.5429, 0.2386]) -Greedy action tensor([-1.9395, -0.4492, 0.6657, -0.1774]) tensor([0.0403, 0.1790, 0.5458, 0.2349]) -Greedy action tensor([-1.4336, -0.3078, 0.4984, -0.1998]) tensor([0.0694, 0.2138, 0.4787, 0.2381]) -Greedy action tensor([-1.8655, -0.3816, 0.6216, -0.1949]) tensor([0.0440, 0.1938, 0.5286, 0.2336]) -Greedy action tensor([-1.9238, -0.4167, 0.6561, -0.1683]) tensor([0.0408, 0.1843, 0.5387, 0.2362]) -Greedy action tensor([-1.4563, -0.5171, 0.5405, 0.2581]) tensor([0.0607, 0.1552, 0.4470, 0.3370]) -Greedy action tensor([-1.9132, -0.4204, 0.6517, -0.1635]) tensor([0.0413, 0.1838, 0.5371, 0.2377]) -Greedy action tensor([-1.9261, -0.4388, 0.6581, -0.1709]) tensor([0.0409, 0.1809, 0.5418, 0.2365]) -Greedy action tensor([-1.8311, -0.5098, 0.5760, -0.1293]) tensor([0.0469, 0.1757, 0.5204, 0.2570]) -Greedy action tensor([-1.9082, -0.4070, 0.6449, -0.1630]) tensor([0.0416, 0.1865, 0.5339, 0.2380]) -Greedy action tensor([-1.5381, -0.5496, 0.5143, 0.1470]) tensor([0.0593, 0.1593, 0.4617, 0.3197]) -Greedy action tensor([-1.8424, -0.4833, 0.6193, -0.1314]) tensor([0.0451, 0.1757, 0.5293, 0.2498]) -Greedy action tensor([-1.9361, -0.4381, 0.6635, -0.1735]) tensor([0.0404, 0.1807, 0.5436, 0.2354]) -Greedy action tensor([-1.3523, -0.5377, 0.3795, -0.0261]) tensor([0.0789, 0.1782, 0.4458, 0.2971]) -Greedy action tensor([-1.6888, 0.3622, 0.4181, -0.0489]) tensor([0.0451, 0.3510, 0.3712, 0.2327]) -Greedy action tensor([-1.2013, 0.5911, 0.2994, -0.1321]) tensor([0.0694, 0.4169, 0.3114, 0.2023]) -Greedy action tensor([-1.4720, -0.5496, 0.4506, 0.0479]) tensor([0.0670, 0.1685, 0.4582, 0.3063]) -Greedy action tensor([-0.7129, -0.1039, 0.2330, -0.2078]) tensor([0.1414, 0.2600, 0.3642, 0.2344]) -Greedy action tensor([-1.5899, -0.5525, 0.4880, -0.0110]) tensor([0.0600, 0.1694, 0.4795, 0.2911]) -Greedy action tensor([-1.7447, -0.4606, 0.5618, -0.0338]) tensor([0.0495, 0.1789, 0.4974, 0.2742]) -Greedy action tensor([-1.0594, 0.8091, 0.1525, 0.1604]) tensor([0.0703, 0.4554, 0.2362, 0.2381]) -Greedy action tensor([-1.9094, -0.4546, 0.6554, -0.1633]) tensor([0.0416, 0.1784, 0.5413, 0.2387]) -Greedy action tensor([-1.8915, -0.3588, 0.6298, -0.1331]) tensor([0.0419, 0.1939, 0.5212, 0.2430]) -Greedy action tensor([-1.6064, -0.3398, 0.5373, 0.0677]) tensor([0.0543, 0.1927, 0.4633, 0.2897]) -Greedy action tensor([-1.6031, -0.2748, 0.4872, -0.0063]) tensor([0.0562, 0.2121, 0.4544, 0.2774]) -Greedy action tensor([-0.6146, 0.9954, 0.0105, 0.3847]) tensor([0.0944, 0.4725, 0.1765, 0.2566]) -Greedy action tensor([-1.8831, -0.4513, 0.6371, -0.1531]) tensor([0.0430, 0.1800, 0.5345, 0.2425]) -Greedy action tensor([-1.7936, -0.4813, 0.6259, -0.0292]) tensor([0.0459, 0.1705, 0.5158, 0.2679]) -Greedy action tensor([-1.9022, -0.3928, 0.6482, -0.1433]) tensor([0.0414, 0.1874, 0.5307, 0.2405]) -Greedy action tensor([-1.8229, -0.4966, 0.5987, -0.1254]) tensor([0.0465, 0.1753, 0.5241, 0.2541]) -Greedy action tensor([-1.8615, -0.4380, 0.6236, -0.1413]) tensor([0.0440, 0.1826, 0.5278, 0.2456]) -Greedy action tensor([-1.7770, -0.4135, 0.6785, 0.0178]) tensor([0.0443, 0.1732, 0.5160, 0.2665]) -Greedy action tensor([-1.8688, -0.5061, 0.7500, -0.1056]) tensor([0.0409, 0.1597, 0.5610, 0.2384]) -Greedy action tensor([-1.2284, -0.6059, 0.3589, 0.0036]) tensor([0.0894, 0.1667, 0.4373, 0.3066]) -Greedy action tensor([-1.4935, 0.2859, 0.4626, 0.0678]) tensor([0.0533, 0.3159, 0.3769, 0.2539]) -Greedy action tensor([-1.7841, -0.4793, 0.6553, -0.2233]) tensor([0.0478, 0.1763, 0.5482, 0.2277]) -Greedy action tensor([-1.6070, -0.3293, 0.5706, 0.0698]) tensor([0.0533, 0.1913, 0.4704, 0.2851]) -Greedy action tensor([-1.5990, 0.3303, 0.3849, -0.0284]) tensor([0.0501, 0.3448, 0.3642, 0.2409]) -Greedy action tensor([-1.8749, -0.4571, 0.6384, -0.1451]) tensor([0.0433, 0.1786, 0.5341, 0.2440]) -Greedy action tensor([-1.9198, -0.4506, 0.6577, -0.1671]) tensor([0.0412, 0.1790, 0.5422, 0.2376]) -Greedy action tensor([-1.9121, -0.3906, 0.6468, -0.1628]) tensor([0.0412, 0.1888, 0.5328, 0.2371]) -Greedy action tensor([-0.6280, 1.0517, 0.0580, 0.3579]) tensor([0.0907, 0.4863, 0.1800, 0.2430]) -Greedy action tensor([-1.8855, -0.2821, 0.6195, -0.1207]) tensor([0.0416, 0.2066, 0.5090, 0.2428]) -Greedy action tensor([-1.7552, -0.3731, 0.6544, -0.0373]) tensor([0.0461, 0.1837, 0.5132, 0.2570]) -Greedy action tensor([-1.9024, -0.4400, 0.6437, -0.1602]) tensor([0.0420, 0.1815, 0.5364, 0.2401]) -Greedy action tensor([-0.1738, -0.6515, 0.1447, 0.5192]) tensor([0.2002, 0.1242, 0.2753, 0.4003]) -Greedy action tensor([-1.8687, -0.4425, 0.6306, -0.1370]) tensor([0.0435, 0.1811, 0.5296, 0.2458]) -Greedy action tensor([-1.8617, -0.4202, 0.6196, -0.1352]) tensor([0.0439, 0.1854, 0.5243, 0.2465]) -Greedy action tensor([-1.7845, -0.3328, 0.6095, -0.2024]) tensor([0.0474, 0.2025, 0.5195, 0.2307]) -Greedy action tensor([-1.8722, -0.3780, 0.6268, -0.1419]) tensor([0.0430, 0.1915, 0.5231, 0.2425]) -Greedy action tensor([-1.8928, -0.4471, 0.6400, -0.1598]) tensor([0.0426, 0.1807, 0.5359, 0.2408]) -Greedy action tensor([-1.8328, -0.3248, 0.5952, -0.1398]) tensor([0.0449, 0.2027, 0.5086, 0.2439]) -Greedy action tensor([-1.6210, -0.3360, 0.6639, 0.0836]) tensor([0.0502, 0.1813, 0.4927, 0.2758]) -Greedy action tensor([-1.9295, -0.4446, 0.6614, -0.1717]) tensor([0.0407, 0.1798, 0.5433, 0.2362]) -Greedy action tensor([-1.9378, -0.4400, 0.6638, -0.1748]) tensor([0.0403, 0.1804, 0.5440, 0.2352]) -Greedy action tensor([-1.8898, -0.4485, 0.6423, -0.1538]) tensor([0.0426, 0.1800, 0.5358, 0.2417]) -Greedy action tensor([-1.8106, -0.4045, 0.6627, -0.0769]) tensor([0.0442, 0.1805, 0.5248, 0.2505]) -Greedy action tensor([-1.7874, -0.4311, 0.5856, -0.1084]) tensor([0.0477, 0.1851, 0.5116, 0.2556]) -Greedy action tensor([-1.7150, -0.5032, 0.6306, -0.1079]) tensor([0.0505, 0.1698, 0.5276, 0.2521]) -Greedy action tensor([-1.8922, -0.3918, 0.6456, -0.1413]) tensor([0.0419, 0.1876, 0.5295, 0.2410]) -Greedy action tensor([-1.8757, -0.4563, 0.6407, -0.1418]) tensor([0.0431, 0.1784, 0.5342, 0.2443]) -Greedy action tensor([-1.8349, -0.4804, 0.6060, -0.1122]) tensor([0.0455, 0.1765, 0.5230, 0.2550]) -Greedy action tensor([-1.4643, -0.0871, 0.6284, 0.2445]) tensor([0.0538, 0.2132, 0.4360, 0.2970]) -Greedy action tensor([-0.0166, 0.8391, -0.1251, 0.5176]) tensor([0.1679, 0.3950, 0.1506, 0.2864]) -Greedy action tensor([-1.9419, -0.4481, 0.6667, -0.1791]) tensor([0.0402, 0.1792, 0.5462, 0.2344]) -Greedy action tensor([-1.6947, -0.3699, 0.5503, -0.0241]) tensor([0.0512, 0.1927, 0.4837, 0.2723]) -Greedy action tensor([-1.9406, -0.4505, 0.6642, -0.1796]) tensor([0.0403, 0.1790, 0.5458, 0.2348]) -Greedy action tensor([-1.9302, -0.4351, 0.6625, -0.1727]) tensor([0.0406, 0.1811, 0.5428, 0.2355]) -Greedy action tensor([1.0573, 1.3264, 0.0246, 0.6525]) tensor([0.3001, 0.3928, 0.1069, 0.2002]) -Greedy action tensor([-1.9065, -0.4519, 0.6312, -0.1625]) tensor([0.0423, 0.1811, 0.5348, 0.2418]) -Greedy action tensor([-1.9119, -0.4296, 0.6522, -0.1640]) tensor([0.0414, 0.1824, 0.5382, 0.2379]) -Greedy action tensor([ 0.7522, -1.2304, 0.9730, 0.4065]) tensor([0.3234, 0.0445, 0.4033, 0.2289]) -Greedy action tensor([ 0.0498, -1.0209, -0.3909, -0.4512]) tensor([0.3858, 0.1322, 0.2483, 0.2337]) -Greedy action tensor([ 0.3947, -0.3876, 0.6712, 0.4819]) tensor([0.2586, 0.1183, 0.3410, 0.2822]) -Greedy action tensor([-0.0327, 0.4628, -0.3822, -0.3197]) tensor([0.2441, 0.4006, 0.1721, 0.1832]) -Greedy action tensor([-0.4920, -1.1574, 0.3654, -0.8880]) tensor([0.2201, 0.1131, 0.5187, 0.1481]) -Greedy action tensor([ 0.9035, 0.9023, 0.7141, -0.5189]) tensor([0.3260, 0.3256, 0.2698, 0.0786]) -Greedy action tensor([ 0.6901, -1.6708, -0.2445, -0.2403]) tensor([0.5315, 0.0501, 0.2087, 0.2096]) -Greedy action tensor([ 0.1289, -0.8518, -0.3709, 0.3835]) tensor([0.3057, 0.1146, 0.1854, 0.3943]) -Greedy action tensor([ 1.4779, -1.3940, 0.7561, 1.2656]) tensor([0.4253, 0.0241, 0.2067, 0.3440]) -Greedy action tensor([-0.5129, -0.5787, 1.4757, -1.1451]) tensor([0.1023, 0.0958, 0.7475, 0.0544]) -Greedy action tensor([ 0.9853, -0.7038, -0.1243, 1.3817]) tensor([0.3332, 0.0615, 0.1099, 0.4954]) -Greedy action tensor([ 0.0873, -1.2752, 0.3294, 0.5718]) tensor([0.2408, 0.0616, 0.3067, 0.3909]) -Greedy action tensor([-0.9155, -1.0364, 0.3020, -0.5694]) tensor([0.1497, 0.1327, 0.5059, 0.2117]) -Greedy action tensor([ 0.5017, -0.3992, 0.5227, -0.1664]) tensor([0.3401, 0.1382, 0.3473, 0.1744]) -Greedy action tensor([ 0.9230, -0.9195, 0.1857, 0.5120]) tensor([0.4348, 0.0689, 0.2080, 0.2883]) -Greedy action tensor([-0.3367, -0.5320, 0.0535, -0.1534]) tensor([0.2222, 0.1827, 0.3282, 0.2669]) -Greedy action tensor([ 0.5063, -0.4159, 0.0852, -0.0154]) tensor([0.3777, 0.1502, 0.2479, 0.2242]) -Greedy action tensor([-0.8062, 0.2481, 0.4417, -0.2860]) tensor([0.1107, 0.3176, 0.3855, 0.1862]) -Greedy action tensor([-0.4487, 0.8346, 0.0376, -1.1755]) tensor([0.1488, 0.5371, 0.2421, 0.0720]) -Greedy action tensor([ 0.6641, -0.5942, -0.6858, 0.1564]) tensor([0.4661, 0.1324, 0.1209, 0.2806]) -Greedy action tensor([-0.1231, -2.4918, 0.2859, 0.0547]) tensor([0.2636, 0.0247, 0.3968, 0.3149]) -Greedy action tensor([-0.7637, -1.0618, 0.0997, -0.7654]) tensor([0.1956, 0.1452, 0.4639, 0.1953]) -Greedy action tensor([-0.2046, 0.6221, 0.8572, -0.2236]) tensor([0.1397, 0.3193, 0.4039, 0.1371]) -Greedy action tensor([ 1.3257, -1.1306, 0.6705, 0.2952]) tensor([0.5097, 0.0437, 0.2647, 0.1819]) -Greedy action tensor([ 0.7953, 0.3242, 0.4794, -0.1801]) tensor([0.3662, 0.2286, 0.2670, 0.1381]) -Greedy action tensor([ 0.9537, -0.4601, -0.2732, -0.2031]) tensor([0.5403, 0.1314, 0.1584, 0.1699]) -Greedy action tensor([ 0.1051, -0.5101, 0.2075, 0.1160]) tensor([0.2733, 0.1477, 0.3028, 0.2763]) -Greedy action tensor([ 0.0697, -0.5437, -0.1417, -0.1757]) tensor([0.3191, 0.1728, 0.2583, 0.2497]) -Greedy action tensor([ 1.4877, -0.8382, 1.1966, 0.3930]) tensor([0.4588, 0.0448, 0.3429, 0.1535]) -Greedy action tensor([ 0.1975, -1.2655, -0.4846, 0.7147]) tensor([0.2929, 0.0678, 0.1481, 0.4913]) -Greedy action tensor([ 0.2879, -0.7740, 0.7733, 0.5228]) tensor([0.2361, 0.0816, 0.3836, 0.2986]) -Greedy action tensor([ 1.0360, -0.1400, 0.3168, 0.0141]) tensor([0.4639, 0.1431, 0.2260, 0.1670]) -Greedy action tensor([1.0964, 0.3589, 0.4544, 0.5360]) tensor([0.3883, 0.1857, 0.2043, 0.2217]) -Greedy action tensor([ 0.4028, -1.0653, 0.3969, -0.2455]) tensor([0.3640, 0.0838, 0.3618, 0.1903]) -Greedy action tensor([ 0.7980, 0.0150, -0.3731, -0.9958]) tensor([0.5172, 0.2364, 0.1604, 0.0860]) -Greedy action tensor([ 0.7624, -1.2690, -0.0528, 0.3134]) tensor([0.4521, 0.0593, 0.2001, 0.2885]) -Greedy action tensor([-0.1881, 0.6293, -0.1279, -0.6691]) tensor([0.2022, 0.4580, 0.2148, 0.1250]) -Greedy action tensor([-0.3815, -1.9610, 2.2778, -0.6034]) tensor([0.0614, 0.0126, 0.8768, 0.0492]) -Greedy action tensor([ 0.5850, -1.4979, 1.1169, -0.5677]) tensor([0.3182, 0.0396, 0.5416, 0.1005]) -Greedy action tensor([ 0.0032, -0.0304, 0.2456, 0.1362]) tensor([0.2281, 0.2206, 0.2907, 0.2606]) -Greedy action tensor([ 0.3095, -0.3248, 0.3353, 0.2157]) tensor([0.2884, 0.1530, 0.2960, 0.2626]) -Greedy action tensor([ 0.3714, -0.5152, -1.0745, -0.1580]) tensor([0.4471, 0.1842, 0.1053, 0.2633]) -Greedy action tensor([ 0.9393, -0.4558, 0.8015, 0.1337]) tensor([0.3897, 0.0966, 0.3396, 0.1741]) -Greedy action tensor([ 0.1699, 0.1077, 0.2678, -0.4606]) tensor([0.2797, 0.2629, 0.3085, 0.1489]) -Greedy action tensor([ 0.5889, -1.0365, -0.1852, 0.4699]) tensor([0.3928, 0.0773, 0.1811, 0.3487]) -Greedy action tensor([ 0.5406, -1.1179, 0.3278, -0.2961]) tensor([0.4112, 0.0783, 0.3324, 0.1781]) -Greedy action tensor([ 0.2647, -2.1080, -0.1441, 0.3852]) tensor([0.3465, 0.0323, 0.2302, 0.3909]) -Greedy action tensor([ 0.0010, -0.8644, -0.7169, 0.2266]) tensor([0.3163, 0.1331, 0.1543, 0.3963]) -Greedy action tensor([-0.5646, -0.2355, -0.3817, -0.6480]) tensor([0.2217, 0.3081, 0.2662, 0.2040]) -Greedy action tensor([ 0.9632, -0.3628, -0.0650, 1.2441]) tensor([0.3393, 0.0901, 0.1213, 0.4493]) -Greedy action tensor([-0.0890, -0.9242, -0.2071, -0.4274]) tensor([0.3295, 0.1429, 0.2928, 0.2349]) -Greedy action tensor([-0.3800, -0.2825, -0.3141, -0.6353]) tensor([0.2535, 0.2794, 0.2707, 0.1964]) -Greedy action tensor([-0.0758, 0.1572, -0.8490, -0.7643]) tensor([0.3100, 0.3913, 0.1431, 0.1557]) -Greedy action tensor([-0.2896, -0.6093, 0.1329, 0.1195]) tensor([0.2102, 0.1527, 0.3207, 0.3164]) -Greedy action tensor([ 0.2618, -1.1355, -0.2164, 0.6905]) tensor([0.2939, 0.0727, 0.1822, 0.4512]) -Greedy action tensor([ 0.7322, -0.3267, 0.6385, 0.7138]) tensor([0.3087, 0.1071, 0.2811, 0.3031]) -Greedy action tensor([-0.5643, -1.5505, 0.1992, 0.9497]) tensor([0.1240, 0.0463, 0.2661, 0.5636]) -Greedy action tensor([ 1.0795, -1.1036, 0.2139, -1.1214]) tensor([0.6082, 0.0685, 0.2559, 0.0673]) -Greedy action tensor([ 0.3240, -1.1974, 0.2047, -0.2854]) tensor([0.3774, 0.0824, 0.3350, 0.2052]) -Greedy action tensor([-0.3588, -0.5908, 1.0811, -1.3342]) tensor([0.1565, 0.1241, 0.6604, 0.0590]) -Greedy action tensor([ 0.0729, -0.1425, 0.8342, -0.2657]) tensor([0.2146, 0.1730, 0.4595, 0.1530]) -Greedy action tensor([ 0.0612, 0.1116, 0.6554, -0.3542]) tensor([0.2211, 0.2325, 0.4005, 0.1459]) -Greedy action tensor([-0.3330, -0.6587, 1.0919, -0.5731]) tensor([0.1500, 0.1083, 0.6237, 0.1180]) -Greedy action tensor([-0.5169, -0.5014, 0.3782, -1.2138]) tensor([0.2016, 0.2047, 0.4933, 0.1004]) -Greedy action tensor([-0.5650, -0.6880, 0.1860, -0.0340]) tensor([0.1753, 0.1550, 0.3715, 0.2982]) -Greedy action tensor([ 1.6914, 0.2150, -1.1841, -0.2916]) tensor([0.7030, 0.1606, 0.0396, 0.0968]) -Greedy action tensor([ 0.2341, -0.5333, 0.7054, -0.2352]) tensor([0.2709, 0.1257, 0.4340, 0.1694]) -Greedy action tensor([ 0.0810, -1.1242, 0.0056, 0.3858]) tensor([0.2791, 0.0836, 0.2588, 0.3785]) -Greedy action tensor([-0.3132, 0.0388, -0.1619, -0.0287]) tensor([0.2035, 0.2893, 0.2367, 0.2704]) -Greedy action tensor([-0.1577, 0.4720, -0.4624, -0.3843]) tensor([0.2267, 0.4255, 0.1671, 0.1807]) -Greedy action tensor([-0.7578, -0.6477, 0.6159, -1.3820]) tensor([0.1515, 0.1691, 0.5983, 0.0811]) -Greedy action tensor([ 0.1643, -0.6862, 0.0222, 0.3427]) tensor([0.2865, 0.1224, 0.2486, 0.3425]) -Greedy action tensor([ 1.1497, -0.6466, 0.9311, 0.0663]) tensor([0.4333, 0.0719, 0.3482, 0.1466]) -Greedy action tensor([-1.1584, -0.1932, -0.2019, -0.9899]) tensor([0.1349, 0.3542, 0.3512, 0.1597]) -Greedy action tensor([ 0.3338, 0.5394, -0.7571, -0.2616]) tensor([0.3210, 0.3942, 0.1078, 0.1770]) -Greedy action tensor([ 1.3427, -0.3785, 0.3076, 0.5886]) tensor([0.4989, 0.0892, 0.1772, 0.2347]) -Greedy action tensor([-0.0705, -1.2050, 0.7753, -0.0904]) tensor([0.2159, 0.0694, 0.5030, 0.2117]) -Greedy action tensor([-0.1909, -0.0356, -0.7877, -0.5266]) tensor([0.2912, 0.3402, 0.1604, 0.2082]) -Greedy action tensor([-0.1354, -0.6932, 0.1997, -0.9173]) tensor([0.2917, 0.1670, 0.4078, 0.1335]) -Greedy action tensor([ 1.1701, -0.7788, 1.8461, -0.4110]) tensor([0.3017, 0.0430, 0.5932, 0.0621]) -Greedy action tensor([ 0.6120, -0.4319, -0.2592, 0.2184]) tensor([0.4090, 0.1440, 0.1711, 0.2759]) -Greedy action tensor([ 1.0620, 0.3866, -0.3506, -0.5736]) tensor([0.5135, 0.2614, 0.1250, 0.1001]) -Greedy action tensor([ 1.2521, -0.1502, -0.3099, 0.3834]) tensor([0.5333, 0.1312, 0.1118, 0.2237]) -Greedy action tensor([ 1.2974, -0.1045, -1.2745, 0.5010]) tensor([0.5639, 0.1388, 0.0431, 0.2543]) -Greedy action tensor([ 1.5510, -0.2995, -0.4851, -0.0890]) tensor([0.6749, 0.1061, 0.0881, 0.1309]) -Greedy action tensor([ 1.9167, -0.9476, -0.0617, 0.4423]) tensor([0.7021, 0.0400, 0.0971, 0.1607]) -Greedy action tensor([ 1.5798, -0.5481, -0.3223, 0.5964]) tensor([0.6089, 0.0725, 0.0909, 0.2277]) -Greedy action tensor([ 1.3935, -0.4178, -0.4135, 0.3824]) tensor([0.5912, 0.0966, 0.0970, 0.2151]) -Greedy action tensor([ 1.6230, -0.5680, -0.3326, 0.0450]) tensor([0.6851, 0.0766, 0.0969, 0.1414]) -Greedy action tensor([ 1.3877, -0.1950, -0.3682, 0.3823]) tensor([0.5734, 0.1178, 0.0991, 0.2098]) -Greedy action tensor([ 1.6026, 0.0049, -0.2536, 0.1543]) tensor([0.6275, 0.1270, 0.0981, 0.1475]) -Greedy action tensor([ 1.8333, 0.1909, -0.2640, 0.1751]) tensor([0.6637, 0.1284, 0.0815, 0.1264]) -Greedy action tensor([ 1.5368, -0.4362, -0.9091, 0.1229]) tensor([0.6808, 0.0947, 0.0590, 0.1656]) -Greedy action tensor([ 1.5481, -0.1429, -1.1188, 0.4942]) tensor([0.6241, 0.1150, 0.0434, 0.2175]) -Greedy action tensor([ 1.4911, -0.1302, -0.7461, 0.4502]) tensor([0.6033, 0.1192, 0.0644, 0.2130]) -Greedy action tensor([ 1.1269, -0.0411, -0.4091, 0.4338]) tensor([0.4935, 0.1535, 0.1062, 0.2468]) -Greedy action tensor([ 1.2114, -0.3062, -0.3470, 0.0341]) tensor([0.5754, 0.1262, 0.1211, 0.1773]) -Greedy action tensor([ 2.0092, -1.4050, -0.4055, 0.2054]) tensor([0.7770, 0.0256, 0.0695, 0.1280]) -Greedy action tensor([ 1.3341, -0.5679, -0.2205, 0.2711]) tensor([0.5862, 0.0875, 0.1238, 0.2025]) -Greedy action tensor([ 1.3295, -0.3370, -0.4881, -0.0268]) tensor([0.6215, 0.1174, 0.1009, 0.1601]) -Greedy action tensor([ 0.9216, -0.0240, -0.4011, -0.0333]) tensor([0.4903, 0.1904, 0.1306, 0.1887]) -Greedy action tensor([ 1.1765, -0.4280, -0.3507, 0.4041]) tensor([0.5319, 0.1069, 0.1155, 0.2457]) -Greedy action tensor([ 1.4909, -0.0800, -0.8053, 0.2600]) tensor([0.6248, 0.1299, 0.0629, 0.1825]) -Greedy action tensor([ 1.2432, -0.4506, -0.7360, 0.3275]) tensor([0.5807, 0.1067, 0.0802, 0.2324]) -Greedy action tensor([ 1.6317, -0.6286, -0.2283, 0.3449]) tensor([0.6510, 0.0679, 0.1013, 0.1798]) -Greedy action tensor([ 1.2131, -0.7144, 0.1238, -0.1603]) tensor([0.5763, 0.0839, 0.1939, 0.1459]) -Greedy action tensor([ 1.8520, -0.3114, -0.6245, 0.2555]) tensor([0.7135, 0.0820, 0.0600, 0.1446]) -Greedy action tensor([ 2.4250, 0.7376, -0.0357, -0.1336]) tensor([0.7420, 0.1373, 0.0633, 0.0574]) -Greedy action tensor([ 2.1195, -0.5116, -0.5730, 0.7095]) tensor([0.7226, 0.0520, 0.0489, 0.1764]) -Greedy action tensor([ 1.4968, -0.5330, -0.0942, 1.0079]) tensor([0.5132, 0.0674, 0.1046, 0.3148]) -Greedy action tensor([ 1.3212, -0.1946, -0.0749, 0.0552]) tensor([0.5717, 0.1256, 0.1415, 0.1612]) -Greedy action tensor([ 1.6821, 0.1343, -0.4247, -0.0178]) tensor([0.6592, 0.1402, 0.0802, 0.1204]) -Greedy action tensor([ 1.8406, -0.7317, -0.3319, 0.6628]) tensor([0.6675, 0.0510, 0.0760, 0.2055]) -Greedy action tensor([ 1.5838, -0.5510, -0.6283, 0.2892]) tensor([0.6659, 0.0788, 0.0729, 0.1825]) -Greedy action tensor([ 0.5873, -0.4188, 0.1364, -0.0867]) tensor([0.3980, 0.1455, 0.2536, 0.2029]) -Greedy action tensor([ 1.5838, -0.1888, -0.4588, 0.3175]) tensor([0.6323, 0.1074, 0.0820, 0.1782]) -Greedy action tensor([ 1.3257, -0.4089, -0.3754, -0.0539]) tensor([0.6209, 0.1096, 0.1133, 0.1563]) -Greedy action tensor([ 1.5917, -0.2133, -1.0845, 0.2018]) tensor([0.6746, 0.1110, 0.0464, 0.1680]) -Greedy action tensor([ 2.1894, -0.7596, -0.5374, 0.7062]) tensor([0.7436, 0.0390, 0.0487, 0.1687]) -Greedy action tensor([ 1.1730, -0.4667, -0.2016, 0.0409]) tensor([0.5652, 0.1097, 0.1430, 0.1822]) -Greedy action tensor([ 1.3986, 0.0265, -0.6955, -0.0034]) tensor([0.6162, 0.1563, 0.0759, 0.1516]) -Greedy action tensor([ 1.4752, -0.5542, -0.3745, 0.0101]) tensor([0.6580, 0.0865, 0.1035, 0.1520]) -Greedy action tensor([ 1.2547, -0.1264, -0.8861, 0.2138]) tensor([0.5807, 0.1459, 0.0683, 0.2051]) -Greedy action tensor([ 1.1226, -0.2146, -0.0174, 0.2041]) tensor([0.5047, 0.1325, 0.1614, 0.2014]) -Greedy action tensor([ 2.0684, -0.7735, -0.6425, 0.5544]) tensor([0.7436, 0.0434, 0.0494, 0.1636]) -Greedy action tensor([ 1.6803, -0.2939, -0.5330, 0.3671]) tensor([0.6591, 0.0915, 0.0721, 0.1773]) -Greedy action tensor([ 1.5663, -0.6604, -0.8425, 0.3398]) tensor([0.6706, 0.0723, 0.0603, 0.1967]) -Greedy action tensor([ 1.6504, -0.1968, -0.6123, 0.3915]) tensor([0.6470, 0.1020, 0.0673, 0.1837]) -Greedy action tensor([ 1.4603, -0.2695, -0.6360, 0.4776]) tensor([0.5972, 0.1059, 0.0734, 0.2235]) -Greedy action tensor([ 1.5883, 0.2340, -0.5048, 0.3293]) tensor([0.6005, 0.1550, 0.0740, 0.1705]) -Greedy action tensor([ 1.0422, -0.0079, -0.5022, 0.7464]) tensor([0.4334, 0.1516, 0.0925, 0.3224]) -Greedy action tensor([ 1.1671, -0.1275, -0.5255, 0.3585]) tensor([0.5253, 0.1439, 0.0967, 0.2340]) -Greedy action tensor([ 1.5948, -1.0817, -0.5416, 0.6975]) tensor([0.6271, 0.0431, 0.0740, 0.2557]) -Greedy action tensor([ 1.1590, -0.1084, -0.4610, 0.0025]) tensor([0.5574, 0.1569, 0.1103, 0.1754]) -Greedy action tensor([ 1.5855, -0.4396, -0.2250, 0.4657]) tensor([0.6166, 0.0814, 0.1009, 0.2012]) -Greedy action tensor([ 0.9754, -0.3216, -0.1690, 0.4317]) tensor([0.4603, 0.1258, 0.1466, 0.2673]) -Greedy action tensor([ 0.9283, -0.4389, -0.0233, 0.1707]) tensor([0.4740, 0.1208, 0.1830, 0.2222]) -Greedy action tensor([ 1.3078, -0.3302, -0.4930, 0.5715]) tensor([0.5439, 0.1057, 0.0898, 0.2605]) -Greedy action tensor([ 1.8129, -0.6504, -0.9825, 0.5572]) tensor([0.6987, 0.0595, 0.0427, 0.1991]) -Greedy action tensor([ 1.3806, -0.1919, -0.4039, 0.3245]) tensor([0.5803, 0.1204, 0.0974, 0.2018]) -Greedy action tensor([ 1.1322, 0.2886, -0.4131, 0.3741]) tensor([0.4735, 0.2037, 0.1010, 0.2218]) -Greedy action tensor([ 1.6539, -0.4803, -0.5671, 0.6310]) tensor([0.6303, 0.0746, 0.0684, 0.2267]) -Greedy action tensor([ 1.9010, -0.1366, -0.7265, 0.4630]) tensor([0.6945, 0.0905, 0.0502, 0.1649]) -Greedy action tensor([ 0.7106, -0.5498, 0.0331, 0.0577]) tensor([0.4325, 0.1226, 0.2197, 0.2251]) -Greedy action tensor([ 1.2646, -0.7563, -0.5479, 0.9251]) tensor([0.4980, 0.0660, 0.0813, 0.3547]) -Greedy action tensor([ 1.1181, -0.0813, -0.5217, 0.1231]) tensor([0.5361, 0.1616, 0.1040, 0.1982]) -Greedy action tensor([ 2.3059, -0.9491, -0.5067, 0.3483]) tensor([0.8066, 0.0311, 0.0484, 0.1139]) -Greedy action tensor([ 1.0790, 0.0761, 0.0357, -0.5146]) tensor([0.5202, 0.1908, 0.1833, 0.1057]) -Greedy action tensor([ 1.9326, -1.2734, 0.4856, 0.8573]) tensor([0.6184, 0.0251, 0.1455, 0.2110]) -Greedy action tensor([ 1.5905, -0.3839, -0.5196, 0.1051]) tensor([0.6727, 0.0934, 0.0815, 0.1523]) -Greedy action tensor([ 1.7362, -0.4693, -0.6157, 0.1512]) tensor([0.7090, 0.0781, 0.0675, 0.1453]) -Greedy action tensor([ 1.1092, -0.4684, 0.1343, 0.2097]) tensor([0.5024, 0.1037, 0.1895, 0.2044]) -Greedy action tensor([ 1.2926, -0.2731, -0.2124, -0.2992]) tensor([0.6118, 0.1278, 0.1358, 0.1245]) -Greedy action tensor([ 1.4471, -0.5883, -0.4762, 0.2819]) tensor([0.6295, 0.0822, 0.0920, 0.1963]) -Greedy action tensor([ 1.3524, -0.5905, -0.6199, 0.3339]) tensor([0.6084, 0.0872, 0.0847, 0.2197]) -Greedy action tensor([ 1.9992, -0.0746, -0.2191, -0.0382]) tensor([0.7327, 0.0921, 0.0797, 0.0955]) -Greedy action tensor([ 1.6581, 0.1349, -0.7137, 0.1218]) tensor([0.6551, 0.1428, 0.0611, 0.1410]) -Greedy action tensor([ 1.7265, -0.9182, -0.9774, 0.5597]) tensor([0.6900, 0.0490, 0.0462, 0.2148]) -Greedy action tensor([ 1.9066, -0.8900, -0.4352, 0.5623]) tensor([0.7053, 0.0430, 0.0678, 0.1839]) -Greedy action tensor([ 1.4490, -0.9143, -0.4240, 0.5960]) tensor([0.5974, 0.0562, 0.0918, 0.2546]) -Greedy action tensor([ 1.2229, -0.4956, -0.2061, 0.8156]) tensor([0.4798, 0.0860, 0.1149, 0.3192]) -Greedy action tensor([ 0.9237, -0.1899, -0.1502, -0.0063]) tensor([0.4844, 0.1590, 0.1655, 0.1911]) -Greedy action tensor([ 0.4419, -0.2776, -0.0690, -0.0808]) tensor([0.3732, 0.1817, 0.2239, 0.2213]) -Greedy action tensor([ 0.6545, -0.0452, -0.1285, -0.6187]) tensor([0.4477, 0.2224, 0.2046, 0.1253]) -Greedy action tensor([ 0.8459, -0.6475, -0.0549, -0.3967]) tensor([0.5210, 0.1170, 0.2117, 0.1504]) -Greedy action tensor([ 0.5885, -0.4194, 0.1415, -0.4720]) tensor([0.4254, 0.1553, 0.2721, 0.1473]) -Greedy action tensor([ 0.5039, -0.4831, 0.0030, -0.3530]) tensor([0.4161, 0.1551, 0.2522, 0.1766]) -Greedy action tensor([ 0.8478, -0.5264, -0.0581, -0.2259]) tensor([0.5003, 0.1266, 0.2022, 0.1710]) -Greedy action tensor([ 0.6780, -0.1432, -0.0804, -0.1561]) tensor([0.4269, 0.1878, 0.1999, 0.1854]) -Greedy action tensor([ 1.0105, -0.7423, 0.2486, -0.5688]) tensor([0.5417, 0.0939, 0.2528, 0.1116]) -Greedy action tensor([ 0.8091, -0.7211, 0.0214, -0.3696]) tensor([0.5053, 0.1094, 0.2298, 0.1555]) -Greedy action tensor([ 1.1583, -0.6796, 0.0798, -0.6316]) tensor([0.6001, 0.0955, 0.2041, 0.1002]) -Greedy action tensor([ 1.0253, -0.4195, -0.1469, -0.2926]) tensor([0.5515, 0.1300, 0.1708, 0.1476]) -Greedy action tensor([ 0.6301, -0.5439, 0.0273, -0.3451]) tensor([0.4477, 0.1384, 0.2450, 0.1688]) -Greedy action tensor([ 0.8421, -0.4093, 0.0270, -0.2876]) tensor([0.4874, 0.1394, 0.2157, 0.1575]) -Greedy action tensor([ 0.6991, -0.2091, 0.0161, -0.3063]) tensor([0.4397, 0.1773, 0.2221, 0.1609]) -Greedy action tensor([ 0.8994, -0.6720, 0.0043, -0.4186]) tensor([0.5308, 0.1103, 0.2169, 0.1421]) -Greedy action tensor([ 0.8451, -0.6808, -0.0239, -0.5173]) tensor([0.5283, 0.1149, 0.2216, 0.1353]) -Greedy action tensor([ 0.7289, -0.7000, 0.1000, -0.3821]) tensor([0.4757, 0.1140, 0.2537, 0.1566]) -Greedy action tensor([ 0.8954, -0.5511, -0.0077, -0.4857]) tensor([0.5285, 0.1244, 0.2142, 0.1328]) -Greedy action tensor([ 0.5102, 0.2357, -0.0598, 0.0275]) tensor([0.3398, 0.2583, 0.1922, 0.2097]) -Greedy action tensor([ 0.8458, 0.0092, -0.1301, -0.0099]) tensor([0.4474, 0.1938, 0.1686, 0.1901]) -Greedy action tensor([ 0.6698, 0.0082, -0.0949, 0.1197]) tensor([0.3909, 0.2017, 0.1819, 0.2255]) -Greedy action tensor([ 0.6254, -0.6065, 0.0572, -0.4003]) tensor([0.4511, 0.1316, 0.2556, 0.1617]) -Greedy action tensor([ 1.3104, -0.8413, 0.0110, -0.6230]) tensor([0.6520, 0.0758, 0.1778, 0.0943]) -Greedy action tensor([ 0.6957, -0.5010, -0.1978, -0.1130]) tensor([0.4636, 0.1401, 0.1897, 0.2065]) -Greedy action tensor([ 1.2597, -1.1083, 0.0391, -0.6830]) tensor([0.6527, 0.0611, 0.1926, 0.0935]) -Greedy action tensor([ 0.6846, -0.4932, -0.0563, -0.2025]) tensor([0.4553, 0.1402, 0.2170, 0.1875]) -Greedy action tensor([ 0.6278, -0.1355, -0.1072, -0.0969]) tensor([0.4115, 0.1918, 0.1973, 0.1994]) -Greedy action tensor([ 1.0971, -0.3280, -0.2232, -0.3493]) tensor([0.5737, 0.1380, 0.1532, 0.1351]) -Greedy action tensor([ 0.8202, -0.4773, -0.0975, -0.0960]) tensor([0.4825, 0.1318, 0.1927, 0.1930]) -Greedy action tensor([ 1.0651, -0.4376, 0.0571, -0.3821]) tensor([0.5486, 0.1221, 0.2002, 0.1291]) -Greedy action tensor([ 0.7695, -0.5730, -0.0274, -0.4326]) tensor([0.4969, 0.1298, 0.2240, 0.1493]) -Greedy action tensor([ 0.4232, -0.1161, 0.0156, -0.1817]) tensor([0.3578, 0.2087, 0.2381, 0.1954]) -Greedy action tensor([ 0.9105, -0.5001, -0.0581, -0.5696]) tensor([0.5402, 0.1318, 0.2051, 0.1230]) -Greedy action tensor([ 0.9741, -0.6517, -0.1179, -0.3421]) tensor([0.5554, 0.1093, 0.1864, 0.1489]) -Greedy action tensor([ 0.7496, -0.2966, -0.1208, -0.3537]) tensor([0.4758, 0.1671, 0.1993, 0.1579]) -Greedy action tensor([ 0.8280, -0.6598, 0.0090, -0.4114]) tensor([0.5112, 0.1155, 0.2254, 0.1480]) -Greedy action tensor([ 1.0116, -0.5112, 0.0526, -0.5258]) tensor([0.5506, 0.1201, 0.2110, 0.1183]) -Greedy action tensor([ 0.3565, -0.0866, -0.0054, -0.5434]) tensor([0.3643, 0.2339, 0.2537, 0.1481]) -Greedy action tensor([ 0.6363, -0.1280, -0.0331, -0.1497]) tensor([0.4110, 0.1914, 0.2104, 0.1873]) -Greedy action tensor([ 0.5560, -0.4187, -0.1416, -0.2707]) tensor([0.4324, 0.1632, 0.2152, 0.1892]) -Greedy action tensor([ 0.7411, -0.5387, 0.0195, -0.1971]) tensor([0.4639, 0.1290, 0.2255, 0.1816]) -Greedy action tensor([ 0.4513, -0.1618, 0.1982, -0.3973]) tensor([0.3641, 0.1973, 0.2827, 0.1559]) -Greedy action tensor([ 0.9307, -0.5452, -0.1768, -0.3697]) tensor([0.5460, 0.1248, 0.1804, 0.1488]) -Greedy action tensor([ 0.9395, -0.6940, 0.0244, -0.5142]) tensor([0.5466, 0.1067, 0.2189, 0.1278]) -Greedy action tensor([ 0.6748, 0.1362, -0.0573, -0.0717]) tensor([0.3939, 0.2299, 0.1894, 0.1867]) -Greedy action tensor([ 0.5446, -0.5391, -0.1194, -0.3086]) tensor([0.4387, 0.1485, 0.2259, 0.1869]) -Greedy action tensor([ 0.5482, -0.5041, -0.1442, -0.1531]) tensor([0.4264, 0.1489, 0.2133, 0.2114]) -Greedy action tensor([ 0.3540, -0.4623, -0.1238, -0.1014]) tensor([0.3709, 0.1639, 0.2300, 0.2352]) -Greedy action tensor([ 0.5933, -0.0509, -0.0451, 0.0202]) tensor([0.3821, 0.2006, 0.2018, 0.2154]) -Greedy action tensor([ 0.9034, -0.4234, 0.0484, -0.2366]) tensor([0.4974, 0.1320, 0.2115, 0.1591]) -Greedy action tensor([ 0.9584, -0.4874, 0.0534, -0.4790]) tensor([0.5326, 0.1254, 0.2155, 0.1265]) -Greedy action tensor([ 0.4368, -0.3813, 0.0779, -0.2907]) tensor([0.3813, 0.1682, 0.2663, 0.1842]) -Greedy action tensor([ 0.5649, -0.1893, -0.0615, -0.0572]) tensor([0.3934, 0.1851, 0.2103, 0.2112]) -Greedy action tensor([ 0.8645, -0.4950, -0.0753, -0.3405]) tensor([0.5136, 0.1319, 0.2006, 0.1539]) -Greedy action tensor([ 1.2225, -0.6210, -0.2602, -0.2471]) tensor([0.6191, 0.0980, 0.1406, 0.1424]) -Greedy action tensor([ 0.9546, -0.5412, -0.0864, -0.5609]) tensor([0.5565, 0.1247, 0.1965, 0.1223]) -Greedy action tensor([ 0.4362, -0.4626, 0.0623, -0.4943]) tensor([0.4017, 0.1635, 0.2764, 0.1584]) -Greedy action tensor([ 0.6602, -0.1441, -0.0536, 0.0302]) tensor([0.4049, 0.1812, 0.1983, 0.2156]) -Greedy action tensor([ 0.5993, -0.4052, -0.1026, -0.2045]) tensor([0.4330, 0.1586, 0.2146, 0.1938]) -Greedy action tensor([ 0.5586, -0.4941, -0.1090, -0.3758]) tensor([0.4435, 0.1548, 0.2275, 0.1742]) -Greedy action tensor([ 0.8708, -0.5278, -0.1356, -0.3588]) tensor([0.5250, 0.1296, 0.1919, 0.1535]) -Greedy action tensor([ 0.7077, -0.1972, -0.0678, -0.0778]) tensor([0.4309, 0.1743, 0.1984, 0.1964]) -Greedy action tensor([ 0.7443, -0.6032, -0.0120, -0.6271]) tensor([0.5043, 0.1311, 0.2367, 0.1280]) -Greedy action tensor([ 0.7000, -0.6625, 0.0564, -0.2235]) tensor([0.4590, 0.1175, 0.2412, 0.1823]) -Greedy action tensor([ 0.6105, -0.1048, -0.0377, -0.0784]) tensor([0.3977, 0.1945, 0.2080, 0.1997]) -Greedy action tensor([ 0.8516, -0.3569, 0.0736, -0.2237]) tensor([0.4764, 0.1423, 0.2188, 0.1625]) -Greedy action tensor([ 0.9059, -0.7067, 0.0215, -0.2851]) tensor([0.5219, 0.1040, 0.2155, 0.1586]) -Greedy action tensor([ 1.0635, -0.5283, -0.1635, -0.4030]) tensor([0.5789, 0.1178, 0.1697, 0.1336]) -Greedy action tensor([ 0.4272, -0.1247, 0.1687, -0.2178]) tensor([0.3481, 0.2005, 0.2688, 0.1826]) -Greedy action tensor([ 1.2490, -0.8790, 0.2491, -0.5614]) tensor([0.6059, 0.0721, 0.2229, 0.0991]) -Greedy action tensor([ 0.5604, -0.0575, 0.0060, -0.1955]) tensor([0.3871, 0.2087, 0.2224, 0.1818]) -Greedy action tensor([ 1.1485, -0.9560, 0.0862, -0.5401]) tensor([0.6052, 0.0738, 0.2092, 0.1118]) -Greedy action tensor([ 0.5840, 0.0813, -0.1159, -0.4750]) tensor([0.4084, 0.2471, 0.2028, 0.1417]) -Greedy action tensor([ 0.7793, -0.2600, 0.0277, -0.2924]) tensor([0.4613, 0.1632, 0.2176, 0.1580]) -Greedy action tensor([ 0.8082, -0.2952, -0.1190, -0.0764]) tensor([0.4672, 0.1550, 0.1849, 0.1929]) -Greedy action tensor([ 0.8439, -0.1773, -0.1186, -0.1529]) tensor([0.4737, 0.1706, 0.1809, 0.1748]) -Greedy action tensor([ 0.1903, 0.1419, -0.1863, -0.3749]) tensor([0.3118, 0.2971, 0.2140, 0.1772]) -Greedy action tensor([ 0.7721, -0.2181, 0.0632, -0.2337]) tensor([0.4486, 0.1666, 0.2208, 0.1641]) -Greedy action tensor([ 0.8110, -0.5264, -0.0891, -0.2796]) tensor([0.4987, 0.1309, 0.2027, 0.1676]) -Greedy action tensor([ 0.8041, -0.6001, -0.1061, -0.2088]) tensor([0.4972, 0.1221, 0.2001, 0.1806]) -Greedy action tensor([ 1.0767, -1.4037, 0.0285, -0.9762]) tensor([0.6399, 0.0536, 0.2243, 0.0821]) -Greedy action tensor([-1.9022, -0.3482, 0.6405, -0.1563]) tensor([0.0414, 0.1957, 0.5259, 0.2371]) -Greedy action tensor([-1.7546, -0.4667, 0.6714, -0.0109]) tensor([0.0462, 0.1674, 0.5224, 0.2641]) -Greedy action tensor([-1.8927, -0.4386, 0.6391, -0.1616]) tensor([0.0425, 0.1821, 0.5351, 0.2403]) -Greedy action tensor([-1.8321, -0.4352, 0.6022, -0.1195]) tensor([0.0455, 0.1838, 0.5187, 0.2520]) -Greedy action tensor([-1.6492, 0.0124, 0.6483, -0.4585]) tensor([0.0513, 0.2700, 0.5100, 0.1686]) -Greedy action tensor([-1.7507, -0.3526, 0.5569, -0.0738]) tensor([0.0489, 0.1980, 0.4915, 0.2616]) -Greedy action tensor([-1.1843, -0.5477, 0.3516, -0.0051]) tensor([0.0927, 0.1752, 0.4306, 0.3014]) -Greedy action tensor([-1.7295, -0.1653, 0.3613, -0.5334]) tensor([0.0582, 0.2782, 0.4710, 0.1925]) -Greedy action tensor([-1.6776, -0.5298, 0.5359, -0.0826]) tensor([0.0549, 0.1729, 0.5019, 0.2704]) -Greedy action tensor([-1.9174, -0.4557, 0.6515, -0.1641]) tensor([0.0414, 0.1787, 0.5407, 0.2392]) -Greedy action tensor([-1.7349, -0.3725, 0.6831, -0.0254]) tensor([0.0462, 0.1804, 0.5183, 0.2552]) -Greedy action tensor([-1.8990, -0.4564, 0.6485, -0.1602]) tensor([0.0422, 0.1786, 0.5391, 0.2401]) -Greedy action tensor([-1.9306, -0.4307, 0.6598, -0.1696]) tensor([0.0406, 0.1819, 0.5413, 0.2362]) -Greedy action tensor([-1.9363, -0.4423, 0.6637, -0.1760]) tensor([0.0404, 0.1801, 0.5444, 0.2351]) -Greedy action tensor([-1.9088, -0.4456, 0.6509, -0.1649]) tensor([0.0417, 0.1802, 0.5395, 0.2386]) -Greedy action tensor([-1.4410, 0.3675, 0.3379, -0.0084]) tensor([0.0581, 0.3544, 0.3441, 0.2434]) -Greedy action tensor([-1.7965, -0.4197, 0.6666, -0.0145]) tensor([0.0442, 0.1750, 0.5185, 0.2624]) -Greedy action tensor([-1.5415, -0.2968, 0.4499, -0.0318]) tensor([0.0613, 0.2127, 0.4488, 0.2772]) -Greedy action tensor([-0.0362, 1.0893, 0.0705, 0.7576]) tensor([0.1350, 0.4161, 0.1502, 0.2986]) -Greedy action tensor([-0.9485, 0.1404, 0.1951, 0.4871]) tensor([0.0884, 0.2627, 0.2774, 0.3715]) -Greedy action tensor([-1.5237, -0.5662, 0.4357, 0.0933]) tensor([0.0635, 0.1655, 0.4508, 0.3201]) -Greedy action tensor([-1.3000, 0.5086, 0.2056, 0.2742]) tensor([0.0608, 0.3712, 0.2742, 0.2937]) -Greedy action tensor([-1.7885, -0.3900, 0.5723, -0.1188]) tensor([0.0477, 0.1932, 0.5057, 0.2534]) -Greedy action tensor([-1.7445, 0.1081, 0.4905, -0.0253]) tensor([0.0448, 0.2859, 0.4191, 0.2502]) -Greedy action tensor([-1.9230, -0.4353, 0.6611, -0.1705]) tensor([0.0409, 0.1811, 0.5420, 0.2360]) -Greedy action tensor([-1.8911, -0.3427, 0.6228, -0.1499]) tensor([0.0421, 0.1980, 0.5199, 0.2401]) -Greedy action tensor([-1.6580, -0.1053, 0.4571, 0.0044]) tensor([0.0519, 0.2450, 0.4298, 0.2734]) -Greedy action tensor([-1.7170, -0.3588, 0.5541, -0.0604]) tensor([0.0505, 0.1962, 0.4889, 0.2645]) -Greedy action tensor([-1.9329, -0.4270, 0.6607, -0.1692]) tensor([0.0405, 0.1824, 0.5412, 0.2360]) -Greedy action tensor([-1.8947, -0.3893, 0.6497, -0.1352]) tensor([0.0416, 0.1873, 0.5295, 0.2416]) -Greedy action tensor([-1.4413, -0.3291, 0.4813, 0.1587]) tensor([0.0632, 0.1921, 0.4319, 0.3128]) -Greedy action tensor([-1.8591, -0.4314, 0.6213, -0.1335]) tensor([0.0440, 0.1834, 0.5255, 0.2471]) -Greedy action tensor([-1.7807, -0.3828, 0.5872, -0.0620]) tensor([0.0470, 0.1900, 0.5012, 0.2619]) -Greedy action tensor([-1.7801, -0.1333, 0.5414, -0.0816]) tensor([0.0458, 0.2376, 0.4664, 0.2502]) -Greedy action tensor([-1.8503, -0.2288, 0.5896, -0.1056]) tensor([0.0430, 0.2176, 0.4933, 0.2461]) -Greedy action tensor([-1.8691, -0.4330, 0.6263, -0.1451]) tensor([0.0436, 0.1833, 0.5287, 0.2444]) -Greedy action tensor([-1.3527, -0.2075, 0.5001, 0.1951]) tensor([0.0657, 0.2065, 0.4190, 0.3088]) -Greedy action tensor([-1.6321, -0.1435, 0.5583, 0.0025]) tensor([0.0513, 0.2273, 0.4585, 0.2630]) -Greedy action tensor([-1.8899, -0.3823, 0.6383, -0.1483]) tensor([0.0421, 0.1901, 0.5275, 0.2402]) -Greedy action tensor([-1.7375, -0.4269, 0.4999, -0.3607]) tensor([0.0554, 0.2056, 0.5194, 0.2196]) -Greedy action tensor([-1.8156, -0.4069, 0.4022, -0.3658]) tensor([0.0539, 0.2206, 0.4955, 0.2299]) -Greedy action tensor([-1.9022, -0.3379, 0.6302, -0.1487]) tensor([0.0414, 0.1980, 0.5213, 0.2392]) -Greedy action tensor([-1.0540, 0.6856, 0.1577, 0.2058]) tensor([0.0736, 0.4194, 0.2474, 0.2596]) -Greedy action tensor([-1.9250, -0.4254, 0.6584, -0.1695]) tensor([0.0408, 0.1828, 0.5403, 0.2361]) -Greedy action tensor([-1.8955, -0.4452, 0.6828, -0.1416]) tensor([0.0413, 0.1761, 0.5440, 0.2386]) -Greedy action tensor([-1.7745, -0.4255, 0.5830, -0.1041]) tensor([0.0482, 0.1859, 0.5096, 0.2563]) -Greedy action tensor([-1.8701, -0.1429, 0.5877, -0.1639]) tensor([0.0420, 0.2362, 0.4905, 0.2313]) -Greedy action tensor([-1.1683, -0.4092, 0.4175, -0.2186]) tensor([0.0943, 0.2015, 0.4605, 0.2438]) -Greedy action tensor([-1.9416, -0.4451, 0.6660, -0.1788]) tensor([0.0402, 0.1796, 0.5457, 0.2344]) -Greedy action tensor([-0.7602, -0.4829, 0.2197, 0.0569]) tensor([0.1380, 0.1821, 0.3676, 0.3124]) -Greedy action tensor([-1.8101, -0.1655, 0.5795, -0.1663]) tensor([0.0449, 0.2326, 0.4900, 0.2324]) -Greedy action tensor([-1.8807, -0.4855, 1.1028, 0.4106]) tensor([0.0288, 0.1164, 0.5697, 0.2851]) -Greedy action tensor([-1.7286, 0.1351, 0.4748, -0.0488]) tensor([0.0457, 0.2948, 0.4141, 0.2453]) -Greedy action tensor([-1.9291, -0.4524, 0.6642, -0.1712]) tensor([0.0407, 0.1783, 0.5447, 0.2362]) -Greedy action tensor([-1.9379, -0.4548, 0.6644, -0.1781]) tensor([0.0405, 0.1783, 0.5461, 0.2352]) -Greedy action tensor([-1.9516, -1.0021, 0.7428, -0.3087]) tensor([0.0425, 0.1097, 0.6283, 0.2195]) -Greedy action tensor([-0.6631, 0.9184, 0.0257, 0.4548]) tensor([0.0916, 0.4456, 0.1825, 0.2803]) -Greedy action tensor([-1.7561, -0.5295, 0.6792, 0.0418]) tensor([0.0457, 0.1559, 0.5222, 0.2761]) -Greedy action tensor([-1.4738, -0.1501, 0.5925, 0.5163]) tensor([0.0501, 0.1882, 0.3954, 0.3664]) -Greedy action tensor([-1.7339, -0.4221, 0.5828, -0.0531]) tensor([0.0494, 0.1836, 0.5015, 0.2655]) -Greedy action tensor([-1.8728, -0.3452, 0.6237, -0.1359]) tensor([0.0427, 0.1967, 0.5182, 0.2424]) -Greedy action tensor([-1.9287, -0.4506, 0.6573, -0.1735]) tensor([0.0409, 0.1793, 0.5431, 0.2366]) -Greedy action tensor([-1.9091, -0.4251, 0.6448, -0.1663]) tensor([0.0417, 0.1839, 0.5361, 0.2382]) -Greedy action tensor([-1.8602, -0.4426, 0.6227, -0.1387]) tensor([0.0441, 0.1818, 0.5277, 0.2464]) -Greedy action tensor([-1.8406, -0.3117, 0.6096, -0.1080]) tensor([0.0437, 0.2018, 0.5070, 0.2474]) -Greedy action tensor([-1.9314, -0.4348, 0.6607, -0.1730]) tensor([0.0406, 0.1814, 0.5424, 0.2356]) -Greedy action tensor([-0.9765, -0.5716, 0.3639, 0.3559]) tensor([0.0989, 0.1483, 0.3779, 0.3749]) -Greedy action tensor([-1.7748, -0.2968, 0.6242, -0.0620]) tensor([0.0456, 0.1998, 0.5019, 0.2527]) -Greedy action tensor([-1.9443, -0.4513, 0.6669, -0.1805]) tensor([0.0402, 0.1787, 0.5468, 0.2343]) -Greedy action tensor([-1.4278, 0.4691, 0.3000, -0.0163]) tensor([0.0575, 0.3831, 0.3235, 0.2358]) -Greedy action tensor([-1.9240, -0.4358, 0.6548, -0.1717]) tensor([0.0410, 0.1817, 0.5407, 0.2366]) -Greedy action tensor([-1.8948, -0.4711, 0.6397, -0.1676]) tensor([0.0428, 0.1775, 0.5392, 0.2405]) -Greedy action tensor([-1.9342, -0.4403, 0.6647, -0.1725]) tensor([0.0404, 0.1802, 0.5439, 0.2355]) -Greedy action tensor([-1.4233, -0.5819, 0.3841, 0.1057]) tensor([0.0713, 0.1654, 0.4345, 0.3289]) -Greedy action tensor([-1.8225, -0.4353, 0.6061, -0.1142]) tensor([0.0457, 0.1831, 0.5187, 0.2524]) -Greedy action tensor([-1.9090, -0.4354, 0.6518, -0.1586]) tensor([0.0415, 0.1814, 0.5379, 0.2392]) -Greedy action tensor([-1.9180, -0.4313, 0.6538, -0.1661]) tensor([0.0412, 0.1822, 0.5392, 0.2375]) -Greedy action tensor([-1.8814, -0.2594, 0.6044, -0.1456]) tensor([0.0421, 0.2132, 0.5058, 0.2389]) -Greedy action tensor([-0.0643, -0.6138, 0.9710, 1.7173]) tensor([0.0968, 0.0559, 0.2725, 0.5748]) -Greedy action tensor([-1.8273, 0.0564, 0.5400, -0.1337]) tensor([0.0422, 0.2777, 0.4504, 0.2296]) -Greedy action tensor([-1.5833, 0.0356, 0.4763, 0.2593]) tensor([0.0495, 0.2498, 0.3882, 0.3125]) -Greedy action tensor([ 0.9367, -0.5240, 0.3914, -0.6083]) tensor([0.4938, 0.1146, 0.2862, 0.1053]) -Greedy action tensor([ 0.3050, -1.0305, -0.6163, -0.1505]) tensor([0.4357, 0.1146, 0.1734, 0.2763]) -Greedy action tensor([-1.0910, 0.2750, -0.3465, -0.1083]) tensor([0.1031, 0.4042, 0.2171, 0.2755]) -Greedy action tensor([ 0.1146, -1.0193, 0.0974, -0.1153]) tensor([0.3226, 0.1038, 0.3172, 0.2564]) -Greedy action tensor([ 0.0069, 0.3794, 0.9300, -0.4889]) tensor([0.1793, 0.2602, 0.4513, 0.1092]) -Greedy action tensor([-0.6891, -1.4813, 0.5011, -0.3904]) tensor([0.1642, 0.0744, 0.5400, 0.2214]) -Greedy action tensor([ 1.1016, -0.5812, -0.1802, 0.0371]) tensor([0.5530, 0.1028, 0.1535, 0.1907]) -Greedy action tensor([-0.5694, 1.6623, 0.0060, -0.8456]) tensor([0.0778, 0.7248, 0.1383, 0.0590]) -Greedy action tensor([-0.4726, -0.1194, 0.5953, -0.6794]) tensor([0.1627, 0.2316, 0.4734, 0.1323]) -Greedy action tensor([ 0.3822, -1.4690, -0.8712, 0.8270]) tensor([0.3330, 0.0523, 0.0951, 0.5196]) -Greedy action tensor([ 0.2477, 0.6620, 0.1247, -0.5163]) tensor([0.2588, 0.3917, 0.2289, 0.1206]) -Greedy action tensor([-0.0366, -1.3207, 0.1863, 0.9242]) tensor([0.1945, 0.0539, 0.2431, 0.5085]) -Greedy action tensor([-0.5416, -0.2787, -0.3790, 0.0572]) tensor([0.1888, 0.2455, 0.2221, 0.3436]) -Greedy action tensor([-0.3205, -1.5437, 0.2904, -0.6557]) tensor([0.2596, 0.0764, 0.4783, 0.1857]) -Greedy action tensor([-0.1672, -0.2478, 0.6359, -0.8901]) tensor([0.2155, 0.1988, 0.4811, 0.1046]) -Greedy action tensor([ 0.3026, -1.1617, 0.1288, 0.0338]) tensor([0.3526, 0.0815, 0.2963, 0.2695]) -Greedy action tensor([ 1.3975, -0.3315, 0.1877, -0.8638]) tensor([0.6329, 0.1123, 0.1888, 0.0660]) -Greedy action tensor([ 0.2708, -1.4287, 0.2478, 0.2448]) tensor([0.3190, 0.0583, 0.3118, 0.3109]) -Greedy action tensor([ 1.6611, -0.0235, 0.1895, -0.2520]) tensor([0.6399, 0.1187, 0.1469, 0.0945]) -Greedy action tensor([ 0.3229, 0.0491, 0.1337, -0.0721]) tensor([0.3066, 0.2332, 0.2537, 0.2065]) -Greedy action tensor([-0.7134, -0.4740, -0.2682, -0.3096]) tensor([0.1877, 0.2384, 0.2929, 0.2810]) -Greedy action tensor([-0.0053, -0.8525, 0.2969, -0.8056]) tensor([0.3095, 0.1327, 0.4188, 0.1390]) -Greedy action tensor([ 0.2059, -0.5794, 0.8392, -0.2480]) tensor([0.2516, 0.1147, 0.4739, 0.1598]) -Greedy action tensor([ 1.1885, -0.6281, 0.4814, 0.6685]) tensor([0.4444, 0.0723, 0.2191, 0.2642]) -Greedy action tensor([-0.4423, -1.0425, 0.1605, -1.0575]) tensor([0.2553, 0.1401, 0.4666, 0.1380]) -Greedy action tensor([ 1.0797, 0.2587, 0.7299, -0.0797]) tensor([0.4068, 0.1790, 0.2867, 0.1276]) -Greedy action tensor([-0.8038, -0.7600, 0.0489, -0.7612]) tensor([0.1840, 0.1922, 0.4317, 0.1920]) -Greedy action tensor([-0.5615, -1.4522, -0.1330, -0.2617]) tensor([0.2328, 0.0955, 0.3574, 0.3142]) -Greedy action tensor([0.6102, 1.1912, 0.3622, 0.0044]) tensor([0.2431, 0.4346, 0.1897, 0.1326]) -Greedy action tensor([ 0.2285, -0.2859, -0.2137, 0.3165]) tensor([0.3001, 0.1794, 0.1928, 0.3277]) -Greedy action tensor([ 0.1478, -1.2500, -0.3507, -0.6014]) tensor([0.4297, 0.1062, 0.2610, 0.2031]) -Greedy action tensor([-1.3770, -0.9017, 1.2221, -1.0389]) tensor([0.0573, 0.0921, 0.7703, 0.0803]) -Greedy action tensor([-0.7508, -0.2400, -0.8834, -0.1880]) tensor([0.1888, 0.3146, 0.1653, 0.3314]) -Greedy action tensor([-0.5181, -1.0229, -0.3819, -0.6523]) tensor([0.2759, 0.1666, 0.3162, 0.2413]) -Greedy action tensor([ 0.3146, 0.3892, -0.2075, -0.8502]) tensor([0.3353, 0.3612, 0.1989, 0.1046]) -Greedy action tensor([ 0.1764, -1.6720, 0.4290, -0.0483]) tensor([0.3083, 0.0486, 0.3969, 0.2463]) -Greedy action tensor([ 0.7242, -1.1233, -0.3479, 0.6908]) tensor([0.4053, 0.0639, 0.1387, 0.3920]) -Greedy action tensor([-0.5343, -0.6999, -0.1328, -0.1410]) tensor([0.2073, 0.1757, 0.3098, 0.3072]) -Greedy action tensor([ 1.7910, -0.9672, 0.2758, 0.9557]) tensor([0.5825, 0.0369, 0.1280, 0.2526]) -Greedy action tensor([ 1.6079, -0.8283, 1.0096, 1.3122]) tensor([0.4200, 0.0367, 0.2309, 0.3124]) -Greedy action tensor([-0.3358, -0.3191, 0.5672, -0.0366]) tensor([0.1715, 0.1743, 0.4230, 0.2312]) -Greedy action tensor([-0.0089, 0.3253, 0.2139, 0.0194]) tensor([0.2139, 0.2988, 0.2673, 0.2200]) -Greedy action tensor([ 0.5380, 0.0029, -0.5126, 1.1398]) tensor([0.2659, 0.1557, 0.0930, 0.4854]) -Greedy action tensor([ 0.5688, -0.7017, 0.0129, 0.3362]) tensor([0.3778, 0.1061, 0.2167, 0.2994]) -Greedy action tensor([ 0.2087, -0.3421, 0.1952, 0.1198]) tensor([0.2875, 0.1658, 0.2837, 0.2631]) -Greedy action tensor([ 0.9318, 0.1345, 0.5715, -0.1582]) tensor([0.4025, 0.1814, 0.2808, 0.1353]) -Greedy action tensor([ 0.3704, 0.6014, -0.1346, -0.7056]) tensor([0.3121, 0.3932, 0.1883, 0.1064]) -Greedy action tensor([ 0.4375, -0.9032, -1.1628, 0.8471]) tensor([0.3367, 0.0881, 0.0680, 0.5072]) -Greedy action tensor([ 0.7068, -1.1605, 0.5179, 0.5709]) tensor([0.3502, 0.0541, 0.2899, 0.3057]) -Greedy action tensor([ 0.0099, -1.3764, -0.4374, 0.6173]) tensor([0.2685, 0.0671, 0.1716, 0.4928]) -Greedy action tensor([ 0.7605, -0.2873, -0.6363, 0.1168]) tensor([0.4709, 0.1652, 0.1165, 0.2474]) -Greedy action tensor([-0.1216, -0.1615, -0.1434, -0.4147]) tensor([0.2714, 0.2607, 0.2655, 0.2024]) -Greedy action tensor([-0.9083, -0.4491, 0.5975, -1.5311]) tensor([0.1311, 0.2075, 0.5910, 0.0703]) -Greedy action tensor([-0.6277, -0.7351, 1.1455, -0.8421]) tensor([0.1164, 0.1045, 0.6853, 0.0939]) -Greedy action tensor([ 0.2661, -1.1955, 0.1352, 0.2369]) tensor([0.3246, 0.0753, 0.2848, 0.3153]) -Greedy action tensor([ 0.9376, -1.8047, -0.2584, 0.3996]) tensor([0.5126, 0.0330, 0.1550, 0.2993]) -Greedy action tensor([-0.6876, -1.9731, 1.0099, 0.5738]) tensor([0.0974, 0.0269, 0.5318, 0.3439]) -Greedy action tensor([ 0.1526, -0.4881, 0.4226, 0.8317]) tensor([0.2079, 0.1096, 0.2724, 0.4101]) -Greedy action tensor([-0.0077, -0.2160, -0.4855, -0.3821]) tensor([0.3205, 0.2603, 0.1988, 0.2204]) -Greedy action tensor([-0.4274, -1.6567, -0.3610, 1.3804]) tensor([0.1182, 0.0346, 0.1263, 0.7208]) -Greedy action tensor([ 0.1390, 0.1252, 0.5620, -0.2227]) tensor([0.2376, 0.2343, 0.3626, 0.1655]) -Greedy action tensor([-0.0055, -0.4259, 0.1076, 0.3032]) tensor([0.2416, 0.1587, 0.2706, 0.3290]) -Greedy action tensor([ 0.5058, -1.4250, 0.3050, 0.4741]) tensor([0.3411, 0.0495, 0.2790, 0.3304]) -Greedy action tensor([ 0.6451, -0.7404, 1.1271, -0.8751]) tensor([0.3238, 0.0810, 0.5243, 0.0708]) -Greedy action tensor([ 1.5557, 0.6790, -0.1129, 0.5837]) tensor([0.5043, 0.2099, 0.0951, 0.1908]) -Greedy action tensor([ 0.3155, -1.6163, -0.0029, 0.2724]) tensor([0.3534, 0.0512, 0.2570, 0.3384]) -Greedy action tensor([-0.3527, -0.6604, -0.8449, 0.4725]) tensor([0.2160, 0.1588, 0.1321, 0.4931]) -Greedy action tensor([ 0.9839, -0.7537, -0.1380, 0.3074]) tensor([0.4975, 0.0875, 0.1620, 0.2529]) -Greedy action tensor([-0.4245, -1.8949, 0.5727, 0.3919]) tensor([0.1612, 0.0371, 0.4370, 0.3647]) -Greedy action tensor([ 0.2877, -1.0787, 0.1682, -0.8039]) tensor([0.4035, 0.1029, 0.3581, 0.1355]) -Greedy action tensor([ 0.0387, -1.3188, 0.5012, 0.2131]) tensor([0.2478, 0.0638, 0.3935, 0.2950]) -Greedy action tensor([-0.0952, 0.0388, -0.2506, -0.9872]) tensor([0.2933, 0.3354, 0.2511, 0.1202]) -Greedy action tensor([-0.0781, -1.2934, 0.2660, -0.1840]) tensor([0.2773, 0.0822, 0.3911, 0.2494]) -Greedy action tensor([ 0.1427, 0.3351, 0.1049, -0.4091]) tensor([0.2666, 0.3231, 0.2567, 0.1535]) -Greedy action tensor([-0.7690, 0.2050, -1.1090, 0.0072]) tensor([0.1530, 0.4054, 0.1089, 0.3326]) -Greedy action tensor([ 0.0243, -1.8978, 1.9738, -0.6664]) tensor([0.1153, 0.0169, 0.8100, 0.0578]) -Greedy action tensor([-0.1681, -0.7485, 0.6613, -0.4033]) tensor([0.2154, 0.1206, 0.4937, 0.1703]) -Greedy action tensor([ 0.9378, -1.0592, -0.0990, 0.3428]) tensor([0.4897, 0.0665, 0.1737, 0.2701]) -Greedy action tensor([ 1.3783, -1.2690, 0.6675, 1.9747]) tensor([0.2960, 0.0210, 0.1454, 0.5375]) -Greedy action tensor([-1.0283, -0.5669, -1.3941, -0.3629]) tensor([0.1914, 0.3036, 0.1328, 0.3723]) -Greedy action tensor([-1.2815, -0.4300, 0.7617, -0.7480]) tensor([0.0784, 0.1836, 0.6045, 0.1336]) -Greedy action tensor([ 1.4854, -0.3265, -0.6218, 0.8686]) tensor([0.5481, 0.0895, 0.0666, 0.2958]) -Greedy action tensor([ 1.3946, -0.2212, -0.6604, 0.6373]) tensor([0.5569, 0.1107, 0.0713, 0.2611]) -Greedy action tensor([ 2.0055, -0.8577, -0.5076, 0.9278]) tensor([0.6764, 0.0386, 0.0548, 0.2302]) -Greedy action tensor([ 0.9526, -0.1429, -0.4285, 0.3217]) tensor([0.4722, 0.1579, 0.1187, 0.2513]) -Greedy action tensor([ 1.7241, -0.1507, -0.7262, 0.1152]) tensor([0.6946, 0.1065, 0.0599, 0.1390]) -Greedy action tensor([ 2.0132, -1.1370, -0.1045, 0.3466]) tensor([0.7396, 0.0317, 0.0890, 0.1397]) -Greedy action tensor([ 2.8547, -1.5531, 0.0300, 0.9566]) tensor([0.8187, 0.0100, 0.0486, 0.1227]) -Greedy action tensor([ 1.2644, -0.7729, -0.4940, -0.1348]) tensor([0.6454, 0.0841, 0.1112, 0.1593]) -Greedy action tensor([ 2.0555, 0.5219, -0.1590, 0.3943]) tensor([0.6601, 0.1424, 0.0721, 0.1254]) -Greedy action tensor([ 0.9802, -0.1751, -0.8396, 0.5679]) tensor([0.4675, 0.1472, 0.0758, 0.3095]) -Greedy action tensor([ 1.6881, -0.4897, -0.2595, 0.3605]) tensor([0.6574, 0.0745, 0.0938, 0.1743]) -Greedy action tensor([ 1.2517, -0.5372, -0.5143, 0.2309]) tensor([0.5888, 0.0984, 0.1007, 0.2121]) -Greedy action tensor([ 1.2394, -0.1881, -0.3125, 0.3334]) tensor([0.5388, 0.1293, 0.1141, 0.2178]) -Greedy action tensor([ 1.3603, -0.7120, -0.8563, 0.6317]) tensor([0.5822, 0.0733, 0.0635, 0.2810]) -Greedy action tensor([ 1.0744, -0.5615, -0.2468, 0.2981]) tensor([0.5204, 0.1014, 0.1388, 0.2394]) -Greedy action tensor([ 1.2373, -0.7114, -0.2851, 0.9161]) tensor([0.4794, 0.0683, 0.1046, 0.3477]) -Greedy action tensor([ 1.5092, 0.2492, -0.3399, 0.3257]) tensor([0.5723, 0.1623, 0.0901, 0.1753]) -Greedy action tensor([ 1.3758, -0.5291, -0.5420, 0.2616]) tensor([0.6158, 0.0917, 0.0905, 0.2021]) -Greedy action tensor([ 0.7598, -0.2239, 0.1017, -0.0357]) tensor([0.4268, 0.1596, 0.2210, 0.1926]) -Greedy action tensor([ 2.1356, 0.5799, -0.2917, 0.1309]) tensor([0.6974, 0.1472, 0.0616, 0.0939]) -Greedy action tensor([ 1.2967, -0.4639, -0.0748, 0.3465]) tensor([0.5518, 0.0949, 0.1400, 0.2134]) -Greedy action tensor([ 1.3061, -0.1791, -0.5979, 0.2778]) tensor([0.5770, 0.1307, 0.0860, 0.2064]) -Greedy action tensor([ 1.3877, -0.5364, -0.1004, 0.1333]) tensor([0.6035, 0.0881, 0.1363, 0.1721]) -Greedy action tensor([ 1.3466, -0.8342, -0.2920, 0.5720]) tensor([0.5656, 0.0639, 0.1099, 0.2607]) -Greedy action tensor([ 1.1303, -0.3307, 0.0361, 0.1875]) tensor([0.5111, 0.1186, 0.1711, 0.1991]) -Greedy action tensor([ 1.8512, -0.6596, -0.6830, -0.0073]) tensor([0.7596, 0.0617, 0.0603, 0.1184]) -Greedy action tensor([ 1.3265, -0.8480, -0.3490, 0.2913]) tensor([0.6039, 0.0686, 0.1130, 0.2145]) -Greedy action tensor([ 1.7141, -0.4263, -0.4554, 0.3337]) tensor([0.6742, 0.0793, 0.0770, 0.1695]) -Greedy action tensor([ 1.3483, -0.0200, -0.4899, 0.5933]) tensor([0.5309, 0.1351, 0.0845, 0.2495]) -Greedy action tensor([ 1.2088, -0.7304, -0.2253, 0.2933]) tensor([0.5610, 0.0807, 0.1337, 0.2246]) -Greedy action tensor([ 1.5427, 0.0847, -0.9437, 0.5936]) tensor([0.5872, 0.1366, 0.0489, 0.2273]) -Greedy action tensor([ 2.3846, -0.8888, -0.2130, 0.9816]) tensor([0.7363, 0.0279, 0.0548, 0.1810]) -Greedy action tensor([ 1.6041, -0.6651, -0.3644, 0.0417]) tensor([0.6884, 0.0712, 0.0961, 0.1443]) -Greedy action tensor([ 1.6797, 0.0586, -0.3882, 0.3262]) tensor([0.6319, 0.1249, 0.0799, 0.1633]) -Greedy action tensor([ 1.2199, -0.1686, -0.7995, 0.1070]) tensor([0.5845, 0.1458, 0.0776, 0.1921]) -Greedy action tensor([ 0.9859, -0.4356, -0.4431, 0.3603]) tensor([0.4961, 0.1197, 0.1188, 0.2654]) -Greedy action tensor([ 1.8277, -0.9749, -0.3257, 0.6003]) tensor([0.6804, 0.0413, 0.0790, 0.1994]) -Greedy action tensor([ 1.0983, -0.3037, -0.4103, 0.0585]) tensor([0.5492, 0.1352, 0.1215, 0.1942]) -Greedy action tensor([ 1.8455, -0.2105, -0.1730, 0.3739]) tensor([0.6710, 0.0859, 0.0891, 0.1540]) -Greedy action tensor([ 2.0153, -1.2489, -0.0257, 0.8719]) tensor([0.6726, 0.0257, 0.0874, 0.2144]) -Greedy action tensor([ 1.1585, -0.1334, -0.8749, 0.2723]) tensor([0.5501, 0.1511, 0.0720, 0.2268]) -Greedy action tensor([ 1.8642, -0.8041, -0.1579, 0.6572]) tensor([0.6663, 0.0462, 0.0882, 0.1993]) -Greedy action tensor([ 0.9816, -0.4235, -0.1940, -0.0286]) tensor([0.5213, 0.1279, 0.1609, 0.1899]) -Greedy action tensor([ 1.2345, -0.4648, -0.5790, 0.6463]) tensor([0.5260, 0.0962, 0.0858, 0.2921]) -Greedy action tensor([ 1.9606, 0.3647, -0.1942, -0.1809]) tensor([0.6963, 0.1412, 0.0807, 0.0818]) -Greedy action tensor([ 1.5973, -0.6314, -0.1788, 0.0299]) tensor([0.6732, 0.0725, 0.1140, 0.1404]) -Greedy action tensor([ 2.0503, -0.9353, -0.2779, 0.5536]) tensor([0.7289, 0.0368, 0.0711, 0.1632]) -Greedy action tensor([ 1.8694, -0.9584, -0.2263, 0.5969]) tensor([0.6839, 0.0404, 0.0841, 0.1916]) -Greedy action tensor([ 1.0548, -0.4295, -0.3744, 0.7649]) tensor([0.4516, 0.1023, 0.1081, 0.3379]) -Greedy action tensor([ 1.1204, 0.0747, -0.4500, 0.1296]) tensor([0.5180, 0.1820, 0.1077, 0.1923]) -Greedy action tensor([ 0.8244, -0.5402, 0.0548, 0.0248]) tensor([0.4612, 0.1178, 0.2136, 0.2073]) -Greedy action tensor([ 1.3494, 0.0246, -0.6165, 0.3268]) tensor([0.5664, 0.1506, 0.0793, 0.2037]) -Greedy action tensor([ 1.3786, -0.4740, -0.6106, 0.4949]) tensor([0.5859, 0.0919, 0.0802, 0.2421]) -Greedy action tensor([ 1.1507, -0.5595, -0.1518, -0.1026]) tensor([0.5753, 0.1040, 0.1564, 0.1643]) -Greedy action tensor([ 2.0694, -1.3603, 0.0785, 0.2073]) tensor([0.7551, 0.0245, 0.1031, 0.1173]) -Greedy action tensor([ 1.3231, -0.5347, -0.4519, 0.4646]) tensor([0.5716, 0.0892, 0.0969, 0.2423]) -Greedy action tensor([ 1.0528, -0.5035, -0.3724, 0.0611]) tensor([0.5487, 0.1157, 0.1319, 0.2036]) -Greedy action tensor([ 1.2248, -0.1285, -0.1047, 0.1403]) tensor([0.5373, 0.1388, 0.1422, 0.1817]) -Greedy action tensor([ 1.2154, -0.4400, -0.3405, 0.2137]) tensor([0.5652, 0.1080, 0.1193, 0.2076]) -Greedy action tensor([ 1.0480, 0.0113, -0.2613, 0.2462]) tensor([0.4823, 0.1711, 0.1302, 0.2164]) -Greedy action tensor([ 1.6427, -0.7015, -0.3885, 0.4668]) tensor([0.6512, 0.0625, 0.0854, 0.2009]) -Greedy action tensor([ 1.3664, -0.2909, -0.4217, 0.2308]) tensor([0.5955, 0.1135, 0.0996, 0.1913]) -Greedy action tensor([ 1.2697, 0.1529, -1.1029, 0.2819]) tensor([0.5577, 0.1826, 0.0520, 0.2077]) -Greedy action tensor([ 1.2164, 0.0277, -0.8584, -0.1747]) tensor([0.5956, 0.1814, 0.0748, 0.1482]) -Greedy action tensor([ 1.1309, -0.5828, -0.4490, 0.5534]) tensor([0.5135, 0.0925, 0.1058, 0.2882]) -Greedy action tensor([ 1.4981, -0.3630, -0.5447, -0.0571]) tensor([0.6683, 0.1039, 0.0867, 0.1411]) -Greedy action tensor([ 1.1440, 0.1454, -0.7679, 0.4817]) tensor([0.4921, 0.1813, 0.0727, 0.2538]) -Greedy action tensor([ 1.6155, -0.8248, -0.4457, 0.4571]) tensor([0.6543, 0.0570, 0.0833, 0.2054]) -Greedy action tensor([ 1.4311, -0.7372, -0.3029, 0.5201]) tensor([0.5906, 0.0676, 0.1043, 0.2375]) -Greedy action tensor([ 1.5831, -0.5176, 0.0782, 0.5949]) tensor([0.5825, 0.0713, 0.1293, 0.2168]) -Greedy action tensor([ 2.5451, -1.6230, -0.1240, 0.4386]) tensor([0.8289, 0.0128, 0.0575, 0.1008]) -Greedy action tensor([ 1.0727, -0.5351, -0.4257, 0.2513]) tensor([0.5366, 0.1075, 0.1199, 0.2360]) -Greedy action tensor([ 2.1282, -1.0690, -0.3853, 0.5373]) tensor([0.7544, 0.0308, 0.0611, 0.1537]) -Greedy action tensor([ 1.4809, -0.4851, -0.4359, 0.2940]) tensor([0.6280, 0.0879, 0.0924, 0.1917]) -Greedy action tensor([ 1.9175, -0.7198, -0.1405, 0.0279]) tensor([0.7405, 0.0530, 0.0946, 0.1119]) -Greedy action tensor([ 1.8772, -0.7047, -0.5916, 0.1597]) tensor([0.7464, 0.0564, 0.0632, 0.1340]) -Greedy action tensor([ 0.9628, -0.1367, -0.2943, -0.4050]) tensor([0.5341, 0.1779, 0.1519, 0.1360]) -Greedy action tensor([ 1.6652, -0.3304, -0.4639, 0.2204]) tensor([0.6708, 0.0912, 0.0798, 0.1582]) -Greedy action tensor([ 1.1134, -0.5162, -0.5125, 0.3251]) tensor([0.5413, 0.1061, 0.1065, 0.2461]) -Greedy action tensor([ 1.3592, -0.1628, -0.6315, 0.1369]) tensor([0.6063, 0.1323, 0.0828, 0.1786]) -Greedy action tensor([ 1.7840, -0.9048, -0.3398, 0.4688]) tensor([0.6868, 0.0467, 0.0821, 0.1844]) -Greedy action tensor([ 0.7814, -0.6267, 0.0855, -0.6782]) tensor([0.5062, 0.1238, 0.2524, 0.1176]) -Greedy action tensor([ 0.8724, -0.5648, -0.0351, -0.4711]) tensor([0.5258, 0.1249, 0.2122, 0.1372]) -Greedy action tensor([ 0.4162, -0.1639, -0.0160, -0.1310]) tensor([0.3588, 0.2008, 0.2329, 0.2076]) -Greedy action tensor([ 0.6876, 0.0849, 0.1194, -0.1675]) tensor([0.3938, 0.2156, 0.2231, 0.1675]) -Greedy action tensor([ 0.9904, -0.5572, -0.0493, -0.2107]) tensor([0.5356, 0.1139, 0.1893, 0.1611]) -Greedy action tensor([ 0.4005, -0.4400, -0.1480, -0.0558]) tensor([0.3784, 0.1633, 0.2186, 0.2397]) -Greedy action tensor([ 0.5441, -0.3609, 0.1239, -0.2430]) tensor([0.3974, 0.1607, 0.2610, 0.1809]) -Greedy action tensor([ 0.8437, -0.4204, -0.0300, -0.3569]) tensor([0.4998, 0.1412, 0.2086, 0.1504]) -Greedy action tensor([ 1.3459, -0.9283, 0.0666, -0.4822]) tensor([0.6486, 0.0667, 0.1804, 0.1042]) -Greedy action tensor([ 0.4271, -0.2609, -0.0124, -0.3539]) tensor([0.3839, 0.1929, 0.2474, 0.1758]) -Greedy action tensor([ 0.2883, -0.1211, -0.0546, -0.0449]) tensor([0.3236, 0.2149, 0.2297, 0.2319]) -Greedy action tensor([ 1.1193, -0.8175, 0.2211, -0.6341]) tensor([0.5798, 0.0836, 0.2362, 0.1004]) -Greedy action tensor([ 0.8838, -0.7961, 0.1016, -0.4183]) tensor([0.5220, 0.0973, 0.2387, 0.1420]) -Greedy action tensor([ 0.8992, -0.6334, 0.0240, -0.6313]) tensor([0.5408, 0.1168, 0.2254, 0.1170]) -Greedy action tensor([ 0.7195, -0.2503, -0.0711, -0.1657]) tensor([0.4454, 0.1689, 0.2020, 0.1838]) -Greedy action tensor([ 1.3163, -0.4739, -0.3086, -0.3307]) tensor([0.6425, 0.1072, 0.1265, 0.1238]) -Greedy action tensor([ 1.1349, -0.8230, -0.0497, -0.6089]) tensor([0.6166, 0.0870, 0.1886, 0.1078]) -Greedy action tensor([ 0.4357, -0.5399, -0.1973, -0.1811]) tensor([0.4086, 0.1540, 0.2169, 0.2205]) -Greedy action tensor([ 1.0921, -0.3186, 0.0520, -0.5756]) tensor([0.5599, 0.1366, 0.1979, 0.1056]) -Greedy action tensor([ 1.3642, -0.3925, 0.1352, -0.6253]) tensor([0.6242, 0.1077, 0.1826, 0.0854]) -Greedy action tensor([ 0.5087, -0.3009, 0.0134, -0.3619]) tensor([0.4044, 0.1800, 0.2464, 0.1693]) -Greedy action tensor([ 0.7543, -0.3977, 0.0650, -0.1662]) tensor([0.4512, 0.1426, 0.2265, 0.1797]) -Greedy action tensor([ 0.8151, -0.4906, 0.0563, -0.2622]) tensor([0.4808, 0.1303, 0.2251, 0.1637]) -Greedy action tensor([ 0.8537, -0.4453, -0.1488, -0.1375]) tensor([0.4973, 0.1357, 0.1825, 0.1846]) -Greedy action tensor([ 0.6314, -0.4091, -0.1465, -0.4383]) tensor([0.4639, 0.1639, 0.2131, 0.1592]) -Greedy action tensor([ 0.8703, -0.5592, -0.0607, -0.4624]) tensor([0.5271, 0.1262, 0.2077, 0.1390]) -Greedy action tensor([ 1.0015, -0.8324, 0.1316, -0.5368]) tensor([0.5575, 0.0891, 0.2336, 0.1197]) -Greedy action tensor([ 0.7812, -0.2478, 0.0675, -0.2550]) tensor([0.4542, 0.1623, 0.2224, 0.1611]) -Greedy action tensor([ 1.1354, -0.8225, 0.1192, -0.7672]) tensor([0.6052, 0.0854, 0.2191, 0.0903]) -Greedy action tensor([ 0.9003, -0.4831, -0.0858, -0.2493]) tensor([0.5153, 0.1292, 0.1922, 0.1632]) -Greedy action tensor([ 0.9225, -0.6205, -0.0751, -0.3786]) tensor([0.5392, 0.1152, 0.1988, 0.1468]) -Greedy action tensor([ 0.6418, -0.5315, -0.0536, -0.1647]) tensor([0.4435, 0.1372, 0.2213, 0.1980]) -Greedy action tensor([ 0.8125, -0.6428, 0.0335, -0.4242]) tensor([0.5044, 0.1177, 0.2315, 0.1465]) -Greedy action tensor([ 0.5722, 0.0553, -0.0403, -0.1483]) tensor([0.3810, 0.2272, 0.2065, 0.1854]) -Greedy action tensor([ 0.8466, -0.8203, -0.0554, -0.4621]) tensor([0.5363, 0.1013, 0.2176, 0.1449]) -Greedy action tensor([ 0.5594, -0.3594, -0.1739, -0.2372]) tensor([0.4292, 0.1712, 0.2061, 0.1935]) -Greedy action tensor([ 1.2265, -0.7240, -0.0615, -0.6683]) tensor([0.6376, 0.0907, 0.1759, 0.0959]) -Greedy action tensor([ 0.6451, -0.3136, 0.0021, -0.1258]) tensor([0.4217, 0.1617, 0.2217, 0.1950]) -Greedy action tensor([ 0.4182, -0.1092, -0.0178, -0.0492]) tensor([0.3492, 0.2061, 0.2258, 0.2188]) -Greedy action tensor([ 0.3624, -0.0476, -0.0359, -0.1733]) tensor([0.3424, 0.2272, 0.2299, 0.2004]) -Greedy action tensor([ 0.6159, -0.3404, -0.0948, -0.1442]) tensor([0.4268, 0.1640, 0.2097, 0.1996]) -Greedy action tensor([ 0.4989, -0.1244, -0.1282, -0.3075]) tensor([0.3973, 0.2130, 0.2122, 0.1774]) -Greedy action tensor([ 0.6973, -0.6364, -0.1256, -0.2728]) tensor([0.4804, 0.1266, 0.2110, 0.1821]) -Greedy action tensor([ 0.8761, -0.7570, -0.0564, -0.5931]) tensor([0.5498, 0.1074, 0.2164, 0.1265]) -Greedy action tensor([ 1.2610, -0.2619, -0.1303, -0.0017]) tensor([0.5715, 0.1246, 0.1422, 0.1617]) -Greedy action tensor([ 1.0547, -0.2798, -0.1912, 0.0113]) tensor([0.5254, 0.1383, 0.1511, 0.1851]) -Greedy action tensor([0.4156, 0.0848, 0.1279, 0.1567]) tensor([0.3086, 0.2217, 0.2315, 0.2382]) -Greedy action tensor([ 0.6415, -0.4801, -0.2121, -0.4840]) tensor([0.4817, 0.1569, 0.2051, 0.1563]) -Greedy action tensor([ 0.8315, -0.0293, 0.0451, -0.1828]) tensor([0.4462, 0.1887, 0.2032, 0.1618]) -Greedy action tensor([ 0.5210, -0.0137, -0.0378, 0.0139]) tensor([0.3623, 0.2123, 0.2072, 0.2182]) -Greedy action tensor([ 0.8061, -0.6182, -0.0710, -0.3313]) tensor([0.5057, 0.1217, 0.2104, 0.1622]) -Greedy action tensor([ 0.9479, -0.4456, -0.3486, -0.4164]) tensor([0.5627, 0.1397, 0.1539, 0.1438]) -Greedy action tensor([ 1.0964, -0.6795, -0.0302, -0.5324]) tensor([0.5918, 0.1002, 0.1918, 0.1161]) -Greedy action tensor([ 0.5034, 0.0012, -0.0621, -0.0112]) tensor([0.3609, 0.2184, 0.2050, 0.2157]) -Greedy action tensor([ 1.2366, -0.5954, -0.0986, -0.3831]) tensor([0.6168, 0.0988, 0.1623, 0.1221]) -Greedy action tensor([ 0.8129, -0.7425, 0.0602, -0.3040]) tensor([0.4976, 0.1051, 0.2344, 0.1629]) -Greedy action tensor([ 0.8067, -0.7842, -0.0139, -0.4167]) tensor([0.5160, 0.1051, 0.2271, 0.1518]) -Greedy action tensor([ 1.0754, -1.1031, 0.0574, -0.4173]) tensor([0.5885, 0.0666, 0.2126, 0.1323]) -Greedy action tensor([ 0.7944, 0.3930, -0.2241, -0.1622]) tensor([0.4141, 0.2772, 0.1496, 0.1591]) -Greedy action tensor([ 0.9316, -0.1221, -0.1547, -0.3089]) tensor([0.5062, 0.1765, 0.1708, 0.1464]) -Greedy action tensor([ 0.6952, -0.3581, -0.0639, -0.3041]) tensor([0.4577, 0.1596, 0.2142, 0.1685]) -Greedy action tensor([ 0.5728, -0.5515, -0.1070, -0.2810]) tensor([0.4430, 0.1439, 0.2245, 0.1886]) -Greedy action tensor([ 1.2407, -0.6280, -0.2238, -0.3528]) tensor([0.6294, 0.0971, 0.1455, 0.1279]) -Greedy action tensor([ 0.4723, -0.0085, -0.1253, -0.0806]) tensor([0.3645, 0.2254, 0.2005, 0.2097]) -Greedy action tensor([ 0.8631, -0.3880, -0.0550, -0.7178]) tensor([0.5287, 0.1513, 0.2111, 0.1088]) -Greedy action tensor([ 0.7971, -0.2262, -0.0956, -0.2095]) tensor([0.4685, 0.1684, 0.1919, 0.1712]) -Greedy action tensor([ 0.7048, -0.1343, -0.1914, -0.1436]) tensor([0.4408, 0.1905, 0.1799, 0.1887]) -Greedy action tensor([ 0.3475, 0.1251, 0.1304, -0.2492]) tensor([0.3169, 0.2537, 0.2550, 0.1745]) -Greedy action tensor([ 0.5021, -0.3903, 0.0530, -0.3428]) tensor([0.4036, 0.1654, 0.2576, 0.1734]) -Greedy action tensor([ 0.9110, -0.5676, -0.1180, -0.1830]) tensor([0.5208, 0.1187, 0.1861, 0.1744]) -Greedy action tensor([ 0.6366, -0.1205, -0.1252, 0.1138]) tensor([0.3955, 0.1855, 0.1846, 0.2344]) -Greedy action tensor([ 0.9214, -0.5445, -0.0836, -0.2259]) tensor([0.5223, 0.1206, 0.1912, 0.1658]) -Greedy action tensor([ 0.9200, -0.5245, -0.1569, -0.3204]) tensor([0.5360, 0.1264, 0.1826, 0.1550]) -Greedy action tensor([ 0.4241, -0.2380, -0.1215, -0.0404]) tensor([0.3671, 0.1894, 0.2128, 0.2307]) -Greedy action tensor([ 0.1627, 0.1831, -0.1359, -0.4394]) tensor([0.3021, 0.3083, 0.2241, 0.1654]) -Greedy action tensor([ 0.8309, -0.1261, -0.0146, -0.1317]) tensor([0.4555, 0.1749, 0.1956, 0.1740]) -Greedy action tensor([ 0.7505, -0.4865, -0.0228, -0.1917]) tensor([0.4670, 0.1355, 0.2155, 0.1820]) -Greedy action tensor([ 1.0294, -1.1191, 0.1064, -0.4759]) tensor([0.5761, 0.0672, 0.2289, 0.1279]) -Greedy action tensor([ 0.4268, -0.3594, -0.1677, -0.0544]) tensor([0.3809, 0.1735, 0.2102, 0.2354]) -Greedy action tensor([ 0.6026, -0.3910, 0.0304, -0.1448]) tensor([0.4153, 0.1537, 0.2343, 0.1967]) -Greedy action tensor([ 0.8979, -0.7139, -0.0631, -0.4672]) tensor([0.5442, 0.1086, 0.2082, 0.1390]) -Greedy action tensor([-1.8750, -0.4120, 0.6252, -0.1492]) tensor([0.0433, 0.1868, 0.5270, 0.2429]) -Greedy action tensor([-1.8013, -0.4110, 0.6049, -0.0899]) tensor([0.0462, 0.1856, 0.5124, 0.2558]) -Greedy action tensor([-1.8386, -0.3074, 0.5981, -0.1342]) tensor([0.0443, 0.2050, 0.5069, 0.2437]) -Greedy action tensor([-1.9112, -0.4348, 0.6497, -0.1630]) tensor([0.0416, 0.1819, 0.5379, 0.2387]) -Greedy action tensor([-1.9420, -0.4454, 0.6652, -0.1790]) tensor([0.0402, 0.1797, 0.5456, 0.2345]) -Greedy action tensor([-1.2083, -0.1573, 0.5023, 0.3514]) tensor([0.0707, 0.2021, 0.3910, 0.3362]) -Greedy action tensor([-1.8576, -0.3069, 0.6035, -0.1337]) tensor([0.0434, 0.2046, 0.5086, 0.2433]) -Greedy action tensor([-1.8836, -0.4397, 0.6349, -0.1634]) tensor([0.0430, 0.1824, 0.5341, 0.2404]) -Greedy action tensor([-1.4872, 0.0078, 0.4017, -0.0562]) tensor([0.0615, 0.2744, 0.4068, 0.2573]) -Greedy action tensor([-1.9205, -0.4339, 0.6496, -0.1712]) tensor([0.0413, 0.1824, 0.5391, 0.2372]) -Greedy action tensor([-1.7675, -0.4124, 0.6700, -0.0201]) tensor([0.0453, 0.1757, 0.5188, 0.2602]) -Greedy action tensor([0.1773, 1.0732, 0.0188, 0.1646]) tensor([0.1890, 0.4630, 0.1613, 0.1866]) -Greedy action tensor([-1.6101, -0.2868, 0.6969, 0.3122]) tensor([0.0462, 0.1736, 0.4642, 0.3160]) -Greedy action tensor([-1.9449, -0.4500, 0.6684, -0.1800]) tensor([0.0401, 0.1788, 0.5470, 0.2342]) -Greedy action tensor([-1.4018, 0.6298, 0.2431, 0.1680]) tensor([0.0537, 0.4097, 0.2783, 0.2582]) -Greedy action tensor([-1.9020, -0.4060, 0.6551, -0.1393]) tensor([0.0413, 0.1845, 0.5332, 0.2409]) -Greedy action tensor([-1.8812, -0.3149, 0.6178, -0.1525]) tensor([0.0424, 0.2030, 0.5158, 0.2388]) -Greedy action tensor([-1.7288, -0.2959, 0.5426, -0.0942]) tensor([0.0500, 0.2094, 0.4844, 0.2562]) -Greedy action tensor([-1.9330, -0.4386, 0.6598, -0.1742]) tensor([0.0406, 0.1810, 0.5427, 0.2357]) -Greedy action tensor([-1.7004, -0.4899, 0.5434, 0.0400]) tensor([0.0513, 0.1722, 0.4839, 0.2925]) -Greedy action tensor([-1.9144, -0.4212, 0.6542, -0.1627]) tensor([0.0412, 0.1835, 0.5378, 0.2376]) -Greedy action tensor([-1.8809, -0.4351, 0.6356, -0.1365]) tensor([0.0428, 0.1818, 0.5304, 0.2450]) -Greedy action tensor([-1.7481, -0.4966, 0.5605, -0.0597]) tensor([0.0501, 0.1751, 0.5039, 0.2710]) -Greedy action tensor([-1.8017, -0.3552, 0.6166, -0.0883]) tensor([0.0454, 0.1929, 0.5098, 0.2519]) -Greedy action tensor([-1.7480, -0.2319, 0.5321, -0.0513]) tensor([0.0481, 0.2191, 0.4703, 0.2625]) -Greedy action tensor([-1.7900, -0.3689, 0.6076, -0.0662]) tensor([0.0460, 0.1905, 0.5057, 0.2578]) -Greedy action tensor([-1.9386, -0.4432, 0.6672, -0.1766]) tensor([0.0403, 0.1797, 0.5454, 0.2346]) -Greedy action tensor([-1.9000, -0.3912, 0.6493, -0.1484]) tensor([0.0415, 0.1877, 0.5314, 0.2393]) -Greedy action tensor([-1.6386, -0.4848, 0.7565, 0.1779]) tensor([0.0470, 0.1489, 0.5152, 0.2889]) -Greedy action tensor([-1.3174, 0.2370, 0.3434, -0.0486]) tensor([0.0687, 0.3252, 0.3617, 0.2444]) -Greedy action tensor([-1.9280, -0.4516, 0.6736, -0.1687]) tensor([0.0405, 0.1774, 0.5466, 0.2354]) -Greedy action tensor([-1.0456, -0.3680, 0.0841, 0.4816]) tensor([0.0937, 0.1846, 0.2901, 0.4316]) -Greedy action tensor([-1.7077, -0.2608, 0.6436, -0.0187]) tensor([0.0472, 0.2008, 0.4961, 0.2558]) -Greedy action tensor([-1.9185, -0.4264, 0.6576, -0.1664]) tensor([0.0411, 0.1825, 0.5397, 0.2367]) -Greedy action tensor([-1.5879, -0.5066, 0.4740, 0.0234]) tensor([0.0595, 0.1753, 0.4674, 0.2979]) -Greedy action tensor([-1.6839, -0.1045, 0.5295, -0.0487]) tensor([0.0497, 0.2411, 0.4544, 0.2549]) -Greedy action tensor([-1.9201, -0.3966, 0.6521, -0.1573]) tensor([0.0408, 0.1872, 0.5342, 0.2378]) -Greedy action tensor([-1.8478, -0.3759, 0.6197, -0.1248]) tensor([0.0440, 0.1915, 0.5183, 0.2462]) -Greedy action tensor([-1.9102, -0.4463, 0.6501, -0.1641]) tensor([0.0417, 0.1802, 0.5393, 0.2389]) -Greedy action tensor([-1.7784, -0.3145, 0.5678, -0.1219]) tensor([0.0476, 0.2058, 0.4972, 0.2495]) -Greedy action tensor([-1.6521, -0.4404, 0.5189, -0.0819]) tensor([0.0558, 0.1873, 0.4889, 0.2681]) -Greedy action tensor([-1.6217, -0.4281, 0.5104, 0.0095]) tensor([0.0560, 0.1849, 0.4726, 0.2864]) -Greedy action tensor([-1.9250, -0.4361, 0.6569, -0.1697]) tensor([0.0409, 0.1814, 0.5410, 0.2367]) -Greedy action tensor([-1.6738, -0.5488, 0.5438, -0.0569]) tensor([0.0546, 0.1683, 0.5018, 0.2752]) -Greedy action tensor([-1.9255, -0.4251, 0.6552, -0.1643]) tensor([0.0408, 0.1829, 0.5389, 0.2374]) -Greedy action tensor([-1.7585, -0.2397, 0.5360, -0.0632]) tensor([0.0478, 0.2181, 0.4738, 0.2603]) -Greedy action tensor([-1.6961, -0.5119, 0.5431, -0.0206]) tensor([0.0526, 0.1721, 0.4941, 0.2812]) -Greedy action tensor([-1.7577e+00, -2.3026e-01, 6.1592e-01, 1.4321e-03]) tensor([0.0451, 0.2080, 0.4847, 0.2622]) -Greedy action tensor([-1.8911, -0.4492, 0.6431, -0.1578]) tensor([0.0426, 0.1800, 0.5366, 0.2409]) -Greedy action tensor([-1.9276, -0.4350, 0.6595, -0.1718]) tensor([0.0408, 0.1814, 0.5419, 0.2360]) -Greedy action tensor([-1.8347, -0.4461, 0.6329, -0.1122]) tensor([0.0446, 0.1790, 0.5265, 0.2499]) -Greedy action tensor([-1.7536, -0.4428, 0.5648, -0.1136]) tensor([0.0499, 0.1852, 0.5074, 0.2575]) -Greedy action tensor([-1.8885, -0.4405, 0.6378, -0.1567]) tensor([0.0427, 0.1817, 0.5342, 0.2414]) -Greedy action tensor([-1.7597, -0.3085, 0.5544, -0.0424]) tensor([0.0477, 0.2037, 0.4828, 0.2658]) -Greedy action tensor([-1.9077, -0.4441, 0.6505, -0.1614]) tensor([0.0417, 0.1803, 0.5388, 0.2392]) -Greedy action tensor([-1.9351, -0.4422, 0.6631, -0.1748]) tensor([0.0405, 0.1801, 0.5440, 0.2354]) -Greedy action tensor([-1.9275, -0.4456, 0.6607, -0.1703]) tensor([0.0408, 0.1796, 0.5430, 0.2365]) -Greedy action tensor([-1.7877, -0.2920, 0.5962, -0.1068]) tensor([0.0461, 0.2058, 0.5003, 0.2477]) -Greedy action tensor([-1.8877, -0.3732, 0.6308, -0.1509]) tensor([0.0423, 0.1924, 0.5251, 0.2403]) -Greedy action tensor([-1.9458, -0.4481, 0.6672, -0.1808]) tensor([0.0401, 0.1792, 0.5466, 0.2341]) -Greedy action tensor([-1.9290, -0.9790, 0.7894, -0.0803]) tensor([0.0399, 0.1030, 0.6040, 0.2531]) -Greedy action tensor([-1.7107, -0.5177, 0.7010, 0.1478]) tensor([0.0457, 0.1508, 0.5101, 0.2934]) -Greedy action tensor([-0.9706, 0.2829, 0.4080, 0.1823]) tensor([0.0859, 0.3009, 0.3410, 0.2721]) -Greedy action tensor([-1.9329, -0.4548, 0.6650, -0.1739]) tensor([0.0406, 0.1780, 0.5456, 0.2358]) -Greedy action tensor([-1.7920, -0.3778, 0.6547, -0.0330]) tensor([0.0445, 0.1831, 0.5140, 0.2584]) -Greedy action tensor([-1.9233, -0.4251, 0.6610, -0.1537]) tensor([0.0407, 0.1819, 0.5389, 0.2386]) -Greedy action tensor([-0.6827, 0.0654, 0.1903, 0.1414]) tensor([0.1284, 0.2714, 0.3075, 0.2928]) -Greedy action tensor([-1.1743, -0.3006, 0.2483, 0.4470]) tensor([0.0793, 0.1901, 0.3291, 0.4014]) -Greedy action tensor([-1.8198, -0.3441, 0.6365, -0.0963]) tensor([0.0442, 0.1932, 0.5151, 0.2475]) -Greedy action tensor([-1.9110, -0.4305, 0.6466, -0.1646]) tensor([0.0416, 0.1829, 0.5370, 0.2386]) -Greedy action tensor([-1.8775, -0.8494, 0.8457, -0.0240]) tensor([0.0394, 0.1100, 0.5994, 0.2512]) -Greedy action tensor([-1.9136, -0.3831, 0.6481, -0.1627]) tensor([0.0411, 0.1898, 0.5324, 0.2366]) -Greedy action tensor([-1.7813, -0.3665, 0.6557, -0.0953]) tensor([0.0456, 0.1875, 0.5211, 0.2459]) -Greedy action tensor([-1.9295, -0.4368, 0.6619, -0.1724]) tensor([0.0407, 0.1809, 0.5428, 0.2356]) -Greedy action tensor([-1.8652, -0.4846, 0.6243, -0.1409]) tensor([0.0442, 0.1757, 0.5324, 0.2477]) -Greedy action tensor([-0.6042, -0.5555, 0.1607, 0.0675]) tensor([0.1624, 0.1705, 0.3490, 0.3180]) -Greedy action tensor([-1.8795, -0.3643, 0.6339, -0.1376]) tensor([0.0424, 0.1928, 0.5231, 0.2418]) -Greedy action tensor([-1.8467, -0.4254, 0.6166, -0.1330]) tensor([0.0446, 0.1846, 0.5234, 0.2474]) -Greedy action tensor([-1.8854, -0.4027, 0.6385, -0.1438]) tensor([0.0424, 0.1867, 0.5290, 0.2419]) -Greedy action tensor([-1.9316, -0.4664, 0.6901, -0.1673]) tensor([0.0401, 0.1737, 0.5520, 0.2342]) -Greedy action tensor([-1.8672, -0.4484, 0.6305, -0.1481]) tensor([0.0437, 0.1807, 0.5316, 0.2440]) -Greedy action tensor([ 1.7610, -0.9266, -0.0787, 0.3759]) tensor([0.6769, 0.0461, 0.1075, 0.1694]) -Greedy action tensor([ 1.5890, -0.2801, -1.0212, 0.0637]) tensor([0.6919, 0.1067, 0.0509, 0.1505]) -Greedy action tensor([ 1.2765, 0.2911, -0.6805, 0.3831]) tensor([0.5198, 0.1940, 0.0734, 0.2127]) -Greedy action tensor([ 1.1872, 0.1366, -0.5785, 0.5983]) tensor([0.4818, 0.1685, 0.0824, 0.2673]) -Greedy action tensor([ 0.8603, -0.4173, -0.2662, 0.4717]) tensor([0.4384, 0.1222, 0.1421, 0.2973]) -Greedy action tensor([ 1.2043, -0.5935, -0.2902, 0.3540]) tensor([0.5503, 0.0912, 0.1235, 0.2351]) -Greedy action tensor([ 1.1422, -0.3294, -0.1961, -0.1529]) tensor([0.5664, 0.1300, 0.1485, 0.1551]) -Greedy action tensor([ 1.9867, -0.7324, -0.4827, 0.1972]) tensor([0.7589, 0.0500, 0.0642, 0.1268]) -Greedy action tensor([ 1.7148, -0.6374, -0.2344, 0.7928]) tensor([0.6115, 0.0582, 0.0871, 0.2432]) -Greedy action tensor([ 1.0029, -0.3082, -0.0153, 0.3125]) tensor([0.4690, 0.1264, 0.1694, 0.2351]) -Greedy action tensor([ 1.3722, -0.2962, -0.7000, 0.1270]) tensor([0.6241, 0.1177, 0.0786, 0.1797]) -Greedy action tensor([ 1.5074, -1.1197, -0.1381, 0.4577]) tensor([0.6191, 0.0448, 0.1194, 0.2167]) -Greedy action tensor([ 1.5349, -0.9859, -0.2127, 0.6872]) tensor([0.5942, 0.0478, 0.1035, 0.2546]) -Greedy action tensor([ 1.0247, -0.5841, -0.2304, 0.9101]) tensor([0.4207, 0.0842, 0.1199, 0.3751]) -Greedy action tensor([ 1.7531, -1.0565, -0.5455, 0.8231]) tensor([0.6430, 0.0387, 0.0646, 0.2537]) -Greedy action tensor([ 2.1293, -1.2862, -0.2351, 0.7814]) tensor([0.7212, 0.0237, 0.0678, 0.1874]) -Greedy action tensor([ 1.0641, -0.5852, -0.2331, 0.2656]) tensor([0.5221, 0.1003, 0.1427, 0.2349]) -Greedy action tensor([ 1.6530, -0.8616, -0.1678, 0.4017]) tensor([0.6541, 0.0529, 0.1059, 0.1871]) -Greedy action tensor([ 1.9422, -0.4270, -0.7207, 1.0012]) tensor([0.6437, 0.0602, 0.0449, 0.2512]) -Greedy action tensor([ 1.4792, -0.6389, -0.5479, 0.1031]) tensor([0.6646, 0.0799, 0.0876, 0.1679]) -Greedy action tensor([ 1.5656, -0.1541, -0.3712, 0.5115]) tensor([0.5982, 0.1071, 0.0862, 0.2085]) -Greedy action tensor([ 1.6589, -0.7852, -0.2666, 0.3325]) tensor([0.6675, 0.0579, 0.0973, 0.1772]) -Greedy action tensor([ 3.4291, 1.4464, 0.4806, -0.2379]) tensor([0.8226, 0.1133, 0.0431, 0.0210]) -Greedy action tensor([ 1.8833, -0.5473, -0.1384, 0.3521]) tensor([0.6960, 0.0612, 0.0922, 0.1505]) -Greedy action tensor([ 1.2206, -0.0934, -0.7893, 0.2302]) tensor([0.5636, 0.1515, 0.0755, 0.2094]) -Greedy action tensor([ 1.7136, -0.6405, -0.3475, 0.3326]) tensor([0.6786, 0.0645, 0.0864, 0.1706]) -Greedy action tensor([ 2.0753, -0.2572, -0.9079, 0.5144]) tensor([0.7366, 0.0715, 0.0373, 0.1546]) -Greedy action tensor([ 1.5051, -0.5956, -0.1402, 0.1805]) tensor([0.6324, 0.0774, 0.1220, 0.1682]) -Greedy action tensor([ 1.1332, -0.5613, -0.5085, 0.6616]) tensor([0.4997, 0.0918, 0.0968, 0.3118]) -Greedy action tensor([ 1.8346, -0.7543, -0.3509, 0.4884]) tensor([0.6907, 0.0519, 0.0777, 0.1797]) -Greedy action tensor([ 1.4029, -0.5932, -0.2141, 0.6105]) tensor([0.5596, 0.0760, 0.1111, 0.2533]) -Greedy action tensor([ 1.8569, -0.4475, -0.5772, 0.4295]) tensor([0.7006, 0.0699, 0.0614, 0.1681]) -Greedy action tensor([ 1.2952, 0.0979, -0.5163, 0.3442]) tensor([0.5400, 0.1631, 0.0882, 0.2086]) -Greedy action tensor([ 1.4942, -0.2977, -0.8775, 0.5236]) tensor([0.6102, 0.1017, 0.0569, 0.2312]) -Greedy action tensor([ 1.7685, -0.7946, -1.0693, 0.3815]) tensor([0.7218, 0.0556, 0.0423, 0.1803]) -Greedy action tensor([ 1.3456, -0.6007, -0.4600, 0.1277]) tensor([0.6238, 0.0891, 0.1025, 0.1845]) -Greedy action tensor([ 1.3551, -0.5585, -0.0692, 0.2758]) tensor([0.5787, 0.0854, 0.1393, 0.1967]) -Greedy action tensor([ 1.5559, -0.2141, -1.0417, 0.1664]) tensor([0.6693, 0.1140, 0.0498, 0.1668]) -Greedy action tensor([ 1.3037, -0.2189, -0.6151, 0.0377]) tensor([0.6072, 0.1325, 0.0891, 0.1712]) -Greedy action tensor([ 1.8797, -1.0157, -0.1496, 0.5100]) tensor([0.6940, 0.0384, 0.0912, 0.1764]) -Greedy action tensor([ 1.7491, -0.5983, -1.0118, 0.2439]) tensor([0.7242, 0.0692, 0.0458, 0.1608]) -Greedy action tensor([ 2.0269, -1.5931, 0.0202, 0.2050]) tensor([0.7559, 0.0202, 0.1016, 0.1222]) -Greedy action tensor([ 2.2795, -1.3967, -0.1684, 0.6074]) tensor([0.7694, 0.0195, 0.0665, 0.1445]) -Greedy action tensor([ 1.4022, -0.3899, -0.8730, 0.7381]) tensor([0.5605, 0.0934, 0.0576, 0.2885]) -Greedy action tensor([ 1.5246, -0.2529, -0.6973, 0.2490]) tensor([0.6424, 0.1086, 0.0696, 0.1794]) -Greedy action tensor([ 1.3631, -0.5297, -0.5322, 0.4761]) tensor([0.5838, 0.0880, 0.0877, 0.2405]) -Greedy action tensor([ 1.1114, 0.0591, -1.1483, 0.6183]) tensor([0.4844, 0.1691, 0.0506, 0.2959]) -Greedy action tensor([ 1.2811, -0.3535, -0.9379, 0.7853]) tensor([0.5228, 0.1020, 0.0568, 0.3184]) -Greedy action tensor([ 1.3797, 0.1140, -0.2758, 0.4302]) tensor([0.5377, 0.1516, 0.1027, 0.2080]) -Greedy action tensor([ 1.6845, -0.0473, -0.5578, 0.3297]) tensor([0.6489, 0.1148, 0.0689, 0.1674]) -Greedy action tensor([ 1.7016, -0.3669, -0.1336, 0.4824]) tensor([0.6323, 0.0799, 0.1009, 0.1868]) -Greedy action tensor([ 1.3128, -0.1857, -0.2877, 0.0785]) tensor([0.5827, 0.1302, 0.1176, 0.1696]) -Greedy action tensor([ 1.6786, -0.3804, -0.5700, 0.1744]) tensor([0.6871, 0.0877, 0.0725, 0.1527]) -Greedy action tensor([ 1.7998, -0.8231, -0.3779, 0.6102]) tensor([0.6710, 0.0487, 0.0760, 0.2042]) -Greedy action tensor([ 1.8820, -0.6983, -0.3944, 0.3301]) tensor([0.7193, 0.0545, 0.0738, 0.1524]) -Greedy action tensor([ 1.5650, -0.5873, -0.3437, 0.6277]) tensor([0.6038, 0.0702, 0.0895, 0.2365]) -Greedy action tensor([ 1.7243, -0.8588, -0.1722, 0.6037]) tensor([0.6444, 0.0487, 0.0967, 0.2101]) -Greedy action tensor([ 0.9196, -0.0368, -0.0668, -0.1232]) tensor([0.4740, 0.1821, 0.1768, 0.1671]) -Greedy action tensor([ 1.0783, -0.4760, 0.2399, 0.1242]) tensor([0.4929, 0.1042, 0.2131, 0.1898]) -Greedy action tensor([ 1.5539, -0.2848, -0.6781, 0.6056]) tensor([0.6047, 0.0962, 0.0649, 0.2343]) -Greedy action tensor([ 1.6874, -0.9068, -0.2557, 0.4667]) tensor([0.6609, 0.0494, 0.0947, 0.1950]) -Greedy action tensor([ 1.0753, 0.1619, -0.7847, 0.2179]) tensor([0.5048, 0.2025, 0.0786, 0.2142]) -Greedy action tensor([ 1.5373, -0.5134, -0.7581, 0.1391]) tensor([0.6773, 0.0871, 0.0682, 0.1673]) -Greedy action tensor([ 1.7164, 0.5648, -0.4558, 0.3122]) tensor([0.5968, 0.1887, 0.0680, 0.1466]) -Greedy action tensor([ 1.7884, -0.4787, -0.3386, 0.5323]) tensor([0.6633, 0.0687, 0.0791, 0.1889]) -Greedy action tensor([ 1.0695, -0.4461, -0.0393, 0.7460]) tensor([0.4399, 0.0966, 0.1451, 0.3183]) -Greedy action tensor([ 1.5552, -0.9529, -0.4822, 0.6313]) tensor([0.6216, 0.0506, 0.0810, 0.2468]) -Greedy action tensor([ 1.0304, -0.3580, -0.1128, 0.2169]) tensor([0.4971, 0.1240, 0.1585, 0.2204]) -Greedy action tensor([ 1.2814, -0.1474, -0.4554, 0.2391]) tensor([0.5655, 0.1355, 0.0996, 0.1994]) -Greedy action tensor([ 1.0523, -0.0561, -0.1450, 0.1702]) tensor([0.4888, 0.1613, 0.1476, 0.2023]) -Greedy action tensor([ 1.1208, -0.4505, -0.2887, 0.6075]) tensor([0.4877, 0.1013, 0.1191, 0.2919]) -Greedy action tensor([ 1.4587, -0.7509, -0.2889, 0.3312]) tensor([0.6220, 0.0683, 0.1083, 0.2014]) -Greedy action tensor([ 1.3952, -0.3813, -1.0457, 0.0800]) tensor([0.6559, 0.1110, 0.0571, 0.1760]) -Greedy action tensor([ 1.7393, -0.9871, -0.2228, -0.0723]) tensor([0.7302, 0.0478, 0.1026, 0.1193]) -Greedy action tensor([ 2.0122, -0.8259, -0.0681, 0.1029]) tensor([0.7510, 0.0440, 0.0938, 0.1113]) -Greedy action tensor([ 1.4126, -0.2364, -0.3078, 0.5866]) tensor([0.5528, 0.1063, 0.0989, 0.2420]) -Greedy action tensor([ 1.5450, -0.7114, -0.2917, 0.5460]) tensor([0.6126, 0.0642, 0.0976, 0.2256]) -Greedy action tensor([ 1.5006, -0.7151, -0.0557, 0.5026]) tensor([0.5922, 0.0646, 0.1249, 0.2183]) -Greedy action tensor([ 1.3334, -0.7125, -0.3702, 0.8364]) tensor([0.5209, 0.0673, 0.0948, 0.3169]) -Greedy action tensor([ 1.5217, -0.8059, -0.3521, 0.4195]) tensor([0.6316, 0.0616, 0.0970, 0.2098]) -Greedy action tensor([ 1.0062, -0.2270, -0.4447, 0.2037]) tensor([0.5066, 0.1476, 0.1187, 0.2271]) -Greedy action tensor([ 0.5148, -1.1450, 0.4881, 0.1529]) tensor([0.3496, 0.0665, 0.3404, 0.2435]) -Greedy action tensor([-0.7148, -0.9221, -0.0175, -0.0185]) tensor([0.1716, 0.1395, 0.3446, 0.3443]) -Greedy action tensor([ 0.9636, -0.5829, -0.5310, 0.6048]) tensor([0.4682, 0.0997, 0.1050, 0.3270]) -Greedy action tensor([ 0.2464, -1.2742, 0.2886, 0.4585]) tensor([0.2859, 0.0625, 0.2982, 0.3534]) -Greedy action tensor([ 1.4877, -0.4996, -0.3612, 0.6973]) tensor([0.5721, 0.0784, 0.0900, 0.2595]) -Greedy action tensor([-0.5112, -0.7849, 0.4179, -0.3254]) tensor([0.1819, 0.1384, 0.4607, 0.2191]) -Greedy action tensor([ 0.3081, -1.3880, -0.2269, 0.0078]) tensor([0.3985, 0.0731, 0.2334, 0.2951]) -Greedy action tensor([-0.6907, -0.0498, 0.3768, -1.0670]) tensor([0.1540, 0.2924, 0.4479, 0.1057]) -Greedy action tensor([-0.3790, -0.0184, 0.6141, -0.3352]) tensor([0.1619, 0.2321, 0.4369, 0.1691]) -Greedy action tensor([ 0.3810, 0.1504, -0.7693, -0.4503]) tensor([0.3928, 0.3119, 0.1243, 0.1710]) -Greedy action tensor([-0.6048, -0.0846, 0.4409, -0.9513]) tensor([0.1604, 0.2698, 0.4564, 0.1134]) -Greedy action tensor([-0.3652, 0.1454, -0.5911, 0.2816]) tensor([0.1861, 0.3101, 0.1485, 0.3553]) -Greedy action tensor([ 0.8949, -1.3400, -0.2690, 0.6876]) tensor([0.4480, 0.0479, 0.1399, 0.3641]) -Greedy action tensor([ 0.9311, -0.3394, -0.3818, -0.2952]) tensor([0.5426, 0.1523, 0.1460, 0.1592]) -Greedy action tensor([ 0.6626, -0.5401, -0.6084, 0.0622]) tensor([0.4696, 0.1411, 0.1317, 0.2576]) -Greedy action tensor([ 0.1484, -0.8140, -1.1967, 0.7283]) tensor([0.2917, 0.1114, 0.0760, 0.5209]) -Greedy action tensor([1.5515, 0.5531, 1.2045, 0.6913]) tensor([0.4003, 0.1475, 0.2829, 0.1693]) -Greedy action tensor([-0.9077, -0.7334, 0.3986, -0.7222]) tensor([0.1411, 0.1680, 0.5210, 0.1699]) -Greedy action tensor([ 1.2819, -0.2796, 0.0288, 0.8652]) tensor([0.4641, 0.0974, 0.1326, 0.3060]) -Greedy action tensor([-0.1374, 0.4930, 0.2511, -0.1980]) tensor([0.1889, 0.3548, 0.2786, 0.1778]) -Greedy action tensor([-0.2897, -0.0335, -0.9901, -0.1519]) tensor([0.2541, 0.3282, 0.1261, 0.2916]) -Greedy action tensor([-0.4808, -0.2222, -0.2380, -0.0492]) tensor([0.1957, 0.2535, 0.2495, 0.3013]) -Greedy action tensor([ 1.1623, 0.3742, 0.7397, -0.3382]) tensor([0.4286, 0.1949, 0.2809, 0.0956]) -Greedy action tensor([ 0.1784, 0.3489, -0.0678, -0.3553]) tensor([0.2814, 0.3337, 0.2200, 0.1650]) -Greedy action tensor([-0.0233, -0.8436, 0.3978, -0.3083]) tensor([0.2691, 0.1185, 0.4100, 0.2024]) -Greedy action tensor([-0.2606, -1.0879, -0.6614, -0.5441]) tensor([0.3496, 0.1529, 0.2342, 0.2633]) -Greedy action tensor([-0.6911, 0.5768, -0.5642, 0.0759]) tensor([0.1275, 0.4531, 0.1448, 0.2746]) -Greedy action tensor([ 0.0536, -1.5820, 0.3780, 0.3823]) tensor([0.2521, 0.0491, 0.3487, 0.3502]) -Greedy action tensor([ 0.0855, -0.5188, -0.2956, -0.6899]) tensor([0.3717, 0.2031, 0.2539, 0.1712]) -Greedy action tensor([-0.3616, -0.6660, 0.0261, 0.3234]) tensor([0.1925, 0.1420, 0.2837, 0.3818]) -Greedy action tensor([ 0.8415, -0.6549, 0.9250, -0.3716]) tensor([0.3834, 0.0859, 0.4168, 0.1140]) -Greedy action tensor([-2.0351, -0.2651, 1.3139, -0.8997]) tensor([0.0260, 0.1527, 0.7404, 0.0809]) -Greedy action tensor([ 0.2969, -0.1012, -0.4215, 1.1062]) tensor([0.2270, 0.1525, 0.1107, 0.5099]) -Greedy action tensor([-1.2735, -0.9559, 0.1271, -0.7849]) tensor([0.1240, 0.1704, 0.5033, 0.2022]) -Greedy action tensor([ 0.0919, -0.5465, 0.4829, -0.3498]) tensor([0.2740, 0.1447, 0.4051, 0.1762]) -Greedy action tensor([ 1.2085, -0.6470, 0.9985, 0.8903]) tensor([0.3711, 0.0580, 0.3008, 0.2700]) -Greedy action tensor([ 0.1164, 0.5561, -0.0822, -0.3398]) tensor([0.2496, 0.3875, 0.2047, 0.1582]) -Greedy action tensor([ 0.8796, -0.4022, -0.6003, 0.4309]) tensor([0.4665, 0.1295, 0.1062, 0.2978]) -Greedy action tensor([ 1.0209, -1.3076, 0.2955, 0.2584]) tensor([0.4883, 0.0476, 0.2364, 0.2278]) -Greedy action tensor([-0.0974, -0.8958, -0.3761, -0.0706]) tensor([0.3092, 0.1392, 0.2340, 0.3176]) -Greedy action tensor([-0.3686, -0.7416, -0.5077, -0.3973]) tensor([0.2833, 0.1951, 0.2465, 0.2752]) -Greedy action tensor([ 0.6556, -0.2579, -0.1586, 0.5510]) tensor([0.3643, 0.1461, 0.1614, 0.3281]) -Greedy action tensor([-0.2602, -1.0410, -0.0395, 0.7398]) tensor([0.1844, 0.0845, 0.2299, 0.5012]) -Greedy action tensor([ 1.2270, -0.0891, 0.9043, 0.5130]) tensor([0.4029, 0.1080, 0.2918, 0.1973]) -Greedy action tensor([ 1.1256, -0.4187, 2.0363, 0.0186]) tensor([0.2481, 0.0530, 0.6169, 0.0820]) -Greedy action tensor([ 0.2768, -0.1959, -0.5438, 0.7273]) tensor([0.2753, 0.1716, 0.1212, 0.4320]) -Greedy action tensor([ 0.3025, -1.0352, -0.8155, -0.2174]) tensor([0.4579, 0.1202, 0.1497, 0.2723]) -Greedy action tensor([-0.6430, -0.4958, 0.0043, -0.3052]) tensor([0.1828, 0.2118, 0.3492, 0.2562]) -Greedy action tensor([-0.2740, -1.0118, 0.3236, -0.7056]) tensor([0.2534, 0.1212, 0.4607, 0.1646]) -Greedy action tensor([ 0.4285, 0.1523, -0.5034, 1.6001]) tensor([0.1859, 0.1410, 0.0732, 0.5999]) -Greedy action tensor([-0.0979, -0.3467, 0.0338, -0.9145]) tensor([0.2974, 0.2319, 0.3393, 0.1314]) -Greedy action tensor([ 0.6272, -0.1608, 0.1250, 0.6299]) tensor([0.3265, 0.1485, 0.1976, 0.3274]) -Greedy action tensor([-0.6284, -1.0225, 0.2419, -0.7986]) tensor([0.2038, 0.1375, 0.4867, 0.1720]) -Greedy action tensor([ 0.9302, -0.4125, 0.5581, 0.0720]) tensor([0.4212, 0.1100, 0.2903, 0.1785]) -Greedy action tensor([-0.6528, -0.4970, 1.4555, -0.7204]) tensor([0.0882, 0.1031, 0.7263, 0.0824]) -Greedy action tensor([-0.3341, 0.0428, -0.5577, -0.6082]) tensor([0.2489, 0.3628, 0.1990, 0.1892]) -Greedy action tensor([ 0.5304, -1.1493, 0.3418, 0.0291]) tensor([0.3817, 0.0711, 0.3160, 0.2312]) -Greedy action tensor([-0.5194, 0.0456, 0.1117, -0.6511]) tensor([0.1813, 0.3190, 0.3408, 0.1589]) -Greedy action tensor([ 0.6777, -0.5047, -0.5345, 0.8316]) tensor([0.3609, 0.1106, 0.1074, 0.4210]) -Greedy action tensor([-0.4500, -0.8448, 0.5188, -0.5151]) tensor([0.1906, 0.1285, 0.5023, 0.1786]) -Greedy action tensor([ 0.1421, -1.6754, 0.1161, -0.0812]) tensor([0.3405, 0.0553, 0.3318, 0.2724]) -Greedy action tensor([ 0.0624, -1.3157, 0.7119, -1.0292]) tensor([0.2855, 0.0720, 0.5467, 0.0958]) -Greedy action tensor([ 0.6283, -0.7984, 0.8677, 0.0655]) tensor([0.3247, 0.0780, 0.4125, 0.1849]) -Greedy action tensor([ 0.7453, -0.2262, 0.4556, -0.4568]) tensor([0.4119, 0.1559, 0.3083, 0.1238]) -Greedy action tensor([-0.6551, -0.8495, -0.6011, -0.4129]) tensor([0.2408, 0.1983, 0.2542, 0.3068]) -Greedy action tensor([ 0.0194, 0.1562, 0.4191, -0.5574]) tensor([0.2381, 0.2730, 0.3551, 0.1338]) -Greedy action tensor([ 0.3837, -1.2248, -0.4882, 0.3995]) tensor([0.3796, 0.0760, 0.1587, 0.3857]) -Greedy action tensor([ 1.0423, 0.1829, -0.2503, 0.4804]) tensor([0.4409, 0.1867, 0.1211, 0.2514]) -Greedy action tensor([-0.1125, -0.5424, 0.4487, 0.3167]) tensor([0.2025, 0.1317, 0.3549, 0.3110]) -Greedy action tensor([ 0.7247, -0.8061, -0.5583, -0.9765]) tensor([0.5966, 0.1291, 0.1654, 0.1089]) -Greedy action tensor([-0.6953, 0.8704, -0.3402, -0.6934]) tensor([0.1217, 0.5827, 0.1736, 0.1220]) -Greedy action tensor([-0.7526, -0.7308, 0.2881, -0.8813]) tensor([0.1745, 0.1783, 0.4939, 0.1534]) -Greedy action tensor([ 1.2426, -0.7404, -0.0919, 0.7472]) tensor([0.4974, 0.0685, 0.1310, 0.3031]) -Greedy action tensor([-0.1658, -0.3438, -1.2380, 0.3169]) tensor([0.2632, 0.2203, 0.0901, 0.4265]) -Greedy action tensor([ 0.1425, -1.4138, 1.2557, -0.6401]) tensor([0.2122, 0.0448, 0.6460, 0.0970]) -Greedy action tensor([ 1.0885, -0.1613, 0.2460, 0.4664]) tensor([0.4437, 0.1271, 0.1910, 0.2382]) -Greedy action tensor([ 0.5987, -0.4171, -0.5169, 0.4817]) tensor([0.3877, 0.1404, 0.1270, 0.3449]) -Greedy action tensor([ 0.7188, -0.7985, 1.3139, 1.4922]) tensor([0.1923, 0.0422, 0.3487, 0.4168]) -Greedy action tensor([-0.3134, 0.1310, 0.1169, -0.7657]) tensor([0.2113, 0.3295, 0.3249, 0.1344]) -Greedy action tensor([ 0.8072, -0.5471, 0.3088, 0.0583]) tensor([0.4276, 0.1104, 0.2598, 0.2022]) -Greedy action tensor([-0.7909, -0.7361, 0.0018, -0.7617]) tensor([0.1889, 0.1995, 0.4172, 0.1944]) -Greedy action tensor([ 1.0573, -0.5271, -0.1710, -0.3600]) tensor([0.5746, 0.1178, 0.1683, 0.1393]) -Greedy action tensor([ 0.5281, -0.1372, -0.0831, -0.3707]) tensor([0.4059, 0.2087, 0.2203, 0.1652]) -Greedy action tensor([ 0.5111, 0.1815, -0.0485, 0.0436]) tensor([0.3428, 0.2465, 0.1959, 0.2148]) -Greedy action tensor([ 0.7298, -0.4928, -0.1684, -0.3399]) tensor([0.4890, 0.1440, 0.1992, 0.1678]) -Greedy action tensor([ 1.4211, -0.7917, -0.0752, -0.5486]) tensor([0.6790, 0.0743, 0.1521, 0.0947]) -Greedy action tensor([ 0.4986, 0.1267, -0.1860, -0.1709]) tensor([0.3696, 0.2548, 0.1864, 0.1892]) -Greedy action tensor([ 0.3181, -0.0207, -0.0935, -0.1213]) tensor([0.3312, 0.2360, 0.2194, 0.2134]) -Greedy action tensor([ 0.7404, -0.5967, 0.0554, -0.2314]) tensor([0.4662, 0.1224, 0.2350, 0.1764]) -Greedy action tensor([ 0.9690, -0.7893, 0.0272, -0.3503]) tensor([0.5466, 0.0942, 0.2131, 0.1461]) -Greedy action tensor([ 0.7058, -0.6744, -0.0253, -0.5784]) tensor([0.4976, 0.1251, 0.2395, 0.1378]) -Greedy action tensor([ 0.9307, -0.7637, 0.0444, -0.5197]) tensor([0.5463, 0.1004, 0.2252, 0.1281]) -Greedy action tensor([ 0.8968, -0.5080, -0.0961, -0.2329]) tensor([0.5157, 0.1266, 0.1911, 0.1666]) -Greedy action tensor([ 0.9430, -0.6000, 0.1356, -0.3286]) tensor([0.5154, 0.1102, 0.2299, 0.1445]) -Greedy action tensor([ 0.6405, 0.0236, 0.0240, -0.0026]) tensor([0.3839, 0.2071, 0.2072, 0.2018]) -Greedy action tensor([ 0.7605, -0.7629, 0.0748, -0.3540]) tensor([0.4879, 0.1063, 0.2457, 0.1601]) -Greedy action tensor([ 0.9351, -0.3433, 0.0872, -0.0727]) tensor([0.4827, 0.1344, 0.2067, 0.1762]) -Greedy action tensor([ 0.5213, -0.1676, 0.2236, -0.4483]) tensor([0.3811, 0.1914, 0.2830, 0.1445]) -Greedy action tensor([ 0.7253, -0.4627, -0.0471, -0.3332]) tensor([0.4731, 0.1442, 0.2185, 0.1642]) -Greedy action tensor([ 0.8762, -0.5345, 0.0151, -0.5662]) tensor([0.5255, 0.1282, 0.2221, 0.1242]) -Greedy action tensor([ 0.5477, -0.3583, 0.0526, -0.3500]) tensor([0.4130, 0.1669, 0.2518, 0.1683]) -Greedy action tensor([ 0.7262, -0.5415, -0.1425, -0.1655]) tensor([0.4737, 0.1333, 0.1987, 0.1942]) -Greedy action tensor([ 0.6080, -0.3533, 0.0256, -0.1929]) tensor([0.4184, 0.1600, 0.2337, 0.1879]) -Greedy action tensor([ 1.0396, -0.6319, -0.1821, -0.2550]) tensor([0.5693, 0.1070, 0.1678, 0.1560]) -Greedy action tensor([ 0.4950, -0.2460, -0.1274, -0.1804]) tensor([0.3965, 0.1890, 0.2128, 0.2018]) -Greedy action tensor([ 0.8793, -0.6579, -0.1306, -0.1813]) tensor([0.5193, 0.1116, 0.1892, 0.1798]) -Greedy action tensor([ 1.0139, -0.4821, -0.1934, -0.2296]) tensor([0.5521, 0.1237, 0.1651, 0.1592]) -Greedy action tensor([ 0.8237, -0.6135, -0.0634, -0.5545]) tensor([0.5259, 0.1250, 0.2166, 0.1325]) -Greedy action tensor([ 0.7216, -0.6017, -0.0893, -0.1588]) tensor([0.4705, 0.1253, 0.2091, 0.1951]) -Greedy action tensor([ 0.7089, -0.2109, -0.0896, -0.0780]) tensor([0.4340, 0.1730, 0.1953, 0.1976]) -Greedy action tensor([ 1.0183, -0.8482, 0.0119, -0.5097]) tensor([0.5757, 0.0890, 0.2104, 0.1249]) -Greedy action tensor([ 0.8274, -0.2682, -0.0144, -0.0834]) tensor([0.4614, 0.1543, 0.1988, 0.1856]) -Greedy action tensor([ 0.9479, -0.6213, -0.0566, -0.4812]) tensor([0.5513, 0.1148, 0.2019, 0.1320]) -Greedy action tensor([ 0.9361, -0.5201, 0.1147, -0.2861]) tensor([0.5083, 0.1185, 0.2235, 0.1497]) -Greedy action tensor([ 0.6419, -0.3434, -0.0632, -0.1893]) tensor([0.4342, 0.1621, 0.2145, 0.1891]) -Greedy action tensor([ 0.5885, -0.0900, 0.0984, -0.0082]) tensor([0.3744, 0.1900, 0.2294, 0.2062]) -Greedy action tensor([ 0.7024, -0.1725, 0.0640, -0.3628]) tensor([0.4367, 0.1821, 0.2307, 0.1505]) -Greedy action tensor([ 0.8176, -0.5992, -0.1593, -0.7431]) tensor([0.5468, 0.1326, 0.2058, 0.1148]) -Greedy action tensor([ 0.5776, -0.2413, -0.1153, -0.1574]) tensor([0.4131, 0.1822, 0.2066, 0.1981]) -Greedy action tensor([ 0.2636, -0.4296, -0.1362, -0.1437]) tensor([0.3526, 0.1763, 0.2364, 0.2346]) -Greedy action tensor([ 0.2448, -0.1117, -0.0578, -0.0463]) tensor([0.3138, 0.2197, 0.2319, 0.2346]) -Greedy action tensor([ 1.0237, -0.5254, 0.0403, -0.3555]) tensor([0.5440, 0.1156, 0.2035, 0.1370]) -Greedy action tensor([ 0.7894, -0.3145, 0.1087, -0.2476]) tensor([0.4561, 0.1512, 0.2309, 0.1617]) -Greedy action tensor([ 0.6556, -0.2927, -0.0562, -0.0364]) tensor([0.4204, 0.1629, 0.2063, 0.2104]) -Greedy action tensor([ 0.6603, -0.0424, 0.0078, -0.1095]) tensor([0.4034, 0.1998, 0.2100, 0.1868]) -Greedy action tensor([ 0.9466, -0.2826, -0.2176, -0.2740]) tensor([0.5264, 0.1540, 0.1643, 0.1553]) -Greedy action tensor([ 0.8575, -0.7212, -0.0252, -0.2219]) tensor([0.5103, 0.1052, 0.2111, 0.1734]) -Greedy action tensor([ 0.6963, -0.5481, -0.1321, -0.2741]) tensor([0.4753, 0.1370, 0.2076, 0.1801]) -Greedy action tensor([ 0.9790, -0.8974, 0.0380, -0.4772]) tensor([0.5629, 0.0862, 0.2197, 0.1312]) -Greedy action tensor([ 0.6433, -0.2491, 0.0457, -0.1693]) tensor([0.4161, 0.1704, 0.2289, 0.1846]) -Greedy action tensor([ 0.9296, -0.5232, -0.1804, -0.3117]) tensor([0.5398, 0.1263, 0.1779, 0.1560]) -Greedy action tensor([ 1.0055, -0.3870, 0.0834, -0.2361]) tensor([0.5168, 0.1284, 0.2055, 0.1493]) -Greedy action tensor([ 0.7178, -0.4450, -0.1196, -0.1450]) tensor([0.4614, 0.1442, 0.1997, 0.1947]) -Greedy action tensor([ 0.2316, 0.1661, -0.1063, 0.0105]) tensor([0.2897, 0.2713, 0.2067, 0.2323]) -Greedy action tensor([ 0.8505, -0.8622, -0.1217, -0.4389]) tensor([0.5452, 0.0983, 0.2062, 0.1502]) -Greedy action tensor([ 1.0646, -0.9872, 0.2520, -0.6085]) tensor([0.5682, 0.0730, 0.2521, 0.1066]) -Greedy action tensor([ 0.6592, -0.2991, -0.0651, -0.2456]) tensor([0.4400, 0.1687, 0.2132, 0.1780]) -Greedy action tensor([ 1.0159, -0.8738, 0.1040, -0.5622]) tensor([0.5684, 0.0859, 0.2284, 0.1173]) -Greedy action tensor([ 0.8607, 0.0646, -0.1152, -0.2180]) tensor([0.4613, 0.2081, 0.1738, 0.1568]) -Greedy action tensor([ 0.4635, 0.0076, 0.0371, -0.0694]) tensor([0.3480, 0.2206, 0.2272, 0.2042]) -Greedy action tensor([0.2884, 0.0654, 0.1741, 0.2288]) tensor([0.2752, 0.2202, 0.2454, 0.2592]) -Greedy action tensor([ 0.6635, -0.3794, -0.1043, -0.2284]) tensor([0.4492, 0.1583, 0.2084, 0.1841]) -Greedy action tensor([ 0.7568, -0.4326, -0.2030, -0.2309]) tensor([0.4855, 0.1478, 0.1859, 0.1808]) -Greedy action tensor([ 0.7721, -0.7630, 0.0884, -0.7594]) tensor([0.5164, 0.1113, 0.2607, 0.1117]) -Greedy action tensor([ 0.8712, -0.3244, -0.1397, -0.0020]) tensor([0.4798, 0.1452, 0.1746, 0.2004]) -Greedy action tensor([ 0.8941, -0.3573, -0.1694, -0.2337]) tensor([0.5115, 0.1463, 0.1766, 0.1656]) -Greedy action tensor([ 0.7309, -0.4669, -0.0174, -0.2062]) tensor([0.4615, 0.1393, 0.2184, 0.1808]) -Greedy action tensor([ 0.8790, -0.6639, 0.0099, -0.2269]) tensor([0.5092, 0.1088, 0.2135, 0.1685]) -Greedy action tensor([ 0.8710, -0.3076, 0.0902, -0.1657]) tensor([0.4716, 0.1451, 0.2160, 0.1673]) -Greedy action tensor([ 0.6599, -0.3676, -0.1184, -0.2256]) tensor([0.4485, 0.1605, 0.2060, 0.1850]) -Greedy action tensor([ 0.3740, -0.0956, 0.0743, -0.2057]) tensor([0.3417, 0.2137, 0.2532, 0.1914]) -Greedy action tensor([ 0.8912, -0.4218, -0.0314, -0.1476]) tensor([0.4949, 0.1332, 0.1968, 0.1752]) -Greedy action tensor([ 0.5331, 0.2303, -0.4952, -0.2527]) tensor([0.3918, 0.2895, 0.1401, 0.1786]) -Greedy action tensor([ 0.6655, -0.4739, -0.1269, -0.3158]) tensor([0.4656, 0.1490, 0.2108, 0.1745]) -Greedy action tensor([ 0.8839, -0.3880, -0.1446, -0.3846]) tensor([0.5211, 0.1461, 0.1863, 0.1466]) -Greedy action tensor([ 0.7604, -0.5517, -0.0134, -0.2709]) tensor([0.4791, 0.1290, 0.2210, 0.1708]) -Greedy action tensor([ 0.1313, -0.0274, -0.1162, -0.4247]) tensor([0.3118, 0.2660, 0.2434, 0.1788]) -Greedy action tensor([ 0.6194, -0.4051, -0.1004, -0.1653]) tensor([0.4344, 0.1559, 0.2115, 0.1982]) -Greedy action tensor([ 0.3614, -0.2096, -0.0589, -0.2161]) tensor([0.3593, 0.2030, 0.2360, 0.2017]) -Greedy action tensor([ 0.8999, -0.6466, 0.0617, -0.3572]) tensor([0.5181, 0.1104, 0.2241, 0.1474]) -Greedy action tensor([ 0.6634, -0.4055, -0.1162, -0.2063]) tensor([0.4502, 0.1546, 0.2065, 0.1887]) -Greedy action tensor([ 1.0541, -0.2716, -0.2213, -0.5206]) tensor([0.5708, 0.1516, 0.1594, 0.1182]) -Greedy action tensor([ 1.5062, -0.2883, -1.0077, 0.3330]) tensor([0.6425, 0.1068, 0.0520, 0.1988]) -Greedy action tensor([ 2.1616, -0.7859, -0.2641, 0.6437]) tensor([0.7353, 0.0386, 0.0650, 0.1611]) -Greedy action tensor([ 1.6845, -0.0616, 0.1085, 0.4557]) tensor([0.5974, 0.1042, 0.1235, 0.1748]) -Greedy action tensor([ 1.5413, -0.8628, -0.5485, 0.5908]) tensor([0.6248, 0.0564, 0.0773, 0.2415]) -Greedy action tensor([ 1.4037, -0.0901, -0.7803, 0.1611]) tensor([0.6151, 0.1381, 0.0693, 0.1775]) -Greedy action tensor([ 1.7818, -0.1277, -0.8058, 0.0442]) tensor([0.7147, 0.1059, 0.0537, 0.1257]) -Greedy action tensor([ 2.2445, -1.0181, -0.5728, 0.9870]) tensor([0.7234, 0.0277, 0.0432, 0.2057]) -Greedy action tensor([ 1.6657, -0.7567, -0.1653, 0.2045]) tensor([0.6753, 0.0599, 0.1082, 0.1566]) -Greedy action tensor([ 1.3673, -0.6563, -0.3216, 0.5427]) tensor([0.5697, 0.0753, 0.1052, 0.2498]) -Greedy action tensor([ 1.5194, -0.3089, -0.5643, 0.3920]) tensor([0.6215, 0.0999, 0.0774, 0.2013]) -Greedy action tensor([ 1.3314, -0.4852, -0.5869, 0.6562]) tensor([0.5499, 0.0894, 0.0808, 0.2799]) -Greedy action tensor([ 1.6735, -0.3396, -0.2679, 0.0820]) tensor([0.6754, 0.0902, 0.0969, 0.1375]) -Greedy action tensor([ 1.8852, -1.0381, -0.1730, 0.5231]) tensor([0.6956, 0.0374, 0.0888, 0.1782]) -Greedy action tensor([ 2.4381, -1.6172, -0.1774, 0.3440]) tensor([0.8240, 0.0143, 0.0603, 0.1015]) -Greedy action tensor([ 2.4031, -1.2760, -0.3595, 0.7330]) tensor([0.7833, 0.0198, 0.0494, 0.1474]) -Greedy action tensor([ 1.4045, -0.4112, -0.9437, 0.2844]) tensor([0.6311, 0.1027, 0.0603, 0.2059]) -Greedy action tensor([ 1.2769, -0.3003, -0.7356, 0.1241]) tensor([0.6039, 0.1247, 0.0807, 0.1907]) -Greedy action tensor([ 1.4565, 0.4891, -0.7214, 0.1056]) tensor([0.5707, 0.2169, 0.0646, 0.1478]) -Greedy action tensor([ 1.7357, -0.8874, -0.1496, 0.6641]) tensor([0.6382, 0.0463, 0.0969, 0.2186]) -Greedy action tensor([ 1.4389, -0.2204, -0.3080, 0.1876]) tensor([0.6058, 0.1153, 0.1056, 0.1733]) -Greedy action tensor([ 1.8415, -0.6282, -0.2218, 0.5704]) tensor([0.6702, 0.0567, 0.0851, 0.1880]) -Greedy action tensor([ 1.4536, -0.7234, -0.1696, 0.2756]) tensor([0.6178, 0.0700, 0.1219, 0.1902]) -Greedy action tensor([ 2.0649, -1.0482, -0.2974, 0.2993]) tensor([0.7635, 0.0339, 0.0719, 0.1306]) -Greedy action tensor([ 1.3148, -0.5008, -0.4976, 0.4718]) tensor([0.5693, 0.0927, 0.0929, 0.2451]) -Greedy action tensor([ 0.9757, -0.5166, -0.5310, 0.6231]) tensor([0.4653, 0.1046, 0.1031, 0.3270]) -Greedy action tensor([ 1.6422, -0.3048, -0.2954, 0.2119]) tensor([0.6553, 0.0935, 0.0944, 0.1568]) -Greedy action tensor([ 1.2704, -0.4026, -0.2961, 0.0517]) tensor([0.5910, 0.1109, 0.1234, 0.1747]) -Greedy action tensor([ 1.3429, -1.3465, -0.0683, 0.1789]) tensor([0.6158, 0.0418, 0.1501, 0.1923]) -Greedy action tensor([ 2.3228, -0.9513, -0.2524, 0.6796]) tensor([0.7649, 0.0290, 0.0582, 0.1479]) -Greedy action tensor([ 1.4508, -0.2506, -0.3833, 0.5543]) tensor([0.5714, 0.1042, 0.0913, 0.2331]) -Greedy action tensor([ 1.1753, -0.0958, -0.7488, 0.0141]) tensor([0.5748, 0.1613, 0.0839, 0.1800]) -Greedy action tensor([ 1.1843, -0.0900, -0.5903, 0.2590]) tensor([0.5418, 0.1515, 0.0919, 0.2148]) -Greedy action tensor([ 1.5963, -0.5211, -0.6518, -0.0095]) tensor([0.7009, 0.0844, 0.0740, 0.1407]) -Greedy action tensor([ 1.3930, 0.2447, -0.5635, 0.4129]) tensor([0.5453, 0.1730, 0.0771, 0.2046]) -Greedy action tensor([ 1.0583, -0.1901, -0.6984, 0.0920]) tensor([0.5435, 0.1559, 0.0938, 0.2068]) -Greedy action tensor([ 1.5513, -0.0183, -0.8175, 0.6199]) tensor([0.5897, 0.1227, 0.0552, 0.2323]) -Greedy action tensor([ 2.4326, 0.2733, -0.0522, 0.4933]) tensor([0.7449, 0.0860, 0.0621, 0.1071]) -Greedy action tensor([ 1.3110, -0.1219, -0.9640, 0.4235]) tensor([0.5704, 0.1361, 0.0586, 0.2348]) -Greedy action tensor([ 1.6912, -0.2286, -0.1580, -0.0832]) tensor([0.6786, 0.0995, 0.1068, 0.1151]) -Greedy action tensor([ 1.0701, -0.5680, -0.1213, 0.2146]) tensor([0.5200, 0.1011, 0.1580, 0.2210]) -Greedy action tensor([ 1.7418, -0.6698, -0.4749, 0.0572]) tensor([0.7225, 0.0648, 0.0787, 0.1340]) -Greedy action tensor([ 1.6331, -0.1493, -0.9414, 0.5180]) tensor([0.6360, 0.1070, 0.0485, 0.2085]) -Greedy action tensor([ 1.5736, -0.2660, -0.9240, 0.4325]) tensor([0.6408, 0.1018, 0.0527, 0.2047]) -Greedy action tensor([ 1.3245, -0.4751, -0.1944, 0.1553]) tensor([0.5900, 0.0976, 0.1292, 0.1833]) -Greedy action tensor([ 1.0986, 0.1019, -0.6282, 0.5645]) tensor([0.4688, 0.1730, 0.0834, 0.2748]) -Greedy action tensor([ 1.5443, -0.7661, -0.3256, -0.0566]) tensor([0.6872, 0.0682, 0.1059, 0.1386]) -Greedy action tensor([ 1.7189, -0.5263, -1.0482, 0.4142]) tensor([0.6944, 0.0735, 0.0436, 0.1884]) -Greedy action tensor([ 1.8630, -0.6047, -0.4191, 0.3266]) tensor([0.7133, 0.0605, 0.0728, 0.1535]) -Greedy action tensor([ 1.5193, -0.4963, -0.5314, 0.4299]) tensor([0.6257, 0.0834, 0.0805, 0.2105]) -Greedy action tensor([ 1.3950, 0.2838, -0.3610, 0.3550]) tensor([0.5390, 0.1774, 0.0931, 0.1905]) -Greedy action tensor([ 2.0596, -0.9220, -0.5101, 0.1642]) tensor([0.7828, 0.0397, 0.0599, 0.1176]) -Greedy action tensor([ 1.2463, -0.1831, -1.1663, 0.2842]) tensor([0.5844, 0.1399, 0.0524, 0.2233]) -Greedy action tensor([ 1.5254, -0.3659, -0.5358, 0.3343]) tensor([0.6321, 0.0954, 0.0805, 0.1921]) -Greedy action tensor([ 1.5847, -0.3843, -0.4332, 0.5302]) tensor([0.6169, 0.0861, 0.0820, 0.2149]) -Greedy action tensor([ 0.9501, -0.1185, 0.0289, 0.2768]) tensor([0.4441, 0.1526, 0.1768, 0.2265]) -Greedy action tensor([ 1.3921, 0.0360, -0.6954, 0.5250]) tensor([0.5550, 0.1430, 0.0688, 0.2332]) -Greedy action tensor([ 1.4906, -0.8404, -0.6954, 0.1167]) tensor([0.6837, 0.0665, 0.0768, 0.1731]) -Greedy action tensor([ 1.8407, -0.7580, -0.4914, 0.3585]) tensor([0.7150, 0.0532, 0.0694, 0.1624]) -Greedy action tensor([ 1.5557, -0.8681, -0.2849, 0.3774]) tensor([0.6430, 0.0570, 0.1021, 0.1979]) -Greedy action tensor([ 1.8441, -0.0354, -0.1771, 0.3398]) tensor([0.6634, 0.1013, 0.0879, 0.1474]) -Greedy action tensor([ 2.1394, -1.0162, -0.6612, 0.8756]) tensor([0.7215, 0.0307, 0.0439, 0.2039]) -Greedy action tensor([ 1.0085, -0.6390, -0.1871, -0.2095]) tensor([0.5584, 0.1075, 0.1689, 0.1652]) -Greedy action tensor([ 1.5030, -0.4744, -0.4312, 0.4288]) tensor([0.6156, 0.0852, 0.0890, 0.2103]) -Greedy action tensor([ 1.5284, -0.4461, -0.5576, 0.2288]) tensor([0.6512, 0.0904, 0.0809, 0.1776]) -Greedy action tensor([ 1.3193, -0.1793, -1.2350, 0.6032]) tensor([0.5587, 0.1248, 0.0434, 0.2730]) -Greedy action tensor([ 1.6752, -0.8424, -0.6317, 0.3396]) tensor([0.6929, 0.0559, 0.0690, 0.1822]) -Greedy action tensor([ 1.8963, 0.0245, -0.4297, 0.2795]) tensor([0.6896, 0.1061, 0.0674, 0.1369]) -Greedy action tensor([ 1.4093, 0.0696, -0.9041, -0.1057]) tensor([0.6327, 0.1657, 0.0626, 0.1391]) -Greedy action tensor([ 1.6843, -0.8149, -0.6049, 0.3226]) tensor([0.6946, 0.0571, 0.0704, 0.1780]) -Greedy action tensor([ 1.5236, -0.2671, -0.3250, 0.4440]) tensor([0.6010, 0.1003, 0.0946, 0.2042]) -Greedy action tensor([ 1.9133, -0.5122, -0.3450, 0.3259]) tensor([0.7156, 0.0633, 0.0748, 0.1463]) -Greedy action tensor([ 1.8548, -0.8874, -0.3463, 0.3939]) tensor([0.7107, 0.0458, 0.0787, 0.1649]) -Greedy action tensor([ 2.1392, -1.1758, -0.2333, 0.7664]) tensor([0.7231, 0.0263, 0.0674, 0.1832]) -Greedy action tensor([ 1.2665, -0.6673, -0.3398, 0.3527]) tensor([0.5727, 0.0828, 0.1149, 0.2296]) -Greedy action tensor([ 1.1328, -0.4133, -0.2085, 0.5478]) tensor([0.4922, 0.1049, 0.1287, 0.2742]) -Greedy action tensor([ 1.2645, -0.1230, -0.5613, 0.2371]) tensor([0.5654, 0.1412, 0.0911, 0.2024]) -Greedy action tensor([ 1.6102, -0.4401, -0.6169, 0.7454]) tensor([0.6033, 0.0776, 0.0651, 0.2540]) -Greedy action tensor([ 2.0562, -0.0592, 0.3165, 0.1955]) tensor([0.6888, 0.0831, 0.1209, 0.1072]) -Greedy action tensor([ 1.3565, -0.9237, -0.2343, 0.5485]) tensor([0.5709, 0.0584, 0.1163, 0.2544]) -Greedy action tensor([ 1.5453, -0.2386, -1.1598, 0.2923]) tensor([0.6577, 0.1105, 0.0440, 0.1879]) -Greedy action tensor([ 1.3193, -0.6423, 0.2397, 0.2737]) tensor([0.5459, 0.0768, 0.1855, 0.1919]) -Greedy action tensor([-1.9357, -0.4311, 0.6630, -0.1712]) tensor([0.0403, 0.1816, 0.5425, 0.2356]) -Greedy action tensor([-1.8883, -0.3007, 0.6277, -0.1469]) tensor([0.0417, 0.2040, 0.5163, 0.2380]) -Greedy action tensor([-1.9126, -0.4544, 0.6568, -0.1629]) tensor([0.0415, 0.1783, 0.5416, 0.2386]) -Greedy action tensor([-1.8946, -0.4328, 0.6403, -0.1540]) tensor([0.0423, 0.1825, 0.5339, 0.2413]) -Greedy action tensor([-1.7884, -0.3911, 0.6005, -0.0744]) tensor([0.0465, 0.1881, 0.5071, 0.2582]) -Greedy action tensor([-1.8298, -0.4856, 0.6230, -0.0863]) tensor([0.0451, 0.1730, 0.5241, 0.2578]) -Greedy action tensor([-1.9190, -0.4420, 0.6562, -0.1653]) tensor([0.0412, 0.1803, 0.5407, 0.2378]) -Greedy action tensor([-1.8308, -0.4439, 0.5856, -0.1734]) tensor([0.0466, 0.1866, 0.5223, 0.2445]) -Greedy action tensor([-1.9235, -0.4728, 0.6561, -0.1693]) tensor([0.0413, 0.1760, 0.5443, 0.2384]) -Greedy action tensor([-0.2034, 0.6742, 0.1264, 0.6238]) tensor([0.1412, 0.3396, 0.1964, 0.3229]) -Greedy action tensor([-1.9304, -0.4371, 0.6613, -0.1725]) tensor([0.0406, 0.1809, 0.5427, 0.2357]) -Greedy action tensor([-1.7979, -0.3803, 0.6039, -0.0944]) tensor([0.0462, 0.1905, 0.5098, 0.2536]) -Greedy action tensor([-1.9426, -0.4548, 0.6770, -0.1735]) tensor([0.0400, 0.1769, 0.5487, 0.2344]) -Greedy action tensor([-1.8461, -0.3076, 0.5996, -0.2347]) tensor([0.0450, 0.2097, 0.5196, 0.2256]) -Greedy action tensor([-1.3241, 0.1598, 0.3468, -0.0398]) tensor([0.0697, 0.3075, 0.3708, 0.2519]) -Greedy action tensor([-1.9212, -0.4404, 0.6524, -0.1722]) tensor([0.0412, 0.1812, 0.5406, 0.2370]) -Greedy action tensor([-1.6178, -0.2781, 0.4873, -0.0900]) tensor([0.0567, 0.2165, 0.4655, 0.2613]) -Greedy action tensor([-1.3111, 0.3604, 0.3825, -0.1736]) tensor([0.0672, 0.3576, 0.3656, 0.2096]) -Greedy action tensor([-1.7141, -0.4099, 0.5991, -0.2614]) tensor([0.0524, 0.1933, 0.5301, 0.2242]) -Greedy action tensor([-1.8501, -0.4568, 0.7705, 0.1174]) tensor([0.0386, 0.1554, 0.5301, 0.2759]) -Greedy action tensor([-1.9105, -0.4626, 0.6478, -0.1646]) tensor([0.0418, 0.1780, 0.5403, 0.2398]) -Greedy action tensor([-1.4665, -0.2219, 0.5634, 0.1422]) tensor([0.0585, 0.2032, 0.4457, 0.2925]) -Greedy action tensor([-1.8839, -0.4222, 0.6359, -0.1510]) tensor([0.0427, 0.1844, 0.5311, 0.2418]) -Greedy action tensor([-1.2157, -0.1254, 0.4571, -0.4321]) tensor([0.0870, 0.2589, 0.4636, 0.1905]) -Greedy action tensor([-1.9100, -0.4855, 0.7607, -0.1246]) tensor([0.0391, 0.1625, 0.5652, 0.2332]) -Greedy action tensor([-1.7772, -0.3396, 0.5678, -0.1147]) tensor([0.0478, 0.2013, 0.4988, 0.2521]) -Greedy action tensor([-1.3768, -0.5549, 0.3908, 0.1257]) tensor([0.0734, 0.1670, 0.4299, 0.3298]) -Greedy action tensor([-0.8043, -0.2355, 0.2253, 0.0098]) tensor([0.1278, 0.2258, 0.3579, 0.2885]) -Greedy action tensor([-1.9130, -0.4347, 0.6540, -0.1626]) tensor([0.0414, 0.1814, 0.5390, 0.2382]) -Greedy action tensor([-1.8370, -0.1901, 0.5747, -0.1051]) tensor([0.0435, 0.2257, 0.4850, 0.2458]) -Greedy action tensor([-1.9003, -0.4227, 0.6483, -0.1550]) tensor([0.0418, 0.1834, 0.5351, 0.2397]) -Greedy action tensor([-1.9133, -0.4393, 0.6543, -0.1616]) tensor([0.0414, 0.1807, 0.5394, 0.2385]) -Greedy action tensor([-1.8046, -0.3074, 0.4981, -0.1356]) tensor([0.0481, 0.2151, 0.4813, 0.2554]) -Greedy action tensor([-0.4613, -0.0846, 0.4755, 0.7095]) tensor([0.1215, 0.1770, 0.3099, 0.3916]) -Greedy action tensor([-1.4295, -0.3892, 0.4108, -0.0228]) tensor([0.0704, 0.1991, 0.4432, 0.2873]) -Greedy action tensor([-1.3403, -0.2629, 0.3477, 0.1530]) tensor([0.0725, 0.2129, 0.3920, 0.3227]) -Greedy action tensor([-0.6538, -0.4810, 0.1950, 0.0253]) tensor([0.1539, 0.1829, 0.3597, 0.3035]) -Greedy action tensor([-1.6956, -0.4039, 0.5244, -0.0994]) tensor([0.0532, 0.1938, 0.4903, 0.2627]) -Greedy action tensor([-1.5786, -0.4255, 0.6072, -0.2986]) tensor([0.0600, 0.1901, 0.5340, 0.2159]) -Greedy action tensor([-1.4016, -0.2608, 0.5019, 0.1591]) tensor([0.0641, 0.2006, 0.4301, 0.3053]) -Greedy action tensor([-1.9193, -0.4825, 0.7601, -0.1203]) tensor([0.0387, 0.1629, 0.5644, 0.2340]) -Greedy action tensor([-1.6775, 0.2290, 0.4285, -0.0247]) tensor([0.0472, 0.3179, 0.3881, 0.2467]) -Greedy action tensor([-1.4821, 0.2662, 0.3525, 0.1698]) tensor([0.0549, 0.3152, 0.3436, 0.2863]) -Greedy action tensor([-1.5446, -0.3619, 0.5023, 0.0874]) tensor([0.0584, 0.1906, 0.4523, 0.2987]) -Greedy action tensor([-1.8578, -0.3563, 0.6230, -0.1259]) tensor([0.0433, 0.1944, 0.5175, 0.2448]) -Greedy action tensor([-1.9251, -0.4261, 0.6586, -0.1695]) tensor([0.0408, 0.1827, 0.5404, 0.2361]) -Greedy action tensor([-1.2340, -0.0786, 0.5406, 0.2882]) tensor([0.0682, 0.2167, 0.4024, 0.3127]) -Greedy action tensor([-1.8907, -0.1517, 0.5931, -0.1513]) tensor([0.0410, 0.2335, 0.4918, 0.2336]) -Greedy action tensor([-1.6857, -0.4268, 0.5831, 0.0142]) tensor([0.0509, 0.1791, 0.4917, 0.2784]) -Greedy action tensor([-1.8211, -0.3414, 0.5942, -0.0987]) tensor([0.0451, 0.1980, 0.5046, 0.2523]) -Greedy action tensor([-0.5364, 0.6709, -0.0819, -0.0169]) tensor([0.1316, 0.4400, 0.2073, 0.2212]) -Greedy action tensor([-1.9129, -0.4400, 0.6543, -0.1621]) tensor([0.0414, 0.1806, 0.5395, 0.2385]) -Greedy action tensor([-1.9396, -0.4422, 0.6653, -0.1779]) tensor([0.0403, 0.1801, 0.5451, 0.2346]) -Greedy action tensor([-1.8731, -0.4059, 0.6345, -0.1380]) tensor([0.0429, 0.1863, 0.5273, 0.2435]) -Greedy action tensor([-1.9103, -0.4408, 0.6487, -0.1675]) tensor([0.0417, 0.1812, 0.5388, 0.2382]) -Greedy action tensor([-1.8665, -0.4719, 0.6205, -0.1379]) tensor([0.0441, 0.1777, 0.5300, 0.2482]) -Greedy action tensor([-1.9292, -0.4334, 0.6583, -0.1728]) tensor([0.0407, 0.1818, 0.5416, 0.2359]) -Greedy action tensor([-1.9419, -0.4451, 0.6670, -0.1776]) tensor([0.0402, 0.1795, 0.5458, 0.2345]) -Greedy action tensor([-1.4746, -0.4357, 0.4294, 0.0891]) tensor([0.0653, 0.1845, 0.4383, 0.3119]) -Greedy action tensor([-1.8590, -0.1162, 0.5600, -0.1206]) tensor([0.0423, 0.2417, 0.4753, 0.2407]) -Greedy action tensor([-1.9013, -0.4006, 0.6450, -0.1596]) tensor([0.0417, 0.1872, 0.5327, 0.2383]) -Greedy action tensor([-1.9395, -0.4644, 0.6965, -0.1672]) tensor([0.0397, 0.1734, 0.5536, 0.2334]) -Greedy action tensor([-1.9248, -0.4613, 0.6724, -0.1655]) tensor([0.0407, 0.1760, 0.5468, 0.2365]) -Greedy action tensor([-1.9253, -0.4297, 0.6595, -0.1693]) tensor([0.0408, 0.1820, 0.5410, 0.2362]) -Greedy action tensor([-1.8599, -0.4363, 0.6259, -0.1418]) tensor([0.0440, 0.1826, 0.5283, 0.2452]) -Greedy action tensor([-1.8627, -0.3378, 0.6106, -0.1541]) tensor([0.0435, 0.2000, 0.5162, 0.2403]) -Greedy action tensor([-1.8771, -0.4375, 0.6323, -0.1300]) tensor([0.0430, 0.1814, 0.5288, 0.2467]) -Greedy action tensor([-1.9288, -0.4446, 0.6640, -0.1647]) tensor([0.0406, 0.1792, 0.5431, 0.2371]) -Greedy action tensor([-1.9253, -0.4436, 0.6575, -0.1705]) tensor([0.0410, 0.1802, 0.5420, 0.2368]) -Greedy action tensor([-1.4467, -0.1003, 0.6049, 0.3666]) tensor([0.0533, 0.2049, 0.4148, 0.3269]) -Greedy action tensor([-1.3990, -0.2912, 0.6408, 0.1565]) tensor([0.0608, 0.1840, 0.4673, 0.2879]) -Greedy action tensor([-1.8666, -0.3589, 0.6165, -0.1447]) tensor([0.0433, 0.1956, 0.5188, 0.2423]) -Greedy action tensor([-1.4669, -0.2236, 0.4721, 0.2007]) tensor([0.0598, 0.2074, 0.4158, 0.3170]) -Greedy action tensor([-1.9354, -0.4330, 0.6629, -0.1709]) tensor([0.0404, 0.1814, 0.5426, 0.2357]) -Greedy action tensor([-1.9283, -0.4483, 0.6652, -0.1640]) tensor([0.0406, 0.1785, 0.5436, 0.2372]) -Greedy action tensor([-1.9469, -0.4525, 0.6673, -0.1820]) tensor([0.0401, 0.1786, 0.5473, 0.2341]) -Greedy action tensor([-1.9287, -0.4182, 0.6572, -0.1714]) tensor([0.0406, 0.1841, 0.5396, 0.2356]) -Greedy action tensor([-1.8974, -0.3633, 0.6413, -0.1518]) tensor([0.0416, 0.1930, 0.5270, 0.2384]) -Greedy action tensor([-1.2433, 0.7657, 0.1918, 0.1900]) tensor([0.0594, 0.4425, 0.2493, 0.2488]) -Greedy action tensor([-1.4831, -0.8078, 0.5757, -0.3398]) tensor([0.0717, 0.1409, 0.5622, 0.2251]) -Greedy action tensor([-1.8491, -0.0964, 0.5735, -0.0809]) tensor([0.0418, 0.2414, 0.4717, 0.2451]) -Greedy action tensor([ 0.0636, -0.7261, -0.0857, -0.6594]) tensor([0.3571, 0.1621, 0.3075, 0.1733]) -Greedy action tensor([-0.6715, -0.6571, 0.0118, -1.0318]) tensor([0.2131, 0.2162, 0.4221, 0.1486]) -Greedy action tensor([-0.4407, 0.1344, 0.1947, -0.9966]) tensor([0.1909, 0.3393, 0.3604, 0.1095]) -Greedy action tensor([ 0.5020, -0.5640, -0.3132, 0.2938]) tensor([0.3848, 0.1325, 0.1703, 0.3124]) -Greedy action tensor([ 0.0084, 0.8946, 0.7488, -0.0787]) tensor([0.1553, 0.3767, 0.3256, 0.1423]) -Greedy action tensor([0.1745, 0.3195, 1.0486, 0.1415]) tensor([0.1811, 0.2094, 0.4342, 0.1753]) -Greedy action tensor([ 0.4190, 0.4921, -0.9488, -0.2011]) tensor([0.3486, 0.3751, 0.0888, 0.1875]) -Greedy action tensor([ 1.1141, -0.9263, -0.3056, 0.6368]) tensor([0.5020, 0.0652, 0.1214, 0.3114]) -Greedy action tensor([ 0.7659, -1.7630, 1.4895, 0.1212]) tensor([0.2727, 0.0217, 0.5624, 0.1431]) -Greedy action tensor([-0.5957, 0.5127, 0.0163, -0.4383]) tensor([0.1420, 0.4301, 0.2618, 0.1662]) -Greedy action tensor([ 0.8871, -0.2174, 0.2861, 0.9811]) tensor([0.3358, 0.1113, 0.1841, 0.3689]) -Greedy action tensor([-0.1382, 0.0114, -0.4338, -1.1012]) tensor([0.3042, 0.3533, 0.2264, 0.1161]) -Greedy action tensor([ 1.0138, 0.0500, -0.5243, -0.3785]) tensor([0.5421, 0.2068, 0.1164, 0.1347]) -Greedy action tensor([ 0.7170, -0.7219, -0.2824, 0.1299]) tensor([0.4627, 0.1097, 0.1703, 0.2572]) -Greedy action tensor([-0.6796, 0.9228, 0.0484, -1.6597]) tensor([0.1189, 0.5903, 0.2462, 0.0446]) -Greedy action tensor([-0.0326, -0.7637, 0.5954, -0.1251]) tensor([0.2344, 0.1128, 0.4392, 0.2137]) -Greedy action tensor([ 0.6008, -0.7702, 1.0176, 0.2824]) tensor([0.2858, 0.0726, 0.4337, 0.2079]) -Greedy action tensor([ 0.6568, 0.2752, 0.5108, -0.2613]) tensor([0.3394, 0.2317, 0.2933, 0.1355]) -Greedy action tensor([ 0.4929, -0.0885, 0.3384, 0.5375]) tensor([0.2889, 0.1615, 0.2475, 0.3021]) -Greedy action tensor([ 0.6081, 0.0683, 0.6308, -0.8992]) tensor([0.3537, 0.2062, 0.3618, 0.0783]) -Greedy action tensor([-0.7803, -0.8886, 0.3300, -0.8534]) tensor([0.1706, 0.1531, 0.5178, 0.1586]) -Greedy action tensor([ 1.3084, -1.2454, 1.0057, 0.2018]) tensor([0.4657, 0.0362, 0.3441, 0.1540]) -Greedy action tensor([ 0.5027, -1.3886, -0.1293, 0.0070]) tensor([0.4364, 0.0658, 0.2320, 0.2658]) -Greedy action tensor([-0.0174, 0.1313, 0.0049, -0.1001]) tensor([0.2437, 0.2828, 0.2492, 0.2244]) -Greedy action tensor([-0.3693, -0.1145, -1.1073, -1.0294]) tensor([0.3044, 0.3928, 0.1455, 0.1573]) -Greedy action tensor([-0.3215, 0.2881, 0.4853, -0.7283]) tensor([0.1740, 0.3202, 0.3899, 0.1159]) -Greedy action tensor([-0.1883, -0.9455, -0.7314, -0.1001]) tensor([0.3183, 0.1493, 0.1849, 0.3476]) -Greedy action tensor([-0.5786, -0.6507, 0.7921, 0.6183]) tensor([0.1090, 0.1014, 0.4291, 0.3606]) -Greedy action tensor([ 0.0438, -0.7098, -0.0906, 0.1150]) tensor([0.2925, 0.1377, 0.2557, 0.3141]) -Greedy action tensor([ 0.4292, -0.1911, 0.7254, -0.2778]) tensor([0.2962, 0.1593, 0.3984, 0.1461]) -Greedy action tensor([ 0.2498, 0.1402, -0.1913, -0.2881]) tensor([0.3202, 0.2869, 0.2060, 0.1870]) -Greedy action tensor([ 0.5251, -0.8836, 0.2174, -0.5495]) tensor([0.4308, 0.1053, 0.3167, 0.1471]) -Greedy action tensor([ 0.1712, -1.4584, 0.4692, -0.0578]) tensor([0.2995, 0.0587, 0.4035, 0.2382]) -Greedy action tensor([1.2532, 0.9698, 0.3426, 0.5276]) tensor([0.3789, 0.2854, 0.1524, 0.1834]) -Greedy action tensor([ 0.7975, -1.3091, 1.8276, -0.2702]) tensor([0.2344, 0.0285, 0.6565, 0.0806]) -Greedy action tensor([ 0.0475, -0.1742, -0.2988, 0.1210]) tensor([0.2790, 0.2235, 0.1973, 0.3002]) -Greedy action tensor([ 0.6230, -1.3841, 0.0274, 0.0284]) tensor([0.4470, 0.0601, 0.2464, 0.2466]) -Greedy action tensor([-0.6170, -0.4656, 0.3291, -0.7399]) tensor([0.1778, 0.2069, 0.4580, 0.1573]) -Greedy action tensor([-0.2062, -0.9130, 0.1885, -1.0539]) tensor([0.2936, 0.1448, 0.4358, 0.1258]) -Greedy action tensor([ 0.9379, -0.7198, -0.2318, 1.0337]) tensor([0.3844, 0.0733, 0.1193, 0.4230]) -Greedy action tensor([ 0.7342, -2.1899, -0.0876, 1.0201]) tensor([0.3541, 0.0190, 0.1557, 0.4712]) -Greedy action tensor([ 1.8990, -2.0527, 0.5786, 0.8096]) tensor([0.6163, 0.0118, 0.1646, 0.2073]) -Greedy action tensor([ 0.5747, 0.6716, -0.3077, 0.3129]) tensor([0.3044, 0.3354, 0.1260, 0.2343]) -Greedy action tensor([ 0.4345, -0.9351, 0.2606, 1.0220]) tensor([0.2568, 0.0653, 0.2158, 0.4621]) -Greedy action tensor([ 0.2521, -0.7851, 0.7688, 0.1962]) tensor([0.2515, 0.0891, 0.4216, 0.2378]) -Greedy action tensor([ 0.4774, -0.0574, -0.4061, -0.2507]) tensor([0.4029, 0.2360, 0.1665, 0.1945]) -Greedy action tensor([ 0.5129, -0.8446, -0.2650, -0.5098]) tensor([0.4816, 0.1239, 0.2212, 0.1732]) -Greedy action tensor([ 0.3420, 0.1154, 0.1507, -0.0729]) tensor([0.3045, 0.2428, 0.2515, 0.2011]) -Greedy action tensor([ 0.4535, -1.0274, -0.5447, -0.9538]) tensor([0.5432, 0.1236, 0.2002, 0.1330]) -Greedy action tensor([-0.8579, -1.0800, -1.4986, 0.9781]) tensor([0.1163, 0.0931, 0.0613, 0.7293]) -Greedy action tensor([ 0.6691, -1.2778, 0.3636, -0.4590]) tensor([0.4539, 0.0648, 0.3344, 0.1469]) -Greedy action tensor([ 0.5173, -1.1608, -0.6457, -0.0470]) tensor([0.4836, 0.0903, 0.1511, 0.2750]) -Greedy action tensor([-0.6705, -0.0703, -0.8060, 0.7104]) tensor([0.1303, 0.2375, 0.1138, 0.5184]) -Greedy action tensor([-0.5990, -1.0691, -0.3982, -0.1305]) tensor([0.2250, 0.1406, 0.2750, 0.3594]) -Greedy action tensor([-0.0407, -1.4538, 1.3172, -0.6803]) tensor([0.1767, 0.0430, 0.6871, 0.0932]) -Greedy action tensor([ 1.2848, -0.9330, 0.0596, 0.3668]) tensor([0.5550, 0.0604, 0.1630, 0.2216]) -Greedy action tensor([ 0.2540, 0.2421, 1.1626, -0.4240]) tensor([0.2009, 0.1986, 0.4985, 0.1020]) -Greedy action tensor([-0.3419, -1.1948, 0.3680, -0.2059]) tensor([0.2171, 0.0925, 0.4416, 0.2488]) -Greedy action tensor([-1.3173, -0.8512, 0.2673, -0.9257]) tensor([0.1117, 0.1781, 0.5449, 0.1653]) -Greedy action tensor([0.3251, 0.4412, 0.2605, 0.0428]) tensor([0.2622, 0.2944, 0.2458, 0.1977]) -Greedy action tensor([ 0.0481, -0.0757, 0.3217, -0.1519]) tensor([0.2490, 0.2200, 0.3273, 0.2038]) -Greedy action tensor([ 0.6757, -1.5242, 0.6968, -0.3088]) tensor([0.3991, 0.0442, 0.4076, 0.1491]) -Greedy action tensor([-0.9473, 0.3229, -0.3365, -0.3195]) tensor([0.1208, 0.4303, 0.2225, 0.2263]) -Greedy action tensor([ 0.1322, -0.6457, -0.2896, -0.5265]) tensor([0.3798, 0.1745, 0.2491, 0.1966]) -Greedy action tensor([-0.2435, -1.3845, -0.7851, -0.3343]) tensor([0.3553, 0.1135, 0.2067, 0.3245]) -Greedy action tensor([-0.3740, -1.3113, 0.6031, -0.0889]) tensor([0.1859, 0.0728, 0.4940, 0.2473]) -Greedy action tensor([-0.1253, -1.1285, 0.3610, -0.6893]) tensor([0.2808, 0.1030, 0.4566, 0.1597]) -Greedy action tensor([-1.1357, -1.7851, 0.0089, 0.1940]) tensor([0.1184, 0.0619, 0.3720, 0.4477]) -Greedy action tensor([-0.4346, -0.8664, -0.2697, -0.0600]) tensor([0.2335, 0.1516, 0.2753, 0.3396]) -Greedy action tensor([ 1.4255, -1.1365, -0.0654, 0.5288]) tensor([0.5847, 0.0451, 0.1317, 0.2385]) -Greedy action tensor([-0.2582, -2.5843, -0.4714, -0.0665]) tensor([0.3208, 0.0313, 0.2592, 0.3886]) -Greedy action tensor([0.8540, 0.0583, 0.3405, 1.2731]) tensor([0.2801, 0.1264, 0.1676, 0.4259]) -Greedy action tensor([ 0.8116, -0.6505, 0.5269, 0.1870]) tensor([0.3969, 0.0920, 0.2986, 0.2125]) -Greedy action tensor([ 0.1512, -1.8901, -0.2272, 1.2665]) tensor([0.2055, 0.0267, 0.1408, 0.6270]) -Greedy action tensor([ 0.6815, 0.2423, 0.4484, -0.1042]) tensor([0.3457, 0.2228, 0.2738, 0.1576]) -Greedy action tensor([-0.1539, -0.1317, 0.2872, -1.0924]) tensor([0.2520, 0.2577, 0.3917, 0.0986]) -Greedy action tensor([-0.8145, -0.5803, 0.2424, 0.3059]) tensor([0.1218, 0.1540, 0.3506, 0.3736]) -Greedy action tensor([-0.3131, -1.0629, 0.5576, -0.3308]) tensor([0.2065, 0.0975, 0.4932, 0.2028]) -Greedy action tensor([ 1.1205, -0.3547, 0.4585, 0.9273]) tensor([0.3893, 0.0890, 0.2008, 0.3209]) -Greedy action tensor([0.2462, 0.0684, 0.1905, 0.1048]) tensor([0.2739, 0.2293, 0.2591, 0.2378]) -Greedy action tensor([ 0.3792, 0.2438, 0.7713, -0.2868]) tensor([0.2586, 0.2258, 0.3827, 0.1328]) -Greedy action tensor([-0.3828, -0.2376, -0.2550, -0.2927]) tensor([0.2280, 0.2636, 0.2590, 0.2494]) -Greedy action tensor([ 0.7888, -0.5551, 0.1457, -0.7243]) tensor([0.4983, 0.1300, 0.2620, 0.1097]) -Greedy action tensor([ 0.4981, -0.2853, -0.0457, -0.0458]) tensor([0.3820, 0.1745, 0.2218, 0.2217]) -Greedy action tensor([ 1.0503, -0.7952, 0.0872, -0.6919]) tensor([0.5832, 0.0921, 0.2226, 0.1021]) -Greedy action tensor([ 0.4253, -0.0457, -0.0329, -0.3070]) tensor([0.3653, 0.2281, 0.2310, 0.1756]) -Greedy action tensor([ 0.5374, -0.2910, -0.0119, -0.1554]) tensor([0.3977, 0.1737, 0.2296, 0.1989]) -Greedy action tensor([ 0.6102, -0.6049, -0.0965, -0.1667]) tensor([0.4445, 0.1319, 0.2192, 0.2044]) -Greedy action tensor([ 1.2506, -0.3940, -0.0792, -0.4004]) tensor([0.6063, 0.1171, 0.1604, 0.1163]) -Greedy action tensor([ 0.7464, -0.2458, -0.0012, 0.0152]) tensor([0.4300, 0.1594, 0.2036, 0.2070]) -Greedy action tensor([ 1.1212, -0.4418, -0.0736, -0.4029]) tensor([0.5780, 0.1211, 0.1750, 0.1259]) -Greedy action tensor([ 1.1691, -0.5377, 0.1556, -0.3955]) tensor([0.5703, 0.1035, 0.2070, 0.1193]) -Greedy action tensor([ 0.7143, -0.2216, -0.0858, -0.1768]) tensor([0.4441, 0.1742, 0.1995, 0.1822]) -Greedy action tensor([ 0.7005, -0.4155, -0.1240, -0.4958]) tensor([0.4835, 0.1584, 0.2120, 0.1462]) -Greedy action tensor([ 0.8099, -0.2069, 0.0441, -0.6009]) tensor([0.4829, 0.1747, 0.2246, 0.1178]) -Greedy action tensor([ 0.2882, 0.0044, -0.0397, -0.3204]) tensor([0.3314, 0.2495, 0.2388, 0.1803]) -Greedy action tensor([ 0.8682, 0.6335, -0.0107, -0.1502]) tensor([0.3895, 0.3080, 0.1617, 0.1407]) -Greedy action tensor([ 8.6824e-01, -4.5371e-01, 2.1613e-04, -2.6092e-01]) tensor([0.4976, 0.1327, 0.2089, 0.1609]) -Greedy action tensor([ 1.2878, -0.9631, -0.1027, -0.2854]) tensor([0.6404, 0.0674, 0.1594, 0.1328]) -Greedy action tensor([ 1.1489, -0.9037, 0.0428, -0.6394]) tensor([0.6148, 0.0789, 0.2034, 0.1028]) -Greedy action tensor([ 0.6734, -0.4871, -0.2145, -0.1460]) tensor([0.4618, 0.1447, 0.1900, 0.2035]) -Greedy action tensor([ 0.5284, -0.3662, -0.1458, -0.1239]) tensor([0.4100, 0.1676, 0.2089, 0.2135]) -Greedy action tensor([ 1.3029, -0.6742, -0.0120, -0.3239]) tensor([0.6236, 0.0864, 0.1674, 0.1226]) -Greedy action tensor([ 0.8125, 0.0683, -0.0470, 0.0307]) tensor([0.4244, 0.2017, 0.1797, 0.1942]) -Greedy action tensor([ 0.9875, -0.6548, -0.0303, -0.4790]) tensor([0.5600, 0.1084, 0.2024, 0.1292]) -Greedy action tensor([ 0.6908, -0.6343, 0.0756, -0.2567]) tensor([0.4558, 0.1211, 0.2464, 0.1767]) -Greedy action tensor([ 1.0470, -0.4903, -0.0294, -0.4725]) tensor([0.5635, 0.1211, 0.1920, 0.1233]) -Greedy action tensor([ 0.4624, -0.5745, -0.1401, -0.1245]) tensor([0.4068, 0.1442, 0.2227, 0.2262]) -Greedy action tensor([ 0.4316, -0.1008, 0.0686, -0.1403]) tensor([0.3512, 0.2062, 0.2443, 0.1983]) -Greedy action tensor([ 0.6827, -0.1933, -0.0058, -0.3686]) tensor([0.4409, 0.1836, 0.2215, 0.1541]) -Greedy action tensor([ 1.0063, -0.6442, -0.0745, -0.4560]) tensor([0.5672, 0.1089, 0.1925, 0.1314]) -Greedy action tensor([ 0.5522, -0.4158, -0.0452, -0.1098]) tensor([0.4089, 0.1553, 0.2250, 0.2109]) -Greedy action tensor([ 0.4864, -0.1337, 0.0350, -0.4748]) tensor([0.3911, 0.2104, 0.2490, 0.1496]) -Greedy action tensor([ 0.3544, 0.0671, -0.0103, -0.0594]) tensor([0.3220, 0.2416, 0.2236, 0.2129]) -Greedy action tensor([ 0.8658, -0.6872, -0.0144, -0.3212]) tensor([0.5177, 0.1096, 0.2147, 0.1580]) -Greedy action tensor([ 0.6770, -0.1442, 0.0605, 0.0587]) tensor([0.3970, 0.1747, 0.2143, 0.2139]) -Greedy action tensor([ 0.7909, -0.5322, -0.0386, -0.1432]) tensor([0.4772, 0.1271, 0.2082, 0.1875]) -Greedy action tensor([ 0.7766, -0.4796, -0.0336, -0.5569]) tensor([0.5018, 0.1429, 0.2232, 0.1322]) -Greedy action tensor([ 0.6705, -0.6468, 0.0954, -0.2107]) tensor([0.4455, 0.1193, 0.2507, 0.1845]) -Greedy action tensor([ 0.9839, -0.6121, -0.0659, -0.3764]) tensor([0.5527, 0.1120, 0.1934, 0.1418]) -Greedy action tensor([ 0.3848, 0.3844, -0.3358, 0.1022]) tensor([0.3086, 0.3085, 0.1501, 0.2327]) -Greedy action tensor([ 1.0250, -0.8562, -0.0729, -0.4227]) tensor([0.5810, 0.0886, 0.1938, 0.1366]) -Greedy action tensor([ 0.4863, -0.4168, -0.0708, -0.4329]) tensor([0.4207, 0.1705, 0.2410, 0.1678]) -Greedy action tensor([ 0.5975, 0.1591, -0.1677, -0.1064]) tensor([0.3839, 0.2476, 0.1786, 0.1899]) -Greedy action tensor([ 0.2292, -0.0335, 0.0391, 0.1807]) tensor([0.2818, 0.2167, 0.2330, 0.2685]) -Greedy action tensor([ 0.4960, 0.0205, 0.0491, -0.2340]) tensor([0.3646, 0.2266, 0.2332, 0.1757]) -Greedy action tensor([ 0.6571, -0.1268, -0.0967, -0.2265]) tensor([0.4272, 0.1951, 0.2011, 0.1766]) -Greedy action tensor([ 0.9237, -0.4492, -0.2350, -0.7776]) tensor([0.5715, 0.1448, 0.1794, 0.1043]) -Greedy action tensor([ 1.2078, -0.9009, 0.0734, -0.5891]) tensor([0.6216, 0.0755, 0.1999, 0.1031]) -Greedy action tensor([ 0.5715, -0.3619, 0.0245, -0.4915]) tensor([0.4315, 0.1697, 0.2497, 0.1491]) -Greedy action tensor([ 0.6494, -0.5006, -0.2414, -0.2878]) tensor([0.4720, 0.1495, 0.1937, 0.1849]) -Greedy action tensor([ 0.5453, -0.0690, -0.0594, 0.0444]) tensor([0.3713, 0.2009, 0.2028, 0.2250]) -Greedy action tensor([ 0.9318, -0.4748, 0.1770, -0.1525]) tensor([0.4870, 0.1193, 0.2290, 0.1647]) -Greedy action tensor([ 0.4179, -0.1052, -0.0408, -0.0878]) tensor([0.3536, 0.2096, 0.2235, 0.2133]) -Greedy action tensor([ 0.7537, -0.6583, 0.0151, -0.4162]) tensor([0.4922, 0.1199, 0.2352, 0.1528]) -Greedy action tensor([ 0.6835, -0.3851, 0.1096, -0.2703]) tensor([0.4363, 0.1499, 0.2458, 0.1681]) -Greedy action tensor([ 0.8460, -0.7273, 0.0663, -0.5490]) tensor([0.5225, 0.1084, 0.2396, 0.1295]) -Greedy action tensor([ 0.6427, -0.3389, -0.0054, -0.4162]) tensor([0.4455, 0.1669, 0.2330, 0.1545]) -Greedy action tensor([ 0.5793, -0.4061, -0.1066, -0.2358]) tensor([0.4311, 0.1609, 0.2171, 0.1908]) -Greedy action tensor([ 1.1549, -0.9770, 0.1665, -0.6456]) tensor([0.6039, 0.0716, 0.2247, 0.0998]) -Greedy action tensor([ 0.8889, -0.5449, -0.0060, -0.4758]) tensor([0.5256, 0.1253, 0.2148, 0.1343]) -Greedy action tensor([ 0.8290, -0.2950, -0.1154, -0.2904]) tensor([0.4901, 0.1593, 0.1906, 0.1600]) -Greedy action tensor([ 0.8694, -0.8107, 0.1444, -0.3881]) tensor([0.5115, 0.0953, 0.2477, 0.1455]) -Greedy action tensor([ 0.9186, -0.6900, -0.0247, -0.3060]) tensor([0.5310, 0.1063, 0.2067, 0.1560]) -Greedy action tensor([ 0.7464, -0.4057, -0.0856, -0.1252]) tensor([0.4609, 0.1457, 0.2006, 0.1928]) -Greedy action tensor([ 0.5368, -0.3961, -0.1608, -0.1352]) tensor([0.4163, 0.1638, 0.2072, 0.2126]) -Greedy action tensor([ 0.4947, -0.5884, 0.0318, -0.1479]) tensor([0.4010, 0.1357, 0.2524, 0.2109]) -Greedy action tensor([ 0.7364, -0.4144, 0.1040, -0.5454]) tensor([0.4705, 0.1489, 0.2500, 0.1306]) -Greedy action tensor([ 0.5316, 0.2931, -0.1726, 0.0991]) tensor([0.3412, 0.2688, 0.1687, 0.2214]) -Greedy action tensor([ 1.0597, -0.9190, -0.0288, -0.4904]) tensor([0.5927, 0.0819, 0.1996, 0.1258]) -Greedy action tensor([ 0.6862, -0.2924, -0.0523, -0.0922]) tensor([0.4324, 0.1625, 0.2066, 0.1985]) -Greedy action tensor([ 0.5280, 0.1462, -0.0847, 0.0658]) tensor([0.3503, 0.2391, 0.1898, 0.2207]) -Greedy action tensor([ 0.7879, -0.3283, -0.0530, -0.1175]) tensor([0.4623, 0.1514, 0.1994, 0.1869]) -Greedy action tensor([ 0.7066, -0.5819, -0.0539, -0.3088]) tensor([0.4750, 0.1309, 0.2220, 0.1721]) -Greedy action tensor([ 0.7750, -0.5381, -0.0795, -0.2208]) tensor([0.4845, 0.1303, 0.2062, 0.1790]) -Greedy action tensor([ 0.6138, -0.3173, -0.1731, -0.1768]) tensor([0.4342, 0.1711, 0.1977, 0.1970]) -Greedy action tensor([ 0.6085, -0.4350, -0.0927, 0.0191]) tensor([0.4162, 0.1466, 0.2064, 0.2308]) -Greedy action tensor([ 0.8938, -0.4883, 0.0369, -0.5368]) tensor([0.5223, 0.1311, 0.2217, 0.1249]) -Greedy action tensor([ 0.3416, 0.1287, -0.0956, -0.1327]) tensor([0.3251, 0.2627, 0.2099, 0.2023]) -Greedy action tensor([ 0.3419, -0.0784, -0.0117, -0.4410]) tensor([0.3551, 0.2332, 0.2493, 0.1623]) -Greedy action tensor([ 0.8932, -0.6778, -0.1499, -0.2299]) tensor([0.5304, 0.1102, 0.1869, 0.1725]) -Greedy action tensor([ 0.3972, -0.3867, -0.0611, -0.1026]) tensor([0.3710, 0.1694, 0.2346, 0.2251]) -Greedy action tensor([ 0.7502, -0.1408, -0.1919, 0.1813]) tensor([0.4226, 0.1734, 0.1647, 0.2393]) -Greedy action tensor([ 2.9855, -0.8624, -0.3138, 0.8261]) tensor([0.8521, 0.0182, 0.0314, 0.0983]) -Greedy action tensor([ 1.3632, -0.6410, -0.3529, 0.2682]) tensor([0.6064, 0.0817, 0.1090, 0.2029]) -Greedy action tensor([ 1.5391, -0.5461, -0.6416, 0.4517]) tensor([0.6352, 0.0789, 0.0718, 0.2141]) -Greedy action tensor([ 1.8228, -0.1036, -1.0798, 0.9139]) tensor([0.6236, 0.0908, 0.0342, 0.2513]) -Greedy action tensor([ 2.3551, -1.2198, -0.5387, 0.9731]) tensor([0.7494, 0.0210, 0.0415, 0.1882]) -Greedy action tensor([ 1.2365, -0.2275, -0.4378, -0.1301]) tensor([0.5975, 0.1382, 0.1120, 0.1523]) -Greedy action tensor([ 2.0631, -0.5748, -0.2982, 0.6514]) tensor([0.7095, 0.0507, 0.0669, 0.1729]) -Greedy action tensor([ 1.1872, -0.5074, 0.0333, 0.2175]) tensor([0.5324, 0.0978, 0.1679, 0.2019]) -Greedy action tensor([ 1.4404, -0.5180, -0.6389, 0.5946]) tensor([0.5899, 0.0832, 0.0737, 0.2532]) -Greedy action tensor([ 0.8810, -0.4388, -0.2809, -0.0169]) tensor([0.5031, 0.1344, 0.1574, 0.2050]) -Greedy action tensor([ 1.3228, -0.0520, -0.2470, -0.5954]) tensor([0.6219, 0.1573, 0.1294, 0.0913]) -Greedy action tensor([ 1.2971, -0.4063, -0.5043, 0.3006]) tensor([0.5826, 0.1061, 0.0962, 0.2151]) -Greedy action tensor([ 1.8996, -0.7478, -0.4997, 0.8011]) tensor([0.6689, 0.0474, 0.0607, 0.2230]) -Greedy action tensor([ 1.2617, -0.5431, -0.3528, 0.7856]) tensor([0.5039, 0.0829, 0.1003, 0.3130]) -Greedy action tensor([ 1.5442, -0.7038, -0.3556, 0.2560]) tensor([0.6532, 0.0690, 0.0977, 0.1801]) -Greedy action tensor([ 2.3636, -0.7754, -0.5756, 0.4590]) tensor([0.8031, 0.0348, 0.0425, 0.1196]) -Greedy action tensor([ 1.6053, -0.2725, -0.3435, 0.2924]) tensor([0.6392, 0.0978, 0.0911, 0.1720]) -Greedy action tensor([ 1.6272, -0.9257, -0.2549, 0.5397]) tensor([0.6381, 0.0497, 0.0972, 0.2151]) -Greedy action tensor([ 1.1952, -0.3232, -0.1264, -0.1949]) tensor([0.5764, 0.1263, 0.1537, 0.1436]) -Greedy action tensor([ 1.2275, -0.1716, -0.6324, 0.3476]) tensor([0.5503, 0.1358, 0.0857, 0.2283]) -Greedy action tensor([ 1.1059, 0.0149, -0.1700, -0.0803]) tensor([0.5207, 0.1749, 0.1454, 0.1590]) -Greedy action tensor([ 1.6323, -0.2574, -0.7994, 0.5207]) tensor([0.6377, 0.0964, 0.0560, 0.2098]) -Greedy action tensor([ 1.1757, -0.1877, -0.0358, 0.0776]) tensor([0.5299, 0.1356, 0.1578, 0.1767]) -Greedy action tensor([ 0.7815, -0.3041, -0.0851, 0.0475]) tensor([0.4468, 0.1509, 0.1878, 0.2145]) -Greedy action tensor([ 1.3532, 0.1755, -0.6997, 0.4026]) tensor([0.5486, 0.1690, 0.0704, 0.2120]) -Greedy action tensor([ 1.8965, -0.8105, -0.5238, 0.3577]) tensor([0.7298, 0.0487, 0.0649, 0.1566]) -Greedy action tensor([ 1.7204, -0.8409, -0.3965, 0.1222]) tensor([0.7143, 0.0552, 0.0860, 0.1445]) -Greedy action tensor([ 0.8795, -0.2811, 0.0011, 0.1175]) tensor([0.4555, 0.1427, 0.1892, 0.2126]) -Greedy action tensor([ 1.0314, -0.5232, -0.0314, 0.1649]) tensor([0.5058, 0.1069, 0.1747, 0.2126]) -Greedy action tensor([ 1.6847, -0.5803, -0.0869, -0.0607]) tensor([0.6904, 0.0717, 0.1174, 0.1205]) -Greedy action tensor([ 2.1326, -0.2101, -0.3937, 0.5770]) tensor([0.7209, 0.0693, 0.0576, 0.1522]) -Greedy action tensor([ 1.9747, -1.0934, 0.0088, 0.3424]) tensor([0.7236, 0.0337, 0.1013, 0.1414]) -Greedy action tensor([ 0.9114, -0.3361, -0.8118, 0.2643]) tensor([0.5027, 0.1444, 0.0897, 0.2632]) -Greedy action tensor([ 1.7477, -0.9991, -0.2240, 0.4862]) tensor([0.6727, 0.0431, 0.0937, 0.1905]) -Greedy action tensor([ 1.7134, -0.7488, -0.1939, 0.5592]) tensor([0.6456, 0.0550, 0.0959, 0.2036]) -Greedy action tensor([ 2.2835, -0.4515, -0.5182, 0.5936]) tensor([0.7633, 0.0495, 0.0463, 0.1409]) -Greedy action tensor([ 0.9766, -0.1748, -0.4420, 0.1124]) tensor([0.5051, 0.1597, 0.1223, 0.2129]) -Greedy action tensor([ 1.5187, -0.4993, -0.4097, 0.7424]) tensor([0.5752, 0.0765, 0.0836, 0.2647]) -Greedy action tensor([ 1.6552, -0.7351, -0.4166, 0.2536]) tensor([0.6832, 0.0626, 0.0860, 0.1682]) -Greedy action tensor([ 1.1932, -0.3274, -0.3901, -0.2673]) tensor([0.6039, 0.1320, 0.1240, 0.1402]) -Greedy action tensor([ 1.1514, -0.3541, -0.3196, 0.4579]) tensor([0.5125, 0.1137, 0.1177, 0.2561]) -Greedy action tensor([ 1.5774, -0.9313, -0.2437, 0.2942]) tensor([0.6577, 0.0535, 0.1065, 0.1823]) -Greedy action tensor([ 1.4872, -0.4623, -0.9197, 0.1882]) tensor([0.6643, 0.0946, 0.0599, 0.1812]) -Greedy action tensor([ 1.4134, -0.4265, -0.0438, 0.1472]) tensor([0.5975, 0.0949, 0.1392, 0.1684]) -Greedy action tensor([ 1.8069, 0.1928, -0.5795, -0.1703]) tensor([0.6995, 0.1393, 0.0643, 0.0969]) -Greedy action tensor([ 2.0630, -1.4332, -0.4540, 1.0532]) tensor([0.6778, 0.0205, 0.0547, 0.2469]) -Greedy action tensor([ 2.0650, -0.9293, -0.4004, 0.3333]) tensor([0.7622, 0.0382, 0.0648, 0.1349]) -Greedy action tensor([ 1.2120, -0.2333, -0.6437, 0.2563]) tensor([0.5629, 0.1327, 0.0880, 0.2165]) -Greedy action tensor([ 2.0504, -0.8112, -0.8370, 0.2946]) tensor([0.7778, 0.0445, 0.0433, 0.1344]) -Greedy action tensor([ 1.4443, 0.2596, -0.5992, 0.3392]) tensor([0.5661, 0.1731, 0.0733, 0.1875]) -Greedy action tensor([ 1.7123, -0.3165, -0.5473, 0.4928]) tensor([0.6531, 0.0859, 0.0682, 0.1929]) -Greedy action tensor([ 1.9668, -0.0059, -1.1575, 0.8101]) tensor([0.6678, 0.0929, 0.0294, 0.2100]) -Greedy action tensor([ 1.6367, -0.6660, -0.5894, 1.0530]) tensor([0.5663, 0.0566, 0.0611, 0.3159]) -Greedy action tensor([ 1.2283, 0.3659, -0.8091, 0.1195]) tensor([0.5312, 0.2243, 0.0693, 0.1753]) -Greedy action tensor([ 1.4820, -0.1030, -0.5211, 0.3085]) tensor([0.6064, 0.1243, 0.0818, 0.1875]) -Greedy action tensor([ 2.3411, 0.4057, 0.0420, -0.0180]) tensor([0.7467, 0.1078, 0.0749, 0.0706]) -Greedy action tensor([ 1.6782, -0.7469, -0.1318, 0.1852]) tensor([0.6771, 0.0599, 0.1108, 0.1521]) -Greedy action tensor([ 1.6562, -0.6974, -0.3717, 0.3390]) tensor([0.6691, 0.0636, 0.0881, 0.1792]) -Greedy action tensor([ 1.9556, -0.6359, -0.7058, 0.3941]) tensor([0.7382, 0.0553, 0.0516, 0.1549]) -Greedy action tensor([ 1.4182, -0.5484, -0.3206, 0.3876]) tensor([0.5979, 0.0837, 0.1051, 0.2133]) -Greedy action tensor([ 0.9455, -0.4517, 0.1896, 0.0551]) tensor([0.4701, 0.1162, 0.2207, 0.1930]) -Greedy action tensor([ 2.0026, -1.1438, 0.1314, 0.9218]) tensor([0.6509, 0.0280, 0.1002, 0.2209]) -Greedy action tensor([ 1.8942, -0.6542, -0.0901, 0.5969]) tensor([0.6716, 0.0525, 0.0923, 0.1835]) -Greedy action tensor([ 0.4540, -0.5224, -0.3065, 0.3558]) tensor([0.3636, 0.1369, 0.1699, 0.3296]) -Greedy action tensor([ 1.3815, 0.1182, -0.3501, 0.4189]) tensor([0.5430, 0.1535, 0.0961, 0.2074]) -Greedy action tensor([ 1.6118, -0.5825, -0.2216, 0.1089]) tensor([0.6694, 0.0746, 0.1070, 0.1489]) -Greedy action tensor([ 1.7108, -0.6008, -0.0694, 0.4538]) tensor([0.6442, 0.0638, 0.1086, 0.1833]) -Greedy action tensor([ 1.4768, -0.6514, -0.3380, 0.1936]) tensor([0.6414, 0.0764, 0.1045, 0.1778]) -Greedy action tensor([ 1.7355, -0.5066, 0.0556, 0.3321]) tensor([0.6500, 0.0691, 0.1212, 0.1597]) -Greedy action tensor([ 1.7900, -0.3515, -0.6775, 0.4262]) tensor([0.6859, 0.0806, 0.0582, 0.1754]) -Greedy action tensor([ 2.1340, -0.5483, -0.2529, 0.2344]) tensor([0.7634, 0.0522, 0.0702, 0.1142]) -Greedy action tensor([ 1.0375, -0.7603, -0.2777, -0.3036]) tensor([0.5897, 0.0977, 0.1583, 0.1543]) -Greedy action tensor([ 1.6427, -0.5641, -0.8286, 0.1656]) tensor([0.7028, 0.0773, 0.0594, 0.1605]) -Greedy action tensor([ 0.4524, -0.3624, -0.3541, 0.1977]) tensor([0.3753, 0.1662, 0.1676, 0.2909]) -Greedy action tensor([ 1.5285, -0.2370, -0.5562, 0.0925]) tensor([0.6522, 0.1116, 0.0811, 0.1551]) -Greedy action tensor([ 1.7721, -0.7033, 0.0394, 0.1771]) tensor([0.6831, 0.0575, 0.1208, 0.1386]) -Greedy action tensor([ 1.8637, -0.3653, -0.6568, 0.3453]) tensor([0.7107, 0.0765, 0.0572, 0.1557]) -Greedy action tensor([ 1.2902, -0.4324, -0.2266, 0.3936]) tensor([0.5537, 0.0989, 0.1215, 0.2259]) -Greedy action tensor([ 1.4742, -0.1520, -0.5966, 0.4952]) tensor([0.5888, 0.1158, 0.0742, 0.2212]) -Greedy action tensor([ 1.8067, -0.9198, -0.4556, 0.1184]) tensor([0.7383, 0.0483, 0.0769, 0.1365]) -Greedy action tensor([ 1.4511, -0.5102, -0.1565, 0.1947]) tensor([0.6151, 0.0865, 0.1232, 0.1751]) -Greedy action tensor([0.9061, 0.0365, 0.7234, 0.8913]) tensor([0.3089, 0.1295, 0.2573, 0.3043]) -Greedy action tensor([-0.6884, 0.0291, 0.1885, -0.7110]) tensor([0.1555, 0.3187, 0.3738, 0.1520]) -Greedy action tensor([-0.6567, -1.0849, 0.1022, -1.1243]) tensor([0.2265, 0.1476, 0.4839, 0.1419]) -Greedy action tensor([-0.2864, -0.7878, 0.4102, -0.3241]) tensor([0.2186, 0.1324, 0.4386, 0.2105]) -Greedy action tensor([ 0.8983, -1.2821, 1.4266, -0.6351]) tensor([0.3306, 0.0374, 0.5607, 0.0713]) -Greedy action tensor([-0.0303, -0.7842, -0.3730, -0.1342]) tensor([0.3245, 0.1527, 0.2303, 0.2925]) -Greedy action tensor([-0.2463, 0.3836, -1.0161, 0.1374]) tensor([0.2080, 0.3905, 0.0963, 0.3053]) -Greedy action tensor([-0.8410, -0.5520, 0.2240, -0.6298]) tensor([0.1545, 0.2063, 0.4483, 0.1909]) -Greedy action tensor([-0.8391, 0.0643, -0.9718, 0.1484]) tensor([0.1423, 0.3512, 0.1246, 0.3820]) -Greedy action tensor([ 0.7439, 0.8254, 1.1367, -0.9890]) tensor([0.2672, 0.2899, 0.3957, 0.0472]) -Greedy action tensor([-0.7435, -0.7975, -0.5931, -0.1679]) tensor([0.2046, 0.1938, 0.2378, 0.3638]) -Greedy action tensor([-0.1396, -0.4802, 0.2662, 0.7721]) tensor([0.1754, 0.1248, 0.2632, 0.4366]) -Greedy action tensor([ 0.2783, -0.6278, 0.0020, 0.3303]) tensor([0.3109, 0.1256, 0.2359, 0.3275]) -Greedy action tensor([ 0.4997, -1.2979, 0.3401, -0.2868]) tensor([0.4043, 0.0670, 0.3446, 0.1841]) -Greedy action tensor([ 1.4134, -0.7212, 1.5038, 0.3667]) tensor([0.3900, 0.0461, 0.4269, 0.1369]) -Greedy action tensor([ 1.1906, -1.1799, 0.1219, -0.9010]) tensor([0.6409, 0.0599, 0.2201, 0.0791]) -Greedy action tensor([ 0.5072, -0.8716, 0.0673, -0.0934]) tensor([0.4091, 0.1030, 0.2635, 0.2244]) -Greedy action tensor([-0.7616, -0.9864, -0.1481, -1.1887]) tensor([0.2327, 0.1858, 0.4297, 0.1518]) -Greedy action tensor([ 0.5017, -0.2712, 0.1768, -0.6882]) tensor([0.4018, 0.1855, 0.2904, 0.1223]) -Greedy action tensor([ 1.5225, -0.5328, 0.6559, 0.1055]) tensor([0.5584, 0.0715, 0.2347, 0.1354]) -Greedy action tensor([ 0.1052, -0.1178, -0.4574, -0.7196]) tensor([0.3561, 0.2849, 0.2029, 0.1561]) -Greedy action tensor([ 0.5271, -0.0598, 0.1254, -0.2024]) tensor([0.3694, 0.2054, 0.2472, 0.1781]) -Greedy action tensor([-0.0270, -0.0254, 0.2238, -0.7159]) tensor([0.2639, 0.2644, 0.3392, 0.1325]) -Greedy action tensor([-0.3485, 0.1789, 0.6939, 0.0172]) tensor([0.1434, 0.2430, 0.4068, 0.2068]) -Greedy action tensor([-1.1972, -0.8787, 0.3597, -0.0835]) tensor([0.0984, 0.1353, 0.4667, 0.2996]) -Greedy action tensor([1.0179, 0.6112, 0.6324, 0.2619]) tensor([0.3552, 0.2365, 0.2415, 0.1668]) -Greedy action tensor([ 1.3702, -1.3048, 0.7562, -0.4434]) tensor([0.5640, 0.0389, 0.3052, 0.0920]) -Greedy action tensor([-0.3001, -0.7280, 0.2825, 0.0179]) tensor([0.2076, 0.1353, 0.3717, 0.2853]) -Greedy action tensor([-0.4626, -0.6233, 0.6604, -1.2258]) tensor([0.1855, 0.1579, 0.5701, 0.0865]) -Greedy action tensor([-0.0540, -0.4337, 0.0374, -0.3301]) tensor([0.2826, 0.1933, 0.3096, 0.2144]) -Greedy action tensor([ 0.1749, 0.7613, 0.5390, -0.5612]) tensor([0.2121, 0.3812, 0.3052, 0.1016]) -Greedy action tensor([ 1.3732, -0.6532, 0.8086, -0.2823]) tensor([0.5287, 0.0697, 0.3006, 0.1010]) -Greedy action tensor([ 0.4098, -0.0379, 0.1378, -0.0873]) tensor([0.3323, 0.2124, 0.2532, 0.2021]) -Greedy action tensor([-0.4349, 0.2178, -0.4994, 0.0509]) tensor([0.1824, 0.3503, 0.1710, 0.2964]) -Greedy action tensor([ 0.6069, 0.7580, 0.2948, -0.5854]) tensor([0.3127, 0.3636, 0.2288, 0.0949]) -Greedy action tensor([-1.0136, -1.0887, 1.1608, -0.9788]) tensor([0.0850, 0.0789, 0.7480, 0.0880]) -Greedy action tensor([-0.2392, 0.0185, 0.3461, 0.4146]) tensor([0.1663, 0.2152, 0.2986, 0.3198]) -Greedy action tensor([-0.0142, 0.6519, 0.4510, -0.7710]) tensor([0.1997, 0.3887, 0.3179, 0.0937]) -Greedy action tensor([ 0.3820, -1.3234, 0.2384, -0.7391]) tensor([0.4213, 0.0765, 0.3649, 0.1373]) -Greedy action tensor([ 0.0011, -0.9716, 0.7868, 0.9571]) tensor([0.1620, 0.0612, 0.3554, 0.4214]) -Greedy action tensor([ 0.3011, -0.5089, 0.3638, -0.1217]) tensor([0.3160, 0.1406, 0.3364, 0.2070]) -Greedy action tensor([-0.4802, -0.1506, 0.2162, -0.9878]) tensor([0.2000, 0.2782, 0.4014, 0.1204]) -Greedy action tensor([ 1.7010, -1.7961, -0.2978, 1.0467]) tensor([0.5933, 0.0180, 0.0804, 0.3084]) -Greedy action tensor([ 0.4498, -0.9069, 0.6821, -1.1742]) tensor([0.3682, 0.0948, 0.4644, 0.0726]) -Greedy action tensor([-0.5286, -0.8606, 0.0923, -0.4542]) tensor([0.2148, 0.1541, 0.3997, 0.2314]) -Greedy action tensor([ 1.3244, -0.1638, -0.5565, 0.1498]) tensor([0.5927, 0.1338, 0.0904, 0.1831]) -Greedy action tensor([ 0.8963, -1.4031, 0.2790, 0.5268]) tensor([0.4290, 0.0430, 0.2314, 0.2965]) -Greedy action tensor([ 0.3461, -2.3474, -0.3941, 0.3303]) tensor([0.3954, 0.0267, 0.1886, 0.3892]) -Greedy action tensor([ 1.0058, -1.1999, -0.4498, 0.4461]) tensor([0.5223, 0.0575, 0.1218, 0.2984]) -Greedy action tensor([-1.0180, -1.1654, -0.0117, -0.8790]) tensor([0.1740, 0.1501, 0.4759, 0.1999]) -Greedy action tensor([-0.1826, -0.4362, -0.0014, -0.0751]) tensor([0.2446, 0.1898, 0.2932, 0.2724]) -Greedy action tensor([ 0.3460, -0.8434, -0.6188, -0.7650]) tensor([0.4963, 0.1511, 0.1891, 0.1634]) -Greedy action tensor([-0.2988, -0.6656, 0.3165, -0.9071]) tensor([0.2447, 0.1695, 0.4527, 0.1332]) -Greedy action tensor([ 1.0936, 0.2583, -0.8855, 0.1722]) tensor([0.5076, 0.2202, 0.0702, 0.2020]) -Greedy action tensor([ 0.0054, 0.6693, 0.6271, -0.4949]) tensor([0.1848, 0.3590, 0.3441, 0.1121]) -Greedy action tensor([-0.5153, -0.7181, 0.3820, 0.4067]) tensor([0.1474, 0.1204, 0.3616, 0.3706]) -Greedy action tensor([ 0.1214, -2.2327, 0.0707, -0.0935]) tensor([0.3506, 0.0333, 0.3333, 0.2828]) -Greedy action tensor([ 1.2132, -1.7130, 0.2297, 1.0293]) tensor([0.4425, 0.0237, 0.1655, 0.3682]) -Greedy action tensor([-0.0097, -1.3892, 0.1877, -0.4975]) tensor([0.3243, 0.0816, 0.3950, 0.1991]) -Greedy action tensor([ 0.4615, 0.4935, -0.4484, -0.4094]) tensor([0.3504, 0.3618, 0.1411, 0.1467]) -Greedy action tensor([-0.0191, -0.2628, -0.8040, 0.6431]) tensor([0.2393, 0.1875, 0.1092, 0.4640]) -Greedy action tensor([ 0.6356, -0.5620, 0.5623, 0.2129]) tensor([0.3464, 0.1046, 0.3220, 0.2270]) -Greedy action tensor([-0.5396, 0.0348, -0.2794, -1.2991]) tensor([0.2202, 0.3911, 0.2857, 0.1030]) -Greedy action tensor([ 0.9370, 0.2201, 0.4109, -0.6690]) tensor([0.4386, 0.2142, 0.2592, 0.0880]) -Greedy action tensor([ 1.3624, -0.7224, -0.3414, 1.2574]) tensor([0.4532, 0.0563, 0.0825, 0.4080]) -Greedy action tensor([ 0.8429, 0.3436, -0.6654, 0.9990]) tensor([0.3337, 0.2025, 0.0738, 0.3900]) -Greedy action tensor([ 0.0505, -1.3720, 0.4993, -1.2119]) tensor([0.3236, 0.0780, 0.5069, 0.0916]) -Greedy action tensor([-1.3936, -0.3047, -0.2262, -0.0420]) tensor([0.0905, 0.2689, 0.2909, 0.3497]) -Greedy action tensor([-0.0514, -0.3927, -0.7955, 0.2938]) tensor([0.2779, 0.1976, 0.1321, 0.3925]) -Greedy action tensor([ 0.4226, -0.8200, -1.1210, 0.0369]) tensor([0.4583, 0.1323, 0.0979, 0.3116]) -Greedy action tensor([-0.0689, -0.9884, -0.1976, -0.6971]) tensor([0.3557, 0.1418, 0.3127, 0.1898]) -Greedy action tensor([ 1.1031, -0.2436, 1.3052, 0.7846]) tensor([0.3114, 0.0810, 0.3812, 0.2265]) -Greedy action tensor([0.9659, 0.4590, 1.0958, 0.0376]) tensor([0.3188, 0.1921, 0.3631, 0.1260]) -Greedy action tensor([ 0.7401, -0.6789, 0.7534, -0.8703]) tensor([0.4073, 0.0986, 0.4128, 0.0814]) -Greedy action tensor([ 0.5631, -0.5599, -0.3699, -0.4169]) tensor([0.4776, 0.1554, 0.1879, 0.1792]) -Greedy action tensor([-0.1799, -1.3303, 0.3773, -0.3282]) tensor([0.2548, 0.0807, 0.4449, 0.2197]) -Greedy action tensor([ 0.1987, -1.3073, 0.3558, 0.0926]) tensor([0.3038, 0.0674, 0.3555, 0.2733]) -Greedy action tensor([ 0.2636, 0.5002, -0.2144, -0.2918]) tensor([0.2890, 0.3661, 0.1792, 0.1658]) -Greedy action tensor([ 3.7229e-04, 4.6161e-03, -2.5432e-01, -5.9702e-01]) tensor([0.3003, 0.3016, 0.2328, 0.1653]) -Greedy action tensor([ 0.9700, -0.6834, 0.4290, 0.3455]) tensor([0.4331, 0.0829, 0.2521, 0.2319]) -Greedy action tensor([ 0.5114, -0.6072, -0.7214, 0.1092]) tensor([0.4372, 0.1429, 0.1274, 0.2924]) -Greedy action tensor([-1.2528, -0.4175, 0.3969, -0.0663]) tensor([0.0848, 0.1956, 0.4416, 0.2779]) -Greedy action tensor([-1.7137, -0.3570, 0.6893, 0.1306]) tensor([0.0449, 0.1744, 0.4966, 0.2840]) -Greedy action tensor([-1.9248, -0.4083, 0.6529, -0.1676]) tensor([0.0408, 0.1858, 0.5370, 0.2364]) -Greedy action tensor([-1.6778, -0.4591, 0.5298, -0.0533]) tensor([0.0539, 0.1823, 0.4902, 0.2736]) -Greedy action tensor([-1.9275, -0.3518, 0.6407, -0.1683]) tensor([0.0405, 0.1958, 0.5284, 0.2353]) -Greedy action tensor([-1.8042, -0.2231, 0.5824, -0.0874]) tensor([0.0448, 0.2179, 0.4877, 0.2496]) -Greedy action tensor([0.4307, 1.1306, 0.1610, 0.8670]) tensor([0.1878, 0.3782, 0.1434, 0.2906]) -Greedy action tensor([-1.7466, -0.3867, 0.6068, -0.0865]) tensor([0.0484, 0.1884, 0.5088, 0.2544]) -Greedy action tensor([-1.9288, -0.4469, 0.6605, -0.1720]) tensor([0.0408, 0.1795, 0.5433, 0.2363]) -Greedy action tensor([-1.8373, -0.3146, 0.6039, -0.0972]) tensor([0.0439, 0.2013, 0.5045, 0.2502]) -Greedy action tensor([-1.8975, -0.3764, 0.6337, -0.1476]) tensor([0.0418, 0.1915, 0.5259, 0.2408]) -Greedy action tensor([-1.3910, 0.0324, 0.3872, -0.0877]) tensor([0.0678, 0.2814, 0.4012, 0.2496]) -Greedy action tensor([-1.8965, -0.4333, 0.6436, -0.1581]) tensor([0.0422, 0.1824, 0.5353, 0.2401]) -Greedy action tensor([-1.7864, -0.4743, 0.6658, 0.0615]) tensor([0.0441, 0.1638, 0.5122, 0.2799]) -Greedy action tensor([-1.8956, -0.4369, 0.6425, -0.1541]) tensor([0.0423, 0.1817, 0.5349, 0.2411]) -Greedy action tensor([-1.9170, -0.4135, 0.6530, -0.1646]) tensor([0.0411, 0.1848, 0.5370, 0.2371]) -Greedy action tensor([-1.9168, -0.4218, 0.6526, -0.1616]) tensor([0.0411, 0.1835, 0.5373, 0.2380]) -Greedy action tensor([-1.0544, 0.9501, 0.2078, 0.1792]) tensor([0.0650, 0.4823, 0.2296, 0.2231]) -Greedy action tensor([-1.8701, -0.4504, 0.6334, -0.1433]) tensor([0.0435, 0.1800, 0.5319, 0.2446]) -Greedy action tensor([-1.9192, -0.4116, 0.6554, -0.1663]) tensor([0.0410, 0.1850, 0.5377, 0.2364]) -Greedy action tensor([-1.9308, -0.4464, 0.6623, -0.1722]) tensor([0.0407, 0.1794, 0.5438, 0.2361]) -Greedy action tensor([-0.9998, 0.8871, 0.1155, 0.2360]) tensor([0.0710, 0.4683, 0.2165, 0.2442]) -Greedy action tensor([-1.6658, -0.3886, 0.5103, -0.0555]) tensor([0.0543, 0.1949, 0.4788, 0.2719]) -Greedy action tensor([-1.8125, -0.2280, 0.5645, -0.1057]) tensor([0.0451, 0.2201, 0.4861, 0.2487]) -Greedy action tensor([-1.4757, -0.5728, 0.6079, -0.0132]) tensor([0.0632, 0.1559, 0.5079, 0.2729]) -Greedy action tensor([-1.1543, -0.2153, 0.1839, 0.2389]) tensor([0.0877, 0.2244, 0.3345, 0.3534]) -Greedy action tensor([-1.3485, -0.5545, 0.3614, 0.1982]) tensor([0.0744, 0.1646, 0.4115, 0.3495]) -Greedy action tensor([-0.7404, -0.6240, 1.1619, 1.4702]) tensor([0.0557, 0.0626, 0.3734, 0.5083]) -Greedy action tensor([-1.8569, -0.3786, 0.6091, -0.1542]) tensor([0.0441, 0.1936, 0.5199, 0.2423]) -Greedy action tensor([-1.9114, -0.4559, 0.6560, -0.1656]) tensor([0.0416, 0.1782, 0.5419, 0.2383]) -Greedy action tensor([-1.9457, -0.4506, 0.6678, -0.1812]) tensor([0.0401, 0.1788, 0.5471, 0.2341]) -Greedy action tensor([-1.1266, 0.6158, 0.1652, 0.0746]) tensor([0.0731, 0.4176, 0.2661, 0.2431]) -Greedy action tensor([-1.7627, 0.1270, 0.5128, -0.0751]) tensor([0.0439, 0.2908, 0.4277, 0.2376]) -Greedy action tensor([-1.8018, 0.4314, 0.3453, -0.1522]) tensor([0.0415, 0.3872, 0.3553, 0.2160]) -Greedy action tensor([-1.5683, -0.4696, 0.5010, 0.0504]) tensor([0.0589, 0.1768, 0.4668, 0.2974]) -Greedy action tensor([-1.9105, -0.4610, 0.6603, -0.1538]) tensor([0.0414, 0.1766, 0.5419, 0.2401]) -Greedy action tensor([-1.9315, -0.4401, 0.6625, -0.1736]) tensor([0.0406, 0.1804, 0.5434, 0.2355]) -Greedy action tensor([-1.8961, -0.2904, 0.6169, -0.1498]) tensor([0.0416, 0.2071, 0.5130, 0.2383]) -Greedy action tensor([-1.7703, 0.0433, 0.5085, -0.0484]) tensor([0.0445, 0.2726, 0.4341, 0.2488]) -Greedy action tensor([-1.5993, 0.0395, 0.4385, -0.0679]) tensor([0.0542, 0.2791, 0.4160, 0.2507]) -Greedy action tensor([-0.1389, 0.4197, -0.0948, 0.0744]) tensor([0.1988, 0.3475, 0.2077, 0.2460]) -Greedy action tensor([-1.9350, -0.4203, 0.6558, -0.1743]) tensor([0.0405, 0.1841, 0.5400, 0.2354]) -Greedy action tensor([-1.8666, -0.3469, 0.6291, -0.0985]) tensor([0.0424, 0.1940, 0.5148, 0.2487]) -Greedy action tensor([-1.8299, -0.3152, 0.5930, -0.1374]) tensor([0.0449, 0.2043, 0.5067, 0.2441]) -Greedy action tensor([-1.4288, -0.4044, 0.5759, 0.5625]) tensor([0.0540, 0.1503, 0.4005, 0.3952]) -Greedy action tensor([-1.6687, -0.4935, 0.5306, -0.0354]) tensor([0.0544, 0.1762, 0.4907, 0.2786]) -Greedy action tensor([-1.8971, -0.3797, 0.6325, -0.1519]) tensor([0.0420, 0.1913, 0.5265, 0.2403]) -Greedy action tensor([-1.8417, -0.4384, 0.6183, -0.1640]) tensor([0.0452, 0.1839, 0.5290, 0.2419]) -Greedy action tensor([-1.9145, -0.3957, 0.6507, -0.1638]) tensor([0.0411, 0.1877, 0.5345, 0.2367]) -Greedy action tensor([-1.9248, -0.4226, 0.6568, -0.1700]) tensor([0.0408, 0.1834, 0.5397, 0.2361]) -Greedy action tensor([-1.8335, -0.4897, 0.6060, -0.1234]) tensor([0.0458, 0.1756, 0.5253, 0.2533]) -Greedy action tensor([-1.3840, 0.1094, 0.5191, -0.5793]) tensor([0.0695, 0.3093, 0.4659, 0.1553]) -Greedy action tensor([-1.7049, -0.5260, 0.5376, -0.0543]) tensor([0.0530, 0.1722, 0.4988, 0.2760]) -Greedy action tensor([-1.9438, -0.4512, 0.6713, -0.1765]) tensor([0.0400, 0.1781, 0.5474, 0.2344]) -Greedy action tensor([-1.8877, -0.4541, 0.6273, -0.1499]) tensor([0.0430, 0.1804, 0.5320, 0.2446]) -Greedy action tensor([-1.9110, -0.4140, 0.6529, -0.1622]) tensor([0.0413, 0.1846, 0.5366, 0.2375]) -Greedy action tensor([-0.9666, 0.0360, 0.1162, -0.0463]) tensor([0.1088, 0.2966, 0.3214, 0.2732]) -Greedy action tensor([-1.8914, -0.3776, 0.6334, -0.1507]) tensor([0.0421, 0.1915, 0.5262, 0.2402]) -Greedy action tensor([-1.9231, -0.3473, 0.6392, -0.1704]) tensor([0.0407, 0.1968, 0.5277, 0.2348]) -Greedy action tensor([-0.1198, 1.1771, 0.0253, 0.3913]) tensor([0.1337, 0.4889, 0.1545, 0.2228]) -Greedy action tensor([-1.9028, -0.3644, 0.6451, -0.1483]) tensor([0.0413, 0.1923, 0.5277, 0.2387]) -Greedy action tensor([-0.6708, 0.6551, 0.2142, 0.4280]) tensor([0.0981, 0.3696, 0.2378, 0.2945]) -Greedy action tensor([-1.5432, -0.4968, 0.4607, 0.0025]) tensor([0.0627, 0.1784, 0.4649, 0.2940]) -Greedy action tensor([-1.8663, -0.4476, 0.6302, -0.1451]) tensor([0.0437, 0.1807, 0.5310, 0.2446]) -Greedy action tensor([-1.4051, -0.3301, 0.6957, 0.4202]) tensor([0.0546, 0.1600, 0.4464, 0.3389]) -Greedy action tensor([-0.8094, 0.8868, 0.0918, 0.1330]) tensor([0.0871, 0.4749, 0.2145, 0.2235]) -Greedy action tensor([-1.8853, -0.4514, 0.6412, -0.1491]) tensor([0.0428, 0.1794, 0.5351, 0.2428]) -Greedy action tensor([-1.4773, 0.2937, 0.2786, 0.0908]) tensor([0.0573, 0.3365, 0.3315, 0.2747]) -Greedy action tensor([-1.6161, -0.5236, 0.5087, -0.0699]) tensor([0.0587, 0.1749, 0.4911, 0.2753]) -Greedy action tensor([-1.0392, 0.5135, 0.2667, -0.1738]) tensor([0.0848, 0.4007, 0.3130, 0.2015]) -Greedy action tensor([-1.9171, -0.4313, 0.6509, -0.1682]) tensor([0.0413, 0.1825, 0.5387, 0.2375]) -Greedy action tensor([-1.9262, -0.4452, 0.6601, -0.1701]) tensor([0.0409, 0.1797, 0.5428, 0.2366]) -Greedy action tensor([-1.8531, -0.3840, 0.6109, -0.1453]) tensor([0.0442, 0.1922, 0.5197, 0.2440]) -Greedy action tensor([-1.6613, 0.0902, 0.4625, 0.1344]) tensor([0.0473, 0.2725, 0.3954, 0.2848]) -Greedy action tensor([-1.9360, -0.4382, 0.6614, -0.1766]) tensor([0.0405, 0.1810, 0.5435, 0.2351]) -Greedy action tensor([-1.8896, -0.4838, 0.6852, -0.1349]) tensor([0.0417, 0.1700, 0.5473, 0.2410]) -Greedy action tensor([-1.9092, -0.4159, 0.6543, -0.1432]) tensor([0.0412, 0.1833, 0.5347, 0.2408]) -Greedy action tensor([-1.1188, 0.2358, 0.2163, 0.0165]) tensor([0.0848, 0.3287, 0.3224, 0.2640]) -Greedy action tensor([-1.9225, -0.4389, 0.6570, -0.1689]) tensor([0.0410, 0.1809, 0.5412, 0.2369]) -Greedy action tensor([-1.8973, -0.4381, 0.6486, -0.1542]) tensor([0.0421, 0.1810, 0.5365, 0.2404]) -Greedy action tensor([-1.6573, -0.3357, 0.5696, 0.0196]) tensor([0.0516, 0.1936, 0.4786, 0.2762]) -Greedy action tensor([ 0.5809, -0.3500, 0.0288, -0.2621]) tensor([0.4166, 0.1642, 0.2398, 0.1793]) -Greedy action tensor([ 0.4290, 0.5123, -0.5125, 0.0038]) tensor([0.3194, 0.3472, 0.1246, 0.2088]) -Greedy action tensor([ 0.4463, 0.2178, -0.0646, -0.4490]) tensor([0.3566, 0.2838, 0.2140, 0.1457]) -Greedy action tensor([ 0.3151, -0.1873, 0.0096, -0.4005]) tensor([0.3533, 0.2138, 0.2603, 0.1727]) -Greedy action tensor([ 0.9931, -0.4514, -0.0109, -0.2032]) tensor([0.5250, 0.1238, 0.1924, 0.1587]) -Greedy action tensor([ 1.1144, -0.7140, 0.1313, -0.7312]) tensor([0.5908, 0.0949, 0.2210, 0.0933]) -Greedy action tensor([ 0.6542, -0.3933, 0.0618, -0.1281]) tensor([0.4235, 0.1486, 0.2342, 0.1937]) -Greedy action tensor([ 0.8742, -0.4960, -0.1185, -0.3877]) tensor([0.5242, 0.1332, 0.1942, 0.1484]) -Greedy action tensor([ 0.8126, -0.4464, 0.0087, -0.4096]) tensor([0.4936, 0.1401, 0.2209, 0.1454]) -Greedy action tensor([ 1.0250, -0.3564, -0.0457, -0.3714]) tensor([0.5430, 0.1364, 0.1861, 0.1344]) -Greedy action tensor([ 0.4561, -0.0548, -0.0804, 0.0273]) tensor([0.3526, 0.2116, 0.2062, 0.2296]) -Greedy action tensor([ 0.8127, -0.6079, -0.0050, -0.4170]) tensor([0.5062, 0.1223, 0.2235, 0.1480]) -Greedy action tensor([ 1.1294, -0.7766, -0.1562, -0.6283]) tensor([0.6259, 0.0931, 0.1731, 0.1079]) -Greedy action tensor([ 0.7395, -0.4752, 0.1634, -0.3161]) tensor([0.4531, 0.1345, 0.2547, 0.1577]) -Greedy action tensor([ 0.8434, -0.5547, 0.0136, -0.4370]) tensor([0.5099, 0.1260, 0.2224, 0.1417]) -Greedy action tensor([ 0.5869, 0.2962, -0.0930, -0.0210]) tensor([0.3573, 0.2672, 0.1810, 0.1945]) -Greedy action tensor([ 0.8883, -0.6716, 0.0299, -0.5411]) tensor([0.5338, 0.1122, 0.2262, 0.1278]) -Greedy action tensor([ 1.1458, -0.6312, -0.0894, -0.5735]) tensor([0.6101, 0.1032, 0.1774, 0.1093]) -Greedy action tensor([ 0.7369, -0.3202, 0.1130, -0.2916]) tensor([0.4463, 0.1551, 0.2391, 0.1595]) -Greedy action tensor([ 0.2763, 0.0416, -0.1595, -0.1142]) tensor([0.3211, 0.2539, 0.2077, 0.2173]) -Greedy action tensor([ 0.2942, -0.0320, -0.1003, -0.1849]) tensor([0.3317, 0.2394, 0.2236, 0.2054]) -Greedy action tensor([ 0.4950, -0.4286, -0.1162, -0.0366]) tensor([0.3957, 0.1571, 0.2147, 0.2325]) -Greedy action tensor([ 0.4470, -0.0661, 0.0142, -0.0108]) tensor([0.3472, 0.2079, 0.2252, 0.2197]) -Greedy action tensor([ 0.5934, -0.3728, -0.0030, -0.2791]) tensor([0.4257, 0.1620, 0.2345, 0.1779]) -Greedy action tensor([ 0.6772, -0.4884, -0.0251, -0.3931]) tensor([0.4651, 0.1450, 0.2304, 0.1595]) -Greedy action tensor([ 1.0486, -0.1456, -0.0348, -0.5451]) tensor([0.5421, 0.1642, 0.1835, 0.1101]) -Greedy action tensor([ 0.8165, -0.6644, 0.0046, -0.3061]) tensor([0.5008, 0.1139, 0.2224, 0.1630]) -Greedy action tensor([ 0.9352, 0.0365, -0.1403, 0.1255]) tensor([0.4560, 0.1856, 0.1555, 0.2029]) -Greedy action tensor([ 0.4592, -0.2519, -0.1260, -0.2143]) tensor([0.3909, 0.1920, 0.2177, 0.1993]) -Greedy action tensor([ 1.0606, -0.9586, 0.1883, -0.6355]) tensor([0.5767, 0.0766, 0.2410, 0.1058]) -Greedy action tensor([ 0.5708, -0.4150, -0.0631, -0.2936]) tensor([0.4301, 0.1605, 0.2282, 0.1812]) -Greedy action tensor([ 0.5375, -0.2714, -0.1104, -0.2610]) tensor([0.4135, 0.1841, 0.2163, 0.1861]) -Greedy action tensor([ 0.9017, -0.3826, 0.0152, -0.4283]) tensor([0.5119, 0.1417, 0.2110, 0.1354]) -Greedy action tensor([ 0.8442, -0.5420, -0.1328, -0.2739]) tensor([0.5119, 0.1280, 0.1927, 0.1673]) -Greedy action tensor([ 0.2599, -0.0317, -0.0798, -0.2122]) tensor([0.3244, 0.2423, 0.2310, 0.2023]) -Greedy action tensor([ 0.6718, -0.4908, -0.0793, -0.5655]) tensor([0.4820, 0.1507, 0.2274, 0.1399]) -Greedy action tensor([ 0.6513, -0.0816, -0.0820, -0.0151]) tensor([0.4041, 0.1942, 0.1941, 0.2076]) -Greedy action tensor([ 0.4902, -0.1754, 0.0251, -0.1843]) tensor([0.3771, 0.1938, 0.2369, 0.1921]) -Greedy action tensor([ 0.4743, -0.2268, -0.0507, -0.0772]) tensor([0.3754, 0.1862, 0.2221, 0.2163]) -Greedy action tensor([ 1.0671, -0.5825, 0.1064, -0.5066]) tensor([0.5612, 0.1078, 0.2147, 0.1163]) -Greedy action tensor([ 1.1461, -0.3976, -0.3834, -0.6028]) tensor([0.6234, 0.1331, 0.1350, 0.1084]) -Greedy action tensor([ 1.2540, -0.5595, 0.2301, -0.6129]) tensor([0.5963, 0.0973, 0.2142, 0.0922]) -Greedy action tensor([ 0.7725, -0.3688, -0.0714, -0.1428]) tensor([0.4652, 0.1486, 0.2000, 0.1862]) -Greedy action tensor([ 0.9171, -0.4974, -0.1838, -0.2611]) tensor([0.5309, 0.1290, 0.1766, 0.1634]) -Greedy action tensor([ 0.6802, -0.5094, -0.1300, -0.2011]) tensor([0.4622, 0.1407, 0.2056, 0.1915]) -Greedy action tensor([ 0.8602, -0.3053, -0.0691, -0.1272]) tensor([0.4810, 0.1499, 0.1899, 0.1792]) -Greedy action tensor([ 0.9545, -0.3520, -0.0864, -0.3567]) tensor([0.5281, 0.1430, 0.1865, 0.1423]) -Greedy action tensor([ 1.0451, -1.1151, 0.0757, -0.6423]) tensor([0.5954, 0.0686, 0.2258, 0.1101]) -Greedy action tensor([ 0.8793, -0.5857, -0.0382, -0.5207]) tensor([0.5327, 0.1231, 0.2128, 0.1314]) -Greedy action tensor([ 0.3577, -0.0864, -0.0842, -0.0759]) tensor([0.3410, 0.2187, 0.2192, 0.2210]) -Greedy action tensor([ 0.8292, -0.1797, -0.0387, -0.0602]) tensor([0.4555, 0.1661, 0.1912, 0.1872]) -Greedy action tensor([ 0.8364, -0.8079, -0.0656, -0.8378]) tensor([0.5598, 0.1081, 0.2271, 0.1049]) -Greedy action tensor([ 0.6129, -0.5409, -0.0256, -0.2415]) tensor([0.4407, 0.1390, 0.2327, 0.1875]) -Greedy action tensor([ 0.5702, -0.4089, 0.0590, -0.5861]) tensor([0.4367, 0.1640, 0.2619, 0.1374]) -Greedy action tensor([ 1.1326, -0.5785, 0.0703, -0.5533]) tensor([0.5843, 0.1056, 0.2019, 0.1082]) -Greedy action tensor([ 3.2254e-01, -7.4047e-02, 2.7898e-04, -3.3259e-01]) tensor([0.3429, 0.2306, 0.2484, 0.1781]) -Greedy action tensor([ 0.4284, -0.0710, 0.0454, -0.2513]) tensor([0.3577, 0.2171, 0.2439, 0.1813]) -Greedy action tensor([ 0.9599, -0.2283, -0.0361, -0.3810]) tensor([0.5166, 0.1574, 0.1908, 0.1352]) -Greedy action tensor([ 1.1451, -0.4267, 0.0415, -0.2624]) tensor([0.5605, 0.1164, 0.1859, 0.1372]) -Greedy action tensor([ 0.9765, -0.5584, -0.0368, -0.5306]) tensor([0.5555, 0.1197, 0.2017, 0.1231]) -Greedy action tensor([ 0.8760, -0.5925, -0.0112, -0.4696]) tensor([0.5256, 0.1210, 0.2165, 0.1369]) -Greedy action tensor([ 0.8164, -0.3288, 0.0438, -0.4366]) tensor([0.4841, 0.1540, 0.2236, 0.1383]) -Greedy action tensor([ 0.9424, -0.8109, 0.1573, -0.5009]) tensor([0.5361, 0.0928, 0.2445, 0.1266]) -Greedy action tensor([ 0.7881, -0.6452, 0.0301, -0.3578]) tensor([0.4938, 0.1178, 0.2314, 0.1570]) -Greedy action tensor([ 0.7725, -0.6552, -0.1761, -0.5283]) tensor([0.5265, 0.1263, 0.2039, 0.1434]) -Greedy action tensor([ 0.6513, -0.3041, -0.0440, -0.3043]) tensor([0.4409, 0.1696, 0.2200, 0.1696]) -Greedy action tensor([ 0.9096, -0.3197, -0.0879, -0.2749]) tensor([0.5083, 0.1487, 0.1875, 0.1555]) -Greedy action tensor([ 0.8442, -0.5589, 0.1181, -0.2611]) tensor([0.4853, 0.1193, 0.2348, 0.1607]) -Greedy action tensor([ 0.2894, -0.2386, 0.0022, -0.0907]) tensor([0.3307, 0.1950, 0.2481, 0.2261]) -Greedy action tensor([ 0.8378, -0.7847, -0.0549, -0.6595]) tensor([0.5462, 0.1078, 0.2237, 0.1222]) -Greedy action tensor([ 0.9736, -0.6916, -0.0040, -0.5381]) tensor([0.5599, 0.1059, 0.2107, 0.1235]) -Greedy action tensor([ 0.4648, -0.1488, -0.1522, -0.3712]) tensor([0.3977, 0.2153, 0.2146, 0.1724]) -Greedy action tensor([ 0.6377, -0.3246, -0.1207, -0.1357]) tensor([0.4326, 0.1652, 0.2026, 0.1996]) -Greedy action tensor([ 0.9077, -0.6837, 0.0407, -0.4181]) tensor([0.5293, 0.1078, 0.2224, 0.1406]) -Greedy action tensor([ 0.9086, -0.5507, -0.1330, -0.1917]) tensor([0.5214, 0.1212, 0.1840, 0.1735]) -Greedy action tensor([ 1.0139, -0.3811, -0.0816, -0.5820]) tensor([0.5603, 0.1388, 0.1873, 0.1136]) -Greedy action tensor([ 1.1480, -0.6692, 0.0337, -0.3199]) tensor([0.5811, 0.0944, 0.1907, 0.1339]) -Greedy action tensor([ 0.9751, -0.5390, -0.0192, -0.4514]) tensor([0.5464, 0.1202, 0.2022, 0.1312]) -Greedy action tensor([ 0.9719, -0.5456, -0.0784, -0.2783]) tensor([0.5389, 0.1182, 0.1885, 0.1544]) -Greedy action tensor([ 0.4485, -0.2561, -0.0713, -0.0852]) tensor([0.3738, 0.1848, 0.2223, 0.2192]) -Greedy action tensor([ 0.4657, -0.3030, -0.0755, -0.1184]) tensor([0.3841, 0.1781, 0.2236, 0.2142]) -Greedy action tensor([ 1.8454, -0.6661, -0.8853, 0.8399]) tensor([0.6613, 0.0537, 0.0431, 0.2419]) -Greedy action tensor([ 0.5327, -0.0719, -0.0458, -0.0718]) tensor([0.3769, 0.2059, 0.2113, 0.2059]) -Greedy action tensor([ 1.3291, -0.5777, -0.5271, 0.5036]) tensor([0.5738, 0.0852, 0.0897, 0.2513]) -Greedy action tensor([ 1.5846, -0.3883, -0.4109, 0.0111]) tensor([0.6746, 0.0938, 0.0917, 0.1399]) -Greedy action tensor([ 1.1136, -0.3789, -0.8345, 0.1252]) tensor([0.5749, 0.1292, 0.0819, 0.2140]) -Greedy action tensor([ 1.4143, -0.0530, -0.3625, 0.5618]) tensor([0.5476, 0.1263, 0.0926, 0.2335]) -Greedy action tensor([ 1.3578, -0.0238, -0.4250, 0.5080]) tensor([0.5415, 0.1360, 0.0911, 0.2315]) -Greedy action tensor([ 1.5862, -0.2643, -0.5931, 0.0683]) tensor([0.6714, 0.1055, 0.0759, 0.1472]) -Greedy action tensor([ 1.9890, 0.1198, -0.0252, 0.2396]) tensor([0.6842, 0.1055, 0.0913, 0.1190]) -Greedy action tensor([ 1.5781, -0.6813, -0.4249, 0.4012]) tensor([0.6462, 0.0675, 0.0872, 0.1992]) -Greedy action tensor([ 1.3368, -0.3107, -0.4876, 0.1955]) tensor([0.5976, 0.1151, 0.0964, 0.1909]) -Greedy action tensor([ 1.7262, -0.4455, 0.0720, 0.0577]) tensor([0.6694, 0.0763, 0.1280, 0.1262]) -Greedy action tensor([ 1.8272, 0.0071, -0.8958, 0.0776]) tensor([0.7135, 0.1156, 0.0469, 0.1240]) -Greedy action tensor([ 1.5441, -0.4500, -0.5611, 0.4596]) tensor([0.6265, 0.0853, 0.0763, 0.2118]) -Greedy action tensor([ 2.2251, -1.2484, -0.0076, 0.9778]) tensor([0.7015, 0.0218, 0.0752, 0.2015]) -Greedy action tensor([ 1.2769, 0.1111, -0.2929, 0.2727]) tensor([0.5302, 0.1653, 0.1103, 0.1942]) -Greedy action tensor([ 1.9867, 0.3190, -0.3467, 0.3467]) tensor([0.6758, 0.1275, 0.0655, 0.1311]) -Greedy action tensor([ 1.7690, -1.1594, -0.4572, 0.2485]) tensor([0.7246, 0.0388, 0.0782, 0.1584]) -Greedy action tensor([ 2.3955, 0.3439, -0.0786, 0.2305]) tensor([0.7533, 0.0968, 0.0635, 0.0864]) -Greedy action tensor([ 1.3639, -0.2866, -0.8065, 0.4623]) tensor([0.5841, 0.1121, 0.0667, 0.2371]) -Greedy action tensor([ 1.1911, -0.3437, -0.1489, -0.3138]) tensor([0.5884, 0.1268, 0.1541, 0.1307]) -Greedy action tensor([ 1.5441, -0.0594, -0.7005, 0.3253]) tensor([0.6239, 0.1255, 0.0661, 0.1844]) -Greedy action tensor([ 1.2547, -0.4494, -0.6770, 0.5701]) tensor([0.5461, 0.0994, 0.0791, 0.2754]) -Greedy action tensor([ 2.0083, -0.9454, -0.1714, 0.4887]) tensor([0.7225, 0.0377, 0.0817, 0.1581]) -Greedy action tensor([ 1.6323, -0.5023, -0.4572, 0.2290]) tensor([0.6721, 0.0795, 0.0832, 0.1652]) -Greedy action tensor([ 0.9622, -0.1793, -0.8650, 0.2797]) tensor([0.5036, 0.1608, 0.0810, 0.2545]) -Greedy action tensor([ 2.1934, -0.0764, -0.0088, 0.3728]) tensor([0.7268, 0.0751, 0.0804, 0.1177]) -Greedy action tensor([ 1.8639, -1.3212, -0.1964, 0.2261]) tensor([0.7336, 0.0304, 0.0935, 0.1426]) -Greedy action tensor([ 1.9642, -1.0463, -0.7222, 0.0612]) tensor([0.7896, 0.0389, 0.0538, 0.1177]) -Greedy action tensor([ 1.1092, -0.5812, -0.0918, 0.3706]) tensor([0.5094, 0.0940, 0.1533, 0.2434]) -Greedy action tensor([ 1.7047, -0.4905, -0.6182, 0.5217]) tensor([0.6598, 0.0735, 0.0646, 0.2021]) -Greedy action tensor([ 1.4352, 0.1060, -0.6572, 0.0025]) tensor([0.6147, 0.1627, 0.0758, 0.1467]) -Greedy action tensor([ 1.7675, -0.6248, 0.2333, -0.1792]) tensor([0.6898, 0.0631, 0.1487, 0.0985]) -Greedy action tensor([ 2.8165, -0.9146, -0.3824, 0.3506]) tensor([0.8698, 0.0208, 0.0355, 0.0739]) -Greedy action tensor([ 1.5240, -0.6698, -0.1510, 0.2950]) tensor([0.6284, 0.0701, 0.1177, 0.1839]) -Greedy action tensor([ 1.4596, -0.5936, -0.1290, 0.1168]) tensor([0.6275, 0.0805, 0.1281, 0.1638]) -Greedy action tensor([ 1.9381, -0.5410, -0.3296, 0.7161]) tensor([0.6748, 0.0566, 0.0699, 0.1988]) -Greedy action tensor([ 1.8611, 0.2475, 0.5453, -0.1024]) tensor([0.6220, 0.1239, 0.1669, 0.0873]) -Greedy action tensor([ 1.4643, -0.8439, 0.0473, 0.3640]) tensor([0.5972, 0.0594, 0.1448, 0.1987]) -Greedy action tensor([ 1.3158, -0.5816, -0.4403, 0.6962]) tensor([0.5374, 0.0806, 0.0928, 0.2892]) -Greedy action tensor([ 1.8129, -0.3870, -0.4798, 0.4149]) tensor([0.6855, 0.0760, 0.0692, 0.1694]) -Greedy action tensor([ 1.4647, -0.5942, -0.4915, 0.4039]) tensor([0.6191, 0.0790, 0.0875, 0.2143]) -Greedy action tensor([ 1.6231, -0.0550, -0.3157, 0.2652]) tensor([0.6298, 0.1176, 0.0906, 0.1620]) -Greedy action tensor([ 1.3403, -0.2726, 0.1435, 0.4655]) tensor([0.5213, 0.1039, 0.1575, 0.2173]) -Greedy action tensor([ 1.1297, -0.4743, -0.3403, 0.4696]) tensor([0.5134, 0.1032, 0.1180, 0.2653]) -Greedy action tensor([ 1.2825, -0.5690, -0.2034, 0.3104]) tensor([0.5677, 0.0891, 0.1285, 0.2147]) -Greedy action tensor([ 1.1063, -0.5797, -0.1742, 0.2127]) tensor([0.5341, 0.0989, 0.1484, 0.2185]) -Greedy action tensor([ 1.0837, -0.4706, -0.0028, 0.2161]) tensor([0.5079, 0.1074, 0.1714, 0.2133]) -Greedy action tensor([ 1.5135, -0.1549, -0.6597, 0.2397]) tensor([0.6321, 0.1192, 0.0719, 0.1768]) -Greedy action tensor([ 1.6624, -0.6303, -0.6995, -0.0043]) tensor([0.7225, 0.0730, 0.0681, 0.1365]) -Greedy action tensor([ 1.4611, 0.1084, -0.9716, 0.4690]) tensor([0.5824, 0.1506, 0.0511, 0.2159]) -Greedy action tensor([ 1.4818, -0.2747, -0.7991, 0.4643]) tensor([0.6111, 0.1055, 0.0625, 0.2209]) -Greedy action tensor([ 1.3233, -0.6574, -0.2154, 0.4797]) tensor([0.5609, 0.0774, 0.1204, 0.2413]) -Greedy action tensor([ 1.5126, -0.5320, -0.8822, 0.4972]) tensor([0.6318, 0.0818, 0.0576, 0.2289]) -Greedy action tensor([ 1.4167, 0.0662, -0.8369, 0.2949]) tensor([0.5918, 0.1533, 0.0622, 0.1927]) -Greedy action tensor([ 1.0665, -0.5535, -0.1029, 0.1017]) tensor([0.5292, 0.1047, 0.1644, 0.2017]) -Greedy action tensor([ 1.8627, 0.4930, -0.4419, 0.3238]) tensor([0.6375, 0.1620, 0.0636, 0.1368]) -Greedy action tensor([ 1.5266, -0.4090, -0.7770, 0.4223]) tensor([0.6346, 0.0916, 0.0634, 0.2104]) -Greedy action tensor([ 1.5617, -0.1919, -0.7491, 0.7112]) tensor([0.5884, 0.1019, 0.0584, 0.2514]) -Greedy action tensor([ 1.5670, -0.8558, -0.2429, 0.0403]) tensor([0.6805, 0.0603, 0.1114, 0.1478]) -Greedy action tensor([ 1.6461, -0.3708, -0.2296, 0.3214]) tensor([0.6442, 0.0857, 0.0987, 0.1713]) -Greedy action tensor([ 1.1721, -0.3422, 0.0661, 0.2231]) tensor([0.5160, 0.1135, 0.1707, 0.1998]) -Greedy action tensor([ 1.5008, -0.5502, -0.5553, 0.6229]) tensor([0.5980, 0.0769, 0.0765, 0.2486]) -Greedy action tensor([ 1.4769, -0.4860, -0.3058, 0.4499]) tensor([0.6000, 0.0843, 0.1009, 0.2148]) -Greedy action tensor([ 1.0200, -0.5333, -0.2306, 0.0616]) tensor([0.5315, 0.1124, 0.1522, 0.2038]) -Greedy action tensor([ 1.1420, -0.4363, -0.3413, -0.1834]) tensor([0.5886, 0.1214, 0.1335, 0.1564]) -Greedy action tensor([ 1.9955, -0.5893, -0.3859, 0.0921]) tensor([0.7594, 0.0573, 0.0702, 0.1132]) -Greedy action tensor([ 1.0962, 0.1183, -0.3843, 0.0468]) tensor([0.5118, 0.1925, 0.1165, 0.1792]) -Greedy action tensor([ 3.0181, -1.9926, -0.1904, 0.6770]) tensor([0.8747, 0.0058, 0.0353, 0.0842]) -Greedy action tensor([ 1.8967, -0.2133, -0.6305, 0.6656]) tensor([0.6698, 0.0812, 0.0535, 0.1955]) -Greedy action tensor([ 1.7742, -0.4617, -0.3090, 0.3475]) tensor([0.6796, 0.0726, 0.0846, 0.1632]) -Greedy action tensor([ 1.5477, -0.5674, -0.3641, 0.3696]) tensor([0.6344, 0.0765, 0.0938, 0.1953]) -Greedy action tensor([ 1.4717, -0.7784, -0.2419, 0.2807]) tensor([0.6291, 0.0663, 0.1134, 0.1912]) -Greedy action tensor([ 1.3214, -0.3036, 0.0718, 0.5191]) tensor([0.5177, 0.1019, 0.1484, 0.2320]) -Greedy action tensor([ 1.5122, -0.9103, 0.0590, 0.1452]) tensor([0.6340, 0.0562, 0.1482, 0.1616]) -Greedy action tensor([ 2.0049, -1.0089, -0.0333, 0.5032]) tensor([0.7132, 0.0350, 0.0929, 0.1589]) -Greedy action tensor([ 1.2075, -0.3988, -0.3459, 0.3648]) tensor([0.5427, 0.1089, 0.1148, 0.2337]) -Greedy action tensor([ 1.4437, -0.8273, -0.0825, -0.1121]) tensor([0.6529, 0.0674, 0.1419, 0.1378]) -Greedy action tensor([ 1.8814, -1.0899, -0.2741, 0.1943]) tensor([0.7396, 0.0379, 0.0857, 0.1369]) -Greedy action tensor([2.3483, 0.7654, 0.0670, 0.3417]) tensor([0.6935, 0.1424, 0.0708, 0.0932]) -Greedy action tensor([ 1.9418, -0.1793, 0.0523, 0.3850]) tensor([0.6748, 0.0809, 0.1020, 0.1423]) -Greedy action tensor([-0.8095, 0.1153, -0.2902, -0.2932]) tensor([0.1454, 0.3666, 0.2444, 0.2437]) -Greedy action tensor([ 0.1604, -0.1127, 0.2090, -0.4953]) tensor([0.3003, 0.2285, 0.3153, 0.1559]) -Greedy action tensor([ 0.8872, -2.3671, 0.1024, -0.0614]) tensor([0.5313, 0.0205, 0.2424, 0.2058]) -Greedy action tensor([ 0.1331, -0.3816, 0.2870, 0.4037]) tensor([0.2454, 0.1467, 0.2862, 0.3217]) -Greedy action tensor([-0.0289, -0.3995, 1.1537, -0.3957]) tensor([0.1771, 0.1223, 0.5779, 0.1227]) -Greedy action tensor([ 1.0076, -1.9290, 0.1075, -0.2699]) tensor([0.5753, 0.0305, 0.2339, 0.1604]) -Greedy action tensor([-0.3228, -0.1257, 0.5237, -0.5467]) tensor([0.1870, 0.2277, 0.4359, 0.1495]) -Greedy action tensor([-0.2576, -0.3993, -1.0422, -0.9358]) tensor([0.3531, 0.3065, 0.1611, 0.1792]) -Greedy action tensor([-0.9384, -1.2103, 0.4560, -0.2270]) tensor([0.1277, 0.0973, 0.5149, 0.2601]) -Greedy action tensor([ 0.3701, -0.6917, 0.3221, -0.8442]) tensor([0.3852, 0.1332, 0.3672, 0.1144]) -Greedy action tensor([-1.0001, -0.7919, 0.6384, -0.7631]) tensor([0.1157, 0.1424, 0.5953, 0.1466]) -Greedy action tensor([-0.5501, -1.0773, 1.1920, -0.7918]) tensor([0.1237, 0.0730, 0.7062, 0.0971]) -Greedy action tensor([ 0.2660, -1.1393, -0.3112, 0.6220]) tensor([0.3092, 0.0758, 0.1736, 0.4414]) -Greedy action tensor([-1.5255, 0.3010, 0.5817, -0.7064]) tensor([0.0565, 0.3509, 0.4645, 0.1281]) -Greedy action tensor([-0.2226, 0.1409, 0.4494, -0.0631]) tensor([0.1796, 0.2582, 0.3516, 0.2106]) -Greedy action tensor([-1.4757, -0.7597, -0.5693, 0.1690]) tensor([0.0934, 0.1912, 0.2313, 0.4840]) -Greedy action tensor([ 0.6099, -0.7767, 1.2804, -0.8118]) tensor([0.2902, 0.0725, 0.5673, 0.0700]) -Greedy action tensor([ 0.2930, 0.4815, 0.4225, -0.4637]) tensor([0.2621, 0.3165, 0.2984, 0.1230]) -Greedy action tensor([-0.3357, 0.7679, 0.0773, 0.4977]) tensor([0.1278, 0.3852, 0.1931, 0.2940]) -Greedy action tensor([ 0.7190, -0.8616, -0.1662, 1.0701]) tensor([0.3290, 0.0677, 0.1358, 0.4675]) -Greedy action tensor([-0.1374, 0.3250, 0.6972, -0.6632]) tensor([0.1824, 0.2896, 0.4202, 0.1078]) -Greedy action tensor([-0.0891, 0.1013, 1.1162, 0.5011]) tensor([0.1360, 0.1645, 0.4540, 0.2454]) -Greedy action tensor([-0.6318, -1.0708, 0.3911, -0.0660]) tensor([0.1616, 0.1042, 0.4495, 0.2846]) -Greedy action tensor([-0.6513, -0.3234, 0.3495, -0.8752]) tensor([0.1693, 0.2350, 0.4605, 0.1353]) -Greedy action tensor([ 0.1222, -0.5144, 0.7973, -0.1387]) tensor([0.2345, 0.1241, 0.4607, 0.1807]) -Greedy action tensor([-1.0500, -0.6072, 0.5099, -0.6244]) tensor([0.1130, 0.1760, 0.5379, 0.1730]) -Greedy action tensor([ 0.9769, -1.3909, -0.2617, 0.6268]) tensor([0.4789, 0.0449, 0.1388, 0.3374]) -Greedy action tensor([ 1.0850, -0.8057, 0.5778, -0.7091]) tensor([0.5210, 0.0787, 0.3137, 0.0866]) -Greedy action tensor([ 1.1330, -1.2246, 0.3581, -0.7426]) tensor([0.5853, 0.0554, 0.2696, 0.0897]) -Greedy action tensor([ 0.1091, 0.6616, 0.4250, -0.6955]) tensor([0.2195, 0.3814, 0.3010, 0.0982]) -Greedy action tensor([ 1.2243, 0.5256, 0.2733, -0.2586]) tensor([0.4738, 0.2356, 0.1831, 0.1075]) -Greedy action tensor([ 0.4434, -0.0436, -0.2428, 0.0966]) tensor([0.3540, 0.2175, 0.1782, 0.2503]) -Greedy action tensor([-0.6066, 0.0742, 0.5504, -1.2634]) tensor([0.1498, 0.2960, 0.4765, 0.0777]) -Greedy action tensor([ 0.3076, -0.7931, -0.7151, 0.7340]) tensor([0.3102, 0.1032, 0.1115, 0.4751]) -Greedy action tensor([-1.2354, -0.8097, 0.4494, -0.3433]) tensor([0.0965, 0.1477, 0.5203, 0.2355]) -Greedy action tensor([0.3764, 0.6382, 0.0277, 0.4060]) tensor([0.2478, 0.3220, 0.1749, 0.2553]) -Greedy action tensor([ 0.3963, -1.3073, 0.4898, -0.2394]) tensor([0.3559, 0.0648, 0.3908, 0.1885]) -Greedy action tensor([ 0.4196, -0.0330, 0.5874, -0.3872]) tensor([0.3063, 0.1948, 0.3623, 0.1367]) -Greedy action tensor([-0.7893, 0.0265, 0.3817, -0.4892]) tensor([0.1276, 0.2885, 0.4116, 0.1723]) -Greedy action tensor([-0.1553, -1.1240, -0.1892, -0.4928]) tensor([0.3268, 0.1241, 0.3159, 0.2332]) -Greedy action tensor([ 0.2283, -2.1459, 1.4663, 0.0201]) tensor([0.1868, 0.0174, 0.6442, 0.1517]) -Greedy action tensor([ 0.6865, -1.2608, -0.1761, 0.0723]) tensor([0.4749, 0.0677, 0.2004, 0.2570]) -Greedy action tensor([-0.4469, -0.9196, 0.2942, -0.3287]) tensor([0.2063, 0.1286, 0.4329, 0.2322]) -Greedy action tensor([-0.2883, -0.9614, -0.4630, -0.4320]) tensor([0.3109, 0.1586, 0.2611, 0.2693]) -Greedy action tensor([ 0.1253, -0.9092, 0.3517, -0.4132]) tensor([0.3132, 0.1113, 0.3927, 0.1828]) -Greedy action tensor([ 0.1905, -0.7758, 0.6703, -0.4903]) tensor([0.2855, 0.1086, 0.4613, 0.1445]) -Greedy action tensor([ 0.1702, 0.0445, 1.0476, -0.3398]) tensor([0.2046, 0.1804, 0.4921, 0.1229]) -Greedy action tensor([-0.1502, -0.2632, 0.0584, 0.6407]) tensor([0.1876, 0.1676, 0.2311, 0.4137]) -Greedy action tensor([ 0.9764, -1.2169, 1.7330, 0.5645]) tensor([0.2561, 0.0286, 0.5457, 0.1696]) -Greedy action tensor([-0.6663, -0.9444, -0.1123, -0.4217]) tensor([0.2095, 0.1586, 0.3645, 0.2675]) -Greedy action tensor([ 0.0516, -0.1417, -0.1329, -0.4817]) tensor([0.3084, 0.2542, 0.2564, 0.1809]) -Greedy action tensor([-0.0245, -1.3165, 0.0525, -0.1564]) tensor([0.3095, 0.0850, 0.3343, 0.2712]) -Greedy action tensor([ 0.5439, -0.1176, 0.1543, -0.6921]) tensor([0.4026, 0.2078, 0.2727, 0.1170]) -Greedy action tensor([ 0.9328, -0.9276, -0.0921, 1.0200]) tensor([0.3838, 0.0597, 0.1377, 0.4188]) -Greedy action tensor([1.2707, 0.1096, 0.0842, 0.6879]) tensor([0.4594, 0.1439, 0.1402, 0.2565]) -Greedy action tensor([ 0.2179, -1.6668, -0.4166, -0.7402]) tensor([0.4841, 0.0735, 0.2567, 0.1857]) -Greedy action tensor([-0.6495, 0.0606, -1.4701, 0.6691]) tensor([0.1386, 0.2820, 0.0610, 0.5183]) -Greedy action tensor([ 0.8019, -1.0824, -0.1964, -0.7537]) tensor([0.5775, 0.0877, 0.2128, 0.1219]) -Greedy action tensor([ 0.0933, 0.2073, -0.1570, -0.9098]) tensor([0.3062, 0.3431, 0.2384, 0.1123]) -Greedy action tensor([-0.4952, -0.6289, 0.7857, -0.1570]) tensor([0.1454, 0.1272, 0.5235, 0.2039]) -Greedy action tensor([-1.5509, -0.5269, 0.5212, -0.6068]) tensor([0.0699, 0.1948, 0.5555, 0.1798]) -Greedy action tensor([ 0.2435, -0.6296, -0.5815, -0.5232]) tensor([0.4309, 0.1800, 0.1889, 0.2002]) -Greedy action tensor([ 7.9204e-01, 3.5605e-01, 1.4357e-01, -1.3590e-05]) tensor([0.3813, 0.2466, 0.1994, 0.1727]) -Greedy action tensor([-0.1198, 0.3069, -0.7259, 0.1340]) tensor([0.2290, 0.3509, 0.1249, 0.2952]) -Greedy action tensor([ 0.9642, -0.7887, 0.2846, 0.4471]) tensor([0.4393, 0.0761, 0.2226, 0.2619]) -Greedy action tensor([-1.7519, -0.7288, -0.7931, -0.0993]) tensor([0.0861, 0.2396, 0.2247, 0.4496]) -Greedy action tensor([-0.0288, -1.4700, 0.2397, -0.4551]) tensor([0.3127, 0.0740, 0.4091, 0.2042]) -Greedy action tensor([-0.2900, -0.7513, -1.2225, -0.0864]) tensor([0.3077, 0.1940, 0.1211, 0.3772]) -Greedy action tensor([ 1.3724, -0.6846, 1.3561, 1.3578]) tensor([0.3229, 0.0413, 0.3177, 0.3182]) -Greedy action tensor([ 0.9786, -1.4712, 1.2600, 0.0607]) tensor([0.3558, 0.0307, 0.4714, 0.1421]) -Greedy action tensor([-0.5157, -1.0502, 0.4117, -1.4850]) tensor([0.2225, 0.1304, 0.5626, 0.0844]) -Greedy action tensor([-0.6562, -0.3733, 0.0981, 0.2179]) tensor([0.1460, 0.1937, 0.3104, 0.3499]) -Greedy action tensor([-0.6878, 0.3523, 0.6035, -1.6422]) tensor([0.1273, 0.3604, 0.4633, 0.0490]) -Greedy action tensor([0.8155, 0.0187, 0.1493, 0.2301]) tensor([0.3966, 0.1788, 0.2037, 0.2209]) -Greedy action tensor([ 0.7438, -1.5975, -0.0515, 1.5225]) tensor([0.2684, 0.0258, 0.1211, 0.5847]) -Greedy action tensor([-0.2225, -0.5741, 0.8969, -1.1037]) tensor([0.1930, 0.1358, 0.5912, 0.0800]) -Greedy action tensor([ 0.6028, -1.0061, 0.1256, 0.5622]) tensor([0.3596, 0.0720, 0.2231, 0.3453]) -Greedy action tensor([-0.5310, -1.7194, -0.2540, 0.2334]) tensor([0.2096, 0.0639, 0.2765, 0.4501]) -Greedy action tensor([ 0.5759, -0.1668, -0.3037, 0.0405]) tensor([0.4038, 0.1922, 0.1676, 0.2364]) -Greedy action tensor([ 0.8595, -0.1605, 0.1454, 1.0373]) tensor([0.3284, 0.1184, 0.1608, 0.3923]) -Greedy action tensor([-1.1784, -0.8378, -0.1097, -0.1420]) tensor([0.1229, 0.1728, 0.3578, 0.3465]) -Greedy action tensor([-1.8966, -0.4230, 0.6489, -0.1544]) tensor([0.0420, 0.1832, 0.5351, 0.2397]) -Greedy action tensor([-1.9070, -0.3856, 0.6295, -0.1509]) tensor([0.0417, 0.1907, 0.5264, 0.2412]) -Greedy action tensor([-1.8986, -0.4545, 0.6500, -0.1549]) tensor([0.0421, 0.1785, 0.5386, 0.2408]) -Greedy action tensor([-1.8858, -0.4144, 0.6482, -0.1496]) tensor([0.0423, 0.1843, 0.5333, 0.2402]) -Greedy action tensor([-1.7127, -0.2375, 0.5306, -0.1035]) tensor([0.0505, 0.2208, 0.4761, 0.2525]) -Greedy action tensor([-1.9384, -0.4436, 0.6649, -0.1769]) tensor([0.0403, 0.1799, 0.5450, 0.2348]) -Greedy action tensor([-1.9130, -0.4484, 0.6850, -0.1516]) tensor([0.0407, 0.1760, 0.5466, 0.2368]) -Greedy action tensor([-1.8782, -0.4361, 0.6519, -0.1403]) tensor([0.0426, 0.1802, 0.5349, 0.2422]) -Greedy action tensor([-1.9181, -0.4247, 0.6568, -0.1661]) tensor([0.0411, 0.1828, 0.5393, 0.2368]) -Greedy action tensor([-1.9403, -0.4470, 0.6661, -0.1781]) tensor([0.0403, 0.1793, 0.5458, 0.2346]) -Greedy action tensor([-0.7602, 0.7203, 0.0596, 0.0189]) tensor([0.1016, 0.4465, 0.2306, 0.2214]) -Greedy action tensor([-1.9376, -0.4475, 0.6637, -0.1767]) tensor([0.0404, 0.1794, 0.5450, 0.2352]) -Greedy action tensor([-0.5299, -0.0958, 0.1002, 0.4565]) tensor([0.1408, 0.2173, 0.2644, 0.3775]) -Greedy action tensor([-1.9048, -0.4242, 0.6427, -0.1669]) tensor([0.0419, 0.1843, 0.5355, 0.2383]) -Greedy action tensor([-0.4092, 0.0074, 0.1258, -0.0881]) tensor([0.1785, 0.2707, 0.3047, 0.2461]) -Greedy action tensor([-1.3573, 0.4706, 0.3054, -0.0647]) tensor([0.0620, 0.3855, 0.3268, 0.2257]) -Greedy action tensor([-1.7448, -0.2520, 0.6007, -0.0543]) tensor([0.0469, 0.2088, 0.4898, 0.2544]) -Greedy action tensor([-0.8157, 0.5633, 0.1118, 0.0344]) tensor([0.1016, 0.4036, 0.2570, 0.2378]) -Greedy action tensor([-1.8476, -0.3625, 0.6328, -0.0918]) tensor([0.0432, 0.1907, 0.5160, 0.2500]) -Greedy action tensor([-1.6986, -0.2330, 0.4639, -0.0145]) tensor([0.0515, 0.2231, 0.4478, 0.2776]) -Greedy action tensor([-1.2791, -0.5250, 0.4977, -0.2031]) tensor([0.0835, 0.1776, 0.4938, 0.2450]) -Greedy action tensor([-1.7905, -0.5030, 0.5899, -0.1233]) tensor([0.0482, 0.1748, 0.5214, 0.2555]) -Greedy action tensor([-1.9436, -0.4466, 0.6676, -0.1783]) tensor([0.0401, 0.1792, 0.5462, 0.2344]) -Greedy action tensor([-1.8932, -0.3805, 0.6403, -0.1513]) tensor([0.0419, 0.1904, 0.5283, 0.2394]) -Greedy action tensor([-1.9083, -0.4556, 0.6541, -0.1636]) tensor([0.0417, 0.1784, 0.5410, 0.2389]) -Greedy action tensor([-1.9456, -0.4507, 0.6673, -0.1807]) tensor([0.0401, 0.1788, 0.5469, 0.2342]) -Greedy action tensor([-1.9258, -0.4172, 0.6566, -0.1704]) tensor([0.0408, 0.1842, 0.5392, 0.2358]) -Greedy action tensor([-0.9174, 0.0759, 0.4383, 0.5278]) tensor([0.0846, 0.2284, 0.3281, 0.3589]) -Greedy action tensor([-1.9129, -0.4501, 0.6576, -0.1642]) tensor([0.0414, 0.1789, 0.5416, 0.2381]) -Greedy action tensor([-1.6352, -0.2235, 0.4850, -0.0844]) tensor([0.0551, 0.2260, 0.4591, 0.2598]) -Greedy action tensor([-1.8217, -0.2348, 0.5860, -0.0949]) tensor([0.0442, 0.2161, 0.4911, 0.2486]) -Greedy action tensor([-1.1889, 0.7357, 0.1244, 0.2531]) tensor([0.0633, 0.4337, 0.2353, 0.2677]) -Greedy action tensor([-1.7600, -0.4411, 0.5774, -0.0957]) tensor([0.0491, 0.1835, 0.5082, 0.2592]) -Greedy action tensor([-1.7286, -0.1768, 0.5594, 0.0832]) tensor([0.0461, 0.2175, 0.4542, 0.2821]) -Greedy action tensor([-1.7403, -0.2653, 0.6195, -0.2984]) tensor([0.0495, 0.2165, 0.5245, 0.2095]) -Greedy action tensor([-1.9442, -0.4518, 0.6678, -0.1801]) tensor([0.0401, 0.1785, 0.5470, 0.2343]) -Greedy action tensor([-1.5656, -0.2548, 0.5911, 0.3428]) tensor([0.0498, 0.1846, 0.4301, 0.3355]) -Greedy action tensor([-1.9280, -0.4243, 0.6600, -0.1643]) tensor([0.0406, 0.1826, 0.5400, 0.2368]) -Greedy action tensor([-1.8809, -0.4757, 0.6339, -0.1463]) tensor([0.0433, 0.1764, 0.5351, 0.2452]) -Greedy action tensor([-1.7715, -0.1866, 0.5295, -0.0987]) tensor([0.0472, 0.2302, 0.4712, 0.2514]) -Greedy action tensor([-0.8003, -0.5174, 0.1678, 1.0172]) tensor([0.0900, 0.1194, 0.2369, 0.5538]) -Greedy action tensor([-1.8919, -0.4392, 0.6477, -0.1512]) tensor([0.0423, 0.1807, 0.5359, 0.2411]) -Greedy action tensor([-1.7371, 0.2087, 0.4589, -0.0643]) tensor([0.0448, 0.3136, 0.4028, 0.2387]) -Greedy action tensor([-0.9676, -0.5750, 0.3529, -0.0585]) tensor([0.1148, 0.1700, 0.4301, 0.2850]) -Greedy action tensor([-1.8901, -0.4278, 0.6454, -0.1445]) tensor([0.0423, 0.1824, 0.5333, 0.2421]) -Greedy action tensor([-1.4125, -0.4286, 0.3794, 0.1161]) tensor([0.0700, 0.1872, 0.4200, 0.3228]) -Greedy action tensor([-1.8442, -0.4547, 0.6132, -0.1347]) tensor([0.0450, 0.1806, 0.5256, 0.2488]) -Greedy action tensor([-1.8053, -0.4356, 0.5947, -0.1187]) tensor([0.0468, 0.1842, 0.5161, 0.2529]) -Greedy action tensor([-0.4077, 0.8586, -0.0430, 0.1638]) tensor([0.1289, 0.4572, 0.1856, 0.2283]) -Greedy action tensor([-1.8600, -0.3776, 0.6497, -0.1206]) tensor([0.0427, 0.1882, 0.5257, 0.2433]) -Greedy action tensor([-1.8858, -0.4412, 0.6415, -0.1471]) tensor([0.0426, 0.1808, 0.5339, 0.2426]) -Greedy action tensor([-1.7371, -0.1016, 0.5556, -0.0441]) tensor([0.0466, 0.2390, 0.4612, 0.2532]) -Greedy action tensor([-1.8845, -0.3548, 0.6346, -0.1448]) tensor([0.0421, 0.1946, 0.5233, 0.2400]) -Greedy action tensor([-1.7983, -0.4256, 0.6773, -0.0602]) tensor([0.0444, 0.1752, 0.5279, 0.2525]) -Greedy action tensor([-1.9250, -0.4308, 0.6636, -0.1562]) tensor([0.0406, 0.1809, 0.5404, 0.2381]) -Greedy action tensor([-1.8772, -0.3437, 0.6325, -0.1382]) tensor([0.0423, 0.1961, 0.5206, 0.2409]) -Greedy action tensor([-1.1624, 0.7928, 0.1542, 0.2184]) tensor([0.0634, 0.4479, 0.2365, 0.2522]) -Greedy action tensor([-1.6406, -0.1514, 0.6043, -0.4314]) tensor([0.0549, 0.2433, 0.5180, 0.1839]) -Greedy action tensor([-1.5981, -0.2084, 0.4782, -0.0661]) tensor([0.0568, 0.2278, 0.4527, 0.2627]) -Greedy action tensor([-1.8936, -0.4279, 0.6454, -0.1433]) tensor([0.0421, 0.1823, 0.5333, 0.2423]) -Greedy action tensor([-1.8601, -0.4900, 0.7817, 0.0056]) tensor([0.0393, 0.1547, 0.5519, 0.2540]) -Greedy action tensor([-1.7668, 0.0893, 0.4963, -0.0617]) tensor([0.0444, 0.2842, 0.4270, 0.2444]) -Greedy action tensor([-1.8246, -0.3897, 0.5366, -0.1285]) tensor([0.0470, 0.1976, 0.4989, 0.2565]) -Greedy action tensor([-1.8022, -0.3694, 0.5938, -0.0826]) tensor([0.0460, 0.1926, 0.5047, 0.2566]) -Greedy action tensor([-1.8113, -0.4514, 0.6020, -0.1188]) tensor([0.0465, 0.1812, 0.5196, 0.2527]) -Greedy action tensor([-1.9346, -0.4253, 0.6559, -0.1742]) tensor([0.0405, 0.1833, 0.5405, 0.2357]) -Greedy action tensor([-1.2728, 0.7633, 0.2134, 0.1873]) tensor([0.0575, 0.4406, 0.2542, 0.2477]) -Greedy action tensor([-1.8603, -0.4446, 0.6283, -0.1363]) tensor([0.0439, 0.1809, 0.5290, 0.2462]) -Greedy action tensor([-1.2383, -0.0140, 0.3737, -0.1200]) tensor([0.0802, 0.2727, 0.4018, 0.2453]) -Greedy action tensor([-1.7279, -0.3932, 0.5460, -0.0987]) tensor([0.0510, 0.1937, 0.4954, 0.2600]) -Greedy action tensor([-1.6762, -0.3594, 0.5748, 0.0088]) tensor([0.0510, 0.1902, 0.4840, 0.2748]) -Greedy action tensor([-1.0501, 0.5709, 0.1500, 0.0116]) tensor([0.0815, 0.4122, 0.2706, 0.2356]) -Greedy action tensor([-1.9228, -0.4339, 0.6525, -0.1712]) tensor([0.0411, 0.1822, 0.5399, 0.2369]) -Greedy action tensor([-1.8786, -0.4107, 0.6554, -0.1255]) tensor([0.0422, 0.1830, 0.5315, 0.2434]) -Greedy action tensor([-1.9178, -0.3730, 0.6492, -0.1630]) tensor([0.0408, 0.1913, 0.5318, 0.2361]) -Greedy action tensor([-1.9308, -0.4796, 0.7195, -0.1658]) tensor([0.0396, 0.1689, 0.5603, 0.2312]) -Greedy action tensor([-1.8868, -0.4133, 0.6516, -0.1485]) tensor([0.0422, 0.1841, 0.5339, 0.2399]) -Greedy action tensor([-0.5903, -0.7379, 0.2862, 0.5086]) tensor([0.1376, 0.1187, 0.3306, 0.4130]) -Greedy action tensor([-1.4233, -0.5250, 0.3804, -0.0711]) tensor([0.0747, 0.1833, 0.4534, 0.2887]) -Greedy action tensor([-1.7454, -0.1341, 0.5856, -0.0721]) tensor([0.0462, 0.2316, 0.4757, 0.2464]) -Greedy action tensor([-1.9359, -0.4257, 0.6610, -0.1739]) tensor([0.0404, 0.1828, 0.5418, 0.2351]) -Greedy action tensor([ 0.6773, -0.3809, -0.1482, -0.1694]) tensor([0.4517, 0.1568, 0.1979, 0.1937]) -Greedy action tensor([ 0.8521, -0.7604, 0.1719, -0.1595]) tensor([0.4832, 0.0963, 0.2447, 0.1757]) -Greedy action tensor([ 0.9157, -0.6354, -0.0343, -0.2501]) tensor([0.5235, 0.1110, 0.2024, 0.1631]) -Greedy action tensor([ 0.4590, -0.0969, 0.0418, -0.3842]) tensor([0.3755, 0.2154, 0.2475, 0.1616]) -Greedy action tensor([ 0.6310, -0.3116, 0.1057, -0.1113]) tensor([0.4070, 0.1586, 0.2407, 0.1937]) -Greedy action tensor([ 0.7095, -0.4941, -0.0025, -0.5007]) tensor([0.4787, 0.1437, 0.2349, 0.1427]) -Greedy action tensor([ 1.0168, -0.8918, 0.1800, -0.5908]) tensor([0.5613, 0.0832, 0.2431, 0.1125]) -Greedy action tensor([ 0.5012, 0.2601, -0.3939, -0.0828]) tensor([0.3634, 0.2855, 0.1485, 0.2026]) -Greedy action tensor([ 0.7201, -0.1030, -0.1028, -0.0288]) tensor([0.4253, 0.1868, 0.1868, 0.2011]) -Greedy action tensor([ 0.7980, -0.2048, 0.1523, -0.6221]) tensor([0.4689, 0.1720, 0.2458, 0.1133]) -Greedy action tensor([ 0.4679, 0.5449, -0.2638, 0.1372]) tensor([0.3049, 0.3293, 0.1467, 0.2191]) -Greedy action tensor([ 0.5162, -0.2321, 0.0452, -0.1712]) tensor([0.3845, 0.1820, 0.2401, 0.1934]) -Greedy action tensor([ 0.4603, -0.0741, -0.1014, -0.1423]) tensor([0.3699, 0.2168, 0.2109, 0.2025]) -Greedy action tensor([ 0.8754, -0.3976, -0.1983, -0.5204]) tensor([0.5349, 0.1498, 0.1828, 0.1325]) -Greedy action tensor([ 1.0368, -0.6300, -0.0694, -0.3700]) tensor([0.5667, 0.1070, 0.1875, 0.1388]) -Greedy action tensor([ 1.0435, -0.7262, 0.0295, -0.6635]) tensor([0.5832, 0.0994, 0.2116, 0.1058]) -Greedy action tensor([ 0.8199, -0.3309, -0.0181, -0.1085]) tensor([0.4664, 0.1476, 0.2018, 0.1843]) -Greedy action tensor([ 0.9032, -0.6853, -0.0181, -0.4245]) tensor([0.5355, 0.1094, 0.2132, 0.1420]) -Greedy action tensor([ 0.8992, -0.1871, 0.0665, -0.0965]) tensor([0.4669, 0.1576, 0.2030, 0.1725]) -Greedy action tensor([ 0.8233, -0.3652, -0.1575, -0.1518]) tensor([0.4862, 0.1481, 0.1823, 0.1834]) -Greedy action tensor([ 0.9790, -0.6265, -0.1847, -0.5171]) tensor([0.5757, 0.1156, 0.1798, 0.1290]) -Greedy action tensor([ 0.9922, -0.3270, -0.1196, -0.4086]) tensor([0.5427, 0.1451, 0.1785, 0.1337]) -Greedy action tensor([ 0.4571, 0.2108, -0.2450, 0.2317]) tensor([0.3251, 0.2542, 0.1611, 0.2595]) -Greedy action tensor([ 0.8629, -0.8320, -0.1174, -0.3665]) tensor([0.5402, 0.0992, 0.2027, 0.1580]) -Greedy action tensor([ 0.9457, -0.5612, -0.0501, -0.3764]) tensor([0.5383, 0.1193, 0.1989, 0.1435]) -Greedy action tensor([ 0.7174, -0.6411, 0.1295, -0.3803]) tensor([0.4659, 0.1198, 0.2588, 0.1555]) -Greedy action tensor([ 1.0089, -0.4179, -0.0799, -0.3272]) tensor([0.5436, 0.1305, 0.1830, 0.1429]) -Greedy action tensor([ 1.4517, -1.2769, 0.1010, -0.8422]) tensor([0.7016, 0.0458, 0.1818, 0.0708]) -Greedy action tensor([ 0.4914, -0.1079, -0.0352, -0.2837]) tensor([0.3845, 0.2112, 0.2271, 0.1772]) -Greedy action tensor([ 0.7434, -0.3853, 0.0767, -0.2006]) tensor([0.4493, 0.1453, 0.2306, 0.1748]) -Greedy action tensor([ 1.0082, 0.1792, 0.2561, -0.4086]) tensor([0.4650, 0.2030, 0.2192, 0.1128]) -Greedy action tensor([ 0.7589, -0.5625, -0.0082, -0.4127]) tensor([0.4900, 0.1307, 0.2275, 0.1518]) -Greedy action tensor([ 8.5656e-01, -4.6259e-01, -5.5659e-04, -3.9066e-01]) tensor([0.5053, 0.1351, 0.2144, 0.1452]) -Greedy action tensor([ 0.6460, -0.2507, -0.0346, -0.3188]) tensor([0.4357, 0.1777, 0.2206, 0.1660]) -Greedy action tensor([ 0.4618, -0.1327, 0.0572, -0.1950]) tensor([0.3653, 0.2016, 0.2437, 0.1894]) -Greedy action tensor([ 0.6707, -0.3453, -0.0462, -0.1694]) tensor([0.4382, 0.1587, 0.2140, 0.1892]) -Greedy action tensor([ 1.3764, -0.9243, -0.0309, -0.5387]) tensor([0.6701, 0.0671, 0.1640, 0.0987]) -Greedy action tensor([ 0.7652, -0.3143, 0.0326, -0.1181]) tensor([0.4477, 0.1521, 0.2152, 0.1851]) -Greedy action tensor([ 0.8757, -0.4436, -0.1741, -0.6212]) tensor([0.5431, 0.1452, 0.1901, 0.1216]) -Greedy action tensor([ 1.0237, -0.4812, 0.0560, -0.4342]) tensor([0.5450, 0.1210, 0.2071, 0.1268]) -Greedy action tensor([ 0.9801, -0.5348, -0.0218, -0.2521]) tensor([0.5323, 0.1170, 0.1954, 0.1552]) -Greedy action tensor([ 0.9671, -0.7196, -0.1859, -0.5985]) tensor([0.5849, 0.1083, 0.1846, 0.1222]) -Greedy action tensor([ 1.0513, -0.6010, 0.1048, -0.4240]) tensor([0.5530, 0.1060, 0.2146, 0.1265]) -Greedy action tensor([ 0.5269, -0.2151, 0.0282, -0.1928]) tensor([0.3890, 0.1852, 0.2363, 0.1894]) -Greedy action tensor([ 0.2908, -0.3244, -0.1595, 0.0644]) tensor([0.3361, 0.1817, 0.2142, 0.2680]) -Greedy action tensor([ 0.9897, 0.2285, -0.0242, -0.2494]) tensor([0.4718, 0.2204, 0.1712, 0.1366]) -Greedy action tensor([ 0.7491, -0.4664, 0.1035, -0.2546]) tensor([0.4572, 0.1356, 0.2397, 0.1676]) -Greedy action tensor([ 0.7275, -0.4094, -0.1116, -0.3830]) tensor([0.4802, 0.1541, 0.2075, 0.1582]) -Greedy action tensor([ 0.3381, 0.1545, -0.1259, 0.0792]) tensor([0.3093, 0.2574, 0.1945, 0.2388]) -Greedy action tensor([ 0.2474, 0.2158, -0.1755, 0.0938]) tensor([0.2872, 0.2783, 0.1882, 0.2463]) -Greedy action tensor([ 0.6808, -0.5108, -0.1135, -0.4279]) tensor([0.4795, 0.1456, 0.2167, 0.1582]) -Greedy action tensor([ 0.6268, -0.3571, 0.0355, -0.1282]) tensor([0.4171, 0.1559, 0.2309, 0.1960]) -Greedy action tensor([ 0.6249, 0.2600, -0.1579, 0.2654]) tensor([0.3510, 0.2437, 0.1604, 0.2450]) -Greedy action tensor([ 0.5275, -0.3368, 0.0913, -0.2347]) tensor([0.3945, 0.1663, 0.2551, 0.1841]) -Greedy action tensor([ 0.1170, 0.1565, 0.1376, -0.0793]) tensor([0.2575, 0.2679, 0.2629, 0.2116]) -Greedy action tensor([ 1.1549, -0.8931, 0.1859, -0.6562]) tensor([0.5981, 0.0772, 0.2270, 0.0978]) -Greedy action tensor([ 0.9944, -0.5364, 0.0041, -0.2540]) tensor([0.5334, 0.1154, 0.1981, 0.1531]) -Greedy action tensor([ 0.5718, -0.3132, -0.0655, -0.1932]) tensor([0.4155, 0.1715, 0.2197, 0.1933]) -Greedy action tensor([ 0.7583, -0.4769, -0.2277, -0.4566]) tensor([0.5100, 0.1483, 0.1903, 0.1514]) -Greedy action tensor([ 1.1412, -0.5081, 0.0186, -0.6177]) tensor([0.5918, 0.1137, 0.1926, 0.1019]) -Greedy action tensor([ 0.7316, -0.5843, 0.1000, -0.3341]) tensor([0.4663, 0.1251, 0.2480, 0.1606]) -Greedy action tensor([ 0.7866, -0.5912, -0.0327, -0.2084]) tensor([0.4848, 0.1222, 0.2137, 0.1793]) -Greedy action tensor([ 0.9357, -0.8001, 0.0358, -0.4473]) tensor([0.5454, 0.0961, 0.2217, 0.1368]) -Greedy action tensor([ 0.6102, -0.0817, -0.0301, -0.2000]) tensor([0.4044, 0.2025, 0.2132, 0.1799]) -Greedy action tensor([ 7.1954e-01, -3.3134e-01, 5.7137e-04, -1.0458e-01]) tensor([0.4395, 0.1536, 0.2141, 0.1928]) -Greedy action tensor([ 1.3032, -0.6361, 0.1584, -0.6984]) tensor([0.6261, 0.0900, 0.1993, 0.0846]) -Greedy action tensor([ 0.4679, 0.0966, -0.0011, 0.0502]) tensor([0.3362, 0.2320, 0.2104, 0.2214]) -Greedy action tensor([ 0.6989, -0.5445, 0.0248, -0.1661]) tensor([0.4506, 0.1300, 0.2297, 0.1898]) -Greedy action tensor([ 1.0402, -0.3649, -0.0299, -0.2011]) tensor([0.5327, 0.1307, 0.1827, 0.1540]) -Greedy action tensor([ 1.1654, -1.0980, -0.0229, -0.6586]) tensor([0.6369, 0.0662, 0.1941, 0.1028]) -Greedy action tensor([ 1.2266, -0.5087, 0.0082, -0.3202]) tensor([0.5935, 0.1047, 0.1755, 0.1264]) -Greedy action tensor([ 1.0110, -0.9352, 0.1196, -0.4814]) tensor([0.5625, 0.0803, 0.2307, 0.1265]) -Greedy action tensor([ 1.0778, -0.7281, -0.0216, -0.5691]) tensor([0.5917, 0.0972, 0.1971, 0.1140]) -Greedy action tensor([ 1.2515, -0.4514, -0.3214, -0.6580]) tensor([0.6503, 0.1185, 0.1349, 0.0963]) -Greedy action tensor([ 0.6208, -0.3572, 0.0886, -0.3212]) tensor([0.4249, 0.1598, 0.2496, 0.1657]) -Greedy action tensor([ 0.8485, -0.4151, -0.0348, -0.2851]) tensor([0.4956, 0.1401, 0.2049, 0.1595]) -Greedy action tensor([ 0.4070, -0.2322, -0.0971, -0.2111]) tensor([0.3744, 0.1976, 0.2262, 0.2018]) -Greedy action tensor([ 0.7664, -0.6505, -0.0447, -0.3803]) tensor([0.4989, 0.1210, 0.2217, 0.1585]) -Greedy action tensor([ 1.0574, -0.4663, -0.0807, -0.4819]) tensor([0.5705, 0.1243, 0.1828, 0.1224]) -Greedy action tensor([ 0.5089, -0.1197, 0.0215, -0.0774]) tensor([0.3698, 0.1972, 0.2272, 0.2058]) -Greedy action tensor([ 0.6638, -0.2762, -0.0842, -0.0242]) tensor([0.4225, 0.1651, 0.2000, 0.2124]) -Greedy action tensor([ 1.5941, -0.2581, -0.4592, 0.4994]) tensor([0.6174, 0.0969, 0.0792, 0.2066]) -Greedy action tensor([ 2.4954, -0.3702, 0.5410, -0.0994]) tensor([0.7854, 0.0447, 0.1113, 0.0586]) -Greedy action tensor([ 1.3976, -0.0595, -0.1280, 0.4775]) tensor([0.5409, 0.1260, 0.1176, 0.2155]) -Greedy action tensor([ 1.2544, -0.6737, -0.3023, 0.1694]) tensor([0.5903, 0.0858, 0.1245, 0.1994]) -Greedy action tensor([ 1.0326, -0.0961, 0.0113, 0.1604]) tensor([0.4758, 0.1539, 0.1714, 0.1989]) -Greedy action tensor([ 0.8853, -0.5503, 0.1923, 0.2526]) tensor([0.4407, 0.1049, 0.2204, 0.2341]) -Greedy action tensor([ 1.0134, -0.3509, -0.0951, 0.3677]) tensor([0.4740, 0.1211, 0.1564, 0.2485]) -Greedy action tensor([ 1.4564, -0.7030, -0.1401, 0.0971]) tensor([0.6350, 0.0733, 0.1287, 0.1631]) -Greedy action tensor([ 1.7338, -0.3616, -0.6852, 0.1177]) tensor([0.7089, 0.0872, 0.0631, 0.1408]) -Greedy action tensor([ 1.3300, -0.3615, -0.8515, 0.4274]) tensor([0.5873, 0.1082, 0.0663, 0.2382]) -Greedy action tensor([ 1.4013, -0.0543, -0.8340, 0.3880]) tensor([0.5871, 0.1369, 0.0628, 0.2131]) -Greedy action tensor([ 2.5376, -0.1336, -1.1669, 0.5176]) tensor([0.8154, 0.0564, 0.0201, 0.1082]) -Greedy action tensor([ 1.2670, 0.0239, -1.1443, 0.0869]) tensor([0.5933, 0.1712, 0.0532, 0.1823]) -Greedy action tensor([ 1.5609, -0.8469, -0.6230, 0.9173]) tensor([0.5787, 0.0521, 0.0652, 0.3041]) -Greedy action tensor([ 1.0974, -0.3781, -0.3725, -0.0508]) tensor([0.5631, 0.1288, 0.1295, 0.1786]) -Greedy action tensor([ 0.4894, -0.1840, 0.1611, -0.0703]) tensor([0.3569, 0.1820, 0.2571, 0.2040]) -Greedy action tensor([ 1.6908, -0.3385, -0.6802, 0.2353]) tensor([0.6858, 0.0901, 0.0640, 0.1600]) -Greedy action tensor([ 1.6105, -0.6629, -0.5648, 0.7586]) tensor([0.6086, 0.0627, 0.0691, 0.2596]) -Greedy action tensor([ 2.4083, -0.6636, -0.1584, 0.6320]) tensor([0.7738, 0.0359, 0.0594, 0.1310]) -Greedy action tensor([ 2.1747, -1.0521, -0.6492, 0.4597]) tensor([0.7818, 0.0310, 0.0464, 0.1407]) -Greedy action tensor([ 1.4335, -0.2478, -0.6101, 0.2742]) tensor([0.6137, 0.1142, 0.0795, 0.1925]) -Greedy action tensor([ 1.1367, -0.3962, -0.3522, 0.4165]) tensor([0.5186, 0.1120, 0.1170, 0.2524]) -Greedy action tensor([ 1.3571, -0.8362, -0.8014, 0.5307]) tensor([0.6007, 0.0670, 0.0694, 0.2629]) -Greedy action tensor([ 1.7669, -0.5202, -0.8501, 0.5910]) tensor([0.6743, 0.0685, 0.0492, 0.2080]) -Greedy action tensor([ 1.4758, -0.5750, -0.4219, 0.1084]) tensor([0.6522, 0.0839, 0.0978, 0.1662]) -Greedy action tensor([ 2.3859, -1.2561, 0.3199, 0.6383]) tensor([0.7535, 0.0197, 0.0955, 0.1313]) -Greedy action tensor([ 2.0502, -0.9060, -0.8257, 1.1202]) tensor([0.6654, 0.0346, 0.0375, 0.2625]) -Greedy action tensor([ 1.6083, -0.7537, -0.3006, 0.2531]) tensor([0.6665, 0.0628, 0.0988, 0.1719]) -Greedy action tensor([ 1.7847, -0.3313, -0.2590, 0.5509]) tensor([0.6488, 0.0782, 0.0841, 0.1889]) -Greedy action tensor([ 1.5733, -1.0583, -0.5092, 0.2496]) tensor([0.6836, 0.0492, 0.0852, 0.1820]) -Greedy action tensor([ 1.7655, -0.8228, -0.3861, -0.1078]) tensor([0.7435, 0.0559, 0.0865, 0.1142]) -Greedy action tensor([ 1.3827, -0.2688, -0.7279, 0.1006]) tensor([0.6288, 0.1206, 0.0762, 0.1745]) -Greedy action tensor([ 1.3811, -0.7543, -0.8329, 0.5812]) tensor([0.5964, 0.0705, 0.0652, 0.2680]) -Greedy action tensor([ 1.5228, -0.4245, -0.2486, 0.0655]) tensor([0.6470, 0.0923, 0.1100, 0.1507]) -Greedy action tensor([ 2.1224, -1.0022, -0.3494, 0.7333]) tensor([0.7259, 0.0319, 0.0613, 0.1810]) -Greedy action tensor([ 1.5763, -0.7196, -0.4103, 0.3010]) tensor([0.6591, 0.0664, 0.0904, 0.1841]) -Greedy action tensor([ 1.8047, -0.3441, -0.8944, 0.6031]) tensor([0.6736, 0.0786, 0.0453, 0.2026]) -Greedy action tensor([ 1.2263, -0.0660, -0.8412, 0.2786]) tensor([0.5590, 0.1535, 0.0707, 0.2167]) -Greedy action tensor([ 1.2400, -0.4572, -0.8792, 0.6614]) tensor([0.5365, 0.0983, 0.0644, 0.3008]) -Greedy action tensor([ 1.5857, -0.6868, 0.0040, 0.2327]) tensor([0.6381, 0.0658, 0.1312, 0.1649]) -Greedy action tensor([ 1.3923, -0.4386, -0.8049, 0.7078]) tensor([0.5631, 0.0902, 0.0626, 0.2840]) -Greedy action tensor([ 1.9194, -0.8356, -0.2806, 0.3703]) tensor([0.7211, 0.0459, 0.0799, 0.1532]) -Greedy action tensor([ 1.9512, -0.7577, -0.3294, 0.6206]) tensor([0.6978, 0.0465, 0.0713, 0.1844]) -Greedy action tensor([ 1.6439, -0.8924, -0.4072, 0.7645]) tensor([0.6162, 0.0488, 0.0792, 0.2558]) -Greedy action tensor([ 2.1197, -0.6180, 0.2410, 1.1599]) tensor([0.6248, 0.0404, 0.0955, 0.2393]) -Greedy action tensor([ 1.2996, -0.4387, -0.3586, 0.2065]) tensor([0.5877, 0.1033, 0.1120, 0.1970]) -Greedy action tensor([ 1.5740, -0.6006, -0.4709, 0.4175]) tensor([0.6420, 0.0730, 0.0831, 0.2020]) -Greedy action tensor([ 0.8355, -0.1987, -0.0932, -0.3668]) tensor([0.4875, 0.1733, 0.1926, 0.1465]) -Greedy action tensor([ 1.2394, -0.2838, -1.1922, 0.6246]) tensor([0.5415, 0.1181, 0.0476, 0.2928]) -Greedy action tensor([ 1.1742, 0.0759, -0.7230, -0.0568]) tensor([0.5633, 0.1878, 0.0845, 0.1645]) -Greedy action tensor([ 1.6346, -0.4838, -0.4555, 0.6237]) tensor([0.6220, 0.0748, 0.0769, 0.2263]) -Greedy action tensor([ 2.0227, -0.6973, -0.4112, 0.2741]) tensor([0.7532, 0.0496, 0.0661, 0.1311]) -Greedy action tensor([ 1.3260, -0.3541, -0.6756, 0.2937]) tensor([0.5961, 0.1111, 0.0805, 0.2123]) -Greedy action tensor([ 1.7610, -0.5584, -0.8075, 0.4107]) tensor([0.6973, 0.0686, 0.0534, 0.1807]) -Greedy action tensor([ 1.7797, -0.5631, -0.6450, 1.0331]) tensor([0.6029, 0.0579, 0.0534, 0.2858]) -Greedy action tensor([ 1.4300, -0.5008, -0.7496, 0.8586]) tensor([0.5486, 0.0796, 0.0620, 0.3098]) -Greedy action tensor([1.8830, 0.7644, 0.0132, 0.2658]) tensor([0.5955, 0.1946, 0.0918, 0.1182]) -Greedy action tensor([ 1.4800, -0.1226, -0.0106, 0.1393]) tensor([0.5923, 0.1193, 0.1334, 0.1550]) -Greedy action tensor([ 1.3430, -0.2898, -0.9124, 0.1055]) tensor([0.6288, 0.1229, 0.0659, 0.1824]) -Greedy action tensor([ 2.2853, -0.1040, -0.0316, 0.4571]) tensor([0.7402, 0.0679, 0.0730, 0.1189]) -Greedy action tensor([ 1.4968, -0.2707, -0.5907, 0.5707]) tensor([0.5914, 0.1010, 0.0733, 0.2343]) -Greedy action tensor([ 1.5805, -0.5709, -0.3322, 0.5739]) tensor([0.6137, 0.0714, 0.0906, 0.2243]) -Greedy action tensor([ 1.8283, -0.2795, -0.4859, 0.5445]) tensor([0.6679, 0.0811, 0.0660, 0.1850]) -Greedy action tensor([ 0.7303, -0.2097, 0.0575, 0.0902]) tensor([0.4118, 0.1609, 0.2101, 0.2171]) -Greedy action tensor([ 2.2290, -1.4413, -0.2157, 0.8881]) tensor([0.7279, 0.0185, 0.0631, 0.1904]) -Greedy action tensor([ 0.7346, -0.4578, 0.0730, 0.1244]) tensor([0.4232, 0.1285, 0.2184, 0.2299]) -Greedy action tensor([ 1.3778, -0.3157, -0.5570, 0.0090]) tensor([0.6318, 0.1162, 0.0913, 0.1607]) -Greedy action tensor([ 1.7429, -0.2896, -0.8322, 0.7403]) tensor([0.6353, 0.0832, 0.0484, 0.2331]) -Greedy action tensor([ 1.7304, -0.9872, -0.2878, 0.5438]) tensor([0.6648, 0.0439, 0.0883, 0.2029]) -Greedy action tensor([ 1.4619, -0.6188, -0.2200, 0.1260]) tensor([0.6354, 0.0793, 0.1182, 0.1671]) -Greedy action tensor([ 1.2652, -0.0568, -0.4419, 0.3449]) tensor([0.5416, 0.1444, 0.0982, 0.2158]) -Greedy action tensor([ 1.3622, 0.1231, -0.5334, 0.2313]) tensor([0.5673, 0.1643, 0.0852, 0.1831]) -Greedy action tensor([ 1.6053, -0.8576, -0.2808, -0.0360]) tensor([0.6990, 0.0595, 0.1060, 0.1354]) -Greedy action tensor([ 1.0773, 0.3358, -0.9713, 0.2909]) tensor([0.4852, 0.2312, 0.0626, 0.2210]) -Greedy action tensor([ 1.4954, -0.5519, -0.4555, 0.1555]) tensor([0.6523, 0.0842, 0.0927, 0.1708]) -Greedy action tensor([ 1.5019, 0.0933, -0.1864, 0.3305]) tensor([0.5750, 0.1406, 0.1063, 0.1782]) -Greedy action tensor([ 1.6099, -0.4773, 0.0163, -0.1105]) tensor([0.6639, 0.0823, 0.1349, 0.1188]) -Greedy action tensor([ 2.4958, -1.2475, 0.3116, 0.8247]) tensor([0.7551, 0.0179, 0.0850, 0.1420]) -Greedy action tensor([ 1.5142, -0.7045, -0.2727, 0.5293]) tensor([0.6062, 0.0659, 0.1015, 0.2264]) -Greedy action tensor([ 1.9255, -0.8928, -0.4788, 0.5103]) tensor([0.7179, 0.0429, 0.0648, 0.1744]) -Greedy action tensor([ 1.1809, -0.2097, -0.0805, -0.0044]) tensor([0.5441, 0.1354, 0.1541, 0.1663]) -Greedy action tensor([0.7525, 0.0512, 0.4266, 0.8724]) tensor([0.2989, 0.1483, 0.2158, 0.3370]) -Greedy action tensor([-0.2193, -1.1715, 0.1906, -0.1527]) tensor([0.2524, 0.0974, 0.3803, 0.2698]) -Greedy action tensor([-0.2818, -0.1803, -0.2578, 0.5473]) tensor([0.1844, 0.2041, 0.1889, 0.4226]) -Greedy action tensor([-0.6831, -0.3180, 0.7730, -1.0776]) tensor([0.1351, 0.1946, 0.5793, 0.0910]) -Greedy action tensor([ 1.5312, -0.5623, 0.6360, 0.9184]) tensor([0.4823, 0.0594, 0.1970, 0.2613]) -Greedy action tensor([-0.3338, -0.0807, -1.3696, -0.5002]) tensor([0.2866, 0.3691, 0.1017, 0.2426]) -Greedy action tensor([-0.0498, -1.3329, -0.0949, 0.8973]) tensor([0.2078, 0.0576, 0.1987, 0.5359]) -Greedy action tensor([ 0.2764, -0.2617, -0.0374, -0.1463]) tensor([0.3367, 0.1966, 0.2460, 0.2206]) -Greedy action tensor([ 0.6434, -2.1697, 0.1559, 0.1571]) tensor([0.4369, 0.0262, 0.2683, 0.2686]) -Greedy action tensor([-0.2040, -1.3705, -0.1630, -0.0639]) tensor([0.2854, 0.0889, 0.2974, 0.3283]) -Greedy action tensor([ 0.4796, -0.8986, 0.0522, 0.2695]) tensor([0.3684, 0.0928, 0.2402, 0.2986]) -Greedy action tensor([-0.4381, -0.6280, 0.8974, -0.3298]) tensor([0.1483, 0.1226, 0.5638, 0.1653]) -Greedy action tensor([ 0.5902, 0.0542, 0.2961, -0.1890]) tensor([0.3585, 0.2098, 0.2672, 0.1645]) -Greedy action tensor([ 1.1725, -0.6429, 0.0585, 1.0097]) tensor([0.4272, 0.0695, 0.1402, 0.3630]) -Greedy action tensor([-0.7357, -0.6505, -0.7047, 0.6445]) tensor([0.1409, 0.1535, 0.1454, 0.5603]) -Greedy action tensor([-0.8619, -0.3294, -0.7822, -1.2818]) tensor([0.2251, 0.3833, 0.2437, 0.1479]) -Greedy action tensor([ 0.4936, 0.9283, 0.5654, -0.9049]) tensor([0.2587, 0.3995, 0.2779, 0.0639]) -Greedy action tensor([ 0.0458, -1.1092, -0.3061, 0.2078]) tensor([0.3131, 0.0986, 0.2202, 0.3681]) -Greedy action tensor([-0.2351, -0.6196, 1.9567, -0.5604]) tensor([0.0881, 0.0600, 0.7884, 0.0636]) -Greedy action tensor([ 0.2168, 0.4296, -0.8469, -0.4377]) tensor([0.3224, 0.3988, 0.1113, 0.1675]) -Greedy action tensor([-0.4314, -0.2723, -0.5785, -0.0392]) tensor([0.2214, 0.2596, 0.1911, 0.3278]) -Greedy action tensor([ 0.7470, -0.6878, -0.4301, 0.4156]) tensor([0.4416, 0.1052, 0.1361, 0.3171]) -Greedy action tensor([-0.7939, -0.9512, 0.0867, -0.2080]) tensor([0.1649, 0.1409, 0.3978, 0.2963]) -Greedy action tensor([-0.1583, -0.9900, -0.4186, -0.2859]) tensor([0.3240, 0.1410, 0.2497, 0.2852]) -Greedy action tensor([ 0.0316, -1.1557, -0.6275, -0.3836]) tensor([0.4028, 0.1229, 0.2084, 0.2659]) -Greedy action tensor([-0.4525, -0.1260, 0.4633, -0.4649]) tensor([0.1703, 0.2360, 0.4255, 0.1682]) -Greedy action tensor([-0.2789, -1.0119, 0.7751, -0.5353]) tensor([0.1952, 0.0938, 0.5600, 0.1510]) -Greedy action tensor([ 1.3559, 0.4708, -0.2737, 0.2678]) tensor([0.5140, 0.2121, 0.1008, 0.1731]) -Greedy action tensor([-0.4594, -1.3499, -0.1443, 0.7265]) tensor([0.1652, 0.0678, 0.2263, 0.5407]) -Greedy action tensor([ 0.4560, -0.5852, 0.0215, 0.2074]) tensor([0.3597, 0.1270, 0.2329, 0.2805]) -Greedy action tensor([-0.4961, -1.1666, 0.4538, -0.5248]) tensor([0.1973, 0.1009, 0.5101, 0.1917]) -Greedy action tensor([ 0.3169, -0.9450, -0.8490, 1.2347]) tensor([0.2440, 0.0691, 0.0760, 0.6109]) -Greedy action tensor([-1.0444, -0.5634, -1.2966, -0.1444]) tensor([0.1708, 0.2763, 0.1327, 0.4201]) -Greedy action tensor([ 0.7753, 0.3971, 0.2797, -0.3311]) tensor([0.3809, 0.2610, 0.2321, 0.1260]) -Greedy action tensor([ 1.2854, -0.9669, 0.9437, 0.9480]) tensor([0.3954, 0.0416, 0.2809, 0.2821]) -Greedy action tensor([ 0.5470, -0.6297, -0.2423, -0.0488]) tensor([0.4322, 0.1332, 0.1963, 0.2382]) -Greedy action tensor([-0.6416, -1.2350, 0.3870, 0.0364]) tensor([0.1582, 0.0874, 0.4426, 0.3117]) -Greedy action tensor([ 1.0508, -0.8611, -0.6488, -0.2032]) tensor([0.6189, 0.0915, 0.1131, 0.1766]) -Greedy action tensor([-0.0871, -0.5273, 1.0593, 0.3510]) tensor([0.1577, 0.1016, 0.4963, 0.2444]) -Greedy action tensor([ 0.6983, -0.8235, -0.0917, -0.4872]) tensor([0.5056, 0.1104, 0.2295, 0.1545]) -Greedy action tensor([-0.2812, 0.2437, 0.2731, -0.4665]) tensor([0.1901, 0.3212, 0.3308, 0.1579]) -Greedy action tensor([-0.0096, -0.3238, -0.6766, -0.3387]) tensor([0.3375, 0.2465, 0.1732, 0.2428]) -Greedy action tensor([ 0.7457, -0.1792, 0.7533, 1.4027]) tensor([0.2308, 0.0915, 0.2325, 0.4452]) -Greedy action tensor([-0.2224, -1.0203, -0.6211, -0.1635]) tensor([0.3143, 0.1415, 0.2109, 0.3333]) -Greedy action tensor([-1.8552, -0.4693, -0.8551, 0.4986]) tensor([0.0548, 0.2192, 0.1490, 0.5770]) -Greedy action tensor([-1.6529, -0.6601, -0.2160, -0.0187]) tensor([0.0767, 0.2071, 0.3229, 0.3933]) -Greedy action tensor([ 1.0506, -1.0935, -0.7318, 0.3892]) tensor([0.5551, 0.0650, 0.0934, 0.2865]) -Greedy action tensor([-0.7813, -1.3177, 0.4467, -0.4481]) tensor([0.1564, 0.0915, 0.5339, 0.2182]) -Greedy action tensor([-0.1743, -1.2458, 0.4347, -0.1431]) tensor([0.2374, 0.0813, 0.4364, 0.2449]) -Greedy action tensor([-0.1913, 0.8181, -0.2103, -0.7120]) tensor([0.1880, 0.5158, 0.1845, 0.1117]) -Greedy action tensor([ 0.4575, 0.0086, -0.1177, 0.6835]) tensor([0.2895, 0.1848, 0.1629, 0.3629]) -Greedy action tensor([ 0.2924, 0.2340, -0.4274, -0.0805]) tensor([0.3206, 0.3025, 0.1561, 0.2208]) -Greedy action tensor([-0.1808, -0.4690, -0.4640, -0.7090]) tensor([0.3234, 0.2424, 0.2436, 0.1907]) -Greedy action tensor([-0.6563, -0.7666, 1.2196, -0.3357]) tensor([0.1020, 0.0914, 0.6660, 0.1406]) -Greedy action tensor([ 0.4175, -0.4105, -0.6425, 0.5660]) tensor([0.3397, 0.1484, 0.1177, 0.3941]) -Greedy action tensor([ 1.2983, -1.1241, 1.1588, 0.0407]) tensor([0.4459, 0.0396, 0.3878, 0.1268]) -Greedy action tensor([-0.7199, -1.3797, 0.4831, -0.3135]) tensor([0.1575, 0.0814, 0.5245, 0.2365]) -Greedy action tensor([ 0.1054, 0.4414, 0.3223, -0.4084]) tensor([0.2359, 0.3300, 0.2930, 0.1411]) -Greedy action tensor([ 0.2899, -0.5822, 0.2820, -0.2020]) tensor([0.3309, 0.1384, 0.3283, 0.2024]) -Greedy action tensor([ 1.0259, -0.1074, -0.2747, 0.7817]) tensor([0.4206, 0.1354, 0.1146, 0.3294]) -Greedy action tensor([ 0.1702, 0.0274, 0.3791, -0.2800]) tensor([0.2676, 0.2320, 0.3298, 0.1706]) -Greedy action tensor([ 0.0931, -0.7233, 0.4311, -0.1365]) tensor([0.2748, 0.1215, 0.3853, 0.2184]) -Greedy action tensor([ 1.1241, -0.7279, -0.4700, 0.8870]) tensor([0.4653, 0.0730, 0.0945, 0.3671]) -Greedy action tensor([ 0.3977, 0.2487, 0.7049, -0.3070]) tensor([0.2691, 0.2319, 0.3659, 0.1330]) -Greedy action tensor([ 1.5438, -1.0452, 1.1349, -0.7878]) tensor([0.5445, 0.0409, 0.3618, 0.0529]) -Greedy action tensor([ 0.2756, -0.7345, 0.6014, 1.1538]) tensor([0.1939, 0.0706, 0.2686, 0.4668]) -Greedy action tensor([ 0.2412, -2.0499, -0.1138, 0.9739]) tensor([0.2575, 0.0261, 0.1806, 0.5359]) -Greedy action tensor([ 0.7268, -1.3096, 0.5563, -0.6287]) tensor([0.4481, 0.0585, 0.3779, 0.1155]) -Greedy action tensor([ 0.6097, -1.2897, -0.1666, -0.9158]) tensor([0.5472, 0.0819, 0.2518, 0.1190]) -Greedy action tensor([-0.0148, -0.7883, 0.3353, 0.2049]) tensor([0.2424, 0.1118, 0.3439, 0.3019]) -Greedy action tensor([-0.1346, -0.3421, 0.2684, 0.8249]) tensor([0.1689, 0.1373, 0.2528, 0.4410]) -Greedy action tensor([ 0.6641, -0.4648, 0.0066, 0.4375]) tensor([0.3790, 0.1226, 0.1964, 0.3021]) -Greedy action tensor([-0.3533, -1.4822, 0.7430, -0.8282]) tensor([0.2025, 0.0655, 0.6061, 0.1259]) -Greedy action tensor([-0.4230, -0.1412, -0.8334, -0.3021]) tensor([0.2429, 0.3219, 0.1611, 0.2741]) -Greedy action tensor([-0.0975, 0.0417, -0.4001, -0.5362]) tensor([0.2830, 0.3253, 0.2091, 0.1825]) -Greedy action tensor([-1.0900, -1.3378, -0.3467, -0.7825]) tensor([0.1907, 0.1489, 0.4011, 0.2594]) -Greedy action tensor([-1.0096, -1.0623, 0.8443, -0.5393]) tensor([0.1007, 0.0955, 0.6427, 0.1611]) -Greedy action tensor([ 0.0495, -1.5053, -0.5073, 0.5247]) tensor([0.2948, 0.0623, 0.1689, 0.4741]) -Greedy action tensor([ 0.9469, -0.4715, -0.0271, 0.4580]) tensor([0.4478, 0.1084, 0.1691, 0.2747]) -Greedy action tensor([-0.9375, -0.6093, 0.3787, -0.9113]) tensor([0.1400, 0.1944, 0.5220, 0.1437]) -Greedy action tensor([ 0.3887, 0.3862, 0.4286, -0.0659]) tensor([0.2723, 0.2716, 0.2833, 0.1728]) -Greedy action tensor([ 0.8647, -0.3834, -0.1396, -0.5234]) tensor([0.5255, 0.1509, 0.1925, 0.1311]) -Greedy action tensor([ 0.5300, -0.5270, -0.1632, -0.0487]) tensor([0.4153, 0.1443, 0.2076, 0.2328]) -Greedy action tensor([ 0.8538, -0.7555, 0.0769, -0.4409]) tensor([0.5171, 0.1034, 0.2378, 0.1417]) -Greedy action tensor([ 0.5096, -0.2317, -0.0095, -0.0275]) tensor([0.3765, 0.1794, 0.2240, 0.2200]) -Greedy action tensor([ 1.0420, 0.3360, -0.1857, -0.2835]) tensor([0.4873, 0.2405, 0.1427, 0.1295]) -Greedy action tensor([ 0.3683, 0.0704, -0.0435, -0.0381]) tensor([0.3256, 0.2417, 0.2157, 0.2169]) -Greedy action tensor([ 0.6121, -0.3811, -0.0958, -0.2967]) tensor([0.4413, 0.1634, 0.2174, 0.1778]) -Greedy action tensor([ 0.6549, -0.2670, -0.1841, -0.0652]) tensor([0.4317, 0.1717, 0.1865, 0.2101]) -Greedy action tensor([ 0.6666, -0.3653, -0.0858, -0.3614]) tensor([0.4576, 0.1631, 0.2156, 0.1637]) -Greedy action tensor([ 1.0095, -0.7549, 0.1521, -0.5084]) tensor([0.5510, 0.0944, 0.2338, 0.1208]) -Greedy action tensor([ 0.4762, 0.0291, 0.0157, -0.1467]) tensor([0.3563, 0.2278, 0.2248, 0.1911]) -Greedy action tensor([ 0.5419, -0.2559, 0.0076, -0.0890]) tensor([0.3893, 0.1753, 0.2282, 0.2072]) -Greedy action tensor([ 0.4707, -0.4234, -0.0777, -0.0468]) tensor([0.3872, 0.1583, 0.2237, 0.2308]) -Greedy action tensor([ 0.8333, -0.1384, 0.0009, -0.1578]) tensor([0.4578, 0.1732, 0.1991, 0.1699]) -Greedy action tensor([ 0.7691, 0.0978, 0.0752, -0.5984]) tensor([0.4414, 0.2256, 0.2205, 0.1124]) -Greedy action tensor([ 1.0313, -0.6890, -0.0639, -0.3300]) tensor([0.5650, 0.1011, 0.1890, 0.1448]) -Greedy action tensor([ 0.9056, -0.5989, -0.1194, -0.4163]) tensor([0.5413, 0.1202, 0.1942, 0.1443]) -Greedy action tensor([ 1.0260, -0.6626, -0.0326, -0.4705]) tensor([0.5696, 0.1052, 0.1976, 0.1275]) -Greedy action tensor([ 0.5046, -0.3048, -0.0099, -0.1657]) tensor([0.3915, 0.1743, 0.2340, 0.2003]) -Greedy action tensor([ 0.9827, -0.8791, 0.1951, -0.4779]) tensor([0.5428, 0.0843, 0.2469, 0.1260]) -Greedy action tensor([ 0.7345, -0.2858, -0.0133, -0.0925]) tensor([0.4403, 0.1587, 0.2084, 0.1926]) -Greedy action tensor([ 0.6713, -0.5182, -0.1020, -0.2981]) tensor([0.4662, 0.1419, 0.2151, 0.1768]) -Greedy action tensor([ 0.9653, -0.6800, 0.0778, -0.5268]) tensor([0.5466, 0.1055, 0.2250, 0.1229]) -Greedy action tensor([ 0.7489, 0.3527, -0.2878, -0.1149]) tensor([0.4083, 0.2747, 0.1448, 0.1721]) -Greedy action tensor([ 0.6954, -0.3708, -0.0089, -0.4509]) tensor([0.4637, 0.1597, 0.2293, 0.1474]) -Greedy action tensor([ 1.0013, -0.2925, -0.0300, -0.1116]) tensor([0.5104, 0.1400, 0.1820, 0.1677]) -Greedy action tensor([ 0.8379, -0.3816, -0.0224, -0.1375]) tensor([0.4772, 0.1410, 0.2019, 0.1799]) -Greedy action tensor([ 0.6846, -0.5988, -0.0711, -0.1313]) tensor([0.4568, 0.1266, 0.2146, 0.2020]) -Greedy action tensor([ 1.1647, -0.5291, -0.1571, -0.5578]) tensor([0.6138, 0.1128, 0.1637, 0.1096]) -Greedy action tensor([ 0.4728, -0.4330, -0.1510, -0.1011]) tensor([0.3995, 0.1615, 0.2141, 0.2250]) -Greedy action tensor([ 0.8112, -0.7677, 0.0879, -0.3736]) tensor([0.5007, 0.1032, 0.2429, 0.1531]) -Greedy action tensor([ 0.9388, -0.7499, 0.0016, -0.3855]) tensor([0.5427, 0.1003, 0.2126, 0.1444]) -Greedy action tensor([ 0.9561, -0.5413, 0.0288, -0.6093]) tensor([0.5469, 0.1224, 0.2164, 0.1143]) -Greedy action tensor([ 0.6459, 0.0887, 0.0816, -0.2222]) tensor([0.3904, 0.2236, 0.2221, 0.1639]) -Greedy action tensor([ 0.9958, -0.3169, 0.0043, -0.3954]) tensor([0.5294, 0.1425, 0.1964, 0.1317]) -Greedy action tensor([ 0.6393, -0.4101, -0.1722, -0.2382]) tensor([0.4524, 0.1584, 0.2010, 0.1881]) -Greedy action tensor([ 0.9186, -0.4971, -0.0272, -0.2853]) tensor([0.5178, 0.1257, 0.2011, 0.1554]) -Greedy action tensor([ 0.9360, -0.5705, -0.0495, -0.2135]) tensor([0.5231, 0.1160, 0.1953, 0.1657]) -Greedy action tensor([ 0.5157, -0.0333, -0.0223, -0.0066]) tensor([0.3630, 0.2097, 0.2120, 0.2153]) -Greedy action tensor([ 0.6011, -0.2790, -0.0401, 0.0106]) tensor([0.4007, 0.1662, 0.2111, 0.2220]) -Greedy action tensor([ 0.9232, -0.4551, -0.1477, -0.4030]) tensor([0.5376, 0.1355, 0.1842, 0.1427]) -Greedy action tensor([ 0.7346, -0.1122, -0.1112, -0.1523]) tensor([0.4405, 0.1889, 0.1891, 0.1815]) -Greedy action tensor([ 1.1481, -0.7169, -0.1041, -0.2793]) tensor([0.5950, 0.0922, 0.1701, 0.1428]) -Greedy action tensor([ 1.0342, -0.9483, 0.0618, -0.4264]) tensor([0.5721, 0.0788, 0.2163, 0.1328]) -Greedy action tensor([ 0.9468, -0.3653, -0.1041, -0.4779]) tensor([0.5378, 0.1448, 0.1880, 0.1294]) -Greedy action tensor([ 0.9549, -0.8151, 0.0823, -0.5165]) tensor([0.5501, 0.0937, 0.2299, 0.1263]) -Greedy action tensor([ 0.6108, 0.0245, -0.0909, 0.0498]) tensor([0.3813, 0.2121, 0.1890, 0.2176]) -Greedy action tensor([ 0.5652, -0.3198, 0.0766, -0.2764]) tensor([0.4070, 0.1680, 0.2497, 0.1754]) -Greedy action tensor([ 0.9939, -0.5725, -0.0361, -0.2934]) tensor([0.5429, 0.1134, 0.1938, 0.1499]) -Greedy action tensor([ 0.8224, -0.2903, 0.3249, -0.2809]) tensor([0.4408, 0.1449, 0.2680, 0.1463]) -Greedy action tensor([ 0.9145, -0.6490, 0.0303, -0.3412]) tensor([0.5243, 0.1098, 0.2166, 0.1494]) -Greedy action tensor([ 1.1311, -0.8318, 0.0953, -0.5055]) tensor([0.5917, 0.0831, 0.2100, 0.1152]) -Greedy action tensor([ 0.5977, -0.2302, -0.0302, -0.0655]) tensor([0.4023, 0.1758, 0.2147, 0.2072]) -Greedy action tensor([ 0.8499, -0.4068, 0.1668, -0.2336]) tensor([0.4699, 0.1337, 0.2373, 0.1590]) -Greedy action tensor([ 1.4988, -0.6716, -0.1385, -0.5175]) tensor([0.6936, 0.0792, 0.1349, 0.0923]) -Greedy action tensor([ 0.7880, -0.5379, 0.0297, -0.2101]) tensor([0.4756, 0.1263, 0.2228, 0.1753]) -Greedy action tensor([ 0.5724, -0.3259, 0.0373, -0.1501]) tensor([0.4035, 0.1643, 0.2363, 0.1959]) -Greedy action tensor([ 0.7626, -0.5708, 0.1028, -0.2409]) tensor([0.4657, 0.1228, 0.2408, 0.1707]) -Greedy action tensor([ 1.1126, -0.9881, -0.3076, -0.3629]) tensor([0.6279, 0.0768, 0.1517, 0.1436]) -Greedy action tensor([ 1.0833, -0.6947, -0.0696, -0.4106]) tensor([0.5851, 0.0989, 0.1847, 0.1313]) -Greedy action tensor([ 0.6871, -0.6042, 0.0119, -0.2491]) tensor([0.4595, 0.1263, 0.2339, 0.1802]) -Greedy action tensor([ 0.8077, -0.3893, 0.0657, -0.2521]) tensor([0.4706, 0.1422, 0.2241, 0.1631]) -Greedy action tensor([ 0.7073, -0.1704, -0.0898, -0.2966]) tensor([0.4479, 0.1862, 0.2018, 0.1641]) -Greedy action tensor([ 0.4633, -0.0104, -0.0833, 0.0914]) tensor([0.3459, 0.2154, 0.2002, 0.2385]) -Greedy action tensor([ 0.6864, -0.1524, 0.1222, 0.0098]) tensor([0.3985, 0.1722, 0.2267, 0.2026]) -Greedy action tensor([ 0.8738, -0.6486, -0.0099, -0.4707]) tensor([0.5285, 0.1153, 0.2184, 0.1378]) -Greedy action tensor([ 0.6254, -0.2548, 0.0826, -0.0207]) tensor([0.3968, 0.1646, 0.2306, 0.2080]) -Greedy action tensor([ 0.6554, -0.5563, -0.0770, -0.2517]) tensor([0.4583, 0.1364, 0.2203, 0.1850]) -Greedy action tensor([ 0.7499, -0.3184, 0.0107, -0.2838]) tensor([0.4594, 0.1578, 0.2194, 0.1634]) -Greedy action tensor([ 0.7926, -0.3641, 0.0256, -0.3357]) tensor([0.4756, 0.1496, 0.2209, 0.1539]) -Greedy action tensor([ 0.8293, -0.9030, 0.2642, -0.4857]) tensor([0.4966, 0.0878, 0.2822, 0.1333]) -Greedy action tensor([ 0.6500, -0.1345, -0.0662, -0.1903]) tensor([0.4208, 0.1920, 0.2056, 0.1816]) -Greedy action tensor([ 0.9594, -0.4001, -0.0353, -0.2186]) tensor([0.5169, 0.1327, 0.1912, 0.1592]) -Greedy action tensor([ 0.8559, -0.2972, -0.0096, -0.1275]) tensor([0.4738, 0.1496, 0.1994, 0.1772]) -Greedy action tensor([ 0.8389, -0.7775, 0.0910, -0.5676]) tensor([0.5217, 0.1036, 0.2469, 0.1278]) -Greedy action tensor([ 1.0122, -0.4975, -0.0930, -0.4643]) tensor([0.5616, 0.1241, 0.1860, 0.1283]) -Greedy action tensor([ 0.5890, -0.3392, -0.1488, -0.0685]) tensor([0.4181, 0.1653, 0.1999, 0.2167]) -Greedy action tensor([ 0.5733, -0.4688, -0.1674, -0.1761]) tensor([0.4344, 0.1532, 0.2071, 0.2053]) -Greedy action tensor([ 0.9413, -0.4095, -0.0951, -0.1836]) tensor([0.5159, 0.1336, 0.1830, 0.1675]) -Greedy action tensor([ 1.1153, -0.5280, 0.0014, -0.4349]) tensor([0.5768, 0.1115, 0.1893, 0.1224]) -Greedy action tensor([ 0.9512, -0.3203, 0.0013, -0.5109]) tensor([0.5266, 0.1477, 0.2037, 0.1220]) -Greedy action tensor([-1.9135, -0.4363, 0.6532, -0.1645]) tensor([0.0414, 0.1814, 0.5392, 0.2380]) -Greedy action tensor([-1.7612, -0.4452, 0.5784, -0.0871]) tensor([0.0489, 0.1824, 0.5077, 0.2610]) -Greedy action tensor([-1.7125, -0.3829, 0.5966, -0.0042]) tensor([0.0491, 0.1856, 0.4943, 0.2710]) -Greedy action tensor([-1.9387, -0.4387, 0.6632, -0.1752]) tensor([0.0403, 0.1807, 0.5438, 0.2352]) -Greedy action tensor([-1.7749, 0.0180, 0.5087, -0.0858]) tensor([0.0450, 0.2702, 0.4413, 0.2435]) -Greedy action tensor([-1.9062, -0.4294, 0.6457, -0.1618]) tensor([0.0418, 0.1830, 0.5362, 0.2391]) -Greedy action tensor([ 1.4229, -0.4442, 0.8949, 0.6385]) tensor([0.4544, 0.0702, 0.2680, 0.2074]) -Greedy action tensor([-1.8887, -0.4130, 0.6437, -0.1463]) tensor([0.0423, 0.1848, 0.5317, 0.2413]) -Greedy action tensor([-1.9022, -0.4564, 0.6507, -0.1610]) tensor([0.0420, 0.1784, 0.5398, 0.2397]) -Greedy action tensor([-1.6285, -0.4630, 0.6058, 0.1161]) tensor([0.0519, 0.1664, 0.4847, 0.2970]) -Greedy action tensor([-1.3353, 0.6347, 0.1870, 0.2298]) tensor([0.0570, 0.4089, 0.2613, 0.2728]) -Greedy action tensor([-1.6051, -0.9977, 1.1858, 0.5826]) tensor([0.0357, 0.0654, 0.5810, 0.3179]) -Greedy action tensor([-1.9114, -0.4333, 0.6524, -0.1609]) tensor([0.0414, 0.1817, 0.5382, 0.2386]) -Greedy action tensor([-0.9043, 0.9387, 0.0913, 0.2955]) tensor([0.0750, 0.4734, 0.2029, 0.2488]) -Greedy action tensor([-1.9265, -0.4573, 0.6581, -0.1726]) tensor([0.0410, 0.1782, 0.5438, 0.2370]) -Greedy action tensor([-1.9103, -0.4496, 0.6530, -0.1621]) tensor([0.0416, 0.1793, 0.5401, 0.2390]) -Greedy action tensor([-1.8938, -0.3407, 0.6246, -0.1443]) tensor([0.0419, 0.1979, 0.5195, 0.2408]) -Greedy action tensor([-1.9290, -0.4398, 0.6604, -0.1717]) tensor([0.0407, 0.1806, 0.5426, 0.2361]) -Greedy action tensor([-1.7883, -0.3651, 0.5884, -0.1598]) tensor([0.0476, 0.1975, 0.5124, 0.2425]) -Greedy action tensor([-1.8130, -0.0672, 0.5569, -0.0836]) tensor([0.0434, 0.2485, 0.4638, 0.2444]) -Greedy action tensor([-1.3995, 0.2013, 0.4629, 0.2222]) tensor([0.0573, 0.2839, 0.3688, 0.2899]) -Greedy action tensor([-1.5926, -0.3989, 0.5594, 0.0620]) tensor([0.0551, 0.1820, 0.4744, 0.2885]) -Greedy action tensor([-1.8311, -0.4472, 0.6185, -0.1210]) tensor([0.0452, 0.1805, 0.5241, 0.2502]) -Greedy action tensor([-1.8965, -0.4556, 0.6462, -0.1570]) tensor([0.0423, 0.1788, 0.5380, 0.2409]) -Greedy action tensor([-1.4711, -0.4765, 0.5541, 0.3349]) tensor([0.0576, 0.1557, 0.4363, 0.3504]) -Greedy action tensor([-1.3674, -0.3585, 0.6328, 0.6996]) tensor([0.0525, 0.1441, 0.3883, 0.4151]) -Greedy action tensor([-1.9103, -0.4112, 0.6420, -0.1711]) tensor([0.0417, 0.1865, 0.5347, 0.2371]) -Greedy action tensor([-1.7250, -0.2348, 0.5648, -0.0621]) tensor([0.0486, 0.2156, 0.4796, 0.2562]) -Greedy action tensor([-1.7566, -0.3809, 0.5906, -0.0607]) tensor([0.0479, 0.1897, 0.5011, 0.2613]) -Greedy action tensor([-1.8884, -0.4471, 0.6330, -0.1444]) tensor([0.0427, 0.1807, 0.5321, 0.2445]) -Greedy action tensor([-1.8805, -0.4501, 0.6363, -0.1476]) tensor([0.0431, 0.1800, 0.5334, 0.2436]) -Greedy action tensor([-1.8773, -0.4032, 0.6625, -0.2379]) tensor([0.0431, 0.1883, 0.5465, 0.2221]) -Greedy action tensor([-1.9150, -0.3787, 0.6473, -0.1868]) tensor([0.0412, 0.1917, 0.5348, 0.2322]) -Greedy action tensor([-1.6138, 0.4858, 0.2112, -0.6357]) tensor([0.0555, 0.4529, 0.3441, 0.1475]) -Greedy action tensor([-1.8671, -0.2783, 0.6102, -0.1424]) tensor([0.0427, 0.2092, 0.5086, 0.2396]) -Greedy action tensor([-0.6612, 0.7926, 0.0569, -0.0059]) tensor([0.1080, 0.4624, 0.2215, 0.2081]) -Greedy action tensor([-1.7048, -0.4160, 0.5460, -0.0658]) tensor([0.0519, 0.1883, 0.4927, 0.2672]) -Greedy action tensor([-1.8527, -0.1924, 0.5964, -0.1040]) tensor([0.0424, 0.2230, 0.4909, 0.2437]) -Greedy action tensor([-1.2800, -0.5549, 0.2936, 0.2954]) tensor([0.0786, 0.1623, 0.3792, 0.3799]) -Greedy action tensor([-1.8361, -0.3986, 0.6143, -0.0972]) tensor([0.0445, 0.1872, 0.5154, 0.2530]) -Greedy action tensor([-1.6622, -0.4235, 0.4941, -0.0506]) tensor([0.0552, 0.1907, 0.4773, 0.2768]) -Greedy action tensor([-1.9028, -0.4521, 0.6485, -0.1621]) tensor([0.0420, 0.1793, 0.5390, 0.2396]) -Greedy action tensor([-1.8463, -0.4267, 0.6102, -0.1239]) tensor([0.0446, 0.1846, 0.5208, 0.2499]) -Greedy action tensor([-1.4620, 0.5476, 0.2880, 0.0765]) tensor([0.0530, 0.3953, 0.3049, 0.2468]) -Greedy action tensor([-1.1738, 0.5512, -1.0965, -1.3426]) tensor([0.1171, 0.6574, 0.1265, 0.0989]) -Greedy action tensor([-1.2399, -0.5204, 0.3760, -0.0585]) tensor([0.0881, 0.1810, 0.4436, 0.2873]) -Greedy action tensor([-1.1040, -0.9164, 1.0597, 0.0405]) tensor([0.0712, 0.0859, 0.6194, 0.2236]) -Greedy action tensor([-1.8779, -0.4219, 0.6312, -0.1505]) tensor([0.0431, 0.1848, 0.5297, 0.2424]) -Greedy action tensor([-1.8544, -0.4218, 0.6222, -0.1291]) tensor([0.0440, 0.1845, 0.5242, 0.2473]) -Greedy action tensor([-1.8987, -0.4215, 0.6667, -0.1399]) tensor([0.0413, 0.1811, 0.5376, 0.2400]) -Greedy action tensor([-1.9108, -0.4438, 0.6470, -0.1651]) tensor([0.0417, 0.1809, 0.5384, 0.2390]) -Greedy action tensor([-1.7379, -0.4605, 0.5753, -0.0919]) tensor([0.0503, 0.1805, 0.5084, 0.2609]) -Greedy action tensor([-0.9119, -0.4949, 0.2178, 0.0041]) tensor([0.1233, 0.1871, 0.3815, 0.3081]) -Greedy action tensor([-0.9509, -0.4104, 0.2512, 0.1378]) tensor([0.1109, 0.1905, 0.3691, 0.3295]) -Greedy action tensor([-1.8995, -0.4727, 0.6536, -0.1528]) tensor([0.0421, 0.1754, 0.5410, 0.2415]) -Greedy action tensor([-1.9024, -0.4116, 0.6495, -0.1530]) tensor([0.0416, 0.1849, 0.5341, 0.2394]) -Greedy action tensor([-1.9108, -0.4564, 0.6965, -0.1159]) tensor([0.0402, 0.1722, 0.5455, 0.2421]) -Greedy action tensor([-1.8602, -0.4527, 0.6296, -0.1355]) tensor([0.0439, 0.1795, 0.5299, 0.2466]) -Greedy action tensor([-1.8131, -0.4232, 0.6060, -0.1017]) tensor([0.0459, 0.1843, 0.5157, 0.2541]) -Greedy action tensor([-1.8856, -0.3977, 0.6496, -0.1480]) tensor([0.0421, 0.1866, 0.5318, 0.2395]) -Greedy action tensor([-1.5112, -0.2376, 0.3731, 0.1072]) tensor([0.0617, 0.2206, 0.4063, 0.3114]) -Greedy action tensor([-1.3505, -0.5256, 0.3798, 0.4118]) tensor([0.0678, 0.1547, 0.3825, 0.3950]) -Greedy action tensor([-1.7249, -0.4384, 0.5206, -0.0560]) tensor([0.0516, 0.1869, 0.4876, 0.2739]) -Greedy action tensor([-1.8137, -0.1307, 0.5675, -0.0905]) tensor([0.0439, 0.2360, 0.4744, 0.2457]) -Greedy action tensor([-0.9999, -0.5611, 0.2329, 0.2557]) tensor([0.1054, 0.1634, 0.3615, 0.3698]) -Greedy action tensor([-1.9248, -0.4443, 0.6699, -0.1681]) tensor([0.0407, 0.1788, 0.5448, 0.2357]) -Greedy action tensor([-1.7966, -0.4962, 0.5948, -0.1433]) tensor([0.0480, 0.1763, 0.5248, 0.2509]) -Greedy action tensor([-1.9447, -0.4515, 0.6652, -0.1811]) tensor([0.0402, 0.1789, 0.5465, 0.2344]) -Greedy action tensor([-0.2491, 0.0589, 1.0943, 1.6116]) tensor([0.0792, 0.1078, 0.3036, 0.5093]) -Greedy action tensor([-1.9286, -0.4446, 0.6585, -0.1728]) tensor([0.0408, 0.1801, 0.5427, 0.2364]) -Greedy action tensor([-1.8673, -0.3705, 0.6160, -0.1486]) tensor([0.0434, 0.1940, 0.5203, 0.2422]) -Greedy action tensor([-1.9299, -0.4424, 0.6610, -0.1718]) tensor([0.0407, 0.1802, 0.5430, 0.2361]) -Greedy action tensor([-1.9271, -0.4003, 0.6507, -0.1674]) tensor([0.0407, 0.1873, 0.5357, 0.2364]) -Greedy action tensor([-1.6207, 0.4759, 0.3666, 0.0562]) tensor([0.0459, 0.3736, 0.3349, 0.2456]) -Greedy action tensor([-1.5988, 0.3796, 0.3901, -0.0499]) tensor([0.0494, 0.3572, 0.3610, 0.2325]) -Greedy action tensor([-1.6946, -0.3881, 0.5520, -0.0192]) tensor([0.0513, 0.1895, 0.4852, 0.2740]) -Greedy action tensor([-1.7798, -0.4512, 0.5920, -0.0802]) tensor([0.0477, 0.1801, 0.5112, 0.2610]) -Greedy action tensor([-0.8948, -0.5265, 0.9193, 1.1727]) tensor([0.0607, 0.0877, 0.3722, 0.4795]) -Greedy action tensor([-1.9365, -0.4348, 0.6623, -0.1736]) tensor([0.0404, 0.1813, 0.5430, 0.2354]) -Greedy action tensor([-1.0994, 0.2475, 0.4303, 0.7526]) tensor([0.0631, 0.2429, 0.2916, 0.4024]) -Greedy action tensor([-1.8722, -0.3893, 0.6348, -0.1371]) tensor([0.0428, 0.1887, 0.5255, 0.2429]) -Greedy action tensor([ 1.7782, 0.2809, -0.5087, 0.1348]) tensor([0.6585, 0.1473, 0.0669, 0.1273]) -Greedy action tensor([ 1.6136, -1.0099, -0.2110, 0.2875]) tensor([0.6670, 0.0484, 0.1076, 0.1771]) -Greedy action tensor([ 1.6209, -0.4182, -0.3494, 0.1268]) tensor([0.6693, 0.0871, 0.0933, 0.1502]) -Greedy action tensor([ 1.9511, -0.4344, -0.5023, 0.1579]) tensor([0.7438, 0.0685, 0.0640, 0.1238]) -Greedy action tensor([ 1.4528, -0.6417, -0.1243, 0.2942]) tensor([0.6084, 0.0749, 0.1257, 0.1910]) -Greedy action tensor([ 1.2918, -0.2272, -0.6751, 0.1671]) tensor([0.5940, 0.1300, 0.0831, 0.1929]) -Greedy action tensor([ 1.4107, -0.6844, -0.4195, 0.8753]) tensor([0.5351, 0.0658, 0.0858, 0.3133]) -Greedy action tensor([ 1.2435, -0.6794, -0.0804, 0.4202]) tensor([0.5402, 0.0790, 0.1437, 0.2371]) -Greedy action tensor([ 1.2384, 0.0556, -0.5564, 0.5842]) tensor([0.5019, 0.1538, 0.0834, 0.2609]) -Greedy action tensor([ 0.7750, -0.0946, -0.1406, -0.0418]) tensor([0.4422, 0.1853, 0.1770, 0.1954]) -Greedy action tensor([ 2.0058, -0.9518, -0.3044, 0.2362]) tensor([0.7567, 0.0393, 0.0751, 0.1289]) -Greedy action tensor([ 1.6235, -0.8339, -0.5239, 0.5736]) tensor([0.6441, 0.0552, 0.0752, 0.2254]) -Greedy action tensor([ 2.0479, -0.3702, -0.3189, 0.4532]) tensor([0.7216, 0.0643, 0.0677, 0.1465]) -Greedy action tensor([ 1.7217, -0.8639, -0.5475, 0.2453]) tensor([0.7106, 0.0535, 0.0735, 0.1624]) -Greedy action tensor([ 1.9324, -0.9279, -0.3041, 0.9440]) tensor([0.6509, 0.0373, 0.0695, 0.2423]) -Greedy action tensor([ 2.0069, -0.4181, -0.8717, 0.1137]) tensor([0.7720, 0.0683, 0.0434, 0.1163]) -Greedy action tensor([ 2.2171, -0.9496, -0.7947, 0.5072]) tensor([0.7860, 0.0331, 0.0387, 0.1422]) -Greedy action tensor([ 1.5551, -0.7216, -0.2717, 0.3046]) tensor([0.6452, 0.0662, 0.1038, 0.1848]) -Greedy action tensor([ 1.4179, -0.2183, -0.4107, 0.1310]) tensor([0.6129, 0.1193, 0.0985, 0.1693]) -Greedy action tensor([ 1.5945, -0.5416, -0.1507, 0.9812]) tensor([0.5452, 0.0644, 0.0952, 0.2952]) -Greedy action tensor([ 1.4008, -0.8058, 0.1519, 0.9084]) tensor([0.4980, 0.0548, 0.1428, 0.3044]) -Greedy action tensor([ 1.6467, -0.6447, -0.3434, -0.4824]) tensor([0.7370, 0.0745, 0.1007, 0.0877]) -Greedy action tensor([ 1.3382, -0.2120, -0.3069, 0.0023]) tensor([0.5995, 0.1272, 0.1157, 0.1576]) -Greedy action tensor([ 1.6602, -0.4641, -0.2272, 0.1364]) tensor([0.6717, 0.0803, 0.1017, 0.1463]) -Greedy action tensor([ 2.0171, -1.0734, -0.1063, 0.6046]) tensor([0.7099, 0.0323, 0.0849, 0.1729]) -Greedy action tensor([ 1.3202, -0.3963, -0.5983, 0.3781]) tensor([0.5826, 0.1047, 0.0855, 0.2271]) -Greedy action tensor([ 1.8939, -0.7433, 0.0882, 0.5677]) tensor([0.6660, 0.0477, 0.1095, 0.1768]) -Greedy action tensor([ 1.2458, -0.2915, -0.7691, 0.3906]) tensor([0.5639, 0.1212, 0.0752, 0.2397]) -Greedy action tensor([ 0.9946, -0.4562, -0.2484, 0.3132]) tensor([0.4929, 0.1155, 0.1422, 0.2494]) -Greedy action tensor([ 1.4710, -0.1403, -0.4214, -0.1497]) tensor([0.6459, 0.1290, 0.0973, 0.1277]) -Greedy action tensor([ 2.0131, -0.8729, -0.4904, 0.1365]) tensor([0.7748, 0.0432, 0.0634, 0.1186]) -Greedy action tensor([ 1.5810, -0.1634, -0.5508, 0.1061]) tensor([0.6570, 0.1148, 0.0779, 0.1503]) -Greedy action tensor([ 1.3886, -0.5946, -0.2572, 0.4659]) tensor([0.5787, 0.0796, 0.1116, 0.2300]) -Greedy action tensor([ 1.8815, -0.3795, -0.1040, 0.3488]) tensor([0.6861, 0.0715, 0.0942, 0.1482]) -Greedy action tensor([ 1.2729, -0.4135, -0.2737, 0.2127]) tensor([0.5732, 0.1062, 0.1221, 0.1986]) -Greedy action tensor([ 1.1735, -0.6370, -0.1549, 0.3932]) tensor([0.5300, 0.0867, 0.1404, 0.2429]) -Greedy action tensor([ 1.6718, -0.2122, -0.9479, 0.3010]) tensor([0.6763, 0.1028, 0.0492, 0.1717]) -Greedy action tensor([ 1.8460, -0.7080, -0.7341, 0.2116]) tensor([0.7415, 0.0577, 0.0562, 0.1447]) -Greedy action tensor([ 1.8880, -0.0759, -0.3615, 0.5954]) tensor([0.6577, 0.0923, 0.0694, 0.1806]) -Greedy action tensor([ 1.0502, -0.3484, -0.5164, 0.4425]) tensor([0.4999, 0.1235, 0.1044, 0.2723]) -Greedy action tensor([ 1.6673, -0.7210, -0.5499, 0.1490]) tensor([0.7043, 0.0647, 0.0767, 0.1543]) -Greedy action tensor([ 1.2411, -0.3763, -0.3279, 0.2755]) tensor([0.5595, 0.1110, 0.1165, 0.2130]) -Greedy action tensor([ 1.8051, -0.4714, -0.3999, 0.5164]) tensor([0.6718, 0.0690, 0.0741, 0.1852]) -Greedy action tensor([ 1.3827, -0.0486, -0.4986, 0.5336]) tensor([0.5497, 0.1314, 0.0838, 0.2352]) -Greedy action tensor([ 1.3052, -0.2610, -0.4630, -0.2410]) tensor([0.6279, 0.1311, 0.1072, 0.1338]) -Greedy action tensor([ 1.8828, -0.1390, -0.7011, 0.5761]) tensor([0.6763, 0.0896, 0.0510, 0.1831]) -Greedy action tensor([ 2.0549, -1.3451, 0.0349, 0.3233]) tensor([0.7446, 0.0248, 0.0988, 0.1318]) -Greedy action tensor([ 1.9693, -0.2880, -0.0305, 0.3997]) tensor([0.6905, 0.0723, 0.0935, 0.1437]) -Greedy action tensor([ 1.7293, -0.2439, -0.3193, 0.2163]) tensor([0.6720, 0.0934, 0.0866, 0.1480]) -Greedy action tensor([ 1.2983, -0.3783, -0.5291, -0.0058]) tensor([0.6176, 0.1155, 0.0993, 0.1676]) -Greedy action tensor([ 1.9331, -0.9955, -0.4106, 0.3268]) tensor([0.7407, 0.0396, 0.0711, 0.1486]) -Greedy action tensor([ 1.3010, -0.6759, -0.0717, -0.1150]) tensor([0.6118, 0.0847, 0.1550, 0.1485]) -Greedy action tensor([ 1.6622, -0.1429, -0.4876, 0.1648]) tensor([0.6646, 0.1093, 0.0774, 0.1487]) -Greedy action tensor([ 1.1700, -0.1939, -0.5508, 0.2648]) tensor([0.5438, 0.1390, 0.0973, 0.2199]) -Greedy action tensor([ 1.3316, -0.6492, -0.0949, -0.2118]) tensor([0.6282, 0.0867, 0.1509, 0.1342]) -Greedy action tensor([ 1.7512, -0.6687, -0.9139, 0.2361]) tensor([0.7255, 0.0645, 0.0505, 0.1594]) -Greedy action tensor([ 1.4660, -0.7071, -0.1690, 1.0078]) tensor([0.5151, 0.0586, 0.1004, 0.3258]) -Greedy action tensor([ 2.1759, -1.3702, -0.2624, 0.1594]) tensor([0.8005, 0.0231, 0.0699, 0.1066]) -Greedy action tensor([ 0.9434, -0.5648, -0.3640, 0.2862]) tensor([0.4975, 0.1101, 0.1346, 0.2578]) -Greedy action tensor([ 1.6472, 0.0321, -0.3895, 0.1108]) tensor([0.6475, 0.1288, 0.0845, 0.1393]) -Greedy action tensor([ 1.1878, -0.3326, -0.3660, 0.3669]) tensor([0.5347, 0.1169, 0.1131, 0.2353]) -Greedy action tensor([ 1.5758, -0.3129, -0.6050, 0.6027]) tensor([0.6090, 0.0921, 0.0688, 0.2301]) -Greedy action tensor([ 1.4269, -0.3531, -1.0089, 0.3843]) tensor([0.6216, 0.1048, 0.0544, 0.2191]) -Greedy action tensor([ 1.5662, -1.0594, -0.2007, 0.3903]) tensor([0.6444, 0.0467, 0.1101, 0.1988]) -Greedy action tensor([ 1.5158, -0.8250, -0.2732, 0.6013]) tensor([0.6009, 0.0578, 0.1004, 0.2408]) -Greedy action tensor([ 1.2882, -0.3810, -0.7200, 0.3977]) tensor([0.5770, 0.1087, 0.0774, 0.2368]) -Greedy action tensor([ 1.3639, -0.2483, -0.4931, -0.2323]) tensor([0.6417, 0.1280, 0.1002, 0.1301]) -Greedy action tensor([ 1.0291, -0.1656, -0.5047, 0.6363]) tensor([0.4559, 0.1380, 0.0983, 0.3078]) -Greedy action tensor([ 1.2147, -0.5624, -0.6238, 0.4780]) tensor([0.5534, 0.0936, 0.0880, 0.2649]) -Greedy action tensor([ 1.8361, -0.3067, -0.2532, 0.6959]) tensor([0.6407, 0.0752, 0.0793, 0.2049]) -Greedy action tensor([ 1.1090, -0.4255, -0.1983, 0.2682]) tensor([0.5215, 0.1124, 0.1411, 0.2250]) -Greedy action tensor([ 1.7801, -0.2569, -1.3345, 0.6201]) tensor([0.6719, 0.0876, 0.0298, 0.2106]) -Greedy action tensor([ 1.3465, -0.0701, -0.5931, 0.3036]) tensor([0.5751, 0.1395, 0.0827, 0.2027]) -Greedy action tensor([ 1.8048, -0.5121, -0.3946, 0.3933]) tensor([0.6881, 0.0678, 0.0763, 0.1678]) -Greedy action tensor([ 1.5643, -0.6839, -0.6232, 0.5124]) tensor([0.6381, 0.0674, 0.0716, 0.2229]) -Greedy action tensor([ 1.4241, -0.6278, -0.5935, 0.2588]) tensor([0.6356, 0.0817, 0.0845, 0.1982]) -Greedy action tensor([ 1.5162, -0.2992, -0.4944, 0.2069]) tensor([0.6383, 0.1039, 0.0855, 0.1723]) -Greedy action tensor([ 1.2878, -0.0847, -0.7912, 0.4794]) tensor([0.5482, 0.1390, 0.0686, 0.2443]) -Greedy action tensor([ 1.3216, -0.1811, -0.5087, 0.2922]) tensor([0.5747, 0.1279, 0.0922, 0.2053]) -Greedy action tensor([ 1.5752, -0.4123, -0.7979, 0.0231]) tensor([0.6935, 0.0950, 0.0646, 0.1469]) -Greedy action tensor([ 1.7434, -0.8784, -0.3686, 0.5567]) tensor([0.6672, 0.0485, 0.0807, 0.2036]) -Greedy action tensor([ 1.3037, -0.6922, 0.0269, -0.8404]) tensor([0.6527, 0.0887, 0.1821, 0.0765]) -Greedy action tensor([ 0.9248, -0.5794, -0.0983, -0.3041]) tensor([0.5335, 0.1186, 0.1918, 0.1561]) -Greedy action tensor([ 0.8637, -0.3955, 0.0204, -0.4655]) tensor([0.5053, 0.1435, 0.2174, 0.1338]) -Greedy action tensor([ 1.1086, -0.6021, -0.2005, -0.3785]) tensor([0.5964, 0.1078, 0.1611, 0.1348]) -Greedy action tensor([ 0.7555, -0.0856, -0.1556, -0.1692]) tensor([0.4484, 0.1934, 0.1803, 0.1779]) -Greedy action tensor([ 0.7932, -0.2022, 0.0563, -0.2851]) tensor([0.4570, 0.1689, 0.2187, 0.1554]) -Greedy action tensor([ 0.3947, -0.0701, -0.1388, -0.0322]) tensor([0.3488, 0.2191, 0.2046, 0.2276]) -Greedy action tensor([ 0.6506, -0.4907, -0.1041, -0.3015]) tensor([0.4597, 0.1468, 0.2161, 0.1774]) -Greedy action tensor([ 0.6529, -0.6005, -0.1471, -0.1095]) tensor([0.4543, 0.1297, 0.2041, 0.2119]) -Greedy action tensor([ 1.0249, -0.4502, -0.1606, -0.3337]) tensor([0.5582, 0.1277, 0.1706, 0.1435]) -Greedy action tensor([ 0.7616, -0.6793, -0.0661, -0.9381]) tensor([0.5386, 0.1275, 0.2354, 0.0984]) -Greedy action tensor([ 0.8744, -0.6425, 0.0848, -0.2468]) tensor([0.5002, 0.1097, 0.2271, 0.1630]) -Greedy action tensor([ 0.6931, -0.2012, 0.0560, -0.3161]) tensor([0.4344, 0.1776, 0.2297, 0.1583]) -Greedy action tensor([ 1.1445, -0.4962, 0.1027, -0.4111]) tensor([0.5689, 0.1103, 0.2007, 0.1201]) -Greedy action tensor([ 0.6340, -0.6346, -0.0217, -0.1696]) tensor([0.4448, 0.1251, 0.2309, 0.1992]) -Greedy action tensor([ 0.6312, -0.4077, -0.1774, 0.0858]) tensor([0.4204, 0.1487, 0.1873, 0.2436]) -Greedy action tensor([ 1.2320, -0.5460, -0.0397, -0.4332]) tensor([0.6103, 0.1031, 0.1711, 0.1154]) -Greedy action tensor([ 0.6853, -0.4313, -0.0609, -0.2254]) tensor([0.4538, 0.1486, 0.2152, 0.1825]) -Greedy action tensor([ 0.8151, -0.1524, -0.0116, -0.1381]) tensor([0.4539, 0.1725, 0.1986, 0.1750]) -Greedy action tensor([ 0.7775, -0.5155, -0.0224, -0.2885]) tensor([0.4835, 0.1327, 0.2173, 0.1665]) -Greedy action tensor([ 0.6543, -0.6178, -0.1558, -0.2519]) tensor([0.4697, 0.1316, 0.2089, 0.1898]) -Greedy action tensor([ 1.1882, -1.0535, -0.0448, -0.5835]) tensor([0.6379, 0.0678, 0.1859, 0.1085]) -Greedy action tensor([ 0.8545, -0.8376, -0.0040, -0.4027]) tensor([0.5284, 0.0973, 0.2239, 0.1503]) -Greedy action tensor([ 0.6619, -0.0332, -0.1138, -0.0515]) tensor([0.4083, 0.2037, 0.1880, 0.2000]) -Greedy action tensor([ 0.5422, -0.4307, -0.0347, -0.2316]) tensor([0.4165, 0.1574, 0.2339, 0.1921]) -Greedy action tensor([ 1.2289, -0.7377, -0.0046, -0.4906]) tensor([0.6210, 0.0869, 0.1809, 0.1113]) -Greedy action tensor([ 0.7747, -0.4125, -0.1097, -0.2100]) tensor([0.4781, 0.1459, 0.1974, 0.1786]) -Greedy action tensor([ 0.9823, -0.6892, 0.2479, -0.4560]) tensor([0.5249, 0.0987, 0.2519, 0.1246]) -Greedy action tensor([ 0.8402, -0.6061, -0.0850, -0.4117]) tensor([0.5214, 0.1228, 0.2067, 0.1491]) -Greedy action tensor([ 1.3014, -0.8829, -0.0760, -0.7474]) tensor([0.6695, 0.0754, 0.1689, 0.0863]) -Greedy action tensor([ 1.2062, -0.6182, -0.0780, -0.6894]) tensor([0.6296, 0.1016, 0.1743, 0.0946]) -Greedy action tensor([ 1.2351, -0.5247, -0.0860, -0.4150]) tensor([0.6131, 0.1055, 0.1636, 0.1177]) -Greedy action tensor([ 1.0614, -0.6688, 0.0082, -0.4918]) tensor([0.5755, 0.1020, 0.2007, 0.1218]) -Greedy action tensor([ 0.7770, -0.3731, -0.0176, -0.5292]) tensor([0.4904, 0.1553, 0.2215, 0.1328]) -Greedy action tensor([ 1.1250, -1.0193, -0.0790, -0.5513]) tensor([0.6234, 0.0730, 0.1870, 0.1166]) -Greedy action tensor([ 0.6457, -0.5705, -0.0019, -0.3186]) tensor([0.4544, 0.1347, 0.2378, 0.1732]) -Greedy action tensor([ 0.8086, -0.1442, -0.2103, -0.2624]) tensor([0.4786, 0.1846, 0.1728, 0.1640]) -Greedy action tensor([ 0.6364, 0.0465, 0.0061, -0.3824]) tensor([0.4085, 0.2265, 0.2175, 0.1475]) -Greedy action tensor([ 0.8247, -0.2872, -0.0666, -0.1191]) tensor([0.4699, 0.1546, 0.1927, 0.1828]) -Greedy action tensor([ 0.6898, -0.5466, -0.1811, -0.3974]) tensor([0.4887, 0.1419, 0.2046, 0.1648]) -Greedy action tensor([ 0.7232, -0.0846, -0.0250, 0.0327]) tensor([0.4132, 0.1842, 0.1955, 0.2071]) -Greedy action tensor([ 0.8600, -0.8149, -0.2037, -0.3664]) tensor([0.5477, 0.1026, 0.1891, 0.1607]) -Greedy action tensor([ 0.9368, -0.8978, 0.2159, -0.6026]) tensor([0.5375, 0.0858, 0.2614, 0.1153]) -Greedy action tensor([ 0.6231, -0.4476, -0.0361, -0.2211]) tensor([0.4367, 0.1497, 0.2259, 0.1877]) -Greedy action tensor([ 0.7194, -0.5557, -0.1140, -0.1846]) tensor([0.4720, 0.1319, 0.2051, 0.1911]) -Greedy action tensor([ 1.1880, -0.6320, -0.1818, -0.6985]) tensor([0.6378, 0.1033, 0.1621, 0.0967]) -Greedy action tensor([ 0.4668, -0.0516, -0.1787, -0.0663]) tensor([0.3695, 0.2200, 0.1937, 0.2168]) -Greedy action tensor([ 1.0587, -0.4852, 0.0782, -0.2564]) tensor([0.5385, 0.1150, 0.2020, 0.1445]) -Greedy action tensor([ 0.1715, 0.1449, 0.0809, -0.0898]) tensor([0.2734, 0.2663, 0.2497, 0.2106]) -Greedy action tensor([ 0.5605, -0.4742, 0.3611, -0.8261]) tensor([0.4125, 0.1466, 0.3379, 0.1031]) -Greedy action tensor([ 0.8109, -0.8424, -0.1407, -0.4317]) tensor([0.5359, 0.1026, 0.2069, 0.1547]) -Greedy action tensor([ 1.0535, -0.9937, 0.1335, -0.5501]) tensor([0.5784, 0.0747, 0.2305, 0.1164]) -Greedy action tensor([ 1.0202, -0.9396, 0.2600, -0.4468]) tensor([0.5438, 0.0766, 0.2542, 0.1254]) -Greedy action tensor([ 0.7192, -0.4077, -0.0963, -0.1730]) tensor([0.4595, 0.1489, 0.2033, 0.1883]) -Greedy action tensor([ 0.2513, 0.5169, -0.1059, 0.0794]) tensor([0.2600, 0.3391, 0.1819, 0.2190]) -Greedy action tensor([ 0.7898, -0.6471, -0.0679, -0.2483]) tensor([0.4960, 0.1179, 0.2104, 0.1757]) -Greedy action tensor([ 0.6061, -0.2072, 0.0104, -0.0367]) tensor([0.3968, 0.1759, 0.2187, 0.2086]) -Greedy action tensor([ 0.6489, -0.0556, 0.0686, -0.2827]) tensor([0.4085, 0.2019, 0.2286, 0.1609]) -Greedy action tensor([ 0.8752, -0.3615, 0.0089, -0.6992]) tensor([0.5214, 0.1514, 0.2192, 0.1080]) -Greedy action tensor([ 1.1136, -0.5425, 0.0467, -0.4770]) tensor([0.5751, 0.1098, 0.1979, 0.1172]) -Greedy action tensor([ 0.5416, -0.3861, -0.1591, -0.1005]) tensor([0.4136, 0.1636, 0.2052, 0.2176]) -Greedy action tensor([ 0.8212, -0.4391, -0.0814, -0.5753]) tensor([0.5164, 0.1464, 0.2094, 0.1278]) -Greedy action tensor([ 1.3793, -0.5705, -0.0409, -0.3085]) tensor([0.6374, 0.0907, 0.1540, 0.1179]) -Greedy action tensor([ 0.4650, -0.3608, -0.2288, -0.1971]) tensor([0.4076, 0.1785, 0.2037, 0.2102]) -Greedy action tensor([ 0.8393, -0.3098, -0.1118, -0.1253]) tensor([0.4798, 0.1521, 0.1853, 0.1828]) -Greedy action tensor([ 1.0987, -0.4440, -0.0519, -0.5797]) tensor([0.5825, 0.1245, 0.1843, 0.1087]) -Greedy action tensor([ 0.7406, -0.1038, -0.2160, -0.3602]) tensor([0.4658, 0.2002, 0.1790, 0.1549]) -Greedy action tensor([ 0.5680, 0.4461, -0.0837, 0.1059]) tensor([0.3293, 0.2916, 0.1716, 0.2075]) -Greedy action tensor([ 0.6931, -0.5991, -0.0932, -0.1463]) tensor([0.4625, 0.1270, 0.2107, 0.1998]) -Greedy action tensor([ 0.6074, -0.2485, 0.0301, -0.2979]) tensor([0.4183, 0.1777, 0.2348, 0.1692]) -Greedy action tensor([ 1.0018, -0.6148, -0.0428, -0.5444]) tensor([0.5671, 0.1126, 0.1995, 0.1208]) -Greedy action tensor([ 0.7214, -0.5396, -0.0942, -0.5328]) tensor([0.4973, 0.1409, 0.2200, 0.1419]) -Greedy action tensor([ 0.6367, -0.3832, -0.0686, -0.0088]) tensor([0.4204, 0.1516, 0.2076, 0.2204]) -Greedy action tensor([ 1.0523, -0.5269, -0.0594, -0.4968]) tensor([0.5722, 0.1180, 0.1883, 0.1216]) -Greedy action tensor([ 0.3107, 0.0791, -0.1213, -0.0844]) tensor([0.3209, 0.2546, 0.2083, 0.2162]) -Greedy action tensor([ 0.6972, -0.5168, -0.0393, -0.2695]) tensor([0.4638, 0.1378, 0.2221, 0.1764]) -Greedy action tensor([ 0.9114, -0.8186, 0.1040, -0.4971]) tensor([0.5354, 0.0949, 0.2388, 0.1309]) -Greedy action tensor([ 1.0137, -0.7615, 0.1216, -0.2266]) tensor([0.5352, 0.0907, 0.2193, 0.1548]) -Greedy action tensor([ 0.4373, 0.0319, -0.0917, 0.0519]) tensor([0.3406, 0.2271, 0.2007, 0.2317]) -Greedy action tensor([ 1.0123, -0.6317, -0.1108, -0.6137]) tensor([0.5830, 0.1126, 0.1896, 0.1147]) -Greedy action tensor([ 0.4734, 0.0746, -0.0141, -0.1125]) tensor([0.3519, 0.2362, 0.2161, 0.1959]) -Greedy action tensor([-0.7463, -1.0661, -0.1412, -0.9571]) tensor([0.2290, 0.1663, 0.4193, 0.1854]) -Greedy action tensor([1.5570, 0.0397, 0.7254, 0.6589]) tensor([0.4850, 0.1064, 0.2111, 0.1975]) -Greedy action tensor([ 0.1782, -1.8573, 0.2499, 1.1491]) tensor([0.2064, 0.0270, 0.2217, 0.5449]) -Greedy action tensor([-0.5488, -1.1139, 1.1174, -0.1859]) tensor([0.1205, 0.0685, 0.6378, 0.1732]) -Greedy action tensor([ 1.0671, -0.3470, 0.8173, -0.0925]) tensor([0.4281, 0.1041, 0.3335, 0.1343]) -Greedy action tensor([-0.1700, 0.5875, -0.2253, -0.5120]) tensor([0.2088, 0.4453, 0.1976, 0.1483]) -Greedy action tensor([-0.4263, 0.6058, 1.1268, -1.3142]) tensor([0.1118, 0.3138, 0.5284, 0.0460]) -Greedy action tensor([ 0.2009, -0.8239, -0.6042, 0.3787]) tensor([0.3333, 0.1196, 0.1490, 0.3981]) -Greedy action tensor([ 0.4082, 0.1782, 0.3766, -0.7270]) tensor([0.3242, 0.2575, 0.3141, 0.1042]) -Greedy action tensor([-0.6645, -0.2672, 0.6236, -0.1594]) tensor([0.1287, 0.1915, 0.4666, 0.2132]) -Greedy action tensor([-0.9672, -0.9265, -0.7698, 1.4008]) tensor([0.0718, 0.0747, 0.0874, 0.7661]) -Greedy action tensor([-0.2056, -1.7819, 0.4354, -0.6956]) tensor([0.2690, 0.0556, 0.5106, 0.1648]) -Greedy action tensor([-0.8957, -0.8286, 0.3693, 0.7114]) tensor([0.0943, 0.1009, 0.3342, 0.4706]) -Greedy action tensor([-0.1228, -0.2448, 0.6063, -0.0519]) tensor([0.1987, 0.1759, 0.4120, 0.2133]) -Greedy action tensor([ 0.2559, -0.7384, -0.3673, -0.4975]) tensor([0.4207, 0.1557, 0.2256, 0.1981]) -Greedy action tensor([ 1.0985, -0.3800, 0.9151, 0.6894]) tensor([0.3670, 0.0837, 0.3055, 0.2438]) -Greedy action tensor([ 0.2897, -1.4361, 0.5311, -0.1669]) tensor([0.3242, 0.0577, 0.4127, 0.2054]) -Greedy action tensor([ 1.5394, -1.0403, 1.2399, 0.5591]) tensor([0.4562, 0.0346, 0.3381, 0.1712]) -Greedy action tensor([ 0.7234, -1.4643, -0.3467, -0.7424]) tensor([0.5931, 0.0665, 0.2034, 0.1369]) -Greedy action tensor([-0.3292, -0.8208, 0.2212, -0.9233]) tensor([0.2566, 0.1569, 0.4449, 0.1416]) -Greedy action tensor([-1.1731, -0.9653, 0.7759, -1.1032]) tensor([0.0968, 0.1192, 0.6801, 0.1039]) -Greedy action tensor([-0.0245, -0.8841, -0.2057, 0.2197]) tensor([0.2829, 0.1198, 0.2361, 0.3612]) -Greedy action tensor([ 1.1291, -0.6275, -0.3454, 0.2491]) tensor([0.5506, 0.0950, 0.1260, 0.2284]) -Greedy action tensor([-0.2196, -0.6255, 0.5236, -0.3131]) tensor([0.2137, 0.1424, 0.4493, 0.1946]) -Greedy action tensor([-0.4407, -1.0988, 0.1579, -0.5900]) tensor([0.2382, 0.1233, 0.4334, 0.2051]) -Greedy action tensor([-0.2537, 0.0314, -0.7880, -1.2995]) tensor([0.3061, 0.4070, 0.1794, 0.1076]) -Greedy action tensor([ 0.2182, -0.1413, -0.0330, -0.2813]) tensor([0.3244, 0.2264, 0.2523, 0.1969]) -Greedy action tensor([ 0.3422, -1.3329, -0.1509, 0.3052]) tensor([0.3621, 0.0678, 0.2212, 0.3489]) -Greedy action tensor([-0.0025, -0.9120, -0.9171, 0.3011]) tensor([0.3166, 0.1275, 0.1269, 0.4290]) -Greedy action tensor([ 0.5129, 0.0047, 0.6455, -1.0366]) tensor([0.3383, 0.2035, 0.3863, 0.0718]) -Greedy action tensor([ 0.1039, -0.4125, -0.9697, 0.4784]) tensor([0.2947, 0.1759, 0.1007, 0.4287]) -Greedy action tensor([-1.6811, 0.0270, 0.7062, -0.6698]) tensor([0.0496, 0.2738, 0.5401, 0.1364]) -Greedy action tensor([-0.0235, -0.5416, 0.0847, 0.1117]) tensor([0.2594, 0.1545, 0.2891, 0.2970]) -Greedy action tensor([1.1703, 0.0921, 1.0080, 1.5977]) tensor([0.2686, 0.0914, 0.2283, 0.4118]) -Greedy action tensor([-0.0775, -0.6822, -0.6624, 0.5315]) tensor([0.2537, 0.1386, 0.1413, 0.4664]) -Greedy action tensor([ 0.9681, -2.0413, 0.2143, 0.1035]) tensor([0.5152, 0.0254, 0.2424, 0.2170]) -Greedy action tensor([-0.7605, -0.5080, -0.8825, -0.5295]) tensor([0.2256, 0.2904, 0.1997, 0.2842]) -Greedy action tensor([ 0.3063, -0.2117, 1.1497, -0.1867]) tensor([0.2207, 0.1315, 0.5130, 0.1348]) -Greedy action tensor([1.0729, 0.0782, 0.0660, 0.1773]) tensor([0.4665, 0.1725, 0.1704, 0.1905]) -Greedy action tensor([ 0.3518, -0.8238, -0.9158, -0.0356]) tensor([0.4407, 0.1360, 0.1241, 0.2992]) -Greedy action tensor([-0.7899, -0.4939, 0.1059, -0.2890]) tensor([0.1552, 0.2086, 0.3801, 0.2561]) -Greedy action tensor([ 0.5785, -1.2902, -0.0705, -0.3667]) tensor([0.4841, 0.0747, 0.2530, 0.1881]) -Greedy action tensor([ 0.0494, -0.6844, 1.0905, -0.9059]) tensor([0.2129, 0.1022, 0.6030, 0.0819]) -Greedy action tensor([ 0.8164, -0.5568, -0.7307, 0.5440]) tensor([0.4489, 0.1137, 0.0955, 0.3419]) -Greedy action tensor([ 1.2179, 0.6624, 0.5378, -0.0514]) tensor([0.4235, 0.2430, 0.2145, 0.1190]) -Greedy action tensor([ 0.7864, 0.2262, -0.5044, -0.2555]) tensor([0.4548, 0.2597, 0.1251, 0.1604]) -Greedy action tensor([ 0.1063, -0.7912, 0.3287, -0.4734]) tensor([0.3109, 0.1267, 0.3883, 0.1741]) -Greedy action tensor([-0.0080, 0.2405, 0.0996, -0.4031]) tensor([0.2457, 0.3151, 0.2737, 0.1655]) -Greedy action tensor([ 1.0551e-01, -1.6659e+00, 8.0947e-01, 8.4066e-04]) tensor([0.2444, 0.0416, 0.4940, 0.2201]) -Greedy action tensor([ 0.6645, -0.8900, 0.6797, 0.5608]) tensor([0.3197, 0.0675, 0.3246, 0.2882]) -Greedy action tensor([ 0.2218, -0.0740, 0.0258, 0.5078]) tensor([0.2566, 0.1909, 0.2109, 0.3416]) -Greedy action tensor([-0.4415, 0.0230, 0.7713, -0.6180]) tensor([0.1472, 0.2343, 0.4951, 0.1234]) -Greedy action tensor([ 0.0352, -0.9972, -0.5274, -0.3488]) tensor([0.3836, 0.1366, 0.2185, 0.2613]) -Greedy action tensor([ 1.4638, -1.4907, 0.4178, 0.1366]) tensor([0.5993, 0.0312, 0.2105, 0.1589]) -Greedy action tensor([ 0.1226, -0.8777, -0.0715, -0.1974]) tensor([0.3427, 0.1261, 0.2823, 0.2489]) -Greedy action tensor([-0.1770, 0.0164, 1.1724, -0.0173]) tensor([0.1381, 0.1676, 0.5324, 0.1620]) -Greedy action tensor([-0.5179, -0.0692, 0.3755, 0.2588]) tensor([0.1392, 0.2180, 0.3401, 0.3027]) -Greedy action tensor([ 0.2212, 0.2248, 0.3795, -0.3022]) tensor([0.2654, 0.2664, 0.3109, 0.1573]) -Greedy action tensor([-0.2442, 0.0229, 0.1422, -0.6188]) tensor([0.2239, 0.2925, 0.3296, 0.1540]) -Greedy action tensor([-0.7034, 0.9967, 0.6209, -1.3080]) tensor([0.0928, 0.5078, 0.3487, 0.0507]) -Greedy action tensor([ 0.9793, -1.2104, -0.1055, 0.3190]) tensor([0.5085, 0.0569, 0.1719, 0.2627]) -Greedy action tensor([ 0.0366, -0.5443, 0.2486, -0.5501]) tensor([0.2984, 0.1669, 0.3688, 0.1659]) -Greedy action tensor([ 1.2975, -0.5013, 1.2798, 0.0207]) tensor([0.4121, 0.0682, 0.4048, 0.1149]) -Greedy action tensor([ 0.7742, -0.9355, 1.5400, 0.5037]) tensor([0.2442, 0.0442, 0.5253, 0.1863]) -Greedy action tensor([-0.3415, 0.7697, 0.0112, 0.1866]) tensor([0.1397, 0.4245, 0.1988, 0.2369]) -Greedy action tensor([-0.0417, -0.4804, -0.1080, -0.2569]) tensor([0.2952, 0.1904, 0.2763, 0.2381]) -Greedy action tensor([-0.2730, -0.6044, 1.0491, -1.0017]) tensor([0.1680, 0.1206, 0.6303, 0.0811]) -Greedy action tensor([-1.1227, -0.2545, -0.0134, -0.3303]) tensor([0.1160, 0.2763, 0.3516, 0.2561]) -Greedy action tensor([-0.4603, 0.1530, 0.2954, -1.0162]) tensor([0.1802, 0.3328, 0.3837, 0.1034]) -Greedy action tensor([-0.4526, -1.9477, -0.0851, 0.4258]) tensor([0.1970, 0.0442, 0.2845, 0.4743]) -Greedy action tensor([ 0.4582, -0.4685, 0.7265, -0.2582]) tensor([0.3133, 0.1240, 0.4097, 0.1530]) -Greedy action tensor([-0.3487, 0.4553, 0.2091, -0.1949]) tensor([0.1627, 0.3635, 0.2841, 0.1897]) -Greedy action tensor([ 0.0749, -1.3860, 0.5209, -1.0962]) tensor([0.3222, 0.0747, 0.5032, 0.0999]) -Greedy action tensor([ 0.2700, -0.7318, 0.2821, 0.0606]) tensor([0.3134, 0.1151, 0.3173, 0.2542]) -Greedy action tensor([ 1.1756, -1.2025, 1.1909, 0.7444]) tensor([0.3626, 0.0336, 0.3682, 0.2356]) -Greedy action tensor([-0.8395, -0.5763, 0.0740, -0.0398]) tensor([0.1425, 0.1854, 0.3552, 0.3170]) -Greedy action tensor([ 0.3768, -1.1288, -0.5321, 0.5158]) tensor([0.3605, 0.0800, 0.1453, 0.4142]) -Greedy action tensor([ 0.1623, -0.8710, -0.8737, -0.6266]) tensor([0.4619, 0.1644, 0.1639, 0.2098]) -Greedy action tensor([ 1.0196, 0.7140, -0.5776, 0.0186]) tensor([0.4335, 0.3194, 0.0878, 0.1593]) -Greedy action tensor([-0.6037, -0.2425, -0.1343, -0.4321]) tensor([0.1915, 0.2749, 0.3063, 0.2274]) -Greedy action tensor([ 0.1447, -0.5131, 0.9916, -0.3082]) tensor([0.2229, 0.1155, 0.5199, 0.1417]) -Greedy action tensor([-1.7668, -0.4625, 0.5676, -0.1055]) tensor([0.0493, 0.1818, 0.5092, 0.2597]) -Greedy action tensor([-1.8843, -0.4354, 0.6345, -0.1486]) tensor([0.0428, 0.1824, 0.5318, 0.2430]) -Greedy action tensor([-1.3277, -0.4742, 0.3642, 0.1707]) tensor([0.0755, 0.1772, 0.4097, 0.3376]) -Greedy action tensor([-1.8980, -0.2906, 0.6187, -0.1406]) tensor([0.0414, 0.2064, 0.5124, 0.2398]) -Greedy action tensor([1.1870, 1.3022, 0.1990, 0.9216]) tensor([0.3066, 0.3441, 0.1142, 0.2352]) -Greedy action tensor([-1.8635, -0.4898, 0.6416, -0.1473]) tensor([0.0439, 0.1736, 0.5380, 0.2444]) -Greedy action tensor([-1.6941, -0.3593, 0.5483, -0.0515]) tensor([0.0516, 0.1960, 0.4858, 0.2666]) -Greedy action tensor([-1.9165, -0.4561, 0.6595, -0.1661]) tensor([0.0413, 0.1779, 0.5430, 0.2378]) -Greedy action tensor([-1.4173, -0.2194, 0.4300, 0.2015]) tensor([0.0637, 0.2110, 0.4039, 0.3214]) -Greedy action tensor([-0.5964, -0.2029, 0.7131, 1.1775]) tensor([0.0828, 0.1227, 0.3066, 0.4879]) -Greedy action tensor([-1.7934, -0.3986, 0.6868, 0.0057]) tensor([0.0434, 0.1752, 0.5188, 0.2626]) -Greedy action tensor([-1.7896, -0.4530, 0.5981, -0.0968]) tensor([0.0473, 0.1801, 0.5153, 0.2572]) -Greedy action tensor([-1.8093, -0.3200, 0.5955, -0.0836]) tensor([0.0452, 0.2004, 0.5006, 0.2538]) -Greedy action tensor([-1.9417, -0.4449, 0.6651, -0.1787]) tensor([0.0402, 0.1797, 0.5455, 0.2346]) -Greedy action tensor([-1.3763, 0.2218, 0.3130, 0.0559]) tensor([0.0643, 0.3180, 0.3483, 0.2694]) -Greedy action tensor([-1.9453, -0.4500, 0.6684, -0.1807]) tensor([0.0401, 0.1788, 0.5471, 0.2341]) -Greedy action tensor([-1.6701, 0.1627, 0.4383, -0.0321]) tensor([0.0485, 0.3030, 0.3992, 0.2494]) -Greedy action tensor([-1.9172, -0.4559, 0.6588, -0.1676]) tensor([0.0413, 0.1781, 0.5430, 0.2376]) -Greedy action tensor([-1.3536, 0.1244, 0.3339, -0.0125]) tensor([0.0684, 0.3000, 0.3699, 0.2616]) -Greedy action tensor([-1.9250, -0.4127, 0.6552, -0.1696]) tensor([0.0408, 0.1850, 0.5383, 0.2359]) -Greedy action tensor([-1.9230, -0.4239, 0.6562, -0.1682]) tensor([0.0409, 0.1832, 0.5394, 0.2365]) -Greedy action tensor([-1.7400, -0.4861, 0.5555, -0.0642]) tensor([0.0506, 0.1772, 0.5021, 0.2702]) -Greedy action tensor([-1.9235, -0.4430, 0.6574, -0.1671]) tensor([0.0410, 0.1802, 0.5414, 0.2374]) -Greedy action tensor([-1.6033, -0.3520, 0.5454, 0.0656]) tensor([0.0544, 0.1902, 0.4666, 0.2888]) -Greedy action tensor([-1.2628, 0.7662, 0.2159, 0.1503]) tensor([0.0585, 0.4448, 0.2565, 0.2402]) -Greedy action tensor([-1.8878, -0.4512, 0.6544, -0.1255]) tensor([0.0421, 0.1772, 0.5353, 0.2454]) -Greedy action tensor([-1.7914, -0.3893, 0.5943, -0.0977]) tensor([0.0468, 0.1902, 0.5085, 0.2545]) -Greedy action tensor([-1.7932, -0.2116, 0.5999, -0.0589]) tensor([0.0445, 0.2164, 0.4871, 0.2520]) -Greedy action tensor([-1.7329, -0.4478, 0.5639, -0.0986]) tensor([0.0508, 0.1836, 0.5051, 0.2604]) -Greedy action tensor([-0.6108, -0.4602, 0.3425, 0.6308]) tensor([0.1217, 0.1415, 0.3157, 0.4212]) -Greedy action tensor([-1.6265e+00, -3.6532e-04, 4.4011e-01, -6.3316e-02]) tensor([0.0533, 0.2711, 0.4211, 0.2545]) -Greedy action tensor([-1.4366, -0.4549, 0.5388, 0.3449]) tensor([0.0595, 0.1587, 0.4287, 0.3531]) -Greedy action tensor([-1.6156, -0.4739, 0.4874, 0.0983]) tensor([0.0559, 0.1752, 0.4583, 0.3105]) -Greedy action tensor([-0.5539, 0.0071, 0.8903, 1.1524]) tensor([0.0800, 0.1402, 0.3391, 0.4407]) -Greedy action tensor([-1.7394, -0.0248, 0.4824, -0.0720]) tensor([0.0474, 0.2635, 0.4376, 0.2514]) -Greedy action tensor([-1.8841, -0.4404, 0.6519, -0.1455]) tensor([0.0425, 0.1798, 0.5362, 0.2415]) -Greedy action tensor([-1.9279, -0.4733, 0.7025, -0.1544]) tensor([0.0399, 0.1709, 0.5540, 0.2351]) -Greedy action tensor([-0.6195, -0.1884, 0.6798, 0.4523]) tensor([0.1096, 0.1686, 0.4018, 0.3200]) -Greedy action tensor([-0.8607, -0.4173, 0.8729, 1.1084]) tensor([0.0650, 0.1013, 0.3680, 0.4657]) -Greedy action tensor([-0.7205, -0.8789, 1.0406, 1.4142]) tensor([0.0620, 0.0529, 0.3608, 0.5242]) -Greedy action tensor([-1.9433, -0.4448, 0.6669, -0.1795]) tensor([0.0401, 0.1796, 0.5460, 0.2342]) -Greedy action tensor([-1.8266, -0.4226, 0.6336, -0.0863]) tensor([0.0445, 0.1811, 0.5208, 0.2536]) -Greedy action tensor([-0.5480, -0.5334, 0.4788, 0.5745]) tensor([0.1269, 0.1288, 0.3544, 0.3899]) -Greedy action tensor([-1.0480, -0.1726, 0.1516, 0.4051]) tensor([0.0909, 0.2183, 0.3019, 0.3889]) -Greedy action tensor([-1.8802, -0.4501, 0.6391, -0.1452]) tensor([0.0430, 0.1796, 0.5338, 0.2436]) -Greedy action tensor([-1.7686, 0.0880, 0.4975, -0.0733]) tensor([0.0445, 0.2846, 0.4287, 0.2422]) -Greedy action tensor([-1.9275, -0.4406, 0.6591, -0.1711]) tensor([0.0408, 0.1805, 0.5422, 0.2364]) -Greedy action tensor([-1.8621, -0.4166, 0.6619, -0.1223]) tensor([0.0427, 0.1812, 0.5328, 0.2432]) -Greedy action tensor([-1.9440, -0.4493, 0.6673, -0.1804]) tensor([0.0401, 0.1790, 0.5467, 0.2342]) -Greedy action tensor([-1.8919, -0.4244, 0.6359, -0.1606]) tensor([0.0425, 0.1845, 0.5327, 0.2402]) -Greedy action tensor([-1.9402, -0.4549, 0.6685, -0.1801]) tensor([0.0403, 0.1780, 0.5474, 0.2343]) -Greedy action tensor([-1.9105, -0.4402, 0.6530, -0.1612]) tensor([0.0415, 0.1807, 0.5390, 0.2388]) -Greedy action tensor([-1.9222, -0.4316, 0.6532, -0.1687]) tensor([0.0411, 0.1823, 0.5395, 0.2371]) -Greedy action tensor([-1.2831, -0.6083, 0.3257, 0.3281]) tensor([0.0771, 0.1514, 0.3853, 0.3862]) -Greedy action tensor([-1.8810, -0.4473, 0.6209, -0.1478]) tensor([0.0434, 0.1819, 0.5293, 0.2454]) -Greedy action tensor([-0.8543, -0.2674, 0.2542, -0.0792]) tensor([0.1250, 0.2248, 0.3788, 0.2714]) -Greedy action tensor([-1.9081, -0.3504, 0.6331, -0.1534]) tensor([0.0413, 0.1960, 0.5241, 0.2387]) -Greedy action tensor([-1.9201, -0.4385, 0.6537, -0.1685]) tensor([0.0412, 0.1812, 0.5402, 0.2374]) -Greedy action tensor([-1.8390, -0.3499, 0.6053, -0.1365]) tensor([0.0446, 0.1975, 0.5134, 0.2445]) -Greedy action tensor([-1.7607, -0.4331, 0.6374, -0.0461]) tensor([0.0469, 0.1769, 0.5158, 0.2604]) -Greedy action tensor([-1.8935, -0.3363, 0.6309, -0.1521]) tensor([0.0418, 0.1983, 0.5216, 0.2384]) -Greedy action tensor([-1.5514, -0.0905, 0.4422, -0.0319]) tensor([0.0581, 0.2503, 0.4263, 0.2654]) -Greedy action tensor([-1.9301, -0.4293, 0.6645, -0.1616]) tensor([0.0404, 0.1813, 0.5413, 0.2370]) -Greedy action tensor([-1.9458, -0.4507, 0.6679, -0.1813]) tensor([0.0401, 0.1788, 0.5471, 0.2340]) -Greedy action tensor([-1.9291, -0.4110, 0.6555, -0.1664]) tensor([0.0406, 0.1851, 0.5379, 0.2364]) -Greedy action tensor([-1.9427, -0.4450, 0.6664, -0.1795]) tensor([0.0402, 0.1796, 0.5459, 0.2343]) -Greedy action tensor([-1.8112, -0.4713, 0.6181, -0.0752]) tensor([0.0458, 0.1748, 0.5196, 0.2598]) -Greedy action tensor([-1.8087, -0.2455, 0.5652, -0.0884]) tensor([0.0453, 0.2160, 0.4859, 0.2528]) -Greedy action tensor([-1.9369, -0.4394, 0.6644, -0.1763]) tensor([0.0404, 0.1805, 0.5443, 0.2348]) -Greedy action tensor([-1.9069, -0.4263, 0.6647, -0.1482]) tensor([0.0412, 0.1810, 0.5388, 0.2390]) -Greedy action tensor([-1.9237, -0.3810, 0.6473, -0.1641]) tensor([0.0407, 0.1904, 0.5324, 0.2365]) -Greedy action tensor([-1.8986, -0.4620, 0.6626, -0.1506]) tensor([0.0418, 0.1760, 0.5419, 0.2403]) -Greedy action tensor([-1.8660, -0.4630, 0.6381, -0.1346]) tensor([0.0436, 0.1772, 0.5331, 0.2461]) -Greedy action tensor([-1.6274, 0.1379, -0.3572, -0.4313]) tensor([0.0729, 0.4261, 0.2597, 0.2412]) -Greedy action tensor([-0.8033, 0.8482, -0.0298, 0.5724]) tensor([0.0810, 0.4226, 0.1756, 0.3207]) -Greedy action tensor([-1.9185, -0.4234, 0.6495, -0.1606]) tensor([0.0412, 0.1835, 0.5366, 0.2387]) -Greedy action tensor([-1.9032, -0.2731, 0.6249, -0.1606]) tensor([0.0411, 0.2097, 0.5146, 0.2346]) -Greedy action tensor([-1.7428, -0.2053, 0.5536, -0.2026]) tensor([0.0494, 0.2297, 0.4906, 0.2303]) -Greedy action tensor([-1.8762, -0.4502, 0.6350, -0.1493]) tensor([0.0433, 0.1801, 0.5332, 0.2434]) -Greedy action tensor([-1.8859, -0.4589, 0.6429, -0.1544]) tensor([0.0428, 0.1784, 0.5369, 0.2419]) -Greedy action tensor([-1.9202, -0.4051, 0.6544, -0.1667]) tensor([0.0409, 0.1861, 0.5368, 0.2362]) -Greedy action tensor([ 1.6328, -0.4276, -0.6073, 0.2724]) tensor([0.6710, 0.0855, 0.0714, 0.1721]) -Greedy action tensor([ 1.4432, -0.3466, -0.7847, 0.3140]) tensor([0.6258, 0.1045, 0.0674, 0.2023]) -Greedy action tensor([ 1.1970, -0.3176, -0.4536, 0.1460]) tensor([0.5677, 0.1248, 0.1090, 0.1985]) -Greedy action tensor([ 1.3625, -0.5989, 0.0751, -0.0789]) tensor([0.6049, 0.0851, 0.1669, 0.1431]) -Greedy action tensor([ 1.7575, -0.5054, -0.7760, 0.3987]) tensor([0.6943, 0.0722, 0.0551, 0.1784]) -Greedy action tensor([ 1.2910, -0.3939, -0.3241, 0.2117]) tensor([0.5800, 0.1076, 0.1153, 0.1971]) -Greedy action tensor([ 0.8623, -0.4569, -0.3699, 0.4240]) tensor([0.4537, 0.1213, 0.1323, 0.2927]) -Greedy action tensor([ 1.3449, 0.0175, -0.6539, 0.0522]) tensor([0.5969, 0.1583, 0.0809, 0.1639]) -Greedy action tensor([ 2.1239, -0.9402, -0.1241, 0.7262]) tensor([0.7146, 0.0334, 0.0755, 0.1766]) -Greedy action tensor([ 2.4790, -1.0819, -0.5248, 1.0600]) tensor([0.7576, 0.0215, 0.0376, 0.1833]) -Greedy action tensor([ 1.5841, -0.1759, -1.0903, 0.6763]) tensor([0.6081, 0.1046, 0.0419, 0.2453]) -Greedy action tensor([ 1.2940, -0.1938, -0.4508, 0.1289]) tensor([0.5840, 0.1319, 0.1020, 0.1821]) -Greedy action tensor([ 0.8013, -0.4489, -0.1673, 0.1954]) tensor([0.4522, 0.1295, 0.1716, 0.2467]) -Greedy action tensor([ 1.5894, -0.4689, -0.0563, 0.7098]) tensor([0.5762, 0.0736, 0.1111, 0.2391]) -Greedy action tensor([ 2.0814, 0.4955, -0.1334, 0.2842]) tensor([0.6758, 0.1384, 0.0738, 0.1120]) -Greedy action tensor([ 1.5650, -1.1746, -0.5148, -0.2949]) tensor([0.7434, 0.0480, 0.0929, 0.1157]) -Greedy action tensor([ 1.8129, -0.5947, 0.1112, 0.5199]) tensor([0.6465, 0.0582, 0.1179, 0.1774]) -Greedy action tensor([ 0.9738, -0.1258, -0.4799, -0.0758]) tensor([0.5217, 0.1737, 0.1219, 0.1826]) -Greedy action tensor([ 1.8434, -1.0300, -0.2781, 0.8376]) tensor([0.6485, 0.0366, 0.0777, 0.2372]) -Greedy action tensor([ 1.2841, -0.4714, -0.3778, 0.1176]) tensor([0.5973, 0.1032, 0.1134, 0.1860]) -Greedy action tensor([ 1.3257, -0.1891, -0.9462, 0.3933]) tensor([0.5826, 0.1281, 0.0601, 0.2293]) -Greedy action tensor([ 1.1345, -0.2777, -0.5162, -0.0100]) tensor([0.5702, 0.1389, 0.1094, 0.1815]) -Greedy action tensor([ 1.4928, -0.5590, 0.0918, 0.4792]) tensor([0.5754, 0.0740, 0.1418, 0.2088]) -Greedy action tensor([ 1.2441, -0.7192, -0.1524, 0.5232]) tensor([0.5336, 0.0749, 0.1320, 0.2595]) -Greedy action tensor([ 1.7248, -0.9947, -0.0570, 0.3559]) tensor([0.6718, 0.0443, 0.1131, 0.1709]) -Greedy action tensor([ 2.0398, -0.3387, -0.0535, 0.3876]) tensor([0.7104, 0.0658, 0.0876, 0.1361]) -Greedy action tensor([ 1.4066, -0.0914, -0.0323, 0.4102]) tensor([0.5465, 0.1222, 0.1296, 0.2018]) -Greedy action tensor([ 1.6532, -1.0983, 0.1204, 0.2454]) tensor([0.6560, 0.0419, 0.1417, 0.1605]) -Greedy action tensor([ 1.3786, -0.4939, -0.2162, 0.0099]) tensor([0.6207, 0.0954, 0.1260, 0.1579]) -Greedy action tensor([ 1.5133, 0.2337, -0.6742, 0.5047]) tensor([0.5698, 0.1585, 0.0639, 0.2078]) -Greedy action tensor([ 1.1843, -0.8580, -0.3383, 0.0228]) tensor([0.6021, 0.0781, 0.1313, 0.1885]) -Greedy action tensor([ 1.0817, -0.4857, -0.2375, 0.1892]) tensor([0.5304, 0.1106, 0.1418, 0.2172]) -Greedy action tensor([ 1.7922, -0.7220, 0.1251, -0.0064]) tensor([0.6967, 0.0564, 0.1315, 0.1153]) -Greedy action tensor([ 1.1140, -0.0533, -0.1761, 0.0865]) tensor([0.5143, 0.1601, 0.1416, 0.1841]) -Greedy action tensor([ 1.5300, -0.5475, -0.3187, 0.4788]) tensor([0.6127, 0.0767, 0.0965, 0.2141]) -Greedy action tensor([ 1.8498, -0.6617, -0.5896, 0.1189]) tensor([0.7432, 0.0603, 0.0648, 0.1316]) -Greedy action tensor([ 1.7848, -0.3117, -0.4754, 0.6793]) tensor([0.6417, 0.0789, 0.0670, 0.2124]) -Greedy action tensor([ 1.0821, -0.7799, -0.2154, 0.0196]) tensor([0.5636, 0.0876, 0.1540, 0.1948]) -Greedy action tensor([ 0.9043, -0.4619, -0.6915, 0.3206]) tensor([0.4961, 0.1265, 0.1006, 0.2767]) -Greedy action tensor([ 1.3599, -0.8036, 0.2510, -0.1469]) tensor([0.6001, 0.0690, 0.1980, 0.1330]) -Greedy action tensor([ 1.1186, 0.0303, -0.5533, 0.0373]) tensor([0.5365, 0.1807, 0.1008, 0.1820]) -Greedy action tensor([ 0.9113, -0.2825, -0.7522, 0.5484]) tensor([0.4570, 0.1385, 0.0866, 0.3179]) -Greedy action tensor([ 0.9473, -0.1260, -0.2256, 0.3628]) tensor([0.4527, 0.1548, 0.1401, 0.2524]) -Greedy action tensor([ 1.3917, -0.1821, -0.1499, -0.2072]) tensor([0.6160, 0.1277, 0.1318, 0.1245]) -Greedy action tensor([ 1.9403, -0.3997, -0.6104, 0.4850]) tensor([0.7104, 0.0684, 0.0554, 0.1658]) -Greedy action tensor([ 1.4595, -0.5795, 0.1713, 0.9217]) tensor([0.5025, 0.0654, 0.1386, 0.2935]) -Greedy action tensor([ 1.4036, -0.7552, -0.2895, 0.2324]) tensor([0.6214, 0.0717, 0.1143, 0.1926]) -Greedy action tensor([ 1.8589, -0.6737, -0.3681, 0.6335]) tensor([0.6753, 0.0536, 0.0728, 0.1983]) -Greedy action tensor([ 1.2864, -1.1621, -0.2567, 0.5831]) tensor([0.5571, 0.0481, 0.1191, 0.2757]) -Greedy action tensor([ 1.6206, -0.6919, -0.1073, 0.3345]) tensor([0.6439, 0.0638, 0.1144, 0.1779]) -Greedy action tensor([ 1.6457, 0.0074, -0.7898, 0.4047]) tensor([0.6366, 0.1237, 0.0557, 0.1840]) -Greedy action tensor([ 1.0801, -0.3579, -0.2041, 0.1621]) tensor([0.5226, 0.1241, 0.1447, 0.2087]) -Greedy action tensor([ 1.2480, -0.6597, 0.2520, -0.0205]) tensor([0.5558, 0.0825, 0.2053, 0.1563]) -Greedy action tensor([ 1.8096, -0.5605, -0.6545, 0.4465]) tensor([0.6972, 0.0652, 0.0593, 0.1784]) -Greedy action tensor([ 1.3888, -0.4996, -0.4051, 0.2563]) tensor([0.6098, 0.0923, 0.1014, 0.1965]) -Greedy action tensor([ 1.5276, -0.5287, -0.7132, -0.0180]) tensor([0.6908, 0.0884, 0.0735, 0.1473]) -Greedy action tensor([ 0.8630, -0.1764, -0.4210, 0.0926]) tensor([0.4777, 0.1689, 0.1323, 0.2211]) -Greedy action tensor([ 1.4917, -0.9920, -0.1493, 0.0081]) tensor([0.6649, 0.0555, 0.1288, 0.1508]) -Greedy action tensor([ 1.4042, -0.3526, -0.4525, 0.4957]) tensor([0.5774, 0.0997, 0.0902, 0.2328]) -Greedy action tensor([ 1.6498, -0.5015, -0.6625, 0.3898]) tensor([0.6671, 0.0776, 0.0661, 0.1892]) -Greedy action tensor([ 1.5781, -0.6270, -0.3866, 0.4090]) tensor([0.6406, 0.0706, 0.0898, 0.1990]) -Greedy action tensor([ 1.4917, -0.6650, -0.6787, -0.0782]) tensor([0.6955, 0.0805, 0.0794, 0.1447]) -Greedy action tensor([ 1.4749, -0.5871, -0.3877, 0.5266]) tensor([0.5989, 0.0762, 0.0930, 0.2320]) -Greedy action tensor([ 1.5100, -0.3996, -0.5049, 0.4398]) tensor([0.6156, 0.0912, 0.0821, 0.2111]) -Greedy action tensor([ 1.0280, -0.4659, -0.0365, -0.5717]) tensor([0.5645, 0.1267, 0.1947, 0.1140]) -Greedy action tensor([ 1.7204, -0.0852, -0.4955, 0.5785]) tensor([0.6279, 0.1032, 0.0685, 0.2004]) -Greedy action tensor([ 1.6913, -0.7237, -0.3932, 0.7670]) tensor([0.6209, 0.0555, 0.0772, 0.2464]) -Greedy action tensor([ 1.4693, -0.8284, 0.0647, 0.0349]) tensor([0.6312, 0.0634, 0.1549, 0.1504]) -Greedy action tensor([ 1.2877, -0.1461, -0.5658, 0.6272]) tensor([0.5231, 0.1247, 0.0820, 0.2702]) -Greedy action tensor([ 2.9896, -1.2464, -0.0101, 1.2810]) tensor([0.8030, 0.0116, 0.0400, 0.1454]) -Greedy action tensor([ 1.0578, -0.1098, -0.6137, -0.0579]) tensor([0.5474, 0.1703, 0.1029, 0.1794]) -Greedy action tensor([ 1.6386, -0.9123, -0.5589, 1.0996]) tensor([0.5642, 0.0440, 0.0627, 0.3291]) -Greedy action tensor([2.5489, 0.6480, 0.0572, 0.1384]) tensor([0.7564, 0.1130, 0.0626, 0.0679]) -Greedy action tensor([ 1.6278, -0.8621, -0.1777, 0.7805]) tensor([0.5967, 0.0495, 0.0981, 0.2557]) -Greedy action tensor([ 1.9521, -1.2976, 0.1983, 0.3218]) tensor([0.7103, 0.0276, 0.1230, 0.1391]) -Greedy action tensor([ 2.4449, 0.2547, -0.0678, -0.0976]) tensor([0.7864, 0.0880, 0.0637, 0.0619]) -Greedy action tensor([ 1.6990, -0.7781, -0.5611, 0.7265]) tensor([0.6384, 0.0536, 0.0666, 0.2414]) -Greedy action tensor([ 1.5202, -0.4425, -0.8398, 0.2894]) tensor([0.6549, 0.0920, 0.0618, 0.1913]) -Greedy action tensor([ 0.9224, 0.3347, -1.0674, 0.2558]) tensor([0.4534, 0.2519, 0.0620, 0.2328]) -Greedy action tensor([ 1.8420, -1.0144, -0.2614, 0.2836]) tensor([0.7194, 0.0413, 0.0878, 0.1514]) -Greedy action tensor([ 1.3509, 0.0641, -0.7444, 0.2861]) tensor([0.5734, 0.1583, 0.0705, 0.1977]) -Greedy action tensor([ 0.6992, -0.5105, -0.0718, -0.1820]) tensor([0.4597, 0.1371, 0.2127, 0.1905]) -Greedy action tensor([ 0.8887, -0.7146, -0.0436, -0.4351]) tensor([0.5374, 0.1081, 0.2115, 0.1430]) -Greedy action tensor([ 0.7100, -0.1426, 0.0460, -0.1487]) tensor([0.4229, 0.1803, 0.2177, 0.1792]) -Greedy action tensor([ 0.8254, -0.5494, 0.0078, -0.1713]) tensor([0.4846, 0.1226, 0.2139, 0.1789]) -Greedy action tensor([ 0.6191, -0.5940, -0.0501, -0.1889]) tensor([0.4434, 0.1318, 0.2271, 0.1977]) -Greedy action tensor([ 0.7661, -0.3167, 0.0248, -0.2214]) tensor([0.4571, 0.1548, 0.2178, 0.1703]) -Greedy action tensor([ 0.9823, -0.8473, 0.0302, -0.4871]) tensor([0.5629, 0.0903, 0.2172, 0.1295]) -Greedy action tensor([ 0.4636, -0.2497, -0.1929, -0.2982]) tensor([0.4040, 0.1979, 0.2095, 0.1886]) -Greedy action tensor([ 0.5307, -0.1571, -0.0246, -0.0950]) tensor([0.3829, 0.1925, 0.2198, 0.2048]) -Greedy action tensor([ 0.2544, 0.1959, -0.0358, -0.2622]) tensor([0.3042, 0.2869, 0.2275, 0.1814]) -Greedy action tensor([ 0.5276, -0.0435, -0.2479, 0.0392]) tensor([0.3789, 0.2141, 0.1745, 0.2325]) -Greedy action tensor([ 0.4396, 0.0676, -0.0849, 0.1162]) tensor([0.3328, 0.2294, 0.1970, 0.2408]) -Greedy action tensor([ 0.6455, -0.1068, -0.0526, -0.6611]) tensor([0.4465, 0.2104, 0.2222, 0.1209]) -Greedy action tensor([ 0.6591, 0.4050, -0.1106, -0.3278]) tensor([0.3829, 0.2970, 0.1774, 0.1427]) -Greedy action tensor([ 0.6567, -0.3650, -0.0191, -0.1945]) tensor([0.4356, 0.1568, 0.2216, 0.1860]) -Greedy action tensor([ 0.8634, -0.7865, -0.0618, -0.6296]) tensor([0.5515, 0.1059, 0.2186, 0.1239]) -Greedy action tensor([ 1.0972, -0.6830, 0.0246, -0.4041]) tensor([0.5769, 0.0973, 0.1973, 0.1285]) -Greedy action tensor([ 0.7771, -0.2996, -0.0198, -0.2019]) tensor([0.4615, 0.1572, 0.2080, 0.1734]) -Greedy action tensor([ 0.9015, -0.3782, 0.0661, -0.1184]) tensor([0.4825, 0.1342, 0.2093, 0.1740]) -Greedy action tensor([ 1.0333, -0.8342, -0.0177, -0.3529]) tensor([0.5701, 0.0881, 0.1993, 0.1425]) -Greedy action tensor([ 0.9583, -0.3965, -0.0603, -0.3893]) tensor([0.5322, 0.1373, 0.1922, 0.1383]) -Greedy action tensor([ 0.8716, -0.2980, -0.0227, -0.5133]) tensor([0.5077, 0.1576, 0.2076, 0.1271]) -Greedy action tensor([ 0.8012, -0.3303, -0.1294, -0.0096]) tensor([0.4627, 0.1492, 0.1824, 0.2057]) -Greedy action tensor([ 0.6093, -0.3753, -0.0345, -0.1827]) tensor([0.4252, 0.1588, 0.2234, 0.1926]) -Greedy action tensor([ 1.3393, -0.8787, -0.0168, -0.7446]) tensor([0.6707, 0.0730, 0.1728, 0.0835]) -Greedy action tensor([ 0.9998, -0.4815, -0.1822, -0.2744]) tensor([0.5514, 0.1253, 0.1691, 0.1542]) -Greedy action tensor([ 0.3596, 0.4723, -0.1978, 0.1292]) tensor([0.2869, 0.3211, 0.1643, 0.2278]) -Greedy action tensor([ 0.5115, -0.1473, -0.0982, -0.1137]) tensor([0.3852, 0.1993, 0.2094, 0.2061]) -Greedy action tensor([ 0.7251, -0.3141, -0.0616, -0.1906]) tensor([0.4526, 0.1601, 0.2061, 0.1812]) -Greedy action tensor([ 0.6327, -0.5103, -0.1423, -0.1618]) tensor([0.4482, 0.1429, 0.2065, 0.2025]) -Greedy action tensor([ 0.6711, -0.4776, -0.0632, -0.1120]) tensor([0.4437, 0.1407, 0.2129, 0.2028]) -Greedy action tensor([ 0.5835, -0.3158, -0.1083, -0.2261]) tensor([0.4251, 0.1729, 0.2128, 0.1892]) -Greedy action tensor([ 0.5815, -0.1679, 0.0711, -0.0507]) tensor([0.3840, 0.1815, 0.2305, 0.2041]) -Greedy action tensor([ 0.6104, -0.8010, -0.0990, -0.3299]) tensor([0.4703, 0.1147, 0.2314, 0.1837]) -Greedy action tensor([ 1.4386, -0.5870, -0.0934, -0.4218]) tensor([0.6651, 0.0877, 0.1437, 0.1035]) -Greedy action tensor([ 0.7308, -0.2126, -0.1055, -0.4918]) tensor([0.4724, 0.1839, 0.2047, 0.1391]) -Greedy action tensor([ 0.7164, -0.6082, -0.0686, -0.3737]) tensor([0.4859, 0.1292, 0.2216, 0.1633]) -Greedy action tensor([ 0.3546, 0.3024, -0.2557, 0.0976]) tensor([0.3062, 0.2906, 0.1663, 0.2368]) -Greedy action tensor([ 0.5438, -0.3214, -0.0391, -0.1460]) tensor([0.4031, 0.1697, 0.2250, 0.2022]) -Greedy action tensor([ 0.8749, -0.6132, 0.0584, -0.4089]) tensor([0.5142, 0.1161, 0.2273, 0.1424]) -Greedy action tensor([ 0.4377, 0.0372, -0.2253, -0.0763]) tensor([0.3593, 0.2407, 0.1851, 0.2149]) -Greedy action tensor([ 0.9341, -0.7890, 0.0217, -0.7028]) tensor([0.5635, 0.1006, 0.2263, 0.1096]) -Greedy action tensor([ 0.5369, -0.3592, -0.0774, -0.0946]) tensor([0.4031, 0.1645, 0.2181, 0.2143]) -Greedy action tensor([ 0.9817, -0.6307, 0.0116, -0.4063]) tensor([0.5471, 0.1091, 0.2073, 0.1365]) -Greedy action tensor([ 1.1690, -0.6669, -0.0830, -0.3548]) tensor([0.6012, 0.0959, 0.1719, 0.1310]) -Greedy action tensor([ 0.7441, -0.4052, 0.0696, -0.2373]) tensor([0.4543, 0.1440, 0.2314, 0.1703]) -Greedy action tensor([ 1.1181, -0.5104, -0.0942, -0.2122]) tensor([0.5688, 0.1116, 0.1692, 0.1504]) -Greedy action tensor([ 0.7492, -0.0987, 0.0493, -0.0422]) tensor([0.4205, 0.1801, 0.2088, 0.1906]) -Greedy action tensor([ 0.6255, -0.5608, -0.0773, -0.1943]) tensor([0.4462, 0.1362, 0.2210, 0.1966]) -Greedy action tensor([ 0.9972, -0.6086, 0.2076, -0.5526]) tensor([0.5356, 0.1075, 0.2432, 0.1137]) -Greedy action tensor([ 0.5035, -0.3681, -0.2094, -0.1277]) tensor([0.4098, 0.1714, 0.2009, 0.2180]) -Greedy action tensor([ 0.9769, -0.9550, 0.1050, -0.6186]) tensor([0.5663, 0.0820, 0.2368, 0.1149]) -Greedy action tensor([ 1.0026, -0.7429, 0.0559, -0.5833]) tensor([0.5658, 0.0988, 0.2196, 0.1159]) -Greedy action tensor([ 1.1554, -0.4076, -0.1956, -0.5737]) tensor([0.6076, 0.1273, 0.1573, 0.1078]) -Greedy action tensor([ 0.3119, -0.1579, -0.0038, -0.0993]) tensor([0.3314, 0.2072, 0.2417, 0.2197]) -Greedy action tensor([ 0.8370, -0.8612, -0.0115, -0.3504]) tensor([0.5219, 0.0955, 0.2234, 0.1592]) -Greedy action tensor([ 0.8913, -0.4747, -0.0851, -0.3170]) tensor([0.5180, 0.1322, 0.1951, 0.1547]) -Greedy action tensor([ 0.6511, -0.3018, -0.1488, -0.2580]) tensor([0.4468, 0.1723, 0.2008, 0.1800]) -Greedy action tensor([ 0.7974, -0.4680, -0.0445, -0.3883]) tensor([0.4954, 0.1398, 0.2135, 0.1514]) -Greedy action tensor([ 0.8721, -0.4043, 0.0467, -0.2829]) tensor([0.4921, 0.1373, 0.2156, 0.1550]) -Greedy action tensor([ 0.5902, -0.6765, -0.0060, -0.1620]) tensor([0.4340, 0.1223, 0.2391, 0.2046]) -Greedy action tensor([ 1.0517, -0.6503, -0.0167, -0.4912]) tensor([0.5748, 0.1048, 0.1975, 0.1229]) -Greedy action tensor([ 0.9491, -0.3800, -0.0939, -0.4314]) tensor([0.5352, 0.1417, 0.1886, 0.1346]) -Greedy action tensor([ 0.9632, -0.6236, -0.0267, -0.3290]) tensor([0.5403, 0.1105, 0.2008, 0.1484]) -Greedy action tensor([ 0.6519, -0.1327, -0.0412, -0.2325]) tensor([0.4221, 0.1926, 0.2110, 0.1743]) -Greedy action tensor([ 0.7274, -0.4281, -0.0864, -0.4345]) tensor([0.4829, 0.1521, 0.2140, 0.1511]) -Greedy action tensor([ 0.9192, -0.5456, 0.0314, -0.5562]) tensor([0.5344, 0.1235, 0.2199, 0.1222]) -Greedy action tensor([ 0.5313, -0.5574, -0.1323, -0.1594]) tensor([0.4250, 0.1431, 0.2189, 0.2130]) -Greedy action tensor([ 0.6749, -0.2136, -0.1038, -0.0963]) tensor([0.4287, 0.1763, 0.1968, 0.1983]) -Greedy action tensor([0.7428, 0.0206, 0.0180, 0.0439]) tensor([0.4053, 0.1969, 0.1963, 0.2015]) -Greedy action tensor([ 0.8054, -0.6422, -0.0096, -0.6306]) tensor([0.5220, 0.1227, 0.2311, 0.1242]) -Greedy action tensor([ 0.9732, -0.9344, 0.0995, -0.4643]) tensor([0.5545, 0.0823, 0.2315, 0.1317]) -Greedy action tensor([ 0.9873, -0.7792, 0.1133, -0.4333]) tensor([0.5465, 0.0934, 0.2280, 0.1320]) -Greedy action tensor([ 0.7750, -0.2310, -0.0828, -0.0772]) tensor([0.4512, 0.1650, 0.1914, 0.1924]) -Greedy action tensor([ 0.6582, -0.1674, -0.0579, -0.0396]) tensor([0.4125, 0.1807, 0.2016, 0.2053]) -Greedy action tensor([ 0.2890, -0.0775, -0.1058, -0.2011]) tensor([0.3356, 0.2326, 0.2261, 0.2056]) -Greedy action tensor([ 0.8619, -0.3516, 0.1254, -0.2499]) tensor([0.4751, 0.1412, 0.2275, 0.1563]) -Greedy action tensor([ 0.8612, -0.3433, -0.0494, -0.3864]) tensor([0.5027, 0.1507, 0.2022, 0.1444]) -Greedy action tensor([ 0.6125, -0.3627, -0.1689, -0.4063]) tensor([0.4554, 0.1717, 0.2085, 0.1644]) -Greedy action tensor([ 0.5526, -0.1491, -0.1233, 0.0246]) tensor([0.3855, 0.1911, 0.1961, 0.2274]) -Greedy action tensor([ 1.3689, -0.7489, -0.0606, -0.5281]) tensor([0.6624, 0.0797, 0.1586, 0.0994]) -Greedy action tensor([ 0.8232, -1.0648, -0.3674, 0.0300]) tensor([0.5242, 0.0793, 0.1594, 0.2371]) -Greedy action tensor([-0.4781, -1.2536, 0.4464, -1.1970]) tensor([0.2238, 0.1031, 0.5641, 0.1091]) -Greedy action tensor([ 0.5976, -1.2729, 0.6115, -0.2767]) tensor([0.3868, 0.0596, 0.3922, 0.1614]) -Greedy action tensor([-0.4507, -1.8876, 1.1959, -0.6467]) tensor([0.1380, 0.0328, 0.7159, 0.1134]) -Greedy action tensor([-1.6923, -0.5365, 0.2254, -0.1430]) tensor([0.0637, 0.2025, 0.4337, 0.3001]) -Greedy action tensor([-0.1957, -0.1872, 0.2498, -0.5212]) tensor([0.2330, 0.2350, 0.3638, 0.1683]) -Greedy action tensor([ 0.6630, -0.9490, -0.6030, 0.4548]) tensor([0.4360, 0.0870, 0.1229, 0.3541]) -Greedy action tensor([ 1.6966, 0.9148, 0.2047, -0.2493]) tensor([0.5478, 0.2507, 0.1232, 0.0783]) -Greedy action tensor([ 0.4987, 0.1876, 0.0035, -0.8250]) tensor([0.3834, 0.2809, 0.2337, 0.1020]) -Greedy action tensor([ 0.1439, -0.5983, 0.0157, -0.4023]) tensor([0.3407, 0.1622, 0.2997, 0.1973]) -Greedy action tensor([ 0.2409, -0.6188, 0.2326, -1.0945]) tensor([0.3734, 0.1581, 0.3703, 0.0982]) -Greedy action tensor([-0.2148, -0.6062, -0.4162, 0.4957]) tensor([0.2208, 0.1493, 0.1805, 0.4494]) -Greedy action tensor([-0.5693, -0.3293, -0.1712, -0.2734]) tensor([0.1959, 0.2490, 0.2917, 0.2634]) -Greedy action tensor([ 0.5365, -0.3332, 0.2018, -0.8642]) tensor([0.4200, 0.1760, 0.3005, 0.1035]) -Greedy action tensor([ 0.0762, -0.7532, -0.3307, -0.1394]) tensor([0.3439, 0.1500, 0.2289, 0.2772]) -Greedy action tensor([ 0.6497, -0.4446, -0.7993, -0.2350]) tensor([0.5044, 0.1689, 0.1184, 0.2083]) -Greedy action tensor([ 0.1546, -0.1099, 0.4288, -0.0660]) tensor([0.2574, 0.1976, 0.3386, 0.2065]) -Greedy action tensor([ 0.0975, 0.4361, -0.2131, -0.5867]) tensor([0.2747, 0.3854, 0.2013, 0.1386]) -Greedy action tensor([ 0.3095, -1.2378, -0.4687, 0.4728]) tensor([0.3510, 0.0747, 0.1612, 0.4132]) -Greedy action tensor([-0.0260, 0.3574, 0.2988, -0.5381]) tensor([0.2247, 0.3297, 0.3109, 0.1346]) -Greedy action tensor([ 0.1534, 0.1999, -0.3385, -0.1766]) tensor([0.2960, 0.3101, 0.1810, 0.2128]) -Greedy action tensor([ 0.4758, -0.0953, 0.8582, 0.5411]) tensor([0.2440, 0.1378, 0.3577, 0.2605]) -Greedy action tensor([ 0.2170, -0.5812, 0.1310, 0.0441]) tensor([0.3116, 0.1403, 0.2860, 0.2621]) -Greedy action tensor([-1.2107, -1.0191, 0.1153, -1.0594]) tensor([0.1400, 0.1696, 0.5274, 0.1629]) -Greedy action tensor([-0.3839, -0.8456, 0.3588, -0.1752]) tensor([0.2015, 0.1270, 0.4234, 0.2482]) -Greedy action tensor([ 0.6278, -1.1275, 0.6803, -0.1238]) tensor([0.3706, 0.0641, 0.3906, 0.1748]) -Greedy action tensor([-0.1648, -0.0204, 0.3279, -0.0767]) tensor([0.2047, 0.2366, 0.3351, 0.2236]) -Greedy action tensor([ 0.9551, -0.0074, 0.3722, -0.1757]) tensor([0.4419, 0.1688, 0.2467, 0.1426]) -Greedy action tensor([-0.5878, -1.7255, -0.4467, 0.7390]) tensor([0.1602, 0.0514, 0.1845, 0.6039]) -Greedy action tensor([-0.2368, -0.7876, 0.6985, -0.5378]) tensor([0.2056, 0.1185, 0.5238, 0.1521]) -Greedy action tensor([ 0.2576, 0.2870, 0.0941, -0.0707]) tensor([0.2778, 0.2861, 0.2359, 0.2001]) -Greedy action tensor([ 0.9155, -0.7379, -0.1626, 0.4439]) tensor([0.4639, 0.0888, 0.1578, 0.2895]) -Greedy action tensor([ 0.8431, -0.1776, 0.2243, 0.4056]) tensor([0.3930, 0.1416, 0.2117, 0.2537]) -Greedy action tensor([-1.0745, -0.2785, -0.6256, -0.1809]) tensor([0.1384, 0.3067, 0.2168, 0.3382]) -Greedy action tensor([-0.4008, -0.0551, 0.4264, -0.7021]) tensor([0.1838, 0.2598, 0.4204, 0.1360]) -Greedy action tensor([ 0.6256, -0.4588, -0.4292, 0.4901]) tensor([0.3907, 0.1321, 0.1361, 0.3412]) -Greedy action tensor([-0.6113, 0.8349, -0.2397, -0.8052]) tensor([0.1330, 0.5647, 0.1928, 0.1095]) -Greedy action tensor([-0.1904, -0.0866, 0.2875, -0.0433]) tensor([0.2049, 0.2273, 0.3304, 0.2374]) -Greedy action tensor([-0.5388, -1.3904, -0.6819, -0.3079]) tensor([0.2814, 0.1201, 0.2439, 0.3545]) -Greedy action tensor([ 0.3985, -1.1693, 1.2405, -0.9294]) tensor([0.2635, 0.0549, 0.6117, 0.0698]) -Greedy action tensor([ 0.0886, 0.8289, -0.5881, 0.1040]) tensor([0.2164, 0.4538, 0.1100, 0.2198]) -Greedy action tensor([0.2475, 0.0517, 0.2352, 0.1078]) tensor([0.2718, 0.2235, 0.2684, 0.2363]) -Greedy action tensor([-0.9577, -0.6243, -0.2201, 0.6184]) tensor([0.1073, 0.1497, 0.2243, 0.5187]) -Greedy action tensor([-0.4226, -0.5713, 1.1001, -0.8261]) tensor([0.1406, 0.1211, 0.6444, 0.0939]) -Greedy action tensor([ 0.3670, -0.5895, 0.3819, 1.0926]) tensor([0.2239, 0.0861, 0.2273, 0.4627]) -Greedy action tensor([-0.0597, -0.6774, 0.4330, -0.0567]) tensor([0.2393, 0.1290, 0.3917, 0.2400]) -Greedy action tensor([ 0.2756, 0.3516, 0.1138, -0.8986]) tensor([0.3088, 0.3332, 0.2627, 0.0954]) -Greedy action tensor([ 1.7064, -0.8336, 1.7498, 0.2589]) tensor([0.4240, 0.0334, 0.4428, 0.0997]) -Greedy action tensor([ 0.5022, -1.9124, 0.5250, 0.1679]) tensor([0.3536, 0.0316, 0.3617, 0.2531]) -Greedy action tensor([-0.2386, -0.6003, 0.3787, -0.3644]) tensor([0.2256, 0.1571, 0.4183, 0.1990]) -Greedy action tensor([ 1.7347, -0.5149, 0.7555, -0.4761]) tensor([0.6287, 0.0663, 0.2361, 0.0689]) -Greedy action tensor([ 0.7683, -0.9936, 0.4601, 1.6733]) tensor([0.2284, 0.0392, 0.1678, 0.5646]) -Greedy action tensor([-0.0470, -1.0656, 0.4143, -0.3651]) tensor([0.2721, 0.0983, 0.4316, 0.1980]) -Greedy action tensor([-0.3718, -0.1759, -0.2954, -0.1778]) tensor([0.2217, 0.2697, 0.2393, 0.2692]) -Greedy action tensor([ 0.0763, -0.5136, 1.1822, 0.6090]) tensor([0.1592, 0.0883, 0.4812, 0.2713]) -Greedy action tensor([ 0.0250, -0.7001, 0.6032, -0.2316]) tensor([0.2475, 0.1198, 0.4412, 0.1915]) -Greedy action tensor([ 0.0444, -0.4929, 0.3717, -0.6780]) tensor([0.2893, 0.1690, 0.4013, 0.1405]) -Greedy action tensor([ 0.2682, -0.7448, 0.0396, -0.9378]) tensor([0.4068, 0.1477, 0.3237, 0.1218]) -Greedy action tensor([-0.8704, -0.7509, 0.3225, -0.4750]) tensor([0.1447, 0.1631, 0.4772, 0.2149]) -Greedy action tensor([ 0.4188, -0.5465, -0.4069, -0.2055]) tensor([0.4247, 0.1618, 0.1860, 0.2275]) -Greedy action tensor([-0.5799, -1.1517, -0.9458, -0.2001]) tensor([0.2688, 0.1517, 0.1864, 0.3930]) -Greedy action tensor([-0.3633, 0.4224, -0.0951, -0.0135]) tensor([0.1689, 0.3706, 0.2209, 0.2396]) -Greedy action tensor([ 0.9687, -0.4016, 0.8276, -0.5869]) tensor([0.4285, 0.1089, 0.3721, 0.0904]) -Greedy action tensor([ 0.6810, -1.6793, -0.5758, 0.1448]) tensor([0.5092, 0.0481, 0.1449, 0.2979]) -Greedy action tensor([ 0.9338, -0.4352, 0.1004, -0.4648]) tensor([0.5166, 0.1314, 0.2245, 0.1276]) -Greedy action tensor([ 0.2078, -2.1005, 0.4921, 0.4679]) tensor([0.2684, 0.0267, 0.3567, 0.3482]) -Greedy action tensor([ 0.9649, -0.9490, 0.6390, 1.4394]) tensor([0.2876, 0.0424, 0.2076, 0.4623]) -Greedy action tensor([-0.6365, -0.9903, 0.2739, -0.8273]) tensor([0.1994, 0.1400, 0.4957, 0.1648]) -Greedy action tensor([ 0.7394, -0.0795, 0.1613, 0.2890]) tensor([0.3789, 0.1671, 0.2125, 0.2415]) -Greedy action tensor([ 1.1664, -0.9641, 0.5261, 0.7660]) tensor([0.4318, 0.0513, 0.2276, 0.2893]) -Greedy action tensor([-0.9252, -0.5818, -1.7071, 0.7426]) tensor([0.1224, 0.1726, 0.0560, 0.6490]) -Greedy action tensor([ 0.4282, -1.2057, -0.6737, -0.0307]) tensor([0.4631, 0.0904, 0.1539, 0.2927]) -Greedy action tensor([ 0.4405, -0.5811, -0.1332, -0.1730]) tensor([0.4057, 0.1461, 0.2286, 0.2197]) -Greedy action tensor([ 0.8587, -1.6507, 1.1777, 0.6470]) tensor([0.3062, 0.0249, 0.4212, 0.2477]) -Greedy action tensor([ 0.3368, -0.1348, -1.0107, 0.6512]) tensor([0.3074, 0.1918, 0.0799, 0.4209]) -Greedy action tensor([-0.6837, -0.5946, 0.9032, -0.7212]) tensor([0.1259, 0.1376, 0.6153, 0.1212]) -Greedy action tensor([-0.4114, -0.8554, 0.7662, -0.4414]) tensor([0.1707, 0.1095, 0.5542, 0.1656]) -Greedy action tensor([-1.5547, -0.7506, 0.4691, -1.1638]) tensor([0.0814, 0.1820, 0.6162, 0.1204]) -Greedy action tensor([ 0.7838, -0.5201, -0.3191, -0.3390]) tensor([0.5185, 0.1407, 0.1721, 0.1687]) -Greedy action tensor([-1.9754, -0.7423, -0.2325, 0.5756]) tensor([0.0435, 0.1494, 0.2488, 0.5582]) -Greedy action tensor([-1.4157, -0.8203, -0.0668, -0.7422]) tensor([0.1159, 0.2102, 0.4466, 0.2273]) -Greedy action tensor([ 2.2618, -0.0408, -0.5523, 0.2163]) tensor([0.7756, 0.0776, 0.0465, 0.1003]) -Greedy action tensor([ 0.6113, -0.4396, 0.1566, 0.2310]) tensor([0.3748, 0.1310, 0.2379, 0.2562]) -Greedy action tensor([ 1.4461, -0.7466, -0.1093, 0.6034]) tensor([0.5704, 0.0637, 0.1204, 0.2456]) -Greedy action tensor([ 1.6414, -0.3912, -0.7450, 0.2123]) tensor([0.6838, 0.0896, 0.0629, 0.1638]) -Greedy action tensor([ 1.7073, -0.9041, -0.2730, -0.2905]) tensor([0.7423, 0.0545, 0.1025, 0.1007]) -Greedy action tensor([ 1.5250, -0.0539, -0.4224, 0.5932]) tensor([0.5738, 0.1183, 0.0819, 0.2260]) -Greedy action tensor([ 1.3466, -0.8176, -0.2271, 0.8074]) tensor([0.5248, 0.0603, 0.1088, 0.3061]) -Greedy action tensor([ 1.6887, -0.3858, -0.8851, 0.4101]) tensor([0.6756, 0.0849, 0.0515, 0.1881]) -Greedy action tensor([ 1.1076, -0.4892, -0.9646, 0.1659]) tensor([0.5819, 0.1179, 0.0733, 0.2269]) -Greedy action tensor([ 1.2259, -0.3270, -0.5390, 0.5014]) tensor([0.5355, 0.1133, 0.0917, 0.2595]) -Greedy action tensor([ 2.2331, 0.2829, -0.0769, 0.2086]) tensor([0.7280, 0.1036, 0.0723, 0.0961]) -Greedy action tensor([ 1.5460, -0.5143, -0.5932, 0.3086]) tensor([0.6513, 0.0830, 0.0767, 0.1890]) -Greedy action tensor([ 1.8373, -0.8368, -0.2724, 0.1090]) tensor([0.7311, 0.0504, 0.0887, 0.1298]) -Greedy action tensor([ 1.0759, -0.0134, -0.8843, 0.2996]) tensor([0.5162, 0.1737, 0.0727, 0.2375]) -Greedy action tensor([ 1.2674, -0.2641, -0.6182, 0.5835]) tensor([0.5340, 0.1155, 0.0810, 0.2695]) -Greedy action tensor([ 1.1635, -0.4655, -0.4119, 0.0997]) tensor([0.5720, 0.1122, 0.1184, 0.1974]) -Greedy action tensor([ 1.5106, -0.2936, -0.0250, 0.2855]) tensor([0.5975, 0.0984, 0.1287, 0.1755]) -Greedy action tensor([ 1.1989, -0.6757, -0.3846, 0.1389]) tensor([0.5865, 0.0900, 0.1204, 0.2032]) -Greedy action tensor([ 1.7652, -0.6219, -0.4409, 0.2526]) tensor([0.7030, 0.0646, 0.0774, 0.1549]) -Greedy action tensor([ 1.4469, -0.0841, -0.1899, 0.2707]) tensor([0.5816, 0.1258, 0.1132, 0.1794]) -Greedy action tensor([ 2.0538, -0.8482, -0.5599, 0.9616]) tensor([0.6832, 0.0375, 0.0501, 0.2292]) -Greedy action tensor([ 1.4639, -0.1233, -0.9323, 0.2220]) tensor([0.6312, 0.1291, 0.0575, 0.1823]) -Greedy action tensor([ 1.1611, -0.3735, -0.4495, 0.6397]) tensor([0.4978, 0.1073, 0.0994, 0.2955]) -Greedy action tensor([ 1.3886, -0.5217, -0.4102, 0.3695]) tensor([0.5972, 0.0884, 0.0988, 0.2155]) -Greedy action tensor([ 2.2181, -0.8835, -0.6685, 0.8151]) tensor([0.7426, 0.0334, 0.0414, 0.1826]) -Greedy action tensor([ 1.9803, -0.2954, -0.5301, 0.0849]) tensor([0.7495, 0.0770, 0.0609, 0.1126]) -Greedy action tensor([ 2.5022, 0.5651, -0.1108, -0.0026]) tensor([0.7697, 0.1109, 0.0564, 0.0629]) -Greedy action tensor([ 1.6508, -0.0961, -0.8318, 0.2890]) tensor([0.6605, 0.1151, 0.0552, 0.1692]) -Greedy action tensor([ 1.9220, -1.1377, -0.7521, 0.2562]) tensor([0.7663, 0.0359, 0.0529, 0.1449]) -Greedy action tensor([ 1.0375, -0.2617, -0.2385, 0.2104]) tensor([0.5027, 0.1371, 0.1403, 0.2198]) -Greedy action tensor([ 1.9368, 0.2160, -0.0933, 0.1998]) tensor([0.6728, 0.1204, 0.0884, 0.1184]) -Greedy action tensor([ 1.4608, -0.6963, -0.4123, 0.0971]) tensor([0.6557, 0.0758, 0.1007, 0.1677]) -Greedy action tensor([ 1.2780, -0.5457, -0.6275, 0.2406]) tensor([0.6008, 0.0970, 0.0894, 0.2129]) -Greedy action tensor([ 2.4103, -0.2023, -0.1132, 0.4151]) tensor([0.7755, 0.0569, 0.0622, 0.1055]) -Greedy action tensor([ 1.9870, 0.1561, -0.3735, 0.3824]) tensor([0.6870, 0.1101, 0.0648, 0.1381]) -Greedy action tensor([ 1.1343, -0.4590, -0.7616, 0.4003]) tensor([0.5454, 0.1109, 0.0819, 0.2618]) -Greedy action tensor([ 1.4626, -0.6166, -0.6104, 0.1965]) tensor([0.6524, 0.0816, 0.0821, 0.1839]) -Greedy action tensor([ 1.1460, -0.0107, -0.6552, 0.1968]) tensor([0.5357, 0.1685, 0.0884, 0.2074]) -Greedy action tensor([ 1.1698, -0.6811, -0.2146, 0.0845]) tensor([0.5729, 0.0900, 0.1435, 0.1935]) -Greedy action tensor([ 1.1944, -0.4055, -0.3995, 0.4579]) tensor([0.5308, 0.1072, 0.1078, 0.2541]) -Greedy action tensor([ 1.6436, -0.0217, -0.9269, 0.2504]) tensor([0.6605, 0.1249, 0.0505, 0.1640]) -Greedy action tensor([ 1.4268, -0.5665, -0.2170, -0.0696]) tensor([0.6437, 0.0877, 0.1244, 0.1441]) -Greedy action tensor([ 1.6593, -0.5954, -0.3641, 0.6982]) tensor([0.6174, 0.0648, 0.0816, 0.2362]) -Greedy action tensor([ 1.4074, -0.8278, -0.0638, 0.6289]) tensor([0.5569, 0.0596, 0.1279, 0.2557]) -Greedy action tensor([ 1.5738, -0.4108, -0.3337, 0.1841]) tensor([0.6514, 0.0895, 0.0967, 0.1623]) -Greedy action tensor([ 1.5188, -0.1882, -1.0642, 0.2664]) tensor([0.6482, 0.1176, 0.0490, 0.1853]) -Greedy action tensor([ 2.4431, -1.0132, -0.0683, 0.3685]) tensor([0.8075, 0.0255, 0.0655, 0.1014]) -Greedy action tensor([ 1.3245, -0.1073, -0.2005, -0.0809]) tensor([0.5876, 0.1404, 0.1279, 0.1441]) -Greedy action tensor([ 1.5634, -0.6100, -0.3407, 0.2569]) tensor([0.6521, 0.0742, 0.0971, 0.1766]) -Greedy action tensor([ 1.5009, -0.5525, -0.3940, 0.2839]) tensor([0.6350, 0.0815, 0.0955, 0.1880]) -Greedy action tensor([ 1.6627, -0.4622, -0.0462, -0.0125]) tensor([0.6721, 0.0803, 0.1217, 0.1259]) -Greedy action tensor([ 1.6123, -0.6935, -0.3976, 0.3553]) tensor([0.6587, 0.0657, 0.0883, 0.1874]) -Greedy action tensor([ 1.7216, -0.4486, -0.4863, 0.3721]) tensor([0.6741, 0.0770, 0.0741, 0.1748]) -Greedy action tensor([ 2.0848, -1.0141, -0.3809, 0.4515]) tensor([0.7545, 0.0340, 0.0641, 0.1473]) -Greedy action tensor([ 0.8717, -0.3980, -0.1980, -0.0444]) tensor([0.4941, 0.1388, 0.1695, 0.1977]) -Greedy action tensor([ 1.7973, -0.4481, -1.1861, 0.3577]) tensor([0.7176, 0.0760, 0.0363, 0.1701]) -Greedy action tensor([ 1.5138, -0.8388, -0.1368, 0.7320]) tensor([0.5732, 0.0545, 0.1100, 0.2623]) -Greedy action tensor([ 1.6293, -0.4167, -1.2728, 0.1295]) tensor([0.7106, 0.0918, 0.0390, 0.1586]) -Greedy action tensor([ 1.1469, -0.1408, -0.5866, 0.2285]) tensor([0.5400, 0.1490, 0.0954, 0.2156]) -Greedy action tensor([ 1.3158, -0.6694, 0.0793, 0.4483]) tensor([0.5412, 0.0743, 0.1572, 0.2273]) -Greedy action tensor([ 1.5081, -0.3811, -0.5345, 0.5716]) tensor([0.5978, 0.0904, 0.0775, 0.2343]) -Greedy action tensor([ 1.1735, -0.6111, -0.3846, 0.2551]) tensor([0.5626, 0.0944, 0.1184, 0.2246]) -Greedy action tensor([ 1.4422, -0.5046, -0.2275, 0.4818]) tensor([0.5835, 0.0833, 0.1099, 0.2233]) -Greedy action tensor([ 1.6792, -0.4337, -0.4461, 0.3676]) tensor([0.6624, 0.0801, 0.0791, 0.1785]) -Greedy action tensor([ 2.1101, -0.9218, -0.4079, 0.5901]) tensor([0.7421, 0.0358, 0.0598, 0.1623]) -Greedy action tensor([ 1.7920, -0.5775, -0.5904, 0.5281]) tensor([0.6810, 0.0637, 0.0629, 0.1924]) -Greedy action tensor([ 1.1967, -0.3142, 0.1570, -0.0183]) tensor([0.5345, 0.1180, 0.1890, 0.1586]) -Greedy action tensor([ 1.5447, -0.4786, -0.9815, 0.3033]) tensor([0.6662, 0.0881, 0.0533, 0.1925]) -Greedy action tensor([ 1.5794, -0.4678, -0.2866, 0.1623]) tensor([0.6552, 0.0846, 0.1014, 0.1588]) -Greedy action tensor([ 1.9361, -0.8244, -0.5515, 0.3472]) tensor([0.7405, 0.0468, 0.0615, 0.1512]) -Greedy action tensor([ 1.5164, -0.8177, -0.5628, 0.0146]) tensor([0.6922, 0.0671, 0.0865, 0.1542]) -Greedy action tensor([ 1.7342, -1.0862, 0.0730, 0.3710]) tensor([0.6643, 0.0396, 0.1262, 0.1700]) -Greedy action tensor([ 1.0015, -0.4962, -0.5078, 0.1907]) tensor([0.5293, 0.1184, 0.1170, 0.2353]) -Greedy action tensor([ 1.4019, -0.6378, -0.1667, 0.5552]) tensor([0.5659, 0.0736, 0.1179, 0.2427]) -Greedy action tensor([ 1.9496, -1.2257, -0.2240, 0.3457]) tensor([0.7371, 0.0308, 0.0839, 0.1482]) -Greedy action tensor([ 2.1083, -1.0162, -0.3297, 0.4604]) tensor([0.7554, 0.0332, 0.0660, 0.1454]) -Greedy action tensor([ 1.8027, -0.4803, -0.7258, 0.1832]) tensor([0.7248, 0.0739, 0.0578, 0.1435]) -Greedy action tensor([1.9654, 0.4326, 0.0563, 0.1868]) tensor([0.6523, 0.1409, 0.0967, 0.1102]) -Greedy action tensor([ 1.7489, -0.5388, -0.1306, 0.8280]) tensor([0.6052, 0.0614, 0.0924, 0.2410]) -Greedy action tensor([ 1.7841, 0.3902, -0.4781, 0.6011]) tensor([0.6029, 0.1496, 0.0628, 0.1847]) -Greedy action tensor([ 2.0272, -1.2325, -0.2605, 0.5550]) tensor([0.7303, 0.0280, 0.0741, 0.1675]) -Greedy action tensor([-1.7886, -0.4519, 0.5870, -0.0954]) tensor([0.0476, 0.1813, 0.5122, 0.2589]) -Greedy action tensor([-0.5895, 0.0858, 0.1482, 0.0602]) tensor([0.1435, 0.2818, 0.3000, 0.2747]) -Greedy action tensor([-1.9273, -0.4395, 0.6620, -0.1731]) tensor([0.0408, 0.1805, 0.5431, 0.2356]) -Greedy action tensor([-1.1070, -0.6576, 0.2249, 0.3192]) tensor([0.0951, 0.1490, 0.3601, 0.3958]) -Greedy action tensor([-1.5571, -0.4287, 0.4565, 0.0250]) tensor([0.0608, 0.1879, 0.4554, 0.2958]) -Greedy action tensor([-1.7798, -0.5011, 0.5820, -0.1034]) tensor([0.0487, 0.1748, 0.5163, 0.2602]) -Greedy action tensor([-1.8872, -0.4537, 0.6490, -0.1304]) tensor([0.0423, 0.1775, 0.5348, 0.2453]) -Greedy action tensor([-1.5605, -0.0852, 0.5147, 0.0986]) tensor([0.0538, 0.2352, 0.4284, 0.2826]) -Greedy action tensor([-1.7097, -0.3128, 0.5513, -0.3147]) tensor([0.0536, 0.2165, 0.5138, 0.2161]) -Greedy action tensor([-1.8959, -0.4369, 0.6441, -0.1575]) tensor([0.0422, 0.1817, 0.5357, 0.2403]) -Greedy action tensor([-1.7284, -0.4990, 0.5618, -0.1037]) tensor([0.0516, 0.1765, 0.5098, 0.2621]) -Greedy action tensor([-1.7772, -0.5273, 0.8180, 0.0576]) tensor([0.0414, 0.1445, 0.5548, 0.2593]) -Greedy action tensor([-1.9382, -0.4093, 0.6567, -0.1755]) tensor([0.0403, 0.1857, 0.5393, 0.2347]) -Greedy action tensor([-1.8841, -0.3653, 0.6242, -0.1576]) tensor([0.0426, 0.1946, 0.5234, 0.2395]) -Greedy action tensor([-1.9372, -0.4322, 0.6605, -0.1759]) tensor([0.0404, 0.1819, 0.5426, 0.2351]) -Greedy action tensor([-1.2985, -0.5065, 0.4840, 0.4336]) tensor([0.0675, 0.1491, 0.4015, 0.3818]) -Greedy action tensor([-1.2634, -0.3278, 0.2922, -0.0486]) tensor([0.0858, 0.2186, 0.4065, 0.2891]) -Greedy action tensor([-0.6677, -0.6420, 0.2451, 0.4484]) tensor([0.1321, 0.1355, 0.3291, 0.4033]) -Greedy action tensor([-1.8374, -0.3440, 0.6252, -0.1127]) tensor([0.0439, 0.1953, 0.5147, 0.2461]) -Greedy action tensor([-1.8788, -0.3337, 0.6239, -0.1236]) tensor([0.0422, 0.1979, 0.5157, 0.2442]) -Greedy action tensor([-0.8760, 0.7967, 0.1285, 0.0404]) tensor([0.0865, 0.4609, 0.2363, 0.2163]) -Greedy action tensor([-1.8539, -0.4302, 0.6606, -0.0997]) tensor([0.0429, 0.1783, 0.5307, 0.2481]) -Greedy action tensor([-1.7992, -0.3727, 0.5861, -0.1069]) tensor([0.0466, 0.1941, 0.5062, 0.2531]) -Greedy action tensor([-1.2759, -0.3082, -0.3563, -0.2058]) tensor([0.1104, 0.2906, 0.2770, 0.3220]) -Greedy action tensor([-1.3872, -0.3858, 0.3512, 0.1189]) tensor([0.0718, 0.1956, 0.4087, 0.3239]) -Greedy action tensor([-1.8101, -0.4401, 0.6318, -0.0816]) tensor([0.0453, 0.1784, 0.5210, 0.2553]) -Greedy action tensor([-1.8189, -0.3968, 0.6066, -0.1042]) tensor([0.0454, 0.1884, 0.5138, 0.2524]) -Greedy action tensor([-1.9299, -0.4325, 0.6582, -0.1732]) tensor([0.0407, 0.1820, 0.5415, 0.2358]) -Greedy action tensor([-1.6367, -0.2569, 0.6315, 0.2202]) tensor([0.0475, 0.1889, 0.4592, 0.3044]) -Greedy action tensor([-1.7758, -0.3896, 0.6507, -0.0855]) tensor([0.0460, 0.1840, 0.5207, 0.2494]) -Greedy action tensor([-0.3865, -0.2356, 0.1648, 0.1842]) tensor([0.1764, 0.2052, 0.3062, 0.3122]) -Greedy action tensor([-1.3900, 0.6479, 0.2750, 0.0397]) tensor([0.0551, 0.4231, 0.2914, 0.2303]) -Greedy action tensor([-1.1389, -0.4619, 0.3750, 0.3187]) tensor([0.0847, 0.1667, 0.3849, 0.3638]) -Greedy action tensor([-1.9179, -0.4574, 0.6910, -0.1096]) tensor([0.0400, 0.1724, 0.5435, 0.2441]) -Greedy action tensor([-1.8506, -0.3479, 0.6375, -0.1189]) tensor([0.0431, 0.1939, 0.5193, 0.2437]) -Greedy action tensor([-1.8662, -0.2533, 0.6191, -0.1168]) tensor([0.0421, 0.2111, 0.5050, 0.2419]) -Greedy action tensor([-1.9387, -0.4547, 0.6627, -0.1779]) tensor([0.0405, 0.1785, 0.5456, 0.2354]) -Greedy action tensor([-1.8782, -0.3550, 0.6267, -0.1436]) tensor([0.0426, 0.1952, 0.5210, 0.2412]) -Greedy action tensor([-1.4455, 0.1163, 0.3446, 0.0108]) tensor([0.0623, 0.2971, 0.3733, 0.2673]) -Greedy action tensor([-1.4271, 0.0055, 0.4327, -0.1436]) tensor([0.0657, 0.2752, 0.4219, 0.2371]) -Greedy action tensor([-1.9235, -0.4561, 0.6573, -0.1712]) tensor([0.0411, 0.1784, 0.5432, 0.2372]) -Greedy action tensor([-0.9722, 0.4967, 0.3910, -0.5150]) tensor([0.0923, 0.4010, 0.3608, 0.1458]) -Greedy action tensor([-1.9319, -0.4046, 0.6528, -0.1724]) tensor([0.0405, 0.1867, 0.5374, 0.2354]) -Greedy action tensor([-0.8722, -0.3599, 0.6525, 0.9564]) tensor([0.0741, 0.1237, 0.3406, 0.4616]) -Greedy action tensor([-1.8583, -0.4187, 0.6225, -0.1420]) tensor([0.0440, 0.1856, 0.5257, 0.2447]) -Greedy action tensor([-1.8712, -0.2895, 0.6121, -0.1221]) tensor([0.0424, 0.2061, 0.5078, 0.2437]) -Greedy action tensor([-1.8084, -0.4386, 0.5992, -0.1029]) tensor([0.0464, 0.1826, 0.5155, 0.2555]) -Greedy action tensor([-0.6307, 0.4196, 0.4749, 0.9682]) tensor([0.0846, 0.2417, 0.2554, 0.4183]) -Greedy action tensor([-1.9017, -0.4556, 0.6511, -0.1443]) tensor([0.0419, 0.1778, 0.5377, 0.2427]) -Greedy action tensor([-0.1322, 1.1148, -0.0042, 0.3558]) tensor([0.1380, 0.4803, 0.1569, 0.2248]) -Greedy action tensor([-1.8052, -0.2487, 0.6247, -0.0740]) tensor([0.0440, 0.2085, 0.4993, 0.2483]) -Greedy action tensor([-1.8988, -0.4033, 0.6537, -0.1421]) tensor([0.0415, 0.1852, 0.5329, 0.2404]) -Greedy action tensor([-1.7268, -0.3617, 0.6488, -0.0316]) tensor([0.0473, 0.1854, 0.5093, 0.2579]) -Greedy action tensor([-0.5226, 0.5011, 0.4134, 0.8506]) tensor([0.0973, 0.2707, 0.2480, 0.3840]) -Greedy action tensor([-1.8876, -0.4692, 0.6370, -0.1539]) tensor([0.0430, 0.1774, 0.5364, 0.2432]) -Greedy action tensor([-0.7158, 0.4509, 0.3732, 0.5800]) tensor([0.0923, 0.2963, 0.2742, 0.3372]) -Greedy action tensor([-1.3925, -0.4460, 0.5972, 0.5465]) tensor([0.0560, 0.1444, 0.4099, 0.3896]) -Greedy action tensor([-1.9289, -0.4345, 0.6606, -0.1715]) tensor([0.0407, 0.1813, 0.5421, 0.2359]) -Greedy action tensor([-1.8964, -0.4049, 0.6426, -0.1494]) tensor([0.0419, 0.1863, 0.5312, 0.2406]) -Greedy action tensor([-1.9106, -0.4502, 0.6511, -0.1668]) tensor([0.0417, 0.1796, 0.5403, 0.2384]) -Greedy action tensor([-1.9337, -0.4459, 0.6594, -0.1776]) tensor([0.0407, 0.1801, 0.5438, 0.2355]) -Greedy action tensor([-1.5223, 0.2878, 0.4040, -0.0955]) tensor([0.0551, 0.3369, 0.3784, 0.2296]) -Greedy action tensor([-1.2221, 0.4276, 0.2941, -0.6001]) tensor([0.0792, 0.4124, 0.3608, 0.1476]) -Greedy action tensor([-1.9140, -0.4404, 0.6727, -0.1568]) tensor([0.0409, 0.1785, 0.5435, 0.2371]) -Greedy action tensor([-1.2077, 0.6075, 0.1659, 0.0303]) tensor([0.0688, 0.4224, 0.2716, 0.2372]) -Greedy action tensor([-1.5950, -0.4345, 0.5071, -0.0109]) tensor([0.0580, 0.1850, 0.4744, 0.2826]) -Greedy action tensor([-1.8343, -0.4337, 0.6087, -0.1446]) tensor([0.0455, 0.1846, 0.5235, 0.2465]) -Greedy action tensor([-1.9459, -0.4479, 0.6668, -0.1816]) tensor([0.0401, 0.1793, 0.5466, 0.2340]) -Greedy action tensor([-1.7653, -0.4831, 0.5835, -0.0927]) tensor([0.0490, 0.1767, 0.5133, 0.2610]) -Greedy action tensor([-1.6217, -0.1590, 0.6246, 0.1908]) tensor([0.0479, 0.2066, 0.4524, 0.2932]) -Greedy action tensor([-1.5622, -0.0305, 0.4397, -0.0366]) tensor([0.0567, 0.2624, 0.4200, 0.2609]) -Greedy action tensor([-1.7477, -0.3270, 0.5397, -0.1080]) tensor([0.0496, 0.2055, 0.4890, 0.2559]) -Greedy action tensor([-1.8328, -0.4111, 0.6058, -0.1359]) tensor([0.0453, 0.1879, 0.5194, 0.2474]) -Greedy action tensor([-1.4770, 0.5694, 0.2779, 0.1626]) tensor([0.0508, 0.3934, 0.2939, 0.2619]) -Greedy action tensor([-1.9411, -0.4445, 0.6647, -0.1782]) tensor([0.0403, 0.1798, 0.5452, 0.2347]) -Greedy action tensor([-1.7688, -0.2313, 0.5963, -0.0589]) tensor([0.0458, 0.2132, 0.4877, 0.2533]) -Greedy action tensor([-1.3789, -0.2518, 0.6050, 0.1184]) tensor([0.0632, 0.1950, 0.4594, 0.2824]) -Greedy action tensor([-1.8969, -0.3838, 0.6336, -0.1580]) tensor([0.0420, 0.1908, 0.5279, 0.2392]) -Greedy action tensor([-1.9305, -0.4379, 0.6611, -0.1716]) tensor([0.0406, 0.1808, 0.5426, 0.2360]) -Greedy action tensor([-1.8993, -0.3528, 0.6264, -0.1484]) tensor([0.0417, 0.1960, 0.5218, 0.2404]) -Greedy action tensor([-1.9033, -0.4010, 0.6463, -0.1600]) tensor([0.0416, 0.1871, 0.5332, 0.2381]) -Greedy action tensor([ 1.0041, -1.1181, 0.1372, -0.4723]) tensor([0.5655, 0.0677, 0.2376, 0.1292]) -Greedy action tensor([ 0.8638, -0.7780, -0.0829, -0.4860]) tensor([0.5432, 0.1052, 0.2108, 0.1408]) -Greedy action tensor([ 0.9589, -0.6313, 0.2107, -0.5635]) tensor([0.5276, 0.1076, 0.2497, 0.1151]) -Greedy action tensor([ 0.6016, -0.3504, -0.1337, -0.3547]) tensor([0.4445, 0.1716, 0.2131, 0.1708]) -Greedy action tensor([ 1.2104, -0.6258, -0.0025, -0.5908]) tensor([0.6166, 0.0983, 0.1833, 0.1018]) -Greedy action tensor([ 0.8576, -0.5033, 0.0807, -0.6599]) tensor([0.5167, 0.1325, 0.2376, 0.1133]) -Greedy action tensor([ 0.9173, -0.5607, 0.0311, -0.5205]) tensor([0.5326, 0.1215, 0.2195, 0.1265]) -Greedy action tensor([ 0.6339, -0.3905, 0.0198, -0.0558]) tensor([0.4163, 0.1495, 0.2253, 0.2089]) -Greedy action tensor([ 0.8040, -0.5723, -0.1287, -0.0950]) tensor([0.4871, 0.1230, 0.1917, 0.1982]) -Greedy action tensor([ 0.5377, -0.0370, -0.0489, 0.0361]) tensor([0.3670, 0.2066, 0.2041, 0.2223]) -Greedy action tensor([ 0.5030, -0.2012, -0.0578, -0.0161]) tensor([0.3759, 0.1859, 0.2145, 0.2237]) -Greedy action tensor([ 1.1399, -0.5032, -0.1173, -0.6166]) tensor([0.6059, 0.1172, 0.1723, 0.1046]) -Greedy action tensor([ 1.2095, -0.6671, 0.0236, -0.4757]) tensor([0.6083, 0.0931, 0.1858, 0.1128]) -Greedy action tensor([ 0.9772, -0.6615, -0.0400, -0.3870]) tensor([0.5521, 0.1072, 0.1996, 0.1411]) -Greedy action tensor([ 0.4037, -0.1645, -0.0704, -0.1441]) tensor([0.3614, 0.2047, 0.2249, 0.2090]) -Greedy action tensor([ 0.7382, -0.3732, 0.0609, -0.1764]) tensor([0.4469, 0.1471, 0.2270, 0.1791]) -Greedy action tensor([ 0.6841, 0.0355, 0.0279, -0.1266]) tensor([0.4022, 0.2103, 0.2087, 0.1788]) -Greedy action tensor([ 0.7266, -0.8132, 0.0509, -0.2639]) tensor([0.4774, 0.1024, 0.2429, 0.1773]) -Greedy action tensor([ 0.7015, 0.0195, 0.0492, -0.3411]) tensor([0.4203, 0.2125, 0.2189, 0.1482]) -Greedy action tensor([ 0.8111, -0.5741, -0.0421, -0.2799]) tensor([0.4970, 0.1244, 0.2117, 0.1669]) -Greedy action tensor([ 0.4093, -0.3743, -0.1544, -0.1205]) tensor([0.3825, 0.1747, 0.2177, 0.2252]) -Greedy action tensor([ 0.6397, -0.1676, 0.0534, -0.0743]) tensor([0.4013, 0.1790, 0.2233, 0.1965]) -Greedy action tensor([ 0.8420, -0.4427, -0.0956, -0.3757]) tensor([0.5091, 0.1409, 0.1994, 0.1506]) -Greedy action tensor([ 1.1247, -0.2745, 0.0988, -0.3262]) tensor([0.5436, 0.1342, 0.1949, 0.1274]) -Greedy action tensor([ 0.5026, -0.3287, -0.1621, -0.3569]) tensor([0.4213, 0.1835, 0.2168, 0.1784]) -Greedy action tensor([ 0.7412, -0.7468, -0.0903, -0.0985]) tensor([0.4778, 0.1079, 0.2080, 0.2063]) -Greedy action tensor([ 0.9081, -0.5899, 0.0541, -0.4238]) tensor([0.5227, 0.1168, 0.2225, 0.1380]) -Greedy action tensor([ 0.3726, -0.3611, -0.1416, -0.1627]) tensor([0.3754, 0.1803, 0.2245, 0.2198]) -Greedy action tensor([ 1.1420, -0.3257, -0.0053, -0.2534]) tensor([0.5569, 0.1283, 0.1768, 0.1380]) -Greedy action tensor([ 0.7537, -0.8432, 0.1338, -0.3210]) tensor([0.4803, 0.0973, 0.2584, 0.1640]) -Greedy action tensor([ 0.6129, -0.5781, -0.0658, -0.5866]) tensor([0.4734, 0.1439, 0.2401, 0.1427]) -Greedy action tensor([ 0.7375, -0.3976, -0.1231, -0.1792]) tensor([0.4664, 0.1499, 0.1972, 0.1865]) -Greedy action tensor([ 0.7390, -0.5455, -0.1330, -0.3832]) tensor([0.4949, 0.1370, 0.2069, 0.1611]) -Greedy action tensor([ 0.3445, 0.0642, -0.0893, -0.1291]) tensor([0.3304, 0.2497, 0.2141, 0.2058]) -Greedy action tensor([ 0.5177, -0.3485, -0.1188, -0.0887]) tensor([0.4008, 0.1686, 0.2121, 0.2186]) -Greedy action tensor([ 0.7820, -0.4745, -0.0402, -0.4029]) tensor([0.4926, 0.1402, 0.2165, 0.1506]) -Greedy action tensor([ 0.6510, 0.1816, -0.1083, -0.0348]) tensor([0.3851, 0.2408, 0.1802, 0.1939]) -Greedy action tensor([ 0.4681, 0.0526, -0.0958, -0.3740]) tensor([0.3760, 0.2481, 0.2139, 0.1620]) -Greedy action tensor([ 0.5513, 0.0332, -0.0495, 0.0598]) tensor([0.3629, 0.2161, 0.1990, 0.2220]) -Greedy action tensor([ 0.5765, 0.0246, -0.1708, -0.3291]) tensor([0.4075, 0.2347, 0.1930, 0.1648]) -Greedy action tensor([ 0.8010, -0.3499, -0.0398, -0.2462]) tensor([0.4765, 0.1507, 0.2055, 0.1672]) -Greedy action tensor([ 0.8304, -0.5234, -0.0119, -0.1170]) tensor([0.4815, 0.1243, 0.2074, 0.1867]) -Greedy action tensor([ 7.7559e-01, -5.0922e-01, -4.5028e-04, -8.9962e-02]) tensor([0.4634, 0.1282, 0.2133, 0.1950]) -Greedy action tensor([ 0.8363, -0.4959, -0.1574, -0.5198]) tensor([0.5286, 0.1395, 0.1957, 0.1362]) -Greedy action tensor([ 1.0094, -0.9088, 0.1571, -0.6279]) tensor([0.5657, 0.0831, 0.2412, 0.1100]) -Greedy action tensor([ 1.1313, -0.6597, -0.0383, -0.4684]) tensor([0.5955, 0.0993, 0.1849, 0.1203]) -Greedy action tensor([ 0.3032, -0.4467, -0.0945, -0.0947]) tensor([0.3551, 0.1678, 0.2386, 0.2385]) -Greedy action tensor([ 1.1781, -0.9529, 0.0717, -0.6816]) tensor([0.6230, 0.0740, 0.2060, 0.0970]) -Greedy action tensor([ 0.7668, -0.4691, 0.0186, -0.2423]) tensor([0.4698, 0.1365, 0.2224, 0.1713]) -Greedy action tensor([ 0.8832, -0.6555, -0.0150, -0.5856]) tensor([0.5399, 0.1159, 0.2199, 0.1243]) -Greedy action tensor([ 0.6987, -0.4572, -0.0517, -0.0208]) tensor([0.4398, 0.1384, 0.2076, 0.2141]) -Greedy action tensor([ 0.8900, -0.7123, 0.1190, -0.3878]) tensor([0.5148, 0.1037, 0.2381, 0.1434]) -Greedy action tensor([ 0.3350, -0.0105, -0.0477, -0.0511]) tensor([0.3258, 0.2306, 0.2222, 0.2214]) -Greedy action tensor([ 0.6844, -0.2910, 0.1514, -0.4780]) tensor([0.4392, 0.1656, 0.2578, 0.1374]) -Greedy action tensor([ 0.9899, -0.5144, -0.0185, -0.4800]) tensor([0.5504, 0.1223, 0.2008, 0.1266]) -Greedy action tensor([ 1.2553, -0.6230, -0.2197, -0.5067]) tensor([0.6438, 0.0984, 0.1473, 0.1105]) -Greedy action tensor([ 0.4069, -0.0525, -0.1317, -0.0629]) tensor([0.3521, 0.2224, 0.2055, 0.2201]) -Greedy action tensor([ 0.3807, -0.0887, -0.0108, -0.0112]) tensor([0.3359, 0.2101, 0.2271, 0.2270]) -Greedy action tensor([ 0.6674, -0.4383, 0.0363, -0.4159]) tensor([0.4542, 0.1503, 0.2417, 0.1537]) -Greedy action tensor([ 0.5997, -0.3337, -0.0101, -0.2783]) tensor([0.4251, 0.1672, 0.2310, 0.1767]) -Greedy action tensor([ 0.7857, -0.3338, -0.0945, -0.4671]) tensor([0.4934, 0.1611, 0.2046, 0.1410]) -Greedy action tensor([ 1.0131, -0.7794, 0.1404, -0.4951]) tensor([0.5538, 0.0922, 0.2314, 0.1226]) -Greedy action tensor([ 0.4688, -0.3246, -0.0496, -0.2538]) tensor([0.3947, 0.1785, 0.2351, 0.1916]) -Greedy action tensor([ 0.5810, -0.4991, 0.0307, -0.5891]) tensor([0.4491, 0.1525, 0.2590, 0.1394]) -Greedy action tensor([ 7.3253e-01, -8.8099e-02, -3.0050e-04, -1.5666e-01]) tensor([0.4289, 0.1888, 0.2061, 0.1763]) -Greedy action tensor([ 1.1219, -0.4171, -0.0698, -0.1913]) tensor([0.5595, 0.1201, 0.1699, 0.1505]) -Greedy action tensor([ 0.7772, -0.3267, -0.3405, -0.4110]) tensor([0.5093, 0.1689, 0.1666, 0.1552]) -Greedy action tensor([ 0.6362, -0.8404, 0.1326, -0.5172]) tensor([0.4655, 0.1063, 0.2813, 0.1469]) -Greedy action tensor([ 1.2376, -0.8336, -0.0494, -0.8798]) tensor([0.6568, 0.0828, 0.1813, 0.0790]) -Greedy action tensor([ 0.9427, -0.7115, -0.0044, -0.4169]) tensor([0.5447, 0.1042, 0.2113, 0.1399]) -Greedy action tensor([ 0.1923, -0.0489, -0.1502, 0.0809]) tensor([0.2950, 0.2318, 0.2094, 0.2639]) -Greedy action tensor([ 0.6659, -0.3383, 0.0162, -0.0896]) tensor([0.4240, 0.1553, 0.2214, 0.1992]) -Greedy action tensor([ 0.6119, -0.3538, -0.0961, -0.0895]) tensor([0.4221, 0.1607, 0.2079, 0.2093]) -Greedy action tensor([ 0.7235, -0.3363, 0.1509, -0.0365]) tensor([0.4205, 0.1457, 0.2372, 0.1966]) -Greedy action tensor([ 1.0252, -0.6054, 0.0523, -0.6665]) tensor([0.5688, 0.1114, 0.2150, 0.1048]) -Greedy action tensor([ 0.5627, -0.3070, -0.0600, 0.0297]) tensor([0.3933, 0.1648, 0.2110, 0.2308]) -Greedy action tensor([ 0.5121, 0.1938, -0.0009, -0.0265]) tensor([0.3437, 0.2500, 0.2058, 0.2006]) -Greedy action tensor([ 0.6900, -0.2477, 0.0579, -0.1096]) tensor([0.4215, 0.1650, 0.2240, 0.1895]) -Greedy action tensor([ 0.6065, -0.5890, -0.2211, -0.2152]) tensor([0.4589, 0.1388, 0.2006, 0.2017]) -Greedy action tensor([ 0.7610, -0.1085, 0.0165, -0.4252]) tensor([0.4546, 0.1906, 0.2160, 0.1388]) -Greedy action tensor([ 0.8468, -0.6209, -0.0592, -0.4306]) tensor([0.5226, 0.1205, 0.2112, 0.1457]) -Greedy action tensor([ 1.6308, -1.1035, -0.0629, 0.3392]) tensor([0.6563, 0.0426, 0.1207, 0.1804]) -Greedy action tensor([ 1.8295, -0.7532, -0.2814, 0.4999]) tensor([0.6843, 0.0517, 0.0829, 0.1811]) -Greedy action tensor([ 1.2770, -0.4181, -0.3415, 0.3680]) tensor([0.5603, 0.1029, 0.1110, 0.2258]) -Greedy action tensor([ 2.2397, -1.3400, -0.0125, 0.3659]) tensor([0.7772, 0.0217, 0.0817, 0.1193]) -Greedy action tensor([ 1.1891, -0.1139, -0.1000, -0.0752]) tensor([0.5466, 0.1485, 0.1506, 0.1544]) -Greedy action tensor([ 0.7629, -0.5413, -0.1682, 0.3387]) tensor([0.4311, 0.1170, 0.1699, 0.2820]) -Greedy action tensor([ 1.4683, -0.5393, -0.5945, 0.6799]) tensor([0.5828, 0.0783, 0.0741, 0.2649]) -Greedy action tensor([ 1.6251, -0.6027, -0.7505, 0.2095]) tensor([0.6928, 0.0747, 0.0644, 0.1682]) -Greedy action tensor([ 1.9838, 0.6528, -0.6400, 0.0770]) tensor([0.6733, 0.1779, 0.0488, 0.1000]) -Greedy action tensor([ 1.4715, -0.2917, -0.2467, 0.2585]) tensor([0.6067, 0.1041, 0.1088, 0.1804]) -Greedy action tensor([ 1.8056, -0.7343, -0.6396, 0.3792]) tensor([0.7114, 0.0561, 0.0617, 0.1708]) -Greedy action tensor([ 3.2565, -1.0743, -0.5432, 0.7153]) tensor([0.8974, 0.0118, 0.0201, 0.0707]) -Greedy action tensor([ 1.3537, -0.0819, -0.1151, 0.2636]) tensor([0.5542, 0.1319, 0.1276, 0.1863]) -Greedy action tensor([ 1.7722, -0.7774, -0.4112, 0.4392]) tensor([0.6875, 0.0537, 0.0775, 0.1813]) -Greedy action tensor([ 1.6479, -0.5602, -0.5648, -0.0387]) tensor([0.7120, 0.0783, 0.0779, 0.1318]) -Greedy action tensor([ 1.4954, -0.2112, -0.4855, 0.4066]) tensor([0.6038, 0.1096, 0.0833, 0.2033]) -Greedy action tensor([ 2.0097, -1.0662, -0.6179, 0.7149]) tensor([0.7182, 0.0331, 0.0519, 0.1968]) -Greedy action tensor([ 0.9609, -0.5417, -0.0752, 0.1079]) tensor([0.4991, 0.1111, 0.1771, 0.2127]) -Greedy action tensor([ 1.4985, -0.5685, -0.1815, 0.4719]) tensor([0.5984, 0.0757, 0.1115, 0.2144]) -Greedy action tensor([ 2.0675, -1.0635, -0.2275, 0.1317]) tensor([0.7760, 0.0339, 0.0782, 0.1120]) -Greedy action tensor([ 1.4931, -0.5356, -0.3391, 0.5600]) tensor([0.5935, 0.0780, 0.0950, 0.2334]) -Greedy action tensor([ 1.1722, -0.2281, -0.4960, -0.0936]) tensor([0.5824, 0.1436, 0.1098, 0.1642]) -Greedy action tensor([ 1.4095, -0.5631, -0.2131, 0.2929]) tensor([0.6010, 0.0836, 0.1186, 0.1968]) -Greedy action tensor([ 2.0653, -0.4851, -0.6218, 0.0955]) tensor([0.7778, 0.0607, 0.0530, 0.1085]) -Greedy action tensor([ 0.8954, -0.0959, -0.1699, 0.0140]) tensor([0.4695, 0.1742, 0.1618, 0.1945]) -Greedy action tensor([ 2.1645, -1.0479, -0.1869, 0.9459]) tensor([0.6987, 0.0281, 0.0665, 0.2066]) -Greedy action tensor([ 1.3363, -0.6910, -0.2698, 0.3125]) tensor([0.5912, 0.0779, 0.1186, 0.2124]) -Greedy action tensor([ 1.8969, -0.2831, -1.1633, 0.5465]) tensor([0.7047, 0.0797, 0.0330, 0.1826]) -Greedy action tensor([ 1.1638, -0.5050, -0.4821, 0.3984]) tensor([0.5416, 0.1021, 0.1044, 0.2519]) -Greedy action tensor([ 2.3475, -0.7753, -0.5912, 0.9491]) tensor([0.7441, 0.0328, 0.0394, 0.1838]) -Greedy action tensor([ 1.6928, -0.6717, -0.3421, 0.2751]) tensor([0.6817, 0.0641, 0.0891, 0.1652]) -Greedy action tensor([ 1.5340, -0.8451, -0.1152, 0.2715]) tensor([0.6379, 0.0591, 0.1226, 0.1805]) -Greedy action tensor([ 1.0508, -0.4367, -0.0783, 0.1722]) tensor([0.5090, 0.1150, 0.1646, 0.2114]) -Greedy action tensor([ 1.6512, -0.7708, -0.7916, -0.2731]) tensor([0.7566, 0.0671, 0.0658, 0.1105]) -Greedy action tensor([ 2.0756, -0.2010, -0.6111, 0.3217]) tensor([0.7441, 0.0764, 0.0507, 0.1288]) -Greedy action tensor([ 2.3946, -0.4834, -0.2480, 0.8601]) tensor([0.7446, 0.0419, 0.0530, 0.1605]) -Greedy action tensor([ 0.9737, 0.0831, -0.9125, 0.3448]) tensor([0.4773, 0.1959, 0.0724, 0.2545]) -Greedy action tensor([ 1.3879, -0.0715, -0.4036, 0.4841]) tensor([0.5543, 0.1288, 0.0924, 0.2245]) -Greedy action tensor([ 1.4640, -0.5746, -0.2595, 0.3456]) tensor([0.6114, 0.0796, 0.1091, 0.1998]) -Greedy action tensor([ 1.2321, -0.2605, -0.7404, 0.4090]) tensor([0.5547, 0.1247, 0.0772, 0.2435]) -Greedy action tensor([ 1.8992, -0.3761, -0.0129, 0.7608]) tensor([0.6366, 0.0654, 0.0941, 0.2039]) -Greedy action tensor([ 2.1197, -1.3558, -0.2959, 0.5226]) tensor([0.7560, 0.0234, 0.0675, 0.1531]) -Greedy action tensor([ 1.3094, -0.0315, -0.7199, 0.0709]) tensor([0.5942, 0.1555, 0.0781, 0.1722]) -Greedy action tensor([ 0.9525, -0.5794, 0.0590, 0.0203]) tensor([0.4953, 0.1070, 0.2027, 0.1950]) -Greedy action tensor([ 1.8199, -0.2331, -0.7263, 0.3501]) tensor([0.6960, 0.0893, 0.0546, 0.1601]) -Greedy action tensor([ 1.2935, -1.0541, 0.0595, 0.2423]) tensor([0.5760, 0.0551, 0.1677, 0.2013]) -Greedy action tensor([ 2.1021, -0.1379, -0.3347, 0.4983]) tensor([0.7168, 0.0763, 0.0627, 0.1442]) -Greedy action tensor([ 1.3300, -0.2188, -0.8190, 0.1394]) tensor([0.6123, 0.1301, 0.0714, 0.1862]) -Greedy action tensor([ 1.7411, -0.7273, -0.4255, 0.3053]) tensor([0.6958, 0.0589, 0.0797, 0.1655]) -Greedy action tensor([ 1.6161, -0.6358, -0.1541, 0.3082]) tensor([0.6469, 0.0681, 0.1102, 0.1749]) -Greedy action tensor([ 1.4957, -0.6945, -0.8722, 0.1172]) tensor([0.6861, 0.0768, 0.0643, 0.1729]) -Greedy action tensor([ 1.4560, -0.5432, -0.2951, 0.0112]) tensor([0.6473, 0.0877, 0.1124, 0.1526]) -Greedy action tensor([ 1.3180, -0.1104, -0.6941, 0.3971]) tensor([0.5645, 0.1353, 0.0755, 0.2247]) -Greedy action tensor([ 1.5566, -0.4528, -0.6209, 0.3678]) tensor([0.6443, 0.0864, 0.0730, 0.1963]) -Greedy action tensor([ 2.1266, 0.3879, -0.4499, 0.0231]) tensor([0.7279, 0.1279, 0.0553, 0.0888]) -Greedy action tensor([ 1.4727, -0.7942, 0.0017, 0.1576]) tensor([0.6243, 0.0647, 0.1434, 0.1676]) -Greedy action tensor([ 1.5716, -0.4056, -0.6136, 0.9679]) tensor([0.5563, 0.0770, 0.0626, 0.3042]) -Greedy action tensor([ 1.2364, -0.3813, -0.6399, 0.0840]) tensor([0.5997, 0.1190, 0.0919, 0.1894]) -Greedy action tensor([ 1.8854, -0.2640, -0.9392, 0.1629]) tensor([0.7383, 0.0860, 0.0438, 0.1319]) -Greedy action tensor([ 1.3426, -0.1603, -1.0735, 0.2083]) tensor([0.6122, 0.1362, 0.0547, 0.1969]) -Greedy action tensor([ 1.4069, -0.5163, -0.7363, 0.0980]) tensor([0.6521, 0.0953, 0.0765, 0.1761]) -Greedy action tensor([ 1.4038, -0.5373, -0.5121, 0.7459]) tensor([0.5529, 0.0794, 0.0814, 0.2864]) -Greedy action tensor([ 1.5065, -0.4485, -0.4395, 0.7711]) tensor([0.5670, 0.0803, 0.0810, 0.2718]) -Greedy action tensor([ 1.3612, -0.3648, -0.3929, 0.2535]) tensor([0.5947, 0.1059, 0.1029, 0.1965]) -Greedy action tensor([ 1.3644, -0.5779, -0.3331, 0.3954]) tensor([0.5862, 0.0840, 0.1074, 0.2224]) -Greedy action tensor([ 1.5149, -0.6644, -0.9599, 0.3781]) tensor([0.6587, 0.0745, 0.0554, 0.2113]) -Greedy action tensor([ 2.0411, -0.1440, -0.5715, 0.2396]) tensor([0.7403, 0.0833, 0.0543, 0.1222]) -Greedy action tensor([ 1.1086, -0.5136, -0.1796, -0.1186]) tensor([0.5661, 0.1118, 0.1561, 0.1659]) -Greedy action tensor([ 2.0571, 0.3551, -0.0808, 0.5106]) tensor([0.6608, 0.1205, 0.0779, 0.1408]) -Greedy action tensor([ 1.3652, 0.0431, -0.1971, 0.0505]) tensor([0.5731, 0.1528, 0.1202, 0.1539]) -Greedy action tensor([ 1.1267, 0.1699, -0.7593, 0.1017]) tensor([0.5278, 0.2028, 0.0801, 0.1894]) -Greedy action tensor([ 1.2601, 0.1841, -0.0668, 0.1047]) tensor([0.5205, 0.1775, 0.1381, 0.1639]) -Greedy action tensor([ 1.0919, -0.3158, -0.6754, 0.0047]) tensor([0.5706, 0.1396, 0.0974, 0.1924]) -Greedy action tensor([2.1855, 0.3019, 0.0109, 0.2304]) tensor([0.7106, 0.1080, 0.0808, 0.1006]) -Greedy action tensor([ 1.5869, -0.2642, -0.9203, 0.0324]) tensor([0.6897, 0.1083, 0.0562, 0.1457]) -Greedy action tensor([ 1.4109, -0.7929, -0.4449, 0.8115]) tensor([0.5507, 0.0608, 0.0861, 0.3024]) -Greedy action tensor([ 2.0441, -0.9663, -0.3562, 0.5792]) tensor([0.7294, 0.0359, 0.0661, 0.1686]) -Greedy action tensor([ 1.4755, -0.3875, -0.3906, 0.3660]) tensor([0.6099, 0.0947, 0.0944, 0.2011]) -Greedy action tensor([ 1.5969, 0.2557, -0.3474, -0.0274]) tensor([0.6244, 0.1633, 0.0893, 0.1230]) -Greedy action tensor([ 1.4782, -0.5422, -1.0118, 0.2521]) tensor([0.6627, 0.0879, 0.0549, 0.1945]) -Greedy action tensor([ 2.0435, -0.4445, -0.5776, 0.0756]) tensor([0.7719, 0.0641, 0.0561, 0.1079]) -Greedy action tensor([-0.4682, -0.8471, 1.2395, -1.1475]) tensor([0.1297, 0.0888, 0.7157, 0.0658]) -Greedy action tensor([0.8362, 0.1746, 1.2838, 0.5220]) tensor([0.2624, 0.1354, 0.4105, 0.1917]) -Greedy action tensor([-0.3467, -1.3698, 0.0697, -1.1888]) tensor([0.3024, 0.1087, 0.4586, 0.1303]) -Greedy action tensor([-0.1768, -0.3064, -0.4814, -0.6742]) tensor([0.3102, 0.2725, 0.2287, 0.1886]) -Greedy action tensor([ 0.1997, -0.5413, -0.1770, -0.3851]) tensor([0.3676, 0.1752, 0.2523, 0.2049]) -Greedy action tensor([-1.8053, -0.4698, 0.0791, -1.4507]) tensor([0.0781, 0.2968, 0.5138, 0.1113]) -Greedy action tensor([ 1.0553, -0.6564, 0.4047, 0.4454]) tensor([0.4453, 0.0804, 0.2323, 0.2420]) -Greedy action tensor([-0.6744, -0.6213, 1.4808, -0.8569]) tensor([0.0868, 0.0916, 0.7493, 0.0723]) -Greedy action tensor([1.2031, 0.3067, 0.1209, 0.7212]) tensor([0.4229, 0.1726, 0.1433, 0.2612]) -Greedy action tensor([-1.0745, -1.3474, -0.6091, 0.8378]) tensor([0.0988, 0.0752, 0.1573, 0.6687]) -Greedy action tensor([ 0.7296, -0.7044, 1.2145, 0.1325]) tensor([0.2930, 0.0698, 0.4759, 0.1613]) -Greedy action tensor([ 0.1873, 0.3838, 0.1623, -0.4894]) tensor([0.2702, 0.3289, 0.2635, 0.1373]) -Greedy action tensor([-0.2163, -0.2774, 0.8972, -0.9337]) tensor([0.1827, 0.1719, 0.5563, 0.0892]) -Greedy action tensor([ 0.7519, -1.3248, 0.3364, 0.4753]) tensor([0.3931, 0.0493, 0.2595, 0.2981]) -Greedy action tensor([-0.6924, 0.2779, -1.1255, 0.3799]) tensor([0.1387, 0.3660, 0.0900, 0.4053]) -Greedy action tensor([ 0.0712, 0.1776, 0.3785, -0.6538]) tensor([0.2528, 0.2811, 0.3437, 0.1224]) -Greedy action tensor([ 0.1260, -0.9419, -0.2599, -0.6978]) tensor([0.4061, 0.1396, 0.2761, 0.1782]) -Greedy action tensor([ 0.2792, -3.0069, 0.2387, 0.2216]) tensor([0.3399, 0.0127, 0.3264, 0.3209]) -Greedy action tensor([ 0.8663, -1.2139, 1.3628, 0.3677]) tensor([0.2963, 0.0370, 0.4868, 0.1800]) -Greedy action tensor([ 0.3929, -0.1938, -0.0827, -0.0025]) tensor([0.3507, 0.1951, 0.2180, 0.2362]) -Greedy action tensor([ 0.5864, -0.9715, -0.1453, -0.1292]) tensor([0.4586, 0.0966, 0.2206, 0.2242]) -Greedy action tensor([-0.1662, -0.2330, -0.7492, -0.2346]) tensor([0.2918, 0.2729, 0.1629, 0.2725]) -Greedy action tensor([ 0.9218, -0.5002, -0.0045, 0.0564]) tensor([0.4859, 0.1172, 0.1924, 0.2045]) -Greedy action tensor([ 0.0298, -2.0279, 0.2900, -0.1226]) tensor([0.3045, 0.0389, 0.3951, 0.2615]) -Greedy action tensor([ 1.0245, -0.2893, 0.0426, 0.6277]) tensor([0.4318, 0.1161, 0.1618, 0.2904]) -Greedy action tensor([-0.1872, 0.2508, 0.4667, -0.9530]) tensor([0.2025, 0.3138, 0.3895, 0.0942]) -Greedy action tensor([ 2.0684, -1.2514, -0.0825, 0.2586]) tensor([0.7598, 0.0275, 0.0884, 0.1244]) -Greedy action tensor([0.9831, 0.2245, 0.8768, 0.7308]) tensor([0.3180, 0.1489, 0.2859, 0.2471]) -Greedy action tensor([ 0.2433, 0.6642, -0.4419, -0.1916]) tensor([0.2721, 0.4146, 0.1372, 0.1762]) -Greedy action tensor([-0.2978, -0.7270, 0.3624, 0.0980]) tensor([0.1972, 0.1284, 0.3816, 0.2929]) -Greedy action tensor([ 0.2435, 0.5080, 1.2734, -0.6831]) tensor([0.1818, 0.2369, 0.5093, 0.0720]) -Greedy action tensor([0.8143, 0.8326, 0.1579, 0.1182]) tensor([0.3294, 0.3355, 0.1709, 0.1642]) -Greedy action tensor([ 0.6927, -1.2160, -0.0317, 0.2103]) tensor([0.4444, 0.0659, 0.2154, 0.2743]) -Greedy action tensor([-0.6716, -1.7169, -0.5614, -0.8305]) tensor([0.3011, 0.1059, 0.3362, 0.2569]) -Greedy action tensor([ 0.2264, -0.3276, 1.5009, 0.1111]) tensor([0.1655, 0.0951, 0.5920, 0.1475]) -Greedy action tensor([-0.5397, -0.2981, 0.0478, -0.1021]) tensor([0.1779, 0.2265, 0.3201, 0.2755]) -Greedy action tensor([ 0.0178, -1.9459, -0.2718, 0.5351]) tensor([0.2804, 0.0394, 0.2099, 0.4704]) -Greedy action tensor([-2.0760, 0.2968, -0.3869, -0.5867]) tensor([0.0463, 0.4972, 0.2509, 0.2055]) -Greedy action tensor([ 1.2436, -1.2470, 0.5568, -0.3381]) tensor([0.5582, 0.0462, 0.2808, 0.1148]) -Greedy action tensor([ 1.0713, -1.3169, 1.1051, 0.7747]) tensor([0.3485, 0.0320, 0.3605, 0.2591]) -Greedy action tensor([-0.2377, 0.0627, 0.7068, -0.8495]) tensor([0.1830, 0.2471, 0.4706, 0.0993]) -Greedy action tensor([ 1.0460, 0.5680, 0.6906, -0.6030]) tensor([0.3979, 0.2467, 0.2789, 0.0765]) -Greedy action tensor([-0.7061, -1.4605, -0.9092, 0.5464]) tensor([0.1728, 0.0813, 0.1411, 0.6048]) -Greedy action tensor([ 0.3688, -0.6303, -0.4278, -0.3314]) tensor([0.4319, 0.1590, 0.1947, 0.2144]) -Greedy action tensor([ 0.7850, -0.9912, 2.0021, 1.0504]) tensor([0.1709, 0.0289, 0.5773, 0.2229]) -Greedy action tensor([-0.3033, -0.7282, 0.7561, 0.4264]) tensor([0.1512, 0.0989, 0.4362, 0.3137]) -Greedy action tensor([-0.1581, -0.7436, 0.1381, -0.5501]) tensor([0.2795, 0.1557, 0.3759, 0.1889]) -Greedy action tensor([0.1325, 0.2873, 0.3203, 0.0319]) tensor([0.2337, 0.2729, 0.2820, 0.2114]) -Greedy action tensor([-2.1565, -0.7347, 0.2916, -1.1679]) tensor([0.0516, 0.2137, 0.5962, 0.1385]) -Greedy action tensor([ 1.4489, -0.5479, -0.2990, 0.9953]) tensor([0.5141, 0.0698, 0.0895, 0.3266]) -Greedy action tensor([ 0.8292, 0.0589, -0.7661, 0.9450]) tensor([0.3586, 0.1660, 0.0727, 0.4027]) -Greedy action tensor([ 0.5658, 0.0400, -0.0181, -0.4945]) tensor([0.4008, 0.2369, 0.2235, 0.1388]) -Greedy action tensor([ 0.3809, -1.1879, -0.4703, 0.9009]) tensor([0.3015, 0.0628, 0.1287, 0.5071]) -Greedy action tensor([-0.1272, -0.3615, -0.8127, -0.0848]) tensor([0.2996, 0.2370, 0.1509, 0.3125]) -Greedy action tensor([ 0.6884, -0.4175, 1.0455, 0.1154]) tensor([0.3008, 0.0996, 0.4300, 0.1696]) -Greedy action tensor([-0.0328, -0.3165, -0.1838, -0.4416]) tensor([0.3051, 0.2298, 0.2624, 0.2027]) -Greedy action tensor([-0.1299, -1.1763, -0.0201, -0.4791]) tensor([0.3152, 0.1107, 0.3518, 0.2223]) -Greedy action tensor([ 0.1740, -0.8842, 0.3886, -0.5574]) tensor([0.3260, 0.1131, 0.4040, 0.1569]) -Greedy action tensor([-0.8506, -0.3998, 0.4940, -1.0640]) tensor([0.1386, 0.2176, 0.5318, 0.1120]) -Greedy action tensor([ 0.0737, -0.6101, 0.0506, 1.0699]) tensor([0.1927, 0.0972, 0.1883, 0.5218]) -Greedy action tensor([-0.9142, -0.1483, 0.6867, -0.2045]) tensor([0.0986, 0.2121, 0.4888, 0.2005]) -Greedy action tensor([ 0.0721, 0.6904, 0.2239, -0.1927]) tensor([0.2089, 0.3877, 0.2431, 0.1603]) -Greedy action tensor([ 1.0601, -1.2750, -0.2872, 0.6016]) tensor([0.5028, 0.0487, 0.1307, 0.3179]) -Greedy action tensor([-0.4220, -0.6895, -0.4278, -0.1740]) tensor([0.2475, 0.1894, 0.2460, 0.3171]) -Greedy action tensor([ 0.3333, -0.2657, 0.1464, -0.1250]) tensor([0.3321, 0.1824, 0.2755, 0.2100]) -Greedy action tensor([-1.3197, -1.2713, 0.5458, -0.7782]) tensor([0.0978, 0.1026, 0.6315, 0.1680]) -Greedy action tensor([ 0.7944, -1.0281, -0.6004, -0.0962]) tensor([0.5495, 0.0888, 0.1362, 0.2255]) -Greedy action tensor([ 1.0769, -1.4120, -0.6600, -0.6325]) tensor([0.6944, 0.0576, 0.1223, 0.1257]) -Greedy action tensor([ 0.4515, -1.3984, 0.0931, -0.2529]) tensor([0.4255, 0.0669, 0.2973, 0.2103]) -Greedy action tensor([-0.3659, -1.2477, 0.6002, 0.5964]) tensor([0.1502, 0.0622, 0.3946, 0.3931]) -Greedy action tensor([0.3799, 0.0148, 0.1026, 0.0948]) tensor([0.3121, 0.2166, 0.2365, 0.2347]) -Greedy action tensor([-0.9332, -0.7174, -0.9776, 0.7028]) tensor([0.1200, 0.1489, 0.1148, 0.6163]) -Greedy action tensor([-0.3216, -0.3234, -0.6825, -0.5492]) tensor([0.2864, 0.2859, 0.1996, 0.2281]) -Greedy action tensor([-0.2840, 0.8671, 0.2700, -0.2797]) tensor([0.1448, 0.4578, 0.2520, 0.1454]) -Greedy action tensor([ 1.0216, -0.1488, -0.0207, 0.4875]) tensor([0.4446, 0.1379, 0.1568, 0.2606]) -Greedy action tensor([-0.1981, 0.3712, -0.6277, -0.6777]) tensor([0.2477, 0.4377, 0.1612, 0.1533]) -Greedy action tensor([-0.9240, 0.5962, 0.2867, -0.6822]) tensor([0.0980, 0.4483, 0.3289, 0.1248]) -Greedy action tensor([-1.4125, -0.5739, -0.5846, 0.6081]) tensor([0.0761, 0.1760, 0.1741, 0.5738]) -Greedy action tensor([ 0.0142, -0.5594, -0.4223, -0.7494]) tensor([0.3737, 0.2106, 0.2415, 0.1741]) -Greedy action tensor([-1.1231, 0.2414, -0.7102, -0.1609]) tensor([0.1106, 0.4328, 0.1671, 0.2895]) -Greedy action tensor([ 1.2831, -0.6563, -0.5386, 0.0084]) tensor([0.6309, 0.0907, 0.1020, 0.1763]) -Greedy action tensor([-0.8442, -0.3629, 1.3565, -1.0523]) tensor([0.0802, 0.1299, 0.7247, 0.0652]) -Greedy action tensor([-1.3590, -0.4008, 0.3538, 0.1332]) tensor([0.0735, 0.1917, 0.4077, 0.3270]) -Greedy action tensor([-1.9322, -0.4052, 0.6497, -0.1731]) tensor([0.0406, 0.1869, 0.5367, 0.2357]) -Greedy action tensor([-1.4271, 0.0079, 0.3724, -0.0489]) tensor([0.0657, 0.2760, 0.3974, 0.2608]) -Greedy action tensor([-1.5375, -0.2479, 0.5553, 0.0978]) tensor([0.0560, 0.2032, 0.4537, 0.2871]) -Greedy action tensor([-1.9287, -0.4357, 0.6601, -0.1708]) tensor([0.0407, 0.1812, 0.5420, 0.2361]) -Greedy action tensor([-1.5242, -0.2914, 0.3764, 0.0696]) tensor([0.0623, 0.2138, 0.4170, 0.3068]) -Greedy action tensor([-1.7074, -0.3392, 0.5591, -0.0254]) tensor([0.0501, 0.1969, 0.4835, 0.2695]) -Greedy action tensor([-1.4800, 0.6181, 0.3541, -0.0920]) tensor([0.0515, 0.4198, 0.3224, 0.2064]) -Greedy action tensor([-1.2690, -0.5211, 0.4519, 0.3139]) tensor([0.0737, 0.1557, 0.4119, 0.3588]) -Greedy action tensor([-1.9042, -0.4055, 0.6491, -0.1498]) tensor([0.0415, 0.1857, 0.5331, 0.2398]) -Greedy action tensor([-1.7582, -0.2935, 0.5141, -0.0526]) tensor([0.0487, 0.2107, 0.4725, 0.2681]) -Greedy action tensor([-1.4475, 0.1348, 0.3879, -0.0749]) tensor([0.0622, 0.3026, 0.3898, 0.2454]) -Greedy action tensor([-1.9311, -0.3170, 0.6408, -0.1805]) tensor([0.0402, 0.2020, 0.5263, 0.2315]) -Greedy action tensor([-1.7757, -0.4439, 0.6713, 0.1495]) tensor([0.0431, 0.1633, 0.4981, 0.2956]) -Greedy action tensor([-1.9101, -0.4327, 0.6491, -0.1609]) tensor([0.0416, 0.1821, 0.5373, 0.2390]) -Greedy action tensor([-1.1079, -0.3329, 0.3287, 0.4016]) tensor([0.0840, 0.1824, 0.3534, 0.3802]) -Greedy action tensor([-1.8016, -0.4030, 0.5949, -0.0960]) tensor([0.0464, 0.1880, 0.5100, 0.2556]) -Greedy action tensor([-1.6387, -0.4601, 0.5098, -0.0262]) tensor([0.0561, 0.1822, 0.4806, 0.2812]) -Greedy action tensor([-1.7623, -0.3842, 0.5733, -0.1263]) tensor([0.0489, 0.1941, 0.5057, 0.2512]) -Greedy action tensor([-1.7594, -0.4576, 0.6036, 0.0188]) tensor([0.0471, 0.1732, 0.5007, 0.2790]) -Greedy action tensor([-0.7815, -0.5598, 0.3046, 0.3696]) tensor([0.1194, 0.1491, 0.3539, 0.3776]) -Greedy action tensor([-1.7602, -0.4472, 0.6118, -0.0362]) tensor([0.0475, 0.1767, 0.5094, 0.2664]) -Greedy action tensor([-1.7850, 0.2870, 0.4754, -0.0532]) tensor([0.0414, 0.3284, 0.3965, 0.2337]) -Greedy action tensor([-1.5873, -0.4457, 0.4629, 0.0093]) tensor([0.0594, 0.1860, 0.4614, 0.2932]) -Greedy action tensor([-1.2469, 0.5168, 0.3560, 0.6219]) tensor([0.0547, 0.3191, 0.2717, 0.3545]) -Greedy action tensor([-1.9033, -0.4516, 0.6822, -0.1601]) tensor([0.0412, 0.1761, 0.5471, 0.2356]) -Greedy action tensor([-1.7199, -0.5596, 0.6773, -0.0823]) tensor([0.0492, 0.1570, 0.5408, 0.2530]) -Greedy action tensor([-1.8492, -0.4507, 0.6181, -0.1392]) tensor([0.0447, 0.1810, 0.5271, 0.2472]) -Greedy action tensor([ 0.3496, 1.1824, -0.1790, 0.3207]) tensor([0.2057, 0.4731, 0.1213, 0.1999]) -Greedy action tensor([-1.7219, 0.2269, 0.6328, -0.4311]) tensor([0.0451, 0.3164, 0.4747, 0.1638]) -Greedy action tensor([-0.9180, -0.5461, 0.2499, 0.0879]) tensor([0.1191, 0.1727, 0.3828, 0.3255]) -Greedy action tensor([-1.2994, 0.7919, 0.3129, -0.1791]) tensor([0.0582, 0.4713, 0.2919, 0.1785]) -Greedy action tensor([-1.8342, -0.4198, 0.6101, -0.1254]) tensor([0.0451, 0.1857, 0.5200, 0.2492]) -Greedy action tensor([-1.5802, -0.5549, 1.1706, 0.8494]) tensor([0.0325, 0.0905, 0.5083, 0.3687]) -Greedy action tensor([-1.8578, -0.3456, 0.6321, -0.1272]) tensor([0.0430, 0.1952, 0.5189, 0.2428]) -Greedy action tensor([-1.7847, -0.4113, 0.6672, 0.0516]) tensor([0.0438, 0.1729, 0.5085, 0.2748]) -Greedy action tensor([ 3.3574e-01, 1.1825e+00, -3.9381e-04, 5.2752e-01]) tensor([0.1902, 0.4435, 0.1359, 0.2304]) -Greedy action tensor([-1.6040, -0.2305, 0.4701, -0.0638]) tensor([0.0569, 0.2247, 0.4528, 0.2655]) -Greedy action tensor([-1.8529, -0.4727, 0.6154, -0.1218]) tensor([0.0446, 0.1773, 0.5263, 0.2518]) -Greedy action tensor([-1.8345, -0.4178, 0.6145, -0.1128]) tensor([0.0449, 0.1850, 0.5193, 0.2509]) -Greedy action tensor([-1.9101, -0.4683, 0.7237, -0.1266]) tensor([0.0398, 0.1684, 0.5547, 0.2370]) -Greedy action tensor([-1.6184, 0.5377, 0.3908, -0.0452]) tensor([0.0456, 0.3941, 0.3403, 0.2200]) -Greedy action tensor([-1.8377, -0.3061, 0.6426, -0.1060]) tensor([0.0431, 0.1992, 0.5144, 0.2433]) -Greedy action tensor([-1.8771, -0.3579, 0.6141, -0.1368]) tensor([0.0428, 0.1957, 0.5173, 0.2441]) -Greedy action tensor([-1.9478, -0.4538, 0.6707, -0.1824]) tensor([0.0400, 0.1781, 0.5483, 0.2336]) -Greedy action tensor([-1.9008, -0.4235, 0.6410, -0.1601]) tensor([0.0420, 0.1842, 0.5341, 0.2397]) -Greedy action tensor([1.1990, 1.3511, 0.0318, 0.6302]) tensor([0.3288, 0.3828, 0.1023, 0.1861]) -Greedy action tensor([-1.9042, -0.3524, 0.6359, -0.1459]) tensor([0.0413, 0.1950, 0.5239, 0.2397]) -Greedy action tensor([-1.6112, -0.1776, 0.6020, 0.0378]) tensor([0.0512, 0.2146, 0.4680, 0.2662]) -Greedy action tensor([-1.8913, -0.4428, 0.6461, -0.1500]) tensor([0.0424, 0.1803, 0.5357, 0.2416]) -Greedy action tensor([-1.6650, -0.3379, 0.6806, 0.1276]) tensor([0.0471, 0.1777, 0.4921, 0.2831]) -Greedy action tensor([-1.4746, 0.0956, 0.5676, 0.1957]) tensor([0.0531, 0.2553, 0.4093, 0.2822]) -Greedy action tensor([-1.9283, -0.4260, 0.6578, -0.1713]) tensor([0.0407, 0.1829, 0.5405, 0.2359]) -Greedy action tensor([-1.9168, -0.3414, 0.6321, -0.1583]) tensor([0.0409, 0.1978, 0.5237, 0.2376]) -Greedy action tensor([-1.5805, -0.0784, 0.5271, 0.1334]) tensor([0.0519, 0.2331, 0.4270, 0.2880]) -Greedy action tensor([-1.9132, -0.3847, 0.6482, -0.1628]) tensor([0.0411, 0.1896, 0.5326, 0.2367]) -Greedy action tensor([-1.7504, -0.4088, 0.5696, -0.0842]) tensor([0.0493, 0.1885, 0.5015, 0.2608]) -Greedy action tensor([-1.9125, -0.4253, 0.6636, -0.1459]) tensor([0.0409, 0.1812, 0.5383, 0.2396]) -Greedy action tensor([-1.9136, -0.3966, 0.6514, -0.1615]) tensor([0.0411, 0.1874, 0.5344, 0.2371]) -Greedy action tensor([-1.8562, -0.4566, 0.6983, -0.0987]) tensor([0.0422, 0.1709, 0.5425, 0.2445]) -Greedy action tensor([-1.8650, -0.4586, 0.6310, -0.1446]) tensor([0.0439, 0.1790, 0.5322, 0.2450]) -Greedy action tensor([-1.4780, 0.0644, 0.5036, 0.1203]) tensor([0.0559, 0.2616, 0.4058, 0.2766]) -Greedy action tensor([-1.9347, -0.4376, 0.6627, -0.1749]) tensor([0.0405, 0.1809, 0.5435, 0.2352]) -Greedy action tensor([-0.5804, 0.8736, 0.0740, 0.1240]) tensor([0.1084, 0.4639, 0.2085, 0.2192]) -Greedy action tensor([-1.4604, 0.3167, 0.3640, -0.0691]) tensor([0.0584, 0.3451, 0.3619, 0.2347]) -Greedy action tensor([-1.9347, -0.4494, 0.6628, -0.1752]) tensor([0.0406, 0.1791, 0.5447, 0.2356]) -Greedy action tensor([-1.2637, 0.3849, 0.3532, -0.1619]) tensor([0.0702, 0.3650, 0.3536, 0.2112]) -Greedy action tensor([-1.3451, -0.5392, 0.3775, 0.1504]) tensor([0.0752, 0.1683, 0.4210, 0.3355]) -Greedy action tensor([-1.1124, 0.7672, 0.1514, 0.1752]) tensor([0.0680, 0.4452, 0.2405, 0.2463]) -Greedy action tensor([-1.7908, -0.4214, 0.5874, -0.1277]) tensor([0.0476, 0.1873, 0.5137, 0.2513]) -Greedy action tensor([-1.8257, -0.3817, 0.5840, -0.1086]) tensor([0.0456, 0.1932, 0.5074, 0.2538]) -Greedy action tensor([-1.9272, -0.4478, 0.6640, -0.1654]) tensor([0.0407, 0.1788, 0.5434, 0.2371]) -Greedy action tensor([-1.0482, 0.8484, 0.1273, 0.2009]) tensor([0.0695, 0.4630, 0.2251, 0.2423]) -Greedy action tensor([-1.7988, -0.3858, 0.5858, -0.1247]) tensor([0.0470, 0.1929, 0.5097, 0.2505]) -Greedy action tensor([-1.9286, -0.4424, 0.6598, -0.1724]) tensor([0.0408, 0.1803, 0.5428, 0.2362]) -Greedy action tensor([-1.7977, -0.3618, 0.6284, -0.1017]) tensor([0.0455, 0.1913, 0.5150, 0.2481]) -Greedy action tensor([-1.8389, -0.3874, 0.6069, -0.1281]) tensor([0.0448, 0.1911, 0.5165, 0.2477]) -Greedy action tensor([-0.9707, -0.2250, 0.3165, -0.1305]) tensor([0.1105, 0.2330, 0.4004, 0.2561]) -Greedy action tensor([-1.8960, -0.3540, 0.6390, -0.1537]) tensor([0.0417, 0.1947, 0.5257, 0.2379]) -Greedy action tensor([-1.9264, -0.3691, 0.6431, -0.1698]) tensor([0.0407, 0.1929, 0.5309, 0.2355]) -Greedy action tensor([-1.1668, 0.5899, 0.1489, 0.0954]) tensor([0.0712, 0.4122, 0.2652, 0.2514]) -Greedy action tensor([ 0.5449, -0.2791, -0.0324, -0.3412]) tensor([0.4145, 0.1818, 0.2327, 0.1709]) -Greedy action tensor([ 0.5901, -0.2484, -0.0133, -0.4798]) tensor([0.4306, 0.1862, 0.2355, 0.1477]) -Greedy action tensor([ 0.4707, -0.3028, -0.0584, -0.1052]) tensor([0.3827, 0.1766, 0.2255, 0.2152]) -Greedy action tensor([ 0.7032, -0.5911, 0.0716, -0.4715]) tensor([0.4729, 0.1296, 0.2514, 0.1461]) -Greedy action tensor([ 0.9690, -0.9901, -0.0335, -0.4277]) tensor([0.5697, 0.0803, 0.2091, 0.1409]) -Greedy action tensor([ 1.2670, -0.8605, 0.0042, -0.9314]) tensor([0.6610, 0.0787, 0.1870, 0.0734]) -Greedy action tensor([ 0.8308, -0.2224, -0.0428, -0.0715]) tensor([0.4604, 0.1606, 0.1922, 0.1868]) -Greedy action tensor([ 0.8244, -0.5735, 0.0507, -0.4293]) tensor([0.5015, 0.1239, 0.2314, 0.1432]) -Greedy action tensor([ 0.9825, -0.5567, 0.0236, -0.4329]) tensor([0.5433, 0.1166, 0.2082, 0.1319]) -Greedy action tensor([ 0.7846, -0.5025, -0.0548, -0.4162]) tensor([0.4978, 0.1374, 0.2150, 0.1498]) -Greedy action tensor([ 0.5045, -0.5341, 0.0380, -0.1559]) tensor([0.4003, 0.1417, 0.2511, 0.2068]) -Greedy action tensor([ 0.3380, -0.0834, 0.0072, -0.0188]) tensor([0.3253, 0.2134, 0.2337, 0.2277]) -Greedy action tensor([ 1.2468, -0.6403, -0.1465, -0.5277]) tensor([0.6372, 0.0966, 0.1582, 0.1081]) -Greedy action tensor([ 0.8796, -0.3492, -0.0936, -0.1510]) tensor([0.4933, 0.1443, 0.1864, 0.1760]) -Greedy action tensor([ 1.0351, -0.5952, 0.0545, -0.1975]) tensor([0.5369, 0.1052, 0.2014, 0.1565]) -Greedy action tensor([ 0.8622, -0.5359, 0.0080, -0.4606]) tensor([0.5157, 0.1274, 0.2195, 0.1374]) -Greedy action tensor([ 1.0446, -0.3153, 0.1323, -0.4724]) tensor([0.5326, 0.1367, 0.2139, 0.1168]) -Greedy action tensor([ 1.1830, -1.0655, 0.2169, -0.4507]) tensor([0.5948, 0.0628, 0.2263, 0.1161]) -Greedy action tensor([ 0.8422, -0.5269, 0.0604, -0.2029]) tensor([0.4846, 0.1233, 0.2217, 0.1704]) -Greedy action tensor([ 0.7034, -0.4410, 0.1455, -0.5062]) tensor([0.4568, 0.1455, 0.2615, 0.1363]) -Greedy action tensor([ 0.8580, -0.5958, -0.1358, -0.7623]) tensor([0.5550, 0.1297, 0.2055, 0.1098]) -Greedy action tensor([ 1.1041, -0.3323, -0.1844, -0.1761]) tensor([0.5582, 0.1327, 0.1539, 0.1552]) -Greedy action tensor([ 0.3626, -0.2161, 0.0106, -0.5761]) tensor([0.3766, 0.2112, 0.2649, 0.1473]) -Greedy action tensor([ 0.6540, -0.5682, -0.1988, -0.1936]) tensor([0.4653, 0.1371, 0.1983, 0.1993]) -Greedy action tensor([ 0.9610, -0.3093, -0.0952, -0.3318]) tensor([0.5255, 0.1475, 0.1827, 0.1442]) -Greedy action tensor([ 0.6617, 0.1808, -0.1290, 0.1279]) tensor([0.3762, 0.2326, 0.1706, 0.2206]) -Greedy action tensor([ 0.6276, -0.2721, 0.0521, -0.3644]) tensor([0.4274, 0.1738, 0.2404, 0.1585]) -Greedy action tensor([ 0.8215, -0.6637, -0.0485, -0.3683]) tensor([0.5129, 0.1162, 0.2149, 0.1561]) -Greedy action tensor([ 0.8940, -0.6677, -0.0683, -0.3628]) tensor([0.5330, 0.1118, 0.2036, 0.1517]) -Greedy action tensor([ 0.6788, -0.2702, 0.0703, -0.1246]) tensor([0.4203, 0.1627, 0.2287, 0.1882]) -Greedy action tensor([ 0.8525, -0.0830, 0.0016, -0.0125]) tensor([0.4463, 0.1751, 0.1906, 0.1879]) -Greedy action tensor([ 0.4125, -0.2343, 0.0940, -0.3309]) tensor([0.3668, 0.1921, 0.2667, 0.1744]) -Greedy action tensor([ 0.7482, -0.3710, -0.2593, -0.4466]) tensor([0.5014, 0.1637, 0.1831, 0.1518]) -Greedy action tensor([0.1711, 0.2456, 0.0819, 0.1548]) tensor([0.2515, 0.2710, 0.2301, 0.2475]) -Greedy action tensor([ 0.7608, -0.6502, -0.0573, -0.3132]) tensor([0.4934, 0.1203, 0.2177, 0.1686]) -Greedy action tensor([ 0.5615, -0.1195, -0.1188, -0.1450]) tensor([0.3991, 0.2020, 0.2021, 0.1969]) -Greedy action tensor([ 0.6831, -0.3232, -0.0517, -0.3311]) tensor([0.4529, 0.1656, 0.2172, 0.1643]) -Greedy action tensor([ 0.9299, -1.0263, 0.0882, -0.5654]) tensor([0.5566, 0.0787, 0.2399, 0.1248]) -Greedy action tensor([ 0.5714, -0.5487, -0.0218, -0.3269]) tensor([0.4374, 0.1427, 0.2417, 0.1781]) -Greedy action tensor([ 0.7375, -0.5945, -0.0726, -0.3487]) tensor([0.4887, 0.1290, 0.2174, 0.1649]) -Greedy action tensor([ 0.5680, -0.3785, -0.0747, -0.1609]) tensor([0.4173, 0.1620, 0.2194, 0.2013]) -Greedy action tensor([ 0.8614, -0.5046, -0.0292, -0.4130]) tensor([0.5141, 0.1312, 0.2110, 0.1437]) -Greedy action tensor([ 0.3772, 0.0168, -0.1480, -0.1660]) tensor([0.3485, 0.2430, 0.2061, 0.2024]) -Greedy action tensor([ 0.8730, -0.4178, 0.1657, -0.0796]) tensor([0.4643, 0.1277, 0.2289, 0.1791]) -Greedy action tensor([ 0.8483, -0.2488, 0.0551, -0.0130]) tensor([0.4527, 0.1511, 0.2048, 0.1913]) -Greedy action tensor([ 0.6078, 0.1393, -0.1239, 0.0298]) tensor([0.3748, 0.2346, 0.1803, 0.2103]) -Greedy action tensor([ 0.4403, -0.5283, -0.1534, -0.1412]) tensor([0.4014, 0.1524, 0.2217, 0.2244]) -Greedy action tensor([ 0.9871, -0.1703, 0.0983, -0.7144]) tensor([0.5242, 0.1647, 0.2155, 0.0956]) -Greedy action tensor([ 0.6512, -0.3705, 0.0249, -0.3445]) tensor([0.4417, 0.1590, 0.2361, 0.1632]) -Greedy action tensor([ 0.5577, -0.1930, 0.1210, -0.2268]) tensor([0.3884, 0.1833, 0.2510, 0.1773]) -Greedy action tensor([ 0.5076, -0.4249, -0.1082, -0.3000]) tensor([0.4202, 0.1654, 0.2270, 0.1874]) -Greedy action tensor([ 1.0207, -0.5800, 0.0453, -0.4559]) tensor([0.5534, 0.1116, 0.2086, 0.1264]) -Greedy action tensor([ 0.8332, -0.7750, 0.2509, -0.7686]) tensor([0.5101, 0.1021, 0.2850, 0.1028]) -Greedy action tensor([ 0.7996, -0.4552, -0.1137, -0.1560]) tensor([0.4829, 0.1377, 0.1937, 0.1857]) -Greedy action tensor([ 0.7095, -0.3740, -0.0455, -0.1732]) tensor([0.4500, 0.1523, 0.2115, 0.1862]) -Greedy action tensor([ 1.0353, -0.2439, 0.1048, -0.4835]) tensor([0.5287, 0.1471, 0.2085, 0.1158]) -Greedy action tensor([ 0.6311, -0.2775, -0.0844, -0.4147]) tensor([0.4457, 0.1797, 0.2179, 0.1566]) -Greedy action tensor([ 0.6385, -0.1007, -0.0276, -0.0234]) tensor([0.3989, 0.1905, 0.2049, 0.2058]) -Greedy action tensor([ 0.9304, -0.6555, -0.1306, -0.3694]) tensor([0.5484, 0.1123, 0.1898, 0.1495]) -Greedy action tensor([ 0.8256, -0.0273, -0.2077, -0.1777]) tensor([0.4654, 0.1983, 0.1656, 0.1706]) -Greedy action tensor([ 0.5936, -0.5914, -0.0012, -0.6023]) tensor([0.4630, 0.1416, 0.2554, 0.1400]) -Greedy action tensor([ 0.5048, 0.0672, -0.0713, -0.2898]) tensor([0.3760, 0.2428, 0.2114, 0.1699]) -Greedy action tensor([ 0.8922, -0.3620, 0.0343, -0.0710]) tensor([0.4782, 0.1364, 0.2028, 0.1825]) -Greedy action tensor([ 6.7109e-01, -5.1554e-01, -5.9557e-04, -3.3240e-01]) tensor([0.4582, 0.1398, 0.2340, 0.1680]) -Greedy action tensor([ 0.6852, -0.5314, -0.0381, -0.2204]) tensor([0.4575, 0.1355, 0.2220, 0.1850]) -Greedy action tensor([ 0.8975, -0.6372, 0.0031, -0.6229]) tensor([0.5426, 0.1169, 0.2218, 0.1186]) -Greedy action tensor([ 0.9501, -0.6851, -0.0122, -0.4599]) tensor([0.5491, 0.1070, 0.2098, 0.1341]) -Greedy action tensor([ 0.8234, -0.4428, -0.0471, -0.3574]) tensor([0.4981, 0.1404, 0.2086, 0.1529]) -Greedy action tensor([ 1.0429, -0.7251, 0.2577, -0.6249]) tensor([0.5509, 0.0940, 0.2512, 0.1039]) -Greedy action tensor([ 0.8593, -0.1570, -0.0299, -0.0212]) tensor([0.4572, 0.1654, 0.1879, 0.1895]) -Greedy action tensor([ 0.7365, -0.4519, -0.0621, -0.4529]) tensor([0.4857, 0.1480, 0.2185, 0.1478]) -Greedy action tensor([ 0.6857, -0.4116, -0.1513, -0.1326]) tensor([0.4529, 0.1512, 0.1961, 0.1998]) -Greedy action tensor([ 0.6134, -0.2203, -0.0336, -0.1138]) tensor([0.4096, 0.1780, 0.2145, 0.1980]) -Greedy action tensor([ 0.6464, -0.5111, -0.1510, -0.1559]) tensor([0.4519, 0.1420, 0.2036, 0.2026]) -Greedy action tensor([ 0.7745, -0.4562, -0.0978, -0.6078]) tensor([0.5099, 0.1489, 0.2131, 0.1280]) -Greedy action tensor([ 0.8830, -0.4679, 0.0056, -0.1775]) tensor([0.4948, 0.1282, 0.2058, 0.1713]) -Greedy action tensor([ 1.0029, -0.6814, 0.0868, -0.5090]) tensor([0.5537, 0.1027, 0.2215, 0.1221]) -Greedy action tensor([ 0.3831, 0.0428, -0.1594, -0.2043]) tensor([0.3510, 0.2498, 0.2041, 0.1951]) -Greedy action tensor([ 0.7899, -0.7805, 0.0195, -0.3630]) tensor([0.5034, 0.1047, 0.2330, 0.1589]) -Greedy action tensor([ 0.7322, -0.1488, -0.0505, -0.0140]) tensor([0.4263, 0.1767, 0.1949, 0.2021]) -Greedy action tensor([ 0.6732, -0.4621, -0.0312, -0.1917]) tensor([0.4471, 0.1437, 0.2210, 0.1883]) -Greedy action tensor([ 1.4324, -0.7438, -0.2044, 0.2867]) tensor([0.6150, 0.0698, 0.1197, 0.1956]) -Greedy action tensor([ 1.3286, -0.3728, -0.2277, 0.5397]) tensor([0.5412, 0.0987, 0.1142, 0.2459]) -Greedy action tensor([ 1.3060, -0.1344, -0.6003, 0.3797]) tensor([0.5613, 0.1330, 0.0834, 0.2223]) -Greedy action tensor([ 1.5644, -0.7545, -0.4842, 0.5134]) tensor([0.6342, 0.0624, 0.0818, 0.2217]) -Greedy action tensor([ 1.5538, -0.4693, -0.4916, -0.0656]) tensor([0.6851, 0.0906, 0.0886, 0.1357]) -Greedy action tensor([ 1.3455, -0.2261, -0.6173, 0.5049]) tensor([0.5619, 0.1167, 0.0789, 0.2424]) -Greedy action tensor([ 1.5013, -0.8670, -0.7217, 0.4718]) tensor([0.6414, 0.0601, 0.0695, 0.2291]) -Greedy action tensor([ 1.6151, -0.3779, -0.3431, 0.3372]) tensor([0.6427, 0.0876, 0.0907, 0.1791]) -Greedy action tensor([ 1.5565, -0.4904, -0.4709, 0.8999]) tensor([0.5620, 0.0726, 0.0740, 0.2915]) -Greedy action tensor([ 1.4492, -0.4804, -0.2896, 0.0556]) tensor([0.6373, 0.0925, 0.1120, 0.1582]) -Greedy action tensor([ 1.9184, -0.7993, -0.6425, 0.6613]) tensor([0.7004, 0.0462, 0.0541, 0.1992]) -Greedy action tensor([ 1.8337, -0.7020, -0.2828, 0.6797]) tensor([0.6601, 0.0523, 0.0795, 0.2082]) -Greedy action tensor([ 0.9711, -0.4051, 0.1251, 0.2293]) tensor([0.4634, 0.1170, 0.1989, 0.2207]) -Greedy action tensor([ 1.0409, -0.3736, -0.0820, 0.2931]) tensor([0.4898, 0.1190, 0.1593, 0.2319]) -Greedy action tensor([ 1.5868, -0.2906, -0.2029, 0.1301]) tensor([0.6439, 0.0985, 0.1075, 0.1500]) -Greedy action tensor([ 0.9532, -0.5661, -0.1825, 0.1599]) tensor([0.5019, 0.1098, 0.1612, 0.2270]) -Greedy action tensor([ 1.3229, -0.3341, -0.4774, 0.1788]) tensor([0.5972, 0.1139, 0.0987, 0.1902]) -Greedy action tensor([ 1.2450, -0.7796, -0.1620, 0.5682]) tensor([0.5305, 0.0700, 0.1299, 0.2696]) -Greedy action tensor([ 2.2929, -0.9284, -0.6221, 0.7359]) tensor([0.7664, 0.0306, 0.0415, 0.1615]) -Greedy action tensor([ 1.7563, -0.1643, -0.3303, 0.1112]) tensor([0.6832, 0.1001, 0.0848, 0.1319]) -Greedy action tensor([ 1.4265, -0.2300, -0.5140, 0.1187]) tensor([0.6231, 0.1189, 0.0895, 0.1685]) -Greedy action tensor([ 0.6906, -0.3803, -0.3337, 0.6123]) tensor([0.3808, 0.1305, 0.1367, 0.3521]) -Greedy action tensor([ 1.7971, -0.8319, -0.6261, 0.7348]) tensor([0.6638, 0.0479, 0.0588, 0.2295]) -Greedy action tensor([ 1.7069, -0.7940, -0.8590, 0.2839]) tensor([0.7144, 0.0586, 0.0549, 0.1722]) -Greedy action tensor([ 0.9943, -0.1615, -1.1658, 0.4422]) tensor([0.4985, 0.1569, 0.0575, 0.2870]) -Greedy action tensor([ 1.8629, -0.6661, -0.2957, -0.0180]) tensor([0.7420, 0.0592, 0.0857, 0.1131]) -Greedy action tensor([ 1.8944, -0.2293, -0.5017, -0.0806]) tensor([0.7411, 0.0886, 0.0675, 0.1028]) -Greedy action tensor([ 1.3860, -0.2131, -0.6995, -0.3176]) tensor([0.6630, 0.1340, 0.0824, 0.1207]) -Greedy action tensor([ 1.7603, -0.5886, -0.8213, 0.2390]) tensor([0.7197, 0.0687, 0.0544, 0.1572]) -Greedy action tensor([ 1.6259, -1.0453, -0.6732, 0.4222]) tensor([0.6804, 0.0471, 0.0683, 0.2042]) -Greedy action tensor([ 1.9541, -0.1042, -0.5200, 0.8511]) tensor([0.6478, 0.0827, 0.0546, 0.2150]) -Greedy action tensor([ 1.3420, -0.6384, -0.1116, 0.2860]) tensor([0.5815, 0.0803, 0.1359, 0.2023]) -Greedy action tensor([ 1.7521, -0.4631, -0.5460, 0.3006]) tensor([0.6926, 0.0756, 0.0696, 0.1622]) -Greedy action tensor([ 1.5414, -0.6499, 0.0916, 0.6887]) tensor([0.5641, 0.0631, 0.1324, 0.2405]) -Greedy action tensor([ 1.5193, -0.6633, -1.0299, -0.0099]) tensor([0.7104, 0.0801, 0.0555, 0.1540]) -Greedy action tensor([ 1.5109, -0.5111, -0.7896, 0.3746]) tensor([0.6437, 0.0852, 0.0645, 0.2066]) -Greedy action tensor([ 1.9505, -1.5272, -0.3243, 0.1915]) tensor([0.7657, 0.0236, 0.0787, 0.1319]) -Greedy action tensor([ 1.6458, -0.4969, -0.4059, 0.2385]) tensor([0.6708, 0.0787, 0.0862, 0.1642]) -Greedy action tensor([ 1.9786, -0.6630, -0.5463, 0.1585]) tensor([0.7614, 0.0543, 0.0610, 0.1234]) -Greedy action tensor([ 1.4129, -0.2809, -0.7613, 0.1225]) tensor([0.6359, 0.1169, 0.0723, 0.1750]) -Greedy action tensor([ 1.0777, -0.5319, -0.5418, 0.3211]) tensor([0.5356, 0.1071, 0.1060, 0.2513]) -Greedy action tensor([ 1.6699, -0.6779, -0.3722, 0.1874]) tensor([0.6885, 0.0658, 0.0893, 0.1563]) -Greedy action tensor([ 1.4172, -0.3324, -0.6341, 0.3294]) tensor([0.6100, 0.1060, 0.0784, 0.2055]) -Greedy action tensor([ 1.3958, -0.4635, -0.8196, 0.6457]) tensor([0.5757, 0.0897, 0.0628, 0.2719]) -Greedy action tensor([ 1.7378, -0.5826, -0.6929, 0.4673]) tensor([0.6817, 0.0670, 0.0600, 0.1914]) -Greedy action tensor([ 1.7439, -0.6709, -0.3318, 0.0497]) tensor([0.7150, 0.0639, 0.0897, 0.1314]) -Greedy action tensor([ 1.8532, -0.2390, -0.4219, 0.2299]) tensor([0.7025, 0.0867, 0.0722, 0.1386]) -Greedy action tensor([ 1.4955, -0.4396, -0.4530, -0.0702]) tensor([0.6685, 0.0965, 0.0953, 0.1397]) -Greedy action tensor([ 1.4248, -0.3892, -0.9980, 0.4561]) tensor([0.6130, 0.0999, 0.0544, 0.2327]) -Greedy action tensor([ 1.4787, -0.2325, -0.5448, 0.0494]) tensor([0.6442, 0.1164, 0.0852, 0.1543]) -Greedy action tensor([ 2.0741, 0.3312, -0.0420, 0.1201]) tensor([0.6958, 0.1218, 0.0838, 0.0986]) -Greedy action tensor([ 1.3653, -0.5246, -0.1323, 0.4566]) tensor([0.5625, 0.0850, 0.1258, 0.2267]) -Greedy action tensor([ 1.1877, -0.0511, -0.2148, -0.1092]) tensor([0.5528, 0.1602, 0.1360, 0.1511]) -Greedy action tensor([ 1.9488, -1.0261, -0.1945, 0.4247]) tensor([0.7214, 0.0368, 0.0846, 0.1571]) -Greedy action tensor([ 1.8223, -0.9482, -0.5382, 0.5940]) tensor([0.6897, 0.0432, 0.0651, 0.2020]) -Greedy action tensor([2.2028, 0.3795, 0.0646, 0.1175]) tensor([0.7124, 0.1151, 0.0840, 0.0885]) -Greedy action tensor([ 1.1492, -0.0592, -0.5892, 0.1489]) tensor([0.5428, 0.1621, 0.0954, 0.1996]) -Greedy action tensor([ 1.5956, -0.1705, -0.8521, 0.3654]) tensor([0.6453, 0.1103, 0.0558, 0.1886]) -Greedy action tensor([ 1.6625, 0.4190, -0.5014, 0.3181]) tensor([0.6010, 0.1733, 0.0690, 0.1567]) -Greedy action tensor([ 1.1473, -0.3145, -0.1316, -0.2411]) tensor([0.5683, 0.1317, 0.1582, 0.1418]) -Greedy action tensor([ 0.9000, -0.6969, -0.4693, -0.1127]) tensor([0.5494, 0.1113, 0.1397, 0.1996]) -Greedy action tensor([ 1.5371, -0.3532, -0.4329, 0.2440]) tensor([0.6390, 0.0965, 0.0891, 0.1754]) -Greedy action tensor([ 1.1673, -0.6381, -0.6410, 0.4136]) tensor([0.5559, 0.0914, 0.0911, 0.2616]) -Greedy action tensor([ 1.7872, -0.4213, -0.6251, 0.2878]) tensor([0.7029, 0.0772, 0.0630, 0.1569]) -Greedy action tensor([ 1.2981, -0.7829, -0.2124, 0.4423]) tensor([0.5648, 0.0705, 0.1247, 0.2400]) -Greedy action tensor([ 1.4770, -0.4009, -0.7977, 0.1373]) tensor([0.6589, 0.1008, 0.0678, 0.1726]) -Greedy action tensor([ 1.8094, -0.9755, -0.2378, 0.5378]) tensor([0.6797, 0.0420, 0.0877, 0.1906]) -Greedy action tensor([ 1.7591, 0.3402, -0.3215, 0.2516]) tensor([0.6296, 0.1524, 0.0786, 0.1394]) -Greedy action tensor([ 1.6679, 0.1118, -0.9011, 0.3342]) tensor([0.6447, 0.1360, 0.0494, 0.1699]) -Greedy action tensor([ 1.4848, -0.5487, 0.0134, 0.5248]) tensor([0.5736, 0.0751, 0.1317, 0.2196]) -Greedy action tensor([ 1.8418, -0.2012, -0.8721, 0.0822]) tensor([0.7310, 0.0948, 0.0484, 0.1258]) -Greedy action tensor([ 1.6009, -0.9934, -0.5214, 0.6032]) tensor([0.6397, 0.0478, 0.0766, 0.2359]) -Greedy action tensor([ 1.1917, -0.3976, -0.2429, 0.1914]) tensor([0.5525, 0.1127, 0.1316, 0.2032]) -Greedy action tensor([ 1.8514, -0.8606, -0.3026, 0.5699]) tensor([0.6849, 0.0455, 0.0795, 0.1901]) -Greedy action tensor([ 1.4249, -0.8739, -0.2890, 0.1987]) tensor([0.6354, 0.0638, 0.1145, 0.1864]) -Greedy action tensor([ 1.7069, -0.0738, -0.1587, -0.0812]) tensor([0.6709, 0.1131, 0.1039, 0.1122]) -Greedy action tensor([ 0.9710, -0.1644, -0.2784, 0.1578]) tensor([0.4875, 0.1566, 0.1397, 0.2162]) -Greedy action tensor([ 1.6417, 0.4271, -0.7987, 0.1299]) tensor([0.6233, 0.1850, 0.0543, 0.1374]) -Greedy action tensor([ 1.2641, -0.2390, -0.4282, 0.4341]) tensor([0.5427, 0.1207, 0.0999, 0.2367]) -Greedy action tensor([ 1.4058, -0.3964, -0.7839, 0.2059]) tensor([0.6337, 0.1045, 0.0709, 0.1909]) -Greedy action tensor([ 1.3177, -0.3390, -0.6751, 0.1258]) tensor([0.6132, 0.1170, 0.0836, 0.1862]) -Greedy action tensor([-0.0215, -1.5260, -0.6043, -0.8849]) tensor([0.4541, 0.1009, 0.2535, 0.1915]) -Greedy action tensor([-0.6380, 0.0211, 0.3110, -0.3831]) tensor([0.1469, 0.2840, 0.3795, 0.1896]) -Greedy action tensor([ 0.5732, -1.7725, 0.1301, -0.1951]) tensor([0.4542, 0.0435, 0.2916, 0.2107]) -Greedy action tensor([ 0.2905, -1.5669, -0.0213, -0.5643]) tensor([0.4322, 0.0675, 0.3164, 0.1839]) -Greedy action tensor([-0.9837, 0.2445, 0.0844, -0.9480]) tensor([0.1196, 0.4084, 0.3480, 0.1239]) -Greedy action tensor([0.8162, 0.2446, 0.3697, 0.5359]) tensor([0.3378, 0.1908, 0.2162, 0.2552]) -Greedy action tensor([ 0.5960, -0.3374, -0.1763, 0.0546]) tensor([0.4103, 0.1613, 0.1895, 0.2388]) -Greedy action tensor([ 1.7980, 0.0332, -0.5914, 0.9344]) tensor([0.5936, 0.1016, 0.0544, 0.2503]) -Greedy action tensor([ 1.1481, -0.3516, 1.4253, 0.3247]) tensor([0.3354, 0.0749, 0.4425, 0.1472]) -Greedy action tensor([ 1.1350, -1.7879, 0.6324, 1.0784]) tensor([0.3841, 0.0207, 0.2323, 0.3629]) -Greedy action tensor([ 0.1954, -0.7135, 0.5984, -0.1282]) tensor([0.2760, 0.1112, 0.4130, 0.1997]) -Greedy action tensor([-0.4564, -0.6605, 0.4880, 0.0733]) tensor([0.1643, 0.1340, 0.4225, 0.2791]) -Greedy action tensor([ 0.3188, -0.8288, 0.9147, -0.0222]) tensor([0.2602, 0.0826, 0.4722, 0.1850]) -Greedy action tensor([ 0.2820, -0.9245, -0.2830, 0.0310]) tensor([0.3780, 0.1131, 0.2148, 0.2941]) -Greedy action tensor([ 0.6706, 0.2331, -0.4315, -0.1525]) tensor([0.4138, 0.2671, 0.1374, 0.1817]) -Greedy action tensor([ 0.7394, -1.2656, 0.3182, -0.0454]) tensor([0.4450, 0.0599, 0.2920, 0.2030]) -Greedy action tensor([ 0.1833, 0.3427, 0.6124, -0.3369]) tensor([0.2324, 0.2726, 0.3569, 0.1381]) -Greedy action tensor([ 0.1557, 0.4788, 0.1415, -0.0069]) tensor([0.2371, 0.3276, 0.2338, 0.2015]) -Greedy action tensor([ 0.3151, 0.8929, -0.0534, 0.0935]) tensor([0.2339, 0.4169, 0.1618, 0.1874]) -Greedy action tensor([-0.6815, -0.5039, -1.0987, 0.3236]) tensor([0.1790, 0.2138, 0.1180, 0.4892]) -Greedy action tensor([ 0.1509, -0.7540, 0.4452, -0.4712]) tensor([0.3045, 0.1232, 0.4087, 0.1635]) -Greedy action tensor([-0.4077, -0.0203, -0.7744, -0.6439]) tensor([0.2528, 0.3724, 0.1752, 0.1996]) -Greedy action tensor([-0.3512, -1.1715, -0.2955, -0.8100]) tensor([0.3195, 0.1407, 0.3378, 0.2020]) -Greedy action tensor([ 0.2693, -1.4720, -0.4486, 1.0576]) tensor([0.2589, 0.0454, 0.1263, 0.5695]) -Greedy action tensor([-0.4815, 0.5714, 0.4258, -0.8967]) tensor([0.1428, 0.4092, 0.3538, 0.0943]) -Greedy action tensor([ 0.1292, -0.6298, 0.4714, 0.6468]) tensor([0.2196, 0.1028, 0.3092, 0.3685]) -Greedy action tensor([-0.5066, -0.3098, 0.9830, -0.5016]) tensor([0.1306, 0.1590, 0.5792, 0.1312]) -Greedy action tensor([ 0.6198, -0.3978, 0.1331, 0.0816]) tensor([0.3906, 0.1412, 0.2401, 0.2281]) -Greedy action tensor([-0.2801, -0.7503, 0.5762, -1.5934]) tensor([0.2354, 0.1471, 0.5542, 0.0633]) -Greedy action tensor([-0.0210, 0.2412, -0.3218, 0.2741]) tensor([0.2281, 0.2965, 0.1689, 0.3064]) -Greedy action tensor([0.1637, 1.3122, 0.5470, 0.0543]) tensor([0.1534, 0.4839, 0.2251, 0.1375]) -Greedy action tensor([ 0.4038, 0.4242, 0.6500, -0.4090]) tensor([0.2671, 0.2726, 0.3417, 0.1185]) -Greedy action tensor([ 0.4040, -0.2557, 0.8447, -0.7912]) tensor([0.2964, 0.1533, 0.4606, 0.0897]) -Greedy action tensor([-0.6138, 0.0013, -0.3964, -0.2361]) tensor([0.1801, 0.3332, 0.2239, 0.2628]) -Greedy action tensor([ 0.5990, -0.7348, -0.6999, -0.6633]) tensor([0.5496, 0.1448, 0.1500, 0.1556]) -Greedy action tensor([ 0.3796, -0.8876, 0.3332, 0.0662]) tensor([0.3370, 0.0949, 0.3217, 0.2463]) -Greedy action tensor([-0.5303, -0.0103, -0.1259, -0.1821]) tensor([0.1787, 0.3005, 0.2677, 0.2531]) -Greedy action tensor([-0.2452, -0.4498, -0.3155, -0.6128]) tensor([0.2907, 0.2369, 0.2710, 0.2013]) -Greedy action tensor([ 0.3448, -0.2459, -0.1262, -0.4403]) tensor([0.3796, 0.2103, 0.2370, 0.1731]) -Greedy action tensor([ 0.5442, 0.2763, -0.2417, 0.2419]) tensor([0.3379, 0.2585, 0.1540, 0.2497]) -Greedy action tensor([-0.0222, 0.2467, 0.3416, -0.4023]) tensor([0.2257, 0.2953, 0.3247, 0.1543]) -Greedy action tensor([-1.0731, 0.0501, -0.3114, -0.8222]) tensor([0.1333, 0.4099, 0.2855, 0.1713]) -Greedy action tensor([ 0.6536, -0.6183, -0.8907, -0.3232]) tensor([0.5347, 0.1499, 0.1141, 0.2013]) -Greedy action tensor([-0.1545, -0.7002, -1.2527, -0.3157]) tensor([0.3618, 0.2096, 0.1206, 0.3079]) -Greedy action tensor([ 0.7661, -0.0608, 0.1825, -0.7376]) tensor([0.4510, 0.1972, 0.2516, 0.1002]) -Greedy action tensor([-0.6174, 0.0108, 0.4705, -0.3008]) tensor([0.1386, 0.2598, 0.4114, 0.1902]) -Greedy action tensor([ 0.2638, -1.0495, 0.4932, -0.7660]) tensor([0.3468, 0.0933, 0.4362, 0.1238]) -Greedy action tensor([ 0.8238, 0.0860, -0.6204, -0.4809]) tensor([0.5037, 0.2408, 0.1188, 0.1366]) -Greedy action tensor([ 0.2959, -0.9634, -0.2234, 0.0526]) tensor([0.3756, 0.1066, 0.2234, 0.2944]) -Greedy action tensor([ 0.9769, -0.7570, 0.2904, 0.2536]) tensor([0.4619, 0.0816, 0.2325, 0.2241]) -Greedy action tensor([ 1.8145, -0.8761, -0.4422, 0.6905]) tensor([0.6678, 0.0453, 0.0699, 0.2170]) -Greedy action tensor([-0.1228, -1.4241, 0.2173, -0.7167]) tensor([0.3096, 0.0843, 0.4351, 0.1710]) -Greedy action tensor([ 0.0548, -1.4778, 0.3761, 0.3170]) tensor([0.2568, 0.0555, 0.3540, 0.3337]) -Greedy action tensor([ 1.7690, -0.6117, 0.1947, 0.9315]) tensor([0.5772, 0.0534, 0.1196, 0.2498]) -Greedy action tensor([-0.9894, -0.5789, 0.6791, -0.0580]) tensor([0.0966, 0.1457, 0.5125, 0.2452]) -Greedy action tensor([ 1.2837, -1.4818, -0.2771, -0.1594]) tensor([0.6626, 0.0417, 0.1391, 0.1565]) -Greedy action tensor([-0.3237, 0.1709, -0.3177, 0.4639]) tensor([0.1711, 0.2806, 0.1721, 0.3761]) -Greedy action tensor([ 0.5643, -0.3028, -0.8193, 0.3965]) tensor([0.3974, 0.1670, 0.0996, 0.3360]) -Greedy action tensor([-0.1882, -0.8541, 0.7666, -0.9990]) tensor([0.2195, 0.1128, 0.5702, 0.0976]) -Greedy action tensor([ 0.0078, 0.6428, -0.5214, -1.3249]) tensor([0.2674, 0.5046, 0.1575, 0.0705]) -Greedy action tensor([-0.8922, -0.2944, 0.8929, -0.3818]) tensor([0.0957, 0.1741, 0.5707, 0.1595]) -Greedy action tensor([ 0.0179, -1.6913, -0.9403, 1.4179]) tensor([0.1780, 0.0322, 0.0683, 0.7216]) -Greedy action tensor([-0.9036, 0.3957, -1.1988, -0.1817]) tensor([0.1339, 0.4909, 0.0997, 0.2756]) -Greedy action tensor([ 0.0981, -1.2586, -0.4001, -0.0356]) tensor([0.3650, 0.0940, 0.2218, 0.3193]) -Greedy action tensor([-0.4649, 0.1689, -0.2284, 0.2214]) tensor([0.1629, 0.3071, 0.2064, 0.3236]) -Greedy action tensor([-0.2424, -0.6948, -0.1641, 0.8390]) tensor([0.1765, 0.1123, 0.1908, 0.5204]) -Greedy action tensor([ 0.4515, -1.8651, -0.4444, 0.1096]) tensor([0.4510, 0.0445, 0.1841, 0.3204]) -Greedy action tensor([ 0.2241, -0.6265, -0.2632, -0.0542]) tensor([0.3573, 0.1526, 0.2195, 0.2705]) -Greedy action tensor([ 0.8842, 0.3792, -0.4586, -0.5140]) tensor([0.4736, 0.2858, 0.1237, 0.1170]) -Greedy action tensor([ 0.4982, -1.1690, 0.6584, -0.5619]) tensor([0.3691, 0.0697, 0.4333, 0.1279]) -Greedy action tensor([ 0.4835, -0.7254, 0.2126, -0.4521]) tensor([0.4076, 0.1217, 0.3108, 0.1599]) -Greedy action tensor([-0.6699, -0.4652, 1.1799, -0.6052]) tensor([0.1036, 0.1271, 0.6587, 0.1105]) -Greedy action tensor([-0.0715, 0.0987, -0.6862, -0.2056]) tensor([0.2777, 0.3292, 0.1502, 0.2429]) -Greedy action tensor([-0.9632, -1.0541, 0.6132, -0.3643]) tensor([0.1167, 0.1065, 0.5644, 0.2124]) -Greedy action tensor([ 0.1311, -1.2768, -0.4464, 0.1805]) tensor([0.3501, 0.0856, 0.1965, 0.3678]) -Greedy action tensor([ 0.7786, -0.3947, 0.7191, -0.4172]) tensor([0.3915, 0.1211, 0.3689, 0.1184]) -Greedy action tensor([ 0.3685, -1.4260, 0.1378, 0.2361]) tensor([0.3526, 0.0586, 0.2799, 0.3089]) -Greedy action tensor([-1.2200, -0.6127, 1.3086, -0.7615]) tensor([0.0590, 0.1083, 0.7395, 0.0933]) -Greedy action tensor([-1.5950, -0.1517, -0.3843, 0.1070]) tensor([0.0710, 0.3009, 0.2384, 0.3897]) -Greedy action tensor([ 0.2350, -2.2202, 0.1433, 0.3721]) tensor([0.3180, 0.0273, 0.2901, 0.3647]) -Greedy action tensor([ 0.7243, -0.4502, 0.1770, -0.3204]) tensor([0.4466, 0.1380, 0.2584, 0.1571]) -Greedy action tensor([ 0.7487, -0.4515, 0.7937, 1.2433]) tensor([0.2508, 0.0755, 0.2624, 0.4113]) -Greedy action tensor([-1.9327, -0.4288, 0.6614, -0.1697]) tensor([0.0405, 0.1820, 0.5416, 0.2359]) -Greedy action tensor([-1.8812, -0.3728, 0.6340, -0.1419]) tensor([0.0424, 0.1916, 0.5245, 0.2414]) -Greedy action tensor([-1.2137, -0.0618, 0.8395, 0.7477]) tensor([0.0524, 0.1660, 0.4087, 0.3729]) -Greedy action tensor([-1.8734, -0.3906, 0.6261, -0.1501]) tensor([0.0431, 0.1900, 0.5252, 0.2417]) -Greedy action tensor([-0.9500, -0.4384, 0.3340, 0.6451]) tensor([0.0892, 0.1488, 0.3222, 0.4398]) -Greedy action tensor([-0.8713, -0.1905, 0.0742, -0.2473]) tensor([0.1348, 0.2664, 0.3471, 0.2517]) -Greedy action tensor([-1.8202, -0.2267, 0.5764, -0.1115]) tensor([0.0446, 0.2194, 0.4898, 0.2462]) -Greedy action tensor([-1.9213, -0.4479, 0.6563, -0.1715]) tensor([0.0412, 0.1797, 0.5422, 0.2369]) -Greedy action tensor([-1.2045, 0.7758, 0.1585, 0.2777]) tensor([0.0604, 0.4376, 0.2360, 0.2659]) -Greedy action tensor([-1.6465, 0.2534, 0.4176, 0.0553]) tensor([0.0475, 0.3176, 0.3743, 0.2605]) -Greedy action tensor([-1.8579, -0.3885, 0.6174, -0.1507]) tensor([0.0440, 0.1911, 0.5225, 0.2424]) -Greedy action tensor([-1.7708, -0.2789, 0.5971, -0.0535]) tensor([0.0461, 0.2050, 0.4922, 0.2568]) -Greedy action tensor([-1.8983, -0.4055, 0.6528, -0.1568]) tensor([0.0417, 0.1856, 0.5347, 0.2380]) -Greedy action tensor([-0.7417, 0.8418, 0.1031, 0.0503]) tensor([0.0961, 0.4681, 0.2236, 0.2121]) -Greedy action tensor([-1.6649, -0.4643, 0.6346, -0.2968]) tensor([0.0549, 0.1823, 0.5472, 0.2156]) -Greedy action tensor([-1.3540, -0.5952, 0.3791, 0.0219]) tensor([0.0784, 0.1675, 0.4437, 0.3104]) -Greedy action tensor([-1.7081, 0.2341, 0.4565, 0.0115]) tensor([0.0449, 0.3132, 0.3912, 0.2507]) -Greedy action tensor([-1.7291, -0.2425, 0.5382, -0.1050]) tensor([0.0496, 0.2195, 0.4791, 0.2518]) -Greedy action tensor([-1.6489, -0.4570, 0.8112, 0.1884]) tensor([0.0449, 0.1478, 0.5254, 0.2819]) -Greedy action tensor([-1.8346, -0.2699, 0.5893, -0.1377]) tensor([0.0444, 0.2122, 0.5012, 0.2422]) -Greedy action tensor([-0.9965, 0.3914, 0.1532, -0.4888]) tensor([0.1018, 0.4078, 0.3213, 0.1691]) -Greedy action tensor([-1.5007, -0.4427, 0.6659, 0.4348]) tensor([0.0512, 0.1474, 0.4468, 0.3546]) -Greedy action tensor([-1.8670, -0.4465, 0.6340, -0.1393]) tensor([0.0435, 0.1803, 0.5311, 0.2451]) -Greedy action tensor([-1.8872, -0.3165, 0.6285, -0.1485]) tensor([0.0419, 0.2015, 0.5183, 0.2383]) -Greedy action tensor([-1.4011, -0.2538, 0.3305, 0.1182]) tensor([0.0696, 0.2192, 0.3932, 0.3180]) -Greedy action tensor([-1.9255, -0.4386, 0.6668, -0.1644]) tensor([0.0406, 0.1798, 0.5430, 0.2365]) -Greedy action tensor([-1.8275, -0.3713, 0.6017, -0.1135]) tensor([0.0451, 0.1933, 0.5115, 0.2502]) -Greedy action tensor([-1.6585, -0.8120, 0.9880, 0.0122]) tensor([0.0440, 0.1025, 0.6199, 0.2336]) -Greedy action tensor([-1.7899, -0.2037, 0.5591, -0.1153]) tensor([0.0461, 0.2252, 0.4828, 0.2460]) -Greedy action tensor([-1.8904, -0.4202, 0.6351, -0.1519]) tensor([0.0425, 0.1848, 0.5310, 0.2417]) -Greedy action tensor([-1.6785, -0.2743, 0.5203, -0.1071]) tensor([0.0529, 0.2155, 0.4769, 0.2547]) -Greedy action tensor([-1.1594, -0.4663, 0.3581, -0.0143]) tensor([0.0934, 0.1868, 0.4261, 0.2936]) -Greedy action tensor([-1.8118, -0.3982, 0.6099, -0.1125]) tensor([0.0458, 0.1882, 0.5156, 0.2504]) -Greedy action tensor([-1.5786, -0.6103, 0.4626, 0.0033]) tensor([0.0617, 0.1626, 0.4754, 0.3003]) -Greedy action tensor([-1.9271, -0.4463, 0.6577, -0.1737]) tensor([0.0409, 0.1800, 0.5428, 0.2363]) -Greedy action tensor([-1.9348, -0.4571, 0.6661, -0.1699]) tensor([0.0405, 0.1775, 0.5456, 0.2365]) -Greedy action tensor([-1.3852, -0.3204, 0.4067, 0.1924]) tensor([0.0678, 0.1967, 0.4070, 0.3285]) -Greedy action tensor([-1.9049, -0.4223, 0.6427, -0.1607]) tensor([0.0418, 0.1843, 0.5345, 0.2394]) -Greedy action tensor([-1.6809, -0.5457, 0.5358, -0.0690]) tensor([0.0546, 0.1700, 0.5014, 0.2739]) -Greedy action tensor([-1.8859, -0.4446, 0.6392, -0.1554]) tensor([0.0428, 0.1809, 0.5347, 0.2416]) -Greedy action tensor([-1.8784, -0.3502, 0.6432, -0.1092]) tensor([0.0418, 0.1927, 0.5203, 0.2452]) -Greedy action tensor([-1.9161, -0.4371, 0.6559, -0.1643]) tensor([0.0412, 0.1810, 0.5400, 0.2378]) -Greedy action tensor([-1.9067, -0.4559, 0.6531, -0.1631]) tensor([0.0418, 0.1784, 0.5407, 0.2391]) -Greedy action tensor([-1.7433, -0.4500, 0.5749, -0.0725]) tensor([0.0497, 0.1812, 0.5049, 0.2643]) -Greedy action tensor([ 0.5097, -0.3392, 1.0376, 1.7111]) tensor([0.1551, 0.0664, 0.2629, 0.5156]) -Greedy action tensor([-1.9389, -0.4360, 0.6636, -0.1748]) tensor([0.0403, 0.1810, 0.5436, 0.2351]) -Greedy action tensor([-1.3136, -0.4387, 0.4445, 0.2900]) tensor([0.0706, 0.1693, 0.4094, 0.3508]) -Greedy action tensor([-0.0499, 0.9007, -0.2047, 0.1094]) tensor([0.1780, 0.4606, 0.1525, 0.2088]) -Greedy action tensor([-1.7649, -0.4183, 0.5965, -0.0674]) tensor([0.0478, 0.1838, 0.5072, 0.2611]) -Greedy action tensor([-1.7147, 0.0701, 0.4937, 0.0395]) tensor([0.0458, 0.2728, 0.4167, 0.2646]) -Greedy action tensor([-1.9341, -0.4352, 0.6621, -0.1740]) tensor([0.0405, 0.1812, 0.5430, 0.2353]) -Greedy action tensor([-1.9099, -0.4405, 0.6515, -0.1661]) tensor([0.0416, 0.1810, 0.5393, 0.2381]) -Greedy action tensor([-1.9353, -0.4391, 0.6624, -0.1742]) tensor([0.0405, 0.1806, 0.5435, 0.2354]) -Greedy action tensor([-1.2690, -0.5948, 0.3072, 0.1615]) tensor([0.0835, 0.1638, 0.4037, 0.3490]) -Greedy action tensor([-1.9441, -0.4489, 0.6680, -0.1792]) tensor([0.0401, 0.1789, 0.5467, 0.2343]) -Greedy action tensor([-1.1419, 0.0347, 0.3151, -0.0965]) tensor([0.0879, 0.2850, 0.3772, 0.2499]) -Greedy action tensor([-1.6551, -0.4878, 0.5298, 0.0644]) tensor([0.0535, 0.1720, 0.4758, 0.2987]) -Greedy action tensor([-1.4446, 0.4065, 0.4294, -0.6493]) tensor([0.0621, 0.3955, 0.4047, 0.1376]) -Greedy action tensor([-1.5451, -0.1082, 0.5337, 0.0845]) tensor([0.0546, 0.2299, 0.4368, 0.2787]) -Greedy action tensor([-1.7574, -0.4462, 0.5693, -0.1058]) tensor([0.0496, 0.1840, 0.5079, 0.2586]) -Greedy action tensor([-1.9054, -0.4505, 0.6511, -0.1591]) tensor([0.0418, 0.1792, 0.5392, 0.2398]) -Greedy action tensor([-1.0938, 0.5343, -1.1271, -1.3029]) tensor([0.1270, 0.6471, 0.1229, 0.1030]) -Greedy action tensor([-0.8740, -0.1912, 0.1703, -0.1008]) tensor([0.1252, 0.2478, 0.3557, 0.2713]) -Greedy action tensor([-1.8385, -0.4245, 0.6137, -0.1353]) tensor([0.0450, 0.1851, 0.5227, 0.2472]) -Greedy action tensor([-1.3105, 0.3691, 0.2826, -0.0379]) tensor([0.0673, 0.3611, 0.3312, 0.2404]) -Greedy action tensor([-0.4968, 0.2539, 0.5387, 1.2602]) tensor([0.0853, 0.1806, 0.2401, 0.4940]) -Greedy action tensor([-1.9241, -0.4446, 0.6600, -0.1696]) tensor([0.0409, 0.1798, 0.5426, 0.2367]) -Greedy action tensor([-1.8292, -0.4507, 0.6087, -0.1183]) tensor([0.0456, 0.1808, 0.5216, 0.2521]) -Greedy action tensor([-1.9115, -0.3819, 0.6378, -0.1637]) tensor([0.0414, 0.1911, 0.5298, 0.2377]) -Greedy action tensor([-1.9069, -0.4437, 0.6506, -0.1631]) tensor([0.0418, 0.1804, 0.5390, 0.2389]) -Greedy action tensor([-1.9019, -0.4124, 0.6529, -0.1583]) tensor([0.0416, 0.1846, 0.5357, 0.2380]) -Greedy action tensor([-1.3878, -0.2486, 0.0040, -0.4468]) tensor([0.0934, 0.2917, 0.3756, 0.2393]) -Greedy action tensor([-1.9027, -0.4322, 0.6453, -0.1617]) tensor([0.0420, 0.1826, 0.5362, 0.2393]) -Greedy action tensor([-1.8909, -0.4494, 0.6427, -0.1562]) tensor([0.0426, 0.1799, 0.5363, 0.2412]) -Greedy action tensor([-1.7846, -0.1766, 0.5573, -0.1319]) tensor([0.0463, 0.2310, 0.4812, 0.2416]) -Greedy action tensor([-1.9234, -0.4548, 0.6621, -0.1603]) tensor([0.0409, 0.1777, 0.5429, 0.2385]) -Greedy action tensor([-1.4548, -0.5602, 0.3948, 0.1508]) tensor([0.0676, 0.1655, 0.4300, 0.3369]) -Greedy action tensor([-1.8190, -0.3774, 0.6352, -0.1137]) tensor([0.0447, 0.1890, 0.5203, 0.2460]) -Greedy action tensor([-1.9140, -0.4452, 0.6561, -0.1625]) tensor([0.0414, 0.1797, 0.5405, 0.2384]) -Greedy action tensor([-1.9268, -0.4537, 0.6619, -0.1726]) tensor([0.0409, 0.1784, 0.5444, 0.2363]) -Greedy action tensor([-1.9134, -0.4472, 0.6558, -0.1626]) tensor([0.0414, 0.1794, 0.5407, 0.2385]) -Greedy action tensor([ 0.5887, -0.5471, -0.1712, -0.0902]) tensor([0.4355, 0.1399, 0.2037, 0.2209]) -Greedy action tensor([ 1.0511, -0.3358, -0.2382, -0.4651]) tensor([0.5731, 0.1432, 0.1579, 0.1258]) -Greedy action tensor([ 1.0072, -0.4514, 0.0138, -0.2241]) tensor([0.5278, 0.1227, 0.1954, 0.1541]) -Greedy action tensor([ 0.3934, -0.1343, 0.0130, -0.2245]) tensor([0.3555, 0.2098, 0.2431, 0.1917]) -Greedy action tensor([ 0.8311, -0.4500, -0.0385, -0.2638]) tensor([0.4923, 0.1367, 0.2063, 0.1647]) -Greedy action tensor([ 1.0446, -0.5624, 0.0242, -0.2531]) tensor([0.5452, 0.1093, 0.1965, 0.1489]) -Greedy action tensor([ 0.5966, -0.4571, -0.0797, -0.2208]) tensor([0.4350, 0.1517, 0.2212, 0.1921]) -Greedy action tensor([ 0.8169, -0.4811, -0.1401, -0.2255]) tensor([0.4976, 0.1359, 0.1911, 0.1754]) -Greedy action tensor([ 0.9714, -0.7042, 0.0698, -0.2273]) tensor([0.5278, 0.0988, 0.2142, 0.1592]) -Greedy action tensor([ 0.7929, -0.5736, -0.0988, -0.2923]) tensor([0.4993, 0.1273, 0.2047, 0.1687]) -Greedy action tensor([ 1.1518, -0.5730, -0.2364, -0.7390]) tensor([0.6334, 0.1129, 0.1581, 0.0956]) -Greedy action tensor([ 0.6371, 0.0343, -0.1009, 0.1752]) tensor([0.3766, 0.2061, 0.1800, 0.2373]) -Greedy action tensor([ 0.9648, -0.3349, -0.0502, -0.3115]) tensor([0.5224, 0.1424, 0.1893, 0.1458]) -Greedy action tensor([ 0.7710, -0.7781, 0.0505, -0.2261]) tensor([0.4836, 0.1027, 0.2353, 0.1784]) -Greedy action tensor([ 0.8022, -0.3553, -0.0239, -0.4107]) tensor([0.4880, 0.1533, 0.2136, 0.1451]) -Greedy action tensor([ 1.2591, -0.7063, -0.2036, -0.3942]) tensor([0.6397, 0.0896, 0.1482, 0.1225]) -Greedy action tensor([ 0.7705, -0.5595, -0.1319, -0.3936]) tensor([0.5045, 0.1334, 0.2046, 0.1575]) -Greedy action tensor([ 0.6545, -0.6797, 0.0251, -0.2793]) tensor([0.4568, 0.1203, 0.2434, 0.1795]) -Greedy action tensor([ 0.2998, 0.0772, -0.0512, -0.0848]) tensor([0.3140, 0.2513, 0.2210, 0.2137]) -Greedy action tensor([ 1.1325, -0.7809, -0.0286, -0.7207]) tensor([0.6183, 0.0912, 0.1936, 0.0969]) -Greedy action tensor([ 1.1075, -1.1805, 0.0996, -0.5002]) tensor([0.5999, 0.0609, 0.2190, 0.1202]) -Greedy action tensor([ 1.0960, -0.4055, -0.0380, -0.2842]) tensor([0.5568, 0.1240, 0.1791, 0.1400]) -Greedy action tensor([ 0.9297, -0.5859, -0.1522, -0.3129]) tensor([0.5414, 0.1189, 0.1835, 0.1562]) -Greedy action tensor([ 0.7056, -0.5919, -0.0552, -0.3114]) tensor([0.4757, 0.1300, 0.2223, 0.1721]) -Greedy action tensor([ 1.0019, -0.5705, -0.0033, -0.3723]) tensor([0.5475, 0.1136, 0.2003, 0.1385]) -Greedy action tensor([ 0.9208, -0.8983, 0.2022, -0.6377]) tensor([0.5376, 0.0872, 0.2620, 0.1131]) -Greedy action tensor([ 0.3146, -0.0583, -0.0721, -0.1012]) tensor([0.3303, 0.2275, 0.2244, 0.2179]) -Greedy action tensor([ 1.0603, -0.6627, -0.0967, -0.4183]) tensor([0.5811, 0.1037, 0.1827, 0.1325]) -Greedy action tensor([ 0.5799, -0.3502, -0.1192, -0.0814]) tensor([0.4153, 0.1639, 0.2064, 0.2144]) -Greedy action tensor([ 0.3484, -0.2037, -0.1109, -0.3779]) tensor([0.3716, 0.2139, 0.2347, 0.1797]) -Greedy action tensor([ 0.6888, -0.3251, -0.0042, -0.0944]) tensor([0.4311, 0.1564, 0.2156, 0.1970]) -Greedy action tensor([ 1.0165, -0.5659, 0.0158, -0.3232]) tensor([0.5450, 0.1120, 0.2003, 0.1427]) -Greedy action tensor([ 0.8166, -0.4467, -0.0720, -0.2532]) tensor([0.4909, 0.1388, 0.2019, 0.1684]) -Greedy action tensor([ 0.6845, 0.1011, -0.1074, -0.6263]) tensor([0.4385, 0.2447, 0.1986, 0.1182]) -Greedy action tensor([ 0.6686, -0.0059, -0.1121, 0.0232]) tensor([0.4013, 0.2044, 0.1838, 0.2104]) -Greedy action tensor([ 1.1304, -1.0239, 0.0014, -0.5542]) tensor([0.6154, 0.0714, 0.1990, 0.1142]) -Greedy action tensor([ 1.1607, -0.2232, 0.0775, -0.4327]) tensor([0.5579, 0.1398, 0.1889, 0.1134]) -Greedy action tensor([ 1.5474, -0.8941, -0.1763, -0.5145]) tensor([0.7181, 0.0625, 0.1281, 0.0913]) -Greedy action tensor([ 1.1051e+00, -4.7200e-01, 3.1763e-04, -4.1828e-01]) tensor([0.5695, 0.1176, 0.1887, 0.1241]) -Greedy action tensor([ 0.9770, -0.9879, -0.1293, -0.6450]) tensor([0.5994, 0.0840, 0.1983, 0.1184]) -Greedy action tensor([ 1.1092, -0.5714, -0.0685, -0.6002]) tensor([0.5969, 0.1112, 0.1838, 0.1080]) -Greedy action tensor([ 0.5323, 0.1753, -0.1523, 0.0789]) tensor([0.3522, 0.2464, 0.1776, 0.2238]) -Greedy action tensor([ 0.1525, 0.2039, -0.1195, -0.3598]) tensor([0.2929, 0.3084, 0.2232, 0.1755]) -Greedy action tensor([ 0.8685, -0.2697, -0.0631, -0.1094]) tensor([0.4784, 0.1533, 0.1884, 0.1799]) -Greedy action tensor([ 0.7897, -0.4371, 0.0790, -0.4761]) tensor([0.4839, 0.1419, 0.2377, 0.1365]) -Greedy action tensor([ 0.3664, -0.4908, -0.2526, -0.1682]) tensor([0.3923, 0.1665, 0.2113, 0.2299]) -Greedy action tensor([ 0.7496, -0.2403, -0.0538, -0.1176]) tensor([0.4465, 0.1659, 0.1999, 0.1876]) -Greedy action tensor([ 0.6802, -0.3827, -0.0617, -0.1136]) tensor([0.4398, 0.1519, 0.2094, 0.1989]) -Greedy action tensor([ 1.0429, -0.5589, -0.0168, -0.4005]) tensor([0.5605, 0.1130, 0.1942, 0.1323]) -Greedy action tensor([ 0.9802, -0.6134, -0.2246, -0.4336]) tensor([0.5727, 0.1164, 0.1717, 0.1393]) -Greedy action tensor([ 0.6491, -0.5012, 0.0248, -0.5783]) tensor([0.4662, 0.1475, 0.2497, 0.1366]) -Greedy action tensor([ 1.0301, -0.9867, 0.1429, -0.5056]) tensor([0.5681, 0.0756, 0.2340, 0.1223]) -Greedy action tensor([ 1.0287, -1.0286, 0.0513, -0.3711]) tensor([0.5712, 0.0730, 0.2149, 0.1409]) -Greedy action tensor([ 0.6955, -0.2845, -0.1552, -0.1714]) tensor([0.4499, 0.1688, 0.1922, 0.1891]) -Greedy action tensor([ 0.9781, -0.3615, -0.2560, -0.1521]) tensor([0.5330, 0.1396, 0.1552, 0.1722]) -Greedy action tensor([ 0.9969, -0.7046, -0.0172, -0.4295]) tensor([0.5601, 0.1022, 0.2032, 0.1345]) -Greedy action tensor([ 1.0418e+00, -7.8199e-01, -9.4083e-04, -4.1022e-01]) tensor([0.5721, 0.0923, 0.2016, 0.1339]) -Greedy action tensor([ 0.6910, -0.7654, 0.0268, -0.3301]) tensor([0.4744, 0.1106, 0.2442, 0.1709]) -Greedy action tensor([ 0.6499, -0.5487, -0.0430, -0.2139]) tensor([0.4498, 0.1357, 0.2250, 0.1896]) -Greedy action tensor([ 0.7103, -0.3989, -0.0725, -0.0584]) tensor([0.4443, 0.1465, 0.2031, 0.2060]) -Greedy action tensor([ 0.8769, -0.5660, -0.0193, -0.3969]) tensor([0.5197, 0.1228, 0.2121, 0.1454]) -Greedy action tensor([ 1.4143, -0.8316, 0.0521, -1.0586]) tensor([0.6914, 0.0732, 0.1771, 0.0583]) -Greedy action tensor([ 0.7011, -0.2323, -0.0359, -0.3439]) tensor([0.4498, 0.1769, 0.2152, 0.1582]) -Greedy action tensor([ 0.7319, -0.4191, -0.0242, -0.2348]) tensor([0.4616, 0.1460, 0.2167, 0.1756]) -Greedy action tensor([ 0.7657, -0.9038, -0.0489, -0.4554]) tensor([0.5192, 0.0978, 0.2299, 0.1531]) -Greedy action tensor([ 0.5570, -0.3082, -0.0162, -0.1405]) tensor([0.4028, 0.1696, 0.2271, 0.2005]) -Greedy action tensor([ 0.9284, -0.5440, 0.0762, -0.2515]) tensor([0.5094, 0.1168, 0.2172, 0.1565]) -Greedy action tensor([ 0.6378, 0.0354, -0.0926, 0.0113]) tensor([0.3901, 0.2136, 0.1879, 0.2085]) -Greedy action tensor([ 0.8121, -0.5963, 0.0571, -0.6241]) tensor([0.5122, 0.1253, 0.2407, 0.1218]) -Greedy action tensor([ 0.6103, -0.1448, -0.4468, -0.5030]) tensor([0.4660, 0.2190, 0.1619, 0.1531]) -Greedy action tensor([ 0.8135, -0.2741, 0.1288, -0.1458]) tensor([0.4496, 0.1515, 0.2267, 0.1722]) -Greedy action tensor([ 0.7654, -0.0291, -0.0885, -0.0808]) tensor([0.4335, 0.1959, 0.1846, 0.1860]) -Greedy action tensor([ 1.0618, -0.7062, -0.0187, -0.4982]) tensor([0.5813, 0.0992, 0.1973, 0.1222]) -Greedy action tensor([ 0.4108, -0.3496, -0.0729, -0.2208]) tensor([0.3823, 0.1787, 0.2357, 0.2033]) -Greedy action tensor([ 0.6810, -0.1159, -0.1035, -0.2019]) tensor([0.4309, 0.1942, 0.1966, 0.1782]) -Greedy action tensor([ 0.3679, -0.0438, -0.0740, -0.0722]) tensor([0.3391, 0.2246, 0.2179, 0.2183]) -Greedy action tensor([ 0.5246, -0.3247, 0.0934, -0.1019]) tensor([0.3829, 0.1638, 0.2487, 0.2046]) -Greedy action tensor([ 0.8295, -0.2334, -0.0569, -0.0732]) tensor([0.4623, 0.1597, 0.1905, 0.1875]) -Greedy action tensor([ 0.5421, -0.2352, -0.0408, -0.2406]) tensor([0.4040, 0.1857, 0.2255, 0.1847]) -Greedy action tensor([ 0.7001, -0.3496, 0.0749, -0.3392]) tensor([0.4466, 0.1563, 0.2390, 0.1580]) -Greedy action tensor([ 0.8199, -0.6525, 0.0413, -0.4826]) tensor([0.5101, 0.1170, 0.2342, 0.1387]) -Greedy action tensor([ 1.8502, -0.3848, -0.1224, 0.5226]) tensor([0.6617, 0.0708, 0.0920, 0.1754]) -Greedy action tensor([ 2.4228, -1.3043, -0.1916, 0.9183]) tensor([0.7579, 0.0182, 0.0555, 0.1684]) -Greedy action tensor([ 1.4126, -0.1043, -0.7434, -0.1421]) tensor([0.6467, 0.1419, 0.0749, 0.1366]) -Greedy action tensor([ 1.6573, -0.4462, -0.5332, 0.3784]) tensor([0.6613, 0.0807, 0.0740, 0.1841]) -Greedy action tensor([ 1.2482, -0.4308, -0.2616, 0.3683]) tensor([0.5487, 0.1024, 0.1213, 0.2276]) -Greedy action tensor([ 1.7206, 0.3027, -0.0725, 0.0419]) tensor([0.6268, 0.1518, 0.1043, 0.1170]) -Greedy action tensor([ 1.5656, -0.0486, -0.7522, 0.2904]) tensor([0.6341, 0.1262, 0.0625, 0.1772]) -Greedy action tensor([ 1.1259, -0.1572, -0.2682, 0.0688]) tensor([0.5340, 0.1480, 0.1325, 0.1856]) -Greedy action tensor([ 1.5871, -0.0615, -0.2640, 0.4302]) tensor([0.6010, 0.1156, 0.0944, 0.1890]) -Greedy action tensor([ 1.6167, -0.6231, -0.2597, 0.7118]) tensor([0.6009, 0.0640, 0.0920, 0.2431]) -Greedy action tensor([ 1.4947, -0.5123, -0.5569, 0.5427]) tensor([0.6065, 0.0815, 0.0779, 0.2341]) -Greedy action tensor([ 2.4154, 0.9594, 0.4114, -0.0605]) tensor([0.6887, 0.1606, 0.0928, 0.0579]) -Greedy action tensor([ 1.9855, -0.4491, -0.1996, 0.0650]) tensor([0.7426, 0.0651, 0.0835, 0.1088]) -Greedy action tensor([ 1.3718, -0.0683, -0.3343, 0.6061]) tensor([0.5309, 0.1258, 0.0964, 0.2469]) -Greedy action tensor([ 1.2261, -0.7517, -0.0973, 0.2457]) tensor([0.5619, 0.0777, 0.1496, 0.2108]) -Greedy action tensor([ 1.5218, -0.5797, -0.6495, 0.0810]) tensor([0.6789, 0.0830, 0.0774, 0.1607]) -Greedy action tensor([ 2.1885, -1.4663, 0.0046, 0.4661]) tensor([0.7592, 0.0196, 0.0855, 0.1356]) -Greedy action tensor([ 1.4109, -0.2720, -0.7395, 0.3084]) tensor([0.6119, 0.1137, 0.0712, 0.2032]) -Greedy action tensor([ 0.8812, 0.2956, -0.8707, 0.2521]) tensor([0.4418, 0.2460, 0.0766, 0.2355]) -Greedy action tensor([ 1.4053, -0.6730, -0.2733, 0.1551]) tensor([0.6257, 0.0783, 0.1168, 0.1792]) -Greedy action tensor([ 1.8189, -0.4353, -0.2459, 0.5379]) tensor([0.6625, 0.0695, 0.0840, 0.1840]) -Greedy action tensor([ 1.2316, -0.7531, -0.4211, 0.5299]) tensor([0.5480, 0.0753, 0.1050, 0.2717]) -Greedy action tensor([ 1.4515, -0.4376, -0.3116, 0.3852]) tensor([0.5999, 0.0907, 0.1029, 0.2065]) -Greedy action tensor([ 2.4116, -0.8664, -0.1381, 0.7853]) tensor([0.7619, 0.0287, 0.0595, 0.1498]) -Greedy action tensor([ 1.3194, -0.7073, -0.3726, 0.0783]) tensor([0.6231, 0.0821, 0.1147, 0.1801]) -Greedy action tensor([ 1.4458, -0.1529, -1.1176, 0.2872]) tensor([0.6277, 0.1269, 0.0484, 0.1970]) -Greedy action tensor([ 1.3936, -0.6578, -0.2083, 0.1973]) tensor([0.6126, 0.0788, 0.1235, 0.1852]) -Greedy action tensor([ 1.4496, -0.6020, -0.6499, 0.0921]) tensor([0.6630, 0.0852, 0.0812, 0.1706]) -Greedy action tensor([ 2.7427, -1.6930, -0.0438, 1.1436]) tensor([0.7840, 0.0093, 0.0483, 0.1584]) -Greedy action tensor([ 1.4043, -0.1841, -0.2115, 0.5065]) tensor([0.5524, 0.1128, 0.1098, 0.2251]) -Greedy action tensor([ 1.3869, -0.3578, -0.4786, 0.3005]) tensor([0.5999, 0.1048, 0.0929, 0.2024]) -Greedy action tensor([ 2.0461, -0.6098, -0.5134, 0.4744]) tensor([0.7379, 0.0518, 0.0571, 0.1532]) -Greedy action tensor([ 1.5606, -0.8807, -0.5105, 0.5012]) tensor([0.6411, 0.0558, 0.0808, 0.2223]) -Greedy action tensor([2.4647, 0.6928, 0.2098, 0.4632]) tensor([0.7092, 0.1206, 0.0744, 0.0958]) -Greedy action tensor([ 1.1280, -0.0570, -0.2643, 0.1652]) tensor([0.5165, 0.1579, 0.1284, 0.1972]) -Greedy action tensor([ 1.5949, 0.0649, -0.6043, 0.1357]) tensor([0.6411, 0.1388, 0.0711, 0.1490]) -Greedy action tensor([ 1.6962, -0.4101, -0.4897, 0.3956]) tensor([0.6638, 0.0808, 0.0746, 0.1808]) -Greedy action tensor([ 1.7635, -0.9842, -0.1646, 0.4042]) tensor([0.6820, 0.0437, 0.0992, 0.1752]) -Greedy action tensor([ 1.2573, -0.5608, -0.3324, 0.3127]) tensor([0.5698, 0.0925, 0.1162, 0.2215]) -Greedy action tensor([ 1.7885, -0.7952, -0.0195, 0.5194]) tensor([0.6576, 0.0496, 0.1078, 0.1849]) -Greedy action tensor([ 1.0715, -0.0564, -0.5897, 0.4029]) tensor([0.4936, 0.1598, 0.0937, 0.2529]) -Greedy action tensor([ 1.9560, -0.4225, -0.9073, 0.7957]) tensor([0.6834, 0.0634, 0.0390, 0.2142]) -Greedy action tensor([ 1.4767, -0.5871, -0.4198, 0.3971]) tensor([0.6185, 0.0785, 0.0928, 0.2101]) -Greedy action tensor([ 1.2476, -0.5686, -0.5015, 0.1120]) tensor([0.6032, 0.0981, 0.1049, 0.1938]) -Greedy action tensor([ 1.6781, -0.0846, -0.3093, -0.1811]) tensor([0.6828, 0.1172, 0.0936, 0.1064]) -Greedy action tensor([ 2.0255, -1.1240, -0.1524, 0.8135]) tensor([0.6879, 0.0295, 0.0779, 0.2047]) -Greedy action tensor([ 1.9813, -0.3813, -0.7242, 0.5659]) tensor([0.7123, 0.0671, 0.0476, 0.1730]) -Greedy action tensor([ 0.9341, -0.2656, -0.0142, 0.3391]) tensor([0.4464, 0.1345, 0.1729, 0.2462]) -Greedy action tensor([ 2.0686, 0.8548, -0.1879, 0.1354]) tensor([0.6466, 0.1921, 0.0677, 0.0936]) -Greedy action tensor([ 1.7552, -0.5356, -0.1841, 0.8425]) tensor([0.6074, 0.0615, 0.0873, 0.2438]) -Greedy action tensor([ 1.1240, -0.2855, -0.7910, 0.4706]) tensor([0.5230, 0.1278, 0.0771, 0.2721]) -Greedy action tensor([ 2.1587, 0.5607, -0.2689, 0.0638]) tensor([0.7074, 0.1431, 0.0624, 0.0871]) -Greedy action tensor([ 1.7413, -0.3032, -0.8406, 0.5196]) tensor([0.6667, 0.0863, 0.0504, 0.1965]) -Greedy action tensor([ 1.6336, -0.6310, -0.2481, 0.4789]) tensor([0.6364, 0.0661, 0.0969, 0.2006]) -Greedy action tensor([ 1.4220, -0.1793, -0.3237, -0.0412]) tensor([0.6220, 0.1254, 0.1086, 0.1440]) -Greedy action tensor([ 1.2232, 0.0180, -0.7199, 0.3263]) tensor([0.5403, 0.1619, 0.0774, 0.2204]) -Greedy action tensor([ 1.4785, -0.3900, -0.4312, 0.7225]) tensor([0.5643, 0.0871, 0.0836, 0.2650]) -Greedy action tensor([ 2.3658, -1.5791, 0.0521, 0.1428]) tensor([0.8153, 0.0158, 0.0806, 0.0883]) -Greedy action tensor([ 1.2829, -0.5721, -0.3954, 0.3793]) tensor([0.5720, 0.0895, 0.1068, 0.2317]) -Greedy action tensor([ 1.6510, -0.7876, -0.0673, 0.4335]) tensor([0.6399, 0.0559, 0.1148, 0.1894]) -Greedy action tensor([ 1.6174, -0.8941, -0.4334, 0.7603]) tensor([0.6119, 0.0497, 0.0787, 0.2597]) -Greedy action tensor([ 1.6148, -0.5526, -0.5946, -0.0021]) tensor([0.7029, 0.0805, 0.0772, 0.1395]) -Greedy action tensor([ 1.8444, -0.8433, -0.1550, 0.5700]) tensor([0.6743, 0.0459, 0.0913, 0.1885]) -Greedy action tensor([ 1.6927, -0.8463, 0.0872, 0.2472]) tensor([0.6599, 0.0521, 0.1325, 0.1555]) -Greedy action tensor([ 1.3037, 0.2030, -1.0964, 0.2202]) tensor([0.5676, 0.1888, 0.0515, 0.1921]) -Greedy action tensor([ 1.4868, -0.2016, -1.4585, 0.3126]) tensor([0.6466, 0.1195, 0.0340, 0.1999]) -Greedy action tensor([ 1.4144, -0.4732, -0.2461, 0.4864]) tensor([0.5758, 0.0872, 0.1094, 0.2276]) -Greedy action tensor([ 1.5649, -0.6265, -0.0942, 0.2465]) tensor([0.6371, 0.0712, 0.1212, 0.1705]) -Greedy action tensor([ 1.5705, -0.2377, -0.7040, 0.7861]) tensor([0.5803, 0.0951, 0.0597, 0.2649]) -Greedy action tensor([ 1.2434, -0.2862, -0.3388, -0.0474]) tensor([0.5892, 0.1276, 0.1211, 0.1621]) -Greedy action tensor([ 2.0280, -0.7677, -0.1286, 0.0305]) tensor([0.7619, 0.0465, 0.0882, 0.1034]) -Greedy action tensor([ 1.0942, -0.2952, -0.9837, 0.1949]) tensor([0.5614, 0.1399, 0.0703, 0.2284]) -Greedy action tensor([ 1.1022, -0.1490, -0.5338, 0.4451]) tensor([0.5002, 0.1431, 0.0974, 0.2593]) -Greedy action tensor([ 1.3718, -0.6815, -0.4795, 0.4669]) tensor([0.5918, 0.0759, 0.0929, 0.2394]) -Greedy action tensor([ 1.2214, -0.6732, 0.1266, 0.6278]) tensor([0.4908, 0.0738, 0.1642, 0.2711]) -Greedy action tensor([ 1.9709, -1.1267, -0.4800, 0.1387]) tensor([0.7743, 0.0350, 0.0668, 0.1239]) -Greedy action tensor([ 1.7091, 0.0092, -0.2470, 0.2835]) tensor([0.6392, 0.1168, 0.0904, 0.1536]) -Greedy action tensor([ 1.7250, -0.7963, -0.8846, 0.2397]) tensor([0.7245, 0.0582, 0.0533, 0.1640]) -Greedy action tensor([ 1.7907, -0.6732, -0.0731, 0.3924]) tensor([0.6724, 0.0572, 0.1043, 0.1661]) -Greedy action tensor([ 1.5430, 0.6073, -0.1185, 0.2943]) tensor([0.5350, 0.2099, 0.1016, 0.1535]) -Greedy action tensor([ 1.4740, -0.5314, -0.3626, -0.0168]) tensor([0.6583, 0.0886, 0.1049, 0.1482]) -Greedy action tensor([ 0.7810, -0.2342, 0.1720, 0.1848]) tensor([0.4070, 0.1475, 0.2213, 0.2242]) -Greedy action tensor([ 1.1416, 0.0985, 0.1320, -0.5954]) tensor([0.5283, 0.1862, 0.1925, 0.0930]) -Greedy action tensor([0.6526, 0.5991, 0.2206, 0.1340]) tensor([0.3132, 0.2969, 0.2034, 0.1865]) -Greedy action tensor([ 0.7768, -0.5800, -0.6755, -0.3839]) tensor([0.5541, 0.1427, 0.1297, 0.1736]) -Greedy action tensor([ 1.6923, -0.8410, 1.4080, 0.8185]) tensor([0.4446, 0.0353, 0.3346, 0.1856]) -Greedy action tensor([-0.1482, -2.0419, 0.8549, 0.9818]) tensor([0.1434, 0.0216, 0.3910, 0.4440]) -Greedy action tensor([-1.0245, -0.8230, 0.0483, -1.3182]) tensor([0.1697, 0.2076, 0.4962, 0.1265]) -Greedy action tensor([ 0.5387, -0.9078, -0.9653, -0.3787]) tensor([0.5384, 0.1267, 0.1197, 0.2152]) -Greedy action tensor([ 0.6624, -1.4973, -0.5424, 0.1627]) tensor([0.4946, 0.0571, 0.1483, 0.3001]) -Greedy action tensor([-0.3274, -2.0539, -0.2258, 0.3842]) tensor([0.2314, 0.0412, 0.2561, 0.4714]) -Greedy action tensor([-1.0866, -0.3983, -0.0284, -0.1758]) tensor([0.1196, 0.2381, 0.3447, 0.2975]) -Greedy action tensor([ 0.1104, 0.2018, -0.7458, -0.2759]) tensor([0.3125, 0.3424, 0.1327, 0.2124]) -Greedy action tensor([-0.1778, -1.4513, -0.9667, -0.3305]) tensor([0.3857, 0.1079, 0.1753, 0.3311]) -Greedy action tensor([-0.0140, -0.3501, 0.6082, -0.7871]) tensor([0.2476, 0.1769, 0.4612, 0.1143]) -Greedy action tensor([ 0.8788, -1.2050, -0.1744, 0.0126]) tensor([0.5280, 0.0657, 0.1842, 0.2221]) -Greedy action tensor([ 0.5585, -1.2653, 0.0575, -0.4377]) tensor([0.4680, 0.0755, 0.2836, 0.1728]) -Greedy action tensor([ 1.5416, -0.2766, 0.7303, 0.6877]) tensor([0.4920, 0.0799, 0.2186, 0.2095]) -Greedy action tensor([ 1.0038, 0.2637, -0.5721, -0.1561]) tensor([0.5006, 0.2389, 0.1035, 0.1570]) -Greedy action tensor([-0.5281, -0.3995, 0.6233, -1.0455]) tensor([0.1696, 0.1929, 0.5364, 0.1011]) -Greedy action tensor([-0.4051, -0.6888, 0.3311, -1.0842]) tensor([0.2300, 0.1732, 0.4802, 0.1166]) -Greedy action tensor([ 0.4461, 0.1755, 0.0987, -0.8025]) tensor([0.3628, 0.2768, 0.2563, 0.1041]) -Greedy action tensor([-0.4557, -0.3933, 0.8809, -1.1549]) tensor([0.1571, 0.1672, 0.5977, 0.0781]) -Greedy action tensor([ 0.4949, 0.5582, -0.1729, -0.9025]) tensor([0.3539, 0.3771, 0.1815, 0.0875]) -Greedy action tensor([ 0.5268, -0.1888, 0.9867, -0.5743]) tensor([0.2937, 0.1436, 0.4651, 0.0976]) -Greedy action tensor([-0.2479, -0.8034, -0.7819, 0.6998]) tensor([0.2110, 0.1211, 0.1237, 0.5443]) -Greedy action tensor([-0.3321, -0.5628, 1.1968, -0.0051]) tensor([0.1283, 0.1019, 0.5919, 0.1779]) -Greedy action tensor([ 1.2942, -1.1215, -0.0055, 1.4451]) tensor([0.3961, 0.0354, 0.1080, 0.4606]) -Greedy action tensor([-0.3808, -0.3739, -0.9515, -0.4548]) tensor([0.2857, 0.2876, 0.1614, 0.2653]) -Greedy action tensor([-0.0315, -0.8349, -0.0953, 0.0378]) tensor([0.2892, 0.1295, 0.2713, 0.3099]) -Greedy action tensor([-0.2555, -1.4543, -0.0862, 0.2089]) tensor([0.2453, 0.0740, 0.2905, 0.3903]) -Greedy action tensor([ 0.4383, 0.3692, 0.0136, -0.4556]) tensor([0.3337, 0.3115, 0.2183, 0.1365]) -Greedy action tensor([-0.6584, -0.1596, -1.2747, -0.0124]) tensor([0.1963, 0.3232, 0.1060, 0.3745]) -Greedy action tensor([ 0.3650, -0.4733, -0.4783, -0.0699]) tensor([0.3984, 0.1723, 0.1714, 0.2579]) -Greedy action tensor([ 0.8818, -0.0605, -0.4269, 0.1801]) tensor([0.4639, 0.1808, 0.1253, 0.2300]) -Greedy action tensor([ 0.2256, -0.5401, -0.1926, 0.1467]) tensor([0.3282, 0.1526, 0.2160, 0.3033]) -Greedy action tensor([-1.1922, -0.6831, 0.8732, -1.0082]) tensor([0.0851, 0.1415, 0.6711, 0.1023]) -Greedy action tensor([1.2314, 0.0295, 0.0323, 0.1448]) tensor([0.5156, 0.1550, 0.1554, 0.1739]) -Greedy action tensor([-0.2615, 0.1849, -1.0451, -0.3034]) tensor([0.2514, 0.3928, 0.1148, 0.2410]) -Greedy action tensor([ 1.2392, -0.3955, -0.2869, -0.0823]) tensor([0.5955, 0.1161, 0.1295, 0.1588]) -Greedy action tensor([ 0.0285, -0.9876, 0.4114, -0.4856]) tensor([0.2918, 0.1056, 0.4280, 0.1745]) -Greedy action tensor([-0.7094, 0.2879, -0.8339, 0.0875]) tensor([0.1468, 0.3979, 0.1296, 0.3257]) -Greedy action tensor([-0.2933, -0.6189, 1.6052, -0.5848]) tensor([0.1093, 0.0790, 0.7300, 0.0817]) -Greedy action tensor([-0.5930, -1.7241, -0.2411, -0.1876]) tensor([0.2356, 0.0760, 0.3350, 0.3534]) -Greedy action tensor([ 1.4966, 0.0959, 0.3134, -0.5723]) tensor([0.5956, 0.1468, 0.1824, 0.0752]) -Greedy action tensor([-0.1017, -0.1116, 0.2636, -0.9900]) tensor([0.2603, 0.2577, 0.3750, 0.1071]) -Greedy action tensor([ 1.0493, -1.3619, 0.2203, 1.2408]) tensor([0.3653, 0.0328, 0.1595, 0.4424]) -Greedy action tensor([ 0.0280, 0.2482, -0.4080, 0.0458]) tensor([0.2557, 0.3187, 0.1653, 0.2603]) -Greedy action tensor([-0.7068, 0.2898, 1.1228, -0.9584]) tensor([0.0933, 0.2528, 0.5814, 0.0725]) -Greedy action tensor([ 0.9369, -0.1457, 0.2875, 0.4203]) tensor([0.4069, 0.1378, 0.2125, 0.2427]) -Greedy action tensor([-0.4288, -0.5977, -0.2905, -0.1960]) tensor([0.2350, 0.1985, 0.2699, 0.2966]) -Greedy action tensor([ 0.0302, -0.1877, 0.0323, -0.3866]) tensor([0.2886, 0.2321, 0.2892, 0.1902]) -Greedy action tensor([ 1.2183, 0.1004, 0.3839, -0.0405]) tensor([0.4890, 0.1599, 0.2123, 0.1389]) -Greedy action tensor([-0.4210, -0.6764, -0.5544, -0.4684]) tensor([0.2775, 0.2150, 0.2429, 0.2647]) -Greedy action tensor([ 0.9830, -0.5644, -0.2363, 0.9557]) tensor([0.4030, 0.0858, 0.1191, 0.3922]) -Greedy action tensor([ 1.3814, -1.0101, -0.0523, 0.9842]) tensor([0.4995, 0.0457, 0.1191, 0.3358]) -Greedy action tensor([-0.2212, -0.9640, 0.8255, -0.7456]) tensor([0.2034, 0.0968, 0.5794, 0.1204]) -Greedy action tensor([-0.9728, -0.7659, 0.4649, -0.5667]) tensor([0.1259, 0.1549, 0.5302, 0.1890]) -Greedy action tensor([1.0817, 0.6760, 0.7830, 0.6813]) tensor([0.3248, 0.2165, 0.2410, 0.2177]) -Greedy action tensor([-0.3723, -1.4107, -0.5813, 0.5296]) tensor([0.2160, 0.0765, 0.1753, 0.5323]) -Greedy action tensor([-1.0701, -0.9145, -0.0739, -0.5772]) tensor([0.1535, 0.1794, 0.4158, 0.2513]) -Greedy action tensor([-1.3053, -1.3514, -1.1588, 0.0884]) tensor([0.1400, 0.1337, 0.1621, 0.5642]) -Greedy action tensor([-0.2883, -1.5700, 0.1205, -0.5643]) tensor([0.2824, 0.0784, 0.4250, 0.2143]) -Greedy action tensor([ 0.4359, -1.0957, -0.1221, 0.0077]) tensor([0.4098, 0.0886, 0.2346, 0.2671]) -Greedy action tensor([-0.2695, -0.0815, -0.1724, -1.1470]) tensor([0.2685, 0.3240, 0.2959, 0.1116]) -Greedy action tensor([-0.8276, -0.6676, -1.2170, 0.0540]) tensor([0.1899, 0.2229, 0.1287, 0.4586]) -Greedy action tensor([-0.5441, 0.4702, 0.4884, -0.2491]) tensor([0.1264, 0.3487, 0.3550, 0.1698]) -Greedy action tensor([-0.4660, -0.9106, 0.4125, -0.8694]) tensor([0.2120, 0.1359, 0.5104, 0.1416]) -Greedy action tensor([ 1.2429, -0.1227, -0.0747, 0.1790]) tensor([0.5353, 0.1366, 0.1433, 0.1847]) -Greedy action tensor([-0.3552, -1.6355, 1.6842, -0.3518]) tensor([0.1003, 0.0279, 0.7711, 0.1007]) -Greedy action tensor([-9.8312e-04, -1.0612e+00, 5.4274e-01, -8.1523e-01]) tensor([0.2848, 0.0986, 0.4905, 0.1261]) -Greedy action tensor([ 1.3786, -0.7322, 1.2541, -0.0533]) tensor([0.4458, 0.0540, 0.3937, 0.1065]) -Greedy action tensor([-0.2029, 0.1350, 0.1932, -0.4249]) tensor([0.2133, 0.2990, 0.3169, 0.1708]) -Greedy action tensor([ 1.0581, -0.1081, -0.7343, 0.7886]) tensor([0.4460, 0.1390, 0.0743, 0.3407]) -Greedy action tensor([ 0.5472, -0.4263, -0.3627, 0.0256]) tensor([0.4212, 0.1591, 0.1696, 0.2500]) -Greedy action tensor([-0.6501, -0.6585, -1.0435, -0.6814]) tensor([0.2751, 0.2728, 0.1856, 0.2666]) -Greedy action tensor([-0.2863, 0.2457, -0.7769, 0.0315]) tensor([0.2133, 0.3631, 0.1306, 0.2931]) -Greedy action tensor([-0.1452, -1.5224, 0.4371, -0.7998]) tensor([0.2807, 0.0708, 0.5025, 0.1459]) -Greedy action tensor([-0.0411, -0.4875, 0.7037, -0.7606]) tensor([0.2362, 0.1512, 0.4975, 0.1151]) -Greedy action tensor([ 0.9630, 0.5446, -0.1809, -0.0956]) tensor([0.4304, 0.2832, 0.1371, 0.1493]) -Greedy action tensor([-0.9147, -0.7638, 0.8584, -1.7681]) tensor([0.1180, 0.1372, 0.6946, 0.0502]) -Greedy action tensor([ 0.4484, -1.7413, 1.9881, -0.7709]) tensor([0.1647, 0.0184, 0.7682, 0.0487]) -Greedy action tensor([-1.8536, -0.3935, 0.6135, -0.1458]) tensor([0.0442, 0.1905, 0.5213, 0.2440]) -Greedy action tensor([-1.9106, -0.4257, 0.6526, -0.1596]) tensor([0.0414, 0.1828, 0.5373, 0.2385]) -Greedy action tensor([-1.8885, -0.4207, 0.6400, -0.1519]) tensor([0.0425, 0.1843, 0.5322, 0.2411]) -Greedy action tensor([-0.7852, 0.3534, 0.5418, 0.1793]) tensor([0.0951, 0.2969, 0.3585, 0.2495]) -Greedy action tensor([-1.4527, -0.3880, 0.7778, 0.7881]) tensor([0.0442, 0.1283, 0.4116, 0.4159]) -Greedy action tensor([-1.9117, -0.4078, 0.6520, -0.1614]) tensor([0.0413, 0.1856, 0.5357, 0.2375]) -Greedy action tensor([-1.8134, -0.4249, 0.6161, -0.1039]) tensor([0.0457, 0.1832, 0.5187, 0.2525]) -Greedy action tensor([-1.8996, -0.4265, 0.6421, -0.1589]) tensor([0.0421, 0.1836, 0.5344, 0.2399]) -Greedy action tensor([-1.9411, -0.4437, 0.6677, -0.1753]) tensor([0.0402, 0.1795, 0.5455, 0.2348]) -Greedy action tensor([-1.8035, -0.2864, 0.6312, -0.0901]) tensor([0.0444, 0.2025, 0.5068, 0.2464]) -Greedy action tensor([-1.8737, -0.4408, 0.6442, -0.1395]) tensor([0.0430, 0.1802, 0.5333, 0.2436]) -Greedy action tensor([-1.9370, -0.4282, 0.6600, -0.1750]) tensor([0.0404, 0.1825, 0.5420, 0.2351]) -Greedy action tensor([-1.8921, -0.3529, 0.6377, -0.1487]) tensor([0.0418, 0.1948, 0.5245, 0.2389]) -Greedy action tensor([-1.9223, -0.3996, 0.6462, -0.1748]) tensor([0.0410, 0.1881, 0.5353, 0.2355]) -Greedy action tensor([-1.9204, -0.4793, 0.6871, -0.1762]) tensor([0.0408, 0.1724, 0.5534, 0.2334]) -Greedy action tensor([0.3008, 1.2719, 0.0278, 0.4444]) tensor([0.1800, 0.4753, 0.1370, 0.2078]) -Greedy action tensor([-1.8776, -0.4880, 0.6490, -0.1392]) tensor([0.0431, 0.1729, 0.5390, 0.2451]) -Greedy action tensor([-1.8715, -0.4037, 0.6307, -0.1306]) tensor([0.0430, 0.1866, 0.5251, 0.2453]) -Greedy action tensor([-1.6330, -0.1756, 0.4816, -0.0031]) tensor([0.0535, 0.2299, 0.4435, 0.2731]) -Greedy action tensor([-0.7659, -0.0944, 0.2443, -0.1378]) tensor([0.1320, 0.2583, 0.3624, 0.2473]) -Greedy action tensor([-0.8137, -0.2126, 0.2436, -0.4427]) tensor([0.1398, 0.2550, 0.4025, 0.2026]) -Greedy action tensor([-1.9090, -0.4486, 0.6535, -0.1606]) tensor([0.0416, 0.1793, 0.5399, 0.2392]) -Greedy action tensor([-0.7645, 0.6587, 0.0934, -0.0224]) tensor([0.1041, 0.4319, 0.2454, 0.2186]) -Greedy action tensor([-1.8033, -0.3750, 0.6368, -0.0858]) tensor([0.0450, 0.1878, 0.5165, 0.2507]) -Greedy action tensor([-1.8743, -0.3825, 0.6078, -0.1327]) tensor([0.0433, 0.1923, 0.5176, 0.2468]) -Greedy action tensor([-1.8847, -0.4439, 0.6373, -0.1508]) tensor([0.0428, 0.1810, 0.5336, 0.2426]) -Greedy action tensor([-1.8799, -0.3109, 0.6312, -0.1337]) tensor([0.0419, 0.2013, 0.5164, 0.2403]) -Greedy action tensor([-0.6989, 0.2350, 0.4544, 0.6171]) tensor([0.0958, 0.2437, 0.3035, 0.3571]) -Greedy action tensor([-1.8943, -0.3056, 0.6292, -0.1501]) tensor([0.0415, 0.2033, 0.5177, 0.2375]) -Greedy action tensor([-1.7918, -0.3151, 0.5921, -0.1633]) tensor([0.0469, 0.2054, 0.5087, 0.2390]) -Greedy action tensor([-1.8834, -0.4544, 0.6401, -0.1498]) tensor([0.0429, 0.1791, 0.5351, 0.2429]) -Greedy action tensor([-1.6087, 0.5481, 0.4058, 0.0458]) tensor([0.0447, 0.3864, 0.3351, 0.2338]) -Greedy action tensor([-1.8750, -0.4496, 0.6378, -0.1419]) tensor([0.0432, 0.1796, 0.5329, 0.2443]) -Greedy action tensor([-1.5988, 0.1089, 0.4603, 0.0035]) tensor([0.0518, 0.2855, 0.4058, 0.2570]) -Greedy action tensor([-1.7253, -0.5027, 0.5609, -0.0533]) tensor([0.0511, 0.1737, 0.5030, 0.2722]) -Greedy action tensor([-1.4033, 0.2022, 0.2376, 0.1450]) tensor([0.0631, 0.3144, 0.3257, 0.2969]) -Greedy action tensor([-1.8177, -0.4658, 0.6177, -0.0835]) tensor([0.0456, 0.1761, 0.5203, 0.2581]) -Greedy action tensor([-1.8179, -0.4334, 0.6027, -0.1298]) tensor([0.0462, 0.1844, 0.5197, 0.2498]) -Greedy action tensor([-1.8710, -0.2053, 0.5906, -0.1340]) tensor([0.0422, 0.2232, 0.4948, 0.2397]) -Greedy action tensor([-1.9084, -0.4008, 0.6572, -0.1309]) tensor([0.0409, 0.1848, 0.5323, 0.2420]) -Greedy action tensor([-1.8824, -0.4623, 0.6404, -0.1417]) tensor([0.0429, 0.1776, 0.5348, 0.2447]) -Greedy action tensor([-1.8431, -0.4135, 0.6160, -0.1319]) tensor([0.0446, 0.1864, 0.5219, 0.2470]) -Greedy action tensor([-1.6424, -0.3468, 0.6442, 0.0354]) tensor([0.0504, 0.1840, 0.4958, 0.2697]) -Greedy action tensor([-0.8821, 0.6569, 0.1547, -0.0676]) tensor([0.0931, 0.4340, 0.2626, 0.2103]) -Greedy action tensor([-1.9146, -0.4304, 0.6543, -0.1656]) tensor([0.0413, 0.1822, 0.5391, 0.2374]) -Greedy action tensor([-1.7369, 0.1341, 0.4987, -0.0293]) tensor([0.0447, 0.2904, 0.4182, 0.2466]) -Greedy action tensor([-1.9015, -0.4118, 0.6411, -0.1565]) tensor([0.0419, 0.1858, 0.5325, 0.2398]) -Greedy action tensor([-1.7463, -0.3025, 0.5776, -0.0245]) tensor([0.0475, 0.2013, 0.4854, 0.2658]) -Greedy action tensor([-1.9223, -0.4369, 0.6481, -0.1668]) tensor([0.0412, 0.1820, 0.5385, 0.2384]) -Greedy action tensor([-1.9327, -0.4400, 0.6666, -0.1716]) tensor([0.0404, 0.1800, 0.5442, 0.2354]) -Greedy action tensor([-1.5197, -0.3125, 0.4284, 0.0681]) tensor([0.0615, 0.2058, 0.4317, 0.3011]) -Greedy action tensor([-1.7748, -0.2433, 0.6124, -0.2940]) tensor([0.0478, 0.2213, 0.5206, 0.2103]) -Greedy action tensor([-1.5413, 0.3880, 0.3558, 0.2798]) tensor([0.0482, 0.3321, 0.3216, 0.2981]) -Greedy action tensor([-1.9197, -0.4702, 0.6719, -0.1460]) tensor([0.0408, 0.1739, 0.5448, 0.2405]) -Greedy action tensor([-1.9018, -0.4363, 0.6485, -0.1468]) tensor([0.0418, 0.1810, 0.5355, 0.2417]) -Greedy action tensor([-1.9409, -0.4530, 0.6696, -0.1783]) tensor([0.0402, 0.1781, 0.5473, 0.2344]) -Greedy action tensor([-1.8975, -0.4523, 0.6485, -0.1560]) tensor([0.0422, 0.1790, 0.5381, 0.2407]) -Greedy action tensor([-1.8357, -0.1821, 0.5889, -0.0619]) tensor([0.0427, 0.2232, 0.4825, 0.2517]) -Greedy action tensor([-0.7848, 0.0498, 0.4049, 0.7944]) tensor([0.0874, 0.2014, 0.2872, 0.4240]) -Greedy action tensor([-1.0733, 0.8256, 0.2089, 0.0458]) tensor([0.0697, 0.4656, 0.2513, 0.2135]) -Greedy action tensor([-1.5341, -0.5297, 0.4835, -0.0836]) tensor([0.0645, 0.1760, 0.4847, 0.2749]) -Greedy action tensor([-1.7225, 0.1377, 0.4755, -0.0300]) tensor([0.0457, 0.2939, 0.4119, 0.2485]) -Greedy action tensor([-1.8536, -0.4615, 0.6272, -0.1397]) tensor([0.0444, 0.1786, 0.5306, 0.2464]) -Greedy action tensor([-1.7200, -0.4662, 0.5544, -0.0690]) tensor([0.0514, 0.1802, 0.5002, 0.2682]) -Greedy action tensor([-1.6732, -0.2045, 0.4966, -0.0739]) tensor([0.0525, 0.2280, 0.4597, 0.2598]) -Greedy action tensor([-1.9055, -0.4579, 0.6476, -0.1639]) tensor([0.0420, 0.1786, 0.5396, 0.2397]) -Greedy action tensor([-1.4441, 0.5925, 0.2754, 0.1941]) tensor([0.0516, 0.3952, 0.2878, 0.2654]) -Greedy action tensor([-1.0706, 0.3081, 0.1966, 0.0630]) tensor([0.0860, 0.3414, 0.3054, 0.2672]) -Greedy action tensor([-1.1883, -0.1037, 0.5416, 0.2764]) tensor([0.0718, 0.2124, 0.4051, 0.3107]) -Greedy action tensor([-1.8646, -0.2508, 0.6056, -0.1294]) tensor([0.0425, 0.2135, 0.5028, 0.2411]) -Greedy action tensor([-1.6114, -0.2048, 0.5544, 0.0040]) tensor([0.0531, 0.2167, 0.4631, 0.2671]) -Greedy action tensor([-0.7467, -0.4008, 0.2507, -0.0712]) tensor([0.1411, 0.1993, 0.3824, 0.2772]) -Greedy action tensor([-1.6662, -0.3802, 0.5180, -0.0887]) tensor([0.0545, 0.1972, 0.4843, 0.2640]) -Greedy action tensor([-1.8737, -0.4407, 0.6367, -0.1383]) tensor([0.0432, 0.1809, 0.5312, 0.2447]) -Greedy action tensor([-0.9628, -0.4616, 0.3514, 0.8149]) tensor([0.0814, 0.1343, 0.3029, 0.4814]) -Greedy action tensor([-1.7382, -0.5661, 0.6359, -0.1130]) tensor([0.0499, 0.1610, 0.5358, 0.2533]) -Greedy action tensor([-1.7498, -0.3253, 0.5402, -0.0415]) tensor([0.0487, 0.2022, 0.4805, 0.2686]) -Greedy action tensor([-1.5468, -0.3280, 0.4583, -0.0546]) tensor([0.0615, 0.2081, 0.4568, 0.2736]) -Greedy action tensor([-1.9202, -0.4549, 0.6612, -0.1665]) tensor([0.0411, 0.1780, 0.5434, 0.2375]) -Greedy action tensor([-1.8809, -0.4386, 0.6345, -0.1490]) tensor([0.0430, 0.1819, 0.5320, 0.2430]) -Greedy action tensor([-1.3289, -0.3442, 0.2947, 0.1697]) tensor([0.0756, 0.2024, 0.3835, 0.3384]) -Greedy action tensor([ 0.7075, -0.5166, -0.0277, -0.4050]) tensor([0.4757, 0.1399, 0.2280, 0.1564]) -Greedy action tensor([ 0.2880, 0.1077, -0.3396, -0.5908]) tensor([0.3592, 0.2999, 0.1918, 0.1492]) -Greedy action tensor([ 0.6297, -0.2262, -0.1106, -0.4961]) tensor([0.4492, 0.1909, 0.2143, 0.1457]) -Greedy action tensor([ 0.7331, -0.4685, -0.0864, -0.2649]) tensor([0.4739, 0.1425, 0.2088, 0.1747]) -Greedy action tensor([ 0.8709, -0.6957, -0.1022, -0.3587]) tensor([0.5322, 0.1111, 0.2011, 0.1556]) -Greedy action tensor([ 0.8075, -0.6869, -0.1903, -0.2651]) tensor([0.5168, 0.1159, 0.1905, 0.1768]) -Greedy action tensor([ 0.8457, -0.4194, -0.1862, -0.1433]) tensor([0.4974, 0.1404, 0.1772, 0.1850]) -Greedy action tensor([ 0.7406, -0.6619, -0.0613, -0.3270]) tensor([0.4906, 0.1207, 0.2200, 0.1687]) -Greedy action tensor([ 1.0967, -0.7464, 0.1868, -0.4141]) tensor([0.5613, 0.0889, 0.2259, 0.1239]) -Greedy action tensor([ 0.9031, -0.4272, -0.0884, -0.4791]) tensor([0.5301, 0.1402, 0.1967, 0.1331]) -Greedy action tensor([ 0.6629, -0.3985, -0.2295, -0.3280]) tensor([0.4702, 0.1627, 0.1926, 0.1745]) -Greedy action tensor([ 0.9009, -0.5442, -0.1308, -0.3271]) tensor([0.5305, 0.1250, 0.1891, 0.1554]) -Greedy action tensor([ 0.6180, -0.2666, 0.0723, -0.1461]) tensor([0.4068, 0.1680, 0.2357, 0.1895]) -Greedy action tensor([ 0.8805, -0.2031, -0.0398, -0.1057]) tensor([0.4740, 0.1604, 0.1888, 0.1768]) -Greedy action tensor([ 0.3212, -0.0773, -0.0090, -0.2845]) tensor([0.3406, 0.2287, 0.2448, 0.1859]) -Greedy action tensor([ 0.4859, -0.1697, 0.0582, 0.1012]) tensor([0.3507, 0.1820, 0.2286, 0.2387]) -Greedy action tensor([ 0.9043, -0.9930, 0.1093, -0.3236]) tensor([0.5278, 0.0792, 0.2384, 0.1546]) -Greedy action tensor([ 0.7488, -0.3783, -0.0344, -0.1816]) tensor([0.4597, 0.1489, 0.2101, 0.1813]) -Greedy action tensor([ 0.5012, -0.3648, -0.0394, -0.5104]) tensor([0.4225, 0.1777, 0.2461, 0.1536]) -Greedy action tensor([ 0.0778, -0.0224, -0.2358, 0.0314]) tensor([0.2785, 0.2520, 0.2036, 0.2659]) -Greedy action tensor([ 0.8456, -0.5320, -0.1027, -0.3284]) tensor([0.5131, 0.1294, 0.1988, 0.1586]) -Greedy action tensor([ 0.9679, -0.6860, 0.0068, -0.3603]) tensor([0.5439, 0.1040, 0.2080, 0.1441]) -Greedy action tensor([ 0.6635, -0.5057, 0.0581, -0.4776]) tensor([0.4596, 0.1427, 0.2509, 0.1468]) -Greedy action tensor([ 0.5665, -0.1493, -0.1508, -0.1022]) tensor([0.4017, 0.1964, 0.1961, 0.2058]) -Greedy action tensor([ 0.3481, -0.0282, -0.0248, -0.1965]) tensor([0.3384, 0.2323, 0.2331, 0.1963]) -Greedy action tensor([ 0.9048, -0.6867, 0.0132, -0.4074]) tensor([0.5311, 0.1081, 0.2177, 0.1430]) -Greedy action tensor([ 0.7689, -0.8224, 0.1329, -0.4363]) tensor([0.4920, 0.1002, 0.2604, 0.1474]) -Greedy action tensor([ 0.9510, -0.3059, 0.1340, -0.4069]) tensor([0.5042, 0.1435, 0.2227, 0.1297]) -Greedy action tensor([ 0.7357, -0.3380, -0.0135, -0.0828]) tensor([0.4433, 0.1515, 0.2096, 0.1956]) -Greedy action tensor([ 0.6786, -0.7834, 0.2021, -0.7397]) tensor([0.4774, 0.1106, 0.2964, 0.1156]) -Greedy action tensor([ 1.2575, -0.4459, -0.2320, -0.6362]) tensor([0.6418, 0.1169, 0.1447, 0.0966]) -Greedy action tensor([ 0.5097, -0.5914, 0.2142, -0.5221]) tensor([0.4110, 0.1367, 0.3059, 0.1465]) -Greedy action tensor([ 0.9207, -0.6246, 0.0703, -0.3734]) tensor([0.5223, 0.1114, 0.2231, 0.1432]) -Greedy action tensor([ 0.7943, -0.1552, -0.0607, -0.1387]) tensor([0.4534, 0.1754, 0.1928, 0.1783]) -Greedy action tensor([ 0.6511, -0.3190, -0.0576, -0.1410]) tensor([0.4303, 0.1631, 0.2118, 0.1949]) -Greedy action tensor([ 0.8760, -0.6734, 0.1804, -0.1960]) tensor([0.4870, 0.1034, 0.2429, 0.1667]) -Greedy action tensor([ 0.2100, 0.3736, -0.3638, -0.0649]) tensor([0.2857, 0.3364, 0.1609, 0.2170]) -Greedy action tensor([ 0.3783, -0.5873, -0.1794, -0.1266]) tensor([0.3911, 0.1489, 0.2239, 0.2361]) -Greedy action tensor([ 0.9944, -0.5640, -0.2132, -0.1854]) tensor([0.5504, 0.1159, 0.1645, 0.1692]) -Greedy action tensor([ 0.5589, -0.2176, -0.0547, -0.4717]) tensor([0.4240, 0.1951, 0.2296, 0.1513]) -Greedy action tensor([ 0.8668, -0.6475, -0.0610, -0.3324]) tensor([0.5217, 0.1148, 0.2063, 0.1573]) -Greedy action tensor([ 0.9523, -0.7068, 0.0511, -0.4558]) tensor([0.5432, 0.1034, 0.2206, 0.1329]) -Greedy action tensor([ 1.2835, -0.6735, -0.0598, -0.5685]) tensor([0.6414, 0.0906, 0.1674, 0.1006]) -Greedy action tensor([ 0.8562, -0.0759, -0.0439, -0.4756]) tensor([0.4844, 0.1907, 0.1969, 0.1279]) -Greedy action tensor([ 1.1723, -0.5746, 0.0939, -0.4909]) tensor([0.5869, 0.1023, 0.1996, 0.1112]) -Greedy action tensor([ 0.7303, -0.4900, -0.0845, -0.2013]) tensor([0.4691, 0.1385, 0.2077, 0.1848]) -Greedy action tensor([ 0.6906, -0.0038, 0.0623, 0.0022]) tensor([0.3944, 0.1970, 0.2104, 0.1982]) -Greedy action tensor([ 0.7280, -0.2784, -0.0856, -0.0903]) tensor([0.4445, 0.1625, 0.1970, 0.1961]) -Greedy action tensor([ 0.6465, -0.3862, -0.0032, -0.1180]) tensor([0.4267, 0.1519, 0.2228, 0.1986]) -Greedy action tensor([ 0.5561, -0.2606, 0.0558, -0.2539]) tensor([0.4011, 0.1772, 0.2432, 0.1784]) -Greedy action tensor([ 0.8703, -0.7469, 0.1254, -0.3656]) tensor([0.5092, 0.1011, 0.2418, 0.1480]) -Greedy action tensor([ 0.2238, 0.2466, -0.0757, 0.0690]) tensor([0.2762, 0.2825, 0.2047, 0.2366]) -Greedy action tensor([ 0.8962, -1.1711, 0.0697, -0.4945]) tensor([0.5516, 0.0698, 0.2414, 0.1373]) -Greedy action tensor([ 1.1306, -0.6408, -0.0736, -0.3510]) tensor([0.5892, 0.1002, 0.1767, 0.1339]) -Greedy action tensor([ 1.0341, -0.3245, 0.0381, -0.4679]) tensor([0.5408, 0.1390, 0.1998, 0.1204]) -Greedy action tensor([ 0.8863, -0.7517, 0.0061, -0.2848]) tensor([0.5211, 0.1013, 0.2161, 0.1615]) -Greedy action tensor([ 0.6535, -0.2495, 0.0577, -0.4055]) tensor([0.4342, 0.1760, 0.2393, 0.1506]) -Greedy action tensor([ 1.0423, -0.5459, -0.0184, -0.2579]) tensor([0.5486, 0.1121, 0.1899, 0.1495]) -Greedy action tensor([ 1.3094, -0.7201, -0.0532, -1.0354]) tensor([0.6742, 0.0886, 0.1726, 0.0646]) -Greedy action tensor([ 0.6864, -0.3329, 0.0203, -0.3175]) tensor([0.4462, 0.1610, 0.2292, 0.1635]) -Greedy action tensor([ 0.8533, -0.4234, -0.1038, -0.2500]) tensor([0.5013, 0.1398, 0.1925, 0.1663]) -Greedy action tensor([ 0.7303, -0.4988, 0.2000, -0.6177]) tensor([0.4671, 0.1367, 0.2749, 0.1213]) -Greedy action tensor([ 0.6580, -0.3789, -0.0975, -0.1469]) tensor([0.4402, 0.1561, 0.2068, 0.1968]) -Greedy action tensor([ 1.0469, -0.7553, 0.0018, -0.2373]) tensor([0.5576, 0.0920, 0.1961, 0.1544]) -Greedy action tensor([ 0.6546, -0.3558, -0.0093, -0.3678]) tensor([0.4467, 0.1626, 0.2300, 0.1607]) -Greedy action tensor([ 0.9038, -0.3657, -0.0424, -0.1664]) tensor([0.4970, 0.1396, 0.1929, 0.1704]) -Greedy action tensor([ 0.6850, -0.5381, -0.1224, -0.0749]) tensor([0.4529, 0.1333, 0.2020, 0.2118]) -Greedy action tensor([ 0.9066, -0.6855, -0.0578, -0.2523]) tensor([0.5267, 0.1072, 0.2008, 0.1653]) -Greedy action tensor([ 1.2227, -0.2675, -0.0279, -0.1333]) tensor([0.5652, 0.1274, 0.1618, 0.1456]) -Greedy action tensor([ 0.6155, -0.1745, -0.0668, -0.4930]) tensor([0.4368, 0.1982, 0.2208, 0.1442]) -Greedy action tensor([ 0.9871, 0.0936, 0.1586, -0.1803]) tensor([0.4636, 0.1897, 0.2024, 0.1443]) -Greedy action tensor([ 0.8240, -0.4895, -0.0185, -0.4443]) tensor([0.5048, 0.1357, 0.2174, 0.1420]) -Greedy action tensor([ 0.4827, -0.0627, 0.1500, -0.3195]) tensor([0.3643, 0.2112, 0.2612, 0.1633]) -Greedy action tensor([ 0.8352, -0.8008, 0.2058, -0.6397]) tensor([0.5111, 0.0995, 0.2724, 0.1169]) -Greedy action tensor([ 0.7345, -0.5452, 0.0527, -0.3807]) tensor([0.4735, 0.1317, 0.2395, 0.1553]) -Greedy action tensor([ 0.5495, -0.4572, -0.1255, -0.0641]) tensor([0.4139, 0.1513, 0.2107, 0.2241]) -Greedy action tensor([ 0.8790, -0.9318, 0.0652, -0.5062]) tensor([0.5385, 0.0881, 0.2387, 0.1348]) -Greedy action tensor([ 0.6878, -0.3529, 0.0143, -0.2532]) tensor([0.4438, 0.1568, 0.2263, 0.1732]) -Greedy action tensor([ 0.2068, -0.0118, -0.0966, 0.0265]) tensor([0.2961, 0.2380, 0.2186, 0.2473]) -Greedy action tensor([ 0.7945, -0.6943, 0.0595, -0.4197]) tensor([0.4995, 0.1127, 0.2395, 0.1483]) -Greedy action tensor([ 1.5791, -1.0574, -0.0723, -0.7180]) tensor([0.7332, 0.0525, 0.1406, 0.0737]) -Greedy action tensor([ 1.3371, -0.4689, -0.2701, 0.0367]) tensor([0.6108, 0.1004, 0.1224, 0.1664]) -Greedy action tensor([ 1.4209, -0.3557, -0.8063, 0.3954]) tensor([0.6114, 0.1034, 0.0659, 0.2192]) -Greedy action tensor([ 1.6443, -0.7052, -0.2655, 0.3076]) tensor([0.6639, 0.0633, 0.0983, 0.1744]) -Greedy action tensor([ 1.5488, 0.3447, -0.3940, -0.2068]) tensor([0.6188, 0.1856, 0.0887, 0.1069]) -Greedy action tensor([ 1.9418, -0.6908, -0.6026, 0.7473]) tensor([0.6881, 0.0495, 0.0540, 0.2084]) -Greedy action tensor([ 2.2289, -0.8658, -0.3656, 0.4388]) tensor([0.7770, 0.0352, 0.0580, 0.1297]) -Greedy action tensor([ 1.3366, -0.6658, -0.3934, 0.4071]) tensor([0.5858, 0.0791, 0.1039, 0.2313]) -Greedy action tensor([ 1.4855, -0.3454, -0.7473, 0.3530]) tensor([0.6290, 0.1008, 0.0675, 0.2027]) -Greedy action tensor([ 1.6279, -0.4921, -0.9165, 0.2583]) tensor([0.6883, 0.0826, 0.0541, 0.1750]) -Greedy action tensor([ 1.3783, -0.3191, -0.7723, 0.6046]) tensor([0.5679, 0.1040, 0.0661, 0.2620]) -Greedy action tensor([ 0.9605, 0.4137, -0.5734, 0.2164]) tensor([0.4406, 0.2550, 0.0950, 0.2094]) -Greedy action tensor([ 0.9250, -0.2318, -0.2091, 0.3802]) tensor([0.4512, 0.1419, 0.1452, 0.2617]) -Greedy action tensor([ 1.8391, -0.4163, -0.1521, -0.2527]) tensor([0.7327, 0.0768, 0.1000, 0.0905]) -Greedy action tensor([ 1.6109, -0.6738, -0.3722, 0.3370]) tensor([0.6583, 0.0670, 0.0906, 0.1841]) -Greedy action tensor([ 1.6122, -0.3239, -0.3055, 0.1621]) tensor([0.6554, 0.0946, 0.0963, 0.1537]) -Greedy action tensor([ 1.3175, -0.3944, -0.0893, 0.0127]) tensor([0.5894, 0.1064, 0.1444, 0.1599]) -Greedy action tensor([ 1.7303, -0.7172, -0.2302, 0.1786]) tensor([0.6948, 0.0601, 0.0978, 0.1472]) -Greedy action tensor([ 1.9670, -0.7478, -0.5455, -0.0556]) tensor([0.7815, 0.0517, 0.0633, 0.1034]) -Greedy action tensor([ 1.8284, -0.7643, -0.3713, -0.0418]) tensor([0.7464, 0.0558, 0.0827, 0.1150]) -Greedy action tensor([ 1.3868, -0.4846, -0.5207, 0.5281]) tensor([0.5793, 0.0892, 0.0860, 0.2455]) -Greedy action tensor([ 1.5555, -0.5102, -0.0340, 0.2897]) tensor([0.6201, 0.0786, 0.1265, 0.1749]) -Greedy action tensor([ 1.9638, -0.3157, -0.6222, 0.2253]) tensor([0.7389, 0.0756, 0.0557, 0.1299]) -Greedy action tensor([ 1.4258, -0.1797, -0.1168, 0.5799]) tensor([0.5424, 0.1089, 0.1160, 0.2328]) -Greedy action tensor([ 2.0359, -0.3958, -0.5092, 0.4884]) tensor([0.7251, 0.0637, 0.0569, 0.1543]) -Greedy action tensor([ 1.3011, -0.2209, -0.7049, 0.3939]) tensor([0.5693, 0.1243, 0.0766, 0.2298]) -Greedy action tensor([ 1.6773, -0.4099, -0.7457, 0.2929]) tensor([0.6834, 0.0848, 0.0606, 0.1712]) -Greedy action tensor([ 1.4279, -0.4096, -0.2928, -0.0394]) tensor([0.6375, 0.1015, 0.1141, 0.1470]) -Greedy action tensor([ 2.1147, 0.0082, -0.1303, 0.3762]) tensor([0.7126, 0.0867, 0.0755, 0.1253]) -Greedy action tensor([ 1.9251, -0.6610, -0.5692, 0.3326]) tensor([0.7346, 0.0553, 0.0606, 0.1494]) -Greedy action tensor([ 1.4200, -0.6683, -0.1798, 0.1967]) tensor([0.6173, 0.0765, 0.1246, 0.1816]) -Greedy action tensor([ 0.9149, -0.3862, -0.5055, 0.2073]) tensor([0.4983, 0.1357, 0.1204, 0.2456]) -Greedy action tensor([ 1.5580, -0.1364, -0.0355, -0.5938]) tensor([0.6652, 0.1222, 0.1352, 0.0774]) -Greedy action tensor([ 1.0534, -0.5536, -0.2875, 0.5507]) tensor([0.4838, 0.0970, 0.1266, 0.2926]) -Greedy action tensor([ 1.3603, -0.2443, -0.6859, 0.3155]) tensor([0.5945, 0.1195, 0.0768, 0.2091]) -Greedy action tensor([ 1.8336, -0.1349, -0.3267, 0.4695]) tensor([0.6620, 0.0925, 0.0763, 0.1692]) -Greedy action tensor([ 0.7648, -0.0743, -0.1045, -0.0506]) tensor([0.4360, 0.1884, 0.1828, 0.1929]) -Greedy action tensor([ 1.3535, -0.4850, -0.7619, 0.2172]) tensor([0.6247, 0.0994, 0.0753, 0.2005]) -Greedy action tensor([ 1.2846, -0.7760, -0.1927, 0.2143]) tensor([0.5887, 0.0750, 0.1344, 0.2019]) -Greedy action tensor([ 1.3188, -0.4717, -0.5393, 0.8757]) tensor([0.5089, 0.0849, 0.0794, 0.3268]) -Greedy action tensor([ 1.3467, -0.3567, -1.0163, 0.1939]) tensor([0.6282, 0.1144, 0.0591, 0.1983]) -Greedy action tensor([ 1.8672, 0.2101, -0.1900, 0.2717]) tensor([0.6573, 0.1254, 0.0840, 0.1333]) -Greedy action tensor([ 0.9348, -0.4256, -0.0293, 0.5403]) tensor([0.4325, 0.1110, 0.1650, 0.2915]) -Greedy action tensor([ 0.8068, -0.2305, -0.3229, 0.0009]) tensor([0.4708, 0.1668, 0.1521, 0.2103]) -Greedy action tensor([ 1.1562, -0.3884, -0.0532, 0.0435]) tensor([0.5434, 0.1159, 0.1621, 0.1786]) -Greedy action tensor([ 1.2162, -0.0821, -0.7451, -0.0140]) tensor([0.5862, 0.1600, 0.0825, 0.1713]) -Greedy action tensor([ 1.6919, -0.7491, -0.3372, 0.1836]) tensor([0.6945, 0.0605, 0.0913, 0.1537]) -Greedy action tensor([ 1.2313, 0.0555, -0.5456, -0.3677]) tensor([0.5953, 0.1837, 0.1007, 0.1203]) -Greedy action tensor([ 2.2621, -1.0233, -0.0152, 0.7202]) tensor([0.7386, 0.0276, 0.0757, 0.1580]) -Greedy action tensor([ 1.4428, -0.4297, -0.3995, 0.2842]) tensor([0.6150, 0.0945, 0.0974, 0.1931]) -Greedy action tensor([ 1.4449, -0.4154, -0.4330, 0.6040]) tensor([0.5748, 0.0894, 0.0879, 0.2479]) -Greedy action tensor([ 1.2789, -0.4154, -1.1083, 0.3813]) tensor([0.5941, 0.1092, 0.0546, 0.2421]) -Greedy action tensor([ 1.2434, -0.5636, -0.0744, -0.0511]) tensor([0.5862, 0.0962, 0.1569, 0.1606]) -Greedy action tensor([ 1.4853, -0.3888, -0.5524, 0.6397]) tensor([0.5837, 0.0896, 0.0761, 0.2506]) -Greedy action tensor([ 1.5518, -0.5239, -0.6121, -0.0832]) tensor([0.6967, 0.0874, 0.0800, 0.1358]) -Greedy action tensor([ 1.4432, -0.7011, 0.0377, 0.2477]) tensor([0.6006, 0.0704, 0.1473, 0.1817]) -Greedy action tensor([ 1.3812, -0.4822, -0.3208, 0.1329]) tensor([0.6156, 0.0955, 0.1122, 0.1767]) -Greedy action tensor([ 1.5534, -0.5696, -0.3599, 0.1325]) tensor([0.6628, 0.0793, 0.0978, 0.1601]) -Greedy action tensor([ 1.2326, -0.1908, -1.1491, 0.4098]) tensor([0.5642, 0.1359, 0.0521, 0.2478]) -Greedy action tensor([ 1.4834, -0.2912, -0.5566, 0.2300]) tensor([0.6309, 0.1070, 0.0820, 0.1801]) -Greedy action tensor([ 2.2660, -1.2199, -0.0930, 0.7073]) tensor([0.7488, 0.0229, 0.0708, 0.1575]) -Greedy action tensor([ 2.0563, -0.8696, -0.0606, 0.9192]) tensor([0.6690, 0.0359, 0.0805, 0.2146]) -Greedy action tensor([ 1.6646, -0.2650, -0.7138, 0.4630]) tensor([0.6499, 0.0944, 0.0602, 0.1954]) -Greedy action tensor([ 1.5243, -0.4859, -0.7312, 0.0347]) tensor([0.6829, 0.0915, 0.0716, 0.1540]) -Greedy action tensor([ 1.4533, -0.4688, 0.0329, 0.2768]) tensor([0.5895, 0.0862, 0.1424, 0.1818]) -Greedy action tensor([ 1.3823, -0.5255, -0.7006, 0.1262]) tensor([0.6419, 0.0953, 0.0800, 0.1828]) -Greedy action tensor([ 2.3699, 0.7851, -0.0132, 0.2362]) tensor([0.7064, 0.1448, 0.0652, 0.0836]) -Greedy action tensor([ 1.3165, -0.5361, -0.5093, 0.6475]) tensor([0.5464, 0.0857, 0.0880, 0.2799]) -Greedy action tensor([ 1.2421, -0.1954, -0.6377, 0.4064]) tensor([0.5483, 0.1302, 0.0837, 0.2377]) -Greedy action tensor([ 1.3341, -0.4624, -0.3222, 0.3950]) tensor([0.5722, 0.0949, 0.1092, 0.2237]) -Greedy action tensor([ 1.8868, -0.3503, -0.5656, 0.7164]) tensor([0.6653, 0.0710, 0.0573, 0.2064]) -Greedy action tensor([ 1.4047, -0.3977, -0.6553, 0.1959]) tensor([0.6286, 0.1037, 0.0801, 0.1877]) -Greedy action tensor([ 1.3737, -0.8984, -0.2631, 0.5355]) tensor([0.5780, 0.0596, 0.1125, 0.2500]) -Greedy action tensor([ 1.7746, -0.8332, -0.2144, 0.2126]) tensor([0.7041, 0.0519, 0.0963, 0.1477]) -Greedy action tensor([ 2.1434, -0.4157, -0.0662, -0.4242]) tensor([0.7912, 0.0612, 0.0868, 0.0607]) -Greedy action tensor([ 0.9332, -0.2141, -0.8799, 0.3611]) tensor([0.4890, 0.1553, 0.0798, 0.2760]) -Greedy action tensor([ 2.1710, -1.2948, -0.2250, 0.4329]) tensor([0.7703, 0.0241, 0.0702, 0.1355]) -Greedy action tensor([ 1.9876, -1.1231, -0.1869, 0.3525]) tensor([0.7390, 0.0329, 0.0840, 0.1441]) -Greedy action tensor([1.8248, 0.3971, 0.6839, 0.3048]) tensor([0.5624, 0.1349, 0.1797, 0.1230]) -Greedy action tensor([ 2.1301, -1.2481, -0.3587, 0.7137]) tensor([0.7355, 0.0251, 0.0610, 0.1784]) -Greedy action tensor([ 1.2045, -0.2655, -1.1653, 0.2911]) tensor([0.5798, 0.1333, 0.0542, 0.2326]) -Greedy action tensor([ 1.2622, -0.5302, -0.5087, 0.4784]) tensor([0.5576, 0.0929, 0.0949, 0.2546]) -Greedy action tensor([ 0.2751, -1.0472, 0.1655, -0.9536]) tensor([0.4073, 0.1085, 0.3650, 0.1192]) -Greedy action tensor([ 0.3005, 0.2289, 0.1608, -0.3888]) tensor([0.3028, 0.2819, 0.2633, 0.1520]) -Greedy action tensor([ 0.1952, -0.9783, 0.4582, -0.6190]) tensor([0.3275, 0.1013, 0.4261, 0.1451]) -Greedy action tensor([ 0.5463, -1.5482, 0.3672, 0.8536]) tensor([0.3013, 0.0371, 0.2519, 0.4097]) -Greedy action tensor([-0.3444, -0.4759, -0.8647, -0.7156]) tensor([0.3164, 0.2774, 0.1880, 0.2182]) -Greedy action tensor([0.0791, 0.2899, 0.0145, 0.3363]) tensor([0.2239, 0.2765, 0.2099, 0.2896]) -Greedy action tensor([ 0.1193, -0.2874, -0.2096, 0.2693]) tensor([0.2819, 0.1877, 0.2029, 0.3275]) -Greedy action tensor([ 1.7638, -0.4771, 1.3150, 0.1156]) tensor([0.5162, 0.0549, 0.3296, 0.0993]) -Greedy action tensor([-0.0155, -1.6347, 0.0883, -0.4089]) tensor([0.3353, 0.0664, 0.3720, 0.2263]) -Greedy action tensor([-0.7015, -0.6302, 0.0300, -1.0339]) tensor([0.2054, 0.2206, 0.4268, 0.1473]) -Greedy action tensor([-0.9169, -0.2979, 0.1874, -0.7197]) tensor([0.1410, 0.2618, 0.4254, 0.1717]) -Greedy action tensor([ 0.8236, -2.1624, 0.6432, 0.5200]) tensor([0.3812, 0.0192, 0.3182, 0.2814]) -Greedy action tensor([-0.2706, -1.4408, -0.0276, 0.0884]) tensor([0.2489, 0.0772, 0.3174, 0.3564]) -Greedy action tensor([ 1.2142, 0.7114, -0.3187, -0.7202]) tensor([0.5088, 0.3078, 0.1099, 0.0735]) -Greedy action tensor([-0.3890, -1.3417, 0.8398, -0.4612]) tensor([0.1744, 0.0673, 0.5960, 0.1623]) -Greedy action tensor([ 1.5822, 0.4281, -0.2870, -0.0282]) tensor([0.5990, 0.1889, 0.0924, 0.1197]) -Greedy action tensor([ 0.7824, -0.9465, -0.0360, 0.2600]) tensor([0.4521, 0.0802, 0.1995, 0.2682]) -Greedy action tensor([ 0.1046, -0.5534, -0.3688, 0.1322]) tensor([0.3156, 0.1634, 0.1966, 0.3244]) -Greedy action tensor([ 0.1099, -1.6758, -0.4105, 0.2911]) tensor([0.3378, 0.0566, 0.2007, 0.4049]) -Greedy action tensor([0.2717, 0.0628, 0.4972, 0.4308]) tensor([0.2360, 0.1915, 0.2957, 0.2767]) -Greedy action tensor([ 0.3945, -0.3310, 0.3685, -0.3644]) tensor([0.3417, 0.1654, 0.3329, 0.1600]) -Greedy action tensor([ 0.4786, 0.2059, -0.2865, -0.7830]) tensor([0.3984, 0.3033, 0.1854, 0.1128]) -Greedy action tensor([-0.9818, -0.5530, 1.1256, -1.1317]) tensor([0.0860, 0.1321, 0.7078, 0.0741]) -Greedy action tensor([-0.7197, 0.6116, 0.7240, -1.0585]) tensor([0.1027, 0.3889, 0.4352, 0.0732]) -Greedy action tensor([-0.4248, -0.4997, 0.2167, -0.6618]) tensor([0.2166, 0.2010, 0.4114, 0.1709]) -Greedy action tensor([-0.9238, -0.6968, 0.3337, -0.7457]) tensor([0.1435, 0.1801, 0.5048, 0.1715]) -Greedy action tensor([-1.2069, 0.3716, -0.3482, -0.7046]) tensor([0.1014, 0.4916, 0.2394, 0.1676]) -Greedy action tensor([ 0.8833, -1.0337, -0.2716, -0.2901]) tensor([0.5645, 0.0830, 0.1779, 0.1746]) -Greedy action tensor([ 0.1998, -0.6374, 1.7166, -0.3875]) tensor([0.1527, 0.0661, 0.6962, 0.0849]) -Greedy action tensor([-0.5940, 0.3174, 0.1186, 0.2461]) tensor([0.1275, 0.3172, 0.2600, 0.2953]) -Greedy action tensor([-1.2569, 0.1410, 0.5541, -1.0227]) tensor([0.0805, 0.3256, 0.4922, 0.1017]) -Greedy action tensor([-0.2836, 0.5398, -0.1571, -0.1059]) tensor([0.1783, 0.4063, 0.2024, 0.2130]) -Greedy action tensor([-0.5148, 0.4598, 0.2113, -0.0432]) tensor([0.1366, 0.3620, 0.2824, 0.2189]) -Greedy action tensor([ 0.2857, -0.3618, -1.0433, -0.2842]) tensor([0.4249, 0.2224, 0.1125, 0.2403]) -Greedy action tensor([ 0.3209, 0.3475, -0.5969, 1.7846]) tensor([0.1482, 0.1522, 0.0592, 0.6404]) -Greedy action tensor([-0.5026, -0.6841, 0.3356, -0.7330]) tensor([0.2024, 0.1688, 0.4680, 0.1608]) -Greedy action tensor([ 0.7792, -1.5086, -0.2187, 0.4564]) tensor([0.4557, 0.0463, 0.1680, 0.3300]) -Greedy action tensor([-0.2497, 0.2176, 0.4113, -0.0137]) tensor([0.1725, 0.2752, 0.3340, 0.2184]) -Greedy action tensor([ 0.4748, -0.4366, 1.2899, -0.0697]) tensor([0.2358, 0.0948, 0.5327, 0.1368]) -Greedy action tensor([ 0.2880, -0.8941, 0.2234, -0.2852]) tensor([0.3561, 0.1092, 0.3339, 0.2008]) -Greedy action tensor([-0.0905, -0.8367, -0.5072, 0.5634]) tensor([0.2465, 0.1169, 0.1625, 0.4741]) -Greedy action tensor([-0.6167, -0.6454, -0.2196, -0.5035]) tensor([0.2184, 0.2122, 0.3248, 0.2446]) -Greedy action tensor([ 1.5726, -0.0854, -0.7700, 0.4841]) tensor([0.6160, 0.1174, 0.0592, 0.2074]) -Greedy action tensor([-0.3601, -1.1031, 1.2573, -0.1813]) tensor([0.1297, 0.0617, 0.6536, 0.1551]) -Greedy action tensor([-0.3306, -0.3440, 0.7476, -0.3175]) tensor([0.1684, 0.1661, 0.4949, 0.1706]) -Greedy action tensor([-0.0848, -1.1031, -0.8167, -0.1906]) tensor([0.3647, 0.1317, 0.1754, 0.3281]) -Greedy action tensor([ 0.5881, -1.1237, 0.0638, -0.3224]) tensor([0.4598, 0.0830, 0.2722, 0.1850]) -Greedy action tensor([-0.2586, -0.4151, 0.1773, -0.6558]) tensor([0.2455, 0.2099, 0.3796, 0.1650]) -Greedy action tensor([-0.0545, -1.7825, -0.6345, 0.3195]) tensor([0.3134, 0.0557, 0.1754, 0.4555]) -Greedy action tensor([ 0.7146, -1.2816, 0.3047, 0.2282]) tensor([0.4142, 0.0563, 0.2749, 0.2547]) -Greedy action tensor([ 0.3043, -0.3119, 0.1612, -0.3379]) tensor([0.3410, 0.1841, 0.2955, 0.1794]) -Greedy action tensor([-0.6985, -0.6896, -0.5943, -0.4572]) tensor([0.2277, 0.2297, 0.2527, 0.2898]) -Greedy action tensor([ 1.0538, 0.6640, -0.1858, 0.1186]) tensor([0.4239, 0.2870, 0.1227, 0.1664]) -Greedy action tensor([-1.1526, -1.1996, -0.0403, 0.4220]) tensor([0.1018, 0.0971, 0.3096, 0.4915]) -Greedy action tensor([-0.6599, 0.5246, -0.0374, -0.7080]) tensor([0.1411, 0.4614, 0.2630, 0.1345]) -Greedy action tensor([-1.0762, -0.4435, 0.5491, -2.3299]) tensor([0.1212, 0.2283, 0.6159, 0.0346]) -Greedy action tensor([ 1.2409, -0.5097, 1.5540, -0.5730]) tensor([0.3698, 0.0642, 0.5057, 0.0603]) -Greedy action tensor([-0.7552, -1.2561, -0.9199, -0.2118]) tensor([0.2395, 0.1451, 0.2031, 0.4123]) -Greedy action tensor([-0.0761, 0.2957, -0.0105, -0.6807]) tensor([0.2460, 0.3568, 0.2627, 0.1344]) -Greedy action tensor([ 1.1114, -0.2894, -0.3550, 0.3853]) tensor([0.5100, 0.1257, 0.1177, 0.2467]) -Greedy action tensor([ 0.8188, -0.9206, 0.7620, -0.4268]) tensor([0.4152, 0.0729, 0.3923, 0.1195]) -Greedy action tensor([-0.6118, -0.9433, -1.5472, 0.8665]) tensor([0.1539, 0.1105, 0.0604, 0.6751]) -Greedy action tensor([ 1.4356, -0.0730, 0.4150, -0.7868]) tensor([0.5917, 0.1309, 0.2132, 0.0641]) -Greedy action tensor([ 0.5204, -1.0677, -0.0923, -0.4126]) tensor([0.4674, 0.0955, 0.2533, 0.1839]) -Greedy action tensor([ 0.2166, -0.4742, 0.4599, -0.4733]) tensor([0.3050, 0.1529, 0.3891, 0.1530]) -Greedy action tensor([ 0.1308, -1.3146, 0.3808, 0.0877]) tensor([0.2876, 0.0678, 0.3692, 0.2754]) -Greedy action tensor([ 0.7381, -0.0020, 0.4033, -0.0942]) tensor([0.3806, 0.1816, 0.2723, 0.1656]) -Greedy action tensor([-0.2498, -1.5893, 0.3479, -0.8477]) tensor([0.2755, 0.0722, 0.5008, 0.1515]) -Greedy action tensor([-0.6788, -0.2159, 0.3953, -1.1196]) tensor([0.1624, 0.2579, 0.4752, 0.1045]) -Greedy action tensor([-1.3274, -0.1047, -0.3146, -0.9093]) tensor([0.1154, 0.3918, 0.3176, 0.1752]) -Greedy action tensor([ 0.2232, -1.2988, 0.4449, -0.3890]) tensor([0.3324, 0.0725, 0.4149, 0.1802]) -Greedy action tensor([0.5791, 0.4972, 0.1510, 0.2424]) tensor([0.3042, 0.2803, 0.1983, 0.2172]) -Greedy action tensor([-0.7013, -0.8283, -0.0880, -0.4224]) tensor([0.1981, 0.1744, 0.3657, 0.2618]) -Greedy action tensor([ 0.9778, 0.0448, 0.3412, -0.4177]) tensor([0.4608, 0.1813, 0.2438, 0.1141]) -Greedy action tensor([ 0.3547, -0.2074, -0.8895, -0.4065]) tensor([0.4301, 0.2451, 0.1239, 0.2009]) -Greedy action tensor([ 0.9075, -0.9815, 0.6869, 0.3403]) tensor([0.3968, 0.0600, 0.3182, 0.2250]) -Greedy action tensor([ 0.6380, -1.2709, -0.5231, 0.3376]) tensor([0.4541, 0.0673, 0.1422, 0.3363]) -Greedy action tensor([ 0.0395, -1.4638, -0.7370, -0.1687]) tensor([0.4009, 0.0892, 0.1844, 0.3255]) -Greedy action tensor([ 0.2130, -0.5814, 0.3326, -0.4033]) tensor([0.3206, 0.1449, 0.3614, 0.1731]) -Greedy action tensor([-0.4594, -0.6267, 0.5405, -0.6085]) tensor([0.1843, 0.1559, 0.5010, 0.1588]) -Greedy action tensor([ 0.9861, -1.2582, -0.1661, 1.0062]) tensor([0.4094, 0.0434, 0.1294, 0.4178]) -Greedy action tensor([-1.0024, -1.1652, 0.5855, -1.1696]) tensor([0.1318, 0.1120, 0.6448, 0.1115]) -Greedy action tensor([ 0.7394, -0.3835, -0.1660, -0.1838]) tensor([0.4702, 0.1530, 0.1901, 0.1868]) -Greedy action tensor([ 0.8087, -0.3878, -0.0246, -0.2513]) tensor([0.4800, 0.1451, 0.2086, 0.1663]) -Greedy action tensor([ 1.1397, -0.6196, -0.1165, -0.6732]) tensor([0.6172, 0.1063, 0.1758, 0.1007]) -Greedy action tensor([ 0.7627, -0.6682, -0.0641, -0.1967]) tensor([0.4855, 0.1161, 0.2124, 0.1860]) -Greedy action tensor([ 0.7012, -0.3471, -0.1102, -0.1609]) tensor([0.4511, 0.1581, 0.2004, 0.1905]) -Greedy action tensor([ 0.4808, -0.1872, -0.1878, 0.0024]) tensor([0.3781, 0.1939, 0.1937, 0.2343]) -Greedy action tensor([ 0.8009, -0.3415, -0.0967, -0.1550]) tensor([0.4737, 0.1511, 0.1931, 0.1821]) -Greedy action tensor([ 0.9187, -0.8082, -0.0933, -0.3223]) tensor([0.5463, 0.0972, 0.1986, 0.1579]) -Greedy action tensor([ 0.6611, -0.3410, 0.0793, -0.0426]) tensor([0.4131, 0.1516, 0.2309, 0.2044]) -Greedy action tensor([0.7962, 0.0363, 0.0290, 0.1065]) tensor([0.4109, 0.1922, 0.1908, 0.2062]) -Greedy action tensor([ 0.4173, -0.2901, -0.1601, -0.0493]) tensor([0.3729, 0.1838, 0.2094, 0.2339]) -Greedy action tensor([ 0.7496, -0.4972, -0.1614, -0.1192]) tensor([0.4742, 0.1363, 0.1907, 0.1989]) -Greedy action tensor([ 0.3363, -0.1386, -0.0804, -0.2403]) tensor([0.3517, 0.2188, 0.2319, 0.1976]) -Greedy action tensor([ 0.8021, -0.5788, -0.1813, -0.6936]) tensor([0.5407, 0.1359, 0.2022, 0.1212]) -Greedy action tensor([ 0.5386, -0.1946, -0.0048, -0.0588]) tensor([0.3829, 0.1840, 0.2224, 0.2107]) -Greedy action tensor([ 0.7181, -0.4405, -0.0834, -0.1840]) tensor([0.4612, 0.1448, 0.2069, 0.1871]) -Greedy action tensor([ 0.9225, -0.5970, -0.0264, -0.5526]) tensor([0.5450, 0.1193, 0.2110, 0.1247]) -Greedy action tensor([ 1.3643, -1.1123, 0.0545, -0.7878]) tensor([0.6802, 0.0572, 0.1836, 0.0791]) -Greedy action tensor([ 0.6460, -0.1658, -0.0441, -0.1424]) tensor([0.4166, 0.1850, 0.2090, 0.1894]) -Greedy action tensor([ 0.6192, -0.2920, -0.1861, -0.1813]) tensor([0.4352, 0.1749, 0.1945, 0.1954]) -Greedy action tensor([ 1.3515, -0.8878, -0.0345, -0.8250]) tensor([0.6802, 0.0725, 0.1701, 0.0772]) -Greedy action tensor([ 0.9149, -0.6128, -0.1046, -0.4134]) tensor([0.5427, 0.1178, 0.1958, 0.1438]) -Greedy action tensor([ 0.6928, -0.5518, -0.1007, -0.1743]) tensor([0.4629, 0.1333, 0.2093, 0.1945]) -Greedy action tensor([ 0.6868, -0.5476, -0.1370, -0.2193]) tensor([0.4686, 0.1364, 0.2056, 0.1894]) -Greedy action tensor([ 1.1157, -0.9711, -0.0058, -0.7138]) tensor([0.6210, 0.0771, 0.2023, 0.0997]) -Greedy action tensor([ 0.8737, -0.3872, -0.1385, -0.0082]) tensor([0.4852, 0.1375, 0.1764, 0.2009]) -Greedy action tensor([ 1.0310, -0.6450, -0.1701, -0.3414]) tensor([0.5742, 0.1075, 0.1728, 0.1456]) -Greedy action tensor([ 0.7583, -0.4324, 0.0066, -0.2143]) tensor([0.4643, 0.1412, 0.2190, 0.1756]) -Greedy action tensor([ 1.3392, -0.6864, 0.0646, -0.5231]) tensor([0.6383, 0.0842, 0.1784, 0.0991]) -Greedy action tensor([ 0.7455, -0.4931, 0.0639, -0.3549]) tensor([0.4699, 0.1362, 0.2376, 0.1563]) -Greedy action tensor([ 0.6295, -0.4645, -0.0252, -0.1142]) tensor([0.4292, 0.1437, 0.2230, 0.2040]) -Greedy action tensor([ 0.6335, -0.0284, -0.1207, -0.0736]) tensor([0.4033, 0.2081, 0.1897, 0.1989]) -Greedy action tensor([ 0.6851, -0.2644, -0.0460, -0.2356]) tensor([0.4412, 0.1707, 0.2124, 0.1757]) -Greedy action tensor([ 0.9714, -0.9282, 0.2412, -0.4894]) tensor([0.5366, 0.0803, 0.2586, 0.1245]) -Greedy action tensor([ 0.7160, -0.4090, -0.1115, -0.4443]) tensor([0.4819, 0.1564, 0.2107, 0.1510]) -Greedy action tensor([ 0.9222, -0.2676, -0.2021, -0.4152]) tensor([0.5286, 0.1608, 0.1717, 0.1388]) -Greedy action tensor([ 1.0840, -0.2527, -0.0682, -0.4471]) tensor([0.5571, 0.1464, 0.1760, 0.1205]) -Greedy action tensor([ 1.0639, -0.9198, 0.0320, -0.4662]) tensor([0.5847, 0.0804, 0.2083, 0.1266]) -Greedy action tensor([ 0.6227, 0.2270, -0.2027, 0.1717]) tensor([0.3639, 0.2450, 0.1594, 0.2318]) -Greedy action tensor([ 0.4077, -0.0486, -0.0655, -0.0057]) tensor([0.3427, 0.2171, 0.2135, 0.2267]) -Greedy action tensor([ 0.7139, -0.4532, -0.0487, -0.0319]) tensor([0.4440, 0.1382, 0.2071, 0.2106]) -Greedy action tensor([ 0.6537, -0.5568, -0.1080, -0.2287]) tensor([0.4590, 0.1368, 0.2143, 0.1899]) -Greedy action tensor([ 1.0134, -0.7615, 0.1131, -0.7508]) tensor([0.5723, 0.0970, 0.2326, 0.0980]) -Greedy action tensor([ 0.6163, -0.1448, -0.0499, -0.0674]) tensor([0.4023, 0.1879, 0.2067, 0.2031]) -Greedy action tensor([ 1.0493, -0.9239, 0.0878, -0.6660]) tensor([0.5878, 0.0817, 0.2247, 0.1057]) -Greedy action tensor([ 0.8250, -0.3271, -0.1159, -0.0826]) tensor([0.4740, 0.1498, 0.1850, 0.1913]) -Greedy action tensor([ 0.9851, -0.4440, -0.1611, -0.6587]) tensor([0.5712, 0.1368, 0.1816, 0.1104]) -Greedy action tensor([ 0.6180, -0.2944, -0.0438, -0.2609]) tensor([0.4287, 0.1721, 0.2212, 0.1780]) -Greedy action tensor([ 0.9854, 0.2281, -0.1190, -0.0443]) tensor([0.4635, 0.2173, 0.1536, 0.1655]) -Greedy action tensor([ 0.3587, 0.0054, 0.0559, -0.0153]) tensor([0.3196, 0.2245, 0.2361, 0.2199]) -Greedy action tensor([ 1.4458, -1.3020, 0.1102, -0.8099]) tensor([0.6984, 0.0447, 0.1837, 0.0732]) -Greedy action tensor([ 0.5485, -0.2068, -0.0372, -0.1579]) tensor([0.3968, 0.1865, 0.2209, 0.1958]) -Greedy action tensor([ 0.6788, -0.1970, 0.1840, -0.3321]) tensor([0.4184, 0.1743, 0.2551, 0.1523]) -Greedy action tensor([ 0.9753, -0.4450, -0.1827, -0.2075]) tensor([0.5370, 0.1298, 0.1687, 0.1645]) -Greedy action tensor([ 0.6762, -0.5344, -0.1267, -0.2207]) tensor([0.4643, 0.1384, 0.2080, 0.1893]) -Greedy action tensor([ 0.6548, -0.4363, -0.1356, -0.2426]) tensor([0.4551, 0.1529, 0.2065, 0.1855]) -Greedy action tensor([ 0.7154, 0.3771, -0.3869, 0.1038]) tensor([0.3865, 0.2755, 0.1283, 0.2096]) -Greedy action tensor([ 0.2580, -0.2457, -0.1770, -0.1208]) tensor([0.3406, 0.2058, 0.2204, 0.2332]) -Greedy action tensor([ 0.8842, -0.3408, -0.0706, -0.2989]) tensor([0.5038, 0.1480, 0.1939, 0.1543]) -Greedy action tensor([ 0.1852, 0.0751, -0.1156, -0.1705]) tensor([0.2997, 0.2684, 0.2218, 0.2100]) -Greedy action tensor([ 0.9876, -0.3859, -0.0710, -0.3734]) tensor([0.5386, 0.1364, 0.1869, 0.1381]) -Greedy action tensor([ 0.9811, -0.6987, -0.0360, -0.5748]) tensor([0.5685, 0.1060, 0.2056, 0.1199]) -Greedy action tensor([ 1.0245, -0.6970, -0.0409, -0.5170]) tensor([0.5756, 0.1029, 0.1983, 0.1232]) -Greedy action tensor([ 0.5400, -0.4085, 0.0492, -0.3259]) tensor([0.4132, 0.1600, 0.2529, 0.1738]) -Greedy action tensor([ 0.6565, -0.3645, -0.0732, -0.0974]) tensor([0.4324, 0.1558, 0.2084, 0.2035]) -Greedy action tensor([ 0.6299, -0.4627, 0.0396, -0.4716]) tensor([0.4501, 0.1509, 0.2494, 0.1496]) -Greedy action tensor([ 0.3299, -0.1021, -0.0475, -0.2957]) tensor([0.3485, 0.2262, 0.2389, 0.1864]) -Greedy action tensor([ 0.5640, -0.2244, -0.0732, -0.1816]) tensor([0.4069, 0.1849, 0.2151, 0.1930]) -Greedy action tensor([ 0.3454, -0.0317, -0.1647, 0.0130]) tensor([0.3330, 0.2283, 0.1999, 0.2388]) -Greedy action tensor([ 0.6288, -0.4394, -0.0522, -0.0809]) tensor([0.4271, 0.1468, 0.2161, 0.2100]) -Greedy action tensor([ 1.1573, -0.7593, -0.0858, -0.4357]) tensor([0.6102, 0.0898, 0.1760, 0.1241]) -Greedy action tensor([ 0.1003, 1.0400, -0.3468, -0.2385]) tensor([0.2036, 0.5211, 0.1302, 0.1451]) -Greedy action tensor([ 0.7483, -0.3360, -0.1832, -0.3126]) tensor([0.4812, 0.1627, 0.1896, 0.1666]) -Greedy action tensor([ 0.8391, -0.4888, 0.0524, -0.3674]) tensor([0.4951, 0.1312, 0.2255, 0.1482]) -Greedy action tensor([ 0.3791, -0.0447, -0.0529, -0.3965]) tensor([0.3618, 0.2368, 0.2349, 0.1666]) -Greedy action tensor([ 0.9890, -0.3028, 0.0383, -0.3325]) tensor([0.5187, 0.1425, 0.2005, 0.1383]) -Greedy action tensor([ 0.8730, -0.4393, 0.0511, -0.3946]) tensor([0.5024, 0.1353, 0.2209, 0.1414]) -Greedy action tensor([ 0.6248, -0.3904, 0.0028, -0.2004]) tensor([0.4278, 0.1550, 0.2297, 0.1875]) -Greedy action tensor([ 0.5482, 0.1748, -0.0105, 0.0573]) tensor([0.3481, 0.2396, 0.1991, 0.2131]) -Greedy action tensor([ 0.2392, -0.0198, -0.2139, -0.3781]) tensor([0.3393, 0.2619, 0.2157, 0.1830]) -Greedy action tensor([ 0.5519, -0.0905, -0.0265, -0.0747]) tensor([0.3815, 0.2007, 0.2139, 0.2039]) -Greedy action tensor([-1.9376, -0.4410, 0.6645, -0.1739]) tensor([0.0403, 0.1801, 0.5442, 0.2353]) -Greedy action tensor([-0.8293, 0.5198, 0.1072, 0.0137]) tensor([0.1028, 0.3962, 0.2622, 0.2388]) -Greedy action tensor([-1.8408, -0.4178, 0.6522, -0.1061]) tensor([0.0436, 0.1811, 0.5279, 0.2473]) -Greedy action tensor([-1.7418, -0.4217, 0.5731, -0.0676]) tensor([0.0495, 0.1853, 0.5011, 0.2640]) -Greedy action tensor([-1.7738, -0.4794, 0.7185, -0.1343]) tensor([0.0457, 0.1667, 0.5523, 0.2354]) -Greedy action tensor([-1.7790, -0.4756, 0.6004, -0.0113]) tensor([0.0469, 0.1726, 0.5061, 0.2745]) -Greedy action tensor([-1.9451, -0.4503, 0.6679, -0.1804]) tensor([0.0401, 0.1788, 0.5469, 0.2342]) -Greedy action tensor([-1.8525, -0.4774, 0.6144, -0.1345]) tensor([0.0448, 0.1773, 0.5282, 0.2498]) -Greedy action tensor([-0.7679, 0.2427, 0.1091, -0.0284]) tensor([0.1213, 0.3332, 0.2915, 0.2541]) -Greedy action tensor([-0.1492, 0.2163, 0.5886, 0.8542]) tensor([0.1377, 0.1985, 0.2880, 0.3757]) -Greedy action tensor([-1.4926, -0.4959, 0.5892, -0.2966]) tensor([0.0665, 0.1802, 0.5333, 0.2200]) -Greedy action tensor([-1.1787, -0.5945, 0.4171, 0.3167]) tensor([0.0821, 0.1472, 0.4047, 0.3661]) -Greedy action tensor([-1.8536, -0.4715, 0.6369, -0.1302]) tensor([0.0441, 0.1758, 0.5327, 0.2474]) -Greedy action tensor([-1.4033, -0.2447, 0.6263, 0.5532]) tensor([0.0530, 0.1688, 0.4033, 0.3749]) -Greedy action tensor([-1.8435, -0.4466, 0.6163, -0.1241]) tensor([0.0448, 0.1811, 0.5242, 0.2500]) -Greedy action tensor([-1.8295, -0.4403, 0.6137, -0.1182]) tensor([0.0453, 0.1819, 0.5218, 0.2510]) -Greedy action tensor([-1.6387, -0.5109, 0.4927, 0.0360]) tensor([0.0560, 0.1730, 0.4720, 0.2990]) -Greedy action tensor([-1.9023, -0.3857, 0.6411, -0.1564]) tensor([0.0416, 0.1898, 0.5299, 0.2387]) -Greedy action tensor([-1.5046, -0.2619, 0.4288, -0.0354]) tensor([0.0636, 0.2204, 0.4396, 0.2764]) -Greedy action tensor([-1.8940, -0.4467, 0.6440, -0.1558]) tensor([0.0424, 0.1802, 0.5363, 0.2411]) -Greedy action tensor([-1.1695, -0.2612, 0.6512, 0.1311]) tensor([0.0750, 0.1861, 0.4634, 0.2755]) -Greedy action tensor([-1.9302, -0.4463, 0.6235, -0.1856]) tensor([0.0417, 0.1838, 0.5359, 0.2386]) -Greedy action tensor([-1.9254, -0.4230, 0.6497, -0.1672]) tensor([0.0409, 0.1839, 0.5376, 0.2375]) -Greedy action tensor([-1.9134, -0.4214, 0.6517, -0.1594]) tensor([0.0413, 0.1835, 0.5367, 0.2385]) -Greedy action tensor([-1.7994, -0.3246, 0.5752, -0.1039]) tensor([0.0464, 0.2026, 0.4983, 0.2527]) -Greedy action tensor([-1.1269, -0.0789, 0.2819, -0.0265]) tensor([0.0913, 0.2605, 0.3737, 0.2745]) -Greedy action tensor([-1.9303, -0.4263, 0.6632, -0.1645]) tensor([0.0405, 0.1820, 0.5411, 0.2365]) -Greedy action tensor([-0.9557, 0.9630, 0.1173, 0.3288]) tensor([0.0697, 0.4747, 0.2038, 0.2518]) -Greedy action tensor([-0.0593, 1.0358, 0.0379, 0.5711]) tensor([0.1435, 0.4289, 0.1581, 0.2695]) -Greedy action tensor([-1.9316, -0.4364, 0.6626, -0.1724]) tensor([0.0406, 0.1809, 0.5429, 0.2356]) -Greedy action tensor([-1.6357, -0.3155, 0.6142, 0.2761]) tensor([0.0476, 0.1783, 0.4518, 0.3222]) -Greedy action tensor([-1.0460, 0.5528, 0.1614, -0.0568]) tensor([0.0835, 0.4129, 0.2792, 0.2245]) -Greedy action tensor([-1.9241, -0.4490, 0.6568, -0.1736]) tensor([0.0411, 0.1796, 0.5427, 0.2366]) -Greedy action tensor([-1.5745, -0.3093, 0.7092, 0.1562]) tensor([0.0500, 0.1772, 0.4906, 0.2822]) -Greedy action tensor([-1.1499, -0.4551, 0.3828, 0.4030]) tensor([0.0809, 0.1621, 0.3747, 0.3823]) -Greedy action tensor([-1.1709, -0.3783, 0.2636, 0.2804]) tensor([0.0857, 0.1892, 0.3595, 0.3656]) -Greedy action tensor([-1.9449, -0.4497, 0.6688, -0.1805]) tensor([0.0401, 0.1788, 0.5471, 0.2340]) -Greedy action tensor([-1.1410, 0.4477, 0.3612, -0.2342]) tensor([0.0777, 0.3807, 0.3491, 0.1925]) -Greedy action tensor([-1.8319, -0.3222, 0.5758, -0.1528]) tensor([0.0455, 0.2058, 0.5050, 0.2437]) -Greedy action tensor([-0.4267, 0.9757, 0.0236, 0.1537]) tensor([0.1188, 0.4827, 0.1863, 0.2122]) -Greedy action tensor([-1.7395, -0.0385, 0.5130, -0.0827]) tensor([0.0471, 0.2581, 0.4479, 0.2469]) -Greedy action tensor([-1.5237, -0.4372, 0.6646, 0.3941]) tensor([0.0508, 0.1505, 0.4530, 0.3457]) -Greedy action tensor([-1.8082, -0.4535, 0.5950, -0.1013]) tensor([0.0466, 0.1807, 0.5156, 0.2570]) -Greedy action tensor([-1.9184, -0.4429, 0.6507, -0.1679]) tensor([0.0413, 0.1808, 0.5398, 0.2381]) -Greedy action tensor([-1.5251, -0.2842, 0.4416, -0.0511]) tensor([0.0626, 0.2165, 0.4475, 0.2734]) -Greedy action tensor([-0.9852, -0.2792, 0.3240, -0.2673]) tensor([0.1139, 0.2307, 0.4218, 0.2335]) -Greedy action tensor([-1.6945, -0.4660, 0.5405, -0.0706]) tensor([0.0531, 0.1814, 0.4962, 0.2693]) -Greedy action tensor([-1.8120, -0.4412, 0.5901, -0.1098]) tensor([0.0466, 0.1834, 0.5145, 0.2555]) -Greedy action tensor([-1.5677, -0.5816, 0.4214, -0.0951]) tensor([0.0651, 0.1746, 0.4762, 0.2841]) -Greedy action tensor([-1.8810, -0.3788, 0.6465, -0.1336]) tensor([0.0421, 0.1891, 0.5272, 0.2416]) -Greedy action tensor([-1.9479, -0.4524, 0.6693, -0.1826]) tensor([0.0400, 0.1785, 0.5478, 0.2337]) -Greedy action tensor([-1.9446, -0.4480, 0.6672, -0.1808]) tensor([0.0401, 0.1792, 0.5466, 0.2341]) -Greedy action tensor([-1.9288, -0.3895, 0.6498, -0.1706]) tensor([0.0406, 0.1892, 0.5348, 0.2354]) -Greedy action tensor([-1.5208, -0.3139, 0.4365, -0.0559]) tensor([0.0635, 0.2123, 0.4495, 0.2747]) -Greedy action tensor([-1.8856, -0.4547, 0.6371, -0.1589]) tensor([0.0430, 0.1798, 0.5356, 0.2416]) -Greedy action tensor([-1.8175, -0.4123, 0.6091, -0.1083]) tensor([0.0456, 0.1859, 0.5164, 0.2520]) -Greedy action tensor([-1.1344, -0.6083, 1.0035, 1.0652]) tensor([0.0495, 0.0838, 0.4200, 0.4467]) -Greedy action tensor([-1.8704, -0.4268, 0.6273, -0.1328]) tensor([0.0433, 0.1836, 0.5268, 0.2463]) -Greedy action tensor([-1.8702, -0.4297, 0.6283, -0.1644]) tensor([0.0437, 0.1845, 0.5314, 0.2405]) -Greedy action tensor([-1.6709, -0.3441, 0.5118, -0.0716]) tensor([0.0538, 0.2028, 0.4772, 0.2663]) -Greedy action tensor([-1.9203, -0.4169, 0.6539, -0.1681]) tensor([0.0410, 0.1844, 0.5381, 0.2365]) -Greedy action tensor([-1.8516, -0.4536, 0.6223, -0.1369]) tensor([0.0445, 0.1801, 0.5282, 0.2472]) -Greedy action tensor([-1.8367, -0.4226, 0.6214, -0.1206]) tensor([0.0447, 0.1839, 0.5225, 0.2488]) -Greedy action tensor([-1.8678, -0.4319, 0.6290, -0.1351]) tensor([0.0435, 0.1827, 0.5279, 0.2459]) -Greedy action tensor([-1.8622, -0.4461, 0.6258, -0.1378]) tensor([0.0439, 0.1810, 0.5287, 0.2464]) -Greedy action tensor([-1.0761, -0.6492, 0.2685, 0.1768]) tensor([0.1013, 0.1553, 0.3887, 0.3547]) -Greedy action tensor([-1.8930, -0.3953, 0.6392, -0.1494]) tensor([0.0421, 0.1881, 0.5293, 0.2405]) -Greedy action tensor([-1.9241, -0.4394, 0.6563, -0.1668]) tensor([0.0410, 0.1808, 0.5408, 0.2374]) -Greedy action tensor([-1.9188, -0.4365, 0.6546, -0.1628]) tensor([0.0411, 0.1812, 0.5395, 0.2382]) -Greedy action tensor([-1.9060, -0.4495, 0.6512, -0.1608]) tensor([0.0418, 0.1794, 0.5393, 0.2394]) -Greedy action tensor([-1.8560, -0.4649, 0.6456, -0.1190]) tensor([0.0437, 0.1755, 0.5328, 0.2480]) -Greedy action tensor([-1.9134, -0.3973, 0.6497, -0.1597]) tensor([0.0411, 0.1874, 0.5339, 0.2376]) -Greedy action tensor([-1.9228, -0.4261, 0.6580, -0.1585]) tensor([0.0408, 0.1822, 0.5388, 0.2381]) -Greedy action tensor([-1.1912, -0.2613, 0.2616, 0.0466]) tensor([0.0888, 0.2251, 0.3798, 0.3063]) -Greedy action tensor([-1.6545, -0.4087, 0.6546, -0.0413]) tensor([0.0511, 0.1777, 0.5146, 0.2566]) -Greedy action tensor([-1.9006, -0.4465, 0.6495, -0.1560]) tensor([0.0420, 0.1798, 0.5379, 0.2404]) -Greedy action tensor([-1.6607, -0.4364, 0.6000, 0.0925]) tensor([0.0506, 0.1721, 0.4852, 0.2921]) -Greedy action tensor([-1.8335, -0.2651, 0.6071, -0.1089]) tensor([0.0437, 0.2097, 0.5016, 0.2451]) -Greedy action tensor([-0.5104, 0.6601, 0.2631, 0.4792]) tensor([0.1101, 0.3550, 0.2387, 0.2962]) -Greedy action tensor([-1.7837, -0.3874, 0.6007, -0.0820]) tensor([0.0468, 0.1890, 0.5077, 0.2565]) -Greedy action tensor([-1.8598, -0.1500, 0.5974, -0.1329]) tensor([0.0420, 0.2320, 0.4899, 0.2360]) -Greedy action tensor([ 2.4533, -1.0706, 0.4798, 0.9169]) tensor([0.7228, 0.0213, 0.1004, 0.1555]) -Greedy action tensor([ 1.2324, 0.5154, -1.0570, -0.0396]) tensor([0.5348, 0.2611, 0.0542, 0.1499]) -Greedy action tensor([ 1.0231, -0.6053, 0.0754, -0.0676]) tensor([0.5209, 0.1022, 0.2019, 0.1750]) -Greedy action tensor([ 1.7212, -0.3328, -0.6527, 0.3157]) tensor([0.6819, 0.0874, 0.0635, 0.1672]) -Greedy action tensor([ 0.6599, -0.4661, -0.1337, -0.0168]) tensor([0.4377, 0.1420, 0.1979, 0.2225]) -Greedy action tensor([ 1.2716, -0.2604, -0.3048, 0.0260]) tensor([0.5846, 0.1263, 0.1208, 0.1682]) -Greedy action tensor([ 1.1144, -0.5941, -0.2528, 0.2835]) tensor([0.5343, 0.0968, 0.1361, 0.2328]) -Greedy action tensor([ 1.7551, 0.4096, -0.3876, 0.2298]) tensor([0.6268, 0.1632, 0.0735, 0.1364]) -Greedy action tensor([ 1.4751, -0.6965, -0.7385, 0.9133]) tensor([0.5576, 0.0636, 0.0609, 0.3179]) -Greedy action tensor([ 1.1878, -0.1345, -1.0986, 0.4842]) tensor([0.5368, 0.1431, 0.0546, 0.2656]) -Greedy action tensor([ 1.3565, -0.7652, -0.3305, 0.2521]) tensor([0.6111, 0.0732, 0.1131, 0.2025]) -Greedy action tensor([ 2.1899, -0.8997, -0.4213, 0.7442]) tensor([0.7383, 0.0336, 0.0542, 0.1739]) -Greedy action tensor([ 1.3443, -0.5277, -0.2534, 0.2401]) tensor([0.5925, 0.0911, 0.1199, 0.1964]) -Greedy action tensor([ 1.8018, -0.2021, -0.7471, 0.2156]) tensor([0.7054, 0.0951, 0.0551, 0.1444]) -Greedy action tensor([ 1.8361, 0.5960, -0.1413, 0.2917]) tensor([0.6093, 0.1763, 0.0843, 0.1301]) -Greedy action tensor([ 0.4080, -0.3197, -0.3787, 0.3888]) tensor([0.3426, 0.1655, 0.1560, 0.3360]) -Greedy action tensor([ 1.3205, -0.3989, -0.2633, 0.1523]) tensor([0.5899, 0.1057, 0.1210, 0.1834]) -Greedy action tensor([ 1.2962, -0.5244, -0.1035, 0.1091]) tensor([0.5835, 0.0945, 0.1439, 0.1780]) -Greedy action tensor([ 2.0541, -0.2753, -0.0200, 0.4332]) tensor([0.7039, 0.0685, 0.0885, 0.1392]) -Greedy action tensor([ 2.0726, -0.8765, -0.7430, 0.5159]) tensor([0.7558, 0.0396, 0.0453, 0.1594]) -Greedy action tensor([ 1.6976, -0.7333, -0.1255, 0.3212]) tensor([0.6658, 0.0586, 0.1075, 0.1681]) -Greedy action tensor([ 2.0064, -0.3190, -1.0144, 0.6441]) tensor([0.7130, 0.0697, 0.0348, 0.1826]) -Greedy action tensor([ 1.2789, -0.5788, -0.8016, 0.3401]) tensor([0.5981, 0.0933, 0.0747, 0.2339]) -Greedy action tensor([ 1.8299, -0.3371, -0.2366, -0.0340]) tensor([0.7162, 0.0820, 0.0907, 0.1111]) -Greedy action tensor([ 1.3997, -0.4927, -0.5917, 0.6880]) tensor([0.5624, 0.0848, 0.0768, 0.2760]) -Greedy action tensor([ 1.1655, -0.6003, -0.4476, 0.0702]) tensor([0.5866, 0.1003, 0.1169, 0.1962]) -Greedy action tensor([ 1.3729, -0.0535, -0.7358, 0.1669]) tensor([0.6021, 0.1446, 0.0731, 0.1802]) -Greedy action tensor([ 1.5793, -0.3737, -0.7818, 1.1025]) tensor([0.5385, 0.0764, 0.0508, 0.3343]) -Greedy action tensor([ 1.4001, -0.6547, -0.6047, 0.4273]) tensor([0.6095, 0.0781, 0.0821, 0.2304]) -Greedy action tensor([ 1.5145, -0.7634, -0.4761, 0.3596]) tensor([0.6434, 0.0660, 0.0879, 0.2027]) -Greedy action tensor([ 1.5493, -0.3843, -0.9871, 0.5087]) tensor([0.6341, 0.0917, 0.0502, 0.2240]) -Greedy action tensor([ 1.5786, -0.4562, -0.5366, 0.2915]) tensor([0.6547, 0.0856, 0.0790, 0.1807]) -Greedy action tensor([ 1.9368, -1.0258, -0.1899, 0.5385]) tensor([0.7053, 0.0365, 0.0841, 0.1742]) -Greedy action tensor([ 1.6092, -0.3627, -0.5368, 0.5108]) tensor([0.6291, 0.0876, 0.0736, 0.2098]) -Greedy action tensor([ 1.4623, -0.5034, -0.5322, 0.3899]) tensor([0.6179, 0.0865, 0.0841, 0.2114]) -Greedy action tensor([ 1.7932, -1.0780, -0.2902, -0.0524]) tensor([0.7468, 0.0423, 0.0930, 0.1179]) -Greedy action tensor([ 1.2090e+00, -3.9170e-01, -2.6865e-04, 1.9211e-01]) tensor([0.5371, 0.1084, 0.1603, 0.1943]) -Greedy action tensor([ 1.2144, -0.4371, -0.4268, 0.4685]) tensor([0.5377, 0.1031, 0.1042, 0.2550]) -Greedy action tensor([ 2.0498, -0.0514, -0.1585, 0.0987]) tensor([0.7276, 0.0890, 0.0800, 0.1034]) -Greedy action tensor([ 1.3760, -0.6020, -0.4400, 0.2942]) tensor([0.6097, 0.0844, 0.0992, 0.2067]) -Greedy action tensor([ 2.6620, -0.2459, -0.5509, 0.2783]) tensor([0.8424, 0.0460, 0.0339, 0.0777]) -Greedy action tensor([ 1.3330, -0.2931, 0.1032, 0.7151]) tensor([0.4931, 0.0970, 0.1441, 0.2658]) -Greedy action tensor([ 1.3056, -0.1045, -0.7681, 0.3885]) tensor([0.5651, 0.1380, 0.0710, 0.2259]) -Greedy action tensor([ 1.2187, 0.0565, -0.7273, 0.4583]) tensor([0.5200, 0.1627, 0.0743, 0.2431]) -Greedy action tensor([ 1.3457, -0.2033, -0.0838, 0.7655]) tensor([0.4971, 0.1056, 0.1190, 0.2783]) -Greedy action tensor([ 1.3687, -0.1819, -0.4757, 0.4405]) tensor([0.5664, 0.1201, 0.0896, 0.2239]) -Greedy action tensor([ 1.1078, -0.5089, -0.4024, 0.1832]) tensor([0.5506, 0.1093, 0.1216, 0.2184]) -Greedy action tensor([ 2.1264, -0.1369, -1.0545, 0.0430]) tensor([0.7874, 0.0819, 0.0327, 0.0980]) -Greedy action tensor([ 1.2437, -0.4395, -0.2661, 0.3936]) tensor([0.5452, 0.1013, 0.1205, 0.2330]) -Greedy action tensor([ 1.8672, -0.7402, -0.5105, 0.2049]) tensor([0.7374, 0.0544, 0.0684, 0.1399]) -Greedy action tensor([ 1.1815, -0.0720, -0.1776, -0.2258]) tensor([0.5595, 0.1598, 0.1437, 0.1370]) -Greedy action tensor([ 1.2972, -0.5979, -0.3492, 0.2423]) tensor([0.5913, 0.0889, 0.1140, 0.2059]) -Greedy action tensor([ 1.4562, -0.3657, 0.1746, 0.3360]) tensor([0.5664, 0.0916, 0.1572, 0.1848]) -Greedy action tensor([ 1.9459, 0.6460, -0.0826, 0.0533]) tensor([0.6432, 0.1753, 0.0846, 0.0969]) -Greedy action tensor([ 1.4760, 0.2150, -0.3499, 0.4993]) tensor([0.5492, 0.1556, 0.0884, 0.2068]) -Greedy action tensor([ 1.3088, 0.0620, -0.8005, 0.1302]) tensor([0.5826, 0.1675, 0.0707, 0.1793]) -Greedy action tensor([ 1.8964, 0.4038, -0.1923, -0.2919]) tensor([0.6846, 0.1539, 0.0848, 0.0767]) -Greedy action tensor([ 1.8774, -0.9506, -0.0556, 0.3871]) tensor([0.6997, 0.0414, 0.1013, 0.1576]) -Greedy action tensor([ 1.7016, -0.8045, -0.3713, 0.3211]) tensor([0.6855, 0.0559, 0.0863, 0.1724]) -Greedy action tensor([ 1.1988, -0.3536, -0.2259, 0.1936]) tensor([0.5500, 0.1164, 0.1323, 0.2013]) -Greedy action tensor([ 1.3050, -0.0452, -0.8539, -0.0512]) tensor([0.6126, 0.1588, 0.0707, 0.1578]) -Greedy action tensor([ 1.2333, -0.1342, -0.4452, 0.4437]) tensor([0.5276, 0.1344, 0.0985, 0.2395]) -Greedy action tensor([ 1.1676, -0.3537, -0.3250, 0.1351]) tensor([0.5558, 0.1214, 0.1249, 0.1979]) -Greedy action tensor([ 1.2960, -0.4015, -0.4189, 0.5247]) tensor([0.5478, 0.1003, 0.0986, 0.2533]) -Greedy action tensor([ 1.3410, -0.7492, -0.2634, 0.3190]) tensor([0.5936, 0.0734, 0.1193, 0.2136]) -Greedy action tensor([2.1510, 0.9323, 0.0062, 0.3080]) tensor([0.6365, 0.1882, 0.0745, 0.1008]) -Greedy action tensor([ 1.6396, -0.6278, -0.3822, 0.2020]) tensor([0.6787, 0.0703, 0.0899, 0.1612]) -Greedy action tensor([ 1.0348, 0.1357, -0.2455, 0.2177]) tensor([0.4702, 0.1914, 0.1307, 0.2077]) -Greedy action tensor([ 1.5206, 0.2852, -0.4141, 0.4489]) tensor([0.5625, 0.1635, 0.0813, 0.1926]) -Greedy action tensor([ 1.9000, 0.3323, -0.3193, 0.3760]) tensor([0.6514, 0.1358, 0.0708, 0.1419]) -Greedy action tensor([ 1.5390, -0.3781, -0.3140, 0.0598]) tensor([0.6529, 0.0960, 0.1023, 0.1487]) -Greedy action tensor([ 1.8282, -0.1798, -0.2002, 0.3753]) tensor([0.6668, 0.0895, 0.0877, 0.1560]) -Greedy action tensor([ 1.8731, -0.4687, -0.9332, 0.5285]) tensor([0.7056, 0.0678, 0.0426, 0.1839]) -Greedy action tensor([ 1.4562, -0.2617, -0.5062, 0.4138]) tensor([0.5979, 0.1073, 0.0840, 0.2108]) -Greedy action tensor([ 1.1163, -0.6227, -0.2514, 0.4686]) tensor([0.5119, 0.0899, 0.1304, 0.2678]) -Greedy action tensor([ 1.8162, -1.0554, -0.4449, 0.4013]) tensor([0.7123, 0.0403, 0.0743, 0.1731]) -Greedy action tensor([ 1.6774, -0.6891, -0.3456, 0.4970]) tensor([0.6522, 0.0612, 0.0863, 0.2003]) -Greedy action tensor([ 1.7754, -0.3231, -0.9601, 0.1803]) tensor([0.7192, 0.0882, 0.0466, 0.1459]) -Greedy action tensor([ 1.2687, -0.8484, -0.4274, 0.4694]) tensor([0.5703, 0.0687, 0.1046, 0.2564]) -Greedy action tensor([ 1.4156, -0.4494, -0.7314, 0.2072]) tensor([0.6368, 0.0986, 0.0744, 0.1902]) -Greedy action tensor([ 1.8112, -0.9140, -0.0294, 0.8474]) tensor([0.6228, 0.0408, 0.0989, 0.2376]) -Greedy action tensor([-1.2263, 0.1923, -0.4502, 0.4730]) tensor([0.0783, 0.3234, 0.1701, 0.4282]) -Greedy action tensor([ 1.6440, -1.6408, 0.2129, 1.1302]) tensor([0.5334, 0.0200, 0.1275, 0.3191]) -Greedy action tensor([ 0.8202, -1.1117, 1.4766, -0.6252]) tensor([0.3023, 0.0438, 0.5827, 0.0712]) -Greedy action tensor([ 0.1474, -0.1447, 0.0974, -0.2612]) tensor([0.2974, 0.2221, 0.2829, 0.1976]) -Greedy action tensor([-1.1416, -0.9518, 0.1277, 0.2093]) tensor([0.1039, 0.1256, 0.3696, 0.4010]) -Greedy action tensor([ 0.0796, 0.3160, -0.4654, -0.6776]) tensor([0.3016, 0.3820, 0.1749, 0.1415]) -Greedy action tensor([-0.5618, 0.3253, 0.5995, -0.9587]) tensor([0.1371, 0.3329, 0.4379, 0.0922]) -Greedy action tensor([ 0.8618, -0.4783, 0.4488, -0.4121]) tensor([0.4539, 0.1188, 0.3003, 0.1270]) -Greedy action tensor([-1.8421, -1.0830, -0.3738, -0.0327]) tensor([0.0736, 0.1573, 0.3196, 0.4495]) -Greedy action tensor([-0.2177, -1.4924, -0.3165, 0.6185]) tensor([0.2226, 0.0622, 0.2016, 0.5136]) -Greedy action tensor([-0.2598, -0.0668, -1.0844, -0.3032]) tensor([0.2771, 0.3361, 0.1215, 0.2653]) -Greedy action tensor([ 0.0254, 0.5753, 0.4126, -0.1801]) tensor([0.1992, 0.3452, 0.2934, 0.1622]) -Greedy action tensor([-0.6394, 0.0719, -0.1176, -0.8758]) tensor([0.1815, 0.3695, 0.3058, 0.1432]) -Greedy action tensor([-0.1976, 1.1204, 0.3087, -0.4260]) tensor([0.1391, 0.5195, 0.2307, 0.1107]) -Greedy action tensor([ 1.0205, -0.9764, 1.0181, 0.2924]) tensor([0.3822, 0.0519, 0.3813, 0.1846]) -Greedy action tensor([ 0.5598, -0.6688, -0.9155, -0.0785]) tensor([0.4879, 0.1428, 0.1116, 0.2577]) -Greedy action tensor([ 0.6616, -0.8225, 0.1128, -0.7681]) tensor([0.4893, 0.1109, 0.2826, 0.1171]) -Greedy action tensor([-1.0022, -0.3495, -0.7633, -0.2345]) tensor([0.1576, 0.3027, 0.2001, 0.3396]) -Greedy action tensor([ 0.5213, -0.2688, -0.7182, -0.5965]) tensor([0.4830, 0.2192, 0.1398, 0.1579]) -Greedy action tensor([ 0.0338, -1.4143, 0.1685, -0.8942]) tensor([0.3604, 0.0847, 0.4124, 0.1425]) -Greedy action tensor([-0.1571, -0.2368, -0.6882, 0.5384]) tensor([0.2214, 0.2045, 0.1302, 0.4439]) -Greedy action tensor([ 0.7154, -0.6615, -0.7557, 1.2941]) tensor([0.3062, 0.0773, 0.0703, 0.5462]) -Greedy action tensor([ 0.2717, -1.4470, -0.9516, 0.0732]) tensor([0.4360, 0.0782, 0.1283, 0.3575]) -Greedy action tensor([ 1.2473, -0.1600, -0.7532, -0.2858]) tensor([0.6266, 0.1534, 0.0848, 0.1353]) -Greedy action tensor([-0.3070, -1.3310, -0.1748, -1.6891]) tensor([0.3634, 0.1305, 0.4148, 0.0912]) -Greedy action tensor([ 0.2180, 0.0746, -0.1885, -0.4077]) tensor([0.3260, 0.2825, 0.2171, 0.1744]) -Greedy action tensor([ 0.9947, -0.7917, 0.2183, 1.1879]) tensor([0.3520, 0.0590, 0.1620, 0.4270]) -Greedy action tensor([ 0.3854, -0.4068, -0.4919, 0.2888]) tensor([0.3601, 0.1631, 0.1498, 0.3270]) -Greedy action tensor([ 1.1816, -0.1694, 0.1144, -0.1999]) tensor([0.5393, 0.1397, 0.1855, 0.1355]) -Greedy action tensor([-0.0190, -0.4685, 0.7729, -0.1608]) tensor([0.2122, 0.1353, 0.4684, 0.1841]) -Greedy action tensor([-0.4794, 0.0841, -0.1465, -0.5861]) tensor([0.1980, 0.3478, 0.2762, 0.1780]) -Greedy action tensor([-0.1870, -0.5475, -1.2981, -0.2167]) tensor([0.3336, 0.2326, 0.1098, 0.3239]) -Greedy action tensor([-0.2446, -0.9795, -1.0615, 1.0773]) tensor([0.1763, 0.0845, 0.0779, 0.6612]) -Greedy action tensor([-0.4863, -0.4153, 0.8975, -0.2791]) tensor([0.1371, 0.1472, 0.5470, 0.1687]) -Greedy action tensor([-0.0679, -0.5605, 0.7247, -1.0010]) tensor([0.2373, 0.1450, 0.5243, 0.0933]) -Greedy action tensor([ 0.5644, -0.3841, 0.1205, -0.4794]) tensor([0.4200, 0.1627, 0.2694, 0.1479]) -Greedy action tensor([ 1.0429, -1.4673, -0.9470, 0.1145]) tensor([0.6199, 0.0504, 0.0847, 0.2450]) -Greedy action tensor([-0.4110, 0.4636, -0.2386, -0.8467]) tensor([0.1911, 0.4582, 0.2270, 0.1236]) -Greedy action tensor([ 1.0103, -0.4629, 1.1145, -0.7713]) tensor([0.3988, 0.0914, 0.4426, 0.0671]) -Greedy action tensor([ 1.0181, -1.2958, -0.5110, -0.0989]) tensor([0.6087, 0.0602, 0.1319, 0.1992]) -Greedy action tensor([-0.1900, 0.6901, -0.0165, -0.9864]) tensor([0.1980, 0.4773, 0.2355, 0.0893]) -Greedy action tensor([ 0.9679, -0.0474, 0.2850, 0.4914]) tensor([0.4019, 0.1456, 0.2030, 0.2495]) -Greedy action tensor([-1.1520, -0.9516, 0.5765, -0.8309]) tensor([0.1083, 0.1323, 0.6100, 0.1493]) -Greedy action tensor([-0.2647, -1.5792, -0.2684, 0.4066]) tensor([0.2369, 0.0636, 0.2360, 0.4635]) -Greedy action tensor([ 0.5271, -1.6736, -0.5503, 0.3898]) tensor([0.4305, 0.0477, 0.1466, 0.3753]) -Greedy action tensor([ 1.3075, -0.2114, 0.0985, 0.2764]) tensor([0.5336, 0.1168, 0.1593, 0.1903]) -Greedy action tensor([ 1.2643, -1.0769, 1.3214, 0.7224]) tensor([0.3654, 0.0352, 0.3869, 0.2125]) -Greedy action tensor([ 0.7592, -1.7082, -0.4039, 1.0976]) tensor([0.3571, 0.0303, 0.1116, 0.5010]) -Greedy action tensor([ 0.3833, -1.0553, 0.0246, 0.2217]) tensor([0.3589, 0.0851, 0.2507, 0.3053]) -Greedy action tensor([ 0.1684, -0.3302, -1.1152, -0.6527]) tensor([0.4302, 0.2613, 0.1192, 0.1893]) -Greedy action tensor([ 0.6484, -1.6732, -0.2864, -0.3723]) tensor([0.5402, 0.0530, 0.2121, 0.1947]) -Greedy action tensor([ 0.2501, 0.4120, 0.2433, -0.3187]) tensor([0.2677, 0.3148, 0.2659, 0.1516]) -Greedy action tensor([ 1.3285, -0.0627, 0.0682, 0.2358]) tensor([0.5354, 0.1332, 0.1518, 0.1795]) -Greedy action tensor([-0.4663, -0.1359, 0.1536, -0.7228]) tensor([0.1990, 0.2770, 0.3700, 0.1540]) -Greedy action tensor([0.8540, 0.0320, 1.2492, 0.7914]) tensor([0.2588, 0.1138, 0.3843, 0.2431]) -Greedy action tensor([-0.1579, 0.2920, -0.5621, -0.9358]) tensor([0.2706, 0.4244, 0.1807, 0.1243]) -Greedy action tensor([ 1.8968, -1.4572, 0.0672, 1.1287]) tensor([0.6027, 0.0211, 0.0967, 0.2796]) -Greedy action tensor([-0.7756, -1.1112, -0.1027, -0.0304]) tensor([0.1730, 0.1237, 0.3390, 0.3644]) -Greedy action tensor([ 0.5739, -0.4392, 0.0505, 1.2485]) tensor([0.2552, 0.0927, 0.1512, 0.5010]) -Greedy action tensor([ 0.0913, 0.3186, 0.9148, -0.2366]) tensor([0.1903, 0.2389, 0.4337, 0.1371]) -Greedy action tensor([ 0.6051, -1.6199, -0.5516, 1.4304]) tensor([0.2699, 0.0292, 0.0849, 0.6161]) -Greedy action tensor([-0.7579, -1.2702, 2.0560, -1.2497]) tensor([0.0530, 0.0317, 0.8829, 0.0324]) -Greedy action tensor([-0.5361, -0.5786, -0.5171, -0.7093]) tensor([0.2619, 0.2510, 0.2669, 0.2202]) -Greedy action tensor([ 0.1899, -0.3300, 0.1732, -0.8228]) tensor([0.3400, 0.2021, 0.3344, 0.1235]) -Greedy action tensor([-0.7825, -0.5335, 0.3442, -0.3512]) tensor([0.1448, 0.1857, 0.4467, 0.2229]) -Greedy action tensor([ 0.1240, 0.6186, 0.3385, -0.0854]) tensor([0.2132, 0.3496, 0.2642, 0.1729]) -Greedy action tensor([-0.2719, -2.2122, -0.0924, 1.2994]) tensor([0.1398, 0.0201, 0.1673, 0.6728]) -Greedy action tensor([ 0.2884, -1.2574, -0.1039, -0.5363]) tensor([0.4297, 0.0916, 0.2903, 0.1884]) -Greedy action tensor([-0.4168, 0.7104, -0.8051, -0.1960]) tensor([0.1663, 0.5134, 0.1128, 0.2074]) -Greedy action tensor([-0.0294, -1.3685, -0.7409, -0.2967]) tensor([0.3971, 0.1041, 0.1949, 0.3039]) -Greedy action tensor([-0.0086, 0.2742, 0.2863, -0.3657]) tensor([0.2289, 0.3037, 0.3073, 0.1601]) -Greedy action tensor([ 0.9350, -0.4617, -0.3592, 1.0220]) tensor([0.3828, 0.0947, 0.1049, 0.4176]) -Greedy action tensor([ 0.1197, -0.2439, 0.0352, -0.0273]) tensor([0.2876, 0.1999, 0.2643, 0.2483]) -Greedy action tensor([ 0.9012, -0.7276, -0.7546, 0.6230]) tensor([0.4664, 0.0915, 0.0890, 0.3531]) -Greedy action tensor([-0.2203, 0.2355, -0.1624, -0.6336]) tensor([0.2326, 0.3670, 0.2465, 0.1539]) -Greedy action tensor([-0.1686, -0.7675, -0.5660, 0.4151]) tensor([0.2491, 0.1369, 0.1674, 0.4466]) -Greedy action tensor([ 0.1598, -0.1243, -0.8825, -0.0104]) tensor([0.3391, 0.2553, 0.1196, 0.2861]) -Greedy action tensor([-0.6221, -0.8270, -0.8899, -0.3862]) tensor([0.2600, 0.2118, 0.1989, 0.3292]) -Greedy action tensor([ 0.7822, 0.6796, -0.6512, -0.1733]) tensor([0.3959, 0.3573, 0.0944, 0.1523]) -Greedy action tensor([-1.3531, -0.5366, 0.4864, -0.8985]) tensor([0.0898, 0.2033, 0.5654, 0.1415]) -Greedy action tensor([-0.1341, -1.2057, 0.2646, -0.0207]) tensor([0.2530, 0.0866, 0.3769, 0.2834]) -Greedy action tensor([ 0.7576, -0.3195, 0.1272, -0.1943]) tensor([0.4427, 0.1508, 0.2357, 0.1709]) -Greedy action tensor([ 0.4751, -0.2199, -0.1875, 0.0092]) tensor([0.3785, 0.1889, 0.1951, 0.2375]) -Greedy action tensor([ 0.9079, -0.9408, 0.0135, -0.4923]) tensor([0.5516, 0.0869, 0.2255, 0.1360]) -Greedy action tensor([ 1.1800, -0.6228, -0.0799, -0.1619]) tensor([0.5848, 0.0964, 0.1659, 0.1529]) -Greedy action tensor([ 0.5183, -0.0905, -0.1019, -0.4188]) tensor([0.4043, 0.2199, 0.2174, 0.1584]) -Greedy action tensor([ 0.7395, -0.5787, 0.0080, -0.2587]) tensor([0.4723, 0.1264, 0.2273, 0.1741]) -Greedy action tensor([ 0.6575, 0.0720, -0.1539, -0.0616]) tensor([0.4019, 0.2238, 0.1785, 0.1958]) -Greedy action tensor([ 0.8105, -0.4388, -0.0662, -0.2254]) tensor([0.4860, 0.1393, 0.2022, 0.1725]) -Greedy action tensor([ 0.6323, -0.6669, -0.1706, -0.2247]) tensor([0.4662, 0.1271, 0.2088, 0.1979]) -Greedy action tensor([ 0.9440, -0.8710, 0.0656, -0.6196]) tensor([0.5594, 0.0911, 0.2324, 0.1171]) -Greedy action tensor([ 1.1367, -0.6378, -0.1095, -0.3925]) tensor([0.5974, 0.1013, 0.1718, 0.1295]) -Greedy action tensor([ 0.9847, -0.4670, -0.2140, -0.8469]) tensor([0.5896, 0.1381, 0.1778, 0.0944]) -Greedy action tensor([ 0.7733, -0.5872, -0.1047, -0.3369]) tensor([0.4996, 0.1282, 0.2076, 0.1646]) -Greedy action tensor([ 0.7780, -0.2327, -0.2143, -0.3003]) tensor([0.4820, 0.1754, 0.1787, 0.1640]) -Greedy action tensor([0.4209, 0.1543, 0.0623, 0.0798]) tensor([0.3149, 0.2412, 0.2200, 0.2239]) -Greedy action tensor([ 0.4932, -0.4237, 0.0165, -0.1976]) tensor([0.3965, 0.1585, 0.2462, 0.1987]) -Greedy action tensor([ 0.4444, -0.0519, -0.0334, -0.0748]) tensor([0.3541, 0.2156, 0.2196, 0.2107]) -Greedy action tensor([ 0.8463, -0.3884, -0.0481, -0.2618]) tensor([0.4926, 0.1433, 0.2014, 0.1627]) -Greedy action tensor([ 0.8893, -0.5416, 0.1621, -0.2691]) tensor([0.4911, 0.1174, 0.2373, 0.1542]) -Greedy action tensor([ 0.7842, -0.3676, -0.0537, -0.2290]) tensor([0.4735, 0.1497, 0.2049, 0.1719]) -Greedy action tensor([ 0.5034, -0.1415, -0.0965, -0.3244]) tensor([0.3983, 0.2090, 0.2186, 0.1741]) -Greedy action tensor([ 0.6270, -0.3503, 0.1626, -0.4505]) tensor([0.4264, 0.1605, 0.2680, 0.1452]) -Greedy action tensor([ 0.4592, -0.4895, -0.1359, -0.1023]) tensor([0.3985, 0.1543, 0.2198, 0.2273]) -Greedy action tensor([ 0.4381, -0.1783, 0.1255, -0.5137]) tensor([0.3763, 0.2032, 0.2753, 0.1453]) -Greedy action tensor([ 1.0286, -0.6874, -0.0797, -0.4014]) tensor([0.5717, 0.1028, 0.1887, 0.1368]) -Greedy action tensor([ 0.8274, -0.4660, -0.0612, -0.0317]) tensor([0.4741, 0.1301, 0.1950, 0.2008]) -Greedy action tensor([ 0.8374, -0.6933, 0.0938, -0.3094]) tensor([0.4976, 0.1077, 0.2366, 0.1581]) -Greedy action tensor([ 1.0231, -0.7438, 0.0466, -0.7422]) tensor([0.5818, 0.0994, 0.2192, 0.0996]) -Greedy action tensor([ 0.7001, -0.3928, -0.0889, -0.0414]) tensor([0.4413, 0.1479, 0.2005, 0.2102]) -Greedy action tensor([ 1.0264, -0.5144, 0.2145, -0.3022]) tensor([0.5200, 0.1114, 0.2309, 0.1377]) -Greedy action tensor([ 0.4093, -0.0882, -0.0495, 0.0396]) tensor([0.3412, 0.2074, 0.2156, 0.2357]) -Greedy action tensor([ 0.7557, -0.5949, 0.0594, -0.4762]) tensor([0.4880, 0.1264, 0.2432, 0.1424]) -Greedy action tensor([ 0.5245, -0.1942, 0.0736, -0.0972]) tensor([0.3757, 0.1831, 0.2394, 0.2018]) -Greedy action tensor([ 0.9270, -0.3050, -0.1867, -0.3369]) tensor([0.5256, 0.1533, 0.1726, 0.1485]) -Greedy action tensor([ 1.2249, -0.4192, 0.1876, -0.6182]) tensor([0.5862, 0.1132, 0.2077, 0.0928]) -Greedy action tensor([ 0.1366, 0.0443, -0.3919, -0.4327]) tensor([0.3260, 0.2973, 0.1922, 0.1845]) -Greedy action tensor([ 0.4748, -0.2369, -0.1197, -0.2305]) tensor([0.3942, 0.1935, 0.2176, 0.1947]) -Greedy action tensor([ 0.8846, -0.5033, 0.0449, -0.3195]) tensor([0.5047, 0.1260, 0.2180, 0.1514]) -Greedy action tensor([ 0.9937, -0.6622, -0.0280, -0.6065]) tensor([0.5705, 0.1089, 0.2054, 0.1152]) -Greedy action tensor([ 0.3311, -0.1363, -0.0476, 0.0022]) tensor([0.3299, 0.2067, 0.2259, 0.2374]) -Greedy action tensor([ 0.6316, -0.4881, -0.1938, -0.1340]) tensor([0.4485, 0.1464, 0.1965, 0.2086]) -Greedy action tensor([ 0.9177, -0.8136, 0.0360, -0.3220]) tensor([0.5317, 0.0942, 0.2202, 0.1539]) -Greedy action tensor([ 0.6108, -0.3809, -0.1777, -0.0756]) tensor([0.4294, 0.1593, 0.1952, 0.2162]) -Greedy action tensor([ 0.9642, -0.5654, 0.0117, -0.5044]) tensor([0.5457, 0.1182, 0.2105, 0.1256]) -Greedy action tensor([ 0.6609, -0.4779, -0.1707, -0.3633]) tensor([0.4729, 0.1514, 0.2059, 0.1698]) -Greedy action tensor([ 0.4488, -0.1732, -0.1866, 0.1202]) tensor([0.3589, 0.1927, 0.1901, 0.2584]) -Greedy action tensor([ 0.6843, -0.3855, -0.0329, -0.7460]) tensor([0.4830, 0.1657, 0.2358, 0.1156]) -Greedy action tensor([ 0.5510, -0.3348, -0.1616, -0.0149]) tensor([0.4047, 0.1669, 0.1985, 0.2299]) -Greedy action tensor([ 0.9715, -0.7648, 0.1218, -0.6699]) tensor([0.5563, 0.0980, 0.2379, 0.1078]) -Greedy action tensor([ 1.0013, -0.8143, 0.0127, -0.5656]) tensor([0.5735, 0.0933, 0.2134, 0.1197]) -Greedy action tensor([ 0.6059, -0.3226, -0.0216, -0.0662]) tensor([0.4099, 0.1620, 0.2188, 0.2093]) -Greedy action tensor([ 0.7670, -0.5241, -0.1256, -0.0537]) tensor([0.4707, 0.1294, 0.1928, 0.2071]) -Greedy action tensor([ 1.1426, -0.6913, 0.0785, -0.8088]) tensor([0.6072, 0.0970, 0.2095, 0.0863]) -Greedy action tensor([ 0.6762, -0.1350, -0.1510, -0.0106]) tensor([0.4193, 0.1863, 0.1834, 0.2110]) -Greedy action tensor([ 0.9935, -0.9555, -0.0658, -0.5144]) tensor([0.5846, 0.0833, 0.2027, 0.1294]) -Greedy action tensor([ 0.4557, -0.3690, -0.0038, -0.0811]) tensor([0.3767, 0.1651, 0.2379, 0.2202]) -Greedy action tensor([ 0.8357, 0.0587, 0.1789, -0.4044]) tensor([0.4410, 0.2028, 0.2287, 0.1276]) -Greedy action tensor([ 0.9693, -0.5475, -0.0576, -0.9038]) tensor([0.5776, 0.1267, 0.2069, 0.0888]) -Greedy action tensor([ 0.5900, -0.2917, -0.1284, -0.1059]) tensor([0.4166, 0.1725, 0.2031, 0.2077]) -Greedy action tensor([ 0.5536, -0.0493, -0.1472, -0.0874]) tensor([0.3891, 0.2129, 0.1931, 0.2050]) -Greedy action tensor([ 0.4840, 0.1987, -0.2290, 0.0403]) tensor([0.3468, 0.2607, 0.1700, 0.2225]) -Greedy action tensor([ 0.3488, -0.0235, -0.1812, -0.3812]) tensor([0.3624, 0.2497, 0.2133, 0.1746]) -Greedy action tensor([ 1.0346, -0.8979, 0.0908, -0.6659]) tensor([0.5826, 0.0843, 0.2267, 0.1064]) -Greedy action tensor([ 1.2127e+00, -8.1723e-01, -5.3197e-04, -7.4962e-01]) tensor([0.6373, 0.0837, 0.1894, 0.0896]) -Greedy action tensor([ 0.6533, -0.4257, -0.0948, -0.3674]) tensor([0.4601, 0.1564, 0.2177, 0.1658]) -Greedy action tensor([ 0.5200, -0.1020, -0.2768, -0.5201]) tensor([0.4272, 0.2293, 0.1925, 0.1510]) -Greedy action tensor([ 0.9292, -0.8419, -0.0193, -0.8243]) tensor([0.5778, 0.0983, 0.2238, 0.1001]) -Greedy action tensor([ 0.9524, -0.5796, -0.0791, -0.3164]) tensor([0.5394, 0.1166, 0.1923, 0.1517]) -Greedy action tensor([ 0.4678, -0.1399, -0.0935, -0.0512]) tensor([0.3690, 0.2010, 0.2105, 0.2196]) -Greedy action tensor([ 0.5561, -0.2473, 0.0788, -0.1686]) tensor([0.3918, 0.1754, 0.2430, 0.1898]) -Greedy action tensor([ 0.2659, 0.0800, -0.0373, -0.0016]) tensor([0.2999, 0.2491, 0.2215, 0.2295]) -Greedy action tensor([ 0.4182, -0.2577, -0.0295, -0.1957]) tensor([0.3719, 0.1892, 0.2377, 0.2013]) -Greedy action tensor([ 0.9322, -0.2764, -0.1075, -0.0444]) tensor([0.4929, 0.1472, 0.1743, 0.1856]) -Greedy action tensor([ 1.2259, -0.7247, -0.1176, -0.3995]) tensor([0.6250, 0.0889, 0.1631, 0.1230]) -Greedy action tensor([ 1.0140, -0.0957, 0.0828, -0.2268]) tensor([0.4968, 0.1638, 0.1958, 0.1437]) -Greedy action tensor([ 0.6557, -0.6594, -0.1645, -0.1397]) tensor([0.4629, 0.1243, 0.2038, 0.2090]) -Greedy action tensor([ 1.0117, -0.6547, 0.1576, -0.4649]) tensor([0.5426, 0.1025, 0.2310, 0.1239]) -Greedy action tensor([ 1.0718, -0.2779, 0.3321, -0.5273]) tensor([0.5158, 0.1338, 0.2462, 0.1042]) -Greedy action tensor([ 0.7272, -0.3378, -0.2097, -0.2840]) tensor([0.4761, 0.1641, 0.1866, 0.1732]) -Greedy action tensor([ 0.4424, -0.1801, 0.0042, -0.1803]) tensor([0.3679, 0.1974, 0.2374, 0.1974]) -Greedy action tensor([ 0.7300, -0.1175, 0.2009, -0.3690]) tensor([0.4254, 0.1823, 0.2506, 0.1417]) -Greedy action tensor([-1.9294, -0.4330, 0.6603, -0.1717]) tensor([0.0407, 0.1816, 0.5419, 0.2358]) -Greedy action tensor([-1.8975, -0.4423, 0.6462, -0.1548]) tensor([0.0422, 0.1806, 0.5364, 0.2408]) -Greedy action tensor([-1.9052, -0.3798, 0.6446, -0.1577]) tensor([0.0414, 0.1904, 0.5304, 0.2378]) -Greedy action tensor([-1.7653, -0.0907, 0.3725, -0.1504]) tensor([0.0504, 0.2689, 0.4273, 0.2533]) -Greedy action tensor([-1.8525, -0.4594, 0.6207, -0.1401]) tensor([0.0446, 0.1796, 0.5288, 0.2471]) -Greedy action tensor([-1.8967, -0.4067, 0.6477, -0.1540]) tensor([0.0419, 0.1858, 0.5332, 0.2392]) -Greedy action tensor([-0.8238, -1.2814, 0.3942, -0.6482]) tensor([0.1611, 0.1020, 0.5448, 0.1921]) -Greedy action tensor([-1.5976, -0.3774, 0.4324, -0.0018]) tensor([0.0591, 0.2001, 0.4496, 0.2913]) -Greedy action tensor([-1.8175, -0.4280, 0.6097, -0.1049]) tensor([0.0457, 0.1834, 0.5176, 0.2533]) -Greedy action tensor([-1.8776, -0.2587, 0.6148, -0.1396]) tensor([0.0420, 0.2119, 0.5075, 0.2387]) -Greedy action tensor([-1.5706, -0.6115, 0.8221, 0.3084]) tensor([0.0474, 0.1237, 0.5186, 0.3103]) -Greedy action tensor([-1.9038, -0.4610, 0.6546, -0.1459]) tensor([0.0418, 0.1767, 0.5393, 0.2422]) -Greedy action tensor([-1.9073, -0.4495, 0.6432, -0.1642]) tensor([0.0420, 0.1803, 0.5378, 0.2399]) -Greedy action tensor([-1.8984, -0.4189, 0.6396, -0.1627]) tensor([0.0422, 0.1851, 0.5335, 0.2392]) -Greedy action tensor([-1.0161, 0.1948, -0.2378, -0.1527]) tensor([0.1123, 0.3769, 0.2445, 0.2663]) -Greedy action tensor([-1.9128, -0.4428, 0.6540, -0.1635]) tensor([0.0415, 0.1803, 0.5399, 0.2384]) -Greedy action tensor([-1.8537, -0.3943, 0.6432, -0.1305]) tensor([0.0434, 0.1867, 0.5269, 0.2430]) -Greedy action tensor([-1.5724, -0.1356, 0.5448, 0.0342]) tensor([0.0541, 0.2274, 0.4491, 0.2695]) -Greedy action tensor([-1.9087, -0.4213, 0.6537, -0.1601]) tensor([0.0414, 0.1833, 0.5372, 0.2381]) -Greedy action tensor([-1.9343, -0.4375, 0.6631, -0.1748]) tensor([0.0405, 0.1808, 0.5435, 0.2351]) -Greedy action tensor([-1.8941, -0.4579, 0.6525, -0.1292]) tensor([0.0420, 0.1766, 0.5361, 0.2453]) -Greedy action tensor([-1.7233, -0.2383, 0.5848, -0.0250]) tensor([0.0478, 0.2109, 0.4803, 0.2610]) -Greedy action tensor([-1.8659, -0.4473, 0.6315, -0.1373]) tensor([0.0436, 0.1803, 0.5303, 0.2458]) -Greedy action tensor([-1.7484, -0.2696, 0.6121, -0.0799]) tensor([0.0470, 0.2061, 0.4977, 0.2492]) -Greedy action tensor([-1.5238, -0.1400, 0.6120, 0.1170]) tensor([0.0537, 0.2144, 0.4547, 0.2772]) -Greedy action tensor([-1.7933, -0.4395, 0.6182, -0.0789]) tensor([0.0463, 0.1795, 0.5168, 0.2574]) -Greedy action tensor([-1.8284, -0.4663, 0.6159, -0.1208]) tensor([0.0456, 0.1779, 0.5251, 0.2514]) -Greedy action tensor([-1.9135, -0.4522, 0.6514, -0.1535]) tensor([0.0415, 0.1787, 0.5389, 0.2409]) -Greedy action tensor([-1.8867, -0.5786, 1.3911, 0.5240]) tensor([0.0236, 0.0873, 0.6260, 0.2630]) -Greedy action tensor([-1.9307, -0.4102, 0.6538, -0.1698]) tensor([0.0406, 0.1856, 0.5378, 0.2360]) -Greedy action tensor([-1.9046, -0.4265, 0.6486, -0.1569]) tensor([0.0417, 0.1829, 0.5359, 0.2395]) -Greedy action tensor([-1.7257, 0.0449, 0.4828, -0.0557]) tensor([0.0470, 0.2759, 0.4276, 0.2495]) -Greedy action tensor([-1.8387, -0.4582, 0.6190, -0.1187]) tensor([0.0450, 0.1788, 0.5251, 0.2511]) -Greedy action tensor([-1.7756, -0.4120, 0.6542, -0.1060]) tensor([0.0463, 0.1812, 0.5263, 0.2461]) -Greedy action tensor([-1.9296, -0.4240, 0.6562, -0.1716]) tensor([0.0407, 0.1833, 0.5400, 0.2360]) -Greedy action tensor([-1.9325, -0.4434, 0.6620, -0.1735]) tensor([0.0406, 0.1800, 0.5437, 0.2358]) -Greedy action tensor([-1.8328, -0.2200, 0.5981, -0.0375]) tensor([0.0427, 0.2143, 0.4857, 0.2572]) -Greedy action tensor([-1.9127, -0.4346, 0.6480, -0.1661]) tensor([0.0416, 0.1822, 0.5379, 0.2383]) -Greedy action tensor([-1.4240, 0.7384, 0.2588, 0.2057]) tensor([0.0496, 0.4308, 0.2667, 0.2529]) -Greedy action tensor([-1.2167, 0.6568, 0.1663, 0.2099]) tensor([0.0638, 0.4157, 0.2545, 0.2659]) -Greedy action tensor([-1.8635, -0.4638, 0.6330, -0.1424]) tensor([0.0439, 0.1779, 0.5328, 0.2454]) -Greedy action tensor([-1.6953, -0.4401, 0.5630, -0.0626]) tensor([0.0521, 0.1828, 0.4984, 0.2666]) -Greedy action tensor([-1.6730, -0.2890, 0.5154, -0.1062]) tensor([0.0535, 0.2134, 0.4770, 0.2562]) -Greedy action tensor([-1.9216, -0.2899, 0.6049, -0.1784]) tensor([0.0411, 0.2101, 0.5140, 0.2348]) -Greedy action tensor([-1.8941, -0.3392, 0.6357, -0.1521]) tensor([0.0417, 0.1973, 0.5231, 0.2379]) -Greedy action tensor([-1.5191, 0.6304, 0.3107, 0.2885]) tensor([0.0456, 0.3916, 0.2845, 0.2782]) -Greedy action tensor([-1.8813, -0.4208, 0.6339, -0.1521]) tensor([0.0429, 0.1848, 0.5306, 0.2418]) -Greedy action tensor([-1.7405, -0.3936, 0.5555, -0.0107]) tensor([0.0490, 0.1883, 0.4865, 0.2762]) -Greedy action tensor([-1.9420, -0.4459, 0.6664, -0.1793]) tensor([0.0402, 0.1795, 0.5459, 0.2344]) -Greedy action tensor([-1.9451, -0.4523, 0.6703, -0.1808]) tensor([0.0401, 0.1783, 0.5478, 0.2339]) -Greedy action tensor([-1.9400, -0.4438, 0.6651, -0.1774]) tensor([0.0403, 0.1798, 0.5451, 0.2348]) -Greedy action tensor([-1.8673, -0.4260, 0.6229, -0.1321]) tensor([0.0436, 0.1841, 0.5254, 0.2470]) -Greedy action tensor([-1.3921, -0.1921, 0.3865, -0.0149]) tensor([0.0704, 0.2337, 0.4169, 0.2790]) -Greedy action tensor([-1.7702, -0.4229, 0.5883, -0.0890]) tensor([0.0481, 0.1850, 0.5086, 0.2583]) -Greedy action tensor([-1.9185, -0.4507, 0.6569, -0.1686]) tensor([0.0413, 0.1791, 0.5422, 0.2375]) -Greedy action tensor([-1.7409e+00, -3.9928e-02, 5.1237e-01, 1.1762e-03]) tensor([0.0461, 0.2524, 0.4385, 0.2630]) -Greedy action tensor([-1.3260, -0.5892, 0.3161, 0.2254]) tensor([0.0771, 0.1610, 0.3982, 0.3637]) -Greedy action tensor([-1.7835, -0.2879, 0.6525, -0.0600]) tensor([0.0445, 0.1984, 0.5080, 0.2491]) -Greedy action tensor([-1.9058, -0.3858, 0.6488, -0.1590]) tensor([0.0414, 0.1891, 0.5322, 0.2373]) -Greedy action tensor([-0.0870, 1.1373, 0.0039, 0.3320]) tensor([0.1425, 0.4848, 0.1561, 0.2167]) -Greedy action tensor([-1.7460, 0.1145, 0.4890, -0.0405]) tensor([0.0449, 0.2885, 0.4196, 0.2471]) -Greedy action tensor([-1.7616, -0.4091, 0.6546, -0.0831]) tensor([0.0467, 0.1805, 0.5228, 0.2500]) -Greedy action tensor([-1.7474, -0.2182, 0.5100, -0.2949]) tensor([0.0514, 0.2373, 0.4915, 0.2198]) -Greedy action tensor([-1.9282, -0.4215, 0.6551, -0.1706]) tensor([0.0407, 0.1838, 0.5393, 0.2362]) -Greedy action tensor([-1.5413, -0.4668, 0.5180, 0.0843]) tensor([0.0593, 0.1738, 0.4653, 0.3016]) -Greedy action tensor([-1.8084, -0.0935, 0.5472, -0.0851]) tensor([0.0440, 0.2447, 0.4644, 0.2468]) -Greedy action tensor([-0.6852, 0.2403, 0.5085, 0.5500]) tensor([0.0974, 0.2459, 0.3215, 0.3352]) -Greedy action tensor([-1.9218, -0.4312, 0.6540, -0.1675]) tensor([0.0410, 0.1822, 0.5395, 0.2373]) -Greedy action tensor([-1.3963, 0.5055, 0.2158, 0.1106]) tensor([0.0581, 0.3889, 0.2911, 0.2620]) -Greedy action tensor([-1.3959, -0.1834, 0.4166, -0.1124]) tensor([0.0709, 0.2385, 0.4345, 0.2560]) -Greedy action tensor([-1.9285, -0.4197, 0.6568, -0.1721]) tensor([0.0407, 0.1839, 0.5398, 0.2356]) -Greedy action tensor([-1.8486, -0.4659, 0.6271, -0.1339]) tensor([0.0446, 0.1777, 0.5301, 0.2477]) -Greedy action tensor([-0.9761, 0.3312, -0.0965, -0.5902]) tensor([0.1166, 0.4309, 0.2810, 0.1715]) -Greedy action tensor([-1.8503, -0.4495, 0.6236, -0.1359]) tensor([0.0445, 0.1805, 0.5279, 0.2470]) -Greedy action tensor([-1.7663, -0.4339, 0.6276, -0.0171]) tensor([0.0465, 0.1763, 0.5097, 0.2675]) -Greedy action tensor([-1.7808, -0.1607, 0.5804, -0.0683]) tensor([0.0450, 0.2276, 0.4776, 0.2497]) -Greedy action tensor([-1.9153, -0.4336, 0.6526, -0.1643]) tensor([0.0413, 0.1818, 0.5388, 0.2380]) -Greedy action tensor([-1.2864, -0.5464, 0.3495, 0.3001]) tensor([0.0762, 0.1598, 0.3914, 0.3726]) -Greedy action tensor([-1.8590, -0.4054, 0.6356, -0.1206]) tensor([0.0433, 0.1853, 0.5249, 0.2464]) -Greedy action tensor([-1.8616, -0.4473, 0.6259, -0.1453]) tensor([0.0440, 0.1812, 0.5298, 0.2450]) -Greedy action tensor([-1.3648, -0.0327, 0.3331, 0.0185]) tensor([0.0702, 0.2661, 0.3836, 0.2801]) -Greedy action tensor([ 2.0014, -0.6038, -0.3626, 0.5636]) tensor([0.7115, 0.0526, 0.0669, 0.1690]) -Greedy action tensor([ 1.7037, -0.8270, 0.0270, 0.5433]) tensor([0.6329, 0.0504, 0.1184, 0.1983]) -Greedy action tensor([ 1.4268, -0.5752, -0.3924, 0.4582]) tensor([0.5964, 0.0805, 0.0967, 0.2264]) -Greedy action tensor([ 1.0087, -0.2551, -0.1200, 0.2294]) tensor([0.4843, 0.1369, 0.1567, 0.2222]) -Greedy action tensor([ 1.2470, -0.4548, 0.0183, 0.2086]) tensor([0.5467, 0.0997, 0.1600, 0.1936]) -Greedy action tensor([ 1.2361, -0.6263, -0.2259, -0.2443]) tensor([0.6193, 0.0962, 0.1435, 0.1409]) -Greedy action tensor([ 1.1520, -0.2309, -0.3449, 0.5372]) tensor([0.4962, 0.1245, 0.1111, 0.2683]) -Greedy action tensor([ 2.4817, -0.8958, -0.8143, 0.6800]) tensor([0.8090, 0.0276, 0.0300, 0.1335]) -Greedy action tensor([ 1.2952, -0.1009, -0.2715, 0.6699]) tensor([0.5022, 0.1243, 0.1048, 0.2687]) -Greedy action tensor([ 1.7586, -0.3061, -0.5040, 0.5189]) tensor([0.6577, 0.0834, 0.0685, 0.1904]) -Greedy action tensor([ 1.8125, -0.5581, -0.4304, 0.3622]) tensor([0.6973, 0.0651, 0.0740, 0.1635]) -Greedy action tensor([ 1.5011, -0.3312, -1.1587, 0.7596]) tensor([0.5860, 0.0938, 0.0410, 0.2792]) -Greedy action tensor([ 1.2247, -0.4632, -0.7107, 0.0830]) tensor([0.6066, 0.1122, 0.0876, 0.1937]) -Greedy action tensor([ 1.5842, -0.4285, -0.6308, 0.3333]) tensor([0.6540, 0.0874, 0.0714, 0.1872]) -Greedy action tensor([ 1.7181, -0.6796, -0.0374, 0.3640]) tensor([0.6571, 0.0597, 0.1136, 0.1696]) -Greedy action tensor([ 1.7645, -0.8047, -0.5266, 0.4018]) tensor([0.6975, 0.0534, 0.0706, 0.1785]) -Greedy action tensor([ 1.6076, -0.4449, -0.3332, 0.4282]) tensor([0.6331, 0.0813, 0.0909, 0.1947]) -Greedy action tensor([ 1.3415, -0.1201, -0.3248, 0.0475]) tensor([0.5900, 0.1368, 0.1115, 0.1618]) -Greedy action tensor([ 1.8548, 0.2324, -0.5996, 0.0993]) tensor([0.6867, 0.1356, 0.0590, 0.1187]) -Greedy action tensor([ 2.0814, 0.5094, -0.3994, 0.1198]) tensor([0.6984, 0.1450, 0.0584, 0.0982]) -Greedy action tensor([ 1.7978, -0.3488, -0.6513, 0.3714]) tensor([0.6928, 0.0810, 0.0598, 0.1664]) -Greedy action tensor([ 1.5621, -0.4095, 0.0765, 0.3584]) tensor([0.6003, 0.0836, 0.1359, 0.1802]) -Greedy action tensor([ 1.4992, -0.0680, -1.1658, 0.3341]) tensor([0.6289, 0.1312, 0.0438, 0.1962]) -Greedy action tensor([ 1.4491, -0.4261, -0.3417, 0.8301]) tensor([0.5380, 0.0825, 0.0898, 0.2897]) -Greedy action tensor([ 0.9292, -0.3281, 0.0867, 0.1588]) tensor([0.4592, 0.1306, 0.1977, 0.2125]) -Greedy action tensor([ 1.5243, -0.9948, -0.2037, -0.2113]) tensor([0.6971, 0.0561, 0.1238, 0.1229]) -Greedy action tensor([ 1.4816, -0.5455, -0.5821, 0.1989]) tensor([0.6510, 0.0858, 0.0827, 0.1805]) -Greedy action tensor([ 1.7517, -0.7328, -0.2558, 0.3638]) tensor([0.6815, 0.0568, 0.0915, 0.1701]) -Greedy action tensor([ 1.0194, -0.4985, 0.1461, -0.1328]) tensor([0.5121, 0.1122, 0.2138, 0.1618]) -Greedy action tensor([ 1.8233, -0.0329, -0.0589, 0.3972]) tensor([0.6457, 0.1009, 0.0983, 0.1551]) -Greedy action tensor([ 1.8813, -0.8669, -0.3305, 0.8068]) tensor([0.6601, 0.0423, 0.0723, 0.2254]) -Greedy action tensor([ 1.5024, -0.8921, -0.4225, -0.0404]) tensor([0.6892, 0.0629, 0.1005, 0.1473]) -Greedy action tensor([ 2.0008, -0.1616, -0.3079, 0.3018]) tensor([0.7157, 0.0823, 0.0711, 0.1309]) -Greedy action tensor([ 0.9197, 0.1078, -0.6373, -0.0419]) tensor([0.4909, 0.2180, 0.1035, 0.1877]) -Greedy action tensor([ 1.0285, -0.2034, -0.3856, 0.2955]) tensor([0.4962, 0.1448, 0.1206, 0.2384]) -Greedy action tensor([ 2.9841, -1.2666, -0.3966, 0.8328]) tensor([0.8587, 0.0122, 0.0292, 0.0999]) -Greedy action tensor([ 1.2424, -0.2946, -0.4243, 0.2187]) tensor([0.5672, 0.1219, 0.1071, 0.2038]) -Greedy action tensor([ 1.2049, -0.0076, 0.0911, -0.2012]) tensor([0.5345, 0.1590, 0.1755, 0.1310]) -Greedy action tensor([ 1.2956, -0.3202, -0.7346, 0.2785]) tensor([0.5911, 0.1175, 0.0776, 0.2138]) -Greedy action tensor([ 1.4148, -0.1960, -0.7073, 0.1154]) tensor([0.6281, 0.1254, 0.0752, 0.1713]) -Greedy action tensor([ 1.7710, -0.6692, -0.3755, 0.0402]) tensor([0.7240, 0.0631, 0.0846, 0.1282]) -Greedy action tensor([ 1.0775, -0.2304, -0.2937, 0.2403]) tensor([0.5110, 0.1382, 0.1297, 0.2212]) -Greedy action tensor([ 1.7605, 0.0463, -0.4446, -0.2552]) tensor([0.7025, 0.1265, 0.0774, 0.0936]) -Greedy action tensor([ 1.8467, -0.9216, -0.0036, 1.1577]) tensor([0.5807, 0.0364, 0.0913, 0.2916]) -Greedy action tensor([ 1.5852, -0.2448, -0.8264, 0.1703]) tensor([0.6698, 0.1074, 0.0601, 0.1627]) -Greedy action tensor([ 1.3846, -0.3532, -0.3200, 0.3566]) tensor([0.5829, 0.1025, 0.1060, 0.2085]) -Greedy action tensor([ 1.3080, -0.5213, -0.2477, 0.1784]) tensor([0.5901, 0.0947, 0.1245, 0.1907]) -Greedy action tensor([ 1.5085, -0.4540, -0.5814, 0.8518]) tensor([0.5609, 0.0788, 0.0694, 0.2909]) -Greedy action tensor([ 1.1006, -0.4892, -0.3993, 0.5794]) tensor([0.4948, 0.1009, 0.1104, 0.2938]) -Greedy action tensor([ 1.5347, -0.6868, -0.1727, 0.3802]) tensor([0.6230, 0.0676, 0.1130, 0.1964]) -Greedy action tensor([ 1.5130, -0.7457, -0.6983, -0.1175]) tensor([0.7093, 0.0741, 0.0777, 0.1389]) -Greedy action tensor([ 1.2074, -0.2313, -0.2024, -0.0230]) tensor([0.5638, 0.1338, 0.1377, 0.1647]) -Greedy action tensor([ 1.1720, -0.1202, -0.9231, 0.5174]) tensor([0.5216, 0.1432, 0.0642, 0.2710]) -Greedy action tensor([ 1.5388, -1.1472, -0.2610, 0.0318]) tensor([0.6873, 0.0468, 0.1136, 0.1523]) -Greedy action tensor([ 1.5599, -0.0853, -1.1072, 0.2338]) tensor([0.6545, 0.1263, 0.0455, 0.1738]) -Greedy action tensor([ 1.5901, -1.0071, -0.3002, 0.3375]) tensor([0.6617, 0.0493, 0.0999, 0.1891]) -Greedy action tensor([ 2.1810, -0.9909, -0.3174, 0.3795]) tensor([0.7757, 0.0325, 0.0638, 0.1280]) -Greedy action tensor([ 1.5095, -0.3839, -0.3407, 0.0842]) tensor([0.6459, 0.0972, 0.1015, 0.1553]) -Greedy action tensor([ 1.2923, -0.2758, -0.5801, 0.4453]) tensor([0.5584, 0.1164, 0.0859, 0.2394]) -Greedy action tensor([ 0.9172, -0.3654, -0.2036, -0.1136]) tensor([0.5102, 0.1415, 0.1663, 0.1820]) -Greedy action tensor([ 1.2934, -0.8802, -0.1170, 0.3792]) tensor([0.5686, 0.0647, 0.1388, 0.2279]) -Greedy action tensor([ 1.5104, -0.9400, -0.4369, 0.0859]) tensor([0.6805, 0.0587, 0.0971, 0.1637]) -Greedy action tensor([ 0.9981, -0.4982, -0.0766, 0.3477]) tensor([0.4791, 0.1073, 0.1636, 0.2500]) -Greedy action tensor([ 1.2088, -0.1729, -0.0561, -0.0021]) tensor([0.5460, 0.1371, 0.1541, 0.1627]) -Greedy action tensor([ 1.1977, -0.5351, -0.2969, 0.1974]) tensor([0.5653, 0.0999, 0.1268, 0.2079]) -Greedy action tensor([ 1.7957, -0.7475, -0.2742, 0.7055]) tensor([0.6490, 0.0510, 0.0819, 0.2181]) -Greedy action tensor([ 1.6916, -0.3966, -0.1905, 0.2471]) tensor([0.6614, 0.0820, 0.1007, 0.1560]) -Greedy action tensor([ 2.6663, 1.0329, 0.4082, -0.4080]) tensor([0.7429, 0.1451, 0.0777, 0.0343]) -Greedy action tensor([ 1.4795, -0.1126, -0.8763, 0.3117]) tensor([0.6214, 0.1264, 0.0589, 0.1933]) -Greedy action tensor([ 1.4754, -0.6390, -0.5875, -0.0755]) tensor([0.6850, 0.0827, 0.0871, 0.1453]) -Greedy action tensor([ 1.3556, -0.4120, -0.4783, 0.4800]) tensor([0.5724, 0.0977, 0.0915, 0.2385]) -Greedy action tensor([ 1.3568, -0.3884, -0.4790, 0.2766]) tensor([0.5975, 0.1043, 0.0953, 0.2029]) -Greedy action tensor([ 1.2874, -0.7241, -0.5685, 0.7555]) tensor([0.5326, 0.0713, 0.0832, 0.3129]) -Greedy action tensor([ 1.3930, -0.5758, -0.2984, 0.3972]) tensor([0.5906, 0.0825, 0.1088, 0.2182]) -Greedy action tensor([ 1.2955, -0.3712, -0.4608, 0.0412]) tensor([0.6072, 0.1147, 0.1049, 0.1732]) -Greedy action tensor([ 1.1002, -0.4767, -0.2334, 0.2833]) tensor([0.5230, 0.1081, 0.1378, 0.2311]) -Greedy action tensor([ 1.9859, -0.3808, -0.5251, 0.1806]) tensor([0.7466, 0.0700, 0.0606, 0.1228]) -Greedy action tensor([ 1.3440, -0.1509, -0.1970, 0.5066]) tensor([0.5344, 0.1198, 0.1145, 0.2313]) -Greedy action tensor([ 1.6163, -0.3525, -0.9424, 0.2677]) tensor([0.6772, 0.0946, 0.0524, 0.1758]) -Greedy action tensor([ 1.2966, -0.4210, -0.1455, 0.3430]) tensor([0.5552, 0.0996, 0.1313, 0.2139]) -Greedy action tensor([ 1.9991, -0.0488, -0.3044, 0.4755]) tensor([0.6912, 0.0892, 0.0691, 0.1506]) -Greedy action tensor([ 1.0867, -0.9127, 0.0465, -0.5425]) tensor([0.5935, 0.0804, 0.2097, 0.1164]) -Greedy action tensor([ 0.6301, -0.2663, 0.0050, -0.3106]) tensor([0.4285, 0.1748, 0.2293, 0.1673]) -Greedy action tensor([ 0.3313, -0.0391, -0.0710, -0.4237]) tensor([0.3535, 0.2440, 0.2364, 0.1661]) -Greedy action tensor([ 0.8995, -0.5677, -0.0437, -0.1695]) tensor([0.5093, 0.1174, 0.1983, 0.1749]) -Greedy action tensor([ 0.7675, -0.8107, -0.0735, -0.4068]) tensor([0.5137, 0.1060, 0.2215, 0.1588]) -Greedy action tensor([ 0.6077, -0.3194, -0.0797, -0.2290]) tensor([0.4289, 0.1697, 0.2157, 0.1858]) -Greedy action tensor([ 0.5756, -0.4515, -0.1248, -0.1478]) tensor([0.4274, 0.1530, 0.2122, 0.2074]) -Greedy action tensor([ 0.8690, -0.8386, 0.1940, -0.3472]) tensor([0.5033, 0.0913, 0.2563, 0.1492]) -Greedy action tensor([ 0.8597, -0.5446, -0.0239, -0.2403]) tensor([0.5021, 0.1233, 0.2075, 0.1671]) -Greedy action tensor([ 0.6722, -0.4269, -0.0566, -0.2349]) tensor([0.4506, 0.1501, 0.2174, 0.1819]) -Greedy action tensor([ 0.9437, -0.5774, -0.1348, -0.3763]) tensor([0.5477, 0.1197, 0.1863, 0.1463]) -Greedy action tensor([ 0.9766, -0.8134, 0.1897, -0.3222]) tensor([0.5277, 0.0881, 0.2402, 0.1440]) -Greedy action tensor([ 0.3219, 0.1867, -0.1647, -0.1668]) tensor([0.3224, 0.2816, 0.1982, 0.1978]) -Greedy action tensor([ 0.6707, -0.2236, -0.0094, -0.0979]) tensor([0.4203, 0.1719, 0.2129, 0.1949]) -Greedy action tensor([ 0.8960, -0.5308, -0.0639, -0.8038]) tensor([0.5538, 0.1329, 0.2121, 0.1012]) -Greedy action tensor([ 0.7576, -0.3664, -0.0064, -0.1718]) tensor([0.4576, 0.1487, 0.2131, 0.1806]) -Greedy action tensor([ 0.6861, -0.4401, -0.0978, -0.1380]) tensor([0.4506, 0.1461, 0.2057, 0.1976]) -Greedy action tensor([ 0.9717, -0.5635, 0.0845, -0.6730]) tensor([0.5494, 0.1183, 0.2262, 0.1061]) -Greedy action tensor([ 0.6583, -0.1601, -0.0466, -0.1769]) tensor([0.4221, 0.1862, 0.2086, 0.1831]) -Greedy action tensor([ 0.5003, 0.1211, -0.0820, 0.0661]) tensor([0.3459, 0.2367, 0.1932, 0.2241]) -Greedy action tensor([ 1.0830, -0.8043, -0.0141, -0.5531]) tensor([0.5952, 0.0902, 0.1987, 0.1159]) -Greedy action tensor([ 0.5400, -0.3086, -0.0248, -0.0161]) tensor([0.3891, 0.1665, 0.2212, 0.2231]) -Greedy action tensor([ 0.4504, -0.1804, -0.0372, -0.0942]) tensor([0.3668, 0.1952, 0.2252, 0.2128]) -Greedy action tensor([ 0.6167, -0.3120, -0.0877, -0.1598]) tensor([0.4256, 0.1682, 0.2104, 0.1958]) -Greedy action tensor([ 0.4806, -0.3129, -0.0825, -0.1005]) tensor([0.3875, 0.1752, 0.2206, 0.2167]) -Greedy action tensor([ 0.8518, -0.5383, -0.0754, -0.2769]) tensor([0.5081, 0.1265, 0.2010, 0.1643]) -Greedy action tensor([ 0.8687, -0.6449, -0.0916, -0.2370]) tensor([0.5171, 0.1138, 0.1979, 0.1711]) -Greedy action tensor([ 1.1156, 0.2424, -0.0070, -0.3187]) tensor([0.5047, 0.2108, 0.1642, 0.1203]) -Greedy action tensor([ 0.5535, -0.3635, -0.2595, -0.1704]) tensor([0.4295, 0.1717, 0.1905, 0.2083]) -Greedy action tensor([ 1.2517, 0.0593, -0.1064, -0.4398]) tensor([0.5731, 0.1739, 0.1474, 0.1056]) -Greedy action tensor([ 0.8725, -0.6310, -0.0257, -0.3249]) tensor([0.5177, 0.1151, 0.2109, 0.1563]) -Greedy action tensor([ 0.3423, -0.2094, -0.1487, -0.1133]) tensor([0.3544, 0.2041, 0.2169, 0.2247]) -Greedy action tensor([ 0.8885, -0.5439, -0.0443, -0.4558]) tensor([0.5283, 0.1261, 0.2079, 0.1377]) -Greedy action tensor([ 0.6166, -0.3618, -0.2870, -0.4746]) tensor([0.4724, 0.1776, 0.1914, 0.1586]) -Greedy action tensor([ 0.6206, -0.4672, -0.0160, -0.2015]) tensor([0.4337, 0.1461, 0.2295, 0.1906]) -Greedy action tensor([ 0.9513, -1.3093, -0.1296, -0.5688]) tensor([0.6016, 0.0627, 0.2041, 0.1316]) -Greedy action tensor([ 0.8352, -0.5947, -0.0507, -0.2811]) tensor([0.5053, 0.1209, 0.2083, 0.1655]) -Greedy action tensor([ 0.7085, -0.3762, -0.0400, -0.3257]) tensor([0.4616, 0.1560, 0.2183, 0.1641]) -Greedy action tensor([ 0.8600, -0.7301, 0.1184, -0.4658]) tensor([0.5139, 0.1048, 0.2448, 0.1365]) -Greedy action tensor([ 1.0008, -0.3400, -0.0208, -0.0427]) tensor([0.5066, 0.1326, 0.1824, 0.1784]) -Greedy action tensor([ 0.5447, -0.6011, -0.0821, -0.2245]) tensor([0.4318, 0.1373, 0.2307, 0.2001]) -Greedy action tensor([ 0.9075, -0.9807, 0.1920, -0.4946]) tensor([0.5301, 0.0802, 0.2592, 0.1304]) -Greedy action tensor([ 0.8370, -0.4320, 0.0073, -0.1694]) tensor([0.4801, 0.1350, 0.2094, 0.1755]) -Greedy action tensor([ 0.4170, -0.1343, -0.0261, -0.0723]) tensor([0.3532, 0.2035, 0.2268, 0.2165]) -Greedy action tensor([ 0.7014, -0.3565, 0.0313, -0.3746]) tensor([0.4546, 0.1578, 0.2326, 0.1550]) -Greedy action tensor([ 0.5663, -0.4145, -0.1177, -0.0443]) tensor([0.4128, 0.1548, 0.2083, 0.2241]) -Greedy action tensor([ 1.0628, -0.5776, -0.0850, -0.3397]) tensor([0.5691, 0.1104, 0.1806, 0.1400]) -Greedy action tensor([ 0.7179, -0.3317, -0.1011, -0.2058]) tensor([0.4570, 0.1600, 0.2015, 0.1815]) -Greedy action tensor([ 0.8256, -0.3261, -0.0379, -0.2602]) tensor([0.4818, 0.1523, 0.2032, 0.1627]) -Greedy action tensor([ 0.3099, 0.1912, -0.0901, -0.0052]) tensor([0.3041, 0.2701, 0.2039, 0.2219]) -Greedy action tensor([ 0.7715, 0.0761, -0.1564, -0.4290]) tensor([0.4555, 0.2272, 0.1801, 0.1371]) -Greedy action tensor([ 1.1116, -0.5569, -0.1046, -0.5126]) tensor([0.5945, 0.1121, 0.1762, 0.1172]) -Greedy action tensor([ 0.5438, -0.2551, 0.0207, -0.3068]) tensor([0.4049, 0.1821, 0.2400, 0.1730]) -Greedy action tensor([ 0.5316, -0.3210, 0.0526, -0.1534]) tensor([0.3922, 0.1672, 0.2429, 0.1977]) -Greedy action tensor([ 0.6396, -0.1946, -0.1034, -0.0142]) tensor([0.4115, 0.1787, 0.1958, 0.2140]) -Greedy action tensor([ 0.3479, 0.0696, -0.1898, 0.1052]) tensor([0.3199, 0.2422, 0.1869, 0.2510]) -Greedy action tensor([ 0.4060, -0.1139, -0.0600, -0.0984]) tensor([0.3539, 0.2104, 0.2221, 0.2137]) -Greedy action tensor([ 0.8977, -0.2771, 0.1537, -0.2408]) tensor([0.4752, 0.1468, 0.2258, 0.1522]) -Greedy action tensor([ 0.8981, -0.6370, -0.1431, -0.3018]) tensor([0.5349, 0.1152, 0.1888, 0.1611]) -Greedy action tensor([ 0.2732, -0.0376, 0.1477, 0.0973]) tensor([0.2895, 0.2122, 0.2554, 0.2429]) -Greedy action tensor([ 1.2933, -0.7539, -0.2184, -0.5533]) tensor([0.6634, 0.0856, 0.1463, 0.1047]) -Greedy action tensor([ 0.6620, -0.5391, -0.0100, -0.2100]) tensor([0.4485, 0.1349, 0.2290, 0.1875]) -Greedy action tensor([ 0.3083, -0.2009, -0.0554, -0.2702]) tensor([0.3500, 0.2104, 0.2433, 0.1963]) -Greedy action tensor([ 0.3332, 0.0899, -0.2524, -0.1586]) tensor([0.3387, 0.2656, 0.1886, 0.2071]) -Greedy action tensor([ 0.5546, -0.1952, -0.1030, -0.0434]) tensor([0.3936, 0.1860, 0.2039, 0.2165]) -Greedy action tensor([ 0.3795, 0.1370, -0.0859, 0.0149]) tensor([0.3219, 0.2525, 0.2021, 0.2235]) -Greedy action tensor([ 0.7806, -0.5792, -0.0905, -0.2657]) tensor([0.4935, 0.1267, 0.2065, 0.1733]) -Greedy action tensor([ 0.6710, -0.7134, -0.0914, -0.2204]) tensor([0.4701, 0.1178, 0.2193, 0.1928]) -Greedy action tensor([ 0.9177, -0.4014, -0.1173, -0.3444]) tensor([0.5247, 0.1403, 0.1864, 0.1485]) -Greedy action tensor([ 1.2081, -0.5446, -0.1527, -0.4010]) tensor([0.6136, 0.1063, 0.1574, 0.1228]) -Greedy action tensor([ 0.6616, -0.5582, -0.0284, -0.3159]) tensor([0.4602, 0.1359, 0.2308, 0.1731]) -Greedy action tensor([ 0.6480, -0.3510, -0.0236, -0.3403]) tensor([0.4442, 0.1636, 0.2269, 0.1653]) -Greedy action tensor([ 0.7429, 0.0310, -0.0044, -0.2537]) tensor([0.4285, 0.2103, 0.2030, 0.1582]) -Greedy action tensor([ 0.8241, -0.6450, 0.1366, -0.3503]) tensor([0.4897, 0.1127, 0.2463, 0.1513]) -Greedy action tensor([ 0.9751, -0.5514, -0.0814, -0.3503]) tensor([0.5463, 0.1187, 0.1899, 0.1451]) -Greedy action tensor([ 0.6647, -0.1400, -0.0794, -0.0533]) tensor([0.4149, 0.1856, 0.1971, 0.2024]) -Greedy action tensor([ 0.8403, -0.5182, 0.0309, -0.2327]) tensor([0.4892, 0.1258, 0.2178, 0.1673]) -Greedy action tensor([ 0.7165, -0.2550, -0.0375, -0.4216]) tensor([0.4609, 0.1745, 0.2169, 0.1477]) -Greedy action tensor([ 0.6942, -0.4945, -0.1778, -0.1645]) tensor([0.4659, 0.1419, 0.1948, 0.1974]) -Greedy action tensor([ 0.5040, -0.3085, -0.0077, -0.1943]) tensor([0.3936, 0.1747, 0.2359, 0.1958]) -Greedy action tensor([ 1.0406, -0.5167, -0.0495, -0.3948]) tensor([0.5602, 0.1181, 0.1884, 0.1333]) -Greedy action tensor([ 1.2076, -0.6855, 0.6425, -0.2100]) tensor([0.5099, 0.0768, 0.2898, 0.1235]) -Greedy action tensor([ 0.0154, 0.6198, -0.3003, -0.1908]) tensor([0.2287, 0.4185, 0.1668, 0.1861]) -Greedy action tensor([ 1.0395, -1.6963, 0.7203, 0.8912]) tensor([0.3768, 0.0244, 0.2739, 0.3249]) -Greedy action tensor([ 0.5485, -1.5311, -0.4482, -0.2320]) tensor([0.5122, 0.0640, 0.1891, 0.2347]) -Greedy action tensor([ 0.0756, -0.9193, -0.1422, -0.9208]) tensor([0.3932, 0.1454, 0.3162, 0.1452]) -Greedy action tensor([ 1.1840, -0.6175, 0.3271, 0.4731]) tensor([0.4806, 0.0793, 0.2040, 0.2361]) -Greedy action tensor([ 0.1949, -1.7103, -0.3540, 0.3154]) tensor([0.3503, 0.0521, 0.2024, 0.3952]) -Greedy action tensor([-2.0725, -0.9096, 1.4196, -1.2189]) tensor([0.0254, 0.0812, 0.8338, 0.0596]) -Greedy action tensor([-0.1293, -0.5009, 0.9112, -0.7747]) tensor([0.1982, 0.1367, 0.5611, 0.1040]) -Greedy action tensor([-0.0348, -0.4666, 0.6658, 0.0521]) tensor([0.2103, 0.1366, 0.4237, 0.2294]) -Greedy action tensor([ 2.0284, -1.0424, 0.2350, 0.8731]) tensor([0.6546, 0.0304, 0.1089, 0.2062]) -Greedy action tensor([ 0.0954, -1.1416, -0.0628, 0.9304]) tensor([0.2248, 0.0652, 0.1919, 0.5181]) -Greedy action tensor([ 0.6503, -1.3101, -0.2145, -0.3611]) tensor([0.5193, 0.0731, 0.2187, 0.1889]) -Greedy action tensor([ 0.0811, 0.5719, 0.6383, -0.3046]) tensor([0.1976, 0.3229, 0.3451, 0.1344]) -Greedy action tensor([ 1.2298, -0.5573, -0.2902, 0.6079]) tensor([0.5200, 0.0871, 0.1137, 0.2792]) -Greedy action tensor([-0.7756, 0.0768, -0.8442, -0.3428]) tensor([0.1718, 0.4029, 0.1604, 0.2648]) -Greedy action tensor([-0.4091, -0.5069, -0.0139, -0.2901]) tensor([0.2213, 0.2007, 0.3286, 0.2493]) -Greedy action tensor([0.7612, 0.5167, 0.2061, 0.0038]) tensor([0.3539, 0.2771, 0.2031, 0.1659]) -Greedy action tensor([-0.2393, -0.7962, -0.0068, 0.0111]) tensor([0.2428, 0.1391, 0.3063, 0.3118]) -Greedy action tensor([-0.0863, -0.5603, 0.1987, -0.6167]) tensor([0.2824, 0.1758, 0.3756, 0.1662]) -Greedy action tensor([-0.0459, -1.5302, 0.4466, -0.2672]) tensor([0.2729, 0.0618, 0.4466, 0.2187]) -Greedy action tensor([-0.2849, 0.2808, 0.6688, -0.4225]) tensor([0.1606, 0.2827, 0.4167, 0.1399]) -Greedy action tensor([-0.0317, 0.1276, -0.6238, -0.4443]) tensor([0.2952, 0.3462, 0.1633, 0.1954]) -Greedy action tensor([-0.4828, -0.2893, -0.2076, -0.7515]) tensor([0.2329, 0.2825, 0.3066, 0.1780]) -Greedy action tensor([-1.3860, -0.3000, -0.4035, 0.1709]) tensor([0.0879, 0.2604, 0.2348, 0.4170]) -Greedy action tensor([-1.4642, -0.8158, 0.4348, -0.9449]) tensor([0.0887, 0.1697, 0.5925, 0.1491]) -Greedy action tensor([-0.1931, -0.2362, -0.3170, -0.6027]) tensor([0.2853, 0.2733, 0.2520, 0.1894]) -Greedy action tensor([-0.0407, -2.6416, -0.3872, -0.2510]) tensor([0.3858, 0.0286, 0.2728, 0.3127]) -Greedy action tensor([ 1.5394, -0.9786, 1.2046, 2.0882]) tensor([0.2835, 0.0229, 0.2028, 0.4908]) -Greedy action tensor([-1.4372, -0.5278, 0.4430, -0.6621]) tensor([0.0819, 0.2034, 0.5369, 0.1778]) -Greedy action tensor([-0.2305, -0.8565, 0.4863, -0.1728]) tensor([0.2154, 0.1152, 0.4412, 0.2282]) -Greedy action tensor([ 0.9250, -1.2065, -0.6238, 1.2847]) tensor([0.3618, 0.0429, 0.0769, 0.5184]) -Greedy action tensor([ 0.1277, -0.2749, 0.6337, -0.1346]) tensor([0.2441, 0.1632, 0.4049, 0.1878]) -Greedy action tensor([ 1.4661, -0.3455, -0.6055, 0.2349]) tensor([0.6324, 0.1033, 0.0797, 0.1846]) -Greedy action tensor([ 0.4962, 0.1103, 0.5118, -0.2935]) tensor([0.3175, 0.2159, 0.3225, 0.1441]) -Greedy action tensor([ 0.1186, -0.5887, 0.4111, 1.1872]) tensor([0.1741, 0.0858, 0.2332, 0.5069]) -Greedy action tensor([ 0.1048, -0.9844, 0.1466, -0.4023]) tensor([0.3354, 0.1129, 0.3497, 0.2020]) -Greedy action tensor([ 0.2927, -1.2670, -0.1856, 0.2390]) tensor([0.3600, 0.0757, 0.2232, 0.3412]) -Greedy action tensor([ 0.8249, -0.2166, -0.4406, 0.5251]) tensor([0.4209, 0.1485, 0.1187, 0.3118]) -Greedy action tensor([ 1.8721, -0.6820, 0.1991, 0.4695]) tensor([0.6616, 0.0515, 0.1242, 0.1627]) -Greedy action tensor([ 0.1964, -0.4898, -0.3385, -0.7781]) tensor([0.4054, 0.2041, 0.2375, 0.1530]) -Greedy action tensor([ 0.8403, -0.5908, -0.2185, 0.6612]) tensor([0.4129, 0.0987, 0.1432, 0.3452]) -Greedy action tensor([ 0.5652, 0.3468, -0.5443, -0.0741]) tensor([0.3758, 0.3021, 0.1239, 0.1983]) -Greedy action tensor([ 0.3489, 0.0722, -0.6187, -0.1186]) tensor([0.3617, 0.2743, 0.1374, 0.2266]) -Greedy action tensor([ 1.4408, -1.0917, 1.5739, 0.4559]) tensor([0.3853, 0.0306, 0.4402, 0.1439]) -Greedy action tensor([0.8981, 0.2212, 0.8067, 1.0104]) tensor([0.2825, 0.1436, 0.2578, 0.3161]) -Greedy action tensor([-0.5533, -0.8905, -0.5667, 0.1153]) tensor([0.2150, 0.1534, 0.2121, 0.4195]) -Greedy action tensor([ 0.7887, -1.0013, -0.7479, 0.0502]) tensor([0.5377, 0.0898, 0.1157, 0.2569]) -Greedy action tensor([ 0.6628, -1.0988, 0.1615, 0.3831]) tensor([0.3947, 0.0678, 0.2391, 0.2984]) -Greedy action tensor([-0.6510, -1.0831, 0.2263, 0.4093]) tensor([0.1441, 0.0935, 0.3464, 0.4160]) -Greedy action tensor([ 0.3099, 0.1451, 0.8365, -0.3512]) tensor([0.2465, 0.2090, 0.4173, 0.1272]) -Greedy action tensor([-0.0890, -0.6645, -0.4518, -1.2343]) tensor([0.3882, 0.2183, 0.2700, 0.1235]) -Greedy action tensor([ 0.3064, -0.3476, 0.1353, 0.8829]) tensor([0.2414, 0.1255, 0.2034, 0.4296]) -Greedy action tensor([-1.5994, -0.4856, -1.4116, 0.3100]) tensor([0.0833, 0.2538, 0.1005, 0.5624]) -Greedy action tensor([-0.4279, -0.9063, 0.0626, -1.1334]) tensor([0.2669, 0.1654, 0.4359, 0.1318]) -Greedy action tensor([ 0.2881, -1.3898, 0.4609, 0.4592]) tensor([0.2807, 0.0524, 0.3337, 0.3331]) -Greedy action tensor([-2.5555, 0.3216, -0.3072, -0.6562]) tensor([0.0286, 0.5087, 0.2713, 0.1914]) -Greedy action tensor([-0.7384, -0.8254, 0.2621, -0.6143]) tensor([0.1734, 0.1589, 0.4715, 0.1963]) -Greedy action tensor([ 1.7806, -1.0156, -0.6469, 0.0183]) tensor([0.7570, 0.0462, 0.0668, 0.1300]) -Greedy action tensor([ 0.7279, -0.0823, 0.2101, 0.4953]) tensor([0.3530, 0.1570, 0.2103, 0.2797]) -Greedy action tensor([ 0.7567, -0.1848, 0.4650, 0.2070]) tensor([0.3684, 0.1437, 0.2752, 0.2126]) -Greedy action tensor([-0.0977, -2.3725, -0.0591, 0.0758]) tensor([0.3001, 0.0309, 0.3120, 0.3570]) -Greedy action tensor([-1.1789, -0.7111, 0.5561, -0.6182]) tensor([0.0998, 0.1594, 0.5659, 0.1749]) -Greedy action tensor([ 0.0734, -1.6618, 0.1304, -0.9706]) tensor([0.3865, 0.0682, 0.4092, 0.1361]) -Greedy action tensor([ 0.3326, -1.5698, 0.6681, 0.1898]) tensor([0.2928, 0.0437, 0.4096, 0.2539]) -Greedy action tensor([-0.5875, -0.0952, -1.1265, 0.2741]) tensor([0.1790, 0.2929, 0.1044, 0.4237]) -Greedy action tensor([ 1.1878, 0.2550, -0.3687, 0.4808]) tensor([0.4768, 0.1876, 0.1005, 0.2351]) -Greedy action tensor([ 0.5845, -1.2011, 0.0170, -0.5155]) tensor([0.4837, 0.0811, 0.2742, 0.1610]) -Greedy action tensor([ 0.0477, -0.2387, 0.0958, -0.7462]) tensor([0.3075, 0.2309, 0.3226, 0.1390]) -Greedy action tensor([-0.2729, 0.7013, -0.8836, -0.0085]) tensor([0.1820, 0.4821, 0.0988, 0.2371]) -Greedy action tensor([-0.4385, 0.9557, 0.5911, -0.8156]) tensor([0.1174, 0.4733, 0.3287, 0.0805]) -Greedy action tensor([ 0.3254, 0.0346, 0.2382, -0.2618]) tensor([0.3106, 0.2322, 0.2846, 0.1726]) -Greedy action tensor([ 1.1985, 0.3575, 0.8976, -0.7021]) tensor([0.4309, 0.1858, 0.3189, 0.0644]) -Greedy action tensor([-0.2270, 0.1313, -0.1632, -0.5022]) tensor([0.2350, 0.3362, 0.2504, 0.1784]) -Greedy action tensor([ 0.8617, -0.1868, 0.7343, -0.1545]) tensor([0.3857, 0.1352, 0.3396, 0.1396]) -Greedy action tensor([ 0.4681, 0.4543, -0.0870, 0.3417]) tensor([0.2906, 0.2866, 0.1668, 0.2561]) -Greedy action tensor([-0.1599, -1.1350, -0.0170, 0.1342]) tensor([0.2582, 0.0974, 0.2979, 0.3465]) -Greedy action tensor([ 0.4087, 0.0688, 1.1956, -0.2753]) tensor([0.2266, 0.1613, 0.4978, 0.1143]) -Greedy action tensor([ 0.4029, -0.3641, 0.0374, 0.5003]) tensor([0.3067, 0.1424, 0.2128, 0.3381]) -Greedy action tensor([ 0.3462, -0.1205, 0.5500, 0.4132]) tensor([0.2549, 0.1599, 0.3126, 0.2726]) -Greedy action tensor([-0.0927, 0.1789, 0.1397, -1.1143]) tensor([0.2542, 0.3335, 0.3207, 0.0915]) -Greedy action tensor([ 0.1306, -0.7236, 0.5240, -0.7095]) tensor([0.2994, 0.1275, 0.4438, 0.1293]) -Greedy action tensor([-1.8833, -0.4432, 0.6392, -0.1532]) tensor([0.0429, 0.1810, 0.5342, 0.2419]) -Greedy action tensor([-1.8905, -0.3702, 0.6300, -0.1459]) tensor([0.0421, 0.1927, 0.5240, 0.2412]) -Greedy action tensor([-1.9169, -0.4433, 0.6731, -0.1463]) tensor([0.0407, 0.1776, 0.5426, 0.2391]) -Greedy action tensor([-1.8115, -0.4576, 0.6042, -0.1091]) tensor([0.0464, 0.1796, 0.5194, 0.2545]) -Greedy action tensor([-1.0519, -0.3149, 0.1807, -0.0201]) tensor([0.1072, 0.2241, 0.3678, 0.3009]) -Greedy action tensor([-1.9392, -0.4471, 0.6673, -0.1758]) tensor([0.0403, 0.1791, 0.5458, 0.2349]) -Greedy action tensor([-1.4292, 0.2607, 0.3619, -0.0692]) tensor([0.0613, 0.3322, 0.3676, 0.2389]) -Greedy action tensor([-1.5272, -0.3282, 0.6339, 0.0588]) tensor([0.0559, 0.1855, 0.4854, 0.2731]) -Greedy action tensor([-1.8581, -0.3610, 0.6378, -0.1053]) tensor([0.0428, 0.1912, 0.5191, 0.2469]) -Greedy action tensor([-1.9052, -0.4380, 0.6740, -0.1532]) tensor([0.0412, 0.1786, 0.5429, 0.2374]) -Greedy action tensor([-1.8635, -0.4072, 0.6217, -0.1388]) tensor([0.0437, 0.1873, 0.5241, 0.2450]) -Greedy action tensor([-1.4944, 0.2227, 0.3625, -0.0196]) tensor([0.0577, 0.3211, 0.3693, 0.2520]) -Greedy action tensor([-1.8712, -0.2695, 0.6020, -0.1296]) tensor([0.0425, 0.2109, 0.5041, 0.2425]) -Greedy action tensor([-1.8643, -0.4615, 0.6346, -0.1407]) tensor([0.0438, 0.1780, 0.5328, 0.2454]) -Greedy action tensor([-1.9081, -0.4537, 0.6539, -0.1625]) tensor([0.0417, 0.1786, 0.5407, 0.2390]) -Greedy action tensor([-1.9202, -0.4460, 0.6555, -0.1676]) tensor([0.0412, 0.1799, 0.5413, 0.2376]) -Greedy action tensor([-1.8479, -0.3544, 0.6155, -0.1239]) tensor([0.0439, 0.1953, 0.5150, 0.2459]) -Greedy action tensor([-1.9464, -0.4509, 0.6682, -0.1814]) tensor([0.0401, 0.1787, 0.5472, 0.2340]) -Greedy action tensor([-1.8723, -0.3891, 0.6219, -0.1770]) tensor([0.0435, 0.1919, 0.5274, 0.2372]) -Greedy action tensor([-1.5389, -0.4423, 0.4673, -0.0487]) tensor([0.0630, 0.1887, 0.4686, 0.2797]) -Greedy action tensor([0.9131, 1.3159, 0.0106, 0.5177]) tensor([0.2797, 0.4185, 0.1134, 0.1884]) -Greedy action tensor([-1.3581, -0.4723, 0.3521, 0.0360]) tensor([0.0770, 0.1867, 0.4258, 0.3104]) -Greedy action tensor([-1.8731, -0.2939, 0.6264, -0.1042]) tensor([0.0419, 0.2030, 0.5096, 0.2455]) -Greedy action tensor([-1.8659, -0.4009, 0.6219, -0.1477]) tensor([0.0436, 0.1887, 0.5247, 0.2430]) -Greedy action tensor([-1.8414, -0.4149, 0.6140, -0.1202]) tensor([0.0446, 0.1858, 0.5200, 0.2495]) -Greedy action tensor([-1.8226, -0.4458, 0.6066, -0.1249]) tensor([0.0459, 0.1820, 0.5213, 0.2508]) -Greedy action tensor([-1.2819, -0.5173, 0.7606, 0.8721]) tensor([0.0513, 0.1103, 0.3958, 0.4425]) -Greedy action tensor([-1.4980, -0.4521, 0.5495, 0.1992]) tensor([0.0586, 0.1669, 0.4544, 0.3201]) -Greedy action tensor([-1.8958, -0.4194, 0.6379, -0.1445]) tensor([0.0421, 0.1844, 0.5308, 0.2427]) -Greedy action tensor([-1.8858, -0.4423, 0.6391, -0.1538]) tensor([0.0428, 0.1812, 0.5343, 0.2418]) -Greedy action tensor([-1.9371, -0.4444, 0.6652, -0.1767]) tensor([0.0404, 0.1797, 0.5451, 0.2349]) -Greedy action tensor([-1.8057, -0.4875, 0.6014, -0.1203]) tensor([0.0471, 0.1760, 0.5228, 0.2541]) -Greedy action tensor([-1.9037, -0.4550, 0.6526, -0.1598]) tensor([0.0419, 0.1784, 0.5400, 0.2397]) -Greedy action tensor([-1.8849, -0.4165, 0.6389, -0.1487]) tensor([0.0426, 0.1848, 0.5310, 0.2416]) -Greedy action tensor([-1.9026, -0.4371, 0.6493, -0.1547]) tensor([0.0418, 0.1811, 0.5368, 0.2402]) -Greedy action tensor([-1.7475, -0.4135, 0.6412, -0.0406]) tensor([0.0472, 0.1790, 0.5139, 0.2599]) -Greedy action tensor([-1.6330, -0.3059, 0.6072, 0.0699]) tensor([0.0509, 0.1918, 0.4780, 0.2793]) -Greedy action tensor([-1.9223, -0.4364, 0.6570, -0.1692]) tensor([0.0410, 0.1813, 0.5409, 0.2368]) -Greedy action tensor([-1.3705, 0.5000, 0.2772, 0.0972]) tensor([0.0587, 0.3813, 0.3051, 0.2549]) -Greedy action tensor([-0.3726, 1.1031, 0.0243, 0.3867]) tensor([0.1111, 0.4861, 0.1653, 0.2375]) -Greedy action tensor([-1.9330, -0.3710, 0.6464, -0.1710]) tensor([0.0404, 0.1924, 0.5322, 0.2350]) -Greedy action tensor([-1.9122, -0.4450, 0.6539, -0.1626]) tensor([0.0415, 0.1799, 0.5399, 0.2386]) -Greedy action tensor([-1.8993, -0.4359, 0.6468, -0.1570]) tensor([0.0420, 0.1816, 0.5363, 0.2400]) -Greedy action tensor([-1.5475, -0.2907, 0.4505, -0.0512]) tensor([0.0611, 0.2149, 0.4509, 0.2730]) -Greedy action tensor([-1.2699, -0.3357, 0.3025, 0.2260]) tensor([0.0780, 0.1984, 0.3756, 0.3480]) -Greedy action tensor([-1.8872, -0.4881, 0.7467, -0.1032]) tensor([0.0401, 0.1625, 0.5586, 0.2388]) -Greedy action tensor([-1.8055, -0.1803, 0.5453, -0.0969]) tensor([0.0453, 0.2299, 0.4750, 0.2499]) -Greedy action tensor([-1.9385, -0.4463, 0.6651, -0.1768]) tensor([0.0404, 0.1794, 0.5453, 0.2349]) -Greedy action tensor([-1.9399, -0.4445, 0.6670, -0.1753]) tensor([0.0402, 0.1795, 0.5454, 0.2349]) -Greedy action tensor([-1.9194, -0.4497, 0.6572, -0.1647]) tensor([0.0412, 0.1791, 0.5417, 0.2381]) -Greedy action tensor([-1.9155, -0.4128, 0.6510, -0.1638]) tensor([0.0412, 0.1851, 0.5363, 0.2374]) -Greedy action tensor([-1.8883, -0.4059, 0.6429, -0.1489]) tensor([0.0423, 0.1861, 0.5311, 0.2406]) -Greedy action tensor([-1.8775, -0.4045, 0.6272, -0.1504]) tensor([0.0431, 0.1878, 0.5270, 0.2422]) -Greedy action tensor([-1.7671, -0.3791, 0.6218, -0.0745]) tensor([0.0469, 0.1877, 0.5108, 0.2546]) -Greedy action tensor([-0.3465, 0.2436, -0.5655, -0.3704]) tensor([0.2182, 0.3936, 0.1753, 0.2130]) -Greedy action tensor([-1.9272, -0.4434, 0.6598, -0.1729]) tensor([0.0408, 0.1801, 0.5429, 0.2361]) -Greedy action tensor([-1.6273, 0.2731, 0.4104, 0.0699]) tensor([0.0480, 0.3213, 0.3685, 0.2622]) -Greedy action tensor([-1.8152, -0.2724, 0.6080, -0.0869]) tensor([0.0443, 0.2071, 0.4994, 0.2493]) -Greedy action tensor([-1.0856, 0.9313, 0.2693, 0.1364]) tensor([0.0634, 0.4761, 0.2456, 0.2150]) -Greedy action tensor([-1.8740, -0.3998, 0.6623, -0.1396]) tensor([0.0423, 0.1845, 0.5338, 0.2394]) -Greedy action tensor([-1.9178, -0.4148, 0.6555, -0.1665]) tensor([0.0410, 0.1845, 0.5380, 0.2365]) -Greedy action tensor([-1.9230, -0.4331, 0.6596, -0.1692]) tensor([0.0409, 0.1815, 0.5413, 0.2363]) -Greedy action tensor([-1.1735, -0.3996, 0.4467, 0.4154]) tensor([0.0762, 0.1652, 0.3852, 0.3733]) -Greedy action tensor([-1.8716, -0.4528, 0.6465, -0.1896]) tensor([0.0436, 0.1803, 0.5414, 0.2346]) -Greedy action tensor([-1.6545, 0.0908, 0.4959, 0.0170]) tensor([0.0485, 0.2776, 0.4162, 0.2578]) -Greedy action tensor([-1.8461, -0.4056, 0.6134, -0.1342]) tensor([0.0445, 0.1880, 0.5208, 0.2466]) -Greedy action tensor([0.2919, 1.1563, 0.2244, 0.9788]) tensor([0.1588, 0.3770, 0.1485, 0.3157]) -Greedy action tensor([-1.7415, -0.4871, 0.5825, -0.0775]) tensor([0.0500, 0.1753, 0.5108, 0.2640]) -Greedy action tensor([-1.8717, -0.4496, 0.6376, -0.1176]) tensor([0.0431, 0.1785, 0.5296, 0.2489]) -Greedy action tensor([-1.6739, -0.4596, 0.5275, -0.0341]) tensor([0.0539, 0.1815, 0.4870, 0.2777]) -Greedy action tensor([-1.8907, -0.3834, 0.6477, -0.1110]) tensor([0.0415, 0.1873, 0.5253, 0.2459]) -Greedy action tensor([-1.0239, 0.1605, 0.0790, 0.4385]) tensor([0.0862, 0.2818, 0.2598, 0.3722]) -Greedy action tensor([-1.3098, -0.5485, 0.3922, 0.4450]) tensor([0.0694, 0.1486, 0.3807, 0.4013]) -Greedy action tensor([-1.9012, -0.4381, 0.6452, -0.1590]) tensor([0.0420, 0.1816, 0.5364, 0.2400]) -Greedy action tensor([-0.7706, -0.0514, 0.2471, 0.6170]) tensor([0.1018, 0.2089, 0.2816, 0.4077]) -Greedy action tensor([-0.7251, 0.4398, 0.0738, 0.0082]) tensor([0.1175, 0.3767, 0.2612, 0.2446]) -Greedy action tensor([-1.7495, -0.5042, 0.5634, -0.0891]) tensor([0.0504, 0.1751, 0.5093, 0.2652]) -Greedy action tensor([-1.8314, -0.3181, 0.6285, -0.0903]) tensor([0.0436, 0.1979, 0.5100, 0.2485]) -Greedy action tensor([-1.8718, -0.2879, 0.6222, -0.1328]) tensor([0.0422, 0.2059, 0.5115, 0.2404]) -Greedy action tensor([-0.9789, -0.5185, 0.7164, 0.8438]) tensor([0.0703, 0.1114, 0.3831, 0.4351]) -Greedy action tensor([-1.1123, 0.1396, 0.3904, -0.6573]) tensor([0.0946, 0.3309, 0.4253, 0.1492]) -Greedy action tensor([-1.6724, 0.4214, 0.3898, -0.0632]) tensor([0.0455, 0.3693, 0.3578, 0.2274]) -Greedy action tensor([ 1.3096, -0.4258, -0.5670, 0.1582]) tensor([0.6077, 0.1072, 0.0930, 0.1921]) -Greedy action tensor([ 1.4033, -0.4051, -0.2110, 0.0893]) tensor([0.6129, 0.1005, 0.1220, 0.1647]) -Greedy action tensor([ 1.1216, -0.3357, -0.4476, 0.1846]) tensor([0.5456, 0.1270, 0.1136, 0.2138]) -Greedy action tensor([ 1.4243, -0.6161, -0.3661, 0.6120]) tensor([0.5745, 0.0747, 0.0959, 0.2550]) -Greedy action tensor([ 1.3974, -0.5928, -0.5695, 0.1374]) tensor([0.6410, 0.0876, 0.0897, 0.1818]) -Greedy action tensor([ 1.7801, -0.3307, -0.5096, 0.1602]) tensor([0.7041, 0.0853, 0.0713, 0.1393]) -Greedy action tensor([ 1.3951, -0.2623, -0.7870, 0.5300]) tensor([0.5799, 0.1105, 0.0654, 0.2441]) -Greedy action tensor([ 1.5702, -0.4556, -0.6150, 0.2927]) tensor([0.6566, 0.0866, 0.0738, 0.1830]) -Greedy action tensor([ 1.6645, -0.5890, -0.6396, 0.1175]) tensor([0.7053, 0.0741, 0.0704, 0.1501]) -Greedy action tensor([ 1.6418, -0.9081, -0.2477, 0.3357]) tensor([0.6666, 0.0521, 0.1008, 0.1806]) -Greedy action tensor([ 1.6670, -0.7252, -0.3446, 0.3766]) tensor([0.6665, 0.0609, 0.0892, 0.1834]) -Greedy action tensor([ 1.7529, -0.5336, -0.3843, 0.3225]) tensor([0.6855, 0.0697, 0.0809, 0.1640]) -Greedy action tensor([ 1.8358, -0.7044, -0.5274, 0.5042]) tensor([0.6959, 0.0549, 0.0655, 0.1838]) -Greedy action tensor([ 1.5530, -0.5538, -0.0966, 0.2373]) tensor([0.6321, 0.0769, 0.1214, 0.1696]) -Greedy action tensor([ 1.4035, -0.5706, -0.2344, 0.3716]) tensor([0.5919, 0.0822, 0.1150, 0.2109]) -Greedy action tensor([ 1.9044, 0.3023, -0.0873, 0.2663]) tensor([0.6526, 0.1315, 0.0891, 0.1268]) -Greedy action tensor([ 1.4454, 0.3620, -0.2445, 0.2355]) tensor([0.5491, 0.1858, 0.1013, 0.1638]) -Greedy action tensor([ 1.0866, -0.1315, -0.6455, -0.0409]) tensor([0.5566, 0.1646, 0.0985, 0.1803]) -Greedy action tensor([ 1.9172, -0.4723, -0.8851, 0.2052]) tensor([0.7503, 0.0688, 0.0455, 0.1354]) -Greedy action tensor([ 1.7750, -0.4809, -0.4039, 0.3254]) tensor([0.6884, 0.0721, 0.0779, 0.1615]) -Greedy action tensor([ 1.2147, -0.4762, -0.2003, 0.0677]) tensor([0.5731, 0.1056, 0.1392, 0.1820]) -Greedy action tensor([ 1.0887, -0.3325, -0.6495, 0.3992]) tensor([0.5211, 0.1258, 0.0916, 0.2615]) -Greedy action tensor([ 1.2882, -0.4770, -0.5151, 0.5995]) tensor([0.5440, 0.0931, 0.0896, 0.2732]) -Greedy action tensor([ 1.0712, -0.1837, -0.2652, -0.2765]) tensor([0.5532, 0.1577, 0.1454, 0.1437]) -Greedy action tensor([ 1.8300, -0.7718, -0.3142, 0.7023]) tensor([0.6600, 0.0489, 0.0773, 0.2137]) -Greedy action tensor([ 1.2039, -0.5098, -0.2847, 0.0303]) tensor([0.5830, 0.1051, 0.1316, 0.1803]) -Greedy action tensor([ 2.5166, -1.4190, -0.0908, 0.9239]) tensor([0.7712, 0.0151, 0.0569, 0.1568]) -Greedy action tensor([ 1.1508, -0.5608, -0.4857, 0.1000]) tensor([0.5797, 0.1047, 0.1128, 0.2027]) -Greedy action tensor([ 2.3220, -1.3231, -0.3833, 0.7913]) tensor([0.7637, 0.0199, 0.0511, 0.1653]) -Greedy action tensor([ 1.2514, -0.6336, -0.1562, 0.3089]) tensor([0.5598, 0.0850, 0.1370, 0.2181]) -Greedy action tensor([ 0.5012, -0.1524, -0.3593, 0.7256]) tensor([0.3130, 0.1628, 0.1324, 0.3918]) -Greedy action tensor([ 1.5895, -1.4385, 0.1172, -0.2274]) tensor([0.6943, 0.0336, 0.1593, 0.1128]) -Greedy action tensor([ 1.3981, -0.4314, -0.2750, 0.0150]) tensor([0.6254, 0.1004, 0.1174, 0.1568]) -Greedy action tensor([ 0.9115, -0.4712, -0.0468, -0.1177]) tensor([0.5021, 0.1260, 0.1926, 0.1794]) -Greedy action tensor([ 1.5346, -0.2624, -0.4338, 0.1031]) tensor([0.6475, 0.1074, 0.0904, 0.1547]) -Greedy action tensor([ 1.0943, -0.3950, -0.2543, 0.3111]) tensor([0.5149, 0.1161, 0.1337, 0.2353]) -Greedy action tensor([ 1.2276, -0.3676, -0.2733, 0.1114]) tensor([0.5704, 0.1157, 0.1271, 0.1868]) -Greedy action tensor([ 1.6964, -0.3490, -0.5966, 0.9322]) tensor([0.5896, 0.0763, 0.0595, 0.2746]) -Greedy action tensor([ 0.8361, -0.3577, -0.0731, -0.0776]) tensor([0.4746, 0.1438, 0.1912, 0.1903]) -Greedy action tensor([ 1.8367, -0.9705, -0.5934, 0.4193]) tensor([0.7190, 0.0434, 0.0633, 0.1743]) -Greedy action tensor([ 1.8430, -1.0290, 0.0615, -0.0578]) tensor([0.7276, 0.0412, 0.1225, 0.1087]) -Greedy action tensor([ 1.4368, -0.2011, -0.6860, 0.1952]) tensor([0.6238, 0.1213, 0.0747, 0.1802]) -Greedy action tensor([ 1.4927, -0.1976, -0.9969, 0.2484]) tensor([0.6429, 0.1186, 0.0533, 0.1852]) -Greedy action tensor([ 0.9840, 0.0111, -0.3776, -0.0518]) tensor([0.5027, 0.1900, 0.1288, 0.1784]) -Greedy action tensor([ 1.9047, -1.0840, -0.1635, 0.0875]) tensor([0.7467, 0.0376, 0.0944, 0.1213]) -Greedy action tensor([ 1.4452, -0.5504, -0.1008, 0.1957]) tensor([0.6114, 0.0831, 0.1303, 0.1752]) -Greedy action tensor([ 1.1774, -0.2497, -0.3555, 0.2730]) tensor([0.5374, 0.1290, 0.1160, 0.2175]) -Greedy action tensor([ 1.5770, -0.6674, -0.4801, 0.6257]) tensor([0.6173, 0.0654, 0.0789, 0.2384]) -Greedy action tensor([ 1.7248, -0.1417, -0.3130, 0.4740]) tensor([0.6365, 0.0984, 0.0829, 0.1822]) -Greedy action tensor([ 1.6239, -0.2193, -0.8002, 0.3599]) tensor([0.6539, 0.1035, 0.0579, 0.1847]) -Greedy action tensor([ 1.4001, -0.6097, -0.5372, 0.3537]) tensor([0.6138, 0.0823, 0.0884, 0.2156]) -Greedy action tensor([ 0.9260, -0.6194, 0.2807, 0.1700]) tensor([0.4531, 0.0966, 0.2376, 0.2127]) -Greedy action tensor([ 1.5139, -0.6501, -0.4496, 0.4615]) tensor([0.6233, 0.0716, 0.0875, 0.2176]) -Greedy action tensor([ 0.8498, -0.2919, -0.3887, 0.3177]) tensor([0.4553, 0.1454, 0.1320, 0.2674]) -Greedy action tensor([ 1.2526, -0.2349, -1.0326, 0.6818]) tensor([0.5283, 0.1194, 0.0538, 0.2986]) -Greedy action tensor([ 2.6725, -0.9604, -0.1423, 0.6739]) tensor([0.8184, 0.0216, 0.0490, 0.1109]) -Greedy action tensor([ 2.1720, -0.2548, -0.3534, 0.0934]) tensor([0.7731, 0.0683, 0.0619, 0.0967]) -Greedy action tensor([ 1.5215, -0.0398, -0.5275, 0.1595]) tensor([0.6270, 0.1316, 0.0808, 0.1606]) -Greedy action tensor([ 1.0740, -0.2889, -0.2228, -0.2627]) tensor([0.5580, 0.1428, 0.1526, 0.1466]) -Greedy action tensor([ 1.5708, -0.5317, -0.2127, 0.4073]) tensor([0.6240, 0.0762, 0.1049, 0.1949]) -Greedy action tensor([ 2.1666, 0.6233, -0.1730, 0.3015]) tensor([0.6826, 0.1459, 0.0658, 0.1057]) -Greedy action tensor([ 1.5339, 0.2301, -0.8085, 0.3608]) tensor([0.5963, 0.1619, 0.0573, 0.1845]) -Greedy action tensor([ 0.8774, 0.0759, -0.3114, 0.3195]) tensor([0.4300, 0.1929, 0.1310, 0.2461]) -Greedy action tensor([ 1.6210, 0.3547, -0.6448, 0.5365]) tensor([0.5801, 0.1635, 0.0602, 0.1961]) -Greedy action tensor([ 1.6025, -0.1396, -0.8064, 0.1788]) tensor([0.6641, 0.1163, 0.0597, 0.1599]) -Greedy action tensor([ 1.3896, -0.7451, -0.4567, 0.3253]) tensor([0.6169, 0.0730, 0.0973, 0.2128]) -Greedy action tensor([ 1.5143, -0.4035, -0.2461, 0.5943]) tensor([0.5823, 0.0856, 0.1001, 0.2321]) -Greedy action tensor([ 1.6432, -0.0546, -0.9270, 0.8486]) tensor([0.5843, 0.1070, 0.0447, 0.2640]) -Greedy action tensor([ 2.6378, -1.5012, -0.1425, 0.4689]) tensor([0.8387, 0.0134, 0.0520, 0.0959]) -Greedy action tensor([ 1.9371, -0.7674, -0.1830, 0.2916]) tensor([0.7247, 0.0485, 0.0870, 0.1398]) -Greedy action tensor([ 1.6463, -0.2361, -0.4832, 0.3199]) tensor([0.6508, 0.0991, 0.0774, 0.1727]) -Greedy action tensor([ 1.2921, -0.6075, -0.0785, 0.2698]) tensor([0.5671, 0.0849, 0.1440, 0.2040]) -Greedy action tensor([ 1.5078, 0.0801, -1.0278, 0.4197]) tensor([0.6039, 0.1448, 0.0478, 0.2034]) -Greedy action tensor([ 1.6690, -0.7253, -0.3893, 0.3305]) tensor([0.6751, 0.0616, 0.0862, 0.1771]) -Greedy action tensor([ 1.9385, -0.6686, -0.5548, 0.3426]) tensor([0.7358, 0.0543, 0.0608, 0.1492]) -Greedy action tensor([ 1.4275, 0.0730, -0.1779, 0.3498]) tensor([0.5558, 0.1434, 0.1116, 0.1892]) -Greedy action tensor([ 2.1419, -0.3116, -1.4141, 0.8098]) tensor([0.7254, 0.0624, 0.0207, 0.1915]) -Greedy action tensor([ 1.0962, -0.3726, -0.6262, 0.4515]) tensor([0.5172, 0.1190, 0.0924, 0.2714]) -Greedy action tensor([ 1.0939, -0.1414, -0.3947, 0.3698]) tensor([0.4997, 0.1453, 0.1128, 0.2422]) -Greedy action tensor([ 1.5103, -0.2909, -0.4002, 0.2679]) tensor([0.6243, 0.1031, 0.0924, 0.1802]) -Greedy action tensor([ 1.8405, -0.1511, -0.3490, 0.4619]) tensor([0.6665, 0.0910, 0.0746, 0.1679]) -Greedy action tensor([ 0.8767, -0.7041, 0.0114, -0.5312]) tensor([0.5344, 0.1100, 0.2249, 0.1307]) -Greedy action tensor([ 0.7031, -0.0203, -0.0069, -0.0393]) tensor([0.4077, 0.1978, 0.2004, 0.1941]) -Greedy action tensor([ 0.7230, -0.4285, -0.2309, -0.2563]) tensor([0.4815, 0.1522, 0.1855, 0.1808]) -Greedy action tensor([ 0.5767, -0.2394, -0.0643, 0.0613]) tensor([0.3897, 0.1723, 0.2053, 0.2327]) -Greedy action tensor([ 0.4603, -0.1673, -0.0803, -0.1440]) tensor([0.3756, 0.2005, 0.2187, 0.2052]) -Greedy action tensor([ 1.0940, -0.8363, 0.1945, -0.5447]) tensor([0.5727, 0.0831, 0.2330, 0.1112]) -Greedy action tensor([ 0.4722, 0.4529, -0.1974, -0.3061]) tensor([0.3388, 0.3323, 0.1734, 0.1556]) -Greedy action tensor([ 0.7167, -0.6426, -0.0324, -0.3738]) tensor([0.4841, 0.1243, 0.2289, 0.1627]) -Greedy action tensor([ 0.5899, -0.1888, -0.1366, -0.2401]) tensor([0.4204, 0.1930, 0.2033, 0.1833]) -Greedy action tensor([ 0.7689, -0.6209, -0.0390, -0.2532]) tensor([0.4867, 0.1212, 0.2170, 0.1751]) -Greedy action tensor([ 0.6924, -0.5045, -0.0609, -0.2217]) tensor([0.4600, 0.1390, 0.2166, 0.1844]) -Greedy action tensor([ 0.7603, -0.5734, -0.0889, -0.4242]) tensor([0.5007, 0.1319, 0.2142, 0.1532]) -Greedy action tensor([ 0.8626, -0.4675, -0.1094, -0.3428]) tensor([0.5148, 0.1361, 0.1948, 0.1542]) -Greedy action tensor([ 0.8610, -0.7197, -0.0366, -0.3177]) tensor([0.5205, 0.1071, 0.2122, 0.1602]) -Greedy action tensor([ 1.0085, -0.5825, -0.0207, -0.1213]) tensor([0.5308, 0.1081, 0.1896, 0.1715]) -Greedy action tensor([ 0.8230, -0.4858, -0.0532, -0.3350]) tensor([0.4999, 0.1350, 0.2081, 0.1570]) -Greedy action tensor([ 0.9421, -0.4981, 0.0664, -0.4212]) tensor([0.5238, 0.1241, 0.2182, 0.1340]) -Greedy action tensor([ 0.6398, -0.0385, 0.0314, -0.5289]) tensor([0.4233, 0.2148, 0.2304, 0.1315]) -Greedy action tensor([ 0.6613, -0.3719, 0.1727, -0.6318]) tensor([0.4457, 0.1586, 0.2734, 0.1223]) -Greedy action tensor([ 0.7958, -0.3922, 0.1700, -0.5987]) tensor([0.4790, 0.1460, 0.2562, 0.1188]) -Greedy action tensor([ 0.5631, -0.4547, -0.2060, -0.0974]) tensor([0.4271, 0.1543, 0.1979, 0.2206]) -Greedy action tensor([ 0.9596, -0.5491, -0.1613, -0.3121]) tensor([0.5472, 0.1210, 0.1784, 0.1534]) -Greedy action tensor([ 0.5884, -0.6573, -0.0323, -0.3563]) tensor([0.4517, 0.1300, 0.2428, 0.1756]) -Greedy action tensor([ 0.7440, -0.5156, -0.0483, -0.2585]) tensor([0.4754, 0.1349, 0.2152, 0.1744]) -Greedy action tensor([ 0.7763, -0.1772, -0.0082, -0.1742]) tensor([0.4488, 0.1729, 0.2048, 0.1735]) -Greedy action tensor([ 0.9195, -0.4249, -0.1144, -0.2876]) tensor([0.5221, 0.1361, 0.1857, 0.1561]) -Greedy action tensor([ 0.6767, -0.2334, -0.0964, -0.1435]) tensor([0.4340, 0.1746, 0.2003, 0.1911]) -Greedy action tensor([ 0.9241, -0.7037, 0.0743, -0.4070]) tensor([0.5296, 0.1040, 0.2264, 0.1399]) -Greedy action tensor([ 0.8083, -0.1427, 0.0021, -0.1798]) tensor([0.4535, 0.1752, 0.2025, 0.1688]) -Greedy action tensor([ 0.9354, -0.7116, -0.0254, -0.4216]) tensor([0.5457, 0.1051, 0.2088, 0.1405]) -Greedy action tensor([ 0.4603, 0.2698, -0.2379, 0.0765]) tensor([0.3328, 0.2750, 0.1655, 0.2267]) -Greedy action tensor([ 0.7670, -0.2894, 0.0547, -0.1901]) tensor([0.4500, 0.1565, 0.2207, 0.1728]) -Greedy action tensor([ 0.8886, -0.4174, -0.0818, -0.5928]) tensor([0.5327, 0.1443, 0.2019, 0.1211]) -Greedy action tensor([ 0.6899, -0.5679, -0.0442, -0.3106]) tensor([0.4691, 0.1333, 0.2251, 0.1725]) -Greedy action tensor([ 0.4185, -0.1009, -0.1556, -0.3269]) tensor([0.3798, 0.2260, 0.2139, 0.1803]) -Greedy action tensor([ 0.4651, -0.2787, -0.1211, -0.1041]) tensor([0.3850, 0.1830, 0.2142, 0.2179]) -Greedy action tensor([ 0.7497, -0.7950, -0.0801, -0.2141]) tensor([0.4924, 0.1051, 0.2147, 0.1878]) -Greedy action tensor([ 0.9615, -0.2131, -0.0997, -0.0094]) tensor([0.4917, 0.1519, 0.1701, 0.1862]) -Greedy action tensor([ 0.8845, -0.5212, -0.1021, -0.3166]) tensor([0.5211, 0.1278, 0.1943, 0.1568]) -Greedy action tensor([ 0.4382, -0.1487, -0.0190, -0.0419]) tensor([0.3561, 0.1980, 0.2255, 0.2204]) -Greedy action tensor([ 0.2972, -0.1104, -0.0437, -0.1993]) tensor([0.3350, 0.2229, 0.2382, 0.2039]) -Greedy action tensor([ 0.9542, -0.5435, -0.0043, -0.7619]) tensor([0.5596, 0.1252, 0.2146, 0.1006]) -Greedy action tensor([ 0.4605, -0.3271, 0.0528, -0.2510]) tensor([0.3830, 0.1742, 0.2548, 0.1880]) -Greedy action tensor([ 0.4897, -0.1337, -0.1813, 0.0182]) tensor([0.3743, 0.2007, 0.1914, 0.2336]) -Greedy action tensor([ 5.5601e-01, -1.9724e-01, -5.4149e-04, -3.0340e-01]) tensor([0.4053, 0.1908, 0.2323, 0.1716]) -Greedy action tensor([ 0.3788, -0.2623, -0.1163, -0.0968]) tensor([0.3626, 0.1910, 0.2210, 0.2254]) -Greedy action tensor([ 1.0313, -1.0661, 0.1790, -0.4477]) tensor([0.5627, 0.0691, 0.2400, 0.1282]) -Greedy action tensor([ 0.4890, 0.1445, -0.1669, 0.0456]) tensor([0.3485, 0.2470, 0.1809, 0.2237]) -Greedy action tensor([ 1.1542, -0.5012, 0.0844, -0.5721]) tensor([0.5841, 0.1116, 0.2004, 0.1039]) -Greedy action tensor([ 0.8768, -0.5859, 0.0215, -0.2095]) tensor([0.5014, 0.1161, 0.2132, 0.1692]) -Greedy action tensor([ 0.8053, -0.4127, 0.1225, -0.0693]) tensor([0.4509, 0.1334, 0.2278, 0.1880]) -Greedy action tensor([ 0.6143, -0.1717, -0.0386, -0.1922]) tensor([0.4128, 0.1881, 0.2149, 0.1843]) -Greedy action tensor([ 0.4520, -0.1856, 0.1640, -0.2009]) tensor([0.3573, 0.1889, 0.2679, 0.1860]) -Greedy action tensor([ 0.3375, 0.1336, -0.0366, 0.0497]) tensor([0.3074, 0.2507, 0.2114, 0.2305]) -Greedy action tensor([ 0.7011, -0.3063, 0.0336, -0.3171]) tensor([0.4466, 0.1631, 0.2291, 0.1613]) -Greedy action tensor([ 0.8120, -0.3802, -0.1359, -0.2288]) tensor([0.4892, 0.1485, 0.1896, 0.1728]) -Greedy action tensor([ 0.8622, -0.5205, -0.1334, -0.2862]) tensor([0.5161, 0.1295, 0.1907, 0.1637]) -Greedy action tensor([ 0.9190, -0.3586, -0.2731, -0.5593]) tensor([0.5524, 0.1539, 0.1677, 0.1260]) -Greedy action tensor([ 1.2940, -1.0656, 0.2559, -0.7505]) tensor([0.6337, 0.0599, 0.2244, 0.0820]) -Greedy action tensor([ 0.3697, 0.2965, -0.2132, 0.1421]) tensor([0.3045, 0.2830, 0.1700, 0.2425]) -Greedy action tensor([ 1.0485, -0.6660, 0.0068, -0.3354]) tensor([0.5607, 0.1010, 0.1978, 0.1405]) -Greedy action tensor([ 0.6038, -0.3934, -0.1769, -0.0604]) tensor([0.4270, 0.1575, 0.1956, 0.2198]) -Greedy action tensor([ 0.9292, -0.5035, -0.1679, -0.4780]) tensor([0.5503, 0.1313, 0.1837, 0.1347]) -Greedy action tensor([ 0.7698, -0.3279, -0.1033, -0.3195]) tensor([0.4790, 0.1598, 0.2001, 0.1612]) -Greedy action tensor([ 0.9225, -0.7770, 0.1706, -0.6168]) tensor([0.5351, 0.0978, 0.2523, 0.1148]) -Greedy action tensor([ 1.0522, -0.8437, 0.4129, -0.8562]) tensor([0.5476, 0.0822, 0.2889, 0.0812]) -Greedy action tensor([ 0.3779, 0.0439, 0.0964, -0.1875]) tensor([0.3291, 0.2356, 0.2483, 0.1870]) -Greedy action tensor([ 0.8443, -0.5482, 0.0777, -0.1674]) tensor([0.4815, 0.1196, 0.2237, 0.1751]) -Greedy action tensor([ 0.6627, -0.1787, -0.0326, -0.2056]) tensor([0.4256, 0.1835, 0.2123, 0.1786]) -Greedy action tensor([ 1.1746, -0.6731, -0.0118, -0.7021]) tensor([0.6188, 0.0975, 0.1889, 0.0947]) -Greedy action tensor([ 0.8482, -0.3862, -0.1382, -0.2980]) tensor([0.5046, 0.1468, 0.1882, 0.1604]) -Greedy action tensor([ 0.5118, -0.4087, -0.1448, -0.1071]) tensor([0.4073, 0.1622, 0.2112, 0.2193]) -Greedy action tensor([ 0.6306, -0.3318, -0.0833, -0.2034]) tensor([0.4337, 0.1657, 0.2124, 0.1883]) -Greedy action tensor([ 0.4168, -0.2942, -0.0370, -0.1121]) tensor([0.3682, 0.1809, 0.2339, 0.2170]) -Greedy action tensor([ 0.8673, -0.7909, 0.1118, -0.2938]) tensor([0.5067, 0.0965, 0.2381, 0.1587]) -Greedy action tensor([ 0.5021, -0.4679, -0.1268, -0.1456]) tensor([0.4106, 0.1556, 0.2189, 0.2149]) -Greedy action tensor([ 0.9384, -0.5943, -0.1145, -0.2162]) tensor([0.5319, 0.1149, 0.1856, 0.1677]) -Greedy action tensor([ 0.7622, -0.5925, 0.0573, -0.1784]) tensor([0.4667, 0.1204, 0.2306, 0.1822]) -Greedy action tensor([ 0.3500, -0.0738, -0.2022, -0.5357]) tensor([0.3784, 0.2477, 0.2178, 0.1561]) -Greedy action tensor([ 0.9148, -0.6414, 0.1644, -0.1458]) tensor([0.4928, 0.1039, 0.2327, 0.1706]) -Greedy action tensor([ 0.5562, -0.2297, -0.0984, -0.1623]) tensor([0.4060, 0.1850, 0.2110, 0.1979]) -Greedy action tensor([ 0.0099, -0.8480, 0.6834, -0.0203]) tensor([0.2296, 0.0974, 0.4503, 0.2228]) -Greedy action tensor([-0.3736, -0.6661, -0.1748, 0.1557]) tensor([0.2144, 0.1600, 0.2616, 0.3640]) -Greedy action tensor([-0.3270, -0.4008, -0.8295, 0.9232]) tensor([0.1660, 0.1542, 0.1004, 0.5794]) -Greedy action tensor([ 0.3600, -1.0614, 0.8912, -0.9221]) tensor([0.3106, 0.0750, 0.5283, 0.0862]) -Greedy action tensor([ 0.5685, -0.8046, -0.1209, 0.3293]) tensor([0.3933, 0.0996, 0.1974, 0.3096]) -Greedy action tensor([-0.0219, 0.4887, 0.5371, -0.1932]) tensor([0.1902, 0.3169, 0.3326, 0.1603]) -Greedy action tensor([ 0.6686, -1.1953, 2.1230, 0.2980]) tensor([0.1632, 0.0253, 0.6988, 0.1127]) -Greedy action tensor([ 0.2979, -0.9256, 0.1573, -0.2815]) tensor([0.3672, 0.1080, 0.3191, 0.2057]) -Greedy action tensor([-0.2051, -0.0493, -0.2264, -0.6095]) tensor([0.2621, 0.3063, 0.2566, 0.1749]) -Greedy action tensor([-0.1442, 0.0238, -1.1410, 0.1946]) tensor([0.2528, 0.2991, 0.0933, 0.3548]) -Greedy action tensor([ 1.0068, -0.1033, -0.4673, 0.3416]) tensor([0.4825, 0.1590, 0.1105, 0.2481]) -Greedy action tensor([ 1.4312, -0.9212, 0.4447, 0.3685]) tensor([0.5514, 0.0525, 0.2056, 0.1905]) -Greedy action tensor([-0.3873, -0.0093, 0.4153, -0.5861]) tensor([0.1815, 0.2648, 0.4049, 0.1488]) -Greedy action tensor([ 0.5148, -0.4344, 0.5740, -0.8504]) tensor([0.3699, 0.1432, 0.3925, 0.0944]) -Greedy action tensor([ 0.0530, -0.3836, 0.1238, -0.2628]) tensor([0.2900, 0.1874, 0.3112, 0.2114]) -Greedy action tensor([-0.0982, -0.7702, -0.2578, 0.0628]) tensor([0.2827, 0.1443, 0.2410, 0.3320]) -Greedy action tensor([ 0.3539, 0.1221, -0.5218, -0.6707]) tensor([0.3893, 0.3088, 0.1622, 0.1397]) -Greedy action tensor([ 0.3013, -1.6859, -0.1384, -0.3381]) tensor([0.4331, 0.0594, 0.2790, 0.2285]) -Greedy action tensor([ 0.7351, -0.5497, 0.0198, -0.0066]) tensor([0.4460, 0.1234, 0.2181, 0.2124]) -Greedy action tensor([ 0.7207, -0.3075, 0.3133, 0.3116]) tensor([0.3721, 0.1331, 0.2476, 0.2472]) -Greedy action tensor([-0.1912, -0.7398, 0.3663, -1.0396]) tensor([0.2665, 0.1540, 0.4654, 0.1141]) -Greedy action tensor([ 0.1002, -1.5640, 0.0425, 0.1755]) tensor([0.3114, 0.0590, 0.2939, 0.3357]) -Greedy action tensor([ 0.9090, -1.2769, 0.0467, 0.9087]) tensor([0.3946, 0.0443, 0.1666, 0.3945]) -Greedy action tensor([-0.7883, 0.0982, -0.9510, 0.6550]) tensor([0.1175, 0.2851, 0.0999, 0.4975]) -Greedy action tensor([ 0.4101, -1.0431, 0.0389, -0.0777]) tensor([0.3941, 0.0921, 0.2719, 0.2419]) -Greedy action tensor([-2.2541, -0.6281, 1.2489, -1.6354]) tensor([0.0243, 0.1235, 0.8071, 0.0451]) -Greedy action tensor([-0.2467, -1.0791, 0.5236, -0.6180]) tensor([0.2334, 0.1015, 0.5041, 0.1610]) -Greedy action tensor([ 0.6981, 0.1248, -0.0556, 0.4312]) tensor([0.3571, 0.2013, 0.1681, 0.2735]) -Greedy action tensor([ 0.4650, -0.6348, 0.6489, 0.1795]) tensor([0.3043, 0.1013, 0.3657, 0.2287]) -Greedy action tensor([-0.9994, -0.2906, 0.3653, -0.4121]) tensor([0.1144, 0.2323, 0.4476, 0.2057]) -Greedy action tensor([-0.3402, 0.3698, -0.1939, -0.9818]) tensor([0.2120, 0.4311, 0.2454, 0.1116]) -Greedy action tensor([ 0.2246, -0.9215, -0.1564, 0.2590]) tensor([0.3294, 0.1047, 0.2250, 0.3409]) -Greedy action tensor([ 0.0250, -0.1843, 0.0821, -0.5473]) tensor([0.2912, 0.2362, 0.3083, 0.1643]) -Greedy action tensor([-0.3350, -0.6967, -0.8766, 0.0411]) tensor([0.2677, 0.1865, 0.1558, 0.3900]) -Greedy action tensor([ 0.9320, 0.1925, -0.3788, -0.1405]) tensor([0.4787, 0.2285, 0.1291, 0.1638]) -Greedy action tensor([-0.2015, -0.3598, -0.1441, -0.4199]) tensor([0.2691, 0.2297, 0.2850, 0.2163]) -Greedy action tensor([ 0.3930, -1.1812, 0.0738, -0.1875]) tensor([0.4010, 0.0831, 0.2914, 0.2244]) -Greedy action tensor([ 0.0099, 0.5179, -0.3465, -1.0590]) tensor([0.2699, 0.4485, 0.1890, 0.0927]) -Greedy action tensor([0.5079, 0.0522, 0.7286, 0.6531]) tensor([0.2477, 0.1570, 0.3089, 0.2864]) -Greedy action tensor([ 0.7128, -0.4667, -0.8577, 0.8553]) tensor([0.3747, 0.1152, 0.0779, 0.4321]) -Greedy action tensor([ 1.2128, -1.3387, 0.4182, -0.4208]) tensor([0.5797, 0.0452, 0.2619, 0.1132]) -Greedy action tensor([ 1.9792, -0.2808, -0.3066, -0.5449]) tensor([0.7775, 0.0811, 0.0791, 0.0623]) -Greedy action tensor([ 0.0389, -1.3234, -0.2004, -0.2712]) tensor([0.3602, 0.0922, 0.2835, 0.2641]) -Greedy action tensor([ 0.1237, -0.0527, 0.4948, 0.0188]) tensor([0.2388, 0.2002, 0.3461, 0.2150]) -Greedy action tensor([ 0.3412, -0.9453, 0.5321, -0.2711]) tensor([0.3302, 0.0912, 0.3996, 0.1790]) -Greedy action tensor([-0.3993, -0.3216, 0.5001, 0.0697]) tensor([0.1629, 0.1761, 0.4005, 0.2604]) -Greedy action tensor([-0.2198, -1.2154, -0.4499, -0.5033]) tensor([0.3428, 0.1267, 0.2723, 0.2582]) -Greedy action tensor([ 1.7269, -0.0533, -0.3569, 0.1654]) tensor([0.6654, 0.1122, 0.0828, 0.1396]) -Greedy action tensor([ 0.5355, -0.3253, 0.0858, -0.3542]) tensor([0.4046, 0.1711, 0.2581, 0.1662]) -Greedy action tensor([-0.5870, 0.7571, -0.5583, -0.2676]) tensor([0.1381, 0.5296, 0.1421, 0.1901]) -Greedy action tensor([ 0.1814, -0.7041, -0.8228, 0.0760]) tensor([0.3733, 0.1540, 0.1368, 0.3359]) -Greedy action tensor([ 0.8967, -0.6758, 0.9370, 1.0910]) tensor([0.2887, 0.0599, 0.3006, 0.3507]) -Greedy action tensor([-0.2054, 0.4337, 0.1103, -1.1414]) tensor([0.2147, 0.4068, 0.2944, 0.0842]) -Greedy action tensor([-0.4922, 0.8147, -0.5025, -0.2989]) tensor([0.1450, 0.5356, 0.1435, 0.1759]) -Greedy action tensor([ 0.1758, 0.0427, 0.1076, -0.4336]) tensor([0.2982, 0.2611, 0.2786, 0.1621]) -Greedy action tensor([ 0.3003, 0.2451, -0.8145, -0.2126]) tensor([0.3481, 0.3294, 0.1142, 0.2084]) -Greedy action tensor([ 0.5754, 0.0804, 0.3582, -0.2167]) tensor([0.3488, 0.2126, 0.2807, 0.1580]) -Greedy action tensor([-0.9528, -0.4498, -1.0131, -1.1194]) tensor([0.2251, 0.3723, 0.2120, 0.1906]) -Greedy action tensor([-0.2299, -0.7557, -0.5532, -1.5744]) tensor([0.3883, 0.2295, 0.2810, 0.1012]) -Greedy action tensor([-0.7276, -0.2255, -0.3839, 1.0968]) tensor([0.0974, 0.1610, 0.1374, 0.6041]) -Greedy action tensor([ 0.6567, -0.3251, 0.0701, 0.7525]) tensor([0.3299, 0.1236, 0.1835, 0.3630]) -Greedy action tensor([ 0.4904, -0.5394, 0.3996, 0.2706]) tensor([0.3254, 0.1162, 0.2972, 0.2612]) -Greedy action tensor([ 0.2098, -0.6917, 0.5453, -0.5491]) tensor([0.3056, 0.1240, 0.4274, 0.1431]) -Greedy action tensor([ 0.5320, -0.7686, -0.5177, 0.6345]) tensor([0.3662, 0.0998, 0.1282, 0.4058]) -Greedy action tensor([ 0.9001, -1.2779, 0.4019, 0.2334]) tensor([0.4476, 0.0507, 0.2720, 0.2298]) -Greedy action tensor([ 0.4430, -0.2741, 0.6170, -0.5929]) tensor([0.3297, 0.1609, 0.3924, 0.1170]) -Greedy action tensor([-0.1434, 0.2983, -0.3222, -0.8001]) tensor([0.2557, 0.3978, 0.2139, 0.1326]) -Greedy action tensor([ 0.1791, -1.0065, 0.5360, -0.4749]) tensor([0.3073, 0.0939, 0.4390, 0.1598]) -Greedy action tensor([ 0.2556, -1.9219, 0.6903, 0.5325]) tensor([0.2515, 0.0285, 0.3884, 0.3317]) -Greedy action tensor([ 1.2531, -0.0403, 0.1763, 0.0394]) tensor([0.5230, 0.1435, 0.1782, 0.1554]) -Greedy action tensor([-0.9622, -0.3365, 0.3003, -0.7227]) tensor([0.1303, 0.2436, 0.4605, 0.1656]) -Greedy action tensor([-0.3130, -2.1314, 0.1053, -0.1528]) tensor([0.2594, 0.0421, 0.3941, 0.3044]) -Greedy action tensor([-0.0487, 0.1358, 0.2107, -0.7541]) tensor([0.2504, 0.3012, 0.3246, 0.1237]) -Greedy action tensor([ 0.7917, -0.3229, 0.2159, -0.1458]) tensor([0.4382, 0.1438, 0.2464, 0.1716]) -Greedy action tensor([ 0.3963, -0.9308, 0.1011, -0.3541]) tensor([0.4029, 0.1069, 0.2999, 0.1903]) -Greedy action tensor([-0.5290, -1.4854, -0.2697, 0.0201]) tensor([0.2267, 0.0871, 0.2937, 0.3925]) -Greedy action tensor([ 0.2247, -0.6107, -1.2362, -0.0843]) tensor([0.4167, 0.1807, 0.0967, 0.3059]) -Greedy action tensor([ 0.4434, -0.3216, 1.1814, 0.9652]) tensor([0.1908, 0.0888, 0.3990, 0.3214]) -Greedy action tensor([ 0.2798, -0.6618, 0.3926, -0.2386]) tensor([0.3221, 0.1256, 0.3605, 0.1918]) -Greedy action tensor([-0.2503, 0.1923, 1.0372, -0.8259]) tensor([0.1483, 0.2309, 0.5374, 0.0834]) -Greedy action tensor([-0.6701, -1.1795, -1.1940, 0.5201]) tensor([0.1825, 0.1096, 0.1081, 0.5999]) -Greedy action tensor([ 1.3240, -0.7735, -0.3477, 0.9857]) tensor([0.4942, 0.0607, 0.0929, 0.3523]) -Greedy action tensor([ 1.8906, -1.0471, -0.0419, 0.7174]) tensor([0.6635, 0.0352, 0.0961, 0.2053]) -Greedy action tensor([ 1.9880, -1.2883, -0.1827, 0.4204]) tensor([0.7351, 0.0278, 0.0839, 0.1533]) -Greedy action tensor([ 1.4879, -0.3584, -0.6842, 0.0046]) tensor([0.6673, 0.1053, 0.0760, 0.1514]) -Greedy action tensor([ 2.2795, -1.0726, -0.1726, 0.4935]) tensor([0.7760, 0.0272, 0.0668, 0.1301]) -Greedy action tensor([ 1.5152, -0.3149, -0.4644, -0.0622]) tensor([0.6645, 0.1066, 0.0918, 0.1372]) -Greedy action tensor([ 1.4093, -0.7353, -0.4752, 1.1186]) tensor([0.4958, 0.0581, 0.0753, 0.3708]) -Greedy action tensor([ 1.3515, 0.1374, -0.5331, 0.1891]) tensor([0.5677, 0.1686, 0.0862, 0.1775]) -Greedy action tensor([ 1.8859, -0.3641, -0.1702, 0.3972]) tensor([0.6854, 0.0722, 0.0877, 0.1547]) -Greedy action tensor([ 1.2918, -0.3639, -0.1497, -0.0142]) tensor([0.5888, 0.1124, 0.1393, 0.1595]) -Greedy action tensor([ 0.5193, -0.1576, 0.1812, 0.0038]) tensor([0.3548, 0.1803, 0.2530, 0.2119]) -Greedy action tensor([ 1.7138, -0.3770, -1.3593, 0.3822]) tensor([0.6974, 0.0862, 0.0323, 0.1841]) -Greedy action tensor([ 1.0167, -0.6487, -0.2364, 0.0407]) tensor([0.5401, 0.1021, 0.1543, 0.2035]) -Greedy action tensor([ 1.4007, -0.5977, -0.0930, 0.1190]) tensor([0.6106, 0.0828, 0.1371, 0.1695]) -Greedy action tensor([ 0.8246, -0.5611, -0.3620, 0.2632]) tensor([0.4704, 0.1177, 0.1436, 0.2683]) -Greedy action tensor([ 0.4817, -0.2936, 0.0242, -0.0625]) tensor([0.3740, 0.1723, 0.2367, 0.2170]) -Greedy action tensor([ 1.1264, -0.6783, 0.0922, 0.3136]) tensor([0.5092, 0.0838, 0.1810, 0.2259]) -Greedy action tensor([ 1.6097, -0.0539, -0.3154, 0.0480]) tensor([0.6472, 0.1226, 0.0944, 0.1358]) -Greedy action tensor([ 1.6232, -0.0272, -0.3902, 0.2793]) tensor([0.6304, 0.1210, 0.0842, 0.1644]) -Greedy action tensor([ 1.6980, -0.1006, -0.2084, -0.4905]) tensor([0.7012, 0.1161, 0.1042, 0.0786]) -Greedy action tensor([ 2.1713, 0.1186, -0.2419, 0.6750]) tensor([0.6935, 0.0890, 0.0621, 0.1553]) -Greedy action tensor([ 1.4273, -0.7533, -0.3714, 0.3971]) tensor([0.6115, 0.0691, 0.1012, 0.2182]) -Greedy action tensor([ 1.0637, -0.4535, -0.1046, -0.0394]) tensor([0.5370, 0.1178, 0.1670, 0.1782]) -Greedy action tensor([ 1.0444, -0.2278, -0.7147, 0.4716]) tensor([0.4959, 0.1390, 0.0854, 0.2797]) -Greedy action tensor([ 0.9926, -0.3032, -0.5081, 0.5313]) tensor([0.4701, 0.1287, 0.1048, 0.2964]) -Greedy action tensor([ 1.9050, -0.7111, -0.4727, 0.4706]) tensor([0.7122, 0.0521, 0.0661, 0.1697]) -Greedy action tensor([ 1.3492, -0.2196, -0.8336, 0.1196]) tensor([0.6198, 0.1291, 0.0699, 0.1812]) -Greedy action tensor([ 1.4402, -0.2384, -0.6663, 0.2665]) tensor([0.6182, 0.1154, 0.0752, 0.1912]) -Greedy action tensor([ 1.2223, -0.1043, -0.7233, 0.1936]) tensor([0.5663, 0.1503, 0.0809, 0.2024]) -Greedy action tensor([2.0691, 0.5810, 0.1209, 0.0468]) tensor([0.6664, 0.1505, 0.0950, 0.0882]) -Greedy action tensor([ 1.3160, -0.3810, -0.0839, 0.2469]) tensor([0.5640, 0.1033, 0.1391, 0.1936]) -Greedy action tensor([ 1.4424, -0.3398, -0.2589, 0.2469]) tensor([0.6049, 0.1018, 0.1104, 0.1830]) -Greedy action tensor([ 1.3818, 0.0206, -0.5102, -0.0642]) tensor([0.6088, 0.1561, 0.0918, 0.1434]) -Greedy action tensor([ 1.4389, -0.9008, -0.2437, 0.3218]) tensor([0.6213, 0.0599, 0.1155, 0.2033]) -Greedy action tensor([ 0.8347, 0.2692, -0.6163, -0.0516]) tensor([0.4516, 0.2565, 0.1058, 0.1861]) -Greedy action tensor([ 1.4016, -0.1481, -0.6439, 0.3191]) tensor([0.5951, 0.1263, 0.0770, 0.2016]) -Greedy action tensor([ 0.9075, 0.0260, -0.4012, -0.0027]) tensor([0.4792, 0.1985, 0.1295, 0.1929]) -Greedy action tensor([ 1.2098, -0.2751, -0.6722, 0.0924]) tensor([0.5862, 0.1328, 0.0893, 0.1918]) -Greedy action tensor([ 1.5823, -0.7649, -0.4005, 0.5106]) tensor([0.6346, 0.0607, 0.0874, 0.2173]) -Greedy action tensor([ 1.4907, -0.1842, -0.7723, 0.1775]) tensor([0.6409, 0.1201, 0.0667, 0.1724]) -Greedy action tensor([ 1.8045, -0.7976, -0.2394, 0.3780]) tensor([0.6926, 0.0513, 0.0897, 0.1663]) -Greedy action tensor([ 1.4900, -0.4415, -0.4602, -0.0901]) tensor([0.6697, 0.0971, 0.0953, 0.1379]) -Greedy action tensor([ 1.0721, -0.4083, -0.4592, 0.0466]) tensor([0.5548, 0.1262, 0.1200, 0.1990]) -Greedy action tensor([ 1.1437, -0.4983, -0.0431, 0.1442]) tensor([0.5357, 0.1037, 0.1635, 0.1972]) -Greedy action tensor([ 2.0353, -1.3051, -0.5893, 0.0034]) tensor([0.8071, 0.0286, 0.0585, 0.1058]) -Greedy action tensor([ 1.8570, -0.6811, -0.4731, 0.4710]) tensor([0.7011, 0.0554, 0.0682, 0.1753]) -Greedy action tensor([ 1.1842, -0.3437, -0.1502, 0.6318]) tensor([0.4864, 0.1055, 0.1281, 0.2800]) -Greedy action tensor([ 2.0728, -1.3325, -0.1742, 0.4890]) tensor([0.7440, 0.0247, 0.0787, 0.1527]) -Greedy action tensor([ 1.5505, -0.2401, -0.8775, 0.5652]) tensor([0.6141, 0.1025, 0.0542, 0.2293]) -Greedy action tensor([ 1.5390, -0.6772, 0.0752, 0.6912]) tensor([0.5654, 0.0616, 0.1308, 0.2422]) -Greedy action tensor([ 1.6027, -0.3827, -0.5606, 0.7163]) tensor([0.6008, 0.0825, 0.0691, 0.2476]) -Greedy action tensor([ 2.0597, -0.7768, -0.8133, 1.0519]) tensor([0.6756, 0.0396, 0.0382, 0.2466]) -Greedy action tensor([ 1.6661, -0.7651, -0.3723, 0.1636]) tensor([0.6941, 0.0610, 0.0904, 0.1545]) -Greedy action tensor([ 0.7838, -0.1728, -0.5740, 0.7862]) tensor([0.3782, 0.1453, 0.0973, 0.3792]) -Greedy action tensor([ 0.8081, 0.1273, -0.9156, 0.0522]) tensor([0.4642, 0.2350, 0.0828, 0.2180]) -Greedy action tensor([ 1.6877, -0.6671, -0.5177, 0.3870]) tensor([0.6769, 0.0642, 0.0746, 0.1843]) -Greedy action tensor([ 1.8810, -0.9106, -0.2342, 0.5736]) tensor([0.6885, 0.0422, 0.0830, 0.1862]) -Greedy action tensor([ 1.4793, 0.2209, -0.3255, 0.4113]) tensor([0.5579, 0.1585, 0.0918, 0.1918]) -Greedy action tensor([ 1.4678, -0.6904, -0.4780, 0.3440]) tensor([0.6315, 0.0730, 0.0902, 0.2053]) -Greedy action tensor([ 0.9400, -0.2548, -0.2087, -0.0133]) tensor([0.4987, 0.1510, 0.1581, 0.1922]) -Greedy action tensor([ 1.6296, -0.9298, -0.1804, 0.3414]) tensor([0.6593, 0.0510, 0.1079, 0.1818]) -Greedy action tensor([ 1.7072, 0.3544, -0.6459, 0.5768]) tensor([0.5965, 0.1542, 0.0567, 0.1926]) -Greedy action tensor([ 1.4271, -0.4820, -0.5637, 0.1678]) tensor([0.6375, 0.0945, 0.0871, 0.1809]) -Greedy action tensor([ 1.3986, -0.2931, -0.7331, 0.6143]) tensor([0.5684, 0.1047, 0.0674, 0.2594]) -Greedy action tensor([ 0.9620, 0.3274, -0.1736, 0.2520]) tensor([0.4268, 0.2263, 0.1371, 0.2098]) -Greedy action tensor([ 1.3492, -0.6525, -0.5079, 0.4070]) tensor([0.5949, 0.0804, 0.0929, 0.2319]) -Greedy action tensor([ 1.8698, -1.2423, -0.0364, 0.7725]) tensor([0.6549, 0.0291, 0.0973, 0.2186]) -Greedy action tensor([ 1.4413, -0.3762, -0.4505, 0.3542]) tensor([0.6059, 0.0984, 0.0914, 0.2043]) -Greedy action tensor([ 1.7032, -0.8349, -0.3186, 0.3721]) tensor([0.6777, 0.0535, 0.0897, 0.1790]) -Greedy action tensor([ 1.3389, -0.1661, -0.7893, 0.3018]) tensor([0.5898, 0.1309, 0.0702, 0.2091]) -Greedy action tensor([ 1.8723, -0.6651, -0.4239, 0.3955]) tensor([0.7102, 0.0562, 0.0715, 0.1622]) -Greedy action tensor([ 2.1853, -0.9876, -0.3579, 0.4205]) tensor([0.7742, 0.0324, 0.0609, 0.1326]) -Greedy action tensor([ 1.4069, -0.4065, -0.4204, 0.1010]) tensor([0.6270, 0.1023, 0.1008, 0.1699]) -Greedy action tensor([ 1.5190, -0.4838, -0.2350, 0.2862]) tensor([0.6252, 0.0844, 0.1082, 0.1822]) -Greedy action tensor([ 1.4432, -0.2807, -0.9357, 0.4866]) tensor([0.6042, 0.1078, 0.0560, 0.2321]) -Greedy action tensor([ 0.9018, -0.0463, 0.0977, -0.0878]) tensor([0.4532, 0.1756, 0.2028, 0.1684]) -Greedy action tensor([ 1.1765, -0.5428, -0.1934, 0.1576]) tensor([0.5573, 0.0999, 0.1416, 0.2012]) -Greedy action tensor([ 1.2769, -0.5304, -0.4918, 0.6034]) tensor([0.5421, 0.0890, 0.0925, 0.2765]) -Greedy action tensor([ 1.3674, -0.6453, -0.0599, -0.0571]) tensor([0.6195, 0.0828, 0.1486, 0.1491]) -Greedy action tensor([ 1.4005, -0.3076, -1.0510, 0.5519]) tensor([0.5899, 0.1069, 0.0508, 0.2524]) -Greedy action tensor([ 3.0803, -0.0221, 0.0771, 0.5077]) tensor([0.8540, 0.0384, 0.0424, 0.0652]) -Greedy action tensor([-1.6639, 0.0825, 0.4498, -0.0746]) tensor([0.0502, 0.2880, 0.4157, 0.2461]) -Greedy action tensor([-1.8927, -0.4430, 0.6475, -0.1504]) tensor([0.0423, 0.1802, 0.5361, 0.2414]) -Greedy action tensor([-1.8549, -0.1043, 0.5763, -0.1582]) tensor([0.0424, 0.2441, 0.4822, 0.2313]) -Greedy action tensor([-1.7679, -0.3461, 0.5747, -0.0755]) tensor([0.0477, 0.1975, 0.4960, 0.2589]) -Greedy action tensor([-1.8942, -0.4164, 0.6463, -0.1523]) tensor([0.0421, 0.1843, 0.5335, 0.2401]) -Greedy action tensor([-1.5093, -0.3661, 0.7442, 0.3068]) tensor([0.0505, 0.1584, 0.4807, 0.3104]) -Greedy action tensor([0.3278, 1.1081, 0.1090, 0.7840]) tensor([0.1797, 0.3922, 0.1444, 0.2836]) -Greedy action tensor([-1.6296, -0.4579, 0.7918, 0.2944]) tensor([0.0448, 0.1445, 0.5042, 0.3066]) -Greedy action tensor([-1.9110, -0.3983, 0.6385, -0.1663]) tensor([0.0416, 0.1886, 0.5320, 0.2379]) -Greedy action tensor([-1.8303, -0.1982, 0.5739, -0.1073]) tensor([0.0439, 0.2245, 0.4858, 0.2458]) -Greedy action tensor([-1.7314, 0.1207, 0.4724, -0.0177]) tensor([0.0455, 0.2899, 0.4121, 0.2525]) -Greedy action tensor([-1.9110, -0.4436, 0.6527, -0.1615]) tensor([0.0415, 0.1802, 0.5393, 0.2389]) -Greedy action tensor([-1.8734, -0.3171, 0.6242, -0.1428]) tensor([0.0425, 0.2014, 0.5163, 0.2398]) -Greedy action tensor([-1.7020, -0.5220, 0.5520, -0.1037]) tensor([0.0534, 0.1738, 0.5087, 0.2641]) -Greedy action tensor([-1.4888, 0.5608, 0.3771, -0.0913]) tensor([0.0519, 0.4029, 0.3353, 0.2099]) -Greedy action tensor([-1.1056, 0.7463, 0.1855, 0.0106]) tensor([0.0711, 0.4531, 0.2586, 0.2171]) -Greedy action tensor([-1.4435, -0.4250, 0.5393, 0.2471]) tensor([0.0608, 0.1683, 0.4414, 0.3295]) -Greedy action tensor([-0.6013, -0.3736, 0.7792, 0.8094]) tensor([0.0968, 0.1215, 0.3849, 0.3967]) -Greedy action tensor([-1.8554, -0.3643, 0.6091, -0.1337]) tensor([0.0439, 0.1949, 0.5158, 0.2454]) -Greedy action tensor([-1.9001, -0.4406, 0.6486, -0.1603]) tensor([0.0420, 0.1809, 0.5376, 0.2394]) -Greedy action tensor([-1.5656, -0.1969, 0.6075, 0.0071]) tensor([0.0540, 0.2120, 0.4740, 0.2600]) -Greedy action tensor([-1.9025, -0.3937, 0.6537, -0.1340]) tensor([0.0412, 0.1863, 0.5310, 0.2415]) -Greedy action tensor([-0.8449, 0.1584, 0.4631, 0.4281]) tensor([0.0909, 0.2480, 0.3363, 0.3247]) -Greedy action tensor([-1.8524, -0.2280, 0.4764, -0.1784]) tensor([0.0461, 0.2342, 0.4736, 0.2461]) -Greedy action tensor([-1.9025, -0.4173, 0.6588, -0.1563]) tensor([0.0415, 0.1832, 0.5374, 0.2378]) -Greedy action tensor([-1.7514, -0.3602, 0.5777, -0.0488]) tensor([0.0481, 0.1935, 0.4942, 0.2642]) -Greedy action tensor([-0.6112, 0.9785, 0.0736, 0.4383]) tensor([0.0931, 0.4564, 0.1846, 0.2659]) -Greedy action tensor([-1.3745, -0.1246, 0.5080, -0.7073]) tensor([0.0769, 0.2683, 0.5050, 0.1498]) -Greedy action tensor([-1.3653, 0.1021, 0.3257, 0.0077]) tensor([0.0680, 0.2949, 0.3688, 0.2683]) -Greedy action tensor([-1.7009, 0.2004, 0.4791, -0.1220]) tensor([0.0467, 0.3130, 0.4136, 0.2267]) -Greedy action tensor([-1.7862, -0.1768, 0.5902, -0.0628]) tensor([0.0447, 0.2235, 0.4813, 0.2505]) -Greedy action tensor([-0.5883, 0.6902, -0.0550, -0.0240]) tensor([0.1242, 0.4459, 0.2116, 0.2183]) -Greedy action tensor([-1.5597, 0.3127, 0.3488, 0.0862]) tensor([0.0515, 0.3347, 0.3470, 0.2669]) -Greedy action tensor([-1.8364, -0.4157, 0.6146, -0.1751]) tensor([0.0454, 0.1881, 0.5271, 0.2393]) -Greedy action tensor([-1.9436, -0.4449, 0.6653, -0.1801]) tensor([0.0402, 0.1798, 0.5457, 0.2343]) -Greedy action tensor([-1.9418, -0.4469, 0.6663, -0.1790]) tensor([0.0402, 0.1794, 0.5460, 0.2345]) -Greedy action tensor([-1.6202, -0.3574, 0.5937, -0.2549]) tensor([0.0568, 0.2008, 0.5199, 0.2225]) -Greedy action tensor([-1.6571, -0.3023, 0.5136, -0.0055]) tensor([0.0530, 0.2056, 0.4648, 0.2766]) -Greedy action tensor([-1.8320, -0.4091, 0.6190, -0.1115]) tensor([0.0448, 0.1858, 0.5193, 0.2502]) -Greedy action tensor([-1.3280, 0.5934, 0.2453, 0.0290]) tensor([0.0605, 0.4130, 0.2916, 0.2349]) -Greedy action tensor([-1.8923, -0.4020, 0.6477, -0.1460]) tensor([0.0419, 0.1861, 0.5316, 0.2404]) -Greedy action tensor([-1.9449, -0.4491, 0.6671, -0.1801]) tensor([0.0401, 0.1790, 0.5466, 0.2343]) -Greedy action tensor([-1.8799, -0.4065, 0.6551, -0.1181]) tensor([0.0420, 0.1833, 0.5300, 0.2446]) -Greedy action tensor([-1.9454, -0.4459, 0.6659, -0.1812]) tensor([0.0401, 0.1797, 0.5461, 0.2341]) -Greedy action tensor([-1.9274, -0.3973, 0.6504, -0.1688]) tensor([0.0407, 0.1878, 0.5355, 0.2360]) -Greedy action tensor([-1.6702, -0.4748, 0.4965, -0.0144]) tensor([0.0547, 0.1809, 0.4778, 0.2866]) -Greedy action tensor([-1.1536, -0.6050, 0.2642, 0.2768]) tensor([0.0906, 0.1568, 0.3739, 0.3787]) -Greedy action tensor([-1.4033, 0.1537, 0.3780, -0.0285]) tensor([0.0639, 0.3034, 0.3797, 0.2529]) -Greedy action tensor([-0.9460, 0.2861, 0.5550, 0.9126]) tensor([0.0652, 0.2236, 0.2927, 0.4185]) -Greedy action tensor([-1.8685, -0.4067, 0.6573, -0.1253]) tensor([0.0425, 0.1833, 0.5313, 0.2429]) -Greedy action tensor([-0.0823, 1.0430, 0.0908, 0.7991]) tensor([0.1301, 0.4010, 0.1547, 0.3142]) -Greedy action tensor([-1.7678, -0.3968, 0.6120, -0.0174]) tensor([0.0465, 0.1832, 0.5025, 0.2678]) -Greedy action tensor([-1.8510, -0.3796, 0.6047, -0.1321]) tensor([0.0443, 0.1928, 0.5160, 0.2470]) -Greedy action tensor([-1.8855, -0.3793, 0.6357, -0.1443]) tensor([0.0423, 0.1906, 0.5260, 0.2411]) -Greedy action tensor([-1.8975, -0.4250, 0.6382, -0.1578]) tensor([0.0422, 0.1841, 0.5331, 0.2405]) -Greedy action tensor([-1.9167, -0.4496, 0.6617, -0.1583]) tensor([0.0411, 0.1783, 0.5419, 0.2387]) -Greedy action tensor([-1.9196, -0.4124, 0.6522, -0.1687]) tensor([0.0410, 0.1853, 0.5373, 0.2364]) -Greedy action tensor([-1.9258, -0.4303, 0.6581, -0.1697]) tensor([0.0408, 0.1821, 0.5408, 0.2363]) -Greedy action tensor([-1.7675, -0.4981, 0.5829, -0.1280]) tensor([0.0495, 0.1762, 0.5193, 0.2551]) -Greedy action tensor([-1.8903, -0.4125, 0.6348, -0.1478]) tensor([0.0424, 0.1858, 0.5296, 0.2421]) -Greedy action tensor([-1.8987, -0.4354, 0.6468, -0.1563]) tensor([0.0420, 0.1817, 0.5361, 0.2402]) -Greedy action tensor([-1.9452, -0.4491, 0.6676, -0.1809]) tensor([0.0401, 0.1790, 0.5468, 0.2341]) -Greedy action tensor([-1.8348, -0.4329, 0.6172, -0.1143]) tensor([0.0449, 0.1825, 0.5216, 0.2510]) -Greedy action tensor([-1.7187, -0.4887, 0.6078, -0.0370]) tensor([0.0499, 0.1707, 0.5111, 0.2682]) -Greedy action tensor([-1.8879, -0.3833, 0.6414, -0.1535]) tensor([0.0422, 0.1899, 0.5290, 0.2389]) -Greedy action tensor([-1.9388, -0.4461, 0.6654, -0.1772]) tensor([0.0403, 0.1795, 0.5454, 0.2348]) -Greedy action tensor([-0.8174, 0.8432, -0.0775, -0.2973]) tensor([0.0996, 0.5241, 0.2087, 0.1675]) -Greedy action tensor([-1.9015, -0.3768, 0.6450, -0.1528]) tensor([0.0415, 0.1906, 0.5295, 0.2384]) -Greedy action tensor([-1.9021, -0.4333, 0.6577, -0.1591]) tensor([0.0417, 0.1811, 0.5391, 0.2382]) -Greedy action tensor([-1.9293, -0.4219, 0.6591, -0.1721]) tensor([0.0406, 0.1834, 0.5406, 0.2354]) -Greedy action tensor([-1.6388, -0.4865, 0.5234, -0.0238]) tensor([0.0559, 0.1770, 0.4859, 0.2812]) -Greedy action tensor([-1.8552, -0.4022, 0.6549, -0.1179]) tensor([0.0430, 0.1838, 0.5290, 0.2443]) -Greedy action tensor([-1.5391, -0.4871, 0.5173, 0.0742]) tensor([0.0599, 0.1714, 0.4681, 0.3006]) -Greedy action tensor([-1.0502, -0.5057, 0.3522, 0.7118]) tensor([0.0793, 0.1367, 0.3223, 0.4618]) -Greedy action tensor([-1.9305, -0.4467, 0.6629, -0.1720]) tensor([0.0407, 0.1793, 0.5440, 0.2360]) -Greedy action tensor([-1.6187, -0.5075, 0.4897, 0.0733]) tensor([0.0565, 0.1716, 0.4652, 0.3067]) -Greedy action tensor([-0.6841, 0.9052, -0.4255, -0.6144]) tensor([0.1210, 0.5927, 0.1566, 0.1297]) -Greedy action tensor([-1.8012, -0.3785, 0.6180, -0.0558]) tensor([0.0452, 0.1876, 0.5081, 0.2590]) -Greedy action tensor([-1.4366, 0.0298, 0.3633, -0.0248]) tensor([0.0646, 0.2798, 0.3906, 0.2650]) -Greedy action tensor([-1.9328, -0.4347, 0.6619, -0.1739]) tensor([0.0405, 0.1813, 0.5428, 0.2353]) -Greedy action tensor([-1.8665, -0.4365, 0.6266, -0.1476]) tensor([0.0438, 0.1828, 0.5294, 0.2441]) -Greedy action tensor([ 0.4509, -0.3306, -0.1852, -0.0555]) tensor([0.3861, 0.1767, 0.2044, 0.2327]) -Greedy action tensor([ 1.1924, -0.6782, -0.1170, -0.7722]) tensor([0.6393, 0.0985, 0.1726, 0.0896]) -Greedy action tensor([ 0.8266, -0.3024, -0.0702, -0.3086]) tensor([0.4872, 0.1575, 0.1987, 0.1566]) -Greedy action tensor([ 0.5626, -0.5182, -0.1350, -0.0021]) tensor([0.4157, 0.1411, 0.2069, 0.2363]) -Greedy action tensor([ 0.8788, -0.1548, 0.0579, -0.0623]) tensor([0.4575, 0.1627, 0.2013, 0.1785]) -Greedy action tensor([ 0.6918, -0.5828, -0.0701, -0.1815]) tensor([0.4621, 0.1292, 0.2157, 0.1930]) -Greedy action tensor([ 0.6351, -0.4021, -0.3081, -0.3702]) tensor([0.4740, 0.1680, 0.1846, 0.1735]) -Greedy action tensor([ 1.1230, -0.9643, -0.0499, -0.5098]) tensor([0.6139, 0.0761, 0.1900, 0.1200]) -Greedy action tensor([ 1.2887, -0.7332, 0.0108, -0.9231]) tensor([0.6577, 0.0871, 0.1833, 0.0720]) -Greedy action tensor([ 0.8422, -0.7784, 0.0402, -0.4761]) tensor([0.5225, 0.1033, 0.2343, 0.1398]) -Greedy action tensor([ 0.5091, 0.0038, -0.2355, 0.0282]) tensor([0.3708, 0.2238, 0.1761, 0.2293]) -Greedy action tensor([ 0.5113, -0.3130, -0.0227, -0.1843]) tensor([0.3963, 0.1738, 0.2323, 0.1976]) -Greedy action tensor([ 0.5698, -0.3549, -0.0531, -0.3840]) tensor([0.4313, 0.1711, 0.2314, 0.1662]) -Greedy action tensor([ 0.4735, -0.1627, -0.0752, -0.0860]) tensor([0.3734, 0.1976, 0.2157, 0.2134]) -Greedy action tensor([ 0.9046, -0.6884, 0.0632, -0.5773]) tensor([0.5372, 0.1092, 0.2316, 0.1220]) -Greedy action tensor([ 0.9205, -0.7955, 0.0063, -0.3313]) tensor([0.5357, 0.0963, 0.2147, 0.1532]) -Greedy action tensor([ 0.9496, -0.5562, -0.1167, -0.3121]) tensor([0.5407, 0.1200, 0.1862, 0.1531]) -Greedy action tensor([ 0.8444, -0.5542, 0.0296, -0.2843]) tensor([0.4967, 0.1227, 0.2199, 0.1607]) -Greedy action tensor([ 0.4738, -0.3520, -0.1408, -0.5227]) tensor([0.4259, 0.1865, 0.2304, 0.1572]) -Greedy action tensor([ 1.0336, -0.5438, 0.0111, -0.5240]) tensor([0.5628, 0.1162, 0.2024, 0.1186]) -Greedy action tensor([ 0.8193, -0.6794, 0.0355, -0.4445]) tensor([0.5095, 0.1138, 0.2327, 0.1440]) -Greedy action tensor([ 1.1626, -0.4803, 0.1230, -0.3605]) tensor([0.5666, 0.1096, 0.2003, 0.1235]) -Greedy action tensor([ 1.1228, -0.8818, -0.0845, -0.8073]) tensor([0.6334, 0.0853, 0.1894, 0.0919]) -Greedy action tensor([ 0.6249, -0.3816, -0.1659, -0.6220]) tensor([0.4747, 0.1735, 0.2153, 0.1364]) -Greedy action tensor([ 0.7946, -0.4607, 0.0221, -0.3416]) tensor([0.4836, 0.1378, 0.2233, 0.1553]) -Greedy action tensor([ 6.1851e-01, -8.0763e-02, 3.6809e-04, -2.2483e-01]) tensor([0.4055, 0.2015, 0.2185, 0.1745]) -Greedy action tensor([ 0.7852, -0.4757, 0.0205, -0.3492]) tensor([0.4830, 0.1369, 0.2248, 0.1553]) -Greedy action tensor([ 0.4809, 0.3394, -0.1215, -0.2117]) tensor([0.3429, 0.2977, 0.1878, 0.1716]) -Greedy action tensor([ 0.9835, -0.9415, 0.1163, -0.3709]) tensor([0.5482, 0.0800, 0.2303, 0.1415]) -Greedy action tensor([ 0.8258, -0.9005, 0.0874, -0.1967]) tensor([0.4962, 0.0883, 0.2371, 0.1785]) -Greedy action tensor([ 0.5678, 0.0785, 0.1870, -0.1695]) tensor([0.3604, 0.2209, 0.2463, 0.1724]) -Greedy action tensor([ 0.8189, -0.5051, 0.0471, -0.4324]) tensor([0.4964, 0.1321, 0.2294, 0.1420]) -Greedy action tensor([ 0.9366, -0.3735, -0.1647, -0.3196]) tensor([0.5299, 0.1430, 0.1762, 0.1509]) -Greedy action tensor([ 0.6941, -0.4461, -0.0573, -0.3324]) tensor([0.4652, 0.1487, 0.2194, 0.1667]) -Greedy action tensor([ 0.3551, -0.0517, -0.0146, -0.1152]) tensor([0.3354, 0.2233, 0.2317, 0.2096]) -Greedy action tensor([ 0.8171, -0.5429, -0.0106, -0.4108]) tensor([0.5034, 0.1292, 0.2200, 0.1474]) -Greedy action tensor([ 0.3852, -0.0058, 0.0220, -0.2512]) tensor([0.3447, 0.2331, 0.2397, 0.1824]) -Greedy action tensor([ 0.4848, -0.1164, 0.0875, 0.1308]) tensor([0.3422, 0.1876, 0.2300, 0.2402]) -Greedy action tensor([ 0.7995, -0.5550, 0.0192, -0.3207]) tensor([0.4896, 0.1264, 0.2244, 0.1597]) -Greedy action tensor([ 0.4898, -0.0061, -0.0762, -0.1857]) tensor([0.3724, 0.2268, 0.2114, 0.1895]) -Greedy action tensor([ 1.0745, -0.7294, 0.0336, -0.3680]) tensor([0.5701, 0.0939, 0.2013, 0.1347]) -Greedy action tensor([ 0.4922, -0.0456, -0.1615, -0.0589]) tensor([0.3731, 0.2179, 0.1941, 0.2150]) -Greedy action tensor([ 0.3905, -0.0489, -0.0785, -0.4456]) tensor([0.3699, 0.2384, 0.2314, 0.1603]) -Greedy action tensor([ 1.0349, -0.8754, -0.0054, -0.3551]) tensor([0.5713, 0.0846, 0.2019, 0.1423]) -Greedy action tensor([ 0.8988, -0.2742, -0.1326, -0.3158]) tensor([0.5095, 0.1577, 0.1816, 0.1512]) -Greedy action tensor([ 0.4072, -0.1691, -0.0937, 0.1306]) tensor([0.3417, 0.1920, 0.2071, 0.2592]) -Greedy action tensor([ 0.7105, 0.0206, -0.1542, -0.4242]) tensor([0.4456, 0.2235, 0.1877, 0.1433]) -Greedy action tensor([ 0.5850, -0.4508, -0.0493, -0.2752]) tensor([0.4332, 0.1538, 0.2297, 0.1833]) -Greedy action tensor([ 1.5377, -0.9649, -0.0813, -0.7640]) tensor([0.7246, 0.0593, 0.1435, 0.0725]) -Greedy action tensor([ 0.8954, -0.5877, 0.0044, -0.6031]) tensor([0.5375, 0.1220, 0.2205, 0.1201]) -Greedy action tensor([ 0.9260, -0.6278, 0.0553, -0.3004]) tensor([0.5199, 0.1099, 0.2177, 0.1525]) -Greedy action tensor([ 0.6358, -0.4902, -0.1150, -0.3162]) tensor([0.4582, 0.1486, 0.2163, 0.1769]) -Greedy action tensor([ 0.9243, -0.8474, -0.0120, -0.3966]) tensor([0.5467, 0.0930, 0.2144, 0.1459]) -Greedy action tensor([ 0.6670, -0.4487, -0.2406, -0.5982]) tensor([0.4967, 0.1628, 0.2004, 0.1402]) -Greedy action tensor([ 5.6736e-01, -2.5593e-01, -2.5505e-04, -1.2389e-01]) tensor([0.3989, 0.1751, 0.2261, 0.1998]) -Greedy action tensor([ 0.5122, -0.6321, -0.1164, -0.2805]) tensor([0.4340, 0.1382, 0.2314, 0.1964]) -Greedy action tensor([ 0.8128, -0.4169, -0.1274, -0.3864]) tensor([0.5039, 0.1473, 0.1968, 0.1519]) -Greedy action tensor([ 0.7976, -0.3876, -0.0142, 0.0054]) tensor([0.4540, 0.1388, 0.2016, 0.2056]) -Greedy action tensor([ 0.8949, -0.3933, -0.0984, -0.2747]) tensor([0.5111, 0.1409, 0.1893, 0.1587]) -Greedy action tensor([ 0.8571, -0.5252, -0.0121, -0.4701]) tensor([0.5167, 0.1297, 0.2166, 0.1370]) -Greedy action tensor([ 1.1206, -0.5794, -0.0023, -0.3472]) tensor([0.5752, 0.1051, 0.1871, 0.1325]) -Greedy action tensor([ 0.6921, -0.4185, -0.0442, -0.3860]) tensor([0.4654, 0.1533, 0.2229, 0.1584]) -Greedy action tensor([ 0.4681, 0.0314, -0.1622, 0.0262]) tensor([0.3544, 0.2290, 0.1887, 0.2278]) -Greedy action tensor([ 0.5967, -0.5055, -0.1262, -0.1742]) tensor([0.4386, 0.1457, 0.2129, 0.2029]) -Greedy action tensor([ 0.6927, -0.3509, -0.1401, -0.1769]) tensor([0.4533, 0.1596, 0.1971, 0.1900]) -Greedy action tensor([ 0.8569, -0.6092, 0.0461, -0.5260]) tensor([0.5192, 0.1198, 0.2308, 0.1302]) -Greedy action tensor([ 0.3616, -0.1750, -0.1104, -0.3334]) tensor([0.3693, 0.2160, 0.2304, 0.1843]) -Greedy action tensor([ 0.7188, -0.5712, -0.1078, -0.4144]) tensor([0.4914, 0.1353, 0.2150, 0.1582]) -Greedy action tensor([ 0.7537, -0.1080, -0.1767, -0.1934]) tensor([0.4536, 0.1916, 0.1789, 0.1759]) -Greedy action tensor([ 0.5088, -0.2244, -0.0716, -0.0046]) tensor([0.3790, 0.1821, 0.2121, 0.2268]) -Greedy action tensor([ 0.9414, -0.4421, 0.1222, -0.6030]) tensor([0.5250, 0.1316, 0.2314, 0.1120]) -Greedy action tensor([ 0.4564, 0.0540, -0.1481, 0.0449]) tensor([0.3475, 0.2324, 0.1898, 0.2303]) -Greedy action tensor([ 1.0537, -0.5618, -0.1675, -0.5567]) tensor([0.5905, 0.1174, 0.1741, 0.1180]) -Greedy action tensor([ 0.5106, -0.4378, -0.2005, -0.2830]) tensor([0.4291, 0.1662, 0.2107, 0.1940]) -Greedy action tensor([ 0.8991, -0.8589, 0.0532, -0.5576]) tensor([0.5451, 0.0940, 0.2339, 0.1270]) -Greedy action tensor([ 0.6055, -0.3405, -0.0608, -0.2632]) tensor([0.4308, 0.1673, 0.2212, 0.1807]) -Greedy action tensor([ 0.5576, -0.3053, -0.0948, -0.1325]) tensor([0.4091, 0.1726, 0.2131, 0.2052]) -Greedy action tensor([ 1.0332, -0.4532, -0.0153, -0.4671]) tensor([0.5557, 0.1257, 0.1947, 0.1239]) -Greedy action tensor([ 0.5288, -0.6501, 0.0136, -0.3039]) tensor([0.4274, 0.1315, 0.2553, 0.1858]) -Greedy action tensor([ 1.0435, -0.5453, -0.1098, -0.2600]) tensor([0.5583, 0.1140, 0.1762, 0.1516]) -Greedy action tensor([ 0.8875, -0.4162, -0.0766, -0.4171]) tensor([0.5197, 0.1411, 0.1982, 0.1410]) -Greedy action tensor([-0.0558, -0.6830, 0.0418, 0.4126]) tensor([0.2362, 0.1261, 0.2604, 0.3773]) -Greedy action tensor([-0.5521, -0.1034, -0.3669, -0.2880]) tensor([0.1972, 0.3088, 0.2373, 0.2568]) -Greedy action tensor([0.5598, 0.1653, 0.7793, 0.5038]) tensor([0.2587, 0.1744, 0.3222, 0.2446]) -Greedy action tensor([ 1.3456, -0.0098, 0.5051, 0.7745]) tensor([0.4436, 0.1144, 0.1914, 0.2506]) -Greedy action tensor([-0.1773, -0.4304, 1.0039, -0.6487]) tensor([0.1767, 0.1372, 0.5758, 0.1103]) -Greedy action tensor([ 0.6180, -0.0271, 0.1462, -0.0305]) tensor([0.3744, 0.1964, 0.2335, 0.1957]) -Greedy action tensor([-0.2100, -0.7683, -0.8396, -1.3505]) tensor([0.4124, 0.2360, 0.2197, 0.1318]) -Greedy action tensor([-0.7938, -1.0485, 1.0563, -0.6433]) tensor([0.1075, 0.0834, 0.6841, 0.1250]) -Greedy action tensor([ 0.5046, 0.1046, -0.1837, -0.1068]) tensor([0.3683, 0.2469, 0.1850, 0.1998]) -Greedy action tensor([ 0.2049, 0.1267, 1.5721, -0.7707]) tensor([0.1606, 0.1485, 0.6303, 0.0605]) -Greedy action tensor([ 0.4782, -0.1258, 0.1699, -0.0252]) tensor([0.3465, 0.1894, 0.2546, 0.2095]) -Greedy action tensor([ 0.2626, 1.1286, -0.3572, -1.1257]) tensor([0.2401, 0.5708, 0.1292, 0.0599]) -Greedy action tensor([-1.1525, -1.0749, 1.2248, -1.6081]) tensor([0.0741, 0.0801, 0.7988, 0.0470]) -Greedy action tensor([ 0.4428, -1.0547, 0.0037, 0.4735]) tensor([0.3449, 0.0771, 0.2223, 0.3556]) -Greedy action tensor([-0.7086, -0.0043, 0.6550, -0.7181]) tensor([0.1262, 0.2552, 0.4935, 0.1250]) -Greedy action tensor([ 0.7055, -0.5719, 0.2197, -0.4832]) tensor([0.4548, 0.1268, 0.2798, 0.1385]) -Greedy action tensor([ 0.9027, -0.1961, 0.8226, 0.9805]) tensor([0.2996, 0.0999, 0.2766, 0.3239]) -Greedy action tensor([ 0.3113, -2.2145, -0.2892, 0.3671]) tensor([0.3723, 0.0298, 0.2042, 0.3937]) -Greedy action tensor([-0.4836, -1.2330, 0.2803, 0.6177]) tensor([0.1509, 0.0713, 0.3239, 0.4539]) -Greedy action tensor([ 0.7735, -0.5961, -0.4222, -0.2243]) tensor([0.5194, 0.1320, 0.1571, 0.1915]) -Greedy action tensor([ 1.0686, -1.0181, -1.0548, -0.5072]) tensor([0.6894, 0.0855, 0.0825, 0.1426]) -Greedy action tensor([ 2.6997, -1.5611, 0.4897, 1.2235]) tensor([0.7395, 0.0104, 0.0811, 0.1690]) -Greedy action tensor([ 0.4603, -0.3664, -0.0060, -0.0746]) tensor([0.3773, 0.1651, 0.2367, 0.2210]) -Greedy action tensor([ 0.2770, -0.5557, 0.9147, 0.5425]) tensor([0.2159, 0.0939, 0.4086, 0.2816]) -Greedy action tensor([1.7169, 0.0838, 0.3794, 0.5725]) tensor([0.5630, 0.1100, 0.1478, 0.1793]) -Greedy action tensor([-0.3135, -0.5750, -0.9128, -0.0686]) tensor([0.2780, 0.2141, 0.1527, 0.3552]) -Greedy action tensor([ 0.2361, -0.0901, -0.6582, -0.0315]) tensor([0.3453, 0.2492, 0.1412, 0.2643]) -Greedy action tensor([-0.9876, -2.6589, 0.3518, 0.4852]) tensor([0.1068, 0.0201, 0.4075, 0.4657]) -Greedy action tensor([ 0.6502, -0.6840, -0.2340, -0.0827]) tensor([0.4636, 0.1221, 0.1915, 0.2228]) -Greedy action tensor([-0.4246, -0.5101, -0.0876, 0.1202]) tensor([0.1983, 0.1820, 0.2778, 0.3419]) -Greedy action tensor([-0.5774, -0.1653, -0.7311, -0.1455]) tensor([0.2038, 0.3077, 0.1747, 0.3138]) -Greedy action tensor([-0.1213, 0.1745, 1.0550, -0.6691]) tensor([0.1622, 0.2181, 0.5259, 0.0938]) -Greedy action tensor([-0.0154, -0.8612, 0.3391, 1.2686]) tensor([0.1547, 0.0664, 0.2205, 0.5585]) -Greedy action tensor([-0.1312, -0.5643, 0.0103, -0.0331]) tensor([0.2562, 0.1661, 0.2951, 0.2826]) -Greedy action tensor([ 0.7135, -0.2319, -0.2919, -0.5607]) tensor([0.4916, 0.1910, 0.1799, 0.1375]) -Greedy action tensor([-0.3200, -1.3714, 0.2161, -0.7560]) tensor([0.2699, 0.0943, 0.4613, 0.1745]) -Greedy action tensor([-0.6848, -0.3530, -0.1915, -0.7834]) tensor([0.2025, 0.2822, 0.3317, 0.1835]) -Greedy action tensor([ 1.5228, -0.0573, 0.0389, 0.3331]) tensor([0.5757, 0.1186, 0.1305, 0.1752]) -Greedy action tensor([ 0.3219, -1.0959, 0.0295, 0.3857]) tensor([0.3274, 0.0793, 0.2444, 0.3489]) -Greedy action tensor([-1.1178, -0.9598, 0.8248, -0.2978]) tensor([0.0876, 0.1026, 0.6110, 0.1988]) -Greedy action tensor([ 1.1233, -0.7591, 0.2114, 0.8967]) tensor([0.4253, 0.0647, 0.1709, 0.3391]) -Greedy action tensor([ 0.1751, -1.3119, 0.6555, -0.0660]) tensor([0.2756, 0.0623, 0.4455, 0.2165]) -Greedy action tensor([-0.3156, -0.9076, -0.6383, 0.2510]) tensor([0.2475, 0.1369, 0.1793, 0.4362]) -Greedy action tensor([ 0.8921, -0.4151, 0.4642, -0.0250]) tensor([0.4307, 0.1165, 0.2807, 0.1721]) -Greedy action tensor([-0.4644, -0.3515, -0.7941, 0.2027]) tensor([0.2089, 0.2339, 0.1502, 0.4070]) -Greedy action tensor([-0.7780, -0.5509, -0.3675, -0.8634]) tensor([0.2136, 0.2681, 0.3221, 0.1962]) -Greedy action tensor([ 0.3259, -0.5263, -0.4740, 0.0321]) tensor([0.3815, 0.1627, 0.1714, 0.2844]) -Greedy action tensor([ 0.3857, -0.5958, 0.2994, 0.7631]) tensor([0.2666, 0.0999, 0.2446, 0.3889]) -Greedy action tensor([-0.3385, 0.0339, 0.2578, -1.0594]) tensor([0.2104, 0.3053, 0.3820, 0.1023]) -Greedy action tensor([ 0.5288, -0.1988, 0.5571, -0.2191]) tensor([0.3350, 0.1618, 0.3446, 0.1586]) -Greedy action tensor([-0.6219, -0.6828, 0.5279, 0.0905]) tensor([0.1401, 0.1318, 0.4424, 0.2857]) -Greedy action tensor([ 1.0502, -0.2284, 0.4778, 0.7553]) tensor([0.3865, 0.1076, 0.2181, 0.2878]) -Greedy action tensor([ 0.3728, 0.3985, 1.1592, -0.0628]) tensor([0.2054, 0.2108, 0.4510, 0.1329]) -Greedy action tensor([-1.1024, -0.8838, 0.1722, 0.1547]) tensor([0.1071, 0.1333, 0.3831, 0.3765]) -Greedy action tensor([ 0.3919, -0.4158, -0.0051, -0.6100]) tensor([0.4024, 0.1794, 0.2705, 0.1477]) -Greedy action tensor([-0.1144, -0.8127, -0.3062, -0.2357]) tensor([0.3116, 0.1550, 0.2573, 0.2761]) -Greedy action tensor([ 1.0430, -0.3019, -0.5884, 0.2020]) tensor([0.5298, 0.1380, 0.1037, 0.2285]) -Greedy action tensor([ 0.3919, -0.8721, 0.1863, -0.3056]) tensor([0.3854, 0.1089, 0.3138, 0.1919]) -Greedy action tensor([ 0.5463, 0.2654, -0.3657, -0.0754]) tensor([0.3712, 0.2803, 0.1491, 0.1994]) -Greedy action tensor([ 0.8445, -1.7320, -0.2546, 0.9298]) tensor([0.4003, 0.0304, 0.1334, 0.4359]) -Greedy action tensor([ 0.7310, -0.8271, 1.1116, 0.6375]) tensor([0.2790, 0.0587, 0.4082, 0.2541]) -Greedy action tensor([-1.5302, -0.0795, -0.5477, -0.5392]) tensor([0.0941, 0.4013, 0.2513, 0.2534]) -Greedy action tensor([-0.6572, -0.1869, -0.4612, -0.0234]) tensor([0.1754, 0.2807, 0.2134, 0.3306]) -Greedy action tensor([-0.2202, 0.1189, 0.5198, -0.6855]) tensor([0.1950, 0.2737, 0.4088, 0.1225]) -Greedy action tensor([ 0.7665, -1.2331, 0.8159, 1.1582]) tensor([0.2728, 0.0369, 0.2866, 0.4036]) -Greedy action tensor([-0.6755, -0.4451, -0.5329, 0.3610]) tensor([0.1605, 0.2021, 0.1851, 0.4524]) -Greedy action tensor([ 0.3467, -0.8233, 0.8402, -0.7107]) tensor([0.3034, 0.0942, 0.4970, 0.1054]) -Greedy action tensor([ 0.9249, 0.1060, -0.4073, -0.0810]) tensor([0.4830, 0.2129, 0.1274, 0.1766]) -Greedy action tensor([-1.6876, -0.4226, -1.3254, 0.3170]) tensor([0.0746, 0.2643, 0.1072, 0.5539]) -Greedy action tensor([-0.8448, -0.6553, 0.6736, -1.4129]) tensor([0.1362, 0.1647, 0.6219, 0.0772]) -Greedy action tensor([ 1.0419, -0.3128, 0.0355, 0.1819]) tensor([0.4886, 0.1261, 0.1786, 0.2068]) -Greedy action tensor([-0.2621, -0.5364, 0.3654, 0.1086]) tensor([0.1968, 0.1496, 0.3686, 0.2851]) -Greedy action tensor([ 0.6124, 0.3247, -0.1621, -0.1827]) tensor([0.3756, 0.2817, 0.1731, 0.1696]) -Greedy action tensor([ 0.4850, -0.1286, -0.5146, -0.0657]) tensor([0.4023, 0.2178, 0.1480, 0.2319]) -Greedy action tensor([-0.7519, -0.2621, -0.2318, -0.0564]) tensor([0.1583, 0.2583, 0.2662, 0.3173]) -Greedy action tensor([ 1.2529, -0.9758, -0.2396, 0.6899]) tensor([0.5258, 0.0566, 0.1182, 0.2994]) -Greedy action tensor([-0.1196, -0.6267, 0.2336, 0.1440]) tensor([0.2311, 0.1392, 0.3290, 0.3008]) -Greedy action tensor([ 0.6521, -0.9337, -0.5901, 0.2399]) tensor([0.4639, 0.0950, 0.1339, 0.3072]) -Greedy action tensor([-0.9649, -0.6536, 0.2713, -1.4146]) tensor([0.1552, 0.2118, 0.5341, 0.0990]) -Greedy action tensor([ 1.1262, -0.6160, -0.5843, 0.7579]) tensor([0.4883, 0.0855, 0.0883, 0.3379]) -Greedy action tensor([-1.0186, -0.0136, 0.2993, -0.2052]) tensor([0.1029, 0.2810, 0.3842, 0.2320]) -Greedy action tensor([ 0.2355, 0.5281, 0.7393, -0.6166]) tensor([0.2262, 0.3030, 0.3743, 0.0965]) -Greedy action tensor([ 1.6455, -0.2333, -0.4473, 0.4861]) tensor([0.6290, 0.0961, 0.0776, 0.1973]) -Greedy action tensor([ 1.3069, -0.6160, 0.0313, 0.4177]) tensor([0.5445, 0.0796, 0.1521, 0.2238]) -Greedy action tensor([ 1.0303, -0.2975, -0.5665, -0.0156]) tensor([0.5498, 0.1457, 0.1113, 0.1932]) -Greedy action tensor([ 1.7808, -0.9540, -0.3173, 0.3401]) tensor([0.7021, 0.0456, 0.0861, 0.1662]) -Greedy action tensor([ 0.5622, -0.2085, -0.5187, 0.8333]) tensor([0.3212, 0.1486, 0.1090, 0.4212]) -Greedy action tensor([ 1.6664, -0.7683, -0.0729, 0.5145]) tensor([0.6332, 0.0555, 0.1112, 0.2001]) -Greedy action tensor([ 2.1798, -1.4796, -0.0030, 0.1505]) tensor([0.7875, 0.0203, 0.0888, 0.1035]) -Greedy action tensor([ 2.0190, -1.0604, -0.4262, 0.4311]) tensor([0.7479, 0.0344, 0.0648, 0.1528]) -Greedy action tensor([ 1.0917, 0.1662, -0.5236, -0.0187]) tensor([0.5196, 0.2059, 0.1033, 0.1712]) -Greedy action tensor([ 1.2089, -0.3633, -0.4867, 0.1491]) tensor([0.5755, 0.1195, 0.1056, 0.1994]) -Greedy action tensor([ 1.6732, -0.8821, -0.2299, 0.2200]) tensor([0.6847, 0.0532, 0.1021, 0.1601]) -Greedy action tensor([ 1.7955, -0.9985, -0.1745, 0.8508]) tensor([0.6292, 0.0385, 0.0877, 0.2446]) -Greedy action tensor([ 1.2732, -0.1389, -1.0240, 0.5033]) tensor([0.5533, 0.1348, 0.0556, 0.2562]) -Greedy action tensor([ 1.3653, -0.7651, -0.5121, 0.2953]) tensor([0.6193, 0.0736, 0.0947, 0.2124]) -Greedy action tensor([ 1.8452, -0.9288, -0.1221, 0.4213]) tensor([0.6930, 0.0433, 0.0969, 0.1669]) -Greedy action tensor([ 1.1570, -0.4244, -0.6012, 0.6043]) tensor([0.5119, 0.1053, 0.0882, 0.2946]) -Greedy action tensor([ 1.2615, -0.0688, -0.8637, 0.5049]) tensor([0.5397, 0.1427, 0.0644, 0.2532]) -Greedy action tensor([ 1.4978, -0.8803, -0.4748, 0.5460]) tensor([0.6181, 0.0573, 0.0860, 0.2386]) -Greedy action tensor([ 1.6351, -0.4810, -0.9953, 0.1747]) tensor([0.7019, 0.0846, 0.0506, 0.1629]) -Greedy action tensor([ 1.3581, -0.7170, -0.1963, -0.0940]) tensor([0.6366, 0.0799, 0.1345, 0.1490]) -Greedy action tensor([ 0.9653, -0.6009, -0.2240, -0.0288]) tensor([0.5310, 0.1109, 0.1616, 0.1965]) -Greedy action tensor([ 1.8763, 0.2210, -0.1246, 0.1877]) tensor([0.6618, 0.1264, 0.0895, 0.1223]) -Greedy action tensor([ 1.0274, -0.0815, -0.6338, -0.0166]) tensor([0.5342, 0.1763, 0.1015, 0.1881]) -Greedy action tensor([ 2.5963, -0.9348, 0.1102, 0.9709]) tensor([0.7638, 0.0224, 0.0636, 0.1503]) -Greedy action tensor([ 1.1350, -0.1973, -0.6220, 0.2468]) tensor([0.5412, 0.1428, 0.0934, 0.2226]) -Greedy action tensor([ 1.8136, -0.2061, -0.6436, 0.3087]) tensor([0.6942, 0.0921, 0.0595, 0.1542]) -Greedy action tensor([ 1.3527, -0.3677, -0.4218, 0.1335]) tensor([0.6083, 0.1089, 0.1031, 0.1797]) -Greedy action tensor([ 1.7024, -0.7324, -0.5901, 0.1680]) tensor([0.7122, 0.0624, 0.0719, 0.1535]) -Greedy action tensor([ 1.7424, -0.5728, -0.4180, 0.0916]) tensor([0.7113, 0.0702, 0.0820, 0.1365]) -Greedy action tensor([ 1.7595, -0.4076, -0.1380, 0.2067]) tensor([0.6774, 0.0776, 0.1016, 0.1434]) -Greedy action tensor([ 1.3566, -0.5896, 0.0656, 0.1416]) tensor([0.5833, 0.0833, 0.1604, 0.1731]) -Greedy action tensor([ 1.5938, -0.7315, -0.0843, -0.0299]) tensor([0.6749, 0.0660, 0.1260, 0.1331]) -Greedy action tensor([ 1.5582, -0.9829, -0.1220, 0.5950]) tensor([0.6072, 0.0478, 0.1132, 0.2318]) -Greedy action tensor([ 1.3400, -0.0601, 0.0384, 0.4118]) tensor([0.5225, 0.1288, 0.1422, 0.2065]) -Greedy action tensor([ 1.6984, 0.1376, -0.6968, 0.4501]) tensor([0.6297, 0.1322, 0.0574, 0.1807]) -Greedy action tensor([ 1.8562, -0.9243, -0.9074, 0.4127]) tensor([0.7347, 0.0456, 0.0463, 0.1734]) -Greedy action tensor([ 1.7357, -0.8916, -0.2269, 0.6127]) tensor([0.6502, 0.0470, 0.0913, 0.2115]) -Greedy action tensor([ 0.9679, -0.3706, -0.5362, -0.1050]) tensor([0.5475, 0.1436, 0.1217, 0.1873]) -Greedy action tensor([ 2.3440, -0.1452, -0.6433, -0.0141]) tensor([0.8143, 0.0676, 0.0411, 0.0770]) -Greedy action tensor([ 1.4756, 0.0168, -1.1639, 0.0886]) tensor([0.6436, 0.1497, 0.0460, 0.1608]) -Greedy action tensor([ 1.7517, -1.1423, -0.3709, 0.6083]) tensor([0.6694, 0.0371, 0.0801, 0.2134]) -Greedy action tensor([ 1.9016, -0.7732, -0.5606, 0.5970]) tensor([0.7015, 0.0483, 0.0598, 0.1903]) -Greedy action tensor([ 0.9816, -0.1122, -0.5052, 0.1293]) tensor([0.5032, 0.1685, 0.1138, 0.2146]) -Greedy action tensor([ 1.1341, -0.1276, -0.5335, 0.2803]) tensor([0.5270, 0.1492, 0.0994, 0.2244]) -Greedy action tensor([ 2.0126, 0.3234, -0.1467, 0.1806]) tensor([0.6848, 0.1265, 0.0790, 0.1096]) -Greedy action tensor([ 1.6256, -0.5066, -0.4600, 0.2827]) tensor([0.6649, 0.0788, 0.0826, 0.1736]) -Greedy action tensor([ 2.1300, -0.6953, -0.3977, 0.0399]) tensor([0.7919, 0.0470, 0.0632, 0.0979]) -Greedy action tensor([ 1.1583, 0.1702, -0.3559, 0.1169]) tensor([0.5141, 0.1914, 0.1131, 0.1815]) -Greedy action tensor([ 1.1668, -0.6116, -0.5630, 0.5750]) tensor([0.5264, 0.0889, 0.0934, 0.2913]) -Greedy action tensor([ 1.3971, -0.0516, 0.0161, 0.1146]) tensor([0.5670, 0.1332, 0.1425, 0.1573]) -Greedy action tensor([ 1.1057, -0.3745, -0.3454, 0.4213]) tensor([0.5085, 0.1158, 0.1192, 0.2565]) -Greedy action tensor([ 1.8868, -1.1139, -0.1055, 0.3396]) tensor([0.7148, 0.0356, 0.0975, 0.1521]) -Greedy action tensor([ 1.4189, -0.2766, -0.1881, 0.2315]) tensor([0.5921, 0.1087, 0.1187, 0.1806]) -Greedy action tensor([ 1.2992, -0.4357, 0.0194, 0.2328]) tensor([0.5559, 0.0981, 0.1546, 0.1914]) -Greedy action tensor([ 1.3110, -0.4516, -0.5008, 0.4319]) tensor([0.5714, 0.0981, 0.0933, 0.2372]) -Greedy action tensor([ 0.9731, -0.6478, -0.2147, -0.3257]) tensor([0.5632, 0.1114, 0.1717, 0.1537]) -Greedy action tensor([ 1.7906, -1.1071, -0.2936, 0.3220]) tensor([0.7093, 0.0391, 0.0882, 0.1633]) -Greedy action tensor([ 1.2422, -0.5537, 0.0024, 0.1489]) tensor([0.5585, 0.0927, 0.1616, 0.1872]) -Greedy action tensor([ 1.3407, -0.2882, -0.4676, 0.4256]) tensor([0.5680, 0.1114, 0.0931, 0.2275]) -Greedy action tensor([ 1.5002, -0.3148, -0.4012, 0.0913]) tensor([0.6424, 0.1046, 0.0960, 0.1570]) -Greedy action tensor([ 1.6093, -0.6388, -0.3083, 0.7303]) tensor([0.5996, 0.0633, 0.0881, 0.2490]) -Greedy action tensor([ 1.1075, -0.4700, -0.0703, 0.0256]) tensor([0.5396, 0.1114, 0.1661, 0.1829]) -Greedy action tensor([ 0.9596, -0.2714, -0.5022, 0.5207]) tensor([0.4611, 0.1347, 0.1069, 0.2973]) -Greedy action tensor([ 1.5405, -0.2049, -0.4271, 0.5536]) tensor([0.5927, 0.1035, 0.0829, 0.2209]) -Greedy action tensor([ 1.4090, -0.1237, -0.3886, 0.3064]) tensor([0.5835, 0.1260, 0.0967, 0.1937]) -Greedy action tensor([2.4530, 0.7873, 0.4270, 0.2623]) tensor([0.6979, 0.1320, 0.0920, 0.0781]) -Greedy action tensor([ 1.5263, -0.3083, -0.6632, 0.6093]) tensor([0.5983, 0.0955, 0.0670, 0.2392]) -Greedy action tensor([ 1.3542, -0.3069, -0.6572, 0.2284]) tensor([0.6068, 0.1152, 0.0812, 0.1968]) -Greedy action tensor([ 1.9974, -0.9085, -0.2657, 0.6207]) tensor([0.7086, 0.0388, 0.0737, 0.1789]) -Greedy action tensor([ 1.6050, -0.8392, -1.0000, 0.3053]) tensor([0.6977, 0.0606, 0.0516, 0.1902]) -Greedy action tensor([ 1.3854, -0.6393, -0.3359, 0.1855]) tensor([0.6203, 0.0819, 0.1109, 0.1869]) -Greedy action tensor([ 1.2123, -0.3727, -0.8077, 0.2793]) tensor([0.5777, 0.1184, 0.0766, 0.2272]) -Greedy action tensor([ 1.3769, -0.5497, -0.5123, 0.0307]) tensor([0.6422, 0.0935, 0.0971, 0.1671]) -Greedy action tensor([ 1.7611, -0.5251, -0.4815, 0.3950]) tensor([0.6836, 0.0695, 0.0726, 0.1744]) -Greedy action tensor([ 1.2362, -0.2193, -0.6062, 0.0917]) tensor([0.5848, 0.1364, 0.0926, 0.1862]) -Greedy action tensor([ 0.4278, -0.1165, 0.0349, 0.0658]) tensor([0.3388, 0.1966, 0.2287, 0.2359]) -Greedy action tensor([ 1.0039, -0.7621, -0.5304, -0.1601]) tensor([0.5886, 0.1007, 0.1269, 0.1838]) -Greedy action tensor([ 0.9526, -0.2175, -0.2352, 0.1956]) tensor([0.4798, 0.1489, 0.1463, 0.2250]) -Greedy action tensor([ 1.0795, -0.4757, -0.1915, 0.4633]) tensor([0.4922, 0.1039, 0.1381, 0.2658]) -Greedy action tensor([ 1.9126, -0.5298, -0.2958, 0.3794]) tensor([0.7079, 0.0616, 0.0778, 0.1528]) -Greedy action tensor([ 0.8127, -0.1480, 0.0804, -0.0751]) tensor([0.4396, 0.1682, 0.2114, 0.1809]) -Greedy action tensor([-1.8451, -0.4621, 0.6225, -0.1341]) tensor([0.0448, 0.1787, 0.5285, 0.2480]) -Greedy action tensor([-1.9096, -0.4159, 0.6490, -0.1580]) tensor([0.0414, 0.1845, 0.5352, 0.2388]) -Greedy action tensor([-1.8217, -0.3994, 0.5957, -0.1118]) tensor([0.0457, 0.1894, 0.5124, 0.2525]) -Greedy action tensor([-1.8776, -0.4232, 0.6508, -0.1477]) tensor([0.0426, 0.1826, 0.5343, 0.2405]) -Greedy action tensor([-1.3830, -0.5168, 0.7098, 0.6239]) tensor([0.0528, 0.1256, 0.4284, 0.3931]) -Greedy action tensor([-1.7208, -0.3325, 0.6015, -0.2646]) tensor([0.0513, 0.2056, 0.5231, 0.2200]) -Greedy action tensor([-1.3042, -0.0592, 0.5196, -0.4989]) tensor([0.0775, 0.2691, 0.4800, 0.1734]) -Greedy action tensor([-1.8510, -0.4665, 0.6102, -0.1292]) tensor([0.0448, 0.1790, 0.5254, 0.2508]) -Greedy action tensor([-1.9364, -0.4386, 0.6640, -0.1738]) tensor([0.0404, 0.1805, 0.5438, 0.2353]) -Greedy action tensor([-1.5407, -0.5104, 0.5072, 0.2039]) tensor([0.0579, 0.1622, 0.4486, 0.3313]) -Greedy action tensor([-1.9072, -0.3928, 0.6449, -0.1554]) tensor([0.0414, 0.1883, 0.5315, 0.2388]) -Greedy action tensor([-1.8901, -0.4540, 0.6434, -0.1523]) tensor([0.0426, 0.1790, 0.5364, 0.2421]) -Greedy action tensor([-1.9053, -0.4491, 0.6496, -0.1610]) tensor([0.0419, 0.1796, 0.5389, 0.2396]) -Greedy action tensor([-1.9376, -0.4055, 0.6559, -0.1815]) tensor([0.0403, 0.1866, 0.5395, 0.2335]) -Greedy action tensor([-1.0610, 0.0944, 0.2162, 0.1998]) tensor([0.0886, 0.2813, 0.3177, 0.3125]) -Greedy action tensor([-1.9261, -0.4389, 0.6599, -0.1695]) tensor([0.0408, 0.1806, 0.5420, 0.2365]) -Greedy action tensor([-1.8762, -0.3245, 0.6200, -0.1239]) tensor([0.0423, 0.1998, 0.5137, 0.2442]) -Greedy action tensor([-1.7434, -0.0228, 0.5158, -0.0135]) tensor([0.0459, 0.2563, 0.4392, 0.2587]) -Greedy action tensor([-1.8427, -0.4578, 0.6350, -0.0993]) tensor([0.0442, 0.1765, 0.5266, 0.2527]) -Greedy action tensor([-1.2843, 0.4901, 0.2603, -0.0500]) tensor([0.0666, 0.3926, 0.3120, 0.2288]) -Greedy action tensor([-1.2357, 0.3088, 0.2883, -0.0905]) tensor([0.0745, 0.3492, 0.3421, 0.2342]) -Greedy action tensor([-1.8439, -0.3634, 0.6051, -0.1455]) tensor([0.0446, 0.1959, 0.5160, 0.2436]) -Greedy action tensor([-1.9216, -0.3763, 0.6359, -0.1776]) tensor([0.0411, 0.1929, 0.5307, 0.2353]) -Greedy action tensor([-1.4327, 0.1789, 0.3861, -0.1009]) tensor([0.0626, 0.3139, 0.3862, 0.2373]) -Greedy action tensor([-1.7006, -0.0988, 0.5307, -0.1779]) tensor([0.0504, 0.2499, 0.4689, 0.2309]) -Greedy action tensor([-1.9358, -0.4406, 0.6638, -0.1745]) tensor([0.0404, 0.1803, 0.5440, 0.2353]) -Greedy action tensor([-1.8284, -0.3022, 0.5917, -0.1069]) tensor([0.0446, 0.2050, 0.5012, 0.2492]) -Greedy action tensor([-1.9201, -0.4437, 0.6500, -0.1692]) tensor([0.0413, 0.1808, 0.5399, 0.2380]) -Greedy action tensor([-0.9450, -0.7848, 1.4211, 1.2027]) tensor([0.0467, 0.0549, 0.4981, 0.4003]) -Greedy action tensor([-1.7947, -0.4871, 0.5871, -0.0985]) tensor([0.0477, 0.1763, 0.5161, 0.2600]) -Greedy action tensor([-1.5995, -0.1696, 0.5207, 0.0767]) tensor([0.0530, 0.2216, 0.4419, 0.2835]) -Greedy action tensor([-1.8293, -0.3877, 0.6162, -0.1072]) tensor([0.0447, 0.1891, 0.5159, 0.2503]) -Greedy action tensor([-1.9051, -0.4017, 0.6524, -0.1553]) tensor([0.0414, 0.1862, 0.5342, 0.2382]) -Greedy action tensor([-1.3753, 0.0343, 0.5210, 0.5239]) tensor([0.0542, 0.2221, 0.3613, 0.3624]) -Greedy action tensor([-1.9441, -0.4460, 0.6667, -0.1794]) tensor([0.0401, 0.1795, 0.5461, 0.2343]) -Greedy action tensor([-1.8234, -0.3918, 0.5971, -0.1303]) tensor([0.0457, 0.1913, 0.5144, 0.2485]) -Greedy action tensor([-1.8900, -0.3949, 0.6425, -0.1485]) tensor([0.0421, 0.1878, 0.5299, 0.2402]) -Greedy action tensor([-1.8962, -0.4470, 0.6447, -0.1601]) tensor([0.0423, 0.1803, 0.5372, 0.2402]) -Greedy action tensor([-0.7542, -0.4000, 0.2479, -0.0952]) tensor([0.1412, 0.2012, 0.3846, 0.2729]) -Greedy action tensor([-1.9387, -0.4516, 0.6655, -0.1768]) tensor([0.0404, 0.1786, 0.5459, 0.2351]) -Greedy action tensor([-1.9327, -0.4369, 0.6610, -0.1745]) tensor([0.0406, 0.1811, 0.5429, 0.2354]) -Greedy action tensor([-1.9278, -0.4445, 0.6612, -0.1708]) tensor([0.0408, 0.1798, 0.5431, 0.2364]) -Greedy action tensor([-1.9218, -0.4200, 0.6542, -0.1675]) tensor([0.0410, 0.1839, 0.5384, 0.2367]) -Greedy action tensor([-1.9188, -0.4208, 0.6561, -0.1610]) tensor([0.0410, 0.1833, 0.5381, 0.2377]) -Greedy action tensor([-1.8084, -0.0966, 0.5431, -0.0853]) tensor([0.0442, 0.2446, 0.4638, 0.2474]) -Greedy action tensor([-1.9419, -0.4490, 0.6642, -0.1787]) tensor([0.0403, 0.1792, 0.5456, 0.2349]) -Greedy action tensor([-1.6253, -0.1226, 0.4800, -0.0906]) tensor([0.0545, 0.2450, 0.4476, 0.2529]) -Greedy action tensor([-1.8641, -0.4055, 0.6202, -0.1438]) tensor([0.0437, 0.1879, 0.5242, 0.2442]) -Greedy action tensor([0.9805, 0.5122, 0.6044, 1.4308]) tensor([0.2577, 0.1613, 0.1769, 0.4042]) -Greedy action tensor([-1.3883, -0.1789, 0.3737, -0.0210]) tensor([0.0709, 0.2377, 0.4130, 0.2783]) -Greedy action tensor([-0.6094, -0.9340, 0.8489, 0.0769]) tensor([0.1249, 0.0903, 0.5368, 0.2481]) -Greedy action tensor([-1.5764, -0.0206, 0.4886, 0.0189]) tensor([0.0539, 0.2554, 0.4250, 0.2657]) -Greedy action tensor([-1.6760, -0.4686, 0.5259, -0.0424]) tensor([0.0540, 0.1807, 0.4885, 0.2767]) -Greedy action tensor([-1.0734, 0.4223, 0.1899, 0.0766]) tensor([0.0823, 0.3671, 0.2909, 0.2598]) -Greedy action tensor([-1.9398, -0.4433, 0.6658, -0.1761]) tensor([0.0403, 0.1798, 0.5451, 0.2349]) -Greedy action tensor([-1.2205, -0.0028, 0.5032, 0.3268]) tensor([0.0681, 0.2302, 0.3817, 0.3200]) -Greedy action tensor([-1.9132, -0.4275, 0.6510, -0.1652]) tensor([0.0414, 0.1829, 0.5379, 0.2378]) -Greedy action tensor([-1.8655, -0.3867, 0.6225, -0.1676]) tensor([0.0437, 0.1917, 0.5259, 0.2387]) -Greedy action tensor([-1.9108, -0.3922, 0.6507, -0.1623]) tensor([0.0412, 0.1882, 0.5338, 0.2368]) -Greedy action tensor([-1.9000, -0.4429, 0.6697, -0.1463]) tensor([0.0414, 0.1779, 0.5413, 0.2393]) -Greedy action tensor([-1.5407, 0.2463, 0.5589, -0.5912]) tensor([0.0564, 0.3370, 0.4607, 0.1459]) -Greedy action tensor([-1.3302, 0.7211, 0.2523, 0.0590]) tensor([0.0566, 0.4405, 0.2756, 0.2272]) -Greedy action tensor([-1.2573, 0.6866, 0.2456, 0.0028]) tensor([0.0625, 0.4364, 0.2808, 0.2203]) -Greedy action tensor([-1.7928, -0.4762, 0.6785, 0.0211]) tensor([0.0440, 0.1643, 0.5214, 0.2702]) -Greedy action tensor([-1.8192, -0.1504, 0.5645, -0.0857]) tensor([0.0438, 0.2326, 0.4754, 0.2481]) -Greedy action tensor([-1.7569, -0.3981, 0.5823, -0.0657]) tensor([0.0483, 0.1881, 0.5013, 0.2622]) -Greedy action tensor([-1.8472, -0.1911, 0.5852, -0.1182]) tensor([0.0430, 0.2252, 0.4895, 0.2423]) -Greedy action tensor([-1.6845, -0.4360, 0.5278, -0.0556]) tensor([0.0534, 0.1862, 0.4881, 0.2723]) -Greedy action tensor([-1.0462, -0.2689, 0.2708, 0.3032]) tensor([0.0929, 0.2021, 0.3468, 0.3582]) -Greedy action tensor([-1.2707, 0.4570, 0.2407, 0.0038]) tensor([0.0679, 0.3819, 0.3076, 0.2427]) -Greedy action tensor([-0.9544, -0.2587, 0.3159, 0.5538]) tensor([0.0902, 0.1809, 0.3213, 0.4076]) -Greedy action tensor([-1.8839, -0.2796, 0.6254, -0.1294]) tensor([0.0416, 0.2068, 0.5113, 0.2403]) -Greedy action tensor([-1.9126, -0.4327, 0.6530, -0.1612]) tensor([0.0414, 0.1818, 0.5383, 0.2385]) -Greedy action tensor([-1.9326, -0.4462, 0.6679, -0.1723]) tensor([0.0405, 0.1790, 0.5452, 0.2353]) -Greedy action tensor([-1.7681, -0.5026, 0.5729, -0.0763]) tensor([0.0491, 0.1741, 0.5103, 0.2666]) -Greedy action tensor([-1.5587, -0.2849, 0.4167, 0.0591]) tensor([0.0594, 0.2124, 0.4285, 0.2997]) -Greedy action tensor([-1.2347, 0.0274, 0.5281, 0.2225]) tensor([0.0682, 0.2411, 0.3977, 0.2930]) -Greedy action tensor([-1.5634, 0.3311, 0.3489, 0.0071]) tensor([0.0520, 0.3458, 0.3521, 0.2501]) -Greedy action tensor([-1.9097, -0.4692, 0.6691, -0.1560]) tensor([0.0414, 0.1746, 0.5451, 0.2389]) -Greedy action tensor([-1.8497, -0.4394, 0.6850, -0.0345]) tensor([0.0419, 0.1718, 0.5288, 0.2575]) -Greedy action tensor([-1.7898, -0.0437, 0.5442, -0.0867]) tensor([0.0444, 0.2543, 0.4578, 0.2436]) -Greedy action tensor([ 0.9770, -0.7863, 0.0550, -0.7306]) tensor([0.5713, 0.0980, 0.2272, 0.1036]) -Greedy action tensor([ 0.4976, -0.3374, -0.1135, -0.1450]) tensor([0.3996, 0.1734, 0.2169, 0.2102]) -Greedy action tensor([ 0.7948, -0.5077, -0.0325, -0.4727]) tensor([0.5024, 0.1366, 0.2196, 0.1414]) -Greedy action tensor([ 0.7960, -0.6692, 0.0150, -0.1841]) tensor([0.4844, 0.1119, 0.2219, 0.1818]) -Greedy action tensor([ 0.7252, -0.2546, -0.0344, -0.1267]) tensor([0.4406, 0.1654, 0.2061, 0.1879]) -Greedy action tensor([ 0.8193, -0.1577, -0.0901, -0.0261]) tensor([0.4528, 0.1704, 0.1824, 0.1944]) -Greedy action tensor([ 0.4785, -0.3403, -0.1853, -0.1758]) tensor([0.4039, 0.1781, 0.2080, 0.2100]) -Greedy action tensor([ 0.6453, -0.5865, -0.0533, -0.1273]) tensor([0.4443, 0.1296, 0.2209, 0.2052]) -Greedy action tensor([ 1.1072, 0.1287, -0.1360, -0.4618]) tensor([0.5340, 0.2007, 0.1540, 0.1112]) -Greedy action tensor([ 0.1666, 0.0689, -0.1518, -0.3000]) tensor([0.3066, 0.2781, 0.2230, 0.1923]) -Greedy action tensor([ 0.5416, 0.0282, -0.1718, -0.0869]) tensor([0.3814, 0.2283, 0.1869, 0.2034]) -Greedy action tensor([ 0.8350, -0.7244, -0.0401, -0.4815]) tensor([0.5277, 0.1109, 0.2199, 0.1415]) -Greedy action tensor([ 0.7287, 0.2832, -0.0691, -0.1848]) tensor([0.4013, 0.2570, 0.1807, 0.1610]) -Greedy action tensor([ 0.5832, 0.1712, -0.0369, -0.4096]) tensor([0.3890, 0.2576, 0.2092, 0.1441]) -Greedy action tensor([ 0.4960, -0.3531, 0.0078, -0.5140]) tensor([0.4157, 0.1778, 0.2551, 0.1514]) -Greedy action tensor([ 1.1349, -0.3431, -0.3025, -0.2591]) tensor([0.5835, 0.1331, 0.1386, 0.1448]) -Greedy action tensor([ 0.6608, -0.5065, -0.0355, -0.3829]) tensor([0.4626, 0.1440, 0.2306, 0.1629]) -Greedy action tensor([ 0.7521, -0.3678, 0.0047, 0.0027]) tensor([0.4400, 0.1436, 0.2084, 0.2080]) -Greedy action tensor([ 0.8002, -0.6947, -0.1400, -0.2151]) tensor([0.5058, 0.1134, 0.1975, 0.1832]) -Greedy action tensor([ 1.0951, -1.0567, 0.2100, -0.5219]) tensor([0.5789, 0.0673, 0.2389, 0.1149]) -Greedy action tensor([ 0.7923, -0.8618, -0.0917, -0.3375]) tensor([0.5188, 0.0992, 0.2143, 0.1676]) -Greedy action tensor([ 1.2821, -0.8067, -0.0512, -0.6145]) tensor([0.6504, 0.0805, 0.1715, 0.0976]) -Greedy action tensor([ 0.2039, 0.0611, -0.2258, 0.1405]) tensor([0.2893, 0.2508, 0.1883, 0.2716]) -Greedy action tensor([ 1.0953, -0.2801, -0.0846, -0.1926]) tensor([0.5447, 0.1377, 0.1674, 0.1502]) -Greedy action tensor([ 0.5214, -0.3547, -0.0366, -0.2443]) tensor([0.4075, 0.1697, 0.2332, 0.1895]) -Greedy action tensor([ 0.6728, -0.4127, 0.0008, -0.1465]) tensor([0.4368, 0.1475, 0.2231, 0.1925]) -Greedy action tensor([ 0.4738, 0.1002, -0.0703, -0.3437]) tensor([0.3690, 0.2540, 0.2141, 0.1629]) -Greedy action tensor([ 1.0293, -0.8326, 0.0099, -0.6089]) tensor([0.5846, 0.0908, 0.2109, 0.1136]) -Greedy action tensor([ 0.4156, -0.2856, -0.0276, -0.2090]) tensor([0.3740, 0.1855, 0.2401, 0.2003]) -Greedy action tensor([ 0.8298, -0.3933, -0.0765, -0.1719]) tensor([0.4841, 0.1425, 0.1956, 0.1778]) -Greedy action tensor([ 0.6587, -0.5875, -0.1080, -0.1639]) tensor([0.4563, 0.1312, 0.2120, 0.2004]) -Greedy action tensor([ 0.7826, -0.6329, -0.0252, -0.3450]) tensor([0.4969, 0.1207, 0.2215, 0.1609]) -Greedy action tensor([ 0.7245, -0.2737, -0.0459, -0.0781]) tensor([0.4387, 0.1617, 0.2030, 0.1966]) -Greedy action tensor([ 0.8955, -0.5730, 0.0825, -0.3238]) tensor([0.5078, 0.1169, 0.2252, 0.1500]) -Greedy action tensor([ 0.8386, -0.5483, -0.0805, -0.5569]) tensor([0.5273, 0.1317, 0.2103, 0.1306]) -Greedy action tensor([ 0.6989, -0.4358, -0.1258, -0.2042]) tensor([0.4619, 0.1485, 0.2025, 0.1872]) -Greedy action tensor([ 0.5850, -0.6070, -0.1169, -0.2620]) tensor([0.4489, 0.1363, 0.2225, 0.1924]) -Greedy action tensor([ 0.9822, -0.7115, 0.1207, -0.5462]) tensor([0.5485, 0.1008, 0.2317, 0.1190]) -Greedy action tensor([ 0.5057, -0.2734, -0.0391, -0.0948]) tensor([0.3865, 0.1773, 0.2242, 0.2120]) -Greedy action tensor([ 0.6661, -0.4873, -0.0430, -0.4631]) tensor([0.4693, 0.1481, 0.2309, 0.1517]) -Greedy action tensor([ 0.9057, -0.6673, -0.0213, -0.1598]) tensor([0.5134, 0.1065, 0.2032, 0.1769]) -Greedy action tensor([ 0.7909, -0.3792, 0.1750, -0.0992]) tensor([0.4422, 0.1372, 0.2389, 0.1816]) -Greedy action tensor([ 0.3991, -0.2218, -0.1796, -0.0108]) tensor([0.3621, 0.1946, 0.2030, 0.2403]) -Greedy action tensor([ 0.8507, -0.5606, 0.1600, -0.3412]) tensor([0.4881, 0.1190, 0.2447, 0.1482]) -Greedy action tensor([ 0.8155, -0.8311, 0.0039, -0.5005]) tensor([0.5249, 0.1012, 0.2331, 0.1408]) -Greedy action tensor([ 0.7837, -0.4617, -0.0688, -0.2515]) tensor([0.4832, 0.1391, 0.2060, 0.1716]) -Greedy action tensor([ 0.5776, -0.4211, -0.0591, -0.4672]) tensor([0.4446, 0.1638, 0.2352, 0.1564]) -Greedy action tensor([ 0.7939, 0.0669, -0.0531, -0.1298]) tensor([0.4331, 0.2093, 0.1857, 0.1720]) -Greedy action tensor([ 1.0870, -0.6249, 0.0580, -0.6741]) tensor([0.5849, 0.1056, 0.2090, 0.1005]) -Greedy action tensor([ 0.4237, -0.0447, -0.0323, -0.1153]) tensor([0.3517, 0.2202, 0.2229, 0.2052]) -Greedy action tensor([ 0.5142, -0.5010, -0.1227, -0.1100]) tensor([0.4120, 0.1493, 0.2179, 0.2207]) -Greedy action tensor([ 1.1035, -0.7560, 0.4152, -0.6157]) tensor([0.5442, 0.0848, 0.2734, 0.0975]) -Greedy action tensor([ 0.9970, -0.5424, -0.1091, -0.3499]) tensor([0.5539, 0.1188, 0.1833, 0.1440]) -Greedy action tensor([ 1.0059, -0.5519, -0.0448, -0.3606]) tensor([0.5509, 0.1160, 0.1926, 0.1405]) -Greedy action tensor([ 1.0616, -0.4533, -0.0164, -0.2995]) tensor([0.5505, 0.1210, 0.1873, 0.1411]) -Greedy action tensor([ 0.8139, -0.6717, 0.2789, -0.6349]) tensor([0.4885, 0.1106, 0.2861, 0.1147]) -Greedy action tensor([ 0.4595, 0.1056, -0.2981, -0.0543]) tensor([0.3611, 0.2535, 0.1693, 0.2160]) -Greedy action tensor([ 0.9768, -0.5402, 0.0101, -0.2653]) tensor([0.5295, 0.1162, 0.2014, 0.1529]) -Greedy action tensor([ 0.9636, -0.8082, -0.2506, -0.9574]) tensor([0.6198, 0.1054, 0.1840, 0.0908]) -Greedy action tensor([ 0.9330, -0.7484, 0.0162, -0.4979]) tensor([0.5479, 0.1020, 0.2191, 0.1310]) -Greedy action tensor([ 0.8581, -0.5441, 0.1225, -0.1847]) tensor([0.4813, 0.1184, 0.2306, 0.1696]) -Greedy action tensor([ 0.9757, -0.6774, -0.1395, -0.6648]) tensor([0.5837, 0.1118, 0.1914, 0.1132]) -Greedy action tensor([ 0.9534, -0.3544, 0.0299, -0.5958]) tensor([0.5319, 0.1438, 0.2112, 0.1130]) -Greedy action tensor([ 0.9043, -0.3108, -0.1141, -0.4468]) tensor([0.5217, 0.1548, 0.1884, 0.1351]) -Greedy action tensor([ 0.9245, -0.6481, 0.1828, -0.3588]) tensor([0.5100, 0.1058, 0.2429, 0.1413]) -Greedy action tensor([ 0.8764, -0.3542, -0.0595, -0.1795]) tensor([0.4921, 0.1437, 0.1930, 0.1712]) -Greedy action tensor([ 0.7899, -0.4838, 0.0046, -0.2660]) tensor([0.4799, 0.1343, 0.2188, 0.1670]) -Greedy action tensor([ 0.8583, -0.4227, 0.0271, -0.2687]) tensor([0.4909, 0.1363, 0.2138, 0.1590]) -Greedy action tensor([ 0.7009, -0.2765, 0.0297, -0.0748]) tensor([0.4259, 0.1603, 0.2177, 0.1961]) -Greedy action tensor([ 0.5228, -0.4142, 0.0608, -0.4286]) tensor([0.4153, 0.1627, 0.2616, 0.1604]) -Greedy action tensor([ 0.2393, 0.0386, -0.1578, -0.2480]) tensor([0.3221, 0.2635, 0.2165, 0.1979]) -Greedy action tensor([ 0.7546, -0.4314, -0.0205, -0.2623]) tensor([0.4700, 0.1435, 0.2165, 0.1700]) -Greedy action tensor([ 0.6084, -0.4018, -0.0470, -0.3556]) tensor([0.4416, 0.1608, 0.2293, 0.1684]) -Greedy action tensor([ 0.4398, -0.3496, 0.0040, -0.1416]) tensor([0.3759, 0.1707, 0.2431, 0.2102]) -Greedy action tensor([ 0.9368, -0.8636, -0.0045, -0.5991]) tensor([0.5648, 0.0933, 0.2203, 0.1216]) -Greedy action tensor([ 0.3962, -0.3196, -0.0840, -0.2427]) tensor([0.3795, 0.1855, 0.2348, 0.2003]) -Greedy action tensor([ 0.9650, -0.6625, -0.1753, -0.3039]) tensor([0.5564, 0.1093, 0.1779, 0.1564]) -Greedy action tensor([ 0.9291, -0.9272, 0.0688, -0.5191]) tensor([0.5512, 0.0861, 0.2332, 0.1295]) -Greedy action tensor([ 0.3983, -0.1430, 0.0601, -0.0703]) tensor([0.3424, 0.1992, 0.2441, 0.2143]) -Greedy action tensor([ 0.4701, 0.0761, -0.0273, 0.1011]) tensor([0.3363, 0.2267, 0.2045, 0.2325]) -Greedy action tensor([ 0.5584, -0.2166, -0.0091, 0.0338]) tensor([0.3818, 0.1759, 0.2164, 0.2259]) -Greedy action tensor([ 1.7666, -0.8772, -0.8473, 0.4302]) tensor([0.7107, 0.0505, 0.0521, 0.1868]) -Greedy action tensor([ 1.4952, -0.8648, -0.3819, 0.5008]) tensor([0.6183, 0.0584, 0.0946, 0.2287]) -Greedy action tensor([ 1.1157, -0.2638, -0.5923, 0.1210]) tensor([0.5547, 0.1396, 0.1005, 0.2052]) -Greedy action tensor([ 1.6744, -0.5866, -0.7942, 0.2145]) tensor([0.7036, 0.0734, 0.0596, 0.1634]) -Greedy action tensor([ 1.4924, -0.1808, -0.3078, 0.5599]) tensor([0.5726, 0.1074, 0.0946, 0.2253]) -Greedy action tensor([ 2.6145, -1.7720, 0.0048, 0.6826]) tensor([0.8124, 0.0101, 0.0598, 0.1177]) -Greedy action tensor([ 2.0826, -0.9804, -0.4969, 1.1180]) tensor([0.6650, 0.0311, 0.0504, 0.2535]) -Greedy action tensor([ 1.3934, -0.4113, -0.1450, 0.0904]) tensor([0.6057, 0.0997, 0.1301, 0.1646]) -Greedy action tensor([ 1.7096, -0.2092, -1.0969, 0.4975]) tensor([0.6646, 0.0975, 0.0401, 0.1977]) -Greedy action tensor([ 1.8858, -1.0156, -0.3695, 0.6308]) tensor([0.6921, 0.0380, 0.0726, 0.1973]) -Greedy action tensor([ 1.5569, -0.1670, -0.6415, 0.4088]) tensor([0.6224, 0.1110, 0.0691, 0.1975]) -Greedy action tensor([ 1.1132, -0.2323, -0.9680, 0.4256]) tensor([0.5297, 0.1379, 0.0661, 0.2663]) -Greedy action tensor([ 1.4466, -0.3424, -0.9425, 0.4754]) tensor([0.6107, 0.1021, 0.0560, 0.2312]) -Greedy action tensor([ 1.0387, -0.5134, 0.0249, 0.1071]) tensor([0.5080, 0.1076, 0.1843, 0.2001]) -Greedy action tensor([ 0.9080, -0.0813, -0.1118, 0.0480]) tensor([0.4639, 0.1725, 0.1673, 0.1963]) -Greedy action tensor([ 1.8857, 0.3375, -0.3103, 0.4713]) tensor([0.6382, 0.1357, 0.0710, 0.1551]) -Greedy action tensor([ 1.0903, -0.2816, -0.5753, 0.5228]) tensor([0.4976, 0.1262, 0.0941, 0.2821]) -Greedy action tensor([ 1.6370, -0.6945, -0.4065, 0.3646]) tensor([0.6636, 0.0645, 0.0860, 0.1859]) -Greedy action tensor([ 1.3221, -0.5016, -0.3300, 0.2006]) tensor([0.5956, 0.0962, 0.1141, 0.1941]) -Greedy action tensor([ 2.0228, -0.9490, -0.3189, 0.3649]) tensor([0.7474, 0.0383, 0.0719, 0.1424]) -Greedy action tensor([ 1.6375, -0.5211, -0.6592, 0.5249]) tensor([0.6473, 0.0748, 0.0651, 0.2128]) -Greedy action tensor([ 1.5753, -0.6721, -0.4884, 0.4014]) tensor([0.6486, 0.0685, 0.0824, 0.2005]) -Greedy action tensor([ 1.1528, -0.3562, 0.0138, 0.3821]) tensor([0.4990, 0.1103, 0.1598, 0.2309]) -Greedy action tensor([ 1.3838, 0.0402, -1.0251, 0.2766]) tensor([0.5948, 0.1552, 0.0535, 0.1966]) -Greedy action tensor([ 1.3549, -0.2487, -0.2729, 0.3223]) tensor([0.5702, 0.1147, 0.1120, 0.2031]) -Greedy action tensor([ 1.3923, -0.5203, -0.2641, 0.1886]) tensor([0.6103, 0.0901, 0.1165, 0.1831]) -Greedy action tensor([ 2.0603, -1.0067, -0.5806, 0.7620]) tensor([0.7190, 0.0335, 0.0513, 0.1963]) -Greedy action tensor([ 2.1110, -0.1701, -1.3967, 0.2552]) tensor([0.7761, 0.0793, 0.0233, 0.1213]) -Greedy action tensor([ 1.2900, -0.1801, -1.0594, 0.2146]) tensor([0.6001, 0.1380, 0.0573, 0.2047]) -Greedy action tensor([ 1.0837, -0.4524, -0.5548, 0.1849]) tensor([0.5505, 0.1185, 0.1069, 0.2241]) -Greedy action tensor([ 1.1693, -0.7133, -0.0363, 0.4324]) tensor([0.5181, 0.0788, 0.1552, 0.2479]) -Greedy action tensor([ 1.4590, -0.4134, -0.5753, 0.3079]) tensor([0.6247, 0.0960, 0.0817, 0.1976]) -Greedy action tensor([ 1.4284, -0.4646, -0.1706, 0.2594]) tensor([0.6012, 0.0906, 0.1215, 0.1868]) -Greedy action tensor([ 1.7971, 0.0954, -1.0150, 0.2195]) tensor([0.6902, 0.1259, 0.0415, 0.1425]) -Greedy action tensor([ 1.6094, -0.7932, -0.2502, 0.3203]) tensor([0.6571, 0.0595, 0.1023, 0.1811]) -Greedy action tensor([ 2.0240, -1.0172, -0.2988, 0.1606]) tensor([0.7687, 0.0367, 0.0753, 0.1193]) -Greedy action tensor([ 1.3735, -0.2102, -0.9117, 0.3872]) tensor([0.5953, 0.1222, 0.0606, 0.2220]) -Greedy action tensor([ 1.4317, -0.3705, -0.1887, 0.1695]) tensor([0.6076, 0.1002, 0.1202, 0.1720]) -Greedy action tensor([ 1.2015, -0.4138, -0.1888, 0.1370]) tensor([0.5578, 0.1109, 0.1389, 0.1924]) -Greedy action tensor([ 1.8137, -0.5761, -0.5613, 1.0618]) tensor([0.6038, 0.0553, 0.0562, 0.2847]) -Greedy action tensor([ 1.6132, -0.2190, -1.3541, 0.0404]) tensor([0.7047, 0.1128, 0.0363, 0.1462]) -Greedy action tensor([ 0.7474, -0.0350, -0.2495, 0.2663]) tensor([0.4091, 0.1871, 0.1510, 0.2529]) -Greedy action tensor([ 2.0604, -0.7513, -0.5033, 0.2902]) tensor([0.7649, 0.0460, 0.0589, 0.1303]) -Greedy action tensor([ 1.3217, -0.2789, -0.7308, 0.4019]) tensor([0.5784, 0.1167, 0.0743, 0.2306]) -Greedy action tensor([ 1.0581, -0.2098, -0.7107, 0.2214]) tensor([0.5305, 0.1493, 0.0905, 0.2298]) -Greedy action tensor([ 1.3099, -0.5080, -0.4513, 0.4214]) tensor([0.5729, 0.0930, 0.0984, 0.2356]) -Greedy action tensor([ 1.0468, -0.1443, -0.7555, 0.3177]) tensor([0.5125, 0.1558, 0.0845, 0.2472]) -Greedy action tensor([ 2.4286, -1.1520, -0.1992, 0.7070]) tensor([0.7819, 0.0218, 0.0565, 0.1398]) -Greedy action tensor([ 2.9766, -1.5888, -0.2044, 0.8197]) tensor([0.8564, 0.0089, 0.0356, 0.0991]) -Greedy action tensor([ 1.0148, -0.2754, -0.1549, 0.1562]) tensor([0.4977, 0.1370, 0.1545, 0.2109]) -Greedy action tensor([ 1.6642, -0.1691, -0.0992, 0.6821]) tensor([0.5862, 0.0937, 0.1005, 0.2195]) -Greedy action tensor([ 1.9977, -0.6035, -0.4057, 0.6165]) tensor([0.7063, 0.0524, 0.0639, 0.1775]) -Greedy action tensor([ 1.9845, -1.1221, -0.7092, 0.1888]) tensor([0.7822, 0.0350, 0.0529, 0.1299]) -Greedy action tensor([ 1.4112, -0.1738, -0.1769, 0.0727]) tensor([0.5983, 0.1226, 0.1222, 0.1569]) -Greedy action tensor([ 1.2837, -0.4788, -0.3246, 0.0297]) tensor([0.6034, 0.1036, 0.1208, 0.1722]) -Greedy action tensor([ 1.9087, -0.3248, -0.3476, 0.0325]) tensor([0.7326, 0.0785, 0.0767, 0.1122]) -Greedy action tensor([ 1.5647, -0.7915, -0.1655, 0.4410]) tensor([0.6261, 0.0593, 0.1110, 0.2035]) -Greedy action tensor([ 1.7324, -0.6738, -0.3807, 0.3329]) tensor([0.6860, 0.0618, 0.0829, 0.1693]) -Greedy action tensor([ 1.0163, -0.1328, -0.2703, 0.1071]) tensor([0.5010, 0.1588, 0.1384, 0.2018]) -Greedy action tensor([ 1.6667, -0.6906, -0.6848, 0.4434]) tensor([0.6738, 0.0638, 0.0642, 0.1983]) -Greedy action tensor([ 1.7077, -0.5632, -0.8107, 0.0876]) tensor([0.7237, 0.0747, 0.0583, 0.1432]) -Greedy action tensor([ 1.3979, -0.2586, -0.5294, -0.0067]) tensor([0.6322, 0.1206, 0.0920, 0.1552]) -Greedy action tensor([ 1.8628, 0.1430, -0.1819, 0.4469]) tensor([0.6447, 0.1155, 0.0834, 0.1565]) -Greedy action tensor([ 1.3654, -0.4439, 0.0427, 0.3284]) tensor([0.5603, 0.0918, 0.1493, 0.1986]) -Greedy action tensor([ 1.8067, -0.5945, -0.1824, 0.3869]) tensor([0.6807, 0.0617, 0.0931, 0.1646]) -Greedy action tensor([ 2.6747, -1.6263, -0.4817, 1.0864]) tensor([0.7934, 0.0108, 0.0338, 0.1621]) -Greedy action tensor([ 1.2538, -0.1936, -0.4620, 0.1253]) tensor([0.5752, 0.1353, 0.1034, 0.1861]) -Greedy action tensor([ 1.9172, -0.8406, -0.2600, 0.2718]) tensor([0.7301, 0.0463, 0.0828, 0.1409]) -Greedy action tensor([ 1.2905, -0.6561, -0.8692, 0.5577]) tensor([0.5752, 0.0821, 0.0663, 0.2764]) -Greedy action tensor([ 1.7755, -1.0595, -0.4293, 0.3434]) tensor([0.7103, 0.0417, 0.0783, 0.1696]) -Greedy action tensor([ 0.7545, -0.2816, 0.0623, -0.0069]) tensor([0.4306, 0.1528, 0.2155, 0.2011]) -Greedy action tensor([ 1.7322, -1.4402, -0.1632, -0.2467]) tensor([0.7517, 0.0315, 0.1129, 0.1039]) -Greedy action tensor([ 1.4835, -0.4665, -0.2119, 0.2829]) tensor([0.6147, 0.0875, 0.1128, 0.1850]) -Greedy action tensor([ 1.8509, -0.1901, -0.7661, 0.2439]) tensor([0.7126, 0.0926, 0.0520, 0.1429]) -Greedy action tensor([ 1.5653, -0.2487, -0.4694, 0.2831]) tensor([0.6365, 0.1037, 0.0832, 0.1766]) -Greedy action tensor([ 1.6172, -0.0157, -0.5827, 0.3487]) tensor([0.6299, 0.1231, 0.0698, 0.1772]) -Greedy action tensor([ 1.3840, -0.4587, 0.2176, -0.1669]) tensor([0.5946, 0.0942, 0.1852, 0.1261]) -Greedy action tensor([ 1.9237, -0.7782, -0.5253, 0.1469]) tensor([0.7561, 0.0507, 0.0653, 0.1279]) -Greedy action tensor([ 1.7689, -0.9224, -0.4829, 0.7870]) tensor([0.6462, 0.0438, 0.0680, 0.2421]) -Greedy action tensor([ 0.9549, 0.0791, -0.9991, 0.3773]) tensor([0.4718, 0.1965, 0.0669, 0.2648]) -Greedy action tensor([ 1.0403, -0.3295, -0.2033, 0.1133]) tensor([0.5159, 0.1311, 0.1488, 0.2042]) -Greedy action tensor([-0.5968, -0.8995, 0.2262, -0.5991]) tensor([0.1994, 0.1474, 0.4542, 0.1990]) -Greedy action tensor([ 0.4714, -0.5551, 0.0120, -1.0075]) tensor([0.4509, 0.1615, 0.2848, 0.1028]) -Greedy action tensor([ 0.1719, -0.9614, 0.1760, 0.1596]) tensor([0.3018, 0.0972, 0.3030, 0.2981]) -Greedy action tensor([-0.4400, -0.6441, -0.5289, -1.1689]) tensor([0.3113, 0.2538, 0.2848, 0.1502]) -Greedy action tensor([-0.5614, -0.6270, -0.6405, 0.4515]) tensor([0.1781, 0.1668, 0.1646, 0.4905]) -Greedy action tensor([-0.2607, -0.3069, -0.0351, 0.6208]) tensor([0.1779, 0.1698, 0.2229, 0.4294]) -Greedy action tensor([ 1.0576, -0.4687, -0.3588, 1.1262]) tensor([0.3951, 0.0859, 0.0958, 0.4232]) -Greedy action tensor([ 0.2224, 0.1971, -0.0149, -0.4877]) tensor([0.3072, 0.2995, 0.2423, 0.1510]) -Greedy action tensor([-0.9322, -1.5138, -0.1749, -0.5723]) tensor([0.1951, 0.1091, 0.4161, 0.2796]) -Greedy action tensor([ 0.9555, -0.7717, -0.1135, 0.1569]) tensor([0.5073, 0.0902, 0.1742, 0.2283]) -Greedy action tensor([ 0.4516, -1.1392, 0.2981, 0.1979]) tensor([0.3524, 0.0718, 0.3023, 0.2735]) -Greedy action tensor([ 0.0619, 0.6754, 0.3190, -0.8153]) tensor([0.2195, 0.4054, 0.2838, 0.0913]) -Greedy action tensor([ 0.9982, -1.2464, 0.2283, -0.1432]) tensor([0.5295, 0.0561, 0.2452, 0.1691]) -Greedy action tensor([-0.6838, -0.5224, 0.6646, -0.1790]) tensor([0.1302, 0.1530, 0.5013, 0.2156]) -Greedy action tensor([-0.0981, -1.3919, 1.5017, -0.8176]) tensor([0.1490, 0.0408, 0.7377, 0.0725]) -Greedy action tensor([-0.1716, -0.2124, 0.4345, -0.6377]) tensor([0.2262, 0.2172, 0.4147, 0.1419]) -Greedy action tensor([ 0.2314, -1.4855, -0.1731, -0.5802]) tensor([0.4365, 0.0784, 0.2913, 0.1939]) -Greedy action tensor([ 1.0880, -0.9340, 0.2983, -0.5599]) tensor([0.5622, 0.0744, 0.2552, 0.1082]) -Greedy action tensor([-0.1472, -1.0740, 0.3212, 0.3924]) tensor([0.2124, 0.0841, 0.3393, 0.3643]) -Greedy action tensor([-0.2085, -0.7239, 0.1217, -0.7175]) tensor([0.2786, 0.1664, 0.3876, 0.1675]) -Greedy action tensor([-0.4998, -0.4290, 0.8734, -0.5566]) tensor([0.1436, 0.1541, 0.5667, 0.1356]) -Greedy action tensor([-0.1595, -0.7758, -0.7637, 0.0108]) tensor([0.3056, 0.1650, 0.1670, 0.3623]) -Greedy action tensor([-0.7627, -1.1562, 0.2029, -1.1002]) tensor([0.1994, 0.1345, 0.5237, 0.1423]) -Greedy action tensor([ 0.0156, 0.1378, -0.0575, -0.5808]) tensor([0.2770, 0.3130, 0.2575, 0.1526]) -Greedy action tensor([ 0.0811, -1.0343, 0.2301, -0.6360]) tensor([0.3360, 0.1101, 0.3899, 0.1640]) -Greedy action tensor([-0.8758, 0.1156, 0.6070, -0.0558]) tensor([0.0964, 0.2599, 0.4248, 0.2189]) -Greedy action tensor([ 0.4233, -0.9696, -0.3029, 0.4823]) tensor([0.3580, 0.0889, 0.1732, 0.3798]) -Greedy action tensor([ 0.5006, 0.7768, 0.4608, -0.7106]) tensor([0.2796, 0.3685, 0.2687, 0.0833]) -Greedy action tensor([-0.0829, -0.4212, -0.5685, 0.1553]) tensor([0.2780, 0.1982, 0.1710, 0.3528]) -Greedy action tensor([-1.0338, -0.2826, 0.7048, -0.7687]) tensor([0.0989, 0.2096, 0.5626, 0.1289]) -Greedy action tensor([-0.1366, -0.0737, 0.9457, 0.4309]) tensor([0.1475, 0.1571, 0.4353, 0.2601]) -Greedy action tensor([-1.1476, -0.2450, 0.2225, -0.7093]) tensor([0.1117, 0.2755, 0.4397, 0.1732]) -Greedy action tensor([-0.1995, -1.8067, 1.0502, 0.4954]) tensor([0.1494, 0.0299, 0.5213, 0.2993]) -Greedy action tensor([ 0.5928, -2.2100, -0.4130, -0.3016]) tensor([0.5449, 0.0330, 0.1993, 0.2228]) -Greedy action tensor([ 0.2418, -0.5688, 0.2537, -0.1921]) tensor([0.3221, 0.1432, 0.3260, 0.2087]) -Greedy action tensor([-0.1705, -0.0310, 0.5268, -0.7483]) tensor([0.2119, 0.2436, 0.4256, 0.1189]) -Greedy action tensor([ 0.0941, -0.4339, 0.1560, 0.0806]) tensor([0.2747, 0.1620, 0.2923, 0.2710]) -Greedy action tensor([-1.3510, -0.6573, 0.0971, -0.2611]) tensor([0.0977, 0.1956, 0.4159, 0.2907]) -Greedy action tensor([0.9514, 0.7039, 0.0379, 0.3727]) tensor([0.3646, 0.2847, 0.1463, 0.2044]) -Greedy action tensor([ 0.6851, -1.5610, -0.9632, 0.5452]) tensor([0.4613, 0.0488, 0.0887, 0.4011]) -Greedy action tensor([-1.0816, 0.0826, 0.6306, -1.3798]) tensor([0.0954, 0.3055, 0.5284, 0.0708]) -Greedy action tensor([-1.5023, -0.5501, -0.3252, 0.2197]) tensor([0.0804, 0.2085, 0.2610, 0.4501]) -Greedy action tensor([-1.6545, -0.0400, 0.9950, -0.9108]) tensor([0.0449, 0.2256, 0.6351, 0.0944]) -Greedy action tensor([-0.2175, -0.3873, 0.0545, -0.9131]) tensor([0.2736, 0.2309, 0.3591, 0.1365]) -Greedy action tensor([ 0.1257, -1.0967, -0.5620, 0.2787]) tensor([0.3375, 0.0994, 0.1697, 0.3933]) -Greedy action tensor([ 0.0018, -0.8158, -0.2724, 0.0799]) tensor([0.3046, 0.1345, 0.2316, 0.3294]) -Greedy action tensor([ 0.3645, -0.0926, 0.3044, 0.0245]) tensor([0.3043, 0.1926, 0.2865, 0.2166]) -Greedy action tensor([-0.7933, 0.0933, -0.6839, 0.0137]) tensor([0.1474, 0.3578, 0.1645, 0.3304]) -Greedy action tensor([ 0.9087, -1.0177, -0.5012, 0.5880]) tensor([0.4727, 0.0689, 0.1154, 0.3430]) -Greedy action tensor([ 0.2249, -0.4768, 0.3066, 0.0369]) tensor([0.2933, 0.1454, 0.3183, 0.2430]) -Greedy action tensor([-0.8172, -1.4690, 0.7052, -1.1695]) tensor([0.1469, 0.0766, 0.6733, 0.1033]) -Greedy action tensor([-0.5680, -0.0624, -0.8481, 0.3508]) tensor([0.1689, 0.2801, 0.1277, 0.4234]) -Greedy action tensor([-0.2589, -0.2386, 0.7020, -1.1361]) tensor([0.1980, 0.2021, 0.5176, 0.0824]) -Greedy action tensor([-1.0925, 0.1970, 1.0391, -0.6095]) tensor([0.0681, 0.2473, 0.5741, 0.1104]) -Greedy action tensor([ 0.2471, 0.4287, -0.2819, -0.3654]) tensor([0.3003, 0.3601, 0.1769, 0.1627]) -Greedy action tensor([-1.4143, -0.8317, 0.7072, -1.1831]) tensor([0.0807, 0.1445, 0.6732, 0.1017]) -Greedy action tensor([ 0.6944, -0.0830, 0.4449, 0.6810]) tensor([0.3100, 0.1425, 0.2416, 0.3059]) -Greedy action tensor([ 0.8458, -0.4343, 0.7784, -0.1670]) tensor([0.3882, 0.1079, 0.3629, 0.1410]) -Greedy action tensor([-0.4544, -0.6435, -1.0146, -0.0228]) tensor([0.2539, 0.2102, 0.1450, 0.3909]) -Greedy action tensor([ 0.3192, -0.4267, 1.3088, -0.7511]) tensor([0.2218, 0.1052, 0.5968, 0.0761]) -Greedy action tensor([ 0.4911, -0.0228, -0.0252, -0.2766]) tensor([0.3761, 0.2250, 0.2244, 0.1745]) -Greedy action tensor([-0.0723, -1.5400, -0.4670, 0.7082]) tensor([0.2447, 0.0564, 0.1649, 0.5340]) -Greedy action tensor([-0.9903, -0.6737, -0.9046, 0.0853]) tensor([0.1564, 0.2146, 0.1704, 0.4585]) -Greedy action tensor([ 0.7524, -1.2151, -0.3166, 0.5138]) tensor([0.4404, 0.0616, 0.1512, 0.3469]) -Greedy action tensor([-0.0597, -0.7709, -1.3277, -0.1214]) tensor([0.3687, 0.1810, 0.1037, 0.3466]) -Greedy action tensor([ 0.9859, -1.4976, -0.3244, 0.5386]) tensor([0.5019, 0.0419, 0.1354, 0.3209]) -Greedy action tensor([-0.2189, -1.0370, 0.1954, -0.8977]) tensor([0.2889, 0.1275, 0.4371, 0.1465]) -Greedy action tensor([-0.4419, -0.4233, 0.5501, -0.1591]) tensor([0.1655, 0.1686, 0.4463, 0.2196]) -Greedy action tensor([-0.1898, -0.9828, 0.9677, -0.8106]) tensor([0.1934, 0.0875, 0.6152, 0.1039]) -Greedy action tensor([-0.4784, -0.8290, -0.2177, 0.5373]) tensor([0.1735, 0.1222, 0.2252, 0.4791]) -Greedy action tensor([-0.5434, -0.4139, 0.3550, -1.1162]) tensor([0.1939, 0.2207, 0.4761, 0.1093]) -Greedy action tensor([-0.0588, -1.6773, 0.4846, 0.4099]) tensor([0.2213, 0.0439, 0.3811, 0.3537]) -Greedy action tensor([ 0.7086, -0.6657, 0.0832, 0.0111]) tensor([0.4375, 0.1107, 0.2341, 0.2178]) -Greedy action tensor([ 0.2527, -0.9089, 0.4348, -0.7159]) tensor([0.3458, 0.1082, 0.4148, 0.1312]) -Greedy action tensor([-0.3547, -0.3986, -0.2320, 0.0088]) tensor([0.2210, 0.2115, 0.2498, 0.3178]) -Greedy action tensor([ 0.2733, -0.1427, 0.2181, -0.1232]) tensor([0.3050, 0.2012, 0.2886, 0.2052]) -Greedy action tensor([ 1.9242, -1.3885, -0.4426, 0.7409]) tensor([0.6962, 0.0254, 0.0653, 0.2132]) -Greedy action tensor([ 0.5044, -1.3159, 0.1895, -0.4072]) tensor([0.4360, 0.0706, 0.3182, 0.1752]) -Greedy action tensor([-1.1261, -0.2395, -0.2986, -1.5656]) tensor([0.1573, 0.3817, 0.3597, 0.1013]) -Greedy action tensor([-0.4554, -0.7760, -0.6666, 0.0228]) tensor([0.2411, 0.1749, 0.1952, 0.3889]) -Greedy action tensor([-0.1952, 0.2162, -0.6111, 0.2669]) tensor([0.2103, 0.3173, 0.1387, 0.3338]) -Greedy action tensor([-1.4603, -0.0448, 0.4426, -0.1498]) tensor([0.0644, 0.2652, 0.4317, 0.2387]) -Greedy action tensor([-1.7254, -0.4695, 0.5671, -0.0578]) tensor([0.0507, 0.1781, 0.5023, 0.2689]) -Greedy action tensor([-1.8492, -0.4622, 0.6231, -0.1372]) tensor([0.0447, 0.1788, 0.5292, 0.2474]) -Greedy action tensor([-1.8764, -0.4599, 0.6405, -0.1428]) tensor([0.0432, 0.1779, 0.5347, 0.2443]) -Greedy action tensor([-1.8678, -0.4069, 0.6637, -0.1153]) tensor([0.0423, 0.1822, 0.5316, 0.2439]) -Greedy action tensor([-1.9351, -0.4455, 0.6692, -0.1671]) tensor([0.0403, 0.1787, 0.5449, 0.2361]) -Greedy action tensor([-1.9099, -0.6252, 0.3858, -0.3492]) tensor([0.0518, 0.1872, 0.5144, 0.2467]) -Greedy action tensor([-1.9230, -0.4307, 0.6577, -0.1675]) tensor([0.0409, 0.1820, 0.5404, 0.2368]) -Greedy action tensor([-1.5061, -0.7291, 0.8848, 0.0536]) tensor([0.0530, 0.1154, 0.5793, 0.2523]) -Greedy action tensor([-1.9380, -0.4428, 0.6639, -0.1756]) tensor([0.0404, 0.1800, 0.5444, 0.2352]) -Greedy action tensor([-1.9278, -0.4498, 0.6618, -0.1710]) tensor([0.0408, 0.1789, 0.5438, 0.2365]) -Greedy action tensor([-1.6278, -0.3107, 0.6337, -0.0082]) tensor([0.0516, 0.1926, 0.4952, 0.2606]) -Greedy action tensor([-1.9171, -0.3973, 0.6483, -0.1619]) tensor([0.0410, 0.1876, 0.5339, 0.2374]) -Greedy action tensor([-1.6024, -0.5726, 0.4961, 0.0149]) tensor([0.0588, 0.1648, 0.4798, 0.2965]) -Greedy action tensor([-1.8990, -0.4380, 0.6502, -0.1599]) tensor([0.0420, 0.1811, 0.5377, 0.2392]) -Greedy action tensor([-1.8606, -0.4722, 0.6015, -0.1455]) tensor([0.0449, 0.1798, 0.5261, 0.2493]) -Greedy action tensor([-0.8313, 0.2687, -0.0954, -0.2831]) tensor([0.1278, 0.3841, 0.2669, 0.2212]) -Greedy action tensor([-1.8729, -0.1614, 0.5883, -0.1243]) tensor([0.0417, 0.2307, 0.4882, 0.2394]) -Greedy action tensor([-1.1723, -0.6533, 0.3893, -0.6399]) tensor([0.1093, 0.1836, 0.5209, 0.1861]) -Greedy action tensor([-1.8944, -0.4584, 0.6469, -0.1570]) tensor([0.0424, 0.1783, 0.5384, 0.2410]) -Greedy action tensor([-1.5585, -0.1685, 0.4291, -0.0238]) tensor([0.0590, 0.2368, 0.4305, 0.2737]) -Greedy action tensor([-1.8454, -0.4438, 0.6186, -0.1281]) tensor([0.0447, 0.1815, 0.5250, 0.2488]) -Greedy action tensor([-1.9301, -0.4288, 0.6598, -0.1724]) tensor([0.0406, 0.1823, 0.5415, 0.2356]) -Greedy action tensor([-1.6695, 0.2488, 0.4284, -0.0019]) tensor([0.0470, 0.3203, 0.3833, 0.2493]) -Greedy action tensor([-1.6843, -0.3497, 0.4687, -0.1679]) tensor([0.0557, 0.2114, 0.4793, 0.2536]) -Greedy action tensor([-1.6618, -0.2748, 0.5217, -0.1603]) tensor([0.0544, 0.2179, 0.4833, 0.2444]) -Greedy action tensor([-1.8422, -0.2647, 0.5947, -0.1278]) tensor([0.0438, 0.2121, 0.5009, 0.2432]) -Greedy action tensor([-1.7879, -0.3744, 0.5809, -0.0889]) tensor([0.0470, 0.1933, 0.5025, 0.2572]) -Greedy action tensor([-1.6294, -0.5299, 0.4903, -0.0152]) tensor([0.0576, 0.1730, 0.4799, 0.2895]) -Greedy action tensor([-1.8823, -0.4547, 0.6428, -0.1479]) tensor([0.0429, 0.1787, 0.5355, 0.2429]) -Greedy action tensor([-1.8677, -0.4316, 0.6270, -0.1434]) tensor([0.0436, 0.1833, 0.5285, 0.2446]) -Greedy action tensor([-1.8352, -0.1901, 0.6583, -0.3072]) tensor([0.0437, 0.2263, 0.5287, 0.2013]) -Greedy action tensor([-1.9255, -0.4458, 0.6601, -0.1692]) tensor([0.0409, 0.1796, 0.5427, 0.2368]) -Greedy action tensor([-1.8339, -0.4233, 0.6141, -0.1203]) tensor([0.0450, 0.1845, 0.5207, 0.2498]) -Greedy action tensor([-1.9232, -0.4594, 0.6567, -0.1703]) tensor([0.0412, 0.1780, 0.5433, 0.2376]) -Greedy action tensor([-1.8073, -0.4459, 0.6022, -0.1096]) tensor([0.0465, 0.1815, 0.5178, 0.2541]) -Greedy action tensor([-1.4677, -0.4060, 0.4005, 0.0806]) tensor([0.0664, 0.1918, 0.4297, 0.3121]) -Greedy action tensor([-1.0069, -0.2995, 0.2514, 0.4600]) tensor([0.0919, 0.1864, 0.3234, 0.3984]) -Greedy action tensor([-1.3308, -0.6030, 0.3376, 0.1700]) tensor([0.0778, 0.1610, 0.4124, 0.3488]) -Greedy action tensor([-1.2548, -0.3162, 0.2627, 0.2019]) tensor([0.0806, 0.2060, 0.3675, 0.3459]) -Greedy action tensor([-1.8870, -0.4509, 0.6415, -0.1548]) tensor([0.0428, 0.1797, 0.5358, 0.2417]) -Greedy action tensor([-1.9287, -0.4435, 0.6627, -0.1719]) tensor([0.0407, 0.1798, 0.5435, 0.2359]) -Greedy action tensor([-1.7447, -0.3540, 0.6580, -0.0744]) tensor([0.0468, 0.1879, 0.5169, 0.2485]) -Greedy action tensor([-1.9290, -0.4394, 0.6599, -0.1714]) tensor([0.0407, 0.1807, 0.5424, 0.2362]) -Greedy action tensor([-1.9353, -0.4406, 0.6612, -0.1756]) tensor([0.0405, 0.1806, 0.5435, 0.2354]) -Greedy action tensor([-1.8772, -0.4346, 0.6343, -0.1477]) tensor([0.0431, 0.1825, 0.5313, 0.2431]) -Greedy action tensor([-1.7053, -0.4035, 0.5645, -0.0461]) tensor([0.0510, 0.1875, 0.4935, 0.2680]) -Greedy action tensor([-1.6281, 0.0342, 0.4392, 0.0642]) tensor([0.0510, 0.2689, 0.4031, 0.2770]) -Greedy action tensor([-0.6710, 0.3275, 0.4611, 0.5744]) tensor([0.0972, 0.2637, 0.3015, 0.3376]) -Greedy action tensor([-1.4887, 0.5693, 0.3583, -0.0923]) tensor([0.0521, 0.4076, 0.3301, 0.2103]) -Greedy action tensor([-1.8727, -0.2378, 0.6010, -0.1197]) tensor([0.0421, 0.2158, 0.4993, 0.2429]) -Greedy action tensor([-1.9019, -0.4140, 0.6505, -0.1535]) tensor([0.0416, 0.1844, 0.5347, 0.2393]) -Greedy action tensor([0.3966, 1.3345, 0.2850, 0.9768]) tensor([0.1604, 0.4097, 0.1434, 0.2865]) -Greedy action tensor([-1.7683, -0.4682, 0.5638, -0.0835]) tensor([0.0491, 0.1802, 0.5059, 0.2648]) -Greedy action tensor([-1.9177, -0.4065, 0.6477, -0.1638]) tensor([0.0411, 0.1864, 0.5349, 0.2376]) -Greedy action tensor([-1.7617, -0.4471, 0.5780, -0.0791]) tensor([0.0488, 0.1818, 0.5067, 0.2627]) -Greedy action tensor([-1.9000, -0.4433, 0.6494, -0.1550]) tensor([0.0420, 0.1802, 0.5374, 0.2404]) -Greedy action tensor([-1.3824, 0.2545, 0.2881, 0.2070]) tensor([0.0611, 0.3142, 0.3250, 0.2997]) -Greedy action tensor([-1.8696, -0.3764, 0.6489, -0.1043]) tensor([0.0422, 0.1878, 0.5235, 0.2465]) -Greedy action tensor([-1.5625, 0.3648, 0.3961, -0.0866]) tensor([0.0517, 0.3554, 0.3666, 0.2263]) -Greedy action tensor([0.7628, 0.5631, 0.5740, 1.3649]) tensor([0.2236, 0.1831, 0.1851, 0.4082]) -Greedy action tensor([-1.9327, -0.4363, 0.6627, -0.1739]) tensor([0.0405, 0.1810, 0.5432, 0.2353]) -Greedy action tensor([-1.6557, -0.3151, 0.5855, 0.0324]) tensor([0.0509, 0.1946, 0.4790, 0.2755]) -Greedy action tensor([-1.8470, -0.3855, 0.6444, -0.1060]) tensor([0.0433, 0.1867, 0.5230, 0.2470]) -Greedy action tensor([-1.9049, -0.4361, 0.6465, -0.1602]) tensor([0.0419, 0.1818, 0.5368, 0.2396]) -Greedy action tensor([-0.9896, -0.4020, 0.2761, 0.3159]) tensor([0.0997, 0.1793, 0.3533, 0.3677]) -Greedy action tensor([-1.6814, 0.0744, 0.4590, -0.0445]) tensor([0.0489, 0.2833, 0.4162, 0.2515]) -Greedy action tensor([-1.8708, -0.4454, 0.6860, -0.1114]) tensor([0.0419, 0.1743, 0.5403, 0.2434]) -Greedy action tensor([-1.7183, -0.3317, 0.5913, -0.0275]) tensor([0.0488, 0.1952, 0.4913, 0.2646]) -Greedy action tensor([-1.9095, -0.3975, 0.6455, -0.1568]) tensor([0.0414, 0.1876, 0.5324, 0.2387]) -Greedy action tensor([-1.1448, 0.6332, 0.3689, 0.3897]) tensor([0.0621, 0.3676, 0.2822, 0.2881]) -Greedy action tensor([-1.4370, -0.3637, 0.5958, -0.3687]) tensor([0.0691, 0.2021, 0.5276, 0.2011]) -Greedy action tensor([-1.9272, -0.4291, 0.6602, -0.1684]) tensor([0.0407, 0.1820, 0.5410, 0.2362]) -Greedy action tensor([-1.1218, 0.0837, 0.4353, 0.2665]) tensor([0.0764, 0.2550, 0.3625, 0.3062]) -Greedy action tensor([-1.9213, -0.4069, 0.6531, -0.1596]) tensor([0.0408, 0.1856, 0.5358, 0.2377]) -Greedy action tensor([-1.8080, -0.4510, 0.6089, -0.1077]) tensor([0.0464, 0.1801, 0.5197, 0.2538]) -Greedy action tensor([-1.9434, -0.4569, 0.6657, -0.1805]) tensor([0.0403, 0.1780, 0.5470, 0.2347]) -Greedy action tensor([0.5246, 1.0433, 0.0110, 0.7464]) tensor([0.2209, 0.3711, 0.1322, 0.2758]) -Greedy action tensor([-1.9376, -0.4135, 0.6576, -0.1748]) tensor([0.0403, 0.1850, 0.5399, 0.2349]) -Greedy action tensor([-1.8562, -0.4407, 0.6200, -0.1442]) tensor([0.0443, 0.1826, 0.5274, 0.2456]) -Greedy action tensor([-1.5035, -0.2416, 0.6114, 0.1393]) tensor([0.0556, 0.1963, 0.4607, 0.2874]) -Greedy action tensor([-1.8587, -0.0250, 0.5637, -0.1457]) tensor([0.0415, 0.2599, 0.4682, 0.2303]) -Greedy action tensor([ 0.8144, -0.6138, -0.0429, -0.2858]) tensor([0.5008, 0.1201, 0.2125, 0.1667]) -Greedy action tensor([ 0.7355, -0.1152, 0.0791, -0.0492]) tensor([0.4163, 0.1778, 0.2159, 0.1899]) -Greedy action tensor([ 0.6695, -0.4618, -0.1854, -0.0880]) tensor([0.4511, 0.1455, 0.1919, 0.2115]) -Greedy action tensor([ 0.4754, -0.3870, 0.1283, -0.4584]) tensor([0.3965, 0.1674, 0.2802, 0.1559]) -Greedy action tensor([ 0.4486, -0.0620, -0.1429, -0.0670]) tensor([0.3635, 0.2182, 0.2012, 0.2171]) -Greedy action tensor([ 0.5001, -0.1951, -0.0050, -0.3222]) tensor([0.3934, 0.1963, 0.2374, 0.1729]) -Greedy action tensor([ 1.5109, -0.9815, -0.1648, -0.7537]) tensor([0.7279, 0.0602, 0.1363, 0.0756]) -Greedy action tensor([ 0.3532, -0.2654, -0.2107, 0.0017]) tensor([0.3557, 0.1916, 0.2024, 0.2503]) -Greedy action tensor([ 0.9236, -0.7310, -0.1036, -0.6196]) tensor([0.5672, 0.1084, 0.2031, 0.1212]) -Greedy action tensor([ 0.3246, -0.0111, 0.0669, -0.1582]) tensor([0.3221, 0.2302, 0.2489, 0.1988]) -Greedy action tensor([ 0.9621, -0.4090, 0.0692, -0.6141]) tensor([0.5347, 0.1357, 0.2190, 0.1106]) -Greedy action tensor([ 0.9006, -0.3013, -0.0177, -0.1906]) tensor([0.4913, 0.1477, 0.1961, 0.1650]) -Greedy action tensor([ 0.7980, -0.6924, 0.1455, -0.1900]) tensor([0.4721, 0.1064, 0.2458, 0.1758]) -Greedy action tensor([ 0.1898, -0.0446, 0.0106, 0.4510]) tensor([0.2548, 0.2015, 0.2130, 0.3308]) -Greedy action tensor([ 0.9782, -0.9024, -0.0321, -0.5228]) tensor([0.5749, 0.0877, 0.2093, 0.1281]) -Greedy action tensor([ 0.8543, -0.4517, -0.0148, -0.2774]) tensor([0.4968, 0.1346, 0.2083, 0.1602]) -Greedy action tensor([ 0.5962, -0.3003, -0.1390, -0.0960]) tensor([0.4188, 0.1709, 0.2008, 0.2096]) -Greedy action tensor([ 1.3874, -0.3795, -0.1642, -0.5591]) tensor([0.6555, 0.1120, 0.1389, 0.0936]) -Greedy action tensor([ 0.4962, 0.0081, 0.0534, -0.3986]) tensor([0.3753, 0.2303, 0.2410, 0.1534]) -Greedy action tensor([ 1.0018, -0.6912, 0.1379, -0.5892]) tensor([0.5527, 0.1017, 0.2330, 0.1126]) -Greedy action tensor([ 0.9842, -0.5974, 0.0183, -0.4737]) tensor([0.5498, 0.1130, 0.2093, 0.1279]) -Greedy action tensor([ 0.8732, -0.4369, 0.1172, -0.2613]) tensor([0.4852, 0.1309, 0.2278, 0.1560]) -Greedy action tensor([ 0.8316, -0.4463, 0.0975, -0.2626]) tensor([0.4777, 0.1331, 0.2293, 0.1599]) -Greedy action tensor([ 0.4859, -0.3740, -0.0767, -0.0276]) tensor([0.3859, 0.1633, 0.2199, 0.2309]) -Greedy action tensor([ 0.8339, -0.6722, -0.0564, -0.5703]) tensor([0.5325, 0.1181, 0.2186, 0.1308]) -Greedy action tensor([ 0.6879, -0.4102, -0.0028, -0.0895]) tensor([0.4359, 0.1454, 0.2185, 0.2003]) -Greedy action tensor([ 0.9389, -0.5236, -0.1761, -0.3184]) tensor([0.5423, 0.1256, 0.1778, 0.1542]) -Greedy action tensor([ 0.7850, -0.3214, -0.0171, -0.1632]) tensor([0.4616, 0.1527, 0.2070, 0.1788]) -Greedy action tensor([ 0.6233, -0.2801, -0.1754, -0.3653]) tensor([0.4490, 0.1819, 0.2020, 0.1671]) -Greedy action tensor([ 0.4801, -0.2435, -0.0235, -0.3241]) tensor([0.3942, 0.1912, 0.2382, 0.1764]) -Greedy action tensor([ 0.4931, -0.2947, 0.0276, -0.2090]) tensor([0.3879, 0.1764, 0.2435, 0.1922]) -Greedy action tensor([ 0.3561, -0.0960, -0.0411, -0.1736]) tensor([0.3452, 0.2196, 0.2320, 0.2032]) -Greedy action tensor([ 0.6567, -0.4106, -0.0877, -0.4330]) tensor([0.4640, 0.1596, 0.2204, 0.1560]) -Greedy action tensor([ 0.5398, -0.4227, 0.2396, -0.4617]) tensor([0.4016, 0.1534, 0.2975, 0.1475]) -Greedy action tensor([ 1.1029, -0.6930, -0.0454, -0.4454]) tensor([0.5897, 0.0979, 0.1870, 0.1254]) -Greedy action tensor([ 0.7252, -0.6294, -0.2252, -0.4229]) tensor([0.5097, 0.1315, 0.1971, 0.1617]) -Greedy action tensor([ 0.3775, -0.1419, -0.1252, 0.0390]) tensor([0.3433, 0.2042, 0.2077, 0.2447]) -Greedy action tensor([ 0.8244, -0.7146, -0.0933, -0.2315]) tensor([0.5097, 0.1094, 0.2036, 0.1773]) -Greedy action tensor([ 0.8095, 0.0392, -0.0844, -0.1808]) tensor([0.4458, 0.2063, 0.1823, 0.1656]) -Greedy action tensor([ 0.4768, 0.0593, 0.0051, -0.0337]) tensor([0.3469, 0.2285, 0.2164, 0.2082]) -Greedy action tensor([ 0.7197, -0.5104, 0.0037, -0.1565]) tensor([0.4551, 0.1330, 0.2224, 0.1895]) -Greedy action tensor([ 0.7082, -0.0798, 0.0229, -0.1903]) tensor([0.4227, 0.1922, 0.2130, 0.1721]) -Greedy action tensor([ 0.8562, -0.1895, 0.0968, -0.3838]) tensor([0.4742, 0.1667, 0.2219, 0.1372]) -Greedy action tensor([ 0.7166, -0.6680, 0.0946, -0.2912]) tensor([0.4646, 0.1163, 0.2494, 0.1696]) -Greedy action tensor([ 1.2272, -0.8521, 0.1056, -0.6463]) tensor([0.6233, 0.0779, 0.2030, 0.0957]) -Greedy action tensor([ 0.7087, -0.2209, -0.0575, -0.0650]) tensor([0.4309, 0.1701, 0.2003, 0.1988]) -Greedy action tensor([ 0.9285, -0.7032, -0.0012, -0.6394]) tensor([0.5559, 0.1087, 0.2194, 0.1159]) -Greedy action tensor([ 0.6196, -0.6077, -0.0663, -0.2264]) tensor([0.4493, 0.1317, 0.2263, 0.1928]) -Greedy action tensor([ 1.2512, -0.2860, -0.0173, 0.0116]) tensor([0.5600, 0.1204, 0.1575, 0.1621]) -Greedy action tensor([ 0.4358, -0.1897, -0.1013, -0.0225]) tensor([0.3634, 0.1944, 0.2124, 0.2298]) -Greedy action tensor([ 1.2803, -0.5784, -0.1706, -0.3666]) tensor([0.6318, 0.0985, 0.1481, 0.1217]) -Greedy action tensor([ 1.0361, -0.4690, -0.0375, -0.2259]) tensor([0.5415, 0.1202, 0.1851, 0.1533]) -Greedy action tensor([ 0.6213, -0.6977, 0.2052, -0.3839]) tensor([0.4361, 0.1166, 0.2877, 0.1596]) -Greedy action tensor([ 1.1988, -0.8559, -0.0279, -0.6301]) tensor([0.6321, 0.0810, 0.1854, 0.1015]) -Greedy action tensor([ 0.8414, -0.8102, -0.0509, -0.3246]) tensor([0.5227, 0.1002, 0.2142, 0.1629]) -Greedy action tensor([ 0.4317, -0.4983, -0.1196, -0.1702]) tensor([0.3971, 0.1567, 0.2288, 0.2175]) -Greedy action tensor([ 0.7703, -0.5058, -0.0592, -0.5435]) tensor([0.5040, 0.1407, 0.2199, 0.1355]) -Greedy action tensor([ 0.5430, -0.1690, 0.0767, -0.4289]) tensor([0.4006, 0.1966, 0.2513, 0.1516]) -Greedy action tensor([ 0.8395, -0.3929, -0.1000, -0.4374]) tensor([0.5099, 0.1487, 0.1993, 0.1422]) -Greedy action tensor([ 0.5060, 0.0726, -0.0425, 0.0296]) tensor([0.3512, 0.2277, 0.2029, 0.2181]) -Greedy action tensor([ 0.8977, -0.0765, -0.1722, -0.5212]) tensor([0.5096, 0.1924, 0.1748, 0.1233]) -Greedy action tensor([ 0.4335, -0.1639, 0.0078, -0.3644]) tensor([0.3768, 0.2073, 0.2462, 0.1697]) -Greedy action tensor([ 0.8273, -0.5014, -0.0169, -0.3489]) tensor([0.4992, 0.1322, 0.2146, 0.1540]) -Greedy action tensor([ 0.4127, -0.3936, -0.0351, -0.1483]) tensor([0.3765, 0.1681, 0.2406, 0.2148]) -Greedy action tensor([ 0.5443, -0.2712, 0.0015, -0.1442]) tensor([0.3959, 0.1752, 0.2301, 0.1989]) -Greedy action tensor([ 0.9523, -0.5903, 0.1130, -0.1347]) tensor([0.5043, 0.1078, 0.2178, 0.1701]) -Greedy action tensor([ 7.1816e-01, 4.8375e-04, -1.9624e-02, -4.3119e-01]) tensor([0.4380, 0.2137, 0.2095, 0.1388]) -Greedy action tensor([ 0.5726, -0.1771, -0.1182, 0.0369]) tensor([0.3908, 0.1847, 0.1959, 0.2287]) -Greedy action tensor([ 1.0352, -0.4708, -0.0681, -0.4990]) tensor([0.5652, 0.1254, 0.1875, 0.1219]) -Greedy action tensor([ 0.8428, -0.5149, -0.0738, -0.3810]) tensor([0.5125, 0.1318, 0.2049, 0.1507]) -Greedy action tensor([ 0.8348, -0.6317, -0.0378, -0.5439]) tensor([0.5262, 0.1214, 0.2199, 0.1325]) -Greedy action tensor([ 0.4249, -0.1770, -0.0067, -0.1940]) tensor([0.3655, 0.2002, 0.2374, 0.1968]) -Greedy action tensor([ 0.6939, -0.1571, 0.0044, -0.0430]) tensor([0.4154, 0.1774, 0.2084, 0.1988]) -Greedy action tensor([ 0.8250, -0.2407, 0.0397, -0.3509]) tensor([0.4742, 0.1633, 0.2162, 0.1463]) -Greedy action tensor([ 1.2079, -0.9275, -0.0302, -0.6705]) tensor([0.6406, 0.0757, 0.1857, 0.0979]) -Greedy action tensor([ 0.8666, -0.5288, 0.0050, -0.3736]) tensor([0.5103, 0.1264, 0.2156, 0.1477]) -Greedy action tensor([ 0.6019, -0.3223, 0.1791, -0.3652]) tensor([0.4111, 0.1632, 0.2694, 0.1563]) -Greedy action tensor([ 0.9010, -0.3024, -0.1750, -0.2600]) tensor([0.5117, 0.1536, 0.1745, 0.1602]) -Greedy action tensor([ 0.8987, -0.8591, 0.2293, -0.6050]) tensor([0.5245, 0.0904, 0.2685, 0.1166]) -Greedy action tensor([ 1.0528, -0.5046, 0.0781, -0.3325]) tensor([0.5440, 0.1146, 0.2052, 0.1361]) -Greedy action tensor([ 0.7692, -0.4273, -0.0827, -0.2476]) tensor([0.4783, 0.1446, 0.2041, 0.1730]) -Greedy action tensor([ 1.2346, 0.1100, 0.1886, -0.1395]) tensor([0.5183, 0.1684, 0.1821, 0.1312]) -Greedy action tensor([ 1.0708, -0.1448, -0.0887, 0.2660]) tensor([0.4861, 0.1441, 0.1524, 0.2174]) -Greedy action tensor([ 1.2333, -0.3817, -0.6249, 0.2559]) tensor([0.5777, 0.1149, 0.0901, 0.2174]) -Greedy action tensor([ 1.0241, -0.1908, -0.7053, 0.2603]) tensor([0.5155, 0.1530, 0.0914, 0.2401]) -Greedy action tensor([ 2.0736, -0.2235, -0.9136, 0.1780]) tensor([0.7685, 0.0773, 0.0388, 0.1155]) -Greedy action tensor([ 0.7741, -0.3308, 0.0334, 0.5129]) tensor([0.3879, 0.1285, 0.1849, 0.2987]) -Greedy action tensor([ 1.4668, -0.3465, -0.6146, 0.3897]) tensor([0.6141, 0.1002, 0.0766, 0.2092]) -Greedy action tensor([ 1.9145, -1.0079, -0.4773, 0.5342]) tensor([0.7159, 0.0385, 0.0655, 0.1800]) -Greedy action tensor([ 1.4087, -0.5456, -0.0955, 0.4981]) tensor([0.5662, 0.0802, 0.1258, 0.2278]) -Greedy action tensor([ 1.7799, -0.5973, -0.5371, 0.5847]) tensor([0.6693, 0.0621, 0.0660, 0.2026]) -Greedy action tensor([ 1.5662, -0.4730, -0.9313, 0.5193]) tensor([0.6396, 0.0832, 0.0526, 0.2245]) -Greedy action tensor([ 1.2159, -0.6453, -0.0445, 0.2114]) tensor([0.5539, 0.0861, 0.1571, 0.2029]) -Greedy action tensor([ 1.6350, -0.8071, -0.0645, 0.5403]) tensor([0.6233, 0.0542, 0.1139, 0.2086]) -Greedy action tensor([ 1.6541, -0.8306, -0.5951, 0.3483]) tensor([0.6850, 0.0571, 0.0723, 0.1856]) -Greedy action tensor([ 1.2594, -0.4244, -0.3431, 0.5440]) tensor([0.5330, 0.0990, 0.1073, 0.2607]) -Greedy action tensor([ 2.0252, 0.2570, -0.0476, 0.3202]) tensor([0.6765, 0.1154, 0.0851, 0.1230]) -Greedy action tensor([ 1.9046, -0.9554, -0.3380, 0.3127]) tensor([0.7315, 0.0419, 0.0777, 0.1489]) -Greedy action tensor([ 1.1896, -0.1855, -0.3852, 0.5645]) tensor([0.5012, 0.1267, 0.1038, 0.2683]) -Greedy action tensor([ 1.9283, -0.3235, -0.9088, 0.2691]) tensor([0.7385, 0.0777, 0.0433, 0.1405]) -Greedy action tensor([ 1.2664, -0.5157, -0.1642, 0.3568]) tensor([0.5525, 0.0930, 0.1321, 0.2225]) -Greedy action tensor([ 1.3098, -0.5676, 0.0096, 0.2459]) tensor([0.5648, 0.0864, 0.1539, 0.1949]) -Greedy action tensor([ 0.6299, -0.4459, -0.0344, 0.0544]) tensor([0.4135, 0.1410, 0.2128, 0.2326]) -Greedy action tensor([ 2.2488, -1.3299, -0.2365, 0.8864]) tensor([0.7314, 0.0204, 0.0609, 0.1873]) -Greedy action tensor([ 1.7350, -0.6396, -0.3130, 0.3769]) tensor([0.6761, 0.0629, 0.0872, 0.1738]) -Greedy action tensor([ 1.1169, -0.5116, -0.1706, 0.6312]) tensor([0.4790, 0.0940, 0.1322, 0.2948]) -Greedy action tensor([ 1.5344, -0.5420, -0.4652, 0.4618]) tensor([0.6239, 0.0782, 0.0845, 0.2134]) -Greedy action tensor([ 1.1279, -0.3534, -0.2967, 0.1413]) tensor([0.5432, 0.1235, 0.1307, 0.2025]) -Greedy action tensor([ 1.8761, -1.3946, -0.0973, 0.5476]) tensor([0.6935, 0.0263, 0.0964, 0.1837]) -Greedy action tensor([ 1.2559, -0.3334, -0.6565, -0.0317]) tensor([0.6144, 0.1254, 0.0908, 0.1695]) -Greedy action tensor([ 1.3964, 0.0749, -1.0771, -0.0366]) tensor([0.6291, 0.1678, 0.0530, 0.1501]) -Greedy action tensor([ 1.7268, -0.8021, -0.4315, 0.8593]) tensor([0.6191, 0.0494, 0.0715, 0.2600]) -Greedy action tensor([ 1.1852, -0.2344, -0.4578, 0.1822]) tensor([0.5550, 0.1342, 0.1073, 0.2035]) -Greedy action tensor([ 1.8573, -1.0483, -0.5995, 0.6612]) tensor([0.6931, 0.0379, 0.0594, 0.2096]) -Greedy action tensor([ 1.3240, -0.0871, -0.5609, 0.1892]) tensor([0.5823, 0.1420, 0.0884, 0.1872]) -Greedy action tensor([ 2.0913, -0.9941, 0.1291, 0.5824]) tensor([0.7105, 0.0325, 0.0999, 0.1571]) -Greedy action tensor([ 1.1707, -0.0982, -0.9571, 0.3904]) tensor([0.5381, 0.1513, 0.0641, 0.2466]) -Greedy action tensor([ 1.4062, -0.7255, -0.4194, 0.1321]) tensor([0.6413, 0.0761, 0.1033, 0.1793]) -Greedy action tensor([ 1.6562, -0.2755, -0.2997, 0.4736]) tensor([0.6278, 0.0910, 0.0888, 0.1924]) -Greedy action tensor([ 1.6342, -0.3574, -0.5117, 0.7602]) tensor([0.5986, 0.0817, 0.0700, 0.2497]) -Greedy action tensor([ 1.2022, -0.2194, -0.6640, 0.2327]) tensor([0.5633, 0.1359, 0.0871, 0.2136]) -Greedy action tensor([ 1.4903, -1.0652, 0.0821, 0.4616]) tensor([0.5953, 0.0462, 0.1456, 0.2128]) -Greedy action tensor([ 1.4574, -0.4169, -0.3265, 0.6424]) tensor([0.5669, 0.0870, 0.0952, 0.2509]) -Greedy action tensor([ 0.9517, -0.7679, -0.1950, 0.1487]) tensor([0.5142, 0.0921, 0.1633, 0.2303]) -Greedy action tensor([ 1.5738, -0.8562, -0.2782, 0.1818]) tensor([0.6695, 0.0589, 0.1051, 0.1664]) -Greedy action tensor([ 1.9664, -0.4059, -1.0021, 1.1784]) tensor([0.6252, 0.0583, 0.0321, 0.2843]) -Greedy action tensor([ 1.4581, -0.3464, -0.6610, 0.1814]) tensor([0.6395, 0.1052, 0.0768, 0.1784]) -Greedy action tensor([ 1.6512, -0.5725, -0.3639, 0.2873]) tensor([0.6679, 0.0723, 0.0890, 0.1708]) -Greedy action tensor([ 1.5109, -0.2901, -0.3997, 0.7849]) tensor([0.5565, 0.0919, 0.0824, 0.2693]) -Greedy action tensor([ 1.9766, -0.8293, -0.4853, 0.6128]) tensor([0.7136, 0.0431, 0.0608, 0.1824]) -Greedy action tensor([ 1.8646, -1.4805, -0.2012, 0.3475]) tensor([0.7239, 0.0255, 0.0917, 0.1588]) -Greedy action tensor([ 1.6111, -0.4802, 0.1787, 0.4840]) tensor([0.5930, 0.0733, 0.1416, 0.1921]) -Greedy action tensor([ 2.1970, -1.0789, -0.0700, 0.2296]) tensor([0.7805, 0.0295, 0.0809, 0.1091]) -Greedy action tensor([ 1.0952, 0.1567, -0.6843, 0.1370]) tensor([0.5145, 0.2013, 0.0868, 0.1974]) -Greedy action tensor([ 1.4942, -0.6146, -0.3191, 0.1925]) tensor([0.6424, 0.0780, 0.1048, 0.1748]) -Greedy action tensor([ 1.6305, -0.7446, -0.4312, 0.4497]) tensor([0.6548, 0.0609, 0.0833, 0.2010]) -Greedy action tensor([ 2.0297, -1.3746, -0.1989, 0.2284]) tensor([0.7657, 0.0254, 0.0824, 0.1264]) -Greedy action tensor([ 1.0088, -0.3933, -0.7217, 0.0879]) tensor([0.5490, 0.1351, 0.0973, 0.2186]) -Greedy action tensor([ 1.9303, -0.2264, -0.6599, 0.5238]) tensor([0.6965, 0.0806, 0.0522, 0.1706]) -Greedy action tensor([ 1.7604, -0.9853, -0.0380, 0.6976]) tensor([0.6348, 0.0408, 0.1051, 0.2193]) -Greedy action tensor([ 1.1361, -0.0684, -0.1233, 0.0442]) tensor([0.5210, 0.1562, 0.1479, 0.1749]) -Greedy action tensor([ 1.5882, -0.7303, -0.8505, 0.3970]) tensor([0.6713, 0.0661, 0.0586, 0.2040]) -Greedy action tensor([ 1.8653, -0.8126, -0.0293, 0.5743]) tensor([0.6693, 0.0460, 0.1007, 0.1841]) -Greedy action tensor([ 1.1862, -0.2130, -0.1765, 0.2563]) tensor([0.5270, 0.1301, 0.1349, 0.2080]) -Greedy action tensor([ 1.5187, -0.4073, -0.3959, 0.2285]) tensor([0.6376, 0.0929, 0.0940, 0.1755]) -Greedy action tensor([ 1.1007, -0.0952, -0.4365, 0.3544]) tensor([0.5021, 0.1519, 0.1079, 0.2381]) -Greedy action tensor([ 1.8008, -1.4780, -0.2998, 0.2467]) tensor([0.7292, 0.0275, 0.0892, 0.1541]) -Greedy action tensor([ 2.0519, -0.8036, -0.6789, 0.7849]) tensor([0.7121, 0.0410, 0.0464, 0.2006]) -Greedy action tensor([ 1.5291, -0.8211, -0.1282, 0.1075]) tensor([0.6547, 0.0624, 0.1248, 0.1580]) -Greedy action tensor([ 1.0503, -0.6205, -0.5508, 0.4300]) tensor([0.5188, 0.0976, 0.1046, 0.2790]) -Greedy action tensor([ 1.4351, -0.2350, -0.5147, -0.1181]) tensor([0.6485, 0.1221, 0.0923, 0.1372]) -Greedy action tensor([ 1.7897, -0.7224, -0.7885, 0.4647]) tensor([0.7028, 0.0570, 0.0534, 0.1868]) -Greedy action tensor([ 1.8708, -0.5005, -1.1070, 0.6220]) tensor([0.6988, 0.0652, 0.0356, 0.2004]) -Greedy action tensor([ 1.8719, -0.4648, -0.9827, 0.0763]) tensor([0.7574, 0.0732, 0.0436, 0.1258]) -Greedy action tensor([ 1.7265, -0.9376, -0.1067, 0.6831]) tensor([0.6322, 0.0440, 0.1011, 0.2227]) -Greedy action tensor([ 1.0460, -0.3654, -0.5874, 0.2115]) tensor([0.5339, 0.1302, 0.1042, 0.2317]) -Greedy action tensor([ 1.4452, -0.6510, -0.2343, 0.3667]) tensor([0.6063, 0.0745, 0.1130, 0.2062]) -Greedy action tensor([ 1.4065, -0.9060, -0.3675, 0.3641]) tensor([0.6168, 0.0611, 0.1046, 0.2175]) -Greedy action tensor([ 0.6454, -0.2869, 0.0188, -0.1334]) tensor([0.4189, 0.1649, 0.2239, 0.1923]) -Greedy action tensor([ 1.4759, -0.2583, -0.8460, 0.1717]) tensor([0.6468, 0.1142, 0.0634, 0.1755]) -Greedy action tensor([ 1.6071, -0.8054, -0.2076, 0.3459]) tensor([0.6511, 0.0583, 0.1061, 0.1845]) -Greedy action tensor([ 1.3977, -0.2255, -0.5089, 0.1763]) tensor([0.6095, 0.1202, 0.0906, 0.1797]) -Greedy action tensor([ 0.4297, -1.4104, -0.3822, -0.0377]) tensor([0.4485, 0.0712, 0.1992, 0.2811]) -Greedy action tensor([ 1.5748, -1.3465, 0.0688, 0.4547]) tensor([0.6243, 0.0336, 0.1385, 0.2037]) -Greedy action tensor([ 0.4393, -1.2383, -0.0956, 0.7889]) tensor([0.3134, 0.0585, 0.1836, 0.4445]) -Greedy action tensor([ 0.5485, -0.2548, 0.0758, 0.1676]) tensor([0.3630, 0.1626, 0.2263, 0.2481]) -Greedy action tensor([ 0.5217, 0.5818, -0.5377, -0.5671]) tensor([0.3643, 0.3868, 0.1263, 0.1226]) -Greedy action tensor([ 0.3494, -0.7178, -0.0076, -0.3067]) tensor([0.3902, 0.1342, 0.2731, 0.2025]) -Greedy action tensor([ 0.9577, 0.6018, -0.0436, -0.0966]) tensor([0.4138, 0.2899, 0.1520, 0.1442]) -Greedy action tensor([ 0.6697, 0.3048, 0.5129, -0.0258]) tensor([0.3281, 0.2278, 0.2805, 0.1637]) -Greedy action tensor([0.6135, 0.4413, 0.1285, 0.0790]) tensor([0.3286, 0.2766, 0.2023, 0.1925]) -Greedy action tensor([ 0.0424, -1.1455, -0.0993, 0.1660]) tensor([0.3026, 0.0923, 0.2627, 0.3425]) -Greedy action tensor([-0.2061, -0.4265, 0.6267, -0.6461]) tensor([0.2107, 0.1690, 0.4846, 0.1357]) -Greedy action tensor([ 0.5322, -0.4663, 1.4518, 0.0458]) tensor([0.2226, 0.0820, 0.5584, 0.1369]) -Greedy action tensor([ 0.3619, -0.4623, 1.5552, -0.4529]) tensor([0.1931, 0.0847, 0.6368, 0.0855]) -Greedy action tensor([-0.3853, 0.6326, -0.8242, -0.2023]) tensor([0.1782, 0.4930, 0.1149, 0.2139]) -Greedy action tensor([-0.0015, -0.1956, -0.3800, 0.3433]) tensor([0.2551, 0.2101, 0.1747, 0.3601]) -Greedy action tensor([ 0.1648, 0.8199, -0.5246, -0.1027]) tensor([0.2385, 0.4592, 0.1197, 0.1825]) -Greedy action tensor([-0.3290, 0.4368, 0.4125, -0.0710]) tensor([0.1528, 0.3286, 0.3208, 0.1978]) -Greedy action tensor([ 0.1823, -0.2507, 0.7342, -0.2164]) tensor([0.2465, 0.1599, 0.4281, 0.1655]) -Greedy action tensor([ 0.8385, -0.5817, -0.3735, -0.3825]) tensor([0.5452, 0.1318, 0.1622, 0.1608]) -Greedy action tensor([ 0.2409, -1.3647, 0.2813, -0.2250]) tensor([0.3485, 0.0700, 0.3628, 0.2187]) -Greedy action tensor([ 0.7794, -0.2965, 0.4467, 1.0588]) tensor([0.2958, 0.1009, 0.2121, 0.3912]) -Greedy action tensor([ 0.3276, -0.2291, 0.8176, -0.6375]) tensor([0.2788, 0.1598, 0.4551, 0.1062]) -Greedy action tensor([ 1.0765, -0.0801, 0.0696, 0.4100]) tensor([0.4559, 0.1434, 0.1666, 0.2341]) -Greedy action tensor([ 0.0915, -0.6137, 0.7094, 0.0724]) tensor([0.2309, 0.1141, 0.4284, 0.2266]) -Greedy action tensor([ 0.2726, 0.3603, 0.1993, -0.5028]) tensor([0.2872, 0.3136, 0.2669, 0.1323]) -Greedy action tensor([-1.0072, -0.6532, 0.5399, -0.7266]) tensor([0.1184, 0.1687, 0.5562, 0.1567]) -Greedy action tensor([ 0.3852, -0.9143, 0.9328, -0.5259]) tensor([0.2938, 0.0801, 0.5080, 0.1181]) -Greedy action tensor([-0.1660, -0.0623, -0.4906, -0.1139]) tensor([0.2574, 0.2855, 0.1860, 0.2711]) -Greedy action tensor([-1.1837, -0.6198, 0.8770, -0.0750]) tensor([0.0733, 0.1289, 0.5757, 0.2222]) -Greedy action tensor([ 0.2251, 0.2899, -0.1835, -0.3988]) tensor([0.3061, 0.3266, 0.2034, 0.1640]) -Greedy action tensor([ 0.7137, -0.4921, 0.0994, 0.5864]) tensor([0.3675, 0.1100, 0.1988, 0.3236]) -Greedy action tensor([ 1.3395, -0.3292, -0.2214, 0.1437]) tensor([0.5879, 0.1108, 0.1234, 0.1778]) -Greedy action tensor([ 0.1892, -1.0715, 0.0659, -0.4014]) tensor([0.3675, 0.1042, 0.3248, 0.2036]) -Greedy action tensor([-1.5535, -0.0889, -0.7368, -0.0609]) tensor([0.0831, 0.3594, 0.1880, 0.3696]) -Greedy action tensor([ 0.8463, -0.3173, 1.2737, -0.1574]) tensor([0.3113, 0.0972, 0.4773, 0.1141]) -Greedy action tensor([ 0.2459, -1.7959, 0.8017, 0.6346]) tensor([0.2300, 0.0298, 0.4009, 0.3392]) -Greedy action tensor([-0.5689, -0.1120, -0.8502, -0.0970]) tensor([0.2025, 0.3199, 0.1529, 0.3247]) -Greedy action tensor([ 0.0795, -0.7616, -0.5691, -0.2053]) tensor([0.3695, 0.1593, 0.1932, 0.2779]) -Greedy action tensor([-0.1977, 0.0667, 0.0536, -0.8824]) tensor([0.2443, 0.3183, 0.3142, 0.1232]) -Greedy action tensor([ 0.9491, 0.3566, -0.4731, -0.2038]) tensor([0.4740, 0.2621, 0.1143, 0.1496]) -Greedy action tensor([-0.2802, -0.1573, 0.4616, -1.2855]) tensor([0.2176, 0.2460, 0.4568, 0.0796]) -Greedy action tensor([ 0.4847, -0.4536, 0.5371, 0.4158]) tensor([0.2960, 0.1158, 0.3119, 0.2763]) -Greedy action tensor([ 1.3752, -0.2410, 0.1224, 0.0956]) tensor([0.5674, 0.1127, 0.1621, 0.1578]) -Greedy action tensor([-0.1013, -0.5461, 0.1031, -0.8391]) tensor([0.2989, 0.1916, 0.3666, 0.1429]) -Greedy action tensor([ 0.0698, -1.0563, 0.5949, -0.2380]) tensor([0.2667, 0.0865, 0.4508, 0.1960]) -Greedy action tensor([ 0.0057, -1.4771, -0.8212, 0.7499]) tensor([0.2653, 0.0602, 0.1160, 0.5584]) -Greedy action tensor([-0.1439, -0.7986, 0.0641, -0.2177]) tensor([0.2718, 0.1412, 0.3346, 0.2524]) -Greedy action tensor([ 0.7260, -0.1678, -0.9541, 0.4318]) tensor([0.4272, 0.1748, 0.0796, 0.3184]) -Greedy action tensor([ 0.6088, -0.6244, -0.2081, -0.3282]) tensor([0.4706, 0.1371, 0.2079, 0.1844]) -Greedy action tensor([ 0.0627, -0.4947, -0.6273, -0.1115]) tensor([0.3431, 0.1965, 0.1721, 0.2883]) -Greedy action tensor([ 0.6344, -1.2030, 0.0883, 0.1229]) tensor([0.4277, 0.0681, 0.2477, 0.2565]) -Greedy action tensor([-0.4045, -1.2861, -0.6185, 0.6857]) tensor([0.1924, 0.0797, 0.1554, 0.5725]) -Greedy action tensor([ 0.5607, -0.4787, -0.1467, 0.5645]) tensor([0.3508, 0.1241, 0.1729, 0.3522]) -Greedy action tensor([-0.2807, -0.0034, 0.5922, -1.5381]) tensor([0.2001, 0.2640, 0.4790, 0.0569]) -Greedy action tensor([-0.5130, -0.0711, 0.1987, -0.4672]) tensor([0.1773, 0.2758, 0.3613, 0.1856]) -Greedy action tensor([ 0.2215, -0.2178, -0.5288, -0.9887]) tensor([0.4141, 0.2669, 0.1955, 0.1235]) -Greedy action tensor([-0.3809, -1.2335, 0.4519, -0.1028]) tensor([0.1982, 0.0845, 0.4557, 0.2617]) -Greedy action tensor([ 1.6156, -0.8114, -0.4026, 1.4761]) tensor([0.4782, 0.0422, 0.0636, 0.4160]) -Greedy action tensor([ 0.1457, -0.2964, 0.3670, 1.3797]) tensor([0.1581, 0.1016, 0.1973, 0.5431]) -Greedy action tensor([0.7921, 0.4391, 0.2619, 0.1690]) tensor([0.3537, 0.2485, 0.2081, 0.1897]) -Greedy action tensor([-0.5638, 0.6828, 0.0492, -0.8574]) tensor([0.1414, 0.4920, 0.2611, 0.1055]) -Greedy action tensor([-0.5300, -1.3621, 0.1712, -0.4139]) tensor([0.2186, 0.0951, 0.4407, 0.2455]) -Greedy action tensor([ 1.0829, -1.3137, -0.0462, -0.1154]) tensor([0.5827, 0.0530, 0.1884, 0.1758]) -Greedy action tensor([ 0.5019, -0.0314, 0.2141, 0.0602]) tensor([0.3356, 0.1969, 0.2517, 0.2158]) -Greedy action tensor([ 0.7417, -0.7935, 0.0040, 0.0501]) tensor([0.4557, 0.0982, 0.2179, 0.2282]) -Greedy action tensor([ 0.7645, 0.2300, -0.4017, -0.1743]) tensor([0.4370, 0.2560, 0.1361, 0.1709]) -Greedy action tensor([ 1.2805, -1.0100, -0.7432, 0.6036]) tensor([0.5742, 0.0581, 0.0759, 0.2918]) -Greedy action tensor([ 0.7449, -0.1208, 0.0528, 0.5128]) tensor([0.3684, 0.1550, 0.1844, 0.2921]) -Greedy action tensor([ 1.3355, -0.8868, 0.8570, 0.7663]) tensor([0.4359, 0.0472, 0.2702, 0.2467]) -Greedy action tensor([ 0.2697, -0.0798, 1.1780, -0.1663]) tensor([0.2070, 0.1459, 0.5133, 0.1338]) -Greedy action tensor([ 0.1745, -0.5993, 0.3039, -0.4255]) tensor([0.3176, 0.1465, 0.3615, 0.1743]) -Greedy action tensor([ 0.3718, -0.5890, -0.3162, -0.4433]) tensor([0.4296, 0.1644, 0.2159, 0.1901]) -Greedy action tensor([-0.4243, -1.0683, -0.0328, -0.7496]) tensor([0.2683, 0.1409, 0.3969, 0.1938]) -Greedy action tensor([ 0.5996, -0.0511, 0.9173, -0.1927]) tensor([0.2987, 0.1558, 0.4103, 0.1352]) -Greedy action tensor([ 0.7966, 0.5681, 0.0225, -0.5629]) tensor([0.3978, 0.3166, 0.1834, 0.1022]) -Greedy action tensor([0.5626, 0.5523, 0.3013, 0.3951]) tensor([0.2773, 0.2745, 0.2136, 0.2346]) -Greedy action tensor([ 0.0167, -1.4060, 0.0734, 0.6004]) tensor([0.2444, 0.0589, 0.2586, 0.4381]) -Greedy action tensor([ 0.1985, 0.9882, -1.0086, 0.8508]) tensor([0.1844, 0.4063, 0.0552, 0.3541]) -Greedy action tensor([-0.4557, -0.6223, 0.2774, 0.7869]) tensor([0.1353, 0.1145, 0.2816, 0.4687]) -Greedy action tensor([0.0869, 0.1234, 0.3536, 0.2598]) tensor([0.2207, 0.2289, 0.2881, 0.2623]) -Greedy action tensor([ 1.5016, -0.4426, 0.5787, 1.5604]) tensor([0.3845, 0.0550, 0.1528, 0.4077]) -Greedy action tensor([-0.5168, -0.5212, -1.1421, 0.1393]) tensor([0.2243, 0.2233, 0.1200, 0.4323]) -Greedy action tensor([-1.0599, 0.2964, 0.6602, 0.7385]) tensor([0.0606, 0.2352, 0.3383, 0.3659]) -Greedy action tensor([-1.8642, -0.4852, 0.6600, -0.1076]) tensor([0.0430, 0.1708, 0.5369, 0.2492]) -Greedy action tensor([-0.2926, 1.0381, 0.0648, 0.0188]) tensor([0.1319, 0.4993, 0.1886, 0.1802]) -Greedy action tensor([-1.2138, 0.6497, -0.5804, -0.7060]) tensor([0.0910, 0.5864, 0.1714, 0.1512]) -Greedy action tensor([-1.9205, -0.4544, 0.6596, -0.1667]) tensor([0.0411, 0.1782, 0.5430, 0.2376]) -Greedy action tensor([-1.9171, -0.4537, 0.6783, -0.1662]) tensor([0.0408, 0.1765, 0.5474, 0.2353]) -Greedy action tensor([-0.9508, -0.0219, 0.1564, 0.0042]) tensor([0.1092, 0.2765, 0.3305, 0.2838]) -Greedy action tensor([-1.9399, -0.4472, 0.6651, -0.1779]) tensor([0.0403, 0.1794, 0.5455, 0.2348]) -Greedy action tensor([-1.4116, -0.0856, 0.4373, -0.5888]) tensor([0.0747, 0.2811, 0.4742, 0.1700]) -Greedy action tensor([-1.6233, -0.5118, 0.5116, 0.0227]) tensor([0.0566, 0.1719, 0.4783, 0.2933]) -Greedy action tensor([-1.9438, -0.4490, 0.6675, -0.1794]) tensor([0.0401, 0.1789, 0.5466, 0.2343]) -Greedy action tensor([-1.4691, -0.4742, 0.6210, 0.3652]) tensor([0.0554, 0.1498, 0.4479, 0.3468]) -Greedy action tensor([-1.9216, -0.4336, 0.6592, -0.1682]) tensor([0.0410, 0.1814, 0.5411, 0.2366]) -Greedy action tensor([-1.8933, -0.4514, 0.6448, -0.1581]) tensor([0.0425, 0.1795, 0.5373, 0.2407]) -Greedy action tensor([-1.0476, 0.8810, 0.2171, 0.1176]) tensor([0.0684, 0.4703, 0.2421, 0.2192]) -Greedy action tensor([-1.9237, -0.4199, 0.6505, -0.1687]) tensor([0.0410, 0.1843, 0.5377, 0.2370]) -Greedy action tensor([-0.7495, 0.5174, 0.2486, 0.3484]) tensor([0.0975, 0.3459, 0.2644, 0.2922]) -Greedy action tensor([-1.8036, -0.0316, 0.5567, -0.0759]) tensor([0.0433, 0.2546, 0.4585, 0.2436]) -Greedy action tensor([-1.9076, -0.4310, 0.6491, -0.1586]) tensor([0.0416, 0.1823, 0.5368, 0.2393]) -Greedy action tensor([-1.5294, -0.2808, 0.5218, 0.1447]) tensor([0.0568, 0.1981, 0.4420, 0.3031]) -Greedy action tensor([-1.8740, -0.4565, 0.6348, -0.1486]) tensor([0.0434, 0.1792, 0.5336, 0.2438]) -Greedy action tensor([-1.9299, -0.4488, 0.6632, -0.1714]) tensor([0.0407, 0.1790, 0.5442, 0.2362]) -Greedy action tensor([-1.2255, 0.7333, 0.2606, 0.3708]) tensor([0.0573, 0.4065, 0.2534, 0.2829]) -Greedy action tensor([-1.6403, -0.4008, 0.6102, 0.0945]) tensor([0.0510, 0.1761, 0.4840, 0.2889]) -Greedy action tensor([-0.4216, 0.9918, 0.0818, 0.0294]) tensor([0.1200, 0.4931, 0.1985, 0.1884]) -Greedy action tensor([-1.8843, -0.4434, 0.6399, -0.1508]) tensor([0.0428, 0.1808, 0.5342, 0.2423]) -Greedy action tensor([-1.7413, 0.1781, 0.4728, -0.0807]) tensor([0.0450, 0.3066, 0.4117, 0.2367]) -Greedy action tensor([-1.6607, -0.3692, 0.6211, -0.0318]) tensor([0.0512, 0.1863, 0.5015, 0.2610]) -Greedy action tensor([-1.7868, 0.0706, 0.5226, -0.1303]) tensor([0.0440, 0.2821, 0.4432, 0.2307]) -Greedy action tensor([-1.7457, -0.4836, 0.5767, -0.1293]) tensor([0.0506, 0.1787, 0.5160, 0.2547]) -Greedy action tensor([-1.5701, -0.1249, 0.4341, -0.0329]) tensor([0.0578, 0.2450, 0.4286, 0.2686]) -Greedy action tensor([-1.7262, -0.5031, 0.5576, -0.0769]) tensor([0.0515, 0.1750, 0.5055, 0.2680]) -Greedy action tensor([-1.9217, -0.4284, 0.6640, -0.1592]) tensor([0.0407, 0.1813, 0.5406, 0.2373]) -Greedy action tensor([-1.8403, -0.3270, 0.6086, -0.1087]) tensor([0.0439, 0.1995, 0.5084, 0.2482]) -Greedy action tensor([ 0.3131, 0.8857, -0.1298, 0.0787]) tensor([0.2378, 0.4215, 0.1527, 0.1881]) -Greedy action tensor([-1.0343, 0.3512, 0.2209, -0.0174]) tensor([0.0887, 0.3547, 0.3113, 0.2453]) -Greedy action tensor([-0.0224, 1.1116, 0.0083, 0.3276]) tensor([0.1525, 0.4739, 0.1572, 0.2164]) -Greedy action tensor([-0.9745, 0.1840, 0.0141, -0.0064]) tensor([0.1052, 0.3351, 0.2827, 0.2770]) -Greedy action tensor([-1.7416, -0.3753, 0.5523, -0.0971]) tensor([0.0500, 0.1959, 0.4954, 0.2588]) -Greedy action tensor([-1.9205, -0.4461, 0.6567, -0.1660]) tensor([0.0411, 0.1797, 0.5414, 0.2378]) -Greedy action tensor([-0.8061, -0.3350, 0.3809, 0.6265]) tensor([0.0993, 0.1591, 0.3255, 0.4161]) -Greedy action tensor([-1.7241, -0.3722, 0.5795, -0.0229]) tensor([0.0491, 0.1899, 0.4918, 0.2692]) -Greedy action tensor([-1.6084, -0.1672, 0.5479, 0.0366]) tensor([0.0525, 0.2219, 0.4536, 0.2720]) -Greedy action tensor([-1.9285, -0.4243, 0.6585, -0.1716]) tensor([0.0407, 0.1831, 0.5406, 0.2357]) -Greedy action tensor([-0.8270, 0.1165, 0.5376, 0.4074]) tensor([0.0916, 0.2353, 0.3585, 0.3147]) -Greedy action tensor([-1.7065, -0.4767, 0.7368, -0.0812]) tensor([0.0476, 0.1628, 0.5478, 0.2418]) -Greedy action tensor([-1.4287, 0.2206, 0.4101, 0.1504]) tensor([0.0577, 0.3000, 0.3626, 0.2797]) -Greedy action tensor([-1.8166, -0.2209, 0.5556, -0.1011]) tensor([0.0450, 0.2220, 0.4827, 0.2503]) -Greedy action tensor([-1.9149, -0.4261, 0.6545, -0.1664]) tensor([0.0413, 0.1829, 0.5388, 0.2371]) -Greedy action tensor([-1.7227, 0.0936, 0.4701, -0.0089]) tensor([0.0462, 0.2839, 0.4137, 0.2562]) -Greedy action tensor([-1.6928, -0.3649, 0.6478, 0.0323]) tensor([0.0481, 0.1816, 0.5000, 0.2702]) -Greedy action tensor([-1.6938, -0.4655, 0.5479, -0.0408]) tensor([0.0525, 0.1793, 0.4940, 0.2742]) -Greedy action tensor([-1.8068, -0.4485, 0.6068, -0.1072]) tensor([0.0464, 0.1806, 0.5189, 0.2541]) -Greedy action tensor([-1.6383, -0.4438, 0.4675, 0.0121]) tensor([0.0564, 0.1863, 0.4634, 0.2939]) -Greedy action tensor([-1.8626, -0.2905, 0.6194, -0.1292]) tensor([0.0427, 0.2055, 0.5104, 0.2414]) -Greedy action tensor([-1.8860, -0.4338, 0.6423, -0.1480]) tensor([0.0426, 0.1819, 0.5335, 0.2420]) -Greedy action tensor([-1.9420, -0.4484, 0.6669, -0.1789]) tensor([0.0402, 0.1791, 0.5463, 0.2345]) -Greedy action tensor([-1.9300, -0.4169, 0.6588, -0.1644]) tensor([0.0405, 0.1838, 0.5390, 0.2366]) -Greedy action tensor([-1.7311, -0.5016, 0.5558, -0.0812]) tensor([0.0514, 0.1756, 0.5056, 0.2674]) -Greedy action tensor([-1.9484, -0.4520, 0.6688, -0.1828]) tensor([0.0400, 0.1786, 0.5477, 0.2337]) -Greedy action tensor([-1.9133, -0.3320, 0.6322, -0.1635]) tensor([0.0410, 0.1995, 0.5233, 0.2362]) -Greedy action tensor([-1.8653, -0.3747, 0.6482, -0.1138]) tensor([0.0425, 0.1885, 0.5243, 0.2447]) -Greedy action tensor([-1.3352, 0.3822, 0.3052, -0.0470]) tensor([0.0651, 0.3628, 0.3359, 0.2362]) -Greedy action tensor([-1.7768, -0.4572, 0.6250, -0.0413]) tensor([0.0466, 0.1744, 0.5147, 0.2643]) -Greedy action tensor([-1.8631, -0.2324, 0.6042, -0.1428]) tensor([0.0426, 0.2175, 0.5021, 0.2379]) -Greedy action tensor([-1.7934, 0.0136, 0.5360, -0.0120]) tensor([0.0429, 0.2614, 0.4408, 0.2548]) -Greedy action tensor([-1.7576, 0.1587, 0.5108, -0.0806]) tensor([0.0438, 0.2979, 0.4237, 0.2345]) -Greedy action tensor([-1.6931, -0.2373, 0.5421, 0.0025]) tensor([0.0498, 0.2135, 0.4654, 0.2713]) -Greedy action tensor([-1.9220, -0.4232, 0.6606, -0.1624]) tensor([0.0408, 0.1826, 0.5397, 0.2370]) -Greedy action tensor([-1.3430, -0.5990, 0.9190, 0.7925]) tensor([0.0472, 0.0994, 0.4536, 0.3997]) -Greedy action tensor([-1.7867, -0.4542, 0.6281, -0.0386]) tensor([0.0460, 0.1745, 0.5151, 0.2644]) -Greedy action tensor([-1.7814, -0.1100, 0.5371, -0.0706]) tensor([0.0454, 0.2417, 0.4616, 0.2514]) -Greedy action tensor([-1.6765, -0.2790, 0.5277, -0.0366]) tensor([0.0519, 0.2100, 0.4705, 0.2676]) -Greedy action tensor([-1.9412, -0.4488, 0.6663, -0.1779]) tensor([0.0403, 0.1790, 0.5460, 0.2347]) -Greedy action tensor([-1.7577, -0.2667, 0.6267, -0.0439]) tensor([0.0458, 0.2033, 0.4968, 0.2541]) -Greedy action tensor([-1.7601, -0.4049, 0.5584, -0.0776]) tensor([0.0490, 0.1899, 0.4977, 0.2634]) -Greedy action tensor([-1.6011e+00, -4.1465e-01, 5.0667e-01, 9.4056e-05]) tensor([0.0573, 0.1876, 0.4712, 0.2839]) -Greedy action tensor([-1.8658, -0.4642, 0.6354, -0.1441]) tensor([0.0438, 0.1777, 0.5337, 0.2448]) -Greedy action tensor([-1.9349, -0.4393, 0.6632, -0.1750]) tensor([0.0405, 0.1806, 0.5438, 0.2352]) -Greedy action tensor([-1.3587, -0.2229, 0.4165, -0.1099]) tensor([0.0741, 0.2306, 0.4371, 0.2582]) -Greedy action tensor([-1.8664, -0.4428, 0.6262, -0.1518]) tensor([0.0439, 0.1821, 0.5304, 0.2436]) -Greedy action tensor([ 0.8010, -0.3738, -0.1118, -0.3642]) tensor([0.4945, 0.1527, 0.1985, 0.1542]) -Greedy action tensor([ 0.2161, 0.0989, -0.0964, -0.1585]) tensor([0.3022, 0.2688, 0.2211, 0.2078]) -Greedy action tensor([ 0.3820, -0.0906, -0.1007, 0.0033]) tensor([0.3419, 0.2131, 0.2110, 0.2341]) -Greedy action tensor([ 0.9139, -0.4480, -0.0655, -0.3155]) tensor([0.5197, 0.1331, 0.1952, 0.1520]) -Greedy action tensor([ 0.6048, -0.4845, -0.0340, -0.2523]) tensor([0.4369, 0.1470, 0.2307, 0.1854]) -Greedy action tensor([ 0.9226, -0.4904, -0.2514, -0.4033]) tensor([0.5500, 0.1339, 0.1700, 0.1461]) -Greedy action tensor([ 1.0309, -0.5792, 0.0013, -0.1995]) tensor([0.5408, 0.1081, 0.1931, 0.1580]) -Greedy action tensor([ 1.0779, -0.7203, -0.0509, -0.5298]) tensor([0.5919, 0.0980, 0.1914, 0.1186]) -Greedy action tensor([ 0.4596, 0.0122, -0.0816, -0.1945]) tensor([0.3648, 0.2332, 0.2123, 0.1897]) -Greedy action tensor([ 0.9905, -0.8320, 0.0567, -0.5533]) tensor([0.5655, 0.0914, 0.2223, 0.1208]) -Greedy action tensor([ 0.8661, -0.5330, 0.0747, -0.3898]) tensor([0.5038, 0.1244, 0.2283, 0.1435]) -Greedy action tensor([ 0.5161, -0.3403, -0.0803, -0.0233]) tensor([0.3908, 0.1660, 0.2153, 0.2279]) -Greedy action tensor([ 0.6205, -0.5619, -0.1467, -0.1100]) tensor([0.4440, 0.1361, 0.2061, 0.2138]) -Greedy action tensor([ 0.9700, -0.4058, -0.0228, -0.3091]) tensor([0.5259, 0.1329, 0.1949, 0.1464]) -Greedy action tensor([ 0.5586, -0.2572, -0.0455, -0.3245]) tensor([0.4163, 0.1841, 0.2275, 0.1721]) -Greedy action tensor([ 0.7773, -0.6675, -0.1158, -0.3247]) tensor([0.5057, 0.1192, 0.2070, 0.1680]) -Greedy action tensor([ 1.0602, -0.9837, 0.1269, -0.5015]) tensor([0.5772, 0.0748, 0.2270, 0.1211]) -Greedy action tensor([ 0.6903, 0.1566, -0.0668, 0.1118]) tensor([0.3822, 0.2241, 0.1793, 0.2143]) -Greedy action tensor([ 0.8811, -0.6510, 0.0337, -0.4563]) tensor([0.5243, 0.1133, 0.2247, 0.1377]) -Greedy action tensor([ 1.0487, -0.5040, -0.2134, -0.3203]) tensor([0.5717, 0.1210, 0.1618, 0.1454]) -Greedy action tensor([ 0.9168, -0.4922, 0.0468, -0.4208]) tensor([0.5193, 0.1269, 0.2175, 0.1363]) -Greedy action tensor([ 0.9776, -0.6986, 0.1851, -0.5049]) tensor([0.5357, 0.1002, 0.2425, 0.1216]) -Greedy action tensor([ 0.9185, -0.4780, -0.0390, -0.2614]) tensor([0.5158, 0.1277, 0.1980, 0.1585]) -Greedy action tensor([ 0.8875, -0.9020, 0.0486, -0.5071]) tensor([0.5414, 0.0904, 0.2340, 0.1342]) -Greedy action tensor([ 0.8355, -0.5174, -0.0639, -0.1939]) tensor([0.4944, 0.1278, 0.2011, 0.1766]) -Greedy action tensor([ 0.5273, -0.1324, 0.0371, -0.1461]) tensor([0.3789, 0.1959, 0.2321, 0.1932]) -Greedy action tensor([ 1.0984, -0.6135, 0.0478, -0.5567]) tensor([0.5810, 0.1049, 0.2032, 0.1110]) -Greedy action tensor([ 0.5474, 0.2434, -0.1172, -0.0054]) tensor([0.3536, 0.2609, 0.1819, 0.2035]) -Greedy action tensor([ 0.6353, -0.4092, -0.0301, -0.3104]) tensor([0.4436, 0.1561, 0.2280, 0.1723]) -Greedy action tensor([ 1.1160, -0.9729, 0.1429, -0.4611]) tensor([0.5854, 0.0725, 0.2212, 0.1209]) -Greedy action tensor([ 0.7869, -0.4724, -0.1328, -0.2031]) tensor([0.4869, 0.1382, 0.1941, 0.1809]) -Greedy action tensor([ 0.5868, -0.2107, -0.0307, -0.1209]) tensor([0.4028, 0.1815, 0.2172, 0.1985]) -Greedy action tensor([ 0.6408, -0.1051, -0.0351, -0.0657]) tensor([0.4038, 0.1915, 0.2054, 0.1992]) -Greedy action tensor([ 1.1226, 0.2383, -0.0536, 0.1088]) tensor([0.4798, 0.1981, 0.1480, 0.1741]) -Greedy action tensor([ 0.5710, -0.1623, -0.0703, -0.0293]) tensor([0.3913, 0.1880, 0.2061, 0.2147]) -Greedy action tensor([ 0.5548, 0.0612, -0.0440, -0.0348]) tensor([0.3684, 0.2249, 0.2024, 0.2043]) -Greedy action tensor([ 0.6934, -0.0756, -0.0292, -0.0024]) tensor([0.4086, 0.1893, 0.1983, 0.2037]) -Greedy action tensor([ 0.6215, -0.3082, -0.1288, -0.2232]) tensor([0.4354, 0.1718, 0.2056, 0.1871]) -Greedy action tensor([ 0.8853, -0.8133, 0.0408, -0.4991]) tensor([0.5367, 0.0982, 0.2307, 0.1344]) -Greedy action tensor([ 0.6002, -0.4934, -0.0227, -0.2379]) tensor([0.4340, 0.1454, 0.2328, 0.1877]) -Greedy action tensor([ 0.6428, -0.3434, -0.0698, -0.2993]) tensor([0.4438, 0.1655, 0.2176, 0.1730]) -Greedy action tensor([ 0.4794, -0.4633, -0.2529, -0.1306]) tensor([0.4143, 0.1614, 0.1992, 0.2251]) -Greedy action tensor([ 0.8296, -0.1655, -0.1718, -0.0399]) tensor([0.4638, 0.1715, 0.1704, 0.1944]) -Greedy action tensor([ 1.0007, -0.3759, -0.0993, -0.2348]) tensor([0.5330, 0.1346, 0.1774, 0.1550]) -Greedy action tensor([ 0.6958, -0.5368, 0.0826, -0.1440]) tensor([0.4415, 0.1287, 0.2391, 0.1906]) -Greedy action tensor([ 0.6722, -0.1942, -0.0176, -0.0567]) tensor([0.4159, 0.1749, 0.2086, 0.2006]) -Greedy action tensor([ 0.8068, -0.8500, -0.0297, -0.3376]) tensor([0.5148, 0.0982, 0.2230, 0.1639]) -Greedy action tensor([ 0.4227, -0.2824, -0.0733, -0.1747]) tensor([0.3769, 0.1862, 0.2295, 0.2074]) -Greedy action tensor([ 0.8835, -0.5032, -0.0364, -0.2347]) tensor([0.5062, 0.1265, 0.2018, 0.1655]) -Greedy action tensor([ 0.8481, -0.5688, -0.0730, -0.2221]) tensor([0.5042, 0.1222, 0.2007, 0.1729]) -Greedy action tensor([ 0.6413, -0.1506, -0.1093, -0.1275]) tensor([0.4187, 0.1896, 0.1976, 0.1941]) -Greedy action tensor([ 0.8948, -0.5343, -0.1839, -0.3839]) tensor([0.5382, 0.1289, 0.1830, 0.1498]) -Greedy action tensor([ 0.7098, -0.2759, -0.0824, -0.1931]) tensor([0.4482, 0.1672, 0.2029, 0.1817]) -Greedy action tensor([ 0.7734, -0.4985, 0.0859, -0.4710]) tensor([0.4828, 0.1353, 0.2428, 0.1391]) -Greedy action tensor([ 0.7733, -0.3342, -0.0832, -0.1463]) tensor([0.4643, 0.1534, 0.1972, 0.1851]) -Greedy action tensor([ 1.0607, -0.9902, 0.1243, -0.5288]) tensor([0.5798, 0.0746, 0.2273, 0.1183]) -Greedy action tensor([ 0.2343, -0.2438, -0.0897, -0.2685]) tensor([0.3392, 0.2103, 0.2453, 0.2052]) -Greedy action tensor([ 0.8493, -0.7395, 0.0670, -0.3803]) tensor([0.5118, 0.1045, 0.2341, 0.1497]) -Greedy action tensor([ 0.6006, -0.3656, -0.0965, -0.1300]) tensor([0.4237, 0.1612, 0.2110, 0.2041]) -Greedy action tensor([ 0.5247, -0.2945, 0.0421, -0.4400]) tensor([0.4100, 0.1807, 0.2530, 0.1562]) -Greedy action tensor([ 0.7598, 0.0852, -0.0897, 0.0837]) tensor([0.4089, 0.2083, 0.1749, 0.2080]) -Greedy action tensor([ 0.7939, -0.1497, 0.0414, -0.0233]) tensor([0.4344, 0.1691, 0.2047, 0.1919]) -Greedy action tensor([ 0.6703, -0.3336, -0.1121, -0.2885]) tensor([0.4531, 0.1660, 0.2072, 0.1737]) -Greedy action tensor([0.1005, 0.9107, 0.1056, 0.5290]) tensor([0.1728, 0.3884, 0.1736, 0.2652]) -Greedy action tensor([ 0.6386, -0.5481, 0.0332, -0.3599]) tensor([0.4506, 0.1375, 0.2459, 0.1660]) -Greedy action tensor([ 0.8131, -0.5113, -0.0653, -0.4880]) tensor([0.5119, 0.1361, 0.2126, 0.1393]) -Greedy action tensor([ 0.7898, -0.4603, -0.0963, -0.2082]) tensor([0.4837, 0.1386, 0.1994, 0.1783]) -Greedy action tensor([ 0.4280, -0.1912, 0.0278, -0.2281]) tensor([0.3667, 0.1974, 0.2457, 0.1902]) -Greedy action tensor([ 0.4048, 0.0913, -0.0865, -0.4367]) tensor([0.3605, 0.2635, 0.2206, 0.1554]) -Greedy action tensor([ 0.8808, -0.4701, 0.0195, -0.5900]) tensor([0.5232, 0.1355, 0.2211, 0.1202]) -Greedy action tensor([ 1.3597, -1.0785, 0.0331, -0.8042]) tensor([0.6814, 0.0595, 0.1808, 0.0783]) -Greedy action tensor([ 0.3041, -0.0463, -0.0812, -0.1498]) tensor([0.3311, 0.2333, 0.2253, 0.2103]) -Greedy action tensor([ 0.6315, -0.4190, 0.0020, -0.2148]) tensor([0.4326, 0.1513, 0.2305, 0.1856]) -Greedy action tensor([ 0.5474, -0.4622, -0.1393, -0.0649]) tensor([0.4150, 0.1512, 0.2088, 0.2250]) -Greedy action tensor([ 0.6346, -0.3350, -0.0532, -0.4871]) tensor([0.4530, 0.1718, 0.2277, 0.1475]) -Greedy action tensor([ 0.9660, -0.4783, -0.0291, -0.2853]) tensor([0.5286, 0.1247, 0.1954, 0.1512]) -Greedy action tensor([ 0.9011, -0.4308, -0.0190, -0.4378]) tensor([0.5196, 0.1372, 0.2070, 0.1362]) -Greedy action tensor([ 0.9632, -0.4307, -0.1421, -0.4662]) tensor([0.5499, 0.1364, 0.1821, 0.1317]) -Greedy action tensor([ 1.0054, -0.5757, -0.0715, -0.6498]) tensor([0.5756, 0.1184, 0.1961, 0.1100]) -Greedy action tensor([ 1.4179, -0.8843, -0.1142, -0.4587]) tensor([0.6806, 0.0681, 0.1471, 0.1042]) -Greedy action tensor([ 0.6896, -0.0927, -0.0251, -0.2790]) tensor([0.4299, 0.1966, 0.2103, 0.1632]) -Greedy action tensor([ 1.6409, 0.2348, -0.1097, 0.1301]) tensor([0.6099, 0.1495, 0.1059, 0.1346]) -Greedy action tensor([ 1.3879, -0.2975, -0.4057, 0.3938]) tensor([0.5808, 0.1077, 0.0966, 0.2149]) -Greedy action tensor([ 1.1673, -0.6454, -0.3519, 0.4007]) tensor([0.5415, 0.0884, 0.1185, 0.2516]) -Greedy action tensor([ 1.3327, -0.4956, -0.2731, 0.3779]) tensor([0.5726, 0.0920, 0.1149, 0.2204]) -Greedy action tensor([ 1.7218, -0.7828, -0.9106, -0.1391]) tensor([0.7638, 0.0624, 0.0549, 0.1188]) -Greedy action tensor([ 1.2428, -0.1389, -0.7215, 0.0980]) tensor([0.5849, 0.1469, 0.0820, 0.1862]) -Greedy action tensor([ 1.4998, -0.0473, -0.2277, 0.1184]) tensor([0.6091, 0.1296, 0.1083, 0.1530]) -Greedy action tensor([ 1.9493, -1.1396, -0.5789, 0.3395]) tensor([0.7545, 0.0344, 0.0602, 0.1509]) -Greedy action tensor([ 1.2948, -0.3798, -0.5883, 0.6604]) tensor([0.5348, 0.1002, 0.0814, 0.2836]) -Greedy action tensor([ 1.2737, -0.4963, -0.0282, 0.0133]) tensor([0.5794, 0.0987, 0.1576, 0.1643]) -Greedy action tensor([ 1.5075, -0.6031, -0.2482, 0.2864]) tensor([0.6294, 0.0763, 0.1087, 0.1856]) -Greedy action tensor([ 1.8801, -0.7851, -0.1729, 0.7171]) tensor([0.6620, 0.0461, 0.0850, 0.2069]) -Greedy action tensor([ 2.2718, -0.9838, -0.1452, 0.6352]) tensor([0.7562, 0.0292, 0.0674, 0.1472]) -Greedy action tensor([ 1.3104, -0.0830, -0.9510, 0.3604]) tensor([0.5750, 0.1427, 0.0599, 0.2224]) -Greedy action tensor([ 1.7188, -0.6845, -0.3445, 0.4056]) tensor([0.6728, 0.0608, 0.0855, 0.1809]) -Greedy action tensor([ 2.0138, -0.9963, -0.3864, 0.6140]) tensor([0.7212, 0.0355, 0.0654, 0.1779]) -Greedy action tensor([ 1.5620, -0.4793, -0.6264, 0.0701]) tensor([0.6817, 0.0885, 0.0764, 0.1533]) -Greedy action tensor([ 1.9910, -0.6182, -0.3423, 0.2206]) tensor([0.7458, 0.0549, 0.0723, 0.1270]) -Greedy action tensor([ 1.7228, -0.5673, -0.8701, 0.3451]) tensor([0.7002, 0.0709, 0.0524, 0.1765]) -Greedy action tensor([ 1.7391, -0.4130, -0.6785, 0.3509]) tensor([0.6873, 0.0799, 0.0613, 0.1715]) -Greedy action tensor([ 2.2388, 0.0075, -0.4533, 0.4873]) tensor([0.7415, 0.0796, 0.0502, 0.1287]) -Greedy action tensor([ 1.5802, -0.1061, -0.8472, 0.6125]) tensor([0.6048, 0.1120, 0.0534, 0.2298]) -Greedy action tensor([ 1.9168, -0.5983, -0.2052, 0.1303]) tensor([0.7309, 0.0591, 0.0876, 0.1225]) -Greedy action tensor([ 1.1672, -0.4634, -0.4900, 0.5833]) tensor([0.5143, 0.1007, 0.0981, 0.2869]) -Greedy action tensor([ 1.2806e+00, 1.0973e-01, -5.9635e-01, 2.6174e-04]) tensor([0.5744, 0.1781, 0.0879, 0.1596]) -Greedy action tensor([ 0.5615, -0.3029, 0.1763, 0.0011]) tensor([0.3742, 0.1576, 0.2546, 0.2137]) -Greedy action tensor([ 1.0842, 0.0809, -0.6064, 0.3870]) tensor([0.4880, 0.1789, 0.0900, 0.2430]) -Greedy action tensor([ 1.4484, -0.7087, -0.4780, 0.5376]) tensor([0.6011, 0.0695, 0.0876, 0.2418]) -Greedy action tensor([ 1.6655, -0.7252, -0.4492, 0.3783]) tensor([0.6719, 0.0615, 0.0811, 0.1855]) -Greedy action tensor([ 1.9302, -0.3812, -0.7102, 0.6586]) tensor([0.6893, 0.0683, 0.0492, 0.1933]) -Greedy action tensor([ 1.1503, -0.1360, -0.2220, 0.0126]) tensor([0.5404, 0.1493, 0.1370, 0.1732]) -Greedy action tensor([ 1.8462, -0.5698, -0.5791, 0.5148]) tensor([0.6936, 0.0619, 0.0613, 0.1832]) -Greedy action tensor([ 1.5577, -0.7442, -0.4067, -0.2108]) tensor([0.7088, 0.0709, 0.0994, 0.1209]) -Greedy action tensor([ 1.6226, -0.4294, -0.6678, 0.1045]) tensor([0.6902, 0.0887, 0.0699, 0.1512]) -Greedy action tensor([ 1.4009, -0.3439, -0.7094, 0.2545]) tensor([0.6197, 0.1083, 0.0751, 0.1969]) -Greedy action tensor([ 1.2783, -0.4547, -0.8817, 0.3996]) tensor([0.5857, 0.1035, 0.0675, 0.2433]) -Greedy action tensor([ 1.8675, -0.7471, -0.3045, -0.0614]) tensor([0.7505, 0.0549, 0.0855, 0.1090]) -Greedy action tensor([ 1.5709, -0.5570, -0.8745, 0.3260]) tensor([0.6694, 0.0797, 0.0580, 0.1928]) -Greedy action tensor([ 1.0573, -0.9696, -0.2459, -0.1855]) tensor([0.5910, 0.0779, 0.1606, 0.1706]) -Greedy action tensor([ 1.5810, -0.2001, -0.2229, 0.1680]) tensor([0.6343, 0.1069, 0.1044, 0.1544]) -Greedy action tensor([ 1.4775, -0.7063, -0.0319, 0.4575]) tensor([0.5902, 0.0665, 0.1305, 0.2128]) -Greedy action tensor([ 2.1551, -0.9240, -0.3581, 0.4197]) tensor([0.7673, 0.0353, 0.0622, 0.1353]) -Greedy action tensor([ 0.9507, 0.0225, -0.6214, -0.1177]) tensor([0.5138, 0.2031, 0.1067, 0.1765]) -Greedy action tensor([ 1.5255, -0.3452, -0.7174, 0.3136]) tensor([0.6419, 0.0989, 0.0681, 0.1911]) -Greedy action tensor([ 1.7653, -0.4343, -0.6567, 0.7095]) tensor([0.6462, 0.0716, 0.0573, 0.2248]) -Greedy action tensor([ 1.4749, -0.2466, -0.4318, 0.0585]) tensor([0.6370, 0.1139, 0.0946, 0.1545]) -Greedy action tensor([ 2.4538, -1.0722, 0.1064, 0.5699]) tensor([0.7831, 0.0230, 0.0749, 0.1190]) -Greedy action tensor([ 1.4251, -0.8320, -0.0857, 0.6354]) tensor([0.5620, 0.0588, 0.1241, 0.2551]) -Greedy action tensor([ 2.1053, -0.6785, -0.7462, 0.0305]) tensor([0.8031, 0.0496, 0.0464, 0.1009]) -Greedy action tensor([ 1.7984, -0.4361, -0.5329, 0.3352]) tensor([0.6965, 0.0746, 0.0677, 0.1612]) -Greedy action tensor([ 0.9086, -0.6644, -0.2527, 0.1151]) tensor([0.5069, 0.1051, 0.1587, 0.2293]) -Greedy action tensor([ 1.4724, -0.2602, -0.3335, 0.1675]) tensor([0.6202, 0.1097, 0.1019, 0.1682]) -Greedy action tensor([ 0.8893, -0.2180, -0.1915, 0.0283]) tensor([0.4779, 0.1579, 0.1622, 0.2020]) -Greedy action tensor([ 2.1133, -1.0275, -0.3996, 0.8765]) tensor([0.7069, 0.0306, 0.0573, 0.2052]) -Greedy action tensor([ 2.7674, -1.0123, -0.5241, 0.6951]) tensor([0.8432, 0.0193, 0.0314, 0.1062]) -Greedy action tensor([ 2.0087, 0.3168, -0.1885, 0.3566]) tensor([0.6725, 0.1239, 0.0747, 0.1289]) -Greedy action tensor([ 1.3736, -0.0615, -0.9282, 0.3968]) tensor([0.5832, 0.1389, 0.0584, 0.2196]) -Greedy action tensor([ 1.5461, -0.1138, -1.0653, 0.5191]) tensor([0.6167, 0.1173, 0.0453, 0.2208]) -Greedy action tensor([ 1.4871, -0.4759, -0.7057, 0.1544]) tensor([0.6597, 0.0926, 0.0736, 0.1740]) -Greedy action tensor([ 2.0459, -1.1556, 0.1209, 1.0799]) tensor([0.6381, 0.0260, 0.0931, 0.2429]) -Greedy action tensor([ 1.0408, -0.4116, -0.5233, 0.5605]) tensor([0.4850, 0.1135, 0.1015, 0.3000]) -Greedy action tensor([ 1.4157, -0.3773, -0.6311, 0.6679]) tensor([0.5653, 0.0941, 0.0730, 0.2676]) -Greedy action tensor([ 1.3579, -0.5062, -0.7233, 0.2225]) tensor([0.6246, 0.0968, 0.0779, 0.2007]) -Greedy action tensor([ 1.4158, -0.2260, -0.6367, 0.5818]) tensor([0.5694, 0.1102, 0.0731, 0.2473]) -Greedy action tensor([ 1.7789, -1.0968, -0.7578, 0.8514]) tensor([0.6531, 0.0368, 0.0517, 0.2584]) -Greedy action tensor([ 1.3371, -0.6018, -0.3577, 0.3660]) tensor([0.5861, 0.0843, 0.1076, 0.2219]) -Greedy action tensor([ 1.6771, -0.3110, -0.7893, 0.3244]) tensor([0.6755, 0.0925, 0.0573, 0.1746]) -Greedy action tensor([ 1.5130, -0.3544, -0.5900, 0.5928]) tensor([0.5970, 0.0923, 0.0729, 0.2379]) -Greedy action tensor([ 1.4089, -0.5119, -0.3108, 0.2526]) tensor([0.6097, 0.0893, 0.1092, 0.1918]) -Greedy action tensor([ 1.3630, 0.0785, -0.3122, 0.2733]) tensor([0.5554, 0.1537, 0.1040, 0.1868]) -Greedy action tensor([ 1.8860, -1.0077, -0.1466, 0.8080]) tensor([0.6550, 0.0363, 0.0858, 0.2229]) -Greedy action tensor([ 1.7533, -1.2609, -0.2314, 0.2341]) tensor([0.7115, 0.0349, 0.0978, 0.1558]) -Greedy action tensor([ 2.1972, -0.8878, -0.2754, 0.7668]) tensor([0.7303, 0.0334, 0.0616, 0.1747]) -Greedy action tensor([ 2.4727, -1.4511, -0.1096, 0.7270]) tensor([0.7875, 0.0156, 0.0595, 0.1374]) -Greedy action tensor([ 1.9219, -0.5180, -0.6642, 0.8523]) tensor([0.6642, 0.0579, 0.0500, 0.2279]) -Greedy action tensor([ 1.6133, -0.6284, -0.7135, -0.0066]) tensor([0.7134, 0.0758, 0.0696, 0.1412]) -Greedy action tensor([ 1.6730, -0.5259, -0.4979, 0.2539]) tensor([0.6817, 0.0756, 0.0778, 0.1649]) -Greedy action tensor([ 1.4174, -0.3221, -0.9500, 0.5940]) tensor([0.5854, 0.1028, 0.0549, 0.2569]) -Greedy action tensor([ 1.3946, -0.8850, -0.0492, -0.1112]) tensor([0.6409, 0.0656, 0.1513, 0.1422]) -Greedy action tensor([ 1.4724, -0.5419, 0.0022, 0.3655]) tensor([0.5903, 0.0788, 0.1357, 0.1952]) -Greedy action tensor([ 2.2876, -0.8065, -0.8223, 0.7483]) tensor([0.7666, 0.0347, 0.0342, 0.1645]) -Greedy action tensor([-0.6206, 0.3873, 0.1984, -0.8662]) tensor([0.1473, 0.4035, 0.3340, 0.1152]) -Greedy action tensor([ 0.5952, 0.3223, 1.0313, -0.6131]) tensor([0.2773, 0.2110, 0.4289, 0.0828]) -Greedy action tensor([-1.7576, -0.4708, -1.3312, 0.9060]) tensor([0.0488, 0.1766, 0.0747, 0.6999]) -Greedy action tensor([ 0.2313, -1.1269, -0.5915, 0.2444]) tensor([0.3691, 0.0949, 0.1621, 0.3739]) -Greedy action tensor([ 1.0551, -1.1970, -0.4046, 0.2910]) tensor([0.5546, 0.0583, 0.1288, 0.2583]) -Greedy action tensor([ 0.9260, -0.5852, -0.2282, 0.9336]) tensor([0.3931, 0.0867, 0.1240, 0.3961]) -Greedy action tensor([ 0.2707, -0.5698, 0.5163, -0.0781]) tensor([0.2928, 0.1263, 0.3743, 0.2066]) -Greedy action tensor([ 0.0737, 0.2696, -1.1303, 0.8784]) tensor([0.2104, 0.2559, 0.0631, 0.4705]) -Greedy action tensor([ 1.4772, -0.3690, 0.5820, 1.4859]) tensor([0.3883, 0.0613, 0.1586, 0.3917]) -Greedy action tensor([-0.0558, 0.0231, 0.8898, -0.4884]) tensor([0.1885, 0.2040, 0.4852, 0.1223]) -Greedy action tensor([-0.0577, -0.5953, 0.5506, -0.1065]) tensor([0.2286, 0.1336, 0.4201, 0.2178]) -Greedy action tensor([-0.3063, 0.4801, -0.1122, -0.4276]) tensor([0.1888, 0.4146, 0.2293, 0.1673]) -Greedy action tensor([ 0.1576, -0.8317, 0.4620, 0.5475]) tensor([0.2378, 0.0884, 0.3225, 0.3513]) -Greedy action tensor([ 0.1277, -1.0720, 1.6935, 0.0069]) tensor([0.1434, 0.0432, 0.6863, 0.1271]) -Greedy action tensor([-0.4650, -0.2161, -0.0046, -0.7097]) tensor([0.2150, 0.2758, 0.3408, 0.1684]) -Greedy action tensor([ 0.1388, -0.5655, -1.0136, -0.3887]) tensor([0.4166, 0.2060, 0.1316, 0.2458]) -Greedy action tensor([ 0.7191, -0.7548, 0.5567, -0.1871]) tensor([0.4027, 0.0922, 0.3423, 0.1627]) -Greedy action tensor([-0.2444, -0.0863, -0.2353, -0.3637]) tensor([0.2458, 0.2879, 0.2481, 0.2182]) -Greedy action tensor([ 0.1365, -2.1160, 0.4013, -0.3476]) tensor([0.3306, 0.0348, 0.4309, 0.2038]) -Greedy action tensor([-0.5668, -1.5989, 0.1720, 0.1865]) tensor([0.1794, 0.0639, 0.3756, 0.3811]) -Greedy action tensor([ 0.4744, 0.6962, 0.0014, -0.0405]) tensor([0.2883, 0.3599, 0.1796, 0.1723]) -Greedy action tensor([-0.6103, -1.0663, 0.9148, 0.6233]) tensor([0.1035, 0.0656, 0.4756, 0.3553]) -Greedy action tensor([-0.4190, -0.4449, -0.1178, 0.0390]) tensor([0.2038, 0.1986, 0.2754, 0.3222]) -Greedy action tensor([-0.4277, -2.1121, 0.2022, -0.0885]) tensor([0.2239, 0.0415, 0.4203, 0.3143]) -Greedy action tensor([-0.5468, -0.9414, 0.2848, -0.3047]) tensor([0.1907, 0.1285, 0.4379, 0.2429]) -Greedy action tensor([-0.2491, 0.9842, 0.6325, -0.6872]) tensor([0.1335, 0.4581, 0.3223, 0.0861]) -Greedy action tensor([ 0.9725, -0.8508, 0.7085, -0.4432]) tensor([0.4604, 0.0743, 0.3535, 0.1118]) -Greedy action tensor([ 0.5781, 0.2568, -0.1127, 0.6923]) tensor([0.2987, 0.2167, 0.1497, 0.3349]) -Greedy action tensor([-0.0525, -0.7319, -0.5972, -0.8353]) tensor([0.3931, 0.1993, 0.2280, 0.1797]) -Greedy action tensor([ 0.4589, -0.9545, 0.2829, -0.3087]) tensor([0.3928, 0.0956, 0.3294, 0.1823]) -Greedy action tensor([ 0.0839, -1.3552, 0.5526, -1.1188]) tensor([0.3189, 0.0756, 0.5096, 0.0958]) -Greedy action tensor([-1.3878, -0.3963, 0.6824, -1.0479]) tensor([0.0768, 0.2069, 0.6085, 0.1078]) -Greedy action tensor([-0.0190, 0.0591, -0.7179, -0.7742]) tensor([0.3281, 0.3547, 0.1631, 0.1542]) -Greedy action tensor([-0.1397, -0.7862, -0.5566, -0.3218]) tensor([0.3315, 0.1737, 0.2185, 0.2763]) -Greedy action tensor([ 1.0938, 1.2119, -0.1601, -0.6344]) tensor([0.3864, 0.4348, 0.1103, 0.0686]) -Greedy action tensor([ 0.6130, 0.3121, 0.0990, -0.4467]) tensor([0.3725, 0.2757, 0.2228, 0.1291]) -Greedy action tensor([ 1.0014, 0.0923, -0.3603, -0.0613]) tensor([0.4989, 0.2010, 0.1278, 0.1724]) -Greedy action tensor([ 0.8904, -1.1476, -0.7717, 1.0270]) tensor([0.4055, 0.0528, 0.0769, 0.4648]) -Greedy action tensor([-0.5823, -1.7016, 0.5555, 1.6926]) tensor([0.0706, 0.0230, 0.2201, 0.6863]) -Greedy action tensor([-0.3730, 0.8638, 0.3240, -0.3227]) tensor([0.1333, 0.4590, 0.2676, 0.1401]) -Greedy action tensor([-1.0607, -0.7900, 0.0851, -0.8297]) tensor([0.1489, 0.1952, 0.4683, 0.1876]) -Greedy action tensor([-0.3091, -1.0106, -0.1492, -0.9693]) tensor([0.3139, 0.1556, 0.3683, 0.1622]) -Greedy action tensor([ 0.3043, 0.2809, -0.4486, -0.3714]) tensor([0.3382, 0.3304, 0.1593, 0.1721]) -Greedy action tensor([ 0.6241, -1.9848, -0.7388, 0.5722]) tensor([0.4388, 0.0323, 0.1123, 0.4166]) -Greedy action tensor([ 0.4256, 1.1975, -0.0429, -0.4981]) tensor([0.2388, 0.5168, 0.1495, 0.0948]) -Greedy action tensor([ 0.4222, -0.5769, 0.5742, 0.1931]) tensor([0.3005, 0.1107, 0.3498, 0.2390]) -Greedy action tensor([ 0.2995, -1.1183, -0.1773, -0.2237]) tensor([0.4072, 0.0986, 0.2528, 0.2413]) -Greedy action tensor([ 0.1869, -0.9130, -0.3177, -0.3861]) tensor([0.3999, 0.1331, 0.2415, 0.2255]) -Greedy action tensor([ 0.2919, -0.1298, 0.3020, 0.0126]) tensor([0.2922, 0.1917, 0.2951, 0.2210]) -Greedy action tensor([ 0.2552, -0.4537, -0.0271, -0.1665]) tensor([0.3446, 0.1696, 0.2598, 0.2260]) -Greedy action tensor([ 0.2841, -0.2707, -0.0340, 0.2224]) tensor([0.3085, 0.1771, 0.2244, 0.2900]) -Greedy action tensor([-0.8784, -2.4958, -0.7231, 0.8320]) tensor([0.1266, 0.0251, 0.1479, 0.7004]) -Greedy action tensor([ 0.6418, -0.2031, 0.5272, 0.2343]) tensor([0.3348, 0.1438, 0.2986, 0.2228]) -Greedy action tensor([-0.9559, 0.0134, 0.4252, -0.5962]) tensor([0.1105, 0.2913, 0.4398, 0.1584]) -Greedy action tensor([ 0.1328, -0.2533, -1.3048, 0.0589]) tensor([0.3514, 0.2388, 0.0835, 0.3263]) -Greedy action tensor([ 1.7915, -0.6879, 0.0430, 1.0098]) tensor([0.5829, 0.0488, 0.1014, 0.2668]) -Greedy action tensor([-1.3398, 0.1213, 0.3748, 0.0037]) tensor([0.0680, 0.2933, 0.3779, 0.2608]) -Greedy action tensor([ 1.0785, 0.8983, 1.0409, -0.4681]) tensor([0.3321, 0.2773, 0.3198, 0.0707]) -Greedy action tensor([ 0.4221, -0.5986, 0.2412, 0.3667]) tensor([0.3184, 0.1147, 0.2657, 0.3012]) -Greedy action tensor([ 1.3748, -0.1051, 0.1422, 0.1218]) tensor([0.5541, 0.1261, 0.1615, 0.1583]) -Greedy action tensor([-0.8014, -0.2617, -0.9176, -0.3832]) tensor([0.1951, 0.3347, 0.1737, 0.2964]) -Greedy action tensor([ 1.3182, -0.9212, -0.6290, 1.0040]) tensor([0.5052, 0.0538, 0.0721, 0.3690]) -Greedy action tensor([ 0.2944, 0.4741, 0.4506, -0.0598]) tensor([0.2458, 0.2942, 0.2874, 0.1725]) -Greedy action tensor([ 0.5263, 0.1903, 0.2155, -0.1301]) tensor([0.3371, 0.2409, 0.2471, 0.1749]) -Greedy action tensor([ 0.4035, -1.5563, 0.5469, -0.0379]) tensor([0.3404, 0.0480, 0.3928, 0.2189]) -Greedy action tensor([1.3055, 0.6461, 0.4789, 0.5176]) tensor([0.4150, 0.2146, 0.1816, 0.1888]) -Greedy action tensor([-0.2285, 0.4062, -0.0026, 0.2684]) tensor([0.1729, 0.3262, 0.2167, 0.2842]) -Greedy action tensor([ 1.6827, -0.1168, 0.3970, 0.7452]) tensor([0.5454, 0.0902, 0.1508, 0.2136]) -Greedy action tensor([-0.7382, -1.5583, 0.7094, -0.6418]) tensor([0.1472, 0.0648, 0.6259, 0.1621]) -Greedy action tensor([-0.1313, -0.7329, -0.5842, 0.5237]) tensor([0.2434, 0.1334, 0.1547, 0.4685]) -Greedy action tensor([-0.0201, -0.0013, 0.1247, 0.2080]) tensor([0.2257, 0.2300, 0.2608, 0.2835]) -Greedy action tensor([-0.7499, -0.4184, 0.2960, -0.6028]) tensor([0.1563, 0.2177, 0.4449, 0.1811]) -Greedy action tensor([-0.1886, -0.5604, 0.5926, -0.1627]) tensor([0.2041, 0.1407, 0.4457, 0.2094]) -Greedy action tensor([-1.1719, -0.6670, -0.1251, -0.8489]) tensor([0.1452, 0.2406, 0.4136, 0.2006]) -Greedy action tensor([ 0.4959, -1.6478, 0.0945, 0.4477]) tensor([0.3650, 0.0428, 0.2444, 0.3478]) -Greedy action tensor([ 0.2396, 0.0841, -0.6736, -0.7266]) tensor([0.3791, 0.3245, 0.1521, 0.1443]) -Greedy action tensor([ 1.1250, -0.5385, -0.2266, 0.9729]) tensor([0.4334, 0.0821, 0.1122, 0.3723]) -Greedy action tensor([-0.1810, -0.4307, -0.4867, 0.0377]) tensor([0.2659, 0.2072, 0.1959, 0.3310]) -Greedy action tensor([ 0.1725, -0.9853, -0.2729, -0.3129]) tensor([0.3891, 0.1222, 0.2492, 0.2395]) -Greedy action tensor([-0.1170, -1.4507, -0.1670, 0.2853]) tensor([0.2695, 0.0710, 0.2564, 0.4030]) -Greedy action tensor([ 1.4125, -0.5243, 1.3296, 0.3094]) tensor([0.4173, 0.0602, 0.3841, 0.1385]) -Greedy action tensor([ 0.6252, -0.0375, 0.0322, -0.0846]) tensor([0.3906, 0.2014, 0.2159, 0.1921]) -Greedy action tensor([ 0.8787, -0.8516, -0.0245, -0.3430]) tensor([0.5327, 0.0944, 0.2159, 0.1570]) -Greedy action tensor([ 0.8120, -0.5445, -0.0031, -0.3710]) tensor([0.4984, 0.1284, 0.2206, 0.1527]) -Greedy action tensor([ 0.7966, -0.5696, 0.0779, -0.4540]) tensor([0.4929, 0.1257, 0.2402, 0.1411]) -Greedy action tensor([ 0.8945, -0.1761, -0.0094, -0.6181]) tensor([0.5081, 0.1742, 0.2058, 0.1120]) -Greedy action tensor([ 0.6001, -0.5358, -0.1383, -0.0918]) tensor([0.4348, 0.1397, 0.2078, 0.2177]) -Greedy action tensor([ 0.8783, -0.6024, 0.0687, -0.4691]) tensor([0.5175, 0.1177, 0.2303, 0.1345]) -Greedy action tensor([ 0.8435, -0.7114, 0.0855, -0.2853]) tensor([0.4992, 0.1054, 0.2339, 0.1615]) -Greedy action tensor([ 1.0865, -0.9581, 0.1633, -0.6425]) tensor([0.5868, 0.0759, 0.2331, 0.1041]) -Greedy action tensor([ 0.7934, -0.3666, 0.0157, -0.1376]) tensor([0.4615, 0.1447, 0.2120, 0.1819]) -Greedy action tensor([ 0.4496, 0.2320, -0.1627, 0.1136]) tensor([0.3267, 0.2628, 0.1771, 0.2335]) -Greedy action tensor([ 0.2097, 0.0444, 0.2162, -0.1897]) tensor([0.2837, 0.2405, 0.2856, 0.1903]) -Greedy action tensor([ 0.8316, -0.4482, 0.0023, -0.1983]) tensor([0.4827, 0.1343, 0.2106, 0.1724]) -Greedy action tensor([ 0.6725, -0.6221, 0.1155, -0.3647]) tensor([0.4543, 0.1245, 0.2603, 0.1610]) -Greedy action tensor([ 0.6545, -0.3737, 0.0281, -0.2797]) tensor([0.4376, 0.1565, 0.2339, 0.1719]) -Greedy action tensor([ 0.6867, -0.5336, 0.0302, -0.4823]) tensor([0.4707, 0.1389, 0.2441, 0.1462]) -Greedy action tensor([ 0.6910, -0.4435, -0.0172, -0.3075]) tensor([0.4582, 0.1473, 0.2257, 0.1688]) -Greedy action tensor([ 0.6374, -0.5777, -0.1365, -0.1049]) tensor([0.4476, 0.1328, 0.2065, 0.2131]) -Greedy action tensor([ 0.6030, -0.4696, -0.0346, -0.2334]) tensor([0.4340, 0.1485, 0.2294, 0.1881]) -Greedy action tensor([ 0.4882, -0.2384, 0.0128, 0.0398]) tensor([0.3645, 0.1762, 0.2266, 0.2328]) -Greedy action tensor([ 1.2505, -0.8406, 0.1074, -0.8100]) tensor([0.6370, 0.0787, 0.2031, 0.0812]) -Greedy action tensor([ 0.7896, 0.2274, 0.0359, -0.2835]) tensor([0.4197, 0.2392, 0.1975, 0.1435]) -Greedy action tensor([ 0.6323, -0.3941, -0.1043, -0.2211]) tensor([0.4419, 0.1583, 0.2116, 0.1882]) -Greedy action tensor([ 0.2198, 0.4923, -0.3335, -0.0376]) tensor([0.2731, 0.3587, 0.1571, 0.2111]) -Greedy action tensor([ 0.9138, -0.5317, -0.0748, -0.3029]) tensor([0.5252, 0.1238, 0.1954, 0.1556]) -Greedy action tensor([ 0.2746, 0.0680, -0.1142, -0.1609]) tensor([0.3187, 0.2592, 0.2160, 0.2062]) -Greedy action tensor([ 0.8218, -0.6821, 0.2261, -0.5564]) tensor([0.4937, 0.1097, 0.2721, 0.1244]) -Greedy action tensor([ 0.4105, -0.2278, -0.0945, -0.0705]) tensor([0.3637, 0.1921, 0.2195, 0.2248]) -Greedy action tensor([ 0.7961, -0.8282, 0.1478, -0.2744]) tensor([0.4848, 0.0955, 0.2535, 0.1662]) -Greedy action tensor([ 0.7748, -0.5566, -0.0187, -0.4416]) tensor([0.4969, 0.1312, 0.2247, 0.1472]) -Greedy action tensor([ 0.8215, -0.3095, -0.0969, -0.0481]) tensor([0.4671, 0.1507, 0.1864, 0.1958]) -Greedy action tensor([ 0.5092, -0.2715, 0.1600, -0.1041]) tensor([0.3697, 0.1693, 0.2607, 0.2002]) -Greedy action tensor([ 0.8496, -0.6715, 0.0199, -0.2077]) tensor([0.4995, 0.1091, 0.2179, 0.1735]) -Greedy action tensor([ 0.5121, -0.1781, -0.0816, 0.0100]) tensor([0.3761, 0.1886, 0.2077, 0.2276]) -Greedy action tensor([ 0.5955, -0.3514, 0.0388, -0.3782]) tensor([0.4276, 0.1659, 0.2451, 0.1615]) -Greedy action tensor([ 1.0806, -0.8709, 0.0646, -0.5079]) tensor([0.5854, 0.0832, 0.2119, 0.1195]) -Greedy action tensor([ 0.8731, -0.6030, -0.0082, -0.2072]) tensor([0.5045, 0.1153, 0.2090, 0.1713]) -Greedy action tensor([ 0.5557, 0.4535, -0.2836, 0.2422]) tensor([0.3262, 0.2945, 0.1409, 0.2384]) -Greedy action tensor([ 1.0624, -0.7080, -0.1126, -0.3621]) tensor([0.5815, 0.0990, 0.1796, 0.1399]) -Greedy action tensor([ 0.7787, -0.4407, -0.1660, -0.3940]) tensor([0.5016, 0.1482, 0.1950, 0.1553]) -Greedy action tensor([ 0.8022, -0.8179, 0.1149, -0.3797]) tensor([0.4982, 0.0986, 0.2505, 0.1528]) -Greedy action tensor([ 0.7307, -0.3515, -0.1289, -0.1822]) tensor([0.4622, 0.1566, 0.1957, 0.1855]) -Greedy action tensor([ 0.4084, -0.1238, 0.0960, -0.2574]) tensor([0.3530, 0.2073, 0.2583, 0.1814]) -Greedy action tensor([ 0.5586, 0.3158, -0.2494, 0.0238]) tensor([0.3551, 0.2786, 0.1583, 0.2080]) -Greedy action tensor([ 0.1578, 0.7891, -0.2465, -0.2491]) tensor([0.2373, 0.4462, 0.1584, 0.1580]) -Greedy action tensor([ 0.9277, -0.5589, 0.0011, -0.4013]) tensor([0.5300, 0.1199, 0.2098, 0.1403]) -Greedy action tensor([ 0.8881, -0.6980, -0.0157, -0.4560]) tensor([0.5346, 0.1094, 0.2165, 0.1394]) -Greedy action tensor([ 0.6809, -0.5957, -0.1656, -0.2548]) tensor([0.4761, 0.1328, 0.2042, 0.1868]) -Greedy action tensor([ 0.8629, -0.3385, -0.0384, -0.1677]) tensor([0.4846, 0.1458, 0.1968, 0.1729]) -Greedy action tensor([ 0.7355, -0.6577, 0.0458, -0.1928]) tensor([0.4661, 0.1157, 0.2339, 0.1842]) -Greedy action tensor([ 0.7018, -0.3273, -0.0544, -0.3477]) tensor([0.4594, 0.1641, 0.2156, 0.1608]) -Greedy action tensor([ 0.8360, -0.4202, 0.1118, -0.2045]) tensor([0.4711, 0.1341, 0.2283, 0.1664]) -Greedy action tensor([ 0.2549, 0.0480, -0.1125, -0.2740]) tensor([0.3231, 0.2627, 0.2238, 0.1904]) -Greedy action tensor([ 0.9218, -0.7937, 0.0527, -0.4561]) tensor([0.5402, 0.0972, 0.2265, 0.1362]) -Greedy action tensor([ 0.7058, -0.5022, -0.0842, -0.2761]) tensor([0.4701, 0.1405, 0.2133, 0.1761]) -Greedy action tensor([ 0.6650, -0.4865, -0.1842, -0.1534]) tensor([0.4576, 0.1447, 0.1958, 0.2019]) -Greedy action tensor([ 0.7944, -0.8611, -0.0753, -0.4421]) tensor([0.5262, 0.1005, 0.2205, 0.1528]) -Greedy action tensor([ 0.6512, -0.4190, -0.0963, -0.0864]) tensor([0.4358, 0.1495, 0.2064, 0.2084]) -Greedy action tensor([ 0.6992, -0.4391, 0.0200, -0.2318]) tensor([0.4501, 0.1442, 0.2282, 0.1774]) -Greedy action tensor([ 0.7318, -0.6321, -0.0109, -0.4881]) tensor([0.4934, 0.1262, 0.2348, 0.1457]) -Greedy action tensor([ 0.6894, -0.6858, 0.1096, -0.3359]) tensor([0.4605, 0.1164, 0.2579, 0.1652]) -Greedy action tensor([ 0.8946, -0.4191, 0.0622, -0.3529]) tensor([0.5023, 0.1350, 0.2185, 0.1443]) -Greedy action tensor([ 0.6253, -0.5737, -0.1176, -0.1800]) tensor([0.4496, 0.1356, 0.2139, 0.2009]) -Greedy action tensor([ 0.4127, -0.1052, -0.0437, -0.0580]) tensor([0.3504, 0.2088, 0.2220, 0.2188]) -Greedy action tensor([ 0.6683, -0.5737, 0.0172, -0.3508]) tensor([0.4606, 0.1330, 0.2402, 0.1662]) -Greedy action tensor([ 0.8627, -0.5140, -0.0600, -0.3922]) tensor([0.5168, 0.1305, 0.2054, 0.1473]) -Greedy action tensor([ 1.1925, -0.5745, -0.0766, -0.4320]) tensor([0.6064, 0.1036, 0.1705, 0.1195]) -Greedy action tensor([ 0.6208, -0.3075, -0.1800, -0.2200]) tensor([0.4395, 0.1737, 0.1973, 0.1896]) -Greedy action tensor([ 0.6204, -0.3003, -0.0623, -0.0716]) tensor([0.4160, 0.1657, 0.2102, 0.2082]) -Greedy action tensor([ 1.1123, -0.8286, 0.0610, -0.6722]) tensor([0.6021, 0.0864, 0.2104, 0.1011]) -Greedy action tensor([ 0.9011, -0.4630, -0.0883, -0.3519]) tensor([0.5227, 0.1336, 0.1943, 0.1493]) -Greedy action tensor([ 0.7376, -0.1882, -0.1450, -0.3965]) tensor([0.4691, 0.1859, 0.1941, 0.1509]) -Greedy action tensor([ 1.0324, -0.9139, 0.0764, -0.3852]) tensor([0.5651, 0.0807, 0.2172, 0.1369]) -Greedy action tensor([ 0.9743, -0.5610, -0.0299, -0.6469]) tensor([0.5620, 0.1210, 0.2059, 0.1111]) -Greedy action tensor([ 1.0332, -0.8188, 0.1020, -0.5573]) tensor([0.5699, 0.0894, 0.2246, 0.1161]) -Greedy action tensor([ 0.7408, -0.5410, -0.2059, -0.3208]) tensor([0.4971, 0.1380, 0.1929, 0.1720]) -Greedy action tensor([ 0.7352, -0.3332, 0.0048, -0.1767]) tensor([0.4490, 0.1543, 0.2163, 0.1804]) -Greedy action tensor([ 0.8820, -0.6930, -0.0371, -0.5053]) tensor([0.5389, 0.1116, 0.2150, 0.1346]) -Greedy action tensor([ 0.9064, -0.5284, -0.2080, -0.5329]) tensor([0.5545, 0.1321, 0.1819, 0.1315]) -Greedy action tensor([ 0.5601, -0.0340, -0.1390, -0.3660]) tensor([0.4090, 0.2258, 0.2033, 0.1620]) -Greedy action tensor([ 0.8877, -0.3088, 0.1437, -0.2977]) tensor([0.4801, 0.1451, 0.2281, 0.1467]) -Greedy action tensor([-1.6232, 0.2826, 0.3897, -0.0317]) tensor([0.0497, 0.3342, 0.3720, 0.2441]) -Greedy action tensor([-1.9137, -0.4416, 0.6478, -0.1610]) tensor([0.0415, 0.1810, 0.5379, 0.2396]) -Greedy action tensor([-1.3348, -0.1859, 0.5158, 0.2348]) tensor([0.0653, 0.2059, 0.4153, 0.3136]) -Greedy action tensor([-1.8307, -0.2798, 0.6214, -0.0862]) tensor([0.0434, 0.2046, 0.5038, 0.2483]) -Greedy action tensor([-1.3105, -0.3920, 0.5473, 0.4179]) tensor([0.0643, 0.1612, 0.4123, 0.3623]) -Greedy action tensor([-1.8913, -0.4464, 0.6447, -0.1561]) tensor([0.0425, 0.1802, 0.5365, 0.2409]) -Greedy action tensor([-1.9419, -0.4447, 0.6657, -0.1794]) tensor([0.0402, 0.1798, 0.5457, 0.2344]) -Greedy action tensor([-1.9131, -0.4442, 0.6515, -0.1651]) tensor([0.0415, 0.1804, 0.5396, 0.2385]) -Greedy action tensor([-1.5197, -0.3582, 0.6303, 0.1511]) tensor([0.0553, 0.1765, 0.4744, 0.2938]) -Greedy action tensor([-1.9126, -0.4408, 0.6708, -0.1686]) tensor([0.0411, 0.1792, 0.5445, 0.2352]) -Greedy action tensor([-1.7162, -0.4722, 0.5549, -0.0684]) tensor([0.0517, 0.1793, 0.5006, 0.2684]) -Greedy action tensor([-1.9063, -0.3319, 0.6368, -0.1471]) tensor([0.0411, 0.1982, 0.5222, 0.2385]) -Greedy action tensor([-0.8173, 0.5858, 0.1153, -0.0073]) tensor([0.1015, 0.4127, 0.2578, 0.2281]) -Greedy action tensor([-1.9108, -0.4463, 0.6533, -0.1622]) tensor([0.0416, 0.1798, 0.5399, 0.2388]) -Greedy action tensor([-1.9282, -0.4458, 0.6616, -0.1655]) tensor([0.0407, 0.1793, 0.5427, 0.2373]) -Greedy action tensor([-1.7635, -0.4484, 0.5898, -0.0788]) tensor([0.0485, 0.1805, 0.5098, 0.2612]) -Greedy action tensor([-0.9350, -0.0696, 0.0059, 0.5618]) tensor([0.0961, 0.2283, 0.2462, 0.4293]) -Greedy action tensor([-1.9064, -0.4472, 0.6510, -0.1591]) tensor([0.0418, 0.1797, 0.5388, 0.2397]) -Greedy action tensor([-1.4190, -0.4276, 0.3528, 0.0895]) tensor([0.0709, 0.1912, 0.4172, 0.3206]) -Greedy action tensor([-1.8910, -0.4374, 0.6409, -0.1403]) tensor([0.0423, 0.1812, 0.5326, 0.2438]) -Greedy action tensor([-1.8929, -0.4508, 0.6454, -0.1549]) tensor([0.0424, 0.1794, 0.5370, 0.2412]) -Greedy action tensor([-1.8643, -0.4014, 0.6374, -0.1284]) tensor([0.0431, 0.1862, 0.5261, 0.2446]) -Greedy action tensor([-1.8807, -0.3105, 0.6110, -0.1303]) tensor([0.0423, 0.2033, 0.5109, 0.2435]) -Greedy action tensor([-1.7797, -0.3379, 0.5861, -0.1535]) tensor([0.0477, 0.2017, 0.5081, 0.2425]) -Greedy action tensor([-1.7751, -0.4352, 0.5862, -0.0925]) tensor([0.0481, 0.1836, 0.5098, 0.2586]) -Greedy action tensor([-1.9347, -0.4361, 0.6631, -0.1750]) tensor([0.0405, 0.1810, 0.5434, 0.2351]) -Greedy action tensor([-1.9367, -0.4625, 0.6917, -0.1678]) tensor([0.0399, 0.1741, 0.5522, 0.2338]) -Greedy action tensor([-1.8280, -0.1304, 0.5579, -0.1171]) tensor([0.0437, 0.2388, 0.4754, 0.2421]) -Greedy action tensor([-1.9240, -0.4366, 0.6572, -0.1690]) tensor([0.0409, 0.1812, 0.5410, 0.2368]) -Greedy action tensor([-1.0889, 0.7126, 0.1012, 0.2479]) tensor([0.0707, 0.4281, 0.2323, 0.2690]) -Greedy action tensor([-1.1551e+00, -2.5054e-01, 2.7580e-01, -3.3504e-04]) tensor([0.0924, 0.2282, 0.3863, 0.2931]) -Greedy action tensor([-1.7744, -0.3557, 0.6407, -0.0824]) tensor([0.0460, 0.1899, 0.5145, 0.2496]) -Greedy action tensor([-1.0448, 0.5702, 0.1113, 0.1025]) tensor([0.0809, 0.4070, 0.2572, 0.2549]) -Greedy action tensor([-1.8993, -0.4484, 0.6460, -0.1596]) tensor([0.0422, 0.1800, 0.5376, 0.2402]) -Greedy action tensor([0.0248, 0.6019, 0.4956, 0.7705]) tensor([0.1541, 0.2744, 0.2467, 0.3248]) -Greedy action tensor([-1.9466, -0.4497, 0.6675, -0.1816]) tensor([0.0401, 0.1790, 0.5470, 0.2340]) -Greedy action tensor([-1.9152, -0.4365, 0.6505, -0.1666]) tensor([0.0414, 0.1817, 0.5388, 0.2380]) -Greedy action tensor([-1.9364, -0.4117, 0.6542, -0.1739]) tensor([0.0404, 0.1855, 0.5387, 0.2353]) -Greedy action tensor([-1.2152e+00, -2.0528e-04, 4.9352e-01, 3.1517e-01]) tensor([0.0689, 0.2322, 0.3805, 0.3183]) -Greedy action tensor([-1.8127, -0.4623, 0.5890, -0.1057]) tensor([0.0467, 0.1802, 0.5157, 0.2574]) -Greedy action tensor([-1.9186, -0.4159, 0.6532, -0.1669]) tensor([0.0411, 0.1846, 0.5376, 0.2368]) -Greedy action tensor([-1.8970, -0.3726, 0.6258, -0.1474]) tensor([0.0420, 0.1929, 0.5235, 0.2416]) -Greedy action tensor([-1.9230, -0.4343, 0.6625, -0.1623]) tensor([0.0408, 0.1807, 0.5412, 0.2372]) -Greedy action tensor([-1.2512, 0.4010, 0.2488, 0.0070]) tensor([0.0703, 0.3670, 0.3152, 0.2475]) -Greedy action tensor([-1.8243, -0.4175, 0.6401, -0.1186]) tensor([0.0448, 0.1827, 0.5262, 0.2464]) -Greedy action tensor([-1.8637, -0.5600, 0.8083, -0.1553]) tensor([0.0405, 0.1493, 0.5865, 0.2237]) -Greedy action tensor([-1.4379, 0.1446, 0.3289, 0.0776]) tensor([0.0615, 0.2991, 0.3597, 0.2797]) -Greedy action tensor([-1.8780, -0.4336, 0.6387, -0.1440]) tensor([0.0429, 0.1820, 0.5319, 0.2431]) -Greedy action tensor([-1.8798, -0.3336, 0.6324, -0.1485]) tensor([0.0422, 0.1983, 0.5209, 0.2386]) -Greedy action tensor([-1.7650, -0.4705, 0.5774, -0.1206]) tensor([0.0494, 0.1804, 0.5143, 0.2559]) -Greedy action tensor([0.3476, 0.6147, 0.5055, 1.1293]) tensor([0.1766, 0.2307, 0.2068, 0.3859]) -Greedy action tensor([-1.7426, -0.3673, 0.5546, -0.0977]) tensor([0.0498, 0.1970, 0.4953, 0.2579]) -Greedy action tensor([-1.8156, -0.2873, 0.5834, -0.1438]) tensor([0.0456, 0.2101, 0.5018, 0.2425]) -Greedy action tensor([-0.6819, 0.9629, 0.0263, 0.3365]) tensor([0.0911, 0.4718, 0.1849, 0.2522]) -Greedy action tensor([-1.8898, -0.4363, 0.6401, -0.1545]) tensor([0.0426, 0.1820, 0.5341, 0.2413]) -Greedy action tensor([-1.8072, -0.4393, 0.5956, -0.1077]) tensor([0.0466, 0.1831, 0.5153, 0.2550]) -Greedy action tensor([-1.9217, -0.4424, 0.6477, -0.1672]) tensor([0.0413, 0.1812, 0.5390, 0.2386]) -Greedy action tensor([-1.9278, -0.4301, 0.6582, -0.1711]) tensor([0.0407, 0.1822, 0.5410, 0.2361]) -Greedy action tensor([-1.8899, -0.3827, 0.6410, -0.1464]) tensor([0.0420, 0.1897, 0.5280, 0.2403]) -Greedy action tensor([-0.8856, 0.7965, 0.1410, -0.0606]) tensor([0.0873, 0.4696, 0.2438, 0.1993]) -Greedy action tensor([-1.9219, -0.4148, 0.6566, -0.1675]) tensor([0.0409, 0.1845, 0.5385, 0.2362]) -Greedy action tensor([-1.3156, -0.3384, 0.3520, 0.2274]) tensor([0.0733, 0.1949, 0.3887, 0.3431]) -Greedy action tensor([-0.4262, -0.3228, 0.9017, 1.3930]) tensor([0.0830, 0.0920, 0.3132, 0.5118]) -Greedy action tensor([-1.9411, -0.4478, 0.6668, -0.1778]) tensor([0.0402, 0.1791, 0.5460, 0.2346]) -Greedy action tensor([-1.9193, -0.4540, 0.6588, -0.1695]) tensor([0.0412, 0.1785, 0.5431, 0.2372]) -Greedy action tensor([-1.9002, -0.3895, 0.6533, -0.1357]) tensor([0.0413, 0.1870, 0.5306, 0.2411]) -Greedy action tensor([-0.7575, 0.6534, 0.2107, 0.3423]) tensor([0.0931, 0.3818, 0.2453, 0.2798]) -Greedy action tensor([-1.9234, -0.4527, 0.6610, -0.1703]) tensor([0.0410, 0.1785, 0.5437, 0.2368]) -Greedy action tensor([-0.9035, -0.1996, 0.2583, -0.0405]) tensor([0.1165, 0.2354, 0.3721, 0.2760]) -Greedy action tensor([-1.9470, -0.4515, 0.6685, -0.1818]) tensor([0.0400, 0.1786, 0.5474, 0.2339]) -Greedy action tensor([-1.8139, -0.4117, 0.6435, -0.1123]) tensor([0.0450, 0.1829, 0.5254, 0.2467]) -Greedy action tensor([-1.7928, -0.4711, 0.5934, -0.1154]) tensor([0.0477, 0.1788, 0.5184, 0.2552]) -Greedy action tensor([-1.6352, -0.4031, 0.7053, -0.0140]) tensor([0.0503, 0.1725, 0.5226, 0.2546]) -Greedy action tensor([-1.9304, -0.4218, 0.6572, -0.1705]) tensor([0.0406, 0.1835, 0.5399, 0.2360]) -Greedy action tensor([-1.3795, -0.5865, 0.4320, -0.0651]) tensor([0.0766, 0.1693, 0.4689, 0.2852]) -Greedy action tensor([-1.9347, -0.4353, 0.6567, -0.1762]) tensor([0.0406, 0.1818, 0.5419, 0.2356]) -Greedy action tensor([-1.6504, -0.1823, 0.6288, 0.0524]) tensor([0.0485, 0.2107, 0.4742, 0.2665]) -Greedy action tensor([-1.8940, -0.4486, 0.6451, -0.1454]) tensor([0.0423, 0.1794, 0.5355, 0.2429]) -Greedy action tensor([-1.9001, -0.4541, 0.6512, -0.1553]) tensor([0.0420, 0.1784, 0.5390, 0.2406]) -Greedy action tensor([-1.5804, -0.4223, 0.8297, 0.4718]) tensor([0.0433, 0.1378, 0.4819, 0.3370]) -Greedy action tensor([-1.8806, -0.4782, 0.7178, -0.1165]) tensor([0.0411, 0.1670, 0.5522, 0.2398]) -Greedy action tensor([ 1.7538, -0.8859, -0.4731, 0.3200]) tensor([0.7054, 0.0503, 0.0761, 0.1682]) -Greedy action tensor([ 1.7964, -0.6379, -0.2609, 0.2124]) tensor([0.7039, 0.0617, 0.0900, 0.1444]) -Greedy action tensor([ 1.0079, -0.3478, -0.4511, 0.1323]) tensor([0.5244, 0.1352, 0.1219, 0.2185]) -Greedy action tensor([ 1.5031, -0.9509, -0.1765, 0.3565]) tensor([0.6289, 0.0541, 0.1173, 0.1998]) -Greedy action tensor([ 1.3273, -0.3461, 0.0509, 0.1751]) tensor([0.5610, 0.1052, 0.1565, 0.1772]) -Greedy action tensor([ 1.3767, -0.5565, -0.1797, 0.3885]) tensor([0.5788, 0.0837, 0.1221, 0.2154]) -Greedy action tensor([ 1.2043, 0.0084, -0.5329, 0.1949]) tensor([0.5426, 0.1641, 0.0955, 0.1978]) -Greedy action tensor([ 1.3316, -0.2397, -0.2738, 0.0116]) tensor([0.5968, 0.1240, 0.1198, 0.1594]) -Greedy action tensor([ 1.1530, -0.7242, 0.0516, 0.0850]) tensor([0.5467, 0.0837, 0.1817, 0.1879]) -Greedy action tensor([ 1.1829, -0.2929, -0.3043, 0.0134]) tensor([0.5665, 0.1295, 0.1280, 0.1759]) -Greedy action tensor([ 2.1939, -0.7088, -0.3634, 0.4261]) tensor([0.7674, 0.0421, 0.0595, 0.1310]) -Greedy action tensor([ 1.5585, -0.2213, -1.0140, 0.6431]) tensor([0.6078, 0.1025, 0.0464, 0.2433]) -Greedy action tensor([ 1.4416, -0.1998, -0.6549, 0.4128]) tensor([0.5974, 0.1157, 0.0734, 0.2135]) -Greedy action tensor([ 1.5055, 0.3051, -0.8078, 0.5224]) tensor([0.5636, 0.1697, 0.0558, 0.2109]) -Greedy action tensor([ 1.0939, -0.0047, -0.4987, 0.0274]) tensor([0.5316, 0.1772, 0.1081, 0.1830]) -Greedy action tensor([ 1.3042, -0.5752, -0.2987, 0.1511]) tensor([0.5989, 0.0914, 0.1206, 0.1891]) -Greedy action tensor([ 1.1504, -0.1170, 0.0349, 0.0571]) tensor([0.5143, 0.1448, 0.1686, 0.1723]) -Greedy action tensor([ 1.0786, 0.2990, -0.8876, 0.4015]) tensor([0.4747, 0.2177, 0.0665, 0.2412]) -Greedy action tensor([ 1.2475, -0.7166, -0.2608, 0.5078]) tensor([0.5438, 0.0763, 0.1203, 0.2596]) -Greedy action tensor([ 1.5204, -0.6838, -0.3625, 0.3768]) tensor([0.6324, 0.0698, 0.0962, 0.2015]) -Greedy action tensor([ 2.3772, -1.3085, 0.2396, 1.3993]) tensor([0.6583, 0.0165, 0.0776, 0.2476]) -Greedy action tensor([ 1.7630, -0.7003, -0.9120, 0.0099]) tensor([0.7534, 0.0642, 0.0519, 0.1305]) -Greedy action tensor([ 2.3265, -1.2039, -0.2808, 0.7481]) tensor([0.7637, 0.0224, 0.0563, 0.1576]) -Greedy action tensor([ 1.2741, -0.5048, -0.1556, 0.3951]) tensor([0.5484, 0.0926, 0.1313, 0.2277]) -Greedy action tensor([ 2.1249, 0.5035, -0.0980, 0.1148]) tensor([0.6945, 0.1372, 0.0752, 0.0930]) -Greedy action tensor([ 1.2677, -0.2511, -0.3396, 0.3734]) tensor([0.5469, 0.1198, 0.1096, 0.2237]) -Greedy action tensor([ 1.7867, -1.4579, -0.2521, 0.2477]) tensor([0.7227, 0.0282, 0.0941, 0.1551]) -Greedy action tensor([ 1.4361, -0.2919, -1.2153, 0.4754]) tensor([0.6132, 0.1089, 0.0433, 0.2346]) -Greedy action tensor([ 1.1364, -0.4597, -0.0966, 0.2882]) tensor([0.5202, 0.1054, 0.1516, 0.2227]) -Greedy action tensor([ 1.1321, -0.4026, -0.3734, 0.1373]) tensor([0.5533, 0.1193, 0.1228, 0.2046]) -Greedy action tensor([ 1.8863, -0.6845, -0.8418, 0.4372]) tensor([0.7264, 0.0556, 0.0475, 0.1706]) -Greedy action tensor([ 2.1239, -1.4023, 0.3158, 0.8466]) tensor([0.6793, 0.0200, 0.1114, 0.1894]) -Greedy action tensor([ 0.9386, 0.0503, -0.9926, 0.3367]) tensor([0.4753, 0.1955, 0.0689, 0.2603]) -Greedy action tensor([ 1.1245, -0.2525, -0.5283, 0.1501]) tensor([0.5491, 0.1386, 0.1052, 0.2072]) -Greedy action tensor([ 1.6049, -0.8738, -0.2956, 0.4214]) tensor([0.6496, 0.0545, 0.0971, 0.1989]) -Greedy action tensor([ 1.2018, -0.1515, -0.5261, 0.0235]) tensor([0.5734, 0.1482, 0.1019, 0.1765]) -Greedy action tensor([ 1.3223, -0.4610, -0.7160, 0.5312]) tensor([0.5709, 0.0960, 0.0744, 0.2588]) -Greedy action tensor([ 1.2833, -0.0858, -0.9810, 0.3904]) tensor([0.5657, 0.1439, 0.0588, 0.2316]) -Greedy action tensor([ 0.5910, -0.2528, -0.1017, -0.0230]) tensor([0.4046, 0.1740, 0.2024, 0.2190]) -Greedy action tensor([ 1.9678, -1.1387, -0.1570, 0.4726]) tensor([0.7202, 0.0322, 0.0860, 0.1615]) -Greedy action tensor([ 0.6828, -0.1052, 0.1021, -0.0953]) tensor([0.4043, 0.1839, 0.2262, 0.1857]) -Greedy action tensor([ 1.5288, -0.4513, -0.5473, 0.2914]) tensor([0.6437, 0.0889, 0.0807, 0.1867]) -Greedy action tensor([ 1.2026, -0.0895, -1.0624, 0.5069]) tensor([0.5327, 0.1463, 0.0553, 0.2657]) -Greedy action tensor([ 1.4751, -0.7775, 0.0356, 0.2751]) tensor([0.6085, 0.0640, 0.1442, 0.1833]) -Greedy action tensor([ 0.3490, -0.4578, 0.0875, 0.2048]) tensor([0.3245, 0.1448, 0.2498, 0.2809]) -Greedy action tensor([ 1.9105, -0.8229, -0.3536, 0.4593]) tensor([0.7127, 0.0463, 0.0741, 0.1670]) -Greedy action tensor([ 1.5861, -0.2310, -0.9474, 0.2821]) tensor([0.6608, 0.1074, 0.0525, 0.1794]) -Greedy action tensor([ 1.4007, -0.4907, 0.0574, 0.7283]) tensor([0.5202, 0.0785, 0.1358, 0.2656]) -Greedy action tensor([ 1.4529, -1.1153, -0.1437, 0.4857]) tensor([0.6026, 0.0462, 0.1221, 0.2291]) -Greedy action tensor([ 1.8223, -0.2198, -0.5232, 0.0241]) tensor([0.7188, 0.0933, 0.0689, 0.1190]) -Greedy action tensor([ 1.8770, 0.1797, -0.7599, 0.1541]) tensor([0.6977, 0.1278, 0.0499, 0.1246]) -Greedy action tensor([ 2.1978, -1.0055, -0.2245, 0.6281]) tensor([0.7477, 0.0304, 0.0663, 0.1556]) -Greedy action tensor([ 1.6999, -0.2891, 0.1705, 0.5196]) tensor([0.6022, 0.0824, 0.1305, 0.1850]) -Greedy action tensor([ 1.3562, -0.1677, -0.2654, 0.2651]) tensor([0.5710, 0.1244, 0.1128, 0.1918]) -Greedy action tensor([ 1.2843, -0.3907, -0.5276, 0.2029]) tensor([0.5918, 0.1108, 0.0967, 0.2007]) -Greedy action tensor([ 1.6304, -0.8387, -0.3236, 0.1769]) tensor([0.6849, 0.0580, 0.0970, 0.1601]) -Greedy action tensor([ 1.5862, -0.3056, -0.4073, 0.5063]) tensor([0.6148, 0.0927, 0.0837, 0.2088]) -Greedy action tensor([ 1.9998, -0.9093, -0.4978, 0.4084]) tensor([0.7460, 0.0407, 0.0614, 0.1519]) -Greedy action tensor([ 1.3081, -0.3433, -0.6335, 0.5051]) tensor([0.5608, 0.1075, 0.0805, 0.2512]) -Greedy action tensor([ 1.4205, -0.5686, -0.0135, 0.0173]) tensor([0.6169, 0.0844, 0.1470, 0.1516]) -Greedy action tensor([ 1.5632, -0.6532, -0.4481, -0.1230]) tensor([0.7003, 0.0763, 0.0937, 0.1297]) -Greedy action tensor([ 1.3783, -0.1025, -0.5358, 0.4080]) tensor([0.5702, 0.1297, 0.0841, 0.2161]) -Greedy action tensor([ 1.3572, -0.5959, -0.2930, -0.2145]) tensor([0.6487, 0.0920, 0.1246, 0.1347]) -Greedy action tensor([ 1.8018, -1.0670, -0.3959, 0.3770]) tensor([0.7100, 0.0403, 0.0789, 0.1708]) -Greedy action tensor([ 1.4069, -0.6803, -0.5424, 0.0393]) tensor([0.6574, 0.0815, 0.0936, 0.1674]) -Greedy action tensor([ 1.8977, -0.7605, -0.0584, 0.7802]) tensor([0.6499, 0.0455, 0.0919, 0.2126]) -Greedy action tensor([ 1.5953, -0.1842, -0.8804, 0.5133]) tensor([0.6282, 0.1060, 0.0528, 0.2129]) -Greedy action tensor([ 1.7226, -0.8960, -0.3829, 0.6197]) tensor([0.6550, 0.0478, 0.0798, 0.2174]) -Greedy action tensor([ 1.5861, 0.2556, -0.2479, 0.3440]) tensor([0.5838, 0.1543, 0.0933, 0.1686]) -Greedy action tensor([ 1.7050, -0.2262, -0.5118, 0.2994]) tensor([0.6671, 0.0967, 0.0727, 0.1636]) -Greedy action tensor([ 1.2181, -0.5676, -0.8623, 0.4192]) tensor([0.5739, 0.0962, 0.0717, 0.2582]) -Greedy action tensor([ 1.5466, -0.3303, -0.7118, 0.3323]) tensor([0.6433, 0.0985, 0.0672, 0.1910]) -Greedy action tensor([ 2.0634, -0.9415, -0.1441, 0.7641]) tensor([0.6982, 0.0346, 0.0768, 0.1904]) -Greedy action tensor([ 1.6400, -0.6394, -0.3125, 0.3075]) tensor([0.6631, 0.0679, 0.0941, 0.1749]) -Greedy action tensor([ 1.6418, -1.0963, -0.1075, 0.3829]) tensor([0.6568, 0.0425, 0.1142, 0.1865]) -Greedy action tensor([ 1.6541, -0.6361, -0.6667, 0.2238]) tensor([0.6951, 0.0704, 0.0683, 0.1663]) -Greedy action tensor([ 1.7977, -0.5978, -0.5301, 0.5434]) tensor([0.6785, 0.0618, 0.0662, 0.1936]) -Greedy action tensor([ 1.7894, -0.4648, -0.3886, 0.1414]) tensor([0.7089, 0.0744, 0.0803, 0.1364]) -Greedy action tensor([ 1.2335, 0.0409, -0.4144, 0.1589]) tensor([0.5443, 0.1651, 0.1047, 0.1858]) -Greedy action tensor([ 1.8498, -0.3281, -0.4754, 0.7051]) tensor([0.6539, 0.0741, 0.0639, 0.2081]) -Greedy action tensor([ 2.1898, -0.7706, -0.5346, 0.4733]) tensor([0.7710, 0.0399, 0.0506, 0.1385]) -Greedy action tensor([-1.6985, -0.1677, -1.3979, 0.6707]) tensor([0.0566, 0.2617, 0.0765, 0.6052]) -Greedy action tensor([ 0.7329, -0.9638, 0.4910, -0.0094]) tensor([0.4091, 0.0750, 0.3212, 0.1947]) -Greedy action tensor([-1.6834, -1.3800, -0.3882, 0.8872]) tensor([0.0524, 0.0710, 0.1914, 0.6852]) -Greedy action tensor([-0.6597, -0.0168, -0.1653, 0.2114]) tensor([0.1443, 0.2744, 0.2366, 0.3448]) -Greedy action tensor([-1.2336, 0.1036, -0.4092, -0.6540]) tensor([0.1127, 0.4291, 0.2570, 0.2012]) -Greedy action tensor([-0.0074, -0.4415, 0.1892, -0.3208]) tensor([0.2781, 0.1802, 0.3385, 0.2033]) -Greedy action tensor([-0.8448, -0.2306, -0.4697, -0.0278]) tensor([0.1523, 0.2814, 0.2216, 0.3447]) -Greedy action tensor([-0.1223, -1.3770, -0.0194, 0.1252]) tensor([0.2722, 0.0776, 0.3017, 0.3486]) -Greedy action tensor([ 1.1075, -0.1560, 0.0908, 0.2320]) tensor([0.4852, 0.1371, 0.1755, 0.2021]) -Greedy action tensor([ 1.1465, -0.6552, 0.3842, -0.3359]) tensor([0.5380, 0.0888, 0.2510, 0.1222]) -Greedy action tensor([-0.0384, -0.6377, -0.5659, -0.5015]) tensor([0.3612, 0.1984, 0.2131, 0.2273]) -Greedy action tensor([-0.5256, -0.5847, 0.7069, -0.4976]) tensor([0.1562, 0.1473, 0.5358, 0.1607]) -Greedy action tensor([-0.3165, -1.1726, 0.0322, -0.6828]) tensor([0.2829, 0.1202, 0.4009, 0.1961]) -Greedy action tensor([ 0.5057, -1.8021, -0.8114, 0.1516]) tensor([0.4833, 0.0481, 0.1295, 0.3392]) -Greedy action tensor([ 0.8597, 0.0012, -0.2953, -0.5951]) tensor([0.5070, 0.2149, 0.1597, 0.1184]) -Greedy action tensor([ 0.0766, 0.1588, -0.3195, -0.7120]) tensor([0.3112, 0.3379, 0.2094, 0.1414]) -Greedy action tensor([-0.4867, -0.4432, 0.7138, -0.0657]) tensor([0.1451, 0.1516, 0.4821, 0.2211]) -Greedy action tensor([ 0.4025, -0.1456, -0.0689, 0.5422]) tensor([0.2983, 0.1724, 0.1862, 0.3431]) -Greedy action tensor([ 0.3542, -2.0160, -0.5366, 0.2674]) tensor([0.4131, 0.0386, 0.1695, 0.3788]) -Greedy action tensor([-0.5379, -2.5192, -0.2677, 0.9595]) tensor([0.1445, 0.0199, 0.1894, 0.6461]) -Greedy action tensor([ 0.2287, -0.3035, -0.4383, 0.4531]) tensor([0.2983, 0.1752, 0.1531, 0.3734]) -Greedy action tensor([ 0.0368, -0.0679, 0.0083, -0.9133]) tensor([0.3068, 0.2763, 0.2982, 0.1186]) -Greedy action tensor([-0.9852, -0.2631, -0.9583, -0.2007]) tensor([0.1593, 0.3280, 0.1636, 0.3491]) -Greedy action tensor([ 0.3434, -0.2080, -0.2034, -0.5771]) tensor([0.3917, 0.2257, 0.2267, 0.1560]) -Greedy action tensor([ 0.6801, -0.5043, -0.7351, 0.2035]) tensor([0.4609, 0.1410, 0.1119, 0.2862]) -Greedy action tensor([0.8992, 0.5931, 0.3793, 0.3434]) tensor([0.3443, 0.2535, 0.2047, 0.1975]) -Greedy action tensor([ 0.2242, -0.5856, -0.0088, -0.2278]) tensor([0.3480, 0.1548, 0.2757, 0.2215]) -Greedy action tensor([-0.0115, -0.2218, -0.0554, -0.1207]) tensor([0.2729, 0.2212, 0.2612, 0.2447]) -Greedy action tensor([-0.9783, -0.5171, 0.9096, -1.0040]) tensor([0.0984, 0.1560, 0.6497, 0.0959]) -Greedy action tensor([ 0.0110, -0.8626, -1.0088, 0.2126]) tensor([0.3332, 0.1391, 0.1202, 0.4076]) -Greedy action tensor([ 0.2056, -1.4838, -0.0310, -0.5245]) tensor([0.4072, 0.0752, 0.3214, 0.1962]) -Greedy action tensor([-0.0162, -0.8344, -0.8301, 0.0804]) tensor([0.3349, 0.1478, 0.1484, 0.3689]) -Greedy action tensor([-0.3668, 0.1694, -0.1974, -1.3279]) tensor([0.2338, 0.3998, 0.2770, 0.0894]) -Greedy action tensor([ 0.8194, -0.9729, 1.2609, 0.6159]) tensor([0.2827, 0.0471, 0.4396, 0.2306]) -Greedy action tensor([ 1.0681, -0.5839, 1.4643, 0.5270]) tensor([0.3068, 0.0588, 0.4559, 0.1786]) -Greedy action tensor([ 0.0956, -0.3387, 0.2507, -0.4511]) tensor([0.2946, 0.1908, 0.3440, 0.1705]) -Greedy action tensor([-0.1075, 0.3340, -0.3005, -0.8117]) tensor([0.2581, 0.4014, 0.2128, 0.1276]) -Greedy action tensor([-0.2745, -0.7565, 0.3076, 0.1132]) tensor([0.2049, 0.1265, 0.3667, 0.3019]) -Greedy action tensor([-0.2429, -0.0179, -0.0634, -0.8524]) tensor([0.2505, 0.3137, 0.2997, 0.1362]) -Greedy action tensor([ 0.3666, -0.4987, -0.0336, 0.6852]) tensor([0.2885, 0.1214, 0.1933, 0.3967]) -Greedy action tensor([ 0.8300, -0.7760, 0.8914, -0.7748]) tensor([0.4057, 0.0814, 0.4314, 0.0815]) -Greedy action tensor([ 0.1316, -0.1712, 0.1732, -0.7681]) tensor([0.3137, 0.2317, 0.3270, 0.1276]) -Greedy action tensor([ 0.7718, -1.0312, 0.2336, -0.1526]) tensor([0.4661, 0.0768, 0.2721, 0.1849]) -Greedy action tensor([ 0.8883, -1.3405, 0.4833, 0.6072]) tensor([0.3953, 0.0426, 0.2637, 0.2985]) -Greedy action tensor([-0.7451, -0.8622, -0.2201, -0.7203]) tensor([0.2172, 0.1932, 0.3671, 0.2226]) -Greedy action tensor([-0.0728, 0.1783, 0.8071, -0.1287]) tensor([0.1772, 0.2278, 0.4273, 0.1676]) -Greedy action tensor([0.8923, 0.0443, 0.1188, 0.6779]) tensor([0.3708, 0.1588, 0.1711, 0.2993]) -Greedy action tensor([ 0.0282, -0.7503, 1.4077, 0.0496]) tensor([0.1550, 0.0711, 0.6156, 0.1583]) -Greedy action tensor([-0.3640, -0.1586, -0.0483, -0.5608]) tensor([0.2262, 0.2778, 0.3102, 0.1858]) -Greedy action tensor([-0.2128, -0.0530, 1.7423, -1.0289]) tensor([0.1033, 0.1212, 0.7298, 0.0457]) -Greedy action tensor([ 0.7967, -1.8972, -0.5025, 0.5273]) tensor([0.4752, 0.0321, 0.1296, 0.3630]) -Greedy action tensor([ 0.0991, -1.6382, -0.0055, -0.0912]) tensor([0.3444, 0.0606, 0.3102, 0.2848]) -Greedy action tensor([1.0064, 0.1953, 0.6720, 0.0907]) tensor([0.3906, 0.1735, 0.2795, 0.1563]) -Greedy action tensor([ 0.5145, -0.9694, -0.7568, -0.0692]) tensor([0.4842, 0.1098, 0.1358, 0.2701]) -Greedy action tensor([ 0.0687, 0.1593, 0.2998, -0.1933]) tensor([0.2425, 0.2655, 0.3055, 0.1866]) -Greedy action tensor([-0.4300, -0.8971, 0.2346, -0.2070]) tensor([0.2075, 0.1300, 0.4032, 0.2593]) -Greedy action tensor([-0.0034, -0.1791, -0.0227, 0.5273]) tensor([0.2212, 0.1856, 0.2170, 0.3761]) -Greedy action tensor([-0.1100, -0.6711, -0.4579, -1.0704]) tensor([0.3760, 0.2146, 0.2655, 0.1439]) -Greedy action tensor([ 0.8011, -1.1271, 0.3568, 1.1003]) tensor([0.3189, 0.0464, 0.2045, 0.4302]) -Greedy action tensor([-1.3760, -0.2103, 0.0097, -1.5895]) tensor([0.1109, 0.3559, 0.4435, 0.0896]) -Greedy action tensor([-1.0799, -0.8504, -0.1189, -0.5996]) tensor([0.1541, 0.1939, 0.4029, 0.2491]) -Greedy action tensor([-1.0150, -0.3223, 1.1426, -0.5118]) tensor([0.0752, 0.1503, 0.6502, 0.1243]) -Greedy action tensor([ 0.0469, -0.1792, 0.7197, -0.2330]) tensor([0.2216, 0.1767, 0.4342, 0.1675]) -Greedy action tensor([ 0.0928, 0.0271, -0.1380, -0.9986]) tensor([0.3262, 0.3054, 0.2589, 0.1095]) -Greedy action tensor([-0.6351, -1.2002, 0.1427, 0.2137]) tensor([0.1644, 0.0934, 0.3579, 0.3842]) -Greedy action tensor([ 0.3893, -0.2705, 1.3670, 0.5629]) tensor([0.1864, 0.0964, 0.4955, 0.2217]) -Greedy action tensor([ 0.5993, -0.6335, -0.1220, 0.4698]) tensor([0.3765, 0.1097, 0.1830, 0.3308]) -Greedy action tensor([ 0.3532, 0.1011, 0.4702, -0.1331]) tensor([0.2844, 0.2210, 0.3197, 0.1749]) -Greedy action tensor([ 0.1413, -1.5875, 0.8642, -0.9767]) tensor([0.2805, 0.0498, 0.5780, 0.0917]) -Greedy action tensor([0.7095, 0.4340, 0.2653, 0.0980]) tensor([0.3398, 0.2580, 0.2179, 0.1843]) -Greedy action tensor([ 0.6975, -0.2293, -0.4680, 0.2520]) tensor([0.4259, 0.1686, 0.1328, 0.2728]) -Greedy action tensor([ 0.4282, -1.1700, 0.3706, -0.1479]) tensor([0.3692, 0.0747, 0.3486, 0.2075]) -Greedy action tensor([-0.2268, -2.2996, -0.1226, 0.2790]) tensor([0.2568, 0.0323, 0.2850, 0.4259]) -Greedy action tensor([ 0.8208, -0.2521, -0.2006, 0.3829]) tensor([0.4260, 0.1457, 0.1534, 0.2749]) -Greedy action tensor([-0.1722, -0.3626, -1.2106, 0.5765]) tensor([0.2328, 0.1925, 0.0824, 0.4923]) -Greedy action tensor([ 0.0838, -0.6678, -0.1762, -0.9045]) tensor([0.3824, 0.1804, 0.2949, 0.1423]) -Greedy action tensor([0.1666, 0.3614, 0.6267, 0.2194]) tensor([0.2060, 0.2503, 0.3264, 0.2172]) -Greedy action tensor([-0.3790, -0.4331, -0.3251, -0.4656]) tensor([0.2551, 0.2417, 0.2693, 0.2340]) -Greedy action tensor([-1.1143, -0.5951, -0.3138, -0.9451]) tensor([0.1642, 0.2759, 0.3655, 0.1944]) -Greedy action tensor([ 0.0441, -0.4972, 0.3409, -0.5194]) tensor([0.2860, 0.1664, 0.3848, 0.1628]) -Greedy action tensor([ 1.1348, -0.2221, 0.6002, -0.0030]) tensor([0.4621, 0.1190, 0.2708, 0.1481]) -Greedy action tensor([ 0.0545, -1.0479, 0.4800, 0.3275]) tensor([0.2395, 0.0795, 0.3664, 0.3146]) -Greedy action tensor([ 0.1472, 0.3853, -0.1430, -0.3743]) tensor([0.2770, 0.3514, 0.2072, 0.1644]) -Greedy action tensor([ 0.7527, -0.4850, -0.0427, -0.4358]) tensor([0.4887, 0.1418, 0.2206, 0.1489]) -Greedy action tensor([ 0.8151, -0.4609, -0.0797, -0.3152]) tensor([0.4973, 0.1388, 0.2033, 0.1606]) -Greedy action tensor([ 0.8340, -0.7026, 0.1702, -0.2688]) tensor([0.4850, 0.1043, 0.2497, 0.1610]) -Greedy action tensor([ 0.6794, -0.0592, 0.0719, -0.1033]) tensor([0.4033, 0.1927, 0.2197, 0.1844]) -Greedy action tensor([ 0.5345, -0.3605, -0.0415, -0.2544]) tensor([0.4124, 0.1685, 0.2318, 0.1873]) -Greedy action tensor([ 0.6470, -0.3705, 0.0111, -0.1214]) tensor([0.4247, 0.1535, 0.2249, 0.1969]) -Greedy action tensor([ 0.7446, -0.5249, 0.0325, -0.3944]) tensor([0.4781, 0.1343, 0.2345, 0.1531]) -Greedy action tensor([ 6.1979e-01, -4.6326e-01, 1.9869e-04, -3.3965e-01]) tensor([0.4425, 0.1498, 0.2381, 0.1695]) -Greedy action tensor([ 1.0514, 0.1614, -0.0597, -0.1637]) tensor([0.4910, 0.2016, 0.1616, 0.1457]) -Greedy action tensor([ 0.7795, -0.5852, 0.2415, -0.5190]) tensor([0.4734, 0.1209, 0.2764, 0.1292]) -Greedy action tensor([ 0.4353, -0.3306, 0.0801, -0.5844]) tensor([0.3958, 0.1840, 0.2775, 0.1428]) -Greedy action tensor([ 0.9094, -0.6447, -0.0889, -0.6185]) tensor([0.5565, 0.1176, 0.2051, 0.1208]) -Greedy action tensor([ 1.1698, -0.6788, 0.1834, -0.6125]) tensor([0.5887, 0.0927, 0.2195, 0.0991]) -Greedy action tensor([ 0.4472, -0.0052, 0.0298, -0.0332]) tensor([0.3433, 0.2183, 0.2261, 0.2123]) -Greedy action tensor([ 0.5165, -0.4273, -0.1900, -0.0577]) tensor([0.4089, 0.1591, 0.2017, 0.2303]) -Greedy action tensor([ 0.7539, -0.7105, 0.1061, -0.6848]) tensor([0.5021, 0.1161, 0.2627, 0.1191]) -Greedy action tensor([ 1.1201, -0.7982, -0.1186, -0.6770]) tensor([0.6241, 0.0916, 0.1808, 0.1035]) -Greedy action tensor([ 0.5239, -0.1856, 0.0561, -0.1219]) tensor([0.3784, 0.1861, 0.2370, 0.1984]) -Greedy action tensor([ 1.4137, -0.8023, -0.0620, -0.7104]) tensor([0.6862, 0.0748, 0.1569, 0.0820]) -Greedy action tensor([ 1.0693, -1.0163, 0.0095, -0.4002]) tensor([0.5879, 0.0730, 0.2037, 0.1353]) -Greedy action tensor([ 0.6606, -0.1917, 0.1058, -0.5336]) tensor([0.4341, 0.1851, 0.2493, 0.1315]) -Greedy action tensor([ 2.6286e-01, -6.5477e-02, -4.1026e-02, 1.0344e-04]) tensor([0.3099, 0.2232, 0.2287, 0.2383]) -Greedy action tensor([ 0.9637, -0.7444, -0.1449, -0.2477]) tensor([0.5528, 0.1002, 0.1824, 0.1646]) -Greedy action tensor([ 0.8694, -0.6281, 0.0205, -0.5386]) tensor([0.5274, 0.1180, 0.2257, 0.1290]) -Greedy action tensor([ 0.8293, -0.4207, -0.0445, -0.3313]) tensor([0.4957, 0.1420, 0.2069, 0.1553]) -Greedy action tensor([ 0.3892, -0.1550, 0.0055, -0.0348]) tensor([0.3429, 0.1990, 0.2336, 0.2244]) -Greedy action tensor([ 0.9106, -0.7365, 0.0668, -0.2111]) tensor([0.5132, 0.0989, 0.2207, 0.1672]) -Greedy action tensor([ 0.5152, -0.2605, 0.0052, -0.6582]) tensor([0.4219, 0.1942, 0.2533, 0.1305]) -Greedy action tensor([ 0.5530, -0.3798, 0.1535, -0.5299]) tensor([0.4162, 0.1638, 0.2791, 0.1409]) -Greedy action tensor([ 0.4180, -0.1223, 0.0162, -0.4445]) tensor([0.3740, 0.2179, 0.2503, 0.1579]) -Greedy action tensor([ 0.6356, -0.3407, 0.0261, -0.3020]) tensor([0.4325, 0.1629, 0.2351, 0.1694]) -Greedy action tensor([ 0.7220, -0.5534, -0.1041, -0.1171]) tensor([0.4653, 0.1300, 0.2037, 0.2010]) -Greedy action tensor([ 0.9552, -0.5777, -0.0140, -0.2330]) tensor([0.5263, 0.1136, 0.1997, 0.1604]) -Greedy action tensor([ 0.7346, -0.5860, -0.1345, -0.2540]) tensor([0.4858, 0.1297, 0.2037, 0.1808]) -Greedy action tensor([ 0.6544, -0.4143, -0.1292, -0.0751]) tensor([0.4381, 0.1505, 0.2001, 0.2113]) -Greedy action tensor([ 0.9640, -0.2653, -0.2027, -0.1353]) tensor([0.5162, 0.1510, 0.1608, 0.1720]) -Greedy action tensor([ 0.4851, -0.0344, 0.0133, -0.1085]) tensor([0.3609, 0.2147, 0.2252, 0.1993]) -Greedy action tensor([ 0.7780, -0.1293, -0.1095, -0.3418]) tensor([0.4669, 0.1885, 0.1922, 0.1524]) -Greedy action tensor([ 0.6846, -0.2677, -0.0137, -0.1032]) tensor([0.4277, 0.1650, 0.2128, 0.1945]) -Greedy action tensor([ 0.3981, 0.0806, 0.0279, -0.4863]) tensor([0.3532, 0.2571, 0.2439, 0.1458]) -Greedy action tensor([ 0.6814, -0.3734, -0.0133, -0.2124]) tensor([0.4431, 0.1543, 0.2212, 0.1813]) -Greedy action tensor([ 0.7061, 0.1669, -0.3806, 0.0468]) tensor([0.4102, 0.2392, 0.1384, 0.2122]) -Greedy action tensor([ 0.8031, -0.1472, 0.0128, -0.0026]) tensor([0.4372, 0.1690, 0.1984, 0.1953]) -Greedy action tensor([ 0.3815, 0.0330, -0.0686, 0.0443]) tensor([0.3271, 0.2309, 0.2085, 0.2335]) -Greedy action tensor([ 0.7991, -0.3917, 0.1224, -0.2953]) tensor([0.4658, 0.1416, 0.2367, 0.1559]) -Greedy action tensor([ 0.9616, -0.6273, -0.0193, -0.3727]) tensor([0.5428, 0.1108, 0.2035, 0.1429]) -Greedy action tensor([ 0.4885, -0.6295, -0.1479, -0.2530]) tensor([0.4287, 0.1402, 0.2269, 0.2042]) -Greedy action tensor([ 0.8313, -0.4681, -0.1409, -0.3369]) tensor([0.5097, 0.1390, 0.1928, 0.1585]) -Greedy action tensor([ 1.0094, -0.5418, -0.0292, -0.4043]) tensor([0.5527, 0.1172, 0.1956, 0.1344]) -Greedy action tensor([ 0.7165, -0.3382, -0.0536, -0.1878]) tensor([0.4512, 0.1572, 0.2089, 0.1827]) -Greedy action tensor([ 0.8909, -0.5942, -0.0758, -0.2952]) tensor([0.5229, 0.1184, 0.1989, 0.1597]) -Greedy action tensor([ 0.9754, -1.0646, 0.3268, -0.8844]) tensor([0.5529, 0.0719, 0.2891, 0.0861]) -Greedy action tensor([ 0.7365, -0.2813, 0.0221, -0.2524]) tensor([0.4499, 0.1626, 0.2202, 0.1673]) -Greedy action tensor([ 0.8984, 0.3154, -0.1543, -0.3013]) tensor([0.4528, 0.2528, 0.1580, 0.1364]) -Greedy action tensor([ 0.3785, -0.4745, -0.2204, -0.0620]) tensor([0.3818, 0.1627, 0.2098, 0.2458]) -Greedy action tensor([ 1.2910, -0.4658, -0.1589, -0.5375]) tensor([0.6378, 0.1101, 0.1496, 0.1025]) -Greedy action tensor([ 0.5929, -0.2213, -0.0540, -0.2079]) tensor([0.4140, 0.1834, 0.2168, 0.1859]) -Greedy action tensor([ 0.6423, -0.1686, -0.0082, -0.0598]) tensor([0.4062, 0.1805, 0.2120, 0.2013]) -Greedy action tensor([ 0.6712, -0.7068, -0.0394, -0.3860]) tensor([0.4783, 0.1206, 0.2350, 0.1662]) -Greedy action tensor([ 1.0333, -0.8004, 0.1147, -0.3899]) tensor([0.5556, 0.0888, 0.2217, 0.1339]) -Greedy action tensor([ 0.5267, -0.4036, -0.0306, -0.0458]) tensor([0.3951, 0.1558, 0.2263, 0.2228]) -Greedy action tensor([ 0.6407, -0.1220, 0.0513, -0.0825]) tensor([0.3990, 0.1861, 0.2213, 0.1936]) -Greedy action tensor([ 1.0523, -0.9150, 0.0779, -0.4668]) tensor([0.5760, 0.0805, 0.2174, 0.1261]) -Greedy action tensor([ 0.9314, -0.6501, -0.0650, -0.4488]) tensor([0.5475, 0.1126, 0.2022, 0.1377]) -Greedy action tensor([ 0.8986, -0.4342, -0.0707, -0.3737]) tensor([0.5200, 0.1371, 0.1972, 0.1457]) -Greedy action tensor([ 0.5749, -0.0528, -0.0720, -0.0028]) tensor([0.3819, 0.2038, 0.2000, 0.2143]) -Greedy action tensor([ 0.8531, -0.5923, -0.1154, -0.3210]) tensor([0.5196, 0.1225, 0.1973, 0.1606]) -Greedy action tensor([ 0.8068, -0.6482, -0.1254, -0.1959]) tensor([0.5015, 0.1171, 0.1974, 0.1840]) -Greedy action tensor([ 0.9239, -0.6733, -0.0641, -0.5299]) tensor([0.5530, 0.1120, 0.2059, 0.1292]) -Greedy action tensor([ 0.4308, 0.0912, -0.0355, -0.3278]) tensor([0.3562, 0.2536, 0.2234, 0.1668]) -Greedy action tensor([ 0.6652, -0.2739, 0.0324, -0.5611]) tensor([0.4514, 0.1765, 0.2397, 0.1324]) -Greedy action tensor([ 0.7033, -0.0492, -0.0840, 0.0017]) tensor([0.4129, 0.1945, 0.1879, 0.2047]) -Greedy action tensor([ 0.9866, -0.6619, 0.0492, -0.5157]) tensor([0.5535, 0.1065, 0.2168, 0.1232]) -Greedy action tensor([ 0.9415, -0.7075, 0.0370, -0.6249]) tensor([0.5538, 0.1065, 0.2241, 0.1156]) -Greedy action tensor([ 0.9199, -0.3850, -0.1141, -0.2567]) tensor([0.5168, 0.1401, 0.1837, 0.1593]) -Greedy action tensor([ 0.4447, -0.2826, -0.0674, -0.1542]) tensor([0.3800, 0.1836, 0.2277, 0.2088]) -Greedy action tensor([ 1.2195, -0.6836, 0.0677, -0.6492]) tensor([0.6175, 0.0921, 0.1952, 0.0953]) -Greedy action tensor([ 0.6210, 0.1062, -0.1541, 0.1310]) tensor([0.3744, 0.2237, 0.1725, 0.2294]) -Greedy action tensor([ 0.6300, 0.0405, -0.1441, -0.0929]) tensor([0.3998, 0.2217, 0.1844, 0.1941]) -Greedy action tensor([ 0.8709, -0.4861, -0.0981, -0.1503]) tensor([0.5007, 0.1289, 0.1900, 0.1803]) -Greedy action tensor([-1.8316, -0.0772, 0.5586, -0.1013]) tensor([0.0428, 0.2477, 0.4677, 0.2418]) -Greedy action tensor([-1.7614, 0.0853, 0.5161, -0.0752]) tensor([0.0445, 0.2818, 0.4336, 0.2401]) -Greedy action tensor([-1.8131, -0.3721, 0.5933, -0.1108]) tensor([0.0459, 0.1938, 0.5088, 0.2516]) -Greedy action tensor([-1.4083, -1.1408, 0.2275, -0.6572]) tensor([0.1046, 0.1367, 0.5370, 0.2217]) -Greedy action tensor([-1.7073, -0.2262, 0.6111, -0.0317]) tensor([0.0478, 0.2104, 0.4861, 0.2556]) -Greedy action tensor([-1.9180, -0.4415, 0.6557, -0.1669]) tensor([0.0412, 0.1805, 0.5407, 0.2375]) -Greedy action tensor([-1.8708, -0.3377, 0.6030, -0.1297]) tensor([0.0431, 0.1997, 0.5114, 0.2458]) -Greedy action tensor([-1.9387, -0.4423, 0.6645, -0.1761]) tensor([0.0403, 0.1801, 0.5446, 0.2350]) -Greedy action tensor([-1.5311, -0.2450, 0.6336, -0.0507]) tensor([0.0564, 0.2041, 0.4915, 0.2479]) -Greedy action tensor([-1.8301, -0.4469, 0.6114, -0.1288]) tensor([0.0455, 0.1816, 0.5233, 0.2496]) -Greedy action tensor([-1.8601, -0.4407, 0.6210, -0.1357]) tensor([0.0441, 0.1822, 0.5267, 0.2471]) -Greedy action tensor([-1.6228, -0.4773, 0.5718, 0.0476]) tensor([0.0542, 0.1705, 0.4869, 0.2883]) -Greedy action tensor([-0.7558, 0.3487, -0.8126, -0.1323]) tensor([0.1465, 0.4420, 0.1384, 0.2732]) -Greedy action tensor([-1.0335, -0.0449, -0.0074, -0.1909]) tensor([0.1136, 0.3054, 0.3171, 0.2639]) -Greedy action tensor([-1.5519, -0.3411, 0.4710, -0.0283]) tensor([0.0606, 0.2033, 0.4581, 0.2780]) -Greedy action tensor([-1.9462, -0.4499, 0.6676, -0.1813]) tensor([0.0401, 0.1789, 0.5470, 0.2340]) -Greedy action tensor([-1.6876, 0.2597, 0.4281, -0.0053]) tensor([0.0461, 0.3233, 0.3826, 0.2480]) -Greedy action tensor([-0.5918, 0.8389, 0.0469, 0.2040]) tensor([0.1076, 0.4500, 0.2038, 0.2385]) -Greedy action tensor([-1.8699, -0.3976, 0.6415, -0.1375]) tensor([0.0429, 0.1868, 0.5281, 0.2423]) -Greedy action tensor([-1.8797, -0.4578, 0.6603, -0.1361]) tensor([0.0425, 0.1761, 0.5386, 0.2429]) -Greedy action tensor([-1.6397, 0.1242, 0.4505, -0.0578]) tensor([0.0505, 0.2949, 0.4087, 0.2458]) -Greedy action tensor([-1.7682, -0.0981, 0.5507, -0.0414]) tensor([0.0452, 0.2404, 0.4599, 0.2544]) -Greedy action tensor([-1.5949, 0.4504, 0.3834, 0.1696]) tensor([0.0459, 0.3546, 0.3317, 0.2678]) -Greedy action tensor([-1.8002, -0.4048, 0.6603, -0.0930]) tensor([0.0449, 0.1813, 0.5261, 0.2477]) -Greedy action tensor([-1.7165, 0.5447, 0.4423, -0.1370]) tensor([0.0415, 0.3980, 0.3592, 0.2013]) -Greedy action tensor([-1.8224, -0.4035, 0.6497, -0.1067]) tensor([0.0444, 0.1833, 0.5256, 0.2467]) -Greedy action tensor([-1.8919, -0.4376, 0.6392, -0.1548]) tensor([0.0425, 0.1820, 0.5341, 0.2414]) -Greedy action tensor([-1.9252, -0.4334, 0.6624, -0.1721]) tensor([0.0408, 0.1813, 0.5424, 0.2355]) -Greedy action tensor([-1.7572, -0.3555, 0.5572, -0.1027]) tensor([0.0490, 0.1990, 0.4957, 0.2562]) -Greedy action tensor([-1.9171, -0.4468, 0.6545, -0.1662]) tensor([0.0413, 0.1798, 0.5408, 0.2381]) -Greedy action tensor([-1.2792, 0.4522, 0.3883, -0.2240]) tensor([0.0675, 0.3811, 0.3575, 0.1938]) -Greedy action tensor([-1.7689, -0.3131, 0.6346, -0.0771]) tensor([0.0459, 0.1969, 0.5079, 0.2493]) -Greedy action tensor([-1.2136, -0.1133, 0.4952, -0.4866]) tensor([0.0862, 0.2591, 0.4762, 0.1784]) -Greedy action tensor([-1.9311, -0.4328, 0.6588, -0.1713]) tensor([0.0406, 0.1818, 0.5415, 0.2361]) -Greedy action tensor([-1.8868, -0.4395, 0.6278, -0.1447]) tensor([0.0429, 0.1823, 0.5300, 0.2448]) -Greedy action tensor([-1.1678, -0.4112, 0.2883, 0.0426]) tensor([0.0928, 0.1978, 0.3981, 0.3113]) -Greedy action tensor([-1.8642, -0.4083, 0.6222, -0.1480]) tensor([0.0437, 0.1875, 0.5255, 0.2433]) -Greedy action tensor([-1.8144, -0.0637, 0.5436, -0.0903]) tensor([0.0436, 0.2511, 0.4609, 0.2445]) -Greedy action tensor([-1.9441, -0.4484, 0.6683, -0.1784]) tensor([0.0401, 0.1789, 0.5466, 0.2344]) -Greedy action tensor([-1.7801, -0.2946, 0.6046, -0.0694]) tensor([0.0459, 0.2026, 0.4978, 0.2537]) -Greedy action tensor([-0.1716, 1.0664, -0.0167, 0.1010]) tensor([0.1443, 0.4977, 0.1685, 0.1895]) -Greedy action tensor([-1.9028, -0.4453, 0.6457, -0.1600]) tensor([0.0420, 0.1805, 0.5374, 0.2401]) -Greedy action tensor([-1.7534, -0.3049, 0.6094, -0.0520]) tensor([0.0468, 0.1993, 0.4972, 0.2567]) -Greedy action tensor([-1.8478, -0.3876, 0.6082, -0.1279]) tensor([0.0443, 0.1910, 0.5170, 0.2476]) -Greedy action tensor([-1.7118, 0.4459, 0.6067, -0.4607]) tensor([0.0429, 0.3712, 0.4360, 0.1499]) -Greedy action tensor([-1.9139, -0.3978, 0.6443, -0.1616]) tensor([0.0413, 0.1879, 0.5328, 0.2380]) -Greedy action tensor([-1.3166, 0.4241, 0.2366, 0.0541]) tensor([0.0651, 0.3710, 0.3076, 0.2563]) -Greedy action tensor([-1.6353, 0.0104, 0.4809, -0.0114]) tensor([0.0511, 0.2651, 0.4244, 0.2594]) -Greedy action tensor([-1.8756, -0.2611, 0.6047, -0.1319]) tensor([0.0422, 0.2121, 0.5042, 0.2414]) -Greedy action tensor([-1.9102, -0.4523, 0.6529, -0.1620]) tensor([0.0416, 0.1789, 0.5403, 0.2392]) -Greedy action tensor([-1.7096, -0.4838, 0.5614, -0.0620]) tensor([0.0518, 0.1766, 0.5023, 0.2693]) -Greedy action tensor([-1.7431, -0.2318, 0.5800, -0.0242]) tensor([0.0469, 0.2126, 0.4788, 0.2617]) -Greedy action tensor([-1.8552, -0.2650, 0.6197, -0.0966]) tensor([0.0424, 0.2079, 0.5036, 0.2461]) -Greedy action tensor([-1.0862, -0.3036, 0.4919, 0.7857]) tensor([0.0688, 0.1505, 0.3334, 0.4473]) -Greedy action tensor([-1.8044, -0.4086, 0.5925, -0.1317]) tensor([0.0468, 0.1891, 0.5146, 0.2494]) -Greedy action tensor([-1.8291, 0.1211, 0.6324, -0.4048]) tensor([0.0418, 0.2941, 0.4903, 0.1738]) -Greedy action tensor([-1.8191, -0.4373, 0.5956, -0.1198]) tensor([0.0462, 0.1840, 0.5170, 0.2528]) -Greedy action tensor([-1.8166, -0.2941, 0.5804, -0.1038]) tensor([0.0452, 0.2072, 0.4969, 0.2507]) -Greedy action tensor([-1.6517, -0.4626, 0.5394, 0.0261]) tensor([0.0538, 0.1767, 0.4813, 0.2881]) -Greedy action tensor([-1.8105, -0.3890, 0.6042, -0.1068]) tensor([0.0458, 0.1899, 0.5126, 0.2518]) -Greedy action tensor([-0.8780, 0.6952, 0.1711, 0.2610]) tensor([0.0847, 0.4086, 0.2419, 0.2647]) -Greedy action tensor([-1.8031, -0.0334, 0.5575, -0.0804]) tensor([0.0434, 0.2545, 0.4594, 0.2428]) -Greedy action tensor([-1.7274, -0.4218, 0.6472, -0.1131]) tensor([0.0489, 0.1803, 0.5252, 0.2456]) -Greedy action tensor([-1.8473, -0.4235, 0.5994, -0.1434]) tensor([0.0451, 0.1871, 0.5203, 0.2476]) -Greedy action tensor([-1.8493, -0.3706, 0.6387, -0.1279]) tensor([0.0434, 0.1906, 0.5230, 0.2430]) -Greedy action tensor([-1.9321, -0.4377, 0.6600, -0.1745]) tensor([0.0406, 0.1811, 0.5427, 0.2356]) -Greedy action tensor([-1.8736, -0.3990, 0.6223, -0.1463]) tensor([0.0432, 0.1889, 0.5246, 0.2432]) -Greedy action tensor([-1.8506, -0.4117, 0.6096, -0.1233]) tensor([0.0443, 0.1870, 0.5192, 0.2495]) -Greedy action tensor([-1.9126, -0.4376, 0.6528, -0.1611]) tensor([0.0414, 0.1811, 0.5388, 0.2387]) -Greedy action tensor([-1.9326, -0.4572, 0.6700, -0.1689]) tensor([0.0405, 0.1770, 0.5464, 0.2361]) -Greedy action tensor([-1.9025, -0.4289, 0.6448, -0.1603]) tensor([0.0419, 0.1830, 0.5356, 0.2394]) -Greedy action tensor([-1.8962, -0.4290, 0.6441, -0.1565]) tensor([0.0422, 0.1829, 0.5348, 0.2402]) -Greedy action tensor([-1.9277, -0.4313, 0.6601, -0.1706]) tensor([0.0407, 0.1818, 0.5415, 0.2360]) -Greedy action tensor([-1.2340, 0.3890, 0.3506, -0.2334]) tensor([0.0732, 0.3709, 0.3569, 0.1990]) -Greedy action tensor([-1.5074, 0.4432, 0.3709, 0.0519]) tensor([0.0517, 0.3638, 0.3384, 0.2460]) -Greedy action tensor([-1.0665, 0.4080, 0.4818, 0.6325]) tensor([0.0643, 0.2811, 0.3026, 0.3519]) -Greedy action tensor([-0.8549, 0.7283, 0.1591, -0.1409]) tensor([0.0937, 0.4565, 0.2584, 0.1914]) -Greedy action tensor([-1.8892, -0.3766, 0.6380, -0.1479]) tensor([0.0421, 0.1910, 0.5268, 0.2401]) -Greedy action tensor([-1.7798, -0.4007, 0.5738, -0.0969]) tensor([0.0479, 0.1902, 0.5041, 0.2578]) -Greedy action tensor([-1.8801, -0.4373, 0.6363, -0.1467]) tensor([0.0430, 0.1818, 0.5321, 0.2432]) -Greedy action tensor([-1.8041, -0.2728, 0.6224, -0.0815]) tensor([0.0444, 0.2051, 0.5021, 0.2484]) -Greedy action tensor([ 1.1692, -0.4171, -0.6089, 0.6065]) tensor([0.5146, 0.1053, 0.0869, 0.2931]) -Greedy action tensor([ 1.2155, -0.5431, -0.7625, 0.6897]) tensor([0.5259, 0.0906, 0.0727, 0.3108]) -Greedy action tensor([ 1.8723, 0.3219, -0.4758, 0.3604]) tensor([0.6544, 0.1388, 0.0625, 0.1443]) -Greedy action tensor([ 1.3152, -0.3033, -0.3391, 0.1542]) tensor([0.5873, 0.1164, 0.1123, 0.1839]) -Greedy action tensor([ 1.4283, -0.2343, -1.1901, 0.2187]) tensor([0.6407, 0.1215, 0.0467, 0.1911]) -Greedy action tensor([ 1.0075, -0.1497, -0.4592, 0.5628]) tensor([0.4574, 0.1438, 0.1055, 0.2932]) -Greedy action tensor([ 1.8665, -0.5826, -0.5973, 0.4741]) tensor([0.7042, 0.0608, 0.0599, 0.1750]) -Greedy action tensor([ 1.8120, -0.5822, -0.3272, 0.1694]) tensor([0.7130, 0.0651, 0.0840, 0.1380]) -Greedy action tensor([ 1.7893, -0.8684, -0.0499, 0.0774]) tensor([0.7094, 0.0497, 0.1128, 0.1281]) -Greedy action tensor([ 1.7608, -0.6437, 0.2053, 0.1293]) tensor([0.6680, 0.0603, 0.1410, 0.1307]) -Greedy action tensor([ 1.4860, -0.0240, -1.1074, 0.1999]) tensor([0.6361, 0.1405, 0.0476, 0.1758]) -Greedy action tensor([ 1.6077, -0.3658, -0.6175, 0.2271]) tensor([0.6674, 0.0927, 0.0721, 0.1678]) -Greedy action tensor([ 1.3842, -0.3627, -0.2676, 0.7301]) tensor([0.5303, 0.0924, 0.1016, 0.2757]) -Greedy action tensor([ 1.3932, -0.3564, -0.6656, 0.5170]) tensor([0.5821, 0.1012, 0.0743, 0.2424]) -Greedy action tensor([ 1.5551, -0.1321, -1.0245, 0.6501]) tensor([0.6005, 0.1111, 0.0455, 0.2429]) -Greedy action tensor([ 1.9786, -1.2757, 0.0041, 0.9379]) tensor([0.6533, 0.0252, 0.0907, 0.2308]) -Greedy action tensor([ 1.7548, -0.7919, -0.3454, 0.3285]) tensor([0.6940, 0.0544, 0.0850, 0.1667]) -Greedy action tensor([ 1.3274, -0.4601, -0.4084, 0.4023]) tensor([0.5747, 0.0962, 0.1013, 0.2279]) -Greedy action tensor([ 1.6736, -0.6831, -0.5293, 0.2255]) tensor([0.6943, 0.0658, 0.0767, 0.1632]) -Greedy action tensor([ 1.9317, -0.2681, -0.5562, 0.5671]) tensor([0.6899, 0.0765, 0.0573, 0.1763]) -Greedy action tensor([ 1.0021, 0.0456, -0.0788, 0.0791]) tensor([0.4715, 0.1812, 0.1600, 0.1873]) -Greedy action tensor([ 1.5065, -0.3918, -0.3408, 0.2721]) tensor([0.6256, 0.0937, 0.0986, 0.1821]) -Greedy action tensor([ 1.1929, -0.0779, -0.5317, 0.2152]) tensor([0.5450, 0.1529, 0.0971, 0.2050]) -Greedy action tensor([ 1.6789, -0.3982, -0.3305, 0.0928]) tensor([0.6830, 0.0856, 0.0916, 0.1398]) -Greedy action tensor([ 1.3562, 0.1436, -0.3890, 0.3156]) tensor([0.5479, 0.1629, 0.0957, 0.1935]) -Greedy action tensor([ 1.2941, -0.2720, -0.5303, 0.3803]) tensor([0.5646, 0.1179, 0.0911, 0.2264]) -Greedy action tensor([ 2.1608, -1.1388, -0.0715, 0.8572]) tensor([0.7063, 0.0261, 0.0758, 0.1918]) -Greedy action tensor([ 1.1082, 0.1939, -0.6385, 0.3047]) tensor([0.4943, 0.1981, 0.0862, 0.2214]) -Greedy action tensor([ 1.7613, -0.6498, -0.6228, 1.0573]) tensor([0.5965, 0.0535, 0.0550, 0.2950]) -Greedy action tensor([ 1.8564, -0.7487, -0.4234, 0.2533]) tensor([0.7260, 0.0536, 0.0743, 0.1461]) -Greedy action tensor([ 1.3096, -1.0317, -0.3359, -0.0849]) tensor([0.6506, 0.0626, 0.1255, 0.1613]) -Greedy action tensor([ 1.4635, -0.5959, -0.8967, 0.0597]) tensor([0.6814, 0.0869, 0.0643, 0.1674]) -Greedy action tensor([ 1.7920, 0.0301, -0.1655, 0.2128]) tensor([0.6583, 0.1130, 0.0930, 0.1357]) -Greedy action tensor([ 1.2336, -0.3725, -0.5588, 0.3475]) tensor([0.5620, 0.1128, 0.0936, 0.2317]) -Greedy action tensor([ 1.3459, -0.6635, -0.2592, -0.2235]) tensor([0.6480, 0.0869, 0.1302, 0.1349]) -Greedy action tensor([ 1.3509, -0.6558, -0.1023, 0.1718]) tensor([0.5967, 0.0802, 0.1395, 0.1835]) -Greedy action tensor([ 1.3964, -0.4764, -0.7646, 0.0166]) tensor([0.6577, 0.1011, 0.0758, 0.1655]) -Greedy action tensor([ 1.1242, -0.0132, -0.9046, 0.4361]) tensor([0.5116, 0.1640, 0.0673, 0.2571]) -Greedy action tensor([ 1.6699, -0.8244, 0.0583, 0.4775]) tensor([0.6307, 0.0521, 0.1259, 0.1914]) -Greedy action tensor([ 3.0556, -1.7274, -0.2362, 0.6077]) tensor([0.8834, 0.0074, 0.0329, 0.0764]) -Greedy action tensor([ 1.6175, -0.6699, -0.3384, 0.2604]) tensor([0.6665, 0.0677, 0.0943, 0.1716]) -Greedy action tensor([ 1.4644, -0.2606, -0.9864, 0.5007]) tensor([0.6076, 0.1083, 0.0524, 0.2318]) -Greedy action tensor([ 1.0791, -0.2498, -0.6237, 0.0392]) tensor([0.5554, 0.1471, 0.1012, 0.1963]) -Greedy action tensor([ 1.2465, -0.1608, -0.2479, 0.2856]) tensor([0.5400, 0.1322, 0.1212, 0.2066]) -Greedy action tensor([ 1.7902, -0.8542, -0.1598, 0.2315]) tensor([0.7024, 0.0499, 0.0999, 0.1478]) -Greedy action tensor([ 1.8354, -0.2078, -0.3574, 0.1625]) tensor([0.6998, 0.0907, 0.0781, 0.1314]) -Greedy action tensor([ 1.2897, 0.0694, -0.5640, 0.1101]) tensor([0.5684, 0.1678, 0.0891, 0.1747]) -Greedy action tensor([ 1.7535, -0.8140, 0.0201, -0.2268]) tensor([0.7187, 0.0551, 0.1270, 0.0992]) -Greedy action tensor([ 1.4289, -0.5827, -0.2561, 0.3183]) tensor([0.6066, 0.0811, 0.1125, 0.1998]) -Greedy action tensor([ 1.1123, -0.3111, -0.1202, 0.0671]) tensor([0.5308, 0.1279, 0.1547, 0.1866]) -Greedy action tensor([ 1.5732, 0.0315, -0.9269, 0.3207]) tensor([0.6322, 0.1353, 0.0519, 0.1807]) -Greedy action tensor([ 1.3903, -0.2864, -0.5257, 0.1094]) tensor([0.6204, 0.1160, 0.0913, 0.1723]) -Greedy action tensor([ 1.3517, -0.6508, -0.4641, 0.2986]) tensor([0.6073, 0.0820, 0.0988, 0.2119]) -Greedy action tensor([ 1.4528, -0.7231, -0.1804, 0.4886]) tensor([0.5917, 0.0672, 0.1156, 0.2256]) -Greedy action tensor([ 1.2617, 0.0484, -0.5168, -0.0631]) tensor([0.5774, 0.1716, 0.0975, 0.1535]) -Greedy action tensor([ 1.6834, -0.3652, 0.1623, 0.3995]) tensor([0.6156, 0.0794, 0.1345, 0.1705]) -Greedy action tensor([ 1.4709, -0.6736, -0.7095, 0.0605]) tensor([0.6784, 0.0795, 0.0766, 0.1655]) -Greedy action tensor([ 1.3482, -0.0220, -0.6411, 0.2666]) tensor([0.5781, 0.1469, 0.0791, 0.1960]) -Greedy action tensor([ 1.8721, -1.0925, -0.4421, 0.1632]) tensor([0.7510, 0.0387, 0.0742, 0.1360]) -Greedy action tensor([ 1.3776, -0.8409, -0.5151, 0.1495]) tensor([0.6442, 0.0701, 0.0971, 0.1887]) -Greedy action tensor([ 1.6508, -0.8435, -0.2702, 0.2440]) tensor([0.6785, 0.0560, 0.0994, 0.1662]) -Greedy action tensor([ 1.6104, -1.1772, -0.3099, 0.2859]) tensor([0.6784, 0.0418, 0.0994, 0.1804]) -Greedy action tensor([ 1.1674, -0.1672, -0.4958, -0.1808]) tensor([0.5839, 0.1537, 0.1107, 0.1517]) -Greedy action tensor([ 1.1329, -0.3371, -0.2104, 0.2281]) tensor([0.5276, 0.1213, 0.1377, 0.2135]) -Greedy action tensor([ 1.7071, -0.2461, -0.9187, 0.0422]) tensor([0.7125, 0.1011, 0.0516, 0.1348]) -Greedy action tensor([ 1.4808, -0.0408, -1.5387, 0.4547]) tensor([0.6152, 0.1343, 0.0300, 0.2205]) -Greedy action tensor([ 1.5280, -0.2232, -0.6285, 0.2675]) tensor([0.6358, 0.1104, 0.0736, 0.1802]) -Greedy action tensor([ 0.9130, -0.3371, -0.3487, 0.6503]) tensor([0.4276, 0.1225, 0.1211, 0.3288]) -Greedy action tensor([ 1.4465, -0.3326, -0.6298, 0.2875]) tensor([0.6219, 0.1050, 0.0780, 0.1951]) -Greedy action tensor([ 1.3127, -0.4143, -0.2092, 0.1398]) tensor([0.5863, 0.1043, 0.1280, 0.1815]) -Greedy action tensor([ 1.3228, -0.5192, -0.6055, 0.0821]) tensor([0.6277, 0.0995, 0.0913, 0.1815]) -Greedy action tensor([ 2.1022, -0.9233, -0.2144, 0.1074]) tensor([0.7793, 0.0378, 0.0768, 0.1060]) -Greedy action tensor([ 1.9541, -0.4727, -0.2589, 0.6365]) tensor([0.6824, 0.0603, 0.0746, 0.1827]) -Greedy action tensor([ 1.5594, -0.5686, -0.5702, 0.1825]) tensor([0.6710, 0.0799, 0.0798, 0.1693]) -Greedy action tensor([ 1.4352, -0.8482, -0.3183, 0.2036]) tensor([0.6382, 0.0651, 0.1105, 0.1862]) -Greedy action tensor([1.9508, 0.1174, 0.2230, 0.1543]) tensor([0.6651, 0.1063, 0.1182, 0.1103]) -Greedy action tensor([ 1.6327, -0.1158, -0.1957, 0.3971]) tensor([0.6153, 0.1071, 0.0988, 0.1788]) -Greedy action tensor([ 0.9364, -0.2977, -0.5527, 0.0988]) tensor([0.5130, 0.1493, 0.1157, 0.2220]) -Greedy action tensor([ 1.6600, -0.7901, -0.6634, 0.4234]) tensor([0.6782, 0.0585, 0.0664, 0.1969]) -Greedy action tensor([ 1.3485, -0.2946, -0.2556, 0.1411]) tensor([0.5905, 0.1142, 0.1187, 0.1766]) -Greedy action tensor([ 0.4139, -0.1922, 0.0722, 0.0124]) tensor([0.3418, 0.1865, 0.2429, 0.2288]) -Greedy action tensor([ 0.5067, -0.3482, -0.1423, -0.0734]) tensor([0.3988, 0.1696, 0.2084, 0.2232]) -Greedy action tensor([ 0.7878, -0.5772, 0.0646, -0.4491]) tensor([0.4924, 0.1257, 0.2389, 0.1429]) -Greedy action tensor([ 1.2277, -0.6671, -0.0763, -0.3098]) tensor([0.6110, 0.0919, 0.1658, 0.1313]) -Greedy action tensor([ 1.1553, -1.1532, 0.0844, -0.6391]) tensor([0.6218, 0.0618, 0.2131, 0.1034]) -Greedy action tensor([ 0.9969, -0.5368, 0.0590, -0.5047]) tensor([0.5465, 0.1179, 0.2139, 0.1217]) -Greedy action tensor([ 0.9336, -0.5663, -0.1455, -0.4303]) tensor([0.5498, 0.1227, 0.1869, 0.1406]) -Greedy action tensor([ 0.7308, -0.4663, -0.0310, -0.3300]) tensor([0.4728, 0.1428, 0.2207, 0.1637]) -Greedy action tensor([ 0.9338, -0.8169, -0.0122, -0.3535]) tensor([0.5441, 0.0945, 0.2113, 0.1502]) -Greedy action tensor([ 0.3155, 0.0802, -0.0223, 0.0682]) tensor([0.3045, 0.2406, 0.2172, 0.2378]) -Greedy action tensor([ 0.8402, -0.4826, 0.0168, -0.2112]) tensor([0.4867, 0.1296, 0.2136, 0.1701]) -Greedy action tensor([ 0.7506, -0.5536, -0.0381, -0.4222]) tensor([0.4913, 0.1333, 0.2233, 0.1521]) -Greedy action tensor([ 1.0595, -1.0521, 0.1707, -0.4919]) tensor([0.5734, 0.0694, 0.2357, 0.1215]) -Greedy action tensor([ 0.7995, -0.5882, 0.0674, -0.4254]) tensor([0.4940, 0.1233, 0.2376, 0.1451]) -Greedy action tensor([ 1.2207, -0.6057, -0.1502, -0.7875]) tensor([0.6455, 0.1039, 0.1639, 0.0866]) -Greedy action tensor([ 0.6614, -0.3784, -0.0144, -0.2454]) tensor([0.4413, 0.1560, 0.2245, 0.1782]) -Greedy action tensor([ 0.5351, -0.5060, -0.0595, -0.5131]) tensor([0.4434, 0.1565, 0.2446, 0.1554]) -Greedy action tensor([ 0.9195, -0.6188, 0.0136, -0.6427]) tensor([0.5469, 0.1174, 0.2210, 0.1147]) -Greedy action tensor([ 0.7417, 0.0917, -0.0465, 0.0943]) tensor([0.4000, 0.2088, 0.1819, 0.2093]) -Greedy action tensor([ 0.7326, -0.5771, 0.1529, -0.5460]) tensor([0.4743, 0.1280, 0.2656, 0.1321]) -Greedy action tensor([ 1.1668, -0.7147, -0.0439, -0.6288]) tensor([0.6187, 0.0943, 0.1844, 0.1027]) -Greedy action tensor([ 0.8800, -0.2804, -0.0484, -0.1735]) tensor([0.4861, 0.1523, 0.1921, 0.1695]) -Greedy action tensor([ 0.3385, -0.1006, -0.0865, -0.4708]) tensor([0.3645, 0.2350, 0.2383, 0.1623]) -Greedy action tensor([ 0.6002, -0.2571, 0.1604, -0.5668]) tensor([0.4202, 0.1783, 0.2707, 0.1308]) -Greedy action tensor([ 0.7337, -0.5210, -0.0476, -0.3727]) tensor([0.4822, 0.1375, 0.2208, 0.1595]) -Greedy action tensor([ 0.8089, -0.6425, -0.0364, -0.3145]) tensor([0.5028, 0.1178, 0.2159, 0.1635]) -Greedy action tensor([ 0.3079, 0.0012, 0.1420, -0.5405]) tensor([0.3321, 0.2444, 0.2813, 0.1422]) -Greedy action tensor([ 0.7778, -0.8037, -0.0074, -0.3396]) tensor([0.5028, 0.1034, 0.2293, 0.1645]) -Greedy action tensor([ 0.9274, -0.3581, -0.1214, 0.0401]) tensor([0.4905, 0.1356, 0.1719, 0.2020]) -Greedy action tensor([ 0.2996, 0.1264, -0.0399, 0.0304]) tensor([0.3015, 0.2535, 0.2147, 0.2303]) -Greedy action tensor([ 0.8134, -0.3272, -0.1192, -0.3215]) tensor([0.4915, 0.1571, 0.1934, 0.1580]) -Greedy action tensor([ 0.5556, -0.1024, 0.0548, -0.0225]) tensor([0.3725, 0.1929, 0.2257, 0.2089]) -Greedy action tensor([ 0.8901, -0.5769, -0.0157, -0.3293]) tensor([0.5181, 0.1195, 0.2094, 0.1530]) -Greedy action tensor([ 1.0380, -0.2895, 0.1004, -0.0977]) tensor([0.5056, 0.1340, 0.1980, 0.1624]) -Greedy action tensor([ 0.6172, -0.4901, -0.0792, -0.1553]) tensor([0.4366, 0.1443, 0.2176, 0.2016]) -Greedy action tensor([ 0.4043, 0.0026, -0.2084, -0.0150]) tensor([0.3486, 0.2333, 0.1889, 0.2292]) -Greedy action tensor([ 0.6904, -0.4003, -0.1174, -0.2815]) tensor([0.4629, 0.1555, 0.2064, 0.1752]) -Greedy action tensor([ 0.8324, -0.1821, -0.0649, -0.1794]) tensor([0.4686, 0.1699, 0.1911, 0.1704]) -Greedy action tensor([ 0.7071, -0.6237, -0.0822, -0.3508]) tensor([0.4841, 0.1279, 0.2199, 0.1681]) -Greedy action tensor([ 0.6104, -0.2435, -0.0340, -0.1688]) tensor([0.4150, 0.1767, 0.2179, 0.1904]) -Greedy action tensor([ 0.7324, 0.1842, 0.0491, -0.3385]) tensor([0.4123, 0.2383, 0.2082, 0.1413]) -Greedy action tensor([ 0.3590, -0.0908, -0.0768, -0.3934]) tensor([0.3629, 0.2314, 0.2347, 0.1710]) -Greedy action tensor([ 0.8206, -0.6102, 0.0199, -0.3848]) tensor([0.5031, 0.1203, 0.2259, 0.1507]) -Greedy action tensor([ 0.3831, -0.2933, 0.0905, -0.5923]) tensor([0.3800, 0.1932, 0.2836, 0.1433]) -Greedy action tensor([ 0.9396, -0.5860, -0.1635, -0.3264]) tensor([0.5461, 0.1188, 0.1812, 0.1540]) -Greedy action tensor([ 0.8471, -0.7670, -0.0967, -0.3807]) tensor([0.5316, 0.1058, 0.2069, 0.1557]) -Greedy action tensor([ 0.7603, -0.1025, -0.0728, -0.0178]) tensor([0.4318, 0.1822, 0.1877, 0.1983]) -Greedy action tensor([ 0.7993, -0.4145, -0.2018, -0.2848]) tensor([0.4993, 0.1483, 0.1835, 0.1689]) -Greedy action tensor([ 0.5663, -0.2564, -0.0268, -0.5268]) tensor([0.4297, 0.1888, 0.2375, 0.1440]) -Greedy action tensor([ 0.9120, -0.5553, -0.0982, -0.3619]) tensor([0.5335, 0.1230, 0.1943, 0.1492]) -Greedy action tensor([ 0.8185, -0.6492, 0.1289, -0.3951]) tensor([0.4928, 0.1136, 0.2473, 0.1464]) -Greedy action tensor([ 1.2312, -0.9272, 0.0817, -0.8403]) tensor([0.6417, 0.0741, 0.2033, 0.0809]) -Greedy action tensor([ 0.6816, -0.3463, 0.2004, -0.1272]) tensor([0.4130, 0.1478, 0.2553, 0.1840]) -Greedy action tensor([ 0.8153, -0.6395, 0.0533, -0.4287]) tensor([0.5029, 0.1174, 0.2347, 0.1450]) -Greedy action tensor([ 1.0165, -0.3118, -0.0373, -0.1856]) tensor([0.5224, 0.1384, 0.1821, 0.1570]) -Greedy action tensor([ 0.8712, -0.7096, 0.2312, -0.3448]) tensor([0.4927, 0.1014, 0.2598, 0.1461]) -Greedy action tensor([ 0.5588, -0.1049, 0.0442, -0.2320]) tensor([0.3897, 0.2007, 0.2329, 0.1767]) -Greedy action tensor([ 0.6718, -0.4996, 0.0378, -0.1289]) tensor([0.4368, 0.1354, 0.2317, 0.1961]) -Greedy action tensor([ 0.9795, -0.3821, -0.0177, -0.2526]) tensor([0.5217, 0.1337, 0.1924, 0.1522]) -Greedy action tensor([0.4637, 0.4061, 0.4363, 0.6939]) tensor([0.2395, 0.2261, 0.2330, 0.3015]) -Greedy action tensor([ 0.8765, -0.7887, 0.0696, -0.1388]) tensor([0.5006, 0.0947, 0.2234, 0.1813]) -Greedy action tensor([ 1.0228, -0.4529, 0.1064, -0.3948]) tensor([0.5345, 0.1222, 0.2138, 0.1295]) -Greedy action tensor([ 0.3891, 0.1606, -0.2533, 0.1831]) tensor([0.3189, 0.2538, 0.1678, 0.2596]) -Greedy action tensor([ 0.6588, -0.3723, -0.0046, -0.1768]) tensor([0.4338, 0.1547, 0.2234, 0.1881]) -Greedy action tensor([ 1.2646, -0.8391, 0.0749, -0.6080]) tensor([0.6329, 0.0772, 0.1926, 0.0973]) -Greedy action tensor([ 0.7701, -0.3830, -0.1247, -0.3625]) tensor([0.4886, 0.1542, 0.1997, 0.1574]) -Greedy action tensor([0.6707, 0.1078, 0.0561, 0.0594]) tensor([0.3769, 0.2147, 0.2039, 0.2045]) -Greedy action tensor([ 0.9049, -0.6476, 0.0109, -0.5484]) tensor([0.5392, 0.1142, 0.2206, 0.1261]) -Greedy action tensor([ 0.6025, -0.4903, -0.0748, -0.0664]) tensor([0.4245, 0.1423, 0.2156, 0.2175]) -Greedy action tensor([ 0.8294, -0.4260, -0.0987, -0.4528]) tensor([0.5108, 0.1456, 0.2019, 0.1417]) -Greedy action tensor([ 1.1763, -0.5510, -0.0727, -0.5561]) tensor([0.6092, 0.1083, 0.1747, 0.1078]) -Greedy action tensor([ 0.6821, -0.2359, 0.0151, -0.1545]) tensor([0.4263, 0.1702, 0.2188, 0.1847]) -Greedy action tensor([ 0.7845, -0.2752, 0.0761, -0.0580]) tensor([0.4406, 0.1527, 0.2170, 0.1897]) -Greedy action tensor([ 0.6795, -0.3915, -0.0175, -0.4062]) tensor([0.4590, 0.1573, 0.2286, 0.1550]) -Greedy action tensor([ 0.9007, -0.6714, -0.0176, -0.3776]) tensor([0.5304, 0.1101, 0.2117, 0.1477]) -Greedy action tensor([ 0.6712, -0.1950, -0.0184, -0.1340]) tensor([0.4221, 0.1775, 0.2118, 0.1887]) -Greedy action tensor([ 0.6592, -0.3324, 0.0299, -0.5947]) tensor([0.4568, 0.1695, 0.2434, 0.1304]) -Greedy action tensor([ 0.3766, -0.1902, -0.0568, -0.2156]) tensor([0.3612, 0.2049, 0.2342, 0.1998]) -Greedy action tensor([ 0.4867, -0.2076, -0.0932, -0.0938]) tensor([0.3818, 0.1907, 0.2138, 0.2137]) -Greedy action tensor([ 0.8526, -0.8578, 0.0400, -0.5148]) tensor([0.5321, 0.0962, 0.2361, 0.1356]) -Greedy action tensor([ 0.9311, -0.5210, -0.0839, -0.3842]) tensor([0.5362, 0.1255, 0.1943, 0.1439]) -Greedy action tensor([ 0.7515, -0.4944, -0.1331, -0.1675]) tensor([0.4763, 0.1370, 0.1967, 0.1900]) -Greedy action tensor([ 1.0133, -0.6545, -0.1337, -0.3620]) tensor([0.5685, 0.1073, 0.1805, 0.1437]) -Greedy action tensor([-0.8687, -1.0553, 0.5692, -0.7530]) tensor([0.1396, 0.1158, 0.5879, 0.1567]) -Greedy action tensor([ 1.0940, -0.9710, 1.3344, 0.8887]) tensor([0.3112, 0.0395, 0.3958, 0.2535]) -Greedy action tensor([-0.4276, -1.3781, -0.8847, -0.4468]) tensor([0.3333, 0.1288, 0.2110, 0.3269]) -Greedy action tensor([-0.1460, 0.6027, -0.5254, 0.3416]) tensor([0.1843, 0.3896, 0.1261, 0.3001]) -Greedy action tensor([-0.4620, 0.6918, -0.6850, -0.6575]) tensor([0.1726, 0.5473, 0.1381, 0.1420]) -Greedy action tensor([-0.0417, -0.8522, -0.1576, -0.3400]) tensor([0.3250, 0.1445, 0.2894, 0.2411]) -Greedy action tensor([ 0.0578, -1.1454, -0.0872, 0.1949]) tensor([0.3019, 0.0906, 0.2612, 0.3463]) -Greedy action tensor([ 0.4315, -0.1485, -0.3452, -0.5261]) tensor([0.4160, 0.2329, 0.1913, 0.1597]) -Greedy action tensor([-0.4515, 0.1465, 0.6764, -0.6207]) tensor([0.1481, 0.2693, 0.4575, 0.1251]) -Greedy action tensor([ 0.4625, -0.7203, 1.2078, -0.4161]) tensor([0.2612, 0.0800, 0.5503, 0.1085]) -Greedy action tensor([-0.7724, 0.1390, 0.3910, -0.6447]) tensor([0.1278, 0.3179, 0.4091, 0.1452]) -Greedy action tensor([ 0.9508, -0.8705, 0.6930, 0.4582]) tensor([0.3928, 0.0636, 0.3036, 0.2400]) -Greedy action tensor([-0.1017, -1.7931, -0.5808, -0.2273]) tensor([0.3724, 0.0686, 0.2306, 0.3284]) -Greedy action tensor([ 0.1100, 0.3066, 0.0575, -0.3408]) tensor([0.2629, 0.3201, 0.2495, 0.1675]) -Greedy action tensor([ 0.6971, -0.4870, 0.0119, -0.2871]) tensor([0.4579, 0.1401, 0.2308, 0.1711]) -Greedy action tensor([-0.6360, -1.5273, 0.6229, -0.1196]) tensor([0.1513, 0.0621, 0.5329, 0.2536]) -Greedy action tensor([-1.3942, 0.0090, -0.2843, -0.5361]) tensor([0.0956, 0.3889, 0.2900, 0.2255]) -Greedy action tensor([-0.2153, 0.1497, 0.3450, -0.7072]) tensor([0.2082, 0.2999, 0.3646, 0.1273]) -Greedy action tensor([0.6103, 0.3039, 0.2039, 0.2910]) tensor([0.3196, 0.2353, 0.2129, 0.2322]) -Greedy action tensor([-0.3259, -0.6088, 0.7199, -0.1494]) tensor([0.1726, 0.1301, 0.4913, 0.2060]) -Greedy action tensor([-0.7586, -0.5521, 0.3844, -0.3381]) tensor([0.1452, 0.1785, 0.4553, 0.2211]) -Greedy action tensor([-0.7393, -0.8959, 0.5565, -0.8722]) tensor([0.1566, 0.1339, 0.5723, 0.1371]) -Greedy action tensor([ 0.2492, 0.2435, -0.1965, 0.1166]) tensor([0.2849, 0.2832, 0.1824, 0.2495]) -Greedy action tensor([ 1.4034, -1.0587, 0.1517, 0.5550]) tensor([0.5557, 0.0474, 0.1590, 0.2379]) -Greedy action tensor([ 0.3936, -0.6627, 0.6237, 0.1259]) tensor([0.2966, 0.1031, 0.3733, 0.2269]) -Greedy action tensor([-0.4503, -1.8915, 0.6388, -0.6631]) tensor([0.1993, 0.0472, 0.5924, 0.1611]) -Greedy action tensor([ 0.8559, -0.7469, -0.3947, -0.3858]) tensor([0.5629, 0.1133, 0.1612, 0.1626]) -Greedy action tensor([-0.4337, -0.7200, -0.4000, -0.8011]) tensor([0.2875, 0.2159, 0.2974, 0.1991]) -Greedy action tensor([ 0.0211, -0.4379, 0.0539, 0.3118]) tensor([0.2498, 0.1579, 0.2582, 0.3341]) -Greedy action tensor([-0.3338, -2.0668, -0.1778, 0.2311]) tensor([0.2436, 0.0431, 0.2847, 0.4286]) -Greedy action tensor([-0.8440, -0.9521, -1.4018, -0.2259]) tensor([0.2312, 0.2075, 0.1323, 0.4290]) -Greedy action tensor([ 1.3174, -1.6242, -0.2473, 1.1011]) tensor([0.4837, 0.0255, 0.1012, 0.3896]) -Greedy action tensor([-0.1634, 0.2356, -0.4908, 0.0945]) tensor([0.2220, 0.3308, 0.1600, 0.2873]) -Greedy action tensor([ 0.4064, 0.2318, -0.6095, 0.7091]) tensor([0.2813, 0.2362, 0.1018, 0.3807]) -Greedy action tensor([ 0.2795, -0.3533, -0.2925, -0.2114]) tensor([0.3693, 0.1962, 0.2085, 0.2261]) -Greedy action tensor([ 1.4424, -0.8060, -0.9477, 0.7319]) tensor([0.5922, 0.0625, 0.0543, 0.2910]) -Greedy action tensor([ 0.4635, -0.9416, 0.0559, 0.1953]) tensor([0.3738, 0.0917, 0.2487, 0.2859]) -Greedy action tensor([ 0.0581, -0.6114, 1.1052, -0.4496]) tensor([0.2015, 0.1031, 0.5741, 0.1213]) -Greedy action tensor([ 0.6329, -0.6630, 0.4833, 0.9352]) tensor([0.2867, 0.0785, 0.2469, 0.3879]) -Greedy action tensor([ 0.1640, 0.5831, -0.4626, -0.6869]) tensor([0.2872, 0.4367, 0.1535, 0.1226]) -Greedy action tensor([-0.5064, -0.1816, -0.1778, -0.2430]) tensor([0.1971, 0.2727, 0.2737, 0.2565]) -Greedy action tensor([-1.2329, -1.0134, -0.3739, -0.1016]) tensor([0.1298, 0.1616, 0.3063, 0.4023]) -Greedy action tensor([-0.5118, -0.6553, -0.7303, -0.9196]) tensor([0.2998, 0.2598, 0.2410, 0.1994]) -Greedy action tensor([0.6509, 0.9172, 0.0497, 0.4181]) tensor([0.2743, 0.3580, 0.1504, 0.2173]) -Greedy action tensor([ 1.9081, -1.1312, 0.6619, -0.0739]) tensor([0.6788, 0.0325, 0.1952, 0.0935]) -Greedy action tensor([ 0.5769, -0.0461, 0.4588, -0.1910]) tensor([0.3461, 0.1857, 0.3076, 0.1606]) -Greedy action tensor([ 0.2809, -0.7428, 0.9322, -0.5916]) tensor([0.2706, 0.0972, 0.5191, 0.1131]) -Greedy action tensor([ 0.9775, -1.6688, -0.8339, -0.0138]) tensor([0.6229, 0.0442, 0.1018, 0.2312]) -Greedy action tensor([ 0.6423, -0.6708, -0.2123, 0.5899]) tensor([0.3783, 0.1018, 0.1609, 0.3590]) -Greedy action tensor([1.0472, 0.3220, 0.0495, 0.1882]) tensor([0.4393, 0.2127, 0.1620, 0.1861]) -Greedy action tensor([-0.1272, -0.7985, 1.4067, -0.1954]) tensor([0.1412, 0.0722, 0.6547, 0.1319]) -Greedy action tensor([ 0.3947, -0.4962, 0.9327, 0.2083]) tensor([0.2530, 0.1038, 0.4333, 0.2100]) -Greedy action tensor([ 0.6170, -1.5045, -0.5709, -0.5753]) tensor([0.5786, 0.0694, 0.1764, 0.1756]) -Greedy action tensor([ 0.2244, -1.0578, -0.3193, 0.1878]) tensor([0.3543, 0.0983, 0.2057, 0.3416]) -Greedy action tensor([-0.1929, -0.3173, 0.5642, -0.7796]) tensor([0.2188, 0.1932, 0.4664, 0.1217]) -Greedy action tensor([ 0.2611, -0.9264, -1.1989, 0.3908]) tensor([0.3737, 0.1140, 0.0868, 0.4255]) -Greedy action tensor([-0.6213, -0.3683, 0.1506, -0.1884]) tensor([0.1668, 0.2149, 0.3610, 0.2572]) -Greedy action tensor([ 0.0448, 0.3562, -0.0731, 0.5212]) tensor([0.2056, 0.2807, 0.1827, 0.3310]) -Greedy action tensor([0.0592, 0.3832, 0.6675, 0.4684]) tensor([0.1747, 0.2415, 0.3209, 0.2630]) -Greedy action tensor([-0.9781, -0.7855, -1.0778, -0.6163]) tensor([0.2196, 0.2663, 0.1988, 0.3153]) -Greedy action tensor([ 0.3151, -0.6120, -0.2924, 0.1866]) tensor([0.3546, 0.1403, 0.1932, 0.3119]) -Greedy action tensor([-0.1315, -1.0734, -0.4287, -0.7083]) tensor([0.3711, 0.1447, 0.2757, 0.2085]) -Greedy action tensor([ 0.2837, -1.2255, 0.1993, -0.3839]) tensor([0.3769, 0.0833, 0.3464, 0.1933]) -Greedy action tensor([-0.3351, -0.7835, -0.5177, -0.4133]) tensor([0.2944, 0.1880, 0.2453, 0.2723]) -Greedy action tensor([-0.0125, -1.1600, -0.6675, 0.2430]) tensor([0.3197, 0.1015, 0.1661, 0.4128]) -Greedy action tensor([-0.3406, -0.6460, 0.6449, -0.5235]) tensor([0.1905, 0.1404, 0.5104, 0.1587]) -Greedy action tensor([-0.6672, -0.3035, -0.9847, -0.0507]) tensor([0.1992, 0.2866, 0.1450, 0.3691]) -Greedy action tensor([ 0.1953, -1.5812, -0.3808, -0.7606]) tensor([0.4726, 0.0800, 0.2657, 0.1817]) -Greedy action tensor([ 0.4321, -0.9074, 0.4690, -0.3892]) tensor([0.3651, 0.0956, 0.3788, 0.1606]) -Greedy action tensor([-0.3509, -0.5222, 0.4002, -0.6490]) tensor([0.2126, 0.1791, 0.4505, 0.1578]) -Greedy action tensor([-1.2725, -0.4307, 0.0676, -1.2160]) tensor([0.1220, 0.2831, 0.4659, 0.1291]) -Greedy action tensor([-1.5038, -0.3503, 0.0410, 0.4728]) tensor([0.0622, 0.1972, 0.2916, 0.4490]) -Greedy action tensor([ 0.2436, -1.2339, -0.6110, 0.2361]) tensor([0.3779, 0.0862, 0.1608, 0.3751]) -Greedy action tensor([ 0.2899, -0.6866, 0.6313, -0.0241]) tensor([0.2846, 0.1072, 0.4004, 0.2079]) -Greedy action tensor([-0.5636, 0.4403, 0.9864, -1.3127]) tensor([0.1122, 0.3062, 0.5286, 0.0530]) -Greedy action tensor([ 0.1680, -0.8691, -0.3110, -0.6467]) tensor([0.4138, 0.1467, 0.2563, 0.1832]) -Greedy action tensor([ 0.5524, -1.2217, -0.5347, -0.2821]) tensor([0.5152, 0.0874, 0.1737, 0.2237]) -Greedy action tensor([-0.1670, -1.1151, 0.1561, -0.4864]) tensor([0.2861, 0.1108, 0.3952, 0.2079]) -Greedy action tensor([-0.8494, -0.3736, -1.2210, -0.0821]) tensor([0.1834, 0.2951, 0.1265, 0.3950]) -Greedy action tensor([ 0.1053, 0.5873, 0.1328, -0.5558]) tensor([0.2402, 0.3889, 0.2469, 0.1240]) -Greedy action tensor([-1.8389, -0.7444, 0.2688, -0.7842]) tensor([0.0663, 0.1980, 0.5454, 0.1903]) -Greedy action tensor([-0.4649, 0.0701, -0.1076, -0.1027]) tensor([0.1794, 0.3064, 0.2565, 0.2577]) -Greedy action tensor([-1.9347, -0.3948, 0.6527, -0.1727]) tensor([0.0404, 0.1882, 0.5365, 0.2350]) -Greedy action tensor([-0.9091, 0.9493, 0.0843, 0.4383]) tensor([0.0716, 0.4594, 0.1934, 0.2756]) -Greedy action tensor([-1.7162, -0.5187, 0.5415, -0.0568]) tensor([0.0523, 0.1731, 0.4998, 0.2748]) -Greedy action tensor([-1.8338, -0.4368, 0.6107, -0.1189]) tensor([0.0452, 0.1827, 0.5209, 0.2511]) -Greedy action tensor([-1.7854, -0.4481, 0.5878, -0.0929]) tensor([0.0477, 0.1816, 0.5117, 0.2590]) -Greedy action tensor([-1.9053, -0.3932, 0.6425, -0.1564]) tensor([0.0416, 0.1885, 0.5311, 0.2389]) -Greedy action tensor([-1.8996, -0.2483, 0.6355, -0.2061]) tensor([0.0412, 0.2148, 0.5199, 0.2241]) -Greedy action tensor([-1.9046, -0.3734, 0.6300, -0.1614]) tensor([0.0418, 0.1930, 0.5266, 0.2386]) -Greedy action tensor([-1.9046, -0.4525, 0.6510, -0.1621]) tensor([0.0419, 0.1790, 0.5397, 0.2393]) -Greedy action tensor([-1.9307, -0.4377, 0.6618, -0.1733]) tensor([0.0406, 0.1808, 0.5430, 0.2356]) -Greedy action tensor([-1.7074, -0.3372, 0.5431, -0.0136]) tensor([0.0503, 0.1981, 0.4777, 0.2738]) -Greedy action tensor([-1.7893, -0.3081, 0.5702, -0.1208]) tensor([0.0470, 0.2066, 0.4973, 0.2492]) -Greedy action tensor([-1.8430, -0.3503, 0.6169, -0.1076]) tensor([0.0438, 0.1949, 0.5128, 0.2485]) -Greedy action tensor([-1.6047, -0.1313, 0.6090, 0.1362]) tensor([0.0495, 0.2159, 0.4526, 0.2821]) -Greedy action tensor([-1.9378, -0.4449, 0.6650, -0.1766]) tensor([0.0404, 0.1797, 0.5450, 0.2349]) -Greedy action tensor([-1.9342, -0.4671, 0.7025, -0.1645]) tensor([0.0397, 0.1723, 0.5548, 0.2332]) -Greedy action tensor([-1.9140, -0.4705, 0.6693, -0.1570]) tensor([0.0412, 0.1745, 0.5455, 0.2388]) -Greedy action tensor([-1.8363, -0.4162, 0.6045, -0.1256]) tensor([0.0451, 0.1868, 0.5183, 0.2498]) -Greedy action tensor([-0.7540, 0.9583, 0.0887, 0.2133]) tensor([0.0870, 0.4821, 0.2021, 0.2289]) -Greedy action tensor([-1.8748, -0.3855, 0.6322, -0.1493]) tensor([0.0429, 0.1902, 0.5261, 0.2408]) -Greedy action tensor([-1.8094, -0.4999, 0.6292, -0.0942]) tensor([0.0460, 0.1706, 0.5275, 0.2559]) -Greedy action tensor([-1.9279, -0.4401, 0.6590, -0.1701]) tensor([0.0408, 0.1806, 0.5420, 0.2366]) -Greedy action tensor([-1.8163, -0.4289, 0.5993, -0.1122]) tensor([0.0461, 0.1846, 0.5160, 0.2533]) -Greedy action tensor([-1.3516, -0.1738, 0.5691, 0.1483]) tensor([0.0643, 0.2088, 0.4388, 0.2881]) -Greedy action tensor([-1.5853, 0.0621, 0.4289, -0.1055]) tensor([0.0553, 0.2872, 0.4145, 0.2429]) -Greedy action tensor([-1.3645, -0.6090, 0.3521, 0.1949]) tensor([0.0744, 0.1583, 0.4138, 0.3536]) -Greedy action tensor([-1.8779, -0.4451, 0.6382, -0.1467]) tensor([0.0431, 0.1805, 0.5332, 0.2432]) -Greedy action tensor([-1.8216, -0.3504, 0.6151, -0.1047]) tensor([0.0447, 0.1948, 0.5115, 0.2490]) -Greedy action tensor([-1.9130, -0.4319, 0.6523, -0.1629]) tensor([0.0414, 0.1820, 0.5383, 0.2382]) -Greedy action tensor([-1.8532, -0.4468, 0.6174, -0.1394]) tensor([0.0445, 0.1817, 0.5267, 0.2471]) -Greedy action tensor([-1.4526, -0.2599, 0.3979, -0.0118]) tensor([0.0672, 0.2215, 0.4275, 0.2838]) -Greedy action tensor([-1.8199, -0.7798, 0.2490, -0.4218]) tensor([0.0633, 0.1792, 0.5012, 0.2563]) -Greedy action tensor([-1.8973, -0.4475, 0.6779, -0.1473]) tensor([0.0414, 0.1765, 0.5438, 0.2383]) -Greedy action tensor([-1.3333, 0.5296, 0.2333, 0.0313]) tensor([0.0619, 0.3990, 0.2967, 0.2424]) -Greedy action tensor([-1.9203, -0.4235, 0.6609, -0.1586]) tensor([0.0408, 0.1823, 0.5392, 0.2376]) -Greedy action tensor([-1.5608, 0.5606, 0.3409, -0.0192]) tensor([0.0483, 0.4028, 0.3233, 0.2256]) -Greedy action tensor([-1.5881, 0.4669, 0.3653, -0.0292]) tensor([0.0485, 0.3787, 0.3421, 0.2306]) -Greedy action tensor([-1.7003, -0.3716, 0.5342, -0.0918]) tensor([0.0523, 0.1976, 0.4888, 0.2614]) -Greedy action tensor([ 0.0990, -0.5687, 0.1804, 0.9666]) tensor([0.2009, 0.1030, 0.2179, 0.4783]) -Greedy action tensor([-1.3053, -0.4828, 0.3662, 0.1727]) tensor([0.0770, 0.1754, 0.4099, 0.3377]) -Greedy action tensor([-1.6593, -0.3084, 0.4663, -0.0047]) tensor([0.0541, 0.2090, 0.4536, 0.2832]) -Greedy action tensor([-1.4858, -0.6330, 0.5540, 0.1193]) tensor([0.0624, 0.1465, 0.4802, 0.3109]) -Greedy action tensor([-1.6436, -0.5757, 0.5292, -0.0031]) tensor([0.0560, 0.1630, 0.4920, 0.2889]) -Greedy action tensor([-1.6023, -0.2787, 0.6254, -0.0168]) tensor([0.0529, 0.1986, 0.4905, 0.2581]) -Greedy action tensor([-1.4079, -0.0150, 0.3567, -0.0160]) tensor([0.0672, 0.2704, 0.3922, 0.2702]) -Greedy action tensor([-1.6461e+00, -4.1256e-01, 5.3168e-01, 7.8851e-04]) tensor([0.0542, 0.1861, 0.4784, 0.2813]) -Greedy action tensor([-1.2241, -0.5578, 0.3255, 0.1611]) tensor([0.0858, 0.1671, 0.4042, 0.3429]) -Greedy action tensor([-1.9280, -0.4292, 0.6596, -0.1714]) tensor([0.0407, 0.1822, 0.5413, 0.2358]) -Greedy action tensor([-1.9380, -0.4405, 0.6649, -0.1768]) tensor([0.0403, 0.1803, 0.5446, 0.2347]) -Greedy action tensor([-0.9958, 0.4289, 0.2410, -0.1381]) tensor([0.0913, 0.3793, 0.3143, 0.2151]) -Greedy action tensor([-1.8444, -0.3302, 0.6013, -0.1655]) tensor([0.0446, 0.2025, 0.5141, 0.2388]) -Greedy action tensor([-1.9282, -0.4221, 0.6575, -0.1689]) tensor([0.0407, 0.1834, 0.5398, 0.2362]) -Greedy action tensor([-1.9317, -0.4506, 0.6594, -0.1744]) tensor([0.0408, 0.1792, 0.5438, 0.2362]) -Greedy action tensor([-1.9226, -0.4411, 0.6581, -0.1680]) tensor([0.0410, 0.1804, 0.5415, 0.2371]) -Greedy action tensor([-1.5732, -0.4360, 0.4749, -0.0257]) tensor([0.0603, 0.1882, 0.4679, 0.2836]) -Greedy action tensor([-1.9289, -0.4426, 0.6569, -0.1737]) tensor([0.0408, 0.1806, 0.5423, 0.2363]) -Greedy action tensor([-1.9289, -0.4177, 0.6563, -0.1681]) tensor([0.0406, 0.1841, 0.5389, 0.2363]) -Greedy action tensor([-1.7448, -0.5048, 0.7485, 0.1659]) tensor([0.0429, 0.1482, 0.5190, 0.2899]) -Greedy action tensor([-1.7072, -0.3198, 0.5568, -0.0187]) tensor([0.0499, 0.1998, 0.4802, 0.2701]) -Greedy action tensor([-0.7271, -0.4669, 0.2700, -0.1320]) tensor([0.1466, 0.1902, 0.3974, 0.2658]) -Greedy action tensor([-1.7327, -0.3641, 0.5499, -0.0832]) tensor([0.0502, 0.1971, 0.4917, 0.2610]) -Greedy action tensor([-1.7239, -0.2978, 0.6321, 0.0118]) tensor([0.0468, 0.1946, 0.4933, 0.2653]) -Greedy action tensor([-1.9161, -0.4342, 0.6538, -0.1638]) tensor([0.0413, 0.1816, 0.5391, 0.2380]) -Greedy action tensor([-0.4128, -0.1600, 0.1454, -0.0770]) tensor([0.1840, 0.2369, 0.3216, 0.2575]) -Greedy action tensor([-1.8942, -0.3125, 0.6265, -0.1529]) tensor([0.0417, 0.2026, 0.5181, 0.2377]) -Greedy action tensor([-1.2214, 0.2157, 0.4011, -0.2209]) tensor([0.0770, 0.3239, 0.3899, 0.2093]) -Greedy action tensor([-1.8585, -0.4208, 0.6092, -0.1250]) tensor([0.0441, 0.1858, 0.5204, 0.2497]) -Greedy action tensor([-1.6924, -0.4565, 0.5347, -0.0623]) tensor([0.0531, 0.1829, 0.4928, 0.2712]) -Greedy action tensor([-1.8989, -0.4546, 0.6524, -0.1469]) tensor([0.0420, 0.1779, 0.5382, 0.2420]) -Greedy action tensor([-1.7904, -0.2718, 0.6251, -0.0774]) tensor([0.0448, 0.2047, 0.5019, 0.2486]) -Greedy action tensor([-1.7905, -0.4183, 0.5847, -0.1275]) tensor([0.0477, 0.1881, 0.5127, 0.2515]) -Greedy action tensor([-0.3447, 0.7726, -1.0063, -0.1250]) tensor([0.1719, 0.5253, 0.0887, 0.2141]) -Greedy action tensor([-1.9345, -0.4014, 0.6547, -0.1721]) tensor([0.0404, 0.1870, 0.5375, 0.2352]) -Greedy action tensor([-1.0508, -0.2867, 0.3157, -0.0688]) tensor([0.1027, 0.2205, 0.4027, 0.2742]) -Greedy action tensor([-1.7506, -0.2433, 0.5832, -0.0460]) tensor([0.0469, 0.2116, 0.4837, 0.2578]) -Greedy action tensor([-1.8216, -0.4214, 0.6112, -0.1101]) tensor([0.0455, 0.1845, 0.5181, 0.2519]) -Greedy action tensor([-1.9120, -0.4281, 0.6522, -0.1608]) tensor([0.0414, 0.1825, 0.5376, 0.2384]) -Greedy action tensor([-1.8476, -0.4087, 0.6077, -0.1291]) tensor([0.0446, 0.1879, 0.5191, 0.2485]) -Greedy action tensor([-1.9212, -0.2723, 0.6343, -0.1863]) tensor([0.0404, 0.2102, 0.5204, 0.2290]) -Greedy action tensor([-1.4917, -0.5462, 0.5543, 0.1615]) tensor([0.0605, 0.1557, 0.4679, 0.3159]) -Greedy action tensor([-1.6537, -0.3818, 0.6308, -0.0508]) tensor([0.0517, 0.1843, 0.5074, 0.2566]) -Greedy action tensor([ 1.5087, 0.2434, 0.0200, -0.1641]) tensor([0.5898, 0.1664, 0.1331, 0.1107]) -Greedy action tensor([ 2.0534, -1.0055, -0.5315, 0.2766]) tensor([0.7743, 0.0363, 0.0584, 0.1310]) -Greedy action tensor([ 1.5098, -0.6013, -0.2866, 0.2893]) tensor([0.6321, 0.0765, 0.1049, 0.1865]) -Greedy action tensor([ 1.5606, -0.7320, 0.0863, 0.2356]) tensor([0.6267, 0.0633, 0.1435, 0.1666]) -Greedy action tensor([ 1.3892, -0.2746, -0.3376, 0.2558]) tensor([0.5920, 0.1121, 0.1053, 0.1906]) -Greedy action tensor([ 1.6668, -0.5740, -0.5229, 0.4975]) tensor([0.6540, 0.0696, 0.0732, 0.2032]) -Greedy action tensor([ 1.3265, -0.2063, -0.8699, 0.4065]) tensor([0.5795, 0.1251, 0.0644, 0.2309]) -Greedy action tensor([ 2.2785, 0.1606, -0.2668, 0.7460]) tensor([0.7069, 0.0850, 0.0555, 0.1527]) -Greedy action tensor([ 1.6903, -0.7670, -0.4676, 0.4181]) tensor([0.6750, 0.0578, 0.0780, 0.1891]) -Greedy action tensor([ 1.1574, -1.3110, 0.1899, -0.4518]) tensor([0.6007, 0.0509, 0.2283, 0.1202]) -Greedy action tensor([ 1.2130, 0.1625, -0.4541, 0.4694]) tensor([0.4965, 0.1737, 0.0937, 0.2361]) -Greedy action tensor([ 1.5009, -0.6068, -0.1628, 0.4123]) tensor([0.6069, 0.0738, 0.1150, 0.2043]) -Greedy action tensor([ 1.7732, -0.8402, 0.0390, 0.1618]) tensor([0.6899, 0.0506, 0.1218, 0.1377]) -Greedy action tensor([ 1.0992, -0.1410, -0.4062, -0.1383]) tensor([0.5551, 0.1606, 0.1232, 0.1611]) -Greedy action tensor([ 1.5916, -0.7133, -0.6772, 0.3925]) tensor([0.6646, 0.0663, 0.0687, 0.2004]) -Greedy action tensor([ 1.4871, -0.5744, -0.7289, 0.1579]) tensor([0.6662, 0.0848, 0.0726, 0.1763]) -Greedy action tensor([ 1.2873, 0.1294, -0.5184, 0.0250]) tensor([0.5677, 0.1783, 0.0933, 0.1607]) -Greedy action tensor([ 1.5307, -0.5793, -0.4073, 0.3510]) tensor([0.6359, 0.0771, 0.0916, 0.1954]) -Greedy action tensor([ 1.4567, -0.7147, -0.5793, 0.1312]) tensor([0.6622, 0.0755, 0.0864, 0.1759]) -Greedy action tensor([ 1.9808, -1.0888, -0.2475, 0.5992]) tensor([0.7116, 0.0330, 0.0766, 0.1787]) -Greedy action tensor([ 1.2125, -0.2022, 0.0595, 0.1311]) tensor([0.5269, 0.1280, 0.1663, 0.1787]) -Greedy action tensor([ 1.4539, -0.4342, -0.9350, 0.8696]) tensor([0.5554, 0.0841, 0.0509, 0.3096]) -Greedy action tensor([ 2.3643, -1.0647, -0.2159, 0.5434]) tensor([0.7874, 0.0255, 0.0596, 0.1275]) -Greedy action tensor([ 1.5863, -0.3933, -0.2890, 0.0480]) tensor([0.6639, 0.0917, 0.1018, 0.1426]) -Greedy action tensor([ 1.5389, -0.5093, -0.7525, 0.7787]) tensor([0.5891, 0.0760, 0.0596, 0.2754]) -Greedy action tensor([ 1.3544, 0.1241, -0.4691, 0.5870]) tensor([0.5214, 0.1524, 0.0842, 0.2420]) -Greedy action tensor([ 0.9859, -0.1879, 0.3690, 0.0850]) tensor([0.4434, 0.1371, 0.2393, 0.1801]) -Greedy action tensor([ 1.3934, -0.5637, -0.7713, 0.2646]) tensor([0.6331, 0.0894, 0.0727, 0.2048]) -Greedy action tensor([ 1.7852, -1.1065, -0.2566, 0.8597]) tensor([0.6323, 0.0351, 0.0821, 0.2506]) -Greedy action tensor([ 1.2483, -0.6757, -0.3659, 0.2626]) tensor([0.5820, 0.0850, 0.1158, 0.2172]) -Greedy action tensor([ 1.2346, -0.0501, -0.9409, 0.0533]) tensor([0.5892, 0.1631, 0.0669, 0.1808]) -Greedy action tensor([ 1.3293, -0.0723, -0.7363, -0.1527]) tensor([0.6250, 0.1539, 0.0792, 0.1420]) -Greedy action tensor([ 1.4190, 0.1152, -0.8385, 0.2265]) tensor([0.5954, 0.1616, 0.0623, 0.1807]) -Greedy action tensor([ 1.7722, -1.0496, -0.1641, 0.0069]) tensor([0.7273, 0.0433, 0.1049, 0.1245]) -Greedy action tensor([ 1.6581, -0.7673, -0.7782, 0.2669]) tensor([0.7019, 0.0621, 0.0614, 0.1746]) -Greedy action tensor([ 1.4653, -0.3194, -0.6706, 0.2019]) tensor([0.6375, 0.1070, 0.0753, 0.1802]) -Greedy action tensor([ 1.0477, -0.4777, -0.0286, 0.4317]) tensor([0.4765, 0.1037, 0.1624, 0.2574]) -Greedy action tensor([ 1.1482, -0.5352, -0.1517, 0.3989]) tensor([0.5179, 0.0962, 0.1412, 0.2448]) -Greedy action tensor([ 1.3910, -0.5955, -0.3417, 0.4677]) tensor([0.5844, 0.0802, 0.1033, 0.2321]) -Greedy action tensor([ 2.0447, -1.0878, -0.0300, 0.0898]) tensor([0.7629, 0.0333, 0.0958, 0.1080]) -Greedy action tensor([ 2.0846, -0.9782, -0.7202, 0.4523]) tensor([0.7676, 0.0359, 0.0465, 0.1500]) -Greedy action tensor([ 1.9689, -0.4792, -0.6828, 0.6611]) tensor([0.7006, 0.0606, 0.0494, 0.1894]) -Greedy action tensor([ 1.7546, -0.7680, -0.2216, 0.3156]) tensor([0.6868, 0.0551, 0.0952, 0.1629]) -Greedy action tensor([ 1.0789, -0.4859, -0.4220, 0.2168]) tensor([0.5393, 0.1128, 0.1202, 0.2277]) -Greedy action tensor([ 1.4833, -0.6307, 0.0126, 0.3515]) tensor([0.5977, 0.0722, 0.1373, 0.1927]) -Greedy action tensor([ 1.3277, -0.6837, 0.0545, 0.0702]) tensor([0.5889, 0.0788, 0.1649, 0.1675]) -Greedy action tensor([ 1.0937, -0.0819, -0.6778, 0.1512]) tensor([0.5352, 0.1652, 0.0910, 0.2086]) -Greedy action tensor([ 1.3261, -0.7995, -0.0561, 0.0213]) tensor([0.6092, 0.0727, 0.1529, 0.1652]) -Greedy action tensor([ 1.8154, -0.9173, -0.2244, 0.6523]) tensor([0.6633, 0.0431, 0.0863, 0.2073]) -Greedy action tensor([ 1.3063, -0.7214, 0.0333, 0.1174]) tensor([0.5827, 0.0767, 0.1632, 0.1775]) -Greedy action tensor([ 1.5785, -0.8376, -0.2116, 0.1839]) tensor([0.6648, 0.0593, 0.1110, 0.1648]) -Greedy action tensor([ 1.5926, -0.1064, -0.9141, 0.7969]) tensor([0.5829, 0.1066, 0.0475, 0.2630]) -Greedy action tensor([ 1.1118, -0.3622, -0.3674, 0.1738]) tensor([0.5411, 0.1239, 0.1233, 0.2118]) -Greedy action tensor([ 0.7663, 0.1269, -1.1166, 0.3418]) tensor([0.4285, 0.2261, 0.0652, 0.2803]) -Greedy action tensor([ 1.2100, -0.5925, -0.5240, 0.7286]) tensor([0.5104, 0.0842, 0.0901, 0.3154]) -Greedy action tensor([ 1.2079, -0.2197, -0.1021, -0.1737]) tensor([0.5679, 0.1362, 0.1532, 0.1426]) -Greedy action tensor([ 1.2196, -0.2049, -0.6236, 0.1876]) tensor([0.5697, 0.1371, 0.0902, 0.2030]) -Greedy action tensor([ 1.2478, -0.6697, -0.2464, 0.2611]) tensor([0.5733, 0.0843, 0.1287, 0.2137]) -Greedy action tensor([ 2.3160, -0.6472, -0.4810, -0.0434]) tensor([0.8284, 0.0428, 0.0505, 0.0783]) -Greedy action tensor([ 2.0725, -0.9458, -0.1464, 0.3824]) tensor([0.7451, 0.0364, 0.0810, 0.1375]) -Greedy action tensor([ 1.4958, -0.6332, -0.3240, 0.8341]) tensor([0.5565, 0.0662, 0.0902, 0.2871]) -Greedy action tensor([ 1.6104, -0.4330, -0.2166, 0.2308]) tensor([0.6484, 0.0840, 0.1043, 0.1632]) -Greedy action tensor([ 1.3056, -0.1014, -0.7690, 0.5322]) tensor([0.5459, 0.1337, 0.0686, 0.2519]) -Greedy action tensor([ 2.4257, -0.7872, 0.0428, 0.7933]) tensor([0.7530, 0.0303, 0.0695, 0.1472]) -Greedy action tensor([ 0.9376, -0.1851, -0.0175, 0.2948]) tensor([0.4472, 0.1455, 0.1721, 0.2352]) -Greedy action tensor([ 1.7901, -0.3213, -0.4791, 0.0789]) tensor([0.7117, 0.0862, 0.0736, 0.1286]) -Greedy action tensor([ 1.3659, 0.0735, -0.2983, 0.2389]) tensor([0.5593, 0.1536, 0.1059, 0.1812]) -Greedy action tensor([ 1.5204, 0.2657, -0.2346, 0.1884]) tensor([0.5807, 0.1656, 0.1004, 0.1533]) -Greedy action tensor([ 1.3441, -0.0292, -0.8890, 0.0680]) tensor([0.6099, 0.1545, 0.0654, 0.1702]) -Greedy action tensor([ 1.1053, -0.1107, -0.6560, -0.0370]) tensor([0.5595, 0.1658, 0.0961, 0.1785]) -Greedy action tensor([ 1.2084, -0.1025, -1.1121, 0.3734]) tensor([0.5551, 0.1496, 0.0545, 0.2408]) -Greedy action tensor([ 1.8192, -0.1644, -0.5429, 0.6326]) tensor([0.6506, 0.0895, 0.0613, 0.1986]) -Greedy action tensor([ 1.2373, -0.4069, -0.5519, 0.3909]) tensor([0.5589, 0.1080, 0.0934, 0.2398]) -Greedy action tensor([ 1.3652, -0.3527, -0.4529, 0.3101]) tensor([0.5917, 0.1062, 0.0961, 0.2060]) -Greedy action tensor([ 1.4070, -0.0702, -0.3846, -0.4578]) tensor([0.6452, 0.1473, 0.1076, 0.1000]) -Greedy action tensor([ 1.0783, -0.1946, 0.1099, 0.2207]) tensor([0.4799, 0.1344, 0.1822, 0.2035]) -Greedy action tensor([ 1.8731, -0.3786, -0.5495, 0.3930]) tensor([0.7035, 0.0740, 0.0624, 0.1601]) -Greedy action tensor([ 0.8652, -0.4218, -0.0117, 0.3377]) tensor([0.4382, 0.1210, 0.1823, 0.2586]) -Greedy action tensor([ 1.3942, -0.2452, -0.4662, 0.1153]) tensor([0.6142, 0.1192, 0.0956, 0.1710]) -Greedy action tensor([ 1.3882, -0.1752, -0.7692, 0.5919]) tensor([0.5630, 0.1179, 0.0651, 0.2539]) -Greedy action tensor([ 0.9583, -0.6332, -0.7327, 0.6747]) tensor([0.4671, 0.0951, 0.0861, 0.3517]) -Greedy action tensor([ 0.8906, -0.6042, 0.0179, -0.3679]) tensor([0.5192, 0.1164, 0.2169, 0.1475]) -Greedy action tensor([ 1.1790, -0.9900, -0.1242, -0.5559]) tensor([0.6400, 0.0732, 0.1739, 0.1129]) -Greedy action tensor([ 0.8462, -0.2790, -0.0159, -0.2516]) tensor([0.4807, 0.1560, 0.2030, 0.1604]) -Greedy action tensor([ 0.5429, -0.2229, -0.0155, -0.0648]) tensor([0.3873, 0.1801, 0.2216, 0.2109]) -Greedy action tensor([ 0.5635, -0.3346, -0.0369, -0.1861]) tensor([0.4118, 0.1677, 0.2259, 0.1946]) -Greedy action tensor([ 0.7550, -0.4160, 0.0483, -0.2701]) tensor([0.4625, 0.1434, 0.2281, 0.1659]) -Greedy action tensor([ 0.5955, -0.4806, -0.1876, -0.0950]) tensor([0.4349, 0.1483, 0.1988, 0.2180]) -Greedy action tensor([ 0.6538, -0.0739, 0.0316, -0.0183]) tensor([0.3952, 0.1909, 0.2121, 0.2018]) -Greedy action tensor([ 0.7237, -0.2583, 0.0516, -0.2463]) tensor([0.4417, 0.1654, 0.2255, 0.1674]) -Greedy action tensor([ 0.7083, -0.7038, -0.1180, -0.0853]) tensor([0.4687, 0.1142, 0.2051, 0.2120]) -Greedy action tensor([ 0.6744, -0.4020, -0.1579, -0.1851]) tensor([0.4547, 0.1550, 0.1978, 0.1925]) -Greedy action tensor([ 0.9256, -0.3618, 0.0358, -0.4186]) tensor([0.5135, 0.1417, 0.2109, 0.1339]) -Greedy action tensor([ 0.5549, 0.0070, -0.1587, 0.0015]) tensor([0.3784, 0.2187, 0.1853, 0.2175]) -Greedy action tensor([ 0.9944, -0.7020, 0.0133, -0.4562]) tensor([0.5578, 0.1023, 0.2091, 0.1308]) -Greedy action tensor([ 0.8667, -0.9164, -0.0789, -0.3589]) tensor([0.5405, 0.0909, 0.2100, 0.1587]) -Greedy action tensor([ 0.8399, -0.5774, -0.0690, -0.5166]) tensor([0.5255, 0.1274, 0.2118, 0.1354]) -Greedy action tensor([ 0.5569, 0.0993, -0.2677, -0.0737]) tensor([0.3841, 0.2431, 0.1684, 0.2044]) -Greedy action tensor([ 0.9784, -0.3308, -0.0224, -0.1538]) tensor([0.5102, 0.1378, 0.1875, 0.1644]) -Greedy action tensor([ 0.7612, -0.7204, 0.2452, -0.8116]) tensor([0.4922, 0.1119, 0.2938, 0.1021]) -Greedy action tensor([ 0.9229, -0.3644, -0.1698, -0.2800]) tensor([0.5231, 0.1444, 0.1754, 0.1571]) -Greedy action tensor([ 0.8183, -0.7677, -0.2105, -0.2991]) tensor([0.5293, 0.1084, 0.1892, 0.1731]) -Greedy action tensor([ 0.9672, -0.5712, -0.0184, -0.3877]) tensor([0.5417, 0.1163, 0.2022, 0.1398]) -Greedy action tensor([ 0.8328, -0.4931, 0.1420, -0.6417]) tensor([0.5011, 0.1331, 0.2511, 0.1147]) -Greedy action tensor([ 1.0849, -0.4863, -0.1528, -0.4248]) tensor([0.5818, 0.1209, 0.1687, 0.1286]) -Greedy action tensor([ 0.8093, -0.3675, -0.2478, -0.4162]) tensor([0.5130, 0.1581, 0.1783, 0.1506]) -Greedy action tensor([ 0.4164, -0.3168, -0.0490, -0.3162]) tensor([0.3863, 0.1856, 0.2425, 0.1857]) -Greedy action tensor([ 0.5165, -0.3482, -0.0779, -0.2349]) tensor([0.4090, 0.1723, 0.2257, 0.1930]) -Greedy action tensor([ 0.5140, -0.2574, 0.0917, -0.3339]) tensor([0.3927, 0.1816, 0.2575, 0.1682]) -Greedy action tensor([ 0.6736, -0.4299, -0.1809, -0.2237]) tensor([0.4619, 0.1532, 0.1965, 0.1883]) -Greedy action tensor([ 0.8034, -0.4642, -0.0037, -0.2291]) tensor([0.4799, 0.1351, 0.2141, 0.1709]) -Greedy action tensor([ 0.5287, -0.2447, -0.1168, -0.0099]) tensor([0.3892, 0.1796, 0.2041, 0.2271]) -Greedy action tensor([ 0.7630, -0.4953, -0.2122, -0.4712]) tensor([0.5122, 0.1455, 0.1932, 0.1491]) -Greedy action tensor([ 0.8091, -0.6258, 0.0244, -0.2752]) tensor([0.4920, 0.1172, 0.2245, 0.1664]) -Greedy action tensor([ 0.8413, -0.6785, 0.1076, -0.2160]) tensor([0.4887, 0.1069, 0.2346, 0.1698]) -Greedy action tensor([ 0.4276, -0.1297, -0.0994, -0.1958]) tensor([0.3705, 0.2122, 0.2187, 0.1986]) -Greedy action tensor([ 0.3931, -0.2903, 0.0614, -0.1512]) tensor([0.3568, 0.1801, 0.2561, 0.2070]) -Greedy action tensor([ 0.7130, -0.3865, -0.0301, -0.2591]) tensor([0.4573, 0.1523, 0.2175, 0.1730]) -Greedy action tensor([ 0.7504, -0.7417, -0.1084, -0.3300]) tensor([0.5030, 0.1131, 0.2131, 0.1708]) -Greedy action tensor([ 0.5143, -0.1784, -0.0236, -0.0956]) tensor([0.3806, 0.1904, 0.2222, 0.2068]) -Greedy action tensor([ 0.7505, -0.5125, -0.0687, -0.2502]) tensor([0.4782, 0.1352, 0.2108, 0.1758]) -Greedy action tensor([ 1.1226, -1.2427, 0.0690, -0.3993]) tensor([0.6021, 0.0565, 0.2099, 0.1314]) -Greedy action tensor([ 0.6210, -0.5273, -0.1073, -0.3123]) tensor([0.4560, 0.1446, 0.2201, 0.1793]) -Greedy action tensor([ 1.0854, -0.1763, 0.1032, -0.2412]) tensor([0.5200, 0.1473, 0.1947, 0.1380]) -Greedy action tensor([ 0.8355, -0.5861, -0.0522, -0.2334]) tensor([0.5009, 0.1209, 0.2062, 0.1720]) -Greedy action tensor([ 1.1245, -0.9860, 0.0880, -0.6143]) tensor([0.6055, 0.0734, 0.2148, 0.1064]) -Greedy action tensor([ 0.1525, 0.2939, -0.1319, -0.2524]) tensor([0.2800, 0.3225, 0.2107, 0.1868]) -Greedy action tensor([ 0.8251, -0.6458, 0.0499, -0.4416]) tensor([0.5071, 0.1165, 0.2336, 0.1429]) -Greedy action tensor([ 0.5826, -0.1744, 0.1305, -0.1039]) tensor([0.3833, 0.1798, 0.2439, 0.1929]) -Greedy action tensor([ 0.8116, -0.5481, -0.2267, -0.4869]) tensor([0.5308, 0.1363, 0.1880, 0.1449]) -Greedy action tensor([ 0.8645, -0.6792, 0.0791, -0.4771]) tensor([0.5179, 0.1106, 0.2361, 0.1354]) -Greedy action tensor([ 0.7824, -0.5065, -0.1608, -0.3986]) tensor([0.5071, 0.1397, 0.1975, 0.1557]) -Greedy action tensor([ 1.0263, -0.6313, 0.0685, -0.4245]) tensor([0.5529, 0.1054, 0.2122, 0.1296]) -Greedy action tensor([ 0.9768, -0.3307, -0.0198, -0.5729]) tensor([0.5400, 0.1461, 0.1993, 0.1146]) -Greedy action tensor([ 1.1297, -0.4939, -0.2430, -0.2990]) tensor([0.5917, 0.1167, 0.1499, 0.1418]) -Greedy action tensor([ 0.7205, -0.5528, -0.1400, -0.4857]) tensor([0.4995, 0.1398, 0.2112, 0.1495]) -Greedy action tensor([ 0.5949, -0.3961, -0.0921, -0.2086]) tensor([0.4306, 0.1599, 0.2167, 0.1928]) -Greedy action tensor([ 0.5170, -0.4750, -0.0686, -0.4478]) tensor([0.4331, 0.1606, 0.2412, 0.1651]) -Greedy action tensor([ 0.9310, -0.2812, -0.0372, -0.4957]) tensor([0.5215, 0.1552, 0.1981, 0.1252]) -Greedy action tensor([ 0.4387, -0.1274, -0.0068, -0.0421]) tensor([0.3538, 0.2009, 0.2266, 0.2188]) -Greedy action tensor([ 0.5948, -0.2212, -0.0485, -0.0451]) tensor([0.4008, 0.1772, 0.2106, 0.2114]) -Greedy action tensor([ 0.6868, -0.6249, -0.0881, -0.0674]) tensor([0.4544, 0.1224, 0.2094, 0.2138]) -Greedy action tensor([ 0.8715, -0.6289, 0.0192, -0.4551]) tensor([0.5222, 0.1165, 0.2227, 0.1386]) -Greedy action tensor([ 0.4047, 0.2383, -0.1650, -0.0941]) tensor([0.3311, 0.2804, 0.1873, 0.2011]) -Greedy action tensor([ 0.6517, -0.4427, 0.1010, -0.2011]) tensor([0.4278, 0.1432, 0.2466, 0.1823]) -Greedy action tensor([ 0.9885, -0.3390, -0.1416, -0.2257]) tensor([0.5305, 0.1406, 0.1713, 0.1575]) -Greedy action tensor([ 0.9983, -0.5653, -0.1221, -0.3766]) tensor([0.5592, 0.1171, 0.1824, 0.1414]) -Greedy action tensor([ 1.1649, -0.4450, 0.0622, -0.3328]) tensor([0.5696, 0.1139, 0.1891, 0.1274]) -Greedy action tensor([ 0.6637, 0.0371, -0.3809, 0.0393]) tensor([0.4129, 0.2207, 0.1453, 0.2211]) -Greedy action tensor([ 0.9370, -0.5452, -0.0012, -0.5732]) tensor([0.5437, 0.1235, 0.2127, 0.1201]) -Greedy action tensor([ 0.7037, -0.5669, -0.0718, -0.3654]) tensor([0.4797, 0.1346, 0.2209, 0.1647]) -Greedy action tensor([ 0.6820, -0.8624, 0.0963, -0.3614]) tensor([0.4711, 0.1006, 0.2623, 0.1660]) -Greedy action tensor([ 0.7323, -0.4457, 0.1855, -0.4740]) tensor([0.4575, 0.1408, 0.2648, 0.1369]) -Greedy action tensor([ 0.6658, -0.2815, 0.0300, -0.1326]) tensor([0.4224, 0.1638, 0.2237, 0.1901]) -Greedy action tensor([ 0.4954, 0.0710, -0.1269, -0.1302]) tensor([0.3669, 0.2400, 0.1969, 0.1963]) -Greedy action tensor([ 0.7390, -0.4067, -0.0416, -0.3139]) tensor([0.4706, 0.1496, 0.2156, 0.1642]) -Greedy action tensor([ 0.8968, -0.4158, -0.0517, -0.1435]) tensor([0.4976, 0.1339, 0.1927, 0.1758]) -Greedy action tensor([ 0.6774, -0.2937, -0.0340, -0.2495]) tensor([0.4414, 0.1671, 0.2167, 0.1747]) -Greedy action tensor([ 0.6913, -0.5891, -0.1357, -0.2229]) tensor([0.4726, 0.1313, 0.2067, 0.1894]) -Greedy action tensor([ 0.8814, -0.0244, -0.0046, -0.1636]) tensor([0.4612, 0.1864, 0.1902, 0.1622]) -Greedy action tensor([ 1.0088, -0.9471, 0.0386, -0.6208]) tensor([0.5826, 0.0824, 0.2208, 0.1142]) -Greedy action tensor([ 0.9996, -0.7781, 0.0029, -0.5148]) tensor([0.5688, 0.0961, 0.2099, 0.1251]) -Greedy action tensor([ 0.0157, -1.6414, -0.7284, -0.0144]) tensor([0.3793, 0.0723, 0.1802, 0.3681]) -Greedy action tensor([ 0.5319, -0.1677, -0.4012, 0.0663]) tensor([0.3972, 0.1973, 0.1562, 0.2493]) -Greedy action tensor([0.1730, 0.6542, 0.2961, 0.1229]) tensor([0.2128, 0.3442, 0.2406, 0.2024]) -Greedy action tensor([-0.1709, -2.3630, 0.2919, 0.1295]) tensor([0.2469, 0.0276, 0.3922, 0.3334]) -Greedy action tensor([-0.1453, -0.6840, 0.2597, -1.3014]) tensor([0.2943, 0.1717, 0.4413, 0.0926]) -Greedy action tensor([ 0.1006, -1.1901, -0.1242, 0.8114]) tensor([0.2433, 0.0669, 0.1943, 0.4954]) -Greedy action tensor([-0.6891, -1.3301, 0.5730, -0.5102]) tensor([0.1599, 0.0842, 0.5648, 0.1912]) -Greedy action tensor([ 1.4496, -0.2715, 0.0671, 1.0255]) tensor([0.4798, 0.0858, 0.1204, 0.3140]) -Greedy action tensor([-0.1463, -0.4504, -0.9304, -0.1772]) tensor([0.3161, 0.2332, 0.1443, 0.3064]) -Greedy action tensor([-0.8425, 0.6736, 0.5448, -1.1558]) tensor([0.0972, 0.4426, 0.3891, 0.0710]) -Greedy action tensor([-0.6425, -0.7318, -0.4330, -0.6193]) tensor([0.2397, 0.2193, 0.2956, 0.2454]) -Greedy action tensor([ 0.2156, -0.8828, 0.1612, 0.0939]) tensor([0.3159, 0.1053, 0.2991, 0.2797]) -Greedy action tensor([-0.5241, -0.6620, 0.3956, -0.5597]) tensor([0.1871, 0.1630, 0.4693, 0.1806]) -Greedy action tensor([ 0.2652, -0.3715, 0.5878, -0.2028]) tensor([0.2828, 0.1496, 0.3905, 0.1771]) -Greedy action tensor([-0.1478, 0.0203, -0.1406, -0.9881]) tensor([0.2761, 0.3266, 0.2781, 0.1192]) -Greedy action tensor([ 0.6330, -1.1054, -0.2894, 0.8773]) tensor([0.3509, 0.0617, 0.1395, 0.4480]) -Greedy action tensor([-0.2397, -0.5663, 0.0354, -0.3915]) tensor([0.2566, 0.1851, 0.3378, 0.2204]) -Greedy action tensor([-0.1166, -0.5762, 0.0886, -0.2998]) tensor([0.2708, 0.1711, 0.3326, 0.2255]) -Greedy action tensor([ 0.5722, 0.2532, 0.6222, -0.8247]) tensor([0.3305, 0.2403, 0.3475, 0.0818]) -Greedy action tensor([ 0.7641, 0.1318, 1.5378, -0.9202]) tensor([0.2574, 0.1368, 0.5580, 0.0478]) -Greedy action tensor([-0.0475, -1.2954, -0.1969, -0.0070]) tensor([0.3135, 0.0900, 0.2700, 0.3265]) -Greedy action tensor([ 0.6201, -0.4249, -0.3858, 0.7769]) tensor([0.3464, 0.1218, 0.1267, 0.4052]) -Greedy action tensor([ 0.2423, -1.4840, 0.0022, 0.3720]) tensor([0.3223, 0.0573, 0.2535, 0.3669]) -Greedy action tensor([-0.2601, -0.4570, 0.2130, 0.1569]) tensor([0.2023, 0.1661, 0.3246, 0.3070]) -Greedy action tensor([ 0.2097, -1.1764, 0.0955, 0.5465]) tensor([0.2823, 0.0706, 0.2518, 0.3953]) -Greedy action tensor([-0.7928, -0.5302, 0.4296, -0.9243]) tensor([0.1521, 0.1978, 0.5166, 0.1334]) -Greedy action tensor([-0.7859, 0.1973, 0.1245, -0.1303]) tensor([0.1237, 0.3306, 0.3074, 0.2383]) -Greedy action tensor([-0.2739, -1.4953, -0.7847, -0.1810]) tensor([0.3342, 0.0985, 0.2005, 0.3667]) -Greedy action tensor([ 0.3141, -1.3412, 0.0096, 0.6930]) tensor([0.2951, 0.0564, 0.2176, 0.4310]) -Greedy action tensor([-0.9171, 0.6498, -0.3622, -0.3214]) tensor([0.1070, 0.5126, 0.1863, 0.1941]) -Greedy action tensor([ 0.0344, -1.4810, 0.7085, -0.1588]) tensor([0.2496, 0.0548, 0.4898, 0.2058]) -Greedy action tensor([-0.9082, -1.4269, -0.5094, 1.0488]) tensor([0.0984, 0.0586, 0.1466, 0.6964]) -Greedy action tensor([ 0.1944, -1.2518, 0.4303, 0.6124]) tensor([0.2487, 0.0586, 0.3149, 0.3778]) -Greedy action tensor([-0.8556, -0.6629, -0.6238, -0.4760]) tensor([0.2026, 0.2457, 0.2555, 0.2962]) -Greedy action tensor([ 1.3050, -1.2440, 0.3420, 0.0470]) tensor([0.5733, 0.0448, 0.2189, 0.1630]) -Greedy action tensor([ 0.3363, -0.4524, 0.5979, -0.5644]) tensor([0.3165, 0.1438, 0.4111, 0.1286]) -Greedy action tensor([ 0.0242, -0.2321, -0.6921, -0.1755]) tensor([0.3245, 0.2511, 0.1585, 0.2658]) -Greedy action tensor([ 0.1079, -1.1501, 0.4453, 0.3489]) tensor([0.2527, 0.0718, 0.3540, 0.3215]) -Greedy action tensor([ 0.1390, -0.2144, -0.4823, 0.8257]) tensor([0.2366, 0.1661, 0.1271, 0.4701]) -Greedy action tensor([ 1.3028, 0.1174, -0.1965, 0.8385]) tensor([0.4635, 0.1417, 0.1035, 0.2914]) -Greedy action tensor([-0.0387, -1.4005, 1.3178, -0.4812]) tensor([0.1730, 0.0443, 0.6716, 0.1111]) -Greedy action tensor([-1.0610, -0.6676, 0.2055, 0.0080]) tensor([0.1118, 0.1657, 0.3968, 0.3257]) -Greedy action tensor([ 1.4392, -0.5532, -0.1571, 0.4552]) tensor([0.5838, 0.0796, 0.1183, 0.2182]) -Greedy action tensor([ 0.4449, 0.2969, 0.3562, -0.0146]) tensor([0.2933, 0.2530, 0.2684, 0.1853]) -Greedy action tensor([-0.1106, 0.4161, 0.1082, -0.8301]) tensor([0.2260, 0.3827, 0.2813, 0.1101]) -Greedy action tensor([-0.7522, -0.7652, -0.4749, 0.1226]) tensor([0.1753, 0.1730, 0.2313, 0.4204]) -Greedy action tensor([ 0.3155, -0.7949, -0.2329, -0.2000]) tensor([0.3993, 0.1315, 0.2307, 0.2385]) -Greedy action tensor([ 0.0457, -0.3690, -0.0481, -0.5620]) tensor([0.3210, 0.2120, 0.2922, 0.1748]) -Greedy action tensor([-0.0327, -0.1957, 0.4759, -0.3952]) tensor([0.2376, 0.2019, 0.3951, 0.1654]) -Greedy action tensor([-1.0263, -0.9031, 0.2862, -1.6794]) tensor([0.1571, 0.1777, 0.5835, 0.0817]) -Greedy action tensor([-1.4744, -0.8068, -0.7324, 0.4847]) tensor([0.0824, 0.1606, 0.1729, 0.5841]) -Greedy action tensor([-0.4916, -0.4268, 0.1508, -0.9893]) tensor([0.2185, 0.2332, 0.4154, 0.1329]) -Greedy action tensor([-0.2921, -0.2531, 1.1491, -0.1970]) tensor([0.1358, 0.1412, 0.5738, 0.1493]) -Greedy action tensor([-0.1703, -0.3069, -0.3742, -0.7730]) tensor([0.3091, 0.2696, 0.2521, 0.1692]) -Greedy action tensor([ 0.6941, -0.4631, 0.3094, 0.3152]) tensor([0.3732, 0.1173, 0.2540, 0.2555]) -Greedy action tensor([-0.0408, -0.1230, -0.1923, 0.0326]) tensor([0.2593, 0.2388, 0.2228, 0.2790]) -Greedy action tensor([-0.1869, -0.8152, -0.0105, 0.2963]) tensor([0.2300, 0.1227, 0.2744, 0.3729]) -Greedy action tensor([-0.9101, -0.3663, -1.0945, -0.2946]) tensor([0.1850, 0.3187, 0.1539, 0.3424]) -Greedy action tensor([ 0.4298, -1.3907, -0.6751, -0.0029]) tensor([0.4669, 0.0756, 0.1546, 0.3029]) -Greedy action tensor([-0.8579, -1.5066, -0.1298, 0.2989]) tensor([0.1476, 0.0772, 0.3058, 0.4694]) -Greedy action tensor([-0.5116, -0.3196, -0.5022, 0.3342]) tensor([0.1802, 0.2183, 0.1819, 0.4197]) -Greedy action tensor([-0.5916, -1.4409, -0.1088, -0.0963]) tensor([0.2132, 0.0912, 0.3456, 0.3500]) -Greedy action tensor([-0.1111, -1.3164, -0.2906, -0.1077]) tensor([0.3186, 0.0955, 0.2663, 0.3197]) -Greedy action tensor([ 0.1279, -0.1023, 1.1548, -0.3171]) tensor([0.1913, 0.1520, 0.5342, 0.1226]) -Greedy action tensor([-1.3079, -1.6715, 0.3233, -0.4630]) tensor([0.1095, 0.0761, 0.5595, 0.2549]) -Greedy action tensor([-0.1546, -0.2836, 0.1189, -0.5121]) tensor([0.2569, 0.2258, 0.3377, 0.1797]) -Greedy action tensor([ 0.3234, -1.2960, 0.8352, 0.9548]) tensor([0.2107, 0.0417, 0.3515, 0.3961]) -Greedy action tensor([ 0.2185, 0.0211, -0.1262, 0.2512]) tensor([0.2807, 0.2304, 0.1989, 0.2900]) -Greedy action tensor([ 0.7936, -0.1683, 0.6053, 0.5282]) tensor([0.3359, 0.1284, 0.2782, 0.2576]) -Greedy action tensor([-0.3528, 0.1354, -0.2600, -0.5375]) tensor([0.2194, 0.3575, 0.2407, 0.1824]) -Greedy action tensor([-0.2785, -0.6643, -0.5366, -0.8904]) tensor([0.3339, 0.2270, 0.2580, 0.1811]) -Greedy action tensor([-0.3026, -1.9966, 0.2589, 1.3138]) tensor([0.1254, 0.0231, 0.2199, 0.6316]) -Greedy action tensor([-0.2446, -0.4635, 0.1876, -0.9070]) tensor([0.2591, 0.2082, 0.3992, 0.1336]) -Greedy action tensor([-0.5927, -0.6919, -1.7984, 0.7925]) tensor([0.1613, 0.1460, 0.0483, 0.6444]) -Greedy action tensor([ 0.0678, -0.5161, 0.5318, 0.3213]) tensor([0.2254, 0.1257, 0.3585, 0.2904]) -Greedy action tensor([ 1.1523, -1.0722, 0.0447, 1.2322]) tensor([0.3966, 0.0429, 0.1310, 0.4295]) -Greedy action tensor([-0.3873, -0.9253, 0.6345, -0.5812]) tensor([0.1928, 0.1126, 0.5357, 0.1589]) -Greedy action tensor([-0.7813, -1.7221, -0.4440, -0.2059]) tensor([0.2189, 0.0854, 0.3066, 0.3891]) -Greedy action tensor([ 0.6556, -1.3470, 0.0555, 0.5400]) tensor([0.3884, 0.0524, 0.2131, 0.3460]) -Greedy action tensor([ 0.9361, -0.0100, -0.3751, -0.4311]) tensor([0.5228, 0.2030, 0.1409, 0.1332]) -Greedy action tensor([-1.2316, -0.2440, 0.6068, -0.7599]) tensor([0.0864, 0.2320, 0.5432, 0.1385]) -Greedy action tensor([ 1.2177, -0.3407, -0.4815, 0.6359]) tensor([0.5123, 0.1078, 0.0937, 0.2863]) -Greedy action tensor([ 1.8449, -0.7186, -0.4176, 0.4027]) tensor([0.7054, 0.0543, 0.0734, 0.1668]) -Greedy action tensor([ 1.3270, -0.5634, -0.4305, 0.5531]) tensor([0.5603, 0.0846, 0.0966, 0.2584]) -Greedy action tensor([ 1.4495, 0.1771, -0.8617, 1.0750]) tensor([0.4838, 0.1355, 0.0480, 0.3327]) -Greedy action tensor([ 0.7915, -0.5644, -0.1167, 0.1190]) tensor([0.4605, 0.1187, 0.1857, 0.2351]) -Greedy action tensor([ 0.8877, -0.6843, -0.0768, 0.2103]) tensor([0.4769, 0.0990, 0.1818, 0.2423]) -Greedy action tensor([ 1.8601, -1.1637, -0.0520, 0.8476]) tensor([0.6412, 0.0312, 0.0947, 0.2329]) -Greedy action tensor([ 1.7948, -0.6745, -0.5152, 0.2587]) tensor([0.7147, 0.0605, 0.0709, 0.1538]) -Greedy action tensor([ 2.5721, -0.1143, -0.1595, 0.5545]) tensor([0.7898, 0.0538, 0.0514, 0.1050]) -Greedy action tensor([ 1.9678, -0.1147, -0.2257, 0.6123]) tensor([0.6694, 0.0834, 0.0747, 0.1726]) -Greedy action tensor([ 2.1432, -1.0441, -0.3676, 0.4775]) tensor([0.7625, 0.0315, 0.0619, 0.1441]) -Greedy action tensor([ 1.4997, -0.4349, -0.2746, 0.1280]) tensor([0.6378, 0.0922, 0.1082, 0.1618]) -Greedy action tensor([ 1.6883, -0.2512, -0.1558, 0.5263]) tensor([0.6193, 0.0890, 0.0980, 0.1937]) -Greedy action tensor([ 1.4464, -0.3791, -0.1101, 0.2595]) tensor([0.5962, 0.0961, 0.1257, 0.1820]) -Greedy action tensor([ 1.5827, -0.6498, -0.4665, 0.2459]) tensor([0.6672, 0.0716, 0.0860, 0.1753]) -Greedy action tensor([ 1.5992, -0.5215, -0.6662, 0.0882]) tensor([0.6923, 0.0830, 0.0719, 0.1528]) -Greedy action tensor([ 1.5915, -0.4316, -0.1884, 0.1471]) tensor([0.6507, 0.0861, 0.1097, 0.1535]) -Greedy action tensor([ 0.8556, -0.4585, -0.2245, 0.0456]) tensor([0.4870, 0.1309, 0.1654, 0.2167]) -Greedy action tensor([ 1.3091, -0.4115, -0.6599, 0.4160]) tensor([0.5787, 0.1036, 0.0808, 0.2369]) -Greedy action tensor([ 1.6979, -0.3795, -0.8450, 0.8748]) tensor([0.6086, 0.0762, 0.0479, 0.2673]) -Greedy action tensor([ 1.1008, -0.2608, -0.2315, 0.5086]) tensor([0.4823, 0.1236, 0.1273, 0.2668]) -Greedy action tensor([1.3909, 0.0671, 0.0286, 0.5414]) tensor([0.5129, 0.1365, 0.1313, 0.2193]) -Greedy action tensor([ 2.8531, -0.6805, -0.3151, 0.6065]) tensor([0.8496, 0.0248, 0.0358, 0.0899]) -Greedy action tensor([ 1.4015, -0.1028, -0.1780, 0.0659]) tensor([0.5913, 0.1314, 0.1219, 0.1555]) -Greedy action tensor([ 1.7811, -0.6682, -0.1241, 0.4658]) tensor([0.6651, 0.0574, 0.0990, 0.1785]) -Greedy action tensor([ 1.2134, -0.6192, -0.4088, 0.4247]) tensor([0.5519, 0.0883, 0.1090, 0.2508]) -Greedy action tensor([ 1.1201, -0.2194, -0.5045, 0.1641]) tensor([0.5425, 0.1421, 0.1069, 0.2085]) -Greedy action tensor([ 2.3091, -0.3080, -0.5446, 0.0077]) tensor([0.8125, 0.0593, 0.0468, 0.0813]) -Greedy action tensor([ 1.4718, -0.5167, -0.8163, 0.2107]) tensor([0.6572, 0.0900, 0.0667, 0.1862]) -Greedy action tensor([ 1.0009, -0.2716, -0.8017, 0.3007]) tensor([0.5151, 0.1443, 0.0849, 0.2557]) -Greedy action tensor([ 1.1952, -0.6400, -0.0632, -0.0813]) tensor([0.5805, 0.0926, 0.1649, 0.1620]) -Greedy action tensor([ 1.3671, -0.2329, -0.2505, 0.3136]) tensor([0.5718, 0.1154, 0.1134, 0.1994]) -Greedy action tensor([ 1.5142, -0.1740, -0.5247, 0.4489]) tensor([0.6025, 0.1114, 0.0784, 0.2077]) -Greedy action tensor([ 2.2100, -0.9492, -0.3512, 0.2718]) tensor([0.7914, 0.0336, 0.0611, 0.1139]) -Greedy action tensor([ 1.5807, -0.1155, -0.9093, -0.4612]) tensor([0.7163, 0.1314, 0.0594, 0.0930]) -Greedy action tensor([ 1.7063, -0.8581, -0.1823, 0.6718]) tensor([0.6315, 0.0486, 0.0955, 0.2244]) -Greedy action tensor([ 1.6017, -0.3004, -0.5630, 0.6362]) tensor([0.6080, 0.0907, 0.0698, 0.2315]) -Greedy action tensor([ 2.1049, -1.2085, 0.0968, 0.0535]) tensor([0.7697, 0.0280, 0.1033, 0.0989]) -Greedy action tensor([ 1.6035, -0.6952, -0.0200, 0.5090]) tensor([0.6126, 0.0615, 0.1208, 0.2050]) -Greedy action tensor([ 1.2033, -0.4548, -0.1435, 0.2273]) tensor([0.5472, 0.1042, 0.1423, 0.2062]) -Greedy action tensor([ 1.3661, -0.8715, -0.3934, 0.2533]) tensor([0.6221, 0.0664, 0.1071, 0.2044]) -Greedy action tensor([ 1.5759, -1.1141, -0.2230, 0.3667]) tensor([0.6528, 0.0443, 0.1080, 0.1948]) -Greedy action tensor([ 1.0761, -0.2251, -0.6145, 0.0821]) tensor([0.5474, 0.1490, 0.1009, 0.2026]) -Greedy action tensor([ 1.0724, -0.2490, -0.1275, 0.3649]) tensor([0.4852, 0.1294, 0.1462, 0.2392]) -Greedy action tensor([ 1.8267, 0.3777, -0.3219, 0.1943]) tensor([0.6465, 0.1518, 0.0754, 0.1264]) -Greedy action tensor([ 1.3609, -0.5465, -0.7965, 0.2709]) tensor([0.6249, 0.0928, 0.0723, 0.2101]) -Greedy action tensor([ 1.5981, -0.5954, -0.4752, 0.3796]) tensor([0.6523, 0.0728, 0.0820, 0.1929]) -Greedy action tensor([ 2.0138, -0.9319, -0.4287, 0.3781]) tensor([0.7494, 0.0394, 0.0652, 0.1460]) -Greedy action tensor([ 1.6476, -1.1043, -0.5015, 0.4045]) tensor([0.6808, 0.0434, 0.0794, 0.1964]) -Greedy action tensor([ 1.4517, -0.7808, -0.3523, 0.3677]) tensor([0.6211, 0.0666, 0.1022, 0.2101]) -Greedy action tensor([2.7066, 0.0233, 0.0704, 0.7514]) tensor([0.7803, 0.0533, 0.0559, 0.1104]) -Greedy action tensor([ 2.3050, 0.3354, 0.4146, -0.2376]) tensor([0.7304, 0.1019, 0.1103, 0.0575]) -Greedy action tensor([ 1.8618, -0.9558, -0.3240, 0.6649]) tensor([0.6783, 0.0405, 0.0762, 0.2049]) -Greedy action tensor([ 1.5394, -0.1051, -0.1423, 0.2737]) tensor([0.6020, 0.1163, 0.1120, 0.1698]) -Greedy action tensor([ 1.5754, -0.4624, -0.3317, 0.2527]) tensor([0.6472, 0.0843, 0.0961, 0.1724]) -Greedy action tensor([ 1.4389, 0.3213, -0.7432, 0.3503]) tensor([0.5629, 0.1841, 0.0635, 0.1895]) -Greedy action tensor([ 1.7317, -0.1150, -0.0062, 0.4123]) tensor([0.6246, 0.0985, 0.1099, 0.1670]) -Greedy action tensor([ 1.1952, -0.3562, -0.2390, 0.3156]) tensor([0.5361, 0.1136, 0.1278, 0.2225]) -Greedy action tensor([ 1.7054, -0.6617, -0.7909, 0.7490]) tensor([0.6409, 0.0601, 0.0528, 0.2463]) -Greedy action tensor([ 1.5646, -1.0721, -0.1319, 0.4308]) tensor([0.6342, 0.0454, 0.1163, 0.2041]) -Greedy action tensor([ 1.2192, -0.4346, -0.5942, 0.0928]) tensor([0.5957, 0.1140, 0.0972, 0.1931]) -Greedy action tensor([ 1.2770e+00, -2.6739e-01, 7.5604e-04, -3.8734e-02]) tensor([0.5679, 0.1212, 0.1585, 0.1524]) -Greedy action tensor([ 1.5706, -0.6656, -0.2171, -0.2852]) tensor([0.6990, 0.0747, 0.1170, 0.1093]) -Greedy action tensor([ 1.6490, 0.2411, -0.3754, -0.3687]) tensor([0.6624, 0.1621, 0.0875, 0.0881]) -Greedy action tensor([ 1.8703, -1.0252, -0.3586, 0.4432]) tensor([0.7128, 0.0394, 0.0767, 0.1711]) -Greedy action tensor([ 1.8653, -0.7561, -0.4887, -0.0200]) tensor([0.7579, 0.0551, 0.0720, 0.1150]) -Greedy action tensor([ 1.4868, -0.2925, -1.3850, 0.2839]) tensor([0.6554, 0.1106, 0.0371, 0.1969]) -Greedy action tensor([ 1.7546, -0.6718, -0.8611, 0.1187]) tensor([0.7373, 0.0651, 0.0539, 0.1436]) -Greedy action tensor([ 2.6185, -1.4522, -0.1278, 0.3379]) tensor([0.8450, 0.0144, 0.0542, 0.0864]) -Greedy action tensor([ 1.2061, -0.4590, -0.6049, 0.4154]) tensor([0.5536, 0.1047, 0.0905, 0.2511]) -Greedy action tensor([ 1.3194, -0.0483, -0.9500, 0.0482]) tensor([0.6103, 0.1554, 0.0631, 0.1712]) -Greedy action tensor([ 1.2158, -0.6743, -0.1103, 0.0258]) tensor([0.5811, 0.0878, 0.1543, 0.1768]) -Greedy action tensor([ 1.7997, -1.0382, 0.2046, 0.1338]) tensor([0.6894, 0.0404, 0.1399, 0.1303]) -Greedy action tensor([ 1.3350, -0.2705, -0.3683, 0.0565]) tensor([0.6019, 0.1209, 0.1096, 0.1676]) -Greedy action tensor([ 1.0578, -0.6805, -0.2011, 0.3559]) tensor([0.5114, 0.0899, 0.1452, 0.2535]) -Greedy action tensor([ 1.3832, -0.0616, -0.2276, 0.1200]) tensor([0.5820, 0.1372, 0.1162, 0.1646]) -Greedy action tensor([ 1.9820, -1.0392, 0.0101, 0.6999]) tensor([0.6824, 0.0333, 0.0950, 0.1893]) -Greedy action tensor([ 1.9812, -0.5522, -0.3662, 0.6159]) tensor([0.6991, 0.0555, 0.0669, 0.1785]) -Greedy action tensor([ 2.0103, -1.1283, 0.1792, 0.8732]) tensor([0.6560, 0.0284, 0.1051, 0.2104]) -Greedy action tensor([ 2.3199, -1.3748, -0.1365, 0.6038]) tensor([0.7750, 0.0193, 0.0665, 0.1393]) -Greedy action tensor([ 1.4826, 0.1553, -0.9106, 0.6318]) tensor([0.5607, 0.1487, 0.0512, 0.2394]) -Greedy action tensor([-1.9435, -0.4526, 0.6672, -0.1795]) tensor([0.0402, 0.1785, 0.5469, 0.2345]) -Greedy action tensor([-1.9156, -0.4223, 0.6486, -0.1666]) tensor([0.0413, 0.1840, 0.5370, 0.2376]) -Greedy action tensor([-1.8920, -0.4228, 0.6355, -0.1543]) tensor([0.0425, 0.1845, 0.5317, 0.2413]) -Greedy action tensor([-1.9347, -0.4130, 0.6568, -0.1841]) tensor([0.0405, 0.1855, 0.5407, 0.2332]) -Greedy action tensor([-1.8142, -0.4704, 0.6130, -0.1149]) tensor([0.0462, 0.1772, 0.5236, 0.2529]) -Greedy action tensor([-1.2002, -0.5763, 0.5913, 0.8822]) tensor([0.0592, 0.1105, 0.3552, 0.4751]) -Greedy action tensor([-1.5189, 0.3645, 0.3401, 0.1470]) tensor([0.0519, 0.3410, 0.3328, 0.2744]) -Greedy action tensor([-1.7724, -0.3915, 0.5957, -0.0745]) tensor([0.0474, 0.1884, 0.5056, 0.2587]) -Greedy action tensor([-1.9052, -0.4628, 0.6499, -0.1660]) tensor([0.0420, 0.1778, 0.5410, 0.2392]) -Greedy action tensor([-1.9128, -0.4378, 0.6554, -0.1632]) tensor([0.0414, 0.1809, 0.5397, 0.2380]) -Greedy action tensor([-1.1781, -0.0864, 0.2815, 0.0036]) tensor([0.0866, 0.2581, 0.3729, 0.2824]) -Greedy action tensor([-0.8231, 0.7059, 0.0582, 0.0646]) tensor([0.0956, 0.4412, 0.2308, 0.2323]) -Greedy action tensor([-1.7982, -0.4520, 0.5940, -0.1037]) tensor([0.0471, 0.1811, 0.5153, 0.2565]) -Greedy action tensor([-1.7371, -0.2111, 0.5569, -0.0303]) tensor([0.0476, 0.2188, 0.4715, 0.2621]) -Greedy action tensor([-1.7434, -0.4751, 0.5815, -0.0870]) tensor([0.0499, 0.1776, 0.5107, 0.2618]) -Greedy action tensor([-1.7948, -0.3763, 0.6696, -0.0307]) tensor([0.0440, 0.1818, 0.5174, 0.2568]) -Greedy action tensor([-1.9107, -0.4600, 0.6506, -0.1654]) tensor([0.0418, 0.1781, 0.5409, 0.2392]) -Greedy action tensor([-1.4866, -0.4914, 0.4040, 0.1099]) tensor([0.0655, 0.1772, 0.4339, 0.3234]) -Greedy action tensor([-0.8238, 0.0700, 0.1456, -0.0870]) tensor([0.1224, 0.2992, 0.3227, 0.2557]) -Greedy action tensor([-1.6674e+00, -3.9356e-01, 5.4453e-01, 1.0121e-04]) tensor([0.0526, 0.1881, 0.4805, 0.2788]) -Greedy action tensor([-1.8686, -0.2788, 0.6052, -0.1315]) tensor([0.0426, 0.2091, 0.5061, 0.2422]) -Greedy action tensor([-1.9060, -0.3649, 0.6426, -0.1583]) tensor([0.0413, 0.1930, 0.5285, 0.2372]) -Greedy action tensor([-1.8573, -0.4154, 0.6489, -0.1237]) tensor([0.0432, 0.1827, 0.5296, 0.2446]) -Greedy action tensor([-1.8934, -0.4447, 0.6456, -0.1552]) tensor([0.0424, 0.1803, 0.5365, 0.2409]) -Greedy action tensor([-1.7070, -0.4726, 0.6070, 0.0363]) tensor([0.0493, 0.1696, 0.4991, 0.2820]) -Greedy action tensor([-1.9424, -0.4505, 0.6698, -0.1761]) tensor([0.0401, 0.1784, 0.5468, 0.2347]) -Greedy action tensor([-1.8153, -0.3592, 0.6144, -0.1009]) tensor([0.0451, 0.1932, 0.5115, 0.2502]) -Greedy action tensor([-1.7559, -0.0997, 0.5975, 0.0225]) tensor([0.0441, 0.2310, 0.4639, 0.2610]) -Greedy action tensor([-1.9210, -0.4165, 0.6568, -0.1680]) tensor([0.0409, 0.1842, 0.5387, 0.2362]) -Greedy action tensor([-1.6072, 0.0270, 0.4031, 0.0519]) tensor([0.0531, 0.2720, 0.3961, 0.2788]) -Greedy action tensor([-1.7716, -0.4615, 0.5862, -0.0867]) tensor([0.0484, 0.1794, 0.5113, 0.2609]) -Greedy action tensor([-1.8902, -0.4365, 0.6360, -0.1560]) tensor([0.0426, 0.1825, 0.5333, 0.2415]) -Greedy action tensor([-1.9350, -0.4275, 0.6585, -0.1745]) tensor([0.0405, 0.1828, 0.5414, 0.2354]) -Greedy action tensor([-1.3829, -0.5157, 0.3907, 0.1052]) tensor([0.0730, 0.1737, 0.4300, 0.3233]) -Greedy action tensor([-1.8396, -0.2735, 0.6114, -0.1143]) tensor([0.0435, 0.2082, 0.5043, 0.2441]) -Greedy action tensor([-1.8817, -0.4222, 0.6328, -0.1492]) tensor([0.0429, 0.1846, 0.5301, 0.2425]) -Greedy action tensor([-1.8700, -0.2816, 0.6207, -0.1337]) tensor([0.0423, 0.2071, 0.5105, 0.2401]) -Greedy action tensor([-1.6573, -0.0080, 0.4979, -0.1440]) tensor([0.0516, 0.2686, 0.4454, 0.2344]) -Greedy action tensor([-1.3992, -0.4921, 0.7010, 0.6073]) tensor([0.0524, 0.1298, 0.4280, 0.3898]) -Greedy action tensor([-1.8023, -0.4617, 0.6035, -0.1036]) tensor([0.0468, 0.1788, 0.5187, 0.2557]) -Greedy action tensor([-1.6652, -0.2239, 0.4995, -0.1027]) tensor([0.0534, 0.2259, 0.4657, 0.2550]) -Greedy action tensor([-1.9243, -0.4223, 0.6523, -0.1701]) tensor([0.0409, 0.1839, 0.5385, 0.2366]) -Greedy action tensor([-1.9230, -0.4404, 0.6564, -0.1657]) tensor([0.0410, 0.1806, 0.5408, 0.2377]) -Greedy action tensor([-1.5618, -0.2009, 0.4522, -0.0640]) tensor([0.0593, 0.2312, 0.4443, 0.2652]) -Greedy action tensor([-1.8791, -0.4251, 0.6393, -0.1434]) tensor([0.0428, 0.1832, 0.5312, 0.2428]) -Greedy action tensor([-1.8791, -0.4606, 0.6409, -0.1487]) tensor([0.0431, 0.1780, 0.5357, 0.2432]) -Greedy action tensor([-1.5755, -0.5152, 0.8120, -0.3438]) tensor([0.0549, 0.1586, 0.5981, 0.1883]) -Greedy action tensor([-1.3921, 0.2847, 0.2971, -0.0036]) tensor([0.0634, 0.3391, 0.3433, 0.2542]) -Greedy action tensor([-1.6612, -0.4827, 0.7024, 0.2305]) tensor([0.0465, 0.1511, 0.4942, 0.3083]) -Greedy action tensor([-1.8090, -0.2781, 0.6078, -0.0864]) tensor([0.0446, 0.2061, 0.4998, 0.2496]) -Greedy action tensor([-1.7369, -0.4249, 0.5598, -0.1071]) tensor([0.0506, 0.1880, 0.5032, 0.2583]) -Greedy action tensor([-0.2524, 0.9107, 0.1222, 0.5784]) tensor([0.1258, 0.4025, 0.1830, 0.2887]) -Greedy action tensor([-1.8952, -0.3544, 0.6292, -0.1497]) tensor([0.0419, 0.1955, 0.5227, 0.2399]) -Greedy action tensor([-0.9166, 0.9455, 0.1147, 0.2607]) tensor([0.0741, 0.4773, 0.2080, 0.2406]) -Greedy action tensor([-1.9335, -0.4228, 0.6547, -0.1741]) tensor([0.0406, 0.1838, 0.5399, 0.2357]) -Greedy action tensor([-1.9052, -0.4426, 0.6509, -0.1584]) tensor([0.0418, 0.1803, 0.5382, 0.2396]) -Greedy action tensor([-1.9283, -0.4318, 0.6600, -0.1696]) tensor([0.0407, 0.1817, 0.5414, 0.2362]) -Greedy action tensor([-1.8035, -0.2842, 0.5988, -0.0714]) tensor([0.0449, 0.2052, 0.4961, 0.2538]) -Greedy action tensor([-1.8146, -0.3301, 0.6223, -0.1115]) tensor([0.0448, 0.1975, 0.5119, 0.2458]) -Greedy action tensor([0.1719, 0.9509, 0.0510, 0.6721]) tensor([0.1750, 0.3814, 0.1551, 0.2886]) -Greedy action tensor([-1.9333, -0.4299, 0.6602, -0.1702]) tensor([0.0405, 0.1820, 0.5415, 0.2360]) -Greedy action tensor([-1.8068, -0.1009, 0.5572, -0.1055]) tensor([0.0442, 0.2434, 0.4701, 0.2423]) -Greedy action tensor([-1.9460, -0.4584, 0.6791, -0.1790]) tensor([0.0399, 0.1765, 0.5504, 0.2333]) -Greedy action tensor([-1.7342, 0.0230, 0.4910, -0.0476]) tensor([0.0466, 0.2702, 0.4314, 0.2518]) -Greedy action tensor([-1.9020, -0.3906, 0.6501, -0.1413]) tensor([0.0414, 0.1874, 0.5307, 0.2405]) -Greedy action tensor([-1.8845, -0.4414, 0.6375, -0.1529]) tensor([0.0429, 0.1814, 0.5337, 0.2421]) -Greedy action tensor([-1.7574, -0.4712, 0.5765, -0.0896]) tensor([0.0494, 0.1788, 0.5098, 0.2619]) -Greedy action tensor([-1.8048, -0.2493, 0.5706, -0.1266]) tensor([0.0458, 0.2168, 0.4923, 0.2451]) -Greedy action tensor([-1.8718, -0.3371, 0.6163, -0.1402]) tensor([0.0429, 0.1989, 0.5161, 0.2422]) -Greedy action tensor([-1.8793, -0.4381, 0.6350, -0.1463]) tensor([0.0430, 0.1818, 0.5317, 0.2434]) -Greedy action tensor([-1.8486, 0.1445, 0.5397, -0.1759]) tensor([0.0407, 0.2988, 0.4436, 0.2169]) -Greedy action tensor([-1.6864, -0.5345, 0.5319, -0.0585]) tensor([0.0542, 0.1715, 0.4982, 0.2761]) -Greedy action tensor([-0.7990, -0.1857, 0.2710, -0.2484]) tensor([0.1334, 0.2463, 0.3889, 0.2314]) -Greedy action tensor([-1.1534, -0.0211, 0.3365, -0.1121]) tensor([0.0879, 0.2728, 0.3901, 0.2491]) -Greedy action tensor([-0.7329, 0.5189, -0.0587, -0.2649]) tensor([0.1241, 0.4341, 0.2436, 0.1982]) -Greedy action tensor([-1.8474, -0.4468, 0.6221, -0.1397]) tensor([0.0447, 0.1812, 0.5278, 0.2464]) -Greedy action tensor([-1.9414, -0.4552, 0.6698, -0.1774]) tensor([0.0402, 0.1777, 0.5474, 0.2346]) -Greedy action tensor([-1.7347, -0.5315, 0.5618, -0.1115]) tensor([0.0517, 0.1722, 0.5139, 0.2621]) -Greedy action tensor([-1.8871, -0.4581, 0.6462, -0.1522]) tensor([0.0427, 0.1781, 0.5374, 0.2418]) -Greedy action tensor([-1.9076, -0.3956, 0.6469, -0.1563]) tensor([0.0414, 0.1877, 0.5324, 0.2385]) -Greedy action tensor([-1.9068, -0.4544, 0.6517, -0.1637]) tensor([0.0418, 0.1788, 0.5403, 0.2391]) -Greedy action tensor([ 0.8387, -0.5126, -0.0784, -0.3409]) tensor([0.5087, 0.1317, 0.2033, 0.1564]) -Greedy action tensor([ 0.8979, -0.1549, -0.1275, -0.2638]) tensor([0.4949, 0.1727, 0.1775, 0.1549]) -Greedy action tensor([ 1.0352, -0.5259, -0.1312, -0.2119]) tensor([0.5529, 0.1161, 0.1722, 0.1589]) -Greedy action tensor([ 0.7537, -0.3963, -0.0353, -0.2073]) tensor([0.4644, 0.1470, 0.2110, 0.1776]) -Greedy action tensor([ 0.6610, -0.1364, -0.0503, -0.0499]) tensor([0.4111, 0.1852, 0.2018, 0.2019]) -Greedy action tensor([ 0.8784, -0.6112, 0.0227, -0.3404]) tensor([0.5139, 0.1159, 0.2184, 0.1519]) -Greedy action tensor([ 0.4800, -0.3457, -0.1564, -0.3631]) tensor([0.4171, 0.1827, 0.2207, 0.1795]) -Greedy action tensor([ 0.4514, -0.3446, -0.0916, -0.2471]) tensor([0.3953, 0.1784, 0.2297, 0.1966]) -Greedy action tensor([ 0.5803, -0.1373, -0.0512, -0.0299]) tensor([0.3902, 0.1904, 0.2075, 0.2120]) -Greedy action tensor([ 0.4426, 0.1043, -0.0498, 0.0290]) tensor([0.3350, 0.2388, 0.2047, 0.2215]) -Greedy action tensor([ 0.7768, -0.3931, -0.0931, -0.3356]) tensor([0.4859, 0.1508, 0.2036, 0.1597]) -Greedy action tensor([ 0.5117, 0.3549, -0.2083, 0.1469]) tensor([0.3294, 0.2816, 0.1603, 0.2287]) -Greedy action tensor([ 0.8645, -0.5526, 0.1134, -0.3205]) tensor([0.4950, 0.1200, 0.2336, 0.1514]) -Greedy action tensor([ 0.5483, -0.5175, -0.1121, -0.1365]) tensor([0.4228, 0.1456, 0.2184, 0.2132]) -Greedy action tensor([ 0.9393, -0.6128, 0.0628, -0.4623]) tensor([0.5336, 0.1130, 0.2221, 0.1314]) -Greedy action tensor([ 6.1700e-01, 9.9773e-02, 5.0747e-04, -1.3389e-01]) tensor([0.3834, 0.2286, 0.2070, 0.1810]) -Greedy action tensor([ 1.2470, -0.0963, 0.1107, -0.1893]) tensor([0.5495, 0.1434, 0.1764, 0.1307]) -Greedy action tensor([ 1.0302, -0.8997, 0.1121, -0.3191]) tensor([0.5544, 0.0805, 0.2213, 0.1438]) -Greedy action tensor([ 1.0750, -0.5890, -0.0262, -0.5045]) tensor([0.5787, 0.1096, 0.1924, 0.1193]) -Greedy action tensor([ 0.7111, -0.7134, -0.0631, -0.4475]) tensor([0.4961, 0.1194, 0.2288, 0.1557]) -Greedy action tensor([ 5.8519e-01, -3.4303e-01, -3.0948e-04, -2.9351e-01]) tensor([0.4224, 0.1670, 0.2352, 0.1754]) -Greedy action tensor([ 0.8091, -0.7266, 0.0293, -0.4424]) tensor([0.5102, 0.1099, 0.2339, 0.1460]) -Greedy action tensor([ 0.9698, -0.5138, 0.1123, -0.3144]) tensor([0.5187, 0.1176, 0.2200, 0.1436]) -Greedy action tensor([ 1.1737, -0.4776, 0.2192, -0.5167]) tensor([0.5678, 0.1089, 0.2186, 0.1047]) -Greedy action tensor([ 0.6656, -0.3625, 0.1009, -0.2754]) tensor([0.4317, 0.1544, 0.2454, 0.1685]) -Greedy action tensor([ 1.3924, -0.9566, -0.1620, -0.4968]) tensor([0.6859, 0.0655, 0.1449, 0.1037]) -Greedy action tensor([ 0.6944, -0.2752, -0.0812, -0.1706]) tensor([0.4423, 0.1678, 0.2037, 0.1862]) -Greedy action tensor([ 0.7582, -0.3395, -0.1307, -0.3139]) tensor([0.4792, 0.1599, 0.1970, 0.1640]) -Greedy action tensor([ 1.1282, -0.9790, 0.0251, -0.3231]) tensor([0.5925, 0.0720, 0.1966, 0.1388]) -Greedy action tensor([ 0.7739, -0.4325, -0.0985, -0.2002]) tensor([0.4774, 0.1429, 0.1995, 0.1802]) -Greedy action tensor([ 0.5777, -0.4753, -0.1785, -0.3594]) tensor([0.4525, 0.1579, 0.2124, 0.1773]) -Greedy action tensor([ 0.5110, -0.5041, -0.1389, -0.2104]) tensor([0.4219, 0.1529, 0.2202, 0.2050]) -Greedy action tensor([ 0.4409, -0.1193, -0.0369, 0.0252]) tensor([0.3507, 0.2003, 0.2175, 0.2314]) -Greedy action tensor([ 0.4572, 0.5425, -0.2704, -0.3724]) tensor([0.3324, 0.3620, 0.1606, 0.1450]) -Greedy action tensor([ 0.7291, -0.4092, -0.0767, -0.1120]) tensor([0.4549, 0.1457, 0.2032, 0.1962]) -Greedy action tensor([ 0.4614, -0.4456, -0.1948, -0.2293]) tensor([0.4126, 0.1666, 0.2141, 0.2068]) -Greedy action tensor([ 0.8716, -0.9794, 0.1706, -0.4109]) tensor([0.5180, 0.0814, 0.2570, 0.1437]) -Greedy action tensor([ 0.9270, -0.5985, -0.0108, -0.4312]) tensor([0.5359, 0.1166, 0.2098, 0.1378]) -Greedy action tensor([ 0.8717, -0.5596, -0.0989, -0.2247]) tensor([0.5123, 0.1224, 0.1941, 0.1712]) -Greedy action tensor([ 1.0041, -0.6510, -0.0303, -0.7015]) tensor([0.5787, 0.1106, 0.2057, 0.1051]) -Greedy action tensor([ 1.3509, -0.5894, -0.1819, -0.5184]) tensor([0.6606, 0.0949, 0.1426, 0.1019]) -Greedy action tensor([ 0.6678, -0.3531, -0.0255, -0.2133]) tensor([0.4397, 0.1584, 0.2198, 0.1822]) -Greedy action tensor([ 0.4625, 0.0374, 0.0049, -0.0444]) tensor([0.3462, 0.2263, 0.2191, 0.2085]) -Greedy action tensor([ 0.3797, -0.0127, -0.0302, -0.0597]) tensor([0.3352, 0.2264, 0.2225, 0.2160]) -Greedy action tensor([ 0.3748, 0.0587, 0.0115, -0.4527]) tensor([0.3495, 0.2548, 0.2430, 0.1528]) -Greedy action tensor([ 0.8064, -0.5997, -0.1404, -0.1699]) tensor([0.4976, 0.1219, 0.1931, 0.1874]) -Greedy action tensor([ 0.8059, -0.3335, -0.0293, -0.4784]) tensor([0.4924, 0.1576, 0.2136, 0.1363]) -Greedy action tensor([ 0.8587, -0.4169, -0.1582, -0.4004]) tensor([0.5195, 0.1451, 0.1879, 0.1475]) -Greedy action tensor([ 0.9499, -0.5267, 0.0498, -0.2525]) tensor([0.5167, 0.1180, 0.2100, 0.1553]) -Greedy action tensor([ 0.7604, -0.4511, -0.0673, -0.3835]) tensor([0.4870, 0.1450, 0.2128, 0.1551]) -Greedy action tensor([ 1.1457, -0.5158, -0.0180, -0.4494]) tensor([0.5865, 0.1113, 0.1832, 0.1190]) -Greedy action tensor([ 0.9111, -0.5884, -0.0312, -0.3468]) tensor([0.5271, 0.1177, 0.2054, 0.1498]) -Greedy action tensor([ 0.7373, -0.4675, 0.0309, -0.1920]) tensor([0.4570, 0.1370, 0.2255, 0.1805]) -Greedy action tensor([ 1.1373, -0.6721, -0.0662, -0.5099]) tensor([0.6037, 0.0989, 0.1812, 0.1163]) -Greedy action tensor([ 0.8855, -0.2595, 0.0177, -0.5947]) tensor([0.5087, 0.1619, 0.2136, 0.1158]) -Greedy action tensor([ 0.6526, -0.1387, 0.0802, -0.0896]) tensor([0.4010, 0.1818, 0.2263, 0.1909]) -Greedy action tensor([ 0.9436, -0.7615, 0.0593, -0.6034]) tensor([0.5532, 0.1005, 0.2285, 0.1178]) -Greedy action tensor([ 0.5090, -0.0418, -0.1052, -0.1568]) tensor([0.3800, 0.2191, 0.2056, 0.1953]) -Greedy action tensor([ 0.4959, -0.2345, 0.1228, -0.2102]) tensor([0.3754, 0.1808, 0.2585, 0.1853]) -Greedy action tensor([ 0.7377, -0.7032, 0.0648, -0.3653]) tensor([0.4811, 0.1139, 0.2454, 0.1596]) -Greedy action tensor([ 0.9291, -0.6746, 0.1821, -0.1857]) tensor([0.4993, 0.1004, 0.2365, 0.1638]) -Greedy action tensor([ 0.7065, -0.0528, 0.0770, -0.1806]) tensor([0.4145, 0.1940, 0.2209, 0.1707]) -Greedy action tensor([ 0.6282, -0.3923, 0.0159, -0.2043]) tensor([0.4278, 0.1542, 0.2319, 0.1861]) -Greedy action tensor([ 1.0472, -0.8140, 0.0078, -0.4669]) tensor([0.5783, 0.0899, 0.2045, 0.1272]) -Greedy action tensor([ 0.7815, -0.5167, 0.1021, -0.4809]) tensor([0.4848, 0.1324, 0.2457, 0.1372]) -Greedy action tensor([ 0.6666, -0.3306, -0.0959, -0.0200]) tensor([0.4276, 0.1577, 0.1995, 0.2152]) -Greedy action tensor([ 1.0172, -0.7582, 0.0173, -0.5363]) tensor([0.5718, 0.0969, 0.2104, 0.1209]) -Greedy action tensor([ 0.6839, -0.5567, 0.0299, -0.3348]) tensor([0.4608, 0.1333, 0.2396, 0.1664]) -Greedy action tensor([ 0.8927, -0.2311, 0.1216, -0.4978]) tensor([0.4910, 0.1596, 0.2271, 0.1222]) -Greedy action tensor([ 1.1408, -0.3939, -0.2147, -0.3754]) tensor([0.5907, 0.1273, 0.1523, 0.1297]) -Greedy action tensor([ 0.9169, -1.1710, -0.0695, -0.5058]) tensor([0.5754, 0.0713, 0.2146, 0.1387]) -Greedy action tensor([ 0.7009, -0.6591, -0.1413, -0.5087]) tensor([0.5036, 0.1293, 0.2169, 0.1502]) -Greedy action tensor([ 0.1414, -0.1660, -0.1908, -0.1086]) tensor([0.3095, 0.2276, 0.2220, 0.2410]) -Greedy action tensor([ 0.6860, -0.4023, -0.0161, -0.5282]) tensor([0.4696, 0.1582, 0.2327, 0.1395]) -Greedy action tensor([ 0.5581, -0.4236, 0.0115, -0.3623]) tensor([0.4252, 0.1593, 0.2462, 0.1694]) -Greedy action tensor([ 0.5897, -0.0454, -0.0428, -0.0365]) tensor([0.3852, 0.2041, 0.2047, 0.2060]) -Greedy action tensor([ 0.8027, -0.2909, 0.0358, -0.5666]) tensor([0.4869, 0.1631, 0.2261, 0.1238]) -Greedy action tensor([ 0.5887, -0.2332, -0.1235, -0.3619]) tensor([0.4317, 0.1898, 0.2118, 0.1668]) -Greedy action tensor([ 0.8147, -0.7720, 0.0180, -0.2724]) tensor([0.5018, 0.1027, 0.2263, 0.1692]) -Greedy action tensor([ 0.7830, -0.1790, -0.0212, -0.0834]) tensor([0.4444, 0.1698, 0.1989, 0.1869]) -Greedy action tensor([ 0.7993, -0.5583, 0.0770, -0.3575]) tensor([0.4860, 0.1251, 0.2360, 0.1529]) -Greedy action tensor([ 2.0964, -0.9526, -0.3890, 0.8155]) tensor([0.7100, 0.0337, 0.0591, 0.1972]) -Greedy action tensor([ 1.7620, -0.0793, -0.7011, 0.8772]) tensor([0.6037, 0.0957, 0.0514, 0.2492]) -Greedy action tensor([ 1.5895, -0.3353, -0.5115, 0.6360]) tensor([0.6047, 0.0882, 0.0740, 0.2331]) -Greedy action tensor([ 1.3076, -0.5943, -0.1968, 0.4467]) tensor([0.5573, 0.0832, 0.1238, 0.2356]) -Greedy action tensor([ 1.4378, -0.3054, -0.3333, 0.4118]) tensor([0.5870, 0.1027, 0.0999, 0.2104]) -Greedy action tensor([ 1.4413, -0.4799, -0.5125, 0.2067]) tensor([0.6333, 0.0927, 0.0898, 0.1843]) -Greedy action tensor([ 1.6181, -0.8758, -0.5992, 0.1597]) tensor([0.7022, 0.0580, 0.0765, 0.1633]) -Greedy action tensor([ 1.9026, -0.5077, -0.5231, 0.4779]) tensor([0.7048, 0.0633, 0.0623, 0.1696]) -Greedy action tensor([ 1.9589, -0.6180, -0.3173, 0.5825]) tensor([0.6987, 0.0531, 0.0717, 0.1764]) -Greedy action tensor([ 1.3361, -0.2396, -0.3302, 0.1388]) tensor([0.5890, 0.1218, 0.1113, 0.1779]) -Greedy action tensor([ 1.3239, -0.3095, -0.0830, 0.2035]) tensor([0.5662, 0.1105, 0.1386, 0.1846]) -Greedy action tensor([ 1.2417, -0.7477, -0.2204, 0.4511]) tensor([0.5488, 0.0751, 0.1272, 0.2489]) -Greedy action tensor([ 1.2945, -0.2665, -0.3873, 0.2421]) tensor([0.5730, 0.1203, 0.1066, 0.2001]) -Greedy action tensor([ 1.5680, -0.4900, -0.2655, 0.3570]) tensor([0.6307, 0.0806, 0.1008, 0.1879]) -Greedy action tensor([ 0.8046, -0.4289, -0.2414, 0.2810]) tensor([0.4474, 0.1303, 0.1572, 0.2650]) -Greedy action tensor([ 1.5870, -0.4761, -0.7501, 0.3110]) tensor([0.6654, 0.0846, 0.0643, 0.1858]) -Greedy action tensor([ 1.4670, -0.2532, -0.9413, 0.2178]) tensor([0.6428, 0.1151, 0.0578, 0.1843]) -Greedy action tensor([ 1.1389, 0.0933, -0.8666, 0.5177]) tensor([0.4942, 0.1737, 0.0665, 0.2656]) -Greedy action tensor([ 2.0743, -0.4128, -0.6545, 0.2932]) tensor([0.7594, 0.0631, 0.0496, 0.1279]) -Greedy action tensor([ 2.2534, -0.9392, -0.5248, 0.6903]) tensor([0.7618, 0.0313, 0.0473, 0.1596]) -Greedy action tensor([ 1.3353, -0.5692, -0.2669, 0.4293]) tensor([0.5700, 0.0849, 0.1148, 0.2303]) -Greedy action tensor([ 1.7772, -1.0532, -0.2885, 0.4133]) tensor([0.6938, 0.0409, 0.0879, 0.1774]) -Greedy action tensor([2.6661, 0.8001, 0.1341, 0.2667]) tensor([0.7547, 0.1168, 0.0600, 0.0685]) -Greedy action tensor([ 1.2864, -0.3888, -0.3858, 0.2446]) tensor([0.5787, 0.1084, 0.1087, 0.2042]) -Greedy action tensor([ 1.2296, -0.2951, -1.0048, 0.2309]) tensor([0.5906, 0.1286, 0.0632, 0.2176]) -Greedy action tensor([ 1.5309, -0.1033, -0.2947, 0.0038]) tensor([0.6356, 0.1240, 0.1024, 0.1380]) -Greedy action tensor([ 1.3973, -0.2150, -0.1601, 0.3570]) tensor([0.5671, 0.1131, 0.1195, 0.2004]) -Greedy action tensor([ 0.7376, -0.3425, 0.0199, -0.0912]) tensor([0.4417, 0.1500, 0.2155, 0.1928]) -Greedy action tensor([ 1.9956, -0.5562, -1.1264, 0.1808]) tensor([0.7783, 0.0607, 0.0343, 0.1268]) -Greedy action tensor([ 1.3761, -0.1981, -0.5501, 0.0655]) tensor([0.6163, 0.1277, 0.0898, 0.1662]) -Greedy action tensor([ 1.3522, -0.4296, -0.5518, 0.2603]) tensor([0.6050, 0.1018, 0.0901, 0.2030]) -Greedy action tensor([ 2.2681, -1.0246, -0.3742, 0.3892]) tensor([0.7929, 0.0295, 0.0565, 0.1211]) -Greedy action tensor([ 2.4363, -1.2743, -0.0176, 0.6310]) tensor([0.7844, 0.0192, 0.0674, 0.1290]) -Greedy action tensor([ 1.4615, -0.3373, -0.4638, 0.5246]) tensor([0.5871, 0.0972, 0.0856, 0.2301]) -Greedy action tensor([ 2.2380, -1.2620, -0.4100, 0.6162]) tensor([0.7701, 0.0233, 0.0545, 0.1521]) -Greedy action tensor([ 1.5817, -0.3765, -0.5359, 0.4099]) tensor([0.6364, 0.0898, 0.0766, 0.1972]) -Greedy action tensor([ 1.8471, -0.7878, -0.6152, 0.1986]) tensor([0.7411, 0.0532, 0.0632, 0.1425]) -Greedy action tensor([ 1.2370, -0.4887, -0.1671, -0.0598]) tensor([0.5893, 0.1049, 0.1447, 0.1611]) -Greedy action tensor([ 1.2583, -0.7098, -0.3693, 0.3899]) tensor([0.5696, 0.0796, 0.1119, 0.2390]) -Greedy action tensor([ 1.6506, -0.5778, -0.8792, 0.2754]) tensor([0.6944, 0.0748, 0.0553, 0.1755]) -Greedy action tensor([ 1.3443, -0.1392, -1.2302, 0.1534]) tensor([0.6223, 0.1412, 0.0474, 0.1891]) -Greedy action tensor([ 1.8923, -1.1373, -0.1867, 0.3132]) tensor([0.7249, 0.0350, 0.0906, 0.1494]) -Greedy action tensor([ 1.5008, -1.0437, -0.2937, 0.3842]) tensor([0.6361, 0.0499, 0.1057, 0.2083]) -Greedy action tensor([ 1.1973, -0.5276, -0.5361, 0.2725]) tensor([0.5710, 0.1017, 0.1009, 0.2264]) -Greedy action tensor([ 1.5834, -0.9150, -0.1214, 0.9053]) tensor([0.5645, 0.0464, 0.1026, 0.2865]) -Greedy action tensor([ 2.0068, -0.6406, -0.4870, 0.3752]) tensor([0.7413, 0.0525, 0.0612, 0.1450]) -Greedy action tensor([ 1.1378, -0.3162, -0.7154, 0.3029]) tensor([0.5481, 0.1281, 0.0859, 0.2379]) -Greedy action tensor([ 2.0111, -0.9831, 0.0086, 0.8871]) tensor([0.6622, 0.0332, 0.0894, 0.2152]) -Greedy action tensor([ 1.1756, -0.1335, -0.1238, 0.0592]) tensor([0.5347, 0.1444, 0.1458, 0.1751]) -Greedy action tensor([ 1.3409, -0.5278, -0.1502, 0.2874]) tensor([0.5786, 0.0893, 0.1303, 0.2018]) -Greedy action tensor([ 1.8781, -0.8832, -0.3056, 0.5121]) tensor([0.6988, 0.0442, 0.0787, 0.1783]) -Greedy action tensor([ 1.5734, -0.7578, -0.3164, 0.4964]) tensor([0.6294, 0.0612, 0.0951, 0.2144]) -Greedy action tensor([ 1.5765, -0.9586, -0.0644, -0.2002]) tensor([0.6934, 0.0549, 0.1344, 0.1173]) -Greedy action tensor([ 1.2045, -0.4789, -0.2716, 0.2538]) tensor([0.5553, 0.1031, 0.1269, 0.2146]) -Greedy action tensor([ 1.5619, -0.5799, -0.6187, 0.4949]) tensor([0.6351, 0.0746, 0.0718, 0.2185]) -Greedy action tensor([ 0.8261, -0.3610, 0.1135, -0.0819]) tensor([0.4548, 0.1388, 0.2230, 0.1834]) -Greedy action tensor([ 1.2046, -0.4346, -1.0665, 0.4246]) tensor([0.5696, 0.1106, 0.0588, 0.2611]) -Greedy action tensor([ 1.8208, -0.7098, -0.4722, 0.4216]) tensor([0.7006, 0.0558, 0.0707, 0.1729]) -Greedy action tensor([ 2.2605, -1.2403, -0.3080, 0.7046]) tensor([0.7588, 0.0229, 0.0582, 0.1601]) -Greedy action tensor([ 1.1858, -0.0724, -0.1919, -0.2234]) tensor([0.5616, 0.1596, 0.1416, 0.1372]) -Greedy action tensor([ 1.6959, 0.0242, -0.3845, 0.5218]) tensor([0.6166, 0.1159, 0.0770, 0.1906]) -Greedy action tensor([ 1.1408, -0.4351, 0.0099, 0.1955]) tensor([0.5213, 0.1078, 0.1683, 0.2026]) -Greedy action tensor([ 1.2548, -0.4307, -0.2026, 0.3611]) tensor([0.5472, 0.1014, 0.1274, 0.2239]) -Greedy action tensor([ 1.3876, -0.9786, 0.0095, 0.1774]) tensor([0.6083, 0.0571, 0.1533, 0.1813]) -Greedy action tensor([ 1.8095, -0.7153, -0.6089, 0.3949]) tensor([0.7081, 0.0567, 0.0631, 0.1721]) -Greedy action tensor([ 1.6852, -0.9973, -0.3952, 0.5505]) tensor([0.6602, 0.0451, 0.0824, 0.2123]) -Greedy action tensor([ 1.2967, -0.0549, -0.7214, 0.1381]) tensor([0.5863, 0.1517, 0.0779, 0.1840]) -Greedy action tensor([ 1.4253, -0.6997, -0.2449, 0.6277]) tensor([0.5688, 0.0679, 0.1071, 0.2562]) -Greedy action tensor([ 1.4797, -0.8114, -0.9669, 0.3550]) tensor([0.6611, 0.0669, 0.0572, 0.2147]) -Greedy action tensor([ 0.8056, -0.4156, -0.1564, 0.4150]) tensor([0.4249, 0.1253, 0.1624, 0.2875]) -Greedy action tensor([ 0.9272, -0.2160, -0.8383, 0.2894]) tensor([0.4955, 0.1579, 0.0848, 0.2618]) -Greedy action tensor([ 1.4757, -0.1426, -0.1861, 0.2953]) tensor([0.5899, 0.1169, 0.1120, 0.1812]) -Greedy action tensor([ 1.3615, -0.4181, -0.4033, 0.6096]) tensor([0.5521, 0.0931, 0.0945, 0.2603]) -Greedy action tensor([ 1.7999, -0.7868, -0.5528, 0.5569]) tensor([0.6854, 0.0516, 0.0652, 0.1978]) -Greedy action tensor([ 1.3286, -0.3891, -0.2614, 0.1991]) tensor([0.5860, 0.1052, 0.1195, 0.1894]) -Greedy action tensor([ 1.7356, -0.9653, -0.3063, 0.2340]) tensor([0.7044, 0.0473, 0.0914, 0.1569]) -Greedy action tensor([ 1.4411, -0.3311, -1.0026, 0.2935]) tensor([0.6352, 0.1080, 0.0552, 0.2016]) -Greedy action tensor([ 1.5602, 0.0389, -0.3465, 0.5519]) tensor([0.5774, 0.1261, 0.0858, 0.2107]) -Greedy action tensor([ 1.5611, -0.7526, -0.5147, 0.1923]) tensor([0.6762, 0.0669, 0.0848, 0.1720]) -Greedy action tensor([ 1.8140, -0.9247, -0.2588, 0.1595]) tensor([0.7238, 0.0468, 0.0911, 0.1384]) -Greedy action tensor([ 1.9498, -0.7990, -0.2813, 0.4020]) tensor([0.7225, 0.0462, 0.0776, 0.1537]) -Greedy action tensor([-1.8527, -0.4293, 0.6197, -0.1420]) tensor([0.0444, 0.1842, 0.5259, 0.2455]) -Greedy action tensor([-1.8923, -0.4502, 0.6387, -0.1585]) tensor([0.0426, 0.1803, 0.5357, 0.2414]) -Greedy action tensor([-1.8845, -0.4362, 0.7029, -0.0650]) tensor([0.0405, 0.1722, 0.5378, 0.2495]) -Greedy action tensor([-1.9045, -0.3576, 0.6424, -0.1584]) tensor([0.0413, 0.1941, 0.5277, 0.2369]) -Greedy action tensor([-1.9257, -0.4448, 0.6594, -0.1708]) tensor([0.0409, 0.1799, 0.5426, 0.2366]) -Greedy action tensor([-1.3242, 0.4789, 0.3655, -0.1648]) tensor([0.0638, 0.3872, 0.3456, 0.2034]) -Greedy action tensor([-1.7483, -0.2832, 0.5836, -0.0627]) tensor([0.0476, 0.2059, 0.4899, 0.2567]) -Greedy action tensor([-1.8935, -0.3613, 0.6373, -0.1542]) tensor([0.0419, 0.1938, 0.5260, 0.2384]) -Greedy action tensor([-1.5809, 0.5646, 0.4749, -0.6759]) tensor([0.0504, 0.4310, 0.3940, 0.1246]) -Greedy action tensor([-0.9763, -0.3107, 0.2191, -0.1400]) tensor([0.1168, 0.2273, 0.3861, 0.2697]) -Greedy action tensor([-1.9390, -0.4467, 0.6679, -0.1769]) tensor([0.0403, 0.1791, 0.5460, 0.2346]) -Greedy action tensor([-1.7204, -0.4249, 0.6874, -0.0893]) tensor([0.0479, 0.1750, 0.5323, 0.2448]) -Greedy action tensor([-1.1581, -0.6062, 0.3305, -0.0111]) tensor([0.0969, 0.1683, 0.4295, 0.3052]) -Greedy action tensor([-1.8126, -0.4420, 0.6120, -0.0914]) tensor([0.0458, 0.1804, 0.5176, 0.2562]) -Greedy action tensor([-1.8823, -0.4450, 0.6380, -0.1533]) tensor([0.0430, 0.1808, 0.5341, 0.2421]) -Greedy action tensor([-1.7164, -0.2182, 0.5221, -0.1010]) tensor([0.0503, 0.2250, 0.4717, 0.2530]) -Greedy action tensor([-1.8904, -0.3616, 0.6304, -0.1538]) tensor([0.0421, 0.1944, 0.5242, 0.2393]) -Greedy action tensor([-1.4514, -0.3103, 0.4559, -0.0481]) tensor([0.0670, 0.2096, 0.4510, 0.2724]) -Greedy action tensor([-0.7636, -0.0359, 0.2695, -0.3184]) tensor([0.1344, 0.2782, 0.3776, 0.2098]) -Greedy action tensor([-1.6102, 0.3097, 0.5020, -0.2749]) tensor([0.0503, 0.3429, 0.4157, 0.1911]) -Greedy action tensor([-1.5002, 0.0835, 0.4805, 0.3401]) tensor([0.0515, 0.2509, 0.3732, 0.3243]) -Greedy action tensor([-1.8775, -0.4241, 0.6497, -0.1205]) tensor([0.0424, 0.1813, 0.5306, 0.2456]) -Greedy action tensor([-1.7117, -0.4073, 0.5463, -0.0377]) tensor([0.0511, 0.1882, 0.4884, 0.2724]) -Greedy action tensor([-1.6614, -0.4655, 0.5309, -0.0440]) tensor([0.0546, 0.1807, 0.4893, 0.2754]) -Greedy action tensor([-1.8437, -0.3718, 0.6092, -0.1524]) tensor([0.0446, 0.1945, 0.5187, 0.2422]) -Greedy action tensor([-1.8222, -0.4054, 0.5968, -0.1382]) tensor([0.0460, 0.1896, 0.5166, 0.2477]) -Greedy action tensor([-1.9094, -0.3928, 0.6482, -0.1645]) tensor([0.0413, 0.1884, 0.5335, 0.2367]) -Greedy action tensor([-1.8979, -0.4190, 0.6427, -0.1546]) tensor([0.0420, 0.1844, 0.5333, 0.2403]) -Greedy action tensor([-0.8813, 0.2332, 0.2949, -0.3573]) tensor([0.1114, 0.3395, 0.3611, 0.1881]) -Greedy action tensor([-1.1101, 0.3781, 0.3253, -0.2982]) tensor([0.0842, 0.3727, 0.3536, 0.1895]) -Greedy action tensor([-1.2935, -0.2462, -0.0051, -0.1137]) tensor([0.0932, 0.2656, 0.3380, 0.3032]) -Greedy action tensor([-1.6765, -0.4282, 0.5585, -0.0120]) tensor([0.0523, 0.1823, 0.4890, 0.2764]) -Greedy action tensor([-1.8648, -0.2774, 0.6181, -0.1292]) tensor([0.0425, 0.2078, 0.5088, 0.2410]) -Greedy action tensor([-1.8741, -0.3783, 0.6242, -0.1520]) tensor([0.0431, 0.1922, 0.5237, 0.2410]) -Greedy action tensor([-1.9435, -0.4495, 0.6669, -0.1800]) tensor([0.0402, 0.1790, 0.5465, 0.2343]) -Greedy action tensor([-1.5043, -0.0561, 0.3407, 0.0952]) tensor([0.0605, 0.2574, 0.3827, 0.2994]) -Greedy action tensor([-1.7194, -0.4864, 0.5420, -0.0454]) tensor([0.0516, 0.1772, 0.4957, 0.2755]) -Greedy action tensor([-1.7504, -0.4402, 0.5726, -0.0742]) tensor([0.0494, 0.1830, 0.5038, 0.2639]) -Greedy action tensor([-1.1168, -0.5852, 0.9895, 1.1028]) tensor([0.0497, 0.0846, 0.4084, 0.4574]) -Greedy action tensor([ 0.2670, -0.0458, 0.6764, 1.1992]) tensor([0.1731, 0.1266, 0.2607, 0.4397]) -Greedy action tensor([-1.9273, -0.4147, 0.6496, -0.1742]) tensor([0.0409, 0.1855, 0.5377, 0.2359]) -Greedy action tensor([-1.1875, 0.0216, 0.3287, -0.4180]) tensor([0.0904, 0.3028, 0.4117, 0.1951]) -Greedy action tensor([-1.8871, -0.4231, 0.6377, -0.1514]) tensor([0.0426, 0.1841, 0.5318, 0.2416]) -Greedy action tensor([-1.8895, -0.3753, 0.6362, -0.1350]) tensor([0.0420, 0.1908, 0.5246, 0.2426]) -Greedy action tensor([-1.6751, -0.3362, 0.6011, -0.0398]) tensor([0.0508, 0.1938, 0.4948, 0.2607]) -Greedy action tensor([-1.3885, 0.7125, -0.0272, -0.4989]) tensor([0.0645, 0.5270, 0.2515, 0.1569]) -Greedy action tensor([-1.9071, -0.4349, 0.1565, -0.3839]) tensor([0.0561, 0.2446, 0.4419, 0.2574]) -Greedy action tensor([-1.8543, -0.3078, 0.6042, -0.1224]) tensor([0.0434, 0.2038, 0.5074, 0.2453]) -Greedy action tensor([-0.2038, 1.0440, 0.0125, 0.1714]) tensor([0.1393, 0.4851, 0.1729, 0.2027]) -Greedy action tensor([-1.9244, -0.4215, 0.6539, -0.1650]) tensor([0.0409, 0.1836, 0.5382, 0.2373]) -Greedy action tensor([-1.5713, -0.1268, 0.5354, 0.0715]) tensor([0.0537, 0.2276, 0.4413, 0.2775]) -Greedy action tensor([-1.8646, -0.4503, 0.6677, -0.1045]) tensor([0.0425, 0.1750, 0.5352, 0.2473]) -Greedy action tensor([-1.9240, -0.4589, 0.6667, -0.1635]) tensor([0.0408, 0.1768, 0.5448, 0.2375]) -Greedy action tensor([-1.9200, -0.4176, 0.6537, -0.1678]) tensor([0.0410, 0.1843, 0.5380, 0.2366]) -Greedy action tensor([-1.7345, -0.0475, 0.5573, -0.0274]) tensor([0.0459, 0.2478, 0.4536, 0.2528]) -Greedy action tensor([-1.8758, -0.3778, 0.6257, -0.1483]) tensor([0.0429, 0.1920, 0.5236, 0.2415]) -Greedy action tensor([-1.6161, 0.0297, 0.4519, 0.0224]) tensor([0.0520, 0.2695, 0.4110, 0.2675]) -Greedy action tensor([-1.8001, -0.2852, 0.6265, -0.0791]) tensor([0.0445, 0.2025, 0.5040, 0.2489]) -Greedy action tensor([-1.3571, -0.5250, 0.3912, -0.0225]) tensor([0.0779, 0.1790, 0.4474, 0.2958]) -Greedy action tensor([-1.8125, 0.0745, 0.5242, -0.0777]) tensor([0.0423, 0.2795, 0.4382, 0.2400]) -Greedy action tensor([-1.9227, -0.4163, 0.6544, -0.1680]) tensor([0.0409, 0.1845, 0.5382, 0.2365]) -Greedy action tensor([-1.5655, -0.4960, 0.4845, 0.1048]) tensor([0.0588, 0.1714, 0.4570, 0.3127]) -Greedy action tensor([-1.9218, -0.4552, 0.6612, -0.1676]) tensor([0.0411, 0.1780, 0.5436, 0.2373]) -Greedy action tensor([-1.4292, -0.4016, 0.4387, 0.1498]) tensor([0.0661, 0.1848, 0.4283, 0.3208]) -Greedy action tensor([-1.7982, -0.4003, 0.6064, -0.0713]) tensor([0.0460, 0.1861, 0.5093, 0.2586]) -Greedy action tensor([-0.5211, 0.8345, -0.0431, 0.2545]) tensor([0.1154, 0.4477, 0.1862, 0.2507]) -Greedy action tensor([-1.8856, -0.4534, 0.6433, -0.1455]) tensor([0.0427, 0.1788, 0.5353, 0.2432]) -Greedy action tensor([-1.8814, -0.4080, 0.6273, -0.1347]) tensor([0.0428, 0.1866, 0.5254, 0.2452]) -Greedy action tensor([-1.7898, -0.4326, 0.6356, -0.0439]) tensor([0.0456, 0.1772, 0.5157, 0.2614]) -Greedy action tensor([-1.9427, -0.4476, 0.6667, -0.1797]) tensor([0.0402, 0.1792, 0.5462, 0.2343]) -Greedy action tensor([-1.8971, -0.4445, 0.6443, -0.1586]) tensor([0.0423, 0.1807, 0.5367, 0.2404]) -Greedy action tensor([-1.9247, -0.4494, 0.6589, -0.1695]) tensor([0.0410, 0.1792, 0.5428, 0.2371]) -Greedy action tensor([-0.8484, 0.5373, 0.1817, -0.0790]) tensor([0.1004, 0.4015, 0.2813, 0.2168]) -Greedy action tensor([-1.9326, -0.4224, 0.6590, -0.1742]) tensor([0.0405, 0.1834, 0.5409, 0.2351]) -Greedy action tensor([-1.8666, -0.3921, 0.6252, -0.1139]) tensor([0.0431, 0.1881, 0.5203, 0.2485]) -Greedy action tensor([-1.6511, -0.2163, 0.4283, 0.0142]) tensor([0.0541, 0.2271, 0.4328, 0.2860]) -Greedy action tensor([-1.7791, -0.0947, 0.5303, -0.0827]) tensor([0.0456, 0.2460, 0.4595, 0.2489]) -Greedy action tensor([-1.7656, -0.4387, 0.5603, -0.1446]) tensor([0.0498, 0.1879, 0.5102, 0.2521]) -Greedy action tensor([-1.8713, -0.3939, 0.6246, -0.1454]) tensor([0.0432, 0.1894, 0.5245, 0.2429]) -Greedy action tensor([-1.8283, -0.3879, 0.6477, -0.0799]) tensor([0.0437, 0.1847, 0.5202, 0.2513]) -Greedy action tensor([-1.6773, -0.1997, 0.4945, -0.0764]) tensor([0.0523, 0.2293, 0.4590, 0.2594]) -Greedy action tensor([ 0.8409, -0.4591, -0.1488, -0.4652]) tensor([0.5222, 0.1423, 0.1941, 0.1414]) -Greedy action tensor([ 1.1481, -0.6475, 0.0787, -0.5976]) tensor([0.5939, 0.0986, 0.2038, 0.1037]) -Greedy action tensor([ 0.5405, -0.2443, -0.0388, -0.2313]) tensor([0.4034, 0.1841, 0.2260, 0.1865]) -Greedy action tensor([ 1.0727, -0.8095, 0.0023, -0.3584]) tensor([0.5766, 0.0878, 0.1977, 0.1378]) -Greedy action tensor([ 0.6616, -0.3241, -0.0317, -0.4674]) tensor([0.4553, 0.1699, 0.2276, 0.1472]) -Greedy action tensor([ 1.0106, -0.6490, -0.0649, -0.3902]) tensor([0.5625, 0.1070, 0.1919, 0.1386]) -Greedy action tensor([ 0.5030, 0.0779, -0.1059, 0.0360]) tensor([0.3540, 0.2314, 0.1926, 0.2219]) -Greedy action tensor([ 0.4492, -0.1699, 0.0488, -0.0648]) tensor([0.3563, 0.1918, 0.2387, 0.2131]) -Greedy action tensor([ 0.8227, -0.6948, -0.0084, -0.2528]) tensor([0.5010, 0.1099, 0.2182, 0.1709]) -Greedy action tensor([ 0.9601, -0.7729, -0.0443, -0.6217]) tensor([0.5719, 0.1011, 0.2095, 0.1176]) -Greedy action tensor([ 0.9991, -1.0308, 0.0802, -0.5880]) tensor([0.5764, 0.0757, 0.2300, 0.1179]) -Greedy action tensor([ 0.7260, -0.4603, 0.0240, -0.1633]) tensor([0.4521, 0.1381, 0.2241, 0.1858]) -Greedy action tensor([ 0.5345, -0.1352, 0.2027, -0.4476]) tensor([0.3840, 0.1966, 0.2756, 0.1438]) -Greedy action tensor([ 0.6576, -0.3694, -0.0164, -0.1733]) tensor([0.4342, 0.1555, 0.2213, 0.1891]) -Greedy action tensor([ 1.0544, -0.5714, -0.0552, -0.7020]) tensor([0.5885, 0.1158, 0.1940, 0.1016]) -Greedy action tensor([ 0.8562, -0.5073, 0.0446, -0.5018]) tensor([0.5110, 0.1307, 0.2269, 0.1314]) -Greedy action tensor([ 1.1907, -0.6158, -0.0681, -0.5855]) tensor([0.6182, 0.1015, 0.1756, 0.1047]) -Greedy action tensor([ 0.6662, -0.4833, -0.0153, -0.1533]) tensor([0.4418, 0.1400, 0.2235, 0.1947]) -Greedy action tensor([ 0.6223, -0.7397, -0.1253, -0.3568]) tensor([0.4750, 0.1217, 0.2249, 0.1784]) -Greedy action tensor([ 0.3739, 0.3058, -0.1702, 0.1624]) tensor([0.3008, 0.2811, 0.1746, 0.2435]) -Greedy action tensor([ 0.7336, 0.1898, -0.1228, 0.1973]) tensor([0.3861, 0.2241, 0.1640, 0.2258]) -Greedy action tensor([ 1.0689, -0.5871, 0.0611, -0.5164]) tensor([0.5679, 0.1084, 0.2073, 0.1164]) -Greedy action tensor([ 0.6617, -0.3031, -0.0411, -0.3773]) tensor([0.4484, 0.1709, 0.2220, 0.1587]) -Greedy action tensor([ 0.8133, -0.7791, -0.0588, -0.2444]) tensor([0.5079, 0.1033, 0.2123, 0.1764]) -Greedy action tensor([ 0.6435, -0.4185, -0.0552, -0.5446]) tensor([0.4656, 0.1610, 0.2315, 0.1419]) -Greedy action tensor([ 0.8560, -0.6478, 0.2701, -0.3320]) tensor([0.4799, 0.1067, 0.2671, 0.1463]) -Greedy action tensor([ 0.9215, -0.6988, -0.0480, -0.5527]) tensor([0.5537, 0.1095, 0.2100, 0.1268]) -Greedy action tensor([ 1.0853, -0.5303, -0.0804, -0.3902]) tensor([0.5750, 0.1143, 0.1792, 0.1315]) -Greedy action tensor([ 1.1119, -0.7715, -0.0988, -0.4215]) tensor([0.6003, 0.0913, 0.1789, 0.1295]) -Greedy action tensor([ 1.2126, -0.7436, 0.0230, -0.3232]) tensor([0.6020, 0.0851, 0.1832, 0.1296]) -Greedy action tensor([ 0.3591, -0.2202, -0.1228, -0.0981]) tensor([0.3557, 0.1993, 0.2197, 0.2252]) -Greedy action tensor([ 0.9291, -0.7378, 0.1334, -0.5752]) tensor([0.5370, 0.1014, 0.2423, 0.1193]) -Greedy action tensor([ 0.6456, -0.3044, -0.0015, -0.2868]) tensor([0.4341, 0.1679, 0.2273, 0.1708]) -Greedy action tensor([ 0.7097, -0.3833, 0.0642, -0.4567]) tensor([0.4606, 0.1544, 0.2415, 0.1435]) -Greedy action tensor([ 0.8452, -0.5046, -0.1533, -0.3066]) tensor([0.5145, 0.1334, 0.1895, 0.1626]) -Greedy action tensor([ 0.8772, -0.5996, -0.0012, -0.3918]) tensor([0.5195, 0.1186, 0.2158, 0.1460]) -Greedy action tensor([ 0.8672, -0.5950, -0.0340, -0.1970]) tensor([0.5043, 0.1169, 0.2048, 0.1740]) -Greedy action tensor([ 1.0584, -0.7414, 0.0813, -0.4488]) tensor([0.5671, 0.0938, 0.2135, 0.1256]) -Greedy action tensor([ 0.2182, 0.0655, -0.1571, -0.1578]) tensor([0.3094, 0.2656, 0.2126, 0.2124]) -Greedy action tensor([ 0.8443, -0.5260, -0.0191, -0.4340]) tensor([0.5117, 0.1300, 0.2158, 0.1425]) -Greedy action tensor([ 1.1309, -0.9504, 0.0302, -0.8082]) tensor([0.6245, 0.0779, 0.2077, 0.0898]) -Greedy action tensor([ 0.8672, -0.4584, 0.0424, -0.3301]) tensor([0.4985, 0.1324, 0.2185, 0.1506]) -Greedy action tensor([ 0.4213, -0.2638, 0.0135, -0.4867]) tensor([0.3887, 0.1959, 0.2586, 0.1568]) -Greedy action tensor([ 1.1281, -0.7984, -0.1654, -0.4216]) tensor([0.6127, 0.0892, 0.1680, 0.1301]) -Greedy action tensor([ 0.7672, -0.4638, -0.1730, -0.4444]) tensor([0.5050, 0.1475, 0.1972, 0.1503]) -Greedy action tensor([ 1.0655, -0.8063, 0.0265, -0.5082]) tensor([0.5831, 0.0897, 0.2063, 0.1209]) -Greedy action tensor([ 0.9978, -1.0004, -0.1554, -0.4763]) tensor([0.5952, 0.0807, 0.1878, 0.1363]) -Greedy action tensor([ 0.7975, -0.6253, 0.1136, -0.3597]) tensor([0.4854, 0.1170, 0.2450, 0.1526]) -Greedy action tensor([ 0.2813, 0.3909, -0.1302, 0.0250]) tensor([0.2815, 0.3141, 0.1865, 0.2178]) -Greedy action tensor([ 1.1540, -0.6598, 0.1093, -0.3173]) tensor([0.5733, 0.0935, 0.2017, 0.1316]) -Greedy action tensor([ 1.0716, -0.4681, -0.0654, -0.2236]) tensor([0.5528, 0.1185, 0.1773, 0.1514]) -Greedy action tensor([ 0.6314, -0.2960, -0.0840, -0.1693]) tensor([0.4285, 0.1695, 0.2095, 0.1924]) -Greedy action tensor([ 0.5928, -0.2637, -0.1999, -0.0528]) tensor([0.4164, 0.1768, 0.1885, 0.2183]) -Greedy action tensor([ 0.3877, 0.1005, -0.0285, 0.0349]) tensor([0.3213, 0.2411, 0.2119, 0.2258]) -Greedy action tensor([ 0.7375, -0.5295, -0.0291, -0.4782]) tensor([0.4895, 0.1379, 0.2274, 0.1452]) -Greedy action tensor([ 0.8268, -0.5114, 0.0128, -0.3773]) tensor([0.4987, 0.1308, 0.2209, 0.1496]) -Greedy action tensor([ 0.8102, -0.5022, -0.1075, -0.4686]) tensor([0.5136, 0.1383, 0.2052, 0.1430]) -Greedy action tensor([ 0.9166, -0.5137, 0.0326, -0.2415]) tensor([0.5085, 0.1217, 0.2101, 0.1597]) -Greedy action tensor([ 1.2309, -0.5891, 0.0125, -0.5947]) tensor([0.6177, 0.1001, 0.1827, 0.0995]) -Greedy action tensor([ 1.1389, -0.5512, 0.1142, -0.4992]) tensor([0.5755, 0.1062, 0.2065, 0.1118]) -Greedy action tensor([ 0.9349, -0.3534, -0.0446, -0.2110]) tensor([0.5078, 0.1400, 0.1907, 0.1615]) -Greedy action tensor([ 0.5928, -0.3982, -0.1695, -0.3509]) tensor([0.4491, 0.1667, 0.2095, 0.1748]) -Greedy action tensor([ 0.7748, -0.4929, 0.1233, -0.4054]) tensor([0.4739, 0.1334, 0.2470, 0.1456]) -Greedy action tensor([ 0.6985, -1.0044, -0.1598, -0.3688]) tensor([0.5128, 0.0934, 0.2174, 0.1764]) -Greedy action tensor([ 0.9160, -1.0475, 0.1997, -0.4175]) tensor([0.5284, 0.0742, 0.2582, 0.1393]) -Greedy action tensor([ 0.6676, -0.4444, -0.1092, -0.2936]) tensor([0.4606, 0.1515, 0.2118, 0.1761]) -Greedy action tensor([ 0.7695, -0.4912, -0.0874, -0.3217]) tensor([0.4893, 0.1387, 0.2077, 0.1643]) -Greedy action tensor([ 0.8836, -0.5349, -0.0564, -0.1984]) tensor([0.5072, 0.1228, 0.1981, 0.1719]) -Greedy action tensor([ 0.7372, -0.6949, 0.1310, -0.7345]) tensor([0.4966, 0.1186, 0.2708, 0.1140]) -Greedy action tensor([ 0.7677, -0.7389, -0.0080, -0.5004]) tensor([0.5093, 0.1129, 0.2345, 0.1433]) -Greedy action tensor([ 0.4879, -0.1025, -0.1720, -0.0484]) tensor([0.3765, 0.2086, 0.1946, 0.2202]) -Greedy action tensor([ 0.9477, -0.5491, 0.0761, -0.6226]) tensor([0.5405, 0.1210, 0.2261, 0.1124]) -Greedy action tensor([ 0.5864, -0.5240, -0.2264, -0.3369]) tensor([0.4608, 0.1518, 0.2044, 0.1830]) -Greedy action tensor([ 0.8009, -0.4563, 0.0779, -0.2106]) tensor([0.4687, 0.1333, 0.2275, 0.1705]) -Greedy action tensor([ 1.0114, -0.6618, -0.0398, -0.6726]) tensor([0.5805, 0.1089, 0.2029, 0.1077]) -Greedy action tensor([ 0.9413, -0.5513, 0.2876, -0.8056]) tensor([0.5210, 0.1171, 0.2710, 0.0908]) -Greedy action tensor([ 1.1692, -0.7026, 0.0875, -0.6435]) tensor([0.6038, 0.0929, 0.2047, 0.0986]) -Greedy action tensor([ 0.5921, -0.3039, 0.2542, -0.4519]) tensor([0.4043, 0.1650, 0.2884, 0.1423]) -Greedy action tensor([ 0.9261, -0.5955, -0.1175, -0.3666]) tensor([0.5420, 0.1183, 0.1909, 0.1488]) -Greedy action tensor([ 0.6868, -0.0860, 0.0451, -0.1079]) tensor([0.4099, 0.1892, 0.2157, 0.1851]) -Greedy action tensor([ 0.9517, -0.8288, -0.0573, -0.2954]) tensor([0.5493, 0.0926, 0.2003, 0.1578]) -Greedy action tensor([ 0.7187, -0.4633, 0.3664, 1.0354]) tensor([0.2957, 0.0907, 0.2079, 0.4058]) -Greedy action tensor([-0.6352, -1.2724, 0.4481, -0.4884]) tensor([0.1773, 0.0937, 0.5237, 0.2053]) -Greedy action tensor([ 0.0471, -1.0108, -0.1116, -0.1119]) tensor([0.3275, 0.1137, 0.2794, 0.2794]) -Greedy action tensor([ 0.3133, -0.6742, -0.2359, 0.1250]) tensor([0.3599, 0.1341, 0.2078, 0.2981]) -Greedy action tensor([-0.8789, -0.6712, -0.4142, 1.0543]) tensor([0.0932, 0.1147, 0.1483, 0.6439]) -Greedy action tensor([ 0.1555, -0.0442, -0.2145, -0.2616]) tensor([0.3156, 0.2585, 0.2180, 0.2080]) -Greedy action tensor([ 0.4510, -1.0225, 0.2851, 0.3138]) tensor([0.3392, 0.0777, 0.2873, 0.2957]) -Greedy action tensor([-1.1911, -0.9381, -1.8164, -0.1239]) tensor([0.1745, 0.2247, 0.0934, 0.5074]) -Greedy action tensor([-0.2106, 0.4502, -0.1229, 0.2616]) tensor([0.1776, 0.3438, 0.1938, 0.2847]) -Greedy action tensor([ 0.9567, -1.0651, 0.7432, -0.6712]) tensor([0.4681, 0.0620, 0.3781, 0.0919]) -Greedy action tensor([ 0.0727, -0.1638, -0.2488, -0.9094]) tensor([0.3461, 0.2732, 0.2510, 0.1296]) -Greedy action tensor([-0.0856, 0.2170, -1.0256, 0.3046]) tensor([0.2369, 0.3206, 0.0925, 0.3500]) -Greedy action tensor([ 1.3809, -0.3598, 0.7420, 0.5860]) tensor([0.4641, 0.0814, 0.2450, 0.2096]) -Greedy action tensor([ 0.0822, 0.5341, -0.0475, -0.0331]) tensor([0.2304, 0.3620, 0.2024, 0.2053]) -Greedy action tensor([ 0.1440, 0.1990, -0.1056, -0.0061]) tensor([0.2705, 0.2858, 0.2108, 0.2328]) -Greedy action tensor([ 0.9970, -0.4482, 0.3327, 0.3690]) tensor([0.4378, 0.1032, 0.2253, 0.2336]) -Greedy action tensor([-0.1398, -0.7865, 0.3442, -0.1378]) tensor([0.2411, 0.1263, 0.3911, 0.2415]) -Greedy action tensor([ 0.3697, -0.4019, -1.0300, 0.1702]) tensor([0.3956, 0.1829, 0.0976, 0.3240]) -Greedy action tensor([-0.4880, -1.4033, 0.5722, -0.9465]) tensor([0.2033, 0.0814, 0.5868, 0.1285]) -Greedy action tensor([ 0.6023, -0.4601, -0.0791, -0.4582]) tensor([0.4550, 0.1573, 0.2302, 0.1576]) -Greedy action tensor([ 0.9355, -0.2981, -0.3373, 1.3037]) tensor([0.3315, 0.0966, 0.0928, 0.4791]) -Greedy action tensor([-0.5521, -0.3455, 0.8423, -1.0232]) tensor([0.1452, 0.1785, 0.5856, 0.0907]) -Greedy action tensor([ 0.8314, 0.4532, 0.0566, -0.0616]) tensor([0.3914, 0.2681, 0.1803, 0.1602]) -Greedy action tensor([ 0.5981, -0.2019, 0.1738, 0.6885]) tensor([0.3127, 0.1405, 0.2046, 0.3423]) -Greedy action tensor([ 0.1613, -1.1749, 0.1411, 0.5549]) tensor([0.2684, 0.0706, 0.2631, 0.3979]) -Greedy action tensor([ 0.2245, -0.7329, 0.0486, -0.5084]) tensor([0.3699, 0.1420, 0.3103, 0.1778]) -Greedy action tensor([-1.0603, -0.7377, -0.7622, -0.1761]) tensor([0.1626, 0.2245, 0.2191, 0.3937]) -Greedy action tensor([ 0.7831, -0.4948, 0.3600, -0.4498]) tensor([0.4494, 0.1252, 0.2944, 0.1310]) -Greedy action tensor([ 0.8540, -1.2418, 0.2378, -0.0854]) tensor([0.4869, 0.0599, 0.2629, 0.1903]) -Greedy action tensor([ 0.7148, 0.3946, -0.7757, -0.2651]) tensor([0.4298, 0.3120, 0.0968, 0.1613]) -Greedy action tensor([ 0.7453, 0.3904, 0.5280, -0.2805]) tensor([0.3491, 0.2448, 0.2809, 0.1252]) -Greedy action tensor([-0.7863, -0.4804, -0.0056, 0.3070]) tensor([0.1329, 0.1804, 0.2901, 0.3966]) -Greedy action tensor([ 0.1374, -1.9599, 0.4545, 0.0671]) tensor([0.2917, 0.0358, 0.4006, 0.2719]) -Greedy action tensor([ 0.0704, -0.0033, -0.8028, -0.8946]) tensor([0.3666, 0.3406, 0.1531, 0.1397]) -Greedy action tensor([ 0.5989, -0.1080, -0.2040, -0.0383]) tensor([0.4049, 0.1997, 0.1814, 0.2141]) -Greedy action tensor([ 1.5902, 0.0311, 0.3094, -0.2201]) tensor([0.6054, 0.1273, 0.1682, 0.0991]) -Greedy action tensor([-0.6263, -0.9893, -0.9854, -0.6404]) tensor([0.2959, 0.2058, 0.2066, 0.2917]) -Greedy action tensor([-0.5589, -0.1347, 0.2844, -0.6941]) tensor([0.1746, 0.2669, 0.4059, 0.1525]) -Greedy action tensor([ 0.3088, -0.1390, 0.3692, -0.6585]) tensor([0.3245, 0.2074, 0.3447, 0.1234]) -Greedy action tensor([-0.0318, -0.9353, 0.4214, 0.5884]) tensor([0.2067, 0.0837, 0.3252, 0.3843]) -Greedy action tensor([-0.1214, -0.1237, -0.1533, -0.6000]) tensor([0.2789, 0.2782, 0.2701, 0.1728]) -Greedy action tensor([ 0.2393, -0.1455, -0.3194, -0.9164]) tensor([0.3895, 0.2651, 0.2228, 0.1226]) -Greedy action tensor([ 0.3439, -0.8146, 0.1047, 0.0541]) tensor([0.3509, 0.1102, 0.2763, 0.2626]) -Greedy action tensor([ 0.5222, 0.0519, -0.4057, -0.3998]) tensor([0.4136, 0.2584, 0.1635, 0.1645]) -Greedy action tensor([ 0.1089, -0.2619, 0.3789, -0.3966]) tensor([0.2775, 0.1915, 0.3635, 0.1674]) -Greedy action tensor([ 1.1635, -0.6314, 0.8673, 1.5987]) tensor([0.2894, 0.0481, 0.2152, 0.4473]) -Greedy action tensor([ 0.2093, -0.2852, 0.5291, -0.5041]) tensor([0.2876, 0.1754, 0.3960, 0.1409]) -Greedy action tensor([-1.1748, -0.8602, -0.0290, -0.2107]) tensor([0.1229, 0.1683, 0.3865, 0.3223]) -Greedy action tensor([-0.2360, -0.1157, -0.2074, 0.3623]) tensor([0.2010, 0.2267, 0.2068, 0.3656]) -Greedy action tensor([ 0.1648, 0.0190, -0.2211, -0.8181]) tensor([0.3426, 0.2962, 0.2330, 0.1282]) -Greedy action tensor([-1.0311, -0.7094, 0.4049, -0.7460]) tensor([0.1264, 0.1743, 0.5312, 0.1681]) -Greedy action tensor([ 0.5586, 0.0575, -0.0261, -0.2575]) tensor([0.3838, 0.2325, 0.2139, 0.1697]) -Greedy action tensor([ 0.6179, -0.2033, 0.3113, 0.5466]) tensor([0.3218, 0.1416, 0.2369, 0.2997]) -Greedy action tensor([1.1013, 0.0453, 0.6544, 0.0717]) tensor([0.4265, 0.1484, 0.2728, 0.1523]) -Greedy action tensor([ 0.5018, -0.7087, 0.9611, 0.6480]) tensor([0.2476, 0.0738, 0.3920, 0.2866]) -Greedy action tensor([ 1.1926, -0.7774, 0.6989, 0.3286]) tensor([0.4606, 0.0642, 0.2811, 0.1941]) -Greedy action tensor([-1.1179, 0.3208, 0.9567, -0.6549]) tensor([0.0677, 0.2855, 0.5392, 0.1076]) -Greedy action tensor([ 0.9059, -0.5375, -0.5764, -0.5668]) tensor([0.5908, 0.1395, 0.1342, 0.1355]) -Greedy action tensor([ 0.3460, -1.4092, -0.0669, 0.1920]) tensor([0.3715, 0.0642, 0.2458, 0.3185]) -Greedy action tensor([-0.0724, 0.2587, 0.4795, -0.6968]) tensor([0.2144, 0.2985, 0.3723, 0.1148]) -Greedy action tensor([-0.4018, 0.4367, 0.3847, -0.6690]) tensor([0.1594, 0.3686, 0.3500, 0.1220]) -Greedy action tensor([-0.6492, -1.1247, -0.0512, 0.0987]) tensor([0.1801, 0.1119, 0.3275, 0.3805]) -Greedy action tensor([ 0.1540, -0.8125, 0.5132, -0.3659]) tensor([0.2935, 0.1117, 0.4204, 0.1745]) -Greedy action tensor([ 0.3901, 0.0960, 0.7405, -0.0923]) tensor([0.2644, 0.1970, 0.3753, 0.1632]) -Greedy action tensor([-0.7586, -0.7611, -0.9831, -0.5876]) tensor([0.2511, 0.2504, 0.2006, 0.2979]) -Greedy action tensor([ 0.2534, 0.7409, -0.7397, -0.7653]) tensor([0.2976, 0.4846, 0.1103, 0.1075]) -Greedy action tensor([-0.8739, -0.0366, -0.0299, 0.0545]) tensor([0.1225, 0.2829, 0.2848, 0.3099]) -Greedy action tensor([-0.7590, -0.6873, 0.7868, -1.1991]) tensor([0.1350, 0.1450, 0.6332, 0.0869]) -Greedy action tensor([-0.5389, -0.4718, 0.1641, -0.5284]) tensor([0.1961, 0.2097, 0.3961, 0.1982]) -Greedy action tensor([-1.2125, -0.5444, 0.4477, -0.8257]) tensor([0.1033, 0.2014, 0.5433, 0.1520]) -Greedy action tensor([-0.1836, 0.0984, -0.8349, -0.2931]) tensor([0.2671, 0.3542, 0.1393, 0.2394]) -Greedy action tensor([-0.4544, -0.1981, -0.1370, -0.0849]) tensor([0.1956, 0.2527, 0.2686, 0.2830]) -Greedy action tensor([ 0.9892, -0.5996, 0.0389, -0.1201]) tensor([0.5207, 0.1063, 0.2013, 0.1717]) -Greedy action tensor([-1.0083, 0.2893, 0.5204, -0.7388]) tensor([0.0945, 0.3459, 0.4358, 0.1237]) -Greedy action tensor([-0.5420, -1.0694, -0.8816, -0.6137]) tensor([0.3093, 0.1825, 0.2202, 0.2879]) -Greedy action tensor([ 0.3776, -0.0115, 0.4058, -0.2339]) tensor([0.3078, 0.2086, 0.3166, 0.1670]) -Greedy action tensor([-0.0984, -0.2626, -0.4418, 0.1588]) tensor([0.2597, 0.2203, 0.1842, 0.3358]) -Greedy action tensor([-0.5055, -0.9672, 0.6237, -0.7977]) tensor([0.1828, 0.1152, 0.5655, 0.1365]) -Greedy action tensor([-0.2339, -0.3378, -0.5995, -1.5412]) tensor([0.3490, 0.3145, 0.2421, 0.0944]) -Greedy action tensor([ 0.7221, -0.5290, 1.0141, 1.1108]) tensor([0.2439, 0.0698, 0.3266, 0.3598]) -Greedy action tensor([-0.1530, 0.4528, -0.5571, -0.3047]) tensor([0.2294, 0.4204, 0.1531, 0.1971]) -Greedy action tensor([ 1.4166, -0.3904, -1.0602, 0.5809]) tensor([0.5946, 0.0976, 0.0500, 0.2578]) -Greedy action tensor([ 0.9562, -0.1795, -0.4964, 0.5876]) tensor([0.4451, 0.1429, 0.1041, 0.3079]) -Greedy action tensor([ 1.3863, -0.5780, -0.0088, 0.2939]) tensor([0.5802, 0.0814, 0.1438, 0.1946]) -Greedy action tensor([ 0.9346, -0.3343, -0.1523, 0.0639]) tensor([0.4909, 0.1380, 0.1656, 0.2055]) -Greedy action tensor([ 2.0430, -1.5496, -0.1989, 0.3784]) tensor([0.7558, 0.0208, 0.0803, 0.1431]) -Greedy action tensor([ 1.4327, -1.1730, -0.3128, -0.0090]) tensor([0.6734, 0.0497, 0.1175, 0.1593]) -Greedy action tensor([ 1.9941, -0.5594, -0.8914, 0.2403]) tensor([0.7653, 0.0595, 0.0427, 0.1325]) -Greedy action tensor([ 2.0272, -0.6506, -0.9592, 0.4862]) tensor([0.7500, 0.0515, 0.0379, 0.1606]) -Greedy action tensor([ 1.5859, 0.1884, -0.5619, -0.0211]) tensor([0.6392, 0.1580, 0.0746, 0.1281]) -Greedy action tensor([ 2.3376, -1.3121, -0.3080, 0.6859]) tensor([0.7760, 0.0202, 0.0551, 0.1488]) -Greedy action tensor([ 0.9738, -0.5276, 0.2667, -0.1427]) tensor([0.4894, 0.1091, 0.2413, 0.1602]) -Greedy action tensor([ 2.9968, -1.0939, -0.1406, 0.6398]) tensor([0.8659, 0.0145, 0.0376, 0.0820]) -Greedy action tensor([ 2.2520, -0.4123, -0.3049, 0.6503]) tensor([0.7414, 0.0516, 0.0575, 0.1494]) -Greedy action tensor([ 1.9289, -0.7782, -0.1473, 0.0187]) tensor([0.7462, 0.0498, 0.0936, 0.1105]) -Greedy action tensor([ 1.6089, -0.6939, -0.5335, 0.2658]) tensor([0.6764, 0.0676, 0.0794, 0.1766]) -Greedy action tensor([ 1.5588, -0.2294, -0.4924, 0.2975]) tensor([0.6333, 0.1059, 0.0814, 0.1794]) -Greedy action tensor([ 1.6340, -0.1466, -0.8080, 0.2865]) tensor([0.6599, 0.1112, 0.0574, 0.1715]) -Greedy action tensor([ 1.9750, -0.2476, -0.2241, 0.4537]) tensor([0.6956, 0.0753, 0.0771, 0.1519]) -Greedy action tensor([ 0.8127, -0.0858, 0.0398, 0.2292]) tensor([0.4121, 0.1678, 0.1902, 0.2299]) -Greedy action tensor([ 1.6816, -0.9961, -0.5058, 0.1975]) tensor([0.7104, 0.0488, 0.0797, 0.1611]) -Greedy action tensor([ 2.0477, -1.3702, 0.0575, 0.3006]) tensor([0.7442, 0.0244, 0.1017, 0.1297]) -Greedy action tensor([ 1.5610, -0.9202, 0.1175, 0.8114]) tensor([0.5579, 0.0467, 0.1317, 0.2637]) -Greedy action tensor([ 1.7737, -0.6149, -0.6354, 0.0296]) tensor([0.7372, 0.0676, 0.0663, 0.1289]) -Greedy action tensor([ 1.3694, -0.4862, 0.0475, 0.9323]) tensor([0.4833, 0.0756, 0.1289, 0.3122]) -Greedy action tensor([ 2.2126, -1.0484, -0.5226, 0.7017]) tensor([0.7553, 0.0290, 0.0490, 0.1667]) -Greedy action tensor([ 1.2072, -0.2643, 0.0885, 0.3928]) tensor([0.5002, 0.1148, 0.1634, 0.2215]) -Greedy action tensor([ 0.9548, 0.1852, -1.0792, 0.6324]) tensor([0.4313, 0.1998, 0.0564, 0.3124]) -Greedy action tensor([ 2.4045, 0.6375, -0.0281, -0.0622]) tensor([0.7443, 0.1272, 0.0654, 0.0632]) -Greedy action tensor([ 1.1391, -0.4539, -0.2010, 0.2132]) tensor([0.5373, 0.1092, 0.1407, 0.2128]) -Greedy action tensor([ 1.2817, -0.3554, -0.4975, 0.6626]) tensor([0.5258, 0.1023, 0.0887, 0.2831]) -Greedy action tensor([ 1.2902, -0.2068, -0.9267, 0.0663]) tensor([0.6147, 0.1376, 0.0670, 0.1808]) -Greedy action tensor([ 1.4586, -0.7816, -0.3252, 0.3024]) tensor([0.6293, 0.0670, 0.1057, 0.1980]) -Greedy action tensor([ 1.8383, -0.6801, -0.2676, 0.0048]) tensor([0.7341, 0.0592, 0.0894, 0.1174]) -Greedy action tensor([ 1.4332, -0.7045, -0.6882, 0.2342]) tensor([0.6497, 0.0766, 0.0779, 0.1959]) -Greedy action tensor([ 1.4014, -0.5475, -0.3094, 0.5343]) tensor([0.5736, 0.0817, 0.1037, 0.2410]) -Greedy action tensor([ 1.8049, -0.6769, -0.0842, 0.7296]) tensor([0.6345, 0.0530, 0.0959, 0.2165]) -Greedy action tensor([ 1.3320, -0.1426, -0.3624, 0.2770]) tensor([0.5679, 0.1300, 0.1043, 0.1978]) -Greedy action tensor([ 2.3577, -0.9439, -0.1125, 0.6732]) tensor([0.7652, 0.0282, 0.0647, 0.1420]) -Greedy action tensor([ 1.1947, -0.0682, -0.4420, 0.1181]) tensor([0.5500, 0.1556, 0.1070, 0.1874]) -Greedy action tensor([ 1.7775, -0.0320, -0.4438, -0.0709]) tensor([0.6994, 0.1145, 0.0759, 0.1102]) -Greedy action tensor([ 1.5401, -0.6182, -0.3035, 0.0876]) tensor([0.6632, 0.0766, 0.1050, 0.1552]) -Greedy action tensor([ 1.6126, -0.4871, -0.5131, 0.0825]) tensor([0.6857, 0.0840, 0.0818, 0.1485]) -Greedy action tensor([ 0.9759, -0.1006, -0.1491, 0.0478]) tensor([0.4853, 0.1654, 0.1575, 0.1918]) -Greedy action tensor([ 1.8388, -0.3113, -0.5793, 0.1058]) tensor([0.7234, 0.0843, 0.0644, 0.1279]) -Greedy action tensor([ 1.4129, -0.3736, -0.4337, 0.3255]) tensor([0.6015, 0.1008, 0.0949, 0.2028]) -Greedy action tensor([ 2.0007, -0.8712, -0.2340, 0.6790]) tensor([0.6991, 0.0396, 0.0748, 0.1865]) -Greedy action tensor([ 1.4350, -0.1851, -0.9660, 0.2536]) tensor([0.6268, 0.1240, 0.0568, 0.1923]) -Greedy action tensor([ 0.8955, -0.5860, -0.1582, -0.1943]) tensor([0.5229, 0.1189, 0.1823, 0.1759]) -Greedy action tensor([ 2.1518, -1.0267, -0.2183, 0.5001]) tensor([0.7537, 0.0314, 0.0704, 0.1445]) -Greedy action tensor([ 1.5852, -0.9770, -0.3412, 0.6849]) tensor([0.6138, 0.0473, 0.0894, 0.2495]) -Greedy action tensor([ 1.3250, -0.0128, -1.5080, 0.4158]) tensor([0.5800, 0.1522, 0.0341, 0.2337]) -Greedy action tensor([ 0.9992, -0.2573, -0.0991, 0.3260]) tensor([0.4699, 0.1338, 0.1567, 0.2397]) -Greedy action tensor([ 1.7467, -0.3651, -0.8950, 0.2940]) tensor([0.7012, 0.0849, 0.0500, 0.1640]) -Greedy action tensor([ 2.5863, 0.9004, 0.1286, -0.2975]) tensor([0.7537, 0.1396, 0.0645, 0.0421]) -Greedy action tensor([ 0.9846, -0.3711, -0.3321, 0.5081]) tensor([0.4658, 0.1201, 0.1248, 0.2893]) -Greedy action tensor([ 1.8217, -0.8073, -0.5127, 0.2963]) tensor([0.7212, 0.0520, 0.0699, 0.1569]) -Greedy action tensor([ 1.2358, -0.4786, -0.5086, 0.2898]) tensor([0.5737, 0.1033, 0.1003, 0.2228]) -Greedy action tensor([ 1.2075, -0.4768, -0.9503, 0.2161]) tensor([0.5980, 0.1110, 0.0691, 0.2219]) -Greedy action tensor([ 1.4798, -0.3806, -1.0130, 0.2629]) tensor([0.6517, 0.1014, 0.0539, 0.1930]) -Greedy action tensor([ 1.3815, -0.5297, -0.6969, 0.4622]) tensor([0.5981, 0.0885, 0.0748, 0.2385]) -Greedy action tensor([ 1.3504, -0.6484, -0.3641, 0.6641]) tensor([0.5498, 0.0745, 0.0990, 0.2768]) -Greedy action tensor([ 1.3620, 0.2303, -0.8311, 0.4793]) tensor([0.5412, 0.1745, 0.0604, 0.2239]) -Greedy action tensor([ 1.5544, -0.6561, -0.8464, 0.2224]) tensor([0.6830, 0.0749, 0.0619, 0.1803]) -Greedy action tensor([ 1.4227, -0.6978, -0.3768, 0.5106]) tensor([0.5928, 0.0711, 0.0980, 0.2381]) -Greedy action tensor([ 1.1468, -0.3039, 0.0817, 0.2179]) tensor([0.5066, 0.1187, 0.1746, 0.2001]) -Greedy action tensor([ 1.4648, -0.5911, -0.5717, 0.1678]) tensor([0.6528, 0.0835, 0.0852, 0.1784]) -Greedy action tensor([ 1.2083, -0.2229, -1.1672, 0.9146]) tensor([0.4813, 0.1151, 0.0448, 0.3589]) -Greedy action tensor([ 1.1273, -0.5709, -0.2342, 0.0164]) tensor([0.5654, 0.1035, 0.1449, 0.1862]) -Greedy action tensor([ 1.2499, -0.8191, -0.0984, 0.3017]) tensor([0.5639, 0.0712, 0.1464, 0.2185]) -Greedy action tensor([ 1.0998, -0.4158, -0.1332, 0.3043]) tensor([0.5096, 0.1119, 0.1485, 0.2300]) -Greedy action tensor([ 1.3251, -0.6400, -0.2395, 0.4028]) tensor([0.5724, 0.0802, 0.1197, 0.2276]) -Greedy action tensor([ 1.3815, -0.5868, -0.2756, 0.2002]) tensor([0.6108, 0.0853, 0.1165, 0.1874]) -Greedy action tensor([ 1.0655, -0.6277, 0.1084, -0.0738]) tensor([0.5297, 0.0974, 0.2034, 0.1695]) -Greedy action tensor([ 1.1497, -0.1388, -0.4152, 0.6501]) tensor([0.4781, 0.1318, 0.1000, 0.2901]) -Greedy action tensor([ 2.1538, 0.4195, -0.0472, -0.0302]) tensor([0.7144, 0.1261, 0.0791, 0.0804]) -Greedy action tensor([ 2.1531, 0.1514, -0.0473, 0.5927]) tensor([0.6869, 0.0928, 0.0761, 0.1443]) -Greedy action tensor([ 1.3895, -0.6516, -0.3090, 0.0412]) tensor([0.6359, 0.0826, 0.1163, 0.1651]) -Greedy action tensor([ 2.3363, -0.8158, -0.6284, 0.7465]) tensor([0.7702, 0.0329, 0.0397, 0.1571]) -Greedy action tensor([ 1.1658, -0.2094, -0.1518, 0.0545]) tensor([0.5406, 0.1367, 0.1448, 0.1779]) -Greedy action tensor([ 1.2692, -0.0361, -1.5465, 0.3880]) tensor([0.5730, 0.1553, 0.0343, 0.2374]) -Greedy action tensor([ 1.5398, -0.4497, -0.6461, 0.2841]) tensor([0.6519, 0.0892, 0.0733, 0.1857]) -Greedy action tensor([-1.4397, -0.5800, 0.4102, 0.0590]) tensor([0.0704, 0.1664, 0.4479, 0.3153]) -Greedy action tensor([-1.1135, -0.5882, 0.2856, 0.1159]) tensor([0.0984, 0.1664, 0.3987, 0.3365]) -Greedy action tensor([-1.4485, 0.5410, 0.3015, 0.1790]) tensor([0.0522, 0.3817, 0.3004, 0.2657]) -Greedy action tensor([-1.9224, -0.4191, 0.6553, -0.1672]) tensor([0.0409, 0.1839, 0.5386, 0.2366]) -Greedy action tensor([-1.8977, -0.4365, 0.6446, -0.1568]) tensor([0.0422, 0.1817, 0.5357, 0.2404]) -Greedy action tensor([-1.9359, -0.3918, 0.6515, -0.1770]) tensor([0.0403, 0.1890, 0.5364, 0.2343]) -Greedy action tensor([-1.9346, -0.4448, 0.6626, -0.1747]) tensor([0.0405, 0.1798, 0.5441, 0.2356]) -Greedy action tensor([-1.0575, -0.5041, 0.5121, 0.3448]) tensor([0.0861, 0.1498, 0.4139, 0.3501]) -Greedy action tensor([-1.4886, 0.2657, 0.5197, -0.4796]) tensor([0.0589, 0.3405, 0.4390, 0.1616]) -Greedy action tensor([-1.7072, -0.5061, 0.5584, -0.0023]) tensor([0.0514, 0.1708, 0.4952, 0.2827]) -Greedy action tensor([-0.5787, -0.2157, 0.5534, 0.9215]) tensor([0.0998, 0.1434, 0.3095, 0.4473]) -Greedy action tensor([-1.5009, -0.4991, 0.4575, 0.0571]) tensor([0.0643, 0.1750, 0.4555, 0.3052]) -Greedy action tensor([-1.8022, -0.3762, 0.6326, -0.0474]) tensor([0.0447, 0.1862, 0.5105, 0.2586]) -Greedy action tensor([-1.9006, -0.3437, 0.6425, -0.1441]) tensor([0.0412, 0.1956, 0.5244, 0.2388]) -Greedy action tensor([-1.9073, -0.4386, 0.6453, -0.1604]) tensor([0.0418, 0.1816, 0.5368, 0.2398]) -Greedy action tensor([-0.9092, -0.5898, 0.2378, 0.0916]) tensor([0.1213, 0.1669, 0.3819, 0.3299]) -Greedy action tensor([-1.9460, -0.4570, 0.6751, -0.1803]) tensor([0.0400, 0.1771, 0.5494, 0.2336]) -Greedy action tensor([-1.8986, -0.3813, 0.6475, -0.1505]) tensor([0.0416, 0.1895, 0.5302, 0.2387]) -Greedy action tensor([-0.8659, 0.7552, 0.1003, 0.0239]) tensor([0.0899, 0.4549, 0.2363, 0.2189]) -Greedy action tensor([-1.8293, -0.4863, 0.6028, -0.1201]) tensor([0.0460, 0.1762, 0.5236, 0.2541]) -Greedy action tensor([-1.4655, -0.5205, 0.4651, -0.0373]) tensor([0.0683, 0.1758, 0.4709, 0.2850]) -Greedy action tensor([-0.7370, 1.0030, 0.0566, 0.3541]) tensor([0.0841, 0.4793, 0.1860, 0.2505]) -Greedy action tensor([-1.8702, -0.4566, 0.6239, -0.1287]) tensor([0.0436, 0.1793, 0.5282, 0.2489]) -Greedy action tensor([-1.6338, 0.1560, 0.6862, -0.5594]) tensor([0.0498, 0.2980, 0.5064, 0.1457]) -Greedy action tensor([-1.9046, -0.4461, 0.6452, -0.1651]) tensor([0.0420, 0.1807, 0.5380, 0.2393]) -Greedy action tensor([-1.2199, 0.7800, 0.2566, 0.0266]) tensor([0.0616, 0.4548, 0.2695, 0.2141]) -Greedy action tensor([-1.8594, -0.3524, 0.6250, -0.1293]) tensor([0.0432, 0.1950, 0.5181, 0.2437]) -Greedy action tensor([-1.0262, 0.4837, 0.1859, -0.0232]) tensor([0.0861, 0.3898, 0.2894, 0.2348]) -Greedy action tensor([-1.8047, -0.3832, 0.5876, -0.1265]) tensor([0.0466, 0.1933, 0.5103, 0.2498]) -Greedy action tensor([ 0.6197, 0.8580, -0.2759, 0.4224]) tensor([0.2859, 0.3628, 0.1167, 0.2347]) -Greedy action tensor([-1.2358, 0.7281, 0.1707, 0.2032]) tensor([0.0609, 0.4339, 0.2485, 0.2567]) -Greedy action tensor([-1.9409, -0.4553, 0.6731, -0.1762]) tensor([0.0401, 0.1773, 0.5481, 0.2344]) -Greedy action tensor([-1.6243, 0.1799, 0.3964, 0.0587]) tensor([0.0500, 0.3037, 0.3772, 0.2691]) -Greedy action tensor([-1.6861, -0.4034, 0.5520, -0.0222]) tensor([0.0519, 0.1872, 0.4867, 0.2741]) -Greedy action tensor([-0.0516, -0.6357, 0.6568, 0.5443]) tensor([0.1851, 0.1032, 0.3759, 0.3359]) -Greedy action tensor([-1.6094, -0.4285, 0.4967, -0.0154]) tensor([0.0575, 0.1872, 0.4723, 0.2830]) -Greedy action tensor([-1.0935, 0.4305, 0.4204, 0.2963]) tensor([0.0707, 0.3244, 0.3212, 0.2837]) -Greedy action tensor([-1.1548, -0.2540, 0.3467, -0.1034]) tensor([0.0925, 0.2277, 0.4151, 0.2647]) -Greedy action tensor([-1.9337, -0.4360, 0.6624, -0.1743]) tensor([0.0405, 0.1811, 0.5432, 0.2353]) -Greedy action tensor([-1.9179, -0.4045, 0.6542, -0.1644]) tensor([0.0410, 0.1861, 0.5364, 0.2366]) -Greedy action tensor([-1.8234, -0.4311, 0.6145, -0.1258]) tensor([0.0456, 0.1835, 0.5220, 0.2490]) -Greedy action tensor([-1.7614, -0.4268, 0.6045, -0.1126]) tensor([0.0484, 0.1839, 0.5158, 0.2518]) -Greedy action tensor([-1.8929, -0.3928, 0.6297, -0.1482]) tensor([0.0423, 0.1894, 0.5265, 0.2419]) -Greedy action tensor([-1.8919, -0.4456, 0.6439, -0.1561]) tensor([0.0425, 0.1804, 0.5362, 0.2409]) -Greedy action tensor([-1.8877, -0.4227, 0.6344, -0.1563]) tensor([0.0427, 0.1847, 0.5315, 0.2411]) -Greedy action tensor([-0.5792, 0.6124, 0.0389, 0.0061]) tensor([0.1259, 0.4145, 0.2336, 0.2260]) -Greedy action tensor([-1.8801, -0.2056, 0.5904, -0.1597]) tensor([0.0421, 0.2247, 0.4980, 0.2352]) -Greedy action tensor([-1.9185, -0.4403, 0.6533, -0.1683]) tensor([0.0413, 0.1810, 0.5402, 0.2375]) -Greedy action tensor([-1.8876, -0.4253, 0.6240, -0.1480]) tensor([0.0429, 0.1850, 0.5281, 0.2441]) -Greedy action tensor([-1.4715, -0.2771, 0.4674, 0.1731]) tensor([0.0609, 0.2009, 0.4230, 0.3152]) -Greedy action tensor([-1.9469, -0.4541, 0.6686, -0.1824]) tensor([0.0401, 0.1783, 0.5478, 0.2339]) -Greedy action tensor([-1.5596, -0.0588, 0.4434, 0.0156]) tensor([0.0564, 0.2530, 0.4181, 0.2725]) -Greedy action tensor([-1.7057, -0.0460, 0.4957, -0.0394]) tensor([0.0486, 0.2554, 0.4390, 0.2571]) -Greedy action tensor([-1.9039, -0.4269, 0.6439, -0.1617]) tensor([0.0419, 0.1835, 0.5354, 0.2392]) -Greedy action tensor([-1.8435, -0.4644, 0.6220, -0.0867]) tensor([0.0444, 0.1762, 0.5223, 0.2571]) -Greedy action tensor([-0.7222, 0.8843, 0.1629, -0.1857]) tensor([0.0988, 0.4927, 0.2395, 0.1690]) -Greedy action tensor([-1.7465, -0.4674, 0.5726, -0.0861]) tensor([0.0499, 0.1795, 0.5078, 0.2628]) -Greedy action tensor([-1.9204, -0.4566, 0.6737, -0.1529]) tensor([0.0407, 0.1760, 0.5449, 0.2384]) -Greedy action tensor([-1.8816, -0.3448, 0.6175, -0.1355]) tensor([0.0425, 0.1974, 0.5168, 0.2434]) -Greedy action tensor([-1.7420, -0.3704, 0.6660, 0.1670]) tensor([0.0439, 0.1729, 0.4874, 0.2959]) -Greedy action tensor([-1.8629, -0.1708, 0.5985, -0.1348]) tensor([0.0420, 0.2284, 0.4929, 0.2367]) -Greedy action tensor([-1.6675, -0.1411, 0.4655, -0.0233]) tensor([0.0520, 0.2394, 0.4392, 0.2694]) -Greedy action tensor([-1.8651, -0.4540, 0.6352, -0.1379]) tensor([0.0436, 0.1790, 0.5319, 0.2455]) -Greedy action tensor([-1.8821, -0.4518, 0.6390, -0.1524]) tensor([0.0430, 0.1797, 0.5349, 0.2424]) -Greedy action tensor([-1.9187, -0.4444, 0.6479, -0.1662]) tensor([0.0414, 0.1808, 0.5390, 0.2388]) -Greedy action tensor([-1.8286, -0.3309, 0.5956, -0.0998]) tensor([0.0446, 0.1996, 0.5042, 0.2515]) -Greedy action tensor([-0.8325, -0.0907, 0.5789, 0.5385]) tensor([0.0898, 0.1885, 0.3682, 0.3536]) -Greedy action tensor([-1.6799, -0.4600, 0.5291, -0.0230]) tensor([0.0534, 0.1808, 0.4860, 0.2798]) -Greedy action tensor([-1.8321, -0.3641, 0.5722, -0.1114]) tensor([0.0455, 0.1973, 0.5032, 0.2540]) -Greedy action tensor([-1.8511, -0.4043, 0.6380, -0.0876]) tensor([0.0432, 0.1837, 0.5209, 0.2521]) -Greedy action tensor([-1.8017, -0.3024, 0.6028, -0.0749]) tensor([0.0451, 0.2020, 0.4994, 0.2536]) -Greedy action tensor([-1.8864, -0.3822, 0.6318, -0.1499]) tensor([0.0424, 0.1908, 0.5260, 0.2407]) -Greedy action tensor([0.0386, 1.2363, 0.0518, 0.3624]) tensor([0.1491, 0.4938, 0.1510, 0.2061]) -Greedy action tensor([-1.7889, -0.4033, 0.6304, -0.0958]) tensor([0.0461, 0.1844, 0.5186, 0.2509]) -Greedy action tensor([-1.8571, -0.4588, 0.6271, -0.1398]) tensor([0.0442, 0.1791, 0.5304, 0.2463]) -Greedy action tensor([-0.9055, -0.5335, 0.2396, 0.0641]) tensor([0.1215, 0.1763, 0.3819, 0.3204]) -Greedy action tensor([-1.4956, 0.0589, 0.3875, -0.0092]) tensor([0.0598, 0.2829, 0.3930, 0.2643]) -Greedy action tensor([-1.8869, -0.4492, 0.6423, -0.1515]) tensor([0.0427, 0.1798, 0.5355, 0.2421]) -Greedy action tensor([-1.6757, 0.0090, 0.4768, -0.0138]) tensor([0.0493, 0.2660, 0.4247, 0.2600]) -Greedy action tensor([-1.6464, -0.4651, 0.5203, -0.0824]) tensor([0.0563, 0.1834, 0.4914, 0.2689]) -Greedy action tensor([-0.7031, 0.7307, 0.0709, 0.2205]) tensor([0.1012, 0.4245, 0.2195, 0.2549]) -Greedy action tensor([ 0.6461, -0.4703, -0.0898, -0.4752]) tensor([0.4689, 0.1536, 0.2247, 0.1528]) -Greedy action tensor([ 0.9265, -0.0878, -0.0448, -0.1035]) tensor([0.4766, 0.1728, 0.1804, 0.1701]) -Greedy action tensor([ 0.7626, -0.3561, -0.0158, -0.3884]) tensor([0.4757, 0.1554, 0.2184, 0.1505]) -Greedy action tensor([ 0.8290, -0.7869, -0.1635, -0.2186]) tensor([0.5208, 0.1035, 0.1930, 0.1827]) -Greedy action tensor([ 0.3372, 0.0601, -0.2898, -0.0414]) tensor([0.3359, 0.2546, 0.1794, 0.2300]) -Greedy action tensor([ 0.4939, -0.0358, 0.0765, -0.1097]) tensor([0.3579, 0.2107, 0.2357, 0.1957]) -Greedy action tensor([ 0.9354, -0.7337, 0.0196, -0.4416]) tensor([0.5432, 0.1023, 0.2174, 0.1371]) -Greedy action tensor([ 0.8704, -0.3690, -0.1721, -0.3422]) tensor([0.5156, 0.1493, 0.1818, 0.1534]) -Greedy action tensor([ 0.9275, -0.7065, 0.1355, -0.3555]) tensor([0.5194, 0.1014, 0.2353, 0.1440]) -Greedy action tensor([ 1.0113, -0.5109, -0.1453, -0.3496]) tensor([0.5589, 0.1220, 0.1758, 0.1433]) -Greedy action tensor([ 0.9714, -0.5948, -0.0639, -0.6224]) tensor([0.5659, 0.1182, 0.2010, 0.1150]) -Greedy action tensor([ 0.7590, -0.6448, -0.0244, -0.2574]) tensor([0.4844, 0.1190, 0.2213, 0.1753]) -Greedy action tensor([ 1.1293, -0.8721, 0.0729, -0.6254]) tensor([0.6039, 0.0816, 0.2100, 0.1045]) -Greedy action tensor([ 1.2906, -0.6412, -0.1055, -0.6176]) tensor([0.6490, 0.0940, 0.1607, 0.0963]) -Greedy action tensor([ 0.4425, -0.2294, 0.0313, -0.2086]) tensor([0.3710, 0.1895, 0.2460, 0.1935]) -Greedy action tensor([ 0.5555, 0.1391, -0.0652, 0.0914]) tensor([0.3539, 0.2334, 0.1902, 0.2225]) -Greedy action tensor([ 0.9600, -0.4003, 0.0362, -0.2186]) tensor([0.5099, 0.1308, 0.2024, 0.1569]) -Greedy action tensor([ 0.8457, -0.9655, -0.0373, -0.4447]) tensor([0.5399, 0.0883, 0.2233, 0.1486]) -Greedy action tensor([ 0.8183, -0.5074, -0.0407, -0.4676]) tensor([0.5087, 0.1351, 0.2155, 0.1406]) -Greedy action tensor([ 0.5247, -0.3877, 0.0271, -0.4913]) tensor([0.4216, 0.1693, 0.2564, 0.1527]) -Greedy action tensor([ 0.9824, -0.4406, -0.1085, -0.5157]) tensor([0.5554, 0.1338, 0.1866, 0.1242]) -Greedy action tensor([ 0.9689, -0.8800, 0.1071, -0.5771]) tensor([0.5577, 0.0878, 0.2356, 0.1189]) -Greedy action tensor([ 1.1397, -1.0823, 0.1282, -0.6982]) tensor([0.6130, 0.0665, 0.2230, 0.0976]) -Greedy action tensor([ 0.5896, -0.0923, -0.1978, -0.3463]) tensor([0.4250, 0.2149, 0.1934, 0.1667]) -Greedy action tensor([ 0.4670, -0.3676, 0.0610, -0.4435]) tensor([0.3996, 0.1734, 0.2662, 0.1608]) -Greedy action tensor([ 0.5845, -0.0576, -0.0665, -0.1346]) tensor([0.3945, 0.2076, 0.2057, 0.1922]) -Greedy action tensor([ 0.2933, -0.1633, -0.0168, -0.1121]) tensor([0.3296, 0.2088, 0.2418, 0.2198]) -Greedy action tensor([ 0.7673, -0.3878, -0.1288, -0.1159]) tensor([0.4680, 0.1474, 0.1910, 0.1935]) -Greedy action tensor([ 0.3070, 0.1341, -0.0499, -0.0388]) tensor([0.3078, 0.2589, 0.2154, 0.2178]) -Greedy action tensor([ 0.7690, -0.3559, -0.1269, -0.1422]) tensor([0.4684, 0.1521, 0.1912, 0.1883]) -Greedy action tensor([0.1936, 0.0788, 0.1027, 0.4971]) tensor([0.2404, 0.2143, 0.2195, 0.3257]) -Greedy action tensor([ 0.5221, 0.0176, -0.1067, -0.1724]) tensor([0.3793, 0.2290, 0.2023, 0.1894]) -Greedy action tensor([ 1.1617, -0.5670, 0.0480, -0.7277]) tensor([0.6035, 0.1071, 0.1982, 0.0912]) -Greedy action tensor([ 1.1102, -1.0164, 0.0992, -0.7519]) tensor([0.6103, 0.0728, 0.2221, 0.0948]) -Greedy action tensor([ 1.0239, -0.3814, 0.1160, -0.3111]) tensor([0.5231, 0.1283, 0.2110, 0.1376]) -Greedy action tensor([ 0.8433, -0.5478, -0.0424, -0.3150]) tensor([0.5063, 0.1260, 0.2088, 0.1590]) -Greedy action tensor([ 0.6212, -0.0329, -0.1629, -0.5196]) tensor([0.4355, 0.2264, 0.1988, 0.1392]) -Greedy action tensor([ 0.7276, -0.5853, -0.0018, -0.2581]) tensor([0.4707, 0.1266, 0.2270, 0.1757]) -Greedy action tensor([ 1.1194, -0.6943, 0.0884, -0.4341]) tensor([0.5776, 0.0942, 0.2060, 0.1222]) -Greedy action tensor([ 0.7492, -0.4242, 0.1377, -0.1562]) tensor([0.4432, 0.1371, 0.2405, 0.1792]) -Greedy action tensor([ 0.7270, -0.5140, -0.0049, -0.2877]) tensor([0.4689, 0.1356, 0.2255, 0.1700]) -Greedy action tensor([ 0.6531, -0.2162, 0.1129, -0.2526]) tensor([0.4156, 0.1742, 0.2421, 0.1680]) -Greedy action tensor([ 0.7741, -0.6004, -0.1091, -0.2689]) tensor([0.4953, 0.1253, 0.2048, 0.1746]) -Greedy action tensor([ 1.0469, -0.3768, -0.0267, -0.2279]) tensor([0.5370, 0.1293, 0.1836, 0.1501]) -Greedy action tensor([ 1.0995, -1.1686, 0.1097, -0.5903]) tensor([0.6025, 0.0624, 0.2239, 0.1112]) -Greedy action tensor([ 0.7793, -0.7450, 0.3111, -0.9245]) tensor([0.4936, 0.1075, 0.3091, 0.0898]) -Greedy action tensor([ 0.7435, -0.1845, -0.0392, -0.1260]) tensor([0.4402, 0.1740, 0.2013, 0.1845]) -Greedy action tensor([ 0.8280, -0.2014, -0.1457, -0.3389]) tensor([0.4887, 0.1746, 0.1846, 0.1522]) -Greedy action tensor([ 1.1902, -0.6672, -0.0061, -0.4449]) tensor([0.6048, 0.0944, 0.1828, 0.1179]) -Greedy action tensor([ 0.8122, 0.0433, 0.0380, -0.4507]) tensor([0.4530, 0.2100, 0.2089, 0.1281]) -Greedy action tensor([ 0.5168, -0.2037, -0.0790, -0.2620]) tensor([0.4006, 0.1949, 0.2207, 0.1838]) -Greedy action tensor([ 0.9915, -0.8837, -0.1531, -0.3445]) tensor([0.5765, 0.0884, 0.1835, 0.1516]) -Greedy action tensor([ 0.8754, -0.6136, -0.1377, -0.2987]) tensor([0.5269, 0.1189, 0.1913, 0.1629]) -Greedy action tensor([ 1.0152, -0.7386, -0.1022, -0.4597]) tensor([0.5783, 0.1001, 0.1892, 0.1323]) -Greedy action tensor([ 0.6455, -0.0726, -0.0346, -0.0084]) tensor([0.3977, 0.1940, 0.2015, 0.2068]) -Greedy action tensor([ 0.7520, -0.4094, 0.0035, -0.2602]) tensor([0.4652, 0.1456, 0.2201, 0.1691]) -Greedy action tensor([ 0.8638, -0.4312, 0.1112, -0.2695]) tensor([0.4838, 0.1325, 0.2279, 0.1558]) -Greedy action tensor([ 0.7502, -0.8747, 0.1296, -0.3853]) tensor([0.4864, 0.0958, 0.2615, 0.1563]) -Greedy action tensor([ 0.9266, -0.7484, -0.0139, -0.4827]) tensor([0.5488, 0.1028, 0.2143, 0.1341]) -Greedy action tensor([ 0.3844, -0.2315, -0.0593, -0.1850]) tensor([0.3639, 0.1966, 0.2335, 0.2059]) -Greedy action tensor([ 0.6658, -0.1876, -0.0693, -0.2871]) tensor([0.4365, 0.1859, 0.2093, 0.1683]) -Greedy action tensor([ 0.5654, -0.4789, -0.0962, -0.3286]) tensor([0.4392, 0.1546, 0.2266, 0.1796]) -Greedy action tensor([ 1.0281, -0.6565, 0.1873, -0.3262]) tensor([0.5333, 0.0989, 0.2301, 0.1377]) -Greedy action tensor([ 0.7222, -0.2390, 0.0358, -0.0932]) tensor([0.4295, 0.1643, 0.2162, 0.1900]) -Greedy action tensor([ 0.5373, -0.1046, -0.0724, 0.0232]) tensor([0.3748, 0.1973, 0.2037, 0.2242]) -Greedy action tensor([ 0.5675, 0.0540, 0.0350, -0.0578]) tensor([0.3676, 0.2199, 0.2158, 0.1967]) -Greedy action tensor([ 0.8141, -0.2673, -0.1055, -0.1403]) tensor([0.4711, 0.1597, 0.1878, 0.1814]) -Greedy action tensor([ 0.3544, -0.2466, 0.0484, -0.4524]) tensor([0.3662, 0.2008, 0.2696, 0.1634]) -Greedy action tensor([ 0.8164, -0.4688, -0.1025, -0.3726]) tensor([0.5050, 0.1397, 0.2015, 0.1538]) -Greedy action tensor([ 1.1629, -0.6727, -0.0198, -0.5163]) tensor([0.6051, 0.0965, 0.1855, 0.1129]) -Greedy action tensor([ 1.1349, -0.6149, -0.1505, -0.5312]) tensor([0.6100, 0.1060, 0.1687, 0.1153]) -Greedy action tensor([ 1.2042, -0.5926, -0.1195, -0.5554]) tensor([0.6234, 0.1034, 0.1659, 0.1073]) -Greedy action tensor([ 0.9207, -0.4736, 0.0592, -0.3278]) tensor([0.5109, 0.1267, 0.2159, 0.1466]) -Greedy action tensor([ 0.6557, -0.1780, -0.1338, -0.0788]) tensor([0.4223, 0.1834, 0.1917, 0.2026]) -Greedy action tensor([ 0.6683, -0.4507, -0.0981, -0.1043]) tensor([0.4438, 0.1450, 0.2062, 0.2050]) -Greedy action tensor([ 1.3441, -1.0068, 0.0631, -0.6397]) tensor([0.6620, 0.0631, 0.1839, 0.0910]) -Greedy action tensor([ 0.5139, -0.1733, -0.0630, -0.2412]) tensor([0.3945, 0.1984, 0.2216, 0.1854]) -Greedy action tensor([ 0.5067, -0.2067, 0.0818, -0.1010]) tensor([0.3720, 0.1823, 0.2432, 0.2026]) -Greedy action tensor([ 0.7154, -0.3512, -0.1520, -0.4575]) tensor([0.4823, 0.1660, 0.2026, 0.1492]) -Greedy action tensor([ 0.8926, -0.3447, 0.0259, -0.2886]) tensor([0.4957, 0.1438, 0.2084, 0.1521]) -Greedy action tensor([ 0.5969, -0.4673, -0.4323, -0.5527]) tensor([0.4953, 0.1709, 0.1770, 0.1569]) -Greedy action tensor([ 0.6966, -0.4416, -0.8573, 1.1977]) tensor([0.3142, 0.1007, 0.0664, 0.5187]) -Greedy action tensor([ 0.4295, -0.9039, 0.7246, 0.0379]) tensor([0.3046, 0.0803, 0.4092, 0.2059]) -Greedy action tensor([-0.2827, -1.4036, -0.4260, 0.4067]) tensor([0.2389, 0.0779, 0.2071, 0.4761]) -Greedy action tensor([-0.4520, -0.0360, -0.5134, -0.3966]) tensor([0.2216, 0.3359, 0.2084, 0.2342]) -Greedy action tensor([ 0.3090, -1.0794, -0.5316, -0.6266]) tensor([0.4823, 0.1203, 0.2081, 0.1892]) -Greedy action tensor([ 1.2648, -0.5437, 0.7397, -0.8985]) tensor([0.5347, 0.0876, 0.3162, 0.0615]) -Greedy action tensor([-0.1252, -0.4425, -0.6465, -0.5953]) tensor([0.3393, 0.2471, 0.2015, 0.2121]) -Greedy action tensor([ 0.3113, 0.0522, 0.9331, -0.2319]) tensor([0.2373, 0.1831, 0.4418, 0.1378]) -Greedy action tensor([ 0.1728, -0.0694, 0.1642, -0.1520]) tensor([0.2858, 0.2243, 0.2833, 0.2065]) -Greedy action tensor([ 0.8009, 0.2940, 0.6194, -0.2827]) tensor([0.3604, 0.2171, 0.3006, 0.1219]) -Greedy action tensor([ 0.0409, -0.8851, -0.7429, 0.0877]) tensor([0.3447, 0.1366, 0.1574, 0.3613]) -Greedy action tensor([-0.7090, -0.7709, 0.4500, -0.3722]) tensor([0.1532, 0.1440, 0.4882, 0.2146]) -Greedy action tensor([-0.7784, 0.0023, -0.5766, -0.1229]) tensor([0.1579, 0.3447, 0.1932, 0.3042]) -Greedy action tensor([ 0.7270, -0.2757, -0.5154, 0.1001]) tensor([0.4567, 0.1675, 0.1318, 0.2440]) -Greedy action tensor([ 0.2967, -0.5234, -0.0161, -0.4158]) tensor([0.3756, 0.1654, 0.2747, 0.1842]) -Greedy action tensor([ 0.3977, -1.6276, -0.4333, -0.0029]) tensor([0.4469, 0.0590, 0.1947, 0.2994]) -Greedy action tensor([-0.1241, 0.2574, 0.2285, -0.6515]) tensor([0.2233, 0.3271, 0.3178, 0.1318]) -Greedy action tensor([ 0.5909, -0.2431, -0.0953, 0.9717]) tensor([0.2940, 0.1277, 0.1480, 0.4303]) -Greedy action tensor([ 0.7859, 0.4668, 0.4697, -0.2489]) tensor([0.3557, 0.2585, 0.2593, 0.1264]) -Greedy action tensor([ 0.2992, -0.6682, -0.1246, 0.8096]) tensor([0.2702, 0.1027, 0.1769, 0.4502]) -Greedy action tensor([ 1.0704, -0.0758, -0.4660, 0.3487]) tensor([0.4953, 0.1574, 0.1066, 0.2407]) -Greedy action tensor([ 0.5955, -0.7014, 2.0988, -0.2346]) tensor([0.1611, 0.0440, 0.7246, 0.0703]) -Greedy action tensor([-0.5385, -0.3491, -0.4422, -0.5073]) tensor([0.2303, 0.2784, 0.2536, 0.2377]) -Greedy action tensor([ 0.0126, -1.8336, 0.0917, -0.2751]) tensor([0.3344, 0.0528, 0.3620, 0.2508]) -Greedy action tensor([ 0.3938, 0.4048, 0.0113, -0.6031]) tensor([0.3266, 0.3302, 0.2228, 0.1205]) -Greedy action tensor([-0.4074, 0.3162, 0.5438, -0.1374]) tensor([0.1437, 0.2962, 0.3719, 0.1882]) -Greedy action tensor([-0.5443, -0.2262, 0.3414, -0.5718]) tensor([0.1733, 0.2381, 0.4201, 0.1685]) -Greedy action tensor([-0.0918, -0.5676, 0.0311, -0.6379]) tensor([0.3002, 0.1865, 0.3394, 0.1739]) -Greedy action tensor([-0.5355, -0.3723, 1.0069, -0.3481]) tensor([0.1241, 0.1461, 0.5802, 0.1496]) -Greedy action tensor([-0.4031, 0.4380, -1.0100, -0.2979]) tensor([0.2010, 0.4661, 0.1096, 0.2233]) -Greedy action tensor([ 0.5173, -0.7088, -0.1856, -0.1296]) tensor([0.4325, 0.1269, 0.2141, 0.2265]) -Greedy action tensor([ 1.2335, -0.7376, 0.9089, -0.1958]) tensor([0.4758, 0.0663, 0.3439, 0.1140]) -Greedy action tensor([ 0.4469, -0.7988, -0.5265, -0.2431]) tensor([0.4614, 0.1328, 0.1743, 0.2315]) -Greedy action tensor([-0.9718, -0.7610, 0.4735, -0.5496]) tensor([0.1250, 0.1543, 0.5302, 0.1906]) -Greedy action tensor([ 1.5916, -0.6047, -0.3815, 0.2792]) tensor([0.6581, 0.0732, 0.0915, 0.1772]) -Greedy action tensor([ 0.3795, -0.2214, -0.2491, -0.2604]) tensor([0.3833, 0.2102, 0.2044, 0.2021]) -Greedy action tensor([-1.0004, -0.1213, -0.4012, -0.3265]) tensor([0.1391, 0.3350, 0.2532, 0.2728]) -Greedy action tensor([-0.4176, -0.3385, -0.7678, 0.2128]) tensor([0.2143, 0.2320, 0.1510, 0.4026]) -Greedy action tensor([ 0.7428, -0.7127, -0.0558, 0.0039]) tensor([0.4628, 0.1080, 0.2082, 0.2210]) -Greedy action tensor([ 0.1068, -0.8373, 1.1606, -0.6354]) tensor([0.2113, 0.0822, 0.6060, 0.1006]) -Greedy action tensor([-0.4630, -0.6874, -0.5487, -0.2693]) tensor([0.2544, 0.2033, 0.2335, 0.3088]) -Greedy action tensor([-1.1042, -0.5614, -0.0136, -0.6672]) tensor([0.1380, 0.2375, 0.4108, 0.2137]) -Greedy action tensor([ 0.1536, 0.1918, 0.0662, -0.0798]) tensor([0.2669, 0.2773, 0.2445, 0.2113]) -Greedy action tensor([-0.2750, -0.6454, 1.0679, -0.6074]) tensor([0.1603, 0.1107, 0.6140, 0.1150]) -Greedy action tensor([-0.7054, -0.9054, -0.6097, 0.7433]) tensor([0.1393, 0.1141, 0.1533, 0.5933]) -Greedy action tensor([-0.0545, -0.9079, 0.4360, -0.0777]) tensor([0.2477, 0.1055, 0.4046, 0.2421]) -Greedy action tensor([-0.4053, 0.1198, -0.4038, -0.8694]) tensor([0.2314, 0.3913, 0.2318, 0.1455]) -Greedy action tensor([-0.4081, -1.2545, 0.2413, 0.2467]) tensor([0.1898, 0.0814, 0.3634, 0.3654]) -Greedy action tensor([-0.7857, 1.1188, 0.1242, -0.3346]) tensor([0.0850, 0.5706, 0.2111, 0.1334]) -Greedy action tensor([-0.1971, -0.0025, 0.2455, -0.8221]) tensor([0.2322, 0.2821, 0.3614, 0.1243]) -Greedy action tensor([ 1.4770, -0.7229, -1.1093, 0.7855]) tensor([0.5928, 0.0657, 0.0446, 0.2969]) -Greedy action tensor([ 0.3175, 0.2274, 0.6064, -0.4317]) tensor([0.2687, 0.2456, 0.3587, 0.1270]) -Greedy action tensor([-0.3232, -0.2756, -0.4276, -1.0954]) tensor([0.2931, 0.3074, 0.2640, 0.1354]) -Greedy action tensor([-0.3239, -1.5013, 0.2363, -0.0412]) tensor([0.2280, 0.0702, 0.3992, 0.3025]) -Greedy action tensor([-0.0802, -1.2422, -0.1954, -0.1839]) tensor([0.3220, 0.1007, 0.2870, 0.2903]) -Greedy action tensor([ 0.4778, -0.5062, 0.0064, 0.3244]) tensor([0.3502, 0.1309, 0.2185, 0.3004]) -Greedy action tensor([ 1.3659, -0.3774, 0.4946, 0.1020]) tensor([0.5331, 0.0933, 0.2230, 0.1506]) -Greedy action tensor([ 0.6412, -0.8601, 0.0834, -0.0525]) tensor([0.4357, 0.0971, 0.2494, 0.2178]) -Greedy action tensor([ 0.4233, 0.0777, -0.1279, 0.7145]) tensor([0.2761, 0.1954, 0.1591, 0.3694]) -Greedy action tensor([ 0.3440, -0.9411, -0.2347, 1.2623]) tensor([0.2303, 0.0637, 0.1291, 0.5769]) -Greedy action tensor([-0.7218, 0.4960, -0.4237, -0.2088]) tensor([0.1352, 0.4569, 0.1821, 0.2258]) -Greedy action tensor([-0.7865, -0.9577, 0.4113, -1.7527]) tensor([0.1806, 0.1522, 0.5984, 0.0687]) -Greedy action tensor([ 0.6399, 0.0660, 0.8415, -0.4936]) tensor([0.3217, 0.1812, 0.3935, 0.1036]) -Greedy action tensor([-0.4192, 0.0456, 0.7350, -0.7732]) tensor([0.1547, 0.2462, 0.4906, 0.1086]) -Greedy action tensor([-0.3502, 0.7713, -0.3856, -0.5316]) tensor([0.1704, 0.5230, 0.1645, 0.1421]) -Greedy action tensor([ 0.2861, -1.6853, 0.4201, 1.8140]) tensor([0.1451, 0.0202, 0.1659, 0.6687]) -Greedy action tensor([ 0.7045, -0.1567, 0.2216, -0.0233]) tensor([0.3964, 0.1675, 0.2446, 0.1915]) -Greedy action tensor([-1.4308, -1.2817, 0.1869, 0.3010]) tensor([0.0778, 0.0903, 0.3922, 0.4396]) -Greedy action tensor([-0.0278, -0.5928, 0.8616, 0.3396]) tensor([0.1836, 0.1044, 0.4469, 0.2652]) -Greedy action tensor([-0.6708, 0.3044, 0.7964, -0.3766]) tensor([0.1072, 0.2842, 0.4648, 0.1438]) -Greedy action tensor([-0.3264, -1.1331, -0.2820, -0.0109]) tensor([0.2589, 0.1155, 0.2706, 0.3549]) -Greedy action tensor([-0.5907, 0.2565, 0.9151, -1.0334]) tensor([0.1179, 0.2750, 0.5314, 0.0757]) -Greedy action tensor([-0.2293, 0.0913, -0.4670, 0.4296]) tensor([0.1961, 0.2702, 0.1546, 0.3790]) -Greedy action tensor([-0.2717, -0.1638, 0.1149, -0.6698]) tensor([0.2349, 0.2616, 0.3457, 0.1578]) -Greedy action tensor([-0.2844, -0.3500, 0.6218, -0.2696]) tensor([0.1843, 0.1726, 0.4561, 0.1870]) -Greedy action tensor([ 0.4027, 0.3459, -0.5096, -0.9245]) tensor([0.3829, 0.3618, 0.1538, 0.1016]) -Greedy action tensor([ 0.6694, -0.9101, 0.6668, 0.8541]) tensor([0.2936, 0.0605, 0.2928, 0.3531]) -Greedy action tensor([-0.2328, -0.0068, -0.6805, -0.4964]) tensor([0.2731, 0.3424, 0.1746, 0.2099]) -Greedy action tensor([ 1.5059, -0.1742, 0.2305, 0.9995]) tensor([0.4835, 0.0901, 0.1350, 0.2914]) -Greedy action tensor([-1.3118, -0.4199, -0.0864, -0.0424]) tensor([0.0961, 0.2345, 0.3273, 0.3420]) -Greedy action tensor([-0.7870, -0.9698, -0.8591, -0.9060]) tensor([0.2739, 0.2281, 0.2548, 0.2432]) -Greedy action tensor([ 2.0181, -1.0367, -0.0321, 0.5457]) tensor([0.7116, 0.0335, 0.0916, 0.1632]) -Greedy action tensor([ 1.3533, 0.1493, -0.4456, 0.3169]) tensor([0.5494, 0.1648, 0.0909, 0.1949]) -Greedy action tensor([ 1.8831, 0.1451, -0.4557, 0.1504]) tensor([0.6901, 0.1214, 0.0666, 0.1220]) -Greedy action tensor([ 1.2659, -0.4155, -0.3355, 0.6426]) tensor([0.5198, 0.0967, 0.1048, 0.2787]) -Greedy action tensor([ 1.5138, -0.5454, -0.4232, 0.2157]) tensor([0.6474, 0.0826, 0.0933, 0.1768]) -Greedy action tensor([ 1.5103, -0.4266, -0.4284, 0.1581]) tensor([0.6465, 0.0932, 0.0930, 0.1672]) -Greedy action tensor([ 1.8579, -0.9005, -0.1990, 0.4333]) tensor([0.6984, 0.0443, 0.0893, 0.1680]) -Greedy action tensor([ 1.9255, -0.9343, -0.4230, 0.4978]) tensor([0.7180, 0.0411, 0.0686, 0.1722]) -Greedy action tensor([ 1.1496, -0.2500, -0.3147, 0.0313]) tensor([0.5541, 0.1367, 0.1281, 0.1811]) -Greedy action tensor([ 1.4851, -0.5252, -0.6279, 0.1096]) tensor([0.6633, 0.0889, 0.0802, 0.1676]) -Greedy action tensor([ 1.4382, -0.0805, -0.4892, 0.4157]) tensor([0.5800, 0.1270, 0.0844, 0.2086]) -Greedy action tensor([ 1.1222, -0.6194, -0.2259, -0.0410]) tensor([0.5723, 0.1003, 0.1486, 0.1788]) -Greedy action tensor([ 1.6698, -0.2638, -0.5472, 0.0678]) tensor([0.6873, 0.0994, 0.0749, 0.1385]) -Greedy action tensor([ 1.1172, 0.0403, -0.7429, 0.2716]) tensor([0.5193, 0.1769, 0.0808, 0.2229]) -Greedy action tensor([ 1.4361, 0.0398, -0.2468, 0.4712]) tensor([0.5512, 0.1364, 0.1024, 0.2100]) -Greedy action tensor([ 1.4588, -0.2002, -0.6060, 0.1085]) tensor([0.6344, 0.1207, 0.0805, 0.1644]) -Greedy action tensor([ 1.6390, -0.4662, -0.0787, 0.2768]) tensor([0.6421, 0.0782, 0.1152, 0.1644]) -Greedy action tensor([ 1.4837, -0.2876, -0.4847, 0.2828]) tensor([0.6208, 0.1056, 0.0867, 0.1868]) -Greedy action tensor([ 1.4696, -0.5675, -0.1845, 0.1368]) tensor([0.6307, 0.0823, 0.1206, 0.1664]) -Greedy action tensor([ 1.0518, -0.2521, -0.6962, 0.2615]) tensor([0.5265, 0.1429, 0.0917, 0.2389]) -Greedy action tensor([ 1.7239, -0.5875, -0.4104, 0.2164]) tensor([0.6950, 0.0689, 0.0822, 0.1539]) -Greedy action tensor([ 1.4247, -0.3218, -0.7297, 0.3770]) tensor([0.6094, 0.1063, 0.0707, 0.2137]) -Greedy action tensor([ 1.2204, -0.8702, -0.2411, 0.5284]) tensor([0.5388, 0.0666, 0.1249, 0.2697]) -Greedy action tensor([ 1.5598, -0.2249, -0.3590, 0.2349]) tensor([0.6327, 0.1062, 0.0929, 0.1682]) -Greedy action tensor([ 1.7077, -0.4933, -0.3446, 0.0882]) tensor([0.6958, 0.0770, 0.0894, 0.1378]) -Greedy action tensor([ 1.5448, -1.0304, -0.1247, 0.6646]) tensor([0.5955, 0.0453, 0.1122, 0.2470]) -Greedy action tensor([ 1.6239, 0.1709, -0.6710, 0.3477]) tensor([0.6197, 0.1449, 0.0624, 0.1730]) -Greedy action tensor([ 1.6005, -0.9481, 0.1299, 0.0222]) tensor([0.6604, 0.0516, 0.1517, 0.1363]) -Greedy action tensor([ 1.3639, -0.3160, -0.4129, 0.4982]) tensor([0.5630, 0.1049, 0.0952, 0.2369]) -Greedy action tensor([ 1.2274, -0.6771, -0.4122, 0.6372]) tensor([0.5271, 0.0785, 0.1023, 0.2921]) -Greedy action tensor([ 2.8097, -1.2353, 0.1991, 0.2826]) tensor([0.8540, 0.0150, 0.0628, 0.0682]) -Greedy action tensor([ 1.4664, -0.2233, -0.4378, 0.3449]) tensor([0.6027, 0.1112, 0.0898, 0.1963]) -Greedy action tensor([ 1.3539, -0.2362, -0.2513, 0.1723]) tensor([0.5843, 0.1191, 0.1174, 0.1792]) -Greedy action tensor([ 1.9708, -0.5712, -0.8685, 0.3596]) tensor([0.7480, 0.0589, 0.0437, 0.1493]) -Greedy action tensor([ 1.2478, -0.0826, -1.0580, 0.3978]) tensor([0.5582, 0.1476, 0.0556, 0.2386]) -Greedy action tensor([ 1.5031, -0.3538, -0.1778, -0.1048]) tensor([0.6482, 0.1012, 0.1207, 0.1298]) -Greedy action tensor([ 1.1563, -0.4546, 0.2272, -0.0542]) tensor([0.5283, 0.1055, 0.2087, 0.1575]) -Greedy action tensor([ 2.0702, -1.3854, -0.0402, 0.5349]) tensor([0.7309, 0.0231, 0.0886, 0.1574]) -Greedy action tensor([ 2.3722, -1.0690, -0.3621, 0.5698]) tensor([0.7925, 0.0254, 0.0515, 0.1307]) -Greedy action tensor([ 1.0799, -0.7722, -0.2904, 0.4060]) tensor([0.5207, 0.0817, 0.1323, 0.2654]) -Greedy action tensor([ 2.0116, -0.5686, 0.0125, 0.6017]) tensor([0.6871, 0.0521, 0.0931, 0.1678]) -Greedy action tensor([ 1.0367, -0.5742, -0.7868, 0.2243]) tensor([0.5540, 0.1106, 0.0895, 0.2459]) -Greedy action tensor([ 2.2973, -0.1787, -0.4237, 0.5349]) tensor([0.7567, 0.0636, 0.0498, 0.1299]) -Greedy action tensor([ 1.5856, -0.0202, -0.3768, 0.2264]) tensor([0.6257, 0.1256, 0.0879, 0.1607]) -Greedy action tensor([ 1.7106, -0.7451, -0.6132, 0.5388]) tensor([0.6696, 0.0575, 0.0656, 0.2074]) -Greedy action tensor([ 1.6666, -0.4849, -0.3475, 0.2856]) tensor([0.6662, 0.0775, 0.0889, 0.1674]) -Greedy action tensor([ 1.0328, -0.1955, -0.3549, 0.3917]) tensor([0.4833, 0.1415, 0.1207, 0.2545]) -Greedy action tensor([ 1.6313, -0.2359, -0.0257, 0.2499]) tensor([0.6264, 0.0968, 0.1195, 0.1574]) -Greedy action tensor([ 1.6033, -0.4485, -0.1990, -0.0562]) tensor([0.6740, 0.0866, 0.1112, 0.1282]) -Greedy action tensor([ 1.1824, -0.5590, -0.3253, 0.4583]) tensor([0.5315, 0.0932, 0.1177, 0.2577]) -Greedy action tensor([ 1.0676, -0.5085, -0.0346, 0.3098]) tensor([0.4981, 0.1030, 0.1654, 0.2335]) -Greedy action tensor([ 1.9357, -0.6187, -0.3051, 0.5796]) tensor([0.6936, 0.0539, 0.0738, 0.1787]) -Greedy action tensor([ 1.1057, -0.0811, -0.0804, 0.1713]) tensor([0.4991, 0.1523, 0.1524, 0.1961]) -Greedy action tensor([ 1.0389, -0.3029, -0.7569, 0.4194]) tensor([0.5088, 0.1330, 0.0844, 0.2738]) -Greedy action tensor([ 1.8305, -0.8698, -0.6206, 0.2900]) tensor([0.7312, 0.0491, 0.0630, 0.1567]) -Greedy action tensor([ 1.3085, -0.2768, -0.6294, 0.5966]) tensor([0.5436, 0.1114, 0.0783, 0.2668]) -Greedy action tensor([ 1.8960, -0.2517, -0.5670, -0.1236]) tensor([0.7493, 0.0875, 0.0638, 0.0994]) -Greedy action tensor([ 2.1819, -0.6214, -0.0430, 0.2240]) tensor([0.7634, 0.0463, 0.0825, 0.1078]) -Greedy action tensor([ 1.8980, -0.8916, -0.5372, 0.9903]) tensor([0.6441, 0.0396, 0.0564, 0.2599]) -Greedy action tensor([ 1.3764, 0.0292, -0.6216, 0.4829]) tensor([0.5541, 0.1441, 0.0751, 0.2267]) -Greedy action tensor([ 1.6864, -0.6514, -0.4044, 0.0186]) tensor([0.7098, 0.0685, 0.0877, 0.1339]) -Greedy action tensor([ 1.2870, -0.2502, -0.5254, 0.0911]) tensor([0.5950, 0.1279, 0.0971, 0.1799]) -Greedy action tensor([ 1.8374, -0.5779, -0.6566, 0.4726]) tensor([0.7006, 0.0626, 0.0579, 0.1790]) -Greedy action tensor([ 1.0092, -0.0118, -0.7899, 0.2752]) tensor([0.4986, 0.1796, 0.0825, 0.2393]) -Greedy action tensor([ 1.1147e+00, -4.2344e-01, 4.5270e-05, 1.1984e-01]) tensor([0.5228, 0.1123, 0.1715, 0.1933]) -Greedy action tensor([ 1.4401, 0.1560, -0.1822, 0.5988]) tensor([0.5248, 0.1453, 0.1036, 0.2263]) -Greedy action tensor([ 1.9457, -1.2690, -0.4526, 0.2574]) tensor([0.7600, 0.0305, 0.0691, 0.1405]) -Greedy action tensor([ 1.7808, -0.6524, -0.6298, 0.5358]) tensor([0.6824, 0.0599, 0.0612, 0.1965]) -Greedy action tensor([ 1.4924, -0.1518, -1.2470, 0.0290]) tensor([0.6715, 0.1297, 0.0434, 0.1554]) -Greedy action tensor([ 1.4303, -0.5070, -0.1624, 0.3250]) tensor([0.5957, 0.0858, 0.1212, 0.1973]) -Greedy action tensor([ 1.7003, -0.7164, -0.5637, 0.3001]) tensor([0.6946, 0.0620, 0.0722, 0.1712]) -Greedy action tensor([ 1.5135, -0.8990, -0.4145, 0.3801]) tensor([0.6423, 0.0575, 0.0934, 0.2068]) -Greedy action tensor([ 1.3978, -0.2354, -0.4310, 0.3637]) tensor([0.5843, 0.1141, 0.0938, 0.2077]) -Greedy action tensor([ 1.7860, -0.6157, -0.5685, 0.5986]) tensor([0.6709, 0.0608, 0.0637, 0.2046]) -Greedy action tensor([ 1.5086, -0.6640, -0.7908, -0.3079]) tensor([0.7263, 0.0827, 0.0729, 0.1181]) -Greedy action tensor([ 1.0157, -0.1624, -0.3629, 0.7947]) tensor([0.4235, 0.1304, 0.1067, 0.3395]) -Greedy action tensor([ 2.3026, -1.3516, -0.0951, 0.7965]) tensor([0.7471, 0.0193, 0.0679, 0.1657]) -Greedy action tensor([ 1.6409, -0.5198, -0.7176, 0.2561]) tensor([0.6849, 0.0789, 0.0648, 0.1715]) -Greedy action tensor([ 1.3418, -0.6278, -0.4922, -0.0970]) tensor([0.6508, 0.0908, 0.1040, 0.1544]) -Greedy action tensor([ 1.3678, -0.3958, -0.5675, 0.2773]) tensor([0.6054, 0.1038, 0.0874, 0.2034]) -Greedy action tensor([ 1.5814, -1.0630, -0.3124, 0.3285]) tensor([0.6635, 0.0471, 0.0999, 0.1895]) -Greedy action tensor([ 0.8214, -0.7416, 0.1432, -0.3818]) tensor([0.4957, 0.1039, 0.2516, 0.1488]) -Greedy action tensor([ 0.6025, 0.0093, -0.0175, -0.0682]) tensor([0.3843, 0.2124, 0.2068, 0.1965]) -Greedy action tensor([ 0.4555, 0.0570, -0.0194, -0.1306]) tensor([0.3509, 0.2356, 0.2182, 0.1953]) -Greedy action tensor([ 0.5605, -0.5757, -0.1260, -0.2984]) tensor([0.4448, 0.1428, 0.2239, 0.1884]) -Greedy action tensor([ 0.8707, -0.5249, 0.0723, -0.1777]) tensor([0.4882, 0.1209, 0.2197, 0.1711]) -Greedy action tensor([ 0.5780, -0.1395, -0.0063, -0.2738]) tensor([0.4045, 0.1974, 0.2255, 0.1726]) -Greedy action tensor([ 0.7976, -0.4284, -0.0687, -0.3028]) tensor([0.4886, 0.1434, 0.2055, 0.1626]) -Greedy action tensor([ 0.8742, -0.3487, -0.0350, -0.2281]) tensor([0.4928, 0.1451, 0.1985, 0.1637]) -Greedy action tensor([ 0.7389, -0.5886, -0.0305, -0.2239]) tensor([0.4739, 0.1256, 0.2196, 0.1809]) -Greedy action tensor([ 0.8801, -0.2986, -0.0571, -0.5673]) tensor([0.5169, 0.1590, 0.2025, 0.1216]) -Greedy action tensor([ 0.5761, -0.3161, -0.0254, -0.2022]) tensor([0.4138, 0.1695, 0.2267, 0.1900]) -Greedy action tensor([ 0.2764, -0.1357, -0.0780, -0.0740]) tensor([0.3259, 0.2158, 0.2287, 0.2296]) -Greedy action tensor([ 0.7471, -0.3537, -0.1092, -0.2447]) tensor([0.4699, 0.1563, 0.1996, 0.1743]) -Greedy action tensor([ 0.9631, -0.5269, -0.1125, -0.3093]) tensor([0.5415, 0.1220, 0.1847, 0.1517]) -Greedy action tensor([ 0.7580, -0.2860, -0.0919, -0.0172]) tensor([0.4464, 0.1572, 0.1908, 0.2056]) -Greedy action tensor([ 1.0986, -0.5192, -0.2326, -0.3115]) tensor([0.5860, 0.1162, 0.1548, 0.1430]) -Greedy action tensor([ 0.6932, -0.4414, -0.0641, -0.2663]) tensor([0.4601, 0.1479, 0.2157, 0.1763]) -Greedy action tensor([ 0.9069, -0.5418, 0.0817, -0.3856]) tensor([0.5135, 0.1206, 0.2250, 0.1410]) -Greedy action tensor([ 0.6630, -0.5448, -0.2668, -0.5320]) tensor([0.5010, 0.1497, 0.1977, 0.1516]) -Greedy action tensor([ 0.5464, -0.5326, -0.2108, -0.0123]) tensor([0.4200, 0.1428, 0.1970, 0.2402]) -Greedy action tensor([ 1.0998, -0.4289, -0.1078, -0.6146]) tensor([0.5897, 0.1279, 0.1763, 0.1062]) -Greedy action tensor([ 1.0206, -0.6278, -0.0954, -0.6080]) tensor([0.5827, 0.1121, 0.1909, 0.1143]) -Greedy action tensor([ 0.5264, -0.1809, -0.0174, -0.1537]) tensor([0.3876, 0.1911, 0.2250, 0.1963]) -Greedy action tensor([ 1.0404, -0.5812, -0.0466, -0.5331]) tensor([0.5740, 0.1134, 0.1936, 0.1190]) -Greedy action tensor([ 1.1266, -0.5177, -0.0907, -0.6621]) tensor([0.6037, 0.1166, 0.1787, 0.1009]) -Greedy action tensor([ 0.7071, -0.3100, 0.0636, -0.3412]) tensor([0.4469, 0.1616, 0.2348, 0.1567]) -Greedy action tensor([ 1.0338, -1.0596, 0.1503, -0.6010]) tensor([0.5775, 0.0712, 0.2387, 0.1126]) -Greedy action tensor([ 1.4414, -1.0480, 0.1292, -0.8664]) tensor([0.6889, 0.0571, 0.1855, 0.0685]) -Greedy action tensor([ 0.7301, -0.4089, -0.0339, -0.1809]) tensor([0.4570, 0.1463, 0.2129, 0.1838]) -Greedy action tensor([ 1.0892, -0.8130, 0.0834, -0.4074]) tensor([0.5751, 0.0858, 0.2103, 0.1287]) -Greedy action tensor([ 0.3564, -0.0571, 0.1207, -0.1666]) tensor([0.3285, 0.2173, 0.2595, 0.1947]) -Greedy action tensor([ 0.9182, -0.5219, -0.0807, -0.0607]) tensor([0.5048, 0.1196, 0.1859, 0.1897]) -Greedy action tensor([ 0.9044, -0.6975, -0.1500, -0.4371]) tensor([0.5521, 0.1112, 0.1923, 0.1443]) -Greedy action tensor([ 0.8522, -0.3958, -0.1537, -0.3259]) tensor([0.5100, 0.1464, 0.1865, 0.1570]) -Greedy action tensor([ 0.8808, -0.3250, 0.0846, -0.3812]) tensor([0.4917, 0.1473, 0.2218, 0.1392]) -Greedy action tensor([ 0.4982, -0.1298, -0.0120, -0.0780]) tensor([0.3709, 0.1979, 0.2227, 0.2085]) -Greedy action tensor([ 0.4367, 0.0431, -0.0383, 0.0372]) tensor([0.3370, 0.2274, 0.2096, 0.2260]) -Greedy action tensor([ 0.6701, -0.2537, -0.2915, -0.2461]) tensor([0.4589, 0.1822, 0.1754, 0.1836]) -Greedy action tensor([ 0.6285, -0.2403, 0.0435, -0.1769]) tensor([0.4126, 0.1731, 0.2299, 0.1844]) -Greedy action tensor([ 0.8025, -0.6440, -0.0842, -0.2714]) tensor([0.5027, 0.1183, 0.2071, 0.1718]) -Greedy action tensor([ 0.4241, 0.2135, -0.0992, 0.1797]) tensor([0.3139, 0.2543, 0.1860, 0.2458]) -Greedy action tensor([ 0.8339, -0.4310, -0.0182, -0.3198]) tensor([0.4940, 0.1394, 0.2107, 0.1559]) -Greedy action tensor([ 0.6862, -0.7581, 0.0225, -0.4721]) tensor([0.4843, 0.1143, 0.2494, 0.1521]) -Greedy action tensor([ 0.7362, -0.1686, 0.0089, -0.1061]) tensor([0.4313, 0.1745, 0.2084, 0.1858]) -Greedy action tensor([ 1.0457, -0.6307, 0.0693, -0.6296]) tensor([0.5711, 0.1068, 0.2151, 0.1069]) -Greedy action tensor([ 0.2951, -0.2792, 0.0585, 0.1861]) tensor([0.3078, 0.1733, 0.2429, 0.2760]) -Greedy action tensor([ 1.0359, -0.4475, -0.1257, -0.4832]) tensor([0.5686, 0.1290, 0.1780, 0.1245]) -Greedy action tensor([ 0.4620, 0.2097, -0.1021, 0.0567]) tensor([0.3319, 0.2579, 0.1888, 0.2213]) -Greedy action tensor([ 0.4757, 0.0088, -0.1508, -0.2642]) tensor([0.3790, 0.2376, 0.2026, 0.1808]) -Greedy action tensor([ 0.9173, -0.8485, 0.0288, -0.5097]) tensor([0.5488, 0.0939, 0.2257, 0.1317]) -Greedy action tensor([ 0.6064, -0.4348, -0.1355, -0.4229]) tensor([0.4574, 0.1615, 0.2178, 0.1634]) -Greedy action tensor([0.2145, 0.3170, 0.0937, 0.1426]) tensor([0.2548, 0.2823, 0.2258, 0.2371]) -Greedy action tensor([ 0.9031, -0.4999, 0.1296, -0.3414]) tensor([0.5012, 0.1232, 0.2312, 0.1444]) -Greedy action tensor([ 0.7524, -0.1439, -0.0408, -0.6269]) tensor([0.4734, 0.1932, 0.2142, 0.1192]) -Greedy action tensor([ 0.9568, -0.6609, 0.0559, -0.2866]) tensor([0.5283, 0.1048, 0.2146, 0.1524]) -Greedy action tensor([ 0.9455, -0.4911, -0.0453, -0.3338]) tensor([0.5299, 0.1260, 0.1967, 0.1474]) -Greedy action tensor([ 0.5637, -0.0280, -0.0387, -0.2334]) tensor([0.3919, 0.2169, 0.2146, 0.1766]) -Greedy action tensor([ 0.7412, -0.8049, -0.1213, -0.2760]) tensor([0.5008, 0.1067, 0.2114, 0.1811]) -Greedy action tensor([ 0.3925, -0.1698, -0.0838, -0.0907]) tensor([0.3561, 0.2030, 0.2212, 0.2197]) -Greedy action tensor([ 0.9605, -0.4791, -0.1002, -0.2951]) tensor([0.5353, 0.1269, 0.1853, 0.1525]) -Greedy action tensor([ 0.6349, -0.7439, 0.0418, -0.5049]) tensor([0.4707, 0.1186, 0.2601, 0.1506]) -Greedy action tensor([ 0.8083, -0.4975, -0.0644, -0.3355]) tensor([0.4982, 0.1350, 0.2081, 0.1587]) -Greedy action tensor([ 0.8845, -0.7934, -0.0249, -0.5753]) tensor([0.5489, 0.1025, 0.2211, 0.1275]) -Greedy action tensor([ 0.9296, -0.8392, -0.0187, -0.4389]) tensor([0.5517, 0.0941, 0.2137, 0.1404]) -Greedy action tensor([ 0.6537, -0.0761, -0.0286, 0.0139]) tensor([0.3976, 0.1917, 0.2010, 0.2097]) -Greedy action tensor([ 0.8037, -0.3751, 0.2728, -0.5113]) tensor([0.4621, 0.1422, 0.2717, 0.1241]) -Greedy action tensor([ 1.0658, -0.2201, -0.1553, -0.3065]) tensor([0.5480, 0.1515, 0.1616, 0.1389]) -Greedy action tensor([ 0.5694, -0.4001, -0.0303, -0.2592]) tensor([0.4229, 0.1604, 0.2321, 0.1846]) -Greedy action tensor([ 0.9472, -0.6044, -0.0140, -0.4371]) tensor([0.5421, 0.1149, 0.2073, 0.1358]) -Greedy action tensor([ 0.2514, 0.0553, -0.1395, -0.2008]) tensor([0.3190, 0.2622, 0.2158, 0.2030]) -Greedy action tensor([ 0.7858, -0.2039, -0.2068, 0.0992]) tensor([0.4453, 0.1655, 0.1650, 0.2241]) -Greedy action tensor([ 0.6225, -0.2264, -0.1127, -0.0835]) tensor([0.4165, 0.1782, 0.1997, 0.2056]) -Greedy action tensor([ 1.0005, -0.7122, -0.0216, -0.5291]) tensor([0.5692, 0.1027, 0.2048, 0.1233]) -Greedy action tensor([ 0.5896, -0.2916, 0.0155, -0.0839]) tensor([0.4020, 0.1666, 0.2264, 0.2050]) -Greedy action tensor([ 0.9529, -0.8933, 0.0838, -0.3750]) tensor([0.5428, 0.0857, 0.2276, 0.1439]) -Greedy action tensor([ 0.3614, -0.1572, 0.1091, -0.4961]) tensor([0.3576, 0.2129, 0.2779, 0.1517]) -Greedy action tensor([ 1.1537, -0.5390, 0.0993, -0.7209]) tensor([0.5932, 0.1092, 0.2067, 0.0910]) -Greedy action tensor([ 1.0016, -0.7079, 0.0658, -0.2788]) tensor([0.5402, 0.0978, 0.2119, 0.1501]) -Greedy action tensor([ 0.5461, -0.4415, -0.0933, -0.1991]) tensor([0.4211, 0.1568, 0.2222, 0.1999]) -Greedy action tensor([ 0.6410, -0.3905, -0.0315, -0.0513]) tensor([0.4224, 0.1506, 0.2156, 0.2114]) -Greedy action tensor([ 0.4510, -0.1816, -0.1301, -0.0800]) tensor([0.3733, 0.1983, 0.2088, 0.2195]) -Greedy action tensor([-1.9491, -0.4507, 0.6666, -0.1835]) tensor([0.0400, 0.1790, 0.5471, 0.2338]) -Greedy action tensor([-1.6278, -0.5385, 0.5153, -0.0829]) tensor([0.0582, 0.1729, 0.4961, 0.2728]) -Greedy action tensor([-0.4163, 1.1145, 0.0404, 0.4699]) tensor([0.1039, 0.4801, 0.1640, 0.2520]) -Greedy action tensor([-1.7536, -0.2152, 0.5456, -0.1077]) tensor([0.0481, 0.2238, 0.4789, 0.2492]) -Greedy action tensor([-1.7126, -0.3860, 0.5392, -0.1046]) tensor([0.0519, 0.1956, 0.4933, 0.2592]) -Greedy action tensor([1.2023, 1.2808, 0.1434, 0.8482]) tensor([0.3195, 0.3455, 0.1108, 0.2242]) -Greedy action tensor([-1.9266, -0.4408, 0.6615, -0.1710]) tensor([0.0408, 0.1803, 0.5428, 0.2361]) -Greedy action tensor([-1.9444, -0.4496, 0.6663, -0.1807]) tensor([0.0402, 0.1790, 0.5465, 0.2343]) -Greedy action tensor([-0.7012, 0.8291, 0.0600, 0.0040]) tensor([0.1022, 0.4721, 0.2188, 0.2069]) -Greedy action tensor([-1.8649, -0.4497, 0.6358, -0.1377]) tensor([0.0436, 0.1795, 0.5316, 0.2453]) -Greedy action tensor([-1.8999, -0.4352, 0.6440, -0.1596]) tensor([0.0421, 0.1821, 0.5359, 0.2399]) -Greedy action tensor([-1.4004, 0.4879, 0.3066, -0.0347]) tensor([0.0587, 0.3878, 0.3235, 0.2300]) -Greedy action tensor([-1.8560, -0.3176, 0.5603, -0.2622]) tensor([0.0459, 0.2138, 0.5143, 0.2260]) -Greedy action tensor([-1.8450, -0.4518, 0.6226, -0.1256]) tensor([0.0446, 0.1798, 0.5264, 0.2491]) -Greedy action tensor([-1.9412, -0.4410, 0.6645, -0.1774]) tensor([0.0402, 0.1803, 0.5447, 0.2347]) -Greedy action tensor([-1.6981, -0.4979, 0.5346, -0.0631]) tensor([0.0533, 0.1769, 0.4967, 0.2732]) -Greedy action tensor([-1.5986, -0.5219, 0.5195, -0.2931]) tensor([0.0627, 0.1841, 0.5217, 0.2315]) -Greedy action tensor([-1.9339, -0.4468, 0.6629, -0.1735]) tensor([0.0406, 0.1794, 0.5442, 0.2358]) -Greedy action tensor([-1.0333, 0.7767, 0.1500, 0.0527]) tensor([0.0750, 0.4581, 0.2448, 0.2221]) -Greedy action tensor([-1.8941, -0.3993, 0.6448, -0.1531]) tensor([0.0420, 0.1871, 0.5316, 0.2394]) -Greedy action tensor([-1.8159, -0.1630, 0.5686, -0.0703]) tensor([0.0438, 0.2290, 0.4759, 0.2512]) -Greedy action tensor([-1.0644, 0.6636, 0.2454, -0.1019]) tensor([0.0772, 0.4346, 0.2861, 0.2021]) -Greedy action tensor([-1.7235, -0.0739, 0.5579, -0.0709]) tensor([0.0471, 0.2453, 0.4615, 0.2461]) -Greedy action tensor([-1.7460, -0.5148, 0.5688, -0.1125]) tensor([0.0508, 0.1741, 0.5146, 0.2604]) -Greedy action tensor([-1.9292, -0.4430, 0.6600, -0.1721]) tensor([0.0408, 0.1802, 0.5429, 0.2362]) -Greedy action tensor([-1.8221, -0.3561, 0.6231, -0.0837]) tensor([0.0443, 0.1921, 0.5114, 0.2522]) -Greedy action tensor([-1.8363, -0.3455, 0.6026, -0.1399]) tensor([0.0447, 0.1986, 0.5126, 0.2440]) -Greedy action tensor([-1.8968, -0.4537, 0.6494, -0.1540]) tensor([0.0422, 0.1786, 0.5382, 0.2410]) -Greedy action tensor([-1.8748, -0.3933, 0.6516, -0.1189]) tensor([0.0422, 0.1857, 0.5279, 0.2443]) -Greedy action tensor([-1.9381, -0.4416, 0.6655, -0.1768]) tensor([0.0403, 0.1801, 0.5449, 0.2347]) -Greedy action tensor([-1.0605, 0.0942, 0.4156, 0.4330]) tensor([0.0769, 0.2441, 0.3366, 0.3425]) -Greedy action tensor([-1.9133, -0.4548, 0.6572, -0.1655]) tensor([0.0415, 0.1783, 0.5421, 0.2381]) -Greedy action tensor([-1.9233, -0.4187, 0.6590, -0.1672]) tensor([0.0408, 0.1836, 0.5395, 0.2361]) -Greedy action tensor([-1.8501, -0.3231, 0.6146, -0.1250]) tensor([0.0435, 0.2004, 0.5118, 0.2443]) -Greedy action tensor([-1.8939, -0.4441, 0.6444, -0.1560]) tensor([0.0424, 0.1806, 0.5362, 0.2409]) -Greedy action tensor([-1.7299, 0.2384, 0.4804, -0.0204]) tensor([0.0439, 0.3139, 0.3999, 0.2423]) -Greedy action tensor([-1.5180, 0.1305, 0.4384, 0.1353]) tensor([0.0541, 0.2811, 0.3824, 0.2824]) -Greedy action tensor([-1.8888, -0.4490, 0.6388, -0.1578]) tensor([0.0428, 0.1804, 0.5354, 0.2414]) -Greedy action tensor([-1.8271, -0.4421, 0.6618, -0.0496]) tensor([0.0436, 0.1740, 0.5248, 0.2576]) -Greedy action tensor([-1.7517, -0.4377, 0.6360, -0.0799]) tensor([0.0478, 0.1778, 0.5202, 0.2542]) -Greedy action tensor([-1.8572, -0.4452, 0.6253, -0.1349]) tensor([0.0441, 0.1810, 0.5280, 0.2469]) -Greedy action tensor([-0.9286, 0.2683, 0.4338, 0.2637]) tensor([0.0869, 0.2876, 0.3393, 0.2862]) -Greedy action tensor([-1.6981e+00, -1.8201e-01, 5.6474e-01, -6.4158e-04]) tensor([0.0485, 0.2208, 0.4660, 0.2647]) -Greedy action tensor([ 0.0363, -0.4383, 0.9377, 1.7333]) tensor([0.1048, 0.0652, 0.2581, 0.5719]) -Greedy action tensor([-1.6640, -0.4355, 0.5231, -0.0216]) tensor([0.0541, 0.1847, 0.4818, 0.2794]) -Greedy action tensor([-1.7541, -0.3848, 0.5598, -0.1044]) tensor([0.0494, 0.1942, 0.4994, 0.2570]) -Greedy action tensor([-1.8120, -0.4447, 0.6053, -0.0316]) tensor([0.0453, 0.1778, 0.5081, 0.2688]) -Greedy action tensor([-1.8796, -0.4260, 0.6487, -0.1311]) tensor([0.0425, 0.1816, 0.5320, 0.2439]) -Greedy action tensor([-1.9219, -0.4821, 0.7390, -0.1383]) tensor([0.0392, 0.1656, 0.5616, 0.2336]) -Greedy action tensor([-1.3194, -0.1863, 0.4099, -0.1137]) tensor([0.0764, 0.2374, 0.4309, 0.2553]) -Greedy action tensor([-1.8796, -0.4485, 0.6354, -0.1524]) tensor([0.0432, 0.1805, 0.5336, 0.2427]) -Greedy action tensor([-1.5733, -0.1568, 0.4706, -0.0642]) tensor([0.0576, 0.2374, 0.4446, 0.2604]) -Greedy action tensor([-1.8039, -0.2676, 0.5753, -0.1295]) tensor([0.0459, 0.2134, 0.4957, 0.2450]) -Greedy action tensor([-1.8678, -0.3735, 0.6206, -0.1411]) tensor([0.0433, 0.1927, 0.5208, 0.2432]) -Greedy action tensor([-1.8007, -0.2737, 0.5716, -0.1376]) tensor([0.0463, 0.2131, 0.4963, 0.2442]) -Greedy action tensor([-1.9210, -0.4424, 0.6588, -0.1681]) tensor([0.0411, 0.1801, 0.5418, 0.2370]) -Greedy action tensor([-0.6356, -0.0586, 0.3107, 0.5599]) tensor([0.1154, 0.2056, 0.2974, 0.3816]) -Greedy action tensor([-1.1161, 0.8706, 0.3562, -0.4432]) tensor([0.0684, 0.4990, 0.2984, 0.1341]) -Greedy action tensor([-1.8756, -0.4619, 0.6409, -0.1469]) tensor([0.0432, 0.1777, 0.5355, 0.2436]) -Greedy action tensor([-1.9391, -0.4419, 0.6648, -0.1759]) tensor([0.0403, 0.1801, 0.5446, 0.2350]) -Greedy action tensor([-1.6434, -0.4900, 0.5182, -0.0292]) tensor([0.0559, 0.1773, 0.4858, 0.2810]) -Greedy action tensor([-1.9174, -0.4410, 0.6532, -0.1677]) tensor([0.0413, 0.1809, 0.5402, 0.2377]) -Greedy action tensor([-1.7147, -0.3787, 0.5197, -0.0464]) tensor([0.0514, 0.1956, 0.4803, 0.2727]) -Greedy action tensor([-1.7145, -0.4449, 0.5653, -0.0525]) tensor([0.0510, 0.1816, 0.4986, 0.2688]) -Greedy action tensor([-1.8520, -0.4115, 0.6129, -0.1147]) tensor([0.0441, 0.1863, 0.5189, 0.2507]) -Greedy action tensor([-1.9015, -0.4317, 0.6479, -0.1543]) tensor([0.0419, 0.1820, 0.5359, 0.2402]) -Greedy action tensor([-1.8705, -0.3627, 0.6437, -0.1490]) tensor([0.0426, 0.1925, 0.5266, 0.2383]) -Greedy action tensor([-1.4034, 0.4309, 0.2400, 0.1549]) tensor([0.0582, 0.3643, 0.3010, 0.2765]) -Greedy action tensor([-1.9160, -0.4586, 0.6586, -0.1641]) tensor([0.0413, 0.1776, 0.5427, 0.2384]) -Greedy action tensor([-1.0467, 0.5157, 0.2331, -0.0868]) tensor([0.0835, 0.3983, 0.3002, 0.2180]) -Greedy action tensor([-1.4134, -0.3781, 0.4133, 0.0030]) tensor([0.0707, 0.1990, 0.4391, 0.2913]) -Greedy action tensor([-1.1335, -0.6235, 0.2365, 0.3050]) tensor([0.0925, 0.1540, 0.3639, 0.3897]) -Greedy action tensor([-1.9440, -0.4490, 0.6673, -0.1801]) tensor([0.0401, 0.1790, 0.5466, 0.2342]) -Greedy action tensor([0.0608, 1.1939, 0.0337, 0.3512]) tensor([0.1559, 0.4840, 0.1517, 0.2084]) -Greedy action tensor([-1.4510, 0.0540, 0.5671, 0.1903]) tensor([0.0550, 0.2476, 0.4136, 0.2838]) -Greedy action tensor([-1.7869, -0.4277, 0.6410, -0.0227]) tensor([0.0453, 0.1764, 0.5137, 0.2645]) -Greedy action tensor([-1.6463, 0.1448, 0.5006, -0.0313]) tensor([0.0486, 0.2913, 0.4158, 0.2443]) -Greedy action tensor([-1.7366, -0.4448, 0.5719, -0.0670]) tensor([0.0500, 0.1819, 0.5027, 0.2654]) -Greedy action tensor([-1.8983, -0.3211, 0.6157, -0.1433]) tensor([0.0417, 0.2019, 0.5152, 0.2412]) -Greedy action tensor([-1.5302, 0.4130, 0.3440, 0.0143]) tensor([0.0521, 0.3639, 0.3397, 0.2443]) -Greedy action tensor([-1.9404, -0.4572, 0.6657, -0.1778]) tensor([0.0404, 0.1778, 0.5466, 0.2352]) -Greedy action tensor([-0.0159, -1.0315, -0.8902, -0.3436]) tensor([0.4000, 0.1449, 0.1669, 0.2882]) -Greedy action tensor([-1.0937, -2.2196, -0.4954, -0.5157]) tensor([0.2030, 0.0658, 0.3693, 0.3619]) -Greedy action tensor([-0.6084, -0.2174, -1.7583, 0.1558]) tensor([0.2023, 0.2991, 0.0641, 0.4345]) -Greedy action tensor([ 1.2184, -0.3147, 0.5816, 0.7258]) tensor([0.4245, 0.0916, 0.2245, 0.2594]) -Greedy action tensor([ 0.2724, -0.4234, -0.5018, -0.5463]) tensor([0.4165, 0.2077, 0.1921, 0.1837]) -Greedy action tensor([ 0.2489, -1.0934, 0.1743, 0.1406]) tensor([0.3240, 0.0846, 0.3007, 0.2907]) -Greedy action tensor([ 0.5774, -1.1094, 0.4233, 0.2070]) tensor([0.3659, 0.0677, 0.3137, 0.2527]) -Greedy action tensor([ 0.1545, -1.8603, 0.0023, -0.3642]) tensor([0.3865, 0.0515, 0.3319, 0.2301]) -Greedy action tensor([-0.9233, 0.1731, -1.1077, -0.7614]) tensor([0.1666, 0.4988, 0.1386, 0.1959]) -Greedy action tensor([ 0.5333, -1.1284, -1.3286, -0.2671]) tensor([0.5573, 0.1058, 0.0866, 0.2503]) -Greedy action tensor([-0.7448, -1.3887, 0.2115, 0.0984]) tensor([0.1550, 0.0814, 0.4034, 0.3602]) -Greedy action tensor([-0.9427, 0.8759, 0.6717, -0.9740]) tensor([0.0760, 0.4684, 0.3819, 0.0737]) -Greedy action tensor([-1.9038, 0.1901, 1.3356, -1.4950]) tensor([0.0277, 0.2246, 0.7061, 0.0416]) -Greedy action tensor([-0.4621, -0.1037, -0.1435, 0.2550]) tensor([0.1708, 0.2444, 0.2349, 0.3499]) -Greedy action tensor([ 1.4467e+00, -8.3721e-04, 6.2237e-01, 1.8239e-02]) tensor([0.5226, 0.1229, 0.2292, 0.1253]) -Greedy action tensor([ 0.1755, 0.8944, -0.7484, -1.2036]) tensor([0.2702, 0.5545, 0.1073, 0.0680]) -Greedy action tensor([-1.2960, -1.1050, 1.3182, -1.5854]) tensor([0.0602, 0.0728, 0.8219, 0.0451]) -Greedy action tensor([-0.2709, -1.4180, 1.1377, -1.0391]) tensor([0.1703, 0.0541, 0.6966, 0.0790]) -Greedy action tensor([ 0.2445, 0.2204, 0.0461, -0.0066]) tensor([0.2798, 0.2731, 0.2294, 0.2177]) -Greedy action tensor([-1.7271, -0.1244, -1.4812, 0.3861]) tensor([0.0644, 0.3200, 0.0824, 0.5331]) -Greedy action tensor([ 0.0326, -1.2463, -1.1934, 0.3162]) tensor([0.3449, 0.0960, 0.1012, 0.4580]) -Greedy action tensor([ 0.2032, 0.0933, 1.1691, -0.2216]) tensor([0.1932, 0.1731, 0.5075, 0.1263]) -Greedy action tensor([ 0.2869, -0.7284, -0.2355, -0.1869]) tensor([0.3879, 0.1405, 0.2301, 0.2415]) -Greedy action tensor([-0.7233, -1.0106, -0.0698, -1.2662]) tensor([0.2351, 0.1764, 0.4519, 0.1366]) -Greedy action tensor([ 0.6584, -0.0809, -0.6096, 0.1635]) tensor([0.4222, 0.2016, 0.1188, 0.2574]) -Greedy action tensor([-0.1036, -0.3431, 0.0671, 2.0687]) tensor([0.0851, 0.0670, 0.1009, 0.7470]) -Greedy action tensor([ 0.3483, -0.1695, 1.3100, -1.3614]) tensor([0.2276, 0.1356, 0.5955, 0.0412]) -Greedy action tensor([-0.7053, -1.4501, 0.0133, 0.5451]) tensor([0.1425, 0.0677, 0.2923, 0.4975]) -Greedy action tensor([-0.3377, -0.2216, -0.0308, -0.5324]) tensor([0.2323, 0.2609, 0.3157, 0.1912]) -Greedy action tensor([ 0.0837, -2.4205, 0.4085, 0.1529]) tensor([0.2827, 0.0231, 0.3912, 0.3030]) -Greedy action tensor([-0.0626, -1.3841, -0.5306, 0.1286]) tensor([0.3222, 0.0859, 0.2018, 0.3901]) -Greedy action tensor([-0.2364, 0.6761, 0.5662, -0.5326]) tensor([0.1547, 0.3852, 0.3451, 0.1150]) -Greedy action tensor([ 0.9582, -0.9980, 0.2179, 0.7335]) tensor([0.4137, 0.0585, 0.1973, 0.3305]) -Greedy action tensor([ 1.0412, 0.2431, -1.2450, 0.3101]) tensor([0.4918, 0.2214, 0.0500, 0.2368]) -Greedy action tensor([-1.1095, 0.0576, 0.5876, -0.6817]) tensor([0.0892, 0.2867, 0.4871, 0.1369]) -Greedy action tensor([-0.4768, -0.2900, -0.9555, 0.2346]) tensor([0.2057, 0.2479, 0.1274, 0.4190]) -Greedy action tensor([-0.1235, -0.0361, 0.3724, -0.1689]) tensor([0.2133, 0.2327, 0.3502, 0.2038]) -Greedy action tensor([-0.3596, 0.1710, 0.0353, -0.0204]) tensor([0.1790, 0.3042, 0.2656, 0.2512]) -Greedy action tensor([-0.3391, -1.5806, -0.2480, -0.3767]) tensor([0.2987, 0.0863, 0.3272, 0.2877]) -Greedy action tensor([ 0.3896, 0.2574, 0.0532, -0.3075]) tensor([0.3238, 0.2837, 0.2313, 0.1613]) -Greedy action tensor([ 1.1354, -0.7747, 0.6800, 0.5255]) tensor([0.4300, 0.0637, 0.2727, 0.2337]) -Greedy action tensor([ 0.0582, 0.6167, 0.3175, -0.4771]) tensor([0.2160, 0.3776, 0.2799, 0.1265]) -Greedy action tensor([-0.3324, -0.3561, -0.9089, -0.4171]) tensor([0.2893, 0.2825, 0.1625, 0.2658]) -Greedy action tensor([ 0.4197, -1.2612, 0.4516, -0.2330]) tensor([0.3651, 0.0680, 0.3769, 0.1901]) -Greedy action tensor([ 0.0514, 0.3492, -0.3556, -0.3118]) tensor([0.2697, 0.3632, 0.1795, 0.1876]) -Greedy action tensor([-0.0695, -0.3393, -0.0743, 0.1888]) tensor([0.2467, 0.1884, 0.2455, 0.3194]) -Greedy action tensor([-1.1174, -1.2529, -0.0466, -0.6652]) tensor([0.1572, 0.1372, 0.4586, 0.2470]) -Greedy action tensor([-0.4831, -0.2722, 0.9059, -0.7145]) tensor([0.1421, 0.1754, 0.5698, 0.1127]) -Greedy action tensor([-0.7354, -0.4495, -0.5016, -0.3370]) tensor([0.1967, 0.2618, 0.2485, 0.2930]) -Greedy action tensor([-0.6006, -0.4125, 1.0774, -1.0871]) tensor([0.1223, 0.1476, 0.6549, 0.0752]) -Greedy action tensor([ 0.1863, -0.4054, 0.4524, -0.0547]) tensor([0.2744, 0.1519, 0.3581, 0.2156]) -Greedy action tensor([ 0.1755, 0.3832, 0.2460, -0.6637]) tensor([0.2677, 0.3295, 0.2872, 0.1156]) -Greedy action tensor([-0.9207, -1.2892, -0.1040, -0.0830]) tensor([0.1596, 0.1104, 0.3612, 0.3688]) -Greedy action tensor([-0.2756, -0.7496, 0.3636, -0.7337]) tensor([0.2410, 0.1500, 0.4566, 0.1524]) -Greedy action tensor([ 0.4081, -0.8218, -0.0259, -0.9081]) tensor([0.4528, 0.1324, 0.2934, 0.1214]) -Greedy action tensor([-0.0017, -0.3270, 0.2474, -0.1977]) tensor([0.2613, 0.1887, 0.3352, 0.2148]) -Greedy action tensor([ 0.1418, -0.2885, 0.2852, 0.1357]) tensor([0.2633, 0.1712, 0.3039, 0.2617]) -Greedy action tensor([-1.0829, -0.0250, 0.1436, -0.2986]) tensor([0.1055, 0.3038, 0.3596, 0.2311]) -Greedy action tensor([ 1.8456, 0.4155, -0.3884, -0.1690]) tensor([0.6758, 0.1617, 0.0724, 0.0901]) -Greedy action tensor([-0.6359, -0.4733, -0.3455, -0.2075]) tensor([0.1981, 0.2331, 0.2648, 0.3040]) -Greedy action tensor([-0.6344, -0.6178, 0.1805, -0.7715]) tensor([0.1943, 0.1975, 0.4388, 0.1694]) -Greedy action tensor([-0.4827, -0.7902, 0.6956, -0.9010]) tensor([0.1772, 0.1303, 0.5758, 0.1166]) -Greedy action tensor([ 0.0606, 0.0124, 0.2777, -0.5221]) tensor([0.2664, 0.2539, 0.3310, 0.1488]) -Greedy action tensor([-0.5154, -0.5528, 0.2131, -0.5411]) tensor([0.1996, 0.1923, 0.4136, 0.1945]) -Greedy action tensor([ 0.0442, 0.0300, -0.2667, 0.1389]) tensor([0.2619, 0.2582, 0.1919, 0.2879]) -Greedy action tensor([ 0.3340, -0.7179, 0.1476, -0.1996]) tensor([0.3616, 0.1263, 0.3001, 0.2121]) -Greedy action tensor([ 1.2635, -0.5079, 0.1986, 0.9671]) tensor([0.4428, 0.0753, 0.1527, 0.3292]) -Greedy action tensor([ 1.2929, -0.6150, 1.1860, 0.2786]) tensor([0.4150, 0.0616, 0.3729, 0.1505]) -Greedy action tensor([-0.1497, -1.0055, -0.0755, -0.4325]) tensor([0.3072, 0.1305, 0.3308, 0.2315]) -Greedy action tensor([-0.9015, -0.2221, 0.2124, -0.2667]) tensor([0.1265, 0.2495, 0.3853, 0.2386]) -Greedy action tensor([-0.5821, 0.3247, -0.9189, -0.5904]) tensor([0.1930, 0.4779, 0.1378, 0.1914]) -Greedy action tensor([ 0.7103, -0.6136, -0.9665, -0.4901]) tensor([0.5701, 0.1517, 0.1066, 0.1716]) -Greedy action tensor([-0.5184, -0.1498, 0.5078, -0.4726]) tensor([0.1592, 0.2301, 0.4441, 0.1666]) -Greedy action tensor([ 0.4192, -0.2750, -1.0607, 0.5428]) tensor([0.3498, 0.1747, 0.0796, 0.3958]) -Greedy action tensor([0.5501, 0.1717, 0.0989, 0.2466]) tensor([0.3268, 0.2238, 0.2081, 0.2413]) -Greedy action tensor([-0.7739, 0.5135, 0.5500, -0.6042]) tensor([0.1045, 0.3788, 0.3928, 0.1239]) -Greedy action tensor([-0.4035, -2.2345, -0.2853, 0.0487]) tensor([0.2592, 0.0415, 0.2918, 0.4075]) -Greedy action tensor([-0.8047, -0.2645, -0.7862, -0.2927]) tensor([0.1851, 0.3176, 0.1885, 0.3088]) -Greedy action tensor([-0.0315, -1.5610, -0.6904, 0.8849]) tensor([0.2362, 0.0512, 0.1222, 0.5905]) -Greedy action tensor([-0.1269, -0.9628, -1.0421, -0.8231]) tensor([0.4287, 0.1859, 0.1717, 0.2137]) -Greedy action tensor([ 0.6880, -0.3009, 0.1864, -1.3774]) tensor([0.4752, 0.1768, 0.2878, 0.0602]) -Greedy action tensor([ 1.3394, -0.1582, -0.2942, 0.0358]) tensor([0.5916, 0.1323, 0.1155, 0.1606]) -Greedy action tensor([ 1.7940, -0.7771, -0.4944, 0.8278]) tensor([0.6417, 0.0491, 0.0651, 0.2442]) -Greedy action tensor([ 1.0586, -0.5598, -0.4077, 0.0885]) tensor([0.5531, 0.1096, 0.1276, 0.2096]) -Greedy action tensor([ 3.1461, -1.8904, -0.3468, 1.0314]) tensor([0.8639, 0.0056, 0.0263, 0.1042]) -Greedy action tensor([ 1.3924, -0.1147, -0.8668, 0.7664]) tensor([0.5374, 0.1191, 0.0561, 0.2874]) -Greedy action tensor([ 1.4485, -0.4093, 0.0792, 0.0064]) tensor([0.6073, 0.0947, 0.1544, 0.1436]) -Greedy action tensor([ 1.7904, 0.2199, -0.0494, 0.1559]) tensor([0.6403, 0.1331, 0.1017, 0.1249]) -Greedy action tensor([ 1.1889, -0.1289, -1.1298, 0.3724]) tensor([0.5531, 0.1481, 0.0544, 0.2445]) -Greedy action tensor([ 1.4809, -0.7332, -0.2305, 0.3069]) tensor([0.6254, 0.0683, 0.1130, 0.1933]) -Greedy action tensor([ 1.8048, -0.1852, -0.1754, 0.3818]) tensor([0.6598, 0.0902, 0.0911, 0.1590]) -Greedy action tensor([ 1.8195, -0.3653, -0.8186, 0.3591]) tensor([0.7061, 0.0794, 0.0505, 0.1639]) -Greedy action tensor([ 1.7926, -0.4720, -0.6654, 0.5795]) tensor([0.6726, 0.0699, 0.0576, 0.1999]) -Greedy action tensor([ 1.7558, -0.9018, -0.4716, 0.7215]) tensor([0.6522, 0.0457, 0.0703, 0.2318]) -Greedy action tensor([ 1.4591, -1.1222, -0.2688, 0.0441]) tensor([0.6683, 0.0506, 0.1187, 0.1624]) -Greedy action tensor([ 1.4039, -0.6551, -0.3408, 0.4922]) tensor([0.5868, 0.0749, 0.1025, 0.2358]) -Greedy action tensor([ 1.7637, 0.3231, -0.2020, 0.3778]) tensor([0.6146, 0.1455, 0.0861, 0.1537]) -Greedy action tensor([ 1.5519, -0.7837, -0.2375, 0.4402]) tensor([0.6278, 0.0607, 0.1049, 0.2065]) -Greedy action tensor([ 1.6527, 0.0640, -0.6521, -0.2163]) tensor([0.6858, 0.1400, 0.0684, 0.1058]) -Greedy action tensor([ 2.0845, -1.0497, -0.1735, 0.5467]) tensor([0.7337, 0.0319, 0.0767, 0.1576]) -Greedy action tensor([ 1.4688, -0.2682, -1.1847, 0.4650]) tensor([0.6200, 0.1091, 0.0437, 0.2272]) -Greedy action tensor([ 1.3244, -0.3253, -0.2854, 0.1356]) tensor([0.5894, 0.1132, 0.1178, 0.1795]) -Greedy action tensor([ 1.4419, -0.0249, -0.3255, 0.3085]) tensor([0.5803, 0.1338, 0.0991, 0.1868]) -Greedy action tensor([ 1.9620, -0.4715, -0.5124, 0.5476]) tensor([0.7067, 0.0620, 0.0595, 0.1718]) -Greedy action tensor([ 2.0914, -0.9036, -0.2291, 0.5914]) tensor([0.7292, 0.0365, 0.0716, 0.1627]) -Greedy action tensor([ 1.0071, -0.1743, -0.2649, 0.1883]) tensor([0.4931, 0.1513, 0.1382, 0.2174]) -Greedy action tensor([2.0677, 0.5397, 0.0720, 0.1910]) tensor([0.6640, 0.1441, 0.0903, 0.1017]) -Greedy action tensor([ 1.3509, -0.3131, -1.1021, 0.2736]) tensor([0.6188, 0.1172, 0.0532, 0.2107]) -Greedy action tensor([ 1.5809, -0.5963, -0.8624, 0.7038]) tensor([0.6187, 0.0701, 0.0537, 0.2574]) -Greedy action tensor([ 1.4555, -0.7898, -0.8822, -0.0608]) tensor([0.7033, 0.0745, 0.0679, 0.1544]) -Greedy action tensor([ 1.1858, -0.5670, -0.2560, 0.3056]) tensor([0.5481, 0.0950, 0.1296, 0.2273]) -Greedy action tensor([ 1.7245, -0.8064, -0.6975, 0.5265]) tensor([0.6802, 0.0541, 0.0604, 0.2053]) -Greedy action tensor([ 1.1428, -0.9431, 0.0937, 0.5628]) tensor([0.4916, 0.0611, 0.1722, 0.2752]) -Greedy action tensor([ 2.0463, -0.6908, -0.5636, 0.7958]) tensor([0.7019, 0.0455, 0.0516, 0.2010]) -Greedy action tensor([ 1.5840, -0.8426, -0.4887, -0.1365]) tensor([0.7178, 0.0634, 0.0903, 0.1285]) -Greedy action tensor([ 1.2476, -0.1047, -0.8296, 0.3700]) tensor([0.5557, 0.1437, 0.0696, 0.2310]) -Greedy action tensor([ 2.5071, -0.4724, -0.6941, 0.9660]) tensor([0.7659, 0.0389, 0.0312, 0.1640]) -Greedy action tensor([ 1.9610, -0.7292, -0.3620, -0.0611]) tensor([0.7703, 0.0523, 0.0755, 0.1020]) -Greedy action tensor([ 1.4949, -0.8176, -0.5918, 0.1382]) tensor([0.6754, 0.0669, 0.0838, 0.1739]) -Greedy action tensor([ 1.3847, -0.2178, -0.4643, 0.1936]) tensor([0.6014, 0.1211, 0.0947, 0.1828]) -Greedy action tensor([ 0.7900, -0.4507, -0.1690, 0.2351]) tensor([0.4451, 0.1287, 0.1706, 0.2555]) -Greedy action tensor([ 1.2654, -0.3921, -0.5565, 0.1864]) tensor([0.5909, 0.1126, 0.0956, 0.2009]) -Greedy action tensor([ 1.5765, -0.6677, -0.9191, 0.2268]) tensor([0.6907, 0.0732, 0.0570, 0.1791]) -Greedy action tensor([ 1.2180, -0.4514, -0.0524, 0.2207]) tensor([0.5441, 0.1025, 0.1527, 0.2007]) -Greedy action tensor([ 0.5775, -0.3095, 0.1324, -0.0996]) tensor([0.3905, 0.1608, 0.2502, 0.1984]) -Greedy action tensor([ 1.5562, 0.5774, -0.3341, 0.3368]) tensor([0.5488, 0.2062, 0.0829, 0.1621]) -Greedy action tensor([ 1.7888, -1.0213, -0.0154, 0.9113]) tensor([0.6095, 0.0367, 0.1003, 0.2534]) -Greedy action tensor([ 2.2020, -0.8223, -0.2247, 0.5209]) tensor([0.7558, 0.0367, 0.0668, 0.1407]) -Greedy action tensor([ 1.1686, -0.4350, -0.3692, 0.2342]) tensor([0.5528, 0.1112, 0.1188, 0.2172]) -Greedy action tensor([ 1.2421, -0.3617, -0.8002, 0.4004]) tensor([0.5676, 0.1142, 0.0736, 0.2446]) -Greedy action tensor([ 1.2348, -0.0308, -0.6234, 0.0392]) tensor([0.5745, 0.1621, 0.0896, 0.1738]) -Greedy action tensor([ 1.4759, -0.4859, -0.3411, 0.3879]) tensor([0.6098, 0.0857, 0.0991, 0.2054]) -Greedy action tensor([ 1.3244, -0.1157, -0.2038, 0.1741]) tensor([0.5648, 0.1338, 0.1225, 0.1788]) -Greedy action tensor([ 1.0589, -0.0764, -0.5720, 0.1083]) tensor([0.5253, 0.1688, 0.1028, 0.2030]) -Greedy action tensor([ 2.0338, -0.5556, 0.3754, -0.3062]) tensor([0.7343, 0.0551, 0.1398, 0.0707]) -Greedy action tensor([ 1.0427, -0.3463, -0.8032, 0.5007]) tensor([0.5028, 0.1254, 0.0794, 0.2924]) -Greedy action tensor([ 1.3502, -0.4028, -0.5722, 0.7466]) tensor([0.5358, 0.0928, 0.0784, 0.2930]) -Greedy action tensor([ 1.1957, -0.8302, 0.0238, 0.2478]) tensor([0.5467, 0.0721, 0.1694, 0.2119]) -Greedy action tensor([ 1.6612, -0.6004, -0.4366, 0.2004]) tensor([0.6854, 0.0714, 0.0841, 0.1591]) -Greedy action tensor([ 1.2461, -0.2498, -0.4115, 0.6796]) tensor([0.5045, 0.1130, 0.0962, 0.2863]) -Greedy action tensor([ 1.3335, -0.4721, -0.4981, 0.2399]) tensor([0.6026, 0.0991, 0.0965, 0.2019]) -Greedy action tensor([ 1.5238, -0.0321, -0.3382, 0.4617]) tensor([0.5841, 0.1232, 0.0907, 0.2019]) -Greedy action tensor([ 1.5510, -0.6482, -0.2754, 0.3469]) tensor([0.6362, 0.0705, 0.1024, 0.1908]) -Greedy action tensor([ 0.7411, -0.2946, -0.4736, 0.1941]) tensor([0.4483, 0.1592, 0.1331, 0.2595]) -Greedy action tensor([ 1.6896, -0.5017, -0.3174, 0.6732]) tensor([0.6219, 0.0695, 0.0836, 0.2251]) -Greedy action tensor([ 1.1289, -0.4246, -0.4649, 0.5621]) tensor([0.5045, 0.1067, 0.1025, 0.2863]) -Greedy action tensor([ 1.4072, -0.7272, 0.0519, 0.4893]) tensor([0.5632, 0.0666, 0.1452, 0.2249]) -Greedy action tensor([ 1.4426, -0.5211, -0.4259, 0.3306]) tensor([0.6159, 0.0864, 0.0951, 0.2026]) -Greedy action tensor([ 1.5113, -0.4904, -0.5756, 0.4776]) tensor([0.6192, 0.0837, 0.0768, 0.2203]) -Greedy action tensor([ 1.6355, -0.5978, -0.0517, 0.2963]) tensor([0.6434, 0.0690, 0.1191, 0.1686]) -Greedy action tensor([ 1.0664, -0.2763, -0.1891, 0.6553]) tensor([0.4527, 0.1182, 0.1290, 0.3001]) -Greedy action tensor([ 1.3130, -0.7834, -0.2463, 0.5288]) tensor([0.5588, 0.0687, 0.1175, 0.2551]) -Greedy action tensor([ 1.8843, -0.3496, -0.8129, 0.8272]) tensor([0.6571, 0.0704, 0.0443, 0.2283]) -Greedy action tensor([ 1.2566, -0.4475, -0.6862, 0.1747]) tensor([0.6009, 0.1093, 0.0861, 0.2037]) -Greedy action tensor([ 1.3226, -0.1539, -1.2331, 0.6062]) tensor([0.5572, 0.1273, 0.0433, 0.2722]) -Greedy action tensor([ 1.5158, 0.2514, -0.2661, -0.2592]) tensor([0.6172, 0.1743, 0.1039, 0.1046]) -Greedy action tensor([ 1.6256, -0.0787, -0.5391, 0.4823]) tensor([0.6190, 0.1126, 0.0711, 0.1973]) -Greedy action tensor([ 1.6552, -0.7914, -0.2068, 0.0579]) tensor([0.6923, 0.0599, 0.1076, 0.1402]) -Greedy action tensor([ 2.0202, -1.1873, 0.0622, 0.4185]) tensor([0.7230, 0.0293, 0.1020, 0.1457]) -Greedy action tensor([ 1.7250, -0.5925, -0.7294, 0.2289]) tensor([0.7100, 0.0699, 0.0610, 0.1590]) -Greedy action tensor([ 1.9846, -0.8690, -0.4429, 0.2436]) tensor([0.7569, 0.0436, 0.0668, 0.1327]) -Greedy action tensor([ 1.4100, -0.8265, -0.2761, 0.3726]) tensor([0.6074, 0.0649, 0.1125, 0.2152]) -Greedy action tensor([ 0.7690, -0.5819, -0.1534, -0.4571]) tensor([0.5128, 0.1328, 0.2039, 0.1505]) -Greedy action tensor([ 0.8847, -0.5468, 0.1670, -0.5570]) tensor([0.5093, 0.1217, 0.2485, 0.1205]) -Greedy action tensor([ 0.8813, -0.7440, 0.0475, -0.6177]) tensor([0.5392, 0.1061, 0.2342, 0.1204]) -Greedy action tensor([ 0.7566, -0.3127, 0.0374, -0.2574]) tensor([0.4560, 0.1565, 0.2221, 0.1654]) -Greedy action tensor([ 0.5787, 0.0487, 0.0388, -0.4941]) tensor([0.3979, 0.2342, 0.2319, 0.1361]) -Greedy action tensor([ 0.0646, 0.3163, 0.0090, -0.2514]) tensor([0.2525, 0.3247, 0.2388, 0.1841]) -Greedy action tensor([ 0.9399, -0.6117, -0.0391, -0.3834]) tensor([0.5394, 0.1143, 0.2026, 0.1436]) -Greedy action tensor([ 3.0581e-01, 1.5692e-01, 2.8789e-04, -2.4692e-02]) tensor([0.3015, 0.2598, 0.2221, 0.2166]) -Greedy action tensor([ 0.8538, -0.3408, 0.1715, -0.3321]) tensor([0.4731, 0.1433, 0.2391, 0.1445]) -Greedy action tensor([ 0.7506, -0.5911, -0.0870, -0.2608]) tensor([0.4859, 0.1270, 0.2103, 0.1767]) -Greedy action tensor([ 1.2235, -0.8788, 0.0096, -0.8027]) tensor([0.6447, 0.0788, 0.1915, 0.0850]) -Greedy action tensor([ 0.9304, -0.8763, 0.0605, -0.5253]) tensor([0.5505, 0.0904, 0.2307, 0.1284]) -Greedy action tensor([ 0.7469, -0.6849, -0.0414, -0.4870]) tensor([0.5039, 0.1204, 0.2291, 0.1467]) -Greedy action tensor([ 0.9857, -0.1349, -0.0156, -0.0445]) tensor([0.4877, 0.1590, 0.1792, 0.1741]) -Greedy action tensor([ 0.5947, -0.2789, -0.1874, -0.1081]) tensor([0.4219, 0.1761, 0.1930, 0.2089]) -Greedy action tensor([ 0.2435, 0.0375, 0.0088, -0.1568]) tensor([0.3054, 0.2485, 0.2415, 0.2046]) -Greedy action tensor([ 0.8211, -0.5735, -0.1778, -0.2697]) tensor([0.5122, 0.1270, 0.1887, 0.1721]) -Greedy action tensor([ 0.9173, -0.4027, -0.1537, -0.1131]) tensor([0.5085, 0.1358, 0.1742, 0.1815]) -Greedy action tensor([ 0.7466, -0.5452, 0.1319, -0.2804]) tensor([0.4601, 0.1264, 0.2488, 0.1647]) -Greedy action tensor([ 0.7926, -0.5949, -0.0069, -0.4477]) tensor([0.5029, 0.1256, 0.2261, 0.1455]) -Greedy action tensor([ 0.6855, -0.3594, -0.1012, -0.1237]) tensor([0.4440, 0.1562, 0.2022, 0.1977]) -Greedy action tensor([ 0.8288, -0.3900, -0.1224, -0.2730]) tensor([0.4965, 0.1468, 0.1918, 0.1650]) -Greedy action tensor([ 0.9656, -0.5762, -0.0932, -0.4431]) tensor([0.5539, 0.1185, 0.1921, 0.1354]) -Greedy action tensor([ 0.3094, -0.0172, -0.0735, -0.1658]) tensor([0.3306, 0.2385, 0.2254, 0.2055]) -Greedy action tensor([ 0.6813, -0.5758, -0.0962, -0.1640]) tensor([0.4601, 0.1309, 0.2114, 0.1976]) -Greedy action tensor([ 1.1817, -0.7969, -0.1615, -0.3658]) tensor([0.6203, 0.0858, 0.1619, 0.1320]) -Greedy action tensor([ 0.8976, -0.7159, -0.0664, -0.1527]) tensor([0.5180, 0.1032, 0.1976, 0.1812]) -Greedy action tensor([ 1.0051, -0.6430, -0.1721, -0.3863]) tensor([0.5717, 0.1100, 0.1761, 0.1422]) -Greedy action tensor([ 0.7515, -0.4512, 0.0372, -0.2155]) tensor([0.4608, 0.1384, 0.2256, 0.1752]) -Greedy action tensor([ 0.3606, 0.2082, -0.1640, 0.1000]) tensor([0.3105, 0.2666, 0.1837, 0.2392]) -Greedy action tensor([ 1.5195, -0.6419, 0.0076, -0.4095]) tensor([0.6752, 0.0778, 0.1489, 0.0981]) -Greedy action tensor([ 0.3776, -0.2337, -0.0774, -0.0195]) tensor([0.3510, 0.1904, 0.2227, 0.2359]) -Greedy action tensor([ 0.8956, -0.8211, -0.3333, -0.7040]) tensor([0.5973, 0.1073, 0.1748, 0.1206]) -Greedy action tensor([ 0.6943, -0.4161, -0.0644, -0.0177]) tensor([0.4370, 0.1440, 0.2046, 0.2144]) -Greedy action tensor([ 0.3952, -0.2415, 0.0184, -0.6529]) tensor([0.3898, 0.2062, 0.2674, 0.1366]) -Greedy action tensor([ 0.9101, -0.2953, -0.0539, -0.1266]) tensor([0.4913, 0.1472, 0.1874, 0.1742]) -Greedy action tensor([ 0.2776, 0.3033, -0.1912, -0.1340]) tensor([0.3017, 0.3096, 0.1888, 0.1999]) -Greedy action tensor([ 0.8732, -0.5772, -0.0305, -0.4022]) tensor([0.5211, 0.1222, 0.2111, 0.1456]) -Greedy action tensor([ 0.4343, -0.1007, -0.0173, -0.2863]) tensor([0.3692, 0.2162, 0.2350, 0.1796]) -Greedy action tensor([ 0.9722, -0.7460, 0.0525, -0.6746]) tensor([0.5648, 0.1013, 0.2251, 0.1088]) -Greedy action tensor([ 1.1161, -0.6210, -0.0462, -0.5720]) tensor([0.5975, 0.1052, 0.1869, 0.1105]) -Greedy action tensor([ 0.7041, -0.3333, -0.0882, -0.2064]) tensor([0.4526, 0.1604, 0.2049, 0.1821]) -Greedy action tensor([ 0.3828, -0.0755, -0.0480, -0.0695]) tensor([0.3426, 0.2167, 0.2227, 0.2180]) -Greedy action tensor([ 1.0931, -0.8429, 0.1271, -0.4582]) tensor([0.5758, 0.0831, 0.2191, 0.1220]) -Greedy action tensor([ 0.8111, -0.2307, -0.1848, -0.4962]) tensor([0.5018, 0.1770, 0.1854, 0.1358]) -Greedy action tensor([ 0.9199, -0.8944, 0.0932, -0.4107]) tensor([0.5363, 0.0874, 0.2346, 0.1417]) -Greedy action tensor([ 0.3337, 0.3548, -0.1618, 0.1095]) tensor([0.2916, 0.2978, 0.1776, 0.2330]) -Greedy action tensor([ 0.7956, -0.6459, 0.0790, -0.1980]) tensor([0.4773, 0.1129, 0.2331, 0.1767]) -Greedy action tensor([ 0.9880, -0.7099, 0.1924, -0.3675]) tensor([0.5285, 0.0968, 0.2385, 0.1363]) -Greedy action tensor([ 0.6823, -0.3238, -0.0561, -0.1909]) tensor([0.4422, 0.1617, 0.2113, 0.1847]) -Greedy action tensor([ 0.4687, -0.4867, -0.1095, -0.1771]) tensor([0.4049, 0.1557, 0.2271, 0.2123]) -Greedy action tensor([ 0.3947, -0.0947, -0.1394, -0.0685]) tensor([0.3536, 0.2167, 0.2073, 0.2225]) -Greedy action tensor([ 0.4883, -0.4371, -0.1241, -0.2240]) tensor([0.4117, 0.1632, 0.2232, 0.2019]) -Greedy action tensor([ 0.7223, -0.5199, 0.0179, -0.5215]) tensor([0.4827, 0.1394, 0.2387, 0.1392]) -Greedy action tensor([ 0.9259, -0.6385, -0.0281, -0.3426]) tensor([0.5332, 0.1115, 0.2054, 0.1499]) -Greedy action tensor([ 0.8226, -0.2506, 0.1231, -0.2102]) tensor([0.4556, 0.1558, 0.2264, 0.1622]) -Greedy action tensor([ 0.8960, -0.4546, -0.1188, -0.2234]) tensor([0.5133, 0.1330, 0.1861, 0.1676]) -Greedy action tensor([ 0.8749, -0.4638, 0.0089, -0.3117]) tensor([0.5030, 0.1319, 0.2116, 0.1535]) -Greedy action tensor([ 0.6535, -0.1460, -0.0069, -0.3030]) tensor([0.4255, 0.1913, 0.2198, 0.1635]) -Greedy action tensor([ 0.4177, -0.3682, -0.1436, -0.1164]) tensor([0.3828, 0.1744, 0.2184, 0.2244]) -Greedy action tensor([ 0.4524, 0.1607, -0.1149, -0.0803]) tensor([0.3447, 0.2575, 0.1955, 0.2023]) -Greedy action tensor([ 1.1467, -0.5033, 0.0345, -0.8413]) tensor([0.6032, 0.1159, 0.1983, 0.0826]) -Greedy action tensor([ 0.7284, 0.0254, -0.2000, -0.0589]) tensor([0.4264, 0.2111, 0.1685, 0.1940]) -Greedy action tensor([ 0.7109, -0.2654, 0.0164, -0.1372]) tensor([0.4340, 0.1635, 0.2167, 0.1858]) -Greedy action tensor([ 0.7300, -0.0988, -0.0535, -0.0766]) tensor([0.4274, 0.1866, 0.1952, 0.1908]) -Greedy action tensor([ 0.7399, -0.7140, -0.1989, -0.3357]) tensor([0.5087, 0.1189, 0.1989, 0.1735]) -Greedy action tensor([ 0.7113, -0.4948, -0.0051, -0.3075]) tensor([0.4653, 0.1393, 0.2273, 0.1680]) -Greedy action tensor([ 0.8535, -0.4283, 0.0277, -0.3391]) tensor([0.4953, 0.1375, 0.2169, 0.1503]) -Greedy action tensor([ 0.9250, -0.9147, 0.4106, -0.6172]) tensor([0.5074, 0.0806, 0.3034, 0.1085]) -Greedy action tensor([ 0.9272, -0.7499, -0.0062, -0.4781]) tensor([0.5478, 0.1024, 0.2154, 0.1344]) -Greedy action tensor([ 0.6049, -0.5551, 0.0139, -0.4289]) tensor([0.4499, 0.1410, 0.2491, 0.1600]) -Greedy action tensor([ 0.6091, -0.3289, -0.0998, -0.0487]) tensor([0.4164, 0.1630, 0.2049, 0.2157]) -Greedy action tensor([ 0.4376, -0.1537, -0.0566, -0.1003]) tensor([0.3640, 0.2015, 0.2220, 0.2125]) -Greedy action tensor([ 0.7430, -0.3367, -0.0012, -0.3362]) tensor([0.4641, 0.1577, 0.2205, 0.1577]) -Greedy action tensor([ 0.7451, -0.3732, -0.0284, 0.0119]) tensor([0.4408, 0.1441, 0.2034, 0.2117]) -Greedy action tensor([ 0.7528, -0.4622, 0.0272, -0.4132]) tensor([0.4779, 0.1418, 0.2313, 0.1489]) -Greedy action tensor([ 1.0734, -0.9293, 0.0500, -0.3049]) tensor([0.5726, 0.0773, 0.2058, 0.1443]) -Greedy action tensor([ 0.9750, -0.3418, -0.1732, -0.3114]) tensor([0.5372, 0.1440, 0.1704, 0.1484]) -Greedy action tensor([ 0.3123, 0.1591, -0.0318, -0.3242]) tensor([0.3230, 0.2771, 0.2290, 0.1709]) -Greedy action tensor([ 0.5621, -0.5232, -0.2126, -0.2296]) tensor([0.4441, 0.1500, 0.2047, 0.2012]) -Greedy action tensor([ 0.7340, -0.4796, -0.0528, -0.3032]) tensor([0.4746, 0.1410, 0.2161, 0.1682]) -Greedy action tensor([-1.6584, -0.3736, 0.5466, 0.0332]) tensor([0.0523, 0.1891, 0.4746, 0.2840]) -Greedy action tensor([-1.3623, -0.4120, 0.5129, 0.1287]) tensor([0.0687, 0.1778, 0.4482, 0.3053]) -Greedy action tensor([-1.9267, -0.4107, 0.6570, -0.1700]) tensor([0.0407, 0.1852, 0.5386, 0.2356]) -Greedy action tensor([-1.8862, -0.4355, 0.6688, -0.1422]) tensor([0.0419, 0.1788, 0.5395, 0.2398]) -Greedy action tensor([-1.9146, -0.3818, 0.6487, -0.1635]) tensor([0.0410, 0.1900, 0.5326, 0.2364]) -Greedy action tensor([-1.8732, -0.4369, 0.6183, -0.1421]) tensor([0.0436, 0.1834, 0.5268, 0.2462]) -Greedy action tensor([-1.8504, -0.4591, 0.6251, -0.1280]) tensor([0.0444, 0.1786, 0.5282, 0.2488]) -Greedy action tensor([-1.9299, -0.4445, 0.6694, -0.1701]) tensor([0.0405, 0.1789, 0.5451, 0.2354]) -Greedy action tensor([-1.3755, -0.2136, 0.3844, -0.0535]) tensor([0.0727, 0.2323, 0.4224, 0.2726]) -Greedy action tensor([-1.8840, -0.4584, 0.6439, -0.1474]) tensor([0.0428, 0.1781, 0.5361, 0.2430]) -Greedy action tensor([-1.7591, -0.4704, 0.5789, -0.0904]) tensor([0.0493, 0.1788, 0.5105, 0.2614]) -Greedy action tensor([-1.6827, -0.1668, 0.5009, -0.0761]) tensor([0.0515, 0.2345, 0.4572, 0.2568]) -Greedy action tensor([-1.9322, -0.4117, 0.6554, -0.1705]) tensor([0.0405, 0.1852, 0.5385, 0.2358]) -Greedy action tensor([-1.9335, -0.4332, 0.6607, -0.1743]) tensor([0.0405, 0.1817, 0.5424, 0.2354]) -Greedy action tensor([-0.7018, -1.2166, 0.1557, -0.5954]) tensor([0.1974, 0.1179, 0.4652, 0.2195]) -Greedy action tensor([-1.5516, 0.4425, 0.3265, 0.0082]) tensor([0.0509, 0.3739, 0.3330, 0.2422]) -Greedy action tensor([-1.6110, -0.4022, 0.5427, 0.0574]) tensor([0.0547, 0.1833, 0.4716, 0.2903]) -Greedy action tensor([-0.9263, 0.8647, 0.1045, 0.1409]) tensor([0.0787, 0.4719, 0.2206, 0.2288]) -Greedy action tensor([-1.1183, -0.6768, 0.4968, 0.2740]) tensor([0.0861, 0.1340, 0.4332, 0.3467]) -Greedy action tensor([-1.9109, -0.4239, 0.6473, -0.1590]) tensor([0.0415, 0.1835, 0.5357, 0.2392]) -Greedy action tensor([-1.3500, 0.6068, 0.2592, 0.0412]) tensor([0.0585, 0.4140, 0.2924, 0.2351]) -Greedy action tensor([-1.8433, -0.2535, 0.5969, -0.1248]) tensor([0.0436, 0.2136, 0.4999, 0.2429]) -Greedy action tensor([-0.5619, 1.0355, 0.0996, 0.1792]) tensor([0.1002, 0.4952, 0.1942, 0.2103]) -Greedy action tensor([-1.8460, -0.4608, 0.6241, -0.1326]) tensor([0.0447, 0.1786, 0.5286, 0.2480]) -Greedy action tensor([-1.1837, -0.1736, 0.3183, 0.0339]) tensor([0.0861, 0.2364, 0.3866, 0.2909]) -Greedy action tensor([-1.9042, -0.3874, 0.6442, -0.1501]) tensor([0.0415, 0.1889, 0.5301, 0.2395]) -Greedy action tensor([-1.8679, -0.3608, 0.6187, -0.1308]) tensor([0.0431, 0.1944, 0.5178, 0.2447]) -Greedy action tensor([-1.6354, -0.5557, 0.6555, -0.0034]) tensor([0.0528, 0.1554, 0.5218, 0.2700]) -Greedy action tensor([-1.8988, -0.4064, 0.6408, -0.1606]) tensor([0.0420, 0.1868, 0.5323, 0.2389]) -Greedy action tensor([-1.6034, 0.1694, 0.4191, 0.0710]) tensor([0.0506, 0.2976, 0.3821, 0.2697]) -Greedy action tensor([-1.6728, -0.4385, 0.5409, -0.0140]) tensor([0.0531, 0.1824, 0.4857, 0.2788]) -Greedy action tensor([-1.5096, -0.4802, 0.4672, 0.0734]) tensor([0.0629, 0.1762, 0.4544, 0.3065]) -Greedy action tensor([-1.9334, -0.4162, 0.6581, -0.1748]) tensor([0.0405, 0.1845, 0.5402, 0.2349]) -Greedy action tensor([-1.8053, -0.4891, 0.6011, -0.0871]) tensor([0.0467, 0.1743, 0.5185, 0.2605]) -Greedy action tensor([-1.7852, -0.1137, 0.5373, -0.0889]) tensor([0.0455, 0.2421, 0.4642, 0.2482]) -Greedy action tensor([-1.8491, -0.3977, 0.6126, -0.1420]) tensor([0.0444, 0.1897, 0.5209, 0.2449]) -Greedy action tensor([-1.8854, -0.4485, 0.6401, -0.1501]) tensor([0.0428, 0.1800, 0.5346, 0.2426]) -Greedy action tensor([-1.9316, -0.4517, 0.6726, -0.1693]) tensor([0.0404, 0.1775, 0.5465, 0.2355]) -Greedy action tensor([-1.8577, -0.3903, 0.6171, -0.1258]) tensor([0.0437, 0.1897, 0.5194, 0.2471]) -Greedy action tensor([-1.5235, -0.1912, 0.4796, -0.1223]) tensor([0.0615, 0.2330, 0.4558, 0.2497]) -Greedy action tensor([-1.9276, -0.4440, 0.6611, -0.1723]) tensor([0.0408, 0.1799, 0.5432, 0.2361]) -Greedy action tensor([-1.5456, -0.3700, 0.6497, -0.4483]) tensor([0.0617, 0.1998, 0.5538, 0.1847]) -Greedy action tensor([-1.5011, -0.4456, 0.5236, 0.1600]) tensor([0.0598, 0.1719, 0.4532, 0.3150]) -Greedy action tensor([-1.9290, -0.4438, 0.6594, -0.1723]) tensor([0.0408, 0.1801, 0.5428, 0.2363]) -Greedy action tensor([-1.6689, -0.5095, 0.5362, -0.0795]) tensor([0.0551, 0.1756, 0.4995, 0.2699]) -Greedy action tensor([-1.0933, 0.6041, 0.2614, -0.1213]) tensor([0.0770, 0.4207, 0.2986, 0.2037]) -Greedy action tensor([-1.6118, -0.3470, 0.4895, -0.0086]) tensor([0.0565, 0.2003, 0.4623, 0.2809]) -Greedy action tensor([-1.4729, 0.0426, 0.5149, -0.6554]) tensor([0.0662, 0.3011, 0.4829, 0.1498]) -Greedy action tensor([-1.9263, -0.4400, 0.6591, -0.1694]) tensor([0.0408, 0.1806, 0.5419, 0.2367]) -Greedy action tensor([-1.7780, -0.3192, 0.5597, -0.0520]) tensor([0.0470, 0.2021, 0.4868, 0.2640]) -Greedy action tensor([-1.9358, -0.4380, 0.6632, -0.1753]) tensor([0.0404, 0.1808, 0.5437, 0.2351]) -Greedy action tensor([-1.9231, -0.4134, 0.6540, -0.1596]) tensor([0.0408, 0.1846, 0.5367, 0.2379]) -Greedy action tensor([-1.9257, -0.4622, 0.6743, -0.1638]) tensor([0.0406, 0.1756, 0.5471, 0.2366]) -Greedy action tensor([-1.7066, 0.0948, 0.4184, -0.0634]) tensor([0.0485, 0.2941, 0.4064, 0.2510]) -Greedy action tensor([-1.9458, -0.4488, 0.6675, -0.1813]) tensor([0.0401, 0.1791, 0.5468, 0.2340]) -Greedy action tensor([-1.0873, -0.2491, 0.2464, 0.0079]) tensor([0.0990, 0.2290, 0.3759, 0.2961]) -Greedy action tensor([-0.9997, 0.8320, 0.0895, 0.2275]) tensor([0.0734, 0.4582, 0.2181, 0.2503]) -Greedy action tensor([-1.8159, -0.0816, 0.5558, -0.1006]) tensor([0.0436, 0.2470, 0.4671, 0.2423]) -Greedy action tensor([-1.9328, -0.4396, 0.6614, -0.1740]) tensor([0.0406, 0.1806, 0.5432, 0.2356]) -Greedy action tensor([-1.4356, -0.5216, 0.4492, -0.0636]) tensor([0.0713, 0.1779, 0.4696, 0.2812]) -Greedy action tensor([-1.9477, -0.4531, 0.6669, -0.1829]) tensor([0.0401, 0.1786, 0.5473, 0.2340]) -Greedy action tensor([-1.5547, -0.3781, 0.7000, -0.5225]) tensor([0.0603, 0.1956, 0.5748, 0.1693]) -Greedy action tensor([-1.9293, -0.4293, 0.6532, -0.1707]) tensor([0.0408, 0.1828, 0.5396, 0.2368]) -Greedy action tensor([-1.7091e+00, 6.0161e-02, 4.6179e-01, 5.5379e-04]) tensor([0.0473, 0.2773, 0.4143, 0.2612]) -Greedy action tensor([-1.8408, -0.4258, 0.6087, -0.1374]) tensor([0.0451, 0.1855, 0.5219, 0.2475]) -Greedy action tensor([-1.9052, -0.4106, 0.6598, -0.1532]) tensor([0.0413, 0.1840, 0.5367, 0.2380]) -Greedy action tensor([-1.8508, -0.4429, 0.6283, -0.1293]) tensor([0.0442, 0.1808, 0.5276, 0.2474]) -Greedy action tensor([-1.7941, -0.3855, 0.6013, -0.0672]) tensor([0.0461, 0.1886, 0.5060, 0.2593]) -Greedy action tensor([-0.6659, 0.8543, 0.1488, 0.6287]) tensor([0.0871, 0.3983, 0.1967, 0.3179]) -Greedy action tensor([-1.9216, -0.4381, 0.6557, -0.1684]) tensor([0.0411, 0.1811, 0.5407, 0.2372]) -Greedy action tensor([-1.9181, -0.3579, 0.6406, -0.1582]) tensor([0.0408, 0.1944, 0.5275, 0.2373]) -Greedy action tensor([-1.8557, -0.4461, 0.6304, -0.1270]) tensor([0.0440, 0.1800, 0.5283, 0.2477]) -Greedy action tensor([-1.9102, -0.4520, 0.6519, -0.1662]) tensor([0.0417, 0.1792, 0.5406, 0.2385]) -Greedy action tensor([-1.6486, -0.3386, 0.5084, -0.0331]) tensor([0.0544, 0.2016, 0.4703, 0.2737]) -Greedy action tensor([-1.0817, 0.4616, 0.1693, 0.0956]) tensor([0.0805, 0.3768, 0.2813, 0.2613]) -Greedy action tensor([-0.8640, 0.2480, -0.4351, -0.4941]) tensor([0.1424, 0.4329, 0.2186, 0.2061]) -Greedy action tensor([-1.9353, -0.4347, 0.6611, -0.1752]) tensor([0.0405, 0.1815, 0.5429, 0.2352]) -Greedy action tensor([-1.8711, -0.4485, 0.6355, -0.1426]) tensor([0.0434, 0.1800, 0.5322, 0.2444]) -Greedy action tensor([-1.9151, -0.4427, 0.6695, -0.1608]) tensor([0.0410, 0.1787, 0.5434, 0.2369]) -Greedy action tensor([-1.9015, -0.4466, 0.6504, -0.1562]) tensor([0.0419, 0.1797, 0.5382, 0.2402]) -Greedy action tensor([-1.9244, -0.4264, 0.6555, -0.1700]) tensor([0.0409, 0.1829, 0.5397, 0.2364]) -Greedy action tensor([ 0.3111, -0.8116, 1.1209, 0.0151]) tensor([0.2317, 0.0754, 0.5206, 0.1723]) -Greedy action tensor([-0.4145, -0.1656, -0.0878, -0.4397]) tensor([0.2153, 0.2762, 0.2985, 0.2100]) -Greedy action tensor([-0.1888, -0.5506, 1.2434, -0.2739]) tensor([0.1470, 0.1024, 0.6156, 0.1350]) -Greedy action tensor([ 0.0442, -0.7745, -0.6384, -0.1142]) tensor([0.3572, 0.1575, 0.1805, 0.3048]) -Greedy action tensor([-1.1965, -0.2053, -0.3134, 0.2632]) tensor([0.0960, 0.2586, 0.2321, 0.4132]) -Greedy action tensor([-0.2563, 0.4706, -0.7000, -0.4494]) tensor([0.2205, 0.4562, 0.1415, 0.1818]) -Greedy action tensor([ 0.5510, -0.7319, 1.0321, -0.4965]) tensor([0.3081, 0.0854, 0.4984, 0.1081]) -Greedy action tensor([-0.4449, -0.4429, -0.4985, -0.6970]) tensor([0.2683, 0.2689, 0.2543, 0.2085]) -Greedy action tensor([ 0.3612, -0.4395, -0.1794, -0.1663]) tensor([0.3815, 0.1713, 0.2222, 0.2251]) -Greedy action tensor([-0.2511, -0.6480, -0.9320, -0.4308]) tensor([0.3318, 0.2231, 0.1679, 0.2772]) -Greedy action tensor([ 0.5629, -1.7481, 0.0261, 0.5750]) tensor([0.3709, 0.0368, 0.2168, 0.3754]) -Greedy action tensor([-0.2403, 0.2402, -0.1945, 0.6235]) tensor([0.1657, 0.2679, 0.1734, 0.3930]) -Greedy action tensor([-0.1225, -1.4873, 0.9395, -1.2100]) tensor([0.2230, 0.0570, 0.6449, 0.0752]) -Greedy action tensor([ 0.3892, -1.2793, 0.5273, -0.5559]) tensor([0.3669, 0.0692, 0.4213, 0.1426]) -Greedy action tensor([-0.3910, -0.1308, -0.8479, 0.0872]) tensor([0.2201, 0.2855, 0.1394, 0.3550]) -Greedy action tensor([-0.0555, -1.0626, 0.0721, -0.0985]) tensor([0.2891, 0.1056, 0.3284, 0.2769]) -Greedy action tensor([ 0.2584, -0.1811, -0.7625, -0.4907]) tensor([0.4036, 0.2601, 0.1454, 0.1908]) -Greedy action tensor([-0.9388, -0.2437, 0.0825, -0.6866]) tensor([0.1415, 0.2835, 0.3929, 0.1821]) -Greedy action tensor([-0.1820, -0.2216, 0.5740, -0.1209]) tensor([0.1940, 0.1865, 0.4132, 0.2063]) -Greedy action tensor([ 0.0139, -0.6891, -0.6408, 0.2797]) tensor([0.3013, 0.1492, 0.1565, 0.3930]) -Greedy action tensor([-0.7533, -1.6065, 0.0975, -0.3470]) tensor([0.1898, 0.0809, 0.4444, 0.2849]) -Greedy action tensor([-0.3369, 0.4849, 0.9223, -0.7929]) tensor([0.1346, 0.3061, 0.4740, 0.0853]) -Greedy action tensor([ 0.4289, -1.8080, -0.6343, 0.3471]) tensor([0.4213, 0.0450, 0.1455, 0.3882]) -Greedy action tensor([-0.2594, 0.7767, -0.7051, -0.7095]) tensor([0.1962, 0.5530, 0.1257, 0.1251]) -Greedy action tensor([ 0.0545, -0.7782, -0.3384, -0.7366]) tensor([0.3901, 0.1697, 0.2634, 0.1769]) -Greedy action tensor([-0.2914, -0.6119, -0.4618, -0.6031]) tensor([0.3029, 0.2198, 0.2555, 0.2218]) -Greedy action tensor([ 1.2026, -0.0313, 0.4722, 0.1881]) tensor([0.4683, 0.1363, 0.2256, 0.1698]) -Greedy action tensor([ 0.5018, -0.0777, -0.4778, -0.7752]) tensor([0.4516, 0.2530, 0.1695, 0.1259]) -Greedy action tensor([-0.0041, -0.4855, -0.9368, 0.3599]) tensor([0.2898, 0.1791, 0.1140, 0.4171]) -Greedy action tensor([-0.3470, -0.7032, 0.2779, -0.8310]) tensor([0.2390, 0.1674, 0.4464, 0.1473]) -Greedy action tensor([-0.5946, -0.5746, -1.0451, -0.4445]) tensor([0.2618, 0.2671, 0.1669, 0.3042]) -Greedy action tensor([ 1.4326, -1.0598, -0.2108, 1.1659]) tensor([0.4897, 0.0405, 0.0947, 0.3751]) -Greedy action tensor([-0.9747, 0.8221, 0.1139, -0.7018]) tensor([0.0884, 0.5330, 0.2625, 0.1161]) -Greedy action tensor([ 0.0187, -0.2382, -0.1002, 0.3141]) tensor([0.2497, 0.1931, 0.2217, 0.3355]) -Greedy action tensor([ 0.6759, -0.1307, -0.0298, 0.1052]) tensor([0.3992, 0.1782, 0.1971, 0.2256]) -Greedy action tensor([ 0.0434, -0.7527, 0.7503, -0.4011]) tensor([0.2427, 0.1095, 0.4922, 0.1556]) -Greedy action tensor([ 1.2525, -0.3221, -0.2921, 1.4639]) tensor([0.3765, 0.0780, 0.0803, 0.4652]) -Greedy action tensor([ 0.2316, -0.9198, 0.7663, 0.0753]) tensor([0.2578, 0.0815, 0.4401, 0.2205]) -Greedy action tensor([ 0.7879, 0.4741, 0.0245, -0.1058]) tensor([0.3838, 0.2804, 0.1789, 0.1570]) -Greedy action tensor([ 1.1941, -1.4004, 0.6119, -0.4435]) tensor([0.5471, 0.0409, 0.3057, 0.1064]) -Greedy action tensor([-0.1872, -1.2188, -0.1683, -0.2000]) tensor([0.2974, 0.1060, 0.3030, 0.2936]) -Greedy action tensor([ 0.9341, -0.0087, 0.0164, -0.2388]) tensor([0.4765, 0.1856, 0.1903, 0.1475]) -Greedy action tensor([ 0.1999, 0.3515, -0.3455, 0.2200]) tensor([0.2657, 0.3092, 0.1540, 0.2711]) -Greedy action tensor([ 0.4148, -0.7310, 0.5573, 0.5873]) tensor([0.2733, 0.0869, 0.3151, 0.3247]) -Greedy action tensor([ 0.2929, -0.5722, 0.5428, 0.1868]) tensor([0.2774, 0.1168, 0.3562, 0.2495]) -Greedy action tensor([-0.4871, -0.2247, 0.5842, -1.1619]) tensor([0.1746, 0.2270, 0.5096, 0.0889]) -Greedy action tensor([0.3080, 0.0965, 1.8894, 0.4507]) tensor([0.1278, 0.1034, 0.6213, 0.1474]) -Greedy action tensor([-1.4498, -0.1519, 0.8559, -1.4357]) tensor([0.0637, 0.2331, 0.6386, 0.0646]) -Greedy action tensor([-0.1333, 0.7723, -0.8147, -0.1284]) tensor([0.2006, 0.4962, 0.1015, 0.2016]) -Greedy action tensor([ 0.4329, -0.8984, 1.6148, 0.3835]) tensor([0.1826, 0.0482, 0.5954, 0.1738]) -Greedy action tensor([ 0.1542, -0.3283, -0.3596, 0.5685]) tensor([0.2682, 0.1655, 0.1604, 0.4058]) -Greedy action tensor([-2.3768, -0.5546, 0.2406, -0.5935]) tensor([0.0373, 0.2305, 0.5105, 0.2217]) -Greedy action tensor([ 0.3586, -0.3571, 0.6412, 0.1343]) tensor([0.2767, 0.1353, 0.3670, 0.2211]) -Greedy action tensor([ 0.5442, -1.3742, 0.6321, -0.7434]) tensor([0.3977, 0.0584, 0.4342, 0.1097]) -Greedy action tensor([-0.1010, -1.5653, 0.4210, -0.6223]) tensor([0.2849, 0.0659, 0.4801, 0.1691]) -Greedy action tensor([-0.1496, 0.3669, 0.5696, -0.7598]) tensor([0.1897, 0.3179, 0.3894, 0.1030]) -Greedy action tensor([-0.4161, -1.7935, -0.5230, 1.0115]) tensor([0.1582, 0.0399, 0.1422, 0.6596]) -Greedy action tensor([ 0.5023, -0.6342, -0.3179, 0.2774]) tensor([0.3906, 0.1254, 0.1720, 0.3120]) -Greedy action tensor([-0.1987, -0.2074, -1.5602, -0.4408]) tensor([0.3297, 0.3269, 0.0845, 0.2589]) -Greedy action tensor([ 1.5688, -0.6877, -0.7168, 1.1127]) tensor([0.5434, 0.0569, 0.0553, 0.3444]) -Greedy action tensor([ 0.9171, -0.5974, 0.6328, -0.5187]) tensor([0.4524, 0.0995, 0.3404, 0.1076]) -Greedy action tensor([-0.1640, -1.3537, -0.6001, -0.1811]) tensor([0.3408, 0.1037, 0.2204, 0.3351]) -Greedy action tensor([ 0.2594, -0.0879, 0.6770, -0.5003]) tensor([0.2708, 0.1913, 0.4112, 0.1267]) -Greedy action tensor([ 0.7475, 0.2147, -0.1021, -0.2883]) tensor([0.4220, 0.2477, 0.1805, 0.1498]) -Greedy action tensor([-0.1201, 1.2544, -0.0149, -0.6104]) tensor([0.1498, 0.5921, 0.1664, 0.0917]) -Greedy action tensor([ 0.3742, -0.1887, 0.3584, -0.6550]) tensor([0.3435, 0.1956, 0.3381, 0.1227]) -Greedy action tensor([ 0.3480, 0.3625, -0.0907, 0.0540]) tensor([0.2937, 0.2980, 0.1894, 0.2189]) -Greedy action tensor([ 1.0460, 0.4472, -0.1308, 0.1093]) tensor([0.4445, 0.2442, 0.1370, 0.1742]) -Greedy action tensor([ 1.2076, -0.3542, -0.3487, 0.2097]) tensor([0.5589, 0.1172, 0.1179, 0.2060]) -Greedy action tensor([ 0.0722, -0.3124, 0.5865, 0.2631]) tensor([0.2191, 0.1492, 0.3665, 0.2652]) -Greedy action tensor([ 1.1686, 0.5091, -0.6763, 0.5895]) tensor([0.4473, 0.2313, 0.0707, 0.2507]) -Greedy action tensor([-0.0094, -0.1817, 0.9073, -0.5357]) tensor([0.2027, 0.1706, 0.5069, 0.1198]) -Greedy action tensor([ 0.3414, -0.8131, -0.2238, -0.4353]) tensor([0.4267, 0.1345, 0.2425, 0.1963]) -Greedy action tensor([ 0.4426, -0.1865, 0.0635, 0.1083]) tensor([0.3409, 0.1817, 0.2333, 0.2440]) -Greedy action tensor([ 1.4073, -0.2858, 0.9938, -0.0885]) tensor([0.4833, 0.0889, 0.3196, 0.1083]) -Greedy action tensor([-0.2643, -0.6475, -0.3320, -0.2653]) tensor([0.2766, 0.1886, 0.2585, 0.2763]) -Greedy action tensor([-1.0704, -0.6872, -1.1960, -0.2490]) tensor([0.1779, 0.2609, 0.1569, 0.4044]) -Greedy action tensor([-0.7299, -0.8985, 0.8289, -0.9997]) tensor([0.1358, 0.1148, 0.6457, 0.1037]) -Greedy action tensor([ 0.1905, -0.2727, -0.5670, -0.5582]) tensor([0.3889, 0.2447, 0.1824, 0.1840]) -Greedy action tensor([ 1.4996, -0.1452, 0.2740, 1.0188]) tensor([0.4751, 0.0917, 0.1395, 0.2937]) -Greedy action tensor([ 1.0838, 0.1579, -0.6600, 0.0259]) tensor([0.5213, 0.2065, 0.0912, 0.1810]) -Greedy action tensor([ 1.2624, 0.1166, -0.5614, 0.1509]) tensor([0.5529, 0.1758, 0.0893, 0.1820]) -Greedy action tensor([ 1.7409, -0.4474, -0.5812, 0.2613]) tensor([0.6955, 0.0780, 0.0682, 0.1584]) -Greedy action tensor([ 0.9575, -0.3034, 0.0802, -0.0432]) tensor([0.4838, 0.1371, 0.2012, 0.1779]) -Greedy action tensor([ 0.9863, -0.6862, 0.0488, -0.1165]) tensor([0.5232, 0.0982, 0.2049, 0.1737]) -Greedy action tensor([ 1.6418, -0.9765, -0.2378, -0.0165]) tensor([0.7062, 0.0515, 0.1078, 0.1345]) -Greedy action tensor([ 1.5852, -0.5728, -0.5144, 0.7309]) tensor([0.6011, 0.0695, 0.0736, 0.2558]) -Greedy action tensor([ 1.3699, -0.4837, -0.5043, 0.0850]) tensor([0.6302, 0.0987, 0.0967, 0.1744]) -Greedy action tensor([ 1.5032, -1.0592, -0.2692, 0.5121]) tensor([0.6180, 0.0477, 0.1050, 0.2294]) -Greedy action tensor([ 1.3918, -0.6292, -0.2040, 0.1231]) tensor([0.6186, 0.0820, 0.1254, 0.1740]) -Greedy action tensor([ 1.9075e+00, 5.4699e-01, -3.7279e-01, -1.5741e-03]) tensor([0.6636, 0.1702, 0.0679, 0.0984]) -Greedy action tensor([ 2.1685, -0.7285, -0.9205, 0.3032]) tensor([0.7964, 0.0440, 0.0363, 0.1233]) -Greedy action tensor([ 1.1762, -0.4236, -0.1266, 0.3195]) tensor([0.5268, 0.1064, 0.1432, 0.2237]) -Greedy action tensor([ 1.7982, -0.6573, -0.5176, 0.4987]) tensor([0.6863, 0.0589, 0.0677, 0.1871]) -Greedy action tensor([ 1.2279, -0.6259, 0.0130, 0.2739]) tensor([0.5439, 0.0852, 0.1614, 0.2095]) -Greedy action tensor([ 0.9752, -0.3778, 0.1047, 0.1388]) tensor([0.4738, 0.1225, 0.1984, 0.2053]) -Greedy action tensor([ 1.2827, -0.3701, -0.1365, 0.1130]) tensor([0.5734, 0.1098, 0.1387, 0.1780]) -Greedy action tensor([ 0.6921, -0.4270, -0.0692, 0.2504]) tensor([0.4104, 0.1340, 0.1917, 0.2639]) -Greedy action tensor([ 2.2745, -0.3686, -0.0915, 0.5646]) tensor([0.7430, 0.0529, 0.0697, 0.1344]) -Greedy action tensor([ 1.5793, 0.3415, -1.2530, -0.1800]) tensor([0.6574, 0.1907, 0.0387, 0.1132]) -Greedy action tensor([ 1.4710, -0.2008, -0.4913, 0.3792]) tensor([0.6009, 0.1129, 0.0845, 0.2017]) -Greedy action tensor([ 2.2683, -0.8989, -0.3782, 0.1514]) tensor([0.8108, 0.0341, 0.0575, 0.0976]) -Greedy action tensor([ 1.7686, -0.2432, -0.5295, 0.5414]) tensor([0.6547, 0.0876, 0.0658, 0.1919]) -Greedy action tensor([ 2.0851, -0.7176, -0.3359, 0.2141]) tensor([0.7672, 0.0465, 0.0682, 0.1181]) -Greedy action tensor([ 1.6228, -0.3727, -0.2583, -0.0131]) tensor([0.6743, 0.0917, 0.1028, 0.1313]) -Greedy action tensor([ 1.0920, -0.3480, -0.5734, 0.2592]) tensor([0.5374, 0.1273, 0.1016, 0.2337]) -Greedy action tensor([ 2.3005, 0.0414, -0.3768, 0.0618]) tensor([0.7814, 0.0816, 0.0537, 0.0833]) -Greedy action tensor([ 2.0877, -0.3227, -0.1526, 0.5665]) tensor([0.7069, 0.0635, 0.0752, 0.1544]) -Greedy action tensor([ 1.6285, -0.6237, -0.2356, 0.0718]) tensor([0.6798, 0.0715, 0.1054, 0.1433]) -Greedy action tensor([ 1.3756, -0.3316, -0.2718, 0.0912]) tensor([0.6058, 0.1099, 0.1166, 0.1677]) -Greedy action tensor([ 1.4398, -0.6704, -0.4053, 0.0175]) tensor([0.6577, 0.0797, 0.1039, 0.1586]) -Greedy action tensor([ 1.8784, -0.3377, -0.4044, 0.6150]) tensor([0.6695, 0.0730, 0.0683, 0.1892]) -Greedy action tensor([ 1.2254, -0.0048, -1.2290, -0.0915]) tensor([0.6075, 0.1775, 0.0522, 0.1628]) -Greedy action tensor([ 1.3371, 0.0945, -0.4666, 0.4696]) tensor([0.5338, 0.1541, 0.0879, 0.2242]) -Greedy action tensor([ 1.4267, -0.6148, -0.4191, 0.2329]) tensor([0.6286, 0.0816, 0.0993, 0.1905]) -Greedy action tensor([ 1.9325, -0.7330, -0.7591, 0.1196]) tensor([0.7689, 0.0535, 0.0521, 0.1255]) -Greedy action tensor([ 2.2727, 0.3097, -0.5173, 0.1310]) tensor([0.7580, 0.1065, 0.0466, 0.0890]) -Greedy action tensor([ 1.5122, 0.0261, -0.3715, 0.1342]) tensor([0.6133, 0.1388, 0.0932, 0.1546]) -Greedy action tensor([ 1.5332, -0.7445, -1.0061, 0.3545]) tensor([0.6716, 0.0688, 0.0530, 0.2066]) -Greedy action tensor([ 1.4771, -1.1329, -0.0623, 0.5047]) tensor([0.6002, 0.0441, 0.1287, 0.2270]) -Greedy action tensor([ 1.1532, -0.3184, -0.2688, 0.5223]) tensor([0.4993, 0.1146, 0.1204, 0.2657]) -Greedy action tensor([ 0.4687, -0.1918, 0.0758, -0.2976]) tensor([0.3765, 0.1945, 0.2541, 0.1749]) -Greedy action tensor([ 0.8971, -0.1852, -0.4102, -0.0332]) tensor([0.4991, 0.1691, 0.1350, 0.1968]) -Greedy action tensor([ 1.6140, -0.0427, -0.3071, 0.7468]) tensor([0.5690, 0.1086, 0.0833, 0.2391]) -Greedy action tensor([ 1.4016, 0.0549, -0.2526, 0.1924]) tensor([0.5715, 0.1486, 0.1093, 0.1706]) -Greedy action tensor([ 1.4336, -0.2784, -0.4289, 0.1385]) tensor([0.6213, 0.1121, 0.0965, 0.1701]) -Greedy action tensor([ 1.9852, -0.4062, -1.2810, 0.0273]) tensor([0.7869, 0.0720, 0.0300, 0.1111]) -Greedy action tensor([ 0.9482, -0.5146, -0.1117, -0.1263]) tensor([0.5210, 0.1206, 0.1805, 0.1779]) -Greedy action tensor([ 1.8712, -0.2708, -0.6565, 0.1156]) tensor([0.7299, 0.0857, 0.0583, 0.1261]) -Greedy action tensor([ 0.7964, -0.2014, 0.1332, -0.2483]) tensor([0.4473, 0.1649, 0.2304, 0.1574]) -Greedy action tensor([ 2.1374, -1.1249, -0.0980, 0.4358]) tensor([0.7532, 0.0288, 0.0806, 0.1374]) -Greedy action tensor([ 2.1281, -0.7316, -0.3368, 0.3182]) tensor([0.7657, 0.0439, 0.0651, 0.1253]) -Greedy action tensor([ 1.7954, -0.4704, -0.7168, 0.2252]) tensor([0.7180, 0.0745, 0.0582, 0.1493]) -Greedy action tensor([ 1.3625, -0.4082, -0.6630, 0.3032]) tensor([0.6065, 0.1032, 0.0800, 0.2103]) -Greedy action tensor([ 1.2626, -0.3997, -0.5368, 0.7102]) tensor([0.5180, 0.0983, 0.0857, 0.2981]) -Greedy action tensor([ 1.4087, -0.5867, -0.4672, 0.2061]) tensor([0.6291, 0.0855, 0.0964, 0.1890]) -Greedy action tensor([ 1.7248, -0.8782, -0.0158, 0.2883]) tensor([0.6724, 0.0498, 0.1179, 0.1599]) -Greedy action tensor([ 1.7400, -1.0188, -0.3145, 0.9200]) tensor([0.6128, 0.0388, 0.0785, 0.2699]) -Greedy action tensor([ 1.2532, 0.0637, -0.6955, -0.1259]) tensor([0.5887, 0.1792, 0.0839, 0.1482]) -Greedy action tensor([ 2.4262, 0.5128, -0.0748, -0.2295]) tensor([0.7693, 0.1135, 0.0631, 0.0540]) -Greedy action tensor([ 1.8354, -1.0061, -0.2229, 0.5534]) tensor([0.6833, 0.0399, 0.0872, 0.1896]) -Greedy action tensor([ 1.6958, -0.8512, -0.2226, 0.1954]) tensor([0.6905, 0.0541, 0.1014, 0.1540]) -Greedy action tensor([ 1.6518, -0.1553, -0.7121, -0.0935]) tensor([0.6980, 0.1146, 0.0656, 0.1218]) -Greedy action tensor([ 1.7406, 0.1712, -0.6439, 0.3934]) tensor([0.6409, 0.1334, 0.0590, 0.1666]) -Greedy action tensor([ 1.6828, -0.4427, -0.7512, 0.6738]) tensor([0.6363, 0.0760, 0.0558, 0.2320]) -Greedy action tensor([ 1.0662, -0.7651, -0.4252, 0.0045]) tensor([0.5777, 0.0925, 0.1300, 0.1998]) -Greedy action tensor([ 1.9537, -0.6837, -0.4377, 0.2325]) tensor([0.7452, 0.0533, 0.0682, 0.1333]) -Greedy action tensor([ 1.7713, -0.4169, -0.0742, 0.6708]) tensor([0.6239, 0.0700, 0.0985, 0.2076]) -Greedy action tensor([ 1.6601, -0.9736, -0.2722, 0.1928]) tensor([0.6910, 0.0496, 0.1001, 0.1593]) -Greedy action tensor([ 2.0834, -0.5484, -0.3691, 0.5575]) tensor([0.7270, 0.0523, 0.0626, 0.1581]) -Greedy action tensor([ 1.5128, -0.0204, -0.9743, 0.2167]) tensor([0.6359, 0.1373, 0.0529, 0.1740]) -Greedy action tensor([ 1.7909, -0.3970, -0.3107, 0.4666]) tensor([0.6665, 0.0747, 0.0815, 0.1773]) -Greedy action tensor([ 1.6420, -0.5119, -0.6040, 0.2742]) tensor([0.6773, 0.0786, 0.0717, 0.1725]) -Greedy action tensor([ 2.0326, -1.0072, 0.0065, 0.2504]) tensor([0.7419, 0.0355, 0.0978, 0.1248]) -Greedy action tensor([ 1.1416, -0.2091, -0.3467, 0.4165]) tensor([0.5078, 0.1316, 0.1146, 0.2459]) -Greedy action tensor([ 1.6091, -0.6910, -0.4994, 0.2473]) tensor([0.6767, 0.0678, 0.0822, 0.1734]) -Greedy action tensor([ 2.6164, -0.9016, -0.0191, 1.0415]) tensor([0.7643, 0.0227, 0.0548, 0.1582]) -Greedy action tensor([ 1.1001, -0.2840, -0.0523, 0.2503]) tensor([0.5015, 0.1257, 0.1584, 0.2144]) -Greedy action tensor([ 1.4969, -0.0834, -0.7634, 0.7722]) tensor([0.5572, 0.1147, 0.0581, 0.2699]) -Greedy action tensor([ 0.8475, 0.1685, -0.0011, -0.7131]) tensor([0.4662, 0.2364, 0.1995, 0.0979]) -Greedy action tensor([ 1.8444, -0.0584, -0.4665, 0.4989]) tensor([0.6628, 0.0989, 0.0657, 0.1726]) -Greedy action tensor([ 1.4056, -0.4968, -0.1840, 0.5200]) tensor([0.5664, 0.0845, 0.1155, 0.2336]) -Greedy action tensor([ 0.7520, -0.2394, -0.0501, -0.0979]) tensor([0.4451, 0.1651, 0.1996, 0.1902]) -Greedy action tensor([ 0.5533, -0.0641, -0.1021, -0.0161]) tensor([0.3810, 0.2055, 0.1978, 0.2156]) -Greedy action tensor([ 0.3762, 0.1842, -0.0058, -0.2222]) tensor([0.3271, 0.2699, 0.2232, 0.1798]) -Greedy action tensor([ 0.8817, -0.5602, -0.0124, -0.3534]) tensor([0.5165, 0.1221, 0.2112, 0.1502]) -Greedy action tensor([ 0.8408, -0.3282, -0.1563, -0.2147]) tensor([0.4932, 0.1532, 0.1820, 0.1716]) -Greedy action tensor([ 0.8796, -0.3017, -0.1921, -0.2992]) tensor([0.5110, 0.1568, 0.1750, 0.1572]) -Greedy action tensor([ 0.4661, -0.3990, -0.1923, -0.6613]) tensor([0.4420, 0.1861, 0.2288, 0.1432]) -Greedy action tensor([ 0.4534, 0.1184, 0.0139, -0.1733]) tensor([0.3455, 0.2472, 0.2227, 0.1846]) -Greedy action tensor([ 0.5917, -0.0926, -0.0420, -0.5518]) tensor([0.4248, 0.2143, 0.2254, 0.1354]) -Greedy action tensor([ 0.6635, -0.5651, -0.1578, -0.1108]) tensor([0.4559, 0.1334, 0.2005, 0.2102]) -Greedy action tensor([ 0.6563, -0.3468, 0.0496, -0.2796]) tensor([0.4340, 0.1592, 0.2366, 0.1702]) -Greedy action tensor([ 0.8501, -0.6274, -0.0915, -0.3432]) tensor([0.5204, 0.1188, 0.2030, 0.1578]) -Greedy action tensor([ 0.6154, -0.5210, -0.1390, -0.1574]) tensor([0.4439, 0.1425, 0.2087, 0.2049]) -Greedy action tensor([ 0.6132, 0.0366, -0.0770, 0.0182]) tensor([0.3824, 0.2149, 0.1918, 0.2109]) -Greedy action tensor([ 0.8105, -0.5866, 0.2239, -0.5170]) tensor([0.4834, 0.1196, 0.2689, 0.1282]) -Greedy action tensor([ 0.8789, -0.3283, -0.0320, -0.1060]) tensor([0.4820, 0.1441, 0.1938, 0.1800]) -Greedy action tensor([ 0.7556, -0.6041, -0.0048, -0.4189]) tensor([0.4918, 0.1263, 0.2299, 0.1520]) -Greedy action tensor([ 0.3222, -0.0107, -0.0114, -0.0274]) tensor([0.3187, 0.2284, 0.2283, 0.2246]) -Greedy action tensor([ 0.7890, -0.6616, 0.1673, -0.1852]) tensor([0.4654, 0.1091, 0.2499, 0.1757]) -Greedy action tensor([ 0.8927, -0.4508, -0.0393, -0.2514]) tensor([0.5068, 0.1322, 0.1996, 0.1614]) -Greedy action tensor([ 0.9134, -0.2640, -0.0416, -0.0489]) tensor([0.4819, 0.1485, 0.1855, 0.1841]) -Greedy action tensor([ 1.0945, -0.3132, 0.0316, -0.1603]) tensor([0.5333, 0.1305, 0.1842, 0.1520]) -Greedy action tensor([ 1.1422, -0.8384, -0.0251, -0.7290]) tensor([0.6238, 0.0861, 0.1941, 0.0960]) -Greedy action tensor([ 0.7998, -0.1929, 0.0125, -0.0116]) tensor([0.4405, 0.1633, 0.2005, 0.1957]) -Greedy action tensor([ 0.6228, -0.4966, -0.0576, -0.1636]) tensor([0.4370, 0.1427, 0.2213, 0.1990]) -Greedy action tensor([ 0.9045, -0.7254, 0.1866, -0.6557]) tensor([0.5280, 0.1035, 0.2576, 0.1109]) -Greedy action tensor([ 0.8180, -0.3955, -0.2172, -0.1642]) tensor([0.4934, 0.1466, 0.1752, 0.1848]) -Greedy action tensor([ 0.5048, -0.2085, -0.0182, -0.1204]) tensor([0.3820, 0.1872, 0.2264, 0.2044]) -Greedy action tensor([ 0.7958, -0.6397, 0.0751, -0.2209]) tensor([0.4793, 0.1141, 0.2332, 0.1734]) -Greedy action tensor([ 0.9175, -0.2499, -0.1708, -0.4456]) tensor([0.5253, 0.1634, 0.1769, 0.1344]) -Greedy action tensor([ 0.9473, -0.6668, 0.0279, -0.4848]) tensor([0.5445, 0.1084, 0.2171, 0.1300]) -Greedy action tensor([ 0.6649, -0.3618, -0.1165, -0.2458]) tensor([0.4508, 0.1615, 0.2064, 0.1813]) -Greedy action tensor([ 0.8069, -0.2045, -0.1588, -0.1656]) tensor([0.4711, 0.1714, 0.1794, 0.1781]) -Greedy action tensor([ 1.2637, -0.7087, -0.1948, -0.5284]) tensor([0.6501, 0.0904, 0.1512, 0.1083]) -Greedy action tensor([ 0.4671, -0.5277, -0.0477, -0.1125]) tensor([0.3956, 0.1463, 0.2364, 0.2216]) -Greedy action tensor([ 0.6601, -0.5317, -0.1116, -0.0803]) tensor([0.4459, 0.1354, 0.2061, 0.2127]) -Greedy action tensor([ 0.8579, -0.5440, -0.1077, -0.5201]) tensor([0.5322, 0.1310, 0.2026, 0.1342]) -Greedy action tensor([ 0.4912, -0.2342, 0.0147, -0.0560]) tensor([0.3726, 0.1804, 0.2314, 0.2156]) -Greedy action tensor([ 1.0949, -0.6422, -0.0846, -0.5205]) tensor([0.5944, 0.1046, 0.1828, 0.1182]) -Greedy action tensor([ 0.5747, -0.5447, 0.0909, -0.4018]) tensor([0.4311, 0.1408, 0.2658, 0.1624]) -Greedy action tensor([ 1.0942, -0.9173, -0.0368, -0.3813]) tensor([0.5934, 0.0794, 0.1915, 0.1357]) -Greedy action tensor([ 0.8940, -0.5194, -0.1541, -0.4169]) tensor([0.5366, 0.1306, 0.1882, 0.1447]) -Greedy action tensor([ 0.5297, -0.1283, -0.0089, 0.0014]) tensor([0.3716, 0.1925, 0.2169, 0.2191]) -Greedy action tensor([ 0.9648, -0.7859, -0.0083, -0.6546]) tensor([0.5716, 0.0993, 0.2160, 0.1132]) -Greedy action tensor([ 0.4173, -0.1391, -0.0450, -0.0683]) tensor([0.3548, 0.2034, 0.2235, 0.2183]) -Greedy action tensor([ 0.5945, -0.2633, -0.0545, 0.0630]) tensor([0.3946, 0.1673, 0.2062, 0.2319]) -Greedy action tensor([ 0.2453, 0.1333, -0.0620, -0.0195]) tensor([0.2944, 0.2632, 0.2165, 0.2259]) -Greedy action tensor([ 0.9129, -0.6557, -0.0991, -0.5786]) tensor([0.5565, 0.1159, 0.2023, 0.1252]) -Greedy action tensor([ 0.5615, -0.2227, -0.1642, -0.0308]) tensor([0.4010, 0.1831, 0.1941, 0.2218]) -Greedy action tensor([ 0.8823, -0.2532, -0.0522, -0.0601]) tensor([0.4753, 0.1527, 0.1867, 0.1852]) -Greedy action tensor([ 0.7244, -0.7566, 0.0780, -0.2679]) tensor([0.4712, 0.1072, 0.2469, 0.1747]) -Greedy action tensor([ 0.6776, -0.3587, -0.0861, -0.1376]) tensor([0.4419, 0.1567, 0.2059, 0.1955]) -Greedy action tensor([ 0.6217, -0.3401, -0.0593, -0.0524]) tensor([0.4170, 0.1594, 0.2111, 0.2125]) -Greedy action tensor([ 0.8611, -0.6803, -0.0467, -0.3287]) tensor([0.5203, 0.1114, 0.2099, 0.1583]) -Greedy action tensor([ 0.9195, -0.2217, -0.0182, -0.1222]) tensor([0.4845, 0.1548, 0.1897, 0.1710]) -Greedy action tensor([ 0.5729, 0.0141, -0.0970, 0.0753]) tensor([0.3715, 0.2125, 0.1901, 0.2259]) -Greedy action tensor([ 0.9711, -0.6797, -0.0620, -0.2164]) tensor([0.5397, 0.1036, 0.1921, 0.1646]) -Greedy action tensor([ 0.7171, -0.3314, -0.0192, -0.1543]) tensor([0.4449, 0.1559, 0.2131, 0.1861]) -Greedy action tensor([ 0.8281, -0.4974, -0.1486, -0.4416]) tensor([0.5200, 0.1381, 0.1958, 0.1461]) -Greedy action tensor([ 1.2189, -0.6152, -0.1338, -0.5681]) tensor([0.6306, 0.1007, 0.1630, 0.1056]) -Greedy action tensor([ 0.6779, -0.5050, -0.1647, -0.4099]) tensor([0.4822, 0.1477, 0.2076, 0.1625]) -Greedy action tensor([ 0.6972, -0.3384, -0.0215, -0.0413]) tensor([0.4310, 0.1530, 0.2101, 0.2059]) -Greedy action tensor([ 0.6027, -0.1877, -0.0411, -0.0088]) tensor([0.3966, 0.1799, 0.2083, 0.2152]) -Greedy action tensor([ 0.9547, -0.6609, 0.0122, -0.6806]) tensor([0.5608, 0.1115, 0.2185, 0.1093]) -Greedy action tensor([ 0.5086, -0.3524, -0.0868, -0.1877]) tensor([0.4044, 0.1710, 0.2230, 0.2016]) -Greedy action tensor([ 0.7026, -0.3863, -0.0762, -0.1815]) tensor([0.4528, 0.1524, 0.2078, 0.1870]) -Greedy action tensor([ 0.5375, 0.0853, -0.0761, 0.0092]) tensor([0.3614, 0.2299, 0.1956, 0.2131]) -Greedy action tensor([ 0.4639, -0.2328, -0.0295, -0.2953]) tensor([0.3881, 0.1933, 0.2369, 0.1816]) -Greedy action tensor([ 0.9708, -0.8504, 0.0978, -0.3904]) tensor([0.5447, 0.0881, 0.2275, 0.1396]) -Greedy action tensor([ 0.9681, -0.4811, -0.0918, -0.4544]) tensor([0.5487, 0.1288, 0.1901, 0.1323]) -Greedy action tensor([ 1.2020, -0.7535, 0.1404, -0.6346]) tensor([0.6072, 0.0859, 0.2101, 0.0968]) -Greedy action tensor([ 0.6896, -0.4912, 0.0171, -0.4031]) tensor([0.4645, 0.1426, 0.2371, 0.1558]) -Greedy action tensor([ 0.6206, -0.1300, 0.1800, -0.1498]) tensor([0.3878, 0.1831, 0.2496, 0.1795]) -Greedy action tensor([ 0.4972, -0.3391, 0.1442, -0.3619]) tensor([0.3907, 0.1693, 0.2745, 0.1655]) -Greedy action tensor([ 1.2491, -0.6507, -0.1295, -0.8737]) tensor([0.6574, 0.0983, 0.1656, 0.0787]) -Greedy action tensor([ 1.2306, -0.6690, 0.0386, -0.7990]) tensor([0.6311, 0.0944, 0.1916, 0.0829]) -Greedy action tensor([ 0.4612, -0.0053, 0.0558, -0.2468]) tensor([0.3589, 0.2251, 0.2393, 0.1768]) -Greedy action tensor([ 1.0247, -0.3996, -0.0495, -0.7109]) tensor([0.5687, 0.1369, 0.1942, 0.1003]) -Greedy action tensor([ 0.6204, -0.1245, -0.0277, -0.1689]) tensor([0.4078, 0.1936, 0.2133, 0.1852]) -Greedy action tensor([ 0.8937, -0.7247, -0.1251, -0.3216]) tensor([0.5388, 0.1068, 0.1945, 0.1598]) -Greedy action tensor([ 0.8428, -0.5795, 0.0628, -0.5752]) tensor([0.5150, 0.1242, 0.2361, 0.1247]) -Greedy action tensor([-1.7524, -0.4154, 0.6296, -0.0197]) tensor([0.0470, 0.1788, 0.5085, 0.2657]) -Greedy action tensor([-1.7896, -0.4920, 0.5861, -0.0835]) tensor([0.0478, 0.1749, 0.5141, 0.2632]) -Greedy action tensor([-1.8539, -0.4640, 0.6310, -0.1348]) tensor([0.0443, 0.1777, 0.5311, 0.2469]) -Greedy action tensor([-1.9401, -0.4121, 0.6573, -0.1774]) tensor([0.0402, 0.1854, 0.5400, 0.2344]) -Greedy action tensor([-1.9367, -0.4405, 0.6640, -0.1762]) tensor([0.0404, 0.1804, 0.5443, 0.2349]) -Greedy action tensor([-1.9106, -0.4579, 0.6562, -0.1610]) tensor([0.0416, 0.1777, 0.5415, 0.2392]) -Greedy action tensor([-1.5167, -0.5101, 0.5055, 0.1578]) tensor([0.0601, 0.1646, 0.4544, 0.3209]) -Greedy action tensor([-1.1791, -0.4288, 0.3891, -0.1137]) tensor([0.0924, 0.1958, 0.4435, 0.2683]) -Greedy action tensor([-1.0680, 0.8176, 0.1652, 0.1024]) tensor([0.0702, 0.4626, 0.2409, 0.2263]) -Greedy action tensor([-1.9429, -0.4555, 0.6756, -0.1783]) tensor([0.0400, 0.1772, 0.5491, 0.2337]) -Greedy action tensor([-1.6264, -0.4509, 0.5192, -0.0424]) tensor([0.0566, 0.1834, 0.4839, 0.2760]) -Greedy action tensor([-1.9177, -0.4367, 0.6525, -0.1679]) tensor([0.0413, 0.1816, 0.5396, 0.2376]) -Greedy action tensor([-1.9218, -0.4431, 0.6581, -0.1688]) tensor([0.0411, 0.1801, 0.5418, 0.2370]) -Greedy action tensor([-0.7864, -1.0353, 0.3757, -0.3310]) tensor([0.1526, 0.1190, 0.4878, 0.2406]) -Greedy action tensor([-1.4262, -0.6072, 0.3983, 0.1065]) tensor([0.0709, 0.1609, 0.4397, 0.3285]) -Greedy action tensor([-1.9335, -0.4413, 0.6625, -0.1741]) tensor([0.0405, 0.1803, 0.5437, 0.2355]) -Greedy action tensor([-1.9218, -0.4346, 0.6626, -0.1724]) tensor([0.0409, 0.1811, 0.5425, 0.2354]) -Greedy action tensor([-1.5866e+00, 3.9663e-01, 3.6927e-01, -1.2169e-03]) tensor([0.0495, 0.3594, 0.3497, 0.2414]) -Greedy action tensor([-1.8734, -0.4606, 0.6330, -0.1355]) tensor([0.0434, 0.1782, 0.5318, 0.2466]) -Greedy action tensor([-1.8517, -0.4036, 0.6602, -0.1069]) tensor([0.0429, 0.1825, 0.5289, 0.2456]) -Greedy action tensor([-1.8578, -0.3193, 0.6309, -0.1294]) tensor([0.0429, 0.1996, 0.5162, 0.2413]) -Greedy action tensor([-0.5092, -0.1387, 0.3258, 0.6056]) tensor([0.1282, 0.1856, 0.2954, 0.3908]) -Greedy action tensor([-1.6953, -0.4413, 0.5743, -0.0103]) tensor([0.0511, 0.1790, 0.4944, 0.2755]) -Greedy action tensor([-1.9389, -0.4329, 0.6620, -0.1763]) tensor([0.0403, 0.1817, 0.5431, 0.2349]) -Greedy action tensor([-1.7318, -0.3773, 0.5539, -0.1093]) tensor([0.0506, 0.1960, 0.4973, 0.2562]) -Greedy action tensor([-1.8127, -0.3141, 0.6403, -0.0801]) tensor([0.0439, 0.1967, 0.5108, 0.2485]) -Greedy action tensor([-0.3945, 1.0217, 0.0501, 0.1655]) tensor([0.1186, 0.4888, 0.1850, 0.2076]) -Greedy action tensor([-1.8411, -0.3104, 0.6269, -0.1074]) tensor([0.0433, 0.2002, 0.5112, 0.2453]) -Greedy action tensor([-1.3610, 0.1129, 0.3162, 0.1100]) tensor([0.0664, 0.2897, 0.3550, 0.2889]) -Greedy action tensor([-1.1794, -0.6058, 0.3629, 0.0261]) tensor([0.0927, 0.1645, 0.4334, 0.3094]) -Greedy action tensor([-1.9352, -0.4361, 0.6632, -0.1754]) tensor([0.0404, 0.1811, 0.5435, 0.2350]) -Greedy action tensor([-1.9312, -0.4201, 0.6570, -0.1702]) tensor([0.0406, 0.1838, 0.5397, 0.2360]) -Greedy action tensor([-1.5480, 0.0146, 0.4438, 0.0547]) tensor([0.0554, 0.2641, 0.4057, 0.2749]) -Greedy action tensor([-0.7368, -0.2570, 0.2831, 0.1124]) tensor([0.1294, 0.2091, 0.3589, 0.3026]) -Greedy action tensor([-1.9324, -0.4458, 0.6714, -0.1709]) tensor([0.0404, 0.1786, 0.5459, 0.2351]) -Greedy action tensor([-0.4780, 1.0462, 0.0301, 0.3222]) tensor([0.1055, 0.4844, 0.1753, 0.2348]) -Greedy action tensor([-1.9089, -0.4413, 0.6508, -0.1593]) tensor([0.0416, 0.1806, 0.5383, 0.2394]) -Greedy action tensor([-1.4816, 0.3639, 0.2889, 0.1826]) tensor([0.0541, 0.3425, 0.3177, 0.2857]) -Greedy action tensor([-1.6377, -0.4585, 0.5271, -0.1042]) tensor([0.0568, 0.1848, 0.4951, 0.2633]) -Greedy action tensor([ 0.7997, 1.1651, -0.0339, 0.5885]) tensor([0.2714, 0.3910, 0.1179, 0.2197]) -Greedy action tensor([-1.7533, -0.1304, 0.5332, -0.0437]) tensor([0.0467, 0.2364, 0.4591, 0.2578]) -Greedy action tensor([-1.4818, 0.0770, 0.5264, 0.2188]) tensor([0.0535, 0.2544, 0.3988, 0.2932]) -Greedy action tensor([-1.6774, -0.5074, 0.5381, -0.0688]) tensor([0.0544, 0.1753, 0.4986, 0.2718]) -Greedy action tensor([-1.7403, -0.0038, 0.5009, -0.1021]) tensor([0.0471, 0.2674, 0.4430, 0.2424]) -Greedy action tensor([-1.9037, -0.4553, 0.6507, -0.1620]) tensor([0.0420, 0.1786, 0.5399, 0.2395]) -Greedy action tensor([-1.3656, -0.2252, 0.4393, 0.5001]) tensor([0.0600, 0.1877, 0.3647, 0.3876]) -Greedy action tensor([ 0.2770, -0.1684, 0.8998, 1.6863]) tensor([0.1316, 0.0843, 0.2454, 0.5387]) -Greedy action tensor([-1.9100, -0.4038, 0.6463, -0.1596]) tensor([0.0414, 0.1867, 0.5336, 0.2383]) -Greedy action tensor([-1.7610, -0.0324, 0.5809, -0.0193]) tensor([0.0440, 0.2477, 0.4574, 0.2510]) -Greedy action tensor([-1.3878, -0.3573, 0.4755, 0.1934]) tensor([0.0662, 0.1855, 0.4266, 0.3217]) -Greedy action tensor([-1.9299, -0.4231, 0.6592, -0.1704]) tensor([0.0406, 0.1831, 0.5405, 0.2358]) -Greedy action tensor([-1.9042, -0.4462, 0.6505, -0.1594]) tensor([0.0419, 0.1799, 0.5386, 0.2396]) -Greedy action tensor([-1.8913, -0.4500, 0.6448, -0.1539]) tensor([0.0425, 0.1795, 0.5366, 0.2414]) -Greedy action tensor([-1.8897, -0.4352, 0.6355, -0.1538]) tensor([0.0426, 0.1826, 0.5328, 0.2420]) -Greedy action tensor([-1.9208, -0.3783, 0.6473, -0.1557]) tensor([0.0407, 0.1904, 0.5310, 0.2379]) -Greedy action tensor([-1.7557, -0.5221, 0.5995, -0.0400]) tensor([0.0487, 0.1672, 0.5133, 0.2708]) -Greedy action tensor([-1.3719, -0.2852, 0.3281, 0.1972]) tensor([0.0702, 0.2082, 0.3844, 0.3372]) -Greedy action tensor([-1.8776, -0.4033, 0.6356, -0.1382]) tensor([0.0427, 0.1866, 0.5274, 0.2433]) -Greedy action tensor([-1.9051, -0.4531, 0.6533, -0.1593]) tensor([0.0418, 0.1786, 0.5400, 0.2396]) -Greedy action tensor([-0.6657, 0.8545, 0.1038, -0.0647]) tensor([0.1046, 0.4786, 0.2259, 0.1909]) -Greedy action tensor([-1.9048, -0.3848, 0.6470, -0.1583]) tensor([0.0414, 0.1894, 0.5316, 0.2376]) -Greedy action tensor([-1.6922, -0.3937, 0.6683, 0.0456]) tensor([0.0477, 0.1749, 0.5059, 0.2714]) -Greedy action tensor([-1.0840, -0.0873, 0.5929, 0.1996]) tensor([0.0789, 0.2139, 0.4222, 0.2850]) -Greedy action tensor([-1.9188, -0.4378, 0.6533, -0.1682]) tensor([0.0412, 0.1813, 0.5400, 0.2375]) -Greedy action tensor([-1.8951, -0.4587, 0.6476, -0.1566]) tensor([0.0424, 0.1781, 0.5385, 0.2410]) -Greedy action tensor([-0.9418, -0.4982, 0.2479, 0.1209]) tensor([0.1144, 0.1783, 0.3760, 0.3312]) -Greedy action tensor([-1.2460, -0.5277, 0.8497, 0.8533]) tensor([0.0517, 0.1060, 0.4204, 0.4219]) -Greedy action tensor([-1.8870, -0.3888, 0.6396, -0.1417]) tensor([0.0422, 0.1887, 0.5276, 0.2416]) -Greedy action tensor([-1.8683, -0.4185, 0.6127, -0.1304]) tensor([0.0437, 0.1861, 0.5220, 0.2483]) -Greedy action tensor([-0.1259, 0.9577, 0.1377, 0.8169]) tensor([0.1278, 0.3777, 0.1664, 0.3281]) -Greedy action tensor([-1.7320, -0.3761, 0.5658, -0.1410]) tensor([0.0507, 0.1966, 0.5041, 0.2487]) -Greedy action tensor([-1.8756, -0.4551, 0.6315, -0.1563]) tensor([0.0435, 0.1801, 0.5337, 0.2428]) -Greedy action tensor([-1.3647, -0.3541, 0.3819, 0.2038]) tensor([0.0700, 0.1924, 0.4016, 0.3361]) -Greedy action tensor([-1.8532, -0.1323, 0.5713, -0.1274]) tensor([0.0425, 0.2378, 0.4806, 0.2390]) -Greedy action tensor([-1.8525, -0.4605, 0.6174, -0.1411]) tensor([0.0447, 0.1798, 0.5282, 0.2474]) -Greedy action tensor([-1.8632, -0.4855, 0.6238, -0.1538]) tensor([0.0444, 0.1761, 0.5341, 0.2454]) -Greedy action tensor([-1.7312, -0.4252, 0.6525, 0.0310]) tensor([0.0468, 0.1728, 0.5077, 0.2727]) -Greedy action tensor([-1.9318, -0.4402, 0.6622, -0.1731]) tensor([0.0406, 0.1804, 0.5433, 0.2357]) -Greedy action tensor([-1.8758, -0.4683, 0.6416, -0.1514]) tensor([0.0433, 0.1769, 0.5369, 0.2429]) -Greedy action tensor([-1.7346, -0.4889, 0.5804, -0.1068]) tensor([0.0508, 0.1765, 0.5141, 0.2586]) -Greedy action tensor([-1.6671, -0.4999, 0.5544, -0.0046]) tensor([0.0535, 0.1718, 0.4930, 0.2818]) -Greedy action tensor([ 1.3747, -1.1379, 0.2745, -0.1958]) tensor([0.6166, 0.0500, 0.2052, 0.1282]) -Greedy action tensor([ 2.0639, -0.3489, -0.5006, 0.5745]) tensor([0.7184, 0.0643, 0.0553, 0.1620]) -Greedy action tensor([ 1.3593, -0.5606, -0.5002, -0.0357]) tensor([0.6451, 0.0946, 0.1005, 0.1599]) -Greedy action tensor([ 1.0211, -0.3695, -0.3445, 0.1869]) tensor([0.5159, 0.1284, 0.1317, 0.2240]) -Greedy action tensor([ 1.2687, -0.6156, -0.3915, -0.0278]) tensor([0.6190, 0.0940, 0.1177, 0.1693]) -Greedy action tensor([ 1.0032, -0.2170, -0.8400, 0.1733]) tensor([0.5292, 0.1562, 0.0838, 0.2308]) -Greedy action tensor([ 1.3613, -0.5825, -0.0949, 0.1402]) tensor([0.5984, 0.0857, 0.1395, 0.1765]) -Greedy action tensor([ 0.6076, -0.5451, 0.1261, 0.1440]) tensor([0.3902, 0.1232, 0.2411, 0.2455]) -Greedy action tensor([ 1.7652, -0.8761, -0.2343, 0.4148]) tensor([0.6822, 0.0486, 0.0924, 0.1768]) -Greedy action tensor([ 2.3916, -1.5776, 0.0575, 0.9853]) tensor([0.7348, 0.0139, 0.0712, 0.1801]) -Greedy action tensor([ 1.1236, -0.3263, -0.4723, 0.6401]) tensor([0.4869, 0.1142, 0.0987, 0.3002]) -Greedy action tensor([ 1.1892, -0.4235, -0.6897, 0.3605]) tensor([0.5591, 0.1114, 0.0854, 0.2441]) -Greedy action tensor([ 1.6622, 0.1294, -0.3225, -0.5357]) tensor([0.6829, 0.1474, 0.0938, 0.0758]) -Greedy action tensor([ 1.5670, -0.3657, -0.6310, 0.3845]) tensor([0.6401, 0.0927, 0.0711, 0.1962]) -Greedy action tensor([ 1.4870, -0.4248, -0.6501, 0.2700]) tensor([0.6402, 0.0946, 0.0755, 0.1896]) -Greedy action tensor([ 1.2963, 0.1767, -0.4823, 0.3352]) tensor([0.5326, 0.1738, 0.0899, 0.2037]) -Greedy action tensor([ 1.8953e+00, -1.2473e+00, 6.4451e-04, 7.4954e-01]) tensor([0.6616, 0.0286, 0.0995, 0.2104]) -Greedy action tensor([ 1.6581, -0.1099, -1.2328, 0.3759]) tensor([0.6651, 0.1135, 0.0369, 0.1845]) -Greedy action tensor([ 2.0150, -1.0373, -0.2597, 0.5509]) tensor([0.7239, 0.0342, 0.0744, 0.1674]) -Greedy action tensor([ 1.3020, -0.3683, -0.9064, 0.3403]) tensor([0.5951, 0.1120, 0.0654, 0.2275]) -Greedy action tensor([ 1.2254, -0.7234, -0.4127, 0.3025]) tensor([0.5767, 0.0821, 0.1121, 0.2291]) -Greedy action tensor([ 1.3954, -0.5146, -0.4325, -0.0459]) tensor([0.6471, 0.0958, 0.1040, 0.1531]) -Greedy action tensor([ 1.3065, -0.9829, -0.1038, 0.3498]) tensor([0.5782, 0.0586, 0.1411, 0.2221]) -Greedy action tensor([ 1.3017, -0.1823, -0.2919, 0.2300]) tensor([0.5642, 0.1279, 0.1146, 0.1932]) -Greedy action tensor([ 1.7906, -0.6151, -0.2336, 0.4055]) tensor([0.6791, 0.0612, 0.0897, 0.1700]) -Greedy action tensor([ 1.9259, 0.1001, -0.1840, 0.3945]) tensor([0.6673, 0.1075, 0.0809, 0.1443]) -Greedy action tensor([ 1.4335, -0.4463, -0.7766, 0.5015]) tensor([0.6038, 0.0922, 0.0662, 0.2378]) -Greedy action tensor([ 1.4614, -0.2555, -0.4652, 0.2899]) tensor([0.6115, 0.1099, 0.0891, 0.1895]) -Greedy action tensor([ 1.1251, -0.3154, -0.5629, 0.4766]) tensor([0.5143, 0.1218, 0.0951, 0.2689]) -Greedy action tensor([ 1.9206, -0.9087, -0.3383, 0.3990]) tensor([0.7237, 0.0427, 0.0756, 0.1580]) -Greedy action tensor([ 1.2450, -0.4999, -0.4761, 0.2845]) tensor([0.5760, 0.1006, 0.1030, 0.2204]) -Greedy action tensor([ 1.5283, -0.1220, -0.4744, 0.2470]) tensor([0.6232, 0.1197, 0.0841, 0.1731]) -Greedy action tensor([ 1.9754, -0.2456, -0.1861, 0.4109]) tensor([0.6979, 0.0757, 0.0804, 0.1460]) -Greedy action tensor([ 0.9535, -0.3190, 0.0798, 0.0707]) tensor([0.4737, 0.1327, 0.1977, 0.1959]) -Greedy action tensor([ 1.8197, -0.8288, -0.6040, 0.7759]) tensor([0.6616, 0.0468, 0.0586, 0.2330]) -Greedy action tensor([ 1.5780, -0.4421, -0.4942, 0.4061]) tensor([0.6376, 0.0846, 0.0803, 0.1975]) -Greedy action tensor([ 2.5845, -0.0047, -0.9442, 0.1505]) tensor([0.8388, 0.0630, 0.0246, 0.0736]) -Greedy action tensor([ 1.3495, -0.5783, -0.5722, 0.4238]) tensor([0.5924, 0.0862, 0.0867, 0.2347]) -Greedy action tensor([ 1.5024, 0.4016, -0.1359, 0.2082]) tensor([0.5552, 0.1847, 0.1079, 0.1522]) -Greedy action tensor([ 1.1747, -0.0404, -0.6719, 0.3390]) tensor([0.5297, 0.1571, 0.0836, 0.2296]) -Greedy action tensor([ 2.3846, -1.4826, 0.0627, 0.5939]) tensor([0.7777, 0.0163, 0.0763, 0.1298]) -Greedy action tensor([ 1.9305, -0.6600, -0.2249, 0.4436]) tensor([0.7058, 0.0529, 0.0818, 0.1595]) -Greedy action tensor([ 1.5644, -0.0772, -0.6257, 0.5709]) tensor([0.5967, 0.1156, 0.0668, 0.2209]) -Greedy action tensor([ 0.7158, -0.4390, 0.1034, 0.1173]) tensor([0.4155, 0.1309, 0.2252, 0.2284]) -Greedy action tensor([ 1.4513, -0.6855, -0.5898, 0.6084]) tensor([0.5958, 0.0703, 0.0774, 0.2565]) -Greedy action tensor([ 1.4030, -0.3856, -1.0241, 0.6700]) tensor([0.5761, 0.0963, 0.0509, 0.2768]) -Greedy action tensor([ 1.4035, -0.6466, -0.1006, 0.4171]) tensor([0.5801, 0.0747, 0.1289, 0.2163]) -Greedy action tensor([ 1.6270, -0.1187, -0.6578, 0.3604]) tensor([0.6418, 0.1120, 0.0653, 0.1809]) -Greedy action tensor([ 2.7924, 0.5643, -0.1346, -0.0593]) tensor([0.8203, 0.0884, 0.0439, 0.0474]) -Greedy action tensor([ 1.3758, 0.3389, -0.9776, 0.0711]) tensor([0.5811, 0.2060, 0.0552, 0.1576]) -Greedy action tensor([ 1.1699, -0.3778, -0.4883, 0.1338]) tensor([0.5688, 0.1210, 0.1084, 0.2018]) -Greedy action tensor([ 1.6237, -0.2992, -0.7344, -0.0515]) tensor([0.7003, 0.1024, 0.0662, 0.1311]) -Greedy action tensor([ 1.1844, -0.2660, -0.2699, -0.0079]) tensor([0.5645, 0.1324, 0.1318, 0.1713]) -Greedy action tensor([ 1.1487, -0.5502, 0.0098, 0.3697]) tensor([0.5097, 0.0932, 0.1632, 0.2339]) -Greedy action tensor([ 1.2996, -0.6026, -0.8021, 0.6956]) tensor([0.5500, 0.0821, 0.0672, 0.3007]) -Greedy action tensor([ 0.9859, -0.3945, -0.1357, 0.3635]) tensor([0.4731, 0.1190, 0.1541, 0.2539]) -Greedy action tensor([ 2.2040, -0.4122, -0.7636, 0.4148]) tensor([0.7742, 0.0566, 0.0398, 0.1294]) -Greedy action tensor([ 1.0225, -0.6102, -0.2277, 0.6702]) tensor([0.4577, 0.0894, 0.1311, 0.3218]) -Greedy action tensor([ 1.2639, -0.2983, -0.1556, 0.0954]) tensor([0.5674, 0.1190, 0.1372, 0.1764]) -Greedy action tensor([ 1.8000, -1.2667, -0.3045, 0.6437]) tensor([0.6743, 0.0314, 0.0822, 0.2121]) -Greedy action tensor([ 2.0216, -0.9538, -0.4269, 0.8347]) tensor([0.6932, 0.0354, 0.0599, 0.2115]) -Greedy action tensor([ 1.9435, -0.9012, -0.4965, 0.4550]) tensor([0.7294, 0.0424, 0.0636, 0.1646]) -Greedy action tensor([ 2.5478, -0.8056, -0.2590, 0.0072]) tensor([0.8517, 0.0298, 0.0514, 0.0671]) -Greedy action tensor([ 1.8058, -0.5192, -0.4418, 0.3630]) tensor([0.6946, 0.0679, 0.0734, 0.1641]) -Greedy action tensor([ 1.7796, -0.2536, -0.7753, 0.0657]) tensor([0.7201, 0.0943, 0.0559, 0.1297]) -Greedy action tensor([ 1.5872, -0.7385, -0.1376, 0.8917]) tensor([0.5635, 0.0551, 0.1004, 0.2811]) -Greedy action tensor([ 1.2912, -0.3448, -0.6428, 0.4111]) tensor([0.5701, 0.1110, 0.0824, 0.2364]) -Greedy action tensor([ 1.4398, 0.1000, -0.6742, 0.7508]) tensor([0.5306, 0.1390, 0.0641, 0.2664]) -Greedy action tensor([ 1.5888, -0.0218, -0.4464, 0.6226]) tensor([0.5845, 0.1168, 0.0764, 0.2224]) -Greedy action tensor([ 1.5668, -0.7499, -0.5757, 0.2669]) tensor([0.6718, 0.0662, 0.0788, 0.1831]) -Greedy action tensor([ 1.5336, -0.5260, -0.6197, 0.2524]) tensor([0.6573, 0.0838, 0.0763, 0.1825]) -Greedy action tensor([ 1.5032, -0.6498, -0.1646, 0.3327]) tensor([0.6192, 0.0719, 0.1168, 0.1921]) -Greedy action tensor([ 2.0472, -0.9021, -0.1924, 0.9901]) tensor([0.6639, 0.0348, 0.0707, 0.2307]) -Greedy action tensor([ 2.0272, -0.6441, -0.6363, 0.4601]) tensor([0.7421, 0.0513, 0.0517, 0.1548]) -Greedy action tensor([ 1.0681, -0.2727, -1.0831, 0.4466]) tensor([0.5222, 0.1366, 0.0607, 0.2805]) -Greedy action tensor([ 1.0676, -0.1745, -0.7983, -0.0184]) tensor([0.5614, 0.1621, 0.0869, 0.1895]) -Greedy action tensor([ 1.4889, -0.2364, -0.4601, -0.0800]) tensor([0.6541, 0.1165, 0.0932, 0.1362]) -Greedy action tensor([ 0.9699, -0.4241, 0.0231, -0.0918]) tensor([0.5045, 0.1252, 0.1958, 0.1745]) -Greedy action tensor([ 0.7537, -0.2876, -0.0662, 0.1634]) tensor([0.4259, 0.1504, 0.1876, 0.2361]) -Greedy action tensor([ 1.4827, -0.4210, -0.4168, 0.4664]) tensor([0.6022, 0.0897, 0.0901, 0.2180]) -Greedy action tensor([ 1.7747, -0.5699, -0.5211, 0.2359]) tensor([0.7086, 0.0679, 0.0713, 0.1521]) -Greedy action tensor([-0.2784, -2.0922, 0.5790, 0.1435]) tensor([0.1982, 0.0323, 0.4672, 0.3023]) -Greedy action tensor([-0.4495, -0.6249, 1.0383, -0.6961]) tensor([0.1419, 0.1191, 0.6282, 0.1109]) -Greedy action tensor([-0.1970, -0.2555, 0.7975, -0.5870]) tensor([0.1878, 0.1772, 0.5078, 0.1272]) -Greedy action tensor([-0.8196, 0.0892, 0.1620, -0.8653]) tensor([0.1407, 0.3492, 0.3756, 0.1344]) -Greedy action tensor([ 0.2406, -1.5392, -0.1378, 0.4052]) tensor([0.3298, 0.0556, 0.2259, 0.3888]) -Greedy action tensor([ 1.3327, -0.6761, 0.7142, 0.0706]) tensor([0.5113, 0.0686, 0.2754, 0.1447]) -Greedy action tensor([ 0.5026, -0.9386, -0.1618, 0.1961]) tensor([0.4020, 0.0951, 0.2069, 0.2959]) -Greedy action tensor([-0.2268, -1.3416, 0.7269, -0.4621]) tensor([0.2121, 0.0696, 0.5506, 0.1677]) -Greedy action tensor([-0.7008, -0.5424, 0.4391, -0.4268]) tensor([0.1512, 0.1772, 0.4727, 0.1989]) -Greedy action tensor([-0.1272, -2.0980, -0.6639, 0.5150]) tensor([0.2759, 0.0384, 0.1613, 0.5244]) -Greedy action tensor([-1.0712, -0.7704, -0.0159, -0.1371]) tensor([0.1287, 0.1739, 0.3698, 0.3276]) -Greedy action tensor([ 0.7465, 0.0023, -0.3465, 0.0667]) tensor([0.4316, 0.2051, 0.1447, 0.2187]) -Greedy action tensor([ 0.1591, -0.3851, 0.3390, 0.9905]) tensor([0.1971, 0.1144, 0.2359, 0.4526]) -Greedy action tensor([0.9573, 0.1844, 0.0735, 0.9301]) tensor([0.3511, 0.1621, 0.1451, 0.3417]) -Greedy action tensor([-0.9342, -0.6335, -0.2514, -0.5819]) tensor([0.1738, 0.2348, 0.3441, 0.2473]) -Greedy action tensor([ 1.2242, -0.9193, 0.1676, 0.3873]) tensor([0.5269, 0.0618, 0.1832, 0.2282]) -Greedy action tensor([-0.3073, 0.0355, 0.1764, -0.5446]) tensor([0.2075, 0.2923, 0.3365, 0.1636]) -Greedy action tensor([ 0.4620, -1.0736, 0.4646, 0.1947]) tensor([0.3352, 0.0722, 0.3361, 0.2566]) -Greedy action tensor([ 0.0223, 0.2516, -0.0458, -0.7776]) tensor([0.2746, 0.3454, 0.2565, 0.1234]) -Greedy action tensor([-0.6605, -0.4267, 1.2008, -0.0962]) tensor([0.0957, 0.1209, 0.6153, 0.1682]) -Greedy action tensor([ 0.1925, -0.0796, 0.0935, -0.1965]) tensor([0.2989, 0.2277, 0.2708, 0.2026]) -Greedy action tensor([ 0.1385, -1.0028, -0.0307, 0.0363]) tensor([0.3261, 0.1042, 0.2753, 0.2944]) -Greedy action tensor([-1.0231, -1.2582, -0.8496, 1.0240]) tensor([0.0932, 0.0737, 0.1109, 0.7222]) -Greedy action tensor([-0.3102, -0.8011, -0.6471, -0.8115]) tensor([0.3411, 0.2088, 0.2435, 0.2066]) -Greedy action tensor([-1.0635, -0.8831, 0.9527, -0.4046]) tensor([0.0859, 0.1029, 0.6452, 0.1660]) -Greedy action tensor([ 0.6417, -0.8430, 0.1480, -0.8376]) tensor([0.4843, 0.1097, 0.2956, 0.1103]) -Greedy action tensor([-1.5151, -1.1263, 0.5728, -1.0031]) tensor([0.0819, 0.1208, 0.6607, 0.1366]) -Greedy action tensor([ 1.3356, -0.7483, 0.3618, 0.8816]) tensor([0.4679, 0.0582, 0.1767, 0.2972]) -Greedy action tensor([-0.0066, -0.6157, 0.9292, -0.5611]) tensor([0.2142, 0.1165, 0.5462, 0.1231]) -Greedy action tensor([ 0.2821, 0.0190, -0.3882, -0.1403]) tensor([0.3406, 0.2618, 0.1742, 0.2233]) -Greedy action tensor([-0.6034, -0.4560, 0.5344, -0.2252]) tensor([0.1484, 0.1720, 0.4630, 0.2166]) -Greedy action tensor([-0.6471, -1.1656, 0.6471, -0.3138]) tensor([0.1506, 0.0897, 0.5495, 0.2102]) -Greedy action tensor([-0.3251, -0.8304, 1.2449, -0.5775]) tensor([0.1391, 0.0840, 0.6688, 0.1081]) -Greedy action tensor([ 0.7632, -1.4962, -0.6008, 0.0433]) tensor([0.5415, 0.0565, 0.1384, 0.2636]) -Greedy action tensor([-0.2575, -1.0192, -0.1438, -0.6650]) tensor([0.3074, 0.1435, 0.3445, 0.2045]) -Greedy action tensor([ 0.2619, 0.5206, 0.5352, -0.1214]) tensor([0.2330, 0.3019, 0.3063, 0.1588]) -Greedy action tensor([ 1.9165, -0.8724, 0.7543, 0.2663]) tensor([0.6384, 0.0393, 0.1997, 0.1226]) -Greedy action tensor([-0.8287, -1.1440, 0.3103, -0.4495]) tensor([0.1584, 0.1155, 0.4947, 0.2314]) -Greedy action tensor([1.1603, 0.1060, 0.0624, 0.4493]) tensor([0.4601, 0.1603, 0.1535, 0.2260]) -Greedy action tensor([-1.1276, -0.4400, -0.6334, 0.0393]) tensor([0.1276, 0.2537, 0.2091, 0.4097]) -Greedy action tensor([-0.2246, 0.0848, -0.0678, -1.0325]) tensor([0.2514, 0.3425, 0.2940, 0.1121]) -Greedy action tensor([-0.1155, -0.1863, -0.5236, -1.0394]) tensor([0.3341, 0.3112, 0.2221, 0.1326]) -Greedy action tensor([-0.1260, -2.1077, -0.7547, -0.1481]) tensor([0.3774, 0.0520, 0.2013, 0.3692]) -Greedy action tensor([-0.4314, -0.2258, 0.3659, 0.0667]) tensor([0.1641, 0.2016, 0.3643, 0.2701]) -Greedy action tensor([ 1.7133, -1.0526, 1.2418, 1.7422]) tensor([0.3681, 0.0232, 0.2298, 0.3790]) -Greedy action tensor([-0.5287, -0.5500, 0.3344, -1.3192]) tensor([0.2082, 0.2038, 0.4935, 0.0944]) -Greedy action tensor([0.3826, 0.5100, 0.5677, 0.4981]) tensor([0.2241, 0.2546, 0.2697, 0.2516]) -Greedy action tensor([-0.3983, 0.2648, -0.0024, -0.8398]) tensor([0.1973, 0.3828, 0.2931, 0.1268]) -Greedy action tensor([ 0.5873, 0.2632, 0.4217, -0.6515]) tensor([0.3496, 0.2528, 0.2962, 0.1013]) -Greedy action tensor([ 0.0937, -0.1762, 0.6483, -0.5502]) tensor([0.2481, 0.1895, 0.4321, 0.1303]) -Greedy action tensor([ 0.0648, -0.6206, -0.4698, -0.7479]) tensor([0.3947, 0.1989, 0.2313, 0.1751]) -Greedy action tensor([-0.0276, -0.8276, -0.0127, -1.1085]) tensor([0.3567, 0.1603, 0.3620, 0.1210]) -Greedy action tensor([ 1.7890, -0.2344, 0.1649, 1.5423]) tensor([0.4738, 0.0626, 0.0934, 0.3702]) -Greedy action tensor([-1.3178, -0.1925, -0.9942, -0.1234]) tensor([0.1141, 0.3515, 0.1577, 0.3767]) -Greedy action tensor([ 0.1587, -0.0578, -0.4001, 0.8544]) tensor([0.2282, 0.1838, 0.1305, 0.4576]) -Greedy action tensor([ 0.1991, -0.1261, 1.0550, -0.2630]) tensor([0.2125, 0.1535, 0.5001, 0.1339]) -Greedy action tensor([ 0.5678, -0.2130, -0.7883, 0.1958]) tensor([0.4158, 0.1904, 0.1071, 0.2866]) -Greedy action tensor([ 0.2120, 0.0611, 0.4714, -0.0647]) tensor([0.2555, 0.2197, 0.3311, 0.1937]) -Greedy action tensor([ 0.4827, -0.7627, -0.3717, 0.5244]) tensor([0.3629, 0.1044, 0.1544, 0.3783]) -Greedy action tensor([ 1.5886, -0.9321, -0.2043, 0.0793]) tensor([0.6812, 0.0548, 0.1134, 0.1506]) -Greedy action tensor([ 0.4779, -0.2513, 0.5212, 0.2460]) tensor([0.3013, 0.1453, 0.3146, 0.2389]) -Greedy action tensor([-0.5357, 0.1352, -1.0227, 0.1990]) tensor([0.1768, 0.3459, 0.1087, 0.3687]) -Greedy action tensor([ 0.2133, -0.1078, -0.2364, -1.2265]) tensor([0.3846, 0.2790, 0.2453, 0.0911]) -Greedy action tensor([-0.2598, -0.3200, 0.5396, -0.1709]) tensor([0.1902, 0.1791, 0.4229, 0.2078]) -Greedy action tensor([-1.3507, -1.0254, 0.4474, -1.1540]) tensor([0.1037, 0.1436, 0.6264, 0.1263]) -Greedy action tensor([-0.3626, -0.5579, 0.1879, -0.4177]) tensor([0.2221, 0.1827, 0.3851, 0.2102]) -Greedy action tensor([ 0.0647, -0.1018, 0.3255, -0.6160]) tensor([0.2739, 0.2319, 0.3555, 0.1387]) -Greedy action tensor([ 1.3107, -0.0417, 1.2534, 0.6685]) tensor([0.3664, 0.0948, 0.3460, 0.1928]) -Greedy action tensor([ 0.3491, -0.9041, -0.7089, -1.5865]) tensor([0.5627, 0.1607, 0.1953, 0.0812]) -Greedy action tensor([ 1.3451, -1.0129, 1.7409, -1.2603]) tensor([0.3768, 0.0356, 0.5597, 0.0278]) -Greedy action tensor([-0.1100, 0.5116, 0.5845, -0.8659]) tensor([0.1875, 0.3491, 0.3754, 0.0880]) -Greedy action tensor([ 0.1990, -1.1502, 0.2182, -0.2199]) tensor([0.3405, 0.0884, 0.3471, 0.2240]) -Greedy action tensor([ 0.3015, -0.7149, 0.4957, 0.0026]) tensor([0.3014, 0.1091, 0.3660, 0.2235]) -Greedy action tensor([-0.7470, -0.1437, -0.1687, -0.7387]) tensor([0.1780, 0.3253, 0.3173, 0.1794]) -Greedy action tensor([-0.3563, -0.2860, 0.4345, -0.6860]) tensor([0.2001, 0.2147, 0.4413, 0.1439]) -Greedy action tensor([-0.2313, -0.7216, 1.0102, -0.6817]) tensor([0.1751, 0.1072, 0.6060, 0.1116]) -Greedy action tensor([-0.0452, -0.2257, 0.4124, -0.8623]) tensor([0.2593, 0.2165, 0.4097, 0.1145]) -Greedy action tensor([ 0.1621, -0.0668, -0.4825, -0.9355]) tensor([0.3768, 0.2997, 0.1978, 0.1257]) -Greedy action tensor([-1.5363, -0.9417, -1.0125, 0.6256]) tensor([0.0758, 0.1374, 0.1280, 0.6587]) -Greedy action tensor([-3.2539e-01, -1.4840e+00, -1.3682e-03, -6.4689e-01]) tensor([0.2923, 0.0917, 0.4041, 0.2119]) -Greedy action tensor([ 0.4821, -1.3467, -0.7334, 0.5769]) tensor([0.3911, 0.0628, 0.1160, 0.4300]) -Greedy action tensor([ 0.6732, -0.7632, -0.0740, -0.3630]) tensor([0.4840, 0.1151, 0.2292, 0.1717]) -Greedy action tensor([ 1.0175, -0.6503, -0.0151, -0.5743]) tensor([0.5720, 0.1079, 0.2037, 0.1164]) -Greedy action tensor([ 0.4284, 0.3263, -0.1897, 0.0579]) tensor([0.3193, 0.2883, 0.1721, 0.2204]) -Greedy action tensor([ 0.7271, -0.3895, -0.0650, -0.2270]) tensor([0.4618, 0.1512, 0.2092, 0.1779]) -Greedy action tensor([ 0.7828, -0.3441, -0.0613, -0.3314]) tensor([0.4803, 0.1556, 0.2065, 0.1576]) -Greedy action tensor([ 0.9820, -0.5904, 0.0441, -0.4003]) tensor([0.5405, 0.1122, 0.2116, 0.1357]) -Greedy action tensor([ 0.7804, 0.0994, -0.0201, 0.0824]) tensor([0.4077, 0.2064, 0.1831, 0.2029]) -Greedy action tensor([ 0.8245, -0.6819, 0.0269, -0.3513]) tensor([0.5049, 0.1119, 0.2274, 0.1558]) -Greedy action tensor([ 0.4563, 0.0342, -0.0726, 0.0344]) tensor([0.3448, 0.2260, 0.2031, 0.2261]) -Greedy action tensor([ 0.8204, -0.3513, -0.1690, -0.0516]) tensor([0.4763, 0.1476, 0.1771, 0.1991]) -Greedy action tensor([ 0.1085, 0.3054, -0.1161, -0.3233]) tensor([0.2728, 0.3321, 0.2179, 0.1771]) -Greedy action tensor([ 0.8859, -0.6278, -0.1186, -0.2493]) tensor([0.5242, 0.1154, 0.1920, 0.1685]) -Greedy action tensor([ 0.8914, -0.7156, 0.0038, -0.3130]) tensor([0.5230, 0.1049, 0.2153, 0.1568]) -Greedy action tensor([ 0.8361, -0.4158, -0.0133, -0.3417]) tensor([0.4947, 0.1415, 0.2116, 0.1523]) -Greedy action tensor([ 0.7596, -0.1062, -0.0685, 0.0252]) tensor([0.4278, 0.1800, 0.1869, 0.2053]) -Greedy action tensor([ 0.7958, -0.3552, -0.1107, -0.2115]) tensor([0.4795, 0.1517, 0.1937, 0.1751]) -Greedy action tensor([ 0.6510, -0.3338, -0.1583, -0.1138]) tensor([0.4378, 0.1635, 0.1949, 0.2038]) -Greedy action tensor([ 0.9351, -0.6961, -0.0514, -0.3740]) tensor([0.5439, 0.1064, 0.2028, 0.1469]) -Greedy action tensor([ 1.2233, -0.6232, -0.1067, -0.5120]) tensor([0.6256, 0.0987, 0.1654, 0.1103]) -Greedy action tensor([ 0.9336, -0.5784, -0.0167, -0.3812]) tensor([0.5332, 0.1175, 0.2061, 0.1432]) -Greedy action tensor([ 0.5782, -0.2083, -0.0282, -0.2064]) tensor([0.4070, 0.1854, 0.2219, 0.1857]) -Greedy action tensor([ 0.7300, -0.2255, -0.0321, -0.4160]) tensor([0.4610, 0.1773, 0.2151, 0.1465]) -Greedy action tensor([ 1.1940, -0.6114, -0.0437, -0.5754]) tensor([0.6154, 0.1012, 0.1785, 0.1049]) -Greedy action tensor([ 0.8811, -0.4731, -0.0228, -0.4386]) tensor([0.5180, 0.1337, 0.2098, 0.1384]) -Greedy action tensor([ 0.9226, -0.2400, 0.0746, -0.1158]) tensor([0.4773, 0.1492, 0.2044, 0.1690]) -Greedy action tensor([ 0.6171, -0.0960, -0.0049, -0.2803]) tensor([0.4107, 0.2013, 0.2205, 0.1674]) -Greedy action tensor([ 0.2648, 0.2772, -0.0944, -0.2252]) tensor([0.3009, 0.3047, 0.2101, 0.1843]) -Greedy action tensor([ 0.8889, -0.7028, 0.0135, -0.2173]) tensor([0.5125, 0.1043, 0.2136, 0.1695]) -Greedy action tensor([ 0.5333, -0.2866, 0.0271, -0.3136]) tensor([0.4045, 0.1782, 0.2438, 0.1734]) -Greedy action tensor([ 1.0120, -0.5992, 0.1353, -0.6284]) tensor([0.5526, 0.1103, 0.2299, 0.1071]) -Greedy action tensor([ 0.7299, -0.4972, 0.0294, -0.3706]) tensor([0.4712, 0.1381, 0.2339, 0.1568]) -Greedy action tensor([ 1.1317, -0.7334, -0.1905, -0.6229]) tensor([0.6272, 0.0971, 0.1672, 0.1085]) -Greedy action tensor([ 0.8001, -0.4951, -0.0437, -0.4931]) tensor([0.5055, 0.1384, 0.2174, 0.1387]) -Greedy action tensor([ 1.0368, -1.1526, -0.2053, -0.8071]) tensor([0.6415, 0.0718, 0.1852, 0.1015]) -Greedy action tensor([ 0.8751, -0.6526, -0.1115, -0.3452]) tensor([0.5305, 0.1151, 0.1978, 0.1566]) -Greedy action tensor([ 0.9140, -0.5852, -0.1308, -0.3087]) tensor([0.5349, 0.1194, 0.1882, 0.1575]) -Greedy action tensor([ 0.6474, -0.2752, -0.0186, -0.1842]) tensor([0.4262, 0.1694, 0.2189, 0.1855]) -Greedy action tensor([ 0.8491, 0.1465, -0.4895, -0.4149]) tensor([0.4902, 0.2428, 0.1285, 0.1385]) -Greedy action tensor([ 0.4713, 0.0289, -0.0336, 0.0367]) tensor([0.3456, 0.2220, 0.2086, 0.2238]) -Greedy action tensor([ 0.6943, -0.4017, -0.0164, -0.5597]) tensor([0.4737, 0.1583, 0.2328, 0.1352]) -Greedy action tensor([ 0.4110, -0.0807, -0.1032, -0.3736]) tensor([0.3751, 0.2294, 0.2243, 0.1712]) -Greedy action tensor([ 0.3666, -0.4998, 0.1519, -0.4433]) tensor([0.3742, 0.1574, 0.3019, 0.1665]) -Greedy action tensor([ 0.8880, -0.3851, -0.0770, -0.4029]) tensor([0.5165, 0.1446, 0.1968, 0.1421]) -Greedy action tensor([ 0.2203, -0.0850, -0.1082, -0.4229]) tensor([0.3353, 0.2471, 0.2414, 0.1762]) -Greedy action tensor([ 0.3099, 0.1302, -0.0353, 0.0482]) tensor([0.3018, 0.2522, 0.2137, 0.2323]) -Greedy action tensor([ 0.8050, -0.7002, 0.0829, -0.3927]) tensor([0.4976, 0.1105, 0.2417, 0.1502]) -Greedy action tensor([ 0.7461, -0.5649, 0.0323, -0.2710]) tensor([0.4715, 0.1271, 0.2309, 0.1705]) -Greedy action tensor([ 0.8464, -0.5932, 0.0669, -0.1914]) tensor([0.4878, 0.1156, 0.2237, 0.1728]) -Greedy action tensor([ 0.7761, -0.6064, -0.1213, -0.3580]) tensor([0.5050, 0.1267, 0.2058, 0.1625]) -Greedy action tensor([ 0.6763, -0.1094, -0.0205, -0.0591]) tensor([0.4110, 0.1873, 0.2047, 0.1970]) -Greedy action tensor([ 0.6824, -0.0950, -0.0471, -0.0342]) tensor([0.4115, 0.1891, 0.1984, 0.2010]) -Greedy action tensor([ 0.8897, -0.6240, -0.0451, -0.3085]) tensor([0.5223, 0.1150, 0.2051, 0.1576]) -Greedy action tensor([ 9.0009e-01, -7.8781e-01, -2.2098e-04, -2.7029e-01]) tensor([0.5259, 0.0972, 0.2137, 0.1632]) -Greedy action tensor([ 0.6711, -0.3391, -0.1126, -0.2031]) tensor([0.4468, 0.1627, 0.2041, 0.1864]) -Greedy action tensor([ 0.7884, -0.7309, -0.0953, -0.2906]) tensor([0.5071, 0.1110, 0.2095, 0.1724]) -Greedy action tensor([ 0.5704, -0.0330, -0.0789, -0.0368]) tensor([0.3825, 0.2092, 0.1998, 0.2084]) -Greedy action tensor([ 0.6560, -0.0442, 0.0151, -0.0189]) tensor([0.3949, 0.1961, 0.2080, 0.2011]) -Greedy action tensor([ 1.1080, -0.4109, -0.2580, -0.4053]) tensor([0.5902, 0.1292, 0.1506, 0.1300]) -Greedy action tensor([ 0.7034, -0.4843, -0.0066, -0.2140]) tensor([0.4553, 0.1389, 0.2239, 0.1819]) -Greedy action tensor([ 0.5232, -0.4622, -0.1621, -0.2816]) tensor([0.4302, 0.1606, 0.2168, 0.1924]) -Greedy action tensor([ 0.9846, -0.5293, 0.0418, -0.7293]) tensor([0.5587, 0.1229, 0.2176, 0.1007]) -Greedy action tensor([ 0.7448, -0.3411, -0.1455, -0.0668]) tensor([0.4562, 0.1540, 0.1873, 0.2026]) -Greedy action tensor([ 0.6210, -0.4093, -0.0740, -0.3883]) tensor([0.4504, 0.1607, 0.2248, 0.1641]) -Greedy action tensor([ 0.7596, -0.6017, -0.0168, -0.4215]) tensor([0.4942, 0.1267, 0.2274, 0.1517]) -Greedy action tensor([ 0.9289, -0.4344, -0.0047, -0.6156]) tensor([0.5369, 0.1374, 0.2111, 0.1146]) -Greedy action tensor([ 0.6995, -0.3719, -0.1828, 0.0025]) tensor([0.4436, 0.1519, 0.1836, 0.2209]) -Greedy action tensor([ 0.8673, -0.7825, -0.0286, -0.4643]) tensor([0.5364, 0.1030, 0.2190, 0.1416]) -Greedy action tensor([ 1.1284, -0.6510, -0.0028, -0.7025]) tensor([0.6055, 0.1022, 0.1953, 0.0970]) -Greedy action tensor([ 0.8537, -0.2681, 0.1521, -0.2313]) tensor([0.4631, 0.1508, 0.2296, 0.1565]) -Greedy action tensor([ 1.0314, -0.5879, -0.0577, -0.6933]) tensor([0.5839, 0.1156, 0.1965, 0.1041]) -Greedy action tensor([ 1.0101, -0.6638, -0.2051, -0.5127]) tensor([0.5875, 0.1102, 0.1743, 0.1281]) -Greedy action tensor([ 0.4199, -0.2891, -0.1630, -0.0678]) tensor([0.3753, 0.1847, 0.2095, 0.2305]) -Greedy action tensor([ 0.8121, -0.7663, 0.0026, -0.3518]) tensor([0.5093, 0.1051, 0.2267, 0.1590]) -Greedy action tensor([ 1.2119, -0.3761, -0.1263, -0.3628]) tensor([0.5975, 0.1221, 0.1567, 0.1237]) -Greedy action tensor([ 1.1585, -0.6571, 0.0137, -0.4396]) tensor([0.5941, 0.0967, 0.1891, 0.1202]) -Greedy action tensor([ 0.7236, -0.4177, 0.0342, -0.5676]) tensor([0.4771, 0.1524, 0.2394, 0.1312]) -Greedy action tensor([ 0.7973, -0.6524, -0.0555, -0.2207]) tensor([0.4945, 0.1160, 0.2108, 0.1787]) -Greedy action tensor([ 0.3950, -0.0330, -0.0707, -0.0942]) tensor([0.3457, 0.2253, 0.2170, 0.2120]) -Greedy action tensor([ 1.1369, -0.6840, 0.0734, -0.5168]) tensor([0.5888, 0.0953, 0.2033, 0.1127]) -Greedy action tensor([ 0.4336, -0.4523, 0.0608, -0.3387]) tensor([0.3902, 0.1609, 0.2687, 0.1802]) -Greedy action tensor([ 1.0796, -0.6473, 0.0365, -0.5054]) tensor([0.5763, 0.1025, 0.2031, 0.1181]) -Greedy action tensor([ 1.0994, -0.3221, -0.4328, 0.2054]) tensor([0.5358, 0.1293, 0.1158, 0.2191]) -Greedy action tensor([ 1.4361, -0.7955, -0.4136, 0.7778]) tensor([0.5610, 0.0602, 0.0882, 0.2905]) -Greedy action tensor([ 1.9446, 0.3976, -0.5782, 0.0539]) tensor([0.6925, 0.1474, 0.0556, 0.1045]) -Greedy action tensor([ 1.4369, -0.1736, -0.4825, 0.3980]) tensor([0.5881, 0.1175, 0.0863, 0.2081]) -Greedy action tensor([ 1.0920, -0.4156, -0.2331, 0.2534]) tensor([0.5210, 0.1154, 0.1385, 0.2252]) -Greedy action tensor([ 2.1280, -1.6169, 0.1557, 0.5138]) tensor([0.7343, 0.0174, 0.1022, 0.1462]) -Greedy action tensor([1.4430, 0.4293, 0.0516, 0.0578]) tensor([0.5371, 0.1949, 0.1336, 0.1344]) -Greedy action tensor([ 1.6605, -0.9120, -0.7822, 0.2753]) tensor([0.7074, 0.0540, 0.0615, 0.1771]) -Greedy action tensor([ 1.2523, -0.3059, -0.2983, 0.1622]) tensor([0.5686, 0.1197, 0.1206, 0.1911]) -Greedy action tensor([ 1.4258, 0.4858, -0.6344, 0.2749]) tensor([0.5451, 0.2130, 0.0695, 0.1725]) -Greedy action tensor([ 2.0852, -1.0475, -0.2617, 0.2772]) tensor([0.7673, 0.0335, 0.0734, 0.1258]) -Greedy action tensor([ 1.1677, -0.2544, -0.3618, 0.1939]) tensor([0.5448, 0.1314, 0.1180, 0.2058]) -Greedy action tensor([ 1.9952, -0.2790, -0.5868, 0.2455]) tensor([0.7395, 0.0761, 0.0559, 0.1285]) -Greedy action tensor([ 1.1847, -0.5708, -0.5243, 0.2790]) tensor([0.5688, 0.0983, 0.1030, 0.2299]) -Greedy action tensor([ 1.1289, -0.3036, -0.4771, 0.1415]) tensor([0.5519, 0.1317, 0.1108, 0.2056]) -Greedy action tensor([ 1.5842, -0.1669, -0.8421, 0.3111]) tensor([0.6485, 0.1126, 0.0573, 0.1816]) -Greedy action tensor([ 1.5604, -0.5391, -0.7870, 0.4232]) tensor([0.6498, 0.0796, 0.0621, 0.2084]) -Greedy action tensor([ 2.2369, -1.1167, -0.2324, 0.5399]) tensor([0.7676, 0.0268, 0.0650, 0.1406]) -Greedy action tensor([ 1.7101, -0.8072, -0.4590, 0.5813]) tensor([0.6586, 0.0531, 0.0753, 0.2130]) -Greedy action tensor([ 1.4268, -1.0224, -0.4062, 0.7026]) tensor([0.5777, 0.0499, 0.0924, 0.2800]) -Greedy action tensor([ 1.8485, -0.7138, -0.1595, 0.3471]) tensor([0.6973, 0.0538, 0.0936, 0.1554]) -Greedy action tensor([ 1.2604, -0.5631, -0.1779, 0.4438]) tensor([0.5433, 0.0877, 0.1289, 0.2401]) -Greedy action tensor([ 1.5567, -0.5002, -0.8646, 0.2055]) tensor([0.6777, 0.0866, 0.0602, 0.1755]) -Greedy action tensor([ 1.6333, -0.0416, -0.4607, -0.0868]) tensor([0.6713, 0.1258, 0.0827, 0.1202]) -Greedy action tensor([ 1.6722, -0.5485, -0.7087, 0.9624]) tensor([0.5908, 0.0641, 0.0546, 0.2905]) -Greedy action tensor([ 2.1850, 0.1988, -0.0254, 0.2727]) tensor([0.7170, 0.0984, 0.0786, 0.1059]) -Greedy action tensor([ 1.2266, -0.2513, -0.5694, 0.5489]) tensor([0.5258, 0.1199, 0.0873, 0.2670]) -Greedy action tensor([ 1.0394, -0.1931, -0.1590, 0.4618]) tensor([0.4641, 0.1353, 0.1400, 0.2605]) -Greedy action tensor([ 1.3755, -0.4846, -0.7776, 0.0770]) tensor([0.6474, 0.1008, 0.0752, 0.1767]) -Greedy action tensor([ 1.6851, -0.3667, -0.0596, -0.0338]) tensor([0.6745, 0.0867, 0.1178, 0.1209]) -Greedy action tensor([ 1.8637, -0.1138, -0.2405, 0.2062]) tensor([0.6892, 0.0954, 0.0840, 0.1314]) -Greedy action tensor([ 1.4709, -0.6101, -0.4135, 0.2548]) tensor([0.6357, 0.0793, 0.0966, 0.1884]) -Greedy action tensor([ 2.3601, -0.8860, -0.3641, 1.0149]) tensor([0.7326, 0.0285, 0.0481, 0.1908]) -Greedy action tensor([ 1.4345, -0.8758, -0.4092, 0.1036]) tensor([0.6572, 0.0652, 0.1040, 0.1737]) -Greedy action tensor([ 1.0799, -0.3261, -0.4145, 0.1292]) tensor([0.5388, 0.1321, 0.1209, 0.2082]) -Greedy action tensor([ 2.5233, -0.8405, -1.0632, 0.4776]) tensor([0.8392, 0.0290, 0.0232, 0.1085]) -Greedy action tensor([ 1.6285, -0.6217, -0.2837, 0.4276]) tensor([0.6435, 0.0678, 0.0951, 0.1936]) -Greedy action tensor([ 1.9953, 0.3791, -0.3201, 0.3358]) tensor([0.6722, 0.1335, 0.0664, 0.1279]) -Greedy action tensor([ 1.3415, 0.2549, -0.4222, -0.4737]) tensor([0.5982, 0.2018, 0.1025, 0.0974]) -Greedy action tensor([ 2.1309, -1.0511, -0.0456, 0.5426]) tensor([0.7357, 0.0305, 0.0835, 0.1503]) -Greedy action tensor([ 1.1118, -0.2221, -0.3414, 0.3840]) tensor([0.5050, 0.1330, 0.1181, 0.2439]) -Greedy action tensor([ 2.0657, -0.4790, -0.1159, 0.3939]) tensor([0.7250, 0.0569, 0.0818, 0.1362]) -Greedy action tensor([ 1.5096, -0.3886, -0.6846, 0.0293]) tensor([0.6717, 0.1006, 0.0749, 0.1528]) -Greedy action tensor([ 1.8482, -0.8968, -0.1372, 0.5712]) tensor([0.6755, 0.0434, 0.0928, 0.1884]) -Greedy action tensor([ 1.3308, -0.1466, -0.5916, 0.2417]) tensor([0.5845, 0.1334, 0.0855, 0.1967]) -Greedy action tensor([ 1.3730, -0.6678, -0.3773, 0.6377]) tensor([0.5609, 0.0729, 0.0974, 0.2688]) -Greedy action tensor([ 1.0545, -0.2549, -0.8105, 0.6910]) tensor([0.4717, 0.1273, 0.0731, 0.3279]) -Greedy action tensor([ 1.3185, -0.1060, -0.4179, 0.5448]) tensor([0.5325, 0.1281, 0.0938, 0.2456]) -Greedy action tensor([ 1.6270, -0.7199, -0.2593, 0.5067]) tensor([0.6355, 0.0608, 0.0964, 0.2073]) -Greedy action tensor([ 2.2608, -0.4151, -0.5708, 0.2734]) tensor([0.7906, 0.0544, 0.0466, 0.1084]) -Greedy action tensor([ 1.7137, -0.9124, -0.3686, 0.1782]) tensor([0.7080, 0.0512, 0.0882, 0.1525]) -Greedy action tensor([ 0.8440, -0.5681, -0.3824, 0.5925]) tensor([0.4320, 0.1053, 0.1267, 0.3360]) -Greedy action tensor([ 1.0338, -0.3886, -0.6017, 0.5675]) tensor([0.4847, 0.1169, 0.0944, 0.3040]) -Greedy action tensor([ 2.3734, -1.4299, -0.0087, 0.5748]) tensor([0.7811, 0.0174, 0.0721, 0.1293]) -Greedy action tensor([ 1.9269, -0.5702, -0.5967, 0.1957]) tensor([0.7465, 0.0615, 0.0599, 0.1322]) -Greedy action tensor([ 1.2186, -0.1513, -0.1928, -0.0199]) tensor([0.5594, 0.1422, 0.1364, 0.1621]) -Greedy action tensor([ 1.2500, -0.5677, -0.5154, 0.3266]) tensor([0.5778, 0.0938, 0.0989, 0.2295]) -Greedy action tensor([ 2.3095, -0.8990, -0.2645, 0.6940]) tensor([0.7602, 0.0307, 0.0579, 0.1511]) -Greedy action tensor([ 1.2096, -0.2921, -0.2990, 0.2353]) tensor([0.5490, 0.1223, 0.1215, 0.2072]) -Greedy action tensor([ 1.9542, -0.5267, -0.8641, 0.6191]) tensor([0.7110, 0.0595, 0.0425, 0.1871]) -Greedy action tensor([ 1.2397, -0.6123, -0.3961, 0.2101]) tensor([0.5852, 0.0918, 0.1140, 0.2090]) -Greedy action tensor([ 1.4578, -0.2987, -0.5475, 0.2888]) tensor([0.6181, 0.1067, 0.0832, 0.1920]) -Greedy action tensor([ 1.9252, -0.7521, -0.2290, 0.2685]) tensor([0.7270, 0.0500, 0.0843, 0.1387]) -Greedy action tensor([ 1.2826, -0.5492, -0.4323, 0.0260]) tensor([0.6155, 0.0986, 0.1108, 0.1752]) -Greedy action tensor([ 1.5471, -0.2937, -0.4490, 0.1101]) tensor([0.6527, 0.1036, 0.0887, 0.1551]) -Greedy action tensor([ 1.0695, -0.3857, -0.3866, -0.0754]) tensor([0.5603, 0.1308, 0.1306, 0.1783]) -Greedy action tensor([ 1.6931, -0.3179, -0.7402, 0.2170]) tensor([0.6896, 0.0923, 0.0605, 0.1576]) -Greedy action tensor([ 1.7994, -0.1236, -0.1716, 0.3491]) tensor([0.6579, 0.0962, 0.0917, 0.1543]) -Greedy action tensor([ 2.2817, -1.2873, 0.4929, -0.0399]) tensor([0.7731, 0.0218, 0.1292, 0.0759]) -Greedy action tensor([ 1.2638, -0.3796, -0.4756, 0.5054]) tensor([0.5443, 0.1052, 0.0956, 0.2549]) -Greedy action tensor([ 1.6147, -0.3213, -0.7794, -0.0148]) tensor([0.6985, 0.1008, 0.0637, 0.1369]) -Greedy action tensor([ 1.1156, -0.6041, -0.2325, 0.1851]) tensor([0.5455, 0.0977, 0.1417, 0.2151]) -Greedy action tensor([ 1.4926, 0.0139, -1.2948, 0.0362]) tensor([0.6568, 0.1497, 0.0404, 0.1531]) -Greedy action tensor([ 2.2298, 0.3319, 0.2138, -0.0629]) tensor([0.7225, 0.1083, 0.0962, 0.0730]) -Greedy action tensor([ 1.2469, -0.2469, -0.1975, 0.1848]) tensor([0.5537, 0.1243, 0.1306, 0.1914]) -Greedy action tensor([ 1.3435, -0.3845, -0.4862, 0.0987]) tensor([0.6150, 0.1092, 0.0987, 0.1771]) -Greedy action tensor([ 1.4973, -0.6519, -0.7951, 0.0850]) tensor([0.6844, 0.0798, 0.0691, 0.1667]) -Greedy action tensor([1.3042, 0.0123, 0.0417, 0.2162]) tensor([0.5278, 0.1450, 0.1493, 0.1778]) -Greedy action tensor([ 1.4185, -0.3919, -0.3871, 0.4851]) tensor([0.5810, 0.0950, 0.0955, 0.2285]) -Greedy action tensor([ 1.3275, -0.0409, -0.3264, 0.3588]) tensor([0.5478, 0.1394, 0.1048, 0.2079]) -Greedy action tensor([ 1.8578, -0.6668, -0.0859, 0.4631]) tensor([0.6797, 0.0544, 0.0973, 0.1685]) -Greedy action tensor([ 1.7177, -0.7676, -0.5365, 0.3754]) tensor([0.6899, 0.0575, 0.0724, 0.1802]) -Greedy action tensor([-1.8944, -0.3614, 0.6369, -0.1503]) tensor([0.0418, 0.1936, 0.5254, 0.2391]) -Greedy action tensor([-1.0828, -0.5266, 0.8744, 0.9775]) tensor([0.0566, 0.0987, 0.4006, 0.4441]) -Greedy action tensor([-1.8507, -0.4959, 0.6313, -0.1251]) tensor([0.0445, 0.1726, 0.5328, 0.2501]) -Greedy action tensor([-1.6480, 0.0427, 0.4461, -0.0337]) tensor([0.0511, 0.2772, 0.4149, 0.2568]) -Greedy action tensor([-0.9695, -0.4075, 0.5760, 0.1890]) tensor([0.0941, 0.1650, 0.4413, 0.2996]) -Greedy action tensor([-1.7141, -0.3463, 0.6049, -0.0839]) tensor([0.0495, 0.1944, 0.5033, 0.2528]) -Greedy action tensor([-1.9042, -0.3865, 0.6452, -0.1555]) tensor([0.0415, 0.1892, 0.5309, 0.2384]) -Greedy action tensor([-1.3541, 0.6095, 0.2169, 0.1124]) tensor([0.0579, 0.4125, 0.2786, 0.2510]) -Greedy action tensor([-1.4545, 0.3681, 0.3481, -0.0940]) tensor([0.0583, 0.3608, 0.3536, 0.2273]) -Greedy action tensor([-1.8768, -0.4680, 0.6360, -0.1566]) tensor([0.0434, 0.1777, 0.5361, 0.2427]) -Greedy action tensor([-1.8700, -0.4216, 0.6481, -0.0975]) tensor([0.0425, 0.1808, 0.5268, 0.2499]) -Greedy action tensor([-1.3806, 0.0062, 0.5330, 0.3552]) tensor([0.0573, 0.2293, 0.3883, 0.3251]) -Greedy action tensor([-1.9384, -0.4358, 0.6640, -0.1735]) tensor([0.0403, 0.1810, 0.5435, 0.2352]) -Greedy action tensor([-1.7568, 0.1148, 0.4854, 0.0327]) tensor([0.0437, 0.2838, 0.4111, 0.2614]) -Greedy action tensor([-1.8582, -0.3757, 0.6233, -0.1256]) tensor([0.0434, 0.1913, 0.5196, 0.2457]) -Greedy action tensor([-1.1053, 0.2576, 0.3088, 0.3153]) tensor([0.0760, 0.2969, 0.3125, 0.3146]) -Greedy action tensor([-1.8664, -0.4427, 0.6312, -0.1411]) tensor([0.0436, 0.1812, 0.5303, 0.2449]) -Greedy action tensor([-0.9596, -0.4216, 0.2769, -0.0353]) tensor([0.1153, 0.1974, 0.3969, 0.2905]) -Greedy action tensor([-0.9555, -0.0589, 0.8923, 0.9404]) tensor([0.0608, 0.1490, 0.3856, 0.4046]) -Greedy action tensor([-1.9317, -0.4345, 0.6611, -0.1733]) tensor([0.0406, 0.1814, 0.5425, 0.2355]) -Greedy action tensor([-1.9170, -0.4345, 0.6546, -0.1637]) tensor([0.0412, 0.1815, 0.5393, 0.2380]) -Greedy action tensor([-1.4507, -0.0834, 0.5842, 0.1548]) tensor([0.0570, 0.2235, 0.4358, 0.2837]) -Greedy action tensor([-1.8454, -0.4601, 0.5158, -0.3066]) tensor([0.0494, 0.1973, 0.5234, 0.2300]) -Greedy action tensor([-1.8636, -0.4402, 0.6328, -0.1377]) tensor([0.0437, 0.1812, 0.5299, 0.2452]) -Greedy action tensor([-1.9441, -0.4548, 0.6722, -0.1786]) tensor([0.0401, 0.1776, 0.5482, 0.2341]) -Greedy action tensor([-1.9067, -0.3566, 0.6426, -0.1602]) tensor([0.0412, 0.1943, 0.5279, 0.2365]) -Greedy action tensor([0.8454, 0.6101, 0.5597, 1.3692]) tensor([0.2364, 0.1868, 0.1776, 0.3991]) -Greedy action tensor([-0.3307, 0.9970, 0.0125, 0.1771]) tensor([0.1275, 0.4810, 0.1797, 0.2118]) -Greedy action tensor([-1.9208, -0.4446, 0.6576, -0.1666]) tensor([0.0411, 0.1799, 0.5415, 0.2375]) -Greedy action tensor([-1.6252, -0.0383, 0.6232, -0.5776]) tensor([0.0549, 0.2684, 0.5201, 0.1565]) -Greedy action tensor([-1.9335, -0.4433, 0.6617, -0.1747]) tensor([0.0406, 0.1801, 0.5437, 0.2356]) -Greedy action tensor([-1.8047, -0.3137, 0.6427, -0.0767]) tensor([0.0442, 0.1963, 0.5108, 0.2488]) -Greedy action tensor([-1.8913, -0.4088, 0.6343, -0.1579]) tensor([0.0424, 0.1869, 0.5304, 0.2402]) -Greedy action tensor([-1.9090, -0.3907, 0.6405, -0.1581]) tensor([0.0415, 0.1892, 0.5306, 0.2388]) -Greedy action tensor([-1.7863, -0.1095, 0.5350, -0.0950]) tensor([0.0455, 0.2435, 0.4639, 0.2471]) -Greedy action tensor([-1.9110, -0.4201, 0.6392, -0.1641]) tensor([0.0417, 0.1851, 0.5340, 0.2392]) -Greedy action tensor([-1.9053, -0.4244, 0.6581, -0.1571]) tensor([0.0415, 0.1823, 0.5381, 0.2382]) -Greedy action tensor([-1.6708, -0.2930, 0.5049, -0.0719]) tensor([0.0534, 0.2118, 0.4705, 0.2643]) -Greedy action tensor([-1.6786, -0.4775, 0.5407, -0.0389]) tensor([0.0535, 0.1779, 0.4926, 0.2759]) -Greedy action tensor([-1.9316, -0.4435, 0.6624, -0.1729]) tensor([0.0406, 0.1799, 0.5437, 0.2358]) -Greedy action tensor([-1.5229, 0.2792, 0.3665, -0.0280]) tensor([0.0551, 0.3343, 0.3647, 0.2459]) -Greedy action tensor([-1.5217, 0.3640, 0.3640, 0.0943]) tensor([0.0520, 0.3430, 0.3430, 0.2619]) -Greedy action tensor([-1.8813, -0.4600, 0.6428, -0.1501]) tensor([0.0430, 0.1780, 0.5363, 0.2427]) -Greedy action tensor([-1.9272, -0.4434, 0.6572, -0.1722]) tensor([0.0409, 0.1804, 0.5422, 0.2365]) -Greedy action tensor([-1.5665, -0.1714, 0.6656, 0.1242]) tensor([0.0506, 0.2040, 0.4712, 0.2742]) -Greedy action tensor([-1.8836, -0.4227, 0.6365, -0.1481]) tensor([0.0427, 0.1841, 0.5309, 0.2423]) -Greedy action tensor([-1.9149, -0.3478, 0.6402, -0.1634]) tensor([0.0409, 0.1962, 0.5269, 0.2359]) -Greedy action tensor([-1.8837, -0.3786, 0.6406, -0.1430]) tensor([0.0422, 0.1902, 0.5269, 0.2407]) -Greedy action tensor([-1.1102, 0.6851, 0.2104, -0.3229]) tensor([0.0771, 0.4645, 0.2889, 0.1695]) -Greedy action tensor([-1.3942, 0.2772, 0.3275, -0.0221]) tensor([0.0631, 0.3355, 0.3528, 0.2487]) -Greedy action tensor([-1.8819, -0.4512, 0.6439, -0.1313]) tensor([0.0427, 0.1784, 0.5333, 0.2456]) -Greedy action tensor([-1.7815, 0.2468, 0.4975, -0.1249]) tensor([0.0424, 0.3220, 0.4137, 0.2220]) -Greedy action tensor([-1.5749, 0.0424, 0.4700, -0.1509]) tensor([0.0558, 0.2812, 0.4312, 0.2318]) -Greedy action tensor([-1.9137, -0.4237, 0.6506, -0.1620]) tensor([0.0413, 0.1834, 0.5370, 0.2383]) -Greedy action tensor([-1.9111, -0.4523, 0.6549, -0.1643]) tensor([0.0416, 0.1788, 0.5411, 0.2385]) -Greedy action tensor([-0.9673, -0.1256, 0.1799, -0.0670]) tensor([0.1120, 0.2598, 0.3527, 0.2755]) -Greedy action tensor([-0.7959, 0.8464, 0.1640, -0.0816]) tensor([0.0924, 0.4775, 0.2413, 0.1888]) -Greedy action tensor([-1.7996, -0.2985, 0.5746, -0.1274]) tensor([0.0464, 0.2082, 0.4984, 0.2470]) -Greedy action tensor([-1.8848, -0.3777, 0.6554, -0.1323]) tensor([0.0417, 0.1883, 0.5292, 0.2407]) -Greedy action tensor([-1.9436, -0.4492, 0.6703, -0.1779]) tensor([0.0401, 0.1786, 0.5471, 0.2342]) -Greedy action tensor([-1.7346, -0.4006, 0.5570, -0.1094]) tensor([0.0506, 0.1921, 0.5004, 0.2570]) -Greedy action tensor([-1.8915, -0.4337, 0.6359, -0.1545]) tensor([0.0426, 0.1829, 0.5329, 0.2417]) -Greedy action tensor([-1.8927, -0.3989, 0.6238, -0.1424]) tensor([0.0424, 0.1888, 0.5249, 0.2440]) -Greedy action tensor([-1.8438, -0.3990, 0.6099, -0.1386]) tensor([0.0447, 0.1895, 0.5198, 0.2459]) -Greedy action tensor([-1.5445, -0.4371, 0.4540, 0.0260]) tensor([0.0617, 0.1867, 0.4550, 0.2966]) -Greedy action tensor([-1.6835, -0.2596, 0.5090, -0.0814]) tensor([0.0524, 0.2178, 0.4696, 0.2602]) -Greedy action tensor([-1.0919, -0.2340, 0.3680, 0.3841]) tensor([0.0831, 0.1959, 0.3576, 0.3634]) -Greedy action tensor([-1.2029, 0.4190, 0.3737, -0.2402]) tensor([0.0740, 0.3745, 0.3579, 0.1937]) -Greedy action tensor([-1.8562, -0.3278, 0.6325, -0.1263]) tensor([0.0429, 0.1979, 0.5170, 0.2421]) -Greedy action tensor([-1.9007, -0.4115, 0.6415, -0.1614]) tensor([0.0420, 0.1860, 0.5332, 0.2389]) -Greedy action tensor([-1.8126, -0.2397, 0.6203, -0.0883]) tensor([0.0438, 0.2112, 0.4992, 0.2458]) -Greedy action tensor([-1.9187, -0.3990, 0.6500, -0.1663]) tensor([0.0410, 0.1874, 0.5351, 0.2365]) -Greedy action tensor([-1.8458, -0.4757, 0.6088, -0.1235]) tensor([0.0451, 0.1775, 0.5250, 0.2524]) -Greedy action tensor([-1.8568, -0.5039, 0.6628, -0.1213]) tensor([0.0435, 0.1685, 0.5410, 0.2470]) -Greedy action tensor([-1.8326, -0.0259, 0.5666, -0.1822]) tensor([0.0429, 0.2612, 0.4724, 0.2234]) -Greedy action tensor([-1.8979, -0.4133, 0.6429, -0.1564]) tensor([0.0420, 0.1853, 0.5330, 0.2397]) -Greedy action tensor([-1.8836, -0.4691, 0.6217, -0.1505]) tensor([0.0434, 0.1787, 0.5320, 0.2458]) -Greedy action tensor([-1.8368, -0.3294, 0.6102, -0.1143]) tensor([0.0441, 0.1992, 0.5097, 0.2470]) -Greedy action tensor([-1.3026, 0.2920, 0.2734, 0.0378]) tensor([0.0686, 0.3378, 0.3316, 0.2620]) -Greedy action tensor([-1.7161, -0.2605, 0.5722, -0.0265]) tensor([0.0486, 0.2085, 0.4794, 0.2635]) -Greedy action tensor([-1.9191, -0.4411, 0.6549, -0.1674]) tensor([0.0412, 0.1807, 0.5406, 0.2375]) -Greedy action tensor([-1.2494, 0.3494, -0.2224, 0.0475]) tensor([0.0807, 0.3990, 0.2253, 0.2950]) -Greedy action tensor([-0.3008, -0.9312, 0.1737, -1.1925]) tensor([0.2817, 0.1500, 0.4528, 0.1155]) -Greedy action tensor([ 0.7825, -0.7981, 1.0032, 0.2443]) tensor([0.3293, 0.0678, 0.4106, 0.1923]) -Greedy action tensor([-0.1421, -1.8057, 0.6617, -0.6533]) tensor([0.2486, 0.0471, 0.5553, 0.1491]) -Greedy action tensor([ 0.3319, -0.1634, 0.2683, -0.4072]) tensor([0.3305, 0.2014, 0.3102, 0.1579]) -Greedy action tensor([-1.4963, 0.3921, 0.9645, -0.4874]) tensor([0.0453, 0.2995, 0.5309, 0.1243]) -Greedy action tensor([ 0.8783, -1.2864, 0.4668, -0.3705]) tensor([0.4844, 0.0556, 0.3210, 0.1390]) -Greedy action tensor([ 1.7639, -0.3079, -0.2360, 0.2015]) tensor([0.6798, 0.0856, 0.0920, 0.1425]) -Greedy action tensor([1.1013, 0.0581, 0.6971, 0.8400]) tensor([0.3584, 0.1263, 0.2393, 0.2760]) -Greedy action tensor([-0.2344, -0.5280, -0.1086, -0.7459]) tensor([0.2874, 0.2143, 0.3259, 0.1723]) -Greedy action tensor([-0.1632, -0.6123, -0.4416, 0.1208]) tensor([0.2686, 0.1714, 0.2033, 0.3567]) -Greedy action tensor([-0.0329, -0.9000, 0.3066, -0.3097]) tensor([0.2791, 0.1173, 0.3919, 0.2116]) -Greedy action tensor([ 0.4565, 0.0814, -0.0209, 0.7943]) tensor([0.2696, 0.1853, 0.1672, 0.3779]) -Greedy action tensor([ 0.2828, -0.1767, -0.3075, 0.3511]) tensor([0.3071, 0.1940, 0.1702, 0.3288]) -Greedy action tensor([ 0.7617, 0.0630, 0.7406, -0.4961]) tensor([0.3622, 0.1801, 0.3547, 0.1030]) -Greedy action tensor([-0.4525, -0.7400, 0.0347, -0.2237]) tensor([0.2157, 0.1618, 0.3512, 0.2712]) -Greedy action tensor([ 0.3430, -1.3519, -0.7080, 0.8139]) tensor([0.3190, 0.0586, 0.1115, 0.5109]) -Greedy action tensor([ 0.8987, -1.4137, 0.2654, 1.1920]) tensor([0.3366, 0.0333, 0.1787, 0.4514]) -Greedy action tensor([ 1.5751, -0.0872, 0.4650, -0.8312]) tensor([0.6214, 0.1179, 0.2048, 0.0560]) -Greedy action tensor([ 0.5116, -0.3773, -0.3781, 0.0921]) tensor([0.4033, 0.1658, 0.1657, 0.2652]) -Greedy action tensor([ 0.0188, -0.9809, 1.2985, -0.4730]) tensor([0.1794, 0.0660, 0.6449, 0.1097]) -Greedy action tensor([-0.2275, -0.7324, -0.5896, -0.7088]) tensor([0.3427, 0.2069, 0.2386, 0.2118]) -Greedy action tensor([-0.2307, 0.5660, -0.7675, -0.2464]) tensor([0.2089, 0.4634, 0.1221, 0.2056]) -Greedy action tensor([ 0.8196, -1.4434, -0.5416, 0.3258]) tensor([0.5074, 0.0528, 0.1301, 0.3097]) -Greedy action tensor([-0.4025, -0.5620, 1.0191, 0.2787]) tensor([0.1254, 0.1069, 0.5198, 0.2479]) -Greedy action tensor([-0.7939, -0.3172, 0.3294, -0.8739]) tensor([0.1513, 0.2437, 0.4653, 0.1397]) -Greedy action tensor([ 0.2087, -0.4782, 0.1570, -0.5879]) tensor([0.3444, 0.1733, 0.3270, 0.1553]) -Greedy action tensor([ 1.0400, -1.1947, 0.7878, 0.7256]) tensor([0.3825, 0.0409, 0.2972, 0.2793]) -Greedy action tensor([-0.1741, -0.7113, -0.8580, 0.2078]) tensor([0.2814, 0.1644, 0.1420, 0.4122]) -Greedy action tensor([ 0.5073, -1.2290, -0.4542, 0.3001]) tensor([0.4217, 0.0743, 0.1612, 0.3428]) -Greedy action tensor([ 0.6406, 0.1503, -0.1923, 0.0351]) tensor([0.3857, 0.2362, 0.1677, 0.2105]) -Greedy action tensor([ 0.2278, -0.6811, -0.0558, 0.2663]) tensor([0.3130, 0.1261, 0.2357, 0.3253]) -Greedy action tensor([-0.1353, -0.4992, 0.5719, -0.3328]) tensor([0.2201, 0.1529, 0.4464, 0.1806]) -Greedy action tensor([ 0.5971, -0.8646, -0.2895, 0.6982]) tensor([0.3636, 0.0843, 0.1498, 0.4023]) -Greedy action tensor([ 0.5220, -0.7084, -0.9677, 0.9545]) tensor([0.3269, 0.0955, 0.0737, 0.5038]) -Greedy action tensor([-1.0887, -0.1391, -1.0153, -0.1828]) tensor([0.1402, 0.3623, 0.1508, 0.3468]) -Greedy action tensor([ 0.0777, -0.9577, 0.0096, -0.0342]) tensor([0.3141, 0.1115, 0.2934, 0.2809]) -Greedy action tensor([-1.2256, -0.1586, -1.0510, 0.2322]) tensor([0.1064, 0.3094, 0.1268, 0.4574]) -Greedy action tensor([ 1.2321, -0.3487, 1.0762, -0.7290]) tensor([0.4541, 0.0935, 0.3885, 0.0639]) -Greedy action tensor([-0.8708, 0.9306, 0.0092, -0.9107]) tensor([0.0959, 0.5808, 0.2312, 0.0921]) -Greedy action tensor([ 0.1593, -0.8525, 1.3400, -0.1225]) tensor([0.1860, 0.0676, 0.6059, 0.1404]) -Greedy action tensor([-0.2017, 0.0178, 0.3195, -1.0725]) tensor([0.2300, 0.2864, 0.3873, 0.0963]) -Greedy action tensor([ 0.8860, -0.8043, 0.2509, -0.3412]) tensor([0.4981, 0.0919, 0.2640, 0.1460]) -Greedy action tensor([-0.5939, -1.2520, 0.5907, -0.9616]) tensor([0.1825, 0.0945, 0.5967, 0.1263]) -Greedy action tensor([ 1.4370, -1.2843, -0.0174, -0.1015]) tensor([0.6605, 0.0435, 0.1543, 0.1418]) -Greedy action tensor([ 0.6190, -0.6301, 0.9297, -0.4956]) tensor([0.3357, 0.0963, 0.4580, 0.1101]) -Greedy action tensor([ 0.6234, 0.3577, -0.3198, 0.6674]) tensor([0.3124, 0.2395, 0.1216, 0.3264]) -Greedy action tensor([-0.8638, -0.5816, 0.9097, -0.8695]) tensor([0.1086, 0.1440, 0.6396, 0.1079]) -Greedy action tensor([ 0.6606, 0.4555, -0.1732, 0.5439]) tensor([0.3186, 0.2595, 0.1384, 0.2835]) -Greedy action tensor([-0.4284, -0.8753, -0.2915, -1.0912]) tensor([0.3029, 0.1937, 0.3473, 0.1561]) -Greedy action tensor([ 0.4021, -0.5397, 1.1828, -0.7414]) tensor([0.2570, 0.1002, 0.5610, 0.0819]) -Greedy action tensor([ 0.1570, -1.0267, 1.2019, -1.0957]) tensor([0.2255, 0.0690, 0.6411, 0.0644]) -Greedy action tensor([ 1.9899, -0.6341, -0.5959, 0.4077]) tensor([0.7389, 0.0536, 0.0557, 0.1519]) -Greedy action tensor([ 0.1121, -1.1418, 0.1437, 0.0234]) tensor([0.3093, 0.0883, 0.3193, 0.2831]) -Greedy action tensor([ 1.0133, -0.3738, -1.3605, 1.8122]) tensor([0.2804, 0.0700, 0.0261, 0.6234]) -Greedy action tensor([ 0.6600, -0.1237, 0.0249, 0.3538]) tensor([0.3673, 0.1677, 0.1946, 0.2704]) -Greedy action tensor([-0.2055, -0.6714, 0.0650, -0.5273]) tensor([0.2730, 0.1713, 0.3578, 0.1979]) -Greedy action tensor([ 0.4720, -1.0144, 0.0174, 0.1097]) tensor([0.3911, 0.0885, 0.2482, 0.2722]) -Greedy action tensor([ 0.4852, -1.0577, 0.4651, 0.7208]) tensor([0.2891, 0.0618, 0.2833, 0.3659]) -Greedy action tensor([-0.0637, 0.0434, -0.0738, -1.0157]) tensor([0.2866, 0.3190, 0.2837, 0.1106]) -Greedy action tensor([-0.6344, 0.0095, -0.8496, -0.1968]) tensor([0.1901, 0.3620, 0.1533, 0.2945]) -Greedy action tensor([ 0.7043, 0.1139, 0.5631, -0.3337]) tensor([0.3602, 0.1996, 0.3127, 0.1275]) -Greedy action tensor([ 0.5025, -2.4575, 0.6146, 0.4902]) tensor([0.3166, 0.0164, 0.3542, 0.3128]) -Greedy action tensor([ 0.5311, -1.8224, -0.5358, 0.0940]) tensor([0.4796, 0.0456, 0.1650, 0.3098]) -Greedy action tensor([ 0.2816, -1.4182, -0.8184, 1.5773]) tensor([0.1935, 0.0353, 0.0644, 0.7068]) -Greedy action tensor([ 0.5816, 0.1609, -0.1034, -0.3347]) tensor([0.3905, 0.2564, 0.1969, 0.1562]) -Greedy action tensor([ 0.0802, -1.1439, -0.0038, 0.0101]) tensor([0.3179, 0.0935, 0.2923, 0.2964]) -Greedy action tensor([-1.2507, 0.3872, -0.2935, -0.7275]) tensor([0.0958, 0.4929, 0.2496, 0.1617]) -Greedy action tensor([-0.3864, 0.3156, 0.8597, -0.2788]) tensor([0.1314, 0.2652, 0.4570, 0.1464]) -Greedy action tensor([-0.3071, -0.9629, 0.5273, -0.5291]) tensor([0.2163, 0.1123, 0.4982, 0.1732]) -Greedy action tensor([ 1.5328, -1.9132, 1.0745, 1.0681]) tensor([0.4362, 0.0139, 0.2758, 0.2741]) -Greedy action tensor([ 0.6356, -0.7613, 0.0051, 0.0558]) tensor([0.4274, 0.1057, 0.2275, 0.2394]) -Greedy action tensor([ 1.3417, -0.6331, 0.7901, 1.3672]) tensor([0.3649, 0.0506, 0.2102, 0.3743]) -Greedy action tensor([ 0.6594, -1.5203, -0.9415, 0.3428]) tensor([0.4894, 0.0553, 0.0987, 0.3566]) -Greedy action tensor([ 0.3221, -0.1522, 0.2262, 0.3323]) tensor([0.2824, 0.1757, 0.2566, 0.2853]) -Greedy action tensor([ 1.1143, -0.4322, 0.7897, 0.5515]) tensor([0.3991, 0.0850, 0.2885, 0.2273]) -Greedy action tensor([-1.2323, -2.7729, -0.2665, 0.0340]) tensor([0.1353, 0.0290, 0.3555, 0.4801]) -Greedy action tensor([-1.6213, -0.8290, -1.2847, 0.1811]) tensor([0.0937, 0.2069, 0.1312, 0.5682]) -Greedy action tensor([ 0.2539, -0.6296, 0.0506, 0.7803]) tensor([0.2550, 0.1054, 0.2081, 0.4316]) -Greedy action tensor([-0.5787, -0.7239, 0.0206, -0.0704]) tensor([0.1870, 0.1617, 0.3405, 0.3109]) -Greedy action tensor([-0.7152, -0.1186, -1.1628, -0.2393]) tensor([0.1975, 0.3586, 0.1262, 0.3178]) -Greedy action tensor([ 0.3991, -0.0585, -0.0557, 0.1176]) tensor([0.3309, 0.2094, 0.2100, 0.2497]) -Greedy action tensor([ 0.6064, 0.0248, 0.0464, -0.1959]) tensor([0.3878, 0.2168, 0.2215, 0.1739]) -Greedy action tensor([ 0.5051, -0.4928, -0.2458, -0.0699]) tensor([0.4161, 0.1534, 0.1964, 0.2341]) -Greedy action tensor([ 0.4401, -0.3476, -0.1340, -0.0434]) tensor([0.3795, 0.1727, 0.2138, 0.2340]) -Greedy action tensor([ 0.9414, -0.4148, 0.0119, -0.3229]) tensor([0.5168, 0.1332, 0.2040, 0.1460]) -Greedy action tensor([ 0.7215, 0.2417, -0.1852, 0.2254]) tensor([0.3800, 0.2352, 0.1535, 0.2314]) -Greedy action tensor([ 1.1418, -0.7972, 0.0325, -0.5876]) tensor([0.6057, 0.0871, 0.1998, 0.1074]) -Greedy action tensor([ 0.6051, -0.6014, -0.1321, -0.1224]) tensor([0.4423, 0.1324, 0.2116, 0.2137]) -Greedy action tensor([ 0.3492, -0.0578, -0.0230, -0.0938]) tensor([0.3337, 0.2221, 0.2300, 0.2143]) -Greedy action tensor([ 0.6477, -0.0812, -0.0022, -0.0604]) tensor([0.4005, 0.1932, 0.2091, 0.1973]) -Greedy action tensor([ 0.9735, -0.6241, 0.0306, -0.4719]) tensor([0.5472, 0.1107, 0.2131, 0.1290]) -Greedy action tensor([ 1.0979, -0.7925, 0.1164, -0.5533]) tensor([0.5822, 0.0879, 0.2182, 0.1117]) -Greedy action tensor([ 0.8427, -0.4811, -0.1710, -0.5921]) tensor([0.5356, 0.1425, 0.1943, 0.1276]) -Greedy action tensor([ 0.3041, -0.0651, -0.0160, -0.7002]) tensor([0.3592, 0.2483, 0.2608, 0.1316]) -Greedy action tensor([ 0.8777, -0.6806, -0.2636, -0.7844]) tensor([0.5815, 0.1224, 0.1857, 0.1103]) -Greedy action tensor([ 0.6959, -0.3411, -0.0978, -0.2349]) tensor([0.4544, 0.1611, 0.2054, 0.1791]) -Greedy action tensor([ 0.6225, -0.3905, 0.0237, 0.0460]) tensor([0.4041, 0.1468, 0.2221, 0.2271]) -Greedy action tensor([ 0.4506, -0.1014, -0.0957, -0.5122]) tensor([0.3942, 0.2270, 0.2283, 0.1505]) -Greedy action tensor([ 1.4864, -0.8628, 0.0805, -0.5705]) tensor([0.6810, 0.0650, 0.1669, 0.0871]) -Greedy action tensor([ 0.8436, -0.3821, -0.0201, -0.1882]) tensor([0.4827, 0.1417, 0.2035, 0.1720]) -Greedy action tensor([ 0.5401, -0.2456, 0.0144, -0.1386]) tensor([0.3915, 0.1784, 0.2314, 0.1986]) -Greedy action tensor([ 0.7063, -0.4780, -0.1391, -0.1832]) tensor([0.4659, 0.1426, 0.2001, 0.1914]) -Greedy action tensor([ 0.7777, -0.5890, -0.0898, -0.1221]) tensor([0.4804, 0.1225, 0.2018, 0.1954]) -Greedy action tensor([ 0.7848, -0.6037, 0.0190, -0.4163]) tensor([0.4962, 0.1238, 0.2307, 0.1493]) -Greedy action tensor([ 0.9306, -0.4973, 0.0452, -0.4005]) tensor([0.5218, 0.1251, 0.2153, 0.1378]) -Greedy action tensor([ 0.1610, -0.2526, 0.1386, -0.4564]) tensor([0.3146, 0.2080, 0.3076, 0.1697]) -Greedy action tensor([ 0.6927, -0.4078, -0.1293, -0.4097]) tensor([0.4752, 0.1581, 0.2089, 0.1578]) -Greedy action tensor([ 0.9432, -0.1201, -0.0268, 0.0415]) tensor([0.4694, 0.1621, 0.1780, 0.1905]) -Greedy action tensor([ 0.7753, -0.2568, -0.0117, -0.3522]) tensor([0.4683, 0.1668, 0.2132, 0.1517]) -Greedy action tensor([ 0.8943, -0.7288, 0.0500, -0.3052]) tensor([0.5186, 0.1023, 0.2229, 0.1563]) -Greedy action tensor([ 0.3292, -0.3080, 0.0067, -0.1037]) tensor([0.3446, 0.1822, 0.2496, 0.2235]) -Greedy action tensor([ 0.8916, -0.7002, 0.0749, -0.4101]) tensor([0.5215, 0.1062, 0.2304, 0.1419]) -Greedy action tensor([ 0.6903, -0.1956, -0.0052, -0.0621]) tensor([0.4197, 0.1731, 0.2094, 0.1978]) -Greedy action tensor([ 0.3580, -0.3405, -0.0913, -0.0522]) tensor([0.3573, 0.1777, 0.2280, 0.2371]) -Greedy action tensor([ 0.7561, -0.4309, -0.0071, -0.3125]) tensor([0.4729, 0.1443, 0.2204, 0.1624]) -Greedy action tensor([ 0.5925, -0.1895, 0.0649, -0.2211]) tensor([0.4015, 0.1837, 0.2369, 0.1780]) -Greedy action tensor([ 0.5342, 0.2668, -0.2594, 0.0724]) tensor([0.3512, 0.2688, 0.1588, 0.2213]) -Greedy action tensor([ 0.5348, -0.5565, 0.0301, -0.3899]) tensor([0.4281, 0.1437, 0.2584, 0.1698]) -Greedy action tensor([ 0.9234, -0.6164, -0.1863, -0.2607]) tensor([0.5405, 0.1159, 0.1782, 0.1654]) -Greedy action tensor([ 0.6665, -0.3329, -0.0910, -0.1906]) tensor([0.4422, 0.1628, 0.2073, 0.1877]) -Greedy action tensor([ 0.7185, -0.0415, -0.1166, 0.0929]) tensor([0.4104, 0.1919, 0.1781, 0.2196]) -Greedy action tensor([ 0.8347, -0.7779, -0.1314, -0.4149]) tensor([0.5358, 0.1068, 0.2039, 0.1536]) -Greedy action tensor([ 0.7261, -0.3382, -0.1396, -0.4186]) tensor([0.4798, 0.1655, 0.2019, 0.1527]) -Greedy action tensor([ 0.8458, -0.2201, 0.0518, 0.0083]) tensor([0.4486, 0.1545, 0.2028, 0.1941]) -Greedy action tensor([ 0.3088, -0.2887, -0.0613, -0.0304]) tensor([0.3386, 0.1863, 0.2339, 0.2412]) -Greedy action tensor([ 0.7014, -0.4506, 0.0404, -0.3214]) tensor([0.4562, 0.1442, 0.2356, 0.1641]) -Greedy action tensor([0.3438, 0.1167, 0.0158, 0.0033]) tensor([0.3097, 0.2468, 0.2231, 0.2203]) -Greedy action tensor([ 0.8603, -0.6814, -0.0064, -0.2407]) tensor([0.5084, 0.1088, 0.2137, 0.1691]) -Greedy action tensor([ 0.9558, -0.4001, 0.0459, -0.4845]) tensor([0.5271, 0.1358, 0.2122, 0.1248]) -Greedy action tensor([ 0.4724, -0.1893, -0.0692, -0.0631]) tensor([0.3727, 0.1923, 0.2168, 0.2182]) -Greedy action tensor([ 0.6497, -0.0843, -0.0489, -0.1558]) tensor([0.4125, 0.1980, 0.2051, 0.1843]) -Greedy action tensor([ 0.4978, -0.2332, 0.0317, -0.0768]) tensor([0.3743, 0.1802, 0.2348, 0.2107]) -Greedy action tensor([ 0.8495, -0.5515, 0.0483, -0.4832]) tensor([0.5105, 0.1258, 0.2291, 0.1347]) -Greedy action tensor([ 0.9872, -0.9672, -0.0347, -0.3507]) tensor([0.5669, 0.0803, 0.2040, 0.1488]) -Greedy action tensor([ 1.1234, -0.1308, -0.0967, -0.0768]) tensor([0.5314, 0.1516, 0.1569, 0.1600]) -Greedy action tensor([ 0.9316, -0.4198, 0.1649, -0.3819]) tensor([0.5019, 0.1299, 0.2332, 0.1350]) -Greedy action tensor([ 0.5780, 0.2991, -0.2263, 0.2111]) tensor([0.3452, 0.2612, 0.1544, 0.2392]) -Greedy action tensor([ 0.8423, -0.5434, 0.0951, -0.3226]) tensor([0.4912, 0.1229, 0.2327, 0.1532]) -Greedy action tensor([ 0.7655, -0.6116, -0.1343, -0.2131]) tensor([0.4915, 0.1240, 0.1998, 0.1847]) -Greedy action tensor([ 0.7674, -0.6613, 0.0745, -0.4264]) tensor([0.4895, 0.1173, 0.2448, 0.1484]) -Greedy action tensor([ 0.5166, -0.5218, -0.1529, -0.2974]) tensor([0.4331, 0.1533, 0.2217, 0.1919]) -Greedy action tensor([ 0.9497, -0.5334, 0.1038, -0.7622]) tensor([0.5445, 0.1236, 0.2337, 0.0983]) -Greedy action tensor([ 1.1983, -0.7142, -0.1384, -0.3179]) tensor([0.6135, 0.0906, 0.1612, 0.1347]) -Greedy action tensor([ 1.1033, -0.9235, -0.0448, -0.7441]) tensor([0.6224, 0.0820, 0.1975, 0.0981]) -Greedy action tensor([ 1.1265, -0.8907, 0.0488, -0.8291]) tensor([0.6192, 0.0824, 0.2108, 0.0876]) -Greedy action tensor([ 0.1576, 0.1600, -0.1263, -0.1015]) tensor([0.2835, 0.2842, 0.2135, 0.2188]) -Greedy action tensor([ 0.4704, -0.0865, 0.0156, -0.0685]) tensor([0.3583, 0.2053, 0.2274, 0.2090]) -Greedy action tensor([ 0.9040, -0.4731, -0.0524, -0.3368]) tensor([0.5193, 0.1310, 0.1995, 0.1502]) -Greedy action tensor([ 0.6670, 0.1352, -0.0185, -0.1796]) tensor([0.3968, 0.2331, 0.1999, 0.1702]) -Greedy action tensor([ 0.8570, -0.7985, 0.1186, -0.4528]) tensor([0.5158, 0.0985, 0.2465, 0.1392]) -Greedy action tensor([ 0.5580, -0.2823, -0.0942, -0.0663]) tensor([0.4019, 0.1735, 0.2093, 0.2153]) -Greedy action tensor([ 0.9629, -0.6594, 0.0045, -0.4045]) tensor([0.5447, 0.1076, 0.2089, 0.1388]) -Greedy action tensor([ 0.5705, 0.0400, -0.0007, -0.2862]) tensor([0.3879, 0.2282, 0.2191, 0.1647]) -Greedy action tensor([ 1.4010, -0.7249, -0.0525, -0.8204]) tensor([0.6842, 0.0816, 0.1599, 0.0742]) -Greedy action tensor([ 0.9673, -0.7556, 0.1593, -0.5702]) tensor([0.5437, 0.0971, 0.2424, 0.1169]) -Greedy action tensor([ 0.8315, -0.6066, -0.0963, -0.2970]) tensor([0.5112, 0.1213, 0.2021, 0.1654]) -Greedy action tensor([ 0.6214, -0.5237, -0.1286, -0.3042]) tensor([0.4573, 0.1455, 0.2160, 0.1812]) -Greedy action tensor([ 0.7877, -0.7585, -0.0188, -0.4002]) tensor([0.5091, 0.1085, 0.2273, 0.1552]) -Greedy action tensor([ 0.8316, -0.8399, -0.0684, -0.4477]) tensor([0.5340, 0.1004, 0.2171, 0.1486]) -Greedy action tensor([ 1.2885, -1.1725, 0.2497, -0.6347]) tensor([0.6308, 0.0538, 0.2232, 0.0922]) -Greedy action tensor([ 0.5884, -0.3657, -0.1449, 0.0262]) tensor([0.4106, 0.1581, 0.1972, 0.2340]) -Greedy action tensor([ 1.3874, -0.5449, -0.5674, 0.1747]) tensor([0.6314, 0.0914, 0.0894, 0.1878]) -Greedy action tensor([ 1.6032, -0.1707, -0.6192, 0.4255]) tensor([0.6305, 0.1070, 0.0683, 0.1942]) -Greedy action tensor([ 1.4467, -0.4237, -0.3588, 0.2129]) tensor([0.6212, 0.0957, 0.1021, 0.1809]) -Greedy action tensor([ 1.3007, -0.6018, -0.1402, -0.1121]) tensor([0.6137, 0.0916, 0.1453, 0.1494]) -Greedy action tensor([ 1.6696, -0.6739, -0.2877, 0.3461]) tensor([0.6651, 0.0638, 0.0939, 0.1771]) -Greedy action tensor([ 1.4862, -0.7630, -0.1513, 0.1683]) tensor([0.6379, 0.0673, 0.1240, 0.1708]) -Greedy action tensor([ 1.3317, -0.6491, -0.4081, 0.3929]) tensor([0.5866, 0.0809, 0.1030, 0.2294]) -Greedy action tensor([ 1.3549, 0.3600, -0.5302, -0.0604]) tensor([0.5668, 0.2096, 0.0860, 0.1376]) -Greedy action tensor([ 1.6975, -1.3061, -0.3716, 0.3665]) tensor([0.6944, 0.0344, 0.0877, 0.1835]) -Greedy action tensor([ 1.8437, -0.5832, -0.2618, 0.5767]) tensor([0.6703, 0.0592, 0.0816, 0.1888]) -Greedy action tensor([ 1.9236, -0.3306, -0.4743, 0.6763]) tensor([0.6742, 0.0708, 0.0613, 0.1937]) -Greedy action tensor([ 1.9269, -0.9003, -0.2013, 0.2950]) tensor([0.7279, 0.0431, 0.0867, 0.1424]) -Greedy action tensor([ 1.7156, -0.6959, -0.0788, 0.5403]) tensor([0.6391, 0.0573, 0.1062, 0.1973]) -Greedy action tensor([ 1.9895, -0.5551, -0.2523, 0.4178]) tensor([0.7181, 0.0564, 0.0763, 0.1492]) -Greedy action tensor([ 1.3803, -0.3180, -0.0397, 0.1566]) tensor([0.5818, 0.1065, 0.1406, 0.1711]) -Greedy action tensor([ 1.4326, -0.3778, -0.3392, 0.0567]) tensor([0.6304, 0.1031, 0.1072, 0.1593]) -Greedy action tensor([ 1.5973, -0.9434, -0.5675, 0.8549]) tensor([0.5990, 0.0472, 0.0687, 0.2851]) -Greedy action tensor([ 1.3619, -0.4819, -0.2413, 0.6560]) tensor([0.5396, 0.0854, 0.1086, 0.2664]) -Greedy action tensor([ 1.4957, -0.4831, -0.6381, 0.5733]) tensor([0.6045, 0.0836, 0.0716, 0.2403]) -Greedy action tensor([ 1.2525, -0.5245, -0.5136, 0.2262]) tensor([0.5888, 0.0996, 0.1007, 0.2110]) -Greedy action tensor([ 2.2321, 0.2624, -0.2393, 0.1079]) tensor([0.7443, 0.1038, 0.0629, 0.0890]) -Greedy action tensor([ 1.5715, -0.7022, -0.1763, 0.3590]) tensor([0.6351, 0.0654, 0.1106, 0.1889]) -Greedy action tensor([ 1.8538, -0.7924, -0.1538, 0.3007]) tensor([0.7058, 0.0501, 0.0948, 0.1493]) -Greedy action tensor([ 1.3223, -0.6492, 0.0080, 0.1406]) tensor([0.5832, 0.0812, 0.1567, 0.1789]) -Greedy action tensor([ 1.1273, 0.1993, -1.1701, 0.4440]) tensor([0.4998, 0.1976, 0.0502, 0.2524]) -Greedy action tensor([ 1.3929, -0.6011, -0.3473, 0.4531]) tensor([0.5874, 0.0800, 0.1031, 0.2295]) -Greedy action tensor([ 1.7935, 0.0153, -1.1662, 0.5012]) tensor([0.6687, 0.1130, 0.0347, 0.1837]) -Greedy action tensor([ 2.1421, -1.4179, -0.0602, 0.4920]) tensor([0.7513, 0.0214, 0.0831, 0.1443]) -Greedy action tensor([ 1.3565, -0.3990, -0.2297, 0.6295]) tensor([0.5374, 0.0929, 0.1100, 0.2597]) -Greedy action tensor([ 1.1494, -0.2010, -0.5316, 0.1686]) tensor([0.5493, 0.1424, 0.1023, 0.2060]) -Greedy action tensor([ 1.6644, -0.8809, -0.4044, 0.2745]) tensor([0.6878, 0.0540, 0.0869, 0.1713]) -Greedy action tensor([ 1.9665, -0.8398, -0.4173, 0.3348]) tensor([0.7417, 0.0448, 0.0684, 0.1451]) -Greedy action tensor([ 1.8756, -0.9095, -0.3655, 0.3602]) tensor([0.7206, 0.0445, 0.0766, 0.1583]) -Greedy action tensor([ 1.6363, -0.2374, -0.9801, 0.2459]) tensor([0.6777, 0.1041, 0.0495, 0.1687]) -Greedy action tensor([ 1.1790, -0.4344, 0.1394, -0.0397]) tensor([0.5410, 0.1078, 0.1913, 0.1599]) -Greedy action tensor([ 1.0098, -0.1260, -0.5430, 0.3214]) tensor([0.4914, 0.1578, 0.1040, 0.2468]) -Greedy action tensor([ 1.5708, 0.1432, -0.0834, 0.1635]) tensor([0.5967, 0.1431, 0.1141, 0.1461]) -Greedy action tensor([ 1.5888, -0.6578, -0.7643, 0.0634]) tensor([0.7050, 0.0746, 0.0670, 0.1534]) -Greedy action tensor([ 1.5171, -0.3435, -0.3843, 0.5620]) tensor([0.5918, 0.0921, 0.0884, 0.2277]) -Greedy action tensor([ 1.8029, -0.4429, -0.6282, 0.5398]) tensor([0.6773, 0.0717, 0.0596, 0.1915]) -Greedy action tensor([ 1.4830, -0.8043, -0.3850, 0.4136]) tensor([0.6253, 0.0635, 0.0966, 0.2146]) -Greedy action tensor([ 1.4894, -0.0911, -0.1175, -0.2445]) tensor([0.6317, 0.1301, 0.1267, 0.1116]) -Greedy action tensor([ 1.1434, -0.4071, -0.3254, 0.3283]) tensor([0.5305, 0.1126, 0.1221, 0.2348]) -Greedy action tensor([ 1.3272, -0.6785, -0.7212, 0.7213]) tensor([0.5528, 0.0744, 0.0713, 0.3016]) -Greedy action tensor([ 1.7489, -0.6516, -0.6589, 0.1260]) tensor([0.7257, 0.0658, 0.0653, 0.1432]) -Greedy action tensor([ 1.3488, -0.7532, -0.3398, 0.2981]) tensor([0.6036, 0.0738, 0.1115, 0.2111]) -Greedy action tensor([ 1.1811, -0.0862, -0.3724, 0.3408]) tensor([0.5196, 0.1463, 0.1099, 0.2242]) -Greedy action tensor([ 1.4569, -0.5412, -0.4267, 0.4281]) tensor([0.6079, 0.0824, 0.0924, 0.2173]) -Greedy action tensor([ 1.2624, 0.1816, -0.7327, 0.4426]) tensor([0.5220, 0.1771, 0.0710, 0.2299]) -Greedy action tensor([ 1.2960, -0.7548, 0.0075, 0.8405]) tensor([0.4906, 0.0631, 0.1352, 0.3111]) -Greedy action tensor([ 1.2642, -0.5629, -0.1543, 0.1522]) tensor([0.5774, 0.0929, 0.1398, 0.1899]) -Greedy action tensor([ 1.1713, -0.5064, -0.6124, 0.7181]) tensor([0.5024, 0.0939, 0.0844, 0.3193]) -Greedy action tensor([ 1.4561, -0.0058, -0.6826, 0.2702]) tensor([0.6042, 0.1401, 0.0712, 0.1846]) -Greedy action tensor([ 1.1097, -0.6614, -0.2516, 0.3709]) tensor([0.5252, 0.0894, 0.1346, 0.2509]) -Greedy action tensor([ 2.3046, -0.9496, -0.4588, 0.5852]) tensor([0.7807, 0.0301, 0.0492, 0.1399]) -Greedy action tensor([ 1.7770, -0.7164, 0.0317, 0.1187]) tensor([0.6908, 0.0571, 0.1206, 0.1316]) -Greedy action tensor([ 1.1450, -0.5490, -0.0578, 0.4157]) tensor([0.5086, 0.0935, 0.1527, 0.2452]) -Greedy action tensor([ 1.6939, -0.5723, -0.5962, 0.1331]) tensor([0.7068, 0.0733, 0.0716, 0.1484]) -Greedy action tensor([ 1.6743, -0.4387, -0.3120, 0.6531]) tensor([0.6180, 0.0747, 0.0848, 0.2226]) -Greedy action tensor([ 1.9422, -0.1928, -0.2391, 0.6204]) tensor([0.6677, 0.0789, 0.0754, 0.1780]) -Greedy action tensor([ 1.6181, -0.5508, -0.0522, 0.1714]) tensor([0.6503, 0.0743, 0.1224, 0.1530]) -Greedy action tensor([ 1.7730, -0.7471, -0.0172, 0.5893]) tensor([0.6437, 0.0518, 0.1075, 0.1971]) -Greedy action tensor([ 1.5907, -0.4780, -0.4075, 0.4306]) tensor([0.6348, 0.0802, 0.0861, 0.1990]) -Greedy action tensor([ 1.6089, -0.5357, -0.4494, 0.5328]) tensor([0.6306, 0.0739, 0.0805, 0.2150]) -Greedy action tensor([ 1.8007, -0.5733, -0.3213, 0.5043]) tensor([0.6728, 0.0626, 0.0806, 0.1840]) -Greedy action tensor([ 1.2753, -0.4036, -0.6338, 0.1508]) tensor([0.6026, 0.1124, 0.0893, 0.1957]) -Greedy action tensor([ 1.6329, -0.4657, -0.2757, 0.2572]) tensor([0.6564, 0.0805, 0.0973, 0.1658]) -Greedy action tensor([ 1.5756, -0.8283, -0.0780, 0.1978]) tensor([0.6519, 0.0589, 0.1248, 0.1644]) -Greedy action tensor([ 1.8406, -0.4107, -0.4545, 0.2655]) tensor([0.7077, 0.0745, 0.0713, 0.1465]) -Greedy action tensor([ 2.2082, -0.8133, -0.2614, 0.6558]) tensor([0.7434, 0.0362, 0.0629, 0.1574]) -Greedy action tensor([ 1.2598, -0.0383, -0.7184, -0.0437]) tensor([0.5942, 0.1623, 0.0822, 0.1614]) -Greedy action tensor([ 1.1110, -0.3475, -0.2709, -0.2421]) tensor([0.5740, 0.1335, 0.1441, 0.1483]) -Greedy action tensor([ 1.2846, 0.2403, -0.7344, -0.2763]) tensor([0.5901, 0.2077, 0.0784, 0.1239]) -Greedy action tensor([ 1.3110, -0.5690, -0.4756, 0.5152]) tensor([0.5645, 0.0861, 0.0946, 0.2547]) -Greedy action tensor([ 1.9613, -0.6441, -0.7322, 0.4395]) tensor([0.7354, 0.0543, 0.0497, 0.1606]) -Greedy action tensor([ 2.0877, -1.1869, -0.6723, 0.7434]) tensor([0.7343, 0.0278, 0.0465, 0.1914]) -Greedy action tensor([ 0.8371, -0.4297, 0.1216, 0.4706]) tensor([0.4059, 0.1144, 0.1985, 0.2813]) -Greedy action tensor([ 1.6223, -0.8232, -0.5118, 0.4378]) tensor([0.6618, 0.0574, 0.0783, 0.2025]) -Greedy action tensor([ 1.4168, -0.0833, -0.3692, 0.2099]) tensor([0.5918, 0.1320, 0.0992, 0.1770]) -Greedy action tensor([ 2.1896, -0.6864, -0.6551, 0.6235]) tensor([0.7556, 0.0426, 0.0439, 0.1578]) -Greedy action tensor([ 1.1919, -0.2009, -0.3119, 0.3299]) tensor([0.5283, 0.1312, 0.1174, 0.2231]) -Greedy action tensor([-1.7385e+00, -4.6051e-01, 6.1854e-01, 1.0665e-03]) tensor([0.0480, 0.1722, 0.5066, 0.2732]) -Greedy action tensor([-1.8971, -0.4318, 0.6896, -0.1036]) tensor([0.0406, 0.1758, 0.5395, 0.2441]) -Greedy action tensor([-1.8251, -0.3810, 0.6395, -0.1294]) tensor([0.0445, 0.1888, 0.5238, 0.2428]) -Greedy action tensor([-1.6882, 0.1534, 0.4479, -0.0264]) tensor([0.0475, 0.2997, 0.4024, 0.2504]) -Greedy action tensor([-1.6538, -0.0766, 0.4520, 0.0037]) tensor([0.0518, 0.2508, 0.4255, 0.2718]) -Greedy action tensor([-0.7180, 0.8993, 0.0737, 0.1196]) tensor([0.0947, 0.4773, 0.2091, 0.2189]) -Greedy action tensor([-1.5986, -0.5343, 0.4832, -0.0073]) tensor([0.0594, 0.1723, 0.4765, 0.2918]) -Greedy action tensor([-1.1096, 0.4111, 0.1890, 0.1058]) tensor([0.0793, 0.3628, 0.2905, 0.2674]) -Greedy action tensor([-0.3291, -0.0524, 0.5278, 1.0021]) tensor([0.1182, 0.1559, 0.2785, 0.4475]) -Greedy action tensor([-1.1189, 0.6370, 0.1953, 0.1040]) tensor([0.0719, 0.4162, 0.2676, 0.2443]) -Greedy action tensor([-1.8300, -0.3956, 0.6231, -0.0824]) tensor([0.0443, 0.1860, 0.5152, 0.2544]) -Greedy action tensor([-1.9302, -0.4256, 0.6516, -0.1713]) tensor([0.0408, 0.1835, 0.5390, 0.2367]) -Greedy action tensor([-1.8998, -0.3512, 0.6341, -0.1531]) tensor([0.0416, 0.1957, 0.5242, 0.2386]) -Greedy action tensor([-1.9234, -0.3971, 0.6495, -0.1632]) tensor([0.0408, 0.1877, 0.5344, 0.2371]) -Greedy action tensor([-1.8246, -0.3664, 0.6145, -0.1059]) tensor([0.0448, 0.1924, 0.5131, 0.2497]) -Greedy action tensor([-0.7042, 0.8492, -0.0802, 0.5865]) tensor([0.0890, 0.4210, 0.1662, 0.3237]) -Greedy action tensor([-1.7059, -0.1623, 0.4819, -0.0885]) tensor([0.0509, 0.2384, 0.4540, 0.2567]) -Greedy action tensor([-1.9167, -0.4167, 0.6552, -0.1605]) tensor([0.0410, 0.1840, 0.5373, 0.2377]) -Greedy action tensor([-1.9384, -0.4416, 0.6647, -0.1771]) tensor([0.0403, 0.1802, 0.5447, 0.2348]) -Greedy action tensor([-1.8675, -0.3646, 0.6332, -0.1408]) tensor([0.0429, 0.1928, 0.5231, 0.2412]) -Greedy action tensor([-1.9035, -0.3982, 0.6435, -0.1600]) tensor([0.0417, 0.1878, 0.5322, 0.2383]) -Greedy action tensor([-1.9347, -0.4216, 0.6585, -0.1715]) tensor([0.0404, 0.1835, 0.5404, 0.2356]) -Greedy action tensor([-1.8546, -0.4652, 0.6285, -0.1347]) tensor([0.0443, 0.1777, 0.5306, 0.2474]) -Greedy action tensor([-1.2230, -0.1350, 0.5392, 0.3096]) tensor([0.0693, 0.2058, 0.4039, 0.3210]) -Greedy action tensor([-1.9051, -0.4239, 0.6449, -0.1628]) tensor([0.0418, 0.1839, 0.5355, 0.2388]) -Greedy action tensor([-1.5022, 0.1850, 0.4451, 0.1294]) tensor([0.0540, 0.2917, 0.3784, 0.2759]) -Greedy action tensor([-0.8429, 0.9020, 0.0869, 0.2125]) tensor([0.0824, 0.4719, 0.2089, 0.2368]) -Greedy action tensor([-1.9222, -0.4172, 0.6536, -0.1677]) tensor([0.0409, 0.1844, 0.5380, 0.2366]) -Greedy action tensor([-1.8899, -0.4023, 0.6309, -0.1576]) tensor([0.0425, 0.1882, 0.5289, 0.2404]) -Greedy action tensor([-1.8875, -0.4554, 0.6421, -0.1551]) tensor([0.0428, 0.1790, 0.5365, 0.2417]) -Greedy action tensor([-1.4373, 0.5915, 0.2919, 0.1571]) tensor([0.0522, 0.3968, 0.2941, 0.2570]) -Greedy action tensor([-1.8729, -0.3604, 0.6274, -0.1248]) tensor([0.0426, 0.1934, 0.5193, 0.2447]) -Greedy action tensor([-1.9414, -0.4366, 0.6618, -0.1787]) tensor([0.0403, 0.1813, 0.5438, 0.2346]) -Greedy action tensor([-1.8883, -0.3674, 0.6376, -0.1482]) tensor([0.0421, 0.1925, 0.5258, 0.2397]) -Greedy action tensor([-1.7750, -0.4046, 0.5776, -0.0863]) tensor([0.0479, 0.1887, 0.5039, 0.2594]) -Greedy action tensor([-1.9176, -0.4022, 0.6498, -0.1613]) tensor([0.0410, 0.1867, 0.5347, 0.2376]) -Greedy action tensor([ 0.0505, -0.0799, 0.1268, 0.0688]) tensor([0.2515, 0.2208, 0.2715, 0.2562]) -Greedy action tensor([-1.9201, -0.4376, 0.6564, -0.1669]) tensor([0.0411, 0.1810, 0.5406, 0.2373]) -Greedy action tensor([-1.8306, -0.5036, 0.6235, -0.1077]) tensor([0.0454, 0.1713, 0.5288, 0.2545]) -Greedy action tensor([-1.4571, 0.3630, 0.4153, -0.1803]) tensor([0.0579, 0.3576, 0.3768, 0.2077]) -Greedy action tensor([-1.9312, -0.4461, 0.6607, -0.1732]) tensor([0.0407, 0.1797, 0.5435, 0.2361]) -Greedy action tensor([-0.8112, -0.5396, 0.6403, 1.3285]) tensor([0.0663, 0.0870, 0.2831, 0.5635]) -Greedy action tensor([-1.8219, -0.4541, 0.5999, -0.1426]) tensor([0.0464, 0.1822, 0.5227, 0.2488]) -Greedy action tensor([-1.9486, -0.4519, 0.6684, -0.1830]) tensor([0.0400, 0.1786, 0.5476, 0.2337]) -Greedy action tensor([-1.8863, -0.4344, 0.6371, -0.1539]) tensor([0.0427, 0.1826, 0.5330, 0.2417]) -Greedy action tensor([-1.8311, -0.4785, 0.6109, -0.1347]) tensor([0.0458, 0.1773, 0.5269, 0.2500]) -Greedy action tensor([-0.5484, 0.2770, 0.5005, 0.7631]) tensor([0.1015, 0.2318, 0.2898, 0.3769]) -Greedy action tensor([-1.3237, -0.4414, 0.4356, 0.4355]) tensor([0.0665, 0.1607, 0.3864, 0.3863]) -Greedy action tensor([-1.4609, -0.4986, 0.4767, -0.0837]) tensor([0.0689, 0.1802, 0.4780, 0.2729]) -Greedy action tensor([-1.8117, -0.3466, 0.5817, -0.0861]) tensor([0.0457, 0.1977, 0.5002, 0.2565]) -Greedy action tensor([-1.8967, -0.2786, 0.6175, -0.1554]) tensor([0.0415, 0.2092, 0.5126, 0.2367]) -Greedy action tensor([-1.8438, 0.2514, 0.3927, -0.3736]) tensor([0.0438, 0.3559, 0.4099, 0.1905]) -Greedy action tensor([-1.7645, -0.4620, 0.5769, -0.0966]) tensor([0.0491, 0.1805, 0.5102, 0.2602]) -Greedy action tensor([-1.6849, -0.0500, 0.4884, 0.0325]) tensor([0.0488, 0.2504, 0.4289, 0.2719]) -Greedy action tensor([-1.9047, -0.4395, 0.6454, -0.1622]) tensor([0.0419, 0.1815, 0.5371, 0.2395]) -Greedy action tensor([-1.9321, -0.4311, 0.6573, -0.1726]) tensor([0.0406, 0.1822, 0.5412, 0.2360]) -Greedy action tensor([-0.8403, 0.6796, 0.2020, -0.3077]) tensor([0.0989, 0.4522, 0.2805, 0.1685]) -Greedy action tensor([-1.0794, -0.3823, 0.3858, -0.1903]) tensor([0.1024, 0.2055, 0.4431, 0.2490]) -Greedy action tensor([-0.3574, -0.2844, 0.1709, -0.0622]) tensor([0.1955, 0.2103, 0.3316, 0.2626]) -Greedy action tensor([-1.8422, -0.3365, 0.6053, -0.1145]) tensor([0.0441, 0.1986, 0.5094, 0.2480]) -Greedy action tensor([-1.9283, -0.4423, 0.6612, -0.1698]) tensor([0.0407, 0.1800, 0.5428, 0.2364]) -Greedy action tensor([-1.4521, -0.5993, 0.4086, 0.0477]) tensor([0.0701, 0.1646, 0.4509, 0.3143]) -Greedy action tensor([-1.3125, -0.3879, 0.3381, 0.1516]) tensor([0.0766, 0.1931, 0.3991, 0.3312]) -Greedy action tensor([-1.7052, -0.1611, 0.5143, -0.1019]) tensor([0.0504, 0.2359, 0.4635, 0.2503]) -Greedy action tensor([-1.8329, -0.4684, 0.6114, -0.1460]) tensor([0.0458, 0.1792, 0.5276, 0.2474]) -Greedy action tensor([-1.9080, -0.4322, 0.6492, -0.1614]) tensor([0.0416, 0.1822, 0.5373, 0.2389]) -Greedy action tensor([-1.4143, 0.1039, 0.3681, -0.0287]) tensor([0.0645, 0.2943, 0.3834, 0.2578]) -Greedy action tensor([-1.2121, -0.1694, 0.3380, -0.0401]) tensor([0.0849, 0.2409, 0.4001, 0.2741]) -Greedy action tensor([-1.8717, -0.4408, 0.6272, -0.1408]) tensor([0.0435, 0.1819, 0.5291, 0.2455]) -Greedy action tensor([-1.6806, -0.4623, 0.5965, 0.0718]) tensor([0.0503, 0.1699, 0.4899, 0.2899]) -Greedy action tensor([-1.8885, -0.4202, 0.6678, -0.1378]) tensor([0.0417, 0.1810, 0.5372, 0.2401]) -Greedy action tensor([-1.8114, -0.4697, 0.6092, -0.1126]) tensor([0.0464, 0.1776, 0.5223, 0.2538]) -Greedy action tensor([-1.8297, -0.2284, 0.6044, -0.1148]) tensor([0.0436, 0.2164, 0.4976, 0.2424]) -Greedy action tensor([-1.9138, -0.4444, 0.6532, -0.1652]) tensor([0.0415, 0.1802, 0.5401, 0.2383]) -Greedy action tensor([-1.7918, -0.4538, 0.5891, -0.1026]) tensor([0.0475, 0.1811, 0.5140, 0.2573]) -Greedy action tensor([-1.4865, 0.4147, 0.2491, -0.0522]) tensor([0.0569, 0.3811, 0.3230, 0.2390]) -Greedy action tensor([-1.8206, 0.1753, 0.5344, -0.1668]) tensor([0.0415, 0.3050, 0.4369, 0.2167]) -Greedy action tensor([ 0.0663, 1.1442, -0.0233, 0.1742]) tensor([0.1676, 0.4925, 0.1532, 0.1867]) -Greedy action tensor([-0.7485, -0.4465, 0.3807, -0.3209]) tensor([0.1433, 0.1938, 0.4432, 0.2197]) -Greedy action tensor([-1.9007, -0.3397, 0.6396, -0.1563]) tensor([0.0414, 0.1971, 0.5248, 0.2368]) -Greedy action tensor([-0.6325, -0.0016, 0.4720, 0.7127]) tensor([0.1027, 0.1930, 0.3099, 0.3943]) -Greedy action tensor([ 0.5477, -0.0676, -0.0585, -0.1042]) tensor([0.3836, 0.2073, 0.2092, 0.1999]) -Greedy action tensor([ 1.1529, -0.7852, -0.0617, -0.6272]) tensor([0.6214, 0.0895, 0.1844, 0.1048]) -Greedy action tensor([ 0.8444, -0.3682, -0.1533, -0.2498]) tensor([0.4998, 0.1486, 0.1843, 0.1673]) -Greedy action tensor([ 0.4748, -0.1502, -0.0711, -0.0667]) tensor([0.3708, 0.1985, 0.2148, 0.2158]) -Greedy action tensor([ 0.7799, -0.6739, -0.0458, -0.2568]) tensor([0.4935, 0.1153, 0.2161, 0.1750]) -Greedy action tensor([ 1.0286, -0.7704, 0.0915, -0.6888]) tensor([0.5758, 0.0953, 0.2256, 0.1034]) -Greedy action tensor([ 0.8197, -0.2173, -0.0827, -0.5286]) tensor([0.4951, 0.1755, 0.2008, 0.1286]) -Greedy action tensor([ 0.7297, -0.5330, 0.0730, -0.4277]) tensor([0.4726, 0.1337, 0.2451, 0.1486]) -Greedy action tensor([ 0.9733, -0.5351, -0.0513, -0.5526]) tensor([0.5563, 0.1231, 0.1997, 0.1209]) -Greedy action tensor([ 0.8002, -0.4961, -0.0641, -0.4053]) tensor([0.5014, 0.1372, 0.2113, 0.1502]) -Greedy action tensor([ 0.8049, -0.4289, -0.0278, -0.1616]) tensor([0.4747, 0.1382, 0.2064, 0.1806]) -Greedy action tensor([ 0.5982, -0.3153, 0.0013, -0.1951]) tensor([0.4160, 0.1669, 0.2290, 0.1882]) -Greedy action tensor([ 1.1347, -0.9691, -0.3384, -0.7167]) tensor([0.6630, 0.0809, 0.1520, 0.1041]) -Greedy action tensor([ 0.9185, -0.2715, 0.0764, -0.3554]) tensor([0.4963, 0.1510, 0.2138, 0.1388]) -Greedy action tensor([ 0.4766, -0.4354, -0.1157, -0.2091]) tensor([0.4067, 0.1634, 0.2250, 0.2049]) -Greedy action tensor([ 0.7110, -0.3565, 0.0122, -0.2115]) tensor([0.4467, 0.1536, 0.2221, 0.1776]) -Greedy action tensor([ 0.6215, -0.3686, -0.0832, -0.0616]) tensor([0.4218, 0.1567, 0.2085, 0.2130]) -Greedy action tensor([ 0.6906, -0.4375, -0.1618, -0.5916]) tensor([0.4932, 0.1596, 0.2103, 0.1368]) -Greedy action tensor([ 0.7982, -0.1793, 0.1205, -0.2242]) tensor([0.4457, 0.1677, 0.2263, 0.1603]) -Greedy action tensor([ 0.4639, -0.2524, -0.0606, -0.1034]) tensor([0.3777, 0.1845, 0.2236, 0.2142]) -Greedy action tensor([ 1.0713, -0.7680, 0.1002, -0.6147]) tensor([0.5804, 0.0922, 0.2198, 0.1075]) -Greedy action tensor([ 0.8643, -0.7551, -0.0285, -0.5059]) tensor([0.5372, 0.1064, 0.2200, 0.1365]) -Greedy action tensor([ 1.0032e+00, -5.0804e-01, 5.7715e-04, -3.2971e-01]) tensor([0.5402, 0.1192, 0.1982, 0.1424]) -Greedy action tensor([ 1.0152, -0.6999, 0.0890, -0.4369]) tensor([0.5525, 0.0994, 0.2188, 0.1293]) -Greedy action tensor([ 1.1977, -0.5188, -0.1789, -0.5174]) tensor([0.6203, 0.1115, 0.1566, 0.1116]) -Greedy action tensor([ 0.8292, -0.4074, 0.0106, -0.2915]) tensor([0.4860, 0.1411, 0.2144, 0.1585]) -Greedy action tensor([ 0.4216, 0.1082, -0.0164, 0.0411]) tensor([0.3268, 0.2389, 0.2109, 0.2234]) -Greedy action tensor([ 0.5177, -0.5377, 0.0548, -0.2167]) tensor([0.4069, 0.1416, 0.2562, 0.1952]) -Greedy action tensor([ 0.8066, -0.1601, -0.0284, -0.0038]) tensor([0.4427, 0.1684, 0.1921, 0.1969]) -Greedy action tensor([ 0.1536, -0.1117, 0.0006, -0.1473]) tensor([0.2972, 0.2279, 0.2550, 0.2199]) -Greedy action tensor([ 0.4343, -0.2770, 0.0471, -0.3447]) tensor([0.3804, 0.1868, 0.2583, 0.1745]) -Greedy action tensor([ 0.2420, -0.1351, -0.0240, -0.4960]) tensor([0.3413, 0.2340, 0.2616, 0.1631]) -Greedy action tensor([ 0.4456, -0.3414, 0.0150, -0.1371]) tensor([0.3754, 0.1709, 0.2441, 0.2096]) -Greedy action tensor([ 0.9056, -0.6197, -0.1553, -0.3413]) tensor([0.5402, 0.1175, 0.1870, 0.1552]) -Greedy action tensor([ 0.9985, -0.8767, 0.1276, -0.4252]) tensor([0.5517, 0.0846, 0.2309, 0.1328]) -Greedy action tensor([ 0.1191, 0.0952, -0.1253, -0.6103]) tensor([0.3085, 0.3012, 0.2416, 0.1487]) -Greedy action tensor([ 0.6349, -0.2339, 0.0559, -0.0817]) tensor([0.4051, 0.1699, 0.2271, 0.1979]) -Greedy action tensor([ 0.6752, -0.4568, -0.0606, -0.1741]) tensor([0.4486, 0.1446, 0.2149, 0.1919]) -Greedy action tensor([ 0.8523, -0.5778, -0.0640, -0.3290]) tensor([0.5138, 0.1230, 0.2055, 0.1577]) -Greedy action tensor([ 0.7486, -0.2477, -0.0402, -0.3632]) tensor([0.4646, 0.1715, 0.2111, 0.1528]) -Greedy action tensor([ 0.9393, -0.5665, -0.0477, -0.3537]) tensor([0.5351, 0.1187, 0.1994, 0.1468]) -Greedy action tensor([ 0.7018, -0.3165, -0.0146, -0.3547]) tensor([0.4551, 0.1644, 0.2223, 0.1582]) -Greedy action tensor([ 0.8448, -0.3278, -0.0068, -0.3064]) tensor([0.4872, 0.1508, 0.2079, 0.1541]) -Greedy action tensor([ 0.7544, -0.9961, 0.3843, -0.5011]) tensor([0.4653, 0.0808, 0.3214, 0.1326]) -Greedy action tensor([ 1.0398, -0.6921, 0.0376, -0.6044]) tensor([0.5756, 0.1019, 0.2113, 0.1112]) -Greedy action tensor([ 0.9693, -0.6546, -0.0329, -0.4019]) tensor([0.5500, 0.1084, 0.2019, 0.1396]) -Greedy action tensor([ 0.9529, -0.5537, -0.0682, -0.5004]) tensor([0.5507, 0.1221, 0.1984, 0.1288]) -Greedy action tensor([ 0.8377, -0.6310, 0.1962, -0.1696]) tensor([0.4713, 0.1085, 0.2481, 0.1721]) -Greedy action tensor([ 1.1786, -0.8389, -0.0420, -0.6246]) tensor([0.6278, 0.0835, 0.1852, 0.1034]) -Greedy action tensor([ 1.1896, -0.6868, -0.1012, -0.4801]) tensor([0.6186, 0.0947, 0.1702, 0.1165]) -Greedy action tensor([ 0.9921, -0.8562, 0.1364, -0.5467]) tensor([0.5565, 0.0876, 0.2365, 0.1194]) -Greedy action tensor([ 0.5131, -0.4173, 0.2969, -0.1706]) tensor([0.3697, 0.1458, 0.2978, 0.1866]) -Greedy action tensor([ 0.8851, -0.3456, -0.0481, -0.1005]) tensor([0.4858, 0.1419, 0.1910, 0.1813]) -Greedy action tensor([ 0.9718, -0.5783, 0.0052, -0.2959]) tensor([0.5336, 0.1132, 0.2030, 0.1502]) -Greedy action tensor([ 0.8450, -0.5769, -0.0341, -0.3823]) tensor([0.5129, 0.1238, 0.2130, 0.1503]) -Greedy action tensor([ 1.3690, -0.7663, -0.0615, -0.5634]) tensor([0.6657, 0.0787, 0.1592, 0.0964]) -Greedy action tensor([ 1.2044, -0.5534, 0.1315, -0.5292]) tensor([0.5913, 0.1020, 0.2022, 0.1045]) -Greedy action tensor([ 0.7265, -0.3228, -0.0105, -0.0909]) tensor([0.4405, 0.1542, 0.2108, 0.1945]) -Greedy action tensor([ 0.7699, -0.3435, 0.0431, -0.2420]) tensor([0.4597, 0.1510, 0.2222, 0.1671]) -Greedy action tensor([ 0.5937, -0.2737, -0.1461, -0.1730]) tensor([0.4234, 0.1779, 0.2021, 0.1967]) -Greedy action tensor([ 0.8842, -0.2799, -0.1269, -0.5305]) tensor([0.5211, 0.1627, 0.1896, 0.1266]) -Greedy action tensor([ 1.1638, -0.7902, -0.0084, -0.7846]) tensor([0.6274, 0.0889, 0.1943, 0.0894]) -Greedy action tensor([ 0.7299, -0.6908, -0.0586, -0.2669]) tensor([0.4842, 0.1170, 0.2201, 0.1787]) -Greedy action tensor([ 0.5668, -0.0965, -0.1224, -0.0818]) tensor([0.3937, 0.2028, 0.1976, 0.2058]) -Greedy action tensor([ 0.7926, -0.4449, -0.1069, -0.3011]) tensor([0.4921, 0.1428, 0.2002, 0.1649]) -Greedy action tensor([ 0.9223, -0.7035, 0.1555, -0.2846]) tensor([0.5101, 0.1004, 0.2369, 0.1526]) -Greedy action tensor([ 0.9361, -0.4114, -0.0848, -0.0845]) tensor([0.5049, 0.1312, 0.1819, 0.1820]) -Greedy action tensor([ 1.0362, -0.5783, -0.2782, -0.3962]) tensor([0.5860, 0.1166, 0.1574, 0.1399]) -Greedy action tensor([ 0.4091, -0.5594, -0.1548, -0.1259]) tensor([0.3946, 0.1498, 0.2245, 0.2311]) -Greedy action tensor([ 0.6449, -0.2574, -0.0030, -0.2862]) tensor([0.4305, 0.1746, 0.2252, 0.1697]) -Greedy action tensor([ 0.1069, 0.0699, -0.1486, -0.0699]) tensor([0.2796, 0.2695, 0.2166, 0.2343]) -Greedy action tensor([ 0.5185, -0.0588, 0.0564, 0.0065]) tensor([0.3583, 0.2012, 0.2257, 0.2148]) -Greedy action tensor([ 0.3998, -0.1466, 0.1263, 0.1765]) tensor([0.3185, 0.1844, 0.2423, 0.2548]) -Greedy action tensor([ 0.8148, -0.6494, -0.0271, -0.3250]) tensor([0.5045, 0.1167, 0.2174, 0.1614]) -Greedy action tensor([ 0.7092, 0.1666, -0.1090, -0.1201]) tensor([0.4067, 0.2364, 0.1794, 0.1775]) -Greedy action tensor([ 1.1370, -0.6610, 0.0376, -0.4835]) tensor([0.5895, 0.0976, 0.1963, 0.1166]) -Greedy action tensor([ 0.5724, 0.1152, 0.0371, -0.2411]) tensor([0.3757, 0.2378, 0.2200, 0.1665]) -Greedy action tensor([ 0.6545, -0.3735, 0.0704, -0.3416]) tensor([0.4377, 0.1566, 0.2441, 0.1617]) -Greedy action tensor([ 0.5687, -0.2668, -0.0948, -0.4329]) tensor([0.4318, 0.1872, 0.2224, 0.1586]) -Greedy action tensor([ 0.7607, -0.2429, -0.1008, -0.1333]) tensor([0.4549, 0.1668, 0.1922, 0.1861]) -Greedy action tensor([ 0.6786, -0.3001, 0.0627, -0.4094]) tensor([0.4439, 0.1668, 0.2398, 0.1495]) -Greedy action tensor([-0.1712, -0.0410, -0.6822, -0.7527]) tensor([0.3032, 0.3454, 0.1819, 0.1695]) -Greedy action tensor([ 1.0475, -1.1093, 0.6391, 0.8886]) tensor([0.3797, 0.0439, 0.2524, 0.3239]) -Greedy action tensor([ 0.5165, -0.5139, 0.6256, -0.3033]) tensor([0.3433, 0.1225, 0.3829, 0.1512]) -Greedy action tensor([ 0.4800, -0.9250, -0.2852, 0.8205]) tensor([0.3209, 0.0787, 0.1493, 0.4511]) -Greedy action tensor([ 0.2871, -1.4553, -1.3752, 0.2434]) tensor([0.4307, 0.0754, 0.0817, 0.4122]) -Greedy action tensor([-0.3171, -0.6379, -0.2546, -0.2182]) tensor([0.2568, 0.1863, 0.2734, 0.2835]) -Greedy action tensor([-0.2897, -0.5967, 0.3524, 0.0214]) tensor([0.2000, 0.1471, 0.3800, 0.2729]) -Greedy action tensor([-0.2226, -0.7379, 1.2007, -1.1548]) tensor([0.1628, 0.0973, 0.6758, 0.0641]) -Greedy action tensor([-0.1857, -0.0457, 0.1312, -0.7803]) tensor([0.2454, 0.2823, 0.3369, 0.1354]) -Greedy action tensor([ 1.2018, -0.4943, 0.8053, 0.3573]) tensor([0.4375, 0.0802, 0.2943, 0.1880]) -Greedy action tensor([-0.3152, -0.0398, 0.2971, -0.2708]) tensor([0.1920, 0.2529, 0.3543, 0.2008]) -Greedy action tensor([-0.5056, -1.9053, -0.2907, -0.2063]) tensor([0.2607, 0.0643, 0.3232, 0.3517]) -Greedy action tensor([ 0.1483, -1.6865, 0.4918, 0.0521]) tensor([0.2875, 0.0459, 0.4054, 0.2612]) -Greedy action tensor([ 0.2414, -0.6572, -0.6009, -0.1652]) tensor([0.3994, 0.1626, 0.1720, 0.2660]) -Greedy action tensor([ 0.5297, -0.8568, -0.6595, -0.0196]) tensor([0.4691, 0.1173, 0.1428, 0.2708]) -Greedy action tensor([-0.6337, -0.5000, -1.2275, 0.3501]) tensor([0.1862, 0.2129, 0.1028, 0.4981]) -Greedy action tensor([ 0.2296, -0.8770, 0.1481, -0.0313]) tensor([0.3308, 0.1094, 0.3049, 0.2548]) -Greedy action tensor([ 0.9887, -0.4966, -0.3626, 0.7600]) tensor([0.4384, 0.0993, 0.1135, 0.3488]) -Greedy action tensor([-1.0961, -0.5370, -0.1198, -0.0837]) tensor([0.1226, 0.2145, 0.3255, 0.3374]) -Greedy action tensor([ 0.3743, -0.0946, -0.0013, -0.6398]) tensor([0.3738, 0.2339, 0.2567, 0.1356]) -Greedy action tensor([-0.9554, 0.1664, 0.1895, -1.0289]) tensor([0.1228, 0.3771, 0.3859, 0.1141]) -Greedy action tensor([ 0.5181, -0.4566, 0.3949, 0.8579]) tensor([0.2728, 0.1029, 0.2412, 0.3832]) -Greedy action tensor([-0.4363, 0.3456, 0.3980, -0.7073]) tensor([0.1600, 0.3496, 0.3684, 0.1220]) -Greedy action tensor([-0.4422, -0.0597, -0.2484, -0.2770]) tensor([0.2058, 0.3017, 0.2498, 0.2427]) -Greedy action tensor([-0.3973, -0.7266, -0.3805, -1.2210]) tensor([0.3149, 0.2266, 0.3203, 0.1382]) -Greedy action tensor([ 1.4051, -1.2779, 0.9230, 0.2680]) tensor([0.4984, 0.0341, 0.3077, 0.1598]) -Greedy action tensor([ 0.7250, -0.1423, 1.0512, -0.8596]) tensor([0.3321, 0.1395, 0.4602, 0.0681]) -Greedy action tensor([-0.8195, -0.2576, -0.3674, -1.0276]) tensor([0.1946, 0.3414, 0.3059, 0.1581]) -Greedy action tensor([-0.0713, 0.1886, 0.4343, -0.8413]) tensor([0.2264, 0.2935, 0.3753, 0.1048]) -Greedy action tensor([0.5909, 0.5204, 0.1679, 0.1130]) tensor([0.3118, 0.2906, 0.2042, 0.1934]) -Greedy action tensor([-0.0446, 0.3828, 0.2581, -1.2548]) tensor([0.2390, 0.3664, 0.3234, 0.0712]) -Greedy action tensor([-0.4463, -1.0129, 0.2829, -0.9036]) tensor([0.2340, 0.1328, 0.4851, 0.1481]) -Greedy action tensor([ 0.1689, -1.6660, 0.3126, 0.2712]) tensor([0.2922, 0.0466, 0.3374, 0.3237]) -Greedy action tensor([-0.1601, -0.4670, -0.1792, -0.6104]) tensor([0.2981, 0.2193, 0.2925, 0.1900]) -Greedy action tensor([ 0.7047, -1.9508, 0.1350, -0.4858]) tensor([0.5155, 0.0362, 0.2916, 0.1567]) -Greedy action tensor([ 0.6124, -1.5212, 0.5806, -0.3521]) tensor([0.4051, 0.0480, 0.3925, 0.1544]) -Greedy action tensor([-0.4009, 0.3686, 0.7766, -0.3028]) tensor([0.1332, 0.2875, 0.4324, 0.1469]) -Greedy action tensor([ 0.9391, 0.2128, -0.0065, 0.2707]) tensor([0.4193, 0.2028, 0.1629, 0.2149]) -Greedy action tensor([ 0.2981, -0.5511, -0.3621, -0.0690]) tensor([0.3792, 0.1622, 0.1959, 0.2627]) -Greedy action tensor([-0.1190, -1.3251, -0.5314, 0.1238]) tensor([0.3090, 0.0925, 0.2046, 0.3939]) -Greedy action tensor([-0.5478, 0.2606, 0.4957, -1.3993]) tensor([0.1536, 0.3447, 0.4361, 0.0656]) -Greedy action tensor([ 0.2701, -1.3862, 0.4295, 0.4826]) tensor([0.2777, 0.0530, 0.3258, 0.3435]) -Greedy action tensor([-0.2266, -0.7716, 0.2189, -0.2326]) tensor([0.2418, 0.1402, 0.3776, 0.2404]) -Greedy action tensor([ 0.6632, -1.0370, 0.3000, 0.9275]) tensor([0.3144, 0.0574, 0.2187, 0.4095]) -Greedy action tensor([-0.4323, -0.0687, 0.7549, -1.2494]) tensor([0.1624, 0.2336, 0.5323, 0.0717]) -Greedy action tensor([ 2.0470, -0.7968, -0.0864, 0.4768]) tensor([0.7222, 0.0420, 0.0855, 0.1502]) -Greedy action tensor([ 0.0419, 0.5568, 0.8317, -0.5869]) tensor([0.1849, 0.3094, 0.4072, 0.0986]) -Greedy action tensor([ 0.0679, -0.9374, -0.3663, 0.6886]) tensor([0.2581, 0.0945, 0.1672, 0.4802]) -Greedy action tensor([-0.6398, -0.8115, 0.4206, -1.0599]) tensor([0.1856, 0.1564, 0.5360, 0.1220]) -Greedy action tensor([-0.2936, -0.8332, 0.6258, -0.6854]) tensor([0.2098, 0.1223, 0.5261, 0.1418]) -Greedy action tensor([-0.2167, -0.1838, -0.0467, 0.3523]) tensor([0.2006, 0.2073, 0.2378, 0.3544]) -Greedy action tensor([-0.3352, 0.3150, 0.9871, -0.0472]) tensor([0.1250, 0.2394, 0.4689, 0.1667]) -Greedy action tensor([ 0.0616, -2.1027, 0.3302, -0.0474]) tensor([0.3012, 0.0346, 0.3940, 0.2701]) -Greedy action tensor([-0.3480, -0.1303, 0.5998, -0.6363]) tensor([0.1794, 0.2231, 0.4630, 0.1345]) -Greedy action tensor([ 0.1048, -0.0125, -0.9190, -0.3458]) tensor([0.3465, 0.3082, 0.1245, 0.2208]) -Greedy action tensor([-0.3751, -1.1033, 0.2000, -0.3651]) tensor([0.2342, 0.1131, 0.4162, 0.2365]) -Greedy action tensor([-0.9753, -0.9524, -0.6873, -0.5493]) tensor([0.2046, 0.2093, 0.2729, 0.3132]) -Greedy action tensor([ 0.4551, -0.3668, -0.0744, -0.0670]) tensor([0.3814, 0.1677, 0.2246, 0.2263]) -Greedy action tensor([ 0.1465, -2.0089, 0.4801, 0.2337]) tensor([0.2776, 0.0322, 0.3875, 0.3028]) -Greedy action tensor([-0.3388, -0.3100, -0.0135, -0.4289]) tensor([0.2311, 0.2378, 0.3199, 0.2112]) -Greedy action tensor([ 0.4078, -0.6178, -0.2644, -0.3055]) tensor([0.4239, 0.1520, 0.2164, 0.2077]) -Greedy action tensor([-0.4401, -0.8778, -0.5497, -0.2192]) tensor([0.2639, 0.1704, 0.2365, 0.3292]) -Greedy action tensor([ 1.6281, -0.4152, 0.1691, 0.5787]) tensor([0.5840, 0.0757, 0.1358, 0.2045]) -Greedy action tensor([-0.6080, -1.8441, 0.1938, 0.3180]) tensor([0.1654, 0.0481, 0.3689, 0.4176]) -Greedy action tensor([ 0.7598, -0.6033, 0.7805, 0.0165]) tensor([0.3633, 0.0930, 0.3709, 0.1728]) -Greedy action tensor([ 0.6803, -1.9204, 0.1417, 0.2068]) tensor([0.4385, 0.0325, 0.2559, 0.2731]) -Greedy action tensor([-0.4453, -0.4986, 0.3179, -0.9742]) tensor([0.2136, 0.2025, 0.4581, 0.1258]) -Greedy action tensor([ 1.0163, -1.4295, 1.3691, -0.0677]) tensor([0.3511, 0.0304, 0.4997, 0.1188]) -Greedy action tensor([-0.1972, -0.3860, 0.0279, 0.4194]) tensor([0.2027, 0.1678, 0.2539, 0.3755]) -Greedy action tensor([ 0.3120, -1.6967, -0.2351, 0.1746]) tensor([0.3869, 0.0519, 0.2239, 0.3373]) -Greedy action tensor([ 0.8876, 0.0227, -0.5997, 0.2303]) tensor([0.4618, 0.1945, 0.1044, 0.2393]) -Greedy action tensor([ 0.8037, -0.1511, 1.9315, -1.2457]) tensor([0.2173, 0.0836, 0.6711, 0.0280]) -Greedy action tensor([-0.0952, -0.7867, -0.1913, 0.7273]) tensor([0.2134, 0.1069, 0.1939, 0.4858]) -Greedy action tensor([ 0.6919, -0.0928, 0.3159, 1.3886]) tensor([0.2410, 0.1099, 0.1654, 0.4837]) -Greedy action tensor([ 1.3716, -1.4734, 0.9925, 0.4096]) tensor([0.4706, 0.0274, 0.3222, 0.1798]) -Greedy action tensor([-0.3298, -0.0745, -0.2320, -0.1308]) tensor([0.2167, 0.2798, 0.2390, 0.2645]) -Greedy action tensor([ 0.6869, -1.6056, -0.5455, 0.7847]) tensor([0.4008, 0.0405, 0.1168, 0.4419]) -Greedy action tensor([ 0.7970, 0.0009, -0.0495, -0.1988]) tensor([0.4445, 0.2005, 0.1907, 0.1642]) -Greedy action tensor([ 0.3330, 0.1485, 0.2479, -0.4261]) tensor([0.3107, 0.2584, 0.2854, 0.1455]) -Greedy action tensor([-0.1499, 0.2265, 0.0035, -0.4945]) tensor([0.2309, 0.3364, 0.2691, 0.1636]) -Greedy action tensor([-0.5456, -0.8705, 0.9275, -0.9172]) tensor([0.1476, 0.1067, 0.6439, 0.1018]) -Greedy action tensor([ 1.8017, -0.3847, -0.4716, 0.2075]) tensor([0.7050, 0.0792, 0.0726, 0.1432]) -Greedy action tensor([ 1.5052, -0.8239, -0.2843, 0.4130]) tensor([0.6250, 0.0609, 0.1044, 0.2097]) -Greedy action tensor([ 1.5531, -0.2525, -0.3705, 0.3629]) tensor([0.6193, 0.1018, 0.0905, 0.1884]) -Greedy action tensor([ 1.3632, -0.4092, -0.0904, 0.0030]) tensor([0.6023, 0.1023, 0.1408, 0.1546]) -Greedy action tensor([ 1.8216, -0.7439, -0.7482, 0.6764]) tensor([0.6795, 0.0522, 0.0520, 0.2162]) -Greedy action tensor([ 1.8647, -0.5953, -0.4785, 0.6000]) tensor([0.6831, 0.0584, 0.0656, 0.1929]) -Greedy action tensor([ 1.7394, -0.3101, -0.1509, 0.5494]) tensor([0.6313, 0.0813, 0.0953, 0.1921]) -Greedy action tensor([ 0.8494, -0.4321, 0.1354, 0.1942]) tensor([0.4373, 0.1214, 0.2141, 0.2271]) -Greedy action tensor([ 1.2343, -0.1117, -0.3522, 0.0460]) tensor([0.5651, 0.1471, 0.1156, 0.1722]) -Greedy action tensor([ 1.2471, -0.2400, 0.1005, 0.3944]) tensor([0.5076, 0.1147, 0.1613, 0.2164]) -Greedy action tensor([ 1.3804, -0.2632, -0.9219, -0.0034]) tensor([0.6477, 0.1252, 0.0648, 0.1623]) -Greedy action tensor([ 1.4736, -0.2441, -0.8694, 0.5311]) tensor([0.6005, 0.1078, 0.0577, 0.2340]) -Greedy action tensor([2.4332, 0.8796, 0.1071, 0.0185]) tensor([0.7150, 0.1512, 0.0698, 0.0639]) -Greedy action tensor([ 2.2006, -0.5748, -0.4551, 0.5503]) tensor([0.7550, 0.0471, 0.0530, 0.1449]) -Greedy action tensor([ 1.6893, -0.5338, -0.4083, 0.1129]) tensor([0.6955, 0.0753, 0.0854, 0.1438]) -Greedy action tensor([ 1.9449, -0.4549, -0.5791, 0.2740]) tensor([0.7359, 0.0668, 0.0590, 0.1384]) -Greedy action tensor([ 1.0143, -0.2987, -0.6283, 0.3512]) tensor([0.5056, 0.1360, 0.0978, 0.2605]) -Greedy action tensor([ 1.8650, -1.0989, -0.3521, 0.2564]) tensor([0.7349, 0.0379, 0.0801, 0.1471]) -Greedy action tensor([ 1.0408, -0.3858, 0.0806, 0.0179]) tensor([0.5044, 0.1211, 0.1931, 0.1814]) -Greedy action tensor([ 1.0296, 0.0556, -0.8493, 0.3641]) tensor([0.4891, 0.1847, 0.0747, 0.2514]) -Greedy action tensor([ 1.4314, -0.8298, -0.2506, -0.2259]) tensor([0.6753, 0.0704, 0.1256, 0.1287]) -Greedy action tensor([ 2.1169, -1.0800, -0.3185, 0.4933]) tensor([0.7543, 0.0308, 0.0661, 0.1488]) -Greedy action tensor([ 1.4513, -0.1149, -0.2696, 0.2892]) tensor([0.5880, 0.1228, 0.1052, 0.1840]) -Greedy action tensor([ 1.3346, -0.1864, -0.3728, 0.4595]) tensor([0.5505, 0.1203, 0.0998, 0.2294]) -Greedy action tensor([ 1.2867, -0.4872, -0.3613, 0.0687]) tensor([0.6032, 0.1023, 0.1161, 0.1784]) -Greedy action tensor([ 1.3767, -0.3363, -0.7562, 0.2321]) tensor([0.6184, 0.1115, 0.0733, 0.1969]) -Greedy action tensor([ 1.9606, -0.6622, -0.5863, 0.2705]) tensor([0.7488, 0.0544, 0.0587, 0.1382]) -Greedy action tensor([ 1.9210, -0.0546, -0.5899, 0.2066]) tensor([0.7143, 0.0991, 0.0580, 0.1286]) -Greedy action tensor([ 1.9462, -1.3073, -0.2712, 0.4700]) tensor([0.7267, 0.0281, 0.0791, 0.1661]) -Greedy action tensor([ 1.5834, -0.6677, -0.4127, 0.3632]) tensor([0.6509, 0.0685, 0.0884, 0.1921]) -Greedy action tensor([ 1.3225, -0.3668, -0.8315, 0.2210]) tensor([0.6124, 0.1131, 0.0710, 0.2035]) -Greedy action tensor([ 1.5688, -0.6326, -0.8510, 0.6506]) tensor([0.6255, 0.0692, 0.0556, 0.2497]) -Greedy action tensor([ 0.8520, -0.5241, -0.3234, 0.5025]) tensor([0.4412, 0.1114, 0.1362, 0.3111]) -Greedy action tensor([ 0.9692, -0.2846, -0.1853, -0.5219]) tensor([0.5477, 0.1563, 0.1727, 0.1233]) -Greedy action tensor([ 0.6950, -0.5354, -0.0927, -0.0034]) tensor([0.4455, 0.1302, 0.2027, 0.2216]) -Greedy action tensor([ 1.2464e+00, -2.9540e-01, 5.1048e-02, 1.0812e-03]) tensor([0.5542, 0.1186, 0.1677, 0.1595]) -Greedy action tensor([ 1.3296, -0.1624, -0.9039, 0.1857]) tensor([0.6058, 0.1363, 0.0649, 0.1930]) -Greedy action tensor([ 2.0982, -0.9189, -0.4609, 0.4544]) tensor([0.7578, 0.0371, 0.0586, 0.1464]) -Greedy action tensor([ 1.0875, -0.4462, -0.6199, 0.1815]) tensor([0.5552, 0.1198, 0.1007, 0.2244]) -Greedy action tensor([ 2.4765, -1.3834, -0.2086, 0.9383]) tensor([0.7668, 0.0162, 0.0523, 0.1647]) -Greedy action tensor([ 1.3501, 0.1433, -1.3257, 0.0152]) tensor([0.6131, 0.1834, 0.0422, 0.1613]) -Greedy action tensor([ 1.8851, 0.7249, -0.2175, -0.0693]) tensor([0.6340, 0.1987, 0.0774, 0.0898]) -Greedy action tensor([ 1.3417, -0.2957, -0.6398, 0.2932]) tensor([0.5943, 0.1156, 0.0819, 0.2082]) -Greedy action tensor([ 1.3068, -0.5915, -0.7445, 0.2157]) tensor([0.6195, 0.0928, 0.0796, 0.2081]) -Greedy action tensor([ 1.5939, -0.7034, -1.0133, 0.1290]) tensor([0.7116, 0.0715, 0.0525, 0.1644]) -Greedy action tensor([ 1.8388, -0.5005, -0.3258, 0.4286]) tensor([0.6871, 0.0662, 0.0789, 0.1677]) -Greedy action tensor([ 1.4021, -0.6421, -0.5852, 0.2377]) tensor([0.6334, 0.0820, 0.0868, 0.1977]) -Greedy action tensor([ 1.5617, -0.6222, -0.4532, 0.2019]) tensor([0.6655, 0.0749, 0.0887, 0.1708]) -Greedy action tensor([ 1.7387, -0.1629, -0.5149, -0.0226]) tensor([0.7012, 0.1047, 0.0736, 0.1205]) -Greedy action tensor([ 1.2601, -0.4814, -0.0603, 0.8482]) tensor([0.4751, 0.0833, 0.1269, 0.3147]) -Greedy action tensor([ 1.6569, -0.4764, -0.4631, 0.5043]) tensor([0.6434, 0.0762, 0.0772, 0.2032]) -Greedy action tensor([ 1.4623, -0.5397, -0.6238, 0.2535]) tensor([0.6419, 0.0867, 0.0797, 0.1916]) -Greedy action tensor([ 1.7303, -0.7442, -0.5697, 0.4004]) tensor([0.6901, 0.0581, 0.0692, 0.1826]) -Greedy action tensor([ 1.0600, -0.4364, 0.1225, 0.5289]) tensor([0.4538, 0.1016, 0.1777, 0.2668]) -Greedy action tensor([ 1.7884, -0.6845, -0.2259, 0.3251]) tensor([0.6900, 0.0582, 0.0921, 0.1597]) -Greedy action tensor([ 1.4311, -0.7476, -0.3942, 0.1966]) tensor([0.6388, 0.0723, 0.1030, 0.1859]) -Greedy action tensor([ 1.9581, 0.3847, 0.4251, -0.4397]) tensor([0.6604, 0.1369, 0.1426, 0.0600]) -Greedy action tensor([ 3.0924, -0.5277, 0.2785, 0.5876]) tensor([0.8558, 0.0229, 0.0513, 0.0699]) -Greedy action tensor([ 1.5363, -0.2111, -0.5707, -0.1307]) tensor([0.6736, 0.1174, 0.0819, 0.1272]) -Greedy action tensor([ 1.5880, -0.2845, -0.6685, 0.8844]) tensor([0.5704, 0.0877, 0.0597, 0.2822]) -Greedy action tensor([ 1.5750, -0.5619, -0.6987, -0.0253]) tensor([0.7029, 0.0830, 0.0723, 0.1419]) -Greedy action tensor([ 1.4080, -0.4659, -0.4892, 0.1093]) tensor([0.6344, 0.0974, 0.0951, 0.1731]) -Greedy action tensor([ 1.4532, -0.7568, 0.0510, 0.4683]) tensor([0.5783, 0.0634, 0.1423, 0.2160]) -Greedy action tensor([ 1.6617, -1.1980, -0.1401, 0.1605]) tensor([0.6920, 0.0396, 0.1142, 0.1542]) -Greedy action tensor([ 1.6488, -0.1281, -1.0496, 0.3652]) tensor([0.6607, 0.1118, 0.0445, 0.1830]) -Greedy action tensor([ 0.4999, -0.4477, 0.1814, -0.0275]) tensor([0.3697, 0.1433, 0.2688, 0.2182]) -Greedy action tensor([ 1.6343, -0.7632, -0.0135, 0.2263]) tensor([0.6544, 0.0595, 0.1260, 0.1601]) -Greedy action tensor([ 1.6732, -0.1836, -0.1936, 0.3349]) tensor([0.6357, 0.0993, 0.0983, 0.1667]) -Greedy action tensor([ 1.7235, -0.3207, -0.2459, 0.1578]) tensor([0.6766, 0.0876, 0.0944, 0.1414]) -Greedy action tensor([ 0.5749, -0.3627, -0.2112, 0.2216]) tensor([0.3922, 0.1536, 0.1787, 0.2755]) -Greedy action tensor([ 1.4413, -0.3952, -0.3632, 0.6842]) tensor([0.5577, 0.0889, 0.0918, 0.2616]) -Greedy action tensor([ 1.0831, -0.5398, -0.1344, 0.2310]) tensor([0.5209, 0.1028, 0.1542, 0.2222]) -Greedy action tensor([ 1.2518, -0.5507, -0.2566, 0.4828]) tensor([0.5406, 0.0891, 0.1196, 0.2506]) -Greedy action tensor([ 1.3141, -0.2570, -0.4294, 0.3988]) tensor([0.5608, 0.1165, 0.0981, 0.2245]) -Greedy action tensor([ 1.3486, -0.3588, -0.4993, 0.4001]) tensor([0.5793, 0.1051, 0.0913, 0.2244]) -Greedy action tensor([ 1.5291, -0.6986, -0.2000, 0.2361]) tensor([0.6412, 0.0691, 0.1138, 0.1760]) -Greedy action tensor([ 1.6991, -0.7734, -0.0242, 0.2969]) tensor([0.6627, 0.0559, 0.1183, 0.1631]) -Greedy action tensor([ 1.5977, -0.2470, -0.4909, 0.7115]) tensor([0.5903, 0.0933, 0.0731, 0.2433]) -Greedy action tensor([ 0.9835, -0.3353, -0.2825, 0.3643]) tensor([0.4790, 0.1281, 0.1351, 0.2579]) -Greedy action tensor([ 1.4842, -0.1913, -0.5504, 0.0045]) tensor([0.6470, 0.1211, 0.0846, 0.1473]) -Greedy action tensor([ 1.2078, -0.2516, -0.5206, 0.1465]) tensor([0.5695, 0.1323, 0.1011, 0.1971]) -Greedy action tensor([ 0.7533, -0.3660, 0.0788, -0.2773]) tensor([0.4561, 0.1489, 0.2323, 0.1627]) -Greedy action tensor([ 0.3602, -0.0591, -0.0958, -0.0997]) tensor([0.3421, 0.2250, 0.2169, 0.2160]) -Greedy action tensor([ 0.7663, -0.3002, 0.0716, -0.2303]) tensor([0.4520, 0.1556, 0.2256, 0.1668]) -Greedy action tensor([ 0.7316, -0.1422, -0.0466, -0.0621]) tensor([0.4294, 0.1792, 0.1972, 0.1942]) -Greedy action tensor([ 0.6600, -0.1344, 0.1280, -0.1069]) tensor([0.3994, 0.1805, 0.2346, 0.1855]) -Greedy action tensor([ 1.0579, -0.6531, -0.0887, -0.3144]) tensor([0.5708, 0.1031, 0.1814, 0.1447]) -Greedy action tensor([ 0.9160, -0.4643, -0.0376, -0.3941]) tensor([0.5245, 0.1319, 0.2021, 0.1415]) -Greedy action tensor([ 0.7337, -0.2497, -0.0400, -0.4258]) tensor([0.4653, 0.1741, 0.2147, 0.1459]) -Greedy action tensor([ 0.2666, -0.2795, -0.0872, -0.5825]) tensor([0.3691, 0.2138, 0.2591, 0.1579]) -Greedy action tensor([ 0.5658, -0.5770, 0.1417, -0.6970]) tensor([0.4432, 0.1414, 0.2900, 0.1254]) -Greedy action tensor([ 0.8765, -0.3505, -0.1178, -0.1507]) tensor([0.4948, 0.1451, 0.1830, 0.1771]) -Greedy action tensor([ 0.6240, -0.5390, 0.3533, -0.4403]) tensor([0.4131, 0.1291, 0.3152, 0.1425]) -Greedy action tensor([ 0.9532, -0.5791, 0.0191, -0.4444]) tensor([0.5387, 0.1164, 0.2117, 0.1332]) -Greedy action tensor([ 0.7091, -0.4455, -0.0093, -0.1983]) tensor([0.4533, 0.1429, 0.2210, 0.1829]) -Greedy action tensor([ 0.7219, -0.4377, -0.1417, -0.1436]) tensor([0.4638, 0.1455, 0.1955, 0.1952]) -Greedy action tensor([ 0.7669, -0.3561, 0.0743, -0.6412]) tensor([0.4830, 0.1571, 0.2417, 0.1182]) -Greedy action tensor([ 0.7740, -0.3636, 0.1236, -0.0569]) tensor([0.4390, 0.1407, 0.2291, 0.1912]) -Greedy action tensor([ 0.7549, -0.1313, -0.0475, -0.0224]) tensor([0.4310, 0.1777, 0.1932, 0.1981]) -Greedy action tensor([ 0.3292, -0.1096, 0.0033, -0.1488]) tensor([0.3348, 0.2159, 0.2417, 0.2076]) -Greedy action tensor([ 1.0291, -0.5385, -0.1111, -0.3751]) tensor([0.5637, 0.1176, 0.1803, 0.1384]) -Greedy action tensor([ 0.8698, -0.7235, -0.0150, -0.4280]) tensor([0.5293, 0.1076, 0.2185, 0.1446]) -Greedy action tensor([ 0.8150, -0.7029, -0.0747, -0.2083]) tensor([0.5027, 0.1102, 0.2065, 0.1807]) -Greedy action tensor([ 0.9324, -0.3320, -0.0644, -0.1411]) tensor([0.5017, 0.1417, 0.1852, 0.1715]) -Greedy action tensor([ 1.1778, -0.7864, -0.1075, -0.6164]) tensor([0.6317, 0.0886, 0.1747, 0.1050]) -Greedy action tensor([ 0.2726, 0.3422, -0.2401, 0.1836]) tensor([0.2789, 0.2990, 0.1670, 0.2551]) -Greedy action tensor([ 0.5883, -0.4776, -0.1523, -0.0640]) tensor([0.4270, 0.1471, 0.2036, 0.2224]) -Greedy action tensor([ 0.7043, -0.3738, 0.1381, -0.2544]) tensor([0.4364, 0.1485, 0.2477, 0.1673]) -Greedy action tensor([ 0.4838, -0.3603, -0.1532, -0.2174]) tensor([0.4074, 0.1752, 0.2154, 0.2020]) -Greedy action tensor([ 0.8619, -0.6427, -0.1713, -0.7228]) tensor([0.5609, 0.1246, 0.1996, 0.1150]) -Greedy action tensor([ 0.5488, -0.4248, 0.0394, -0.1308]) tensor([0.4023, 0.1520, 0.2418, 0.2039]) -Greedy action tensor([ 0.8132, -0.6951, 0.0630, -0.3742]) tensor([0.5003, 0.1107, 0.2363, 0.1526]) -Greedy action tensor([ 0.3349, 0.1918, -0.0388, 0.0337]) tensor([0.3035, 0.2630, 0.2089, 0.2246]) -Greedy action tensor([ 0.1402, -0.0605, -0.0921, -0.1401]) tensor([0.2971, 0.2430, 0.2355, 0.2244]) -Greedy action tensor([ 0.6922, -0.4542, -0.1689, -0.1655]) tensor([0.4620, 0.1468, 0.1953, 0.1960]) -Greedy action tensor([ 0.9190, -0.3014, -0.0430, -0.1869]) tensor([0.4980, 0.1470, 0.1903, 0.1648]) -Greedy action tensor([ 0.8557, -0.7449, 0.1464, -0.4832]) tensor([0.5113, 0.1032, 0.2515, 0.1340]) -Greedy action tensor([ 1.0603, -0.6050, -0.0772, -0.5435]) tensor([0.5845, 0.1105, 0.1874, 0.1176]) -Greedy action tensor([ 0.7281, -0.3079, -0.0922, -0.1850]) tensor([0.4553, 0.1616, 0.2005, 0.1827]) -Greedy action tensor([ 0.7769, -0.5439, 0.0453, -0.5226]) tensor([0.4949, 0.1321, 0.2381, 0.1349]) -Greedy action tensor([ 1.0071, -0.5798, -0.0552, -0.3495]) tensor([0.5532, 0.1132, 0.1912, 0.1425]) -Greedy action tensor([ 1.2384, -0.7754, -0.1326, -0.6012]) tensor([0.6467, 0.0863, 0.1642, 0.1028]) -Greedy action tensor([ 1.0018, -0.6444, 0.2408, -0.4916]) tensor([0.5306, 0.1023, 0.2479, 0.1192]) -Greedy action tensor([ 0.7709, -0.5723, -0.0508, -0.2814]) tensor([0.4878, 0.1273, 0.2145, 0.1703]) -Greedy action tensor([ 0.9771, -0.7063, -0.1169, -0.2850]) tensor([0.5544, 0.1030, 0.1857, 0.1569]) -Greedy action tensor([ 0.8956, -0.5456, -0.0044, -0.4436]) tensor([0.5249, 0.1242, 0.2134, 0.1375]) -Greedy action tensor([0.3703, 0.0468, 0.1471, 0.0089]) tensor([0.3105, 0.2247, 0.2484, 0.2163]) -Greedy action tensor([ 1.2876, -0.7056, 0.0994, -0.6910]) tensor([0.6332, 0.0863, 0.1930, 0.0875]) -Greedy action tensor([ 0.9022, -0.8050, 0.0118, -0.5430]) tensor([0.5472, 0.0992, 0.2246, 0.1290]) -Greedy action tensor([ 0.8711, -0.7329, 0.0114, -0.4683]) tensor([0.5301, 0.1066, 0.2244, 0.1389]) -Greedy action tensor([ 0.7212, -0.4930, -0.1708, -0.2169]) tensor([0.4766, 0.1415, 0.1953, 0.1865]) -Greedy action tensor([ 1.2722, -0.9349, -0.1167, -0.5999]) tensor([0.6609, 0.0727, 0.1648, 0.1016]) -Greedy action tensor([ 0.5869, -0.2941, -0.1254, -0.2363]) tensor([0.4266, 0.1768, 0.2093, 0.1873]) -Greedy action tensor([ 0.2622, 0.1727, -0.1630, 0.0457]) tensor([0.2964, 0.2711, 0.1938, 0.2387]) -Greedy action tensor([ 0.7004, 0.2552, 0.0256, -0.0445]) tensor([0.3810, 0.2441, 0.1940, 0.1809]) -Greedy action tensor([ 0.8173, -0.4349, 0.0231, -0.7947]) tensor([0.5162, 0.1476, 0.2333, 0.1030]) -Greedy action tensor([ 1.0410, -0.4515, -0.1367, -0.2146]) tensor([0.5502, 0.1237, 0.1694, 0.1567]) -Greedy action tensor([ 0.8254, -0.2609, -0.0048, -0.1337]) tensor([0.4637, 0.1565, 0.2021, 0.1777]) -Greedy action tensor([ 0.8982, -0.7756, -0.0790, -0.3814]) tensor([0.5429, 0.1018, 0.2043, 0.1510]) -Greedy action tensor([ 0.8867, -0.6013, -0.0998, -0.4704]) tensor([0.5388, 0.1217, 0.2009, 0.1387]) -Greedy action tensor([ 0.9174, -0.4551, -0.0393, -0.3841]) tensor([0.5236, 0.1327, 0.2012, 0.1425]) -Greedy action tensor([ 0.5054, -0.2707, 0.0192, -0.0661]) tensor([0.3788, 0.1743, 0.2329, 0.2139]) -Greedy action tensor([ 0.7424, -0.7017, -0.0074, -0.8547]) tensor([0.5233, 0.1235, 0.2472, 0.1060]) -Greedy action tensor([ 0.8977, -0.1501, -0.0460, -0.2991]) tensor([0.4897, 0.1717, 0.1906, 0.1480]) -Greedy action tensor([ 1.2071, -0.8011, 0.1511, -0.8098]) tensor([0.6191, 0.0831, 0.2154, 0.0824]) -Greedy action tensor([ 0.5854, -0.1880, 0.0247, -0.0323]) tensor([0.3889, 0.1794, 0.2220, 0.2097]) -Greedy action tensor([ 0.8869, -0.4186, -0.0304, -0.1682]) tensor([0.4953, 0.1343, 0.1979, 0.1725]) -Greedy action tensor([ 0.7599, -0.6032, -0.0755, -0.3539]) tensor([0.4956, 0.1268, 0.2149, 0.1627]) -Greedy action tensor([ 0.6684, -0.1395, 0.0923, -0.0153]) tensor([0.3980, 0.1774, 0.2237, 0.2009]) -Greedy action tensor([ 0.9781, -0.6454, -0.0631, -0.4253]) tensor([0.5568, 0.1098, 0.1966, 0.1368]) -Greedy action tensor([ 0.8325, -0.6292, 0.0104, -0.3688]) tensor([0.5071, 0.1176, 0.2229, 0.1525]) -Greedy action tensor([ 0.4327, 0.4537, -0.2603, 0.0810]) tensor([0.3101, 0.3167, 0.1551, 0.2181]) -Greedy action tensor([ 1.1102, -0.6095, -0.1310, -0.4377]) tensor([0.5949, 0.1066, 0.1720, 0.1265]) -Greedy action tensor([ 0.6808, -0.4946, -0.0239, -0.1305]) tensor([0.4450, 0.1374, 0.2199, 0.1977]) -Greedy action tensor([ 1.0794, -0.8801, 0.0688, -0.6689]) tensor([0.5956, 0.0839, 0.2168, 0.1037]) -Greedy action tensor([ 1.1060, -0.6512, 0.0666, -0.6060]) tensor([0.5859, 0.1011, 0.2072, 0.1058]) -Greedy action tensor([ 0.9132, -0.4182, -0.0446, -0.4355]) tensor([0.5243, 0.1385, 0.2012, 0.1361]) -Greedy action tensor([ 1.1492, -0.9297, 0.1572, -0.6830]) tensor([0.6039, 0.0755, 0.2239, 0.0967]) -Greedy action tensor([ 0.9806, -0.4177, 0.1809, -0.5527]) tensor([0.5229, 0.1292, 0.2350, 0.1129]) -Greedy action tensor([ 1.0052, -0.8296, 0.1060, -0.4798]) tensor([0.5577, 0.0890, 0.2269, 0.1263]) -Greedy action tensor([ 0.7687, -0.6981, -0.1473, -0.2746]) tensor([0.5043, 0.1163, 0.2018, 0.1776]) -Greedy action tensor([ 0.9666, -0.6117, -0.1025, -0.5585]) tensor([0.5659, 0.1168, 0.1943, 0.1231]) -Greedy action tensor([-1.7909, -0.2938, 0.5927, -0.1061]) tensor([0.0461, 0.2059, 0.4996, 0.2484]) -Greedy action tensor([-1.9213, -0.4380, 0.6574, -0.1669]) tensor([0.0410, 0.1809, 0.5409, 0.2372]) -Greedy action tensor([-1.5772, -0.2850, 0.4517, -0.0249]) tensor([0.0589, 0.2145, 0.4482, 0.2783]) -Greedy action tensor([-1.8947, -0.4086, 0.6513, -0.1442]) tensor([0.0418, 0.1847, 0.5330, 0.2406]) -Greedy action tensor([-1.1730, -0.4986, 0.6862, -0.5604]) tensor([0.0891, 0.1748, 0.5717, 0.1644]) -Greedy action tensor([-0.6893, 0.4562, 0.0790, 0.0377]) tensor([0.1195, 0.3757, 0.2576, 0.2472]) -Greedy action tensor([-1.9202, -0.4151, 0.6496, -0.1721]) tensor([0.0411, 0.1853, 0.5373, 0.2362]) -Greedy action tensor([-0.6832, 0.3824, 0.1161, -0.0511]) tensor([0.1249, 0.3625, 0.2777, 0.2350]) -Greedy action tensor([-1.5182, -0.5736, 0.4497, -0.0092]) tensor([0.0656, 0.1686, 0.4692, 0.2966]) -Greedy action tensor([-1.9000, -0.4268, 0.6412, -0.1592]) tensor([0.0421, 0.1836, 0.5343, 0.2400]) -Greedy action tensor([-1.8732, -0.4592, 0.6361, -0.1448]) tensor([0.0434, 0.1785, 0.5337, 0.2444]) -Greedy action tensor([-1.7581, -0.2693, 0.6530, -0.0444]) tensor([0.0452, 0.2003, 0.5037, 0.2508]) -Greedy action tensor([-1.8873, -0.4552, 0.6436, -0.1513]) tensor([0.0427, 0.1787, 0.5363, 0.2422]) -Greedy action tensor([-1.8687, -0.3942, 0.6450, -0.1440]) tensor([0.0429, 0.1873, 0.5294, 0.2405]) -Greedy action tensor([-1.9141, -0.4035, 0.6488, -0.1594]) tensor([0.0412, 0.1865, 0.5342, 0.2381]) -Greedy action tensor([-1.3832, -0.2997, 0.3679, 0.1682]) tensor([0.0693, 0.2047, 0.3991, 0.3269]) -Greedy action tensor([-1.9017, -0.4507, 0.6481, -0.1612]) tensor([0.0421, 0.1795, 0.5386, 0.2398]) -Greedy action tensor([-1.4483, -0.5653, 0.4196, 0.1503]) tensor([0.0674, 0.1630, 0.4363, 0.3333]) -Greedy action tensor([-1.9208, -0.4275, 0.6579, -0.1677]) tensor([0.0410, 0.1824, 0.5401, 0.2365]) -Greedy action tensor([-1.7441, -0.0944, 0.5048, -0.0572]) tensor([0.0474, 0.2469, 0.4495, 0.2562]) -Greedy action tensor([-1.9438, -0.4558, 0.6760, -0.1789]) tensor([0.0400, 0.1771, 0.5493, 0.2336]) -Greedy action tensor([-1.5561, -0.4904, 0.5491, -0.1096]) tensor([0.0611, 0.1774, 0.5017, 0.2597]) -Greedy action tensor([-1.9117, -0.3791, 0.6420, -0.1593]) tensor([0.0412, 0.1909, 0.5300, 0.2378]) -Greedy action tensor([-1.9063, -0.4075, 0.6422, -0.1586]) tensor([0.0417, 0.1865, 0.5327, 0.2392]) -Greedy action tensor([-1.7844, -0.2981, 0.5679, -0.0923]) tensor([0.0468, 0.2070, 0.4920, 0.2542]) -Greedy action tensor([-1.9115, -0.4520, 0.6551, -0.1653]) tensor([0.0416, 0.1789, 0.5412, 0.2383]) -Greedy action tensor([-1.6306, 0.3426, 0.4614, -0.1749]) tensor([0.0486, 0.3495, 0.3936, 0.2083]) -Greedy action tensor([-0.8285, -0.0030, 0.1736, -0.0315]) tensor([0.1216, 0.2775, 0.3311, 0.2697]) -Greedy action tensor([-1.9173, -0.4598, 0.6894, -0.1504]) tensor([0.0405, 0.1739, 0.5487, 0.2369]) -Greedy action tensor([-1.8595, -0.4550, 0.6280, -0.1409]) tensor([0.0441, 0.1796, 0.5304, 0.2459]) -Greedy action tensor([-1.7519, -0.1380, 0.5817, -0.0067]) tensor([0.0453, 0.2276, 0.4675, 0.2595]) -Greedy action tensor([-1.8779, -0.3359, 0.5831, -0.2322]) tensor([0.0443, 0.2070, 0.5190, 0.2297]) -Greedy action tensor([-1.8264, -0.4693, 0.6170, -0.1247]) tensor([0.0457, 0.1776, 0.5261, 0.2506]) -Greedy action tensor([-1.5542, 0.3176, 0.4082, 0.0724]) tensor([0.0508, 0.3299, 0.3612, 0.2582]) -Greedy action tensor([-1.7888, -0.0568, 0.5331, -0.0701]) tensor([0.0446, 0.2520, 0.4546, 0.2487]) -Greedy action tensor([-1.8614, -0.4456, 0.6310, -0.1387]) tensor([0.0438, 0.1806, 0.5300, 0.2455]) -Greedy action tensor([-1.8632, -0.4560, 0.6340, -0.1376]) tensor([0.0438, 0.1788, 0.5317, 0.2458]) -Greedy action tensor([-1.8801, -0.4471, 0.6417, -0.1478]) tensor([0.0429, 0.1799, 0.5345, 0.2427]) -Greedy action tensor([-1.8211, -0.3967, 0.6091, -0.1110]) tensor([0.0454, 0.1885, 0.5153, 0.2508]) -Greedy action tensor([-1.5250, -0.3045, 0.4682, -0.0981]) tensor([0.0629, 0.2132, 0.4618, 0.2621]) -Greedy action tensor([-1.7900, -0.3619, 0.5758, -0.1156]) tensor([0.0473, 0.1971, 0.5035, 0.2522]) -Greedy action tensor([-1.7296, -0.4011, 0.6621, -0.0287]) tensor([0.0472, 0.1782, 0.5160, 0.2586]) -Greedy action tensor([-0.9519, 0.8014, 0.1588, 0.0094]) tensor([0.0805, 0.4647, 0.2444, 0.2105]) -Greedy action tensor([-1.4029, -0.0245, 0.4825, 0.0658]) tensor([0.0629, 0.2496, 0.4144, 0.2732]) -Greedy action tensor([-1.9029, -0.4214, 0.6621, -0.1457]) tensor([0.0413, 0.1818, 0.5373, 0.2395]) -Greedy action tensor([-1.5304, -0.0436, 0.4246, -0.0553]) tensor([0.0593, 0.2623, 0.4190, 0.2593]) -Greedy action tensor([-0.6024, -0.5210, 0.3256, 0.4116]) tensor([0.1357, 0.1472, 0.3432, 0.3740]) -Greedy action tensor([-1.1181, 0.8009, 0.1479, 0.3496]) tensor([0.0637, 0.4340, 0.2259, 0.2764]) -Greedy action tensor([-1.6356, -0.4280, 0.5347, -0.1306]) tensor([0.0568, 0.1900, 0.4975, 0.2558]) -Greedy action tensor([-1.9246, -0.4337, 0.6577, -0.1689]) tensor([0.0409, 0.1816, 0.5409, 0.2367]) -Greedy action tensor([-1.6153, -0.4129, 0.5183, 0.0392]) tensor([0.0555, 0.1849, 0.4691, 0.2905]) -Greedy action tensor([-1.9188, -0.4446, 0.6743, -0.1566]) tensor([0.0407, 0.1778, 0.5443, 0.2372]) -Greedy action tensor([-1.5503, -0.5591, 0.4522, 0.0491]) tensor([0.0623, 0.1679, 0.4615, 0.3084]) -Greedy action tensor([-1.8987, -0.4543, 0.6506, -0.1558]) tensor([0.0421, 0.1785, 0.5388, 0.2406]) -Greedy action tensor([-1.3430, -0.4267, 0.6830, 0.7918]) tensor([0.0512, 0.1279, 0.3881, 0.4327]) -Greedy action tensor([-1.5699, -0.5726, 0.4537, 0.0385]) tensor([0.0615, 0.1666, 0.4650, 0.3070]) -Greedy action tensor([-1.8797, -0.4703, 0.6386, -0.1534]) tensor([0.0432, 0.1770, 0.5366, 0.2431]) -Greedy action tensor([-1.3824, 0.5770, 0.3340, -0.1250]) tensor([0.0582, 0.4131, 0.3240, 0.2047]) -Greedy action tensor([-1.7444, -0.4214, 0.6687, 0.0519]) tensor([0.0456, 0.1710, 0.5088, 0.2746]) -Greedy action tensor([-1.9217, -0.4352, 0.6528, -0.1706]) tensor([0.0411, 0.1819, 0.5400, 0.2370]) -Greedy action tensor([-1.2763, -0.5018, 0.5511, 0.0630]) tensor([0.0757, 0.1643, 0.4709, 0.2890]) -Greedy action tensor([-1.6571, 0.3398, 0.4102, 0.0064]) tensor([0.0464, 0.3419, 0.3668, 0.2449]) -Greedy action tensor([-1.8927, -0.4516, 0.6453, -0.1599]) tensor([0.0425, 0.1795, 0.5376, 0.2403]) -Greedy action tensor([-1.2325, -0.6202, 0.3156, 0.1264]) tensor([0.0874, 0.1613, 0.4111, 0.3402]) -Greedy action tensor([-0.4557, -0.3048, 0.1425, -0.0993]) tensor([0.1848, 0.2150, 0.3362, 0.2640]) -Greedy action tensor([-1.8750, -0.4394, 0.6285, -0.1441]) tensor([0.0433, 0.1821, 0.5298, 0.2447]) -Greedy action tensor([-1.6476, -0.3355, 0.5021, -0.0602]) tensor([0.0550, 0.2042, 0.4719, 0.2689]) -Greedy action tensor([-0.9934, -0.5966, 0.1897, 0.3198]) tensor([0.1056, 0.1570, 0.3447, 0.3926]) -Greedy action tensor([-1.7753, -0.5094, 0.5746, -0.0942]) tensor([0.0490, 0.1738, 0.5139, 0.2633]) -Greedy action tensor([-1.7747, -0.4571, 0.5859, -0.0907]) tensor([0.0483, 0.1803, 0.5115, 0.2600]) -Greedy action tensor([-1.4218, -0.6158, 0.3982, 0.1139]) tensor([0.0711, 0.1593, 0.4391, 0.3304]) -Greedy action tensor([-1.9240, -0.4501, 0.6618, -0.1707]) tensor([0.0410, 0.1788, 0.5437, 0.2365]) -Greedy action tensor([-1.8882, -0.4348, 0.6371, -0.1523]) tensor([0.0427, 0.1824, 0.5329, 0.2420]) -Greedy action tensor([-0.8556, -0.0764, 0.0912, 0.5316]) tensor([0.1024, 0.2233, 0.2641, 0.4102]) -Greedy action tensor([-1.7625, -0.1079, 0.5596, -0.1882]) tensor([0.0470, 0.2461, 0.4797, 0.2271]) -Greedy action tensor([-0.9903, 0.1770, 0.0030, 0.4671]) tensor([0.0892, 0.2867, 0.2409, 0.3832]) -Greedy action tensor([-1.4900, -0.6075, 0.4915, 0.0312]) tensor([0.0656, 0.1585, 0.4757, 0.3002]) -Greedy action tensor([-1.0123, 0.7399, 0.0768, 0.3422]) tensor([0.0735, 0.4236, 0.2183, 0.2846]) -Greedy action tensor([-1.9207, -0.4329, 0.6528, -0.1694]) tensor([0.0411, 0.1822, 0.5396, 0.2371]) -Greedy action tensor([-1.8453, -0.3660, 0.6382, -0.1219]) tensor([0.0435, 0.1910, 0.5215, 0.2439]) -Greedy action tensor([-1.8788, -0.4444, 0.7440, 0.0234]) tensor([0.0390, 0.1635, 0.5365, 0.2610]) -Greedy action tensor([ 0.1742, 0.0458, 0.6956, -0.0468]) tensor([0.2291, 0.2015, 0.3858, 0.1836]) -Greedy action tensor([-0.3835, -0.3950, 0.3865, -0.6137]) tensor([0.2023, 0.2000, 0.4370, 0.1607]) -Greedy action tensor([-0.4807, -0.4208, 0.4710, -0.6802]) tensor([0.1828, 0.1941, 0.4734, 0.1497]) -Greedy action tensor([ 0.1482, -0.1816, 0.8771, 0.4984]) tensor([0.1919, 0.1380, 0.3978, 0.2724]) -Greedy action tensor([ 0.0438, -0.8779, 1.0127, -0.2292]) tensor([0.2086, 0.0830, 0.5497, 0.1588]) -Greedy action tensor([-1.0125, -1.0742, 0.3521, -0.6252]) tensor([0.1365, 0.1283, 0.5342, 0.2010]) -Greedy action tensor([ 0.3549, -2.0095, -0.2926, 0.3790]) tensor([0.3785, 0.0356, 0.1981, 0.3878]) -Greedy action tensor([-0.2667, -0.8536, 0.5769, -0.3812]) tensor([0.2095, 0.1165, 0.4871, 0.1869]) -Greedy action tensor([ 1.6231, -0.3203, 0.4682, 0.5866]) tensor([0.5516, 0.0790, 0.1738, 0.1956]) -Greedy action tensor([-0.8163, -1.6458, 0.8295, 0.0818]) tensor([0.1102, 0.0481, 0.5713, 0.2705]) -Greedy action tensor([ 0.3684, -0.9303, 0.1097, -0.3245]) tensor([0.3929, 0.1072, 0.3033, 0.1965]) -Greedy action tensor([ 0.3028, 0.4179, -0.3938, -0.8516]) tensor([0.3407, 0.3822, 0.1697, 0.1074]) -Greedy action tensor([ 0.0516, -0.8841, 0.3081, -0.1475]) tensor([0.2854, 0.1120, 0.3688, 0.2339]) -Greedy action tensor([-0.0722, -1.7657, 0.3840, -0.0528]) tensor([0.2645, 0.0486, 0.4173, 0.2696]) -Greedy action tensor([-0.7721, -0.1929, -1.2187, 0.8396]) tensor([0.1185, 0.2116, 0.0758, 0.5941]) -Greedy action tensor([-0.8761, -0.9294, 0.8071, 0.1307]) tensor([0.0993, 0.0942, 0.5347, 0.2719]) -Greedy action tensor([ 0.1100, -0.3136, 0.7622, -0.3375]) tensor([0.2373, 0.1554, 0.4556, 0.1517]) -Greedy action tensor([-0.0607, -1.5689, -0.1970, 0.9317]) tensor([0.2087, 0.0462, 0.1821, 0.5630]) -Greedy action tensor([-0.3113, -0.3450, 0.2330, -0.1044]) tensor([0.2033, 0.1965, 0.3503, 0.2500]) -Greedy action tensor([-0.2552, -0.2869, 0.0239, -0.9424]) tensor([0.2636, 0.2554, 0.3485, 0.1326]) -Greedy action tensor([-0.1230, 0.7421, 0.2003, 0.0455]) tensor([0.1683, 0.3998, 0.2326, 0.1992]) -Greedy action tensor([-1.2951, 0.1951, -1.0009, 0.5233]) tensor([0.0773, 0.3429, 0.1037, 0.4761]) -Greedy action tensor([ 1.5436, -0.3149, 0.3125, -0.3003]) tensor([0.6226, 0.0971, 0.1818, 0.0985]) -Greedy action tensor([ 0.8458, -2.2239, -0.1001, 0.5955]) tensor([0.4518, 0.0210, 0.1755, 0.3518]) -Greedy action tensor([-0.0903, 0.8888, 0.8714, 0.6068]) tensor([0.1207, 0.3213, 0.3157, 0.2423]) -Greedy action tensor([-0.0190, 0.2244, 0.7589, -0.0576]) tensor([0.1847, 0.2356, 0.4021, 0.1777]) -Greedy action tensor([ 1.0520, -0.6600, 1.0992, 0.2135]) tensor([0.3758, 0.0678, 0.3939, 0.1625]) -Greedy action tensor([ 0.0244, -0.0919, -0.2275, -0.2810]) tensor([0.2938, 0.2615, 0.2283, 0.2164]) -Greedy action tensor([ 0.1542, -1.9127, 1.1435, -0.0978]) tensor([0.2177, 0.0276, 0.5855, 0.1692]) -Greedy action tensor([-0.4776, -0.1098, -0.2659, 0.4204]) tensor([0.1630, 0.2355, 0.2014, 0.4001]) -Greedy action tensor([-0.5105, -1.6179, 0.0290, -0.4413]) tensor([0.2429, 0.0803, 0.4166, 0.2603]) -Greedy action tensor([-0.5901, -0.6745, -0.8885, -0.8649]) tensor([0.2923, 0.2687, 0.2169, 0.2221]) -Greedy action tensor([-0.3739, 0.1191, 0.4002, -0.1435]) tensor([0.1649, 0.2699, 0.3576, 0.2076]) -Greedy action tensor([-0.5804, -1.6487, 0.1533, 0.6348]) tensor([0.1471, 0.0505, 0.3064, 0.4959]) -Greedy action tensor([ 0.5019, -0.9812, 0.8566, -0.9250]) tensor([0.3457, 0.0785, 0.4929, 0.0830]) -Greedy action tensor([-1.1220, -0.3388, -0.3580, -0.2883]) tensor([0.1309, 0.2866, 0.2811, 0.3014]) -Greedy action tensor([-1.0516, -0.7211, -0.2834, 0.2849]) tensor([0.1197, 0.1666, 0.2581, 0.4556]) -Greedy action tensor([-0.2465, -1.0520, 0.0356, 0.1601]) tensor([0.2339, 0.1045, 0.3102, 0.3513]) -Greedy action tensor([ 0.3169, -1.5687, -0.1614, 0.7235]) tensor([0.3055, 0.0464, 0.1894, 0.4588]) -Greedy action tensor([-1.9888, 0.5474, 0.0240, -0.6478]) tensor([0.0401, 0.5065, 0.3001, 0.1533]) -Greedy action tensor([ 0.6536, -1.3447, 0.0856, -0.7234]) tensor([0.5116, 0.0694, 0.2899, 0.1291]) -Greedy action tensor([ 0.2772, -0.1403, 0.4620, -0.3907]) tensor([0.2964, 0.1952, 0.3565, 0.1520]) -Greedy action tensor([-0.3270, -0.4689, 0.1434, -1.0452]) tensor([0.2528, 0.2193, 0.4046, 0.1233]) -Greedy action tensor([ 0.1166, -0.5974, 0.1879, 0.4781]) tensor([0.2501, 0.1225, 0.2685, 0.3589]) -Greedy action tensor([ 0.6295, 0.4713, -0.8113, -0.3138]) tensor([0.4033, 0.3443, 0.0955, 0.1570]) -Greedy action tensor([ 0.0393, -0.1558, 0.0247, 0.9359]) tensor([0.1901, 0.1564, 0.1874, 0.4661]) -Greedy action tensor([-0.0693, 0.1143, -0.5876, -0.1230]) tensor([0.2670, 0.3209, 0.1590, 0.2531]) -Greedy action tensor([-0.3597, 0.4977, -0.1130, -0.8018]) tensor([0.1894, 0.4465, 0.2424, 0.1217]) -Greedy action tensor([-0.0262, -0.7875, 0.3706, -0.6494]) tensor([0.2865, 0.1338, 0.4261, 0.1536]) -Greedy action tensor([-0.7533, 0.5052, -0.5915, -1.2617]) tensor([0.1588, 0.5590, 0.1867, 0.0955]) -Greedy action tensor([ 0.0745, -0.3130, -0.7485, -1.2153]) tensor([0.4179, 0.2836, 0.1835, 0.1150]) -Greedy action tensor([ 1.3600, -1.6703, -0.1288, 0.4269]) tensor([0.5998, 0.0290, 0.1353, 0.2359]) -Greedy action tensor([ 0.7524, -0.3315, 0.1623, -0.2854]) tensor([0.4451, 0.1506, 0.2467, 0.1577]) -Greedy action tensor([ 0.4925, -0.4737, 1.2167, 0.0088]) tensor([0.2463, 0.0937, 0.5081, 0.1518]) -Greedy action tensor([-0.2822, -2.0834, -0.5356, 0.9015]) tensor([0.1920, 0.0317, 0.1490, 0.6272]) -Greedy action tensor([ 0.6543, -0.8505, -0.6552, 0.0147]) tensor([0.4952, 0.1100, 0.1337, 0.2612]) -Greedy action tensor([-1.5890, -0.3353, -0.0697, -0.7888]) tensor([0.0885, 0.3101, 0.4044, 0.1970]) -Greedy action tensor([-0.1673, -0.3602, -0.3439, -0.7091]) tensor([0.3082, 0.2542, 0.2583, 0.1793]) -Greedy action tensor([-0.1135, -1.6864, 0.4130, -0.9931]) tensor([0.3016, 0.0626, 0.5106, 0.1252]) -Greedy action tensor([ 0.0083, 0.2179, 0.7226, -0.5458]) tensor([0.2062, 0.2542, 0.4212, 0.1185]) -Greedy action tensor([-0.9469, -0.8653, 0.2320, -0.3772]) tensor([0.1408, 0.1528, 0.4576, 0.2488]) -Greedy action tensor([ 0.5706, -0.7970, 0.9212, 0.2792]) tensor([0.2922, 0.0744, 0.4150, 0.2184]) -Greedy action tensor([ 0.4438, -0.4649, 1.4790, -0.8333]) tensor([0.2223, 0.0896, 0.6260, 0.0620]) -Greedy action tensor([-0.8113, -1.0301, 0.0698, -0.1292]) tensor([0.1614, 0.1297, 0.3896, 0.3193]) -Greedy action tensor([-0.1283, -0.4186, 1.3792, -0.6626]) tensor([0.1460, 0.1092, 0.6592, 0.0856]) -Greedy action tensor([-0.4983, 0.5664, -0.2592, -1.0796]) tensor([0.1745, 0.5062, 0.2217, 0.0976]) -Greedy action tensor([-0.2719, -1.2632, 0.7515, -0.4074]) tensor([0.1989, 0.0738, 0.5535, 0.1737]) -Greedy action tensor([ 1.1343, -0.4348, -0.2873, 0.7906]) tensor([0.4633, 0.0965, 0.1118, 0.3285]) -Greedy action tensor([-0.6281, 0.0579, 0.3732, -0.6837]) tensor([0.1503, 0.2985, 0.4091, 0.1422]) -Greedy action tensor([ 0.0207, -0.3210, 0.0060, -0.2088]) tensor([0.2865, 0.2035, 0.2823, 0.2277]) -Greedy action tensor([-0.6884, 0.5624, -1.2709, 0.3454]) tensor([0.1272, 0.4442, 0.0710, 0.3576]) -Greedy action tensor([ 0.4134, -1.5792, 0.5347, 0.4722]) tensor([0.3007, 0.0410, 0.3394, 0.3189]) -Greedy action tensor([-0.1357, -0.2867, -1.4345, 0.3013]) tensor([0.2717, 0.2336, 0.0741, 0.4206]) -Greedy action tensor([-0.6034, -0.4531, 0.8226, -1.0968]) tensor([0.1442, 0.1676, 0.6002, 0.0880]) -Greedy action tensor([0.3711, 0.2672, 0.0336, 0.2575]) tensor([0.2851, 0.2570, 0.2034, 0.2545]) -Greedy action tensor([ 0.3947, -0.6633, -0.5892, -0.5512]) tensor([0.4741, 0.1646, 0.1772, 0.1841]) -Greedy action tensor([ 0.6964, -1.1539, 0.0803, 0.0598]) tensor([0.4492, 0.0706, 0.2426, 0.2376]) -Greedy action tensor([-0.3995, -0.8440, 0.2911, -0.6752]) tensor([0.2275, 0.1459, 0.4539, 0.1727]) -Greedy action tensor([-0.2016, -0.5636, 0.1116, 0.3102]) tensor([0.2113, 0.1471, 0.2890, 0.3525]) -Greedy action tensor([ 0.6850, -0.1779, 0.3029, -0.3251]) tensor([0.4051, 0.1709, 0.2765, 0.1475]) -Greedy action tensor([-0.4560, -1.1129, 0.3393, -0.2840]) tensor([0.2032, 0.1053, 0.4501, 0.2413]) -Greedy action tensor([ 1.6666, -0.5510, -0.4934, 0.4344]) tensor([0.6597, 0.0718, 0.0761, 0.1924]) -Greedy action tensor([ 1.2387, -0.3895, -0.6056, 0.0633]) tensor([0.6013, 0.1180, 0.0951, 0.1856]) -Greedy action tensor([ 1.8925, -1.0637, -0.2482, 0.4019]) tensor([0.7169, 0.0373, 0.0843, 0.1615]) -Greedy action tensor([ 1.4171, -0.3637, -0.3368, 0.1999]) tensor([0.6106, 0.1029, 0.1057, 0.1808]) -Greedy action tensor([ 1.5121, -0.1302, -0.7662, 0.4493]) tensor([0.6092, 0.1179, 0.0624, 0.2105]) -Greedy action tensor([ 1.2978, -0.0788, 0.0032, -0.1348]) tensor([0.5665, 0.1430, 0.1552, 0.1352]) -Greedy action tensor([8.6339e-01, 4.8774e-04, 4.3323e-03, 2.5804e-01]) tensor([0.4182, 0.1764, 0.1771, 0.2283]) -Greedy action tensor([ 1.1775, 0.1832, -0.6906, 0.5534]) tensor([0.4854, 0.1796, 0.0750, 0.2600]) -Greedy action tensor([ 1.7454, -0.7201, -0.4261, 0.2740]) tensor([0.7000, 0.0595, 0.0798, 0.1607]) -Greedy action tensor([ 1.3154, -0.0108, -0.0658, 0.4835]) tensor([0.5123, 0.1360, 0.1287, 0.2230]) -Greedy action tensor([ 2.1060, -0.8305, -0.4323, 0.5186]) tensor([0.7482, 0.0397, 0.0591, 0.1530]) -Greedy action tensor([ 1.8647, -0.6592, -0.5343, 0.3237]) tensor([0.7219, 0.0579, 0.0656, 0.1546]) -Greedy action tensor([ 1.8926, -0.9190, -0.3714, 0.5092]) tensor([0.7068, 0.0425, 0.0735, 0.1772]) -Greedy action tensor([ 2.0462, -0.7011, -0.4641, 0.3211]) tensor([0.7556, 0.0484, 0.0614, 0.1346]) -Greedy action tensor([ 1.4753, -0.4311, -0.1472, 0.9788]) tensor([0.5116, 0.0760, 0.1010, 0.3114]) -Greedy action tensor([ 1.7152, -0.3247, -0.9309, 0.1236]) tensor([0.7120, 0.0926, 0.0505, 0.1450]) -Greedy action tensor([ 1.2824, -0.6787, -0.0485, 0.0014]) tensor([0.5943, 0.0836, 0.1570, 0.1651]) -Greedy action tensor([ 1.6368, -0.6517, -0.7189, 0.0114]) tensor([0.7178, 0.0728, 0.0681, 0.1413]) -Greedy action tensor([ 1.3806, -0.1004, -0.8916, 0.5640]) tensor([0.5642, 0.1283, 0.0582, 0.2493]) -Greedy action tensor([ 2.2751, -0.8270, -0.2346, 0.9755]) tensor([0.7149, 0.0321, 0.0581, 0.1949]) -Greedy action tensor([ 1.1091, -0.4284, 0.0270, 0.0736]) tensor([0.5239, 0.1126, 0.1775, 0.1860]) -Greedy action tensor([ 0.6665, -0.4838, -0.1079, 0.3578]) tensor([0.3981, 0.1260, 0.1835, 0.2924]) -Greedy action tensor([ 1.1765, -0.0542, -0.3174, 0.1027]) tensor([0.5381, 0.1572, 0.1208, 0.1839]) -Greedy action tensor([ 1.6643, -0.7501, -0.4152, 0.3575]) tensor([0.6734, 0.0602, 0.0842, 0.1823]) -Greedy action tensor([ 1.6323, -0.1376, -0.7288, 0.2040]) tensor([0.6647, 0.1132, 0.0627, 0.1593]) -Greedy action tensor([ 1.5457, -1.2197, -0.1652, 0.4861]) tensor([0.6288, 0.0396, 0.1136, 0.2179]) -Greedy action tensor([ 1.9844, -1.0361, -0.2856, 0.6530]) tensor([0.7061, 0.0344, 0.0729, 0.1865]) -Greedy action tensor([ 1.2833, -0.4051, -0.4476, 0.4197]) tensor([0.5607, 0.1036, 0.0993, 0.2364]) -Greedy action tensor([ 3.0796, -1.7796, 0.0704, 1.1955]) tensor([0.8271, 0.0064, 0.0408, 0.1257]) -Greedy action tensor([ 1.1495, -0.4326, -0.2396, 0.5918]) tensor([0.4932, 0.1014, 0.1230, 0.2824]) -Greedy action tensor([ 2.5307, -1.1950, -0.1601, 0.5737]) tensor([0.8109, 0.0195, 0.0550, 0.1146]) -Greedy action tensor([ 1.5761, -0.4319, -0.4992, 0.3267]) tensor([0.6466, 0.0868, 0.0812, 0.1854]) -Greedy action tensor([ 1.6919, -0.4248, -0.7079, 0.5281]) tensor([0.6564, 0.0791, 0.0596, 0.2050]) -Greedy action tensor([ 2.0435, -1.1744, 0.1351, 0.6017]) tensor([0.7018, 0.0281, 0.1041, 0.1660]) -Greedy action tensor([ 1.3926, -0.1185, -0.8063, 0.2697]) tensor([0.6035, 0.1332, 0.0669, 0.1964]) -Greedy action tensor([ 1.8610, 0.5914, -0.1960, 0.2464]) tensor([0.6220, 0.1747, 0.0795, 0.1238]) -Greedy action tensor([ 1.0303, -0.0484, -0.7737, 0.2222]) tensor([0.5127, 0.1743, 0.0844, 0.2285]) -Greedy action tensor([ 2.4009, -0.8932, 0.3092, 1.0250]) tensor([0.7076, 0.0263, 0.0874, 0.1787]) -Greedy action tensor([ 1.8556, -0.4116, -0.7406, 0.2175]) tensor([0.7286, 0.0755, 0.0543, 0.1416]) -Greedy action tensor([ 1.6178, -0.5713, -0.6148, 0.1568]) tensor([0.6891, 0.0772, 0.0739, 0.1599]) -Greedy action tensor([ 1.1844, -0.5839, -0.2533, 0.3802]) tensor([0.5389, 0.0920, 0.1280, 0.2411]) -Greedy action tensor([ 1.8796, -0.2409, -0.5680, 0.2792]) tensor([0.7101, 0.0852, 0.0614, 0.1433]) -Greedy action tensor([ 1.7844, -0.9196, -0.1199, 0.1760]) tensor([0.7062, 0.0473, 0.1052, 0.1414]) -Greedy action tensor([ 1.4905, -0.5423, -0.9036, 0.4679]) tensor([0.6322, 0.0828, 0.0577, 0.2274]) -Greedy action tensor([ 1.6465, -0.5176, -0.8065, 0.3537]) tensor([0.6778, 0.0778, 0.0583, 0.1861]) -Greedy action tensor([ 1.6160, -0.3619, -0.6236, 0.5225]) tensor([0.6329, 0.0876, 0.0674, 0.2121]) -Greedy action tensor([ 1.3674, -0.4418, 0.2157, -0.0338]) tensor([0.5793, 0.0949, 0.1831, 0.1427]) -Greedy action tensor([ 0.9417, -0.5608, -0.2702, 0.2689]) tensor([0.4925, 0.1096, 0.1466, 0.2513]) -Greedy action tensor([ 1.1806, -0.3368, -0.4658, 0.6822]) tensor([0.4952, 0.1086, 0.0954, 0.3008]) -Greedy action tensor([ 1.0387, -0.3102, -0.6254, 0.1633]) tensor([0.5360, 0.1391, 0.1015, 0.2234]) -Greedy action tensor([ 1.1814, -0.5974, -0.1687, 0.2642]) tensor([0.5471, 0.0924, 0.1418, 0.2187]) -Greedy action tensor([ 1.4023, -0.6500, -0.3102, 0.0211]) tensor([0.6410, 0.0823, 0.1156, 0.1611]) -Greedy action tensor([ 1.6345, -0.8853, -0.0579, 0.9178]) tensor([0.5705, 0.0459, 0.1050, 0.2786]) -Greedy action tensor([ 1.6628, -0.3242, -1.1190, -0.0454]) tensor([0.7245, 0.0993, 0.0449, 0.1313]) -Greedy action tensor([ 1.5410, -0.3171, -0.7684, 0.2558]) tensor([0.6528, 0.1018, 0.0648, 0.1806]) -Greedy action tensor([ 1.7532, 0.2505, -0.5099, 0.4073]) tensor([0.6302, 0.1402, 0.0656, 0.1640]) -Greedy action tensor([ 1.7471, -0.8333, -0.0927, 0.6287]) tensor([0.6405, 0.0485, 0.1017, 0.2093]) -Greedy action tensor([ 1.2170, -0.2746, -0.8134, 0.6935]) tensor([0.5131, 0.1155, 0.0674, 0.3040]) -Greedy action tensor([ 1.7173, -0.5249, -0.1589, 0.7494]) tensor([0.6100, 0.0648, 0.0934, 0.2317]) -Greedy action tensor([ 1.3870, 0.2198, -1.0447, 0.1846]) tensor([0.5884, 0.1831, 0.0517, 0.1768]) -Greedy action tensor([ 1.4785, -0.3542, -0.3839, 0.3795]) tensor([0.6066, 0.0970, 0.0942, 0.2021]) -Greedy action tensor([ 1.2286, -0.2243, -0.3915, 0.3565]) tensor([0.5406, 0.1264, 0.1070, 0.2260]) -Greedy action tensor([ 1.5170, -0.1507, -0.8162, 0.6910]) tensor([0.5802, 0.1095, 0.0563, 0.2540]) -Greedy action tensor([ 1.8028, -0.6053, -0.9500, 0.4343]) tensor([0.7101, 0.0639, 0.0453, 0.1807]) -Greedy action tensor([ 1.2946, -0.4216, -0.2151, 0.4141]) tensor([0.5509, 0.0990, 0.1217, 0.2284]) -Greedy action tensor([ 1.4502, -0.3191, -0.1080, 0.1660]) tensor([0.6032, 0.1028, 0.1270, 0.1670]) -Greedy action tensor([ 3.3787, -1.7669, -0.1605, 1.3303]) tensor([0.8593, 0.0050, 0.0250, 0.1108]) -Greedy action tensor([ 1.1635, -0.4940, -0.4912, 0.3707]) tensor([0.5452, 0.1039, 0.1042, 0.2467]) -Greedy action tensor([ 1.3584, -0.2687, -0.5640, -0.1114]) tensor([0.6358, 0.1249, 0.0930, 0.1462]) -Greedy action tensor([ 1.8695, -0.9845, -0.2627, 0.6743]) tensor([0.6762, 0.0390, 0.0802, 0.2047]) -Greedy action tensor([ 1.7847, -0.5337, -0.6718, 0.5309]) tensor([0.6805, 0.0670, 0.0583, 0.1942]) -Greedy action tensor([ 2.1556, -0.9037, -0.3367, 0.8499]) tensor([0.7140, 0.0335, 0.0591, 0.1935]) -Greedy action tensor([ 0.9673, -0.5781, -0.0811, 0.1699]) tensor([0.4965, 0.1059, 0.1740, 0.2236]) -Greedy action tensor([ 1.6311, -0.7147, -0.4339, 0.1585]) tensor([0.6888, 0.0660, 0.0873, 0.1579]) -Greedy action tensor([ 1.2696, -0.2310, -0.9254, 0.2433]) tensor([0.5908, 0.1317, 0.0658, 0.2117]) -Greedy action tensor([ 2.2845, -0.7055, 0.1867, 0.5929]) tensor([0.7368, 0.0370, 0.0904, 0.1357]) -Greedy action tensor([ 2.6076, 1.0279, -0.1455, -0.0974]) tensor([0.7481, 0.1541, 0.0477, 0.0500]) -Greedy action tensor([ 1.7299, -0.3605, -0.5147, 0.7496]) tensor([0.6231, 0.0770, 0.0660, 0.2338]) -Greedy action tensor([ 1.2056, -0.1682, -0.7436, 0.0607]) tensor([0.5835, 0.1477, 0.0831, 0.1857]) -Greedy action tensor([ 1.0735, -0.4942, -0.1437, 0.3180]) tensor([0.5065, 0.1056, 0.1499, 0.2379]) -Greedy action tensor([ 1.6176, -0.3655, -0.5208, 0.0727]) tensor([0.6808, 0.0937, 0.0802, 0.1452]) -Greedy action tensor([ 0.3738, -0.2627, -0.0318, -0.0711]) tensor([0.3525, 0.1865, 0.2350, 0.2259]) -Greedy action tensor([ 0.6889, -0.6652, -0.0929, -0.2347]) tensor([0.4733, 0.1222, 0.2166, 0.1879]) -Greedy action tensor([ 1.0945, -0.7127, 0.0870, -0.6357]) tensor([0.5860, 0.0962, 0.2140, 0.1039]) -Greedy action tensor([ 0.9575, -0.7332, 0.1014, -0.4388]) tensor([0.5386, 0.0993, 0.2288, 0.1333]) -Greedy action tensor([ 0.9004, -0.7669, 0.1322, -0.3034]) tensor([0.5121, 0.0967, 0.2376, 0.1537]) -Greedy action tensor([ 0.9185, -0.6662, 0.1550, -0.6187]) tensor([0.5302, 0.1087, 0.2471, 0.1140]) -Greedy action tensor([ 0.6868, -0.2899, -0.0106, -0.6562]) tensor([0.4683, 0.1763, 0.2331, 0.1223]) -Greedy action tensor([ 0.9169, -0.3250, -0.0796, -0.2715]) tensor([0.5095, 0.1472, 0.1881, 0.1552]) -Greedy action tensor([ 0.6132, -0.4107, -0.0142, -0.3225]) tensor([0.4375, 0.1572, 0.2336, 0.1716]) -Greedy action tensor([ 0.6923, -0.4585, -0.0041, -0.4133]) tensor([0.4660, 0.1474, 0.2323, 0.1543]) -Greedy action tensor([ 0.9420, -0.3530, 0.2367, -0.5101]) tensor([0.4995, 0.1368, 0.2467, 0.1169]) -Greedy action tensor([ 0.6344, -0.0840, 0.0993, -0.0382]) tensor([0.3871, 0.1887, 0.2267, 0.1976]) -Greedy action tensor([ 0.4756, -0.2374, -0.0307, -0.1579]) tensor([0.3812, 0.1868, 0.2297, 0.2023]) -Greedy action tensor([ 0.6422, -0.5668, 0.0408, -0.3167]) tensor([0.4485, 0.1339, 0.2458, 0.1719]) -Greedy action tensor([ 0.9954, -0.7155, 0.0662, -0.4998]) tensor([0.5556, 0.1004, 0.2194, 0.1246]) -Greedy action tensor([ 0.5793, -0.2418, -0.0695, -0.1145]) tensor([0.4061, 0.1787, 0.2123, 0.2029]) -Greedy action tensor([ 0.8943, -0.6478, -0.0334, -0.5203]) tensor([0.5398, 0.1155, 0.2135, 0.1312]) -Greedy action tensor([ 0.5964, -0.3324, -0.2552, -0.3667]) tensor([0.4538, 0.1793, 0.1937, 0.1732]) -Greedy action tensor([ 0.7896, -0.3019, -0.0432, -0.2025]) tensor([0.4670, 0.1568, 0.2031, 0.1732]) -Greedy action tensor([ 0.3404, 0.0126, -0.3066, -0.1554]) tensor([0.3505, 0.2525, 0.1835, 0.2135]) -Greedy action tensor([ 0.8026, -0.2770, -0.0632, -0.0563]) tensor([0.4579, 0.1555, 0.1926, 0.1940]) -Greedy action tensor([ 0.9845, -0.7742, 0.0445, -0.6320]) tensor([0.5677, 0.0978, 0.2218, 0.1127]) -Greedy action tensor([ 0.7863, -0.4606, 0.0271, -0.2978]) tensor([0.4776, 0.1373, 0.2236, 0.1615]) -Greedy action tensor([ 0.7350, -0.3055, 0.0075, -0.1554]) tensor([0.4451, 0.1572, 0.2150, 0.1827]) -Greedy action tensor([ 0.7747, -0.5500, -0.1074, -0.0705]) tensor([0.4741, 0.1261, 0.1962, 0.2036]) -Greedy action tensor([ 0.7250, -0.2854, 0.0350, -0.7144]) tensor([0.4756, 0.1731, 0.2385, 0.1127]) -Greedy action tensor([ 0.7587, -0.4828, 0.1518, -0.2128]) tensor([0.4520, 0.1306, 0.2463, 0.1711]) -Greedy action tensor([ 0.7879, -0.2267, -0.0558, -0.0915]) tensor([0.4530, 0.1642, 0.1948, 0.1880]) -Greedy action tensor([ 0.5763, -0.3388, 0.1549, -0.4940]) tensor([0.4168, 0.1669, 0.2734, 0.1429]) -Greedy action tensor([ 0.6934, -0.5979, 0.0339, -0.5047]) tensor([0.4776, 0.1313, 0.2470, 0.1441]) -Greedy action tensor([ 0.7936, -0.3431, -0.2997, -0.4453]) tensor([0.5140, 0.1649, 0.1722, 0.1489]) -Greedy action tensor([ 0.8252, -0.6994, 0.0663, -0.4371]) tensor([0.5079, 0.1106, 0.2378, 0.1437]) -Greedy action tensor([ 0.5002, -0.5393, -0.1775, -0.1413]) tensor([0.4188, 0.1481, 0.2126, 0.2205]) -Greedy action tensor([ 0.6318, -0.1754, 0.0013, -0.1581]) tensor([0.4111, 0.1834, 0.2189, 0.1866]) -Greedy action tensor([ 1.2397, -0.8907, -0.1238, -0.5441]) tensor([0.6483, 0.0770, 0.1658, 0.1089]) -Greedy action tensor([ 0.5865, 0.1073, -0.0892, 0.0631]) tensor([0.3676, 0.2276, 0.1870, 0.2178]) -Greedy action tensor([ 0.8473, -0.0997, -0.0291, -0.5263]) tensor([0.4861, 0.1885, 0.2023, 0.1231]) -Greedy action tensor([ 1.0039, -0.8698, -0.0125, -0.4259]) tensor([0.5699, 0.0875, 0.2062, 0.1364]) -Greedy action tensor([ 0.6059, -0.5576, -0.2413, -0.1854]) tensor([0.4557, 0.1424, 0.1953, 0.2066]) -Greedy action tensor([ 0.6743, 0.0505, -0.1617, -0.6910]) tensor([0.4495, 0.2409, 0.1948, 0.1148]) -Greedy action tensor([ 0.4047, -0.2422, 0.0004, -0.0366]) tensor([0.3528, 0.1848, 0.2355, 0.2269]) -Greedy action tensor([ 0.7456, -0.3461, -0.0292, -0.0837]) tensor([0.4479, 0.1503, 0.2064, 0.1954]) -Greedy action tensor([ 0.5418, -0.2672, 0.0536, -0.1125]) tensor([0.3878, 0.1727, 0.2380, 0.2016]) -Greedy action tensor([ 0.5676, -0.3050, -0.0947, -0.1717]) tensor([0.4148, 0.1733, 0.2139, 0.1980]) -Greedy action tensor([ 0.7334, -0.7470, -0.0766, -0.1457]) tensor([0.4790, 0.1090, 0.2131, 0.1989]) -Greedy action tensor([ 0.5759, -0.4435, -0.1794, -0.1050]) tensor([0.4279, 0.1544, 0.2011, 0.2166]) -Greedy action tensor([ 0.2091, -0.0937, -0.0890, -0.1187]) tensor([0.3124, 0.2307, 0.2318, 0.2251]) -Greedy action tensor([ 0.6985, -0.5026, -0.0740, -0.1388]) tensor([0.4555, 0.1370, 0.2104, 0.1971]) -Greedy action tensor([ 0.9275, -0.3526, -0.0955, -0.2016]) tensor([0.5100, 0.1418, 0.1833, 0.1649]) -Greedy action tensor([ 0.7059, -0.3507, -0.2292, -0.2594]) tensor([0.4715, 0.1639, 0.1851, 0.1796]) -Greedy action tensor([ 0.2600, 0.1076, -0.0991, -0.2307]) tensor([0.3155, 0.2709, 0.2204, 0.1932]) -Greedy action tensor([ 0.5231, 0.2389, -0.1866, 0.1014]) tensor([0.3448, 0.2595, 0.1696, 0.2262]) -Greedy action tensor([ 0.7534, -0.4240, 0.0156, -0.3774]) tensor([0.4742, 0.1461, 0.2267, 0.1531]) -Greedy action tensor([ 0.9638, -0.8009, 0.1427, -0.3883]) tensor([0.5348, 0.0916, 0.2353, 0.1384]) -Greedy action tensor([ 0.4563, -0.3667, 0.0186, -0.1787]) tensor([0.3825, 0.1679, 0.2469, 0.2027]) -Greedy action tensor([ 1.0946, -0.5648, -0.0585, -0.7170]) tensor([0.5991, 0.1140, 0.1891, 0.0979]) -Greedy action tensor([ 0.7226, -0.0601, 0.1077, -0.0945]) tensor([0.4099, 0.1874, 0.2216, 0.1811]) -Greedy action tensor([ 0.5078, -0.1376, -0.1183, -0.1752]) tensor([0.3900, 0.2045, 0.2085, 0.1970]) -Greedy action tensor([ 1.2939, -0.5888, -0.1594, -0.5021]) tensor([0.6443, 0.0981, 0.1507, 0.1069]) -Greedy action tensor([ 0.7947, -0.3150, 0.1088, -0.2816]) tensor([0.4600, 0.1516, 0.2316, 0.1568]) -Greedy action tensor([ 0.1079, 0.2755, -0.2117, 0.0254]) tensor([0.2611, 0.3088, 0.1897, 0.2404]) -Greedy action tensor([ 0.8748, -0.2342, 0.0676, -0.7086]) tensor([0.5047, 0.1665, 0.2252, 0.1036]) -Greedy action tensor([ 0.8032, -0.5033, -0.0343, -0.2506]) tensor([0.4873, 0.1319, 0.2109, 0.1699]) -Greedy action tensor([ 0.7830, -1.0251, -0.0383, -0.4702]) tensor([0.5293, 0.0868, 0.2328, 0.1511]) -Greedy action tensor([ 1.0790, -0.5993, -0.0917, -0.5127]) tensor([0.5881, 0.1098, 0.1824, 0.1197]) -Greedy action tensor([ 0.5584, -0.5577, -0.1764, -0.1709]) tensor([0.4368, 0.1431, 0.2095, 0.2106]) -Greedy action tensor([ 0.6100, -0.4212, -0.0500, -0.4830]) tensor([0.4528, 0.1615, 0.2340, 0.1518]) -Greedy action tensor([ 0.6337, -0.4745, -0.0208, -0.2185]) tensor([0.4393, 0.1450, 0.2283, 0.1874]) -Greedy action tensor([ 1.3440, -0.7939, 0.0339, -0.8867]) tensor([0.6688, 0.0789, 0.1804, 0.0719]) -Greedy action tensor([ 0.8963, -0.6155, -0.0562, -0.7366]) tensor([0.5551, 0.1224, 0.2141, 0.1084]) -Greedy action tensor([ 0.8001, -0.3828, -0.1922, -0.0874]) tensor([0.4787, 0.1467, 0.1775, 0.1971]) -Greedy action tensor([ 0.9385, -0.1954, -0.0377, -0.3402]) tensor([0.5058, 0.1628, 0.1906, 0.1408]) -Greedy action tensor([ 0.9025, -0.3639, -0.1596, -0.3427]) tensor([0.5221, 0.1471, 0.1805, 0.1503]) -Greedy action tensor([ 0.3013, -0.1053, -0.0829, -0.0884]) tensor([0.3307, 0.2202, 0.2252, 0.2239]) -Greedy action tensor([ 0.6623, 0.2540, -0.2026, -0.0820]) tensor([0.3905, 0.2596, 0.1644, 0.1855]) -Greedy action tensor([ 0.5536, -0.4603, 0.0192, -0.2680]) tensor([0.4187, 0.1519, 0.2453, 0.1841]) -Greedy action tensor([ 0.8937, -0.6309, -0.0104, -0.3054]) tensor([0.5197, 0.1132, 0.2104, 0.1567]) -Greedy action tensor([ 0.5679, -0.2144, -0.0086, -0.1412]) tensor([0.3982, 0.1821, 0.2237, 0.1960]) -Greedy action tensor([ 0.6772, -0.3168, -0.0479, -0.2774]) tensor([0.4465, 0.1653, 0.2163, 0.1719]) -Greedy action tensor([ 1.2690, -0.6947, 0.0530, -0.4449]) tensor([0.6185, 0.0868, 0.1833, 0.1114]) -Greedy action tensor([ 0.5132, -0.2932, -0.0882, 0.0390]) tensor([0.3821, 0.1706, 0.2094, 0.2378]) -Greedy action tensor([-1.9127, -0.4072, 0.6503, -0.1639]) tensor([0.0413, 0.1860, 0.5355, 0.2372]) -Greedy action tensor([-1.9102, -0.4103, 0.6526, -0.1511]) tensor([0.0412, 0.1847, 0.5347, 0.2394]) -Greedy action tensor([-1.8875, -0.3441, 0.6346, -0.1467]) tensor([0.0420, 0.1964, 0.5225, 0.2392]) -Greedy action tensor([-1.4311, -0.9347, 1.2294, 0.8561]) tensor([0.0373, 0.0613, 0.5338, 0.3675]) -Greedy action tensor([-1.8408, -0.3242, 0.5791, -0.1073]) tensor([0.0445, 0.2029, 0.5006, 0.2520]) -Greedy action tensor([-1.4274, -0.5431, 0.4104, 0.2203]) tensor([0.0671, 0.1625, 0.4217, 0.3487]) -Greedy action tensor([-1.9094, -0.4033, 0.6547, -0.1604]) tensor([0.0412, 0.1860, 0.5357, 0.2371]) -Greedy action tensor([-1.6438, -0.3095, 0.2575, -0.4692]) tensor([0.0679, 0.2578, 0.4545, 0.2198]) -Greedy action tensor([-1.8320, -0.5096, 0.6409, -0.1354]) tensor([0.0453, 0.1701, 0.5374, 0.2472]) -Greedy action tensor([-0.3616, 0.8096, -0.0334, 0.1270]) tensor([0.1380, 0.4453, 0.1917, 0.2250]) -Greedy action tensor([-1.7087, -0.3003, 0.6304, 0.0414]) tensor([0.0471, 0.1927, 0.4889, 0.2713]) -Greedy action tensor([-1.1525, -0.1410, 0.7530, 0.4980]) tensor([0.0638, 0.1753, 0.4287, 0.3322]) -Greedy action tensor([-1.8146, -0.2493, 0.5689, -0.1066]) tensor([0.0452, 0.2160, 0.4896, 0.2492]) -Greedy action tensor([-1.9450, -0.4431, 0.6654, -0.1806]) tensor([0.0401, 0.1801, 0.5456, 0.2342]) -Greedy action tensor([-1.8309, -0.4464, 0.6165, -0.1133]) tensor([0.0452, 0.1805, 0.5225, 0.2518]) -Greedy action tensor([-1.9447, -0.4565, 0.6691, -0.1804]) tensor([0.0401, 0.1777, 0.5479, 0.2343]) -Greedy action tensor([-1.8903, -0.4326, 0.6370, -0.1597]) tensor([0.0426, 0.1831, 0.5337, 0.2406]) -Greedy action tensor([-1.7248, -0.4775, 0.5484, -0.0399]) tensor([0.0511, 0.1777, 0.4959, 0.2753]) -Greedy action tensor([-1.8863, -0.4457, 0.6392, -0.1538]) tensor([0.0428, 0.1807, 0.5346, 0.2419]) -Greedy action tensor([-1.6111, -0.5520, 0.4972, 0.0137]) tensor([0.0582, 0.1677, 0.4789, 0.2953]) -Greedy action tensor([-1.9396, -0.4452, 0.6654, -0.1777]) tensor([0.0403, 0.1796, 0.5454, 0.2347]) -Greedy action tensor([-1.6025, -0.6165, 1.1255, 0.6425]) tensor([0.0352, 0.0943, 0.5384, 0.3321]) -Greedy action tensor([-0.9432, -0.4733, 0.4906, 0.6750]) tensor([0.0845, 0.1351, 0.3543, 0.4261]) -Greedy action tensor([-1.9134, -0.4487, 0.6563, -0.1647]) tensor([0.0414, 0.1792, 0.5412, 0.2381]) -Greedy action tensor([-1.8220, -0.3797, 0.6570, -0.1126]) tensor([0.0441, 0.1865, 0.5259, 0.2436]) -Greedy action tensor([-1.9230, -0.4539, 0.6591, -0.1725]) tensor([0.0411, 0.1786, 0.5436, 0.2367]) -Greedy action tensor([-1.5600, -0.4869, 0.4742, 0.0464]) tensor([0.0604, 0.1766, 0.4619, 0.3011]) -Greedy action tensor([-1.8983, -0.4143, 0.6459, -0.1802]) tensor([0.0422, 0.1860, 0.5369, 0.2350]) -Greedy action tensor([-1.3888, -0.5311, 0.3690, 0.1519]) tensor([0.0723, 0.1705, 0.4195, 0.3376]) -Greedy action tensor([-1.8006, -0.3848, 0.5917, -0.0883]) tensor([0.0463, 0.1907, 0.5064, 0.2566]) -Greedy action tensor([-1.3742e+00, 7.9623e-04, 4.3361e-01, 1.6614e-01]) tensor([0.0636, 0.2516, 0.3879, 0.2969]) -Greedy action tensor([-1.9441, -0.4480, 0.6674, -0.1804]) tensor([0.0401, 0.1792, 0.5466, 0.2341]) -Greedy action tensor([-1.9372, -0.4372, 0.6607, -0.1756]) tensor([0.0404, 0.1812, 0.5431, 0.2353]) -Greedy action tensor([-1.8704, -0.3197, 0.6430, -0.1200]) tensor([0.0420, 0.1979, 0.5184, 0.2417]) -Greedy action tensor([-1.7381, -0.2716, 0.5860, -0.0537]) tensor([0.0478, 0.2070, 0.4879, 0.2574]) -Greedy action tensor([-1.9324, -0.4300, 0.6610, -0.1726]) tensor([0.0405, 0.1820, 0.5420, 0.2355]) -Greedy action tensor([-1.7900, -0.4680, 0.5944, -0.1047]) tensor([0.0476, 0.1786, 0.5168, 0.2569]) -Greedy action tensor([-1.9078, -0.4517, 0.6508, -0.1646]) tensor([0.0418, 0.1793, 0.5400, 0.2389]) -Greedy action tensor([-1.9482, -0.4519, 0.6685, -0.1828]) tensor([0.0400, 0.1786, 0.5476, 0.2338]) -Greedy action tensor([-1.7345, -0.3324, 0.5444, -0.1018]) tensor([0.0501, 0.2037, 0.4896, 0.2566]) -Greedy action tensor([-1.8402, -0.4422, 0.6013, -0.1222]) tensor([0.0452, 0.1830, 0.5197, 0.2521]) -Greedy action tensor([-1.9194, -0.4094, 0.6483, -0.1633]) tensor([0.0411, 0.1859, 0.5353, 0.2378]) -Greedy action tensor([-1.8271, -0.4159, 0.6045, -0.1245]) tensor([0.0455, 0.1867, 0.5179, 0.2499]) -Greedy action tensor([-1.8937, -0.2443, 0.5116, -0.2047]) tensor([0.0441, 0.2292, 0.4882, 0.2385]) -Greedy action tensor([-1.9265, -0.4350, 0.6605, -0.1712]) tensor([0.0408, 0.1812, 0.5420, 0.2360]) -Greedy action tensor([-1.8160, -0.4744, 0.5763, -0.1145]) tensor([0.0471, 0.1800, 0.5149, 0.2580]) -Greedy action tensor([-1.7008, -0.3047, 0.5725, -0.1809]) tensor([0.0518, 0.2091, 0.5026, 0.2366]) -Greedy action tensor([-1.8871, -0.3590, 0.6389, -0.1444]) tensor([0.0420, 0.1935, 0.5248, 0.2398]) -Greedy action tensor([-1.7701, -0.3807, 0.6213, -0.0403]) tensor([0.0463, 0.1859, 0.5064, 0.2613]) -Greedy action tensor([-1.7957, -0.1968, 0.5833, -0.1003]) tensor([0.0451, 0.2230, 0.4864, 0.2456]) -Greedy action tensor([-1.7719, -0.0982, 0.5551, -0.0767]) tensor([0.0454, 0.2421, 0.4652, 0.2473]) -Greedy action tensor([-1.9210, -0.4414, 0.6582, -0.1667]) tensor([0.0411, 0.1803, 0.5414, 0.2373]) -Greedy action tensor([-1.3380, 0.1570, 0.3833, -0.1139]) tensor([0.0692, 0.3086, 0.3869, 0.2353]) -Greedy action tensor([-1.7775, -0.1492, 0.5379, -0.1014]) tensor([0.0464, 0.2362, 0.4696, 0.2478]) -Greedy action tensor([-1.9038, -0.4230, 0.6451, -0.1614]) tensor([0.0418, 0.1839, 0.5353, 0.2389]) -Greedy action tensor([-1.7636, -0.4611, 0.8131, 0.2845]) tensor([0.0391, 0.1438, 0.5141, 0.3030]) -Greedy action tensor([-1.7222, -0.1624, 0.5369, -0.0739]) tensor([0.0487, 0.2318, 0.4664, 0.2532]) -Greedy action tensor([-1.0476, 0.5326, 0.1775, -0.0417]) tensor([0.0834, 0.4048, 0.2838, 0.2280]) -Greedy action tensor([-1.6334, -0.3691, 0.4890, 0.0646]) tensor([0.0545, 0.1929, 0.4550, 0.2976]) -Greedy action tensor([-1.8401, -0.3453, 0.6014, -0.1431]) tensor([0.0446, 0.1990, 0.5128, 0.2436]) -Greedy action tensor([-1.6209, -0.3928, 0.7464, 0.4473]) tensor([0.0435, 0.1485, 0.4640, 0.3440]) -Greedy action tensor([-1.8930, -0.4363, 0.6438, -0.1533]) tensor([0.0423, 0.1816, 0.5350, 0.2411]) -Greedy action tensor([-1.9344, -0.4328, 0.6604, -0.1745]) tensor([0.0405, 0.1818, 0.5424, 0.2353]) -Greedy action tensor([-1.0336, -0.5673, 0.4956, 0.8070]) tensor([0.0740, 0.1180, 0.3416, 0.4664]) -Greedy action tensor([-1.7492, -0.4462, 0.5722, -0.0951]) tensor([0.0498, 0.1831, 0.5070, 0.2601]) -Greedy action tensor([-1.9029, -0.4397, 0.6720, -0.1513]) tensor([0.0413, 0.1784, 0.5423, 0.2380]) -Greedy action tensor([-1.9153, -0.4665, 0.6522, -0.1629]) tensor([0.0416, 0.1770, 0.5417, 0.2398]) -Greedy action tensor([-1.8958, -0.3030, 0.6228, -0.1408]) tensor([0.0415, 0.2039, 0.5147, 0.2399]) -Greedy action tensor([-1.9150, -0.3761, 0.6464, -0.1577]) tensor([0.0410, 0.1909, 0.5307, 0.2375]) -Greedy action tensor([-1.9113, -0.4049, 0.6472, -0.1591]) tensor([0.0413, 0.1864, 0.5339, 0.2384]) -Greedy action tensor([-1.9409, -0.4484, 0.6644, -0.1786]) tensor([0.0403, 0.1793, 0.5456, 0.2348]) -Greedy action tensor([-1.8338, -0.4261, 0.6427, -0.1076]) tensor([0.0442, 0.1808, 0.5264, 0.2486]) -Greedy action tensor([-1.8529, -0.3468, 0.6189, -0.1144]) tensor([0.0434, 0.1957, 0.5140, 0.2469]) -Greedy action tensor([-0.7286, 0.8411, 0.1087, -0.0193]) tensor([0.0985, 0.4735, 0.2277, 0.2003]) -Greedy action tensor([-1.7869, -0.4003, 0.5665, -0.0879]) tensor([0.0476, 0.1906, 0.5012, 0.2605]) -Greedy action tensor([-1.8210, -0.4608, 0.5905, -0.1311]) tensor([0.0466, 0.1815, 0.5194, 0.2524]) -Greedy action tensor([-1.4611, 0.0212, 0.5182, 0.3336]) tensor([0.0536, 0.2360, 0.3879, 0.3225]) -Greedy action tensor([-1.9341, -0.4418, 0.6633, -0.1743]) tensor([0.0405, 0.1802, 0.5439, 0.2354]) -Greedy action tensor([-1.9079, -0.4170, 0.6488, -0.1597]) tensor([0.0415, 0.1844, 0.5355, 0.2386]) -Greedy action tensor([-1.7672, -0.4476, 0.5745, -0.0931]) tensor([0.0488, 0.1828, 0.5079, 0.2605]) -Greedy action tensor([-1.5576, 0.4776, 0.3576, -0.0302]) tensor([0.0499, 0.3818, 0.3386, 0.2298]) -Greedy action tensor([-0.3113, -0.9309, -0.3801, 0.1009]) tensor([0.2511, 0.1352, 0.2344, 0.3793]) -Greedy action tensor([ 0.5357, -1.1381, 0.5746, 0.1504]) tensor([0.3440, 0.0645, 0.3576, 0.2340]) -Greedy action tensor([ 0.3740, -1.2289, -0.5558, 0.1575]) tensor([0.4164, 0.0838, 0.1643, 0.3354]) -Greedy action tensor([ 1.5110, -0.4074, 0.6731, 0.6007]) tensor([0.5046, 0.0741, 0.2183, 0.2031]) -Greedy action tensor([-1.1169, -0.1430, 0.1870, -1.1516]) tensor([0.1205, 0.3192, 0.4439, 0.1164]) -Greedy action tensor([ 1.0692, -0.0905, 0.7704, -0.5660]) tensor([0.4444, 0.1394, 0.3296, 0.0866]) -Greedy action tensor([-0.6736, -0.5870, -0.0104, -1.0685]) tensor([0.2125, 0.2318, 0.4125, 0.1432]) -Greedy action tensor([-0.4047, 0.3546, -0.5294, 0.1534]) tensor([0.1734, 0.3705, 0.1531, 0.3030]) -Greedy action tensor([-0.6533, -1.1652, -0.5994, 0.6173]) tensor([0.1608, 0.0964, 0.1697, 0.5730]) -Greedy action tensor([ 0.0821, -1.5408, 0.0339, -0.1133]) tensor([0.3364, 0.0664, 0.3205, 0.2767]) -Greedy action tensor([-0.3251, -0.2790, -0.5752, 0.2645]) tensor([0.2160, 0.2262, 0.1682, 0.3895]) -Greedy action tensor([-0.5740, 0.3583, -0.9253, 0.1051]) tensor([0.1609, 0.4086, 0.1132, 0.3173]) -Greedy action tensor([ 0.3666, -0.4091, -0.5402, -0.6243]) tensor([0.4473, 0.2060, 0.1806, 0.1661]) -Greedy action tensor([ 0.2560, -1.3376, 0.8164, 0.1808]) tensor([0.2576, 0.0523, 0.4511, 0.2389]) -Greedy action tensor([-1.3676, -0.2510, 0.2905, -0.3250]) tensor([0.0824, 0.2516, 0.4324, 0.2337]) -Greedy action tensor([-0.1612, -1.1637, 0.2474, -0.8353]) tensor([0.2957, 0.1085, 0.4450, 0.1507]) -Greedy action tensor([ 1.0023, -0.4979, 0.4634, -0.2327]) tensor([0.4768, 0.1064, 0.2782, 0.1387]) -Greedy action tensor([ 1.1833, -0.0672, -0.0772, -0.5541]) tensor([0.5728, 0.1640, 0.1624, 0.1008]) -Greedy action tensor([ 0.1717, -0.9111, -0.5788, -0.1624]) tensor([0.3958, 0.1340, 0.1868, 0.2834]) -Greedy action tensor([ 0.5412, 0.0036, -0.4750, -0.0162]) tensor([0.3970, 0.2319, 0.1437, 0.2274]) -Greedy action tensor([ 0.0432, -0.4991, -0.1963, -0.4962]) tensor([0.3388, 0.1970, 0.2667, 0.1975]) -Greedy action tensor([-1.1038, 0.1147, -0.6777, -0.3440]) tensor([0.1242, 0.4201, 0.1902, 0.2655]) -Greedy action tensor([-0.2268, -0.7386, 1.0140, -0.5999]) tensor([0.1740, 0.1043, 0.6018, 0.1198]) -Greedy action tensor([ 0.4323, -1.1404, 0.3577, -0.3548]) tensor([0.3860, 0.0801, 0.3582, 0.1757]) -Greedy action tensor([ 0.6975, -1.4151, 0.0629, -0.3845]) tensor([0.5025, 0.0608, 0.2664, 0.1703]) -Greedy action tensor([-0.2778, -0.0424, -0.5147, -0.0961]) tensor([0.2351, 0.2975, 0.1855, 0.2819]) -Greedy action tensor([-0.7971, -1.1994, 0.3606, -0.6233]) tensor([0.1655, 0.1107, 0.5268, 0.1970]) -Greedy action tensor([-1.4601, -0.6608, -0.9960, 0.0506]) tensor([0.1070, 0.2380, 0.1702, 0.4848]) -Greedy action tensor([-5.9587e-01, -8.3159e-04, -1.0252e+00, 4.8561e-01]) tensor([0.1559, 0.2827, 0.1015, 0.4598]) -Greedy action tensor([-0.3860, -0.7738, 1.4225, -0.9819]) tensor([0.1200, 0.0814, 0.7324, 0.0661]) -Greedy action tensor([ 1.4189, -0.5864, -0.3921, -0.0377]) tensor([0.6531, 0.0879, 0.1068, 0.1522]) -Greedy action tensor([-0.2900, 0.0721, -0.1099, -0.4112]) tensor([0.2213, 0.3178, 0.2649, 0.1960]) -Greedy action tensor([ 0.5896, -1.0041, 0.6913, 0.8221]) tensor([0.2799, 0.0569, 0.3099, 0.3532]) -Greedy action tensor([ 0.3935, 0.1003, -1.0117, -0.2181]) tensor([0.3947, 0.2944, 0.0968, 0.2141]) -Greedy action tensor([-0.4162, -0.0096, 0.7830, -0.2191]) tensor([0.1421, 0.2134, 0.4714, 0.1731]) -Greedy action tensor([-0.1339, -0.1734, -0.6720, -0.2079]) tensor([0.2879, 0.2767, 0.1681, 0.2673]) -Greedy action tensor([ 0.2893, -0.5890, -0.5246, 0.8065]) tensor([0.2828, 0.1175, 0.1253, 0.4744]) -Greedy action tensor([-0.4293, 0.3743, -0.0551, -0.1813]) tensor([0.1675, 0.3742, 0.2436, 0.2147]) -Greedy action tensor([-0.3169, -0.5117, -0.2489, 0.3840]) tensor([0.2037, 0.1676, 0.2180, 0.4106]) -Greedy action tensor([-0.3889, -1.0524, -0.0531, -0.5243]) tensor([0.2640, 0.1360, 0.3694, 0.2306]) -Greedy action tensor([ 0.6453, 0.4447, -0.9548, -0.8327]) tensor([0.4448, 0.3640, 0.0898, 0.1015]) -Greedy action tensor([-0.7094, -0.4326, 0.6301, -0.6237]) tensor([0.1384, 0.1825, 0.5283, 0.1508]) -Greedy action tensor([-0.2525, 0.2045, 0.3631, -0.4015]) tensor([0.1890, 0.2985, 0.3497, 0.1628]) -Greedy action tensor([-0.0929, -0.7199, 0.2595, -0.5165]) tensor([0.2769, 0.1479, 0.3939, 0.1813]) -Greedy action tensor([ 1.0297, -0.7297, -0.3918, 0.6627]) tensor([0.4748, 0.0817, 0.1146, 0.3289]) -Greedy action tensor([ 0.6720, -0.8606, 0.3427, 0.2089]) tensor([0.3899, 0.0842, 0.2805, 0.2454]) -Greedy action tensor([-1.5904, -0.9078, 0.2639, -1.3354]) tensor([0.0938, 0.1857, 0.5994, 0.1211]) -Greedy action tensor([-0.2472, -0.2072, -0.0662, 0.3279]) tensor([0.1993, 0.2075, 0.2389, 0.3543]) -Greedy action tensor([-0.4022, -1.1518, 0.5108, -1.3284]) tensor([0.2293, 0.1084, 0.5715, 0.0908]) -Greedy action tensor([-0.4897, -1.3783, 0.2936, 0.0767]) tensor([0.1865, 0.0767, 0.4082, 0.3286]) -Greedy action tensor([ 0.8118, -0.7095, -0.3872, -0.0010]) tensor([0.5093, 0.1112, 0.1535, 0.2259]) -Greedy action tensor([-0.7825, -0.9588, -0.4969, -0.4227]) tensor([0.2173, 0.1822, 0.2891, 0.3114]) -Greedy action tensor([ 0.4301, 0.1980, -0.5658, 0.1920]) tensor([0.3389, 0.2687, 0.1252, 0.2671]) -Greedy action tensor([ 0.3131, -0.1602, -0.4240, -1.2134]) tensor([0.4313, 0.2687, 0.2064, 0.0937]) -Greedy action tensor([ 0.1413, 0.0424, -0.7885, -0.6581]) tensor([0.3636, 0.3294, 0.1435, 0.1635]) -Greedy action tensor([-0.1005, -0.1999, 0.5420, -0.6767]) tensor([0.2289, 0.2073, 0.4352, 0.1287]) -Greedy action tensor([ 0.2052, -0.4133, -0.9607, -0.1148]) tensor([0.3881, 0.2091, 0.1210, 0.2818]) -Greedy action tensor([-0.1578, -0.2383, -0.3438, -0.9151]) tensor([0.3104, 0.2864, 0.2577, 0.1455]) -Greedy action tensor([-0.5280, -1.2984, 0.9737, -0.9057]) tensor([0.1507, 0.0697, 0.6763, 0.1033]) -Greedy action tensor([-0.3323, 0.5556, 0.0203, -0.4538]) tensor([0.1743, 0.4235, 0.2479, 0.1543]) -Greedy action tensor([ 0.4015, -0.4004, -0.0737, 0.4354]) tensor([0.3221, 0.1444, 0.2003, 0.3332]) -Greedy action tensor([-0.4092, -0.7761, 0.4146, 0.3006]) tensor([0.1665, 0.1154, 0.3795, 0.3386]) -Greedy action tensor([-0.1694, 0.3961, 0.8405, -0.6724]) tensor([0.1637, 0.2881, 0.4493, 0.0990]) -Greedy action tensor([ 0.8594, -0.2881, 1.7082, 0.7075]) tensor([0.2216, 0.0703, 0.5178, 0.1903]) -Greedy action tensor([-0.0060, -0.7273, -0.2824, 0.5551]) tensor([0.2502, 0.1216, 0.1898, 0.4385]) -Greedy action tensor([ 0.4416, -1.2545, 0.2187, 0.0233]) tensor([0.3785, 0.0694, 0.3029, 0.2491]) -Greedy action tensor([-0.5981, -1.2371, 0.5880, -0.5451]) tensor([0.1708, 0.0901, 0.5591, 0.1800]) -Greedy action tensor([-0.8722, -0.9628, 0.5536, -0.7407]) tensor([0.1386, 0.1266, 0.5767, 0.1581]) -Greedy action tensor([ 0.2938, -0.8437, 1.0094, -0.0273]) tensor([0.2444, 0.0784, 0.4999, 0.1773]) -Greedy action tensor([ 0.7830, -1.2885, -0.6168, 0.2029]) tensor([0.5175, 0.0652, 0.1276, 0.2897]) -Greedy action tensor([ 0.2783, 0.2082, -0.1893, -0.7346]) tensor([0.3422, 0.3191, 0.2144, 0.1243]) -Greedy action tensor([ 0.1851, 0.0554, -0.1875, -0.5973]) tensor([0.3306, 0.2904, 0.2278, 0.1512]) -Greedy action tensor([ 1.2087, -0.6609, -0.2982, -0.3578]) tensor([0.6311, 0.0973, 0.1399, 0.1318]) -Greedy action tensor([ 0.5466, -0.0285, 0.5813, 0.1231]) tensor([0.3074, 0.1730, 0.3183, 0.2013]) -Greedy action tensor([ 0.3438, -0.1955, 0.3310, -0.3713]) tensor([0.3269, 0.1906, 0.3227, 0.1599]) -Greedy action tensor([-1.1176, -0.0433, -0.9506, 0.6393]) tensor([0.0917, 0.2685, 0.1084, 0.5314]) -Greedy action tensor([ 0.1567, 0.2421, -0.1946, -0.9001]) tensor([0.3184, 0.3468, 0.2241, 0.1107]) -Greedy action tensor([-1.2284, -0.4764, -0.7420, -0.1080]) tensor([0.1280, 0.2715, 0.2081, 0.3924]) -Greedy action tensor([ 0.2563, -0.9499, -0.5863, -0.6612]) tensor([0.4696, 0.1406, 0.2022, 0.1876]) -Greedy action tensor([ 0.6980, -0.3204, -0.6671, 0.2962]) tensor([0.4375, 0.1580, 0.1117, 0.2927]) -Greedy action tensor([-0.8915, -0.1982, -0.8072, 0.2569]) tensor([0.1381, 0.2762, 0.1502, 0.4354]) -Greedy action tensor([ 2.0098, -0.2433, -0.3247, 0.0762]) tensor([0.7426, 0.0780, 0.0719, 0.1074]) -Greedy action tensor([ 2.5694, 0.0520, 0.3917, -0.0422]) tensor([0.7890, 0.0637, 0.0894, 0.0579]) -Greedy action tensor([ 1.2965, -0.3874, -0.4431, 0.4168]) tensor([0.5630, 0.1045, 0.0989, 0.2336]) -Greedy action tensor([ 1.6081, -0.3402, -0.2428, 0.1316]) tensor([0.6544, 0.0933, 0.1028, 0.1495]) -Greedy action tensor([ 1.6489, -0.4334, -0.6332, 0.3870]) tensor([0.6623, 0.0826, 0.0676, 0.1875]) -Greedy action tensor([ 1.3196, -0.4674, -0.5927, 0.0892]) tensor([0.6221, 0.1042, 0.0919, 0.1818]) -Greedy action tensor([ 1.0864, -0.3417, -0.2075, -0.1128]) tensor([0.5508, 0.1321, 0.1510, 0.1660]) -Greedy action tensor([ 2.0910, -0.5331, -0.4288, 0.2239]) tensor([0.7648, 0.0555, 0.0615, 0.1182]) -Greedy action tensor([ 1.1350, -0.3104, -0.2262, 0.4524]) tensor([0.5007, 0.1180, 0.1284, 0.2530]) -Greedy action tensor([ 1.8637, -1.2442, -0.2225, -0.0753]) tensor([0.7618, 0.0340, 0.0946, 0.1096]) -Greedy action tensor([ 1.4423, -0.4469, -1.1023, 0.1303]) tensor([0.6671, 0.1009, 0.0524, 0.1796]) -Greedy action tensor([ 1.7741, -0.8722, -0.9367, 0.5065]) tensor([0.7048, 0.0500, 0.0469, 0.1984]) -Greedy action tensor([ 1.2196, -0.2473, -1.0494, 0.3718]) tensor([0.5674, 0.1309, 0.0587, 0.2430]) -Greedy action tensor([ 1.4289, -0.7276, -0.0986, 0.2981]) tensor([0.6040, 0.0699, 0.1311, 0.1950]) -Greedy action tensor([ 1.7368, -0.5429, -0.3721, -0.0226]) tensor([0.7164, 0.0733, 0.0870, 0.1233]) -Greedy action tensor([ 1.5119, -1.0152, -0.3901, 0.9947]) tensor([0.5478, 0.0438, 0.0818, 0.3266]) -Greedy action tensor([ 1.4306, -0.2544, -0.8615, 0.3962]) tensor([0.6090, 0.1129, 0.0615, 0.2165]) -Greedy action tensor([ 1.2943, -0.2216, -0.4755, 0.0728]) tensor([0.5936, 0.1304, 0.1011, 0.1750]) -Greedy action tensor([ 1.2772, -0.1924, -0.4074, 0.2180]) tensor([0.5675, 0.1305, 0.1053, 0.1968]) -Greedy action tensor([ 1.9364, -0.2440, -0.3884, 0.3727]) tensor([0.7041, 0.0796, 0.0689, 0.1474]) -Greedy action tensor([ 1.9623, 0.4526, -0.0205, 0.3838]) tensor([0.6390, 0.1412, 0.0880, 0.1318]) -Greedy action tensor([ 1.7560, -1.1936, -0.2020, 1.0196]) tensor([0.5980, 0.0313, 0.0844, 0.2863]) -Greedy action tensor([ 1.0663, 0.0570, -0.3456, -0.4958]) tensor([0.5501, 0.2005, 0.1340, 0.1154]) -Greedy action tensor([ 1.0628, 0.0736, -0.3666, 0.0524]) tensor([0.5062, 0.1883, 0.1212, 0.1843]) -Greedy action tensor([ 1.3992, -0.1487, -1.2659, 0.3447]) tensor([0.6133, 0.1304, 0.0427, 0.2136]) -Greedy action tensor([ 1.2320, 0.1038, -0.5878, 0.4758]) tensor([0.5115, 0.1655, 0.0829, 0.2401]) -Greedy action tensor([ 2.0622, -0.6598, -0.5315, 0.3917]) tensor([0.7526, 0.0495, 0.0563, 0.1416]) -Greedy action tensor([ 1.5467, -0.8099, -0.4652, 1.0321]) tensor([0.5476, 0.0519, 0.0732, 0.3273]) -Greedy action tensor([ 1.3917, -0.2447, -0.7127, 0.6164]) tensor([0.5627, 0.1095, 0.0686, 0.2592]) -Greedy action tensor([ 1.7252, -0.3475, -0.5171, 0.3645]) tensor([0.6718, 0.0845, 0.0714, 0.1723]) -Greedy action tensor([ 1.6104, -1.0290, -0.0905, 0.0135]) tensor([0.6866, 0.0490, 0.1253, 0.1391]) -Greedy action tensor([ 1.7933, -0.9540, -0.0374, 0.6889]) tensor([0.6427, 0.0412, 0.1030, 0.2130]) -Greedy action tensor([ 1.7545, -0.8059, -0.4042, 0.3417]) tensor([0.6963, 0.0538, 0.0804, 0.1695]) -Greedy action tensor([ 1.2176, -0.3018, -0.3281, 0.3317]) tensor([0.5422, 0.1187, 0.1156, 0.2236]) -Greedy action tensor([ 1.9696, -1.0178, 0.1291, 0.5426]) tensor([0.6900, 0.0348, 0.1095, 0.1656]) -Greedy action tensor([ 1.6109, -0.3796, -0.2564, 0.2287]) tensor([0.6484, 0.0886, 0.1002, 0.1628]) -Greedy action tensor([ 2.2239, -0.5187, -0.3423, 0.4587]) tensor([0.7620, 0.0491, 0.0585, 0.1304]) -Greedy action tensor([ 1.2627, -0.4595, -0.3800, 0.1958]) tensor([0.5827, 0.1041, 0.1127, 0.2005]) -Greedy action tensor([ 2.0261, -0.7388, -0.5904, 0.8607]) tensor([0.6907, 0.0435, 0.0505, 0.2154]) -Greedy action tensor([ 1.7129, -0.9754, -0.7454, 0.1968]) tensor([0.7282, 0.0495, 0.0623, 0.1599]) -Greedy action tensor([ 1.5675, -0.5756, -0.4998, 0.3824]) tensor([0.6453, 0.0757, 0.0817, 0.1973]) -Greedy action tensor([ 1.9501, 0.1592, -0.7708, -0.1378]) tensor([0.7372, 0.1230, 0.0485, 0.0914]) -Greedy action tensor([ 1.2505, 0.0606, -1.1249, 0.5293]) tensor([0.5309, 0.1615, 0.0494, 0.2581]) -Greedy action tensor([ 1.5125, -0.1762, -0.3934, 0.5537]) tensor([0.5825, 0.1076, 0.0866, 0.2233]) -Greedy action tensor([ 1.2070, -0.3835, -0.1154, 0.2628]) tensor([0.5378, 0.1096, 0.1433, 0.2092]) -Greedy action tensor([ 1.2042, -0.3803, -0.5933, 0.3666]) tensor([0.5545, 0.1137, 0.0919, 0.2400]) -Greedy action tensor([ 1.7788, -0.9779, -0.4469, 0.3923]) tensor([0.7035, 0.0447, 0.0760, 0.1758]) -Greedy action tensor([ 1.1937, -0.2878, -0.3843, 0.2400]) tensor([0.5498, 0.1250, 0.1135, 0.2118]) -Greedy action tensor([ 1.5769, -0.4567, -0.5216, 0.7482]) tensor([0.5917, 0.0774, 0.0726, 0.2583]) -Greedy action tensor([ 1.2567, -0.4864, -0.1471, 0.3723]) tensor([0.5454, 0.0954, 0.1340, 0.2252]) -Greedy action tensor([ 2.1837, -1.1681, -0.2007, 0.3345]) tensor([0.7785, 0.0273, 0.0717, 0.1225]) -Greedy action tensor([ 1.4954, -0.7641, -0.5660, 0.2841]) tensor([0.6538, 0.0683, 0.0832, 0.1947]) -Greedy action tensor([ 1.5480, -0.6352, -0.7224, 0.3371]) tensor([0.6605, 0.0744, 0.0682, 0.1968]) -Greedy action tensor([ 1.3335, -0.4741, -0.7461, 0.1314]) tensor([0.6291, 0.1032, 0.0786, 0.1891]) -Greedy action tensor([ 1.5428, -0.5532, -0.5439, 0.5155]) tensor([0.6231, 0.0766, 0.0773, 0.2230]) -Greedy action tensor([ 1.5303, -0.2693, -0.1038, 0.0302]) tensor([0.6315, 0.1044, 0.1232, 0.1409]) -Greedy action tensor([ 1.5160, -0.8385, -0.1311, 0.1949]) tensor([0.6433, 0.0611, 0.1239, 0.1717]) -Greedy action tensor([ 1.2932, -0.2067, -0.3715, 0.4066]) tensor([0.5481, 0.1223, 0.1037, 0.2259]) -Greedy action tensor([ 1.2851, -0.1056, -0.4235, -0.0549]) tensor([0.5911, 0.1471, 0.1070, 0.1548]) -Greedy action tensor([ 0.6087, -0.3822, -0.0198, -0.1356]) tensor([0.4202, 0.1560, 0.2241, 0.1996]) -Greedy action tensor([ 1.4636, -0.8051, -0.1537, 0.1864]) tensor([0.6326, 0.0654, 0.1255, 0.1764]) -Greedy action tensor([ 1.5185, -0.2784, -0.3707, 0.0723]) tensor([0.6441, 0.1068, 0.0974, 0.1517]) -Greedy action tensor([ 0.9650, -0.2060, -0.3007, 0.4126]) tensor([0.4613, 0.1430, 0.1301, 0.2655]) -Greedy action tensor([ 1.7428, 0.1242, -0.4394, -0.0404]) tensor([0.6761, 0.1340, 0.0763, 0.1137]) -Greedy action tensor([ 1.8966, -0.8475, -1.0783, 0.2834]) tensor([0.7607, 0.0489, 0.0388, 0.1516]) -Greedy action tensor([ 1.3251, -0.4568, -0.4324, 0.0781]) tensor([0.6142, 0.1034, 0.1059, 0.1765]) -Greedy action tensor([ 1.4221, 0.3342, -0.5190, 0.2845]) tensor([0.5552, 0.1871, 0.0797, 0.1780]) -Greedy action tensor([ 1.3666, -0.4321, -1.1132, 0.3666]) tensor([0.6184, 0.1023, 0.0518, 0.2275]) -Greedy action tensor([ 1.5305, -0.2781, -0.6340, 0.4725]) tensor([0.6151, 0.1008, 0.0706, 0.2135]) -Greedy action tensor([ 1.5626, -0.4428, -0.2458, 0.1993]) tensor([0.6434, 0.0866, 0.1055, 0.1646]) -Greedy action tensor([ 1.2250, -0.4277, -0.4410, 0.3619]) tensor([0.5548, 0.1063, 0.1049, 0.2341]) -Greedy action tensor([ 1.9031, -0.2552, -0.2618, 0.5872]) tensor([0.6673, 0.0771, 0.0766, 0.1790]) -Greedy action tensor([ 1.6815, -0.0888, -0.4716, -0.0418]) tensor([0.6827, 0.1162, 0.0793, 0.1218]) -Greedy action tensor([ 1.7306, -0.3928, -0.5386, 0.5935]) tensor([0.6478, 0.0775, 0.0670, 0.2078]) -Greedy action tensor([ 1.1957, -0.4875, 0.1239, 0.2218]) tensor([0.5247, 0.0975, 0.1797, 0.1981]) -Greedy action tensor([ 1.5916, -0.3329, -0.2635, 0.0510]) tensor([0.6593, 0.0962, 0.1031, 0.1413]) -Greedy action tensor([ 1.7021, -0.7942, -0.2037, 0.3629]) tensor([0.6697, 0.0552, 0.0996, 0.1755]) -Greedy action tensor([ 1.2949, -0.0160, -0.5249, 0.4912]) tensor([0.5321, 0.1435, 0.0862, 0.2382]) -Greedy action tensor([ 0.9625, -0.4266, -0.4720, 0.4009]) tensor([0.4859, 0.1211, 0.1158, 0.2771]) -Greedy action tensor([ 1.4904, -0.4701, -0.2900, 0.1825]) tensor([0.6330, 0.0891, 0.1067, 0.1712]) -Greedy action tensor([ 2.1905, -1.5254, -0.1481, 1.0516]) tensor([0.6940, 0.0169, 0.0669, 0.2222]) -Greedy action tensor([ 0.7672, -0.2350, -0.1078, -0.1652]) tensor([0.4592, 0.1686, 0.1914, 0.1807]) -Greedy action tensor([ 1.0552, -0.7752, -0.0488, -0.4728]) tensor([0.5852, 0.0938, 0.1940, 0.1270]) -Greedy action tensor([ 1.2007, -0.5521, -0.2586, -0.0856]) tensor([0.5945, 0.1030, 0.1382, 0.1643]) -Greedy action tensor([ 0.4328, -0.2125, -0.0136, -0.1222]) tensor([0.3652, 0.1915, 0.2337, 0.2096]) -Greedy action tensor([ 0.4662, -0.3819, -0.1604, -0.0011]) tensor([0.3862, 0.1654, 0.2064, 0.2420]) -Greedy action tensor([ 0.6959, -0.3261, 0.0177, -0.2433]) tensor([0.4428, 0.1594, 0.2247, 0.1731]) -Greedy action tensor([ 0.7527, -0.4756, 0.0336, -0.5995]) tensor([0.4905, 0.1436, 0.2390, 0.1269]) -Greedy action tensor([ 0.9072, -0.3451, -0.1169, -0.0859]) tensor([0.4962, 0.1418, 0.1782, 0.1838]) -Greedy action tensor([ 1.1985, -0.8234, -0.1789, -0.4869]) tensor([0.6369, 0.0843, 0.1607, 0.1181]) -Greedy action tensor([ 7.5788e-01, -5.0948e-01, 1.9956e-04, -9.8437e-02]) tensor([0.4598, 0.1295, 0.2155, 0.1953]) -Greedy action tensor([ 0.7795, -0.7611, 0.1005, -0.4637]) tensor([0.4976, 0.1066, 0.2523, 0.1435]) -Greedy action tensor([ 0.8321, -0.4433, -0.1520, -0.2858]) tensor([0.5050, 0.1411, 0.1888, 0.1651]) -Greedy action tensor([ 0.4130, -0.1794, 0.0382, -0.2269]) tensor([0.3613, 0.1998, 0.2484, 0.1905]) -Greedy action tensor([ 1.0769, -1.1916, 0.1063, -0.5162]) tensor([0.5932, 0.0614, 0.2248, 0.1206]) -Greedy action tensor([ 0.5283, -0.2065, -0.0237, -0.6708]) tensor([0.4243, 0.2035, 0.2443, 0.1279]) -Greedy action tensor([ 1.1756, -0.6784, -0.0855, -0.5539]) tensor([0.6183, 0.0968, 0.1752, 0.1097]) -Greedy action tensor([ 0.9364, -0.5991, -0.0788, -0.4060]) tensor([0.5438, 0.1171, 0.1970, 0.1421]) -Greedy action tensor([ 0.6944, 0.3532, -0.1018, -0.0935]) tensor([0.3821, 0.2717, 0.1724, 0.1738]) -Greedy action tensor([ 0.3958, -0.0074, -0.1940, -0.0120]) tensor([0.3463, 0.2314, 0.1920, 0.2303]) -Greedy action tensor([ 0.3648, -0.0627, -0.0395, -0.0510]) tensor([0.3356, 0.2189, 0.2240, 0.2215]) -Greedy action tensor([ 0.5445, -0.1580, -0.0712, -0.0098]) tensor([0.3831, 0.1898, 0.2070, 0.2201]) -Greedy action tensor([ 0.6690, -0.4236, -0.0292, -0.1875]) tensor([0.4430, 0.1485, 0.2204, 0.1881]) -Greedy action tensor([ 1.0957, -0.7438, 0.0335, -0.7056]) tensor([0.5989, 0.0952, 0.2070, 0.0989]) -Greedy action tensor([ 0.8125, -0.8715, -0.0063, -0.3676]) tensor([0.5171, 0.0960, 0.2280, 0.1589]) -Greedy action tensor([ 1.1140, -0.6382, -0.0730, -0.6022]) tensor([0.6030, 0.1046, 0.1840, 0.1084]) -Greedy action tensor([ 0.5329, -0.4631, 0.0086, -0.1046]) tensor([0.4016, 0.1483, 0.2377, 0.2123]) -Greedy action tensor([ 0.4473, -0.1764, 0.0150, -0.2331]) tensor([0.3716, 0.1991, 0.2411, 0.1882]) -Greedy action tensor([ 0.4450, -0.4593, -0.1483, -0.6168]) tensor([0.4342, 0.1758, 0.2399, 0.1502]) -Greedy action tensor([ 0.8247, -0.1559, -0.2470, -0.6327]) tensor([0.5127, 0.1923, 0.1756, 0.1194]) -Greedy action tensor([ 0.5789, -0.3581, 0.0596, -0.2994]) tensor([0.4163, 0.1631, 0.2477, 0.1730]) -Greedy action tensor([ 0.7520, -0.3077, -0.0285, -0.2064]) tensor([0.4570, 0.1584, 0.2094, 0.1752]) -Greedy action tensor([ 0.8509, -0.5010, -0.0689, -0.4147]) tensor([0.5156, 0.1334, 0.2055, 0.1454]) -Greedy action tensor([ 0.8479, -0.6681, -0.0716, -0.4993]) tensor([0.5324, 0.1169, 0.2123, 0.1384]) -Greedy action tensor([0.7416, 0.1938, 0.1016, 0.0358]) tensor([0.3847, 0.2225, 0.2029, 0.1899]) -Greedy action tensor([ 0.8920, -0.6634, 0.0856, -0.6255]) tensor([0.5328, 0.1125, 0.2379, 0.1168]) -Greedy action tensor([ 0.9716, -0.6580, 0.0336, -0.6478]) tensor([0.5601, 0.1098, 0.2192, 0.1109]) -Greedy action tensor([ 0.6857, -0.3569, -0.0381, -0.2317]) tensor([0.4470, 0.1576, 0.2168, 0.1786]) -Greedy action tensor([ 0.9254, -0.6547, 0.0027, -0.3095]) tensor([0.5279, 0.1087, 0.2098, 0.1536]) -Greedy action tensor([ 0.9120, -0.5240, -0.0185, -0.4594]) tensor([0.5302, 0.1261, 0.2091, 0.1345]) -Greedy action tensor([ 0.8551, -0.6726, 0.0454, -0.3811]) tensor([0.5122, 0.1112, 0.2279, 0.1488]) -Greedy action tensor([ 0.9289, -0.7567, 0.0965, -0.2128]) tensor([0.5156, 0.0956, 0.2243, 0.1646]) -Greedy action tensor([ 1.2904, -0.8963, 0.0353, -0.5531]) tensor([0.6429, 0.0722, 0.1832, 0.1017]) -Greedy action tensor([ 0.9461, -0.4420, -0.0967, -0.3843]) tensor([0.5358, 0.1337, 0.1889, 0.1417]) -Greedy action tensor([ 1.0734, -0.6704, 0.0242, -0.4156]) tensor([0.5712, 0.0999, 0.2000, 0.1289]) -Greedy action tensor([ 0.4732, -0.1201, 0.0050, -0.1064]) tensor([0.3651, 0.2017, 0.2286, 0.2045]) -Greedy action tensor([ 0.6205, -0.5462, -0.1313, -0.0434]) tensor([0.4352, 0.1355, 0.2052, 0.2241]) -Greedy action tensor([ 0.5282, -0.3904, -0.1097, -0.1992]) tensor([0.4148, 0.1655, 0.2192, 0.2004]) -Greedy action tensor([ 0.9786, -0.5773, -0.1079, -0.2875]) tensor([0.5464, 0.1153, 0.1843, 0.1540]) -Greedy action tensor([ 0.8467, -0.1977, 0.0135, -0.1184]) tensor([0.4614, 0.1624, 0.2005, 0.1758]) -Greedy action tensor([ 0.8771, -0.2570, -0.0532, -0.0966]) tensor([0.4776, 0.1537, 0.1884, 0.1804]) -Greedy action tensor([ 0.9876, -0.6409, -0.0762, -0.3910]) tensor([0.5576, 0.1094, 0.1925, 0.1405]) -Greedy action tensor([ 1.1015, -0.4051, -0.2047, -0.6369]) tensor([0.5994, 0.1329, 0.1623, 0.1054]) -Greedy action tensor([ 0.2820, -0.0218, -0.0309, -0.1101]) tensor([0.3180, 0.2347, 0.2325, 0.2148]) -Greedy action tensor([ 0.7667, -0.4036, -0.0941, -0.2589]) tensor([0.4781, 0.1483, 0.2022, 0.1714]) -Greedy action tensor([ 0.9895, -0.6752, -0.0633, -0.2774]) tensor([0.5495, 0.1040, 0.1917, 0.1548]) -Greedy action tensor([ 0.2625, -0.0849, -0.1100, -0.3547]) tensor([0.3407, 0.2407, 0.2348, 0.1838]) -Greedy action tensor([ 1.1069, -0.8507, 0.1712, -0.7434]) tensor([0.5915, 0.0835, 0.2320, 0.0930]) -Greedy action tensor([ 0.9023, -0.5160, -0.0256, -0.4151]) tensor([0.5248, 0.1271, 0.2075, 0.1406]) -Greedy action tensor([ 0.7579, -0.3362, -0.0610, -0.1736]) tensor([0.4609, 0.1543, 0.2032, 0.1816]) -Greedy action tensor([ 0.7980, -0.6249, 0.0635, -0.3868]) tensor([0.4934, 0.1189, 0.2367, 0.1509]) -Greedy action tensor([ 0.6231, -0.4346, -0.0981, -0.1818]) tensor([0.4385, 0.1523, 0.2132, 0.1961]) -Greedy action tensor([ 0.9845, -0.5323, -0.0889, -0.3696]) tensor([0.5496, 0.1206, 0.1879, 0.1419]) -Greedy action tensor([ 0.7612, -0.5216, -0.0517, -0.2832]) tensor([0.4825, 0.1338, 0.2140, 0.1698]) -Greedy action tensor([ 0.7516, -0.7517, -0.1346, -0.3598]) tensor([0.5093, 0.1133, 0.2099, 0.1676]) -Greedy action tensor([ 0.9958, -0.6598, 0.0876, -0.5672]) tensor([0.5544, 0.1059, 0.2236, 0.1162]) -Greedy action tensor([ 0.5885, -0.1398, -0.0573, -0.2241]) tensor([0.4081, 0.1970, 0.2139, 0.1810]) -Greedy action tensor([ 0.8399, -0.2506, -0.0127, -0.1270]) tensor([0.4667, 0.1568, 0.1990, 0.1775]) -Greedy action tensor([ 0.4026, 0.1009, -0.1950, 0.0204]) tensor([0.3365, 0.2488, 0.1851, 0.2296]) -Greedy action tensor([ 0.5636, -0.3945, 0.0857, -0.3046]) tensor([0.4127, 0.1583, 0.2559, 0.1732]) -Greedy action tensor([ 0.7587, -0.5590, 0.0784, -0.1867]) tensor([0.4624, 0.1238, 0.2342, 0.1796]) -Greedy action tensor([ 0.9387, -0.4522, 0.1147, -0.2418]) tensor([0.5013, 0.1248, 0.2199, 0.1540]) -Greedy action tensor([ 0.5794, 0.1802, 0.1539, -0.1495]) tensor([0.3563, 0.2390, 0.2328, 0.1719]) -Greedy action tensor([ 0.3581, 0.0411, 0.1354, -0.2189]) tensor([0.3236, 0.2357, 0.2590, 0.1817]) -Greedy action tensor([ 0.7924, -0.2590, 0.0420, -0.1292]) tensor([0.4505, 0.1574, 0.2127, 0.1793]) -Greedy action tensor([ 1.4328, -1.0360, 0.0737, -1.0672]) tensor([0.7024, 0.0595, 0.1804, 0.0577]) -Greedy action tensor([ 0.9451, -0.5578, -0.1096, -0.4890]) tensor([0.5528, 0.1230, 0.1925, 0.1317]) -Greedy action tensor([ 1.0795, -0.7161, -0.0504, -0.4250]) tensor([0.5844, 0.0970, 0.1888, 0.1298]) -Greedy action tensor([ 0.6312, -0.1551, -0.0081, -0.3811]) tensor([0.4262, 0.1941, 0.2249, 0.1549]) -Greedy action tensor([ 0.9765, -0.6804, -0.1781, -0.2297]) tensor([0.5540, 0.1057, 0.1746, 0.1658]) -Greedy action tensor([ 0.7767, -0.5903, -0.0656, -0.3124]) tensor([0.4945, 0.1260, 0.2130, 0.1664]) -Greedy action tensor([ 0.8367, -0.3492, -0.1431, 0.0199]) tensor([0.4711, 0.1439, 0.1768, 0.2082]) -Greedy action tensor([ 0.7145, -0.4008, -0.5006, 0.7169]) tensor([0.3807, 0.1248, 0.1129, 0.3816]) -Greedy action tensor([-1.6641e+00, -1.7402e-02, 8.5175e-04, 1.7375e-01]) tensor([0.0563, 0.2922, 0.2976, 0.3538]) -Greedy action tensor([-0.0026, -0.3908, 1.3077, -0.2554]) tensor([0.1623, 0.1101, 0.6016, 0.1260]) -Greedy action tensor([-1.0376, -1.2024, -1.1168, -0.3000]) tensor([0.2056, 0.1744, 0.1900, 0.4300]) -Greedy action tensor([ 0.2986, -1.6371, 0.2299, 0.4513]) tensor([0.3084, 0.0445, 0.2879, 0.3592]) -Greedy action tensor([1.4737, 0.1472, 0.2528, 1.0378]) tensor([0.4531, 0.1203, 0.1336, 0.2930]) -Greedy action tensor([ 0.8107, -0.7827, -0.0305, 0.0204]) tensor([0.4789, 0.0973, 0.2065, 0.2173]) -Greedy action tensor([ 0.8055, -1.5781, 1.4671, -0.2330]) tensor([0.2955, 0.0273, 0.5726, 0.1046]) -Greedy action tensor([ 0.4294, -1.2843, -0.9409, -0.1795]) tensor([0.5055, 0.0911, 0.1284, 0.2750]) -Greedy action tensor([-0.3309, -0.6763, 0.0599, -0.2113]) tensor([0.2318, 0.1641, 0.3427, 0.2613]) -Greedy action tensor([ 0.4573, 0.0319, -0.4533, 0.0067]) tensor([0.3713, 0.2427, 0.1494, 0.2366]) -Greedy action tensor([ 0.1464, 0.1446, 0.3506, -0.2318]) tensor([0.2558, 0.2553, 0.3137, 0.1752]) -Greedy action tensor([ 0.4929, 0.0721, -1.1212, 0.4652]) tensor([0.3536, 0.2321, 0.0704, 0.3439]) -Greedy action tensor([-0.7767, -0.1430, -0.3227, -0.6137]) tensor([0.1774, 0.3344, 0.2794, 0.2088]) -Greedy action tensor([ 0.1247, -0.2618, 1.6946, -0.8415]) tensor([0.1456, 0.0990, 0.7000, 0.0554]) -Greedy action tensor([-0.9125, -0.3852, 0.5756, -0.3016]) tensor([0.1115, 0.1890, 0.4940, 0.2055]) -Greedy action tensor([ 1.3177, 0.3148, 1.8165, -0.1726]) tensor([0.3088, 0.1133, 0.5084, 0.0696]) -Greedy action tensor([ 0.8396, 0.4343, 0.5142, -0.0025]) tensor([0.3546, 0.2365, 0.2561, 0.1528]) -Greedy action tensor([ 0.6106, -1.8251, -0.2141, 0.1976]) tensor([0.4571, 0.0400, 0.2004, 0.3025]) -Greedy action tensor([-0.6702, -0.7053, 0.2788, -1.1506]) tensor([0.1935, 0.1869, 0.4999, 0.1197]) -Greedy action tensor([ 0.5081, -0.3740, -0.0293, -0.3979]) tensor([0.4163, 0.1723, 0.2432, 0.1682]) -Greedy action tensor([ 0.4749, -0.6875, -0.6573, 0.4967]) tensor([0.3763, 0.1177, 0.1213, 0.3847]) -Greedy action tensor([ 0.8085, -1.2366, -0.0577, 0.6554]) tensor([0.4153, 0.0537, 0.1746, 0.3563]) -Greedy action tensor([-0.5552, 0.6989, 1.1255, -0.4217]) tensor([0.0908, 0.3181, 0.4874, 0.1037]) -Greedy action tensor([ 0.1855, -0.0349, -0.2068, -0.2015]) tensor([0.3168, 0.2541, 0.2140, 0.2151]) -Greedy action tensor([-0.3088, -0.2639, 1.3335, -0.9946]) tensor([0.1296, 0.1355, 0.6696, 0.0653]) -Greedy action tensor([ 0.4947, -1.7923, -0.7252, -0.2951]) tensor([0.5403, 0.0549, 0.1595, 0.2453]) -Greedy action tensor([ 0.8836, -1.1495, -0.2135, 1.2237]) tensor([0.3485, 0.0456, 0.1163, 0.4896]) -Greedy action tensor([ 0.0959, -0.4301, -1.2284, 0.4707]) tensor([0.3020, 0.1784, 0.0803, 0.4393]) -Greedy action tensor([ 0.3039, -1.2752, 0.6870, 0.4862]) tensor([0.2582, 0.0532, 0.3787, 0.3098]) -Greedy action tensor([ 0.1744, -0.6744, 0.4895, -0.0679]) tensor([0.2791, 0.1194, 0.3825, 0.2190]) -Greedy action tensor([ 0.7456, -1.1336, -0.2392, 0.2619]) tensor([0.4667, 0.0713, 0.1743, 0.2877]) -Greedy action tensor([ 0.6536, -1.5280, 0.1201, -0.6360]) tensor([0.5064, 0.0572, 0.2970, 0.1394]) -Greedy action tensor([ 1.1674, -1.3838, 1.4400, 0.4651]) tensor([0.3464, 0.0270, 0.4550, 0.1716]) -Greedy action tensor([-0.4587, -1.1054, -0.5847, -0.6809]) tensor([0.3119, 0.1634, 0.2750, 0.2498]) -Greedy action tensor([-0.7053, -0.9390, 0.1892, -1.1247]) tensor([0.2043, 0.1617, 0.4997, 0.1343]) -Greedy action tensor([1.2834, 0.0920, 0.0053, 0.8604]) tensor([0.4470, 0.1358, 0.1245, 0.2928]) -Greedy action tensor([ 0.3096, -0.8084, 0.2286, -0.1904]) tensor([0.3502, 0.1145, 0.3229, 0.2124]) -Greedy action tensor([ 0.3502, -1.6757, -0.2942, 1.0903]) tensor([0.2665, 0.0351, 0.1399, 0.5585]) -Greedy action tensor([-0.7314, -1.8410, 0.0189, -0.3007]) tensor([0.2006, 0.0661, 0.4247, 0.3086]) -Greedy action tensor([ 1.3779, -1.0385, 0.5335, 0.5268]) tensor([0.5139, 0.0459, 0.2209, 0.2194]) -Greedy action tensor([ 1.5342, -0.2680, 1.8804, 0.1020]) tensor([0.3549, 0.0585, 0.5018, 0.0848]) -Greedy action tensor([ 0.2853, 0.2301, 0.0165, -0.7272]) tensor([0.3253, 0.3078, 0.2487, 0.1182]) -Greedy action tensor([ 0.2868, -0.8792, -0.3640, 0.4333]) tensor([0.3343, 0.1042, 0.1744, 0.3871]) -Greedy action tensor([-0.5862, -1.3688, 1.1212, -0.8836]) tensor([0.1296, 0.0593, 0.7148, 0.0963]) -Greedy action tensor([ 0.9287, -1.5289, -0.1152, 0.1958]) tensor([0.5213, 0.0446, 0.1835, 0.2505]) -Greedy action tensor([ 0.4548, -0.3247, -1.1029, -0.2940]) tensor([0.4668, 0.2141, 0.0983, 0.2208]) -Greedy action tensor([-0.2328, -1.3839, -0.1702, -0.9919]) tensor([0.3510, 0.1110, 0.3737, 0.1643]) -Greedy action tensor([ 0.8805, -1.0831, -1.0784, 0.1868]) tensor([0.5614, 0.0788, 0.0792, 0.2806]) -Greedy action tensor([-0.2443, -0.1463, -0.3283, -0.6355]) tensor([0.2704, 0.2982, 0.2486, 0.1828]) -Greedy action tensor([0.6243, 0.4232, 0.9424, 1.5379]) tensor([0.1759, 0.1438, 0.2418, 0.4385]) -Greedy action tensor([-0.5739, -1.0926, -0.3393, 0.2148]) tensor([0.1976, 0.1176, 0.2499, 0.4349]) -Greedy action tensor([ 0.1919, -0.0998, 0.4214, -1.3870]) tensor([0.3114, 0.2326, 0.3918, 0.0642]) -Greedy action tensor([ 0.1537, 0.0234, 0.3253, -0.2294]) tensor([0.2669, 0.2343, 0.3168, 0.1820]) -Greedy action tensor([ 0.0235, 0.0615, -0.4308, -0.7836]) tensor([0.3205, 0.3330, 0.2035, 0.1430]) -Greedy action tensor([ 1.0541, -0.5904, -0.4011, 0.9694]) tensor([0.4264, 0.0823, 0.0995, 0.3918]) -Greedy action tensor([ 0.1602, -0.8626, 0.4652, -0.0938]) tensor([0.2864, 0.1030, 0.3885, 0.2221]) -Greedy action tensor([ 0.0538, -0.3423, -0.1473, -0.0017]) tensor([0.2910, 0.1958, 0.2380, 0.2753]) -Greedy action tensor([-0.3386, 0.6993, -0.4025, -0.1697]) tensor([0.1682, 0.4749, 0.1578, 0.1991]) -Greedy action tensor([ 0.5392, -1.6878, -0.0886, 0.0982]) tensor([0.4376, 0.0472, 0.2336, 0.2816]) -Greedy action tensor([ 1.0745, -0.1630, -0.9544, -0.1146]) tensor([0.5793, 0.1681, 0.0762, 0.1764]) -Greedy action tensor([-0.9741, -0.5631, -0.7787, -0.8051]) tensor([0.2037, 0.3073, 0.2477, 0.2413]) -Greedy action tensor([-0.1488, -0.6723, 0.5393, -0.2463]) tensor([0.2227, 0.1320, 0.4433, 0.2021]) -Greedy action tensor([ 0.8205, -1.1765, 1.0693, -0.4133]) tensor([0.3691, 0.0501, 0.4733, 0.1075]) -Greedy action tensor([ 0.9640, -1.8804, -0.0594, -0.9742]) tensor([0.6404, 0.0373, 0.2301, 0.0922]) -Greedy action tensor([ 0.5329, -1.2429, -0.6682, 0.3869]) tensor([0.4284, 0.0725, 0.1289, 0.3702]) -Greedy action tensor([-0.2104, -0.6625, 0.5651, -1.2091]) tensor([0.2394, 0.1524, 0.5200, 0.0882]) -Greedy action tensor([-0.0300, 0.1073, -0.2441, -0.8581]) tensor([0.2949, 0.3383, 0.2380, 0.1288]) -Greedy action tensor([-1.4198, 0.5508, 0.9865, -1.8329]) tensor([0.0502, 0.3600, 0.5566, 0.0332]) -Greedy action tensor([ 0.2866, -0.8243, -0.2938, 1.4824]) tensor([0.1925, 0.0634, 0.1077, 0.6364]) -Greedy action tensor([-0.2018, -0.4219, 0.6441, -0.4337]) tensor([0.2030, 0.1629, 0.4731, 0.1610]) -Greedy action tensor([ 0.5540, -0.8894, -0.3714, -0.0309]) tensor([0.4567, 0.1078, 0.1810, 0.2544]) -Greedy action tensor([ 0.3043, -1.0149, -1.1684, -0.1664]) tensor([0.4714, 0.1260, 0.1081, 0.2944]) -Greedy action tensor([ 0.0455, 0.3527, 0.1116, -0.5993]) tensor([0.2530, 0.3440, 0.2703, 0.1328]) -Greedy action tensor([ 0.2820, -0.1277, -0.7276, 0.2126]) tensor([0.3377, 0.2242, 0.1230, 0.3151]) -Greedy action tensor([ 0.3137, 0.2634, -0.8769, -0.2892]) tensor([0.3569, 0.3393, 0.1085, 0.1953]) -Greedy action tensor([1.4490, 0.5072, 0.0447, 0.0699]) tensor([0.5299, 0.2066, 0.1301, 0.1334]) -Greedy action tensor([-1.4494, 0.7256, -0.6473, -0.7438]) tensor([0.0711, 0.6261, 0.1587, 0.1441]) -Greedy action tensor([ 0.4346, -0.1577, -0.2765, 0.2988]) tensor([0.3428, 0.1896, 0.1684, 0.2993]) -Greedy action tensor([-1.3502, -1.7755, -0.6825, -0.3880]) tensor([0.1608, 0.1051, 0.3134, 0.4208]) -Greedy action tensor([ 0.5322, -0.8953, -0.9033, 0.3891]) tensor([0.4265, 0.1023, 0.1015, 0.3697]) -Greedy action tensor([-1.1626, 0.1767, 0.2188, 0.0902]) tensor([0.0813, 0.3104, 0.3237, 0.2846]) -Greedy action tensor([-1.4312, 0.5509, 0.3580, -0.0706]) tensor([0.0551, 0.4001, 0.3299, 0.2149]) -Greedy action tensor([-1.2401, -0.4309, 0.3593, 0.0348]) tensor([0.0849, 0.1908, 0.4204, 0.3039]) -Greedy action tensor([-1.7153, -0.3050, 0.6227, -0.0602]) tensor([0.0483, 0.1980, 0.5007, 0.2529]) -Greedy action tensor([-1.3785, -0.4612, 0.3752, 0.2293]) tensor([0.0701, 0.1754, 0.4048, 0.3498]) -Greedy action tensor([-1.5886, -0.4154, 0.5348, -0.1677]) tensor([0.0598, 0.1932, 0.4996, 0.2475]) -Greedy action tensor([-1.8957, -0.3892, 0.6528, -0.1390]) tensor([0.0415, 0.1872, 0.5308, 0.2405]) -Greedy action tensor([-0.8428, 0.2138, 0.1908, 0.5861]) tensor([0.0921, 0.2648, 0.2588, 0.3843]) -Greedy action tensor([-1.8839, -0.4501, 0.6335, -0.1504]) tensor([0.0430, 0.1804, 0.5332, 0.2434]) -Greedy action tensor([-0.5591, 0.5946, 0.0469, 0.7355]) tensor([0.1036, 0.3284, 0.1899, 0.3781]) -Greedy action tensor([-1.3408, -0.3268, 0.3118, 0.1120]) tensor([0.0755, 0.2080, 0.3939, 0.3226]) -Greedy action tensor([-1.9090, -0.3982, 0.6512, -0.1533]) tensor([0.0412, 0.1868, 0.5334, 0.2386]) -Greedy action tensor([-1.8043, -0.3865, 0.6436, -0.1103]) tensor([0.0452, 0.1865, 0.5225, 0.2458]) -Greedy action tensor([-1.4472, -0.0328, 0.3940, -0.0486]) tensor([0.0646, 0.2660, 0.4076, 0.2618]) -Greedy action tensor([-1.2439, -0.0730, 0.6878, 0.5017]) tensor([0.0593, 0.1913, 0.4094, 0.3399]) -Greedy action tensor([-1.7683, -0.2515, 0.5544, -0.1152]) tensor([0.0477, 0.2172, 0.4862, 0.2489]) -Greedy action tensor([-1.5950, -0.4680, 0.5019, 0.0055]) tensor([0.0582, 0.1796, 0.4738, 0.2884]) -Greedy action tensor([-1.4786, -0.2838, 0.4420, -0.0731]) tensor([0.0658, 0.2172, 0.4489, 0.2682]) -Greedy action tensor([-1.3252, 0.4884, 0.2637, 0.2301]) tensor([0.0596, 0.3657, 0.2921, 0.2825]) -Greedy action tensor([-1.5975, -0.3993, 0.4632, 0.0086]) tensor([0.0583, 0.1933, 0.4578, 0.2906]) -Greedy action tensor([-0.6875, 0.4681, 0.0621, -0.0253]) tensor([0.1215, 0.3859, 0.2571, 0.2356]) -Greedy action tensor([-1.8709, -0.4320, 0.6291, -0.1445]) tensor([0.0434, 0.1832, 0.5292, 0.2442]) -Greedy action tensor([-1.8904, -0.4232, 0.6367, -0.1567]) tensor([0.0425, 0.1844, 0.5323, 0.2407]) -Greedy action tensor([-1.9119, -0.4221, 0.6543, -0.1623]) tensor([0.0413, 0.1833, 0.5378, 0.2377]) -Greedy action tensor([-1.8718, -0.4392, 0.6326, -0.1459]) tensor([0.0434, 0.1818, 0.5310, 0.2438]) -Greedy action tensor([-1.6688, -0.3145, 0.5905, -0.0363]) tensor([0.0511, 0.1980, 0.4894, 0.2615]) -Greedy action tensor([-1.9179, -0.4441, 0.6567, -0.1658]) tensor([0.0412, 0.1800, 0.5411, 0.2377]) -Greedy action tensor([-1.8349, -0.4794, 0.6273, -0.1140]) tensor([0.0450, 0.1747, 0.5284, 0.2518]) -Greedy action tensor([-1.9156, -0.3865, 0.6506, -0.1638]) tensor([0.0410, 0.1891, 0.5336, 0.2363]) -Greedy action tensor([-1.9257, -0.4505, 0.6583, -0.1678]) tensor([0.0409, 0.1790, 0.5425, 0.2375]) -Greedy action tensor([-1.8124, -0.3726, 0.6005, -0.1175]) tensor([0.0458, 0.1933, 0.5114, 0.2495]) -Greedy action tensor([-1.9152, -0.3627, 0.6463, -0.1560]) tensor([0.0408, 0.1929, 0.5291, 0.2372]) -Greedy action tensor([-1.8827, -0.4512, 0.6417, -0.1497]) tensor([0.0429, 0.1794, 0.5352, 0.2425]) -Greedy action tensor([-1.8304, -0.4154, 0.6027, -0.1241]) tensor([0.0454, 0.1870, 0.5175, 0.2502]) -Greedy action tensor([-1.9191, -0.4395, 0.6566, -0.1657]) tensor([0.0411, 0.1807, 0.5406, 0.2376]) -Greedy action tensor([-1.9301, -0.4379, 0.6615, -0.1721]) tensor([0.0407, 0.1808, 0.5428, 0.2358]) -Greedy action tensor([-1.9321, -0.4463, 0.6619, -0.1737]) tensor([0.0406, 0.1796, 0.5439, 0.2359]) -Greedy action tensor([-1.4838, -0.3182, 0.7254, 0.1816]) tensor([0.0538, 0.1724, 0.4896, 0.2842]) -Greedy action tensor([-1.1624, 0.3010, 0.4701, 0.4011]) tensor([0.0657, 0.2840, 0.3363, 0.3139]) -Greedy action tensor([-1.5284, -0.2213, 0.3318, 0.0972]) tensor([0.0617, 0.2281, 0.3966, 0.3136]) -Greedy action tensor([-1.2640, -0.5161, 0.3511, 0.1255]) tensor([0.0823, 0.1738, 0.4137, 0.3302]) -Greedy action tensor([-1.6862, 0.1741, 0.4404, -0.0726]) tensor([0.0480, 0.3084, 0.4026, 0.2410]) -Greedy action tensor([-1.2708, 0.5507, 0.1647, 0.2879]) tensor([0.0620, 0.3831, 0.2604, 0.2946]) -Greedy action tensor([-1.9395, -0.4436, 0.6661, -0.1757]) tensor([0.0403, 0.1797, 0.5451, 0.2349]) -Greedy action tensor([-1.7599, -0.4759, 0.5737, -0.0780]) tensor([0.0493, 0.1779, 0.5081, 0.2648]) -Greedy action tensor([-1.8426, -0.3657, 0.6134, -0.1306]) tensor([0.0443, 0.1940, 0.5164, 0.2454]) -Greedy action tensor([-1.9193, -0.4599, 0.6645, -0.1568]) tensor([0.0410, 0.1765, 0.5434, 0.2390]) -Greedy action tensor([-1.6520, 0.1045, 0.4576, -0.1610]) tensor([0.0513, 0.2974, 0.4233, 0.2280]) -Greedy action tensor([-1.2711, -0.5226, 0.3219, 0.1839]) tensor([0.0812, 0.1716, 0.3993, 0.3479]) -Greedy action tensor([-1.9483, -0.4518, 0.6685, -0.1829]) tensor([0.0400, 0.1786, 0.5476, 0.2337]) -Greedy action tensor([-1.1114, -0.6260, 0.2701, 0.1475]) tensor([0.0987, 0.1604, 0.3931, 0.3477]) -Greedy action tensor([-1.4152, -0.3014, 0.3977, 0.1340]) tensor([0.0672, 0.2047, 0.4118, 0.3163]) -Greedy action tensor([-1.7803, -0.2290, 0.5711, -0.1248]) tensor([0.0466, 0.2199, 0.4894, 0.2440]) -Greedy action tensor([-0.6390, 0.8107, 0.0560, 0.0695]) tensor([0.1076, 0.4584, 0.2155, 0.2185]) -Greedy action tensor([-1.9277, -0.3947, 0.6530, -0.1712]) tensor([0.0406, 0.1881, 0.5362, 0.2352]) -Greedy action tensor([-1.8960, -0.4184, 0.6437, -0.1503]) tensor([0.0420, 0.1842, 0.5329, 0.2409]) -Greedy action tensor([-1.8951, -0.4412, 0.6449, -0.1549]) tensor([0.0423, 0.1809, 0.5359, 0.2409]) -Greedy action tensor([-1.1582, -0.4797, 0.7602, 0.8872]) tensor([0.0571, 0.1125, 0.3889, 0.4415]) -Greedy action tensor([-1.8567, -0.3845, 0.6493, -0.1008]) tensor([0.0427, 0.1863, 0.5237, 0.2473]) -Greedy action tensor([-1.5677, -0.0718, 0.5555, 0.0210]) tensor([0.0534, 0.2385, 0.4465, 0.2616]) -Greedy action tensor([-1.8119, -0.4604, 0.6071, -0.1211]) tensor([0.0465, 0.1795, 0.5220, 0.2520]) -Greedy action tensor([-1.9491, -0.4508, 0.6675, -0.1834]) tensor([0.0400, 0.1789, 0.5474, 0.2337]) -Greedy action tensor([-1.7046, -0.4702, 0.5524, -0.1139]) tensor([0.0529, 0.1818, 0.5056, 0.2597]) -Greedy action tensor([-1.7224, -0.3173, 0.6300, -0.0179]) tensor([0.0474, 0.1933, 0.4985, 0.2608]) -Greedy action tensor([-1.8143, -0.1606, 0.6630, -0.3467]) tensor([0.0445, 0.2326, 0.5299, 0.1931]) -Greedy action tensor([-1.6307, -0.3949, 0.4950, -0.0132]) tensor([0.0560, 0.1927, 0.4691, 0.2822]) -Greedy action tensor([-1.9103, -0.4191, 0.6614, -0.1342]) tensor([0.0409, 0.1818, 0.5356, 0.2417]) -Greedy action tensor([-1.9027, -0.4635, 0.6464, -0.1631]) tensor([0.0422, 0.1779, 0.5397, 0.2402]) -Greedy action tensor([-1.9314, -0.4342, 0.6620, -0.1738]) tensor([0.0406, 0.1814, 0.5427, 0.2353]) -Greedy action tensor([-0.8579, 0.7322, -0.0303, -0.5548]) tensor([0.1047, 0.5138, 0.2397, 0.1418]) -Greedy action tensor([-1.8541, -0.3369, 0.5804, -0.1419]) tensor([0.0444, 0.2025, 0.5069, 0.2461]) -Greedy action tensor([-1.9383, -0.4455, 0.6639, -0.1772]) tensor([0.0404, 0.1797, 0.5449, 0.2350]) -Greedy action tensor([-1.9373, -0.3841, 0.6518, -0.1767]) tensor([0.0402, 0.1901, 0.5357, 0.2340]) -Greedy action tensor([-1.7021, 0.1394, 0.5964, -0.5101]) tensor([0.0486, 0.3067, 0.4844, 0.1602]) -Greedy action tensor([-1.5913, -0.0265, 0.5775, 0.0262]) tensor([0.0511, 0.2443, 0.4470, 0.2576]) -Greedy action tensor([-1.9306, -0.4384, 0.6613, -0.1724]) tensor([0.0406, 0.1807, 0.5428, 0.2358]) -Greedy action tensor([0.7983, 0.5951, 0.5668, 1.3894]) tensor([0.2265, 0.1848, 0.1797, 0.4090]) -Greedy action tensor([-1.9145, -0.4531, 0.6575, -0.1657]) tensor([0.0414, 0.1785, 0.5421, 0.2380]) -Greedy action tensor([-1.8947, -0.4459, 0.6457, -0.1574]) tensor([0.0423, 0.1802, 0.5369, 0.2405]) -Greedy action tensor([-0.8048, 0.7213, 0.0965, -0.0238]) tensor([0.0976, 0.4489, 0.2404, 0.2131]) -Greedy action tensor([-1.5396, 0.1430, 0.3985, -0.0746]) tensor([0.0566, 0.3047, 0.3934, 0.2452]) -Greedy action tensor([ 1.9559, 0.1249, -0.2876, 0.5473]) tensor([0.6619, 0.1061, 0.0702, 0.1618]) -Greedy action tensor([ 1.4906, -0.1024, -0.2489, 0.1385]) tensor([0.6106, 0.1242, 0.1072, 0.1580]) -Greedy action tensor([ 1.2640, -0.5890, -0.4758, 0.1851]) tensor([0.5980, 0.0937, 0.1050, 0.2033]) -Greedy action tensor([ 2.5019, -0.8426, -0.3469, 0.5741]) tensor([0.8073, 0.0285, 0.0468, 0.1174]) -Greedy action tensor([ 1.2891, 0.0201, -1.1191, 0.3334]) tensor([0.5696, 0.1601, 0.0513, 0.2190]) -Greedy action tensor([ 1.8421, -0.6809, -0.5142, 0.3550]) tensor([0.7138, 0.0573, 0.0676, 0.1613]) -Greedy action tensor([ 1.4679, -0.6316, -0.2211, 0.1105]) tensor([0.6391, 0.0783, 0.1181, 0.1645]) -Greedy action tensor([ 1.4141, 0.0548, -0.4672, 0.0635]) tensor([0.5994, 0.1540, 0.0913, 0.1553]) -Greedy action tensor([ 1.2631, -0.4162, -0.6456, 0.5207]) tensor([0.5523, 0.1030, 0.0819, 0.2629]) -Greedy action tensor([ 1.5090, -0.5997, -0.6436, 0.4248]) tensor([0.6346, 0.0770, 0.0737, 0.2146]) -Greedy action tensor([ 1.0614, -0.0192, -0.3725, 0.2151]) tensor([0.4983, 0.1691, 0.1188, 0.2138]) -Greedy action tensor([ 1.2646, -0.5360, -0.5004, 0.0398]) tensor([0.6134, 0.1013, 0.1050, 0.1802]) -Greedy action tensor([ 2.2705, -0.8988, -0.2885, 0.5743]) tensor([0.7676, 0.0323, 0.0594, 0.1408]) -Greedy action tensor([ 1.0017, -0.2780, -0.5654, 0.5060]) tensor([0.4771, 0.1327, 0.0996, 0.2906]) -Greedy action tensor([ 0.9108, -0.1848, -0.4065, 0.3862]) tensor([0.4558, 0.1524, 0.1221, 0.2697]) -Greedy action tensor([ 1.5696, -0.5345, -0.5271, 0.5242]) tensor([0.6264, 0.0764, 0.0770, 0.2202]) -Greedy action tensor([ 1.5226, -0.4923, -0.7652, 0.7235]) tensor([0.5936, 0.0791, 0.0602, 0.2670]) -Greedy action tensor([ 1.5497, -0.5062, -0.2703, 0.3370]) tensor([0.6300, 0.0806, 0.1021, 0.1873]) -Greedy action tensor([ 1.8316, 0.4841, -0.5734, 0.4196]) tensor([0.6274, 0.1631, 0.0566, 0.1529]) -Greedy action tensor([ 1.4081, 0.2158, -0.6323, 0.0161]) tensor([0.5945, 0.1804, 0.0773, 0.1478]) -Greedy action tensor([ 1.2773, -0.5782, -0.2326, 0.2439]) tensor([0.5770, 0.0902, 0.1275, 0.2053]) -Greedy action tensor([ 1.5972, 0.1681, -0.1611, -0.0256]) tensor([0.6214, 0.1488, 0.1071, 0.1226]) -Greedy action tensor([ 1.6182, -0.7805, -0.2421, 0.6877]) tensor([0.6094, 0.0554, 0.0948, 0.2403]) -Greedy action tensor([ 1.5797, -0.5394, -0.3692, 0.1819]) tensor([0.6624, 0.0796, 0.0943, 0.1637]) -Greedy action tensor([ 1.3487, -0.1438, -0.5033, 0.4397]) tensor([0.5603, 0.1260, 0.0879, 0.2258]) -Greedy action tensor([ 1.3361, -0.2177, -0.5791, 0.4759]) tensor([0.5612, 0.1187, 0.0827, 0.2374]) -Greedy action tensor([ 1.9296, -1.1008, -0.0365, 0.1319]) tensor([0.7386, 0.0357, 0.1034, 0.1224]) -Greedy action tensor([ 1.6926, -0.7845, -0.1790, 0.1391]) tensor([0.6899, 0.0579, 0.1062, 0.1459]) -Greedy action tensor([ 1.0707, -0.3533, 0.0727, 0.3989]) tensor([0.4717, 0.1136, 0.1739, 0.2409]) -Greedy action tensor([ 2.3707, -1.4266, -0.0668, 0.5299]) tensor([0.7883, 0.0177, 0.0689, 0.1251]) -Greedy action tensor([ 1.5017, -0.6353, -0.3604, 0.5637]) tensor([0.6007, 0.0709, 0.0933, 0.2351]) -Greedy action tensor([ 1.0204, -0.2387, -0.4300, 0.1503]) tensor([0.5162, 0.1466, 0.1210, 0.2162]) -Greedy action tensor([ 1.5543, -0.2502, -0.5124, -0.0379]) tensor([0.6691, 0.1101, 0.0847, 0.1361]) -Greedy action tensor([ 1.6686, -0.5934, -0.1874, 0.0885]) tensor([0.6819, 0.0710, 0.1066, 0.1405]) -Greedy action tensor([ 1.2943, -0.2138, -0.9277, 0.1308]) tensor([0.6090, 0.1348, 0.0660, 0.1902]) -Greedy action tensor([ 1.7280, -1.2020, -0.6490, 0.1572]) tensor([0.7385, 0.0394, 0.0686, 0.1535]) -Greedy action tensor([ 1.9102, -0.1143, -0.4684, 0.2502]) tensor([0.7068, 0.0933, 0.0655, 0.1344]) -Greedy action tensor([ 1.5680, -0.3134, -1.1046, 0.3866]) tensor([0.6543, 0.0997, 0.0452, 0.2008]) -Greedy action tensor([ 1.1201, -0.8189, -0.1298, 0.3957]) tensor([0.5222, 0.0751, 0.1496, 0.2531]) -Greedy action tensor([ 1.0723, -0.0676, -0.0978, 0.3425]) tensor([0.4734, 0.1514, 0.1469, 0.2282]) -Greedy action tensor([ 1.4174, -0.6140, -0.1864, -0.0073]) tensor([0.6358, 0.0834, 0.1279, 0.1530]) -Greedy action tensor([ 1.6656, -0.2791, -0.4803, 0.2385]) tensor([0.6667, 0.0954, 0.0780, 0.1600]) -Greedy action tensor([ 1.5874, -0.1838, -0.5390, 0.3107]) tensor([0.6376, 0.1085, 0.0760, 0.1779]) -Greedy action tensor([ 1.7356, 0.3811, -0.5361, 0.5327]) tensor([0.6019, 0.1553, 0.0621, 0.1807]) -Greedy action tensor([ 1.3905, 0.0234, -1.0589, 0.4191]) tensor([0.5815, 0.1482, 0.0502, 0.2201]) -Greedy action tensor([ 1.9381, -0.3705, -0.5471, 0.1737]) tensor([0.7385, 0.0734, 0.0615, 0.1265]) -Greedy action tensor([ 1.1521, -0.1757, -0.2711, 0.1671]) tensor([0.5321, 0.1410, 0.1282, 0.1987]) -Greedy action tensor([ 1.8354, -0.4515, -0.8131, 0.3219]) tensor([0.7181, 0.0730, 0.0508, 0.1581]) -Greedy action tensor([ 1.6938, -0.5168, -0.1347, 0.4536]) tensor([0.6412, 0.0703, 0.1030, 0.1855]) -Greedy action tensor([ 0.8201, -0.2043, -0.5695, 0.5269]) tensor([0.4248, 0.1525, 0.1059, 0.3168]) -Greedy action tensor([ 1.8206, -0.3719, -0.6174, 0.4459]) tensor([0.6888, 0.0769, 0.0602, 0.1742]) -Greedy action tensor([ 1.7777, -0.4946, -0.4326, -0.1743]) tensor([0.7381, 0.0761, 0.0810, 0.1048]) -Greedy action tensor([ 1.3280, -0.3557, -0.5705, 0.2726]) tensor([0.5940, 0.1103, 0.0890, 0.2067]) -Greedy action tensor([ 2.4430, -0.8514, -0.3824, 0.2665]) tensor([0.8266, 0.0307, 0.0490, 0.0938]) -Greedy action tensor([ 1.5115, -0.3387, -0.4059, -0.1451]) tensor([0.6689, 0.1052, 0.0983, 0.1276]) -Greedy action tensor([ 2.8620, -1.7858, 0.2889, 0.3055]) tensor([0.8595, 0.0082, 0.0656, 0.0667]) -Greedy action tensor([ 1.6200, 0.2590, -0.2463, 0.0047]) tensor([0.6211, 0.1593, 0.0961, 0.1235]) -Greedy action tensor([ 1.2424, -0.3310, -0.7985, 0.5111]) tensor([0.5499, 0.1140, 0.0714, 0.2647]) -Greedy action tensor([ 1.3712, -0.5772, -0.3161, 0.4780]) tensor([0.5757, 0.0820, 0.1065, 0.2357]) -Greedy action tensor([ 1.3131, -0.3598, -0.3524, 0.3865]) tensor([0.5641, 0.1059, 0.1067, 0.2233]) -Greedy action tensor([ 1.1936, -0.5620, -0.3240, 0.3719]) tensor([0.5459, 0.0943, 0.1197, 0.2400]) -Greedy action tensor([ 1.5088, -0.3908, -0.3630, 0.3387]) tensor([0.6197, 0.0927, 0.0953, 0.1923]) -Greedy action tensor([ 1.0945, -0.3758, -0.3773, 0.1751]) tensor([0.5382, 0.1237, 0.1235, 0.2146]) -Greedy action tensor([ 1.0897, -0.3119, -0.8062, 0.1939]) tensor([0.5541, 0.1364, 0.0832, 0.2262]) -Greedy action tensor([ 1.3047, -0.2439, -0.6185, 0.2866]) tensor([0.5814, 0.1236, 0.0850, 0.2101]) -Greedy action tensor([ 1.4928, -0.3360, -1.1638, 0.3183]) tensor([0.6494, 0.1043, 0.0456, 0.2007]) -Greedy action tensor([ 1.6516, -0.3757, -0.4123, 0.5392]) tensor([0.6300, 0.0830, 0.0800, 0.2071]) -Greedy action tensor([ 1.8681, -0.3549, -0.0958, 0.6285]) tensor([0.6501, 0.0704, 0.0912, 0.1882]) -Greedy action tensor([ 1.9014, -0.3455, -0.6749, 0.5764]) tensor([0.6908, 0.0730, 0.0525, 0.1836]) -Greedy action tensor([ 1.6534, -0.1607, -0.3814, -0.1180]) tensor([0.6832, 0.1113, 0.0893, 0.1162]) -Greedy action tensor([ 1.1689, -0.6219, -0.3671, 0.6061]) tensor([0.5124, 0.0855, 0.1103, 0.2919]) -Greedy action tensor([ 1.1892, -0.3754, -0.5787, 0.0485]) tensor([0.5884, 0.1231, 0.1004, 0.1881]) -Greedy action tensor([ 1.3587, -0.2832, -0.3771, 0.4922]) tensor([0.5586, 0.1081, 0.0985, 0.2348]) -Greedy action tensor([ 1.5062, -0.4304, -0.5876, 0.2944]) tensor([0.6389, 0.0921, 0.0787, 0.1902]) -Greedy action tensor([ 1.9001, -0.4858, -1.3191, 0.4116]) tensor([0.7365, 0.0678, 0.0295, 0.1662]) -Greedy action tensor([ 1.0643, 0.1937, -0.9161, 0.0593]) tensor([0.5201, 0.2178, 0.0718, 0.1904]) -Greedy action tensor([ 1.1015, -0.3967, -0.4337, 0.5319]) tensor([0.4988, 0.1115, 0.1075, 0.2822]) -Greedy action tensor([ 1.2325, 0.0018, -0.6514, -0.0086]) tensor([0.5770, 0.1685, 0.0877, 0.1668]) -Greedy action tensor([ 1.8544, -1.3845, -0.5131, 0.0352]) tensor([0.7722, 0.0303, 0.0724, 0.1252]) -Greedy action tensor([ 1.4189, -0.6152, -0.5272, 0.6328]) tensor([0.5783, 0.0756, 0.0826, 0.2635]) -Greedy action tensor([ 1.2636, -0.4529, -0.2086, 0.6132]) tensor([0.5179, 0.0931, 0.1188, 0.2702]) -Greedy action tensor([ 1.3842, -0.8076, -0.0244, -0.7622]) tensor([0.6788, 0.0758, 0.1660, 0.0794]) -Greedy action tensor([ 0.9568, -0.6250, 0.2414, -0.5920]) tensor([0.5243, 0.1078, 0.2564, 0.1114]) -Greedy action tensor([ 0.5715, -0.0027, -0.1011, 0.1226]) tensor([0.3687, 0.2077, 0.1882, 0.2354]) -Greedy action tensor([ 0.5587, -0.2693, -0.2740, -0.1460]) tensor([0.4226, 0.1847, 0.1838, 0.2089]) -Greedy action tensor([ 0.5041, -0.2752, 0.2474, -0.1993]) tensor([0.3667, 0.1682, 0.2837, 0.1815]) -Greedy action tensor([ 0.7509, -0.2280, -0.1451, 0.0073]) tensor([0.4426, 0.1663, 0.1807, 0.2104]) -Greedy action tensor([ 0.6710, -0.4591, 0.0421, -0.5586]) tensor([0.4654, 0.1503, 0.2481, 0.1361]) -Greedy action tensor([ 0.9099, -0.5970, 0.0613, -0.6274]) tensor([0.5363, 0.1188, 0.2295, 0.1153]) -Greedy action tensor([1.1602, 0.0410, 0.0033, 0.0249]) tensor([0.5096, 0.1664, 0.1602, 0.1637]) -Greedy action tensor([ 1.2455, -0.8821, 0.0146, -0.6323]) tensor([0.6393, 0.0762, 0.1867, 0.0978]) -Greedy action tensor([ 0.2540, 0.0059, -0.0904, -0.2569]) tensor([0.3237, 0.2526, 0.2294, 0.1942]) -Greedy action tensor([ 0.7611, -0.2065, -0.1123, -0.1186]) tensor([0.4520, 0.1718, 0.1887, 0.1875]) -Greedy action tensor([ 0.9041, -0.5804, 0.1550, -0.4984]) tensor([0.5140, 0.1165, 0.2430, 0.1264]) -Greedy action tensor([ 0.7454, -1.0125, 0.2563, -0.4068]) tensor([0.4758, 0.0820, 0.2918, 0.1503]) -Greedy action tensor([ 0.8659, -0.7304, 0.0997, -0.2606]) tensor([0.5021, 0.1018, 0.2334, 0.1628]) -Greedy action tensor([ 0.9715, -0.4000, -0.1875, -0.0563]) tensor([0.5194, 0.1318, 0.1630, 0.1858]) -Greedy action tensor([ 0.6146, -0.2802, -0.0168, -0.0861]) tensor([0.4104, 0.1677, 0.2183, 0.2036]) -Greedy action tensor([ 0.9543, 0.0230, -0.1480, -0.3086]) tensor([0.4978, 0.1961, 0.1653, 0.1408]) -Greedy action tensor([ 0.7805, 0.0112, -0.0846, 0.0681]) tensor([0.4211, 0.1951, 0.1773, 0.2065]) -Greedy action tensor([ 0.7511, -0.1334, -0.1642, -0.6177]) tensor([0.4836, 0.1997, 0.1936, 0.1230]) -Greedy action tensor([ 0.9554, -0.5964, -0.0503, -0.4505]) tensor([0.5486, 0.1162, 0.2007, 0.1345]) -Greedy action tensor([ 0.6668, -0.2102, 0.0080, -0.3267]) tensor([0.4341, 0.1806, 0.2246, 0.1607]) -Greedy action tensor([ 0.9333, -0.2366, -0.0975, -0.1338]) tensor([0.4972, 0.1543, 0.1774, 0.1710]) -Greedy action tensor([ 0.9896, -0.6083, -0.2489, -0.5799]) tensor([0.5881, 0.1190, 0.1705, 0.1224]) -Greedy action tensor([ 0.7420, -0.5048, -0.1451, -0.1231]) tensor([0.4716, 0.1356, 0.1942, 0.1986]) -Greedy action tensor([ 1.2573, -0.8310, -0.0371, -0.6659]) tensor([0.6476, 0.0802, 0.1775, 0.0946]) -Greedy action tensor([ 0.7280, -0.4260, -0.0261, -0.0654]) tensor([0.4468, 0.1409, 0.2102, 0.2021]) -Greedy action tensor([ 0.7569, -0.6622, -0.0892, -0.2473]) tensor([0.4908, 0.1187, 0.2106, 0.1798]) -Greedy action tensor([ 0.7988, -0.4675, -0.0291, -0.1864]) tensor([0.4780, 0.1347, 0.2089, 0.1785]) -Greedy action tensor([ 1.1622, -0.5518, -0.1299, -0.4512]) tensor([0.6046, 0.1089, 0.1661, 0.1204]) -Greedy action tensor([ 0.6594, -0.2974, -0.0326, -0.1700]) tensor([0.4309, 0.1655, 0.2157, 0.1880]) -Greedy action tensor([7.0885e-01, 1.6200e-01, 1.7619e-04, 1.8651e-02]) tensor([0.3887, 0.2250, 0.1914, 0.1949]) -Greedy action tensor([ 0.5701, -0.3912, -0.0580, -0.1554]) tensor([0.4166, 0.1593, 0.2223, 0.2017]) -Greedy action tensor([ 0.5056, -0.1618, 0.0461, -0.0538]) tensor([0.3682, 0.1889, 0.2325, 0.2104]) -Greedy action tensor([ 0.6118, -0.0175, -0.0602, -0.1252]) tensor([0.3965, 0.2113, 0.2025, 0.1897]) -Greedy action tensor([ 0.4062, -0.2555, -0.0460, -0.2715]) tensor([0.3760, 0.1940, 0.2392, 0.1909]) -Greedy action tensor([ 0.6992, -0.2595, -0.1573, -0.0994]) tensor([0.4429, 0.1698, 0.1881, 0.1993]) -Greedy action tensor([ 0.9313, -0.7499, 0.2115, -0.4885]) tensor([0.5223, 0.0972, 0.2543, 0.1263]) -Greedy action tensor([ 0.9465, -0.5984, -0.0263, -0.5796]) tensor([0.5529, 0.1179, 0.2090, 0.1202]) -Greedy action tensor([ 0.8492, -0.5716, 0.0036, -0.6094]) tensor([0.5254, 0.1269, 0.2256, 0.1222]) -Greedy action tensor([ 0.4828, -0.3618, -0.1738, -0.4846]) tensor([0.4295, 0.1846, 0.2227, 0.1632]) -Greedy action tensor([ 0.4309, -0.2005, -0.0532, -0.2605]) tensor([0.3775, 0.2008, 0.2326, 0.1891]) -Greedy action tensor([ 1.1167, -0.6412, -0.0796, -0.6639]) tensor([0.6085, 0.1049, 0.1840, 0.1026]) -Greedy action tensor([ 1.0318, -0.8690, 0.0234, -0.5654]) tensor([0.5825, 0.0871, 0.2125, 0.1179]) -Greedy action tensor([ 0.9579, -0.5280, -0.2349, -0.6333]) tensor([0.5769, 0.1306, 0.1750, 0.1175]) -Greedy action tensor([ 0.7683, -0.0464, -0.0410, 0.0222]) tensor([0.4233, 0.1874, 0.1885, 0.2008]) -Greedy action tensor([0.4838, 0.1515, 0.0794, 0.0062]) tensor([0.3328, 0.2387, 0.2221, 0.2064]) -Greedy action tensor([ 1.3409, -0.9017, 0.1847, -0.7700]) tensor([0.6485, 0.0689, 0.2041, 0.0785]) -Greedy action tensor([ 0.4928, -0.5611, -0.1567, -0.0579]) tensor([0.4086, 0.1424, 0.2134, 0.2356]) -Greedy action tensor([0.4103, 0.0131, 0.0399, 0.0417]) tensor([0.3274, 0.2201, 0.2261, 0.2265]) -Greedy action tensor([ 1.0797, -0.6317, -0.0777, -0.6527]) tensor([0.5982, 0.1080, 0.1880, 0.1058]) -Greedy action tensor([ 0.7297, -0.5773, -0.0847, -0.3779]) tensor([0.4893, 0.1324, 0.2167, 0.1616]) -Greedy action tensor([ 0.4374, -0.2570, 0.1008, 0.0917]) tensor([0.3423, 0.1709, 0.2445, 0.2423]) -Greedy action tensor([ 0.4987, -0.4918, -0.2376, -0.0889]) tensor([0.4157, 0.1544, 0.1990, 0.2310]) -Greedy action tensor([ 0.2252, 0.3824, -0.1258, -0.0077]) tensor([0.2728, 0.3192, 0.1920, 0.2161]) -Greedy action tensor([ 0.8923, -0.7954, 0.0021, -0.3863]) tensor([0.5336, 0.0987, 0.2191, 0.1486]) -Greedy action tensor([ 0.5043, 0.1059, -0.0461, 0.0906]) tensor([0.3437, 0.2308, 0.1982, 0.2273]) -Greedy action tensor([ 0.7475, -0.6734, 0.1781, -0.6813]) tensor([0.4885, 0.1180, 0.2764, 0.1170]) -Greedy action tensor([ 0.7864, -0.3568, 0.0283, -0.3838]) tensor([0.4767, 0.1520, 0.2234, 0.1479]) -Greedy action tensor([ 0.5082, -0.2313, 0.0152, -0.1125]) tensor([0.3808, 0.1818, 0.2326, 0.2047]) -Greedy action tensor([ 0.9009, -0.7453, 0.0526, -0.5831]) tensor([0.5412, 0.1043, 0.2317, 0.1227]) -Greedy action tensor([ 1.0259, -0.9474, -0.0966, -0.3405]) tensor([0.5816, 0.0808, 0.1893, 0.1483]) -Greedy action tensor([ 1.0637, -0.6784, -0.0265, -0.5687]) tensor([0.5859, 0.1026, 0.1969, 0.1145]) -Greedy action tensor([ 1.1445, -0.8525, 0.1425, -0.9679]) tensor([0.6158, 0.0836, 0.2261, 0.0745]) -Greedy action tensor([ 0.8706, -0.6984, 0.0417, -0.6264]) tensor([0.5352, 0.1115, 0.2336, 0.1198]) -Greedy action tensor([ 0.2945, -0.0120, 0.0842, -0.2238]) tensor([0.3183, 0.2343, 0.2579, 0.1895]) -Greedy action tensor([ 1.0036, -0.4630, -0.0737, -0.6362]) tensor([0.5665, 0.1307, 0.1929, 0.1099]) -Greedy action tensor([ 1.1016, -0.8544, 0.1135, -0.4083]) tensor([0.5765, 0.0815, 0.2146, 0.1274]) -Greedy action tensor([ 0.9370, -0.5439, 0.1337, -0.1967]) tensor([0.5007, 0.1139, 0.2242, 0.1612]) -Greedy action tensor([ 0.3569, -0.1772, -0.1618, -0.2011]) tensor([0.3631, 0.2129, 0.2162, 0.2078]) -Greedy action tensor([ 0.4439, -0.0756, -0.2113, 0.0539]) tensor([0.3583, 0.2131, 0.1861, 0.2426]) -Greedy action tensor([ 0.7233, -0.6120, 0.0119, -0.3092]) tensor([0.4739, 0.1247, 0.2327, 0.1688]) -Greedy action tensor([ 0.9565, -0.7284, 0.0518, -0.4129]) tensor([0.5422, 0.1006, 0.2194, 0.1379]) -Greedy action tensor([ 0.5719, -0.3228, 0.1140, -0.3813]) tensor([0.4120, 0.1684, 0.2607, 0.1589]) -Greedy action tensor([ 0.8166, -0.4614, -0.0047, -0.3964]) tensor([0.4961, 0.1382, 0.2182, 0.1475]) -Greedy action tensor([ 0.7793, -0.4210, -0.0667, -0.2338]) tensor([0.4777, 0.1438, 0.2050, 0.1735]) -Greedy action tensor([ 0.6917, -0.6426, -0.1442, -0.3407]) tensor([0.4871, 0.1283, 0.2111, 0.1735]) -Greedy action tensor([ 0.8287, -0.5412, 0.1357, -0.2287]) tensor([0.4758, 0.1209, 0.2380, 0.1653]) -Greedy action tensor([ 0.9124, -0.1569, 0.0025, -0.0840]) tensor([0.4728, 0.1623, 0.1903, 0.1746]) -Greedy action tensor([ 0.9392, -0.5075, -0.1762, -0.5156]) tensor([0.5566, 0.1310, 0.1825, 0.1299]) -Greedy action tensor([ 0.7261, -0.1374, -0.0128, -0.3145]) tensor([0.4439, 0.1872, 0.2120, 0.1568]) -Greedy action tensor([-0.3546, 0.1512, 0.6431, -1.3099]) tensor([0.1738, 0.2881, 0.4713, 0.0668]) -Greedy action tensor([-0.3546, -0.6636, 0.5081, -0.6855]) tensor([0.2074, 0.1523, 0.4914, 0.1490]) -Greedy action tensor([ 4.9327e-01, -1.3536e+00, 4.1708e-04, 5.7833e-02]) tensor([0.4140, 0.0653, 0.2529, 0.2678]) -Greedy action tensor([ 0.2230, -1.0277, 0.1384, -0.0771]) tensor([0.3394, 0.0972, 0.3119, 0.2514]) -Greedy action tensor([-1.4370, -0.1370, -0.0057, -0.1141]) tensor([0.0793, 0.2910, 0.3319, 0.2978]) -Greedy action tensor([ 0.5945, 0.2818, 0.8797, -0.3482]) tensor([0.2898, 0.2119, 0.3854, 0.1129]) -Greedy action tensor([-0.2159, -0.1244, 0.8098, -0.2593]) tensor([0.1712, 0.1876, 0.4774, 0.1639]) -Greedy action tensor([ 0.2648, -1.4046, 1.2579, -0.8884]) tensor([0.2379, 0.0448, 0.6422, 0.0751]) -Greedy action tensor([ 0.5181, -1.0263, 0.0048, 0.3434]) tensor([0.3771, 0.0805, 0.2257, 0.3167]) -Greedy action tensor([ 0.7773, -0.3338, 0.7827, 0.9164]) tensor([0.2870, 0.0945, 0.2886, 0.3299]) -Greedy action tensor([ 0.2723, -1.6184, 0.4098, 0.5612]) tensor([0.2752, 0.0415, 0.3158, 0.3674]) -Greedy action tensor([ 0.1831, -0.2660, 0.6335, -0.5298]) tensor([0.2705, 0.1726, 0.4243, 0.1326]) -Greedy action tensor([-0.6339, -0.5212, -0.0193, 0.5067]) tensor([0.1409, 0.1577, 0.2605, 0.4408]) -Greedy action tensor([-0.6398, -1.2028, -0.9996, 0.3375]) tensor([0.2031, 0.1156, 0.1417, 0.5396]) -Greedy action tensor([ 1.7292, 0.1140, -0.0297, 0.0045]) tensor([0.6455, 0.1283, 0.1112, 0.1150]) -Greedy action tensor([ 0.2901, 0.0010, -0.5252, -0.8618]) tensor([0.3988, 0.2987, 0.1765, 0.1260]) -Greedy action tensor([-0.3593, -0.4534, -0.2913, -0.8462]) tensor([0.2782, 0.2532, 0.2977, 0.1709]) -Greedy action tensor([ 0.0938, -2.2771, -0.7456, 0.0725]) tensor([0.3993, 0.0373, 0.1725, 0.3909]) -Greedy action tensor([-0.0463, 0.3613, 0.1052, 0.8611]) tensor([0.1627, 0.2446, 0.1894, 0.4033]) -Greedy action tensor([-0.1168, 0.1430, 0.4206, -0.2961]) tensor([0.2064, 0.2677, 0.3533, 0.1726]) -Greedy action tensor([ 0.4629, -0.8412, 0.8936, 0.0643]) tensor([0.2873, 0.0780, 0.4419, 0.1928]) -Greedy action tensor([-0.2720, -0.7194, -0.3378, 0.2236]) tensor([0.2371, 0.1516, 0.2220, 0.3893]) -Greedy action tensor([ 0.7303, -0.0929, -0.6748, -0.4420]) tensor([0.5015, 0.2202, 0.1230, 0.1553]) -Greedy action tensor([ 0.1453, -0.2406, 0.5627, -0.3158]) tensor([0.2612, 0.1776, 0.3965, 0.1647]) -Greedy action tensor([ 1.0252, -0.8296, -0.3471, -0.0175]) tensor([0.5674, 0.0888, 0.1438, 0.2000]) -Greedy action tensor([ 0.1594, -0.6794, 0.4179, -0.5661]) tensor([0.3114, 0.1346, 0.4033, 0.1507]) -Greedy action tensor([-0.5197, -1.8546, -0.5510, 0.3656]) tensor([0.2148, 0.0565, 0.2082, 0.5205]) -Greedy action tensor([ 0.8845, -1.5977, 0.5819, -0.1672]) tensor([0.4605, 0.0385, 0.3402, 0.1609]) -Greedy action tensor([-0.0021, 0.5712, -0.4089, -0.8413]) tensor([0.2583, 0.4582, 0.1720, 0.1116]) -Greedy action tensor([ 0.2674, -0.1047, -0.0078, 0.2406]) tensor([0.2922, 0.2014, 0.2219, 0.2845]) -Greedy action tensor([ 1.1040, -0.5125, -0.0017, -0.2206]) tensor([0.5569, 0.1106, 0.1843, 0.1481]) -Greedy action tensor([ 0.0313, -0.6525, 1.1634, -0.3267]) tensor([0.1885, 0.0951, 0.5847, 0.1318]) -Greedy action tensor([ 0.4289, -0.4462, -0.6167, -0.1364]) tensor([0.4280, 0.1784, 0.1504, 0.2432]) -Greedy action tensor([-0.9745, -0.0654, -1.3789, 0.0123]) tensor([0.1464, 0.3633, 0.0977, 0.3926]) -Greedy action tensor([ 1.6796, -0.3980, -0.1757, -0.8623]) tensor([0.7351, 0.0921, 0.1150, 0.0579]) -Greedy action tensor([ 0.1623, -0.9035, 0.0035, 1.0242]) tensor([0.2190, 0.0754, 0.1869, 0.5186]) -Greedy action tensor([ 0.6259, -1.0854, 0.1819, 0.9251]) tensor([0.3154, 0.0570, 0.2023, 0.4254]) -Greedy action tensor([-0.4154, -0.5122, -1.1872, -0.3229]) tensor([0.2884, 0.2618, 0.1333, 0.3164]) -Greedy action tensor([ 0.2910, 0.0461, -0.7037, -0.2652]) tensor([0.3668, 0.2872, 0.1357, 0.2103]) -Greedy action tensor([ 0.4183, 0.2773, -0.1773, 0.0526]) tensor([0.3212, 0.2790, 0.1770, 0.2228]) -Greedy action tensor([0.9384, 0.0177, 0.3145, 0.6825]) tensor([0.3692, 0.1470, 0.1979, 0.2859]) -Greedy action tensor([-0.9118, -0.2183, 1.7397, -1.0736]) tensor([0.0555, 0.1110, 0.7863, 0.0472]) -Greedy action tensor([-0.7222, -0.5014, -0.4642, 0.4256]) tensor([0.1494, 0.1863, 0.1934, 0.4709]) -Greedy action tensor([ 0.3783, -0.8804, -0.0508, 0.5673]) tensor([0.3182, 0.0904, 0.2071, 0.3843]) -Greedy action tensor([ 0.5341, -0.3359, 0.5186, 0.8519]) tensor([0.2647, 0.1109, 0.2606, 0.3637]) -Greedy action tensor([-0.8504, -1.4913, 0.2205, -0.7653]) tensor([0.1807, 0.0952, 0.5273, 0.1968]) -Greedy action tensor([ 0.2457, -0.0123, 0.2353, -0.1324]) tensor([0.2901, 0.2241, 0.2871, 0.1988]) -Greedy action tensor([-0.9981, -0.8274, -0.0992, -1.5500]) tensor([0.1916, 0.2273, 0.4708, 0.1103]) -Greedy action tensor([ 0.1016, -0.9562, -0.4020, 0.3691]) tensor([0.3069, 0.1066, 0.1855, 0.4010]) -Greedy action tensor([-0.2378, -1.3747, -0.8101, 0.0707]) tensor([0.3080, 0.0988, 0.1738, 0.4193]) -Greedy action tensor([-0.3734, -0.3794, 0.5673, -0.2607]) tensor([0.1762, 0.1752, 0.4514, 0.1972]) -Greedy action tensor([-0.8794, -1.6685, -0.0021, 0.4232]) tensor([0.1327, 0.0603, 0.3190, 0.4881]) -Greedy action tensor([-0.0317, -0.5955, 0.0759, 1.2243]) tensor([0.1614, 0.0919, 0.1798, 0.5669]) -Greedy action tensor([ 0.8132, -1.1759, 0.3097, 1.1324]) tensor([0.3208, 0.0439, 0.1939, 0.4414]) -Greedy action tensor([-0.8549, -0.4730, 0.1904, -1.2170]) tensor([0.1665, 0.2439, 0.4736, 0.1159]) -Greedy action tensor([0.2034, 0.1735, 0.0022, 0.3670]) tensor([0.2521, 0.2447, 0.2062, 0.2970]) -Greedy action tensor([ 0.4887, -0.8833, 0.3139, -0.7006]) tensor([0.4171, 0.1058, 0.3502, 0.1270]) -Greedy action tensor([-0.0205, -0.7267, 0.9487, -0.0683]) tensor([0.1968, 0.0971, 0.5186, 0.1876]) -Greedy action tensor([-0.3914, -0.6178, -0.0570, -0.6530]) tensor([0.2523, 0.2011, 0.3524, 0.1942]) -Greedy action tensor([-0.1045, -0.7268, 0.8129, -0.9393]) tensor([0.2235, 0.1200, 0.5595, 0.0970]) -Greedy action tensor([ 0.1339, 0.0031, -0.0563, -0.2339]) tensor([0.2944, 0.2583, 0.2434, 0.2038]) -Greedy action tensor([-0.7179, -0.6511, 0.4118, -0.9344]) tensor([0.1675, 0.1791, 0.5185, 0.1349]) -Greedy action tensor([-1.7582, -0.4978, 0.6712, -1.6924]) tensor([0.0590, 0.2081, 0.6699, 0.0630]) -Greedy action tensor([-0.8298, -1.1376, 1.0852, -0.7427]) tensor([0.1040, 0.0765, 0.7060, 0.1135]) -Greedy action tensor([-0.3856, -0.6628, -0.0130, -0.1269]) tensor([0.2220, 0.1682, 0.3222, 0.2875]) -Greedy action tensor([-0.7013, -0.3503, -0.0707, -0.9522]) tensor([0.1969, 0.2798, 0.3700, 0.1533]) -Greedy action tensor([-0.8367, -0.4696, 0.3961, -1.4691]) tensor([0.1561, 0.2254, 0.5356, 0.0829]) -Greedy action tensor([ 0.6428, -0.6746, -0.5840, -0.5691]) tensor([0.5380, 0.1441, 0.1578, 0.1601]) -Greedy action tensor([-0.2405, -0.6873, -0.0274, -0.7918]) tensor([0.2896, 0.1852, 0.3584, 0.1669]) -Greedy action tensor([-0.6213, -1.3445, -0.5795, -0.1574]) tensor([0.2428, 0.1178, 0.2532, 0.3862]) -Greedy action tensor([-0.0199, -1.0861, 0.8962, -0.1035]) tensor([0.2099, 0.0723, 0.5247, 0.1931]) -Greedy action tensor([ 0.9007, -0.9914, 0.5098, 1.0629]) tensor([0.3330, 0.0502, 0.2252, 0.3916]) -Greedy action tensor([-1.0420, -0.9273, 0.1352, -0.5613]) tensor([0.1432, 0.1606, 0.4647, 0.2316]) -Greedy action tensor([ 0.7069, -0.7990, -1.2896, -0.1417]) tensor([0.5600, 0.1242, 0.0761, 0.2397]) -Greedy action tensor([ 0.2094, -0.3460, -0.8160, 0.1724]) tensor([0.3453, 0.1981, 0.1238, 0.3327]) -Greedy action tensor([ 1.0466, -0.8144, -0.6232, 1.4025]) tensor([0.3609, 0.0561, 0.0679, 0.5151]) -Greedy action tensor([ 1.4425, 0.1025, -0.4500, 0.4248]) tensor([0.5637, 0.1476, 0.0849, 0.2037]) -Greedy action tensor([-0.3878, -0.9309, 0.7726, 0.0125]) tensor([0.1596, 0.0927, 0.5094, 0.2382]) -Greedy action tensor([ 0.3551, -1.1272, -0.1701, 0.6648]) tensor([0.3143, 0.0714, 0.1859, 0.4284]) -Greedy action tensor([ 0.3512, -1.7486, 0.0768, 0.4394]) tensor([0.3362, 0.0412, 0.2555, 0.3672]) -Greedy action tensor([-0.8347, -0.9809, -0.0595, -0.1543]) tensor([0.1664, 0.1438, 0.3613, 0.3286]) -Greedy action tensor([ 1.1478, -0.1192, -0.7900, 0.1166]) tensor([0.5611, 0.1580, 0.0808, 0.2001]) -Greedy action tensor([ 1.2835, -0.1505, -0.8231, 0.3923]) tensor([0.5649, 0.1347, 0.0687, 0.2317]) -Greedy action tensor([ 1.8886, -0.7753, -1.1421, 0.2630]) tensor([0.7606, 0.0530, 0.0367, 0.1497]) -Greedy action tensor([ 1.6135, -0.7358, -0.5546, 0.1502]) tensor([0.6938, 0.0662, 0.0794, 0.1606]) -Greedy action tensor([ 1.4125, -0.7251, -0.5626, 0.7519]) tensor([0.5640, 0.0665, 0.0782, 0.2913]) -Greedy action tensor([ 1.4870, 0.0810, -0.6189, 0.4144]) tensor([0.5852, 0.1434, 0.0712, 0.2002]) -Greedy action tensor([ 1.5212, -0.6851, -0.2188, 0.2791]) tensor([0.6352, 0.0699, 0.1115, 0.1834]) -Greedy action tensor([ 1.7055, -0.0103, -0.7405, 0.7369]) tensor([0.6075, 0.1092, 0.0526, 0.2306]) -Greedy action tensor([ 1.5026, -0.5393, -0.4775, 0.3299]) tensor([0.6340, 0.0823, 0.0875, 0.1962]) -Greedy action tensor([ 1.4178, -0.2769, -1.0438, 0.3902]) tensor([0.6147, 0.1129, 0.0524, 0.2200]) -Greedy action tensor([ 0.9157, -0.2887, -0.0223, -0.1291]) tensor([0.4895, 0.1468, 0.1916, 0.1722]) -Greedy action tensor([ 1.7606, -0.6752, -0.6179, 0.3514]) tensor([0.7020, 0.0614, 0.0651, 0.1715]) -Greedy action tensor([ 1.2932, -0.4999, -0.4489, 0.4075]) tensor([0.5701, 0.0949, 0.0999, 0.2351]) -Greedy action tensor([ 1.4337, -0.3100, -0.4143, 0.5287]) tensor([0.5757, 0.1007, 0.0907, 0.2329]) -Greedy action tensor([ 1.5655, -0.5283, -0.8636, 0.0388]) tensor([0.7000, 0.0862, 0.0617, 0.1521]) -Greedy action tensor([ 2.1498, 0.1930, -0.1113, -0.0791]) tensor([0.7390, 0.1044, 0.0770, 0.0796]) -Greedy action tensor([ 1.4382, -0.1086, -0.4284, 0.0761]) tensor([0.6159, 0.1311, 0.0952, 0.1577]) -Greedy action tensor([ 1.4152, -0.0926, -0.6634, 0.1010]) tensor([0.6191, 0.1371, 0.0775, 0.1663]) -Greedy action tensor([ 1.6306, -0.9827, -0.4729, 0.7720]) tensor([0.6176, 0.0453, 0.0754, 0.2617]) -Greedy action tensor([ 1.2261, -0.4702, -0.1056, -0.2378]) tensor([0.5957, 0.1092, 0.1573, 0.1378]) -Greedy action tensor([ 1.5885, -0.4938, -0.3312, 0.6859]) tensor([0.5964, 0.0743, 0.0875, 0.2418]) -Greedy action tensor([ 1.5590, -0.4923, -0.5193, 0.3520]) tensor([0.6440, 0.0828, 0.0806, 0.1926]) -Greedy action tensor([ 1.4348, -0.8572, -0.1835, 0.0556]) tensor([0.6447, 0.0652, 0.1278, 0.1623]) -Greedy action tensor([ 1.6701, -0.6495, -0.1918, 0.4717]) tensor([0.6429, 0.0632, 0.0999, 0.1940]) -Greedy action tensor([ 1.2446, -0.4440, -0.2064, 0.2664]) tensor([0.5571, 0.1029, 0.1305, 0.2095]) -Greedy action tensor([ 2.7867, -1.6285, -0.2144, 0.6149]) tensor([0.8505, 0.0103, 0.0423, 0.0969]) -Greedy action tensor([ 1.7710, -0.4904, -0.7171, 0.2032]) tensor([0.7164, 0.0747, 0.0595, 0.1494]) -Greedy action tensor([ 1.2306, -0.4296, -1.1102, 0.2255]) tensor([0.6052, 0.1150, 0.0582, 0.2215]) -Greedy action tensor([ 0.6910, -0.0771, 0.1717, -0.0296]) tensor([0.3929, 0.1823, 0.2337, 0.1911]) -Greedy action tensor([ 1.6859, -0.4829, -0.4135, 0.2804]) tensor([0.6747, 0.0771, 0.0827, 0.1655]) -Greedy action tensor([ 1.9151, 0.2014, -0.9272, 0.2793]) tensor([0.6977, 0.1257, 0.0407, 0.1359]) -Greedy action tensor([ 2.1409, -0.3737, -0.6653, -0.0741]) tensor([0.7997, 0.0647, 0.0483, 0.0873]) -Greedy action tensor([ 1.7076, 0.5316, -0.3827, 0.3813]) tensor([0.5891, 0.1817, 0.0728, 0.1564]) -Greedy action tensor([ 1.6889, -0.7950, -0.1673, 0.5756]) tensor([0.6377, 0.0532, 0.0996, 0.2095]) -Greedy action tensor([ 1.2444, -0.1692, -0.3049, 0.0419]) tensor([0.5694, 0.1385, 0.1209, 0.1711]) -Greedy action tensor([ 1.2137, -0.7412, -0.1285, 0.0154]) tensor([0.5867, 0.0831, 0.1533, 0.1770]) -Greedy action tensor([ 1.0626, -0.4197, -0.4748, 0.2802]) tensor([0.5265, 0.1196, 0.1132, 0.2408]) -Greedy action tensor([ 1.6154, -0.1176, -0.4767, 0.2972]) tensor([0.6379, 0.1127, 0.0787, 0.1707]) -Greedy action tensor([ 0.9111, -0.6160, -0.1019, 0.2152]) tensor([0.4810, 0.1045, 0.1747, 0.2399]) -Greedy action tensor([ 1.5626, -0.6063, -0.8056, 0.3789]) tensor([0.6605, 0.0755, 0.0619, 0.2022]) -Greedy action tensor([ 1.1029, -0.4140, -0.3730, -0.0267]) tensor([0.5646, 0.1239, 0.1291, 0.1825]) -Greedy action tensor([ 1.7488, -1.1415, -0.1056, 0.4620]) tensor([0.6719, 0.0373, 0.1052, 0.1856]) -Greedy action tensor([ 0.9498, -0.4358, -0.0278, 0.0625]) tensor([0.4906, 0.1227, 0.1846, 0.2020]) -Greedy action tensor([ 1.3307, -0.1075, -0.3131, 0.1675]) tensor([0.5737, 0.1362, 0.1109, 0.1793]) -Greedy action tensor([ 1.6178, -0.3218, -0.5840, -0.2605]) tensor([0.7106, 0.1022, 0.0786, 0.1086]) -Greedy action tensor([ 1.7113, -0.5953, -0.4892, -0.1363]) tensor([0.7310, 0.0728, 0.0810, 0.1152]) -Greedy action tensor([ 1.2880, -0.1571, -0.2021, -0.4159]) tensor([0.6086, 0.1435, 0.1372, 0.1107]) -Greedy action tensor([ 1.7642, -0.4610, -0.5378, 0.2078]) tensor([0.7047, 0.0761, 0.0705, 0.1486]) -Greedy action tensor([ 0.2509, -0.4747, 0.0291, 0.0767]) tensor([0.3200, 0.1549, 0.2563, 0.2688]) -Greedy action tensor([ 1.3610, -0.7073, -0.4330, 0.6286]) tensor([0.5639, 0.0713, 0.0938, 0.2711]) -Greedy action tensor([ 1.1507, -0.6295, -0.2316, 0.4047]) tensor([0.5280, 0.0890, 0.1325, 0.2504]) -Greedy action tensor([ 1.0689, 0.1327, -0.0300, 0.3588]) tensor([0.4511, 0.1769, 0.1503, 0.2217]) -Greedy action tensor([ 1.5571, -0.0799, -0.8841, 0.4464]) tensor([0.6208, 0.1208, 0.0540, 0.2044]) -Greedy action tensor([ 1.3815, -0.3697, -0.2252, 0.2795]) tensor([0.5861, 0.1017, 0.1175, 0.1947]) -Greedy action tensor([ 1.7585, -0.9002, -0.5352, 0.5256]) tensor([0.6838, 0.0479, 0.0690, 0.1993]) -Greedy action tensor([ 1.1946, -0.3845, -0.4195, 0.6338]) tensor([0.5061, 0.1043, 0.1007, 0.2889]) -Greedy action tensor([ 2.0917, -1.1548, -0.5995, 0.5579]) tensor([0.7562, 0.0294, 0.0513, 0.1631]) -Greedy action tensor([ 1.0827, -0.1503, -1.1111, -0.0674]) tensor([0.5816, 0.1695, 0.0648, 0.1841]) -Greedy action tensor([ 1.0035, -0.0373, -0.6870, 0.2928]) tensor([0.4929, 0.1741, 0.0909, 0.2422]) -Greedy action tensor([ 1.5170, -0.2268, -0.7427, 0.2667]) tensor([0.6387, 0.1117, 0.0667, 0.1829]) -Greedy action tensor([ 0.9119, -0.2544, -0.3633, 0.2070]) tensor([0.4796, 0.1494, 0.1340, 0.2370]) -Greedy action tensor([ 2.1665, -1.4369, -0.1590, 0.2670]) tensor([0.7845, 0.0214, 0.0767, 0.1174]) -Greedy action tensor([ 1.2431, -0.7804, 0.0527, -0.1040]) tensor([0.5895, 0.0779, 0.1793, 0.1533]) -Greedy action tensor([ 1.2280, -0.3847, -0.8702, 0.4650]) tensor([0.5592, 0.1115, 0.0686, 0.2607]) -Greedy action tensor([ 1.6589, 0.1031, -0.4276, 0.4786]) tensor([0.6089, 0.1285, 0.0756, 0.1870]) -Greedy action tensor([ 1.8477, -1.0507, -0.3635, 0.6099]) tensor([0.6874, 0.0379, 0.0753, 0.1994]) -Greedy action tensor([ 1.4426, 0.2967, -0.7670, 0.5390]) tensor([0.5456, 0.1735, 0.0599, 0.2210]) -Greedy action tensor([ 1.4168, -0.7033, -0.1666, 0.5232]) tensor([0.5766, 0.0692, 0.1183, 0.2359]) -Greedy action tensor([ 1.6787, -0.1919, -0.5328, 0.3621]) tensor([0.6529, 0.1006, 0.0715, 0.1750]) -Greedy action tensor([ 1.8019, -0.4674, -0.0491, 0.4512]) tensor([0.6581, 0.0680, 0.1034, 0.1705]) -Greedy action tensor([ 2.4135, -0.2879, 0.2316, 0.2285]) tensor([0.7737, 0.0519, 0.0873, 0.0870]) -Greedy action tensor([ 1.3058, -0.0139, -0.9878, 0.3711]) tensor([0.5679, 0.1518, 0.0573, 0.2230]) -Greedy action tensor([ 2.5522, -1.4267, -0.2544, 0.8140]) tensor([0.7968, 0.0149, 0.0481, 0.1401]) -Greedy action tensor([ 1.0351, -0.1110, -0.1950, 0.6338]) tensor([0.4387, 0.1394, 0.1282, 0.2937]) -Greedy action tensor([ 1.3450, -0.4056, -0.3848, 0.3543]) tensor([0.5806, 0.1008, 0.1030, 0.2156]) -Greedy action tensor([ 1.3943, -0.4390, -0.1954, -0.1310]) tensor([0.6323, 0.1011, 0.1290, 0.1376]) -Greedy action tensor([ 2.0574, -1.2588, -0.1859, 0.7477]) tensor([0.7081, 0.0257, 0.0751, 0.1911]) -Greedy action tensor([ 2.3414, -1.4972, -0.0365, 1.0209]) tensor([0.7240, 0.0156, 0.0671, 0.1933]) -Greedy action tensor([ 1.4268, -0.5708, -0.6254, 0.0130]) tensor([0.6634, 0.0900, 0.0852, 0.1614]) -Greedy action tensor([ 1.3442, -0.6547, -0.0725, 0.2072]) tensor([0.5887, 0.0798, 0.1428, 0.1888]) -Greedy action tensor([ 1.4972, -0.2644, -0.5213, 0.0678]) tensor([0.6476, 0.1112, 0.0860, 0.1551]) -Greedy action tensor([-1.9136, -0.4262, 0.6581, -0.1435]) tensor([0.0410, 0.1815, 0.5367, 0.2408]) -Greedy action tensor([-1.8760, -0.4568, 0.6413, -0.1414]) tensor([0.0431, 0.1782, 0.5344, 0.2443]) -Greedy action tensor([-1.8995, -0.4340, 0.6440, -0.1583]) tensor([0.0421, 0.1822, 0.5356, 0.2401]) -Greedy action tensor([-1.8399, -0.2610, 0.5914, -0.1291]) tensor([0.0439, 0.2131, 0.4998, 0.2432]) -Greedy action tensor([-1.8338, -0.4229, 0.6176, -0.1169]) tensor([0.0449, 0.1841, 0.5211, 0.2500]) -Greedy action tensor([-1.9279, -0.3849, 0.6486, -0.1768]) tensor([0.0407, 0.1903, 0.5348, 0.2343]) -Greedy action tensor([-1.8757, -0.4815, 0.7448, -0.0591]) tensor([0.0401, 0.1617, 0.5514, 0.2468]) -Greedy action tensor([1.5666, 0.8653, 0.0415, 0.3421]) tensor([0.4981, 0.2470, 0.1084, 0.1464]) -Greedy action tensor([-1.0270, 0.7459, 0.1891, -0.0139]) tensor([0.0768, 0.4524, 0.2592, 0.2116]) -Greedy action tensor([-1.7312, 0.0041, 0.4794, -0.0386]) tensor([0.0471, 0.2672, 0.4297, 0.2560]) -Greedy action tensor([-1.9098, -0.3348, 0.6368, -0.1470]) tensor([0.0409, 0.1978, 0.5226, 0.2386]) -Greedy action tensor([-1.7609, -0.4824, 0.5210, -0.0968]) tensor([0.0508, 0.1826, 0.4981, 0.2685]) -Greedy action tensor([-1.6247, -0.5364, 0.9222, 0.4167]) tensor([0.0409, 0.1215, 0.5224, 0.3151]) -Greedy action tensor([-1.8697, -0.6742, 1.3053, 0.4370]) tensor([0.0261, 0.0864, 0.6252, 0.2624]) -Greedy action tensor([-1.4384, 0.5125, 0.3155, -0.0126]) tensor([0.0556, 0.3914, 0.3214, 0.2315]) -Greedy action tensor([-1.8348, -0.2009, 0.5732, -0.0959]) tensor([0.0436, 0.2235, 0.4847, 0.2482]) -Greedy action tensor([-1.8986, -0.4514, 0.6483, -0.1598]) tensor([0.0422, 0.1793, 0.5385, 0.2400]) -Greedy action tensor([-1.0366, -0.4833, 0.7989, 0.9081]) tensor([0.0625, 0.1087, 0.3918, 0.4370]) -Greedy action tensor([-1.9304, -0.4035, 0.6502, -0.1734]) tensor([0.0406, 0.1871, 0.5367, 0.2355]) -Greedy action tensor([-0.4907, 1.1047, 0.0820, 0.2871]) tensor([0.1012, 0.4990, 0.1795, 0.2203]) -Greedy action tensor([-1.7026, -0.4668, 0.6264, -0.0262]) tensor([0.0499, 0.1716, 0.5120, 0.2666]) -Greedy action tensor([-1.8757, -0.3003, 0.6240, -0.1387]) tensor([0.0422, 0.2040, 0.5141, 0.2398]) -Greedy action tensor([-1.6452, -0.0050, 0.5073, -0.1679]) tensor([0.0522, 0.2693, 0.4496, 0.2289]) -Greedy action tensor([-1.4425, -0.0533, 0.4178, -0.1019]) tensor([0.0655, 0.2629, 0.4211, 0.2504]) -Greedy action tensor([-1.7431, -0.0447, 0.4963, -0.0328]) tensor([0.0468, 0.2556, 0.4390, 0.2586]) -Greedy action tensor([-1.8225, -0.3674, 0.5876, -0.1255]) tensor([0.0457, 0.1959, 0.5090, 0.2495]) -Greedy action tensor([-1.9339, -0.4390, 0.6620, -0.1745]) tensor([0.0405, 0.1807, 0.5434, 0.2354]) -Greedy action tensor([-1.9225, -0.4415, 0.6588, -0.1692]) tensor([0.0410, 0.1803, 0.5419, 0.2368]) -Greedy action tensor([-1.7748, -0.3951, 0.5968, -0.0869]) tensor([0.0474, 0.1884, 0.5079, 0.2563]) -Greedy action tensor([-1.9345, -0.4069, 0.6543, -0.1731]) tensor([0.0404, 0.1862, 0.5381, 0.2353]) -Greedy action tensor([-1.8819, -0.4555, 0.6404, -0.1517]) tensor([0.0430, 0.1790, 0.5355, 0.2425]) -Greedy action tensor([-1.9105, -0.3838, 0.6482, -0.1572]) tensor([0.0412, 0.1895, 0.5317, 0.2376]) -Greedy action tensor([-1.8845, -0.1566, 0.5979, -0.1640]) tensor([0.0413, 0.2327, 0.4949, 0.2310]) -Greedy action tensor([-0.7224, -0.4349, 0.2639, -0.1841]) tensor([0.1486, 0.1982, 0.3986, 0.2546]) -Greedy action tensor([-1.7340, -0.4564, 0.5606, -0.0732]) tensor([0.0506, 0.1815, 0.5017, 0.2662]) -Greedy action tensor([-1.8719, -0.4592, 0.6352, -0.1487]) tensor([0.0435, 0.1787, 0.5339, 0.2438]) -Greedy action tensor([-1.8074, -0.3283, 0.5919, -0.0962]) tensor([0.0456, 0.2001, 0.5021, 0.2523]) -Greedy action tensor([-1.8919, -0.4523, 0.6467, -0.1490]) tensor([0.0424, 0.1788, 0.5366, 0.2422]) -Greedy action tensor([0.0778, 0.6040, 0.0834, 0.7108]) tensor([0.1792, 0.3032, 0.1802, 0.3374]) -Greedy action tensor([-0.6566, -0.4812, 0.9241, 1.0592]) tensor([0.0793, 0.0945, 0.3852, 0.4410]) -Greedy action tensor([-1.8981, -0.4348, 0.6473, -0.1534]) tensor([0.0420, 0.1816, 0.5358, 0.2406]) -Greedy action tensor([-1.9206, -0.4360, 0.6707, -0.1611]) tensor([0.0407, 0.1796, 0.5432, 0.2365]) -Greedy action tensor([-1.8466, -0.4581, 0.6250, -0.1308]) tensor([0.0446, 0.1789, 0.5284, 0.2481]) -Greedy action tensor([-1.8724, -0.4321, 0.6752, -0.1448]) tensor([0.0423, 0.1787, 0.5408, 0.2382]) -Greedy action tensor([-1.7569, -0.2448, 0.5339, -0.0861]) tensor([0.0482, 0.2188, 0.4766, 0.2564]) -Greedy action tensor([-0.8588, 0.9507, 0.0917, 0.2757]) tensor([0.0781, 0.4770, 0.2020, 0.2429]) -Greedy action tensor([-0.3872, 1.1567, 0.0952, 0.2882]) tensor([0.1079, 0.5053, 0.1748, 0.2120]) -Greedy action tensor([-1.8299, -0.3149, 0.6070, -0.1429]) tensor([0.0447, 0.2032, 0.5108, 0.2413]) -Greedy action tensor([-1.8941, -0.2283, 0.6086, -0.1463]) tensor([0.0412, 0.2182, 0.5038, 0.2368]) -Greedy action tensor([-1.9164, -0.3395, 0.6367, -0.1550]) tensor([0.0408, 0.1975, 0.5242, 0.2375]) -Greedy action tensor([-1.9188, -0.4280, 0.6555, -0.1666]) tensor([0.0411, 0.1825, 0.5393, 0.2370]) -Greedy action tensor([-1.7642, -0.3738, 0.6113, -0.0526]) tensor([0.0469, 0.1885, 0.5047, 0.2599]) -Greedy action tensor([-1.8512, -0.4107, 0.6126, -0.1258]) tensor([0.0443, 0.1870, 0.5202, 0.2486]) -Greedy action tensor([-1.9065, -0.4323, 0.6483, -0.1600]) tensor([0.0417, 0.1822, 0.5369, 0.2392]) -Greedy action tensor([-1.8618, -0.2979, 0.6215, -0.2380]) tensor([0.0438, 0.2093, 0.5248, 0.2222]) -Greedy action tensor([-1.2744, 0.1271, 0.2790, 0.1011]) tensor([0.0728, 0.2955, 0.3439, 0.2879]) -Greedy action tensor([-1.8848, -0.4665, 0.6351, -0.1535]) tensor([0.0431, 0.1780, 0.5355, 0.2434]) -Greedy action tensor([-1.3464, 0.1630, 0.3284, 0.0140]) tensor([0.0678, 0.3065, 0.3617, 0.2641]) -Greedy action tensor([-1.5079, 0.2258, 0.3053, 0.0380]) tensor([0.0572, 0.3238, 0.3506, 0.2684]) -Greedy action tensor([-1.6064, -0.5415, 0.4976, -0.0156]) tensor([0.0588, 0.1705, 0.4821, 0.2886]) -Greedy action tensor([-1.7540, -0.3886, 0.5622, -0.1100]) tensor([0.0494, 0.1936, 0.5011, 0.2558]) -Greedy action tensor([-1.9290, -0.4427, 0.6607, -0.1711]) tensor([0.0407, 0.1801, 0.5429, 0.2363]) -Greedy action tensor([-1.5892, 0.1374, -0.6496, -1.0513]) tensor([0.0918, 0.5161, 0.2349, 0.1572]) -Greedy action tensor([-1.8702, -0.4653, 0.7279, 0.0087]) tensor([0.0399, 0.1626, 0.5363, 0.2612]) -Greedy action tensor([-1.9216, -0.4301, 0.6566, -0.1671]) tensor([0.0410, 0.1821, 0.5399, 0.2369]) -Greedy action tensor([-1.9147, -0.4429, 0.6546, -0.1631]) tensor([0.0414, 0.1802, 0.5400, 0.2384]) -Greedy action tensor([-1.9242, -0.4510, 0.6605, -0.1708]) tensor([0.0410, 0.1788, 0.5435, 0.2367]) -Greedy action tensor([-1.6671, -0.4383, 0.6092, -0.0050]) tensor([0.0515, 0.1759, 0.5014, 0.2713]) -Greedy action tensor([-1.5334, 0.2957, 0.3512, -0.0266]) tensor([0.0546, 0.3399, 0.3593, 0.2462]) -Greedy action tensor([-1.8833, -0.2978, 0.6176, -0.1357]) tensor([0.0420, 0.2050, 0.5120, 0.2410]) -Greedy action tensor([-0.8547, -0.1823, 0.3774, 0.5802]) tensor([0.0945, 0.1850, 0.3238, 0.3967]) -Greedy action tensor([-1.8707, -0.4262, 0.6259, -0.1457]) tensor([0.0435, 0.1844, 0.5280, 0.2441]) -Greedy action tensor([-1.8855, -0.4044, 0.6576, -0.1117]) tensor([0.0417, 0.1832, 0.5297, 0.2454]) -Greedy action tensor([-1.7645, -0.5108, 0.5842, -0.1126]) tensor([0.0495, 0.1735, 0.5186, 0.2584]) -Greedy action tensor([-1.7818, -0.3906, 0.6037, -0.0558]) tensor([0.0465, 0.1869, 0.5053, 0.2613]) -Greedy action tensor([-1.8153, -0.1958, 0.5685, -0.1109]) tensor([0.0447, 0.2255, 0.4843, 0.2455]) -Greedy action tensor([-1.8496, -0.4923, 0.6137, -0.1338]) tensor([0.0451, 0.1751, 0.5292, 0.2506]) -Greedy action tensor([-1.9189, -0.3825, 0.6481, -0.1678]) tensor([0.0409, 0.1902, 0.5331, 0.2358]) -Greedy action tensor([-0.5361, 0.9019, 0.0347, 0.4209]) tensor([0.1043, 0.4394, 0.1846, 0.2716]) -Greedy action tensor([-1.9099, -0.4472, 0.6501, -0.1634]) tensor([0.0417, 0.1800, 0.5393, 0.2391]) -Greedy action tensor([-1.9029, -0.3800, 0.6366, -0.1499]) tensor([0.0416, 0.1908, 0.5274, 0.2402]) -Greedy action tensor([-1.8952, -0.4360, 0.6412, -0.1521]) tensor([0.0423, 0.1819, 0.5342, 0.2416]) -Greedy action tensor([ 0.7684, -0.6873, -0.0277, -0.5360]) tensor([0.5113, 0.1193, 0.2307, 0.1387]) -Greedy action tensor([ 0.7402, -0.3139, -0.0525, -0.1307]) tensor([0.4505, 0.1570, 0.2039, 0.1886]) -Greedy action tensor([ 0.9908, -0.5780, -0.1646, -0.4314]) tensor([0.5668, 0.1181, 0.1785, 0.1367]) -Greedy action tensor([ 0.8560, -0.4784, -0.0909, -0.3366]) tensor([0.5116, 0.1347, 0.1985, 0.1552]) -Greedy action tensor([ 0.8031, -0.4052, 0.1341, -0.6283]) tensor([0.4878, 0.1457, 0.2499, 0.1166]) -Greedy action tensor([ 0.5624, -0.0790, -0.0913, 0.0680]) tensor([0.3764, 0.1982, 0.1958, 0.2296]) -Greedy action tensor([ 0.9141, -0.7542, 0.0721, -0.2974]) tensor([0.5216, 0.0984, 0.2247, 0.1553]) -Greedy action tensor([ 0.9685, -0.4506, -0.2037, -0.3832]) tensor([0.5524, 0.1336, 0.1711, 0.1429]) -Greedy action tensor([ 0.4567, -0.1412, -0.0173, -0.2911]) tensor([0.3780, 0.2078, 0.2353, 0.1789]) -Greedy action tensor([ 0.6419, -0.5268, 0.0026, -0.4946]) tensor([0.4631, 0.1439, 0.2443, 0.1486]) -Greedy action tensor([ 0.9501, -0.9085, -0.0825, -0.6981]) tensor([0.5867, 0.0915, 0.2089, 0.1129]) -Greedy action tensor([ 0.1530, -0.1619, -0.0288, -0.2428]) tensor([0.3090, 0.2255, 0.2576, 0.2080]) -Greedy action tensor([ 0.6405, -0.4521, 0.1191, -0.5846]) tensor([0.4499, 0.1509, 0.2671, 0.1322]) -Greedy action tensor([ 0.8073, -0.5469, -0.1541, -0.2062]) tensor([0.4991, 0.1289, 0.1909, 0.1812]) -Greedy action tensor([ 0.9277, -0.5723, -0.0393, -0.4214]) tensor([0.5368, 0.1198, 0.2041, 0.1393]) -Greedy action tensor([ 0.7934, -0.6041, 0.1411, -0.2162]) tensor([0.4690, 0.1159, 0.2442, 0.1709]) -Greedy action tensor([ 0.7318, -0.4509, -0.0938, -0.3927]) tensor([0.4833, 0.1481, 0.2116, 0.1570]) -Greedy action tensor([ 0.8438, -0.3477, -0.0074, -0.1727]) tensor([0.4779, 0.1452, 0.2040, 0.1729]) -Greedy action tensor([ 0.8885, -0.6000, 0.0111, -0.6091]) tensor([0.5361, 0.1210, 0.2229, 0.1199]) -Greedy action tensor([ 0.7063, -0.6877, -0.0607, -0.2279]) tensor([0.4750, 0.1178, 0.2206, 0.1866]) -Greedy action tensor([ 0.5988, -0.4489, -0.0091, -0.2446]) tensor([0.4300, 0.1508, 0.2341, 0.1850]) -Greedy action tensor([ 1.0656, -0.6923, 0.1186, -0.4622]) tensor([0.5626, 0.0970, 0.2183, 0.1221]) -Greedy action tensor([ 0.9452, -0.1397, 0.0182, -0.3497]) tensor([0.4981, 0.1683, 0.1971, 0.1364]) -Greedy action tensor([ 1.3149, -1.1078, 0.0299, -0.3561]) tensor([0.6438, 0.0571, 0.1781, 0.1211]) -Greedy action tensor([ 0.9767, -0.3907, -0.1129, -0.2120]) tensor([0.5275, 0.1344, 0.1774, 0.1607]) -Greedy action tensor([ 0.7175, -0.4443, -0.2324, -0.4466]) tensor([0.4971, 0.1555, 0.1922, 0.1552]) -Greedy action tensor([ 0.4798, -0.3867, -0.0516, -0.1149]) tensor([0.3906, 0.1642, 0.2296, 0.2155]) -Greedy action tensor([ 0.7160, -0.4449, 0.0159, -0.5674]) tensor([0.4792, 0.1501, 0.2379, 0.1328]) -Greedy action tensor([ 0.6520, -0.4985, -0.1214, 0.0975]) tensor([0.4251, 0.1345, 0.1962, 0.2442]) -Greedy action tensor([ 0.8144, -0.5533, -0.0440, -0.2420]) tensor([0.4935, 0.1257, 0.2092, 0.1716]) -Greedy action tensor([ 0.6735, -0.2875, 0.1700, -0.3632]) tensor([0.4271, 0.1634, 0.2581, 0.1515]) -Greedy action tensor([ 0.6985, -0.4433, -0.0247, -0.2402]) tensor([0.4555, 0.1454, 0.2210, 0.1782]) -Greedy action tensor([ 0.6686, -0.6559, -0.0403, -0.3403]) tensor([0.4711, 0.1253, 0.2319, 0.1718]) -Greedy action tensor([ 0.8965, -0.4691, -0.0588, -0.3533]) tensor([0.5191, 0.1325, 0.1997, 0.1488]) -Greedy action tensor([ 0.7130, -0.0303, 0.2591, -0.2830]) tensor([0.4032, 0.1917, 0.2561, 0.1489]) -Greedy action tensor([ 0.8723, -0.5474, -0.0647, -0.6224]) tensor([0.5382, 0.1301, 0.2109, 0.1207]) -Greedy action tensor([ 1.0225, -0.8259, -0.0359, -0.3287]) tensor([0.5671, 0.0893, 0.1968, 0.1468]) -Greedy action tensor([ 1.2386, -1.1066, 0.0377, -1.0522]) tensor([0.6676, 0.0640, 0.2009, 0.0675]) -Greedy action tensor([ 0.8125, -0.7389, 0.0617, -0.4436]) tensor([0.5079, 0.1077, 0.2398, 0.1446]) -Greedy action tensor([ 0.5636, -0.3114, -0.0214, -0.1132]) tensor([0.4029, 0.1679, 0.2245, 0.2048]) -Greedy action tensor([ 0.6324, -0.3111, -0.0919, -0.1457]) tensor([0.4286, 0.1668, 0.2077, 0.1968]) -Greedy action tensor([ 0.7465, -0.3265, -0.0329, -0.2671]) tensor([0.4622, 0.1581, 0.2120, 0.1677]) -Greedy action tensor([ 0.8179, -0.5159, -0.0527, -0.0914]) tensor([0.4796, 0.1264, 0.2008, 0.1932]) -Greedy action tensor([ 1.1191, -1.0064, 0.0289, -0.6666]) tensor([0.6161, 0.0735, 0.2071, 0.1033]) -Greedy action tensor([ 0.4436, -0.0801, 0.0195, -0.0204]) tensor([0.3478, 0.2060, 0.2276, 0.2187]) -Greedy action tensor([ 0.7329, -0.3371, -0.0699, -0.1861]) tensor([0.4566, 0.1566, 0.2046, 0.1822]) -Greedy action tensor([ 1.0580, -0.4760, -0.3064, -0.3931]) tensor([0.5863, 0.1265, 0.1498, 0.1374]) -Greedy action tensor([ 0.5991, -0.3637, -0.0718, -0.3171]) tensor([0.4361, 0.1665, 0.2229, 0.1745]) -Greedy action tensor([ 0.8609, -0.4880, -0.0425, -0.5180]) tensor([0.5218, 0.1354, 0.2114, 0.1314]) -Greedy action tensor([ 0.4142, -0.2611, -0.0744, -0.1544]) tensor([0.3719, 0.1893, 0.2282, 0.2106]) -Greedy action tensor([ 1.5786, -0.8847, 0.1148, -0.7238]) tensor([0.7060, 0.0601, 0.1633, 0.0706]) -Greedy action tensor([ 1.0446, -0.6805, 0.2634, -0.5731]) tensor([0.5451, 0.0971, 0.2496, 0.1081]) -Greedy action tensor([ 1.2739, -0.8305, -0.0386, -0.6406]) tensor([0.6500, 0.0792, 0.1749, 0.0958]) -Greedy action tensor([ 1.0451, -0.7852, 0.0883, -0.6538]) tensor([0.5789, 0.0928, 0.2224, 0.1059]) -Greedy action tensor([ 0.9725, -0.4322, 0.1823, -0.1636]) tensor([0.4950, 0.1215, 0.2246, 0.1589]) -Greedy action tensor([ 0.6996, -0.5203, -0.1366, -0.1827]) tensor([0.4668, 0.1378, 0.2023, 0.1932]) -Greedy action tensor([ 0.4502, -0.2346, -0.0710, -0.3852]) tensor([0.3950, 0.1992, 0.2345, 0.1713]) -Greedy action tensor([ 0.9393, -0.2528, 0.0585, -0.2409]) tensor([0.4938, 0.1499, 0.2046, 0.1517]) -Greedy action tensor([ 1.0790, -0.4945, -0.1516, -0.4839]) tensor([0.5852, 0.1213, 0.1709, 0.1226]) -Greedy action tensor([ 0.5441, -0.3019, 0.0071, -0.2758]) tensor([0.4075, 0.1749, 0.2382, 0.1795]) -Greedy action tensor([ 1.0160, -0.6346, -0.1353, -0.4263]) tensor([0.5732, 0.1100, 0.1813, 0.1355]) -Greedy action tensor([ 1.0347, -0.4522, -0.0572, -0.2363]) tensor([0.5428, 0.1227, 0.1822, 0.1523]) -Greedy action tensor([ 0.5809, -0.1739, -0.0412, -0.1174]) tensor([0.3993, 0.1877, 0.2144, 0.1986]) -Greedy action tensor([ 0.5082, -0.4134, -0.2751, -0.4195]) tensor([0.4444, 0.1768, 0.2031, 0.1757]) -Greedy action tensor([ 0.7849, -0.4822, -0.0621, -0.3088]) tensor([0.4889, 0.1377, 0.2096, 0.1638]) -Greedy action tensor([ 0.5888, -0.0860, -0.1012, -0.0339]) tensor([0.3926, 0.1999, 0.1969, 0.2106]) -Greedy action tensor([ 0.7014, -0.1049, -0.0335, -0.1489]) tensor([0.4249, 0.1897, 0.2038, 0.1816]) -Greedy action tensor([ 0.6549, -0.3443, -0.1262, -0.1734]) tensor([0.4419, 0.1627, 0.2024, 0.1930]) -Greedy action tensor([ 0.6034, -0.0485, -0.0361, -0.1921]) tensor([0.4000, 0.2084, 0.2110, 0.1805]) -Greedy action tensor([ 0.7100, -0.0169, 0.0113, 0.0519]) tensor([0.4002, 0.1935, 0.1990, 0.2073]) -Greedy action tensor([0.3817, 0.0019, 0.0836, 0.0788]) tensor([0.3160, 0.2161, 0.2345, 0.2334]) -Greedy action tensor([ 0.8180, -0.1935, 0.1557, -0.1344]) tensor([0.4415, 0.1606, 0.2276, 0.1703]) -Greedy action tensor([ 1.3232, -0.7155, -0.1130, -0.5136]) tensor([0.6547, 0.0852, 0.1557, 0.1043]) -Greedy action tensor([ 0.5368, -0.2964, -0.2015, 0.0453]) tensor([0.3961, 0.1722, 0.1893, 0.2423]) -Greedy action tensor([ 0.9548, -0.3492, -0.0287, -0.1235]) tensor([0.5036, 0.1367, 0.1883, 0.1713]) -Greedy action tensor([ 0.8125, -0.5026, -0.0259, -0.3110]) tensor([0.4936, 0.1325, 0.2134, 0.1605]) -Greedy action tensor([ 0.4517, -0.0068, -0.1669, -0.0089]) tensor([0.3569, 0.2256, 0.1923, 0.2252]) -Greedy action tensor([ 0.7928, -0.5967, 0.1085, -0.5712]) tensor([0.4977, 0.1240, 0.2511, 0.1272]) -Greedy action tensor([ 0.4470, -0.2322, -0.0534, -0.2099]) tensor([0.3800, 0.1927, 0.2304, 0.1970]) -Greedy action tensor([ 0.7146, -0.6572, 0.1067, -0.3865]) tensor([0.4693, 0.1191, 0.2555, 0.1561]) -Greedy action tensor([ 1.0993, -0.7045, 0.2015, -0.7290]) tensor([0.5771, 0.0950, 0.2352, 0.0927]) -Greedy action tensor([ 0.1369, -0.5064, 0.0792, 0.2799]) tensor([0.2760, 0.1450, 0.2605, 0.3184]) -Greedy action tensor([ 1.4742, -0.4734, 0.6700, 0.9973]) tensor([0.4523, 0.0645, 0.2024, 0.2807]) -Greedy action tensor([ 0.4217, -0.3064, -0.6862, 0.7745]) tensor([0.3090, 0.1492, 0.1020, 0.4397]) -Greedy action tensor([-0.0101, 0.6290, -0.2208, -1.0415]) tensor([0.2462, 0.4665, 0.1995, 0.0878]) -Greedy action tensor([ 0.3716, 0.1238, -1.0193, 0.6038]) tensor([0.3039, 0.2372, 0.0756, 0.3833]) -Greedy action tensor([-0.9266, -0.0632, -1.7916, 0.6572]) tensor([0.1154, 0.2736, 0.0486, 0.5624]) -Greedy action tensor([ 0.1557, 0.0241, 0.0645, -0.0856]) tensor([0.2797, 0.2452, 0.2553, 0.2197]) -Greedy action tensor([-0.4832, -0.4078, 0.3520, -0.3388]) tensor([0.1805, 0.1947, 0.4162, 0.2086]) -Greedy action tensor([ 0.0557, 0.2716, -0.1212, -1.0151]) tensor([0.2923, 0.3627, 0.2449, 0.1002]) -Greedy action tensor([-0.6011, -0.6710, -0.4662, 0.2286]) tensor([0.1862, 0.1737, 0.2131, 0.4270]) -Greedy action tensor([-0.0570, -2.1092, -0.1096, -0.1494]) tensor([0.3346, 0.0430, 0.3174, 0.3050]) -Greedy action tensor([-0.2055, -0.8060, 0.1406, -0.3922]) tensor([0.2637, 0.1447, 0.3728, 0.2188]) -Greedy action tensor([ 0.2552, -0.3907, -0.4975, -0.4160]) tensor([0.3990, 0.2091, 0.1880, 0.2039]) -Greedy action tensor([-0.2051, 0.0357, 1.1926, -0.8306]) tensor([0.1459, 0.1856, 0.5904, 0.0781]) -Greedy action tensor([ 0.1210, -0.3267, 0.5281, -0.4631]) tensor([0.2703, 0.1728, 0.4062, 0.1507]) -Greedy action tensor([ 0.1939, -1.1664, 0.2557, -0.1291]) tensor([0.3285, 0.0843, 0.3494, 0.2378]) -Greedy action tensor([-0.2395, -0.8302, 0.3897, 0.8949]) tensor([0.1529, 0.0847, 0.2869, 0.4755]) -Greedy action tensor([ 0.0122, 0.4824, -0.0360, 0.7754]) tensor([0.1755, 0.2808, 0.1672, 0.3764]) -Greedy action tensor([ 1.3489, -0.8343, 0.4704, 0.5952]) tensor([0.5003, 0.0564, 0.2078, 0.2355]) -Greedy action tensor([ 0.1139, -0.5440, 0.1261, -1.2217]) tensor([0.3580, 0.1854, 0.3624, 0.0942]) -Greedy action tensor([ 0.6261, -0.1654, 1.5629, 1.1740]) tensor([0.1744, 0.0790, 0.4450, 0.3016]) -Greedy action tensor([-0.4601, 0.7652, -0.5213, -0.8297]) tensor([0.1656, 0.5641, 0.1558, 0.1145]) -Greedy action tensor([ 0.0457, -1.4297, 0.1570, -0.6834]) tensor([0.3535, 0.0808, 0.3951, 0.1705]) -Greedy action tensor([-0.2747, -1.1320, -0.2395, 0.2075]) tensor([0.2451, 0.1040, 0.2539, 0.3970]) -Greedy action tensor([-0.4799, -0.7625, 0.7311, -0.2952]) tensor([0.1584, 0.1194, 0.5317, 0.1905]) -Greedy action tensor([ 0.0841, 0.0017, -0.0141, -0.5107]) tensor([0.2959, 0.2725, 0.2683, 0.1633]) -Greedy action tensor([ 0.2280, -1.0097, 0.0146, -0.2820]) tensor([0.3706, 0.1075, 0.2994, 0.2225]) -Greedy action tensor([-0.9485, 0.2913, -0.1883, 0.1343]) tensor([0.1048, 0.3619, 0.2240, 0.3093]) -Greedy action tensor([ 0.0155, -1.7443, -0.1617, 0.7377]) tensor([0.2458, 0.0423, 0.2059, 0.5060]) -Greedy action tensor([ 0.6452, -0.0823, -0.1718, -0.2679]) tensor([0.4299, 0.2077, 0.1899, 0.1725]) -Greedy action tensor([ 0.2617, -0.4951, 0.8569, -0.6267]) tensor([0.2707, 0.1270, 0.4909, 0.1114]) -Greedy action tensor([ 0.2736, -0.0396, -0.2277, -0.5176]) tensor([0.3584, 0.2620, 0.2171, 0.1625]) -Greedy action tensor([-0.0737, 0.3208, 0.0523, -0.4108]) tensor([0.2309, 0.3425, 0.2619, 0.1648]) -Greedy action tensor([ 0.3507, -1.3235, -0.3998, -0.7738]) tensor([0.5039, 0.0945, 0.2379, 0.1637]) -Greedy action tensor([ 0.2514, -0.7064, 0.5653, 0.0905]) tensor([0.2775, 0.1065, 0.3798, 0.2362]) -Greedy action tensor([ 0.1644, -0.8690, -0.2555, -0.4867]) tensor([0.3946, 0.1404, 0.2593, 0.2058]) -Greedy action tensor([-0.0291, -1.3261, -0.2489, 0.1913]) tensor([0.3010, 0.0823, 0.2416, 0.3752]) -Greedy action tensor([-0.6746, -1.2058, 0.4742, -0.2048]) tensor([0.1577, 0.0927, 0.4974, 0.2522]) -Greedy action tensor([ 0.1489, -0.8053, 0.5409, 1.1958]) tensor([0.1750, 0.0674, 0.2590, 0.4986]) -Greedy action tensor([ 0.2851, 0.4864, -0.8277, -0.3843]) tensor([0.3264, 0.3992, 0.1073, 0.1671]) -Greedy action tensor([ 0.2624, -1.7380, -0.5444, 0.3548]) tensor([0.3734, 0.0505, 0.1666, 0.4095]) -Greedy action tensor([ 0.0068, 0.6323, -0.3558, -0.5572]) tensor([0.2419, 0.4522, 0.1683, 0.1376]) -Greedy action tensor([-0.7336, -0.5934, 0.0885, 0.0301]) tensor([0.1522, 0.1751, 0.3462, 0.3266]) -Greedy action tensor([ 1.9795, -1.6828, 0.6323, 0.8635]) tensor([0.6199, 0.0159, 0.1612, 0.2031]) -Greedy action tensor([ 0.3326, -0.7644, -0.4773, 0.6240]) tensor([0.3208, 0.1071, 0.1427, 0.4294]) -Greedy action tensor([ 1.3817, -1.5252, 0.8506, 0.4378]) tensor([0.4922, 0.0269, 0.2894, 0.1915]) -Greedy action tensor([ 0.1805, -0.4894, 0.5736, -0.0102]) tensor([0.2618, 0.1340, 0.3879, 0.2163]) -Greedy action tensor([ 1.1734, -0.0827, 0.0113, -0.1653]) tensor([0.5377, 0.1531, 0.1682, 0.1410]) -Greedy action tensor([-0.9909, 0.5965, -0.0253, -1.2371]) tensor([0.1075, 0.5259, 0.2824, 0.0841]) -Greedy action tensor([-0.2522, -0.5805, 0.7877, -0.7998]) tensor([0.1950, 0.1404, 0.5517, 0.1128]) -Greedy action tensor([ 0.9995, -1.0477, 1.0537, 0.1690]) tensor([0.3816, 0.0493, 0.4028, 0.1663]) -Greedy action tensor([-0.4008, -0.5809, 0.2008, 0.5898]) tensor([0.1574, 0.1315, 0.2873, 0.4239]) -Greedy action tensor([-1.9283, -1.4704, 0.5646, -0.9914]) tensor([0.0580, 0.0918, 0.7021, 0.1481]) -Greedy action tensor([ 1.1110, -0.2793, -0.2155, 0.7291]) tensor([0.4552, 0.1133, 0.1208, 0.3107]) -Greedy action tensor([-0.5105, 0.1724, 0.9291, -1.9581]) tensor([0.1345, 0.2663, 0.5675, 0.0316]) -Greedy action tensor([ 0.1480, 0.4433, 0.5272, -0.6463]) tensor([0.2349, 0.3156, 0.3433, 0.1062]) -Greedy action tensor([ 0.2648, -0.6366, -0.0957, -0.5412]) tensor([0.3922, 0.1592, 0.2735, 0.1752]) -Greedy action tensor([-0.4914, -0.6886, -0.2931, 0.1106]) tensor([0.2055, 0.1687, 0.2506, 0.3752]) -Greedy action tensor([-1.7063, -0.6163, -0.5034, 0.9871]) tensor([0.0453, 0.1347, 0.1508, 0.6693]) -Greedy action tensor([-0.4507, -0.2984, 0.9241, -0.5330]) tensor([0.1420, 0.1654, 0.5617, 0.1308]) -Greedy action tensor([-1.1402, -0.9682, -0.6968, 0.5823]) tensor([0.1070, 0.1271, 0.1667, 0.5991]) -Greedy action tensor([ 1.4295, -1.1153, -0.1261, 1.0159]) tensor([0.5126, 0.0402, 0.1082, 0.3390]) -Greedy action tensor([-0.2334, 1.2752, -0.3295, -0.0120]) tensor([0.1303, 0.5889, 0.1183, 0.1626]) -Greedy action tensor([-0.8630, -1.2228, 1.3302, -0.4575]) tensor([0.0822, 0.0574, 0.7371, 0.1233]) -Greedy action tensor([ 1.1818, -0.2569, 0.5636, 1.4166]) tensor([0.3289, 0.0780, 0.1772, 0.4159]) -Greedy action tensor([-0.6138, -0.2027, 0.3480, -0.0402]) tensor([0.1449, 0.2186, 0.3792, 0.2572]) -Greedy action tensor([ 0.2905, 0.1623, -0.1650, -0.5230]) tensor([0.3382, 0.2975, 0.2144, 0.1499]) -Greedy action tensor([-0.0196, 0.2293, -0.4187, -0.4819]) tensor([0.2791, 0.3579, 0.1872, 0.1758]) -Greedy action tensor([ 0.2512, -0.5257, 0.0120, -0.8796]) tensor([0.3891, 0.1789, 0.3064, 0.1256]) -Greedy action tensor([ 0.0011, -0.9840, -0.0189, -0.8965]) tensor([0.3622, 0.1352, 0.3550, 0.1476]) -Greedy action tensor([-0.6464, -0.8299, -0.2883, -0.5697]) tensor([0.2303, 0.1917, 0.3294, 0.2486]) -Greedy action tensor([ 1.5542, -1.0043, 0.7636, 1.0169]) tensor([0.4727, 0.0366, 0.2144, 0.2762]) -Greedy action tensor([0.0546, 0.1322, 0.4992, 0.1008]) tensor([0.2133, 0.2305, 0.3328, 0.2234]) -Greedy action tensor([ 0.2195, 0.1598, 0.1083, -0.1426]) tensor([0.2830, 0.2666, 0.2533, 0.1971]) -Greedy action tensor([ 0.8397, -0.6208, 0.2582, -0.6011]) tensor([0.4931, 0.1145, 0.2757, 0.1167]) -Greedy action tensor([ 0.7631, -1.5723, 0.7820, -0.3205]) tensor([0.4074, 0.0394, 0.4152, 0.1379]) -Greedy action tensor([-0.4642, -1.2496, 0.4340, -1.0082]) tensor([0.2226, 0.1015, 0.5466, 0.1292]) -Greedy action tensor([-0.1030, 0.3586, 0.0253, -0.9653]) tensor([0.2412, 0.3827, 0.2742, 0.1018]) -Greedy action tensor([-0.0262, -0.4621, -1.1287, 0.0090]) tensor([0.3317, 0.2145, 0.1101, 0.3436]) -Greedy action tensor([-0.0624, -0.4046, -0.1549, -0.0530]) tensor([0.2754, 0.1956, 0.2511, 0.2780]) -Greedy action tensor([-0.4833, 0.0950, 0.5909, -0.6481]) tensor([0.1525, 0.2719, 0.4464, 0.1293]) -Greedy action tensor([ 1.4664, -0.2831, -0.7554, 0.3125]) tensor([0.6259, 0.1088, 0.0679, 0.1974]) -Greedy action tensor([ 2.4662, -1.1299, -0.3303, 0.4055]) tensor([0.8225, 0.0226, 0.0502, 0.1048]) -Greedy action tensor([ 1.5799, -0.8255, -0.0754, 0.7798]) tensor([0.5779, 0.0521, 0.1104, 0.2596]) -Greedy action tensor([ 1.6229, -0.3000, -0.7185, 0.1316]) tensor([0.6815, 0.0996, 0.0656, 0.1534]) -Greedy action tensor([ 1.2812, -0.4963, -0.4544, 0.6811]) tensor([0.5280, 0.0893, 0.0931, 0.2897]) -Greedy action tensor([ 1.8896, 0.4284, -0.4475, 0.2832]) tensor([0.6540, 0.1517, 0.0632, 0.1312]) -Greedy action tensor([ 1.3979, -0.0087, -0.2840, 0.2201]) tensor([0.5751, 0.1409, 0.1070, 0.1771]) -Greedy action tensor([ 1.3208, -0.4498, -0.5496, 0.5333]) tensor([0.5620, 0.0957, 0.0866, 0.2557]) -Greedy action tensor([ 1.3019, -0.0688, 0.2277, -0.0817]) tensor([0.5417, 0.1375, 0.1850, 0.1358]) -Greedy action tensor([ 1.2018, -0.0052, -0.6093, 0.2100]) tensor([0.5454, 0.1631, 0.0892, 0.2023]) -Greedy action tensor([ 2.3653, -0.7249, -0.5281, 0.5196]) tensor([0.7944, 0.0361, 0.0440, 0.1254]) -Greedy action tensor([ 1.7679, -0.8442, -0.2678, 0.4720]) tensor([0.6768, 0.0497, 0.0884, 0.1852]) -Greedy action tensor([ 2.0246, 0.1064, 0.1021, -0.0211]) tensor([0.7030, 0.1033, 0.1028, 0.0909]) -Greedy action tensor([ 0.7740, -0.1948, 0.0491, 0.0132]) tensor([0.4290, 0.1628, 0.2078, 0.2004]) -Greedy action tensor([ 1.1776, -0.4588, -0.0593, 0.2465]) tensor([0.5322, 0.1036, 0.1545, 0.2097]) -Greedy action tensor([ 1.0582, -0.0731, -0.7245, 0.6190]) tensor([0.4683, 0.1511, 0.0788, 0.3018]) -Greedy action tensor([ 1.7767, -0.6144, -1.2361, 0.1115]) tensor([0.7520, 0.0688, 0.0370, 0.1422]) -Greedy action tensor([ 1.4178, -0.6977, -0.1813, 0.2889]) tensor([0.6075, 0.0732, 0.1228, 0.1965]) -Greedy action tensor([ 1.4202, -0.9617, -0.1171, 0.2205]) tensor([0.6216, 0.0574, 0.1336, 0.1873]) -Greedy action tensor([ 1.0363, -0.3124, -0.4431, -0.0362]) tensor([0.5466, 0.1419, 0.1245, 0.1870]) -Greedy action tensor([ 1.7431, -0.1095, -0.3781, 0.4863]) tensor([0.6405, 0.1004, 0.0768, 0.1823]) -Greedy action tensor([ 1.8277, 0.3569, -0.1399, 0.2223]) tensor([0.6368, 0.1463, 0.0890, 0.1279]) -Greedy action tensor([ 1.5613, -0.2393, -0.7933, 0.0472]) tensor([0.6756, 0.1116, 0.0641, 0.1486]) -Greedy action tensor([ 1.5712, -0.6156, -0.2868, 0.3077]) tensor([0.6448, 0.0724, 0.1006, 0.1823]) -Greedy action tensor([ 1.4758, -0.0971, -0.3728, -0.1632]) tensor([0.6414, 0.1330, 0.1010, 0.1245]) -Greedy action tensor([ 1.0789, 0.0147, -0.4033, 0.0895]) tensor([0.5144, 0.1775, 0.1168, 0.1913]) -Greedy action tensor([ 1.4524, -0.1117, -0.2404, 0.4411]) tensor([0.5691, 0.1191, 0.1047, 0.2070]) -Greedy action tensor([ 1.4465, -0.4212, -0.5498, 0.1880]) tensor([0.6352, 0.0981, 0.0863, 0.1804]) -Greedy action tensor([ 2.2157, -0.9732, -0.1244, 0.6131]) tensor([0.7469, 0.0308, 0.0719, 0.1504]) -Greedy action tensor([ 2.0423, -1.3704, -0.0796, 0.3437]) tensor([0.7487, 0.0247, 0.0897, 0.1370]) -Greedy action tensor([ 2.0835, -1.0218, -0.1446, 0.2593]) tensor([0.7611, 0.0341, 0.0820, 0.1228]) -Greedy action tensor([ 1.8084, -0.9258, -0.2689, 0.3074]) tensor([0.7077, 0.0460, 0.0886, 0.1577]) -Greedy action tensor([ 1.6063, -0.5364, -0.7710, 0.0272]) tensor([0.7061, 0.0828, 0.0655, 0.1456]) -Greedy action tensor([ 1.6421, -0.2648, -0.5397, 0.4184]) tensor([0.6429, 0.0955, 0.0725, 0.1891]) -Greedy action tensor([ 1.3445, -0.2372, -0.6879, 0.4361]) tensor([0.5748, 0.1182, 0.0753, 0.2317]) -Greedy action tensor([ 1.9076, -0.7727, 0.0151, 0.4519]) tensor([0.6885, 0.0472, 0.1038, 0.1606]) -Greedy action tensor([ 1.4006, -0.8334, -0.5273, 0.4136]) tensor([0.6153, 0.0659, 0.0895, 0.2293]) -Greedy action tensor([ 1.2578, -0.5824, -0.2970, 0.7268]) tensor([0.5107, 0.0811, 0.1079, 0.3003]) -Greedy action tensor([ 1.5336, -0.5350, -0.2849, 0.2936]) tensor([0.6337, 0.0801, 0.1028, 0.1834]) -Greedy action tensor([ 1.4485, -0.2661, -0.1671, 0.4292]) tensor([0.5748, 0.1035, 0.1143, 0.2074]) -Greedy action tensor([ 1.4188, -0.3261, -0.7483, 0.3628]) tensor([0.6109, 0.1067, 0.0700, 0.2125]) -Greedy action tensor([ 1.3841, -0.6101, -0.1270, 0.3907]) tensor([0.5790, 0.0788, 0.1278, 0.2144]) -Greedy action tensor([ 1.0112, -0.3077, -0.0474, 0.4244]) tensor([0.4607, 0.1232, 0.1598, 0.2562]) -Greedy action tensor([ 1.9589, -1.1663, -0.0047, 0.2177]) tensor([0.7355, 0.0323, 0.1032, 0.1289]) -Greedy action tensor([ 1.0094, -0.5094, -0.2959, 0.3017]) tensor([0.5043, 0.1104, 0.1367, 0.2485]) -Greedy action tensor([ 1.7258, -0.7581, -0.1115, 0.0957]) tensor([0.6951, 0.0580, 0.1107, 0.1362]) -Greedy action tensor([ 1.6448, -0.4888, -0.3992, 0.2930]) tensor([0.6637, 0.0786, 0.0860, 0.1718]) -Greedy action tensor([ 1.6836, -0.7609, -0.3238, 0.3292]) tensor([0.6760, 0.0587, 0.0908, 0.1745]) -Greedy action tensor([ 1.1988, -0.3782, -0.6867, 0.2133]) tensor([0.5775, 0.1193, 0.0876, 0.2156]) -Greedy action tensor([ 1.3497, -0.2077, -0.8396, 0.3338]) tensor([0.5936, 0.1251, 0.0665, 0.2149]) -Greedy action tensor([ 0.7851, -0.2681, -0.2776, -0.0883]) tensor([0.4735, 0.1652, 0.1636, 0.1977]) -Greedy action tensor([ 1.8214, 0.2629, -0.3795, 0.5589]) tensor([0.6234, 0.1312, 0.0690, 0.1764]) -Greedy action tensor([ 1.3060, -0.3706, -0.4850, 0.2652]) tensor([0.5858, 0.1096, 0.0977, 0.2069]) -Greedy action tensor([ 0.7508, -0.5864, -0.4212, -0.1358]) tensor([0.5039, 0.1323, 0.1561, 0.2077]) -Greedy action tensor([ 1.6752, -0.1683, -0.6347, 0.7375]) tensor([0.6064, 0.0960, 0.0602, 0.2374]) -Greedy action tensor([ 1.2442, -0.4634, -0.5129, 0.4227]) tensor([0.5575, 0.1011, 0.0962, 0.2452]) -Greedy action tensor([ 2.0516, -0.4065, -0.5316, 0.3697]) tensor([0.7423, 0.0635, 0.0561, 0.1381]) -Greedy action tensor([ 2.7292, -0.8796, -0.1201, 0.5098]) tensor([0.8378, 0.0227, 0.0485, 0.0911]) -Greedy action tensor([ 1.4490, -0.2911, -0.8554, 0.3954]) tensor([0.6158, 0.1081, 0.0615, 0.2147]) -Greedy action tensor([ 1.4179, -0.2533, -0.2918, 0.5961]) tensor([0.5529, 0.1040, 0.1000, 0.2431]) -Greedy action tensor([ 1.7773, -0.3206, -0.3139, 0.1298]) tensor([0.6950, 0.0853, 0.0859, 0.1338]) -Greedy action tensor([ 1.4115, -0.3633, -0.4279, 0.3067]) tensor([0.6025, 0.1021, 0.0957, 0.1996]) -Greedy action tensor([ 1.4854, -0.7256, -0.0995, 0.2892]) tensor([0.6185, 0.0678, 0.1268, 0.1870]) -Greedy action tensor([ 1.8633, -1.0357, -0.1101, 0.7491]) tensor([0.6569, 0.0362, 0.0913, 0.2156]) -Greedy action tensor([ 1.4072, -0.7086, -0.1190, 0.2953]) tensor([0.5999, 0.0723, 0.1304, 0.1973]) -Greedy action tensor([ 0.9649, -0.5610, -0.1687, 0.5096]) tensor([0.4601, 0.1000, 0.1481, 0.2918]) -Greedy action tensor([ 1.4660, -0.0538, -1.0141, 0.3756]) tensor([0.6103, 0.1335, 0.0511, 0.2051]) -Greedy action tensor([ 1.9043, 0.1411, -0.4432, 0.5156]) tensor([0.6594, 0.1131, 0.0630, 0.1645]) -Greedy action tensor([ 1.7900, -1.2160, -0.6853, 0.2109]) tensor([0.7464, 0.0369, 0.0628, 0.1539]) -Greedy action tensor([ 1.3202, -0.4207, -0.5004, 0.2482]) tensor([0.5954, 0.1044, 0.0964, 0.2038]) -Greedy action tensor([ 1.7685, -0.5243, -0.0601, 0.6111]) tensor([0.6345, 0.0641, 0.1019, 0.1994]) -Greedy action tensor([ 1.9164, -1.1271, -0.5351, 0.3455]) tensor([0.7453, 0.0355, 0.0642, 0.1549]) -Greedy action tensor([ 1.8185, -0.8019, -0.4775, 0.4716]) tensor([0.6976, 0.0508, 0.0702, 0.1814]) -Greedy action tensor([ 1.0516, -0.5691, -0.4176, 0.4773]) tensor([0.5023, 0.0993, 0.1156, 0.2828]) -Greedy action tensor([ 1.2222, -0.0450, -0.7758, 0.5194]) tensor([0.5229, 0.1473, 0.0709, 0.2589]) -Greedy action tensor([ 1.5104, 0.0123, -0.8020, 0.3731]) tensor([0.6085, 0.1360, 0.0603, 0.1951]) -Greedy action tensor([ 3.0848, 1.0843, 0.1941, -0.0106]) tensor([0.8090, 0.1094, 0.0449, 0.0366]) -Greedy action tensor([ 1.5923, 0.2506, -0.4055, -0.1789]) tensor([0.6381, 0.1668, 0.0865, 0.1086]) -Greedy action tensor([ 1.7944, -0.9478, -0.1851, 0.6399]) tensor([0.6589, 0.0425, 0.0910, 0.2077]) -Greedy action tensor([ 1.3347, -0.2258, -0.3214, 0.1222]) tensor([0.5888, 0.1237, 0.1124, 0.1751]) -Greedy action tensor([ 1.1693, -0.3509, -0.0707, 0.1736]) tensor([0.5326, 0.1165, 0.1541, 0.1968]) -Greedy action tensor([-1.7892, -0.2519, 0.5906, -0.0558]) tensor([0.0452, 0.2104, 0.4885, 0.2559]) -Greedy action tensor([-1.9240, -0.4197, 0.6517, -0.1657]) tensor([0.0409, 0.1841, 0.5376, 0.2374]) -Greedy action tensor([-1.9204, -0.4123, 0.6477, -0.1642]) tensor([0.0411, 0.1855, 0.5356, 0.2378]) -Greedy action tensor([-1.4080, 0.0722, 0.4209, 0.0896]) tensor([0.0621, 0.2731, 0.3870, 0.2778]) -Greedy action tensor([-1.5514, -0.5152, 0.4824, 0.0279]) tensor([0.0613, 0.1728, 0.4685, 0.2974]) -Greedy action tensor([-0.7297, 0.7906, 0.1132, -0.0229]) tensor([0.1008, 0.4609, 0.2341, 0.2043]) -Greedy action tensor([-1.3572, 0.6861, 0.3556, -0.7413]) tensor([0.0621, 0.4789, 0.3441, 0.1149]) -Greedy action tensor([-1.9462, -0.4516, 0.6685, -0.1811]) tensor([0.0401, 0.1786, 0.5473, 0.2340]) -Greedy action tensor([-1.9078, -0.4496, 0.6954, -0.0987]) tensor([0.0401, 0.1725, 0.5422, 0.2451]) -Greedy action tensor([-1.7977, -0.4616, 0.5972, -0.1008]) tensor([0.0471, 0.1792, 0.5166, 0.2571]) -Greedy action tensor([-1.9332, -0.4372, 0.6631, -0.1745]) tensor([0.0405, 0.1809, 0.5435, 0.2352]) -Greedy action tensor([-1.9037, -0.4210, 0.6440, -0.1616]) tensor([0.0419, 0.1844, 0.5348, 0.2390]) -Greedy action tensor([-1.7597, -0.2833, 0.6011, -0.0358]) tensor([0.0463, 0.2028, 0.4911, 0.2598]) -Greedy action tensor([-0.7887, 0.9359, 0.0552, 0.3228]) tensor([0.0835, 0.4685, 0.1942, 0.2538]) -Greedy action tensor([-1.0695, -0.0167, 0.1470, -0.2198]) tensor([0.1044, 0.2991, 0.3523, 0.2441]) -Greedy action tensor([-1.9074, -0.4093, 0.6485, -0.1574]) tensor([0.0415, 0.1855, 0.5343, 0.2387]) -Greedy action tensor([-0.9645, -0.5571, 0.3718, 0.4422]) tensor([0.0962, 0.1446, 0.3662, 0.3929]) -Greedy action tensor([-1.8951, -0.4553, 0.6489, -0.1600]) tensor([0.0423, 0.1787, 0.5390, 0.2400]) -Greedy action tensor([-0.7758, 0.7167, 0.1005, 0.0127]) tensor([0.0995, 0.4426, 0.2390, 0.2189]) -Greedy action tensor([-1.8698, -0.4111, 0.6598, -0.1259]) tensor([0.0424, 0.1825, 0.5324, 0.2427]) -Greedy action tensor([-1.8899, -0.4337, 0.6382, -0.1545]) tensor([0.0426, 0.1826, 0.5334, 0.2414]) -Greedy action tensor([-1.9421, -0.4489, 0.6672, -0.1776]) tensor([0.0402, 0.1789, 0.5462, 0.2347]) -Greedy action tensor([-1.3638, 0.6894, 0.3326, -0.1371]) tensor([0.0566, 0.4413, 0.3089, 0.1931]) -Greedy action tensor([-1.1569, 0.1854, 0.2678, 0.2162]) tensor([0.0773, 0.2960, 0.3214, 0.3053]) -Greedy action tensor([-1.9427, -0.4458, 0.6661, -0.1789]) tensor([0.0402, 0.1795, 0.5458, 0.2345]) -Greedy action tensor([-1.7544, -0.3797, 0.6147, -0.2353]) tensor([0.0495, 0.1956, 0.5288, 0.2260]) -Greedy action tensor([-1.6774, -0.5081, 0.5183, -0.0077]) tensor([0.0540, 0.1739, 0.4853, 0.2868]) -Greedy action tensor([-1.6271, 0.1428, 0.4296, 0.0523]) tensor([0.0499, 0.2927, 0.3900, 0.2674]) -Greedy action tensor([-1.7944, -0.3855, 0.5809, -0.0908]) tensor([0.0469, 0.1917, 0.5040, 0.2574]) -Greedy action tensor([-0.6792, 0.7648, 0.2360, 0.4970]) tensor([0.0911, 0.3860, 0.2275, 0.2954]) -Greedy action tensor([-1.6585, -0.3120, 0.5045, -0.0658]) tensor([0.0542, 0.2083, 0.4712, 0.2664]) -Greedy action tensor([-1.8293, 0.2123, 0.5072, -0.1069]) tensor([0.0406, 0.3125, 0.4197, 0.2271]) -Greedy action tensor([-1.7734, -0.4517, 0.5888, -0.0625]) tensor([0.0479, 0.1794, 0.5079, 0.2648]) -Greedy action tensor([-1.9179, -0.2550, 0.6228, -0.1632]) tensor([0.0404, 0.2132, 0.5128, 0.2337]) -Greedy action tensor([-0.1263, 1.0756, -0.0219, 0.0980]) tensor([0.1495, 0.4974, 0.1660, 0.1871]) -Greedy action tensor([-1.8620, -0.3567, 0.6110, -0.1384]) tensor([0.0435, 0.1962, 0.5163, 0.2440]) -Greedy action tensor([-1.9133, -0.4169, 0.6392, -0.1590]) tensor([0.0415, 0.1854, 0.5331, 0.2400]) -Greedy action tensor([-1.9295, -0.4033, 0.6550, -0.1656]) tensor([0.0405, 0.1863, 0.5369, 0.2363]) -Greedy action tensor([-1.8518, -0.4336, 0.6162, -0.1386]) tensor([0.0445, 0.1837, 0.5250, 0.2468]) -Greedy action tensor([-1.0790, 0.0089, 0.5769, 0.1712]) tensor([0.0788, 0.2338, 0.4125, 0.2750]) -Greedy action tensor([-1.8998, -0.3687, 0.4815, -0.2585]) tensor([0.0463, 0.2140, 0.5008, 0.2389]) -Greedy action tensor([-1.9349, -0.4103, 0.6530, -0.1771]) tensor([0.0405, 0.1860, 0.5387, 0.2348]) -Greedy action tensor([-1.7777, -0.3780, 0.5990, -0.0804]) tensor([0.0470, 0.1905, 0.5060, 0.2565]) -Greedy action tensor([-1.8651, -0.4135, 0.6198, -0.1676]) tensor([0.0440, 0.1879, 0.5279, 0.2402]) -Greedy action tensor([-1.8527, -0.4429, 0.6211, -0.1355]) tensor([0.0444, 0.1818, 0.5267, 0.2472]) -Greedy action tensor([-1.9130, -0.4013, 0.6499, -0.1635]) tensor([0.0412, 0.1869, 0.5348, 0.2371]) -Greedy action tensor([-1.7211, 0.3565, 0.4719, -0.1677]) tensor([0.0441, 0.3522, 0.3953, 0.2085]) -Greedy action tensor([-1.9347, -0.3957, 0.6536, -0.1726]) tensor([0.0403, 0.1880, 0.5368, 0.2350]) -Greedy action tensor([-1.8799, -0.1518, 0.5894, -0.1324]) tensor([0.0413, 0.2328, 0.4885, 0.2373]) -Greedy action tensor([-1.9084, -0.4457, 0.6471, -0.1564]) tensor([0.0417, 0.1802, 0.5374, 0.2406]) -Greedy action tensor([-1.6854, -0.4022, 0.5233, -0.0518]) tensor([0.0531, 0.1916, 0.4834, 0.2720]) -Greedy action tensor([-1.9467, -0.4510, 0.6687, -0.1817]) tensor([0.0400, 0.1787, 0.5474, 0.2339]) -Greedy action tensor([-1.6652, -0.4130, 0.5343, -0.0193]) tensor([0.0535, 0.1870, 0.4823, 0.2772]) -Greedy action tensor([-1.8928, -0.3602, 0.6228, -0.1536]) tensor([0.0422, 0.1954, 0.5222, 0.2402]) -Greedy action tensor([-1.8855, -0.4096, 0.6220, -0.1465]) tensor([0.0428, 0.1874, 0.5259, 0.2438]) -Greedy action tensor([-1.8971, -0.4118, 0.6380, -0.1604]) tensor([0.0422, 0.1862, 0.5321, 0.2395]) -Greedy action tensor([-1.9117, -0.4366, 0.6464, -0.1643]) tensor([0.0416, 0.1820, 0.5375, 0.2389]) -Greedy action tensor([-1.4581, -0.1684, 0.4606, -0.1611]) tensor([0.0662, 0.2405, 0.4511, 0.2422]) -Greedy action tensor([-0.8580, -0.0872, 0.1481, -0.0690]) tensor([0.1235, 0.2669, 0.3377, 0.2718]) -Greedy action tensor([-0.3699, 0.0786, 0.7751, 1.1649]) tensor([0.0966, 0.1513, 0.3037, 0.4484]) -Greedy action tensor([-1.5226, -0.1348, 0.4125, -0.0376]) tensor([0.0612, 0.2451, 0.4237, 0.2701]) -Greedy action tensor([-1.8764, -0.3618, 0.6230, -0.1297]) tensor([0.0426, 0.1939, 0.5190, 0.2445]) -Greedy action tensor([-1.8920, -0.4349, 0.6409, -0.1584]) tensor([0.0425, 0.1824, 0.5347, 0.2404]) -Greedy action tensor([-1.9246, -0.4135, 0.6570, -0.1691]) tensor([0.0408, 0.1847, 0.5387, 0.2358]) -Greedy action tensor([-1.9249, -0.4229, 0.6562, -0.1664]) tensor([0.0408, 0.1832, 0.5391, 0.2368]) -Greedy action tensor([-1.8729, -0.4066, 0.6282, -0.1446]) tensor([0.0432, 0.1871, 0.5266, 0.2431]) -Greedy action tensor([-1.2558, 0.1767, 0.3369, 0.1325]) tensor([0.0709, 0.2968, 0.3484, 0.2840]) -Greedy action tensor([-1.8462, -0.4233, 0.6165, -0.1487]) tensor([0.0448, 0.1857, 0.5252, 0.2444]) -Greedy action tensor([-1.9218, -0.4071, 0.6524, -0.1672]) tensor([0.0409, 0.1860, 0.5367, 0.2364]) -Greedy action tensor([-1.6465, -0.4541, 0.7534, 0.3276]) tensor([0.0444, 0.1463, 0.4895, 0.3198]) -Greedy action tensor([-1.9165, -0.3737, 0.6421, -0.1597]) tensor([0.0410, 0.1918, 0.5296, 0.2376]) -Greedy action tensor([-1.9342, -0.4545, 0.6661, -0.1743]) tensor([0.0405, 0.1780, 0.5459, 0.2356]) -Greedy action tensor([-1.7488, -0.5110, 0.5599, -0.0634]) tensor([0.0502, 0.1732, 0.5055, 0.2710]) -Greedy action tensor([-1.7803, -0.1166, 0.5513, -0.0457]) tensor([0.0450, 0.2373, 0.4629, 0.2548]) -Greedy action tensor([-1.8776, -0.4392, 0.6341, -0.1519]) tensor([0.0432, 0.1820, 0.5323, 0.2425]) -Greedy action tensor([-1.9031, -0.4476, 0.6515, -0.1574]) tensor([0.0419, 0.1795, 0.5387, 0.2399]) -Greedy action tensor([-1.8706, -0.0909, 0.5843, -0.1629]) tensor([0.0415, 0.2461, 0.4834, 0.2290]) -Greedy action tensor([-1.7402, -0.5086, 0.5472, -0.0857]) tensor([0.0513, 0.1757, 0.5049, 0.2681]) -Greedy action tensor([-1.8856, -0.2459, 0.6163, -0.1512]) tensor([0.0416, 0.2145, 0.5080, 0.2358]) -Greedy action tensor([-1.7084, -0.2972, 0.5383, -0.1169]) tensor([0.0514, 0.2106, 0.4857, 0.2522]) -Greedy action tensor([-1.9021, -0.3615, 0.6295, -0.1643]) tensor([0.0418, 0.1951, 0.5255, 0.2376]) -Greedy action tensor([ 0.7151, -0.9750, -0.0745, -0.2464]) tensor([0.4948, 0.0913, 0.2247, 0.1892]) -Greedy action tensor([ 0.6793, -0.2208, 0.0205, -0.0582]) tensor([0.4163, 0.1692, 0.2154, 0.1991]) -Greedy action tensor([ 0.6106, -0.0044, -0.1393, 0.0717]) tensor([0.3852, 0.2082, 0.1819, 0.2247]) -Greedy action tensor([ 0.8920, -0.7258, 0.0923, -0.3996]) tensor([0.5201, 0.1032, 0.2338, 0.1429]) -Greedy action tensor([ 0.5471, 0.2501, -0.1444, 0.1819]) tensor([0.3404, 0.2529, 0.1705, 0.2362]) -Greedy action tensor([ 0.7981, -0.7275, 0.1528, -0.4472]) tensor([0.4927, 0.1071, 0.2584, 0.1418]) -Greedy action tensor([ 0.7357, -0.4382, 0.0331, -0.3269]) tensor([0.4651, 0.1438, 0.2304, 0.1607]) -Greedy action tensor([ 0.5078, -0.2964, -0.0418, -0.2769]) tensor([0.4031, 0.1803, 0.2327, 0.1839]) -Greedy action tensor([ 0.7645, -0.5130, -0.0900, -0.2069]) tensor([0.4801, 0.1338, 0.2043, 0.1817]) -Greedy action tensor([ 0.4866, -0.1309, 0.0652, -0.1462]) tensor([0.3668, 0.1978, 0.2406, 0.1948]) -Greedy action tensor([ 0.8780, -0.2445, -0.0301, -0.2089]) tensor([0.4840, 0.1575, 0.1952, 0.1632]) -Greedy action tensor([ 0.8908, -0.5812, -0.0703, -0.2762]) tensor([0.5200, 0.1193, 0.1989, 0.1619]) -Greedy action tensor([ 0.6442, -0.4995, -0.1609, -0.3772]) tensor([0.4704, 0.1499, 0.2103, 0.1694]) -Greedy action tensor([ 0.8543, -0.7621, 0.0734, -0.4617]) tensor([0.5195, 0.1032, 0.2379, 0.1393]) -Greedy action tensor([ 0.9297, -0.1139, -0.2259, -0.1698]) tensor([0.5000, 0.1761, 0.1574, 0.1665]) -Greedy action tensor([ 0.7084, -0.3786, -0.1345, -0.2632]) tensor([0.4659, 0.1571, 0.2006, 0.1763]) -Greedy action tensor([ 0.7071, -0.4246, -0.0439, -0.3017]) tensor([0.4632, 0.1494, 0.2186, 0.1689]) -Greedy action tensor([ 0.4114, -0.0750, -0.0822, -0.0948]) tensor([0.3536, 0.2174, 0.2158, 0.2131]) -Greedy action tensor([ 0.6791, -0.2932, 0.1137, -0.2008]) tensor([0.4235, 0.1602, 0.2406, 0.1757]) -Greedy action tensor([ 1.0071, -0.4787, -0.1617, -0.3600]) tensor([0.5581, 0.1263, 0.1734, 0.1422]) -Greedy action tensor([ 0.7884, -0.3261, 0.0764, -0.1318]) tensor([0.4510, 0.1480, 0.2213, 0.1797]) -Greedy action tensor([ 0.1862, -0.1012, 0.0032, -0.2201]) tensor([0.3078, 0.2309, 0.2563, 0.2050]) -Greedy action tensor([ 0.6437, 0.1274, -0.0697, 0.1918]) tensor([0.3672, 0.2191, 0.1799, 0.2337]) -Greedy action tensor([ 1.3163, -0.9138, 0.0387, -0.7483]) tensor([0.6609, 0.0711, 0.1842, 0.0838]) -Greedy action tensor([ 0.7291, -0.6304, 0.0564, -0.5392]) tensor([0.4882, 0.1254, 0.2491, 0.1373]) -Greedy action tensor([ 1.0130, -0.7754, -0.0368, -0.4263]) tensor([0.5700, 0.0953, 0.1995, 0.1351]) -Greedy action tensor([ 0.3632, -0.2125, -0.0021, -0.2537]) tensor([0.3576, 0.2011, 0.2482, 0.1930]) -Greedy action tensor([ 0.8027, -0.4094, 0.0446, -0.4585]) tensor([0.4880, 0.1452, 0.2286, 0.1382]) -Greedy action tensor([ 0.8706, -0.6622, 0.0170, -0.3342]) tensor([0.5150, 0.1112, 0.2194, 0.1544]) -Greedy action tensor([ 0.9993, -0.6004, -0.0685, -0.6876]) tensor([0.5778, 0.1167, 0.1986, 0.1069]) -Greedy action tensor([ 1.0381, -0.6567, -0.1930, -0.4428]) tensor([0.5872, 0.1078, 0.1714, 0.1335]) -Greedy action tensor([ 0.7474, -0.3767, -0.0803, -0.2957]) tensor([0.4730, 0.1537, 0.2067, 0.1666]) -Greedy action tensor([ 0.3529, 0.1913, 0.0257, -0.1089]) tensor([0.3123, 0.2657, 0.2252, 0.1968]) -Greedy action tensor([ 0.8697, -0.6859, 0.0442, -0.3901]) tensor([0.5174, 0.1092, 0.2266, 0.1468]) -Greedy action tensor([ 0.9588, -0.6863, -0.1339, -0.2772]) tensor([0.5498, 0.1061, 0.1844, 0.1597]) -Greedy action tensor([ 0.5595, 0.1715, -0.1307, 0.0582]) tensor([0.3590, 0.2435, 0.1800, 0.2175]) -Greedy action tensor([ 0.5127, -0.1002, 0.0854, -0.4682]) tensor([0.3892, 0.2109, 0.2539, 0.1460]) -Greedy action tensor([ 0.3374, 0.5073, -0.2346, 0.0984]) tensor([0.2827, 0.3351, 0.1596, 0.2226]) -Greedy action tensor([ 0.4092, -0.0473, -0.0184, -0.0155]) tensor([0.3402, 0.2155, 0.2218, 0.2225]) -Greedy action tensor([ 0.5880, 0.0108, -0.0944, -0.0090]) tensor([0.3821, 0.2145, 0.1931, 0.2103]) -Greedy action tensor([ 0.4379, 0.3676, -0.1870, 0.1294]) tensor([0.3123, 0.2911, 0.1672, 0.2294]) -Greedy action tensor([ 0.6481, 0.0354, -0.0354, 0.0523]) tensor([0.3849, 0.2086, 0.1943, 0.2121]) -Greedy action tensor([ 0.9070, -0.2226, -0.1458, -0.3763]) tensor([0.5130, 0.1658, 0.1790, 0.1422]) -Greedy action tensor([ 0.3950, 0.1243, -0.1769, -0.1378]) tensor([0.3431, 0.2618, 0.1937, 0.2014]) -Greedy action tensor([ 0.8185, -0.3951, 0.0435, -0.0521]) tensor([0.4594, 0.1365, 0.2117, 0.1924]) -Greedy action tensor([ 0.8358, -0.4893, -0.0861, -0.2382]) tensor([0.4987, 0.1325, 0.1984, 0.1704]) -Greedy action tensor([ 0.8040, -0.1191, -0.1182, -0.0736]) tensor([0.4524, 0.1797, 0.1799, 0.1881]) -Greedy action tensor([ 0.7045, -0.2315, 0.0535, -0.1099]) tensor([0.4243, 0.1664, 0.2213, 0.1879]) -Greedy action tensor([ 0.3906, 0.0838, 0.0389, -0.1491]) tensor([0.3309, 0.2435, 0.2328, 0.1929]) -Greedy action tensor([ 0.8022, -0.4668, 0.0276, -0.4368]) tensor([0.4922, 0.1384, 0.2268, 0.1426]) -Greedy action tensor([ 0.6215, -0.6421, 0.0985, -0.6469]) tensor([0.4637, 0.1311, 0.2748, 0.1304]) -Greedy action tensor([ 0.6688, -0.4294, -0.1298, -0.5895]) tensor([0.4836, 0.1613, 0.2176, 0.1374]) -Greedy action tensor([ 0.6501, -0.3298, -0.0689, -0.3774]) tensor([0.4504, 0.1690, 0.2194, 0.1612]) -Greedy action tensor([ 0.2589, -0.2346, -0.2016, -0.3995]) tensor([0.3624, 0.2213, 0.2287, 0.1876]) -Greedy action tensor([ 0.6055, -0.4132, 0.0113, -0.3310]) tensor([0.4338, 0.1566, 0.2395, 0.1701]) -Greedy action tensor([ 0.7750, -0.5405, -0.0820, -0.2440]) tensor([0.4869, 0.1307, 0.2067, 0.1758]) -Greedy action tensor([ 0.6182, -0.4563, -0.2129, -0.1729]) tensor([0.4483, 0.1531, 0.1953, 0.2033]) -Greedy action tensor([ 1.5176, -0.9830, -0.1368, -0.6739]) tensor([0.7220, 0.0592, 0.1381, 0.0807]) -Greedy action tensor([ 1.1959, -1.0100, 0.1689, -0.5829]) tensor([0.6108, 0.0673, 0.2187, 0.1031]) -Greedy action tensor([ 0.7307, -0.0590, -0.0785, -0.4545]) tensor([0.4535, 0.2059, 0.2019, 0.1386]) -Greedy action tensor([ 0.7731, -0.4219, -0.1469, -0.2331]) tensor([0.4838, 0.1465, 0.1928, 0.1769]) -Greedy action tensor([ 0.2829, -0.0495, -0.0728, 0.0609]) tensor([0.3107, 0.2228, 0.2177, 0.2488]) -Greedy action tensor([ 1.0541, -0.5960, 0.0169, -0.3325]) tensor([0.5567, 0.1069, 0.1973, 0.1391]) -Greedy action tensor([ 0.8591, -0.3256, -0.0241, -0.2781]) tensor([0.4902, 0.1499, 0.2027, 0.1572]) -Greedy action tensor([ 0.4025, -0.2853, -0.1825, -0.0296]) tensor([0.3692, 0.1856, 0.2057, 0.2396]) -Greedy action tensor([ 0.5365, 0.1474, -0.0855, 0.0823]) tensor([0.3509, 0.2378, 0.1884, 0.2228]) -Greedy action tensor([ 1.2534, -0.5353, -0.2094, -0.4120]) tensor([0.6298, 0.1053, 0.1458, 0.1191]) -Greedy action tensor([ 0.8771, -0.5863, -0.0309, -0.2627]) tensor([0.5116, 0.1184, 0.2063, 0.1637]) -Greedy action tensor([ 0.6811, -0.1900, -0.0694, -0.1452]) tensor([0.4295, 0.1797, 0.2028, 0.1880]) -Greedy action tensor([ 1.1562, -0.6796, 0.0827, -0.7743]) tensor([0.6074, 0.0969, 0.2076, 0.0881]) -Greedy action tensor([ 0.8379, -0.5957, 0.1143, -0.4927]) tensor([0.5031, 0.1200, 0.2440, 0.1330]) -Greedy action tensor([ 0.2961, 0.1036, -0.0631, -0.2919]) tensor([0.3248, 0.2679, 0.2268, 0.1804]) -Greedy action tensor([ 0.5914, -0.2820, 0.0048, -0.1054]) tensor([0.4045, 0.1689, 0.2250, 0.2015]) -Greedy action tensor([ 0.9028, -0.6315, -0.0062, -0.4435]) tensor([0.5323, 0.1148, 0.2145, 0.1385]) -Greedy action tensor([ 0.8306, -0.3382, -0.0229, -0.0928]) tensor([0.4686, 0.1456, 0.1996, 0.1861]) -Greedy action tensor([ 0.8971, -0.9385, -0.0292, -0.5923]) tensor([0.5615, 0.0896, 0.2224, 0.1266]) -Greedy action tensor([ 0.6320, -0.3040, -0.1219, -0.3945]) tensor([0.4503, 0.1766, 0.2118, 0.1613]) -Greedy action tensor([ 0.7848, -0.4757, -0.1018, -0.2995]) tensor([0.4917, 0.1394, 0.2026, 0.1663]) -Greedy action tensor([ 0.8436, -0.7397, -0.0253, -0.6128]) tensor([0.5383, 0.1105, 0.2258, 0.1255]) -Greedy action tensor([ 0.3178, 0.1112, 0.0231, -0.2383]) tensor([0.3193, 0.2597, 0.2378, 0.1831]) -Greedy action tensor([ 0.3057, -0.0986, -0.0808, -0.5316]) tensor([0.3597, 0.2401, 0.2444, 0.1557]) -Greedy action tensor([ 1.0723, -0.3262, -0.3283, 0.3346]) tensor([0.5072, 0.1253, 0.1250, 0.2426]) -Greedy action tensor([ 1.3122, -0.1846, -0.1424, 0.3507]) tensor([0.5436, 0.1217, 0.1269, 0.2078]) -Greedy action tensor([ 1.5925, -0.2814, -0.2335, 0.2329]) tensor([0.6364, 0.0977, 0.1025, 0.1634]) -Greedy action tensor([ 1.9474, -0.7882, -0.5709, -0.0475]) tensor([0.7803, 0.0506, 0.0629, 0.1061]) -Greedy action tensor([ 1.3319, -0.7374, -0.6775, -0.4024]) tensor([0.6960, 0.0879, 0.0933, 0.1228]) -Greedy action tensor([ 1.3751, -0.1460, -0.4512, 0.2557]) tensor([0.5862, 0.1281, 0.0944, 0.1914]) -Greedy action tensor([ 1.5254, -0.2013, -0.5594, 0.2347]) tensor([0.6340, 0.1128, 0.0788, 0.1744]) -Greedy action tensor([ 1.7756, 0.3451, -0.0880, 0.6737]) tensor([0.5792, 0.1385, 0.0898, 0.1924]) -Greedy action tensor([ 1.0669, -0.1527, -0.5735, 0.0364]) tensor([0.5417, 0.1600, 0.1050, 0.1933]) -Greedy action tensor([ 1.3438, 0.2340, -0.2134, 0.0640]) tensor([0.5499, 0.1813, 0.1159, 0.1529]) -Greedy action tensor([ 1.6199, -0.2967, -0.7894, -0.0397]) tensor([0.7007, 0.1031, 0.0630, 0.1333]) -Greedy action tensor([ 1.6716, -0.5963, -0.3630, 0.5788]) tensor([0.6371, 0.0660, 0.0833, 0.2136]) -Greedy action tensor([ 1.3769, -0.5248, -0.5791, 0.0328]) tensor([0.6445, 0.0962, 0.0912, 0.1681]) -Greedy action tensor([ 1.3318, -0.5383, -0.4979, 0.1452]) tensor([0.6173, 0.0951, 0.0991, 0.1885]) -Greedy action tensor([ 1.9066, 0.3226, -0.4248, 0.1013]) tensor([0.6818, 0.1399, 0.0662, 0.1121]) -Greedy action tensor([ 1.3285, -0.6051, -0.3466, 0.3194]) tensor([0.5895, 0.0853, 0.1104, 0.2149]) -Greedy action tensor([ 1.3565, -0.5808, -0.6275, 0.0200]) tensor([0.6475, 0.0933, 0.0890, 0.1701]) -Greedy action tensor([ 2.0133, -0.7180, -0.3920, 0.7498]) tensor([0.6954, 0.0453, 0.0628, 0.1966]) -Greedy action tensor([ 1.3882, -0.0591, -0.5308, 0.0495]) tensor([0.6082, 0.1431, 0.0893, 0.1595]) -Greedy action tensor([ 1.3895, -0.5361, -0.6202, 0.2768]) tensor([0.6217, 0.0906, 0.0833, 0.2043]) -Greedy action tensor([ 1.1014, -0.2114, -0.6688, 0.6783]) tensor([0.4775, 0.1285, 0.0813, 0.3127]) -Greedy action tensor([ 1.8484, -0.4759, -0.4727, 0.2761]) tensor([0.7125, 0.0697, 0.0699, 0.1479]) -Greedy action tensor([ 1.7137, -0.9151, -0.8437, 0.2581]) tensor([0.7231, 0.0522, 0.0560, 0.1687]) -Greedy action tensor([ 2.0835, -0.1232, -0.4092, 0.3523]) tensor([0.7300, 0.0803, 0.0604, 0.1293]) -Greedy action tensor([ 1.2545, 0.0681, -1.0216, 0.0788]) tensor([0.5825, 0.1779, 0.0598, 0.1798]) -Greedy action tensor([ 1.1191, -0.3005, -0.2976, 0.0124]) tensor([0.5510, 0.1332, 0.1336, 0.1822]) -Greedy action tensor([ 1.5410, -0.6615, -0.4265, 0.5501]) tensor([0.6167, 0.0682, 0.0862, 0.2289]) -Greedy action tensor([ 1.2189, 0.0234, -0.8566, -0.0098]) tensor([0.5812, 0.1758, 0.0729, 0.1701]) -Greedy action tensor([ 1.7944, -0.3662, -0.4859, 0.1199]) tensor([0.7118, 0.0820, 0.0728, 0.1334]) -Greedy action tensor([ 1.6834, -0.3316, -0.8313, 0.2391]) tensor([0.6896, 0.0919, 0.0558, 0.1627]) -Greedy action tensor([ 1.5308, -0.5248, -0.0235, 0.5247]) tensor([0.5865, 0.0751, 0.1239, 0.2145]) -Greedy action tensor([ 1.7942, -0.5590, -1.0256, 0.3825]) tensor([0.7151, 0.0680, 0.0426, 0.1743]) -Greedy action tensor([ 1.5426, -0.0091, -0.3789, 0.1303]) tensor([0.6243, 0.1323, 0.0914, 0.1521]) -Greedy action tensor([ 1.1577, -0.3558, -0.4386, 0.1867]) tensor([0.5551, 0.1222, 0.1125, 0.2102]) -Greedy action tensor([ 2.3698, 0.9892, 0.4175, -0.3469]) tensor([0.6852, 0.1723, 0.0973, 0.0453]) -Greedy action tensor([ 2.1077, -0.9556, 0.3072, 0.6821]) tensor([0.6886, 0.0322, 0.1138, 0.1655]) -Greedy action tensor([ 1.3791, -0.2534, -0.5101, 0.2556]) tensor([0.5982, 0.1169, 0.0904, 0.1945]) -Greedy action tensor([ 1.0286, -0.3386, -0.9581, 0.4474]) tensor([0.5125, 0.1306, 0.0703, 0.2866]) -Greedy action tensor([ 0.6197, -0.1945, -0.0568, -0.0014]) tensor([0.4018, 0.1780, 0.2043, 0.2159]) -Greedy action tensor([ 0.8771, -0.1019, -0.1324, 0.4881]) tensor([0.4136, 0.1554, 0.1507, 0.2803]) -Greedy action tensor([ 1.5461, -0.6179, -0.8895, 0.1226]) tensor([0.6929, 0.0796, 0.0607, 0.1669]) -Greedy action tensor([ 1.5676, -0.4982, -1.0485, 0.1865]) tensor([0.6891, 0.0873, 0.0504, 0.1732]) -Greedy action tensor([ 1.0540, -0.3546, -0.5322, 0.6073]) tensor([0.4787, 0.1170, 0.0980, 0.3063]) -Greedy action tensor([ 1.6728, -0.7208, -0.2812, 0.1111]) tensor([0.6931, 0.0633, 0.0982, 0.1454]) -Greedy action tensor([ 1.5183, -0.2810, -0.5969, 0.3150]) tensor([0.6304, 0.1043, 0.0760, 0.1893]) -Greedy action tensor([ 1.5288, -0.8965, -0.3866, 0.3231]) tensor([0.6514, 0.0576, 0.0959, 0.1951]) -Greedy action tensor([ 1.0692, -0.0136, -0.0180, 0.3828]) tensor([0.4589, 0.1554, 0.1547, 0.2310]) -Greedy action tensor([ 1.4501, -0.7158, -0.2423, 0.6016]) tensor([0.5791, 0.0664, 0.1066, 0.2479]) -Greedy action tensor([ 1.6667, -0.0435, -0.1523, 0.1912]) tensor([0.6363, 0.1150, 0.1032, 0.1455]) -Greedy action tensor([ 1.2758, -0.4816, -0.4665, 0.1270]) tensor([0.6007, 0.1036, 0.1052, 0.1904]) -Greedy action tensor([ 1.6393, -0.7182, 0.0941, 0.6082]) tensor([0.6008, 0.0569, 0.1281, 0.2142]) -Greedy action tensor([ 0.7203, -0.2203, -0.0505, 0.0688]) tensor([0.4212, 0.1644, 0.1948, 0.2195]) -Greedy action tensor([ 1.6874, -0.5626, -0.7653, 0.3696]) tensor([0.6853, 0.0722, 0.0590, 0.1835]) -Greedy action tensor([ 1.8973, 0.6091, -0.1305, 0.1026]) tensor([0.6355, 0.1752, 0.0836, 0.1056]) -Greedy action tensor([ 1.4633, -0.2897, -0.8103, -0.0637]) tensor([0.6696, 0.1160, 0.0689, 0.1454]) -Greedy action tensor([ 1.5854, -0.7449, 0.0103, 0.3941]) tensor([0.6219, 0.0605, 0.1287, 0.1889]) -Greedy action tensor([ 1.2323, -0.5499, -0.4984, 0.1443]) tensor([0.5944, 0.1000, 0.1053, 0.2003]) -Greedy action tensor([ 1.2192, -0.2445, -0.4795, 0.3122]) tensor([0.5500, 0.1273, 0.1006, 0.2221]) -Greedy action tensor([ 1.2294, -1.0266, -0.2118, 0.0582]) tensor([0.6055, 0.0634, 0.1433, 0.1877]) -Greedy action tensor([ 1.7561, -0.3632, -0.9047, 0.3770]) tensor([0.6936, 0.0833, 0.0485, 0.1746]) -Greedy action tensor([ 1.5479, -0.6539, -0.4830, 0.2217]) tensor([0.6634, 0.0734, 0.0871, 0.1761]) -Greedy action tensor([ 2.0169, -0.8849, 0.1309, 0.7132]) tensor([0.6765, 0.0372, 0.1026, 0.1837]) -Greedy action tensor([ 1.7043, -0.4245, -0.8873, 0.1191]) tensor([0.7149, 0.0851, 0.0535, 0.1465]) -Greedy action tensor([ 1.6676, -1.0045, -0.5420, 0.5234]) tensor([0.6679, 0.0462, 0.0733, 0.2127]) -Greedy action tensor([ 1.0726, -0.4044, -0.1042, -0.1708]) tensor([0.5480, 0.1251, 0.1689, 0.1580]) -Greedy action tensor([ 1.4701, -0.1817, 0.1157, 0.1667]) tensor([0.5809, 0.1114, 0.1499, 0.1578]) -Greedy action tensor([ 1.6006, -0.9439, -0.1747, 0.8251]) tensor([0.5854, 0.0460, 0.0992, 0.2695]) -Greedy action tensor([ 1.6500, -0.8032, -0.7291, 0.2557]) tensor([0.7009, 0.0603, 0.0649, 0.1738]) -Greedy action tensor([ 1.0959, -0.4296, -0.5784, 0.5162]) tensor([0.5089, 0.1107, 0.0954, 0.2850]) -Greedy action tensor([ 1.4304, -0.4839, -0.4698, -0.1461]) tensor([0.6650, 0.0981, 0.0994, 0.1375]) -Greedy action tensor([ 0.8907, -0.0453, 0.2313, -0.3220]) tensor([0.4531, 0.1777, 0.2344, 0.1348]) -Greedy action tensor([ 1.0562, -0.2163, -0.5063, 0.1836]) tensor([0.5242, 0.1468, 0.1099, 0.2190]) -Greedy action tensor([ 1.8404, -0.5688, -0.4622, 0.4446]) tensor([0.6957, 0.0625, 0.0696, 0.1723]) -Greedy action tensor([ 1.7094, -0.7448, -0.8401, 0.2253]) tensor([0.7190, 0.0618, 0.0562, 0.1630]) -Greedy action tensor([ 1.3267, -0.0082, -0.7776, -0.0014]) tensor([0.6060, 0.1595, 0.0739, 0.1606]) -Greedy action tensor([ 1.8032, -0.3729, -0.2483, 0.7406]) tensor([0.6299, 0.0715, 0.0810, 0.2177]) -Greedy action tensor([ 2.2052, -0.4725, -0.0263, -0.0074]) tensor([0.7779, 0.0535, 0.0835, 0.0851]) -Greedy action tensor([ 1.3482, -0.4555, -0.3985, 0.3002]) tensor([0.5918, 0.0975, 0.1032, 0.2075]) -Greedy action tensor([ 0.8144, -0.4257, -0.0576, -0.0024]) tensor([0.4653, 0.1346, 0.1945, 0.2056]) -Greedy action tensor([ 0.7480, 0.2184, -0.7027, 0.6345]) tensor([0.3682, 0.2168, 0.0863, 0.3287]) -Greedy action tensor([ 1.1262, -0.7181, -0.3047, -0.1855]) tensor([0.6000, 0.0949, 0.1435, 0.1616]) -Greedy action tensor([-0.4957, -1.0992, 0.9790, -0.5125]) tensor([0.1449, 0.0793, 0.6333, 0.1425]) -Greedy action tensor([-0.2425, -0.1963, -0.5309, -0.4294]) tensor([0.2758, 0.2888, 0.2067, 0.2287]) -Greedy action tensor([-0.4941, -0.5180, 0.0939, -0.7542]) tensor([0.2199, 0.2147, 0.3959, 0.1695]) -Greedy action tensor([-1.3063, -0.6379, 0.2647, 0.2402]) tensor([0.0803, 0.1566, 0.3862, 0.3769]) -Greedy action tensor([-1.1003, -0.6282, 0.2575, -0.7920]) tensor([0.1274, 0.2042, 0.4951, 0.1733]) -Greedy action tensor([-0.3688, 0.0810, -0.1819, -0.6627]) tensor([0.2213, 0.3470, 0.2668, 0.1649]) -Greedy action tensor([ 0.4075, -0.5232, 0.0547, 0.4178]) tensor([0.3218, 0.1269, 0.2262, 0.3251]) -Greedy action tensor([-0.7857, -0.2974, -1.1080, -0.1752]) tensor([0.1925, 0.3136, 0.1395, 0.3544]) -Greedy action tensor([ 0.1619, 0.5887, -0.6991, -0.6318]) tensor([0.2935, 0.4497, 0.1241, 0.1327]) -Greedy action tensor([-0.9150, -0.5354, 0.2707, -0.6286]) tensor([0.1415, 0.2069, 0.4632, 0.1884]) -Greedy action tensor([ 0.4544, -0.9258, 0.7390, 0.0437]) tensor([0.3083, 0.0775, 0.4097, 0.2044]) -Greedy action tensor([-0.3165, 0.2718, 0.4643, -0.5770]) tensor([0.1738, 0.3129, 0.3794, 0.1339]) -Greedy action tensor([-0.1826, -1.0833, 0.2774, -0.0857]) tensor([0.2444, 0.0993, 0.3871, 0.2692]) -Greedy action tensor([ 1.8329, -1.3918, -0.1385, 0.5549]) tensor([0.6861, 0.0273, 0.0955, 0.1911]) -Greedy action tensor([-0.1473, -0.4654, 1.2897, 0.3042]) tensor([0.1332, 0.0969, 0.5606, 0.2092]) -Greedy action tensor([-1.0391, 0.2748, 0.7337, -0.9650]) tensor([0.0856, 0.3184, 0.5038, 0.0922]) -Greedy action tensor([ 0.4879, 0.5582, -0.1116, -0.2689]) tensor([0.3235, 0.3471, 0.1776, 0.1518]) -Greedy action tensor([-1.0588, 0.2039, 0.0274, -1.3878]) tensor([0.1217, 0.4302, 0.3606, 0.0876]) -Greedy action tensor([-0.4384, 0.2247, -0.3617, -0.4286]) tensor([0.1988, 0.3858, 0.2146, 0.2008]) -Greedy action tensor([-0.2509, -0.9534, 0.9796, 0.2896]) tensor([0.1507, 0.0747, 0.5159, 0.2587]) -Greedy action tensor([-0.3048, -0.1387, 0.6706, -0.6546]) tensor([0.1806, 0.2132, 0.4789, 0.1273]) -Greedy action tensor([-0.0454, -1.1807, 0.0592, 0.1085]) tensor([0.2779, 0.0893, 0.3086, 0.3242]) -Greedy action tensor([ 1.9080, -0.0989, -0.2851, 1.0234]) tensor([0.6028, 0.0810, 0.0673, 0.2489]) -Greedy action tensor([-0.9692, -1.6935, 0.3415, -0.2478]) tensor([0.1379, 0.0668, 0.5115, 0.2837]) -Greedy action tensor([ 0.3609, -0.6165, 0.3737, 0.9176]) tensor([0.2419, 0.0910, 0.2450, 0.4221]) -Greedy action tensor([ 1.4915, -1.5790, 0.2829, 0.9232]) tensor([0.5231, 0.0243, 0.1562, 0.2964]) -Greedy action tensor([1.1060, 0.0552, 0.5096, 0.5765]) tensor([0.4017, 0.1405, 0.2213, 0.2366]) -Greedy action tensor([-0.5622, -0.3099, 0.4945, -0.6138]) tensor([0.1636, 0.2105, 0.4706, 0.1553]) -Greedy action tensor([-1.2253, -0.6369, -0.2183, -0.3555]) tensor([0.1262, 0.2273, 0.3454, 0.3011]) -Greedy action tensor([-0.6991, -0.9215, 0.0026, -0.7443]) tensor([0.2095, 0.1677, 0.4226, 0.2002]) -Greedy action tensor([-0.1533, -0.4364, 0.5534, -0.2274]) tensor([0.2124, 0.1600, 0.4305, 0.1972]) -Greedy action tensor([-0.3771, -0.3733, -0.6824, -0.3441]) tensor([0.2649, 0.2660, 0.1952, 0.2739]) -Greedy action tensor([0.6229, 0.0013, 0.2074, 0.2450]) tensor([0.3469, 0.1863, 0.2290, 0.2378]) -Greedy action tensor([ 0.0036, -0.4559, -0.6033, 0.2560]) tensor([0.2887, 0.1823, 0.1574, 0.3716]) -Greedy action tensor([-0.3458, 0.1922, 0.4980, -0.6225]) tensor([0.1725, 0.2955, 0.4012, 0.1308]) -Greedy action tensor([-0.2785, -0.5790, -0.2536, 0.0893]) tensor([0.2375, 0.1759, 0.2435, 0.3431]) -Greedy action tensor([ 0.2574, -0.0673, 0.5434, -0.7065]) tensor([0.2911, 0.2104, 0.3875, 0.1110]) -Greedy action tensor([ 0.1572, -1.0335, -0.4132, -0.0014]) tensor([0.3673, 0.1117, 0.2076, 0.3134]) -Greedy action tensor([ 1.1209, 0.0596, 0.0461, -0.0235]) tensor([0.4986, 0.1725, 0.1702, 0.1587]) -Greedy action tensor([ 0.0952, -0.2989, 1.0002, -0.3440]) tensor([0.2087, 0.1407, 0.5160, 0.1345]) -Greedy action tensor([ 0.2289, -1.0853, -0.1329, -0.2348]) tensor([0.3855, 0.1036, 0.2685, 0.2425]) -Greedy action tensor([-0.9795, -0.7579, 1.1060, -0.6088]) tensor([0.0851, 0.1063, 0.6853, 0.1233]) -Greedy action tensor([ 0.8683, -1.7066, -0.1937, 0.2255]) tensor([0.5134, 0.0391, 0.1775, 0.2700]) -Greedy action tensor([-1.0640, -1.1357, -0.0043, -0.8142]) tensor([0.1639, 0.1526, 0.4730, 0.2105]) -Greedy action tensor([-0.1880, -1.1161, 0.5139, 0.2910]) tensor([0.1989, 0.0786, 0.4013, 0.3211]) -Greedy action tensor([-0.4603, -0.2998, 0.0097, -0.2539]) tensor([0.1999, 0.2347, 0.3198, 0.2457]) -Greedy action tensor([-0.4249, -0.9937, 0.0706, -0.1640]) tensor([0.2219, 0.1257, 0.3643, 0.2881]) -Greedy action tensor([-0.0945, 0.1339, -0.0903, 1.0128]) tensor([0.1591, 0.1999, 0.1597, 0.4813]) -Greedy action tensor([ 0.3790, -0.5485, 1.0755, 0.2511]) tensor([0.2335, 0.0924, 0.4686, 0.2055]) -Greedy action tensor([ 0.3689, -0.0663, -0.1690, -0.5056]) tensor([0.3776, 0.2444, 0.2205, 0.1575]) -Greedy action tensor([-0.2423, 0.5275, 0.9162, -1.1482]) tensor([0.1482, 0.3200, 0.4720, 0.0599]) -Greedy action tensor([ 0.0674, -1.4140, 0.4276, -0.4101]) tensor([0.3048, 0.0693, 0.4369, 0.1890]) -Greedy action tensor([-0.8736, 0.0089, 0.5634, -1.4977]) tensor([0.1225, 0.2962, 0.5156, 0.0656]) -Greedy action tensor([-0.1360, -1.3033, 1.0929, -1.2301]) tensor([0.1975, 0.0615, 0.6749, 0.0661]) -Greedy action tensor([ 0.1930, 0.2132, 0.3075, -0.3215]) tensor([0.2674, 0.2729, 0.2999, 0.1599]) -Greedy action tensor([ 1.1523, -0.6962, -0.4814, 0.3076]) tensor([0.5610, 0.0884, 0.1095, 0.2411]) -Greedy action tensor([-0.4073, -0.0560, 0.2881, 0.2264]) tensor([0.1585, 0.2252, 0.3177, 0.2987]) -Greedy action tensor([ 0.4579, -0.9450, 0.3937, -0.5248]) tensor([0.3909, 0.0961, 0.3666, 0.1463]) -Greedy action tensor([-0.2776, -1.0654, 0.8209, -0.5162]) tensor([0.1908, 0.0868, 0.5722, 0.1503]) -Greedy action tensor([-0.6348, -0.4058, 0.2316, -0.7270]) tensor([0.1803, 0.2266, 0.4287, 0.1644]) -Greedy action tensor([ 0.9203, -0.1235, 0.2140, -0.5358]) tensor([0.4811, 0.1694, 0.2374, 0.1122]) -Greedy action tensor([ 0.2435, -1.0369, 0.5721, -1.1254]) tensor([0.3423, 0.0951, 0.4755, 0.0871]) -Greedy action tensor([ 0.0685, -0.4593, -1.0024, 0.3992]) tensor([0.3008, 0.1774, 0.1031, 0.4187]) -Greedy action tensor([ 0.2317, -1.0897, -0.4723, 0.6788]) tensor([0.3007, 0.0802, 0.1487, 0.4703]) -Greedy action tensor([ 0.0183, -1.4309, -0.0469, -0.5148]) tensor([0.3625, 0.0851, 0.3397, 0.2127]) -Greedy action tensor([ 1.4641, -0.1588, 0.8745, 0.5551]) tensor([0.4641, 0.0916, 0.2573, 0.1870]) -Greedy action tensor([ 0.7969, -0.2243, 0.2936, 0.3073]) tensor([0.3880, 0.1397, 0.2345, 0.2378]) -Greedy action tensor([-1.2456, -0.1494, -0.1771, -0.8267]) tensor([0.1187, 0.3553, 0.3455, 0.1805]) -Greedy action tensor([ 0.1600, -0.7317, -0.2140, 0.1262]) tensor([0.3263, 0.1338, 0.2245, 0.3155]) -Greedy action tensor([-0.4423, 0.4019, 0.1136, 0.0170]) tensor([0.1503, 0.3497, 0.2621, 0.2380]) -Greedy action tensor([-0.7123, -0.0473, -0.5996, -0.7882]) tensor([0.2004, 0.3896, 0.2243, 0.1857]) -Greedy action tensor([ 1.8854, -1.8360, 0.1623, 0.3926]) tensor([0.7006, 0.0170, 0.1251, 0.1574]) -Greedy action tensor([-0.5693, 0.6167, 0.3335, -0.4854]) tensor([0.1277, 0.4182, 0.3151, 0.1389]) -Greedy action tensor([ 0.4290, -1.0711, -0.2471, -1.1252]) tensor([0.5147, 0.1148, 0.2618, 0.1088]) -Greedy action tensor([ 0.8234, -1.4616, -0.2383, 0.8900]) tensor([0.3974, 0.0404, 0.1374, 0.4247]) -Greedy action tensor([ 0.2968, 0.6519, -0.0742, 0.1202]) tensor([0.2529, 0.3607, 0.1745, 0.2119]) -Greedy action tensor([-0.3047, -0.5925, 0.3078, 0.3894]) tensor([0.1787, 0.1340, 0.3297, 0.3577]) -Greedy action tensor([ 0.2942, -1.0089, -0.5083, 0.4708]) tensor([0.3433, 0.0933, 0.1539, 0.4096]) -Greedy action tensor([ 1.0450, -0.6729, -0.0965, -0.3436]) tensor([0.5720, 0.1026, 0.1827, 0.1427]) -Greedy action tensor([ 1.7870, -0.9588, -0.4131, 1.6502]) tensor([0.4885, 0.0314, 0.0541, 0.4260]) -Greedy action tensor([ 0.0082, -0.1180, 0.6715, -0.4581]) tensor([0.2247, 0.1981, 0.4362, 0.1410]) -Greedy action tensor([-0.1286, -0.8054, -0.6829, -0.4042]) tensor([0.3519, 0.1788, 0.2021, 0.2671]) -Greedy action tensor([-1.6650, -0.3530, 0.5161, -0.0858]) tensor([0.0543, 0.2016, 0.4808, 0.2634]) -Greedy action tensor([-1.7601, -0.2535, 0.5458, -0.0933]) tensor([0.0480, 0.2165, 0.4815, 0.2541]) -Greedy action tensor([-1.6242, -0.2044, 0.6934, 0.0994]) tensor([0.0479, 0.1980, 0.4859, 0.2683]) -Greedy action tensor([-1.7741, 0.1442, 0.4968, -0.0630]) tensor([0.0434, 0.2956, 0.4206, 0.2403]) -Greedy action tensor([-1.3756, 0.6794, 0.4430, -0.7067]) tensor([0.0591, 0.4613, 0.3642, 0.1154]) -Greedy action tensor([-0.4628, 0.9408, 0.0081, 0.0747]) tensor([0.1193, 0.4855, 0.1910, 0.2042]) -Greedy action tensor([-1.2286, 0.0860, 0.2718, 0.0247]) tensor([0.0787, 0.2930, 0.3528, 0.2755]) -Greedy action tensor([-1.8245, -0.4867, 0.6070, -0.1453]) tensor([0.0464, 0.1768, 0.5279, 0.2488]) -Greedy action tensor([-1.8736, -0.3429, 0.6176, -0.1554]) tensor([0.0430, 0.1986, 0.5189, 0.2395]) -Greedy action tensor([-1.4373, -0.4029, 0.4214, 0.1098]) tensor([0.0670, 0.1885, 0.4298, 0.3147]) -Greedy action tensor([-1.4525, 0.1888, 0.3820, -0.0911]) tensor([0.0613, 0.3162, 0.3836, 0.2390]) -Greedy action tensor([-1.8906, -0.4598, 0.6469, -0.1545]) tensor([0.0425, 0.1779, 0.5381, 0.2414]) -Greedy action tensor([-1.9068, -0.4198, 0.6455, -0.1624]) tensor([0.0417, 0.1845, 0.5352, 0.2386]) -Greedy action tensor([-1.7508, -0.3063, 0.5806, -0.0497]) tensor([0.0476, 0.2018, 0.4898, 0.2608]) -Greedy action tensor([-1.3028, -0.2212, 0.3568, 0.1452]) tensor([0.0743, 0.2191, 0.3906, 0.3161]) -Greedy action tensor([-1.8656, -0.4113, 0.6257, -0.1380]) tensor([0.0435, 0.1863, 0.5254, 0.2448]) -Greedy action tensor([-1.9281, -0.4528, 0.6697, -0.1753]) tensor([0.0407, 0.1779, 0.5466, 0.2348]) -Greedy action tensor([-0.6034, 0.8133, 0.0752, -0.0408]) tensor([0.1130, 0.4659, 0.2227, 0.1983]) -Greedy action tensor([-1.9324, -0.4391, 0.6651, -0.1668]) tensor([0.0404, 0.1800, 0.5431, 0.2364]) -Greedy action tensor([-1.8095, -0.4140, 0.6030, -0.1026]) tensor([0.0461, 0.1859, 0.5141, 0.2539]) -Greedy action tensor([-1.5546, -0.5359, 0.4844, -0.0717]) tensor([0.0631, 0.1746, 0.4845, 0.2778]) -Greedy action tensor([-1.8219, -0.4181, 0.6031, -0.1222]) tensor([0.0458, 0.1863, 0.5174, 0.2505]) -Greedy action tensor([-1.9034, -0.4386, 0.6442, -0.1647]) tensor([0.0420, 0.1818, 0.5370, 0.2391]) -Greedy action tensor([-1.8367, -0.2262, 0.5831, -0.1141]) tensor([0.0438, 0.2191, 0.4921, 0.2450]) -Greedy action tensor([-1.9087, -0.4497, 0.6502, -0.1661]) tensor([0.0418, 0.1797, 0.5398, 0.2387]) -Greedy action tensor([-1.8165, -0.4599, 0.6054, -0.1129]) tensor([0.0462, 0.1794, 0.5206, 0.2538]) -Greedy action tensor([-1.8942, -0.4460, 0.6419, -0.1583]) tensor([0.0424, 0.1806, 0.5361, 0.2408]) -Greedy action tensor([-1.7723, -0.3536, 0.6131, -0.0802]) tensor([0.0467, 0.1928, 0.5070, 0.2535]) -Greedy action tensor([-1.7767, -0.4443, 0.5849, -0.0934]) tensor([0.0481, 0.1824, 0.5104, 0.2591]) -Greedy action tensor([-1.3053, -0.5820, 0.3719, 0.1396]) tensor([0.0790, 0.1629, 0.4229, 0.3352]) -Greedy action tensor([-1.9459, -0.4559, 0.6756, -0.1801]) tensor([0.0399, 0.1772, 0.5494, 0.2335]) -Greedy action tensor([-1.6813, -0.4153, 0.5366, -0.1052]) tensor([0.0538, 0.1910, 0.4948, 0.2604]) -Greedy action tensor([-1.8692, -0.1084, 0.5821, -0.1480]) tensor([0.0416, 0.2423, 0.4832, 0.2329]) -Greedy action tensor([-1.9052, -0.4555, 0.6537, -0.1605]) tensor([0.0418, 0.1783, 0.5405, 0.2394]) -Greedy action tensor([-1.2226, -0.4160, 0.3521, -0.0517]) tensor([0.0885, 0.1983, 0.4276, 0.2855]) -Greedy action tensor([-1.9259, -0.4402, 0.6605, -0.1718]) tensor([0.0409, 0.1805, 0.5426, 0.2361]) -Greedy action tensor([-1.0833, 0.8290, 0.2378, 0.0075]) tensor([0.0690, 0.4670, 0.2586, 0.2054]) -Greedy action tensor([-1.7675, -0.3800, 0.5708, -0.0986]) tensor([0.0484, 0.1937, 0.5013, 0.2567]) -Greedy action tensor([-1.2607, 0.1330, 0.4092, 0.6757]) tensor([0.0579, 0.2333, 0.3075, 0.4014]) -Greedy action tensor([-1.4098, 0.6787, 0.2940, 0.0033]) tensor([0.0535, 0.4322, 0.2942, 0.2200]) -Greedy action tensor([-1.8842, -0.4505, 0.6433, -0.1461]) tensor([0.0427, 0.1792, 0.5351, 0.2430]) -Greedy action tensor([-1.9070, -0.4572, 0.6546, -0.1620]) tensor([0.0418, 0.1780, 0.5411, 0.2391]) -Greedy action tensor([-0.2372, 0.9941, -0.0188, 0.3701]) tensor([0.1332, 0.4565, 0.1658, 0.2445]) -Greedy action tensor([-1.9102, -0.4009, 0.6478, -0.1608]) tensor([0.0413, 0.1870, 0.5338, 0.2378]) -Greedy action tensor([-1.8570, -0.3112, 0.5951, -0.1426]) tensor([0.0437, 0.2053, 0.5080, 0.2430]) -Greedy action tensor([-1.8751, -0.4555, 0.6298, -0.1468]) tensor([0.0435, 0.1797, 0.5321, 0.2447]) -Greedy action tensor([-1.8976, -0.3910, 0.6427, -0.1514]) tensor([0.0418, 0.1885, 0.5301, 0.2396]) -Greedy action tensor([-1.8103, -0.4070, 0.6064, -0.1031]) tensor([0.0459, 0.1867, 0.5144, 0.2530]) -Greedy action tensor([-1.7300, -0.2125, 0.5198, -0.0795]) tensor([0.0494, 0.2252, 0.4683, 0.2572]) -Greedy action tensor([-1.9385, -0.4499, 0.6668, -0.1760]) tensor([0.0403, 0.1787, 0.5459, 0.2350]) -Greedy action tensor([-1.8482, -0.1608, 0.5798, -0.0954]) tensor([0.0425, 0.2299, 0.4821, 0.2454]) -Greedy action tensor([-1.4166, -0.5306, 0.4332, -0.0506]) tensor([0.0730, 0.1770, 0.4640, 0.2860]) -Greedy action tensor([-1.7266, -0.3093, 0.5896, -0.0234]) tensor([0.0482, 0.1988, 0.4884, 0.2646]) -Greedy action tensor([-1.9065, -0.3818, 0.6444, -0.1628]) tensor([0.0414, 0.1904, 0.5312, 0.2370]) -Greedy action tensor([ 0.0098, -0.4589, 0.6679, 0.3379]) tensor([0.2022, 0.1265, 0.3905, 0.2807]) -Greedy action tensor([-1.3002, -0.4527, 0.3005, 0.1534]) tensor([0.0796, 0.1857, 0.3944, 0.3404]) -Greedy action tensor([-1.9340, -0.4405, 0.6637, -0.1746]) tensor([0.0405, 0.1803, 0.5440, 0.2352]) -Greedy action tensor([-1.9180, -0.4070, 0.6494, -0.1654]) tensor([0.0411, 0.1862, 0.5356, 0.2371]) -Greedy action tensor([-1.9464, -0.4509, 0.6687, -0.1814]) tensor([0.0400, 0.1787, 0.5474, 0.2339]) -Greedy action tensor([-0.7947, 0.9832, 0.0468, 0.4418]) tensor([0.0789, 0.4666, 0.1829, 0.2716]) -Greedy action tensor([-1.8806, -0.4439, 0.6228, -0.1539]) tensor([0.0434, 0.1825, 0.5303, 0.2439]) -Greedy action tensor([-1.5167, -0.3481, 0.6759, 0.1134]) tensor([0.0547, 0.1760, 0.4900, 0.2792]) -Greedy action tensor([-1.9369, -0.4557, 0.6737, -0.1678]) tensor([0.0402, 0.1768, 0.5471, 0.2358]) -Greedy action tensor([-1.8540, -0.4095, 0.6053, -0.1243]) tensor([0.0443, 0.1878, 0.5181, 0.2498]) -Greedy action tensor([-1.8783, -0.4290, 0.6678, -0.0871]) tensor([0.0416, 0.1774, 0.5312, 0.2497]) -Greedy action tensor([-1.7971, -0.4345, 0.5986, -0.1717]) tensor([0.0477, 0.1864, 0.5236, 0.2424]) -Greedy action tensor([-1.7044, -0.5331, 0.5702, -0.1101]) tensor([0.0530, 0.1709, 0.5152, 0.2609]) -Greedy action tensor([-1.6591e+00, -5.2866e-01, 5.1774e-01, -7.0810e-04]) tensor([0.0550, 0.1705, 0.4854, 0.2890]) -Greedy action tensor([-1.9195, -0.4463, 0.6585, -0.1654]) tensor([0.0411, 0.1795, 0.5417, 0.2377]) -Greedy action tensor([-1.7123, -0.4309, 0.5461, -0.0853]) tensor([0.0519, 0.1870, 0.4968, 0.2642]) -Greedy action tensor([-1.8645, -0.0388, 0.5646, -0.1225]) tensor([0.0412, 0.2558, 0.4677, 0.2353]) -Greedy action tensor([-1.7846, -0.5015, 0.5769, -0.1001]) tensor([0.0485, 0.1751, 0.5148, 0.2616]) -Greedy action tensor([-1.9245, -0.4352, 0.6519, -0.1680]) tensor([0.0410, 0.1819, 0.5394, 0.2376]) -Greedy action tensor([-1.9139, -0.4008, 0.6476, -0.1610]) tensor([0.0412, 0.1871, 0.5339, 0.2378]) -Greedy action tensor([-1.8495, -0.4584, 0.6281, -0.1277]) tensor([0.0444, 0.1784, 0.5288, 0.2484]) -Greedy action tensor([-1.6452, 0.3099, 0.4219, 0.0178]) tensor([0.0471, 0.3326, 0.3720, 0.2483]) -Greedy action tensor([-1.9079, -0.4680, 0.6473, -0.1654]) tensor([0.0420, 0.1773, 0.5408, 0.2399]) -Greedy action tensor([-1.9274, -0.4272, 0.6542, -0.1702]) tensor([0.0408, 0.1830, 0.5396, 0.2366]) -Greedy action tensor([-1.1129, 0.6441, 0.2386, -0.0743]) tensor([0.0742, 0.4298, 0.2865, 0.2095]) -Greedy action tensor([-1.1267, -0.6553, 0.3489, 0.2317]) tensor([0.0920, 0.1475, 0.4025, 0.3580]) -Greedy action tensor([-1.5599, -0.5446, 0.4785, -0.0322]) tensor([0.0623, 0.1720, 0.4785, 0.2871]) -Greedy action tensor([ 0.6004, -0.0778, -0.0483, -0.4502]) tensor([0.4202, 0.2132, 0.2196, 0.1469]) -Greedy action tensor([ 0.6823, -0.5064, 0.1669, -0.3684]) tensor([0.4441, 0.1353, 0.2653, 0.1553]) -Greedy action tensor([ 0.8496, -0.5952, -0.0292, -0.2909]) tensor([0.5074, 0.1197, 0.2107, 0.1622]) -Greedy action tensor([ 0.8153, -0.5441, -0.1102, -0.3682]) tensor([0.5104, 0.1311, 0.2023, 0.1563]) -Greedy action tensor([ 0.9796, -0.5984, 0.0287, -0.2538]) tensor([0.5308, 0.1095, 0.2051, 0.1546]) -Greedy action tensor([ 0.8088, -0.7420, -0.0255, -0.4354]) tensor([0.5169, 0.1096, 0.2245, 0.1490]) -Greedy action tensor([ 1.0339, -0.4471, -0.1346, -0.3428]) tensor([0.5584, 0.1270, 0.1736, 0.1410]) -Greedy action tensor([ 0.7674, -0.3569, 0.1368, -0.1356]) tensor([0.4420, 0.1436, 0.2353, 0.1792]) -Greedy action tensor([ 1.0961, -0.6325, -0.0343, -0.5934]) tensor([0.5935, 0.1054, 0.1916, 0.1096]) -Greedy action tensor([ 0.7303, -0.4475, 0.0022, -0.5921]) tensor([0.4861, 0.1497, 0.2347, 0.1295]) -Greedy action tensor([ 0.7192, -0.8418, -0.0845, -0.4660]) tensor([0.5094, 0.1069, 0.2280, 0.1557]) -Greedy action tensor([0.3542, 0.0710, 0.1035, 0.1439]) tensor([0.2992, 0.2254, 0.2329, 0.2425]) -Greedy action tensor([ 0.5458, -0.3759, -0.1073, -0.0635]) tensor([0.4062, 0.1616, 0.2114, 0.2208]) -Greedy action tensor([ 1.1200, -0.6973, -0.0040, -0.5185]) tensor([0.5946, 0.0966, 0.1932, 0.1155]) -Greedy action tensor([ 0.9917, -1.1127, 0.0505, -0.7232]) tensor([0.5910, 0.0721, 0.2306, 0.1064]) -Greedy action tensor([ 0.7226, -0.3866, 0.1108, -0.3353]) tensor([0.4506, 0.1486, 0.2444, 0.1564]) -Greedy action tensor([ 1.1062, -0.7830, 0.0381, -0.6710]) tensor([0.6010, 0.0909, 0.2065, 0.1016]) -Greedy action tensor([ 0.9977, -0.4627, -0.0146, -0.5845]) tensor([0.5552, 0.1289, 0.2018, 0.1141]) -Greedy action tensor([ 0.9016, -0.3832, 0.0086, -0.5187]) tensor([0.5187, 0.1435, 0.2124, 0.1253]) -Greedy action tensor([ 0.9921, -0.8133, -0.1485, -0.2271]) tensor([0.5620, 0.0924, 0.1796, 0.1660]) -Greedy action tensor([ 0.2824, 0.3837, -0.1330, 0.1179]) tensor([0.2766, 0.3061, 0.1826, 0.2347]) -Greedy action tensor([ 0.8613, -0.7064, 0.0009, -0.4824]) tensor([0.5284, 0.1102, 0.2235, 0.1379]) -Greedy action tensor([ 0.5769, -0.4457, -0.2014, -0.0262]) tensor([0.4227, 0.1520, 0.1941, 0.2312]) -Greedy action tensor([ 1.1906, -0.6706, 0.0664, -0.6575]) tensor([0.6105, 0.0949, 0.1984, 0.0962]) -Greedy action tensor([ 0.7155, -0.2257, -0.1033, -0.0827]) tensor([0.4383, 0.1710, 0.1933, 0.1973]) -Greedy action tensor([ 1.0378, -0.4916, -0.1398, -0.2608]) tensor([0.5563, 0.1205, 0.1713, 0.1518]) -Greedy action tensor([ 0.7847, -0.6146, -0.0240, -0.4944]) tensor([0.5075, 0.1252, 0.2260, 0.1412]) -Greedy action tensor([ 1.0516, -0.6358, -0.0631, -0.4821]) tensor([0.5785, 0.1070, 0.1897, 0.1248]) -Greedy action tensor([ 0.7767, -0.1310, -0.0277, -0.3044]) tensor([0.4566, 0.1842, 0.2043, 0.1549]) -Greedy action tensor([ 1.0690, -0.7733, 0.2556, -0.7246]) tensor([0.5656, 0.0896, 0.2507, 0.0941]) -Greedy action tensor([ 1.0962, -0.5856, -0.1119, -0.5839]) tensor([0.5984, 0.1113, 0.1788, 0.1115]) -Greedy action tensor([ 0.9502, -0.5639, -0.0846, -0.4565]) tensor([0.5494, 0.1209, 0.1952, 0.1346]) -Greedy action tensor([ 0.8821, -0.7641, -0.0325, -0.3784]) tensor([0.5328, 0.1027, 0.2135, 0.1511]) -Greedy action tensor([ 0.4748, -0.0163, 0.0476, -0.5079]) tensor([0.3790, 0.2319, 0.2472, 0.1418]) -Greedy action tensor([ 0.6847, -0.4844, -0.0720, -0.0894]) tensor([0.4462, 0.1386, 0.2094, 0.2058]) -Greedy action tensor([ 1.0626, -0.5632, -0.1614, -0.4528]) tensor([0.5846, 0.1150, 0.1719, 0.1285]) -Greedy action tensor([ 0.7383, -0.5554, -0.0096, -0.4696]) tensor([0.4887, 0.1340, 0.2313, 0.1460]) -Greedy action tensor([ 0.3759, -0.0809, -0.0705, -0.1132]) tensor([0.3465, 0.2194, 0.2217, 0.2124]) -Greedy action tensor([ 0.6543, -0.6284, -0.0633, -0.2427]) tensor([0.4602, 0.1276, 0.2245, 0.1877]) -Greedy action tensor([ 0.8967, -0.4640, 0.0427, -0.5227]) tensor([0.5197, 0.1333, 0.2213, 0.1257]) -Greedy action tensor([ 0.8199, -0.3308, -0.0733, -0.4072]) tensor([0.4953, 0.1567, 0.2027, 0.1452]) -Greedy action tensor([ 1.1887, -0.7678, -0.1192, -0.4176]) tensor([0.6202, 0.0877, 0.1677, 0.1244]) -Greedy action tensor([ 0.5526, -0.5097, -0.2106, -0.1067]) tensor([0.4294, 0.1484, 0.2002, 0.2221]) -Greedy action tensor([ 0.8829, -0.4427, -0.1749, -0.1601]) tensor([0.5089, 0.1352, 0.1767, 0.1793]) -Greedy action tensor([ 1.0546, -0.6061, -0.0271, -0.4310]) tensor([0.5697, 0.1082, 0.1931, 0.1290]) -Greedy action tensor([ 0.6373, -0.3267, -0.2076, -0.3721]) tensor([0.4597, 0.1753, 0.1975, 0.1675]) -Greedy action tensor([ 0.8461, -0.5003, -0.1312, -0.5709]) tensor([0.5322, 0.1385, 0.2003, 0.1290]) -Greedy action tensor([ 1.0738, -0.6214, -0.1159, -0.4373]) tensor([0.5853, 0.1074, 0.1781, 0.1291]) -Greedy action tensor([ 1.2284, -1.0290, 0.0954, -0.5609]) tensor([0.6275, 0.0656, 0.2021, 0.1048]) -Greedy action tensor([ 0.7129, -0.4553, 0.0166, -0.3879]) tensor([0.4669, 0.1452, 0.2327, 0.1553]) -Greedy action tensor([ 0.8554, -0.5835, 0.1033, -0.3732]) tensor([0.4997, 0.1185, 0.2355, 0.1463]) -Greedy action tensor([ 1.2009, -0.6484, 0.0423, -0.4624]) tensor([0.6021, 0.0947, 0.1890, 0.1141]) -Greedy action tensor([ 0.9617, -0.5547, -0.0515, -0.3629]) tensor([0.5410, 0.1187, 0.1964, 0.1439]) -Greedy action tensor([ 0.6539, -0.5046, -0.1573, -0.3641]) tensor([0.4718, 0.1481, 0.2096, 0.1705]) -Greedy action tensor([ 0.7790, -0.2001, -0.0682, -0.0359]) tensor([0.4451, 0.1672, 0.1908, 0.1970]) -Greedy action tensor([ 1.2326, -0.4864, -0.1253, -0.3500]) tensor([0.6091, 0.1092, 0.1566, 0.1251]) -Greedy action tensor([ 0.9789, -0.7854, 0.1002, -0.5716]) tensor([0.5559, 0.0952, 0.2309, 0.1179]) -Greedy action tensor([ 0.3961, -0.0582, -0.0448, -0.0846]) tensor([0.3452, 0.2192, 0.2221, 0.2135]) -Greedy action tensor([ 1.0905, -0.7907, -0.0656, -0.5519]) tensor([0.6022, 0.0918, 0.1895, 0.1165]) -Greedy action tensor([ 0.7979, -0.6413, 0.1227, -0.1849]) tensor([0.4716, 0.1118, 0.2401, 0.1765]) -Greedy action tensor([ 0.2315, 0.0218, -0.2466, 0.1076]) tensor([0.3017, 0.2447, 0.1870, 0.2666]) -Greedy action tensor([ 0.7125, 0.0484, 0.0407, -0.0192]) tensor([0.3990, 0.2054, 0.2038, 0.1919]) -Greedy action tensor([ 0.7834, -0.3591, -0.1240, -0.0244]) tensor([0.4611, 0.1471, 0.1861, 0.2056]) -Greedy action tensor([ 0.7376, -0.2745, 0.0667, -0.3849]) tensor([0.4545, 0.1652, 0.2324, 0.1479]) -Greedy action tensor([ 0.5435, -0.2677, 0.0595, -0.2150]) tensor([0.3954, 0.1757, 0.2437, 0.1852]) -Greedy action tensor([ 0.6131, -0.4305, -0.0893, -0.5290]) tensor([0.4615, 0.1625, 0.2286, 0.1473]) -Greedy action tensor([ 0.8280, -0.3493, -0.1492, -0.3170]) tensor([0.4993, 0.1539, 0.1879, 0.1589]) -Greedy action tensor([ 0.9631, -0.5156, -0.0960, -0.5881]) tensor([0.5597, 0.1276, 0.1941, 0.1186]) -Greedy action tensor([ 0.2464, 0.1043, -0.0895, -0.0953]) tensor([0.3037, 0.2635, 0.2170, 0.2158]) -Greedy action tensor([ 0.3563, 0.0205, 0.0015, -0.0693]) tensor([0.3258, 0.2329, 0.2285, 0.2129]) -Greedy action tensor([ 1.0704, -0.5930, 0.0409, -0.6008]) tensor([0.5765, 0.1092, 0.2059, 0.1084]) -Greedy action tensor([ 1.0320, -0.7434, 0.0947, -0.5985]) tensor([0.5692, 0.0964, 0.2229, 0.1115]) -Greedy action tensor([ 0.5865, -0.4347, -0.0774, -0.2229]) tensor([0.4310, 0.1552, 0.2219, 0.1919]) -Greedy action tensor([ 0.7303, -0.2276, -0.0082, -0.0422]) tensor([0.4304, 0.1651, 0.2057, 0.1988]) -Greedy action tensor([ 0.1474, -0.2190, 0.0177, -0.7269]) tensor([0.3346, 0.2320, 0.2939, 0.1396]) -Greedy action tensor([ 0.7615, -0.3763, 0.0615, -0.1308]) tensor([0.4491, 0.1439, 0.2230, 0.1840]) -Greedy action tensor([ 0.5382, 0.0766, -0.1978, -0.0574]) tensor([0.3759, 0.2369, 0.1800, 0.2072]) -Greedy action tensor([ 1.0195, -0.8476, 0.1153, -0.6660]) tensor([0.5731, 0.0886, 0.2320, 0.1062]) -Greedy action tensor([ 0.9149, -0.3922, 0.0652, -0.3711]) tensor([0.5065, 0.1370, 0.2165, 0.1400]) -Greedy action tensor([ 0.6403, -0.1811, -0.0303, -0.1575]) tensor([0.4164, 0.1831, 0.2129, 0.1875]) -Greedy action tensor([ 0.9358, -0.3177, -0.0236, -0.3733]) tensor([0.5158, 0.1473, 0.1976, 0.1393]) -Greedy action tensor([ 1.0587, -0.5000, -0.2834, 0.1680]) tensor([0.5313, 0.1118, 0.1388, 0.2180]) -Greedy action tensor([ 1.4225, -0.6163, -0.3646, 0.2125]) tensor([0.6266, 0.0816, 0.1049, 0.1869]) -Greedy action tensor([ 1.1885, -0.3423, -0.2921, 0.0497]) tensor([0.5669, 0.1227, 0.1290, 0.1815]) -Greedy action tensor([ 1.7881, -0.3902, -0.7483, 0.0751]) tensor([0.7285, 0.0825, 0.0577, 0.1314]) -Greedy action tensor([ 1.6361, -0.6913, -0.6331, 0.1443]) tensor([0.7013, 0.0684, 0.0725, 0.1578]) -Greedy action tensor([ 1.5096, -0.6923, -0.1136, 0.0792]) tensor([0.6464, 0.0715, 0.1275, 0.1546]) -Greedy action tensor([ 1.7134, -0.3942, -0.7579, 0.1807]) tensor([0.7032, 0.0855, 0.0594, 0.1519]) -Greedy action tensor([ 1.0525, -0.1033, -0.1334, -0.0094]) tensor([0.5086, 0.1601, 0.1554, 0.1759]) -Greedy action tensor([ 1.9001, -0.4004, -0.5627, 0.1429]) tensor([0.7364, 0.0738, 0.0627, 0.1271]) -Greedy action tensor([ 1.2170, -0.0300, -0.7286, 0.2899]) tensor([0.5477, 0.1574, 0.0783, 0.2167]) -Greedy action tensor([ 1.4151, -0.3401, -0.6174, 0.3450]) tensor([0.6072, 0.1050, 0.0796, 0.2083]) -Greedy action tensor([ 2.5818, -0.3677, 0.0992, -0.1009]) tensor([0.8304, 0.0435, 0.0694, 0.0568]) -Greedy action tensor([ 1.8999, -0.5652, -0.7544, 0.3738]) tensor([0.7285, 0.0619, 0.0512, 0.1584]) -Greedy action tensor([ 0.8388, -0.2987, -0.0845, 0.0767]) tensor([0.4578, 0.1468, 0.1818, 0.2136]) -Greedy action tensor([ 1.4430, -0.1791, -0.3681, 0.4146]) tensor([0.5819, 0.1149, 0.0951, 0.2081]) -Greedy action tensor([ 1.4549, -0.2025, -0.7772, 0.0597]) tensor([0.6469, 0.1233, 0.0694, 0.1603]) -Greedy action tensor([ 1.6497, -0.4083, -0.5572, 0.4108]) tensor([0.6547, 0.0836, 0.0720, 0.1897]) -Greedy action tensor([ 2.0588, -0.0970, -0.0228, 0.4243]) tensor([0.6966, 0.0807, 0.0869, 0.1359]) -Greedy action tensor([ 1.9086, -1.4534, -0.1747, 0.3971]) tensor([0.7248, 0.0251, 0.0902, 0.1599]) -Greedy action tensor([ 2.0806, 0.4599, -0.6080, 0.1446]) tensor([0.7092, 0.1402, 0.0482, 0.1023]) -Greedy action tensor([ 1.7298, -0.9938, -0.7485, 0.3936]) tensor([0.7080, 0.0465, 0.0594, 0.1861]) -Greedy action tensor([ 1.5394, 0.0393, -0.2981, -0.4756]) tensor([0.6598, 0.1472, 0.1050, 0.0880]) -Greedy action tensor([ 1.1269, -0.3011, -0.2801, 0.0419]) tensor([0.5487, 0.1316, 0.1344, 0.1854]) -Greedy action tensor([ 1.2772, -0.7920, -0.2351, 0.0884]) tensor([0.6056, 0.0765, 0.1335, 0.1845]) -Greedy action tensor([ 1.1710, -0.3614, -0.4989, -0.0477]) tensor([0.5883, 0.1271, 0.1108, 0.1739]) -Greedy action tensor([ 2.1607, 0.0313, -0.1309, 0.4211]) tensor([0.7165, 0.0852, 0.0724, 0.1258]) -Greedy action tensor([ 1.6851, -0.8845, -0.1565, 0.1733]) tensor([0.6870, 0.0526, 0.1089, 0.1515]) -Greedy action tensor([ 1.8613, -0.7829, -0.1448, 0.4619]) tensor([0.6886, 0.0489, 0.0926, 0.1699]) -Greedy action tensor([ 1.6184, -0.3312, -0.7821, 0.2263]) tensor([0.6750, 0.0961, 0.0612, 0.1678]) -Greedy action tensor([ 1.5498, -0.3894, -0.4264, 0.5557]) tensor([0.6051, 0.0870, 0.0839, 0.2240]) -Greedy action tensor([ 1.6733, -1.3967, -0.1113, 0.3828]) tensor([0.6714, 0.0312, 0.1127, 0.1847]) -Greedy action tensor([ 1.4122, -0.4205, -0.4911, -0.1261]) tensor([0.6563, 0.1050, 0.0978, 0.1409]) -Greedy action tensor([ 1.9041, -0.5243, -0.2685, 0.6510]) tensor([0.6722, 0.0593, 0.0765, 0.1920]) -Greedy action tensor([ 1.4772, -0.7187, -0.0354, 0.2468]) tensor([0.6159, 0.0685, 0.1357, 0.1799]) -Greedy action tensor([ 1.3914, -0.4926, -0.6443, -0.0230]) tensor([0.6555, 0.0996, 0.0856, 0.1593]) -Greedy action tensor([ 1.4310, -0.0969, -0.7625, -0.3836]) tensor([0.6705, 0.1455, 0.0748, 0.1092]) -Greedy action tensor([ 1.9613, -0.8608, -0.4377, 0.6157]) tensor([0.7089, 0.0422, 0.0644, 0.1846]) -Greedy action tensor([ 1.5816, -0.6680, -0.5793, 0.0753]) tensor([0.6933, 0.0731, 0.0799, 0.1537]) -Greedy action tensor([ 1.8665, -0.3107, -0.5296, -0.0775]) tensor([0.7421, 0.0841, 0.0676, 0.1062]) -Greedy action tensor([ 1.7659, -0.9492, -0.5951, 0.3641]) tensor([0.7109, 0.0471, 0.0671, 0.1750]) -Greedy action tensor([ 1.6166, -0.2634, -0.5764, 0.4891]) tensor([0.6297, 0.0961, 0.0703, 0.2039]) -Greedy action tensor([ 0.9690, -0.5500, -0.3262, 0.6271]) tensor([0.4539, 0.0994, 0.1243, 0.3224]) -Greedy action tensor([ 1.3349, -0.4155, -0.6353, 0.2332]) tensor([0.6077, 0.1056, 0.0847, 0.2020]) -Greedy action tensor([ 2.6926, -0.0905, -0.1793, 0.3900]) tensor([0.8207, 0.0508, 0.0464, 0.0821]) -Greedy action tensor([ 1.6628, -0.3614, -0.3595, 0.1222]) tensor([0.6763, 0.0893, 0.0895, 0.1449]) -Greedy action tensor([ 1.4922, -0.5520, -0.2780, 0.5878]) tensor([0.5867, 0.0760, 0.0999, 0.2375]) -Greedy action tensor([ 1.3068, -0.5934, -0.4110, 0.1051]) tensor([0.6136, 0.0918, 0.1101, 0.1845]) -Greedy action tensor([ 1.2437, -0.3631, -0.4641, 0.1997]) tensor([0.5768, 0.1157, 0.1045, 0.2030]) -Greedy action tensor([ 1.2635, -0.7543, 0.2525, 0.1635]) tensor([0.5466, 0.0727, 0.1989, 0.1819]) -Greedy action tensor([ 1.4323, -0.4153, -0.1057, 0.4219]) tensor([0.5759, 0.0908, 0.1237, 0.2097]) -Greedy action tensor([ 1.3205, -0.4949, -0.3610, 0.1975]) tensor([0.5973, 0.0972, 0.1112, 0.1943]) -Greedy action tensor([ 1.5919, -0.5244, -0.3431, 0.5983]) tensor([0.6116, 0.0737, 0.0883, 0.2264]) -Greedy action tensor([ 1.9105, -0.2210, -0.1565, 0.3800]) tensor([0.6842, 0.0812, 0.0866, 0.1481]) -Greedy action tensor([ 1.3230, -0.1278, -0.2374, 0.4993]) tensor([0.5310, 0.1245, 0.1115, 0.2330]) -Greedy action tensor([ 1.2224, -0.4096, -0.4867, 0.3332]) tensor([0.5594, 0.1094, 0.1013, 0.2299]) -Greedy action tensor([ 1.4083, -0.6483, -0.7736, 0.1545]) tensor([0.6553, 0.0838, 0.0739, 0.1870]) -Greedy action tensor([ 1.4297, -0.5777, -0.7951, 0.5784]) tensor([0.5991, 0.0805, 0.0648, 0.2557]) -Greedy action tensor([ 0.9409, -0.8715, 0.0289, 0.2124]) tensor([0.4884, 0.0797, 0.1962, 0.2357]) -Greedy action tensor([ 1.3662, -0.4873, -0.8385, 0.3709]) tensor([0.6110, 0.0957, 0.0674, 0.2258]) -Greedy action tensor([ 1.7362, -0.7018, -0.2435, 0.2589]) tensor([0.6879, 0.0601, 0.0950, 0.1570]) -Greedy action tensor([ 1.5966, -0.2528, -0.3670, 0.2359]) tensor([0.6434, 0.1012, 0.0903, 0.1650]) -Greedy action tensor([ 1.8088, -0.3256, -0.6208, 0.3365]) tensor([0.6965, 0.0824, 0.0613, 0.1598]) -Greedy action tensor([ 1.4073, -0.0366, -0.7674, 0.3222]) tensor([0.5926, 0.1398, 0.0673, 0.2002]) -Greedy action tensor([ 1.8056, -0.1840, -0.6947, 0.7114]) tensor([0.6437, 0.0880, 0.0528, 0.2155]) -Greedy action tensor([ 1.1709e+00, 4.9242e-04, -1.2686e+00, 2.8116e-01]) tensor([0.5530, 0.1716, 0.0482, 0.2272]) -Greedy action tensor([ 1.1269, -0.4893, -0.0947, 0.1937]) tensor([0.5300, 0.1053, 0.1562, 0.2085]) -Greedy action tensor([ 1.3771, -0.3004, -0.2998, -0.0387]) tensor([0.6186, 0.1156, 0.1157, 0.1502]) -Greedy action tensor([ 1.3376, -0.0569, -0.8858, 0.0659]) tensor([0.6110, 0.1515, 0.0661, 0.1713]) -Greedy action tensor([ 2.0842, 0.3985, -0.2826, 0.0350]) tensor([0.7103, 0.1316, 0.0666, 0.0915]) -Greedy action tensor([ 1.4777, -0.7275, -0.5613, 0.2041]) tensor([0.6578, 0.0725, 0.0856, 0.1841]) -Greedy action tensor([ 1.3270, -0.4207, -0.2038, 0.2125]) tensor([0.5819, 0.1013, 0.1259, 0.1909]) -Greedy action tensor([ 1.4117, -0.3467, -0.7086, 0.3900]) tensor([0.6052, 0.1043, 0.0726, 0.2179]) -Greedy action tensor([ 0.8514, -0.1621, -0.0304, -0.1321]) tensor([0.4649, 0.1687, 0.1925, 0.1739]) -Greedy action tensor([ 1.8644, -0.4204, -0.5819, 0.3453]) tensor([0.7106, 0.0723, 0.0615, 0.1555]) -Greedy action tensor([ 1.1019, -0.4011, -0.1448, 0.4051]) tensor([0.4980, 0.1108, 0.1432, 0.2481]) -Greedy action tensor([ 1.4772, -0.6882, -0.2868, 0.0497]) tensor([0.6553, 0.0752, 0.1123, 0.1572]) -Greedy action tensor([ 1.0122, -0.0876, -0.0522, 0.6985]) tensor([0.4152, 0.1382, 0.1432, 0.3034]) -Greedy action tensor([ 1.6200, -0.8538, -0.6139, 0.1264]) tensor([0.7063, 0.0595, 0.0756, 0.1586]) -Greedy action tensor([ 1.2922, -0.7594, -0.5653, 0.7977]) tensor([0.5279, 0.0678, 0.0824, 0.3219]) -Greedy action tensor([ 1.2475, -0.4083, -0.3621, 0.0366]) tensor([0.5921, 0.1131, 0.1184, 0.1764]) -Greedy action tensor([ 1.5402, -0.6102, -0.3653, 0.2593]) tensor([0.6481, 0.0755, 0.0964, 0.1800]) -Greedy action tensor([-0.2158, 0.6157, -0.1138, -0.7946]) tensor([0.2014, 0.4626, 0.2230, 0.1129]) -Greedy action tensor([-0.5804, -0.4460, -0.3703, 0.5283]) tensor([0.1561, 0.1785, 0.1925, 0.4729]) -Greedy action tensor([-0.0072, -0.2106, 0.1979, -0.0619]) tensor([0.2506, 0.2045, 0.3076, 0.2373]) -Greedy action tensor([ 0.0279, -0.8011, 0.0900, -1.1378]) tensor([0.3556, 0.1552, 0.3784, 0.1108]) -Greedy action tensor([-0.0224, 0.3199, -0.5111, -0.7684]) tensor([0.2861, 0.4028, 0.1755, 0.1357]) -Greedy action tensor([ 1.0848, -0.4868, 0.6910, -0.4554]) tensor([0.4770, 0.0991, 0.3217, 0.1022]) -Greedy action tensor([-0.1650, 0.2066, -0.3558, -0.5517]) tensor([0.2528, 0.3666, 0.2089, 0.1717]) -Greedy action tensor([-1.0896, -1.1618, 0.8109, -0.7665]) tensor([0.1000, 0.0930, 0.6689, 0.1381]) -Greedy action tensor([ 0.4364, -0.8224, -0.5692, -0.6061]) tensor([0.4994, 0.1418, 0.1827, 0.1761]) -Greedy action tensor([-1.3438, -0.4012, -1.2786, -0.2715]) tensor([0.1323, 0.3397, 0.1413, 0.3867]) -Greedy action tensor([ 0.9666, -0.6889, -0.3011, 0.8488]) tensor([0.4235, 0.0809, 0.1192, 0.3764]) -Greedy action tensor([-0.6977, -0.9734, 0.0548, -0.3448]) tensor([0.1885, 0.1431, 0.4001, 0.2683]) -Greedy action tensor([-0.0645, -0.5982, -0.5530, 0.3332]) tensor([0.2711, 0.1590, 0.1663, 0.4035]) -Greedy action tensor([-0.2597, -0.8112, 1.0625, -0.5450]) tensor([0.1645, 0.0948, 0.6171, 0.1237]) -Greedy action tensor([ 0.3467, -0.3864, 0.0013, -0.3521]) tensor([0.3724, 0.1789, 0.2636, 0.1851]) -Greedy action tensor([ 0.2255, 0.2923, -0.1106, 0.2180]) tensor([0.2648, 0.2831, 0.1892, 0.2628]) -Greedy action tensor([ 0.0603, -1.1300, 0.4698, -0.8199]) tensor([0.3101, 0.0943, 0.4670, 0.1286]) -Greedy action tensor([ 0.2004, -1.7287, 1.3805, -0.1700]) tensor([0.1964, 0.0285, 0.6394, 0.1356]) -Greedy action tensor([-0.1095, -0.4942, -0.7962, 0.0601]) tensor([0.2968, 0.2021, 0.1494, 0.3517]) -Greedy action tensor([ 0.1391, -0.0899, 0.4851, -0.6731]) tensor([0.2738, 0.2177, 0.3870, 0.1215]) -Greedy action tensor([-0.1452, -0.0990, -0.8040, -0.2424]) tensor([0.2880, 0.3016, 0.1490, 0.2613]) -Greedy action tensor([ 0.4137, -2.0591, -0.2392, 0.6857]) tensor([0.3428, 0.0289, 0.1784, 0.4499]) -Greedy action tensor([ 0.2452, -0.2007, -0.0343, -0.3077]) tensor([0.3365, 0.2155, 0.2545, 0.1936]) -Greedy action tensor([-0.5118, -0.9865, -0.5285, -0.3773]) tensor([0.2667, 0.1659, 0.2623, 0.3051]) -Greedy action tensor([-0.5115, -2.0254, 0.1939, 0.3763]) tensor([0.1762, 0.0388, 0.3568, 0.4282]) -Greedy action tensor([-0.5721, -0.4696, -0.4944, 0.2978]) tensor([0.1794, 0.1987, 0.1939, 0.4281]) -Greedy action tensor([-0.1524, -0.4260, 0.0071, -0.8061]) tensor([0.2896, 0.2202, 0.3396, 0.1506]) -Greedy action tensor([ 0.3482, 0.1523, -0.8367, -0.4701]) tensor([0.3892, 0.3200, 0.1190, 0.1717]) -Greedy action tensor([ 0.1487, -0.4841, -0.5504, 0.0172]) tensor([0.3443, 0.1828, 0.1711, 0.3018]) -Greedy action tensor([ 0.5870, -0.2860, 0.2003, 0.3956]) tensor([0.3421, 0.1429, 0.2324, 0.2825]) -Greedy action tensor([-0.8181, -0.4612, 0.5696, -0.6587]) tensor([0.1314, 0.1878, 0.5266, 0.1542]) -Greedy action tensor([-0.4778, -0.7260, 0.0898, -0.4315]) tensor([0.2178, 0.1699, 0.3842, 0.2281]) -Greedy action tensor([ 1.0122, 0.5183, 0.6737, -0.2108]) tensor([0.3820, 0.2331, 0.2723, 0.1125]) -Greedy action tensor([1.6487, 0.1611, 0.7848, 0.7489]) tensor([0.4868, 0.1100, 0.2052, 0.1980]) -Greedy action tensor([ 0.8560, -0.5250, 0.0948, 0.4763]) tensor([0.4162, 0.1046, 0.1944, 0.2847]) -Greedy action tensor([-0.6149, -0.6647, 0.6418, -0.8567]) tensor([0.1600, 0.1522, 0.5622, 0.1256]) -Greedy action tensor([-0.7563, 0.1651, 0.4901, -1.1517]) tensor([0.1305, 0.3279, 0.4538, 0.0879]) -Greedy action tensor([ 0.6658, -1.2676, 0.5085, -0.9694]) tensor([0.4558, 0.0659, 0.3895, 0.0888]) -Greedy action tensor([-0.6983, -0.0625, 0.2041, -0.2687]) tensor([0.1451, 0.2741, 0.3578, 0.2230]) -Greedy action tensor([ 0.0538, -1.4021, -0.3411, -0.0684]) tensor([0.3582, 0.0835, 0.2413, 0.3170]) -Greedy action tensor([-1.1593, -0.8318, -1.0559, -0.3697]) tensor([0.1755, 0.2435, 0.1946, 0.3865]) -Greedy action tensor([ 1.4644, -0.0313, -0.5653, -0.1734]) tensor([0.6452, 0.1446, 0.0848, 0.1254]) -Greedy action tensor([-0.2429, -0.9537, 0.1802, 0.0297]) tensor([0.2309, 0.1134, 0.3525, 0.3032]) -Greedy action tensor([-0.3125, 0.0823, -0.8103, -0.3478]) tensor([0.2465, 0.3658, 0.1498, 0.2379]) -Greedy action tensor([-0.9769, -1.1394, -1.3358, -0.1601]) tensor([0.2078, 0.1767, 0.1452, 0.4704]) -Greedy action tensor([ 1.0078, -1.0219, -0.2616, 0.4478]) tensor([0.5041, 0.0662, 0.1417, 0.2880]) -Greedy action tensor([ 0.3257, -0.6640, -0.3718, 0.9841]) tensor([0.2631, 0.0978, 0.1310, 0.5082]) -Greedy action tensor([ 1.0199, -1.1619, 0.2227, 0.9749]) tensor([0.3969, 0.0448, 0.1788, 0.3794]) -Greedy action tensor([-0.7152, -0.7646, 1.0633, 0.1822]) tensor([0.0968, 0.0922, 0.5734, 0.2376]) -Greedy action tensor([-0.7871, -0.8524, 0.5609, -1.5027]) tensor([0.1593, 0.1493, 0.6135, 0.0779]) -Greedy action tensor([-0.1890, 0.3755, 0.2494, -0.4038]) tensor([0.1955, 0.3438, 0.3030, 0.1577]) -Greedy action tensor([ 0.5364, 0.1507, 0.2036, -0.7540]) tensor([0.3742, 0.2545, 0.2683, 0.1030]) -Greedy action tensor([-1.0081, -0.8872, -1.2598, -0.3748]) tensor([0.2088, 0.2356, 0.1623, 0.3933]) -Greedy action tensor([ 0.7569, -1.0951, -0.3331, 1.6317]) tensor([0.2570, 0.0403, 0.0864, 0.6163]) -Greedy action tensor([-0.3701, 0.8241, -0.6986, 0.1076]) tensor([0.1507, 0.4976, 0.1085, 0.2431]) -Greedy action tensor([-0.0130, 0.1512, 0.4131, 0.1842]) tensor([0.2029, 0.2391, 0.3107, 0.2472]) -Greedy action tensor([-0.4942, -0.2940, 0.3108, -0.3349]) tensor([0.1776, 0.2169, 0.3972, 0.2083]) -Greedy action tensor([ 0.1001, 0.1196, 0.6096, -0.4921]) tensor([0.2360, 0.2406, 0.3928, 0.1305]) -Greedy action tensor([-0.3505, -0.4304, 0.7294, -0.6046]) tensor([0.1772, 0.1636, 0.5218, 0.1374]) -Greedy action tensor([-0.3095, -1.5247, 0.4606, 0.3756]) tensor([0.1838, 0.0545, 0.3970, 0.3647]) -Greedy action tensor([-0.7988, -0.8860, -1.0257, 0.1529]) tensor([0.1885, 0.1728, 0.1503, 0.4884]) -Greedy action tensor([ 0.7062, -0.7716, -0.2868, 0.6468]) tensor([0.3936, 0.0898, 0.1458, 0.3709]) -Greedy action tensor([ 0.3213, -1.5138, -0.3579, 0.7459]) tensor([0.3129, 0.0499, 0.1587, 0.4785]) -Greedy action tensor([ 0.0450, 0.0837, -0.0568, -0.6646]) tensor([0.2912, 0.3027, 0.2630, 0.1432]) -Greedy action tensor([ 0.0354, -0.2366, -0.5251, -0.6827]) tensor([0.3545, 0.2701, 0.2024, 0.1729]) -Greedy action tensor([ 1.0255, 0.3748, -0.4346, -0.1717]) tensor([0.4864, 0.2537, 0.1129, 0.1469]) -Greedy action tensor([-0.1570, 0.2112, -0.4018, 0.0841]) tensor([0.2222, 0.3211, 0.1739, 0.2828]) -Greedy action tensor([ 0.6098, -0.5374, 0.1200, -0.7147]) tensor([0.4553, 0.1446, 0.2790, 0.1211]) -Greedy action tensor([ 0.7748, -1.2409, -0.5715, 0.8506]) tensor([0.4045, 0.0539, 0.1053, 0.4364]) -Greedy action tensor([ 0.4516, -0.7265, 0.0238, -0.6570]) tensor([0.4367, 0.1345, 0.2847, 0.1441]) -Greedy action tensor([ 0.2875, -0.8939, -0.1254, -0.1405]) tensor([0.3816, 0.1171, 0.2525, 0.2487]) -Greedy action tensor([-0.2075, -0.3725, 0.1663, 0.5156]) tensor([0.1865, 0.1581, 0.2710, 0.3843]) -Greedy action tensor([ 0.1746, -1.6671, -0.2690, 0.0542]) tensor([0.3722, 0.0590, 0.2388, 0.3300]) -Greedy action tensor([-0.0462, -0.4921, -0.2983, -0.0739]) tensor([0.2950, 0.1889, 0.2292, 0.2869]) -Greedy action tensor([ 1.1064, 0.0639, 0.4183, -0.3331]) tensor([0.4780, 0.1685, 0.2402, 0.1133]) -Greedy action tensor([ 0.6793, -1.2463, -0.0356, -0.7803]) tensor([0.5355, 0.0781, 0.2620, 0.1244]) -Greedy action tensor([-0.3207, -0.5856, 0.2857, -1.1374]) tensor([0.2474, 0.1898, 0.4536, 0.1093]) -Greedy action tensor([ 0.2043, 0.1058, -0.4347, 1.0257]) tensor([0.2124, 0.1925, 0.1121, 0.4830]) -Greedy action tensor([ 0.3799, -0.9059, 0.7000, -0.5606]) tensor([0.3285, 0.0908, 0.4524, 0.1283]) -Greedy action tensor([ 0.9503, -0.9331, -0.2090, 0.3808]) tensor([0.4922, 0.0749, 0.1544, 0.2785]) -Greedy action tensor([ 0.2682, -1.1658, -0.5067, -0.2481]) tensor([0.4356, 0.1038, 0.2007, 0.2599]) -Greedy action tensor([-1.1400, -0.1709, 0.2028, 0.3487]) tensor([0.0841, 0.2215, 0.3219, 0.3725]) -Greedy action tensor([-1.5974, -0.1204, -0.3506, -0.7458]) tensor([0.0893, 0.3910, 0.3106, 0.2092]) -Greedy action tensor([-1.9128, -0.4388, 0.6516, -0.1628]) tensor([0.0415, 0.1811, 0.5388, 0.2386]) -Greedy action tensor([-1.8655, -0.3931, 0.6206, -0.1476]) tensor([0.0436, 0.1900, 0.5236, 0.2429]) -Greedy action tensor([-1.9042, -0.3432, 0.6388, -0.1576]) tensor([0.0413, 0.1967, 0.5252, 0.2368]) -Greedy action tensor([-1.8695, -0.4372, 0.6293, -0.1442]) tensor([0.0435, 0.1823, 0.5297, 0.2444]) -Greedy action tensor([-1.9263, -0.4205, 0.6580, -0.1700]) tensor([0.0407, 0.1836, 0.5398, 0.2359]) -Greedy action tensor([-1.9182, -0.4409, 0.6549, -0.1670]) tensor([0.0412, 0.1807, 0.5405, 0.2376]) -Greedy action tensor([-1.3132, 0.7918, 0.2017, 0.2831]) tensor([0.0535, 0.4391, 0.2434, 0.2640]) -Greedy action tensor([-0.7987, 0.9160, 0.1210, 0.0828]) tensor([0.0871, 0.4840, 0.2186, 0.2104]) -Greedy action tensor([-1.8554, -0.4637, 0.6344, -0.1365]) tensor([0.0441, 0.1775, 0.5322, 0.2462]) -Greedy action tensor([-0.6280, 0.9939, 0.0144, 0.3524]) tensor([0.0941, 0.4763, 0.1788, 0.2508]) -Greedy action tensor([-1.7359, -0.3940, 0.3080, -0.4150]) tensor([0.0614, 0.2348, 0.4738, 0.2300]) -Greedy action tensor([-1.9313, -0.3763, 0.6511, -0.1698]) tensor([0.0403, 0.1910, 0.5338, 0.2349]) -Greedy action tensor([-1.9171, -0.4469, 0.6557, -0.1670]) tensor([0.0413, 0.1797, 0.5413, 0.2377]) -Greedy action tensor([-1.9393, -0.4430, 0.6661, -0.1775]) tensor([0.0403, 0.1799, 0.5453, 0.2346]) -Greedy action tensor([-1.5432, -0.4045, 0.4522, -0.0476]) tensor([0.0627, 0.1959, 0.4614, 0.2799]) -Greedy action tensor([-1.8694, -0.4569, 0.6326, -0.1473]) tensor([0.0437, 0.1792, 0.5328, 0.2443]) -Greedy action tensor([-1.9242, -0.4255, 0.6582, -0.1687]) tensor([0.0408, 0.1828, 0.5402, 0.2363]) -Greedy action tensor([-1.7453, -0.3231, 0.5825, -0.0468]) tensor([0.0479, 0.1987, 0.4915, 0.2619]) -Greedy action tensor([-1.8372, -0.4386, 0.6143, -0.1297]) tensor([0.0451, 0.1827, 0.5235, 0.2488]) -Greedy action tensor([-1.4610, -0.1025, 0.5276, 0.1435]) tensor([0.0582, 0.2266, 0.4254, 0.2898]) -Greedy action tensor([-0.0473, 1.1171, 0.0236, 0.1773]) tensor([0.1532, 0.4907, 0.1644, 0.1917]) -Greedy action tensor([-1.8366, -0.4661, 0.6202, -0.1274]) tensor([0.0452, 0.1779, 0.5272, 0.2496]) -Greedy action tensor([-1.1905, -0.5200, 0.3385, 0.3646]) tensor([0.0813, 0.1589, 0.3749, 0.3849]) -Greedy action tensor([ 0.9120, 1.1961, -0.0662, 0.4972]) tensor([0.2972, 0.3948, 0.1117, 0.1963]) -Greedy action tensor([-1.8623, -0.4328, 0.6576, -0.1328]) tensor([0.0430, 0.1797, 0.5347, 0.2426]) -Greedy action tensor([-1.8737, -0.1653, 0.5987, -0.1346]) tensor([0.0416, 0.2294, 0.4925, 0.2366]) -Greedy action tensor([-0.7108, -0.1337, 0.1294, -0.1304]) tensor([0.1453, 0.2587, 0.3365, 0.2595]) -Greedy action tensor([-1.8854, -0.4504, 0.6399, -0.1516]) tensor([0.0428, 0.1798, 0.5350, 0.2424]) -Greedy action tensor([-1.9414, -0.4475, 0.6664, -0.1789]) tensor([0.0402, 0.1793, 0.5460, 0.2345]) -Greedy action tensor([-1.9286, -0.4296, 0.6598, -0.1710]) tensor([0.0407, 0.1821, 0.5413, 0.2359]) -Greedy action tensor([-0.6426, 0.2847, 0.1589, 0.0742]) tensor([0.1281, 0.3239, 0.2856, 0.2624]) -Greedy action tensor([-1.8074, -0.2550, 0.6100, -0.0904]) tensor([0.0444, 0.2098, 0.4983, 0.2474]) -Greedy action tensor([-1.8278, -0.4714, 0.6418, -0.1076]) tensor([0.0449, 0.1742, 0.5303, 0.2506]) -Greedy action tensor([-1.8735, -0.4441, 0.6263, -0.1373]) tensor([0.0434, 0.1813, 0.5288, 0.2464]) -Greedy action tensor([-1.4376, -0.4299, 0.5979, 0.1641]) tensor([0.0611, 0.1675, 0.4681, 0.3033]) -Greedy action tensor([-1.6866, -0.3140, 0.5135, -0.0698]) tensor([0.0526, 0.2076, 0.4748, 0.2650]) -Greedy action tensor([-1.6457, -0.0026, 0.5305, 0.1719]) tensor([0.0473, 0.2446, 0.4169, 0.2912]) -Greedy action tensor([-1.3377, -0.4994, -0.5168, -1.1064]) tensor([0.1461, 0.3378, 0.3320, 0.1841]) -Greedy action tensor([-1.8504, -0.4808, 0.6220, -0.1350]) tensor([0.0448, 0.1761, 0.5304, 0.2488]) -Greedy action tensor([-1.9400, -0.4447, 0.6680, -0.1774]) tensor([0.0402, 0.1794, 0.5459, 0.2344]) -Greedy action tensor([-1.9050, -0.3954, 0.6497, -0.1614]) tensor([0.0415, 0.1877, 0.5337, 0.2371]) -Greedy action tensor([-1.9333, -0.4244, 0.6588, -0.1707]) tensor([0.0405, 0.1830, 0.5406, 0.2359]) -Greedy action tensor([-1.7534, -0.3871, 0.5707, -0.1254]) tensor([0.0494, 0.1938, 0.5050, 0.2518]) -Greedy action tensor([-1.9297, -0.4342, 0.6609, -0.1722]) tensor([0.0407, 0.1814, 0.5423, 0.2357]) -Greedy action tensor([-1.3895, -0.1727, 0.3717, -0.0196]) tensor([0.0708, 0.2389, 0.4118, 0.2785]) -Greedy action tensor([-1.7587, -0.4534, 0.5784, -0.0745]) tensor([0.0490, 0.1806, 0.5067, 0.2638]) -Greedy action tensor([-0.6884, 0.8580, 0.0719, 0.0613]) tensor([0.1005, 0.4718, 0.2150, 0.2127]) -Greedy action tensor([-1.8545, -0.4183, 0.6207, -0.1334]) tensor([0.0441, 0.1854, 0.5240, 0.2465]) -Greedy action tensor([ 0.6461, 1.1272, -0.0594, 0.5145]) tensor([0.2507, 0.4056, 0.1238, 0.2198]) -Greedy action tensor([-1.6303, -0.1763, 0.4872, 0.1222]) tensor([0.0517, 0.2211, 0.4293, 0.2980]) -Greedy action tensor([-1.6217, -0.4640, 0.5585, 0.0654]) tensor([0.0542, 0.1726, 0.4800, 0.2931]) -Greedy action tensor([-1.7948, -0.2894, 0.5739, -0.1287]) tensor([0.0466, 0.2098, 0.4974, 0.2463]) -Greedy action tensor([-1.9227, -0.3449, 0.6392, -0.1652]) tensor([0.0406, 0.1969, 0.5268, 0.2357]) -Greedy action tensor([-1.8935, -0.4377, 0.6375, -0.1678]) tensor([0.0426, 0.1827, 0.5354, 0.2393]) -Greedy action tensor([-1.9073, -0.4395, 0.6489, -0.1602]) tensor([0.0417, 0.1811, 0.5378, 0.2394]) -Greedy action tensor([-1.9489, -0.4527, 0.6689, -0.1832]) tensor([0.0400, 0.1785, 0.5479, 0.2337]) -Greedy action tensor([-1.8735, -0.4274, 0.6349, -0.1457]) tensor([0.0432, 0.1834, 0.5304, 0.2430]) -Greedy action tensor([-1.9124, -0.4554, 0.6564, -0.1649]) tensor([0.0415, 0.1783, 0.5419, 0.2383]) -Greedy action tensor([-1.8341, -0.3523, 0.6605, -0.1094]) tensor([0.0432, 0.1903, 0.5239, 0.2426]) -Greedy action tensor([-1.7286, -0.2762, 0.5285, -0.0594]) tensor([0.0497, 0.2122, 0.4745, 0.2636]) -Greedy action tensor([-1.8516, -0.3962, 0.6130, -0.1444]) tensor([0.0443, 0.1900, 0.5213, 0.2444]) -Greedy action tensor([-1.5085, 0.5840, 0.3702, 0.1637]) tensor([0.0477, 0.3864, 0.3121, 0.2538]) -Greedy action tensor([-0.5859, 0.7984, 0.0553, 0.0215]) tensor([0.1146, 0.4575, 0.2176, 0.2104]) -Greedy action tensor([-1.9369, -0.4068, 0.6545, -0.1776]) tensor([0.0404, 0.1864, 0.5388, 0.2344]) -Greedy action tensor([-1.2671, 0.1144, 0.2804, 0.0021]) tensor([0.0755, 0.3007, 0.3550, 0.2688]) -Greedy action tensor([-1.8903, -0.3715, 0.6293, -0.1614]) tensor([0.0423, 0.1933, 0.5259, 0.2385]) -Greedy action tensor([-1.9207, -0.3998, 0.6522, -0.1625]) tensor([0.0408, 0.1869, 0.5352, 0.2370]) -Greedy action tensor([-1.6718, -0.4537, 0.5206, -0.0535]) tensor([0.0544, 0.1839, 0.4873, 0.2744]) -Greedy action tensor([-1.8989, -0.4461, 0.6448, -0.1585]) tensor([0.0422, 0.1804, 0.5370, 0.2405]) -Greedy action tensor([-1.7301, -0.2896, 0.5641, -0.0713]) tensor([0.0490, 0.2071, 0.4863, 0.2576]) -Greedy action tensor([-1.9230, -0.4182, 0.6538, -0.1690]) tensor([0.0409, 0.1843, 0.5383, 0.2364]) -Greedy action tensor([-0.9352, 0.2513, 0.4114, 0.1787]) tensor([0.0896, 0.2933, 0.3443, 0.2728]) -Greedy action tensor([-1.8823, -0.4639, 0.6903, -0.0952]) tensor([0.0413, 0.1707, 0.5413, 0.2468]) -Greedy action tensor([-1.7811, -0.4907, 0.5878, -0.1265]) tensor([0.0487, 0.1768, 0.5200, 0.2545]) -Greedy action tensor([-1.9027, -0.4584, 0.6389, -0.1649]) tensor([0.0423, 0.1794, 0.5376, 0.2406]) -Greedy action tensor([-1.9347, -0.4290, 0.6627, -0.1682]) tensor([0.0403, 0.1818, 0.5418, 0.2360]) -Greedy action tensor([-1.9380, -0.4483, 0.6711, -0.1744]) tensor([0.0402, 0.1785, 0.5466, 0.2347]) -Greedy action tensor([-1.7688, -0.3803, 0.5792, -0.0703]) tensor([0.0478, 0.1914, 0.4998, 0.2610]) -Greedy action tensor([-1.7053, -0.5219, 0.5412, -0.0694]) tensor([0.0530, 0.1732, 0.5015, 0.2723]) -Greedy action tensor([ 0.3926, -0.0732, -0.0476, -0.2093]) tensor([0.3547, 0.2226, 0.2284, 0.1943]) -Greedy action tensor([ 0.9512, -0.1550, -0.1290, -0.3236]) tensor([0.5129, 0.1697, 0.1741, 0.1433]) -Greedy action tensor([ 0.6057, -0.4427, 0.2275, -0.4108]) tensor([0.4171, 0.1462, 0.2858, 0.1509]) -Greedy action tensor([ 0.6572, -0.5522, -0.1132, -0.4949]) tensor([0.4814, 0.1436, 0.2228, 0.1521]) -Greedy action tensor([ 1.1485e+00, -5.2822e-01, -8.9943e-05, -2.9716e-01]) tensor([0.5748, 0.1075, 0.1823, 0.1354]) -Greedy action tensor([ 1.0205, -0.6844, -0.0555, -0.5256]) tensor([0.5761, 0.1047, 0.1964, 0.1228]) -Greedy action tensor([ 0.9966, -0.6521, 0.0848, -0.6488]) tensor([0.5596, 0.1076, 0.2249, 0.1080]) -Greedy action tensor([ 1.1900, -0.6328, 0.0404, -0.3397]) tensor([0.5900, 0.0953, 0.1869, 0.1278]) -Greedy action tensor([ 0.5039, -0.3767, 0.0402, -0.0754]) tensor([0.3841, 0.1592, 0.2416, 0.2152]) -Greedy action tensor([ 0.7682, -0.3682, -0.3607, -0.5610]) tensor([0.5238, 0.1681, 0.1694, 0.1387]) -Greedy action tensor([ 0.8437, -0.6775, -0.0870, -0.7526]) tensor([0.5509, 0.1203, 0.2172, 0.1116]) -Greedy action tensor([ 0.6545, -0.2378, 0.0023, -0.0220]) tensor([0.4100, 0.1680, 0.2136, 0.2085]) -Greedy action tensor([ 0.7283, -0.5549, -0.0954, -0.3263]) tensor([0.4844, 0.1343, 0.2126, 0.1687]) -Greedy action tensor([ 1.0015, -0.7528, -0.0144, -0.6573]) tensor([0.5796, 0.1003, 0.2098, 0.1103]) -Greedy action tensor([ 1.1445, -0.6307, 0.0504, -0.3641]) tensor([0.5795, 0.0982, 0.1941, 0.1282]) -Greedy action tensor([ 0.7617, -0.3424, 0.0174, -0.2550]) tensor([0.4612, 0.1529, 0.2191, 0.1669]) -Greedy action tensor([0.5109, 0.2733, 0.0584, 0.0951]) tensor([0.3242, 0.2557, 0.2062, 0.2139]) -Greedy action tensor([ 0.8182, -0.7319, -0.0678, -0.2424]) tensor([0.5074, 0.1077, 0.2092, 0.1757]) -Greedy action tensor([ 0.8523, -0.2601, -0.1886, -0.3459]) tensor([0.5041, 0.1657, 0.1780, 0.1521]) -Greedy action tensor([ 0.7480, -0.4745, -0.0559, -0.2958]) tensor([0.4775, 0.1406, 0.2137, 0.1681]) -Greedy action tensor([ 0.8196, -0.3610, -0.1556, -0.3389]) tensor([0.5005, 0.1537, 0.1887, 0.1571]) -Greedy action tensor([ 1.3308, -0.8097, 0.0040, -0.6081]) tensor([0.6550, 0.0770, 0.1738, 0.0942]) -Greedy action tensor([ 0.7221, -0.5244, -0.0183, -0.4667]) tensor([0.4833, 0.1390, 0.2305, 0.1472]) -Greedy action tensor([ 0.5220, 0.0753, -0.0626, 0.0624]) tensor([0.3535, 0.2262, 0.1970, 0.2233]) -Greedy action tensor([ 0.6053, -0.4755, 0.1244, -0.1935]) tensor([0.4154, 0.1409, 0.2568, 0.1869]) -Greedy action tensor([ 0.6881, -0.2520, 0.0939, -0.1005]) tensor([0.4172, 0.1629, 0.2303, 0.1896]) -Greedy action tensor([ 0.9455, -0.5809, -0.0328, -0.4740]) tensor([0.5449, 0.1184, 0.2049, 0.1318]) -Greedy action tensor([ 1.1342, -0.6289, 0.0923, -0.6153]) tensor([0.5889, 0.1010, 0.2077, 0.1024]) -Greedy action tensor([ 0.9318, -0.4703, -0.1356, -0.3197]) tensor([0.5330, 0.1312, 0.1833, 0.1525]) -Greedy action tensor([ 0.6075, -0.8489, 0.0718, -0.5967]) tensor([0.4721, 0.1100, 0.2763, 0.1416]) -Greedy action tensor([ 0.5086, -0.1923, -0.0926, -0.2100]) tensor([0.3950, 0.1960, 0.2165, 0.1925]) -Greedy action tensor([ 0.6930, -0.4143, -0.0678, -0.2103]) tensor([0.4539, 0.1500, 0.2121, 0.1840]) -Greedy action tensor([ 0.8699, -0.5819, 0.1168, -0.3583]) tensor([0.5005, 0.1172, 0.2357, 0.1466]) -Greedy action tensor([ 0.6081, -0.3502, 0.0668, -0.2028]) tensor([0.4149, 0.1591, 0.2415, 0.1844]) -Greedy action tensor([ 0.5559, -0.3938, -0.0063, -0.2470]) tensor([0.4158, 0.1609, 0.2370, 0.1863]) -Greedy action tensor([ 0.2635, 0.3819, -0.2280, 0.0639]) tensor([0.2812, 0.3165, 0.1720, 0.2303]) -Greedy action tensor([ 0.9674, -0.5385, 0.0353, -0.4457]) tensor([0.5379, 0.1193, 0.2118, 0.1309]) -Greedy action tensor([ 1.0892, -0.8659, 0.1085, -0.6012]) tensor([0.5879, 0.0832, 0.2205, 0.1084]) -Greedy action tensor([ 0.6212, -0.3011, 0.0043, -0.1347]) tensor([0.4155, 0.1652, 0.2242, 0.1951]) -Greedy action tensor([ 1.0337, -0.5140, -0.1572, -0.6366]) tensor([0.5866, 0.1248, 0.1783, 0.1104]) -Greedy action tensor([ 0.6494, -0.4806, -0.0259, -0.2902]) tensor([0.4499, 0.1453, 0.2290, 0.1758]) -Greedy action tensor([ 0.6553, -0.3500, -0.0435, -0.2739]) tensor([0.4429, 0.1621, 0.2202, 0.1749]) -Greedy action tensor([ 0.6733, -0.7347, -0.0992, -0.3046]) tensor([0.4802, 0.1175, 0.2218, 0.1806]) -Greedy action tensor([ 0.7967, -0.2301, -0.1056, -0.1596]) tensor([0.4655, 0.1667, 0.1888, 0.1789]) -Greedy action tensor([ 1.2950, -0.9933, 0.0262, -0.4454]) tensor([0.6418, 0.0651, 0.1805, 0.1126]) -Greedy action tensor([ 0.7680, -0.2860, -0.0405, -0.1823]) tensor([0.4586, 0.1598, 0.2043, 0.1773]) -Greedy action tensor([ 1.1259, -0.8787, 0.1204, -0.5046]) tensor([0.5895, 0.0794, 0.2157, 0.1154]) -Greedy action tensor([ 0.7638, -0.0457, 0.2090, -0.0956]) tensor([0.4094, 0.1822, 0.2351, 0.1733]) -Greedy action tensor([ 1.1490, -0.4607, -0.3008, -0.1450]) tensor([0.5852, 0.1170, 0.1373, 0.1605]) -Greedy action tensor([ 1.0935, -0.8077, 0.1301, -0.7289]) tensor([0.5908, 0.0883, 0.2254, 0.0955]) -Greedy action tensor([ 0.6582, -0.1634, 0.2696, -0.7233]) tensor([0.4221, 0.1856, 0.2862, 0.1060]) -Greedy action tensor([ 0.8771, -0.0224, -0.4231, -0.4178]) tensor([0.5120, 0.2083, 0.1395, 0.1402]) -Greedy action tensor([ 0.8291, -0.3967, -0.0457, -0.3830]) tensor([0.4980, 0.1462, 0.2076, 0.1482]) -Greedy action tensor([ 0.9978, -0.8102, 0.0841, -0.6743]) tensor([0.5705, 0.0935, 0.2288, 0.1072]) -Greedy action tensor([ 0.6488, -0.3730, -0.0796, -0.5017]) tensor([0.4632, 0.1667, 0.2235, 0.1466]) -Greedy action tensor([ 0.8664, -0.3276, 0.0732, -0.5099]) tensor([0.4980, 0.1509, 0.2253, 0.1258]) -Greedy action tensor([ 0.9376, -0.7346, 0.0996, -0.4759]) tensor([0.5366, 0.1008, 0.2321, 0.1305]) -Greedy action tensor([ 1.1477, -0.9556, 0.0804, -0.5638]) tensor([0.6073, 0.0741, 0.2089, 0.1097]) -Greedy action tensor([ 0.8267, -0.6067, -0.0913, -0.3758]) tensor([0.5159, 0.1230, 0.2060, 0.1550]) -Greedy action tensor([ 0.6659, -0.2094, -0.0443, -0.2043]) tensor([0.4297, 0.1791, 0.2112, 0.1800]) -Greedy action tensor([ 0.5555, -0.2281, 0.1377, -0.0925]) tensor([0.3790, 0.1731, 0.2496, 0.1983]) -Greedy action tensor([ 0.4662, -0.0402, -0.0735, -0.0538]) tensor([0.3597, 0.2168, 0.2097, 0.2139]) -Greedy action tensor([ 0.7241, -0.4082, -0.0128, -0.2219]) tensor([0.4568, 0.1472, 0.2186, 0.1774]) -Greedy action tensor([ 0.4326, -0.1724, -0.1343, -0.1236]) tensor([0.3722, 0.2032, 0.2111, 0.2134]) -Greedy action tensor([ 1.2877, -0.7106, 0.0890, -0.6455]) tensor([0.6322, 0.0857, 0.1906, 0.0915]) -Greedy action tensor([ 0.6061, -0.3209, -0.0896, -0.0883]) tensor([0.4177, 0.1653, 0.2083, 0.2086]) -Greedy action tensor([ 0.8087, -0.2463, 0.1039, -0.2370]) tensor([0.4558, 0.1587, 0.2253, 0.1602]) -Greedy action tensor([ 0.3601, -0.0230, -0.0914, -0.0802]) tensor([0.3376, 0.2301, 0.2149, 0.2173]) -Greedy action tensor([ 0.7861, -0.2905, -0.1446, -0.2746]) tensor([0.4805, 0.1637, 0.1894, 0.1664]) -Greedy action tensor([ 0.8087, -0.4445, -0.1120, -0.1550]) tensor([0.4842, 0.1383, 0.1928, 0.1847]) -Greedy action tensor([ 0.8312, -0.6640, 0.0678, -0.3202]) tensor([0.4984, 0.1117, 0.2323, 0.1576]) -Greedy action tensor([ 0.8721, -0.7057, 0.1729, -0.8261]) tensor([0.5301, 0.1094, 0.2635, 0.0970]) -Greedy action tensor([ 0.8511, -0.5962, -0.1026, -0.6112]) tensor([0.5399, 0.1270, 0.2080, 0.1251]) -Greedy action tensor([ 0.6359, -0.4073, 0.0528, -0.1290]) tensor([0.4209, 0.1483, 0.2349, 0.1959]) -Greedy action tensor([ 1.0572, -0.3217, -0.0666, -0.2348]) tensor([0.5401, 0.1360, 0.1756, 0.1484]) -Greedy action tensor([ 0.9149, -0.4418, 0.3813, -0.5629]) tensor([0.4826, 0.1243, 0.2830, 0.1101]) -Greedy action tensor([ 0.8651, -0.7656, 0.1132, -0.5196]) tensor([0.5215, 0.1021, 0.2459, 0.1306]) -Greedy action tensor([ 0.5283, -0.4715, -0.0777, -0.4492]) tensor([0.4367, 0.1607, 0.2382, 0.1643]) -Greedy action tensor([ 0.9037, -0.4269, -0.1014, -0.2534]) tensor([0.5142, 0.1359, 0.1882, 0.1617]) -Greedy action tensor([ 0.8838, -0.4672, -0.0969, -0.1034]) tensor([0.4984, 0.1291, 0.1869, 0.1857]) -Greedy action tensor([ 0.9829, -0.5136, -0.1327, -0.6502]) tensor([0.5724, 0.1282, 0.1876, 0.1118]) -Greedy action tensor([ 1.8121, -0.4294, -1.1007, 0.2173]) tensor([0.7334, 0.0780, 0.0398, 0.1488]) -Greedy action tensor([ 1.2842, -0.4782, -0.2801, 0.4616]) tensor([0.5494, 0.0943, 0.1150, 0.2414]) -Greedy action tensor([ 1.5353, -0.5927, 0.1129, -0.2450]) tensor([0.6541, 0.0779, 0.1577, 0.1103]) -Greedy action tensor([ 2.0666, -1.2632, 0.0239, 0.8192]) tensor([0.6883, 0.0246, 0.0893, 0.1977]) -Greedy action tensor([ 0.7810, -1.0437, -0.3531, -0.3231]) tensor([0.5511, 0.0889, 0.1773, 0.1827]) -Greedy action tensor([ 1.1052, -0.2367, -0.4550, -0.0208]) tensor([0.5569, 0.1455, 0.1170, 0.1806]) -Greedy action tensor([ 1.6664, -0.8632, -0.2915, 0.6844]) tensor([0.6268, 0.0499, 0.0885, 0.2348]) -Greedy action tensor([ 1.1094, -0.3865, -0.3571, 0.2739]) tensor([0.5295, 0.1186, 0.1222, 0.2296]) -Greedy action tensor([ 1.7463, -0.2226, -0.8434, 0.2207]) tensor([0.6983, 0.0975, 0.0524, 0.1519]) -Greedy action tensor([ 1.6933, -1.0285, -0.3886, 0.0916]) tensor([0.7184, 0.0472, 0.0896, 0.1448]) -Greedy action tensor([ 1.5481, -0.6635, -0.5134, 0.2263]) tensor([0.6651, 0.0729, 0.0847, 0.1774]) -Greedy action tensor([ 0.9667, -0.4966, -0.3775, -0.0356]) tensor([0.5378, 0.1245, 0.1402, 0.1974]) -Greedy action tensor([ 1.3065, -0.4538, -0.6117, -0.0959]) tensor([0.6390, 0.1099, 0.0939, 0.1572]) -Greedy action tensor([ 1.9720, 0.1158, -0.0870, 0.0790]) tensor([0.6971, 0.1089, 0.0889, 0.1050]) -Greedy action tensor([ 1.4332, -0.2657, 0.0564, 0.1697]) tensor([0.5821, 0.1065, 0.1469, 0.1645]) -Greedy action tensor([ 1.8970, -0.0474, -0.7473, 0.3084]) tensor([0.7051, 0.1009, 0.0501, 0.1440]) -Greedy action tensor([ 1.8064, -0.1000, -0.4472, 0.2210]) tensor([0.6856, 0.1019, 0.0720, 0.1405]) -Greedy action tensor([ 1.7483, -0.8707, 0.0842, 0.0737]) tensor([0.6898, 0.0503, 0.1306, 0.1293]) -Greedy action tensor([ 0.9855, -0.3776, -0.3078, 0.3249]) tensor([0.4886, 0.1250, 0.1340, 0.2524]) -Greedy action tensor([ 1.3870, -0.3694, -0.6642, 0.2333]) tensor([0.6185, 0.1068, 0.0795, 0.1951]) -Greedy action tensor([ 1.9627, -0.8411, -0.1575, 0.5134]) tensor([0.7065, 0.0428, 0.0848, 0.1659]) -Greedy action tensor([ 1.8122, -0.3331, -0.8179, 0.2964]) tensor([0.7099, 0.0831, 0.0512, 0.1559]) -Greedy action tensor([1.2623, 0.1792, 0.0804, 0.0646]) tensor([0.5136, 0.1739, 0.1575, 0.1550]) -Greedy action tensor([ 1.4493, -0.9300, -0.2256, 0.3103]) tensor([0.6250, 0.0579, 0.1171, 0.2001]) -Greedy action tensor([ 1.4323, -0.4661, -0.6154, 0.4582]) tensor([0.6037, 0.0904, 0.0779, 0.2279]) -Greedy action tensor([ 1.3721, -0.8195, -0.5405, 0.6266]) tensor([0.5767, 0.0644, 0.0852, 0.2737]) -Greedy action tensor([ 1.8680, -0.7187, -0.1034, 0.0336]) tensor([0.7277, 0.0548, 0.1013, 0.1162]) -Greedy action tensor([ 1.6903, 0.4441, -0.3981, 0.2992]) tensor([0.6023, 0.1732, 0.0746, 0.1499]) -Greedy action tensor([ 1.5098, -0.3657, -0.4943, 0.2079]) tensor([0.6410, 0.0982, 0.0864, 0.1744]) -Greedy action tensor([ 1.4738, -0.3400, -0.8515, 0.2006]) tensor([0.6490, 0.1058, 0.0635, 0.1817]) -Greedy action tensor([ 1.4996, -0.5861, -0.1704, -0.2867]) tensor([0.6756, 0.0839, 0.1272, 0.1132]) -Greedy action tensor([ 1.7945, 0.6445, -0.7185, 0.1521]) tensor([0.6285, 0.1990, 0.0509, 0.1216]) -Greedy action tensor([ 1.8074, -0.9385, -0.3530, 0.3372]) tensor([0.7096, 0.0455, 0.0818, 0.1631]) -Greedy action tensor([ 1.8836, 0.6197, -0.1573, 0.3122]) tensor([0.6172, 0.1744, 0.0802, 0.1282]) -Greedy action tensor([ 2.0004, -0.9076, -0.1113, 0.8037]) tensor([0.6767, 0.0369, 0.0819, 0.2045]) -Greedy action tensor([ 1.6209, -0.3321, -0.8038, 0.1484]) tensor([0.6851, 0.0972, 0.0606, 0.1571]) -Greedy action tensor([ 2.0183, -1.0075, -0.3187, 0.4878]) tensor([0.7344, 0.0356, 0.0710, 0.1590]) -Greedy action tensor([ 1.1701, -0.3012, -0.3408, 0.1242]) tensor([0.5550, 0.1274, 0.1225, 0.1950]) -Greedy action tensor([ 1.1240, -0.0874, -0.7628, 0.5541]) tensor([0.4963, 0.1478, 0.0752, 0.2807]) -Greedy action tensor([ 1.3253, 0.0523, -0.8398, 0.4433]) tensor([0.5529, 0.1548, 0.0634, 0.2289]) -Greedy action tensor([ 1.2330, -0.5811, -0.7077, -0.2255]) tensor([0.6497, 0.1059, 0.0933, 0.1511]) -Greedy action tensor([ 2.3000, -0.2119, -0.6084, 0.4358]) tensor([0.7748, 0.0628, 0.0423, 0.1201]) -Greedy action tensor([ 1.5653, -0.7210, -0.0724, 0.3201]) tensor([0.6313, 0.0642, 0.1227, 0.1817]) -Greedy action tensor([ 1.6443, -0.4154, -0.5526, 1.3821]) tensor([0.4980, 0.0635, 0.0554, 0.3831]) -Greedy action tensor([ 1.4937, -0.4857, -0.1640, 0.3805]) tensor([0.6034, 0.0834, 0.1150, 0.1982]) -Greedy action tensor([ 1.6721, -0.7567, 0.0110, 0.4119]) tensor([0.6404, 0.0564, 0.1216, 0.1816]) -Greedy action tensor([ 2.2573, -0.4509, 0.1402, 0.6308]) tensor([0.7227, 0.0482, 0.0870, 0.1421]) -Greedy action tensor([ 2.0382, -1.1341, 0.0569, 0.1601]) tensor([0.7504, 0.0314, 0.1035, 0.1147]) -Greedy action tensor([ 1.5268, -0.4530, -0.5788, 0.3000]) tensor([0.6439, 0.0889, 0.0784, 0.1888]) -Greedy action tensor([ 1.4616, -0.2734, -0.6819, 0.1685]) tensor([0.6377, 0.1125, 0.0748, 0.1750]) -Greedy action tensor([ 1.3605, -0.2772, -0.4933, 0.3247]) tensor([0.5862, 0.1140, 0.0918, 0.2081]) -Greedy action tensor([ 1.0626, -0.2983, -0.1478, 0.2088]) tensor([0.5050, 0.1295, 0.1505, 0.2150]) -Greedy action tensor([ 1.5699, -0.6650, -0.4396, 0.7742]) tensor([0.5909, 0.0632, 0.0792, 0.2666]) -Greedy action tensor([ 1.9024, -0.1603, -0.4856, 0.3537]) tensor([0.6986, 0.0888, 0.0641, 0.1485]) -Greedy action tensor([ 1.4523, 0.0407, -0.3507, 0.1856]) tensor([0.5916, 0.1442, 0.0975, 0.1667]) -Greedy action tensor([ 1.6595, -0.4036, -0.4126, 0.3450]) tensor([0.6572, 0.0835, 0.0828, 0.1765]) -Greedy action tensor([ 2.2638, -0.7687, -0.3713, 0.5295]) tensor([0.7713, 0.0372, 0.0553, 0.1362]) -Greedy action tensor([ 1.9544, -0.8747, -0.2016, 0.4023]) tensor([0.7212, 0.0426, 0.0835, 0.1527]) -Greedy action tensor([ 1.7911, -0.3149, -0.6062, 0.1638]) tensor([0.7096, 0.0864, 0.0646, 0.1394]) -Greedy action tensor([ 1.3232, -0.4340, -0.2578, 0.0850]) tensor([0.5995, 0.1034, 0.1233, 0.1738]) -Greedy action tensor([ 1.8775, -0.9307, -0.3589, 0.4020]) tensor([0.7164, 0.0432, 0.0765, 0.1638]) -Greedy action tensor([ 1.1278, -0.2694, -0.3075, 0.4529]) tensor([0.5014, 0.1240, 0.1193, 0.2553]) -Greedy action tensor([ 1.4753, -0.3875, -0.2834, 0.1679]) tensor([0.6258, 0.0971, 0.1078, 0.1693]) -Greedy action tensor([ 1.6397, -0.5407, -0.4501, 0.3850]) tensor([0.6571, 0.0743, 0.0813, 0.1874]) -Greedy action tensor([ 1.4459, 0.1232, -0.2928, -0.0181]) tensor([0.5976, 0.1592, 0.1050, 0.1382]) -Greedy action tensor([ 1.2480, -0.4831, -0.4293, 0.2889]) tensor([0.5724, 0.1014, 0.1070, 0.2193]) -Greedy action tensor([ 1.1014, -0.1190, -0.4088, 0.1674]) tensor([0.5238, 0.1546, 0.1157, 0.2059]) -Greedy action tensor([ 1.8107, -1.1084, -0.1158, 0.6598]) tensor([0.6596, 0.0356, 0.0961, 0.2087]) -Greedy action tensor([ 1.6521, 0.0363, -0.7195, 0.0233]) tensor([0.6719, 0.1335, 0.0627, 0.1318]) -Greedy action tensor([ 1.5380, 0.0802, -0.8170, 0.5458]) tensor([0.5888, 0.1370, 0.0559, 0.2183]) -Greedy action tensor([ 2.8381, -1.0004, -0.3041, 0.9893]) tensor([0.8182, 0.0176, 0.0353, 0.1288]) -Greedy action tensor([ 1.2629, -0.7781, -0.1551, 0.4636]) tensor([0.5489, 0.0713, 0.1330, 0.2468]) -Greedy action tensor([ 1.4628, -0.4573, -0.8899, 0.4068]) tensor([0.6291, 0.0922, 0.0598, 0.2188]) -Greedy action tensor([ 1.2060, -0.1651, -0.4927, 0.1958]) tensor([0.5553, 0.1409, 0.1016, 0.2022]) -Greedy action tensor([ 1.1226, -0.0976, -0.1840, 0.1280]) tensor([0.5166, 0.1525, 0.1399, 0.1911]) -Greedy action tensor([ 1.0401, -0.0680, -0.6175, -0.0302]) tensor([0.5366, 0.1772, 0.1023, 0.1840]) -Greedy action tensor([ 2.2009, -1.8349, 0.1128, 0.0109]) tensor([0.7978, 0.0141, 0.0989, 0.0893]) -Greedy action tensor([ 1.8588, -0.4484, -0.4154, 0.5946]) tensor([0.6735, 0.0670, 0.0693, 0.1902]) -Greedy action tensor([ 0.9628, -0.2080, -0.1777, 0.0621]) tensor([0.4911, 0.1523, 0.1570, 0.1995]) -Greedy action tensor([ 1.1811, -0.3766, -1.0366, 0.2366]) tensor([0.5854, 0.1233, 0.0637, 0.2276]) -Greedy action tensor([ 1.3008, -0.7176, -0.0644, 0.2022]) tensor([0.5809, 0.0772, 0.1483, 0.1936]) -Greedy action tensor([-0.2087, -0.4411, 0.1090, -0.4442]) tensor([0.2527, 0.2003, 0.3472, 0.1997]) -Greedy action tensor([ 0.5488, -0.7293, 0.7730, 0.6077]) tensor([0.2785, 0.0776, 0.3485, 0.2954]) -Greedy action tensor([ 0.1639, -1.2815, 0.1661, -0.2998]) tensor([0.3488, 0.0822, 0.3496, 0.2194]) -Greedy action tensor([-0.5548, 0.1459, 0.0832, -0.6944]) tensor([0.1731, 0.3488, 0.3276, 0.1505]) -Greedy action tensor([-1.5770, -0.6675, 1.1691, -1.6904]) tensor([0.0501, 0.1244, 0.7807, 0.0447]) -Greedy action tensor([ 0.6580, -0.7414, -0.0865, 0.4984]) tensor([0.3885, 0.0959, 0.1845, 0.3312]) -Greedy action tensor([0.2486, 0.8970, 0.5206, 0.1636]) tensor([0.1944, 0.3718, 0.2552, 0.1786]) -Greedy action tensor([ 0.7939, -0.2529, 0.5493, -0.0395]) tensor([0.3893, 0.1367, 0.3048, 0.1692]) -Greedy action tensor([-0.1694, -0.7284, -0.8612, 0.1148]) tensor([0.2940, 0.1681, 0.1472, 0.3907]) -Greedy action tensor([ 0.5505, -0.0240, -0.0212, 0.2073]) tensor([0.3525, 0.1984, 0.1990, 0.2501]) -Greedy action tensor([-1.0149, -0.3907, -0.4685, -0.1709]) tensor([0.1445, 0.2698, 0.2496, 0.3361]) -Greedy action tensor([ 0.1981, -0.1386, -0.0632, -0.5938]) tensor([0.3405, 0.2431, 0.2622, 0.1542]) -Greedy action tensor([ 0.0669, -0.5976, 0.3396, -0.5215]) tensor([0.2956, 0.1521, 0.3882, 0.1641]) -Greedy action tensor([-0.3213, -0.5975, 0.8926, -1.1330]) tensor([0.1796, 0.1362, 0.6045, 0.0797]) -Greedy action tensor([-0.7361, -0.0056, 0.2863, -0.9198]) tensor([0.1495, 0.3104, 0.4156, 0.1244]) -Greedy action tensor([ 1.1479, -0.8371, -0.2702, -0.6464]) tensor([0.6469, 0.0889, 0.1567, 0.1075]) -Greedy action tensor([ 0.4352, -2.2108, 0.4093, 0.3125]) tensor([0.3413, 0.0242, 0.3326, 0.3019]) -Greedy action tensor([ 0.2926, -1.9543, -0.6119, 0.1204]) tensor([0.4251, 0.0449, 0.1721, 0.3579]) -Greedy action tensor([-5.7203e-01, -8.9122e-01, -8.5971e-04, 1.9976e-01]) tensor([0.1767, 0.1284, 0.3127, 0.3822]) -Greedy action tensor([ 0.3376, -0.3681, -0.3979, -0.5077]) tensor([0.4162, 0.2055, 0.1995, 0.1787]) -Greedy action tensor([ 0.4411, -1.6626, 0.3606, 0.5101]) tensor([0.3209, 0.0392, 0.2961, 0.3438]) -Greedy action tensor([-0.0384, 0.0127, 0.2288, -0.5681]) tensor([0.2533, 0.2666, 0.3309, 0.1492]) -Greedy action tensor([-0.7641, -1.1666, 0.2189, -0.6709]) tensor([0.1839, 0.1229, 0.4914, 0.2018]) -Greedy action tensor([ 0.3645, -1.5101, 0.7614, 0.3208]) tensor([0.2779, 0.0426, 0.4134, 0.2661]) -Greedy action tensor([ 0.5868, -1.4789, 1.4098, -0.3690]) tensor([0.2639, 0.0335, 0.6011, 0.1015]) -Greedy action tensor([ 1.3335, -0.2587, 0.3077, 0.7348]) tensor([0.4736, 0.0964, 0.1698, 0.2603]) -Greedy action tensor([ 0.8405, -0.7429, -0.3729, 0.2513]) tensor([0.4861, 0.0998, 0.1445, 0.2697]) -Greedy action tensor([-0.5993, -0.3832, -0.1997, -0.4140]) tensor([0.2026, 0.2515, 0.3021, 0.2438]) -Greedy action tensor([ 0.7340, -0.1403, -1.1721, -0.0345]) tensor([0.4927, 0.2055, 0.0732, 0.2285]) -Greedy action tensor([-0.4968, -0.9002, 0.0270, -0.5107]) tensor([0.2303, 0.1538, 0.3888, 0.2271]) -Greedy action tensor([ 1.7888, -1.7968, 1.8053, -0.4239]) tensor([0.4643, 0.0129, 0.4720, 0.0508]) -Greedy action tensor([ 0.1864, -0.3372, -0.2826, -0.6536]) tensor([0.3774, 0.2236, 0.2361, 0.1629]) -Greedy action tensor([ 0.4299, -0.2347, 0.2276, 0.8376]) tensor([0.2608, 0.1342, 0.2130, 0.3920]) -Greedy action tensor([-0.2796, -0.5213, 0.3791, -1.4396]) tensor([0.2481, 0.1948, 0.4793, 0.0778]) -Greedy action tensor([ 0.6376, -0.2820, 0.2108, -1.0359]) tensor([0.4467, 0.1781, 0.2915, 0.0838]) -Greedy action tensor([ 1.4177, -1.9860, 0.3843, -0.5630]) tensor([0.6549, 0.0218, 0.2330, 0.0904]) -Greedy action tensor([-0.7396, -0.8493, 0.8178, -0.7694]) tensor([0.1314, 0.1177, 0.6235, 0.1275]) -Greedy action tensor([ 0.1826, -0.2947, 0.5530, -0.1565]) tensor([0.2645, 0.1641, 0.3830, 0.1884]) -Greedy action tensor([-0.5624, -0.3065, -0.0740, -0.0577]) tensor([0.1793, 0.2316, 0.2922, 0.2970]) -Greedy action tensor([-0.7519, -1.0399, 0.4087, -0.7488]) tensor([0.1682, 0.1261, 0.5369, 0.1687]) -Greedy action tensor([-0.2093, -0.5694, 0.0894, -0.2952]) tensor([0.2523, 0.1760, 0.3402, 0.2315]) -Greedy action tensor([-0.5208, -0.1743, -0.9245, -0.5244]) tensor([0.2452, 0.3467, 0.1638, 0.2443]) -Greedy action tensor([-0.2361, -0.4304, 1.2192, -0.6676]) tensor([0.1480, 0.1218, 0.6341, 0.0961]) -Greedy action tensor([-1.0646, -0.9588, 0.6186, -1.0975]) tensor([0.1182, 0.1314, 0.6361, 0.1144]) -Greedy action tensor([ 0.3039, -1.3550, -0.4373, -0.2258]) tensor([0.4433, 0.0844, 0.2113, 0.2610]) -Greedy action tensor([ 0.8538, 0.0422, 0.8789, -0.2535]) tensor([0.3571, 0.1586, 0.3662, 0.1180]) -Greedy action tensor([ 0.1457, -0.1589, 0.8861, -1.0022]) tensor([0.2409, 0.1776, 0.5051, 0.0764]) -Greedy action tensor([ 0.9256, 0.0725, -0.2389, 0.5992]) tensor([0.4065, 0.1732, 0.1269, 0.2933]) -Greedy action tensor([-0.2231, 0.3729, 0.3190, 0.4474]) tensor([0.1541, 0.2797, 0.2650, 0.3013]) -Greedy action tensor([ 0.7478, -0.9474, 0.7531, -0.2586]) tensor([0.3915, 0.0719, 0.3936, 0.1431]) -Greedy action tensor([ 1.7815, -0.1249, 0.0899, 0.8766]) tensor([0.5756, 0.0855, 0.1060, 0.2329]) -Greedy action tensor([-1.1378, -0.5393, -0.4250, -0.0362]) tensor([0.1271, 0.2312, 0.2592, 0.3824]) -Greedy action tensor([ 0.4923, 0.0594, 0.0442, -0.0706]) tensor([0.3500, 0.2270, 0.2236, 0.1994]) -Greedy action tensor([ 0.9112, -1.3609, -0.0944, 0.5174]) tensor([0.4665, 0.0481, 0.1707, 0.3147]) -Greedy action tensor([-0.3930, -2.3635, 0.3844, 0.0829]) tensor([0.2031, 0.0283, 0.4418, 0.3268]) -Greedy action tensor([-0.4740, 0.2530, -0.1742, -1.4326]) tensor([0.2082, 0.4308, 0.2811, 0.0798]) -Greedy action tensor([ 0.7796, -1.0232, 0.0116, -0.8310]) tensor([0.5469, 0.0901, 0.2537, 0.1093]) -Greedy action tensor([ 0.1688, -0.3673, 0.4108, -0.6906]) tensor([0.3047, 0.1782, 0.3881, 0.1290]) -Greedy action tensor([ 0.0781, -0.8361, -0.4331, 0.7660]) tensor([0.2506, 0.1005, 0.1503, 0.4986]) -Greedy action tensor([0.5986, 0.0079, 0.3689, 0.0982]) tensor([0.3384, 0.1875, 0.2689, 0.2052]) -Greedy action tensor([-0.2947, -0.4487, 0.6190, -0.3261]) tensor([0.1880, 0.1612, 0.4687, 0.1822]) -Greedy action tensor([ 0.5474, -0.5312, 0.2200, -0.4724]) tensor([0.4130, 0.1404, 0.2977, 0.1489]) -Greedy action tensor([ 0.6817, -1.0113, -0.0459, -0.4378]) tensor([0.5016, 0.0923, 0.2423, 0.1638]) -Greedy action tensor([-0.7122, -0.8706, -0.6566, -0.8292]) tensor([0.2631, 0.2246, 0.2782, 0.2341]) -Greedy action tensor([ 0.3584, -1.5242, -0.7445, -0.6830]) tensor([0.5444, 0.0828, 0.1807, 0.1921]) -Greedy action tensor([-0.1233, -0.4932, -0.4132, 0.0251]) tensor([0.2778, 0.1919, 0.2079, 0.3223]) -Greedy action tensor([-0.5005, -0.6582, 0.4017, -1.4570]) tensor([0.2126, 0.1816, 0.5241, 0.0817]) -Greedy action tensor([ 0.3417, 0.5383, -0.4040, 0.0783]) tensor([0.2890, 0.3518, 0.1371, 0.2221]) -Greedy action tensor([-9.1231e-02, -2.1505e+00, 1.0475e-03, -2.6245e-01]) tensor([0.3261, 0.0416, 0.3576, 0.2748]) -Greedy action tensor([ 0.2746, -0.5742, 0.9484, -0.1243]) tensor([0.2463, 0.1054, 0.4831, 0.1653]) -Greedy action tensor([ 0.4583, -1.1747, 0.1792, -0.0525]) tensor([0.3919, 0.0766, 0.2965, 0.2351]) -Greedy action tensor([-0.0834, -0.5364, 0.7542, -0.0961]) tensor([0.2027, 0.1288, 0.4684, 0.2001]) -Greedy action tensor([ 0.2931, -0.7413, -0.8446, -0.3028]) tensor([0.4490, 0.1596, 0.1439, 0.2474]) -Greedy action tensor([ 0.4769, -0.5794, -0.3624, -0.0472]) tensor([0.4216, 0.1466, 0.1821, 0.2496]) -Greedy action tensor([ 0.1256, -0.3653, 0.2767, -0.3550]) tensor([0.2947, 0.1804, 0.3427, 0.1822]) -Greedy action tensor([ 0.0196, -1.4609, 0.4957, -0.3085]) tensor([0.2811, 0.0640, 0.4525, 0.2025]) -Greedy action tensor([-0.3446, -0.1561, -0.7653, -0.9575]) tensor([0.2936, 0.3545, 0.1928, 0.1591]) -Greedy action tensor([ 0.0190, 0.0551, -0.3269, 0.1659]) tensor([0.2563, 0.2657, 0.1813, 0.2968]) -Greedy action tensor([ 0.1404, -1.4737, 1.2982, -0.4431]) tensor([0.2024, 0.0403, 0.6443, 0.1129]) -Greedy action tensor([ 1.0445, -0.9625, -0.2052, 1.2369]) tensor([0.3798, 0.0510, 0.1088, 0.4603]) -Greedy action tensor([ 0.1258, -2.4965, 0.3027, -0.0059]) tensor([0.3182, 0.0231, 0.3797, 0.2789]) -Greedy action tensor([ 1.0097, -0.4900, -0.0529, -0.4078]) tensor([0.5522, 0.1232, 0.1908, 0.1338]) -Greedy action tensor([ 0.7679, -0.0610, 0.0123, -0.1116]) tensor([0.4308, 0.1881, 0.2024, 0.1788]) -Greedy action tensor([ 0.7524, -0.7289, -0.0643, -0.3033]) tensor([0.4957, 0.1127, 0.2191, 0.1725]) -Greedy action tensor([ 0.7827, -0.5663, -0.0440, -0.2183]) tensor([0.4844, 0.1257, 0.2119, 0.1780]) -Greedy action tensor([ 0.8364, -0.4940, -0.1413, -0.1911]) tensor([0.5004, 0.1323, 0.1882, 0.1791]) -Greedy action tensor([ 0.5769, -0.3240, -0.0380, -0.1019]) tensor([0.4075, 0.1655, 0.2203, 0.2067]) -Greedy action tensor([ 0.3788, 0.0280, 0.1529, -0.1571]) tensor([0.3239, 0.2281, 0.2584, 0.1895]) -Greedy action tensor([ 0.4532, -0.1622, -0.0458, -0.1376]) tensor([0.3702, 0.2000, 0.2247, 0.2050]) -Greedy action tensor([ 0.8834, -0.3779, 0.2236, -0.3069]) tensor([0.4752, 0.1346, 0.2457, 0.1445]) -Greedy action tensor([ 0.9970, -0.4136, -0.0320, -0.7631]) tensor([0.5639, 0.1376, 0.2015, 0.0970]) -Greedy action tensor([ 0.8197, -0.2722, -0.1279, -0.1394]) tensor([0.4747, 0.1593, 0.1840, 0.1819]) -Greedy action tensor([ 0.6087, -0.3207, 0.1541, -0.2138]) tensor([0.4051, 0.1599, 0.2571, 0.1779]) -Greedy action tensor([ 0.3880, -0.1096, -0.1382, -0.0207]) tensor([0.3493, 0.2123, 0.2063, 0.2321]) -Greedy action tensor([ 1.1281, -0.4570, -0.1107, -0.3292]) tensor([0.5789, 0.1186, 0.1677, 0.1348]) -Greedy action tensor([ 1.1878, -0.9188, -0.0352, -0.8284]) tensor([0.6455, 0.0785, 0.1900, 0.0860]) -Greedy action tensor([ 0.7342, -0.5175, 0.0637, -0.3055]) tensor([0.4649, 0.1330, 0.2378, 0.1644]) -Greedy action tensor([ 1.0232, -0.4774, -0.0512, -0.4462]) tensor([0.5572, 0.1243, 0.1903, 0.1282]) -Greedy action tensor([ 0.7851, -0.4351, 0.0859, -0.3028]) tensor([0.4697, 0.1386, 0.2334, 0.1582]) -Greedy action tensor([ 0.8453, -0.6431, 0.0728, -0.3574]) tensor([0.5030, 0.1135, 0.2323, 0.1511]) -Greedy action tensor([ 0.8946, -0.5404, 0.0387, -0.6286]) tensor([0.5316, 0.1266, 0.2259, 0.1159]) -Greedy action tensor([ 0.7422, -0.5195, 0.0026, -0.1668]) tensor([0.4622, 0.1309, 0.2206, 0.1862]) -Greedy action tensor([ 0.8463, -0.7095, 0.0097, -0.5265]) tensor([0.5270, 0.1112, 0.2283, 0.1335]) -Greedy action tensor([ 0.7595, -0.4350, -0.0110, -0.1585]) tensor([0.4619, 0.1399, 0.2138, 0.1844]) -Greedy action tensor([ 0.8169, -0.6467, 0.0845, -0.7942]) tensor([0.5231, 0.1210, 0.2515, 0.1044]) -Greedy action tensor([ 0.4753, -0.2681, -0.1081, -0.2262]) tensor([0.3954, 0.1880, 0.2206, 0.1960]) -Greedy action tensor([ 1.0362, -0.6554, 0.1734, -0.6454]) tensor([0.5579, 0.1028, 0.2354, 0.1038]) -Greedy action tensor([ 0.3704, 0.3071, -0.0965, -0.1996]) tensor([0.3194, 0.2998, 0.2002, 0.1806]) -Greedy action tensor([ 1.5246, -1.0250, 0.1443, -0.7793]) tensor([0.6996, 0.0546, 0.1759, 0.0699]) -Greedy action tensor([ 0.4098, -0.3281, -0.1296, -0.1233]) tensor([0.3776, 0.1806, 0.2202, 0.2216]) -Greedy action tensor([ 0.2952, 0.1086, -0.1528, -0.1132]) tensor([0.3191, 0.2648, 0.2039, 0.2121]) -Greedy action tensor([ 0.3515, -0.3573, -0.2116, -0.6640]) tensor([0.4126, 0.2031, 0.2349, 0.1494]) -Greedy action tensor([ 0.9943, -0.5488, -0.0382, -0.2397]) tensor([0.5374, 0.1148, 0.1914, 0.1564]) -Greedy action tensor([ 0.9572, -0.3992, -0.0403, -0.1527]) tensor([0.5113, 0.1317, 0.1885, 0.1685]) -Greedy action tensor([ 0.8704, -0.4298, -0.0596, -0.1938]) tensor([0.4970, 0.1354, 0.1961, 0.1715]) -Greedy action tensor([ 0.6853, -0.2131, -0.0225, -0.1304]) tensor([0.4269, 0.1739, 0.2104, 0.1888]) -Greedy action tensor([ 0.8162, -0.3523, 0.0723, -0.2271]) tensor([0.4676, 0.1454, 0.2223, 0.1648]) -Greedy action tensor([ 1.2358, -0.6995, -0.0337, -0.5903]) tensor([0.6304, 0.0910, 0.1771, 0.1015]) -Greedy action tensor([ 0.4601, -0.2426, -0.1469, -0.3280]) tensor([0.4008, 0.1985, 0.2184, 0.1823]) -Greedy action tensor([ 0.5661, -0.5670, -0.0730, -0.2236]) tensor([0.4341, 0.1398, 0.2291, 0.1971]) -Greedy action tensor([ 0.5366, -0.0430, -0.1575, 0.0724]) tensor([0.3720, 0.2084, 0.1858, 0.2338]) -Greedy action tensor([ 0.9036, -0.7710, 0.0490, -0.4448]) tensor([0.5340, 0.1001, 0.2272, 0.1387]) -Greedy action tensor([ 0.9500, -0.5692, 0.0163, -0.3380]) tensor([0.5297, 0.1159, 0.2082, 0.1461]) -Greedy action tensor([ 1.2095, -0.4481, 0.0591, -0.8224]) tensor([0.6104, 0.1163, 0.1932, 0.0800]) -Greedy action tensor([ 0.6458, -0.4230, -0.1167, -0.0334]) tensor([0.4316, 0.1482, 0.2013, 0.2188]) -Greedy action tensor([ 0.6354, -0.4200, -0.0687, -0.1746]) tensor([0.4372, 0.1522, 0.2162, 0.1945]) -Greedy action tensor([ 0.9971, -0.8430, 0.1512, -0.5724]) tensor([0.5568, 0.0884, 0.2389, 0.1159]) -Greedy action tensor([ 0.4757, -0.1162, -0.0861, -0.3002]) tensor([0.3870, 0.2141, 0.2207, 0.1781]) -Greedy action tensor([ 0.6504, -0.5289, 0.0029, -0.2164]) tensor([0.4442, 0.1366, 0.2325, 0.1867]) -Greedy action tensor([ 0.9528, -0.6618, 0.0327, -0.4561]) tensor([0.5429, 0.1080, 0.2164, 0.1327]) -Greedy action tensor([ 0.8723, -0.3871, -0.0576, -0.1059]) tensor([0.4868, 0.1382, 0.1921, 0.1830]) -Greedy action tensor([ 0.7426, -0.1984, 0.1092, -0.5292]) tensor([0.4543, 0.1773, 0.2411, 0.1273]) -Greedy action tensor([ 0.8599, -0.5069, -0.0212, -0.4888]) tensor([0.5185, 0.1322, 0.2148, 0.1346]) -Greedy action tensor([ 0.5746, 0.1974, -0.1836, 0.1298]) tensor([0.3578, 0.2453, 0.1676, 0.2293]) -Greedy action tensor([ 0.7858, -0.3472, -0.2117, -0.2695]) tensor([0.4904, 0.1580, 0.1809, 0.1707]) -Greedy action tensor([ 0.2220, 0.1189, -0.0208, -0.0481]) tensor([0.2899, 0.2615, 0.2274, 0.2213]) -Greedy action tensor([ 1.3869, -0.8196, -0.0245, -0.7998]) tensor([0.6820, 0.0751, 0.1663, 0.0766]) -Greedy action tensor([ 0.7921, -0.3972, 0.0058, -0.1363]) tensor([0.4640, 0.1413, 0.2114, 0.1834]) -Greedy action tensor([ 0.9241, -0.5039, -0.0908, -0.3647]) tensor([0.5325, 0.1277, 0.1930, 0.1468]) -Greedy action tensor([ 0.7966, -0.1423, -0.1086, -0.1503]) tensor([0.4580, 0.1791, 0.1852, 0.1777]) -Greedy action tensor([ 0.4554, -0.6240, -0.1287, -0.1743]) tensor([0.4115, 0.1398, 0.2294, 0.2192]) -Greedy action tensor([ 0.4220, -0.2551, -0.2904, -0.3084]) tensor([0.4032, 0.2049, 0.1978, 0.1942]) -Greedy action tensor([ 0.5951, -0.1698, 0.0890, -0.1753]) tensor([0.3951, 0.1839, 0.2382, 0.1829]) -Greedy action tensor([ 0.6572, -0.4051, -0.2530, -0.3577]) tensor([0.4738, 0.1638, 0.1907, 0.1717]) -Greedy action tensor([ 0.5940, -0.3644, -0.2235, -0.1325]) tensor([0.4332, 0.1661, 0.1912, 0.2095]) -Greedy action tensor([ 0.6951, -0.3848, -0.0208, -0.2165]) tensor([0.4484, 0.1523, 0.2191, 0.1802]) -Greedy action tensor([ 0.8397, -0.3428, 0.1156, -0.0629]) tensor([0.4552, 0.1395, 0.2207, 0.1846]) -Greedy action tensor([ 0.9709, -0.3810, -0.0384, -0.5056]) tensor([0.5401, 0.1397, 0.1968, 0.1234]) -Greedy action tensor([ 1.0219, -0.3702, -0.3545, -0.1639]) tensor([0.5535, 0.1376, 0.1398, 0.1691]) -Greedy action tensor([ 0.2644, -0.0264, -0.0239, -0.4059]) tensor([0.3324, 0.2485, 0.2491, 0.1700]) -Greedy action tensor([ 1.2338, -0.8736, -0.0535, -0.6976]) tensor([0.6483, 0.0788, 0.1789, 0.0940]) -Greedy action tensor([ 0.8478, -0.5496, 0.0569, -0.3359]) tensor([0.4983, 0.1232, 0.2259, 0.1525]) -Greedy action tensor([ 0.8162, -0.6660, -0.0604, -0.5168]) tensor([0.5244, 0.1191, 0.2183, 0.1383]) -Greedy action tensor([ 0.8106, -0.6566, -0.0381, -0.4756]) tensor([0.5168, 0.1192, 0.2212, 0.1428]) -Greedy action tensor([ 0.9019, -0.7448, -0.0083, -0.3693]) tensor([0.5332, 0.1027, 0.2146, 0.1495]) -Greedy action tensor([ 0.3007, -0.1508, -0.0589, -0.0066]) tensor([0.3257, 0.2074, 0.2273, 0.2395]) -Greedy action tensor([ 0.6999, -0.2527, -0.1969, -0.3612]) tensor([0.4674, 0.1803, 0.1906, 0.1617]) -Greedy action tensor([ 0.7798, -0.3042, -0.0679, -0.2198]) tensor([0.4684, 0.1585, 0.2007, 0.1724]) -Greedy action tensor([ 0.6932, -0.3285, 0.0702, -0.0606]) tensor([0.4225, 0.1521, 0.2266, 0.1988]) -Greedy action tensor([ 0.8114, -0.2903, -0.0203, -0.1933]) tensor([0.4686, 0.1557, 0.2040, 0.1716]) -Greedy action tensor([ 0.9795, -0.4895, 0.0222, -0.2985]) tensor([0.5283, 0.1216, 0.2029, 0.1472]) -Greedy action tensor([ 0.7527, -0.5848, -0.1235, -0.3394]) tensor([0.4964, 0.1303, 0.2067, 0.1666]) -Greedy action tensor([-1.9092, -0.4063, 0.6510, -0.1620]) tensor([0.0414, 0.1859, 0.5353, 0.2374]) -Greedy action tensor([-1.9225, -0.4578, 0.6575, -0.1643]) tensor([0.0411, 0.1778, 0.5425, 0.2385]) -Greedy action tensor([-1.6794, -0.4536, 0.5773, 0.0187]) tensor([0.0515, 0.1754, 0.4918, 0.2813]) -Greedy action tensor([-1.7154, -0.2104, 0.6335, -0.0134]) tensor([0.0466, 0.2098, 0.4880, 0.2555]) -Greedy action tensor([-1.8920, -0.4015, 0.6264, -0.1476]) tensor([0.0424, 0.1883, 0.5264, 0.2428]) -Greedy action tensor([-1.1764, -0.3785, 0.3938, -0.1640]) tensor([0.0928, 0.2060, 0.4459, 0.2553]) -Greedy action tensor([-1.7332, -0.3584, 0.5538, -0.0918]) tensor([0.0501, 0.1981, 0.4932, 0.2586]) -Greedy action tensor([-0.0791, -0.6888, 0.6813, 0.4337]) tensor([0.1868, 0.1015, 0.3997, 0.3120]) -Greedy action tensor([-1.4095, -0.0168, 0.4830, 0.1657]) tensor([0.0606, 0.2441, 0.4023, 0.2929]) -Greedy action tensor([-1.7309, -0.4789, 0.5702, -0.1255]) tensor([0.0514, 0.1797, 0.5131, 0.2559]) -Greedy action tensor([-1.9495, -0.4526, 0.6677, -0.1837]) tensor([0.0400, 0.1786, 0.5477, 0.2337]) -Greedy action tensor([-1.6625, -0.4501, 0.7031, 0.0729]) tensor([0.0483, 0.1625, 0.5149, 0.2742]) -Greedy action tensor([-1.9112, -0.4119, 0.6491, -0.1634]) tensor([0.0414, 0.1854, 0.5356, 0.2376]) -Greedy action tensor([-1.9053, -0.4258, 0.6462, -0.1616]) tensor([0.0418, 0.1834, 0.5359, 0.2389]) -Greedy action tensor([-1.7722, -0.4260, 0.5845, -0.1005]) tensor([0.0483, 0.1855, 0.5095, 0.2568]) -Greedy action tensor([-1.9275, -0.4218, 0.6590, -0.1704]) tensor([0.0407, 0.1833, 0.5403, 0.2357]) -Greedy action tensor([-1.9127, -0.4137, 0.6373, -0.1583]) tensor([0.0416, 0.1861, 0.5322, 0.2402]) -Greedy action tensor([-1.9384, -0.4401, 0.6638, -0.1774]) tensor([0.0403, 0.1805, 0.5444, 0.2347]) -Greedy action tensor([-1.7725, -0.2853, 0.5537, -0.0594]) tensor([0.0471, 0.2086, 0.4827, 0.2615]) -Greedy action tensor([-1.6093, -0.3129, 0.6480, -0.0030]) tensor([0.0521, 0.1904, 0.4978, 0.2596]) -Greedy action tensor([-1.7978, -0.4987, 0.6231, -0.0424]) tensor([0.0461, 0.1689, 0.5185, 0.2665]) -Greedy action tensor([-1.8726, -0.3806, 0.6244, -0.1502]) tensor([0.0431, 0.1917, 0.5238, 0.2414]) -Greedy action tensor([-0.5107, -0.3080, 0.1732, 0.1924]) tensor([0.1606, 0.1967, 0.3183, 0.3244]) -Greedy action tensor([-1.7655, -0.4447, 0.5810, -0.0841]) tensor([0.0486, 0.1821, 0.5080, 0.2612]) -Greedy action tensor([-1.2056, -0.7195, 1.3059, 0.9888]) tensor([0.0418, 0.0680, 0.5151, 0.3751]) -Greedy action tensor([-1.0835, -0.5557, 0.4369, -0.2255]) tensor([0.1039, 0.1761, 0.4751, 0.2450]) -Greedy action tensor([-1.9481, -0.4546, 0.6676, -0.1829]) tensor([0.0400, 0.1783, 0.5477, 0.2340]) -Greedy action tensor([-1.8883, -0.4541, 0.6530, -0.1245]) tensor([0.0421, 0.1769, 0.5351, 0.2459]) -Greedy action tensor([-1.3107, 0.1459, 0.5651, -0.6282]) tensor([0.0725, 0.3111, 0.4730, 0.1434]) -Greedy action tensor([-1.9347, -0.4269, 0.6595, -0.1714]) tensor([0.0404, 0.1826, 0.5412, 0.2358]) -Greedy action tensor([-1.9287, -0.4390, 0.6600, -0.1714]) tensor([0.0407, 0.1807, 0.5424, 0.2362]) -Greedy action tensor([-1.6398, -0.1319, 0.3608, -0.0809]) tensor([0.0566, 0.2557, 0.4186, 0.2691]) -Greedy action tensor([-1.4147, -0.3930, 0.4777, -0.1582]) tensor([0.0718, 0.1995, 0.4765, 0.2523]) -Greedy action tensor([-1.7953, -0.4719, 0.6189, -0.0195]) tensor([0.0458, 0.1720, 0.5119, 0.2704]) -Greedy action tensor([-1.8784, -0.4582, 0.6408, -0.1493]) tensor([0.0431, 0.1784, 0.5355, 0.2430]) -Greedy action tensor([-1.8399, -0.2553, 0.5907, -0.1463]) tensor([0.0441, 0.2150, 0.5011, 0.2398]) -Greedy action tensor([-1.7499, -0.4800, 0.5776, -0.0712]) tensor([0.0496, 0.1765, 0.5083, 0.2656]) -Greedy action tensor([-1.8897, -0.4100, 0.6360, -0.1570]) tensor([0.0425, 0.1865, 0.5308, 0.2402]) -Greedy action tensor([-1.2293, -0.0298, 0.4959, 0.2670]) tensor([0.0695, 0.2305, 0.3899, 0.3101]) -Greedy action tensor([-1.7351, 0.1197, 0.4747, -0.0398]) tensor([0.0456, 0.2911, 0.4152, 0.2482]) -Greedy action tensor([-1.9308, -0.4508, 0.6622, -0.1717]) tensor([0.0407, 0.1788, 0.5442, 0.2364]) -Greedy action tensor([-1.5378, 0.1724, 0.3847, 0.0684]) tensor([0.0545, 0.3013, 0.3726, 0.2716]) -Greedy action tensor([-1.9267, -0.4356, 0.6596, -0.1713]) tensor([0.0408, 0.1813, 0.5419, 0.2361]) -Greedy action tensor([-1.8439, -0.3772, 0.6181, -0.1226]) tensor([0.0441, 0.1913, 0.5177, 0.2468]) -Greedy action tensor([-1.9050, -0.3923, 0.6478, -0.1581]) tensor([0.0415, 0.1882, 0.5325, 0.2378]) -Greedy action tensor([-1.8784, -0.3835, 0.6550, -0.1469]) tensor([0.0422, 0.1881, 0.5314, 0.2383]) -Greedy action tensor([-1.8369, -0.4763, 0.6141, -0.1378]) tensor([0.0455, 0.1775, 0.5281, 0.2489]) -Greedy action tensor([-1.6822, 0.2269, 0.5558, -0.3186]) tensor([0.0475, 0.3208, 0.4457, 0.1859]) -Greedy action tensor([-1.3820, 0.3966, 0.2915, -0.0156]) tensor([0.0618, 0.3661, 0.3296, 0.2424]) -Greedy action tensor([-1.8182, -0.4371, 0.6011, -0.1158]) tensor([0.0461, 0.1833, 0.5178, 0.2528]) -Greedy action tensor([-1.8049, -0.3613, 0.5867, -0.1370]) tensor([0.0466, 0.1973, 0.5092, 0.2469]) -Greedy action tensor([-1.4040, -0.4500, 0.4330, -0.5984]) tensor([0.0826, 0.2143, 0.5183, 0.1848]) -Greedy action tensor([-1.7830, -0.4079, 0.5817, -0.1098]) tensor([0.0478, 0.1890, 0.5085, 0.2547]) -Greedy action tensor([-1.9068, -0.4238, 0.6436, -0.1630]) tensor([0.0418, 0.1841, 0.5352, 0.2389]) -Greedy action tensor([-1.9105, -0.2423, 0.6100, -0.1444]) tensor([0.0407, 0.2157, 0.5058, 0.2379]) -Greedy action tensor([-1.2023, -0.5173, 0.3420, 0.0533]) tensor([0.0895, 0.1775, 0.4191, 0.3140]) -Greedy action tensor([-1.6355, 0.3102, 0.4306, 0.0582]) tensor([0.0469, 0.3281, 0.3700, 0.2550]) -Greedy action tensor([-1.9321, -0.4257, 0.6589, -0.1743]) tensor([0.0406, 0.1830, 0.5412, 0.2352]) -Greedy action tensor([-1.5861, -0.0923, 0.5980, 0.1110]) tensor([0.0505, 0.2250, 0.4488, 0.2757]) -Greedy action tensor([-1.8530, -0.4159, 0.6344, -0.1148]) tensor([0.0436, 0.1836, 0.5247, 0.2481]) -Greedy action tensor([-1.8965, -0.4400, 0.6726, -0.1509]) tensor([0.0415, 0.1782, 0.5422, 0.2380]) -Greedy action tensor([-1.9401, -0.4526, 0.6670, -0.1777]) tensor([0.0403, 0.1784, 0.5465, 0.2348]) -Greedy action tensor([-1.8839, -0.4320, 0.6355, -0.1523]) tensor([0.0428, 0.1830, 0.5321, 0.2420]) -Greedy action tensor([-1.9307, -0.4425, 0.6620, -0.1722]) tensor([0.0407, 0.1801, 0.5434, 0.2359]) -Greedy action tensor([-1.8368, -0.3237, 0.6324, -0.1037]) tensor([0.0435, 0.1973, 0.5133, 0.2459]) -Greedy action tensor([-1.7553, -0.1034, 0.5922, -0.0380]) tensor([0.0450, 0.2345, 0.4702, 0.2504]) -Greedy action tensor([-1.8554, -0.3007, 0.6252, -0.1224]) tensor([0.0428, 0.2028, 0.5119, 0.2424]) -Greedy action tensor([-1.7118, -0.1933, 0.5080, -0.0858]) tensor([0.0504, 0.2300, 0.4636, 0.2560]) -Greedy action tensor([-1.9459, -0.4516, 0.6695, -0.1811]) tensor([0.0400, 0.1785, 0.5476, 0.2339]) -Greedy action tensor([-1.1198, 0.0460, 0.3074, -0.1803]) tensor([0.0915, 0.2934, 0.3811, 0.2340]) -Greedy action tensor([-1.8804, -0.3449, 0.6220, -0.1516]) tensor([0.0426, 0.1977, 0.5199, 0.2398]) -Greedy action tensor([-1.2360, -0.3374, 0.3262, -0.0260]) tensor([0.0864, 0.2121, 0.4119, 0.2896]) -Greedy action tensor([-1.1222, 0.3211, 0.2146, -0.0288]) tensor([0.0832, 0.3521, 0.3166, 0.2482]) -Greedy action tensor([-1.9208, -0.4439, 0.6596, -0.1662]) tensor([0.0410, 0.1798, 0.5419, 0.2373]) -Greedy action tensor([-1.9211, -0.4479, 0.6562, -0.1689]) tensor([0.0412, 0.1796, 0.5418, 0.2374]) -Greedy action tensor([-1.7905, -0.4668, 0.6305, -0.0786]) tensor([0.0464, 0.1743, 0.5223, 0.2570]) -Greedy action tensor([-1.8794, -0.4323, 0.6340, -0.1469]) tensor([0.0430, 0.1828, 0.5310, 0.2432]) -Greedy action tensor([-1.0885, -0.5102, 0.3484, 0.5555]) tensor([0.0822, 0.1465, 0.3458, 0.4254]) -Greedy action tensor([-1.5286, -0.4519, 0.7378, 0.5071]) tensor([0.0471, 0.1382, 0.4541, 0.3606]) -Greedy action tensor([-1.9232, -0.3781, 0.6451, -0.1799]) tensor([0.0409, 0.1918, 0.5335, 0.2338]) -Greedy action tensor([-1.0087, -0.5100, 0.3213, 0.5734]) tensor([0.0886, 0.1458, 0.3348, 0.4308]) -Greedy action tensor([ 1.5148, -0.4371, -0.2048, 0.0999]) tensor([0.6394, 0.0908, 0.1145, 0.1553]) -Greedy action tensor([ 1.4677, -0.7988, -0.0570, 0.5085]) tensor([0.5867, 0.0608, 0.1277, 0.2248]) -Greedy action tensor([ 1.2931, -0.1087, -0.2286, -0.1555]) tensor([0.5884, 0.1449, 0.1285, 0.1382]) -Greedy action tensor([ 1.8334, -0.9584, -0.3343, 0.2109]) tensor([0.7283, 0.0446, 0.0833, 0.1438]) -Greedy action tensor([ 1.3379, -0.0064, -1.0548, 0.0382]) tensor([0.6155, 0.1605, 0.0562, 0.1678]) -Greedy action tensor([ 1.6564, -0.5750, -0.5733, 0.0912]) tensor([0.7022, 0.0754, 0.0755, 0.1468]) -Greedy action tensor([ 1.3390, -0.2215, -1.0469, 0.5847]) tensor([0.5642, 0.1185, 0.0519, 0.2654]) -Greedy action tensor([ 1.0291, -0.4955, -0.0520, -0.0245]) tensor([0.5248, 0.1142, 0.1780, 0.1830]) -Greedy action tensor([ 1.5781, -0.1891, -1.0254, 0.6953]) tensor([0.6030, 0.1030, 0.0446, 0.2494]) -Greedy action tensor([ 1.2182, -0.6518, 0.0071, -0.1311]) tensor([0.5843, 0.0901, 0.1740, 0.1516]) -Greedy action tensor([ 2.2755, -0.9714, -0.3591, 0.4605]) tensor([0.7853, 0.0305, 0.0563, 0.1279]) -Greedy action tensor([ 1.9351, -0.6933, 0.1336, 1.0443]) tensor([0.6070, 0.0438, 0.1002, 0.2491]) -Greedy action tensor([ 1.6376, -0.2956, -0.7963, 0.3115]) tensor([0.6676, 0.0966, 0.0585, 0.1773]) -Greedy action tensor([ 2.1269, 0.4088, -0.1047, 0.1493]) tensor([0.7017, 0.1259, 0.0753, 0.0971]) -Greedy action tensor([ 2.0011, -0.8774, -0.1358, 0.5527]) tensor([0.7096, 0.0399, 0.0837, 0.1667]) -Greedy action tensor([ 1.4840, -0.7462, -0.4095, 0.1158]) tensor([0.6611, 0.0711, 0.0995, 0.1683]) -Greedy action tensor([ 1.3017, -0.3323, 0.0183, -0.0280]) tensor([0.5758, 0.1124, 0.1595, 0.1523]) -Greedy action tensor([ 0.9289, -0.2059, -0.3230, 0.2136]) tensor([0.4770, 0.1534, 0.1364, 0.2333]) -Greedy action tensor([ 1.0134, -0.2587, -0.1953, -0.1109]) tensor([0.5253, 0.1472, 0.1568, 0.1707]) -Greedy action tensor([ 1.7923, -1.0740, -0.1995, 0.2669]) tensor([0.7088, 0.0403, 0.0967, 0.1542]) -Greedy action tensor([ 1.5282, -0.2601, -0.3327, 0.5092]) tensor([0.5939, 0.0993, 0.0924, 0.2144]) -Greedy action tensor([ 1.0504, -0.4329, -0.7817, 0.4870]) tensor([0.5112, 0.1160, 0.0818, 0.2910]) -Greedy action tensor([ 1.4186, -0.1895, -0.3687, 0.4578]) tensor([0.5713, 0.1144, 0.0956, 0.2186]) -Greedy action tensor([ 1.5362, -0.5776, -0.3582, 0.3644]) tensor([0.6325, 0.0764, 0.0951, 0.1960]) -Greedy action tensor([ 1.6697, -0.8279, -0.4427, 0.1640]) tensor([0.7017, 0.0577, 0.0849, 0.1557]) -Greedy action tensor([ 1.5773, -0.8495, -0.0399, 0.0719]) tensor([0.6628, 0.0585, 0.1315, 0.1471]) -Greedy action tensor([ 1.3560, 0.0481, -0.5188, 0.6422]) tensor([0.5226, 0.1413, 0.0802, 0.2559]) -Greedy action tensor([ 0.9668, -0.8112, -0.0946, 0.4788]) tensor([0.4697, 0.0794, 0.1625, 0.2884]) -Greedy action tensor([ 1.7826, -0.3739, -0.5065, 0.5587]) tensor([0.6617, 0.0766, 0.0671, 0.1946]) -Greedy action tensor([ 1.3888, -0.4788, -0.4494, 0.1421]) tensor([0.6246, 0.0965, 0.0994, 0.1795]) -Greedy action tensor([ 1.6906, -0.9906, -0.2512, 0.6072]) tensor([0.6450, 0.0442, 0.0925, 0.2183]) -Greedy action tensor([ 1.0450, -0.0295, -0.8524, 0.0426]) tensor([0.5381, 0.1837, 0.0807, 0.1975]) -Greedy action tensor([ 1.8251, -0.6211, -1.0184, 0.1179]) tensor([0.7540, 0.0653, 0.0439, 0.1368]) -Greedy action tensor([ 1.2457, -0.6406, -0.7520, 0.5390]) tensor([0.5616, 0.0852, 0.0762, 0.2770]) -Greedy action tensor([ 1.3365, -0.4807, -0.5132, 0.0507]) tensor([0.6265, 0.1018, 0.0985, 0.1732]) -Greedy action tensor([ 0.5106, -0.4440, 0.0013, -0.1374]) tensor([0.3986, 0.1534, 0.2395, 0.2085]) -Greedy action tensor([ 1.9519, -0.7587, -0.6865, 0.8218]) tensor([0.6845, 0.0455, 0.0489, 0.2211]) -Greedy action tensor([ 1.4426, 0.1928, -0.3444, 0.4711]) tensor([0.5457, 0.1564, 0.0914, 0.2066]) -Greedy action tensor([ 2.0424, -0.7952, -0.0792, 0.6592]) tensor([0.6997, 0.0410, 0.0839, 0.1755]) -Greedy action tensor([ 3.1094, -0.9918, -0.2769, 0.6152]) tensor([0.8827, 0.0146, 0.0299, 0.0729]) -Greedy action tensor([ 1.1417, -0.4709, -0.3987, 0.1589]) tensor([0.5593, 0.1115, 0.1199, 0.2093]) -Greedy action tensor([ 2.1765, -0.6768, -0.4082, 0.4893]) tensor([0.7587, 0.0437, 0.0572, 0.1404]) -Greedy action tensor([ 1.3630, -0.3734, -0.5585, 0.5477]) tensor([0.5666, 0.0998, 0.0829, 0.2507]) -Greedy action tensor([ 1.8516, -0.6936, -0.4172, 0.3149]) tensor([0.7158, 0.0562, 0.0740, 0.1540]) -Greedy action tensor([ 1.1475, -0.2562, -0.1766, 0.1786]) tensor([0.5288, 0.1299, 0.1407, 0.2007]) -Greedy action tensor([ 1.0545, 0.1858, -0.4091, -0.3985]) tensor([0.5306, 0.2226, 0.1228, 0.1241]) -Greedy action tensor([ 0.9912, -0.5683, -0.0795, 0.3571]) tensor([0.4800, 0.1009, 0.1645, 0.2546]) -Greedy action tensor([ 1.2382, -0.7435, -0.3429, 0.6174]) tensor([0.5316, 0.0733, 0.1094, 0.2857]) -Greedy action tensor([ 1.9670, -0.8128, -0.7131, 0.1740]) tensor([0.7710, 0.0478, 0.0529, 0.1283]) -Greedy action tensor([ 1.6225, -0.3721, -0.5233, 0.2564]) tensor([0.6631, 0.0902, 0.0776, 0.1692]) -Greedy action tensor([ 1.2765, -0.1162, -0.5044, 0.0595]) tensor([0.5838, 0.1450, 0.0984, 0.1729]) -Greedy action tensor([ 2.0023, -0.8612, -0.7058, 0.2356]) tensor([0.7724, 0.0441, 0.0515, 0.1320]) -Greedy action tensor([ 1.7827, -0.7793, -0.1789, 0.3901]) tensor([0.6820, 0.0526, 0.0959, 0.1694]) -Greedy action tensor([ 1.7267, -0.6113, -0.6644, 1.0923]) tensor([0.5820, 0.0562, 0.0533, 0.3086]) -Greedy action tensor([ 1.6905, -0.3883, -0.5989, 0.2077]) tensor([0.6880, 0.0861, 0.0697, 0.1562]) -Greedy action tensor([ 1.4356, -0.3150, 0.0647, 0.2761]) tensor([0.5743, 0.0997, 0.1458, 0.1801]) -Greedy action tensor([ 1.4372, -0.0520, -0.2026, 0.3053]) tensor([0.5741, 0.1295, 0.1114, 0.1851]) -Greedy action tensor([ 1.3984, -0.2923, -0.5507, 0.2994]) tensor([0.6024, 0.1111, 0.0858, 0.2007]) -Greedy action tensor([ 1.5847, -0.2045, -0.8128, 0.3193]) tensor([0.6493, 0.1085, 0.0591, 0.1832]) -Greedy action tensor([ 1.5469, -1.1432, -0.2063, 0.0474]) tensor([0.6829, 0.0464, 0.1183, 0.1524]) -Greedy action tensor([ 1.3573, -0.4050, -0.4377, 0.6491]) tensor([0.5463, 0.0938, 0.0908, 0.2691]) -Greedy action tensor([ 1.2567, -0.3955, -0.3532, 0.0233]) tensor([0.5942, 0.1139, 0.1188, 0.1731]) -Greedy action tensor([ 2.0547, -1.2208, 0.0237, 0.9791]) tensor([0.6622, 0.0250, 0.0869, 0.2259]) -Greedy action tensor([ 2.0211, -1.3017, -0.2161, 0.6245]) tensor([0.7193, 0.0259, 0.0768, 0.1780]) -Greedy action tensor([ 1.4296, -0.3544, -0.3757, 0.3047]) tensor([0.6035, 0.1014, 0.0992, 0.1959]) -Greedy action tensor([ 1.9834, -1.1514, -0.0227, 0.4192]) tensor([0.7208, 0.0314, 0.0970, 0.1508]) -Greedy action tensor([ 1.6954, -0.4184, -0.0772, 0.1957]) tensor([0.6606, 0.0798, 0.1122, 0.1474]) -Greedy action tensor([ 1.9117, -0.8513, -0.0994, 0.7587]) tensor([0.6611, 0.0417, 0.0885, 0.2087]) -Greedy action tensor([ 2.2361, -1.0728, -0.2839, 0.6575]) tensor([0.7557, 0.0276, 0.0608, 0.1559]) -Greedy action tensor([ 1.1429, 0.3468, -0.9185, 0.1768]) tensor([0.5105, 0.2303, 0.0650, 0.1943]) -Greedy action tensor([ 1.3696, 0.0062, -0.4025, 0.4522]) tensor([0.5478, 0.1401, 0.0931, 0.2189]) -Greedy action tensor([ 1.9117, -1.0553, 0.0027, 0.6921]) tensor([0.6689, 0.0344, 0.0991, 0.1976]) -Greedy action tensor([ 1.1050, -0.3271, -0.7088, 0.2730]) tensor([0.5443, 0.1300, 0.0887, 0.2369]) -Greedy action tensor([ 1.5507, -0.7335, -0.4607, 0.1862]) tensor([0.6706, 0.0683, 0.0897, 0.1714]) -Greedy action tensor([ 1.5736, -0.3612, -0.2681, 0.5713]) tensor([0.5988, 0.0865, 0.0949, 0.2198]) -Greedy action tensor([ 1.7692, -0.9133, -0.1724, 0.3279]) tensor([0.6904, 0.0472, 0.0991, 0.1634]) -Greedy action tensor([ 0.9001, -0.1154, -0.6059, 0.1949]) tensor([0.4812, 0.1743, 0.1067, 0.2377]) -Greedy action tensor([ 1.3809, -0.5499, -0.2218, 0.4964]) tensor([0.5684, 0.0824, 0.1144, 0.2347]) -Greedy action tensor([ 2.1387, -0.8089, -0.3870, 0.3411]) tensor([0.7703, 0.0404, 0.0616, 0.1276]) -Greedy action tensor([ 1.2832, -0.3295, -0.4348, 0.2285]) tensor([0.5790, 0.1154, 0.1039, 0.2017]) -Greedy action tensor([ 1.5221, -0.6713, -0.4358, 0.3936]) tensor([0.6344, 0.0708, 0.0896, 0.2052]) -Greedy action tensor([-0.3658, -0.2437, -0.6724, -0.2822]) tensor([0.2530, 0.2858, 0.1862, 0.2750]) -Greedy action tensor([ 1.0274, -1.7745, 0.2483, 0.6244]) tensor([0.4571, 0.0277, 0.2097, 0.3055]) -Greedy action tensor([-0.0529, -0.8505, -0.7434, -0.6260]) tensor([0.3975, 0.1791, 0.1993, 0.2241]) -Greedy action tensor([-0.1348, -0.2447, -1.0984, -0.0251]) tensor([0.2947, 0.2640, 0.1124, 0.3289]) -Greedy action tensor([ 0.3518, -1.8397, -0.5019, 0.1957]) tensor([0.4179, 0.0467, 0.1779, 0.3575]) -Greedy action tensor([ 0.5743, -1.8410, -0.4693, 0.1694]) tensor([0.4743, 0.0424, 0.1670, 0.3163]) -Greedy action tensor([-0.6100, 0.0270, 0.2469, -0.8146]) tensor([0.1650, 0.3119, 0.3887, 0.1344]) -Greedy action tensor([ 1.3116, -0.0817, -0.1487, 1.4948]) tensor([0.3729, 0.0926, 0.0866, 0.4479]) -Greedy action tensor([ 1.0996, -1.2282, -0.0334, 0.5429]) tensor([0.5018, 0.0489, 0.1616, 0.2876]) -Greedy action tensor([ 0.0314, -0.3254, -0.9500, -0.9666]) tensor([0.4093, 0.2865, 0.1534, 0.1509]) -Greedy action tensor([-1.2393, -0.7971, -0.8401, 0.2229]) tensor([0.1196, 0.1861, 0.1783, 0.5160]) -Greedy action tensor([-0.1615, -0.5930, 0.0787, 0.8223]) tensor([0.1787, 0.1161, 0.2272, 0.4780]) -Greedy action tensor([-0.4759, -0.8338, -0.6968, -0.3010]) tensor([0.2709, 0.1894, 0.2172, 0.3226]) -Greedy action tensor([ 0.3594, -1.1799, 0.1405, 0.2970]) tensor([0.3381, 0.0725, 0.2717, 0.3177]) -Greedy action tensor([ 0.5718, -0.2512, 0.5050, 0.7359]) tensor([0.2815, 0.1236, 0.2633, 0.3316]) -Greedy action tensor([-0.3343, -0.7735, 0.7843, -0.5369]) tensor([0.1811, 0.1167, 0.5543, 0.1479]) -Greedy action tensor([0.1596, 0.5581, 0.1192, 0.2855]) tensor([0.2181, 0.3249, 0.2095, 0.2474]) -Greedy action tensor([ 0.5015, -0.3935, -0.8787, 0.5169]) tensor([0.3737, 0.1527, 0.0940, 0.3795]) -Greedy action tensor([ 0.3983, -1.3144, 0.4691, -0.3519]) tensor([0.3668, 0.0662, 0.3937, 0.1733]) -Greedy action tensor([-0.4521, -0.4221, 1.2805, -0.3308]) tensor([0.1134, 0.1169, 0.6416, 0.1281]) -Greedy action tensor([-0.2293, -0.2089, 0.0033, 0.6800]) tensor([0.1735, 0.1770, 0.2189, 0.4306]) -Greedy action tensor([ 0.1915, -0.4890, -0.1094, 0.3600]) tensor([0.2915, 0.1476, 0.2158, 0.3451]) -Greedy action tensor([ 0.5878, -0.0795, -0.5256, 0.1565]) tensor([0.4014, 0.2060, 0.1318, 0.2608]) -Greedy action tensor([ 0.0149, -1.0584, 0.7286, -0.1747]) tensor([0.2375, 0.0812, 0.4848, 0.1965]) -Greedy action tensor([ 0.1419, -0.0227, -0.5494, -0.1601]) tensor([0.3238, 0.2746, 0.1622, 0.2394]) -Greedy action tensor([-0.4928, -0.4267, 0.1113, -0.7340]) tensor([0.2135, 0.2281, 0.3906, 0.1677]) -Greedy action tensor([-0.2471, -0.5556, -0.2473, -0.2868]) tensor([0.2706, 0.1988, 0.2705, 0.2601]) -Greedy action tensor([ 0.3006, -1.0306, -0.1276, -0.2917]) tensor([0.4050, 0.1070, 0.2640, 0.2240]) -Greedy action tensor([ 0.1277, -0.2058, 0.9426, -0.2960]) tensor([0.2160, 0.1547, 0.4879, 0.1414]) -Greedy action tensor([-1.4459, -0.5964, 0.2369, -1.3200]) tensor([0.1015, 0.2373, 0.5461, 0.1151]) -Greedy action tensor([ 0.5704, -0.0217, -0.3771, 0.5919]) tensor([0.3375, 0.1867, 0.1309, 0.3449]) -Greedy action tensor([-0.1349, 0.2339, 0.1648, -0.2819]) tensor([0.2146, 0.3104, 0.2897, 0.1853]) -Greedy action tensor([-0.8504, -0.3551, -1.0112, 0.0813]) tensor([0.1658, 0.2721, 0.1412, 0.4209]) -Greedy action tensor([-0.2809, -0.9151, -0.9194, 0.0225]) tensor([0.2930, 0.1554, 0.1547, 0.3969]) -Greedy action tensor([-0.0139, -0.8097, 0.0819, -0.6346]) tensor([0.3237, 0.1461, 0.3562, 0.1740]) -Greedy action tensor([ 0.8885, -0.4849, -0.7560, 1.0204]) tensor([0.3865, 0.0979, 0.0746, 0.4410]) -Greedy action tensor([ 1.0896, -0.2085, -0.0376, 1.2273]) tensor([0.3643, 0.0995, 0.1180, 0.4181]) -Greedy action tensor([ 0.8674, -0.9895, 1.4787, 0.4630]) tensor([0.2728, 0.0426, 0.5026, 0.1820]) -Greedy action tensor([ 0.3938, -0.2027, 0.5111, -0.0413]) tensor([0.3010, 0.1658, 0.3384, 0.1948]) -Greedy action tensor([ 1.1791, -0.8648, 0.9111, 1.5372]) tensor([0.3007, 0.0390, 0.2301, 0.4302]) -Greedy action tensor([-0.7317, -0.3726, -0.0536, 0.0507]) tensor([0.1518, 0.2173, 0.2990, 0.3319]) -Greedy action tensor([ 0.9957, -0.7405, 0.2175, -0.4685]) tensor([0.5357, 0.0944, 0.2460, 0.1239]) -Greedy action tensor([-0.7473, -1.0092, -0.4842, 0.4766]) tensor([0.1545, 0.1189, 0.2010, 0.5255]) -Greedy action tensor([ 0.7650, -1.3397, 0.9026, -0.4526]) tensor([0.3898, 0.0475, 0.4473, 0.1154]) -Greedy action tensor([-1.0295, -1.3988, -0.5176, 1.4987]) tensor([0.0629, 0.0435, 0.1050, 0.7886]) -Greedy action tensor([-0.5600, -1.0559, -0.1387, -0.4088]) tensor([0.2328, 0.1418, 0.3547, 0.2708]) -Greedy action tensor([-0.0753, -0.6357, 0.4762, 0.0987]) tensor([0.2224, 0.1270, 0.3860, 0.2646]) -Greedy action tensor([ 1.5738, -1.2522, -0.4269, 0.4340]) tensor([0.6603, 0.0391, 0.0893, 0.2112]) -Greedy action tensor([-0.0851, 0.4342, 0.2144, -0.3802]) tensor([0.2095, 0.3520, 0.2826, 0.1559]) -Greedy action tensor([-0.4336, 0.2641, 0.0968, -1.0536]) tensor([0.1906, 0.3829, 0.3239, 0.1025]) -Greedy action tensor([ 0.7975, -0.6712, -0.4622, 0.3644]) tensor([0.4624, 0.1065, 0.1312, 0.2999]) -Greedy action tensor([-0.5504, -1.6649, -0.1976, -0.3696]) tensor([0.2532, 0.0831, 0.3603, 0.3034]) -Greedy action tensor([ 1.1054, -0.1057, 0.0180, 0.7162]) tensor([0.4324, 0.1288, 0.1458, 0.2930]) -Greedy action tensor([-0.1035, -0.7874, 0.3945, -0.6483]) tensor([0.2681, 0.1353, 0.4411, 0.1555]) -Greedy action tensor([ 0.0401, -0.7991, -0.8503, -0.1328]) tensor([0.3726, 0.1610, 0.1530, 0.3135]) -Greedy action tensor([-0.6135, -0.2362, 0.2958, -1.2167]) tensor([0.1822, 0.2657, 0.4524, 0.0997]) -Greedy action tensor([ 0.2474, -0.2888, 0.6091, -0.5607]) tensor([0.2885, 0.1688, 0.4142, 0.1286]) -Greedy action tensor([-0.6842, -0.7024, 0.6032, -1.0954]) tensor([0.1595, 0.1567, 0.5780, 0.1058]) -Greedy action tensor([-0.7334, -0.8697, 0.2308, 0.3871]) tensor([0.1322, 0.1154, 0.3468, 0.4055]) -Greedy action tensor([-0.7590, -0.3679, -0.0887, -1.0988]) tensor([0.1943, 0.2874, 0.3799, 0.1384]) -Greedy action tensor([ 0.1519, -0.1880, 0.6436, -0.2945]) tensor([0.2508, 0.1785, 0.4101, 0.1605]) -Greedy action tensor([ 1.1959, -0.5976, -0.2519, 0.3299]) tensor([0.5488, 0.0913, 0.1290, 0.2309]) -Greedy action tensor([-0.1564, 0.4094, -0.3901, -0.7266]) tensor([0.2428, 0.4276, 0.1922, 0.1373]) -Greedy action tensor([ 1.7672, -0.5496, 0.5219, 0.3512]) tensor([0.6138, 0.0605, 0.1767, 0.1490]) -Greedy action tensor([-1.0879, -1.0277, 0.8006, -0.7466]) tensor([0.0992, 0.1054, 0.6558, 0.1396]) -Greedy action tensor([ 0.8369, -1.6650, 0.0360, 0.1019]) tensor([0.4974, 0.0408, 0.2233, 0.2385]) -Greedy action tensor([-0.9098, -0.2940, -0.7155, -0.0414]) tensor([0.1551, 0.2871, 0.1883, 0.3696]) -Greedy action tensor([ 0.0323, -0.5970, -0.5165, 0.0316]) tensor([0.3216, 0.1714, 0.1857, 0.3213]) -Greedy action tensor([ 0.6604, -0.3681, 0.0329, -0.3145]) tensor([0.4408, 0.1576, 0.2353, 0.1663]) -Greedy action tensor([ 0.4173, -1.8486, -0.7383, -0.1187]) tensor([0.4991, 0.0518, 0.1571, 0.2920]) -Greedy action tensor([-0.0949, -1.0498, -0.6724, 0.1680]) tensor([0.3080, 0.1185, 0.1729, 0.4006]) -Greedy action tensor([ 0.0672, -0.8087, -0.8095, 0.0481]) tensor([0.3554, 0.1480, 0.1479, 0.3487]) -Greedy action tensor([ 1.0649, -1.1657, -0.0557, 0.5638]) tensor([0.4904, 0.0527, 0.1599, 0.2971]) -Greedy action tensor([ 0.1140, -1.4301, 0.8073, -1.1374]) tensor([0.2857, 0.0610, 0.5715, 0.0817]) -Greedy action tensor([-1.0898, -1.1494, -0.7052, 0.4129]) tensor([0.1265, 0.1192, 0.1858, 0.5685]) -Greedy action tensor([ 0.2113, -0.2018, -0.1877, -0.3874]) tensor([0.3470, 0.2296, 0.2328, 0.1907]) -Greedy action tensor([ 0.1613, -0.3282, -0.1404, -0.8698]) tensor([0.3691, 0.2262, 0.2730, 0.1316]) -Greedy action tensor([-1.4334, -0.2504, -1.1533, -0.2333]) tensor([0.1123, 0.3664, 0.1486, 0.3728]) -Greedy action tensor([ 0.8047, -0.1354, 0.2260, 0.6545]) tensor([0.3556, 0.1389, 0.1994, 0.3061]) -Greedy action tensor([ 0.6189, 0.2209, -0.2564, -0.1181]) tensor([0.3896, 0.2617, 0.1623, 0.1864]) -Greedy action tensor([ 1.2528, 0.4685, -0.4072, 0.7818]) tensor([0.4403, 0.2010, 0.0837, 0.2749]) -Greedy action tensor([ 0.7003, -0.3030, -0.0559, -0.1817]) tensor([0.4444, 0.1630, 0.2086, 0.1840]) -Greedy action tensor([ 0.4619, -0.1780, -0.1449, -0.0478]) tensor([0.3741, 0.1973, 0.2039, 0.2247]) -Greedy action tensor([ 0.2699, -0.1600, -0.0011, -0.5624]) tensor([0.3511, 0.2284, 0.2677, 0.1527]) -Greedy action tensor([ 1.0637, -0.5095, -0.1661, -0.3719]) tensor([0.5755, 0.1193, 0.1682, 0.1370]) -Greedy action tensor([ 0.8363, -0.6329, -0.0096, -0.3523]) tensor([0.5092, 0.1172, 0.2185, 0.1551]) -Greedy action tensor([ 0.5544, -0.3451, -0.1297, -0.2176]) tensor([0.4213, 0.1714, 0.2126, 0.1947]) -Greedy action tensor([ 0.6673, -0.3898, -0.1182, -0.0853]) tensor([0.4397, 0.1528, 0.2004, 0.2071]) -Greedy action tensor([ 0.9004, -0.6097, -0.0334, -0.5158]) tensor([0.5386, 0.1190, 0.2117, 0.1307]) -Greedy action tensor([ 0.7071, -0.4745, -0.0574, -0.3954]) tensor([0.4752, 0.1458, 0.2212, 0.1578]) -Greedy action tensor([ 1.2225, -0.5831, 0.0419, -0.6667]) tensor([0.6163, 0.1013, 0.1893, 0.0932]) -Greedy action tensor([ 0.8436, -0.2844, 0.0331, -0.0881]) tensor([0.4625, 0.1497, 0.2056, 0.1822]) -Greedy action tensor([ 0.3719, -0.1167, -0.2585, -0.4095]) tensor([0.3841, 0.2356, 0.2045, 0.1758]) -Greedy action tensor([ 0.7516, -0.3265, -0.1179, -0.1183]) tensor([0.4591, 0.1562, 0.1924, 0.1923]) -Greedy action tensor([ 1.2616, -0.9971, -0.0221, -0.6948]) tensor([0.6567, 0.0686, 0.1819, 0.0928]) -Greedy action tensor([ 0.2005, 0.0878, -0.1309, -0.2790]) tensor([0.3096, 0.2766, 0.2222, 0.1916]) -Greedy action tensor([ 0.4491, -0.2143, -0.0902, -0.2021]) tensor([0.3817, 0.1966, 0.2226, 0.1990]) -Greedy action tensor([ 0.9774, -0.4600, -0.1375, -0.3378]) tensor([0.5453, 0.1295, 0.1788, 0.1464]) -Greedy action tensor([ 0.6726, -0.1741, 0.1132, -0.0497]) tensor([0.4022, 0.1725, 0.2299, 0.1953]) -Greedy action tensor([ 0.6184, -0.2321, -0.0104, -0.1019]) tensor([0.4087, 0.1746, 0.2179, 0.1989]) -Greedy action tensor([ 0.2195, -0.0466, -0.0086, -0.0654]) tensor([0.3017, 0.2312, 0.2402, 0.2269]) -Greedy action tensor([ 0.7545, -0.4314, -0.0352, -0.2364]) tensor([0.4693, 0.1434, 0.2131, 0.1742]) -Greedy action tensor([ 0.8109, -0.5319, -0.1415, -0.2577]) tensor([0.5024, 0.1312, 0.1938, 0.1726]) -Greedy action tensor([ 0.8589, -0.6211, 0.0168, -0.3936]) tensor([0.5143, 0.1171, 0.2216, 0.1470]) -Greedy action tensor([ 1.1709, -1.1259, 0.1335, -0.7048]) tensor([0.6218, 0.0625, 0.2204, 0.0953]) -Greedy action tensor([ 0.8929, -0.4979, -0.1237, -0.3780]) tensor([0.5287, 0.1316, 0.1913, 0.1484]) -Greedy action tensor([ 0.2839, 0.1131, -0.0821, 0.0194]) tensor([0.3026, 0.2551, 0.2099, 0.2323]) -Greedy action tensor([ 0.4886, -0.1190, 0.0355, -0.1145]) tensor([0.3667, 0.1997, 0.2331, 0.2006]) -Greedy action tensor([ 0.6598, -0.4968, -0.0596, -0.1138]) tensor([0.4419, 0.1390, 0.2152, 0.2039]) -Greedy action tensor([ 0.4734, -0.0891, -0.0742, -0.0149]) tensor([0.3621, 0.2063, 0.2094, 0.2222]) -Greedy action tensor([ 0.7691, -0.5184, -0.1029, -0.1054]) tensor([0.4737, 0.1307, 0.1981, 0.1976]) -Greedy action tensor([ 0.8134, 0.0148, -0.2287, -0.5891]) tensor([0.4881, 0.2196, 0.1722, 0.1201]) -Greedy action tensor([ 0.6275, -0.3583, 0.0634, -0.3022]) tensor([0.4280, 0.1597, 0.2434, 0.1689]) -Greedy action tensor([ 0.6540, -0.3926, 0.1200, -0.6115]) tensor([0.4506, 0.1582, 0.2641, 0.1271]) -Greedy action tensor([ 1.0887, -0.7643, 0.1021, -0.3181]) tensor([0.5635, 0.0883, 0.2101, 0.1380]) -Greedy action tensor([ 0.4159, -0.1947, -0.0550, -0.3158]) tensor([0.3776, 0.2050, 0.2358, 0.1816]) -Greedy action tensor([ 1.1457, -0.6746, -0.1002, -0.4356]) tensor([0.6041, 0.0979, 0.1738, 0.1243]) -Greedy action tensor([ 0.8644, -0.7313, 0.0380, -0.1997]) tensor([0.5037, 0.1021, 0.2204, 0.1738]) -Greedy action tensor([ 1.4252, -0.9774, 0.0397, -0.7593]) tensor([0.6881, 0.0623, 0.1722, 0.0774]) -Greedy action tensor([ 0.7992, -0.2747, -0.0122, -0.1865]) tensor([0.4632, 0.1582, 0.2058, 0.1728]) -Greedy action tensor([ 0.6031, -0.4013, -0.1142, -0.2577]) tensor([0.4391, 0.1609, 0.2143, 0.1857]) -Greedy action tensor([ 0.8496, -0.4796, 0.0313, -0.1895]) tensor([0.4855, 0.1285, 0.2142, 0.1718]) -Greedy action tensor([ 0.5272, -0.0222, 0.2574, -0.1791]) tensor([0.3528, 0.2037, 0.2694, 0.1741]) -Greedy action tensor([ 0.7858, -0.5022, -0.0649, -0.2994]) tensor([0.4900, 0.1352, 0.2093, 0.1655]) -Greedy action tensor([ 1.0403, -0.5510, 0.0229, -0.3127]) tensor([0.5483, 0.1117, 0.1983, 0.1417]) -Greedy action tensor([ 0.4312, -0.1178, -0.1522, -0.2821]) tensor([0.3809, 0.2200, 0.2125, 0.1866]) -Greedy action tensor([ 0.9561, -0.5388, -0.0732, -0.2658]) tensor([0.5330, 0.1195, 0.1904, 0.1571]) -Greedy action tensor([ 0.9562, -0.6211, -0.0817, -0.1504]) tensor([0.5287, 0.1092, 0.1873, 0.1748]) -Greedy action tensor([ 0.9157, -0.4799, -0.0581, -0.3322]) tensor([0.5229, 0.1295, 0.1975, 0.1501]) -Greedy action tensor([ 1.3171, -0.7694, 0.0938, -0.7006]) tensor([0.6446, 0.0800, 0.1897, 0.0857]) -Greedy action tensor([ 1.1584, -1.0301, 0.0065, -0.4354]) tensor([0.6130, 0.0687, 0.1937, 0.1245]) -Greedy action tensor([ 0.8976, -0.1044, -0.0263, -0.1903]) tensor([0.4760, 0.1747, 0.1889, 0.1604]) -Greedy action tensor([ 0.7191, -0.2352, -0.0483, -0.2042]) tensor([0.4451, 0.1714, 0.2066, 0.1768]) -Greedy action tensor([ 0.6210, -0.4009, -0.1295, -0.3109]) tensor([0.4493, 0.1617, 0.2121, 0.1769]) -Greedy action tensor([ 0.5251, -0.2085, -0.1214, -0.4652]) tensor([0.4210, 0.2021, 0.2205, 0.1564]) -Greedy action tensor([ 0.5315, 0.2450, -0.1871, 0.1432]) tensor([0.3429, 0.2575, 0.1671, 0.2325]) -Greedy action tensor([ 1.0782, -0.6713, 0.0102, -0.6426]) tensor([0.5895, 0.1025, 0.2026, 0.1055]) -Greedy action tensor([ 0.8091, -0.7636, -0.0994, -0.3684]) tensor([0.5212, 0.1081, 0.2101, 0.1606]) -Greedy action tensor([ 0.7834, -0.4595, -0.0364, -0.4146]) tensor([0.4924, 0.1421, 0.2169, 0.1486]) -Greedy action tensor([ 0.7810, -0.6131, 0.1296, -0.4288]) tensor([0.4837, 0.1200, 0.2521, 0.1442]) -Greedy action tensor([ 1.0437, -0.5011, -0.1324, -0.2769]) tensor([0.5590, 0.1193, 0.1725, 0.1492]) -Greedy action tensor([ 0.3552, -0.0340, -0.0182, -0.1401]) tensor([0.3361, 0.2277, 0.2314, 0.2048]) -Greedy action tensor([ 1.1430, -0.4922, 0.0389, -0.2664]) tensor([0.5647, 0.1101, 0.1872, 0.1380]) -Greedy action tensor([ 0.5665, -0.6723, -0.2459, -0.2612]) tensor([0.4607, 0.1335, 0.2045, 0.2014]) -Greedy action tensor([ 1.0548, -1.1563, 0.0661, -0.3898]) tensor([0.5822, 0.0638, 0.2166, 0.1373]) -Greedy action tensor([ 1.3842, -0.7657, -0.0471, -0.9035]) tensor([0.6863, 0.0800, 0.1640, 0.0697]) -Greedy action tensor([ 0.9043, -0.3432, -0.0765, -0.3550]) tensor([0.5139, 0.1476, 0.1927, 0.1459]) -Greedy action tensor([ 0.7823, -0.2775, -0.0220, -0.1512]) tensor([0.4572, 0.1584, 0.2046, 0.1798]) -Greedy action tensor([ 0.7258, -0.7704, 0.0074, -0.3474]) tensor([0.4870, 0.1091, 0.2374, 0.1665]) -Greedy action tensor([ 0.4035, -0.2496, -0.1469, -0.0543]) tensor([0.3663, 0.1907, 0.2113, 0.2318]) -Greedy action tensor([ 0.1312, 0.3308, -0.2162, -0.3600]) tensor([0.2825, 0.3449, 0.1996, 0.1729]) -Greedy action tensor([ 0.9622, -0.4085, -0.0593, -0.1924]) tensor([0.5184, 0.1316, 0.1866, 0.1634]) -Greedy action tensor([ 0.5507, -0.2396, 0.0575, -0.1419]) tensor([0.3899, 0.1769, 0.2381, 0.1951]) -Greedy action tensor([ 1.3017, -0.7403, -0.0424, -1.0847]) tensor([0.6745, 0.0875, 0.1759, 0.0620]) -Greedy action tensor([ 0.9482, -0.5634, -0.0198, -0.4781]) tensor([0.5433, 0.1198, 0.2064, 0.1305]) -Greedy action tensor([ 0.5578, -0.5543, 0.0099, -0.2872]) tensor([0.4280, 0.1407, 0.2474, 0.1838]) -Greedy action tensor([ 0.8763, -0.3688, -0.1369, -0.2525]) tensor([0.5065, 0.1458, 0.1839, 0.1638]) -Greedy action tensor([ 1.1023, -0.6572, -0.0959, -0.3905]) tensor([0.5887, 0.1013, 0.1776, 0.1323]) -Greedy action tensor([ 0.4867, 0.0693, -0.0097, 0.0693]) tensor([0.3417, 0.2251, 0.2080, 0.2251]) -Greedy action tensor([ 0.5565, -0.2096, -0.1096, -0.0053]) tensor([0.3924, 0.1824, 0.2015, 0.2237]) -Greedy action tensor([ 0.7946, -0.6204, 0.1576, -0.3531]) tensor([0.4787, 0.1163, 0.2532, 0.1519]) -Greedy action tensor([ 0.9841, -0.7128, 0.0767, -0.4962]) tensor([0.5512, 0.1010, 0.2224, 0.1254]) -Greedy action tensor([-0.9569, 0.3060, 0.5321, 0.9130]) tensor([0.0647, 0.2288, 0.2868, 0.4198]) -Greedy action tensor([-1.1199, -0.3361, 0.3316, 0.2214]) tensor([0.0886, 0.1941, 0.3784, 0.3389]) -Greedy action tensor([-1.2905, 0.8149, 0.2163, 0.4176]) tensor([0.0520, 0.4267, 0.2345, 0.2868]) -Greedy action tensor([-1.8328, -0.1292, 0.5796, -0.1083]) tensor([0.0430, 0.2362, 0.4797, 0.2411]) -Greedy action tensor([-1.6679, -0.3134, 0.5238, -0.0258]) tensor([0.0527, 0.2040, 0.4713, 0.2720]) -Greedy action tensor([-1.9443, -0.4491, 0.6641, -0.1808]) tensor([0.0402, 0.1793, 0.5459, 0.2345]) -Greedy action tensor([-1.5939e+00, 9.2882e-04, 4.3016e-01, -5.6086e-02]) tensor([0.0551, 0.2715, 0.4170, 0.2564]) -Greedy action tensor([-1.3381, 0.0149, 0.3208, 0.0019]) tensor([0.0717, 0.2775, 0.3768, 0.2739]) -Greedy action tensor([-1.9197, -0.4257, 0.6553, -0.1679]) tensor([0.0411, 0.1829, 0.5392, 0.2368]) -Greedy action tensor([-1.9278, -0.4270, 0.6568, -0.1711]) tensor([0.0408, 0.1828, 0.5403, 0.2361]) -Greedy action tensor([-1.8610, -0.4644, 0.6364, -0.1381]) tensor([0.0439, 0.1773, 0.5331, 0.2457]) -Greedy action tensor([-1.6508, -0.3176, 0.5395, 0.0429]) tensor([0.0522, 0.1979, 0.4662, 0.2837]) -Greedy action tensor([-1.9070, -0.4275, 0.6594, -0.1568]) tensor([0.0414, 0.1817, 0.5387, 0.2382]) -Greedy action tensor([-1.9375, -0.4413, 0.6648, -0.1767]) tensor([0.0404, 0.1802, 0.5447, 0.2348]) -Greedy action tensor([-1.9476, -0.4518, 0.6682, -0.1824]) tensor([0.0400, 0.1786, 0.5475, 0.2339]) -Greedy action tensor([-1.8320, -0.4910, 0.6100, -0.1195]) tensor([0.0457, 0.1749, 0.5259, 0.2535]) -Greedy action tensor([-1.8934, -0.4616, 0.6400, -0.1629]) tensor([0.0427, 0.1787, 0.5377, 0.2409]) -Greedy action tensor([-1.8267, -0.3267, 0.6131, -0.1082]) tensor([0.0444, 0.1989, 0.5092, 0.2475]) -Greedy action tensor([-1.8200, -0.2544, 0.5991, -0.1073]) tensor([0.0443, 0.2121, 0.4979, 0.2457]) -Greedy action tensor([-1.3059, 0.3299, 0.4384, 0.1566]) tensor([0.0618, 0.3174, 0.3538, 0.2669]) -Greedy action tensor([-1.6854, -0.1980, 0.5758, -0.0256]) tensor([0.0493, 0.2182, 0.4731, 0.2593]) -Greedy action tensor([-1.5298, -0.3689, 0.4113, -0.1196]) tensor([0.0656, 0.2093, 0.4566, 0.2685]) -Greedy action tensor([-1.8845, -0.3190, 0.6346, -0.1332]) tensor([0.0417, 0.1997, 0.5181, 0.2405]) -Greedy action tensor([-1.7973, -0.4285, 0.6000, -0.0979]) tensor([0.0467, 0.1837, 0.5138, 0.2557]) -Greedy action tensor([-1.9374, -0.4401, 0.6662, -0.1729]) tensor([0.0403, 0.1801, 0.5444, 0.2352]) -Greedy action tensor([-0.5566, 1.0486, 0.0417, 0.3465]) tensor([0.0974, 0.4850, 0.1772, 0.2404]) -Greedy action tensor([-1.8104, -0.4583, 0.8129, 0.2287]) tensor([0.0380, 0.1468, 0.5234, 0.2918]) -Greedy action tensor([-1.9028, -0.4462, 0.6444, -0.1594]) tensor([0.0421, 0.1805, 0.5371, 0.2404]) -Greedy action tensor([-1.8976, -0.4349, 0.6459, -0.1591]) tensor([0.0421, 0.1819, 0.5362, 0.2397]) -Greedy action tensor([-1.9230, -0.4473, 0.6613, -0.1672]) tensor([0.0410, 0.1791, 0.5428, 0.2371]) -Greedy action tensor([-1.4458, -0.4827, 0.4618, -0.0918]) tensor([0.0703, 0.1841, 0.4735, 0.2722]) -Greedy action tensor([-1.1377, -0.0785, 0.2930, -0.0676]) tensor([0.0911, 0.2626, 0.3808, 0.2655]) -Greedy action tensor([-1.7007, -0.3701, 0.6019, -0.0348]) tensor([0.0498, 0.1885, 0.4982, 0.2635]) -Greedy action tensor([-1.9350, -0.4405, 0.6637, -0.1755]) tensor([0.0405, 0.1804, 0.5441, 0.2351]) -Greedy action tensor([-1.0255, -0.4155, 0.7152, 0.9138]) tensor([0.0645, 0.1188, 0.3679, 0.4488]) -Greedy action tensor([-0.4710, 0.8747, 0.0143, 0.4770]) tensor([0.1105, 0.4246, 0.1796, 0.2853]) -Greedy action tensor([-1.5280, -0.5353, 0.5116, 0.1777]) tensor([0.0592, 0.1598, 0.4551, 0.3259]) -Greedy action tensor([-1.9066, -0.2030, 0.6108, -0.1577]) tensor([0.0406, 0.2230, 0.5031, 0.2333]) -Greedy action tensor([-1.8812, -0.4173, 0.6314, -0.1460]) tensor([0.0429, 0.1853, 0.5288, 0.2430]) -Greedy action tensor([-1.8975, -0.3831, 0.6376, -0.1477]) tensor([0.0418, 0.1901, 0.5275, 0.2405]) -Greedy action tensor([-1.9059, -0.4564, 0.6455, -0.1649]) tensor([0.0420, 0.1791, 0.5391, 0.2397]) -Greedy action tensor([-1.1762, 0.6743, 0.1737, 0.3488]) tensor([0.0632, 0.4023, 0.2439, 0.2906]) -Greedy action tensor([-1.7674, -0.3184, 0.5916, -0.2189]) tensor([0.0487, 0.2073, 0.5150, 0.2290]) -Greedy action tensor([-0.5835, 0.7104, -0.0615, -0.1772]) tensor([0.1277, 0.4655, 0.2152, 0.1916]) -Greedy action tensor([-1.8974, -0.3475, 0.6418, -0.1566]) tensor([0.0415, 0.1956, 0.5261, 0.2368]) -Greedy action tensor([-1.9055, -0.4362, 0.6438, -0.1647]) tensor([0.0419, 0.1823, 0.5367, 0.2391]) -Greedy action tensor([-1.8941, -0.4423, 0.6446, -0.1543]) tensor([0.0423, 0.1807, 0.5359, 0.2411]) -Greedy action tensor([-1.6139, 0.1903, 0.3794, -0.0089]) tensor([0.0516, 0.3133, 0.3785, 0.2567]) -Greedy action tensor([-1.9200, -0.4481, 0.6524, -0.1637]) tensor([0.0412, 0.1797, 0.5402, 0.2388]) -Greedy action tensor([-1.8848, -0.4402, 0.6382, -0.1515]) tensor([0.0428, 0.1815, 0.5335, 0.2422]) -Greedy action tensor([-1.9020, -0.3130, 0.6176, -0.1444]) tensor([0.0415, 0.2031, 0.5151, 0.2404]) -Greedy action tensor([-1.8510, -0.3333, 0.6056, -0.1334]) tensor([0.0439, 0.2001, 0.5117, 0.2444]) -Greedy action tensor([-1.9345, -0.4435, 0.6618, -0.1743]) tensor([0.0405, 0.1800, 0.5438, 0.2357]) -Greedy action tensor([-1.7777, -0.4852, 0.2380, -0.4489]) tensor([0.0628, 0.2287, 0.4713, 0.2372]) -Greedy action tensor([-1.8636, -0.4618, 0.6340, -0.1410]) tensor([0.0438, 0.1781, 0.5327, 0.2454]) -Greedy action tensor([-1.7997, -0.2575, 0.5822, -0.0636]) tensor([0.0451, 0.2108, 0.4882, 0.2559]) -Greedy action tensor([-1.8489, -0.3511, 0.6218, -0.1283]) tensor([0.0437, 0.1954, 0.5168, 0.2441]) -Greedy action tensor([-1.7142, -0.4673, 0.6895, 0.1521]) tensor([0.0454, 0.1581, 0.5027, 0.2937]) -Greedy action tensor([-1.8766, -0.3836, 0.6308, -0.1335]) tensor([0.0427, 0.1899, 0.5236, 0.2438]) -Greedy action tensor([-1.9358, -0.4389, 0.6616, -0.1755]) tensor([0.0405, 0.1808, 0.5435, 0.2353]) -Greedy action tensor([-1.9134, -0.4396, 0.6531, -0.1635]) tensor([0.0414, 0.1809, 0.5394, 0.2384]) -Greedy action tensor([-1.8450, -0.4361, 0.6151, -0.1441]) tensor([0.0449, 0.1837, 0.5255, 0.2460]) -Greedy action tensor([0.1074, 0.6571, 0.3324, 0.9519]) tensor([0.1584, 0.2745, 0.1984, 0.3686]) -Greedy action tensor([-1.9295, -0.4297, 0.6584, -0.1706]) tensor([0.0407, 0.1822, 0.5410, 0.2361]) -Greedy action tensor([-1.8923, -0.4701, 0.6393, -0.1566]) tensor([0.0428, 0.1772, 0.5375, 0.2425]) -Greedy action tensor([-1.9014, -0.3895, 0.6513, -0.1593]) tensor([0.0415, 0.1883, 0.5332, 0.2370]) -Greedy action tensor([-1.9153, -0.3940, 0.6485, -0.1664]) tensor([0.0411, 0.1883, 0.5341, 0.2364]) -Greedy action tensor([-1.6936, -0.4056, 0.5358, -0.0964]) tensor([0.0530, 0.1922, 0.4928, 0.2619]) -Greedy action tensor([-1.6165, 0.3198, 0.3824, 0.0088]) tensor([0.0490, 0.3400, 0.3619, 0.2491]) -Greedy action tensor([-1.9253, -0.4148, 0.6570, -0.1697]) tensor([0.0407, 0.1845, 0.5389, 0.2358]) -Greedy action tensor([-1.6018, -0.2973, 0.4438, -0.0500]) tensor([0.0583, 0.2151, 0.4512, 0.2754]) -Greedy action tensor([-1.0657, 0.8830, 0.1141, 0.3170]) tensor([0.0655, 0.4600, 0.2132, 0.2612]) -Greedy action tensor([-1.4831, 0.1949, 0.3943, -0.0472]) tensor([0.0585, 0.3132, 0.3824, 0.2459]) -Greedy action tensor([-1.9196, -0.4186, 0.6552, -0.1663]) tensor([0.0410, 0.1840, 0.5383, 0.2367]) -Greedy action tensor([-1.8738, -0.4661, 0.6405, -0.1412]) tensor([0.0433, 0.1769, 0.5350, 0.2448]) -Greedy action tensor([ 0.4710, 1.1144, -0.2348, -0.0365]) tensor([0.2501, 0.4759, 0.1235, 0.1506]) -Greedy action tensor([-0.5153, 0.0812, 1.0092, 1.5443]) tensor([0.0656, 0.1191, 0.3012, 0.5142]) -Greedy action tensor([-1.8622, -0.3879, 0.6621, -0.1101]) tensor([0.0423, 0.1850, 0.5285, 0.2442]) -Greedy action tensor([-1.7394, -0.0960, 0.5337, -0.0592]) tensor([0.0471, 0.2434, 0.4569, 0.2526]) -Greedy action tensor([-1.9284, -0.4220, 0.6587, -0.1682]) tensor([0.0406, 0.1832, 0.5400, 0.2362]) -Greedy action tensor([-0.6949, 0.5134, 0.0813, 0.0373]) tensor([0.1163, 0.3892, 0.2527, 0.2418]) -Greedy action tensor([ 1.5452, -0.2303, -1.2873, 0.3553]) tensor([0.6525, 0.1105, 0.0384, 0.1985]) -Greedy action tensor([ 1.1461, 0.1375, -0.6251, 0.4118]) tensor([0.4964, 0.1810, 0.0844, 0.2382]) -Greedy action tensor([ 1.5439, -0.3991, -0.4724, 0.2826]) tensor([0.6411, 0.0919, 0.0854, 0.1816]) -Greedy action tensor([ 1.9587, -1.1019, -0.4116, 0.1367]) tensor([0.7680, 0.0360, 0.0718, 0.1242]) -Greedy action tensor([ 1.2090, -0.6498, -0.2804, 0.1802]) tensor([0.5751, 0.0896, 0.1297, 0.2056]) -Greedy action tensor([ 1.2970, -0.2091, -0.6359, -0.1216]) tensor([0.6217, 0.1379, 0.0900, 0.1505]) -Greedy action tensor([ 1.3363, -0.4506, -0.7464, 0.2192]) tensor([0.6175, 0.1034, 0.0769, 0.2021]) -Greedy action tensor([ 1.3040, -0.5465, -0.2327, 0.4114]) tensor([0.5612, 0.0882, 0.1207, 0.2299]) -Greedy action tensor([ 1.4315, -0.3568, -1.1305, 0.5376]) tensor([0.6048, 0.1012, 0.0467, 0.2474]) -Greedy action tensor([ 1.7290, -1.0939, -0.2111, 0.4998]) tensor([0.6686, 0.0397, 0.0961, 0.1956]) -Greedy action tensor([ 1.6411, -0.4232, -0.1550, 0.5829]) tensor([0.6098, 0.0774, 0.1012, 0.2117]) -Greedy action tensor([ 1.1258, -0.1737, -0.6034, 0.0145]) tensor([0.5620, 0.1533, 0.0997, 0.1850]) -Greedy action tensor([ 1.5923, -0.4269, -0.6259, 0.1889]) tensor([0.6723, 0.0893, 0.0732, 0.1652]) -Greedy action tensor([ 0.9726, -0.0357, -0.3777, 0.0686]) tensor([0.4929, 0.1798, 0.1277, 0.1996]) -Greedy action tensor([ 1.9654, -0.7107, -0.6925, 0.7309]) tensor([0.6993, 0.0481, 0.0490, 0.2035]) -Greedy action tensor([ 0.7401, -0.3003, -0.1530, 0.0872]) tensor([0.4380, 0.1547, 0.1793, 0.2280]) -Greedy action tensor([ 1.5002, -0.4185, -0.5166, 0.2868]) tensor([0.6341, 0.0931, 0.0844, 0.1884]) -Greedy action tensor([ 1.4997, -0.7089, -0.9189, 0.2821]) tensor([0.6690, 0.0735, 0.0596, 0.1980]) -Greedy action tensor([ 1.7424, -0.7153, -0.8025, 0.0449]) tensor([0.7422, 0.0636, 0.0583, 0.1359]) -Greedy action tensor([ 2.0479, -0.8794, -0.2074, 0.2631]) tensor([0.7540, 0.0404, 0.0791, 0.1265]) -Greedy action tensor([ 1.2538, -0.4854, -0.5203, 0.2801]) tensor([0.5804, 0.1020, 0.0985, 0.2192]) -Greedy action tensor([ 1.5668, -0.6606, -0.3571, 0.9497]) tensor([0.5576, 0.0601, 0.0814, 0.3008]) -Greedy action tensor([ 1.1328, -0.0523, -0.6793, 0.0707]) tensor([0.5510, 0.1685, 0.0900, 0.1905]) -Greedy action tensor([ 1.2209, 0.0813, -0.9282, 0.4083]) tensor([0.5318, 0.1702, 0.0620, 0.2360]) -Greedy action tensor([ 1.3765, -0.3377, -0.3692, 0.1987]) tensor([0.6015, 0.1083, 0.1050, 0.1852]) -Greedy action tensor([ 1.4827, -0.7135, -0.2602, 0.3104]) tensor([0.6266, 0.0697, 0.1097, 0.1940]) -Greedy action tensor([ 1.6059, -0.6096, -0.5910, 0.4121]) tensor([0.6565, 0.0716, 0.0730, 0.1989]) -Greedy action tensor([ 1.0119, 0.0248, -0.5745, 0.0797]) tensor([0.5074, 0.1891, 0.1038, 0.1997]) -Greedy action tensor([ 1.7892, -1.3565, -0.0279, 0.0449]) tensor([0.7245, 0.0312, 0.1177, 0.1266]) -Greedy action tensor([ 1.4148, 0.0440, -0.4570, 0.0744]) tensor([0.5990, 0.1521, 0.0922, 0.1568]) -Greedy action tensor([ 1.5768, -0.2753, -0.5545, 0.2825]) tensor([0.6453, 0.1013, 0.0766, 0.1769]) -Greedy action tensor([ 1.2952, -0.2699, -0.0906, 0.4204]) tensor([0.5330, 0.1114, 0.1333, 0.2222]) -Greedy action tensor([ 1.2203, -0.6340, -0.1137, 0.1028]) tensor([0.5724, 0.0896, 0.1508, 0.1872]) -Greedy action tensor([ 1.1993, -0.4299, -0.0215, 0.1534]) tensor([0.5428, 0.1064, 0.1601, 0.1907]) -Greedy action tensor([ 1.4433, -0.7635, -0.4745, -0.1961]) tensor([0.6891, 0.0758, 0.1013, 0.1338]) -Greedy action tensor([ 1.0506, 0.0518, -0.2660, 0.5662]) tensor([0.4440, 0.1635, 0.1190, 0.2735]) -Greedy action tensor([ 1.4374, 0.1658, -0.4426, 0.4809]) tensor([0.5503, 0.1543, 0.0840, 0.2115]) -Greedy action tensor([ 2.2137, -1.1418, -0.1107, 0.8280]) tensor([0.7231, 0.0252, 0.0708, 0.1809]) -Greedy action tensor([ 1.5609, -0.3792, -0.4525, 0.4467]) tensor([0.6229, 0.0895, 0.0832, 0.2044]) -Greedy action tensor([ 1.5580, -0.4058, -0.2978, 0.5978]) tensor([0.5954, 0.0836, 0.0931, 0.2279]) -Greedy action tensor([ 2.1095, -0.4212, -0.3236, 0.4453]) tensor([0.7371, 0.0587, 0.0647, 0.1396]) -Greedy action tensor([ 1.3357, -0.4706, -0.7807, 0.7509]) tensor([0.5429, 0.0892, 0.0654, 0.3025]) -Greedy action tensor([ 1.4609, -0.1924, -0.4696, -0.4630]) tensor([0.6745, 0.1291, 0.0979, 0.0985]) -Greedy action tensor([ 1.6443, -0.7882, -0.0484, 0.0793]) tensor([0.6753, 0.0593, 0.1243, 0.1412]) -Greedy action tensor([ 1.4837, -0.2966, -0.7479, 0.1407]) tensor([0.6506, 0.1097, 0.0698, 0.1699]) -Greedy action tensor([ 1.4356, -0.4291, -0.3730, 0.4815]) tensor([0.5869, 0.0909, 0.0962, 0.2260]) -Greedy action tensor([ 1.3514, -0.4268, -0.1611, -0.0863]) tensor([0.6147, 0.1038, 0.1355, 0.1460]) -Greedy action tensor([ 1.7076, -1.1079, -0.6305, 0.9240]) tensor([0.6199, 0.0371, 0.0598, 0.2831]) -Greedy action tensor([ 1.0044, -0.1697, -0.4493, 0.3693]) tensor([0.4825, 0.1491, 0.1128, 0.2557]) -Greedy action tensor([ 2.3352, -0.8987, -0.5384, 0.4393]) tensor([0.8025, 0.0316, 0.0453, 0.1205]) -Greedy action tensor([ 1.5562, 0.2357, -0.4412, -0.1392]) tensor([0.6304, 0.1683, 0.0855, 0.1157]) -Greedy action tensor([ 1.1504, -0.0431, -0.1972, 0.3237]) tensor([0.4999, 0.1515, 0.1299, 0.2187]) -Greedy action tensor([ 2.6846, 0.7966, -0.0797, 0.1862]) tensor([0.7712, 0.1167, 0.0486, 0.0634]) -Greedy action tensor([ 1.7366, -0.4624, -0.3519, 0.5395]) tensor([0.6507, 0.0722, 0.0806, 0.1965]) -Greedy action tensor([ 1.3069, -0.7866, -0.3509, 0.1933]) tensor([0.6089, 0.0751, 0.1160, 0.2000]) -Greedy action tensor([ 1.5199, -0.7522, -0.1380, 0.3519]) tensor([0.6232, 0.0642, 0.1188, 0.1938]) -Greedy action tensor([ 1.7577, -0.2353, -0.5671, 0.1674]) tensor([0.6954, 0.0948, 0.0680, 0.1418]) -Greedy action tensor([ 1.2139, -0.3653, -0.5170, -0.0593]) tensor([0.6013, 0.1239, 0.1065, 0.1683]) -Greedy action tensor([ 1.6355, -0.6775, -0.2639, -0.0074]) tensor([0.6935, 0.0686, 0.1038, 0.1341]) -Greedy action tensor([ 1.3461, -0.1023, -0.3831, 0.0963]) tensor([0.5886, 0.1383, 0.1044, 0.1687]) -Greedy action tensor([ 1.3599, -0.6177, -0.2643, 0.6164]) tensor([0.5522, 0.0764, 0.1088, 0.2626]) -Greedy action tensor([ 2.1728, -1.0281, -0.3440, 0.7447]) tensor([0.7346, 0.0299, 0.0593, 0.1761]) -Greedy action tensor([ 1.3461, -0.5804, -0.3105, 0.3953]) tensor([0.5804, 0.0845, 0.1107, 0.2243]) -Greedy action tensor([ 1.2820, -0.2477, -0.5442, 0.1572]) tensor([0.5874, 0.1272, 0.0946, 0.1907]) -Greedy action tensor([ 0.8914, -0.2046, -0.7155, 0.3879]) tensor([0.4675, 0.1562, 0.0937, 0.2826]) -Greedy action tensor([ 1.0226, -0.2128, -0.3697, -0.1480]) tensor([0.5407, 0.1572, 0.1344, 0.1677]) -Greedy action tensor([ 0.9321, 0.0094, -0.0277, 0.3948]) tensor([0.4229, 0.1681, 0.1620, 0.2471]) -Greedy action tensor([ 1.3246, -0.3613, -0.2953, 0.2720]) tensor([0.5773, 0.1070, 0.1143, 0.2015]) -Greedy action tensor([ 1.3355, -0.3079, -0.3841, 0.1916]) tensor([0.5913, 0.1143, 0.1059, 0.1884]) -Greedy action tensor([ 1.5093, -0.5123, -0.5415, 0.3321]) tensor([0.6373, 0.0844, 0.0820, 0.1964]) -Greedy action tensor([ 2.2252, -1.2505, 0.0106, 0.5724]) tensor([0.7510, 0.0232, 0.0820, 0.1438]) -Greedy action tensor([ 1.5615, -0.5722, -0.2972, 0.4071]) tensor([0.6291, 0.0745, 0.0981, 0.1983]) -Greedy action tensor([ 1.2409, -0.0380, -0.6997, 0.1622]) tensor([0.5675, 0.1580, 0.0815, 0.1930]) -Greedy action tensor([ 1.3874, -0.4669, -0.4453, 0.0936]) tensor([0.6286, 0.0984, 0.1006, 0.1724]) -Greedy action tensor([ 1.0211, -0.4248, -0.0357, 0.0510]) tensor([0.5096, 0.1200, 0.1771, 0.1932]) -Greedy action tensor([ 2.4404, -1.4891, 0.1458, 1.2120]) tensor([0.7076, 0.0139, 0.0713, 0.2071]) -Greedy action tensor([ 1.8743, -1.2775, -0.2227, 0.1864]) tensor([0.7405, 0.0317, 0.0909, 0.1369]) -Greedy action tensor([ 1.1511, 0.0279, -0.6459, 0.1649]) tensor([0.5365, 0.1745, 0.0889, 0.2001]) -Greedy action tensor([ 1.6088, -0.3496, -0.3151, 0.9147]) tensor([0.5597, 0.0790, 0.0817, 0.2796]) -Greedy action tensor([ 1.4736, -0.2879, -0.2369, 0.3147]) tensor([0.6001, 0.1031, 0.1085, 0.1883]) -Greedy action tensor([ 1.0503, -0.0825, -0.2230, 0.1848]) tensor([0.4943, 0.1592, 0.1384, 0.2080]) -Greedy action tensor([ 0.5811, -0.5163, -0.2333, -0.1840]) tensor([0.4460, 0.1489, 0.1976, 0.2075]) -Greedy action tensor([ 0.6225, 0.0054, -0.0090, 0.0245]) tensor([0.3815, 0.2058, 0.2029, 0.2098]) -Greedy action tensor([ 0.7156, 0.0630, 0.1761, -0.0723]) tensor([0.3908, 0.2035, 0.2279, 0.1778]) -Greedy action tensor([ 0.9241, -0.3211, -0.1670, -0.1922]) tensor([0.5125, 0.1475, 0.1721, 0.1678]) -Greedy action tensor([ 0.7743, -0.8612, 0.0365, -0.5643]) tensor([0.5167, 0.1007, 0.2471, 0.1355]) -Greedy action tensor([ 0.6366, -0.5317, -0.2233, -0.6512]) tensor([0.4975, 0.1547, 0.2105, 0.1373]) -Greedy action tensor([ 0.9850, -0.8839, -0.0158, -0.4879]) tensor([0.5711, 0.0881, 0.2099, 0.1309]) -Greedy action tensor([ 1.0665, -0.6110, -0.1720, -0.5149]) tensor([0.5944, 0.1111, 0.1723, 0.1223]) -Greedy action tensor([ 1.1909, -0.5823, -0.1706, -0.5922]) tensor([0.6273, 0.1065, 0.1608, 0.1055]) -Greedy action tensor([ 1.3714, -0.4880, -0.1104, -0.5421]) tensor([0.6534, 0.1018, 0.1485, 0.0964]) -Greedy action tensor([ 0.9325, -0.8275, 0.1450, -0.6656]) tensor([0.5467, 0.0940, 0.2487, 0.1106]) -Greedy action tensor([ 0.8718, -0.4499, 0.1621, -0.1913]) tensor([0.4753, 0.1268, 0.2338, 0.1642]) -Greedy action tensor([ 0.9081, -0.5394, 0.1379, -0.4795]) tensor([0.5134, 0.1207, 0.2377, 0.1282]) -Greedy action tensor([ 0.4995, -0.2946, -0.0968, -0.3694]) tensor([0.4128, 0.1866, 0.2274, 0.1731]) -Greedy action tensor([ 0.5982, -0.5007, -0.1533, -0.1482]) tensor([0.4388, 0.1462, 0.2070, 0.2080]) -Greedy action tensor([ 0.9115, -0.2265, -0.0656, -0.2697]) tensor([0.4991, 0.1599, 0.1878, 0.1532]) -Greedy action tensor([ 1.2134, -0.6260, -0.1963, -0.6554]) tensor([0.6421, 0.1020, 0.1568, 0.0991]) -Greedy action tensor([ 0.5909, -0.4086, -0.0086, -0.1293]) tensor([0.4160, 0.1531, 0.2284, 0.2024]) -Greedy action tensor([ 0.5619, 0.0394, -0.1863, -0.1214]) tensor([0.3889, 0.2307, 0.1840, 0.1964]) -Greedy action tensor([ 0.8603, -0.5374, -0.0944, -0.6712]) tensor([0.5410, 0.1337, 0.2082, 0.1170]) -Greedy action tensor([ 0.9726, -0.4622, 0.0586, -0.4025]) tensor([0.5286, 0.1259, 0.2119, 0.1336]) -Greedy action tensor([ 0.9906, -0.6304, -0.1450, -0.3186]) tensor([0.5590, 0.1105, 0.1796, 0.1509]) -Greedy action tensor([ 0.8369, -0.3656, -0.1727, -0.6458]) tensor([0.5286, 0.1588, 0.1926, 0.1200]) -Greedy action tensor([ 0.9851, -0.4291, -0.2083, -0.2485]) tensor([0.5442, 0.1323, 0.1650, 0.1585]) -Greedy action tensor([ 0.6324, -0.5147, -0.1929, -0.3697]) tensor([0.4711, 0.1496, 0.2064, 0.1729]) -Greedy action tensor([ 0.8749, -0.6149, -0.0688, -0.3883]) tensor([0.5271, 0.1188, 0.2051, 0.1490]) -Greedy action tensor([ 1.0723, -0.4684, 0.0102, -0.2942]) tensor([0.5510, 0.1180, 0.1905, 0.1405]) -Greedy action tensor([ 0.2127, -0.2075, -0.0848, -0.2706]) tensor([0.3315, 0.2178, 0.2462, 0.2045]) -Greedy action tensor([ 0.6865, -0.2955, -0.0888, -0.1152]) tensor([0.4379, 0.1640, 0.2017, 0.1964]) -Greedy action tensor([ 0.7657, -0.5071, -0.0711, -0.2370]) tensor([0.4808, 0.1346, 0.2082, 0.1764]) -Greedy action tensor([ 1.1316, -0.5712, 0.0347, -0.5893]) tensor([0.5900, 0.1075, 0.1970, 0.1055]) -Greedy action tensor([ 0.6419, -0.1126, -0.0974, -0.0026]) tensor([0.4044, 0.1902, 0.1931, 0.2123]) -Greedy action tensor([ 0.6935, -0.5353, -0.0736, -0.0589]) tensor([0.4488, 0.1313, 0.2084, 0.2115]) -Greedy action tensor([ 0.6337, -0.1948, -0.0461, -0.1283]) tensor([0.4149, 0.1812, 0.2103, 0.1937]) -Greedy action tensor([ 0.4031, -0.3680, -0.0699, -0.1245]) tensor([0.3737, 0.1729, 0.2329, 0.2205]) -Greedy action tensor([ 0.5382, -0.1585, 0.1399, -0.3512]) tensor([0.3875, 0.1931, 0.2602, 0.1592]) -Greedy action tensor([ 1.1370, -0.6539, -0.0107, -0.4288]) tensor([0.5906, 0.0985, 0.1874, 0.1234]) -Greedy action tensor([ 0.8612, -0.4523, 0.0802, -0.2802]) tensor([0.4887, 0.1314, 0.2238, 0.1561]) -Greedy action tensor([ 0.7788, 0.0654, -0.0563, 0.0595]) tensor([0.4148, 0.2032, 0.1799, 0.2020]) -Greedy action tensor([ 0.4823, -0.1250, -0.0227, 0.0340]) tensor([0.3588, 0.1955, 0.2165, 0.2292]) -Greedy action tensor([ 0.5103, -0.2507, 0.0161, -0.0586]) tensor([0.3783, 0.1767, 0.2308, 0.2142]) -Greedy action tensor([ 0.9625, -0.6871, -0.1169, -0.3946]) tensor([0.5589, 0.1074, 0.1899, 0.1439]) -Greedy action tensor([ 0.6322, -0.0686, -0.0397, 0.0197]) tensor([0.3923, 0.1947, 0.2004, 0.2126]) -Greedy action tensor([ 1.2316, -0.5520, 0.0823, -0.4120]) tensor([0.5959, 0.1001, 0.1888, 0.1152]) -Greedy action tensor([ 0.4954, -0.1840, -0.1080, -0.1370]) tensor([0.3868, 0.1961, 0.2116, 0.2055]) -Greedy action tensor([ 0.9905, -0.2484, -0.4709, -0.5602]) tensor([0.5768, 0.1671, 0.1338, 0.1223]) -Greedy action tensor([ 0.6880, -0.2447, -0.0523, -0.1224]) tensor([0.4319, 0.1700, 0.2060, 0.1921]) -Greedy action tensor([ 0.7487, -0.4756, -0.0647, -0.4271]) tensor([0.4888, 0.1437, 0.2167, 0.1508]) -Greedy action tensor([ 0.9552, -0.6091, 0.0092, -0.3700]) tensor([0.5367, 0.1123, 0.2084, 0.1426]) -Greedy action tensor([ 0.7803, -0.4995, 0.1133, -0.2361]) tensor([0.4644, 0.1292, 0.2384, 0.1681]) -Greedy action tensor([ 0.7654, -0.4910, -0.0173, -0.3018]) tensor([0.4794, 0.1365, 0.2192, 0.1649]) -Greedy action tensor([ 0.4034, -0.2650, -0.1189, -0.1164]) tensor([0.3703, 0.1898, 0.2197, 0.2202]) -Greedy action tensor([ 0.5170, -0.1337, -0.1234, -0.1523]) tensor([0.3905, 0.2037, 0.2058, 0.2000]) -Greedy action tensor([ 0.2983, 0.1646, 0.0151, -0.2670]) tensor([0.3128, 0.2737, 0.2357, 0.1778]) -Greedy action tensor([ 0.8117, -0.1634, -0.0516, -0.0299]) tensor([0.4484, 0.1691, 0.1891, 0.1933]) -Greedy action tensor([ 0.8082, -0.2663, 0.0413, -0.2842]) tensor([0.4670, 0.1595, 0.2169, 0.1566]) -Greedy action tensor([ 0.8056, -0.4514, -0.0432, -0.2977]) tensor([0.4892, 0.1392, 0.2093, 0.1623]) -Greedy action tensor([ 0.4597, -0.0456, -0.0781, 0.0963]) tensor([0.3469, 0.2093, 0.2026, 0.2412]) -Greedy action tensor([ 0.5382, -0.3069, -0.1059, 0.0636]) tensor([0.3881, 0.1667, 0.2038, 0.2414]) -Greedy action tensor([ 0.7910, -0.3527, -0.0516, -0.7143]) tensor([0.5073, 0.1617, 0.2184, 0.1126]) -Greedy action tensor([ 0.7935, -0.4698, -0.2046, -0.5614]) tensor([0.5238, 0.1481, 0.1930, 0.1351]) -Greedy action tensor([ 1.1588, -0.6956, -0.0112, -0.6994]) tensor([0.6162, 0.0965, 0.1912, 0.0961]) -Greedy action tensor([ 0.2781, 0.2532, -0.0501, 0.0229]) tensor([0.2881, 0.2811, 0.2075, 0.2233]) -Greedy action tensor([ 0.6665, -0.2426, -0.0772, -0.0698]) tensor([0.4243, 0.1709, 0.2017, 0.2032]) -Greedy action tensor([ 0.8829, -0.6551, -0.0912, -0.4495]) tensor([0.5387, 0.1157, 0.2034, 0.1421]) -Greedy action tensor([ 0.9742, -0.8473, 0.0672, -0.7042]) tensor([0.5707, 0.0923, 0.2304, 0.1065]) -Greedy action tensor([ 0.4150, 0.0654, -0.0526, 0.1357]) tensor([0.3239, 0.2283, 0.2029, 0.2449]) -Greedy action tensor([ 0.4947, -0.1145, -0.1198, -0.0258]) tensor([0.3733, 0.2030, 0.2019, 0.2218]) -Greedy action tensor([ 1.2321, -0.7167, -0.0786, -0.8279]) tensor([0.6495, 0.0925, 0.1751, 0.0828]) -Greedy action tensor([ 1.0597, -0.5398, 0.0405, -0.2748]) tensor([0.5476, 0.1106, 0.1976, 0.1442]) -Greedy action tensor([ 1.0571, -0.1342, 0.0891, -0.1045]) tensor([0.5008, 0.1522, 0.1902, 0.1568]) -Greedy action tensor([ 0.8628, -0.7393, -0.0724, -0.4297]) tensor([0.5352, 0.1078, 0.2101, 0.1469]) -Greedy action tensor([ 1.0258, -0.9120, -0.2453, -0.9174]) tensor([0.6379, 0.0919, 0.1789, 0.0914]) -Greedy action tensor([ 0.6223, -0.2307, -0.1430, -0.1438]) tensor([0.4244, 0.1809, 0.1974, 0.1973]) -Greedy action tensor([ 4.8548e-01, -2.0165e-01, -4.7701e-02, -1.4526e-04]) tensor([0.3697, 0.1860, 0.2169, 0.2275]) -Greedy action tensor([ 0.8119, -0.4914, 0.1393, -0.1647]) tensor([0.4633, 0.1258, 0.2364, 0.1745]) -Greedy action tensor([ 0.8021, -0.4128, -0.0783, -0.2187]) tensor([0.4827, 0.1432, 0.2001, 0.1739]) -Greedy action tensor([ 0.5801, -0.6524, -0.0752, -0.4379]) tensor([0.4604, 0.1342, 0.2391, 0.1663]) -Greedy action tensor([ 0.5643, -0.1340, -0.0030, -0.5287]) tensor([0.4167, 0.2073, 0.2363, 0.1397]) -Greedy action tensor([ 1.0097, -0.7439, -0.0803, -0.4600]) tensor([0.5749, 0.0995, 0.1933, 0.1322]) -Greedy action tensor([ 0.8929, -0.5657, 0.4040, -0.5143]) tensor([0.4783, 0.1112, 0.2933, 0.1171]) -Greedy action tensor([-0.1901, -0.3377, -1.0364, -0.5774]) tensor([0.3366, 0.2904, 0.1444, 0.2285]) -Greedy action tensor([ 0.0950, -1.7154, 1.1067, -0.3822]) tensor([0.2205, 0.0361, 0.6066, 0.1368]) -Greedy action tensor([-1.5593, -0.8604, 0.1687, 0.0832]) tensor([0.0724, 0.1457, 0.4077, 0.3743]) -Greedy action tensor([-1.5225, -0.5340, 0.0236, -1.2056]) tensor([0.1025, 0.2755, 0.4812, 0.1408]) -Greedy action tensor([ 0.5538, -0.3810, 0.2622, 0.3642]) tensor([0.3370, 0.1323, 0.2518, 0.2788]) -Greedy action tensor([ 1.1268, 0.0238, -0.1016, -0.8593]) tensor([0.5676, 0.1884, 0.1662, 0.0779]) -Greedy action tensor([0.6659, 0.1347, 0.4461, 0.4581]) tensor([0.3122, 0.1836, 0.2506, 0.2536]) -Greedy action tensor([-1.6394, -0.2216, 0.4670, -1.2579]) tensor([0.0675, 0.2787, 0.5549, 0.0989]) -Greedy action tensor([-0.4875, -1.1382, -0.4919, 0.1298]) tensor([0.2288, 0.1194, 0.2278, 0.4241]) -Greedy action tensor([0.6433, 0.0436, 0.5768, 0.1977]) tensor([0.3200, 0.1757, 0.2994, 0.2049]) -Greedy action tensor([-0.5798, 0.0574, -0.5399, 0.1465]) tensor([0.1667, 0.3152, 0.1735, 0.3446]) -Greedy action tensor([-0.7310, 0.1783, -0.0135, -0.5129]) tensor([0.1476, 0.3664, 0.3025, 0.1836]) -Greedy action tensor([-0.1781, -0.3222, -0.1114, -0.7215]) tensor([0.2844, 0.2463, 0.3041, 0.1652]) -Greedy action tensor([ 0.2985, -0.3247, -0.3649, 0.2997]) tensor([0.3276, 0.1757, 0.1687, 0.3280]) -Greedy action tensor([-0.7843, -0.8834, -0.5093, 0.1023]) tensor([0.1770, 0.1603, 0.2330, 0.4296]) -Greedy action tensor([-1.0786, -1.2677, -0.5308, 0.6332]) tensor([0.1099, 0.0910, 0.1901, 0.6089]) -Greedy action tensor([ 0.1373, 0.2895, -0.6492, -1.0755]) tensor([0.3428, 0.3992, 0.1561, 0.1019]) -Greedy action tensor([ 1.4683, -0.9696, 1.4666, 0.1363]) tensor([0.4256, 0.0372, 0.4249, 0.1123]) -Greedy action tensor([-0.5421, -0.8943, 0.3906, -0.9916]) tensor([0.2048, 0.1440, 0.5205, 0.1307]) -Greedy action tensor([-0.1243, 0.6028, 0.9130, 0.1228]) tensor([0.1395, 0.2885, 0.3935, 0.1785]) -Greedy action tensor([-0.5707, -0.2059, -0.4731, -0.5666]) tensor([0.2199, 0.3168, 0.2425, 0.2208]) -Greedy action tensor([ 0.0547, -1.2905, 0.4721, -0.3155]) tensor([0.2883, 0.0751, 0.4376, 0.1991]) -Greedy action tensor([ 1.0053, -1.1238, -0.3236, -0.4962]) tensor([0.6225, 0.0740, 0.1648, 0.1387]) -Greedy action tensor([ 0.1761, -0.3405, 0.6923, -0.2993]) tensor([0.2568, 0.1532, 0.4303, 0.1596]) -Greedy action tensor([ 0.0191, -0.6746, -0.1684, -0.3264]) tensor([0.3293, 0.1646, 0.2730, 0.2331]) -Greedy action tensor([-0.3033, -0.8952, -0.4826, 0.2718]) tensor([0.2400, 0.1328, 0.2006, 0.4266]) -Greedy action tensor([ 0.4621, -0.4295, 1.0746, -0.3706]) tensor([0.2710, 0.1111, 0.5000, 0.1179]) -Greedy action tensor([ 0.7999, -0.2883, -0.2347, 0.4806]) tensor([0.4134, 0.1392, 0.1469, 0.3004]) -Greedy action tensor([-0.2178, -0.7278, -0.5723, -0.5675]) tensor([0.3326, 0.1997, 0.2333, 0.2344]) -Greedy action tensor([ 0.7532, -0.6969, -0.4702, 0.8470]) tensor([0.3807, 0.0893, 0.1120, 0.4181]) -Greedy action tensor([-1.0753, 0.4292, -0.6207, -0.7192]) tensor([0.1176, 0.5293, 0.1852, 0.1679]) -Greedy action tensor([-0.4816, -0.6229, 0.5995, -0.7249]) tensor([0.1786, 0.1550, 0.5264, 0.1400]) -Greedy action tensor([ 1.9589, -1.2312, 1.5690, 0.4585]) tensor([0.5151, 0.0212, 0.3488, 0.1149]) -Greedy action tensor([-0.3750, -0.3590, 1.1317, -0.6866]) tensor([0.1377, 0.1400, 0.6215, 0.1009]) -Greedy action tensor([ 0.5762, -1.1920, -0.0275, -0.4813]) tensor([0.4843, 0.0826, 0.2648, 0.1682]) -Greedy action tensor([-0.9659, -0.6058, -0.1868, -0.0645]) tensor([0.1413, 0.2026, 0.3080, 0.3481]) -Greedy action tensor([-0.0521, -1.0197, -0.2872, 0.5140]) tensor([0.2543, 0.0966, 0.2010, 0.4480]) -Greedy action tensor([ 0.5269, -0.0854, -0.7222, -1.2099]) tensor([0.4988, 0.2704, 0.1430, 0.0878]) -Greedy action tensor([ 0.2360, -0.2291, -0.2084, 0.2637]) tensor([0.3033, 0.1905, 0.1945, 0.3118]) -Greedy action tensor([ 1.2307, -0.9263, 0.5180, 0.7004]) tensor([0.4557, 0.0527, 0.2234, 0.2681]) -Greedy action tensor([ 0.0373, -1.2024, 0.2414, -0.3993]) tensor([0.3162, 0.0915, 0.3878, 0.2044]) -Greedy action tensor([-0.8057, -0.6649, -0.1701, -0.2175]) tensor([0.1712, 0.1971, 0.3233, 0.3084]) -Greedy action tensor([ 0.2285, -1.8406, -0.3068, -0.0837]) tensor([0.4092, 0.0517, 0.2396, 0.2995]) -Greedy action tensor([-0.9869, 0.3795, 0.8093, -1.1740]) tensor([0.0849, 0.3329, 0.5117, 0.0704]) -Greedy action tensor([ 0.9298, -0.6289, 0.7167, 0.2808]) tensor([0.3935, 0.0828, 0.3180, 0.2057]) -Greedy action tensor([-0.5407, 0.4424, 0.5450, -0.3295]) tensor([0.1271, 0.3396, 0.3763, 0.1570]) -Greedy action tensor([ 0.7686, -0.2963, 0.3666, 1.1841]) tensor([0.2834, 0.0977, 0.1896, 0.4293]) -Greedy action tensor([ 0.7942, -0.9194, 0.4702, -0.8769]) tensor([0.4781, 0.0862, 0.3458, 0.0899]) -Greedy action tensor([-0.6491, -0.5087, -0.8260, 0.2992]) tensor([0.1795, 0.2066, 0.1504, 0.4634]) -Greedy action tensor([-0.5013, -0.7795, -0.3948, 0.8051]) tensor([0.1524, 0.1154, 0.1695, 0.5628]) -Greedy action tensor([-0.1584, -0.8841, -0.5894, -0.3199]) tensor([0.3350, 0.1622, 0.2177, 0.2851]) -Greedy action tensor([ 1.3987, -1.1270, 0.3058, 0.3121]) tensor([0.5706, 0.0456, 0.1913, 0.1925]) -Greedy action tensor([ 0.1170, -0.3405, -0.3105, -0.8508]) tensor([0.3752, 0.2375, 0.2447, 0.1426]) -Greedy action tensor([-0.5661, -0.9356, 0.6687, -0.3250]) tensor([0.1562, 0.1080, 0.5370, 0.1988]) -Greedy action tensor([ 1.1486, -0.1355, 0.4161, -0.4313]) tensor([0.5093, 0.1410, 0.2448, 0.1049]) -Greedy action tensor([0.8979, 0.0237, 0.0038, 0.0701]) tensor([0.4419, 0.1843, 0.1807, 0.1931]) -Greedy action tensor([ 0.7136, -1.4212, -0.3472, 0.0820]) tensor([0.5010, 0.0592, 0.1734, 0.2664]) -Greedy action tensor([ 0.7166, -0.8408, -0.4843, 0.6845]) tensor([0.4032, 0.0850, 0.1213, 0.3905]) -Greedy action tensor([ 0.3878, -1.3296, -0.0591, -0.1344]) tensor([0.4145, 0.0744, 0.2651, 0.2459]) -Greedy action tensor([-0.8165, 0.1815, 0.2005, -0.7803]) tensor([0.1331, 0.3610, 0.3679, 0.1380]) -Greedy action tensor([ 0.6704, 0.5336, -0.0479, 0.0668]) tensor([0.3440, 0.3001, 0.1678, 0.1881]) -Greedy action tensor([-0.4232, -0.4800, -0.0186, -0.4416]) tensor([0.2260, 0.2135, 0.3387, 0.2219]) -Greedy action tensor([ 0.2977, -1.2747, 0.2485, -0.7406]) tensor([0.3978, 0.0826, 0.3787, 0.1409]) -Greedy action tensor([-0.5538, -1.0708, -0.4975, -0.6285]) tensor([0.2791, 0.1665, 0.2953, 0.2591]) -Greedy action tensor([ 0.2014, -1.3150, 1.5173, -0.2392]) tensor([0.1789, 0.0393, 0.6668, 0.1151]) -Greedy action tensor([-0.3386, -0.6660, -1.0505, 0.0621]) tensor([0.2700, 0.1946, 0.1325, 0.4030]) -Greedy action tensor([-0.3841, -1.1513, 1.1301, -0.1522]) tensor([0.1375, 0.0639, 0.6252, 0.1734]) -Greedy action tensor([-0.1701, -0.8107, -0.5002, -0.8395]) tensor([0.3626, 0.1911, 0.2607, 0.1857]) -Greedy action tensor([ 0.5006, -0.3580, 0.2668, 0.9771]) tensor([0.2614, 0.1108, 0.2069, 0.4210]) -Greedy action tensor([ 1.5916, -1.0308, 0.0935, 0.9379]) tensor([0.5506, 0.0400, 0.1231, 0.2864]) -Greedy action tensor([ 1.1107, -0.8984, -0.6065, 0.2262]) tensor([0.5792, 0.0777, 0.1040, 0.2392]) -Greedy action tensor([-0.3506, 0.7878, -0.0434, -1.4104]) tensor([0.1716, 0.5356, 0.2333, 0.0595]) -Greedy action tensor([-0.3935, 0.1705, 0.9132, -0.6015]) tensor([0.1377, 0.2420, 0.5085, 0.1118]) -Greedy action tensor([ 0.5364, -1.8027, -0.0957, -0.2121]) tensor([0.4760, 0.0459, 0.2530, 0.2252]) -Greedy action tensor([ 1.4988, -1.0725, -0.1723, 1.5453]) tensor([0.4325, 0.0331, 0.0813, 0.4531]) -Greedy action tensor([-0.2977, 0.1419, 0.6189, -0.6933]) tensor([0.1746, 0.2710, 0.4367, 0.1176]) -Greedy action tensor([-0.9426, -1.2164, -0.9696, 0.3027]) tensor([0.1611, 0.1225, 0.1568, 0.5596]) -Greedy action tensor([-0.4337, 0.0201, -0.7781, -0.5419]) tensor([0.2392, 0.3766, 0.1695, 0.2147]) -Greedy action tensor([ 0.8412, 0.2093, -0.5029, 1.2217]) tensor([0.3072, 0.1633, 0.0801, 0.4494]) -Greedy action tensor([-1.3040, 0.4263, -0.6402, -0.9112]) tensor([0.0994, 0.5605, 0.1930, 0.1472]) -Greedy action tensor([-1.1746, -0.5741, 0.3802, -1.1528]) tensor([0.1166, 0.2125, 0.5518, 0.1191]) -Greedy action tensor([-1.2465, -0.0489, 1.1003, -1.3145]) tensor([0.0637, 0.2110, 0.6658, 0.0595]) -Greedy action tensor([-1.8716, -0.4541, 0.6291, -0.1280]) tensor([0.0434, 0.1791, 0.5292, 0.2482]) -Greedy action tensor([-1.7401, -0.4949, 0.5568, -0.0831]) tensor([0.0509, 0.1767, 0.5058, 0.2667]) -Greedy action tensor([-0.7061, 0.2179, 0.3095, 0.4526]) tensor([0.1056, 0.2661, 0.2917, 0.3366]) -Greedy action tensor([-1.8971, -0.3477, 0.6290, -0.1465]) tensor([0.0417, 0.1964, 0.5217, 0.2402]) -Greedy action tensor([-1.4757, -0.3221, 0.3747, 0.0867]) tensor([0.0653, 0.2071, 0.4158, 0.3117]) -Greedy action tensor([-0.4306, 0.9625, 0.0412, 0.0319]) tensor([0.1217, 0.4900, 0.1950, 0.1932]) -Greedy action tensor([-1.2177, 0.6402, 0.1505, 0.1634]) tensor([0.0653, 0.4185, 0.2565, 0.2598]) -Greedy action tensor([-1.8649, -0.4460, 0.6279, -0.1441]) tensor([0.0438, 0.1811, 0.5301, 0.2449]) -Greedy action tensor([-1.9433, -0.4474, 0.6664, -0.1798]) tensor([0.0402, 0.1793, 0.5462, 0.2343]) -Greedy action tensor([-1.9149, -0.3857, 0.6490, -0.1642]) tensor([0.0411, 0.1894, 0.5331, 0.2364]) -Greedy action tensor([-1.8355, -0.4475, 0.6225, -0.1188]) tensor([0.0449, 0.1801, 0.5249, 0.2501]) -Greedy action tensor([-0.6003, -0.5512, 0.1653, 0.2788]) tensor([0.1513, 0.1589, 0.3253, 0.3644]) -Greedy action tensor([-1.9215, -0.4251, 0.6627, -0.1613]) tensor([0.0408, 0.1820, 0.5402, 0.2370]) -Greedy action tensor([-1.7272, -0.3815, 0.5360, -0.0477]) tensor([0.0505, 0.1938, 0.4851, 0.2706]) -Greedy action tensor([-1.3519, -0.2520, 0.4250, -0.0993]) tensor([0.0745, 0.2239, 0.4407, 0.2609]) -Greedy action tensor([-1.7023, -0.4722, 0.5502, 0.0391]) tensor([0.0509, 0.1742, 0.4843, 0.2905]) -Greedy action tensor([0.1718, 0.6935, 0.0181, 0.1680]) tensor([0.2203, 0.3712, 0.1889, 0.2195]) -Greedy action tensor([-1.9300, -0.4076, 0.6529, -0.1730]) tensor([0.0406, 0.1862, 0.5377, 0.2354]) -Greedy action tensor([-1.5953, -0.5157, 0.4720, 0.0084]) tensor([0.0595, 0.1750, 0.4699, 0.2956]) -Greedy action tensor([-1.6046, -0.1977, 0.6197, -0.0446]) tensor([0.0524, 0.2139, 0.4844, 0.2493]) -Greedy action tensor([ 0.0472, -0.3869, 1.1115, 1.5034]) tensor([0.1132, 0.0733, 0.3281, 0.4854]) -Greedy action tensor([ 0.4706, -0.0854, 0.9525, 1.6871]) tensor([0.1523, 0.0873, 0.2465, 0.5139]) -Greedy action tensor([0.7353, 0.5270, 0.5808, 1.3457]) tensor([0.2217, 0.1800, 0.1900, 0.4082]) -Greedy action tensor([-1.5616, 0.3318, 0.5068, 0.0284]) tensor([0.0489, 0.3247, 0.3868, 0.2397]) -Greedy action tensor([-1.9088, -0.4379, 0.6515, -0.1633]) tensor([0.0416, 0.1812, 0.5387, 0.2385]) -Greedy action tensor([-0.9041, 0.3562, -0.4621, -0.4745]) tensor([0.1313, 0.4629, 0.2042, 0.2017]) -Greedy action tensor([-1.6876, 0.1794, 0.4637, -0.0824]) tensor([0.0475, 0.3074, 0.4085, 0.2366]) -Greedy action tensor([-1.9212, -0.4013, 0.6560, -0.1633]) tensor([0.0408, 0.1864, 0.5364, 0.2364]) -Greedy action tensor([-1.9263, -0.3858, 0.6485, -0.1633]) tensor([0.0406, 0.1895, 0.5331, 0.2367]) -Greedy action tensor([-1.7035, -0.4718, 0.5473, -0.0611]) tensor([0.0524, 0.1795, 0.4974, 0.2707]) -Greedy action tensor([-1.4726, 0.5386, 0.3151, 0.0506]) tensor([0.0525, 0.3926, 0.3139, 0.2410]) -Greedy action tensor([-1.4927, -0.5267, 0.4675, -0.1013]) tensor([0.0678, 0.1782, 0.4814, 0.2726]) -Greedy action tensor([-1.7369, -0.2659, 0.5437, -0.0727]) tensor([0.0490, 0.2132, 0.4791, 0.2587]) -Greedy action tensor([-1.4926, -0.3720, 0.4122, 0.0589]) tensor([0.0645, 0.1978, 0.4333, 0.3044]) -Greedy action tensor([-1.9424, -0.4464, 0.6667, -0.1793]) tensor([0.0402, 0.1794, 0.5461, 0.2343]) -Greedy action tensor([-1.9305, -0.4411, 0.6618, -0.1722]) tensor([0.0407, 0.1803, 0.5431, 0.2359]) -Greedy action tensor([-1.8504, -0.2123, 0.5951, -0.1178]) tensor([0.0429, 0.2205, 0.4943, 0.2423]) -Greedy action tensor([-1.7255, -0.4919, 0.5652, -0.0516]) tensor([0.0509, 0.1747, 0.5029, 0.2714]) -Greedy action tensor([-1.9073, -0.3876, 0.6485, -0.1587]) tensor([0.0413, 0.1889, 0.5323, 0.2375]) -Greedy action tensor([-1.9187, -0.5841, 0.7518, -0.0632]) tensor([0.0390, 0.1481, 0.5634, 0.2494]) -Greedy action tensor([-1.8105, -0.0752, 0.5499, -0.0559]) tensor([0.0434, 0.2461, 0.4597, 0.2509]) -Greedy action tensor([-1.9280, -0.4306, 0.6624, -0.1709]) tensor([0.0407, 0.1817, 0.5421, 0.2356]) -Greedy action tensor([-1.9068, -0.2567, 0.6293, -0.1882]) tensor([0.0410, 0.2133, 0.5173, 0.2284]) -Greedy action tensor([-1.9117, -0.4451, 0.6490, -0.1691]) tensor([0.0417, 0.1807, 0.5396, 0.2381]) -Greedy action tensor([-1.8871, -0.4025, 0.6450, -0.1225]) tensor([0.0420, 0.1852, 0.5279, 0.2450]) -Greedy action tensor([-1.6813, -0.5268, 0.5158, -0.0349]) tensor([0.0545, 0.1728, 0.4901, 0.2826]) -Greedy action tensor([-1.9082, -0.4057, 0.6515, -0.1573]) tensor([0.0413, 0.1858, 0.5347, 0.2382]) -Greedy action tensor([-1.8843, -0.4317, 0.6337, -0.1576]) tensor([0.0429, 0.1834, 0.5323, 0.2413]) -Greedy action tensor([-1.9231, -0.4461, 0.6590, -0.1658]) tensor([0.0410, 0.1795, 0.5420, 0.2376]) -Greedy action tensor([-1.8957, -0.4527, 0.6483, -0.1567]) tensor([0.0423, 0.1790, 0.5382, 0.2406]) -Greedy action tensor([-1.1795, 0.8832, 0.2173, 0.2795]) tensor([0.0581, 0.4571, 0.2348, 0.2499]) -Greedy action tensor([-1.9379, -0.4363, 0.6633, -0.1768]) tensor([0.0403, 0.1811, 0.5438, 0.2347]) -Greedy action tensor([-1.9210, -0.4498, 0.6649, -0.1656]) tensor([0.0410, 0.1783, 0.5437, 0.2370]) -Greedy action tensor([-1.8561, -0.4458, 0.6277, -0.1381]) tensor([0.0441, 0.1808, 0.5291, 0.2460]) -Greedy action tensor([-1.7829, -0.3810, 0.6351, -0.0914]) tensor([0.0461, 0.1871, 0.5169, 0.2500]) -Greedy action tensor([-1.7903, -0.5147, 0.6109, -0.0889]) tensor([0.0474, 0.1697, 0.5231, 0.2598]) -Greedy action tensor([-1.7817, -0.1955, 0.5525, -0.1495]) tensor([0.0469, 0.2291, 0.4841, 0.2399]) -Greedy action tensor([-1.8082, -0.2374, 0.5524, -0.0854]) tensor([0.0454, 0.2186, 0.4815, 0.2545]) -Greedy action tensor([-1.8224, -0.4224, 0.6079, -0.1175]) tensor([0.0456, 0.1850, 0.5184, 0.2510]) -Greedy action tensor([-1.3485, -0.6179, 0.3406, 0.1838]) tensor([0.0762, 0.1583, 0.4127, 0.3528]) -Greedy action tensor([-0.1795, 0.7176, 0.4238, 0.4990]) tensor([0.1379, 0.3382, 0.2521, 0.2718]) -Greedy action tensor([-1.8407, -0.4567, 0.6169, -0.1318]) tensor([0.0451, 0.1798, 0.5262, 0.2489]) -Greedy action tensor([-1.7007, -0.3317, 0.5281, -0.0797]) tensor([0.0519, 0.2039, 0.4818, 0.2624]) -Greedy action tensor([-1.9173, -0.4296, 0.6452, -0.1618]) tensor([0.0414, 0.1831, 0.5363, 0.2393]) -Greedy action tensor([-1.3462, 0.7305, 0.3425, -0.2161]) tensor([0.0572, 0.4563, 0.3095, 0.1770]) -Greedy action tensor([-1.1695, 0.4685, 0.2045, -0.0042]) tensor([0.0752, 0.3867, 0.2970, 0.2411]) -Greedy action tensor([-1.8933, -0.4295, 0.6414, -0.1750]) tensor([0.0425, 0.1839, 0.5365, 0.2371]) -Greedy action tensor([-1.9204, -0.4457, 0.6709, -0.1632]) tensor([0.0408, 0.1783, 0.5445, 0.2365]) -Greedy action tensor([-1.2271, -0.5715, 0.3115, 0.2350]) tensor([0.0840, 0.1619, 0.3915, 0.3626]) -Greedy action tensor([-1.8689, -0.4587, 0.6346, -0.1449]) tensor([0.0436, 0.1787, 0.5332, 0.2445]) -Greedy action tensor([-1.7052, -0.2054, 0.5008, -0.0366]) tensor([0.0503, 0.2256, 0.4571, 0.2670]) -Greedy action tensor([-1.1418, -0.3570, 0.3277, 0.2914]) tensor([0.0852, 0.1868, 0.3706, 0.3573]) -Greedy action tensor([-1.8792, -0.4302, 0.6322, -0.1449]) tensor([0.0430, 0.1832, 0.5301, 0.2437]) -Greedy action tensor([0.8921, 0.4895, 0.5978, 1.4300]) tensor([0.2424, 0.1620, 0.1806, 0.4150]) -Greedy action tensor([-1.5713, 0.0940, 0.4726, -0.1631]) tensor([0.0553, 0.2922, 0.4267, 0.2259]) -Greedy action tensor([-1.8997, -0.4567, 0.6497, -0.1601]) tensor([0.0421, 0.1784, 0.5394, 0.2400]) -Greedy action tensor([-1.7451, -0.3073, 0.5709, -0.0146]) tensor([0.0476, 0.2006, 0.4828, 0.2689]) -Greedy action tensor([-1.9074, -0.4230, 0.6498, -0.1579]) tensor([0.0416, 0.1834, 0.5361, 0.2390]) -Greedy action tensor([-1.2234, -0.5777, 0.3658, 0.0174]) tensor([0.0888, 0.1693, 0.4349, 0.3070]) -Greedy action tensor([-1.5241, 0.3446, 0.2931, 0.0621]) tensor([0.0540, 0.3499, 0.3323, 0.2638]) -Greedy action tensor([-1.8611, -0.2743, 0.6074, -0.1334]) tensor([0.0429, 0.2096, 0.5062, 0.2413]) -Greedy action tensor([-1.8791, -0.4177, 0.6334, -0.1453]) tensor([0.0429, 0.1850, 0.5292, 0.2429]) -Greedy action tensor([ 1.5922, -0.5940, -0.1658, 0.6950]) tensor([0.5909, 0.0664, 0.1019, 0.2409]) -Greedy action tensor([ 1.1460, -0.6826, -0.4831, 0.6224]) tensor([0.5131, 0.0824, 0.1006, 0.3039]) -Greedy action tensor([ 1.0017, -0.3803, -0.6521, 0.6122]) tensor([0.4718, 0.1184, 0.0902, 0.3196]) -Greedy action tensor([ 0.9773, -0.0611, -0.1796, 0.0211]) tensor([0.4871, 0.1724, 0.1532, 0.1872]) -Greedy action tensor([ 1.7388, -0.7333, -0.1827, 0.6381]) tensor([0.6396, 0.0540, 0.0936, 0.2128]) -Greedy action tensor([ 1.0881, -0.1379, -0.8122, 0.2797]) tensor([0.5295, 0.1554, 0.0792, 0.2359]) -Greedy action tensor([ 1.9920, -0.6661, -1.0753, 0.1378]) tensor([0.7854, 0.0550, 0.0366, 0.1230]) -Greedy action tensor([ 1.8426, -0.6012, -0.5156, -0.3612]) tensor([0.7741, 0.0672, 0.0732, 0.0854]) -Greedy action tensor([ 1.0799, -0.1478, -0.9297, 0.2486]) tensor([0.5369, 0.1573, 0.0720, 0.2338]) -Greedy action tensor([ 1.2045, -0.5390, -0.6213, 0.0360]) tensor([0.6072, 0.1062, 0.0978, 0.1887]) -Greedy action tensor([ 1.8852, -0.2653, -0.3048, -0.2187]) tensor([0.7406, 0.0862, 0.0829, 0.0903]) -Greedy action tensor([ 1.1307, -0.4050, -0.3173, 0.3249]) tensor([0.5271, 0.1135, 0.1239, 0.2355]) -Greedy action tensor([2.3217, 0.2040, 0.0154, 0.3365]) tensor([0.7368, 0.0886, 0.0734, 0.1012]) -Greedy action tensor([ 1.8939, -0.7544, -0.3183, 0.2368]) tensor([0.7294, 0.0516, 0.0798, 0.1391]) -Greedy action tensor([ 1.5166, -0.6524, -0.5130, -0.6938]) tensor([0.7378, 0.0843, 0.0969, 0.0809]) -Greedy action tensor([ 1.5137, 0.1387, -0.3927, 0.0593]) tensor([0.6116, 0.1546, 0.0909, 0.1428]) -Greedy action tensor([ 1.7883, -0.4094, -0.3822, 0.6940]) tensor([0.6410, 0.0712, 0.0732, 0.2146]) -Greedy action tensor([ 1.5227, -0.3515, -0.0412, 0.6419]) tensor([0.5627, 0.0864, 0.1178, 0.2332]) -Greedy action tensor([ 2.0684, -0.4152, -0.7200, 0.3500]) tensor([0.7551, 0.0630, 0.0465, 0.1354]) -Greedy action tensor([ 1.2945, -0.4008, -0.5209, 0.2012]) tensor([0.5947, 0.1092, 0.0968, 0.1993]) -Greedy action tensor([ 1.4253, -0.1604, -0.7188, 0.3013]) tensor([0.6072, 0.1243, 0.0711, 0.1973]) -Greedy action tensor([ 1.3647, -0.5751, -0.6947, 0.2176]) tensor([0.6294, 0.0905, 0.0803, 0.1999]) -Greedy action tensor([ 0.7588, -0.3460, -0.6974, 0.7338]) tensor([0.3937, 0.1304, 0.0918, 0.3840]) -Greedy action tensor([ 1.4751, -0.3575, -0.4948, -0.1297]) tensor([0.6665, 0.1066, 0.0930, 0.1339]) -Greedy action tensor([1.9272, 0.4139, 0.0202, 0.0804]) tensor([0.6551, 0.1442, 0.0973, 0.1033]) -Greedy action tensor([ 1.3492, -0.5072, -0.0486, 0.5305]) tensor([0.5422, 0.0847, 0.1340, 0.2391]) -Greedy action tensor([ 1.3180, -0.5461, -0.5542, 0.5662]) tensor([0.5617, 0.0871, 0.0864, 0.2648]) -Greedy action tensor([ 1.9900, -0.1170, 0.3487, -0.1597]) tensor([0.6984, 0.0849, 0.1353, 0.0814]) -Greedy action tensor([ 1.7883, 0.4220, -0.3470, 0.1686]) tensor([0.6365, 0.1623, 0.0752, 0.1260]) -Greedy action tensor([ 1.5605, -0.4476, -0.1038, 0.6840]) tensor([0.5748, 0.0772, 0.1088, 0.2393]) -Greedy action tensor([ 1.4481, 0.3673, -0.2444, -0.0778]) tensor([0.5744, 0.1949, 0.1057, 0.1249]) -Greedy action tensor([ 1.7761, -0.7486, -0.6585, 0.2917]) tensor([0.7172, 0.0574, 0.0628, 0.1625]) -Greedy action tensor([ 1.4884, -0.9055, -0.3246, 0.7539]) tensor([0.5766, 0.0526, 0.0941, 0.2766]) -Greedy action tensor([ 1.2089, -0.0226, -0.2915, 0.5422]) tensor([0.4930, 0.1439, 0.1100, 0.2531]) -Greedy action tensor([ 1.2868, -0.0461, -1.0486, 0.2722]) tensor([0.5804, 0.1530, 0.0562, 0.2104]) -Greedy action tensor([ 0.7891, -0.6622, -0.3609, 0.0601]) tensor([0.4918, 0.1152, 0.1557, 0.2372]) -Greedy action tensor([ 0.8611, -0.3896, -0.3280, 0.0624]) tensor([0.4900, 0.1403, 0.1492, 0.2205]) -Greedy action tensor([ 1.2233, -0.0034, -0.5778, 0.1996]) tensor([0.5502, 0.1613, 0.0908, 0.1977]) -Greedy action tensor([ 1.4338, -0.2739, -0.8192, 0.4674]) tensor([0.5999, 0.1088, 0.0630, 0.2283]) -Greedy action tensor([ 1.8642, -0.5137, -0.4746, 0.2108]) tensor([0.7243, 0.0672, 0.0699, 0.1386]) -Greedy action tensor([ 1.5126, -0.1998, -0.9148, 0.2583]) tensor([0.6435, 0.1161, 0.0568, 0.1836]) -Greedy action tensor([ 1.5840, -0.3843, -0.8122, -0.1292]) tensor([0.7087, 0.0990, 0.0645, 0.1278]) -Greedy action tensor([ 1.4233, -0.1985, -0.7660, 0.1245]) tensor([0.6320, 0.1248, 0.0708, 0.1724]) -Greedy action tensor([ 2.0411, -0.9556, -0.2254, 0.5180]) tensor([0.7290, 0.0364, 0.0756, 0.1590]) -Greedy action tensor([ 1.9535, -0.9088, -0.5504, 0.2774]) tensor([0.7541, 0.0431, 0.0617, 0.1411]) -Greedy action tensor([ 1.5999, -0.1420, -0.6065, 0.1764]) tensor([0.6552, 0.1148, 0.0721, 0.1578]) -Greedy action tensor([ 1.3771, -0.8139, -0.2867, 0.3552]) tensor([0.6020, 0.0673, 0.1140, 0.2167]) -Greedy action tensor([ 1.1709, -0.4014, -0.8434, 0.1353]) tensor([0.5896, 0.1224, 0.0787, 0.2093]) -Greedy action tensor([ 1.7730, 0.5331, -0.2760, 0.2490]) tensor([0.6112, 0.1769, 0.0788, 0.1331]) -Greedy action tensor([ 1.3691, -0.1261, -0.5426, 0.4705]) tensor([0.5621, 0.1260, 0.0831, 0.2288]) -Greedy action tensor([ 2.0072, -0.5876, -0.8794, 0.1563]) tensor([0.7767, 0.0580, 0.0433, 0.1220]) -Greedy action tensor([ 1.6230, -0.4197, -0.1475, 0.4780]) tensor([0.6180, 0.0801, 0.1052, 0.1966]) -Greedy action tensor([ 1.7075, -0.4223, -0.6085, 1.1000]) tensor([0.5675, 0.0675, 0.0560, 0.3091]) -Greedy action tensor([ 1.3585, -0.5059, -0.2178, 0.0791]) tensor([0.6098, 0.0945, 0.1261, 0.1696]) -Greedy action tensor([ 1.2661, 0.3221, -0.6427, 0.1557]) tensor([0.5357, 0.2084, 0.0794, 0.1765]) -Greedy action tensor([ 1.5532, -0.7925, -0.5511, 0.8091]) tensor([0.5907, 0.0566, 0.0720, 0.2807]) -Greedy action tensor([ 2.1018, -0.4071, 0.0257, 0.6606]) tensor([0.6928, 0.0564, 0.0869, 0.1639]) -Greedy action tensor([ 1.2232, -0.3787, -0.4858, 0.2665]) tensor([0.5660, 0.1141, 0.1025, 0.2174]) -Greedy action tensor([ 1.4056, -0.1347, -0.4278, 0.1219]) tensor([0.6056, 0.1298, 0.0968, 0.1678]) -Greedy action tensor([ 1.3697, -0.3240, -0.2196, 0.2245]) tensor([0.5861, 0.1078, 0.1196, 0.1865]) -Greedy action tensor([ 1.2537, -0.1182, -0.3111, -0.0845]) tensor([0.5797, 0.1470, 0.1212, 0.1521]) -Greedy action tensor([ 1.6394, -1.0988, -0.3568, 0.0600]) tensor([0.7109, 0.0460, 0.0966, 0.1465]) -Greedy action tensor([ 1.3933, -0.3136, -0.4323, 0.3447]) tensor([0.5907, 0.1072, 0.0952, 0.2070]) -Greedy action tensor([ 1.5974, -0.6512, -0.6721, 0.4952]) tensor([0.6489, 0.0685, 0.0671, 0.2155]) -Greedy action tensor([ 1.7700, -0.6437, -0.9686, 0.7237]) tensor([0.6643, 0.0594, 0.0430, 0.2333]) -Greedy action tensor([ 1.6553, -0.8668, -0.0317, 0.4991]) tensor([0.6329, 0.0508, 0.1171, 0.1992]) -Greedy action tensor([ 1.7403, -0.5833, -0.4074, -0.0329]) tensor([0.7223, 0.0707, 0.0843, 0.1226]) -Greedy action tensor([ 1.5778, -0.4134, -0.2387, 0.6427]) tensor([0.5911, 0.0807, 0.0961, 0.2320]) -Greedy action tensor([ 2.2431, -0.7290, -0.0478, 0.4621]) tensor([0.7571, 0.0388, 0.0766, 0.1275]) -Greedy action tensor([ 2.1130, 0.1338, -0.0637, 0.1002]) tensor([0.7219, 0.0998, 0.0819, 0.0965]) -Greedy action tensor([ 0.8803, -0.1880, -0.1151, 0.0703]) tensor([0.4634, 0.1592, 0.1712, 0.2061]) -Greedy action tensor([ 1.3988, -0.0910, -0.3520, 0.2590]) tensor([0.5818, 0.1311, 0.1010, 0.1861]) -Greedy action tensor([ 1.6547, -0.5395, -0.6832, 0.3689]) tensor([0.6737, 0.0751, 0.0650, 0.1862]) -Greedy action tensor([ 1.8112, -0.9504, -0.2455, 0.7239]) tensor([0.6544, 0.0414, 0.0837, 0.2206]) -Greedy action tensor([ 1.5114, -0.3879, -0.7105, 0.0379]) tensor([0.6724, 0.1006, 0.0729, 0.1541]) -Greedy action tensor([ 1.0846, -0.2632, -0.2669, 0.3611]) tensor([0.4991, 0.1297, 0.1292, 0.2421]) -Greedy action tensor([ 1.9277, -0.8716, -0.3657, 0.3182]) tensor([0.7343, 0.0447, 0.0741, 0.1469]) -Greedy action tensor([ 1.7909, 0.5126, -0.2420, 0.3392]) tensor([0.6084, 0.1694, 0.0797, 0.1425]) -Greedy action tensor([ 1.5188, -0.1614, -0.5598, 0.2653]) tensor([0.6262, 0.1167, 0.0783, 0.1788]) -Greedy action tensor([ 1.5113, -0.3365, -0.6067, 0.4759]) tensor([0.6124, 0.0965, 0.0737, 0.2175]) -Greedy action tensor([ 1.6040, -0.9072, -0.4239, -0.3292]) tensor([0.7367, 0.0598, 0.0970, 0.1066]) -Greedy action tensor([ 1.5121, -0.3166, -0.8938, 0.7447]) tensor([0.5831, 0.0937, 0.0526, 0.2707]) -Greedy action tensor([ 0.9437, -0.5001, -0.1349, -0.5599]) tensor([0.5560, 0.1312, 0.1891, 0.1236]) -Greedy action tensor([ 0.6382, -0.3184, 0.0367, -0.4746]) tensor([0.4423, 0.1699, 0.2424, 0.1454]) -Greedy action tensor([ 0.8802, -0.7870, 0.0024, -0.3218]) tensor([0.5249, 0.0991, 0.2182, 0.1578]) -Greedy action tensor([ 0.8782, -0.7559, 0.1222, -0.4473]) tensor([0.5180, 0.1011, 0.2432, 0.1376]) -Greedy action tensor([ 0.6978, -0.5842, -0.1330, -0.2733]) tensor([0.4780, 0.1327, 0.2083, 0.1810]) -Greedy action tensor([ 0.7102, -0.1844, -0.3591, -0.4101]) tensor([0.4812, 0.1967, 0.1652, 0.1570]) -Greedy action tensor([ 0.4461, -0.0710, 0.0347, -0.0127]) tensor([0.3459, 0.2062, 0.2292, 0.2186]) -Greedy action tensor([ 0.7718, -1.0661, -0.0057, -0.5734]) tensor([0.5321, 0.0847, 0.2445, 0.1386]) -Greedy action tensor([ 0.7533, -0.4892, -0.0464, -0.1864]) tensor([0.4697, 0.1356, 0.2111, 0.1835]) -Greedy action tensor([ 0.7604, -0.4038, 0.0034, -0.2353]) tensor([0.4650, 0.1451, 0.2181, 0.1718]) -Greedy action tensor([ 0.0964, 0.5854, -0.0433, -0.0602]) tensor([0.2296, 0.3744, 0.1997, 0.1963]) -Greedy action tensor([ 0.8070, -0.6460, -0.1858, -0.4507]) tensor([0.5295, 0.1238, 0.1962, 0.1505]) -Greedy action tensor([ 0.4555, -0.3626, -0.1817, -0.2860]) tensor([0.4088, 0.1804, 0.2161, 0.1947]) -Greedy action tensor([ 1.1505, -0.9119, 0.1136, -0.5117]) tensor([0.5983, 0.0761, 0.2121, 0.1135]) -Greedy action tensor([ 0.9185, -0.2453, -0.1620, -0.2107]) tensor([0.5063, 0.1581, 0.1719, 0.1637]) -Greedy action tensor([ 1.0255, -0.5396, 0.0459, -0.2771]) tensor([0.5387, 0.1126, 0.2023, 0.1464]) -Greedy action tensor([0.7215, 0.0998, 0.0096, 0.1087]) tensor([0.3892, 0.2090, 0.1910, 0.2109]) -Greedy action tensor([ 0.8580, -0.6670, -0.1281, -0.5481]) tensor([0.5448, 0.1185, 0.2032, 0.1335]) -Greedy action tensor([ 0.4384, -0.0840, 0.0294, -0.0177]) tensor([0.3459, 0.2051, 0.2298, 0.2192]) -Greedy action tensor([ 0.9673, -1.0944, 0.1508, -0.7151]) tensor([0.5697, 0.0725, 0.2518, 0.1059]) -Greedy action tensor([ 0.8213, -0.3452, -0.0658, -0.0677]) tensor([0.4685, 0.1459, 0.1930, 0.1926]) -Greedy action tensor([ 0.4778, 0.2658, -0.1078, 0.0059]) tensor([0.3345, 0.2706, 0.1862, 0.2087]) -Greedy action tensor([ 0.8556, -0.2405, 0.0108, -0.3835]) tensor([0.4870, 0.1627, 0.2092, 0.1410]) -Greedy action tensor([ 0.7263, -0.6489, -0.1431, -0.1506]) tensor([0.4789, 0.1211, 0.2008, 0.1993]) -Greedy action tensor([ 1.0296, -0.6056, -0.0328, -0.4036]) tensor([0.5621, 0.1096, 0.1943, 0.1341]) -Greedy action tensor([ 0.6882, -0.3820, -0.0694, -0.1801]) tensor([0.4481, 0.1537, 0.2101, 0.1881]) -Greedy action tensor([ 0.5369, -0.3915, 0.0037, -0.0733]) tensor([0.3960, 0.1565, 0.2324, 0.2151]) -Greedy action tensor([ 0.8543, -0.5382, -0.0959, -0.3466]) tensor([0.5165, 0.1283, 0.1997, 0.1554]) -Greedy action tensor([ 0.6737, -0.3562, 0.1659, -0.3687]) tensor([0.4326, 0.1545, 0.2604, 0.1526]) -Greedy action tensor([ 0.9174, -0.4930, 0.0307, -0.4294]) tensor([0.5219, 0.1274, 0.2150, 0.1357]) -Greedy action tensor([ 1.0797, -1.1712, 0.0494, -0.3901]) tensor([0.5909, 0.0622, 0.2109, 0.1359]) -Greedy action tensor([ 1.0686, -0.2055, -0.0668, -0.2692]) tensor([0.5366, 0.1501, 0.1724, 0.1408]) -Greedy action tensor([ 0.5040, 0.1701, -0.0951, 0.0730]) tensor([0.3430, 0.2456, 0.1884, 0.2229]) -Greedy action tensor([ 0.4042, 0.0720, -0.0113, -0.1198]) tensor([0.3368, 0.2416, 0.2223, 0.1994]) -Greedy action tensor([ 0.4329, -0.1619, 0.2187, -0.1662]) tensor([0.3439, 0.1897, 0.2776, 0.1889]) -Greedy action tensor([ 0.9848, -0.3437, 0.0747, -0.0934]) tensor([0.4981, 0.1319, 0.2005, 0.1695]) -Greedy action tensor([ 0.4665, 0.1258, -0.1364, -0.1573]) tensor([0.3579, 0.2545, 0.1958, 0.1918]) -Greedy action tensor([ 0.4576, -0.0440, 0.0037, -0.1907]) tensor([0.3618, 0.2191, 0.2298, 0.1892]) -Greedy action tensor([ 0.7968, -0.3358, -0.0786, -0.2805]) tensor([0.4809, 0.1550, 0.2004, 0.1638]) -Greedy action tensor([ 1.1436, -0.6384, -0.0289, -0.3505]) tensor([0.5874, 0.0989, 0.1819, 0.1318]) -Greedy action tensor([ 1.0685, -0.9509, 0.2681, -0.5192]) tensor([0.5598, 0.0743, 0.2514, 0.1144]) -Greedy action tensor([ 1.2041, -0.9249, 0.1135, -0.7171]) tensor([0.6245, 0.0743, 0.2098, 0.0914]) -Greedy action tensor([ 0.8427, -0.4845, 0.0133, -0.3087]) tensor([0.4956, 0.1314, 0.2162, 0.1567]) -Greedy action tensor([ 0.6981, -0.3817, -0.1482, -0.2099]) tensor([0.4604, 0.1564, 0.1975, 0.1857]) -Greedy action tensor([ 1.1748, -1.0762, 0.1279, -0.6407]) tensor([0.6176, 0.0650, 0.2168, 0.1005]) -Greedy action tensor([ 0.9171, -0.5842, -0.0567, -0.5782]) tensor([0.5480, 0.1221, 0.2070, 0.1229]) -Greedy action tensor([ 0.7566, -0.7798, -0.1354, -0.3027]) tensor([0.5072, 0.1091, 0.2079, 0.1758]) -Greedy action tensor([ 1.2312, -0.5084, -0.0073, -0.5134]) tensor([0.6097, 0.1071, 0.1767, 0.1065]) -Greedy action tensor([ 1.0624, -0.5476, 0.0751, -0.1804]) tensor([0.5373, 0.1074, 0.2002, 0.1551]) -Greedy action tensor([ 0.7173, -0.4732, -0.0859, -0.3149]) tensor([0.4743, 0.1442, 0.2125, 0.1690]) -Greedy action tensor([ 1.2163, -0.1848, 0.1487, -0.2395]) tensor([0.5484, 0.1351, 0.1886, 0.1279]) -Greedy action tensor([ 1.4281, -0.9084, 0.0933, -0.7938]) tensor([0.6811, 0.0658, 0.1793, 0.0738]) -Greedy action tensor([ 0.5179, -0.0240, 0.0634, -0.0797]) tensor([0.3615, 0.2102, 0.2294, 0.1988]) -Greedy action tensor([ 0.7287, -0.4198, 0.0416, -0.4236]) tensor([0.4682, 0.1485, 0.2355, 0.1479]) -Greedy action tensor([ 0.4281, 0.2130, -0.1579, 0.0466]) tensor([0.3283, 0.2648, 0.1827, 0.2242]) -Greedy action tensor([ 0.8933, -0.2421, -0.1850, -0.0647]) tensor([0.4890, 0.1571, 0.1663, 0.1876]) -Greedy action tensor([ 0.7756, -0.4721, -0.0581, -0.3617]) tensor([0.4896, 0.1406, 0.2127, 0.1570]) -Greedy action tensor([ 0.7159, -0.5002, -0.1046, -0.3120]) tensor([0.4775, 0.1415, 0.2102, 0.1708]) -Greedy action tensor([ 0.3665, -0.2202, -0.0544, -0.1091]) tensor([0.3528, 0.1962, 0.2316, 0.2193]) -Greedy action tensor([ 0.9097, -0.8825, -0.0319, -0.4499]) tensor([0.5515, 0.0919, 0.2151, 0.1416]) -Greedy action tensor([ 1.2458, -0.7483, -0.0032, -0.5323]) tensor([0.6282, 0.0855, 0.1802, 0.1061]) -Greedy action tensor([ 0.9862, -1.0398, 0.1747, -0.4674]) tensor([0.5525, 0.0729, 0.2454, 0.1291]) -Greedy action tensor([ 0.5143, -0.3096, -0.1153, -0.3546]) tensor([0.4183, 0.1835, 0.2228, 0.1754]) -Greedy action tensor([ 1.1917, -0.9979, 0.1239, -0.7144]) tensor([0.6233, 0.0698, 0.2143, 0.0927]) -Greedy action tensor([ 0.5761, -0.1887, 0.0079, -0.4052]) tensor([0.4155, 0.1934, 0.2354, 0.1557]) -Greedy action tensor([ 0.1851, 0.0385, -0.1111, -0.2441]) tensor([0.3069, 0.2650, 0.2282, 0.1998]) -Greedy action tensor([ 0.8975, -0.7733, 0.0053, -0.3810]) tensor([0.5330, 0.1002, 0.2184, 0.1484]) -Greedy action tensor([ 0.5353, -0.3558, 0.0008, -0.1776]) tensor([0.4022, 0.1650, 0.2357, 0.1972]) -Greedy action tensor([ 0.8249, -0.5549, 0.0128, -0.1654]) tensor([0.4838, 0.1217, 0.2148, 0.1797]) -Greedy action tensor([ 0.7591, -0.7588, 0.3505, -0.7011]) tensor([0.4726, 0.1036, 0.3141, 0.1097]) -Greedy action tensor([ 0.4285, -0.1759, -0.2693, -0.4342]) tensor([0.4055, 0.2216, 0.2018, 0.1711]) -Greedy action tensor([ 0.8833, -0.6089, 0.0202, -0.3666]) tensor([0.5173, 0.1163, 0.2182, 0.1482]) -Greedy action tensor([ 0.4708, -0.2801, -0.1089, 0.0435]) tensor([0.3725, 0.1758, 0.2087, 0.2430]) -Greedy action tensor([ 0.5103, -0.3270, -0.0541, -0.0745]) tensor([0.3908, 0.1692, 0.2223, 0.2178]) -Greedy action tensor([ 0.3853, -0.0473, -0.0471, -0.0446]) tensor([0.3392, 0.2201, 0.2201, 0.2206]) -Greedy action tensor([ 0.7413, -0.5714, -0.0931, -0.3832]) tensor([0.4931, 0.1327, 0.2141, 0.1602]) -Greedy action tensor([ 0.6310, -0.0743, 0.3448, 0.1258]) tensor([0.3511, 0.1734, 0.2637, 0.2118]) -Greedy action tensor([ 0.7060, -0.3863, 0.0213, -0.2138]) tensor([0.4468, 0.1499, 0.2253, 0.1781]) -Greedy action tensor([ 0.6702, -0.3701, 0.0412, -0.3095]) tensor([0.4421, 0.1562, 0.2357, 0.1660]) -Greedy action tensor([ 0.6755, -0.5350, -0.0593, -0.4284]) tensor([0.4741, 0.1413, 0.2274, 0.1572]) -Greedy action tensor([ 0.7149, -0.4392, 0.0287, -0.2798]) tensor([0.4569, 0.1441, 0.2300, 0.1690]) -Greedy action tensor([-0.1389, -0.9294, 0.5756, -0.5887]) tensor([0.2419, 0.1097, 0.4942, 0.1542]) -Greedy action tensor([-0.1638, -0.1467, -0.3118, -0.4833]) tensor([0.2773, 0.2821, 0.2392, 0.2015]) -Greedy action tensor([ 0.0685, -0.6085, -0.4470, 0.4391]) tensor([0.2814, 0.1430, 0.1680, 0.4076]) -Greedy action tensor([ 1.5599, -0.4870, 0.9341, 0.6696]) tensor([0.4820, 0.0622, 0.2578, 0.1979]) -Greedy action tensor([-0.6978, 0.4340, 0.1443, -0.8693]) tensor([0.1376, 0.4269, 0.3195, 0.1160]) -Greedy action tensor([-0.7482, 0.3082, -0.4115, -0.7396]) tensor([0.1591, 0.4576, 0.2228, 0.1605]) -Greedy action tensor([ 0.7160, -0.7828, 0.4487, -0.3173]) tensor([0.4265, 0.0953, 0.3265, 0.1518]) -Greedy action tensor([-0.6599, -0.6099, 0.1798, -1.0750]) tensor([0.1989, 0.2091, 0.4606, 0.1313]) -Greedy action tensor([ 0.0699, 0.0956, 1.0862, -0.6867]) tensor([0.1902, 0.1951, 0.5255, 0.0892]) -Greedy action tensor([-0.7309, -0.8738, -0.6512, -1.0704]) tensor([0.2731, 0.2367, 0.2957, 0.1945]) -Greedy action tensor([ 0.0770, -0.2249, 0.5742, -0.9554]) tensor([0.2674, 0.1977, 0.4396, 0.0952]) -Greedy action tensor([ 0.6787, -1.0034, 0.5844, -0.6484]) tensor([0.4235, 0.0788, 0.3854, 0.1123]) -Greedy action tensor([ 0.1710, -0.8052, 0.0301, -0.2025]) tensor([0.3409, 0.1284, 0.2961, 0.2346]) -Greedy action tensor([-1.0556, -1.3532, 0.6428, -0.7456]) tensor([0.1167, 0.0866, 0.6376, 0.1591]) -Greedy action tensor([ 0.6583, -0.8393, 0.2923, 0.0106]) tensor([0.4098, 0.0916, 0.2842, 0.2144]) -Greedy action tensor([-1.4670, -0.2969, 0.2543, -1.7857]) tensor([0.0949, 0.3057, 0.5305, 0.0690]) -Greedy action tensor([ 0.6493, -1.3916, 0.6929, -0.2644]) tensor([0.3883, 0.0504, 0.4056, 0.1557]) -Greedy action tensor([ 0.2845, -1.0501, 1.0634, -0.9615]) tensor([0.2681, 0.0706, 0.5842, 0.0771]) -Greedy action tensor([ 0.0080, -1.3149, 0.3175, 0.6455]) tensor([0.2212, 0.0589, 0.3014, 0.4185]) -Greedy action tensor([-1.1565, -0.9221, -0.1014, -0.8346]) tensor([0.1535, 0.1940, 0.4408, 0.2117]) -Greedy action tensor([ 0.3282, -0.8756, -0.0652, -0.1280]) tensor([0.3834, 0.1150, 0.2587, 0.2429]) -Greedy action tensor([ 0.8425, 0.0474, -0.2525, -0.4914]) tensor([0.4879, 0.2203, 0.1632, 0.1285]) -Greedy action tensor([-0.6008, -1.2922, -0.8850, 0.6786]) tensor([0.1710, 0.0856, 0.1287, 0.6147]) -Greedy action tensor([-0.4031, -0.5592, -0.1245, -0.5038]) tensor([0.2450, 0.2096, 0.3238, 0.2216]) -Greedy action tensor([0.4818, 0.2076, 1.3523, 1.0617]) tensor([0.1685, 0.1281, 0.4024, 0.3009]) -Greedy action tensor([ 0.2888, -0.3333, 0.3205, -0.3678]) tensor([0.3239, 0.1739, 0.3343, 0.1680]) -Greedy action tensor([-0.7324, -0.6494, -0.3080, 0.0143]) tensor([0.1747, 0.1898, 0.2670, 0.3685]) -Greedy action tensor([ 0.7740, -0.2365, 1.1059, 0.5283]) tensor([0.2825, 0.1028, 0.3937, 0.2210]) -Greedy action tensor([-0.3872, -1.2527, 0.7443, -0.1863]) tensor([0.1741, 0.0733, 0.5398, 0.2129]) -Greedy action tensor([ 0.2130, 0.3425, -0.1561, -0.9504]) tensor([0.3183, 0.3623, 0.2200, 0.0994]) -Greedy action tensor([-0.3439, -0.1406, 0.1214, -1.2063]) tensor([0.2358, 0.2890, 0.3756, 0.0996]) -Greedy action tensor([-1.1113, -1.0025, 0.4475, -0.8741]) tensor([0.1229, 0.1370, 0.5842, 0.1558]) -Greedy action tensor([-0.0428, -0.1705, 0.7375, -0.2884]) tensor([0.2064, 0.1817, 0.4504, 0.1615]) -Greedy action tensor([-0.1482, -1.5191, 0.2843, -0.0401]) tensor([0.2558, 0.0649, 0.3942, 0.2850]) -Greedy action tensor([ 1.1812, -0.3538, 0.3586, 1.1114]) tensor([0.3865, 0.0833, 0.1698, 0.3604]) -Greedy action tensor([ 0.3099, 0.1348, 0.5375, -0.5925]) tensor([0.2857, 0.2398, 0.3587, 0.1159]) -Greedy action tensor([ 1.0304, -0.6417, 0.2936, 0.7608]) tensor([0.4115, 0.0773, 0.1970, 0.3143]) -Greedy action tensor([-1.0811, -0.4002, -0.9529, -0.2517]) tensor([0.1561, 0.3085, 0.1775, 0.3579]) -Greedy action tensor([-0.5835, -1.1299, 0.2695, -0.4488]) tensor([0.1972, 0.1142, 0.4629, 0.2257]) -Greedy action tensor([ 0.0883, -0.9181, -0.7293, 0.0964]) tensor([0.3552, 0.1298, 0.1568, 0.3581]) -Greedy action tensor([ 1.0698, -0.3511, 0.0774, -0.0367]) tensor([0.5147, 0.1243, 0.1908, 0.1702]) -Greedy action tensor([ 1.0485, -0.2420, -0.2379, 0.2230]) tensor([0.5027, 0.1383, 0.1389, 0.2202]) -Greedy action tensor([-0.2897, 0.0426, -0.6057, -0.0791]) tensor([0.2295, 0.3199, 0.1673, 0.2833]) -Greedy action tensor([-0.2624, -0.1007, -0.4819, -0.4400]) tensor([0.2621, 0.3081, 0.2104, 0.2194]) -Greedy action tensor([ 1.5771, -1.0907, 0.2383, 0.6534]) tensor([0.5785, 0.0402, 0.1517, 0.2297]) -Greedy action tensor([ 0.4378, -1.4229, 0.1297, 0.3189]) tensor([0.3599, 0.0560, 0.2645, 0.3196]) -Greedy action tensor([-0.2589, -0.3167, -0.3699, -0.0791]) tensor([0.2478, 0.2339, 0.2218, 0.2966]) -Greedy action tensor([ 0.3327, -0.2367, -0.2233, -0.5741]) tensor([0.3932, 0.2225, 0.2255, 0.1588]) -Greedy action tensor([-0.2748, -0.6183, 0.0052, -0.1418]) tensor([0.2395, 0.1699, 0.3170, 0.2736]) -Greedy action tensor([ 0.6926, -1.3362, -0.5688, 0.3311]) tensor([0.4736, 0.0623, 0.1342, 0.3299]) -Greedy action tensor([-1.0655, -0.8145, 0.5666, -1.2858]) tensor([0.1219, 0.1567, 0.6236, 0.0978]) -Greedy action tensor([-0.3945, -1.1965, -0.3815, -0.5444]) tensor([0.3010, 0.1350, 0.3049, 0.2591]) -Greedy action tensor([ 0.0391, -0.2738, -0.4320, -0.1529]) tensor([0.3144, 0.2299, 0.1963, 0.2595]) -Greedy action tensor([0.6844, 0.0649, 0.3005, 0.7626]) tensor([0.3030, 0.1631, 0.2064, 0.3276]) -Greedy action tensor([-0.0921, -0.0028, -0.3985, -0.1025]) tensor([0.2618, 0.2863, 0.1927, 0.2591]) -Greedy action tensor([ 1.2844, -0.6785, -0.0152, 1.0117]) tensor([0.4599, 0.0646, 0.1254, 0.3501]) -Greedy action tensor([1.0962, 0.1012, 0.0745, 0.6646]) tensor([0.4203, 0.1554, 0.1513, 0.2730]) -Greedy action tensor([-0.8328, -0.8945, -0.6840, 0.5436]) tensor([0.1416, 0.1331, 0.1643, 0.5609]) -Greedy action tensor([ 0.2495, -0.3304, -0.9599, 0.2399]) tensor([0.3510, 0.1966, 0.1047, 0.3477]) -Greedy action tensor([ 0.6871, 0.8698, -0.4335, 0.1560]) tensor([0.3211, 0.3854, 0.1047, 0.1888]) -Greedy action tensor([-0.7007, -0.4584, -0.0945, -1.2812]) tensor([0.2143, 0.2730, 0.3928, 0.1199]) -Greedy action tensor([ 0.5803, -0.3532, 0.2752, 0.8343]) tensor([0.2924, 0.1150, 0.2155, 0.3770]) -Greedy action tensor([-1.3028, -1.3117, -1.2082, 0.4447]) tensor([0.1132, 0.1122, 0.1245, 0.6500]) -Greedy action tensor([ 1.4836, -0.7890, 0.0584, 1.4537]) tensor([0.4321, 0.0445, 0.1039, 0.4194]) -Greedy action tensor([0.9899, 0.2212, 0.4194, 0.5334]) tensor([0.3756, 0.1741, 0.2123, 0.2380]) -Greedy action tensor([ 0.2116, -1.1122, 0.6413, 0.9064]) tensor([0.2081, 0.0554, 0.3198, 0.4168]) -Greedy action tensor([-0.2777, -0.7380, 0.8605, -0.5074]) tensor([0.1803, 0.1138, 0.5627, 0.1433]) -Greedy action tensor([ 1.1718, -1.0134, 1.4242, 0.4433]) tensor([0.3469, 0.0390, 0.4466, 0.1674]) -Greedy action tensor([-0.1067, -0.6397, 0.4350, -0.4200]) tensor([0.2477, 0.1454, 0.4258, 0.1811]) -Greedy action tensor([ 0.0400, -0.9866, 0.1003, -0.7297]) tensor([0.3468, 0.1242, 0.3683, 0.1606]) -Greedy action tensor([-0.0156, -0.9614, 0.5371, -1.4285]) tensor([0.2968, 0.1153, 0.5157, 0.0722]) -Greedy action tensor([ 1.0791, -0.7135, 0.1122, 0.3551]) tensor([0.4922, 0.0820, 0.1872, 0.2386]) -Greedy action tensor([-0.0528, -1.5717, 1.1728, -0.3244]) tensor([0.1856, 0.0406, 0.6323, 0.1415]) -Greedy action tensor([-0.4486, -1.5512, 1.4759, 1.0909]) tensor([0.0778, 0.0258, 0.5334, 0.3629]) -Greedy action tensor([ 0.2979, -1.6245, -0.4781, 0.2538]) tensor([0.3901, 0.0571, 0.1795, 0.3733]) -Greedy action tensor([ 0.6266, -0.6374, -0.2396, 0.1644]) tensor([0.4286, 0.1211, 0.1803, 0.2700]) -Greedy action tensor([ 7.3649e-01, -1.1801e+00, -4.5768e-01, 1.7476e-04]) tensor([0.5184, 0.0763, 0.1571, 0.2483]) -Greedy action tensor([-0.0570, 0.5362, -0.7700, -0.3486]) tensor([0.2471, 0.4472, 0.1211, 0.1846]) -Greedy action tensor([-0.1408, -0.3592, 0.0450, -0.4296]) tensor([0.2662, 0.2139, 0.3205, 0.1994]) -Greedy action tensor([ 0.2053, -0.0188, 0.8880, -1.0198]) tensor([0.2456, 0.1963, 0.4860, 0.0721]) -Greedy action tensor([-0.0437, 0.4804, 0.1062, -0.6617]) tensor([0.2278, 0.3847, 0.2647, 0.1228]) -Greedy action tensor([ 1.2554, -0.3119, -0.5462, 0.0993]) tensor([0.5923, 0.1236, 0.0977, 0.1864]) -Greedy action tensor([ 1.3732, -0.5577, -0.5797, 0.0534]) tensor([0.6435, 0.0933, 0.0913, 0.1719]) -Greedy action tensor([ 1.0989, 0.0353, -0.7189, 0.5132]) tensor([0.4844, 0.1672, 0.0787, 0.2697]) -Greedy action tensor([ 1.4634, -0.4201, -0.7172, 0.4343]) tensor([0.6164, 0.0937, 0.0696, 0.2202]) -Greedy action tensor([ 1.2843, -0.2086, -0.7459, 0.0485]) tensor([0.6073, 0.1365, 0.0797, 0.1765]) -Greedy action tensor([ 1.3169, -0.5310, 0.0833, -0.0418]) tensor([0.5862, 0.0924, 0.1707, 0.1507]) -Greedy action tensor([ 1.3057, -0.6400, -0.4064, 0.7703]) tensor([0.5239, 0.0749, 0.0946, 0.3067]) -Greedy action tensor([ 1.4849, 0.1237, -0.9375, 0.1888]) tensor([0.6178, 0.1584, 0.0548, 0.1690]) -Greedy action tensor([ 1.1662, -0.3612, -0.0451, -0.0047]) tensor([0.5479, 0.1190, 0.1632, 0.1699]) -Greedy action tensor([ 1.1117, -0.3622, -0.4332, 0.3109]) tensor([0.5287, 0.1211, 0.1128, 0.2374]) -Greedy action tensor([ 1.1262, -0.2715, -0.2453, 0.1281]) tensor([0.5349, 0.1322, 0.1357, 0.1972]) -Greedy action tensor([ 1.5176, -0.1229, -0.6532, 0.2468]) tensor([0.6295, 0.1221, 0.0718, 0.1766]) -Greedy action tensor([ 0.5176, -0.4375, 0.1665, -0.2059]) tensor([0.3885, 0.1495, 0.2735, 0.1885]) -Greedy action tensor([ 1.5825, -1.2016, -0.3153, 0.1576]) tensor([0.6886, 0.0425, 0.1032, 0.1656]) -Greedy action tensor([ 1.6087, 0.3382, -0.3687, 0.1990]) tensor([0.6012, 0.1687, 0.0832, 0.1468]) -Greedy action tensor([ 1.4673, -0.3346, -0.4034, 0.2726]) tensor([0.6166, 0.1017, 0.0950, 0.1867]) -Greedy action tensor([ 1.8632, -0.5940, -0.6793, 0.3803]) tensor([0.7187, 0.0616, 0.0565, 0.1631]) -Greedy action tensor([ 1.2007, -0.2634, -0.1630, 0.1616]) tensor([0.5432, 0.1257, 0.1389, 0.1922]) -Greedy action tensor([ 1.7849, 0.1031, -0.4391, -0.2359]) tensor([0.7009, 0.1304, 0.0758, 0.0929]) -Greedy action tensor([ 1.5199, -0.4775, -0.6165, 0.4852]) tensor([0.6215, 0.0843, 0.0734, 0.2208]) -Greedy action tensor([ 1.1997, -0.4029, -0.3313, 0.2181]) tensor([0.5579, 0.1123, 0.1207, 0.2091]) -Greedy action tensor([ 1.6268, -0.1938, -0.6606, 0.2959]) tensor([0.6546, 0.1060, 0.0665, 0.1730]) -Greedy action tensor([ 1.1730, -0.1557, -0.8429, 0.4199]) tensor([0.5351, 0.1417, 0.0713, 0.2520]) -Greedy action tensor([ 1.7056, -0.8024, -0.2437, 0.4058]) tensor([0.6683, 0.0544, 0.0951, 0.1822]) -Greedy action tensor([ 1.5414, -0.5979, -0.2772, 0.3033]) tensor([0.6370, 0.0750, 0.1033, 0.1847]) -Greedy action tensor([ 1.1154, 0.1713, -0.0759, -0.0817]) tensor([0.5013, 0.1950, 0.1523, 0.1514]) -Greedy action tensor([ 1.8212, -0.9614, -0.1496, 0.3672]) tensor([0.6969, 0.0431, 0.0971, 0.1628]) -Greedy action tensor([ 1.3269, -0.3635, -0.6476, 0.1673]) tensor([0.6109, 0.1127, 0.0848, 0.1916]) -Greedy action tensor([ 1.5335, -0.3966, -0.5863, 0.2339]) tensor([0.6503, 0.0944, 0.0781, 0.1773]) -Greedy action tensor([ 1.2143, -0.3110, -0.9472, 0.5498]) tensor([0.5414, 0.1178, 0.0623, 0.2785]) -Greedy action tensor([ 1.3478, -0.4054, -0.4290, 0.4022]) tensor([0.5777, 0.1001, 0.0977, 0.2244]) -Greedy action tensor([ 1.5651, -0.4485, -0.4948, 0.3429]) tensor([0.6429, 0.0858, 0.0819, 0.1894]) -Greedy action tensor([ 1.4424, -0.6334, -0.6422, 0.1060]) tensor([0.6611, 0.0829, 0.0822, 0.1737]) -Greedy action tensor([ 1.6498, -0.7941, -0.1010, 0.5450]) tensor([0.6283, 0.0545, 0.1091, 0.2081]) -Greedy action tensor([ 1.3581, -0.3100, -0.4263, 0.1972]) tensor([0.5989, 0.1130, 0.1006, 0.1876]) -Greedy action tensor([ 1.4817, -0.3017, -0.8044, -0.0215]) tensor([0.6702, 0.1126, 0.0681, 0.1491]) -Greedy action tensor([ 1.4237, -0.6333, 0.0313, 0.5024]) tensor([0.5636, 0.0720, 0.1400, 0.2243]) -Greedy action tensor([ 1.5159, -0.8734, -0.3316, 0.3092]) tensor([0.6458, 0.0592, 0.1018, 0.1932]) -Greedy action tensor([ 1.6806, -0.2897, -0.8121, 0.2065]) tensor([0.6891, 0.0961, 0.0570, 0.1578]) -Greedy action tensor([ 1.5198, -0.6486, -0.3261, 0.3566]) tensor([0.6310, 0.0722, 0.0996, 0.1972]) -Greedy action tensor([ 1.5240, -0.5759, -0.3217, 0.4387]) tensor([0.6180, 0.0757, 0.0976, 0.2087]) -Greedy action tensor([ 1.8204, -0.2525, -0.6436, 0.4898]) tensor([0.6779, 0.0853, 0.0577, 0.1792]) -Greedy action tensor([ 1.4800, -0.5575, -0.9773, 0.4671]) tensor([0.6332, 0.0825, 0.0542, 0.2300]) -Greedy action tensor([ 1.3638, 0.0100, -0.7691, 0.1382]) tensor([0.5987, 0.1546, 0.0709, 0.1758]) -Greedy action tensor([ 1.4554, -1.1499, -0.2262, -0.1284]) tensor([0.6825, 0.0504, 0.1270, 0.1401]) -Greedy action tensor([ 1.7559, -0.3427, -0.6380, 0.1027]) tensor([0.7116, 0.0873, 0.0649, 0.1362]) -Greedy action tensor([ 1.1145, -0.7369, -0.2780, -0.0431]) tensor([0.5815, 0.0913, 0.1445, 0.1827]) -Greedy action tensor([ 1.4477, -0.1113, -0.8408, 0.5098]) tensor([0.5871, 0.1235, 0.0595, 0.2298]) -Greedy action tensor([ 2.7085, -1.6860, 0.3231, 1.0985]) tensor([0.7667, 0.0095, 0.0706, 0.1533]) -Greedy action tensor([ 1.6178, -0.1430, -0.3577, 0.3875]) tensor([0.6239, 0.1073, 0.0865, 0.1823]) -Greedy action tensor([ 1.4807, -0.5518, -0.5113, 0.2261]) tensor([0.6441, 0.0844, 0.0879, 0.1837]) -Greedy action tensor([ 1.3807, 0.1523, -0.8789, 0.5328]) tensor([0.5478, 0.1604, 0.0572, 0.2346]) -Greedy action tensor([ 1.7439, -0.9687, -0.3723, 0.5939]) tensor([0.6651, 0.0441, 0.0801, 0.2106]) -Greedy action tensor([ 2.4609, -1.5360, -0.1803, 0.6995]) tensor([0.7927, 0.0146, 0.0565, 0.1362]) -Greedy action tensor([ 1.2908, -0.4642, -0.5486, 0.5446]) tensor([0.5537, 0.0957, 0.0880, 0.2626]) -Greedy action tensor([ 1.9664, -0.4216, -0.7158, 0.4618]) tensor([0.7234, 0.0664, 0.0495, 0.1607]) -Greedy action tensor([ 1.2721, -0.6615, -0.5146, 0.2292]) tensor([0.6008, 0.0869, 0.1006, 0.2117]) -Greedy action tensor([ 2.7456, -1.0823, 0.1898, 1.1869]) tensor([0.7635, 0.0166, 0.0593, 0.1606]) -Greedy action tensor([ 1.2750e+00, -7.3539e-01, -2.3881e-01, 5.7220e-06]) tensor([0.6122, 0.0820, 0.1347, 0.1711]) -Greedy action tensor([ 1.4859, -0.3656, -0.7054, 0.2542]) tensor([0.6408, 0.1006, 0.0716, 0.1870]) -Greedy action tensor([ 2.3853, -0.0892, -0.5350, 0.3571]) tensor([0.7876, 0.0663, 0.0425, 0.1036]) -Greedy action tensor([ 0.9917, -0.3134, -0.4085, 0.2587]) tensor([0.5005, 0.1357, 0.1234, 0.2405]) -Greedy action tensor([ 1.0483, -0.2679, -0.4736, 0.3497]) tensor([0.5041, 0.1352, 0.1100, 0.2507]) -Greedy action tensor([ 1.8147, -0.1978, -0.7942, 0.6063]) tensor([0.6640, 0.0887, 0.0489, 0.1983]) -Greedy action tensor([ 1.4487, -0.3546, -0.1393, 0.5521]) tensor([0.5627, 0.0927, 0.1150, 0.2296]) -Greedy action tensor([ 1.2271, -0.1968, -0.7421, 0.3333]) tensor([0.5588, 0.1346, 0.0780, 0.2286]) -Greedy action tensor([ 1.6632, -0.2650, -0.3161, 0.0405]) tensor([0.6752, 0.0982, 0.0933, 0.1333]) -Greedy action tensor([ 1.7679, -0.8980, -0.2485, 0.5295]) tensor([0.6700, 0.0466, 0.0892, 0.1942]) -Greedy action tensor([ 1.3986, -0.5626, -0.1616, 0.2849]) tensor([0.5955, 0.0838, 0.1251, 0.1955]) -Greedy action tensor([ 1.5248, -0.6611, -0.3198, 0.1332]) tensor([0.6583, 0.0740, 0.1041, 0.1637]) -Greedy action tensor([ 1.1770, -0.4980, -0.3552, -0.1521]) tensor([0.5995, 0.1123, 0.1295, 0.1587]) -Greedy action tensor([ 1.1808, -0.6941, -0.3269, 0.8675]) tensor([0.4749, 0.0728, 0.1051, 0.3471]) -Greedy action tensor([ 1.4331, -0.5181, -0.4508, 0.1686]) tensor([0.6343, 0.0901, 0.0964, 0.1791]) -Greedy action tensor([ 1.6264, -0.4961, -0.6747, 0.5938]) tensor([0.6345, 0.0760, 0.0635, 0.2259]) -Greedy action tensor([ 1.5153, -0.3618, -0.8581, 0.4309]) tensor([0.6312, 0.0966, 0.0588, 0.2134]) -Greedy action tensor([ 1.6044, 0.0169, -0.6967, 0.1725]) tensor([0.6479, 0.1325, 0.0649, 0.1548]) -Greedy action tensor([ 1.9793, -1.2407, -0.2130, 0.7970]) tensor([0.6858, 0.0274, 0.0766, 0.2102]) -Greedy action tensor([ 1.4496, -0.5065, -0.4975, 0.2060]) tensor([0.6360, 0.0899, 0.0907, 0.1834]) -Greedy action tensor([ 1.0593, 0.2208, -1.0662, 0.4665]) tensor([0.4752, 0.2054, 0.0567, 0.2627]) -Greedy action tensor([ 1.0546, -0.4184, -0.6134, 0.1069]) tensor([0.5539, 0.1270, 0.1045, 0.2147]) -Greedy action tensor([ 1.5770, -0.5639, -0.3205, 0.2881]) tensor([0.6481, 0.0762, 0.0972, 0.1786]) -Greedy action tensor([-1.9052, -0.2720, 0.6191, -0.1569]) tensor([0.0411, 0.2103, 0.5127, 0.2360]) -Greedy action tensor([-1.8625, -0.4647, 0.6348, -0.1359]) tensor([0.0438, 0.1773, 0.5325, 0.2464]) -Greedy action tensor([-1.9091, -0.4658, 0.6734, -0.1505]) tensor([0.0412, 0.1745, 0.5451, 0.2392]) -Greedy action tensor([-1.1983, 0.2939, 0.2375, 0.0965]) tensor([0.0752, 0.3343, 0.3160, 0.2745]) -Greedy action tensor([-0.4399, 0.9610, 0.0357, 0.1779]) tensor([0.1173, 0.4762, 0.1888, 0.2176]) -Greedy action tensor([-1.8603, -0.6746, 1.3251, 0.3607]) tensor([0.0265, 0.0869, 0.6419, 0.2447]) -Greedy action tensor([1.0511, 1.3690, 0.2161, 0.9154]) tensor([0.2717, 0.3733, 0.1179, 0.2372]) -Greedy action tensor([-1.6252, 0.3611, 0.5875, -0.4618]) tensor([0.0485, 0.3533, 0.4431, 0.1552]) -Greedy action tensor([-1.8028, -0.1607, 0.5890, -0.0905]) tensor([0.0442, 0.2282, 0.4829, 0.2448]) -Greedy action tensor([-1.6275, -0.2558, 0.5584, -0.0043]) tensor([0.0529, 0.2085, 0.4706, 0.2681]) -Greedy action tensor([-1.7854, -0.4076, 0.5794, -0.1089]) tensor([0.0477, 0.1893, 0.5079, 0.2552]) -Greedy action tensor([-1.9405, -0.4404, 0.6647, -0.1774]) tensor([0.0402, 0.1804, 0.5447, 0.2347]) -Greedy action tensor([-1.8855, -0.4449, 0.6429, -0.1501]) tensor([0.0427, 0.1803, 0.5350, 0.2421]) -Greedy action tensor([-1.9186, -0.3965, 0.6517, -0.1633]) tensor([0.0409, 0.1875, 0.5348, 0.2367]) -Greedy action tensor([-1.4961, -0.3460, 0.3948, 0.0670]) tensor([0.0643, 0.2030, 0.4259, 0.3068]) -Greedy action tensor([-1.6965, -0.1568, 0.5021, -0.0862]) tensor([0.0508, 0.2370, 0.4579, 0.2543]) -Greedy action tensor([-1.7182, -0.4749, 0.5576, -0.0750]) tensor([0.0516, 0.1790, 0.5025, 0.2669]) -Greedy action tensor([-1.7412, -0.1874, 0.4965, -0.2494]) tensor([0.0512, 0.2420, 0.4795, 0.2274]) -Greedy action tensor([-1.8217, -0.4855, 0.5844, -0.1091]) tensor([0.0466, 0.1775, 0.5173, 0.2586]) -Greedy action tensor([-1.9100, -0.4443, 0.6402, -0.1601]) tensor([0.0419, 0.1812, 0.5361, 0.2408]) -Greedy action tensor([-1.0054, -0.2440, 0.2535, -0.0447]) tensor([0.1078, 0.2308, 0.3796, 0.2817]) -Greedy action tensor([-1.8300, -0.4184, 0.6055, -0.1347]) tensor([0.0455, 0.1867, 0.5198, 0.2480]) -Greedy action tensor([-1.7665, -0.2222, 0.5413, -0.1049]) tensor([0.0476, 0.2230, 0.4786, 0.2508]) -Greedy action tensor([-1.8759, -0.2485, 0.5989, -0.1404]) tensor([0.0423, 0.2153, 0.5025, 0.2399]) -Greedy action tensor([-1.7100, -0.2107, 0.5498, -0.0182]) tensor([0.0488, 0.2186, 0.4676, 0.2650]) -Greedy action tensor([-1.9092, -0.3356, 0.6265, -0.1619]) tensor([0.0413, 0.1994, 0.5220, 0.2373]) -Greedy action tensor([-1.7165, -0.4758, 0.5609, -0.1180]) tensor([0.0522, 0.1805, 0.5091, 0.2582]) -Greedy action tensor([-1.8396, -0.3705, 0.6065, -0.1289]) tensor([0.0446, 0.1938, 0.5148, 0.2468]) -Greedy action tensor([-1.8984, -0.4187, 0.6407, -0.1498]) tensor([0.0420, 0.1845, 0.5321, 0.2414]) -Greedy action tensor([-1.9329, -0.4073, 0.6534, -0.1764]) tensor([0.0405, 0.1864, 0.5383, 0.2348]) -Greedy action tensor([-1.6221, -0.5526, 0.5185, -0.0122]) tensor([0.0574, 0.1673, 0.4882, 0.2872]) -Greedy action tensor([-1.9324, -0.4169, 0.6568, -0.1737]) tensor([0.0405, 0.1845, 0.5398, 0.2352]) -Greedy action tensor([-1.9030, -0.4485, 0.6505, -0.1573]) tensor([0.0419, 0.1794, 0.5386, 0.2401]) -Greedy action tensor([-1.8777, -0.4536, 0.6314, -0.1541]) tensor([0.0434, 0.1802, 0.5333, 0.2431]) -Greedy action tensor([-1.6638, -0.4618, 0.6146, -0.2767]) tensor([0.0553, 0.1839, 0.5395, 0.2213]) -Greedy action tensor([-1.7278, -0.3907, 0.5448, -0.0757]) tensor([0.0507, 0.1930, 0.4919, 0.2645]) -Greedy action tensor([-1.1751, 0.3726, 0.2973, -0.1082]) tensor([0.0771, 0.3625, 0.3362, 0.2241]) -Greedy action tensor([-1.6081, -0.0112, 0.4140, 0.0397]) tensor([0.0535, 0.2642, 0.4043, 0.2780]) -Greedy action tensor([-1.9227, -0.4422, 0.6724, -0.1494]) tensor([0.0405, 0.1781, 0.5428, 0.2386]) -Greedy action tensor([-1.9053, -0.4233, 0.6493, -0.1585]) tensor([0.0417, 0.1834, 0.5360, 0.2390]) -Greedy action tensor([-1.7154, -0.3672, 0.6523, -0.0541]) tensor([0.0481, 0.1852, 0.5134, 0.2533]) -Greedy action tensor([-1.8784, -0.4412, 0.6344, -0.1487]) tensor([0.0431, 0.1815, 0.5322, 0.2432]) -Greedy action tensor([-1.7240, -0.3921, 0.5466, -0.0971]) tensor([0.0511, 0.1937, 0.4951, 0.2601]) -Greedy action tensor([-1.9482, -0.4523, 0.6684, -0.1829]) tensor([0.0400, 0.1786, 0.5477, 0.2338]) -Greedy action tensor([-1.8463, -0.2792, 0.6129, -0.1167]) tensor([0.0432, 0.2072, 0.5057, 0.2438]) -Greedy action tensor([-1.8655, -0.1688, 0.5849, -0.1398]) tensor([0.0423, 0.2305, 0.4899, 0.2373]) -Greedy action tensor([-1.1403, -0.5816, 1.1841, 1.3461]) tensor([0.0400, 0.0700, 0.4090, 0.4810]) -Greedy action tensor([-1.9224, -0.4130, 0.6509, -0.1699]) tensor([0.0410, 0.1854, 0.5372, 0.2364]) -Greedy action tensor([-1.9174, -0.4020, 0.6531, -0.1643]) tensor([0.0410, 0.1866, 0.5358, 0.2366]) -Greedy action tensor([-1.6811, -0.4467, 0.5553, 0.0619]) tensor([0.0513, 0.1761, 0.4797, 0.2929]) -Greedy action tensor([-1.8410, -0.4886, 0.6083, -0.1582]) tensor([0.0458, 0.1771, 0.5305, 0.2465]) -Greedy action tensor([-1.3935, -0.4560, 0.4482, 0.0270]) tensor([0.0714, 0.1824, 0.4505, 0.2957]) -Greedy action tensor([-1.8666, -0.4581, 0.6342, -0.1411]) tensor([0.0437, 0.1786, 0.5325, 0.2452]) -Greedy action tensor([-1.4367, 0.0225, 0.4933, -0.2615]) tensor([0.0648, 0.2788, 0.4465, 0.2099]) -Greedy action tensor([-1.3878, -0.6493, 0.3875, 0.1437]) tensor([0.0734, 0.1537, 0.4333, 0.3396]) -Greedy action tensor([-1.8918, -0.4586, 0.6482, -0.1515]) tensor([0.0424, 0.1778, 0.5380, 0.2418]) -Greedy action tensor([-1.3180, -0.0099, 0.3206, 0.1691]) tensor([0.0701, 0.2592, 0.3607, 0.3100]) -Greedy action tensor([-1.9078, -0.4228, 0.6520, -0.1635]) tensor([0.0415, 0.1834, 0.5373, 0.2377]) -Greedy action tensor([-1.9066, -0.3684, 0.6378, -0.1522]) tensor([0.0414, 0.1926, 0.5269, 0.2391]) -Greedy action tensor([-1.9143, -0.4648, 0.6510, -0.1615]) tensor([0.0416, 0.1773, 0.5410, 0.2401]) -Greedy action tensor([-0.8389, -0.1340, 0.2012, -0.2008]) tensor([0.1291, 0.2613, 0.3653, 0.2444]) -Greedy action tensor([-1.1174, -0.5972, 0.2794, 0.3087]) tensor([0.0919, 0.1545, 0.3713, 0.3823]) -Greedy action tensor([-1.5803, -0.1425, 0.4618, -0.0076]) tensor([0.0564, 0.2374, 0.4345, 0.2717]) -Greedy action tensor([-1.3244, -0.0935, 0.3474, -0.7977]) tensor([0.0874, 0.2993, 0.4652, 0.1480]) -Greedy action tensor([-1.8930, -0.4058, 0.6344, -0.1572]) tensor([0.0423, 0.1873, 0.5301, 0.2402]) -Greedy action tensor([-1.9313, -0.4128, 0.6468, -0.1741]) tensor([0.0408, 0.1861, 0.5369, 0.2363]) -Greedy action tensor([-1.9270, -0.4619, 0.6575, -0.1708]) tensor([0.0410, 0.1775, 0.5439, 0.2376]) -Greedy action tensor([-1.7729, 0.0221, 0.5272, -0.0444]) tensor([0.0442, 0.2660, 0.4409, 0.2489]) -Greedy action tensor([-1.9207, -0.4347, 0.6550, -0.1707]) tensor([0.0411, 0.1818, 0.5404, 0.2367]) -Greedy action tensor([-0.7472, 0.0807, 0.2190, -0.4436]) tensor([0.1375, 0.3147, 0.3614, 0.1863]) -Greedy action tensor([-1.7556, -0.3188, 0.5813, -0.1744]) tensor([0.0490, 0.2061, 0.5069, 0.2381]) -Greedy action tensor([-1.8113, -0.4165, 0.5981, -0.1225]) tensor([0.0464, 0.1870, 0.5157, 0.2509]) -Greedy action tensor([-1.9111, -0.4394, 0.6511, -0.1625]) tensor([0.0416, 0.1810, 0.5387, 0.2388]) -Greedy action tensor([-1.9085, -0.4455, 0.6542, -0.1612]) tensor([0.0416, 0.1797, 0.5398, 0.2388]) -Greedy action tensor([-1.9296, -0.4288, 0.6602, -0.1721]) tensor([0.0406, 0.1822, 0.5415, 0.2356]) -Greedy action tensor([-1.8169, -0.2798, 0.5608, -0.1020]) tensor([0.0455, 0.2115, 0.4903, 0.2527]) -Greedy action tensor([-1.3444, -0.2555, 0.6175, 0.1449]) tensor([0.0644, 0.1915, 0.4584, 0.2857]) -Greedy action tensor([-1.9114, -0.3823, 0.6411, -0.1593]) tensor([0.0413, 0.1905, 0.5301, 0.2381]) -Greedy action tensor([-1.9145, -0.3449, 0.6347, -0.1630]) tensor([0.0410, 0.1972, 0.5252, 0.2365]) -Greedy action tensor([-1.1983, -0.0500, 0.2601, 0.0426]) tensor([0.0840, 0.2647, 0.3609, 0.2904]) -Greedy action tensor([-1.8217, -0.2143, 0.5724, -0.1221]) tensor([0.0446, 0.2226, 0.4888, 0.2441]) -Greedy action tensor([ 0.5743, 0.0981, -0.0045, 0.0176]) tensor([0.3630, 0.2255, 0.2035, 0.2080]) -Greedy action tensor([ 0.8338, -0.4570, -0.0075, -0.2655]) tensor([0.4904, 0.1349, 0.2114, 0.1633]) -Greedy action tensor([ 0.5368, 0.1067, -0.1010, 0.0851]) tensor([0.3552, 0.2310, 0.1877, 0.2261]) -Greedy action tensor([ 0.9676, -0.4578, -0.1002, -0.5482]) tensor([0.5544, 0.1333, 0.1906, 0.1218]) -Greedy action tensor([ 0.5373, -0.0869, 0.1220, -0.0090]) tensor([0.3604, 0.1930, 0.2379, 0.2087]) -Greedy action tensor([ 1.0167, -0.7321, -0.0191, -0.4782]) tensor([0.5704, 0.0992, 0.2025, 0.1279]) -Greedy action tensor([ 0.9375, -0.7391, 0.1537, -0.4921]) tensor([0.5311, 0.0993, 0.2425, 0.1271]) -Greedy action tensor([ 0.6906, -0.6868, 0.1540, -0.4637]) tensor([0.4646, 0.1172, 0.2717, 0.1465]) -Greedy action tensor([ 0.8071, -0.5188, -0.1416, -0.2971]) tensor([0.5040, 0.1338, 0.1952, 0.1670]) -Greedy action tensor([ 0.6284, -0.3802, -0.0077, -0.2884]) tensor([0.4359, 0.1590, 0.2308, 0.1743]) -Greedy action tensor([ 0.8413, -0.3682, -0.0921, -0.1192]) tensor([0.4821, 0.1438, 0.1896, 0.1845]) -Greedy action tensor([ 1.0725, -0.8008, 0.1101, -0.4992]) tensor([0.5736, 0.0881, 0.2191, 0.1191]) -Greedy action tensor([ 0.2167, 0.0111, 0.0446, -0.3314]) tensor([0.3092, 0.2517, 0.2603, 0.1787]) -Greedy action tensor([ 0.3034, 0.3430, -0.1240, 0.0733]) tensor([0.2868, 0.2984, 0.1870, 0.2278]) -Greedy action tensor([0.4626, 0.0533, 0.0274, 0.0212]) tensor([0.3385, 0.2248, 0.2190, 0.2177]) -Greedy action tensor([ 0.9001, -0.9587, -0.1073, -0.3877]) tensor([0.5565, 0.0867, 0.2032, 0.1535]) -Greedy action tensor([ 0.8034, -0.5349, -0.2711, -0.4390]) tensor([0.5284, 0.1386, 0.1804, 0.1525]) -Greedy action tensor([ 1.0063, -0.2587, -0.1053, -0.2672]) tensor([0.5288, 0.1492, 0.1740, 0.1480]) -Greedy action tensor([ 0.9253, -0.3740, -0.0448, -0.3805]) tensor([0.5201, 0.1418, 0.1971, 0.1409]) -Greedy action tensor([ 0.6679, -0.1703, -0.2058, 0.0539]) tensor([0.4182, 0.1809, 0.1746, 0.2263]) -Greedy action tensor([ 0.6767, -0.3646, -0.1046, -0.3099]) tensor([0.4579, 0.1617, 0.2097, 0.1708]) -Greedy action tensor([ 1.1328, -0.5872, -0.0560, -0.5849]) tensor([0.6013, 0.1077, 0.1831, 0.1079]) -Greedy action tensor([ 0.8711, -0.3491, 0.0265, -0.4094]) tensor([0.4993, 0.1474, 0.2146, 0.1388]) -Greedy action tensor([ 1.1150, -0.7436, -0.0124, -0.5273]) tensor([0.5976, 0.0932, 0.1936, 0.1157]) -Greedy action tensor([ 0.7780, -0.6866, 0.0200, -0.3192]) tensor([0.4917, 0.1137, 0.2304, 0.1641]) -Greedy action tensor([ 0.4148, -0.3986, -0.1652, 0.0652]) tensor([0.3693, 0.1637, 0.2067, 0.2603]) -Greedy action tensor([ 0.2402, -0.1838, -0.0785, -0.2218]) tensor([0.3321, 0.2173, 0.2414, 0.2092]) -Greedy action tensor([ 0.5066, -0.4532, 0.0664, -0.4893]) tensor([0.4173, 0.1598, 0.2687, 0.1542]) -Greedy action tensor([ 0.9838, -0.7000, 0.1214, -0.5197]) tensor([0.5464, 0.1014, 0.2307, 0.1215]) -Greedy action tensor([ 0.7837, -0.2915, 0.0848, -0.1078]) tensor([0.4448, 0.1518, 0.2211, 0.1824]) -Greedy action tensor([ 0.8145, -0.5476, -0.1188, -0.1882]) tensor([0.4960, 0.1270, 0.1950, 0.1820]) -Greedy action tensor([ 0.5517, -0.6875, -0.1439, -0.2131]) tensor([0.4437, 0.1285, 0.2213, 0.2065]) -Greedy action tensor([ 1.1833, -0.4700, -0.1778, -0.5274]) tensor([0.6141, 0.1175, 0.1574, 0.1110]) -Greedy action tensor([ 1.0256, -0.9026, 0.0997, -0.6024]) tensor([0.5754, 0.0837, 0.2280, 0.1130]) -Greedy action tensor([ 0.1781, 0.2525, -0.0665, -0.0931]) tensor([0.2760, 0.2974, 0.2161, 0.2105]) -Greedy action tensor([ 0.4391, -0.1040, -0.0005, -0.0707]) tensor([0.3539, 0.2056, 0.2280, 0.2125]) -Greedy action tensor([ 0.8795, -0.6975, 0.0586, -0.3696]) tensor([0.5172, 0.1069, 0.2276, 0.1483]) -Greedy action tensor([ 0.8918, -1.0496, 0.0394, -0.5877]) tensor([0.5563, 0.0798, 0.2372, 0.1267]) -Greedy action tensor([ 0.9657, -0.5726, -0.1526, -0.6472]) tensor([0.5744, 0.1234, 0.1877, 0.1145]) -Greedy action tensor([ 0.5778, -0.3518, -0.0894, -0.1933]) tensor([0.4219, 0.1665, 0.2165, 0.1951]) -Greedy action tensor([ 0.4444, -0.0734, -0.0115, -0.5074]) tensor([0.3823, 0.2278, 0.2423, 0.1476]) -Greedy action tensor([ 0.7581, -0.1103, -0.0822, -0.0019]) tensor([0.4313, 0.1810, 0.1861, 0.2017]) -Greedy action tensor([ 1.3378, -0.7886, -0.1166, -0.5401]) tensor([0.6641, 0.0792, 0.1551, 0.1016]) -Greedy action tensor([ 1.0965, -0.8355, -0.0370, -0.5912]) tensor([0.6054, 0.0877, 0.1949, 0.1120]) -Greedy action tensor([ 0.5126, 0.1550, 0.1159, -0.1722]) tensor([0.3477, 0.2432, 0.2338, 0.1753]) -Greedy action tensor([ 0.8792, -0.1460, -0.0821, 0.0257]) tensor([0.4615, 0.1655, 0.1765, 0.1965]) -Greedy action tensor([ 0.8003, -0.1822, -0.0407, -0.0329]) tensor([0.4464, 0.1671, 0.1925, 0.1940]) -Greedy action tensor([ 0.9782, -0.9921, 0.0771, -0.5371]) tensor([0.5665, 0.0790, 0.2301, 0.1245]) -Greedy action tensor([ 1.2194, -0.9996, -0.0018, -0.5427]) tensor([0.6348, 0.0690, 0.1872, 0.1090]) -Greedy action tensor([ 0.9351, -0.6323, 0.1067, -0.3140]) tensor([0.5176, 0.1080, 0.2260, 0.1484]) -Greedy action tensor([ 0.8906, -0.0928, -0.1142, -0.0313]) tensor([0.4678, 0.1749, 0.1712, 0.1861]) -Greedy action tensor([ 0.5172, -0.2787, -0.0766, -0.0651]) tensor([0.3903, 0.1761, 0.2155, 0.2180]) -Greedy action tensor([ 0.7825, -0.4130, 0.0139, -0.3581]) tensor([0.4794, 0.1451, 0.2223, 0.1532]) -Greedy action tensor([ 0.7542, -0.5885, -0.0630, -0.5148]) tensor([0.5041, 0.1316, 0.2226, 0.1417]) -Greedy action tensor([ 0.2932, -0.0391, 0.1082, -0.0941]) tensor([0.3099, 0.2222, 0.2575, 0.2104]) -Greedy action tensor([ 0.5134, 0.0029, -0.1108, -0.0033]) tensor([0.3660, 0.2197, 0.1961, 0.2183]) -Greedy action tensor([ 0.5364, -0.2071, -0.0702, -0.0010]) tensor([0.3839, 0.1825, 0.2093, 0.2243]) -Greedy action tensor([ 0.8534, -0.5521, -0.1118, -0.3208]) tensor([0.5167, 0.1267, 0.1968, 0.1597]) -Greedy action tensor([ 0.8936, -0.5521, 0.0764, -0.4081]) tensor([0.5130, 0.1209, 0.2266, 0.1396]) -Greedy action tensor([ 1.0821, -0.5507, -0.1221, -0.5547]) tensor([0.5917, 0.1156, 0.1775, 0.1152]) -Greedy action tensor([ 0.6381, -0.4471, -0.0734, -0.2282]) tensor([0.4446, 0.1502, 0.2183, 0.1870]) -Greedy action tensor([ 0.9892, -0.5515, -0.0823, -0.5350]) tensor([0.5635, 0.1207, 0.1930, 0.1227]) -Greedy action tensor([ 0.8559, -0.6015, -0.0814, -0.3140]) tensor([0.5168, 0.1203, 0.2024, 0.1604]) -Greedy action tensor([ 0.3120, -0.0531, -0.0283, -0.0762]) tensor([0.3243, 0.2251, 0.2307, 0.2199]) -Greedy action tensor([ 0.9315, -0.4815, -0.0598, -0.4340]) tensor([0.5348, 0.1302, 0.1985, 0.1365]) -Greedy action tensor([ 1.0054, -0.1699, 0.0855, -0.0338]) tensor([0.4852, 0.1498, 0.1934, 0.1716]) -Greedy action tensor([ 1.1414, -0.7230, -0.0926, -0.6143]) tensor([0.6177, 0.0957, 0.1798, 0.1067]) -Greedy action tensor([ 0.8294, -0.5822, -0.0910, -0.2390]) tensor([0.5036, 0.1228, 0.2006, 0.1730]) -Greedy action tensor([ 1.3971, -1.0400, 0.0467, -0.8480]) tensor([0.6885, 0.0602, 0.1784, 0.0729]) -Greedy action tensor([ 0.8218, -0.6879, 0.0265, -0.3097]) tensor([0.5013, 0.1108, 0.2263, 0.1617]) -Greedy action tensor([ 1.2361, -0.7078, -0.1384, -0.3124]) tensor([0.6216, 0.0890, 0.1572, 0.1321]) -Greedy action tensor([ 1.0092, -0.7502, 0.1366, -0.3414]) tensor([0.5408, 0.0931, 0.2260, 0.1401]) -Greedy action tensor([ 0.8543, -0.1740, 0.0129, -0.0655]) tensor([0.4572, 0.1635, 0.1971, 0.1822]) -Greedy action tensor([ 1.0843, -0.5873, -0.0769, -0.7708]) tensor([0.6033, 0.1134, 0.1889, 0.0944]) -Greedy action tensor([ 1.2116, -0.7661, 0.1197, -0.7182]) tensor([0.6176, 0.0855, 0.2073, 0.0897]) -Greedy action tensor([ 1.0351, -0.8293, 0.0315, -0.4497]) tensor([0.5720, 0.0887, 0.2097, 0.1296]) -Greedy action tensor([ 0.8181, -0.5037, -0.0851, -0.3246]) tensor([0.5023, 0.1339, 0.2036, 0.1602]) -Greedy action tensor([ 0.4344, 0.1530, -0.0766, 0.0428]) tensor([0.3300, 0.2490, 0.1980, 0.2230]) -Greedy action tensor([ 1.0098, -0.8116, 0.0285, -0.5357]) tensor([0.5715, 0.0925, 0.2142, 0.1219]) -Greedy action tensor([ 0.9650, -0.6224, 0.0958, -0.3675]) tensor([0.5298, 0.1083, 0.2221, 0.1398]) -Greedy action tensor([ 0.9840, -0.9030, 0.1585, -0.9686]) tensor([0.5776, 0.0875, 0.2530, 0.0820]) -Greedy action tensor([ 0.8127, -0.2186, -0.0130, 0.6496]) tensor([0.3782, 0.1348, 0.1656, 0.3213]) -Greedy action tensor([-0.6526, -1.0338, 0.1649, -0.7865]) tensor([0.2074, 0.1416, 0.4696, 0.1814]) -Greedy action tensor([-0.3263, -0.0823, 0.9262, -0.4566]) tensor([0.1503, 0.1918, 0.5259, 0.1319]) -Greedy action tensor([ 0.9042, -1.5150, -0.2776, 0.1669]) tensor([0.5336, 0.0475, 0.1637, 0.2553]) -Greedy action tensor([ 0.5475, -0.4342, 0.2640, -0.3444]) tensor([0.3941, 0.1476, 0.2968, 0.1615]) -Greedy action tensor([ 0.5109, -0.2463, -0.5042, 0.2066]) tensor([0.3893, 0.1825, 0.1411, 0.2871]) -Greedy action tensor([ 1.1559, -0.5025, 0.6656, 0.6382]) tensor([0.4169, 0.0794, 0.2553, 0.2484]) -Greedy action tensor([ 0.2538, -0.2365, 0.0632, 0.1047]) tensor([0.3030, 0.1856, 0.2504, 0.2610]) -Greedy action tensor([ 0.6278, -1.5691, 0.5149, -0.3277]) tensor([0.4186, 0.0465, 0.3739, 0.1610]) -Greedy action tensor([ 0.4170, -1.9486, -0.5266, -0.1357]) tensor([0.4858, 0.0456, 0.1891, 0.2795]) -Greedy action tensor([-0.4015, -0.9021, 0.4040, -1.2505]) tensor([0.2341, 0.1419, 0.5239, 0.1001]) -Greedy action tensor([-0.3102, -1.2769, 0.0301, -0.0172]) tensor([0.2424, 0.0922, 0.3406, 0.3249]) -Greedy action tensor([-0.9358, -0.1994, -1.1549, -0.2706]) tensor([0.1713, 0.3578, 0.1376, 0.3332]) -Greedy action tensor([-0.5529, 0.2050, -0.2881, -0.9095]) tensor([0.1947, 0.4154, 0.2537, 0.1363]) -Greedy action tensor([-0.4579, 0.3075, 0.6112, -1.1095]) tensor([0.1519, 0.3265, 0.4424, 0.0792]) -Greedy action tensor([ 0.4702, -0.3805, -0.5935, 0.4097]) tensor([0.3685, 0.1574, 0.1272, 0.3469]) -Greedy action tensor([-0.0636, -1.0007, -0.1083, -0.5471]) tensor([0.3373, 0.1321, 0.3226, 0.2080]) -Greedy action tensor([-0.9899, -0.5511, -1.5880, 0.1706]) tensor([0.1589, 0.2465, 0.0874, 0.5072]) -Greedy action tensor([ 1.0222, -0.7635, -0.1759, -0.0169]) tensor([0.5485, 0.0920, 0.1655, 0.1940]) -Greedy action tensor([ 0.3723, -0.9216, -0.6098, -0.7961]) tensor([0.5103, 0.1399, 0.1911, 0.1586]) -Greedy action tensor([-0.6237, -0.7927, 0.3886, -0.5975]) tensor([0.1778, 0.1502, 0.4894, 0.1826]) -Greedy action tensor([-1.1118, -0.0125, -0.5864, -0.8593]) tensor([0.1433, 0.4301, 0.2423, 0.1844]) -Greedy action tensor([ 0.1494, -0.6962, 0.6738, 0.0099]) tensor([0.2507, 0.1076, 0.4236, 0.2181]) -Greedy action tensor([-0.3270, -0.1763, 0.8566, -0.5007]) tensor([0.1595, 0.1854, 0.5210, 0.1341]) -Greedy action tensor([-0.7551, -0.3463, 0.3452, -0.4543]) tensor([0.1458, 0.2194, 0.4380, 0.1969]) -Greedy action tensor([-0.1150, 0.6577, -0.8717, -0.4648]) tensor([0.2304, 0.4990, 0.1081, 0.1624]) -Greedy action tensor([-0.7631, -0.2782, 0.3533, -0.6915]) tensor([0.1481, 0.2405, 0.4523, 0.1591]) -Greedy action tensor([-0.4398, -1.1662, 0.5187, -0.8488]) tensor([0.2103, 0.1017, 0.5483, 0.1397]) -Greedy action tensor([ 1.9422, -0.8150, 0.9182, 1.0383]) tensor([0.5472, 0.0347, 0.1965, 0.2216]) -Greedy action tensor([-0.4437, -1.0378, 0.6898, -0.9354]) tensor([0.1897, 0.1047, 0.5895, 0.1161]) -Greedy action tensor([ 0.5508, 0.6773, -0.8699, -0.5829]) tensor([0.3706, 0.4206, 0.0895, 0.1193]) -Greedy action tensor([ 0.8422, -0.3134, 0.4300, -0.0914]) tensor([0.4219, 0.1329, 0.2794, 0.1659]) -Greedy action tensor([-0.3659, -0.5395, 0.2632, -1.1033]) tensor([0.2384, 0.2004, 0.4472, 0.1140]) -Greedy action tensor([ 0.3491, -0.1125, 0.1876, 0.6972]) tensor([0.2566, 0.1617, 0.2183, 0.3634]) -Greedy action tensor([ 0.4691, -1.8271, 0.1611, -0.5450]) tensor([0.4549, 0.0458, 0.3343, 0.1650]) -Greedy action tensor([-0.2950, -0.5311, -0.9879, -0.1240]) tensor([0.2877, 0.2272, 0.1439, 0.3413]) -Greedy action tensor([ 0.5663, 0.5225, -0.5640, 0.8574]) tensor([0.2764, 0.2645, 0.0893, 0.3698]) -Greedy action tensor([ 0.5022, 0.1790, 0.8763, -0.6881]) tensor([0.2872, 0.2079, 0.4175, 0.0874]) -Greedy action tensor([ 0.1022, -0.7507, 0.3817, -0.8586]) tensor([0.3194, 0.1361, 0.4224, 0.1222]) -Greedy action tensor([-1.2020, -0.5213, 0.2669, -0.8957]) tensor([0.1152, 0.2276, 0.5006, 0.1565]) -Greedy action tensor([-0.4632, 0.1463, 0.1634, -0.9840]) tensor([0.1885, 0.3468, 0.3527, 0.1120]) -Greedy action tensor([-0.9178, 0.3718, -0.4031, -0.8718]) tensor([0.1360, 0.4939, 0.2276, 0.1424]) -Greedy action tensor([ 0.3613, -0.6724, 0.0903, -0.8253]) tensor([0.4126, 0.1468, 0.3147, 0.1260]) -Greedy action tensor([ 0.0598, 0.0487, 0.0014, -0.1690]) tensor([0.2683, 0.2653, 0.2530, 0.2134]) -Greedy action tensor([-0.4173, -0.8384, 0.5181, -0.3733]) tensor([0.1905, 0.1250, 0.4854, 0.1991]) -Greedy action tensor([ 0.3440, -0.8212, -0.1216, 0.6057]) tensor([0.3088, 0.0963, 0.1938, 0.4011]) -Greedy action tensor([ 0.1127, -0.7401, 0.2247, -0.6242]) tensor([0.3308, 0.1410, 0.3700, 0.1583]) -Greedy action tensor([ 0.8226, -0.2248, 0.2651, -0.5712]) tensor([0.4605, 0.1616, 0.2637, 0.1143]) -Greedy action tensor([-0.4113, -1.0848, 0.8151, -0.7859]) tensor([0.1784, 0.0909, 0.6080, 0.1226]) -Greedy action tensor([-0.2149, -1.0485, 0.5020, -0.1303]) tensor([0.2188, 0.0951, 0.4481, 0.2381]) -Greedy action tensor([-0.2463, -0.3332, 0.9586, -0.5270]) tensor([0.1664, 0.1526, 0.5553, 0.1257]) -Greedy action tensor([-0.9132, -0.4717, -0.3239, 0.3450]) tensor([0.1270, 0.1974, 0.2289, 0.4468]) -Greedy action tensor([-0.0968, -1.7487, -1.0499, 1.3061]) tensor([0.1772, 0.0340, 0.0683, 0.7206]) -Greedy action tensor([-0.4371, 0.1541, -0.4643, 0.2797]) tensor([0.1716, 0.3099, 0.1670, 0.3514]) -Greedy action tensor([ 0.2451, 0.8562, -0.5467, -0.9872]) tensor([0.2788, 0.5136, 0.1263, 0.0813]) -Greedy action tensor([ 0.1561, -0.4555, 0.4144, -0.1506]) tensor([0.2799, 0.1518, 0.3624, 0.2060]) -Greedy action tensor([-0.0273, -0.4143, -0.3800, -0.3426]) tensor([0.3214, 0.2183, 0.2259, 0.2345]) -Greedy action tensor([ 0.5653, 0.3580, 0.7349, -0.2364]) tensor([0.2902, 0.2359, 0.3438, 0.1302]) -Greedy action tensor([-0.7822, -0.8311, -0.5970, -0.1864]) tensor([0.2012, 0.1916, 0.2421, 0.3651]) -Greedy action tensor([-1.6880, -0.1019, -0.1604, -1.1763]) tensor([0.0822, 0.4017, 0.3789, 0.1372]) -Greedy action tensor([0.0619, 0.6109, 0.1182, 0.0727]) tensor([0.2083, 0.3607, 0.2204, 0.2106]) -Greedy action tensor([-0.5593, -0.4315, 0.6044, -0.7075]) tensor([0.1613, 0.1833, 0.5164, 0.1391]) -Greedy action tensor([-1.0122, -0.1289, -0.1434, -1.3591]) tensor([0.1536, 0.3716, 0.3662, 0.1086]) -Greedy action tensor([-0.0177, -0.3305, 0.2959, -0.8610]) tensor([0.2833, 0.2072, 0.3876, 0.1219]) -Greedy action tensor([-0.9780, 0.0356, -1.1019, -0.1372]) tensor([0.1437, 0.3961, 0.1270, 0.3332]) -Greedy action tensor([ 0.5936, -0.2034, -0.8123, -0.6778]) tensor([0.5060, 0.2280, 0.1240, 0.1419]) -Greedy action tensor([ 0.1099, -0.1590, -0.0770, -0.5753]) tensor([0.3228, 0.2467, 0.2678, 0.1627]) -Greedy action tensor([-0.3318, 0.0571, -0.0153, -0.9476]) tensor([0.2279, 0.3362, 0.3128, 0.1231]) -Greedy action tensor([ 0.5712, -0.6840, 1.2641, -0.4476]) tensor([0.2743, 0.0782, 0.5485, 0.0990]) -Greedy action tensor([ 0.1963, 0.7327, 0.2600, -0.1429]) tensor([0.2228, 0.3810, 0.2375, 0.1587]) -Greedy action tensor([-0.1711, 0.3900, 0.9329, -0.6179]) tensor([0.1560, 0.2735, 0.4707, 0.0998]) -Greedy action tensor([ 0.8836, -1.0984, 0.6182, 0.1380]) tensor([0.4203, 0.0579, 0.3223, 0.1994]) -Greedy action tensor([ 0.0646, -1.3527, 0.1016, 0.2422]) tensor([0.2878, 0.0698, 0.2987, 0.3438]) -Greedy action tensor([ 0.1470, -0.3279, 0.3579, 0.4188]) tensor([0.2399, 0.1492, 0.2962, 0.3148]) -Greedy action tensor([-0.5915, -0.4382, -0.0637, 0.1801]) tensor([0.1660, 0.1935, 0.2814, 0.3591]) -Greedy action tensor([ 0.1366, -0.0910, -0.8559, -0.3366]) tensor([0.3584, 0.2855, 0.1328, 0.2233]) -Greedy action tensor([-0.2704, -0.3379, -0.7933, 0.1523]) tensor([0.2467, 0.2306, 0.1462, 0.3765]) -Greedy action tensor([-0.1648, 0.1215, -0.2997, -0.4719]) tensor([0.2538, 0.3379, 0.2217, 0.1867]) -Greedy action tensor([-0.0395, -0.7781, -0.3836, 0.7186]) tensor([0.2314, 0.1106, 0.1641, 0.4939]) -Greedy action tensor([ 1.3920, -0.1259, -0.0920, -0.2107]) tensor([0.6071, 0.1330, 0.1376, 0.1222]) -Greedy action tensor([-0.1266, -0.2543, 0.7431, -0.8521]) tensor([0.2105, 0.1853, 0.5023, 0.1019]) -Greedy action tensor([ 1.0881, -0.8226, -0.5516, 0.8910]) tensor([0.4623, 0.0684, 0.0897, 0.3796]) -Greedy action tensor([ 2.1764, -0.0544, -0.4251, 0.4368]) tensor([0.7368, 0.0792, 0.0546, 0.1294]) -Greedy action tensor([ 1.4559, -0.4281, -0.6368, -0.1149]) tensor([0.6742, 0.1025, 0.0832, 0.1402]) -Greedy action tensor([ 1.8317, -0.5170, -0.2823, 0.6145]) tensor([0.6612, 0.0631, 0.0798, 0.1958]) -Greedy action tensor([ 1.2703, -0.1181, -0.3069, -0.1257]) tensor([0.5870, 0.1464, 0.1212, 0.1453]) -Greedy action tensor([ 1.0293, -0.0200, -0.4645, -0.0720]) tensor([0.5243, 0.1836, 0.1177, 0.1743]) -Greedy action tensor([ 1.2675, -0.1691, -0.5063, 0.0517]) tensor([0.5869, 0.1395, 0.0996, 0.1740]) -Greedy action tensor([ 1.3788, -0.3783, -0.6346, -0.0559]) tensor([0.6476, 0.1117, 0.0865, 0.1542]) -Greedy action tensor([ 1.7642, -0.6508, -0.5138, 0.5193]) tensor([0.6758, 0.0604, 0.0693, 0.1946]) -Greedy action tensor([ 2.1424, -0.4166, -0.7419, 0.5553]) tensor([0.7475, 0.0578, 0.0418, 0.1529]) -Greedy action tensor([ 1.0997, -0.4928, -0.4181, 0.5045]) tensor([0.5066, 0.1030, 0.1110, 0.2793]) -Greedy action tensor([ 1.4862, -0.4232, -0.7137, 0.1822]) tensor([0.6534, 0.0968, 0.0724, 0.1774]) -Greedy action tensor([ 1.4535, -0.3464, -1.2347, -0.0108]) tensor([0.6828, 0.1129, 0.0464, 0.1579]) -Greedy action tensor([ 2.2776, 0.7250, 0.1300, -0.0703]) tensor([0.7022, 0.1487, 0.0820, 0.0671]) -Greedy action tensor([ 1.1310, 0.0316, 0.0082, -0.1391]) tensor([0.5157, 0.1718, 0.1678, 0.1448]) -Greedy action tensor([ 1.7684, -0.6611, -0.5691, 0.9766]) tensor([0.6106, 0.0538, 0.0590, 0.2766]) -Greedy action tensor([ 1.7518, -0.7053, -0.3708, 0.2011]) tensor([0.7055, 0.0604, 0.0845, 0.1496]) -Greedy action tensor([ 1.6202, -0.8787, -0.3988, 0.0971]) tensor([0.6978, 0.0573, 0.0927, 0.1522]) -Greedy action tensor([ 1.0797, -0.5979, -0.2807, 0.3980]) tensor([0.5131, 0.0959, 0.1316, 0.2595]) -Greedy action tensor([ 1.3254, -0.4958, -0.4330, 0.6596]) tensor([0.5411, 0.0876, 0.0932, 0.2781]) -Greedy action tensor([ 1.9911, -1.2509, -0.2992, 0.5464]) tensor([0.7267, 0.0284, 0.0736, 0.1714]) -Greedy action tensor([ 1.1969, 0.2902, -0.5223, 0.3750]) tensor([0.4944, 0.1997, 0.0886, 0.2173]) -Greedy action tensor([ 1.4292, 0.5782, 0.1596, -0.5272]) tensor([0.5407, 0.2309, 0.1519, 0.0764]) -Greedy action tensor([ 1.2922, -0.5586, -0.1124, 0.1663]) tensor([0.5791, 0.0910, 0.1421, 0.1878]) -Greedy action tensor([ 1.4997, -0.4611, -0.2039, 0.1524]) tensor([0.6318, 0.0889, 0.1150, 0.1642]) -Greedy action tensor([ 1.3347, -0.4047, -0.3976, 0.3409]) tensor([0.5805, 0.1019, 0.1027, 0.2149]) -Greedy action tensor([ 1.6497, -0.4258, -0.8772, 0.2648]) tensor([0.6869, 0.0862, 0.0549, 0.1720]) -Greedy action tensor([ 1.4632, 0.0086, -0.6689, -0.0818]) tensor([0.6388, 0.1492, 0.0758, 0.1363]) -Greedy action tensor([ 1.4330, -0.4728, -0.5176, 0.6807]) tensor([0.5675, 0.0844, 0.0807, 0.2674]) -Greedy action tensor([ 1.7315, -0.4840, -0.1675, 0.4348]) tensor([0.6526, 0.0712, 0.0977, 0.1784]) -Greedy action tensor([ 1.8702, -1.2413, -0.0198, 0.2535]) tensor([0.7173, 0.0319, 0.1084, 0.1424]) -Greedy action tensor([ 1.4322, -0.6167, -0.1951, 0.1642]) tensor([0.6224, 0.0802, 0.1223, 0.1751]) -Greedy action tensor([ 1.3744, -0.6445, -0.3262, 0.1286]) tensor([0.6238, 0.0828, 0.1139, 0.1795]) -Greedy action tensor([ 1.6669, -0.0341, -0.3171, 0.4710]) tensor([0.6163, 0.1125, 0.0848, 0.1864]) -Greedy action tensor([ 1.1910, -0.7592, -0.2894, 0.1263]) tensor([0.5832, 0.0830, 0.1327, 0.2011]) -Greedy action tensor([ 2.1879, -0.7513, -0.6325, 0.5201]) tensor([0.7685, 0.0407, 0.0458, 0.1450]) -Greedy action tensor([ 1.0356, -0.2003, -0.6545, 0.0478]) tensor([0.5413, 0.1573, 0.0999, 0.2016]) -Greedy action tensor([ 1.6588, -1.0016, -0.2109, 0.4173]) tensor([0.6609, 0.0462, 0.1019, 0.1910]) -Greedy action tensor([ 1.1929, -0.1963, -0.8181, 0.0429]) tensor([0.5883, 0.1467, 0.0788, 0.1863]) -Greedy action tensor([ 0.8934, -0.2942, -0.3983, 0.1183]) tensor([0.4901, 0.1495, 0.1347, 0.2258]) -Greedy action tensor([ 1.7300, -0.4389, -0.4264, 0.3490]) tensor([0.6751, 0.0772, 0.0781, 0.1696]) -Greedy action tensor([ 1.5626, -0.9463, 0.0269, 0.6707]) tensor([0.5860, 0.0477, 0.1262, 0.2402]) -Greedy action tensor([ 1.3282, -0.4939, -0.4318, 0.1708]) tensor([0.6068, 0.0981, 0.1044, 0.1907]) -Greedy action tensor([ 1.4065, -0.5685, -0.7526, 0.1420]) tensor([0.6508, 0.0903, 0.0751, 0.1838]) -Greedy action tensor([ 1.9927, -0.9121, -0.3938, 0.5708]) tensor([0.7205, 0.0395, 0.0662, 0.1738]) -Greedy action tensor([ 1.7043, -0.8220, 0.1752, 0.2676]) tensor([0.6517, 0.0521, 0.1412, 0.1549]) -Greedy action tensor([ 2.0181, -0.6787, -0.8297, 0.3702]) tensor([0.7588, 0.0512, 0.0440, 0.1460]) -Greedy action tensor([ 1.4401, -0.8709, -0.1481, 0.6850]) tensor([0.5639, 0.0559, 0.1152, 0.2650]) -Greedy action tensor([ 1.7836, -0.7559, -0.3984, 0.1824]) tensor([0.7177, 0.0566, 0.0810, 0.1447]) -Greedy action tensor([ 1.2697, -0.4201, -0.5302, 0.1191]) tensor([0.6001, 0.1108, 0.0992, 0.1899]) -Greedy action tensor([ 1.6111, -0.2540, -0.6156, 0.5019]) tensor([0.6279, 0.0973, 0.0677, 0.2071]) -Greedy action tensor([ 1.5381, -0.7920, -0.5245, 0.1972]) tensor([0.6729, 0.0655, 0.0855, 0.1760]) -Greedy action tensor([ 0.8200, -0.5197, -0.2459, 0.3185]) tensor([0.4521, 0.1184, 0.1557, 0.2738]) -Greedy action tensor([ 1.5436, -0.4386, -0.5428, 0.3217]) tensor([0.6424, 0.0885, 0.0797, 0.1893]) -Greedy action tensor([ 1.0071, -0.2686, -0.7132, 0.5073]) tensor([0.4843, 0.1352, 0.0867, 0.2938]) -Greedy action tensor([ 1.2347, -0.0783, -0.9398, 0.2913]) tensor([0.5643, 0.1518, 0.0641, 0.2197]) -Greedy action tensor([ 1.3127, -0.0977, -0.3581, 0.2478]) tensor([0.5628, 0.1374, 0.1059, 0.1940]) -Greedy action tensor([ 0.9651, -0.6055, -0.3967, 0.2141]) tensor([0.5165, 0.1074, 0.1323, 0.2437]) -Greedy action tensor([ 1.0009, -0.3849, -0.1994, 0.0989]) tensor([0.5110, 0.1278, 0.1539, 0.2073]) -Greedy action tensor([ 1.7692, -0.7759, -0.4795, 0.6603]) tensor([0.6605, 0.0518, 0.0697, 0.2179]) -Greedy action tensor([ 1.6167, -0.6463, -0.3556, 0.6831]) tensor([0.6111, 0.0636, 0.0850, 0.2403]) -Greedy action tensor([ 1.3687, -0.3884, -0.4094, 0.4331]) tensor([0.5768, 0.0995, 0.0974, 0.2263]) -Greedy action tensor([ 1.0674, -0.5321, -0.2017, 0.5110]) tensor([0.4863, 0.0982, 0.1367, 0.2788]) -Greedy action tensor([ 2.2017, -0.9426, -0.6353, 0.5441]) tensor([0.7738, 0.0333, 0.0453, 0.1475]) -Greedy action tensor([ 1.5286, -0.7563, -0.5424, 0.0032]) tensor([0.6919, 0.0704, 0.0872, 0.1505]) -Greedy action tensor([ 1.8789, -0.3419, -0.4167, 0.7310]) tensor([0.6551, 0.0711, 0.0660, 0.2079]) -Greedy action tensor([ 1.8189, -0.7087, -0.1452, 0.6763]) tensor([0.6497, 0.0519, 0.0911, 0.2073]) -Greedy action tensor([ 1.8565, -0.5808, -0.0681, -0.1513]) tensor([0.7312, 0.0639, 0.1067, 0.0982]) -Greedy action tensor([ 1.2718, -0.2968, -0.4984, 0.2580]) tensor([0.5742, 0.1196, 0.0978, 0.2084]) -Greedy action tensor([ 1.4718, -0.9250, -0.4147, -0.1014]) tensor([0.6897, 0.0628, 0.1045, 0.1430]) -Greedy action tensor([ 1.0350, -0.3895, -0.1767, 0.2135]) tensor([0.5055, 0.1216, 0.1505, 0.2223]) -Greedy action tensor([ 1.9917, 0.2507, -0.1396, -0.3187]) tensor([0.7177, 0.1259, 0.0852, 0.0712]) -Greedy action tensor([ 1.4097, -0.1943, -0.6628, 0.2245]) tensor([0.6125, 0.1232, 0.0771, 0.1872]) -Greedy action tensor([ 1.4687, -1.0274, -0.2839, 0.6198]) tensor([0.5940, 0.0489, 0.1029, 0.2541]) -Greedy action tensor([ 3.2181, -1.5317, -0.1019, 1.2245]) tensor([0.8467, 0.0073, 0.0306, 0.1153]) -Greedy action tensor([ 1.2200, -0.4421, -0.1568, 0.2817]) tensor([0.5454, 0.1035, 0.1377, 0.2134]) -Greedy action tensor([ 1.9795, -1.2420, -0.3178, 0.3551]) tensor([0.7477, 0.0298, 0.0752, 0.1473]) -Greedy action tensor([ 1.7761, -0.6902, -0.2920, 0.2519]) tensor([0.6997, 0.0594, 0.0885, 0.1524]) -Greedy action tensor([ 1.3335, -0.1006, -0.9742, 0.1538]) tensor([0.6078, 0.1449, 0.0605, 0.1868]) -Greedy action tensor([ 1.9438, -0.4187, -0.3461, 0.3195]) tensor([0.7181, 0.0676, 0.0727, 0.1415]) -Greedy action tensor([ 1.7289, -0.8491, -0.3127, 0.1082]) tensor([0.7125, 0.0541, 0.0925, 0.1409]) -Greedy action tensor([-1.9139, -0.4299, 0.6473, -0.1664]) tensor([0.0415, 0.1830, 0.5374, 0.2382]) -Greedy action tensor([-1.9124, -0.3976, 0.6502, -0.1620]) tensor([0.0412, 0.1874, 0.5343, 0.2371]) -Greedy action tensor([-1.8963, -0.4743, 0.6468, -0.1595]) tensor([0.0425, 0.1761, 0.5402, 0.2412]) -Greedy action tensor([-1.8336, -0.4172, 0.7711, 0.1446]) tensor([0.0386, 0.1593, 0.5227, 0.2794]) -Greedy action tensor([-1.9211, -0.4434, 0.6554, -0.1693]) tensor([0.0412, 0.1804, 0.5412, 0.2373]) -Greedy action tensor([-1.9144, -0.4371, 0.6549, -0.1636]) tensor([0.0413, 0.1811, 0.5396, 0.2380]) -Greedy action tensor([-1.0563, -0.5775, 0.6141, -0.0238]) tensor([0.0931, 0.1503, 0.4950, 0.2615]) -Greedy action tensor([-1.7045, -0.4573, 0.5261, -0.0422]) tensor([0.0525, 0.1826, 0.4883, 0.2766]) -Greedy action tensor([-1.9282, -0.4280, 0.6583, -0.1697]) tensor([0.0407, 0.1825, 0.5406, 0.2362]) -Greedy action tensor([-1.9086, -0.4019, 0.6449, -0.1591]) tensor([0.0415, 0.1871, 0.5329, 0.2385]) -Greedy action tensor([-1.8003, -0.4987, 0.7565, 0.0745]) tensor([0.0415, 0.1526, 0.5353, 0.2706]) -Greedy action tensor([-1.3742, 0.7711, 0.3077, -0.0788]) tensor([0.0538, 0.4601, 0.2894, 0.1967]) -Greedy action tensor([-1.9002, -0.4530, 0.6481, -0.1605]) tensor([0.0421, 0.1791, 0.5387, 0.2400]) -Greedy action tensor([-1.9073, -0.4567, 0.6546, -0.1627]) tensor([0.0418, 0.1781, 0.5412, 0.2390]) -Greedy action tensor([-1.5608, -0.3898, 0.4746, -0.0600]) tensor([0.0611, 0.1971, 0.4677, 0.2741]) -Greedy action tensor([-1.9236, -0.4394, 0.6561, -0.1700]) tensor([0.0410, 0.1809, 0.5411, 0.2369]) -Greedy action tensor([-1.8053, -0.4476, 0.5978, -0.1270]) tensor([0.0469, 0.1825, 0.5191, 0.2515]) -Greedy action tensor([-0.3736, 0.8551, -0.0652, 0.3542]) tensor([0.1274, 0.4353, 0.1734, 0.2638]) -Greedy action tensor([-1.7798, -0.3514, 0.5798, -0.1558]) tensor([0.0480, 0.2003, 0.5082, 0.2435]) -Greedy action tensor([-1.6963, -0.3224, 0.6483, -0.0766]) tensor([0.0489, 0.1934, 0.5105, 0.2472]) -Greedy action tensor([-1.8993, -0.4286, 0.6439, -0.1587]) tensor([0.0421, 0.1831, 0.5350, 0.2398]) -Greedy action tensor([-0.2401, 0.9431, -0.2714, 0.0432]) tensor([0.1524, 0.4976, 0.1477, 0.2023]) -Greedy action tensor([-1.8865, -0.3877, 0.6373, -0.1482]) tensor([0.0423, 0.1894, 0.5277, 0.2406]) -Greedy action tensor([-0.1198, 1.1729, 0.0047, 0.4685]) tensor([0.1320, 0.4808, 0.1495, 0.2377]) -Greedy action tensor([-1.5647, -0.3005, 0.4891, -0.1137]) tensor([0.0602, 0.2132, 0.4696, 0.2570]) -Greedy action tensor([-1.8411, -0.3159, 0.6421, -0.1121]) tensor([0.0431, 0.1980, 0.5161, 0.2428]) -Greedy action tensor([-1.8714, -0.4444, 0.6309, -0.1349]) tensor([0.0434, 0.1807, 0.5297, 0.2463]) -Greedy action tensor([-1.0917, -0.0278, 0.5882, 0.1783]) tensor([0.0780, 0.2260, 0.4184, 0.2777]) -Greedy action tensor([-0.4947, 0.8886, 0.0439, 0.0011]) tensor([0.1199, 0.4780, 0.2054, 0.1968]) -Greedy action tensor([-1.1856, -0.6182, 0.4860, 0.3655]) tensor([0.0781, 0.1378, 0.4157, 0.3685]) -Greedy action tensor([-1.8436, -0.1589, 0.5951, -0.1165]) tensor([0.0426, 0.2297, 0.4881, 0.2396]) -Greedy action tensor([-1.7828, -0.3976, 0.5674, -0.1199]) tensor([0.0482, 0.1925, 0.5052, 0.2541]) -Greedy action tensor([-1.9184, -0.4186, 0.6529, -0.1636]) tensor([0.0411, 0.1840, 0.5374, 0.2375]) -Greedy action tensor([-1.7622, -0.4669, 0.5779, -0.0960]) tensor([0.0492, 0.1797, 0.5108, 0.2603]) -Greedy action tensor([-1.9259, -0.4463, 0.6591, -0.1726]) tensor([0.0409, 0.1798, 0.5429, 0.2364]) -Greedy action tensor([-1.8808, -0.4438, 0.6378, -0.1467]) tensor([0.0429, 0.1807, 0.5331, 0.2433]) -Greedy action tensor([-1.9077, -0.4533, 0.6543, -0.1626]) tensor([0.0417, 0.1786, 0.5407, 0.2389]) -Greedy action tensor([-1.4126, -0.5317, 0.4043, -0.4942]) tensor([0.0828, 0.1999, 0.5097, 0.2075]) -Greedy action tensor([-1.8842, -0.2766, 0.6144, -0.1627]) tensor([0.0421, 0.2102, 0.5122, 0.2355]) -Greedy action tensor([-1.6128, -0.2587, 0.6498, 0.1161]) tensor([0.0497, 0.1925, 0.4776, 0.2801]) -Greedy action tensor([-1.7806, -0.4220, 0.6164, -0.0219]) tensor([0.0461, 0.1794, 0.5068, 0.2677]) -Greedy action tensor([-1.7103, -0.5202, 0.5411, -0.0218]) tensor([0.0521, 0.1712, 0.4949, 0.2818]) -Greedy action tensor([-1.6385, -0.1692, 0.5186, 0.0031]) tensor([0.0522, 0.2269, 0.4513, 0.2696]) -Greedy action tensor([-1.8802, -0.3808, 0.6400, -0.1448]) tensor([0.0424, 0.1899, 0.5272, 0.2405]) -Greedy action tensor([-1.7845, -0.1161, 0.5499, -0.1217]) tensor([0.0457, 0.2422, 0.4714, 0.2408]) -Greedy action tensor([-1.9283, -0.4508, 0.6627, -0.1727]) tensor([0.0408, 0.1788, 0.5443, 0.2361]) -Greedy action tensor([-1.3597, -0.3492, 0.4803, 0.0306]) tensor([0.0711, 0.1954, 0.4478, 0.2857]) -Greedy action tensor([-0.6364, -0.4125, 0.1757, 0.1899]) tensor([0.1473, 0.1843, 0.3318, 0.3366]) -Greedy action tensor([-1.9315, -0.4655, 0.6636, -0.1735]) tensor([0.0408, 0.1766, 0.5462, 0.2365]) -Greedy action tensor([-0.5015, 0.3338, 0.1099, 0.1284]) tensor([0.1423, 0.3282, 0.2623, 0.2672]) -Greedy action tensor([-1.7590, -0.4983, 0.5734, -0.1153]) tensor([0.0500, 0.1763, 0.5150, 0.2587]) -Greedy action tensor([-1.7049, -0.4682, 0.7669, 0.2282]) tensor([0.0431, 0.1485, 0.5105, 0.2979]) -Greedy action tensor([-1.9081, -0.4559, 0.6482, -0.1743]) tensor([0.0420, 0.1793, 0.5410, 0.2377]) -Greedy action tensor([-1.9147, -0.3683, 0.6449, -0.1642]) tensor([0.0410, 0.1925, 0.5303, 0.2361]) -Greedy action tensor([-1.8474, -0.4300, 0.6144, -0.1336]) tensor([0.0446, 0.1842, 0.5234, 0.2477]) -Greedy action tensor([-1.7736, -0.4548, 0.5965, -0.0908]) tensor([0.0480, 0.1796, 0.5139, 0.2585]) -Greedy action tensor([-0.6140, 0.8507, 0.0378, 0.0518]) tensor([0.1088, 0.4707, 0.2088, 0.2117]) -Greedy action tensor([-1.7625, -0.2840, 0.5577, -0.1196]) tensor([0.0482, 0.2115, 0.4909, 0.2494]) -Greedy action tensor([-1.7470, -0.4439, 0.6001, -0.0185]) tensor([0.0482, 0.1772, 0.5034, 0.2712]) -Greedy action tensor([-1.8729, -0.4333, 0.6313, -0.1468]) tensor([0.0433, 0.1829, 0.5302, 0.2435]) -Greedy action tensor([-1.9195, -0.4626, 0.6607, -0.1631]) tensor([0.0412, 0.1768, 0.5436, 0.2385]) -Greedy action tensor([-1.8211, -0.4831, 0.6780, -0.0060]) tensor([0.0432, 0.1648, 0.5263, 0.2656]) -Greedy action tensor([-1.7843, -0.5171, 0.6249, -0.1294]) tensor([0.0478, 0.1698, 0.5321, 0.2503]) -Greedy action tensor([-1.7648, -0.5264, 0.5707, -0.0930]) tensor([0.0497, 0.1716, 0.5140, 0.2647]) -Greedy action tensor([-1.8838, -0.4823, 0.5781, -0.1812]) tensor([0.0449, 0.1823, 0.5264, 0.2464]) -Greedy action tensor([-1.9284, -0.4237, 0.6596, -0.1714]) tensor([0.0406, 0.1830, 0.5407, 0.2356]) -Greedy action tensor([-1.9353, -0.4536, 0.6630, -0.1717]) tensor([0.0405, 0.1783, 0.5447, 0.2364]) -Greedy action tensor([-1.8565, -0.4549, 0.6258, -0.1357]) tensor([0.0442, 0.1796, 0.5292, 0.2471]) -Greedy action tensor([-1.7045, -0.1381, 0.4793, -0.0324]) tensor([0.0500, 0.2396, 0.4442, 0.2663]) -Greedy action tensor([-0.9490, 0.2598, 0.0431, 0.0279]) tensor([0.1031, 0.3452, 0.2780, 0.2738]) -Greedy action tensor([-1.9452, -0.4497, 0.6669, -0.1809]) tensor([0.0401, 0.1790, 0.5467, 0.2342]) -Greedy action tensor([-1.6566, -0.3689, 0.5292, -0.1024]) tensor([0.0548, 0.1986, 0.4875, 0.2592]) -Greedy action tensor([-1.2790, -0.5183, 0.4318, -0.1306]) tensor([0.0846, 0.1809, 0.4679, 0.2666]) -Greedy action tensor([-1.9367, -0.4379, 0.6621, -0.1754]) tensor([0.0404, 0.1809, 0.5435, 0.2352]) -Greedy action tensor([-1.8981, -0.4216, 0.6431, -0.1574]) tensor([0.0421, 0.1841, 0.5340, 0.2398]) -Greedy action tensor([-1.6004, -0.4999, 0.4980, -0.0431]) tensor([0.0592, 0.1778, 0.4823, 0.2807]) -Greedy action tensor([-0.6754, 0.4233, 0.0480, -0.1351]) tensor([0.1286, 0.3857, 0.2650, 0.2207]) -Greedy action tensor([-1.7273, -0.4041, 0.5638, -0.0643]) tensor([0.0502, 0.1886, 0.4964, 0.2649]) -Greedy action tensor([-1.9064, -0.4331, 0.6464, -0.1631]) tensor([0.0418, 0.1824, 0.5368, 0.2389]) -Greedy action tensor([-1.5312, -0.2788, 0.4155, 0.0189]) tensor([0.0617, 0.2158, 0.4320, 0.2906]) -Greedy action tensor([-1.9325, -0.3998, 0.6527, -0.1782]) tensor([0.0405, 0.1877, 0.5376, 0.2342]) -Greedy action tensor([ 1.0770, -0.5234, -0.0126, -0.6178]) tensor([0.5808, 0.1172, 0.1954, 0.1067]) -Greedy action tensor([ 0.5230, 0.0987, -0.0969, -0.0283]) tensor([0.3612, 0.2363, 0.1943, 0.2081]) -Greedy action tensor([ 0.7257, -0.3497, -0.1185, 0.1214]) tensor([0.4315, 0.1472, 0.1855, 0.2358]) -Greedy action tensor([ 0.7106, -0.6204, -0.0082, -0.3288]) tensor([0.4750, 0.1255, 0.2315, 0.1680]) -Greedy action tensor([ 0.3260, 0.0998, -0.0301, 0.0184]) tensor([0.3093, 0.2467, 0.2166, 0.2274]) -Greedy action tensor([ 0.6575, -0.4006, -0.2539, -0.3965]) tensor([0.4767, 0.1655, 0.1916, 0.1662]) -Greedy action tensor([ 0.8244, -0.6753, 0.0882, -0.6782]) tensor([0.5196, 0.1160, 0.2488, 0.1156]) -Greedy action tensor([ 0.8031, -0.4635, 0.0066, -0.1140]) tensor([0.4690, 0.1321, 0.2115, 0.1874]) -Greedy action tensor([ 1.3478, -1.6212, -0.0940, -0.7103]) tensor([0.7064, 0.0363, 0.1671, 0.0902]) -Greedy action tensor([ 0.7059, -0.6807, 0.3258, -0.4476]) tensor([0.4446, 0.1111, 0.3040, 0.1403]) -Greedy action tensor([ 1.0040, -0.3701, 0.0217, -0.1088]) tensor([0.5112, 0.1294, 0.1914, 0.1680]) -Greedy action tensor([ 0.9801, -0.9088, 0.0643, -0.6655]) tensor([0.5733, 0.0867, 0.2294, 0.1106]) -Greedy action tensor([ 1.0504, -0.3758, -0.1261, -0.4626]) tensor([0.5654, 0.1358, 0.1743, 0.1245]) -Greedy action tensor([ 0.3960, -0.1755, -0.1836, -0.4497]) tensor([0.3915, 0.2211, 0.2193, 0.1681]) -Greedy action tensor([ 1.2350, -0.7949, -0.1997, -0.4936]) tensor([0.6464, 0.0849, 0.1540, 0.1148]) -Greedy action tensor([ 0.9127, -0.4904, -0.0466, -0.2535]) tensor([0.5153, 0.1267, 0.1974, 0.1605]) -Greedy action tensor([ 0.6172, 0.1357, 0.0293, -0.3577]) tensor([0.3921, 0.2422, 0.2178, 0.1479]) -Greedy action tensor([ 1.0113, -0.8586, 0.1334, -0.5273]) tensor([0.5604, 0.0864, 0.2329, 0.1203]) -Greedy action tensor([ 1.3242, -0.9072, -0.1088, -0.9236]) tensor([0.6889, 0.0740, 0.1644, 0.0728]) -Greedy action tensor([ 0.9435, -0.6079, 0.0872, -0.2064]) tensor([0.5119, 0.1085, 0.2174, 0.1621]) -Greedy action tensor([ 0.7669, -0.4236, 0.0110, -0.3221]) tensor([0.4739, 0.1441, 0.2225, 0.1595]) -Greedy action tensor([ 0.7590, -0.6343, 0.0404, -0.4580]) tensor([0.4922, 0.1222, 0.2399, 0.1457]) -Greedy action tensor([ 0.9018, -0.7122, 0.0697, -0.4105]) tensor([0.5254, 0.1046, 0.2286, 0.1414]) -Greedy action tensor([ 0.6908, -0.3724, -0.1507, -0.2566]) tensor([0.4621, 0.1596, 0.1992, 0.1792]) -Greedy action tensor([ 0.7098, -0.4244, 0.0623, -0.1928]) tensor([0.4443, 0.1429, 0.2325, 0.1802]) -Greedy action tensor([ 0.4560, -0.0962, -0.0725, -0.2853]) tensor([0.3785, 0.2179, 0.2231, 0.1804]) -Greedy action tensor([ 0.4242, -0.1391, 0.0342, -0.6737]) tensor([0.3876, 0.2207, 0.2624, 0.1293]) -Greedy action tensor([ 1.0584, -0.2658, -0.0390, -0.4066]) tensor([0.5462, 0.1453, 0.1823, 0.1262]) -Greedy action tensor([ 0.7411, -0.2905, -0.2305, -0.1686]) tensor([0.4678, 0.1668, 0.1771, 0.1884]) -Greedy action tensor([ 0.9173, -0.3831, -0.0112, -0.4480]) tensor([0.5200, 0.1417, 0.2055, 0.1328]) -Greedy action tensor([ 0.4445, -0.1682, -0.0214, -0.1204]) tensor([0.3653, 0.1979, 0.2292, 0.2076]) -Greedy action tensor([ 0.7935, -0.4582, -0.0609, -0.3922]) tensor([0.4958, 0.1418, 0.2110, 0.1515]) -Greedy action tensor([ 1.0145, -0.7811, 0.0957, -0.4144]) tensor([0.5541, 0.0920, 0.2211, 0.1328]) -Greedy action tensor([ 0.7757, -0.5745, 0.0468, -0.3162]) tensor([0.4814, 0.1248, 0.2323, 0.1616]) -Greedy action tensor([ 1.4255, -0.8921, -0.1554, -0.8522]) tensor([0.7108, 0.0700, 0.1463, 0.0729]) -Greedy action tensor([ 0.4126, -0.3166, -0.1001, -0.0555]) tensor([0.3694, 0.1781, 0.2212, 0.2313]) -Greedy action tensor([ 0.8009, -0.3778, -0.0038, -0.2390]) tensor([0.4743, 0.1459, 0.2121, 0.1677]) -Greedy action tensor([ 0.6381, -0.4482, -0.0977, -0.3745]) tensor([0.4587, 0.1548, 0.2198, 0.1666]) -Greedy action tensor([ 0.9118, -0.1884, 0.0239, -0.3619]) tensor([0.4940, 0.1644, 0.2033, 0.1382]) -Greedy action tensor([ 0.8804, -0.3751, -0.1143, -0.5318]) tensor([0.5268, 0.1501, 0.1948, 0.1283]) -Greedy action tensor([ 0.6324, -0.2233, -0.0979, -0.2083]) tensor([0.4277, 0.1818, 0.2060, 0.1845]) -Greedy action tensor([ 0.2325, -0.0305, -0.1510, -0.0537]) tensor([0.3124, 0.2401, 0.2129, 0.2346]) -Greedy action tensor([ 0.5633, 0.1671, -0.1392, 0.1423]) tensor([0.3540, 0.2382, 0.1754, 0.2324]) -Greedy action tensor([ 0.7414, -0.3399, -0.0391, -0.0159]) tensor([0.4413, 0.1496, 0.2022, 0.2069]) -Greedy action tensor([ 0.9785, -0.4932, -0.2352, -0.4920]) tensor([0.5693, 0.1307, 0.1691, 0.1308]) -Greedy action tensor([ 0.3431, -0.4071, 0.0021, -0.2403]) tensor([0.3648, 0.1723, 0.2594, 0.2035]) -Greedy action tensor([ 0.8120, -0.6173, -0.0951, -0.5760]) tensor([0.5283, 0.1265, 0.2133, 0.1319]) -Greedy action tensor([ 0.8379, -0.4336, -0.0642, -0.3159]) tensor([0.4996, 0.1401, 0.2027, 0.1576]) -Greedy action tensor([ 1.1719, -0.7357, -0.0869, -0.4856]) tensor([0.6161, 0.0915, 0.1750, 0.1174]) -Greedy action tensor([ 0.8893, -0.6124, -0.0109, -0.3469]) tensor([0.5209, 0.1160, 0.2117, 0.1513]) -Greedy action tensor([ 0.9094, -0.5511, -0.0398, -0.4117]) tensor([0.5302, 0.1231, 0.2052, 0.1415]) -Greedy action tensor([ 0.6481, -0.2895, -0.0215, -0.1331]) tensor([0.4235, 0.1658, 0.2168, 0.1939]) -Greedy action tensor([ 0.8914, -0.5522, -0.0333, -0.3843]) tensor([0.5230, 0.1235, 0.2075, 0.1460]) -Greedy action tensor([ 0.6878, -0.4617, 0.0408, -0.2018]) tensor([0.4442, 0.1407, 0.2326, 0.1825]) -Greedy action tensor([ 0.8332, -0.5460, -0.1500, -0.3014]) tensor([0.5135, 0.1293, 0.1921, 0.1651]) -Greedy action tensor([ 0.7832, 0.0750, -0.0461, -0.1842]) tensor([0.4331, 0.2133, 0.1890, 0.1646]) -Greedy action tensor([ 0.8785, -0.4460, -0.1470, -0.4338]) tensor([0.5281, 0.1404, 0.1894, 0.1421]) -Greedy action tensor([ 0.6507, -0.4876, -0.1863, -0.1004]) tensor([0.4494, 0.1440, 0.1946, 0.2121]) -Greedy action tensor([ 0.2975, 0.2478, -0.0445, -0.3322]) tensor([0.3130, 0.2979, 0.2223, 0.1668]) -Greedy action tensor([ 1.0974, -0.5486, -0.0370, -0.7472]) tensor([0.5979, 0.1153, 0.1923, 0.0945]) -Greedy action tensor([ 0.8975, -0.7981, 0.1833, -0.4377]) tensor([0.5165, 0.0948, 0.2529, 0.1359]) -Greedy action tensor([ 0.7634, -0.1215, -0.0870, -0.0391]) tensor([0.4370, 0.1804, 0.1867, 0.1959]) -Greedy action tensor([ 1.1397, -1.0290, 0.2036, -0.6302]) tensor([0.5964, 0.0682, 0.2339, 0.1016]) -Greedy action tensor([ 1.0026, -0.3945, 0.1230, -0.3469]) tensor([0.5204, 0.1287, 0.2159, 0.1350]) -Greedy action tensor([ 0.5059, -0.2486, -0.0481, -0.0101]) tensor([0.3785, 0.1780, 0.2175, 0.2259]) -Greedy action tensor([ 0.9535, -0.5405, 0.0182, -0.3229]) tensor([0.5274, 0.1184, 0.2070, 0.1472]) -Greedy action tensor([ 0.6823, -0.2626, -0.0325, -0.2866]) tensor([0.4430, 0.1722, 0.2167, 0.1681]) -Greedy action tensor([ 0.5069, -0.0198, -0.1385, -0.3087]) tensor([0.3910, 0.2309, 0.2051, 0.1730]) -Greedy action tensor([ 0.6467, -0.7719, -0.1745, -0.1623]) tensor([0.4701, 0.1138, 0.2068, 0.2093]) -Greedy action tensor([ 1.0121, -0.6727, 0.1080, -0.6178]) tensor([0.5598, 0.1038, 0.2267, 0.1097]) -Greedy action tensor([ 1.1001, -0.9100, 0.1298, -0.7064]) tensor([0.5962, 0.0799, 0.2260, 0.0979]) -Greedy action tensor([ 0.9085, -0.1323, 0.0687, 0.0382]) tensor([0.4538, 0.1603, 0.1959, 0.1901]) -Greedy action tensor([ 0.5698, 0.1735, -0.1361, 0.1067]) tensor([0.3577, 0.2406, 0.1766, 0.2251]) -Greedy action tensor([ 0.4731, 0.0152, 0.0222, -0.0662]) tensor([0.3505, 0.2218, 0.2233, 0.2044]) -Greedy action tensor([ 0.5557, 0.0955, -0.0549, 0.0604]) tensor([0.3593, 0.2267, 0.1951, 0.2189]) -Greedy action tensor([ 0.3976, 0.1757, -0.2720, 0.1060]) tensor([0.3268, 0.2618, 0.1673, 0.2441]) -Greedy action tensor([ 0.7082, -0.1409, -0.1127, -0.5838]) tensor([0.4667, 0.1997, 0.2054, 0.1282]) -Greedy action tensor([ 1.2623, -0.5794, -0.0865, -0.6002]) tensor([0.6356, 0.1008, 0.1650, 0.0987]) -Greedy action tensor([ 1.3793, -0.8658, 0.0095, -0.6258]) tensor([0.6690, 0.0709, 0.1700, 0.0901]) -Greedy action tensor([ 0.6241, 0.1566, -0.0690, 0.0837]) tensor([0.3691, 0.2313, 0.1846, 0.2150]) -Greedy action tensor([ 0.8277, -0.5065, 0.0382, -0.4501]) tensor([0.5010, 0.1319, 0.2275, 0.1396]) -Greedy action tensor([-0.0459, -1.5496, -0.4719, -0.5085]) tensor([0.3992, 0.0887, 0.2607, 0.2513]) -Greedy action tensor([-0.4863, -0.9121, -1.4018, -0.2124]) tensor([0.2969, 0.1939, 0.1188, 0.3904]) -Greedy action tensor([ 0.6568, -0.7404, -0.2384, 0.4805]) tensor([0.4009, 0.0991, 0.1638, 0.3361]) -Greedy action tensor([ 0.5257, -0.7205, 0.9516, -0.0696]) tensor([0.2967, 0.0853, 0.4543, 0.1636]) -Greedy action tensor([-0.2048, -0.7784, 0.0615, -0.5039]) tensor([0.2770, 0.1561, 0.3615, 0.2054]) -Greedy action tensor([-0.9007, -0.8748, 0.7346, -0.3142]) tensor([0.1117, 0.1146, 0.5730, 0.2007]) -Greedy action tensor([-0.9625, -1.9838, -0.3391, 0.3128]) tensor([0.1469, 0.0529, 0.2741, 0.5260]) -Greedy action tensor([0.5371, 0.0090, 0.7129, 0.1878]) tensor([0.2868, 0.1691, 0.3419, 0.2022]) -Greedy action tensor([-0.8516, -0.5473, -0.0033, -1.0223]) tensor([0.1807, 0.2449, 0.4220, 0.1523]) -Greedy action tensor([-0.6019, -0.6537, 0.5679, -0.3745]) tensor([0.1556, 0.1478, 0.5013, 0.1954]) -Greedy action tensor([ 1.2559, -0.4986, -0.7713, 0.0154]) tensor([0.6274, 0.1085, 0.0826, 0.1815]) -Greedy action tensor([ 1.3746, 0.1228, -0.0396, 0.2296]) tensor([0.5413, 0.1548, 0.1316, 0.1723]) -Greedy action tensor([ 0.2774, 0.4406, 0.2262, -0.5279]) tensor([0.2798, 0.3294, 0.2658, 0.1250]) -Greedy action tensor([ 0.5440, -0.7706, -0.7950, -0.1094]) tensor([0.4876, 0.1310, 0.1278, 0.2537]) -Greedy action tensor([ 0.0388, -1.1840, -0.4131, 0.4214]) tensor([0.2944, 0.0867, 0.1874, 0.4316]) -Greedy action tensor([ 0.1840, -0.8717, 0.1416, -0.6576]) tensor([0.3653, 0.1271, 0.3501, 0.1575]) -Greedy action tensor([-0.3566, -0.2517, 0.7687, -1.0946]) tensor([0.1764, 0.1959, 0.5434, 0.0843]) -Greedy action tensor([ 0.5736, -1.1367, 0.4570, 0.0398]) tensor([0.3764, 0.0680, 0.3349, 0.2207]) -Greedy action tensor([ 0.2229, -1.4875, -0.0535, -0.9928]) tensor([0.4473, 0.0809, 0.3393, 0.1326]) -Greedy action tensor([-0.7536, -0.2430, 0.0101, -0.3429]) tensor([0.1582, 0.2636, 0.3396, 0.2386]) -Greedy action tensor([-0.3138, -0.8115, 0.6132, -0.4610]) tensor([0.2001, 0.1216, 0.5056, 0.1727]) -Greedy action tensor([-1.5950, -0.7551, -0.4493, -0.5896]) tensor([0.1088, 0.2519, 0.3421, 0.2973]) -Greedy action tensor([-0.2519, -0.0592, 0.3505, -0.6885]) tensor([0.2134, 0.2588, 0.3898, 0.1379]) -Greedy action tensor([ 0.1956, -0.3566, -0.8201, -0.6478]) tensor([0.4223, 0.2431, 0.1529, 0.1817]) -Greedy action tensor([-0.2328, 0.3528, 0.8426, -0.1162]) tensor([0.1460, 0.2622, 0.4278, 0.1640]) -Greedy action tensor([ 0.3595, 0.2467, 0.7474, -0.2732]) tensor([0.2565, 0.2292, 0.3781, 0.1363]) -Greedy action tensor([-1.0466, -0.1608, 0.4837, -0.7831]) tensor([0.1070, 0.2595, 0.4943, 0.1393]) -Greedy action tensor([ 0.8582, -1.4918, 0.4406, 0.6446]) tensor([0.3904, 0.0372, 0.2571, 0.3153]) -Greedy action tensor([ 0.1145, -0.4023, 0.6199, -0.4428]) tensor([0.2613, 0.1559, 0.4332, 0.1497]) -Greedy action tensor([ 0.5212, -0.6493, -0.1491, -0.1200]) tensor([0.4258, 0.1321, 0.2178, 0.2243]) -Greedy action tensor([ 0.4126, 0.0142, -0.0496, -0.2835]) tensor([0.3572, 0.2398, 0.2250, 0.1781]) -Greedy action tensor([ 0.4990, -0.7578, -0.6213, 0.0848]) tensor([0.4402, 0.1253, 0.1436, 0.2909]) -Greedy action tensor([ 1.4123, -0.8324, 0.1944, 0.8139]) tensor([0.5124, 0.0543, 0.1516, 0.2817]) -Greedy action tensor([ 1.1014, -0.5867, 0.8567, 0.3067]) tensor([0.4133, 0.0764, 0.3236, 0.1867]) -Greedy action tensor([ 0.7836, -0.0918, -0.0372, 0.1320]) tensor([0.4205, 0.1752, 0.1851, 0.2192]) -Greedy action tensor([ 1.0438, -1.0903, 0.4592, 0.0026]) tensor([0.4929, 0.0583, 0.2747, 0.1740]) -Greedy action tensor([ 0.4944, -0.2088, -0.0566, 0.3513]) tensor([0.3404, 0.1685, 0.1962, 0.2950]) -Greedy action tensor([-0.0113, -0.2689, -0.1740, -0.4130]) tensor([0.3038, 0.2348, 0.2581, 0.2033]) -Greedy action tensor([ 0.2613, 0.2734, 0.5324, -0.8598]) tensor([0.2740, 0.2773, 0.3594, 0.0893]) -Greedy action tensor([-1.5932, -0.7811, 0.5359, -1.6823]) tensor([0.0795, 0.1791, 0.6686, 0.0727]) -Greedy action tensor([-1.6364, -0.3336, 0.3032, -1.6220]) tensor([0.0790, 0.2909, 0.5499, 0.0802]) -Greedy action tensor([-1.1456, -0.9335, 0.2937, -0.5027]) tensor([0.1197, 0.1480, 0.5047, 0.2276]) -Greedy action tensor([-0.5382, 0.6977, 0.6451, -0.1982]) tensor([0.1097, 0.3777, 0.3584, 0.1542]) -Greedy action tensor([ 0.8972, -0.0677, 0.8167, 0.0675]) tensor([0.3650, 0.1391, 0.3368, 0.1592]) -Greedy action tensor([ 1.2666, -0.3903, -0.1753, -0.0141]) tensor([0.5865, 0.1119, 0.1387, 0.1630]) -Greedy action tensor([ 0.1409, -0.4950, 0.0570, -0.8036]) tensor([0.3524, 0.1866, 0.3240, 0.1370]) -Greedy action tensor([-0.1294, -0.3262, -0.6293, -0.5620]) tensor([0.3250, 0.2670, 0.1971, 0.2109]) -Greedy action tensor([-2.3114, -1.2192, -0.5196, 1.2411]) tensor([0.0223, 0.0664, 0.1337, 0.7776]) -Greedy action tensor([-0.5399, -0.1445, -0.0294, -0.6596]) tensor([0.1985, 0.2947, 0.3307, 0.1761]) -Greedy action tensor([ 1.5599, -0.7199, 0.1683, -0.2522]) tensor([0.6604, 0.0676, 0.1642, 0.1078]) -Greedy action tensor([ 0.3412, -0.2251, 0.2685, -0.3393]) tensor([0.3329, 0.1890, 0.3096, 0.1686]) -Greedy action tensor([0.9060, 0.3147, 0.3368, 0.5389]) tensor([0.3556, 0.1968, 0.2013, 0.2463]) -Greedy action tensor([-0.4295, 0.0200, 0.0111, -0.5048]) tensor([0.1981, 0.3105, 0.3077, 0.1837]) -Greedy action tensor([ 0.6018, -1.3224, 0.8089, -0.2746]) tensor([0.3581, 0.0523, 0.4405, 0.1491]) -Greedy action tensor([-0.8833, -0.2837, 1.0363, -0.9613]) tensor([0.0947, 0.1724, 0.6454, 0.0876]) -Greedy action tensor([-0.4405, 0.6266, -0.3559, -0.9592]) tensor([0.1789, 0.5200, 0.1947, 0.1065]) -Greedy action tensor([ 0.0070, -0.9374, 0.0405, -0.5236]) tensor([0.3321, 0.1292, 0.3434, 0.1954]) -Greedy action tensor([-0.3665, 0.4016, 0.0244, -0.2893]) tensor([0.1750, 0.3772, 0.2587, 0.1891]) -Greedy action tensor([ 0.0406, -0.9773, -0.3981, -0.3717]) tensor([0.3748, 0.1354, 0.2417, 0.2481]) -Greedy action tensor([-0.0870, -0.3383, 0.3543, 0.0213]) tensor([0.2249, 0.1749, 0.3496, 0.2506]) -Greedy action tensor([ 0.1774, -0.5582, 0.3162, -0.0666]) tensor([0.2931, 0.1405, 0.3368, 0.2296]) -Greedy action tensor([ 0.1446, -2.2532, -0.4542, -0.4749]) tensor([0.4590, 0.0417, 0.2522, 0.2470]) -Greedy action tensor([-0.6297, -1.4475, 0.9204, 0.1559]) tensor([0.1198, 0.0529, 0.5645, 0.2628]) -Greedy action tensor([-0.1337, -0.5106, -0.1683, -1.1214]) tensor([0.3306, 0.2268, 0.3194, 0.1231]) -Greedy action tensor([ 1.0992, -0.8746, -0.4209, 0.7543]) tensor([0.4841, 0.0672, 0.1059, 0.3428]) -Greedy action tensor([-1.0656, -0.0211, 1.0589, -0.4066]) tensor([0.0707, 0.2009, 0.5917, 0.1367]) -Greedy action tensor([ 0.7279, -0.4242, 0.3044, -0.2980]) tensor([0.4293, 0.1357, 0.2811, 0.1539]) -Greedy action tensor([ 0.4248, -0.5580, -0.6397, -0.2689]) tensor([0.4507, 0.1687, 0.1554, 0.2252]) -Greedy action tensor([-0.0676, -0.6012, 1.2055, -0.4542]) tensor([0.1713, 0.1005, 0.6119, 0.1164]) -Greedy action tensor([ 1.3773, -0.3816, -0.2443, -0.3614]) tensor([0.6470, 0.1114, 0.1278, 0.1137]) -Greedy action tensor([ 0.4582, 0.0430, -0.3490, 0.4396]) tensor([0.3238, 0.2138, 0.1445, 0.3179]) -Greedy action tensor([ 0.9430, 0.5498, -0.2915, -0.5067]) tensor([0.4544, 0.3067, 0.1322, 0.1066]) -Greedy action tensor([ 0.1318, -1.2121, 0.7739, -0.7254]) tensor([0.2789, 0.0727, 0.5300, 0.1184]) -Greedy action tensor([-0.0160, 0.5185, 0.1401, 0.1118]) tensor([0.1995, 0.3405, 0.2332, 0.2267]) -Greedy action tensor([-0.5187, -0.4906, -0.0534, 0.4120]) tensor([0.1624, 0.1670, 0.2586, 0.4119]) -Greedy action tensor([-0.0431, -0.8204, -0.6060, -0.3198]) tensor([0.3588, 0.1649, 0.2043, 0.2720]) -Greedy action tensor([-0.1117, -0.3327, 0.3871, -0.0273]) tensor([0.2204, 0.1767, 0.3630, 0.2398]) -Greedy action tensor([ 0.6361, -0.0028, 0.7205, 0.9579]) tensor([0.2503, 0.1321, 0.2723, 0.3453]) -Greedy action tensor([-0.9424, -0.8933, 0.1546, -0.2386]) tensor([0.1415, 0.1486, 0.4238, 0.2860]) -Greedy action tensor([ 1.0404, -0.7662, 0.6036, 0.4172]) tensor([0.4262, 0.0700, 0.2753, 0.2285]) -Greedy action tensor([ 0.1987, -1.2756, -0.8919, 0.4325]) tensor([0.3536, 0.0809, 0.1188, 0.4467]) -Greedy action tensor([-0.2874, -0.7202, 0.2544, -0.7032]) tensor([0.2483, 0.1611, 0.4268, 0.1638]) -Greedy action tensor([ 1.7258, -0.8364, -0.3342, 0.5411]) tensor([0.6621, 0.0511, 0.0844, 0.2025]) -Greedy action tensor([ 0.4146, 0.1064, -0.1022, -0.1397]) tensor([0.3442, 0.2529, 0.2053, 0.1977]) -Greedy action tensor([ 1.1856, -0.0929, -0.7058, 0.2042]) tensor([0.5543, 0.1543, 0.0836, 0.2077]) -Greedy action tensor([ 1.5199, -0.4967, -0.4047, 0.6037]) tensor([0.5956, 0.0793, 0.0869, 0.2383]) -Greedy action tensor([ 1.2035, -0.4152, -0.2027, 0.2084]) tensor([0.5516, 0.1093, 0.1352, 0.2039]) -Greedy action tensor([ 1.1570, -0.0711, -0.4600, 0.0678]) tensor([0.5471, 0.1602, 0.1086, 0.1841]) -Greedy action tensor([ 0.8100, -0.2166, -0.7314, 0.1748]) tensor([0.4757, 0.1704, 0.1018, 0.2520]) -Greedy action tensor([ 1.5583, -0.8876, -0.1530, 0.1146]) tensor([0.6652, 0.0576, 0.1202, 0.1570]) -Greedy action tensor([ 1.4605, -0.8478, -0.0847, 0.4959]) tensor([0.5904, 0.0587, 0.1259, 0.2250]) -Greedy action tensor([ 2.2999, -0.9490, -0.7089, 0.1446]) tensor([0.8305, 0.0322, 0.0410, 0.0962]) -Greedy action tensor([ 3.0005, -0.0947, 0.3700, 0.2464]) tensor([0.8468, 0.0383, 0.0610, 0.0539]) -Greedy action tensor([ 1.7385, -0.3859, -1.0178, 0.7882]) tensor([0.6371, 0.0761, 0.0405, 0.2463]) -Greedy action tensor([ 1.4045, -0.5127, -0.0476, 0.2839]) tensor([0.5857, 0.0861, 0.1371, 0.1910]) -Greedy action tensor([ 1.1383, -0.1528, -0.5110, 0.3737]) tensor([0.5174, 0.1423, 0.0994, 0.2409]) -Greedy action tensor([ 1.9765, -0.4237, -0.0827, 0.1489]) tensor([0.7251, 0.0658, 0.0925, 0.1166]) -Greedy action tensor([ 1.4994, -0.2945, -0.9440, 0.3550]) tensor([0.6363, 0.1058, 0.0553, 0.2026]) -Greedy action tensor([ 1.3929, -0.6688, -0.3267, 0.3360]) tensor([0.6046, 0.0769, 0.1083, 0.2101]) -Greedy action tensor([ 1.1447, -0.4680, -0.7131, 0.1236]) tensor([0.5829, 0.1162, 0.0909, 0.2100]) -Greedy action tensor([ 1.3540, 0.2030, -1.1436, 0.0576]) tensor([0.5980, 0.1892, 0.0492, 0.1636]) -Greedy action tensor([ 1.0887, -0.4172, -0.3964, 0.4244]) tensor([0.5094, 0.1130, 0.1154, 0.2622]) -Greedy action tensor([ 1.6198, -0.6770, -0.2799, 0.0943]) tensor([0.6813, 0.0685, 0.1019, 0.1482]) -Greedy action tensor([ 1.4400, -0.2494, -0.8436, 0.2210]) tensor([0.6321, 0.1167, 0.0644, 0.1868]) -Greedy action tensor([ 1.4392, -0.2874, -0.9794, 0.1974]) tensor([0.6427, 0.1143, 0.0572, 0.1857]) -Greedy action tensor([ 0.9596, -0.3292, -0.2168, 0.3307]) tensor([0.4723, 0.1302, 0.1457, 0.2518]) -Greedy action tensor([ 1.3627, -0.1003, -0.3616, 0.6057]) tensor([0.5322, 0.1232, 0.0949, 0.2497]) -Greedy action tensor([ 1.9228, -0.6231, -0.3597, 0.4849]) tensor([0.7053, 0.0553, 0.0720, 0.1675]) -Greedy action tensor([ 1.5522, -0.6243, -0.7847, 0.1637]) tensor([0.6852, 0.0777, 0.0662, 0.1709]) -Greedy action tensor([ 1.3776, -0.1993, -0.5359, 0.2084]) tensor([0.6007, 0.1241, 0.0886, 0.1866]) -Greedy action tensor([ 1.4825, 0.1982, -0.8853, 0.6808]) tensor([0.5497, 0.1522, 0.0515, 0.2466]) -Greedy action tensor([ 1.1996, -0.6115, -0.1577, 0.5496]) tensor([0.5147, 0.0841, 0.1325, 0.2687]) -Greedy action tensor([ 1.5451, -0.0907, -0.6568, -0.0170]) tensor([0.6600, 0.1286, 0.0730, 0.1384]) -Greedy action tensor([ 1.2794, -0.4140, -0.0828, 0.8264]) tensor([0.4818, 0.0886, 0.1234, 0.3063]) -Greedy action tensor([ 1.3085, 0.3630, -0.9240, 0.3094]) tensor([0.5365, 0.2084, 0.0575, 0.1975]) -Greedy action tensor([ 0.6001, -0.3241, 0.0533, 0.1711]) tensor([0.3807, 0.1511, 0.2203, 0.2479]) -Greedy action tensor([ 1.3414, -0.4053, -0.1738, -0.0023]) tensor([0.6042, 0.1053, 0.1328, 0.1576]) -Greedy action tensor([ 1.4198, -0.5028, -0.7526, 0.7044]) tensor([0.5717, 0.0836, 0.0651, 0.2796]) -Greedy action tensor([ 1.2955e+00, -5.0334e-01, 7.4429e-02, -3.8281e-04]) tensor([0.5767, 0.0954, 0.1701, 0.1578]) -Greedy action tensor([ 1.3443, -0.1427, -0.9879, 0.2019]) tensor([0.6089, 0.1376, 0.0591, 0.1943]) -Greedy action tensor([ 1.4932, 0.0162, -0.4949, 0.3543]) tensor([0.5933, 0.1355, 0.0813, 0.1900]) -Greedy action tensor([ 1.3298, -0.4314, -0.3065, 0.0865]) tensor([0.6042, 0.1038, 0.1176, 0.1743]) -Greedy action tensor([ 1.3587, -0.3180, -0.5939, 0.2410]) tensor([0.6039, 0.1129, 0.0857, 0.1975]) -Greedy action tensor([ 1.5947, -0.6615, -0.4029, 0.0584]) tensor([0.6870, 0.0720, 0.0932, 0.1478]) -Greedy action tensor([ 1.1146, -0.7196, -0.2139, 0.2959]) tensor([0.5360, 0.0856, 0.1420, 0.2364]) -Greedy action tensor([ 1.2105, -0.3500, -0.5182, 0.6115]) tensor([0.5163, 0.1084, 0.0916, 0.2836]) -Greedy action tensor([ 1.7277, 0.0522, -0.4918, 0.4343]) tensor([0.6368, 0.1192, 0.0692, 0.1747]) -Greedy action tensor([ 1.4813, -0.4233, -1.0085, 0.7513]) tensor([0.5835, 0.0869, 0.0484, 0.2812]) -Greedy action tensor([ 2.0514, -0.7731, -0.3809, 0.1965]) tensor([0.7671, 0.0455, 0.0674, 0.1200]) -Greedy action tensor([ 1.3315, 0.0799, -0.8580, 0.5508]) tensor([0.5388, 0.1541, 0.0603, 0.2468]) -Greedy action tensor([ 1.4102, -0.4182, -0.1368, 0.4056]) tensor([0.5748, 0.0923, 0.1224, 0.2105]) -Greedy action tensor([ 2.5655, -0.0344, -0.8606, 0.5416]) tensor([0.8072, 0.0600, 0.0262, 0.1067]) -Greedy action tensor([ 1.1893, -0.1680, -0.1253, 0.3068]) tensor([0.5155, 0.1327, 0.1385, 0.2133]) -Greedy action tensor([ 1.3894, -0.4109, -0.4503, 0.3997]) tensor([0.5897, 0.0974, 0.0937, 0.2192]) -Greedy action tensor([ 1.5661, -0.3355, -0.2238, 0.4740]) tensor([0.6054, 0.0904, 0.1011, 0.2031]) -Greedy action tensor([ 1.3879, -0.1627, -0.6468, 0.5385]) tensor([0.5648, 0.1198, 0.0738, 0.2416]) -Greedy action tensor([ 1.3128, -0.7460, -0.3110, 0.8048]) tensor([0.5191, 0.0662, 0.1023, 0.3123]) -Greedy action tensor([ 2.7245, -0.2404, 0.1604, 0.0811]) tensor([0.8336, 0.0430, 0.0642, 0.0593]) -Greedy action tensor([ 1.5572, -0.6302, -0.6029, 0.6002]) tensor([0.6205, 0.0696, 0.0716, 0.2383]) -Greedy action tensor([ 1.5389, -0.1669, -0.7508, 0.3544]) tensor([0.6294, 0.1143, 0.0638, 0.1925]) -Greedy action tensor([ 1.7600, -0.7228, -0.6065, 0.4968]) tensor([0.6849, 0.0572, 0.0643, 0.1937]) -Greedy action tensor([ 1.2967, -0.2103, -0.9229, 0.4460]) tensor([0.5690, 0.1261, 0.0618, 0.2430]) -Greedy action tensor([ 1.6523, -0.8105, -0.3129, 0.8975]) tensor([0.5898, 0.0502, 0.0826, 0.2773]) -Greedy action tensor([ 2.1325, -1.2052, -0.1102, 0.4969]) tensor([0.7482, 0.0266, 0.0794, 0.1458]) -Greedy action tensor([ 1.1391, -0.2809, -0.7618, -0.0517]) tensor([0.5899, 0.1426, 0.0882, 0.1793]) -Greedy action tensor([ 1.5723, -0.0934, -0.9057, 0.1935]) tensor([0.6558, 0.1240, 0.0550, 0.1652]) -Greedy action tensor([ 2.3877, -1.2577, -0.6407, 0.3323]) tensor([0.8316, 0.0217, 0.0402, 0.1065]) -Greedy action tensor([ 1.2383, 0.1896, -1.2610, 0.2471]) tensor([0.5544, 0.1943, 0.0455, 0.2058]) -Greedy action tensor([ 1.2618, -0.3903, -0.6959, 0.4184]) tensor([0.5672, 0.1087, 0.0801, 0.2440]) -Greedy action tensor([ 2.0493, -0.5218, -1.3762, 0.5437]) tensor([0.7514, 0.0574, 0.0244, 0.1667]) -Greedy action tensor([ 1.7809, -0.2093, -0.5256, 0.0831]) tensor([0.7045, 0.0963, 0.0702, 0.1290]) -Greedy action tensor([ 1.7432, 0.0544, -0.8387, 0.6833]) tensor([0.6223, 0.1150, 0.0471, 0.2156]) -Greedy action tensor([ 1.6710, -0.0241, -0.7480, 0.5498]) tensor([0.6256, 0.1148, 0.0557, 0.2039]) -Greedy action tensor([ 1.3491, -0.2863, -0.2551, 0.2113]) tensor([0.5826, 0.1135, 0.1171, 0.1867]) -Greedy action tensor([ 1.5223, 0.1566, -1.1557, -0.1014]) tensor([0.6574, 0.1678, 0.0452, 0.1296]) -Greedy action tensor([ 1.5346, -0.4060, -0.3258, 0.4029]) tensor([0.6166, 0.0886, 0.0960, 0.1989]) -Greedy action tensor([ 1.4165, -0.1838, -0.8008, -0.0116]) tensor([0.6450, 0.1302, 0.0702, 0.1546]) -Greedy action tensor([ 1.3873, 0.0317, -1.0375, -0.0163]) tensor([0.6281, 0.1619, 0.0556, 0.1543]) -Greedy action tensor([ 1.5566, -0.3651, -0.4876, 0.4919]) tensor([0.6170, 0.0903, 0.0799, 0.2128]) -Greedy action tensor([ 1.5346, -0.1265, -0.3824, 0.3317]) tensor([0.6108, 0.1160, 0.0898, 0.1834]) -Greedy action tensor([ 1.4818, 0.2021, -0.6003, -0.1405]) tensor([0.6249, 0.1738, 0.0779, 0.1234]) -Greedy action tensor([ 1.0564, -0.5360, -0.3216, 0.1713]) tensor([0.5353, 0.1089, 0.1349, 0.2209]) -Greedy action tensor([ 1.3690, -0.3226, 0.1705, -0.2004]) tensor([0.5903, 0.1087, 0.1781, 0.1229]) -Greedy action tensor([ 0.9275, -0.3753, -0.0890, -0.1278]) tensor([0.5046, 0.1371, 0.1826, 0.1756]) -Greedy action tensor([ 0.5502, -0.1373, 0.0658, -0.1449]) tensor([0.3820, 0.1921, 0.2353, 0.1906]) -Greedy action tensor([ 0.6149, -0.4074, -0.0415, -0.3985]) tensor([0.4461, 0.1605, 0.2314, 0.1619]) -Greedy action tensor([ 0.7757, -0.5987, -0.0463, -0.2654]) tensor([0.4888, 0.1237, 0.2149, 0.1726]) -Greedy action tensor([ 0.5963, -0.1681, 0.0017, 0.0388]) tensor([0.3861, 0.1798, 0.2130, 0.2211]) -Greedy action tensor([ 1.0788, -0.7760, -0.0480, -0.5492]) tensor([0.5963, 0.0933, 0.1933, 0.1171]) -Greedy action tensor([ 0.3755, 0.0257, -0.1139, -0.0720]) tensor([0.3382, 0.2383, 0.2073, 0.2162]) -Greedy action tensor([ 1.2538, -0.8431, -0.0393, -0.6331]) tensor([0.6457, 0.0793, 0.1772, 0.0978]) -Greedy action tensor([ 0.8053, -0.6404, 0.0588, -0.2374]) tensor([0.4849, 0.1142, 0.2299, 0.1709]) -Greedy action tensor([ 1.0408, -0.7172, 0.0713, -0.4706]) tensor([0.5643, 0.0973, 0.2140, 0.1245]) -Greedy action tensor([ 0.5531, -0.3603, 0.0034, -0.2819]) tensor([0.4146, 0.1663, 0.2393, 0.1799]) -Greedy action tensor([ 0.7240, -0.0289, -0.0265, -0.0345]) tensor([0.4147, 0.1953, 0.1958, 0.1942]) -Greedy action tensor([ 1.2251, -0.2279, -0.3730, -0.5360]) tensor([0.6219, 0.1454, 0.1258, 0.1069]) -Greedy action tensor([ 1.1522, -0.6037, -0.0094, -0.5154]) tensor([0.5972, 0.1032, 0.1869, 0.1127]) -Greedy action tensor([ 0.8592, -0.6593, 0.0024, -0.3381]) tensor([0.5140, 0.1126, 0.2182, 0.1552]) -Greedy action tensor([ 1.0188, -0.4377, 0.0277, -0.3413]) tensor([0.5374, 0.1252, 0.1995, 0.1379]) -Greedy action tensor([ 1.2464, -0.7407, 0.1744, -0.9730]) tensor([0.6297, 0.0863, 0.2156, 0.0684]) -Greedy action tensor([ 1.2077, -0.9129, -0.0339, -0.6946]) tensor([0.6418, 0.0770, 0.1854, 0.0958]) -Greedy action tensor([ 1.2017, -0.6285, -0.1213, -0.5637]) tensor([0.6258, 0.1004, 0.1667, 0.1071]) -Greedy action tensor([ 0.8317, -0.4389, -0.1177, -0.3519]) tensor([0.5066, 0.1422, 0.1961, 0.1551]) -Greedy action tensor([ 0.5557, -0.2852, 0.0326, -0.0835]) tensor([0.3919, 0.1690, 0.2323, 0.2068]) -Greedy action tensor([ 0.5324, -0.1211, 0.0081, -0.0396]) tensor([0.3736, 0.1943, 0.2212, 0.2109]) -Greedy action tensor([ 0.5901, -0.1479, 0.0415, -0.2749]) tensor([0.4037, 0.1930, 0.2333, 0.1700]) -Greedy action tensor([ 0.5009, -0.0532, -0.1273, -0.1451]) tensor([0.3799, 0.2183, 0.2027, 0.1991]) -Greedy action tensor([ 1.0846, -0.5821, -0.1164, -0.6408]) tensor([0.5996, 0.1132, 0.1804, 0.1068]) -Greedy action tensor([ 1.0875, -0.5625, -0.2671, -0.2350]) tensor([0.5826, 0.1119, 0.1503, 0.1552]) -Greedy action tensor([ 0.8418, -0.3634, 0.0371, -0.2465]) tensor([0.4799, 0.1438, 0.2146, 0.1616]) -Greedy action tensor([ 0.6849, -0.1962, 0.0965, -0.2367]) tensor([0.4224, 0.1750, 0.2345, 0.1681]) -Greedy action tensor([ 0.6994, -0.5340, 0.2228, -0.5279]) tensor([0.4534, 0.1321, 0.2815, 0.1329]) -Greedy action tensor([ 0.7150, -0.6600, -0.1614, -0.3638]) tensor([0.4977, 0.1258, 0.2072, 0.1692]) -Greedy action tensor([ 0.7088, -0.6533, -0.0097, -0.2263]) tensor([0.4681, 0.1199, 0.2282, 0.1838]) -Greedy action tensor([ 0.5136, -0.2926, 0.0614, -0.1244]) tensor([0.3830, 0.1710, 0.2437, 0.2023]) -Greedy action tensor([ 0.2055, 0.3498, -0.1519, 0.0812]) tensor([0.2675, 0.3091, 0.1871, 0.2363]) -Greedy action tensor([ 0.8147, -0.5748, -0.0773, -0.6501]) tensor([0.5291, 0.1318, 0.2168, 0.1223]) -Greedy action tensor([ 0.7990, -0.4536, -0.1105, -0.3198]) tensor([0.4962, 0.1418, 0.1998, 0.1621]) -Greedy action tensor([ 0.7052, -0.0043, -0.0274, -0.2809]) tensor([0.4263, 0.2097, 0.2049, 0.1590]) -Greedy action tensor([ 0.5935, -0.8065, -0.1992, -0.2223]) tensor([0.4670, 0.1152, 0.2114, 0.2065]) -Greedy action tensor([ 1.3616, -0.9775, 0.0617, -0.4870]) tensor([0.6551, 0.0632, 0.1786, 0.1032]) -Greedy action tensor([ 0.9023, -0.4881, 0.0915, -0.4931]) tensor([0.5151, 0.1283, 0.2290, 0.1276]) -Greedy action tensor([ 0.7782, -0.8445, -0.0533, -0.2810]) tensor([0.5052, 0.0997, 0.2200, 0.1752]) -Greedy action tensor([ 1.1331, -0.8480, 0.0446, -0.6445]) tensor([0.6084, 0.0839, 0.2049, 0.1028]) -Greedy action tensor([ 0.1243, 0.0518, -0.1139, -0.4566]) tensor([0.3051, 0.2838, 0.2404, 0.1707]) -Greedy action tensor([ 1.2341, -0.9037, -0.0094, -0.5404]) tensor([0.6346, 0.0748, 0.1830, 0.1076]) -Greedy action tensor([ 0.2917, 0.1458, -0.0563, 0.1201]) tensor([0.2930, 0.2532, 0.2069, 0.2468]) -Greedy action tensor([ 0.5810, -0.3678, -0.0517, -0.2228]) tensor([0.4227, 0.1637, 0.2245, 0.1892]) -Greedy action tensor([ 0.1199, 0.3648, -0.1052, -0.1444]) tensor([0.2602, 0.3324, 0.2077, 0.1997]) -Greedy action tensor([ 0.5394, -0.1345, -0.0970, -0.0681]) tensor([0.3871, 0.1973, 0.2048, 0.2108]) -Greedy action tensor([ 0.9938, -0.5761, -0.1262, -0.3827]) tensor([0.5597, 0.1164, 0.1826, 0.1413]) -Greedy action tensor([ 0.8958, -0.7014, -0.0061, -0.6210]) tensor([0.5472, 0.1108, 0.2220, 0.1201]) -Greedy action tensor([ 0.2982, 0.1046, -0.0625, 0.0617]) tensor([0.3021, 0.2489, 0.2106, 0.2384]) -Greedy action tensor([ 1.0640, -0.5882, -0.0519, -0.3943]) tensor([0.5708, 0.1094, 0.1870, 0.1328]) -Greedy action tensor([ 0.9248, -0.6711, 0.0811, -0.4349]) tensor([0.5292, 0.1073, 0.2276, 0.1359]) -Greedy action tensor([ 0.5487, -0.0511, 0.1095, 0.0116]) tensor([0.3600, 0.1976, 0.2320, 0.2104]) -Greedy action tensor([ 1.4653, -0.8197, -0.2255, -0.6876]) tensor([0.7131, 0.0726, 0.1315, 0.0828]) -Greedy action tensor([ 0.8496, -0.6337, -0.0564, -0.4189]) tensor([0.5229, 0.1187, 0.2113, 0.1471]) -Greedy action tensor([ 0.8030, -0.4888, -0.0179, -0.2166]) tensor([0.4818, 0.1324, 0.2120, 0.1738]) -Greedy action tensor([ 0.4827, 0.0345, -0.0794, 0.0856]) tensor([0.3471, 0.2217, 0.1979, 0.2333]) -Greedy action tensor([ 0.5860, -0.3076, -0.1764, -0.3452]) tensor([0.4406, 0.1803, 0.2055, 0.1736]) -Greedy action tensor([ 0.3692, -0.0286, -0.0053, -0.2545]) tensor([0.3454, 0.2320, 0.2375, 0.1851]) -Greedy action tensor([ 0.8965, -0.5074, -0.2246, -0.3672]) tensor([0.5393, 0.1325, 0.1758, 0.1524]) -Greedy action tensor([ 1.1902, -0.7360, -0.0838, -0.5749]) tensor([0.6263, 0.0913, 0.1752, 0.1072]) -Greedy action tensor([ 0.9776, -0.6700, -0.0394, -0.4886]) tensor([0.5602, 0.1078, 0.2026, 0.1293]) -Greedy action tensor([ 0.8507, -0.4031, -0.0816, -0.1197]) tensor([0.4859, 0.1387, 0.1913, 0.1841]) -Greedy action tensor([ 0.7774, -0.6483, -0.0049, -0.4267]) tensor([0.5006, 0.1203, 0.2289, 0.1502]) -Greedy action tensor([ 1.2998, -0.4765, -0.2066, -0.3880]) tensor([0.6346, 0.1074, 0.1407, 0.1173]) -Greedy action tensor([ 0.8243, -0.4555, 0.1371, -0.6593]) tensor([0.4980, 0.1385, 0.2505, 0.1130]) -Greedy action tensor([ 0.5943, -0.2109, 0.0006, -0.1465]) tensor([0.4039, 0.1805, 0.2231, 0.1925]) -Greedy action tensor([ 0.9003, -0.4341, -0.0551, -0.2587]) tensor([0.5097, 0.1342, 0.1961, 0.1600]) -Greedy action tensor([ 0.6827, -0.3641, 0.0228, -0.1835]) tensor([0.4370, 0.1534, 0.2259, 0.1838]) -Greedy action tensor([ 0.9680, -0.5488, -0.0510, -0.4262]) tensor([0.5469, 0.1200, 0.1974, 0.1357]) -Greedy action tensor([ 0.7633, -0.2463, -0.0411, -0.1392]) tensor([0.4510, 0.1643, 0.2018, 0.1829]) -Greedy action tensor([ 0.1626, -0.2890, -0.2216, 0.0131]) tensor([0.3146, 0.2003, 0.2142, 0.2709]) -Greedy action tensor([ 0.5825, 0.0164, -0.1645, 0.0672]) tensor([0.3789, 0.2151, 0.1795, 0.2264]) -Greedy action tensor([ 0.7228, -0.0192, 0.0731, 0.0100]) tensor([0.4018, 0.1913, 0.2098, 0.1970]) -Greedy action tensor([ 0.6042, -0.2723, -0.1552, -0.2345]) tensor([0.4317, 0.1797, 0.2020, 0.1866]) -Greedy action tensor([ 0.7906, -0.2322, -0.0581, -0.2913]) tensor([0.4702, 0.1691, 0.2013, 0.1594]) -Greedy action tensor([ 0.9355, -0.6631, -0.1456, -0.3521]) tensor([0.5502, 0.1113, 0.1867, 0.1518]) -Greedy action tensor([ 0.9974, -0.9926, 0.0693, -0.3038]) tensor([0.5543, 0.0758, 0.2191, 0.1509]) -Greedy action tensor([ 0.9229, -0.5890, -0.0530, -0.3503]) tensor([0.5327, 0.1175, 0.2007, 0.1491]) -Greedy action tensor([ 0.5530, -0.4427, -0.0310, -0.2018]) tensor([0.4172, 0.1541, 0.2326, 0.1961]) -Greedy action tensor([ 0.8562, -0.7285, 0.0247, -0.4526]) tensor([0.5234, 0.1073, 0.2279, 0.1414]) -Greedy action tensor([-1.7046, -0.6675, 0.9352, 0.0151]) tensor([0.0427, 0.1205, 0.5984, 0.2384]) -Greedy action tensor([-1.6345, -0.2299, 0.4844, -0.0699]) tensor([0.0550, 0.2241, 0.4578, 0.2630]) -Greedy action tensor([-1.8302, -0.2158, 0.5721, -0.1187]) tensor([0.0442, 0.2222, 0.4886, 0.2449]) -Greedy action tensor([-1.3194, -0.0732, 0.5129, 0.2283]) tensor([0.0648, 0.2254, 0.4051, 0.3047]) -Greedy action tensor([-1.4546, -0.1221, 0.5726, 0.2610]) tensor([0.0557, 0.2113, 0.4231, 0.3099]) -Greedy action tensor([-1.4234, -0.2211, 0.3888, 0.1050]) tensor([0.0664, 0.2209, 0.4066, 0.3061]) -Greedy action tensor([-1.7360, -0.4364, 0.5552, -0.0793]) tensor([0.0505, 0.1853, 0.4994, 0.2648]) -Greedy action tensor([-1.8960, -0.2151, 0.6059, -0.1435]) tensor([0.0411, 0.2206, 0.5014, 0.2370]) -Greedy action tensor([-1.6145, -0.5723, 0.4887, -0.0468]) tensor([0.0594, 0.1685, 0.4870, 0.2851]) -Greedy action tensor([-1.8200, -0.3692, 0.6410, -0.0823]) tensor([0.0441, 0.1882, 0.5169, 0.2508]) -Greedy action tensor([-1.5861, -0.4897, 0.6336, 0.1856]) tensor([0.0524, 0.1569, 0.4825, 0.3082]) -Greedy action tensor([-0.8159, -0.5676, 0.1600, 0.3208]) tensor([0.1242, 0.1592, 0.3296, 0.3870]) -Greedy action tensor([-1.8794, -0.4424, 0.6293, -0.1461]) tensor([0.0432, 0.1817, 0.5307, 0.2444]) -Greedy action tensor([-1.9156, -0.4435, 0.6530, -0.1630]) tensor([0.0414, 0.1803, 0.5397, 0.2387]) -Greedy action tensor([-1.5554, -0.4932, 0.4205, 0.0335]) tensor([0.0625, 0.1807, 0.4507, 0.3061]) -Greedy action tensor([-1.9131, -0.4265, 0.6525, -0.1615]) tensor([0.0413, 0.1828, 0.5377, 0.2382]) -Greedy action tensor([-1.9170, -0.3802, 0.6390, -0.1735]) tensor([0.0412, 0.1917, 0.5313, 0.2358]) -Greedy action tensor([-1.9366, -0.4350, 0.6619, -0.1760]) tensor([0.0404, 0.1814, 0.5432, 0.2350]) -Greedy action tensor([-1.9452, -0.4487, 0.6673, -0.1813]) tensor([0.0401, 0.1791, 0.5467, 0.2340]) -Greedy action tensor([-1.7718e+00, -1.7813e-04, 5.2527e-01, -5.4729e-02]) tensor([0.0447, 0.2626, 0.4441, 0.2487]) -Greedy action tensor([-1.3469, -0.6198, 0.4881, 0.3083]) tensor([0.0686, 0.1420, 0.4300, 0.3593]) -Greedy action tensor([-1.1742, 0.7503, 0.1585, 0.1604]) tensor([0.0648, 0.4437, 0.2455, 0.2460]) -Greedy action tensor([-0.7721, 0.1989, 0.1253, -0.1411]) tensor([0.1254, 0.3312, 0.3077, 0.2357]) -Greedy action tensor([-1.9219, -0.3366, 0.6402, -0.1786]) tensor([0.0407, 0.1987, 0.5278, 0.2327]) -Greedy action tensor([-1.6055, -0.2474, 0.4277, -0.0650]) tensor([0.0582, 0.2262, 0.4442, 0.2714]) -Greedy action tensor([-1.9429, -0.4480, 0.6669, -0.1796]) tensor([0.0402, 0.1792, 0.5463, 0.2343]) -Greedy action tensor([-1.9124, -0.4125, 0.6599, -0.1585]) tensor([0.0411, 0.1840, 0.5377, 0.2372]) -Greedy action tensor([-1.9081, -0.4128, 0.6593, -0.1408]) tensor([0.0411, 0.1832, 0.5353, 0.2405]) -Greedy action tensor([-1.8547, -0.3253, 0.6226, -0.1296]) tensor([0.0432, 0.1995, 0.5147, 0.2426]) -Greedy action tensor([-1.9103, -0.4101, 0.6508, -0.1529]) tensor([0.0413, 0.1850, 0.5345, 0.2393]) -Greedy action tensor([-1.9128, -0.8155, 0.2523, -0.3304]) tensor([0.0569, 0.1704, 0.4958, 0.2769]) -Greedy action tensor([-1.7774, -0.4504, 0.5817, -0.1242]) tensor([0.0486, 0.1832, 0.5143, 0.2539]) -Greedy action tensor([-1.8284, -0.4739, 0.6287, -0.0972]) tensor([0.0451, 0.1746, 0.5259, 0.2545]) -Greedy action tensor([-1.7289, -0.2913, 0.5337, -0.0367]) tensor([0.0494, 0.2079, 0.4745, 0.2682]) -Greedy action tensor([-1.8962, -0.4551, 0.6452, -0.1587]) tensor([0.0424, 0.1790, 0.5379, 0.2407]) -Greedy action tensor([-1.9176, -0.4360, 0.6686, -0.1602]) tensor([0.0409, 0.1798, 0.5425, 0.2369]) -Greedy action tensor([-1.9218, -0.3396, 0.6390, -0.1613]) tensor([0.0406, 0.1976, 0.5257, 0.2361]) -Greedy action tensor([-0.9370, -0.4897, 0.2509, 0.1699]) tensor([0.1128, 0.1763, 0.3698, 0.3411]) -Greedy action tensor([-1.8993, -0.3521, 0.6353, -0.1550]) tensor([0.0416, 0.1955, 0.5248, 0.2381]) -Greedy action tensor([-1.8937, -0.2371, 0.6356, -0.2614]) tensor([0.0418, 0.2193, 0.5249, 0.2140]) -Greedy action tensor([-1.6903, -0.2138, 0.5084, -0.0872]) tensor([0.0517, 0.2261, 0.4656, 0.2566]) -Greedy action tensor([-1.9048, -0.4533, 0.6452, -0.1631]) tensor([0.0420, 0.1795, 0.5385, 0.2400]) -Greedy action tensor([-1.6552, 0.3643, 0.4138, -0.0703]) tensor([0.0469, 0.3532, 0.3712, 0.2287]) -Greedy action tensor([-1.7774, -0.4790, 0.5673, -0.0970]) tensor([0.0489, 0.1790, 0.5098, 0.2623]) -Greedy action tensor([-1.5979, -0.5632, 0.6133, -0.0669]) tensor([0.0569, 0.1602, 0.5196, 0.2632]) -Greedy action tensor([-1.9280, -0.4384, 0.6611, -0.1726]) tensor([0.0408, 0.1807, 0.5427, 0.2358]) -Greedy action tensor([-1.7885, -0.3527, 0.5893, -0.1635]) tensor([0.0475, 0.1995, 0.5119, 0.2411]) -Greedy action tensor([-1.3061, 0.1974, 0.2476, 0.1171]) tensor([0.0696, 0.3128, 0.3289, 0.2887]) -Greedy action tensor([-1.7985, -0.3457, 0.5743, -0.1149]) tensor([0.0468, 0.1999, 0.5016, 0.2518]) -Greedy action tensor([-1.9019, -0.3696, 0.6420, -0.1380]) tensor([0.0413, 0.1913, 0.5261, 0.2412]) -Greedy action tensor([-1.9110, -0.3856, 0.6473, -0.1609]) tensor([0.0412, 0.1894, 0.5322, 0.2372]) -Greedy action tensor([-1.9218, -0.4229, 0.6623, -0.1666]) tensor([0.0408, 0.1826, 0.5406, 0.2360]) -Greedy action tensor([-1.7924, -0.0634, 0.5725, -0.2528]) tensor([0.0456, 0.2568, 0.4851, 0.2125]) -Greedy action tensor([-1.5429, -0.3392, 0.2995, -0.2876]) tensor([0.0707, 0.2355, 0.4460, 0.2479]) -Greedy action tensor([-1.8377, -0.4735, 0.6064, -0.1171]) tensor([0.0454, 0.1777, 0.5231, 0.2538]) -Greedy action tensor([-1.6576, -0.3432, 0.5224, -0.1279]) tensor([0.0550, 0.2047, 0.4864, 0.2539]) -Greedy action tensor([-1.8611, -0.4240, 0.6206, -0.1343]) tensor([0.0439, 0.1846, 0.5248, 0.2467]) -Greedy action tensor([-1.8239, -0.4817, 0.6071, -0.1350]) tensor([0.0463, 0.1771, 0.5261, 0.2505]) -Greedy action tensor([-1.3895, -0.0031, 0.5550, 0.2510]) tensor([0.0583, 0.2333, 0.4076, 0.3008]) -Greedy action tensor([-1.8887, -0.3744, 0.6450, -0.1306]) tensor([0.0418, 0.1898, 0.5261, 0.2423]) -Greedy action tensor([-0.2514, 0.5981, 0.2983, 0.6725]) tensor([0.1317, 0.3081, 0.2283, 0.3319]) -Greedy action tensor([-1.8763, -0.3911, 0.6289, -0.1475]) tensor([0.0429, 0.1896, 0.5257, 0.2418]) -Greedy action tensor([-1.3052, -0.2377, 0.5495, 0.1811]) tensor([0.0679, 0.1976, 0.4341, 0.3003]) -Greedy action tensor([-1.8355, -0.3438, 0.6525, -0.1001]) tensor([0.0432, 0.1920, 0.5199, 0.2450]) -Greedy action tensor([-1.0660, -0.3402, 0.2887, 0.3435]) tensor([0.0906, 0.1872, 0.3512, 0.3710]) -Greedy action tensor([-1.6018, -0.2570, 0.4554, 0.0027]) tensor([0.0567, 0.2176, 0.4436, 0.2821]) -Greedy action tensor([-0.1804, 0.2704, 0.8435, 1.5824]) tensor([0.0894, 0.1404, 0.2490, 0.5212]) -Greedy action tensor([-1.3901, -0.2904, 0.4462, -0.7366]) tensor([0.0820, 0.2462, 0.5142, 0.1576]) -Greedy action tensor([-1.9112, -0.4633, 0.6482, -0.1659]) tensor([0.0418, 0.1779, 0.5407, 0.2395]) -Greedy action tensor([-1.3506, 0.7434, 0.2286, 0.1218]) tensor([0.0546, 0.4429, 0.2647, 0.2379]) -Greedy action tensor([-1.8854, -0.2993, 0.6102, -0.1541]) tensor([0.0423, 0.2064, 0.5126, 0.2387]) -Greedy action tensor([-1.8673, -0.4189, 0.6246, -0.1452]) tensor([0.0436, 0.1856, 0.5268, 0.2440]) -Greedy action tensor([-1.9450, -0.4495, 0.6678, -0.1809]) tensor([0.0401, 0.1789, 0.5469, 0.2341]) -Greedy action tensor([-1.1005, 0.8752, 0.1493, 0.2388]) tensor([0.0644, 0.4647, 0.2249, 0.2459]) -Greedy action tensor([-1.3661, 0.6634, 0.2304, 0.2270]) tensor([0.0542, 0.4122, 0.2673, 0.2664]) -Greedy action tensor([-1.9388, -0.4484, 0.6691, -0.1757]) tensor([0.0403, 0.1787, 0.5463, 0.2347]) -Greedy action tensor([-1.9450, -0.4498, 0.6666, -0.1807]) tensor([0.0401, 0.1790, 0.5466, 0.2342]) -Greedy action tensor([-0.9424, 0.9469, 0.2204, 0.0250]) tensor([0.0744, 0.4920, 0.2379, 0.1957]) -Greedy action tensor([-1.8154, -0.4294, 0.5879, -0.1078]) tensor([0.0463, 0.1853, 0.5126, 0.2557]) -Greedy action tensor([-1.4321e+00, 3.1083e-02, 3.5114e-01, -3.0696e-05]) tensor([0.0647, 0.2795, 0.3849, 0.2709]) -Greedy action tensor([-1.2046, -0.4654, 0.3284, 0.2660]) tensor([0.0828, 0.1734, 0.3835, 0.3603]) -Greedy action tensor([ 1.4078, -0.8957, -0.6281, 0.7757]) tensor([0.5675, 0.0567, 0.0741, 0.3016]) -Greedy action tensor([ 1.8268, -0.2124, -0.7142, 0.0767]) tensor([0.7232, 0.0941, 0.0570, 0.1257]) -Greedy action tensor([ 1.3346, -0.8414, -0.4245, 0.5326]) tensor([0.5767, 0.0655, 0.0993, 0.2586]) -Greedy action tensor([ 1.9247, -1.0063, -0.1982, 0.5235]) tensor([0.7046, 0.0376, 0.0843, 0.1735]) -Greedy action tensor([ 1.3367, -0.2861, -0.7145, -0.0857]) tensor([0.6381, 0.1259, 0.0821, 0.1539]) -Greedy action tensor([ 1.0750, -0.2060, -0.0866, 0.1753]) tensor([0.5006, 0.1391, 0.1567, 0.2036]) -Greedy action tensor([ 1.6738, -0.6133, -0.3755, 0.0870]) tensor([0.6969, 0.0708, 0.0898, 0.1426]) -Greedy action tensor([ 1.7270, -0.3762, -0.8397, 0.2752]) tensor([0.6978, 0.0852, 0.0536, 0.1634]) -Greedy action tensor([ 1.1178, -0.3765, -0.4060, 0.1188]) tensor([0.5523, 0.1239, 0.1203, 0.2034]) -Greedy action tensor([ 1.1093, -0.1728, -0.8084, 0.1009]) tensor([0.5589, 0.1551, 0.0821, 0.2039]) -Greedy action tensor([ 1.1712, -0.6407, -0.4905, 0.4803]) tensor([0.5393, 0.0881, 0.1024, 0.2703]) -Greedy action tensor([ 1.1860, -0.3414, -0.5441, 0.3039]) tensor([0.5530, 0.1201, 0.0980, 0.2289]) -Greedy action tensor([ 1.9118, -0.2504, -0.3031, 0.3230]) tensor([0.7001, 0.0806, 0.0764, 0.1429]) -Greedy action tensor([ 1.4250, -0.0493, -0.7096, 0.1602]) tensor([0.6137, 0.1405, 0.0726, 0.1732]) -Greedy action tensor([ 0.9984, -0.6518, -0.5721, 0.6289]) tensor([0.4782, 0.0918, 0.0994, 0.3305]) -Greedy action tensor([ 2.1468, -0.8958, 0.2589, 0.8206]) tensor([0.6828, 0.0326, 0.1034, 0.1813]) -Greedy action tensor([ 0.9728, -0.1549, -0.1267, 0.2495]) tensor([0.4669, 0.1512, 0.1555, 0.2265]) -Greedy action tensor([ 1.2885, -0.3600, -0.4643, 0.3248]) tensor([0.5724, 0.1101, 0.0992, 0.2183]) -Greedy action tensor([ 1.4428, -0.9619, -0.3309, 0.7139]) tensor([0.5739, 0.0518, 0.0974, 0.2769]) -Greedy action tensor([ 1.5392, -0.6020, -0.5929, 0.2825]) tensor([0.6576, 0.0773, 0.0780, 0.1871]) -Greedy action tensor([ 1.1999, -0.2430, -1.5583, 0.3017]) tensor([0.5858, 0.1384, 0.0371, 0.2386]) -Greedy action tensor([ 1.2702, 0.1037, -0.0954, 0.4168]) tensor([0.5018, 0.1563, 0.1281, 0.2138]) -Greedy action tensor([ 1.0664, -0.3931, -0.5560, 0.1774]) tensor([0.5432, 0.1262, 0.1072, 0.2233]) -Greedy action tensor([ 1.4034, -0.4418, -0.4009, 0.2525]) tensor([0.6102, 0.0964, 0.1004, 0.1930]) -Greedy action tensor([ 1.5935, -0.5034, -0.0902, 0.5619]) tensor([0.6006, 0.0738, 0.1115, 0.2141]) -Greedy action tensor([ 1.4377, -0.3548, -1.2104, 0.3921]) tensor([0.6294, 0.1048, 0.0446, 0.2212]) -Greedy action tensor([ 1.3382, -0.6433, -0.5076, 0.0680]) tensor([0.6343, 0.0874, 0.1002, 0.1781]) -Greedy action tensor([ 1.6613, -0.1322, -0.4152, 0.1350]) tensor([0.6627, 0.1103, 0.0831, 0.1440]) -Greedy action tensor([ 1.6781, -0.9062, -0.4686, 0.3311]) tensor([0.6886, 0.0519, 0.0805, 0.1790]) -Greedy action tensor([ 1.4267, 0.0526, -1.0172, 0.3741]) tensor([0.5921, 0.1498, 0.0514, 0.2067]) -Greedy action tensor([ 1.5392, -0.3257, -0.5346, 0.3625]) tensor([0.6294, 0.0975, 0.0791, 0.1940]) -Greedy action tensor([ 1.3889, -0.0728, -1.0149, 0.0854]) tensor([0.6274, 0.1455, 0.0567, 0.1704]) -Greedy action tensor([ 1.7310, -0.8155, -0.3720, 0.4346]) tensor([0.6785, 0.0532, 0.0828, 0.1856]) -Greedy action tensor([ 1.0836, -0.1132, -0.1134, 0.0929]) tensor([0.5062, 0.1529, 0.1529, 0.1879]) -Greedy action tensor([ 1.7967, -0.6984, -0.7882, -0.0020]) tensor([0.7556, 0.0623, 0.0570, 0.1251]) -Greedy action tensor([ 1.9260, -0.6160, -0.4538, 0.8345]) tensor([0.6636, 0.0522, 0.0614, 0.2228]) -Greedy action tensor([ 1.6000, -0.7021, -0.5170, 0.2121]) tensor([0.6802, 0.0681, 0.0819, 0.1698]) -Greedy action tensor([ 1.3446, -0.7471, -0.2691, 0.3272]) tensor([0.5938, 0.0733, 0.1183, 0.2147]) -Greedy action tensor([ 2.0212, -1.3274, -0.2878, 0.4521]) tensor([0.7447, 0.0262, 0.0740, 0.1551]) -Greedy action tensor([ 1.0455, -0.7425, -0.0404, -0.1493]) tensor([0.5532, 0.0925, 0.1868, 0.1675]) -Greedy action tensor([ 1.7046, -0.2435, -0.6285, 0.6231]) tensor([0.6335, 0.0903, 0.0614, 0.2148]) -Greedy action tensor([ 1.7432, -0.8069, -0.0794, 0.1487]) tensor([0.6931, 0.0541, 0.1120, 0.1407]) -Greedy action tensor([ 1.2734e+00, -8.6893e-03, -1.1549e-03, -1.9405e-01]) tensor([0.5594, 0.1552, 0.1564, 0.1290]) -Greedy action tensor([ 1.7267, -0.3425, -0.6444, 0.2164]) tensor([0.6942, 0.0877, 0.0648, 0.1533]) -Greedy action tensor([ 1.3914, -0.3815, -0.8470, 0.4887]) tensor([0.5946, 0.1010, 0.0634, 0.2411]) -Greedy action tensor([ 1.5448, -0.1449, -0.1956, 0.6737]) tensor([0.5623, 0.1038, 0.0987, 0.2353]) -Greedy action tensor([ 1.4619, -0.3140, -0.4527, -0.0263]) tensor([0.6483, 0.1098, 0.0956, 0.1464]) -Greedy action tensor([ 1.3750, -0.5946, -0.7984, 0.6599]) tensor([0.5739, 0.0801, 0.0653, 0.2807]) -Greedy action tensor([ 1.9755, -0.5792, -0.7239, 0.5828]) tensor([0.7177, 0.0558, 0.0483, 0.1783]) -Greedy action tensor([ 1.1201, -0.4727, -0.7813, 0.3717]) tensor([0.5477, 0.1114, 0.0818, 0.2591]) -Greedy action tensor([ 1.7562, -0.6022, -0.1727, 0.2077]) tensor([0.6885, 0.0651, 0.1000, 0.1463]) -Greedy action tensor([ 1.5699, 0.1751, -0.0815, 0.2287]) tensor([0.5878, 0.1457, 0.1127, 0.1537]) -Greedy action tensor([ 1.2841, -0.1384, -0.3706, 0.5363]) tensor([0.5248, 0.1265, 0.1003, 0.2484]) -Greedy action tensor([ 1.7971, -0.5751, -0.2748, -0.1813]) tensor([0.7366, 0.0687, 0.0928, 0.1019]) -Greedy action tensor([ 1.6974, -0.6361, -0.5430, 0.1375]) tensor([0.7075, 0.0686, 0.0753, 0.1487]) -Greedy action tensor([ 1.6501, -0.5757, -0.1227, 0.1684]) tensor([0.6644, 0.0717, 0.1129, 0.1510]) -Greedy action tensor([ 1.3471, -0.5969, -0.6867, 0.0317]) tensor([0.6484, 0.0928, 0.0848, 0.1740]) -Greedy action tensor([ 1.6683, -0.5494, -0.3039, 0.1592]) tensor([0.6807, 0.0741, 0.0947, 0.1505]) -Greedy action tensor([ 1.1477, -0.3081, -0.3649, 0.5079]) tensor([0.5048, 0.1177, 0.1112, 0.2662]) -Greedy action tensor([ 1.7624, -1.1117, -0.5833, 0.1651]) tensor([0.7382, 0.0417, 0.0707, 0.1494]) -Greedy action tensor([ 1.7226, -0.5187, -0.4294, 0.5415]) tensor([0.6538, 0.0695, 0.0760, 0.2007]) -Greedy action tensor([ 1.9458, -0.9895, -0.3852, 0.7840]) tensor([0.6834, 0.0363, 0.0664, 0.2139]) -Greedy action tensor([ 2.0411, -1.2844, 0.0523, 0.9529]) tensor([0.6624, 0.0238, 0.0907, 0.2231]) -Greedy action tensor([ 1.4884, -0.6548, -0.4535, 0.4108]) tensor([0.6246, 0.0732, 0.0896, 0.2126]) -Greedy action tensor([ 1.6142, -0.6393, -0.6143, 0.4221]) tensor([0.6595, 0.0693, 0.0710, 0.2002]) -Greedy action tensor([ 2.5265, -0.0114, -0.3058, 0.0081]) tensor([0.8207, 0.0649, 0.0483, 0.0661]) -Greedy action tensor([ 0.8944, -0.2947, -1.0667, 0.6558]) tensor([0.4478, 0.1364, 0.0630, 0.3528]) -Greedy action tensor([ 1.5812, -0.6035, -0.6578, 0.6104]) tensor([0.6258, 0.0704, 0.0667, 0.2371]) -Greedy action tensor([ 1.0804, 0.1216, -0.4921, 0.0915]) tensor([0.5095, 0.1953, 0.1057, 0.1895]) -Greedy action tensor([ 1.4544, -0.3766, -0.4311, 0.3827]) tensor([0.6044, 0.0969, 0.0917, 0.2070]) -Greedy action tensor([ 1.1781, -0.2912, -0.9822, 0.3324]) tensor([0.5635, 0.1297, 0.0650, 0.2419]) -Greedy action tensor([ 1.5561, -0.1206, -1.0211, 0.4635]) tensor([0.6257, 0.1170, 0.0475, 0.2098]) -Greedy action tensor([ 1.5797, -0.2108, -1.0806, -0.0049]) tensor([0.6936, 0.1157, 0.0485, 0.1422]) -Greedy action tensor([ 1.6873, -0.8154, -0.4616, 0.1343]) tensor([0.7092, 0.0581, 0.0827, 0.1501]) -Greedy action tensor([ 1.6178, -0.7180, -0.4343, 0.1652]) tensor([0.6853, 0.0663, 0.0880, 0.1603]) -Greedy action tensor([ 2.4292, -1.3061, -0.3775, 0.9266]) tensor([0.7652, 0.0183, 0.0462, 0.1703]) -Greedy action tensor([ 0.9514, -0.4696, -0.3418, 0.3895]) tensor([0.4794, 0.1158, 0.1315, 0.2733]) -Greedy action tensor([ 1.5875, -0.5041, -0.0413, 0.5247]) tensor([0.6006, 0.0742, 0.1178, 0.2075]) -Greedy action tensor([ 1.4048, -0.2254, -0.7812, 0.2091]) tensor([0.6208, 0.1216, 0.0698, 0.1878]) -Greedy action tensor([ 2.1961, -0.3334, -0.4362, 0.2092]) tensor([0.7760, 0.0618, 0.0558, 0.1064]) -Greedy action tensor([ 1.4589, -0.6556, -0.0082, 0.3769]) tensor([0.5917, 0.0714, 0.1364, 0.2005]) -Greedy action tensor([-0.0244, -0.5079, -0.3043, -0.4428]) tensor([0.3300, 0.2035, 0.2494, 0.2172]) -Greedy action tensor([-0.4328, -1.7587, 0.1008, -0.3525]) tensor([0.2467, 0.0655, 0.4206, 0.2673]) -Greedy action tensor([ 0.3821, -1.0661, 0.4387, -0.0376]) tensor([0.3389, 0.0796, 0.3587, 0.2228]) -Greedy action tensor([ 0.8728, -1.2640, -0.3316, 0.2987]) tensor([0.5048, 0.0596, 0.1514, 0.2843]) -Greedy action tensor([-0.5982, 0.1092, 0.0838, -0.8850]) tensor([0.1737, 0.3524, 0.3435, 0.1304]) -Greedy action tensor([ 0.2657, -2.1831, 0.7792, 0.6451]) tensor([0.2370, 0.0205, 0.3961, 0.3464]) -Greedy action tensor([-0.7323, -0.9055, -0.1923, -1.0343]) tensor([0.2328, 0.1957, 0.3994, 0.1721]) -Greedy action tensor([ 0.5016, 0.3260, 0.5798, -0.6333]) tensor([0.3085, 0.2588, 0.3336, 0.0992]) -Greedy action tensor([-0.2788, -0.6573, -1.2777, -0.5139]) tensor([0.3517, 0.2408, 0.1295, 0.2780]) -Greedy action tensor([ 0.0092, -0.0285, -0.5947, 1.0971]) tensor([0.1826, 0.1758, 0.0998, 0.5418]) -Greedy action tensor([ 0.4742, -0.0696, -1.1154, 0.8583]) tensor([0.3074, 0.1785, 0.0627, 0.4514]) -Greedy action tensor([ 0.1219, -0.9726, 0.3454, -0.3904]) tensor([0.3140, 0.1051, 0.3927, 0.1881]) -Greedy action tensor([-0.2184, 0.1942, -0.1792, -0.7880]) tensor([0.2429, 0.3670, 0.2526, 0.1374]) -Greedy action tensor([-0.2459, -0.4205, -1.0891, -0.1046]) tensor([0.2922, 0.2454, 0.1258, 0.3366]) -Greedy action tensor([-1.2038, -0.0181, 0.2285, -0.7452]) tensor([0.0996, 0.3259, 0.4170, 0.1575]) -Greedy action tensor([-1.2031, -0.0606, 0.3804, -0.4269]) tensor([0.0894, 0.2804, 0.4358, 0.1944]) -Greedy action tensor([-0.7799, -0.8801, 0.1396, -0.9983]) tensor([0.1917, 0.1734, 0.4808, 0.1541]) -Greedy action tensor([-0.9446, -1.7044, 0.5130, -0.5926]) tensor([0.1392, 0.0651, 0.5978, 0.1979]) -Greedy action tensor([-0.7915, -2.1016, 0.6314, 1.1040]) tensor([0.0828, 0.0223, 0.3436, 0.5512]) -Greedy action tensor([-0.2480, -0.4963, -1.5324, 0.3051]) tensor([0.2635, 0.2055, 0.0729, 0.4581]) -Greedy action tensor([-1.1057, -0.6324, 0.2746, -0.9473]) tensor([0.1290, 0.2071, 0.5128, 0.1511]) -Greedy action tensor([ 0.2484, -1.3776, -0.5560, -0.0735]) tensor([0.4221, 0.0830, 0.1888, 0.3060]) -Greedy action tensor([ 1.4745, -1.7556, 0.4778, 0.6066]) tensor([0.5469, 0.0216, 0.2019, 0.2296]) -Greedy action tensor([ 0.6378, 0.2536, -0.1679, -0.0026]) tensor([0.3767, 0.2565, 0.1683, 0.1985]) -Greedy action tensor([ 0.9414, -0.1588, 0.1555, -0.6051]) tensor([0.4996, 0.1663, 0.2277, 0.1064]) -Greedy action tensor([ 1.0047, -0.4601, 0.4887, 1.0332]) tensor([0.3500, 0.0809, 0.2089, 0.3601]) -Greedy action tensor([-0.5960, -0.7974, -1.1228, 0.0314]) tensor([0.2336, 0.1910, 0.1379, 0.4375]) -Greedy action tensor([ 0.2602, 0.3507, 0.4325, -0.0369]) tensor([0.2484, 0.2719, 0.2951, 0.1845]) -Greedy action tensor([-0.0417, 0.2898, -0.8110, -0.1788]) tensor([0.2682, 0.3737, 0.1243, 0.2339]) -Greedy action tensor([-0.5699, -0.7423, -0.3355, -0.6851]) tensor([0.2502, 0.2106, 0.3163, 0.2230]) -Greedy action tensor([-0.7181, -0.3922, 0.1309, -0.1899]) tensor([0.1558, 0.2158, 0.3642, 0.2642]) -Greedy action tensor([-0.3642, 0.1060, -0.4624, 0.3770]) tensor([0.1784, 0.2855, 0.1617, 0.3744]) -Greedy action tensor([-0.5143, -0.5839, -0.8501, -1.4903]) tensor([0.3307, 0.3084, 0.2363, 0.1246]) -Greedy action tensor([ 1.0851, -0.6690, 1.0895, 0.9193]) tensor([0.3306, 0.0572, 0.3321, 0.2801]) -Greedy action tensor([ 1.3483, -0.5729, -0.0616, 1.5324]) tensor([0.3857, 0.0565, 0.0942, 0.4637]) -Greedy action tensor([-0.0596, 0.3614, -0.7671, -0.1217]) tensor([0.2528, 0.3851, 0.1246, 0.2376]) -Greedy action tensor([-0.4368, -0.0535, 0.5909, -0.8647]) tensor([0.1691, 0.2481, 0.4726, 0.1102]) -Greedy action tensor([ 0.6245, -0.2938, -0.4057, -0.4432]) tensor([0.4762, 0.1901, 0.1700, 0.1637]) -Greedy action tensor([ 1.2769, -0.1883, 1.1545, 0.3503]) tensor([0.3981, 0.0920, 0.3523, 0.1576]) -Greedy action tensor([-0.3548, -0.3131, 0.0451, -0.7509]) tensor([0.2377, 0.2478, 0.3546, 0.1600]) -Greedy action tensor([ 1.3385, -0.0846, 0.4251, -0.7225]) tensor([0.5652, 0.1362, 0.2267, 0.0720]) -Greedy action tensor([-0.6764, -0.2357, 0.7256, -0.8584]) tensor([0.1342, 0.2085, 0.5454, 0.1119]) -Greedy action tensor([-0.3775, -0.7166, 1.3927, -1.6594]) tensor([0.1272, 0.0906, 0.7469, 0.0353]) -Greedy action tensor([-0.5810, -1.8907, -0.6980, -0.1721]) tensor([0.2729, 0.0737, 0.2428, 0.4107]) -Greedy action tensor([-0.0219, -0.0105, 0.6390, -0.8879]) tensor([0.2289, 0.2315, 0.4433, 0.0963]) -Greedy action tensor([-0.1381, -0.4449, 0.1497, 0.1138]) tensor([0.2296, 0.1689, 0.3061, 0.2954]) -Greedy action tensor([-0.6522, -0.1455, -0.6122, -1.0090]) tensor([0.2272, 0.3772, 0.2365, 0.1591]) -Greedy action tensor([-0.3817, 0.2161, -1.0211, -0.5309]) tensor([0.2377, 0.4321, 0.1254, 0.2047]) -Greedy action tensor([-0.9950, -0.2419, 0.2757, -0.8334]) tensor([0.1272, 0.2701, 0.4532, 0.1495]) -Greedy action tensor([-0.0866, 0.4121, 0.7023, -0.1355]) tensor([0.1724, 0.2839, 0.3795, 0.1642]) -Greedy action tensor([ 0.2099, -0.2259, -0.3048, -0.4521]) tensor([0.3623, 0.2343, 0.2165, 0.1869]) -Greedy action tensor([-0.4245, -0.5323, -0.1854, -0.2519]) tensor([0.2295, 0.2061, 0.2916, 0.2728]) -Greedy action tensor([-0.0051, -0.6396, 0.1861, -0.4093]) tensor([0.2934, 0.1556, 0.3552, 0.1959]) -Greedy action tensor([-0.8080, -0.3786, 0.2984, -0.9264]) tensor([0.1551, 0.2383, 0.4689, 0.1378]) -Greedy action tensor([ 0.9466, -1.1901, 0.0307, 0.5644]) tensor([0.4544, 0.0536, 0.1818, 0.3101]) -Greedy action tensor([-0.2384, -0.5274, -0.5945, 0.1299]) tensor([0.2568, 0.1923, 0.1798, 0.3711]) -Greedy action tensor([ 0.5150, -1.4118, 0.1559, 0.0331]) tensor([0.4063, 0.0592, 0.2837, 0.2509]) -Greedy action tensor([ 0.0994, -1.1699, -0.3578, -0.5651]) tensor([0.4118, 0.1157, 0.2607, 0.2119]) -Greedy action tensor([ 0.2720, -0.5414, -0.3063, -0.3343]) tensor([0.3922, 0.1739, 0.2200, 0.2139]) -Greedy action tensor([-0.0022, -0.5441, 0.9688, -0.9678]) tensor([0.2173, 0.1264, 0.5737, 0.0827]) -Greedy action tensor([-0.4189, -0.3067, -0.3858, -0.7579]) tensor([0.2587, 0.2895, 0.2675, 0.1844]) -Greedy action tensor([-1.5079, 0.4044, 0.9317, -0.6774]) tensor([0.0464, 0.3144, 0.5326, 0.1066]) -Greedy action tensor([ 1.0815, -0.3160, 0.2083, -0.1585]) tensor([0.5117, 0.1265, 0.2137, 0.1481]) -Greedy action tensor([ 0.1448, -0.0832, 0.5584, 0.2132]) tensor([0.2284, 0.1818, 0.3453, 0.2445]) -Greedy action tensor([ 1.2323, -0.7942, 0.7153, 0.4825]) tensor([0.4544, 0.0599, 0.2710, 0.2147]) -Greedy action tensor([ 1.0423, -0.2616, 0.5679, 1.1626]) tensor([0.3310, 0.0898, 0.2059, 0.3733]) -Greedy action tensor([-1.0240, -0.7051, 0.6488, 0.0692]) tensor([0.0936, 0.1287, 0.4985, 0.2792]) -Greedy action tensor([ 0.2169, 0.1657, -0.4833, -0.0411]) tensor([0.3106, 0.2951, 0.1542, 0.2400]) -Greedy action tensor([ 1.0681, -0.9051, -0.4241, 0.6133]) tensor([0.5004, 0.0696, 0.1125, 0.3175]) -Greedy action tensor([ 0.5565, 0.3183, 0.1136, -0.2529]) tensor([0.3478, 0.2741, 0.2233, 0.1548]) -Greedy action tensor([-0.2661, -0.6390, 0.0915, -0.6999]) tensor([0.2655, 0.1829, 0.3796, 0.1720]) -Greedy action tensor([-0.6831, -0.6557, -0.0262, -0.9452]) tensor([0.2116, 0.2175, 0.4081, 0.1628]) -Greedy action tensor([-0.5257, -0.6542, 0.4112, -0.0653]) tensor([0.1662, 0.1462, 0.4242, 0.2634]) -Greedy action tensor([ 0.8640, -1.2748, 1.0741, -0.4226]) tensor([0.3806, 0.0448, 0.4695, 0.1051]) -Greedy action tensor([ 0.5097, -1.5464, -0.1102, 1.3033]) tensor([0.2579, 0.0330, 0.1388, 0.5704]) -Greedy action tensor([ 0.2993, -1.3321, -0.1914, -0.7644]) tensor([0.4645, 0.0909, 0.2843, 0.1603]) -Greedy action tensor([ 0.0254, -1.3977, -0.3740, 0.0201]) tensor([0.3441, 0.0829, 0.2308, 0.3423]) -Greedy action tensor([-0.8754, -1.4574, -0.1536, 0.4273]) tensor([0.1371, 0.0766, 0.2821, 0.5043]) -Greedy action tensor([ 0.2806, -0.4246, 0.0390, 0.6932]) tensor([0.2639, 0.1303, 0.2072, 0.3986]) -Greedy action tensor([-0.0065, -0.4267, -0.0297, -0.7496]) tensor([0.3216, 0.2113, 0.3142, 0.1530]) -Greedy action tensor([-0.0607, -1.3013, 0.1235, 0.5265]) tensor([0.2331, 0.0674, 0.2802, 0.4193]) -Greedy action tensor([ 0.8060, -0.5429, -0.0224, -0.1682]) tensor([0.4822, 0.1251, 0.2106, 0.1820]) -Greedy action tensor([ 1.0489, -0.7974, 0.0377, -0.8056]) tensor([0.5959, 0.0940, 0.2168, 0.0933]) -Greedy action tensor([ 0.5720, -0.3788, -0.0559, -0.2608]) tensor([0.4246, 0.1641, 0.2266, 0.1846]) -Greedy action tensor([ 0.9562, -0.8182, -0.0165, -0.4433]) tensor([0.5573, 0.0945, 0.2107, 0.1375]) -Greedy action tensor([ 0.6956, -0.4793, -0.1210, -0.5090]) tensor([0.4877, 0.1506, 0.2155, 0.1462]) -Greedy action tensor([ 0.8146, -0.5984, 0.0515, -0.2545]) tensor([0.4871, 0.1186, 0.2271, 0.1672]) -Greedy action tensor([ 0.7373, -0.4551, 0.0609, -0.4562]) tensor([0.4728, 0.1435, 0.2404, 0.1433]) -Greedy action tensor([ 0.6879, -0.1775, -0.1020, -0.2756]) tensor([0.4432, 0.1865, 0.2012, 0.1691]) -Greedy action tensor([ 0.8757, -0.9485, 0.1558, -0.3950]) tensor([0.5185, 0.0837, 0.2524, 0.1455]) -Greedy action tensor([ 0.9194, -0.5159, 0.1089, -0.4145]) tensor([0.5139, 0.1223, 0.2285, 0.1354]) -Greedy action tensor([ 0.9420, -0.7043, 0.0571, -0.3957]) tensor([0.5353, 0.1032, 0.2210, 0.1405]) -Greedy action tensor([ 0.3821, -0.0371, -0.1620, -0.1324]) tensor([0.3526, 0.2319, 0.2047, 0.2108]) -Greedy action tensor([ 0.7680, -0.4338, -0.1583, -0.1834]) tensor([0.4801, 0.1443, 0.1901, 0.1854]) -Greedy action tensor([ 4.5246e-01, -2.6922e-02, -3.2365e-04, -6.7268e-03]) tensor([0.3464, 0.2145, 0.2203, 0.2189]) -Greedy action tensor([ 0.4447, -0.1115, -0.0654, -0.0527]) tensor([0.3595, 0.2061, 0.2158, 0.2186]) -Greedy action tensor([ 0.7756, -0.4235, -0.0476, -0.2732]) tensor([0.4783, 0.1442, 0.2100, 0.1676]) -Greedy action tensor([0.6777, 0.1330, 0.0479, 0.0154]) tensor([0.3805, 0.2207, 0.2027, 0.1962]) -Greedy action tensor([ 1.0951, -0.7352, -0.0645, -0.4612]) tensor([0.5935, 0.0952, 0.1861, 0.1252]) -Greedy action tensor([ 0.7448, -0.7047, -0.1189, -0.1923]) tensor([0.4883, 0.1146, 0.2059, 0.1913]) -Greedy action tensor([ 0.9373, -0.3383, -0.2366, -0.0890]) tensor([0.5137, 0.1435, 0.1588, 0.1841]) -Greedy action tensor([ 1.0848, -0.8752, 0.0145, -0.6479]) tensor([0.6022, 0.0848, 0.2065, 0.1065]) -Greedy action tensor([ 0.7827, -0.4251, 0.0728, -0.3460]) tensor([0.4730, 0.1414, 0.2326, 0.1530]) -Greedy action tensor([ 0.8178, -0.3824, -0.2078, -0.4378]) tensor([0.5142, 0.1549, 0.1844, 0.1465]) -Greedy action tensor([ 0.9046, -0.6423, -0.1000, -0.4243]) tensor([0.5423, 0.1155, 0.1986, 0.1436]) -Greedy action tensor([ 0.9321, -0.6053, -0.2556, -0.8486]) tensor([0.5923, 0.1273, 0.1806, 0.0998]) -Greedy action tensor([ 0.8109, -0.4800, -0.1559, -0.2882]) tensor([0.5029, 0.1383, 0.1913, 0.1675]) -Greedy action tensor([ 0.6808, -0.6953, 0.2311, -0.8258]) tensor([0.4735, 0.1196, 0.3020, 0.1050]) -Greedy action tensor([ 0.8647, -0.7887, -0.0425, -0.4478]) tensor([0.5364, 0.1027, 0.2165, 0.1444]) -Greedy action tensor([ 5.6037e-01, -4.1548e-01, 1.6479e-04, -2.7965e-01]) tensor([0.4202, 0.1584, 0.2400, 0.1814]) -Greedy action tensor([ 0.4913, 0.2452, -0.1934, 0.0164]) tensor([0.3439, 0.2689, 0.1734, 0.2139]) -Greedy action tensor([ 0.6023, -0.2911, -0.1508, -0.6714]) tensor([0.4630, 0.1895, 0.2180, 0.1295]) -Greedy action tensor([ 0.4888, -0.2095, 0.0330, -0.1589]) tensor([0.3767, 0.1874, 0.2388, 0.1971]) -Greedy action tensor([ 1.0703, -1.0474, 0.1480, -0.6069]) tensor([0.5866, 0.0706, 0.2332, 0.1096]) -Greedy action tensor([ 0.6172, -0.3494, 0.0408, -0.6131]) tensor([0.4475, 0.1702, 0.2515, 0.1308]) -Greedy action tensor([ 0.9221, -0.6020, -0.1382, -0.3807]) tensor([0.5447, 0.1186, 0.1887, 0.1480]) -Greedy action tensor([ 0.9156, -0.6467, -0.0495, -0.4305]) tensor([0.5403, 0.1133, 0.2058, 0.1406]) -Greedy action tensor([ 0.7999, -0.3619, -0.0192, -0.1483]) tensor([0.4670, 0.1461, 0.2059, 0.1810]) -Greedy action tensor([ 1.2626, -0.5527, -0.1029, -0.3849]) tensor([0.6209, 0.1011, 0.1585, 0.1195]) -Greedy action tensor([ 0.7026, -0.5531, -0.0733, -0.3131]) tensor([0.4745, 0.1352, 0.2184, 0.1719]) -Greedy action tensor([ 0.9955, -0.8443, 0.0674, -0.6037]) tensor([0.5694, 0.0905, 0.2251, 0.1151]) -Greedy action tensor([ 0.5870, -0.3686, -0.0401, -0.0377]) tensor([0.4075, 0.1567, 0.2176, 0.2182]) -Greedy action tensor([ 0.9368, -0.4242, 0.0271, -0.3641]) tensor([0.5178, 0.1328, 0.2085, 0.1410]) -Greedy action tensor([ 0.8351, -0.5751, 0.0367, -0.2578]) tensor([0.4928, 0.1203, 0.2218, 0.1652]) -Greedy action tensor([ 0.8874, -0.7101, -0.0678, -0.5084]) tensor([0.5450, 0.1103, 0.2097, 0.1350]) -Greedy action tensor([ 0.8918, -0.6698, -0.1145, -0.3407]) tensor([0.5356, 0.1124, 0.1958, 0.1562]) -Greedy action tensor([ 0.7595, -0.3311, -0.0312, -0.2990]) tensor([0.4681, 0.1573, 0.2123, 0.1624]) -Greedy action tensor([ 0.7897, -0.2842, -0.0832, -0.3613]) tensor([0.4818, 0.1646, 0.2012, 0.1524]) -Greedy action tensor([ 0.4318, -0.1177, -0.0145, -0.0351]) tensor([0.3516, 0.2029, 0.2250, 0.2204]) -Greedy action tensor([ 0.6967, -0.5347, -0.0311, -0.3866]) tensor([0.4732, 0.1381, 0.2285, 0.1602]) -Greedy action tensor([ 0.8004, -0.2452, 0.0039, -0.1041]) tensor([0.4531, 0.1592, 0.2043, 0.1834]) -Greedy action tensor([ 0.8706, -0.5618, -0.1168, -0.2447]) tensor([0.5157, 0.1231, 0.1921, 0.1691]) -Greedy action tensor([ 1.0765, -0.7918, 0.0143, -0.6243]) tensor([0.5943, 0.0918, 0.2055, 0.1085]) -Greedy action tensor([ 0.6982, -0.6406, -0.1237, -0.6320]) tensor([0.5086, 0.1333, 0.2236, 0.1345]) -Greedy action tensor([ 0.8885, -0.3636, 0.0959, -0.6691]) tensor([0.5130, 0.1467, 0.2322, 0.1081]) -Greedy action tensor([ 0.7551, -0.3235, 0.1032, -0.3004]) tensor([0.4527, 0.1539, 0.2359, 0.1575]) -Greedy action tensor([ 0.6642, -0.4565, 0.1514, -0.4588]) tensor([0.4444, 0.1449, 0.2661, 0.1446]) -Greedy action tensor([ 0.9434, -0.4435, -0.0583, -0.3661]) tensor([0.5299, 0.1324, 0.1946, 0.1431]) -Greedy action tensor([ 0.8064, -0.7128, -0.0940, -0.1642]) tensor([0.4990, 0.1092, 0.2028, 0.1890]) -Greedy action tensor([ 0.6658, -0.4096, 0.0456, -0.1859]) tensor([0.4337, 0.1480, 0.2333, 0.1851]) -Greedy action tensor([ 1.1869, -0.6958, 0.1007, -0.6010]) tensor([0.6035, 0.0918, 0.2037, 0.1010]) -Greedy action tensor([ 0.7669, -0.1575, -0.0683, -0.1090]) tensor([0.4450, 0.1766, 0.1931, 0.1853]) -Greedy action tensor([ 0.7557, -0.4153, -0.1073, -0.1550]) tensor([0.4686, 0.1453, 0.1977, 0.1885]) -Greedy action tensor([ 0.7992, -0.5293, -0.0221, -0.5126]) tensor([0.5066, 0.1342, 0.2228, 0.1364]) -Greedy action tensor([ 0.9082, -0.6112, 0.0201, -0.5932]) tensor([0.5396, 0.1181, 0.2220, 0.1202]) -Greedy action tensor([ 0.6982, -0.4161, -0.0484, -0.1421]) tensor([0.4477, 0.1469, 0.2122, 0.1932]) -Greedy action tensor([ 0.7374, -0.7898, -0.0952, -0.3767]) tensor([0.5050, 0.1097, 0.2196, 0.1657]) -Greedy action tensor([ 0.3147, 0.1069, -0.0925, -0.0603]) tensor([0.3159, 0.2567, 0.2102, 0.2171]) -Greedy action tensor([ 0.9776, -1.0205, 0.0166, -0.4702]) tensor([0.5704, 0.0773, 0.2182, 0.1341]) -Greedy action tensor([ 1.2241, -0.6603, 0.0117, -0.5646]) tensor([0.6186, 0.0940, 0.1840, 0.1034]) -Greedy action tensor([ 0.7939, -0.6358, 0.0175, -0.2868]) tensor([0.4905, 0.1174, 0.2257, 0.1664]) -Greedy action tensor([ 0.8900, -0.7276, -0.0412, -0.2994]) tensor([0.5272, 0.1046, 0.2078, 0.1605]) -Greedy action tensor([ 0.7539, -0.2659, -0.0119, -0.1260]) tensor([0.4464, 0.1610, 0.2075, 0.1851]) -Greedy action tensor([ 1.0605, -0.4657, -0.1483, -0.6225]) tensor([0.5876, 0.1277, 0.1754, 0.1092]) -Greedy action tensor([ 0.3260, 0.0134, -0.0105, -0.1376]) tensor([0.3252, 0.2379, 0.2323, 0.2046]) -Greedy action tensor([ 0.4486, -0.0901, -0.0340, -0.2389]) tensor([0.3699, 0.2158, 0.2283, 0.1860]) -Greedy action tensor([ 1.3503, -0.7767, -0.0486, -1.0993]) tensor([0.6885, 0.0821, 0.1700, 0.0594]) -Greedy action tensor([ 0.7958, -0.2787, -0.0896, -0.0252]) tensor([0.4558, 0.1556, 0.1880, 0.2005]) -Greedy action tensor([ 0.6064, -0.2525, -0.0390, -0.3548]) tensor([0.4291, 0.1818, 0.2250, 0.1641]) -Greedy action tensor([ 0.7196, -0.2804, -0.1912, -0.1586]) tensor([0.4575, 0.1683, 0.1840, 0.1901]) -Greedy action tensor([ 0.7723, -0.4519, 0.0360, -0.3750]) tensor([0.4784, 0.1406, 0.2291, 0.1519]) -Greedy action tensor([ 0.9715, -0.7777, 0.0617, -0.4744]) tensor([0.5518, 0.0960, 0.2222, 0.1300]) -Greedy action tensor([-1.9113, -0.4272, 0.6439, -0.1602]) tensor([0.0416, 0.1834, 0.5354, 0.2396]) -Greedy action tensor([-1.8867, -0.3396, 0.6332, -0.1964]) tensor([0.0425, 0.1995, 0.5278, 0.2302]) -Greedy action tensor([-1.8001, 0.0447, 0.5174, -0.1140]) tensor([0.0437, 0.2766, 0.4437, 0.2360]) -Greedy action tensor([-0.7375, -0.4897, 0.4288, 0.5517]) tensor([0.1096, 0.1405, 0.3519, 0.3980]) -Greedy action tensor([-1.9127, -0.4329, 0.6536, -0.1617]) tensor([0.0414, 0.1817, 0.5386, 0.2383]) -Greedy action tensor([-1.9148, -0.4093, 0.6494, -0.1585]) tensor([0.0412, 0.1855, 0.5349, 0.2384]) -Greedy action tensor([-1.7559, 0.1091, 0.5032, -0.1429]) tensor([0.0454, 0.2928, 0.4343, 0.2276]) -Greedy action tensor([-1.9208, -0.4243, 0.6561, -0.1669]) tensor([0.0410, 0.1830, 0.5392, 0.2368]) -Greedy action tensor([-1.8492, -0.4294, 0.6193, -0.1333]) tensor([0.0444, 0.1838, 0.5246, 0.2472]) -Greedy action tensor([-1.9037, -0.4313, 0.6522, -0.1558]) tensor([0.0417, 0.1818, 0.5371, 0.2394]) -Greedy action tensor([-1.7143, -0.4785, 0.5436, -0.0603]) tensor([0.0520, 0.1789, 0.4972, 0.2718]) -Greedy action tensor([-1.9337, -0.4456, 0.6589, -0.1759]) tensor([0.0407, 0.1801, 0.5434, 0.2358]) -Greedy action tensor([-1.7183, -0.4296, 0.5579, -0.0718]) tensor([0.0511, 0.1855, 0.4980, 0.2653]) -Greedy action tensor([-1.9199, -0.4338, 0.6566, -0.1671]) tensor([0.0411, 0.1816, 0.5403, 0.2371]) -Greedy action tensor([-1.8711, -0.2762, 0.6098, -0.1395]) tensor([0.0425, 0.2094, 0.5080, 0.2401]) -Greedy action tensor([-1.9147, -0.4501, 0.6547, -0.1684]) tensor([0.0415, 0.1794, 0.5414, 0.2377]) -Greedy action tensor([-1.8145, -0.2629, 0.5835, -0.1483]) tensor([0.0454, 0.2144, 0.4998, 0.2404]) -Greedy action tensor([-1.9221, -0.3892, 0.6436, -0.1668]) tensor([0.0409, 0.1896, 0.5326, 0.2368]) -Greedy action tensor([-1.8782, -0.4493, 0.6353, -0.1469]) tensor([0.0432, 0.1801, 0.5329, 0.2438]) -Greedy action tensor([-1.7999, -0.3880, 0.5883, -0.1193]) tensor([0.0468, 0.1921, 0.5099, 0.2513]) -Greedy action tensor([-1.9285, -0.4597, 0.6577, -0.1730]) tensor([0.0410, 0.1780, 0.5440, 0.2370]) -Greedy action tensor([-1.8442, -0.3729, 0.6076, -0.1354]) tensor([0.0445, 0.1937, 0.5163, 0.2456]) -Greedy action tensor([-1.8470, -0.4545, 0.6157, -0.1500]) tensor([0.0450, 0.1811, 0.5282, 0.2456]) -Greedy action tensor([-1.9300, -0.4439, 0.6670, -0.1640]) tensor([0.0405, 0.1790, 0.5437, 0.2368]) -Greedy action tensor([-1.8115, -0.4461, 0.6015, -0.1149]) tensor([0.0464, 0.1819, 0.5184, 0.2533]) -Greedy action tensor([-1.8959, -0.4111, 0.6378, -0.1592]) tensor([0.0422, 0.1863, 0.5318, 0.2397]) -Greedy action tensor([-1.4990, -0.5290, 0.4834, 0.0707]) tensor([0.0637, 0.1680, 0.4623, 0.3060]) -Greedy action tensor([-1.9119, -0.4227, 0.6502, -0.1631]) tensor([0.0414, 0.1836, 0.5369, 0.2381]) -Greedy action tensor([-1.8744, -0.2957, 0.6220, -0.1411]) tensor([0.0423, 0.2050, 0.5133, 0.2393]) -Greedy action tensor([-1.8893, -0.4476, 0.6401, -0.1520]) tensor([0.0426, 0.1803, 0.5349, 0.2422]) -Greedy action tensor([-0.6685, 0.3592, 0.1437, 0.1368]) tensor([0.1207, 0.3373, 0.2719, 0.2701]) -Greedy action tensor([-1.9041, -0.4473, 0.6514, -0.1583]) tensor([0.0418, 0.1796, 0.5388, 0.2398]) -Greedy action tensor([-1.8310, -0.2150, 0.5852, -0.1754]) tensor([0.0445, 0.2240, 0.4985, 0.2330]) -Greedy action tensor([-1.6814, 0.2470, 0.4680, -0.0048]) tensor([0.0459, 0.3155, 0.3935, 0.2452]) -Greedy action tensor([-1.9009, -0.4445, 0.6445, -0.1518]) tensor([0.0420, 0.1804, 0.5359, 0.2417]) -Greedy action tensor([-0.4458, 0.9048, -0.0385, 0.2001]) tensor([0.1209, 0.4667, 0.1817, 0.2307]) -Greedy action tensor([-1.0104, 0.0157, 0.2434, -0.0345]) tensor([0.1005, 0.2805, 0.3522, 0.2668]) -Greedy action tensor([-1.7863, -0.5013, 0.6061, -0.0472]) tensor([0.0471, 0.1701, 0.5149, 0.2679]) -Greedy action tensor([-0.9416, -0.1009, 0.6944, 0.9521]) tensor([0.0662, 0.1535, 0.3401, 0.4401]) -Greedy action tensor([-1.7242, -0.3363, 0.5402, -0.0986]) tensor([0.0507, 0.2032, 0.4883, 0.2578]) -Greedy action tensor([-1.8807, -0.4486, 0.6400, -0.1480]) tensor([0.0430, 0.1799, 0.5342, 0.2429]) -Greedy action tensor([-1.8426, -0.4434, 0.6181, -0.1307]) tensor([0.0448, 0.1817, 0.5251, 0.2484]) -Greedy action tensor([-1.8277, -0.3533, 0.6163, -0.1030]) tensor([0.0444, 0.1942, 0.5120, 0.2494]) -Greedy action tensor([-1.8933, -0.2527, 0.6170, -0.1731]) tensor([0.0416, 0.2145, 0.5117, 0.2322]) -Greedy action tensor([-1.8444, -0.2292, 0.6080, -0.1236]) tensor([0.0430, 0.2165, 0.5000, 0.2406]) -Greedy action tensor([-1.9414, -0.4437, 0.6665, -0.1789]) tensor([0.0402, 0.1798, 0.5457, 0.2343]) -Greedy action tensor([-1.0891, 0.2130, 0.4547, 0.1778]) tensor([0.0775, 0.2848, 0.3627, 0.2750]) -Greedy action tensor([-1.8941, -0.3442, 0.6377, -0.1500]) tensor([0.0417, 0.1962, 0.5238, 0.2383]) -Greedy action tensor([-1.1710, 0.2313, 0.5102, 0.3325]) tensor([0.0670, 0.2722, 0.3597, 0.3012]) -Greedy action tensor([-1.6665, -0.3128, 0.5019, -0.0032]) tensor([0.0529, 0.2049, 0.4629, 0.2793]) -Greedy action tensor([-1.7687, -0.3497, 0.6511, 0.0145]) tensor([0.0448, 0.1851, 0.5036, 0.2665]) -Greedy action tensor([-1.8846, -0.4252, 0.6448, -0.1418]) tensor([0.0424, 0.1826, 0.5325, 0.2425]) -Greedy action tensor([-1.7138, -0.2186, 0.5268, -0.1015]) tensor([0.0503, 0.2244, 0.4729, 0.2523]) -Greedy action tensor([-0.7601, 0.4466, 0.0223, -0.1747]) tensor([0.1201, 0.4015, 0.2627, 0.2157]) -Greedy action tensor([-1.9299, -0.4301, 0.6607, -0.1724]) tensor([0.0406, 0.1820, 0.5418, 0.2355]) -Greedy action tensor([-1.8891, -0.3590, 0.6442, -0.1481]) tensor([0.0418, 0.1931, 0.5266, 0.2384]) -Greedy action tensor([-1.7671, -0.3662, 0.5840, -0.1126]) tensor([0.0481, 0.1953, 0.5050, 0.2516]) -Greedy action tensor([-1.8751, -0.4470, 0.6370, -0.1434]) tensor([0.0432, 0.1802, 0.5326, 0.2441]) -Greedy action tensor([-1.9007, -0.4403, 0.6478, -0.1570]) tensor([0.0420, 0.1809, 0.5370, 0.2401]) -Greedy action tensor([-1.8884, -0.4539, 0.6517, -0.1463]) tensor([0.0424, 0.1780, 0.5376, 0.2421]) -Greedy action tensor([-1.9288, -0.4430, 0.6603, -0.1719]) tensor([0.0408, 0.1801, 0.5429, 0.2362]) -Greedy action tensor([-1.8759, -0.4638, 0.6409, -0.1400]) tensor([0.0432, 0.1772, 0.5347, 0.2449]) -Greedy action tensor([-1.0347, 0.3878, 0.5352, 0.5765]) tensor([0.0668, 0.2772, 0.3212, 0.3348]) -Greedy action tensor([-1.9061, -0.3656, 0.6438, -0.1592]) tensor([0.0413, 0.1928, 0.5290, 0.2370]) -Greedy action tensor([0.4367, 0.3260, 0.0859, 0.3799]) tensor([0.2822, 0.2526, 0.1987, 0.2666]) -Greedy action tensor([-1.9259, -0.4360, 0.6564, -0.1705]) tensor([0.0409, 0.1815, 0.5410, 0.2366]) -Greedy action tensor([-0.3006, 0.4872, 0.4486, 0.4834]) tensor([0.1333, 0.2930, 0.2819, 0.2919]) -Greedy action tensor([-1.8849, -0.3421, 0.6250, -0.1432]) tensor([0.0422, 0.1975, 0.5194, 0.2409]) -Greedy action tensor([-1.6585, -0.3126, 0.5266, -0.0033]) tensor([0.0527, 0.2025, 0.4688, 0.2760]) -Greedy action tensor([-1.0758, 0.5304, 0.1707, 0.0029]) tensor([0.0806, 0.4018, 0.2804, 0.2371]) -Greedy action tensor([-1.8289, -0.4656, 0.6144, -0.1257]) tensor([0.0456, 0.1784, 0.5253, 0.2506]) -Greedy action tensor([-1.8299, -0.0623, 0.5511, -0.0997]) tensor([0.0429, 0.2512, 0.4639, 0.2420]) -Greedy action tensor([-1.9182, -0.4393, 0.6670, -0.1600]) tensor([0.0409, 0.1794, 0.5424, 0.2372]) -Greedy action tensor([-1.8692, -0.4535, 0.6338, -0.1462]) tensor([0.0436, 0.1796, 0.5327, 0.2442]) -Greedy action tensor([-1.9360, -0.4419, 0.6625, -0.1759]) tensor([0.0405, 0.1803, 0.5440, 0.2352]) -Greedy action tensor([-1.8790, -0.4381, 0.6358, -0.1503]) tensor([0.0431, 0.1819, 0.5324, 0.2426]) -Greedy action tensor([-1.9280, -0.4286, 0.6604, -0.1714]) tensor([0.0407, 0.1822, 0.5415, 0.2357]) -Greedy action tensor([-1.7195, -0.1819, 0.5278, -0.1196]) tensor([0.0498, 0.2319, 0.4715, 0.2468]) -Greedy action tensor([-1.6540, 0.2890, 0.4212, -0.0521]) tensor([0.0478, 0.3338, 0.3810, 0.2373]) -Greedy action tensor([-1.4103, -0.6047, 0.4027, 0.0844]) tensor([0.0723, 0.1619, 0.4433, 0.3225]) -Greedy action tensor([-1.9359, -0.4406, 0.6638, -0.1753]) tensor([0.0404, 0.1803, 0.5441, 0.2351]) -Greedy action tensor([ 2.2866, -0.9638, -0.5781, 0.1094]) tensor([0.8270, 0.0321, 0.0471, 0.0938]) -Greedy action tensor([1.7320, 0.4609, 0.1071, 0.1574]) tensor([0.5936, 0.1665, 0.1169, 0.1229]) -Greedy action tensor([ 1.2348, -0.2322, -0.5619, 0.0960]) tensor([0.5825, 0.1343, 0.0966, 0.1865]) -Greedy action tensor([ 2.4346e+00, 6.0323e-01, -2.3234e-01, -2.3476e-03]) tensor([0.7593, 0.1216, 0.0527, 0.0664]) -Greedy action tensor([ 1.5145, -0.4820, -0.5648, 0.6469]) tensor([0.5950, 0.0808, 0.0744, 0.2499]) -Greedy action tensor([ 1.7429, -0.3061, -0.4426, 0.2123]) tensor([0.6860, 0.0884, 0.0771, 0.1485]) -Greedy action tensor([ 1.3483, -0.4564, -0.0570, 0.3227]) tensor([0.5655, 0.0930, 0.1387, 0.2028]) -Greedy action tensor([ 1.1269, -0.2832, -0.8818, 0.1191]) tensor([0.5736, 0.1400, 0.0770, 0.2094]) -Greedy action tensor([ 1.5180, -0.5059, -0.2109, 0.2753]) tensor([0.6257, 0.0827, 0.1110, 0.1806]) -Greedy action tensor([ 1.5411, -0.4897, -0.5726, 0.3444]) tensor([0.6434, 0.0844, 0.0777, 0.1944]) -Greedy action tensor([ 1.0823e+00, -6.7878e-04, 5.2626e-02, 1.3101e-01]) tensor([0.4803, 0.1626, 0.1715, 0.1855]) -Greedy action tensor([ 1.4884, -0.8133, -0.5714, 0.2427]) tensor([0.6599, 0.0661, 0.0841, 0.1899]) -Greedy action tensor([ 1.4075, -0.5078, -0.2742, 0.5547]) tensor([0.5683, 0.0837, 0.1057, 0.2422]) -Greedy action tensor([ 1.0607, -0.2155, 0.0147, 0.0369]) tensor([0.5026, 0.1403, 0.1766, 0.1806]) -Greedy action tensor([ 1.3278, 0.3425, -0.9692, 0.2875]) tensor([0.5473, 0.2043, 0.0550, 0.1934]) -Greedy action tensor([ 1.0522, -0.6319, -0.1171, -0.2191]) tensor([0.5629, 0.1045, 0.1748, 0.1579]) -Greedy action tensor([ 1.5068, -0.7063, -0.8212, 0.2473]) tensor([0.6709, 0.0734, 0.0654, 0.1904]) -Greedy action tensor([ 1.6308, -0.7256, 0.0522, 0.1442]) tensor([0.6548, 0.0620, 0.1351, 0.1481]) -Greedy action tensor([ 1.7271, -1.1893, -0.5023, 0.1651]) tensor([0.7292, 0.0395, 0.0785, 0.1529]) -Greedy action tensor([ 1.8415, -0.5468, -0.1425, 0.3432]) tensor([0.6883, 0.0632, 0.0947, 0.1539]) -Greedy action tensor([ 2.0656, -0.6827, -0.8367, 0.2729]) tensor([0.7779, 0.0498, 0.0427, 0.1295]) -Greedy action tensor([ 1.3144, -0.7773, -0.3811, 0.1454]) tensor([0.6182, 0.0763, 0.1134, 0.1921]) -Greedy action tensor([ 1.2604, -0.9572, -0.3963, 0.6620]) tensor([0.5407, 0.0589, 0.1032, 0.2972]) -Greedy action tensor([ 1.8212, -0.0714, -0.8002, -0.2555]) tensor([0.7414, 0.1117, 0.0539, 0.0929]) -Greedy action tensor([ 1.6541, -0.5445, -0.3292, 0.3977]) tensor([0.6522, 0.0724, 0.0898, 0.1857]) -Greedy action tensor([ 1.9780e+00, -8.9400e-01, 8.4247e-04, 6.2157e-01]) tensor([0.6884, 0.0390, 0.0953, 0.1773]) -Greedy action tensor([ 1.3259, -0.4387, -0.3760, 0.3590]) tensor([0.5768, 0.0988, 0.1052, 0.2193]) -Greedy action tensor([ 1.9690, -0.7102, -0.1159, 0.4960]) tensor([0.7031, 0.0482, 0.0874, 0.1612]) -Greedy action tensor([ 1.7381, -0.9049, -0.1423, 0.6196]) tensor([0.6450, 0.0459, 0.0984, 0.2108]) -Greedy action tensor([ 1.4603, -0.1730, -1.0652, 0.2812]) tensor([0.6318, 0.1234, 0.0506, 0.1943]) -Greedy action tensor([ 2.6267, -0.0369, -0.1536, 0.9483]) tensor([0.7585, 0.0529, 0.0470, 0.1416]) -Greedy action tensor([ 1.0830, -0.4505, -0.5460, 0.3143]) tensor([0.5332, 0.1151, 0.1046, 0.2472]) -Greedy action tensor([ 1.6274, 0.0764, -0.1190, -0.1418]) tensor([0.6423, 0.1362, 0.1120, 0.1095]) -Greedy action tensor([ 1.8055, -0.6718, -0.5481, 0.1243]) tensor([0.7325, 0.0615, 0.0696, 0.1364]) -Greedy action tensor([ 2.0710, -0.2054, -0.3243, 0.2717]) tensor([0.7357, 0.0755, 0.0671, 0.1217]) -Greedy action tensor([ 1.7079, -0.8744, -0.6473, 0.4679]) tensor([0.6850, 0.0518, 0.0650, 0.1982]) -Greedy action tensor([ 1.1497, -0.2711, -0.6334, 0.3952]) tensor([0.5320, 0.1285, 0.0894, 0.2501]) -Greedy action tensor([ 1.4737, -0.6613, -0.4240, 0.2485]) tensor([0.6403, 0.0757, 0.0960, 0.1880]) -Greedy action tensor([ 1.3042, -0.5076, -0.2004, 0.3344]) tensor([0.5667, 0.0926, 0.1259, 0.2149]) -Greedy action tensor([ 1.7460, -0.2964, -0.2384, 0.0748]) tensor([0.6872, 0.0891, 0.0945, 0.1292]) -Greedy action tensor([ 1.6153, -0.5564, 0.1133, 0.1687]) tensor([0.6361, 0.0725, 0.1416, 0.1497]) -Greedy action tensor([ 1.5305, -0.5286, -1.0396, 0.4732]) tensor([0.6445, 0.0822, 0.0493, 0.2239]) -Greedy action tensor([ 1.6243, -0.4950, -0.7111, 0.3263]) tensor([0.6712, 0.0806, 0.0649, 0.1833]) -Greedy action tensor([ 1.7228, -0.6138, -0.3671, 0.3176]) tensor([0.6823, 0.0659, 0.0844, 0.1674]) -Greedy action tensor([ 1.3951, -0.4983, -0.7660, 0.3186]) tensor([0.6225, 0.0937, 0.0717, 0.2121]) -Greedy action tensor([ 1.8543, -0.8208, -0.7221, 0.1408]) tensor([0.7546, 0.0520, 0.0574, 0.1360]) -Greedy action tensor([ 1.5984, -0.3396, -0.6549, 0.4930]) tensor([0.6329, 0.0911, 0.0665, 0.2095]) -Greedy action tensor([ 1.9686, -1.0849, -0.0237, 0.5494]) tensor([0.7015, 0.0331, 0.0957, 0.1697]) -Greedy action tensor([ 1.4316, -0.0650, 0.2391, -0.1876]) tensor([0.5796, 0.1298, 0.1759, 0.1148]) -Greedy action tensor([ 1.2623, -0.4419, -0.6007, 0.3337]) tensor([0.5773, 0.1050, 0.0896, 0.2281]) -Greedy action tensor([ 1.4445, -0.2047, -0.3812, 0.0997]) tensor([0.6196, 0.1191, 0.0998, 0.1615]) -Greedy action tensor([ 1.1397, -0.1872, -0.6603, 0.4015]) tensor([0.5240, 0.1390, 0.0866, 0.2504]) -Greedy action tensor([ 1.1053, -0.2233, -0.2404, 0.3449]) tensor([0.5018, 0.1329, 0.1307, 0.2346]) -Greedy action tensor([ 1.4773, 0.2940, -0.5401, -0.2558]) tensor([0.6188, 0.1895, 0.0823, 0.1094]) -Greedy action tensor([ 1.9459, -0.6478, -0.0624, 0.6215]) tensor([0.6780, 0.0507, 0.0910, 0.1803]) -Greedy action tensor([ 1.4312, -0.2721, -0.7179, 0.1289]) tensor([0.6367, 0.1159, 0.0742, 0.1731]) -Greedy action tensor([ 2.0884, -1.0126, -0.3545, 0.5873]) tensor([0.7381, 0.0332, 0.0641, 0.1645]) -Greedy action tensor([ 1.8428, -0.8953, -0.5193, 0.3260]) tensor([0.7255, 0.0469, 0.0684, 0.1592]) -Greedy action tensor([ 1.2669, -0.6550, -0.1875, 0.6584]) tensor([0.5197, 0.0761, 0.1214, 0.2828]) -Greedy action tensor([ 1.5869, -0.6657, -0.6932, 0.1657]) tensor([0.6902, 0.0726, 0.0706, 0.1666]) -Greedy action tensor([ 1.2910, -0.0260, -0.3350, 0.6293]) tensor([0.5049, 0.1353, 0.0993, 0.2605]) -Greedy action tensor([ 1.2924, -0.3724, -0.8221, 0.7040]) tensor([0.5361, 0.1015, 0.0647, 0.2977]) -Greedy action tensor([ 1.5590, -0.4170, -0.1552, 0.4395]) tensor([0.6078, 0.0843, 0.1095, 0.1984]) -Greedy action tensor([ 1.2468, 0.1665, -0.6440, 0.4843]) tensor([0.5110, 0.1735, 0.0771, 0.2384]) -Greedy action tensor([ 1.1823, -0.0596, -0.5408, 0.5107]) tensor([0.5055, 0.1460, 0.0902, 0.2583]) -Greedy action tensor([ 1.9659, -1.0056, -0.4030, 0.7609]) tensor([0.6923, 0.0355, 0.0648, 0.2075]) -Greedy action tensor([ 0.8404, -0.1149, -0.1992, 0.0046]) tensor([0.4604, 0.1771, 0.1628, 0.1996]) -Greedy action tensor([ 1.4387, 0.1668, -0.4833, 0.0751]) tensor([0.5944, 0.1666, 0.0870, 0.1520]) -Greedy action tensor([ 1.7049, -0.8093, 0.0463, -0.5458]) tensor([0.7264, 0.0588, 0.1383, 0.0765]) -Greedy action tensor([ 1.4925, 0.0324, -0.8776, 0.4306]) tensor([0.5983, 0.1389, 0.0559, 0.2069]) -Greedy action tensor([ 1.3129, -0.4531, -0.5779, 0.0853]) tensor([0.6192, 0.1059, 0.0935, 0.1814]) -Greedy action tensor([ 0.8922, -0.2165, -0.7177, 0.4143]) tensor([0.4651, 0.1535, 0.0930, 0.2884]) -Greedy action tensor([ 1.2737e+00, -7.1336e-01, 2.4008e-02, -1.0397e-03]) tensor([0.5871, 0.0805, 0.1683, 0.1641]) -Greedy action tensor([ 1.8162, -0.2429, -0.3453, 0.3671]) tensor([0.6768, 0.0863, 0.0779, 0.1589]) -Greedy action tensor([ 1.4450, -0.1871, -0.1524, 0.2567]) tensor([0.5873, 0.1148, 0.1189, 0.1790]) -Greedy action tensor([ 1.9155, 0.0226, -0.3270, 0.0103]) tensor([0.7114, 0.1072, 0.0755, 0.1058]) -Greedy action tensor([ 1.7246, -0.4720, -0.3817, 0.3246]) tensor([0.6759, 0.0751, 0.0823, 0.1667]) -Greedy action tensor([ 1.4157, -0.4504, -0.5090, 0.1483]) tensor([0.6320, 0.0978, 0.0922, 0.1780]) -Greedy action tensor([ 0.8103, 0.1973, -0.9716, -0.2709]) tensor([0.4880, 0.2643, 0.0821, 0.1655]) -Greedy action tensor([ 1.0724, -0.7220, -0.8725, 0.8700]) tensor([0.4704, 0.0782, 0.0673, 0.3842]) -Greedy action tensor([ 1.3439, -0.5420, 0.0042, -0.0976]) tensor([0.6060, 0.0919, 0.1587, 0.1434]) -Greedy action tensor([ 0.5801, -0.1675, 0.0140, 0.0322]) tensor([0.3818, 0.1808, 0.2168, 0.2207]) -Greedy action tensor([ 0.7265, -0.2717, -0.0846, -0.2356]) tensor([0.4556, 0.1679, 0.2024, 0.1741]) -Greedy action tensor([ 1.2539, -0.6913, -0.0670, -0.5121]) tensor([0.6326, 0.0904, 0.1688, 0.1082]) -Greedy action tensor([ 0.8891, -0.3718, -0.1186, -0.2310]) tensor([0.5064, 0.1435, 0.1849, 0.1652]) -Greedy action tensor([ 1.0854, -0.6469, -0.0924, -0.5809]) tensor([0.5975, 0.1057, 0.1840, 0.1129]) -Greedy action tensor([ 0.8778, -0.2961, -0.0415, -0.1815]) tensor([0.4867, 0.1505, 0.1941, 0.1687]) -Greedy action tensor([ 0.4321, 0.0327, -0.1946, 0.0261]) tensor([0.3483, 0.2336, 0.1861, 0.2320]) -Greedy action tensor([ 0.5522, -0.3229, -0.1183, -0.3672]) tensor([0.4297, 0.1791, 0.2198, 0.1714]) -Greedy action tensor([ 1.1638, -0.8827, -0.0636, -0.8041]) tensor([0.6402, 0.0827, 0.1876, 0.0895]) -Greedy action tensor([ 0.9258, -0.3509, -0.0688, -0.0992]) tensor([0.4981, 0.1389, 0.1842, 0.1787]) -Greedy action tensor([ 0.9255, -0.5605, -0.0799, -0.2867]) tensor([0.5292, 0.1197, 0.1936, 0.1575]) -Greedy action tensor([ 0.6221, -0.1482, 0.0322, -0.0441]) tensor([0.3951, 0.1829, 0.2190, 0.2030]) -Greedy action tensor([ 0.3921, -0.1254, -0.0417, -0.1907]) tensor([0.3569, 0.2127, 0.2312, 0.1992]) -Greedy action tensor([ 1.0361, -0.5606, -0.1274, -0.4926]) tensor([0.5775, 0.1170, 0.1804, 0.1252]) -Greedy action tensor([ 0.7096, -0.4553, -0.1300, -0.2119]) tensor([0.4669, 0.1457, 0.2017, 0.1858]) -Greedy action tensor([ 0.6539, -0.3777, 0.1414, -0.4136]) tensor([0.4349, 0.1550, 0.2605, 0.1496]) -Greedy action tensor([ 1.0369, -0.6181, -0.0638, -0.6889]) tensor([0.5876, 0.1123, 0.1955, 0.1046]) -Greedy action tensor([ 0.5317, -0.0644, 0.1766, -0.2321]) tensor([0.3679, 0.2027, 0.2579, 0.1714]) -Greedy action tensor([ 0.8759, -0.3046, -0.1996, -0.3757]) tensor([0.5170, 0.1588, 0.1764, 0.1479]) -Greedy action tensor([ 0.2725, 0.3213, -0.0622, -0.3904]) tensor([0.3048, 0.3200, 0.2181, 0.1571]) -Greedy action tensor([ 1.0185, -0.1634, -0.1245, -0.4284]) tensor([0.5374, 0.1648, 0.1714, 0.1264]) -Greedy action tensor([ 0.3624, 0.0303, -0.0175, -0.2318]) tensor([0.3386, 0.2429, 0.2316, 0.1869]) -Greedy action tensor([ 0.9561, -0.6578, 0.1340, -0.2288]) tensor([0.5143, 0.1024, 0.2260, 0.1573]) -Greedy action tensor([ 0.4054, -0.1491, -0.1117, -0.0120]) tensor([0.3534, 0.2030, 0.2107, 0.2328]) -Greedy action tensor([ 1.0465, -0.9559, 0.1096, -0.4999]) tensor([0.5747, 0.0776, 0.2252, 0.1224]) -Greedy action tensor([ 0.8392, -0.3940, 0.0644, -0.3891]) tensor([0.4890, 0.1425, 0.2253, 0.1432]) -Greedy action tensor([ 0.6399, -0.7121, -0.0706, -0.1852]) tensor([0.4570, 0.1182, 0.2246, 0.2002]) -Greedy action tensor([ 0.4194, 0.3316, -0.2478, 0.0346]) tensor([0.3216, 0.2945, 0.1650, 0.2189]) -Greedy action tensor([ 0.6688, -0.4642, -0.1908, -0.1852]) tensor([0.4606, 0.1483, 0.1950, 0.1961]) -Greedy action tensor([ 0.9959, -0.5574, 0.0616, -0.4993]) tensor([0.5469, 0.1157, 0.2148, 0.1226]) -Greedy action tensor([ 0.8970, -0.4737, -0.0140, -0.3671]) tensor([0.5159, 0.1310, 0.2074, 0.1457]) -Greedy action tensor([ 0.5712, 0.1246, -0.1179, 0.1024]) tensor([0.3613, 0.2312, 0.1814, 0.2261]) -Greedy action tensor([ 0.9408, -0.6849, -0.0425, -0.5625]) tensor([0.5577, 0.1097, 0.2086, 0.1240]) -Greedy action tensor([ 0.7404, -0.4686, 0.0199, -0.3960]) tensor([0.4748, 0.1417, 0.2310, 0.1524]) -Greedy action tensor([ 0.8714, -0.3808, -0.0490, -0.4335]) tensor([0.5114, 0.1462, 0.2037, 0.1387]) -Greedy action tensor([ 1.1038, -0.7216, 0.0555, -0.8376]) tensor([0.6042, 0.0974, 0.2118, 0.0867]) -Greedy action tensor([ 0.9074, -0.4033, -0.1482, -0.3322]) tensor([0.5243, 0.1414, 0.1825, 0.1518]) -Greedy action tensor([ 1.0229, -0.3375, -0.0305, -0.0937]) tensor([0.5174, 0.1327, 0.1804, 0.1694]) -Greedy action tensor([ 1.0199, -0.8617, 0.0976, -0.4668]) tensor([0.5630, 0.0858, 0.2239, 0.1273]) -Greedy action tensor([ 0.8687, -0.3945, -0.1368, -0.3464]) tensor([0.5141, 0.1453, 0.1881, 0.1525]) -Greedy action tensor([ 0.8587, -0.3391, -0.1077, -0.1382]) tensor([0.4875, 0.1471, 0.1855, 0.1799]) -Greedy action tensor([ 0.5340, -0.2131, -0.0148, -0.1154]) tensor([0.3885, 0.1841, 0.2244, 0.2030]) -Greedy action tensor([ 0.9563, -0.6022, 0.0648, -0.4021]) tensor([0.5326, 0.1121, 0.2184, 0.1369]) -Greedy action tensor([ 0.7870, -0.5207, 0.1416, -0.3232]) tensor([0.4707, 0.1273, 0.2469, 0.1551]) -Greedy action tensor([ 0.3431, 0.1043, -0.1270, -0.4046]) tensor([0.3465, 0.2729, 0.2165, 0.1641]) -Greedy action tensor([ 0.6768, -0.3169, -0.0965, -0.1776]) tensor([0.4430, 0.1640, 0.2045, 0.1885]) -Greedy action tensor([ 0.5088, -0.4390, 0.1222, -0.3959]) tensor([0.4046, 0.1568, 0.2749, 0.1637]) -Greedy action tensor([ 1.2233, -0.6792, -0.0493, -0.8434]) tensor([0.6427, 0.0959, 0.1800, 0.0814]) -Greedy action tensor([ 0.7769, -0.6484, -0.0999, -0.3312]) tensor([0.5033, 0.1210, 0.2095, 0.1662]) -Greedy action tensor([ 1.0402, -0.9026, 0.1348, -0.3476]) tensor([0.5564, 0.0797, 0.2250, 0.1389]) -Greedy action tensor([ 0.3901, -0.1991, -0.0684, -0.2083]) tensor([0.3654, 0.2027, 0.2310, 0.2009]) -Greedy action tensor([ 0.6089, 0.2458, -0.1585, -0.1085]) tensor([0.3777, 0.2627, 0.1753, 0.1843]) -Greedy action tensor([ 0.6369, 0.0973, -0.0030, 0.0996]) tensor([0.3711, 0.2163, 0.1957, 0.2168]) -Greedy action tensor([ 0.6345, -0.2688, 0.0105, -0.1197]) tensor([0.4147, 0.1680, 0.2222, 0.1951]) -Greedy action tensor([ 0.5920, -0.5188, -0.0839, -0.2000]) tensor([0.4365, 0.1437, 0.2221, 0.1977]) -Greedy action tensor([ 1.2450, -0.5194, -0.1061, -0.4795]) tensor([0.6217, 0.1065, 0.1610, 0.1108]) -Greedy action tensor([ 0.7884, -0.6983, -0.0411, -0.3484]) tensor([0.5042, 0.1140, 0.2200, 0.1618]) -Greedy action tensor([ 0.5491, -0.0927, -0.0364, -0.2936]) tensor([0.3978, 0.2094, 0.2215, 0.1713]) -Greedy action tensor([ 0.7901, -0.2897, -0.0493, 0.0078]) tensor([0.4486, 0.1524, 0.1938, 0.2052]) -Greedy action tensor([ 0.5042, -0.4245, -0.1319, -0.2575]) tensor([0.4182, 0.1652, 0.2214, 0.1952]) -Greedy action tensor([ 1.0151, -0.5831, -0.0358, -0.5302]) tensor([0.5665, 0.1146, 0.1981, 0.1208]) -Greedy action tensor([ 0.6019, 0.0880, -0.0549, -0.1857]) tensor([0.3889, 0.2326, 0.2016, 0.1769]) -Greedy action tensor([ 0.8952, -0.3598, -0.1555, -0.4163]) tensor([0.5252, 0.1497, 0.1836, 0.1415]) -Greedy action tensor([ 0.5858, -0.3911, 0.0022, -0.1920]) tensor([0.4177, 0.1573, 0.2331, 0.1919]) -Greedy action tensor([ 0.8828, -0.5593, -0.0402, -0.2198]) tensor([0.5087, 0.1203, 0.2021, 0.1689]) -Greedy action tensor([ 0.2042, 0.0472, -0.1006, -0.0568]) tensor([0.2974, 0.2542, 0.2193, 0.2291]) -Greedy action tensor([ 0.7572, -0.5466, 0.0161, -0.3619]) tensor([0.4820, 0.1309, 0.2297, 0.1574]) -Greedy action tensor([ 0.2531, 0.2102, 0.0495, -0.2953]) tensor([0.2984, 0.2858, 0.2434, 0.1724]) -Greedy action tensor([ 8.6971e-01, -5.7745e-01, 4.2868e-04, -4.4925e-01]) tensor([0.5203, 0.1224, 0.2181, 0.1391]) -Greedy action tensor([ 0.5188, -0.0674, -0.0904, -0.2520]) tensor([0.3902, 0.2171, 0.2122, 0.1805]) -Greedy action tensor([ 1.4883, -0.9013, 0.0451, -0.6990]) tensor([0.6944, 0.0637, 0.1640, 0.0779]) -Greedy action tensor([ 1.1057, -0.4804, 0.0495, -0.5666]) tensor([0.5746, 0.1176, 0.1998, 0.1079]) -Greedy action tensor([ 0.8800, -0.7240, 0.0735, -0.3909]) tensor([0.5187, 0.1043, 0.2315, 0.1455]) -Greedy action tensor([ 0.6939, -0.3641, -0.0659, -0.1951]) tensor([0.4492, 0.1559, 0.2101, 0.1847]) -Greedy action tensor([ 0.9373, -0.6734, 0.0218, -0.5775]) tensor([0.5495, 0.1098, 0.2200, 0.1208]) -Greedy action tensor([ 0.5687, 0.2649, -0.2468, 0.0064]) tensor([0.3636, 0.2683, 0.1609, 0.2072]) -Greedy action tensor([ 1.1454, -0.7753, 0.0653, -0.5177]) tensor([0.5968, 0.0874, 0.2026, 0.1131]) -Greedy action tensor([ 0.8498, -0.5911, -0.0458, -0.6088]) tensor([0.5326, 0.1261, 0.2175, 0.1239]) -Greedy action tensor([ 1.1447, -0.5078, 0.0762, -0.6887]) tensor([0.5900, 0.1130, 0.2027, 0.0943]) -Greedy action tensor([ 0.9938, -0.5395, -0.0371, -0.4881]) tensor([0.5556, 0.1199, 0.1982, 0.1262]) -Greedy action tensor([ 0.8136, -0.6683, 0.0561, -0.2988]) tensor([0.4939, 0.1122, 0.2315, 0.1624]) -Greedy action tensor([0.5405, 0.0513, 0.5842, 0.3100]) tensor([0.2897, 0.1776, 0.3026, 0.2301]) -Greedy action tensor([-0.7316, -0.5200, 0.7489, -0.8750]) tensor([0.1334, 0.1648, 0.5862, 0.1156]) -Greedy action tensor([-0.3451, -0.6092, -0.3604, 0.3407]) tensor([0.2111, 0.1621, 0.2078, 0.4190]) -Greedy action tensor([ 0.0978, -0.9081, -0.4399, -0.2583]) tensor([0.3773, 0.1380, 0.2204, 0.2643]) -Greedy action tensor([ 0.3084, -1.2378, 0.7467, 0.1931]) tensor([0.2737, 0.0583, 0.4242, 0.2439]) -Greedy action tensor([ 0.4177, 0.2412, -0.3904, 0.0646]) tensor([0.3349, 0.2807, 0.1492, 0.2352]) -Greedy action tensor([-0.1239, -1.5372, 1.0852, -0.7545]) tensor([0.1951, 0.0475, 0.6536, 0.1038]) -Greedy action tensor([-1.0874, 0.1507, -0.2563, -0.6719]) tensor([0.1211, 0.4176, 0.2779, 0.1834]) -Greedy action tensor([ 0.7226, -1.9864, -0.1155, 0.7316]) tensor([0.3987, 0.0266, 0.1724, 0.4023]) -Greedy action tensor([-0.1260, -0.4535, 0.1106, -0.5549]) tensor([0.2748, 0.1981, 0.3482, 0.1790]) -Greedy action tensor([-0.0295, 0.4822, -0.2276, -0.1515]) tensor([0.2287, 0.3814, 0.1876, 0.2024]) -Greedy action tensor([-0.6022, -0.7405, -0.2514, 0.0364]) tensor([0.1929, 0.1680, 0.2739, 0.3653]) -Greedy action tensor([-0.4716, -0.4065, 0.1753, -0.8323]) tensor([0.2139, 0.2283, 0.4086, 0.1492]) -Greedy action tensor([ 0.3373, -1.2549, 0.4195, -1.4149]) tensor([0.4061, 0.0826, 0.4409, 0.0704]) -Greedy action tensor([-0.5564, -1.1382, -0.2266, 0.4783]) tensor([0.1735, 0.0970, 0.2413, 0.4883]) -Greedy action tensor([-0.4474, -0.7863, 0.0508, -1.0417]) tensor([0.2557, 0.1822, 0.4209, 0.1411]) -Greedy action tensor([ 0.7120, -1.5735, -0.3204, 0.6987]) tensor([0.4090, 0.0416, 0.1457, 0.4037]) -Greedy action tensor([-0.4271, 0.0024, 0.7935, -0.9329]) tensor([0.1532, 0.2353, 0.5191, 0.0924]) -Greedy action tensor([-0.0389, 0.1556, 0.7408, -0.0728]) tensor([0.1865, 0.2265, 0.4067, 0.1803]) -Greedy action tensor([ 0.6287, -0.1256, -0.5320, -0.1389]) tensor([0.4449, 0.2093, 0.1394, 0.2065]) -Greedy action tensor([-0.0158, -0.5112, 0.5267, -0.7629]) tensor([0.2629, 0.1602, 0.4523, 0.1246]) -Greedy action tensor([-0.8700, -1.0249, 0.8562, -0.2768]) tensor([0.1077, 0.0922, 0.6052, 0.1949]) -Greedy action tensor([ 0.3020, 0.1908, -0.4745, -0.6950]) tensor([0.3671, 0.3285, 0.1689, 0.1355]) -Greedy action tensor([ 0.2144, -0.4359, -0.5673, 0.0779]) tensor([0.3506, 0.1830, 0.1605, 0.3059]) -Greedy action tensor([ 1.1553, -1.0174, -0.2334, 0.1012]) tensor([0.5842, 0.0665, 0.1457, 0.2036]) -Greedy action tensor([-0.3969, 0.0019, 0.3289, -0.4689]) tensor([0.1822, 0.2716, 0.3766, 0.1696]) -Greedy action tensor([ 0.2032, -1.2773, 1.3322, -0.2269]) tensor([0.2012, 0.0458, 0.6222, 0.1309]) -Greedy action tensor([-0.2494, 0.6071, -0.3693, -0.6792]) tensor([0.2044, 0.4813, 0.1813, 0.1330]) -Greedy action tensor([ 0.7480, -0.8127, -0.1093, 0.6011]) tensor([0.4004, 0.0841, 0.1699, 0.3457]) -Greedy action tensor([-0.4496, -0.4564, -1.0133, 0.8136]) tensor([0.1640, 0.1628, 0.0933, 0.5799]) -Greedy action tensor([-0.4984, -0.9299, 0.2429, -0.2005]) tensor([0.1963, 0.1275, 0.4119, 0.2644]) -Greedy action tensor([ 0.6029, -0.9037, 0.5266, -0.8083]) tensor([0.4180, 0.0927, 0.3873, 0.1019]) -Greedy action tensor([-0.3302, -0.8853, -0.2988, -0.3335]) tensor([0.2776, 0.1593, 0.2864, 0.2767]) -Greedy action tensor([-0.0990, -0.8243, 0.7150, -0.4144]) tensor([0.2237, 0.1083, 0.5048, 0.1632]) -Greedy action tensor([ 0.9550, -0.3635, 0.1646, 0.3309]) tensor([0.4431, 0.1185, 0.2010, 0.2374]) -Greedy action tensor([-0.4211, -2.3240, -0.3220, 1.3142]) tensor([0.1262, 0.0188, 0.1393, 0.7156]) -Greedy action tensor([-0.8573, -0.3299, 0.5593, -0.5409]) tensor([0.1221, 0.2069, 0.5034, 0.1676]) -Greedy action tensor([ 0.7254, -0.6993, 1.3845, -0.8914]) tensor([0.2965, 0.0713, 0.5732, 0.0589]) -Greedy action tensor([-0.4023, -0.7772, 0.2275, -0.9847]) tensor([0.2425, 0.1667, 0.4553, 0.1355]) -Greedy action tensor([ 0.2276, 0.2514, -0.3943, 0.2804]) tensor([0.2766, 0.2833, 0.1485, 0.2916]) -Greedy action tensor([ 0.3804, -0.8619, 0.1901, 0.2518]) tensor([0.3339, 0.0964, 0.2760, 0.2936]) -Greedy action tensor([-0.6943, 0.0493, 0.2639, -1.0111]) tensor([0.1553, 0.3267, 0.4049, 0.1131]) -Greedy action tensor([ 0.1063, -0.1468, -0.1420, -0.3928]) tensor([0.3161, 0.2454, 0.2466, 0.1919]) -Greedy action tensor([ 0.7511, -1.6334, 1.0904, 0.3263]) tensor([0.3175, 0.0292, 0.4457, 0.2076]) -Greedy action tensor([ 0.2336, -0.6786, 0.6294, -0.1243]) tensor([0.2788, 0.1120, 0.4142, 0.1949]) -Greedy action tensor([ 0.2387, -0.2715, -0.4316, 0.1363]) tensor([0.3317, 0.1992, 0.1697, 0.2994]) -Greedy action tensor([ 0.6322, -0.1383, 0.9142, -0.4075]) tensor([0.3183, 0.1473, 0.4219, 0.1125]) -Greedy action tensor([ 0.1932, -0.3687, 0.2466, -0.0738]) tensor([0.2949, 0.1681, 0.3111, 0.2258]) -Greedy action tensor([ 0.1423, -0.8698, -0.3354, 0.4206]) tensor([0.3026, 0.1100, 0.1877, 0.3997]) -Greedy action tensor([-0.0654, -1.6176, -0.1603, -0.2601]) tensor([0.3396, 0.0719, 0.3089, 0.2795]) -Greedy action tensor([-0.8016, -0.6826, -0.4250, -0.3301]) tensor([0.1928, 0.2172, 0.2810, 0.3090]) -Greedy action tensor([ 0.5779, -0.7340, -0.1485, 0.0898]) tensor([0.4225, 0.1138, 0.2044, 0.2593]) -Greedy action tensor([ 1.1039, 0.9264, -0.2391, 0.4740]) tensor([0.3801, 0.3183, 0.0992, 0.2024]) -Greedy action tensor([-0.8421, -1.4552, 0.8043, -0.4206]) tensor([0.1212, 0.0656, 0.6286, 0.1847]) -Greedy action tensor([-0.0350, -1.8449, 0.2426, 0.5116]) tensor([0.2375, 0.0389, 0.3135, 0.4102]) -Greedy action tensor([ 1.5237, 0.5016, -0.3514, -0.1071]) tensor([0.5851, 0.2106, 0.0897, 0.1146]) -Greedy action tensor([ 0.2134, -0.6052, -0.0847, -0.2659]) tensor([0.3568, 0.1574, 0.2648, 0.2209]) -Greedy action tensor([-0.5840, -0.6562, 0.0443, 0.4892]) tensor([0.1486, 0.1383, 0.2785, 0.4346]) -Greedy action tensor([-0.1408, -1.0873, 0.0618, -1.1881]) tensor([0.3374, 0.1310, 0.4132, 0.1184]) -Greedy action tensor([-1.1903, 0.5420, 0.3903, -1.1281]) tensor([0.0795, 0.4496, 0.3863, 0.0846]) -Greedy action tensor([0.0381, 0.5624, 0.8394, 0.1700]) tensor([0.1651, 0.2788, 0.3678, 0.1883]) -Greedy action tensor([-0.2106, -1.1088, 0.3875, -0.6851]) tensor([0.2599, 0.1058, 0.4726, 0.1617]) -Greedy action tensor([ 0.2045, -0.6837, 0.6214, -0.6806]) tensor([0.2993, 0.1231, 0.4541, 0.1235]) -Greedy action tensor([ 0.1862, -0.8948, -0.1324, 0.2507]) tensor([0.3192, 0.1083, 0.2321, 0.3404]) -Greedy action tensor([-1.5459, -1.6820, 0.7575, -0.7118]) tensor([0.0705, 0.0615, 0.7056, 0.1623]) -Greedy action tensor([-1.0820, -0.7086, -1.5909, -0.1192]) tensor([0.1763, 0.2561, 0.1060, 0.4617]) -Greedy action tensor([-0.1187, -1.4232, -0.3066, -0.9694]) tensor([0.3957, 0.1074, 0.3279, 0.1690]) -Greedy action tensor([ 1.3444, -0.2871, 0.2705, 0.6634]) tensor([0.4894, 0.0957, 0.1672, 0.2477]) -Greedy action tensor([ 0.2030, -1.6683, -0.2259, -0.6995]) tensor([0.4523, 0.0696, 0.2946, 0.1834]) -Greedy action tensor([ 0.7165, 0.3105, -0.4487, -0.4224]) tensor([0.4351, 0.2899, 0.1357, 0.1393]) -Greedy action tensor([ 0.9156, -0.3432, -0.8376, 0.0297]) tensor([0.5349, 0.1519, 0.0927, 0.2206]) -Greedy action tensor([-0.0509, -0.6437, -0.0816, -0.0491]) tensor([0.2838, 0.1568, 0.2752, 0.2842]) -Greedy action tensor([ 0.5996, 0.1322, -0.4196, 0.4064]) tensor([0.3556, 0.2229, 0.1283, 0.2932]) -Greedy action tensor([ 1.1578, -0.6626, 0.0052, 0.7132]) tensor([0.4720, 0.0764, 0.1491, 0.3025]) -Greedy action tensor([-1.1209, -0.5705, 0.6931, -1.4106]) tensor([0.1040, 0.1803, 0.6379, 0.0778]) -Greedy action tensor([-0.7925, -0.6112, -0.8186, -0.0287]) tensor([0.1880, 0.2254, 0.1831, 0.4035]) -Greedy action tensor([-0.3481, -1.2762, 0.9630, -1.3849]) tensor([0.1831, 0.0724, 0.6795, 0.0649]) -Greedy action tensor([ 0.8125, -1.0172, 0.8604, 0.3966]) tensor([0.3485, 0.0559, 0.3656, 0.2299]) -Greedy action tensor([-0.1891, 0.0260, 0.5212, -0.3263]) tensor([0.1943, 0.2409, 0.3954, 0.1694]) -Greedy action tensor([ 0.8897, -0.7301, -0.4147, 0.3126]) tensor([0.4924, 0.0975, 0.1336, 0.2765]) -Greedy action tensor([-0.3130, 0.2100, -0.1355, -0.3871]) tensor([0.2079, 0.3507, 0.2483, 0.1931]) -Greedy action tensor([ 0.8755, -0.8540, 0.4160, 1.0617]) tensor([0.3318, 0.0589, 0.2096, 0.3997]) -Greedy action tensor([-1.8767, -0.3702, 0.6361, -0.1410]) tensor([0.0425, 0.1918, 0.5246, 0.2412]) -Greedy action tensor([-1.2701, 0.5285, 0.2327, 0.0071]) tensor([0.0661, 0.3995, 0.2972, 0.2372]) -Greedy action tensor([-1.8145, -0.2882, 0.6238, -0.0988]) tensor([0.0442, 0.2035, 0.5065, 0.2459]) -Greedy action tensor([-0.7258, 0.9666, 0.0814, 0.2266]) tensor([0.0888, 0.4822, 0.1990, 0.2301]) -Greedy action tensor([-1.9373, -0.4172, 0.6591, -0.1745]) tensor([0.0403, 0.1843, 0.5406, 0.2349]) -Greedy action tensor([-1.3520, -0.2426, 0.7272, 0.5761]) tensor([0.0529, 0.1604, 0.4230, 0.3637]) -Greedy action tensor([-1.9196, -0.4012, 0.6530, -0.1680]) tensor([0.0409, 0.1869, 0.5362, 0.2360]) -Greedy action tensor([-1.9362, -0.4557, 0.6658, -0.1744]) tensor([0.0405, 0.1779, 0.5460, 0.2357]) -Greedy action tensor([-1.9147, -0.4597, 0.6477, -0.1710]) tensor([0.0417, 0.1787, 0.5410, 0.2386]) -Greedy action tensor([-1.8511, -0.4159, 0.6337, -0.1314]) tensor([0.0439, 0.1844, 0.5267, 0.2451]) -Greedy action tensor([-1.7247, -0.1363, 0.5144, -0.1027]) tensor([0.0492, 0.2407, 0.4613, 0.2489]) -Greedy action tensor([-1.8100, -0.3762, 0.6146, -0.1037]) tensor([0.0455, 0.1907, 0.5135, 0.2504]) -Greedy action tensor([-1.7072, -0.3247, 0.6446, -0.0511]) tensor([0.0482, 0.1922, 0.5068, 0.2527]) -Greedy action tensor([-1.7942, -0.1174, 0.5319, -0.0676]) tensor([0.0450, 0.2408, 0.4610, 0.2531]) -Greedy action tensor([-1.1592, -1.1025, 0.3859, -0.4917]) tensor([0.1150, 0.1217, 0.5391, 0.2242]) -Greedy action tensor([-1.7944, -0.3768, 0.5818, -0.0687]) tensor([0.0465, 0.1919, 0.5005, 0.2611]) -Greedy action tensor([-1.4013, 0.6348, 0.3093, -0.0432]) tensor([0.0553, 0.4237, 0.3060, 0.2151]) -Greedy action tensor([-1.8532, -0.4256, 0.6191, -0.1378]) tensor([0.0443, 0.1847, 0.5248, 0.2462]) -Greedy action tensor([-1.8522, -0.4520, 0.6255, -0.1325]) tensor([0.0443, 0.1799, 0.5283, 0.2475]) -Greedy action tensor([-1.8505, -0.3733, 0.6074, -0.1426]) tensor([0.0443, 0.1940, 0.5173, 0.2444]) -Greedy action tensor([-1.0717, -0.5686, 0.4349, 0.4493]) tensor([0.0852, 0.1408, 0.3842, 0.3898]) -Greedy action tensor([-1.9142, -0.4706, 0.7284, -0.0938]) tensor([0.0393, 0.1664, 0.5518, 0.2425]) -Greedy action tensor([-1.8302, -0.4172, 0.6086, -0.1241]) tensor([0.0453, 0.1861, 0.5191, 0.2495]) -Greedy action tensor([-1.9205, -0.4381, 0.6637, -0.1669]) tensor([0.0409, 0.1802, 0.5425, 0.2364]) -Greedy action tensor([-1.8026, -0.4585, 0.6006, -0.0986]) tensor([0.0468, 0.1793, 0.5170, 0.2569]) -Greedy action tensor([-1.9186, -0.4230, 0.6496, -0.1707]) tensor([0.0412, 0.1840, 0.5379, 0.2368]) -Greedy action tensor([-1.9081, -0.4414, 0.6491, -0.1623]) tensor([0.0417, 0.1809, 0.5383, 0.2391]) -Greedy action tensor([-1.3847, 0.7163, 0.2907, 0.1543]) tensor([0.0522, 0.4263, 0.2785, 0.2430]) -Greedy action tensor([ 0.2900, 1.0443, -0.0282, 0.4807]) tensor([0.1975, 0.4199, 0.1437, 0.2390]) -Greedy action tensor([-1.5181, -0.1744, 0.4345, -0.0351]) tensor([0.0614, 0.2354, 0.4327, 0.2705]) -Greedy action tensor([-0.4416, 1.0501, 0.0345, 0.3238]) tensor([0.1086, 0.4829, 0.1749, 0.2336]) -Greedy action tensor([-1.8169, -0.2684, 0.6045, -0.1248]) tensor([0.0446, 0.2100, 0.5028, 0.2425]) -Greedy action tensor([-1.8350, -0.0151, 0.5496, -0.1269]) tensor([0.0425, 0.2621, 0.4610, 0.2344]) -Greedy action tensor([-1.9242, -0.4510, 0.6606, -0.1707]) tensor([0.0410, 0.1788, 0.5435, 0.2367]) -Greedy action tensor([-1.9292, -0.4278, 0.6600, -0.1718]) tensor([0.0406, 0.1824, 0.5413, 0.2356]) -Greedy action tensor([-1.8876, -0.3546, 0.6313, -0.1517]) tensor([0.0422, 0.1953, 0.5234, 0.2392]) -Greedy action tensor([-0.6945, 0.6805, 0.1330, -0.0511]) tensor([0.1093, 0.4324, 0.2501, 0.2081]) -Greedy action tensor([-1.7883, -0.5026, 0.5918, -0.1017]) tensor([0.0480, 0.1737, 0.5189, 0.2594]) -Greedy action tensor([-1.9267, -0.4280, 0.6597, -0.1682]) tensor([0.0407, 0.1822, 0.5408, 0.2363]) -Greedy action tensor([-1.9063, -0.2883, 0.6222, -0.1651]) tensor([0.0412, 0.2077, 0.5162, 0.2349]) -Greedy action tensor([-1.2073, 0.0246, 0.2894, 0.0308]) tensor([0.0810, 0.2777, 0.3619, 0.2794]) -Greedy action tensor([-1.9185, -0.3992, 0.6517, -0.1657]) tensor([0.0410, 0.1872, 0.5354, 0.2364]) -Greedy action tensor([-1.9240, -0.3967, 0.6474, -0.1661]) tensor([0.0408, 0.1881, 0.5342, 0.2369]) -Greedy action tensor([-1.5862, -0.2662, 0.4590, -0.0488]) tensor([0.0584, 0.2186, 0.4514, 0.2716]) -Greedy action tensor([-1.8885, -0.4424, 0.6445, -0.1580]) tensor([0.0426, 0.1808, 0.5362, 0.2403]) -Greedy action tensor([-1.9065, -0.4078, 0.6468, -0.1612]) tensor([0.0416, 0.1861, 0.5342, 0.2381]) -Greedy action tensor([-1.8896, -0.4520, 0.6461, -0.1496]) tensor([0.0425, 0.1789, 0.5365, 0.2421]) -Greedy action tensor([-1.1439, -0.4412, 0.2444, 0.5079]) tensor([0.0817, 0.1649, 0.3274, 0.4260]) -Greedy action tensor([-1.6240, -0.3890, 0.5149, -0.0997]) tensor([0.0571, 0.1963, 0.4846, 0.2621]) -Greedy action tensor([-1.3986, 0.2756, 0.3274, 0.0150]) tensor([0.0623, 0.3321, 0.3498, 0.2559]) -Greedy action tensor([-1.8988, -0.3647, 0.6321, -0.1521]) tensor([0.0418, 0.1937, 0.5249, 0.2396]) -Greedy action tensor([-1.9219, -0.4161, 0.6548, -0.1680]) tensor([0.0409, 0.1845, 0.5382, 0.2364]) -Greedy action tensor([-1.8959, -0.4486, 0.6469, -0.1567]) tensor([0.0423, 0.1797, 0.5374, 0.2406]) -Greedy action tensor([-1.9321, -0.4572, 0.6726, -0.1741]) tensor([0.0405, 0.1770, 0.5477, 0.2349]) -Greedy action tensor([-0.8947, -0.6022, 0.1750, 0.3281]) tensor([0.1156, 0.1549, 0.3369, 0.3926]) -Greedy action tensor([-1.8747, -0.4433, 0.6343, -0.1463]) tensor([0.0433, 0.1811, 0.5319, 0.2437]) -Greedy action tensor([-1.8773, -0.4491, 0.6369, -0.1484]) tensor([0.0432, 0.1801, 0.5335, 0.2433]) -Greedy action tensor([-1.8609, -0.4115, 0.6183, -0.1297]) tensor([0.0438, 0.1865, 0.5224, 0.2473]) -Greedy action tensor([-1.8857, -0.4456, 0.6407, -0.1477]) tensor([0.0427, 0.1803, 0.5342, 0.2428]) -Greedy action tensor([-1.9012, -0.3861, 0.6440, -0.1573]) tensor([0.0416, 0.1895, 0.5308, 0.2382]) -Greedy action tensor([-1.6622, -0.3672, 0.5090, -0.0700]) tensor([0.0545, 0.1991, 0.4783, 0.2681]) -Greedy action tensor([-1.7427, -0.2972, 0.6301, -0.0627]) tensor([0.0469, 0.1989, 0.5028, 0.2515]) -Greedy action tensor([-1.7611, 0.0373, 0.5386, -0.0941]) tensor([0.0448, 0.2708, 0.4470, 0.2374]) -Greedy action tensor([-1.4831, 0.6926, 0.3381, -0.6276]) tensor([0.0545, 0.4803, 0.3369, 0.1283]) -Greedy action tensor([-0.4818, 0.7841, -0.0571, 0.0297]) tensor([0.1291, 0.4580, 0.1975, 0.2154]) -Greedy action tensor([-1.9055, -0.4277, 0.6444, -0.1630]) tensor([0.0418, 0.1834, 0.5358, 0.2390]) -Greedy action tensor([-1.8602, -0.3760, 0.6021, -0.1284]) tensor([0.0439, 0.1935, 0.5147, 0.2479]) -Greedy action tensor([-1.8572, -0.4058, 0.6203, -0.1538]) tensor([0.0441, 0.1883, 0.5254, 0.2422]) -Greedy action tensor([-1.9460, -0.4494, 0.6673, -0.1812]) tensor([0.0401, 0.1790, 0.5468, 0.2341]) -Greedy action tensor([-1.8250, -0.1780, 0.5625, -0.0932]) tensor([0.0440, 0.2284, 0.4790, 0.2486]) -Greedy action tensor([-1.2007, 0.7735, 0.1562, 0.2259]) tensor([0.0615, 0.4432, 0.2390, 0.2563]) -Greedy action tensor([-1.7813, -0.5032, 0.5855, -0.1206]) tensor([0.0487, 0.1750, 0.5197, 0.2565]) -Greedy action tensor([-1.9289, -0.4490, 0.6622, -0.1715]) tensor([0.0408, 0.1790, 0.5439, 0.2363]) -Greedy action tensor([-0.9525, 0.1635, 0.2175, -0.0287]) tensor([0.1021, 0.3117, 0.3290, 0.2572]) -Greedy action tensor([-1.8688, -0.4708, 0.6230, -0.1400]) tensor([0.0439, 0.1778, 0.5308, 0.2475]) -Greedy action tensor([-1.4963, -0.5647, 0.4885, 0.1165]) tensor([0.0632, 0.1603, 0.4597, 0.3168]) -Greedy action tensor([-1.8750, -0.0829, 0.5749, -0.1306]) tensor([0.0411, 0.2469, 0.4766, 0.2354]) -Greedy action tensor([-1.8173, -0.1818, 0.5842, -0.1012]) tensor([0.0440, 0.2257, 0.4856, 0.2447]) -Greedy action tensor([-1.9477, -0.4516, 0.6686, -0.1822]) tensor([0.0400, 0.1786, 0.5475, 0.2338]) -Greedy action tensor([-0.9328, 0.6506, 0.1441, 0.0206]) tensor([0.0877, 0.4273, 0.2575, 0.2275]) -Greedy action tensor([-1.5456, -0.2701, 0.4249, 0.0137]) tensor([0.0606, 0.2169, 0.4346, 0.2880]) -Greedy action tensor([ 1.1170, -0.8117, -0.1142, 0.4602]) tensor([0.5113, 0.0743, 0.1493, 0.2651]) -Greedy action tensor([ 1.9656, 0.2069, -0.1048, 0.2949]) tensor([0.6727, 0.1159, 0.0849, 0.1266]) -Greedy action tensor([ 1.2115, -0.1982, -0.6575, -0.0072]) tensor([0.5903, 0.1442, 0.0911, 0.1745]) -Greedy action tensor([ 1.7025, -0.6887, -0.2024, 0.1950]) tensor([0.6841, 0.0626, 0.1018, 0.1515]) -Greedy action tensor([ 1.0422e+00, -2.7978e-04, -1.9801e-01, 1.6022e-01]) tensor([0.4864, 0.1715, 0.1407, 0.2014]) -Greedy action tensor([ 1.5981, -0.2477, -0.1753, 0.3945]) tensor([0.6143, 0.0970, 0.1043, 0.1844]) -Greedy action tensor([ 1.5034, 0.0897, -0.5164, 0.4840]) tensor([0.5758, 0.1401, 0.0764, 0.2078]) -Greedy action tensor([ 1.5948, -0.5240, -0.3959, 0.2313]) tensor([0.6611, 0.0795, 0.0903, 0.1691]) -Greedy action tensor([ 1.5903, -0.9358, -0.0935, 0.2108]) tensor([0.6590, 0.0527, 0.1224, 0.1659]) -Greedy action tensor([ 1.8048, -0.7453, -0.5713, 0.1735]) tensor([0.7317, 0.0571, 0.0680, 0.1432]) -Greedy action tensor([ 1.5023, -0.5535, -0.2940, 0.1583]) tensor([0.6432, 0.0823, 0.1067, 0.1677]) -Greedy action tensor([ 1.3613, -0.2989, -0.3544, 0.3483]) tensor([0.5770, 0.1097, 0.1038, 0.2095]) -Greedy action tensor([ 2.4002, -1.0510, -0.2343, 0.9214]) tensor([0.7511, 0.0238, 0.0539, 0.1712]) -Greedy action tensor([ 2.8565, -0.8633, 0.0376, 0.2550]) tensor([0.8635, 0.0209, 0.0515, 0.0640]) -Greedy action tensor([ 1.4379, -0.4699, -0.5713, 0.5691]) tensor([0.5876, 0.0872, 0.0788, 0.2464]) -Greedy action tensor([ 1.2906, -0.9015, -0.0530, 0.4060]) tensor([0.5601, 0.0625, 0.1461, 0.2312]) -Greedy action tensor([ 1.1752, -0.3764, -0.5778, 0.3944]) tensor([0.5425, 0.1150, 0.0940, 0.2485]) -Greedy action tensor([ 2.3770, 0.2342, -0.0730, 0.3023]) tensor([0.7523, 0.0883, 0.0649, 0.0945]) -Greedy action tensor([ 1.3463, -0.2927, -0.4412, 0.4411]) tensor([0.5662, 0.1100, 0.0948, 0.2290]) -Greedy action tensor([ 2.7598, -0.9139, 0.0959, 1.1219]) tensor([0.7755, 0.0197, 0.0540, 0.1508]) -Greedy action tensor([ 1.2496, -0.5400, -0.3663, 0.5303]) tensor([0.5397, 0.0901, 0.1072, 0.2629]) -Greedy action tensor([ 1.0921, -0.2355, -0.4134, 0.2666]) tensor([0.5195, 0.1377, 0.1153, 0.2275]) -Greedy action tensor([ 1.1852, -0.0418, -0.3246, -0.0593]) tensor([0.5549, 0.1627, 0.1226, 0.1598]) -Greedy action tensor([ 1.8816, 0.0987, -0.0178, -0.1638]) tensor([0.6910, 0.1162, 0.1034, 0.0894]) -Greedy action tensor([ 1.5905, -0.3408, -0.6386, 0.1067]) tensor([0.6760, 0.0980, 0.0727, 0.1533]) -Greedy action tensor([ 1.6159, -0.2071, -1.2729, 0.2336]) tensor([0.6811, 0.1100, 0.0379, 0.1710]) -Greedy action tensor([ 1.4088, -0.5871, -0.6731, 0.3177]) tensor([0.6264, 0.0851, 0.0781, 0.2104]) -Greedy action tensor([ 1.4677, -0.7472, -0.3515, -0.0360]) tensor([0.6695, 0.0731, 0.1086, 0.1488]) -Greedy action tensor([ 2.3828, -1.3690, -0.0085, 0.8324]) tensor([0.7535, 0.0177, 0.0690, 0.1599]) -Greedy action tensor([ 1.2986, -0.0587, -1.1368, 0.2018]) tensor([0.5956, 0.1533, 0.0522, 0.1989]) -Greedy action tensor([ 0.8066, -0.2757, -0.2766, 0.2668]) tensor([0.4425, 0.1499, 0.1498, 0.2579]) -Greedy action tensor([ 1.2618, -0.4382, -0.9263, 0.5464]) tensor([0.5606, 0.1024, 0.0629, 0.2741]) -Greedy action tensor([ 0.7585, -0.5972, 0.2379, 0.3352]) tensor([0.3989, 0.1028, 0.2370, 0.2612]) -Greedy action tensor([ 1.7799, -0.5426, -0.2261, -0.1437]) tensor([0.7254, 0.0711, 0.0976, 0.1060]) -Greedy action tensor([ 1.8557, -0.7648, -0.4648, 0.5711]) tensor([0.6907, 0.0503, 0.0678, 0.1912]) -Greedy action tensor([ 1.3942, 0.0304, -0.8926, 0.7735]) tensor([0.5278, 0.1349, 0.0536, 0.2837]) -Greedy action tensor([ 1.5376, -0.2070, -0.1240, 0.2795]) tensor([0.6065, 0.1060, 0.1151, 0.1724]) -Greedy action tensor([ 2.6187, -1.6558, 0.2056, 1.0623]) tensor([0.7608, 0.0106, 0.0681, 0.1605]) -Greedy action tensor([ 1.8164, -0.7159, 0.0645, 0.9325]) tensor([0.6002, 0.0477, 0.1041, 0.2480]) -Greedy action tensor([ 1.6086, -0.3502, -0.7404, 0.2950]) tensor([0.6643, 0.0937, 0.0634, 0.1786]) -Greedy action tensor([ 1.2121, 0.0145, -0.8018, 0.2612]) tensor([0.5489, 0.1657, 0.0733, 0.2121]) -Greedy action tensor([ 1.5008, -0.6173, -0.2092, 0.5608]) tensor([0.5911, 0.0711, 0.1069, 0.2309]) -Greedy action tensor([ 1.6354, -0.1024, 0.1943, 0.5890]) tensor([0.5670, 0.0997, 0.1342, 0.1991]) -Greedy action tensor([ 1.7794, -0.3781, -0.7145, 0.3700]) tensor([0.6933, 0.0801, 0.0572, 0.1693]) -Greedy action tensor([ 1.6859, -0.7909, -0.1693, 0.0193]) tensor([0.6996, 0.0588, 0.1094, 0.1322]) -Greedy action tensor([ 1.4539, -0.4474, -0.6084, 0.0659]) tensor([0.6553, 0.0979, 0.0833, 0.1635]) -Greedy action tensor([ 1.2718, -0.5740, -0.6497, 0.4553]) tensor([0.5726, 0.0904, 0.0838, 0.2531]) -Greedy action tensor([ 1.7811, -0.9273, -0.3272, 0.7546]) tensor([0.6467, 0.0431, 0.0785, 0.2317]) -Greedy action tensor([ 1.2489, -0.4020, -0.4990, 0.1595]) tensor([0.5874, 0.1127, 0.1023, 0.1976]) -Greedy action tensor([ 1.1607, -0.1679, -0.9074, 0.3565]) tensor([0.5439, 0.1440, 0.0688, 0.2433]) -Greedy action tensor([ 1.2029, -0.4824, -0.2329, 0.5035]) tensor([0.5208, 0.0965, 0.1239, 0.2588]) -Greedy action tensor([ 1.5860, -0.0104, -0.5856, 0.3439]) tensor([0.6229, 0.1262, 0.0710, 0.1799]) -Greedy action tensor([ 1.0525, -0.2573, -0.9728, 0.2068]) tensor([0.5461, 0.1474, 0.0721, 0.2344]) -Greedy action tensor([ 1.7820, -1.1002, -0.1792, 0.3578]) tensor([0.6957, 0.0390, 0.0979, 0.1675]) -Greedy action tensor([ 1.4137, -0.4302, -0.3862, 0.2509]) tensor([0.6112, 0.0967, 0.1010, 0.1911]) -Greedy action tensor([ 1.8277, -0.5942, -0.4869, 0.4630]) tensor([0.6930, 0.0615, 0.0685, 0.1770]) -Greedy action tensor([ 1.8074, -0.6216, -0.7034, 0.6029]) tensor([0.6807, 0.0600, 0.0553, 0.2041]) -Greedy action tensor([ 1.7023, -0.6308, -0.6995, 0.4491]) tensor([0.6788, 0.0658, 0.0615, 0.1939]) -Greedy action tensor([ 1.5806, -0.1861, -0.3547, 0.1025]) tensor([0.6479, 0.1107, 0.0935, 0.1478]) -Greedy action tensor([ 1.6445, -0.8567, -0.3206, 0.3969]) tensor([0.6625, 0.0543, 0.0928, 0.1903]) -Greedy action tensor([ 1.6096, -0.2909, -0.5512, 0.5656]) tensor([0.6185, 0.0925, 0.0713, 0.2177]) -Greedy action tensor([ 1.5007, -0.2502, -0.8851, 0.1208]) tensor([0.6591, 0.1144, 0.0607, 0.1658]) -Greedy action tensor([ 1.1740, -0.5023, -0.3743, 0.5796]) tensor([0.5124, 0.0959, 0.1089, 0.2828]) -Greedy action tensor([ 1.9406, -0.5503, -0.0806, 0.6346]) tensor([0.6728, 0.0557, 0.0892, 0.1823]) -Greedy action tensor([ 2.6996, -1.1293, 0.0151, 1.1341]) tensor([0.7698, 0.0167, 0.0525, 0.1609]) -Greedy action tensor([ 1.1162, -0.2531, -0.5152, 0.3738]) tensor([0.5192, 0.1320, 0.1016, 0.2471]) -Greedy action tensor([ 1.7394, -0.4353, -0.9720, -0.0081]) tensor([0.7384, 0.0839, 0.0491, 0.1286]) -Greedy action tensor([ 1.1874, -0.6339, -0.0550, 0.2516]) tensor([0.5427, 0.0878, 0.1567, 0.2129]) -Greedy action tensor([ 1.8089, -0.8434, -0.4945, 0.4432]) tensor([0.7014, 0.0494, 0.0701, 0.1790]) -Greedy action tensor([ 1.0646, -0.2249, -0.2225, 0.1830]) tensor([0.5087, 0.1401, 0.1405, 0.2107]) -Greedy action tensor([ 1.5665, -0.5508, -1.0486, 0.2480]) tensor([0.6844, 0.0824, 0.0501, 0.1831]) -Greedy action tensor([ 1.5610, -0.0240, -0.1974, 0.6445]) tensor([0.5627, 0.1153, 0.0970, 0.2250]) -Greedy action tensor([ 1.5614, -0.6346, -0.5396, 0.4916]) tensor([0.6343, 0.0706, 0.0776, 0.2176]) -Greedy action tensor([ 1.4799, -0.4353, -0.1965, -0.2587]) tensor([0.6622, 0.0976, 0.1239, 0.1164]) -Greedy action tensor([ 1.4634, -0.8485, -0.2171, 0.5263]) tensor([0.5963, 0.0591, 0.1111, 0.2336]) -Greedy action tensor([ 1.8613, -0.7413, 0.0542, 0.4388]) tensor([0.6760, 0.0501, 0.1109, 0.1630]) -Greedy action tensor([ 1.4556, -0.3275, -0.2003, 0.7048]) tensor([0.5461, 0.0918, 0.1043, 0.2578]) -Greedy action tensor([ 1.5439, 0.2593, -0.4616, -0.2509]) tensor([0.6339, 0.1754, 0.0853, 0.1053]) -Greedy action tensor([ 1.2435, -0.1945, -0.6505, 0.5757]) tensor([0.5261, 0.1249, 0.0792, 0.2698]) -Greedy action tensor([ 1.3053, -0.2737, -0.8860, 0.3319]) tensor([0.5897, 0.1216, 0.0659, 0.2228]) -Greedy action tensor([ 1.1984, 0.4117, -0.7002, -0.3345]) tensor([0.5491, 0.2501, 0.0823, 0.1186]) -Greedy action tensor([ 0.7032, -0.2495, -0.0607, -0.1464]) tensor([0.4388, 0.1692, 0.2044, 0.1876]) -Greedy action tensor([ 1.1870, -0.5229, -0.0632, -0.7322]) tensor([0.6196, 0.1121, 0.1775, 0.0909]) -Greedy action tensor([ 0.3054, -0.0286, -0.0305, 0.0281]) tensor([0.3136, 0.2246, 0.2241, 0.2377]) -Greedy action tensor([ 0.6354, -0.0534, 0.0256, -0.1412]) tensor([0.3991, 0.2004, 0.2169, 0.1836]) -Greedy action tensor([ 1.1160, -0.7808, -0.0721, -0.6019]) tensor([0.6119, 0.0918, 0.1865, 0.1098]) -Greedy action tensor([ 0.6854, -0.4214, -0.0399, -0.1592]) tensor([0.4455, 0.1473, 0.2157, 0.1915]) -Greedy action tensor([ 1.2041, -0.8522, 0.0678, -0.7734]) tensor([0.6300, 0.0806, 0.2022, 0.0872]) -Greedy action tensor([ 0.6883, -0.5880, -0.2666, -0.1879]) tensor([0.4807, 0.1342, 0.1850, 0.2002]) -Greedy action tensor([ 0.7115, -0.6371, -0.1366, -0.2726]) tensor([0.4851, 0.1259, 0.2077, 0.1813]) -Greedy action tensor([ 0.9542, -0.8016, 0.0809, -0.6149]) tensor([0.5560, 0.0961, 0.2322, 0.1158]) -Greedy action tensor([ 0.3026, -0.1549, -0.0552, 0.0184]) tensor([0.3242, 0.2052, 0.2267, 0.2440]) -Greedy action tensor([ 0.1351, 0.0021, -0.2041, 0.0653]) tensor([0.2841, 0.2487, 0.2023, 0.2649]) -Greedy action tensor([ 0.7788, -0.2962, 0.0332, -0.5318]) tensor([0.4795, 0.1637, 0.2275, 0.1293]) -Greedy action tensor([ 0.8533, -0.7393, 0.0199, -0.4082]) tensor([0.5205, 0.1059, 0.2262, 0.1474]) -Greedy action tensor([ 1.0556, -0.6620, -0.1452, -0.4330]) tensor([0.5861, 0.1052, 0.1764, 0.1323]) -Greedy action tensor([ 0.4743, -0.3499, -0.1017, -0.3223]) tensor([0.4079, 0.1789, 0.2293, 0.1839]) -Greedy action tensor([ 0.8895, -0.4223, -0.0222, -0.4113]) tensor([0.5145, 0.1386, 0.2068, 0.1401]) -Greedy action tensor([ 1.0327, -0.5809, -0.1916, -0.3680]) tensor([0.5749, 0.1145, 0.1690, 0.1417]) -Greedy action tensor([ 0.7478, -0.0147, -0.0615, -0.0596]) tensor([0.4241, 0.1979, 0.1888, 0.1892]) -Greedy action tensor([ 0.3620, 0.2469, -0.0095, -0.0375]) tensor([0.3075, 0.2741, 0.2121, 0.2062]) -Greedy action tensor([ 0.9985, -0.6653, 0.0556, -0.6270]) tensor([0.5632, 0.1067, 0.2193, 0.1108]) -Greedy action tensor([ 0.4124, 0.0412, -0.0531, 0.0333]) tensor([0.3331, 0.2298, 0.2091, 0.2280]) -Greedy action tensor([ 0.6952, 0.0949, -0.0268, 0.0061]) tensor([0.3943, 0.2163, 0.1915, 0.1979]) -Greedy action tensor([ 0.6470, -0.3288, 0.1145, -0.1570]) tensor([0.4147, 0.1563, 0.2435, 0.1856]) -Greedy action tensor([ 0.6684, -0.1857, -0.0259, -0.2146]) tensor([0.4276, 0.1820, 0.2136, 0.1768]) -Greedy action tensor([ 0.4199, 0.2502, -0.1124, 0.0445]) tensor([0.3207, 0.2707, 0.1883, 0.2203]) -Greedy action tensor([ 0.7155, -0.6344, -0.1269, -0.4242]) tensor([0.4975, 0.1290, 0.2143, 0.1592]) -Greedy action tensor([ 0.5716, -0.1746, -0.0744, -0.1007]) tensor([0.3986, 0.1890, 0.2089, 0.2035]) -Greedy action tensor([ 0.7665, -0.2662, -0.1094, -0.1168]) tensor([0.4575, 0.1629, 0.1905, 0.1891]) -Greedy action tensor([ 0.8507, -0.5070, -0.0197, -0.2212]) tensor([0.4954, 0.1275, 0.2075, 0.1696]) -Greedy action tensor([ 1.1367, -0.8623, 0.0218, -0.7761]) tensor([0.6207, 0.0841, 0.2036, 0.0917]) -Greedy action tensor([ 0.7885, -0.4473, -0.1134, -0.3714]) tensor([0.4975, 0.1446, 0.2019, 0.1560]) -Greedy action tensor([ 0.6149, -0.3631, 0.0533, -0.6197]) tensor([0.4470, 0.1681, 0.2549, 0.1300]) -Greedy action tensor([ 0.8902, -0.2610, 0.0482, -0.0361]) tensor([0.4666, 0.1476, 0.2010, 0.1848]) -Greedy action tensor([ 0.8521, -0.4275, -0.1548, -0.5515]) tensor([0.5293, 0.1472, 0.1934, 0.1301]) -Greedy action tensor([ 0.8815, -0.5715, -0.0911, -0.3730]) tensor([0.5271, 0.1233, 0.1993, 0.1503]) -Greedy action tensor([ 0.6655, -0.6044, 0.1163, -0.4397]) tensor([0.4567, 0.1283, 0.2637, 0.1512]) -Greedy action tensor([ 0.8574, -0.2129, 0.0784, 0.0114]) tensor([0.4483, 0.1537, 0.2057, 0.1924]) -Greedy action tensor([ 0.9233, -0.5353, -0.0138, -0.2878]) tensor([0.5203, 0.1210, 0.2038, 0.1550]) -Greedy action tensor([ 0.7048, -0.4190, -0.0163, -0.4039]) tensor([0.4670, 0.1518, 0.2271, 0.1541]) -Greedy action tensor([ 0.7573, -0.5506, -0.0658, -0.2091]) tensor([0.4785, 0.1294, 0.2101, 0.1820]) -Greedy action tensor([ 0.9267, -0.5156, -0.1598, -0.6710]) tensor([0.5630, 0.1331, 0.1900, 0.1139]) -Greedy action tensor([ 1.0015, -0.9083, 0.1009, -0.6093]) tensor([0.5701, 0.0844, 0.2316, 0.1139]) -Greedy action tensor([ 0.6926, -0.5514, -0.0022, -0.2703]) tensor([0.4610, 0.1329, 0.2301, 0.1760]) -Greedy action tensor([ 0.2434, -0.1603, -0.1174, -0.1796]) tensor([0.3311, 0.2211, 0.2308, 0.2169]) -Greedy action tensor([ 0.6420, 0.0874, 0.0324, -0.1794]) tensor([0.3910, 0.2245, 0.2125, 0.1720]) -Greedy action tensor([ 0.7184, -0.5147, 0.0689, -0.2471]) tensor([0.4557, 0.1328, 0.2380, 0.1735]) -Greedy action tensor([ 1.1477, -0.5430, -0.1538, -0.4802]) tensor([0.6050, 0.1116, 0.1646, 0.1188]) -Greedy action tensor([ 1.0011, -0.6093, 0.0790, -0.3218]) tensor([0.5365, 0.1072, 0.2134, 0.1429]) -Greedy action tensor([ 0.6247, -0.5474, -0.0903, -0.1515]) tensor([0.4427, 0.1371, 0.2165, 0.2037]) -Greedy action tensor([ 0.5697, -0.2167, -0.1048, -0.0431]) tensor([0.3989, 0.1817, 0.2032, 0.2162]) -Greedy action tensor([ 0.8472, -0.4843, 0.0774, -0.4589]) tensor([0.5005, 0.1322, 0.2318, 0.1356]) -Greedy action tensor([ 1.0310, -0.7222, 0.0510, -0.3959]) tensor([0.5591, 0.0968, 0.2098, 0.1342]) -Greedy action tensor([ 0.6464, -0.3100, -0.0447, -0.3089]) tensor([0.4405, 0.1693, 0.2207, 0.1695]) -Greedy action tensor([ 0.4477, -0.2908, -0.1223, -0.3526]) tensor([0.4012, 0.1917, 0.2269, 0.1802]) -Greedy action tensor([ 0.9607, -0.7548, 0.1560, -0.7924]) tensor([0.5555, 0.0999, 0.2484, 0.0962]) -Greedy action tensor([ 0.7908, -0.8906, -0.0036, -0.4774]) tensor([0.5210, 0.0970, 0.2354, 0.1466]) -Greedy action tensor([ 0.4230, -0.1542, 0.0203, -0.2060]) tensor([0.3619, 0.2032, 0.2419, 0.1929]) -Greedy action tensor([ 0.7747, -0.4669, 0.1906, -0.5619]) tensor([0.4741, 0.1370, 0.2643, 0.1246]) -Greedy action tensor([ 0.9499, -0.8350, 0.0190, -0.5481]) tensor([0.5600, 0.0940, 0.2208, 0.1252]) -Greedy action tensor([ 1.0780, -0.6409, 0.0519, -0.6240]) tensor([0.5814, 0.1042, 0.2084, 0.1060]) -Greedy action tensor([ 0.7547, -0.4349, -0.0392, -0.2826]) tensor([0.4737, 0.1442, 0.2142, 0.1679]) -Greedy action tensor([ 1.1244, -0.7550, -0.0615, -0.6906]) tensor([0.6169, 0.0942, 0.1884, 0.1005]) -Greedy action tensor([ 1.2036, -0.1422, -0.2703, -0.7673]) tensor([0.6140, 0.1598, 0.1406, 0.0855]) -Greedy action tensor([ 0.8466, -0.6310, 0.1745, -0.3930]) tensor([0.4930, 0.1125, 0.2517, 0.1427]) -Greedy action tensor([ 0.8565, -0.4920, 0.0243, -0.2292]) tensor([0.4920, 0.1277, 0.2141, 0.1661]) -Greedy action tensor([ 0.8732, -0.4521, 0.0772, -0.4599]) tensor([0.5049, 0.1342, 0.2278, 0.1331]) -Greedy action tensor([ 0.7527, -0.1921, -0.0661, -0.1087]) tensor([0.4440, 0.1726, 0.1958, 0.1876]) -Greedy action tensor([ 0.7927, -0.3243, -0.1555, -0.3390]) tensor([0.4909, 0.1607, 0.1902, 0.1583]) -Greedy action tensor([ 0.7082, -0.4172, -0.0656, -0.2691]) tensor([0.4625, 0.1501, 0.2133, 0.1741]) -Greedy action tensor([ 0.2942, 0.3330, -0.0264, 0.0272]) tensor([0.2832, 0.2944, 0.2055, 0.2168]) -Greedy action tensor([ 0.8321, -0.0458, -0.0182, 0.0905]) tensor([0.4312, 0.1792, 0.1842, 0.2054]) -Greedy action tensor([ 0.7293, -0.5744, -0.0347, -0.5274]) tensor([0.4946, 0.1343, 0.2304, 0.1408]) -Greedy action tensor([ 0.7225, -0.5195, -0.0314, -0.2831]) tensor([0.4706, 0.1359, 0.2214, 0.1721]) -Greedy action tensor([ 0.5267, 0.0748, 0.0247, -0.1954]) tensor([0.3666, 0.2333, 0.2219, 0.1781]) -Greedy action tensor([ 0.8007, -0.7262, -0.0029, -0.3875]) tensor([0.5077, 0.1103, 0.2273, 0.1547]) -Greedy action tensor([ 0.9665, -0.5457, -0.0420, -0.2997]) tensor([0.5356, 0.1181, 0.1954, 0.1510]) -Greedy action tensor([ 0.8564, -0.5723, 0.1271, -0.4657]) tensor([0.5029, 0.1205, 0.2425, 0.1341]) -Greedy action tensor([ 0.7224, -0.3375, -0.0381, -0.1721]) tensor([0.4499, 0.1559, 0.2103, 0.1839]) -Greedy action tensor([ 0.9012, -0.9380, -0.0984, -0.2970]) tensor([0.5468, 0.0869, 0.2013, 0.1650]) -Greedy action tensor([ 0.6698, -0.8004, 0.0355, -0.3669]) tensor([0.4729, 0.1087, 0.2508, 0.1677]) -Greedy action tensor([-0.6333, -0.7530, -0.2639, -0.7257]) tensor([0.2355, 0.2090, 0.3408, 0.2147]) -Greedy action tensor([ 1.0790, -0.3263, 0.3520, 0.3634]) tensor([0.4509, 0.1106, 0.2180, 0.2205]) -Greedy action tensor([ 0.1981, -0.8164, -0.3699, 0.7928]) tensor([0.2673, 0.0969, 0.1514, 0.4844]) -Greedy action tensor([ 0.2219, -1.3620, -0.0221, -0.2837]) tensor([0.3858, 0.0792, 0.3023, 0.2327]) -Greedy action tensor([ 0.4035, -0.7799, 0.1116, -0.5678]) tensor([0.4112, 0.1259, 0.3071, 0.1557]) -Greedy action tensor([ 0.4384, -1.3824, 1.2658, -0.1969]) tensor([0.2513, 0.0407, 0.5749, 0.1331]) -Greedy action tensor([-1.0164, 0.0780, 0.3450, -1.0797]) tensor([0.1133, 0.3384, 0.4420, 0.1063]) -Greedy action tensor([ 0.2781, -0.3231, 0.4300, -0.5238]) tensor([0.3164, 0.1734, 0.3683, 0.1419]) -Greedy action tensor([ 0.1393, -0.6493, -0.2967, 0.2531]) tensor([0.3104, 0.1411, 0.2007, 0.3478]) -Greedy action tensor([-0.4429, -1.4441, 0.3536, -0.3701]) tensor([0.2146, 0.0788, 0.4759, 0.2308]) -Greedy action tensor([ 0.4367, -1.2713, -0.2232, -0.3147]) tensor([0.4609, 0.0835, 0.2382, 0.2174]) -Greedy action tensor([ 0.4635, -1.6890, 0.6756, 0.3017]) tensor([0.3122, 0.0363, 0.3860, 0.2656]) -Greedy action tensor([ 0.2126, 0.0589, -0.5380, -0.3490]) tensor([0.3448, 0.2957, 0.1628, 0.1967]) -Greedy action tensor([ 0.7370, 0.2400, 0.4419, -0.0198]) tensor([0.3544, 0.2156, 0.2638, 0.1663]) -Greedy action tensor([ 0.7901, -1.4191, 0.6087, -0.0405]) tensor([0.4202, 0.0461, 0.3505, 0.1831]) -Greedy action tensor([-0.7208, 0.2160, -0.6917, -0.7890]) tensor([0.1813, 0.4627, 0.1867, 0.1694]) -Greedy action tensor([-0.5986, -0.4052, -0.0862, -0.8143]) tensor([0.2133, 0.2588, 0.3560, 0.1719]) -Greedy action tensor([ 0.7708, -0.3777, 0.8782, -0.4426]) tensor([0.3666, 0.1163, 0.4082, 0.1090]) -Greedy action tensor([-0.3546, -0.7106, -1.3065, -0.0573]) tensor([0.2913, 0.2041, 0.1124, 0.3922]) -Greedy action tensor([ 0.9785, -0.3643, 0.2038, 0.5175]) tensor([0.4251, 0.1110, 0.1959, 0.2681]) -Greedy action tensor([ 0.8439, -0.0636, -0.0276, 0.9026]) tensor([0.3469, 0.1400, 0.1451, 0.3679]) -Greedy action tensor([0.3029, 0.3561, 0.0294, 0.0459]) tensor([0.2786, 0.2939, 0.2120, 0.2155]) -Greedy action tensor([ 0.2747, -1.1013, 0.9020, -0.2229]) tensor([0.2679, 0.0677, 0.5016, 0.1629]) -Greedy action tensor([-0.4549, 0.6100, 0.3196, 0.2196]) tensor([0.1245, 0.3611, 0.2701, 0.2444]) -Greedy action tensor([ 0.2953, -0.6679, -0.2264, -0.1594]) tensor([0.3832, 0.1462, 0.2274, 0.2432]) -Greedy action tensor([ 0.7901, -0.1451, 1.3949, 0.2099]) tensor([0.2643, 0.1037, 0.4839, 0.1480]) -Greedy action tensor([ 0.1883, -1.4250, 0.8085, 0.2809]) tensor([0.2406, 0.0479, 0.4474, 0.2640]) -Greedy action tensor([ 0.4419, -0.1523, 0.2066, -0.6928]) tensor([0.3754, 0.2072, 0.2967, 0.1207]) -Greedy action tensor([-0.4898, -0.3119, -0.1385, -1.1285]) tensor([0.2413, 0.2883, 0.3429, 0.1274]) -Greedy action tensor([ 0.5829, -1.2019, 0.6601, -0.0370]) tensor([0.3589, 0.0602, 0.3877, 0.1931]) -Greedy action tensor([ 0.1195, -0.7257, -0.4526, -0.3051]) tensor([0.3777, 0.1622, 0.2131, 0.2470]) -Greedy action tensor([1.2756, 0.1097, 0.6517, 0.1662]) tensor([0.4593, 0.1431, 0.2461, 0.1515]) -Greedy action tensor([-0.6809, -0.3671, 0.4624, -0.4801]) tensor([0.1486, 0.2034, 0.4663, 0.1817]) -Greedy action tensor([-0.0207, -1.0910, -0.3131, 0.7071]) tensor([0.2404, 0.0824, 0.1794, 0.4977]) -Greedy action tensor([ 0.7441, -2.0538, 0.3557, 0.1652]) tensor([0.4349, 0.0265, 0.2949, 0.2437]) -Greedy action tensor([ 0.5589, -1.2145, -0.0334, 0.2745]) tensor([0.4040, 0.0686, 0.2234, 0.3040]) -Greedy action tensor([ 0.1805, -0.6767, 0.5997, -0.2443]) tensor([0.2779, 0.1179, 0.4225, 0.1817]) -Greedy action tensor([ 0.1756, 0.1920, -0.9418, -0.2459]) tensor([0.3334, 0.3389, 0.1091, 0.2187]) -Greedy action tensor([ 1.1272, -0.2502, -0.3366, -0.4922]) tensor([0.5947, 0.1500, 0.1376, 0.1178]) -Greedy action tensor([-0.0042, -0.8958, 0.1561, -0.3311]) tensor([0.3026, 0.1241, 0.3552, 0.2182]) -Greedy action tensor([-0.2025, -0.3154, 1.0302, -0.1893]) tensor([0.1578, 0.1410, 0.5413, 0.1599]) -Greedy action tensor([ 0.0015, -0.2798, 0.2824, 0.3988]) tensor([0.2190, 0.1653, 0.2900, 0.3258]) -Greedy action tensor([ 0.0794, -0.5297, -0.3040, -0.8799]) tensor([0.3834, 0.2085, 0.2613, 0.1469]) -Greedy action tensor([-0.2539, -0.9897, 0.2261, -0.2420]) tensor([0.2435, 0.1167, 0.3935, 0.2464]) -Greedy action tensor([-1.0120, -0.3723, -1.5255, 0.2967]) tensor([0.1390, 0.2635, 0.0832, 0.5144]) -Greedy action tensor([-1.0708, -0.5309, 0.0262, -0.8801]) tensor([0.1445, 0.2479, 0.4328, 0.1748]) -Greedy action tensor([ 0.7951, -1.1261, 0.4721, -0.6482]) tensor([0.4747, 0.0695, 0.3437, 0.1121]) -Greedy action tensor([-0.7761, -0.7320, 0.2318, 0.5642]) tensor([0.1162, 0.1214, 0.3184, 0.4439]) -Greedy action tensor([ 0.6659, -1.1638, 0.2908, 0.0641]) tensor([0.4174, 0.0670, 0.2869, 0.2287]) -Greedy action tensor([-0.1116, -0.4771, -0.0143, -0.0574]) tensor([0.2596, 0.1801, 0.2862, 0.2741]) -Greedy action tensor([ 1.7672, -0.4013, -0.6221, 0.2370]) tensor([0.7030, 0.0804, 0.0645, 0.1522]) -Greedy action tensor([-0.2432, -0.6196, 0.2144, -0.3707]) tensor([0.2411, 0.1655, 0.3811, 0.2123]) -Greedy action tensor([ 0.5831, -0.8318, -0.5364, -0.5537]) tensor([0.5290, 0.1285, 0.1727, 0.1697]) -Greedy action tensor([-0.4635, -0.8176, -1.0701, 0.6599]) tensor([0.1879, 0.1319, 0.1024, 0.5778]) -Greedy action tensor([ 0.1082, -0.1993, 0.2785, -0.0909]) tensor([0.2673, 0.1966, 0.3170, 0.2191]) -Greedy action tensor([ 0.1529, -2.0194, -0.0176, 0.5448]) tensor([0.2910, 0.0331, 0.2454, 0.4305]) -Greedy action tensor([ 0.3432, -0.2860, -0.4630, 0.2989]) tensor([0.3406, 0.1815, 0.1521, 0.3258]) -Greedy action tensor([ 0.4931, 0.5869, -0.0705, 0.3379]) tensor([0.2838, 0.3117, 0.1615, 0.2430]) -Greedy action tensor([ 0.1419, -0.3819, 0.5727, 0.0674]) tensor([0.2464, 0.1459, 0.3790, 0.2287]) -Greedy action tensor([-0.1883, -0.9502, 0.5497, -0.8510]) tensor([0.2455, 0.1146, 0.5134, 0.1265]) -Greedy action tensor([0.1241, 0.1925, 0.1685, 0.5199]) tensor([0.2173, 0.2327, 0.2272, 0.3228]) -Greedy action tensor([ 0.4548, -0.5412, -0.7533, -0.3883]) tensor([0.4765, 0.1760, 0.1424, 0.2051]) -Greedy action tensor([ 0.1533, -0.5154, 0.4786, -0.5586]) tensor([0.2952, 0.1513, 0.4087, 0.1449]) -Greedy action tensor([ 0.4184, -0.1624, -1.3561, -0.3445]) tensor([0.4555, 0.2548, 0.0772, 0.2124]) -Greedy action tensor([ 0.0448, -1.0804, -0.0050, -0.3619]) tensor([0.3399, 0.1103, 0.3234, 0.2263]) -Greedy action tensor([-0.7877, -0.4094, 0.3315, -0.9282]) tensor([0.1565, 0.2284, 0.4792, 0.1360]) -Greedy action tensor([-1.3124, -1.1212, 0.7146, -1.1845]) tensor([0.0914, 0.1107, 0.6940, 0.1039]) -Greedy action tensor([-0.2334, -0.5146, -0.4685, 0.7051]) tensor([0.1960, 0.1480, 0.1550, 0.5010]) -Greedy action tensor([-0.1807, -0.9326, 0.8454, -1.1377]) tensor([0.2153, 0.1015, 0.6006, 0.0827]) -Greedy action tensor([ 0.2048, -1.3510, 0.4099, -0.6039]) tensor([0.3467, 0.0732, 0.4257, 0.1544]) -Greedy action tensor([-0.0859, -0.4424, -0.4177, 0.3285]) tensor([0.2544, 0.1781, 0.1826, 0.3850]) -Greedy action tensor([-0.7615, -1.9019, -0.0731, 0.4272]) tensor([0.1517, 0.0485, 0.3019, 0.4979]) -Greedy action tensor([ 0.2263, -0.9241, -0.4281, 0.2214]) tensor([0.3532, 0.1118, 0.1836, 0.3515]) -Greedy action tensor([-0.8508, -0.8311, 0.1677, -0.3288]) tensor([0.1545, 0.1575, 0.4277, 0.2603]) -Greedy action tensor([ 0.0875, -0.4862, -0.0930, 0.4118]) tensor([0.2644, 0.1490, 0.2208, 0.3658]) -Greedy action tensor([ 0.2194, -1.1041, -0.1656, 0.1145]) tensor([0.3512, 0.0935, 0.2390, 0.3163]) -Greedy action tensor([-0.4891, -0.9340, -1.2253, 0.6060]) tensor([0.1957, 0.1254, 0.0937, 0.5851]) -Greedy action tensor([-0.1455, 0.2493, -0.3437, -0.8548]) tensor([0.2634, 0.3909, 0.2161, 0.1296]) -Greedy action tensor([-0.1204, -0.0856, -0.3425, 0.4704]) tensor([0.2154, 0.2231, 0.1725, 0.3890]) -Greedy action tensor([-0.1226, -1.5387, -0.8427, 0.1725]) tensor([0.3255, 0.0790, 0.1584, 0.4372]) -Greedy action tensor([-0.1467, -1.0899, -0.3465, -0.5040]) tensor([0.3439, 0.1339, 0.2816, 0.2406]) -Greedy action tensor([ 0.1483, -0.8498, 0.2948, -0.6523]) tensor([0.3361, 0.1239, 0.3891, 0.1509]) -Greedy action tensor([-1.9187, -0.4359, 0.6576, -0.1678]) tensor([0.0411, 0.1812, 0.5408, 0.2369]) -Greedy action tensor([-1.9260, -0.4421, 0.6600, -0.1696]) tensor([0.0409, 0.1802, 0.5424, 0.2366]) -Greedy action tensor([-1.8509, -0.4316, 0.6213, -0.1370]) tensor([0.0444, 0.1835, 0.5258, 0.2463]) -Greedy action tensor([-0.8416, 0.9468, 0.1049, 0.2223]) tensor([0.0803, 0.4802, 0.2069, 0.2327]) -Greedy action tensor([-1.9157, -0.4405, 0.6549, -0.1655]) tensor([0.0413, 0.1806, 0.5402, 0.2378]) -Greedy action tensor([-1.8752, -0.4046, 0.6264, -0.1441]) tensor([0.0431, 0.1876, 0.5259, 0.2434]) -Greedy action tensor([-1.7616, -0.4162, 0.7666, 0.2207]) tensor([0.0406, 0.1559, 0.5088, 0.2947]) -Greedy action tensor([-1.9449, -0.4537, 0.6712, -0.1793]) tensor([0.0400, 0.1779, 0.5480, 0.2341]) -Greedy action tensor([-1.9081, -0.3702, 0.6426, -0.1604]) tensor([0.0413, 0.1923, 0.5293, 0.2371]) -Greedy action tensor([-1.9436, -0.4509, 0.6682, -0.1795]) tensor([0.0401, 0.1786, 0.5469, 0.2343]) -Greedy action tensor([-1.8282, -0.2206, 0.5807, -0.1292]) tensor([0.0443, 0.2210, 0.4925, 0.2422]) -Greedy action tensor([-1.7487, -0.4538, 0.5775, -0.0767]) tensor([0.0495, 0.1806, 0.5066, 0.2633]) -Greedy action tensor([-1.9220, -0.4087, 0.6549, -0.1648]) tensor([0.0408, 0.1854, 0.5371, 0.2366]) -Greedy action tensor([-1.1399, -0.3212, 0.6080, 0.7021]) tensor([0.0653, 0.1480, 0.3749, 0.4118]) -Greedy action tensor([-1.8315, -0.4807, 0.6117, -0.1482]) tensor([0.0460, 0.1775, 0.5291, 0.2475]) -Greedy action tensor([-1.8112, -0.4317, 0.5968, -0.1169]) tensor([0.0465, 0.1846, 0.5162, 0.2528]) -Greedy action tensor([-1.8407, -0.1578, 0.5722, -0.1006]) tensor([0.0430, 0.2315, 0.4804, 0.2451]) -Greedy action tensor([-1.8893, -0.4179, 0.6408, -0.1464]) tensor([0.0423, 0.1844, 0.5314, 0.2419]) -Greedy action tensor([-1.8949, -0.4304, 0.6478, -0.1527]) tensor([0.0421, 0.1821, 0.5354, 0.2404]) -Greedy action tensor([-0.5709, -0.4743, 0.2300, -0.2402]) tensor([0.1748, 0.1925, 0.3894, 0.2433]) -Greedy action tensor([-1.8894, -0.4568, 0.6455, -0.1531]) tensor([0.0426, 0.1784, 0.5373, 0.2417]) -Greedy action tensor([0.4656, 0.1606, 0.6547, 1.1830]) tensor([0.2002, 0.1476, 0.2419, 0.4103]) -Greedy action tensor([-1.1499, 0.1550, 0.3415, 0.3459]) tensor([0.0736, 0.2713, 0.3269, 0.3283]) -Greedy action tensor([-0.5065, 0.8909, 0.0056, 0.1089]) tensor([0.1168, 0.4723, 0.1949, 0.2161]) -Greedy action tensor([-1.9090, -0.4371, 0.6518, -0.1614]) tensor([0.0416, 0.1812, 0.5384, 0.2388]) -Greedy action tensor([-1.9186, -0.4544, 0.6595, -0.1684]) tensor([0.0412, 0.1783, 0.5431, 0.2373]) -Greedy action tensor([0.2053, 1.0802, 0.0129, 0.6342]) tensor([0.1736, 0.4165, 0.1432, 0.2666]) -Greedy action tensor([-1.7997, -0.4914, 0.5963, -0.1336]) tensor([0.0477, 0.1764, 0.5236, 0.2523]) -Greedy action tensor([-1.9080, -0.3721, 0.6464, -0.1594]) tensor([0.0412, 0.1915, 0.5303, 0.2369]) -Greedy action tensor([-1.8375, -0.3497, 0.6330, -0.1378]) tensor([0.0440, 0.1948, 0.5204, 0.2408]) -Greedy action tensor([-1.0407, -0.2445, 0.6788, 0.1668]) tensor([0.0823, 0.1826, 0.4596, 0.2754]) -Greedy action tensor([-1.9167, -0.3600, 0.6353, -0.1545]) tensor([0.0410, 0.1944, 0.5259, 0.2387]) -Greedy action tensor([-1.8886, -0.3759, 0.6375, -0.1503]) tensor([0.0421, 0.1913, 0.5269, 0.2397]) -Greedy action tensor([-1.7489, -0.2888, 0.5465, -0.1017]) tensor([0.0490, 0.2108, 0.4860, 0.2542]) -Greedy action tensor([-1.5841, -0.4202, 0.4915, 0.0478]) tensor([0.0579, 0.1853, 0.4611, 0.2958]) -Greedy action tensor([-1.8379, -0.3120, 0.6243, -0.1176]) tensor([0.0436, 0.2007, 0.5119, 0.2438]) -Greedy action tensor([-1.6095, -0.1832, 0.2014, -0.5980]) tensor([0.0713, 0.2968, 0.4359, 0.1960]) -Greedy action tensor([-1.9239, -0.3434, 0.6416, -0.1785]) tensor([0.0407, 0.1975, 0.5289, 0.2329]) -Greedy action tensor([-1.8894, -0.3722, 0.6371, -0.1301]) tensor([0.0419, 0.1910, 0.5239, 0.2433]) -Greedy action tensor([-1.9104, -0.4393, 0.6490, -0.1647]) tensor([0.0416, 0.1813, 0.5384, 0.2386]) -Greedy action tensor([-1.8967, -0.4652, 0.6934, -0.1103]) tensor([0.0408, 0.1709, 0.5445, 0.2437]) -Greedy action tensor([-1.6254, 0.2312, 0.4057, -0.0181]) tensor([0.0500, 0.3199, 0.3809, 0.2493]) -Greedy action tensor([-1.7954, -0.3172, 0.6167, -0.0636]) tensor([0.0451, 0.1976, 0.5027, 0.2546]) -Greedy action tensor([-1.8177, -0.4367, 0.6233, -0.0928]) tensor([0.0453, 0.1803, 0.5202, 0.2542]) -Greedy action tensor([-1.6383, -0.4647, 0.5248, -0.0167]) tensor([0.0556, 0.1797, 0.4834, 0.2813]) -Greedy action tensor([-1.4818, 0.4643, 0.2421, -0.5187]) tensor([0.0616, 0.4315, 0.3455, 0.1614]) -Greedy action tensor([-1.8827, -0.2115, 0.6031, -0.1640]) tensor([0.0418, 0.2225, 0.5024, 0.2333]) -Greedy action tensor([-1.8654, -0.3034, 0.6227, -0.1304]) tensor([0.0426, 0.2031, 0.5128, 0.2415]) -Greedy action tensor([-1.8984, -0.4163, 0.6492, -0.1572]) tensor([0.0419, 0.1843, 0.5350, 0.2388]) -Greedy action tensor([-1.9305, -0.4297, 0.6587, -0.1713]) tensor([0.0406, 0.1822, 0.5412, 0.2360]) -Greedy action tensor([-1.0733, 0.8484, 0.1540, 0.3928]) tensor([0.0642, 0.4386, 0.2190, 0.2781]) -Greedy action tensor([-1.3132, -0.4083, 0.4611, 0.0126]) tensor([0.0761, 0.1882, 0.4490, 0.2867]) -Greedy action tensor([-1.9138, -0.3886, 0.6419, -0.1613]) tensor([0.0412, 0.1896, 0.5312, 0.2379]) -Greedy action tensor([-1.9315, -0.4397, 0.6581, -0.1716]) tensor([0.0407, 0.1808, 0.5421, 0.2364]) -Greedy action tensor([-0.1812, 1.0410, -0.0045, 0.1164]) tensor([0.1442, 0.4895, 0.1721, 0.1942]) -Greedy action tensor([-1.9450, -0.4464, 0.6642, -0.1809]) tensor([0.0402, 0.1797, 0.5457, 0.2344]) -Greedy action tensor([-1.8753, -0.4227, 0.6279, -0.1476]) tensor([0.0432, 0.1848, 0.5285, 0.2434]) -Greedy action tensor([-1.9183, -0.4471, 0.6552, -0.1691]) tensor([0.0413, 0.1798, 0.5414, 0.2374]) -Greedy action tensor([-1.9159, -0.4057, 0.6556, -0.1633]) tensor([0.0410, 0.1857, 0.5367, 0.2366]) -Greedy action tensor([-1.8735, -0.4202, 0.6290, -0.1448]) tensor([0.0432, 0.1850, 0.5282, 0.2436]) -Greedy action tensor([-1.9319, -0.3113, 0.6395, -0.1740]) tensor([0.0401, 0.2027, 0.5246, 0.2326]) -Greedy action tensor([-1.5030, -0.2980, 0.5707, -0.3625]) tensor([0.0649, 0.2164, 0.5159, 0.2029]) -Greedy action tensor([-1.9370, -0.4495, 0.6714, -0.1710]) tensor([0.0402, 0.1781, 0.5464, 0.2353]) -Greedy action tensor([-0.9105, 0.7696, 0.0985, 0.0847]) tensor([0.0846, 0.4542, 0.2322, 0.2290]) -Greedy action tensor([-1.8414, -0.4429, 0.5934, -0.1236]) tensor([0.0454, 0.1838, 0.5180, 0.2529]) -Greedy action tensor([-1.0031, -0.4413, 0.3328, -0.0475]) tensor([0.1092, 0.1915, 0.4153, 0.2839]) -Greedy action tensor([-1.3525, -0.1311, 0.3787, -0.0788]) tensor([0.0735, 0.2492, 0.4148, 0.2625]) -Greedy action tensor([-1.8257, -0.3780, 0.5958, -0.1301]) tensor([0.0455, 0.1936, 0.5127, 0.2481]) -Greedy action tensor([-1.6681, -0.4609, 0.5324, -0.0964]) tensor([0.0550, 0.1839, 0.4964, 0.2647]) -Greedy action tensor([-1.1020, -0.2048, 0.2763, 0.1819]) tensor([0.0906, 0.2223, 0.3597, 0.3273]) -Greedy action tensor([-1.8605, -0.3803, 0.6349, -0.1282]) tensor([0.0431, 0.1896, 0.5233, 0.2440]) -Greedy action tensor([-0.5471, 0.6690, 0.3194, 1.0360]) tensor([0.0860, 0.2903, 0.2047, 0.4190]) -Greedy action tensor([-1.0243, 0.5167, 0.2669, -0.3418]) tensor([0.0886, 0.4137, 0.3223, 0.1753]) -Greedy action tensor([-1.9477, -0.4466, 0.6658, -0.1825]) tensor([0.0400, 0.1796, 0.5464, 0.2339]) -Greedy action tensor([-1.8836, -0.4781, 0.6520, -0.1422]) tensor([0.0427, 0.1742, 0.5393, 0.2437]) -Greedy action tensor([-1.8639, -0.4450, 0.6374, -0.1099]) tensor([0.0433, 0.1788, 0.5279, 0.2500]) -Greedy action tensor([-1.7943, -0.1045, 0.5309, -0.0674]) tensor([0.0449, 0.2433, 0.4593, 0.2525]) -Greedy action tensor([-1.8675, -0.3269, 0.6306, -0.1014]) tensor([0.0422, 0.1971, 0.5136, 0.2470]) -Greedy action tensor([0.1902, 1.2088, 0.0189, 0.2632]) tensor([0.1758, 0.4869, 0.1481, 0.1891]) -Greedy action tensor([-1.9108, -0.4408, 0.6532, -0.1610]) tensor([0.0415, 0.1805, 0.5391, 0.2388]) -Greedy action tensor([-1.7735, -0.3820, 0.5717, -0.1163]) tensor([0.0483, 0.1942, 0.5041, 0.2534]) -Greedy action tensor([1.5640, 0.0838, 0.0447, 0.4923]) tensor([0.5590, 0.1272, 0.1223, 0.1914]) -Greedy action tensor([ 1.1135, -0.0441, -0.7019, 0.3439]) tensor([0.5154, 0.1620, 0.0839, 0.2387]) -Greedy action tensor([ 1.1950, -0.1806, -0.6803, 0.4982]) tensor([0.5252, 0.1327, 0.0805, 0.2616]) -Greedy action tensor([ 0.8667, -0.0349, -0.2122, -0.1269]) tensor([0.4726, 0.1918, 0.1607, 0.1750]) -Greedy action tensor([ 1.4352, -0.5456, -0.0995, 0.0626]) tensor([0.6223, 0.0859, 0.1341, 0.1577]) -Greedy action tensor([ 1.2073, -0.0257, -0.6236, 0.0775]) tensor([0.5634, 0.1642, 0.0903, 0.1821]) -Greedy action tensor([ 1.8093, -0.9278, -0.3331, -0.0467]) tensor([0.7471, 0.0484, 0.0877, 0.1168]) -Greedy action tensor([ 2.3886, -1.3110, -0.1942, 0.8325]) tensor([0.7626, 0.0189, 0.0576, 0.1609]) -Greedy action tensor([ 0.9108, -0.0741, -0.4693, 0.2118]) tensor([0.4712, 0.1760, 0.1185, 0.2342]) -Greedy action tensor([ 1.5764, -0.9553, -0.4294, 0.5805]) tensor([0.6315, 0.0502, 0.0850, 0.2333]) -Greedy action tensor([ 1.1135, -0.5350, -0.5269, 0.0550]) tensor([0.5770, 0.1110, 0.1119, 0.2002]) -Greedy action tensor([ 1.1212, 0.0492, -0.4226, 0.0688]) tensor([0.5249, 0.1797, 0.1121, 0.1833]) -Greedy action tensor([ 1.3744, -0.2202, -0.5844, 0.1605]) tensor([0.6094, 0.1237, 0.0859, 0.1810]) -Greedy action tensor([ 1.2987, -0.6400, -0.2415, 0.1929]) tensor([0.5920, 0.0852, 0.1269, 0.1959]) -Greedy action tensor([ 2.2536, -0.4110, -0.4677, 0.2353]) tensor([0.7885, 0.0549, 0.0519, 0.1048]) -Greedy action tensor([ 1.9896, 0.0248, -0.2504, 0.3356]) tensor([0.6955, 0.0975, 0.0740, 0.1330]) -Greedy action tensor([ 1.5475, -0.2494, -1.1479, 0.3885]) tensor([0.6464, 0.1072, 0.0436, 0.2028]) -Greedy action tensor([ 1.3759, -0.6664, -0.3963, 0.0507]) tensor([0.6388, 0.0829, 0.1086, 0.1698]) -Greedy action tensor([ 1.3272, -0.3289, -0.2992, 0.4519]) tensor([0.5542, 0.1058, 0.1090, 0.2310]) -Greedy action tensor([ 1.2707, -0.7609, -0.2081, 0.3950]) tensor([0.5632, 0.0738, 0.1284, 0.2346]) -Greedy action tensor([ 1.1486, -0.2868, -0.4988, 0.2794]) tensor([0.5406, 0.1287, 0.1041, 0.2267]) -Greedy action tensor([ 1.3108, -0.1409, -0.3642, 0.2897]) tensor([0.5612, 0.1314, 0.1051, 0.2022]) -Greedy action tensor([ 1.5361, -0.3714, -0.8593, -0.1242]) tensor([0.6995, 0.1038, 0.0637, 0.1330]) -Greedy action tensor([ 1.6498e+00, -2.4296e-01, -1.8033e-01, -1.4462e-03]) tensor([0.6654, 0.1002, 0.1067, 0.1276]) -Greedy action tensor([ 1.6733, -0.3952, -0.2693, 0.3645]) tensor([0.6494, 0.0821, 0.0931, 0.1754]) -Greedy action tensor([ 1.1442, 0.0888, -0.4842, -0.2312]) tensor([0.5564, 0.1937, 0.1092, 0.1406]) -Greedy action tensor([ 1.5626, -0.4124, -0.6946, -0.0118]) tensor([0.6894, 0.0957, 0.0721, 0.1428]) -Greedy action tensor([ 1.1828, 0.0275, -0.1448, 0.1974]) tensor([0.5119, 0.1612, 0.1357, 0.1911]) -Greedy action tensor([1.4605, 0.5451, 0.0394, 0.1333]) tensor([0.5244, 0.2099, 0.1266, 0.1391]) -Greedy action tensor([ 1.3363, -0.1988, -0.9191, 0.4314]) tensor([0.5798, 0.1249, 0.0608, 0.2346]) -Greedy action tensor([ 0.9724, -0.4503, 0.0220, 0.1340]) tensor([0.4854, 0.1170, 0.1877, 0.2099]) -Greedy action tensor([ 1.3860, -0.3862, -0.5549, 0.0018]) tensor([0.6394, 0.1087, 0.0918, 0.1602]) -Greedy action tensor([ 1.2867, -0.3103, -0.8591, 0.2970]) tensor([0.5913, 0.1197, 0.0692, 0.2198]) -Greedy action tensor([ 2.2642, -0.5051, -0.3982, 0.4384]) tensor([0.7731, 0.0485, 0.0539, 0.1245]) -Greedy action tensor([ 2.0721, 0.5171, -0.5157, 0.4703]) tensor([0.6721, 0.1419, 0.0505, 0.1355]) -Greedy action tensor([ 0.9480, -0.2046, -0.1104, 0.0266]) tensor([0.4852, 0.1532, 0.1684, 0.1931]) -Greedy action tensor([ 1.5519, 0.2759, -0.3801, 0.2757]) tensor([0.5872, 0.1639, 0.0851, 0.1639]) -Greedy action tensor([ 1.8569, -0.7331, -0.2882, 0.6343]) tensor([0.6727, 0.0505, 0.0787, 0.1981]) -Greedy action tensor([ 1.6011, 0.0817, -1.2833, 0.1714]) tensor([0.6604, 0.1445, 0.0369, 0.1581]) -Greedy action tensor([ 1.4556, -0.3667, -0.5660, 0.2596]) tensor([0.6264, 0.1013, 0.0830, 0.1894]) -Greedy action tensor([ 1.5716, -0.4140, -0.4396, 0.3784]) tensor([0.6352, 0.0872, 0.0850, 0.1926]) -Greedy action tensor([ 1.9871, -0.5412, -0.4485, 0.2159]) tensor([0.7477, 0.0597, 0.0655, 0.1272]) -Greedy action tensor([ 0.7572, -0.2157, -0.2171, 0.1610]) tensor([0.4336, 0.1639, 0.1637, 0.2389]) -Greedy action tensor([ 1.7541, -0.7441, -0.6746, 0.2241]) tensor([0.7210, 0.0593, 0.0636, 0.1561]) -Greedy action tensor([ 1.6912, -0.6420, -0.8185, -0.3537]) tensor([0.7647, 0.0742, 0.0622, 0.0989]) -Greedy action tensor([ 1.2935, 0.4209, -0.9488, 0.6069]) tensor([0.4933, 0.2061, 0.0524, 0.2482]) -Greedy action tensor([ 2.1523, -1.2365, -0.7590, 0.4918]) tensor([0.7823, 0.0264, 0.0426, 0.1487]) -Greedy action tensor([ 1.7969, -0.9228, -0.1427, 0.4465]) tensor([0.6808, 0.0449, 0.0979, 0.1764]) -Greedy action tensor([ 1.3670, -0.2531, -0.4183, 0.2228]) tensor([0.5938, 0.1175, 0.0996, 0.1891]) -Greedy action tensor([ 1.1955, -0.4154, -0.2601, 0.5020]) tensor([0.5174, 0.1033, 0.1207, 0.2586]) -Greedy action tensor([ 2.6918, 1.2203, 0.6906, -0.2483]) tensor([0.7054, 0.1619, 0.0953, 0.0373]) -Greedy action tensor([ 1.5481, -0.4733, -0.5042, 0.4258]) tensor([0.6303, 0.0835, 0.0810, 0.2052]) -Greedy action tensor([ 1.2933, -0.4250, -0.2653, 0.2662]) tensor([0.5721, 0.1026, 0.1204, 0.2048]) -Greedy action tensor([ 1.8329, -0.6184, -0.9623, 0.5835]) tensor([0.6974, 0.0601, 0.0426, 0.1999]) -Greedy action tensor([ 1.8497, -0.9744, 0.0896, 0.5324]) tensor([0.6670, 0.0396, 0.1147, 0.1787]) -Greedy action tensor([ 1.2173, -0.0841, -0.8170, 0.0999]) tensor([0.5780, 0.1573, 0.0756, 0.1891]) -Greedy action tensor([ 1.6126, -0.5414, -0.3109, 0.4351]) tensor([0.6369, 0.0739, 0.0930, 0.1962]) -Greedy action tensor([ 1.5652, -0.1537, -0.8861, -0.0649]) tensor([0.6843, 0.1227, 0.0590, 0.1341]) -Greedy action tensor([ 1.3710, -0.2840, -0.1729, 0.2412]) tensor([0.5788, 0.1106, 0.1236, 0.1870]) -Greedy action tensor([ 1.3225, 0.2719, -0.9781, 0.0996]) tensor([0.5733, 0.2005, 0.0574, 0.1688]) -Greedy action tensor([ 0.9697, -0.0205, -0.2310, 0.1029]) tensor([0.4778, 0.1775, 0.1438, 0.2008]) -Greedy action tensor([ 1.6379, -0.7433, 0.0411, 0.4530]) tensor([0.6247, 0.0577, 0.1265, 0.1910]) -Greedy action tensor([ 2.0581, -0.9948, -0.6783, 0.4158]) tensor([0.7660, 0.0362, 0.0496, 0.1482]) -Greedy action tensor([ 1.5917, -0.4677, -0.6052, 0.5306]) tensor([0.6310, 0.0805, 0.0701, 0.2184]) -Greedy action tensor([ 1.2383, -0.2472, -0.9261, 0.4615]) tensor([0.5552, 0.1257, 0.0637, 0.2553]) -Greedy action tensor([ 2.8584, -0.7598, -0.5160, 1.0940]) tensor([0.8115, 0.0218, 0.0278, 0.1390]) -Greedy action tensor([ 1.0660, 0.0891, -1.1397, 0.2522]) tensor([0.5182, 0.1951, 0.0571, 0.2296]) -Greedy action tensor([ 2.1980, -0.9612, -0.1874, 0.8170]) tensor([0.7216, 0.0306, 0.0664, 0.1814]) -Greedy action tensor([ 1.1957, -0.1016, -0.1494, 0.0764]) tensor([0.5375, 0.1469, 0.1400, 0.1755]) -Greedy action tensor([ 1.8673e+00, -7.1015e-01, 6.6423e-04, 2.8947e-01]) tensor([0.6959, 0.0529, 0.1076, 0.1436]) -Greedy action tensor([ 1.1747, 0.1803, -0.6406, 0.4421]) tensor([0.4967, 0.1837, 0.0808, 0.2387]) -Greedy action tensor([ 1.1048, -0.0403, 0.0726, -0.3202]) tensor([0.5222, 0.1662, 0.1860, 0.1256]) -Greedy action tensor([ 1.3681, -0.3335, -0.7259, 0.0647]) tensor([0.6340, 0.1156, 0.0781, 0.1722]) -Greedy action tensor([ 1.5789, -0.4687, -0.7384, 0.1348]) tensor([0.6833, 0.0882, 0.0673, 0.1612]) -Greedy action tensor([ 0.9663, -0.1362, -0.3669, -0.0245]) tensor([0.5084, 0.1688, 0.1340, 0.1888]) -Greedy action tensor([ 1.5146, -0.4302, -0.4860, 0.1264]) tensor([0.6546, 0.0936, 0.0885, 0.1633]) -Greedy action tensor([ 1.8787, -0.5049, 0.0045, 0.0992]) tensor([0.7070, 0.0652, 0.1085, 0.1193]) -Greedy action tensor([ 0.1604, 0.1798, -0.4641, 0.3478]) tensor([0.2659, 0.2711, 0.1424, 0.3207]) -Greedy action tensor([ 1.2114, -0.5195, -0.3285, 0.3724]) tensor([0.5483, 0.0971, 0.1176, 0.2370]) -Greedy action tensor([ 1.4270, -0.2569, -0.9380, 0.1681]) tensor([0.6396, 0.1187, 0.0601, 0.1816]) -Greedy action tensor([ 1.3922, -0.9238, -0.7861, -0.1204]) tensor([0.6982, 0.0689, 0.0791, 0.1538]) -Greedy action tensor([ 2.1364, -0.6342, -0.5863, 1.0278]) tensor([0.6857, 0.0429, 0.0450, 0.2263]) -Greedy action tensor([ 0.4766, -0.1011, -0.0643, -0.1499]) tensor([0.3734, 0.2096, 0.2174, 0.1996]) -Greedy action tensor([ 0.6545, -0.2496, -0.0496, -0.1384]) tensor([0.4252, 0.1722, 0.2103, 0.1924]) -Greedy action tensor([ 0.5283, -0.2085, 0.0010, -0.0322]) tensor([0.3788, 0.1813, 0.2236, 0.2163]) -Greedy action tensor([ 0.9248, -0.5291, -0.0288, -0.3097]) tensor([0.5236, 0.1223, 0.2018, 0.1523]) -Greedy action tensor([ 0.9567, -0.4238, -0.0852, -0.2362]) tensor([0.5242, 0.1318, 0.1849, 0.1590]) -Greedy action tensor([ 0.6876, -0.6723, -0.0207, -0.2969]) tensor([0.4711, 0.1209, 0.2320, 0.1760]) -Greedy action tensor([ 0.2457, -0.0572, -0.0995, -0.1819]) tensor([0.3227, 0.2384, 0.2285, 0.2104]) -Greedy action tensor([ 0.9695, -0.5899, -0.1069, -0.4366]) tensor([0.5567, 0.1171, 0.1897, 0.1365]) -Greedy action tensor([ 1.0505, -0.1655, 0.0496, -0.0753]) tensor([0.5029, 0.1491, 0.1849, 0.1632]) -Greedy action tensor([ 1.0688, -0.7566, -0.0011, -0.5244]) tensor([0.5857, 0.0944, 0.2009, 0.1190]) -Greedy action tensor([ 0.9304, -0.4562, 0.0042, -0.2919]) tensor([0.5153, 0.1288, 0.2041, 0.1518]) -Greedy action tensor([ 0.9683, -0.5435, -0.1548, -0.4242]) tensor([0.5573, 0.1229, 0.1813, 0.1385]) -Greedy action tensor([ 0.5856, -0.1401, -0.0715, -0.4074]) tensor([0.4214, 0.2040, 0.2185, 0.1561]) -Greedy action tensor([ 0.6157, 0.0404, -0.0033, -0.0339]) tensor([0.3812, 0.2144, 0.2053, 0.1991]) -Greedy action tensor([ 0.7361, -0.5147, 0.0410, -0.3619]) tensor([0.4720, 0.1351, 0.2355, 0.1574]) -Greedy action tensor([ 0.8524, -0.5612, -0.0380, -0.2946]) tensor([0.5073, 0.1234, 0.2082, 0.1611]) -Greedy action tensor([ 0.7471, -0.7947, -0.0791, -0.3147]) tensor([0.5006, 0.1071, 0.2191, 0.1731]) -Greedy action tensor([0.7854, 0.0918, 0.0754, 0.0738]) tensor([0.4029, 0.2013, 0.1981, 0.1977]) -Greedy action tensor([ 1.1717, -0.8363, -0.0425, -0.6882]) tensor([0.6302, 0.0846, 0.1871, 0.0981]) -Greedy action tensor([ 0.9787, -0.2416, -0.4480, -0.5257]) tensor([0.5690, 0.1679, 0.1366, 0.1264]) -Greedy action tensor([ 0.9408, -0.3187, -0.0715, -0.5041]) tensor([0.5311, 0.1507, 0.1930, 0.1252]) -Greedy action tensor([ 1.0815, -0.5937, 0.1886, -0.4585]) tensor([0.5521, 0.1034, 0.2261, 0.1184]) -Greedy action tensor([ 0.7559, -0.4012, 0.1201, -0.4501]) tensor([0.4666, 0.1467, 0.2470, 0.1397]) -Greedy action tensor([ 0.6641, -0.0949, -0.0614, 0.0025]) tensor([0.4052, 0.1897, 0.1961, 0.2091]) -Greedy action tensor([ 0.7940, -0.1800, 0.0625, -0.3123]) tensor([0.4567, 0.1724, 0.2198, 0.1511]) -Greedy action tensor([ 0.6226, -0.2873, -0.0220, -0.1527]) tensor([0.4188, 0.1686, 0.2198, 0.1929]) -Greedy action tensor([ 0.4461, 0.3051, 0.0173, -0.1903]) tensor([0.3280, 0.2848, 0.2136, 0.1736]) -Greedy action tensor([ 0.8692, -0.2184, -0.1013, -0.4878]) tensor([0.5068, 0.1708, 0.1920, 0.1305]) -Greedy action tensor([ 0.8673, -0.2707, -0.0961, -0.2954]) tensor([0.4964, 0.1591, 0.1894, 0.1552]) -Greedy action tensor([ 0.7936, -0.3576, -0.1262, -0.2485]) tensor([0.4837, 0.1530, 0.1928, 0.1706]) -Greedy action tensor([ 1.1632, -0.4526, 0.0977, -0.6410]) tensor([0.5855, 0.1164, 0.2018, 0.0964]) -Greedy action tensor([ 0.9228, -0.3570, -0.0163, -0.1807]) tensor([0.4998, 0.1390, 0.1954, 0.1658]) -Greedy action tensor([ 0.9014, -0.5169, -0.2252, -0.3413]) tensor([0.5391, 0.1305, 0.1748, 0.1556]) -Greedy action tensor([ 0.8176, -0.8139, 0.1449, -0.1524]) tensor([0.4796, 0.0938, 0.2448, 0.1818]) -Greedy action tensor([ 0.5089, -0.0873, -0.1187, -0.0125]) tensor([0.3733, 0.2057, 0.1993, 0.2217]) -Greedy action tensor([ 0.6310, 0.3724, -0.2614, 0.1255]) tensor([0.3591, 0.2773, 0.1471, 0.2166]) -Greedy action tensor([ 0.8337, -0.5940, -0.0936, -0.3269]) tensor([0.5131, 0.1231, 0.2030, 0.1608]) -Greedy action tensor([ 0.6099, -0.3660, 0.0489, -0.1832]) tensor([0.4167, 0.1570, 0.2378, 0.1885]) -Greedy action tensor([ 0.5626, -0.0523, -0.0508, 0.0145]) tensor([0.3759, 0.2033, 0.2036, 0.2173]) -Greedy action tensor([ 0.7552, -0.5345, -0.0805, -0.4701]) tensor([0.4993, 0.1375, 0.2165, 0.1466]) -Greedy action tensor([ 0.9714, -0.8188, 0.0763, -0.5855]) tensor([0.5598, 0.0934, 0.2287, 0.1180]) -Greedy action tensor([ 0.8782, -0.3986, 0.0984, -0.1805]) tensor([0.4798, 0.1338, 0.2200, 0.1664]) -Greedy action tensor([ 0.6695, -0.0509, -0.0402, -0.1633]) tensor([0.4144, 0.2016, 0.2038, 0.1802]) -Greedy action tensor([ 0.2424, -0.0155, -0.1758, -0.1796]) tensor([0.3240, 0.2503, 0.2133, 0.2124]) -Greedy action tensor([ 0.8025, -0.6473, -0.0499, -0.2636]) tensor([0.4987, 0.1170, 0.2126, 0.1717]) -Greedy action tensor([ 0.8457, -0.3462, 0.0857, -0.1913]) tensor([0.4704, 0.1428, 0.2200, 0.1668]) -Greedy action tensor([ 0.8543, -0.5637, 0.1132, -0.4821]) tensor([0.5047, 0.1222, 0.2405, 0.1326]) -Greedy action tensor([ 0.4992, -0.0663, 0.1107, -0.0032]) tensor([0.3507, 0.1992, 0.2378, 0.2122]) -Greedy action tensor([ 0.6631, -0.5205, -0.0581, -0.7047]) tensor([0.4885, 0.1496, 0.2375, 0.1244]) -Greedy action tensor([ 1.3066, -0.6668, -0.1940, -0.5181]) tensor([0.6565, 0.0912, 0.1464, 0.1059]) -Greedy action tensor([ 0.8357, -0.5742, -0.0938, -0.2702]) tensor([0.5077, 0.1240, 0.2004, 0.1680]) -Greedy action tensor([ 1.0224, -0.5913, 0.0525, -0.3778]) tensor([0.5480, 0.1091, 0.2078, 0.1351]) -Greedy action tensor([ 1.0195, -1.0327, 0.1991, -0.3606]) tensor([0.5494, 0.0706, 0.2419, 0.1382]) -Greedy action tensor([ 0.7336, -0.0962, -0.0628, -0.0009]) tensor([0.4225, 0.1843, 0.1905, 0.2027]) -Greedy action tensor([ 1.1863, -0.7617, 0.0749, -0.5928]) tensor([0.6096, 0.0869, 0.2006, 0.1029]) -Greedy action tensor([ 0.5344, -0.2801, -0.0709, -0.1365]) tensor([0.4000, 0.1771, 0.2184, 0.2045]) -Greedy action tensor([ 0.5048, -0.2959, 0.0455, -0.2288]) tensor([0.3905, 0.1753, 0.2467, 0.1875]) -Greedy action tensor([ 1.0972, -0.8202, 0.0698, -0.2845]) tensor([0.5695, 0.0837, 0.2038, 0.1430]) -Greedy action tensor([ 0.6088, -0.4630, 0.0696, -0.2402]) tensor([0.4249, 0.1455, 0.2478, 0.1818]) -Greedy action tensor([ 0.8096, -0.3595, -0.0150, -0.2179]) tensor([0.4746, 0.1474, 0.2081, 0.1699]) -Greedy action tensor([ 0.9650, -0.6068, -0.0331, -0.5919]) tensor([0.5596, 0.1162, 0.2062, 0.1180]) -Greedy action tensor([ 0.9789, -0.4307, 0.0332, -0.1719]) tensor([0.5131, 0.1253, 0.1993, 0.1623]) -Greedy action tensor([ 0.5328, -0.7605, -0.1250, -0.3197]) tensor([0.4507, 0.1237, 0.2335, 0.1922]) -Greedy action tensor([ 0.4470, -0.3140, -0.1419, -0.0644]) tensor([0.3814, 0.1782, 0.2117, 0.2287]) -Greedy action tensor([ 0.7527, -0.4254, -0.0918, -0.1829]) tensor([0.4695, 0.1445, 0.2018, 0.1842]) -Greedy action tensor([ 0.7308, -0.3782, -0.1712, -0.4998]) tensor([0.4932, 0.1627, 0.2001, 0.1441]) -Greedy action tensor([ 0.7332, -0.4836, -0.0722, -0.4406]) tensor([0.4872, 0.1443, 0.2178, 0.1507]) -Greedy action tensor([ 0.7348, -0.5970, -0.1346, -0.0746]) tensor([0.4699, 0.1240, 0.1970, 0.2091]) -Greedy action tensor([ 0.5598, -0.0674, -0.1030, -0.0147]) tensor([0.3828, 0.2044, 0.1973, 0.2155]) -Greedy action tensor([ 0.8589, -0.1983, -0.0404, -0.2021]) tensor([0.4761, 0.1654, 0.1937, 0.1648]) -Greedy action tensor([ 1.2773, -0.6962, -0.1052, -0.3576]) tensor([0.6310, 0.0877, 0.1583, 0.1230]) -Greedy action tensor([ 1.1888, -0.7936, 0.0642, -0.7148]) tensor([0.6205, 0.0855, 0.2015, 0.0925]) -Greedy action tensor([ 1.0141, -0.9223, 0.1058, -0.4774]) tensor([0.5642, 0.0814, 0.2275, 0.1270]) -Greedy action tensor([ 0.8424, -0.3793, -0.0266, -0.4524]) tensor([0.5030, 0.1483, 0.2109, 0.1378]) -Greedy action tensor([ 0.8301, -0.9720, 0.1778, -0.4119]) tensor([0.5064, 0.0835, 0.2638, 0.1463]) -Greedy action tensor([ 0.8938, -0.4621, -0.1169, -0.2834]) tensor([0.5182, 0.1335, 0.1886, 0.1597]) -Greedy action tensor([ 1.0726, -0.7834, 0.0059, -0.6675]) tensor([0.5967, 0.0933, 0.2053, 0.1047]) -Greedy action tensor([ 0.2643, 0.2111, -0.0764, -0.1898]) tensor([0.3035, 0.2878, 0.2159, 0.1927]) -Greedy action tensor([ 0.8053, -0.4399, -0.0734, 0.0252]) tensor([0.4626, 0.1332, 0.1921, 0.2120]) -Greedy action tensor([ 0.7795, -0.3331, 0.0101, -0.2631]) tensor([0.4663, 0.1533, 0.2160, 0.1644]) -Greedy action tensor([ 0.9143, -0.8233, 0.0655, -0.3700]) tensor([0.5317, 0.0936, 0.2275, 0.1472]) -Greedy action tensor([-0.4496, -0.1580, -1.5456, -0.6337]) tensor([0.2853, 0.3820, 0.0954, 0.2374]) -Greedy action tensor([ 0.1304, -0.2286, -0.3659, -0.3843]) tensor([0.3442, 0.2404, 0.2096, 0.2058]) -Greedy action tensor([ 0.9096, -0.1092, 0.7082, 0.5306]) tensor([0.3493, 0.1261, 0.2855, 0.2391]) -Greedy action tensor([ 0.5144, -0.2650, -0.1371, -0.2866]) tensor([0.4117, 0.1889, 0.2146, 0.1848]) -Greedy action tensor([ 0.3298, 0.0080, -1.2604, -0.7152]) tensor([0.4385, 0.3179, 0.0894, 0.1542]) -Greedy action tensor([ 0.8269, -1.9961, 0.0052, 0.1629]) tensor([0.4966, 0.0295, 0.2183, 0.2556]) -Greedy action tensor([-0.0275, -0.2973, 0.8272, -0.7523]) tensor([0.2175, 0.1660, 0.5112, 0.1053]) -Greedy action tensor([ 0.4418, -0.7412, -0.7831, 0.3823]) tensor([0.3933, 0.1205, 0.1156, 0.3706]) -Greedy action tensor([ 0.2054, -0.0904, -0.5274, -0.6768]) tensor([0.3790, 0.2820, 0.1821, 0.1569]) -Greedy action tensor([ 0.1052, -1.2561, -0.0210, -0.5233]) tensor([0.3744, 0.0960, 0.3300, 0.1997]) -Greedy action tensor([0.8083, 0.6228, 0.7112, 0.4443]) tensor([0.2913, 0.2420, 0.2643, 0.2024]) -Greedy action tensor([-0.7987, -0.1732, 0.1829, -0.7252]) tensor([0.1512, 0.2826, 0.4035, 0.1627]) -Greedy action tensor([-1.0242, -0.7035, 0.7409, -0.8676]) tensor([0.1065, 0.1468, 0.6222, 0.1246]) -Greedy action tensor([ 0.6234, -1.0167, 0.5874, -0.9422]) tensor([0.4224, 0.0819, 0.4074, 0.0883]) -Greedy action tensor([-0.1503, 0.4119, -1.3480, -0.0378]) tensor([0.2395, 0.4202, 0.0723, 0.2680]) -Greedy action tensor([ 0.1203, 0.5896, 0.2699, -0.3400]) tensor([0.2277, 0.3641, 0.2645, 0.1437]) -Greedy action tensor([-0.0048, -0.7408, 0.4712, -0.1318]) tensor([0.2519, 0.1207, 0.4055, 0.2219]) -Greedy action tensor([-1.5685, -0.3348, -0.5077, -0.7694]) tensor([0.1048, 0.3597, 0.3026, 0.2329]) -Greedy action tensor([-0.1460, -0.2849, 0.0995, -0.0736]) tensor([0.2368, 0.2061, 0.3026, 0.2545]) -Greedy action tensor([ 0.1803, -0.8170, 0.5330, 0.5305]) tensor([0.2375, 0.0876, 0.3379, 0.3370]) -Greedy action tensor([-0.1144, -0.5011, -1.2858, -0.0315]) tensor([0.3251, 0.2209, 0.1008, 0.3532]) -Greedy action tensor([ 0.0494, -0.7299, 0.4126, 0.5641]) tensor([0.2188, 0.1004, 0.3147, 0.3661]) -Greedy action tensor([-1.0042, -0.3541, 0.6318, -0.4588]) tensor([0.1023, 0.1960, 0.5252, 0.1765]) -Greedy action tensor([0.7187, 0.3142, 0.5804, 0.9521]) tensor([0.2631, 0.1756, 0.2291, 0.3323]) -Greedy action tensor([-0.1768, 0.1636, -0.5916, -0.7687]) tensor([0.2763, 0.3883, 0.1825, 0.1529]) -Greedy action tensor([-0.4736, -0.8638, -0.2314, -0.0470]) tensor([0.2231, 0.1510, 0.2842, 0.3417]) -Greedy action tensor([ 0.2851, -0.5424, 0.8273, -0.0179]) tensor([0.2567, 0.1122, 0.4415, 0.1896]) -Greedy action tensor([-0.8576, -0.0205, -1.6964, 0.4126]) tensor([0.1369, 0.3162, 0.0592, 0.4877]) -Greedy action tensor([ 0.4165, -1.0729, -0.7861, -0.9138]) tensor([0.5586, 0.1260, 0.1678, 0.1477]) -Greedy action tensor([-0.0720, -0.7701, -0.4019, -0.0430]) tensor([0.3081, 0.1533, 0.2215, 0.3171]) -Greedy action tensor([-1.1650, -0.4405, 0.8698, -1.3550]) tensor([0.0866, 0.1788, 0.6629, 0.0716]) -Greedy action tensor([-0.8582, -0.8506, -0.4905, -0.7669]) tensor([0.2199, 0.2216, 0.3176, 0.2409]) -Greedy action tensor([ 0.0024, -0.4626, -0.4724, 0.8229]) tensor([0.2212, 0.1389, 0.1376, 0.5024]) -Greedy action tensor([ 0.4537, 0.1646, 1.0223, -0.5682]) tensor([0.2581, 0.1933, 0.4557, 0.0929]) -Greedy action tensor([-0.4252, -1.2736, -0.7878, 0.0343]) tensor([0.2697, 0.1155, 0.1877, 0.4271]) -Greedy action tensor([ 1.1070, -0.2852, 0.0665, -0.1712]) tensor([0.5318, 0.1322, 0.1879, 0.1481]) -Greedy action tensor([ 1.2085, -0.5032, 0.8349, -0.3600]) tensor([0.4814, 0.0869, 0.3313, 0.1003]) -Greedy action tensor([-0.1104, -0.6976, -1.0612, -0.3133]) tensor([0.3625, 0.2015, 0.1401, 0.2959]) -Greedy action tensor([-0.7395, -0.0404, -1.0045, -0.5786]) tensor([0.2019, 0.4062, 0.1549, 0.2371]) -Greedy action tensor([-0.8871, -0.6077, 0.8222, -0.1030]) tensor([0.0996, 0.1317, 0.5504, 0.2182]) -Greedy action tensor([-0.1368, -0.8279, 0.7763, -0.9586]) tensor([0.2256, 0.1130, 0.5622, 0.0992]) -Greedy action tensor([ 0.3570, -0.4785, -0.2455, 0.2161]) tensor([0.3509, 0.1522, 0.1921, 0.3048]) -Greedy action tensor([-0.1879, -0.0285, -0.1658, -1.1310]) tensor([0.2790, 0.3272, 0.2852, 0.1086]) -Greedy action tensor([-0.2012, -1.0974, 0.5540, -0.8371]) tensor([0.2460, 0.1004, 0.5234, 0.1302]) -Greedy action tensor([-0.7264, -0.2218, 0.5450, -1.6381]) tensor([0.1510, 0.2500, 0.5383, 0.0607]) -Greedy action tensor([ 0.4012, 0.0996, -1.2495, 1.5022]) tensor([0.2025, 0.1498, 0.0389, 0.6089]) -Greedy action tensor([-0.9711, -0.3954, 0.5628, -0.0679]) tensor([0.1012, 0.1800, 0.4692, 0.2497]) -Greedy action tensor([ 0.5905, -0.6104, -0.2809, -0.3411]) tensor([0.4732, 0.1424, 0.1980, 0.1864]) -Greedy action tensor([-0.0497, -1.8263, 0.6702, -0.5500]) tensor([0.2611, 0.0442, 0.5364, 0.1583]) -Greedy action tensor([-0.3234, -0.4853, 0.2525, -0.7414]) tensor([0.2332, 0.1984, 0.4149, 0.1535]) -Greedy action tensor([-0.7299, 0.6644, 0.1867, -0.3786]) tensor([0.1117, 0.4503, 0.2793, 0.1587]) -Greedy action tensor([ 1.4026, -0.3252, -0.0165, -0.6839]) tensor([0.6478, 0.1151, 0.1567, 0.0804]) -Greedy action tensor([0.7903, 0.1717, 0.6665, 0.3027]) tensor([0.3294, 0.1774, 0.2910, 0.2023]) -Greedy action tensor([-1.2964, 0.3490, -0.2143, -1.2745]) tensor([0.0985, 0.5103, 0.2906, 0.1006]) -Greedy action tensor([-1.7976, 0.2775, -0.5797, -0.1862]) tensor([0.0576, 0.4590, 0.1948, 0.2887]) -Greedy action tensor([ 0.5707, 0.7707, -0.4140, 0.1558]) tensor([0.3072, 0.3752, 0.1147, 0.2029]) -Greedy action tensor([-0.0494, -1.2242, 0.3343, 0.7832]) tensor([0.1970, 0.0609, 0.2891, 0.4530]) -Greedy action tensor([-0.0672, -0.9748, 0.0727, 0.0629]) tensor([0.2708, 0.1093, 0.3115, 0.3084]) -Greedy action tensor([-0.2207, 0.2251, -0.3544, -0.7899]) tensor([0.2498, 0.3902, 0.2186, 0.1414]) -Greedy action tensor([ 0.4232, -0.4323, 0.3462, 0.4471]) tensor([0.2963, 0.1259, 0.2743, 0.3034]) -Greedy action tensor([ 0.5730, -1.0753, -0.0830, -0.4695]) tensor([0.4845, 0.0932, 0.2514, 0.1708]) -Greedy action tensor([ 1.0963, -0.0309, -0.1156, -0.0145]) tensor([0.5126, 0.1661, 0.1526, 0.1688]) -Greedy action tensor([-0.6601, -1.9627, 0.3380, -0.0857]) tensor([0.1736, 0.0472, 0.4709, 0.3083]) -Greedy action tensor([ 0.1224, 0.5319, -0.6228, -1.3364]) tensor([0.3112, 0.4687, 0.1477, 0.0724]) -Greedy action tensor([-0.6952, 1.4526, 0.0634, -0.6447]) tensor([0.0784, 0.6717, 0.1674, 0.0825]) -Greedy action tensor([ 0.0666, -0.9324, -0.2154, -0.1663]) tensor([0.3431, 0.1263, 0.2588, 0.2718]) -Greedy action tensor([ 0.5108, -1.1491, 0.8758, 0.2827]) tensor([0.2918, 0.0555, 0.4204, 0.2323]) -Greedy action tensor([-0.3211, -1.1088, 0.6246, -0.6548]) tensor([0.2107, 0.0958, 0.5425, 0.1509]) -Greedy action tensor([ 0.6498, -0.7246, -0.7606, 0.5772]) tensor([0.4120, 0.1042, 0.1006, 0.3832]) -Greedy action tensor([ 0.4572, -0.8866, 0.7936, 0.2003]) tensor([0.2912, 0.0760, 0.4076, 0.2252]) -Greedy action tensor([-1.9480, -0.7374, 0.8855, -1.2863]) tensor([0.0429, 0.1440, 0.7299, 0.0832]) -Greedy action tensor([ 0.5095, -0.1814, -0.5968, 0.2282]) tensor([0.3866, 0.1937, 0.1279, 0.2918]) -Greedy action tensor([ 1.0982, -0.8536, -0.2361, 0.4195]) tensor([0.5228, 0.0742, 0.1377, 0.2652]) -Greedy action tensor([-0.4091, -0.9204, 0.3042, -0.3438]) tensor([0.2124, 0.1274, 0.4335, 0.2267]) -Greedy action tensor([-1.0599, -0.3197, -0.5625, 0.6699]) tensor([0.0963, 0.2020, 0.1584, 0.5433]) -Greedy action tensor([-0.0040, 0.1701, 0.1689, -0.8841]) tensor([0.2636, 0.3137, 0.3133, 0.1093]) -Greedy action tensor([ 0.3177, -0.1106, 0.2517, -0.5344]) tensor([0.3318, 0.2162, 0.3106, 0.1415]) -Greedy action tensor([-0.2934, -0.4992, -0.0968, -0.4960]) tensor([0.2599, 0.2115, 0.3164, 0.2122]) -Greedy action tensor([-0.1297, 0.2799, 0.8230, 0.7907]) tensor([0.1314, 0.1979, 0.3407, 0.3299]) -Greedy action tensor([-0.2449, -0.4118, -0.2030, -0.0583]) tensor([0.2443, 0.2067, 0.2547, 0.2943]) -Greedy action tensor([ 0.4030, -0.7447, 0.0078, 0.0401]) tensor([0.3722, 0.1181, 0.2507, 0.2589]) -Greedy action tensor([ 0.3507, -1.0338, -0.9057, -0.4798]) tensor([0.5074, 0.1271, 0.1444, 0.2211]) -Greedy action tensor([ 1.4919, -0.7869, -0.0898, 0.6742]) tensor([0.5716, 0.0585, 0.1175, 0.2523]) -Greedy action tensor([ 1.2108, -0.5702, -0.3774, 0.4933]) tensor([0.5374, 0.0905, 0.1098, 0.2623]) -Greedy action tensor([ 1.2014, 0.0113, -0.2612, 0.0557]) tensor([0.5394, 0.1641, 0.1249, 0.1715]) -Greedy action tensor([ 1.2375, -0.0509, -0.4898, -0.0235]) tensor([0.5758, 0.1587, 0.1023, 0.1631]) -Greedy action tensor([ 1.4421, -0.0547, -0.4247, 0.3241]) tensor([0.5864, 0.1313, 0.0907, 0.1917]) -Greedy action tensor([ 1.6441, -0.8385, -0.0725, 0.2032]) tensor([0.6667, 0.0557, 0.1198, 0.1578]) -Greedy action tensor([ 1.2940e+00, 9.1246e-04, -6.6593e-01, 5.9879e-01]) tensor([0.5224, 0.1434, 0.0736, 0.2607]) -Greedy action tensor([ 1.2904, -0.2888, -0.3591, 0.4035]) tensor([0.5524, 0.1139, 0.1061, 0.2276]) -Greedy action tensor([ 1.0991, -0.0279, -0.7416, 0.3191]) tensor([0.5152, 0.1669, 0.0818, 0.2362]) -Greedy action tensor([ 1.7417, -1.0430, -0.2382, 0.0904]) tensor([0.7186, 0.0444, 0.0992, 0.1378]) -Greedy action tensor([ 1.4809, -0.5008, -0.0858, 0.0960]) tensor([0.6262, 0.0863, 0.1307, 0.1568]) -Greedy action tensor([ 1.8403, -0.7808, -0.4580, 0.0885]) tensor([0.7426, 0.0540, 0.0746, 0.1288]) -Greedy action tensor([ 1.9764, -0.6104, -0.6348, 0.7370]) tensor([0.6953, 0.0523, 0.0511, 0.2013]) -Greedy action tensor([ 1.4730, -0.0994, -0.9166, 0.6409]) tensor([0.5766, 0.1197, 0.0529, 0.2509]) -Greedy action tensor([ 1.1618, -0.6116, -0.4937, 0.2318]) tensor([0.5697, 0.0967, 0.1088, 0.2248]) -Greedy action tensor([ 1.3430, -0.8151, -0.4650, 0.4013]) tensor([0.5990, 0.0692, 0.0982, 0.2336]) -Greedy action tensor([ 1.5930, -0.4790, -0.5690, 0.5511]) tensor([0.6274, 0.0790, 0.0722, 0.2214]) -Greedy action tensor([ 1.3068, -1.1298, -0.3329, -0.0196]) tensor([0.6464, 0.0565, 0.1254, 0.1716]) -Greedy action tensor([ 1.6952, -0.3211, -0.4485, 0.3518]) tensor([0.6617, 0.0881, 0.0776, 0.1727]) -Greedy action tensor([ 1.4917, 0.2956, -0.9686, 0.4954]) tensor([0.5691, 0.1721, 0.0486, 0.2101]) -Greedy action tensor([ 1.3202, -0.5524, -0.0219, 0.2963]) tensor([0.5636, 0.0866, 0.1473, 0.2025]) -Greedy action tensor([ 1.5149, 0.2354, -0.3131, 0.4935]) tensor([0.5558, 0.1546, 0.0894, 0.2002]) -Greedy action tensor([ 1.2479, -0.1217, -0.6784, 0.4969]) tensor([0.5343, 0.1358, 0.0778, 0.2521]) -Greedy action tensor([ 1.7497, -0.8779, -0.1100, 0.4730]) tensor([0.6636, 0.0479, 0.1033, 0.1851]) -Greedy action tensor([ 2.0276, -0.7183, -0.5121, 0.5158]) tensor([0.7334, 0.0471, 0.0579, 0.1617]) -Greedy action tensor([ 1.3981, -0.3126, -0.0682, 0.7285]) tensor([0.5199, 0.0940, 0.1200, 0.2661]) -Greedy action tensor([ 1.4098, -0.5831, -0.6390, 0.3352]) tensor([0.6224, 0.0848, 0.0802, 0.2125]) -Greedy action tensor([ 1.8452, -0.5132, -0.7052, 0.2356]) tensor([0.7285, 0.0689, 0.0569, 0.1457]) -Greedy action tensor([ 1.0270, -0.3216, -0.2107, 0.0883]) tensor([0.5152, 0.1338, 0.1494, 0.2015]) -Greedy action tensor([ 1.2159, -0.6451, -0.2371, -0.1317]) tensor([0.6063, 0.0943, 0.1418, 0.1576]) -Greedy action tensor([ 1.2840, -0.3872, -0.5130, 0.3625]) tensor([0.5709, 0.1073, 0.0947, 0.2272]) -Greedy action tensor([ 1.2430, -0.4015, 0.2332, 0.1114]) tensor([0.5319, 0.1027, 0.1938, 0.1716]) -Greedy action tensor([ 1.4309, -0.3035, -0.1105, 0.1043]) tensor([0.6039, 0.1066, 0.1293, 0.1603]) -Greedy action tensor([ 1.4182, -0.0970, -0.3194, 0.1338]) tensor([0.5979, 0.1314, 0.1052, 0.1655]) -Greedy action tensor([ 1.6328, -0.4871, -0.3403, 0.6197]) tensor([0.6165, 0.0740, 0.0857, 0.2238]) -Greedy action tensor([ 1.4082, -0.5053, -0.4865, 0.7064]) tensor([0.5575, 0.0823, 0.0838, 0.2763]) -Greedy action tensor([ 1.9641, -0.3711, -0.3969, 0.5190]) tensor([0.7009, 0.0678, 0.0661, 0.1652]) -Greedy action tensor([ 2.1240, -1.0551, -0.2751, 0.1921]) tensor([0.7829, 0.0326, 0.0711, 0.1134]) -Greedy action tensor([ 1.6831, -0.3444, -1.3210, 0.1191]) tensor([0.7191, 0.0947, 0.0357, 0.1505]) -Greedy action tensor([ 1.1152, -0.3487, -0.4946, 0.4481]) tensor([0.5143, 0.1190, 0.1028, 0.2639]) -Greedy action tensor([ 1.3083, -0.3639, -0.7161, 0.1787]) tensor([0.6086, 0.1143, 0.0804, 0.1967]) -Greedy action tensor([ 1.6265, -0.9982, -0.3948, 0.2778]) tensor([0.6828, 0.0495, 0.0905, 0.1772]) -Greedy action tensor([ 1.4081, -0.3014, -0.5156, 0.4769]) tensor([0.5810, 0.1051, 0.0849, 0.2290]) -Greedy action tensor([ 1.7915, -0.8905, -0.4909, -0.0258]) tensor([0.7502, 0.0513, 0.0766, 0.1219]) -Greedy action tensor([ 1.6356, -0.6305, -0.3073, -0.4424]) tensor([0.7288, 0.0756, 0.1044, 0.0912]) -Greedy action tensor([ 1.0637, -0.2927, -1.0966, 0.2241]) tensor([0.5541, 0.1427, 0.0639, 0.2393]) -Greedy action tensor([ 1.2559, -0.0227, -0.1230, 0.4787]) tensor([0.5025, 0.1399, 0.1266, 0.2310]) -Greedy action tensor([ 1.3807, -0.4115, -0.6466, -0.0557]) tensor([0.6510, 0.1085, 0.0857, 0.1548]) -Greedy action tensor([ 1.0588, -0.2941, -0.6691, 0.4053]) tensor([0.5111, 0.1321, 0.0908, 0.2659]) -Greedy action tensor([ 1.6197, -0.2016, -0.5575, 0.1068]) tensor([0.6687, 0.1082, 0.0758, 0.1473]) -Greedy action tensor([ 1.8575, -0.7479, -0.3258, 0.1059]) tensor([0.7353, 0.0543, 0.0828, 0.1276]) -Greedy action tensor([ 1.3577, -0.3133, -0.3427, 0.5423]) tensor([0.5515, 0.1037, 0.1007, 0.2440]) -Greedy action tensor([ 1.3910, -0.2607, -0.9256, 0.3226]) tensor([0.6120, 0.1173, 0.0604, 0.2103]) -Greedy action tensor([ 1.5742, -0.0486, -1.4007, 0.6088]) tensor([0.6138, 0.1211, 0.0313, 0.2337]) -Greedy action tensor([ 1.7461, -0.9455, -0.2323, 0.8399]) tensor([0.6211, 0.0421, 0.0859, 0.2509]) -Greedy action tensor([ 1.2286, -0.8506, -0.2778, 0.3633]) tensor([0.5657, 0.0707, 0.1254, 0.2381]) -Greedy action tensor([ 1.5186, -0.4918, -0.1934, -0.0282]) tensor([0.6547, 0.0877, 0.1182, 0.1394]) -Greedy action tensor([ 1.5664, -0.4443, -0.3120, 0.2804]) tensor([0.6398, 0.0857, 0.0978, 0.1768]) -Greedy action tensor([ 1.2834, -0.4063, -0.3810, 0.1570]) tensor([0.5889, 0.1087, 0.1115, 0.1909]) -Greedy action tensor([ 1.7930, -0.5751, -0.4441, 0.3625]) tensor([0.6946, 0.0651, 0.0742, 0.1662]) -Greedy action tensor([ 1.0338, -0.5377, -0.2708, 0.1492]) tensor([0.5286, 0.1098, 0.1434, 0.2182]) -Greedy action tensor([ 2.0594, -0.6470, -0.4823, 0.5905]) tensor([0.7269, 0.0485, 0.0572, 0.1673]) -Greedy action tensor([ 1.6821, -0.5773, -0.1007, 0.2823]) tensor([0.6582, 0.0687, 0.1107, 0.1624]) -Greedy action tensor([ 1.8302, -0.3534, -0.5267, 0.4351]) tensor([0.6872, 0.0774, 0.0651, 0.1703]) -Greedy action tensor([ 1.3168, -0.2630, -0.8181, 0.8631]) tensor([0.5103, 0.1051, 0.0603, 0.3242]) -Greedy action tensor([ 1.3169, -0.5144, -0.3191, 0.2607]) tensor([0.5873, 0.0941, 0.1144, 0.2042]) -Greedy action tensor([ 1.3088, 0.1673, -0.6822, 0.0971]) tensor([0.5703, 0.1821, 0.0779, 0.1698]) -Greedy action tensor([ 2.0251, -0.4357, -0.1962, 0.6575]) tensor([0.6904, 0.0589, 0.0749, 0.1758]) -Greedy action tensor([ 1.5825, -0.3046, -0.5039, 0.1027]) tensor([0.6652, 0.1008, 0.0826, 0.1515]) -Greedy action tensor([ 1.4073, -0.6087, -0.2584, 0.6702]) tensor([0.5553, 0.0740, 0.1050, 0.2657]) -Greedy action tensor([ 1.3262, -0.4811, -0.4811, 0.5686]) tensor([0.5565, 0.0913, 0.0913, 0.2609]) -Greedy action tensor([ 1.7890, -0.1175, -0.0485, 0.2887]) tensor([0.6532, 0.0971, 0.1040, 0.1457]) -Greedy action tensor([ 1.3209, -0.5419, -0.4585, 0.0088]) tensor([0.6277, 0.0974, 0.1059, 0.1690]) -Greedy action tensor([ 1.3554, -0.4622, -0.2183, -0.0941]) tensor([0.6233, 0.1012, 0.1292, 0.1463]) -Greedy action tensor([ 1.4823, 0.1111, -0.8434, -0.2208]) tensor([0.6521, 0.1655, 0.0637, 0.1187]) -Greedy action tensor([ 1.4550, -0.5382, -0.2136, 0.0585]) tensor([0.6360, 0.0867, 0.1199, 0.1574]) -Greedy action tensor([ 1.6831, -0.5679, -0.3452, 0.2841]) tensor([0.6740, 0.0710, 0.0887, 0.1664]) -Greedy action tensor([ 1.6615, -0.6171, -0.2617, 0.3583]) tensor([0.6578, 0.0674, 0.0961, 0.1787]) -Greedy action tensor([ 1.3799, -0.5576, -0.6549, 0.4128]) tensor([0.6042, 0.0870, 0.0790, 0.2297]) -Greedy action tensor([ 1.6256, -0.3937, -0.3074, 0.2526]) tensor([0.6533, 0.0867, 0.0945, 0.1655]) -Greedy action tensor([ 1.3574, -0.3132, -0.7625, 0.5120]) tensor([0.5755, 0.1083, 0.0691, 0.2471]) -Greedy action tensor([-1.8935, -0.4494, 0.6447, -0.1575]) tensor([0.0424, 0.1798, 0.5370, 0.2408]) -Greedy action tensor([-1.9060, -0.4536, 0.6505, -0.1638]) tensor([0.0419, 0.1790, 0.5399, 0.2392]) -Greedy action tensor([-1.6783, -0.1179, 0.4859, -0.0665]) tensor([0.0513, 0.2444, 0.4470, 0.2573]) -Greedy action tensor([-0.7943, 0.4274, 0.3602, 0.5043]) tensor([0.0890, 0.3021, 0.2825, 0.3263]) -Greedy action tensor([-1.7648, -0.0384, 0.5393, -0.0879]) tensor([0.0455, 0.2556, 0.4556, 0.2433]) -Greedy action tensor([-1.9184, -0.4346, 0.6534, -0.1676]) tensor([0.0412, 0.1818, 0.5396, 0.2374]) -Greedy action tensor([-1.8249, -0.3514, 0.6147, -0.1041]) tensor([0.0446, 0.1947, 0.5115, 0.2493]) -Greedy action tensor([-1.9170, -0.4887, 0.7651, -0.1236]) tensor([0.0388, 0.1617, 0.5666, 0.2330]) -Greedy action tensor([-1.8432, -0.3334, 0.6041, -0.1453]) tensor([0.0444, 0.2007, 0.5126, 0.2423]) -Greedy action tensor([-1.7768, -0.3430, 0.6395, -0.0801]) tensor([0.0458, 0.1919, 0.5127, 0.2496]) -Greedy action tensor([-1.9101, -0.3511, 0.6391, -0.1597]) tensor([0.0411, 0.1956, 0.5265, 0.2368]) -Greedy action tensor([-1.5756, -0.4802, 0.5562, 0.0892]) tensor([0.0565, 0.1689, 0.4762, 0.2985]) -Greedy action tensor([-1.8953, -0.4419, 0.6440, -0.1553]) tensor([0.0423, 0.1809, 0.5359, 0.2409]) -Greedy action tensor([-1.7989, -0.4345, 0.5912, -0.1290]) tensor([0.0473, 0.1851, 0.5163, 0.2513]) -Greedy action tensor([-1.4381, -0.3968, 0.4225, 0.0154]) tensor([0.0688, 0.1949, 0.4421, 0.2943]) -Greedy action tensor([-1.4676, -0.4542, 0.4380, 0.1017]) tensor([0.0654, 0.1803, 0.4400, 0.3143]) -Greedy action tensor([-1.8513, -0.2237, 0.5863, -0.1219]) tensor([0.0432, 0.2197, 0.4939, 0.2432]) -Greedy action tensor([-0.5423, -0.5275, 0.1718, 0.1951]) tensor([0.1627, 0.1651, 0.3322, 0.3400]) -Greedy action tensor([-1.9136, -0.4375, 0.6526, -0.1644]) tensor([0.0414, 0.1813, 0.5392, 0.2382]) -Greedy action tensor([-1.8363, -0.4329, 0.6127, -0.1315]) tensor([0.0452, 0.1837, 0.5227, 0.2484]) -Greedy action tensor([-1.5288, -0.1893, 0.5570, 0.1013]) tensor([0.0556, 0.2124, 0.4479, 0.2840]) -Greedy action tensor([-1.9427, -0.4452, 0.6658, -0.1792]) tensor([0.0402, 0.1797, 0.5457, 0.2344]) -Greedy action tensor([-1.9046, -0.4426, 0.6473, -0.1621]) tensor([0.0419, 0.1809, 0.5378, 0.2394]) -Greedy action tensor([-1.7666, -0.4238, 0.6178, -0.0211]) tensor([0.0467, 0.1789, 0.5069, 0.2676]) -Greedy action tensor([-1.9086, -0.4141, 0.6611, -0.1526]) tensor([0.0411, 0.1834, 0.5373, 0.2382]) -Greedy action tensor([-1.9161, -0.4252, 0.6473, -0.1736]) tensor([0.0414, 0.1840, 0.5379, 0.2367]) -Greedy action tensor([-1.4655, -0.5880, 0.4088, 0.0854]) tensor([0.0683, 0.1643, 0.4452, 0.3222]) -Greedy action tensor([-1.8756, -0.2700, 0.6131, -0.1396]) tensor([0.0422, 0.2102, 0.5082, 0.2394]) -Greedy action tensor([-1.9087, -0.4158, 0.6662, -0.1421]) tensor([0.0409, 0.1821, 0.5374, 0.2395]) -Greedy action tensor([-0.6445, 0.5732, -0.0417, 0.0414]) tensor([0.1221, 0.4125, 0.2230, 0.2424]) -Greedy action tensor([-1.9212, -0.4121, 0.6539, -0.1671]) tensor([0.0409, 0.1851, 0.5375, 0.2365]) -Greedy action tensor([-1.5461, 0.5154, 0.3708, 0.0652]) tensor([0.0484, 0.3802, 0.3290, 0.2424]) -Greedy action tensor([-1.4373, -0.4003, 0.6354, 0.3779]) tensor([0.0558, 0.1575, 0.4437, 0.3430]) -Greedy action tensor([-1.9369, -0.3917, 0.6011, -0.1793]) tensor([0.0414, 0.1942, 0.5242, 0.2402]) -Greedy action tensor([-1.9253, -0.4295, 0.6580, -0.1699]) tensor([0.0408, 0.1822, 0.5407, 0.2363]) -Greedy action tensor([-1.9316, -0.3526, 0.5982, -0.1765]) tensor([0.0413, 0.2005, 0.5190, 0.2392]) -Greedy action tensor([-1.4996, -0.3122, 0.4909, 0.1491]) tensor([0.0595, 0.1952, 0.4357, 0.3096]) -Greedy action tensor([-0.5835, 1.1105, 0.1310, 0.4500]) tensor([0.0885, 0.4817, 0.1809, 0.2489]) -Greedy action tensor([-0.5042, 1.0856, 0.1267, 0.1463]) tensor([0.1031, 0.5055, 0.1938, 0.1976]) -Greedy action tensor([-1.5311, -0.0304, 0.4735, 0.0526]) tensor([0.0562, 0.2522, 0.4175, 0.2741]) -Greedy action tensor([-1.7017, -0.2143, 0.5856, -0.0286]) tensor([0.0485, 0.2148, 0.4780, 0.2587]) -Greedy action tensor([-1.1899, -0.5534, 0.2870, 0.0250]) tensor([0.0940, 0.1776, 0.4116, 0.3168]) -Greedy action tensor([-1.9086, -0.4474, 0.6529, -0.1616]) tensor([0.0417, 0.1796, 0.5397, 0.2390]) -Greedy action tensor([-1.9209, -0.4440, 0.6561, -0.1688]) tensor([0.0411, 0.1802, 0.5414, 0.2373]) -Greedy action tensor([-1.9041, -0.3413, 0.6337, -0.1568]) tensor([0.0414, 0.1975, 0.5236, 0.2375]) -Greedy action tensor([-0.8240, -0.5484, 0.3653, 0.5417]) tensor([0.1050, 0.1384, 0.3450, 0.4116]) -Greedy action tensor([-1.6694, -0.2114, 0.5800, -0.0645]) tensor([0.0506, 0.2175, 0.4799, 0.2519]) -Greedy action tensor([-0.0681, 0.1166, 0.5020, 0.9978]) tensor([0.1455, 0.1750, 0.2572, 0.4223]) -Greedy action tensor([-1.7886, -0.4029, 0.6029, -0.1099]) tensor([0.0470, 0.1878, 0.5135, 0.2517]) -Greedy action tensor([-1.1654, -0.5516, 0.3476, 0.3280]) tensor([0.0845, 0.1560, 0.3835, 0.3760]) -Greedy action tensor([-1.0506, 0.1736, 0.3375, 0.6144]) tensor([0.0730, 0.2484, 0.2926, 0.3860]) -Greedy action tensor([-1.7993, -0.3684, 0.6080, -0.0861]) tensor([0.0458, 0.1916, 0.5086, 0.2540]) -Greedy action tensor([-1.8756, -0.3655, 0.6200, -0.1626]) tensor([0.0431, 0.1951, 0.5227, 0.2390]) -Greedy action tensor([0.5175, 0.1905, 0.6212, 1.3024]) tensor([0.1991, 0.1436, 0.2209, 0.4365]) -Greedy action tensor([-1.9291, -0.3537, 0.6478, -0.1853]) tensor([0.0405, 0.1956, 0.5325, 0.2315]) -Greedy action tensor([-1.6386, -0.4639, 0.6416, 0.1111]) tensor([0.0506, 0.1638, 0.4947, 0.2910]) -Greedy action tensor([-1.3789, 0.2507, 0.2573, 0.1194]) tensor([0.0636, 0.3247, 0.3269, 0.2848]) -Greedy action tensor([-1.8040, 0.0157, 0.5231, -0.0926]) tensor([0.0436, 0.2688, 0.4465, 0.2412]) -Greedy action tensor([-1.8751, -0.4036, 0.6252, -0.1574]) tensor([0.0433, 0.1884, 0.5272, 0.2411]) -Greedy action tensor([-1.8548, -0.4471, 0.6242, -0.1335]) tensor([0.0442, 0.1808, 0.5277, 0.2473]) -Greedy action tensor([-1.8921, -0.4547, 0.6451, -0.1539]) tensor([0.0425, 0.1788, 0.5371, 0.2416]) -Greedy action tensor([-1.1161, 0.0779, -0.2028, -0.5546]) tensor([0.1170, 0.3862, 0.2917, 0.2052]) -Greedy action tensor([-1.7338, -0.4075, 0.5508, -0.0997]) tensor([0.0507, 0.1911, 0.4982, 0.2600]) -Greedy action tensor([-1.9037, -0.4262, 0.6481, -0.1571]) tensor([0.0418, 0.1830, 0.5358, 0.2395]) -Greedy action tensor([-1.8637, -0.4539, 0.6318, -0.1435]) tensor([0.0438, 0.1795, 0.5317, 0.2449]) -Greedy action tensor([-1.8685, -0.4390, 0.7036, -0.0416]) tensor([0.0408, 0.1706, 0.5348, 0.2538]) -Greedy action tensor([-1.8846, -0.4465, 0.6483, -0.1310]) tensor([0.0424, 0.1787, 0.5340, 0.2449]) -Greedy action tensor([-1.6842, -0.4698, 0.5526, -0.1620]) tensor([0.0546, 0.1839, 0.5113, 0.2502]) -Greedy action tensor([-1.9093, -0.4484, 0.6491, -0.1704]) tensor([0.0418, 0.1802, 0.5400, 0.2380]) -Greedy action tensor([-1.9360, -0.4431, 0.6641, -0.1755]) tensor([0.0404, 0.1799, 0.5445, 0.2351]) -Greedy action tensor([-1.8903, -0.3859, 0.6388, -0.1451]) tensor([0.0421, 0.1894, 0.5276, 0.2409]) -Greedy action tensor([-1.4718, -0.6004, 0.4182, 0.1052]) tensor([0.0673, 0.1610, 0.4457, 0.3260]) -Greedy action tensor([-1.9398, -0.4272, 0.6600, -0.1769]) tensor([0.0403, 0.1828, 0.5422, 0.2348]) -Greedy action tensor([-1.8078, -0.4764, 0.6187, -0.0763]) tensor([0.0460, 0.1740, 0.5203, 0.2597]) -Greedy action tensor([-1.8760, -0.3723, 0.6308, -0.1358]) tensor([0.0426, 0.1917, 0.5228, 0.2429]) -Greedy action tensor([-1.9351, -0.3984, 0.6519, -0.1769]) tensor([0.0404, 0.1879, 0.5372, 0.2345]) -Greedy action tensor([-1.7118, -0.2971, 0.5802, -0.0587]) tensor([0.0494, 0.2034, 0.4890, 0.2582]) -Greedy action tensor([-1.8117, -0.2810, 0.6078, -0.0401]) tensor([0.0440, 0.2032, 0.4942, 0.2586]) -Greedy action tensor([-1.8912, -0.4520, 0.6464, -0.1526]) tensor([0.0425, 0.1790, 0.5370, 0.2415]) -Greedy action tensor([-1.7942, -0.3144, 0.6318, 0.0241]) tensor([0.0437, 0.1921, 0.4947, 0.2695]) -Greedy action tensor([-1.9439, -0.4495, 0.6695, -0.1780]) tensor([0.0401, 0.1786, 0.5469, 0.2343]) -Greedy action tensor([ 0.5589, -0.2563, -0.1349, -0.1533]) tensor([0.4110, 0.1819, 0.2054, 0.2016]) -Greedy action tensor([ 0.6322, -0.0537, 0.0872, -0.1233]) tensor([0.3917, 0.1973, 0.2271, 0.1840]) -Greedy action tensor([ 0.3726, -0.1750, -0.0539, -0.0539]) tensor([0.3467, 0.2005, 0.2264, 0.2264]) -Greedy action tensor([ 0.7126, -0.2669, 0.0131, -0.0749]) tensor([0.4297, 0.1613, 0.2135, 0.1955]) -Greedy action tensor([ 0.9588, -0.3759, -0.1767, -0.1643]) tensor([0.5236, 0.1378, 0.1682, 0.1703]) -Greedy action tensor([ 0.8913, -0.5809, -0.0391, -0.2524]) tensor([0.5148, 0.1181, 0.2030, 0.1640]) -Greedy action tensor([ 0.7231, -0.6398, -0.0335, -0.3926]) tensor([0.4871, 0.1247, 0.2286, 0.1596]) -Greedy action tensor([ 0.6893, -0.3275, -0.0366, -0.1518]) tensor([0.4392, 0.1589, 0.2125, 0.1894]) -Greedy action tensor([ 1.0748, -0.9278, 0.1624, -0.7008]) tensor([0.5862, 0.0791, 0.2354, 0.0993]) -Greedy action tensor([ 1.0594, -0.9469, -0.0854, -0.4557]) tensor([0.5979, 0.0804, 0.1903, 0.1314]) -Greedy action tensor([ 0.6579, -0.5711, -0.1208, -0.2720]) tensor([0.4659, 0.1363, 0.2139, 0.1839]) -Greedy action tensor([ 0.7977, -0.4958, -0.1008, -0.4898]) tensor([0.5109, 0.1401, 0.2080, 0.1410]) -Greedy action tensor([ 0.5271, -0.1063, 0.2177, -0.3666]) tensor([0.3740, 0.1985, 0.2745, 0.1530]) -Greedy action tensor([ 1.1851, -0.5596, -0.1251, -0.5231]) tensor([0.6151, 0.1075, 0.1659, 0.1115]) -Greedy action tensor([ 0.8244, -0.5071, 0.0072, -0.3636]) tensor([0.4974, 0.1313, 0.2197, 0.1516]) -Greedy action tensor([ 1.0450, -0.9542, 0.0839, -0.5626]) tensor([0.5820, 0.0788, 0.2226, 0.1166]) -Greedy action tensor([ 0.5770, -0.1037, -0.0494, -0.3132]) tensor([0.4079, 0.2065, 0.2180, 0.1675]) -Greedy action tensor([0.5883, 0.1903, 0.0741, 0.1276]) tensor([0.3448, 0.2316, 0.2062, 0.2175]) -Greedy action tensor([ 0.0831, -0.1414, -0.0717, -0.0999]) tensor([0.2867, 0.2290, 0.2456, 0.2387]) -Greedy action tensor([ 0.5386, -0.2259, -0.1180, -0.3472]) tensor([0.4173, 0.1943, 0.2164, 0.1721]) -Greedy action tensor([ 0.4750, -0.3077, -0.1383, -0.2187]) tensor([0.4002, 0.1830, 0.2168, 0.2000]) -Greedy action tensor([ 0.9979, -0.7108, -0.0367, -0.4725]) tensor([0.5662, 0.1025, 0.2012, 0.1301]) -Greedy action tensor([ 0.7740, -0.6905, -0.0378, -0.3186]) tensor([0.4974, 0.1150, 0.2209, 0.1668]) -Greedy action tensor([ 0.5237, -0.3241, -0.0461, -0.0796]) tensor([0.3935, 0.1686, 0.2226, 0.2153]) -Greedy action tensor([ 1.2525, -0.8321, 0.0569, -0.8594]) tensor([0.6461, 0.0803, 0.1954, 0.0782]) -Greedy action tensor([ 1.0625, -0.4866, -0.0977, -0.3894]) tensor([0.5682, 0.1207, 0.1781, 0.1330]) -Greedy action tensor([ 0.8969, -0.8954, -0.0105, -0.3434]) tensor([0.5378, 0.0896, 0.2170, 0.1556]) -Greedy action tensor([ 0.5000, 0.3190, -0.2091, 0.1201]) tensor([0.3322, 0.2772, 0.1635, 0.2272]) -Greedy action tensor([ 0.7076, -0.5126, 0.1271, -0.4293]) tensor([0.4596, 0.1357, 0.2572, 0.1475]) -Greedy action tensor([ 1.1830, -0.5635, -0.0328, -0.6638]) tensor([0.6140, 0.1071, 0.1820, 0.0969]) -Greedy action tensor([ 0.4120, -0.3916, -0.2481, -0.1026]) tensor([0.3903, 0.1747, 0.2017, 0.2333]) -Greedy action tensor([ 1.0966, -0.6352, -0.0316, -0.5856]) tensor([0.5929, 0.1049, 0.1919, 0.1103]) -Greedy action tensor([ 1.0783, -0.5398, 0.1405, -0.5794]) tensor([0.5617, 0.1114, 0.2199, 0.1070]) -Greedy action tensor([ 0.2684, 0.3042, -0.2397, 0.0456]) tensor([0.2908, 0.3014, 0.1750, 0.2327]) -Greedy action tensor([ 0.4605, -0.1534, -0.0212, -0.0293]) tensor([0.3608, 0.1953, 0.2229, 0.2211]) -Greedy action tensor([ 0.5260, -0.0467, 0.0163, -0.0639]) tensor([0.3678, 0.2074, 0.2209, 0.2039]) -Greedy action tensor([ 1.1821, -0.9211, 0.1183, -0.4534]) tensor([0.6017, 0.0734, 0.2077, 0.1172]) -Greedy action tensor([ 0.5557, -0.3655, -0.1531, 0.0379]) tensor([0.4022, 0.1601, 0.1980, 0.2397]) -Greedy action tensor([ 0.9649, -0.4369, -0.1143, -0.4015]) tensor([0.5432, 0.1337, 0.1846, 0.1385]) -Greedy action tensor([ 0.6338, -0.5061, -0.2024, -0.2329]) tensor([0.4601, 0.1472, 0.1994, 0.1934]) -Greedy action tensor([ 1.0212, -0.6368, -0.0710, -0.5285]) tensor([0.5753, 0.1096, 0.1930, 0.1221]) -Greedy action tensor([ 1.0197, -0.7122, 0.0377, -0.4366]) tensor([0.5603, 0.0992, 0.2099, 0.1306]) -Greedy action tensor([ 0.8173, -0.5911, -0.1171, -0.2876]) tensor([0.5080, 0.1242, 0.1996, 0.1683]) -Greedy action tensor([ 0.9672, -0.7729, 0.0269, -0.4959]) tensor([0.5563, 0.0976, 0.2172, 0.1288]) -Greedy action tensor([ 0.8841, -0.6833, 0.1135, -0.2392]) tensor([0.5009, 0.1045, 0.2318, 0.1629]) -Greedy action tensor([ 0.9543, -0.6324, -0.0648, -0.3773]) tensor([0.5466, 0.1118, 0.1973, 0.1443]) -Greedy action tensor([ 0.7333, -0.4350, -0.0647, -0.3825]) tensor([0.4787, 0.1488, 0.2155, 0.1569]) -Greedy action tensor([ 1.1566, -0.6847, -0.0106, -0.3737]) tensor([0.5930, 0.0941, 0.1846, 0.1284]) -Greedy action tensor([ 1.1196, -0.4140, 0.1892, -0.3389]) tensor([0.5427, 0.1171, 0.2140, 0.1262]) -Greedy action tensor([ 0.6826, -0.5702, -0.0971, -0.2340]) tensor([0.4664, 0.1333, 0.2139, 0.1865]) -Greedy action tensor([ 0.8603, -0.6698, -0.0059, -0.5225]) tensor([0.5297, 0.1147, 0.2228, 0.1329]) -Greedy action tensor([ 0.8285, -0.4870, -0.0814, -0.2959]) tensor([0.5011, 0.1345, 0.2017, 0.1628]) -Greedy action tensor([ 0.7781, -0.3577, -0.1209, -0.1630]) tensor([0.4721, 0.1516, 0.1921, 0.1842]) -Greedy action tensor([ 1.0366, -0.5059, -0.2214, -0.8868]) tensor([0.6082, 0.1301, 0.1729, 0.0889]) -Greedy action tensor([ 1.2359, -0.7221, 0.0461, -0.5150]) tensor([0.6176, 0.0872, 0.1879, 0.1072]) -Greedy action tensor([ 0.9764, -1.0914, 0.1083, -0.5062]) tensor([0.5639, 0.0713, 0.2367, 0.1280]) -Greedy action tensor([ 0.9153, -0.1204, 0.0197, -0.3534]) tensor([0.4891, 0.1736, 0.1997, 0.1375]) -Greedy action tensor([ 0.5096, -0.2821, -0.0841, -0.0813]) tensor([0.3908, 0.1770, 0.2158, 0.2164]) -Greedy action tensor([ 0.6710, -0.1687, 0.1331, -0.1238]) tensor([0.4053, 0.1750, 0.2367, 0.1830]) -Greedy action tensor([ 0.4726, -0.2200, 0.0027, -0.2082]) tensor([0.3800, 0.1901, 0.2375, 0.1924]) -Greedy action tensor([ 0.6712, -0.1750, 0.2431, -0.1452]) tensor([0.3964, 0.1701, 0.2583, 0.1752]) -Greedy action tensor([ 0.9003, -0.7142, 0.2571, -0.6414]) tensor([0.5158, 0.1026, 0.2711, 0.1104]) -Greedy action tensor([ 1.1110, -0.6337, 0.0282, -0.3222]) tensor([0.5708, 0.0997, 0.1933, 0.1362]) -Greedy action tensor([ 0.7682, -0.7923, 0.0684, -0.2757]) tensor([0.4857, 0.1020, 0.2412, 0.1710]) -Greedy action tensor([ 0.9494, -0.4064, 0.1078, -0.2568]) tensor([0.5030, 0.1296, 0.2168, 0.1506]) -Greedy action tensor([ 1.1600, -0.6232, -0.0890, -0.5682]) tensor([0.6126, 0.1030, 0.1757, 0.1088]) -Greedy action tensor([ 0.4776, -0.3492, -0.0983, -0.1516]) tensor([0.3949, 0.1727, 0.2220, 0.2104]) -Greedy action tensor([ 1.1346, -0.5394, -0.0872, -0.4864]) tensor([0.5953, 0.1116, 0.1754, 0.1177]) -Greedy action tensor([ 0.8128, -0.5158, 0.1116, -0.4440]) tensor([0.4889, 0.1295, 0.2425, 0.1391]) -Greedy action tensor([ 0.6390, -0.1914, 0.0346, -0.0404]) tensor([0.4017, 0.1751, 0.2195, 0.2037]) -Greedy action tensor([ 0.9562, -0.5315, -0.0698, -0.1928]) tensor([0.5260, 0.1188, 0.1885, 0.1667]) -Greedy action tensor([ 0.1370, -0.1924, -0.2445, -0.2651]) tensor([0.3256, 0.2342, 0.2223, 0.2178]) -Greedy action tensor([ 0.8779, -0.7386, 0.0594, -0.4204]) tensor([0.5228, 0.1038, 0.2306, 0.1427]) -Greedy action tensor([ 1.2076, -0.8767, -0.1057, -0.6304]) tensor([0.6441, 0.0801, 0.1732, 0.1025]) -Greedy action tensor([ 0.7556, -0.6638, 0.3497, -0.5599]) tensor([0.4594, 0.1111, 0.3062, 0.1233]) -Greedy action tensor([ 1.1702, -0.5871, -0.0363, -0.3218]) tensor([0.5894, 0.1017, 0.1764, 0.1326]) -Greedy action tensor([ 0.6336, -0.1037, 0.0276, -0.0155]) tensor([0.3927, 0.1879, 0.2142, 0.2052]) -Greedy action tensor([ 1.2373, -0.7580, 0.0308, -0.7957]) tensor([0.6385, 0.0868, 0.1911, 0.0836]) -Greedy action tensor([ 0.7092, -0.5461, -0.1144, -0.4041]) tensor([0.4872, 0.1389, 0.2138, 0.1600]) -Greedy action tensor([ 0.7610, -0.6303, -0.0568, -0.3768]) tensor([0.4974, 0.1237, 0.2195, 0.1594]) -Greedy action tensor([ 1.0453, -0.3363, -0.0582, -0.4908]) tensor([0.5561, 0.1397, 0.1845, 0.1197]) -Greedy action tensor([-0.0229, -0.0302, 0.3367, -0.4744]) tensor([0.2462, 0.2444, 0.3527, 0.1567]) -Greedy action tensor([-0.6859, -0.3520, -0.5814, -0.3375]) tensor([0.2031, 0.2836, 0.2255, 0.2878]) -Greedy action tensor([ 0.5677, -1.1287, -0.6297, -0.1020]) tensor([0.5007, 0.0918, 0.1512, 0.2563]) -Greedy action tensor([ 1.1339, -0.8354, 0.7916, -0.2144]) tensor([0.4741, 0.0662, 0.3367, 0.1231]) -Greedy action tensor([ 1.6835, -0.5824, 0.6454, 1.2269]) tensor([0.4782, 0.0496, 0.1693, 0.3029]) -Greedy action tensor([-0.3922, -0.3126, 0.2817, -0.3151]) tensor([0.1951, 0.2113, 0.3828, 0.2108]) -Greedy action tensor([-0.3138, -0.6921, 0.0104, 0.1794]) tensor([0.2125, 0.1456, 0.2939, 0.3480]) -Greedy action tensor([-0.3056, -0.1068, -0.6509, 0.0833]) tensor([0.2271, 0.2771, 0.1608, 0.3350]) -Greedy action tensor([ 0.3608, -0.4908, -0.2123, 0.6027]) tensor([0.3064, 0.1307, 0.1727, 0.3902]) -Greedy action tensor([-0.2402, -0.2036, 0.4035, -0.3080]) tensor([0.2051, 0.2128, 0.3905, 0.1917]) -Greedy action tensor([-0.1148, -0.6286, -0.3335, -0.0533]) tensor([0.2886, 0.1726, 0.2319, 0.3069]) -Greedy action tensor([ 0.0170, -0.1361, -0.7528, -0.2247]) tensor([0.3219, 0.2762, 0.1491, 0.2528]) -Greedy action tensor([ 0.6118, -1.8763, 0.7344, -0.0626]) tensor([0.3672, 0.0305, 0.4152, 0.1871]) -Greedy action tensor([ 0.4525, -1.6812, -0.1769, 0.2355]) tensor([0.4071, 0.0482, 0.2170, 0.3277]) -Greedy action tensor([ 0.8303, -0.2144, 0.6015, -0.2319]) tensor([0.4011, 0.1411, 0.3191, 0.1387]) -Greedy action tensor([-0.9862, -0.3614, 0.5086, -0.5737]) tensor([0.1132, 0.2114, 0.5045, 0.1709]) -Greedy action tensor([ 0.5921, -0.9594, 0.5175, 0.7281]) tensor([0.3044, 0.0645, 0.2825, 0.3487]) -Greedy action tensor([-1.3684, 0.4331, -0.9849, 0.0307]) tensor([0.0795, 0.4817, 0.1167, 0.3221]) -Greedy action tensor([-1.2106, -0.1647, -0.4304, -0.7752]) tensor([0.1320, 0.3758, 0.2881, 0.2041]) -Greedy action tensor([-0.3574, -0.1484, -0.0154, -0.6566]) tensor([0.2282, 0.2813, 0.3213, 0.1692]) -Greedy action tensor([ 0.0886, 0.8000, 0.1965, -0.3134]) tensor([0.2075, 0.4226, 0.2311, 0.1388]) -Greedy action tensor([-0.5837, -0.0885, 0.1016, -0.9605]) tensor([0.1883, 0.3089, 0.3736, 0.1292]) -Greedy action tensor([ 0.4028, -0.4755, 0.4394, -0.4060]) tensor([0.3450, 0.1434, 0.3579, 0.1537]) -Greedy action tensor([-0.6097, -0.8217, 0.4459, -1.3283]) tensor([0.1934, 0.1565, 0.5558, 0.0943]) -Greedy action tensor([-0.4848, -0.0986, 1.2564, -1.6775]) tensor([0.1179, 0.1735, 0.6727, 0.0358]) -Greedy action tensor([-0.0588, -0.5617, 1.0907, -0.3975]) tensor([0.1827, 0.1105, 0.5767, 0.1302]) -Greedy action tensor([-0.0513, -1.1246, 0.5692, -0.5543]) tensor([0.2627, 0.0898, 0.4886, 0.1589]) -Greedy action tensor([ 0.5468, 0.2622, -0.2241, -0.1149]) tensor([0.3662, 0.2755, 0.1694, 0.1889]) -Greedy action tensor([-0.1931, -0.8975, 0.1944, -0.2295]) tensor([0.2543, 0.1257, 0.3747, 0.2452]) -Greedy action tensor([ 1.1022, -1.1409, 0.1319, 0.3734]) tensor([0.5082, 0.0539, 0.1926, 0.2452]) -Greedy action tensor([-0.7909, -1.5978, 0.0386, -0.6864]) tensor([0.2062, 0.0920, 0.4728, 0.2290]) -Greedy action tensor([ 1.2132, 0.1639, -0.0975, -0.1709]) tensor([0.5347, 0.1872, 0.1442, 0.1340]) -Greedy action tensor([ 0.8579, -0.9043, 0.0345, 1.0669]) tensor([0.3517, 0.0604, 0.1544, 0.4335]) -Greedy action tensor([-0.0513, -1.1662, -0.8254, 0.5733]) tensor([0.2735, 0.0897, 0.1261, 0.5107]) -Greedy action tensor([ 1.4341, -0.2856, 1.0046, 1.3456]) tensor([0.3643, 0.0652, 0.2371, 0.3334]) -Greedy action tensor([-0.1892, -0.4626, 0.2272, -0.4154]) tensor([0.2454, 0.1867, 0.3722, 0.1957]) -Greedy action tensor([-0.5328, -1.4518, 0.4576, -0.7625]) tensor([0.2047, 0.0816, 0.5510, 0.1627]) -Greedy action tensor([ 0.3401, -1.1467, -0.8317, 0.9631]) tensor([0.2941, 0.0665, 0.0911, 0.5483]) -Greedy action tensor([-0.0782, -0.4113, -0.9744, 0.1264]) tensor([0.2983, 0.2138, 0.1218, 0.3661]) -Greedy action tensor([-0.6985, -0.7411, 0.2800, -1.4721]) tensor([0.1968, 0.1886, 0.5237, 0.0908]) -Greedy action tensor([ 4.9549e-01, -7.6256e-01, -8.7243e-03, 5.0902e-04]) tensor([0.4004, 0.1138, 0.2418, 0.2441]) -Greedy action tensor([-0.5469, -1.7117, -0.7963, 0.5443]) tensor([0.1973, 0.0615, 0.1537, 0.5874]) -Greedy action tensor([-0.4487, -0.2961, 0.1571, 0.6682]) tensor([0.1418, 0.1652, 0.2598, 0.4332]) -Greedy action tensor([-0.5153, -1.1987, 0.3024, -0.8449]) tensor([0.2227, 0.1125, 0.5046, 0.1602]) -Greedy action tensor([ 0.4707, -0.1821, -0.6330, -0.1178]) tensor([0.4154, 0.2163, 0.1378, 0.2306]) -Greedy action tensor([-0.5704, -0.9801, 0.6278, -0.6061]) tensor([0.1683, 0.1117, 0.5577, 0.1624]) -Greedy action tensor([ 0.9049, -0.1498, -0.0324, 0.2880]) tensor([0.4387, 0.1528, 0.1718, 0.2367]) -Greedy action tensor([ 0.0669, -0.1567, 0.3633, -0.1237]) tensor([0.2518, 0.2014, 0.3387, 0.2081]) -Greedy action tensor([-0.6000, 0.2652, 0.5590, -0.4570]) tensor([0.1296, 0.3079, 0.4130, 0.1495]) -Greedy action tensor([ 0.2580, 0.6978, -0.6499, -0.8046]) tensor([0.3029, 0.4702, 0.1222, 0.1047]) -Greedy action tensor([-0.1979, -0.6903, -0.5092, 0.4040]) tensor([0.2399, 0.1466, 0.1757, 0.4379]) -Greedy action tensor([-1.5289, 0.3769, -0.9164, -0.2682]) tensor([0.0763, 0.5134, 0.1409, 0.2693]) -Greedy action tensor([-0.1789, -1.6874, -0.4537, 0.4820]) tensor([0.2553, 0.0565, 0.1939, 0.4943]) -Greedy action tensor([ 1.2893, -1.1369, 1.3729, 0.3861]) tensor([0.3875, 0.0342, 0.4212, 0.1570]) -Greedy action tensor([ 0.5095, -0.9857, -0.3267, 0.5390]) tensor([0.3721, 0.0834, 0.1612, 0.3832]) -Greedy action tensor([-0.0074, -0.5034, -0.3259, 0.8460]) tensor([0.2135, 0.1300, 0.1553, 0.5012]) -Greedy action tensor([ 0.2013, -0.4789, 0.1513, -0.5494]) tensor([0.3413, 0.1729, 0.3247, 0.1611]) -Greedy action tensor([ 0.7050, -0.4188, -0.3563, 0.9262]) tensor([0.3426, 0.1114, 0.1186, 0.4274]) -Greedy action tensor([-0.2821, -0.2335, -0.5784, -0.0850]) tensor([0.2493, 0.2617, 0.1854, 0.3036]) -Greedy action tensor([-0.2308, 0.6778, -0.5312, -0.8159]) tensor([0.2093, 0.5192, 0.1550, 0.1166]) -Greedy action tensor([-0.8239, -2.0551, -1.4920, -0.4221]) tensor([0.3031, 0.0885, 0.1554, 0.4530]) -Greedy action tensor([ 0.4370, -0.6479, 0.0211, -1.0822]) tensor([0.4512, 0.1525, 0.2976, 0.0987]) -Greedy action tensor([-0.1229, -0.5309, -0.6025, -0.5403]) tensor([0.3398, 0.2260, 0.2104, 0.2239]) -Greedy action tensor([ 0.0562, 0.1628, 0.9960, -0.2531]) tensor([0.1850, 0.2058, 0.4734, 0.1358]) -Greedy action tensor([ 1.0007, -1.1221, -0.6741, 0.1153]) tensor([0.5815, 0.0696, 0.1089, 0.2399]) -Greedy action tensor([-0.1751, -0.3104, 0.0738, -0.6632]) tensor([0.2653, 0.2317, 0.3402, 0.1628]) -Greedy action tensor([ 0.0340, 0.2512, -1.3153, 0.2032]) tensor([0.2713, 0.3371, 0.0704, 0.3213]) -Greedy action tensor([ 1.0330, 0.2538, -0.1349, -0.0297]) tensor([0.4727, 0.2169, 0.1470, 0.1633]) -Greedy action tensor([-1.6949, -1.0260, 1.0028, -1.1785]) tensor([0.0514, 0.1002, 0.7623, 0.0861]) -Greedy action tensor([-0.2354, -2.5879, -0.4014, 0.7448]) tensor([0.2170, 0.0206, 0.1839, 0.5785]) -Greedy action tensor([ 0.2033, -1.2689, -0.0837, 0.4641]) tensor([0.3051, 0.0700, 0.2290, 0.3960]) -Greedy action tensor([-0.7908, -0.6376, -0.8583, -0.2991]) tensor([0.2112, 0.2461, 0.1974, 0.3453]) -Greedy action tensor([ 0.3133, -1.1715, -0.5411, -0.1395]) tensor([0.4371, 0.0990, 0.1860, 0.2779]) -Greedy action tensor([-0.4010, -0.8578, -1.0511, 0.6020]) tensor([0.2048, 0.1297, 0.1069, 0.5585]) -Greedy action tensor([ 0.2969, -0.4993, -0.0094, -0.1921]) tensor([0.3571, 0.1611, 0.2629, 0.2190]) -Greedy action tensor([ 0.1957, 0.3263, -0.3906, -0.7311]) tensor([0.3235, 0.3686, 0.1800, 0.1280]) -Greedy action tensor([ 0.6831, -0.3498, 0.0843, 0.2134]) tensor([0.3951, 0.1407, 0.2171, 0.2470]) -Greedy action tensor([ 0.5773, -1.6078, 0.4066, 0.6064]) tensor([0.3350, 0.0377, 0.2824, 0.3449]) -Greedy action tensor([-1.1288, -1.4904, 0.2955, 0.2205]) tensor([0.1030, 0.0718, 0.4281, 0.3971]) -Greedy action tensor([ 1.2111, -0.0198, 0.4880, -0.3238]) tensor([0.5018, 0.1465, 0.2435, 0.1081]) -Greedy action tensor([ 0.6651, -0.7915, 1.4273, -0.7193]) tensor([0.2757, 0.0643, 0.5909, 0.0691]) -Greedy action tensor([ 1.5402, -0.8626, -0.3639, 0.7616]) tensor([0.5888, 0.0533, 0.0877, 0.2703]) -Greedy action tensor([ 1.1304, -0.1748, -0.4151, 0.2858]) tensor([0.5224, 0.1416, 0.1114, 0.2245]) -Greedy action tensor([ 1.7119, -1.2327, -0.2508, -0.1187]) tensor([0.7389, 0.0389, 0.1038, 0.1185]) -Greedy action tensor([ 1.4210, -0.0203, -0.5130, 0.4799]) tensor([0.5645, 0.1336, 0.0816, 0.2203]) -Greedy action tensor([ 1.7518, -0.4505, -0.5334, 0.2082]) tensor([0.7013, 0.0775, 0.0714, 0.1498]) -Greedy action tensor([ 2.0045, -1.0612, -0.3823, 0.0205]) tensor([0.7837, 0.0365, 0.0720, 0.1078]) -Greedy action tensor([ 1.2438, -0.2218, -0.1893, 0.3444]) tensor([0.5329, 0.1231, 0.1272, 0.2168]) -Greedy action tensor([ 2.1901, -1.0761, -0.3888, 1.0685]) tensor([0.6945, 0.0265, 0.0527, 0.2263]) -Greedy action tensor([2.3345, 0.6982, 0.0420, 0.0953]) tensor([0.7131, 0.1388, 0.0720, 0.0760]) -Greedy action tensor([ 2.1583, -1.0224, -0.1252, 0.7456]) tensor([0.7210, 0.0300, 0.0735, 0.1756]) -Greedy action tensor([ 1.8121, 0.6568, -0.7024, 0.2911]) tensor([0.6195, 0.1951, 0.0501, 0.1353]) -Greedy action tensor([ 1.5748, -0.5352, -0.5689, 0.2502]) tensor([0.6647, 0.0806, 0.0779, 0.1768]) -Greedy action tensor([ 1.9322, -0.8400, -0.6104, 1.0408]) tensor([0.6446, 0.0403, 0.0507, 0.2643]) -Greedy action tensor([ 1.7126, -0.4674, -0.3706, 0.1742]) tensor([0.6886, 0.0778, 0.0857, 0.1479]) -Greedy action tensor([ 1.6400, -0.7719, -0.7153, 0.1791]) tensor([0.7059, 0.0633, 0.0670, 0.1638]) -Greedy action tensor([ 1.6637, -0.1351, -0.6369, 0.1259]) tensor([0.6754, 0.1118, 0.0677, 0.1451]) -Greedy action tensor([ 1.5951, -0.8160, -0.4153, 0.2277]) tensor([0.6764, 0.0607, 0.0906, 0.1723]) -Greedy action tensor([ 1.1898, -0.4163, -0.3038, 0.2502]) tensor([0.5506, 0.1105, 0.1237, 0.2152]) -Greedy action tensor([ 1.7511, -0.5630, -0.6388, 0.7301]) tensor([0.6449, 0.0637, 0.0591, 0.2323]) -Greedy action tensor([ 1.6075, -0.4483, -0.5156, 0.1032]) tensor([0.6804, 0.0871, 0.0814, 0.1512]) -Greedy action tensor([ 1.5047, -0.5741, -0.4420, 0.4313]) tensor([0.6212, 0.0777, 0.0887, 0.2124]) -Greedy action tensor([ 1.5320, -0.6056, -0.6698, 0.2277]) tensor([0.6667, 0.0786, 0.0737, 0.1809]) -Greedy action tensor([ 1.5753, -0.3054, -0.2361, 0.1886]) tensor([0.6386, 0.0974, 0.1044, 0.1596]) -Greedy action tensor([ 1.4225, -0.6207, -0.4406, 0.6438]) tensor([0.5735, 0.0743, 0.0890, 0.2632]) -Greedy action tensor([ 1.5715, -0.4595, -0.7021, 0.4235]) tensor([0.6446, 0.0846, 0.0664, 0.2045]) -Greedy action tensor([ 1.0622, -0.0612, -0.0804, -0.3479]) tensor([0.5296, 0.1722, 0.1689, 0.1293]) -Greedy action tensor([ 1.5212, -0.4028, -0.3441, 0.3693]) tensor([0.6185, 0.0903, 0.0958, 0.1954]) -Greedy action tensor([ 1.3808, -0.5470, -0.3321, 0.0277]) tensor([0.6312, 0.0918, 0.1138, 0.1631]) -Greedy action tensor([ 1.9717, 0.3146, -0.1642, 0.3151]) tensor([0.6668, 0.1272, 0.0788, 0.1272]) -Greedy action tensor([ 1.8807, 0.1297, -0.5876, -0.2854]) tensor([0.7284, 0.1264, 0.0617, 0.0835]) -Greedy action tensor([ 1.6990, -0.2149, -0.4371, 0.2794]) tensor([0.6634, 0.0978, 0.0784, 0.1604]) -Greedy action tensor([ 1.8206, 0.2010, -0.3294, 0.6875]) tensor([0.6111, 0.1210, 0.0712, 0.1968]) -Greedy action tensor([ 1.2993, -0.1743, -0.6513, 0.1513]) tensor([0.5922, 0.1357, 0.0842, 0.1879]) -Greedy action tensor([ 1.5121, -0.1919, -0.8812, 0.4789]) tensor([0.6138, 0.1117, 0.0561, 0.2184]) -Greedy action tensor([ 1.8127, -0.2217, -1.3309, 0.3596]) tensor([0.7104, 0.0929, 0.0306, 0.1661]) -Greedy action tensor([ 1.3161, -0.3410, -0.3902, 0.1640]) tensor([0.5923, 0.1130, 0.1075, 0.1872]) -Greedy action tensor([ 1.5808, -0.2396, -0.4086, 0.2165]) tensor([0.6434, 0.1042, 0.0880, 0.1644]) -Greedy action tensor([ 1.3010, -0.4537, -0.2948, 0.6851]) tensor([0.5220, 0.0903, 0.1058, 0.2819]) -Greedy action tensor([ 1.4552, -0.4289, 0.0284, 0.3021]) tensor([0.5856, 0.0890, 0.1406, 0.1848]) -Greedy action tensor([ 2.6045, -1.1335, 0.3958, 0.0848]) tensor([0.8236, 0.0196, 0.0905, 0.0663]) -Greedy action tensor([ 1.0664, 0.2191, -0.2239, -0.4169]) tensor([0.5180, 0.2220, 0.1425, 0.1175]) -Greedy action tensor([ 2.0967, -0.9919, -0.5249, 0.8308]) tensor([0.7142, 0.0325, 0.0519, 0.2014]) -Greedy action tensor([ 1.4436, -0.1790, -0.6163, 0.3285]) tensor([0.6051, 0.1194, 0.0771, 0.1984]) -Greedy action tensor([ 1.6824, -0.5706, -0.2967, 0.5159]) tensor([0.6432, 0.0676, 0.0889, 0.2003]) -Greedy action tensor([ 0.8570, -0.2744, 0.1266, 0.1778]) tensor([0.4326, 0.1396, 0.2084, 0.2194]) -Greedy action tensor([ 1.3812, -0.9590, -0.2489, 0.1583]) tensor([0.6303, 0.0607, 0.1235, 0.1855]) -Greedy action tensor([ 1.1367, -0.3673, -0.5263, -0.2283]) tensor([0.5998, 0.1333, 0.1137, 0.1532]) -Greedy action tensor([ 1.3002, -0.5403, -0.1678, 0.5734]) tensor([0.5340, 0.0848, 0.1230, 0.2582]) -Greedy action tensor([ 0.9155, 0.1196, 0.0647, -0.0019]) tensor([0.4390, 0.1981, 0.1875, 0.1754]) -Greedy action tensor([ 1.1639, -0.4804, -0.4539, 0.3784]) tensor([0.5413, 0.1046, 0.1074, 0.2468]) -Greedy action tensor([ 1.5742, -0.4654, -0.3229, 0.0710]) tensor([0.6656, 0.0866, 0.0998, 0.1480]) -Greedy action tensor([ 2.1815, -0.7574, -0.7062, 0.3666]) tensor([0.7865, 0.0416, 0.0438, 0.1281]) -Greedy action tensor([ 1.5355, 0.0511, -0.1046, 0.2262]) tensor([0.5915, 0.1341, 0.1147, 0.1597]) -Greedy action tensor([ 1.5490, -1.0790, -0.1805, 0.0671]) tensor([0.6771, 0.0489, 0.1201, 0.1538]) -Greedy action tensor([ 1.0729, -0.9918, 0.1857, 1.0064]) tensor([0.4042, 0.0513, 0.1664, 0.3782]) -Greedy action tensor([ 1.3638, -0.2722, -0.0831, 0.6818]) tensor([0.5166, 0.1006, 0.1216, 0.2612]) -Greedy action tensor([ 1.2727, -0.0567, -0.2806, 0.4627]) tensor([0.5205, 0.1378, 0.1101, 0.2316]) -Greedy action tensor([ 1.9420, -0.5464, -0.8074, 0.5076]) tensor([0.7219, 0.0599, 0.0462, 0.1720]) -Greedy action tensor([ 1.3119, -0.3867, -0.8374, 0.1354]) tensor([0.6219, 0.1138, 0.0725, 0.1918]) -Greedy action tensor([ 2.5424, -1.1031, -0.5265, 0.7081]) tensor([0.8115, 0.0212, 0.0377, 0.1296]) -Greedy action tensor([ 1.7090, -0.6380, -0.1387, 0.5712]) tensor([0.6354, 0.0608, 0.1001, 0.2037]) -Greedy action tensor([ 0.6266, -0.1056, -0.0679, -0.0394]) tensor([0.4010, 0.1928, 0.2002, 0.2060]) -Greedy action tensor([ 1.6196, -0.5022, -0.3431, 0.2567]) tensor([0.6595, 0.0790, 0.0927, 0.1688]) -Greedy action tensor([ 1.6102, -0.3952, -0.5480, 0.2997]) tensor([0.6580, 0.0886, 0.0760, 0.1774]) -Greedy action tensor([ 1.1853, 0.0630, -0.0905, 0.3037]) tensor([0.4953, 0.1612, 0.1383, 0.2051]) -Greedy action tensor([ 1.7478, -0.9932, -0.2873, 0.5089]) tensor([0.6735, 0.0434, 0.0880, 0.1951]) -Greedy action tensor([ 1.5513, 0.0801, -0.5433, 0.0186]) tensor([0.6375, 0.1464, 0.0785, 0.1377]) -Greedy action tensor([ 1.5380, -0.4145, -0.3355, 0.0832]) tensor([0.6540, 0.0928, 0.1005, 0.1527]) -Greedy action tensor([ 1.8200, -1.0562, -0.4879, 0.6638]) tensor([0.6800, 0.0383, 0.0676, 0.2140]) -Greedy action tensor([ 0.9858, 0.2107, -0.2088, -0.4314]) tensor([0.4985, 0.2297, 0.1510, 0.1208]) -Greedy action tensor([ 1.0240, -0.1623, -0.2406, 0.5155]) tensor([0.4568, 0.1395, 0.1290, 0.2747]) -Greedy action tensor([ 1.6385, -1.1118, -0.2796, 0.4887]) tensor([0.6547, 0.0418, 0.0962, 0.2073]) -Greedy action tensor([ 1.1069, -0.3507, -0.4718, 0.5003]) tensor([0.5040, 0.1173, 0.1039, 0.2748]) -Greedy action tensor([ 1.5938, -0.6240, -0.4876, 0.7162]) tensor([0.6063, 0.0660, 0.0756, 0.2521]) -Greedy action tensor([ 1.1986, -0.6181, -0.4485, 0.7658]) tensor([0.4990, 0.0811, 0.0961, 0.3237]) -Greedy action tensor([ 1.1568, -0.3169, -1.0120, 0.7051]) tensor([0.5051, 0.1157, 0.0577, 0.3215]) -Greedy action tensor([ 1.3955, 0.0698, -0.3931, -0.2552]) tensor([0.6155, 0.1635, 0.1029, 0.1181]) -Greedy action tensor([ 1.3373, 0.1019, -0.3191, 0.4869]) tensor([0.5239, 0.1523, 0.1000, 0.2238]) -Greedy action tensor([ 1.4139, -0.0273, -0.0549, 0.1817]) tensor([0.5687, 0.1346, 0.1309, 0.1659]) -Greedy action tensor([ 1.2783, -1.0014, -0.1172, 0.0433]) tensor([0.6094, 0.0624, 0.1510, 0.1773]) -Greedy action tensor([ 1.6997, -0.4230, -0.5116, 0.5367]) tensor([0.6486, 0.0776, 0.0711, 0.2027]) -Greedy action tensor([ 1.0361, -0.8882, 0.1070, -0.5828]) tensor([0.5750, 0.0839, 0.2271, 0.1139]) -Greedy action tensor([ 0.7043, -0.5379, -0.0176, -0.2352]) tensor([0.4618, 0.1333, 0.2243, 0.1805]) -Greedy action tensor([ 0.9801, -0.6143, -0.0844, -0.2237]) tensor([0.5411, 0.1099, 0.1866, 0.1624]) -Greedy action tensor([ 0.7263, -0.4203, 0.0556, -0.5567]) tensor([0.4748, 0.1508, 0.2428, 0.1316]) -Greedy action tensor([ 0.7811, -0.2440, -0.1777, -0.2013]) tensor([0.4725, 0.1695, 0.1811, 0.1769]) -Greedy action tensor([ 0.5880, -0.1315, -0.1033, -0.3723]) tensor([0.4218, 0.2054, 0.2113, 0.1615]) -Greedy action tensor([ 0.8105, -0.4273, -0.1568, -0.3119]) tensor([0.5011, 0.1453, 0.1905, 0.1631]) -Greedy action tensor([ 1.3209, -1.0229, 0.1819, -0.6401]) tensor([0.6423, 0.0616, 0.2056, 0.0904]) -Greedy action tensor([ 0.8419, -0.3094, -0.0099, -0.4002]) tensor([0.4922, 0.1557, 0.2100, 0.1421]) -Greedy action tensor([ 0.8041, -0.7302, -0.0928, -0.3280]) tensor([0.5139, 0.1108, 0.2096, 0.1657]) -Greedy action tensor([ 0.8122, -0.1370, -0.1562, -0.1035]) tensor([0.4615, 0.1786, 0.1752, 0.1847]) -Greedy action tensor([ 0.2131, -0.1195, -0.0695, -0.1040]) tensor([0.3126, 0.2241, 0.2356, 0.2276]) -Greedy action tensor([ 0.5804, -0.0798, 0.0593, 0.0041]) tensor([0.3742, 0.1934, 0.2222, 0.2103]) -Greedy action tensor([ 0.6283, -0.3487, -0.0605, -0.2625]) tensor([0.4369, 0.1645, 0.2194, 0.1793]) -Greedy action tensor([ 0.6067, -0.1546, 0.0099, -0.1064]) tensor([0.3988, 0.1862, 0.2196, 0.1954]) -Greedy action tensor([ 0.4466, -0.0685, -0.0438, -0.3632]) tensor([0.3767, 0.2250, 0.2307, 0.1676]) -Greedy action tensor([ 0.8049, -0.7581, -0.0965, -0.4295]) tensor([0.5245, 0.1099, 0.2130, 0.1526]) -Greedy action tensor([ 1.1299, -0.5413, 0.0211, -0.5298]) tensor([0.5854, 0.1101, 0.1932, 0.1113]) -Greedy action tensor([ 0.5838, -0.2498, 0.0298, -0.3399]) tensor([0.4156, 0.1806, 0.2388, 0.1650]) -Greedy action tensor([ 0.5268, 0.0677, -0.0148, -0.2853]) tensor([0.3763, 0.2378, 0.2189, 0.1670]) -Greedy action tensor([ 0.8450, -0.5987, 0.0142, -0.3685]) tensor([0.5079, 0.1199, 0.2213, 0.1509]) -Greedy action tensor([ 0.5683, -0.2418, -0.0710, -0.2168]) tensor([0.4118, 0.1832, 0.2173, 0.1878]) -Greedy action tensor([ 0.5536, -0.5132, -0.1617, -0.0983]) tensor([0.4248, 0.1462, 0.2077, 0.2213]) -Greedy action tensor([ 0.6476, -0.3301, -0.1443, -0.2464]) tensor([0.4468, 0.1681, 0.2024, 0.1827]) -Greedy action tensor([ 0.7933, -0.3934, 0.1313, -0.4268]) tensor([0.4725, 0.1442, 0.2437, 0.1395]) -Greedy action tensor([ 0.8351, -0.8558, -0.1272, -0.1956]) tensor([0.5200, 0.0959, 0.1986, 0.1855]) -Greedy action tensor([ 1.0282, -0.4792, 0.0477, -0.2507]) tensor([0.5334, 0.1181, 0.2001, 0.1485]) -Greedy action tensor([ 1.0644, -0.7228, -0.0081, -0.4863]) tensor([0.5808, 0.0972, 0.1987, 0.1232]) -Greedy action tensor([ 0.9911, -0.6325, 0.2934, -0.7479]) tensor([0.5346, 0.1054, 0.2661, 0.0939]) -Greedy action tensor([ 1.2981, -0.6942, 0.0423, -0.6321]) tensor([0.6384, 0.0871, 0.1818, 0.0927]) -Greedy action tensor([ 1.0113, -0.6310, -0.0382, -0.4092]) tensor([0.5601, 0.1084, 0.1961, 0.1353]) -Greedy action tensor([ 0.4172, 0.0092, 0.0291, -0.2234]) tensor([0.3484, 0.2317, 0.2363, 0.1836]) -Greedy action tensor([ 0.8436, -0.4884, 0.0998, -0.4086]) tensor([0.4938, 0.1303, 0.2347, 0.1412]) -Greedy action tensor([ 0.8707, -0.3546, -0.1135, -0.2236]) tensor([0.4994, 0.1467, 0.1867, 0.1672]) -Greedy action tensor([ 0.8729, -0.9702, -0.0076, -0.5430]) tensor([0.5508, 0.0872, 0.2283, 0.1337]) -Greedy action tensor([ 0.9230, -0.8713, 0.0685, -0.4267]) tensor([0.5402, 0.0898, 0.2299, 0.1401]) -Greedy action tensor([ 0.7389, -0.7006, 0.0607, -0.3022]) tensor([0.4767, 0.1130, 0.2419, 0.1683]) -Greedy action tensor([ 0.9676, -0.4703, -0.1423, -0.2881]) tensor([0.5400, 0.1282, 0.1780, 0.1538]) -Greedy action tensor([ 0.7371, -0.4080, -0.0347, -0.2390]) tensor([0.4636, 0.1475, 0.2142, 0.1747]) -Greedy action tensor([ 0.3955, -0.0519, -0.0363, -0.1284]) tensor([0.3471, 0.2219, 0.2254, 0.2056]) -Greedy action tensor([ 0.4433, -0.2486, 0.0023, -0.1483]) tensor([0.3707, 0.1856, 0.2385, 0.2052]) -Greedy action tensor([ 0.8532, -0.5098, -0.0058, -0.2762]) tensor([0.4993, 0.1278, 0.2115, 0.1614]) -Greedy action tensor([ 0.7612, -0.4123, -0.0674, -0.3273]) tensor([0.4802, 0.1485, 0.2097, 0.1617]) -Greedy action tensor([ 0.6444, -0.3016, -0.0298, -0.0594]) tensor([0.4180, 0.1623, 0.2130, 0.2068]) -Greedy action tensor([ 0.7366, -0.2726, -0.1670, -0.0971]) tensor([0.4537, 0.1654, 0.1838, 0.1971]) -Greedy action tensor([ 0.6881, -0.4390, -0.0812, -0.2634]) tensor([0.4601, 0.1490, 0.2132, 0.1777]) -Greedy action tensor([ 0.4768, -0.3043, -0.2226, -0.1913]) tensor([0.4053, 0.1856, 0.2014, 0.2078]) -Greedy action tensor([ 0.5081, -0.2472, 0.0144, -0.1661]) tensor([0.3861, 0.1814, 0.2357, 0.1968]) -Greedy action tensor([ 0.6233, -0.4702, -0.0149, -0.2660]) tensor([0.4397, 0.1473, 0.2323, 0.1807]) -Greedy action tensor([ 0.8312, -0.5520, -0.1175, -0.1667]) tensor([0.4983, 0.1250, 0.1930, 0.1837]) -Greedy action tensor([ 0.8369, 0.1184, -0.0552, 0.1248]) tensor([0.4188, 0.2042, 0.1716, 0.2055]) -Greedy action tensor([ 0.7360, 0.1006, -0.0602, 0.1019]) tensor([0.3982, 0.2109, 0.1796, 0.2112]) -Greedy action tensor([ 0.7042, -0.1775, -0.1458, -0.0104]) tensor([0.4290, 0.1777, 0.1834, 0.2100]) -Greedy action tensor([ 0.9831, -0.6828, 0.0403, -0.5278]) tensor([0.5558, 0.1051, 0.2165, 0.1227]) -Greedy action tensor([ 0.6368, 0.0974, -0.0561, 0.1012]) tensor([0.3747, 0.2185, 0.1874, 0.2193]) -Greedy action tensor([ 1.0027, -0.7136, -0.0950, -0.4925]) tensor([0.5755, 0.1034, 0.1920, 0.1290]) -Greedy action tensor([ 1.0245, -0.4605, -0.0777, -0.4128]) tensor([0.5567, 0.1261, 0.1849, 0.1323]) -Greedy action tensor([ 1.0751, -0.4928, -0.0452, -0.3832]) tensor([0.5658, 0.1180, 0.1846, 0.1316]) -Greedy action tensor([ 0.7177, -0.3596, 0.0236, -0.4260]) tensor([0.4633, 0.1577, 0.2314, 0.1476]) -Greedy action tensor([ 0.8125, -0.4830, -0.0729, -0.5605]) tensor([0.5156, 0.1411, 0.2127, 0.1306]) -Greedy action tensor([ 0.4580, 0.1265, -0.2791, -0.0312]) tensor([0.3559, 0.2555, 0.1703, 0.2182]) -Greedy action tensor([ 0.8274, -0.2850, 0.0970, -0.1142]) tensor([0.4544, 0.1494, 0.2189, 0.1772]) -Greedy action tensor([ 0.5690, -0.4615, -0.1670, -0.1076]) tensor([0.4266, 0.1522, 0.2043, 0.2168]) -Greedy action tensor([ 0.2728, -0.0219, -0.0599, -0.2084]) tensor([0.3247, 0.2418, 0.2328, 0.2007]) -Greedy action tensor([ 1.1582, -0.3425, -0.0267, -0.3158]) tensor([0.5689, 0.1268, 0.1740, 0.1303]) -Greedy action tensor([ 0.9327, -0.5812, 0.0851, -0.6287]) tensor([0.5381, 0.1184, 0.2306, 0.1129]) -Greedy action tensor([ 0.6873, -0.5059, -0.1493, -0.3997]) tensor([0.4822, 0.1462, 0.2089, 0.1626]) -Greedy action tensor([ 0.7862, -0.8551, -0.1791, -0.3598]) tensor([0.5284, 0.1024, 0.2012, 0.1680]) -Greedy action tensor([ 0.8209, -0.5085, -0.0030, -0.3270]) tensor([0.4949, 0.1310, 0.2171, 0.1570]) -Greedy action tensor([ 0.5568, -0.4221, -0.2877, -0.4990]) tensor([0.4644, 0.1745, 0.1996, 0.1616]) -Greedy action tensor([ 1.1718, -0.5749, -0.0673, -0.4323]) tensor([0.6006, 0.1047, 0.1740, 0.1208]) -Greedy action tensor([ 0.6310, -0.3598, 0.0170, -0.2403]) tensor([0.4290, 0.1593, 0.2322, 0.1795]) -Greedy action tensor([ 0.9065, -0.3244, -0.0240, -0.2785]) tensor([0.5020, 0.1466, 0.1980, 0.1535]) -Greedy action tensor([ 0.8225, -0.1669, -0.2078, -0.4303]) tensor([0.4964, 0.1846, 0.1772, 0.1418]) -Greedy action tensor([ 0.8521, -0.1845, -0.0344, -0.2391]) tensor([0.4756, 0.1687, 0.1960, 0.1597]) -Greedy action tensor([ 1.0468, -0.2261, -0.2489, -0.6761]) tensor([0.5773, 0.1616, 0.1580, 0.1031]) -Greedy action tensor([ 0.2538, -0.0107, 0.0200, -0.5227]) tensor([0.3312, 0.2542, 0.2622, 0.1524]) -Greedy action tensor([ 1.0821, -0.6287, 0.0293, -0.2229]) tensor([0.5553, 0.1004, 0.1938, 0.1506]) -Greedy action tensor([ 0.5171, -0.0933, -0.0136, -0.1783]) tensor([0.3802, 0.2065, 0.2236, 0.1897]) -Greedy action tensor([ 0.7486, -0.3556, -0.1212, -0.0965]) tensor([0.4587, 0.1520, 0.1922, 0.1970]) -Greedy action tensor([ 0.9005, -0.4970, 0.1004, -0.4147]) tensor([0.5089, 0.1258, 0.2287, 0.1366]) -Greedy action tensor([-1.9159, -0.3835, 0.6406, -0.1690]) tensor([0.0412, 0.1908, 0.5314, 0.2365]) -Greedy action tensor([-1.6913, 0.1284, 0.3374, -0.3062]) tensor([0.0533, 0.3287, 0.4051, 0.2129]) -Greedy action tensor([0.9206, 0.0595, 0.7528, 1.6630]) tensor([0.2289, 0.0967, 0.1935, 0.4809]) -Greedy action tensor([-1.8946, -0.4535, 0.6487, -0.1556]) tensor([0.0423, 0.1787, 0.5382, 0.2408]) -Greedy action tensor([-0.1276, 0.5958, -0.0496, 0.4244]) tensor([0.1701, 0.3506, 0.1839, 0.2954]) -Greedy action tensor([-1.8135, -0.4583, 0.5940, -0.1012]) tensor([0.0465, 0.1801, 0.5160, 0.2574]) -Greedy action tensor([-1.8909, -0.4407, 0.6374, -0.1614]) tensor([0.0427, 0.1820, 0.5348, 0.2406]) -Greedy action tensor([-1.7781, -0.4289, 0.5840, -0.1434]) tensor([0.0486, 0.1871, 0.5153, 0.2490]) -Greedy action tensor([-1.8211, -0.4347, 0.6054, -0.1220]) tensor([0.0459, 0.1836, 0.5195, 0.2510]) -Greedy action tensor([-1.3898, 0.6363, 0.2664, 0.0688]) tensor([0.0552, 0.4185, 0.2891, 0.2373]) -Greedy action tensor([-1.8220, -0.4531, 0.6012, -0.1171]) tensor([0.0461, 0.1810, 0.5196, 0.2533]) -Greedy action tensor([-1.1739, -0.2906, 0.2242, 0.2653]) tensor([0.0856, 0.2070, 0.3464, 0.3610]) -Greedy action tensor([-1.9105, -0.3923, 0.6483, -0.1631]) tensor([0.0413, 0.1884, 0.5334, 0.2369]) -Greedy action tensor([-1.8975, -0.3874, 0.6543, -0.1483]) tensor([0.0415, 0.1878, 0.5322, 0.2385]) -Greedy action tensor([-1.8804, -0.4472, 0.6444, -0.1458]) tensor([0.0428, 0.1796, 0.5349, 0.2427]) -Greedy action tensor([-1.8665, -0.2485, 0.6321, -0.2201]) tensor([0.0427, 0.2155, 0.5200, 0.2217]) -Greedy action tensor([-0.9782, 0.2545, 0.1753, 0.0032]) tensor([0.0974, 0.3341, 0.3087, 0.2598]) -Greedy action tensor([-0.6678, 0.7606, 0.0719, -0.0532]) tensor([0.1097, 0.4576, 0.2298, 0.2028]) -Greedy action tensor([-1.8764, -0.4555, 0.6404, -0.1456]) tensor([0.0431, 0.1787, 0.5346, 0.2436]) -Greedy action tensor([-1.8737, -0.3003, 0.6165, -0.1469]) tensor([0.0425, 0.2051, 0.5132, 0.2392]) -Greedy action tensor([-1.4467, -0.1206, 0.3796, 0.0456]) tensor([0.0648, 0.2442, 0.4027, 0.2883]) -Greedy action tensor([-1.7489, -0.4441, 0.6474, 0.0495]) tensor([0.0461, 0.1698, 0.5059, 0.2782]) -Greedy action tensor([-1.8935, -0.4697, 0.6376, -0.1541]) tensor([0.0427, 0.1774, 0.5368, 0.2432]) -Greedy action tensor([-1.9113, -0.4747, 0.7194, -0.1449]) tensor([0.0401, 0.1687, 0.5567, 0.2346]) -Greedy action tensor([-1.9408, -0.4013, 0.6559, -0.1785]) tensor([0.0401, 0.1872, 0.5388, 0.2339]) -Greedy action tensor([-1.7024, -0.1287, 0.5583, -0.0232]) tensor([0.0481, 0.2322, 0.4616, 0.2581]) -Greedy action tensor([-1.8327, -0.3395, 0.6296, -0.1207]) tensor([0.0440, 0.1959, 0.5163, 0.2438]) -Greedy action tensor([-1.8079, -0.3854, 0.6402, -0.1089]) tensor([0.0451, 0.1870, 0.5214, 0.2465]) -Greedy action tensor([-1.4377, 0.2293, 0.3388, -0.0365]) tensor([0.0615, 0.3256, 0.3633, 0.2496]) -Greedy action tensor([-1.8662, -0.4437, 0.6252, -0.1400]) tensor([0.0438, 0.1816, 0.5287, 0.2460]) -Greedy action tensor([-1.8799, -0.4751, 0.6605, -0.1223]) tensor([0.0425, 0.1730, 0.5385, 0.2461]) -Greedy action tensor([-0.5369, -0.4510, 0.2457, 0.3136]) tensor([0.1511, 0.1647, 0.3305, 0.3537]) -Greedy action tensor([-1.7884, -0.4606, 0.6041, -0.0623]) tensor([0.0469, 0.1769, 0.5129, 0.2634]) -Greedy action tensor([-1.8999, -0.4233, 0.6405, -0.1616]) tensor([0.0421, 0.1843, 0.5341, 0.2395]) -Greedy action tensor([-1.5825, -0.5005, 0.4641, 0.0454]) tensor([0.0596, 0.1758, 0.4612, 0.3034]) -Greedy action tensor([-1.8837, -0.4509, 0.6301, -0.1431]) tensor([0.0430, 0.1803, 0.5314, 0.2453]) -Greedy action tensor([-1.3273, -0.5115, 0.7402, 0.7108]) tensor([0.0531, 0.1200, 0.4195, 0.4074]) -Greedy action tensor([-1.8936, -0.4105, 0.6345, -0.1560]) tensor([0.0423, 0.1866, 0.5305, 0.2406]) -Greedy action tensor([-0.8134, -0.5167, 0.2060, 0.2121]) tensor([0.1265, 0.1702, 0.3506, 0.3527]) -Greedy action tensor([-1.6791, -0.2214, 0.5061, -0.0866]) tensor([0.0523, 0.2249, 0.4655, 0.2573]) -Greedy action tensor([-1.8965, -0.4252, 0.6444, -0.1506]) tensor([0.0421, 0.1832, 0.5337, 0.2410]) -Greedy action tensor([-1.9023, -0.4274, 0.6449, -0.1605]) tensor([0.0419, 0.1833, 0.5355, 0.2393]) -Greedy action tensor([-1.8541, -0.4380, 0.6215, -0.1313]) tensor([0.0442, 0.1823, 0.5258, 0.2477]) -Greedy action tensor([-1.8498, -0.3504, 0.6073, -0.1413]) tensor([0.0441, 0.1976, 0.5148, 0.2435]) -Greedy action tensor([-1.8601, -0.3673, 0.6146, -0.1430]) tensor([0.0437, 0.1943, 0.5188, 0.2432]) -Greedy action tensor([-1.7882, -0.2941, 0.5642, -0.1231]) tensor([0.0471, 0.2096, 0.4946, 0.2487]) -Greedy action tensor([-1.8277, -0.4473, 0.6110, -0.1206]) tensor([0.0456, 0.1812, 0.5221, 0.2512]) -Greedy action tensor([-1.5840, -0.2455, 0.5912, -0.0323]) tensor([0.0545, 0.2080, 0.4801, 0.2574]) -Greedy action tensor([-1.8603, -0.3017, 0.6231, -0.1934]) tensor([0.0434, 0.2064, 0.5203, 0.2299]) -Greedy action tensor([-1.9048, -0.4121, 0.6649, -0.1477]) tensor([0.0411, 0.1830, 0.5374, 0.2384]) -Greedy action tensor([-1.9264, -0.4503, 0.6544, -0.1696]) tensor([0.0410, 0.1795, 0.5418, 0.2377]) -Greedy action tensor([-1.7844, -0.2603, 0.6080, 0.0231]) tensor([0.0442, 0.2029, 0.4835, 0.2694]) -Greedy action tensor([-0.3960, 0.9371, -0.0229, 0.1540]) tensor([0.1253, 0.4754, 0.1820, 0.2172]) -Greedy action tensor([-1.9351, -0.4213, 0.6567, -0.1752]) tensor([0.0405, 0.1839, 0.5404, 0.2352]) -Greedy action tensor([-1.8772, -0.3795, 0.6424, -0.1436]) tensor([0.0425, 0.1898, 0.5274, 0.2403]) -Greedy action tensor([-1.8031, -0.4274, 0.5935, -0.1163]) tensor([0.0468, 0.1854, 0.5147, 0.2531]) -Greedy action tensor([-1.9209, -0.4452, 0.6564, -0.1691]) tensor([0.0412, 0.1800, 0.5416, 0.2372]) -Greedy action tensor([-1.7779, -0.5188, 0.5817, -0.0945]) tensor([0.0488, 0.1719, 0.5166, 0.2627]) -Greedy action tensor([-1.9104, -0.4210, 0.6504, -0.1592]) tensor([0.0414, 0.1837, 0.5362, 0.2387]) -Greedy action tensor([-1.6961, -0.2972, 0.6106, -0.0087]) tensor([0.0488, 0.1976, 0.4899, 0.2637]) -Greedy action tensor([-1.9244, -0.4340, 0.6573, -0.1702]) tensor([0.0409, 0.1816, 0.5410, 0.2365]) -Greedy action tensor([-1.8999, -0.4464, 0.6482, -0.1631]) tensor([0.0421, 0.1802, 0.5385, 0.2392]) -Greedy action tensor([-1.8420, -0.4414, 0.6098, -0.1257]) tensor([0.0450, 0.1825, 0.5222, 0.2503]) -Greedy action tensor([-1.1605, 0.7050, 0.1607, 0.1584]) tensor([0.0669, 0.4322, 0.2507, 0.2502]) -Greedy action tensor([-1.3852, 0.0516, 0.3375, 0.0128]) tensor([0.0673, 0.2832, 0.3770, 0.2725]) -Greedy action tensor([-1.0390, -0.0367, 0.3374, 0.0512]) tensor([0.0938, 0.2556, 0.3715, 0.2791]) -Greedy action tensor([-1.9301, -0.4164, 0.6568, -0.1734]) tensor([0.0406, 0.1845, 0.5396, 0.2353]) -Greedy action tensor([-1.8804, -0.4500, 0.6606, -0.1308]) tensor([0.0423, 0.1769, 0.5372, 0.2435]) -Greedy action tensor([-1.4455, -0.2159, 0.3343, 0.1272]) tensor([0.0659, 0.2254, 0.3909, 0.3178]) -Greedy action tensor([-1.7110, -0.0210, 0.4750, -0.0575]) tensor([0.0487, 0.2638, 0.4332, 0.2543]) -Greedy action tensor([-1.7057, -0.4161, 0.5718, -0.0482]) tensor([0.0509, 0.1850, 0.4968, 0.2673]) -Greedy action tensor([-1.9177, -0.4104, 0.6523, -0.1655]) tensor([0.0411, 0.1854, 0.5366, 0.2369]) -Greedy action tensor([-1.9293, -0.4317, 0.6605, -0.1721]) tensor([0.0407, 0.1818, 0.5419, 0.2357]) -Greedy action tensor([-1.7405, 0.0869, 0.4891, -0.0113]) tensor([0.0451, 0.2807, 0.4197, 0.2545]) -Greedy action tensor([-1.5404, -0.2333, 0.4385, -0.0255]) tensor([0.0607, 0.2242, 0.4390, 0.2760]) -Greedy action tensor([-1.9338, -0.4271, 0.6602, -0.1734]) tensor([0.0405, 0.1826, 0.5416, 0.2353]) -Greedy action tensor([-1.9011, -0.4084, 0.6400, -0.1611]) tensor([0.0419, 0.1866, 0.5324, 0.2390]) -Greedy action tensor([-1.8130, -0.4742, 0.5954, -0.1144]) tensor([0.0467, 0.1783, 0.5195, 0.2555]) -Greedy action tensor([-1.8299, -0.3324, 0.5988, -0.1416]) tensor([0.0450, 0.2012, 0.5104, 0.2434]) -Greedy action tensor([-1.8282, -0.3972, 0.6043, -0.1214]) tensor([0.0453, 0.1894, 0.5157, 0.2496]) -Greedy action tensor([-1.9018, -0.4551, 0.6514, -0.1598]) tensor([0.0420, 0.1785, 0.5397, 0.2398]) -Greedy action tensor([-0.2966, 0.7003, -0.1323, -0.2493]) tensor([0.1684, 0.4564, 0.1985, 0.1766]) -Greedy action tensor([-0.0995, 0.8722, 0.2602, -0.9709]) tensor([0.1820, 0.4810, 0.2608, 0.0762]) -Greedy action tensor([-0.7701, -0.7577, 0.2419, -0.2614]) tensor([0.1556, 0.1575, 0.4281, 0.2588]) -Greedy action tensor([-0.1126, -0.0982, -0.5539, 0.3059]) tensor([0.2394, 0.2429, 0.1540, 0.3638]) -Greedy action tensor([-0.5620, -2.1468, -0.0438, 0.3409]) tensor([0.1869, 0.0383, 0.3138, 0.4610]) -Greedy action tensor([ 0.0103, -0.6916, -0.5206, -0.7066]) tensor([0.3888, 0.1927, 0.2286, 0.1898]) -Greedy action tensor([ 0.8607, 0.1017, 0.3548, -0.5743]) tensor([0.4331, 0.2027, 0.2611, 0.1031]) -Greedy action tensor([-0.5301, -1.8428, 0.5482, -0.6599]) tensor([0.1966, 0.0529, 0.5779, 0.1727]) -Greedy action tensor([-0.3586, -1.0154, 0.4012, -1.0422]) tensor([0.2403, 0.1246, 0.5138, 0.1213]) -Greedy action tensor([0.7064, 0.5422, 0.3414, 0.3873]) tensor([0.3058, 0.2596, 0.2123, 0.2223]) -Greedy action tensor([-0.3647, -1.8246, 0.2343, 0.2016]) tensor([0.2077, 0.0482, 0.3781, 0.3659]) -Greedy action tensor([-0.2091, -0.2817, 0.8678, 0.1478]) tensor([0.1589, 0.1477, 0.4664, 0.2270]) -Greedy action tensor([ 0.4196, 0.5156, -0.7145, -0.3324]) tensor([0.3456, 0.3804, 0.1112, 0.1629]) -Greedy action tensor([ 0.2818, -0.4262, -0.6974, 0.5700]) tensor([0.3123, 0.1538, 0.1173, 0.4166]) -Greedy action tensor([ 0.0383, -1.1642, -0.7446, 0.2739]) tensor([0.3308, 0.0994, 0.1512, 0.4187]) -Greedy action tensor([-1.0773, 0.3848, 0.0836, -0.8933]) tensor([0.1030, 0.4444, 0.3288, 0.1238]) -Greedy action tensor([-1.1260, -0.9436, 1.0069, -0.8873]) tensor([0.0840, 0.1008, 0.7086, 0.1066]) -Greedy action tensor([ 0.1175, -0.5971, -0.8070, -0.0428]) tensor([0.3652, 0.1787, 0.1449, 0.3111]) -Greedy action tensor([-4.4605e-02, 2.8421e-04, -8.0584e-01, -1.0698e-01]) tensor([0.2896, 0.3029, 0.1353, 0.2721]) -Greedy action tensor([-0.5819, -0.3565, 0.0802, 0.0562]) tensor([0.1644, 0.2059, 0.3186, 0.3111]) -Greedy action tensor([ 0.2434, -0.7615, -0.0469, 0.3183]) tensor([0.3133, 0.1147, 0.2344, 0.3377]) -Greedy action tensor([ 0.4280, -1.1621, -0.5417, -1.1545]) tensor([0.5591, 0.1140, 0.2120, 0.1149]) -Greedy action tensor([-1.0342, -0.1550, -0.5054, 0.5151]) tensor([0.1019, 0.2455, 0.1729, 0.4797]) -Greedy action tensor([-0.6365, 0.3530, 0.3141, -1.1135]) tensor([0.1450, 0.3900, 0.3751, 0.0900]) -Greedy action tensor([-0.0135, -0.9683, 0.3840, -0.1147]) tensor([0.2648, 0.1019, 0.3940, 0.2393]) -Greedy action tensor([ 0.9551, 0.3739, -0.1662, 0.1359]) tensor([0.4299, 0.2404, 0.1401, 0.1895]) -Greedy action tensor([ 0.0038, -1.0370, 1.1392, -0.9568]) tensor([0.2063, 0.0728, 0.6420, 0.0789]) -Greedy action tensor([-0.4621, -0.4640, 0.3568, 0.2363]) tensor([0.1593, 0.1590, 0.3613, 0.3203]) -Greedy action tensor([-0.3132, -0.7253, 0.1641, -0.0831]) tensor([0.2206, 0.1461, 0.3556, 0.2777]) -Greedy action tensor([-0.4866, 1.1810, 0.5164, -0.7194]) tensor([0.1018, 0.5398, 0.2777, 0.0807]) -Greedy action tensor([-0.8602, -0.6569, 0.3075, -0.9653]) tensor([0.1577, 0.1933, 0.5070, 0.1420]) -Greedy action tensor([ 0.5373, -1.2709, -0.0538, -0.1729]) tensor([0.4526, 0.0742, 0.2506, 0.2225]) -Greedy action tensor([-0.3072, -1.0693, -0.1921, -0.1026]) tensor([0.2621, 0.1223, 0.2940, 0.3216]) -Greedy action tensor([ 0.5038, -1.1860, -1.3540, 0.6269]) tensor([0.4046, 0.0747, 0.0631, 0.4576]) -Greedy action tensor([-0.1578, -0.0552, 0.1566, -0.3234]) tensor([0.2312, 0.2562, 0.3166, 0.1959]) -Greedy action tensor([ 0.3484, -0.9425, 0.1634, 0.1237]) tensor([0.3443, 0.0947, 0.2861, 0.2750]) -Greedy action tensor([ 0.4204, -0.5452, -0.2296, 0.0904]) tensor([0.3814, 0.1452, 0.1991, 0.2742]) -Greedy action tensor([ 0.5369, -1.0128, -0.1475, -0.3131]) tensor([0.4664, 0.0990, 0.2352, 0.1993]) -Greedy action tensor([-0.7937, 0.3727, 0.8071, -1.2192]) tensor([0.1018, 0.3269, 0.5047, 0.0665]) -Greedy action tensor([-0.6752, -1.7075, 0.2630, -1.3588]) tensor([0.2264, 0.0807, 0.5786, 0.1143]) -Greedy action tensor([-0.7179, 0.1143, 0.0146, -1.1277]) tensor([0.1655, 0.3804, 0.3443, 0.1099]) -Greedy action tensor([-1.1411, -0.8475, -1.0919, -0.4784]) tensor([0.1876, 0.2515, 0.1970, 0.3639]) -Greedy action tensor([-0.8009, -0.2992, -0.1620, -0.9312]) tensor([0.1844, 0.3045, 0.3493, 0.1618]) -Greedy action tensor([ 0.1833, -0.5685, 0.0777, -0.4375]) tensor([0.3438, 0.1621, 0.3093, 0.1848]) -Greedy action tensor([-1.8139, -1.2864, 0.5374, -0.5251]) tensor([0.0594, 0.1007, 0.6241, 0.2157]) -Greedy action tensor([ 0.3031, -0.6244, -0.1300, -0.4125]) tensor([0.3948, 0.1562, 0.2560, 0.1930]) -Greedy action tensor([-0.3154, -0.3649, 0.4507, -0.5303]) tensor([0.2037, 0.1938, 0.4382, 0.1643]) -Greedy action tensor([ 0.7482, -1.5540, 0.1519, 0.9064]) tensor([0.3543, 0.0354, 0.1952, 0.4150]) -Greedy action tensor([ 0.1428, -0.9223, -0.0478, 0.2618]) tensor([0.3033, 0.1045, 0.2506, 0.3416]) -Greedy action tensor([-0.1074, 0.4786, 0.5791, -0.3324]) tensor([0.1791, 0.3219, 0.3559, 0.1430]) -Greedy action tensor([-0.0019, -1.0914, -0.4658, -0.0194]) tensor([0.3392, 0.1141, 0.2133, 0.3333]) -Greedy action tensor([ 0.8307, -0.0130, -0.1607, 0.0317]) tensor([0.4442, 0.1911, 0.1649, 0.1998]) -Greedy action tensor([ 0.1263, -1.4657, 0.9378, -0.0298]) tensor([0.2320, 0.0472, 0.5223, 0.1985]) -Greedy action tensor([ 0.8263, 0.2187, -1.0392, 1.1021]) tensor([0.3315, 0.1805, 0.0513, 0.4367]) -Greedy action tensor([ 1.3894, 0.1460, -0.9077, -0.3691]) tensor([0.6405, 0.1847, 0.0644, 0.1104]) -Greedy action tensor([-0.0843, -0.6568, 0.0859, 0.7055]) tensor([0.2019, 0.1139, 0.2394, 0.4448]) -Greedy action tensor([-1.2044, -1.6158, 0.9792, -0.4840]) tensor([0.0794, 0.0526, 0.7048, 0.1632]) -Greedy action tensor([ 0.3119, 0.1666, -0.6871, -0.2346]) tensor([0.3556, 0.3075, 0.1310, 0.2059]) -Greedy action tensor([ 0.3786, -0.5515, 0.5219, -0.8992]) tensor([0.3537, 0.1395, 0.4082, 0.0986]) -Greedy action tensor([ 1.7295, -0.5897, -0.2381, 0.9100]) tensor([0.5957, 0.0586, 0.0833, 0.2625]) -Greedy action tensor([-1.2626, -0.5807, -0.9756, 0.3277]) tensor([0.1085, 0.2146, 0.1446, 0.5323]) -Greedy action tensor([-0.1738, -1.2458, 0.4475, -1.0394]) tensor([0.2759, 0.0944, 0.5135, 0.1161]) -Greedy action tensor([ 0.3122, -0.3755, 0.2469, -0.4387]) tensor([0.3435, 0.1727, 0.3218, 0.1621]) -Greedy action tensor([-0.4270, -0.1843, 0.0238, -0.3968]) tensor([0.2051, 0.2615, 0.3220, 0.2114]) -Greedy action tensor([ 0.7417, -0.3015, -0.2048, -0.1093]) tensor([0.4614, 0.1626, 0.1791, 0.1970]) -Greedy action tensor([0.7518, 0.1480, 0.9936, 0.7487]) tensor([0.2620, 0.1432, 0.3336, 0.2612]) -Greedy action tensor([ 0.4019, -0.8316, 0.2211, 0.1862]) tensor([0.3411, 0.0993, 0.2847, 0.2749]) -Greedy action tensor([-0.2194, -0.1698, -0.0877, -0.3320]) tensor([0.2448, 0.2572, 0.2792, 0.2187]) -Greedy action tensor([-0.9235, -0.6858, -0.2669, -0.2904]) tensor([0.1645, 0.2086, 0.3171, 0.3098]) -Greedy action tensor([-0.6670, -0.6899, 0.8827, -0.7778]) tensor([0.1319, 0.1289, 0.6212, 0.1180]) -Greedy action tensor([ 0.6544, -0.6278, 0.0175, -0.0280]) tensor([0.4326, 0.1200, 0.2288, 0.2186]) -Greedy action tensor([ 0.4541, -0.2499, 0.6077, 0.0372]) tensor([0.3012, 0.1490, 0.3512, 0.1985]) -Greedy action tensor([-0.0161, -0.7856, -0.4390, -0.2038]) tensor([0.3393, 0.1572, 0.2223, 0.2812]) -Greedy action tensor([ 0.7150, -0.0336, -0.2517, 0.1723]) tensor([0.4108, 0.1943, 0.1562, 0.2387]) -Greedy action tensor([-1.0526, -0.9699, 0.0214, -0.8240]) tensor([0.1595, 0.1732, 0.4668, 0.2005]) -Greedy action tensor([ 0.3165, -0.9056, -1.0579, 0.1420]) tensor([0.4188, 0.1234, 0.1060, 0.3518]) -Greedy action tensor([ 0.2869, -0.9676, 0.5985, -0.5740]) tensor([0.3253, 0.0928, 0.4443, 0.1376]) -Greedy action tensor([ 0.3349, 0.7565, -0.6449, -0.6325]) tensor([0.3049, 0.4648, 0.1145, 0.1159]) -Greedy action tensor([-0.3737, -0.6954, -0.2026, -1.1762]) tensor([0.2976, 0.2158, 0.3532, 0.1334]) -Greedy action tensor([-0.4012, -1.1118, 0.4340, 0.3805]) tensor([0.1672, 0.0821, 0.3854, 0.3653]) -Greedy action tensor([0.1244, 0.7867, 0.3195, 0.2016]) tensor([0.1910, 0.3704, 0.2322, 0.2064]) -Greedy action tensor([ 1.3313, -0.3973, -0.2349, 0.4804]) tensor([0.5515, 0.0979, 0.1152, 0.2355]) -Greedy action tensor([ 1.4002, -0.2266, -0.4453, -0.0245]) tensor([0.6269, 0.1232, 0.0990, 0.1508]) -Greedy action tensor([ 1.6365, -0.6399, -0.5529, 0.4182]) tensor([0.6621, 0.0680, 0.0741, 0.1958]) -Greedy action tensor([ 1.3357, -0.4078, -0.6333, 0.8895]) tensor([0.5116, 0.0895, 0.0714, 0.3275]) -Greedy action tensor([ 1.1219, -0.3610, -0.0639, -0.1162]) tensor([0.5487, 0.1246, 0.1676, 0.1591]) -Greedy action tensor([ 2.4310, 0.8692, 0.1280, -0.2693]) tensor([0.7263, 0.1523, 0.0726, 0.0488]) -Greedy action tensor([ 2.2787, -0.8335, -0.7799, 0.5091]) tensor([0.7925, 0.0353, 0.0372, 0.1350]) -Greedy action tensor([ 1.0784, -0.3037, -0.5845, 0.3783]) tensor([0.5162, 0.1296, 0.0979, 0.2563]) -Greedy action tensor([ 2.3316, 0.3021, -0.3881, 0.2542]) tensor([0.7561, 0.0994, 0.0498, 0.0947]) -Greedy action tensor([ 1.4153, -0.8078, -0.4776, 0.4642]) tensor([0.6078, 0.0658, 0.0916, 0.2348]) -Greedy action tensor([ 1.4726, -1.0412, -0.5314, 0.6971]) tensor([0.5966, 0.0483, 0.0804, 0.2747]) -Greedy action tensor([ 1.7896, -1.0691, -0.4951, 0.3315]) tensor([0.7185, 0.0412, 0.0731, 0.1672]) -Greedy action tensor([ 2.1549, -0.7318, -0.6070, 0.3003]) tensor([0.7840, 0.0437, 0.0495, 0.1227]) -Greedy action tensor([ 1.7537, -0.9656, -0.0890, 0.5601]) tensor([0.6547, 0.0432, 0.1037, 0.1984]) -Greedy action tensor([ 1.2314, -0.3692, -0.9680, 0.5142]) tensor([0.5553, 0.1121, 0.0616, 0.2711]) -Greedy action tensor([ 0.9225, -0.3182, -0.6496, 0.2586]) tensor([0.4971, 0.1438, 0.1032, 0.2559]) -Greedy action tensor([ 1.9466, -0.6960, -0.3376, 0.5409]) tensor([0.7051, 0.0502, 0.0718, 0.1729]) -Greedy action tensor([ 1.3362, -0.1904, -0.2992, 0.3663]) tensor([0.5583, 0.1213, 0.1088, 0.2116]) -Greedy action tensor([ 1.3374, -0.7840, -0.3279, 0.8247]) tensor([0.5241, 0.0628, 0.0991, 0.3139]) -Greedy action tensor([ 1.5098, -0.7954, -0.7015, 0.1895]) tensor([0.6774, 0.0676, 0.0742, 0.1809]) -Greedy action tensor([ 1.5009, -0.3910, -0.5046, 0.6203]) tensor([0.5883, 0.0887, 0.0792, 0.2439]) -Greedy action tensor([ 0.7919, -0.2251, -0.2508, 0.3145]) tensor([0.4283, 0.1549, 0.1510, 0.2657]) -Greedy action tensor([ 1.0298, -0.3500, -0.7431, 0.2472]) tensor([0.5323, 0.1339, 0.0904, 0.2434]) -Greedy action tensor([ 1.0543, -0.4077, 0.1294, -0.0121]) tensor([0.5069, 0.1175, 0.2010, 0.1745]) -Greedy action tensor([ 1.3464, -0.4075, -0.3006, 0.2374]) tensor([0.5898, 0.1021, 0.1136, 0.1945]) -Greedy action tensor([ 1.9235, -0.8794, -0.4638, 0.5387]) tensor([0.7128, 0.0432, 0.0655, 0.1785]) -Greedy action tensor([ 1.2648, -0.9355, -0.2451, 0.0640]) tensor([0.6125, 0.0678, 0.1353, 0.1843]) -Greedy action tensor([ 1.9688, -0.1501, -0.1252, -0.1209]) tensor([0.7315, 0.0879, 0.0901, 0.0905]) -Greedy action tensor([ 1.8808, -0.8530, -0.3462, 0.6891]) tensor([0.6773, 0.0440, 0.0730, 0.2057]) -Greedy action tensor([ 2.1331e+00, -4.8526e-02, -2.3742e-02, -6.0827e-04]) tensor([0.7424, 0.0838, 0.0859, 0.0879]) -Greedy action tensor([ 1.8141, -1.1322, 0.0230, 0.0509]) tensor([0.7190, 0.0378, 0.1199, 0.1233]) -Greedy action tensor([ 1.4249, -0.3934, -0.9514, 0.6734]) tensor([0.5791, 0.0940, 0.0538, 0.2731]) -Greedy action tensor([ 0.9436, -0.6773, -0.2906, 0.2025]) tensor([0.5088, 0.1006, 0.1481, 0.2425]) -Greedy action tensor([ 1.4108, -0.3047, -0.2403, 0.2353]) tensor([0.5951, 0.1070, 0.1142, 0.1837]) -Greedy action tensor([ 1.1768, -0.2245, -0.3488, -0.0582]) tensor([0.5699, 0.1404, 0.1240, 0.1658]) -Greedy action tensor([ 1.7085, -0.7184, -0.9503, 0.5728]) tensor([0.6759, 0.0597, 0.0473, 0.2171]) -Greedy action tensor([ 1.6838, -0.4271, -0.5187, 0.3060]) tensor([0.6739, 0.0816, 0.0745, 0.1699]) -Greedy action tensor([ 1.3086, 0.0671, -0.4924, 0.2871]) tensor([0.5512, 0.1593, 0.0910, 0.1985]) -Greedy action tensor([ 1.1181, -0.3079, -0.6797, -0.0434]) tensor([0.5818, 0.1398, 0.0964, 0.1821]) -Greedy action tensor([ 1.8225, -0.6444, -0.4724, 0.1454]) tensor([0.7286, 0.0618, 0.0734, 0.1362]) -Greedy action tensor([ 1.0754, -0.2353, -1.0747, 0.0436]) tensor([0.5739, 0.1547, 0.0668, 0.2045]) -Greedy action tensor([ 1.4779, -0.7201, -0.5325, 0.3355]) tensor([0.6394, 0.0710, 0.0856, 0.2040]) -Greedy action tensor([ 2.2019, 0.0321, -0.0130, 0.5774]) tensor([0.7040, 0.0804, 0.0769, 0.1387]) -Greedy action tensor([ 1.5893, -0.6896, 0.0983, 0.9567]) tensor([0.5380, 0.0551, 0.1211, 0.2858]) -Greedy action tensor([ 1.5256, -0.4326, -0.4553, 0.3698]) tensor([0.6274, 0.0885, 0.0866, 0.1975]) -Greedy action tensor([ 1.6900, -0.9553, -0.3657, 0.6059]) tensor([0.6505, 0.0462, 0.0833, 0.2200]) -Greedy action tensor([ 1.3585, -0.4081, -0.6717, 0.1925]) tensor([0.6196, 0.1059, 0.0814, 0.1931]) -Greedy action tensor([ 1.8778, -1.0720, -0.6986, 0.4901]) tensor([0.7257, 0.0380, 0.0552, 0.1812]) -Greedy action tensor([ 1.5170, -0.6597, -0.4443, -0.0701]) tensor([0.6856, 0.0778, 0.0964, 0.1402]) -Greedy action tensor([ 1.6699, -0.9136, -0.0247, 0.4153]) tensor([0.6475, 0.0489, 0.1189, 0.1847]) -Greedy action tensor([ 1.5164, -0.4033, -0.5365, 0.6125]) tensor([0.5952, 0.0873, 0.0764, 0.2411]) -Greedy action tensor([ 1.5228, -0.6574, -0.4698, -0.0023]) tensor([0.6817, 0.0770, 0.0929, 0.1483]) -Greedy action tensor([ 1.3560, -0.3162, 0.0432, 0.0421]) tensor([0.5795, 0.1089, 0.1559, 0.1557]) -Greedy action tensor([ 1.2410, -0.7298, -0.2616, 0.3717]) tensor([0.5614, 0.0782, 0.1249, 0.2354]) -Greedy action tensor([ 1.1157, -0.3622, -0.1775, 0.1560]) tensor([0.5304, 0.1210, 0.1455, 0.2031]) -Greedy action tensor([ 1.5829, -0.8829, -0.5833, 0.6174]) tensor([0.6328, 0.0538, 0.0725, 0.2410]) -Greedy action tensor([ 1.4204, -0.5827, -0.6745, 0.3327]) tensor([0.6270, 0.0846, 0.0772, 0.2113]) -Greedy action tensor([ 1.5119, -0.1600, -0.8969, 0.6559]) tensor([0.5873, 0.1103, 0.0528, 0.2495]) -Greedy action tensor([ 1.7195, -1.0733, -0.1803, 0.1840]) tensor([0.7012, 0.0429, 0.1049, 0.1510]) -Greedy action tensor([ 2.3784, -1.2330, 0.0970, 0.9871]) tensor([0.7257, 0.0196, 0.0741, 0.1805]) -Greedy action tensor([ 0.7069, -0.3146, 0.2690, -0.0079]) tensor([0.4009, 0.1443, 0.2587, 0.1961]) -Greedy action tensor([ 1.5385, -0.0834, -0.8784, 0.2759]) tensor([0.6371, 0.1258, 0.0568, 0.1802]) -Greedy action tensor([ 1.8226, -1.3373, -0.5396, -0.0431]) tensor([0.7743, 0.0329, 0.0730, 0.1199]) -Greedy action tensor([ 2.0158, -0.9551, -0.1860, 0.5254]) tensor([0.7209, 0.0370, 0.0797, 0.1624]) -Greedy action tensor([ 1.4865, 0.4328, -0.6139, 0.5071]) tensor([0.5415, 0.1888, 0.0663, 0.2034]) -Greedy action tensor([ 1.5367, 0.3503, -0.5106, 0.4550]) tensor([0.5639, 0.1722, 0.0728, 0.1912]) -Greedy action tensor([ 1.1043, -0.4585, -0.1117, 0.0951]) tensor([0.5346, 0.1120, 0.1585, 0.1949]) -Greedy action tensor([ 1.7227, -0.9609, -0.3431, 0.0068]) tensor([0.7274, 0.0497, 0.0922, 0.1308]) -Greedy action tensor([ 1.5344, 0.4510, -0.5258, 0.0041]) tensor([0.5944, 0.2012, 0.0757, 0.1287]) -Greedy action tensor([ 1.3501, -0.1259, -0.4020, 0.1834]) tensor([0.5837, 0.1334, 0.1012, 0.1817]) -Greedy action tensor([ 0.9520, 0.1260, -1.0898, 0.4692]) tensor([0.4577, 0.2004, 0.0594, 0.2825]) -Greedy action tensor([ 1.4084, -0.5054, -0.4330, 0.1232]) tensor([0.6318, 0.0932, 0.1002, 0.1748]) -Greedy action tensor([ 1.9908, -0.6561, -0.7088, 0.0027]) tensor([0.7843, 0.0556, 0.0527, 0.1074]) -Greedy action tensor([ 1.1662, -0.3806, -0.5567, 0.0497]) tensor([0.5818, 0.1239, 0.1039, 0.1905]) -Greedy action tensor([ 1.3229, -0.4281, -0.3224, 0.3526]) tensor([0.5729, 0.0995, 0.1105, 0.2171]) -Greedy action tensor([ 1.5392, -0.5577, -0.3841, 0.1689]) tensor([0.6566, 0.0807, 0.0959, 0.1668]) -Greedy action tensor([ 2.1077, -0.7107, -0.4354, 0.6289]) tensor([0.7319, 0.0437, 0.0575, 0.1668]) -Greedy action tensor([ 1.5789, -0.8672, -0.1129, 0.0856]) tensor([0.6687, 0.0579, 0.1232, 0.1502]) -Greedy action tensor([ 1.2082, -0.6164, -0.0403, 0.1505]) tensor([0.5570, 0.0898, 0.1598, 0.1934]) -Greedy action tensor([ 1.9974, -0.6914, -0.6018, 0.2832]) tensor([0.7562, 0.0514, 0.0562, 0.1362]) -Greedy action tensor([ 1.4934, -0.5192, -0.2038, 0.0274]) tensor([0.6461, 0.0864, 0.1184, 0.1492]) -Greedy action tensor([ 7.4864e-01, -1.0015e-01, 3.2320e-02, -4.8683e-04]) tensor([0.4185, 0.1791, 0.2045, 0.1979]) -Greedy action tensor([ 0.3271, -0.1320, -0.0049, -0.2289]) tensor([0.3421, 0.2162, 0.2455, 0.1962]) -Greedy action tensor([ 0.9712, -0.6013, -0.0924, -0.5895]) tensor([0.5673, 0.1177, 0.1958, 0.1191]) -Greedy action tensor([ 1.0618, -0.6042, 0.0278, -0.3319]) tensor([0.5578, 0.1054, 0.1983, 0.1384]) -Greedy action tensor([ 0.6623, -0.3613, -0.0356, -0.1224]) tensor([0.4323, 0.1553, 0.2151, 0.1972]) -Greedy action tensor([ 1.1762, -0.5338, -0.0820, -0.4112]) tensor([0.5990, 0.1083, 0.1702, 0.1225]) -Greedy action tensor([ 0.8163, -0.0565, 0.0395, -0.1670]) tensor([0.4441, 0.1855, 0.2042, 0.1661]) -Greedy action tensor([ 0.6788, -0.6555, -0.0798, -0.2245]) tensor([0.4680, 0.1232, 0.2192, 0.1896]) -Greedy action tensor([ 0.5843, -0.5048, -0.0201, -0.3600]) tensor([0.4402, 0.1481, 0.2405, 0.1712]) -Greedy action tensor([ 0.6005, -0.3941, 0.1078, -0.2870]) tensor([0.4180, 0.1546, 0.2554, 0.1721]) -Greedy action tensor([ 0.5414, -0.4331, 0.0350, -0.0802]) tensor([0.3973, 0.1499, 0.2394, 0.2134]) -Greedy action tensor([ 1.0445, -0.6998, -0.1252, -0.5561]) tensor([0.5928, 0.1036, 0.1840, 0.1196]) -Greedy action tensor([ 1.0788, -0.6545, -0.1096, -0.6117]) tensor([0.6003, 0.1061, 0.1829, 0.1107]) -Greedy action tensor([ 1.0736, -0.5391, -0.0916, -0.4771]) tensor([0.5803, 0.1157, 0.1810, 0.1231]) -Greedy action tensor([ 0.7366, -0.3969, -0.0214, -0.2242]) tensor([0.4602, 0.1481, 0.2156, 0.1761]) -Greedy action tensor([ 0.4747, -0.1677, -0.0388, -0.1137]) tensor([0.3732, 0.1963, 0.2233, 0.2072]) -Greedy action tensor([ 0.9428, -0.7903, 0.1124, -0.6720]) tensor([0.5520, 0.0976, 0.2406, 0.1098]) -Greedy action tensor([ 0.6414, -0.3913, -0.0191, -0.4549]) tensor([0.4532, 0.1613, 0.2341, 0.1514]) -Greedy action tensor([ 0.7132, -0.4553, -0.0233, -0.0988]) tensor([0.4477, 0.1392, 0.2144, 0.1988]) -Greedy action tensor([ 1.0022, -0.6866, -0.0421, -0.3719]) tensor([0.5587, 0.1032, 0.1966, 0.1414]) -Greedy action tensor([ 0.7307, -0.1667, 0.1448, -0.0617]) tensor([0.4137, 0.1687, 0.2303, 0.1873]) -Greedy action tensor([ 0.5255, -0.5542, -0.0060, -0.5006]) tensor([0.4375, 0.1486, 0.2571, 0.1568]) -Greedy action tensor([ 0.3253, -0.1567, -0.0568, 0.0160]) tensor([0.3296, 0.2036, 0.2249, 0.2419]) -Greedy action tensor([ 0.7221, -0.4260, -0.0426, -0.2677]) tensor([0.4642, 0.1473, 0.2161, 0.1725]) -Greedy action tensor([ 0.9070, -0.2277, 0.1277, -0.3718]) tensor([0.4858, 0.1562, 0.2228, 0.1352]) -Greedy action tensor([ 1.0223, -0.6276, 0.0295, -0.4430]) tensor([0.5575, 0.1071, 0.2066, 0.1288]) -Greedy action tensor([ 0.7834, -0.4475, -0.2004, -0.2569]) tensor([0.4952, 0.1446, 0.1852, 0.1750]) -Greedy action tensor([ 0.9927, -0.6256, 0.0586, -0.4343]) tensor([0.5461, 0.1083, 0.2146, 0.1311]) -Greedy action tensor([ 1.2207, -0.7045, -0.0366, -0.3103]) tensor([0.6073, 0.0886, 0.1727, 0.1314]) -Greedy action tensor([ 0.4824, -0.0571, -0.1850, -0.2678]) tensor([0.3894, 0.2270, 0.1998, 0.1839]) -Greedy action tensor([ 0.8859, -0.5322, -0.1596, -0.3391]) tensor([0.5298, 0.1283, 0.1862, 0.1556]) -Greedy action tensor([ 0.7832, -0.5782, -0.0668, -0.5202]) tensor([0.5114, 0.1311, 0.2186, 0.1389]) -Greedy action tensor([ 0.7712, -0.4186, -0.0169, -0.2414]) tensor([0.4712, 0.1434, 0.2143, 0.1712]) -Greedy action tensor([ 1.1372, -0.6994, -0.1483, -0.3684]) tensor([0.6032, 0.0961, 0.1668, 0.1338]) -Greedy action tensor([ 1.0239, -0.5853, -0.0974, -0.5648]) tensor([0.5780, 0.1156, 0.1883, 0.1180]) -Greedy action tensor([ 0.8436, -0.5385, 0.1068, -0.3139]) tensor([0.4893, 0.1228, 0.2342, 0.1538]) -Greedy action tensor([ 0.9160, -0.5222, 0.0573, -0.2037]) tensor([0.5032, 0.1194, 0.2132, 0.1642]) -Greedy action tensor([ 0.7848, -0.2103, -0.0378, -0.0268]) tensor([0.4438, 0.1641, 0.1950, 0.1971]) -Greedy action tensor([ 0.6908, -0.6106, -0.0726, -0.3474]) tensor([0.4779, 0.1301, 0.2228, 0.1692]) -Greedy action tensor([ 0.4780, -0.2959, 0.1200, -0.3422]) tensor([0.3845, 0.1773, 0.2688, 0.1693]) -Greedy action tensor([ 1.1266, -0.6768, -0.0158, -0.8766]) tensor([0.6178, 0.1018, 0.1971, 0.0833]) -Greedy action tensor([ 1.1081, -0.9780, 0.0669, -0.7085]) tensor([0.6098, 0.0757, 0.2153, 0.0991]) -Greedy action tensor([ 0.6643, -0.4893, -0.0417, -0.2366]) tensor([0.4514, 0.1424, 0.2228, 0.1834]) -Greedy action tensor([ 0.5382, 0.0589, -0.0399, 0.0926]) tensor([0.3545, 0.2195, 0.1989, 0.2270]) -Greedy action tensor([ 1.1114, -0.8445, 0.1300, -0.7862]) tensor([0.6002, 0.0849, 0.2249, 0.0900]) -Greedy action tensor([ 0.5216, -0.4345, -0.0589, -0.1573]) tensor([0.4080, 0.1568, 0.2283, 0.2069]) -Greedy action tensor([ 0.6192, -0.6193, -0.1288, -0.1349]) tensor([0.4477, 0.1298, 0.2119, 0.2106]) -Greedy action tensor([ 1.4626, -0.9933, -0.2057, -0.5923]) tensor([0.7130, 0.0612, 0.1345, 0.0913]) -Greedy action tensor([ 0.3934, 0.1482, -0.2423, -0.2823]) tensor([0.3545, 0.2774, 0.1877, 0.1804]) -Greedy action tensor([ 0.9487, -0.5968, 0.0864, -0.5063]) tensor([0.5351, 0.1141, 0.2259, 0.1249]) -Greedy action tensor([ 0.6368, -0.0388, 0.0009, -0.0748]) tensor([0.3954, 0.2012, 0.2093, 0.1941]) -Greedy action tensor([ 0.8847, -0.4545, -0.1212, -0.4963]) tensor([0.5322, 0.1395, 0.1946, 0.1338]) -Greedy action tensor([ 0.6508, -0.0629, -0.0099, -0.1165]) tensor([0.4048, 0.1983, 0.2091, 0.1879]) -Greedy action tensor([ 0.9150, -0.6227, 0.0355, -0.4798]) tensor([0.5326, 0.1144, 0.2210, 0.1320]) -Greedy action tensor([ 0.7702, 0.0270, -0.0267, 0.0802]) tensor([0.4119, 0.1959, 0.1856, 0.2066]) -Greedy action tensor([ 0.8928, -0.4046, -0.0542, -0.2157]) tensor([0.5022, 0.1372, 0.1948, 0.1657]) -Greedy action tensor([ 0.5349, -0.3300, -0.1781, -0.0716]) tensor([0.4071, 0.1714, 0.1995, 0.2220]) -Greedy action tensor([ 0.7525, -0.2783, -0.1324, -0.2042]) tensor([0.4643, 0.1656, 0.1917, 0.1784]) -Greedy action tensor([ 0.6990, -0.5732, -0.0129, -0.3212]) tensor([0.4692, 0.1315, 0.2302, 0.1691]) -Greedy action tensor([ 0.7229, -0.5711, 0.3100, -0.2210]) tensor([0.4301, 0.1179, 0.2846, 0.1674]) -Greedy action tensor([ 1.0938, -0.6428, 0.0208, -0.6299]) tensor([0.5894, 0.1038, 0.2016, 0.1052]) -Greedy action tensor([ 0.9527, -0.3922, -0.2000, -0.2846]) tensor([0.5358, 0.1396, 0.1692, 0.1555]) -Greedy action tensor([ 0.9563, -0.4889, 0.0015, -0.1538]) tensor([0.5128, 0.1209, 0.1974, 0.1690]) -Greedy action tensor([ 0.7022, -0.4287, -0.1927, -0.2290]) tensor([0.4705, 0.1518, 0.1923, 0.1854]) -Greedy action tensor([ 0.5911, -0.6077, -0.0745, -0.2991]) tensor([0.4492, 0.1355, 0.2309, 0.1844]) -Greedy action tensor([ 0.9042, -0.6388, -0.0020, -0.2973]) tensor([0.5212, 0.1114, 0.2106, 0.1568]) -Greedy action tensor([ 0.7438, -0.4369, 0.0596, -0.1815]) tensor([0.4529, 0.1391, 0.2285, 0.1795]) -Greedy action tensor([ 0.5676, -0.6319, 0.1000, -0.6946]) tensor([0.4523, 0.1363, 0.2834, 0.1280]) -Greedy action tensor([ 1.1544, -0.5602, 0.1564, -0.2941]) tensor([0.5607, 0.1009, 0.2067, 0.1317]) -Greedy action tensor([ 0.8429, -0.8026, -0.0592, -0.3481]) tensor([0.5256, 0.1014, 0.2132, 0.1597]) -Greedy action tensor([ 0.7362, -0.3214, 0.0287, -0.1133]) tensor([0.4410, 0.1531, 0.2173, 0.1886]) -Greedy action tensor([ 0.6914, -0.4294, -0.0271, -0.5047]) tensor([0.4726, 0.1541, 0.2304, 0.1429]) -Greedy action tensor([ 0.8209, -0.2154, -0.0793, -0.1699]) tensor([0.4689, 0.1664, 0.1906, 0.1741]) -Greedy action tensor([ 0.3993, -0.1760, -0.0888, -0.1250]) tensor([0.3612, 0.2032, 0.2217, 0.2138]) -Greedy action tensor([ 1.0169, -0.8502, 0.1385, -0.4553]) tensor([0.5557, 0.0859, 0.2309, 0.1275]) -Greedy action tensor([ 0.8049, -0.2888, -0.1038, 0.0118]) tensor([0.4565, 0.1529, 0.1840, 0.2066]) -Greedy action tensor([ 0.3099, 0.3136, -0.0938, -0.0111]) tensor([0.2944, 0.2955, 0.1966, 0.2136]) -Greedy action tensor([ 1.0519, -0.5234, -0.2749, -0.5691]) tensor([0.5988, 0.1239, 0.1589, 0.1184]) -Greedy action tensor([ 0.2331, 0.0069, -0.0413, -0.0233]) tensor([0.3002, 0.2394, 0.2281, 0.2323]) -Greedy action tensor([ 1.1221, -0.6417, 0.0209, -0.4870]) tensor([0.5869, 0.1006, 0.1951, 0.1174]) -Greedy action tensor([ 0.9321, -0.7649, 0.0266, -0.5797]) tensor([0.5531, 0.1013, 0.2236, 0.1220]) -Greedy action tensor([-0.9981, 0.8086, 0.2143, -0.0978]) tensor([0.0774, 0.4717, 0.2603, 0.1905]) -Greedy action tensor([-1.9362, -0.4415, 0.6618, -0.1758]) tensor([0.0405, 0.1804, 0.5438, 0.2353]) -Greedy action tensor([-1.6673, -0.2249, 0.4835, -0.0538]) tensor([0.0531, 0.2245, 0.4560, 0.2664]) -Greedy action tensor([-1.8601, -0.4270, 0.6232, -0.1422]) tensor([0.0440, 0.1843, 0.5267, 0.2450]) -Greedy action tensor([-1.8809, -0.4592, 0.6301, -0.1333]) tensor([0.0431, 0.1786, 0.5309, 0.2474]) -Greedy action tensor([-1.8529, -0.4162, 0.6720, -0.0855]) tensor([0.0425, 0.1786, 0.5303, 0.2486]) -Greedy action tensor([-1.8671, -0.3638, 0.6152, -0.1505]) tensor([0.0434, 0.1952, 0.5197, 0.2417]) -Greedy action tensor([-1.8773, -0.4227, 0.6431, -0.1165]) tensor([0.0425, 0.1820, 0.5283, 0.2472]) -Greedy action tensor([-1.1398, -0.3060, 0.3095, -0.1234]) tensor([0.0968, 0.2229, 0.4126, 0.2676]) -Greedy action tensor([-0.9833, 0.6640, 0.1590, -0.1079]) tensor([0.0853, 0.4428, 0.2673, 0.2046]) -Greedy action tensor([-1.6542, -0.4251, 0.5438, -0.0188]) tensor([0.0539, 0.1842, 0.4854, 0.2765]) -Greedy action tensor([-1.8866, -0.4233, 0.6373, -0.1521]) tensor([0.0426, 0.1841, 0.5318, 0.2415]) -Greedy action tensor([-1.9311, -0.3817, 0.6498, -0.1701]) tensor([0.0404, 0.1904, 0.5340, 0.2352]) -Greedy action tensor([-1.9042, -0.4504, 0.6443, -0.1640]) tensor([0.0421, 0.1801, 0.5381, 0.2398]) -Greedy action tensor([-0.6023, -0.4023, 0.1854, 0.2418]) tensor([0.1482, 0.1811, 0.3259, 0.3448]) -Greedy action tensor([-1.7989, -0.4235, 0.6171, -0.0938]) tensor([0.0462, 0.1827, 0.5171, 0.2540]) -Greedy action tensor([-1.5935, -0.3642, 0.4806, -0.0659]) tensor([0.0589, 0.2013, 0.4685, 0.2713]) -Greedy action tensor([-1.5233, -0.5906, 0.4543, 0.0748]) tensor([0.0637, 0.1618, 0.4599, 0.3147]) -Greedy action tensor([-1.8958, -0.3003, 0.6270, -0.1536]) tensor([0.0415, 0.2046, 0.5171, 0.2369]) -Greedy action tensor([-1.5902, -0.5294, 0.5054, 0.0216]) tensor([0.0587, 0.1696, 0.4774, 0.2943]) -Greedy action tensor([-1.3603, -0.3932, 0.3765, -0.0358]) tensor([0.0765, 0.2012, 0.4345, 0.2877]) -Greedy action tensor([-1.8912, -0.4403, 0.6394, -0.1559]) tensor([0.0426, 0.1816, 0.5346, 0.2413]) -Greedy action tensor([-1.6397, -0.4209, 0.5135, -0.0809]) tensor([0.0563, 0.1906, 0.4852, 0.2678]) -Greedy action tensor([-1.8841, -0.3815, 0.6374, -0.1492]) tensor([0.0424, 0.1903, 0.5272, 0.2401]) -Greedy action tensor([-1.6909, 0.2227, 0.4561, -0.0495]) tensor([0.0465, 0.3152, 0.3981, 0.2401]) -Greedy action tensor([-1.8889, -0.4505, 0.6444, -0.1530]) tensor([0.0426, 0.1794, 0.5363, 0.2416]) -Greedy action tensor([-1.8579, -0.3411, 0.6308, -0.1242]) tensor([0.0430, 0.1959, 0.5178, 0.2433]) -Greedy action tensor([-0.8484, 0.7283, 0.1787, -0.0933]) tensor([0.0929, 0.4497, 0.2596, 0.1978]) -Greedy action tensor([-1.9111, -0.3856, 0.6511, -0.1563]) tensor([0.0411, 0.1889, 0.5325, 0.2375]) -Greedy action tensor([-1.8329, -0.4683, 0.7258, 0.0349]) tensor([0.0411, 0.1610, 0.5315, 0.2663]) -Greedy action tensor([-1.9358, -0.4219, 0.6600, -0.1725]) tensor([0.0404, 0.1834, 0.5410, 0.2353]) -Greedy action tensor([-0.5485, 0.2929, 0.0929, -0.0344]) tensor([0.1451, 0.3366, 0.2756, 0.2427]) -Greedy action tensor([-1.8020, -0.3861, 0.6282, -0.0760]) tensor([0.0453, 0.1864, 0.5141, 0.2542]) -Greedy action tensor([-1.9168, -0.4244, 0.6564, -0.1701]) tensor([0.0412, 0.1831, 0.5396, 0.2361]) -Greedy action tensor([-1.8756, -0.4141, 0.6420, -0.1405]) tensor([0.0428, 0.1844, 0.5303, 0.2425]) -Greedy action tensor([-1.5116, -0.4777, 0.4792, 0.0844]) tensor([0.0622, 0.1750, 0.4557, 0.3070]) -Greedy action tensor([-1.3918, 0.4914, 0.2764, 0.2123]) tensor([0.0560, 0.3683, 0.2971, 0.2786]) -Greedy action tensor([-1.6849, 0.2362, 0.5674, -0.4930]) tensor([0.0485, 0.3310, 0.4609, 0.1596]) -Greedy action tensor([-1.7760, -0.3279, 0.6395, -0.0113]) tensor([0.0449, 0.1909, 0.5023, 0.2620]) -Greedy action tensor([-1.6742, -0.2508, 0.6002, 0.1129]) tensor([0.0480, 0.1992, 0.4664, 0.2865]) -Greedy action tensor([-1.5057, -0.2808, 0.4336, -0.0782]) tensor([0.0644, 0.2192, 0.4479, 0.2685]) -Greedy action tensor([ 0.6494, -0.0255, 0.7357, 1.3724]) tensor([0.2146, 0.1093, 0.2339, 0.4422]) -Greedy action tensor([1.0964, 1.3438, 0.1375, 0.8028]) tensor([0.2933, 0.3756, 0.1124, 0.2187]) -Greedy action tensor([-1.8749, -0.3614, 0.6330, -0.1363]) tensor([0.0425, 0.1932, 0.5223, 0.2420]) -Greedy action tensor([-1.9088, -0.4329, 0.6506, -0.1620]) tensor([0.0416, 0.1820, 0.5378, 0.2386]) -Greedy action tensor([-1.4361, -0.3245, 0.4547, -0.1004]) tensor([0.0691, 0.2101, 0.4579, 0.2628]) -Greedy action tensor([-1.7943, -0.2128, 0.5651, -0.1411]) tensor([0.0461, 0.2244, 0.4884, 0.2411]) -Greedy action tensor([-1.6930, 0.0315, 0.4984, -0.1045]) tensor([0.0489, 0.2743, 0.4375, 0.2394]) -Greedy action tensor([-1.9386, -0.4423, 0.6645, -0.1770]) tensor([0.0403, 0.1801, 0.5447, 0.2348]) -Greedy action tensor([-1.9106, -0.3664, 0.6446, -0.1634]) tensor([0.0412, 0.1928, 0.5299, 0.2362]) -Greedy action tensor([-1.8967, -0.3218, 0.6266, -0.1738]) tensor([0.0418, 0.2021, 0.5217, 0.2343]) -Greedy action tensor([-1.7914, -0.5349, 0.8563, 0.1244]) tensor([0.0393, 0.1382, 0.5554, 0.2671]) -Greedy action tensor([-1.9207, -0.4194, 0.6491, -0.1649]) tensor([0.0411, 0.1844, 0.5367, 0.2378]) -Greedy action tensor([-0.8370, 0.4713, 0.2375, -0.1872]) tensor([0.1048, 0.3877, 0.3069, 0.2007]) -Greedy action tensor([-1.7111, -0.3010, 0.5348, -0.1041]) tensor([0.0512, 0.2097, 0.4837, 0.2554]) -Greedy action tensor([-1.9125, -0.4358, 0.6473, -0.1637]) tensor([0.0416, 0.1820, 0.5376, 0.2389]) -Greedy action tensor([-1.9210, -0.4377, 0.6541, -0.1690]) tensor([0.0411, 0.1813, 0.5403, 0.2372]) -Greedy action tensor([-1.5359, -0.5215, 0.4479, 0.0313]) tensor([0.0632, 0.1743, 0.4595, 0.3030]) -Greedy action tensor([-1.9288, -0.7778, 0.7473, -0.0770]) tensor([0.0399, 0.1261, 0.5797, 0.2542]) -Greedy action tensor([-0.9755, 0.9073, 0.1483, 0.1217]) tensor([0.0733, 0.4817, 0.2255, 0.2196]) -Greedy action tensor([-1.9274, -0.4187, 0.6587, -0.1708]) tensor([0.0407, 0.1838, 0.5399, 0.2356]) -Greedy action tensor([-1.9247, -0.4387, 0.6577, -0.1690]) tensor([0.0409, 0.1809, 0.5414, 0.2368]) -Greedy action tensor([-1.8125, -0.4322, 0.6224, -0.0967]) tensor([0.0456, 0.1811, 0.5200, 0.2533]) -Greedy action tensor([-1.6077, 0.4576, 0.3477, -0.0372]) tensor([0.0482, 0.3799, 0.3403, 0.2316]) -Greedy action tensor([-1.8178, -0.4513, 0.6033, -0.1121]) tensor([0.0461, 0.1808, 0.5192, 0.2539]) -Greedy action tensor([-1.8861, -0.4327, 0.6534, -0.1429]) tensor([0.0423, 0.1808, 0.5355, 0.2415]) -Greedy action tensor([-1.4398, 0.0053, 0.3717, 0.0956]) tensor([0.0625, 0.2651, 0.3824, 0.2901]) -Greedy action tensor([-1.8987, -0.4727, 0.6511, -0.1490]) tensor([0.0422, 0.1755, 0.5398, 0.2425]) -Greedy action tensor([-1.3899, 0.6587, 0.2243, 0.1222]) tensor([0.0546, 0.4235, 0.2743, 0.2477]) -Greedy action tensor([-1.2418, 0.5271, 0.2641, -0.0019]) tensor([0.0674, 0.3955, 0.3040, 0.2330]) -Greedy action tensor([-1.4000, 0.0086, 0.4395, -0.2225]) tensor([0.0684, 0.2796, 0.4302, 0.2219]) -Greedy action tensor([-1.1795, 0.4299, 0.2900, -0.1137]) tensor([0.0755, 0.3773, 0.3281, 0.2191]) -Greedy action tensor([-1.6113, 0.0554, 0.4174, -0.0302]) tensor([0.0533, 0.2822, 0.4054, 0.2591]) -Greedy action tensor([-1.3454, -0.2013, 0.5836, 0.6569]) tensor([0.0543, 0.1704, 0.3735, 0.4019]) -Greedy action tensor([-1.8924, -0.3500, 0.6302, -0.1526]) tensor([0.0420, 0.1962, 0.5228, 0.2390]) -Greedy action tensor([-1.8275, -0.4575, 0.6047, -0.1123]) tensor([0.0457, 0.1799, 0.5204, 0.2540]) -Greedy action tensor([-1.8561, -0.4141, 0.6294, -0.1288]) tensor([0.0437, 0.1850, 0.5252, 0.2461]) -Greedy action tensor([-1.9199, -0.4427, 0.6560, -0.1685]) tensor([0.0412, 0.1804, 0.5412, 0.2373]) -Greedy action tensor([-1.9192, -0.4403, 0.6526, -0.1674]) tensor([0.0413, 0.1810, 0.5399, 0.2378]) -Greedy action tensor([-1.8848, -0.3692, 0.6372, -0.1453]) tensor([0.0422, 0.1921, 0.5255, 0.2403]) -Greedy action tensor([-1.8249, -0.4386, 0.6028, -0.1267]) tensor([0.0459, 0.1835, 0.5199, 0.2507]) -Greedy action tensor([ 0.2822, -1.5682, -0.3981, 0.7029]) tensor([0.3138, 0.0493, 0.1589, 0.4779]) -Greedy action tensor([-0.6767, -0.6211, -0.7916, 0.7514]) tensor([0.1405, 0.1485, 0.1252, 0.5858]) -Greedy action tensor([ 0.0369, -1.2271, -0.0246, -1.2221]) tensor([0.3989, 0.1127, 0.3751, 0.1133]) -Greedy action tensor([-0.6638, 0.4813, -0.3319, -0.8914]) tensor([0.1579, 0.4963, 0.2201, 0.1258]) -Greedy action tensor([ 0.8179, -0.2379, -0.6607, 1.5662]) tensor([0.2710, 0.0943, 0.0618, 0.5729]) -Greedy action tensor([-0.3639, 0.1030, -0.2215, -0.3336]) tensor([0.2093, 0.3338, 0.2413, 0.2157]) -Greedy action tensor([ 0.3287, -0.6616, 0.8144, -0.0840]) tensor([0.2733, 0.1015, 0.4443, 0.1809]) -Greedy action tensor([-1.4686, -0.7338, -0.3315, -0.9010]) tensor([0.1255, 0.2617, 0.3913, 0.2214]) -Greedy action tensor([ 0.9602, -0.2009, 0.1748, -0.1676]) tensor([0.4778, 0.1496, 0.2179, 0.1547]) -Greedy action tensor([-0.5944, -0.1469, 0.5326, -0.8949]) tensor([0.1565, 0.2448, 0.4829, 0.1159]) -Greedy action tensor([ 0.5904, -1.2986, -0.0125, -0.8041]) tensor([0.5138, 0.0777, 0.2811, 0.1274]) -Greedy action tensor([-0.8543, -0.1301, 0.2782, -0.8540]) tensor([0.1395, 0.2879, 0.4330, 0.1396]) -Greedy action tensor([ 0.3005, -0.1292, 0.1186, -0.0977]) tensor([0.3169, 0.2062, 0.2642, 0.2128]) -Greedy action tensor([-0.3887, -1.2497, -1.1075, -0.5713]) tensor([0.3645, 0.1541, 0.1777, 0.3037]) -Greedy action tensor([-0.6757, -0.1880, 0.3690, -0.3748]) tensor([0.1466, 0.2387, 0.4167, 0.1980]) -Greedy action tensor([-0.5212, -0.7242, 0.7039, -1.4261]) tensor([0.1778, 0.1451, 0.6052, 0.0719]) -Greedy action tensor([-0.6229, -1.3679, 0.3260, -1.4400]) tensor([0.2223, 0.1055, 0.5740, 0.0982]) -Greedy action tensor([ 0.7672, -0.4362, -0.1410, 0.7405]) tensor([0.3735, 0.1121, 0.1506, 0.3637]) -Greedy action tensor([ 0.6566, -0.2805, -0.1476, -0.5490]) tensor([0.4676, 0.1832, 0.2092, 0.1400]) -Greedy action tensor([-0.5032, -0.6889, -0.7752, 0.1794]) tensor([0.2188, 0.1817, 0.1667, 0.4329]) -Greedy action tensor([ 0.8483, -0.1289, -1.2313, 0.2426]) tensor([0.4885, 0.1839, 0.0611, 0.2666]) -Greedy action tensor([ 0.0698, -0.8484, 0.5493, -0.8575]) tensor([0.2933, 0.1171, 0.4737, 0.1160]) -Greedy action tensor([-0.6746, -1.3335, 0.1227, -0.8468]) tensor([0.2184, 0.1130, 0.4847, 0.1839]) -Greedy action tensor([-0.0341, -0.5020, -0.4520, -0.3438]) tensor([0.3313, 0.2075, 0.2181, 0.2431]) -Greedy action tensor([-0.1712, -0.7801, -0.4406, -0.7798]) tensor([0.3506, 0.1907, 0.2678, 0.1908]) -Greedy action tensor([-0.4101, -0.8458, 0.1582, -0.8555]) tensor([0.2468, 0.1596, 0.4356, 0.1581]) -Greedy action tensor([-1.5198, -0.4627, -0.6815, 0.4070]) tensor([0.0766, 0.2204, 0.1771, 0.5259]) -Greedy action tensor([-0.1546, 0.1747, -0.3529, -0.6642]) tensor([0.2624, 0.3648, 0.2152, 0.1576]) -Greedy action tensor([ 0.1935, 0.5804, -0.3284, -0.4368]) tensor([0.2779, 0.4092, 0.1649, 0.1480]) -Greedy action tensor([-0.9577, -0.8746, -0.0218, 0.4932]) tensor([0.1123, 0.1220, 0.2864, 0.4793]) -Greedy action tensor([-0.4401, 0.0327, -0.3720, 0.2959]) tensor([0.1735, 0.2784, 0.1858, 0.3623]) -Greedy action tensor([-0.3876, -0.9811, -0.0030, -0.7490]) tensor([0.2689, 0.1486, 0.3951, 0.1874]) -Greedy action tensor([-0.0068, -0.0714, 0.7940, -0.4251]) tensor([0.2073, 0.1944, 0.4618, 0.1365]) -Greedy action tensor([-0.2732, -0.7557, 0.6997, -0.3571]) tensor([0.1930, 0.1191, 0.5105, 0.1774]) -Greedy action tensor([ 0.2877, -2.4889, -0.3245, -0.2728]) tensor([0.4597, 0.0286, 0.2492, 0.2624]) -Greedy action tensor([-1.5762, -2.2906, 0.4330, -0.1384]) tensor([0.0760, 0.0372, 0.5668, 0.3200]) -Greedy action tensor([-0.0856, -1.1773, 0.2249, -0.9560]) tensor([0.3206, 0.1076, 0.4374, 0.1343]) -Greedy action tensor([ 0.6449, -0.8363, -0.1387, -0.4846]) tensor([0.4982, 0.1133, 0.2276, 0.1610]) -Greedy action tensor([ 0.0124, -1.0464, 0.2901, -0.9612]) tensor([0.3284, 0.1139, 0.4336, 0.1241]) -Greedy action tensor([ 0.4100, -0.1030, -0.4742, 0.5733]) tensor([0.3136, 0.1877, 0.1295, 0.3692]) -Greedy action tensor([-0.0167, 0.0447, -0.1500, -0.8434]) tensor([0.2962, 0.3150, 0.2592, 0.1296]) -Greedy action tensor([-0.1407, -0.9836, -1.5380, -0.4898]) tensor([0.4196, 0.1806, 0.1038, 0.2960]) -Greedy action tensor([ 0.7755, -2.0232, -0.2809, 0.4000]) tensor([0.4772, 0.0291, 0.1659, 0.3278]) -Greedy action tensor([ 1.2924, -0.8697, -0.5708, 0.8845]) tensor([0.5167, 0.0595, 0.0802, 0.3436]) -Greedy action tensor([ 1.5893, -0.8779, 0.2847, 0.4460]) tensor([0.5971, 0.0506, 0.1620, 0.1903]) -Greedy action tensor([ 0.3125, -0.6107, -0.8982, 0.9095]) tensor([0.2848, 0.1131, 0.0848, 0.5173]) -Greedy action tensor([-0.0429, -1.0408, -0.1278, -0.4771]) tensor([0.3407, 0.1256, 0.3130, 0.2207]) -Greedy action tensor([ 0.1747, -0.4540, -0.9900, -0.9658]) tensor([0.4619, 0.2463, 0.1441, 0.1476]) -Greedy action tensor([ 0.0131, 0.6808, -0.6862, -0.3515]) tensor([0.2415, 0.4708, 0.1200, 0.1677]) -Greedy action tensor([ 1.2216, 0.4030, 0.0450, -0.0508]) tensor([0.4927, 0.2173, 0.1519, 0.1380]) -Greedy action tensor([ 1.2009, 0.0663, -0.1421, -0.4274]) tensor([0.5621, 0.1808, 0.1468, 0.1103]) -Greedy action tensor([ 1.1866, 0.4639, 0.2615, -0.0936]) tensor([0.4630, 0.2248, 0.1836, 0.1287]) -Greedy action tensor([ 0.8504, -0.4894, 0.0446, 0.6975]) tensor([0.3896, 0.1020, 0.1740, 0.3343]) -Greedy action tensor([-0.4318, -1.3298, 0.5260, -0.5174]) tensor([0.2028, 0.0826, 0.5284, 0.1862]) -Greedy action tensor([ 0.1441, -0.9211, -0.4663, 0.2517]) tensor([0.3332, 0.1148, 0.1810, 0.3710]) -Greedy action tensor([-0.0855, 0.4483, -0.1832, -0.5244]) tensor([0.2349, 0.4006, 0.2130, 0.1515]) -Greedy action tensor([-0.2453, -0.2610, -0.2322, -0.8324]) tensor([0.2814, 0.2770, 0.2851, 0.1564]) -Greedy action tensor([-0.1208, 0.1119, -0.3105, -0.8379]) tensor([0.2795, 0.3528, 0.2312, 0.1365]) -Greedy action tensor([-0.9038, -1.9146, -0.7651, 0.1439]) tensor([0.1864, 0.0678, 0.2142, 0.5315]) -Greedy action tensor([ 0.2865, -0.4351, -0.2448, 0.5921]) tensor([0.2914, 0.1416, 0.1713, 0.3956]) -Greedy action tensor([-0.8215, -0.9899, -0.2708, -0.8815]) tensor([0.2212, 0.1869, 0.3836, 0.2083]) -Greedy action tensor([-0.0299, 0.0285, -0.0291, 0.0181]) tensor([0.2433, 0.2579, 0.2435, 0.2553]) -Greedy action tensor([-0.5960, -0.9910, -0.6132, 0.0116]) tensor([0.2226, 0.1500, 0.2188, 0.4087]) -Greedy action tensor([ 0.9552, -0.9233, -0.5834, 1.6422]) tensor([0.2980, 0.0455, 0.0640, 0.5924]) -Greedy action tensor([ 0.1496, -0.8088, 0.7787, -0.7345]) tensor([0.2723, 0.1044, 0.5108, 0.1125]) -Greedy action tensor([ 0.4864, 0.0589, -0.2375, 0.0873]) tensor([0.3561, 0.2323, 0.1727, 0.2389]) -Greedy action tensor([ 0.2186, -1.2118, 0.2861, 0.4637]) tensor([0.2788, 0.0667, 0.2983, 0.3562]) -Greedy action tensor([-0.4265, 0.3740, 0.5005, -0.5705]) tensor([0.1511, 0.3364, 0.3817, 0.1308]) -Greedy action tensor([-0.4377, -0.2906, -0.0219, -0.3541]) tensor([0.2100, 0.2433, 0.3183, 0.2283]) -Greedy action tensor([ 0.1138, -1.5147, -1.6383, -0.3014]) tensor([0.4926, 0.0967, 0.0854, 0.3253]) -Greedy action tensor([-0.7999, 0.1436, 0.0810, -0.7443]) tensor([0.1421, 0.3649, 0.3428, 0.1502]) -Greedy action tensor([ 0.3018, -0.5363, 1.8847, -0.8750]) tensor([0.1513, 0.0654, 0.7366, 0.0466]) -Greedy action tensor([-0.0079, -0.5971, -0.1891, 0.0742]) tensor([0.2878, 0.1597, 0.2401, 0.3124]) -Greedy action tensor([-1.3786, -0.7184, -0.0307, -0.4006]) tensor([0.1059, 0.2049, 0.4076, 0.2816]) -Greedy action tensor([ 0.9560, -0.7024, 0.3557, 0.3818]) tensor([0.4343, 0.0827, 0.2383, 0.2446]) -Greedy action tensor([ 1.3234, -1.5829, -0.1047, 0.6957]) tensor([0.5470, 0.0299, 0.1311, 0.2920]) -Greedy action tensor([-1.0597, 0.0121, -0.6677, -0.3624]) tensor([0.1350, 0.3942, 0.1998, 0.2711]) -Greedy action tensor([ 1.4160, -0.9234, 1.5482, 0.1553]) tensor([0.3966, 0.0382, 0.4527, 0.1124]) -Greedy action tensor([-1.1505, -0.9555, 0.4756, -1.0145]) tensor([0.1184, 0.1439, 0.6020, 0.1357]) -Greedy action tensor([ 0.5279, -0.6064, 0.9487, -0.1282]) tensor([0.2973, 0.0956, 0.4528, 0.1542]) -Greedy action tensor([-1.0535, -0.7596, -1.3378, -0.6354]) tensor([0.2168, 0.2908, 0.1631, 0.3293]) -Greedy action tensor([ 1.1029, -0.5899, -0.8094, 0.3040]) tensor([0.5613, 0.1033, 0.0829, 0.2525]) -Greedy action tensor([ 1.4249, -0.0922, -0.8954, 0.2735]) tensor([0.6121, 0.1343, 0.0601, 0.1935]) -Greedy action tensor([ 1.4838, -0.6630, -0.5191, 0.5323]) tensor([0.6105, 0.0713, 0.0824, 0.2358]) -Greedy action tensor([ 1.2272, -0.5354, -0.3875, 0.4215]) tensor([0.5503, 0.0944, 0.1095, 0.2458]) -Greedy action tensor([ 1.4994, -0.3853, -0.7032, -0.0358]) tensor([0.6767, 0.1028, 0.0748, 0.1458]) -Greedy action tensor([ 2.1608, -1.0409, -0.0807, 0.9853]) tensor([0.6870, 0.0280, 0.0730, 0.2121]) -Greedy action tensor([ 1.5735, -0.6530, -0.3934, 0.7669]) tensor([0.5903, 0.0637, 0.0826, 0.2635]) -Greedy action tensor([ 1.6152, -1.3869, -0.4370, 0.4662]) tensor([0.6689, 0.0332, 0.0859, 0.2120]) -Greedy action tensor([ 1.4302, -0.1934, -1.0028, 0.6649]) tensor([0.5714, 0.1127, 0.0502, 0.2658]) -Greedy action tensor([ 1.4422, -0.6799, -0.3286, 0.2266]) tensor([0.6303, 0.0755, 0.1073, 0.1869]) -Greedy action tensor([ 1.6809, -0.4749, -0.6251, 0.3759]) tensor([0.6727, 0.0779, 0.0670, 0.1824]) -Greedy action tensor([ 1.3354, -0.6418, -0.4123, 0.6148]) tensor([0.5558, 0.0770, 0.0968, 0.2704]) -Greedy action tensor([ 1.0400, -0.3851, -0.5695, 0.4869]) tensor([0.4961, 0.1193, 0.0992, 0.2854]) -Greedy action tensor([ 1.5020, -0.2802, -0.7340, 0.1217]) tensor([0.6550, 0.1102, 0.0700, 0.1647]) -Greedy action tensor([ 1.4850, -0.3537, -1.0574, 0.1062]) tensor([0.6713, 0.1068, 0.0528, 0.1691]) -Greedy action tensor([ 1.3347, -0.6145, -0.2880, 0.7771]) tensor([0.5229, 0.0745, 0.1032, 0.2994]) -Greedy action tensor([ 2.3563, -1.8050, -0.0720, 0.3946]) tensor([0.8036, 0.0125, 0.0709, 0.1130]) -Greedy action tensor([ 1.4912, -0.6462, -0.8278, 0.2371]) tensor([0.6659, 0.0786, 0.0655, 0.1900]) -Greedy action tensor([ 1.9316, 0.8833, -0.1225, 0.3336]) tensor([0.5949, 0.2085, 0.0763, 0.1203]) -Greedy action tensor([ 1.4049, -0.2151, -0.6063, 0.3977]) tensor([0.5893, 0.1166, 0.0789, 0.2152]) -Greedy action tensor([ 1.9672, -0.6306, -0.0591, 0.2287]) tensor([0.7236, 0.0539, 0.0954, 0.1272]) -Greedy action tensor([ 2.1497, -0.6635, -0.4857, 0.0521]) tensor([0.7972, 0.0478, 0.0571, 0.0979]) -Greedy action tensor([ 1.4493, 0.2116, -0.8576, 0.4304]) tensor([0.5712, 0.1657, 0.0569, 0.2062]) -Greedy action tensor([ 1.3934, -0.2728, -0.2709, 0.1546]) tensor([0.5995, 0.1133, 0.1135, 0.1737]) -Greedy action tensor([ 1.4191, -0.7447, -0.2050, 0.4373]) tensor([0.5929, 0.0681, 0.1169, 0.2221]) -Greedy action tensor([ 1.2177, -0.3496, -0.3012, 0.1996]) tensor([0.5590, 0.1166, 0.1224, 0.2020]) -Greedy action tensor([ 0.8860, -0.1262, -0.3212, 0.1264]) tensor([0.4694, 0.1706, 0.1404, 0.2196]) -Greedy action tensor([ 1.4940, -0.1976, -0.8384, 0.3624]) tensor([0.6235, 0.1149, 0.0605, 0.2011]) -Greedy action tensor([ 1.1117, -0.4774, -0.5657, 0.3005]) tensor([0.5449, 0.1112, 0.1018, 0.2421]) -Greedy action tensor([ 0.9712, -0.4719, -0.9299, 0.0696]) tensor([0.5582, 0.1318, 0.0834, 0.2266]) -Greedy action tensor([ 2.2949, -1.1588, -0.0616, 0.4814]) tensor([0.7755, 0.0245, 0.0735, 0.1265]) -Greedy action tensor([ 1.1208, 0.0158, -0.2630, -0.1551]) tensor([0.5373, 0.1780, 0.1347, 0.1500]) -Greedy action tensor([ 1.4706, -0.1714, -1.3205, 0.2108]) tensor([0.6499, 0.1258, 0.0399, 0.1844]) -Greedy action tensor([ 1.1307, -0.3614, -0.2289, 0.0993]) tensor([0.5440, 0.1224, 0.1397, 0.1939]) -Greedy action tensor([ 1.3786, -0.3156, -0.4924, 0.4112]) tensor([0.5822, 0.1070, 0.0896, 0.2212]) -Greedy action tensor([ 1.3651, -0.6494, 0.2160, 0.1777]) tensor([0.5697, 0.0760, 0.1806, 0.1738]) -Greedy action tensor([ 2.1196, -0.6136, -0.4157, 0.3209]) tensor([0.7635, 0.0496, 0.0605, 0.1264]) -Greedy action tensor([ 1.1797, -0.2631, -0.5297, 0.0895]) tensor([0.5703, 0.1347, 0.1032, 0.1917]) -Greedy action tensor([ 1.4612, -0.2367, -0.5919, 0.4434]) tensor([0.5978, 0.1094, 0.0767, 0.2160]) -Greedy action tensor([ 1.0103, -0.2040, -0.1805, 0.2234]) tensor([0.4863, 0.1444, 0.1478, 0.2214]) -Greedy action tensor([ 1.6983, -0.6532, -0.3864, 0.1050]) tensor([0.7028, 0.0669, 0.0874, 0.1428]) -Greedy action tensor([ 1.1754, 0.4397, -0.2431, -0.1314]) tensor([0.5020, 0.2406, 0.1215, 0.1359]) -Greedy action tensor([ 1.9127, -1.2553, -0.5086, 0.4801]) tensor([0.7302, 0.0307, 0.0648, 0.1743]) -Greedy action tensor([ 2.1494, -1.5374, -0.1075, 0.3254]) tensor([0.7745, 0.0194, 0.0811, 0.1250]) -Greedy action tensor([ 1.0811, -0.3047, -0.2761, 0.4145]) tensor([0.4948, 0.1238, 0.1274, 0.2541]) -Greedy action tensor([ 2.2550, -1.5320, 0.0617, 0.5390]) tensor([0.7610, 0.0172, 0.0849, 0.1368]) -Greedy action tensor([ 1.3323, 0.1858, -0.5970, 0.1312]) tensor([0.5669, 0.1801, 0.0823, 0.1706]) -Greedy action tensor([ 0.8026, -0.3651, -0.4132, 0.1985]) tensor([0.4642, 0.1444, 0.1376, 0.2537]) -Greedy action tensor([ 1.7105, -0.3173, -0.5175, 0.8951]) tensor([0.5946, 0.0783, 0.0641, 0.2631]) -Greedy action tensor([ 1.3945, -0.0275, -0.8292, 0.3203]) tensor([0.5914, 0.1427, 0.0640, 0.2020]) -Greedy action tensor([ 1.8161, -0.5772, -0.3187, 0.6238]) tensor([0.6609, 0.0604, 0.0782, 0.2006]) -Greedy action tensor([ 2.0507, -0.9927, -0.3168, 0.3757]) tensor([0.7526, 0.0359, 0.0705, 0.1410]) -Greedy action tensor([ 1.5058, -0.1669, -1.0934, 0.1301]) tensor([0.6602, 0.1239, 0.0491, 0.1668]) -Greedy action tensor([ 1.0649, -0.4922, -0.5859, 0.0554]) tensor([0.5659, 0.1193, 0.1086, 0.2062]) -Greedy action tensor([ 1.4933, 0.1422, -0.6003, -0.0126]) tensor([0.6234, 0.1614, 0.0768, 0.1383]) -Greedy action tensor([ 1.1813, -0.2221, -0.7732, 0.0442]) tensor([0.5854, 0.1439, 0.0829, 0.1878]) -Greedy action tensor([ 1.6438, 0.0821, -0.2222, 0.4506]) tensor([0.5996, 0.1258, 0.0928, 0.1818]) -Greedy action tensor([ 2.3005, -0.9747, -0.4935, 1.1074]) tensor([0.7131, 0.0270, 0.0436, 0.2163]) -Greedy action tensor([ 1.7532, -0.6954, -0.9695, 0.1977]) tensor([0.7336, 0.0634, 0.0482, 0.1548]) -Greedy action tensor([ 1.4046, -0.3095, -0.8964, 0.1555]) tensor([0.6381, 0.1149, 0.0639, 0.1830]) -Greedy action tensor([ 1.4095, -0.7800, -0.3056, 0.5901]) tensor([0.5772, 0.0646, 0.1039, 0.2543]) -Greedy action tensor([ 1.4574, -0.4606, -0.7457, 0.0158]) tensor([0.6694, 0.0983, 0.0739, 0.1583]) -Greedy action tensor([ 2.2585, -0.8787, -0.9408, 0.5377]) tensor([0.7917, 0.0344, 0.0323, 0.1416]) -Greedy action tensor([ 1.5768, -0.2957, -0.6877, 0.0873]) tensor([0.6743, 0.1037, 0.0700, 0.1520]) -Greedy action tensor([ 0.9518, -0.2813, -0.6229, 0.3920]) tensor([0.4832, 0.1408, 0.1000, 0.2760]) -Greedy action tensor([ 1.5137, -0.5912, -0.2842, 0.3642]) tensor([0.6233, 0.0760, 0.1033, 0.1975]) -Greedy action tensor([ 2.6781, -0.9705, -0.6122, 0.6910]) tensor([0.8331, 0.0217, 0.0310, 0.1142]) -Greedy action tensor([ 1.4798, -0.2659, -0.7516, 0.6433]) tensor([0.5830, 0.1018, 0.0626, 0.2526]) -Greedy action tensor([ 1.3532, -0.6224, -0.5661, 0.6857]) tensor([0.5561, 0.0771, 0.0816, 0.2852]) -Greedy action tensor([ 2.2897, 0.3893, 0.1060, -0.0030]) tensor([0.7336, 0.1097, 0.0826, 0.0741]) -Greedy action tensor([ 1.4683, -0.6899, -0.1054, 0.4770]) tensor([0.5903, 0.0682, 0.1224, 0.2191]) -Greedy action tensor([ 1.2867, -0.0857, -0.9475, -0.0715]) tensor([0.6182, 0.1567, 0.0662, 0.1589]) -Greedy action tensor([ 1.4616, -0.9969, -0.4460, 0.8879]) tensor([0.5563, 0.0476, 0.0826, 0.3135]) -Greedy action tensor([ 1.1671, 0.2277, -0.2395, 0.4174]) tensor([0.4743, 0.1854, 0.1162, 0.2241]) -Greedy action tensor([ 1.7297, -0.1196, -0.5707, 0.2504]) tensor([0.6732, 0.1059, 0.0675, 0.1534]) -Greedy action tensor([ 1.6038, -0.8094, -0.1131, 0.5473]) tensor([0.6185, 0.0554, 0.1111, 0.2150]) -Greedy action tensor([ 1.7948, -1.0029, 0.0119, 0.5536]) tensor([0.6587, 0.0401, 0.1108, 0.1904]) -Greedy action tensor([ 1.7813, 0.4453, -0.0818, 0.2659]) tensor([0.6106, 0.1605, 0.0948, 0.1342]) -Greedy action tensor([ 1.3341, 0.1245, -0.8124, 0.0905]) tensor([0.5870, 0.1751, 0.0686, 0.1693]) -Greedy action tensor([ 1.3854, -0.0118, -0.4673, 0.5787]) tensor([0.5404, 0.1336, 0.0847, 0.2412]) -Greedy action tensor([ 1.5791, -0.4831, -0.3038, 0.2324]) tensor([0.6496, 0.0826, 0.0988, 0.1690]) -Greedy action tensor([ 0.4812, 0.1830, -0.0990, 0.1818]) tensor([0.3286, 0.2439, 0.1839, 0.2436]) -Greedy action tensor([ 0.7849, -0.5205, -0.0239, -0.2352]) tensor([0.4815, 0.1305, 0.2144, 0.1736]) -Greedy action tensor([ 0.2028, 0.3695, -0.2606, 0.1202]) tensor([0.2680, 0.3166, 0.1686, 0.2467]) -Greedy action tensor([ 0.8198, -0.4358, -0.1417, -0.2419]) tensor([0.4967, 0.1415, 0.1899, 0.1718]) -Greedy action tensor([ 1.1187, -0.7634, -0.1913, -0.6746]) tensor([0.6295, 0.0959, 0.1699, 0.1048]) -Greedy action tensor([ 0.4282, 0.0334, -0.0816, -0.1134]) tensor([0.3501, 0.2359, 0.2103, 0.2037]) -Greedy action tensor([ 0.8221, -0.2636, 0.0365, -0.0978]) tensor([0.4562, 0.1540, 0.2080, 0.1818]) -Greedy action tensor([ 0.8191, -0.4894, 0.0936, -0.1480]) tensor([0.4685, 0.1266, 0.2268, 0.1781]) -Greedy action tensor([ 0.9488, -0.5622, 0.0233, -0.3871]) tensor([0.5319, 0.1174, 0.2108, 0.1399]) -Greedy action tensor([ 0.9842, -0.2404, 0.1148, -0.1618]) tensor([0.4924, 0.1447, 0.2064, 0.1565]) -Greedy action tensor([ 0.6011, -0.5749, -0.2415, -0.2213]) tensor([0.4590, 0.1416, 0.1977, 0.2017]) -Greedy action tensor([ 0.8090, -0.5733, 0.0291, -0.4637]) tensor([0.5026, 0.1262, 0.2304, 0.1408]) -Greedy action tensor([ 0.8932, -0.3489, 0.2097, -0.1247]) tensor([0.4640, 0.1340, 0.2343, 0.1677]) -Greedy action tensor([ 1.0946, -0.6639, -0.0820, -0.3684]) tensor([0.5841, 0.1006, 0.1801, 0.1352]) -Greedy action tensor([ 0.2897, -0.3470, -0.0860, -0.2868]) tensor([0.3600, 0.1905, 0.2473, 0.2023]) -Greedy action tensor([ 0.8797, -0.5366, 0.0770, -0.3869]) tensor([0.5070, 0.1230, 0.2272, 0.1429]) -Greedy action tensor([ 0.6700, -0.3904, -0.2093, -0.2411]) tensor([0.4622, 0.1601, 0.1919, 0.1859]) -Greedy action tensor([ 0.5159, -0.2115, -0.0072, -0.1101]) tensor([0.3831, 0.1851, 0.2270, 0.2048]) -Greedy action tensor([ 1.2082, -0.6335, -0.2086, -0.5992]) tensor([0.6389, 0.1013, 0.1549, 0.1048]) -Greedy action tensor([ 0.8438, -0.6836, 0.0170, -0.2381]) tensor([0.5016, 0.1089, 0.2194, 0.1700]) -Greedy action tensor([ 1.0813, -0.9307, 0.0144, -0.6044]) tensor([0.6013, 0.0804, 0.2069, 0.1114]) -Greedy action tensor([ 1.1361, -1.0628, 0.0587, -0.6343]) tensor([0.6167, 0.0684, 0.2100, 0.1050]) -Greedy action tensor([ 0.9545, -0.8062, -0.0313, -0.5782]) tensor([0.5678, 0.0976, 0.2119, 0.1226]) -Greedy action tensor([ 0.5943, -0.2967, -0.1804, -0.1149]) tensor([0.4232, 0.1736, 0.1950, 0.2082]) -Greedy action tensor([ 0.7492, -0.5892, 0.1177, -0.2754]) tensor([0.4645, 0.1218, 0.2470, 0.1667]) -Greedy action tensor([ 0.8656, -0.5389, -0.0325, -0.3586]) tensor([0.5136, 0.1261, 0.2092, 0.1510]) -Greedy action tensor([ 1.0981, -0.5928, -0.0034, -0.5453]) tensor([0.5848, 0.1078, 0.1944, 0.1130]) -Greedy action tensor([ 0.7100, -0.4412, -0.0489, -0.1676]) tensor([0.4545, 0.1437, 0.2128, 0.1890]) -Greedy action tensor([ 0.6765, 0.0834, -0.0603, 0.0510]) tensor([0.3897, 0.2153, 0.1865, 0.2085]) -Greedy action tensor([0.0389, 0.6053, 0.0282, 0.1455]) tensor([0.2056, 0.3623, 0.2034, 0.2287]) -Greedy action tensor([ 0.7449, -0.3686, 0.1414, -0.1998]) tensor([0.4417, 0.1450, 0.2416, 0.1717]) -Greedy action tensor([ 0.9169, -0.7052, -0.0677, -0.7034]) tensor([0.5653, 0.1116, 0.2112, 0.1118]) -Greedy action tensor([ 0.5924, -0.2627, -0.0845, -0.1062]) tensor([0.4114, 0.1750, 0.2091, 0.2046]) -Greedy action tensor([ 0.8424, -0.5759, -0.0663, -0.2852]) tensor([0.5079, 0.1230, 0.2047, 0.1644]) -Greedy action tensor([ 1.0869, -0.5969, 0.1183, -0.4555]) tensor([0.5621, 0.1044, 0.2134, 0.1202]) -Greedy action tensor([ 0.6961, -0.6837, 0.1432, -0.3269]) tensor([0.4574, 0.1151, 0.2631, 0.1644]) -Greedy action tensor([ 0.9851, -0.1049, -0.0903, -0.2918]) tensor([0.5112, 0.1719, 0.1744, 0.1426]) -Greedy action tensor([ 0.6524, -0.2799, 0.1109, -0.2406]) tensor([0.4193, 0.1650, 0.2440, 0.1717]) -Greedy action tensor([ 1.0378, -0.4851, -0.0643, -0.7292]) tensor([0.5810, 0.1267, 0.1930, 0.0993]) -Greedy action tensor([ 0.4834, -0.0772, 0.0529, -0.2257]) tensor([0.3686, 0.2104, 0.2396, 0.1814]) -Greedy action tensor([ 0.5047, -0.1059, -0.1109, -0.1312]) tensor([0.3827, 0.2078, 0.2068, 0.2026]) -Greedy action tensor([ 0.9811, -0.8693, 0.0542, -0.5780]) tensor([0.5671, 0.0891, 0.2244, 0.1193]) -Greedy action tensor([ 0.7826, -0.3450, -0.2009, -0.3590]) tensor([0.4957, 0.1605, 0.1854, 0.1583]) -Greedy action tensor([ 0.3298, 0.2084, -0.1447, -0.1521]) tensor([0.3200, 0.2834, 0.1991, 0.1976]) -Greedy action tensor([ 0.7095, -0.3899, -0.0550, -0.1057]) tensor([0.4462, 0.1486, 0.2077, 0.1975]) -Greedy action tensor([ 0.2473, 0.3188, -0.3251, 0.2796]) tensor([0.2724, 0.2926, 0.1537, 0.2813]) -Greedy action tensor([ 0.5455, -0.5048, -0.0526, -0.1646]) tensor([0.4182, 0.1463, 0.2299, 0.2056]) -Greedy action tensor([ 0.3307, -0.1143, -0.0801, -0.0463]) tensor([0.3345, 0.2143, 0.2218, 0.2294]) -Greedy action tensor([ 1.0756, -0.5451, -0.0946, -0.6406]) tensor([0.5925, 0.1172, 0.1838, 0.1065]) -Greedy action tensor([ 0.0382, 0.3811, -0.1003, -0.1237]) tensor([0.2421, 0.3412, 0.2108, 0.2059]) -Greedy action tensor([ 0.4147, -0.2232, 0.0119, -0.0699]) tensor([0.3555, 0.1879, 0.2376, 0.2190]) -Greedy action tensor([ 0.4535, 0.1257, -0.1093, -0.4061]) tensor([0.3685, 0.2655, 0.2099, 0.1560]) -Greedy action tensor([ 0.4332, -0.1614, -0.0059, -0.1477]) tensor([0.3629, 0.2002, 0.2339, 0.2030]) -Greedy action tensor([ 1.1038, -0.5876, -0.0194, -0.3229]) tensor([0.5716, 0.1053, 0.1859, 0.1372]) -Greedy action tensor([ 0.8544, -0.3828, 0.1614, -0.5075]) tensor([0.4886, 0.1418, 0.2444, 0.1252]) -Greedy action tensor([ 0.6703, 0.0038, -0.0392, 0.0712]) tensor([0.3914, 0.2010, 0.1926, 0.2150]) -Greedy action tensor([ 0.5944, -0.2001, 0.0666, -0.2618]) tensor([0.4054, 0.1832, 0.2392, 0.1722]) -Greedy action tensor([ 0.6109, -0.2237, -0.0881, -0.2382]) tensor([0.4239, 0.1840, 0.2107, 0.1814]) -Greedy action tensor([ 0.8240, -0.3259, -0.0129, -0.1358]) tensor([0.4689, 0.1485, 0.2031, 0.1796]) -Greedy action tensor([ 0.7425, -0.3650, 0.0498, -0.1198]) tensor([0.4439, 0.1467, 0.2220, 0.1874]) -Greedy action tensor([ 0.8468, -0.6686, -0.0306, -0.2141]) tensor([0.5046, 0.1109, 0.2098, 0.1747]) -Greedy action tensor([ 0.5070, -0.3281, -0.0458, -0.1554]) tensor([0.3961, 0.1718, 0.2279, 0.2042]) -Greedy action tensor([ 0.8623, -0.7658, 0.0016, -0.4489]) tensor([0.5295, 0.1039, 0.2239, 0.1427]) -Greedy action tensor([ 0.9117, -0.6725, 0.1793, -0.4048]) tensor([0.5118, 0.1050, 0.2460, 0.1372]) -Greedy action tensor([ 0.7929, -0.0889, -0.0444, -0.2180]) tensor([0.4523, 0.1873, 0.1958, 0.1646]) -Greedy action tensor([ 0.4292, -0.4158, -0.0491, -0.1237]) tensor([0.3810, 0.1637, 0.2362, 0.2192]) -Greedy action tensor([ 0.8030, -0.4295, -0.0326, -0.3925]) tensor([0.4932, 0.1438, 0.2138, 0.1492]) -Greedy action tensor([ 0.9826, -0.8171, -0.0443, -0.5064]) tensor([0.5717, 0.0945, 0.2048, 0.1290]) -Greedy action tensor([ 0.6735, -0.4712, -0.1396, -0.4419]) tensor([0.4786, 0.1523, 0.2122, 0.1569]) -Greedy action tensor([ 0.7008, -0.4235, 0.0297, -0.2654]) tensor([0.4512, 0.1466, 0.2306, 0.1717]) -Greedy action tensor([ 1.3473, -0.4687, -0.1251, -0.4507]) tensor([0.6420, 0.1044, 0.1472, 0.1063]) -Greedy action tensor([ 0.4080, -0.6067, -0.1355, -0.2080]) tensor([0.4027, 0.1460, 0.2338, 0.2175]) -Greedy action tensor([ 0.5452, -0.2996, -0.0853, -0.1097]) tensor([0.4030, 0.1731, 0.2145, 0.2094]) -Greedy action tensor([ 0.6466, -0.3753, -0.0599, -0.2090]) tensor([0.4389, 0.1580, 0.2165, 0.1866]) -Greedy action tensor([ 0.7930, -0.3424, -0.1334, -0.3136]) tensor([0.4883, 0.1569, 0.1934, 0.1615]) -Greedy action tensor([ 0.5790, -0.5038, -0.1170, -0.1624]) tensor([0.4322, 0.1464, 0.2155, 0.2059]) -Greedy action tensor([ 0.6109, -0.3649, 0.0300, -0.1658]) tensor([0.4173, 0.1573, 0.2335, 0.1919]) -Greedy action tensor([ 0.8428, -0.6069, 0.0306, -0.4729]) tensor([0.5137, 0.1205, 0.2280, 0.1378]) -Greedy action tensor([ 1.1863, -0.7210, 0.0623, -0.7869]) tensor([0.6202, 0.0921, 0.2015, 0.0862]) -Greedy action tensor([ 0.8330, -0.6045, 0.0602, -0.4462]) tensor([0.5057, 0.1201, 0.2335, 0.1407]) -Greedy action tensor([ 0.3919, -0.2854, -0.2140, -0.0823]) tensor([0.3737, 0.1898, 0.2039, 0.2326]) -Greedy action tensor([-1.7238, 0.8678, 0.3443, 0.1313]) tensor([0.0349, 0.4660, 0.2761, 0.2231]) -Greedy action tensor([-0.3481, 0.1706, 0.4791, 0.9450]) tensor([0.1161, 0.1951, 0.2656, 0.4232]) -Greedy action tensor([-1.9107, -0.4420, 0.6475, -0.1662]) tensor([0.0417, 0.1811, 0.5385, 0.2387]) -Greedy action tensor([-1.2972, -0.0823, 0.7196, 0.7381]) tensor([0.0512, 0.1725, 0.3846, 0.3918]) -Greedy action tensor([-0.4964, -0.3487, 0.1728, 0.1378]) tensor([0.1667, 0.1933, 0.3256, 0.3144]) -Greedy action tensor([-1.9125, -0.4060, 0.6463, -0.1631]) tensor([0.0414, 0.1865, 0.5343, 0.2378]) -Greedy action tensor([-1.6792, -0.4020, 0.5270, -0.0713]) tensor([0.0536, 0.1922, 0.4867, 0.2675]) -Greedy action tensor([-1.8203, -0.3785, 0.6148, -0.1019]) tensor([0.0450, 0.1903, 0.5138, 0.2509]) -Greedy action tensor([-1.7203, 0.1238, 0.4401, -0.1102]) tensor([0.0476, 0.3010, 0.4131, 0.2382]) -Greedy action tensor([-1.5399, -0.3730, 0.4839, -0.1047]) tensor([0.0626, 0.2010, 0.4736, 0.2629]) -Greedy action tensor([-1.8134, -0.3943, 0.6298, -0.0861]) tensor([0.0449, 0.1856, 0.5169, 0.2526]) -Greedy action tensor([-1.5285, -0.0220, 0.3970, -0.0361]) tensor([0.0595, 0.2682, 0.4078, 0.2645]) -Greedy action tensor([-1.8751, -0.4564, 0.6180, -0.1485]) tensor([0.0438, 0.1808, 0.5294, 0.2460]) -Greedy action tensor([-1.3798, -0.4949, 0.4366, -0.0963]) tensor([0.0759, 0.1838, 0.4665, 0.2738]) -Greedy action tensor([-1.8392, -0.3891, 0.5992, -0.1288]) tensor([0.0449, 0.1916, 0.5148, 0.2486]) -Greedy action tensor([-0.7748, -0.2842, 0.5472, 0.5911]) tensor([0.0971, 0.1585, 0.3640, 0.3804]) -Greedy action tensor([-1.2277, 0.6649, 0.1731, 0.2715]) tensor([0.0618, 0.4103, 0.2509, 0.2769]) -Greedy action tensor([-1.9466, -0.4498, 0.6680, -0.1814]) tensor([0.0400, 0.1789, 0.5471, 0.2340]) -Greedy action tensor([-1.9429, -0.4486, 0.6670, -0.1792]) tensor([0.0402, 0.1790, 0.5464, 0.2344]) -Greedy action tensor([-1.8682, -0.4649, 0.6374, -0.1337]) tensor([0.0435, 0.1770, 0.5330, 0.2465]) -Greedy action tensor([-1.8937, -0.3449, 0.6315, -0.1476]) tensor([0.0418, 0.1966, 0.5220, 0.2395]) -Greedy action tensor([-1.6978, -0.4785, 0.5479, -0.0600]) tensor([0.0527, 0.1784, 0.4979, 0.2711]) -Greedy action tensor([-1.9004, -0.4449, 0.6474, -0.1590]) tensor([0.0421, 0.1803, 0.5376, 0.2400]) -Greedy action tensor([-1.8313, -0.4127, 0.6162, -0.1155]) tensor([0.0449, 0.1857, 0.5195, 0.2499]) -Greedy action tensor([-1.7118, 0.0102, 0.4806, -0.0951]) tensor([0.0486, 0.2718, 0.4350, 0.2446]) -Greedy action tensor([-1.9482, -0.4529, 0.6684, -0.1834]) tensor([0.0400, 0.1785, 0.5478, 0.2337]) -Greedy action tensor([-1.9445, -0.4497, 0.6692, -0.1781]) tensor([0.0401, 0.1786, 0.5469, 0.2344]) -Greedy action tensor([-1.1892, 0.1281, 0.3972, 0.2553]) tensor([0.0722, 0.2694, 0.3525, 0.3059]) -Greedy action tensor([-1.9420, -0.4482, 0.6668, -0.1791]) tensor([0.0402, 0.1791, 0.5462, 0.2344]) -Greedy action tensor([-1.9337, -0.4379, 0.6601, -0.1755]) tensor([0.0406, 0.1811, 0.5429, 0.2354]) -Greedy action tensor([-1.8393, -0.5728, 0.8877, -0.0126]) tensor([0.0384, 0.1362, 0.5869, 0.2385]) -Greedy action tensor([-1.8525, -0.3542, 0.5959, -0.1212]) tensor([0.0441, 0.1972, 0.5099, 0.2489]) -Greedy action tensor([-0.7614, 0.8917, 0.0403, 0.2539]) tensor([0.0892, 0.4658, 0.1988, 0.2462]) -Greedy action tensor([-1.8016, 0.1788, 0.5982, -0.3494]) tensor([0.0425, 0.3078, 0.4682, 0.1815]) -Greedy action tensor([-1.9043, -0.2926, 0.6278, -0.1451]) tensor([0.0410, 0.2054, 0.5156, 0.2380]) -Greedy action tensor([-1.0199, 0.4570, 0.1687, -0.0199]) tensor([0.0879, 0.3848, 0.2884, 0.2389]) -Greedy action tensor([-1.8770, -0.4499, 0.6358, -0.1395]) tensor([0.0431, 0.1797, 0.5321, 0.2451]) -Greedy action tensor([-1.8259, -0.4686, 0.6019, -0.1001]) tensor([0.0458, 0.1779, 0.5190, 0.2572]) -Greedy action tensor([-1.8909, -0.3047, 0.6174, -0.1496]) tensor([0.0419, 0.2046, 0.5145, 0.2390]) -Greedy action tensor([-0.3048, 1.1040, 0.0322, 0.2968]) tensor([0.1202, 0.4919, 0.1684, 0.2194]) -Greedy action tensor([-1.9392, -0.4364, 0.6634, -0.1764]) tensor([0.0403, 0.1811, 0.5438, 0.2348]) -Greedy action tensor([-1.8838, -0.4432, 0.6391, -0.1523]) tensor([0.0429, 0.1810, 0.5341, 0.2421]) -Greedy action tensor([-1.9178, -0.4473, 0.6580, -0.1646]) tensor([0.0412, 0.1793, 0.5416, 0.2379]) -Greedy action tensor([-0.3466, 0.9901, -0.0280, 0.4569]) tensor([0.1188, 0.4523, 0.1634, 0.2654]) -Greedy action tensor([-1.6686, -0.3297, 0.5949, 0.0650]) tensor([0.0498, 0.1899, 0.4786, 0.2817]) -Greedy action tensor([-1.5964, 0.3587, 0.1497, -0.7528]) tensor([0.0620, 0.4382, 0.3555, 0.1442]) -Greedy action tensor([-1.9428, -0.4504, 0.6673, -0.1794]) tensor([0.0402, 0.1788, 0.5466, 0.2344]) -Greedy action tensor([-1.9091, -0.4427, 0.6509, -0.1627]) tensor([0.0417, 0.1805, 0.5389, 0.2389]) -Greedy action tensor([-1.9408, -0.4457, 0.6650, -0.1784]) tensor([0.0403, 0.1796, 0.5454, 0.2347]) -Greedy action tensor([-1.7306, 0.0355, 0.5140, -0.0966]) tensor([0.0467, 0.2731, 0.4408, 0.2394]) -Greedy action tensor([0.0472, 1.1626, 0.0019, 0.3188]) tensor([0.1583, 0.4828, 0.1513, 0.2076]) -Greedy action tensor([-1.5789, -0.1483, 0.5462, 0.0593]) tensor([0.0535, 0.2236, 0.4478, 0.2752]) -Greedy action tensor([-0.8364, -0.1011, 0.2525, -0.0860]) tensor([0.1223, 0.2552, 0.3634, 0.2591]) -Greedy action tensor([-1.1430, -0.5015, 0.2884, 0.2911]) tensor([0.0887, 0.1684, 0.3710, 0.3720]) -Greedy action tensor([-1.4071, -0.5616, 0.3769, -0.0164]) tensor([0.0752, 0.1751, 0.4476, 0.3021]) -Greedy action tensor([-1.9116, -0.4286, 0.6502, -0.1642]) tensor([0.0415, 0.1828, 0.5376, 0.2381]) -Greedy action tensor([0.9224, 0.5789, 0.5868, 1.4214]) tensor([0.2456, 0.1742, 0.1756, 0.4046]) -Greedy action tensor([-1.9112, -0.4033, 0.6541, -0.1615]) tensor([0.0412, 0.1861, 0.5357, 0.2370]) -Greedy action tensor([-1.8736, -0.3517, 0.6222, -0.1248]) tensor([0.0426, 0.1953, 0.5171, 0.2450]) -Greedy action tensor([-1.9242, -0.3824, 0.6506, -0.1631]) tensor([0.0406, 0.1898, 0.5332, 0.2363]) -Greedy action tensor([-1.7685, -0.3714, 0.5700, -0.0545]) tensor([0.0477, 0.1929, 0.4945, 0.2648]) -Greedy action tensor([-1.8263, -0.1542, 0.5957, -0.1060]) tensor([0.0431, 0.2297, 0.4862, 0.2410]) -Greedy action tensor([-1.8892, -0.3698, 0.6434, -0.1439]) tensor([0.0419, 0.1913, 0.5270, 0.2398]) -Greedy action tensor([-1.8781, -0.3843, 0.6448, -0.1438]) tensor([0.0424, 0.1889, 0.5285, 0.2402]) -Greedy action tensor([-1.0589e+00, 1.0202e-01, 1.4668e-01, -1.0462e-03]) tensor([0.0960, 0.3067, 0.3207, 0.2766]) -Greedy action tensor([-1.5901, -0.5755, 0.4808, 0.0329]) tensor([0.0597, 0.1646, 0.4733, 0.3024]) -Greedy action tensor([-1.9023, -0.3587, 0.6376, -0.1573]) tensor([0.0415, 0.1944, 0.5264, 0.2377]) -Greedy action tensor([-1.6343, -0.4604, 0.5733, 0.2221]) tensor([0.0507, 0.1640, 0.4609, 0.3244]) -Greedy action tensor([-1.8051, -0.3084, 0.6041, -0.0915]) tensor([0.0452, 0.2017, 0.5025, 0.2506]) -Greedy action tensor([-0.4884, 0.5441, 0.4481, 0.6097]) tensor([0.1069, 0.3001, 0.2726, 0.3204]) -Greedy action tensor([-1.8615, -0.4462, 0.6286, -0.1366]) tensor([0.0439, 0.1807, 0.5292, 0.2462]) -Greedy action tensor([-1.9081, -0.3865, 0.6437, -0.1619]) tensor([0.0414, 0.1897, 0.5314, 0.2375]) -Greedy action tensor([-1.1329, -0.3738, 0.3456, -0.0615]) tensor([0.0958, 0.2046, 0.4201, 0.2796]) -Greedy action tensor([-1.8455, -0.4528, 0.6206, -0.1360]) tensor([0.0448, 0.1803, 0.5274, 0.2475]) -Greedy action tensor([-1.6637, -0.5296, 0.5219, -0.0031]) tensor([0.0547, 0.1702, 0.4870, 0.2881]) -Greedy action tensor([-1.8814, -0.4582, 0.6429, -0.1485]) tensor([0.0429, 0.1782, 0.5360, 0.2429]) -Greedy action tensor([-1.9267, -0.4380, 0.6599, -0.1698]) tensor([0.0408, 0.1808, 0.5420, 0.2364]) -Greedy action tensor([-1.0872, 0.3066, 0.2568, -0.0849]) tensor([0.0863, 0.3478, 0.3309, 0.2351]) -Greedy action tensor([-1.9297, -0.4474, 0.6597, -0.1765]) tensor([0.0408, 0.1797, 0.5438, 0.2357]) -Greedy action tensor([-1.9345, -0.4332, 0.6627, -0.1713]) tensor([0.0404, 0.1813, 0.5426, 0.2356]) -Greedy action tensor([-1.1756, -0.5384, 0.3115, 0.2173]) tensor([0.0882, 0.1667, 0.3901, 0.3550]) -Greedy action tensor([2.0094, 0.7918, 0.3115, 0.2238]) tensor([0.6073, 0.1797, 0.1112, 0.1018]) -Greedy action tensor([ 1.3857, -0.4635, -0.2048, 0.0747]) tensor([0.6132, 0.0965, 0.1250, 0.1653]) -Greedy action tensor([ 1.5400, -0.1552, -0.1122, 0.0415]) tensor([0.6255, 0.1148, 0.1199, 0.1398]) -Greedy action tensor([ 1.1926, -0.7143, -0.1972, 0.0183]) tensor([0.5859, 0.0870, 0.1460, 0.1811]) -Greedy action tensor([ 1.1456, -0.1707, -0.1365, 0.1172]) tensor([0.5254, 0.1409, 0.1458, 0.1879]) -Greedy action tensor([ 0.8839, -0.0635, -0.9674, 0.2292]) tensor([0.4844, 0.1878, 0.0761, 0.2517]) -Greedy action tensor([ 2.3442, -1.1541, -0.3124, 0.9585]) tensor([0.7404, 0.0224, 0.0520, 0.1852]) -Greedy action tensor([ 2.1658, -0.9652, -0.2518, 0.4196]) tensor([0.7650, 0.0334, 0.0682, 0.1334]) -Greedy action tensor([ 1.4847, -0.6300, -0.1309, 0.4653]) tensor([0.5952, 0.0718, 0.1183, 0.2147]) -Greedy action tensor([ 1.7231, -1.4581, -0.0842, 0.0665]) tensor([0.7161, 0.0297, 0.1175, 0.1366]) -Greedy action tensor([ 1.6885, -0.6069, -0.4019, 0.1910]) tensor([0.6906, 0.0696, 0.0854, 0.1545]) -Greedy action tensor([ 0.7670, -0.1480, -0.0162, 0.1113]) tensor([0.4208, 0.1685, 0.1923, 0.2184]) -Greedy action tensor([ 1.4897, -0.5214, -0.8263, 0.0499]) tensor([0.6805, 0.0911, 0.0671, 0.1613]) -Greedy action tensor([ 1.1954, -0.4701, -0.3443, 0.2098]) tensor([0.5628, 0.1064, 0.1207, 0.2101]) -Greedy action tensor([ 0.9757, -0.0560, -0.4211, -0.2711]) tensor([0.5288, 0.1885, 0.1308, 0.1520]) -Greedy action tensor([ 1.5818, 0.4303, -0.6771, 0.1375]) tensor([0.6037, 0.1909, 0.0631, 0.1424]) -Greedy action tensor([ 1.5684, -0.3209, -0.7725, 0.1735]) tensor([0.6688, 0.1011, 0.0644, 0.1658]) -Greedy action tensor([ 1.7030, -0.2643, -0.7370, 0.2750]) tensor([0.6818, 0.0953, 0.0594, 0.1635]) -Greedy action tensor([ 1.0472, -0.4483, -0.3776, 0.5912]) tensor([0.4765, 0.1068, 0.1146, 0.3020]) -Greedy action tensor([ 1.5578, -0.3474, -0.4955, 0.3637]) tensor([0.6329, 0.0942, 0.0812, 0.1918]) -Greedy action tensor([ 1.2403, -0.2638, -0.4676, -0.0166]) tensor([0.5924, 0.1316, 0.1074, 0.1686]) -Greedy action tensor([ 1.0712, -0.1985, -0.3178, 0.0370]) tensor([0.5303, 0.1490, 0.1322, 0.1885]) -Greedy action tensor([ 1.3287, 0.5674, -0.9429, 0.1140]) tensor([0.5356, 0.2502, 0.0552, 0.1590]) -Greedy action tensor([ 1.1942, -0.1634, -0.2577, -0.2209]) tensor([0.5766, 0.1483, 0.1350, 0.1401]) -Greedy action tensor([ 1.5085, -0.6304, -0.4957, 0.2676]) tensor([0.6487, 0.0764, 0.0874, 0.1875]) -Greedy action tensor([ 1.6016, -0.3427, -0.4112, 0.3255]) tensor([0.6427, 0.0920, 0.0859, 0.1794]) -Greedy action tensor([ 1.4155, -0.4590, -0.3717, 0.3817]) tensor([0.5965, 0.0915, 0.0999, 0.2121]) -Greedy action tensor([ 1.2675, -0.6047, -0.2815, 0.5153]) tensor([0.5442, 0.0837, 0.1156, 0.2565]) -Greedy action tensor([ 1.0301, -0.1316, -0.8515, 0.2227]) tensor([0.5232, 0.1637, 0.0797, 0.2333]) -Greedy action tensor([ 1.8516, 0.0039, -0.4403, 0.4596]) tensor([0.6635, 0.1046, 0.0671, 0.1649]) -Greedy action tensor([ 1.5311, -0.2425, -0.7549, 0.4231]) tensor([0.6244, 0.1060, 0.0635, 0.2062]) -Greedy action tensor([ 1.9449, -1.1881, 0.3069, 0.1013]) tensor([0.7162, 0.0312, 0.1392, 0.1133]) -Greedy action tensor([ 1.3668, -0.2702, -0.1173, -0.0283]) tensor([0.5991, 0.1166, 0.1358, 0.1485]) -Greedy action tensor([ 1.7004, -0.2785, -0.6582, 0.3334]) tensor([0.6722, 0.0929, 0.0636, 0.1713]) -Greedy action tensor([ 1.6069, -0.8236, -0.3031, 0.4348]) tensor([0.6469, 0.0569, 0.0958, 0.2004]) -Greedy action tensor([ 1.4512, -0.3214, -0.4642, 0.4019]) tensor([0.5998, 0.1019, 0.0883, 0.2100]) -Greedy action tensor([ 1.2239, -0.2372, -0.4661, 0.3724]) tensor([0.5425, 0.1259, 0.1001, 0.2315]) -Greedy action tensor([ 1.0925, -0.6587, -0.3611, 0.1686]) tensor([0.5542, 0.0962, 0.1295, 0.2200]) -Greedy action tensor([ 2.0487, -0.7831, 0.1853, 0.8550]) tensor([0.6591, 0.0388, 0.1023, 0.1998]) -Greedy action tensor([ 1.6511, 0.1535, -1.1622, 0.4097]) tensor([0.6359, 0.1422, 0.0382, 0.1838]) -Greedy action tensor([ 0.7613, -0.1816, 0.0196, 0.0852]) tensor([0.4212, 0.1640, 0.2006, 0.2142]) -Greedy action tensor([ 1.4592, -0.7493, -0.1965, 0.6541]) tensor([0.5721, 0.0629, 0.1093, 0.2558]) -Greedy action tensor([ 1.2957, -0.7849, -0.1369, 0.1651]) tensor([0.5930, 0.0740, 0.1415, 0.1914]) -Greedy action tensor([ 1.5705, -0.7914, -0.3484, 0.1835]) tensor([0.6708, 0.0632, 0.0984, 0.1676]) -Greedy action tensor([ 1.3816, -0.6568, -0.4699, 0.4749]) tensor([0.5913, 0.0770, 0.0928, 0.2388]) -Greedy action tensor([ 1.9264, -1.1289, -0.4474, 0.0519]) tensor([0.7730, 0.0364, 0.0720, 0.1186]) -Greedy action tensor([ 1.7034, -0.8784, -0.1549, 0.3406]) tensor([0.6723, 0.0508, 0.1048, 0.1721]) -Greedy action tensor([ 2.0308, -0.8605, -0.3335, 0.2331]) tensor([0.7603, 0.0422, 0.0715, 0.1260]) -Greedy action tensor([ 1.6188, -0.3327, -0.3892, 0.2649]) tensor([0.6517, 0.0926, 0.0875, 0.1683]) -Greedy action tensor([ 1.1817, -0.1197, -0.5343, 0.1569]) tensor([0.5522, 0.1503, 0.0993, 0.1982]) -Greedy action tensor([ 1.4472, -1.2457, -0.0098, 0.5634]) tensor([0.5835, 0.0395, 0.1359, 0.2411]) -Greedy action tensor([ 1.3255, -0.2807, -0.6654, 0.2141]) tensor([0.6001, 0.1204, 0.0820, 0.1975]) -Greedy action tensor([ 2.4018, 0.7128, -0.0304, 0.1244]) tensor([0.7272, 0.1343, 0.0639, 0.0746]) -Greedy action tensor([ 1.3027, 0.1663, -0.8532, 0.0584]) tensor([0.5797, 0.1861, 0.0671, 0.1671]) -Greedy action tensor([ 1.3265, -0.2188, -0.2459, 0.1365]) tensor([0.5797, 0.1236, 0.1203, 0.1764]) -Greedy action tensor([ 1.5240, -0.8689, -0.4542, 0.6218]) tensor([0.6115, 0.0559, 0.0846, 0.2481]) -Greedy action tensor([ 1.5479, -0.2665, -0.7115, 0.4234]) tensor([0.6281, 0.1023, 0.0656, 0.2040]) -Greedy action tensor([ 0.9989, -0.0971, -0.3057, -0.2273]) tensor([0.5266, 0.1760, 0.1429, 0.1545]) -Greedy action tensor([ 2.4456, -1.6174, -0.0634, 1.0977]) tensor([0.7362, 0.0127, 0.0599, 0.1912]) -Greedy action tensor([ 1.1574, -0.3056, -0.3462, -0.0378]) tensor([0.5693, 0.1318, 0.1266, 0.1723]) -Greedy action tensor([ 1.4049, -0.1422, -0.8877, 0.3190]) tensor([0.6055, 0.1289, 0.0612, 0.2044]) -Greedy action tensor([ 1.6857, -1.0144, -0.6676, 0.2392]) tensor([0.7155, 0.0481, 0.0680, 0.1684]) -Greedy action tensor([ 1.1264, -0.6643, -0.4512, 0.3601]) tensor([0.5441, 0.0908, 0.1123, 0.2528]) -Greedy action tensor([ 1.6861, 0.3519, -1.1830, 0.2102]) tensor([0.6457, 0.1700, 0.0366, 0.1476]) -Greedy action tensor([ 1.4828, -0.5254, -0.5547, 0.3647]) tensor([0.6284, 0.0843, 0.0819, 0.2054]) -Greedy action tensor([ 2.3295, 0.3887, -0.0365, 0.4704]) tensor([0.7177, 0.1031, 0.0674, 0.1118]) -Greedy action tensor([ 1.1533, 0.0083, -0.7770, 0.1252]) tensor([0.5491, 0.1748, 0.0797, 0.1964]) -Greedy action tensor([ 1.7228, -0.9643, -0.1240, 0.2829]) tensor([0.6836, 0.0465, 0.1078, 0.1620]) -Greedy action tensor([ 1.9040, -0.7136, -0.7363, 0.4961]) tensor([0.7199, 0.0525, 0.0514, 0.1761]) -Greedy action tensor([ 1.6538, -0.9881, -0.4122, -0.2595]) tensor([0.7432, 0.0529, 0.0942, 0.1097]) -Greedy action tensor([ 1.9382, -0.4844, -0.8572, 0.0698]) tensor([0.7668, 0.0680, 0.0468, 0.1184]) -Greedy action tensor([ 2.3437, -0.6400, -0.4629, 0.5072]) tensor([0.7872, 0.0398, 0.0476, 0.1255]) -Greedy action tensor([ 0.9076, -0.1028, -0.3435, 0.3127]) tensor([0.4542, 0.1653, 0.1300, 0.2505]) -Greedy action tensor([ 1.3377, -0.3381, -0.4074, 0.3704]) tensor([0.5741, 0.1074, 0.1002, 0.2182]) -Greedy action tensor([ 1.4072, -0.6241, -1.0679, 0.1937]) tensor([0.6612, 0.0867, 0.0556, 0.1965]) -Greedy action tensor([ 1.6369, -0.0165, -0.0592, -0.1344]) tensor([0.6473, 0.1239, 0.1187, 0.1101]) -Greedy action tensor([ 1.3076, -0.5950, -0.4727, 0.1860]) tensor([0.6085, 0.0908, 0.1026, 0.1982]) -Greedy action tensor([ 1.5346, -0.4056, -0.4639, 0.4553]) tensor([0.6177, 0.0887, 0.0837, 0.2099]) -Greedy action tensor([ 1.9567, -0.7804, -0.4432, 0.5279]) tensor([0.7168, 0.0464, 0.0650, 0.1717]) -Greedy action tensor([ 1.5891, -0.4097, -1.0389, 0.0290]) tensor([0.7053, 0.0956, 0.0509, 0.1482]) -Greedy action tensor([ 1.4381, -0.0837, -0.3271, 0.0336]) tensor([0.6116, 0.1335, 0.1047, 0.1502]) -Greedy action tensor([-0.7020, -1.1468, 0.3460, -0.7305]) tensor([0.1830, 0.1173, 0.5219, 0.1779]) -Greedy action tensor([ 0.6548, -0.4254, -0.5225, -0.3015]) tensor([0.4921, 0.1671, 0.1516, 0.1891]) -Greedy action tensor([ 0.0503, -1.2461, -0.0879, -1.2978]) tensor([0.4159, 0.1138, 0.3623, 0.1080]) -Greedy action tensor([-0.0582, -0.3633, -0.0519, -0.5037]) tensor([0.2955, 0.2178, 0.2974, 0.1893]) -Greedy action tensor([-0.2022, -1.3948, -0.0339, -0.6219]) tensor([0.3181, 0.0965, 0.3764, 0.2090]) -Greedy action tensor([ 0.0272, -0.8495, -0.5445, 0.1246]) tensor([0.3244, 0.1350, 0.1831, 0.3575]) -Greedy action tensor([ 0.3260, -1.2152, -0.0592, -0.4343]) tensor([0.4234, 0.0907, 0.2880, 0.1979]) -Greedy action tensor([ 1.6328, 0.0615, 0.3561, -0.3516]) tensor([0.6157, 0.1279, 0.1717, 0.0846]) -Greedy action tensor([-1.3884, 0.0019, 0.6011, -0.9019]) tensor([0.0717, 0.2878, 0.5240, 0.1166]) -Greedy action tensor([-0.1719, -0.5120, 0.1586, -0.6374]) tensor([0.2680, 0.1907, 0.3730, 0.1683]) -Greedy action tensor([-0.2598, -0.5604, 0.3210, -0.1698]) tensor([0.2164, 0.1602, 0.3867, 0.2367]) -Greedy action tensor([ 0.4542, 0.6586, -0.4893, -0.6495]) tensor([0.3392, 0.4162, 0.1321, 0.1125]) -Greedy action tensor([ 0.1061, -1.6191, -0.3878, 0.1601]) tensor([0.3516, 0.0626, 0.2146, 0.3711]) -Greedy action tensor([-0.9294, -0.1314, -0.4742, -0.2929]) tensor([0.1495, 0.3321, 0.2357, 0.2826]) -Greedy action tensor([-0.4690, -1.1620, -0.7518, 0.4226]) tensor([0.2131, 0.1066, 0.1606, 0.5197]) -Greedy action tensor([-0.1325, -0.7184, 0.0535, 0.5214]) tensor([0.2135, 0.1188, 0.2571, 0.4105]) -Greedy action tensor([ 1.2706, -0.1696, 0.4775, -0.0199]) tensor([0.5091, 0.1206, 0.2303, 0.1400]) -Greedy action tensor([ 0.3852, -0.0988, -0.6166, 0.6126]) tensor([0.3088, 0.1903, 0.1134, 0.3876]) -Greedy action tensor([-0.0188, 0.1386, -0.3509, 0.3804]) tensor([0.2284, 0.2673, 0.1639, 0.3404]) -Greedy action tensor([-0.5973, -0.4873, 0.0746, -0.6901]) tensor([0.2006, 0.2239, 0.3927, 0.1828]) -Greedy action tensor([-1.1093, -0.2736, -0.2730, -0.8279]) tensor([0.1441, 0.3324, 0.3326, 0.1909]) -Greedy action tensor([-0.9483, -1.3318, -0.5845, -0.2129]) tensor([0.1921, 0.1309, 0.2763, 0.4007]) -Greedy action tensor([-0.5758, 0.4028, -0.5775, -0.2804]) tensor([0.1666, 0.4433, 0.1663, 0.2238]) -Greedy action tensor([ 0.0762, -0.0207, 0.4362, -0.7094]) tensor([0.2634, 0.2391, 0.3775, 0.1201]) -Greedy action tensor([ 0.1518, -0.5340, -0.3572, -0.3832]) tensor([0.3717, 0.1872, 0.2234, 0.2177]) -Greedy action tensor([-0.2511, -0.9999, -0.2169, -0.4237]) tensor([0.2986, 0.1412, 0.3090, 0.2512]) -Greedy action tensor([ 1.2488, -1.0286, 0.9485, -0.5290]) tensor([0.4970, 0.0510, 0.3680, 0.0840]) -Greedy action tensor([ 0.5676, -0.4439, -1.2035, -0.8044]) tensor([0.5595, 0.2035, 0.0952, 0.1419]) -Greedy action tensor([-0.2572, -1.5011, 0.5472, -0.2486]) tensor([0.2206, 0.0636, 0.4932, 0.2226]) -Greedy action tensor([ 0.5002, -1.1012, 0.3575, -0.2977]) tensor([0.3970, 0.0800, 0.3442, 0.1788]) -Greedy action tensor([-0.6072, -0.7242, -0.5784, -0.0533]) tensor([0.2146, 0.1910, 0.2209, 0.3735]) -Greedy action tensor([-0.3401, 0.0744, -0.2706, 0.1070]) tensor([0.1942, 0.2939, 0.2082, 0.3037]) -Greedy action tensor([ 0.4029, -0.8956, -0.1428, 0.2202]) tensor([0.3724, 0.1016, 0.2158, 0.3102]) -Greedy action tensor([-0.9162, -0.8912, 0.1380, -0.5631]) tensor([0.1583, 0.1623, 0.4542, 0.2253]) -Greedy action tensor([ 0.0623, 0.2057, 0.2273, -0.6107]) tensor([0.2602, 0.3003, 0.3068, 0.1327]) -Greedy action tensor([ 0.2983, 0.3798, 0.2766, -0.6273]) tensor([0.2890, 0.3136, 0.2828, 0.1145]) -Greedy action tensor([ 0.0263, 0.2167, -0.0195, -0.7223]) tensor([0.2749, 0.3325, 0.2626, 0.1300]) -Greedy action tensor([-0.1066, 0.2950, -0.2152, -0.7248]) tensor([0.2544, 0.3802, 0.2283, 0.1371]) -Greedy action tensor([-0.4665, -0.2007, -0.2720, -0.1082]) tensor([0.2020, 0.2635, 0.2454, 0.2891]) -Greedy action tensor([ 0.0350, -1.4364, -0.4089, 0.7532]) tensor([0.2550, 0.0585, 0.1636, 0.5229]) -Greedy action tensor([ 0.6538, 0.4084, -0.0088, 0.3865]) tensor([0.3264, 0.2554, 0.1683, 0.2499]) -Greedy action tensor([ 0.3076, -0.5698, 0.2123, -0.3016]) tensor([0.3486, 0.1450, 0.3169, 0.1896]) -Greedy action tensor([-0.4631, -0.7401, 0.6155, -1.4605]) tensor([0.1973, 0.1496, 0.5803, 0.0728]) -Greedy action tensor([ 0.1373, -0.2263, 1.2595, -0.6926]) tensor([0.1922, 0.1336, 0.5904, 0.0838]) -Greedy action tensor([-0.0734, 0.3881, -0.6048, -0.0104]) tensor([0.2359, 0.3742, 0.1386, 0.2512]) -Greedy action tensor([ 0.4357, 0.3000, -0.8155, -0.6434]) tensor([0.4001, 0.3494, 0.1145, 0.1360]) -Greedy action tensor([-0.3654, 0.4488, 0.4139, -0.0770]) tensor([0.1477, 0.3334, 0.3219, 0.1970]) -Greedy action tensor([ 1.1316, 0.0635, -0.3108, 0.6066]) tensor([0.4605, 0.1583, 0.1088, 0.2724]) -Greedy action tensor([-0.5907, -0.5068, 0.1943, -1.0771]) tensor([0.2043, 0.2222, 0.4479, 0.1256]) -Greedy action tensor([-0.6055, -0.3984, -0.5162, -0.7272]) tensor([0.2376, 0.2923, 0.2598, 0.2104]) -Greedy action tensor([ 0.2953, 0.0788, -0.1227, -0.1990]) tensor([0.3253, 0.2620, 0.2142, 0.1985]) -Greedy action tensor([-0.7108, -0.4992, -0.7886, -0.2949]) tensor([0.2138, 0.2642, 0.1978, 0.3241]) -Greedy action tensor([ 0.9300, -1.5677, 0.1563, -0.3072]) tensor([0.5453, 0.0449, 0.2516, 0.1582]) -Greedy action tensor([-1.2343, -0.8349, -0.4171, 0.1324]) tensor([0.1152, 0.1718, 0.2609, 0.4520]) -Greedy action tensor([ 0.9961, -0.4633, -0.2886, -0.2668]) tensor([0.5581, 0.1297, 0.1544, 0.1578]) -Greedy action tensor([ 0.1496, 0.3620, -0.6555, -0.4190]) tensor([0.3077, 0.3805, 0.1376, 0.1742]) -Greedy action tensor([ 0.7212, -1.0357, -0.3402, -0.6388]) tensor([0.5633, 0.0972, 0.1949, 0.1446]) -Greedy action tensor([ 0.7710, -0.6855, 1.1078, -0.2784]) tensor([0.3352, 0.0781, 0.4694, 0.1174]) -Greedy action tensor([ 0.1412, 0.1903, 0.2344, -0.4107]) tensor([0.2685, 0.2820, 0.2948, 0.1546]) -Greedy action tensor([-0.6450, -0.4937, 0.0051, -1.3207]) tensor([0.2180, 0.2536, 0.4176, 0.1109]) -Greedy action tensor([ 0.2120, -0.3877, -0.0771, 1.0094]) tensor([0.2214, 0.1215, 0.1658, 0.4913]) -Greedy action tensor([ 0.5961, -0.3537, 0.1252, -0.7394]) tensor([0.4397, 0.1701, 0.2746, 0.1157]) -Greedy action tensor([ 0.5761, -0.3479, 0.0387, -0.1799]) tensor([0.4080, 0.1620, 0.2384, 0.1916]) -Greedy action tensor([ 0.2818, -0.4093, -0.6790, 0.0096]) tensor([0.3780, 0.1894, 0.1446, 0.2879]) -Greedy action tensor([ 0.5767, 0.5494, -0.1938, 0.1418]) tensor([0.3243, 0.3156, 0.1501, 0.2100]) -Greedy action tensor([-0.6691, 0.8798, -0.0384, -1.0665]) tensor([0.1211, 0.5700, 0.2275, 0.0814]) -Greedy action tensor([-0.0359, -2.0569, -0.0983, -0.1135]) tensor([0.3336, 0.0442, 0.3134, 0.3087]) -Greedy action tensor([-0.6977, -0.3969, -1.1766, 0.0353]) tensor([0.1980, 0.2674, 0.1226, 0.4120]) -Greedy action tensor([ 1.2291, -1.2432, -0.2694, 0.8321]) tensor([0.5050, 0.0426, 0.1129, 0.3395]) -Greedy action tensor([-0.2324, -2.2259, 0.1521, 0.8327]) tensor([0.1816, 0.0247, 0.2668, 0.5269]) -Greedy action tensor([ 0.6274, -0.6011, 0.7232, 0.8150]) tensor([0.2778, 0.0813, 0.3057, 0.3351]) -Greedy action tensor([ 0.7474, -0.5311, -0.6403, 0.0484]) tensor([0.4938, 0.1375, 0.1233, 0.2454]) -Greedy action tensor([-0.6445, 0.1202, 0.0053, -1.3198]) tensor([0.1795, 0.3855, 0.3437, 0.0913]) -Greedy action tensor([-0.1132, 0.2569, 0.1660, -0.6282]) tensor([0.2290, 0.3315, 0.3027, 0.1368]) -Greedy action tensor([-1.0714, -1.0442, 0.1186, -1.1728]) tensor([0.1608, 0.1653, 0.5286, 0.1453]) -Greedy action tensor([-0.6823, 0.4411, -0.4919, -0.5633]) tensor([0.1560, 0.4797, 0.1887, 0.1757]) -Greedy action tensor([-0.0019, -0.4893, 0.0970, -0.4519]) tensor([0.2980, 0.1830, 0.3290, 0.1900]) -Greedy action tensor([ 0.0140, -0.0588, -0.0019, -1.0915]) tensor([0.3082, 0.2865, 0.3033, 0.1020]) -Greedy action tensor([-0.3498, 0.4453, 0.3755, -1.4074]) tensor([0.1777, 0.3936, 0.3670, 0.0617]) -Greedy action tensor([-0.5662, -0.5565, 0.4644, -0.5540]) tensor([0.1717, 0.1734, 0.4812, 0.1738]) -Greedy action tensor([ 0.3135, 0.1709, 0.5044, -0.6977]) tensor([0.2906, 0.2520, 0.3517, 0.1057]) -Greedy action tensor([ 1.0331, -0.6302, -0.0238, -0.4958]) tensor([0.5702, 0.1081, 0.1982, 0.1236]) -Greedy action tensor([ 0.3320, 0.0371, -0.0288, -0.0605]) tensor([0.3208, 0.2389, 0.2236, 0.2167]) -Greedy action tensor([ 1.0329, -0.6425, -0.0013, -0.5322]) tensor([0.5708, 0.1069, 0.2029, 0.1193]) -Greedy action tensor([ 0.7158, -0.3374, -0.0459, -0.1964]) tensor([0.4510, 0.1573, 0.2105, 0.1811]) -Greedy action tensor([ 0.9238, -0.4450, -0.1524, -0.3871]) tensor([0.5362, 0.1364, 0.1828, 0.1446]) -Greedy action tensor([ 0.5334, -0.1642, -0.0120, -0.3208]) tensor([0.3995, 0.1989, 0.2316, 0.1700]) -Greedy action tensor([ 0.8424, -0.6583, 0.1422, -0.3722]) tensor([0.4959, 0.1106, 0.2462, 0.1472]) -Greedy action tensor([ 0.4868, -0.3291, -0.0588, -0.2354]) tensor([0.3988, 0.1764, 0.2311, 0.1937]) -Greedy action tensor([ 1.1681e+00, -4.2449e-01, -2.3520e-04, -5.3265e-01]) tensor([0.5893, 0.1199, 0.1832, 0.1076]) -Greedy action tensor([ 0.6521, -0.6464, -0.0030, -0.2564]) tensor([0.4555, 0.1243, 0.2366, 0.1836]) -Greedy action tensor([ 0.5464, -0.4073, -0.0172, -0.1367]) tensor([0.4066, 0.1567, 0.2314, 0.2053]) -Greedy action tensor([ 0.5526, -0.2351, 0.0149, -0.1348]) tensor([0.3934, 0.1790, 0.2298, 0.1978]) -Greedy action tensor([ 0.6550, -0.5451, -0.1445, -0.0097]) tensor([0.4415, 0.1330, 0.1985, 0.2271]) -Greedy action tensor([ 0.7838, -0.6537, 0.0532, -0.4309]) tensor([0.4960, 0.1178, 0.2389, 0.1472]) -Greedy action tensor([ 1.1411, -0.7693, 0.0349, -0.6451]) tensor([0.6074, 0.0899, 0.2009, 0.1018]) -Greedy action tensor([ 0.3850, 0.1808, -0.0144, 0.0544]) tensor([0.3121, 0.2544, 0.2093, 0.2242]) -Greedy action tensor([ 0.8570, -0.4452, 0.0262, -0.2173]) tensor([0.4880, 0.1327, 0.2126, 0.1667]) -Greedy action tensor([ 1.1472, -0.6969, -0.1988, -0.5242]) tensor([0.6225, 0.0985, 0.1620, 0.1170]) -Greedy action tensor([ 0.4883, 0.2647, -0.1499, -0.0153]) tensor([0.3410, 0.2727, 0.1801, 0.2061]) -Greedy action tensor([ 1.2120, -0.5163, -0.1742, -0.3673]) tensor([0.6121, 0.1087, 0.1530, 0.1262]) -Greedy action tensor([ 1.2795, -0.7692, -0.1638, -0.5786]) tensor([0.6575, 0.0847, 0.1553, 0.1025]) -Greedy action tensor([ 1.0149, -0.5313, 0.0334, -0.2125]) tensor([0.5317, 0.1133, 0.1992, 0.1558]) -Greedy action tensor([ 1.0970, -0.6747, 0.0744, -0.5847]) tensor([0.5828, 0.0991, 0.2096, 0.1084]) -Greedy action tensor([ 0.9737, -0.4941, 0.0328, -0.5386]) tensor([0.5431, 0.1252, 0.2120, 0.1197]) -Greedy action tensor([ 0.6540, -0.5801, -0.1767, -0.1897]) tensor([0.4636, 0.1350, 0.2020, 0.1994]) -Greedy action tensor([ 1.0016, -0.7531, -0.0086, -0.4727]) tensor([0.5663, 0.0979, 0.2062, 0.1296]) -Greedy action tensor([ 0.9112, -0.6447, -0.0303, -0.5331]) tensor([0.5444, 0.1149, 0.2123, 0.1284]) -Greedy action tensor([ 0.4275, -0.0671, -0.0399, -0.1022]) tensor([0.3540, 0.2158, 0.2218, 0.2084]) -Greedy action tensor([ 0.6195, -0.0701, -0.0808, -0.2040]) tensor([0.4103, 0.2059, 0.2037, 0.1801]) -Greedy action tensor([ 0.7624, -0.0103, -0.0513, -0.0372]) tensor([0.4247, 0.1961, 0.1882, 0.1909]) -Greedy action tensor([ 0.7691, -0.1996, -0.0182, -0.0493]) tensor([0.4394, 0.1668, 0.2000, 0.1938]) -Greedy action tensor([ 0.1972, 0.0307, -0.0537, -0.1001]) tensor([0.2969, 0.2514, 0.2311, 0.2206]) -Greedy action tensor([ 0.6065, -0.2417, 0.0184, -0.0782]) tensor([0.4020, 0.1721, 0.2232, 0.2027]) -Greedy action tensor([ 0.7100, -0.4770, -0.0473, -0.3396]) tensor([0.4708, 0.1437, 0.2208, 0.1648]) -Greedy action tensor([ 0.7789, -0.7729, 0.0899, -0.4598]) tensor([0.4991, 0.1057, 0.2506, 0.1446]) -Greedy action tensor([ 0.9968, -0.7996, -0.0419, -0.4162]) tensor([0.5671, 0.0941, 0.2007, 0.1380]) -Greedy action tensor([ 1.1399, -0.5994, -0.0243, -0.3685]) tensor([0.5851, 0.1028, 0.1827, 0.1295]) -Greedy action tensor([ 0.5856, -0.2504, -0.0910, -0.4295]) tensor([0.4340, 0.1881, 0.2206, 0.1573]) -Greedy action tensor([ 0.8021, -0.5324, -0.2410, -0.3945]) tensor([0.5214, 0.1373, 0.1837, 0.1576]) -Greedy action tensor([ 0.9518, -0.7679, 0.0439, -0.6107]) tensor([0.5580, 0.0999, 0.2251, 0.1170]) -Greedy action tensor([ 8.0765e-01, -5.1311e-01, 3.1367e-04, -4.9509e-01]) tensor([0.5038, 0.1345, 0.2247, 0.1369]) -Greedy action tensor([ 0.6672, -0.4203, 0.0293, -0.1116]) tensor([0.4302, 0.1450, 0.2273, 0.1975]) -Greedy action tensor([ 0.8102, -0.3284, -0.0982, -0.3773]) tensor([0.4930, 0.1579, 0.1988, 0.1504]) -Greedy action tensor([ 0.6537, -0.0795, 0.0646, -0.1644]) tensor([0.4038, 0.1940, 0.2240, 0.1782]) -Greedy action tensor([ 0.6766, -0.5219, 0.0232, -0.2624]) tensor([0.4519, 0.1363, 0.2351, 0.1767]) -Greedy action tensor([ 0.7015, -0.7762, -0.0137, -0.4691]) tensor([0.4932, 0.1125, 0.2412, 0.1530]) -Greedy action tensor([ 0.3504, -0.1140, -0.0167, -0.1840]) tensor([0.3440, 0.2162, 0.2383, 0.2016]) -Greedy action tensor([ 0.6342, -0.4224, -0.1691, -0.0759]) tensor([0.4372, 0.1520, 0.1958, 0.2150]) -Greedy action tensor([ 0.8147, -0.3877, 0.0291, -0.3469]) tensor([0.4833, 0.1452, 0.2203, 0.1512]) -Greedy action tensor([ 0.4321, -0.2510, 0.0641, -0.4663]) tensor([0.3840, 0.1939, 0.2658, 0.1564]) -Greedy action tensor([ 0.8772, -0.7341, -0.0298, -0.5988]) tensor([0.5459, 0.1090, 0.2204, 0.1248]) -Greedy action tensor([ 0.7166, -0.6227, -0.0178, -0.2215]) tensor([0.4688, 0.1228, 0.2249, 0.1835]) -Greedy action tensor([ 0.8964, -0.5136, -0.1593, -0.4929]) tensor([0.5431, 0.1326, 0.1890, 0.1354]) -Greedy action tensor([ 0.6185, -0.5230, -0.1353, -0.3191]) tensor([0.4584, 0.1464, 0.2157, 0.1795]) -Greedy action tensor([ 0.9942, -0.7203, -0.1173, -0.5933]) tensor([0.5836, 0.1051, 0.1920, 0.1193]) -Greedy action tensor([ 0.3249, -0.1108, -0.1381, -0.0359]) tensor([0.3363, 0.2175, 0.2117, 0.2344]) -Greedy action tensor([ 0.8950, -0.5385, 0.0585, -0.1803]) tensor([0.4968, 0.1185, 0.2152, 0.1695]) -Greedy action tensor([ 0.4909, -0.2112, 0.0444, -0.3412]) tensor([0.3890, 0.1928, 0.2489, 0.1693]) -Greedy action tensor([ 1.2116, -0.6594, -0.1550, -0.4866]) tensor([0.6282, 0.0967, 0.1602, 0.1150]) -Greedy action tensor([ 0.5445, -0.2446, 0.0248, -0.1978]) tensor([0.3960, 0.1799, 0.2355, 0.1885]) -Greedy action tensor([ 0.7884, -0.7085, -0.1232, -0.2523]) tensor([0.5053, 0.1131, 0.2031, 0.1785]) -Greedy action tensor([ 0.7353, -0.3538, -0.1118, -0.1330]) tensor([0.4577, 0.1540, 0.1962, 0.1921]) -Greedy action tensor([ 4.1129e-01, -1.8714e-02, -1.8847e-04, 1.2668e-02]) tensor([0.3351, 0.2180, 0.2220, 0.2249]) -Greedy action tensor([ 1.0980, -0.7254, 0.0289, -0.3559]) tensor([0.5752, 0.0929, 0.1975, 0.1344]) -Greedy action tensor([ 1.5888, -1.0395, -0.0820, -0.7609]) tensor([0.7376, 0.0533, 0.1388, 0.0704]) -Greedy action tensor([ 0.7844, -0.4339, 0.0717, -0.2420]) tensor([0.4663, 0.1379, 0.2287, 0.1671]) -Greedy action tensor([ 0.4929, 0.3388, -0.2384, 0.1343]) tensor([0.3293, 0.2822, 0.1585, 0.2300]) -Greedy action tensor([ 0.2301, -0.1069, -0.0926, -0.2119]) tensor([0.3246, 0.2317, 0.2351, 0.2086]) -Greedy action tensor([ 0.5343, -0.1213, 0.0295, -0.3449]) tensor([0.3940, 0.2046, 0.2378, 0.1636]) -Greedy action tensor([ 1.0354, -0.6077, -0.2147, -0.5797]) tensor([0.5957, 0.1152, 0.1707, 0.1185]) -Greedy action tensor([ 0.8200, -0.2513, -0.1227, -0.1568]) tensor([0.4742, 0.1625, 0.1847, 0.1786]) -Greedy action tensor([ 0.9938, -0.4601, 0.1605, -0.5522]) tensor([0.5315, 0.1242, 0.2310, 0.1133]) -Greedy action tensor([ 0.8299, -0.3266, 0.0670, -0.3607]) tensor([0.4796, 0.1509, 0.2237, 0.1458]) -Greedy action tensor([ 0.6497, -0.2526, 0.2305, -0.4593]) tensor([0.4179, 0.1695, 0.2748, 0.1379]) -Greedy action tensor([ 0.7427, -0.5109, -0.0564, -0.4167]) tensor([0.4881, 0.1393, 0.2195, 0.1531]) -Greedy action tensor([ 0.7519, -0.6028, 0.0717, -0.4124]) tensor([0.4815, 0.1243, 0.2439, 0.1503]) -Greedy action tensor([ 1.0282, -0.6077, -0.0901, -0.6496]) tensor([0.5853, 0.1140, 0.1913, 0.1093]) -Greedy action tensor([ 0.5557, -0.1276, -0.0652, 0.0013]) tensor([0.3821, 0.1930, 0.2054, 0.2195]) -Greedy action tensor([ 0.4567, -0.3905, -0.1386, -0.2659]) tensor([0.4056, 0.1738, 0.2237, 0.1969]) -Greedy action tensor([ 1.0812, -0.7464, 0.1312, -0.3785]) tensor([0.5619, 0.0903, 0.2173, 0.1305]) -Greedy action tensor([ 0.7963, 0.0911, -0.0260, 0.1073]) tensor([0.4106, 0.2028, 0.1804, 0.2061]) -Greedy action tensor([-1.6947, -0.3717, 0.6243, -0.0684]) tensor([0.0500, 0.1877, 0.5081, 0.2542]) -Greedy action tensor([-1.4967, -0.6225, 0.4829, 0.0928]) tensor([0.0644, 0.1543, 0.4659, 0.3154]) -Greedy action tensor([-0.9580, 0.8337, 0.0822, -0.4283]) tensor([0.0867, 0.5204, 0.2455, 0.1473]) -Greedy action tensor([-1.9169, -0.4378, 0.6530, -0.1668]) tensor([0.0413, 0.1813, 0.5396, 0.2377]) -Greedy action tensor([-1.8976, -0.3517, 0.6355, -0.1533]) tensor([0.0417, 0.1955, 0.5245, 0.2383]) -Greedy action tensor([-0.0196, 1.1290, -0.1022, -0.0604]) tensor([0.1657, 0.5226, 0.1526, 0.1591]) -Greedy action tensor([-1.8773, -0.4738, 0.6351, -0.1590]) tensor([0.0435, 0.1771, 0.5368, 0.2426]) -Greedy action tensor([-1.6995, -0.4961, 0.5487, -0.1112]) tensor([0.0535, 0.1782, 0.5065, 0.2618]) -Greedy action tensor([-1.8847, -0.3899, 0.6397, -0.1473]) tensor([0.0423, 0.1887, 0.5284, 0.2405]) -Greedy action tensor([-1.7832, -0.4672, 0.6445, 0.0318]) tensor([0.0450, 0.1679, 0.5104, 0.2766]) -Greedy action tensor([-0.7189, -0.0022, -0.3135, -0.3742]) tensor([0.1678, 0.3436, 0.2517, 0.2369]) -Greedy action tensor([-1.7713e+00, -4.5415e-01, 6.3759e-01, 1.3213e-03]) tensor([0.0460, 0.1717, 0.5116, 0.2707]) -Greedy action tensor([-1.9234, -0.4213, 0.6480, -0.1656]) tensor([0.0410, 0.1842, 0.5368, 0.2379]) -Greedy action tensor([-1.9115, -0.4207, 0.6594, -0.1520]) tensor([0.0411, 0.1825, 0.5375, 0.2388]) -Greedy action tensor([-1.7889, -0.4240, 0.5756, -0.1125]) tensor([0.0478, 0.1873, 0.5090, 0.2558]) -Greedy action tensor([-1.5963, 0.0145, 0.4378, -0.0063]) tensor([0.0539, 0.2698, 0.4120, 0.2643]) -Greedy action tensor([-1.7766, -0.4519, 0.5791, -0.1021]) tensor([0.0484, 0.1822, 0.5109, 0.2585]) -Greedy action tensor([-1.7680, -0.3555, 0.5689, -0.1154]) tensor([0.0484, 0.1986, 0.5006, 0.2525]) -Greedy action tensor([-1.9240, -0.4374, 0.6589, -0.1699]) tensor([0.0409, 0.1810, 0.5416, 0.2365]) -Greedy action tensor([-1.8582, -0.4742, 0.7417, 0.0422]) tensor([0.0398, 0.1587, 0.5355, 0.2660]) -Greedy action tensor([-0.2736, 0.0922, 0.2019, 0.3491]) tensor([0.1691, 0.2437, 0.2720, 0.3152]) -Greedy action tensor([-1.7944, -0.4538, 0.5997, -0.0966]) tensor([0.0471, 0.1799, 0.5159, 0.2571]) -Greedy action tensor([-1.9146, -0.4428, 0.6578, -0.1653]) tensor([0.0413, 0.1800, 0.5411, 0.2376]) -Greedy action tensor([-1.6311, -0.4574, 0.7235, -0.0196]) tensor([0.0506, 0.1635, 0.5326, 0.2533]) -Greedy action tensor([-1.9278, -0.4122, 0.6548, -0.1664]) tensor([0.0406, 0.1850, 0.5378, 0.2366]) -Greedy action tensor([-1.9439, -0.4442, 0.6632, -0.1805]) tensor([0.0402, 0.1801, 0.5452, 0.2345]) -Greedy action tensor([-1.8978, -0.3581, 0.6371, -0.1582]) tensor([0.0417, 0.1945, 0.5262, 0.2376]) -Greedy action tensor([-1.8858, -0.3337, 0.6259, -0.1525]) tensor([0.0422, 0.1992, 0.5199, 0.2387]) -Greedy action tensor([-1.8729, -0.2973, 0.6074, -0.1355]) tensor([0.0426, 0.2060, 0.5091, 0.2422]) -Greedy action tensor([-1.7424, -0.1162, 0.5564, 0.0749]) tensor([0.0450, 0.2290, 0.4487, 0.2772]) -Greedy action tensor([-1.9459, -0.4500, 0.6679, -0.1810]) tensor([0.0401, 0.1789, 0.5470, 0.2341]) -Greedy action tensor([-1.9390, -0.4430, 0.6651, -0.1757]) tensor([0.0403, 0.1799, 0.5448, 0.2350]) -Greedy action tensor([-1.2979, 0.1791, 0.2821, -0.6896]) tensor([0.0828, 0.3628, 0.4022, 0.1522]) -Greedy action tensor([-1.8855, -0.4016, 0.6312, -0.1375]) tensor([0.0425, 0.1873, 0.5262, 0.2440]) -Greedy action tensor([-1.7324, -0.3286, 0.5446, -0.0997]) tensor([0.0502, 0.2042, 0.4889, 0.2567]) -Greedy action tensor([-1.2399, 0.2276, 0.2570, 0.0087]) tensor([0.0752, 0.3264, 0.3361, 0.2622]) -Greedy action tensor([-1.3450, 0.5266, 0.3112, -0.0676]) tensor([0.0613, 0.3981, 0.3209, 0.2197]) -Greedy action tensor([-1.8924, -0.4448, 0.6449, -0.1536]) tensor([0.0424, 0.1803, 0.5361, 0.2412]) -Greedy action tensor([-1.9278, -0.4239, 0.6589, -0.1712]) tensor([0.0407, 0.1831, 0.5406, 0.2357]) -Greedy action tensor([-1.1370, 0.7711, 0.1536, 0.1299]) tensor([0.0670, 0.4516, 0.2435, 0.2379]) -Greedy action tensor([-1.8441, -0.4515, 0.6218, -0.1266]) tensor([0.0447, 0.1799, 0.5263, 0.2490]) -Greedy action tensor([-1.8732, -0.4770, 0.6298, -0.1547]) tensor([0.0438, 0.1769, 0.5351, 0.2442]) -Greedy action tensor([-0.9761, -0.6643, 0.2651, 0.0533]) tensor([0.1159, 0.1584, 0.4011, 0.3246]) -Greedy action tensor([-0.4222, 1.0042, -0.0441, 0.4758]) tensor([0.1102, 0.4587, 0.1608, 0.2704]) -Greedy action tensor([-1.2312, -0.3439, 0.3962, 0.2091]) tensor([0.0785, 0.1906, 0.3995, 0.3314]) -Greedy action tensor([-1.8985, -0.4344, 0.6401, -0.1615]) tensor([0.0423, 0.1827, 0.5350, 0.2400]) -Greedy action tensor([-1.9126, -0.4418, 0.6484, -0.1682]) tensor([0.0416, 0.1812, 0.5390, 0.2382]) -Greedy action tensor([-1.3211, -0.1697, 0.6172, 0.1390]) tensor([0.0649, 0.2051, 0.4506, 0.2793]) -Greedy action tensor([-1.9036, -0.4401, 0.6474, -0.1601]) tensor([0.0419, 0.1811, 0.5373, 0.2396]) -Greedy action tensor([-1.6582, -0.1978, 0.4977, -0.0961]) tensor([0.0534, 0.2302, 0.4615, 0.2548]) -Greedy action tensor([-1.9275, -0.4507, 0.6606, -0.1732]) tensor([0.0409, 0.1790, 0.5438, 0.2363]) -Greedy action tensor([-1.3058, 0.6316, 0.1999, 0.2044]) tensor([0.0589, 0.4089, 0.2655, 0.2667]) -Greedy action tensor([-1.8924, -0.4249, 0.6455, -0.1460]) tensor([0.0421, 0.1829, 0.5333, 0.2417]) -Greedy action tensor([-1.7183, -0.0487, 0.4906, -0.0212]) tensor([0.0479, 0.2544, 0.4362, 0.2615]) -Greedy action tensor([-1.7774, -0.0728, 0.5238, -0.0884]) tensor([0.0457, 0.2511, 0.4560, 0.2472]) -Greedy action tensor([-1.9392, -0.4397, 0.6628, -0.1771]) tensor([0.0403, 0.1807, 0.5441, 0.2349]) -Greedy action tensor([-1.6625, -0.2923, 0.6198, -0.0356]) tensor([0.0504, 0.1986, 0.4943, 0.2567]) -Greedy action tensor([-1.8458, -0.4647, 0.6117, -0.1351]) tensor([0.0451, 0.1793, 0.5262, 0.2494]) -Greedy action tensor([-1.9129, -0.4160, 0.6406, -0.1729]) tensor([0.0416, 0.1860, 0.5351, 0.2372]) -Greedy action tensor([-1.7552, -0.2470, 0.6156, -0.0408]) tensor([0.0459, 0.2075, 0.4916, 0.2550]) -Greedy action tensor([-1.4994, 0.0194, 0.3652, 0.1298]) tensor([0.0584, 0.2668, 0.3769, 0.2979]) -Greedy action tensor([-1.8471, -0.2999, 0.6261, -0.1252]) tensor([0.0432, 0.2029, 0.5122, 0.2417]) -Greedy action tensor([-1.9161, -0.4028, 0.6509, -0.1653]) tensor([0.0411, 0.1867, 0.5355, 0.2367]) -Greedy action tensor([-1.1386, 0.8381, 0.1507, 0.3593]) tensor([0.0613, 0.4423, 0.2224, 0.2740]) -Greedy action tensor([-1.6210, -0.4676, 0.6392, 0.1389]) tensor([0.0511, 0.1620, 0.4899, 0.2970]) -Greedy action tensor([-1.8016, -0.3810, 0.6134, -0.0808]) tensor([0.0456, 0.1889, 0.5105, 0.2550]) -Greedy action tensor([-1.8654, -0.4058, 0.6228, -0.1440]) tensor([0.0436, 0.1877, 0.5249, 0.2438]) -Greedy action tensor([-1.8839, -0.4571, 0.6251, -0.1512]) tensor([0.0433, 0.1802, 0.5318, 0.2447]) -Greedy action tensor([-1.9300, -0.4177, 0.6547, -0.1707]) tensor([0.0406, 0.1844, 0.5389, 0.2361]) -Greedy action tensor([-1.9297, -0.4315, 0.6591, -0.1723]) tensor([0.0407, 0.1820, 0.5416, 0.2358]) -Greedy action tensor([-1.8517, -0.4504, 0.6219, -0.1335]) tensor([0.0444, 0.1805, 0.5273, 0.2478]) -Greedy action tensor([-1.7842, -0.4866, 0.6437, -0.0748]) tensor([0.0465, 0.1701, 0.5267, 0.2568]) -Greedy action tensor([-1.9047, -0.4459, 0.6499, -0.1638]) tensor([0.0419, 0.1802, 0.5390, 0.2389]) -Greedy action tensor([-0.9217, -0.2763, 0.2055, 0.8659]) tensor([0.0836, 0.1593, 0.2579, 0.4992]) -Greedy action tensor([-1.7448, 0.1972, 0.5017, -0.0749]) tensor([0.0440, 0.3066, 0.4158, 0.2336]) -Greedy action tensor([-1.8773, -0.3449, 0.6206, -0.1584]) tensor([0.0428, 0.1981, 0.5203, 0.2388]) -Greedy action tensor([-1.9243, -0.4492, 0.6612, -0.1706]) tensor([0.0410, 0.1790, 0.5435, 0.2365]) -Greedy action tensor([-1.8940, -0.2354, 0.6164, -0.1772]) tensor([0.0414, 0.2177, 0.5102, 0.2307]) -Greedy action tensor([-1.9475, -0.4446, 0.6657, -0.1825]) tensor([0.0400, 0.1799, 0.5462, 0.2339]) -Greedy action tensor([-1.9147, -0.4432, 0.6545, -0.1658]) tensor([0.0414, 0.1803, 0.5404, 0.2379]) -Greedy action tensor([-1.8055, -0.4444, 0.6131, -0.0250]) tensor([0.0453, 0.1768, 0.5090, 0.2689]) -Greedy action tensor([ 0.7148, -0.4065, 0.0072, -0.4198]) tensor([0.4672, 0.1523, 0.2303, 0.1502]) -Greedy action tensor([ 0.8889, -0.5624, 0.0144, -0.4197]) tensor([0.5204, 0.1219, 0.2170, 0.1406]) -Greedy action tensor([ 1.1453, -0.9288, 0.1909, -0.4980]) tensor([0.5868, 0.0737, 0.2260, 0.1135]) -Greedy action tensor([ 0.8485, -0.5765, -0.1217, -0.3485]) tensor([0.5204, 0.1252, 0.1972, 0.1572]) -Greedy action tensor([ 0.7355, -0.7253, -0.0701, -0.3610]) tensor([0.4968, 0.1153, 0.2220, 0.1659]) -Greedy action tensor([ 1.0082, -0.8266, -0.0460, -0.4868]) tensor([0.5772, 0.0922, 0.2011, 0.1295]) -Greedy action tensor([ 0.4233, 0.1288, -0.0996, 0.1112]) tensor([0.3258, 0.2427, 0.1931, 0.2384]) -Greedy action tensor([ 0.8973, -0.5302, -0.1701, -0.3619]) tensor([0.5354, 0.1285, 0.1841, 0.1520]) -Greedy action tensor([ 0.6408, 0.2543, -0.0643, -0.1688]) tensor([0.3819, 0.2595, 0.1887, 0.1700]) -Greedy action tensor([ 0.4958, -0.2347, -0.0583, -0.1016]) tensor([0.3837, 0.1848, 0.2204, 0.2111]) -Greedy action tensor([ 0.8785, -0.1958, 0.0695, -0.0312]) tensor([0.4567, 0.1560, 0.2034, 0.1839]) -Greedy action tensor([ 0.9460, -0.8578, 0.0755, -0.4764]) tensor([0.5481, 0.0903, 0.2295, 0.1322]) -Greedy action tensor([ 7.6753e-01, 6.3577e-03, 2.5678e-04, -2.8755e-02]) tensor([0.4197, 0.1961, 0.1949, 0.1893]) -Greedy action tensor([ 0.8396, -0.7775, 0.0022, -0.7331]) tensor([0.5438, 0.1079, 0.2354, 0.1128]) -Greedy action tensor([ 1.4402, -1.0733, 0.1019, -0.7848]) tensor([0.6890, 0.0558, 0.1807, 0.0745]) -Greedy action tensor([ 0.4662, -0.6798, -0.0979, -0.6037]) tensor([0.4485, 0.1426, 0.2551, 0.1538]) -Greedy action tensor([ 0.7784, -0.7670, -0.0992, -0.2098]) tensor([0.4997, 0.1065, 0.2078, 0.1860]) -Greedy action tensor([ 1.1412, -0.4971, -0.1049, -0.6394]) tensor([0.6059, 0.1177, 0.1743, 0.1021]) -Greedy action tensor([ 0.4757, -0.1434, -0.1869, -0.0516]) tensor([0.3782, 0.2036, 0.1950, 0.2232]) -Greedy action tensor([ 0.9074, -0.8304, 0.2097, -0.4030]) tensor([0.5146, 0.0905, 0.2561, 0.1388]) -Greedy action tensor([ 0.4126, -0.0283, 0.1585, 0.1496]) tensor([0.3137, 0.2019, 0.2433, 0.2411]) -Greedy action tensor([ 1.5833, -0.8804, -0.0193, -0.4724]) tensor([0.7070, 0.0602, 0.1424, 0.0905]) -Greedy action tensor([ 1.2056, -0.7158, -0.1354, -0.4187]) tensor([0.6230, 0.0912, 0.1630, 0.1228]) -Greedy action tensor([ 0.6342, -0.4523, -0.1380, -0.1778]) tensor([0.4458, 0.1504, 0.2059, 0.1979]) -Greedy action tensor([ 0.7682, -0.5369, 0.0206, -0.5789]) tensor([0.4988, 0.1353, 0.2362, 0.1297]) -Greedy action tensor([ 0.8217, -0.8567, 0.1639, -0.2096]) tensor([0.4851, 0.0906, 0.2513, 0.1730]) -Greedy action tensor([ 0.4997, -0.0661, -0.0322, -0.0202]) tensor([0.3636, 0.2065, 0.2136, 0.2162]) -Greedy action tensor([ 1.0480, -0.3913, 0.0136, -0.2893]) tensor([0.5391, 0.1278, 0.1916, 0.1415]) -Greedy action tensor([ 1.0460, -0.3913, -0.0250, -0.1955]) tensor([0.5350, 0.1271, 0.1833, 0.1546]) -Greedy action tensor([ 0.5197, -0.2600, -0.0059, -0.3320]) tensor([0.4038, 0.1852, 0.2387, 0.1723]) -Greedy action tensor([ 0.7436, -0.2919, -0.0143, -0.1675]) tensor([0.4493, 0.1595, 0.2106, 0.1806]) -Greedy action tensor([ 0.6372, -0.4100, -0.1544, -0.5746]) tensor([0.4758, 0.1670, 0.2156, 0.1416]) -Greedy action tensor([ 1.0144, -0.4602, -0.0611, -0.4099]) tensor([0.5523, 0.1264, 0.1884, 0.1329]) -Greedy action tensor([ 0.3747, -0.0253, -0.0025, -0.1199]) tensor([0.3372, 0.2260, 0.2312, 0.2056]) -Greedy action tensor([ 0.8535, -0.5319, -0.0756, -0.2811]) tensor([0.5085, 0.1272, 0.2008, 0.1635]) -Greedy action tensor([ 0.5474, -0.3933, 0.0190, -0.2722]) tensor([0.4131, 0.1613, 0.2436, 0.1820]) -Greedy action tensor([ 0.9321, -0.5487, 0.0656, -0.3361]) tensor([0.5183, 0.1179, 0.2179, 0.1458]) -Greedy action tensor([ 0.4910, -0.1045, 0.0453, -0.0570]) tensor([0.3610, 0.1990, 0.2312, 0.2087]) -Greedy action tensor([ 0.7913, -0.2918, -0.1165, -0.1695]) tensor([0.4707, 0.1594, 0.1899, 0.1801]) -Greedy action tensor([ 0.8219, -0.5515, 0.0766, -0.3167]) tensor([0.4883, 0.1237, 0.2317, 0.1564]) -Greedy action tensor([ 0.5147, -0.3867, -0.0771, -0.0261]) tensor([0.3935, 0.1597, 0.2177, 0.2291]) -Greedy action tensor([ 1.2781, -0.6670, -0.0509, -0.7804]) tensor([0.6513, 0.0931, 0.1724, 0.0831]) -Greedy action tensor([ 0.3101, 0.0262, -0.0696, -0.0083]) tensor([0.3160, 0.2379, 0.2162, 0.2299]) -Greedy action tensor([ 0.7432, -0.1953, 0.0328, -0.0704]) tensor([0.4299, 0.1682, 0.2113, 0.1906]) -Greedy action tensor([ 0.8338, -0.8064, -0.1896, -0.2064]) tensor([0.5245, 0.1017, 0.1885, 0.1853]) -Greedy action tensor([ 0.5758, -0.4787, -0.0789, -0.1366]) tensor([0.4240, 0.1477, 0.2203, 0.2080]) -Greedy action tensor([ 0.7281, -0.7643, 0.0749, -0.4349]) tensor([0.4860, 0.1093, 0.2529, 0.1519]) -Greedy action tensor([ 0.7126, -0.3485, 0.0459, -0.2533]) tensor([0.4464, 0.1545, 0.2292, 0.1699]) -Greedy action tensor([ 0.8921, -0.5275, 0.2284, -0.4853]) tensor([0.4978, 0.1204, 0.2563, 0.1256]) -Greedy action tensor([ 0.9097, -0.6187, 0.1049, -0.1946]) tensor([0.5011, 0.1087, 0.2241, 0.1661]) -Greedy action tensor([ 0.9596, -0.6929, 0.1648, -0.5648]) tensor([0.5374, 0.1029, 0.2427, 0.1170]) -Greedy action tensor([ 0.6444, -0.2919, 0.0052, -0.3258]) tensor([0.4350, 0.1705, 0.2296, 0.1649]) -Greedy action tensor([ 1.0761, -0.3546, -0.1639, 0.0043]) tensor([0.5345, 0.1278, 0.1547, 0.1830]) -Greedy action tensor([ 1.2475, -0.6777, -0.1866, -0.7873]) tensor([0.6601, 0.0963, 0.1573, 0.0863]) -Greedy action tensor([ 1.3084, -0.9007, -0.0765, -0.5824]) tensor([0.6618, 0.0727, 0.1657, 0.0999]) -Greedy action tensor([ 0.4964, -0.3261, 0.0055, -0.5237]) tensor([0.4146, 0.1822, 0.2538, 0.1495]) -Greedy action tensor([ 0.7598, -0.3437, -0.0290, -0.1639]) tensor([0.4580, 0.1519, 0.2081, 0.1819]) -Greedy action tensor([ 0.6927, -0.2555, -0.0452, -0.2082]) tensor([0.4402, 0.1705, 0.2105, 0.1788]) -Greedy action tensor([ 1.3319, -1.0866, -0.0970, -0.7031]) tensor([0.6852, 0.0610, 0.1642, 0.0895]) -Greedy action tensor([ 0.8707, -0.4620, 0.1719, -0.3483]) tensor([0.4863, 0.1283, 0.2418, 0.1437]) -Greedy action tensor([ 1.3872, -1.0248, 0.0607, -0.7169]) tensor([0.6771, 0.0607, 0.1797, 0.0826]) -Greedy action tensor([ 0.8481, -0.4797, -0.1352, -0.5490]) tensor([0.5301, 0.1405, 0.1983, 0.1311]) -Greedy action tensor([ 1.1355, -0.4843, 0.0844, -0.7362]) tensor([0.5878, 0.1163, 0.2054, 0.0904]) -Greedy action tensor([ 0.7675, -0.7810, 0.0607, -0.3756]) tensor([0.4939, 0.1050, 0.2436, 0.1575]) -Greedy action tensor([ 1.1583e+00, -5.4602e-01, -1.0375e-03, -4.8512e-01]) tensor([0.5921, 0.1077, 0.1857, 0.1145]) -Greedy action tensor([ 0.4564, 0.1742, -0.1343, 0.1000]) tensor([0.3324, 0.2507, 0.1841, 0.2328]) -Greedy action tensor([ 0.9279, -0.9946, 0.1656, -0.3315]) tensor([0.5272, 0.0771, 0.2460, 0.1496]) -Greedy action tensor([ 0.6921, -0.6464, -0.0071, -0.1551]) tensor([0.4571, 0.1199, 0.2272, 0.1959]) -Greedy action tensor([ 0.5229, -0.3265, -0.0337, -0.1634]) tensor([0.3993, 0.1708, 0.2289, 0.2010]) -Greedy action tensor([ 0.8813, -0.5219, -0.0548, -0.5906]) tensor([0.5355, 0.1316, 0.2100, 0.1229]) -Greedy action tensor([ 0.7061, -0.6267, 0.3261, -0.7595]) tensor([0.4590, 0.1211, 0.3139, 0.1060]) -Greedy action tensor([ 0.9753, -0.7256, 0.0387, -0.4116]) tensor([0.5481, 0.1000, 0.2148, 0.1370]) -Greedy action tensor([ 0.3931, -0.0415, -0.0847, -0.0525]) tensor([0.3439, 0.2227, 0.2132, 0.2202]) -Greedy action tensor([ 0.5642, -0.4053, 0.0441, -0.2736]) tensor([0.4156, 0.1576, 0.2470, 0.1798]) -Greedy action tensor([ 0.7948, -0.2784, 0.0462, -0.0269]) tensor([0.4435, 0.1516, 0.2098, 0.1950]) -Greedy action tensor([ 1.5398, -0.9136, -0.0131, -0.7881]) tensor([0.7168, 0.0616, 0.1517, 0.0699]) -Greedy action tensor([ 0.9364, -0.5036, -0.0706, -0.2110]) tensor([0.5209, 0.1234, 0.1903, 0.1654]) -Greedy action tensor([ 0.7054, -0.5226, -0.1486, -0.5530]) tensor([0.4993, 0.1462, 0.2126, 0.1419]) -Greedy action tensor([ 0.5924, 0.1772, 0.2506, -0.2689]) tensor([0.3580, 0.2364, 0.2543, 0.1513]) -Greedy action tensor([ 1.3338, -0.5940, -0.2135, -0.4710]) tensor([0.6567, 0.0955, 0.1398, 0.1080]) -Greedy action tensor([ 0.5884, -0.4525, -0.1739, -0.5884]) tensor([0.4699, 0.1659, 0.2193, 0.1449]) -Greedy action tensor([ 0.8088, -0.3772, -0.3300, 0.3222]) tensor([0.4463, 0.1363, 0.1429, 0.2744]) -Greedy action tensor([ 1.5772, -0.5024, -0.3941, 0.3913]) tensor([0.6371, 0.0796, 0.0887, 0.1946]) -Greedy action tensor([ 1.4928, -0.7126, -0.4035, 0.0544]) tensor([0.6677, 0.0736, 0.1002, 0.1585]) -Greedy action tensor([ 1.3639, -0.2859, -0.6384, 0.4002]) tensor([0.5853, 0.1124, 0.0790, 0.2233]) -Greedy action tensor([ 1.0372, -0.6514, -0.4197, 0.6415]) tensor([0.4783, 0.0884, 0.1114, 0.3220]) -Greedy action tensor([ 2.4758, 0.4191, 0.0129, -0.0266]) tensor([0.7722, 0.0988, 0.0658, 0.0632]) -Greedy action tensor([ 1.3131, 0.4275, 0.0628, -0.3292]) tensor([0.5284, 0.2179, 0.1513, 0.1023]) -Greedy action tensor([ 1.7701, -0.9933, 0.1450, 0.8390]) tensor([0.6046, 0.0381, 0.1190, 0.2383]) -Greedy action tensor([ 1.9761, -1.1520, 0.1611, 0.4884]) tensor([0.6981, 0.0306, 0.1137, 0.1577]) -Greedy action tensor([ 1.3990, -0.3670, -0.5234, 0.2707]) tensor([0.6094, 0.1042, 0.0891, 0.1972]) -Greedy action tensor([ 0.8114, 0.0487, -0.5361, 0.2749]) tensor([0.4327, 0.2018, 0.1125, 0.2530]) -Greedy action tensor([ 1.5639, -0.7297, -0.0496, 0.5706]) tensor([0.5986, 0.0604, 0.1192, 0.2217]) -Greedy action tensor([ 2.4563, -1.3878, -0.3303, 1.0416]) tensor([0.7541, 0.0161, 0.0465, 0.1833]) -Greedy action tensor([ 0.9502, -0.6233, -0.6247, 0.3356]) tensor([0.5114, 0.1060, 0.1059, 0.2766]) -Greedy action tensor([ 1.1910, 0.0484, -0.1641, 0.3854]) tensor([0.4941, 0.1576, 0.1274, 0.2208]) -Greedy action tensor([ 1.6039, -0.4179, -0.5635, 0.6963]) tensor([0.6059, 0.0802, 0.0694, 0.2445]) -Greedy action tensor([ 1.9117, -1.1515, 0.1614, 0.7197]) tensor([0.6561, 0.0307, 0.1140, 0.1992]) -Greedy action tensor([ 1.5607, -0.1842, -0.1597, 0.2800]) tensor([0.6129, 0.1071, 0.1097, 0.1703]) -Greedy action tensor([ 1.1367, 0.2882, -0.8895, 0.2441]) tensor([0.5077, 0.2174, 0.0669, 0.2080]) -Greedy action tensor([ 1.4156, -0.3026, -0.4606, -0.0228]) tensor([0.6370, 0.1143, 0.0976, 0.1512]) -Greedy action tensor([ 1.4568, -0.4617, -0.0080, 0.0318]) tensor([0.6179, 0.0907, 0.1428, 0.1486]) -Greedy action tensor([ 1.6988, -0.8277, -0.3900, 0.1175]) tensor([0.7095, 0.0567, 0.0879, 0.1459]) -Greedy action tensor([ 1.1230, -0.3914, -0.3656, 0.5498]) tensor([0.4977, 0.1095, 0.1123, 0.2805]) -Greedy action tensor([ 2.9603, -1.2276, -0.1660, 0.9080]) tensor([0.8421, 0.0128, 0.0370, 0.1082]) -Greedy action tensor([ 1.6214, -0.8283, -0.6267, 0.1330]) tensor([0.7054, 0.0609, 0.0745, 0.1592]) -Greedy action tensor([ 1.5545, -0.6650, -0.2848, 0.4044]) tensor([0.6312, 0.0686, 0.1003, 0.1999]) -Greedy action tensor([ 2.0237, 0.1663, -0.1665, 0.1098]) tensor([0.7065, 0.1103, 0.0791, 0.1042]) -Greedy action tensor([ 1.3049, -0.4706, -0.7206, 0.0257]) tensor([0.6331, 0.1072, 0.0835, 0.1762]) -Greedy action tensor([ 1.4363, -0.2088, -0.5056, 0.1259]) tensor([0.6226, 0.1202, 0.0893, 0.1679]) -Greedy action tensor([ 1.4581, -0.3423, -0.4775, -0.2010]) tensor([0.6667, 0.1102, 0.0962, 0.1269]) -Greedy action tensor([ 1.4794, -0.7038, -0.1131, 0.1189]) tensor([0.6359, 0.0717, 0.1293, 0.1631]) -Greedy action tensor([ 1.5093, -0.5424, -0.9155, 0.4804]) tensor([0.6351, 0.0816, 0.0562, 0.2270]) -Greedy action tensor([ 1.7520, -1.3113, -0.4527, -0.1993]) tensor([0.7698, 0.0360, 0.0849, 0.1094]) -Greedy action tensor([ 1.9640, -0.8814, -0.2625, 0.3994]) tensor([0.7272, 0.0423, 0.0785, 0.1521]) -Greedy action tensor([ 1.7609, -0.9409, -0.5034, 0.1625]) tensor([0.7282, 0.0489, 0.0757, 0.1473]) -Greedy action tensor([ 1.4233, -0.6054, -0.2821, 0.5791]) tensor([0.5737, 0.0754, 0.1042, 0.2466]) -Greedy action tensor([ 1.8917, -0.5386, -0.1649, 0.4518]) tensor([0.6883, 0.0606, 0.0880, 0.1631]) -Greedy action tensor([ 1.3898, -0.7421, -0.3682, 0.4660]) tensor([0.5924, 0.0703, 0.1021, 0.2352]) -Greedy action tensor([ 1.6437, -0.2431, -0.8446, 0.3262]) tensor([0.6656, 0.1009, 0.0553, 0.1783]) -Greedy action tensor([ 2.4707, -1.2126, -0.4008, 0.5445]) tensor([0.8147, 0.0205, 0.0461, 0.1187]) -Greedy action tensor([ 2.3623, -1.1280, -0.6041, 0.7960]) tensor([0.7747, 0.0236, 0.0399, 0.1618]) -Greedy action tensor([ 1.6329, -0.8785, -0.5278, 0.5031]) tensor([0.6581, 0.0534, 0.0758, 0.2126]) -Greedy action tensor([ 1.2216, -0.4222, -0.2156, 0.1675]) tensor([0.5620, 0.1086, 0.1335, 0.1959]) -Greedy action tensor([ 1.7406, -0.6373, -0.6638, 0.4542]) tensor([0.6852, 0.0636, 0.0619, 0.1893]) -Greedy action tensor([ 1.1373, -0.5990, -0.4174, 0.4222]) tensor([0.5329, 0.0939, 0.1126, 0.2607]) -Greedy action tensor([ 1.5146, 0.0042, -0.4805, 0.4845]) tensor([0.5835, 0.1288, 0.0794, 0.2083]) -Greedy action tensor([ 1.4249, -0.8297, -0.4712, 0.0921]) tensor([0.6584, 0.0691, 0.0989, 0.1736]) -Greedy action tensor([ 1.8562, -0.8520, -0.1405, 0.3499]) tensor([0.7022, 0.0468, 0.0953, 0.1557]) -Greedy action tensor([ 1.4833, 0.1573, -1.1393, -0.0800]) tensor([0.6462, 0.1716, 0.0469, 0.1353]) -Greedy action tensor([ 1.6850, -0.6313, -0.7709, 0.6930]) tensor([0.6430, 0.0634, 0.0552, 0.2384]) -Greedy action tensor([ 1.1963, -0.4714, -0.6240, 0.2108]) tensor([0.5801, 0.1094, 0.0940, 0.2165]) -Greedy action tensor([ 1.0517, -0.3938, -0.2149, 0.3796]) tensor([0.4931, 0.1162, 0.1389, 0.2518]) -Greedy action tensor([ 1.6775, -0.6179, -0.7559, 0.1825]) tensor([0.7079, 0.0713, 0.0621, 0.1587]) -Greedy action tensor([ 1.9375, -1.0008, -0.3220, 0.2142]) tensor([0.7486, 0.0396, 0.0782, 0.1336]) -Greedy action tensor([ 1.3232, -0.1234, -0.5205, -0.0612]) tensor([0.6082, 0.1432, 0.0962, 0.1523]) -Greedy action tensor([ 1.7010, -0.2787, -0.5124, 0.3083]) tensor([0.6685, 0.0923, 0.0731, 0.1661]) -Greedy action tensor([ 1.5299, -0.7527, -0.6075, 0.1296]) tensor([0.6819, 0.0696, 0.0804, 0.1681]) -Greedy action tensor([ 1.4044, -0.2426, -0.8259, 0.2368]) tensor([0.6206, 0.1195, 0.0667, 0.1931]) -Greedy action tensor([ 1.6847, -0.5987, -0.3006, 0.3256]) tensor([0.6684, 0.0681, 0.0918, 0.1717]) -Greedy action tensor([ 1.0964, -0.2867, -0.6611, 0.1061]) tensor([0.5572, 0.1397, 0.0961, 0.2070]) -Greedy action tensor([ 1.3373, -0.3416, -0.7671, 0.4985]) tensor([0.5745, 0.1072, 0.0700, 0.2483]) -Greedy action tensor([ 1.7466, 0.0972, -0.3248, -0.0548]) tensor([0.6742, 0.1296, 0.0850, 0.1113]) -Greedy action tensor([ 1.4809, -0.2905, -0.4332, 0.2190]) tensor([0.6247, 0.1063, 0.0921, 0.1769]) -Greedy action tensor([ 1.7924, -0.7288, -0.3331, 0.2864]) tensor([0.7035, 0.0565, 0.0840, 0.1560]) -Greedy action tensor([ 1.3742, 0.1643, 0.0073, -0.3976]) tensor([0.5803, 0.1731, 0.1479, 0.0987]) -Greedy action tensor([ 2.9717, -1.6286, 0.0034, 1.1455]) tensor([0.8180, 0.0082, 0.0420, 0.1317]) -Greedy action tensor([ 1.0465, -0.1808, -0.7381, 0.0581]) tensor([0.5455, 0.1599, 0.0916, 0.2030]) -Greedy action tensor([ 1.1493, 0.1384, -0.3376, -0.4792]) tensor([0.5598, 0.2037, 0.1266, 0.1099]) -Greedy action tensor([ 1.1994, -0.1910, -0.5865, 0.1906]) tensor([0.5614, 0.1398, 0.0941, 0.2047]) -Greedy action tensor([ 0.9104, -0.2657, -1.0190, 0.4099]) tensor([0.4854, 0.1498, 0.0705, 0.2943]) -Greedy action tensor([ 1.7213, -0.5614, -0.3923, 0.5134]) tensor([0.6572, 0.0670, 0.0794, 0.1964]) -Greedy action tensor([ 1.3104, -0.2011, -0.4863, 0.0494]) tensor([0.5989, 0.1321, 0.0993, 0.1697]) -Greedy action tensor([ 1.8625e+00, 9.8860e-04, 2.8542e-02, -2.6066e-01]) tensor([0.6969, 0.1083, 0.1114, 0.0834]) -Greedy action tensor([ 1.0974, -0.0551, -0.5499, -0.1600]) tensor([0.5578, 0.1762, 0.1074, 0.1586]) -Greedy action tensor([ 1.7703, -0.4011, -0.2285, 0.3358]) tensor([0.6722, 0.0766, 0.0911, 0.1601]) -Greedy action tensor([ 1.3681, -0.5598, -0.3669, 0.0386]) tensor([0.6303, 0.0917, 0.1112, 0.1668]) -Greedy action tensor([ 1.5158, -1.1130, -0.5238, -0.2690]) tensor([0.7299, 0.0527, 0.0949, 0.1225]) -Greedy action tensor([ 1.2048, -0.2345, -0.3152, 0.1997]) tensor([0.5489, 0.1301, 0.1201, 0.2009]) -Greedy action tensor([ 1.2292, -0.7021, -0.3041, 0.6882]) tensor([0.5147, 0.0746, 0.1111, 0.2996]) -Greedy action tensor([ 1.5630, -0.6585, -0.2500, 0.1460]) tensor([0.6605, 0.0716, 0.1078, 0.1601]) -Greedy action tensor([ 2.0247, -0.6425, -0.8474, 0.4259]) tensor([0.7529, 0.0523, 0.0426, 0.1522]) -Greedy action tensor([-0.2614, -1.3899, 0.8846, -0.3220]) tensor([0.1848, 0.0598, 0.5814, 0.1740]) -Greedy action tensor([ 1.3429, 0.5032, -0.2130, -0.1602]) tensor([0.5361, 0.2315, 0.1131, 0.1193]) -Greedy action tensor([-0.1605, -0.2221, -0.3244, -0.1098]) tensor([0.2603, 0.2448, 0.2210, 0.2739]) -Greedy action tensor([ 0.0629, -1.8289, -0.0458, -0.2757]) tensor([0.3622, 0.0546, 0.3249, 0.2582]) -Greedy action tensor([-1.1893, 0.1709, -0.8473, -1.0400]) tensor([0.1339, 0.5220, 0.1886, 0.1555]) -Greedy action tensor([-0.2788, -1.1070, 0.1435, -1.2335]) tensor([0.2987, 0.1305, 0.4558, 0.1150]) -Greedy action tensor([ 0.1490, -0.1047, -0.2924, -0.0353]) tensor([0.3076, 0.2387, 0.1978, 0.2558]) -Greedy action tensor([-0.9566, -1.0027, -0.1750, -0.3559]) tensor([0.1677, 0.1601, 0.3664, 0.3058]) -Greedy action tensor([ 0.6650, 0.3735, -0.9653, 0.3844]) tensor([0.3706, 0.2769, 0.0726, 0.2799]) -Greedy action tensor([ 0.3955, -0.3315, -0.7736, 0.6489]) tensor([0.3244, 0.1568, 0.1008, 0.4180]) -Greedy action tensor([ 0.0526, -0.6408, -0.2489, -0.1581]) tensor([0.3279, 0.1639, 0.2426, 0.2656]) -Greedy action tensor([-0.9553, -1.2507, 1.1107, -0.9479]) tensor([0.0939, 0.0699, 0.7415, 0.0946]) -Greedy action tensor([ 0.0118, -0.6796, 0.2192, -0.2490]) tensor([0.2856, 0.1430, 0.3514, 0.2200]) -Greedy action tensor([ 1.3276, 0.4388, 0.9258, -0.2424]) tensor([0.4370, 0.1797, 0.2924, 0.0909]) -Greedy action tensor([ 0.1373, -0.7743, 0.8443, 0.3041]) tensor([0.2169, 0.0871, 0.4398, 0.2562]) -Greedy action tensor([ 1.6920, -0.9271, 1.2757, 0.4714]) tensor([0.4932, 0.0359, 0.3253, 0.1455]) -Greedy action tensor([ 0.7544, -0.7952, 0.5569, 0.5505]) tensor([0.3510, 0.0745, 0.2881, 0.2863]) -Greedy action tensor([ 0.4666, -1.0887, -0.2369, -0.7523]) tensor([0.4996, 0.1055, 0.2472, 0.1477]) -Greedy action tensor([ 0.0608, 0.0956, -0.0390, -0.8013]) tensor([0.2974, 0.3079, 0.2691, 0.1256]) -Greedy action tensor([-0.2548, -0.1674, -1.1117, -0.6613]) tensor([0.3143, 0.3430, 0.1334, 0.2093]) -Greedy action tensor([-0.4085, -0.1659, 0.3502, -0.2484]) tensor([0.1791, 0.2283, 0.3825, 0.2102]) -Greedy action tensor([-1.0024, -1.2408, 0.2344, -0.0496]) tensor([0.1278, 0.1007, 0.4402, 0.3314]) -Greedy action tensor([-0.5435, -0.4689, 0.5156, -0.9397]) tensor([0.1775, 0.1912, 0.5118, 0.1194]) -Greedy action tensor([ 0.0781, -1.6664, 1.1766, -0.2323]) tensor([0.2038, 0.0356, 0.6112, 0.1494]) -Greedy action tensor([-1.3656, -0.1280, -0.7052, -0.1559]) tensor([0.1027, 0.3541, 0.1988, 0.3444]) -Greedy action tensor([-0.2381, -0.0462, -0.3813, 0.1227]) tensor([0.2216, 0.2685, 0.1920, 0.3179]) -Greedy action tensor([ 0.3503, -0.4941, 0.8573, -0.7655]) tensor([0.2926, 0.1258, 0.4858, 0.0959]) -Greedy action tensor([ 1.1685, -1.5502, 1.9192, 0.2991]) tensor([0.2775, 0.0183, 0.5879, 0.1163]) -Greedy action tensor([-0.5150, -0.0213, -0.5864, -0.3519]) tensor([0.2107, 0.3452, 0.1962, 0.2480]) -Greedy action tensor([ 0.4533, -0.1613, 0.7381, -0.2803]) tensor([0.2984, 0.1614, 0.3968, 0.1433]) -Greedy action tensor([ 0.2760, -0.3452, -0.6711, -0.4968]) tensor([0.4190, 0.2251, 0.1625, 0.1934]) -Greedy action tensor([ 1.1028, -0.1220, 0.1756, -0.2400]) tensor([0.5127, 0.1506, 0.2028, 0.1339]) -Greedy action tensor([-0.7418, -0.7303, -0.9354, -0.0341]) tensor([0.2055, 0.2079, 0.1694, 0.4171]) -Greedy action tensor([-2.4083, -0.4400, -1.3037, 0.2047]) tensor([0.0403, 0.2885, 0.1216, 0.5496]) -Greedy action tensor([-0.0520, -0.6338, -0.3809, -0.7978]) tensor([0.3632, 0.2030, 0.2614, 0.1723]) -Greedy action tensor([ 1.5256, -0.1444, 0.1666, -0.1583]) tensor([0.6132, 0.1154, 0.1575, 0.1138]) -Greedy action tensor([-0.6267, -0.2455, -0.4003, -0.5906]) tensor([0.2103, 0.3079, 0.2637, 0.2180]) -Greedy action tensor([ 0.7941, -0.0824, -0.4293, 0.3498]) tensor([0.4252, 0.1770, 0.1251, 0.2727]) -Greedy action tensor([ 0.1701, -0.4707, -0.8785, 1.0183]) tensor([0.2374, 0.1251, 0.0832, 0.5544]) -Greedy action tensor([-0.3446, 0.3427, -0.4563, -0.3601]) tensor([0.2055, 0.4085, 0.1837, 0.2023]) -Greedy action tensor([-0.0054, -1.3609, 0.4200, 0.0999]) tensor([0.2565, 0.0661, 0.3925, 0.2849]) -Greedy action tensor([-1.0810, -0.0058, 0.5255, -0.7042]) tensor([0.0964, 0.2825, 0.4806, 0.1405]) -Greedy action tensor([-0.2617, 0.3265, -0.2561, -0.8597]) tensor([0.2296, 0.4134, 0.2308, 0.1262]) -Greedy action tensor([ 0.0857, -1.0964, 0.4372, -0.0475]) tensor([0.2775, 0.0851, 0.3944, 0.2429]) -Greedy action tensor([ 0.4005, -1.0881, -0.9502, -0.0568]) tensor([0.4722, 0.1066, 0.1223, 0.2989]) -Greedy action tensor([-1.0937, -0.1826, -0.9106, -0.9925]) tensor([0.1726, 0.4292, 0.2073, 0.1910]) -Greedy action tensor([ 0.2606, -0.9041, -0.0189, 0.0974]) tensor([0.3428, 0.1069, 0.2592, 0.2911]) -Greedy action tensor([ 1.5649, 0.2128, -0.0567, -0.3565]) tensor([0.6240, 0.1614, 0.1233, 0.0913]) -Greedy action tensor([-0.0609, 0.3673, 0.0929, -0.0894]) tensor([0.2140, 0.3284, 0.2496, 0.2080]) -Greedy action tensor([0.7486, 0.1721, 0.7580, 0.7120]) tensor([0.2829, 0.1589, 0.2855, 0.2727]) -Greedy action tensor([-0.1841, -0.0471, 0.4604, -1.1207]) tensor([0.2250, 0.2581, 0.4287, 0.0882]) -Greedy action tensor([ 0.3163, -0.7737, -0.9296, -0.8945]) tensor([0.5203, 0.1749, 0.1497, 0.1550]) -Greedy action tensor([ 0.1286, -1.2569, -0.2462, 0.5614]) tensor([0.2874, 0.0719, 0.1976, 0.4431]) -Greedy action tensor([ 0.0331, 0.1894, -0.0340, -0.4861]) tensor([0.2703, 0.3161, 0.2528, 0.1608]) -Greedy action tensor([-0.0083, -1.5297, -0.1460, -0.4790]) tensor([0.3684, 0.0805, 0.3210, 0.2301]) -Greedy action tensor([-0.5284, -0.6108, -0.6165, -1.1581]) tensor([0.2968, 0.2733, 0.2718, 0.1581]) -Greedy action tensor([ 0.2790, -1.0418, 1.1147, -1.0924]) tensor([0.2613, 0.0697, 0.6026, 0.0663]) -Greedy action tensor([ 1.5720, -0.9135, -0.2493, 0.0390]) tensor([0.6845, 0.0570, 0.1108, 0.1478]) -Greedy action tensor([ 0.3261, -1.1892, 0.8572, -0.2721]) tensor([0.2882, 0.0633, 0.4901, 0.1584]) -Greedy action tensor([-0.0303, -1.2225, 0.7287, -0.6962]) tensor([0.2529, 0.0768, 0.5403, 0.1300]) -Greedy action tensor([-0.8610, -0.5414, 0.9875, -1.1250]) tensor([0.1053, 0.1450, 0.6688, 0.0809]) -Greedy action tensor([-0.5959, -0.9388, 0.2457, -1.1602]) tensor([0.2175, 0.1543, 0.5045, 0.1237]) -Greedy action tensor([-0.3942, -0.8071, 0.0048, -0.4576]) tensor([0.2445, 0.1618, 0.3643, 0.2294]) -Greedy action tensor([-0.7079, -1.0658, 1.1644, -1.4940]) tensor([0.1155, 0.0808, 0.7511, 0.0526]) -Greedy action tensor([ 0.1336, -0.9926, 1.7491, -0.8786]) tensor([0.1488, 0.0483, 0.7488, 0.0541]) -Greedy action tensor([-0.5133, -0.5309, -0.6451, 0.1913]) tensor([0.2048, 0.2013, 0.1795, 0.4144]) -Greedy action tensor([-0.8714, -0.7138, 0.2198, -1.0427]) tensor([0.1669, 0.1954, 0.4970, 0.1406]) -Greedy action tensor([-0.2646, 0.7721, -0.7617, 0.0163]) tensor([0.1738, 0.4902, 0.1057, 0.2302]) -Greedy action tensor([-0.4678, -0.7726, 0.3478, 0.0318]) tensor([0.1771, 0.1306, 0.4004, 0.2919]) -Greedy action tensor([ 1.1630, -1.4922, -0.2955, 0.7376]) tensor([0.5111, 0.0359, 0.1189, 0.3340]) -Greedy action tensor([ 1.0132, -0.3376, 0.8808, -0.3777]) tensor([0.4195, 0.1087, 0.3675, 0.1044]) -Greedy action tensor([-0.3395, -0.8087, 0.3549, -0.0057]) tensor([0.1990, 0.1245, 0.3986, 0.2779]) -Greedy action tensor([ 0.0148, -0.1508, -0.3809, -0.0061]) tensor([0.2857, 0.2421, 0.1924, 0.2798]) -Greedy action tensor([-1.3054, -1.1677, 0.4910, -0.7878]) tensor([0.1015, 0.1165, 0.6117, 0.1703]) -Greedy action tensor([-0.0073, -0.2916, -0.2039, 0.1127]) tensor([0.2702, 0.2033, 0.2219, 0.3046]) -Greedy action tensor([ 0.8130, -1.8049, -0.4176, 0.4388]) tensor([0.4871, 0.0355, 0.1423, 0.3351]) -Greedy action tensor([-0.9158, -0.9230, 0.2372, -1.0535]) tensor([0.1658, 0.1646, 0.5251, 0.1445]) -Greedy action tensor([-0.0140, -0.2730, 1.2587, 0.2599]) tensor([0.1502, 0.1159, 0.5363, 0.1975]) -Greedy action tensor([ 0.7157, 0.1599, 0.0668, -1.3152]) tensor([0.4489, 0.2575, 0.2346, 0.0589]) -Greedy action tensor([-0.9900, 0.4218, -0.4229, -0.7397]) tensor([0.1227, 0.5034, 0.2163, 0.1576]) -Greedy action tensor([-0.3061, -0.8056, -0.0177, 0.1942]) tensor([0.2178, 0.1322, 0.2907, 0.3593]) -Greedy action tensor([ 0.9209, -0.6788, 0.1300, -0.3820]) tensor([0.5189, 0.1048, 0.2353, 0.1410]) -Greedy action tensor([ 0.9277, -0.7238, 0.1031, -0.4708]) tensor([0.5327, 0.1022, 0.2335, 0.1316]) -Greedy action tensor([ 0.7366, -0.2891, -0.0354, -0.1624]) tensor([0.4489, 0.1610, 0.2074, 0.1827]) -Greedy action tensor([ 0.4850, 0.1447, 0.0170, -0.0007]) tensor([0.3386, 0.2410, 0.2121, 0.2083]) -Greedy action tensor([ 0.8714, -0.6042, -0.0555, -0.3180]) tensor([0.5185, 0.1185, 0.2052, 0.1578]) -Greedy action tensor([ 0.8440, -0.8289, -0.0511, -0.4684]) tensor([0.5361, 0.1006, 0.2190, 0.1443]) -Greedy action tensor([ 0.4590, -0.5206, -0.1634, -0.1315]) tensor([0.4055, 0.1522, 0.2176, 0.2247]) -Greedy action tensor([ 1.0430, -0.1240, -0.0254, -0.1032]) tensor([0.5069, 0.1578, 0.1742, 0.1611]) -Greedy action tensor([ 0.9913, -0.4112, 0.1023, -0.3300]) tensor([0.5198, 0.1279, 0.2137, 0.1387]) -Greedy action tensor([ 0.4688, -0.3507, -0.0500, -0.1829]) tensor([0.3911, 0.1723, 0.2328, 0.2038]) -Greedy action tensor([ 0.5931, -0.7127, -0.0987, -0.5933]) tensor([0.4815, 0.1305, 0.2411, 0.1470]) -Greedy action tensor([ 0.7965, 0.2726, -0.1654, -0.1432]) tensor([0.4228, 0.2504, 0.1616, 0.1652]) -Greedy action tensor([ 1.0265, -0.9126, 0.0037, -0.5622]) tensor([0.5856, 0.0842, 0.2106, 0.1196]) -Greedy action tensor([ 0.6920, -0.3729, -0.2679, -0.3333]) tensor([0.4793, 0.1652, 0.1835, 0.1719]) -Greedy action tensor([ 1.2409, -0.5677, -0.0168, -0.4344]) tensor([0.6115, 0.1002, 0.1738, 0.1145]) -Greedy action tensor([ 0.8161, -0.4864, -0.0384, -0.1769]) tensor([0.4836, 0.1315, 0.2058, 0.1792]) -Greedy action tensor([ 0.8961, -0.5138, -0.0535, -0.5226]) tensor([0.5339, 0.1304, 0.2066, 0.1292]) -Greedy action tensor([ 0.5119, 0.2249, -0.0933, 0.0900]) tensor([0.3387, 0.2542, 0.1849, 0.2221]) -Greedy action tensor([ 0.5431, -0.5418, 0.0270, -0.5742]) tensor([0.4421, 0.1494, 0.2639, 0.1446]) -Greedy action tensor([ 0.9710, -0.6898, -0.0682, -0.3532]) tensor([0.5526, 0.1050, 0.1955, 0.1470]) -Greedy action tensor([ 0.4766, 0.0433, -0.0026, 0.0143]) tensor([0.3451, 0.2238, 0.2137, 0.2174]) -Greedy action tensor([ 0.9471, -0.7900, 0.0995, -0.5949]) tensor([0.5499, 0.0968, 0.2356, 0.1177]) -Greedy action tensor([0.4099, 0.1600, 0.0362, 0.0545]) tensor([0.3157, 0.2459, 0.2172, 0.2212]) -Greedy action tensor([ 1.1516, -0.7949, -0.0311, -0.6230]) tensor([0.6177, 0.0882, 0.1893, 0.1047]) -Greedy action tensor([ 0.8458, -0.5420, -0.0177, -0.4113]) tensor([0.5113, 0.1276, 0.2156, 0.1455]) -Greedy action tensor([ 0.7930, -0.5638, 0.1682, -0.2466]) tensor([0.4659, 0.1200, 0.2494, 0.1647]) -Greedy action tensor([ 0.4826, -0.1500, -0.1588, -0.2871]) tensor([0.3967, 0.2107, 0.2089, 0.1837]) -Greedy action tensor([ 0.7721, -0.7805, 0.1507, -0.6224]) tensor([0.5008, 0.1060, 0.2690, 0.1242]) -Greedy action tensor([ 0.7923, -0.3788, -0.1994, -0.3171]) tensor([0.4973, 0.1542, 0.1845, 0.1640]) -Greedy action tensor([ 0.6683, -0.4055, 0.1741, -0.3125]) tensor([0.4298, 0.1469, 0.2622, 0.1612]) -Greedy action tensor([ 0.9374, -0.4370, -0.0295, -0.3128]) tensor([0.5209, 0.1318, 0.1981, 0.1492]) -Greedy action tensor([ 1.1548, -0.6665, 0.0284, -0.4061]) tensor([0.5896, 0.0954, 0.1912, 0.1238]) -Greedy action tensor([ 0.8963, -0.4567, -0.0301, -0.1959]) tensor([0.5025, 0.1299, 0.1990, 0.1686]) -Greedy action tensor([ 0.3418, 0.3222, -0.0642, 0.1102]) tensor([0.2907, 0.2851, 0.1937, 0.2306]) -Greedy action tensor([ 0.7379, 0.3969, -0.1110, 0.2450]) tensor([0.3637, 0.2586, 0.1556, 0.2222]) -Greedy action tensor([ 0.4558, 0.2277, -0.1068, -0.1383]) tensor([0.3427, 0.2728, 0.1953, 0.1892]) -Greedy action tensor([ 0.5449, -0.1675, -0.1065, 0.0037]) tensor([0.3855, 0.1891, 0.2010, 0.2244]) -Greedy action tensor([ 0.9332, -0.6694, 0.0137, -0.3279]) tensor([0.5309, 0.1069, 0.2117, 0.1504]) -Greedy action tensor([ 0.7458, -0.2869, -0.0089, -0.3007]) tensor([0.4593, 0.1635, 0.2159, 0.1613]) -Greedy action tensor([ 0.6940, -0.5906, -0.0101, -0.2840]) tensor([0.4657, 0.1289, 0.2303, 0.1751]) -Greedy action tensor([ 0.7983, -0.4141, 0.1596, -0.3299]) tensor([0.4653, 0.1384, 0.2457, 0.1506]) -Greedy action tensor([ 0.5301, -0.1800, -0.1302, -0.5626]) tensor([0.4267, 0.2098, 0.2205, 0.1431]) -Greedy action tensor([ 0.9191, -0.5867, 0.0046, -0.0961]) tensor([0.5038, 0.1118, 0.2019, 0.1825]) -Greedy action tensor([ 0.8545, -0.2727, -0.0622, -0.4578]) tensor([0.5018, 0.1625, 0.2006, 0.1351]) -Greedy action tensor([ 0.9473, -0.6027, 0.0338, -0.2716]) tensor([0.5239, 0.1112, 0.2101, 0.1548]) -Greedy action tensor([ 1.1370, -0.6384, 0.0061, -0.4498]) tensor([0.5894, 0.0998, 0.1902, 0.1206]) -Greedy action tensor([ 0.9200, -0.6206, -0.1112, -0.4575]) tensor([0.5485, 0.1175, 0.1956, 0.1384]) -Greedy action tensor([ 0.4379, -0.1536, -0.0268, 0.0568]) tensor([0.3491, 0.1932, 0.2193, 0.2384]) -Greedy action tensor([ 0.0934, -0.1905, -0.0650, -0.3641]) tensor([0.3087, 0.2324, 0.2635, 0.1954]) -Greedy action tensor([ 0.9362, -0.4842, 0.0527, -0.5168]) tensor([0.5294, 0.1279, 0.2188, 0.1238]) -Greedy action tensor([ 0.8045, -0.5742, 0.0036, -0.5698]) tensor([0.5118, 0.1289, 0.2298, 0.1295]) -Greedy action tensor([ 1.1334, -0.6777, -0.0955, -0.6892]) tensor([0.6182, 0.1011, 0.1809, 0.0999]) -Greedy action tensor([ 0.7737, -0.5189, 0.0736, -0.5023]) tensor([0.4878, 0.1339, 0.2422, 0.1362]) -Greedy action tensor([ 0.8261, -0.4980, 0.0825, -0.2862]) tensor([0.4831, 0.1285, 0.2296, 0.1588]) -Greedy action tensor([ 0.8614, -0.5098, 0.1650, -0.3604]) tensor([0.4885, 0.1240, 0.2435, 0.1440]) -Greedy action tensor([ 1.1637, -0.8613, 0.0582, -0.6598]) tensor([0.6156, 0.0813, 0.2038, 0.0994]) -Greedy action tensor([ 0.9631, -0.4446, -0.0997, -0.2023]) tensor([0.5258, 0.1287, 0.1816, 0.1639]) -Greedy action tensor([ 0.6679, -0.5129, -0.0998, -0.1603]) tensor([0.4529, 0.1391, 0.2102, 0.1978]) -Greedy action tensor([ 0.7463, -0.9336, 0.1424, -0.3363]) tensor([0.4827, 0.0900, 0.2639, 0.1635]) -Greedy action tensor([ 1.1086, -0.1273, -0.2383, -0.2481]) tensor([0.5531, 0.1607, 0.1438, 0.1424]) -Greedy action tensor([ 0.9951, -0.9305, 0.0862, -0.5408]) tensor([0.5669, 0.0826, 0.2284, 0.1220]) -Greedy action tensor([ 1.1488, -0.8133, 0.2259, -0.5662]) tensor([0.5821, 0.0818, 0.2313, 0.1048]) -Greedy action tensor([ 0.8118, -0.7976, -0.0772, -0.3471]) tensor([0.5195, 0.1039, 0.2136, 0.1630]) -Greedy action tensor([ 1.2052, -0.9094, 0.0764, -0.6959]) tensor([0.6275, 0.0757, 0.2030, 0.0938]) -Greedy action tensor([ 1.0649, -0.4655, -0.0501, -0.5605]) tensor([0.5743, 0.1243, 0.1883, 0.1130]) -Greedy action tensor([ 0.4082, -0.1431, 0.1270, 0.0120]) tensor([0.3329, 0.1918, 0.2513, 0.2240]) -Greedy action tensor([ 0.8490, -0.6103, -0.0665, -0.4872]) tensor([0.5276, 0.1226, 0.2112, 0.1387]) -Greedy action tensor([ 0.7856, -0.7804, 0.2621, -0.9487]) tensor([0.5056, 0.1056, 0.2995, 0.0893]) -Greedy action tensor([ 0.8841, -0.6212, 0.0505, -0.4764]) tensor([0.5227, 0.1160, 0.2271, 0.1341]) -Greedy action tensor([ 1.0866, -0.8311, 0.0274, -0.7668]) tensor([0.6059, 0.0890, 0.2101, 0.0950]) -Greedy action tensor([ 0.4917, 0.1745, -0.1451, 0.0841]) tensor([0.3422, 0.2492, 0.1810, 0.2276]) -Greedy action tensor([ 0.7534, -0.6242, -0.0415, -0.2897]) tensor([0.4863, 0.1226, 0.2197, 0.1714]) -Greedy action tensor([ 0.4657, 0.1017, -0.0273, 0.1257]) tensor([0.3314, 0.2303, 0.2024, 0.2359]) -Greedy action tensor([ 0.8427, -0.4062, -0.1262, -0.5718]) tensor([0.5237, 0.1502, 0.1988, 0.1273]) -Greedy action tensor([ 0.6606, -0.4239, -0.2115, -0.4169]) tensor([0.4770, 0.1613, 0.1994, 0.1624]) -Greedy action tensor([ 0.4691, 0.2086, -0.0926, 0.1126]) tensor([0.3288, 0.2534, 0.1875, 0.2302]) -Greedy action tensor([ 1.0998, -0.7375, 0.0893, -0.3460]) tensor([0.5686, 0.0905, 0.2070, 0.1339]) -Greedy action tensor([ 0.6211, -0.4347, -0.0486, -0.1773]) tensor([0.4329, 0.1506, 0.2216, 0.1948]) -Greedy action tensor([ 0.6689, 0.2171, -0.1765, 0.1809]) tensor([0.3732, 0.2375, 0.1602, 0.2291]) -Greedy action tensor([ 0.6094, -0.3828, -0.1017, -0.4054]) tensor([0.4496, 0.1667, 0.2208, 0.1630]) -Greedy action tensor([ 0.5240, -0.3661, -0.1715, -0.0789]) tensor([0.4070, 0.1671, 0.2031, 0.2227]) -Greedy action tensor([ 1.2515, -0.4237, -0.5823, 0.1912]) tensor([0.5905, 0.1106, 0.0944, 0.2045]) -Greedy action tensor([ 1.3862, -0.4224, -0.4605, 0.2257]) tensor([0.6116, 0.1002, 0.0965, 0.1916]) -Greedy action tensor([ 1.2708, -0.2829, -0.5098, 0.0402]) tensor([0.5980, 0.1265, 0.1008, 0.1747]) -Greedy action tensor([ 1.2715, -0.1536, -1.1968, 0.3980]) tensor([0.5738, 0.1380, 0.0486, 0.2396]) -Greedy action tensor([ 1.3162, -0.0297, -0.5863, 0.3982]) tensor([0.5528, 0.1439, 0.0825, 0.2208]) -Greedy action tensor([ 1.0865, -0.2386, -0.3897, 0.1778]) tensor([0.5271, 0.1401, 0.1204, 0.2124]) -Greedy action tensor([ 1.7673, -0.5347, -0.5447, 0.4073]) tensor([0.6869, 0.0687, 0.0680, 0.1763]) -Greedy action tensor([ 1.6447, -0.7656, -0.6561, 0.4455]) tensor([0.6705, 0.0602, 0.0672, 0.2021]) -Greedy action tensor([ 1.2026, -0.0974, -1.0972, 0.2714]) tensor([0.5660, 0.1542, 0.0568, 0.2230]) -Greedy action tensor([ 1.3523, -0.4663, -0.2635, 0.6886]) tensor([0.5331, 0.0865, 0.1059, 0.2745]) -Greedy action tensor([ 1.9034, -0.6586, -0.0638, 0.1140]) tensor([0.7225, 0.0557, 0.1010, 0.1207]) -Greedy action tensor([ 1.5562, 0.4139, -0.3602, 0.2674]) tensor([0.5741, 0.1832, 0.0845, 0.1582]) -Greedy action tensor([ 1.3164, -0.3821, -0.1664, 0.4453]) tensor([0.5469, 0.1001, 0.1242, 0.2289]) -Greedy action tensor([ 1.5604, -0.4838, -0.2356, 0.1305]) tensor([0.6516, 0.0844, 0.1081, 0.1559]) -Greedy action tensor([ 2.9162, -1.2122, 0.1535, 1.4421]) tensor([0.7644, 0.0123, 0.0482, 0.1750]) -Greedy action tensor([ 1.3916, -0.7134, -0.3188, 0.8257]) tensor([0.5346, 0.0651, 0.0967, 0.3036]) -Greedy action tensor([ 1.6178, -0.5127, -0.6944, 0.2409]) tensor([0.6802, 0.0808, 0.0674, 0.1716]) -Greedy action tensor([ 1.1598, -0.6144, -0.6355, 0.5537]) tensor([0.5316, 0.0902, 0.0883, 0.2900]) -Greedy action tensor([ 1.3358, -0.3174, -1.1222, 0.2352]) tensor([0.6212, 0.1189, 0.0532, 0.2067]) -Greedy action tensor([ 1.3239, -0.0983, -1.0015, -0.0649]) tensor([0.6296, 0.1519, 0.0615, 0.1570]) -Greedy action tensor([ 1.6974, -0.2919, -0.3759, -0.1690]) tensor([0.7056, 0.0965, 0.0887, 0.1091]) -Greedy action tensor([ 1.1305, -0.3596, -0.7184, 0.4108]) tensor([0.5349, 0.1205, 0.0842, 0.2604]) -Greedy action tensor([ 1.3271, -0.4017, -0.2900, 0.1056]) tensor([0.5985, 0.1062, 0.1188, 0.1764]) -Greedy action tensor([ 1.4194, -0.1806, -0.5069, 0.3422]) tensor([0.5924, 0.1196, 0.0863, 0.2017]) -Greedy action tensor([ 1.5599, -0.8189, -0.3172, 0.4470]) tensor([0.6352, 0.0589, 0.0972, 0.2087]) -Greedy action tensor([ 1.3549, -0.2136, -0.5865, 0.1100]) tensor([0.6098, 0.1271, 0.0875, 0.1756]) -Greedy action tensor([ 1.3986, -0.4127, -0.7996, 0.2449]) tensor([0.6290, 0.1028, 0.0698, 0.1984]) -Greedy action tensor([ 1.5580, -0.5618, -0.5369, 0.6773]) tensor([0.6033, 0.0724, 0.0743, 0.2500]) -Greedy action tensor([ 2.1208, -1.4156, -0.6629, 0.4006]) tensor([0.7874, 0.0229, 0.0487, 0.1410]) -Greedy action tensor([ 1.4811, -0.8303, -0.0900, 0.6393]) tensor([0.5754, 0.0570, 0.1196, 0.2480]) -Greedy action tensor([ 1.3047, 0.1138, -0.5797, 0.3650]) tensor([0.5415, 0.1646, 0.0823, 0.2116]) -Greedy action tensor([ 1.6291, -0.4358, -0.4705, 0.1997]) tensor([0.6717, 0.0852, 0.0823, 0.1608]) -Greedy action tensor([ 1.5780, 0.0019, -0.5365, -0.1401]) tensor([0.6636, 0.1372, 0.0801, 0.1191]) -Greedy action tensor([ 0.9073, -0.1571, -0.9594, 0.0367]) tensor([0.5213, 0.1798, 0.0806, 0.2183]) -Greedy action tensor([ 2.0654, -1.1038, -0.3512, 0.6915]) tensor([0.7223, 0.0304, 0.0645, 0.1828]) -Greedy action tensor([ 1.4451, -0.0481, -0.8910, 0.1470]) tensor([0.6272, 0.1409, 0.0607, 0.1713]) -Greedy action tensor([ 1.2304, -0.5469, -0.8624, 0.0538]) tensor([0.6247, 0.1056, 0.0771, 0.1926]) -Greedy action tensor([ 1.6981, -0.2044, -0.6370, 0.2790]) tensor([0.6721, 0.1003, 0.0651, 0.1626]) -Greedy action tensor([ 1.7747, -0.3877, -0.5274, 0.5056]) tensor([0.6684, 0.0769, 0.0669, 0.1879]) -Greedy action tensor([ 0.9801, -0.3878, -0.5211, 0.1606]) tensor([0.5213, 0.1327, 0.1162, 0.2297]) -Greedy action tensor([ 1.2118, 0.0632, -0.9556, 0.1821]) tensor([0.5591, 0.1773, 0.0640, 0.1997]) -Greedy action tensor([ 2.4995, -1.3832, 0.4608, -0.1943]) tensor([0.8207, 0.0169, 0.1069, 0.0555]) -Greedy action tensor([ 1.4652, -0.7150, -0.0926, 0.0547]) tensor([0.6379, 0.0721, 0.1343, 0.1557]) -Greedy action tensor([ 1.8420, -0.3824, -0.3027, 0.4547]) tensor([0.6780, 0.0733, 0.0794, 0.1693]) -Greedy action tensor([ 1.3015, -1.0444, -0.0551, -0.0686]) tensor([0.6221, 0.0596, 0.1602, 0.1581]) -Greedy action tensor([ 1.3629, 0.0530, 0.1308, -0.6078]) tensor([0.5879, 0.1587, 0.1715, 0.0819]) -Greedy action tensor([ 2.1415, -1.6021, 0.1255, 0.0200]) tensor([0.7833, 0.0185, 0.1043, 0.0939]) -Greedy action tensor([ 1.3841, -0.9042, -0.2437, 0.5953]) tensor([0.5707, 0.0579, 0.1121, 0.2593]) -Greedy action tensor([ 1.5738, -0.0737, -0.6777, 0.2477]) tensor([0.6397, 0.1232, 0.0673, 0.1698]) -Greedy action tensor([ 2.1368, -0.9389, -0.4245, 0.7451]) tensor([0.7289, 0.0336, 0.0563, 0.1812]) -Greedy action tensor([2.2322, 0.7863, 0.0023, 0.2916]) tensor([0.6726, 0.1584, 0.0723, 0.0966]) -Greedy action tensor([ 0.9298, -0.1200, -0.5969, -0.0904]) tensor([0.5187, 0.1816, 0.1127, 0.1870]) -Greedy action tensor([2.7200, 0.4094, 0.0689, 0.1644]) tensor([0.8017, 0.0795, 0.0566, 0.0622]) -Greedy action tensor([ 2.1731, -1.1202, 0.1735, 0.2961]) tensor([0.7544, 0.0280, 0.1021, 0.1155]) -Greedy action tensor([ 0.6865, -0.1251, -0.1314, 0.2446]) tensor([0.3955, 0.1757, 0.1746, 0.2542]) -Greedy action tensor([ 1.5700, -0.5412, -0.0458, -0.1893]) tensor([0.6702, 0.0812, 0.1332, 0.1154]) -Greedy action tensor([ 1.3212, -0.7117, -0.5240, 0.8469]) tensor([0.5232, 0.0685, 0.0827, 0.3256]) -Greedy action tensor([ 2.2406, -0.6188, -0.1777, 0.7891]) tensor([0.7243, 0.0415, 0.0645, 0.1696]) -Greedy action tensor([ 0.9799, -0.1029, -0.1855, 0.0457]) tensor([0.4894, 0.1657, 0.1526, 0.1923]) -Greedy action tensor([ 1.5748, 0.1999, -1.0275, 0.1476]) tensor([0.6382, 0.1614, 0.0473, 0.1532]) -Greedy action tensor([ 2.1698, -1.1073, 0.1934, 1.0315]) tensor([0.6682, 0.0252, 0.0926, 0.2140]) -Greedy action tensor([ 1.3026, -0.3939, -0.6103, 0.1543]) tensor([0.6067, 0.1112, 0.0896, 0.1924]) -Greedy action tensor([ 1.5779, -0.8690, -0.3521, 0.8089]) tensor([0.5899, 0.0511, 0.0856, 0.2734]) -Greedy action tensor([ 1.1096, -0.3765, -0.5532, 0.2102]) tensor([0.5486, 0.1241, 0.1040, 0.2232]) -Greedy action tensor([ 1.8074, -0.6876, 0.0130, 0.8165]) tensor([0.6173, 0.0509, 0.1026, 0.2292]) -Greedy action tensor([ 1.5434, 0.2408, -0.7127, 0.7542]) tensor([0.5462, 0.1485, 0.0572, 0.2481]) -Greedy action tensor([ 2.2677, -1.4079, -0.1044, 0.9654]) tensor([0.7192, 0.0182, 0.0671, 0.1955]) -Greedy action tensor([ 2.0793, -1.1118, -0.2806, 0.3820]) tensor([0.7583, 0.0312, 0.0716, 0.1389]) -Greedy action tensor([ 2.0163, -1.1132, -0.1954, 0.6367]) tensor([0.7118, 0.0311, 0.0779, 0.1791]) -Greedy action tensor([ 1.3214, -0.3169, -0.5040, 0.4460]) tensor([0.5643, 0.1096, 0.0909, 0.2351]) -Greedy action tensor([ 1.6185, -0.8767, -0.1855, 0.9665]) tensor([0.5656, 0.0467, 0.0931, 0.2947]) -Greedy action tensor([ 1.8365, -1.2028, -0.3267, 0.4565]) tensor([0.7070, 0.0338, 0.0813, 0.1779]) -Greedy action tensor([ 1.1367, -0.0551, -0.1980, -0.3119]) tensor([0.5550, 0.1685, 0.1461, 0.1304]) -Greedy action tensor([ 2.1193, -0.7838, -0.2771, 0.3776]) tensor([0.7569, 0.0415, 0.0689, 0.1326]) -Greedy action tensor([ 1.1975, -0.2953, -0.5541, 0.1496]) tensor([0.5718, 0.1285, 0.0992, 0.2005]) -Greedy action tensor([ 2.1334, -1.1807, -0.2891, 0.3709]) tensor([0.7712, 0.0280, 0.0684, 0.1324]) -Greedy action tensor([ 1.9704, -0.0271, -0.4208, 0.2298]) tensor([0.7129, 0.0967, 0.0653, 0.1251]) -Greedy action tensor([ 1.7587, -0.7396, -0.2946, 0.4272]) tensor([0.6781, 0.0558, 0.0870, 0.1791]) -Greedy action tensor([ 1.4955, -0.5529, -0.1937, 0.5218]) tensor([0.5913, 0.0762, 0.1092, 0.2233]) -Greedy action tensor([ 1.1596, -0.3864, -0.4612, 0.4573]) tensor([0.5246, 0.1118, 0.1037, 0.2599]) -Greedy action tensor([ 1.3043, -0.2557, -0.5256, 0.0286]) tensor([0.6061, 0.1274, 0.0972, 0.1693]) -Greedy action tensor([ 1.4969, -0.9012, -0.0506, 0.2837]) tensor([0.6246, 0.0568, 0.1329, 0.1857]) -Greedy action tensor([-1.8810, -0.3795, 0.6664, -0.1154]) tensor([0.0415, 0.1862, 0.5299, 0.2425]) -Greedy action tensor([-1.3749, -0.5740, 0.5181, 0.2447]) tensor([0.0670, 0.1493, 0.4451, 0.3386]) -Greedy action tensor([-1.8971, -0.4504, 0.6424, -0.1603]) tensor([0.0424, 0.1800, 0.5370, 0.2406]) -Greedy action tensor([-1.9176, -0.4476, 0.6549, -0.1583]) tensor([0.0412, 0.1793, 0.5400, 0.2395]) -Greedy action tensor([-1.9253, -0.4372, 0.6571, -0.1706]) tensor([0.0409, 0.1812, 0.5413, 0.2366]) -Greedy action tensor([-1.8307, -0.4767, 0.6095, -0.1403]) tensor([0.0459, 0.1779, 0.5271, 0.2490]) -Greedy action tensor([-1.3633, 0.2957, 0.3017, -0.0027]) tensor([0.0648, 0.3403, 0.3424, 0.2525]) -Greedy action tensor([-1.9126, -0.4447, 0.6546, -0.1623]) tensor([0.0414, 0.1799, 0.5401, 0.2386]) -Greedy action tensor([-1.9236, -0.3749, 0.6497, -0.1707]) tensor([0.0407, 0.1914, 0.5332, 0.2347]) -Greedy action tensor([-1.9169, -0.2857, 0.6290, -0.1692]) tensor([0.0406, 0.2077, 0.5184, 0.2333]) -Greedy action tensor([-1.4773, 0.5881, 0.3465, -0.0945]) tensor([0.0524, 0.4137, 0.3249, 0.2090]) -Greedy action tensor([-1.7957, -0.4532, 0.5986, -0.1005]) tensor([0.0471, 0.1803, 0.5161, 0.2565]) -Greedy action tensor([-1.9336, -0.4276, 0.6620, -0.1748]) tensor([0.0405, 0.1824, 0.5423, 0.2349]) -Greedy action tensor([-1.7854, 0.2838, 0.5197, -0.2025]) tensor([0.0420, 0.3325, 0.4210, 0.2045]) -Greedy action tensor([-1.9285, -0.4500, 0.6607, -0.1706]) tensor([0.0408, 0.1790, 0.5435, 0.2367]) -Greedy action tensor([-1.8273, -0.4464, 0.6089, -0.1219]) tensor([0.0456, 0.1816, 0.5216, 0.2512]) -Greedy action tensor([-1.7702, -0.4521, 0.5634, -0.0826]) tensor([0.0489, 0.1826, 0.5042, 0.2643]) -Greedy action tensor([-1.7215, -0.5016, 0.6576, -0.0812]) tensor([0.0492, 0.1665, 0.5308, 0.2535]) -Greedy action tensor([-1.7310, 0.0781, 0.4886, -0.0066]) tensor([0.0456, 0.2785, 0.4199, 0.2559]) -Greedy action tensor([-1.6670, -0.2425, 0.5032, -0.0866]) tensor([0.0533, 0.2214, 0.4666, 0.2587]) -Greedy action tensor([-1.6601, 0.4431, 0.4214, -0.0893]) tensor([0.0454, 0.3721, 0.3641, 0.2185]) -Greedy action tensor([-1.4370, 0.5863, 0.5079, -0.4548]) tensor([0.0549, 0.4150, 0.3837, 0.1465]) -Greedy action tensor([-1.9279, -0.4261, 0.6597, -0.1713]) tensor([0.0407, 0.1827, 0.5410, 0.2357]) -Greedy action tensor([-1.8404, -0.3972, 0.6477, -0.0975]) tensor([0.0435, 0.1842, 0.5237, 0.2486]) -Greedy action tensor([-1.7826, -0.4558, 0.5789, -0.1078]) tensor([0.0483, 0.1819, 0.5121, 0.2577]) -Greedy action tensor([-1.9312, -0.4378, 0.6612, -0.1719]) tensor([0.0406, 0.1808, 0.5427, 0.2359]) -Greedy action tensor([-1.8305, -0.4513, 0.8553, 0.2007]) tensor([0.0367, 0.1457, 0.5381, 0.2796]) -Greedy action tensor([-1.8810, -0.2809, 0.6163, -0.1404]) tensor([0.0420, 0.2081, 0.5104, 0.2395]) -Greedy action tensor([-1.8843, -0.3756, 0.6317, -0.1500]) tensor([0.0424, 0.1918, 0.5253, 0.2404]) -Greedy action tensor([-1.7866, -0.2463, 0.5871, -0.1239]) tensor([0.0461, 0.2152, 0.4953, 0.2433]) -Greedy action tensor([-1.8989, -0.4648, 0.6415, -0.1605]) tensor([0.0424, 0.1780, 0.5382, 0.2413]) -Greedy action tensor([-1.4783, 0.1771, 0.5760, -0.6614]) tensor([0.0613, 0.3212, 0.4786, 0.1389]) -Greedy action tensor([-1.9204, -0.2427, 0.6281, -0.1795]) tensor([0.0403, 0.2155, 0.5147, 0.2295]) -Greedy action tensor([-1.4912, 0.2998, 0.3827, -0.0711]) tensor([0.0567, 0.3397, 0.3691, 0.2345]) -Greedy action tensor([-0.9626, 0.9602, 0.1578, 0.2053]) tensor([0.0708, 0.4844, 0.2171, 0.2277]) -Greedy action tensor([-0.2588, 0.1341, 0.2092, 0.3731]) tensor([0.1678, 0.2486, 0.2680, 0.3157]) -Greedy action tensor([-1.9221, -0.4187, 0.6532, -0.1704]) tensor([0.0410, 0.1843, 0.5384, 0.2363]) -Greedy action tensor([-1.8883, -0.3733, 0.6314, -0.1247]) tensor([0.0420, 0.1911, 0.5219, 0.2450]) -Greedy action tensor([0.6782, 0.0633, 0.6753, 1.5625]) tensor([0.2016, 0.1090, 0.2011, 0.4883]) -Greedy action tensor([-1.7602, -0.1989, 0.5409, -0.1065]) tensor([0.0477, 0.2272, 0.4760, 0.2492]) -Greedy action tensor([-1.7078, -0.5022, 0.5418, -0.0274]) tensor([0.0521, 0.1740, 0.4942, 0.2797]) -Greedy action tensor([-1.2032, -0.4022, 0.8127, 0.8764]) tensor([0.0534, 0.1189, 0.4007, 0.4270]) -Greedy action tensor([-0.6682, 0.5778, 0.1650, 0.4020]) tensor([0.1032, 0.3586, 0.2374, 0.3008]) -Greedy action tensor([-0.8332, -0.5535, 0.3175, 0.5742]) tensor([0.1045, 0.1382, 0.3303, 0.4270]) -Greedy action tensor([-1.9422, -0.4207, 0.6589, -0.1812]) tensor([0.0402, 0.1841, 0.5418, 0.2339]) -Greedy action tensor([-1.4732, 0.5946, 0.2730, 0.1606]) tensor([0.0506, 0.4001, 0.2901, 0.2592]) -Greedy action tensor([-1.8724, -0.4541, 0.6386, -0.1398]) tensor([0.0433, 0.1788, 0.5332, 0.2448]) -Greedy action tensor([-1.7387, -0.5203, 0.5549, -0.0628]) tensor([0.0509, 0.1722, 0.5047, 0.2721]) -Greedy action tensor([-1.6670, -0.4268, 0.7538, 0.4094]) tensor([0.0422, 0.1459, 0.4752, 0.3367]) -Greedy action tensor([-1.8164, -0.4532, 0.5984, -0.1346]) tensor([0.0466, 0.1820, 0.5210, 0.2503]) -Greedy action tensor([-1.9116, -0.4085, 0.6501, -0.1617]) tensor([0.0413, 0.1857, 0.5353, 0.2377]) -Greedy action tensor([-1.2095, -0.3471, 0.3810, 0.2030]) tensor([0.0808, 0.1913, 0.3962, 0.3317]) -Greedy action tensor([-1.5095, 0.4413, 0.3182, 0.1461]) tensor([0.0513, 0.3609, 0.3191, 0.2687]) -Greedy action tensor([-1.8747, -0.3654, 0.6162, -0.1452]) tensor([0.0430, 0.1947, 0.5196, 0.2427]) -Greedy action tensor([-1.7393, -0.4138, 0.5656, -0.0688]) tensor([0.0497, 0.1873, 0.4986, 0.2644]) -Greedy action tensor([-1.9022, -0.3992, 0.6497, -0.1444]) tensor([0.0414, 0.1863, 0.5318, 0.2404]) -Greedy action tensor([-1.8024, -0.3995, 0.6326, -0.0775]) tensor([0.0453, 0.1841, 0.5167, 0.2540]) -Greedy action tensor([-1.9303, -0.3457, 0.6430, -0.1685]) tensor([0.0403, 0.1966, 0.5284, 0.2347]) -Greedy action tensor([-1.9442, -0.4492, 0.6677, -0.1803]) tensor([0.0401, 0.1789, 0.5467, 0.2342]) -Greedy action tensor([-1.7656, -0.4690, 0.4556, -0.4181]) tensor([0.0564, 0.2063, 0.5201, 0.2171]) -Greedy action tensor([-1.9079, -0.4566, 0.6557, -0.1617]) tensor([0.0417, 0.1780, 0.5413, 0.2390]) -Greedy action tensor([-1.5726, -0.4932, 0.5297, -0.0157]) tensor([0.0593, 0.1744, 0.4851, 0.2812]) -Greedy action tensor([-1.1189, 0.8397, 0.1296, 0.2651]) tensor([0.0642, 0.4555, 0.2239, 0.2564]) -Greedy action tensor([-1.8719, -0.2915, 0.6222, -0.1280]) tensor([0.0422, 0.2050, 0.5113, 0.2415]) -Greedy action tensor([-1.6152, 0.3486, 0.3832, 0.0399]) tensor([0.0482, 0.3437, 0.3557, 0.2524]) -Greedy action tensor([-0.9780, 0.5831, -0.2892, -0.1978]) tensor([0.1006, 0.4794, 0.2004, 0.2196]) -Greedy action tensor([-0.7684, 0.2235, 0.1522, -0.0240]) tensor([0.1203, 0.3244, 0.3020, 0.2533]) -Greedy action tensor([-0.9750, -0.7144, 0.5841, 0.2922]) tensor([0.0943, 0.1224, 0.4484, 0.3349]) -Greedy action tensor([-1.8598, -0.4569, 0.6289, -0.1402]) tensor([0.0441, 0.1792, 0.5308, 0.2460]) -Greedy action tensor([-1.8941, -0.4486, 0.6464, -0.1559]) tensor([0.0423, 0.1797, 0.5372, 0.2408]) -Greedy action tensor([-1.8014, -0.4389, 0.5924, -0.1359]) tensor([0.0473, 0.1847, 0.5180, 0.2501]) -Greedy action tensor([-1.5527, -0.4392, 0.4711, -0.0501]) tensor([0.0621, 0.1891, 0.4698, 0.2790]) -Greedy action tensor([-1.6722, -0.5174, 0.5300, -0.0679]) tensor([0.0550, 0.1744, 0.4972, 0.2734]) -Greedy action tensor([-1.9201, -0.4053, 0.6533, -0.1667]) tensor([0.0409, 0.1862, 0.5366, 0.2363]) -Greedy action tensor([-1.7532, -0.3141, 0.5669, -0.1056]) tensor([0.0486, 0.2048, 0.4943, 0.2523]) -Greedy action tensor([-1.9450, -0.4330, 0.6624, -0.1810]) tensor([0.0401, 0.1819, 0.5440, 0.2340]) -Greedy action tensor([-1.8743, -0.4101, 0.6318, -0.1304]) tensor([0.0429, 0.1856, 0.5260, 0.2455]) -Greedy action tensor([-1.7882, -0.3359, -0.3378, -0.7297]) tensor([0.0805, 0.3440, 0.3434, 0.2321]) -Greedy action tensor([-1.7186, -0.5229, 0.9386, 0.2633]) tensor([0.0387, 0.1280, 0.5522, 0.2810]) -Greedy action tensor([-1.7652, -0.4672, 0.6376, -0.0640]) tensor([0.0472, 0.1728, 0.5215, 0.2586]) -Greedy action tensor([-0.5857, 0.9243, 0.0206, 0.1747]) tensor([0.1053, 0.4765, 0.1930, 0.2252]) -Greedy action tensor([-0.4980, -0.2426, 0.2072, -0.1359]) tensor([0.1739, 0.2245, 0.3519, 0.2497]) -Greedy action tensor([-0.4608, -1.2082, 0.1325, -0.6026]) tensor([0.2409, 0.1141, 0.4360, 0.2090]) -Greedy action tensor([ 1.2699, -0.1714, 0.3704, 0.2799]) tensor([0.4963, 0.1174, 0.2019, 0.1844]) -Greedy action tensor([-0.0625, -1.0588, -0.2223, 0.1605]) tensor([0.2881, 0.1064, 0.2455, 0.3600]) -Greedy action tensor([-1.1244, -0.6422, -0.7010, -0.3635]) tensor([0.1591, 0.2576, 0.2429, 0.3404]) -Greedy action tensor([-0.0029, -1.2611, 0.0219, 0.4420]) tensor([0.2584, 0.0734, 0.2649, 0.4032]) -Greedy action tensor([ 0.2982, -0.2399, 1.3046, -0.8366]) tensor([0.2155, 0.1258, 0.5895, 0.0693]) -Greedy action tensor([ 0.3027, 0.1755, -1.0748, -0.1862]) tensor([0.3642, 0.3207, 0.0918, 0.2233]) -Greedy action tensor([-0.9692, 0.1531, 0.4436, -1.5095]) tensor([0.1141, 0.3506, 0.4688, 0.0665]) -Greedy action tensor([ 0.9008, -0.4280, -1.0187, -0.0597]) tensor([0.5574, 0.1476, 0.0817, 0.2133]) -Greedy action tensor([ 0.1366, -0.3709, 0.3260, -0.7485]) tensor([0.3103, 0.1868, 0.3749, 0.1280]) -Greedy action tensor([-0.7981, -0.1115, -1.7848, -0.5595]) tensor([0.2160, 0.4292, 0.0805, 0.2742]) -Greedy action tensor([-0.4608, -0.4116, -0.4995, -0.6793]) tensor([0.2620, 0.2753, 0.2521, 0.2106]) -Greedy action tensor([-0.7761, 0.2044, 0.4744, -1.1044]) tensor([0.1269, 0.3384, 0.4433, 0.0914]) -Greedy action tensor([ 0.0913, 0.0705, -0.1516, 0.0233]) tensor([0.2704, 0.2648, 0.2121, 0.2527]) -Greedy action tensor([-0.5161, -1.4525, 0.4845, -0.1031]) tensor([0.1778, 0.0697, 0.4837, 0.2688]) -Greedy action tensor([-0.2268, -0.9491, -0.4708, -1.1976]) tensor([0.3777, 0.1834, 0.2959, 0.1430]) -Greedy action tensor([-0.0337, -0.0943, 0.0273, -0.8011]) tensor([0.2883, 0.2714, 0.3064, 0.1338]) -Greedy action tensor([ 0.5180, 0.3352, -0.2280, -0.3807]) tensor([0.3684, 0.3069, 0.1747, 0.1500]) -Greedy action tensor([ 0.5061, -0.5112, -0.3153, 0.3709]) tensor([0.3739, 0.1352, 0.1644, 0.3266]) -Greedy action tensor([ 0.6144, -1.4288, -0.1990, -0.4877]) tensor([0.5249, 0.0680, 0.2327, 0.1744]) -Greedy action tensor([-0.8757, -0.7637, 0.2891, -0.7920]) tensor([0.1560, 0.1745, 0.5000, 0.1696]) -Greedy action tensor([0.5309, 0.3785, 0.2398, 0.3337]) tensor([0.2918, 0.2505, 0.2181, 0.2396]) -Greedy action tensor([-0.4994, 0.7271, -0.3598, -0.5621]) tensor([0.1539, 0.5246, 0.1769, 0.1445]) -Greedy action tensor([-0.3598, 0.4191, -0.1039, -0.8453]) tensor([0.1966, 0.4284, 0.2540, 0.1210]) -Greedy action tensor([-0.7866, -0.3876, 0.1131, -0.5121]) tensor([0.1596, 0.2379, 0.3925, 0.2100]) -Greedy action tensor([-0.0082, 0.3170, 0.7688, 0.4093]) tensor([0.1645, 0.2278, 0.3579, 0.2498]) -Greedy action tensor([ 0.0607, -0.2149, -0.4522, -0.4144]) tensor([0.3356, 0.2548, 0.2009, 0.2087]) -Greedy action tensor([ 0.2736, -1.2409, -0.3622, 0.1737]) tensor([0.3767, 0.0829, 0.1995, 0.3409]) -Greedy action tensor([ 0.6480, -0.2805, 1.3425, 0.4690]) tensor([0.2362, 0.0933, 0.4730, 0.1975]) -Greedy action tensor([ 0.6914, 0.6171, -0.3181, -0.7876]) tensor([0.3967, 0.3683, 0.1446, 0.0904]) -Greedy action tensor([ 0.7245, -1.1625, 1.2212, -0.3854]) tensor([0.3201, 0.0485, 0.5259, 0.1055]) -Greedy action tensor([-0.5124, -0.5818, 0.9789, -0.3433]) tensor([0.1323, 0.1234, 0.5877, 0.1566]) -Greedy action tensor([ 0.0188, 0.0317, -0.5458, -0.3953]) tensor([0.3084, 0.3124, 0.1754, 0.2038]) -Greedy action tensor([-0.2645, -2.0185, 0.2384, 0.5654]) tensor([0.1953, 0.0338, 0.3230, 0.4479]) -Greedy action tensor([-0.1491, 0.4438, 0.4512, -0.2582]) tensor([0.1809, 0.3273, 0.3297, 0.1622]) -Greedy action tensor([-0.2959, -0.2943, -0.7810, -0.8069]) tensor([0.3108, 0.3113, 0.1914, 0.1865]) -Greedy action tensor([-1.0058, -0.6171, -0.9731, 0.5910]) tensor([0.1184, 0.1746, 0.1223, 0.5846]) -Greedy action tensor([ 0.4261, -0.7327, 0.2391, -0.2551]) tensor([0.3775, 0.1185, 0.3131, 0.1910]) -Greedy action tensor([ 0.4239, -0.4019, 1.8860, 0.1837]) tensor([0.1529, 0.0670, 0.6598, 0.1203]) -Greedy action tensor([ 0.4295, 0.0827, 0.1238, -0.2229]) tensor([0.3374, 0.2385, 0.2485, 0.1757]) -Greedy action tensor([0.8084, 0.0350, 0.9287, 0.2900]) tensor([0.3140, 0.1449, 0.3541, 0.1870]) -Greedy action tensor([ 0.4461, -0.0552, 0.8234, 0.3089]) tensor([0.2541, 0.1539, 0.3705, 0.2215]) -Greedy action tensor([ 0.3016, -1.4389, -0.4521, -0.2218]) tensor([0.4467, 0.0784, 0.2102, 0.2647]) -Greedy action tensor([ 0.0279, 0.4247, -0.1519, -0.7363]) tensor([0.2640, 0.3926, 0.2205, 0.1229]) -Greedy action tensor([-0.1495, -0.8250, -1.0114, -0.3259]) tensor([0.3611, 0.1838, 0.1525, 0.3027]) -Greedy action tensor([ 0.3866, -1.6129, 1.0987, 0.1408]) tensor([0.2528, 0.0342, 0.5153, 0.1977]) -Greedy action tensor([ 0.9347, -1.1446, -0.2343, 0.2898]) tensor([0.5101, 0.0638, 0.1585, 0.2677]) -Greedy action tensor([-0.0201, -0.2980, 0.2253, -0.7341]) tensor([0.2837, 0.2148, 0.3626, 0.1389]) -Greedy action tensor([ 0.8789, -0.7756, 0.0839, 0.8450]) tensor([0.3832, 0.0733, 0.1731, 0.3705]) -Greedy action tensor([ 0.3914, -1.4875, -0.1525, 0.1903]) tensor([0.3920, 0.0599, 0.2276, 0.3206]) -Greedy action tensor([ 0.2740, -1.7776, 0.7357, -0.5067]) tensor([0.3151, 0.0405, 0.5000, 0.1443]) -Greedy action tensor([ 0.7771, 0.0329, 0.3704, -0.2195]) tensor([0.3984, 0.1893, 0.2653, 0.1471]) -Greedy action tensor([ 0.2481, -1.2428, 0.4487, -0.8958]) tensor([0.3615, 0.0814, 0.4419, 0.1152]) -Greedy action tensor([-0.4431, 0.1462, 0.2022, -0.8417]) tensor([0.1859, 0.3351, 0.3543, 0.1248]) -Greedy action tensor([-0.6898, -0.1256, -0.6189, -0.4597]) tensor([0.1965, 0.3454, 0.2109, 0.2473]) -Greedy action tensor([ 0.1319, 0.2127, 1.1146, -0.1405]) tensor([0.1812, 0.1965, 0.4842, 0.1380]) -Greedy action tensor([-1.1850, -0.5421, -0.8567, 0.2686]) tensor([0.1167, 0.2220, 0.1621, 0.4993]) -Greedy action tensor([-0.2358, -1.8847, 0.7272, 0.0650]) tensor([0.1937, 0.0372, 0.5074, 0.2617]) -Greedy action tensor([ 1.8949, -1.6730, 1.8546, -0.2960]) tensor([0.4761, 0.0134, 0.4573, 0.0532]) -Greedy action tensor([-0.4001, -0.1486, 0.0153, -0.5362]) tensor([0.2140, 0.2752, 0.3241, 0.1867]) -Greedy action tensor([ 0.4456, 0.3859, 0.6380, -0.6236]) tensor([0.2859, 0.2694, 0.3466, 0.0981]) -Greedy action tensor([ 0.2746, -0.9008, 0.3386, -0.2948]) tensor([0.3401, 0.1050, 0.3625, 0.1924]) -Greedy action tensor([ 0.8550, -0.2086, 0.5155, 0.3055]) tensor([0.3796, 0.1310, 0.2703, 0.2191]) -Greedy action tensor([-0.1649, 0.3686, -0.6189, -0.2905]) tensor([0.2369, 0.4038, 0.1504, 0.2089]) -Greedy action tensor([-0.7949, -0.6133, 0.7061, -1.3584]) tensor([0.1378, 0.1653, 0.6184, 0.0785]) -Greedy action tensor([ 0.1629, -0.6286, -0.5485, -0.6284]) tensor([0.4171, 0.1890, 0.2048, 0.1891]) -Greedy action tensor([-0.4793, 0.3624, -0.7304, -0.6921]) tensor([0.2038, 0.4729, 0.1585, 0.1647]) -Greedy action tensor([ 0.2034, 0.3890, 0.6592, -0.9121]) tensor([0.2434, 0.2930, 0.3839, 0.0798]) -Greedy action tensor([ 0.7951, -0.3102, 1.5803, -0.0942]) tensor([0.2541, 0.0841, 0.5573, 0.1044]) -Greedy action tensor([ 0.8406, -1.2729, -0.2343, 0.8573]) tensor([0.4034, 0.0487, 0.1377, 0.4102]) -Greedy action tensor([-0.7364, -0.6686, -1.3209, 0.1802]) tensor([0.1950, 0.2087, 0.1087, 0.4876]) -Greedy action tensor([-0.2048, -0.2746, -0.8729, 0.2126]) tensor([0.2523, 0.2353, 0.1294, 0.3830]) -Greedy action tensor([ 0.7431, -0.0635, 0.0916, 0.3535]) tensor([0.3781, 0.1688, 0.1971, 0.2561]) -Greedy action tensor([ 0.5036, -0.6899, -0.0601, -0.6132]) tensor([0.4546, 0.1378, 0.2587, 0.1488]) -Greedy action tensor([ 0.6489, -0.2492, 0.3285, 0.1727]) tensor([0.3631, 0.1479, 0.2635, 0.2255]) -Greedy action tensor([-0.4127, -0.4767, 0.3020, -0.3062]) tensor([0.1963, 0.1841, 0.4012, 0.2184]) -Greedy action tensor([-0.8127, 1.4725, 0.5236, -0.5226]) tensor([0.0626, 0.6154, 0.2383, 0.0837]) -Greedy action tensor([-0.4675, -0.3941, -0.7280, -0.0881]) tensor([0.2321, 0.2498, 0.1789, 0.3392]) -Greedy action tensor([-0.0972, -0.9327, -0.5442, 0.0457]) tensor([0.3099, 0.1344, 0.1982, 0.3575]) -Greedy action tensor([-0.8512, 0.5122, 0.2024, -0.4009]) tensor([0.1070, 0.4183, 0.3069, 0.1678]) -Greedy action tensor([ 0.9669, -1.4501, 0.1304, 1.1025]) tensor([0.3749, 0.0334, 0.1624, 0.4293]) -Greedy action tensor([ 0.5440, -0.3004, -0.0205, -0.2071]) tensor([0.4048, 0.1740, 0.2302, 0.1910]) -Greedy action tensor([ 0.7059, -0.7239, -0.1053, -0.2960]) tensor([0.4876, 0.1167, 0.2166, 0.1790]) -Greedy action tensor([ 1.0279, -0.5693, -0.0437, -0.4508]) tensor([0.5641, 0.1142, 0.1932, 0.1286]) -Greedy action tensor([ 0.8939, -0.3849, -0.0921, -0.1543]) tensor([0.4995, 0.1390, 0.1863, 0.1751]) -Greedy action tensor([ 0.9982, -0.6829, 0.0104, -0.5709]) tensor([0.5660, 0.1054, 0.2108, 0.1179]) -Greedy action tensor([ 1.0268, -0.3258, -0.0993, -0.4490]) tensor([0.5520, 0.1427, 0.1790, 0.1262]) -Greedy action tensor([ 0.7826, -0.9219, 0.0019, -0.4578]) tensor([0.5183, 0.0943, 0.2374, 0.1499]) -Greedy action tensor([ 0.7976, -0.2476, 0.0436, -0.0109]) tensor([0.4410, 0.1551, 0.2075, 0.1965]) -Greedy action tensor([ 0.6763, -0.1799, -0.0445, -0.2021]) tensor([0.4298, 0.1826, 0.2091, 0.1786]) -Greedy action tensor([ 0.4192, 0.0973, -0.2069, -0.0311]) tensor([0.3452, 0.2502, 0.1846, 0.2200]) -Greedy action tensor([ 0.6620, 0.2154, -0.2317, -0.2262]) tensor([0.4064, 0.2601, 0.1663, 0.1672]) -Greedy action tensor([ 0.7650, -0.3165, -0.1093, -0.0217]) tensor([0.4522, 0.1533, 0.1886, 0.2059]) -Greedy action tensor([ 0.8720, -0.6880, 0.0882, -0.4639]) tensor([0.5182, 0.1089, 0.2366, 0.1362]) -Greedy action tensor([ 0.6106, -0.1568, -0.0238, -0.1351]) tensor([0.4051, 0.1880, 0.2148, 0.1922]) -Greedy action tensor([ 0.7819, -0.3721, -0.0393, -0.1185]) tensor([0.4626, 0.1459, 0.2035, 0.1880]) -Greedy action tensor([ 0.4812, -0.1690, 0.1342, -0.2827]) tensor([0.3711, 0.1937, 0.2623, 0.1729]) -Greedy action tensor([ 0.7195, -0.0913, -0.0537, -0.0086]) tensor([0.4186, 0.1861, 0.1932, 0.2021]) -Greedy action tensor([ 1.0002, -0.7940, 0.1060, -0.5928]) tensor([0.5623, 0.0935, 0.2299, 0.1143]) -Greedy action tensor([ 0.6816, 0.0059, -0.0233, -0.1356]) tensor([0.4091, 0.2081, 0.2021, 0.1807]) -Greedy action tensor([ 0.4267, -0.3329, -0.1880, -0.1145]) tensor([0.3860, 0.1806, 0.2087, 0.2247]) -Greedy action tensor([ 0.7927, -0.0979, -0.0946, -0.1147]) tensor([0.4493, 0.1844, 0.1850, 0.1813]) -Greedy action tensor([ 0.4836, 0.2235, -0.2451, 0.1433]) tensor([0.3373, 0.2600, 0.1627, 0.2400]) -Greedy action tensor([ 0.7580, -0.5420, -0.0869, -0.3732]) tensor([0.4939, 0.1346, 0.2122, 0.1593]) -Greedy action tensor([ 0.7239, 0.0099, 0.0632, -0.0317]) tensor([0.4039, 0.1978, 0.2086, 0.1897]) -Greedy action tensor([ 1.8149, -1.1353, -0.1369, -0.6795]) tensor([0.7832, 0.0410, 0.1112, 0.0646]) -Greedy action tensor([ 0.9343, -0.5390, -0.0301, -0.3745]) tensor([0.5318, 0.1219, 0.2027, 0.1436]) -Greedy action tensor([ 1.0421, -0.5431, -0.0249, -0.4186]) tensor([0.5615, 0.1150, 0.1932, 0.1303]) -Greedy action tensor([ 0.6665, -0.5555, 0.0027, -0.2872]) tensor([0.4556, 0.1342, 0.2346, 0.1756]) -Greedy action tensor([ 0.3363, -0.0262, 0.0118, -0.0811]) tensor([0.3249, 0.2261, 0.2349, 0.2140]) -Greedy action tensor([ 0.5878, 0.3911, -0.1840, -0.0043]) tensor([0.3525, 0.2896, 0.1629, 0.1950]) -Greedy action tensor([ 0.8022, -0.2725, -0.0773, -0.1887]) tensor([0.4700, 0.1605, 0.1950, 0.1745]) -Greedy action tensor([ 0.6512, -0.3280, -0.0299, -0.1983]) tensor([0.4330, 0.1627, 0.2191, 0.1852]) -Greedy action tensor([ 0.3007, 0.1472, -0.1161, -0.1617]) tensor([0.3178, 0.2726, 0.2095, 0.2001]) -Greedy action tensor([ 0.3570, -0.3674, -0.1975, -0.0374]) tensor([0.3659, 0.1773, 0.2102, 0.2466]) -Greedy action tensor([ 0.6606, -0.4898, -0.1681, -0.3315]) tensor([0.4708, 0.1490, 0.2056, 0.1746]) -Greedy action tensor([ 0.8553, -0.5742, -0.0566, -0.4944]) tensor([0.5262, 0.1260, 0.2114, 0.1365]) -Greedy action tensor([ 1.0740, -0.6659, 0.0871, -0.5595]) tensor([0.5736, 0.1007, 0.2138, 0.1120]) -Greedy action tensor([ 1.2124, -1.1418, 0.0829, -0.5961]) tensor([0.6321, 0.0600, 0.2043, 0.1036]) -Greedy action tensor([ 0.6287, -0.6307, -0.0154, -0.2776]) tensor([0.4519, 0.1283, 0.2373, 0.1826]) -Greedy action tensor([ 0.6542, -0.2372, 0.0103, -0.0379]) tensor([0.4105, 0.1684, 0.2156, 0.2055]) -Greedy action tensor([ 0.4262, -0.3768, 0.0635, -0.4431]) tensor([0.3902, 0.1748, 0.2715, 0.1636]) -Greedy action tensor([ 0.8930, -0.2533, 0.0132, -0.0881]) tensor([0.4745, 0.1508, 0.1968, 0.1779]) -Greedy action tensor([ 0.5908, -0.2532, -0.0113, -0.0943]) tensor([0.4029, 0.1733, 0.2207, 0.2031]) -Greedy action tensor([ 0.4456, -0.1440, 0.0044, -0.0307]) tensor([0.3548, 0.1967, 0.2282, 0.2203]) -Greedy action tensor([ 0.7565, -0.7470, -0.0403, -0.3850]) tensor([0.5019, 0.1116, 0.2262, 0.1603]) -Greedy action tensor([ 0.4018, -0.0938, 0.0066, -0.0234]) tensor([0.3405, 0.2075, 0.2294, 0.2226]) -Greedy action tensor([ 0.6575, -0.3853, -0.0568, -0.2522]) tensor([0.4455, 0.1570, 0.2181, 0.1794]) -Greedy action tensor([ 0.8784, -0.5550, -0.1401, -0.4602]) tensor([0.5371, 0.1281, 0.1940, 0.1408]) -Greedy action tensor([ 1.0474, -0.6230, 0.0126, -0.3635]) tensor([0.5595, 0.1053, 0.1988, 0.1365]) -Greedy action tensor([ 1.2233, -0.2928, 0.0216, -0.2327]) tensor([0.5703, 0.1252, 0.1715, 0.1330]) -Greedy action tensor([ 1.0607, -0.4595, -0.0610, -0.4116]) tensor([0.5638, 0.1233, 0.1836, 0.1293]) -Greedy action tensor([ 0.9745, -0.6271, -0.0831, -0.3458]) tensor([0.5507, 0.1110, 0.1912, 0.1471]) -Greedy action tensor([ 0.9622, -0.6421, -0.0648, -0.5039]) tensor([0.5587, 0.1123, 0.2001, 0.1290]) -Greedy action tensor([ 1.0224, -0.5723, -0.1082, -0.3382]) tensor([0.5611, 0.1139, 0.1811, 0.1439]) -Greedy action tensor([ 0.4913, -0.2047, -0.1302, -0.3483]) tensor([0.4052, 0.2021, 0.2177, 0.1750]) -Greedy action tensor([ 0.4149, 0.2148, -0.0916, 0.0203]) tensor([0.3231, 0.2645, 0.1947, 0.2177]) -Greedy action tensor([ 1.0774, -0.6801, 0.0157, -0.4580]) tensor([0.5768, 0.0995, 0.1995, 0.1242]) -Greedy action tensor([ 0.8780, -0.8035, 0.1180, -0.4294]) tensor([0.5197, 0.0967, 0.2430, 0.1406]) -Greedy action tensor([ 0.5074, -0.5323, -0.1627, -0.0771]) tensor([0.4128, 0.1459, 0.2112, 0.2301]) -Greedy action tensor([ 0.8161, -0.2821, 0.0624, -0.0514]) tensor([0.4496, 0.1499, 0.2116, 0.1888]) -Greedy action tensor([ 0.9752, -0.5510, -0.0880, -0.3114]) tensor([0.5438, 0.1182, 0.1878, 0.1502]) -Greedy action tensor([ 1.1404, -0.8314, 0.0594, -0.6710]) tensor([0.6091, 0.0848, 0.2066, 0.0995]) -Greedy action tensor([ 1.0850, -0.9956, 0.0180, -0.5389]) tensor([0.6002, 0.0749, 0.2065, 0.1183]) -Greedy action tensor([ 0.8002, -0.5592, -0.0857, -0.4831]) tensor([0.5138, 0.1320, 0.2119, 0.1424]) -Greedy action tensor([ 0.5873, -0.2409, 0.0360, -0.1274]) tensor([0.3996, 0.1746, 0.2303, 0.1955]) -Greedy action tensor([ 0.1457, 0.3651, -0.0482, -0.0944]) tensor([0.2594, 0.3230, 0.2136, 0.2040]) -Greedy action tensor([ 0.9421, -0.8944, -0.1654, -0.3767]) tensor([0.5691, 0.0907, 0.1880, 0.1522]) -Greedy action tensor([ 0.7679, -0.5441, 0.0130, -0.2930]) tensor([0.4795, 0.1291, 0.2254, 0.1660]) -Greedy action tensor([ 1.1756, -0.3169, -0.0313, -0.1704]) tensor([0.5605, 0.1260, 0.1677, 0.1459]) -Greedy action tensor([ 0.6318, -0.3724, -0.0574, -0.1072]) tensor([0.4263, 0.1562, 0.2140, 0.2036]) -Greedy action tensor([ 0.7742, -0.1732, -0.0164, -0.0345]) tensor([0.4373, 0.1696, 0.1984, 0.1948]) -Greedy action tensor([ 0.6199, -0.4797, -0.1524, -0.2019]) tensor([0.4475, 0.1490, 0.2067, 0.1967]) -Greedy action tensor([ 0.8300, -0.4505, -0.0072, -0.1097]) tensor([0.4758, 0.1322, 0.2060, 0.1859]) -Greedy action tensor([ 1.1399, -0.9509, 0.1326, -0.6546]) tensor([0.6042, 0.0747, 0.2207, 0.1004]) -Greedy action tensor([ 0.5999, -0.5804, -0.1504, -0.2267]) tensor([0.4511, 0.1386, 0.2130, 0.1974]) -Greedy action tensor([ 0.9836, -0.5048, 0.1131, -0.4990]) tensor([0.5343, 0.1206, 0.2237, 0.1213]) -Greedy action tensor([ 1.0959, -0.4995, 0.1906, -0.4396]) tensor([0.5487, 0.1113, 0.2219, 0.1182]) -Greedy action tensor([ 0.7232, 0.0231, -0.0138, 0.0734]) tensor([0.4004, 0.1988, 0.1916, 0.2091]) -Greedy action tensor([ 1.0565, -0.1944, 0.0915, -0.1650]) tensor([0.5097, 0.1459, 0.1942, 0.1502]) -Greedy action tensor([ 0.6321, -0.2772, -0.0391, -0.0821]) tensor([0.4161, 0.1676, 0.2126, 0.2037]) -Greedy action tensor([ 0.3218, -0.1574, -0.0781, -0.1237]) tensor([0.3413, 0.2114, 0.2288, 0.2186]) -Greedy action tensor([ 1.8364, -0.1964, 0.1032, 0.1655]) tensor([0.6686, 0.0876, 0.1181, 0.1257]) -Greedy action tensor([ 1.4595, -0.3740, -0.5850, 0.1367]) tensor([0.6428, 0.1028, 0.0832, 0.1712]) -Greedy action tensor([ 1.4642, -0.2349, -1.1122, 0.3912]) tensor([0.6247, 0.1142, 0.0475, 0.2136]) -Greedy action tensor([ 1.3753, -0.3330, -0.4409, 0.2594]) tensor([0.5983, 0.1084, 0.0973, 0.1960]) -Greedy action tensor([ 0.9277, -0.0446, -0.1614, 0.0135]) tensor([0.4727, 0.1788, 0.1591, 0.1895]) -Greedy action tensor([ 1.2667, -0.2020, -1.2391, 0.0980]) tensor([0.6163, 0.1419, 0.0503, 0.1915]) -Greedy action tensor([ 1.4885, -0.5736, -0.3428, 0.4851]) tensor([0.6046, 0.0769, 0.0969, 0.2217]) -Greedy action tensor([ 2.2876, -1.1230, 0.1412, 1.0524]) tensor([0.6941, 0.0229, 0.0811, 0.2018]) -Greedy action tensor([ 1.8434, -0.9573, -0.1707, 0.1913]) tensor([0.7216, 0.0438, 0.0963, 0.1383]) -Greedy action tensor([ 1.1795, -0.5112, -0.4459, 0.3954]) tensor([0.5441, 0.1003, 0.1071, 0.2484]) -Greedy action tensor([ 1.8380, -0.6525, -0.3117, 0.5379]) tensor([0.6794, 0.0563, 0.0792, 0.1851]) -Greedy action tensor([ 1.9553, -0.6177, -0.3272, 0.4207]) tensor([0.7174, 0.0547, 0.0732, 0.1546]) -Greedy action tensor([ 1.4256, -0.4065, -0.8021, -0.0262]) tensor([0.6658, 0.1066, 0.0718, 0.1559]) -Greedy action tensor([ 1.6495, -0.9636, -0.0038, 0.6054]) tensor([0.6185, 0.0453, 0.1184, 0.2177]) -Greedy action tensor([ 1.1870, -0.5309, -0.4339, 0.2067]) tensor([0.5707, 0.1024, 0.1128, 0.2141]) -Greedy action tensor([ 1.8446, -0.3130, -0.1415, 0.0237]) tensor([0.7069, 0.0817, 0.0970, 0.1144]) -Greedy action tensor([ 1.4907, -0.5184, -0.7706, 0.0047]) tensor([0.6828, 0.0916, 0.0712, 0.1545]) -Greedy action tensor([2.1447, 0.4563, 0.1423, 0.3819]) tensor([0.6705, 0.1239, 0.0905, 0.1150]) -Greedy action tensor([ 1.2888, -0.2900, -0.2355, 0.0585]) tensor([0.5827, 0.1202, 0.1269, 0.1703]) -Greedy action tensor([ 1.7341, -0.4597, -0.3552, 0.6179]) tensor([0.6399, 0.0713, 0.0792, 0.2096]) -Greedy action tensor([ 1.8425, -0.1976, -0.4584, 1.1081]) tensor([0.5848, 0.0760, 0.0586, 0.2806]) -Greedy action tensor([ 1.7266, -0.3881, -0.4307, 0.6298]) tensor([0.6368, 0.0768, 0.0736, 0.2127]) -Greedy action tensor([ 1.5662, -0.6024, -0.8192, 0.0851]) tensor([0.6974, 0.0797, 0.0642, 0.1586]) -Greedy action tensor([ 1.8653, -0.3230, -0.1457, 0.3953]) tensor([0.6776, 0.0760, 0.0907, 0.1558]) -Greedy action tensor([ 2.5019, -0.4700, -0.7922, 0.6052]) tensor([0.8075, 0.0413, 0.0300, 0.1212]) -Greedy action tensor([ 2.0274, -0.9689, -0.0763, 0.3481]) tensor([0.7361, 0.0368, 0.0898, 0.1373]) -Greedy action tensor([ 1.8956, -0.2500, -0.3078, 0.0858]) tensor([0.7188, 0.0841, 0.0794, 0.1177]) -Greedy action tensor([ 2.5243, -1.4343, -0.1431, 1.2285]) tensor([0.7341, 0.0140, 0.0510, 0.2009]) -Greedy action tensor([ 1.2343, -0.4254, -0.6595, 0.3007]) tensor([0.5768, 0.1097, 0.0868, 0.2267]) -Greedy action tensor([ 1.9815, -1.1946, -0.0262, 0.8591]) tensor([0.6660, 0.0278, 0.0894, 0.2168]) -Greedy action tensor([ 1.5256, -0.2967, -0.5557, 0.1530]) tensor([0.6494, 0.1050, 0.0810, 0.1646]) -Greedy action tensor([ 1.4932, -0.5315, -0.7837, 0.1580]) tensor([0.6677, 0.0881, 0.0685, 0.1757]) -Greedy action tensor([ 1.1513, -0.1800, -0.3482, 0.1697]) tensor([0.5370, 0.1418, 0.1199, 0.2012]) -Greedy action tensor([ 1.9828, -0.1077, -0.1683, 0.2175]) tensor([0.7087, 0.0876, 0.0825, 0.1213]) -Greedy action tensor([ 1.8829, 0.6455, -0.3301, 0.1477]) tensor([0.6346, 0.1841, 0.0694, 0.1119]) -Greedy action tensor([ 2.1029, -0.9573, -0.2225, 0.6512]) tensor([0.7253, 0.0340, 0.0709, 0.1698]) -Greedy action tensor([ 1.7692, -0.7224, 0.1333, -0.1264]) tensor([0.7004, 0.0580, 0.1364, 0.1052]) -Greedy action tensor([ 1.6971, -0.6377, -0.6928, 0.2933]) tensor([0.6973, 0.0675, 0.0639, 0.1713]) -Greedy action tensor([ 1.9657, -0.5024, -0.4823, 0.3302]) tensor([0.7320, 0.0620, 0.0633, 0.1426]) -Greedy action tensor([ 1.6477, -0.7847, -0.3077, 0.7888]) tensor([0.6050, 0.0531, 0.0856, 0.2563]) -Greedy action tensor([ 1.2624, -0.7414, 0.1357, 0.8605]) tensor([0.4699, 0.0634, 0.1523, 0.3144]) -Greedy action tensor([ 1.0347, -0.5355, -0.3286, 0.2917]) tensor([0.5156, 0.1072, 0.1319, 0.2453]) -Greedy action tensor([ 1.4202, -0.4375, -1.2191, 0.2834]) tensor([0.6459, 0.1008, 0.0461, 0.2072]) -Greedy action tensor([ 1.2618, -0.2810, -0.9372, 0.5420]) tensor([0.5520, 0.1180, 0.0612, 0.2687]) -Greedy action tensor([ 1.8831, -0.2668, -0.2624, 0.2335]) tensor([0.7014, 0.0817, 0.0821, 0.1348]) -Greedy action tensor([ 1.6626, -0.9847, -0.1794, 0.0388]) tensor([0.7010, 0.0497, 0.1111, 0.1382]) -Greedy action tensor([ 1.7531, -0.5481, -0.1589, 0.6612]) tensor([0.6315, 0.0632, 0.0933, 0.2119]) -Greedy action tensor([ 2.3500, -0.6560, -0.0685, 0.4039]) tensor([0.7804, 0.0386, 0.0695, 0.1115]) -Greedy action tensor([ 2.0190, -0.4530, -0.4466, 0.3653]) tensor([0.7349, 0.0620, 0.0624, 0.1406]) -Greedy action tensor([ 1.2355, -0.1346, -1.1397, 0.8465]) tensor([0.4939, 0.1255, 0.0459, 0.3347]) -Greedy action tensor([ 1.9547, -0.5278, -0.5741, 0.6950]) tensor([0.6911, 0.0577, 0.0551, 0.1961]) -Greedy action tensor([ 1.9764, 0.1819, -0.0686, -0.0046]) tensor([0.6976, 0.1159, 0.0903, 0.0962]) -Greedy action tensor([ 1.8079, -0.8198, 0.0749, 0.6091]) tensor([0.6449, 0.0466, 0.1140, 0.1945]) -Greedy action tensor([ 1.7243, -0.7279, -0.5347, 0.0797]) tensor([0.7227, 0.0622, 0.0755, 0.1396]) -Greedy action tensor([ 1.4889, 0.0029, -0.5702, 0.0296]) tensor([0.6304, 0.1426, 0.0804, 0.1465]) -Greedy action tensor([ 1.1603e+00, -2.0421e-04, -1.6882e-01, 2.5932e-01]) tensor([0.5040, 0.1579, 0.1334, 0.2047]) -Greedy action tensor([ 1.9246, -0.3611, -0.5048, 0.3955]) tensor([0.7110, 0.0723, 0.0626, 0.1541]) -Greedy action tensor([ 1.1880, -0.4620, -0.8429, 0.1226]) tensor([0.5996, 0.1151, 0.0787, 0.2066]) -Greedy action tensor([ 0.8273, -0.6651, -0.2901, 0.1388]) tensor([0.4868, 0.1094, 0.1592, 0.2445]) -Greedy action tensor([ 1.7064, -0.3977, -0.9152, 0.2515]) tensor([0.7002, 0.0854, 0.0509, 0.1635]) -Greedy action tensor([ 1.4037, -0.6245, -0.2511, 0.2679]) tensor([0.6083, 0.0800, 0.1163, 0.1954]) -Greedy action tensor([ 1.6276, 0.3371, -0.7093, 0.5519]) tensor([0.5838, 0.1606, 0.0564, 0.1991]) -Greedy action tensor([ 1.8444, -0.2789, -0.8338, 0.2187]) tensor([0.7220, 0.0864, 0.0496, 0.1421]) -Greedy action tensor([ 1.0685, -0.4594, -0.1729, 0.0371]) tensor([0.5369, 0.1165, 0.1551, 0.1914]) -Greedy action tensor([ 1.6211, -0.5848, -0.4245, 0.3436]) tensor([0.6587, 0.0725, 0.0852, 0.1836]) -Greedy action tensor([ 1.3841, 0.1968, -0.2979, -0.2829]) tensor([0.5953, 0.1816, 0.1107, 0.1124]) -Greedy action tensor([ 1.4297, -0.8400, -0.2738, 0.4306]) tensor([0.6047, 0.0625, 0.1101, 0.2227]) -Greedy action tensor([ 1.5956, -0.7788, -0.4756, 0.1031]) tensor([0.6926, 0.0645, 0.0873, 0.1557]) -Greedy action tensor([ 1.3736, -0.5055, -0.6982, 0.4699]) tensor([0.5939, 0.0907, 0.0748, 0.2406]) -Greedy action tensor([ 2.9701, -0.9698, 0.1867, 0.8910]) tensor([0.8290, 0.0161, 0.0513, 0.1037]) -Greedy action tensor([ 1.9043, -0.8511, -0.0987, 0.2452]) tensor([0.7200, 0.0458, 0.0972, 0.1370]) -Greedy action tensor([ 2.3416, -0.9893, -0.2198, 0.2644]) tensor([0.8076, 0.0289, 0.0623, 0.1012]) -Greedy action tensor([ 1.2992, -0.5024, -0.6522, 0.1277]) tensor([0.6184, 0.1021, 0.0879, 0.1917]) -Greedy action tensor([ 0.9945, -0.0442, -0.0777, -0.1486]) tensor([0.4963, 0.1756, 0.1699, 0.1582]) -Greedy action tensor([ 1.2643, -0.4195, -0.6149, 0.5584]) tensor([0.5458, 0.1013, 0.0834, 0.2695]) -Greedy action tensor([ 1.4076, -0.4909, -0.0468, 0.0634]) tensor([0.6082, 0.0911, 0.1421, 0.1586]) -Greedy action tensor([ 1.7174, -0.6820, -0.4125, 0.7412]) tensor([0.6304, 0.0572, 0.0749, 0.2375]) -Greedy action tensor([ 1.4083, -0.4405, -0.6284, 0.5550]) tensor([0.5835, 0.0918, 0.0761, 0.2486]) -Greedy action tensor([ 1.5862, -0.2895, -0.8970, 0.2382]) tensor([0.6682, 0.1024, 0.0558, 0.1736]) -Greedy action tensor([ 1.1328, -0.3210, -0.5669, 0.1897]) tensor([0.5538, 0.1294, 0.1012, 0.2156]) -Greedy action tensor([ 1.0439, -0.5626, -0.0136, 0.1965]) tensor([0.5060, 0.1015, 0.1757, 0.2168]) -Greedy action tensor([-1.9321, -0.4291, 0.6589, -0.1713]) tensor([0.0406, 0.1823, 0.5412, 0.2359]) -Greedy action tensor([-1.9142, -0.4330, 0.6395, -0.1626]) tensor([0.0416, 0.1831, 0.5352, 0.2400]) -Greedy action tensor([-1.6525, -0.4707, 0.5603, 0.0850]) tensor([0.0524, 0.1708, 0.4790, 0.2978]) -Greedy action tensor([-1.0271, 0.7431, 0.0393, 0.2874]) tensor([0.0741, 0.4350, 0.2152, 0.2758]) -Greedy action tensor([-1.3921, -0.5933, 0.4132, 0.0879]) tensor([0.0730, 0.1623, 0.4440, 0.3207]) -Greedy action tensor([-0.5639, 1.0537, 0.0297, 0.3929]) tensor([0.0957, 0.4822, 0.1732, 0.2490]) -Greedy action tensor([-1.5951, -0.4620, 0.4972, -0.0118]) tensor([0.0585, 0.1818, 0.4745, 0.2852]) -Greedy action tensor([-1.8562, -0.4111, 0.6188, -0.1432]) tensor([0.0441, 0.1871, 0.5241, 0.2446]) -Greedy action tensor([-1.9481, -0.4497, 0.6667, -0.1829]) tensor([0.0400, 0.1791, 0.5470, 0.2339]) -Greedy action tensor([-1.9261, -0.4389, 0.6594, -0.1695]) tensor([0.0408, 0.1807, 0.5419, 0.2366]) -Greedy action tensor([-1.0478, 0.0434, 0.1715, -0.0249]) tensor([0.0986, 0.2936, 0.3337, 0.2742]) -Greedy action tensor([-1.6508, -0.3356, 0.5165, -0.0060]) tensor([0.0536, 0.1999, 0.4686, 0.2779]) -Greedy action tensor([-1.8690, -0.3587, 0.6444, -0.1420]) tensor([0.0426, 0.1927, 0.5254, 0.2393]) -Greedy action tensor([-1.8931, -0.3403, 0.6314, -0.1463]) tensor([0.0418, 0.1973, 0.5214, 0.2396]) -Greedy action tensor([-1.7147, -0.3903, 0.6406, -0.0508]) tensor([0.0486, 0.1827, 0.5122, 0.2565]) -Greedy action tensor([-0.9433, -0.5873, 0.2490, -0.0065]) tensor([0.1209, 0.1725, 0.3982, 0.3084]) -Greedy action tensor([-1.9206, -0.4473, 0.6585, -0.1675]) tensor([0.0411, 0.1794, 0.5421, 0.2373]) -Greedy action tensor([-1.2852, 0.6944, 0.1919, 0.1902]) tensor([0.0589, 0.4260, 0.2578, 0.2573]) -Greedy action tensor([-1.7043, -0.0625, 0.4836, -0.0571]) tensor([0.0493, 0.2547, 0.4398, 0.2561]) -Greedy action tensor([-1.1826, -0.0757, 0.5741, 0.2659]) tensor([0.0710, 0.2149, 0.4116, 0.3024]) -Greedy action tensor([-1.9209, -0.4207, 0.6527, -0.1667]) tensor([0.0410, 0.1839, 0.5380, 0.2371]) -Greedy action tensor([-1.9396, -0.4496, 0.6634, -0.1774]) tensor([0.0404, 0.1792, 0.5453, 0.2352]) -Greedy action tensor([-1.8918, -0.4151, 0.6415, -0.1505]) tensor([0.0422, 0.1849, 0.5319, 0.2409]) -Greedy action tensor([-1.8076, -0.2516, 0.5763, -0.1319]) tensor([0.0456, 0.2161, 0.4946, 0.2436]) -Greedy action tensor([-1.8641, -0.4822, 0.6329, -0.1247]) tensor([0.0438, 0.1745, 0.5322, 0.2495]) -Greedy action tensor([-0.4916, 1.0260, 0.0448, 0.2627]) tensor([0.1064, 0.4854, 0.1820, 0.2263]) -Greedy action tensor([-1.9394, -0.4084, 0.6571, -0.1770]) tensor([0.0402, 0.1859, 0.5396, 0.2343]) -Greedy action tensor([-1.7965, -0.4177, 0.6315, -0.0747]) tensor([0.0457, 0.1813, 0.5176, 0.2555]) -Greedy action tensor([-1.7365, -0.2982, 0.6128, -0.0596]) tensor([0.0475, 0.2003, 0.4980, 0.2542]) -Greedy action tensor([-0.9316, 0.8530, 0.1367, 0.0982]) tensor([0.0789, 0.4703, 0.2297, 0.2211]) -Greedy action tensor([-1.8113, -0.3433, 0.6117, -0.0886]) tensor([0.0450, 0.1953, 0.5076, 0.2520]) -Greedy action tensor([-1.7530, -0.0919, 0.6352, -0.3359]) tensor([0.0470, 0.2474, 0.5118, 0.1938]) -Greedy action tensor([-1.7672, -0.0045, 0.5168, -0.0406]) tensor([0.0449, 0.2618, 0.4408, 0.2525]) -Greedy action tensor([-1.5990, 0.3792, 0.3657, 0.0361]) tensor([0.0488, 0.3528, 0.3481, 0.2503]) -Greedy action tensor([-1.9227, -0.4351, 0.6565, -0.1682]) tensor([0.0410, 0.1814, 0.5406, 0.2370]) -Greedy action tensor([-1.6810, -0.4809, 0.5378, -0.0742]) tensor([0.0540, 0.1795, 0.4970, 0.2695]) -Greedy action tensor([-1.8379, -0.2857, 0.5858, -0.1178]) tensor([0.0443, 0.2090, 0.4996, 0.2472]) -Greedy action tensor([-1.9289, -0.4357, 0.6605, -0.1713]) tensor([0.0407, 0.1812, 0.5422, 0.2360]) -Greedy action tensor([-1.9237, -0.4694, 0.6936, -0.1594]) tensor([0.0403, 0.1725, 0.5520, 0.2352]) -Greedy action tensor([-0.6680, -0.4361, 0.1871, 0.2223]) tensor([0.1419, 0.1789, 0.3336, 0.3456]) -Greedy action tensor([-1.9378, -0.4396, 0.6642, -0.1758]) tensor([0.0403, 0.1805, 0.5442, 0.2349]) -Greedy action tensor([-1.8993, -0.3709, 0.6463, -0.1474]) tensor([0.0414, 0.1911, 0.5285, 0.2390]) -Greedy action tensor([-1.9305, -0.4364, 0.6603, -0.1724]) tensor([0.0407, 0.1811, 0.5424, 0.2359]) -Greedy action tensor([-1.5324, -0.5647, 0.4517, 0.0164]) tensor([0.0641, 0.1686, 0.4659, 0.3015]) -Greedy action tensor([-1.1710, 0.5665, 0.2354, -0.0764]) tensor([0.0727, 0.4132, 0.2968, 0.2173]) -Greedy action tensor([0.2703, 0.1413, 0.6036, 1.0606]) tensor([0.1825, 0.1604, 0.2547, 0.4023]) -Greedy action tensor([-1.6570, 0.2769, 0.4141, -0.0160]) tensor([0.0476, 0.3292, 0.3776, 0.2456]) -Greedy action tensor([-1.8570, -0.3893, 0.6188, -0.1446]) tensor([0.0439, 0.1905, 0.5222, 0.2434]) -Greedy action tensor([-1.7855, -0.3895, 0.6186, -0.0707]) tensor([0.0462, 0.1864, 0.5109, 0.2565]) -Greedy action tensor([-1.6374, -0.4818, 0.5090, -0.0320]) tensor([0.0565, 0.1793, 0.4830, 0.2812]) -Greedy action tensor([-1.8817, -0.3752, 0.6368, -0.1257]) tensor([0.0422, 0.1903, 0.5234, 0.2442]) -Greedy action tensor([-1.9049, -0.4487, 0.6339, -0.1570]) tensor([0.0422, 0.1810, 0.5344, 0.2423]) -Greedy action tensor([-1.9250, -0.4437, 0.6605, -0.1702]) tensor([0.0409, 0.1799, 0.5427, 0.2365]) -Greedy action tensor([-1.8809, -0.4721, 0.6319, -0.1488]) tensor([0.0433, 0.1772, 0.5346, 0.2449]) -Greedy action tensor([-1.7939, -0.0375, 0.5439, -0.1168]) tensor([0.0444, 0.2574, 0.4604, 0.2378]) -Greedy action tensor([-1.8555, -0.4582, 0.6204, -0.1451]) tensor([0.0445, 0.1800, 0.5293, 0.2462]) -Greedy action tensor([-1.9132, -0.4042, 0.6486, -0.1519]) tensor([0.0411, 0.1861, 0.5333, 0.2395]) -Greedy action tensor([-1.8413, -0.4321, 0.6194, -0.1205]) tensor([0.0447, 0.1828, 0.5230, 0.2496]) -Greedy action tensor([-1.9235, -0.4495, 0.6615, -0.1703]) tensor([0.0410, 0.1789, 0.5435, 0.2366]) -Greedy action tensor([-1.9119, -0.3781, 0.6429, -0.1525]) tensor([0.0411, 0.1907, 0.5293, 0.2389]) -Greedy action tensor([-1.8073, -0.4411, 0.5846, -0.0951]) tensor([0.0467, 0.1832, 0.5111, 0.2590]) -Greedy action tensor([-1.9057, -0.4117, 0.6473, -0.1582]) tensor([0.0416, 0.1853, 0.5343, 0.2388]) -Greedy action tensor([-1.5303, -0.2460, 0.6400, -0.0243]) tensor([0.0559, 0.2020, 0.4899, 0.2521]) -Greedy action tensor([-1.9226, -0.4632, 0.6563, -0.1716]) tensor([0.0412, 0.1775, 0.5437, 0.2376]) -Greedy action tensor([-1.1103, 0.7801, 0.2908, -0.2186]) tensor([0.0708, 0.4690, 0.2875, 0.1727]) -Greedy action tensor([-1.9114, -0.4543, 0.6481, -0.1620]) tensor([0.0417, 0.1791, 0.5393, 0.2399]) -Greedy action tensor([-1.4919, -0.4709, 0.4467, 0.0714]) tensor([0.0645, 0.1791, 0.4483, 0.3080]) -Greedy action tensor([-1.9127, -0.4230, 0.6521, -0.1641]) tensor([0.0414, 0.1835, 0.5375, 0.2376]) -Greedy action tensor([-1.6892, 0.1150, 0.4692, -0.0023]) tensor([0.0473, 0.2874, 0.4096, 0.2556]) -Greedy action tensor([0.1498, 0.7920, 0.0277, 0.6879]) tensor([0.1819, 0.3457, 0.1610, 0.3115]) -Greedy action tensor([-1.8572, -0.3931, 0.6540, -0.0517]) tensor([0.0421, 0.1822, 0.5192, 0.2564]) -Greedy action tensor([-1.9079, -0.3344, 0.6301, -0.1578]) tensor([0.0413, 0.1991, 0.5222, 0.2375]) -Greedy action tensor([-1.6727, -0.2827, 0.6046, -0.0437]) tensor([0.0503, 0.2021, 0.4909, 0.2567]) -Greedy action tensor([-1.9043, -0.4530, 0.6523, -0.1614]) tensor([0.0419, 0.1788, 0.5400, 0.2393]) -Greedy action tensor([-1.9226, -0.3062, 0.6346, -0.1642]) tensor([0.0404, 0.2035, 0.5214, 0.2346]) -Greedy action tensor([-1.1936, 0.1947, -0.5616, -0.3600]) tensor([0.1088, 0.4361, 0.2047, 0.2504]) -Greedy action tensor([-1.8708, -0.4201, 0.6273, -0.1493]) tensor([0.0434, 0.1853, 0.5282, 0.2430]) -Greedy action tensor([-1.9046, -0.4550, 0.6539, -0.1583]) tensor([0.0418, 0.1782, 0.5402, 0.2398]) -Greedy action tensor([-1.6115, -0.0695, 0.4585, -0.0703]) tensor([0.0547, 0.2558, 0.4338, 0.2556]) -Greedy action tensor([-0.5708, -0.3777, 0.0957, 0.2854]) tensor([0.1535, 0.1862, 0.2989, 0.3614]) -Greedy action tensor([-1.9098, -0.4072, 0.6517, -0.1608]) tensor([0.0413, 0.1857, 0.5354, 0.2376]) -Greedy action tensor([-1.0321, 0.2060, 0.4506, 0.2790]) tensor([0.0796, 0.2745, 0.3506, 0.2953]) -Greedy action tensor([-0.2465, -0.7330, -0.4310, 0.2651]) tensor([0.2431, 0.1494, 0.2021, 0.4054]) -Greedy action tensor([-0.2053, -0.5557, -0.0863, -0.6455]) tensor([0.2878, 0.2027, 0.3242, 0.1853]) -Greedy action tensor([-0.5494, -1.0906, -1.4539, -0.6892]) tensor([0.3501, 0.2038, 0.1417, 0.3044]) -Greedy action tensor([ 0.2699, 0.6810, -0.4701, -0.4702]) tensor([0.2888, 0.4356, 0.1378, 0.1378]) -Greedy action tensor([-1.2576, 0.4133, -0.2781, -0.3753]) tensor([0.0877, 0.4665, 0.2337, 0.2120]) -Greedy action tensor([ 0.7017, -0.0517, 0.6020, 0.6319]) tensor([0.3023, 0.1423, 0.2736, 0.2819]) -Greedy action tensor([-0.3478, -0.6002, -0.8395, 0.1285]) tensor([0.2501, 0.1943, 0.1530, 0.4027]) -Greedy action tensor([ 0.3788, 0.7476, 1.0430, -0.5867]) tensor([0.2097, 0.3032, 0.4074, 0.0798]) -Greedy action tensor([-0.8832, 0.4646, 0.5409, -0.9038]) tensor([0.1002, 0.3855, 0.4161, 0.0981]) -Greedy action tensor([ 0.0712, 0.0120, 0.1097, -0.2891]) tensor([0.2718, 0.2562, 0.2825, 0.1896]) -Greedy action tensor([-0.2847, -0.7984, -0.1067, -0.3567]) tensor([0.2686, 0.1607, 0.3209, 0.2499]) -Greedy action tensor([ 0.8203, -1.5934, -0.0752, 0.9351]) tensor([0.3817, 0.0342, 0.1559, 0.4282]) -Greedy action tensor([-1.0662, -0.1520, 0.1665, -0.1810]) tensor([0.1070, 0.2669, 0.3669, 0.2592]) -Greedy action tensor([-0.8901, -0.8074, 0.1930, -0.8277]) tensor([0.1638, 0.1779, 0.4839, 0.1744]) -Greedy action tensor([ 1.4350, -1.7483, 1.1140, 1.1449]) tensor([0.3976, 0.0165, 0.2884, 0.2975]) -Greedy action tensor([-0.0573, -0.8786, -0.0465, -0.0401]) tensor([0.2883, 0.1268, 0.2915, 0.2933]) -Greedy action tensor([ 0.5020, -0.8678, 0.5463, -0.4394]) tensor([0.3718, 0.0945, 0.3887, 0.1450]) -Greedy action tensor([-0.7483, -0.2401, -0.4940, -0.0681]) tensor([0.1687, 0.2805, 0.2176, 0.3332]) -Greedy action tensor([ 0.5055, -1.5755, -0.9235, 0.0337]) tensor([0.5030, 0.0628, 0.1205, 0.3138]) -Greedy action tensor([ 0.1278, -0.8509, -1.0088, 0.2458]) tensor([0.3544, 0.1332, 0.1137, 0.3987]) -Greedy action tensor([-0.4736, -0.3195, 0.2275, -0.3759]) tensor([0.1892, 0.2207, 0.3814, 0.2086]) -Greedy action tensor([-0.3461, 0.0275, 0.8148, -0.5303]) tensor([0.1544, 0.2243, 0.4929, 0.1284]) -Greedy action tensor([ 1.7772, -0.5411, 1.4030, -0.2137]) tensor([0.5201, 0.0512, 0.3577, 0.0710]) -Greedy action tensor([-0.3096, -0.2837, 0.8717, -1.0252]) tensor([0.1732, 0.1777, 0.5644, 0.0847]) -Greedy action tensor([-1.1293, -0.6398, 0.3350, -0.3175]) tensor([0.1086, 0.1772, 0.4697, 0.2446]) -Greedy action tensor([-0.3386, -0.0005, -0.2645, -0.4384]) tensor([0.2281, 0.3198, 0.2456, 0.2064]) -Greedy action tensor([ 0.3514, 0.1413, -0.6000, -0.5487]) tensor([0.3841, 0.3114, 0.1484, 0.1562]) -Greedy action tensor([-0.6683, 0.8348, 0.8625, -1.0544]) tensor([0.0926, 0.4164, 0.4281, 0.0630]) -Greedy action tensor([ 0.2613, -0.5040, 0.1618, 0.0482]) tensor([0.3146, 0.1464, 0.2848, 0.2542]) -Greedy action tensor([-0.5654, -0.3067, -1.0788, 0.4423]) tensor([0.1775, 0.2299, 0.1062, 0.4863]) -Greedy action tensor([-0.3437, 0.2581, -0.6876, 0.1582]) tensor([0.1928, 0.3520, 0.1367, 0.3185]) -Greedy action tensor([ 0.1339, -1.5231, 0.6459, 0.4932]) tensor([0.2330, 0.0444, 0.3888, 0.3337]) -Greedy action tensor([-0.7360, -0.6155, 0.4035, -0.3493]) tensor([0.1487, 0.1677, 0.4647, 0.2189]) -Greedy action tensor([ 1.2945, -1.1430, -0.4740, -0.0876]) tensor([0.6627, 0.0579, 0.1131, 0.1664]) -Greedy action tensor([ 0.0457, 0.4182, 1.2849, -0.5182]) tensor([0.1545, 0.2242, 0.5334, 0.0879]) -Greedy action tensor([ 0.1364, -1.3940, 0.7237, -1.1995]) tensor([0.3050, 0.0660, 0.5488, 0.0802]) -Greedy action tensor([ 1.4259, -0.2172, -0.0471, 0.3494]) tensor([0.5671, 0.1097, 0.1300, 0.1933]) -Greedy action tensor([-0.1440, -0.9734, 0.3127, -0.2099]) tensor([0.2531, 0.1104, 0.3996, 0.2369]) -Greedy action tensor([ 0.0332, -0.9328, -0.0823, 0.0139]) tensor([0.3075, 0.1170, 0.2739, 0.3016]) -Greedy action tensor([-0.1690, -0.6262, -0.2618, -0.2247]) tensor([0.2865, 0.1814, 0.2611, 0.2710]) -Greedy action tensor([-0.5725, -0.1194, 0.6971, -0.5911]) tensor([0.1406, 0.2211, 0.5003, 0.1380]) -Greedy action tensor([-1.1461, -1.1817, -0.1617, -0.5123]) tensor([0.1532, 0.1479, 0.4101, 0.2888]) -Greedy action tensor([-1.3430, -1.0854, 0.3923, -1.6735]) tensor([0.1152, 0.1490, 0.6531, 0.0828]) -Greedy action tensor([-0.4857, -0.4424, -0.1754, -0.7607]) tensor([0.2399, 0.2506, 0.3272, 0.1823]) -Greedy action tensor([ 1.2039, -0.1189, 0.3897, 0.8660]) tensor([0.4128, 0.1100, 0.1829, 0.2944]) -Greedy action tensor([-0.2319, -0.0926, 0.2874, -1.1788]) tensor([0.2371, 0.2725, 0.3985, 0.0920]) -Greedy action tensor([-0.1819, 0.0201, 0.2829, -0.5972]) tensor([0.2234, 0.2734, 0.3556, 0.1475]) -Greedy action tensor([-0.3203, 0.1875, -0.1915, -1.0140]) tensor([0.2326, 0.3865, 0.2646, 0.1163]) -Greedy action tensor([-1.0583, -1.0057, 0.6547, -0.8665]) tensor([0.1135, 0.1196, 0.6294, 0.1375]) -Greedy action tensor([ 0.4771, -0.8457, -0.8518, 0.4703]) tensor([0.3961, 0.1055, 0.1049, 0.3935]) -Greedy action tensor([ 0.9960, -0.6547, 1.0035, 0.4146]) tensor([0.3625, 0.0696, 0.3652, 0.2027]) -Greedy action tensor([ 0.2709, -1.1055, -0.1288, 0.0737]) tensor([0.3644, 0.0920, 0.2444, 0.2992]) -Greedy action tensor([-0.2292, -0.7646, -0.6454, 0.8901]) tensor([0.1884, 0.1103, 0.1243, 0.5770]) -Greedy action tensor([ 0.6260, -2.1296, -0.1909, 0.4852]) tensor([0.4212, 0.0268, 0.1861, 0.3659]) -Greedy action tensor([ 0.4026, 0.3964, -0.8861, 0.2734]) tensor([0.3176, 0.3157, 0.0875, 0.2791]) -Greedy action tensor([ 1.1190, -0.8123, -0.4067, 0.3838]) tensor([0.5429, 0.0787, 0.1181, 0.2603]) -Greedy action tensor([ 0.7695, -1.0381, -0.4157, 0.1777]) tensor([0.4943, 0.0811, 0.1511, 0.2735]) -Greedy action tensor([ 0.5729, 0.2145, 0.1417, -0.5899]) tensor([0.3758, 0.2626, 0.2441, 0.1175]) -Greedy action tensor([-0.0384, -0.0383, -0.2191, 0.5975]) tensor([0.2117, 0.2117, 0.1767, 0.3998]) -Greedy action tensor([-0.0557, -0.7744, -0.7887, -1.0024]) tensor([0.4245, 0.2069, 0.2039, 0.1647]) -Greedy action tensor([-0.4745, -0.5934, -0.1092, -0.2029]) tensor([0.2155, 0.1913, 0.3105, 0.2827]) -Greedy action tensor([-0.1977, -0.9706, 0.1446, -0.7114]) tensor([0.2883, 0.1331, 0.4060, 0.1725]) -Greedy action tensor([-0.6061, -1.3045, -0.0271, -0.6164]) tensor([0.2341, 0.1164, 0.4177, 0.2317]) -Greedy action tensor([ 0.4082, -1.3351, 1.5158, -0.0702]) tensor([0.2074, 0.0363, 0.6278, 0.1285]) -Greedy action tensor([-0.3500, 0.6117, 0.8344, -0.4782]) tensor([0.1288, 0.3369, 0.4210, 0.1133]) -Greedy action tensor([ 1.1594, -0.1168, 0.2065, -0.3097]) tensor([0.5277, 0.1473, 0.2035, 0.1215]) -Greedy action tensor([ 1.0328, -1.2134, 0.6544, -0.2716]) tensor([0.4849, 0.0513, 0.3322, 0.1316]) -Greedy action tensor([-0.3147, 0.0695, 0.3088, -0.5016]) tensor([0.1937, 0.2844, 0.3613, 0.1606]) -Greedy action tensor([ 0.2727, -0.3980, 0.3284, 0.4981]) tensor([0.2617, 0.1338, 0.2767, 0.3278]) -Greedy action tensor([0.4181, 0.4678, 0.4955, 0.3288]) tensor([0.2472, 0.2598, 0.2670, 0.2260]) -Greedy action tensor([-0.6106, -0.6758, 1.5356, -0.0138]) tensor([0.0813, 0.0761, 0.6950, 0.1476]) -Greedy action tensor([-0.5583, 0.5280, 0.1748, -1.1147]) tensor([0.1511, 0.4478, 0.3145, 0.0866]) -Greedy action tensor([-0.6300, -0.4585, 0.5449, -1.1834]) tensor([0.1667, 0.1979, 0.5396, 0.0958]) -Greedy action tensor([-0.7602, -1.9892, -0.8292, 0.3885]) tensor([0.1859, 0.0544, 0.1735, 0.5863]) -Greedy action tensor([ 0.3302, -1.4825, -0.1685, -0.0838]) tensor([0.4113, 0.0671, 0.2498, 0.2718]) -Greedy action tensor([-0.6183, 0.3425, 0.5792, -0.7673]) tensor([0.1284, 0.3357, 0.4253, 0.1106]) -Greedy action tensor([ 0.0048, -0.4989, 0.4833, -1.3222]) tensor([0.2871, 0.1735, 0.4633, 0.0762]) -Greedy action tensor([ 0.2285, -1.1055, 1.0442, -0.4577]) tensor([0.2483, 0.0654, 0.5613, 0.1250]) -Greedy action tensor([-0.3251, -1.0944, -0.3492, -0.8389]) tensor([0.3292, 0.1525, 0.3214, 0.1969]) -Greedy action tensor([-0.7737, -0.5957, -1.3024, -0.2672]) tensor([0.2250, 0.2689, 0.1326, 0.3734]) -Greedy action tensor([ 0.1199, 0.6547, 0.1930, -0.4168]) tensor([0.2290, 0.3908, 0.2463, 0.1339]) -Greedy action tensor([ 0.2527, -0.2172, 0.0086, -0.1507]) tensor([0.3250, 0.2032, 0.2546, 0.2171]) -Greedy action tensor([ 0.8826, -0.6739, -0.0140, -0.6518]) tensor([0.5451, 0.1150, 0.2224, 0.1175]) -Greedy action tensor([ 0.9199, -0.9930, 0.0215, -0.5681]) tensor([0.5616, 0.0829, 0.2287, 0.1268]) -Greedy action tensor([ 0.5288, -0.1276, -0.1346, -0.2485]) tensor([0.4010, 0.2080, 0.2066, 0.1843]) -Greedy action tensor([ 0.7220, -0.5430, 0.1922, -0.5441]) tensor([0.4645, 0.1311, 0.2735, 0.1310]) -Greedy action tensor([ 0.7368, -0.6733, 0.0159, -0.3845]) tensor([0.4863, 0.1187, 0.2365, 0.1585]) -Greedy action tensor([ 0.2743, 0.0967, 0.0033, -0.0115]) tensor([0.2984, 0.2498, 0.2276, 0.2242]) -Greedy action tensor([ 0.5947, -0.3024, -0.1377, -0.4768]) tensor([0.4482, 0.1828, 0.2155, 0.1535]) -Greedy action tensor([ 0.8190, -0.2348, 0.0148, -0.0384]) tensor([0.4504, 0.1570, 0.2015, 0.1911]) -Greedy action tensor([ 0.9013, -0.4980, -0.1329, -0.2891]) tensor([0.5246, 0.1294, 0.1865, 0.1595]) -Greedy action tensor([ 0.9910, -0.3584, -0.2432, -0.1269]) tensor([0.5326, 0.1382, 0.1550, 0.1742]) -Greedy action tensor([ 1.1253, -0.4746, -0.1061, -0.4006]) tensor([0.5844, 0.1180, 0.1706, 0.1271]) -Greedy action tensor([ 1.1924, -0.7590, -0.1178, -0.7521]) tensor([0.6431, 0.0914, 0.1735, 0.0920]) -Greedy action tensor([ 0.8241, -0.5952, 0.1668, -0.2590]) tensor([0.4765, 0.1153, 0.2469, 0.1613]) -Greedy action tensor([ 0.6680, -0.5389, -0.0237, -0.2664]) tensor([0.4561, 0.1364, 0.2284, 0.1792]) -Greedy action tensor([ 0.7623, -0.1197, -0.0377, -0.0301]) tensor([0.4318, 0.1787, 0.1940, 0.1955]) -Greedy action tensor([ 0.9449, -0.7488, -0.0096, -0.5299]) tensor([0.5563, 0.1023, 0.2142, 0.1273]) -Greedy action tensor([ 0.8041, -0.3498, 0.0492, -0.4041]) tensor([0.4798, 0.1513, 0.2255, 0.1433]) -Greedy action tensor([ 0.5481, -0.1090, -0.0703, -0.0944]) tensor([0.3871, 0.2007, 0.2086, 0.2036]) -Greedy action tensor([ 1.0597, -0.7815, 0.1177, -0.6108]) tensor([0.5758, 0.0913, 0.2245, 0.1083]) -Greedy action tensor([ 1.0295, -0.3349, -0.2190, -0.3915]) tensor([0.5606, 0.1432, 0.1608, 0.1354]) -Greedy action tensor([ 0.4837, -0.3171, -0.0776, -0.2716]) tensor([0.4017, 0.1804, 0.2292, 0.1888]) -Greedy action tensor([ 0.7299, 0.2417, 0.0173, -0.1432]) tensor([0.3965, 0.2434, 0.1945, 0.1656]) -Greedy action tensor([ 0.9057, -0.6984, 0.0064, -0.4298]) tensor([0.5345, 0.1075, 0.2175, 0.1406]) -Greedy action tensor([ 1.1932, -0.6959, -0.0019, -0.7041]) tensor([0.6235, 0.0943, 0.1887, 0.0935]) -Greedy action tensor([ 0.6768, -0.4319, -0.0722, -0.1235]) tensor([0.4440, 0.1465, 0.2100, 0.1995]) -Greedy action tensor([ 0.9030, -0.2705, 0.0259, -0.3128]) tensor([0.4946, 0.1530, 0.2058, 0.1466]) -Greedy action tensor([ 0.4195, -0.0857, -0.0847, -0.0568]) tensor([0.3535, 0.2133, 0.2136, 0.2196]) -Greedy action tensor([ 1.0854, -1.0422, 0.0730, -0.4759]) tensor([0.5909, 0.0704, 0.2147, 0.1240]) -Greedy action tensor([ 0.8247, -0.3436, 0.0066, -0.1252]) tensor([0.4675, 0.1453, 0.2063, 0.1808]) -Greedy action tensor([ 0.6415, 0.0438, -0.0141, -0.4637]) tensor([0.4166, 0.2292, 0.2163, 0.1380]) -Greedy action tensor([ 0.3789, 0.1011, -0.1046, -0.0053]) tensor([0.3273, 0.2479, 0.2018, 0.2229]) -Greedy action tensor([ 0.4046, -0.1910, -0.0606, -0.0972]) tensor([0.3591, 0.1980, 0.2255, 0.2174]) -Greedy action tensor([ 1.4488, -0.7684, -0.1795, -0.5437]) tensor([0.6937, 0.0756, 0.1361, 0.0946]) -Greedy action tensor([ 0.7169, -0.2451, -0.0818, -0.1489]) tensor([0.4439, 0.1696, 0.1997, 0.1868]) -Greedy action tensor([ 0.8178, -0.4589, 0.0098, -0.2699]) tensor([0.4850, 0.1353, 0.2162, 0.1635]) -Greedy action tensor([ 1.4743, -1.1268, 0.0532, -0.8447]) tensor([0.7072, 0.0525, 0.1708, 0.0696]) -Greedy action tensor([ 0.9816, -0.7000, -0.1256, -0.4216]) tensor([0.5674, 0.1056, 0.1875, 0.1395]) -Greedy action tensor([ 0.2529, -0.0984, -0.0343, -0.1776]) tensor([0.3221, 0.2267, 0.2417, 0.2094]) -Greedy action tensor([ 0.6516, -0.4474, -0.1352, -0.2170]) tensor([0.4529, 0.1509, 0.2062, 0.1900]) -Greedy action tensor([ 0.9230, -0.8220, 0.0947, -0.4759]) tensor([0.5381, 0.0940, 0.2351, 0.1329]) -Greedy action tensor([ 1.1535e+00, -6.4798e-01, 4.0406e-04, -4.8940e-01]) tensor([0.5973, 0.0986, 0.1886, 0.1155]) -Greedy action tensor([ 1.0276, -0.7339, -0.0495, -0.6634]) tensor([0.5894, 0.1012, 0.2007, 0.1086]) -Greedy action tensor([ 1.0081, -0.6719, -0.1859, -0.3749]) tensor([0.5746, 0.1071, 0.1741, 0.1441]) -Greedy action tensor([ 0.6335, -0.5458, 0.0853, -0.4544]) tensor([0.4500, 0.1384, 0.2601, 0.1516]) -Greedy action tensor([ 0.5376, -0.3660, -0.0633, -0.2291]) tensor([0.4136, 0.1675, 0.2268, 0.1921]) -Greedy action tensor([ 0.6416, 0.0555, -0.0327, 0.0770]) tensor([0.3796, 0.2112, 0.1934, 0.2158]) -Greedy action tensor([ 1.1948, -0.5990, -0.1251, -0.4346]) tensor([0.6137, 0.1021, 0.1640, 0.1203]) -Greedy action tensor([ 0.5927, -0.4145, -0.1385, -0.1740]) tensor([0.4327, 0.1580, 0.2083, 0.2010]) -Greedy action tensor([ 0.6767, -0.3001, 0.0696, -0.0861]) tensor([0.4188, 0.1577, 0.2282, 0.1953]) -Greedy action tensor([ 0.6489, -0.2787, -0.0617, -0.0750]) tensor([0.4216, 0.1668, 0.2072, 0.2044]) -Greedy action tensor([ 0.7839, -0.0960, -0.0288, -0.0146]) tensor([0.4332, 0.1797, 0.1922, 0.1949]) -Greedy action tensor([ 0.8730, -0.6209, 0.0809, -0.5782]) tensor([0.5231, 0.1174, 0.2369, 0.1226]) -Greedy action tensor([ 1.0010, -0.5995, -0.1077, -0.3841]) tensor([0.5612, 0.1132, 0.1852, 0.1405]) -Greedy action tensor([ 1.3141, -0.8406, -0.0896, -0.8328]) tensor([0.6764, 0.0784, 0.1662, 0.0790]) -Greedy action tensor([ 0.2267, -0.0956, 0.0855, -0.1375]) tensor([0.3042, 0.2204, 0.2641, 0.2113]) -Greedy action tensor([ 1.3486, -0.5645, -0.1937, -0.5690]) tensor([0.6629, 0.0979, 0.1418, 0.0974]) -Greedy action tensor([ 0.9845, -0.4055, 0.0226, -0.5964]) tensor([0.5444, 0.1356, 0.2080, 0.1120]) -Greedy action tensor([ 0.8194, -0.7570, -0.0499, -0.3865]) tensor([0.5194, 0.1074, 0.2177, 0.1555]) -Greedy action tensor([ 1.0163, -1.0511, 0.0548, -0.4567]) tensor([0.5754, 0.0728, 0.2200, 0.1319]) -Greedy action tensor([ 0.6773, -0.3889, -0.0469, -0.2114]) tensor([0.4464, 0.1537, 0.2164, 0.1835]) -Greedy action tensor([ 0.6829, -0.3997, 0.0529, -0.3515]) tensor([0.4491, 0.1521, 0.2392, 0.1596]) -Greedy action tensor([ 0.8377, -0.3106, -0.0898, -0.1961]) tensor([0.4835, 0.1533, 0.1912, 0.1719]) -Greedy action tensor([ 0.6500, -0.4292, -0.1506, -0.3142]) tensor([0.4608, 0.1566, 0.2069, 0.1757]) -Greedy action tensor([ 0.8013, -0.6205, -0.1121, -0.3302]) tensor([0.5089, 0.1228, 0.2042, 0.1642]) -Greedy action tensor([ 1.0609, -0.3530, -0.2618, -0.3171]) tensor([0.5676, 0.1380, 0.1512, 0.1431]) -Greedy action tensor([ 0.9137, -0.5724, -0.2231, -0.6140]) tensor([0.5669, 0.1282, 0.1819, 0.1230]) -Greedy action tensor([ 0.4980, -0.2551, -0.0134, -0.1653]) tensor([0.3867, 0.1821, 0.2319, 0.1992]) -Greedy action tensor([ 0.7010, -0.6074, -0.1734, -0.6130]) tensor([0.5112, 0.1382, 0.2132, 0.1374]) -Greedy action tensor([ 1.2127, -0.8581, 0.1332, -0.5928]) tensor([0.6134, 0.0773, 0.2084, 0.1008]) -Greedy action tensor([ 1.3135, -0.7252, 0.1346, -0.8753]) tensor([0.6452, 0.0840, 0.1985, 0.0723]) -Greedy action tensor([ 1.3268, -0.7145, -0.0277, -0.9255]) tensor([0.6698, 0.0870, 0.1728, 0.0704]) -Greedy action tensor([ 0.9396, -0.3959, -0.0092, -0.2348]) tensor([0.5104, 0.1342, 0.1976, 0.1577]) -Greedy action tensor([ 0.8172, -0.2788, -0.1701, -0.4196]) tensor([0.5007, 0.1673, 0.1866, 0.1454]) -Greedy action tensor([ 0.8503, -0.3659, 0.0058, -0.3055]) tensor([0.4900, 0.1452, 0.2106, 0.1542]) -Greedy action tensor([ 0.7979, -0.3557, 0.1470, -0.2008]) tensor([0.4534, 0.1430, 0.2365, 0.1670]) -Greedy action tensor([ 0.6667, -0.0545, 0.2028, -0.0314]) tensor([0.3828, 0.1861, 0.2407, 0.1904]) -Greedy action tensor([ 0.7910, -0.3453, -0.1827, -0.3378]) tensor([0.4945, 0.1587, 0.1868, 0.1599]) -Greedy action tensor([ 0.7606, -0.4082, 0.0274, -0.1357]) tensor([0.4547, 0.1413, 0.2184, 0.1856]) -Greedy action tensor([ 0.9428, -0.3793, -0.0824, -0.4030]) tensor([0.5303, 0.1414, 0.1902, 0.1381]) -Greedy action tensor([ 0.8876, -0.7313, -0.0863, -0.3306]) tensor([0.5343, 0.1059, 0.2018, 0.1580]) -Greedy action tensor([ 1.9900, -0.1540, -0.6524, 0.2621]) tensor([0.7321, 0.0858, 0.0521, 0.1301]) -Greedy action tensor([ 1.5331, -0.4009, -0.3839, 0.2815]) tensor([0.6339, 0.0916, 0.0932, 0.1813]) -Greedy action tensor([ 1.8856, -0.7525, -0.5813, 0.0856]) tensor([0.7566, 0.0541, 0.0642, 0.1251]) -Greedy action tensor([ 1.4187, -0.1883, -0.8191, 0.1033]) tensor([0.6347, 0.1273, 0.0677, 0.1703]) -Greedy action tensor([ 2.3161, -0.5801, -0.2678, 0.3856]) tensor([0.7838, 0.0433, 0.0592, 0.1137]) -Greedy action tensor([ 2.6363, -1.9232, -0.3632, 0.4164]) tensor([0.8555, 0.0090, 0.0426, 0.0929]) -Greedy action tensor([ 1.3429, -0.5218, -0.9219, 0.4837]) tensor([0.5944, 0.0921, 0.0617, 0.2517]) -Greedy action tensor([ 1.4679, -0.7915, -0.5290, -0.0419]) tensor([0.6844, 0.0715, 0.0929, 0.1512]) -Greedy action tensor([ 1.2086, -0.2456, -0.6389, 0.3280]) tensor([0.5538, 0.1294, 0.0873, 0.2296]) -Greedy action tensor([ 1.4150, -0.4879, -0.3286, 0.2506]) tensor([0.6112, 0.0911, 0.1069, 0.1908]) -Greedy action tensor([ 0.9914, -0.4569, -0.3244, 0.0943]) tensor([0.5233, 0.1230, 0.1404, 0.2134]) -Greedy action tensor([ 1.3651, -0.3680, -0.7983, 0.0183]) tensor([0.6445, 0.1139, 0.0741, 0.1676]) -Greedy action tensor([ 1.7521, -0.7478, -0.6578, -0.0091]) tensor([0.7442, 0.0611, 0.0668, 0.1279]) -Greedy action tensor([ 0.9254, -0.4949, 0.1603, -0.0955]) tensor([0.4837, 0.1169, 0.2251, 0.1743]) -Greedy action tensor([ 2.3628, -0.8774, -0.6092, 0.4966]) tensor([0.8032, 0.0315, 0.0411, 0.1243]) -Greedy action tensor([ 1.7072, -0.2253, -0.5878, -0.0372]) tensor([0.7041, 0.1019, 0.0709, 0.1230]) -Greedy action tensor([ 1.2324, -0.5567, 0.0414, 0.7662]) tensor([0.4766, 0.0796, 0.1448, 0.2990]) -Greedy action tensor([ 1.3230, -0.5953, -0.2920, 0.0326]) tensor([0.6169, 0.0906, 0.1227, 0.1698]) -Greedy action tensor([ 1.4745, -0.0985, -0.6213, 0.2578]) tensor([0.6148, 0.1275, 0.0756, 0.1821]) -Greedy action tensor([ 1.3670, -0.4674, -1.1800, 0.1452]) tensor([0.6524, 0.1042, 0.0511, 0.1923]) -Greedy action tensor([ 1.1858, -0.3745, -0.3598, 0.0796]) tensor([0.5701, 0.1198, 0.1215, 0.1886]) -Greedy action tensor([ 1.2476, -0.2775, -0.2766, 0.0791]) tensor([0.5727, 0.1246, 0.1247, 0.1780]) -Greedy action tensor([ 1.4932, 0.1211, -0.9203, 0.3578]) tensor([0.6008, 0.1524, 0.0538, 0.1930]) -Greedy action tensor([ 1.5431, -0.5422, -1.5198, 0.1649]) tensor([0.7027, 0.0873, 0.0329, 0.1771]) -Greedy action tensor([ 1.3480, -0.2951, -0.9177, 0.1267]) tensor([0.6281, 0.1215, 0.0652, 0.1852]) -Greedy action tensor([ 1.0094, -0.4050, -0.5759, 0.3768]) tensor([0.5053, 0.1228, 0.1035, 0.2684]) -Greedy action tensor([ 1.6754, -0.3407, -1.2780, 0.3469]) tensor([0.6896, 0.0918, 0.0360, 0.1826]) -Greedy action tensor([ 1.6797, -0.6200, 0.2773, 0.6152]) tensor([0.5913, 0.0593, 0.1455, 0.2039]) -Greedy action tensor([ 1.5117, -0.6825, -0.5195, 0.2547]) tensor([0.6548, 0.0730, 0.0859, 0.1863]) -Greedy action tensor([ 0.9705, -0.4549, -0.0929, 0.0552]) tensor([0.5035, 0.1210, 0.1738, 0.2016]) -Greedy action tensor([ 1.5080, -0.6452, -0.4821, 0.5048]) tensor([0.6175, 0.0717, 0.0844, 0.2264]) -Greedy action tensor([ 1.2397, -0.6100, 0.1328, 0.3062]) tensor([0.5316, 0.0836, 0.1757, 0.2090]) -Greedy action tensor([ 1.8439, -0.4630, -0.1379, 0.2673]) tensor([0.6925, 0.0690, 0.0954, 0.1431]) -Greedy action tensor([ 1.6577, -0.1915, -0.7821, 0.3055]) tensor([0.6652, 0.1047, 0.0580, 0.1721]) -Greedy action tensor([ 1.8881, -0.9510, -0.5330, 0.4920]) tensor([0.7169, 0.0419, 0.0637, 0.1775]) -Greedy action tensor([ 1.2437, -0.0168, -0.3379, 0.2701]) tensor([0.5356, 0.1519, 0.1102, 0.2023]) -Greedy action tensor([ 2.5890, -1.5384, 0.2513, 1.3375]) tensor([0.7149, 0.0115, 0.0690, 0.2045]) -Greedy action tensor([ 1.9561, -0.4146, -0.4713, 0.3754]) tensor([0.7207, 0.0673, 0.0636, 0.1483]) -Greedy action tensor([ 1.2017, -0.0382, -0.7075, -0.0207]) tensor([0.5773, 0.1671, 0.0856, 0.1700]) -Greedy action tensor([ 1.7590, -0.5032, -0.3825, 0.9153]) tensor([0.6054, 0.0630, 0.0711, 0.2604]) -Greedy action tensor([ 1.6658, 0.0070, -0.7662, 0.5973]) tensor([0.6166, 0.1174, 0.0542, 0.2118]) -Greedy action tensor([ 1.4089, -0.2363, -0.6070, 0.3349]) tensor([0.5996, 0.1157, 0.0799, 0.2048]) -Greedy action tensor([ 1.2458, 0.0035, -1.3097, 0.5244]) tensor([0.5398, 0.1559, 0.0419, 0.2624]) -Greedy action tensor([ 1.2436, -0.2336, -1.2601, 0.4039]) tensor([0.5741, 0.1310, 0.0469, 0.2479]) -Greedy action tensor([ 1.8998, -0.8053, -0.4896, 0.4539]) tensor([0.7173, 0.0480, 0.0658, 0.1690]) -Greedy action tensor([ 1.5442, -0.0760, -0.6612, 0.3366]) tensor([0.6223, 0.1231, 0.0686, 0.1860]) -Greedy action tensor([ 1.7731, -0.1608, -0.1291, 0.2931]) tensor([0.6573, 0.0950, 0.0981, 0.1496]) -Greedy action tensor([ 2.2769, -0.6645, -0.7399, 0.5220]) tensor([0.7845, 0.0414, 0.0384, 0.1357]) -Greedy action tensor([ 1.3273, -0.4663, -0.4300, 0.4558]) tensor([0.5691, 0.0947, 0.0982, 0.2381]) -Greedy action tensor([ 1.3545, 0.1809, -1.1003, 0.2652]) tensor([0.5775, 0.1786, 0.0496, 0.1943]) -Greedy action tensor([ 1.8691, -0.8365, -0.3217, 0.6605]) tensor([0.6769, 0.0452, 0.0757, 0.2021]) -Greedy action tensor([ 1.4778, -0.4313, -0.6342, 0.2652]) tensor([0.6383, 0.0946, 0.0772, 0.1898]) -Greedy action tensor([ 1.0061, -0.1978, -0.5685, 0.3277]) tensor([0.4964, 0.1489, 0.1028, 0.2519]) -Greedy action tensor([ 1.7452, -0.4633, -0.6291, 0.5821]) tensor([0.6599, 0.0725, 0.0614, 0.2062]) -Greedy action tensor([ 1.7237, -0.3273, -0.7313, 0.0771]) tensor([0.7107, 0.0914, 0.0610, 0.1369]) -Greedy action tensor([ 1.6327, 0.4081, -0.6396, -0.2521]) tensor([0.6457, 0.1897, 0.0666, 0.0980]) -Greedy action tensor([ 1.3764, -0.4540, -0.3143, 0.4474]) tensor([0.5748, 0.0922, 0.1060, 0.2270]) -Greedy action tensor([ 1.4658, -0.3310, -0.6386, 0.3391]) tensor([0.6204, 0.1029, 0.0756, 0.2011]) -Greedy action tensor([ 1.3071, 0.1666, -0.5001, 0.3664]) tensor([0.5336, 0.1706, 0.0876, 0.2083]) -Greedy action tensor([ 1.2960, -0.5314, -0.2702, 0.2776]) tensor([0.5778, 0.0929, 0.1207, 0.2087]) -Greedy action tensor([ 1.6865, -0.7373, -0.1277, 0.4992]) tensor([0.6424, 0.0569, 0.1047, 0.1960]) -Greedy action tensor([ 1.4772, -0.5519, -0.2913, 0.4431]) tensor([0.6033, 0.0793, 0.1029, 0.2145]) -Greedy action tensor([ 1.1281, -0.7189, -0.1092, 0.5398]) tensor([0.4992, 0.0787, 0.1449, 0.2772]) -Greedy action tensor([ 1.6952, -0.5283, -0.1612, 0.5181]) tensor([0.6359, 0.0688, 0.0993, 0.1960]) -Greedy action tensor([ 2.0663, -0.7046, -0.5039, 0.2878]) tensor([0.7645, 0.0479, 0.0585, 0.1291]) -Greedy action tensor([ 1.3248, -0.8280, -0.3040, 0.3481]) tensor([0.5921, 0.0688, 0.1161, 0.2230]) -Greedy action tensor([ 1.7450, -0.5267, -0.8255, 0.3487]) tensor([0.7007, 0.0723, 0.0536, 0.1734]) -Greedy action tensor([ 1.7500, -0.6506, -0.4835, 0.4026]) tensor([0.6860, 0.0622, 0.0735, 0.1783]) -Greedy action tensor([ 1.8867, -0.4431, -0.8893, 0.1373]) tensor([0.7499, 0.0730, 0.0467, 0.1304]) -Greedy action tensor([ 0.9953, 0.3471, -1.0931, 0.0884]) tensor([0.4877, 0.2550, 0.0604, 0.1969]) -Greedy action tensor([ 1.1691, -0.2428, -1.0398, 0.1223]) tensor([0.5867, 0.1430, 0.0644, 0.2060]) -Greedy action tensor([ 1.3657, -0.4717, -0.3378, 0.5427]) tensor([0.5617, 0.0894, 0.1023, 0.2466]) -Greedy action tensor([ 1.1655, -0.1601, -0.8411, 0.1629]) tensor([0.5659, 0.1503, 0.0761, 0.2076]) -Greedy action tensor([ 1.6494, -0.6430, 0.0719, 0.4781]) tensor([0.6182, 0.0625, 0.1277, 0.1916]) -Greedy action tensor([ 1.0767, -0.0831, -0.2024, 0.1107]) tensor([0.5070, 0.1590, 0.1411, 0.1930]) -Greedy action tensor([ 1.7190, -0.5312, -0.0625, 0.4110]) tensor([0.6476, 0.0682, 0.1090, 0.1751]) -Greedy action tensor([ 1.0861, -0.5479, -0.0875, 0.4933]) tensor([0.4861, 0.0949, 0.1503, 0.2687]) -Greedy action tensor([ 1.0448, -0.1308, -0.6181, 0.3827]) tensor([0.4965, 0.1532, 0.0941, 0.2561]) -Greedy action tensor([ 1.6934, -0.1758, -0.8752, 0.4186]) tensor([0.6621, 0.1021, 0.0507, 0.1850]) -Greedy action tensor([ 1.5079, -0.4955, -0.7803, 0.2955]) tensor([0.6520, 0.0879, 0.0661, 0.1940]) -Greedy action tensor([ 1.6433, -0.2242, -0.6265, -0.0789]) tensor([0.6961, 0.1076, 0.0719, 0.1244]) -Greedy action tensor([-0.9506, 0.7957, 0.1970, -0.0786]) tensor([0.0815, 0.4671, 0.2566, 0.1948]) -Greedy action tensor([-1.6685, 0.2243, 0.4270, -0.0327]) tensor([0.0478, 0.3176, 0.3890, 0.2456]) -Greedy action tensor([-1.8389, -0.4462, 0.6092, -0.1291]) tensor([0.0452, 0.1820, 0.5229, 0.2499]) -Greedy action tensor([-1.9202, -0.4411, 0.6540, -0.1683]) tensor([0.0412, 0.1808, 0.5405, 0.2375]) -Greedy action tensor([-1.8987, -0.4456, 0.6459, -0.1586]) tensor([0.0422, 0.1803, 0.5372, 0.2403]) -Greedy action tensor([-1.7921, -0.3899, 0.5826, -0.1265]) tensor([0.0474, 0.1926, 0.5093, 0.2506]) -Greedy action tensor([-1.2155, 0.6071, 0.2744, -0.6614]) tensor([0.0748, 0.4630, 0.3319, 0.1302]) -Greedy action tensor([-1.8580, -0.3048, 0.6522, -0.2590]) tensor([0.0435, 0.2057, 0.5355, 0.2153]) -Greedy action tensor([-1.9427, -0.4485, 0.6669, -0.1795]) tensor([0.0402, 0.1791, 0.5464, 0.2344]) -Greedy action tensor([-1.5732, -0.4330, 0.5279, 0.1025]) tensor([0.0567, 0.1772, 0.4633, 0.3028]) -Greedy action tensor([-1.8503, -0.3956, 0.6140, -0.1406]) tensor([0.0443, 0.1898, 0.5209, 0.2449]) -Greedy action tensor([-1.2015, 0.7766, 0.1738, 0.1971]) tensor([0.0616, 0.4453, 0.2437, 0.2494]) -Greedy action tensor([-1.8921, -0.3806, 0.6456, -0.1552]) tensor([0.0419, 0.1900, 0.5301, 0.2380]) -Greedy action tensor([-1.9261, -0.4315, 0.6587, -0.1698]) tensor([0.0408, 0.1819, 0.5410, 0.2363]) -Greedy action tensor([-1.9254, -0.4564, 0.6559, -0.1749]) tensor([0.0411, 0.1787, 0.5434, 0.2368]) -Greedy action tensor([-1.4658, 0.0878, 0.3625, -0.0233]) tensor([0.0618, 0.2922, 0.3846, 0.2615]) -Greedy action tensor([-1.8522, -0.1955, 0.5790, -0.1114]) tensor([0.0429, 0.2248, 0.4877, 0.2445]) -Greedy action tensor([-1.7559, -0.4412, 0.5656, -0.0792]) tensor([0.0494, 0.1838, 0.5030, 0.2639]) -Greedy action tensor([0.6308, 0.8809, 0.4370, 1.1955]) tensor([0.2055, 0.2639, 0.1693, 0.3614]) -Greedy action tensor([-1.9014, -0.4177, 0.6450, -0.1568]) tensor([0.0419, 0.1845, 0.5341, 0.2395]) -Greedy action tensor([-1.8420, -0.3552, 0.6337, -0.1089]) tensor([0.0435, 0.1925, 0.5176, 0.2463]) -Greedy action tensor([-1.3403, 0.6674, 0.2173, 0.1263]) tensor([0.0570, 0.4248, 0.2708, 0.2473]) -Greedy action tensor([-1.9120, -0.3845, 0.6412, -0.1595]) tensor([0.0413, 0.1902, 0.5304, 0.2382]) -Greedy action tensor([-1.9012, -0.4370, 0.6460, -0.1577]) tensor([0.0420, 0.1816, 0.5363, 0.2401]) -Greedy action tensor([-1.9142, -0.1272, 0.6164, -0.2032]) tensor([0.0399, 0.2382, 0.5011, 0.2208]) -Greedy action tensor([-1.6468, -0.4912, 0.5321, 0.1298]) tensor([0.0528, 0.1678, 0.4670, 0.3123]) -Greedy action tensor([-1.9017, -0.3699, 0.6385, -0.1552]) tensor([0.0416, 0.1924, 0.5275, 0.2385]) -Greedy action tensor([-0.5985, 0.4483, 0.0567, -0.0630]) tensor([0.1336, 0.3807, 0.2573, 0.2283]) -Greedy action tensor([-1.1054, -0.2554, 0.4653, 0.8762]) tensor([0.0649, 0.1519, 0.3123, 0.4709]) -Greedy action tensor([-1.5841, 0.1698, 0.4022, -0.6310]) tensor([0.0600, 0.3468, 0.4375, 0.1557]) -Greedy action tensor([-1.7266, -0.5028, 0.5550, -0.0599]) tensor([0.0513, 0.1745, 0.5025, 0.2717]) -Greedy action tensor([-0.7958, -0.4028, 1.1395, 1.3167]) tensor([0.0566, 0.0838, 0.3918, 0.4678]) -Greedy action tensor([-1.8855, -0.3898, 0.6298, -0.1626]) tensor([0.0427, 0.1904, 0.5279, 0.2390]) -Greedy action tensor([-1.3802, 0.1285, 0.4473, 0.2281]) tensor([0.0598, 0.2702, 0.3716, 0.2985]) -Greedy action tensor([-1.7303, 0.0161, 0.5133, -0.0950]) tensor([0.0470, 0.2693, 0.4427, 0.2410]) -Greedy action tensor([-1.7497, -0.2841, 0.6112, -0.0700]) tensor([0.0470, 0.2034, 0.4978, 0.2519]) -Greedy action tensor([-1.9099, -0.4361, 0.6500, -0.1657]) tensor([0.0416, 0.1818, 0.5384, 0.2382]) -Greedy action tensor([-1.8797, -0.4546, 0.6458, -0.1357]) tensor([0.0428, 0.1779, 0.5346, 0.2447]) -Greedy action tensor([-1.5974, -0.5351, 0.5009, -0.0732]) tensor([0.0601, 0.1739, 0.4900, 0.2760]) -Greedy action tensor([-1.9180, -0.4125, 0.6495, -0.1622]) tensor([0.0411, 0.1852, 0.5357, 0.2379]) -Greedy action tensor([-0.9359, 0.6979, -0.0183, -0.1225]) tensor([0.0919, 0.4708, 0.2300, 0.2073]) -Greedy action tensor([-0.6151, 0.6708, -0.0082, 0.1127]) tensor([0.1173, 0.4245, 0.2153, 0.2429]) -Greedy action tensor([-1.8587, -0.4274, 0.6210, -0.1324]) tensor([0.0440, 0.1840, 0.5249, 0.2471]) -Greedy action tensor([-0.9506, 0.8096, 0.1717, -0.0206]) tensor([0.0805, 0.4681, 0.2473, 0.2041]) -Greedy action tensor([-1.9182, -0.4381, 0.6567, -0.1661]) tensor([0.0412, 0.1809, 0.5405, 0.2374]) -Greedy action tensor([-1.5902, -0.5101, 0.4909, 0.0216]) tensor([0.0589, 0.1735, 0.4722, 0.2953]) -Greedy action tensor([-1.1422, 0.7584, 0.1556, 0.1173]) tensor([0.0672, 0.4497, 0.2461, 0.2369]) -Greedy action tensor([-1.8319, -0.4628, 0.6151, -0.1318]) tensor([0.0455, 0.1790, 0.5261, 0.2493]) -Greedy action tensor([-1.8634, -0.4347, 0.6259, -0.1446]) tensor([0.0439, 0.1830, 0.5286, 0.2446]) -Greedy action tensor([-1.7101, -0.2104, 0.6153, -0.0802]) tensor([0.0480, 0.2153, 0.4915, 0.2452]) -Greedy action tensor([-1.8465, -0.3786, 0.6096, -0.1302]) tensor([0.0443, 0.1924, 0.5167, 0.2466]) -Greedy action tensor([-1.9430, -0.4506, 0.6681, -0.1795]) tensor([0.0402, 0.1787, 0.5469, 0.2343]) -Greedy action tensor([-1.6242, -0.2915, 0.4351, -0.0186]) tensor([0.0568, 0.2153, 0.4452, 0.2828]) -Greedy action tensor([-1.3646, -0.6239, 0.3712, 0.1454]) tensor([0.0752, 0.1577, 0.4267, 0.3404]) -Greedy action tensor([-1.8133, -0.5054, 0.6480, -0.1137]) tensor([0.0457, 0.1690, 0.5354, 0.2500]) -Greedy action tensor([-1.6192, -0.5394, 0.4869, 0.0102]) tensor([0.0579, 0.1706, 0.4760, 0.2955]) -Greedy action tensor([-1.7457, -0.1031, 0.5265, -0.1140]) tensor([0.0477, 0.2463, 0.4623, 0.2437]) -Greedy action tensor([-1.9158, -0.4513, 0.6566, -0.1670]) tensor([0.0414, 0.1790, 0.5419, 0.2378]) -Greedy action tensor([-1.8067, -0.2526, 0.5856, -0.0980]) tensor([0.0451, 0.2132, 0.4929, 0.2488]) -Greedy action tensor([-1.9046, -0.4190, 0.6428, -0.1622]) tensor([0.0418, 0.1848, 0.5344, 0.2389]) -Greedy action tensor([-1.8262, -0.2451, 0.6130, -0.0877]) tensor([0.0435, 0.2112, 0.4982, 0.2472]) -Greedy action tensor([-1.8474, 0.1339, 0.6096, -0.3100]) tensor([0.0407, 0.2951, 0.4749, 0.1893]) -Greedy action tensor([-1.8160, -0.3402, 0.5903, -0.1303]) tensor([0.0457, 0.2001, 0.5074, 0.2468]) -Greedy action tensor([-1.9069, -0.5432, 0.9436, -0.0036]) tensor([0.0346, 0.1352, 0.5982, 0.2320]) -Greedy action tensor([ 0.1072, -0.1652, 0.8619, 1.5291]) tensor([0.1245, 0.0948, 0.2648, 0.5160]) -Greedy action tensor([-1.0915, 0.8264, 0.1675, 0.1277]) tensor([0.0680, 0.4626, 0.2394, 0.2300]) -Greedy action tensor([-1.8716, -0.3951, 0.6310, -0.1422]) tensor([0.0430, 0.1884, 0.5258, 0.2427]) -Greedy action tensor([-1.1350, -0.6095, 0.3872, 0.0284]) tensor([0.0955, 0.1615, 0.4375, 0.3056]) -Greedy action tensor([-1.2501, 0.1803, 0.3926, -0.6565]) tensor([0.0822, 0.3438, 0.4251, 0.1489]) -Greedy action tensor([-1.8566, -0.4409, 0.6260, -0.1322]) tensor([0.0441, 0.1815, 0.5274, 0.2471]) -Greedy action tensor([-1.8743, -0.4156, 0.6302, -0.1473]) tensor([0.0432, 0.1857, 0.5283, 0.2428]) -Greedy action tensor([-1.8893, -0.4380, 0.6412, -0.1536]) tensor([0.0426, 0.1816, 0.5344, 0.2414]) -Greedy action tensor([-1.7731, -0.4425, 0.6115, -0.2482]) tensor([0.0494, 0.1870, 0.5365, 0.2271]) -Greedy action tensor([-1.7524, -0.4178, 0.5727, -0.0983]) tensor([0.0494, 0.1875, 0.5050, 0.2581]) -Greedy action tensor([-0.8078, 0.6452, 0.3866, 0.4231]) tensor([0.0833, 0.3563, 0.2751, 0.2853]) -Greedy action tensor([-1.8070, 0.1027, 0.5366, -0.1343]) tensor([0.0426, 0.2873, 0.4434, 0.2267]) -Greedy action tensor([-1.8594, -0.4056, 0.6533, -0.1093]) tensor([0.0428, 0.1831, 0.5279, 0.2462]) -Greedy action tensor([-1.4604, 0.1589, 0.3978, -0.0840]) tensor([0.0609, 0.3075, 0.3904, 0.2412]) -Greedy action tensor([-1.3690, -0.5114, 0.4798, -0.0929]) tensor([0.0752, 0.1774, 0.4779, 0.2695]) -Greedy action tensor([-1.9068, -0.3456, 0.6395, -0.1593]) tensor([0.0412, 0.1964, 0.5259, 0.2366]) -Greedy action tensor([-1.9205, -0.4521, 0.6593, -0.1691]) tensor([0.0412, 0.1787, 0.5430, 0.2372]) -Greedy action tensor([ 0.1271, -1.3038, 0.0270, -0.3166]) tensor([0.3590, 0.0858, 0.3248, 0.2304]) -Greedy action tensor([ 0.7394, -0.2711, -0.3081, -0.5001]) tensor([0.4989, 0.1816, 0.1750, 0.1444]) -Greedy action tensor([ 0.1277, 0.2500, -0.2192, -0.8889]) tensor([0.3126, 0.3533, 0.2210, 0.1131]) -Greedy action tensor([ 0.3542, -0.7693, -0.0895, 0.4955]) tensor([0.3207, 0.1043, 0.2058, 0.3693]) -Greedy action tensor([-0.0620, -0.9235, -0.4909, 0.9052]) tensor([0.2126, 0.0898, 0.1384, 0.5592]) -Greedy action tensor([ 0.3866, 0.1688, -0.4134, 0.1019]) tensor([0.3327, 0.2676, 0.1495, 0.2503]) -Greedy action tensor([ 1.0887, -0.4638, 0.1368, -0.1533]) tensor([0.5301, 0.1122, 0.2046, 0.1531]) -Greedy action tensor([-0.3442, -0.3385, 0.6250, -0.7232]) tensor([0.1878, 0.1888, 0.4949, 0.1285]) -Greedy action tensor([ 0.8076, -0.9212, 1.6505, -0.4614]) tensor([0.2644, 0.0469, 0.6143, 0.0743]) -Greedy action tensor([-0.0179, -1.0375, -0.3283, -0.4222]) tensor([0.3621, 0.1306, 0.2655, 0.2417]) -Greedy action tensor([ 0.0466, 0.1264, -0.8969, -0.2059]) tensor([0.3078, 0.3333, 0.1198, 0.2391]) -Greedy action tensor([-0.5345, -1.3170, 0.1643, 0.1947]) tensor([0.1804, 0.0825, 0.3629, 0.3741]) -Greedy action tensor([-0.3231, -0.8833, 1.6754, -1.2084]) tensor([0.1068, 0.0610, 0.7881, 0.0441]) -Greedy action tensor([-0.5606, -1.6720, 0.6598, -0.7581]) tensor([0.1806, 0.0594, 0.6118, 0.1482]) -Greedy action tensor([ 0.7838, -0.2390, 0.4411, -0.1291]) tensor([0.4047, 0.1455, 0.2873, 0.1624]) -Greedy action tensor([-0.0788, -1.6477, 0.9137, -0.2421]) tensor([0.2103, 0.0438, 0.5673, 0.1786]) -Greedy action tensor([-0.6743, -1.2327, 0.1247, 0.1223]) tensor([0.1663, 0.0951, 0.3697, 0.3688]) -Greedy action tensor([-0.3070, 0.0975, 0.6948, -0.0084]) tensor([0.1522, 0.2281, 0.4145, 0.2052]) -Greedy action tensor([ 0.3210, -1.1006, -0.4542, -0.1636]) tensor([0.4314, 0.1041, 0.1987, 0.2657]) -Greedy action tensor([-0.5403, -0.6098, -0.4128, -0.1352]) tensor([0.2189, 0.2042, 0.2487, 0.3282]) -Greedy action tensor([ 0.4702, -0.0188, 0.1989, 0.6308]) tensor([0.2817, 0.1728, 0.2148, 0.3308]) -Greedy action tensor([ 0.7942, -0.0768, 0.0293, -0.2905]) tensor([0.4501, 0.1884, 0.2094, 0.1521]) -Greedy action tensor([-0.2248, -1.3714, -1.2384, -0.2580]) tensor([0.3776, 0.1200, 0.1370, 0.3653]) -Greedy action tensor([ 1.0010, -0.0256, 0.3559, -1.1755]) tensor([0.5009, 0.1795, 0.2628, 0.0568]) -Greedy action tensor([-0.8420, 0.1881, 1.0286, -1.0459]) tensor([0.0900, 0.2522, 0.5844, 0.0734]) -Greedy action tensor([ 0.2372, -0.8042, 0.7216, -0.3914]) tensor([0.2849, 0.1006, 0.4625, 0.1520]) -Greedy action tensor([-0.7587, -0.1579, -0.3267, -1.0655]) tensor([0.1961, 0.3576, 0.3020, 0.1443]) -Greedy action tensor([-0.2858, -0.5691, -0.2547, -0.9266]) tensor([0.3020, 0.2275, 0.3115, 0.1591]) -Greedy action tensor([-0.4275, 0.2371, 1.4889, -0.5583]) tensor([0.0942, 0.1831, 0.6401, 0.0826]) -Greedy action tensor([-0.6771, -1.3383, 0.5184, -0.8002]) tensor([0.1753, 0.0905, 0.5793, 0.1550]) -Greedy action tensor([ 0.5067, -1.4362, 0.1201, 1.0413]) tensor([0.2833, 0.0406, 0.1925, 0.4836]) -Greedy action tensor([-0.1370, -0.8264, 1.0129, -1.2361]) tensor([0.2003, 0.1005, 0.6325, 0.0667]) -Greedy action tensor([-0.2673, -0.5046, -1.0997, -0.2049]) tensor([0.3041, 0.2399, 0.1323, 0.3237]) -Greedy action tensor([ 0.3601, -0.7343, -0.3772, -0.8796]) tensor([0.4756, 0.1592, 0.2275, 0.1377]) -Greedy action tensor([-0.0771, -0.7151, 0.2875, -0.3965]) tensor([0.2706, 0.1430, 0.3897, 0.1967]) -Greedy action tensor([ 0.0698, -0.6476, -0.0487, 0.2753]) tensor([0.2774, 0.1354, 0.2464, 0.3407]) -Greedy action tensor([ 0.2482, -1.1801, -0.0575, -0.5660]) tensor([0.4133, 0.0991, 0.3045, 0.1831]) -Greedy action tensor([ 0.2265, -0.7391, 0.8302, -0.8318]) tensor([0.2812, 0.1071, 0.5142, 0.0976]) -Greedy action tensor([-0.1935, 0.0496, 0.0859, -0.4408]) tensor([0.2284, 0.2912, 0.3020, 0.1784]) -Greedy action tensor([-0.3172, -0.7066, -0.1969, -0.4214]) tensor([0.2698, 0.1828, 0.3043, 0.2431]) -Greedy action tensor([-1.3626, -0.1160, -0.0552, -1.0880]) tensor([0.1054, 0.3665, 0.3895, 0.1387]) -Greedy action tensor([ 0.2238, -0.6094, -0.7096, -0.2284]) tensor([0.4058, 0.1764, 0.1596, 0.2582]) -Greedy action tensor([ 0.8564, -0.3307, -0.2709, 0.5618]) tensor([0.4213, 0.1285, 0.1364, 0.3138]) -Greedy action tensor([-0.2326, -0.8010, -1.1279, -0.1801]) tensor([0.3302, 0.1870, 0.1349, 0.3480]) -Greedy action tensor([-0.7132, -0.1205, -0.2744, -0.2520]) tensor([0.1682, 0.3042, 0.2608, 0.2668]) -Greedy action tensor([-0.1967, -0.6858, 0.0456, -0.5447]) tensor([0.2783, 0.1706, 0.3546, 0.1965]) -Greedy action tensor([-0.0609, 0.3249, -0.2698, 0.0296]) tensor([0.2285, 0.3360, 0.1854, 0.2501]) -Greedy action tensor([-0.1312, -0.1571, 1.5995, -0.1550]) tensor([0.1163, 0.1134, 0.6567, 0.1136]) -Greedy action tensor([-0.6561, -1.0205, -0.0281, -0.4476]) tensor([0.2083, 0.1447, 0.3904, 0.2566]) -Greedy action tensor([ 0.3091, -0.4269, 0.3831, 0.0863]) tensor([0.2980, 0.1427, 0.3208, 0.2385]) -Greedy action tensor([1.1462, 0.0062, 0.4954, 0.0323]) tensor([0.4609, 0.1474, 0.2404, 0.1513]) -Greedy action tensor([ 0.1903, 0.3488, -0.2693, -1.1278]) tensor([0.3256, 0.3815, 0.2057, 0.0872]) -Greedy action tensor([-0.6685, -0.2642, 0.1440, -0.6326]) tensor([0.1728, 0.2588, 0.3893, 0.1791]) -Greedy action tensor([ 0.5991, -0.0121, 0.3535, -0.5500]) tensor([0.3785, 0.2054, 0.2961, 0.1200]) -Greedy action tensor([-0.0987, -0.9142, 0.8657, -0.4942]) tensor([0.2110, 0.0934, 0.5535, 0.1421]) -Greedy action tensor([ 0.0763, -1.2800, 0.7849, 0.3596]) tensor([0.2166, 0.0558, 0.4400, 0.2876]) -Greedy action tensor([-0.1628, -0.6942, 0.0303, -1.1403]) tensor([0.3148, 0.1850, 0.3818, 0.1184]) -Greedy action tensor([ 0.1655, 0.3654, -0.6633, 0.2120]) tensor([0.2699, 0.3296, 0.1178, 0.2827]) -Greedy action tensor([-0.8778, 0.1586, -1.4014, 0.2304]) tensor([0.1344, 0.3789, 0.0796, 0.4071]) -Greedy action tensor([ 0.2279, -1.3087, 0.3877, -0.4634]) tensor([0.3461, 0.0745, 0.4061, 0.1734]) -Greedy action tensor([-0.8166, -1.2127, -0.1269, -0.1036]) tensor([0.1753, 0.1179, 0.3493, 0.3575]) -Greedy action tensor([ 1.5970, -0.3041, -0.3236, -0.1240]) tensor([0.6781, 0.1013, 0.0993, 0.1213]) -Greedy action tensor([-0.0910, -0.1623, 0.3514, -0.7768]) tensor([0.2505, 0.2333, 0.3900, 0.1262]) -Greedy action tensor([-0.6146, 0.3579, -0.0622, -1.4265]) tensor([0.1716, 0.4539, 0.2982, 0.0762]) -Greedy action tensor([-0.2148, -0.4454, 0.6022, -0.5020]) tensor([0.2080, 0.1652, 0.4708, 0.1561]) -Greedy action tensor([-1.0800, -0.7967, 0.4397, -1.0553]) tensor([0.1262, 0.1675, 0.5769, 0.1294]) -Greedy action tensor([ 0.1754, -1.6540, 0.2165, -1.2203]) tensor([0.4081, 0.0655, 0.4253, 0.1011]) -Greedy action tensor([-0.6798, -0.2608, -0.3496, -0.9342]) tensor([0.2134, 0.3244, 0.2968, 0.1654]) -Greedy action tensor([ 0.7409, -0.6731, -0.7128, 0.7337]) tensor([0.4049, 0.0985, 0.0946, 0.4020]) -Greedy action tensor([ 0.3308, -1.1694, -0.6925, 0.0624]) tensor([0.4261, 0.0950, 0.1531, 0.3258]) -Greedy action tensor([-0.8251, -1.1987, -0.4586, 1.0092]) tensor([0.1065, 0.0733, 0.1536, 0.6666]) -Greedy action tensor([-0.1221, -1.2147, 0.3128, -0.7473]) tensor([0.2928, 0.0982, 0.4523, 0.1567]) -Greedy action tensor([-0.4848, -0.5610, -1.6478, -0.0373]) tensor([0.2629, 0.2436, 0.0822, 0.4113]) -Greedy action tensor([-0.2677, -1.0277, 0.2832, -1.4189]) tensor([0.2842, 0.1329, 0.4930, 0.0899]) -Greedy action tensor([ 0.9692, -0.9644, 0.1304, -0.4338]) tensor([0.5486, 0.0793, 0.2371, 0.1349]) -Greedy action tensor([-0.1793, 0.3630, -0.6911, -0.6148]) tensor([0.2521, 0.4336, 0.1511, 0.1631]) -Greedy action tensor([ 0.1893, -1.5649, 0.9787, 0.1590]) tensor([0.2301, 0.0398, 0.5068, 0.2233]) -Greedy action tensor([-0.0432, -0.3522, -0.5963, -0.3807]) tensor([0.3308, 0.2429, 0.1903, 0.2361]) -Greedy action tensor([-0.6537, -0.1285, 0.7635, -0.1727]) tensor([0.1186, 0.2005, 0.4891, 0.1918]) -Greedy action tensor([ 0.7058, -1.5044, -1.0237, 1.0435]) tensor([0.3719, 0.0408, 0.0660, 0.5213]) -Greedy action tensor([-0.9949, 0.3154, -0.5614, -0.4849]) tensor([0.1263, 0.4684, 0.1949, 0.2104]) -Greedy action tensor([ 0.3136, -0.1405, 0.0853, 0.1844]) tensor([0.3021, 0.1919, 0.2405, 0.2655]) -Greedy action tensor([ 0.4626, 0.2694, -0.1786, 0.1164]) tensor([0.3270, 0.2695, 0.1722, 0.2313]) -Greedy action tensor([ 0.4713, -0.5021, -0.2136, -0.1516]) tensor([0.4135, 0.1562, 0.2085, 0.2218]) -Greedy action tensor([ 0.5732, -0.4026, -0.1020, -0.2120]) tensor([0.4270, 0.1609, 0.2174, 0.1947]) -Greedy action tensor([ 1.0228, -0.5234, -0.1173, -0.4208]) tensor([0.5653, 0.1204, 0.1808, 0.1335]) -Greedy action tensor([ 1.0611, -0.4668, -0.2488, -0.6919]) tensor([0.6024, 0.1307, 0.1626, 0.1044]) -Greedy action tensor([ 1.5970, -1.0372, -0.1328, -0.5700]) tensor([0.7333, 0.0526, 0.1300, 0.0840]) -Greedy action tensor([ 8.8556e-01, -5.5609e-01, -7.6242e-04, -5.1520e-01]) tensor([0.5277, 0.1248, 0.2175, 0.1300]) -Greedy action tensor([ 1.0131, -0.4291, -0.2592, -0.1793]) tensor([0.5494, 0.1299, 0.1539, 0.1667]) -Greedy action tensor([ 0.7710, -0.3551, -0.3058, -0.3103]) tensor([0.4989, 0.1618, 0.1700, 0.1692]) -Greedy action tensor([ 0.8518, -0.2079, -0.0860, -0.0712]) tensor([0.4683, 0.1623, 0.1833, 0.1861]) -Greedy action tensor([ 0.7240, -0.5265, -0.1365, -0.1087]) tensor([0.4664, 0.1335, 0.1972, 0.2028]) -Greedy action tensor([ 0.6019, -0.3008, -0.1172, -0.1542]) tensor([0.4233, 0.1717, 0.2062, 0.1988]) -Greedy action tensor([ 0.9191, -0.2765, -0.0885, -0.0754]) tensor([0.4908, 0.1485, 0.1792, 0.1815]) -Greedy action tensor([ 0.8538, -1.1292, -0.0794, -0.6574]) tensor([0.5709, 0.0786, 0.2245, 0.1260]) -Greedy action tensor([ 0.7298, -0.5911, 0.0693, -0.0556]) tensor([0.4466, 0.1192, 0.2307, 0.2036]) -Greedy action tensor([ 1.2942, -0.7893, -0.0641, -0.7753]) tensor([0.6632, 0.0826, 0.1705, 0.0837]) -Greedy action tensor([ 0.7868, -0.5725, -0.0520, -0.3873]) tensor([0.5005, 0.1285, 0.2163, 0.1547]) -Greedy action tensor([ 1.1172, -0.5498, -0.3281, -0.5162]) tensor([0.6174, 0.1166, 0.1455, 0.1206]) -Greedy action tensor([ 0.8559, -0.6319, 0.1616, -0.1772]) tensor([0.4805, 0.1085, 0.2400, 0.1710]) -Greedy action tensor([ 0.8364, -0.4323, -0.1765, -0.2906]) tensor([0.5080, 0.1429, 0.1845, 0.1646]) -Greedy action tensor([ 0.8397, -0.3643, 0.0149, -0.2978]) tensor([0.4857, 0.1457, 0.2129, 0.1557]) -Greedy action tensor([ 0.9760, -1.0068, -0.0354, -0.4570]) tensor([0.5747, 0.0791, 0.2090, 0.1371]) -Greedy action tensor([ 0.7283, -0.3415, 0.0199, -0.5340]) tensor([0.4720, 0.1619, 0.2324, 0.1336]) -Greedy action tensor([ 0.6374, -0.5054, -0.0596, -0.1478]) tensor([0.4399, 0.1403, 0.2191, 0.2006]) -Greedy action tensor([ 1.0180, -0.6239, -0.1201, -0.3733]) tensor([0.5673, 0.1098, 0.1818, 0.1411]) -Greedy action tensor([ 1.0691, -0.5679, -0.1211, -0.4865]) tensor([0.5849, 0.1138, 0.1779, 0.1234]) -Greedy action tensor([ 0.5133, 0.0980, -0.0301, -0.1464]) tensor([0.3626, 0.2394, 0.2106, 0.1875]) -Greedy action tensor([ 0.2903, -0.1141, -0.0385, -0.1678]) tensor([0.3312, 0.2210, 0.2384, 0.2095]) -Greedy action tensor([ 1.2046, -0.3393, -0.1052, -0.2617]) tensor([0.5834, 0.1246, 0.1574, 0.1346]) -Greedy action tensor([ 0.9782, -0.7943, 0.0585, -0.5820]) tensor([0.5622, 0.0955, 0.2241, 0.1181]) -Greedy action tensor([ 1.0891, -0.4140, -0.0221, -0.3322]) tensor([0.5577, 0.1241, 0.1836, 0.1346]) -Greedy action tensor([ 1.1425, -0.5930, -0.0565, -0.6169]) tensor([0.6061, 0.1069, 0.1827, 0.1043]) -Greedy action tensor([ 0.5564, -0.0772, 0.0469, -0.2969]) tensor([0.3910, 0.2075, 0.2349, 0.1666]) -Greedy action tensor([ 0.8706, -0.6338, -0.0117, -0.4529]) tensor([0.5257, 0.1168, 0.2175, 0.1399]) -Greedy action tensor([ 0.3323, -0.0214, -0.0104, -0.0807]) tensor([0.3253, 0.2284, 0.2310, 0.2153]) -Greedy action tensor([ 0.8752, -0.3366, -0.1675, -0.3276]) tensor([0.5127, 0.1526, 0.1807, 0.1540]) -Greedy action tensor([ 0.7153, -0.2567, -0.0564, -0.2566]) tensor([0.4507, 0.1705, 0.2083, 0.1705]) -Greedy action tensor([ 0.5558, -0.3640, 0.0023, -0.1508]) tensor([0.4054, 0.1616, 0.2331, 0.2000]) -Greedy action tensor([ 1.0339, -0.4803, -0.2363, -0.6732]) tensor([0.5945, 0.1308, 0.1669, 0.1078]) -Greedy action tensor([ 0.2962, -0.1554, -0.0531, -0.3540]) tensor([0.3492, 0.2223, 0.2463, 0.1823]) -Greedy action tensor([ 0.5250, 0.0294, -0.0837, 0.0113]) tensor([0.3634, 0.2214, 0.1977, 0.2174]) -Greedy action tensor([ 1.0141, -0.6170, -0.0528, -0.4719]) tensor([0.5662, 0.1108, 0.1948, 0.1281]) -Greedy action tensor([ 0.9880, -0.4513, -0.1242, -0.5184]) tensor([0.5594, 0.1326, 0.1839, 0.1240]) -Greedy action tensor([ 1.0984, -0.7067, 0.1555, -0.5967]) tensor([0.5755, 0.0946, 0.2242, 0.1057]) -Greedy action tensor([ 0.9615, -0.8804, 0.0973, -0.5849]) tensor([0.5578, 0.0884, 0.2350, 0.1188]) -Greedy action tensor([ 0.7215, -0.2779, -0.0237, -0.1598]) tensor([0.4431, 0.1631, 0.2103, 0.1835]) -Greedy action tensor([ 0.9386, -0.4992, -0.0519, -0.4003]) tensor([0.5345, 0.1269, 0.1985, 0.1401]) -Greedy action tensor([ 0.4670, 0.0791, 0.0670, -0.1484]) tensor([0.3461, 0.2348, 0.2320, 0.1870]) -Greedy action tensor([ 0.9410, -0.9327, 0.1399, -0.4766]) tensor([0.5421, 0.0832, 0.2433, 0.1314]) -Greedy action tensor([ 0.7488, -0.4404, -0.0263, -0.1198]) tensor([0.4577, 0.1394, 0.2109, 0.1920]) -Greedy action tensor([ 1.0506, -0.6248, -0.0620, -0.6381]) tensor([0.5880, 0.1101, 0.1933, 0.1086]) -Greedy action tensor([ 0.7572, -0.5269, -0.0378, -0.3243]) tensor([0.4837, 0.1339, 0.2184, 0.1640]) -Greedy action tensor([ 0.6422, -0.4449, -0.1427, -0.3376]) tensor([0.4611, 0.1555, 0.2103, 0.1731]) -Greedy action tensor([ 0.4326, -0.1830, -0.0713, -0.1469]) tensor([0.3697, 0.1998, 0.2234, 0.2071]) -Greedy action tensor([ 1.3378, -0.9550, -0.0243, -0.6524]) tensor([0.6695, 0.0676, 0.1715, 0.0915]) -Greedy action tensor([ 1.0495, -0.4623, -0.0572, -0.5383]) tensor([0.5696, 0.1256, 0.1883, 0.1164]) -Greedy action tensor([ 1.0331, -0.4376, 0.0352, -0.3744]) tensor([0.5425, 0.1247, 0.2000, 0.1328]) -Greedy action tensor([ 1.0865, -0.7164, 0.0312, -0.8695]) tensor([0.6045, 0.0996, 0.2104, 0.0855]) -Greedy action tensor([ 1.2326, -0.5762, -0.0786, -0.5305]) tensor([0.6231, 0.1021, 0.1679, 0.1069]) -Greedy action tensor([ 1.0250, -0.3440, 0.0888, -0.3372]) tensor([0.5256, 0.1337, 0.2061, 0.1346]) -Greedy action tensor([ 0.6656, -0.5287, -0.0693, -0.2011]) tensor([0.4540, 0.1375, 0.2177, 0.1908]) -Greedy action tensor([ 1.0131, -0.4798, 0.0262, -0.3276]) tensor([0.5379, 0.1209, 0.2005, 0.1407]) -Greedy action tensor([ 0.6907, -0.5549, 0.0566, -0.1100]) tensor([0.4411, 0.1269, 0.2340, 0.1980]) -Greedy action tensor([ 0.6421, -0.3704, -0.1670, -0.4171]) tensor([0.4640, 0.1686, 0.2066, 0.1609]) -Greedy action tensor([ 1.0132, -0.4140, -0.0402, -0.1395]) tensor([0.5251, 0.1260, 0.1831, 0.1658]) -Greedy action tensor([ 1.0297, -0.7051, -0.3085, -0.6033]) tensor([0.6120, 0.1080, 0.1605, 0.1195]) -Greedy action tensor([ 1.1263, -0.4425, -0.0850, -0.4852]) tensor([0.5863, 0.1221, 0.1746, 0.1170]) -Greedy action tensor([ 0.6684, -0.4444, -0.1010, -0.1636]) tensor([0.4490, 0.1476, 0.2080, 0.1954]) -Greedy action tensor([ 1.1294, -0.7817, 0.0048, -0.5308]) tensor([0.6014, 0.0890, 0.1953, 0.1143]) -Greedy action tensor([ 0.8840, -0.4442, -0.0632, -0.6107]) tensor([0.5327, 0.1411, 0.2066, 0.1195]) -Greedy action tensor([ 0.9522, -0.6425, -0.0220, -0.5891]) tensor([0.5572, 0.1131, 0.2103, 0.1193]) -Greedy action tensor([ 0.4226, -0.2548, 0.0009, -0.0755]) tensor([0.3608, 0.1833, 0.2367, 0.2193]) -Greedy action tensor([ 0.9727, -1.1142, 0.0994, -0.4621]) tensor([0.5619, 0.0697, 0.2346, 0.1338]) -Greedy action tensor([ 0.4673, -0.1393, -0.0066, -0.0856]) tensor([0.3645, 0.1988, 0.2270, 0.2097]) -Greedy action tensor([ 0.5692, 0.0861, 0.0570, -0.0643]) tensor([0.3641, 0.2246, 0.2181, 0.1932]) -Greedy action tensor([ 0.9241, -0.7433, -0.0596, -0.4071]) tensor([0.5474, 0.1033, 0.2047, 0.1446]) -Greedy action tensor([ 0.6773, 0.0537, 0.0122, -0.2846]) tensor([0.4111, 0.2204, 0.2114, 0.1571]) -Greedy action tensor([ 0.3340, 0.4487, -0.2619, 0.0871]) tensor([0.2895, 0.3247, 0.1596, 0.2262]) -Greedy action tensor([ 0.2135, 0.0125, -0.1836, -0.0574]) tensor([0.3074, 0.2514, 0.2067, 0.2345]) -Greedy action tensor([ 0.8144, -0.4772, -0.1606, -0.4983]) tensor([0.5205, 0.1431, 0.1964, 0.1401]) -Greedy action tensor([ 1.0675, 0.0876, -0.3166, 0.3939]) tensor([0.4682, 0.1757, 0.1173, 0.2387]) -Greedy action tensor([ 1.8798, -0.1823, -0.4896, 0.5123]) tensor([0.6777, 0.0862, 0.0634, 0.1727]) -Greedy action tensor([ 1.1427, -0.6084, -0.4552, 0.0991]) tensor([0.5787, 0.1004, 0.1171, 0.2038]) -Greedy action tensor([ 1.6974, -0.1669, -0.4486, 0.2923]) tensor([0.6591, 0.1022, 0.0771, 0.1617]) -Greedy action tensor([ 1.3450, -0.4219, -0.6961, -0.0104]) tensor([0.6416, 0.1096, 0.0833, 0.1654]) -Greedy action tensor([ 0.9979, 0.2523, -0.1381, 0.2957]) tensor([0.4365, 0.2071, 0.1402, 0.2163]) -Greedy action tensor([ 1.3464, -0.2578, -1.6062, 0.3919]) tensor([0.6104, 0.1227, 0.0319, 0.2350]) -Greedy action tensor([ 1.1476, -0.3949, -0.5435, -0.1640]) tensor([0.5997, 0.1282, 0.1105, 0.1616]) -Greedy action tensor([ 1.6000, -0.1511, -0.7823, 0.0231]) tensor([0.6791, 0.1179, 0.0627, 0.1403]) -Greedy action tensor([ 1.5663, -0.4541, -0.5493, 0.2089]) tensor([0.6620, 0.0878, 0.0798, 0.1704]) -Greedy action tensor([ 1.2083, -0.1523, -1.1707, 0.5318]) tensor([0.5384, 0.1381, 0.0499, 0.2737]) -Greedy action tensor([ 1.5205, 0.4065, -0.5562, 0.4004]) tensor([0.5618, 0.1844, 0.0704, 0.1833]) -Greedy action tensor([ 1.1615, 0.0085, -1.0167, 0.3505]) tensor([0.5338, 0.1685, 0.0605, 0.2372]) -Greedy action tensor([ 0.8466, -0.2872, 0.1068, -0.1082]) tensor([0.4579, 0.1473, 0.2185, 0.1762]) -Greedy action tensor([ 1.0895, -0.2200, -0.2695, 0.3543]) tensor([0.4984, 0.1345, 0.1281, 0.2390]) -Greedy action tensor([2.2412, 0.2908, 0.0849, 0.0452]) tensor([0.7303, 0.1039, 0.0845, 0.0813]) -Greedy action tensor([ 1.5682, -0.1635, -0.5879, 0.2236]) tensor([0.6437, 0.1139, 0.0745, 0.1678]) -Greedy action tensor([ 2.3212, -1.0156, -0.1249, 0.3822]) tensor([0.7899, 0.0281, 0.0684, 0.1136]) -Greedy action tensor([ 1.3608, -0.5491, -0.5492, -0.0084]) tensor([0.6450, 0.0955, 0.0955, 0.1640]) -Greedy action tensor([ 1.6334, -0.3623, -0.2862, 0.1232]) tensor([0.6651, 0.0904, 0.0975, 0.1469]) -Greedy action tensor([ 1.1137, 0.2135, -0.5091, 0.4482]) tensor([0.4722, 0.1919, 0.0932, 0.2427]) -Greedy action tensor([ 1.2176, -0.2464, -0.5720, -0.0701]) tensor([0.5973, 0.1382, 0.0998, 0.1648]) -Greedy action tensor([ 1.2322, -0.2350, -0.3292, -0.2413]) tensor([0.5990, 0.1381, 0.1257, 0.1372]) -Greedy action tensor([ 1.1195, 0.1508, -0.4244, 0.2757]) tensor([0.4943, 0.1876, 0.1055, 0.2126]) -Greedy action tensor([ 1.6335, 0.2301, -0.3813, 0.4439]) tensor([0.5940, 0.1460, 0.0792, 0.1808]) -Greedy action tensor([ 2.1186, -0.3838, -0.9316, 0.7276]) tensor([0.7257, 0.0594, 0.0344, 0.1806]) -Greedy action tensor([ 1.7118, -0.5959, -0.2956, 0.1263]) tensor([0.6951, 0.0692, 0.0934, 0.1424]) -Greedy action tensor([ 0.9820, -0.0216, -0.1202, -0.0262]) tensor([0.4846, 0.1776, 0.1610, 0.1768]) -Greedy action tensor([ 1.8456, -0.7474, -1.0079, 0.3257]) tensor([0.7401, 0.0554, 0.0427, 0.1619]) -Greedy action tensor([ 1.4787, -0.0084, -0.8435, -0.5184]) tensor([0.6850, 0.1548, 0.0672, 0.0930]) -Greedy action tensor([ 1.6653, 0.1285, -0.5596, 0.2368]) tensor([0.6399, 0.1376, 0.0692, 0.1533]) -Greedy action tensor([ 1.6887, 0.4832, -0.6305, 0.3740]) tensor([0.6001, 0.1797, 0.0590, 0.1611]) -Greedy action tensor([ 1.8833, -1.0162, -0.3486, 0.9410]) tensor([0.6443, 0.0355, 0.0692, 0.2511]) -Greedy action tensor([ 1.6826, -0.8326, 0.0050, -0.0621]) tensor([0.6933, 0.0561, 0.1295, 0.1211]) -Greedy action tensor([ 1.2987, -0.3063, -0.2528, 0.2386]) tensor([0.5684, 0.1142, 0.1205, 0.1969]) -Greedy action tensor([ 1.5167, -0.7271, -0.4885, -0.0234]) tensor([0.6873, 0.0729, 0.0925, 0.1473]) -Greedy action tensor([ 1.7348, -0.0101, -1.1409, 0.2720]) tensor([0.6837, 0.1194, 0.0385, 0.1583]) -Greedy action tensor([ 0.8518, -0.3166, -0.5430, 0.3183]) tensor([0.4661, 0.1449, 0.1155, 0.2734]) -Greedy action tensor([ 1.5385, -0.1306, -0.7059, 0.3556]) tensor([0.6247, 0.1177, 0.0662, 0.1914]) -Greedy action tensor([ 1.8745, -0.6098, 0.0709, 0.0850]) tensor([0.7066, 0.0589, 0.1164, 0.1180]) -Greedy action tensor([ 1.3632, -0.6476, -0.5761, -0.1910]) tensor([0.6716, 0.0899, 0.0966, 0.1419]) -Greedy action tensor([ 1.1781, -0.5144, -0.5336, 0.2721]) tensor([0.5654, 0.1041, 0.1021, 0.2285]) -Greedy action tensor([ 1.3743, -0.4695, -0.6139, 0.3225]) tensor([0.6081, 0.0962, 0.0833, 0.2124]) -Greedy action tensor([ 1.1141, -0.3768, 0.0135, -0.0738]) tensor([0.5369, 0.1209, 0.1786, 0.1637]) -Greedy action tensor([ 1.7044, -0.4170, -0.9612, 0.2823]) tensor([0.6990, 0.0838, 0.0486, 0.1686]) -Greedy action tensor([ 2.5374, -0.7124, -0.0995, 0.6329]) tensor([0.7941, 0.0308, 0.0568, 0.1182]) -Greedy action tensor([ 1.4243, -0.1635, -0.5966, 0.0588]) tensor([0.6281, 0.1284, 0.0832, 0.1603]) -Greedy action tensor([ 1.6153, -0.4830, -0.8341, -0.0221]) tensor([0.7125, 0.0874, 0.0615, 0.1386]) -Greedy action tensor([ 1.6791, -0.5133, -0.3251, 0.4627]) tensor([0.6482, 0.0724, 0.0874, 0.1921]) -Greedy action tensor([ 1.0884, -0.2717, -0.3962, 0.6007]) tensor([0.4768, 0.1224, 0.1080, 0.2928]) -Greedy action tensor([ 1.6634, -0.3703, -0.6751, 0.2990]) tensor([0.6744, 0.0882, 0.0651, 0.1723]) -Greedy action tensor([ 1.1920, -0.3405, -0.2213, 0.3102]) tensor([0.5338, 0.1153, 0.1299, 0.2210]) -Greedy action tensor([ 1.4608, -0.1702, -0.6382, 0.0565]) tensor([0.6394, 0.1252, 0.0784, 0.1570]) -Greedy action tensor([ 1.5419, -0.5506, -0.1279, 0.6998]) tensor([0.5739, 0.0708, 0.1081, 0.2472]) -Greedy action tensor([ 1.4480, -0.7402, -0.1375, 0.7672]) tensor([0.5485, 0.0615, 0.1124, 0.2776]) -Greedy action tensor([ 1.8536, -0.9048, -0.3478, 0.5428]) tensor([0.6927, 0.0439, 0.0766, 0.1868]) -Greedy action tensor([ 1.0366, -0.0350, -0.9871, 0.3011]) tensor([0.5118, 0.1753, 0.0676, 0.2453]) -Greedy action tensor([ 2.1366, -1.2695, -0.3878, 0.5151]) tensor([0.7628, 0.0253, 0.0611, 0.1508]) -Greedy action tensor([ 1.1537, -0.2973, -0.4880, 0.6153]) tensor([0.4971, 0.1165, 0.0963, 0.2901]) -Greedy action tensor([ 1.6948, -0.6859, -0.5433, 0.3082]) tensor([0.6901, 0.0638, 0.0736, 0.1725]) -Greedy action tensor([ 2.2380, -0.9645, -0.7295, -0.0589]) tensor([0.8385, 0.0341, 0.0431, 0.0843]) -Greedy action tensor([ 1.7078, -0.8761, -0.2629, 0.3129]) tensor([0.6837, 0.0516, 0.0953, 0.1694]) -Greedy action tensor([ 1.6547, -0.9193, -0.4635, 0.3384]) tensor([0.6828, 0.0520, 0.0821, 0.1831]) -Greedy action tensor([ 1.9302, -0.8478, 0.0567, -0.0165]) tensor([0.7361, 0.0458, 0.1131, 0.1051]) -Greedy action tensor([ 1.4132, -0.6469, -0.6936, 0.7400]) tensor([0.5685, 0.0724, 0.0691, 0.2900]) -Greedy action tensor([ 1.2678, -0.3687, -0.6572, 0.4243]) tensor([0.5647, 0.1099, 0.0824, 0.2430]) -Greedy action tensor([ 1.6325, -0.4420, -0.6574, 0.6663]) tensor([0.6221, 0.0781, 0.0630, 0.2367]) -Greedy action tensor([ 1.6898, -0.7219, 0.0484, 0.6559]) tensor([0.6101, 0.0547, 0.1182, 0.2170]) -Greedy action tensor([ 1.3797, -0.2827, -0.7682, 0.4498]) tensor([0.5879, 0.1115, 0.0686, 0.2320]) -Greedy action tensor([ 1.5731, -0.1647, -0.2641, 0.7066]) tensor([0.5696, 0.1002, 0.0907, 0.2395]) -Greedy action tensor([ 1.4742, 0.0910, -0.3786, 0.4718]) tensor([0.5635, 0.1413, 0.0884, 0.2068]) -Greedy action tensor([ 1.5728, -0.7647, -0.1613, 0.1634]) tensor([0.6590, 0.0636, 0.1163, 0.1610]) -Greedy action tensor([ 1.8265, -0.4200, -0.6759, 0.2818]) tensor([0.7137, 0.0755, 0.0585, 0.1523]) -Greedy action tensor([ 1.0661, -0.7680, -0.5679, -0.1408]) tensor([0.6046, 0.0966, 0.1180, 0.1808]) -Greedy action tensor([ 1.6977, 0.0413, -1.7140, 0.0467]) tensor([0.7064, 0.1348, 0.0233, 0.1355]) -Greedy action tensor([ 1.6858, -0.8980, -0.2574, 0.6143]) tensor([0.6405, 0.0483, 0.0918, 0.2194]) -Greedy action tensor([ 1.0054, -0.3314, -0.1751, -0.1971]) tensor([0.5347, 0.1405, 0.1642, 0.1606]) -Greedy action tensor([ 1.5519, -0.5388, -0.2277, 0.2621]) tensor([0.6379, 0.0788, 0.1076, 0.1756]) -Greedy action tensor([ 1.6788, -0.5246, -0.5681, 0.2590]) tensor([0.6859, 0.0757, 0.0725, 0.1658]) -Greedy action tensor([ 1.5596, -0.4539, -0.5119, 0.5389]) tensor([0.6173, 0.0824, 0.0778, 0.2224]) -Greedy action tensor([ 2.1397, 0.3830, 0.3424, -0.2350]) tensor([0.6986, 0.1206, 0.1158, 0.0650]) -Greedy action tensor([-1.4605, -0.4758, 0.4429, 0.1600]) tensor([0.0648, 0.1734, 0.4345, 0.3274]) -Greedy action tensor([-1.6524, -0.5365, 0.5366, -0.1604]) tensor([0.0574, 0.1752, 0.5123, 0.2552]) -Greedy action tensor([-1.9426, -0.4476, 0.6666, -0.1795]) tensor([0.0402, 0.1793, 0.5462, 0.2344]) -Greedy action tensor([-1.5824, -0.2470, 0.5732, 0.0199]) tensor([0.0544, 0.2066, 0.4692, 0.2698]) -Greedy action tensor([-1.9370, -0.4413, 0.6589, -0.1771]) tensor([0.0405, 0.1808, 0.5432, 0.2354]) -Greedy action tensor([-1.7844, -0.4819, 0.5868, -0.1156]) tensor([0.0483, 0.1778, 0.5175, 0.2564]) -Greedy action tensor([-0.6756, 0.3998, 0.4437, 0.4535]) tensor([0.0991, 0.2906, 0.3036, 0.3066]) -Greedy action tensor([-1.9145, -0.4460, 0.6513, -0.1693]) tensor([0.0415, 0.1803, 0.5403, 0.2378]) -Greedy action tensor([-0.4870, 1.0319, 0.0928, 0.1458]) tensor([0.1083, 0.4945, 0.1934, 0.2039]) -Greedy action tensor([-1.9049, -0.4462, 0.6491, -0.1622]) tensor([0.0419, 0.1801, 0.5387, 0.2393]) -Greedy action tensor([-1.8942, -0.3678, 0.6361, -0.1456]) tensor([0.0418, 0.1925, 0.5253, 0.2404]) -Greedy action tensor([-1.8907, -0.4247, 0.6568, -0.1444]) tensor([0.0419, 0.1817, 0.5359, 0.2405]) -Greedy action tensor([-1.9105, -0.4357, 0.6563, -0.1626]) tensor([0.0414, 0.1810, 0.5396, 0.2379]) -Greedy action tensor([-1.6948, -0.2467, 0.2640, -0.4739]) tensor([0.0636, 0.2704, 0.4506, 0.2155]) -Greedy action tensor([-1.8574, -0.3317, 0.6303, -0.1281]) tensor([0.0430, 0.1976, 0.5172, 0.2422]) -Greedy action tensor([-1.5096, -0.0349, 0.4668, -0.1437]) tensor([0.0606, 0.2647, 0.4372, 0.2375]) -Greedy action tensor([-1.3307, -0.6379, 0.3440, 0.1600]) tensor([0.0783, 0.1565, 0.4177, 0.3475]) -Greedy action tensor([-1.9046, -0.4504, 0.6521, -0.1579]) tensor([0.0418, 0.1790, 0.5392, 0.2399]) -Greedy action tensor([-1.5617, -0.4568, 0.4831, 0.0261]) tensor([0.0601, 0.1814, 0.4644, 0.2941]) -Greedy action tensor([-1.8748, -0.4345, 0.6317, -0.1405]) tensor([0.0432, 0.1824, 0.5297, 0.2447]) -Greedy action tensor([-1.8276, -0.4487, 0.6147, -0.1184]) tensor([0.0455, 0.1805, 0.5228, 0.2512]) -Greedy action tensor([-1.8642, -0.4610, 0.6324, -0.1415]) tensor([0.0438, 0.1784, 0.5323, 0.2455]) -Greedy action tensor([-1.9424, -0.4456, 0.6669, -0.1795]) tensor([0.0402, 0.1795, 0.5461, 0.2342]) -Greedy action tensor([-1.8978, -0.4525, 0.6483, -0.1588]) tensor([0.0422, 0.1791, 0.5385, 0.2402]) -Greedy action tensor([-1.8173, -0.2157, 0.5780, -0.0924]) tensor([0.0444, 0.2201, 0.4867, 0.2489]) -Greedy action tensor([-1.9364, -0.4282, 0.6567, -0.1778]) tensor([0.0405, 0.1830, 0.5415, 0.2350]) -Greedy action tensor([-1.7211, -0.4338, 0.5404, -0.1466]) tensor([0.0525, 0.1902, 0.5038, 0.2535]) -Greedy action tensor([-0.2528, 0.1497, 0.2050, 0.3796]) tensor([0.1678, 0.2510, 0.2653, 0.3159]) -Greedy action tensor([-1.3255, -0.4287, 0.8050, 0.7265]) tensor([0.0509, 0.1247, 0.4284, 0.3960]) -Greedy action tensor([-1.8868, -0.4510, 0.6271, -0.1507]) tensor([0.0430, 0.1809, 0.5317, 0.2443]) -Greedy action tensor([-1.3484, -0.4700, 0.3816, -0.0554]) tensor([0.0788, 0.1897, 0.4444, 0.2871]) -Greedy action tensor([-1.8078, -0.0964, 0.5509, -0.0648]) tensor([0.0438, 0.2425, 0.4633, 0.2503]) -Greedy action tensor([-1.9305, -0.4299, 0.6601, -0.1714]) tensor([0.0406, 0.1821, 0.5415, 0.2358]) -Greedy action tensor([-1.9080, -0.3768, 0.6417, -0.1610]) tensor([0.0414, 0.1913, 0.5298, 0.2374]) -Greedy action tensor([-1.4145, 0.5309, 0.2496, 0.0233]) tensor([0.0572, 0.4001, 0.3020, 0.2408]) -Greedy action tensor([-1.7076, -0.1510, 0.5047, -0.0912]) tensor([0.0502, 0.2382, 0.4588, 0.2528]) -Greedy action tensor([-0.8551, 0.3931, 0.1914, -0.0254]) tensor([0.1039, 0.3620, 0.2959, 0.2382]) -Greedy action tensor([-1.2485, 0.6308, 0.2511, -0.0087]) tensor([0.0646, 0.4229, 0.2893, 0.2231]) -Greedy action tensor([-1.9344, -0.4399, 0.6631, -0.1744]) tensor([0.0405, 0.1804, 0.5437, 0.2353]) -Greedy action tensor([-1.8357, -0.3021, 0.3729, -0.2668]) tensor([0.0512, 0.2372, 0.4659, 0.2457]) -Greedy action tensor([-1.8831, -0.3775, 0.6364, -0.1463]) tensor([0.0424, 0.1909, 0.5262, 0.2406]) -Greedy action tensor([-1.8667, -0.4006, 0.6219, -0.1546]) tensor([0.0436, 0.1890, 0.5255, 0.2418]) -Greedy action tensor([-1.9103, -0.4155, 0.6419, -0.1671]) tensor([0.0417, 0.1857, 0.5346, 0.2381]) -Greedy action tensor([-0.7237, 0.2248, 0.0333, 0.3556]) tensor([0.1155, 0.2983, 0.2463, 0.3399]) -Greedy action tensor([-0.5711, 0.6052, 0.0400, 0.0155]) tensor([0.1269, 0.4113, 0.2337, 0.2281]) -Greedy action tensor([-1.8811, -0.6168, 0.9288, 0.0833]) tensor([0.0354, 0.1252, 0.5873, 0.2521]) -Greedy action tensor([-1.8988, -0.4298, 0.6474, -0.1558]) tensor([0.0420, 0.1824, 0.5357, 0.2399]) -Greedy action tensor([-1.8652, -0.4001, 0.6253, -0.1188]) tensor([0.0432, 0.1871, 0.5217, 0.2479]) -Greedy action tensor([-1.9175, -0.4526, 0.6573, -0.1674]) tensor([0.0413, 0.1787, 0.5423, 0.2377]) -Greedy action tensor([-1.9371, -0.4388, 0.6643, -0.1756]) tensor([0.0404, 0.1806, 0.5441, 0.2349]) -Greedy action tensor([-1.8402, -0.4370, 0.6151, -0.1248]) tensor([0.0449, 0.1826, 0.5230, 0.2495]) -Greedy action tensor([-1.9347, -0.4297, 0.6601, -0.1733]) tensor([0.0405, 0.1822, 0.5418, 0.2355]) -Greedy action tensor([-1.1562, -0.6322, 0.2308, 0.2584]) tensor([0.0925, 0.1563, 0.3704, 0.3808]) -Greedy action tensor([-1.9419, -0.4064, 0.6570, -0.1795]) tensor([0.0401, 0.1863, 0.5397, 0.2338]) -Greedy action tensor([-1.7379, -0.2192, 0.5339, -0.0953]) tensor([0.0489, 0.2235, 0.4746, 0.2530]) -Greedy action tensor([-1.9039, -0.3599, 0.6484, -0.1458]) tensor([0.0411, 0.1926, 0.5278, 0.2385]) -Greedy action tensor([-1.8470, -0.4621, 0.6339, -0.1275]) tensor([0.0444, 0.1773, 0.5306, 0.2478]) -Greedy action tensor([-1.5891, -0.4628, 0.4855, 0.0392]) tensor([0.0583, 0.1799, 0.4645, 0.2972]) -Greedy action tensor([-1.3459, 0.4967, 0.3395, 0.1770]) tensor([0.0578, 0.3651, 0.3120, 0.2652]) -Greedy action tensor([-1.8989, -0.4299, 0.6429, -0.1594]) tensor([0.0421, 0.1830, 0.5350, 0.2399]) -Greedy action tensor([-0.7772, 0.6673, 0.0537, -0.2857]) tensor([0.1091, 0.4623, 0.2503, 0.1783]) -Greedy action tensor([-1.6132, 0.3028, 0.4036, -0.0729]) tensor([0.0501, 0.3401, 0.3762, 0.2336]) -Greedy action tensor([-1.8843, -0.2646, 0.5945, -0.1273]) tensor([0.0421, 0.2125, 0.5017, 0.2438]) -Greedy action tensor([-0.4851, -0.4811, 0.1833, 0.0851]) tensor([0.1747, 0.1754, 0.3409, 0.3090]) -Greedy action tensor([-1.8255, -0.4238, 0.6064, -0.1259]) tensor([0.0456, 0.1854, 0.5193, 0.2497]) -Greedy action tensor([-1.6102, 0.0790, 0.4845, -0.1752]) tensor([0.0534, 0.2890, 0.4335, 0.2241]) -Greedy action tensor([-1.9221, -0.3542, 0.6418, -0.1612]) tensor([0.0407, 0.1950, 0.5279, 0.2365]) -Greedy action tensor([-1.9333, -0.4503, 0.6717, -0.1683]) tensor([0.0404, 0.1778, 0.5461, 0.2357]) -Greedy action tensor([-1.7312, -0.4688, 0.5623, -0.0791]) tensor([0.0509, 0.1797, 0.5040, 0.2654]) -Greedy action tensor([-1.5395, -0.3027, 0.6662, 0.1664]) tensor([0.0526, 0.1810, 0.4770, 0.2894]) -Greedy action tensor([-0.3583, -0.0346, 0.1185, 0.1102]) tensor([0.1789, 0.2472, 0.2881, 0.2858]) -Greedy action tensor([-1.6953, -0.3633, 0.5293, -0.0817]) tensor([0.0525, 0.1988, 0.4853, 0.2634]) -Greedy action tensor([-0.5202, 0.0300, 0.0986, -0.0132]) tensor([0.1600, 0.2774, 0.2970, 0.2656]) -Greedy action tensor([-1.9024, -0.4443, 0.6476, -0.1600]) tensor([0.0420, 0.1805, 0.5378, 0.2398]) -Greedy action tensor([-1.6735, -0.4053, 0.5797, -0.1809]) tensor([0.0540, 0.1919, 0.5139, 0.2402]) -Greedy action tensor([-1.3647, 0.6756, 0.2316, 0.1909]) tensor([0.0545, 0.4189, 0.2687, 0.2580]) -Greedy action tensor([-1.8184, -0.3693, 0.5930, -0.1324]) tensor([0.0459, 0.1953, 0.5113, 0.2475]) -Greedy action tensor([-1.5070, 0.0297, 0.5279, 0.1000]) tensor([0.0547, 0.2542, 0.4184, 0.2727]) -Greedy action tensor([-1.8600, -0.1910, 0.5960, -0.1420]) tensor([0.0425, 0.2254, 0.4953, 0.2368]) -Greedy action tensor([-1.9282, -0.4438, 0.6616, -0.1723]) tensor([0.0408, 0.1799, 0.5433, 0.2360]) -Greedy action tensor([-1.5522, -0.5478, 0.4370, 0.0403]) tensor([0.0627, 0.1711, 0.4581, 0.3081]) -Greedy action tensor([-0.6729, -0.3626, -0.6255, -0.4914]) tensor([0.2169, 0.2957, 0.2274, 0.2600]) -Greedy action tensor([-0.3888, -1.4020, 0.7145, -0.2723]) tensor([0.1818, 0.0660, 0.5479, 0.2043]) -Greedy action tensor([-0.6388, -0.7293, 0.3448, -0.5498]) tensor([0.1760, 0.1608, 0.4707, 0.1924]) -Greedy action tensor([-0.5049, -0.2170, -0.4881, -0.1968]) tensor([0.2122, 0.2831, 0.2159, 0.2888]) -Greedy action tensor([ 0.5951, -0.6088, -0.5855, -0.2294]) tensor([0.4889, 0.1467, 0.1501, 0.2143]) -Greedy action tensor([-0.4574, -0.5398, -0.8683, -0.7100]) tensor([0.2975, 0.2740, 0.1973, 0.2311]) -Greedy action tensor([-1.3354, -0.7580, 0.8763, -0.1673]) tensor([0.0661, 0.1177, 0.6036, 0.2126]) -Greedy action tensor([-0.1594, -0.5486, 0.8584, -0.4250]) tensor([0.1919, 0.1300, 0.5310, 0.1471]) -Greedy action tensor([ 0.4653, 0.1458, 0.3686, -0.3378]) tensor([0.3244, 0.2357, 0.2945, 0.1453]) -Greedy action tensor([ 0.3452, -0.2190, 0.2018, 0.9524]) tensor([0.2342, 0.1332, 0.2029, 0.4297]) -Greedy action tensor([-0.1101, -0.3663, 0.8447, -1.0135]) tensor([0.2093, 0.1620, 0.5438, 0.0848]) -Greedy action tensor([-1.1038, 0.7724, -0.1040, -0.9211]) tensor([0.0874, 0.5703, 0.2374, 0.1049]) -Greedy action tensor([ 0.1687, -1.2776, 0.1219, -0.5532]) tensor([0.3738, 0.0880, 0.3567, 0.1816]) -Greedy action tensor([ 0.4842, 0.3198, -0.4360, -0.0921]) tensor([0.3560, 0.3020, 0.1419, 0.2001]) -Greedy action tensor([-0.4322, -0.2989, 0.4697, -0.4213]) tensor([0.1780, 0.2034, 0.4387, 0.1800]) -Greedy action tensor([-0.3530, -1.0760, 0.2238, -0.1859]) tensor([0.2248, 0.1091, 0.4003, 0.2657]) -Greedy action tensor([-0.0732, -1.0120, -0.1496, -0.7010]) tensor([0.3507, 0.1372, 0.3249, 0.1872]) -Greedy action tensor([-0.0570, -1.2566, 0.1837, -0.4289]) tensor([0.3065, 0.0924, 0.3899, 0.2113]) -Greedy action tensor([ 0.8259, -0.0762, -0.2352, -0.4851]) tensor([0.4947, 0.2007, 0.1712, 0.1334]) -Greedy action tensor([-0.0849, -1.0500, 0.2521, -0.4106]) tensor([0.2854, 0.1087, 0.3998, 0.2061]) -Greedy action tensor([-0.4321, -0.6825, -0.6803, 0.1430]) tensor([0.2306, 0.1795, 0.1799, 0.4099]) -Greedy action tensor([-0.3729, 0.0138, -0.9070, -0.2271]) tensor([0.2372, 0.3492, 0.1391, 0.2745]) -Greedy action tensor([-0.8766, -0.7517, -0.8328, 0.3759]) tensor([0.1498, 0.1697, 0.1565, 0.5241]) -Greedy action tensor([-0.0563, -0.3866, 0.5165, 0.1176]) tensor([0.2136, 0.1535, 0.3788, 0.2542]) -Greedy action tensor([ 1.2369, 0.1614, 0.4271, -0.5615]) tensor([0.5124, 0.1748, 0.2280, 0.0848]) -Greedy action tensor([-0.5681, -1.1099, -0.2207, -0.8653]) tensor([0.2674, 0.1555, 0.3785, 0.1986]) -Greedy action tensor([-0.7565, -0.2798, -1.0038, 0.2474]) tensor([0.1634, 0.2632, 0.1276, 0.4459]) -Greedy action tensor([ 0.9630, -0.9501, -0.8525, -0.6605]) tensor([0.6633, 0.0979, 0.1080, 0.1308]) -Greedy action tensor([-0.3768, -1.4447, -0.2359, -0.1542]) tensor([0.2671, 0.0918, 0.3075, 0.3337]) -Greedy action tensor([-0.3539, 0.0259, -0.8809, -0.7698]) tensor([0.2694, 0.3938, 0.1590, 0.1777]) -Greedy action tensor([-0.3870, 0.2755, 0.5458, -0.9914]) tensor([0.1659, 0.3218, 0.4217, 0.0906]) -Greedy action tensor([ 0.7104, -0.0864, 0.6853, -0.5207]) tensor([0.3679, 0.1659, 0.3588, 0.1074]) -Greedy action tensor([-0.5731, -1.2195, 0.6146, -1.0215]) tensor([0.1837, 0.0963, 0.6026, 0.1174]) -Greedy action tensor([-1.0387, -0.6907, 0.1998, -0.3011]) tensor([0.1257, 0.1780, 0.4336, 0.2628]) -Greedy action tensor([-1.1672, -0.2634, -0.0965, 0.4144]) tensor([0.0889, 0.2195, 0.2593, 0.4323]) -Greedy action tensor([-0.0139, -0.0558, 0.6612, -0.4232]) tensor([0.2180, 0.2090, 0.4282, 0.1448]) -Greedy action tensor([-0.6012, -0.8838, 0.6217, -0.1418]) tensor([0.1485, 0.1119, 0.5045, 0.2351]) -Greedy action tensor([ 0.5708, -1.2183, 0.6342, -0.3026]) tensor([0.3773, 0.0631, 0.4020, 0.1576]) -Greedy action tensor([-1.1623, -0.6609, 0.2642, -0.8458]) tensor([0.1221, 0.2016, 0.5086, 0.1676]) -Greedy action tensor([ 1.1927, -0.5529, 0.4037, 0.8753]) tensor([0.4243, 0.0741, 0.1927, 0.3089]) -Greedy action tensor([ 0.5217, -1.4545, 0.2403, -0.3802]) tensor([0.4349, 0.0603, 0.3283, 0.1765]) -Greedy action tensor([-0.6136, -1.0250, 0.4953, -0.8346]) tensor([0.1820, 0.1206, 0.5515, 0.1459]) -Greedy action tensor([ 0.9623, -1.0794, -0.7266, 0.1420]) tensor([0.5699, 0.0740, 0.1053, 0.2509]) -Greedy action tensor([-0.2413, -0.4315, 0.4490, -0.0731]) tensor([0.1998, 0.1652, 0.3985, 0.2364]) -Greedy action tensor([ 0.0385, -0.3128, 0.0962, -0.7964]) tensor([0.3128, 0.2201, 0.3314, 0.1357]) -Greedy action tensor([-0.4353, -1.4400, -0.5981, -0.4536]) tensor([0.3127, 0.1145, 0.2657, 0.3071]) -Greedy action tensor([-0.8888, 0.0281, 0.1129, -0.5406]) tensor([0.1309, 0.3274, 0.3564, 0.1854]) -Greedy action tensor([ 0.0829, -0.9007, 0.5757, -0.7362]) tensor([0.2897, 0.1083, 0.4742, 0.1277]) -Greedy action tensor([-1.9751, -0.5380, 0.9323, -1.9282]) tensor([0.0407, 0.1713, 0.7453, 0.0427]) -Greedy action tensor([ 0.7428, -1.8167, -0.2228, 1.2880]) tensor([0.3142, 0.0243, 0.1196, 0.5419]) -Greedy action tensor([ 0.1494, 0.5828, 0.1249, -0.5529]) tensor([0.2491, 0.3843, 0.2431, 0.1234]) -Greedy action tensor([ 0.4515, -0.2203, -0.8803, 0.2359]) tensor([0.3875, 0.1979, 0.1023, 0.3123]) -Greedy action tensor([-1.6851, 0.3151, -0.6107, -0.7197]) tensor([0.0717, 0.5300, 0.2100, 0.1883]) -Greedy action tensor([-0.8116, -0.2640, 0.7308, 0.0394]) tensor([0.1026, 0.1774, 0.4797, 0.2403]) -Greedy action tensor([ 0.4397, -0.8084, 0.3885, 0.2104]) tensor([0.3298, 0.0947, 0.3133, 0.2622]) -Greedy action tensor([ 0.0834, 0.2122, -0.6890, -0.6615]) tensor([0.3253, 0.3700, 0.1503, 0.1544]) -Greedy action tensor([-0.3146, 0.3238, -0.0754, -0.7850]) tensor([0.2088, 0.3954, 0.2653, 0.1305]) -Greedy action tensor([ 0.6719, -0.7295, 0.1815, -0.2900]) tensor([0.4463, 0.1099, 0.2733, 0.1706]) -Greedy action tensor([ 0.7334, -0.7318, -0.4611, 0.3678]) tensor([0.4489, 0.1037, 0.1360, 0.3114]) -Greedy action tensor([-0.2189, 0.1738, 0.5021, -0.3601]) tensor([0.1850, 0.2740, 0.3804, 0.1606]) -Greedy action tensor([ 0.1864, 0.5979, 0.0485, -0.5639]) tensor([0.2596, 0.3917, 0.2261, 0.1226]) -Greedy action tensor([-0.2730, -1.0731, 0.2756, -0.8650]) tensor([0.2679, 0.1203, 0.4636, 0.1482]) -Greedy action tensor([ 0.0271, -0.1431, -0.0923, -0.4933]) tensor([0.3007, 0.2537, 0.2669, 0.1787]) -Greedy action tensor([ 0.1857, -0.7319, 0.4491, -0.5891]) tensor([0.3163, 0.1264, 0.4116, 0.1457]) -Greedy action tensor([-0.4904, -1.2053, -0.2073, 0.1361]) tensor([0.2133, 0.1044, 0.2831, 0.3992]) -Greedy action tensor([-0.5062, 0.4888, -0.7871, 0.1096]) tensor([0.1585, 0.4286, 0.1197, 0.2933]) -Greedy action tensor([ 0.4600, -0.9999, -0.9025, -0.1619]) tensor([0.4938, 0.1147, 0.1264, 0.2651]) -Greedy action tensor([-0.6456, -0.1938, -1.2235, 0.5219]) tensor([0.1576, 0.2476, 0.0884, 0.5064]) -Greedy action tensor([-0.3556, -0.7691, 0.1809, -0.4817]) tensor([0.2351, 0.1555, 0.4021, 0.2073]) -Greedy action tensor([ 0.0372, -0.1744, -0.4475, -1.2533]) tensor([0.3703, 0.2997, 0.2281, 0.1019]) -Greedy action tensor([-0.2527, -2.5222, -0.2485, 0.8737]) tensor([0.1926, 0.0199, 0.1934, 0.5941]) -Greedy action tensor([-0.0126, -0.5437, 0.7790, -1.0684]) tensor([0.2414, 0.1419, 0.5327, 0.0840]) -Greedy action tensor([-1.1705, -0.5773, -0.0890, 0.4589]) tensor([0.0921, 0.1666, 0.2716, 0.4697]) -Greedy action tensor([ 0.2489, -0.3603, 0.3362, -0.2550]) tensor([0.3087, 0.1679, 0.3369, 0.1865]) -Greedy action tensor([0.2865, 0.1358, 0.0953, 0.0588]) tensor([0.2872, 0.2470, 0.2372, 0.2287]) -Greedy action tensor([-0.5886, -0.4711, 0.2393, -1.2477]) tensor([0.2028, 0.2281, 0.4641, 0.1049]) -Greedy action tensor([-0.4250, -0.3710, 1.0364, -0.5439]) tensor([0.1378, 0.1455, 0.5943, 0.1224]) -Greedy action tensor([ 0.7048, -0.3361, -0.4932, -0.2540]) tensor([0.4906, 0.1733, 0.1481, 0.1881]) -Greedy action tensor([ 1.8811, -0.4552, -0.0092, 0.9102]) tensor([0.6148, 0.0594, 0.0929, 0.2329]) -Greedy action tensor([ 0.5208, -1.0284, -0.2792, -0.2330]) tensor([0.4690, 0.0996, 0.2107, 0.2207]) -Greedy action tensor([-0.5154, -0.3752, 0.0617, -0.6494]) tensor([0.2081, 0.2394, 0.3706, 0.1820]) -Greedy action tensor([ 0.4877, 0.2487, -0.0807, -0.2158]) tensor([0.3511, 0.2764, 0.1988, 0.1737]) -Greedy action tensor([ 1.2879, -0.8039, -0.0155, -0.6706]) tensor([0.6510, 0.0804, 0.1768, 0.0918]) -Greedy action tensor([ 1.2519, -1.0304, 0.1404, -0.8202]) tensor([0.6422, 0.0655, 0.2113, 0.0809]) -Greedy action tensor([ 0.9404, -0.7328, 0.0701, -0.6330]) tensor([0.5513, 0.1035, 0.2309, 0.1143]) -Greedy action tensor([ 0.5780, -0.0767, -0.0742, 0.0457]) tensor([0.3805, 0.1977, 0.1982, 0.2235]) -Greedy action tensor([ 0.1317, 0.2573, -0.1918, -0.7572]) tensor([0.3059, 0.3469, 0.2214, 0.1258]) -Greedy action tensor([ 0.5616, -0.2593, -0.0125, -0.0789]) tensor([0.3952, 0.1739, 0.2226, 0.2083]) -Greedy action tensor([ 0.6378, -0.6495, -0.1048, -0.1956]) tensor([0.4574, 0.1262, 0.2176, 0.1988]) -Greedy action tensor([ 0.3699, -0.0476, -0.0519, -0.0576]) tensor([0.3371, 0.2220, 0.2211, 0.2198]) -Greedy action tensor([ 0.9997, -0.2735, -0.0574, -0.1672]) tensor([0.5158, 0.1444, 0.1792, 0.1606]) -Greedy action tensor([ 0.3881, -0.0005, -0.0598, -0.3389]) tensor([0.3571, 0.2421, 0.2282, 0.1726]) -Greedy action tensor([ 1.2025, -0.7424, 0.0876, -0.6882]) tensor([0.6166, 0.0882, 0.2022, 0.0931]) -Greedy action tensor([ 0.7979, -0.2980, -0.0171, -0.2179]) tensor([0.4675, 0.1563, 0.2069, 0.1693]) -Greedy action tensor([ 0.8350, -0.7084, 0.1183, -0.4810]) tensor([0.5076, 0.1084, 0.2479, 0.1361]) -Greedy action tensor([ 0.7234, -0.4014, 0.0147, -0.2867]) tensor([0.4585, 0.1489, 0.2257, 0.1670]) -Greedy action tensor([ 0.9008, -0.3379, -0.2119, -0.1845]) tensor([0.5112, 0.1481, 0.1680, 0.1727]) -Greedy action tensor([ 0.3857, -0.0667, -0.0278, -0.0042]) tensor([0.3362, 0.2138, 0.2223, 0.2276]) -Greedy action tensor([0.7993, 0.0250, 0.0333, 0.1278]) tensor([0.4104, 0.1892, 0.1908, 0.2097]) -Greedy action tensor([ 1.3363, -0.6791, 0.0423, -0.8579]) tensor([0.6584, 0.0877, 0.1805, 0.0734]) -Greedy action tensor([ 0.6710, -0.5633, -0.1240, -0.2055]) tensor([0.4632, 0.1348, 0.2092, 0.1928]) -Greedy action tensor([ 1.0708, -0.6150, -0.0822, -0.3548]) tensor([0.5743, 0.1064, 0.1813, 0.1380]) -Greedy action tensor([ 1.2811, -0.7043, 0.1494, -0.7976]) tensor([0.6310, 0.0866, 0.2035, 0.0789]) -Greedy action tensor([ 0.5417, -0.0630, -0.0232, -0.0027]) tensor([0.3711, 0.2027, 0.2109, 0.2153]) -Greedy action tensor([ 0.6014, -0.3005, 0.0826, -0.1739]) tensor([0.4062, 0.1648, 0.2418, 0.1871]) -Greedy action tensor([ 0.7841, -0.2791, -0.0406, -0.1249]) tensor([0.4573, 0.1579, 0.2005, 0.1843]) -Greedy action tensor([ 1.1029, -0.6777, 0.2040, -0.5407]) tensor([0.5653, 0.0953, 0.2301, 0.1093]) -Greedy action tensor([ 0.4642, -0.1213, 0.0261, -0.3075]) tensor([0.3753, 0.2090, 0.2422, 0.1735]) -Greedy action tensor([ 0.9830, -0.5786, -0.2357, -0.3938]) tensor([0.5689, 0.1194, 0.1682, 0.1436]) -Greedy action tensor([ 0.6061, -0.6565, -0.0583, -0.2418]) tensor([0.4493, 0.1271, 0.2312, 0.1924]) -Greedy action tensor([ 1.0437, -0.6039, -0.0581, -0.3160]) tensor([0.5613, 0.1081, 0.1865, 0.1441]) -Greedy action tensor([ 1.0970, -0.6927, -0.2062, -0.6915]) tensor([0.6227, 0.1040, 0.1692, 0.1041]) -Greedy action tensor([ 0.7350, -0.4119, -0.0555, -0.1937]) tensor([0.4616, 0.1466, 0.2094, 0.1824]) -Greedy action tensor([ 1.0968, -0.7404, 0.0370, -0.6455]) tensor([0.5949, 0.0947, 0.2062, 0.1042]) -Greedy action tensor([ 0.5532, -0.2876, -0.0511, -0.1297]) tensor([0.4027, 0.1737, 0.2201, 0.2035]) -Greedy action tensor([ 0.7416, -0.2093, -0.0595, -0.1612]) tensor([0.4463, 0.1724, 0.2003, 0.1809]) -Greedy action tensor([ 1.0831, -0.9594, 0.1202, -0.5280]) tensor([0.5844, 0.0758, 0.2231, 0.1167]) -Greedy action tensor([ 0.5434, -0.1202, -0.0673, -0.0069]) tensor([0.3795, 0.1955, 0.2061, 0.2189]) -Greedy action tensor([ 1.1578, -0.4366, -0.0409, -0.0852]) tensor([0.5577, 0.1132, 0.1682, 0.1609]) -Greedy action tensor([ 0.7013, -0.4279, 0.0999, -0.4939]) tensor([0.4600, 0.1487, 0.2521, 0.1392]) -Greedy action tensor([ 0.4936, -0.2357, -0.0452, -0.1510]) tensor([0.3860, 0.1862, 0.2252, 0.2026]) -Greedy action tensor([ 0.5445, -0.0621, -0.1239, -0.1851]) tensor([0.3937, 0.2147, 0.2018, 0.1898]) -Greedy action tensor([ 0.4942, -0.3031, -0.0842, -0.1750]) tensor([0.3963, 0.1785, 0.2222, 0.2029]) -Greedy action tensor([ 0.8628, -0.4584, -0.0138, -0.3427]) tensor([0.5044, 0.1346, 0.2099, 0.1511]) -Greedy action tensor([ 1.0217, -0.5180, 0.0535, -0.5052]) tensor([0.5520, 0.1184, 0.2097, 0.1199]) -Greedy action tensor([ 0.6964, -0.3881, -0.1259, -0.2350]) tensor([0.4605, 0.1557, 0.2024, 0.1814]) -Greedy action tensor([ 0.8221, -0.6678, 0.0122, -0.5929]) tensor([0.5227, 0.1178, 0.2325, 0.1270]) -Greedy action tensor([ 0.5463, -0.3348, -0.1539, -0.1095]) tensor([0.4116, 0.1705, 0.2043, 0.2136]) -Greedy action tensor([ 1.2489, -0.7888, -0.0577, -0.5692]) tensor([0.6396, 0.0834, 0.1732, 0.1038]) -Greedy action tensor([ 0.9316, -0.4393, -0.0848, -0.4004]) tensor([0.5320, 0.1351, 0.1925, 0.1404]) -Greedy action tensor([ 0.7934, -0.2501, 0.0094, -0.2859]) tensor([0.4654, 0.1639, 0.2125, 0.1582]) -Greedy action tensor([ 1.0960, -0.4545, -0.0814, -0.3206]) tensor([0.5673, 0.1203, 0.1748, 0.1376]) -Greedy action tensor([ 0.6836, -0.5826, -0.0950, -0.4642]) tensor([0.4859, 0.1370, 0.2230, 0.1542]) -Greedy action tensor([ 0.9188, -0.2927, -0.0247, -0.5782]) tensor([0.5233, 0.1558, 0.2037, 0.1171]) -Greedy action tensor([ 1.0567, -0.5726, -0.0509, -0.3228]) tensor([0.5624, 0.1103, 0.1858, 0.1416]) -Greedy action tensor([ 0.7843, -0.6420, 0.0799, -0.2875]) tensor([0.4815, 0.1156, 0.2380, 0.1649]) -Greedy action tensor([ 0.7438, -0.4380, 0.0985, -0.1131]) tensor([0.4433, 0.1360, 0.2325, 0.1882]) -Greedy action tensor([ 0.4607, -0.0581, -0.0109, -0.0016]) tensor([0.3510, 0.2089, 0.2190, 0.2211]) -Greedy action tensor([ 0.8899, -0.5437, -0.0073, -0.3720]) tensor([0.5183, 0.1236, 0.2113, 0.1467]) -Greedy action tensor([ 0.0769, -0.2022, -0.0300, -0.5968]) tensor([0.3160, 0.2390, 0.2839, 0.1611]) -Greedy action tensor([ 0.6968, -0.4284, -0.1678, -0.0310]) tensor([0.4487, 0.1456, 0.1890, 0.2167]) -Greedy action tensor([ 0.7707, -0.7404, -0.1180, -0.0972]) tensor([0.4874, 0.1076, 0.2004, 0.2046]) -Greedy action tensor([ 1.0545, -0.6532, -0.1187, -0.4211]) tensor([0.5816, 0.1054, 0.1799, 0.1330]) -Greedy action tensor([ 0.7782, -0.6970, -0.1107, -0.1043]) tensor([0.4870, 0.1114, 0.2002, 0.2015]) -Greedy action tensor([ 0.9979, -0.2872, 0.1933, -0.2989]) tensor([0.5007, 0.1385, 0.2239, 0.1369]) -Greedy action tensor([ 0.9258, -0.4753, -0.0529, -0.1781]) tensor([0.5118, 0.1261, 0.1924, 0.1697]) -Greedy action tensor([ 0.7992, -0.2055, -0.5343, -0.6489]) tensor([0.5363, 0.1964, 0.1413, 0.1260]) -Greedy action tensor([ 1.0014, -0.6268, -0.1642, -0.3494]) tensor([0.5659, 0.1111, 0.1764, 0.1466]) -Greedy action tensor([ 0.3467, 0.1501, 0.0242, -0.1162]) tensor([0.3149, 0.2587, 0.2281, 0.1982]) -Greedy action tensor([ 0.5552, 0.2406, -0.1278, 0.1000]) tensor([0.3485, 0.2544, 0.1760, 0.2211]) -Greedy action tensor([ 0.6426, 0.3142, -0.1406, 0.2367]) tensor([0.3517, 0.2532, 0.1607, 0.2344]) -Greedy action tensor([ 1.2811, -0.8049, 0.0070, -0.6620]) tensor([0.6464, 0.0803, 0.1808, 0.0926]) -Greedy action tensor([ 0.5058, 0.1092, -0.1085, 0.1096]) tensor([0.3464, 0.2330, 0.1874, 0.2331]) -Greedy action tensor([ 0.3572, -0.0282, 0.0671, 0.0941]) tensor([0.3128, 0.2127, 0.2340, 0.2404]) -Greedy action tensor([ 0.4262, -0.4563, -0.0616, -0.0998]) tensor([0.3819, 0.1580, 0.2345, 0.2257]) -Greedy action tensor([ 1.0084, -0.3930, -0.1227, -0.4635]) tensor([0.5560, 0.1369, 0.1794, 0.1276]) -Greedy action tensor([ 0.2814, -0.0765, -0.0729, -0.0433]) tensor([0.3201, 0.2238, 0.2246, 0.2314]) -Greedy action tensor([ 0.5952, -0.2390, -0.0622, -0.0361]) tensor([0.4025, 0.1748, 0.2086, 0.2141]) -Greedy action tensor([ 0.8744, -0.4883, -0.0261, -0.5492]) tensor([0.5254, 0.1345, 0.2135, 0.1266]) -Greedy action tensor([ 0.3243, -0.1649, -0.0295, -0.1453]) tensor([0.3401, 0.2085, 0.2388, 0.2126]) -Greedy action tensor([ 0.5314, -0.2005, -0.0813, -0.0511]) tensor([0.3874, 0.1863, 0.2099, 0.2164]) -Greedy action tensor([ 0.6071, -0.3097, -0.0716, -0.1951]) tensor([0.4246, 0.1697, 0.2154, 0.1904]) -Greedy action tensor([ 1.9788, -0.3797, -0.0809, 0.4318]) tensor([0.6969, 0.0659, 0.0888, 0.1484]) -Greedy action tensor([ 1.5541, -0.5137, 0.0785, 0.6944]) tensor([0.5623, 0.0711, 0.1286, 0.2380]) -Greedy action tensor([ 3.1495, -1.2706, -0.5840, 0.7429]) tensor([0.8881, 0.0107, 0.0212, 0.0800]) -Greedy action tensor([ 2.2462, -0.9150, 0.2085, 0.9188]) tensor([0.6955, 0.0295, 0.0906, 0.1844]) -Greedy action tensor([ 1.7243, 0.1185, -0.2899, 0.4917]) tensor([0.6151, 0.1235, 0.0821, 0.1793]) -Greedy action tensor([ 1.2075, 0.1186, -1.2900, 0.0374]) tensor([0.5783, 0.1946, 0.0476, 0.1795]) -Greedy action tensor([ 2.0526, -0.3988, -0.1129, 0.1639]) tensor([0.7396, 0.0637, 0.0848, 0.1119]) -Greedy action tensor([ 1.1212, -0.4970, -0.2481, 0.2681]) tensor([0.5323, 0.1055, 0.1354, 0.2268]) -Greedy action tensor([ 1.2400, -0.4795, -0.6286, 0.0886]) tensor([0.6062, 0.1086, 0.0936, 0.1917]) -Greedy action tensor([ 1.7078, 0.0780, -0.4194, 0.5118]) tensor([0.6182, 0.1212, 0.0737, 0.1869]) -Greedy action tensor([ 1.8684, -0.5179, -0.4666, 0.1890]) tensor([0.7271, 0.0669, 0.0704, 0.1356]) -Greedy action tensor([ 1.4229, 0.0044, -0.1738, 0.5428]) tensor([0.5378, 0.1302, 0.1089, 0.2231]) -Greedy action tensor([ 1.8998, -0.6830, -0.7145, 0.6450]) tensor([0.6974, 0.0527, 0.0511, 0.1988]) -Greedy action tensor([ 1.6256, -0.1745, -1.1746, 0.4278]) tensor([0.6545, 0.1082, 0.0398, 0.1976]) -Greedy action tensor([ 1.3239, -0.3064, -0.4169, 0.4147]) tensor([0.5637, 0.1104, 0.0989, 0.2271]) -Greedy action tensor([ 1.2357, -0.4598, -0.1353, 0.5145]) tensor([0.5199, 0.0954, 0.1320, 0.2528]) -Greedy action tensor([ 1.3678, -0.6166, -1.1540, 0.1832]) tensor([0.6563, 0.0902, 0.0527, 0.2007]) -Greedy action tensor([ 1.8619, -1.1833, -0.1070, 0.2953]) tensor([0.7164, 0.0341, 0.1000, 0.1495]) -Greedy action tensor([ 1.9604, -0.3048, -0.3340, 0.3699]) tensor([0.7100, 0.0737, 0.0716, 0.1447]) -Greedy action tensor([ 1.2759, -0.1119, -0.5236, 0.0682]) tensor([0.5835, 0.1456, 0.0965, 0.1744]) -Greedy action tensor([ 1.8686, -0.4528, -0.6309, 0.3224]) tensor([0.7177, 0.0704, 0.0589, 0.1529]) -Greedy action tensor([ 1.3405, -0.3140, -0.2931, 0.1143]) tensor([0.5953, 0.1138, 0.1162, 0.1747]) -Greedy action tensor([ 1.0402, -0.2311, -0.6771, 0.4143]) tensor([0.5013, 0.1406, 0.0900, 0.2681]) -Greedy action tensor([ 1.9364, 0.0047, -0.2356, 0.4379]) tensor([0.6746, 0.0978, 0.0769, 0.1508]) -Greedy action tensor([ 2.2866, -1.0520, -0.6078, 0.5236]) tensor([0.7922, 0.0281, 0.0438, 0.1359]) -Greedy action tensor([ 1.4340, -0.2503, -0.3325, 0.1566]) tensor([0.6115, 0.1135, 0.1045, 0.1705]) -Greedy action tensor([ 2.5755, -0.7403, -0.4621, 0.5343]) tensor([0.8236, 0.0299, 0.0395, 0.1070]) -Greedy action tensor([ 0.9548, -0.2361, -0.9864, 0.5009]) tensor([0.4802, 0.1459, 0.0689, 0.3050]) -Greedy action tensor([ 1.3893, -0.6220, -0.4129, 0.5944]) tensor([0.5713, 0.0764, 0.0942, 0.2580]) -Greedy action tensor([ 1.9857, -0.9220, -0.4002, 0.3458]) tensor([0.7459, 0.0407, 0.0686, 0.1447]) -Greedy action tensor([ 1.2069, -0.5255, -0.6223, 0.3725]) tensor([0.5645, 0.0998, 0.0906, 0.2451]) -Greedy action tensor([ 1.4772, -0.5144, -0.7147, 0.0391]) tensor([0.6732, 0.0919, 0.0752, 0.1598]) -Greedy action tensor([ 1.3528, -0.4929, -0.6102, 0.3607]) tensor([0.5991, 0.0946, 0.0841, 0.2221]) -Greedy action tensor([ 1.2960, -0.6238, -0.3198, 0.6750]) tensor([0.5311, 0.0779, 0.1056, 0.2854]) -Greedy action tensor([ 1.1397, -0.2238, -0.7785, 0.0721]) tensor([0.5726, 0.1465, 0.0841, 0.1969]) -Greedy action tensor([ 2.1047, -1.4933, -0.2070, 0.2990]) tensor([0.7747, 0.0212, 0.0768, 0.1273]) -Greedy action tensor([ 1.2493, -0.0928, -0.5124, 0.1145]) tensor([0.5699, 0.1489, 0.0979, 0.1832]) -Greedy action tensor([ 1.0271, 0.0227, -0.5810, 0.2322]) tensor([0.4955, 0.1815, 0.0992, 0.2238]) -Greedy action tensor([ 2.5263, -0.4455, 0.0330, 0.0800]) tensor([0.8194, 0.0420, 0.0677, 0.0710]) -Greedy action tensor([ 2.0220, -0.5584, -0.0578, 0.4665]) tensor([0.7083, 0.0537, 0.0885, 0.1495]) -Greedy action tensor([ 1.3482, -0.3847, -0.5754, 0.2623]) tensor([0.6022, 0.1065, 0.0880, 0.2033]) -Greedy action tensor([ 1.5138, -0.7844, -0.2835, 0.4134]) tensor([0.6254, 0.0628, 0.1037, 0.2081]) -Greedy action tensor([ 1.1133, -0.0906, -0.8562, 0.3870]) tensor([0.5200, 0.1560, 0.0725, 0.2515]) -Greedy action tensor([ 1.7110, -0.5680, -0.2024, 0.4934]) tensor([0.6469, 0.0662, 0.0955, 0.1914]) -Greedy action tensor([ 1.9811, -1.1421, -0.5618, 0.3374]) tensor([0.7599, 0.0335, 0.0598, 0.1469]) -Greedy action tensor([ 2.1172, -0.7635, -0.2210, 0.2951]) tensor([0.7609, 0.0427, 0.0734, 0.1230]) -Greedy action tensor([ 1.4177, -0.3080, -0.9538, 0.3985]) tensor([0.6126, 0.1091, 0.0572, 0.2211]) -Greedy action tensor([ 1.3503, -0.4895, -0.7590, 0.0576]) tensor([0.6432, 0.1022, 0.0780, 0.1766]) -Greedy action tensor([ 1.1287, -0.2934, -0.5530, 0.0414]) tensor([0.5668, 0.1367, 0.1055, 0.1911]) -Greedy action tensor([ 1.7949, -0.3535, -0.7846, 0.6943]) tensor([0.6557, 0.0765, 0.0497, 0.2181]) -Greedy action tensor([ 0.9431, -0.2800, -0.8937, 0.2980]) tensor([0.5055, 0.1488, 0.0805, 0.2652]) -Greedy action tensor([ 1.8872, -0.3897, -0.9751, 0.0181]) tensor([0.7610, 0.0781, 0.0435, 0.1174]) -Greedy action tensor([ 1.1521, -0.1572, -0.1428, 0.0414]) tensor([0.5338, 0.1441, 0.1462, 0.1758]) -Greedy action tensor([ 1.5166, -0.2367, -0.1139, -0.1699]) tensor([0.6434, 0.1114, 0.1260, 0.1191]) -Greedy action tensor([ 1.6264, -0.5959, 0.0466, 0.0319]) tensor([0.6590, 0.0714, 0.1358, 0.1338]) -Greedy action tensor([ 1.4565, -0.2708, -0.7520, 0.1659]) tensor([0.6399, 0.1138, 0.0703, 0.1760]) -Greedy action tensor([ 1.4101, -0.5771, -0.4589, 0.2694]) tensor([0.6208, 0.0851, 0.0958, 0.1984]) -Greedy action tensor([ 1.6567, -0.5386, -0.4078, 0.3836]) tensor([0.6587, 0.0733, 0.0836, 0.1844]) -Greedy action tensor([ 1.3453, 0.0376, -0.5534, 0.4685]) tensor([0.5446, 0.1473, 0.0816, 0.2266]) -Greedy action tensor([ 1.4959, -0.5230, -0.1146, 0.3682]) tensor([0.6037, 0.0802, 0.1206, 0.1955]) -Greedy action tensor([ 1.1587, -0.4708, -0.8864, 0.0073]) tensor([0.6092, 0.1194, 0.0788, 0.1926]) -Greedy action tensor([ 1.6949, -0.7185, -0.2293, 0.1836]) tensor([0.6868, 0.0615, 0.1003, 0.1515]) -Greedy action tensor([ 1.4917, -0.1250, -0.4257, -0.1629]) tensor([0.6507, 0.1292, 0.0956, 0.1244]) -Greedy action tensor([ 1.3148, -0.5640, -0.6030, 0.1579]) tensor([0.6195, 0.0946, 0.0910, 0.1948]) -Greedy action tensor([ 1.4261, -0.3089, -0.7406, 0.2706]) tensor([0.6227, 0.1098, 0.0713, 0.1961]) -Greedy action tensor([ 1.6676, -0.8064, -0.5332, 0.3676]) tensor([0.6814, 0.0574, 0.0754, 0.1857]) -Greedy action tensor([ 1.2852, -0.5363, -0.2533, 0.2577]) tensor([0.5766, 0.0933, 0.1238, 0.2064]) -Greedy action tensor([ 1.4419, -0.8721, -0.0605, 0.1998]) tensor([0.6210, 0.0614, 0.1382, 0.1793]) -Greedy action tensor([ 1.0770, -0.1916, -0.4978, 0.1780]) tensor([0.5277, 0.1484, 0.1092, 0.2147]) -Greedy action tensor([ 2.1091, -1.1843, -0.2617, 0.5725]) tensor([0.7431, 0.0276, 0.0694, 0.1599]) -Greedy action tensor([ 1.1224, 0.2761, -0.7188, 0.2330]) tensor([0.5004, 0.2146, 0.0794, 0.2056]) -Greedy action tensor([ 1.9517, -0.0961, -0.0658, -0.0813]) tensor([0.7179, 0.0926, 0.0955, 0.0940]) -Greedy action tensor([ 1.2329, -0.5074, -0.5761, 0.3609]) tensor([0.5690, 0.0998, 0.0932, 0.2379]) -Greedy action tensor([ 1.1322, -0.4872, -0.5637, -0.0911]) tensor([0.5968, 0.1182, 0.1095, 0.1756]) -Greedy action tensor([ 1.3706, -0.1300, -0.4841, 0.2052]) tensor([0.5913, 0.1318, 0.0925, 0.1843]) -Greedy action tensor([ 1.9701, -0.2137, -0.8077, 0.4378]) tensor([0.7190, 0.0810, 0.0447, 0.1553]) -Greedy action tensor([ 1.5741, 0.1686, -0.3799, 0.5438]) tensor([0.5734, 0.1406, 0.0813, 0.2047]) -Greedy action tensor([ 1.6059, -0.5933, -0.6851, 0.5890]) tensor([0.6354, 0.0705, 0.0643, 0.2298]) -Greedy action tensor([ 1.3407, -0.5359, -0.5697, 0.3303]) tensor([0.6005, 0.0919, 0.0889, 0.2186]) -Greedy action tensor([ 1.3135, -0.3051, -0.8518, 0.4157]) tensor([0.5813, 0.1152, 0.0667, 0.2368]) -Greedy action tensor([ 1.6780, -0.1867, -0.5217, 0.1222]) tensor([0.6771, 0.1049, 0.0751, 0.1429]) -Greedy action tensor([-1.9267, -0.4216, 0.6604, -0.1711]) tensor([0.0407, 0.1832, 0.5407, 0.2354]) -Greedy action tensor([-1.9233, -0.4180, 0.6534, -0.1658]) tensor([0.0409, 0.1842, 0.5378, 0.2371]) -Greedy action tensor([-1.8661, -0.4261, 0.6251, -0.1495]) tensor([0.0437, 0.1846, 0.5282, 0.2434]) -Greedy action tensor([-1.6130, -0.4604, 0.5147, 0.0079]) tensor([0.0568, 0.1797, 0.4765, 0.2871]) -Greedy action tensor([-1.8994, -0.4518, 0.6503, -0.1555]) tensor([0.0421, 0.1789, 0.5385, 0.2406]) -Greedy action tensor([-1.1073, -0.2223, 0.5401, 0.3261]) tensor([0.0781, 0.1892, 0.4054, 0.3273]) -Greedy action tensor([-1.8251, -0.4612, 0.6094, -0.1259]) tensor([0.0459, 0.1795, 0.5236, 0.2510]) -Greedy action tensor([-1.9163, -0.4460, 0.6545, -0.1659]) tensor([0.0413, 0.1799, 0.5407, 0.2381]) -Greedy action tensor([-1.8796, -0.4847, 0.6296, -0.1478]) tensor([0.0435, 0.1756, 0.5350, 0.2459]) -Greedy action tensor([-1.8931, -0.3914, 0.6262, -0.1529]) tensor([0.0424, 0.1902, 0.5261, 0.2414]) -Greedy action tensor([-1.7743, -0.6128, 1.0730, 0.1842]) tensor([0.0351, 0.1120, 0.6044, 0.2485]) -Greedy action tensor([-1.9161, -0.4118, 0.6497, -0.1632]) tensor([0.0412, 0.1854, 0.5358, 0.2377]) -Greedy action tensor([-1.3706, -0.2698, 0.8993, 0.8268]) tensor([0.0441, 0.1325, 0.4266, 0.3968]) -Greedy action tensor([-1.9276, -0.3992, 0.6494, -0.1688]) tensor([0.0407, 0.1876, 0.5354, 0.2362]) -Greedy action tensor([-1.3837, -0.5745, 0.3857, 0.0573]) tensor([0.0750, 0.1684, 0.4399, 0.3168]) -Greedy action tensor([-1.9099, -0.4368, 0.6542, -0.1616]) tensor([0.0415, 0.1811, 0.5390, 0.2384]) -Greedy action tensor([-1.8937, -0.4548, 0.6473, -0.1537]) tensor([0.0424, 0.1786, 0.5377, 0.2414]) -Greedy action tensor([-1.7278, -0.3646, 0.5571, -0.0531]) tensor([0.0498, 0.1948, 0.4895, 0.2659]) -Greedy action tensor([-1.8484, -0.3144, 0.6002, -0.1447]) tensor([0.0440, 0.2042, 0.5097, 0.2420]) -Greedy action tensor([-1.8447, -0.3786, 0.6449, -0.1341]) tensor([0.0436, 0.1890, 0.5260, 0.2414]) -Greedy action tensor([-1.3348, 0.7459, 0.5011, -0.5417]) tensor([0.0572, 0.4580, 0.3585, 0.1264]) -Greedy action tensor([-1.7122, -0.4542, 0.5336, -0.0532]) tensor([0.0520, 0.1831, 0.4916, 0.2734]) -Greedy action tensor([-1.9204, -0.3772, 0.6493, -0.1542]) tensor([0.0407, 0.1903, 0.5312, 0.2378]) -Greedy action tensor([-1.9344, -0.4227, 0.6543, -0.1757]) tensor([0.0406, 0.1839, 0.5400, 0.2355]) -Greedy action tensor([-1.7548, 0.2613, 0.4776, -0.0859]) tensor([0.0432, 0.3245, 0.4029, 0.2293]) -Greedy action tensor([-1.8607, -0.3723, 0.6253, -0.1254]) tensor([0.0433, 0.1917, 0.5198, 0.2453]) -Greedy action tensor([-1.4726, -0.5485, 0.4706, -0.0803]) tensor([0.0689, 0.1735, 0.4806, 0.2770]) -Greedy action tensor([-1.8372, -0.1875, 0.5839, -0.1174]) tensor([0.0434, 0.2259, 0.4885, 0.2423]) -Greedy action tensor([-1.9475, -0.4507, 0.6679, -0.1822]) tensor([0.0400, 0.1788, 0.5473, 0.2339]) -Greedy action tensor([-1.8695, -0.5153, 0.1211, -0.4704]) tensor([0.0616, 0.2385, 0.4506, 0.2494]) -Greedy action tensor([-1.8835, -0.4306, 0.6360, -0.1509]) tensor([0.0428, 0.1831, 0.5319, 0.2422]) -Greedy action tensor([-1.8188, -0.2943, 0.5898, -0.0911]) tensor([0.0448, 0.2056, 0.4977, 0.2519]) -Greedy action tensor([-1.8704, -0.4251, 0.6266, -0.1382]) tensor([0.0434, 0.1841, 0.5271, 0.2453]) -Greedy action tensor([-1.9455, -0.4507, 0.6680, -0.1812]) tensor([0.0401, 0.1788, 0.5471, 0.2340]) -Greedy action tensor([-1.8789, -0.1476, 0.5903, -0.1561]) tensor([0.0416, 0.2347, 0.4910, 0.2327]) -Greedy action tensor([-1.8353, -0.4539, 0.6165, -0.1252]) tensor([0.0452, 0.1799, 0.5249, 0.2500]) -Greedy action tensor([-1.8819, -0.4434, 0.6369, -0.1487]) tensor([0.0429, 0.1810, 0.5331, 0.2430]) -Greedy action tensor([0.0447, 1.2320, 0.0360, 0.4670]) tensor([0.1472, 0.4824, 0.1459, 0.2245]) -Greedy action tensor([-1.8770, -0.4216, 0.6310, -0.1508]) tensor([0.0431, 0.1849, 0.5297, 0.2424]) -Greedy action tensor([-1.8484, -0.4200, 0.6146, -0.1474]) tensor([0.0447, 0.1863, 0.5243, 0.2447]) -Greedy action tensor([-1.9153, -0.4083, 0.6472, -0.1552]) tensor([0.0412, 0.1858, 0.5338, 0.2393]) -Greedy action tensor([-1.9174, -0.4266, 0.6465, -0.1695]) tensor([0.0414, 0.1837, 0.5373, 0.2376]) -Greedy action tensor([-1.8813, -0.1179, 0.5897, -0.1596]) tensor([0.0412, 0.2404, 0.4878, 0.2306]) -Greedy action tensor([-1.8422, -0.3807, 0.6260, -0.1309]) tensor([0.0442, 0.1904, 0.5210, 0.2444]) -Greedy action tensor([-1.8399, -0.4728, 0.6414, -0.1326]) tensor([0.0447, 0.1752, 0.5339, 0.2462]) -Greedy action tensor([-0.4016, -0.0103, 0.0310, 0.3331]) tensor([0.1638, 0.2422, 0.2525, 0.3415]) -Greedy action tensor([-1.8284, -0.4769, 0.8166, 0.1702]) tensor([0.0380, 0.1467, 0.5350, 0.2803]) -Greedy action tensor([-1.2892, -0.6155, 0.3110, 0.2260]) tensor([0.0802, 0.1573, 0.3974, 0.3650]) -Greedy action tensor([-1.7924, -0.5138, -0.7003, -0.8198]) tensor([0.0979, 0.3515, 0.2917, 0.2589]) -Greedy action tensor([-1.8904, -0.4245, 0.6435, -0.1517]) tensor([0.0423, 0.1834, 0.5334, 0.2409]) -Greedy action tensor([-1.4629, -0.3461, 0.4304, -0.0612]) tensor([0.0678, 0.2070, 0.4500, 0.2752]) -Greedy action tensor([-0.2537, 1.1214, 0.0063, 0.4062]) tensor([0.1221, 0.4831, 0.1584, 0.2363]) -Greedy action tensor([-1.5676, -0.5327, 0.4759, -0.0168]) tensor([0.0615, 0.1733, 0.4750, 0.2902]) -Greedy action tensor([-1.8816, -0.3308, 0.6229, -0.1483]) tensor([0.0424, 0.1997, 0.5183, 0.2397]) -Greedy action tensor([-1.8096, -0.1605, 0.5479, -0.1208]) tensor([0.0451, 0.2346, 0.4763, 0.2441]) -Greedy action tensor([-1.8085, -0.4895, 0.6005, -0.1371]) tensor([0.0472, 0.1766, 0.5251, 0.2511]) -Greedy action tensor([-1.8861, -0.2851, 0.6112, -0.1383]) tensor([0.0419, 0.2079, 0.5094, 0.2408]) -Greedy action tensor([-1.9251, -0.4268, 0.6564, -0.1697]) tensor([0.0409, 0.1828, 0.5400, 0.2364]) -Greedy action tensor([-1.8679, -0.4631, 0.6329, -0.1496]) tensor([0.0438, 0.1784, 0.5338, 0.2441]) -Greedy action tensor([-1.9219, -0.4351, 0.6572, -0.1674]) tensor([0.0410, 0.1813, 0.5406, 0.2370]) -Greedy action tensor([-0.6298, 0.9620, 0.0248, 0.3820]) tensor([0.0945, 0.4640, 0.1818, 0.2598]) -Greedy action tensor([-1.7595, -0.2938, 0.5637, -0.1164]) tensor([0.0483, 0.2091, 0.4929, 0.2497]) -Greedy action tensor([-1.9205, -0.4246, 0.6568, -0.1678]) tensor([0.0410, 0.1830, 0.5395, 0.2365]) -Greedy action tensor([-1.7154, -0.5206, 0.5517, -0.0533]) tensor([0.0520, 0.1718, 0.5020, 0.2741]) -Greedy action tensor([-1.8371, -0.3531, 0.6306, -0.1274]) tensor([0.0440, 0.1940, 0.5188, 0.2431]) -Greedy action tensor([-1.6779, 0.0674, 0.4695, -0.1160]) tensor([0.0499, 0.2856, 0.4269, 0.2377]) -Greedy action tensor([-1.9185, -0.4208, 0.6554, -0.1647]) tensor([0.0410, 0.1835, 0.5384, 0.2371]) -Greedy action tensor([-1.8488, -0.4199, 0.6161, -0.1380]) tensor([0.0445, 0.1858, 0.5235, 0.2463]) -Greedy action tensor([-0.4272, -0.4682, 0.5369, 0.7200]) tensor([0.1293, 0.1241, 0.3392, 0.4073]) -Greedy action tensor([-1.7382, -0.4214, 0.5684, -0.0632]) tensor([0.0497, 0.1856, 0.4993, 0.2655]) -Greedy action tensor([-0.8910, 0.8624, 0.0939, 0.3580]) tensor([0.0773, 0.4463, 0.2069, 0.2695]) -Greedy action tensor([-1.8870, -0.3153, 0.6182, -0.1358]) tensor([0.0420, 0.2021, 0.5141, 0.2419]) -Greedy action tensor([-0.2940, 0.3883, 0.0975, 0.1658]) tensor([0.1655, 0.3275, 0.2449, 0.2621]) -Greedy action tensor([-1.8942, -0.3409, 0.6327, -0.1505]) tensor([0.0417, 0.1973, 0.5223, 0.2387]) -Greedy action tensor([-1.9224, -0.4285, 0.6568, -0.1678]) tensor([0.0409, 0.1824, 0.5399, 0.2367]) -Greedy action tensor([-1.8233, -0.3379, 0.5925, -0.1304]) tensor([0.0454, 0.2003, 0.5079, 0.2465]) -Greedy action tensor([-1.9288, -0.4448, 0.6607, -0.1728]) tensor([0.0408, 0.1799, 0.5433, 0.2361]) -Greedy action tensor([-1.4670, -0.5440, 0.8155, 0.5384]) tensor([0.0482, 0.1213, 0.4724, 0.3581]) -Greedy action tensor([-1.9050, -0.4487, 0.6507, -0.1634]) tensor([0.0419, 0.1797, 0.5394, 0.2390]) -Greedy action tensor([-1.9198, -0.4721, 0.7242, -0.1426]) tensor([0.0396, 0.1686, 0.5575, 0.2343]) -Greedy action tensor([-1.8615, -0.3258, 0.6217, -0.1338]) tensor([0.0430, 0.1997, 0.5152, 0.2420]) -Greedy action tensor([ 0.9959, -0.6477, 0.1083, -0.5677]) tensor([0.5512, 0.1065, 0.2269, 0.1154]) -Greedy action tensor([ 0.4821, -0.2004, 0.0062, -0.2073]) tensor([0.3804, 0.1923, 0.2364, 0.1909]) -Greedy action tensor([ 0.5904, -0.2350, -0.0778, -0.1108]) tensor([0.4087, 0.1790, 0.2095, 0.2027]) -Greedy action tensor([ 0.8150, -0.6901, 0.0677, -0.3047]) tensor([0.4946, 0.1098, 0.2342, 0.1614]) -Greedy action tensor([ 0.4902, -0.0704, 0.0825, -0.3034]) tensor([0.3720, 0.2124, 0.2474, 0.1682]) -Greedy action tensor([ 0.7114, -0.1661, -0.0256, -0.0955]) tensor([0.4272, 0.1777, 0.2044, 0.1906]) -Greedy action tensor([ 0.9945, -0.7694, 0.0932, -0.4493]) tensor([0.5514, 0.0945, 0.2239, 0.1302]) -Greedy action tensor([ 0.8062, -0.1518, -0.0291, -0.0703]) tensor([0.4477, 0.1718, 0.1942, 0.1863]) -Greedy action tensor([ 1.0297, -0.6380, -0.0300, -0.5955]) tensor([0.5773, 0.1089, 0.2001, 0.1137]) -Greedy action tensor([ 0.7289, -0.4422, -0.0968, -0.0593]) tensor([0.4540, 0.1408, 0.1988, 0.2064]) -Greedy action tensor([ 0.9104, -0.5246, 0.0752, -0.4791]) tensor([0.5205, 0.1240, 0.2258, 0.1297]) -Greedy action tensor([ 0.9228, -0.8097, 0.0497, -0.5275]) tensor([0.5468, 0.0967, 0.2283, 0.1282]) -Greedy action tensor([ 0.9052, -0.6920, 0.1491, -0.2522]) tensor([0.5035, 0.1019, 0.2364, 0.1582]) -Greedy action tensor([ 1.1681, -0.7793, -0.1318, -0.7000]) tensor([0.6371, 0.0909, 0.1736, 0.0984]) -Greedy action tensor([ 0.6163, -0.0301, -0.0912, -0.0233]) tensor([0.3931, 0.2059, 0.1937, 0.2073]) -Greedy action tensor([ 1.0296, -0.6998, 0.1201, -0.3798]) tensor([0.5481, 0.0972, 0.2207, 0.1339]) -Greedy action tensor([ 0.7886, -0.1285, -0.1455, -0.2173]) tensor([0.4633, 0.1852, 0.1821, 0.1694]) -Greedy action tensor([ 0.9315, -0.4451, -0.0676, -0.3619]) tensor([0.5277, 0.1332, 0.1943, 0.1448]) -Greedy action tensor([ 1.1382, -0.5547, -0.0479, -0.3815]) tensor([0.5854, 0.1077, 0.1788, 0.1281]) -Greedy action tensor([ 0.6810, -0.1762, 0.0197, -0.0737]) tensor([0.4148, 0.1760, 0.2141, 0.1950]) -Greedy action tensor([ 0.6017, -0.4195, -0.2657, -0.2039]) tensor([0.4490, 0.1617, 0.1886, 0.2006]) -Greedy action tensor([ 0.8960, -0.6841, -0.0341, -0.3620]) tensor([0.5306, 0.1093, 0.2093, 0.1508]) -Greedy action tensor([ 1.0793, -0.6193, -0.0766, -0.5904]) tensor([0.5931, 0.1085, 0.1867, 0.1117]) -Greedy action tensor([ 0.7864, -0.4425, -0.1872, -0.2807]) tensor([0.4964, 0.1453, 0.1875, 0.1708]) -Greedy action tensor([ 0.5084, -0.1870, 0.0529, -0.1301]) tensor([0.3758, 0.1875, 0.2383, 0.1985]) -Greedy action tensor([ 1.0831, -0.6302, -0.0104, -0.5937]) tensor([0.5875, 0.1059, 0.1968, 0.1098]) -Greedy action tensor([ 1.1398, -0.7495, -0.0375, -0.6248]) tensor([0.6133, 0.0927, 0.1890, 0.1050]) -Greedy action tensor([ 0.8021, -0.3575, -0.1693, -0.1747]) tensor([0.4834, 0.1516, 0.1830, 0.1820]) -Greedy action tensor([ 1.0807, -0.4348, 0.1027, -0.3778]) tensor([0.5469, 0.1202, 0.2057, 0.1272]) -Greedy action tensor([ 0.5454, -0.0493, -0.0473, 0.0100]) tensor([0.3717, 0.2051, 0.2055, 0.2176]) -Greedy action tensor([ 0.9628, -0.4816, 0.0737, -0.2709]) tensor([0.5160, 0.1217, 0.2121, 0.1503]) -Greedy action tensor([ 0.8243, -0.4300, 0.0430, -0.3966]) tensor([0.4907, 0.1400, 0.2246, 0.1447]) -Greedy action tensor([ 1.0119, -0.5538, 0.0054, -0.3536]) tensor([0.5465, 0.1142, 0.1998, 0.1395]) -Greedy action tensor([ 0.7055, -0.4642, -0.0071, -0.0776]) tensor([0.4429, 0.1375, 0.2172, 0.2024]) -Greedy action tensor([ 0.4043, -0.1503, -0.1314, -0.0357]) tensor([0.3567, 0.2049, 0.2088, 0.2297]) -Greedy action tensor([ 0.4263, -0.1578, 0.0557, -0.0322]) tensor([0.3472, 0.1936, 0.2397, 0.2195]) -Greedy action tensor([ 0.8690, -0.8473, 0.0616, -0.4978]) tensor([0.5317, 0.0956, 0.2372, 0.1356]) -Greedy action tensor([ 0.2742, -0.0075, -0.0922, -0.2301]) tensor([0.3277, 0.2473, 0.2272, 0.1979]) -Greedy action tensor([ 0.7109, -0.3242, -0.0270, -0.5227]) tensor([0.4707, 0.1672, 0.2250, 0.1371]) -Greedy action tensor([ 0.5665, 0.2251, -0.1812, 0.0688]) tensor([0.3581, 0.2546, 0.1696, 0.2177]) -Greedy action tensor([ 1.1764, -0.6673, -0.0973, -0.5763]) tensor([0.6206, 0.0982, 0.1737, 0.1076]) -Greedy action tensor([ 0.9180, -0.7419, 0.0122, -0.3396]) tensor([0.5323, 0.1012, 0.2152, 0.1513]) -Greedy action tensor([ 0.5526, -0.0345, -0.0980, -0.0149]) tensor([0.3781, 0.2102, 0.1973, 0.2144]) -Greedy action tensor([ 0.7073, -0.5801, -0.0062, -0.4510]) tensor([0.4808, 0.1327, 0.2355, 0.1510]) -Greedy action tensor([ 0.6260, -0.4098, 0.0565, -0.1803]) tensor([0.4224, 0.1499, 0.2390, 0.1886]) -Greedy action tensor([ 0.6367, -0.2796, 0.0721, -0.1694]) tensor([0.4140, 0.1656, 0.2354, 0.1849]) -Greedy action tensor([ 1.1143, -0.4977, 0.1226, -0.4261]) tensor([0.5603, 0.1118, 0.2078, 0.1201]) -Greedy action tensor([ 0.7409, -0.5238, 0.0602, -0.1389]) tensor([0.4538, 0.1281, 0.2298, 0.1883]) -Greedy action tensor([ 0.4225, 0.0870, -0.2899, -0.1231]) tensor([0.3591, 0.2567, 0.1761, 0.2081]) -Greedy action tensor([ 0.8416, -0.4981, -0.1934, -0.5288]) tensor([0.5344, 0.1400, 0.1898, 0.1357]) -Greedy action tensor([ 0.8528, -0.5929, -0.0409, -0.5596]) tensor([0.5296, 0.1248, 0.2167, 0.1290]) -Greedy action tensor([ 0.9080, -0.6723, 0.2122, -0.1868]) tensor([0.4904, 0.1010, 0.2445, 0.1641]) -Greedy action tensor([ 0.7479, -0.5690, 0.0909, -0.2895]) tensor([0.4671, 0.1252, 0.2422, 0.1655]) -Greedy action tensor([ 0.7778, -0.5377, 0.0829, -0.2783]) tensor([0.4727, 0.1269, 0.2360, 0.1644]) -Greedy action tensor([ 0.8631, -0.4907, -0.0819, -0.2913]) tensor([0.5096, 0.1316, 0.1981, 0.1607]) -Greedy action tensor([ 1.3160, -0.6239, 0.0183, -0.9308]) tensor([0.6568, 0.0944, 0.1794, 0.0694]) -Greedy action tensor([ 0.9646, -0.7090, 0.0290, -0.5540]) tensor([0.5559, 0.1043, 0.2181, 0.1217]) -Greedy action tensor([ 0.9169, -0.5339, 0.1137, -0.5120]) tensor([0.5203, 0.1220, 0.2331, 0.1247]) -Greedy action tensor([ 0.8653, -0.3349, 0.1960, -0.4176]) tensor([0.4784, 0.1441, 0.2450, 0.1326]) -Greedy action tensor([ 0.7917, -0.4831, -0.0360, -0.3085]) tensor([0.4880, 0.1364, 0.2133, 0.1624]) -Greedy action tensor([ 0.7793, -0.5278, 0.0126, -0.2145]) tensor([0.4750, 0.1285, 0.2206, 0.1758]) -Greedy action tensor([ 1.1361, -0.9173, 0.0192, -0.5949]) tensor([0.6125, 0.0786, 0.2005, 0.1085]) -Greedy action tensor([ 0.6549, -0.3263, 0.0456, -0.1096]) tensor([0.4194, 0.1572, 0.2281, 0.1953]) -Greedy action tensor([ 7.3216e-01, -4.8208e-01, -7.1684e-04, -1.5180e-01]) tensor([0.4565, 0.1355, 0.2194, 0.1886]) -Greedy action tensor([ 1.0515, -0.5881, -0.0795, -0.3153]) tensor([0.5644, 0.1095, 0.1822, 0.1439]) -Greedy action tensor([ 1.1070, -0.5288, -0.0101, -0.2098]) tensor([0.5587, 0.1088, 0.1828, 0.1497]) -Greedy action tensor([ 0.4525, -0.0436, -0.1370, -0.0669]) tensor([0.3625, 0.2207, 0.2011, 0.2157]) -Greedy action tensor([ 0.9826, -0.8448, -0.0370, -0.4682]) tensor([0.5695, 0.0916, 0.2054, 0.1335]) -Greedy action tensor([ 0.8667, -0.6803, -0.1883, -0.3323]) tensor([0.5369, 0.1143, 0.1869, 0.1619]) -Greedy action tensor([ 0.8953, -0.5920, 0.0439, -0.3801]) tensor([0.5176, 0.1170, 0.2209, 0.1446]) -Greedy action tensor([ 1.0808, -0.2431, -0.0453, -0.2810]) tensor([0.5415, 0.1441, 0.1756, 0.1387]) -Greedy action tensor([ 0.8505, -0.8950, 0.0723, -0.3928]) tensor([0.5202, 0.0908, 0.2389, 0.1500]) -Greedy action tensor([ 0.9434, -0.4444, -0.0229, -0.4817]) tensor([0.5346, 0.1334, 0.2034, 0.1286]) -Greedy action tensor([ 1.0023, -0.6595, -0.1228, -0.2996]) tensor([0.5598, 0.1062, 0.1817, 0.1523]) -Greedy action tensor([ 0.6424, -0.4915, -0.1044, -0.3153]) tensor([0.4588, 0.1476, 0.2174, 0.1761]) -Greedy action tensor([ 0.9028, -0.3285, -0.1152, -0.6450]) tensor([0.5359, 0.1565, 0.1936, 0.1140]) -Greedy action tensor([ 0.4885, 0.1052, -0.0801, 0.0309]) tensor([0.3471, 0.2366, 0.1966, 0.2197]) -Greedy action tensor([ 1.4779, -0.9469, 0.0073, -0.6458]) tensor([0.6955, 0.0615, 0.1598, 0.0832]) -Greedy action tensor([ 1.1382, -0.5739, 0.0012, -0.7942]) tensor([0.6075, 0.1096, 0.1949, 0.0880]) -Greedy action tensor([ 0.7057, -0.0702, -0.1281, 0.0696]) tensor([0.4125, 0.1899, 0.1792, 0.2184]) -Greedy action tensor([ 1.0627, -0.8453, 0.1133, -0.6818]) tensor([0.5848, 0.0868, 0.2263, 0.1022]) -Greedy action tensor([ 0.8894, -0.7525, 0.0364, -0.1661]) tensor([0.5082, 0.0984, 0.2166, 0.1769]) -Greedy action tensor([-0.8341, -1.8532, -0.0256, -0.8704]) tensor([0.2188, 0.0790, 0.4912, 0.2110]) -Greedy action tensor([-0.2406, -0.6702, 0.4803, -0.5595]) tensor([0.2255, 0.1468, 0.4637, 0.1639]) -Greedy action tensor([-0.2729, -0.4068, -0.6540, -0.2229]) tensor([0.2771, 0.2424, 0.1893, 0.2913]) -Greedy action tensor([ 0.2679, -0.8655, 0.1336, -0.5605]) tensor([0.3798, 0.1223, 0.3321, 0.1659]) -Greedy action tensor([-0.6328, -0.1095, -0.9252, -0.6011]) tensor([0.2239, 0.3779, 0.1671, 0.2311]) -Greedy action tensor([ 1.0143, -0.3819, -0.2024, 0.6863]) tensor([0.4417, 0.1093, 0.1308, 0.3182]) -Greedy action tensor([-0.8236, -0.9474, 0.3510, -1.2012]) tensor([0.1722, 0.1522, 0.5575, 0.1181]) -Greedy action tensor([-0.8573, -0.2617, -1.0595, -0.1573]) tensor([0.1772, 0.3214, 0.1447, 0.3567]) -Greedy action tensor([0.4601, 0.4127, 0.4834, 0.0408]) tensor([0.2751, 0.2624, 0.2816, 0.1809]) -Greedy action tensor([-0.6995, -0.6866, 1.1137, -0.6862]) tensor([0.1092, 0.1106, 0.6695, 0.1107]) -Greedy action tensor([ 0.0662, -0.2423, 0.2472, -0.8973]) tensor([0.3017, 0.2216, 0.3616, 0.1151]) -Greedy action tensor([-1.0447, -0.8126, 0.5374, -0.6109]) tensor([0.1153, 0.1455, 0.5612, 0.1780]) -Greedy action tensor([ 0.0029, -0.6702, 0.0494, -0.1694]) tensor([0.2942, 0.1501, 0.3082, 0.2476]) -Greedy action tensor([-0.1263, -0.7579, 0.7156, -1.0392]) tensor([0.2351, 0.1250, 0.5456, 0.0944]) -Greedy action tensor([-0.0698, -0.1350, -0.2897, -0.9944]) tensor([0.3189, 0.2987, 0.2559, 0.1265]) -Greedy action tensor([-0.5730, 0.7566, -0.6360, -0.2613]) tensor([0.1412, 0.5335, 0.1325, 0.1928]) -Greedy action tensor([ 0.0830, -0.1427, -0.6842, 0.0797]) tensor([0.3069, 0.2448, 0.1425, 0.3058]) -Greedy action tensor([-1.1002, -0.8414, -0.7167, 0.1790]) tensor([0.1359, 0.1761, 0.1995, 0.4885]) -Greedy action tensor([-0.8252, -0.0252, 0.0845, -0.5947]) tensor([0.1435, 0.3194, 0.3564, 0.1807]) -Greedy action tensor([-0.3122, -0.6338, 0.1296, -0.2007]) tensor([0.2274, 0.1648, 0.3536, 0.2542]) -Greedy action tensor([ 0.1315, 0.3993, -0.3039, -0.2580]) tensor([0.2754, 0.3599, 0.1782, 0.1865]) -Greedy action tensor([-0.6960, -0.0504, -1.3879, -0.3911]) tensor([0.2099, 0.4003, 0.1051, 0.2847]) -Greedy action tensor([ 0.0080, 0.1351, 0.0092, -0.0854]) tensor([0.2471, 0.2806, 0.2473, 0.2250]) -Greedy action tensor([ 1.2727, -0.6603, 0.1970, -0.5538]) tensor([0.6073, 0.0879, 0.2071, 0.0978]) -Greedy action tensor([ 0.8965, -1.0605, -0.0702, -0.1866]) tensor([0.5376, 0.0760, 0.2045, 0.1820]) -Greedy action tensor([ 0.2220, 0.4272, 0.3252, -0.7211]) tensor([0.2684, 0.3295, 0.2976, 0.1045]) -Greedy action tensor([ 0.5889, -0.7950, 0.3478, -0.4760]) tensor([0.4200, 0.1052, 0.3300, 0.1448]) -Greedy action tensor([-0.7792, -0.6303, -0.4891, -0.7839]) tensor([0.2226, 0.2583, 0.2975, 0.2216]) -Greedy action tensor([ 0.1237, -1.3673, 0.1909, 0.4362]) tensor([0.2731, 0.0615, 0.2921, 0.3733]) -Greedy action tensor([ 0.2575, -0.3094, -0.9735, -0.1852]) tensor([0.3997, 0.2268, 0.1167, 0.2568]) -Greedy action tensor([-0.1560, -1.3124, 0.3356, 0.4172]) tensor([0.2117, 0.0666, 0.3461, 0.3755]) -Greedy action tensor([-0.1253, -0.3511, 1.0630, -0.9988]) tensor([0.1819, 0.1452, 0.5970, 0.0759]) -Greedy action tensor([ 0.5203, -1.5216, -0.4575, -0.3460]) tensor([0.5191, 0.0674, 0.1953, 0.2183]) -Greedy action tensor([-0.4682, -0.3430, -1.4090, -0.2323]) tensor([0.2639, 0.2991, 0.1030, 0.3341]) -Greedy action tensor([-1.1256, -0.2107, 0.3174, -0.2656]) tensor([0.0991, 0.2474, 0.4194, 0.2341]) -Greedy action tensor([ 0.7760, -1.2387, -0.3817, -0.1563]) tensor([0.5431, 0.0724, 0.1707, 0.2138]) -Greedy action tensor([-0.5198, 0.4074, -0.0626, -1.1665]) tensor([0.1776, 0.4489, 0.2805, 0.0930]) -Greedy action tensor([ 1.4057, 0.2220, 0.3364, -0.5545]) tensor([0.5586, 0.1710, 0.1917, 0.0787]) -Greedy action tensor([ 0.9691, -0.5265, 1.3664, -0.6801]) tensor([0.3443, 0.0772, 0.5123, 0.0662]) -Greedy action tensor([ 0.2241, 0.3412, 0.9884, -0.3626]) tensor([0.2071, 0.2329, 0.4448, 0.1152]) -Greedy action tensor([-0.6807, -0.1856, 0.6630, -1.1342]) tensor([0.1407, 0.2308, 0.5392, 0.0894]) -Greedy action tensor([ 0.5150, -1.0909, -0.0471, 0.1540]) tensor([0.4052, 0.0813, 0.2310, 0.2824]) -Greedy action tensor([-0.0114, -1.2825, 0.0749, -0.1236]) tensor([0.3063, 0.0859, 0.3339, 0.2738]) -Greedy action tensor([-0.3456, -0.0810, 0.6043, -0.9822]) tensor([0.1846, 0.2405, 0.4773, 0.0977]) -Greedy action tensor([ 0.2019, -0.6491, -0.8169, 0.1791]) tensor([0.3616, 0.1544, 0.1306, 0.3534]) -Greedy action tensor([-0.1997, 0.2658, 0.6720, -0.6985]) tensor([0.1789, 0.2849, 0.4277, 0.1086]) -Greedy action tensor([-0.4184, -1.8142, -0.5011, -0.3037]) tensor([0.3040, 0.0753, 0.2798, 0.3409]) -Greedy action tensor([-1.5841, -0.9458, -1.1108, 0.6464]) tensor([0.0724, 0.1372, 0.1163, 0.6741]) -Greedy action tensor([-0.2699, -1.9064, 0.3645, -0.3530]) tensor([0.2500, 0.0487, 0.4714, 0.2300]) -Greedy action tensor([ 1.1650, -0.6496, 0.0866, 0.9299]) tensor([0.4360, 0.0710, 0.1483, 0.3447]) -Greedy action tensor([ 0.0549, -1.6118, 0.9700, -0.1587]) tensor([0.2225, 0.0420, 0.5557, 0.1797]) -Greedy action tensor([ 1.0210, -1.2681, 0.1057, -0.1463]) tensor([0.5516, 0.0559, 0.2209, 0.1717]) -Greedy action tensor([ 0.4796, -0.1940, -0.6611, -1.2029]) tensor([0.4962, 0.2530, 0.1586, 0.0922]) -Greedy action tensor([-1.3664, -0.9267, 0.5312, -0.4023]) tensor([0.0844, 0.1310, 0.5631, 0.2214]) -Greedy action tensor([-0.1790, -1.4004, 0.1994, -0.3332]) tensor([0.2769, 0.0816, 0.4042, 0.2373]) -Greedy action tensor([ 0.1199, -0.6898, -0.2282, -0.2338]) tensor([0.3505, 0.1560, 0.2475, 0.2461]) -Greedy action tensor([-0.6210, -0.3752, 0.0179, -0.2403]) tensor([0.1774, 0.2269, 0.3361, 0.2596]) -Greedy action tensor([ 0.2006, 0.0448, -0.3629, -1.4075]) tensor([0.3809, 0.3260, 0.2168, 0.0763]) -Greedy action tensor([-0.6888, -1.0583, 0.5958, -0.5511]) tensor([0.1550, 0.1071, 0.5600, 0.1779]) -Greedy action tensor([ 1.0173, -0.7110, 0.6295, 0.6451]) tensor([0.3929, 0.0698, 0.2666, 0.2708]) -Greedy action tensor([-0.2548, -0.2683, -0.6121, -0.6950]) tensor([0.3003, 0.2963, 0.2101, 0.1934]) -Greedy action tensor([-0.8931, -0.5042, -0.8975, -0.4454]) tensor([0.1986, 0.2930, 0.1977, 0.3107]) -Greedy action tensor([ 1.3213, 0.2476, -0.2027, 0.6542]) tensor([0.4824, 0.1649, 0.1051, 0.2476]) -Greedy action tensor([-0.2855, -0.9288, -0.1759, -0.4022]) tensor([0.2832, 0.1488, 0.3160, 0.2520]) -Greedy action tensor([ 0.2623, -2.0391, -0.5062, -0.2704]) tensor([0.4649, 0.0466, 0.2156, 0.2729]) -Greedy action tensor([ 0.6029, 0.2409, 0.8537, -0.6820]) tensor([0.3069, 0.2137, 0.3944, 0.0849]) -Greedy action tensor([-0.2686, -0.3564, 0.9834, 0.2675]) tensor([0.1404, 0.1286, 0.4910, 0.2400]) -Greedy action tensor([-0.5234, -0.5039, 1.0919, -0.6014]) tensor([0.1254, 0.1279, 0.6307, 0.1160]) -Greedy action tensor([ 1.1173, 0.0545, -0.1695, -0.2107]) tensor([0.5300, 0.1831, 0.1464, 0.1405]) -Greedy action tensor([ 0.5838, -0.5280, 0.2441, -0.0068]) tensor([0.3854, 0.1268, 0.2744, 0.2135]) -Greedy action tensor([ 0.9762, -0.4178, -0.4885, -0.3647]) tensor([0.5744, 0.1425, 0.1328, 0.1503]) -Greedy action tensor([-0.4622, -0.6797, 0.0084, -0.4318]) tensor([0.2254, 0.1813, 0.3609, 0.2324]) -Greedy action tensor([ 0.3819, -1.7446, 0.0977, -0.1872]) tensor([0.4102, 0.0489, 0.3087, 0.2322]) -Greedy action tensor([-0.5578, -1.9753, 0.0983, 0.0679]) tensor([0.1984, 0.0481, 0.3824, 0.3710]) -Greedy action tensor([ 0.4714, 0.4061, -0.6526, -0.1827]) tensor([0.3595, 0.3368, 0.1168, 0.1869]) -Greedy action tensor([ 0.2805, -0.2391, 0.4161, -0.1837]) tensor([0.2969, 0.1766, 0.3400, 0.1866]) -Greedy action tensor([ 0.7093, -1.9766, -0.5887, -0.6813]) tensor([0.6289, 0.0429, 0.1717, 0.1565]) -Greedy action tensor([-0.3034, 0.0406, 0.2788, -0.9421]) tensor([0.2115, 0.2983, 0.3785, 0.1117]) -Greedy action tensor([ 0.9102, -1.0327, 0.0762, 1.2823]) tensor([0.3302, 0.0473, 0.1434, 0.4791]) -Greedy action tensor([ 1.2529, -0.1026, 0.0768, -0.1016]) tensor([0.5481, 0.1413, 0.1691, 0.1415]) -Greedy action tensor([ 1.1955, -0.0532, -0.3452, -0.0881]) tensor([0.5624, 0.1613, 0.1205, 0.1558]) -Greedy action tensor([ 1.5731, -0.1019, -1.1802, 0.2803]) tensor([0.6555, 0.1228, 0.0418, 0.1799]) -Greedy action tensor([ 0.8301, -0.6369, -0.2731, -0.3938]) tensor([0.5387, 0.1242, 0.1787, 0.1584]) -Greedy action tensor([ 1.3451, -0.4786, -0.9041, -0.1444]) tensor([0.6701, 0.1082, 0.0707, 0.1511]) -Greedy action tensor([ 2.4764, -1.5201, -0.2700, 0.7635]) tensor([0.7918, 0.0146, 0.0508, 0.1428]) -Greedy action tensor([ 1.3312, -0.3556, -0.8973, -0.2987]) tensor([0.6717, 0.1243, 0.0723, 0.1316]) -Greedy action tensor([ 1.2113, -0.3131, -0.2624, -0.7959]) tensor([0.6324, 0.1377, 0.1449, 0.0850]) -Greedy action tensor([ 0.9102, 0.1274, -1.0772, 0.0186]) tensor([0.4990, 0.2281, 0.0684, 0.2046]) -Greedy action tensor([ 1.4976, -0.1918, -0.5582, 0.4203]) tensor([0.6049, 0.1117, 0.0774, 0.2060]) -Greedy action tensor([ 1.6240, -0.7429, -0.7729, 0.4935]) tensor([0.6633, 0.0622, 0.0604, 0.2141]) -Greedy action tensor([ 1.6613, -0.7370, -0.3959, 0.5814]) tensor([0.6417, 0.0583, 0.0820, 0.2179]) -Greedy action tensor([ 1.6914, -0.1566, -0.5298, 0.0684]) tensor([0.6834, 0.1077, 0.0741, 0.1348]) -Greedy action tensor([ 2.2299, -1.4356, -0.2074, 1.0140]) tensor([0.7095, 0.0182, 0.0620, 0.2103]) -Greedy action tensor([ 1.3123, -0.5343, -0.5557, 0.0921]) tensor([0.6221, 0.0982, 0.0961, 0.1836]) -Greedy action tensor([ 1.2414, -0.7398, -0.3981, 0.4176]) tensor([0.5647, 0.0779, 0.1096, 0.2478]) -Greedy action tensor([ 1.5356, -0.0531, -1.0449, 0.0580]) tensor([0.6631, 0.1354, 0.0502, 0.1513]) -Greedy action tensor([ 1.1492, -0.5573, -0.0601, 0.0899]) tensor([0.5475, 0.0994, 0.1634, 0.1898]) -Greedy action tensor([ 1.5920, -0.3381, -0.9712, 0.0284]) tensor([0.6985, 0.1014, 0.0538, 0.1463]) -Greedy action tensor([ 1.1908, -0.1362, -0.7595, 0.6055]) tensor([0.5090, 0.1350, 0.0724, 0.2835]) -Greedy action tensor([ 1.2845, -0.3273, -0.6214, 0.2528]) tensor([0.5866, 0.1171, 0.0872, 0.2091]) -Greedy action tensor([ 1.4306, -0.4294, -0.3973, -0.1598]) tensor([0.6578, 0.1024, 0.1057, 0.1341]) -Greedy action tensor([ 1.6479, -0.1928, -0.6195, -0.1487]) tensor([0.7002, 0.1111, 0.0725, 0.1161]) -Greedy action tensor([ 1.4596, 0.0566, -0.7476, 0.1091]) tensor([0.6192, 0.1522, 0.0681, 0.1604]) -Greedy action tensor([ 1.1410, -0.5294, -0.2047, -0.1052]) tensor([0.5760, 0.1084, 0.1500, 0.1657]) -Greedy action tensor([ 1.8562, -0.9190, -0.4022, 0.4441]) tensor([0.7090, 0.0442, 0.0741, 0.1727]) -Greedy action tensor([ 0.9230, -0.3917, 0.1132, -0.0415]) tensor([0.4774, 0.1282, 0.2124, 0.1820]) -Greedy action tensor([ 1.2243, -0.2756, 0.0818, -0.1161]) tensor([0.5544, 0.1237, 0.1769, 0.1451]) -Greedy action tensor([ 1.3880, -0.3923, -0.7952, -0.0319]) tensor([0.6566, 0.1107, 0.0740, 0.1587]) -Greedy action tensor([ 1.1904, 0.0419, -0.4039, 0.6087]) tensor([0.4810, 0.1525, 0.0977, 0.2688]) -Greedy action tensor([ 1.8276, -0.2081, -0.6778, 0.0595]) tensor([0.7231, 0.0944, 0.0590, 0.1234]) -Greedy action tensor([ 1.4500, -0.8621, -0.0935, 0.3915]) tensor([0.6025, 0.0597, 0.1287, 0.2091]) -Greedy action tensor([ 1.3623, -0.7102, -0.3776, 0.5176]) tensor([0.5777, 0.0727, 0.1014, 0.2482]) -Greedy action tensor([ 0.9154, -0.8054, -0.3230, 0.4288]) tensor([0.4800, 0.0859, 0.1391, 0.2950]) -Greedy action tensor([ 1.3848, -0.3889, -0.6673, 0.1915]) tensor([0.6245, 0.1060, 0.0802, 0.1894]) -Greedy action tensor([ 1.7761, -0.9245, -0.1185, 0.4765]) tensor([0.6711, 0.0451, 0.1009, 0.1830]) -Greedy action tensor([ 2.1141, -0.4108, -0.9950, 0.5804]) tensor([0.7460, 0.0597, 0.0333, 0.1610]) -Greedy action tensor([ 1.1648, -0.4986, -0.3864, 0.2515]) tensor([0.5547, 0.1051, 0.1176, 0.2226]) -Greedy action tensor([ 1.7384, -0.3152, -0.4111, 0.2965]) tensor([0.6751, 0.0866, 0.0787, 0.1596]) -Greedy action tensor([ 1.6206, -0.9742, -0.2189, 0.6135]) tensor([0.6254, 0.0467, 0.0994, 0.2285]) -Greedy action tensor([ 1.4134, -0.5236, -0.1650, 0.4890]) tensor([0.5724, 0.0825, 0.1181, 0.2271]) -Greedy action tensor([ 1.0920, -0.2364, -1.3499, 0.2025]) tensor([0.5673, 0.1503, 0.0494, 0.2331]) -Greedy action tensor([ 1.5271, -0.3570, -0.8265, -0.6312]) tensor([0.7339, 0.1115, 0.0697, 0.0848]) -Greedy action tensor([ 1.3589, -0.5013, -0.4162, -0.0231]) tensor([0.6344, 0.0987, 0.1075, 0.1593]) -Greedy action tensor([ 1.2268, -0.6037, -0.0150, 0.6195]) tensor([0.5015, 0.0804, 0.1449, 0.2732]) -Greedy action tensor([ 1.1927, -0.1239, -0.4221, 0.1336]) tensor([0.5514, 0.1478, 0.1097, 0.1912]) -Greedy action tensor([ 1.5337, -0.3646, -0.6481, 0.4051]) tensor([0.6305, 0.0945, 0.0711, 0.2039]) -Greedy action tensor([ 1.9970, -0.3737, -0.7079, 0.1097]) tensor([0.7623, 0.0712, 0.0510, 0.1155]) -Greedy action tensor([ 0.9709, -0.4211, -0.3497, 0.1764]) tensor([0.5083, 0.1264, 0.1357, 0.2296]) -Greedy action tensor([ 1.4972, -0.3190, -0.9510, 0.5854]) tensor([0.6057, 0.0985, 0.0524, 0.2434]) -Greedy action tensor([ 1.5245, 0.0796, -0.7554, 0.2642]) tensor([0.6167, 0.1454, 0.0631, 0.1749]) -Greedy action tensor([ 1.4552, -0.5085, -0.9708, 0.5208]) tensor([0.6167, 0.0865, 0.0545, 0.2423]) -Greedy action tensor([ 1.2943, -0.2256, -1.0359, 0.2275]) tensor([0.6024, 0.1318, 0.0586, 0.2073]) -Greedy action tensor([ 2.4900, -0.9717, -0.0983, 0.8808]) tensor([0.7654, 0.0240, 0.0575, 0.1531]) -Greedy action tensor([ 1.1055, -0.1530, -0.4833, 0.0717]) tensor([0.5423, 0.1541, 0.1107, 0.1929]) -Greedy action tensor([ 1.8681, -0.7124, -1.1407, 0.2127]) tensor([0.7598, 0.0575, 0.0375, 0.1451]) -Greedy action tensor([ 1.4894, -0.4291, -0.3132, 0.3555]) tensor([0.6122, 0.0899, 0.1009, 0.1970]) -Greedy action tensor([ 1.3683, -0.0865, -1.2091, 0.2455]) tensor([0.6117, 0.1428, 0.0465, 0.1990]) -Greedy action tensor([ 1.3447, -0.0174, -0.8759, -0.0225]) tensor([0.6175, 0.1582, 0.0670, 0.1573]) -Greedy action tensor([ 1.6939, -0.6082, -0.3227, 0.5870]) tensor([0.6395, 0.0640, 0.0851, 0.2114]) -Greedy action tensor([ 1.6208, -0.4072, -0.1561, 0.2919]) tensor([0.6388, 0.0841, 0.1081, 0.1691]) -Greedy action tensor([ 1.2488, -0.4758, -0.4792, 0.2627]) tensor([0.5784, 0.1031, 0.1027, 0.2158]) -Greedy action tensor([ 1.2429, -0.0375, -0.3213, 0.4074]) tensor([0.5206, 0.1447, 0.1089, 0.2258]) -Greedy action tensor([ 1.0137, -0.2975, -0.2572, 0.2015]) tensor([0.5015, 0.1352, 0.1407, 0.2226]) -Greedy action tensor([ 1.9218, -1.2391, -0.1619, 0.4583]) tensor([0.7152, 0.0303, 0.0890, 0.1655]) -Greedy action tensor([ 1.8379, -0.6429, -0.4676, 0.5538]) tensor([0.6848, 0.0573, 0.0683, 0.1896]) -Greedy action tensor([ 1.3286, -0.4097, -0.3547, 0.0030]) tensor([0.6145, 0.1081, 0.1142, 0.1633]) -Greedy action tensor([ 1.6803, -0.9040, 0.0044, 0.3605]) tensor([0.6537, 0.0493, 0.1223, 0.1747]) -Greedy action tensor([ 1.4102, -0.1828, -0.7123, -0.0516]) tensor([0.6431, 0.1308, 0.0770, 0.1491]) -Greedy action tensor([ 1.1898, -0.4498, -0.7801, 0.1404]) tensor([0.5939, 0.1153, 0.0828, 0.2080]) -Greedy action tensor([ 1.2582, -0.2318, -1.1676, 0.3258]) tensor([0.5857, 0.1320, 0.0518, 0.2305]) -Greedy action tensor([ 1.0662, -0.4420, 0.0941, 0.3989]) tensor([0.4733, 0.1048, 0.1791, 0.2429]) -Greedy action tensor([ 1.2242, -0.2831, -0.2890, 0.0522]) tensor([0.5710, 0.1265, 0.1257, 0.1768]) -Greedy action tensor([ 1.3605, -0.1682, -0.3498, 0.1424]) tensor([0.5905, 0.1280, 0.1068, 0.1747]) -Greedy action tensor([ 2.1486, -0.5974, -0.3001, 0.2052]) tensor([0.7729, 0.0496, 0.0668, 0.1107]) -Greedy action tensor([ 1.8152, -1.2700, -0.4047, 0.1952]) tensor([0.7395, 0.0338, 0.0803, 0.1464]) -Greedy action tensor([ 0.8326, -0.3250, 0.0236, -0.7899]) tensor([0.5110, 0.1606, 0.2276, 0.1009]) -Greedy action tensor([ 1.4499, -0.4355, -0.1490, 0.1552]) tensor([0.6143, 0.0932, 0.1242, 0.1683]) -Greedy action tensor([ 1.3093, -0.5501, -0.0677, -0.0964]) tensor([0.6048, 0.0942, 0.1526, 0.1483]) -Greedy action tensor([ 1.1197, -0.3917, -0.1333, 0.0691]) tensor([0.5388, 0.1189, 0.1539, 0.1884]) -Greedy action tensor([ 1.1226, -0.4017, -0.1742, 0.4891]) tensor([0.4946, 0.1077, 0.1352, 0.2625]) -Greedy action tensor([ 1.3583, -0.5856, -0.3184, 0.5189]) tensor([0.5675, 0.0812, 0.1061, 0.2452]) -Greedy action tensor([ 0.8585, -0.2073, -0.0773, -0.1654]) tensor([0.4771, 0.1644, 0.1872, 0.1714]) -Greedy action tensor([ 0.7972, -0.6762, -0.0996, -0.3147]) tensor([0.5087, 0.1166, 0.2075, 0.1673]) -Greedy action tensor([ 0.7236, -0.4771, -0.0444, -0.2398]) tensor([0.4659, 0.1402, 0.2161, 0.1778]) -Greedy action tensor([ 0.9188, -0.5600, -0.0315, -0.5189]) tensor([0.5400, 0.1231, 0.2088, 0.1282]) -Greedy action tensor([ 1.0072, -0.6252, 0.0643, -0.4736]) tensor([0.5517, 0.1079, 0.2149, 0.1255]) -Greedy action tensor([ 0.7252, -0.4343, -0.0779, -0.0948]) tensor([0.4541, 0.1424, 0.2034, 0.2000]) -Greedy action tensor([ 0.6816, -0.3599, -0.1141, -0.3104]) tensor([0.4598, 0.1623, 0.2075, 0.1705]) -Greedy action tensor([ 1.0260, -0.5125, -0.0178, -0.5370]) tensor([0.5630, 0.1209, 0.1982, 0.1180]) -Greedy action tensor([0.7793, 0.0335, 0.0290, 0.0951]) tensor([0.4080, 0.1935, 0.1927, 0.2058]) -Greedy action tensor([ 0.7693, -0.6425, -0.1707, -0.3096]) tensor([0.5065, 0.1234, 0.1979, 0.1722]) -Greedy action tensor([ 0.9176, -0.5196, -0.0720, -0.4693]) tensor([0.5379, 0.1278, 0.1999, 0.1344]) -Greedy action tensor([ 0.8337, -0.8045, -0.1485, -0.2976]) tensor([0.5287, 0.1027, 0.1980, 0.1706]) -Greedy action tensor([ 0.8393, -0.3116, -0.0269, -0.2634]) tensor([0.4834, 0.1529, 0.2033, 0.1605]) -Greedy action tensor([ 1.1326, -0.7202, -0.0405, -0.7606]) tensor([0.6185, 0.0970, 0.1914, 0.0931]) -Greedy action tensor([ 0.8988, -0.2534, -0.0291, -0.0755]) tensor([0.4788, 0.1513, 0.1893, 0.1807]) -Greedy action tensor([ 0.7372, -0.6323, 0.0027, -0.5508]) tensor([0.4976, 0.1265, 0.2387, 0.1372]) -Greedy action tensor([ 0.9564, -1.0424, 0.0771, -0.6176]) tensor([0.5689, 0.0771, 0.2361, 0.1179]) -Greedy action tensor([ 1.2560, -0.8615, 0.0288, -0.7831]) tensor([0.6478, 0.0780, 0.1899, 0.0843]) -Greedy action tensor([ 1.0398, -0.6743, -0.0800, -0.5031]) tensor([0.5813, 0.1047, 0.1897, 0.1243]) -Greedy action tensor([ 0.4660, -0.1248, -0.0543, -0.0572]) tensor([0.3649, 0.2021, 0.2169, 0.2162]) -Greedy action tensor([ 0.6586, 0.1676, 0.0332, -0.5574]) tensor([0.4092, 0.2505, 0.2190, 0.1213]) -Greedy action tensor([ 0.7365, -0.6194, -0.1123, -0.1857]) tensor([0.4800, 0.1237, 0.2054, 0.1909]) -Greedy action tensor([ 0.5906, -0.3301, -0.0148, -0.2340]) tensor([0.4197, 0.1671, 0.2291, 0.1840]) -Greedy action tensor([ 0.7711, -0.2972, -0.1809, -0.2241]) tensor([0.4764, 0.1637, 0.1839, 0.1761]) -Greedy action tensor([ 1.0352, -0.5988, -0.0933, -0.4613]) tensor([0.5739, 0.1120, 0.1857, 0.1285]) -Greedy action tensor([ 1.0654, -0.6605, -0.0807, -0.6563]) tensor([0.5971, 0.1063, 0.1898, 0.1067]) -Greedy action tensor([ 0.1992, 0.3506, -0.1150, -0.2083]) tensor([0.2810, 0.3269, 0.2052, 0.1869]) -Greedy action tensor([ 0.9348, -0.6242, -0.1196, -0.3575]) tensor([0.5454, 0.1147, 0.1900, 0.1498]) -Greedy action tensor([ 0.5204, -0.1472, -0.0453, -0.0967]) tensor([0.3816, 0.1957, 0.2167, 0.2059]) -Greedy action tensor([ 0.8431, -0.7296, -0.0106, -0.4093]) tensor([0.5211, 0.1081, 0.2219, 0.1489]) -Greedy action tensor([ 0.8228, -0.4311, -0.0196, -0.3872]) tensor([0.4965, 0.1417, 0.2138, 0.1480]) -Greedy action tensor([ 0.9124, -0.6490, -0.0916, -0.4951]) tensor([0.5492, 0.1152, 0.2012, 0.1344]) -Greedy action tensor([ 0.7678, -0.4432, 0.0117, -0.0784]) tensor([0.4553, 0.1356, 0.2138, 0.1953]) -Greedy action tensor([ 0.4807, -0.1972, -0.0127, -0.1122]) tensor([0.3744, 0.1901, 0.2286, 0.2069]) -Greedy action tensor([ 0.9038, -0.3061, 0.0334, -0.1286]) tensor([0.4824, 0.1439, 0.2020, 0.1718]) -Greedy action tensor([ 0.6243, -0.3048, -0.0653, -0.3048]) tensor([0.4364, 0.1723, 0.2190, 0.1723]) -Greedy action tensor([ 0.2722, 0.1046, -0.0952, 0.0028]) tensor([0.3028, 0.2561, 0.2097, 0.2313]) -Greedy action tensor([ 0.5539, -0.1764, 0.0329, -0.0863]) tensor([0.3842, 0.1851, 0.2282, 0.2025]) -Greedy action tensor([ 1.0676, -0.6104, -0.1522, -0.4092]) tensor([0.5846, 0.1092, 0.1727, 0.1335]) -Greedy action tensor([ 0.8478, -0.3609, 0.0666, -0.5399]) tensor([0.4985, 0.1488, 0.2282, 0.1245]) -Greedy action tensor([ 0.2578, 0.2434, -0.1377, 0.0129]) tensor([0.2905, 0.2864, 0.1956, 0.2274]) -Greedy action tensor([ 0.9284, -0.3143, -0.0796, -0.2356]) tensor([0.5087, 0.1468, 0.1856, 0.1588]) -Greedy action tensor([ 0.6597, -0.3964, 0.0459, -0.1540]) tensor([0.4288, 0.1491, 0.2321, 0.1900]) -Greedy action tensor([ 0.4846, -0.2157, -0.0553, -0.1125]) tensor([0.3803, 0.1888, 0.2216, 0.2093]) -Greedy action tensor([ 0.7162, -0.3172, -0.0247, -0.6467]) tensor([0.4788, 0.1704, 0.2283, 0.1225]) -Greedy action tensor([ 0.3161, -0.2266, -0.1282, -0.3234]) tensor([0.3636, 0.2113, 0.2332, 0.1918]) -Greedy action tensor([ 1.1525, -0.8491, 0.1297, -0.4847]) tensor([0.5920, 0.0800, 0.2129, 0.1152]) -Greedy action tensor([ 0.8915, -0.4736, -0.0026, -0.3082]) tensor([0.5087, 0.1299, 0.2081, 0.1533]) -Greedy action tensor([ 0.4334, -0.2264, -0.0443, -0.0969]) tensor([0.3669, 0.1897, 0.2275, 0.2159]) -Greedy action tensor([ 0.8532, -0.3400, -0.0315, -0.2214]) tensor([0.4860, 0.1474, 0.2007, 0.1659]) -Greedy action tensor([ 1.2400, -0.6392, -0.0834, -0.7343]) tensor([0.6419, 0.0980, 0.1709, 0.0891]) -Greedy action tensor([ 0.6621, -0.3104, -0.0024, -0.1693]) tensor([0.4295, 0.1624, 0.2210, 0.1870]) -Greedy action tensor([ 1.0061, -0.6387, -0.1582, -0.4182]) tensor([0.5728, 0.1106, 0.1788, 0.1379]) -Greedy action tensor([ 0.5949, -0.3400, 0.0616, -0.1801]) tensor([0.4098, 0.1609, 0.2404, 0.1888]) -Greedy action tensor([ 0.4718, 0.1385, -0.0719, 0.0326]) tensor([0.3399, 0.2436, 0.1974, 0.2191]) -Greedy action tensor([ 0.9540, -0.7757, 0.2157, -0.3876]) tensor([0.5217, 0.0925, 0.2494, 0.1364]) -Greedy action tensor([ 0.3691, -0.2748, -0.0725, -0.1279]) tensor([0.3602, 0.1892, 0.2316, 0.2191]) -Greedy action tensor([ 0.8398, -0.5984, 0.1199, -0.3874]) tensor([0.4957, 0.1177, 0.2413, 0.1453]) -Greedy action tensor([ 0.7600, -0.1062, 0.1055, -0.1660]) tensor([0.4280, 0.1800, 0.2224, 0.1695]) -Greedy action tensor([ 0.8921, -0.8127, 0.0188, -0.5366]) tensor([0.5438, 0.0989, 0.2271, 0.1303]) -Greedy action tensor([ 0.9270, -0.6197, -0.0643, -0.3997]) tensor([0.5407, 0.1151, 0.2007, 0.1435]) -Greedy action tensor([ 0.8171, -0.5513, 0.2772, -0.4484]) tensor([0.4718, 0.1201, 0.2750, 0.1331]) -Greedy action tensor([ 0.4601, -0.1734, -0.1343, -0.0108]) tensor([0.3694, 0.1960, 0.2039, 0.2307]) -Greedy action tensor([ 0.8227, -0.4460, 0.0404, -0.3575]) tensor([0.4888, 0.1375, 0.2236, 0.1502]) -Greedy action tensor([ 1.2116, -0.5315, -0.1450, -0.5755]) tensor([0.6250, 0.1094, 0.1610, 0.1047]) -Greedy action tensor([ 0.5681, -0.1256, -0.0787, -0.0407]) tensor([0.3895, 0.1946, 0.2040, 0.2119]) -Greedy action tensor([ 0.6123, 0.0658, -0.0716, -0.1609]) tensor([0.3929, 0.2275, 0.1983, 0.1813]) -Greedy action tensor([ 0.6498, -0.4562, -0.0336, -0.2619]) tensor([0.4469, 0.1479, 0.2256, 0.1796]) -Greedy action tensor([ 0.7560, -0.3267, -0.0324, -0.3590]) tensor([0.4714, 0.1597, 0.2143, 0.1546]) -Greedy action tensor([ 1.0970, -0.4624, -0.0363, -0.7674]) tensor([0.5927, 0.1246, 0.1908, 0.0919]) -Greedy action tensor([ 0.7302, -0.2445, -0.0500, -0.5075]) tensor([0.4704, 0.1775, 0.2156, 0.1364]) -Greedy action tensor([ 0.4581, -0.0460, -0.0908, 0.0198]) tensor([0.3538, 0.2137, 0.2043, 0.2282]) -Greedy action tensor([ 0.4833, -0.2393, -0.0270, -0.1556]) tensor([0.3826, 0.1858, 0.2297, 0.2020]) -Greedy action tensor([ 0.6472, -0.1361, -0.0452, -0.0439]) tensor([0.4068, 0.1859, 0.2035, 0.2038]) -Greedy action tensor([ 0.3737, -0.3098, -0.0576, -0.3848]) tensor([0.3813, 0.1925, 0.2477, 0.1786]) -Greedy action tensor([ 1.3423, -1.0384, 0.0151, -0.9464]) tensor([0.6853, 0.0634, 0.1818, 0.0695]) -Greedy action tensor([ 0.8963, -0.7036, 0.0307, -0.5277]) tensor([0.5366, 0.1084, 0.2258, 0.1292]) -Greedy action tensor([ 0.6955, -0.6892, -0.0469, -0.2306]) tensor([0.4712, 0.1180, 0.2243, 0.1866]) -Greedy action tensor([ 0.8460, -0.3631, -0.1530, -0.0384]) tensor([0.4808, 0.1435, 0.1771, 0.1986]) -Greedy action tensor([ 0.6222, -0.2537, -0.1776, -0.4960]) tensor([0.4560, 0.1899, 0.2050, 0.1491]) -Greedy action tensor([ 0.8695, -0.5000, -0.0369, -0.6548]) tensor([0.5331, 0.1355, 0.2153, 0.1161]) -Greedy action tensor([-0.5167, 0.7879, -0.0534, 0.1608]) tensor([0.1213, 0.4471, 0.1928, 0.2388]) -Greedy action tensor([-1.6198, -0.3387, 0.4850, -0.0853]) tensor([0.0573, 0.2064, 0.4704, 0.2659]) -Greedy action tensor([-1.9235, -0.4005, 0.6487, -0.1672]) tensor([0.0409, 0.1874, 0.5351, 0.2366]) -Greedy action tensor([-1.2382, -0.5435, 0.2873, 0.2296]) tensor([0.0838, 0.1678, 0.3850, 0.3635]) -Greedy action tensor([-1.7977, -0.4624, 0.5972, -0.0993]) tensor([0.0471, 0.1790, 0.5165, 0.2574]) -Greedy action tensor([-1.8534, -0.4375, 0.6218, -0.1426]) tensor([0.0444, 0.1828, 0.5273, 0.2455]) -Greedy action tensor([-1.8737, -0.3746, 0.6327, -0.1453]) tensor([0.0428, 0.1916, 0.5246, 0.2410]) -Greedy action tensor([-1.9045, -0.4411, 0.6491, -0.1566]) tensor([0.0418, 0.1807, 0.5374, 0.2401]) -Greedy action tensor([-1.7567, -0.3810, 0.6338, -0.0157]) tensor([0.0463, 0.1834, 0.5060, 0.2643]) -Greedy action tensor([-1.9098, -0.4034, 0.6503, -0.1613]) tensor([0.0413, 0.1864, 0.5347, 0.2375]) -Greedy action tensor([-1.6765e+00, -4.1074e-01, 5.0094e-01, 1.3307e-03]) tensor([0.0534, 0.1894, 0.4713, 0.2859]) -Greedy action tensor([-1.8903, -0.3880, 0.6424, -0.1519]) tensor([0.0421, 0.1890, 0.5296, 0.2393]) -Greedy action tensor([-0.1967, -0.4510, 0.3429, 0.4149]) tensor([0.1875, 0.1454, 0.3216, 0.3456]) -Greedy action tensor([-1.9357, -0.4223, 0.6602, -0.1763]) tensor([0.0404, 0.1835, 0.5415, 0.2346]) -Greedy action tensor([-1.8404, -0.3547, 0.6199, -0.1164]) tensor([0.0440, 0.1943, 0.5150, 0.2466]) -Greedy action tensor([-1.7810, -0.4943, 0.5973, -0.0561]) tensor([0.0476, 0.1723, 0.5132, 0.2670]) -Greedy action tensor([-1.9341, -0.4158, 0.6529, -0.1782]) tensor([0.0406, 0.1852, 0.5393, 0.2349]) -Greedy action tensor([-1.2933, -0.5892, 0.3322, 0.1271]) tensor([0.0817, 0.1652, 0.4150, 0.3381]) -Greedy action tensor([-1.5098, -0.1832, 0.4221, -0.0318]) tensor([0.0623, 0.2347, 0.4299, 0.2731]) -Greedy action tensor([-1.5469, -0.4364, 0.5857, 0.0644]) tensor([0.0572, 0.1737, 0.4826, 0.2866]) -Greedy action tensor([-1.2595, 0.3062, 0.2829, -0.0062]) tensor([0.0716, 0.3428, 0.3349, 0.2508]) -Greedy action tensor([-0.0577, 1.1871, 0.0287, 0.3412]) tensor([0.1418, 0.4923, 0.1546, 0.2113]) -Greedy action tensor([-1.7267, 0.1938, 0.4614, -0.0404]) tensor([0.0452, 0.3082, 0.4028, 0.2439]) -Greedy action tensor([-1.8903, -0.4530, 0.6473, -0.1512]) tensor([0.0425, 0.1787, 0.5371, 0.2417]) -Greedy action tensor([-1.9218, -0.4123, 0.6533, -0.1676]) tensor([0.0409, 0.1852, 0.5374, 0.2365]) -Greedy action tensor([-1.9048, -0.3724, 0.6449, -0.1564]) tensor([0.0414, 0.1915, 0.5295, 0.2376]) -Greedy action tensor([-1.9093, -0.4179, 0.6604, -0.1490]) tensor([0.0411, 0.1827, 0.5371, 0.2391]) -Greedy action tensor([-1.7481, -0.0488, 0.5022, -0.0836]) tensor([0.0471, 0.2575, 0.4467, 0.2487]) -Greedy action tensor([-1.9271, -0.3847, 0.6495, -0.1715]) tensor([0.0406, 0.1900, 0.5343, 0.2351]) -Greedy action tensor([-1.8396, -0.1229, 0.5877, -0.1812]) tensor([0.0432, 0.2405, 0.4894, 0.2269]) -Greedy action tensor([-1.5098, -0.1003, 0.4514, -0.1105]) tensor([0.0615, 0.2519, 0.4373, 0.2493]) -Greedy action tensor([-1.9330, -0.4308, 0.6613, -0.1744]) tensor([0.0405, 0.1820, 0.5424, 0.2352]) -Greedy action tensor([-1.7242, -0.1791, 0.5247, -0.0556]) tensor([0.0489, 0.2290, 0.4630, 0.2591]) -Greedy action tensor([-1.9168, -0.3792, 0.6463, -0.1572]) tensor([0.0409, 0.1904, 0.5310, 0.2377]) -Greedy action tensor([-1.9432, -0.4380, 0.6630, -0.1795]) tensor([0.0402, 0.1810, 0.5444, 0.2344]) -Greedy action tensor([-1.9400, -0.4442, 0.6635, -0.1774]) tensor([0.0403, 0.1800, 0.5448, 0.2350]) -Greedy action tensor([-1.8376, -0.2374, 0.5802, -0.1044]) tensor([0.0438, 0.2170, 0.4914, 0.2478]) -Greedy action tensor([-1.8864, -0.4563, 0.6446, -0.1529]) tensor([0.0427, 0.1786, 0.5369, 0.2418]) -Greedy action tensor([-1.9053, -0.4480, 0.6436, -0.1556]) tensor([0.0419, 0.1801, 0.5366, 0.2413]) -Greedy action tensor([-1.8550, -0.1895, 0.5847, -0.1168]) tensor([0.0427, 0.2256, 0.4892, 0.2426]) -Greedy action tensor([-1.7418, -0.4353, 0.6216, -0.0578]) tensor([0.0483, 0.1784, 0.5132, 0.2602]) -Greedy action tensor([-1.9479, -0.4543, 0.6701, -0.1826]) tensor([0.0400, 0.1781, 0.5482, 0.2337]) -Greedy action tensor([-1.4347, 0.7369, 0.3332, 0.1891]) tensor([0.0483, 0.4237, 0.2830, 0.2450]) -Greedy action tensor([-1.8466, -0.2930, 0.6232, -0.1223]) tensor([0.0432, 0.2042, 0.5104, 0.2422]) -Greedy action tensor([-1.9281, -0.4334, 0.6604, -0.1718]) tensor([0.0407, 0.1815, 0.5419, 0.2358]) -Greedy action tensor([-1.8137, -0.0257, 0.5642, -0.1103]) tensor([0.0430, 0.2571, 0.4637, 0.2362]) -Greedy action tensor([-1.8983, -0.3695, 0.6351, -0.1543]) tensor([0.0418, 0.1928, 0.5264, 0.2390]) -Greedy action tensor([-1.6292, -0.1763, 0.4883, -0.0935]) tensor([0.0549, 0.2345, 0.4559, 0.2548]) -Greedy action tensor([-1.8600, -0.4615, 0.6195, -0.1513]) tensor([0.0444, 0.1799, 0.5303, 0.2453]) -Greedy action tensor([-1.8882, -0.3700, 0.6389, -0.1410]) tensor([0.0420, 0.1916, 0.5255, 0.2409]) -Greedy action tensor([-1.8355, -0.2550, 0.6046, -0.1102]) tensor([0.0436, 0.2117, 0.5000, 0.2447]) -Greedy action tensor([-1.6729, -0.4711, 0.5377, -0.0328]) tensor([0.0538, 0.1788, 0.4903, 0.2771]) -Greedy action tensor([0.2776, 0.9046, 0.1962, 0.9725]) tensor([0.1725, 0.3229, 0.1590, 0.3456]) -Greedy action tensor([-1.9240, -0.4061, 0.6536, -0.1683]) tensor([0.0408, 0.1861, 0.5370, 0.2361]) -Greedy action tensor([-1.4332, -0.5539, 0.4353, -0.0278]) tensor([0.0716, 0.1725, 0.4639, 0.2919]) -Greedy action tensor([-1.6631, -0.4749, 0.5198, -0.0492]) tensor([0.0550, 0.1805, 0.4881, 0.2763]) -Greedy action tensor([-1.1805, 0.1326, 0.2232, 0.0954]) tensor([0.0808, 0.3006, 0.3290, 0.2896]) -Greedy action tensor([-1.5882, -0.5398, 0.5137, -0.0382]) tensor([0.0597, 0.1704, 0.4886, 0.2813]) -Greedy action tensor([-1.8521, -0.1318, 0.5736, -0.1563]) tensor([0.0428, 0.2393, 0.4844, 0.2335]) -Greedy action tensor([-1.6557, 0.1643, 0.4331, 0.0530]) tensor([0.0481, 0.2972, 0.3888, 0.2659]) -Greedy action tensor([-1.9349, -0.4424, 0.6613, -0.1751]) tensor([0.0405, 0.1803, 0.5436, 0.2355]) -Greedy action tensor([-1.9439, -0.4437, 0.6647, -0.1801]) tensor([0.0402, 0.1800, 0.5455, 0.2343]) -Greedy action tensor([-1.8894, -0.4361, 0.6429, -0.1462]) tensor([0.0424, 0.1814, 0.5337, 0.2424]) -Greedy action tensor([-1.7693, -0.0235, 0.5201, -0.0665]) tensor([0.0453, 0.2594, 0.4468, 0.2485]) -Greedy action tensor([-1.8026, -0.3283, 0.5836, -0.1024]) tensor([0.0461, 0.2011, 0.5007, 0.2521]) -Greedy action tensor([-1.7164, -0.4470, 0.5859, -0.2164]) tensor([0.0525, 0.1869, 0.5251, 0.2354]) -Greedy action tensor([-1.6613, -0.3122, 0.6623, 0.0453]) tensor([0.0486, 0.1873, 0.4963, 0.2678]) -Greedy action tensor([-1.8566, -0.4400, 0.6270, -0.1371]) tensor([0.0441, 0.1817, 0.5282, 0.2460]) -Greedy action tensor([-1.4650, 0.3719, 0.2718, 0.0926]) tensor([0.0565, 0.3546, 0.3208, 0.2682]) -Greedy action tensor([-1.7274, -0.4759, 0.1606, -0.5305]) tensor([0.0694, 0.2426, 0.4584, 0.2297]) -Greedy action tensor([-1.5724, -0.3399, 0.6437, -0.0483]) tensor([0.0550, 0.1885, 0.5041, 0.2524]) -Greedy action tensor([-1.9094, -0.4029, 0.6516, -0.1573]) tensor([0.0413, 0.1862, 0.5345, 0.2380]) -Greedy action tensor([-1.2303, 0.3554, 0.3183, 0.2940]) tensor([0.0659, 0.3217, 0.3099, 0.3025]) -Greedy action tensor([-1.9347, -0.4440, 0.6635, -0.1748]) tensor([0.0405, 0.1798, 0.5443, 0.2354]) -Greedy action tensor([-1.8412, -0.4468, 0.6235, -0.1269]) tensor([0.0448, 0.1805, 0.5263, 0.2485]) -Greedy action tensor([-1.9012, -0.4502, 0.6390, -0.1558]) tensor([0.0422, 0.1802, 0.5356, 0.2419]) -Greedy action tensor([-1.8473, -0.2138, 0.5884, -0.1188]) tensor([0.0431, 0.2210, 0.4929, 0.2430]) -Greedy action tensor([-0.6173, 0.4933, 0.1257, 0.0807]) tensor([0.1227, 0.3726, 0.2580, 0.2466]) -Greedy action tensor([-0.9358, 0.8951, 0.1102, 0.2117]) tensor([0.0756, 0.4714, 0.2150, 0.2380]) -Greedy action tensor([-1.6993, -0.3543, 0.6933, 0.0041]) tensor([0.0470, 0.1804, 0.5144, 0.2582]) -Greedy action tensor([-1.8893, -0.4059, 0.6495, -0.1522]) tensor([0.0421, 0.1856, 0.5332, 0.2392]) -Greedy action tensor([-1.0508, -1.4812, 0.2537, -0.0675]) tensor([0.1248, 0.0812, 0.4602, 0.3338]) -Greedy action tensor([ 0.2236, -0.3243, -0.8617, -0.2217]) tensor([0.3912, 0.2261, 0.1321, 0.2506]) -Greedy action tensor([-0.1523, 0.7024, 0.1172, -0.6170]) tensor([0.1891, 0.4445, 0.2476, 0.1188]) -Greedy action tensor([-0.8985, -0.6133, 0.4056, -0.9091]) tensor([0.1428, 0.1899, 0.5261, 0.1413]) -Greedy action tensor([ 0.0544, -0.5050, 0.2396, -0.5100]) tensor([0.2991, 0.1709, 0.3599, 0.1701]) -Greedy action tensor([-0.3404, -0.3993, 0.0967, -0.8671]) tensor([0.2450, 0.2310, 0.3793, 0.1447]) -Greedy action tensor([-0.9073, -0.5834, 0.1243, -0.1736]) tensor([0.1375, 0.1901, 0.3859, 0.2865]) -Greedy action tensor([0.4122, 0.2004, 0.3028, 0.1129]) tensor([0.2901, 0.2347, 0.2601, 0.2151]) -Greedy action tensor([-0.4916, -0.4368, 0.0670, -0.9853]) tensor([0.2265, 0.2393, 0.3960, 0.1382]) -Greedy action tensor([-0.3269, -1.4227, 1.4844, -0.9550]) tensor([0.1252, 0.0419, 0.7661, 0.0668]) -Greedy action tensor([-1.2794, -0.8959, 0.8820, -1.0046]) tensor([0.0802, 0.1177, 0.6965, 0.1056]) -Greedy action tensor([ 1.3832, -0.8715, 1.5262, -0.0518]) tensor([0.4005, 0.0420, 0.4621, 0.0954]) -Greedy action tensor([-0.6408, -0.7097, 0.3134, -1.2475]) tensor([0.1970, 0.1839, 0.5116, 0.1074]) -Greedy action tensor([ 0.3540, -1.5662, 0.6444, -0.9371]) tensor([0.3625, 0.0531, 0.4847, 0.0997]) -Greedy action tensor([-0.2214, -0.9528, -0.1949, -0.5766]) tensor([0.3116, 0.1500, 0.3200, 0.2185]) -Greedy action tensor([-0.7997, -0.2368, 0.3739, -1.5716]) tensor([0.1550, 0.2722, 0.5012, 0.0716]) -Greedy action tensor([ 1.2030, -0.6249, 0.2277, -0.5826]) tensor([0.5863, 0.0942, 0.2211, 0.0983]) -Greedy action tensor([ 0.6463, 0.1959, 0.2513, -0.3182]) tensor([0.3714, 0.2368, 0.2502, 0.1416]) -Greedy action tensor([-1.4081, -0.1581, -0.0580, -0.9753]) tensor([0.1011, 0.3529, 0.3901, 0.1559]) -Greedy action tensor([ 0.9705, -0.7673, -0.7251, 0.6794]) tensor([0.4746, 0.0835, 0.0871, 0.3548]) -Greedy action tensor([-0.1684, 0.3708, -0.0978, -0.4162]) tensor([0.2189, 0.3753, 0.2349, 0.1708]) -Greedy action tensor([ 0.3830, -0.1574, -0.1709, -1.4300]) tensor([0.4310, 0.2510, 0.2477, 0.0703]) -Greedy action tensor([ 0.1494, -0.6273, 0.3163, -1.0650]) tensor([0.3403, 0.1565, 0.4021, 0.1010]) -Greedy action tensor([ 1.4678, -1.1829, -0.1116, 0.2158]) tensor([0.6400, 0.0452, 0.1319, 0.1830]) -Greedy action tensor([ 0.3775, -1.3830, 0.0597, -0.0414]) tensor([0.3910, 0.0672, 0.2845, 0.2572]) -Greedy action tensor([-0.0394, 0.0967, 0.1065, -0.3348]) tensor([0.2471, 0.2831, 0.2859, 0.1839]) -Greedy action tensor([ 0.0223, 0.0251, -0.3137, -0.5010]) tensor([0.3021, 0.3030, 0.2159, 0.1790]) -Greedy action tensor([ 0.2410, -0.5635, 0.0332, 0.1055]) tensor([0.3192, 0.1428, 0.2593, 0.2787]) -Greedy action tensor([-0.8086, -0.1626, -0.5102, -0.2159]) tensor([0.1649, 0.3146, 0.2222, 0.2983]) -Greedy action tensor([-0.6499, -0.5325, -1.1381, -0.2885]) tensor([0.2396, 0.2695, 0.1470, 0.3439]) -Greedy action tensor([-0.4578, 0.4048, -0.5215, -0.2927]) tensor([0.1823, 0.4318, 0.1710, 0.2150]) -Greedy action tensor([ 1.0536, -0.0883, -0.0966, 0.7467]) tensor([0.4217, 0.1346, 0.1335, 0.3102]) -Greedy action tensor([ 0.4050, -0.2120, -0.1259, -0.3393]) tensor([0.3842, 0.2073, 0.2259, 0.1825]) -Greedy action tensor([-0.1520, 0.1703, 0.4090, -0.3242]) tensor([0.2010, 0.2775, 0.3523, 0.1692]) -Greedy action tensor([ 1.2987, 0.3910, 0.0413, -0.0542]) tensor([0.5138, 0.2073, 0.1461, 0.1328]) -Greedy action tensor([-0.5899, 0.3482, 0.5813, -0.6018]) tensor([0.1287, 0.3289, 0.4152, 0.1272]) -Greedy action tensor([ 1.5440, -1.0171, -0.3006, 0.9880]) tensor([0.5528, 0.0427, 0.0874, 0.3171]) -Greedy action tensor([ 0.5969, -1.2480, 0.3615, -0.1964]) tensor([0.4166, 0.0658, 0.3292, 0.1884]) -Greedy action tensor([ 0.2436, -1.3812, 0.6056, -0.6326]) tensor([0.3279, 0.0646, 0.4709, 0.1365]) -Greedy action tensor([-0.6203, -0.8277, 0.8013, -0.7700]) tensor([0.1467, 0.1192, 0.6078, 0.1263]) -Greedy action tensor([ 0.1323, -0.9005, -0.1105, 0.7961]) tensor([0.2449, 0.0872, 0.1921, 0.4757]) -Greedy action tensor([-0.1366, 0.0086, 0.7129, 0.0673]) tensor([0.1748, 0.2021, 0.4088, 0.2143]) -Greedy action tensor([ 0.2217, -0.6580, 0.4711, -0.4695]) tensor([0.3126, 0.1297, 0.4011, 0.1566]) -Greedy action tensor([-0.9949, 0.0309, 0.7499, -0.8559]) tensor([0.0938, 0.2616, 0.5369, 0.1078]) -Greedy action tensor([-0.4085, -1.5705, 0.4201, -0.3354]) tensor([0.2137, 0.0669, 0.4895, 0.2300]) -Greedy action tensor([ 0.0320, -1.7370, -0.0084, -0.1620]) tensor([0.3385, 0.0577, 0.3250, 0.2788]) -Greedy action tensor([ 0.1054, -0.7235, 0.7818, -0.8459]) tensor([0.2639, 0.1152, 0.5190, 0.1019]) -Greedy action tensor([-1.0390, -0.0025, -1.0126, -0.1088]) tensor([0.1355, 0.3820, 0.1391, 0.3434]) -Greedy action tensor([-0.0753, -2.3667, 0.3620, -0.1806]) tensor([0.2817, 0.0285, 0.4362, 0.2536]) -Greedy action tensor([ 0.1934, 0.1797, -0.0586, -0.7847]) tensor([0.3185, 0.3142, 0.2476, 0.1198]) -Greedy action tensor([ 0.5104, 0.5525, -0.3514, 0.5495]) tensor([0.2853, 0.2975, 0.1205, 0.2967]) -Greedy action tensor([-0.2904, -0.6685, -1.0448, 0.2372]) tensor([0.2597, 0.1780, 0.1221, 0.4402]) -Greedy action tensor([ 0.0397, -0.8426, 0.5613, -0.3802]) tensor([0.2663, 0.1102, 0.4486, 0.1750]) -Greedy action tensor([-0.1731, 0.7550, 0.0452, 0.0693]) tensor([0.1653, 0.4183, 0.2057, 0.2107]) -Greedy action tensor([-0.5509, -0.0289, -0.4395, -0.0830]) tensor([0.1852, 0.3121, 0.2070, 0.2957]) -Greedy action tensor([-0.6189, -0.3578, -0.5911, -0.3847]) tensor([0.2178, 0.2828, 0.2240, 0.2753]) -Greedy action tensor([ 1.4090, -1.3134, -0.4262, 0.1203]) tensor([0.6663, 0.0438, 0.1063, 0.1836]) -Greedy action tensor([-0.8081, 0.3351, -0.2426, -0.0820]) tensor([0.1256, 0.3939, 0.2210, 0.2596]) -Greedy action tensor([ 0.1611, -1.7364, -0.4462, 0.7900]) tensor([0.2801, 0.0420, 0.1526, 0.5253]) -Greedy action tensor([-0.1083, -0.7630, 0.9624, -0.6364]) tensor([0.1989, 0.1034, 0.5804, 0.1173]) -Greedy action tensor([ 0.7349, 0.3495, 0.5031, -0.3493]) tensor([0.3557, 0.2419, 0.2821, 0.1203]) -Greedy action tensor([ 0.1179, -0.3584, 0.4212, -0.9345]) tensor([0.3008, 0.1868, 0.4074, 0.1050]) -Greedy action tensor([-0.1831, -0.0069, 0.4671, -0.3647]) tensor([0.2023, 0.2413, 0.3876, 0.1687]) -Greedy action tensor([ 0.0791, 0.0098, -0.5177, -0.4045]) tensor([0.3226, 0.3010, 0.1776, 0.1989]) -Greedy action tensor([0.9106, 0.6623, 0.8781, 0.2184]) tensor([0.3078, 0.2402, 0.2980, 0.1541]) -Greedy action tensor([-1.0472, -0.4074, 1.0298, -1.4873]) tensor([0.0868, 0.1646, 0.6927, 0.0559]) -Greedy action tensor([-0.5464, -0.5422, 0.0692, -1.1450]) tensor([0.2270, 0.2280, 0.4202, 0.1248]) -Greedy action tensor([ 1.0480, -1.5887, 0.0959, 1.0677]) tensor([0.4036, 0.0289, 0.1558, 0.4117]) -Greedy action tensor([-1.2837, -0.5779, -0.2757, -1.1452]) tensor([0.1446, 0.2929, 0.3963, 0.1661]) -Greedy action tensor([ 0.6479, -0.1561, 0.0731, -0.3114]) tensor([0.4178, 0.1870, 0.2351, 0.1601]) -Greedy action tensor([-1.0185, -2.0579, -0.1958, 1.1118]) tensor([0.0830, 0.0294, 0.1890, 0.6987]) -Greedy action tensor([-0.8273, 0.0640, -0.1562, -0.1074]) tensor([0.1343, 0.3273, 0.2626, 0.2758]) -Greedy action tensor([ 0.6459, -0.3120, -0.6965, 0.8115]) tensor([0.3540, 0.1358, 0.0925, 0.4177]) -Greedy action tensor([-0.6127, -1.2915, 1.1522, -0.8845]) tensor([0.1233, 0.0625, 0.7202, 0.0940]) -Greedy action tensor([-0.3237, 0.2465, 0.3549, -0.7006]) tensor([0.1843, 0.3260, 0.3633, 0.1264]) -Greedy action tensor([-1.4497, -1.8662, 0.1052, -0.4063]) tensor([0.1083, 0.0714, 0.5128, 0.3075]) -Greedy action tensor([ 0.1467, -0.5791, -0.0762, -0.3417]) tensor([0.3451, 0.1670, 0.2761, 0.2118]) -Greedy action tensor([ 0.2064, -1.0814, 0.4692, 0.1181]) tensor([0.2864, 0.0790, 0.3725, 0.2622]) -Greedy action tensor([-0.7583, -1.3318, 0.6272, -0.5027]) tensor([0.1459, 0.0822, 0.5833, 0.1885]) -Greedy action tensor([-0.1460, -0.1019, -0.9167, 0.3784]) tensor([0.2382, 0.2490, 0.1102, 0.4025]) -Greedy action tensor([-0.2474, -0.4955, 0.5063, -0.8155]) tensor([0.2236, 0.1745, 0.4752, 0.1267]) -Greedy action tensor([ 0.1445, -0.6353, -0.4205, -0.2533]) tensor([0.3706, 0.1699, 0.2106, 0.2489]) -Greedy action tensor([ 1.5737, -0.6580, -0.4679, 0.2071]) tensor([0.6702, 0.0719, 0.0870, 0.1709]) -Greedy action tensor([ 2.1059, -1.2323, 0.0925, 0.6226]) tensor([0.7164, 0.0254, 0.0957, 0.1625]) -Greedy action tensor([ 1.1734, -0.1997, -0.3340, -0.0759]) tensor([0.5677, 0.1438, 0.1257, 0.1628]) -Greedy action tensor([ 1.8819, 0.8172, -0.0976, 0.4040]) tensor([0.5844, 0.2015, 0.0807, 0.1333]) -Greedy action tensor([ 1.8090, -0.6825, -0.6029, 0.3293]) tensor([0.7142, 0.0591, 0.0640, 0.1626]) -Greedy action tensor([ 1.0947, -0.5811, -0.3969, 0.6314]) tensor([0.4899, 0.0917, 0.1102, 0.3082]) -Greedy action tensor([ 1.5180, -0.6782, -0.2883, -0.1410]) tensor([0.6822, 0.0759, 0.1121, 0.1298]) -Greedy action tensor([ 1.1620, -0.6366, -0.2447, 0.6852]) tensor([0.4923, 0.0815, 0.1206, 0.3056]) -Greedy action tensor([ 1.8581, -0.4085, -0.2160, 0.2789]) tensor([0.6966, 0.0722, 0.0875, 0.1436]) -Greedy action tensor([ 0.5465, 0.2880, 0.0901, -0.0268]) tensor([0.3368, 0.2601, 0.2134, 0.1898]) -Greedy action tensor([ 1.9962, -1.0621, -0.3912, 0.3347]) tensor([0.7526, 0.0353, 0.0691, 0.1429]) -Greedy action tensor([ 1.1165, -0.5106, -0.2808, 0.3101]) tensor([0.5290, 0.1040, 0.1308, 0.2362]) -Greedy action tensor([ 1.0132, 0.0809, -0.4580, 0.2433]) tensor([0.4793, 0.1887, 0.1101, 0.2219]) -Greedy action tensor([ 1.6245, -0.7191, -0.3634, 0.5159]) tensor([0.6398, 0.0614, 0.0876, 0.2111]) -Greedy action tensor([ 1.4932, -0.3184, -0.3981, 0.2409]) tensor([0.6250, 0.1021, 0.0943, 0.1786]) -Greedy action tensor([ 1.4723, -1.0010, -0.6536, 0.8952]) tensor([0.5665, 0.0478, 0.0676, 0.3181]) -Greedy action tensor([ 1.4156, -0.7368, -0.0854, 0.4142]) tensor([0.5860, 0.0681, 0.1306, 0.2153]) -Greedy action tensor([ 1.8975, 0.2676, -0.1754, 0.2481]) tensor([0.6605, 0.1294, 0.0831, 0.1269]) -Greedy action tensor([ 1.3742, -0.3377, -0.6466, 0.1270]) tensor([0.6249, 0.1128, 0.0828, 0.1795]) -Greedy action tensor([ 0.9234, -0.0643, -0.1797, -0.2695]) tensor([0.4981, 0.1855, 0.1653, 0.1511]) -Greedy action tensor([ 1.3250, -0.4324, -0.4167, 0.0491]) tensor([0.6147, 0.1060, 0.1077, 0.1716]) -Greedy action tensor([ 1.9384, -0.8988, -0.1773, -0.1817]) tensor([0.7697, 0.0451, 0.0928, 0.0924]) -Greedy action tensor([ 1.9916, -0.9641, -0.6610, 0.4348]) tensor([0.7500, 0.0390, 0.0529, 0.1581]) -Greedy action tensor([ 1.8802, -0.5160, -0.5544, 0.6438]) tensor([0.6807, 0.0620, 0.0597, 0.1977]) -Greedy action tensor([ 0.9988, -0.1582, -0.2769, 0.0447]) tensor([0.5053, 0.1589, 0.1411, 0.1946]) -Greedy action tensor([ 1.5761, -0.5417, -0.6006, 0.1603]) tensor([0.6773, 0.0815, 0.0768, 0.1644]) -Greedy action tensor([ 1.7431, -1.2990, -0.4018, 0.5425]) tensor([0.6822, 0.0326, 0.0799, 0.2054]) -Greedy action tensor([ 2.0289, 0.4559, -0.2107, 0.3485]) tensor([0.6666, 0.1383, 0.0710, 0.1242]) -Greedy action tensor([ 2.1835, -1.3120, -0.2843, 0.8093]) tensor([0.7309, 0.0222, 0.0620, 0.1849]) -Greedy action tensor([ 1.4594, -0.7115, -0.7274, 0.5376]) tensor([0.6157, 0.0702, 0.0691, 0.2449]) -Greedy action tensor([ 1.8059, -0.7334, -0.4977, 0.4470]) tensor([0.6965, 0.0550, 0.0696, 0.1790]) -Greedy action tensor([ 1.1742, -0.6882, -0.0785, 0.0025]) tensor([0.5711, 0.0887, 0.1632, 0.1770]) -Greedy action tensor([ 0.8741, -0.3125, -0.3298, 0.4369]) tensor([0.4442, 0.1356, 0.1333, 0.2869]) -Greedy action tensor([ 1.0560, -0.3342, -0.3426, 0.3168]) tensor([0.5067, 0.1262, 0.1251, 0.2420]) -Greedy action tensor([ 1.7235, -0.4631, -0.6764, 0.4037]) tensor([0.6802, 0.0764, 0.0617, 0.1817]) -Greedy action tensor([ 1.3772, -0.3136, -0.2271, 0.5945]) tensor([0.5427, 0.1001, 0.1091, 0.2481]) -Greedy action tensor([ 1.5871, -0.2354, -0.7664, 0.2585]) tensor([0.6572, 0.1062, 0.0625, 0.1741]) -Greedy action tensor([ 1.4505, -0.4588, -0.7144, 0.3537]) tensor([0.6262, 0.0928, 0.0719, 0.2091]) -Greedy action tensor([ 1.3688, -0.1898, -0.9502, 0.1674]) tensor([0.6213, 0.1307, 0.0611, 0.1869]) -Greedy action tensor([ 1.3860, -0.4691, -0.4936, -0.0230]) tensor([0.6437, 0.1007, 0.0983, 0.1573]) -Greedy action tensor([ 1.1583, 0.4648, -0.0155, -0.4619]) tensor([0.4983, 0.2491, 0.1541, 0.0986]) -Greedy action tensor([ 1.2811, -0.2996, -0.3033, 0.0649]) tensor([0.5857, 0.1206, 0.1201, 0.1736]) -Greedy action tensor([ 1.4514, -0.4918, -0.5409, 0.2130]) tensor([0.6372, 0.0913, 0.0869, 0.1847]) -Greedy action tensor([ 1.7944, -0.7515, -0.3216, 0.6872]) tensor([0.6538, 0.0513, 0.0788, 0.2161]) -Greedy action tensor([ 1.8826, -0.9130, 0.3121, 0.2335]) tensor([0.6844, 0.0418, 0.1423, 0.1315]) -Greedy action tensor([ 1.5858, -0.6723, -0.2338, 0.4248]) tensor([0.6330, 0.0662, 0.1026, 0.1982]) -Greedy action tensor([ 2.2942, -0.8070, -0.6595, 0.2284]) tensor([0.8171, 0.0368, 0.0426, 0.1035]) -Greedy action tensor([ 0.9557, -0.0978, -0.2940, 0.0494]) tensor([0.4904, 0.1710, 0.1405, 0.1981]) -Greedy action tensor([ 1.4995, -1.1624, -0.4062, 0.1874]) tensor([0.6721, 0.0469, 0.1000, 0.1810]) -Greedy action tensor([ 1.4752, -0.5067, -0.3630, 0.6686]) tensor([0.5736, 0.0791, 0.0913, 0.2560]) -Greedy action tensor([ 1.2351, -0.4599, -0.2461, 0.1803]) tensor([0.5684, 0.1044, 0.1292, 0.1980]) -Greedy action tensor([ 1.9774, -1.2315, -0.3302, 0.5901]) tensor([0.7196, 0.0291, 0.0716, 0.1797]) -Greedy action tensor([ 2.2369, 0.2111, 0.2711, -0.0426]) tensor([0.7277, 0.0960, 0.1019, 0.0745]) -Greedy action tensor([ 1.3879, -0.1734, -0.6129, 0.2279]) tensor([0.6029, 0.1265, 0.0815, 0.1890]) -Greedy action tensor([ 1.3495, -0.3903, -0.6085, 0.0529]) tensor([0.6289, 0.1104, 0.0888, 0.1720]) -Greedy action tensor([ 1.8051, -0.4512, -0.8282, 0.1977]) tensor([0.7262, 0.0761, 0.0522, 0.1455]) -Greedy action tensor([ 1.4077, -0.2389, -0.4334, 0.2367]) tensor([0.6019, 0.1160, 0.0955, 0.1866]) -Greedy action tensor([ 1.3205, -0.1045, -0.9795, 0.1172]) tensor([0.6094, 0.1466, 0.0611, 0.1829]) -Greedy action tensor([ 1.2616, -0.6201, -0.3461, 0.1384]) tensor([0.5960, 0.0908, 0.1194, 0.1938]) -Greedy action tensor([ 1.4674, -0.4687, -0.4301, 0.1422]) tensor([0.6410, 0.0925, 0.0961, 0.1704]) -Greedy action tensor([ 0.8221, 0.0335, -0.2918, 0.0774]) tensor([0.4429, 0.2013, 0.1454, 0.2103]) -Greedy action tensor([ 0.9484, -0.2184, -0.3704, 0.0821]) tensor([0.5002, 0.1557, 0.1338, 0.2103]) -Greedy action tensor([ 2.5549, 1.0669, 0.1627, -0.0149]) tensor([0.7175, 0.1620, 0.0656, 0.0549]) -Greedy action tensor([ 1.4960, -0.7490, -0.5269, 0.5397]) tensor([0.6163, 0.0653, 0.0815, 0.2369]) -Greedy action tensor([ 1.6952, -0.3479, -0.2564, 0.1350]) tensor([0.6749, 0.0875, 0.0959, 0.1418]) -Greedy action tensor([ 1.3446, -0.7157, -0.1427, 0.4118]) tensor([0.5725, 0.0729, 0.1294, 0.2252]) -Greedy action tensor([ 1.3077, 0.0960, -0.4144, 0.0608]) tensor([0.5670, 0.1688, 0.1013, 0.1629]) -Greedy action tensor([ 1.9444, 0.0856, -0.5279, -0.1075]) tensor([0.7306, 0.1139, 0.0617, 0.0939]) -Greedy action tensor([ 1.3239, -0.0769, -1.0846, 0.0174]) tensor([0.6222, 0.1533, 0.0560, 0.1685]) -Greedy action tensor([ 1.2540, -0.2359, -0.4594, 0.4620]) tensor([0.5380, 0.1213, 0.0970, 0.2437]) -Greedy action tensor([ 0.9831, -0.4862, -0.0808, -0.0358]) tensor([0.5165, 0.1188, 0.1782, 0.1865]) -Greedy action tensor([ 1.5640, -0.5314, -0.3332, 0.3509]) tensor([0.6368, 0.0783, 0.0955, 0.1893]) -Greedy action tensor([ 1.3872, -0.1256, -0.7957, 0.1661]) tensor([0.6143, 0.1353, 0.0692, 0.1811]) -Greedy action tensor([ 1.4923, -0.9180, -0.0299, 0.1010]) tensor([0.6424, 0.0577, 0.1402, 0.1598]) -Greedy action tensor([ 2.4478, -1.3400, -0.1609, 1.0192]) tensor([0.7485, 0.0170, 0.0551, 0.1794]) -Greedy action tensor([ 1.8999, -0.6913, -0.2557, 0.3805]) tensor([0.7094, 0.0532, 0.0822, 0.1553]) -Greedy action tensor([ 1.1755, -0.6354, -0.1760, 0.1511]) tensor([0.5614, 0.0918, 0.1453, 0.2015]) -Greedy action tensor([ 1.5797, -0.6044, 0.0438, 0.8267]) tensor([0.5559, 0.0626, 0.1197, 0.2618]) -Greedy action tensor([ 1.3415, -0.3092, -0.8032, 0.2583]) tensor([0.6070, 0.1165, 0.0711, 0.2055]) -Greedy action tensor([ 1.2803, -0.4932, -0.3896, 0.0097]) tensor([0.6103, 0.1036, 0.1149, 0.1713]) -Greedy action tensor([ 1.4685, 0.1688, -0.6076, -0.1359]) tensor([0.6254, 0.1705, 0.0784, 0.1257]) -Greedy action tensor([ 0.8292, -0.7559, 0.1578, -0.3692]) tensor([0.4956, 0.1016, 0.2533, 0.1495]) -Greedy action tensor([ 1.0403, -0.6543, -0.0089, -0.5017]) tensor([0.5721, 0.1051, 0.2004, 0.1224]) -Greedy action tensor([ 0.8050, -0.7300, 0.1291, -0.4031]) tensor([0.4943, 0.1065, 0.2515, 0.1477]) -Greedy action tensor([ 0.2591, 0.2161, -0.1171, -0.0209]) tensor([0.2941, 0.2817, 0.2019, 0.2223]) -Greedy action tensor([ 0.8215, -0.4406, 0.1468, -0.0766]) tensor([0.4546, 0.1287, 0.2315, 0.1852]) -Greedy action tensor([ 0.7276, -0.4860, 0.0797, -0.1776]) tensor([0.4495, 0.1336, 0.2352, 0.1818]) -Greedy action tensor([ 0.7165, -0.4843, 0.0117, -0.1609]) tensor([0.4523, 0.1361, 0.2235, 0.1881]) -Greedy action tensor([ 1.0017, -0.6001, 0.0618, -0.4643]) tensor([0.5485, 0.1106, 0.2143, 0.1266]) -Greedy action tensor([ 0.6057, -0.4217, -0.1180, -0.3237]) tensor([0.4469, 0.1600, 0.2167, 0.1764]) -Greedy action tensor([ 0.5843, -0.2854, -0.0036, -0.0232]) tensor([0.3969, 0.1664, 0.2205, 0.2162]) -Greedy action tensor([ 0.5740, -0.1371, -0.0067, -0.0336]) tensor([0.3853, 0.1892, 0.2156, 0.2099]) -Greedy action tensor([ 0.5921, -0.3747, -0.0264, -0.2245]) tensor([0.4236, 0.1611, 0.2282, 0.1872]) -Greedy action tensor([ 0.7999, -0.3532, -0.1046, -0.1610]) tensor([0.4755, 0.1501, 0.1925, 0.1819]) -Greedy action tensor([ 0.9572, -0.8918, 0.1611, -0.3123]) tensor([0.5293, 0.0833, 0.2387, 0.1487]) -Greedy action tensor([ 1.3416, -0.6036, -0.0652, -0.7503]) tensor([0.6617, 0.0946, 0.1621, 0.0817]) -Greedy action tensor([ 1.3107, -0.9715, 0.0892, -0.9029]) tensor([0.6640, 0.0678, 0.1957, 0.0726]) -Greedy action tensor([ 0.9144, -0.5066, -0.0452, -0.3429]) tensor([0.5239, 0.1265, 0.2007, 0.1490]) -Greedy action tensor([ 0.7090, -0.8892, -0.1091, -0.4096]) tensor([0.5075, 0.1027, 0.2240, 0.1658]) -Greedy action tensor([ 0.6108, -0.5151, 0.0017, -0.2243]) tensor([0.4344, 0.1409, 0.2362, 0.1885]) -Greedy action tensor([ 0.2814, -0.4134, -0.1674, -0.1723]) tensor([0.3606, 0.1800, 0.2302, 0.2291]) -Greedy action tensor([ 1.0789, -0.5404, -0.0762, -0.4085]) tensor([0.5750, 0.1139, 0.1812, 0.1299]) -Greedy action tensor([ 0.6714, -0.3478, -0.0187, -0.1941]) tensor([0.4380, 0.1581, 0.2197, 0.1843]) -Greedy action tensor([ 0.6255, -0.2843, 0.0591, -0.2079]) tensor([0.4158, 0.1674, 0.2360, 0.1807]) -Greedy action tensor([ 0.9036, -0.4413, -0.1917, -0.1640]) tensor([0.5158, 0.1344, 0.1725, 0.1773]) -Greedy action tensor([ 0.2727, 0.1348, -0.0378, -0.0773]) tensor([0.3022, 0.2633, 0.2215, 0.2130]) -Greedy action tensor([ 0.4350, -0.5027, -0.3016, -0.0905]) tensor([0.4062, 0.1591, 0.1945, 0.2402]) -Greedy action tensor([ 1.0294, -0.7424, 0.1227, -0.6941]) tensor([0.5707, 0.0970, 0.2305, 0.1018]) -Greedy action tensor([ 1.0156, -0.6481, -0.1045, -0.7455]) tensor([0.5926, 0.1123, 0.1933, 0.1018]) -Greedy action tensor([ 1.0254, -0.2369, 0.0567, -0.1464]) tensor([0.5070, 0.1435, 0.1924, 0.1571]) -Greedy action tensor([ 1.0350, -0.6428, -0.1110, -0.4492]) tensor([0.5776, 0.1079, 0.1836, 0.1309]) -Greedy action tensor([ 1.1344, -0.6728, 0.0093, -0.4543]) tensor([0.5907, 0.0969, 0.1918, 0.1206]) -Greedy action tensor([ 1.0972, -0.3692, -0.1088, -0.3648]) tensor([0.5676, 0.1310, 0.1699, 0.1315]) -Greedy action tensor([ 0.7376, -0.4008, 0.1074, -0.3919]) tensor([0.4596, 0.1472, 0.2447, 0.1485]) -Greedy action tensor([ 1.1445, -0.4763, -0.1762, -0.6462]) tensor([0.6129, 0.1212, 0.1636, 0.1023]) -Greedy action tensor([ 0.8070, -0.5250, 0.2012, -0.4283]) tensor([0.4761, 0.1257, 0.2598, 0.1384]) -Greedy action tensor([ 0.8989, -0.2869, -0.0694, -0.3941]) tensor([0.5103, 0.1559, 0.1938, 0.1400]) -Greedy action tensor([ 0.9666, -0.5068, 0.0332, -0.3639]) tensor([0.5300, 0.1215, 0.2084, 0.1401]) -Greedy action tensor([ 0.8109, -0.6884, 0.0717, -0.1755]) tensor([0.4822, 0.1077, 0.2303, 0.1798]) -Greedy action tensor([ 0.6836, -0.3348, 0.0115, -0.3406]) tensor([0.4483, 0.1619, 0.2289, 0.1610]) -Greedy action tensor([ 0.8923, -0.6867, -0.0279, -0.4176]) tensor([0.5335, 0.1100, 0.2126, 0.1440]) -Greedy action tensor([ 0.8985, -0.7314, -0.0282, -0.3891]) tensor([0.5354, 0.1049, 0.2119, 0.1477]) -Greedy action tensor([ 1.0141, -0.5574, -0.1408, -0.6507]) tensor([0.5841, 0.1213, 0.1840, 0.1105]) -Greedy action tensor([ 0.8766, -0.6206, 0.0881, -0.4807]) tensor([0.5166, 0.1156, 0.2348, 0.1330]) -Greedy action tensor([ 1.3474, -0.7982, 0.1066, -0.7133]) tensor([0.6521, 0.0763, 0.1886, 0.0831]) -Greedy action tensor([ 0.6834, -0.1371, 0.0804, -0.0684]) tensor([0.4067, 0.1790, 0.2225, 0.1918]) -Greedy action tensor([ 0.5674, -0.1033, -0.1965, -0.0568]) tensor([0.3980, 0.2035, 0.1854, 0.2132]) -Greedy action tensor([ 0.8938, -0.6694, 0.0109, -0.3654]) tensor([0.5244, 0.1098, 0.2169, 0.1489]) -Greedy action tensor([ 0.7708, -0.3404, -0.0234, -0.2739]) tensor([0.4688, 0.1543, 0.2119, 0.1649]) -Greedy action tensor([ 0.7707, -0.2098, -0.0164, -0.0721]) tensor([0.4423, 0.1659, 0.2013, 0.1904]) -Greedy action tensor([ 1.1832, -0.7963, 0.1240, -0.6849]) tensor([0.6100, 0.0843, 0.2115, 0.0942]) -Greedy action tensor([ 0.6067, -0.4143, -0.0735, -0.1704]) tensor([0.4298, 0.1548, 0.2177, 0.1976]) -Greedy action tensor([ 0.3660, -0.1153, -0.2224, -0.2605]) tensor([0.3693, 0.2282, 0.2051, 0.1974]) -Greedy action tensor([ 1.1561, -0.4122, -0.0970, -0.7649]) tensor([0.6096, 0.1270, 0.1741, 0.0893]) -Greedy action tensor([ 0.3604, 0.1723, -0.1318, 0.1165]) tensor([0.3102, 0.2570, 0.1896, 0.2431]) -Greedy action tensor([ 0.7719, -0.4946, 0.0018, -0.5148]) tensor([0.4948, 0.1394, 0.2291, 0.1367]) -Greedy action tensor([ 0.8985, -0.6107, -0.0623, -0.3380]) tensor([0.5280, 0.1167, 0.2020, 0.1533]) -Greedy action tensor([ 0.6438, -0.1125, -0.0288, -0.1355]) tensor([0.4101, 0.1925, 0.2093, 0.1881]) -Greedy action tensor([ 0.4007, -0.0267, -0.0827, -0.4698]) tensor([0.3721, 0.2427, 0.2294, 0.1558]) -Greedy action tensor([ 0.5978, 0.1058, -0.0273, -0.3192]) tensor([0.3927, 0.2401, 0.2102, 0.1570]) -Greedy action tensor([ 0.7984, -0.5839, -0.0604, -0.4344]) tensor([0.5086, 0.1277, 0.2155, 0.1482]) -Greedy action tensor([ 0.4777, -0.0478, -0.1264, 0.0511]) tensor([0.3584, 0.2119, 0.1959, 0.2339]) -Greedy action tensor([ 0.9109, -0.2344, -0.1568, 0.0867]) tensor([0.4761, 0.1515, 0.1637, 0.2088]) -Greedy action tensor([ 0.7491, -0.4007, -0.0887, -0.0318]) tensor([0.4530, 0.1435, 0.1960, 0.2075]) -Greedy action tensor([ 0.6364, 0.0395, -0.0615, -0.0351]) tensor([0.3908, 0.2151, 0.1945, 0.1997]) -Greedy action tensor([ 0.6895, -0.3108, -0.1479, -0.0640]) tensor([0.4403, 0.1619, 0.1906, 0.2073]) -Greedy action tensor([ 0.8876, -0.5456, -0.0944, -0.3288]) tensor([0.5237, 0.1249, 0.1962, 0.1552]) -Greedy action tensor([ 0.6961, -0.6627, -0.1350, -0.1432]) tensor([0.4707, 0.1210, 0.2050, 0.2034]) -Greedy action tensor([ 0.7524, -0.3733, -0.0550, -0.0984]) tensor([0.4551, 0.1476, 0.2030, 0.1943]) -Greedy action tensor([ 1.3349, -0.7351, -0.0151, -0.6200]) tensor([0.6549, 0.0826, 0.1698, 0.0927]) -Greedy action tensor([ 0.9906, -0.6818, -0.1520, -0.7342]) tensor([0.5935, 0.1115, 0.1893, 0.1058]) -Greedy action tensor([ 0.9519, -0.7525, 0.0249, -0.7291]) tensor([0.5669, 0.1031, 0.2244, 0.1056]) -Greedy action tensor([ 1.0039, -0.8874, -0.0148, -0.4212]) tensor([0.5707, 0.0861, 0.2060, 0.1372]) -Greedy action tensor([ 0.7489, -0.3514, -0.0582, -0.4360]) tensor([0.4797, 0.1596, 0.2140, 0.1467]) -Greedy action tensor([ 0.7477, -0.4691, -0.0672, -0.4325]) tensor([0.4887, 0.1448, 0.2163, 0.1501]) -Greedy action tensor([ 0.2276, 0.1913, -0.2011, -0.0223]) tensor([0.2946, 0.2841, 0.1919, 0.2294]) -Greedy action tensor([ 0.3872, -0.2881, -0.0079, -0.1025]) tensor([0.3577, 0.1821, 0.2410, 0.2192]) -Greedy action tensor([ 1.1249, -0.9524, 0.1335, -0.5953]) tensor([0.5969, 0.0748, 0.2215, 0.1069]) -Greedy action tensor([ 0.3125, 0.1190, -0.1277, -0.0017]) tensor([0.3127, 0.2576, 0.2013, 0.2284]) -Greedy action tensor([ 0.7695, -0.3187, -0.0551, -0.1257]) tensor([0.4579, 0.1542, 0.2007, 0.1871]) -Greedy action tensor([ 0.8442, -0.4089, 0.0534, -0.3712]) tensor([0.4912, 0.1403, 0.2228, 0.1457]) -Greedy action tensor([ 1.1194, -0.7325, -0.0558, -0.4413]) tensor([0.5968, 0.0937, 0.1843, 0.1253]) -Greedy action tensor([ 1.3519, -0.5611, -0.6754, 0.2001]) tensor([0.6268, 0.0925, 0.0825, 0.1981]) -Greedy action tensor([ 1.4127, -0.0943, -0.3890, 0.1128]) tensor([0.6027, 0.1335, 0.0995, 0.1643]) -Greedy action tensor([ 1.0906, -0.3663, -0.7210, -0.0089]) tensor([0.5782, 0.1347, 0.0945, 0.1926]) -Greedy action tensor([ 1.4251, -0.2056, -0.3822, 0.1904]) tensor([0.6058, 0.1186, 0.0994, 0.1762]) -Greedy action tensor([ 1.8522, -0.6835, -0.5745, 1.2236]) tensor([0.5879, 0.0466, 0.0519, 0.3136]) -Greedy action tensor([ 1.8743, -0.5799, -0.3956, 0.7683]) tensor([0.6578, 0.0565, 0.0680, 0.2177]) -Greedy action tensor([ 0.8957, -0.3407, 0.4067, -0.2373]) tensor([0.4493, 0.1305, 0.2755, 0.1447]) -Greedy action tensor([ 1.5666, -0.5841, -0.7986, 0.2775]) tensor([0.6730, 0.0783, 0.0632, 0.1854]) -Greedy action tensor([ 1.3650, -0.5155, -0.6872, 0.1057]) tensor([0.6391, 0.0975, 0.0821, 0.1814]) -Greedy action tensor([ 1.4131, -0.4777, -0.6589, 0.2686]) tensor([0.6269, 0.0946, 0.0789, 0.1996]) -Greedy action tensor([ 1.1357, -1.1961, -0.4714, -0.1413]) tensor([0.6343, 0.0616, 0.1272, 0.1769]) -Greedy action tensor([ 2.2410, -1.4928, -0.1888, 0.8035]) tensor([0.7410, 0.0177, 0.0653, 0.1760]) -Greedy action tensor([ 1.1794, -0.3643, -0.5912, 0.4473]) tensor([0.5363, 0.1145, 0.0913, 0.2579]) -Greedy action tensor([ 0.9658, 0.0716, -0.8577, 0.1564]) tensor([0.4962, 0.2029, 0.0801, 0.2209]) -Greedy action tensor([ 0.9771, -0.5297, 0.1733, 0.1758]) tensor([0.4722, 0.1046, 0.2113, 0.2119]) -Greedy action tensor([ 1.4554, -0.2290, -0.6417, 0.6717]) tensor([0.5665, 0.1051, 0.0696, 0.2588]) -Greedy action tensor([ 1.8178, -0.5596, -1.1980, 0.3706]) tensor([0.7262, 0.0674, 0.0356, 0.1708]) -Greedy action tensor([ 2.1055, 0.7241, -0.3377, 0.3144]) tensor([0.6645, 0.1669, 0.0577, 0.1108]) -Greedy action tensor([ 0.9218, -0.1435, -0.4851, 0.3573]) tensor([0.4634, 0.1597, 0.1135, 0.2635]) -Greedy action tensor([ 1.2558, -0.2764, -0.6270, 0.4749]) tensor([0.5476, 0.1183, 0.0833, 0.2508]) -Greedy action tensor([ 1.3396, -0.4853, -0.4187, 0.0905]) tensor([0.6172, 0.0995, 0.1064, 0.1770]) -Greedy action tensor([ 1.7910, 0.3120, -0.2452, 0.4280]) tensor([0.6195, 0.1412, 0.0809, 0.1585]) -Greedy action tensor([ 1.4604, -0.3255, -0.5250, 0.1224]) tensor([0.6380, 0.1070, 0.0876, 0.1674]) -Greedy action tensor([ 1.0046, -0.4878, -0.8961, 0.5055]) tensor([0.5047, 0.1135, 0.0754, 0.3064]) -Greedy action tensor([ 2.2854, -0.8440, -0.4440, 0.3468]) tensor([0.7981, 0.0349, 0.0521, 0.1149]) -Greedy action tensor([ 1.2339e+00, -1.4609e-04, -6.8717e-01, 5.1458e-02]) tensor([0.5734, 0.1669, 0.0840, 0.1758]) -Greedy action tensor([ 2.0159, -1.0494, -0.1091, 0.4418]) tensor([0.7282, 0.0340, 0.0870, 0.1509]) -Greedy action tensor([ 1.4732, -0.1724, -0.7793, 0.2864]) tensor([0.6237, 0.1203, 0.0656, 0.1904]) -Greedy action tensor([ 1.6579, -0.2062, -0.9963, 0.4955]) tensor([0.6501, 0.1008, 0.0457, 0.2033]) -Greedy action tensor([ 1.0938, -0.1297, -0.5036, 0.1276]) tensor([0.5327, 0.1567, 0.1078, 0.2027]) -Greedy action tensor([ 1.1654, -0.2767, -0.5164, 0.4060]) tensor([0.5290, 0.1251, 0.0984, 0.2475]) -Greedy action tensor([ 1.5915, 0.4476, -0.5394, 0.4637]) tensor([0.5678, 0.1809, 0.0674, 0.1838]) -Greedy action tensor([ 1.5854, -0.0254, -0.6844, 0.1555]) tensor([0.6483, 0.1295, 0.0670, 0.1552]) -Greedy action tensor([ 1.2879, -0.5453, -0.6479, 0.2565]) tensor([0.6022, 0.0963, 0.0869, 0.2147]) -Greedy action tensor([ 2.0267, -0.2139, -0.6810, 0.3657]) tensor([0.7337, 0.0781, 0.0489, 0.1394]) -Greedy action tensor([ 1.2226, -0.2059, -0.9731, 0.1340]) tensor([0.5925, 0.1420, 0.0659, 0.1995]) -Greedy action tensor([ 0.9421, -0.3745, -0.3116, -0.0051]) tensor([0.5151, 0.1381, 0.1470, 0.1998]) -Greedy action tensor([ 1.5192, -0.5538, -0.5849, 0.4644]) tensor([0.6265, 0.0788, 0.0764, 0.2182]) -Greedy action tensor([ 1.7998, -0.6512, -0.7217, -0.2075]) tensor([0.7687, 0.0663, 0.0618, 0.1033]) -Greedy action tensor([ 2.1864, -0.6722, -0.3488, 0.2412]) tensor([0.7815, 0.0448, 0.0619, 0.1117]) -Greedy action tensor([ 1.3688, -0.6611, -0.3957, 0.3861]) tensor([0.5963, 0.0783, 0.1021, 0.2232]) -Greedy action tensor([ 1.0796, -0.1139, 0.0608, -0.7342]) tensor([0.5473, 0.1659, 0.1976, 0.0892]) -Greedy action tensor([ 1.3129, -0.4113, -0.4461, 0.2303]) tensor([0.5920, 0.1056, 0.1019, 0.2005]) -Greedy action tensor([ 1.4016, 0.0652, -0.2529, -0.0342]) tensor([0.5910, 0.1553, 0.1130, 0.1406]) -Greedy action tensor([ 0.9236, -0.3883, -0.3629, 0.5536]) tensor([0.4472, 0.1204, 0.1235, 0.3089]) -Greedy action tensor([ 1.1858, -0.0320, -0.7424, -0.2224]) tensor([0.5932, 0.1755, 0.0863, 0.1451]) -Greedy action tensor([ 2.2033, -1.8280, -0.0566, 0.0828]) tensor([0.8051, 0.0143, 0.0840, 0.0966]) -Greedy action tensor([ 1.6228, -0.7288, -0.3399, 0.1290]) tensor([0.6848, 0.0652, 0.0962, 0.1538]) -Greedy action tensor([ 1.6103, -1.0445, -0.1279, 0.7636]) tensor([0.5970, 0.0420, 0.1050, 0.2560]) -Greedy action tensor([ 2.3958, -0.8215, -0.5231, 0.6240]) tensor([0.7911, 0.0317, 0.0427, 0.1345]) -Greedy action tensor([ 2.3250, -1.3382, -0.0927, 0.4740]) tensor([0.7862, 0.0202, 0.0701, 0.1235]) -Greedy action tensor([ 1.5461, -0.3875, -0.3295, 0.6796]) tensor([0.5820, 0.0842, 0.0892, 0.2447]) -Greedy action tensor([ 1.7191, -0.7823, -0.3947, 0.2957]) tensor([0.6927, 0.0568, 0.0837, 0.1669]) -Greedy action tensor([ 1.4451, -0.2719, -0.5554, 0.2907]) tensor([0.6135, 0.1102, 0.0830, 0.1934]) -Greedy action tensor([ 1.4884, -0.2075, -1.1168, 0.3629]) tensor([0.6322, 0.1160, 0.0467, 0.2051]) -Greedy action tensor([ 1.2991, -0.6785, -0.2913, 0.2108]) tensor([0.5956, 0.0824, 0.1214, 0.2006]) -Greedy action tensor([ 2.1410, -1.0155, -0.2809, -0.0029]) tensor([0.8009, 0.0341, 0.0711, 0.0939]) -Greedy action tensor([ 1.6114, -0.8221, -0.5107, 0.4065]) tensor([0.6635, 0.0582, 0.0795, 0.1989]) -Greedy action tensor([ 0.9339, -0.3408, -0.3558, 0.4744]) tensor([0.4574, 0.1278, 0.1259, 0.2889]) -Greedy action tensor([ 0.6974, -0.2991, -0.0854, 0.1327]) tensor([0.4176, 0.1541, 0.1909, 0.2374]) -Greedy action tensor([ 1.6442, -0.7556, -0.1839, 0.8472]) tensor([0.5875, 0.0533, 0.0944, 0.2648]) -Greedy action tensor([ 1.3269, -0.6543, 0.2003, -0.1067]) tensor([0.5881, 0.0811, 0.1906, 0.1402]) -Greedy action tensor([ 1.0861, -0.4956, -0.5708, 0.0986]) tensor([0.5653, 0.1162, 0.1078, 0.2106]) -Greedy action tensor([ 1.5037, -0.6204, -0.7457, 0.4576]) tensor([0.6344, 0.0758, 0.0669, 0.2229]) -Greedy action tensor([ 1.4744, -0.3916, -0.6050, 0.2195]) tensor([0.6391, 0.0989, 0.0799, 0.1822]) -Greedy action tensor([ 2.1736, -1.0217, -0.2982, 0.4331]) tensor([0.7687, 0.0315, 0.0649, 0.1349]) -Greedy action tensor([ 1.3207, -0.2569, -0.6137, -0.0067]) tensor([0.6188, 0.1278, 0.0894, 0.1641]) -Greedy action tensor([ 1.2740, -0.4204, -0.4710, 0.3344]) tensor([0.5717, 0.1050, 0.0998, 0.2234]) -Greedy action tensor([ 0.7168, -0.0185, -0.0639, -0.0021]) tensor([0.4124, 0.1977, 0.1889, 0.2010]) -Greedy action tensor([ 1.1994, -0.5862, -0.3842, 0.4320]) tensor([0.5443, 0.0913, 0.1117, 0.2527]) -Greedy action tensor([ 1.7820, -0.4767, -0.3379, 0.5029]) tensor([0.6654, 0.0695, 0.0799, 0.1852]) -Greedy action tensor([ 1.9311, -0.3243, -0.3649, 0.5604]) tensor([0.6852, 0.0718, 0.0690, 0.1740]) -Greedy action tensor([ 1.2668, -0.1750, -0.7077, 0.5571]) tensor([0.5356, 0.1267, 0.0744, 0.2634]) -Greedy action tensor([ 1.5301, -0.3044, -1.1470, 0.6434]) tensor([0.6096, 0.0973, 0.0419, 0.2511]) -Greedy action tensor([ 0.9257, -0.0811, -0.7297, 0.1349]) tensor([0.4975, 0.1818, 0.0950, 0.2256]) -Greedy action tensor([ 1.5345, -0.5874, -0.1366, 0.4343]) tensor([0.6095, 0.0730, 0.1146, 0.2029]) -Greedy action tensor([ 2.0734, -0.1073, -0.5168, 0.3474]) tensor([0.7321, 0.0827, 0.0549, 0.1303]) -Greedy action tensor([ 1.4895, -0.4511, -0.5619, 0.4922]) tensor([0.6094, 0.0875, 0.0783, 0.2248]) -Greedy action tensor([ 1.3718, -0.1261, -0.8769, 0.3999]) tensor([0.5857, 0.1309, 0.0618, 0.2216]) -Greedy action tensor([ 1.2276, -0.0870, -0.8238, 0.1241]) tensor([0.5784, 0.1553, 0.0744, 0.1919]) -Greedy action tensor([ 1.2293, -0.2048, -0.5221, -0.0322]) tensor([0.5899, 0.1406, 0.1024, 0.1671]) -Greedy action tensor([-1.1903, -0.4267, 0.3331, -0.0316]) tensor([0.0916, 0.1965, 0.4201, 0.2917]) -Greedy action tensor([-0.5323, 0.0283, 0.1762, 0.2230]) tensor([0.1447, 0.2535, 0.2939, 0.3080]) -Greedy action tensor([-1.5393, -0.3307, 0.6637, -0.0340]) tensor([0.0558, 0.1870, 0.5055, 0.2516]) -Greedy action tensor([-1.5459, -0.2067, 0.4392, -0.0403]) tensor([0.0602, 0.2299, 0.4385, 0.2714]) -Greedy action tensor([-1.8195, -0.3394, 0.6484, -0.0829]) tensor([0.0437, 0.1921, 0.5159, 0.2483]) -Greedy action tensor([-1.7682, -0.3663, 0.6490, -0.0351]) tensor([0.0456, 0.1852, 0.5113, 0.2579]) -Greedy action tensor([-1.8823, -0.4650, 0.6349, -0.1581]) tensor([0.0432, 0.1784, 0.5359, 0.2425]) -Greedy action tensor([-1.8800, -0.2650, 0.6146, -0.1444]) tensor([0.0420, 0.2111, 0.5087, 0.2382]) -Greedy action tensor([-1.6960, 0.3573, 0.4445, -0.0935]) tensor([0.0449, 0.3501, 0.3820, 0.2230]) -Greedy action tensor([-1.4621, 0.1191, 0.3585, -0.0175]) tensor([0.0614, 0.2986, 0.3794, 0.2605]) -Greedy action tensor([-1.5460, -0.5745, 0.7403, -0.1783]) tensor([0.0575, 0.1518, 0.5652, 0.2256]) -Greedy action tensor([-1.9203, -0.4089, 0.6561, -0.1665]) tensor([0.0409, 0.1853, 0.5376, 0.2362]) -Greedy action tensor([-1.9327, -0.4359, 0.6624, -0.1737]) tensor([0.0405, 0.1811, 0.5430, 0.2354]) -Greedy action tensor([-1.1837, -0.3982, 0.3467, -0.0097]) tensor([0.0905, 0.1985, 0.4182, 0.2928]) -Greedy action tensor([-1.9308, -0.4444, 0.6619, -0.1733]) tensor([0.0407, 0.1798, 0.5437, 0.2358]) -Greedy action tensor([-1.8760, -0.2492, 0.6016, -0.1502]) tensor([0.0423, 0.2154, 0.5044, 0.2378]) -Greedy action tensor([-1.6695, -0.5412, 0.5195, -0.0277]) tensor([0.0550, 0.1700, 0.4910, 0.2841]) -Greedy action tensor([-1.8819, -0.4355, 0.6354, -0.1423]) tensor([0.0428, 0.1820, 0.5311, 0.2440]) -Greedy action tensor([-1.9090, -0.4217, 0.6506, -0.1624]) tensor([0.0415, 0.1837, 0.5367, 0.2381]) -Greedy action tensor([-1.6071, -0.5668, 0.5060, -0.0520]) tensor([0.0594, 0.1681, 0.4913, 0.2812]) -Greedy action tensor([-1.9417, -0.4444, 0.6645, -0.1790]) tensor([0.0403, 0.1799, 0.5453, 0.2346]) -Greedy action tensor([0.7044, 0.1131, 0.6849, 1.3937]) tensor([0.2209, 0.1223, 0.2167, 0.4401]) -Greedy action tensor([-0.4429, 0.9775, -0.0271, 0.0287]) tensor([0.1211, 0.5012, 0.1836, 0.1941]) -Greedy action tensor([-0.3071, -0.4802, 1.0193, 1.3015]) tensor([0.0943, 0.0793, 0.3553, 0.4711]) -Greedy action tensor([-1.7502, -0.4599, 0.5754, -0.1216]) tensor([0.0501, 0.1820, 0.5126, 0.2553]) -Greedy action tensor([-1.9111, -0.3952, 0.6486, -0.1606]) tensor([0.0413, 0.1878, 0.5334, 0.2375]) -Greedy action tensor([-1.9258, -0.4339, 0.6528, -0.1689]) tensor([0.0410, 0.1821, 0.5397, 0.2373]) -Greedy action tensor([-0.8479, -0.6084, 0.9986, 1.3425]) tensor([0.0570, 0.0724, 0.3612, 0.5094]) -Greedy action tensor([-1.9059, -0.4636, 0.6522, -0.1529]) tensor([0.0418, 0.1769, 0.5399, 0.2414]) -Greedy action tensor([-1.9127, -0.4299, 0.6505, -0.1621]) tensor([0.0414, 0.1825, 0.5376, 0.2385]) -Greedy action tensor([-1.8547, -0.4616, 0.6219, -0.1421]) tensor([0.0445, 0.1792, 0.5296, 0.2467]) -Greedy action tensor([-1.3407, 0.0219, 0.6019, 0.2547]) tensor([0.0595, 0.2323, 0.4149, 0.2932]) -Greedy action tensor([-1.8144, -0.0124, 0.5983, -0.2468]) tensor([0.0434, 0.2633, 0.4850, 0.2083]) -Greedy action tensor([-1.8081, -0.2658, 0.5725, -0.1276]) tensor([0.0458, 0.2139, 0.4947, 0.2456]) -Greedy action tensor([-1.8116, -0.1492, 0.5581, -0.0978]) tensor([0.0444, 0.2341, 0.4750, 0.2465]) -Greedy action tensor([-1.8624, -0.4625, 0.6262, -0.1539]) tensor([0.0442, 0.1793, 0.5325, 0.2441]) -Greedy action tensor([-1.0131, -0.2789, 0.1320, 0.3857]) tensor([0.0973, 0.2028, 0.3058, 0.3941]) -Greedy action tensor([-0.7779, 0.4495, 0.5965, 0.8939]) tensor([0.0731, 0.2493, 0.2888, 0.3888]) -Greedy action tensor([-1.7744, 0.1861, 0.5159, -0.1647]) tensor([0.0435, 0.3091, 0.4298, 0.2176]) -Greedy action tensor([-1.8541, -0.1882, 0.5982, -0.0978]) tensor([0.0422, 0.2233, 0.4902, 0.2444]) -Greedy action tensor([-1.9232, -0.4364, 0.6587, -0.1677]) tensor([0.0409, 0.1810, 0.5412, 0.2368]) -Greedy action tensor([-1.9119, -0.4309, 0.6549, -0.1639]) tensor([0.0414, 0.1820, 0.5390, 0.2377]) -Greedy action tensor([-1.8975, -0.4111, 0.6472, -0.1533]) tensor([0.0419, 0.1851, 0.5334, 0.2396]) -Greedy action tensor([-1.7584, -0.2963, 0.5664, -0.1057]) tensor([0.0482, 0.2078, 0.4925, 0.2515]) -Greedy action tensor([-0.7028, -0.1469, 0.3003, 0.8234]) tensor([0.0993, 0.1731, 0.2707, 0.4568]) -Greedy action tensor([-1.1258e+00, 1.6396e-01, 2.4805e-01, 1.0158e-03]) tensor([0.0857, 0.3113, 0.3386, 0.2645]) -Greedy action tensor([-1.2588, 0.2188, 0.2738, 0.1524]) tensor([0.0709, 0.3105, 0.3281, 0.2906]) -Greedy action tensor([-1.7393, -0.0886, 0.5679, -0.0534]) tensor([0.0462, 0.2406, 0.4639, 0.2493]) -Greedy action tensor([ 0.6049, 1.1222, -0.0964, 0.3351]) tensor([0.2540, 0.4261, 0.1260, 0.1939]) -Greedy action tensor([-1.9172, -0.4284, 0.6512, -0.1679]) tensor([0.0413, 0.1829, 0.5384, 0.2374]) -Greedy action tensor([-1.7828, -0.4625, 0.5881, -0.0791]) tensor([0.0477, 0.1788, 0.5112, 0.2623]) -Greedy action tensor([-1.4987, 0.2060, 0.4321, -0.1593]) tensor([0.0581, 0.3195, 0.4006, 0.2218]) -Greedy action tensor([-1.7744, -0.2745, 0.6105, -0.0807]) tensor([0.0459, 0.2058, 0.4986, 0.2498]) -Greedy action tensor([-0.5944, 0.2211, 0.4225, 0.6295]) tensor([0.1061, 0.2398, 0.2933, 0.3608]) -Greedy action tensor([-1.8795, -0.4307, 0.6439, -0.1435]) tensor([0.0427, 0.1819, 0.5329, 0.2425]) -Greedy action tensor([-1.9197, -0.4326, 0.6543, -0.1657]) tensor([0.0411, 0.1819, 0.5394, 0.2376]) -Greedy action tensor([-1.8745, -0.4383, 0.6357, -0.1464]) tensor([0.0432, 0.1817, 0.5318, 0.2433]) -Greedy action tensor([-0.2790, 1.1098, -0.0013, 0.4266]) tensor([0.1197, 0.4800, 0.1580, 0.2424]) -Greedy action tensor([-0.9764, 0.5377, 0.2048, 0.4226]) tensor([0.0778, 0.3536, 0.2535, 0.3151]) -Greedy action tensor([-0.9487, 0.9318, 0.1229, 0.2294]) tensor([0.0729, 0.4777, 0.2128, 0.2367]) -Greedy action tensor([-1.8909, -0.3014, 0.6272, -0.1850]) tensor([0.0420, 0.2058, 0.5209, 0.2312]) -Greedy action tensor([-1.6913, -0.2929, 0.5149, -0.0762]) tensor([0.0522, 0.2113, 0.4740, 0.2625]) -Greedy action tensor([-1.7424, -0.3605, 0.5527, -0.0984]) tensor([0.0498, 0.1983, 0.4942, 0.2577]) -Greedy action tensor([-1.1352, 0.3830, 0.2205, 0.0227]) tensor([0.0792, 0.3615, 0.3072, 0.2521]) -Greedy action tensor([-1.9124, -0.3615, 0.6407, -0.1546]) tensor([0.0410, 0.1936, 0.5273, 0.2381]) -Greedy action tensor([-1.5941, -0.4410, 0.4757, -0.0091]) tensor([0.0589, 0.1867, 0.4669, 0.2875]) -Greedy action tensor([-1.5303, -0.4515, 0.5452, 0.2324]) tensor([0.0564, 0.1658, 0.4492, 0.3286]) -Greedy action tensor([-1.9196, -0.4513, 0.6581, -0.1686]) tensor([0.0412, 0.1789, 0.5425, 0.2374]) -Greedy action tensor([-0.5206, -0.2962, 0.5677, 0.7962]) tensor([0.1117, 0.1398, 0.3317, 0.4168]) -Greedy action tensor([-1.9258, -0.4553, 0.6589, -0.1685]) tensor([0.0410, 0.1783, 0.5432, 0.2375]) -Greedy action tensor([-1.7535, 0.2364, 0.4616, -0.0498]) tensor([0.0435, 0.3184, 0.3989, 0.2392]) -Greedy action tensor([-1.8856, -0.3241, 0.6199, -0.1324]) tensor([0.0420, 0.2003, 0.5149, 0.2427]) -Greedy action tensor([-1.6588, -0.5234, 0.5343, -0.0960]) tensor([0.0560, 0.1744, 0.5022, 0.2674]) -Greedy action tensor([-1.9015, -0.4029, 0.6459, -0.1569]) tensor([0.0417, 0.1867, 0.5328, 0.2388]) -Greedy action tensor([-1.9086, -0.4492, 0.6512, -0.1642]) tensor([0.0417, 0.1796, 0.5398, 0.2388]) -Greedy action tensor([-1.8005, -0.4568, 0.6092, -0.0671]) tensor([0.0462, 0.1773, 0.5147, 0.2618]) -Greedy action tensor([-1.7882, -0.4956, 0.6112, -0.1099]) tensor([0.0476, 0.1733, 0.5242, 0.2549]) -Greedy action tensor([-1.8984, -0.4480, 0.6487, -0.1549]) tensor([0.0421, 0.1796, 0.5376, 0.2407]) -Greedy action tensor([-1.8392, -0.3857, 0.6100, -0.1198]) tensor([0.0446, 0.1907, 0.5160, 0.2487]) -Greedy action tensor([-1.1049, -0.1222, 0.2394, -0.1240]) tensor([0.0983, 0.2626, 0.3770, 0.2621]) -Greedy action tensor([-1.5262, 0.1915, 0.3950, -0.0620]) tensor([0.0564, 0.3143, 0.3853, 0.2440]) -Greedy action tensor([-0.2347, 0.2258, 0.9045, -0.4426]) tensor([0.1533, 0.2430, 0.4791, 0.1246]) -Greedy action tensor([-0.7198, -2.2125, 0.3842, 1.1566]) tensor([0.0928, 0.0209, 0.2800, 0.6063]) -Greedy action tensor([-0.0996, 0.6879, -0.2195, -0.0819]) tensor([0.1960, 0.4307, 0.1738, 0.1995]) -Greedy action tensor([ 1.1204, -0.3744, 0.0467, -0.1629]) tensor([0.5426, 0.1217, 0.1854, 0.1504]) -Greedy action tensor([-0.7164, -0.2195, -0.0778, -1.3670]) tensor([0.1977, 0.3249, 0.3743, 0.1031]) -Greedy action tensor([ 0.9772, -0.1235, 0.6550, -0.1586]) tensor([0.4205, 0.1399, 0.3046, 0.1350]) -Greedy action tensor([-0.6126, -1.3747, 0.8956, 0.2874]) tensor([0.1184, 0.0553, 0.5351, 0.2913]) -Greedy action tensor([ 1.0886, -0.2554, 0.0429, 0.5942]) tensor([0.4500, 0.1174, 0.1581, 0.2745]) -Greedy action tensor([ 0.2772, 0.3229, 0.8182, -0.9377]) tensor([0.2462, 0.2578, 0.4230, 0.0731]) -Greedy action tensor([-0.0289, 0.0057, -0.6580, 1.0872]) tensor([0.1779, 0.1842, 0.0948, 0.5431]) -Greedy action tensor([ 0.4888, -0.3043, -0.9989, 0.5925]) tensor([0.3587, 0.1623, 0.0810, 0.3979]) -Greedy action tensor([ 0.6284, -0.8065, 0.7925, -0.8655]) tensor([0.3786, 0.0902, 0.4462, 0.0850]) -Greedy action tensor([-0.2093, 0.6497, 0.5613, -0.0424]) tensor([0.1492, 0.3522, 0.3224, 0.1763]) -Greedy action tensor([-1.0048, -0.0875, 1.3242, -1.0998]) tensor([0.0681, 0.1705, 0.6994, 0.0620]) -Greedy action tensor([-0.0774, -0.8600, 0.9917, -1.0966]) tensor([0.2114, 0.0966, 0.6157, 0.0763]) -Greedy action tensor([ 0.9746, 0.5889, -0.2210, 0.1715]) tensor([0.4114, 0.2798, 0.1245, 0.1843]) -Greedy action tensor([-1.1111, -0.9801, -0.8017, -0.0425]) tensor([0.1559, 0.1777, 0.2125, 0.4539]) -Greedy action tensor([-1.2968, -0.4680, -0.7230, 0.4148]) tensor([0.0943, 0.2160, 0.1674, 0.5223]) -Greedy action tensor([-1.6513, -0.6558, 0.7546, -0.6146]) tensor([0.0568, 0.1536, 0.6295, 0.1601]) -Greedy action tensor([ 0.0587, -1.5187, -0.4012, -0.8606]) tensor([0.4471, 0.0923, 0.2823, 0.1783]) -Greedy action tensor([ 1.4202, -0.0372, 1.0957, 1.1229]) tensor([0.3706, 0.0863, 0.2679, 0.2753]) -Greedy action tensor([-0.2856, -0.5144, -0.1019, -0.6772]) tensor([0.2722, 0.2166, 0.3272, 0.1840]) -Greedy action tensor([-0.3666, -2.4423, -0.0360, 1.0377]) tensor([0.1518, 0.0190, 0.2112, 0.6180]) -Greedy action tensor([ 0.5501, -0.7564, 0.8642, 0.8052]) tensor([0.2544, 0.0689, 0.3483, 0.3284]) -Greedy action tensor([ 1.3351, -0.7264, 1.0179, 0.2304]) tensor([0.4573, 0.0582, 0.3330, 0.1515]) -Greedy action tensor([-0.4574, -1.0491, -0.3078, 0.0389]) tensor([0.2295, 0.1270, 0.2665, 0.3770]) -Greedy action tensor([ 1.1906, -0.2034, 0.6618, -0.4491]) tensor([0.4923, 0.1221, 0.2901, 0.0955]) -Greedy action tensor([ 0.1562, -1.6699, 0.4711, -0.7454]) tensor([0.3405, 0.0548, 0.4665, 0.1382]) -Greedy action tensor([-0.2086, 0.0890, 0.2246, -0.1752]) tensor([0.2031, 0.2735, 0.3133, 0.2100]) -Greedy action tensor([-0.7425, -1.7539, 0.8733, -0.5584]) tensor([0.1316, 0.0479, 0.6623, 0.1582]) -Greedy action tensor([-0.5781, -0.4815, 0.3652, -0.9118]) tensor([0.1857, 0.2045, 0.4769, 0.1330]) -Greedy action tensor([-0.1162, 0.0084, -0.0763, -0.5962]) tensor([0.2637, 0.2987, 0.2744, 0.1632]) -Greedy action tensor([ 1.5157, -0.3274, 0.0833, 0.8691]) tensor([0.5206, 0.0824, 0.1243, 0.2727]) -Greedy action tensor([-1.2369, 0.2637, 0.5400, -0.6345]) tensor([0.0756, 0.3392, 0.4471, 0.1381]) -Greedy action tensor([-0.9275, -0.7886, -0.9739, 0.7958]) tensor([0.1149, 0.1320, 0.1096, 0.6435]) -Greedy action tensor([-0.2341, 0.0646, -0.0467, -0.6149]) tensor([0.2360, 0.3181, 0.2846, 0.1613]) -Greedy action tensor([ 0.3765, 0.5082, 0.1513, -0.2052]) tensor([0.2859, 0.3261, 0.2282, 0.1598]) -Greedy action tensor([-0.5551, -0.4333, 0.4974, -1.2575]) tensor([0.1821, 0.2057, 0.5219, 0.0902]) -Greedy action tensor([-0.0620, -1.4340, -0.3746, -0.3704]) tensor([0.3677, 0.0932, 0.2690, 0.2701]) -Greedy action tensor([-0.7992, -0.6281, -0.6884, -0.0447]) tensor([0.1841, 0.2185, 0.2057, 0.3916]) -Greedy action tensor([ 1.2286, 0.4485, 0.0748, -0.1616]) tensor([0.4944, 0.2266, 0.1559, 0.1231]) -Greedy action tensor([-0.0026, -0.0253, 0.9014, 0.4057]) tensor([0.1680, 0.1643, 0.4149, 0.2528]) -Greedy action tensor([-0.5585, 0.0077, 0.5929, -1.0313]) tensor([0.1527, 0.2690, 0.4830, 0.0952]) -Greedy action tensor([-0.2703, -1.1990, -0.5127, -0.5627]) tensor([0.3417, 0.1350, 0.2682, 0.2551]) -Greedy action tensor([-0.1114, -0.1883, 1.0951, -1.4177]) tensor([0.1806, 0.1672, 0.6034, 0.0489]) -Greedy action tensor([-0.5461, -0.8777, -0.4291, -0.7518]) tensor([0.2735, 0.1963, 0.3075, 0.2227]) -Greedy action tensor([ 0.4443, -1.2528, 0.5462, -0.5011]) tensor([0.3733, 0.0684, 0.4133, 0.1450]) -Greedy action tensor([-0.5894, 0.4885, -0.0997, -1.2037]) tensor([0.1636, 0.4808, 0.2670, 0.0885]) -Greedy action tensor([-0.2428, -2.2923, -0.1058, 0.1700]) tensor([0.2641, 0.0340, 0.3029, 0.3991]) -Greedy action tensor([-0.1512, -0.6762, -0.4181, -0.2013]) tensor([0.3022, 0.1788, 0.2315, 0.2875]) -Greedy action tensor([-0.0464, 0.2350, -1.0070, -0.2829]) tensor([0.2860, 0.3789, 0.1094, 0.2257]) -Greedy action tensor([ 1.1972, -0.0599, -0.1384, 0.6460]) tensor([0.4709, 0.1340, 0.1238, 0.2713]) -Greedy action tensor([-0.7203, -0.3915, 0.3143, -0.7899]) tensor([0.1630, 0.2264, 0.4586, 0.1520]) -Greedy action tensor([-1.1532, -1.2928, 0.1983, -0.3200]) tensor([0.1245, 0.1083, 0.4809, 0.2864]) -Greedy action tensor([-1.0236, -0.5225, -0.6460, -0.5579]) tensor([0.1754, 0.2894, 0.2558, 0.2794]) -Greedy action tensor([-0.8511, -0.9310, 0.8035, -1.0280]) tensor([0.1251, 0.1155, 0.6545, 0.1048]) -Greedy action tensor([ 0.2799, 0.5578, -0.3791, -0.5740]) tensor([0.3064, 0.4046, 0.1585, 0.1305]) -Greedy action tensor([ 1.0675, -0.4092, 0.1993, 0.4543]) tensor([0.4567, 0.1043, 0.1917, 0.2473]) -Greedy action tensor([ 0.1195, 0.5911, 0.2038, -0.3247]) tensor([0.2308, 0.3700, 0.2512, 0.1481]) -Greedy action tensor([ 0.8614, -0.7466, 0.1736, -1.0597]) tensor([0.5407, 0.1083, 0.2718, 0.0792]) -Greedy action tensor([-1.0278, -1.1271, 0.1092, -0.3933]) tensor([0.1447, 0.1311, 0.4512, 0.2730]) -Greedy action tensor([ 0.0612, -0.0570, -0.3117, -0.3108]) tensor([0.3061, 0.2720, 0.2108, 0.2110]) -Greedy action tensor([-0.4979, 0.0009, -0.6466, -0.6614]) tensor([0.2295, 0.3779, 0.1978, 0.1949]) -Greedy action tensor([ 0.0395, -0.4283, -0.3833, -0.4735]) tensor([0.3472, 0.2175, 0.2275, 0.2079]) -Greedy action tensor([-0.2328, -0.4965, -1.5246, -0.1735]) tensor([0.3222, 0.2475, 0.0885, 0.3418]) -Greedy action tensor([-1.6085, 0.5845, -0.1525, -0.6257]) tensor([0.0591, 0.5296, 0.2534, 0.1579]) -Greedy action tensor([-0.1714, -2.3290, 1.1973, -1.1194]) tensor([0.1840, 0.0213, 0.7234, 0.0713]) -Greedy action tensor([ 1.0048, -0.9900, 0.2570, 1.2151]) tensor([0.3517, 0.0478, 0.1665, 0.4340]) -Greedy action tensor([-0.3140, -1.2805, 0.8192, -0.6735]) tensor([0.1929, 0.0734, 0.5991, 0.1347]) -Greedy action tensor([ 0.1429, -0.1190, 1.0240, -0.8536]) tensor([0.2197, 0.1690, 0.5302, 0.0811]) -Greedy action tensor([-0.0128, 0.1400, -0.1970, -0.3119]) tensor([0.2675, 0.3117, 0.2225, 0.1983]) -Greedy action tensor([ 0.1805, -0.5031, -0.0932, -0.0984]) tensor([0.3309, 0.1670, 0.2517, 0.2504]) -Greedy action tensor([ 0.1525, -1.1058, 0.9742, -0.3409]) tensor([0.2399, 0.0682, 0.5455, 0.1464]) -Greedy action tensor([-0.2395, -1.3585, 0.3993, -0.6605]) tensor([0.2579, 0.0842, 0.4885, 0.1693]) -Greedy action tensor([ 0.3768, -1.3481, -0.0176, -0.1041]) tensor([0.4048, 0.0721, 0.2728, 0.2503]) -Greedy action tensor([ 1.4458, 0.3754, -0.6419, 0.2284]) tensor([0.5673, 0.1945, 0.0703, 0.1679]) -Greedy action tensor([-0.4294, -0.8269, 0.0419, 0.4877]) tensor([0.1731, 0.1163, 0.2774, 0.4332]) -Greedy action tensor([-0.2072, -0.9541, 0.7989, -0.2624]) tensor([0.1940, 0.0919, 0.5305, 0.1836]) -Greedy action tensor([ 0.4890, 0.1217, -0.5620, -0.2852]) tensor([0.3995, 0.2767, 0.1397, 0.1842]) -Greedy action tensor([ 0.4130, 0.0936, -0.6213, 0.6103]) tensor([0.3030, 0.2202, 0.1077, 0.3691]) -Greedy action tensor([-0.5963, -1.4105, 0.6073, -0.3305]) tensor([0.1645, 0.0729, 0.5481, 0.2146]) -Greedy action tensor([ 0.8962, -0.5404, -0.0037, -0.2888]) tensor([0.5128, 0.1219, 0.2085, 0.1568]) -Greedy action tensor([ 0.5293, -0.4282, -0.1462, -0.0587]) tensor([0.4085, 0.1568, 0.2079, 0.2269]) -Greedy action tensor([ 0.6972, -0.4362, -0.1051, 0.0135]) tensor([0.4395, 0.1415, 0.1971, 0.2219]) -Greedy action tensor([ 0.6360, -0.4077, 0.0562, -0.4580]) tensor([0.4450, 0.1567, 0.2492, 0.1490]) -Greedy action tensor([ 0.5799, 0.0115, -0.0219, 0.0686]) tensor([0.3685, 0.2087, 0.2019, 0.2210]) -Greedy action tensor([ 0.9161, -0.5149, -0.0361, -0.4058]) tensor([0.5286, 0.1264, 0.2040, 0.1410]) -Greedy action tensor([ 0.8320, -0.7442, 0.1681, -0.5592]) tensor([0.5075, 0.1049, 0.2613, 0.1263]) -Greedy action tensor([ 1.3940, -0.9679, -0.0948, -0.7905]) tensor([0.6981, 0.0658, 0.1575, 0.0786]) -Greedy action tensor([ 0.6759, -0.2167, -0.0571, -0.1548]) tensor([0.4300, 0.1761, 0.2066, 0.1874]) -Greedy action tensor([ 0.2076, 0.0034, -0.1608, -0.5042]) tensor([0.3336, 0.2720, 0.2308, 0.1637]) -Greedy action tensor([ 1.1607, -0.5921, -0.2508, -0.5370]) tensor([0.6249, 0.1083, 0.1524, 0.1144]) -Greedy action tensor([ 0.7171, -0.5922, -0.0611, -0.3932]) tensor([0.4858, 0.1311, 0.2231, 0.1600]) -Greedy action tensor([ 0.4822, -0.0589, 0.0030, 0.0080]) tensor([0.3541, 0.2061, 0.2193, 0.2204]) -Greedy action tensor([ 0.7265, -0.3995, 0.0354, -0.1834]) tensor([0.4488, 0.1456, 0.2249, 0.1807]) -Greedy action tensor([ 0.5985, -0.2524, -0.0241, -0.2957]) tensor([0.4215, 0.1800, 0.2262, 0.1724]) -Greedy action tensor([ 1.2963, -0.8718, -0.0217, -0.9210]) tensor([0.6707, 0.0767, 0.1795, 0.0730]) -Greedy action tensor([ 0.9310, -0.5861, -0.0493, -0.3019]) tensor([0.5302, 0.1163, 0.1989, 0.1545]) -Greedy action tensor([ 0.2670, -0.0896, -0.1774, 0.0364]) tensor([0.3190, 0.2233, 0.2045, 0.2533]) -Greedy action tensor([ 0.4377, -0.4070, -0.0202, -0.0847]) tensor([0.3766, 0.1618, 0.2382, 0.2234]) -Greedy action tensor([ 0.9290, -0.6144, 0.0842, -0.3178]) tensor([0.5179, 0.1107, 0.2225, 0.1489]) -Greedy action tensor([ 0.8478, -0.3321, -0.1004, -0.3600]) tensor([0.5016, 0.1542, 0.1943, 0.1499]) -Greedy action tensor([0.2092, 0.1903, 0.2561, 0.3347]) tensor([0.2402, 0.2357, 0.2517, 0.2723]) -Greedy action tensor([ 0.5402, -0.0908, -0.1036, -0.0832]) tensor([0.3856, 0.2051, 0.2026, 0.2067]) -Greedy action tensor([ 0.7151, -0.3074, -0.0145, -0.3739]) tensor([0.4591, 0.1651, 0.2213, 0.1545]) -Greedy action tensor([ 0.7379, -0.3792, 0.1219, -0.1856]) tensor([0.4416, 0.1445, 0.2385, 0.1754]) -Greedy action tensor([ 0.4122, -0.1273, 0.0136, -0.0691]) tensor([0.3482, 0.2030, 0.2337, 0.2152]) -Greedy action tensor([ 0.5200, -0.5343, -0.2264, -0.0967]) tensor([0.4233, 0.1475, 0.2007, 0.2285]) -Greedy action tensor([ 0.6751, -0.0684, -0.2371, -0.6000]) tensor([0.4637, 0.2205, 0.1862, 0.1296]) -Greedy action tensor([ 0.7576, -0.3390, -0.0797, -0.1381]) tensor([0.4597, 0.1536, 0.1990, 0.1877]) -Greedy action tensor([ 0.8513, -0.5025, 0.1257, -0.3299]) tensor([0.4880, 0.1260, 0.2362, 0.1498]) -Greedy action tensor([ 1.0926, -0.8556, 0.0715, -0.5853]) tensor([0.5919, 0.0844, 0.2132, 0.1105]) -Greedy action tensor([ 0.2076, -0.2031, 0.0216, -0.1339]) tensor([0.3121, 0.2070, 0.2591, 0.2218]) -Greedy action tensor([ 0.6780, -0.3540, -0.0507, -0.1958]) tensor([0.4432, 0.1579, 0.2139, 0.1850]) -Greedy action tensor([ 0.9206, -0.3496, -0.1188, 0.0015]) tensor([0.4918, 0.1381, 0.1739, 0.1962]) -Greedy action tensor([ 0.7653, -0.4831, -0.1023, -0.4885]) tensor([0.5019, 0.1440, 0.2108, 0.1433]) -Greedy action tensor([ 0.9647, -0.4923, -0.2593, -0.3199]) tensor([0.5544, 0.1291, 0.1630, 0.1534]) -Greedy action tensor([ 1.0387, -0.5944, -0.0440, -0.5410]) tensor([0.5747, 0.1122, 0.1946, 0.1184]) -Greedy action tensor([ 0.6062, -0.6336, 0.0485, -0.1892]) tensor([0.4323, 0.1251, 0.2475, 0.1951]) -Greedy action tensor([ 0.9002, -0.5435, 0.0038, -0.1252]) tensor([0.4993, 0.1179, 0.2037, 0.1791]) -Greedy action tensor([ 0.5415, -0.1809, -0.0485, -0.0813]) tensor([0.3881, 0.1885, 0.2152, 0.2082]) -Greedy action tensor([ 0.8344, -0.5799, -0.1797, -0.3418]) tensor([0.5224, 0.1270, 0.1895, 0.1611]) -Greedy action tensor([ 0.6208, -0.4672, -0.0295, -0.1205]) tensor([0.4282, 0.1443, 0.2235, 0.2040]) -Greedy action tensor([ 1.3210, -0.8528, -0.0272, -0.7392]) tensor([0.6663, 0.0758, 0.1730, 0.0849]) -Greedy action tensor([ 1.0993, -1.0880, -0.0359, -0.5693]) tensor([0.6165, 0.0692, 0.1981, 0.1162]) -Greedy action tensor([ 1.2768, -0.7760, -0.0042, -0.7623]) tensor([0.6509, 0.0836, 0.1808, 0.0847]) -Greedy action tensor([ 7.0731e-01, -3.0497e-02, 9.3547e-02, -6.3656e-04]) tensor([0.3981, 0.1903, 0.2155, 0.1961]) -Greedy action tensor([ 0.6740, -0.3882, -0.0745, -0.2263]) tensor([0.4494, 0.1554, 0.2126, 0.1826]) -Greedy action tensor([ 0.6446, -0.1389, -0.0968, -0.1601]) tensor([0.4201, 0.1919, 0.2001, 0.1879]) -Greedy action tensor([ 1.0126, -0.4254, 0.2774, -0.4116]) tensor([0.5109, 0.1213, 0.2449, 0.1230]) -Greedy action tensor([ 0.5471, -0.0218, -0.1171, -0.0527]) tensor([0.3802, 0.2153, 0.1957, 0.2087]) -Greedy action tensor([ 1.1903, -0.6074, -0.2237, -0.2059]) tensor([0.6037, 0.1000, 0.1468, 0.1494]) -Greedy action tensor([ 0.5397, 0.2494, -0.2040, 0.0315]) tensor([0.3540, 0.2648, 0.1683, 0.2130]) -Greedy action tensor([ 0.4622, -0.0426, -0.1124, -0.0100]) tensor([0.3584, 0.2163, 0.2017, 0.2235]) -Greedy action tensor([ 0.5312, -0.2222, 0.2101, 0.0216]) tensor([0.3576, 0.1683, 0.2593, 0.2148]) -Greedy action tensor([ 0.9231, -0.5552, -0.1080, -0.9254]) tensor([0.5740, 0.1309, 0.2047, 0.0904]) -Greedy action tensor([ 0.6591, -0.4245, -0.0614, -0.1621]) tensor([0.4416, 0.1494, 0.2148, 0.1942]) -Greedy action tensor([ 0.5314, -0.3789, -0.0302, -0.0932]) tensor([0.3987, 0.1604, 0.2274, 0.2135]) -Greedy action tensor([ 0.9183, -0.4193, -0.1048, -0.2977]) tensor([0.5213, 0.1368, 0.1874, 0.1545]) -Greedy action tensor([ 1.0573, -0.6146, -0.0050, -0.2422]) tensor([0.5536, 0.1040, 0.1914, 0.1510]) -Greedy action tensor([ 0.8365, -0.4561, 0.0326, -0.1506]) tensor([0.4774, 0.1311, 0.2137, 0.1779]) -Greedy action tensor([ 0.7020, -0.3689, -0.0928, -0.3142]) tensor([0.4638, 0.1589, 0.2095, 0.1679]) -Greedy action tensor([ 0.7001, -0.4055, -0.0653, -0.3759]) tensor([0.4679, 0.1549, 0.2176, 0.1595]) -Greedy action tensor([ 1.1159, -0.6843, -0.1910, -0.4777]) tensor([0.6101, 0.1008, 0.1651, 0.1240]) -Greedy action tensor([ 0.9416, -0.2843, -0.0179, -0.2924]) tensor([0.5082, 0.1492, 0.1947, 0.1480]) -Greedy action tensor([ 0.9664, -0.6384, -0.0821, -0.3365]) tensor([0.5485, 0.1102, 0.1922, 0.1491]) -Greedy action tensor([ 0.9651, -0.4463, -0.0164, -0.2982]) tensor([0.5260, 0.1282, 0.1971, 0.1487]) -Greedy action tensor([ 0.3862, -0.1035, -0.1642, -0.4561]) tensor([0.3816, 0.2339, 0.2201, 0.1644]) -Greedy action tensor([ 0.5409, -0.0406, -0.1440, -0.0327]) tensor([0.3807, 0.2128, 0.1919, 0.2145]) -Greedy action tensor([ 0.6502, -0.6931, -0.1133, -0.3161]) tensor([0.4745, 0.1238, 0.2211, 0.1805]) -Greedy action tensor([ 0.4806, -0.0456, 0.0785, -0.5909]) tensor([0.3843, 0.2270, 0.2571, 0.1316]) -Greedy action tensor([ 1.0729, -0.7304, 0.0657, -0.4076]) tensor([0.5690, 0.0937, 0.2078, 0.1295]) -Greedy action tensor([ 0.6666, -0.4880, -0.0471, -0.1886]) tensor([0.4484, 0.1413, 0.2196, 0.1906]) -Greedy action tensor([ 0.6977, -0.1402, 0.0848, -0.0884]) tensor([0.4115, 0.1780, 0.2230, 0.1875]) -Greedy action tensor([ 0.9707, -0.6568, -0.1046, -0.1581]) tensor([0.5373, 0.1055, 0.1833, 0.1738]) -Greedy action tensor([ 0.7079, -0.4510, -0.2696, -0.4124]) tensor([0.4960, 0.1556, 0.1866, 0.1618]) -Greedy action tensor([ 0.2035, 0.0882, -0.1811, -0.0568]) tensor([0.2992, 0.2666, 0.2036, 0.2306]) -Greedy action tensor([ 0.6751, -0.4297, -0.2884, -0.3204]) tensor([0.4802, 0.1591, 0.1832, 0.1775]) -Greedy action tensor([ 0.6780, -0.4128, -0.0959, -0.0620]) tensor([0.4397, 0.1477, 0.2028, 0.2098]) -Greedy action tensor([ 0.8007, -0.3764, 0.0056, -0.1419]) tensor([0.4653, 0.1434, 0.2101, 0.1813]) -Greedy action tensor([ 1.0546, -0.7251, 0.0921, -0.3148]) tensor([0.5541, 0.0935, 0.2116, 0.1409]) -Greedy action tensor([ 0.9189, -0.8316, 0.0757, -0.3861]) tensor([0.5333, 0.0926, 0.2295, 0.1446]) -Greedy action tensor([1.8035, 0.0430, 0.1375, 0.5049]) tensor([0.6120, 0.1052, 0.1157, 0.1670]) -Greedy action tensor([ 1.6576, -0.4680, -0.4725, 0.2791]) tensor([0.6711, 0.0801, 0.0797, 0.1691]) -Greedy action tensor([ 1.4171, -0.3293, -0.4487, 0.2500]) tensor([0.6096, 0.1063, 0.0943, 0.1898]) -Greedy action tensor([ 1.3595, -0.3113, -0.2054, 0.2381]) tensor([0.5804, 0.1092, 0.1214, 0.1891]) -Greedy action tensor([ 1.4653, -0.4624, -0.0218, 0.1022]) tensor([0.6145, 0.0894, 0.1389, 0.1572]) -Greedy action tensor([ 2.1819, -1.0855, -0.5734, 0.7759]) tensor([0.7425, 0.0283, 0.0472, 0.1820]) -Greedy action tensor([ 1.9751, -0.4257, -0.4057, 0.2312]) tensor([0.7364, 0.0668, 0.0681, 0.1288]) -Greedy action tensor([ 1.2147, -0.4031, -0.8123, 0.4701]) tensor([0.5540, 0.1099, 0.0730, 0.2631]) -Greedy action tensor([ 1.9058, -0.7504, -0.3385, -0.0669]) tensor([0.7603, 0.0534, 0.0806, 0.1057]) -Greedy action tensor([ 1.3971, -0.4795, -1.1614, 0.4135]) tensor([0.6233, 0.0954, 0.0483, 0.2331]) -Greedy action tensor([ 1.6997, -0.1933, -0.5189, 0.6359]) tensor([0.6232, 0.0939, 0.0678, 0.2151]) -Greedy action tensor([ 1.2214, -0.1248, -0.0829, 0.4824]) tensor([0.4977, 0.1295, 0.1351, 0.2377]) -Greedy action tensor([ 1.1190, -0.0783, -0.2469, 0.2545]) tensor([0.5055, 0.1526, 0.1290, 0.2129]) -Greedy action tensor([ 1.4587, -0.4169, -0.2623, 0.3438]) tensor([0.6024, 0.0923, 0.1078, 0.1975]) -Greedy action tensor([ 1.2588, -0.7210, -0.8084, 0.8986]) tensor([0.5096, 0.0704, 0.0645, 0.3555]) -Greedy action tensor([ 1.6717, -0.7234, 0.0792, 0.4404]) tensor([0.6303, 0.0575, 0.1282, 0.1840]) -Greedy action tensor([ 1.9780, -1.0341, -0.4208, 0.4885]) tensor([0.7323, 0.0360, 0.0665, 0.1651]) -Greedy action tensor([ 1.4758, -0.5682, -0.5945, 0.0728]) tensor([0.6660, 0.0863, 0.0840, 0.1637]) -Greedy action tensor([ 1.5035, -0.4174, -0.4206, 0.1238]) tensor([0.6476, 0.0949, 0.0946, 0.1630]) -Greedy action tensor([ 1.5511, -0.7107, -0.6433, 0.4594]) tensor([0.6446, 0.0671, 0.0718, 0.2164]) -Greedy action tensor([ 1.7032, -0.4878, -0.2847, 0.5513]) tensor([0.6391, 0.0714, 0.0875, 0.2020]) -Greedy action tensor([ 1.7375, -0.1717, -0.6028, 0.3782]) tensor([0.6661, 0.0987, 0.0641, 0.1711]) -Greedy action tensor([ 1.7259, -0.4811, -0.4393, 0.1756]) tensor([0.6959, 0.0766, 0.0798, 0.1477]) -Greedy action tensor([ 1.5388, -1.0848, -0.4025, -0.1959]) tensor([0.7181, 0.0521, 0.1031, 0.1267]) -Greedy action tensor([ 1.0063, -0.3713, -0.2042, 0.4214]) tensor([0.4745, 0.1197, 0.1414, 0.2644]) -Greedy action tensor([ 1.5875, -0.5816, -0.5544, 0.2158]) tensor([0.6732, 0.0769, 0.0791, 0.1708]) -Greedy action tensor([ 1.8720, -0.8660, -0.1195, 0.2763]) tensor([0.7123, 0.0461, 0.0972, 0.1444]) -Greedy action tensor([ 1.6929, -0.5384, 0.2742, 0.0314]) tensor([0.6497, 0.0698, 0.1572, 0.1233]) -Greedy action tensor([ 0.9268, -0.3000, -0.2634, 0.3688]) tensor([0.4609, 0.1351, 0.1402, 0.2638]) -Greedy action tensor([ 1.2604, -0.6720, -0.1786, -0.1897]) tensor([0.6186, 0.0896, 0.1467, 0.1451]) -Greedy action tensor([ 1.3104, -0.5174, -0.2950, 0.2189]) tensor([0.5892, 0.0947, 0.1183, 0.1978]) -Greedy action tensor([ 1.6696, -0.2156, -0.6792, 0.1902]) tensor([0.6779, 0.1029, 0.0647, 0.1544]) -Greedy action tensor([ 1.0960, -0.3784, -0.2529, -0.3805]) tensor([0.5825, 0.1333, 0.1512, 0.1330]) -Greedy action tensor([ 1.6448, 0.2038, -1.0801, 0.3019]) tensor([0.6397, 0.1514, 0.0419, 0.1670]) -Greedy action tensor([ 0.7142, -0.3948, 0.0620, -0.0572]) tensor([0.4323, 0.1426, 0.2252, 0.1999]) -Greedy action tensor([ 0.8237, -0.0382, -0.2495, 0.1728]) tensor([0.4375, 0.1848, 0.1496, 0.2282]) -Greedy action tensor([ 1.4079, -0.6141, -0.7374, 0.1438]) tensor([0.6528, 0.0864, 0.0764, 0.1844]) -Greedy action tensor([ 1.0267, -0.4548, -0.6362, 0.4838]) tensor([0.5005, 0.1138, 0.0949, 0.2908]) -Greedy action tensor([ 1.5805, -0.5760, -0.4277, 0.1109]) tensor([0.6757, 0.0782, 0.0907, 0.1554]) -Greedy action tensor([ 1.7186, -0.6033, -0.4085, 0.2230]) tensor([0.6938, 0.0681, 0.0827, 0.1555]) -Greedy action tensor([ 1.3338, 0.5058, -0.5142, 0.0652]) tensor([0.5331, 0.2329, 0.0840, 0.1499]) -Greedy action tensor([ 1.1989, -0.5860, -0.3488, -0.1263]) tensor([0.6074, 0.1019, 0.1292, 0.1614]) -Greedy action tensor([ 1.6412, -0.0297, -0.2493, -0.2601]) tensor([0.6719, 0.1264, 0.1014, 0.1004]) -Greedy action tensor([ 2.3383, -0.0233, -0.3629, 0.3874]) tensor([0.7671, 0.0723, 0.0515, 0.1090]) -Greedy action tensor([ 1.5802, -0.5878, -0.3812, 0.2392]) tensor([0.6593, 0.0754, 0.0927, 0.1725]) -Greedy action tensor([ 1.4410, 0.1098, -1.0425, 0.5595]) tensor([0.5676, 0.1499, 0.0474, 0.2351]) -Greedy action tensor([ 1.2880, -0.4371, -1.0129, 0.2753]) tensor([0.6092, 0.1085, 0.0610, 0.2213]) -Greedy action tensor([ 1.9686, -0.6930, -0.2337, 0.2943]) tensor([0.7311, 0.0511, 0.0808, 0.1370]) -Greedy action tensor([ 2.2955, -1.5515, 0.4103, -0.1581]) tensor([0.7942, 0.0170, 0.1206, 0.0683]) -Greedy action tensor([ 1.8078, -0.1171, -0.7691, 0.8532]) tensor([0.6223, 0.0908, 0.0473, 0.2396]) -Greedy action tensor([ 1.3961, 0.2575, -0.6515, 0.1557]) tensor([0.5752, 0.1842, 0.0742, 0.1664]) -Greedy action tensor([ 1.6208, -0.4057, -0.3042, -0.0642]) tensor([0.6835, 0.0901, 0.0997, 0.1267]) -Greedy action tensor([ 1.3312, -0.5020, 0.0543, 0.3198]) tensor([0.5548, 0.0887, 0.1547, 0.2018]) -Greedy action tensor([ 1.7608, -0.9229, 0.0111, 0.8230]) tensor([0.6121, 0.0418, 0.1064, 0.2396]) -Greedy action tensor([ 1.8847, -1.0896, -0.2518, 0.2411]) tensor([0.7340, 0.0375, 0.0867, 0.1419]) -Greedy action tensor([ 1.0785e+00, -2.7978e-01, -2.8127e-01, 7.8982e-04]) tensor([0.5393, 0.1387, 0.1385, 0.1836]) -Greedy action tensor([ 1.2279, -0.0184, 0.0687, 0.2761]) tensor([0.5032, 0.1447, 0.1579, 0.1943]) -Greedy action tensor([ 1.5942, -0.1096, -1.2779, -0.0317]) tensor([0.6967, 0.1268, 0.0394, 0.1371]) -Greedy action tensor([ 0.9578, 0.1704, -0.0402, -0.0627]) tensor([0.4579, 0.2083, 0.1688, 0.1650]) -Greedy action tensor([ 1.5275, -0.5805, -0.6395, 0.1753]) tensor([0.6690, 0.0813, 0.0766, 0.1731]) -Greedy action tensor([ 1.2919, -0.2149, -0.1389, -0.4630]) tensor([0.6121, 0.1357, 0.1464, 0.1059]) -Greedy action tensor([ 1.7817, -0.8146, -0.2219, 0.5614]) tensor([0.6647, 0.0495, 0.0896, 0.1962]) -Greedy action tensor([ 1.7009, -0.2301, -0.5759, 0.2382]) tensor([0.6760, 0.0980, 0.0694, 0.1566]) -Greedy action tensor([ 1.1506, -0.2856, -0.9315, 0.4439]) tensor([0.5389, 0.1282, 0.0672, 0.2658]) -Greedy action tensor([ 2.0799, -0.7048, -0.4955, 0.7077]) tensor([0.7187, 0.0444, 0.0547, 0.1822]) -Greedy action tensor([ 1.1530, -0.1351, -0.4426, 0.1103]) tensor([0.5461, 0.1506, 0.1107, 0.1925]) -Greedy action tensor([ 1.8288, -0.6145, -0.5946, 0.6111]) tensor([0.6796, 0.0590, 0.0602, 0.2011]) -Greedy action tensor([ 1.4599, 0.0797, -0.9652, 0.2489]) tensor([0.6105, 0.1536, 0.0540, 0.1819]) -Greedy action tensor([ 1.5320, -0.5871, -0.4246, 0.2300]) tensor([0.6521, 0.0783, 0.0922, 0.1774]) -Greedy action tensor([ 0.9499, -0.2333, 0.0342, 0.0449]) tensor([0.4737, 0.1451, 0.1896, 0.1916]) -Greedy action tensor([ 1.2368, -0.1582, -0.5114, 0.3954]) tensor([0.5397, 0.1337, 0.0939, 0.2326]) -Greedy action tensor([ 0.9959, -0.5400, -0.4287, 0.3438]) tensor([0.5059, 0.1089, 0.1217, 0.2635]) -Greedy action tensor([ 1.2736, -0.4455, 0.1351, 0.2952]) tensor([0.5332, 0.0956, 0.1708, 0.2004]) -Greedy action tensor([ 1.4796, -0.8892, -0.2840, 0.1672]) tensor([0.6518, 0.0610, 0.1117, 0.1755]) -Greedy action tensor([ 1.6383, -0.6443, -0.6108, 0.6139]) tensor([0.6384, 0.0651, 0.0673, 0.2292]) -Greedy action tensor([ 1.3506, -0.4194, -0.7566, 0.2095]) tensor([0.6206, 0.1057, 0.0754, 0.1983]) -Greedy action tensor([ 1.1818, -0.1990, -0.5286, 0.1068]) tensor([0.5639, 0.1417, 0.1019, 0.1924]) -Greedy action tensor([ 2.0139, -1.1419, -0.0914, 0.9360]) tensor([0.6646, 0.0283, 0.0810, 0.2262]) -Greedy action tensor([ 1.3433, -0.2351, -0.9197, 0.2191]) tensor([0.6115, 0.1262, 0.0636, 0.1987]) -Greedy action tensor([ 1.7887, -0.6053, -0.3007, 0.3310]) tensor([0.6907, 0.0630, 0.0855, 0.1608]) -Greedy action tensor([ 1.7897, -0.9184, -0.7557, 0.3142]) tensor([0.7279, 0.0485, 0.0571, 0.1664]) -Greedy action tensor([-1.9388, -0.4377, 0.6633, -0.1773]) tensor([0.0403, 0.1809, 0.5440, 0.2347]) -Greedy action tensor([-1.7837e+00, -3.9916e-01, 6.6537e-01, 1.5470e-03]) tensor([0.0444, 0.1772, 0.5138, 0.2646]) -Greedy action tensor([-0.1126, 1.0736, 0.0075, 0.3564]) tensor([0.1428, 0.4678, 0.1611, 0.2283]) -Greedy action tensor([-1.3885, -0.1176, 0.3918, -0.0604]) tensor([0.0701, 0.2498, 0.4157, 0.2645]) -Greedy action tensor([-1.9120, -0.4226, 0.6497, -0.1630]) tensor([0.0414, 0.1837, 0.5367, 0.2381]) -Greedy action tensor([-1.6782, -0.3692, 0.6355, -0.0395]) tensor([0.0501, 0.1855, 0.5065, 0.2579]) -Greedy action tensor([-1.7192, -0.3165, 0.5402, -0.0495]) tensor([0.0501, 0.2038, 0.4800, 0.2661]) -Greedy action tensor([-1.6332, -0.5535, 0.5112, -0.0029]) tensor([0.0569, 0.1674, 0.4854, 0.2903]) -Greedy action tensor([-1.9234, -0.4322, 0.6658, -0.1516]) tensor([0.0406, 0.1803, 0.5405, 0.2387]) -Greedy action tensor([-1.8659, -0.4354, 0.6280, -0.1417]) tensor([0.0437, 0.1826, 0.5288, 0.2449]) -Greedy action tensor([-1.7301, -0.3871, 0.5159, -0.0603]) tensor([0.0510, 0.1955, 0.4823, 0.2711]) -Greedy action tensor([-1.7697, 0.0767, 0.5008, -0.0703]) tensor([0.0445, 0.2817, 0.4306, 0.2432]) -Greedy action tensor([-1.8787, -0.3770, 0.6384, -0.1430]) tensor([0.0425, 0.1906, 0.5261, 0.2408]) -Greedy action tensor([-0.8925, 0.9598, 0.0982, 0.3182]) tensor([0.0745, 0.4749, 0.2006, 0.2500]) -Greedy action tensor([-1.7234, -0.3099, 0.5296, -0.0729]) tensor([0.0504, 0.2072, 0.4797, 0.2626]) -Greedy action tensor([-1.4122, 0.5930, 0.2281, 0.1873]) tensor([0.0540, 0.4007, 0.2782, 0.2671]) -Greedy action tensor([-1.8500, -0.4399, 0.5957, -0.1249]) tensor([0.0449, 0.1841, 0.5186, 0.2523]) -Greedy action tensor([-1.8881, -0.3830, 0.6608, -0.1380]) tensor([0.0416, 0.1873, 0.5319, 0.2393]) -Greedy action tensor([-1.8984, -0.4391, 0.6439, -0.1597]) tensor([0.0422, 0.1815, 0.5362, 0.2401]) -Greedy action tensor([-1.3816, 0.1938, 0.1004, -0.6561]) tensor([0.0813, 0.3929, 0.3579, 0.1679]) -Greedy action tensor([-1.9106, -0.4174, 0.6554, -0.1627]) tensor([0.0413, 0.1839, 0.5376, 0.2372]) -Greedy action tensor([-1.0960, -0.2111, 0.1822, 0.3071]) tensor([0.0902, 0.2186, 0.3240, 0.3671]) -Greedy action tensor([-1.9162, -0.4247, 0.6485, -0.1705]) tensor([0.0414, 0.1839, 0.5377, 0.2371]) -Greedy action tensor([-1.8642, -0.4393, 0.6260, -0.1249]) tensor([0.0436, 0.1814, 0.5265, 0.2485]) -Greedy action tensor([-1.7178, 0.2348, 0.5192, -0.4451]) tensor([0.0477, 0.3359, 0.4463, 0.1702]) -Greedy action tensor([-1.8419, -0.2578, 0.6016, -0.1019]) tensor([0.0433, 0.2112, 0.4987, 0.2468]) -Greedy action tensor([-1.9159, -0.4394, 0.6534, -0.1666]) tensor([0.0413, 0.1810, 0.5399, 0.2378]) -Greedy action tensor([-1.4391, -0.4674, 0.4719, -0.2104]) tensor([0.0724, 0.1912, 0.4892, 0.2473]) -Greedy action tensor([-1.8545, -0.3969, 0.6408, -0.1166]) tensor([0.0433, 0.1859, 0.5248, 0.2460]) -Greedy action tensor([-1.8967, -0.4536, 0.6396, -0.1646]) tensor([0.0425, 0.1800, 0.5371, 0.2403]) -Greedy action tensor([-1.9156, -0.4265, 0.6511, -0.1671]) tensor([0.0413, 0.1832, 0.5381, 0.2374]) -Greedy action tensor([-1.8364, -0.4637, 0.6225, -0.1269]) tensor([0.0451, 0.1780, 0.5275, 0.2493]) -Greedy action tensor([-1.9337, -0.4461, 0.6691, -0.1651]) tensor([0.0403, 0.1786, 0.5446, 0.2365]) -Greedy action tensor([-1.8654, -0.4560, 0.6317, -0.1464]) tensor([0.0438, 0.1794, 0.5323, 0.2445]) -Greedy action tensor([-1.7727, -0.2481, 0.6023, -0.0686]) tensor([0.0458, 0.2103, 0.4923, 0.2517]) -Greedy action tensor([-1.8358, -0.4574, 0.6193, -0.1257]) tensor([0.0452, 0.1792, 0.5260, 0.2497]) -Greedy action tensor([-7.1625e-01, 5.0572e-01, 1.0633e-01, 1.6731e-04]) tensor([0.1147, 0.3893, 0.2611, 0.2348]) -Greedy action tensor([-1.8916, -0.4515, 0.6454, -0.1558]) tensor([0.0425, 0.1793, 0.5371, 0.2411]) -Greedy action tensor([-1.7862, -0.3875, 0.5813, -0.1091]) tensor([0.0475, 0.1922, 0.5064, 0.2539]) -Greedy action tensor([-1.8973, -0.4214, 0.6432, -0.1570]) tensor([0.0421, 0.1841, 0.5339, 0.2399]) -Greedy action tensor([-0.9078, 0.0465, -0.1179, -0.2530]) tensor([0.1294, 0.3362, 0.2852, 0.2492]) -Greedy action tensor([-1.8712, -0.3916, 0.6544, -0.1287]) tensor([0.0424, 0.1861, 0.5296, 0.2420]) -Greedy action tensor([-1.9006, -0.3834, 0.6397, -0.1557]) tensor([0.0417, 0.1902, 0.5292, 0.2389]) -Greedy action tensor([-1.9348, -0.3993, 0.6531, -0.1743]) tensor([0.0404, 0.1876, 0.5372, 0.2349]) -Greedy action tensor([-1.8121, -0.4183, 0.6005, -0.1447]) tensor([0.0465, 0.1875, 0.5194, 0.2465]) -Greedy action tensor([-0.7807, 0.8022, 0.2261, 0.6326]) tensor([0.0786, 0.3829, 0.2152, 0.3232]) -Greedy action tensor([-1.9161, -0.4499, 0.6581, -0.1671]) tensor([0.0413, 0.1790, 0.5421, 0.2375]) -Greedy action tensor([-1.9382, -0.4359, 0.6622, -0.1760]) tensor([0.0403, 0.1812, 0.5434, 0.2350]) -Greedy action tensor([-1.7040, -0.3941, 0.5479, 0.0200]) tensor([0.0505, 0.1870, 0.4796, 0.2829]) -Greedy action tensor([-1.6598, -0.3572, 0.5270, -0.0863]) tensor([0.0543, 0.1998, 0.4838, 0.2620]) -Greedy action tensor([-1.9015, -0.4412, 0.6495, -0.1551]) tensor([0.0419, 0.1805, 0.5373, 0.2403]) -Greedy action tensor([-0.5234, 0.3875, 0.1082, 0.0898]) tensor([0.1386, 0.3447, 0.2607, 0.2560]) -Greedy action tensor([-1.9308, -0.4582, 0.6671, -0.1691]) tensor([0.0406, 0.1771, 0.5458, 0.2365]) -Greedy action tensor([-1.8605, -0.2343, 0.6017, -0.1371]) tensor([0.0427, 0.2171, 0.5009, 0.2393]) -Greedy action tensor([-1.8889, -0.2643, 0.6109, -0.1478]) tensor([0.0417, 0.2119, 0.5084, 0.2380]) -Greedy action tensor([-1.3306, 0.7627, 0.2315, 0.1909]) tensor([0.0542, 0.4394, 0.2583, 0.2481]) -Greedy action tensor([-1.9102, -0.4305, 0.6515, -0.1592]) tensor([0.0415, 0.1822, 0.5375, 0.2389]) -Greedy action tensor([-0.7108, -0.2519, 0.7079, 0.5671]) tensor([0.0971, 0.1536, 0.4010, 0.3483]) -Greedy action tensor([-1.7603, -0.3449, 0.5507, -0.0698]) tensor([0.0485, 0.1997, 0.4890, 0.2629]) -Greedy action tensor([-1.8049, -0.3753, 0.6059, -0.0898]) tensor([0.0457, 0.1909, 0.5093, 0.2540]) -Greedy action tensor([-1.8841, -0.3665, 0.6320, -0.1479]) tensor([0.0423, 0.1931, 0.5242, 0.2403]) -Greedy action tensor([-1.7976, -0.4028, 0.6679, -0.0495]) tensor([0.0444, 0.1789, 0.5220, 0.2547]) -Greedy action tensor([-1.5386, -0.4363, 0.4824, -0.0739]) tensor([0.0630, 0.1896, 0.4751, 0.2724]) -Greedy action tensor([-1.8537, -0.4346, 0.6198, -0.1451]) tensor([0.0444, 0.1836, 0.5268, 0.2452]) -Greedy action tensor([-1.6128, -0.3626, 0.4766, 0.0053]) tensor([0.0568, 0.1982, 0.4587, 0.2863]) -Greedy action tensor([-1.6674, -0.1789, 0.4978, -0.0806]) tensor([0.0525, 0.2328, 0.4579, 0.2568]) -Greedy action tensor([-1.9009, -0.2916, 0.6247, -0.1505]) tensor([0.0412, 0.2061, 0.5153, 0.2374]) -Greedy action tensor([-1.8491, -0.1535, 0.5890, -0.1229]) tensor([0.0425, 0.2317, 0.4869, 0.2389]) -Greedy action tensor([-1.7622, -0.4920, 0.6840, 0.1196]) tensor([0.0441, 0.1571, 0.5092, 0.2896]) -Greedy action tensor([-1.9357, -0.4541, 0.6604, -0.1776]) tensor([0.0406, 0.1788, 0.5449, 0.2357]) -Greedy action tensor([-1.8666, -0.4535, 0.6272, -0.1493]) tensor([0.0439, 0.1803, 0.5313, 0.2444]) -Greedy action tensor([-1.8978, -0.2288, 0.6095, -0.1601]) tensor([0.0412, 0.2187, 0.5058, 0.2343]) -Greedy action tensor([-1.8035, -0.3602, 0.5919, -0.1521]) tensor([0.0467, 0.1977, 0.5122, 0.2434]) -Greedy action tensor([-1.8416, -0.3596, 0.6039, -0.1379]) tensor([0.0446, 0.1962, 0.5143, 0.2449]) -Greedy action tensor([-1.9052, -0.4387, 0.6474, -0.1643]) tensor([0.0419, 0.1815, 0.5378, 0.2388]) -Greedy action tensor([-0.7788, -0.0434, 0.1747, -0.0086]) tensor([0.1275, 0.2661, 0.3309, 0.2755]) -Greedy action tensor([-1.6146, 0.2316, 0.3954, -0.0269]) tensor([0.0508, 0.3218, 0.3790, 0.2485]) -Greedy action tensor([-1.9467, -0.4461, 0.6659, -0.1819]) tensor([0.0401, 0.1797, 0.5463, 0.2340]) -Greedy action tensor([-1.7643, -0.3879, 0.5674, -0.0626]) tensor([0.0482, 0.1910, 0.4964, 0.2644]) -Greedy action tensor([-0.6595, -0.0990, 0.4139, 0.8120]) tensor([0.0997, 0.1746, 0.2916, 0.4342]) -Greedy action tensor([-1.8464, -0.1845, 0.5589, -0.1002]) tensor([0.0433, 0.2283, 0.4801, 0.2483]) -Greedy action tensor([-0.6776, 0.0066, 0.2633, -1.0274]) tensor([0.1600, 0.3172, 0.4100, 0.1128]) -Greedy action tensor([-0.1407, 0.1258, -0.0079, -0.2079]) tensor([0.2282, 0.2979, 0.2606, 0.2134]) -Greedy action tensor([-0.0793, -0.7030, 0.4290, 0.4910]) tensor([0.2013, 0.1079, 0.3347, 0.3561]) -Greedy action tensor([ 0.6023, -0.1545, 0.2751, 0.2598]) tensor([0.3448, 0.1618, 0.2486, 0.2448]) -Greedy action tensor([ 0.7075, -0.3019, -0.0230, 0.3400]) tensor([0.3939, 0.1436, 0.1897, 0.2728]) -Greedy action tensor([-0.6410, -1.0797, 0.3232, 0.0842]) tensor([0.1579, 0.1018, 0.4141, 0.3261]) -Greedy action tensor([-0.6191, 0.1549, 0.1793, -1.4319]) tensor([0.1714, 0.3717, 0.3809, 0.0760]) -Greedy action tensor([-0.1801, -1.7880, -0.3684, -0.4740]) tensor([0.3605, 0.0722, 0.2986, 0.2687]) -Greedy action tensor([-0.2607, -0.6901, -0.2834, -0.7648]) tensor([0.3094, 0.2014, 0.3024, 0.1869]) -Greedy action tensor([ 0.8033, -1.0715, 0.1544, 0.2136]) tensor([0.4483, 0.0688, 0.2343, 0.2486]) -Greedy action tensor([-0.4903, -0.4019, 0.4138, -0.8274]) tensor([0.1895, 0.2071, 0.4681, 0.1353]) -Greedy action tensor([ 1.6395, -0.3660, 0.0695, 0.4102]) tensor([0.6116, 0.0823, 0.1272, 0.1789]) -Greedy action tensor([-0.5778, -1.0395, 0.1367, -1.3025]) tensor([0.2405, 0.1516, 0.4914, 0.1165]) -Greedy action tensor([ 0.1738, -1.8896, 0.6236, 0.4875]) tensor([0.2461, 0.0313, 0.3859, 0.3368]) -Greedy action tensor([-0.9388, -0.5480, 0.8177, -0.8753]) tensor([0.1071, 0.1583, 0.6204, 0.1141]) -Greedy action tensor([ 0.0077, -0.8003, -1.5873, 0.0700]) tensor([0.3686, 0.1643, 0.0748, 0.3923]) -Greedy action tensor([-0.5640, -0.6766, -0.1267, -0.2755]) tensor([0.2094, 0.1871, 0.3242, 0.2794]) -Greedy action tensor([ 1.4771, -1.4198, 1.4083, 0.1469]) tensor([0.4438, 0.0245, 0.4143, 0.1174]) -Greedy action tensor([ 0.1624, -0.9049, 0.6265, -0.7631]) tensor([0.3002, 0.1033, 0.4775, 0.1190]) -Greedy action tensor([ 0.2010, -0.0027, 0.6247, -0.1014]) tensor([0.2450, 0.1998, 0.3742, 0.1810]) -Greedy action tensor([-0.4282, 0.5635, -0.1394, -0.3578]) tensor([0.1638, 0.4417, 0.2187, 0.1758]) -Greedy action tensor([-1.1400, -1.4780, 0.6271, -0.2556]) tensor([0.1001, 0.0714, 0.5861, 0.2424]) -Greedy action tensor([-0.4649, -1.1507, 0.0240, -0.0684]) tensor([0.2164, 0.1090, 0.3529, 0.3217]) -Greedy action tensor([-1.0862, 0.5660, -0.7160, -0.3506]) tensor([0.1025, 0.5350, 0.1485, 0.2140]) -Greedy action tensor([ 1.0549, -1.0181, 0.1951, 0.3175]) tensor([0.4932, 0.0621, 0.2088, 0.2359]) -Greedy action tensor([ 0.7653, -0.6280, -0.3604, 0.2843]) tensor([0.4565, 0.1133, 0.1481, 0.2822]) -Greedy action tensor([0.4709, 0.0239, 0.1961, 0.2407]) tensor([0.3131, 0.2003, 0.2379, 0.2487]) -Greedy action tensor([0.1244, 0.1632, 0.2332, 0.6146]) tensor([0.2089, 0.2172, 0.2329, 0.3410]) -Greedy action tensor([ 0.6618, 0.9637, 0.3217, -0.1558]) tensor([0.2853, 0.3858, 0.2030, 0.1259]) -Greedy action tensor([-0.2604, -0.7197, 0.3748, -0.6695]) tensor([0.2390, 0.1510, 0.4512, 0.1588]) -Greedy action tensor([ 0.1479, -0.8578, 0.4138, -0.0460]) tensor([0.2862, 0.1047, 0.3734, 0.2357]) -Greedy action tensor([-0.1292, 0.0829, 0.1167, -0.9609]) tensor([0.2531, 0.3129, 0.3237, 0.1102]) -Greedy action tensor([ 0.3541, 0.4842, 0.6968, -0.1604]) tensor([0.2412, 0.2747, 0.3398, 0.1442]) -Greedy action tensor([ 0.2120, -0.1397, 0.4573, 1.0748]) tensor([0.1869, 0.1315, 0.2388, 0.4428]) -Greedy action tensor([-0.0736, -0.0856, 0.0030, -0.1164]) tensor([0.2484, 0.2454, 0.2682, 0.2380]) -Greedy action tensor([-0.4850, 0.0753, -0.2478, -0.8719]) tensor([0.2128, 0.3728, 0.2698, 0.1446]) -Greedy action tensor([ 0.6596, -1.4324, 0.3969, -0.6811]) tensor([0.4642, 0.0573, 0.3570, 0.1215]) -Greedy action tensor([ 0.0314, -1.0200, 0.6970, -0.3132]) tensor([0.2498, 0.0873, 0.4860, 0.1770]) -Greedy action tensor([ 0.8737, -0.5338, 0.5540, -0.1157]) tensor([0.4268, 0.1045, 0.3100, 0.1587]) -Greedy action tensor([-0.5182, -0.4985, -0.3007, -0.2689]) tensor([0.2200, 0.2244, 0.2734, 0.2823]) -Greedy action tensor([ 1.6157, -0.4943, 0.3641, 0.9421]) tensor([0.5216, 0.0632, 0.1492, 0.2660]) -Greedy action tensor([ 0.4320, -0.5407, -0.1051, 0.2161]) tensor([0.3612, 0.1366, 0.2111, 0.2911]) -Greedy action tensor([ 1.3827, -0.4436, -0.5198, 0.9450]) tensor([0.5113, 0.0823, 0.0763, 0.3301]) -Greedy action tensor([-0.1126, -0.3488, 0.4168, -0.2799]) tensor([0.2308, 0.1822, 0.3918, 0.1952]) -Greedy action tensor([-0.6670, -0.6625, -0.1331, -0.6319]) tensor([0.2107, 0.2117, 0.3594, 0.2182]) -Greedy action tensor([-1.1372, -0.9171, 0.0300, -1.2336]) tensor([0.1571, 0.1957, 0.5046, 0.1426]) -Greedy action tensor([-0.8035, -0.8401, 0.3138, -1.1933]) tensor([0.1755, 0.1692, 0.5364, 0.1189]) -Greedy action tensor([-0.2741, -1.6142, -1.2598, -0.2552]) tensor([0.3768, 0.0986, 0.1406, 0.3840]) -Greedy action tensor([ 1.2178, -1.0172, 0.2057, 0.9608]) tensor([0.4457, 0.0477, 0.1620, 0.3447]) -Greedy action tensor([ 1.2910, -0.5281, 0.6281, 0.9086]) tensor([0.4238, 0.0687, 0.2184, 0.2891]) -Greedy action tensor([ 1.7829, -0.4578, 0.3098, 0.8040]) tensor([0.5843, 0.0622, 0.1339, 0.2195]) -Greedy action tensor([0.9761, 0.0040, 0.9875, 0.3804]) tensor([0.3400, 0.1286, 0.3439, 0.1874]) -Greedy action tensor([-0.2023, -0.5422, -0.0680, 0.2863]) tensor([0.2229, 0.1587, 0.2550, 0.3634]) -Greedy action tensor([ 0.1726, -0.9142, 0.8621, 0.4204]) tensor([0.2169, 0.0731, 0.4322, 0.2778]) -Greedy action tensor([-0.3925, -0.2532, -0.2389, -0.4854]) tensor([0.2366, 0.2719, 0.2759, 0.2156]) -Greedy action tensor([ 1.1840, -0.5893, -0.8020, 0.0039]) tensor([0.6195, 0.1052, 0.0850, 0.1903]) -Greedy action tensor([-0.4459, -0.2887, -0.4338, -0.3692]) tensor([0.2346, 0.2746, 0.2375, 0.2533]) -Greedy action tensor([-0.5193, -0.4742, 0.0959, -0.8342]) tensor([0.2162, 0.2261, 0.3999, 0.1578]) -Greedy action tensor([ 0.2465, 0.3714, 0.1669, -0.5265]) tensor([0.2842, 0.3220, 0.2625, 0.1312]) -Greedy action tensor([ 0.3313, -0.3592, -0.5570, -0.1111]) tensor([0.3914, 0.1962, 0.1610, 0.2514]) -Greedy action tensor([-0.3314, -0.6215, 0.5600, -1.1222]) tensor([0.2155, 0.1612, 0.5255, 0.0977]) -Greedy action tensor([0.7547, 0.0158, 0.0767, 0.1816]) tensor([0.3923, 0.1874, 0.1991, 0.2212]) -Greedy action tensor([ 0.1820, -0.6274, -0.1619, 0.3340]) tensor([0.3014, 0.1341, 0.2137, 0.3508]) -Greedy action tensor([ 0.3608, -1.5019, -0.8741, -0.5242]) tensor([0.5380, 0.0835, 0.1565, 0.2220]) -Greedy action tensor([-0.8843, 0.0901, 0.0602, -0.3727]) tensor([0.1268, 0.3359, 0.3260, 0.2114]) -Greedy action tensor([ 0.7082, 0.3357, 0.1317, -0.6648]) tensor([0.3993, 0.2751, 0.2244, 0.1012]) -Greedy action tensor([-0.0369, -0.4269, -0.7270, -0.4276]) tensor([0.3502, 0.2371, 0.1757, 0.2370]) -Greedy action tensor([ 0.9074, -0.2420, 0.5837, -0.2191]) tensor([0.4229, 0.1340, 0.3060, 0.1371]) -Greedy action tensor([-0.1247, -1.2594, -0.3728, 0.9871]) tensor([0.1945, 0.0625, 0.1518, 0.5912]) -Greedy action tensor([-0.2646, 0.5789, 0.4878, -0.7713]) tensor([0.1653, 0.3843, 0.3508, 0.0996]) -Greedy action tensor([-0.0581, -0.4375, -0.3426, -0.8791]) tensor([0.3476, 0.2379, 0.2616, 0.1530]) -Greedy action tensor([-0.2580, 0.9450, -0.5962, -1.3539]) tensor([0.1860, 0.6193, 0.1326, 0.0622]) -Greedy action tensor([ 0.7931, 0.2202, -0.0678, -0.1483]) tensor([0.4207, 0.2373, 0.1779, 0.1641]) -Greedy action tensor([ 0.9669, -1.1853, -0.9243, 1.4056]) tensor([0.3549, 0.0412, 0.0536, 0.5503]) -Greedy action tensor([ 2.0327, -0.8015, 0.9772, 1.0434]) tensor([0.5622, 0.0330, 0.1957, 0.2091]) -Greedy action tensor([-0.2450, 0.6155, 0.3146, 0.1325]) tensor([0.1521, 0.3597, 0.2662, 0.2219]) -Greedy action tensor([ 0.9717, -0.9083, -0.4884, 0.7767]) tensor([0.4530, 0.0691, 0.1052, 0.3727]) -Greedy action tensor([-0.3365, -0.1081, -0.8206, -0.4174]) tensor([0.2635, 0.3311, 0.1624, 0.2430]) -Greedy action tensor([ 0.9771, -0.9598, -1.1769, 0.1610]) tensor([0.5874, 0.0847, 0.0682, 0.2597]) -Greedy action tensor([-0.7008, -0.3804, 0.0718, -0.2758]) tensor([0.1647, 0.2269, 0.3566, 0.2519]) -Greedy action tensor([-0.8058, 0.0331, -1.5543, -0.1482]) tensor([0.1749, 0.4047, 0.0828, 0.3376]) -Greedy action tensor([ 0.0367, -1.0147, -0.3376, -1.0374]) tensor([0.4204, 0.1469, 0.2891, 0.1436]) -Greedy action tensor([ 0.6491, -0.4374, -0.1378, -0.2811]) tensor([0.4572, 0.1543, 0.2081, 0.1804]) -Greedy action tensor([ 0.9712, -0.7186, 0.1763, -0.4068]) tensor([0.5296, 0.0977, 0.2392, 0.1335]) -Greedy action tensor([ 0.7310, -0.5583, 0.1307, -0.2538]) tensor([0.4550, 0.1253, 0.2496, 0.1700]) -Greedy action tensor([ 0.7300, -0.4728, -0.0852, -0.3338]) tensor([0.4789, 0.1438, 0.2119, 0.1653]) -Greedy action tensor([ 0.5635, -0.4117, -0.3297, -0.1861]) tensor([0.4427, 0.1669, 0.1812, 0.2092]) -Greedy action tensor([ 0.7364, -0.2182, 0.1288, -0.1585]) tensor([0.4276, 0.1646, 0.2329, 0.1748]) -Greedy action tensor([ 0.8880, -0.0749, 0.0109, -0.2847]) tensor([0.4746, 0.1812, 0.1974, 0.1469]) -Greedy action tensor([ 0.8801, -0.4501, -0.0780, -0.5041]) tensor([0.5267, 0.1393, 0.2020, 0.1320]) -Greedy action tensor([ 0.4699, -0.3003, 0.0220, -0.2247]) tensor([0.3845, 0.1780, 0.2456, 0.1919]) -Greedy action tensor([0.4866, 0.0497, 0.0008, 0.1009]) tensor([0.3400, 0.2197, 0.2092, 0.2312]) -Greedy action tensor([ 0.8215, -0.4520, -0.1509, -0.3425]) tensor([0.5075, 0.1420, 0.1919, 0.1585]) -Greedy action tensor([ 0.4885, 0.1802, 0.0297, -0.0996]) tensor([0.3422, 0.2514, 0.2163, 0.1901]) -Greedy action tensor([ 1.0109, -0.8608, -0.0216, -0.5272]) tensor([0.5798, 0.0892, 0.2065, 0.1245]) -Greedy action tensor([ 0.3459, 0.0053, -0.0446, -0.0502]) tensor([0.3267, 0.2324, 0.2211, 0.2198]) -Greedy action tensor([ 1.1403, -0.1060, -0.1726, -0.2419]) tensor([0.5532, 0.1591, 0.1488, 0.1389]) -Greedy action tensor([ 1.1210, -0.3936, -0.1494, -0.5021]) tensor([0.5890, 0.1295, 0.1653, 0.1162]) -Greedy action tensor([ 1.3016, -0.8691, -0.1677, -0.5055]) tensor([0.6630, 0.0756, 0.1526, 0.1088]) -Greedy action tensor([ 1.0517, -0.8415, 0.0912, -0.5234]) tensor([0.5746, 0.0865, 0.2199, 0.1189]) -Greedy action tensor([ 0.8223, -0.6360, -0.0899, -0.6075]) tensor([0.5337, 0.1242, 0.2144, 0.1278]) -Greedy action tensor([ 1.2873, -0.7512, -0.1324, -0.6037]) tensor([0.6566, 0.0855, 0.1588, 0.0991]) -Greedy action tensor([ 0.4742, -0.4421, 0.0615, -0.3385]) tensor([0.3991, 0.1597, 0.2642, 0.1771]) -Greedy action tensor([ 0.6404, -0.5570, -0.0141, -0.3690]) tensor([0.4574, 0.1381, 0.2377, 0.1667]) -Greedy action tensor([ 0.3888, -0.2518, -0.0233, -0.0801]) tensor([0.3552, 0.1872, 0.2353, 0.2223]) -Greedy action tensor([ 0.4121, -0.5294, -0.1820, -0.1579]) tensor([0.3988, 0.1555, 0.2202, 0.2255]) -Greedy action tensor([ 1.0912, -0.7454, 0.0189, -0.6046]) tensor([0.5935, 0.0946, 0.2031, 0.1089]) -Greedy action tensor([ 0.6690, 0.1083, -0.0720, 0.1558]) tensor([0.3779, 0.2157, 0.1801, 0.2262]) -Greedy action tensor([ 0.7507, -0.1402, 0.0716, -0.3941]) tensor([0.4473, 0.1835, 0.2268, 0.1424]) -Greedy action tensor([ 0.6877, 0.3623, -0.2473, 0.1985]) tensor([0.3666, 0.2647, 0.1439, 0.2248]) -Greedy action tensor([ 0.5622, 0.5868, -0.3937, 0.3096]) tensor([0.3139, 0.3217, 0.1207, 0.2438]) -Greedy action tensor([ 1.1602, -0.8898, 0.0652, -0.8983]) tensor([0.6286, 0.0809, 0.2103, 0.0802]) -Greedy action tensor([ 1.3451, -0.8774, 0.0356, -0.6975]) tensor([0.6631, 0.0718, 0.1790, 0.0860]) -Greedy action tensor([ 0.8995, -0.3040, -0.0234, -0.5411]) tensor([0.5170, 0.1552, 0.2054, 0.1224]) -Greedy action tensor([ 1.5510, -1.0104, 0.0202, -0.9346]) tensor([0.7263, 0.0561, 0.1571, 0.0605]) -Greedy action tensor([ 0.8216, -0.1526, 0.0201, 0.0201]) tensor([0.4396, 0.1659, 0.1972, 0.1972]) -Greedy action tensor([ 0.5463, 0.2631, -0.2671, 0.0674]) tensor([0.3551, 0.2675, 0.1574, 0.2200]) -Greedy action tensor([ 0.8457, -0.7356, -0.0292, -0.3266]) tensor([0.5175, 0.1065, 0.2158, 0.1602]) -Greedy action tensor([ 0.5370, 0.1033, -0.1100, 0.0281]) tensor([0.3606, 0.2337, 0.1888, 0.2168]) -Greedy action tensor([ 0.6060, -0.2932, -0.0980, -0.1851]) tensor([0.4247, 0.1728, 0.2100, 0.1925]) -Greedy action tensor([ 0.4544, -0.3592, -0.0152, -0.0804]) tensor([0.3768, 0.1670, 0.2356, 0.2207]) -Greedy action tensor([ 0.1873, -0.0049, -0.0541, -0.4152]) tensor([0.3166, 0.2613, 0.2487, 0.1734]) -Greedy action tensor([ 0.3239, -0.0793, -0.1434, 0.0009]) tensor([0.3312, 0.2213, 0.2076, 0.2398]) -Greedy action tensor([ 0.7755, -0.4937, -0.0995, -0.2805]) tensor([0.4888, 0.1374, 0.2038, 0.1700]) -Greedy action tensor([ 1.1498, -0.7508, -0.0779, -0.4724]) tensor([0.6098, 0.0911, 0.1786, 0.1204]) -Greedy action tensor([ 1.3907, -0.9571, 0.0606, -0.9273]) tensor([0.6856, 0.0655, 0.1813, 0.0675]) -Greedy action tensor([ 0.5439, -0.2302, 0.0531, -0.1358]) tensor([0.3876, 0.1787, 0.2373, 0.1964]) -Greedy action tensor([ 1.0993, -0.6008, 0.1551, -0.6053]) tensor([0.5703, 0.1042, 0.2218, 0.1037]) -Greedy action tensor([ 0.7037, -0.3712, -0.0775, -0.3945]) tensor([0.4689, 0.1601, 0.2147, 0.1564]) -Greedy action tensor([ 0.7660, -0.3892, -0.0147, -0.3288]) tensor([0.4745, 0.1495, 0.2173, 0.1588]) -Greedy action tensor([ 1.5187, -0.8574, -0.0335, -0.6313]) tensor([0.7036, 0.0654, 0.1490, 0.0820]) -Greedy action tensor([ 0.9816, 0.1054, -0.0192, 0.0336]) tensor([0.4605, 0.1917, 0.1693, 0.1785]) -Greedy action tensor([ 1.3730, -0.9879, 0.0313, -0.4094]) tensor([0.6562, 0.0619, 0.1715, 0.1104]) -Greedy action tensor([ 0.9058, -0.7941, 0.1109, -0.8311]) tensor([0.5524, 0.1009, 0.2495, 0.0973]) -Greedy action tensor([ 0.9457, -0.5954, -0.0095, -0.3272]) tensor([0.5322, 0.1140, 0.2048, 0.1490]) -Greedy action tensor([ 0.7699, -0.3662, -0.0442, -0.1584]) tensor([0.4631, 0.1487, 0.2052, 0.1830]) -Greedy action tensor([ 0.6370, -0.4415, -0.0767, -0.2427]) tensor([0.4455, 0.1515, 0.2182, 0.1848]) -Greedy action tensor([0.5432, 0.0893, 0.0078, 0.0191]) tensor([0.3555, 0.2258, 0.2081, 0.2105]) -Greedy action tensor([ 0.6459, -0.3243, -0.0225, -0.0621]) tensor([0.4194, 0.1590, 0.2150, 0.2066]) -Greedy action tensor([ 1.1393, -0.7447, 0.1146, -0.6885]) tensor([0.5982, 0.0909, 0.2147, 0.0962]) -Greedy action tensor([ 1.1715, -0.6134, -0.1071, -0.4064]) tensor([0.6051, 0.1015, 0.1685, 0.1249]) -Greedy action tensor([ 0.6321, -0.3570, -0.0031, -0.1095]) tensor([0.4205, 0.1564, 0.2228, 0.2003]) -Greedy action tensor([ 1.0241e+00, -5.4184e-01, 9.9516e-04, -4.4012e-01]) tensor([0.5557, 0.1161, 0.1998, 0.1285]) -Greedy action tensor([ 0.4507, -0.2412, -0.0794, -0.2560]) tensor([0.3872, 0.1939, 0.2279, 0.1910]) -Greedy action tensor([ 0.3610, -0.0494, -0.0826, -0.0671]) tensor([0.3382, 0.2244, 0.2170, 0.2204]) -Greedy action tensor([ 0.6994, -0.3330, -0.0698, -0.6998]) tensor([0.4840, 0.1724, 0.2242, 0.1194]) -Greedy action tensor([ 0.7517, -0.2569, -0.0874, -0.0650]) tensor([0.4467, 0.1629, 0.1930, 0.1974]) -Greedy action tensor([ 0.9348, -0.0572, -0.0693, -0.6379]) tensor([0.5142, 0.1907, 0.1884, 0.1067]) -Greedy action tensor([ 0.4791, -0.0429, -0.1079, 0.0092]) tensor([0.3604, 0.2139, 0.2004, 0.2253]) -Greedy action tensor([ 0.7070, 0.2659, 0.0351, -0.2650]) tensor([0.3949, 0.2540, 0.2017, 0.1494]) -Greedy action tensor([ 0.9526, -0.4190, -0.0512, -0.2849]) tensor([0.5235, 0.1328, 0.1919, 0.1519]) -Greedy action tensor([ 0.9831, -1.3179, 0.1793, -0.7276]) tensor([0.5785, 0.0579, 0.2590, 0.1046]) -Greedy action tensor([ 0.8615, -0.6531, -0.0383, -0.4093]) tensor([0.5243, 0.1153, 0.2132, 0.1471]) -Greedy action tensor([ 0.4330, -0.1271, -0.0797, -0.2990]) tensor([0.3772, 0.2154, 0.2259, 0.1814]) -Greedy action tensor([ 0.9245, -0.3301, -0.1996, -0.4041]) tensor([0.5334, 0.1521, 0.1733, 0.1413]) -Greedy action tensor([ 1.1649, -0.1140, 0.1336, -0.2462]) tensor([0.5323, 0.1481, 0.1898, 0.1298]) -Greedy action tensor([ 0.8866, -0.1397, -0.0702, -0.5108]) tensor([0.5026, 0.1801, 0.1931, 0.1243]) -Greedy action tensor([ 0.0597, 0.3418, -0.0833, -0.0190]) tensor([0.2429, 0.3221, 0.2105, 0.2245]) -Greedy action tensor([ 0.6883, -0.3085, -0.0782, -0.2146]) tensor([0.4466, 0.1648, 0.2075, 0.1811]) -Greedy action tensor([ 0.7576, -0.6012, -0.0426, -0.4801]) tensor([0.5009, 0.1287, 0.2250, 0.1453]) -Greedy action tensor([ 0.4514, -0.3636, -0.0249, -0.1906]) tensor([0.3861, 0.1709, 0.2398, 0.2032]) -Greedy action tensor([ 0.9712, -0.5497, -0.0527, -0.2979]) tensor([0.5380, 0.1176, 0.1932, 0.1512]) -Greedy action tensor([ 0.3412, 0.1810, -0.0860, -0.1122]) tensor([0.3185, 0.2713, 0.2078, 0.2024]) -Greedy action tensor([ 1.4301, -0.6369, -0.7207, 0.1801]) tensor([0.6538, 0.0828, 0.0761, 0.1873]) -Greedy action tensor([ 0.9357, -0.2915, -0.8591, 0.1878]) tensor([0.5174, 0.1517, 0.0860, 0.2449]) -Greedy action tensor([ 1.5217, -0.2939, -0.3414, 0.1894]) tensor([0.6322, 0.1029, 0.0981, 0.1668]) -Greedy action tensor([ 1.4852, -0.4406, -0.7279, 0.3847]) tensor([0.6298, 0.0918, 0.0689, 0.2095]) -Greedy action tensor([ 2.3401, -0.1544, -0.5976, 0.1945]) tensor([0.7984, 0.0659, 0.0423, 0.0934]) -Greedy action tensor([ 1.4556, 0.0518, -0.8408, -0.0019]) tensor([0.6333, 0.1556, 0.0637, 0.1474]) -Greedy action tensor([ 1.3905, -0.4198, -0.8396, 0.4767]) tensor([0.5980, 0.0978, 0.0643, 0.2398]) -Greedy action tensor([ 1.7666, -0.8241, -0.3524, 0.5324]) tensor([0.6729, 0.0504, 0.0808, 0.1958]) -Greedy action tensor([ 1.1366, -0.1728, -0.5707, 0.2634]) tensor([0.5351, 0.1445, 0.0970, 0.2234]) -Greedy action tensor([ 1.4881, -0.2351, -0.4902, 0.1358]) tensor([0.6347, 0.1133, 0.0878, 0.1642]) -Greedy action tensor([ 1.8959, -0.9111, -0.1558, 0.3593]) tensor([0.7122, 0.0430, 0.0915, 0.1532]) -Greedy action tensor([ 1.7114, -0.2231, -0.5483, 0.6881]) tensor([0.6218, 0.0898, 0.0649, 0.2235]) -Greedy action tensor([ 1.4906, -0.4696, -0.6218, 0.0404]) tensor([0.6683, 0.0941, 0.0808, 0.1567]) -Greedy action tensor([ 1.4130, 0.3574, -0.3906, 0.1132]) tensor([0.5601, 0.1949, 0.0923, 0.1527]) -Greedy action tensor([ 1.6114, -0.9531, -0.7495, 0.0576]) tensor([0.7232, 0.0557, 0.0682, 0.1529]) -Greedy action tensor([ 1.3553, -0.6577, -1.1127, 0.2640]) tensor([0.6435, 0.0860, 0.0545, 0.2160]) -Greedy action tensor([ 1.6210, -0.5780, -0.3654, 0.2369]) tensor([0.6673, 0.0740, 0.0915, 0.1672]) -Greedy action tensor([ 1.4013, -0.5631, -0.3121, 0.0539]) tensor([0.6328, 0.0887, 0.1140, 0.1645]) -Greedy action tensor([ 1.3865, -0.4740, -0.4531, 0.4308]) tensor([0.5886, 0.0916, 0.0935, 0.2263]) -Greedy action tensor([ 1.9492, -1.5377, 0.1149, -0.1530]) tensor([0.7619, 0.0233, 0.1217, 0.0931]) -Greedy action tensor([ 1.6296, -1.0131, -0.7277, 0.1722]) tensor([0.7150, 0.0509, 0.0677, 0.1665]) -Greedy action tensor([ 1.3873, -0.6191, -0.5990, 0.0956]) tensor([0.6466, 0.0870, 0.0887, 0.1777]) -Greedy action tensor([ 1.3756, -0.1492, -0.4558, -0.1955]) tensor([0.6307, 0.1373, 0.1010, 0.1311]) -Greedy action tensor([ 1.7197, -0.6435, -0.3267, 0.8106]) tensor([0.6149, 0.0579, 0.0794, 0.2478]) -Greedy action tensor([ 2.0481, -0.9231, -0.3464, 0.3616]) tensor([0.7532, 0.0386, 0.0687, 0.1395]) -Greedy action tensor([ 1.5314, -0.4492, -0.1491, 0.1846]) tensor([0.6312, 0.0871, 0.1176, 0.1642]) -Greedy action tensor([ 1.2612, -0.4907, -0.5212, 0.5085]) tensor([0.5516, 0.0957, 0.0928, 0.2599]) -Greedy action tensor([ 1.3617, -0.8188, -0.1327, 0.6394]) tensor([0.5486, 0.0620, 0.1231, 0.2664]) -Greedy action tensor([ 1.0938, -0.3817, -0.2522, 0.2511]) tensor([0.5210, 0.1191, 0.1356, 0.2243]) -Greedy action tensor([ 1.0980, 0.0523, -0.4428, 0.0814]) tensor([0.5188, 0.1823, 0.1111, 0.1877]) -Greedy action tensor([ 1.8114, -0.8907, -0.5716, 0.4822]) tensor([0.7022, 0.0471, 0.0648, 0.1859]) -Greedy action tensor([ 1.5962, 0.0878, -0.2872, 0.5756]) tensor([0.5768, 0.1276, 0.0877, 0.2079]) -Greedy action tensor([ 2.0184, -0.3652, -0.4894, 0.4999]) tensor([0.7180, 0.0662, 0.0585, 0.1573]) -Greedy action tensor([ 1.1566, -0.3327, -0.2748, 0.1149]) tensor([0.5502, 0.1241, 0.1315, 0.1942]) -Greedy action tensor([ 2.0624, -0.1715, -0.6648, 0.6866]) tensor([0.7017, 0.0752, 0.0459, 0.1773]) -Greedy action tensor([ 2.3537, -0.9538, 0.0836, 1.2180]) tensor([0.6844, 0.0251, 0.0707, 0.2198]) -Greedy action tensor([ 1.5085, -0.3721, -0.1287, 0.2458]) tensor([0.6135, 0.0936, 0.1193, 0.1736]) -Greedy action tensor([ 1.1440, -0.0229, -0.8601, 0.2421]) tensor([0.5400, 0.1681, 0.0728, 0.2191]) -Greedy action tensor([ 1.6962, -0.7255, -1.0549, 0.5351]) tensor([0.6822, 0.0606, 0.0436, 0.2136]) -Greedy action tensor([ 1.1919, -0.5154, -0.3491, 0.2803]) tensor([0.5564, 0.1009, 0.1191, 0.2236]) -Greedy action tensor([ 1.6360, -0.5773, -0.4520, 0.1057]) tensor([0.6898, 0.0754, 0.0855, 0.1493]) -Greedy action tensor([ 1.7028, -0.4367, -0.9511, 0.3583]) tensor([0.6902, 0.0813, 0.0486, 0.1799]) -Greedy action tensor([ 1.3150, -0.8711, -0.1364, 0.3908]) tensor([0.5736, 0.0644, 0.1344, 0.2276]) -Greedy action tensor([ 1.4445, -0.6952, -0.2369, 0.1123]) tensor([0.6379, 0.0751, 0.1187, 0.1683]) -Greedy action tensor([ 1.4911, -0.2364, -0.2114, 0.2935]) tensor([0.6017, 0.1069, 0.1097, 0.1817]) -Greedy action tensor([ 1.5267, -0.6158, 0.2564, 0.5154]) tensor([0.5676, 0.0666, 0.1593, 0.2064]) -Greedy action tensor([ 1.7703e+00, -1.2492e+00, 7.5397e-04, 1.9974e-01]) tensor([0.7007, 0.0342, 0.1194, 0.1457]) -Greedy action tensor([ 1.0902, -0.3731, -0.5306, 0.3682]) tensor([0.5222, 0.1209, 0.1033, 0.2537]) -Greedy action tensor([ 0.7391, -0.1581, -0.0209, 0.2067]) tensor([0.4061, 0.1656, 0.1899, 0.2385]) -Greedy action tensor([ 1.3790, -0.1159, -0.2829, 0.1150]) tensor([0.5894, 0.1322, 0.1119, 0.1665]) -Greedy action tensor([1.9938, 0.5454, 0.0306, 0.0652]) tensor([0.6576, 0.1545, 0.0923, 0.0956]) -Greedy action tensor([2.4505, 0.5784, 0.2551, 0.0430]) tensor([0.7379, 0.1135, 0.0821, 0.0664]) -Greedy action tensor([ 1.1423, -0.1874, -1.1216, 0.4865]) tensor([0.5298, 0.1402, 0.0551, 0.2750]) -Greedy action tensor([ 1.2289, -0.2874, -0.4453, 0.0829]) tensor([0.5798, 0.1273, 0.1087, 0.1843]) -Greedy action tensor([ 1.4121, -0.6423, -0.4400, 0.0997]) tensor([0.6434, 0.0825, 0.1010, 0.1732]) -Greedy action tensor([ 1.6942, -0.5381, -0.5117, 0.6203]) tensor([0.6414, 0.0688, 0.0706, 0.2191]) -Greedy action tensor([ 1.1960, -0.9217, -0.3506, 0.5673]) tensor([0.5357, 0.0645, 0.1141, 0.2857]) -Greedy action tensor([ 1.7147, -0.5499, -0.5753, 0.2853]) tensor([0.6922, 0.0719, 0.0701, 0.1658]) -Greedy action tensor([ 1.3344, -0.8353, -0.1904, 0.1751]) tensor([0.6077, 0.0694, 0.1323, 0.1906]) -Greedy action tensor([ 1.2421, -0.3436, -0.3500, 0.0743]) tensor([0.5816, 0.1191, 0.1184, 0.1809]) -Greedy action tensor([ 1.4213, -0.5617, -0.3821, 0.4928]) tensor([0.5891, 0.0811, 0.0970, 0.2328]) -Greedy action tensor([ 1.4141, -0.2413, -0.5730, 0.0605]) tensor([0.6304, 0.1204, 0.0864, 0.1628]) -Greedy action tensor([ 1.7703, -0.6591, -0.0503, 0.2053]) tensor([0.6854, 0.0604, 0.1110, 0.1433]) -Greedy action tensor([ 1.6220, -0.2522, -0.4894, 0.2320]) tensor([0.6563, 0.1007, 0.0795, 0.1635]) -Greedy action tensor([ 1.3862, 0.1086, -0.5657, 0.3991]) tensor([0.5576, 0.1554, 0.0792, 0.2078]) -Greedy action tensor([ 1.6878, -0.7399, -0.1654, 0.5113]) tensor([0.6438, 0.0568, 0.1009, 0.1985]) -Greedy action tensor([ 1.2799, -0.4238, -0.1789, 0.2492]) tensor([0.5646, 0.1028, 0.1313, 0.2014]) -Greedy action tensor([ 1.4869, -0.9262, -0.3414, 0.6173]) tensor([0.5990, 0.0536, 0.0963, 0.2511]) -Greedy action tensor([ 1.3434, -0.3574, -0.4149, 0.2547]) tensor([0.5912, 0.1079, 0.1019, 0.1990]) -Greedy action tensor([ 2.3746, -1.0349, -0.5254, 0.9192]) tensor([0.7568, 0.0250, 0.0416, 0.1766]) -Greedy action tensor([ 1.5220, -0.6403, -0.4462, 0.2178]) tensor([0.6552, 0.0754, 0.0915, 0.1778]) -Greedy action tensor([ 1.1566, 0.1521, -0.5890, -0.0015]) tensor([0.5391, 0.1974, 0.0941, 0.1693]) -Greedy action tensor([ 1.1266, -0.5805, 0.0922, -0.6144]) tensor([0.5841, 0.1059, 0.2076, 0.1024]) -Greedy action tensor([ 1.6971, -0.1760, -0.6643, 0.2901]) tensor([0.6699, 0.1029, 0.0632, 0.1640]) -Greedy action tensor([ 1.6900, -0.7773, -0.2110, 0.4566]) tensor([0.6555, 0.0556, 0.0979, 0.1910]) -Greedy action tensor([ 2.2836, -1.5784, -0.1651, 0.3880]) tensor([0.7951, 0.0167, 0.0687, 0.1195]) -Greedy action tensor([ 1.4752, -0.7225, -0.1184, 0.6557]) tensor([0.5698, 0.0633, 0.1158, 0.2511]) -Greedy action tensor([ 1.9442, -0.9369, -0.2829, 0.4024]) tensor([0.7257, 0.0407, 0.0783, 0.1553]) -Greedy action tensor([ 1.7740, -0.0584, -0.9468, 0.0446]) tensor([0.7126, 0.1140, 0.0469, 0.1264]) -Greedy action tensor([ 1.4079, -0.3442, -0.3374, 0.6346]) tensor([0.5526, 0.0958, 0.0965, 0.2550]) -Greedy action tensor([ 1.4115, -0.8460, -0.3543, 0.0780]) tensor([0.6497, 0.0680, 0.1111, 0.1712]) -Greedy action tensor([-1.7065, -0.3227, 0.6585, -0.0319]) tensor([0.0477, 0.1903, 0.5076, 0.2545]) -Greedy action tensor([-1.9308, -0.4324, 0.6613, -0.1729]) tensor([0.0406, 0.1817, 0.5423, 0.2355]) -Greedy action tensor([-1.6316, -0.3752, 0.4359, -0.0263]) tensor([0.0575, 0.2019, 0.4544, 0.2862]) -Greedy action tensor([-1.8996, -0.4522, 0.6494, -0.1575]) tensor([0.0421, 0.1790, 0.5386, 0.2403]) -Greedy action tensor([-1.3914, -0.2077, 0.3996, 0.2235]) tensor([0.0654, 0.2136, 0.3921, 0.3288]) -Greedy action tensor([-1.9033, -0.3907, 0.6470, -0.1569]) tensor([0.0415, 0.1884, 0.5319, 0.2381]) -Greedy action tensor([-1.7456, 0.0925, 0.5188, -0.0430]) tensor([0.0446, 0.2806, 0.4297, 0.2450]) -Greedy action tensor([-1.8323, -0.4134, 0.6059, -0.1360]) tensor([0.0454, 0.1875, 0.5197, 0.2475]) -Greedy action tensor([-1.4844, -0.4706, 0.4449, 0.0410]) tensor([0.0656, 0.1809, 0.4518, 0.3017]) -Greedy action tensor([-1.6020, -0.4019, 0.4791, -0.0159]) tensor([0.0581, 0.1928, 0.4654, 0.2837]) -Greedy action tensor([-1.8879, -0.4566, 0.6439, -0.1536]) tensor([0.0427, 0.1786, 0.5369, 0.2418]) -Greedy action tensor([-1.8292, -0.4046, 0.6054, -0.1170]) tensor([0.0452, 0.1880, 0.5161, 0.2506]) -Greedy action tensor([-1.9014, -0.4320, 0.6698, -0.1205]) tensor([0.0410, 0.1784, 0.5369, 0.2436]) -Greedy action tensor([-1.9179, -0.4138, 0.6583, -0.1630]) tensor([0.0409, 0.1842, 0.5381, 0.2367]) -Greedy action tensor([-1.9102, -0.4076, 0.6517, -0.1654]) tensor([0.0414, 0.1858, 0.5360, 0.2368]) -Greedy action tensor([-1.7297, -0.4255, 0.5741, -0.1235]) tensor([0.0508, 0.1872, 0.5087, 0.2532]) -Greedy action tensor([-1.9116, -0.4562, 0.6401, -0.1625]) tensor([0.0419, 0.1796, 0.5376, 0.2409]) -Greedy action tensor([-1.9353, -0.4379, 0.6636, -0.1754]) tensor([0.0404, 0.1807, 0.5438, 0.2350]) -Greedy action tensor([-0.8954, -0.6549, 0.2339, 0.3213]) tensor([0.1144, 0.1455, 0.3539, 0.3862]) -Greedy action tensor([-1.7527, -0.4735, 0.5511, -0.0692]) tensor([0.0500, 0.1798, 0.5008, 0.2693]) -Greedy action tensor([-1.6446, -0.1878, 0.4881, -0.0759]) tensor([0.0540, 0.2316, 0.4554, 0.2590]) -Greedy action tensor([-1.0632, -0.4734, 0.2052, 0.3109]) tensor([0.0970, 0.1749, 0.3448, 0.3832]) -Greedy action tensor([0.0014, 1.1682, 0.0508, 0.2509]) tensor([0.1528, 0.4907, 0.1605, 0.1961]) -Greedy action tensor([-1.6149, 0.1741, 0.4397, -0.0681]) tensor([0.0513, 0.3071, 0.4005, 0.2410]) -Greedy action tensor([-1.5657, 0.4156, 0.3790, -0.0808]) tensor([0.0509, 0.3689, 0.3557, 0.2246]) -Greedy action tensor([-1.9416, -0.4530, 0.6675, -0.1786]) tensor([0.0402, 0.1783, 0.5468, 0.2346]) -Greedy action tensor([-1.6548, -0.5356, 0.5273, -0.0864]) tensor([0.0564, 0.1728, 0.5001, 0.2707]) -Greedy action tensor([-1.8954, -0.2379, 0.6141, -0.1682]) tensor([0.0414, 0.2170, 0.5089, 0.2327]) -Greedy action tensor([-0.7300, 1.0253, 0.0403, 0.4570]) tensor([0.0818, 0.4733, 0.1767, 0.2681]) -Greedy action tensor([-0.5853, 0.7587, -0.0489, 0.2999]) tensor([0.1115, 0.4276, 0.1907, 0.2703]) -Greedy action tensor([-1.5592, -0.1656, 0.4818, -0.0895]) tensor([0.0586, 0.2360, 0.4508, 0.2546]) -Greedy action tensor([-1.5282, -0.0483, 0.4395, 0.1344]) tensor([0.0561, 0.2465, 0.4015, 0.2959]) -Greedy action tensor([-1.9318, -0.4695, 0.7081, -0.1617]) tensor([0.0397, 0.1713, 0.5560, 0.2330]) -Greedy action tensor([-1.1090, 0.7585, 0.1100, 0.3732]) tensor([0.0655, 0.4242, 0.2218, 0.2885]) -Greedy action tensor([-1.9142, -0.3882, 0.6398, -0.1756]) tensor([0.0414, 0.1905, 0.5325, 0.2356]) -Greedy action tensor([-1.3552, 0.1764, 0.3495, -0.0570]) tensor([0.0676, 0.3128, 0.3719, 0.2477]) -Greedy action tensor([-1.8620, -0.4515, 0.6300, -0.1397]) tensor([0.0439, 0.1799, 0.5305, 0.2457]) -Greedy action tensor([-1.8563, -0.3852, 0.6140, -0.1448]) tensor([0.0440, 0.1917, 0.5206, 0.2437]) -Greedy action tensor([-1.3049, -0.0595, 0.3334, -0.2333]) tensor([0.0797, 0.2770, 0.4104, 0.2329]) -Greedy action tensor([-1.7998, -0.4727, 0.6052, -0.1117]) tensor([0.0470, 0.1774, 0.5212, 0.2545]) -Greedy action tensor([-1.9226, -0.4098, 0.6552, -0.1683]) tensor([0.0408, 0.1854, 0.5378, 0.2360]) -Greedy action tensor([-1.8714, -0.3656, 0.6489, -0.1358]) tensor([0.0424, 0.1909, 0.5265, 0.2402]) -Greedy action tensor([-1.8696, -0.4551, 0.6328, -0.1456]) tensor([0.0436, 0.1794, 0.5325, 0.2445]) -Greedy action tensor([-1.7720, -0.0265, 0.5288, -0.1499]) tensor([0.0459, 0.2631, 0.4584, 0.2326]) -Greedy action tensor([-0.0912, 1.1578, -0.0162, 0.4476]) tensor([0.1374, 0.4790, 0.1481, 0.2355]) -Greedy action tensor([-1.8717, -0.3811, 0.6642, -0.1145]) tensor([0.0419, 0.1861, 0.5292, 0.2429]) -Greedy action tensor([-1.6254, -0.4904, 0.5175, -0.0117]) tensor([0.0566, 0.1762, 0.4828, 0.2844]) -Greedy action tensor([-1.9187, -0.4148, 0.6527, -0.1667]) tensor([0.0411, 0.1848, 0.5373, 0.2368]) -Greedy action tensor([-0.9955, -0.3000, 0.5701, 1.1372]) tensor([0.0616, 0.1235, 0.2949, 0.5199]) -Greedy action tensor([-1.6845, -0.3810, 0.5375, -0.0984]) tensor([0.0532, 0.1959, 0.4909, 0.2599]) -Greedy action tensor([-1.7840, -0.0110, 0.5375, -0.1015]) tensor([0.0445, 0.2622, 0.4538, 0.2395]) -Greedy action tensor([-1.8157, -0.2914, 0.6310, -0.1173]) tensor([0.0442, 0.2031, 0.5109, 0.2417]) -Greedy action tensor([-1.9138, -0.4036, 0.6504, -0.1632]) tensor([0.0412, 0.1865, 0.5351, 0.2372]) -Greedy action tensor([-1.9357, -0.4127, 0.6568, -0.1750]) tensor([0.0404, 0.1852, 0.5396, 0.2349]) -Greedy action tensor([-1.6766, -0.3140, 0.4995, -0.0466]) tensor([0.0531, 0.2075, 0.4682, 0.2712]) -Greedy action tensor([-1.9431, -0.4436, 0.6641, -0.1797]) tensor([0.0402, 0.1801, 0.5452, 0.2345]) -Greedy action tensor([-1.6398, -0.2195, 0.5879, 0.2350]) tensor([0.0478, 0.1977, 0.4432, 0.3114]) -Greedy action tensor([-1.8957, -0.4210, 0.6562, -0.1414]) tensor([0.0417, 0.1822, 0.5351, 0.2410]) -Greedy action tensor([-1.9289, -0.4479, 0.6710, -0.1581]) tensor([0.0404, 0.1778, 0.5443, 0.2375]) -Greedy action tensor([-1.7079, -0.4103, 0.5560, -0.0947]) tensor([0.0518, 0.1897, 0.4985, 0.2601]) -Greedy action tensor([-1.4795, -0.1253, 0.6055, 0.0920]) tensor([0.0564, 0.2185, 0.4537, 0.2715]) -Greedy action tensor([-1.6553, 0.2748, 0.4000, 0.0154]) tensor([0.0476, 0.3279, 0.3716, 0.2530]) -Greedy action tensor([-1.5422, -0.3456, 0.4615, 0.0388]) tensor([0.0603, 0.1995, 0.4472, 0.2930]) -Greedy action tensor([-1.8825, -0.4491, 0.6385, -0.1467]) tensor([0.0429, 0.1799, 0.5338, 0.2434]) -Greedy action tensor([-0.5198, -0.0371, 1.0693, 1.5285]) tensor([0.0655, 0.1061, 0.3207, 0.5077]) -Greedy action tensor([-1.5108, -0.3680, 0.4732, 0.0987]) tensor([0.0609, 0.1911, 0.4432, 0.3047]) -Greedy action tensor([-0.9060, 0.4368, 0.1673, -0.0650]) tensor([0.0993, 0.3802, 0.2904, 0.2302]) -Greedy action tensor([-1.5685, -0.3764, 0.4440, -0.0308]) tensor([0.0609, 0.2005, 0.4554, 0.2832]) -Greedy action tensor([-1.8710, -0.4422, 0.6338, -0.1411]) tensor([0.0434, 0.1810, 0.5309, 0.2447]) -Greedy action tensor([-1.8672, -0.4486, 0.6323, -0.1458]) tensor([0.0437, 0.1804, 0.5317, 0.2442]) -Greedy action tensor([-1.7895, -0.3146, 0.6098, -0.0650]) tensor([0.0455, 0.1987, 0.5008, 0.2550]) -Greedy action tensor([-1.6896, -0.5036, 0.5431, -0.1312]) tensor([0.0545, 0.1784, 0.5082, 0.2589]) -Greedy action tensor([-0.6971, 0.7526, -0.2623, -0.2191]) tensor([0.1188, 0.5062, 0.1835, 0.1916]) -Greedy action tensor([-1.6703, -0.5290, 0.5145, -0.0218]) tensor([0.0549, 0.1719, 0.4879, 0.2854]) -Greedy action tensor([-1.8685, -0.4617, 0.6350, -0.1434]) tensor([0.0436, 0.1781, 0.5334, 0.2449]) -Greedy action tensor([-1.8177, -0.4266, 0.5986, -0.1272]) tensor([0.0462, 0.1857, 0.5176, 0.2505]) -Greedy action tensor([-1.8027, -0.3131, 0.5763, -0.1276]) tensor([0.0464, 0.2056, 0.5005, 0.2475]) -Greedy action tensor([-1.9184, -0.4347, 0.6537, -0.1683]) tensor([0.0412, 0.1818, 0.5398, 0.2373]) -Greedy action tensor([-1.5928, 0.0755, 0.4991, 0.1843]) tensor([0.0492, 0.2610, 0.3987, 0.2910]) -Greedy action tensor([-1.8942, -0.4394, 0.6427, -0.1543]) tensor([0.0423, 0.1814, 0.5352, 0.2412]) -Greedy action tensor([-0.8481, 0.3589, 0.0586, 0.0354]) tensor([0.1082, 0.3619, 0.2680, 0.2619]) -Greedy action tensor([ 0.2119, -0.3045, 0.3244, -0.3321]) tensor([0.3034, 0.1810, 0.3395, 0.1761]) -Greedy action tensor([ 0.9905, -0.5987, -0.5521, -0.0200]) tensor([0.5612, 0.1145, 0.1200, 0.2043]) -Greedy action tensor([-0.0855, -0.0638, -0.5210, 0.3088]) tensor([0.2408, 0.2461, 0.1558, 0.3572]) -Greedy action tensor([ 1.4806, -1.1861, -0.2609, 0.1292]) tensor([0.6651, 0.0462, 0.1166, 0.1722]) -Greedy action tensor([ 0.3752, -1.2444, 0.0810, -0.0835]) tensor([0.3883, 0.0769, 0.2893, 0.2455]) -Greedy action tensor([ 0.0532, -0.8194, -0.1589, 0.1223]) tensor([0.3032, 0.1267, 0.2453, 0.3249]) -Greedy action tensor([ 0.5437, -0.0501, -0.1273, -0.2623]) tensor([0.3984, 0.2200, 0.2037, 0.1779]) -Greedy action tensor([-0.7589, -0.1728, 0.5101, -0.2567]) tensor([0.1249, 0.2244, 0.4443, 0.2064]) -Greedy action tensor([ 0.1040, -0.3251, -0.1453, -0.7203]) tensor([0.3485, 0.2269, 0.2716, 0.1529]) -Greedy action tensor([-0.1457, 0.3828, -1.0143, -0.3673]) tensor([0.2553, 0.4331, 0.1071, 0.2045]) -Greedy action tensor([-1.0394, -0.8966, 0.1340, -0.6845]) tensor([0.1468, 0.1693, 0.4746, 0.2093]) -Greedy action tensor([-0.1001, -0.1965, 0.3174, -0.0441]) tensor([0.2230, 0.2025, 0.3386, 0.2359]) -Greedy action tensor([ 0.5739, -1.6797, 0.8872, 0.3159]) tensor([0.3081, 0.0324, 0.4215, 0.2380]) -Greedy action tensor([-0.5706, -0.1913, -0.9226, 0.4611]) tensor([0.1675, 0.2448, 0.1178, 0.4700]) -Greedy action tensor([-0.1664, 0.0169, 0.4286, -0.6393]) tensor([0.2156, 0.2590, 0.3910, 0.1344]) -Greedy action tensor([-0.5974, -1.8995, 1.5336, -0.7930]) tensor([0.0951, 0.0259, 0.8009, 0.0782]) -Greedy action tensor([-0.4201, -0.4629, -0.2941, -0.5216]) tensor([0.2503, 0.2398, 0.2839, 0.2261]) -Greedy action tensor([-0.4314, -0.0895, 0.2146, -1.2280]) tensor([0.2098, 0.2953, 0.4003, 0.0946]) -Greedy action tensor([ 0.1408, 0.6256, -0.5264, -0.5186]) tensor([0.2737, 0.4444, 0.1404, 0.1415]) -Greedy action tensor([ 0.9339, -1.3653, 0.4713, 0.4928]) tensor([0.4214, 0.0423, 0.2653, 0.2711]) -Greedy action tensor([-0.0912, -0.6274, -0.9550, -0.3301]) tensor([0.3579, 0.2094, 0.1509, 0.2819]) -Greedy action tensor([ 0.8747, -0.2520, -0.2303, -0.2354]) tensor([0.5038, 0.1633, 0.1669, 0.1660]) -Greedy action tensor([-0.1223, -1.0691, -0.3883, 0.2157]) tensor([0.2812, 0.1091, 0.2155, 0.3942]) -Greedy action tensor([-0.2060, 0.2540, -0.1301, -0.8955]) tensor([0.2401, 0.3804, 0.2590, 0.1205]) -Greedy action tensor([-0.2122, -1.0124, -0.1259, 0.9125]) tensor([0.1780, 0.0800, 0.1940, 0.5481]) -Greedy action tensor([ 0.3280, -1.7752, -0.1071, 0.0921]) tensor([0.3908, 0.0477, 0.2529, 0.3086]) -Greedy action tensor([ 0.1721, -0.1289, 0.1433, -0.1300]) tensor([0.2898, 0.2145, 0.2815, 0.2142]) -Greedy action tensor([ 0.4627, -0.0071, 1.0192, -0.4246]) tensor([0.2644, 0.1653, 0.4613, 0.1089]) -Greedy action tensor([-1.3574, -0.9202, 0.6469, -1.2985]) tensor([0.0907, 0.1404, 0.6728, 0.0962]) -Greedy action tensor([ 1.1101, -1.1826, 0.4164, 0.6923]) tensor([0.4426, 0.0447, 0.2212, 0.2915]) -Greedy action tensor([-0.4527, -0.9586, -0.3928, -0.5434]) tensor([0.2795, 0.1685, 0.2967, 0.2553]) -Greedy action tensor([-0.0090, -0.2127, 0.2711, -0.2455]) tensor([0.2546, 0.2076, 0.3369, 0.2009]) -Greedy action tensor([-0.0887, -0.3755, -0.4554, 0.2413]) tensor([0.2608, 0.1958, 0.1807, 0.3627]) -Greedy action tensor([-1.5708, -0.6078, 0.8637, -1.0873]) tensor([0.0601, 0.1573, 0.6852, 0.0974]) -Greedy action tensor([ 1.5043, 0.7499, 0.8087, -0.7925]) tensor([0.4832, 0.2272, 0.2410, 0.0486]) -Greedy action tensor([ 0.3936, -0.8319, 0.2660, 0.1818]) tensor([0.3352, 0.0984, 0.2951, 0.2713]) -Greedy action tensor([ 0.9179, -0.6402, 0.6392, 0.4379]) tensor([0.3867, 0.0814, 0.2926, 0.2393]) -Greedy action tensor([ 0.3556, 0.4001, 0.0761, -0.4756]) tensor([0.3089, 0.3230, 0.2336, 0.1345]) -Greedy action tensor([ 0.7093, -1.4955, 0.8853, 0.2310]) tensor([0.3422, 0.0377, 0.4080, 0.2121]) -Greedy action tensor([ 0.3175, -0.1895, 0.1110, 0.0732]) tensor([0.3126, 0.1883, 0.2543, 0.2449]) -Greedy action tensor([-0.6147, -0.5109, 0.0624, -0.3905]) tensor([0.1877, 0.2082, 0.3693, 0.2348]) -Greedy action tensor([ 1.7768, -0.5956, 0.7545, -0.2931]) tensor([0.6332, 0.0591, 0.2278, 0.0799]) -Greedy action tensor([-0.2715, -0.2479, 0.0489, -0.9752]) tensor([0.2567, 0.2628, 0.3536, 0.1270]) -Greedy action tensor([ 0.1007, -1.1472, -0.9964, -0.1621]) tensor([0.4184, 0.1201, 0.1397, 0.3217]) -Greedy action tensor([-0.1582, -0.8222, -0.2389, -0.5764]) tensor([0.3231, 0.1663, 0.2980, 0.2126]) -Greedy action tensor([ 0.7143, -0.8943, -0.0370, -0.3818]) tensor([0.4985, 0.0998, 0.2352, 0.1666]) -Greedy action tensor([ 0.0267, -0.3319, -0.4685, -0.0117]) tensor([0.3058, 0.2136, 0.1863, 0.2943]) -Greedy action tensor([ 0.3383, -0.7156, 0.3026, 0.8981]) tensor([0.2461, 0.0858, 0.2374, 0.4307]) -Greedy action tensor([0.8133, 0.3503, 0.2375, 0.2295]) tensor([0.3637, 0.2289, 0.2045, 0.2029]) -Greedy action tensor([ 0.8787, -1.3060, 0.6576, 0.0699]) tensor([0.4238, 0.0477, 0.3397, 0.1888]) -Greedy action tensor([-0.9527, -0.9621, -0.7091, -0.9597]) tensor([0.2348, 0.2326, 0.2995, 0.2331]) -Greedy action tensor([ 0.7109, -0.7179, 1.1653, 0.5685]) tensor([0.2716, 0.0651, 0.4278, 0.2355]) -Greedy action tensor([ 0.9571, -0.5370, -0.3463, -0.2438]) tensor([0.5565, 0.1249, 0.1511, 0.1675]) -Greedy action tensor([-0.3174, -0.8917, -0.8199, -0.2741]) tensor([0.3113, 0.1753, 0.1883, 0.3251]) -Greedy action tensor([ 1.0909, -1.7353, 0.8258, 0.7743]) tensor([0.3914, 0.0232, 0.3002, 0.2852]) -Greedy action tensor([0.2750, 0.1545, 0.3915, 0.1160]) tensor([0.2589, 0.2295, 0.2909, 0.2208]) -Greedy action tensor([ 0.0355, -0.6068, 0.2874, -0.6197]) tensor([0.3001, 0.1579, 0.3861, 0.1559]) -Greedy action tensor([ 1.3407, -0.6514, -0.2001, 0.7951]) tensor([0.5181, 0.0707, 0.1110, 0.3002]) -Greedy action tensor([-0.6532, -1.1379, 0.3367, -0.5673]) tensor([0.1853, 0.1141, 0.4986, 0.2019]) -Greedy action tensor([ 1.4458, -0.3378, 0.7570, 1.6936]) tensor([0.3388, 0.0569, 0.1702, 0.4341]) -Greedy action tensor([-0.7096, -0.5541, -0.7366, -0.5303]) tensor([0.2305, 0.2693, 0.2244, 0.2758]) -Greedy action tensor([ 0.2157, -0.0376, 0.9209, -0.3224]) tensor([0.2281, 0.1770, 0.4617, 0.1332]) -Greedy action tensor([-0.2567, -0.8528, -0.6672, -0.1357]) tensor([0.2991, 0.1648, 0.1984, 0.3376]) -Greedy action tensor([-0.0262, 0.1582, 0.5996, -0.4084]) tensor([0.2103, 0.2529, 0.3932, 0.1435]) -Greedy action tensor([ 0.5495, -1.3730, -0.3856, -0.2394]) tensor([0.5017, 0.0734, 0.1969, 0.2280]) -Greedy action tensor([-0.0850, -1.4585, 0.0445, 0.4231]) tensor([0.2467, 0.0625, 0.2808, 0.4101]) -Greedy action tensor([-1.0747, -0.6474, -0.5955, -0.7889]) tensor([0.1825, 0.2798, 0.2947, 0.2429]) -Greedy action tensor([ 0.4018, -1.0228, 0.6811, -0.1185]) tensor([0.3167, 0.0762, 0.4188, 0.1882]) -Greedy action tensor([ 0.8935, -0.3873, -0.1246, -0.6704]) tensor([0.5410, 0.1503, 0.1955, 0.1132]) -Greedy action tensor([ 0.1463, 0.4540, -1.0249, -0.1051]) tensor([0.2900, 0.3945, 0.0899, 0.2256]) -Greedy action tensor([ 0.8929, -1.0942, 0.9167, 0.7624]) tensor([0.3291, 0.0451, 0.3370, 0.2888]) -Greedy action tensor([ 1.3279, -0.3034, 0.8396, 0.7312]) tensor([0.4237, 0.0829, 0.2600, 0.2333]) -Greedy action tensor([ 0.9576, 0.2213, 0.7886, -0.9775]) tensor([0.4052, 0.1941, 0.3422, 0.0585]) -Greedy action tensor([ 0.6407, -1.7953, 0.0570, 0.0339]) tensor([0.4565, 0.0400, 0.2547, 0.2488]) -Greedy action tensor([-0.5684, -0.3315, -0.0498, 0.0630]) tensor([0.1716, 0.2175, 0.2882, 0.3227]) -Greedy action tensor([-0.0840, 0.6579, -0.4743, -0.3723]) tensor([0.2209, 0.4639, 0.1495, 0.1656]) -Greedy action tensor([ 0.2968, -0.6586, 0.0728, 1.0313]) tensor([0.2343, 0.0901, 0.1873, 0.4883]) -Greedy action tensor([ 0.5379, 0.2094, 0.3928, -0.2966]) tensor([0.3312, 0.2385, 0.2865, 0.1438]) -Greedy action tensor([-0.3559, -0.9044, 0.4309, -0.8631]) tensor([0.2285, 0.1320, 0.5019, 0.1376]) -Greedy action tensor([-0.8766, -0.9963, 0.2493, -0.6390]) tensor([0.1603, 0.1422, 0.4942, 0.2033]) -Greedy action tensor([-0.1442, -0.6102, -0.7279, -0.8854]) tensor([0.3757, 0.2357, 0.2096, 0.1790]) -Greedy action tensor([-0.0502, -0.0381, 0.5720, -0.2134]) tensor([0.2117, 0.2142, 0.3943, 0.1798]) -Greedy action tensor([ 1.2345, -0.6903, -0.1837, -0.4363]) tensor([0.6345, 0.0926, 0.1536, 0.1193]) -Greedy action tensor([ 0.6452, -0.3840, -0.2432, -0.3084]) tensor([0.4643, 0.1659, 0.1910, 0.1789]) -Greedy action tensor([ 0.9686, -0.6414, -0.0521, -0.2563]) tensor([0.5394, 0.1078, 0.1944, 0.1585]) -Greedy action tensor([ 1.1025, -0.4948, -0.1015, -0.6427]) tensor([0.5963, 0.1207, 0.1789, 0.1041]) -Greedy action tensor([ 0.7124, -0.2025, 0.0773, -0.2241]) tensor([0.4306, 0.1725, 0.2282, 0.1688]) -Greedy action tensor([ 1.0002, -0.8003, 0.1394, -0.5965]) tensor([0.5585, 0.0923, 0.2361, 0.1131]) -Greedy action tensor([ 0.7476, -0.5433, -0.1119, -0.3053]) tensor([0.4884, 0.1343, 0.2068, 0.1704]) -Greedy action tensor([ 1.2871, -1.1772, 0.0139, -0.5841]) tensor([0.6584, 0.0560, 0.1843, 0.1014]) -Greedy action tensor([ 0.7062, 0.0443, -0.0770, -0.0777]) tensor([0.4116, 0.2123, 0.1881, 0.1880]) -Greedy action tensor([ 0.6255, -0.1879, -0.0560, -0.2235]) tensor([0.4207, 0.1865, 0.2128, 0.1800]) -Greedy action tensor([ 0.9316, -0.7701, -0.0055, -0.5724]) tensor([0.5567, 0.1015, 0.2181, 0.1237]) -Greedy action tensor([ 0.6314, 0.0342, 0.0207, -0.0756]) tensor([0.3866, 0.2128, 0.2099, 0.1907]) -Greedy action tensor([ 0.5672, 0.2468, -0.1422, -0.2145]) tensor([0.3738, 0.2713, 0.1839, 0.1710]) -Greedy action tensor([ 0.8543, -0.5391, -0.0841, -0.3110]) tensor([0.5125, 0.1272, 0.2005, 0.1598]) -Greedy action tensor([ 1.0335, -0.6736, -0.0475, -0.6395]) tensor([0.5854, 0.1062, 0.1986, 0.1099]) -Greedy action tensor([ 0.8344, -0.5056, -0.0287, -0.1216]) tensor([0.4835, 0.1266, 0.2040, 0.1859]) -Greedy action tensor([ 0.9443, -0.8330, 0.1166, -0.4852]) tensor([0.5418, 0.0916, 0.2368, 0.1297]) -Greedy action tensor([ 0.9718, -0.7367, 0.0938, -0.4360]) tensor([0.5430, 0.0984, 0.2257, 0.1329]) -Greedy action tensor([ 0.7941, -0.2690, -0.1131, -0.1424]) tensor([0.4671, 0.1613, 0.1885, 0.1831]) -Greedy action tensor([ 0.4811, -0.4703, -0.2056, 0.0154]) tensor([0.3973, 0.1534, 0.1999, 0.2494]) -Greedy action tensor([ 1.2490, -0.7505, -0.1367, -0.6255]) tensor([0.6498, 0.0880, 0.1625, 0.0997]) -Greedy action tensor([ 1.2102, -0.6286, 0.1532, -0.6895]) tensor([0.6038, 0.0960, 0.2098, 0.0903]) -Greedy action tensor([ 0.6638, -0.3569, -0.0370, -0.2126]) tensor([0.4400, 0.1585, 0.2183, 0.1831]) -Greedy action tensor([ 0.9794, -0.1249, 0.0123, -0.2600]) tensor([0.4997, 0.1656, 0.1900, 0.1447]) -Greedy action tensor([ 0.9794, -0.4564, 0.1011, -0.6103]) tensor([0.5384, 0.1281, 0.2237, 0.1098]) -Greedy action tensor([ 0.7704, -0.7386, -0.2540, -1.0099]) tensor([0.5719, 0.1265, 0.2053, 0.0964]) -Greedy action tensor([ 0.6644, -0.3687, 0.0310, -0.1370]) tensor([0.4282, 0.1524, 0.2273, 0.1921]) -Greedy action tensor([ 0.6725, -0.4213, -0.0914, -0.1973]) tensor([0.4505, 0.1509, 0.2099, 0.1888]) -Greedy action tensor([ 0.4730, -0.4691, -0.0388, -0.1393]) tensor([0.3951, 0.1540, 0.2368, 0.2142]) -Greedy action tensor([ 0.8776, -0.8950, -0.0068, -0.5478]) tensor([0.5485, 0.0932, 0.2265, 0.1319]) -Greedy action tensor([ 0.5584, -0.3061, 0.0038, -0.0881]) tensor([0.3969, 0.1672, 0.2280, 0.2079]) -Greedy action tensor([ 0.7909, -0.4151, -0.0634, -0.3848]) tensor([0.4918, 0.1472, 0.2093, 0.1517]) -Greedy action tensor([ 0.8520, -0.5316, -0.0272, -0.2580]) tensor([0.5012, 0.1256, 0.2080, 0.1652]) -Greedy action tensor([ 0.6556, -0.2665, -0.0388, -0.1047]) tensor([0.4229, 0.1682, 0.2112, 0.1977]) -Greedy action tensor([ 1.1643, -0.6035, 0.0737, -0.6597]) tensor([0.5995, 0.1023, 0.2014, 0.0967]) -Greedy action tensor([ 0.9357, -0.5151, -0.0073, -0.5370]) tensor([0.5396, 0.1265, 0.2102, 0.1237]) -Greedy action tensor([ 0.9654, -0.2079, -0.0378, -0.3556]) tensor([0.5147, 0.1592, 0.1887, 0.1374]) -Greedy action tensor([ 0.7988, -0.4012, 0.0391, -0.3388]) tensor([0.4786, 0.1441, 0.2239, 0.1534]) -Greedy action tensor([ 0.9800, -0.4993, -0.1254, -0.6421]) tensor([0.5694, 0.1297, 0.1885, 0.1124]) -Greedy action tensor([ 0.4696, -0.1402, -0.1014, -0.2727]) tensor([0.3869, 0.2103, 0.2186, 0.1842]) -Greedy action tensor([ 0.9329, -0.5591, -0.0199, -0.5052]) tensor([0.5412, 0.1217, 0.2087, 0.1285]) -Greedy action tensor([ 0.5192, -0.4366, -0.0640, -0.3935]) tensor([0.4266, 0.1640, 0.2381, 0.1713]) -Greedy action tensor([ 0.4605, -0.1689, -0.0410, -0.0932]) tensor([0.3686, 0.1964, 0.2232, 0.2118]) -Greedy action tensor([ 0.6401, -0.5801, 0.0272, -0.2005]) tensor([0.4408, 0.1301, 0.2388, 0.1902]) -Greedy action tensor([ 0.4210, -0.1891, -0.0731, -0.2793]) tensor([0.3774, 0.2050, 0.2302, 0.1873]) -Greedy action tensor([ 0.9780, -0.6390, -0.1132, -0.3943]) tensor([0.5594, 0.1110, 0.1878, 0.1418]) -Greedy action tensor([ 1.0352, -0.6575, -0.0558, -0.5669]) tensor([0.5809, 0.1069, 0.1951, 0.1170]) -Greedy action tensor([ 0.7806, -0.3792, -0.1156, -0.5302]) tensor([0.5022, 0.1575, 0.2050, 0.1354]) -Greedy action tensor([ 0.7131, -0.7329, -0.0031, -0.4076]) tensor([0.4877, 0.1149, 0.2383, 0.1590]) -Greedy action tensor([ 0.8184, -0.9294, 0.1368, -0.4825]) tensor([0.5122, 0.0892, 0.2591, 0.1395]) -Greedy action tensor([ 0.6967, -0.7780, 0.0422, -0.2833]) tensor([0.4708, 0.1078, 0.2447, 0.1767]) -Greedy action tensor([ 0.4513, -0.4981, -0.0846, -0.2351]) tensor([0.4039, 0.1563, 0.2364, 0.2034]) -Greedy action tensor([ 0.7145, -0.2336, 0.0163, -0.2020]) tensor([0.4377, 0.1696, 0.2177, 0.1750]) -Greedy action tensor([ 0.5800, -0.3358, -0.2453, -0.2072]) tensor([0.4360, 0.1745, 0.1910, 0.1985]) -Greedy action tensor([ 0.9041, -0.4567, -0.0835, -0.3051]) tensor([0.5188, 0.1331, 0.1933, 0.1548]) -Greedy action tensor([ 0.2988, 0.2400, -0.2540, 0.1285]) tensor([0.2975, 0.2805, 0.1711, 0.2509]) -Greedy action tensor([ 0.7831, -0.5824, -0.0428, -0.5182]) tensor([0.5088, 0.1299, 0.2228, 0.1385]) -Greedy action tensor([ 1.1354, -0.7179, -0.0869, -0.3944]) tensor([0.5996, 0.0940, 0.1766, 0.1299]) -Greedy action tensor([ 0.3513, -0.3494, -0.0834, -0.1589]) tensor([0.3644, 0.1808, 0.2359, 0.2188]) -Greedy action tensor([ 0.7833, -0.0895, -0.0148, -0.0200]) tensor([0.4318, 0.1804, 0.1944, 0.1934]) -Greedy action tensor([ 1.0795, -0.2879, -0.0565, -0.3942]) tensor([0.5540, 0.1411, 0.1779, 0.1269]) -Greedy action tensor([ 0.3547, -0.2511, -0.0333, -0.1973]) tensor([0.3572, 0.1949, 0.2423, 0.2056]) -Greedy action tensor([ 1.0700, -0.5918, -0.0738, -0.3579]) tensor([0.5720, 0.1086, 0.1822, 0.1372]) -Greedy action tensor([ 0.4291, -0.1279, -0.1061, -0.1369]) tensor([0.3668, 0.2101, 0.2148, 0.2083]) -Greedy action tensor([ 1.1773, -0.6487, 0.0018, -0.5702]) tensor([0.6083, 0.0980, 0.1878, 0.1060]) -Greedy action tensor([ 0.7010, -0.0040, -0.0488, 0.1290]) tensor([0.3951, 0.1952, 0.1867, 0.2230]) -Greedy action tensor([ 0.7916, -0.6765, -0.0156, -0.3576]) tensor([0.5017, 0.1156, 0.2238, 0.1590]) -Greedy action tensor([ 0.5756, 0.1171, -0.0637, -0.0790]) tensor([0.3732, 0.2359, 0.1969, 0.1939]) -Greedy action tensor([ 0.7998, -0.1168, -0.1030, -0.3503]) tensor([0.4713, 0.1884, 0.1911, 0.1492]) -Greedy action tensor([ 0.8136, -0.2280, -0.0717, -0.1844]) tensor([0.4686, 0.1654, 0.1933, 0.1727]) -Greedy action tensor([ 0.6294, -0.2317, -0.0656, -0.1177]) tensor([0.4175, 0.1765, 0.2083, 0.1978]) -Greedy action tensor([ 1.1478, -0.7834, 0.0040, -0.3112]) tensor([0.5896, 0.0855, 0.1878, 0.1371]) -Greedy action tensor([ 0.8992, -0.5841, -0.1823, -0.3653]) tensor([0.5410, 0.1228, 0.1834, 0.1528]) -Greedy action tensor([ 0.7688, -0.6530, -0.0843, -0.4114]) tensor([0.5064, 0.1222, 0.2158, 0.1556]) -Greedy action tensor([ 0.6858, -0.2789, -0.0456, -0.2749]) tensor([0.4454, 0.1698, 0.2144, 0.1704]) -Greedy action tensor([ 0.6825, -0.3498, -0.1631, -0.1214]) tensor([0.4478, 0.1595, 0.1923, 0.2004]) -Greedy action tensor([ 0.7464, -0.3542, 0.0020, -0.1570]) tensor([0.4519, 0.1503, 0.2147, 0.1831]) -Greedy action tensor([ 0.5035, -0.0689, 0.0412, -0.2348]) tensor([0.3743, 0.2112, 0.2357, 0.1789]) -Greedy action tensor([ 1.0937, -0.5893, -0.1800, -0.4097]) tensor([0.5924, 0.1101, 0.1658, 0.1317]) -Greedy action tensor([ 0.8220, -0.6177, -0.0560, -0.5000]) tensor([0.5211, 0.1235, 0.2166, 0.1389]) -Greedy action tensor([ 0.6380, -0.5500, -0.1206, -0.2912]) tensor([0.4612, 0.1406, 0.2160, 0.1821]) -Greedy action tensor([ 1.3157, -0.3953, -0.4411, 0.0205]) tensor([0.6146, 0.1110, 0.1061, 0.1683]) -Greedy action tensor([ 1.0166, -0.1053, -0.5825, 0.1244]) tensor([0.5161, 0.1681, 0.1043, 0.2115]) -Greedy action tensor([ 1.5650, -1.1017, -0.2528, 0.0961]) tensor([0.6840, 0.0475, 0.1111, 0.1574]) -Greedy action tensor([ 1.3723, -1.1924, -0.2459, -0.1767]) tensor([0.6722, 0.0517, 0.1333, 0.1428]) -Greedy action tensor([ 0.9947, -0.5157, -0.4927, 0.5417]) tensor([0.4802, 0.1060, 0.1085, 0.3053]) -Greedy action tensor([ 1.4111, -0.5411, -0.2859, 0.5577]) tensor([0.5710, 0.0811, 0.1046, 0.2432]) -Greedy action tensor([ 1.7954, -1.0966, -0.0032, 0.6483]) tensor([0.6500, 0.0361, 0.1076, 0.2064]) -Greedy action tensor([ 1.7633, -0.9487, -0.1056, 0.3527]) tensor([0.6827, 0.0453, 0.1053, 0.1666]) -Greedy action tensor([ 1.8091, -0.6574, -0.2534, 0.7232]) tensor([0.6453, 0.0548, 0.0820, 0.2179]) -Greedy action tensor([ 1.7734, -1.3024, -0.0246, 0.0150]) tensor([0.7225, 0.0333, 0.1197, 0.1245]) -Greedy action tensor([ 0.8345, -0.1328, -0.5911, 0.0614]) tensor([0.4803, 0.1826, 0.1154, 0.2217]) -Greedy action tensor([ 1.7624, -0.7527, -0.2705, 0.0911]) tensor([0.7144, 0.0578, 0.0935, 0.1343]) -Greedy action tensor([ 1.6207, -0.4152, -0.9052, 0.2352]) tensor([0.6846, 0.0894, 0.0548, 0.1713]) -Greedy action tensor([ 1.7572, -0.0827, -0.5149, -0.0962]) tensor([0.7049, 0.1120, 0.0727, 0.1105]) -Greedy action tensor([ 1.2450, -0.5181, -0.5206, 0.5541]) tensor([0.5424, 0.0930, 0.0928, 0.2718]) -Greedy action tensor([ 1.3403, -0.4547, -0.8608, 0.2260]) tensor([0.6231, 0.1035, 0.0690, 0.2045]) -Greedy action tensor([ 1.5861, 0.2809, -0.4665, 0.2684]) tensor([0.5998, 0.1626, 0.0770, 0.1606]) -Greedy action tensor([ 1.2546, -0.6830, -0.6369, 0.5773]) tensor([0.5547, 0.0799, 0.0837, 0.2818]) -Greedy action tensor([ 1.2820, -0.8790, -0.6461, 0.0374]) tensor([0.6457, 0.0744, 0.0939, 0.1860]) -Greedy action tensor([ 1.2158, -0.2435, -0.5475, 0.2600]) tensor([0.5592, 0.1300, 0.0959, 0.2150]) -Greedy action tensor([ 0.7043, -0.0418, -0.2732, 0.0899]) tensor([0.4182, 0.1983, 0.1573, 0.2262]) -Greedy action tensor([ 2.1187, -0.4986, -0.7980, 0.2860]) tensor([0.7769, 0.0567, 0.0420, 0.1243]) -Greedy action tensor([ 1.5700, -0.2567, -0.4289, 0.1493]) tensor([0.6502, 0.1046, 0.0881, 0.1571]) -Greedy action tensor([ 1.7848, -0.8713, -0.5691, 0.7090]) tensor([0.6639, 0.0466, 0.0631, 0.2264]) -Greedy action tensor([ 2.0839, -1.1896, -0.3282, 0.5451]) tensor([0.7451, 0.0282, 0.0668, 0.1599]) -Greedy action tensor([ 1.3852, -1.4088, -0.3721, -0.0607]) tensor([0.6806, 0.0416, 0.1174, 0.1603]) -Greedy action tensor([ 2.0186, -0.3555, -0.4628, 0.2546]) tensor([0.7418, 0.0691, 0.0620, 0.1271]) -Greedy action tensor([ 1.3413, -0.4385, -0.0847, 0.1008]) tensor([0.5889, 0.0993, 0.1415, 0.1703]) -Greedy action tensor([ 1.0881, -0.8166, -0.1814, 0.2019]) tensor([0.5429, 0.0808, 0.1525, 0.2238]) -Greedy action tensor([ 1.3291, -0.4739, -0.2665, 0.3092]) tensor([0.5786, 0.0954, 0.1173, 0.2087]) -Greedy action tensor([ 1.1575, -0.1520, 0.0531, 0.1107]) tensor([0.5122, 0.1383, 0.1697, 0.1798]) -Greedy action tensor([ 1.7749, -0.4156, -0.6218, 0.2016]) tensor([0.7091, 0.0793, 0.0645, 0.1470]) -Greedy action tensor([ 1.4856, -0.1593, -1.2628, 0.2013]) tensor([0.6519, 0.1258, 0.0417, 0.1805]) -Greedy action tensor([ 2.0640, -0.6638, -0.2242, 0.4539]) tensor([0.7317, 0.0478, 0.0742, 0.1463]) -Greedy action tensor([ 1.1257, -0.3066, 0.2143, -0.1257]) tensor([0.5190, 0.1239, 0.2086, 0.1485]) -Greedy action tensor([ 1.6099, -0.8333, -0.0627, 0.3520]) tensor([0.6415, 0.0557, 0.1204, 0.1823]) -Greedy action tensor([ 1.4239, -0.5117, -0.2329, 0.1520]) tensor([0.6190, 0.0894, 0.1181, 0.1735]) -Greedy action tensor([ 1.5965, -0.8172, -0.5208, 0.6938]) tensor([0.6191, 0.0554, 0.0745, 0.2510]) -Greedy action tensor([ 1.4345, -0.3427, -0.6383, 0.0961]) tensor([0.6422, 0.1086, 0.0808, 0.1684]) -Greedy action tensor([ 2.3221, -1.1419, -0.4095, 0.6961]) tensor([0.7733, 0.0242, 0.0504, 0.1521]) -Greedy action tensor([ 1.3432, -0.5216, -0.2976, 0.2733]) tensor([0.5911, 0.0916, 0.1146, 0.2028]) -Greedy action tensor([ 1.0798, -0.2487, -0.3970, 0.3272]) tensor([0.5091, 0.1348, 0.1163, 0.2398]) -Greedy action tensor([ 1.8516, -0.8538, -0.4418, 0.4750]) tensor([0.7041, 0.0471, 0.0711, 0.1777]) -Greedy action tensor([ 1.8827, -0.4827, -0.4984, 0.8206]) tensor([0.6527, 0.0613, 0.0603, 0.2257]) -Greedy action tensor([ 2.0908, -1.0618, -0.5846, 0.6813]) tensor([0.7375, 0.0315, 0.0508, 0.1801]) -Greedy action tensor([ 1.4461, -0.4907, -0.3795, 0.2687]) tensor([0.6198, 0.0894, 0.0999, 0.1909]) -Greedy action tensor([ 1.2791, -0.1822, -0.6123, 0.0965]) tensor([0.5920, 0.1373, 0.0893, 0.1814]) -Greedy action tensor([ 0.9948, -0.6543, -0.1543, 0.5390]) tensor([0.4666, 0.0897, 0.1479, 0.2958]) -Greedy action tensor([ 1.5092, -0.7570, -0.3664, 0.7148]) tensor([0.5852, 0.0607, 0.0897, 0.2644]) -Greedy action tensor([ 1.0871, -0.4052, -0.0468, 0.0590]) tensor([0.5251, 0.1181, 0.1690, 0.1878]) -Greedy action tensor([ 1.6117, -0.6343, -0.2737, 0.5269]) tensor([0.6267, 0.0663, 0.0951, 0.2118]) -Greedy action tensor([ 1.1418, -0.3998, -0.3804, 0.4000]) tensor([0.5240, 0.1121, 0.1143, 0.2495]) -Greedy action tensor([ 1.0539, -0.5504, -0.5033, 0.4948]) tensor([0.5042, 0.1014, 0.1062, 0.2882]) -Greedy action tensor([ 1.3630, -0.3302, -0.2374, 0.0222]) tensor([0.6070, 0.1116, 0.1225, 0.1588]) -Greedy action tensor([ 2.1961, -1.0560, -0.4749, 0.7281]) tensor([0.7472, 0.0289, 0.0517, 0.1722]) -Greedy action tensor([ 0.9712, 0.1764, -0.5579, 0.0849]) tensor([0.4806, 0.2171, 0.1042, 0.1981]) -Greedy action tensor([ 1.8707, -0.7645, -0.8022, 0.6184]) tensor([0.7010, 0.0503, 0.0484, 0.2004]) -Greedy action tensor([ 1.6840, -0.6491, -0.7543, -0.0751]) tensor([0.7372, 0.0715, 0.0644, 0.1269]) -Greedy action tensor([ 1.3716, -0.5039, -0.3832, 0.1274]) tensor([0.6194, 0.0949, 0.1071, 0.1785]) -Greedy action tensor([ 1.2156, -0.2460, -0.6474, 0.5444]) tensor([0.5268, 0.1222, 0.0818, 0.2693]) -Greedy action tensor([ 1.4874, -0.5431, -0.0610, 0.3014]) tensor([0.6063, 0.0796, 0.1289, 0.1852]) -Greedy action tensor([ 1.6500, -0.2808, -0.8123, 0.2733]) tensor([0.6745, 0.0978, 0.0575, 0.1702]) -Greedy action tensor([ 1.2427, -0.3287, -0.5128, -0.0187]) tensor([0.6010, 0.1249, 0.1039, 0.1702]) -Greedy action tensor([ 1.6136, -0.2246, -0.5608, 0.2561]) tensor([0.6536, 0.1040, 0.0743, 0.1682]) -Greedy action tensor([ 1.7367, 0.3693, -0.8518, 0.1447]) tensor([0.6521, 0.1661, 0.0490, 0.1327]) -Greedy action tensor([ 1.8711, 0.3227, -0.5761, 0.5226]) tensor([0.6415, 0.1364, 0.0555, 0.1666]) -Greedy action tensor([ 1.2534, -0.1673, -0.6175, 0.3500]) tensor([0.5553, 0.1341, 0.0855, 0.2250]) -Greedy action tensor([ 1.4533, -0.5213, 0.0387, 0.3339]) tensor([0.5854, 0.0813, 0.1423, 0.1911]) -Greedy action tensor([ 1.2210, -0.3788, -0.7168, 0.7008]) tensor([0.5154, 0.1041, 0.0742, 0.3063]) -Greedy action tensor([ 1.3082, -0.5621, -0.5735, 0.3547]) tensor([0.5911, 0.0911, 0.0900, 0.2278]) -Greedy action tensor([ 1.6434, -0.6738, -0.3814, 0.6049]) tensor([0.6311, 0.0622, 0.0833, 0.2234]) -Greedy action tensor([ 2.0314, -0.7228, -0.5071, 0.5639]) tensor([0.7283, 0.0464, 0.0575, 0.1679]) -Greedy action tensor([ 1.2480, -0.0484, -0.8256, 0.5582]) tensor([0.5261, 0.1439, 0.0661, 0.2639]) -Greedy action tensor([ 0.8425, -0.1408, -0.0837, -1.1148]) tensor([0.5232, 0.1957, 0.2072, 0.0739]) -Greedy action tensor([ 1.5712, 0.2347, -0.0016, -0.2930]) tensor([0.6153, 0.1617, 0.1277, 0.0954]) -Greedy action tensor([ 1.8128, 0.5216, -0.1218, 0.4003]) tensor([0.6013, 0.1653, 0.0869, 0.1464]) -Greedy action tensor([ 1.3883, -0.2687, -0.0882, -0.1202]) tensor([0.6096, 0.1163, 0.1393, 0.1349]) -Greedy action tensor([ 0.8226, -0.3607, -0.0450, 0.3815]) tensor([0.4220, 0.1293, 0.1772, 0.2715]) -Greedy action tensor([ 1.4777, -0.2769, -0.5395, 0.1736]) tensor([0.6339, 0.1097, 0.0843, 0.1721]) -Greedy action tensor([ 1.6042, -0.8238, -0.3777, 0.2192]) tensor([0.6774, 0.0597, 0.0933, 0.1696]) -Greedy action tensor([ 2.1070, -1.1094, -0.1359, 0.3041]) tensor([0.7627, 0.0306, 0.0810, 0.1257]) -Greedy action tensor([-1.7967, -0.2734, 0.6162, -0.0852]) tensor([0.0449, 0.2058, 0.5009, 0.2484]) -Greedy action tensor([-1.8098, -0.1247, 0.5478, -0.1295]) tensor([0.0448, 0.2416, 0.4732, 0.2404]) -Greedy action tensor([-1.1313, 0.3230, 0.2083, 0.1419]) tensor([0.0789, 0.3379, 0.3013, 0.2819]) -Greedy action tensor([-1.7944, -0.2772, 0.5658, -0.1199]) tensor([0.0465, 0.2122, 0.4930, 0.2483]) -Greedy action tensor([-1.5311, -0.4147, 0.4824, -0.0703]) tensor([0.0631, 0.1926, 0.4724, 0.2718]) -Greedy action tensor([-1.7452, -0.4694, 0.5606, -0.0866]) tensor([0.0503, 0.1803, 0.5050, 0.2644]) -Greedy action tensor([-1.9016, -0.4462, 0.6737, -0.1308]) tensor([0.0412, 0.1764, 0.5406, 0.2418]) -Greedy action tensor([-1.8781, -0.2455, 0.6396, -0.2316]) tensor([0.0422, 0.2159, 0.5231, 0.2189]) -Greedy action tensor([-1.7270, -0.4932, 0.5826, 0.0187]) tensor([0.0494, 0.1697, 0.4977, 0.2832]) -Greedy action tensor([-1.9066, -0.2673, 0.6191, -0.1596]) tensor([0.0410, 0.2112, 0.5125, 0.2353]) -Greedy action tensor([-1.8883, -0.4493, 0.6404, -0.1571]) tensor([0.0427, 0.1802, 0.5358, 0.2413]) -Greedy action tensor([-1.9423, -0.4467, 0.6636, -0.1788]) tensor([0.0403, 0.1796, 0.5453, 0.2348]) -Greedy action tensor([-1.8448, -0.2920, 0.6077, -0.1212]) tensor([0.0436, 0.2059, 0.5063, 0.2442]) -Greedy action tensor([-1.7668, -0.3115, 0.5815, -0.0640]) tensor([0.0471, 0.2017, 0.4928, 0.2584]) -Greedy action tensor([-1.9436, -0.4470, 0.6664, -0.1802]) tensor([0.0402, 0.1794, 0.5462, 0.2343]) -Greedy action tensor([-1.2200, -0.5057, 0.2188, 0.2705]) tensor([0.0855, 0.1746, 0.3604, 0.3795]) -Greedy action tensor([-1.3250, -0.0268, 0.4061, 0.1424]) tensor([0.0683, 0.2501, 0.3855, 0.2962]) -Greedy action tensor([-1.9420, -0.4507, 0.6628, -0.1799]) tensor([0.0403, 0.1792, 0.5456, 0.2349]) -Greedy action tensor([-1.8516, -0.4502, 0.6244, -0.1316]) tensor([0.0444, 0.1802, 0.5277, 0.2478]) -Greedy action tensor([-1.6070, 0.0751, 0.4281, -0.0387]) tensor([0.0531, 0.2856, 0.4065, 0.2549]) -Greedy action tensor([-1.9128, -0.3796, 0.6477, -0.1652]) tensor([0.0411, 0.1905, 0.5323, 0.2361]) -Greedy action tensor([-1.9362, -0.3910, 0.6522, -0.1763]) tensor([0.0403, 0.1890, 0.5364, 0.2343]) -Greedy action tensor([-0.8613, 0.7520, 0.1117, 0.0031]) tensor([0.0906, 0.4547, 0.2397, 0.2150]) -Greedy action tensor([-1.0978, 0.8095, 0.1247, 0.2388]) tensor([0.0670, 0.4509, 0.2273, 0.2548]) -Greedy action tensor([-0.4340, 1.0219, 0.0370, 0.2253]) tensor([0.1133, 0.4860, 0.1815, 0.2191]) -Greedy action tensor([-1.8317, -0.4621, 0.6180, -0.1238]) tensor([0.0454, 0.1785, 0.5257, 0.2504]) -Greedy action tensor([-1.8254, -0.4436, 0.6072, -0.1215]) tensor([0.0457, 0.1821, 0.5208, 0.2513]) -Greedy action tensor([-1.9282, -0.4101, 0.6546, -0.1719]) tensor([0.0407, 0.1856, 0.5382, 0.2355]) -Greedy action tensor([-1.9284, -0.4301, 0.6590, -0.1713]) tensor([0.0407, 0.1821, 0.5412, 0.2359]) -Greedy action tensor([-1.8947, -0.4113, 0.6378, -0.1592]) tensor([0.0423, 0.1863, 0.5318, 0.2397]) -Greedy action tensor([-1.6633, -0.5266, 0.5288, -0.0784]) tensor([0.0557, 0.1736, 0.4988, 0.2718]) -Greedy action tensor([-1.3156, -0.3795, 0.3669, 0.2263]) tensor([0.0735, 0.1875, 0.3955, 0.3436]) -Greedy action tensor([-1.6854, -0.2471, 0.5216, -0.0424]) tensor([0.0514, 0.2164, 0.4667, 0.2655]) -Greedy action tensor([-1.7391, -0.4781, 0.5765, -0.0764]) tensor([0.0502, 0.1770, 0.5082, 0.2645]) -Greedy action tensor([-0.5267, -0.6318, 0.1970, 0.4194]) tensor([0.1530, 0.1377, 0.3154, 0.3939]) -Greedy action tensor([-0.7771, 0.8603, 0.0743, 0.3166]) tensor([0.0872, 0.4483, 0.2043, 0.2603]) -Greedy action tensor([-1.9121, -0.2658, 0.6272, -0.1812]) tensor([0.0408, 0.2117, 0.5171, 0.2304]) -Greedy action tensor([-1.9207, -0.4527, 0.6582, -0.1672]) tensor([0.0412, 0.1786, 0.5425, 0.2377]) -Greedy action tensor([-1.9143, -0.4441, 0.6544, -0.1635]) tensor([0.0414, 0.1801, 0.5401, 0.2384]) -Greedy action tensor([-1.5930, 0.4079, 0.3591, 0.0350]) tensor([0.0487, 0.3602, 0.3430, 0.2481]) -Greedy action tensor([-1.9194, -0.4311, 0.6554, -0.1663]) tensor([0.0411, 0.1821, 0.5396, 0.2373]) -Greedy action tensor([-1.9319, -0.2818, 0.6371, -0.1786]) tensor([0.0399, 0.2080, 0.5214, 0.2306]) -Greedy action tensor([-1.8001, -0.3878, 0.6225, -0.0991]) tensor([0.0457, 0.1878, 0.5158, 0.2507]) -Greedy action tensor([-1.8544, -0.3930, 0.6162, -0.1422]) tensor([0.0441, 0.1901, 0.5215, 0.2443]) -Greedy action tensor([-1.8893, -0.3861, 0.6461, -0.1536]) tensor([0.0420, 0.1890, 0.5305, 0.2385]) -Greedy action tensor([-1.8456, -0.3121, 0.6009, -0.1288]) tensor([0.0440, 0.2037, 0.5076, 0.2447]) -Greedy action tensor([-1.9289, -0.4059, 0.6563, -0.1673]) tensor([0.0405, 0.1859, 0.5377, 0.2360]) -Greedy action tensor([-1.8828, -0.3971, 0.6294, -0.1395]) tensor([0.0426, 0.1883, 0.5255, 0.2436]) -Greedy action tensor([-1.9092, -0.3469, 0.6386, -0.1643]) tensor([0.0412, 0.1965, 0.5264, 0.2359]) -Greedy action tensor([1.0148, 0.7416, 0.5582, 1.3976]) tensor([0.2590, 0.1971, 0.1641, 0.3798]) -Greedy action tensor([-1.9458, -0.4525, 0.6682, -0.1809]) tensor([0.0401, 0.1785, 0.5473, 0.2341]) -Greedy action tensor([-1.8462, -0.3860, 0.6348, -0.1193]) tensor([0.0437, 0.1882, 0.5224, 0.2457]) -Greedy action tensor([-0.8269, -0.4945, 0.2945, -0.1310]) tensor([0.1339, 0.1867, 0.4109, 0.2685]) -Greedy action tensor([-1.7817, -0.4563, 0.6470, 0.0077]) tensor([0.0453, 0.1704, 0.5134, 0.2709]) -Greedy action tensor([-1.5551, -0.4424, 0.4831, -0.0423]) tensor([0.0615, 0.1871, 0.4722, 0.2792]) -Greedy action tensor([-1.4607, 0.0263, -0.6018, -0.9190]) tensor([0.1052, 0.4655, 0.2484, 0.1809]) -Greedy action tensor([-1.8754, -0.4506, 0.6388, -0.1443]) tensor([0.0432, 0.1795, 0.5335, 0.2438]) -Greedy action tensor([-1.6518, 0.1324, 0.4637, -0.0967]) tensor([0.0500, 0.2980, 0.4150, 0.2370]) -Greedy action tensor([-1.8905, -0.3079, 0.6285, -0.1480]) tensor([0.0417, 0.2029, 0.5174, 0.2380]) -Greedy action tensor([-1.9058, -0.4444, 0.6496, -0.1592]) tensor([0.0418, 0.1802, 0.5382, 0.2397]) -Greedy action tensor([-1.8704, -0.1947, 0.5903, -0.1358]) tensor([0.0422, 0.2252, 0.4938, 0.2389]) -Greedy action tensor([-1.8829, -0.4496, 0.6385, -0.1529]) tensor([0.0430, 0.1801, 0.5347, 0.2423]) -Greedy action tensor([-1.7357, -0.1191, 0.5218, -0.1308]) tensor([0.0486, 0.2448, 0.4647, 0.2419]) -Greedy action tensor([-1.8281, -0.4331, 0.6088, -0.1262]) tensor([0.0455, 0.1838, 0.5209, 0.2498]) -Greedy action tensor([-1.8941, -0.2763, 0.6182, -0.1345]) tensor([0.0413, 0.2085, 0.5099, 0.2402]) -Greedy action tensor([-1.9254, -0.4472, 0.6577, -0.1718]) tensor([0.0410, 0.1797, 0.5426, 0.2367]) -Greedy action tensor([-1.8837, -0.4480, 0.6689, -0.1112]) tensor([0.0418, 0.1756, 0.5366, 0.2460]) -Greedy action tensor([-1.6764, -0.4517, 0.7320, -0.0988]) tensor([0.0491, 0.1671, 0.5459, 0.2378]) -Greedy action tensor([-1.7774, -0.3033, 0.6005, -0.0684]) tensor([0.0461, 0.2015, 0.4975, 0.2549]) -Greedy action tensor([ 0.0611, -0.2021, 0.8261, 1.3029]) tensor([0.1355, 0.1042, 0.2912, 0.4691]) -Greedy action tensor([-1.8009, -0.4906, 0.5965, -0.1259]) tensor([0.0475, 0.1762, 0.5225, 0.2537]) -Greedy action tensor([-1.9252, -0.4419, 0.6605, -0.1698]) tensor([0.0409, 0.1801, 0.5425, 0.2365]) -Greedy action tensor([-1.2262, -0.4887, 0.3649, -0.0097]) tensor([0.0879, 0.1838, 0.4316, 0.2967]) -Greedy action tensor([ 0.2270, -0.0972, -0.0422, 0.6078]) tensor([0.2531, 0.1830, 0.1934, 0.3704]) -Greedy action tensor([-1.8915, -0.4266, 0.6408, -0.1626]) tensor([0.0425, 0.1838, 0.5344, 0.2393]) -Greedy action tensor([-1.9242, -0.4216, 0.6588, -0.1633]) tensor([0.0407, 0.1830, 0.5392, 0.2370]) -Greedy action tensor([-1.8345, -0.2967, 0.5978, -0.1263]) tensor([0.0443, 0.2063, 0.5047, 0.2446]) -Greedy action tensor([-1.9243, -0.4493, 0.6590, -0.1733]) tensor([0.0410, 0.1793, 0.5433, 0.2364]) -Greedy action tensor([-1.6949, -0.3088, 0.5509, -0.0106]) tensor([0.0504, 0.2016, 0.4763, 0.2717]) -Greedy action tensor([-0.3849, 0.6431, 0.0554, 0.4343]) tensor([0.1313, 0.3670, 0.2039, 0.2978]) -Greedy action tensor([-1.9273, -0.4199, 0.6586, -0.1712]) tensor([0.0407, 0.1837, 0.5401, 0.2355]) -Greedy action tensor([ 0.3644, 0.0029, 0.5846, -0.6832]) tensor([0.3036, 0.2115, 0.3784, 0.1065]) -Greedy action tensor([-0.0148, 0.1545, -0.7486, -0.8019]) tensor([0.3205, 0.3797, 0.1539, 0.1459]) -Greedy action tensor([ 0.1652, 0.7005, -0.4260, -0.7036]) tensor([0.2716, 0.4640, 0.1504, 0.1139]) -Greedy action tensor([-0.6528, -0.7260, 0.4037, -0.8315]) tensor([0.1772, 0.1647, 0.5098, 0.1482]) -Greedy action tensor([-0.4375, -0.2631, -0.5415, -0.8004]) tensor([0.2640, 0.3143, 0.2380, 0.1837]) -Greedy action tensor([ 0.3750, -0.6726, -0.0758, -0.0054]) tensor([0.3743, 0.1313, 0.2385, 0.2559]) -Greedy action tensor([ 0.9067, -0.3090, 0.2997, -0.8536]) tensor([0.4967, 0.1473, 0.2707, 0.0854]) -Greedy action tensor([ 0.5861, -0.2617, -0.2145, -0.4867]) tensor([0.4506, 0.1930, 0.2023, 0.1541]) -Greedy action tensor([-0.4256, -0.1574, 0.4201, -0.9272]) tensor([0.1907, 0.2494, 0.4444, 0.1155]) -Greedy action tensor([ 0.0950, -0.4371, 0.4890, -0.7784]) tensor([0.2867, 0.1684, 0.4252, 0.1197]) -Greedy action tensor([ 0.3047, -0.3510, 0.0047, 0.9227]) tensor([0.2430, 0.1261, 0.1800, 0.4508]) -Greedy action tensor([ 0.2904, -1.1609, 0.5219, -0.2032]) tensor([0.3220, 0.0754, 0.4059, 0.1966]) -Greedy action tensor([-0.0546, -0.3565, -0.4286, -0.4601]) tensor([0.3232, 0.2390, 0.2224, 0.2155]) -Greedy action tensor([ 0.9902, -0.3682, -0.1999, 0.1223]) tensor([0.5048, 0.1298, 0.1536, 0.2119]) -Greedy action tensor([-0.4053, -0.9390, -0.0841, -0.2769]) tensor([0.2438, 0.1430, 0.3361, 0.2772]) -Greedy action tensor([ 0.8491, -0.6781, 1.0589, 0.3627]) tensor([0.3262, 0.0708, 0.4024, 0.2006]) -Greedy action tensor([-0.2723, -1.0660, 0.5066, -1.0654]) tensor([0.2449, 0.1107, 0.5336, 0.1108]) -Greedy action tensor([-0.1064, -0.7849, -0.4651, 0.1438]) tensor([0.2865, 0.1454, 0.2001, 0.3680]) -Greedy action tensor([-0.1641, 0.2444, -0.2151, -0.9277]) tensor([0.2551, 0.3837, 0.2424, 0.1189]) -Greedy action tensor([-0.1947, -0.3658, 0.6312, -1.3043]) tensor([0.2244, 0.1891, 0.5125, 0.0740]) -Greedy action tensor([-0.9081, -0.3639, 0.2903, -1.0659]) tensor([0.1451, 0.2500, 0.4810, 0.1239]) -Greedy action tensor([-0.1919, -0.7140, -0.2210, -0.4509]) tensor([0.2997, 0.1778, 0.2911, 0.2313]) -Greedy action tensor([ 0.8385, -0.7024, 0.3697, 0.3047]) tensor([0.4121, 0.0883, 0.2579, 0.2417]) -Greedy action tensor([-0.9254, -1.3082, 0.9033, -1.4383]) tensor([0.1176, 0.0802, 0.7319, 0.0704]) -Greedy action tensor([-0.4161, -0.1630, 0.5903, -0.6194]) tensor([0.1712, 0.2206, 0.4685, 0.1397]) -Greedy action tensor([-0.1079, -0.3395, 0.0995, -0.9124]) tensor([0.2881, 0.2285, 0.3545, 0.1289]) -Greedy action tensor([-1.3174, -0.3582, 0.0363, -1.4287]) tensor([0.1194, 0.3116, 0.4622, 0.1068]) -Greedy action tensor([-1.3317, 0.1149, 0.1723, -1.8626]) tensor([0.0967, 0.4110, 0.4353, 0.0569]) -Greedy action tensor([ 0.0353, -0.5981, -0.5287, -0.2414]) tensor([0.3499, 0.1857, 0.1991, 0.2653]) -Greedy action tensor([ 0.6710, -0.1682, 0.7474, -0.3588]) tensor([0.3486, 0.1506, 0.3763, 0.1245]) -Greedy action tensor([ 0.4757, -0.0429, -0.4217, -0.0858]) tensor([0.3886, 0.2314, 0.1584, 0.2216]) -Greedy action tensor([-1.1019, -0.3033, -1.2366, -0.3809]) tensor([0.1625, 0.3612, 0.1420, 0.3342]) -Greedy action tensor([ 0.3839, -1.1396, 1.2394, -0.3954]) tensor([0.2482, 0.0541, 0.5839, 0.1139]) -Greedy action tensor([ 0.9727, -0.2556, -0.5558, 0.8147]) tensor([0.4231, 0.1239, 0.0918, 0.3613]) -Greedy action tensor([-0.1224, 0.0533, 0.9627, -1.0294]) tensor([0.1800, 0.2146, 0.5327, 0.0727]) -Greedy action tensor([-0.8099, -0.9840, 1.4509, -0.0042]) tensor([0.0732, 0.0615, 0.7016, 0.1637]) -Greedy action tensor([-0.4407, -1.2748, -0.2973, -0.3749]) tensor([0.2735, 0.1188, 0.3157, 0.2921]) -Greedy action tensor([-0.8456, -0.9530, 0.8171, -0.8711]) tensor([0.1227, 0.1102, 0.6473, 0.1197]) -Greedy action tensor([ 0.2415, -1.9494, 0.7223, -0.0980]) tensor([0.2906, 0.0325, 0.4700, 0.2069]) -Greedy action tensor([ 0.8055, 0.0475, 0.6977, -0.2117]) tensor([0.3666, 0.1718, 0.3291, 0.1326]) -Greedy action tensor([-0.2956, -0.1261, 0.2284, -0.8146]) tensor([0.2238, 0.2651, 0.3779, 0.1332]) -Greedy action tensor([ 0.0229, -1.4393, -0.0981, -0.0114]) tensor([0.3243, 0.0751, 0.2873, 0.3133]) -Greedy action tensor([ 1.3198, 0.2628, 0.2865, -0.0104]) tensor([0.5082, 0.1766, 0.1808, 0.1344]) -Greedy action tensor([-1.0220, 0.5359, -1.4088, -0.2513]) tensor([0.1164, 0.5529, 0.0791, 0.2516]) -Greedy action tensor([ 0.3298, 0.0494, 0.2430, -0.2761]) tensor([0.3108, 0.2348, 0.2849, 0.1695]) -Greedy action tensor([ 0.0881, 0.0689, 0.1609, -0.5496]) tensor([0.2789, 0.2736, 0.3000, 0.1474]) -Greedy action tensor([-0.0811, 0.0597, 1.1055, -0.9959]) tensor([0.1716, 0.1975, 0.5621, 0.0687]) -Greedy action tensor([ 0.5364, -0.1036, 0.2672, -0.4996]) tensor([0.3779, 0.1993, 0.2887, 0.1341]) -Greedy action tensor([ 1.3375, -0.9038, 0.9823, 0.2753]) tensor([0.4645, 0.0494, 0.3256, 0.1606]) -Greedy action tensor([ 0.1247, -0.0099, -0.0614, -0.1216]) tensor([0.2869, 0.2507, 0.2381, 0.2242]) -Greedy action tensor([ 0.3005, -1.6396, -0.3832, -0.0308]) tensor([0.4226, 0.0607, 0.2133, 0.3034]) -Greedy action tensor([-0.8915, -0.4559, -0.0816, -0.2001]) tensor([0.1473, 0.2277, 0.3310, 0.2940]) -Greedy action tensor([ 1.0744, -0.6373, -0.0326, 0.3274]) tensor([0.5038, 0.0910, 0.1665, 0.2387]) -Greedy action tensor([ 0.2164, -1.2222, 0.7396, -0.2850]) tensor([0.2833, 0.0672, 0.4780, 0.1716]) -Greedy action tensor([ 0.3780, -1.1236, -0.2900, 0.7714]) tensor([0.3108, 0.0692, 0.1594, 0.4606]) -Greedy action tensor([ 0.9580, 0.8093, 0.1075, -0.3658]) tensor([0.3914, 0.3373, 0.1672, 0.1042]) -Greedy action tensor([-0.1053, -0.4173, -0.1343, -0.7953]) tensor([0.3120, 0.2284, 0.3031, 0.1565]) -Greedy action tensor([ 1.3157, -0.5094, 0.9384, 0.7661]) tensor([0.4125, 0.0665, 0.2829, 0.2381]) -Greedy action tensor([0.2625, 0.0113, 0.1699, 0.9271]) tensor([0.2158, 0.1679, 0.1968, 0.4195]) -Greedy action tensor([-0.0167, -1.9082, 0.2595, -0.7861]) tensor([0.3410, 0.0514, 0.4495, 0.1580]) -Greedy action tensor([-0.9914, 0.0514, 0.3922, -0.4690]) tensor([0.1051, 0.2983, 0.4194, 0.1772]) -Greedy action tensor([ 0.2904, -1.2756, 0.2542, 0.5101]) tensor([0.2925, 0.0611, 0.2821, 0.3643]) -Greedy action tensor([-0.6958, -0.5160, 0.2198, -1.3837]) tensor([0.1924, 0.2303, 0.4806, 0.0967]) -Greedy action tensor([-0.1637, 0.3453, 0.1428, -0.4430]) tensor([0.2093, 0.3481, 0.2843, 0.1583]) -Greedy action tensor([ 0.2618, -2.0562, 0.1783, 0.0490]) tensor([0.3538, 0.0348, 0.3254, 0.2860]) -Greedy action tensor([-1.7871, -1.1188, 0.4694, -0.9787]) tensor([0.0678, 0.1323, 0.6477, 0.1522]) -Greedy action tensor([ 0.2556, -0.2188, -0.9681, 0.5929]) tensor([0.3014, 0.1876, 0.0887, 0.4224]) -Greedy action tensor([ 1.4202, -0.3775, -0.4735, 0.3847]) tensor([0.5983, 0.0991, 0.0901, 0.2125]) -Greedy action tensor([-0.2283, -0.6469, -0.0432, -0.7420]) tensor([0.2890, 0.1902, 0.3478, 0.1729]) -Greedy action tensor([ 0.6377, -2.6572, -0.1437, -0.1152]) tensor([0.5087, 0.0189, 0.2329, 0.2396]) -Greedy action tensor([-1.0659, -1.6937, 0.5603, -0.7587]) tensor([0.1254, 0.0669, 0.6373, 0.1704]) -Greedy action tensor([ 1.2323, -0.6708, 0.0729, 0.9634]) tensor([0.4490, 0.0670, 0.1408, 0.3432]) -Greedy action tensor([ 0.5273, -0.2981, -0.0709, -0.2815]) tensor([0.4110, 0.1800, 0.2260, 0.1830]) -Greedy action tensor([ 0.9145, -0.7917, 0.6923, 0.6185]) tensor([0.3668, 0.0666, 0.2937, 0.2728]) -Greedy action tensor([-0.2015, -0.1914, -0.1951, -0.4884]) tensor([0.2655, 0.2681, 0.2672, 0.1992]) -Greedy action tensor([ 0.5789, -0.0018, 0.5961, -0.0519]) tensor([0.3216, 0.1800, 0.3272, 0.1712]) -Greedy action tensor([0.8898, 0.1179, 0.8523, 0.0669]) tensor([0.3491, 0.1613, 0.3363, 0.1533]) -Greedy action tensor([ 5.3013e-01, -1.2350e+00, -4.1828e-03, -1.6731e-04]) tensor([0.4263, 0.0730, 0.2499, 0.2509]) -Greedy action tensor([-0.1481, -0.9760, -0.4126, 1.3631]) tensor([0.1484, 0.0649, 0.1139, 0.6727]) -Greedy action tensor([-0.2482, -1.5188, -0.1808, -0.5020]) tensor([0.3199, 0.0898, 0.3422, 0.2482]) -Greedy action tensor([ 0.7348, -0.9652, -0.3820, 0.6738]) tensor([0.4080, 0.0745, 0.1336, 0.3839]) -Greedy action tensor([ 0.7222, -0.4892, 0.1289, -0.2402]) tensor([0.4480, 0.1334, 0.2475, 0.1711]) -Greedy action tensor([ 0.9391, -0.9351, 0.1984, -0.3874]) tensor([0.5275, 0.0810, 0.2515, 0.1400]) -Greedy action tensor([ 1.1640, -0.3729, -0.1728, -0.4996]) tensor([0.5998, 0.1290, 0.1576, 0.1136]) -Greedy action tensor([ 0.9611, -0.3216, -0.0200, -0.4509]) tensor([0.5275, 0.1463, 0.1977, 0.1285]) -Greedy action tensor([ 0.5633, -0.6475, -0.1757, -0.1544]) tensor([0.4418, 0.1316, 0.2110, 0.2155]) -Greedy action tensor([ 0.9191, -0.7699, 0.1046, -0.4339]) tensor([0.5302, 0.0979, 0.2348, 0.1370]) -Greedy action tensor([ 0.6698, -0.3010, 0.0505, -0.1762]) tensor([0.4262, 0.1614, 0.2294, 0.1829]) -Greedy action tensor([ 0.5267, 0.2349, -0.0545, 0.0955]) tensor([0.3383, 0.2527, 0.1892, 0.2198]) -Greedy action tensor([ 0.7678, -0.3112, -0.1599, -0.0751]) tensor([0.4617, 0.1570, 0.1826, 0.1987]) -Greedy action tensor([ 0.8865, -0.4305, -0.2725, -0.3489]) tensor([0.5341, 0.1431, 0.1676, 0.1553]) -Greedy action tensor([ 1.2929, -0.9573, -0.0521, -0.6782]) tensor([0.6643, 0.0700, 0.1731, 0.0925]) -Greedy action tensor([ 0.3203, -0.0274, -0.0976, -0.1075]) tensor([0.3315, 0.2341, 0.2183, 0.2161]) -Greedy action tensor([ 0.8209, -0.3753, 0.0107, -0.1556]) tensor([0.4709, 0.1424, 0.2094, 0.1773]) -Greedy action tensor([ 0.7743, -0.2605, -0.0927, -0.1378]) tensor([0.4593, 0.1632, 0.1930, 0.1845]) -Greedy action tensor([ 0.7339, 0.0819, -0.0079, 0.0574]) tensor([0.3991, 0.2079, 0.1901, 0.2029]) -Greedy action tensor([ 0.9998, -0.6835, 0.0952, -0.4205]) tensor([0.5458, 0.1014, 0.2209, 0.1319]) -Greedy action tensor([ 0.4200, -0.0827, -0.1018, -0.3080]) tensor([0.3730, 0.2256, 0.2213, 0.1801]) -Greedy action tensor([ 0.4449, -0.0852, 0.0486, -0.0699]) tensor([0.3498, 0.2059, 0.2353, 0.2090]) -Greedy action tensor([ 0.9192, -0.4130, -0.1221, -0.6290]) tensor([0.5466, 0.1442, 0.1929, 0.1162]) -Greedy action tensor([ 0.8121, -0.3962, -0.0650, -0.2330]) tensor([0.4840, 0.1446, 0.2013, 0.1702]) -Greedy action tensor([ 0.7312, -0.5737, -0.0247, -0.3772]) tensor([0.4829, 0.1310, 0.2268, 0.1594]) -Greedy action tensor([ 0.5294, -0.4913, -0.0882, -0.1854]) tensor([0.4186, 0.1508, 0.2257, 0.2048]) -Greedy action tensor([ 0.3043, -0.0871, 0.0209, -0.0455]) tensor([0.3191, 0.2157, 0.2403, 0.2249]) -Greedy action tensor([ 0.8386, -0.4273, -0.0592, -0.3055]) tensor([0.4980, 0.1404, 0.2029, 0.1586]) -Greedy action tensor([ 0.8394, -0.6801, -0.0357, -0.3766]) tensor([0.5176, 0.1133, 0.2157, 0.1534]) -Greedy action tensor([ 0.9816, -0.4942, 0.1436, -0.2346]) tensor([0.5108, 0.1168, 0.2210, 0.1514]) -Greedy action tensor([ 1.0321, -0.3157, -0.0384, -0.1357]) tensor([0.5225, 0.1358, 0.1792, 0.1625]) -Greedy action tensor([ 0.9863, -0.6819, -0.0962, -0.1601]) tensor([0.5420, 0.1022, 0.1836, 0.1722]) -Greedy action tensor([ 1.3066, -0.6187, -0.0145, -0.7639]) tensor([0.6499, 0.0948, 0.1734, 0.0820]) -Greedy action tensor([ 0.5808, -0.3617, -0.1667, -0.2043]) tensor([0.4312, 0.1680, 0.2042, 0.1966]) -Greedy action tensor([ 0.8153, -0.5918, -0.0692, -0.2926]) tensor([0.5030, 0.1232, 0.2077, 0.1661]) -Greedy action tensor([ 1.0888, -0.7024, 0.1409, -0.4145]) tensor([0.5628, 0.0939, 0.2181, 0.1252]) -Greedy action tensor([ 1.1463, -0.6044, -0.1808, -0.2376]) tensor([0.5919, 0.1028, 0.1570, 0.1483]) -Greedy action tensor([ 1.0017, -0.4926, -0.1122, -0.3160]) tensor([0.5493, 0.1233, 0.1803, 0.1471]) -Greedy action tensor([ 0.2794, 0.0473, -0.2644, -0.2070]) tensor([0.3347, 0.2653, 0.1943, 0.2057]) -Greedy action tensor([ 0.7524, -0.3592, -0.1109, -0.2777]) tensor([0.4744, 0.1561, 0.2001, 0.1694]) -Greedy action tensor([ 0.6556, -0.3183, -0.0319, -0.1765]) tensor([0.4318, 0.1631, 0.2172, 0.1879]) -Greedy action tensor([ 0.6049, -0.0894, 0.1150, -0.1837]) tensor([0.3896, 0.1946, 0.2387, 0.1771]) -Greedy action tensor([ 0.9250, -0.7503, 0.1202, -0.4627]) tensor([0.5308, 0.0994, 0.2373, 0.1325]) -Greedy action tensor([ 0.6331, -0.3767, -0.1827, -0.1429]) tensor([0.4411, 0.1607, 0.1951, 0.2030]) -Greedy action tensor([0.9480, 0.2606, 0.1077, 0.0738]) tensor([0.4252, 0.2138, 0.1835, 0.1774]) -Greedy action tensor([ 0.9463, -0.7270, 0.1129, -0.2019]) tensor([0.5156, 0.0967, 0.2241, 0.1636]) -Greedy action tensor([ 0.7567, -0.5487, 0.0384, -0.2882]) tensor([0.4739, 0.1284, 0.2310, 0.1667]) -Greedy action tensor([ 0.4928, -0.1264, -0.0508, -0.0317]) tensor([0.3689, 0.1986, 0.2142, 0.2183]) -Greedy action tensor([ 1.0413, -0.6163, -0.1356, -0.4130]) tensor([0.5772, 0.1100, 0.1779, 0.1348]) -Greedy action tensor([ 0.7644, -0.4050, -0.1350, -0.3009]) tensor([0.4850, 0.1506, 0.1973, 0.1671]) -Greedy action tensor([ 1.1482, -1.1636, 0.0739, -0.7473]) tensor([0.6286, 0.0623, 0.2147, 0.0944]) -Greedy action tensor([ 0.0717, -0.1339, 0.1230, 0.2347]) tensor([0.2473, 0.2013, 0.2603, 0.2911]) -Greedy action tensor([ 1.3026, -0.9204, -0.0378, -0.7374]) tensor([0.6666, 0.0722, 0.1745, 0.0867]) -Greedy action tensor([ 0.7991, -0.2519, 0.0888, -0.2865]) tensor([0.4590, 0.1604, 0.2256, 0.1550]) -Greedy action tensor([ 1.1017, -0.7634, 0.1307, -0.8095]) tensor([0.5947, 0.0921, 0.2252, 0.0880]) -Greedy action tensor([ 0.8784, -0.2593, -0.0291, -0.2321]) tensor([0.4870, 0.1561, 0.1965, 0.1604]) -Greedy action tensor([ 0.9311, -0.3496, -0.0066, -0.3107]) tensor([0.5107, 0.1419, 0.1999, 0.1475]) -Greedy action tensor([ 0.6354, -0.5033, 0.0624, -0.3318]) tensor([0.4417, 0.1414, 0.2490, 0.1679]) -Greedy action tensor([0.5935, 0.0618, 0.0064, 0.0905]) tensor([0.3639, 0.2138, 0.2023, 0.2200]) -Greedy action tensor([ 0.4500, -0.2728, -0.1809, -0.1703]) tensor([0.3913, 0.1900, 0.2082, 0.2105]) -Greedy action tensor([ 1.0363, -0.9667, 0.0541, -0.5570]) tensor([0.5839, 0.0788, 0.2186, 0.1187]) -Greedy action tensor([ 0.7396, -0.7891, -0.1862, -0.2220]) tensor([0.5012, 0.1087, 0.1986, 0.1916]) -Greedy action tensor([ 0.7190, -0.3198, 0.0233, -0.4173]) tensor([0.4601, 0.1628, 0.2294, 0.1477]) -Greedy action tensor([ 1.0820, -0.6529, -0.1028, -0.5518]) tensor([0.5961, 0.1052, 0.1823, 0.1164]) -Greedy action tensor([ 1.2279, -0.5051, -0.2573, -0.6389]) tensor([0.6419, 0.1135, 0.1454, 0.0993]) -Greedy action tensor([ 0.7000, -0.2529, 0.0995, -0.0150]) tensor([0.4126, 0.1591, 0.2264, 0.2019]) -Greedy action tensor([ 0.7274, -0.8304, 0.0950, -0.3580]) tensor([0.4808, 0.1013, 0.2555, 0.1624]) -Greedy action tensor([ 0.6770, -0.2614, 0.0953, -0.2175]) tensor([0.4239, 0.1659, 0.2369, 0.1733]) -Greedy action tensor([ 0.4663, -0.0936, -0.0697, -0.1147]) tensor([0.3682, 0.2104, 0.2154, 0.2060]) -Greedy action tensor([ 0.5024, -0.2100, -0.0571, -0.0520]) tensor([0.3793, 0.1860, 0.2168, 0.2179]) -Greedy action tensor([ 0.7404, -0.2889, -0.1632, -0.1373]) tensor([0.4591, 0.1640, 0.1860, 0.1909]) -Greedy action tensor([ 1.1294, -0.5227, -0.1644, -0.6505]) tensor([0.6118, 0.1173, 0.1678, 0.1032]) -Greedy action tensor([ 0.9583, -0.4803, -0.1618, -0.2491]) tensor([0.5369, 0.1274, 0.1752, 0.1605]) -Greedy action tensor([ 0.4087, -0.0517, -0.1411, 0.0239]) tensor([0.3462, 0.2184, 0.1998, 0.2356]) -Greedy action tensor([ 1.1081, -0.5637, -0.2121, -0.3593]) tensor([0.5933, 0.1115, 0.1585, 0.1368]) -Greedy action tensor([ 0.9114, -0.6856, -0.0162, -0.5150]) tensor([0.5440, 0.1102, 0.2151, 0.1307]) -Greedy action tensor([ 1.1312, -0.8169, -0.0504, -0.6040]) tensor([0.6151, 0.0877, 0.1887, 0.1085]) -Greedy action tensor([ 1.3987, -0.7009, -0.0518, -0.5599]) tensor([0.6675, 0.0818, 0.1565, 0.0942]) -Greedy action tensor([ 1.3152, -0.4564, -0.0292, -0.4915]) tensor([0.6270, 0.1066, 0.1634, 0.1029]) -Greedy action tensor([ 0.9618, -0.2933, -0.1172, -0.1827]) tensor([0.5146, 0.1467, 0.1749, 0.1638]) -Greedy action tensor([ 0.7835, -0.2360, -0.0844, -0.1238]) tensor([0.4578, 0.1652, 0.1922, 0.1848]) -Greedy action tensor([ 1.1201, -0.9397, -0.0835, -0.3797]) tensor([0.6058, 0.0772, 0.1818, 0.1352]) -Greedy action tensor([ 0.7898, -0.6253, -0.0229, -0.3911]) tensor([0.5016, 0.1218, 0.2225, 0.1540]) -Greedy action tensor([ 0.8389, -0.6544, 0.0766, -0.4139]) tensor([0.5058, 0.1136, 0.2360, 0.1445]) -Greedy action tensor([ 0.7893, -0.3868, -0.0892, -0.2560]) tensor([0.4818, 0.1486, 0.2002, 0.1694]) -Greedy action tensor([ 1.1040, -0.7109, -0.1501, -0.3901]) tensor([0.5979, 0.0974, 0.1706, 0.1342]) -Greedy action tensor([ 1.3141, -0.6379, -0.2904, 0.2815]) tensor([0.5886, 0.0836, 0.1183, 0.2096]) -Greedy action tensor([ 1.1018, -0.8897, -0.2286, 0.1680]) tensor([0.5574, 0.0761, 0.1474, 0.2191]) -Greedy action tensor([ 1.7312, -0.5883, -0.7564, 0.3722]) tensor([0.6952, 0.0684, 0.0578, 0.1786]) -Greedy action tensor([ 1.1644, -0.5639, 0.0977, 0.0964]) tensor([0.5361, 0.0952, 0.1845, 0.1842]) -Greedy action tensor([ 1.2333, -0.4905, -0.0810, 0.4416]) tensor([0.5263, 0.0939, 0.1414, 0.2384]) -Greedy action tensor([ 1.6749, 0.1752, -0.3057, 0.6314]) tensor([0.5836, 0.1303, 0.0805, 0.2056]) -Greedy action tensor([ 1.5243, -0.6208, -0.9632, 0.3033]) tensor([0.6689, 0.0783, 0.0556, 0.1973]) -Greedy action tensor([ 1.5530, -0.9103, -0.5098, 0.3748]) tensor([0.6579, 0.0560, 0.0836, 0.2025]) -Greedy action tensor([ 1.6586, -0.8982, -0.0897, 0.2600]) tensor([0.6673, 0.0518, 0.1162, 0.1648]) -Greedy action tensor([ 1.6511, -0.6323, -0.2389, 0.6345]) tensor([0.6192, 0.0631, 0.0936, 0.2241]) -Greedy action tensor([ 1.1730, -0.6642, -0.2278, -0.0204]) tensor([0.5852, 0.0932, 0.1442, 0.1774]) -Greedy action tensor([ 2.0977, -1.2365, 0.0744, 0.4817]) tensor([0.7318, 0.0261, 0.0968, 0.1454]) -Greedy action tensor([ 1.9026, -0.2735, -0.3095, 0.2023]) tensor([0.7115, 0.0807, 0.0779, 0.1299]) -Greedy action tensor([ 1.3467, -0.4579, -0.1970, 0.0837]) tensor([0.6021, 0.0991, 0.1286, 0.1703]) -Greedy action tensor([ 1.4561, 0.3348, -0.7045, 0.4501]) tensor([0.5535, 0.1804, 0.0638, 0.2024]) -Greedy action tensor([ 1.9204, -1.3134, -0.0422, 0.7955]) tensor([0.6646, 0.0262, 0.0934, 0.2158]) -Greedy action tensor([ 1.9513, -0.7038, -0.8328, 0.4302]) tensor([0.7404, 0.0520, 0.0457, 0.1618]) -Greedy action tensor([ 1.1369, -0.4348, -0.8093, 0.2734]) tensor([0.5643, 0.1172, 0.0806, 0.2380]) -Greedy action tensor([ 1.3933, -0.4957, -0.3064, 0.1996]) tensor([0.6108, 0.0924, 0.1116, 0.1851]) -Greedy action tensor([ 1.1856, -0.1421, -0.3730, 0.0789]) tensor([0.5537, 0.1468, 0.1165, 0.1831]) -Greedy action tensor([ 1.5428, -0.3572, -0.5868, 0.3954]) tensor([0.6306, 0.0943, 0.0750, 0.2002]) -Greedy action tensor([ 1.4215, -0.5448, -0.2430, 0.2528]) tensor([0.6097, 0.0853, 0.1154, 0.1895]) -Greedy action tensor([ 1.3240, 0.3161, -0.2421, -0.1976]) tensor([0.5580, 0.2036, 0.1165, 0.1218]) -Greedy action tensor([ 1.2740, -0.4600, -1.1690, 0.4208]) tensor([0.5919, 0.1045, 0.0514, 0.2522]) -Greedy action tensor([ 1.4818, 0.3394, -0.4787, 0.0320]) tensor([0.5901, 0.1883, 0.0831, 0.1385]) -Greedy action tensor([ 1.5178, -0.5418, -0.4779, -0.0473]) tensor([0.6791, 0.0866, 0.0923, 0.1420]) -Greedy action tensor([ 1.7243, -0.6560, -0.7339, 0.1199]) tensor([0.7251, 0.0671, 0.0621, 0.1458]) -Greedy action tensor([ 1.9279, -1.0756, -0.6010, 0.3821]) tensor([0.7449, 0.0370, 0.0594, 0.1588]) -Greedy action tensor([ 1.9729, -0.5982, -0.2427, 0.1723]) tensor([0.7403, 0.0566, 0.0808, 0.1223]) -Greedy action tensor([ 1.7023, -0.8288, -0.2518, 0.6815]) tensor([0.6323, 0.0503, 0.0896, 0.2278]) -Greedy action tensor([ 2.2694, -1.3445, -0.2658, 0.4150]) tensor([0.7919, 0.0213, 0.0628, 0.1240]) -Greedy action tensor([ 1.3956, -0.1770, -0.3074, 0.6207]) tensor([0.5404, 0.1121, 0.0984, 0.2490]) -Greedy action tensor([ 1.9321, -0.9926, -0.5235, 0.3803]) tensor([0.7400, 0.0397, 0.0635, 0.1568]) -Greedy action tensor([ 1.0268, -0.5930, -0.2024, -0.0773]) tensor([0.5489, 0.1086, 0.1605, 0.1820]) -Greedy action tensor([ 1.9489, -0.6817, -0.3262, 1.0523]) tensor([0.6318, 0.0455, 0.0649, 0.2578]) -Greedy action tensor([ 1.3293, -0.7644, -0.2166, 0.4985]) tensor([0.5643, 0.0695, 0.1203, 0.2459]) -Greedy action tensor([ 1.5869, -0.7374, -0.4959, 0.1646]) tensor([0.6832, 0.0669, 0.0851, 0.1648]) -Greedy action tensor([ 1.3309, -0.1979, -0.7054, 0.2251]) tensor([0.5959, 0.1292, 0.0778, 0.1972]) -Greedy action tensor([ 0.6756, -0.1067, -0.3088, 0.5875]) tensor([0.3641, 0.1665, 0.1360, 0.3334]) -Greedy action tensor([ 1.7078, -0.6148, -0.9557, 0.6017]) tensor([0.6673, 0.0654, 0.0465, 0.2208]) -Greedy action tensor([ 1.7256, -0.5031, -0.5137, 0.4174]) tensor([0.6736, 0.0725, 0.0718, 0.1821]) -Greedy action tensor([ 1.5779, -0.2673, -0.1574, -0.1272]) tensor([0.6596, 0.1042, 0.1163, 0.1199]) -Greedy action tensor([ 1.1779e+00, -5.8681e-01, -5.4767e-01, -1.6958e-04]) tensor([0.6034, 0.1033, 0.1075, 0.1858]) -Greedy action tensor([ 1.9853, -0.4860, -0.3292, 0.8232]) tensor([0.6684, 0.0565, 0.0660, 0.2091]) -Greedy action tensor([ 1.7825, -0.6439, -0.5651, 0.4826]) tensor([0.6866, 0.0607, 0.0656, 0.1871]) -Greedy action tensor([ 1.5247, 0.2769, -0.8919, 0.3077]) tensor([0.5979, 0.1717, 0.0534, 0.1771]) -Greedy action tensor([ 0.8917, -0.2708, -0.9043, 0.3842]) tensor([0.4806, 0.1503, 0.0798, 0.2893]) -Greedy action tensor([ 1.1588, -0.3383, -0.5905, -0.0129]) tensor([0.5856, 0.1311, 0.1018, 0.1815]) -Greedy action tensor([ 2.0010, -0.9611, -0.3776, 0.2028]) tensor([0.7634, 0.0395, 0.0708, 0.1264]) -Greedy action tensor([ 1.0736, 0.1936, -0.7215, 0.2018]) tensor([0.5002, 0.2075, 0.0831, 0.2092]) -Greedy action tensor([ 1.2065, -0.1274, -0.2274, -0.1269]) tensor([0.5664, 0.1492, 0.1350, 0.1493]) -Greedy action tensor([ 2.1668, 0.6067, -0.0069, 0.2734]) tensor([0.6782, 0.1425, 0.0771, 0.1021]) -Greedy action tensor([ 1.0452, -0.3236, -0.0933, 0.1832]) tensor([0.5007, 0.1274, 0.1604, 0.2115]) -Greedy action tensor([ 2.2669, -0.6488, -1.1124, 0.2516]) tensor([0.8187, 0.0443, 0.0279, 0.1091]) -Greedy action tensor([ 1.4663, -0.4558, -0.6537, 0.4988]) tensor([0.6074, 0.0889, 0.0729, 0.2308]) -Greedy action tensor([ 1.1784, -0.3774, -0.3025, 0.5095]) tensor([0.5126, 0.1082, 0.1166, 0.2626]) -Greedy action tensor([ 1.8747, -0.6895, -0.3063, 0.3229]) tensor([0.7134, 0.0549, 0.0806, 0.1511]) -Greedy action tensor([ 1.1965, -0.5154, -0.4076, 0.5453]) tensor([0.5255, 0.0949, 0.1057, 0.2740]) -Greedy action tensor([ 1.2402, -0.4740, -0.8718, 0.3336]) tensor([0.5865, 0.1056, 0.0710, 0.2369]) -Greedy action tensor([ 1.7245, -0.7214, -0.6463, 0.3022]) tensor([0.7036, 0.0610, 0.0657, 0.1697]) -Greedy action tensor([ 0.9486, -0.3729, -0.0169, 0.2728]) tensor([0.4638, 0.1237, 0.1766, 0.2359]) -Greedy action tensor([ 1.0046, 0.0689, -0.3123, 0.1952]) tensor([0.4750, 0.1863, 0.1273, 0.2114]) -Greedy action tensor([ 1.0133, -0.0244, 0.1545, -0.1154]) tensor([0.4759, 0.1686, 0.2016, 0.1539]) -Greedy action tensor([ 1.2350, -0.5246, -0.2355, 0.2914]) tensor([0.5583, 0.0961, 0.1283, 0.2173]) -Greedy action tensor([ 1.2775, -1.0197, -0.1300, 0.2694]) tensor([0.5847, 0.0588, 0.1431, 0.2134]) -Greedy action tensor([ 2.0864, -0.2049, -0.5460, -0.0605]) tensor([0.7753, 0.0784, 0.0557, 0.0906]) -Greedy action tensor([ 2.4189, -1.4031, -0.3371, 0.4148]) tensor([0.8195, 0.0179, 0.0521, 0.1105]) -Greedy action tensor([ 2.2494, -0.7763, -0.6489, 0.0826]) tensor([0.8209, 0.0398, 0.0452, 0.0940]) -Greedy action tensor([ 1.0998, -0.3667, -0.7079, 0.4142]) tensor([0.5267, 0.1215, 0.0864, 0.2654]) -Greedy action tensor([ 1.5666, -0.5558, -1.0629, 0.5668]) tensor([0.6411, 0.0768, 0.0462, 0.2359]) -Greedy action tensor([ 1.4260, 0.2328, -0.8949, 0.3371]) tensor([0.5754, 0.1745, 0.0565, 0.1937]) -Greedy action tensor([ 1.3360, -0.4615, -0.7285, 0.0861]) tensor([0.6333, 0.1049, 0.0803, 0.1814]) -Greedy action tensor([ 1.7353, -0.1401, -0.6019, -0.1360]) tensor([0.7124, 0.1092, 0.0688, 0.1096]) -Greedy action tensor([ 1.4151, -0.6226, -0.2123, 0.3960]) tensor([0.5925, 0.0772, 0.1164, 0.2139]) -Greedy action tensor([ 1.6792, -0.8345, -0.7247, 0.4571]) tensor([0.6822, 0.0552, 0.0616, 0.2010]) -Greedy action tensor([ 1.3492, 0.3183, -0.7839, 0.4363]) tensor([0.5329, 0.1901, 0.0631, 0.2139]) -Greedy action tensor([ 1.6885, -1.1260, -0.3527, 0.7068]) tensor([0.6392, 0.0383, 0.0830, 0.2395]) -Greedy action tensor([ 1.7272, -0.6548, 0.0113, 0.3025]) tensor([0.6610, 0.0611, 0.1189, 0.1590]) -Greedy action tensor([ 1.2620, -0.4055, -0.9377, 0.2685]) tensor([0.5989, 0.1130, 0.0664, 0.2218]) -Greedy action tensor([ 1.1605, -0.6549, -0.2352, 0.1721]) tensor([0.5610, 0.0913, 0.1389, 0.2088]) -Greedy action tensor([ 1.5061, -0.7521, -0.3261, 0.5414]) tensor([0.6076, 0.0635, 0.0973, 0.2316]) -Greedy action tensor([-1.0806, -1.0182, -0.7446, 0.0439]) tensor([0.1528, 0.1627, 0.2139, 0.4706]) -Greedy action tensor([-0.4362, 0.3631, -0.3334, -0.3429]) tensor([0.1842, 0.4096, 0.2041, 0.2022]) -Greedy action tensor([ 0.2830, 0.2460, -0.6945, 0.1717]) tensor([0.3092, 0.2979, 0.1163, 0.2766]) -Greedy action tensor([-0.3997, -1.8017, -0.1744, 0.6481]) tensor([0.1869, 0.0460, 0.2341, 0.5329]) -Greedy action tensor([-0.0276, -0.5347, -0.0673, -0.5878]) tensor([0.3190, 0.1921, 0.3066, 0.1822]) -Greedy action tensor([ 0.6688, -0.2208, -0.4076, -0.4466]) tensor([0.4809, 0.1976, 0.1639, 0.1576]) -Greedy action tensor([ 1.0508, -0.4296, -0.2681, 0.9906]) tensor([0.4104, 0.0934, 0.1098, 0.3864]) -Greedy action tensor([-0.4302, -0.9462, -0.0495, -0.7598]) tensor([0.2646, 0.1579, 0.3872, 0.1903]) -Greedy action tensor([-0.4639, -0.7589, -0.8812, 0.0686]) tensor([0.2435, 0.1813, 0.1604, 0.4147]) -Greedy action tensor([ 0.3291, -0.1960, -0.1872, -0.2012]) tensor([0.3601, 0.2130, 0.2149, 0.2119]) -Greedy action tensor([ 0.5992, -0.3277, 0.2342, -0.3201]) tensor([0.4018, 0.1590, 0.2789, 0.1602]) -Greedy action tensor([-0.3458, -0.7079, -0.1125, -1.2067]) tensor([0.2957, 0.2059, 0.3734, 0.1250]) -Greedy action tensor([ 0.5180, -0.0235, -0.5539, 0.8167]) tensor([0.3056, 0.1778, 0.1046, 0.4120]) -Greedy action tensor([-0.9367, -1.0373, 0.6863, -0.7116]) tensor([0.1216, 0.1099, 0.6162, 0.1523]) -Greedy action tensor([-0.1236, 0.5116, -0.0712, -0.4955]) tensor([0.2159, 0.4076, 0.2276, 0.1489]) -Greedy action tensor([-0.8672, 0.3007, 0.3902, -0.3250]) tensor([0.1058, 0.3402, 0.3720, 0.1820]) -Greedy action tensor([ 0.4571, -0.4983, 0.6514, 0.1822]) tensor([0.2977, 0.1145, 0.3616, 0.2262]) -Greedy action tensor([ 0.4052, -0.7219, -0.9598, 0.2098]) tensor([0.4164, 0.1349, 0.1063, 0.3424]) -Greedy action tensor([ 0.0985, -0.3232, -0.7532, 0.2796]) tensor([0.3048, 0.1999, 0.1300, 0.3653]) -Greedy action tensor([ 0.1629, -1.3927, 0.5892, -0.8825]) tensor([0.3232, 0.0682, 0.4950, 0.1136]) -Greedy action tensor([-0.7922, -0.8467, 0.5517, -0.8711]) tensor([0.1491, 0.1412, 0.5718, 0.1378]) -Greedy action tensor([ 0.5024, -0.0449, 0.6334, -0.1683]) tensor([0.3096, 0.1791, 0.3529, 0.1583]) -Greedy action tensor([-0.2639, -1.2885, -0.9055, 0.2242]) tensor([0.2845, 0.1021, 0.1498, 0.4636]) -Greedy action tensor([ 0.7931, -0.6277, 1.2564, 0.6583]) tensor([0.2699, 0.0652, 0.4290, 0.2359]) -Greedy action tensor([ 0.0368, 0.0372, -0.1403, -0.5986]) tensor([0.2969, 0.2970, 0.2487, 0.1573]) -Greedy action tensor([ 0.6124, 0.1117, -0.2988, -1.0821]) tensor([0.4562, 0.2765, 0.1834, 0.0838]) -Greedy action tensor([-0.3242, -0.3255, 0.2499, -0.1912]) tensor([0.2034, 0.2031, 0.3611, 0.2323]) -Greedy action tensor([-0.1318, 0.7042, 0.0459, -0.4295]) tensor([0.1907, 0.4400, 0.2278, 0.1416]) -Greedy action tensor([ 0.3225, -1.1596, 0.1668, -0.4869]) tensor([0.3955, 0.0899, 0.3385, 0.1761]) -Greedy action tensor([-0.0286, -1.0662, -0.7038, 0.5747]) tensor([0.2709, 0.0960, 0.1379, 0.4952]) -Greedy action tensor([ 0.5814, -0.5123, -0.8278, 0.9514]) tensor([0.3303, 0.1107, 0.0807, 0.4783]) -Greedy action tensor([ 0.3597, -1.0755, -0.1865, 0.0810]) tensor([0.3885, 0.0925, 0.2250, 0.2940]) -Greedy action tensor([ 0.0178, -0.1245, 0.2733, -0.4915]) tensor([0.2660, 0.2307, 0.3434, 0.1598]) -Greedy action tensor([ 0.5195, -1.2113, 0.6393, -0.7475]) tensor([0.3867, 0.0685, 0.4359, 0.1089]) -Greedy action tensor([-0.5371, 0.2000, 0.6249, -1.2344]) tensor([0.1474, 0.3080, 0.4712, 0.0734]) -Greedy action tensor([-2.0177, 0.5856, -0.7818, -0.4166]) tensor([0.0437, 0.5897, 0.1502, 0.2164]) -Greedy action tensor([-0.4712, 0.1706, 0.1823, -0.7030]) tensor([0.1781, 0.3384, 0.3423, 0.1412]) -Greedy action tensor([ 1.2517, -0.7515, -0.6255, 0.0926]) tensor([0.6244, 0.0842, 0.0955, 0.1959]) -Greedy action tensor([-0.5544, -0.8356, 0.0613, -0.5048]) tensor([0.2148, 0.1621, 0.3975, 0.2257]) -Greedy action tensor([ 0.6854, -0.2943, -0.1441, -0.1878]) tensor([0.4486, 0.1684, 0.1957, 0.1873]) -Greedy action tensor([-0.4261, -0.9891, -0.3213, 0.2876]) tensor([0.2118, 0.1206, 0.2352, 0.4324]) -Greedy action tensor([ 1.0947, -1.3919, -0.2211, -0.4147]) tensor([0.6359, 0.0529, 0.1706, 0.1406]) -Greedy action tensor([-0.8855, -0.2745, -0.0210, -0.6239]) tensor([0.1535, 0.2828, 0.3643, 0.1994]) -Greedy action tensor([-0.2863, 0.7006, 0.4241, -1.1977]) tensor([0.1634, 0.4384, 0.3325, 0.0657]) -Greedy action tensor([-0.1859, 0.4383, -0.1778, -0.5175]) tensor([0.2177, 0.4065, 0.2195, 0.1563]) -Greedy action tensor([ 0.2645, -0.1022, -0.0369, 0.2151]) tensor([0.2955, 0.2048, 0.2186, 0.2812]) -Greedy action tensor([ 0.2513, -1.2421, 0.5962, -0.3903]) tensor([0.3162, 0.0710, 0.4464, 0.1664]) -Greedy action tensor([ 0.2388, -0.6931, 0.3069, 0.5948]) tensor([0.2569, 0.1012, 0.2750, 0.3668]) -Greedy action tensor([ 0.5471, -0.4289, -0.0177, -0.0747]) tensor([0.4028, 0.1518, 0.2290, 0.2163]) -Greedy action tensor([-0.5716, -1.1641, -0.7087, 0.3930]) tensor([0.1981, 0.1095, 0.1727, 0.5197]) -Greedy action tensor([-0.1542, 0.0636, -0.2565, -0.0722]) tensor([0.2363, 0.2938, 0.2133, 0.2565]) -Greedy action tensor([-0.0175, -0.5848, -0.0388, -0.9280]) tensor([0.3392, 0.1923, 0.3320, 0.1365]) -Greedy action tensor([ 1.1476, -1.7036, 1.6055, 0.2170]) tensor([0.3297, 0.0191, 0.5212, 0.1300]) -Greedy action tensor([ 0.4993, -0.7797, -0.1185, 0.0645]) tensor([0.4057, 0.1129, 0.2187, 0.2627]) -Greedy action tensor([-0.0978, 0.2593, -0.4769, -0.8359]) tensor([0.2784, 0.3979, 0.1906, 0.1331]) -Greedy action tensor([-0.9079, -0.6016, -0.1880, 0.3475]) tensor([0.1262, 0.1715, 0.2593, 0.4430]) -Greedy action tensor([ 1.3414, -0.4883, 0.9381, 0.0072]) tensor([0.4780, 0.0767, 0.3194, 0.1259]) -Greedy action tensor([ 0.1713, -0.3232, -0.4875, -0.4686]) tensor([0.3767, 0.2297, 0.1949, 0.1987]) -Greedy action tensor([-0.7891, -0.3223, -0.9780, -0.0498]) tensor([0.1813, 0.2891, 0.1501, 0.3796]) -Greedy action tensor([-0.7425, -0.9845, -0.5628, -1.3479]) tensor([0.2835, 0.2225, 0.3393, 0.1547]) -Greedy action tensor([ 1.0137, -1.2731, 0.2771, 0.2170]) tensor([0.4923, 0.0500, 0.2357, 0.2219]) -Greedy action tensor([ 1.7336, -0.5450, -0.0100, 1.4481]) tensor([0.4928, 0.0505, 0.0862, 0.3705]) -Greedy action tensor([-0.5104, -0.2697, -0.0056, -0.7126]) tensor([0.2107, 0.2681, 0.3491, 0.1721]) -Greedy action tensor([ 0.8756, -0.8324, -0.2216, -0.9829]) tensor([0.5985, 0.1085, 0.1998, 0.0933]) -Greedy action tensor([ 0.7400, -0.9145, -0.3142, 0.1491]) tensor([0.4777, 0.0913, 0.1665, 0.2645]) -Greedy action tensor([ 1.0203, -1.0156, 0.4812, 0.5095]) tensor([0.4322, 0.0564, 0.2521, 0.2593]) -Greedy action tensor([ 0.0411, -1.1637, 0.5441, -0.9918]) tensor([0.3022, 0.0906, 0.4997, 0.1076]) -Greedy action tensor([-1.2747, -2.1570, 0.1272, 0.5103]) tensor([0.0874, 0.0362, 0.3553, 0.5211]) -Greedy action tensor([ 0.1056, -0.7049, -0.4594, -0.4929]) tensor([0.3902, 0.1735, 0.2218, 0.2145]) -Greedy action tensor([ 0.2552, 0.1568, -0.1746, -0.3152]) tensor([0.3203, 0.2903, 0.2084, 0.1811]) -Greedy action tensor([ 0.0178, 0.1550, -0.2799, -0.6025]) tensor([0.2918, 0.3347, 0.2166, 0.1569]) -Greedy action tensor([ 0.9082, -0.4826, -0.7971, 0.2260]) tensor([0.5165, 0.1285, 0.0939, 0.2611]) -Greedy action tensor([ 0.2251, 0.4440, -0.4351, 1.3806]) tensor([0.1684, 0.2096, 0.0870, 0.5349]) -Greedy action tensor([ 0.3632, 0.8011, -0.1381, -0.4937]) tensor([0.2794, 0.4328, 0.1692, 0.1186]) -Greedy action tensor([-0.4752, -0.5179, 0.4063, -0.3176]) tensor([0.1804, 0.1729, 0.4356, 0.2112]) -Greedy action tensor([ 0.0725, -0.7663, -0.6196, -0.0977]) tensor([0.3602, 0.1557, 0.1803, 0.3038]) -Greedy action tensor([ 0.9225, 0.0498, -0.0485, -0.1843]) tensor([0.4701, 0.1964, 0.1780, 0.1554]) -Greedy action tensor([-0.1873, -0.8913, -0.0499, -0.4037]) tensor([0.2901, 0.1435, 0.3328, 0.2336]) -Greedy action tensor([-0.5600, -0.7405, -0.3669, -0.4614]) tensor([0.2409, 0.2011, 0.2922, 0.2658]) -Greedy action tensor([-0.6190, 0.3702, -0.2725, -0.0073]) tensor([0.1440, 0.3871, 0.2036, 0.2654]) -Greedy action tensor([-0.0939, -0.9120, -0.4451, 0.3752]) tensor([0.2671, 0.1179, 0.1880, 0.4270]) -Greedy action tensor([-1.6679, -0.1762, 0.4778, -0.0719]) tensor([0.0528, 0.2348, 0.4517, 0.2607]) -Greedy action tensor([-1.6438, -0.1929, 0.4863, -0.0844]) tensor([0.0542, 0.2314, 0.4564, 0.2579]) -Greedy action tensor([-1.2442, -0.5874, 0.3541, 0.0472]) tensor([0.0869, 0.1675, 0.4295, 0.3160]) -Greedy action tensor([-1.9151, -0.4353, 0.6497, -0.1679]) tensor([0.0414, 0.1820, 0.5387, 0.2378]) -Greedy action tensor([-1.9018, -0.4005, 0.6387, -0.1623]) tensor([0.0419, 0.1880, 0.5315, 0.2386]) -Greedy action tensor([-1.8980, -0.3956, 0.6275, -0.1474]) tensor([0.0421, 0.1892, 0.5262, 0.2425]) -Greedy action tensor([-1.9052, -0.3940, 0.6376, -0.1574]) tensor([0.0417, 0.1889, 0.5300, 0.2394]) -Greedy action tensor([-1.8113, -0.4107, 0.6250, -0.1337]) tensor([0.0458, 0.1858, 0.5234, 0.2451]) -Greedy action tensor([-1.6436, -0.4202, 0.5128, -0.0907]) tensor([0.0563, 0.1913, 0.4864, 0.2660]) -Greedy action tensor([-1.2203, -0.3404, 0.2662, 0.2414]) tensor([0.0823, 0.1985, 0.3641, 0.3551]) -Greedy action tensor([-1.9264, -0.4385, 0.6592, -0.1704]) tensor([0.0408, 0.1808, 0.5419, 0.2364]) -Greedy action tensor([-1.9434, -0.4430, 0.6665, -0.1775]) tensor([0.0401, 0.1799, 0.5455, 0.2345]) -Greedy action tensor([-1.8311, -0.3235, 0.6034, -0.1394]) tensor([0.0447, 0.2020, 0.5104, 0.2429]) -Greedy action tensor([-1.9113, -0.3862, 0.6369, -0.1585]) tensor([0.0414, 0.1903, 0.5293, 0.2390]) -Greedy action tensor([-1.8948, -0.4125, 0.6535, -0.1547]) tensor([0.0419, 0.1843, 0.5353, 0.2385]) -Greedy action tensor([-1.8940, -0.4651, 0.6356, -0.1524]) tensor([0.0427, 0.1782, 0.5356, 0.2436]) -Greedy action tensor([-1.8251, -0.4312, 0.6090, -0.1247]) tensor([0.0456, 0.1839, 0.5205, 0.2499]) -Greedy action tensor([-1.9480, -0.4524, 0.6680, -0.1826]) tensor([0.0400, 0.1786, 0.5475, 0.2339]) -Greedy action tensor([-1.6115, -0.4426, 0.6209, -0.0734]) tensor([0.0550, 0.1769, 0.5123, 0.2559]) -Greedy action tensor([-1.6425, 0.3368, 0.4177, 0.1342]) tensor([0.0455, 0.3290, 0.3568, 0.2687]) -Greedy action tensor([-1.9447, -0.4497, 0.6651, -0.1805]) tensor([0.0402, 0.1791, 0.5462, 0.2345]) -Greedy action tensor([-1.7970, -0.0666, 0.6564, -0.3944]) tensor([0.0448, 0.2526, 0.5206, 0.1820]) -Greedy action tensor([-1.4540, 0.5938, 0.2940, 0.0644]) tensor([0.0525, 0.4067, 0.3013, 0.2395]) -Greedy action tensor([-1.5216, 0.1854, 0.3914, -0.2218]) tensor([0.0590, 0.3251, 0.3995, 0.2164]) -Greedy action tensor([-1.0646, -0.3579, 0.3142, 0.3426]) tensor([0.0902, 0.1829, 0.3582, 0.3686]) -Greedy action tensor([-1.9328, -0.4165, 0.6576, -0.1718]) tensor([0.0405, 0.1844, 0.5397, 0.2355]) -Greedy action tensor([-1.8176, -0.4010, 0.5959, -0.1289]) tensor([0.0461, 0.1899, 0.5147, 0.2493]) -Greedy action tensor([-1.5190, -0.1823, 0.4394, -0.0385]) tensor([0.0614, 0.2337, 0.4351, 0.2698]) -Greedy action tensor([-1.2492, -0.1178, 0.6133, 0.1606]) tensor([0.0683, 0.2118, 0.4400, 0.2798]) -Greedy action tensor([-0.9095, 0.6196, 0.1365, -0.0495]) tensor([0.0924, 0.4263, 0.2630, 0.2183]) -Greedy action tensor([-1.3083, -0.2338, 0.6135, 0.1690]) tensor([0.0660, 0.1934, 0.4512, 0.2893]) -Greedy action tensor([-1.7145, -0.1973, 0.5150, -0.1072]) tensor([0.0504, 0.2298, 0.4684, 0.2514]) -Greedy action tensor([-0.8233, -0.0192, 0.1610, -0.0836]) tensor([0.1249, 0.2791, 0.3342, 0.2617]) -Greedy action tensor([-0.9241, 0.3941, 0.1824, -0.0499]) tensor([0.0985, 0.3679, 0.2977, 0.2360]) -Greedy action tensor([-1.7990, -0.5114, 0.5923, -0.1091]) tensor([0.0477, 0.1728, 0.5211, 0.2584]) -Greedy action tensor([-1.8365, -0.4432, 0.6967, -0.0072]) tensor([0.0419, 0.1689, 0.5280, 0.2612]) -Greedy action tensor([-1.7466, -0.1663, 0.5623, -0.0626]) tensor([0.0469, 0.2279, 0.4723, 0.2528]) -Greedy action tensor([-1.9081, -0.4484, 0.6509, -0.1625]) tensor([0.0417, 0.1797, 0.5394, 0.2392]) -Greedy action tensor([-1.0789, -0.7113, 0.2520, 0.2683]) tensor([0.0993, 0.1434, 0.3756, 0.3818]) -Greedy action tensor([-1.9111, -0.3941, 0.6486, -0.1617]) tensor([0.0412, 0.1880, 0.5335, 0.2372]) -Greedy action tensor([-1.7954, -0.4511, 0.5934, -0.1322]) tensor([0.0476, 0.1825, 0.5188, 0.2511]) -Greedy action tensor([-1.2103, 0.0404, 0.3826, 0.2144]) tensor([0.0737, 0.2575, 0.3625, 0.3064]) -Greedy action tensor([-1.6798, -0.2917, 0.6038, 0.0120]) tensor([0.0494, 0.1979, 0.4846, 0.2681]) -Greedy action tensor([-1.9018, -0.3963, 0.6361, -0.1580]) tensor([0.0419, 0.1887, 0.5299, 0.2395]) -Greedy action tensor([-1.5395, -0.4826, 0.4692, -0.0016]) tensor([0.0626, 0.1800, 0.4663, 0.2912]) -Greedy action tensor([-1.5877, -0.4679, 0.5155, 0.0451]) tensor([0.0576, 0.1764, 0.4715, 0.2946]) -Greedy action tensor([-1.5278, 0.2042, 0.3946, -0.0654]) tensor([0.0562, 0.3174, 0.3840, 0.2424]) -Greedy action tensor([-1.4911, 0.4220, 0.3629, 0.1775]) tensor([0.0514, 0.3480, 0.3281, 0.2726]) -Greedy action tensor([-1.8580, -0.3932, 0.6519, -0.1156]) tensor([0.0428, 0.1854, 0.5271, 0.2447]) -Greedy action tensor([-1.9062, -0.3883, 0.6416, -0.1563]) tensor([0.0415, 0.1894, 0.5303, 0.2388]) -Greedy action tensor([-1.5117, 0.4973, 0.3177, 0.0577]) tensor([0.0513, 0.3826, 0.3197, 0.2465]) -Greedy action tensor([-1.6183, -0.5887, 0.5259, 0.0195]) tensor([0.0572, 0.1602, 0.4883, 0.2943]) -Greedy action tensor([-1.7013, -0.1288, 0.5162, -0.1042]) tensor([0.0501, 0.2416, 0.4606, 0.2477]) -Greedy action tensor([-1.9053, -0.4560, 0.6531, -0.1621]) tensor([0.0419, 0.1783, 0.5406, 0.2392]) -Greedy action tensor([-1.8870, -0.4343, 0.6351, -0.1564]) tensor([0.0428, 0.1829, 0.5329, 0.2415]) -Greedy action tensor([-1.9339, -0.4348, 0.6626, -0.1737]) tensor([0.0405, 0.1812, 0.5430, 0.2353]) -Greedy action tensor([-1.7911, -0.4176, 0.5914, -0.1142]) tensor([0.0473, 0.1869, 0.5126, 0.2532]) -Greedy action tensor([-1.8928, -0.4508, 0.6444, -0.1575]) tensor([0.0425, 0.1796, 0.5370, 0.2409]) -Greedy action tensor([-0.8999, -0.0366, 0.2541, -0.0593]) tensor([0.1129, 0.2676, 0.3579, 0.2616]) -Greedy action tensor([-1.8702, -0.4453, 0.6291, -0.1502]) tensor([0.0436, 0.1814, 0.5313, 0.2437]) -Greedy action tensor([-1.3833, 0.4822, 0.2883, -0.0541]) tensor([0.0604, 0.3901, 0.3213, 0.2282]) -Greedy action tensor([-1.8513, -0.4618, 0.6266, -0.1352]) tensor([0.0445, 0.1784, 0.5298, 0.2473]) -Greedy action tensor([-1.9208, -0.3883, 0.6491, -0.1663]) tensor([0.0409, 0.1892, 0.5338, 0.2362]) -Greedy action tensor([-1.4464, 0.4131, 0.2687, 0.1526]) tensor([0.0558, 0.3582, 0.3100, 0.2760]) -Greedy action tensor([-1.0574, 0.1366, 0.1184, -0.0216]) tensor([0.0965, 0.3186, 0.3129, 0.2720]) -Greedy action tensor([-1.9432, -0.4557, 0.6695, -0.1792]) tensor([0.0402, 0.1778, 0.5477, 0.2344]) -Greedy action tensor([0.2309, 0.8273, 0.2098, 0.8943]) tensor([0.1743, 0.3165, 0.1707, 0.3384]) -Greedy action tensor([-1.8959, -0.4494, 0.6488, -0.1382]) tensor([0.0420, 0.1786, 0.5356, 0.2438]) -Greedy action tensor([-1.6555, 0.0488, 0.4599, -0.0656]) tensor([0.0508, 0.2792, 0.4211, 0.2490]) -Greedy action tensor([-1.2981, 0.1255, 0.3192, -0.0404]) tensor([0.0729, 0.3029, 0.3676, 0.2566]) -Greedy action tensor([-1.9231, -0.4168, 0.6573, -0.1690]) tensor([0.0408, 0.1842, 0.5391, 0.2359]) -Greedy action tensor([-1.5537, -0.3449, 0.4580, -0.0475]) tensor([0.0612, 0.2050, 0.4577, 0.2761]) -Greedy action tensor([-1.5350, 0.4927, 0.3465, -0.0423]) tensor([0.0510, 0.3874, 0.3347, 0.2269]) -Greedy action tensor([-1.8615, -0.2007, 0.5788, -0.1151]) tensor([0.0426, 0.2242, 0.4889, 0.2443]) -Greedy action tensor([-1.8722, -0.4064, 0.6199, -0.1735]) tensor([0.0437, 0.1893, 0.5282, 0.2389]) -Greedy action tensor([-1.8873, -0.4596, 0.6456, -0.1517]) tensor([0.0427, 0.1779, 0.5373, 0.2421]) -Greedy action tensor([-1.4034, -0.5196, 0.4003, 0.0727]) tensor([0.0721, 0.1745, 0.4379, 0.3155]) -Greedy action tensor([-1.9009, -0.4057, 0.6466, -0.1555]) tensor([0.0417, 0.1861, 0.5331, 0.2390]) -Greedy action tensor([-1.3974, -0.5369, 0.4092, 0.0330]) tensor([0.0733, 0.1734, 0.4466, 0.3066]) -Greedy action tensor([-1.7094, 0.3205, 0.4625, -0.1183]) tensor([0.0448, 0.3415, 0.3935, 0.2202]) -Greedy action tensor([-1.8755, -0.3739, 0.6435, -0.1231]) tensor([0.0422, 0.1896, 0.5245, 0.2437]) -Greedy action tensor([ 1.2787, -0.4568, 0.1619, -0.4643]) tensor([0.5957, 0.1050, 0.1950, 0.1043]) -Greedy action tensor([ 0.4689, -0.1770, -0.0226, -0.0748]) tensor([0.3681, 0.1930, 0.2252, 0.2137]) -Greedy action tensor([ 0.7235, -0.1310, -0.0711, -0.0407]) tensor([0.4268, 0.1816, 0.1928, 0.1988]) -Greedy action tensor([ 0.7485, -0.4152, 0.0929, -0.2034]) tensor([0.4510, 0.1408, 0.2341, 0.1741]) -Greedy action tensor([ 0.7600, -0.5978, 0.0329, -0.5309]) tensor([0.4961, 0.1276, 0.2398, 0.1365]) -Greedy action tensor([ 0.9665, -0.0794, 0.0488, -0.1102]) tensor([0.4781, 0.1680, 0.1910, 0.1629]) -Greedy action tensor([ 1.0862, -0.3572, -0.1253, -0.2879]) tensor([0.5596, 0.1321, 0.1666, 0.1416]) -Greedy action tensor([ 1.1210, -0.7432, 0.0422, -0.6441]) tensor([0.6002, 0.0930, 0.2041, 0.1027]) -Greedy action tensor([ 0.7993, -0.4951, -0.1181, -0.4249]) tensor([0.5082, 0.1393, 0.2031, 0.1494]) -Greedy action tensor([ 0.7044, 0.0239, -0.0876, -0.3078]) tensor([0.4305, 0.2180, 0.1950, 0.1565]) -Greedy action tensor([ 1.0789, -0.6066, -0.0732, -0.5520]) tensor([0.5893, 0.1092, 0.1862, 0.1153]) -Greedy action tensor([ 0.6181, 0.2726, -0.1952, 0.2291]) tensor([0.3535, 0.2502, 0.1567, 0.2396]) -Greedy action tensor([ 0.8198, -0.3882, -0.0544, -0.3001]) tensor([0.4897, 0.1463, 0.2043, 0.1598]) -Greedy action tensor([ 0.8292, -0.4787, -0.1264, -0.2922]) tensor([0.5048, 0.1365, 0.1942, 0.1645]) -Greedy action tensor([ 0.8431, -0.7843, 0.2732, -0.3933]) tensor([0.4872, 0.0957, 0.2756, 0.1415]) -Greedy action tensor([ 0.7290, -0.5451, -0.0897, -0.1414]) tensor([0.4674, 0.1307, 0.2061, 0.1957]) -Greedy action tensor([ 0.8052, -0.4774, -0.0732, -0.4545]) tensor([0.5059, 0.1403, 0.2102, 0.1436]) -Greedy action tensor([ 0.7267, -0.9159, 0.0175, -0.2719]) tensor([0.4869, 0.0942, 0.2396, 0.1794]) -Greedy action tensor([ 0.7642, -0.4386, 0.1944, -0.3552]) tensor([0.4561, 0.1370, 0.2580, 0.1489]) -Greedy action tensor([ 0.7088, -0.3723, -0.0151, -0.3245]) tensor([0.4587, 0.1556, 0.2224, 0.1632]) -Greedy action tensor([ 0.5329, -0.2076, 0.0008, -0.2619]) tensor([0.3975, 0.1896, 0.2335, 0.1795]) -Greedy action tensor([ 0.8203, -0.3671, -0.1641, -0.2704]) tensor([0.4963, 0.1514, 0.1855, 0.1668]) -Greedy action tensor([ 0.4922, -0.3215, -0.1468, -0.0953]) tensor([0.3958, 0.1754, 0.2089, 0.2199]) -Greedy action tensor([ 0.6851, -0.2093, -0.0371, -0.0584]) tensor([0.4219, 0.1725, 0.2049, 0.2006]) -Greedy action tensor([ 1.0284, -0.5954, 0.0443, -0.3170]) tensor([0.5460, 0.1077, 0.2041, 0.1422]) -Greedy action tensor([ 0.8520, -0.6227, -0.0117, -0.2647]) tensor([0.5056, 0.1157, 0.2132, 0.1655]) -Greedy action tensor([ 1.0853, -0.6955, -0.0781, -0.4499]) tensor([0.5895, 0.0993, 0.1842, 0.1270]) -Greedy action tensor([ 1.2182, -0.8170, 0.0080, -0.6283]) tensor([0.6303, 0.0823, 0.1879, 0.0995]) -Greedy action tensor([ 0.9191, -0.6969, 0.2589, -0.6486]) tensor([0.5198, 0.1033, 0.2686, 0.1084]) -Greedy action tensor([ 0.9384, -0.5175, -0.0337, -0.4637]) tensor([0.5383, 0.1255, 0.2037, 0.1325]) -Greedy action tensor([ 0.8295, -0.6121, -0.0187, -0.3205]) tensor([0.5047, 0.1194, 0.2161, 0.1598]) -Greedy action tensor([ 0.8283, -0.3999, -0.1051, -0.1527]) tensor([0.4852, 0.1421, 0.1908, 0.1819]) -Greedy action tensor([ 0.5815, -0.4445, -0.0956, -0.1101]) tensor([0.4224, 0.1514, 0.2146, 0.2115]) -Greedy action tensor([ 0.7349, 0.0737, 0.3135, -0.1535]) tensor([0.3870, 0.1998, 0.2540, 0.1592]) -Greedy action tensor([ 0.9279, -0.8114, 0.0101, -0.4784]) tensor([0.5494, 0.0965, 0.2194, 0.1346]) -Greedy action tensor([ 0.4966, -0.3072, 0.0194, -0.3484]) tensor([0.4004, 0.1792, 0.2484, 0.1720]) -Greedy action tensor([ 1.1563, -0.5117, 0.1434, -0.6597]) tensor([0.5833, 0.1100, 0.2118, 0.0949]) -Greedy action tensor([ 0.4643, -0.1697, -0.0754, -0.1138]) tensor([0.3739, 0.1984, 0.2180, 0.2098]) -Greedy action tensor([ 0.8734, -0.4040, -0.1497, -0.4474]) tensor([0.5249, 0.1463, 0.1887, 0.1401]) -Greedy action tensor([ 0.8567, -0.3052, -0.0449, -0.1314]) tensor([0.4782, 0.1496, 0.1941, 0.1780]) -Greedy action tensor([ 1.1055, -0.4720, -0.1201, -0.6438]) tensor([0.5974, 0.1234, 0.1754, 0.1039]) -Greedy action tensor([ 1.0337, -0.7055, 0.0500, -0.4379]) tensor([0.5621, 0.0987, 0.2102, 0.1290]) -Greedy action tensor([ 1.2266, -0.3824, -0.0678, -0.1613]) tensor([0.5801, 0.1161, 0.1590, 0.1448]) -Greedy action tensor([ 1.0814, -0.6667, -0.0092, -0.5949]) tensor([0.5892, 0.1026, 0.1980, 0.1102]) -Greedy action tensor([ 0.8498, -0.6106, 0.1094, -0.5544]) tensor([0.5116, 0.1188, 0.2440, 0.1256]) -Greedy action tensor([ 0.5740, -0.3586, 0.1545, -0.4741]) tensor([0.4164, 0.1639, 0.2737, 0.1460]) -Greedy action tensor([ 1.0259, -0.5963, -0.0470, -0.4766]) tensor([0.5675, 0.1121, 0.1941, 0.1263]) -Greedy action tensor([ 0.9407, -0.8102, -0.0976, -0.2595]) tensor([0.5468, 0.0949, 0.1936, 0.1647]) -Greedy action tensor([ 0.8223, -0.6136, 0.0736, -0.7569]) tensor([0.5216, 0.1241, 0.2467, 0.1075]) -Greedy action tensor([ 0.8762, -0.6199, -0.1274, -0.4728]) tensor([0.5405, 0.1211, 0.1981, 0.1403]) -Greedy action tensor([ 0.6461, -0.1506, -0.0731, -0.0181]) tensor([0.4077, 0.1838, 0.1986, 0.2098]) -Greedy action tensor([ 0.8413, -0.6498, -0.0368, -0.2830]) tensor([0.5087, 0.1145, 0.2114, 0.1653]) -Greedy action tensor([ 1.0052, -0.6505, -0.0720, -0.6639]) tensor([0.5814, 0.1110, 0.1980, 0.1095]) -Greedy action tensor([ 1.0615, -0.8609, 0.0638, -0.4442]) tensor([0.5758, 0.0842, 0.2123, 0.1277]) -Greedy action tensor([ 0.5986, -0.0945, 0.0657, -0.0463]) tensor([0.3829, 0.1915, 0.2247, 0.2009]) -Greedy action tensor([ 0.9030, -0.1526, -0.1144, -0.4743]) tensor([0.5097, 0.1774, 0.1843, 0.1286]) -Greedy action tensor([ 1.0862, -0.5782, 0.0520, -0.6419]) tensor([0.5806, 0.1099, 0.2064, 0.1031]) -Greedy action tensor([ 0.7602, 0.1541, -0.2367, 0.0210]) tensor([0.4181, 0.2280, 0.1543, 0.1996]) -Greedy action tensor([ 0.8819, -0.2607, -0.0743, -0.1172]) tensor([0.4827, 0.1540, 0.1855, 0.1777]) -Greedy action tensor([ 0.8231, -0.6273, -0.0485, -0.2220]) tensor([0.4989, 0.1170, 0.2087, 0.1754]) -Greedy action tensor([ 0.7470, -0.7064, 0.0280, -0.3527]) tensor([0.4869, 0.1138, 0.2372, 0.1621]) -Greedy action tensor([ 0.9371, -0.5545, -0.1544, -0.2025]) tensor([0.5317, 0.1196, 0.1785, 0.1701]) -Greedy action tensor([ 0.7138, -0.4960, 0.1595, -0.1494]) tensor([0.4358, 0.1300, 0.2504, 0.1838]) -Greedy action tensor([ 0.7124, -0.4712, -0.1376, -0.0768]) tensor([0.4571, 0.1399, 0.1954, 0.2076]) -Greedy action tensor([ 0.8720, -0.4217, -0.1057, -0.2068]) tensor([0.5024, 0.1378, 0.1890, 0.1708]) -Greedy action tensor([ 0.4477, -0.4368, -0.0174, -0.2025]) tensor([0.3902, 0.1611, 0.2451, 0.2037]) -Greedy action tensor([ 0.7291, -0.5696, 0.0833, -0.2240]) tensor([0.4581, 0.1250, 0.2402, 0.1766]) -Greedy action tensor([ 1.2444, -0.6998, 0.0658, -0.5001]) tensor([0.6152, 0.0880, 0.1893, 0.1075]) -Greedy action tensor([ 0.6825, -0.6691, -0.1207, -0.2069]) tensor([0.4722, 0.1222, 0.2115, 0.1940]) -Greedy action tensor([ 0.3820, 0.1114, -0.1473, 0.0070]) tensor([0.3290, 0.2510, 0.1938, 0.2261]) -Greedy action tensor([ 1.0153, -0.6964, 0.0428, -0.7431]) tensor([0.5777, 0.1043, 0.2184, 0.0995]) -Greedy action tensor([ 0.7279, -0.8356, 0.0643, -0.3797]) tensor([0.4867, 0.1019, 0.2506, 0.1608]) -Greedy action tensor([ 0.6403, -0.5941, -0.0438, -0.2527]) tensor([0.4535, 0.1320, 0.2288, 0.1857]) -Greedy action tensor([ 0.4031, 0.1392, 0.0152, -0.0123]) tensor([0.3219, 0.2472, 0.2184, 0.2125]) -Greedy action tensor([ 1.1987, -0.6502, -0.1735, -0.6594]) tensor([0.6382, 0.1005, 0.1618, 0.0995]) -Greedy action tensor([ 0.6127, -0.1115, -0.2240, 0.0150]) tensor([0.4052, 0.1964, 0.1755, 0.2229]) -Greedy action tensor([ 0.6388, -0.2083, -0.1072, -0.1558]) tensor([0.4247, 0.1820, 0.2014, 0.1919]) -Greedy action tensor([ 0.6109, -0.0568, 0.0881, 0.0271]) tensor([0.3754, 0.1926, 0.2226, 0.2094]) -Greedy action tensor([ 0.9423, -0.3572, -0.0494, -0.3123]) tensor([0.5185, 0.1414, 0.1923, 0.1479]) -Greedy action tensor([ 1.0186, -0.6328, 0.0585, -0.5751]) tensor([0.5625, 0.1079, 0.2153, 0.1143]) -Greedy action tensor([ 1.2490, -0.7340, -0.0269, -0.6907]) tensor([0.6408, 0.0882, 0.1789, 0.0921]) -Greedy action tensor([ 1.7617, -0.2852, -0.5416, 0.1083]) tensor([0.7040, 0.0909, 0.0703, 0.1347]) -Greedy action tensor([ 1.5691, -0.0996, -0.3143, 0.1788]) tensor([0.6291, 0.1186, 0.0957, 0.1567]) -Greedy action tensor([ 1.4434, -0.3136, -0.9375, -0.0085]) tensor([0.6670, 0.1151, 0.0617, 0.1562]) -Greedy action tensor([ 1.9052, -0.4057, -0.2370, 0.3145]) tensor([0.7040, 0.0698, 0.0827, 0.1435]) -Greedy action tensor([ 1.2692, -0.1867, -0.7696, 0.2620]) tensor([0.5785, 0.1349, 0.0753, 0.2113]) -Greedy action tensor([ 1.3109, -0.3238, -0.5580, 0.3023]) tensor([0.5834, 0.1138, 0.0900, 0.2128]) -Greedy action tensor([ 1.4001, -0.6327, -0.8759, 0.2432]) tensor([0.6460, 0.0846, 0.0663, 0.2031]) -Greedy action tensor([ 1.4679, -0.5888, -0.0865, 0.2102]) tensor([0.6160, 0.0788, 0.1302, 0.1751]) -Greedy action tensor([ 1.8238, -0.6012, -0.7220, 0.8587]) tensor([0.6461, 0.0572, 0.0507, 0.2461]) -Greedy action tensor([ 1.2502, -0.2327, -0.0778, -0.0671]) tensor([0.5682, 0.1290, 0.1506, 0.1522]) -Greedy action tensor([ 1.1477, -0.4614, -0.6826, 0.3555]) tensor([0.5515, 0.1103, 0.0884, 0.2497]) -Greedy action tensor([ 1.2629, -0.6672, -0.3748, 0.6191]) tensor([0.5362, 0.0778, 0.1043, 0.2817]) -Greedy action tensor([ 1.2208, -0.4445, -0.4022, 0.4196]) tensor([0.5449, 0.1031, 0.1075, 0.2445]) -Greedy action tensor([ 1.6413, -0.4399, -0.4818, 0.5662]) tensor([0.6306, 0.0787, 0.0755, 0.2152]) -Greedy action tensor([ 1.1630, -0.1989, -1.0005, 0.0317]) tensor([0.5904, 0.1513, 0.0679, 0.1905]) -Greedy action tensor([ 1.7232, -0.3105, -0.8392, 0.6960]) tensor([0.6386, 0.0836, 0.0492, 0.2286]) -Greedy action tensor([ 2.1661, -1.1554, -0.5203, 0.7817]) tensor([0.7382, 0.0266, 0.0503, 0.1849]) -Greedy action tensor([ 1.0713, -0.3888, -0.2073, 0.0725]) tensor([0.5322, 0.1236, 0.1482, 0.1960]) -Greedy action tensor([ 2.4127, -1.3155, -0.4108, 0.7596]) tensor([0.7844, 0.0189, 0.0466, 0.1502]) -Greedy action tensor([ 1.1675, -0.4784, -0.0895, 0.5236]) tensor([0.4993, 0.0963, 0.1421, 0.2623]) -Greedy action tensor([ 1.8192, -0.8606, -0.2651, 0.7042]) tensor([0.6575, 0.0451, 0.0818, 0.2156]) -Greedy action tensor([ 1.5988, 0.1487, -0.0343, -0.3664]) tensor([0.6370, 0.1494, 0.1244, 0.0893]) -Greedy action tensor([ 1.2064, -0.8386, -0.0691, 0.8310]) tensor([0.4772, 0.0617, 0.1333, 0.3278]) -Greedy action tensor([ 1.4653, -0.5285, -0.6813, 0.1683]) tensor([0.6551, 0.0892, 0.0766, 0.1791]) -Greedy action tensor([ 1.7625, -0.5447, -0.7663, 0.5949]) tensor([0.6710, 0.0668, 0.0535, 0.2087]) -Greedy action tensor([ 2.1107, -1.4218, -0.2152, 0.8986]) tensor([0.7020, 0.0205, 0.0686, 0.2089]) -Greedy action tensor([ 1.0833, -0.3720, -0.7919, 0.4282]) tensor([0.5246, 0.1224, 0.0804, 0.2725]) -Greedy action tensor([ 1.3482, -0.3865, -0.3365, 0.4878]) tensor([0.5602, 0.0989, 0.1039, 0.2370]) -Greedy action tensor([ 1.3910, -0.3451, -0.2336, 0.0402]) tensor([0.6127, 0.1080, 0.1207, 0.1587]) -Greedy action tensor([ 2.0640, -0.6787, 0.0319, 0.6116]) tensor([0.6996, 0.0451, 0.0917, 0.1637]) -Greedy action tensor([ 1.2773, -0.2326, -0.5109, 0.1423]) tensor([0.5849, 0.1292, 0.0978, 0.1880]) -Greedy action tensor([ 2.1014, -0.5905, -0.6709, 0.5146]) tensor([0.7492, 0.0508, 0.0468, 0.1533]) -Greedy action tensor([ 1.9399, -1.5372, 0.3034, -0.0225]) tensor([0.7320, 0.0226, 0.1425, 0.1029]) -Greedy action tensor([ 1.0119, -0.6110, -0.4415, 0.7436]) tensor([0.4554, 0.0899, 0.1065, 0.3483]) -Greedy action tensor([ 1.6464, -0.5442, -1.2150, 0.3256]) tensor([0.6964, 0.0779, 0.0398, 0.1859]) -Greedy action tensor([ 1.6631, -0.5502, -0.0091, 0.5108]) tensor([0.6199, 0.0678, 0.1164, 0.1959]) -Greedy action tensor([ 1.8205, -1.2016, -0.0814, 0.3034]) tensor([0.7056, 0.0344, 0.1053, 0.1548]) -Greedy action tensor([ 1.9510, -0.9050, -0.3135, 0.6191]) tensor([0.7016, 0.0403, 0.0729, 0.1852]) -Greedy action tensor([ 1.3723, 0.0654, -0.6815, 0.3231]) tensor([0.5717, 0.1547, 0.0733, 0.2002]) -Greedy action tensor([ 1.2048, -0.3537, -0.2013, 0.2232]) tensor([0.5464, 0.1150, 0.1339, 0.2047]) -Greedy action tensor([ 1.8886, -0.9431, -0.3650, 0.3307]) tensor([0.7275, 0.0429, 0.0764, 0.1532]) -Greedy action tensor([ 1.4170, -0.3812, -0.2609, 0.4076]) tensor([0.5825, 0.0965, 0.1088, 0.2123]) -Greedy action tensor([ 1.5893, -0.1543, -0.5328, 0.4861]) tensor([0.6148, 0.1075, 0.0736, 0.2040]) -Greedy action tensor([ 1.4241, -0.7435, -0.4284, 0.5659]) tensor([0.5899, 0.0675, 0.0925, 0.2501]) -Greedy action tensor([ 1.0794, -0.1785, -0.2684, 0.1829]) tensor([0.5123, 0.1456, 0.1331, 0.2090]) -Greedy action tensor([ 1.2723, -0.3109, -0.6402, 0.4170]) tensor([0.5624, 0.1155, 0.0831, 0.2391]) -Greedy action tensor([ 1.7517, -0.4175, -0.8115, 0.1614]) tensor([0.7167, 0.0819, 0.0552, 0.1461]) -Greedy action tensor([ 1.7319, -0.7474, -0.3969, 0.3835]) tensor([0.6838, 0.0573, 0.0814, 0.1775]) -Greedy action tensor([ 2.2468, -0.5239, -0.6835, 0.4625]) tensor([0.7789, 0.0488, 0.0416, 0.1308]) -Greedy action tensor([ 1.9820, -1.2634, -0.2470, 0.4921]) tensor([0.7289, 0.0284, 0.0785, 0.1643]) -Greedy action tensor([ 1.6598, -0.3740, -0.7514, 0.0917]) tensor([0.6998, 0.0916, 0.0628, 0.1459]) -Greedy action tensor([ 1.4120, -0.2492, -0.7460, 0.6084]) tensor([0.5704, 0.1083, 0.0659, 0.2554]) -Greedy action tensor([ 1.4006, -0.9434, -0.0916, -0.1256]) tensor([0.6501, 0.0624, 0.1462, 0.1413]) -Greedy action tensor([ 1.5378, -1.3223, -0.2386, 0.0763]) tensor([0.6857, 0.0393, 0.1161, 0.1590]) -Greedy action tensor([ 1.6481, -1.0562, -0.2505, 0.2910]) tensor([0.6784, 0.0454, 0.1016, 0.1746]) -Greedy action tensor([ 1.5448, -0.3632, -0.5562, 0.3364]) tensor([0.6372, 0.0945, 0.0780, 0.1903]) -Greedy action tensor([ 1.2468, -0.4819, 0.0192, 0.2660]) tensor([0.5419, 0.0962, 0.1588, 0.2032]) -Greedy action tensor([ 1.3804, -0.3616, -0.5869, 0.1774]) tensor([0.6191, 0.1084, 0.0866, 0.1859]) -Greedy action tensor([ 1.5176, 0.4307, -0.3675, 0.1744]) tensor([0.5714, 0.1927, 0.0867, 0.1491]) -Greedy action tensor([ 1.2437, -0.2677, -0.8490, 0.0429]) tensor([0.6080, 0.1341, 0.0750, 0.1829]) -Greedy action tensor([ 1.9508, -0.8647, -0.6876, 0.4810]) tensor([0.7346, 0.0440, 0.0525, 0.1689]) -Greedy action tensor([ 1.4393, -1.2097, -0.1212, -0.0787]) tensor([0.6667, 0.0471, 0.1400, 0.1461]) -Greedy action tensor([ 1.1624, -0.2431, -0.9871, 0.2135]) tensor([0.5718, 0.1402, 0.0666, 0.2214]) -Greedy action tensor([ 1.4067, -0.0105, -1.0797, -0.2656]) tensor([0.6608, 0.1602, 0.0550, 0.1241]) -Greedy action tensor([ 1.8015, -0.4377, -0.4056, 0.1491]) tensor([0.7102, 0.0757, 0.0781, 0.1361]) -Greedy action tensor([ 1.3206, -0.7636, -0.3944, 0.3632]) tensor([0.5923, 0.0737, 0.1066, 0.2274]) -Greedy action tensor([ 1.4415, -0.5617, -0.0268, 0.0917]) tensor([0.6156, 0.0830, 0.1418, 0.1596]) -Greedy action tensor([ 0.9449, -0.2882, -0.0644, -0.1565]) tensor([0.5030, 0.1466, 0.1833, 0.1672]) -Greedy action tensor([ 0.9312, -0.2355, -0.1313, 0.2759]) tensor([0.4595, 0.1431, 0.1588, 0.2386]) -Greedy action tensor([ 1.5174, -0.6004, -0.7587, 0.2557]) tensor([0.6639, 0.0799, 0.0682, 0.1880]) -Greedy action tensor([ 1.2655, -0.3052, -0.6851, 0.0509]) tensor([0.6072, 0.1262, 0.0863, 0.1802]) -Greedy action tensor([ 1.1222, -0.3615, -0.8096, 0.0561]) tensor([0.5827, 0.1322, 0.0844, 0.2007]) -Greedy action tensor([ 1.3688, -0.5393, -0.3420, 0.0362]) tensor([0.6278, 0.0931, 0.1135, 0.1656]) -Greedy action tensor([ 1.0344, -0.2888, -0.7343, 0.2439]) tensor([0.5290, 0.1409, 0.0902, 0.2399]) -Greedy action tensor([ 1.2781, -0.6504, -0.6794, 0.0371]) tensor([0.6347, 0.0923, 0.0896, 0.1835]) -Greedy action tensor([ 1.3804, -0.5695, -0.4252, 0.2090]) tensor([0.6186, 0.0880, 0.1017, 0.1917]) -Greedy action tensor([ 1.6207, -0.2615, -0.5809, 0.3882]) tensor([0.6433, 0.0979, 0.0712, 0.1876]) -Greedy action tensor([ 1.7546, -1.0315, -0.2693, 0.2715]) tensor([0.7039, 0.0434, 0.0930, 0.1597]) -Greedy action tensor([ 1.8926, -0.5661, -0.5801, 0.3414]) tensor([0.7236, 0.0619, 0.0610, 0.1534]) -Greedy action tensor([ 1.2759, -0.2768, -0.5690, 0.8836]) tensor([0.4889, 0.1035, 0.0773, 0.3303]) -Greedy action tensor([ 0.9780, -0.4008, -0.0101, 0.0219]) tensor([0.4979, 0.1254, 0.1854, 0.1914]) -Greedy action tensor([-0.4365, -1.1328, 0.6250, -0.1281]) tensor([0.1739, 0.0867, 0.5027, 0.2367]) -Greedy action tensor([ 0.8412, -1.3318, 0.2729, 0.0135]) tensor([0.4723, 0.0538, 0.2675, 0.2064]) -Greedy action tensor([ 0.4692, 0.3840, 0.0761, -0.9462]) tensor([0.3526, 0.3238, 0.2380, 0.0856]) -Greedy action tensor([-1.3479, -0.3423, -0.0296, 0.0467]) tensor([0.0869, 0.2376, 0.3249, 0.3506]) -Greedy action tensor([ 0.8578, -0.0954, 0.1468, 0.8681]) tensor([0.3464, 0.1335, 0.1701, 0.3500]) -Greedy action tensor([ 1.0268, -1.1345, -0.0873, 1.1549]) tensor([0.3876, 0.0446, 0.1272, 0.4405]) -Greedy action tensor([-0.6107, -0.1684, 0.4844, -0.0779]) tensor([0.1379, 0.2147, 0.4124, 0.2350]) -Greedy action tensor([ 0.7521, 0.3765, -0.2056, -0.2281]) tensor([0.4088, 0.2808, 0.1569, 0.1534]) -Greedy action tensor([ 0.8509, -0.4750, 0.0401, -0.0982]) tensor([0.4768, 0.1266, 0.2120, 0.1846]) -Greedy action tensor([ 0.8390, -1.3719, -0.2730, 0.2198]) tensor([0.5058, 0.0554, 0.1664, 0.2723]) -Greedy action tensor([ 0.3660, -1.4007, 0.5285, -0.1197]) tensor([0.3375, 0.0577, 0.3971, 0.2077]) -Greedy action tensor([-0.7419, 0.1679, -1.1854, -0.7411]) tensor([0.1951, 0.4845, 0.1252, 0.1952]) -Greedy action tensor([ 0.1611, -1.5579, -0.0845, -0.4795]) tensor([0.4019, 0.0720, 0.3143, 0.2118]) -Greedy action tensor([ 0.5839, -0.1706, 0.1282, -0.4444]) tensor([0.4062, 0.1910, 0.2575, 0.1453]) -Greedy action tensor([ 0.8225, -1.2513, 0.1701, 0.2850]) tensor([0.4483, 0.0564, 0.2335, 0.2619]) -Greedy action tensor([ 0.7257, -1.3819, -0.1550, -0.1140]) tensor([0.5082, 0.0618, 0.2106, 0.2195]) -Greedy action tensor([ 0.0609, -0.3687, -0.4950, 0.0419]) tensor([0.3120, 0.2030, 0.1789, 0.3061]) -Greedy action tensor([-0.4733, -0.2663, 0.7123, -0.3357]) tensor([0.1504, 0.1850, 0.4921, 0.1725]) -Greedy action tensor([ 0.0788, -0.1658, -0.0023, 0.6001]) tensor([0.2278, 0.1784, 0.2101, 0.3837]) -Greedy action tensor([ 0.0856, -0.8255, 0.8451, -1.2868]) tensor([0.2637, 0.1060, 0.5635, 0.0668]) -Greedy action tensor([ 0.3262, -0.0767, -0.6160, -0.1433]) tensor([0.3727, 0.2491, 0.1452, 0.2330]) -Greedy action tensor([ 0.5816, -0.8279, -0.1102, 0.6115]) tensor([0.3603, 0.0880, 0.1804, 0.3713]) -Greedy action tensor([-1.0144e-03, -1.5930e+00, 1.0286e+00, -7.3378e-01]) tensor([0.2230, 0.0454, 0.6244, 0.1072]) -Greedy action tensor([ 0.5432, -0.6198, -0.2826, 0.2021]) tensor([0.4063, 0.1270, 0.1779, 0.2889]) -Greedy action tensor([-0.4328, -0.3824, -0.2028, -0.6232]) tensor([0.2417, 0.2542, 0.3042, 0.1998]) -Greedy action tensor([-0.4283, 0.6784, -0.7245, -0.8776]) tensor([0.1850, 0.5594, 0.1376, 0.1180]) -Greedy action tensor([-0.2791, -1.3537, -0.6878, 0.6616]) tensor([0.2189, 0.0748, 0.1455, 0.5608]) -Greedy action tensor([ 0.0058, -0.0843, 0.2868, -1.0948]) tensor([0.2800, 0.2559, 0.3709, 0.0932]) -Greedy action tensor([-0.4486, 0.8099, -0.3449, -1.2167]) tensor([0.1641, 0.5777, 0.1820, 0.0761]) -Greedy action tensor([ 0.4018, -1.6212, 0.9234, 0.0433]) tensor([0.2844, 0.0376, 0.4792, 0.1988]) -Greedy action tensor([-1.1879, -1.9623, 0.6094, -0.5544]) tensor([0.1066, 0.0492, 0.6433, 0.2009]) -Greedy action tensor([ 0.5055, -0.7301, 0.1796, -0.2121]) tensor([0.3999, 0.1162, 0.2887, 0.1951]) -Greedy action tensor([ 1.1170, -0.0900, -0.3298, 0.8317]) tensor([0.4374, 0.1308, 0.1029, 0.3288]) -Greedy action tensor([-0.2187, -1.2577, -0.0394, 0.6711]) tensor([0.2006, 0.0710, 0.2400, 0.4884]) -Greedy action tensor([-0.7901, -0.9701, 0.3747, -0.8527]) tensor([0.1672, 0.1397, 0.5360, 0.1571]) -Greedy action tensor([-0.5177, -1.0137, -0.1308, 0.0465]) tensor([0.2066, 0.1258, 0.3042, 0.3633]) -Greedy action tensor([-0.3687, -0.3450, 0.4978, -0.4053]) tensor([0.1863, 0.1908, 0.4432, 0.1797]) -Greedy action tensor([ 0.8644, -1.1795, 0.3315, 0.1190]) tensor([0.4564, 0.0591, 0.2679, 0.2166]) -Greedy action tensor([ 0.6522, -0.9400, 1.0565, 0.1169]) tensor([0.3042, 0.0619, 0.4558, 0.1781]) -Greedy action tensor([-0.3049, -1.4928, 1.0088, -0.3716]) tensor([0.1678, 0.0511, 0.6241, 0.1570]) -Greedy action tensor([ 1.4326, -0.6600, -0.8873, 1.0222]) tensor([0.5305, 0.0654, 0.0521, 0.3519]) -Greedy action tensor([-0.7371, -0.5489, -0.0589, -0.3219]) tensor([0.1757, 0.2121, 0.3461, 0.2661]) -Greedy action tensor([ 0.3434, -1.6647, 0.5234, 0.5105]) tensor([0.2846, 0.0382, 0.3408, 0.3364]) -Greedy action tensor([ 0.3234, -0.0588, -0.2797, 0.2887]) tensor([0.3129, 0.2136, 0.1712, 0.3023]) -Greedy action tensor([-1.3911, -0.3267, 0.2317, -0.9849]) tensor([0.0955, 0.2770, 0.4841, 0.1434]) -Greedy action tensor([-0.7034, -0.0310, 0.0163, 0.0402]) tensor([0.1405, 0.2753, 0.2886, 0.2956]) -Greedy action tensor([ 0.0891, 0.0541, 0.2601, -0.2890]) tensor([0.2606, 0.2516, 0.3092, 0.1786]) -Greedy action tensor([ 0.6581, -0.2243, 0.2478, 0.0650]) tensor([0.3803, 0.1573, 0.2523, 0.2101]) -Greedy action tensor([ 0.6835, -1.8453, 1.2958, -0.1591]) tensor([0.2981, 0.0238, 0.5498, 0.1283]) -Greedy action tensor([ 0.6089, -1.4699, -0.6227, 0.1860]) tensor([0.4826, 0.0604, 0.1408, 0.3162]) -Greedy action tensor([0.9912, 0.1278, 0.8692, 0.0401]) tensor([0.3713, 0.1566, 0.3287, 0.1434]) -Greedy action tensor([-0.3344, 0.3696, 0.0506, 0.1680]) tensor([0.1628, 0.3291, 0.2392, 0.2690]) -Greedy action tensor([ 0.4229, -0.3672, 0.2110, -0.3623]) tensor([0.3678, 0.1669, 0.2976, 0.1677]) -Greedy action tensor([ 0.0174, -1.6325, 0.5054, -0.2519]) tensor([0.2789, 0.0536, 0.4544, 0.2131]) -Greedy action tensor([-0.2571, -0.5828, -0.0813, -0.1541]) tensor([0.2486, 0.1795, 0.2964, 0.2756]) -Greedy action tensor([-0.1690, -1.2148, -0.0906, -0.0757]) tensor([0.2832, 0.0995, 0.3063, 0.3109]) -Greedy action tensor([ 0.2175, -0.0890, -0.1619, 0.0068]) tensor([0.3096, 0.2278, 0.2118, 0.2507]) -Greedy action tensor([-0.5185, 0.1287, -0.5437, -0.9958]) tensor([0.2219, 0.4239, 0.2164, 0.1377]) -Greedy action tensor([-0.0710, -1.9926, -0.3880, -0.1970]) tensor([0.3628, 0.0531, 0.2642, 0.3199]) -Greedy action tensor([-0.2865, -0.4575, -0.1911, -0.5847]) tensor([0.2714, 0.2287, 0.2985, 0.2014]) -Greedy action tensor([-1.1612, 0.1137, 0.3871, -1.5919]) tensor([0.1007, 0.3603, 0.4736, 0.0654]) -Greedy action tensor([-0.9223, -0.7312, -0.0076, 0.3873]) tensor([0.1189, 0.1439, 0.2967, 0.4404]) -Greedy action tensor([ 0.3809, -1.4638, 0.8314, 0.0271]) tensor([0.2916, 0.0461, 0.4576, 0.2047]) -Greedy action tensor([ 0.3226, -0.7680, -0.3307, 0.2276]) tensor([0.3616, 0.1215, 0.1881, 0.3288]) -Greedy action tensor([ 0.4096, -0.1124, -0.2832, -0.4135]) tensor([0.3949, 0.2343, 0.1975, 0.1734]) -Greedy action tensor([ 0.1575, -1.2402, -0.3217, 0.9153]) tensor([0.2500, 0.0618, 0.1548, 0.5334]) -Greedy action tensor([ 0.3830, -0.8428, 0.4802, 0.7545]) tensor([0.2600, 0.0763, 0.2866, 0.3771]) -Greedy action tensor([-0.8476, -0.4160, -1.1558, -0.4373]) tensor([0.2091, 0.3220, 0.1537, 0.3152]) -Greedy action tensor([ 0.2108, -0.5463, -0.0527, -1.0966]) tensor([0.3987, 0.1870, 0.3064, 0.1079]) -Greedy action tensor([-0.7919, -0.6600, -0.0287, -0.7375]) tensor([0.1872, 0.2136, 0.4015, 0.1977]) -Greedy action tensor([ 0.8408, -0.0702, 0.8366, -0.0481]) tensor([0.3560, 0.1431, 0.3545, 0.1464]) -Greedy action tensor([-0.2696, -1.6233, 0.7763, -0.0498]) tensor([0.1869, 0.0483, 0.5320, 0.2329]) -Greedy action tensor([ 1.0567, -0.0884, 0.1563, 0.5603]) tensor([0.4286, 0.1364, 0.1742, 0.2609]) -Greedy action tensor([ 0.4282, -0.6806, 0.2903, 0.8978]) tensor([0.2631, 0.0868, 0.2292, 0.4208]) -Greedy action tensor([-0.7494, -0.1903, -0.5472, -0.4457]) tensor([0.1877, 0.3283, 0.2297, 0.2543]) -Greedy action tensor([-0.8439, -0.4041, 0.6176, -0.9506]) tensor([0.1288, 0.2000, 0.5555, 0.1158]) -Greedy action tensor([-0.0443, -1.3076, 0.4349, -0.0486]) tensor([0.2569, 0.0726, 0.4148, 0.2557]) -Greedy action tensor([ 0.2586, -1.8363, 0.2714, -0.1699]) tensor([0.3587, 0.0442, 0.3634, 0.2337]) -Greedy action tensor([ 0.0847, -0.5118, 0.8001, -0.4556]) tensor([0.2393, 0.1318, 0.4894, 0.1394]) -Greedy action tensor([-0.8214, -0.4705, -0.5152, -0.6412]) tensor([0.2010, 0.2854, 0.2730, 0.2406]) -Greedy action tensor([-0.2103, -1.2304, -0.7392, 0.4052]) tensor([0.2631, 0.0949, 0.1551, 0.4869]) -Greedy action tensor([ 0.6534, -0.1805, -0.0722, -0.0435]) tensor([0.4138, 0.1797, 0.2003, 0.2061]) -Greedy action tensor([ 0.5451, -0.2926, -0.0507, -0.3133]) tensor([0.4153, 0.1797, 0.2289, 0.1760]) -Greedy action tensor([ 1.1916, -0.6529, 0.1011, -0.8095]) tensor([0.6138, 0.0970, 0.2062, 0.0830]) -Greedy action tensor([ 1.2368, -0.6752, 0.1108, -0.4492]) tensor([0.6034, 0.0892, 0.1957, 0.1118]) -Greedy action tensor([ 0.9635, -0.2650, -0.0119, -0.4582]) tensor([0.5233, 0.1532, 0.1973, 0.1263]) -Greedy action tensor([ 0.6445, -0.5492, -0.0972, -0.2351]) tensor([0.4557, 0.1381, 0.2171, 0.1891]) -Greedy action tensor([ 1.3427, -0.9238, 0.1612, -0.8442]) tensor([0.6567, 0.0681, 0.2015, 0.0737]) -Greedy action tensor([ 0.8672, -0.5219, -0.0140, -0.2657]) tensor([0.5036, 0.1256, 0.2086, 0.1622]) -Greedy action tensor([ 0.8691, -0.5538, 0.0681, -0.1866]) tensor([0.4907, 0.1183, 0.2203, 0.1707]) -Greedy action tensor([ 0.9093, -0.5025, -0.1686, -0.6008]) tensor([0.5540, 0.1350, 0.1885, 0.1224]) -Greedy action tensor([ 0.9057, -0.4395, -0.1377, -0.4417]) tensor([0.5340, 0.1391, 0.1881, 0.1388]) -Greedy action tensor([ 0.6053, -0.3922, -0.1062, -0.1751]) tensor([0.4314, 0.1591, 0.2118, 0.1977]) -Greedy action tensor([ 0.3019, -0.2514, -0.0546, -0.3552]) tensor([0.3580, 0.2059, 0.2506, 0.1856]) -Greedy action tensor([ 1.2349, -0.5889, -0.0212, -0.5241]) tensor([0.6179, 0.0997, 0.1759, 0.1064]) -Greedy action tensor([ 0.7684, -0.2595, -0.0865, -0.1371]) tensor([0.4572, 0.1636, 0.1944, 0.1848]) -Greedy action tensor([ 0.7237, -0.7507, -0.0175, -0.3158]) tensor([0.4857, 0.1112, 0.2314, 0.1717]) -Greedy action tensor([ 0.3003, -0.1091, -0.2412, -0.1036]) tensor([0.3432, 0.2279, 0.1997, 0.2292]) -Greedy action tensor([ 0.6414, -0.4639, -0.0643, -0.1214]) tensor([0.4364, 0.1445, 0.2155, 0.2035]) -Greedy action tensor([ 0.6718, -0.3645, 0.0206, -0.2502]) tensor([0.4398, 0.1560, 0.2293, 0.1749]) -Greedy action tensor([ 0.9925, -0.3789, 0.0789, -0.1622]) tensor([0.5076, 0.1288, 0.2036, 0.1600]) -Greedy action tensor([ 4.7644e-01, 3.1811e-04, -5.9850e-02, -1.2550e-01]) tensor([0.3631, 0.2256, 0.2124, 0.1989]) -Greedy action tensor([ 0.3978, -0.0224, -0.0140, -0.0418]) tensor([0.3374, 0.2216, 0.2235, 0.2174]) -Greedy action tensor([ 0.8649, -0.3745, -0.0342, -0.2181]) tensor([0.4914, 0.1423, 0.2000, 0.1664]) -Greedy action tensor([ 0.7429, -0.2839, -0.1656, -0.4160]) tensor([0.4819, 0.1726, 0.1943, 0.1512]) -Greedy action tensor([ 0.9450, -0.2282, -0.0863, -0.3167]) tensor([0.5131, 0.1587, 0.1829, 0.1453]) -Greedy action tensor([ 0.9250, -0.6787, -0.0687, -0.6451]) tensor([0.5620, 0.1130, 0.2080, 0.1169]) -Greedy action tensor([ 0.7165, -0.5374, -0.0514, -0.2999]) tensor([0.4736, 0.1352, 0.2198, 0.1714]) -Greedy action tensor([ 1.0393, -0.8562, 0.1478, -0.6077]) tensor([0.5705, 0.0857, 0.2339, 0.1099]) -Greedy action tensor([ 0.9940, -0.6209, -0.0901, -0.5448]) tensor([0.5709, 0.1135, 0.1931, 0.1225]) -Greedy action tensor([ 1.1107, -0.6933, 0.0441, -0.3875]) tensor([0.5772, 0.0950, 0.1987, 0.1290]) -Greedy action tensor([ 0.5487, 0.0948, -0.1320, -0.1441]) tensor([0.3786, 0.2404, 0.1917, 0.1893]) -Greedy action tensor([ 0.4580, -0.3630, -0.2142, -0.0873]) tensor([0.3952, 0.1739, 0.2018, 0.2291]) -Greedy action tensor([ 0.9506, -0.2492, -0.0138, -0.0941]) tensor([0.4916, 0.1481, 0.1874, 0.1729]) -Greedy action tensor([ 1.0619, -0.6971, -0.0998, -0.5175]) tensor([0.5913, 0.1018, 0.1850, 0.1219]) -Greedy action tensor([ 0.8846, -0.5504, -0.1393, -0.3204]) tensor([0.5272, 0.1255, 0.1893, 0.1580]) -Greedy action tensor([ 1.1060, -0.6436, 0.1066, -0.7232]) tensor([0.5874, 0.1021, 0.2162, 0.0943]) -Greedy action tensor([ 0.9617, -0.4365, -0.0551, -0.2863]) tensor([0.5275, 0.1303, 0.1908, 0.1514]) -Greedy action tensor([ 0.8548, -0.3846, 0.0009, -0.2621]) tensor([0.4896, 0.1418, 0.2084, 0.1602]) -Greedy action tensor([ 0.8105, -0.5573, 0.0204, -0.3206]) tensor([0.4923, 0.1254, 0.2234, 0.1589]) -Greedy action tensor([ 0.7493, -0.4323, 0.0241, -0.3466]) tensor([0.4705, 0.1444, 0.2279, 0.1573]) -Greedy action tensor([ 0.9486, -0.7603, 0.2089, -0.5101]) tensor([0.5289, 0.0958, 0.2524, 0.1230]) -Greedy action tensor([ 1.0522, -0.7782, 0.0316, -0.5683]) tensor([0.5819, 0.0933, 0.2097, 0.1151]) -Greedy action tensor([ 1.3575, -1.0049, -0.0659, -0.7173]) tensor([0.6846, 0.0645, 0.1649, 0.0860]) -Greedy action tensor([ 0.9287, -0.3592, -0.1163, -0.2298]) tensor([0.5151, 0.1421, 0.1811, 0.1617]) -Greedy action tensor([ 0.4691, -0.1272, 0.0763, -0.2907]) tensor([0.3712, 0.2045, 0.2506, 0.1736]) -Greedy action tensor([ 0.5457, -0.2510, 0.0121, -0.1204]) tensor([0.3920, 0.1767, 0.2299, 0.2014]) -Greedy action tensor([ 0.8365, -0.3928, -0.0705, -0.4556]) tensor([0.5074, 0.1484, 0.2048, 0.1394]) -Greedy action tensor([ 0.7753, -0.5949, 0.0156, -0.1872]) tensor([0.4753, 0.1208, 0.2224, 0.1815]) -Greedy action tensor([ 0.7748, -0.3942, 0.0413, -0.2123]) tensor([0.4622, 0.1436, 0.2220, 0.1722]) -Greedy action tensor([ 0.5505, -0.4443, 0.0338, -0.2636]) tensor([0.4151, 0.1535, 0.2476, 0.1839]) -Greedy action tensor([ 0.5457, -0.5363, -0.0521, -0.2387]) tensor([0.4264, 0.1445, 0.2345, 0.1946]) -Greedy action tensor([ 1.4068, -0.8130, -0.1153, -0.5803]) tensor([0.6831, 0.0742, 0.1491, 0.0936]) -Greedy action tensor([ 0.9853, -0.4496, -0.0164, -0.2178]) tensor([0.5247, 0.1250, 0.1927, 0.1576]) -Greedy action tensor([ 0.3650, -0.0940, 0.0018, -0.1606]) tensor([0.3426, 0.2165, 0.2383, 0.2026]) -Greedy action tensor([ 0.8863, -0.5512, -0.1542, -0.2568]) tensor([0.5237, 0.1244, 0.1850, 0.1669]) -Greedy action tensor([ 1.1075, -0.3643, -0.1212, -0.3262]) tensor([0.5680, 0.1304, 0.1662, 0.1354]) -Greedy action tensor([ 1.0095, -0.7233, 0.0150, -0.2776]) tensor([0.5486, 0.0970, 0.2029, 0.1515]) -Greedy action tensor([ 1.0539, -0.9909, 0.2550, -0.6224]) tensor([0.5662, 0.0733, 0.2547, 0.1059]) -Greedy action tensor([ 1.0880, -0.8587, 0.0934, -0.2177]) tensor([0.5607, 0.0800, 0.2074, 0.1519]) -Greedy action tensor([ 0.7009, -0.0916, 0.1756, -0.3919]) tensor([0.4203, 0.1903, 0.2485, 0.1409]) -Greedy action tensor([ 0.5767, -0.4715, -0.0625, -0.1333]) tensor([0.4219, 0.1479, 0.2227, 0.2075]) -Greedy action tensor([ 1.2041, -0.7316, -0.1612, -0.5203]) tensor([0.6338, 0.0915, 0.1618, 0.1130]) -Greedy action tensor([ 0.4705, 0.2131, -0.0875, 0.1152]) tensor([0.3283, 0.2538, 0.1879, 0.2301]) -Greedy action tensor([ 0.9886, -0.3152, -0.2062, -0.2793]) tensor([0.5389, 0.1463, 0.1632, 0.1517]) -Greedy action tensor([ 1.1056, -1.0951, 0.2409, -0.7428]) tensor([0.5919, 0.0655, 0.2493, 0.0932]) -Greedy action tensor([ 1.2130, -0.6760, -0.0926, -0.8457]) tensor([0.6452, 0.0976, 0.1749, 0.0823]) -Greedy action tensor([ 0.8250, -0.5252, -0.0499, -0.4000]) tensor([0.5077, 0.1316, 0.2116, 0.1491]) -Greedy action tensor([ 1.0844, -0.7615, 0.1426, -0.6970]) tensor([0.5827, 0.0920, 0.2272, 0.0981]) -Greedy action tensor([ 0.4567, -0.1490, -0.0114, -0.1444]) tensor([0.3676, 0.2006, 0.2302, 0.2015]) -Greedy action tensor([ 0.8433, -0.1968, 0.1923, -0.1218]) tensor([0.4433, 0.1567, 0.2312, 0.1689]) -Greedy action tensor([ 0.5908, 0.0974, -0.1474, -0.2133]) tensor([0.3943, 0.2408, 0.1885, 0.1765]) -Greedy action tensor([ 0.7421, -0.6999, -0.0974, -0.1930]) tensor([0.4852, 0.1147, 0.2096, 0.1905]) -Greedy action tensor([ 0.9079, -0.6282, 0.0422, -0.2740]) tensor([0.5148, 0.1108, 0.2166, 0.1579]) -Greedy action tensor([ 0.6463, -0.3412, -0.0339, -0.0398]) tensor([0.4197, 0.1564, 0.2126, 0.2113]) -Greedy action tensor([ 0.6938, -0.4568, -0.0158, -0.3536]) tensor([0.4631, 0.1466, 0.2278, 0.1625]) -Greedy action tensor([ 0.7315, -0.1768, -0.0764, -0.1766]) tensor([0.4440, 0.1790, 0.1979, 0.1791]) -Greedy action tensor([ 1.1043, -0.6864, -0.1030, -0.5539]) tensor([0.6038, 0.1007, 0.1805, 0.1150]) -Greedy action tensor([ 0.8968, -0.3702, -0.0241, -0.0937]) tensor([0.4875, 0.1373, 0.1941, 0.1810]) -Greedy action tensor([ 0.7099, -0.3310, -0.0256, -0.1043]) tensor([0.4395, 0.1552, 0.2106, 0.1947]) -Greedy action tensor([ 0.5666, -0.0852, -0.1301, -0.1916]) tensor([0.4020, 0.2095, 0.2003, 0.1883]) -Greedy action tensor([ 0.9573, -0.7628, 0.1752, -0.3003]) tensor([0.5206, 0.0932, 0.2382, 0.1480]) -Greedy action tensor([ 2.1310, -0.1024, -0.1689, 0.4148]) tensor([0.7209, 0.0773, 0.0723, 0.1296]) -Greedy action tensor([ 1.2628, 0.1872, -1.0120, 0.4095]) tensor([0.5348, 0.1824, 0.0550, 0.2278]) -Greedy action tensor([ 1.6854, -0.4944, -0.4423, 0.4708]) tensor([0.6540, 0.0739, 0.0779, 0.1941]) -Greedy action tensor([ 1.3410, -0.4449, -0.5642, 0.3342]) tensor([0.5946, 0.0997, 0.0885, 0.2173]) -Greedy action tensor([ 1.1750, -0.3070, -0.6878, 0.0694]) tensor([0.5836, 0.1326, 0.0906, 0.1932]) -Greedy action tensor([ 1.8200, -0.7965, 0.0196, 0.0085]) tensor([0.7134, 0.0521, 0.1179, 0.1166]) -Greedy action tensor([ 1.5183, -1.0020, 0.0037, 0.4807]) tensor([0.6044, 0.0486, 0.1329, 0.2141]) -Greedy action tensor([ 1.6331, -0.0079, -0.5766, 0.0831]) tensor([0.6597, 0.1278, 0.0724, 0.1400]) -Greedy action tensor([ 2.1263, 0.3270, -0.6533, 0.0465]) tensor([0.7394, 0.1223, 0.0459, 0.0924]) -Greedy action tensor([ 1.0266, -0.6319, -0.5806, 0.2691]) tensor([0.5377, 0.1024, 0.1078, 0.2521]) -Greedy action tensor([ 1.7870, -0.6371, 0.0090, 0.5118]) tensor([0.6506, 0.0576, 0.1100, 0.1818]) -Greedy action tensor([ 1.5462, -0.8437, -0.4003, 0.3023]) tensor([0.6567, 0.0602, 0.0938, 0.1893]) -Greedy action tensor([ 1.9053, -0.7956, -0.6750, 0.8617]) tensor([0.6688, 0.0449, 0.0507, 0.2356]) -Greedy action tensor([ 1.0856, -0.2909, -0.3466, 0.0846]) tensor([0.5380, 0.1358, 0.1285, 0.1977]) -Greedy action tensor([ 1.7301, -1.2232, -0.2697, 0.3257]) tensor([0.6978, 0.0364, 0.0945, 0.1713]) -Greedy action tensor([ 1.4135, 0.0635, -0.9133, 0.4085]) tensor([0.5804, 0.1505, 0.0567, 0.2125]) -Greedy action tensor([ 1.6406, -0.6398, -0.6318, 0.1640]) tensor([0.6975, 0.0713, 0.0719, 0.1593]) -Greedy action tensor([ 2.3555, -1.0941, -0.0384, 0.7739]) tensor([0.7526, 0.0239, 0.0687, 0.1548]) -Greedy action tensor([ 1.1989, -0.5419, -0.0792, 0.2001]) tensor([0.5488, 0.0962, 0.1529, 0.2021]) -Greedy action tensor([ 2.1684, -0.9618, -0.2281, 0.4454]) tensor([0.7615, 0.0333, 0.0693, 0.1359]) -Greedy action tensor([ 1.7405, -0.4215, -0.5490, 0.7664]) tensor([0.6274, 0.0722, 0.0636, 0.2369]) -Greedy action tensor([ 1.5031, -0.7448, -0.0485, 0.6546]) tensor([0.5729, 0.0605, 0.1214, 0.2452]) -Greedy action tensor([ 1.4362, -0.5700, -0.4326, 0.4737]) tensor([0.5985, 0.0805, 0.0924, 0.2286]) -Greedy action tensor([ 1.3638, -0.1184, -0.3936, 0.0283]) tensor([0.6014, 0.1366, 0.1037, 0.1582]) -Greedy action tensor([ 0.8854, -0.3905, 0.0327, 0.2198]) tensor([0.4506, 0.1258, 0.1921, 0.2316]) -Greedy action tensor([ 1.1102, -0.5665, -0.5863, 0.3194]) tensor([0.5483, 0.1025, 0.1005, 0.2486]) -Greedy action tensor([ 1.1666, 0.3246, -0.3471, 0.2824]) tensor([0.4845, 0.2087, 0.1066, 0.2001]) -Greedy action tensor([ 1.5378, -0.7740, -0.3479, 0.3919]) tensor([0.6374, 0.0632, 0.0967, 0.2027]) -Greedy action tensor([ 1.7402, -0.5820, -0.1672, 0.5804]) tensor([0.6410, 0.0629, 0.0952, 0.2010]) -Greedy action tensor([ 1.9821, -0.6385, -0.2744, 0.4857]) tensor([0.7136, 0.0519, 0.0747, 0.1598]) -Greedy action tensor([ 1.5342, -0.7642, -0.5036, 0.4031]) tensor([0.6438, 0.0646, 0.0839, 0.2077]) -Greedy action tensor([ 1.9122, -0.3255, -0.5618, 0.0350]) tensor([0.7441, 0.0794, 0.0627, 0.1139]) -Greedy action tensor([ 1.5932, -0.7180, -0.4226, 0.2614]) tensor([0.6683, 0.0663, 0.0890, 0.1764]) -Greedy action tensor([ 1.9596, -0.1057, -0.8662, 0.4443]) tensor([0.7113, 0.0902, 0.0422, 0.1563]) -Greedy action tensor([ 1.4856, -0.6014, 0.0900, 0.4051]) tensor([0.5844, 0.0725, 0.1447, 0.1984]) -Greedy action tensor([ 1.5257, -0.5053, -0.4969, 0.2592]) tensor([0.6471, 0.0849, 0.0856, 0.1824]) -Greedy action tensor([ 1.3985, -0.4759, -0.3903, 0.2062]) tensor([0.6157, 0.0945, 0.1029, 0.1869]) -Greedy action tensor([ 1.4142, -0.5341, -0.4108, 0.1396]) tensor([0.6316, 0.0900, 0.1018, 0.1766]) -Greedy action tensor([ 1.3534, -0.1224, -0.7524, 0.0853]) tensor([0.6128, 0.1401, 0.0746, 0.1724]) -Greedy action tensor([ 1.3691, -0.9797, -0.2423, 0.7592]) tensor([0.5439, 0.0519, 0.1086, 0.2956]) -Greedy action tensor([ 1.0909, -0.7191, -0.1052, 0.1345]) tensor([0.5405, 0.0884, 0.1634, 0.2077]) -Greedy action tensor([ 1.6774, -0.6218, -0.3455, 0.1470]) tensor([0.6901, 0.0692, 0.0913, 0.1494]) -Greedy action tensor([ 1.6768, -0.8082, -0.4104, 0.0223]) tensor([0.7150, 0.0596, 0.0887, 0.1367]) -Greedy action tensor([ 1.2706, -0.3991, -0.9928, 0.4567]) tensor([0.5762, 0.1085, 0.0599, 0.2553]) -Greedy action tensor([ 1.2445, -0.0357, -0.8351, 0.7071]) tensor([0.5032, 0.1399, 0.0629, 0.2940]) -Greedy action tensor([ 1.4012, -0.4247, -1.0893, 0.0030]) tensor([0.6707, 0.1080, 0.0556, 0.1657]) -Greedy action tensor([ 1.5522, -0.6791, -0.3877, 0.3966]) tensor([0.6386, 0.0686, 0.0918, 0.2011]) -Greedy action tensor([ 1.1789, 0.1681, -1.1029, 0.1706]) tensor([0.5462, 0.1988, 0.0558, 0.1993]) -Greedy action tensor([ 1.7282, -0.2168, -0.4242, 0.3356]) tensor([0.6633, 0.0948, 0.0771, 0.1648]) -Greedy action tensor([ 0.8809, -0.1663, -0.9417, 0.2226]) tensor([0.4925, 0.1728, 0.0796, 0.2550]) -Greedy action tensor([ 2.6383, -1.5737, 0.0676, 1.2174]) tensor([0.7503, 0.0111, 0.0574, 0.1812]) -Greedy action tensor([ 1.4450, -0.6682, -0.7119, 0.6915]) tensor([0.5857, 0.0708, 0.0678, 0.2757]) -Greedy action tensor([ 1.4527, -0.4436, -0.4223, 0.3313]) tensor([0.6138, 0.0921, 0.0941, 0.2000]) -Greedy action tensor([ 0.9625, -0.3220, -0.6153, 0.0127]) tensor([0.5348, 0.1480, 0.1104, 0.2068]) -Greedy action tensor([ 1.5992, -0.1846, -0.2263, 0.0867]) tensor([0.6454, 0.1084, 0.1040, 0.1422]) -Greedy action tensor([ 1.8818, -1.0314, -0.2780, 0.8551]) tensor([0.6545, 0.0355, 0.0755, 0.2344]) -Greedy action tensor([ 1.3229, -0.4292, -0.4058, 0.4967]) tensor([0.5591, 0.0969, 0.0992, 0.2447]) -Greedy action tensor([ 1.7527, -0.6760, -0.6232, 0.2990]) tensor([0.7068, 0.0623, 0.0657, 0.1652]) -Greedy action tensor([ 0.9533, -0.5821, 0.2057, 0.1788]) tensor([0.4652, 0.1002, 0.2202, 0.2144]) -Greedy action tensor([ 1.4879, -0.7496, -0.6603, 0.3163]) tensor([0.6522, 0.0696, 0.0761, 0.2021]) -Greedy action tensor([ 1.1170, -0.3284, -0.0136, -0.1236]) tensor([0.5412, 0.1275, 0.1747, 0.1565]) -Greedy action tensor([ 1.7847, -0.4330, -0.4279, 0.4983]) tensor([0.6691, 0.0728, 0.0732, 0.1849]) -Greedy action tensor([ 1.6692, -0.4473, -0.2984, 0.2736]) tensor([0.6632, 0.0799, 0.0927, 0.1642]) -Greedy action tensor([ 1.8643, -0.9119, 0.0412, 0.0151]) tensor([0.7240, 0.0451, 0.1170, 0.1139]) -Greedy action tensor([ 1.1716, -0.4905, -0.5459, 0.8708]) tensor([0.4741, 0.0899, 0.0851, 0.3509]) -Greedy action tensor([ 1.3466, -0.0489, -0.5740, -0.6576]) tensor([0.6540, 0.1620, 0.0958, 0.0881]) -Greedy action tensor([ 1.9890, -0.8620, -0.3312, 0.5736]) tensor([0.7149, 0.0413, 0.0702, 0.1736]) -Greedy action tensor([ 1.5355, -0.7192, -0.2040, 0.7383]) tensor([0.5777, 0.0606, 0.1014, 0.2603]) -Greedy action tensor([ 1.0720, -0.2016, -0.3161, 0.2623]) tensor([0.5065, 0.1417, 0.1264, 0.2254]) -Greedy action tensor([ 1.4298, -0.6162, -0.3436, 0.7301]) tensor([0.5569, 0.0720, 0.0945, 0.2766]) -Greedy action tensor([ 2.1557, -0.9609, -0.3463, 0.3677]) tensor([0.7731, 0.0343, 0.0633, 0.1293]) -Greedy action tensor([ 1.2178, -0.2898, -0.4512, 0.1652]) tensor([0.5685, 0.1259, 0.1071, 0.1984]) -Greedy action tensor([ 1.2299, -0.5457, -0.2821, 0.4747]) tensor([0.5377, 0.0911, 0.1185, 0.2527]) -Greedy action tensor([ 1.9719, -1.2718, -0.1742, 0.7529]) tensor([0.6890, 0.0269, 0.0806, 0.2036]) -Greedy action tensor([ 1.3404, 0.3071, -0.6144, 0.3755]) tensor([0.5324, 0.1894, 0.0754, 0.2028]) -Greedy action tensor([ 1.7699, -1.0464, -0.2607, 0.5506]) tensor([0.6727, 0.0402, 0.0883, 0.1987]) -Greedy action tensor([ 1.6152, -0.6549, -0.6934, 0.2277]) tensor([0.6885, 0.0711, 0.0684, 0.1719]) -Greedy action tensor([ 1.6573, -0.5884, -0.7172, 0.3939]) tensor([0.6749, 0.0714, 0.0628, 0.1908]) -Greedy action tensor([ 1.3922, -0.6246, -0.4777, 0.4834]) tensor([0.5916, 0.0787, 0.0912, 0.2384]) -Greedy action tensor([ 1.6206, -0.9186, 0.0288, 0.6866]) tensor([0.5968, 0.0471, 0.1215, 0.2346]) -Greedy action tensor([ 1.3119, -0.2647, -0.5354, 0.0789]) tensor([0.6040, 0.1248, 0.0952, 0.1760]) -Greedy action tensor([ 0.9640, -0.0220, 0.7779, 1.6683]) tensor([0.2366, 0.0883, 0.1965, 0.4786]) -Greedy action tensor([-1.5437, -0.5176, 0.4344, 0.1308]) tensor([0.0611, 0.1706, 0.4420, 0.3263]) -Greedy action tensor([-1.9046, -0.3682, 0.6412, -0.1531]) tensor([0.0414, 0.1923, 0.5278, 0.2385]) -Greedy action tensor([-1.8870, -0.3896, 0.6357, -0.1809]) tensor([0.0427, 0.1907, 0.5317, 0.2350]) -Greedy action tensor([-1.4307, -0.6211, 0.5069, 0.0907]) tensor([0.0677, 0.1521, 0.4701, 0.3101]) -Greedy action tensor([-1.9125, -0.4098, 0.6495, -0.1634]) tensor([0.0413, 0.1857, 0.5355, 0.2375]) -Greedy action tensor([-1.8832, -0.3688, 0.6278, -0.1546]) tensor([0.0426, 0.1935, 0.5242, 0.2397]) -Greedy action tensor([-1.9457, -0.4537, 0.6679, -0.1809]) tensor([0.0401, 0.1783, 0.5474, 0.2342]) -Greedy action tensor([-1.6387, -0.4729, 0.3550, -0.3591]) tensor([0.0660, 0.2118, 0.4848, 0.2374]) -Greedy action tensor([-1.9003, -0.3956, 0.6483, -0.1564]) tensor([0.0416, 0.1875, 0.5326, 0.2382]) -Greedy action tensor([-1.8152, -0.4568, 0.6037, -0.1212]) tensor([0.0464, 0.1804, 0.5209, 0.2523]) -Greedy action tensor([-1.5240, -0.2174, 0.5836, 0.0976]) tensor([0.0556, 0.2054, 0.4576, 0.2814]) -Greedy action tensor([-1.7812, -0.4247, 0.5885, -0.1024]) tensor([0.0478, 0.1855, 0.5108, 0.2560]) -Greedy action tensor([-1.8303, -0.4342, 0.6093, -0.1197]) tensor([0.0454, 0.1833, 0.5203, 0.2510]) -Greedy action tensor([-1.8821, -0.3224, 0.6315, -0.1425]) tensor([0.0420, 0.1999, 0.5189, 0.2393]) -Greedy action tensor([-1.9153, -0.4132, 0.6550, -0.1657]) tensor([0.0411, 0.1847, 0.5376, 0.2366]) -Greedy action tensor([-1.9490, -0.4486, 0.6664, -0.1833]) tensor([0.0400, 0.1793, 0.5469, 0.2338]) -Greedy action tensor([-0.8525, -0.0296, 0.3829, 0.4401]) tensor([0.0965, 0.2198, 0.3320, 0.3516]) -Greedy action tensor([-1.0249, 0.2389, 0.0356, -0.5501]) tensor([0.1107, 0.3917, 0.3196, 0.1780]) -Greedy action tensor([-1.9078, -0.4375, 0.6474, -0.1632]) tensor([0.0418, 0.1817, 0.5376, 0.2390]) -Greedy action tensor([-1.8605, -0.4176, 0.6173, -0.1210]) tensor([0.0438, 0.1853, 0.5216, 0.2493]) -Greedy action tensor([-1.9077, -0.4253, 0.6532, -0.1627]) tensor([0.0415, 0.1829, 0.5378, 0.2378]) -Greedy action tensor([-1.4321, 0.0070, 0.4859, 0.1334]) tensor([0.0595, 0.2509, 0.4050, 0.2847]) -Greedy action tensor([-1.4804, -0.2873, 0.6232, 0.0202]) tensor([0.0589, 0.1942, 0.4828, 0.2641]) -Greedy action tensor([-1.9170, -0.4391, 0.6720, -0.1674]) tensor([0.0409, 0.1793, 0.5446, 0.2352]) -Greedy action tensor([-1.8803, -0.4453, 0.6396, -0.1171]) tensor([0.0426, 0.1790, 0.5298, 0.2486]) -Greedy action tensor([-1.0774, -0.5776, 0.5985, 0.8658]) tensor([0.0668, 0.1101, 0.3569, 0.4662]) -Greedy action tensor([-1.1669, -0.3717, 0.4201, 0.2111]) tensor([0.0828, 0.1835, 0.4050, 0.3286]) -Greedy action tensor([-0.8123, 0.9883, 0.1572, 0.4243]) tensor([0.0761, 0.4609, 0.2008, 0.2622]) -Greedy action tensor([-1.8912, -0.3938, 0.6352, -0.1349]) tensor([0.0421, 0.1881, 0.5262, 0.2436]) -Greedy action tensor([-0.8917, -0.6315, 0.2205, 0.3326]) tensor([0.1144, 0.1484, 0.3479, 0.3892]) -Greedy action tensor([-1.9305, -0.4438, 0.6647, -0.1749]) tensor([0.0406, 0.1797, 0.5445, 0.2352]) -Greedy action tensor([-0.4647, 0.9633, -0.0423, 0.4555]) tensor([0.1086, 0.4530, 0.1657, 0.2726]) -Greedy action tensor([-1.9075, -0.3746, 0.6382, -0.1745]) tensor([0.0416, 0.1927, 0.5304, 0.2353]) -Greedy action tensor([-1.9163, -0.4303, 0.6540, -0.1640]) tensor([0.0412, 0.1822, 0.5388, 0.2378]) -Greedy action tensor([-0.1205, 0.3563, 0.3513, 0.7900]) tensor([0.1493, 0.2405, 0.2392, 0.3710]) -Greedy action tensor([-1.5184, 0.2385, 0.3975, -0.1404]) tensor([0.0570, 0.3301, 0.3870, 0.2260]) -Greedy action tensor([-1.3413, -0.6281, 0.3440, 0.1869]) tensor([0.0767, 0.1564, 0.4135, 0.3534]) -Greedy action tensor([-1.8624, -0.2888, 0.6151, -0.1305]) tensor([0.0428, 0.2063, 0.5093, 0.2416]) -Greedy action tensor([-1.9195, -0.3926, 0.6504, -0.1669]) tensor([0.0409, 0.1884, 0.5346, 0.2361]) -Greedy action tensor([-1.8638, -0.4466, 0.6359, -0.1372]) tensor([0.0436, 0.1799, 0.5312, 0.2452]) -Greedy action tensor([-1.7070, -0.4328, 0.5629, -0.0435]) tensor([0.0512, 0.1831, 0.4955, 0.2702]) -Greedy action tensor([-1.8626, -0.2429, 0.6025, -0.1287]) tensor([0.0426, 0.2152, 0.5011, 0.2412]) -Greedy action tensor([-1.6584, 0.1230, 0.4283, 0.0247]) tensor([0.0491, 0.2914, 0.3954, 0.2641]) -Greedy action tensor([-1.4806, 0.0575, 0.3612, -0.0372]) tensor([0.0617, 0.2874, 0.3894, 0.2614]) -Greedy action tensor([-1.1767, 0.2853, 0.3104, -0.1430]) tensor([0.0797, 0.3438, 0.3525, 0.2240]) -Greedy action tensor([-1.8859, -0.4020, 0.6325, -0.1465]) tensor([0.0425, 0.1876, 0.5277, 0.2422]) -Greedy action tensor([-1.7655, -0.4647, 0.5874, -0.0871]) tensor([0.0487, 0.1787, 0.5118, 0.2607]) -Greedy action tensor([-1.9308, -0.4318, 0.6613, -0.1693]) tensor([0.0406, 0.1816, 0.5418, 0.2361]) -Greedy action tensor([-1.7598, -0.4089, 0.5784, -0.0759]) tensor([0.0485, 0.1873, 0.5028, 0.2614]) -Greedy action tensor([-0.3092, 0.2889, 0.0827, -0.0107]) tensor([0.1771, 0.3221, 0.2621, 0.2387]) -Greedy action tensor([-1.7029, -0.0349, 0.5815, -0.3850]) tensor([0.0504, 0.2670, 0.4945, 0.1881]) -Greedy action tensor([-1.8877, -0.2757, 0.6233, -0.1472]) tensor([0.0416, 0.2086, 0.5126, 0.2372]) -Greedy action tensor([-1.1429, 0.2825, 0.2740, -0.0345]) tensor([0.0812, 0.3378, 0.3350, 0.2460]) -Greedy action tensor([-1.8570, -0.3116, 0.6070, -0.1385]) tensor([0.0434, 0.2037, 0.5106, 0.2422]) -Greedy action tensor([-1.4484, -0.4892, 0.4318, 0.0852]) tensor([0.0676, 0.1763, 0.4429, 0.3132]) -Greedy action tensor([-1.7491, -0.4749, 0.5720, -0.1225]) tensor([0.0504, 0.1801, 0.5132, 0.2563]) -Greedy action tensor([-1.4945, -0.1900, 0.4269, -0.0460]) tensor([0.0634, 0.2337, 0.4331, 0.2699]) -Greedy action tensor([-1.9139, -0.4026, 0.6486, -0.1627]) tensor([0.0412, 0.1868, 0.5345, 0.2375]) -Greedy action tensor([-1.9315, -0.3967, 0.6509, -0.1728]) tensor([0.0405, 0.1881, 0.5362, 0.2353]) -Greedy action tensor([-1.0232, 0.8139, 0.1175, 0.1509]) tensor([0.0733, 0.4602, 0.2294, 0.2371]) -Greedy action tensor([-1.8187, -0.3836, 0.6111, -0.0945]) tensor([0.0451, 0.1895, 0.5124, 0.2530]) -Greedy action tensor([-1.6419, -0.2849, 0.4936, -0.0569]) tensor([0.0549, 0.2131, 0.4643, 0.2677]) -Greedy action tensor([-1.8295, -0.4647, 0.6152, -0.1313]) tensor([0.0457, 0.1787, 0.5262, 0.2494]) -Greedy action tensor([-1.8665, -0.4484, 0.6307, -0.1405]) tensor([0.0437, 0.1804, 0.5306, 0.2454]) -Greedy action tensor([-1.8403, -0.4044, 0.6098, -0.1317]) tensor([0.0448, 0.1884, 0.5194, 0.2474]) -Greedy action tensor([-1.8130, -0.4579, 0.5958, -0.1204]) tensor([0.0467, 0.1809, 0.5189, 0.2535]) -Greedy action tensor([-0.9676, 0.4605, 0.1114, 0.2166]) tensor([0.0879, 0.3665, 0.2585, 0.2872]) -Greedy action tensor([-1.9453, -0.4554, 0.6691, -0.1805]) tensor([0.0401, 0.1779, 0.5477, 0.2342]) -Greedy action tensor([-1.6426, -0.5119, 0.5303, -0.0300]) tensor([0.0559, 0.1731, 0.4908, 0.2803]) -Greedy action tensor([-1.6903, -0.5187, 0.7887, 0.0192]) tensor([0.0461, 0.1488, 0.5502, 0.2549]) -Greedy action tensor([-1.9415, -0.4479, 0.6646, -0.1793]) tensor([0.0403, 0.1794, 0.5457, 0.2346]) -Greedy action tensor([-1.3336, -0.5088, 0.3845, 0.0895]) tensor([0.0769, 0.1754, 0.4286, 0.3191]) -Greedy action tensor([-1.9116, -0.3927, 0.6477, -0.1636]) tensor([0.0413, 0.1884, 0.5334, 0.2369]) -Greedy action tensor([-0.3752, 0.7930, 0.0543, 0.2696]) tensor([0.1306, 0.4200, 0.2006, 0.2488]) -Greedy action tensor([-1.9122, -0.4280, 0.6520, -0.1589]) tensor([0.0414, 0.1825, 0.5373, 0.2388]) -Greedy action tensor([-1.9372, -0.4442, 0.6643, -0.1764]) tensor([0.0404, 0.1798, 0.5448, 0.2350]) -Greedy action tensor([-1.9296, -0.4474, 0.6612, -0.1744]) tensor([0.0408, 0.1795, 0.5439, 0.2358]) -Greedy action tensor([-1.1845, -0.0243, 0.2095, 0.2152]) tensor([0.0815, 0.2599, 0.3284, 0.3302]) -Greedy action tensor([-1.8366, -0.3774, 0.6277, -0.1131]) tensor([0.0441, 0.1899, 0.5187, 0.2473]) -Greedy action tensor([-1.8369, -0.4057, 0.6071, -0.1233]) tensor([0.0449, 0.1880, 0.5177, 0.2494]) -Greedy action tensor([ 1.0984, -0.6962, 0.0497, -0.5180]) tensor([0.5830, 0.0969, 0.2043, 0.1158]) -Greedy action tensor([ 1.2348, -0.6241, 0.1345, -0.7443]) tensor([0.6147, 0.0958, 0.2046, 0.0849]) -Greedy action tensor([ 1.0329, -0.3854, 0.1755, -0.3829]) tensor([0.5238, 0.1268, 0.2222, 0.1271]) -Greedy action tensor([ 0.5916, -0.5119, -0.1035, -0.5342]) tensor([0.4640, 0.1539, 0.2315, 0.1505]) -Greedy action tensor([ 1.0957, -0.8130, -0.1698, -0.6842]) tensor([0.6254, 0.0927, 0.1764, 0.1055]) -Greedy action tensor([ 0.9550, -0.7837, -0.0535, -0.6039]) tensor([0.5711, 0.1004, 0.2083, 0.1201]) -Greedy action tensor([ 0.6276, -0.3447, 0.0044, -0.1363]) tensor([0.4201, 0.1589, 0.2253, 0.1957]) -Greedy action tensor([ 1.0208, -0.6322, 0.2595, -0.4342]) tensor([0.5286, 0.1012, 0.2469, 0.1234]) -Greedy action tensor([ 1.2591, -0.7603, -0.1959, -0.5598]) tensor([0.6543, 0.0868, 0.1527, 0.1061]) -Greedy action tensor([ 1.1227, -0.6984, -0.0200, -0.4027]) tensor([0.5888, 0.0953, 0.1878, 0.1281]) -Greedy action tensor([ 0.7459, -0.6824, 0.0345, -0.2947]) tensor([0.4799, 0.1150, 0.2356, 0.1695]) -Greedy action tensor([ 0.7657, -0.2797, 0.0095, -0.2029]) tensor([0.4544, 0.1597, 0.2133, 0.1725]) -Greedy action tensor([ 0.7724, -0.6428, 0.0223, -0.4065]) tensor([0.4944, 0.1201, 0.2335, 0.1521]) -Greedy action tensor([ 0.7577, -0.5741, -0.1453, -0.4858]) tensor([0.5108, 0.1349, 0.2071, 0.1473]) -Greedy action tensor([ 0.5681, -0.2710, -0.0249, -0.1327]) tensor([0.4031, 0.1742, 0.2228, 0.2000]) -Greedy action tensor([ 0.5634, -0.5150, 0.1310, -0.2551]) tensor([0.4115, 0.1400, 0.2670, 0.1815]) -Greedy action tensor([ 0.4464, -0.1497, 0.0088, -0.1697]) tensor([0.3654, 0.2013, 0.2359, 0.1974]) -Greedy action tensor([ 0.7236, -0.2724, 0.1148, -0.2802]) tensor([0.4386, 0.1620, 0.2386, 0.1607]) -Greedy action tensor([ 1.2701, -0.8912, -0.0159, -0.6168]) tensor([0.6481, 0.0746, 0.1791, 0.0982]) -Greedy action tensor([ 0.8498, -0.3557, -0.0587, -0.0981]) tensor([0.4784, 0.1433, 0.1929, 0.1854]) -Greedy action tensor([ 0.4250, 0.1563, -0.0210, 0.0686]) tensor([0.3221, 0.2462, 0.2062, 0.2255]) -Greedy action tensor([ 0.5673, -0.5960, -0.0847, -0.3249]) tensor([0.4458, 0.1393, 0.2323, 0.1827]) -Greedy action tensor([ 0.9772, -0.6026, -0.0240, -0.5772]) tensor([0.5603, 0.1154, 0.2059, 0.1184]) -Greedy action tensor([ 0.5369, -0.0493, 0.0586, -0.0185]) tensor([0.3636, 0.2023, 0.2254, 0.2087]) -Greedy action tensor([ 0.6711, -0.0777, -0.0473, -0.4071]) tensor([0.4347, 0.2056, 0.2119, 0.1479]) -Greedy action tensor([ 0.8093, -0.3991, -0.1381, -0.2426]) tensor([0.4912, 0.1467, 0.1905, 0.1716]) -Greedy action tensor([ 0.6788, -0.4503, 0.0461, -0.2255]) tensor([0.4426, 0.1431, 0.2351, 0.1792]) -Greedy action tensor([ 0.4246, -0.3176, -0.0023, -0.1359]) tensor([0.3705, 0.1763, 0.2417, 0.2115]) -Greedy action tensor([ 0.9568, -0.6818, -0.0878, -0.4316]) tensor([0.5569, 0.1082, 0.1959, 0.1389]) -Greedy action tensor([ 0.9367, -0.7845, -0.1328, -0.4401]) tensor([0.5636, 0.1008, 0.1934, 0.1422]) -Greedy action tensor([ 0.5150, -0.0530, -0.0347, -0.1057]) tensor([0.3730, 0.2113, 0.2152, 0.2005]) -Greedy action tensor([ 1.0145, -0.5353, -0.1462, -0.2843]) tensor([0.5560, 0.1180, 0.1742, 0.1517]) -Greedy action tensor([ 0.4979, -0.2085, -0.1548, -0.0573]) tensor([0.3864, 0.1907, 0.2012, 0.2218]) -Greedy action tensor([ 1.0222, -0.6130, -0.0812, -0.4453]) tensor([0.5691, 0.1109, 0.1888, 0.1312]) -Greedy action tensor([ 1.1466, -0.7075, -0.0800, -0.5976]) tensor([0.6155, 0.0964, 0.1805, 0.1076]) -Greedy action tensor([ 0.7496, -0.4740, 0.2024, -0.3452]) tensor([0.4530, 0.1333, 0.2621, 0.1516]) -Greedy action tensor([ 0.8931, -0.3084, 0.1156, -0.4480]) tensor([0.4946, 0.1487, 0.2273, 0.1294]) -Greedy action tensor([ 0.9047, 0.3249, -0.1714, -0.0114]) tensor([0.4346, 0.2434, 0.1482, 0.1739]) -Greedy action tensor([ 0.8499, -0.3647, -0.0395, -0.2628]) tensor([0.4911, 0.1458, 0.2018, 0.1614]) -Greedy action tensor([ 0.9512, -0.4041, 0.1147, -0.1900]) tensor([0.4974, 0.1283, 0.2155, 0.1589]) -Greedy action tensor([ 0.6240, -0.2106, -0.1382, -0.0613]) tensor([0.4159, 0.1805, 0.1941, 0.2096]) -Greedy action tensor([ 0.8078, -0.3342, -0.0059, -0.1999]) tensor([0.4700, 0.1500, 0.2083, 0.1716]) -Greedy action tensor([ 0.7911, -0.5689, -0.0876, -0.4892]) tensor([0.5128, 0.1316, 0.2130, 0.1425]) -Greedy action tensor([ 1.2465, -0.7304, -0.0656, -0.5751]) tensor([0.6371, 0.0882, 0.1716, 0.1031]) -Greedy action tensor([ 0.5369, -0.2334, 0.0262, -0.1669]) tensor([0.3910, 0.1810, 0.2346, 0.1934]) -Greedy action tensor([ 0.7868, -0.4350, -0.0554, -0.4027]) tensor([0.4926, 0.1452, 0.2122, 0.1499]) -Greedy action tensor([ 0.8394, -0.4270, -0.1499, -0.4016]) tensor([0.5147, 0.1451, 0.1914, 0.1488]) -Greedy action tensor([ 1.3734, -0.7791, -0.1427, -0.5563]) tensor([0.6752, 0.0785, 0.1483, 0.0980]) -Greedy action tensor([ 1.1565, -0.7485, -0.0679, -0.5162]) tensor([0.6133, 0.0913, 0.1803, 0.1151]) -Greedy action tensor([ 0.9834, -0.4976, 0.0603, -0.4043]) tensor([0.5335, 0.1213, 0.2120, 0.1332]) -Greedy action tensor([ 1.0942, -0.1996, -0.0250, -0.1621]) tensor([0.5304, 0.1454, 0.1732, 0.1510]) -Greedy action tensor([ 0.8923, -1.0269, -0.0925, -0.4118]) tensor([0.5582, 0.0819, 0.2085, 0.1515]) -Greedy action tensor([ 1.1186, -0.9342, 0.1998, -0.6872]) tensor([0.5911, 0.0759, 0.2359, 0.0971]) -Greedy action tensor([ 1.0799, -0.6147, -0.0654, -0.6840]) tensor([0.5977, 0.1098, 0.1901, 0.1024]) -Greedy action tensor([ 1.2446, -0.8195, -0.1994, -0.5563]) tensor([0.6544, 0.0831, 0.1544, 0.1081]) -Greedy action tensor([ 0.5835, 0.0628, -0.0491, 0.0795]) tensor([0.3664, 0.2177, 0.1946, 0.2213]) -Greedy action tensor([ 1.1279, -0.6194, -0.1058, -0.4352]) tensor([0.5970, 0.1040, 0.1739, 0.1251]) -Greedy action tensor([ 0.5630, -0.2052, -0.0111, -0.2894]) tensor([0.4076, 0.1891, 0.2296, 0.1738]) -Greedy action tensor([ 0.6628, 0.0525, -0.1207, 0.1121]) tensor([0.3881, 0.2108, 0.1773, 0.2238]) -Greedy action tensor([ 0.5403, -0.4966, -0.0227, -0.2508]) tensor([0.4206, 0.1491, 0.2395, 0.1907]) -Greedy action tensor([ 0.5412, -0.2621, -0.0083, -0.3856]) tensor([0.4131, 0.1850, 0.2384, 0.1635]) -Greedy action tensor([ 0.8483, -0.6265, 0.1318, -0.2669]) tensor([0.4890, 0.1119, 0.2388, 0.1603]) -Greedy action tensor([ 0.8069, -0.4107, -0.0317, -0.2224]) tensor([0.4795, 0.1419, 0.2073, 0.1713]) -Greedy action tensor([ 0.1779, 0.2037, -0.1887, -0.2665]) tensor([0.2976, 0.3054, 0.2062, 0.1908]) -Greedy action tensor([ 1.1708, -0.5051, -0.2119, -0.5583]) tensor([0.6190, 0.1158, 0.1553, 0.1098]) -Greedy action tensor([ 0.7957, -0.7910, -0.0990, -0.2663]) tensor([0.5104, 0.1044, 0.2086, 0.1765]) -Greedy action tensor([ 0.6633, -0.3864, -0.1352, -0.1308]) tensor([0.4440, 0.1554, 0.1998, 0.2007]) -Greedy action tensor([ 0.8888, -0.5078, -0.0299, -0.2516]) tensor([0.5086, 0.1259, 0.2029, 0.1626]) -Greedy action tensor([ 1.0976, -0.6267, -0.0543, -0.5494]) tensor([0.5928, 0.1057, 0.1873, 0.1142]) -Greedy action tensor([ 1.0373, -0.9370, 0.0210, -0.3633]) tensor([0.5723, 0.0795, 0.2071, 0.1411]) -Greedy action tensor([ 0.4467, -0.1201, 0.0021, -0.1655]) tensor([0.3636, 0.2063, 0.2331, 0.1971]) -Greedy action tensor([ 0.8132, -0.5150, -0.0461, -0.2099]) tensor([0.4883, 0.1294, 0.2068, 0.1755]) -Greedy action tensor([ 0.7978, -0.5805, -0.0243, -0.2138]) tensor([0.4866, 0.1226, 0.2139, 0.1769]) -Greedy action tensor([ 0.6511, -0.3164, -0.0295, -0.0719]) tensor([0.4217, 0.1602, 0.2135, 0.2046]) -Greedy action tensor([ 0.9918, -0.5243, 0.1735, -0.7883]) tensor([0.5466, 0.1200, 0.2412, 0.0922]) -Greedy action tensor([ 0.7343, -0.1991, -0.1831, -0.2976]) tensor([0.4653, 0.1830, 0.1859, 0.1658]) -Greedy action tensor([ 0.6357, -0.2192, 0.0350, -0.0877]) tensor([0.4067, 0.1730, 0.2230, 0.1973]) -Greedy action tensor([ 0.5853, -0.3876, -0.0073, -0.2597]) tensor([0.4237, 0.1601, 0.2342, 0.1820]) -Greedy action tensor([ 0.6349, -0.1103, 0.0300, -0.0916]) tensor([0.3993, 0.1895, 0.2181, 0.1931]) -Greedy action tensor([ 1.1106, -0.8335, 0.0898, -0.5464]) tensor([0.5903, 0.0845, 0.2127, 0.1126]) -Greedy action tensor([ 1.0673, -0.5425, 0.1517, -0.4905]) tensor([0.5522, 0.1104, 0.2211, 0.1163]) -Greedy action tensor([ 0.5411, -1.3415, 0.3005, 0.2465]) tensor([0.3727, 0.0567, 0.2930, 0.2776]) -Greedy action tensor([-0.8466, -0.6178, 0.0819, -0.2824]) tensor([0.1528, 0.1920, 0.3866, 0.2686]) -Greedy action tensor([-0.0918, 0.3078, 0.1143, -0.7118]) tensor([0.2348, 0.3502, 0.2886, 0.1263]) -Greedy action tensor([ 0.1455, -1.5137, -0.5723, 0.2655]) tensor([0.3564, 0.0678, 0.1739, 0.4019]) -Greedy action tensor([-0.6963, -0.4050, 0.6472, -0.7127]) tensor([0.1398, 0.1870, 0.5357, 0.1375]) -Greedy action tensor([-0.4127, -1.0000, 0.4207, 0.3164]) tensor([0.1686, 0.0937, 0.3880, 0.3496]) -Greedy action tensor([ 0.4674, -1.7368, 1.0418, -0.2348]) tensor([0.2957, 0.0326, 0.5252, 0.1465]) -Greedy action tensor([-1.0507, -0.0928, -0.0463, -1.0097]) tensor([0.1355, 0.3532, 0.3700, 0.1412]) -Greedy action tensor([ 0.8098, -0.6998, 0.0232, 0.4038]) tensor([0.4269, 0.0943, 0.1944, 0.2844]) -Greedy action tensor([-0.1782, -1.4421, 0.7162, -0.9155]) tensor([0.2377, 0.0672, 0.5814, 0.1137]) -Greedy action tensor([ 1.0359, -1.0031, 0.6406, 0.0088]) tensor([0.4626, 0.0602, 0.3116, 0.1656]) -Greedy action tensor([ 0.6044, 0.1223, -0.9478, -0.3135]) tensor([0.4487, 0.2771, 0.0950, 0.1792]) -Greedy action tensor([-0.4251, -0.7994, 0.2389, -0.5766]) tensor([0.2227, 0.1532, 0.4327, 0.1914]) -Greedy action tensor([-0.2819, -0.6708, 0.3656, -0.6269]) tensor([0.2327, 0.1577, 0.4447, 0.1648]) -Greedy action tensor([-0.4479, 0.2301, -0.7419, -0.3753]) tensor([0.2087, 0.4112, 0.1556, 0.2245]) -Greedy action tensor([-0.7740, -0.2745, 0.7580, -0.7716]) tensor([0.1208, 0.1991, 0.5590, 0.1211]) -Greedy action tensor([-0.6116, -0.8219, 0.0123, -1.2281]) tensor([0.2372, 0.1922, 0.4426, 0.1280]) -Greedy action tensor([-0.7767, -1.4295, 1.9911, -1.3591]) tensor([0.0555, 0.0289, 0.8845, 0.0310]) -Greedy action tensor([-0.5062, 0.2702, -0.0086, -0.7257]) tensor([0.1779, 0.3867, 0.2926, 0.1428]) -Greedy action tensor([ 0.3085, -0.4583, -0.4130, -0.1495]) tensor([0.3871, 0.1798, 0.1881, 0.2449]) -Greedy action tensor([ 1.3469, -1.0049, 0.1447, 0.9098]) tensor([0.4898, 0.0466, 0.1472, 0.3164]) -Greedy action tensor([ 1.0361, 0.4683, 0.4465, -0.3386]) tensor([0.4212, 0.2387, 0.2336, 0.1065]) -Greedy action tensor([ 1.0482, -0.2217, -0.0997, 0.1769]) tensor([0.4959, 0.1393, 0.1573, 0.2075]) -Greedy action tensor([0.9759, 0.1068, 0.1377, 0.3727]) tensor([0.4169, 0.1748, 0.1803, 0.2280]) -Greedy action tensor([ 0.0727, -0.0720, 0.2235, -0.4354]) tensor([0.2755, 0.2384, 0.3203, 0.1658]) -Greedy action tensor([-0.3867, -0.5492, 0.1341, -0.6730]) tensor([0.2334, 0.1984, 0.3929, 0.1753]) -Greedy action tensor([ 0.9932, -1.2269, 0.1609, 0.1057]) tensor([0.5114, 0.0555, 0.2225, 0.2105]) -Greedy action tensor([-0.1032, 0.4865, -0.0797, -0.6507]) tensor([0.2270, 0.4093, 0.2324, 0.1313]) -Greedy action tensor([-0.7661, -0.0864, -0.8069, -0.2000]) tensor([0.1756, 0.3465, 0.1686, 0.3093]) -Greedy action tensor([-0.1204, 0.2974, 0.6223, -0.1991]) tensor([0.1804, 0.2739, 0.3790, 0.1667]) -Greedy action tensor([-0.6288, -0.6069, 0.4967, 0.0468]) tensor([0.1415, 0.1446, 0.4359, 0.2780]) -Greedy action tensor([0.3230, 0.4561, 0.0260, 0.3921]) tensor([0.2527, 0.2887, 0.1878, 0.2708]) -Greedy action tensor([-1.3305, -0.1274, -0.6595, 0.3804]) tensor([0.0846, 0.2818, 0.1655, 0.4681]) -Greedy action tensor([-1.1343, -0.7742, -0.7609, 0.3585]) tensor([0.1200, 0.1720, 0.1743, 0.5338]) -Greedy action tensor([ 0.8300, -1.2957, -0.4748, -0.8667]) tensor([0.6354, 0.0758, 0.1723, 0.1165]) -Greedy action tensor([ 0.2917, -0.1384, -0.2317, 0.3197]) tensor([0.3057, 0.1988, 0.1811, 0.3144]) -Greedy action tensor([ 0.6634, 0.4109, 1.2837, -0.2966]) tensor([0.2488, 0.1933, 0.4626, 0.0953]) -Greedy action tensor([ 0.1664, 0.0294, 1.0788, -0.5771]) tensor([0.2067, 0.1802, 0.5148, 0.0983]) -Greedy action tensor([ 1.3058, -0.0702, 0.7538, -0.0843]) tensor([0.4814, 0.1216, 0.2772, 0.1199]) -Greedy action tensor([ 0.1100, -0.4140, -0.0827, -0.7617]) tensor([0.3527, 0.2089, 0.2909, 0.1475]) -Greedy action tensor([ 0.2857, -0.9515, -0.1702, 0.9228]) tensor([0.2621, 0.0761, 0.1661, 0.4957]) -Greedy action tensor([-0.3352, -0.8039, -0.5772, -0.7542]) tensor([0.3259, 0.2039, 0.2558, 0.2143]) -Greedy action tensor([-0.0572, -0.3635, 0.1890, -0.0342]) tensor([0.2476, 0.1823, 0.3167, 0.2534]) -Greedy action tensor([ 0.6380, 0.0348, -0.5160, 0.5009]) tensor([0.3657, 0.2001, 0.1153, 0.3189]) -Greedy action tensor([ 0.7148, 0.4385, 0.8388, -0.7403]) tensor([0.3201, 0.2428, 0.3624, 0.0747]) -Greedy action tensor([ 1.1300, -0.7841, 0.5379, 1.4786]) tensor([0.3207, 0.0473, 0.1774, 0.4545]) -Greedy action tensor([ 0.0344, -0.3960, 0.5309, -0.2433]) tensor([0.2469, 0.1605, 0.4056, 0.1870]) -Greedy action tensor([ 0.7170, -1.3512, 0.1008, 0.5031]) tensor([0.4042, 0.0511, 0.2183, 0.3264]) -Greedy action tensor([ 0.4983, -0.6573, -0.0995, 0.0981]) tensor([0.3945, 0.1242, 0.2170, 0.2644]) -Greedy action tensor([ 0.5743, 0.5426, 0.2674, -0.5615]) tensor([0.3305, 0.3202, 0.2432, 0.1062]) -Greedy action tensor([ 0.2976, -0.7365, -0.4875, -0.3210]) tensor([0.4255, 0.1513, 0.1940, 0.2292]) -Greedy action tensor([-0.3765, -0.9443, -0.3705, 0.6790]) tensor([0.1836, 0.1041, 0.1847, 0.5276]) -Greedy action tensor([ 0.9305, -0.2842, 0.8000, -0.4999]) tensor([0.4143, 0.1230, 0.3636, 0.0991]) -Greedy action tensor([-0.4347, -1.1408, 0.9793, -0.0337]) tensor([0.1409, 0.0695, 0.5793, 0.2103]) -Greedy action tensor([ 0.5283, -0.7612, -0.0971, -0.4530]) tensor([0.4576, 0.1260, 0.2448, 0.1715]) -Greedy action tensor([-0.0597, -1.7466, -0.5572, 0.5773]) tensor([0.2714, 0.0502, 0.1651, 0.5132]) -Greedy action tensor([ 0.2064, -0.4839, -0.7129, 0.4488]) tensor([0.3150, 0.1580, 0.1256, 0.4014]) -Greedy action tensor([ 0.9842, 0.2352, -0.6860, -0.0778]) tensor([0.4983, 0.2356, 0.0938, 0.1723]) -Greedy action tensor([ 0.5527, -1.0074, 1.3371, -0.5001]) tensor([0.2667, 0.0560, 0.5843, 0.0931]) -Greedy action tensor([ 0.2166, -0.3164, -0.3436, 0.9637]) tensor([0.2342, 0.1375, 0.1338, 0.4945]) -Greedy action tensor([ 0.4538, -1.0935, 0.3863, 0.2622]) tensor([0.3364, 0.0716, 0.3144, 0.2777]) -Greedy action tensor([-0.3753, -1.3352, 0.0363, -0.3210]) tensor([0.2533, 0.0970, 0.3823, 0.2674]) -Greedy action tensor([ 0.6725, -1.4000, 1.4162, -1.1229]) tensor([0.2945, 0.0371, 0.6195, 0.0489]) -Greedy action tensor([ 0.6511, -0.0804, 0.2240, -0.8407]) tensor([0.4240, 0.2040, 0.2766, 0.0954]) -Greedy action tensor([-0.7891, -0.0277, -0.6299, -0.8698]) tensor([0.1910, 0.4089, 0.2239, 0.1762]) -Greedy action tensor([-0.4351, -0.7917, 0.4239, -0.2663]) tensor([0.1907, 0.1335, 0.4501, 0.2257]) -Greedy action tensor([-0.1426, -0.0572, -0.3824, -1.0725]) tensor([0.3058, 0.3330, 0.2406, 0.1206]) -Greedy action tensor([ 0.4429, -0.8248, -0.6390, -0.1616]) tensor([0.4615, 0.1299, 0.1564, 0.2522]) -Greedy action tensor([-0.2984, -0.9332, -0.0883, -0.6184]) tensor([0.2865, 0.1519, 0.3535, 0.2081]) -Greedy action tensor([-0.6315, -0.4992, 0.7964, -1.0273]) tensor([0.1432, 0.1634, 0.5970, 0.0964]) -Greedy action tensor([-1.1109, 1.2871, 0.2766, -0.3855]) tensor([0.0553, 0.6088, 0.2216, 0.1143]) -Greedy action tensor([-0.4478, -0.5584, 0.5662, -0.6140]) tensor([0.1819, 0.1628, 0.5013, 0.1540]) -Greedy action tensor([ 0.4307, 0.2945, -0.1234, 0.2897]) tensor([0.3016, 0.2632, 0.1733, 0.2619]) -Greedy action tensor([ 0.1216, -1.1388, 0.7176, -0.0107]) tensor([0.2516, 0.0713, 0.4566, 0.2204]) -Greedy action tensor([ 0.8485, 0.2105, -0.6178, -1.0268]) tensor([0.5229, 0.2763, 0.1207, 0.0802]) -Greedy action tensor([ 0.7974, -1.0908, 1.1116, -0.1971]) tensor([0.3460, 0.0524, 0.4737, 0.1280]) -Greedy action tensor([-0.7080, -0.3734, 0.0996, -0.7523]) tensor([0.1787, 0.2497, 0.4007, 0.1709]) -Greedy action tensor([ 0.2925, -0.5846, 0.3000, 0.1459]) tensor([0.3042, 0.1266, 0.3065, 0.2627]) -Greedy action tensor([ 0.3491, -0.6671, -0.5520, -0.0224]) tensor([0.4069, 0.1473, 0.1652, 0.2806]) -Greedy action tensor([-0.2488, 0.1523, -0.4420, -0.5804]) tensor([0.2478, 0.3701, 0.2043, 0.1779]) -Greedy action tensor([ 0.5381, -0.9795, 0.2857, 0.0942]) tensor([0.3791, 0.0831, 0.2946, 0.2432]) -Greedy action tensor([ 1.3015, -0.2487, -0.1483, 0.4332]) tensor([0.5358, 0.1137, 0.1257, 0.2248]) -Greedy action tensor([ 1.0849, -0.2288, -0.4279, 0.1441]) tensor([0.5321, 0.1430, 0.1172, 0.2077]) -Greedy action tensor([ 1.5822, -0.6556, -0.4974, 0.0746]) tensor([0.6882, 0.0734, 0.0860, 0.1524]) -Greedy action tensor([ 1.9899, -0.1875, -0.3763, -0.0428]) tensor([0.7473, 0.0847, 0.0701, 0.0979]) -Greedy action tensor([ 1.6487, -0.9488, -0.4616, 0.0739]) tensor([0.7129, 0.0531, 0.0864, 0.1476]) -Greedy action tensor([ 1.4289, -0.6391, -0.1877, 0.3828]) tensor([0.5965, 0.0754, 0.1185, 0.2096]) -Greedy action tensor([ 1.2409, -0.7153, -0.5544, 0.1809]) tensor([0.6046, 0.0855, 0.1004, 0.2095]) -Greedy action tensor([ 1.9851, -1.0305, -0.0988, 0.2390]) tensor([0.7419, 0.0364, 0.0923, 0.1294]) -Greedy action tensor([ 1.6653, -0.9896, 0.1969, 0.4505]) tensor([0.6260, 0.0440, 0.1442, 0.1858]) -Greedy action tensor([ 1.5127, -0.4065, -0.5288, 0.5833]) tensor([0.5983, 0.0878, 0.0777, 0.2362]) -Greedy action tensor([ 1.4532, -0.5922, -0.3102, 0.5463]) tensor([0.5867, 0.0759, 0.1006, 0.2369]) -Greedy action tensor([ 1.5780, -0.2299, -0.1760, 0.0951]) tensor([0.6394, 0.1049, 0.1107, 0.1451]) -Greedy action tensor([ 1.2553, -0.2018, -0.4333, 0.0141]) tensor([0.5859, 0.1365, 0.1083, 0.1694]) -Greedy action tensor([ 1.8784, -1.0338, -0.3878, 0.1856]) tensor([0.7451, 0.0405, 0.0773, 0.1371]) -Greedy action tensor([ 1.0838, -0.3344, -0.7982, 0.5472]) tensor([0.5053, 0.1223, 0.0769, 0.2954]) -Greedy action tensor([ 1.3469, -0.5954, -1.0215, 0.2929]) tensor([0.6307, 0.0904, 0.0591, 0.2198]) -Greedy action tensor([ 1.2188, -0.3078, -0.6664, 0.1635]) tensor([0.5824, 0.1265, 0.0884, 0.2027]) -Greedy action tensor([ 2.0766, -0.7237, -0.4946, 0.3987]) tensor([0.7553, 0.0459, 0.0577, 0.1411]) -Greedy action tensor([ 1.8039, -1.0342, -0.0554, 0.8082]) tensor([0.6314, 0.0370, 0.0984, 0.2333]) -Greedy action tensor([ 1.7022, -0.6024, -0.3282, 0.0448]) tensor([0.7034, 0.0702, 0.0923, 0.1341]) -Greedy action tensor([ 1.4454, -0.7330, -0.1777, 0.1147]) tensor([0.6350, 0.0719, 0.1253, 0.1678]) -Greedy action tensor([ 2.1515, -0.5767, -0.3602, 0.2666]) tensor([0.7702, 0.0503, 0.0625, 0.1170]) -Greedy action tensor([ 1.5107, -0.5912, 0.0427, 0.5492]) tensor([0.5764, 0.0704, 0.1328, 0.2204]) -Greedy action tensor([ 0.6442, 0.0245, 0.1839, -0.2976]) tensor([0.3908, 0.2103, 0.2466, 0.1524]) -Greedy action tensor([ 1.3880, -0.5394, -0.2188, 0.2915]) tensor([0.5952, 0.0866, 0.1193, 0.1988]) -Greedy action tensor([ 1.8601, 0.3212, -0.1216, 0.2448]) tensor([0.6446, 0.1383, 0.0889, 0.1282]) -Greedy action tensor([ 1.8803, -1.0890, -0.3687, 0.5510]) tensor([0.7035, 0.0361, 0.0742, 0.1862]) -Greedy action tensor([ 1.8200, -0.1861, -0.2092, 0.3332]) tensor([0.6702, 0.0902, 0.0881, 0.1515]) -Greedy action tensor([ 1.3770, -0.2474, -0.4947, 0.1818]) tensor([0.6048, 0.1192, 0.0930, 0.1830]) -Greedy action tensor([ 1.4012, -0.6415, -0.5293, 0.2743]) tensor([0.6255, 0.0811, 0.0907, 0.2027]) -Greedy action tensor([ 1.9720, -0.6388, -0.2952, 0.6483]) tensor([0.6929, 0.0509, 0.0718, 0.1844]) -Greedy action tensor([ 1.5677, -0.7884, -0.5780, 0.5535]) tensor([0.6351, 0.0602, 0.0743, 0.2304]) -Greedy action tensor([ 1.2009, -0.1382, -0.3602, -0.0710]) tensor([0.5707, 0.1496, 0.1198, 0.1600]) -Greedy action tensor([ 1.7124, -0.7343, -0.3085, 0.0571]) tensor([0.7092, 0.0614, 0.0940, 0.1355]) -Greedy action tensor([ 0.7396, -0.3225, -0.1101, -0.0947]) tensor([0.4530, 0.1566, 0.1937, 0.1967]) -Greedy action tensor([ 1.6937, -0.5350, -0.4300, 0.2966]) tensor([0.6782, 0.0730, 0.0811, 0.1677]) -Greedy action tensor([ 1.7206, -0.3518, -0.6948, 0.4004]) tensor([0.6746, 0.0849, 0.0603, 0.1802]) -Greedy action tensor([ 2.0993, -1.3955, 0.0915, 1.1404]) tensor([0.6460, 0.0196, 0.0867, 0.2476]) -Greedy action tensor([ 1.3410, -0.5220, -0.6494, 0.4856]) tensor([0.5824, 0.0904, 0.0796, 0.2476]) -Greedy action tensor([ 1.7152, -0.3884, -0.6822, 0.5537]) tensor([0.6553, 0.0800, 0.0596, 0.2051]) -Greedy action tensor([ 1.5036, -0.6773, -0.1254, 0.2041]) tensor([0.6322, 0.0714, 0.1240, 0.1724]) -Greedy action tensor([ 1.6266, -0.6761, -0.4605, 0.6926]) tensor([0.6184, 0.0618, 0.0767, 0.2430]) -Greedy action tensor([2.2040, 0.5705, 0.0355, 0.1407]) tensor([0.6961, 0.1359, 0.0796, 0.0884]) -Greedy action tensor([ 1.2187, -0.0663, -0.7592, 0.3405]) tensor([0.5463, 0.1511, 0.0756, 0.2270]) -Greedy action tensor([ 1.8321, 0.0318, -0.6314, -0.0265]) tensor([0.7111, 0.1175, 0.0605, 0.1108]) -Greedy action tensor([ 1.5515, -0.3813, -0.0642, 0.3873]) tensor([0.6040, 0.0874, 0.1200, 0.1886]) -Greedy action tensor([ 1.8975, -0.9984, -0.4270, 0.1466]) tensor([0.7538, 0.0416, 0.0737, 0.1309]) -Greedy action tensor([ 1.3814, -0.8262, 0.1010, 0.3063]) tensor([0.5783, 0.0636, 0.1607, 0.1974]) -Greedy action tensor([ 1.3297, -0.4385, 0.1234, 0.1071]) tensor([0.5668, 0.0967, 0.1696, 0.1669]) -Greedy action tensor([ 1.1655, 0.0814, -0.6824, -0.2736]) tensor([0.5771, 0.1952, 0.0909, 0.1368]) -Greedy action tensor([ 1.6092, -1.0097, -0.4032, 0.2424]) tensor([0.6843, 0.0499, 0.0915, 0.1744]) -Greedy action tensor([ 1.1408, -0.3071, -0.5920, 0.1170]) tensor([0.5646, 0.1327, 0.0998, 0.2028]) -Greedy action tensor([ 1.6255, -0.9043, -0.0341, 0.9809]) tensor([0.5572, 0.0444, 0.1060, 0.2924]) -Greedy action tensor([ 1.1851, -0.1045, -0.8249, 0.2842]) tensor([0.5508, 0.1517, 0.0738, 0.2237]) -Greedy action tensor([ 1.5165, -0.5780, -0.7264, -0.0031]) tensor([0.6906, 0.0850, 0.0733, 0.1511]) -Greedy action tensor([ 1.5699, -0.9023, -0.2242, 0.4189]) tensor([0.6382, 0.0539, 0.1061, 0.2019]) -Greedy action tensor([ 1.5304, -0.8130, -0.4192, -0.1560]) tensor([0.7025, 0.0674, 0.1000, 0.1301]) -Greedy action tensor([ 1.6542, -1.2966, -0.2698, -0.0244]) tensor([0.7221, 0.0378, 0.1054, 0.1348]) -Greedy action tensor([ 1.4717, -0.4219, -0.2812, 0.2858]) tensor([0.6138, 0.0924, 0.1064, 0.1875]) -Greedy action tensor([2.1560, 0.2101, 0.0690, 0.2608]) tensor([0.7056, 0.1008, 0.0875, 0.1060]) -Greedy action tensor([ 1.2641, 0.0585, -0.6739, -0.0460]) tensor([0.5837, 0.1748, 0.0840, 0.1575]) -Greedy action tensor([ 1.6403, 0.2208, -0.3311, -0.1725]) tensor([0.6475, 0.1566, 0.0902, 0.1057]) -Greedy action tensor([ 2.1021, -0.7014, -0.5741, 0.4389]) tensor([0.7582, 0.0459, 0.0522, 0.1437]) -Greedy action tensor([ 1.1038, -0.2690, -0.6728, 0.4951]) tensor([0.5085, 0.1288, 0.0860, 0.2766]) -Greedy action tensor([ 2.1149, -0.4460, -0.2054, -0.1324]) tensor([0.7805, 0.0603, 0.0767, 0.0825]) -Greedy action tensor([ 1.2073, -0.4220, -0.2734, 0.2500]) tensor([0.5533, 0.1085, 0.1259, 0.2124]) -Greedy action tensor([ 1.4749, -0.6933, -0.3150, -0.0117]) tensor([0.6633, 0.0759, 0.1108, 0.1500]) -Greedy action tensor([ 1.4222, -0.4840, -0.2178, 0.0674]) tensor([0.6247, 0.0929, 0.1212, 0.1612]) -Greedy action tensor([ 2.1876, -1.0218, -0.4402, 0.3308]) tensor([0.7882, 0.0318, 0.0569, 0.1231]) -Greedy action tensor([ 1.0490, -0.5281, -0.3755, 0.4609]) tensor([0.4994, 0.1032, 0.1202, 0.2773]) -Greedy action tensor([ 1.5783, -0.1885, -0.9586, 0.2257]) tensor([0.6629, 0.1133, 0.0524, 0.1714]) -Greedy action tensor([ 0.8014, -0.2728, 0.1629, -0.1708]) tensor([0.4449, 0.1519, 0.2349, 0.1683]) -Greedy action tensor([ 1.6791, -0.2925, -0.4323, 0.1295]) tensor([0.6791, 0.0945, 0.0822, 0.1442]) -Greedy action tensor([ 2.3037, -1.1162, -0.0748, 0.7505]) tensor([0.7479, 0.0245, 0.0693, 0.1583]) -Greedy action tensor([ 2.0180, -1.0114, -0.1260, 0.8121]) tensor([0.6826, 0.0330, 0.0800, 0.2044]) -Greedy action tensor([ 1.6402, -0.6140, -0.1320, 0.3392]) tensor([0.6463, 0.0678, 0.1099, 0.1760]) -Greedy action tensor([ 1.5831, -0.4866, -0.6417, 0.3051]) tensor([0.6610, 0.0834, 0.0714, 0.1841]) -Greedy action tensor([ 2.1596, -0.8011, -0.4871, 0.3282]) tensor([0.7795, 0.0404, 0.0553, 0.1249]) -Greedy action tensor([ 1.1291, -0.0347, -1.0657, 0.5469]) tensor([0.5045, 0.1575, 0.0562, 0.2818]) -Greedy action tensor([ 0.8786, -0.3373, -0.3288, 0.1945]) tensor([0.4762, 0.1412, 0.1424, 0.2403]) -Greedy action tensor([ 1.6658, -0.3095, -0.1918, 0.4391]) tensor([0.6297, 0.0874, 0.0983, 0.1847]) -Greedy action tensor([ 1.2517, -0.3095, -0.4753, 0.1172]) tensor([0.5850, 0.1228, 0.1040, 0.1881]) -Greedy action tensor([ 1.6700, -0.7634, -0.1877, 0.4916]) tensor([0.6445, 0.0566, 0.1006, 0.1984]) -Greedy action tensor([-1.1115, -0.2658, 0.6682, 0.1108]) tensor([0.0790, 0.1841, 0.4685, 0.2683]) -Greedy action tensor([-1.9419, -0.4542, 0.6644, -0.1797]) tensor([0.0403, 0.1785, 0.5463, 0.2349]) -Greedy action tensor([-1.8655, -0.3716, 0.6194, -0.1399]) tensor([0.0433, 0.1931, 0.5201, 0.2434]) -Greedy action tensor([-0.7487, 0.8606, 0.1508, -0.1171]) tensor([0.0967, 0.4836, 0.2378, 0.1819]) -Greedy action tensor([-1.9323, -0.4049, 0.6559, -0.1675]) tensor([0.0404, 0.1861, 0.5375, 0.2360]) -Greedy action tensor([-1.5979, 0.3854, 0.3744, 0.1142]) tensor([0.0476, 0.3461, 0.3423, 0.2639]) -Greedy action tensor([-1.6812, 0.2106, 0.4695, -0.1753]) tensor([0.0482, 0.3199, 0.4144, 0.2175]) -Greedy action tensor([-1.8855, -0.4483, 0.6423, -0.1480]) tensor([0.0427, 0.1797, 0.5349, 0.2427]) -Greedy action tensor([-1.8291, -0.4315, 0.6093, -0.1296]) tensor([0.0455, 0.1841, 0.5214, 0.2490]) -Greedy action tensor([-1.8005, -0.3288, 0.5803, -0.1228]) tensor([0.0465, 0.2024, 0.5024, 0.2487]) -Greedy action tensor([-1.8845, -0.2832, 0.6058, -0.1522]) tensor([0.0422, 0.2095, 0.5095, 0.2388]) -Greedy action tensor([-1.9050, -0.4382, 0.6477, -0.1593]) tensor([0.0418, 0.1813, 0.5371, 0.2397]) -Greedy action tensor([-1.5829, -0.2880, 0.4662, -0.0616]) tensor([0.0589, 0.2149, 0.4568, 0.2695]) -Greedy action tensor([-1.0698, 0.2219, 0.3179, -0.1877]) tensor([0.0904, 0.3290, 0.3622, 0.2184]) -Greedy action tensor([-1.3139, -0.0710, 0.2778, 0.1395]) tensor([0.0732, 0.2538, 0.3597, 0.3133]) -Greedy action tensor([-1.8942, -0.5287, 0.1160, -0.3319]) tensor([0.0583, 0.2284, 0.4352, 0.2781]) -Greedy action tensor([-1.9018, -0.3536, 0.3261, -0.2742]) tensor([0.0498, 0.2343, 0.4623, 0.2536]) -Greedy action tensor([-1.8842, -0.3132, 0.6263, -0.1373]) tensor([0.0419, 0.2017, 0.5160, 0.2404]) -Greedy action tensor([-1.9216, -0.4283, 0.6554, -0.1682]) tensor([0.0410, 0.1826, 0.5396, 0.2368]) -Greedy action tensor([-1.8926, -0.4933, 0.7184, -0.1062]) tensor([0.0406, 0.1645, 0.5526, 0.2423]) -Greedy action tensor([-1.7756, -0.3899, 0.5676, -0.0821]) tensor([0.0480, 0.1917, 0.4995, 0.2608]) -Greedy action tensor([-1.9212, -0.4426, 0.6583, -0.1675]) tensor([0.0411, 0.1801, 0.5416, 0.2372]) -Greedy action tensor([-1.8124, -0.4046, 0.6509, -0.1166]) tensor([0.0449, 0.1834, 0.5271, 0.2446]) -Greedy action tensor([-1.9030, -0.4348, 0.6429, -0.1665]) tensor([0.0421, 0.1826, 0.5365, 0.2388]) -Greedy action tensor([-1.5250, -0.4597, 0.4472, 0.0452]) tensor([0.0629, 0.1825, 0.4521, 0.3024]) -Greedy action tensor([-1.9243, -0.4189, 0.6559, -0.1674]) tensor([0.0408, 0.1839, 0.5388, 0.2365]) -Greedy action tensor([-1.6043, -0.0906, 0.5670, 0.0672]) tensor([0.0509, 0.2314, 0.4467, 0.2710]) -Greedy action tensor([-1.8903, -0.4652, 0.6356, -0.1530]) tensor([0.0428, 0.1781, 0.5356, 0.2434]) -Greedy action tensor([-1.8679, -0.4252, 0.6281, -0.1596]) tensor([0.0437, 0.1849, 0.5302, 0.2412]) -Greedy action tensor([-1.8153, -0.1228, 0.5640, -0.0870]) tensor([0.0437, 0.2377, 0.4723, 0.2463]) -Greedy action tensor([-1.5845, -0.4991, 0.4934, 0.0339]) tensor([0.0588, 0.1742, 0.4701, 0.2969]) -Greedy action tensor([-1.1978, -0.3440, 0.3785, -0.7731]) tensor([0.1029, 0.2418, 0.4979, 0.1574]) -Greedy action tensor([-0.6275, 0.9895, 0.0800, 0.1890]) tensor([0.0968, 0.4877, 0.1964, 0.2190]) -Greedy action tensor([-1.8748, -0.4435, 0.6344, -0.1483]) tensor([0.0433, 0.1811, 0.5322, 0.2433]) -Greedy action tensor([-0.9381, 0.6856, 0.1514, -0.0146]) tensor([0.0865, 0.4386, 0.2571, 0.2178]) -Greedy action tensor([-1.9006, -0.3809, 0.6458, -0.1566]) tensor([0.0416, 0.1900, 0.5305, 0.2378]) -Greedy action tensor([-1.4552, -0.4179, 0.3633, 0.0146]) tensor([0.0698, 0.1969, 0.4300, 0.3034]) -Greedy action tensor([-0.9351, 0.4479, -0.8387, -0.6467]) tensor([0.1347, 0.5371, 0.1484, 0.1798]) -Greedy action tensor([-1.6745, 0.1406, 0.4560, -0.0507]) tensor([0.0485, 0.2977, 0.4080, 0.2458]) -Greedy action tensor([-1.9079, -0.4069, 0.6480, -0.1589]) tensor([0.0415, 0.1860, 0.5342, 0.2384]) -Greedy action tensor([-1.8319, -0.3497, 0.6195, -0.1333]) tensor([0.0445, 0.1959, 0.5164, 0.2432]) -Greedy action tensor([-1.9455, -0.4374, 0.6625, -0.1823]) tensor([0.0401, 0.1813, 0.5446, 0.2340]) -Greedy action tensor([-1.7289, -0.1844, 0.6046, -0.0697]) tensor([0.0471, 0.2204, 0.4853, 0.2473]) -Greedy action tensor([-1.7431, -0.4090, 0.5722, -0.0542]) tensor([0.0492, 0.1867, 0.4980, 0.2662]) -Greedy action tensor([-1.7077, -0.2739, 0.4975, -0.1259]) tensor([0.0523, 0.2193, 0.4742, 0.2542]) -Greedy action tensor([-1.9054, -0.4302, 0.6615, -0.1584]) tensor([0.0414, 0.1811, 0.5397, 0.2377]) -Greedy action tensor([-1.5949, -0.4404, 0.5279, -0.1299]) tensor([0.0593, 0.1882, 0.4957, 0.2568]) -Greedy action tensor([-1.8806, -0.4234, 0.6409, -0.1484]) tensor([0.0427, 0.1835, 0.5321, 0.2417]) -Greedy action tensor([-1.2335, -0.2610, 0.2353, 0.2255]) tensor([0.0814, 0.2152, 0.3535, 0.3500]) -Greedy action tensor([-1.7144, 0.1545, 0.4800, -0.0227]) tensor([0.0457, 0.2962, 0.4101, 0.2481]) -Greedy action tensor([-1.7276, 0.1524, 0.4831, 0.0172]) tensor([0.0446, 0.2926, 0.4072, 0.2556]) -Greedy action tensor([-1.6966, 0.1948, 0.4531, -0.0595]) tensor([0.0468, 0.3105, 0.4020, 0.2408]) -Greedy action tensor([-1.4957, -0.2277, 0.4057, -0.0671]) tensor([0.0648, 0.2304, 0.4341, 0.2706]) -Greedy action tensor([-1.8831, -0.4555, 0.6367, -0.1411]) tensor([0.0429, 0.1789, 0.5332, 0.2450]) -Greedy action tensor([-1.3366, 0.5287, 0.1803, 0.0693]) tensor([0.0621, 0.4012, 0.2832, 0.2534]) -Greedy action tensor([-1.8506, -0.3754, 0.6294, -0.1302]) tensor([0.0437, 0.1909, 0.5214, 0.2440]) -Greedy action tensor([-1.7599, -0.4872, 0.5694, -0.1256]) tensor([0.0501, 0.1788, 0.5144, 0.2567]) -Greedy action tensor([-1.7193, 0.0486, 0.5433, -0.0199]) tensor([0.0456, 0.2671, 0.4380, 0.2494]) -Greedy action tensor([-1.7365, -0.3216, 0.5397, -0.0977]) tensor([0.0500, 0.2057, 0.4869, 0.2574]) -Greedy action tensor([-1.7176, -0.4785, 0.5618, -0.0542]) tensor([0.0513, 0.1770, 0.5011, 0.2706]) -Greedy action tensor([-1.7795, -0.2784, 0.6018, -0.2128]) tensor([0.0474, 0.2127, 0.5128, 0.2271]) -Greedy action tensor([-1.8740, -0.3017, 0.6102, -0.1300]) tensor([0.0425, 0.2048, 0.5096, 0.2431]) -Greedy action tensor([-1.9268, -0.4257, 0.6573, -0.1711]) tensor([0.0408, 0.1829, 0.5403, 0.2360]) -Greedy action tensor([-1.2701, 0.7002, 0.2662, 0.2210]) tensor([0.0579, 0.4155, 0.2692, 0.2573]) -Greedy action tensor([-1.1823, 0.2354, 0.2562, -0.0117]) tensor([0.0796, 0.3285, 0.3354, 0.2565]) -Greedy action tensor([-1.9009, -0.4250, 0.6432, -0.1604]) tensor([0.0420, 0.1838, 0.5348, 0.2394]) -Greedy action tensor([-1.7870, -0.4246, 0.5840, -0.1092]) tensor([0.0477, 0.1863, 0.5107, 0.2553]) -Greedy action tensor([-1.8991, -0.3991, 0.6459, -0.1555]) tensor([0.0418, 0.1872, 0.5322, 0.2388]) -Greedy action tensor([-1.8803, -0.4465, 0.6381, -0.1484]) tensor([0.0430, 0.1804, 0.5336, 0.2430]) -Greedy action tensor([-1.8838, -0.4359, 0.6380, -0.1511]) tensor([0.0428, 0.1821, 0.5330, 0.2421]) -Greedy action tensor([-1.8230, -0.3712, 0.5924, -0.1463]) tensor([0.0458, 0.1958, 0.5132, 0.2452]) -Greedy action tensor([-0.7553, 0.8485, 0.1069, 0.0378]) tensor([0.0948, 0.4712, 0.2245, 0.2095]) -Greedy action tensor([-1.9142, -0.4129, 0.6563, -0.1695]) tensor([0.0412, 0.1848, 0.5383, 0.2357]) -Greedy action tensor([-1.8766, -0.3015, 0.6118, -0.1359]) tensor([0.0424, 0.2049, 0.5108, 0.2418]) -Greedy action tensor([-1.5564, -0.0695, 0.4186, -0.0361]) tensor([0.0581, 0.2571, 0.4189, 0.2659]) -Greedy action tensor([-1.9359, -0.4450, 0.6640, -0.1754]) tensor([0.0405, 0.1797, 0.5446, 0.2353]) -Greedy action tensor([-1.7208, -0.4317, 0.5519, -0.1034]) tensor([0.0516, 0.1873, 0.5009, 0.2601]) -Greedy action tensor([-1.8595, -0.4381, 0.6273, -0.1393]) tensor([0.0440, 0.1821, 0.5284, 0.2455]) -Greedy action tensor([-1.6774, -0.3815, 0.5577, -0.1740]) tensor([0.0541, 0.1975, 0.5053, 0.2431]) -Greedy action tensor([-1.1855, 0.4439, 0.2693, -0.0558]) tensor([0.0742, 0.3784, 0.3178, 0.2296]) -Greedy action tensor([-1.8684, -0.2800, 0.6022, -0.1386]) tensor([0.0428, 0.2095, 0.5063, 0.2414]) -Greedy action tensor([ 1.4760, -1.0445, -0.0127, -0.9360]) tensor([0.7165, 0.0576, 0.1617, 0.0642]) -Greedy action tensor([ 1.2420, -0.7365, 0.1220, -0.4290]) tensor([0.6051, 0.0837, 0.1974, 0.1138]) -Greedy action tensor([ 0.7639, -0.2963, 0.0192, -0.2323]) tensor([0.4565, 0.1581, 0.2168, 0.1686]) -Greedy action tensor([ 0.9552, -0.3858, 0.0619, -0.4509]) tensor([0.5219, 0.1365, 0.2136, 0.1279]) -Greedy action tensor([ 0.4705, -0.0171, -0.1058, -0.2439]) tensor([0.3752, 0.2304, 0.2108, 0.1836]) -Greedy action tensor([ 1.2093, -0.5393, -0.2518, -1.0860]) tensor([0.6637, 0.1155, 0.1540, 0.0669]) -Greedy action tensor([ 0.3592, -0.0680, -0.1312, -0.1904]) tensor([0.3519, 0.2295, 0.2155, 0.2031]) -Greedy action tensor([ 0.6271, 0.0339, -0.1220, 0.0473]) tensor([0.3868, 0.2137, 0.1829, 0.2166]) -Greedy action tensor([ 0.8434, -0.5303, -0.1397, -0.3006]) tensor([0.5139, 0.1301, 0.1923, 0.1637]) -Greedy action tensor([ 0.9458, -0.3718, -0.1086, -0.5011]) tensor([0.5401, 0.1446, 0.1882, 0.1271]) -Greedy action tensor([ 0.5365, -0.0551, 0.0620, -0.0684]) tensor([0.3674, 0.2033, 0.2286, 0.2007]) -Greedy action tensor([ 0.5408, -0.2000, -0.0771, -0.1597]) tensor([0.3981, 0.1898, 0.2146, 0.1976]) -Greedy action tensor([ 0.6395, -0.2269, 0.0278, -0.0356]) tensor([0.4045, 0.1701, 0.2194, 0.2059]) -Greedy action tensor([ 0.8877, -0.2998, -0.0816, -0.5072]) tensor([0.5175, 0.1578, 0.1963, 0.1283]) -Greedy action tensor([ 0.4897, -0.3624, 0.0272, -0.2662]) tensor([0.3959, 0.1689, 0.2493, 0.1859]) -Greedy action tensor([ 0.9918, -0.6749, 0.0158, -0.5208]) tensor([0.5599, 0.1057, 0.2110, 0.1234]) -Greedy action tensor([ 0.8551, -0.3269, -0.0915, -0.2132]) tensor([0.4906, 0.1504, 0.1904, 0.1686]) -Greedy action tensor([ 0.3455, 0.4588, -0.1175, 0.1819]) tensor([0.2779, 0.3112, 0.1749, 0.2360]) -Greedy action tensor([ 0.6060, -0.1665, -0.0666, -0.1491]) tensor([0.4095, 0.1891, 0.2090, 0.1924]) -Greedy action tensor([ 1.0588, -0.5958, -0.2821, -0.4676]) tensor([0.5988, 0.1145, 0.1566, 0.1301]) -Greedy action tensor([ 0.7304, -0.2860, 0.0130, -0.0257]) tensor([0.4311, 0.1560, 0.2104, 0.2024]) -Greedy action tensor([ 1.0549, -0.9101, -0.0480, -0.3132]) tensor([0.5792, 0.0812, 0.1922, 0.1474]) -Greedy action tensor([ 1.0247, -0.6180, -0.0524, -0.4391]) tensor([0.5665, 0.1096, 0.1929, 0.1310]) -Greedy action tensor([ 1.0273, -0.6892, 0.0222, -0.6018]) tensor([0.5741, 0.1032, 0.2101, 0.1126]) -Greedy action tensor([ 0.9187, -0.6727, 0.0629, -0.4001]) tensor([0.5274, 0.1074, 0.2241, 0.1411]) -Greedy action tensor([ 0.7757, -0.3589, 0.0935, -0.3159]) tensor([0.4624, 0.1487, 0.2337, 0.1552]) -Greedy action tensor([ 0.4821, -0.0543, -0.0240, -0.0175]) tensor([0.3578, 0.2093, 0.2157, 0.2171]) -Greedy action tensor([ 0.5482, -0.1618, 0.0265, -0.0522]) tensor([0.3797, 0.1867, 0.2253, 0.2083]) -Greedy action tensor([ 0.6790, -0.3755, -0.0193, -0.2748]) tensor([0.4482, 0.1561, 0.2230, 0.1727]) -Greedy action tensor([ 1.1722, -0.9363, 0.0120, -0.5850]) tensor([0.6221, 0.0755, 0.1950, 0.1073]) -Greedy action tensor([ 1.2536, -0.9123, 0.1566, -0.8881]) tensor([0.6386, 0.0732, 0.2132, 0.0750]) -Greedy action tensor([ 0.6529, -0.1069, 0.1383, -0.0017]) tensor([0.3868, 0.1809, 0.2312, 0.2010]) -Greedy action tensor([ 0.8211, -0.0845, -0.0166, -0.3787]) tensor([0.4677, 0.1891, 0.2024, 0.1409]) -Greedy action tensor([ 0.9021, -0.4969, -0.2494, -0.4081]) tensor([0.5456, 0.1347, 0.1725, 0.1472]) -Greedy action tensor([ 0.7727, -0.5831, -0.0276, -0.2471]) tensor([0.4836, 0.1247, 0.2172, 0.1744]) -Greedy action tensor([ 0.9409, -0.5067, -0.1074, -0.1051]) tensor([0.5163, 0.1214, 0.1810, 0.1814]) -Greedy action tensor([ 0.7678, -0.0923, -0.1289, -0.0961]) tensor([0.4439, 0.1878, 0.1811, 0.1871]) -Greedy action tensor([ 0.7140, -0.5791, -0.0993, -0.2132]) tensor([0.4732, 0.1298, 0.2098, 0.1872]) -Greedy action tensor([ 0.4216, -0.3804, -0.1484, -0.1221]) tensor([0.3854, 0.1728, 0.2180, 0.2238]) -Greedy action tensor([ 0.6746, -0.3536, 0.0337, -0.0613]) tensor([0.4231, 0.1513, 0.2229, 0.2027]) -Greedy action tensor([ 0.7971, -0.5736, -0.0293, -0.4956]) tensor([0.5086, 0.1292, 0.2226, 0.1396]) -Greedy action tensor([ 0.6159, -0.1465, -0.0565, -0.0080]) tensor([0.3979, 0.1857, 0.2031, 0.2133]) -Greedy action tensor([ 0.9043, -0.3917, -0.0399, -0.3042]) tensor([0.5099, 0.1395, 0.1983, 0.1523]) -Greedy action tensor([ 0.8150, -0.6667, -0.1294, -0.3336]) tensor([0.5173, 0.1175, 0.2012, 0.1640]) -Greedy action tensor([ 0.4625, -0.1652, -0.0104, -0.0406]) tensor([0.3621, 0.1933, 0.2257, 0.2189]) -Greedy action tensor([ 0.6264, 0.0902, -0.0248, 0.0384]) tensor([0.3757, 0.2198, 0.1959, 0.2087]) -Greedy action tensor([ 0.9968, -0.6297, -0.0128, -0.4841]) tensor([0.5591, 0.1099, 0.2037, 0.1272]) -Greedy action tensor([ 0.2139, 0.0334, -0.1352, -0.2362]) tensor([0.3147, 0.2627, 0.2220, 0.2006]) -Greedy action tensor([ 1.1657, -0.6106, -0.0849, -0.4428]) tensor([0.6039, 0.1022, 0.1729, 0.1209]) -Greedy action tensor([ 1.2410, -0.8948, -0.0068, -0.5957]) tensor([0.6391, 0.0755, 0.1835, 0.1018]) -Greedy action tensor([ 1.0136, -0.3339, -0.3756, -0.5619]) tensor([0.5827, 0.1514, 0.1453, 0.1206]) -Greedy action tensor([ 0.4271, -0.1470, -0.0859, -0.1079]) tensor([0.3640, 0.2050, 0.2179, 0.2132]) -Greedy action tensor([ 1.1629, -0.9719, 0.0531, -0.6452]) tensor([0.6204, 0.0734, 0.2045, 0.1017]) -Greedy action tensor([ 0.7857, -0.3528, 0.0053, -0.2018]) tensor([0.4649, 0.1489, 0.2130, 0.1732]) -Greedy action tensor([ 0.5540, -0.3892, -0.0523, -0.0649]) tensor([0.4043, 0.1574, 0.2205, 0.2177]) -Greedy action tensor([ 0.9946, -0.5486, 0.0316, -0.3823]) tensor([0.5412, 0.1157, 0.2066, 0.1366]) -Greedy action tensor([ 0.8686, -0.1775, -0.0489, -0.0265]) tensor([0.4631, 0.1627, 0.1850, 0.1892]) -Greedy action tensor([ 1.1932, -0.7651, 0.1039, -0.6745]) tensor([0.6127, 0.0865, 0.2061, 0.0947]) -Greedy action tensor([ 0.4765, -0.2784, -0.1211, 0.0035]) tensor([0.3783, 0.1778, 0.2081, 0.2357]) -Greedy action tensor([ 1.0375, -0.1730, -0.0022, -0.2377]) tensor([0.5179, 0.1543, 0.1831, 0.1447]) -Greedy action tensor([ 0.7219, -0.6080, 0.1468, -0.2437]) tensor([0.4529, 0.1198, 0.2548, 0.1724]) -Greedy action tensor([ 0.8749, -0.5737, 0.0550, -0.4648]) tensor([0.5162, 0.1212, 0.2274, 0.1352]) -Greedy action tensor([ 0.5553, 0.2486, -0.2013, 0.1733]) tensor([0.3463, 0.2548, 0.1625, 0.2364]) -Greedy action tensor([ 0.5840, -0.3087, 0.0472, -0.1238]) tensor([0.4021, 0.1647, 0.2351, 0.1981]) -Greedy action tensor([ 1.1360, -0.5070, -0.1964, -0.8608]) tensor([0.6277, 0.1214, 0.1656, 0.0852]) -Greedy action tensor([ 0.7744, -0.3430, -0.0312, -0.8792]) tensor([0.5088, 0.1664, 0.2273, 0.0974]) -Greedy action tensor([ 0.3867, -0.0843, -0.0790, -0.1504]) tensor([0.3526, 0.2201, 0.2213, 0.2060]) -Greedy action tensor([ 0.5639, -0.3032, -0.0088, -0.2465]) tensor([0.4117, 0.1730, 0.2322, 0.1831]) -Greedy action tensor([ 0.7035, -0.4369, -0.1037, -0.2474]) tensor([0.4646, 0.1485, 0.2073, 0.1795]) -Greedy action tensor([ 0.8256, -0.4283, -0.0641, -0.1982]) tensor([0.4865, 0.1389, 0.1999, 0.1748]) -Greedy action tensor([ 0.5318, -0.3514, -0.0025, -0.1411]) tensor([0.3984, 0.1647, 0.2335, 0.2033]) -Greedy action tensor([ 0.5982, -0.4691, 0.0828, -0.2432]) tensor([0.4215, 0.1450, 0.2518, 0.1817]) -Greedy action tensor([ 0.7864, -0.6223, 0.0314, -0.3802]) tensor([0.4936, 0.1207, 0.2320, 0.1537]) -Greedy action tensor([ 0.8026, -0.4464, -0.0373, -0.3799]) tensor([0.4938, 0.1416, 0.2132, 0.1514]) -Greedy action tensor([ 0.9016, -0.5639, -0.1588, -0.3301]) tensor([0.5350, 0.1236, 0.1853, 0.1561]) -Greedy action tensor([ 0.1825, 0.0207, -0.0731, -0.5514]) tensor([0.3221, 0.2739, 0.2494, 0.1546]) -Greedy action tensor([ 0.4688, 0.1058, -0.0160, 0.0153]) tensor([0.3394, 0.2360, 0.2090, 0.2156]) -Greedy action tensor([ 0.9745, -0.5817, 0.0867, -0.4669]) tensor([0.5379, 0.1135, 0.2214, 0.1273]) -Greedy action tensor([ 1.0529, -0.6098, -0.0830, -0.4481]) tensor([0.5768, 0.1094, 0.1852, 0.1286]) -Greedy action tensor([ 0.8253, -0.6678, 0.1231, -0.5664]) tensor([0.5079, 0.1141, 0.2517, 0.1263]) -Greedy action tensor([ 1.3017, -1.0223, -0.0807, -0.7829]) tensor([0.6788, 0.0664, 0.1704, 0.0844]) -Greedy action tensor([ 1.3097, -0.5903, -0.1228, 0.0492]) tensor([0.5982, 0.0895, 0.1428, 0.1696]) -Greedy action tensor([ 1.2208, -0.5586, -0.4164, 0.0735]) tensor([0.5950, 0.1004, 0.1157, 0.1889]) -Greedy action tensor([ 1.2572, 0.0044, -0.7704, 0.5547]) tensor([0.5228, 0.1494, 0.0688, 0.2590]) -Greedy action tensor([ 1.3501, -0.2469, -0.7672, 0.1830]) tensor([0.6119, 0.1239, 0.0737, 0.1905]) -Greedy action tensor([ 2.2567, -1.0287, -0.3138, 0.7834]) tensor([0.7446, 0.0279, 0.0570, 0.1706]) -Greedy action tensor([ 1.8171, -0.6555, -0.6612, 0.6976]) tensor([0.6690, 0.0564, 0.0561, 0.2184]) -Greedy action tensor([ 1.3433, -0.2948, 0.0149, 0.4638]) tensor([0.5336, 0.1037, 0.1413, 0.2214]) -Greedy action tensor([ 1.5059, -0.4308, 0.0355, -0.2117]) tensor([0.6437, 0.0928, 0.1479, 0.1155]) -Greedy action tensor([ 1.2101e+00, 2.5298e-02, -2.7978e-01, 7.2140e-04]) tensor([0.5466, 0.1671, 0.1232, 0.1631]) -Greedy action tensor([ 1.6578, -0.5674, -0.8198, 0.2871]) tensor([0.6916, 0.0747, 0.0581, 0.1756]) -Greedy action tensor([ 1.5878, -0.3738, -0.8825, 0.4290]) tensor([0.6497, 0.0914, 0.0549, 0.2039]) -Greedy action tensor([ 1.4556, -0.6157, -0.2792, 0.4258]) tensor([0.6026, 0.0759, 0.1063, 0.2152]) -Greedy action tensor([ 1.1414, -0.0287, -0.6099, 0.1001]) tensor([0.5444, 0.1689, 0.0945, 0.1922]) -Greedy action tensor([ 1.5174, -0.6520, -0.5078, -0.1055]) tensor([0.6927, 0.0791, 0.0914, 0.1367]) -Greedy action tensor([ 1.4231, -0.2425, -0.8320, 0.2733]) tensor([0.6209, 0.1174, 0.0651, 0.1966]) -Greedy action tensor([ 1.0645, -0.3429, -0.4861, 0.3121]) tensor([0.5186, 0.1270, 0.1100, 0.2444]) -Greedy action tensor([ 1.1309, 0.1335, -1.3385, 0.5501]) tensor([0.4968, 0.1832, 0.0420, 0.2779]) -Greedy action tensor([ 1.3667, -0.1924, -0.4987, 0.2555]) tensor([0.5902, 0.1241, 0.0914, 0.1943]) -Greedy action tensor([ 2.1902, -1.1050, -0.5144, 0.5630]) tensor([0.7690, 0.0285, 0.0514, 0.1511]) -Greedy action tensor([ 1.2604, -0.3469, -0.5785, 0.1292]) tensor([0.5945, 0.1192, 0.0945, 0.1918]) -Greedy action tensor([ 1.3575, -0.2894, -0.3244, 0.1590]) tensor([0.5951, 0.1147, 0.1107, 0.1795]) -Greedy action tensor([2.0202, 0.4390, 0.0395, 0.0696]) tensor([0.6730, 0.1385, 0.0929, 0.0957]) -Greedy action tensor([ 1.3847, -0.5822, -0.3212, 0.3289]) tensor([0.5990, 0.0838, 0.1088, 0.2084]) -Greedy action tensor([ 0.8425, 0.4449, -0.8047, 0.2394]) tensor([0.4147, 0.2786, 0.0799, 0.2269]) -Greedy action tensor([ 1.4890, -0.4704, -0.6810, 0.0892]) tensor([0.6659, 0.0939, 0.0760, 0.1642]) -Greedy action tensor([ 1.7092, -0.2775, -0.4408, 0.4089]) tensor([0.6553, 0.0899, 0.0763, 0.1785]) -Greedy action tensor([ 1.3330, -0.0698, -0.9019, 0.2518]) tensor([0.5910, 0.1453, 0.0632, 0.2005]) -Greedy action tensor([ 1.5378, -0.5855, -1.1633, 0.5174]) tensor([0.6463, 0.0773, 0.0434, 0.2330]) -Greedy action tensor([ 1.5042, -0.5896, -0.8224, 0.7189]) tensor([0.5963, 0.0735, 0.0582, 0.2719]) -Greedy action tensor([ 2.4720, -0.5529, 0.4964, -0.2577]) tensor([0.7984, 0.0388, 0.1107, 0.0521]) -Greedy action tensor([ 1.1780, -0.5707, -0.3388, 0.4290]) tensor([0.5358, 0.0932, 0.1176, 0.2534]) -Greedy action tensor([ 1.6266, -0.0809, -0.2889, 0.7004]) tensor([0.5798, 0.1051, 0.0854, 0.2296]) -Greedy action tensor([ 0.9885, 0.1124, 0.3025, -0.1925]) tensor([0.4490, 0.1870, 0.2261, 0.1378]) -Greedy action tensor([ 0.8918, -0.2672, -0.2260, 0.1320]) tensor([0.4742, 0.1488, 0.1551, 0.2218]) -Greedy action tensor([ 1.0916, -0.2744, -0.7365, 0.1270]) tensor([0.5565, 0.1420, 0.0894, 0.2121]) -Greedy action tensor([ 1.7602, -0.0161, -0.5983, -0.2304]) tensor([0.7141, 0.1209, 0.0675, 0.0976]) -Greedy action tensor([ 1.7381, -0.8823, -0.5707, 0.3541]) tensor([0.7029, 0.0511, 0.0699, 0.1761]) -Greedy action tensor([ 1.4607, -0.1334, -1.0430, 0.4383]) tensor([0.6080, 0.1235, 0.0497, 0.2187]) -Greedy action tensor([ 1.8708, -0.4520, -0.3617, 0.4330]) tensor([0.6931, 0.0679, 0.0743, 0.1646]) -Greedy action tensor([ 0.8799, -0.5141, -0.5203, 0.5647]) tensor([0.4496, 0.1115, 0.1108, 0.3280]) -Greedy action tensor([ 1.0084, -0.2610, -0.0273, -0.0445]) tensor([0.5038, 0.1416, 0.1788, 0.1758]) -Greedy action tensor([ 1.4457, -0.3630, -0.8606, 0.7188]) tensor([0.5724, 0.0938, 0.0570, 0.2767]) -Greedy action tensor([ 1.6755, -0.2618, -0.7249, -0.3360]) tensor([0.7307, 0.1053, 0.0663, 0.0978]) -Greedy action tensor([ 1.4183, -0.6266, -0.6403, 0.5939]) tensor([0.5898, 0.0763, 0.0753, 0.2586]) -Greedy action tensor([ 1.2699, 0.0740, -0.6132, 0.0585]) tensor([0.5707, 0.1726, 0.0868, 0.1699]) -Greedy action tensor([ 1.4994, 0.2010, -0.4539, -0.0047]) tensor([0.6109, 0.1667, 0.0866, 0.1357]) -Greedy action tensor([ 0.9288, -0.2862, -0.1321, 0.0902]) tensor([0.4819, 0.1430, 0.1668, 0.2083]) -Greedy action tensor([ 2.0438, -0.8968, -0.2597, 0.3531]) tensor([0.7479, 0.0395, 0.0747, 0.1379]) -Greedy action tensor([ 2.0111, -0.2527, -0.6744, 0.4856]) tensor([0.7196, 0.0748, 0.0491, 0.1565]) -Greedy action tensor([ 1.9186, -0.4800, -0.8186, 0.2974]) tensor([0.7390, 0.0671, 0.0478, 0.1461]) -Greedy action tensor([ 1.1943, -0.0992, -0.4385, 0.1575]) tensor([0.5482, 0.1504, 0.1071, 0.1944]) -Greedy action tensor([ 2.0387, -1.4290, 0.1249, 0.8467]) tensor([0.6746, 0.0210, 0.0995, 0.2048]) -Greedy action tensor([ 1.5427, -0.6589, -0.5688, 0.3381]) tensor([0.6529, 0.0722, 0.0790, 0.1958]) -Greedy action tensor([ 1.0171, 0.1001, -0.0051, -0.4570]) tensor([0.5029, 0.2010, 0.1809, 0.1152]) -Greedy action tensor([ 1.1070, -0.1753, -0.5245, 0.2230]) tensor([0.5302, 0.1471, 0.1037, 0.2190]) -Greedy action tensor([ 1.3129, -0.9399, -0.1631, 0.3783]) tensor([0.5792, 0.0609, 0.1324, 0.2275]) -Greedy action tensor([ 1.1087, -0.2417, -0.3555, 0.2561]) tensor([0.5217, 0.1352, 0.1207, 0.2224]) -Greedy action tensor([ 0.9506, -0.2388, -0.1258, 0.1763]) tensor([0.4748, 0.1445, 0.1618, 0.2189]) -Greedy action tensor([ 1.4718, -0.7948, 0.0161, -0.1269]) tensor([0.6498, 0.0674, 0.1515, 0.1314]) -Greedy action tensor([ 2.0181, -1.0926, -0.5145, 0.2500]) tensor([0.7724, 0.0344, 0.0614, 0.1318]) -Greedy action tensor([ 1.2670, -0.4955, -0.8607, 0.2264]) tensor([0.6083, 0.1044, 0.0725, 0.2149]) -Greedy action tensor([ 1.8048, -0.9312, -0.1160, 0.1998]) tensor([0.7081, 0.0459, 0.1037, 0.1423]) -Greedy action tensor([ 1.4158, -0.5953, -1.2805, 0.1510]) tensor([0.6740, 0.0902, 0.0455, 0.1903]) -Greedy action tensor([ 1.3421, -0.1173, -0.7926, 0.3987]) tensor([0.5747, 0.1336, 0.0680, 0.2237]) -Greedy action tensor([ 1.1635, -0.5771, -0.4248, 0.5927]) tensor([0.5142, 0.0902, 0.1050, 0.2906]) -Greedy action tensor([ 1.2449, -0.5284, -0.7843, 0.6266]) tensor([0.5434, 0.0923, 0.0714, 0.2929]) -Greedy action tensor([ 0.9668, 0.2116, -0.6402, 0.1645]) tensor([0.4720, 0.2218, 0.0946, 0.2116]) -Greedy action tensor([ 1.1971, -0.4572, -0.6001, 0.4487]) tensor([0.5464, 0.1045, 0.0906, 0.2585]) -Greedy action tensor([ 2.3944, -0.9932, -0.6479, 0.4006]) tensor([0.8212, 0.0277, 0.0392, 0.1118]) -Greedy action tensor([ 1.4109, 0.2193, -1.2269, 0.4267]) tensor([0.5718, 0.1737, 0.0409, 0.2137]) -Greedy action tensor([ 2.0264, -0.7867, -0.3228, 0.2850]) tensor([0.7515, 0.0451, 0.0717, 0.1317]) -Greedy action tensor([ 1.8610, -0.4342, -0.6424, 1.2406]) tensor([0.5813, 0.0586, 0.0476, 0.3126]) -Greedy action tensor([ 1.3769, -0.3061, -0.3776, 0.2688]) tensor([0.5921, 0.1100, 0.1024, 0.1955]) -Greedy action tensor([ 1.2603, -0.4170, -0.3896, 0.3030]) tensor([0.5673, 0.1060, 0.1090, 0.2178]) -Greedy action tensor([ 1.2252, -0.0532, -0.4860, -0.0238]) tensor([0.5728, 0.1595, 0.1035, 0.1643]) -Greedy action tensor([ 1.3131, -0.3254, -0.3144, 0.3800]) tensor([0.5605, 0.1089, 0.1101, 0.2205]) -Greedy action tensor([ 2.0488, -1.0096, -0.3378, 0.2657]) tensor([0.7651, 0.0359, 0.0703, 0.1286]) -Greedy action tensor([ 2.1587, -1.4680, -0.2579, 0.3796]) tensor([0.7784, 0.0207, 0.0695, 0.1314]) -Greedy action tensor([ 2.1256, -1.4284, -0.1334, 0.3383]) tensor([0.7690, 0.0220, 0.0803, 0.1287]) -Greedy action tensor([ 1.3644, -0.3307, -1.0788, 0.1195]) tensor([0.6417, 0.1178, 0.0558, 0.1848]) -Greedy action tensor([ 1.4190, -0.4764, -0.7075, 0.4414]) tensor([0.6076, 0.0913, 0.0725, 0.2286]) -Greedy action tensor([ 1.0210, 0.2709, -0.6848, 0.5950]) tensor([0.4335, 0.2047, 0.0787, 0.2831]) -Greedy action tensor([-0.8208, -2.0086, 0.7042, -0.7321]) tensor([0.1430, 0.0436, 0.6571, 0.1563]) -Greedy action tensor([ 0.5826, -1.3002, -0.0164, -0.0290]) tensor([0.4456, 0.0678, 0.2448, 0.2418]) -Greedy action tensor([-0.1208, 0.6402, 0.1674, 0.1640]) tensor([0.1723, 0.3688, 0.2299, 0.2291]) -Greedy action tensor([-0.0801, -0.5873, 0.6096, -0.9804]) tensor([0.2499, 0.1505, 0.4981, 0.1016]) -Greedy action tensor([-0.4416, 0.3938, -0.0082, -1.3905]) tensor([0.1910, 0.4404, 0.2946, 0.0740]) -Greedy action tensor([-1.1555, 0.0877, -0.9436, -0.5715]) tensor([0.1334, 0.4625, 0.1649, 0.2392]) -Greedy action tensor([ 0.0066, -0.8007, -0.2457, -0.0665]) tensor([0.3172, 0.1415, 0.2465, 0.2949]) -Greedy action tensor([-0.7676, -0.3876, -0.0440, -1.6436]) tensor([0.2024, 0.2960, 0.4173, 0.0843]) -Greedy action tensor([-0.0775, -0.7493, 0.3491, -0.0473]) tensor([0.2455, 0.1254, 0.3761, 0.2530]) -Greedy action tensor([-0.7272, -0.3945, 0.5398, -0.5117]) tensor([0.1392, 0.1941, 0.4941, 0.1726]) -Greedy action tensor([-0.6938, 0.8818, -0.2775, -1.5626]) tensor([0.1287, 0.6221, 0.1952, 0.0540]) -Greedy action tensor([-0.0646, -0.5110, -0.3010, -1.1142]) tensor([0.3598, 0.2302, 0.2840, 0.1260]) -Greedy action tensor([-0.1004, 0.1685, -0.5383, -0.4445]) tensor([0.2730, 0.3572, 0.1762, 0.1935]) -Greedy action tensor([ 0.0068, -0.3735, -0.5627, -0.0462]) tensor([0.3127, 0.2138, 0.1769, 0.2966]) -Greedy action tensor([ 0.0582, 0.0263, 0.7001, -0.5146]) tensor([0.2256, 0.2185, 0.4287, 0.1272]) -Greedy action tensor([ 0.4088, -1.0944, -0.5865, 0.1332]) tensor([0.4253, 0.0946, 0.1572, 0.3229]) -Greedy action tensor([-0.1838, -2.4092, 0.2943, 0.0514]) tensor([0.2509, 0.0271, 0.4046, 0.3174]) -Greedy action tensor([-0.3832, -0.1017, 0.0572, 0.2681]) tensor([0.1725, 0.2286, 0.2680, 0.3309]) -Greedy action tensor([ 0.9817, -0.3373, 0.4646, -0.0604]) tensor([0.4512, 0.1206, 0.2690, 0.1591]) -Greedy action tensor([-0.0779, -0.4083, -0.9377, -0.3234]) tensor([0.3420, 0.2458, 0.1447, 0.2675]) -Greedy action tensor([ 0.0157, -0.4312, -0.1642, -0.0081]) tensor([0.2897, 0.1853, 0.2420, 0.2829]) -Greedy action tensor([ 0.4792, 0.6026, -0.7663, -1.0313]) tensor([0.3788, 0.4285, 0.1090, 0.0836]) -Greedy action tensor([-0.8032, -0.2301, 0.3727, -1.9062]) tensor([0.1576, 0.2795, 0.5107, 0.0523]) -Greedy action tensor([ 0.6148, -0.9626, 0.2820, -0.2537]) tensor([0.4268, 0.0881, 0.3060, 0.1791]) -Greedy action tensor([ 0.3895, -0.0877, 0.9234, 0.0366]) tensor([0.2482, 0.1540, 0.4234, 0.1744]) -Greedy action tensor([0.7437, 0.1648, 0.1754, 0.0824]) tensor([0.3783, 0.2121, 0.2143, 0.1953]) -Greedy action tensor([-0.2945, -0.4535, 0.4456, 0.1126]) tensor([0.1834, 0.1565, 0.3845, 0.2756]) -Greedy action tensor([ 1.1789, -0.6405, -0.4760, 0.2634]) tensor([0.5703, 0.0925, 0.1090, 0.2283]) -Greedy action tensor([-0.1065, 0.4955, -0.2002, -0.5284]) tensor([0.2277, 0.4157, 0.2073, 0.1493]) -Greedy action tensor([-0.4870, -0.5495, -0.8789, -0.4302]) tensor([0.2722, 0.2557, 0.1839, 0.2881]) -Greedy action tensor([ 0.7316, -1.7371, 0.3639, 0.0288]) tensor([0.4401, 0.0373, 0.3047, 0.2179]) -Greedy action tensor([-0.5370, -0.7935, 0.5467, -0.5309]) tensor([0.1743, 0.1349, 0.5153, 0.1754]) -Greedy action tensor([ 0.5343, -0.7328, -0.1202, 0.3305]) tensor([0.3821, 0.1076, 0.1986, 0.3117]) -Greedy action tensor([-0.0721, -0.0922, 0.2098, -0.4207]) tensor([0.2493, 0.2443, 0.3305, 0.1759]) -Greedy action tensor([ 0.2629, -1.2537, -0.0640, -0.1483]) tensor([0.3841, 0.0843, 0.2770, 0.2546]) -Greedy action tensor([ 0.1793, 0.0013, 0.3219, -0.4550]) tensor([0.2841, 0.2377, 0.3276, 0.1506]) -Greedy action tensor([-1.0622, -1.2624, 0.5486, 0.1573]) tensor([0.0979, 0.0802, 0.4903, 0.3316]) -Greedy action tensor([-0.3849, -0.9023, -0.5161, -0.4646]) tensor([0.2944, 0.1755, 0.2582, 0.2719]) -Greedy action tensor([ 0.3027, 0.1360, 0.6303, -0.9246]) tensor([0.2835, 0.2400, 0.3934, 0.0831]) -Greedy action tensor([ 0.2199, -1.3627, -0.1807, -0.5029]) tensor([0.4236, 0.0870, 0.2838, 0.2056]) -Greedy action tensor([ 0.2007, -0.0864, 0.4675, -0.4581]) tensor([0.2798, 0.2100, 0.3654, 0.1448]) -Greedy action tensor([ 0.0827, -0.1511, 0.5844, -0.7696]) tensor([0.2584, 0.2046, 0.4268, 0.1102]) -Greedy action tensor([ 2.0515, -0.7432, 0.5215, 0.8733]) tensor([0.6307, 0.0386, 0.1366, 0.1942]) -Greedy action tensor([ 1.2875, -1.1083, 0.5572, 0.2892]) tensor([0.5151, 0.0469, 0.2482, 0.1898]) -Greedy action tensor([ 0.0508, -0.7321, -0.0538, 0.0024]) tensor([0.3021, 0.1381, 0.2721, 0.2878]) -Greedy action tensor([-0.5580, -0.7264, -1.1058, 0.3530]) tensor([0.2037, 0.1721, 0.1178, 0.5065]) -Greedy action tensor([ 0.5142, -1.0831, -1.2785, 0.7118]) tensor([0.3865, 0.0782, 0.0644, 0.4709]) -Greedy action tensor([ 0.7853, -1.7114, -0.1564, 1.5232]) tensor([0.2806, 0.0231, 0.1094, 0.5869]) -Greedy action tensor([-0.3376, 0.0907, -0.3156, -1.4411]) tensor([0.2572, 0.3947, 0.2629, 0.0853]) -Greedy action tensor([-0.6150, -0.4955, -0.4059, -1.5115]) tensor([0.2654, 0.2991, 0.3272, 0.1083]) -Greedy action tensor([ 0.1703, -0.0441, 0.3071, -0.0063]) tensor([0.2637, 0.2128, 0.3024, 0.2210]) -Greedy action tensor([ 0.1084, -0.7524, -0.2830, 0.3587]) tensor([0.2956, 0.1250, 0.1998, 0.3796]) -Greedy action tensor([ 0.4226, -0.3402, 0.1267, -0.4420]) tensor([0.3800, 0.1772, 0.2827, 0.1601]) -Greedy action tensor([ 0.5286, -0.0173, 0.5355, 0.0291]) tensor([0.3132, 0.1814, 0.3153, 0.1900]) -Greedy action tensor([ 0.0347, -1.5762, -0.0682, 1.1639]) tensor([0.1925, 0.0384, 0.1737, 0.5954]) -Greedy action tensor([-0.4948, -0.9683, 0.5308, -0.2168]) tensor([0.1745, 0.1087, 0.4865, 0.2304]) -Greedy action tensor([-0.4177, 0.1256, 0.5387, 0.3435]) tensor([0.1340, 0.2306, 0.3486, 0.2868]) -Greedy action tensor([1.1618, 0.3215, 0.0295, 0.1289]) tensor([0.4740, 0.2046, 0.1528, 0.1687]) -Greedy action tensor([-0.0507, -1.1915, -0.3583, 0.7562]) tensor([0.2328, 0.0744, 0.1712, 0.5217]) -Greedy action tensor([-0.1279, -0.3375, 0.1381, -0.7910]) tensor([0.2754, 0.2233, 0.3594, 0.1419]) -Greedy action tensor([-0.3853, -0.6117, -0.0787, -0.7647]) tensor([0.2604, 0.2076, 0.3538, 0.1782]) -Greedy action tensor([ 0.5272, -1.1488, 0.1890, -0.2246]) tensor([0.4216, 0.0789, 0.3007, 0.1988]) -Greedy action tensor([-0.6246, -1.4653, -0.6026, -0.2014]) tensor([0.2512, 0.1084, 0.2568, 0.3836]) -Greedy action tensor([-0.3775, 0.7153, -0.0113, -0.5167]) tensor([0.1589, 0.4738, 0.2291, 0.1382]) -Greedy action tensor([-0.6714, 0.6104, 0.6283, -0.4353]) tensor([0.1049, 0.3778, 0.3846, 0.1328]) -Greedy action tensor([-0.0495, -0.3835, -0.5056, 0.6183]) tensor([0.2326, 0.1665, 0.1474, 0.4535]) -Greedy action tensor([-0.5659, -1.2617, 0.5167, -1.1027]) tensor([0.1986, 0.0990, 0.5863, 0.1161]) -Greedy action tensor([-0.8126, -0.6027, 0.0689, -0.1361]) tensor([0.1512, 0.1865, 0.3650, 0.2973]) -Greedy action tensor([-0.1964, -0.7187, -0.1906, 0.5878]) tensor([0.2088, 0.1238, 0.2100, 0.4574]) -Greedy action tensor([-0.5259, -0.2879, -0.8937, -0.8007]) tensor([0.2688, 0.3410, 0.1861, 0.2042]) -Greedy action tensor([-0.3744, -0.5977, 0.1098, -0.8958]) tensor([0.2490, 0.1991, 0.4041, 0.1478]) -Greedy action tensor([-0.3612, -0.2160, -0.7730, 0.0497]) tensor([0.2311, 0.2672, 0.1531, 0.3486]) -Greedy action tensor([-0.3632, 0.1172, -1.2927, 0.1170]) tensor([0.2161, 0.3493, 0.0853, 0.3493]) -Greedy action tensor([-0.1164, -0.5227, -1.4621, 0.1386]) tensor([0.3108, 0.2071, 0.0809, 0.4011]) -Greedy action tensor([ 0.3415, -1.4999, 0.3653, -0.5106]) tensor([0.3833, 0.0608, 0.3925, 0.1635]) -Greedy action tensor([-0.2929, -0.4544, -0.0837, -0.6438]) tensor([0.2640, 0.2246, 0.3255, 0.1859]) -Greedy action tensor([0.3053, 0.0693, 0.4571, 0.7548]) tensor([0.2212, 0.1747, 0.2574, 0.3467]) -Greedy action tensor([ 0.1945, -2.7460, 0.1587, 0.1404]) tensor([0.3373, 0.0178, 0.3254, 0.3195]) -Greedy action tensor([-0.9195, -0.2823, 0.2437, -0.2623]) tensor([0.1247, 0.2358, 0.3990, 0.2406]) -Greedy action tensor([-0.8155, -0.2812, -1.7239, 0.0105]) tensor([0.1854, 0.3163, 0.0748, 0.4235]) -Greedy action tensor([ 0.5419, -1.3597, -0.1845, 1.3533]) tensor([0.2575, 0.0384, 0.1245, 0.5796]) -Greedy action tensor([-1.8507, -0.1603, 0.5813, -0.1224]) tensor([0.0427, 0.2314, 0.4857, 0.2403]) -Greedy action tensor([-1.1400, -0.1788, 0.2230, 0.2769]) tensor([0.0859, 0.2245, 0.3355, 0.3541]) -Greedy action tensor([-1.9329, -0.4435, 0.6622, -0.1738]) tensor([0.0406, 0.1800, 0.5438, 0.2357]) -Greedy action tensor([-1.0331, -0.0080, 0.1937, 0.0107]) tensor([0.0996, 0.2777, 0.3397, 0.2829]) -Greedy action tensor([-1.9307, -0.4316, 0.6602, -0.1728]) tensor([0.0406, 0.1819, 0.5419, 0.2356]) -Greedy action tensor([-1.1334, -0.1677, 0.5138, -0.4953]) tensor([0.0934, 0.2452, 0.4847, 0.1767]) -Greedy action tensor([-1.3739, 0.4523, 0.5144, -0.5034]) tensor([0.0617, 0.3832, 0.4078, 0.1474]) -Greedy action tensor([-1.9371, -0.4497, 0.6613, -0.1753]) tensor([0.0405, 0.1792, 0.5444, 0.2358]) -Greedy action tensor([-1.6982, -0.5239, 0.5371, -0.0150]) tensor([0.0527, 0.1706, 0.4929, 0.2838]) -Greedy action tensor([-1.8126, -0.4752, 0.5802, -0.1263]) tensor([0.0473, 0.1801, 0.5174, 0.2553]) -Greedy action tensor([-1.7762, -0.3167, 0.6473, -0.1214]) tensor([0.0458, 0.1972, 0.5172, 0.2398]) -Greedy action tensor([-1.9284, -0.4120, 0.6545, -0.1732]) tensor([0.0407, 0.1854, 0.5386, 0.2354]) -Greedy action tensor([-0.7782, 0.5088, 0.1200, 0.4028]) tensor([0.0968, 0.3505, 0.2376, 0.3152]) -Greedy action tensor([-1.9140, -0.4764, 0.6891, -0.1523]) tensor([0.0408, 0.1716, 0.5504, 0.2373]) -Greedy action tensor([-0.6896, 0.4765, 0.0995, -0.1864]) tensor([0.1240, 0.3980, 0.2730, 0.2051]) -Greedy action tensor([-1.9196, -0.4429, 0.6563, -0.1657]) tensor([0.0412, 0.1802, 0.5409, 0.2378]) -Greedy action tensor([-1.8496, -0.1690, 0.5991, -0.1231]) tensor([0.0424, 0.2278, 0.4912, 0.2386]) -Greedy action tensor([-1.8731, -0.4171, 0.6284, -0.1470]) tensor([0.0433, 0.1856, 0.5280, 0.2431]) -Greedy action tensor([-1.9273, -0.4533, 0.6621, -0.1721]) tensor([0.0409, 0.1784, 0.5443, 0.2364]) -Greedy action tensor([-0.3960, 0.9715, 0.0112, 0.1287]) tensor([0.1232, 0.4835, 0.1851, 0.2082]) -Greedy action tensor([-1.8623, -0.3501, 0.6328, -0.1327]) tensor([0.0429, 0.1947, 0.5203, 0.2420]) -Greedy action tensor([-1.8810, -0.4725, 0.6353, -0.1516]) tensor([0.0433, 0.1770, 0.5358, 0.2439]) -Greedy action tensor([-1.4097, -0.4340, 0.2639, 0.2716]) tensor([0.0697, 0.1848, 0.3713, 0.3742]) -Greedy action tensor([-1.8687, -0.4265, 0.6340, -0.1370]) tensor([0.0433, 0.1832, 0.5289, 0.2446]) -Greedy action tensor([-1.9101, -0.4403, 0.6540, -0.1602]) tensor([0.0415, 0.1805, 0.5392, 0.2388]) -Greedy action tensor([-1.6464, -0.4074, 0.5301, -0.0778]) tensor([0.0553, 0.1911, 0.4879, 0.2657]) -Greedy action tensor([-1.4636, 0.1845, 0.3386, 0.0565]) tensor([0.0594, 0.3087, 0.3602, 0.2716]) -Greedy action tensor([-1.8227, -0.4831, 0.7022, -0.0826]) tensor([0.0435, 0.1659, 0.5429, 0.2477]) -Greedy action tensor([-1.9256, -0.4081, 0.6534, -0.1657]) tensor([0.0407, 0.1857, 0.5369, 0.2367]) -Greedy action tensor([-1.8765, -0.4277, 0.6351, -0.1505]) tensor([0.0431, 0.1835, 0.5312, 0.2421]) -Greedy action tensor([-0.7717, -0.2631, -0.6076, -0.2672]) tensor([0.1819, 0.3025, 0.2143, 0.3013]) -Greedy action tensor([-1.8473, -0.4225, 0.6143, -0.1454]) tensor([0.0447, 0.1859, 0.5242, 0.2452]) -Greedy action tensor([-1.1547, 0.8171, 0.1741, 0.1747]) tensor([0.0635, 0.4564, 0.2400, 0.2401]) -Greedy action tensor([-1.6625, -0.3991, 0.5444, 0.0764]) tensor([0.0518, 0.1831, 0.4705, 0.2946]) -Greedy action tensor([-1.7693, -0.1522, 0.5342, -0.0936]) tensor([0.0467, 0.2355, 0.4679, 0.2498]) -Greedy action tensor([-1.7596, -0.5167, 0.8675, 0.1093]) tensor([0.0404, 0.1399, 0.5582, 0.2615]) -Greedy action tensor([-1.8580, -0.3987, 0.6393, -0.1254]) tensor([0.0433, 0.1862, 0.5258, 0.2447]) -Greedy action tensor([-1.9096, -0.4370, 0.6475, -0.1695]) tensor([0.0417, 0.1820, 0.5384, 0.2378]) -Greedy action tensor([-1.4504, -0.3131, 0.4800, -0.4583]) tensor([0.0730, 0.2275, 0.5028, 0.1967]) -Greedy action tensor([-0.9095, 0.4553, 0.1401, -0.0583]) tensor([0.0989, 0.3871, 0.2824, 0.2316]) -Greedy action tensor([-1.7492, -0.5289, 0.5755, -0.0715]) tensor([0.0501, 0.1697, 0.5121, 0.2681]) -Greedy action tensor([-1.9229, -0.3878, 0.6467, -0.1635]) tensor([0.0408, 0.1894, 0.5328, 0.2370]) -Greedy action tensor([-1.9459, -0.4526, 0.6676, -0.1810]) tensor([0.0401, 0.1785, 0.5472, 0.2342]) -Greedy action tensor([-1.3281, 0.6369, 0.2016, 0.1992]) tensor([0.0576, 0.4111, 0.2660, 0.2653]) -Greedy action tensor([-1.6468, -0.3483, 0.5850, -0.2423]) tensor([0.0554, 0.2029, 0.5160, 0.2256]) -Greedy action tensor([-1.8493, -0.3960, 0.6351, -0.1246]) tensor([0.0437, 0.1869, 0.5242, 0.2452]) -Greedy action tensor([-1.8940, -0.3976, 0.6229, -0.1844]) tensor([0.0428, 0.1910, 0.5299, 0.2364]) -Greedy action tensor([-1.5563, 0.4328, 0.4321, -0.0807]) tensor([0.0500, 0.3657, 0.3654, 0.2188]) -Greedy action tensor([-1.7701, -0.3027, 0.5728, -0.1325]) tensor([0.0479, 0.2076, 0.4983, 0.2462]) -Greedy action tensor([-1.8439, -0.0184, 0.5694, -0.1692]) tensor([0.0422, 0.2617, 0.4711, 0.2251]) -Greedy action tensor([-0.3770, 0.7687, 0.0823, 0.5860]) tensor([0.1198, 0.3767, 0.1896, 0.3138]) -Greedy action tensor([-1.8255, -0.2568, 0.6141, -0.1039]) tensor([0.0437, 0.2100, 0.5016, 0.2446]) -Greedy action tensor([-1.8650, -0.4343, 0.6274, -0.1375]) tensor([0.0437, 0.1826, 0.5280, 0.2457]) -Greedy action tensor([-1.8586, -0.4137, 0.6201, -0.1437]) tensor([0.0440, 0.1866, 0.5248, 0.2445]) -Greedy action tensor([-0.7538, -0.2322, 0.3495, 0.6182]) tensor([0.1037, 0.1747, 0.3126, 0.4090]) -Greedy action tensor([-1.8770, -0.4534, 0.6293, -0.1528]) tensor([0.0434, 0.1804, 0.5326, 0.2436]) -Greedy action tensor([-1.7561, -0.4886, 0.5893, -0.0850]) tensor([0.0492, 0.1749, 0.5140, 0.2619]) -Greedy action tensor([-1.9116, -0.4373, 0.6414, -0.1617]) tensor([0.0417, 0.1822, 0.5360, 0.2401]) -Greedy action tensor([-1.9384, -0.4103, 0.6568, -0.1760]) tensor([0.0403, 0.1856, 0.5395, 0.2346]) -Greedy action tensor([-1.8737, -0.3913, 0.6251, -0.1509]) tensor([0.0432, 0.1900, 0.5251, 0.2417]) -Greedy action tensor([-1.8929, -0.4567, 0.6582, -0.1336]) tensor([0.0420, 0.1764, 0.5379, 0.2437]) -Greedy action tensor([-1.8709, -0.4547, 0.6322, -0.1471]) tensor([0.0436, 0.1796, 0.5325, 0.2443]) -Greedy action tensor([-1.7652, -0.2877, 0.6295, 0.0081]) tensor([0.0450, 0.1971, 0.4931, 0.2649]) -Greedy action tensor([-1.8070, -0.0504, 0.5582, -0.0858]) tensor([0.0434, 0.2515, 0.4623, 0.2428]) -Greedy action tensor([-1.9356, -0.4402, 0.6649, -0.1754]) tensor([0.0404, 0.1803, 0.5444, 0.2349]) -Greedy action tensor([-1.1773, -0.2903, 0.5433, -0.5583]) tensor([0.0920, 0.2233, 0.5139, 0.1708]) -Greedy action tensor([-1.8629, -0.4565, 0.6299, -0.1349]) tensor([0.0439, 0.1790, 0.5304, 0.2468]) -Greedy action tensor([-1.8979, -0.3868, 0.6446, -0.1532]) tensor([0.0417, 0.1891, 0.5304, 0.2388]) -Greedy action tensor([-1.8846, -0.2785, 0.6114, -0.1336]) tensor([0.0419, 0.2087, 0.5082, 0.2412]) -Greedy action tensor([-1.9445, -0.4467, 0.6671, -0.1807]) tensor([0.0401, 0.1794, 0.5464, 0.2341]) -Greedy action tensor([-1.0166, 0.4830, -0.8972, -0.9561]) tensor([0.1304, 0.5842, 0.1469, 0.1385]) -Greedy action tensor([-1.4536, -0.5609, 0.4407, -0.0188]) tensor([0.0700, 0.1709, 0.4653, 0.2939]) -Greedy action tensor([-0.8520, -0.1575, 0.2625, 0.6052]) tensor([0.0967, 0.1936, 0.2947, 0.4151]) -Greedy action tensor([-1.4300, -0.4780, 0.4614, 0.2423]) tensor([0.0643, 0.1667, 0.4265, 0.3425]) -Greedy action tensor([-1.9436, -0.4467, 0.6664, -0.1797]) tensor([0.0402, 0.1794, 0.5461, 0.2343]) -Greedy action tensor([-0.4841, 0.9281, 0.0401, 0.0545]) tensor([0.1175, 0.4825, 0.1985, 0.2014]) -Greedy action tensor([-1.9372, -0.4336, 0.6613, -0.1760]) tensor([0.0404, 0.1817, 0.5429, 0.2350]) -Greedy action tensor([-1.8798, -0.4595, 0.6414, -0.1505]) tensor([0.0431, 0.1782, 0.5359, 0.2428]) -Greedy action tensor([-1.9011, -0.3671, 0.6392, -0.1533]) tensor([0.0416, 0.1927, 0.5271, 0.2386]) -Greedy action tensor([-1.8943, -0.3546, 0.6372, -0.1512]) tensor([0.0418, 0.1947, 0.5249, 0.2386]) -Greedy action tensor([-0.8735, 0.2506, 0.1686, -0.0559]) tensor([0.1090, 0.3353, 0.3089, 0.2468]) -Greedy action tensor([ 0.4722, -0.0495, -0.0766, -0.0238]) tensor([0.3597, 0.2135, 0.2078, 0.2190]) -Greedy action tensor([ 0.9211, -0.5015, -0.0017, -0.5924]) tensor([0.5380, 0.1297, 0.2138, 0.1184]) -Greedy action tensor([ 1.2322, -0.7062, -0.1193, -0.5607]) tensor([0.6372, 0.0917, 0.1650, 0.1061]) -Greedy action tensor([ 1.1580, -0.6762, -0.0480, -0.8825]) tensor([0.6293, 0.1005, 0.1884, 0.0818]) -Greedy action tensor([ 1.1833, -0.6041, -0.1657, -0.4195]) tensor([0.6142, 0.1028, 0.1594, 0.1237]) -Greedy action tensor([ 0.7243, -0.3515, -0.0223, -0.2218]) tensor([0.4539, 0.1548, 0.2151, 0.1762]) -Greedy action tensor([ 0.8882, -0.4093, -0.1079, -0.3501]) tensor([0.5175, 0.1414, 0.1911, 0.1500]) -Greedy action tensor([ 0.8561, -0.3409, 0.0797, -0.6936]) tensor([0.5065, 0.1530, 0.2330, 0.1075]) -Greedy action tensor([ 0.8806, -0.6933, 0.2112, -0.2956]) tensor([0.4932, 0.1022, 0.2525, 0.1521]) -Greedy action tensor([ 0.8491, -0.4810, -0.0894, -0.1908]) tensor([0.4977, 0.1316, 0.1947, 0.1759]) -Greedy action tensor([ 0.2911, -0.1892, -0.1101, -0.1511]) tensor([0.3412, 0.2111, 0.2284, 0.2193]) -Greedy action tensor([ 0.6287, -0.0167, -0.1546, -0.1017]) tensor([0.4060, 0.2129, 0.1855, 0.1956]) -Greedy action tensor([ 0.8487, -0.5139, 0.1071, -0.5087]) tensor([0.5026, 0.1287, 0.2394, 0.1293]) -Greedy action tensor([ 0.7470, 0.4461, -0.2081, -0.0972]) tensor([0.3914, 0.2897, 0.1506, 0.1683]) -Greedy action tensor([ 0.6453, -0.6803, -0.1260, -0.2292]) tensor([0.4662, 0.1238, 0.2156, 0.1944]) -Greedy action tensor([ 0.7476, -0.3889, 0.0357, -0.3460]) tensor([0.4659, 0.1495, 0.2286, 0.1561]) -Greedy action tensor([ 0.5742, -0.2674, 0.0989, -0.3653]) tensor([0.4092, 0.1764, 0.2544, 0.1599]) -Greedy action tensor([ 0.2411, -0.0056, -0.2185, 0.0963]) tensor([0.3051, 0.2384, 0.1927, 0.2639]) -Greedy action tensor([ 0.7016, -0.2967, -0.1064, -0.2993]) tensor([0.4583, 0.1689, 0.2043, 0.1685]) -Greedy action tensor([ 0.6202, 0.2210, -0.0917, 0.0800]) tensor([0.3644, 0.2445, 0.1788, 0.2123]) -Greedy action tensor([ 0.5439, -0.3623, -0.0803, -0.1429]) tensor([0.4093, 0.1654, 0.2193, 0.2060]) -Greedy action tensor([ 0.7875, -0.7448, -0.1218, -0.2905]) tensor([0.5104, 0.1103, 0.2056, 0.1737]) -Greedy action tensor([ 0.7419, -0.1913, 0.0925, -0.0214]) tensor([0.4199, 0.1651, 0.2193, 0.1957]) -Greedy action tensor([ 1.1580, -0.6452, 0.0881, -0.3016]) tensor([0.5747, 0.0947, 0.1971, 0.1335]) -Greedy action tensor([ 0.7497, -0.3315, -0.0725, -0.1708]) tensor([0.4594, 0.1558, 0.2019, 0.1830]) -Greedy action tensor([ 0.6298, -0.2101, -0.0517, -0.0988]) tensor([0.4132, 0.1784, 0.2090, 0.1994]) -Greedy action tensor([ 0.5283, -0.2259, -0.0583, -0.2284]) tensor([0.4007, 0.1885, 0.2229, 0.1880]) -Greedy action tensor([ 0.3835, 0.0649, -0.0338, 0.0539]) tensor([0.3220, 0.2342, 0.2122, 0.2316]) -Greedy action tensor([ 1.1570, -0.7901, -0.0224, -0.6158]) tensor([0.6173, 0.0881, 0.1898, 0.1049]) -Greedy action tensor([ 0.2887, 0.0903, -0.2183, -0.0788]) tensor([0.3210, 0.2633, 0.1934, 0.2223]) -Greedy action tensor([ 0.8239, -0.5358, -0.0245, -0.3347]) tensor([0.5003, 0.1284, 0.2142, 0.1571]) -Greedy action tensor([ 0.6402, -0.1175, 0.0038, 0.0323]) tensor([0.3933, 0.1844, 0.2081, 0.2142]) -Greedy action tensor([ 0.5798, -0.1572, -0.0745, -0.0578]) tensor([0.3957, 0.1894, 0.2057, 0.2092]) -Greedy action tensor([ 0.5170, 0.1042, -0.1094, -0.0558]) tensor([0.3623, 0.2398, 0.1936, 0.2043]) -Greedy action tensor([ 0.4174, -0.0307, -0.1243, -0.0285]) tensor([0.3496, 0.2233, 0.2033, 0.2238]) -Greedy action tensor([ 0.7251, -0.2811, 0.0435, -0.3941]) tensor([0.4550, 0.1663, 0.2301, 0.1486]) -Greedy action tensor([ 0.7818, -0.5558, -0.1015, -0.4835]) tensor([0.5107, 0.1341, 0.2111, 0.1441]) -Greedy action tensor([ 0.7167, -0.4973, -0.0803, -0.2654]) tensor([0.4712, 0.1400, 0.2124, 0.1765]) -Greedy action tensor([ 1.0835, -0.5873, -0.0346, -0.5849]) tensor([0.5870, 0.1104, 0.1919, 0.1107]) -Greedy action tensor([ 0.6619, -0.5127, -0.1175, -0.1838]) tensor([0.4552, 0.1406, 0.2088, 0.1954]) -Greedy action tensor([ 1.3695, -1.0232, -0.0572, -0.7344]) tensor([0.6880, 0.0629, 0.1652, 0.0839]) -Greedy action tensor([ 0.6351, -0.1538, -0.0692, -0.0243]) tensor([0.4055, 0.1842, 0.2005, 0.2097]) -Greedy action tensor([ 0.8339, -0.7966, 0.0716, -0.3451]) tensor([0.5076, 0.0994, 0.2368, 0.1561]) -Greedy action tensor([ 0.8402, -0.4790, -0.0920, -0.2870]) tensor([0.5038, 0.1347, 0.1983, 0.1632]) -Greedy action tensor([ 0.8849, -0.6917, -0.0779, -0.4416]) tensor([0.5394, 0.1115, 0.2060, 0.1432]) -Greedy action tensor([ 0.8690, -0.5210, 0.0384, -0.3452]) tensor([0.5046, 0.1257, 0.2199, 0.1498]) -Greedy action tensor([ 1.1574, -0.8210, -0.1736, -0.6212]) tensor([0.6364, 0.0880, 0.1681, 0.1075]) -Greedy action tensor([ 0.4341, -0.0596, -0.1320, -0.0974]) tensor([0.3615, 0.2207, 0.2053, 0.2125]) -Greedy action tensor([ 0.8452, -0.4716, -0.0152, -0.2144]) tensor([0.4908, 0.1315, 0.2076, 0.1701]) -Greedy action tensor([ 0.8373, -0.4134, 0.0108, -0.5838]) tensor([0.5088, 0.1457, 0.2226, 0.1228]) -Greedy action tensor([ 1.2239, -0.9371, 0.1368, -0.5976]) tensor([0.6195, 0.0714, 0.2089, 0.1002]) -Greedy action tensor([ 0.5977, -0.1151, 0.0400, -0.0459]) tensor([0.3864, 0.1894, 0.2212, 0.2030]) -Greedy action tensor([ 0.5820, -0.4930, 0.0256, -0.3040]) tensor([0.4298, 0.1467, 0.2464, 0.1772]) -Greedy action tensor([ 1.2884, -0.7594, -0.1349, -0.4952]) tensor([0.6502, 0.0839, 0.1566, 0.1093]) -Greedy action tensor([ 0.9250, -0.4112, -0.0820, -0.4662]) tensor([0.5328, 0.1400, 0.1946, 0.1325]) -Greedy action tensor([ 0.3952, -0.1492, -0.0461, -0.0649]) tensor([0.3503, 0.2032, 0.2253, 0.2211]) -Greedy action tensor([ 1.0705, -0.5485, -0.0025, -0.4146]) tensor([0.5661, 0.1121, 0.1936, 0.1282]) -Greedy action tensor([ 0.8317, -0.5105, -0.0227, -0.4559]) tensor([0.5095, 0.1331, 0.2168, 0.1406]) -Greedy action tensor([ 0.2605, -0.1664, -0.0418, -0.1860]) tensor([0.3299, 0.2153, 0.2438, 0.2111]) -Greedy action tensor([ 0.7229, -0.5060, -0.2042, -0.3140]) tensor([0.4895, 0.1432, 0.1937, 0.1736]) -Greedy action tensor([ 0.5593, 0.2041, -0.0702, 0.0936]) tensor([0.3495, 0.2450, 0.1862, 0.2194]) -Greedy action tensor([ 0.6612, -0.5509, -0.1230, -0.2523]) tensor([0.4640, 0.1381, 0.2118, 0.1861]) -Greedy action tensor([ 0.4910, -0.4096, 0.0213, -0.2818]) tensor([0.4011, 0.1630, 0.2508, 0.1852]) -Greedy action tensor([ 1.2168, -0.6350, -0.1597, -0.6751]) tensor([0.6409, 0.1006, 0.1618, 0.0967]) -Greedy action tensor([ 0.3082, -0.0701, -0.0981, -0.1298]) tensor([0.3337, 0.2286, 0.2223, 0.2154]) -Greedy action tensor([ 0.6571, -0.3426, -0.0207, -0.1439]) tensor([0.4302, 0.1583, 0.2184, 0.1931]) -Greedy action tensor([ 0.3429, -0.0471, -0.0660, -0.1442]) tensor([0.3383, 0.2291, 0.2248, 0.2079]) -Greedy action tensor([ 0.6556, -0.3105, -0.0184, -0.1669]) tensor([0.4293, 0.1634, 0.2188, 0.1886]) -Greedy action tensor([ 0.6656, 0.2162, -0.0046, -0.0928]) tensor([0.3820, 0.2437, 0.1954, 0.1789]) -Greedy action tensor([ 0.7844, -0.6570, -0.0045, -0.4036]) tensor([0.5011, 0.1186, 0.2276, 0.1527]) -Greedy action tensor([ 0.9898, -0.7472, 0.0657, -0.4156]) tensor([0.5500, 0.0968, 0.2183, 0.1349]) -Greedy action tensor([ 0.5706, -0.0748, 0.1420, -0.0980]) tensor([0.3720, 0.1951, 0.2423, 0.1906]) -Greedy action tensor([ 0.8293, -0.2551, -0.0546, -0.2487]) tensor([0.4781, 0.1617, 0.1976, 0.1627]) -Greedy action tensor([ 0.4644, 0.2559, -0.1008, 0.0884]) tensor([0.3261, 0.2647, 0.1853, 0.2239]) -Greedy action tensor([ 0.8941, -0.4745, 0.0459, -0.2977]) tensor([0.5034, 0.1281, 0.2156, 0.1529]) -Greedy action tensor([ 1.1908, -0.5221, -0.0985, -0.3005]) tensor([0.5949, 0.1073, 0.1639, 0.1339]) -Greedy action tensor([ 0.7680, -0.1090, 0.0226, -0.0229]) tensor([0.4266, 0.1775, 0.2025, 0.1934]) -Greedy action tensor([ 0.8966, -0.6195, 0.0749, -0.4834]) tensor([0.5233, 0.1149, 0.2301, 0.1317]) -Greedy action tensor([ 0.7696, 0.1046, -0.0707, -0.0087]) tensor([0.4158, 0.2138, 0.1795, 0.1909]) -Greedy action tensor([ 0.4162, -0.2162, 0.0350, -0.0667]) tensor([0.3532, 0.1877, 0.2412, 0.2179]) -Greedy action tensor([ 0.9736, 0.1850, 0.1769, -0.0353]) tensor([0.4405, 0.2002, 0.1986, 0.1606]) -Greedy action tensor([ 1.7362, -0.8821, -0.5036, 0.5316]) tensor([0.6760, 0.0493, 0.0720, 0.2027]) -Greedy action tensor([ 1.2753, -0.3390, -0.4633, 0.2655]) tensor([0.5750, 0.1144, 0.1011, 0.2095]) -Greedy action tensor([ 1.2500, -0.4081, -0.3906, 0.2317]) tensor([0.5729, 0.1091, 0.1111, 0.2069]) -Greedy action tensor([ 1.5426, -0.5042, -0.2426, 0.2111]) tensor([0.6406, 0.0827, 0.1075, 0.1692]) -Greedy action tensor([ 1.7490, -0.4715, -0.8630, 0.2759]) tensor([0.7086, 0.0769, 0.0520, 0.1624]) -Greedy action tensor([ 1.2982, -0.3564, -0.5514, 0.0690]) tensor([0.6094, 0.1165, 0.0959, 0.1783]) -Greedy action tensor([ 1.5222, -0.5184, -0.7382, 0.3068]) tensor([0.6532, 0.0849, 0.0681, 0.1938]) -Greedy action tensor([ 1.2457, -0.4625, -1.0115, 0.3352]) tensor([0.5923, 0.1073, 0.0620, 0.2383]) -Greedy action tensor([ 1.1324, -0.4026, -0.2422, -0.1456]) tensor([0.5724, 0.1233, 0.1448, 0.1595]) -Greedy action tensor([ 1.1628, -0.1951, -0.3526, 0.1272]) tensor([0.5459, 0.1404, 0.1199, 0.1938]) -Greedy action tensor([ 1.4358, -0.2527, -0.5954, 0.2897]) tensor([0.6120, 0.1131, 0.0803, 0.1946]) -Greedy action tensor([ 1.4002, -0.7425, -0.5337, 0.2465]) tensor([0.6340, 0.0744, 0.0917, 0.2000]) -Greedy action tensor([ 1.3301, -0.7419, -0.3660, 0.5062]) tensor([0.5721, 0.0720, 0.1049, 0.2510]) -Greedy action tensor([ 1.9620, -0.4578, -0.6854, 0.0772]) tensor([0.7624, 0.0678, 0.0540, 0.1158]) -Greedy action tensor([ 1.2206, 0.6522, 0.2540, -0.0484]) tensor([0.4488, 0.2542, 0.1707, 0.1262]) -Greedy action tensor([ 1.1864, -0.2984, -0.2882, 0.2976]) tensor([0.5357, 0.1214, 0.1226, 0.2203]) -Greedy action tensor([ 1.3850, -0.5599, -0.5452, 0.3600]) tensor([0.6072, 0.0868, 0.0881, 0.2179]) -Greedy action tensor([ 1.8228, -0.7823, -0.3962, 0.4068]) tensor([0.7016, 0.0518, 0.0763, 0.1703]) -Greedy action tensor([ 1.3242, -0.2758, -0.8421, 0.1284]) tensor([0.6177, 0.1247, 0.0708, 0.1868]) -Greedy action tensor([ 1.4281, -0.6041, -0.4638, 0.5533]) tensor([0.5887, 0.0771, 0.0888, 0.2454]) -Greedy action tensor([ 1.3423, -0.3456, -0.9470, 0.4101]) tensor([0.5953, 0.1101, 0.0603, 0.2343]) -Greedy action tensor([ 1.3571, -0.6441, -0.2806, 0.5116]) tensor([0.5685, 0.0768, 0.1105, 0.2441]) -Greedy action tensor([ 1.2135, -0.3315, -0.7666, -0.0341]) tensor([0.6103, 0.1302, 0.0843, 0.1753]) -Greedy action tensor([ 1.0170, -0.3948, -0.9219, 0.4985]) tensor([0.5043, 0.1229, 0.0726, 0.3003]) -Greedy action tensor([ 1.1010, -0.5224, -0.4626, 0.2961]) tensor([0.5394, 0.1064, 0.1130, 0.2412]) -Greedy action tensor([ 1.3253, -0.7849, -0.2065, 0.3293]) tensor([0.5859, 0.0710, 0.1266, 0.2164]) -Greedy action tensor([ 1.8818, -0.6029, -0.2644, 0.0801]) tensor([0.7324, 0.0611, 0.0856, 0.1209]) -Greedy action tensor([ 1.6175, -0.2662, -1.4846, 0.4625]) tensor([0.6614, 0.1005, 0.0297, 0.2084]) -Greedy action tensor([ 1.6439, -0.5081, -0.3406, 0.3786]) tensor([0.6511, 0.0757, 0.0895, 0.1837]) -Greedy action tensor([ 1.5136, 0.0447, -0.3178, 0.5009]) tensor([0.5703, 0.1313, 0.0913, 0.2071]) -Greedy action tensor([ 1.2747, 0.0970, -1.0414, 0.4720]) tensor([0.5392, 0.1660, 0.0532, 0.2416]) -Greedy action tensor([ 1.3592, -0.4225, -1.2547, 0.4320]) tensor([0.6108, 0.1028, 0.0447, 0.2417]) -Greedy action tensor([ 1.9964, -0.6742, -0.6024, 0.2448]) tensor([0.7593, 0.0526, 0.0565, 0.1317]) -Greedy action tensor([ 2.8572, -1.9676, -0.1890, 0.6875]) tensor([0.8549, 0.0069, 0.0406, 0.0976]) -Greedy action tensor([ 1.4339, -0.2920, -0.7367, 0.8640]) tensor([0.5383, 0.0958, 0.0614, 0.3044]) -Greedy action tensor([ 1.2309, -0.1580, -0.7135, 0.3060]) tensor([0.5590, 0.1394, 0.0800, 0.2217]) -Greedy action tensor([ 1.3990, -0.2168, -0.6480, 0.2647]) tensor([0.6063, 0.1205, 0.0783, 0.1950]) -Greedy action tensor([ 1.5064, -0.4295, -0.3880, 0.5694]) tensor([0.5929, 0.0856, 0.0892, 0.2323]) -Greedy action tensor([ 1.3796, -0.6528, -0.3343, 0.1092]) tensor([0.6282, 0.0823, 0.1132, 0.1763]) -Greedy action tensor([ 1.5025, -0.3166, -0.2288, 0.3270]) tensor([0.6068, 0.0984, 0.1074, 0.1873]) -Greedy action tensor([ 1.4771, -0.3360, -0.4315, 0.2400]) tensor([0.6243, 0.1019, 0.0926, 0.1812]) -Greedy action tensor([ 1.7574, -0.5212, -0.6133, 0.3908]) tensor([0.6893, 0.0706, 0.0644, 0.1757]) -Greedy action tensor([ 1.3744, -0.4797, -0.3688, 0.2405]) tensor([0.6048, 0.0947, 0.1058, 0.1946]) -Greedy action tensor([ 1.6809, -0.5150, -0.1968, 0.1408]) tensor([0.6763, 0.0753, 0.1034, 0.1450]) -Greedy action tensor([ 1.2440, -0.7028, -0.3123, 0.3999]) tensor([0.5607, 0.0800, 0.1183, 0.2411]) -Greedy action tensor([ 1.5722, -0.1566, -0.1740, 0.3613]) tensor([0.6061, 0.1076, 0.1057, 0.1806]) -Greedy action tensor([ 2.1589, -1.1592, -0.2346, 0.7237]) tensor([0.7323, 0.0265, 0.0669, 0.1743]) -Greedy action tensor([ 1.6192, -0.1315, -0.2947, 0.2574]) tensor([0.6340, 0.1101, 0.0935, 0.1624]) -Greedy action tensor([ 1.5837, -0.6092, -0.3229, 0.7729]) tensor([0.5866, 0.0655, 0.0872, 0.2607]) -Greedy action tensor([ 1.7219, -1.0116, -0.1647, 0.5688]) tensor([0.6526, 0.0424, 0.0989, 0.2060]) -Greedy action tensor([ 1.5363, -0.4085, -0.1981, 0.4634]) tensor([0.6019, 0.0861, 0.1062, 0.2058]) -Greedy action tensor([ 1.3495, -0.0725, -0.1864, 0.1892]) tensor([0.5650, 0.1363, 0.1216, 0.1771]) -Greedy action tensor([ 1.5880, 0.6913, 0.1880, -0.5464]) tensor([0.5641, 0.2301, 0.1391, 0.0667]) -Greedy action tensor([ 1.9408, -0.9468, -0.4656, 0.6156]) tensor([0.7084, 0.0395, 0.0639, 0.1883]) -Greedy action tensor([ 1.4137, -0.5887, -0.3604, 0.7252]) tensor([0.5534, 0.0747, 0.0939, 0.2780]) -Greedy action tensor([ 1.5067, 0.3145, -0.7518, -0.2334]) tensor([0.6315, 0.1917, 0.0660, 0.1108]) -Greedy action tensor([ 1.8282, -0.4151, -0.2802, 0.7571]) tensor([0.6369, 0.0676, 0.0773, 0.2182]) -Greedy action tensor([ 1.5773, -0.1199, -0.7599, -0.0424]) tensor([0.6767, 0.1240, 0.0654, 0.1340]) -Greedy action tensor([ 1.5615, -0.7081, -0.6373, 0.8495]) tensor([0.5865, 0.0606, 0.0651, 0.2878]) -Greedy action tensor([ 1.0955, -0.2169, -0.7298, 0.1372]) tensor([0.5513, 0.1484, 0.0888, 0.2114]) -Greedy action tensor([ 1.3109, 0.0910, -0.7331, 0.0626]) tensor([0.5842, 0.1725, 0.0757, 0.1677]) -Greedy action tensor([ 1.8386, -0.9118, -0.5274, 1.0938]) tensor([0.6125, 0.0391, 0.0575, 0.2908]) -Greedy action tensor([ 1.2060, -0.0678, -0.5673, 0.0137]) tensor([0.5704, 0.1596, 0.0968, 0.1731]) -Greedy action tensor([ 1.6628, -0.9080, -0.3755, 0.3672]) tensor([0.6755, 0.0517, 0.0880, 0.1849]) -Greedy action tensor([ 1.1315, -0.1674, -0.8103, 0.0109]) tensor([0.5739, 0.1566, 0.0823, 0.1871]) -Greedy action tensor([ 2.0680, -1.0011, -0.5219, 0.6514]) tensor([0.7331, 0.0341, 0.0550, 0.1778]) -Greedy action tensor([ 1.3570, -0.1662, -0.5796, 0.0400]) tensor([0.6134, 0.1337, 0.0885, 0.1644]) -Greedy action tensor([ 1.8604, -0.7140, -0.5888, 0.2163]) tensor([0.7376, 0.0562, 0.0637, 0.1425]) -Greedy action tensor([ 1.3550, -0.6344, -0.2516, 0.1805]) tensor([0.6074, 0.0831, 0.1218, 0.1877]) -Greedy action tensor([ 2.0368, -1.1406, -0.0916, 0.3613]) tensor([0.7419, 0.0309, 0.0883, 0.1389]) -Greedy action tensor([ 0.8159, -0.2004, -0.4972, 0.7448]) tensor([0.3903, 0.1412, 0.1050, 0.3635]) -Greedy action tensor([ 1.2291, -0.4776, -0.3243, 0.3891]) tensor([0.5480, 0.0995, 0.1159, 0.2366]) -Greedy action tensor([ 1.3922, 0.1000, -0.3297, -0.0227]) tensor([0.5895, 0.1619, 0.1054, 0.1432]) -Greedy action tensor([ 1.3530, -0.1931, -0.4141, -0.1061]) tensor([0.6187, 0.1318, 0.1057, 0.1438]) -Greedy action tensor([ 1.4353, -0.7244, -0.4598, 0.2091]) tensor([0.6414, 0.0740, 0.0964, 0.1882]) -Greedy action tensor([ 1.2860, -0.2047, -0.4828, 0.3728]) tensor([0.5565, 0.1253, 0.0949, 0.2233]) -Greedy action tensor([ 1.8255, -1.5390, -0.1603, 0.3234]) tensor([0.7171, 0.0248, 0.0984, 0.1597]) -Greedy action tensor([ 1.8151, -0.7188, -0.4873, 0.5043]) tensor([0.6901, 0.0548, 0.0690, 0.1861]) -Greedy action tensor([ 1.8337, -0.6546, -0.1080, 0.5996]) tensor([0.6589, 0.0547, 0.0945, 0.1918]) -Greedy action tensor([ 1.1125, -0.5104, -0.3947, 0.2976]) tensor([0.5372, 0.1060, 0.1190, 0.2378]) -Greedy action tensor([ 1.2795, -0.4339, -0.4373, 0.0940]) tensor([0.6004, 0.1082, 0.1079, 0.1835]) -Greedy action tensor([-0.7312, -0.1360, -0.3453, -0.8982]) tensor([0.1949, 0.3535, 0.2867, 0.1649]) -Greedy action tensor([-0.2367, -2.7731, 0.4598, 0.2872]) tensor([0.2094, 0.0166, 0.4203, 0.3537]) -Greedy action tensor([-0.6835, -0.3528, -0.4190, -0.5343]) tensor([0.2060, 0.2867, 0.2683, 0.2391]) -Greedy action tensor([ 0.4068, -0.8771, 0.4350, -0.3098]) tensor([0.3579, 0.0991, 0.3682, 0.1748]) -Greedy action tensor([ 0.6742, -0.9932, 0.8559, 0.6178]) tensor([0.3000, 0.0566, 0.3598, 0.2836]) -Greedy action tensor([ 0.6062, -0.8236, -0.2894, 0.2308]) tensor([0.4283, 0.1025, 0.1749, 0.2943]) -Greedy action tensor([ 0.3257, 0.5272, -0.5100, -0.1774]) tensor([0.3066, 0.3751, 0.1329, 0.1854]) -Greedy action tensor([ 0.8368, -1.1220, -0.0674, 0.6407]) tensor([0.4223, 0.0596, 0.1710, 0.3471]) -Greedy action tensor([-0.1331, 0.2814, -0.1575, -0.8283]) tensor([0.2507, 0.3795, 0.2447, 0.1251]) -Greedy action tensor([-0.4613, -0.7262, -0.1740, -0.8545]) tensor([0.2649, 0.2033, 0.3531, 0.1788]) -Greedy action tensor([-1.4588, -1.0878, 0.3701, -0.4817]) tensor([0.0882, 0.1279, 0.5495, 0.2344]) -Greedy action tensor([-0.7246, 0.0531, 0.1748, -1.4917]) tensor([0.1640, 0.3569, 0.4030, 0.0761]) -Greedy action tensor([ 0.5544, -1.4728, 0.5412, 1.1196]) tensor([0.2578, 0.0340, 0.2545, 0.4537]) -Greedy action tensor([-0.0721, -0.4469, -0.6941, -0.6621]) tensor([0.3599, 0.2474, 0.1932, 0.1995]) -Greedy action tensor([-1.2096, -1.5056, 0.9936, -0.9016]) tensor([0.0822, 0.0612, 0.7447, 0.1119]) -Greedy action tensor([-0.1595, -0.5833, -0.3216, -1.1033]) tensor([0.3455, 0.2262, 0.2938, 0.1345]) -Greedy action tensor([-0.5664, -0.2133, 0.4208, -0.8975]) tensor([0.1717, 0.2444, 0.4607, 0.1233]) -Greedy action tensor([ 0.7493, -1.4338, 0.2796, 0.5363]) tensor([0.3928, 0.0443, 0.2456, 0.3174]) -Greedy action tensor([-0.7799, -0.0104, -0.6840, -0.7179]) tensor([0.1879, 0.4055, 0.2068, 0.1999]) -Greedy action tensor([-0.1888, -0.2958, -0.5340, -0.7333]) tensor([0.3138, 0.2820, 0.2222, 0.1820]) -Greedy action tensor([-0.5304, -1.4742, -0.1435, 2.0638]) tensor([0.0615, 0.0240, 0.0906, 0.8239]) -Greedy action tensor([-0.2174, 0.0636, -0.5327, 0.7904]) tensor([0.1726, 0.2286, 0.1259, 0.4729]) -Greedy action tensor([ 1.1757, -0.4083, 0.2381, -0.5006]) tensor([0.5606, 0.1150, 0.2195, 0.1049]) -Greedy action tensor([ 0.5684, -1.5719, 1.6552, 0.0500]) tensor([0.2138, 0.0251, 0.6338, 0.1273]) -Greedy action tensor([ 1.0927, -0.8641, 0.4828, 0.3257]) tensor([0.4653, 0.0658, 0.2529, 0.2161]) -Greedy action tensor([ 0.4641, 0.2212, -0.1734, 0.1591]) tensor([0.3279, 0.2572, 0.1733, 0.2417]) -Greedy action tensor([ 0.4665, -1.3071, -0.0809, -0.6861]) tensor([0.4845, 0.0822, 0.2803, 0.1530]) -Greedy action tensor([ 0.4145, -2.0698, 0.2817, -0.4939]) tensor([0.4233, 0.0353, 0.3707, 0.1707]) -Greedy action tensor([-0.6769, -0.4744, -0.1480, -0.4938]) tensor([0.1952, 0.2390, 0.3313, 0.2344]) -Greedy action tensor([-0.3371, -1.0516, 0.5715, -0.9564]) tensor([0.2218, 0.1086, 0.5503, 0.1194]) -Greedy action tensor([ 0.4869, -1.2981, -0.1166, 0.7748]) tensor([0.3281, 0.0550, 0.1794, 0.4375]) -Greedy action tensor([ 0.2914, -0.4546, 0.3742, 0.0976]) tensor([0.2955, 0.1401, 0.3210, 0.2434]) -Greedy action tensor([ 0.6726, -1.3621, 0.1897, -0.3875]) tensor([0.4775, 0.0624, 0.2946, 0.1654]) -Greedy action tensor([ 1.1861, -1.2297, 0.8951, 0.6349]) tensor([0.4144, 0.0370, 0.3098, 0.2388]) -Greedy action tensor([ 0.4604, -1.2909, 0.5089, 0.1560]) tensor([0.3377, 0.0586, 0.3545, 0.2491]) -Greedy action tensor([ 0.0295, -1.0512, 0.1247, -0.1844]) tensor([0.3080, 0.1045, 0.3388, 0.2487]) -Greedy action tensor([-0.2469, -0.5863, 0.2384, -0.3924]) tensor([0.2380, 0.1695, 0.3867, 0.2058]) -Greedy action tensor([ 0.3258, -0.1611, 0.1461, 0.7117]) tensor([0.2550, 0.1567, 0.2131, 0.3751]) -Greedy action tensor([ 1.0350, 0.2962, 1.3586, -0.2562]) tensor([0.3190, 0.1524, 0.4409, 0.0877]) -Greedy action tensor([-0.9799, -0.0417, -0.5067, 0.2873]) tensor([0.1148, 0.2933, 0.1843, 0.4076]) -Greedy action tensor([-0.9488, -0.6274, 0.8517, -0.5877]) tensor([0.1014, 0.1398, 0.6134, 0.1454]) -Greedy action tensor([-0.0757, -0.5142, 0.1643, -0.6093]) tensor([0.2855, 0.1841, 0.3629, 0.1674]) -Greedy action tensor([ 0.0933, -0.2790, 0.3882, 0.0424]) tensor([0.2511, 0.1730, 0.3372, 0.2386]) -Greedy action tensor([-1.2281, -0.6926, -0.5410, -0.1809]) tensor([0.1325, 0.2264, 0.2634, 0.3777]) -Greedy action tensor([-0.8240, -1.1026, 0.0545, -0.2188]) tensor([0.1668, 0.1262, 0.4015, 0.3055]) -Greedy action tensor([0.2585, 0.1496, 0.8364, 0.0949]) tensor([0.2208, 0.1980, 0.3936, 0.1875]) -Greedy action tensor([-0.2973, -0.4567, 0.1564, -0.0865]) tensor([0.2145, 0.1829, 0.3377, 0.2649]) -Greedy action tensor([-0.1464, 0.1974, -0.2850, -0.7314]) tensor([0.2605, 0.3675, 0.2268, 0.1452]) -Greedy action tensor([ 0.8484, -1.3836, -0.4552, 0.5153]) tensor([0.4772, 0.0512, 0.1296, 0.3420]) -Greedy action tensor([ 0.1475, -0.5627, -0.8738, -0.1821]) tensor([0.3890, 0.1912, 0.1401, 0.2798]) -Greedy action tensor([-1.8594, -0.5346, 0.1053, -0.7886]) tensor([0.0675, 0.2539, 0.4816, 0.1970]) -Greedy action tensor([ 0.5091, -1.9220, -0.1850, 0.1416]) tensor([0.4386, 0.0386, 0.2191, 0.3037]) -Greedy action tensor([ 0.1830, -1.0544, -0.2282, 0.5884]) tensor([0.2896, 0.0840, 0.1920, 0.4344]) -Greedy action tensor([-0.7951, -0.5146, -1.4443, -0.1044]) tensor([0.2066, 0.2734, 0.1079, 0.4121]) -Greedy action tensor([-0.8916, -0.7782, -0.5517, -0.9907]) tensor([0.2257, 0.2528, 0.3171, 0.2044]) -Greedy action tensor([ 0.2556, -0.6946, -0.2873, -0.2265]) tensor([0.3868, 0.1496, 0.2248, 0.2389]) -Greedy action tensor([-0.3991, -1.4426, -0.2184, 0.4183]) tensor([0.2077, 0.0732, 0.2488, 0.4703]) -Greedy action tensor([ 0.7884, -1.0505, 0.1737, 0.0227]) tensor([0.4619, 0.0734, 0.2498, 0.2148]) -Greedy action tensor([ 0.7289, 0.0432, -0.3650, 0.8671]) tensor([0.3348, 0.1686, 0.1121, 0.3844]) -Greedy action tensor([ 0.6605, -0.2783, 0.3582, -0.2494]) tensor([0.3948, 0.1544, 0.2918, 0.1589]) -Greedy action tensor([-0.4126, -0.8439, -0.6988, -0.1832]) tensor([0.2733, 0.1776, 0.2053, 0.3438]) -Greedy action tensor([ 0.5012, -0.9534, -0.1836, -0.3693]) tensor([0.4637, 0.1083, 0.2338, 0.1942]) -Greedy action tensor([ 0.7782, -0.4917, -0.1701, -0.1281]) tensor([0.4826, 0.1355, 0.1869, 0.1950]) -Greedy action tensor([ 0.0398, -0.7407, -0.1473, -0.5098]) tensor([0.3491, 0.1599, 0.2895, 0.2015]) -Greedy action tensor([ 0.2950, -1.0634, 0.7306, -0.0635]) tensor([0.2856, 0.0734, 0.4415, 0.1995]) -Greedy action tensor([ 0.8730, 0.2612, -0.2313, -0.0597]) tensor([0.4410, 0.2392, 0.1462, 0.1736]) -Greedy action tensor([ 0.7207, -0.5937, 1.0359, 0.5824]) tensor([0.2849, 0.0765, 0.3905, 0.2481]) -Greedy action tensor([-1.2827, -0.8443, 0.5149, -1.7516]) tensor([0.1086, 0.1683, 0.6552, 0.0679]) -Greedy action tensor([-0.0295, -0.2453, 0.1275, -0.4596]) tensor([0.2758, 0.2222, 0.3226, 0.1794]) -Greedy action tensor([ 0.5422, -0.9750, -0.7001, -0.2627]) tensor([0.5115, 0.1122, 0.1477, 0.2287]) -Greedy action tensor([ 0.6276, -1.3898, 0.1153, -0.6122]) tensor([0.4947, 0.0658, 0.2964, 0.1432]) -Greedy action tensor([ 0.9471, -0.4348, -0.4834, 0.3536]) tensor([0.4896, 0.1229, 0.1171, 0.2704]) -Greedy action tensor([ 0.8117, -0.3996, -0.2598, -0.6242]) tensor([0.5324, 0.1586, 0.1824, 0.1267]) -Greedy action tensor([ 0.0826, -0.7883, 0.2633, 0.5847]) tensor([0.2343, 0.0981, 0.2807, 0.3870]) -Greedy action tensor([ 0.0323, -1.4670, -0.4429, 0.4088]) tensor([0.3028, 0.0676, 0.1883, 0.4413]) -Greedy action tensor([ 1.3723, -0.1250, 0.1137, -0.4422]) tensor([0.5985, 0.1339, 0.1700, 0.0975]) -Greedy action tensor([0.3241, 0.0507, 1.1021, 0.4463]) tensor([0.1973, 0.1501, 0.4296, 0.2230]) -Greedy action tensor([-0.0331, -1.0235, -0.6685, 0.7509]) tensor([0.2444, 0.0908, 0.1295, 0.5353]) -Greedy action tensor([ 1.7776, -0.1363, -1.1426, 0.6761]) tensor([0.6520, 0.0962, 0.0352, 0.2167]) -Greedy action tensor([-0.3741, -0.8217, -1.2234, 0.5274]) tensor([0.2207, 0.1411, 0.0944, 0.5438]) -Greedy action tensor([-1.0821, -1.0669, -0.5939, -1.6142]) tensor([0.2363, 0.2399, 0.3850, 0.1388]) -Greedy action tensor([-0.4564, -1.6227, 0.4282, -0.6936]) tensor([0.2211, 0.0689, 0.5356, 0.1744]) -Greedy action tensor([-1.8749, -0.4536, 0.6279, -0.1497]) tensor([0.0435, 0.1803, 0.5318, 0.2444]) -Greedy action tensor([-1.8523, -0.4373, 0.6159, -0.1424]) tensor([0.0445, 0.1834, 0.5258, 0.2463]) -Greedy action tensor([-0.2188, 0.9464, -0.1951, 0.1491]) tensor([0.1498, 0.4804, 0.1534, 0.2164]) -Greedy action tensor([-1.8454, -0.3436, 0.6042, -0.1418]) tensor([0.0443, 0.1990, 0.5133, 0.2434]) -Greedy action tensor([-1.9222, -0.3675, 0.6440, -0.1655]) tensor([0.0407, 0.1929, 0.5303, 0.2360]) -Greedy action tensor([-1.8804, -0.4082, 0.6306, -0.1437]) tensor([0.0428, 0.1866, 0.5274, 0.2432]) -Greedy action tensor([-1.5254, -0.5992, 0.4958, 0.0292]) tensor([0.0633, 0.1597, 0.4775, 0.2995]) -Greedy action tensor([-1.1846, 0.8789, 0.1287, 0.3762]) tensor([0.0576, 0.4537, 0.2143, 0.2744]) -Greedy action tensor([-0.6361, 0.6133, 0.0784, -0.0235]) tensor([0.1194, 0.4164, 0.2439, 0.2203]) -Greedy action tensor([-1.1006, -0.3142, 0.2738, -0.0110]) tensor([0.0988, 0.2169, 0.3905, 0.2937]) -Greedy action tensor([ 1.2262, 0.7349, -0.3013, -0.4727]) tensor([0.4971, 0.3041, 0.1079, 0.0909]) -Greedy action tensor([-1.9065, -0.3289, 0.6353, -0.1832]) tensor([0.0414, 0.2006, 0.5260, 0.2320]) -Greedy action tensor([-0.9055, 0.4543, 0.3296, 0.2876]) tensor([0.0860, 0.3349, 0.2956, 0.2835]) -Greedy action tensor([-0.7614, 0.0248, 0.1545, 0.1006]) tensor([0.1240, 0.2723, 0.3100, 0.2937]) -Greedy action tensor([-1.4910, -0.6073, 0.4762, 0.1281]) tensor([0.0640, 0.1549, 0.4578, 0.3232]) -Greedy action tensor([-1.1999, -0.2918, 0.2518, 0.2808]) tensor([0.0823, 0.2042, 0.3516, 0.3619]) -Greedy action tensor([-1.9183, -0.4162, 0.6535, -0.1652]) tensor([0.0411, 0.1844, 0.5375, 0.2370]) -Greedy action tensor([-1.3515, -0.2140, 0.4731, 0.0361]) tensor([0.0698, 0.2177, 0.4328, 0.2796]) -Greedy action tensor([-1.8270, -0.1204, 0.6001, -0.3333]) tensor([0.0449, 0.2472, 0.5081, 0.1998]) -Greedy action tensor([-1.9156, -0.4442, 0.6518, -0.1656]) tensor([0.0414, 0.1804, 0.5398, 0.2384]) -Greedy action tensor([-1.8171, -0.0885, 0.5647, -0.1006]) tensor([0.0434, 0.2447, 0.4702, 0.2417]) -Greedy action tensor([-1.8522, -0.4668, 0.6198, -0.1209]) tensor([0.0445, 0.1777, 0.5267, 0.2511]) -Greedy action tensor([-1.9487, -0.4479, 0.6660, -0.1832]) tensor([0.0400, 0.1795, 0.5467, 0.2338]) -Greedy action tensor([-1.8718, -0.3529, 0.6331, -0.1376]) tensor([0.0426, 0.1946, 0.5215, 0.2413]) -Greedy action tensor([-1.7360, -0.2754, 0.5641, -0.0114]) tensor([0.0479, 0.2062, 0.4774, 0.2685]) -Greedy action tensor([-1.7887, -0.2565, 0.6011, -0.0872]) tensor([0.0454, 0.2102, 0.4955, 0.2489]) -Greedy action tensor([-1.8668, -0.4228, 0.6385, -0.1365]) tensor([0.0432, 0.1832, 0.5296, 0.2440]) -Greedy action tensor([-1.8273, -0.4586, 0.6076, -0.1278]) tensor([0.0458, 0.1802, 0.5232, 0.2508]) -Greedy action tensor([-1.1780, -0.3286, 0.2607, -0.1641]) tensor([0.0970, 0.2268, 0.4089, 0.2673]) -Greedy action tensor([-1.8756, -0.5234, 0.8884, 0.0101]) tensor([0.0366, 0.1415, 0.5806, 0.2413]) -Greedy action tensor([-1.9237, -0.3927, 0.6506, -0.1698]) tensor([0.0408, 0.1885, 0.5351, 0.2356]) -Greedy action tensor([-1.9396, -0.4470, 0.6625, -0.1793]) tensor([0.0404, 0.1797, 0.5450, 0.2349]) -Greedy action tensor([-1.7906, -0.4849, 0.6370, 0.0078]) tensor([0.0453, 0.1673, 0.5136, 0.2738]) -Greedy action tensor([-1.8717, -0.3329, 0.6138, -0.1437]) tensor([0.0429, 0.2000, 0.5154, 0.2417]) -Greedy action tensor([-1.3056, 0.2962, 0.3129, 0.0140]) tensor([0.0678, 0.3364, 0.3421, 0.2537]) -Greedy action tensor([-0.9247, 0.0749, 0.1866, -0.1614]) tensor([0.1123, 0.3053, 0.3414, 0.2410]) -Greedy action tensor([-1.9190, -0.4446, 0.6625, -0.1629]) tensor([0.0410, 0.1792, 0.5422, 0.2375]) -Greedy action tensor([-1.8776, -0.4452, 0.6351, -0.1465]) tensor([0.0432, 0.1808, 0.5324, 0.2437]) -Greedy action tensor([-1.6752, -0.5324, 0.5258, -0.0446]) tensor([0.0547, 0.1716, 0.4943, 0.2794]) -Greedy action tensor([-1.3447, 0.5758, 0.2488, 0.0763]) tensor([0.0592, 0.4041, 0.2914, 0.2452]) -Greedy action tensor([-0.7288, -0.0530, 0.2505, -0.3674]) tensor([0.1416, 0.2783, 0.3770, 0.2032]) -Greedy action tensor([-1.8520, -0.4266, 0.6161, -0.1383]) tensor([0.0444, 0.1848, 0.5242, 0.2466]) -Greedy action tensor([-1.5672, 0.0940, 0.4113, -0.0318]) tensor([0.0551, 0.2903, 0.3986, 0.2559]) -Greedy action tensor([-1.9298, -0.4009, 0.6490, -0.1786]) tensor([0.0407, 0.1879, 0.5368, 0.2346]) -Greedy action tensor([-1.8589, -0.4473, 0.6285, -0.1375]) tensor([0.0440, 0.1805, 0.5294, 0.2461]) -Greedy action tensor([-1.7544, -0.2588, 0.5870, -0.0488]) tensor([0.0468, 0.2089, 0.4866, 0.2577]) -Greedy action tensor([-1.9120, -0.4495, 0.6548, -0.1621]) tensor([0.0415, 0.1791, 0.5405, 0.2388]) -Greedy action tensor([-1.6207, -0.4275, 0.5208, -0.0075]) tensor([0.0561, 0.1850, 0.4774, 0.2815]) -Greedy action tensor([-1.6445, -0.4145, 0.5223, -0.1057]) tensor([0.0561, 0.1921, 0.4902, 0.2616]) -Greedy action tensor([-1.1793, -0.5041, 0.4085, 0.2907]) tensor([0.0819, 0.1609, 0.4008, 0.3563]) -Greedy action tensor([-1.3869, -0.4357, 0.4908, 0.2885]) tensor([0.0647, 0.1674, 0.4227, 0.3453]) -Greedy action tensor([-1.2084, -0.6280, 0.4330, 0.0897]) tensor([0.0861, 0.1539, 0.4446, 0.3154]) -Greedy action tensor([-0.9553, -0.1837, 0.3560, 0.4628]) tensor([0.0909, 0.1966, 0.3373, 0.3753]) -Greedy action tensor([-1.4708, 0.2930, 0.3015, 0.0684]) tensor([0.0575, 0.3357, 0.3386, 0.2682]) -Greedy action tensor([-0.2869, 0.5312, 0.5317, 0.5575]) tensor([0.1272, 0.2883, 0.2885, 0.2960]) -Greedy action tensor([-1.3051, 0.7042, 0.2395, 0.1937]) tensor([0.0567, 0.4233, 0.2659, 0.2540]) -Greedy action tensor([-1.4599, 0.1920, 0.4013, -0.6568]) tensor([0.0672, 0.3506, 0.4322, 0.1500]) -Greedy action tensor([-1.2349, -0.4592, 1.3046, 1.0788]) tensor([0.0385, 0.0837, 0.4883, 0.3895]) -Greedy action tensor([-1.2141, 0.3477, 0.2540, -0.0321]) tensor([0.0748, 0.3566, 0.3247, 0.2439]) -Greedy action tensor([-1.8145, -0.4237, 0.6058, -0.1773]) tensor([0.0467, 0.1877, 0.5255, 0.2401]) -Greedy action tensor([-1.1815, -0.6167, 0.2577, 0.2804]) tensor([0.0886, 0.1558, 0.3735, 0.3821]) -Greedy action tensor([-1.8119, -0.4986, 0.5774, -0.1178]) tensor([0.0475, 0.1765, 0.5177, 0.2583]) -Greedy action tensor([-1.7175, -0.5216, 0.6277, -0.0306]) tensor([0.0496, 0.1641, 0.5180, 0.2682]) -Greedy action tensor([-1.8751, -0.4028, 0.6258, -0.1657]) tensor([0.0433, 0.1889, 0.5284, 0.2394]) -Greedy action tensor([-0.4915, 0.0864, 0.5312, 0.6339]) tensor([0.1157, 0.2062, 0.3217, 0.3565]) -Greedy action tensor([-1.6572, -0.1807, 0.5232, 0.0190]) tensor([0.0511, 0.2236, 0.4522, 0.2731]) -Greedy action tensor([-1.2261, 0.6805, 0.2940, 0.2167]) tensor([0.0605, 0.4070, 0.2766, 0.2560]) -Greedy action tensor([-1.7001, -0.1438, 0.5974, -0.0546]) tensor([0.0479, 0.2271, 0.4766, 0.2483]) -Greedy action tensor([-1.8409, -0.3819, 0.6585, -0.0925]) tensor([0.0431, 0.1852, 0.5243, 0.2474]) -Greedy action tensor([-1.6723, -0.4381, 0.5416, -0.0227]) tensor([0.0532, 0.1828, 0.4870, 0.2770]) -Greedy action tensor([-1.8508, -0.4468, 0.6207, -0.1320]) tensor([0.0445, 0.1810, 0.5265, 0.2480]) -Greedy action tensor([-1.4578, -0.3844, 0.3622, 0.1732]) tensor([0.0658, 0.1924, 0.4059, 0.3360]) -Greedy action tensor([-1.6262, 0.0181, 0.4502, -0.0481]) tensor([0.0526, 0.2725, 0.4198, 0.2551]) -Greedy action tensor([-1.8963, -0.4456, 0.6462, -0.1559]) tensor([0.0422, 0.1802, 0.5369, 0.2407]) -Greedy action tensor([-1.9210, -0.4383, 0.6569, -0.1681]) tensor([0.0411, 0.1809, 0.5409, 0.2371]) -Greedy action tensor([-1.8761, -0.4310, 0.6332, -0.1496]) tensor([0.0432, 0.1832, 0.5309, 0.2427]) -Greedy action tensor([-1.3793, -0.4397, 0.3825, -0.0077]) tensor([0.0751, 0.1921, 0.4370, 0.2958]) -Greedy action tensor([-1.8991, -0.4570, 0.6489, -0.1605]) tensor([0.0422, 0.1785, 0.5393, 0.2400]) -Greedy action tensor([-1.6262, 0.3412, 0.4361, -0.0850]) tensor([0.0483, 0.3457, 0.3802, 0.2258]) -Greedy action tensor([-0.4814, -0.5166, 0.1672, 0.2219]) tensor([0.1695, 0.1637, 0.3243, 0.3425]) -Greedy action tensor([-1.1046, -0.4106, 0.7038, 0.8755]) tensor([0.0612, 0.1225, 0.3732, 0.4431]) -Greedy action tensor([ 1.1250, -0.7665, 0.1502, -0.6465]) tensor([0.5889, 0.0888, 0.2222, 0.1002]) -Greedy action tensor([ 0.7725, -0.4170, 0.1055, -0.4773]) tensor([0.4752, 0.1446, 0.2439, 0.1362]) -Greedy action tensor([ 0.6964, -0.7306, -0.0664, -0.2644]) tensor([0.4787, 0.1149, 0.2232, 0.1831]) -Greedy action tensor([ 0.4155, -0.0321, -0.0995, -0.0505]) tensor([0.3492, 0.2232, 0.2086, 0.2191]) -Greedy action tensor([ 0.4431, 0.0044, -0.0781, -0.3972]) tensor([0.3745, 0.2415, 0.2224, 0.1616]) -Greedy action tensor([ 0.6811, 0.1307, -0.1051, 0.1440]) tensor([0.3821, 0.2204, 0.1741, 0.2233]) -Greedy action tensor([ 0.8650, -0.3866, 0.0526, -0.1510]) tensor([0.4780, 0.1367, 0.2121, 0.1731]) -Greedy action tensor([ 0.4726, -0.2192, 0.1263, -0.2447]) tensor([0.3709, 0.1857, 0.2623, 0.1810]) -Greedy action tensor([ 1.0060, -0.7986, 0.0474, -0.5545]) tensor([0.5688, 0.0936, 0.2181, 0.1195]) -Greedy action tensor([ 1.1764, -0.7743, 0.0430, -0.5496]) tensor([0.6090, 0.0866, 0.1960, 0.1084]) -Greedy action tensor([ 0.9386, -0.5430, -0.0645, -0.3496]) tensor([0.5348, 0.1216, 0.1961, 0.1475]) -Greedy action tensor([ 0.5356, -0.4318, -0.2095, -0.1232]) tensor([0.4216, 0.1602, 0.2001, 0.2181]) -Greedy action tensor([ 0.6298, -0.2852, -0.0968, -0.2008]) tensor([0.4311, 0.1727, 0.2084, 0.1878]) -Greedy action tensor([ 1.1629, -0.6788, -0.0473, -0.8577]) tensor([0.6292, 0.0998, 0.1876, 0.0834]) -Greedy action tensor([ 0.4122, -0.1744, -0.0168, -0.0077]) tensor([0.3491, 0.1942, 0.2273, 0.2294]) -Greedy action tensor([ 1.2069, -0.7884, -0.0858, -0.4607]) tensor([0.6253, 0.0850, 0.1717, 0.1180]) -Greedy action tensor([ 0.8233, -1.0225, -0.0365, -0.3358]) tensor([0.5277, 0.0833, 0.2234, 0.1656]) -Greedy action tensor([ 1.0472, -0.7623, -0.1427, -0.4284]) tensor([0.5894, 0.0965, 0.1793, 0.1348]) -Greedy action tensor([ 0.7829, -0.1647, -0.2034, -0.5235]) tensor([0.4923, 0.1908, 0.1836, 0.1333]) -Greedy action tensor([ 0.8107, -0.4972, -0.1930, -0.3308]) tensor([0.5112, 0.1382, 0.1874, 0.1632]) -Greedy action tensor([ 0.6360, 0.2140, -0.1375, -0.2606]) tensor([0.3960, 0.2597, 0.1827, 0.1616]) -Greedy action tensor([ 0.6000, -0.2638, 0.0144, -0.1310]) tensor([0.4065, 0.1714, 0.2264, 0.1957]) -Greedy action tensor([ 0.9005, -0.4684, 0.0024, -0.4531]) tensor([0.5208, 0.1325, 0.2122, 0.1345]) -Greedy action tensor([ 1.0404, -0.6585, -0.0030, -0.5203]) tensor([0.5730, 0.1048, 0.2018, 0.1203]) -Greedy action tensor([ 0.8056, -0.5319, 0.1531, -0.4578]) tensor([0.4840, 0.1271, 0.2521, 0.1368]) -Greedy action tensor([ 0.7343, -0.6830, 0.0973, -0.3258]) tensor([0.4722, 0.1144, 0.2497, 0.1636]) -Greedy action tensor([ 1.0781, -0.7906, 0.1885, -0.6407]) tensor([0.5732, 0.0885, 0.2355, 0.1028]) -Greedy action tensor([ 0.5880, 0.0609, 0.0097, -0.0043]) tensor([0.3698, 0.2183, 0.2074, 0.2045]) -Greedy action tensor([ 1.0890, -0.5000, -0.0058, -0.3508]) tensor([0.5631, 0.1150, 0.1884, 0.1335]) -Greedy action tensor([ 0.4282, -0.1427, -0.0181, -0.1253]) tensor([0.3597, 0.2032, 0.2302, 0.2068]) -Greedy action tensor([ 0.5335, -0.0026, -0.1264, -0.2562]) tensor([0.3913, 0.2289, 0.2022, 0.1776]) -Greedy action tensor([ 0.8462, -0.6310, -0.0586, -0.4483]) tensor([0.5244, 0.1197, 0.2122, 0.1437]) -Greedy action tensor([ 0.5244, -0.3505, 0.0168, -0.1103]) tensor([0.3923, 0.1636, 0.2361, 0.2080]) -Greedy action tensor([ 0.8971, -0.4802, -0.0868, -0.2232]) tensor([0.5122, 0.1292, 0.1915, 0.1671]) -Greedy action tensor([ 0.8882, -0.8419, -0.0771, -0.4297]) tensor([0.5477, 0.0971, 0.2086, 0.1466]) -Greedy action tensor([ 0.8785, -0.6339, 0.0546, -0.6195]) tensor([0.5312, 0.1171, 0.2330, 0.1188]) -Greedy action tensor([ 0.6562, -0.3203, -0.0970, -0.0520]) tensor([0.4274, 0.1609, 0.2012, 0.2105]) -Greedy action tensor([ 0.9704, -0.7602, -0.0399, -0.4489]) tensor([0.5608, 0.0994, 0.2042, 0.1356]) -Greedy action tensor([ 0.6736, -0.7668, 0.0734, -0.1666]) tensor([0.4510, 0.1068, 0.2475, 0.1947]) -Greedy action tensor([ 0.8866, -0.7936, -0.0706, -0.2861]) tensor([0.5320, 0.0991, 0.2043, 0.1647]) -Greedy action tensor([ 0.9477, -0.3723, -0.1120, -0.3343]) tensor([0.5288, 0.1413, 0.1833, 0.1467]) -Greedy action tensor([ 0.3931, -0.0391, 0.0686, -0.3527]) tensor([0.3513, 0.2280, 0.2540, 0.1667]) -Greedy action tensor([ 0.9376, -0.4531, -0.0093, -0.4362]) tensor([0.5291, 0.1317, 0.2053, 0.1339]) -Greedy action tensor([ 0.7791, -0.4316, 0.0395, -0.3920]) tensor([0.4795, 0.1429, 0.2289, 0.1487]) -Greedy action tensor([ 1.0692, -0.7034, 0.0412, -0.3912]) tensor([0.5683, 0.0965, 0.2033, 0.1319]) -Greedy action tensor([ 1.0836, -0.5236, -0.0871, -0.2311]) tensor([0.5621, 0.1127, 0.1743, 0.1509]) -Greedy action tensor([ 0.8839, -0.6339, -0.0107, -0.4836]) tensor([0.5311, 0.1164, 0.2171, 0.1353]) -Greedy action tensor([ 0.8030, -0.2921, 0.0307, -0.0847]) tensor([0.4529, 0.1515, 0.2092, 0.1864]) -Greedy action tensor([ 0.8695, -0.5209, 0.1235, -0.3176]) tensor([0.4930, 0.1228, 0.2338, 0.1504]) -Greedy action tensor([ 0.3223, 0.2057, -0.0140, -0.0218]) tensor([0.3018, 0.2686, 0.2156, 0.2140]) -Greedy action tensor([ 1.0327, -0.8031, 0.1742, -0.7169]) tensor([0.5691, 0.0908, 0.2412, 0.0989]) -Greedy action tensor([ 0.7853, -0.3814, -0.0751, -0.1535]) tensor([0.4705, 0.1465, 0.1990, 0.1840]) -Greedy action tensor([ 0.8968, -0.3991, -0.0497, -0.3887]) tensor([0.5159, 0.1412, 0.2002, 0.1427]) -Greedy action tensor([ 1.0064, -0.8192, 0.0642, -0.3264]) tensor([0.5511, 0.0888, 0.2148, 0.1453]) -Greedy action tensor([ 0.7690, -0.6391, -0.0870, -0.2802]) tensor([0.4951, 0.1211, 0.2104, 0.1734]) -Greedy action tensor([ 0.6182, -0.0987, 0.0362, -0.0008]) tensor([0.3868, 0.1888, 0.2161, 0.2083]) -Greedy action tensor([ 1.1113, -0.7043, 0.0431, -0.4696]) tensor([0.5841, 0.0950, 0.2007, 0.1202]) -Greedy action tensor([ 0.4687, 0.2425, 0.0682, -0.1555]) tensor([0.3330, 0.2656, 0.2231, 0.1784]) -Greedy action tensor([ 0.4939, 0.0227, -0.1235, 0.0218]) tensor([0.3588, 0.2240, 0.1935, 0.2238]) -Greedy action tensor([ 0.8577, -0.5973, 0.0513, -0.2523]) tensor([0.4977, 0.1162, 0.2222, 0.1640]) -Greedy action tensor([ 0.4853, -0.0790, 0.0293, -0.0089]) tensor([0.3555, 0.2022, 0.2254, 0.2169]) -Greedy action tensor([ 0.7808, -0.3442, -0.2071, -0.0568]) tensor([0.4695, 0.1524, 0.1748, 0.2032]) -Greedy action tensor([ 1.3170, -0.6300, -0.0212, -0.4248]) tensor([0.6328, 0.0903, 0.1660, 0.1109]) -Greedy action tensor([ 0.6645, -0.3707, -0.1099, -0.1941]) tensor([0.4465, 0.1586, 0.2058, 0.1892]) -Greedy action tensor([ 1.1967, -0.6740, -0.0623, -0.5521]) tensor([0.6204, 0.0955, 0.1761, 0.1079]) -Greedy action tensor([ 0.9140, -0.6214, -0.1573, -0.3968]) tensor([0.5472, 0.1178, 0.1874, 0.1475]) -Greedy action tensor([ 0.8834, -0.8156, -0.0960, -0.3373]) tensor([0.5395, 0.0987, 0.2026, 0.1592]) -Greedy action tensor([ 0.5710, 0.0942, -0.0254, 0.0462]) tensor([0.3619, 0.2246, 0.1993, 0.2141]) -Greedy action tensor([ 1.2667, -0.8071, 0.0257, -0.4994]) tensor([0.6306, 0.0793, 0.1823, 0.1078]) -Greedy action tensor([ 0.9223, -0.6861, 0.0150, -0.3779]) tensor([0.5330, 0.1067, 0.2151, 0.1452]) -Greedy action tensor([ 0.1497, 0.3284, -0.1596, 0.0771]) tensor([0.2591, 0.3098, 0.1902, 0.2410]) -Greedy action tensor([ 0.9669, 0.0706, 0.1998, -0.1983]) tensor([0.4578, 0.1868, 0.2126, 0.1428]) -Greedy action tensor([ 0.6872, -0.3900, -0.0620, -0.2254]) tensor([0.4515, 0.1538, 0.2135, 0.1813]) -Greedy action tensor([ 1.0181, -0.6012, -0.0248, -0.5116]) tensor([0.5659, 0.1121, 0.1994, 0.1226]) -Greedy action tensor([ 0.6161, -0.3095, -0.0749, -0.0674]) tensor([0.4163, 0.1650, 0.2086, 0.2102]) -Greedy action tensor([ 1.1601, -0.6387, -0.0622, -0.4304]) tensor([0.6010, 0.0995, 0.1770, 0.1225]) -Greedy action tensor([ 0.8289, -0.6215, -0.0022, -0.5427]) tensor([0.5198, 0.1219, 0.2264, 0.1319]) -Greedy action tensor([ 1.1016, -0.7943, 0.0265, -0.7027]) tensor([0.6039, 0.0907, 0.2061, 0.0994]) -Greedy action tensor([ 0.2832, 0.5318, -0.2421, 0.1932]) tensor([0.2640, 0.3385, 0.1561, 0.2413]) -Greedy action tensor([ 0.8619, -0.4958, 0.0310, -0.2830]) tensor([0.4972, 0.1279, 0.2166, 0.1582]) -Greedy action tensor([ 0.5964, -0.2566, -0.0243, -0.2408]) tensor([0.4173, 0.1778, 0.2243, 0.1806]) -Greedy action tensor([ 0.9466, -0.1042, -0.7515, 0.0716]) tensor([0.5129, 0.1794, 0.0939, 0.2138]) -Greedy action tensor([ 1.2950, -0.1550, -1.3828, 0.3266]) tensor([0.5942, 0.1394, 0.0408, 0.2256]) -Greedy action tensor([ 1.1618, -0.0907, -0.4228, -0.2225]) tensor([0.5743, 0.1641, 0.1177, 0.1439]) -Greedy action tensor([ 1.8431, -0.0610, -0.6960, 0.0891]) tensor([0.7138, 0.1063, 0.0563, 0.1235]) -Greedy action tensor([ 1.4679, -0.3422, -0.4403, 0.3297]) tensor([0.6126, 0.1002, 0.0909, 0.1963]) -Greedy action tensor([ 1.7925, -0.2461, -0.4322, 0.2222]) tensor([0.6914, 0.0900, 0.0747, 0.1438]) -Greedy action tensor([ 1.2371, 0.1508, -0.1775, -0.0513]) tensor([0.5387, 0.1818, 0.1309, 0.1485]) -Greedy action tensor([ 1.8207, -1.0598, -0.1036, 0.4251]) tensor([0.6898, 0.0387, 0.1007, 0.1709]) -Greedy action tensor([ 1.1819, -0.1917, -0.9946, 0.3346]) tensor([0.5570, 0.1410, 0.0632, 0.2387]) -Greedy action tensor([ 1.0373, -0.3640, -0.7518, 0.4334]) tensor([0.5102, 0.1256, 0.0853, 0.2789]) -Greedy action tensor([ 0.9006, -0.2677, -0.6046, 0.0685]) tensor([0.5081, 0.1580, 0.1128, 0.2211]) -Greedy action tensor([ 1.3267, -0.8146, -0.0074, 0.0458]) tensor([0.6029, 0.0708, 0.1588, 0.1675]) -Greedy action tensor([0.7202, 0.1894, 0.0176, 0.0108]) tensor([0.3883, 0.2284, 0.1923, 0.1910]) -Greedy action tensor([ 0.7789, -0.1155, -0.2263, 0.0602]) tensor([0.4421, 0.1807, 0.1618, 0.2154]) -Greedy action tensor([ 1.3031, -0.1994, -0.3871, 0.2387]) tensor([0.5708, 0.1270, 0.1053, 0.1969]) -Greedy action tensor([ 1.8676, -0.9539, -0.3387, 0.3111]) tensor([0.7244, 0.0431, 0.0798, 0.1527]) -Greedy action tensor([ 0.8986, -0.3794, -0.1225, -0.1071]) tensor([0.4988, 0.1390, 0.1797, 0.1825]) -Greedy action tensor([ 1.3129, -0.2074, 0.1821, 0.0650]) tensor([0.5469, 0.1196, 0.1765, 0.1570]) -Greedy action tensor([ 2.7553, -1.4194, -0.3387, 0.4330]) tensor([0.8630, 0.0133, 0.0391, 0.0846]) -Greedy action tensor([ 1.4429, -0.3465, -0.2972, 0.2692]) tensor([0.6054, 0.1011, 0.1063, 0.1872]) -Greedy action tensor([ 1.3846, -0.2649, -0.8569, 0.4530]) tensor([0.5909, 0.1135, 0.0628, 0.2328]) -Greedy action tensor([ 1.1593, -0.3346, -0.8272, -0.0710]) tensor([0.6046, 0.1357, 0.0829, 0.1767]) -Greedy action tensor([ 1.9432, -1.1656, -0.1603, 0.3339]) tensor([0.7317, 0.0327, 0.0893, 0.1464]) -Greedy action tensor([ 1.9576, 0.6638, -0.2642, 0.2506]) tensor([0.6394, 0.1753, 0.0693, 0.1160]) -Greedy action tensor([2.2802, 0.3907, 0.0255, 0.4930]) tensor([0.7025, 0.1062, 0.0737, 0.1176]) -Greedy action tensor([ 1.2605, 0.1044, -0.8547, 0.2693]) tensor([0.5536, 0.1742, 0.0668, 0.2055]) -Greedy action tensor([ 1.7091, -0.0355, -0.8284, 0.6011]) tensor([0.6313, 0.1103, 0.0499, 0.2085]) -Greedy action tensor([ 1.9969, -0.7750, -0.7275, 0.2330]) tensor([0.7695, 0.0481, 0.0505, 0.1319]) -Greedy action tensor([ 1.8978, -0.5238, -0.8067, 0.4500]) tensor([0.7190, 0.0638, 0.0481, 0.1690]) -Greedy action tensor([ 1.2077, 0.0766, -1.1305, 0.3365]) tensor([0.5442, 0.1756, 0.0525, 0.2277]) -Greedy action tensor([ 1.8802, -0.9543, -0.3757, 0.3973]) tensor([0.7192, 0.0422, 0.0754, 0.1632]) -Greedy action tensor([ 1.1274, 0.0268, 0.0409, -0.1491]) tensor([0.5131, 0.1707, 0.1731, 0.1431]) -Greedy action tensor([ 1.1012, -0.2296, -0.7507, 0.1298]) tensor([0.5556, 0.1468, 0.0872, 0.2103]) -Greedy action tensor([ 1.5535, 0.2506, -0.6586, 0.0121]) tensor([0.6268, 0.1703, 0.0686, 0.1342]) -Greedy action tensor([ 1.5191, -0.6344, -0.2660, 0.1741]) tensor([0.6475, 0.0752, 0.1086, 0.1687]) -Greedy action tensor([ 1.5783, 0.1061, -0.3662, 0.1484]) tensor([0.6204, 0.1423, 0.0888, 0.1485]) -Greedy action tensor([ 0.3263, -0.4142, 0.1240, -0.0949]) tensor([0.3390, 0.1617, 0.2769, 0.2225]) -Greedy action tensor([ 1.4811, -0.1430, -0.4788, 0.1040]) tensor([0.6288, 0.1239, 0.0886, 0.1587]) -Greedy action tensor([ 1.1175, -0.2888, -0.0925, -0.0538]) tensor([0.5396, 0.1322, 0.1609, 0.1673]) -Greedy action tensor([ 1.2652, -0.0585, -0.8539, 0.1177]) tensor([0.5869, 0.1562, 0.0705, 0.1863]) -Greedy action tensor([ 1.2930, -0.8635, -0.0290, 0.2252]) tensor([0.5794, 0.0670, 0.1544, 0.1992]) -Greedy action tensor([ 1.7966, -1.2286, 0.1271, 0.9135]) tensor([0.6059, 0.0294, 0.1141, 0.2505]) -Greedy action tensor([ 1.3587, -0.5069, -0.1358, 0.3571]) tensor([0.5726, 0.0886, 0.1285, 0.2103]) -Greedy action tensor([ 2.3546, 0.5633, -0.0108, 0.4655]) tensor([0.7083, 0.1181, 0.0665, 0.1071]) -Greedy action tensor([ 1.1912, -0.0668, -0.0399, -0.0383]) tensor([0.5351, 0.1521, 0.1562, 0.1565]) -Greedy action tensor([ 2.3385, -1.2273, -0.2762, 0.4712]) tensor([0.7962, 0.0225, 0.0583, 0.1230]) -Greedy action tensor([ 1.7248, -0.3728, -0.6736, 0.1617]) tensor([0.7027, 0.0863, 0.0639, 0.1472]) -Greedy action tensor([ 1.4619, 0.4467, -0.5714, 0.1422]) tensor([0.5680, 0.2058, 0.0744, 0.1518]) -Greedy action tensor([ 1.2738, -0.2385, -0.6163, 0.0993]) tensor([0.5951, 0.1312, 0.0899, 0.1839]) -Greedy action tensor([ 1.9631, -1.4445, -0.3841, 0.4272]) tensor([0.7440, 0.0246, 0.0712, 0.1602]) -Greedy action tensor([ 1.4171, -0.5658, -0.6335, 0.0414]) tensor([0.6583, 0.0906, 0.0847, 0.1663]) -Greedy action tensor([ 1.6874, -0.6354, -0.4139, -0.0904]) tensor([0.7198, 0.0705, 0.0880, 0.1217]) -Greedy action tensor([ 1.2877, -0.5325, -0.2394, 0.0758]) tensor([0.5964, 0.0966, 0.1295, 0.1775]) -Greedy action tensor([ 0.8930, -0.2376, -0.3666, 0.3084]) tensor([0.4621, 0.1492, 0.1311, 0.2575]) -Greedy action tensor([ 2.0409, 0.5629, -0.0797, 0.0867]) tensor([0.6713, 0.1531, 0.0805, 0.0951]) -Greedy action tensor([ 1.1955, -0.4923, -0.6308, 0.0544]) tensor([0.6004, 0.1110, 0.0967, 0.1918]) -Greedy action tensor([ 1.1093, -0.4351, -0.2992, 0.3028]) tensor([0.5251, 0.1121, 0.1284, 0.2344]) -Greedy action tensor([ 1.7134, -0.1702, -0.5525, 0.4552]) tensor([0.6494, 0.0987, 0.0674, 0.1845]) -Greedy action tensor([ 1.5858, -0.1330, -1.6206, 0.0930]) tensor([0.6923, 0.1241, 0.0280, 0.1556]) -Greedy action tensor([ 1.7042, -0.4938, -0.3753, 0.5373]) tensor([0.6463, 0.0718, 0.0808, 0.2012]) -Greedy action tensor([ 1.0099, -0.4535, -0.4214, -0.0148]) tensor([0.5466, 0.1265, 0.1306, 0.1962]) -Greedy action tensor([ 1.6719, -0.5647, -0.8705, 0.3172]) tensor([0.6928, 0.0740, 0.0545, 0.1787]) -Greedy action tensor([ 1.1188, -0.5663, -0.0695, 0.4117]) tensor([0.5042, 0.0935, 0.1536, 0.2486]) -Greedy action tensor([ 1.2978, -0.7286, -0.3036, 0.2654]) tensor([0.5919, 0.0780, 0.1193, 0.2108]) -Greedy action tensor([ 1.9874, -0.1017, -0.6296, 0.4386]) tensor([0.7096, 0.0878, 0.0518, 0.1508]) -Greedy action tensor([ 1.0539, -0.2560, -0.3519, 0.5642]) tensor([0.4700, 0.1268, 0.1152, 0.2880]) -Greedy action tensor([ 1.2755, -0.3914, -0.5860, 0.1919]) tensor([0.5943, 0.1122, 0.0924, 0.2011]) -Greedy action tensor([ 1.6209, -0.4563, -0.2475, 0.2567]) tensor([0.6514, 0.0816, 0.1006, 0.1665]) -Greedy action tensor([ 1.5468, -0.1704, -0.4742, 0.0718]) tensor([0.6490, 0.1165, 0.0860, 0.1485]) -Greedy action tensor([ 1.1934, -0.4391, -0.1651, 0.1853]) tensor([0.5502, 0.1075, 0.1414, 0.2008]) -Greedy action tensor([ 1.5241, -0.4052, -0.7405, -0.1051]) tensor([0.6919, 0.1005, 0.0719, 0.1357]) -Greedy action tensor([ 1.6983, -0.6889, -0.3531, 0.0255]) tensor([0.7102, 0.0653, 0.0913, 0.1333]) -Greedy action tensor([ 1.9810, -1.1164, -0.4826, 0.3073]) tensor([0.7588, 0.0343, 0.0646, 0.1423]) -Greedy action tensor([ 2.0159, -1.0428, -0.5077, 0.7849]) tensor([0.7047, 0.0331, 0.0565, 0.2058]) -Greedy action tensor([ 1.3158, -0.2920, -0.6217, 0.1761]) tensor([0.6009, 0.1204, 0.0866, 0.1922]) -Greedy action tensor([ 2.0714, -1.4177, -0.1538, 0.5289]) tensor([0.7394, 0.0226, 0.0799, 0.1581]) -Greedy action tensor([ 1.7835, -0.9149, -0.0966, 0.5017]) tensor([0.6678, 0.0450, 0.1019, 0.1853]) -Greedy action tensor([ 1.9805, -0.8246, -0.5655, 0.6055]) tensor([0.7185, 0.0435, 0.0563, 0.1817]) -Greedy action tensor([ 1.1297, -0.7314, 0.0186, 0.5802]) tensor([0.4850, 0.0754, 0.1597, 0.2799]) -Greedy action tensor([ 0.7192, 0.1538, -0.1468, -0.3599]) tensor([0.4294, 0.2440, 0.1806, 0.1460]) -Greedy action tensor([ 1.5784, -0.1758, -0.7761, 0.1962]) tensor([0.6583, 0.1139, 0.0625, 0.1652]) -Greedy action tensor([ 1.1577, -0.2805, -0.4153, 0.1451]) tensor([0.5531, 0.1313, 0.1147, 0.2009]) -Greedy action tensor([ 0.7346, -1.8753, 0.9383, -0.2536]) tensor([0.3743, 0.0275, 0.4589, 0.1393]) -Greedy action tensor([-0.2187, -1.4877, 0.4365, -0.8803]) tensor([0.2686, 0.0755, 0.5172, 0.1386]) -Greedy action tensor([ 0.0203, 0.0598, -0.0951, -0.8263]) tensor([0.2976, 0.3096, 0.2652, 0.1276]) -Greedy action tensor([ 0.5711, 0.0644, -0.1917, -0.8815]) tensor([0.4343, 0.2616, 0.2025, 0.1016]) -Greedy action tensor([-0.4698, -0.0977, -0.0779, -0.5279]) tensor([0.2052, 0.2976, 0.3036, 0.1936]) -Greedy action tensor([ 0.6947, -2.2755, 0.0946, 0.2455]) tensor([0.4468, 0.0229, 0.2452, 0.2851]) -Greedy action tensor([-0.8078, -1.1682, -0.0851, -0.9672]) tensor([0.2169, 0.1513, 0.4469, 0.1849]) -Greedy action tensor([-0.6540, -1.2515, -0.6101, -0.0250]) tensor([0.2237, 0.1231, 0.2337, 0.4196]) -Greedy action tensor([-0.6619, -0.8367, 0.2393, -0.0083]) tensor([0.1606, 0.1349, 0.3956, 0.3088]) -Greedy action tensor([ 0.0497, -1.2410, -0.2169, -0.1723]) tensor([0.3519, 0.0968, 0.2695, 0.2818]) -Greedy action tensor([-1.0127, -0.6397, -0.7667, -0.7822]) tensor([0.2004, 0.2910, 0.2563, 0.2523]) -Greedy action tensor([ 1.0069, -0.9484, -0.1321, 0.1119]) tensor([0.5347, 0.0757, 0.1712, 0.2185]) -Greedy action tensor([ 0.2263, -0.4742, 0.2292, -0.4030]) tensor([0.3298, 0.1637, 0.3307, 0.1758]) -Greedy action tensor([ 0.8071, -0.6502, 0.3892, 0.0059]) tensor([0.4273, 0.0995, 0.2814, 0.1918]) -Greedy action tensor([-0.9799, -0.7830, -0.1338, -1.5088]) tensor([0.1947, 0.2370, 0.4536, 0.1147]) -Greedy action tensor([ 0.8280, 0.0297, 0.0415, -0.1696]) tensor([0.4397, 0.1979, 0.2003, 0.1621]) -Greedy action tensor([ 0.2530, -1.5944, 0.7861, 0.1464]) tensor([0.2659, 0.0419, 0.4532, 0.2390]) -Greedy action tensor([ 1.2376, -1.8705, 0.7037, 0.7094]) tensor([0.4503, 0.0201, 0.2640, 0.2655]) -Greedy action tensor([-0.0954, -1.0009, -1.0251, -0.9310]) tensor([0.4479, 0.1811, 0.1768, 0.1942]) -Greedy action tensor([ 1.4010, -1.6508, 0.6414, -0.2606]) tensor([0.5865, 0.0277, 0.2744, 0.1113]) -Greedy action tensor([ 0.5671, -1.0272, -0.7170, 0.0849]) tensor([0.4768, 0.0968, 0.1320, 0.2944]) -Greedy action tensor([ 0.2060, -0.4403, -0.8118, -0.5144]) tensor([0.4216, 0.2209, 0.1524, 0.2051]) -Greedy action tensor([ 0.7734, 0.2540, -0.0547, -0.3945]) tensor([0.4268, 0.2539, 0.1865, 0.1328]) -Greedy action tensor([-0.4838, 0.1616, -0.0951, -0.8511]) tensor([0.1971, 0.3758, 0.2907, 0.1365]) -Greedy action tensor([-0.3979, -0.8457, -0.8053, -0.3183]) tensor([0.2952, 0.1887, 0.1964, 0.3197]) -Greedy action tensor([ 1.1784, -0.4737, 0.5559, 1.7333]) tensor([0.2882, 0.0552, 0.1546, 0.5019]) -Greedy action tensor([-0.2327, -0.7044, -0.0836, 0.5034]) tensor([0.2052, 0.1280, 0.2382, 0.4285]) -Greedy action tensor([ 1.4860, -1.5187, 0.9231, 0.1061]) tensor([0.5346, 0.0265, 0.3045, 0.1345]) -Greedy action tensor([ 0.5504, 0.2971, -0.5343, -0.3898]) tensor([0.3992, 0.3099, 0.1349, 0.1559]) -Greedy action tensor([-0.6778, -0.5798, -0.3308, -0.9096]) tensor([0.2320, 0.2559, 0.3282, 0.1840]) -Greedy action tensor([ 0.1629, 0.5030, 0.0756, -0.3104]) tensor([0.2535, 0.3562, 0.2323, 0.1579]) -Greedy action tensor([-0.6188, -0.5866, -0.5640, -0.8969]) tensor([0.2600, 0.2685, 0.2746, 0.1969]) -Greedy action tensor([-0.7781, -0.1880, 0.6867, -0.6955]) tensor([0.1217, 0.2196, 0.5266, 0.1322]) -Greedy action tensor([ 0.4671, -0.5201, -0.7267, -0.4197]) tensor([0.4790, 0.1785, 0.1452, 0.1973]) -Greedy action tensor([-0.7442, -0.6459, 0.1234, -0.3985]) tensor([0.1696, 0.1871, 0.4038, 0.2396]) -Greedy action tensor([-0.6290, -0.5294, -0.8273, 0.0624]) tensor([0.2032, 0.2245, 0.1666, 0.4057]) -Greedy action tensor([-0.1509, -0.5768, 0.1144, -0.6080]) tensor([0.2785, 0.1819, 0.3632, 0.1764]) -Greedy action tensor([-0.1781, -0.1943, -0.5191, 0.8392]) tensor([0.1831, 0.1802, 0.1302, 0.5065]) -Greedy action tensor([ 0.2659, -1.7436, -0.4620, -0.3505]) tensor([0.4636, 0.0622, 0.2239, 0.2503]) -Greedy action tensor([ 0.3478, -1.4998, -0.9174, 0.6223]) tensor([0.3629, 0.0572, 0.1024, 0.4775]) -Greedy action tensor([-0.3686, -0.0964, 0.1750, -0.0063]) tensor([0.1828, 0.2399, 0.3148, 0.2625]) -Greedy action tensor([-1.0919, -1.3237, 1.2116, -0.8289]) tensor([0.0763, 0.0605, 0.7639, 0.0993]) -Greedy action tensor([-0.3071, -0.4812, -0.5371, -0.0542]) tensor([0.2549, 0.2142, 0.2025, 0.3283]) -Greedy action tensor([-0.3464, -0.0259, 0.0243, -0.7851]) tensor([0.2237, 0.3081, 0.3240, 0.1442]) -Greedy action tensor([ 0.9304, -0.7555, -0.5379, 0.7621]) tensor([0.4424, 0.0820, 0.1019, 0.3738]) -Greedy action tensor([ 0.7186, 0.4199, -0.2652, 0.0520]) tensor([0.3803, 0.2822, 0.1422, 0.1953]) -Greedy action tensor([ 0.0587, -1.3724, 0.5914, -0.6967]) tensor([0.2930, 0.0700, 0.4992, 0.1377]) -Greedy action tensor([ 1.3275, -1.1824, -0.1789, 1.3382]) tensor([0.4322, 0.0351, 0.0958, 0.4368]) -Greedy action tensor([1.2437, 0.1454, 0.1925, 0.1747]) tensor([0.4935, 0.1646, 0.1725, 0.1694]) -Greedy action tensor([ 0.7058, -1.2781, -0.4322, -0.0933]) tensor([0.5242, 0.0721, 0.1680, 0.2357]) -Greedy action tensor([ 0.2539, -0.9183, -0.3134, 0.7360]) tensor([0.2860, 0.0886, 0.1622, 0.4632]) -Greedy action tensor([-1.0858, 0.6896, 0.3790, -0.8423]) tensor([0.0800, 0.4720, 0.3460, 0.1020]) -Greedy action tensor([ 1.2045, -0.8939, 0.2146, 0.8105]) tensor([0.4611, 0.0566, 0.1714, 0.3110]) -Greedy action tensor([-0.3539, -0.2148, -0.1363, 0.1242]) tensor([0.1998, 0.2296, 0.2484, 0.3223]) -Greedy action tensor([-0.6029, -2.1423, 0.4232, 0.1798]) tensor([0.1615, 0.0346, 0.4506, 0.3533]) -Greedy action tensor([ 0.5071, -0.4428, 0.5339, -0.5944]) tensor([0.3641, 0.1408, 0.3740, 0.1210]) -Greedy action tensor([-0.1008, -0.1541, -1.1014, -0.0447]) tensor([0.2964, 0.2811, 0.1090, 0.3135]) -Greedy action tensor([ 0.4900, -0.3988, -0.9961, -0.4778]) tensor([0.4957, 0.2038, 0.1122, 0.1883]) -Greedy action tensor([ 1.1384, -0.4801, 0.0769, 1.1602]) tensor([0.3897, 0.0772, 0.1348, 0.3983]) -Greedy action tensor([0.7804, 0.4475, 0.2830, 0.0950]) tensor([0.3535, 0.2534, 0.2150, 0.1781]) -Greedy action tensor([-1.7175, -1.0357, -0.9271, 0.4967]) tensor([0.0698, 0.1379, 0.1538, 0.6385]) -Greedy action tensor([-0.7628, -0.4364, 0.3012, -0.7201]) tensor([0.1580, 0.2190, 0.4580, 0.1649]) -Greedy action tensor([-0.5609, -0.5427, -0.0532, -0.3930]) tensor([0.2056, 0.2094, 0.3417, 0.2433]) -Greedy action tensor([ 1.2240, -0.7353, 0.2423, 0.5713]) tensor([0.4911, 0.0692, 0.1840, 0.2557]) -Greedy action tensor([-0.3467, -0.8239, 0.0674, 0.3221]) tensor([0.1966, 0.1220, 0.2975, 0.3838]) -Greedy action tensor([-1.2672, -1.2885, 0.8481, -1.0865]) tensor([0.0872, 0.0854, 0.7230, 0.1045]) -Greedy action tensor([ 0.1666, -1.2532, 0.3631, 0.2605]) tensor([0.2811, 0.0680, 0.3421, 0.3088]) -Greedy action tensor([ 0.3874, -0.9427, 0.3876, -0.0570]) tensor([0.3441, 0.0910, 0.3442, 0.2207]) -Greedy action tensor([ 1.5234, -0.5203, 0.4218, 0.4259]) tensor([0.5569, 0.0721, 0.1851, 0.1858]) -Greedy action tensor([ 0.1271, -0.4416, -0.4938, -0.5808]) tensor([0.3852, 0.2181, 0.2070, 0.1898]) -Greedy action tensor([ 0.5926, -1.3758, -0.7512, -0.1492]) tensor([0.5328, 0.0744, 0.1390, 0.2538]) -Greedy action tensor([ 0.2138, -0.4973, 0.8168, 0.6424]) tensor([0.2060, 0.1012, 0.3765, 0.3163]) -Greedy action tensor([-0.4083, -1.3192, 0.0887, -0.4556]) tensor([0.2500, 0.1005, 0.4110, 0.2385]) -Greedy action tensor([ 0.1526, -1.3798, -0.5250, 0.4094]) tensor([0.3315, 0.0716, 0.1683, 0.4286]) -Greedy action tensor([ 1.0285, -0.5283, 0.0532, -0.3622]) tensor([0.5444, 0.1148, 0.2053, 0.1355]) -Greedy action tensor([ 2.1817, -1.1389, 1.2329, 1.1947]) tensor([0.5568, 0.0201, 0.2156, 0.2075]) -Greedy action tensor([ 0.1948, 0.6293, 0.4256, -0.7643]) tensor([0.2388, 0.3688, 0.3008, 0.0915]) -Greedy action tensor([ 1.7286, -0.5517, 0.2091, 0.0077]) tensor([0.6667, 0.0682, 0.1459, 0.1193]) -Greedy action tensor([-0.5915, -0.7084, -0.8368, -0.0502]) tensor([0.2278, 0.2027, 0.1782, 0.3914]) -Greedy action tensor([ 0.2172, -0.1126, -0.8936, 0.0158]) tensor([0.3489, 0.2509, 0.1149, 0.2853]) -Greedy action tensor([ 1.1498, -1.1202, 0.4585, 0.7995]) tensor([0.4331, 0.0447, 0.2170, 0.3051]) -Greedy action tensor([-0.7012, -0.4960, -0.2040, -0.2837]) tensor([0.1855, 0.2278, 0.3050, 0.2817]) -Greedy action tensor([-1.8855, -0.3458, 0.6207, -0.1638]) tensor([0.0425, 0.1983, 0.5213, 0.2379]) -Greedy action tensor([-1.9078, -0.4342, 0.6485, -0.1622]) tensor([0.0417, 0.1820, 0.5374, 0.2389]) -Greedy action tensor([-1.9088, -0.3970, 0.6504, -0.1599]) tensor([0.0413, 0.1873, 0.5339, 0.2374]) -Greedy action tensor([ 0.2892, -0.5184, 0.1034, 0.3962]) tensor([0.2951, 0.1316, 0.2450, 0.3284]) -Greedy action tensor([-1.8994, -0.2871, 0.6289, -0.1804]) tensor([0.0414, 0.2078, 0.5195, 0.2312]) -Greedy action tensor([-0.8419, 0.8729, 0.0640, 0.2184]) tensor([0.0839, 0.4662, 0.2076, 0.2423]) -Greedy action tensor([-1.7723, -0.2334, 0.5516, -0.1040]) tensor([0.0472, 0.2200, 0.4823, 0.2504]) -Greedy action tensor([-1.7305, -0.4719, 0.7430, 0.1298]) tensor([0.0438, 0.1543, 0.5201, 0.2817]) -Greedy action tensor([-1.7853, -0.2218, 0.5478, -0.1039]) tensor([0.0466, 0.2226, 0.4805, 0.2504]) -Greedy action tensor([-1.5502, 0.3326, 0.4090, 0.1230]) tensor([0.0500, 0.3287, 0.3548, 0.2665]) -Greedy action tensor([-1.8511, -0.3273, 0.6060, -0.1373]) tensor([0.0438, 0.2012, 0.5116, 0.2433]) -Greedy action tensor([-0.8175, 0.0401, 0.2204, -0.1576]) tensor([0.1232, 0.2905, 0.3479, 0.2384]) -Greedy action tensor([-1.8186, -0.3223, 0.6252, -0.2241]) tensor([0.0456, 0.2038, 0.5257, 0.2248]) -Greedy action tensor([-1.8946, -0.4479, 0.6492, -0.1466]) tensor([0.0422, 0.1791, 0.5366, 0.2421]) -Greedy action tensor([-1.9127, -0.3841, 0.6486, -0.1632]) tensor([0.0411, 0.1897, 0.5327, 0.2365]) -Greedy action tensor([-1.8390, -0.2534, 0.5916, -0.1322]) tensor([0.0439, 0.2145, 0.4994, 0.2421]) -Greedy action tensor([-1.9174, -0.4395, 0.6545, -0.1676]) tensor([0.0413, 0.1809, 0.5403, 0.2375]) -Greedy action tensor([-1.1133, -0.4453, 0.3359, -0.0945]) tensor([0.1002, 0.1954, 0.4268, 0.2775]) -Greedy action tensor([-1.9241, -0.4234, 0.6553, -0.1695]) tensor([0.0409, 0.1834, 0.5393, 0.2364]) -Greedy action tensor([-0.5821, 0.6868, 0.0501, 0.0379]) tensor([0.1205, 0.4287, 0.2268, 0.2240]) -Greedy action tensor([-1.6825, -0.2205, 0.5185, -0.0931]) tensor([0.0520, 0.2241, 0.4693, 0.2546]) -Greedy action tensor([-1.8893, -0.3836, 0.6367, -0.1838]) tensor([0.0425, 0.1917, 0.5317, 0.2341]) -Greedy action tensor([-1.4942, -0.5494, 0.4823, 0.0155]) tensor([0.0653, 0.1680, 0.4713, 0.2955]) -Greedy action tensor([-1.9324, -0.4459, 0.6593, -0.1752]) tensor([0.0407, 0.1799, 0.5434, 0.2359]) -Greedy action tensor([-1.8544, -0.3818, 0.6171, -0.1379]) tensor([0.0439, 0.1915, 0.5201, 0.2444]) -Greedy action tensor([-1.8283, -0.4579, 0.6092, -0.1253]) tensor([0.0457, 0.1800, 0.5233, 0.2510]) -Greedy action tensor([-1.9130, -0.4443, 0.6531, -0.1648]) tensor([0.0415, 0.1802, 0.5400, 0.2383]) -Greedy action tensor([-1.8929, -0.4596, 0.6406, -0.1465]) tensor([0.0425, 0.1782, 0.5355, 0.2438]) -Greedy action tensor([-1.1192, -0.4607, 0.3561, -0.2200]) tensor([0.1024, 0.1979, 0.4479, 0.2518]) -Greedy action tensor([-1.9467, -0.4500, 0.6679, -0.1814]) tensor([0.0400, 0.1789, 0.5471, 0.2340]) -Greedy action tensor([-1.6768, 0.0147, 0.4670, -0.0544]) tensor([0.0499, 0.2710, 0.4261, 0.2529]) -Greedy action tensor([-1.7684, -0.4001, 0.6429, -0.0680]) tensor([0.0464, 0.1823, 0.5173, 0.2541]) -Greedy action tensor([-1.9121, -0.3776, 0.6463, -0.1621]) tensor([0.0411, 0.1908, 0.5313, 0.2367]) -Greedy action tensor([ 0.0431, 1.1545, -0.0154, 0.3731]) tensor([0.1569, 0.4768, 0.1480, 0.2183]) -Greedy action tensor([-1.6300, -0.4946, 0.5340, 0.0048]) tensor([0.0557, 0.1734, 0.4851, 0.2858]) -Greedy action tensor([-1.5947, 0.0895, 0.5262, 0.0461]) tensor([0.0503, 0.2709, 0.4193, 0.2594]) -Greedy action tensor([-1.9348, -0.4051, 0.6549, -0.1725]) tensor([0.0404, 0.1864, 0.5380, 0.2352]) -Greedy action tensor([-1.9247, -0.4486, 0.6617, -0.1703]) tensor([0.0409, 0.1791, 0.5435, 0.2365]) -Greedy action tensor([-1.8996, -0.4551, 0.6493, -0.1593]) tensor([0.0421, 0.1787, 0.5391, 0.2401]) -Greedy action tensor([0.2297, 1.0847, 0.1458, 0.7658]) tensor([0.1672, 0.3932, 0.1538, 0.2858]) -Greedy action tensor([-1.9421, -0.4388, 0.6640, -0.1782]) tensor([0.0402, 0.1808, 0.5445, 0.2346]) -Greedy action tensor([-1.9370, -0.4502, 0.6691, -0.1738]) tensor([0.0403, 0.1783, 0.5462, 0.2351]) -Greedy action tensor([-1.8577, -0.4527, 0.6247, -0.1420]) tensor([0.0442, 0.1803, 0.5295, 0.2460]) -Greedy action tensor([-1.8950, -0.4299, 0.6394, -0.1588]) tensor([0.0423, 0.1833, 0.5340, 0.2404]) -Greedy action tensor([-1.6169, -0.4897, 0.5146, 0.0103]) tensor([0.0568, 0.1754, 0.4787, 0.2891]) -Greedy action tensor([-1.3202, 0.0302, 0.3343, -0.0217]) tensor([0.0727, 0.2806, 0.3803, 0.2664]) -Greedy action tensor([-1.8528, -0.4769, 0.6210, -0.1484]) tensor([0.0448, 0.1773, 0.5316, 0.2463]) -Greedy action tensor([-0.8692, 0.8492, 0.0676, 0.1862]) tensor([0.0833, 0.4646, 0.2126, 0.2394]) -Greedy action tensor([-1.7953, -0.4885, 0.5929, -0.1287]) tensor([0.0479, 0.1769, 0.5217, 0.2535]) -Greedy action tensor([-1.7536, 0.2970, 0.4747, -0.1149]) tensor([0.0431, 0.3349, 0.4001, 0.2219]) -Greedy action tensor([-1.3799, -0.2532, 0.4019, -0.0528]) tensor([0.0725, 0.2237, 0.4306, 0.2733]) -Greedy action tensor([-1.9175, -0.4243, 0.6637, -0.1483]) tensor([0.0408, 0.1815, 0.5386, 0.2391]) -Greedy action tensor([-1.6221, -0.1696, 0.5706, 0.0424]) tensor([0.0512, 0.2190, 0.4591, 0.2707]) -Greedy action tensor([-1.3628, 0.6318, 0.2449, 0.1013]) tensor([0.0566, 0.4161, 0.2826, 0.2448]) -Greedy action tensor([-1.9409, -0.4447, 0.6650, -0.1776]) tensor([0.0403, 0.1797, 0.5452, 0.2348]) -Greedy action tensor([-0.9462, -0.2670, 0.2476, -0.0090]) tensor([0.1133, 0.2235, 0.3739, 0.2893]) -Greedy action tensor([-1.8697, -0.3684, 0.6588, -0.1295]) tensor([0.0422, 0.1892, 0.5284, 0.2402]) -Greedy action tensor([-1.6012, -0.4794, 0.5005, -0.0016]) tensor([0.0581, 0.1785, 0.4755, 0.2878]) -Greedy action tensor([-0.8211, 0.7052, 0.1233, 0.0203]) tensor([0.0953, 0.4385, 0.2451, 0.2211]) -Greedy action tensor([-1.2017, 0.6622, 0.2095, -0.2834]) tensor([0.0712, 0.4588, 0.2918, 0.1782]) -Greedy action tensor([-1.9272, -0.4331, 0.6592, -0.1701]) tensor([0.0408, 0.1816, 0.5414, 0.2362]) -Greedy action tensor([-1.8225, -0.2961, 0.6141, -0.1105]) tensor([0.0443, 0.2038, 0.5065, 0.2454]) -Greedy action tensor([-1.8547, -0.4196, 0.6191, -0.1403]) tensor([0.0442, 0.1857, 0.5246, 0.2455]) -Greedy action tensor([-1.7962, -0.3556, 0.5953, -0.0828]) tensor([0.0461, 0.1946, 0.5037, 0.2556]) -Greedy action tensor([-1.9234, -0.4421, 0.6504, -0.1732]) tensor([0.0412, 0.1812, 0.5404, 0.2372]) -Greedy action tensor([-1.6093, -0.4636, 0.5049, -0.0039]) tensor([0.0574, 0.1806, 0.4758, 0.2861]) -Greedy action tensor([-0.4303, 0.1997, 0.5354, 0.7386]) tensor([0.1146, 0.2153, 0.3011, 0.3690]) -Greedy action tensor([-1.8974, -0.4427, 0.6440, -0.1574]) tensor([0.0422, 0.1809, 0.5363, 0.2406]) -Greedy action tensor([-1.8453, -0.3429, 0.6280, -0.1249]) tensor([0.0436, 0.1958, 0.5170, 0.2435]) -Greedy action tensor([-1.4995, -0.1561, 0.6120, -0.6433]) tensor([0.0647, 0.2481, 0.5348, 0.1524]) -Greedy action tensor([-0.7430, -0.6421, 0.2585, -0.0794]) tensor([0.1477, 0.1634, 0.4021, 0.2868]) -Greedy action tensor([-1.5096, -0.2020, 0.4391, -0.0709]) tensor([0.0628, 0.2321, 0.4406, 0.2646]) -Greedy action tensor([-1.9310, -0.4346, 0.6668, -0.1697]) tensor([0.0405, 0.1807, 0.5435, 0.2354]) -Greedy action tensor([-1.9154, -0.3335, 0.6379, -0.1552]) tensor([0.0408, 0.1983, 0.5239, 0.2370]) -Greedy action tensor([-1.9254, -0.3715, 0.6442, -0.1703]) tensor([0.0407, 0.1925, 0.5315, 0.2354]) -Greedy action tensor([-1.8797, -0.4536, 0.6383, -0.1494]) tensor([0.0431, 0.1794, 0.5344, 0.2431]) -Greedy action tensor([-1.8166, -0.4437, 0.6140, -0.1127]) tensor([0.0459, 0.1810, 0.5212, 0.2520]) -Greedy action tensor([-1.1244, -0.4573, 0.3739, -0.0053]) tensor([0.0954, 0.1859, 0.4267, 0.2920]) -Greedy action tensor([-1.2479, -0.2351, 0.3734, 0.2016]) tensor([0.0765, 0.2106, 0.3870, 0.3259]) -Greedy action tensor([-1.0228, -0.4864, 0.3050, -0.0050]) tensor([0.1081, 0.1848, 0.4079, 0.2992]) -Greedy action tensor([-1.9163, -0.4443, 0.6573, -0.1634]) tensor([0.0413, 0.1798, 0.5409, 0.2381]) -Greedy action tensor([ 1.0881, -0.9567, 0.3496, -0.7899]) tensor([0.5682, 0.0735, 0.2715, 0.0869]) -Greedy action tensor([ 0.7014, -0.5721, 0.1393, -0.7699]) tensor([0.4809, 0.1346, 0.2741, 0.1104]) -Greedy action tensor([ 0.7375, -0.2709, -0.1908, -0.1402]) tensor([0.4596, 0.1677, 0.1816, 0.1911]) -Greedy action tensor([ 0.5302, -0.4118, -0.0672, -0.4802]) tensor([0.4340, 0.1692, 0.2388, 0.1580]) -Greedy action tensor([ 1.0678, -0.4960, 0.1055, -0.7770]) tensor([0.5716, 0.1197, 0.2184, 0.0903]) -Greedy action tensor([ 0.5499, -0.5120, -0.1387, -0.3813]) tensor([0.4460, 0.1542, 0.2240, 0.1758]) -Greedy action tensor([ 0.4213, -0.0399, 0.2280, -0.0584]) tensor([0.3253, 0.2051, 0.2681, 0.2014]) -Greedy action tensor([ 0.9238, -0.6403, -0.0405, -0.5158]) tensor([0.5472, 0.1145, 0.2086, 0.1297]) -Greedy action tensor([ 0.6755, 0.2026, -0.1309, 0.1365]) tensor([0.3769, 0.2349, 0.1683, 0.2199]) -Greedy action tensor([ 0.7375, -0.4288, -0.0397, -0.2616]) tensor([0.4674, 0.1456, 0.2149, 0.1721]) -Greedy action tensor([ 0.7625, -0.4866, 0.1497, -0.2470]) tensor([0.4560, 0.1308, 0.2471, 0.1662]) -Greedy action tensor([ 0.7072, -0.4764, 0.0451, -0.3634]) tensor([0.4620, 0.1414, 0.2382, 0.1584]) -Greedy action tensor([ 0.8806, -0.7539, 0.0330, -0.4719]) tensor([0.5313, 0.1036, 0.2276, 0.1374]) -Greedy action tensor([ 0.9280, -0.8161, 0.1476, -0.7382]) tensor([0.5489, 0.0959, 0.2515, 0.1037]) -Greedy action tensor([ 0.9316, -1.0945, 0.1298, -0.6026]) tensor([0.5568, 0.0734, 0.2497, 0.1201]) -Greedy action tensor([ 0.5356, -0.1787, 0.0786, -0.0315]) tensor([0.3718, 0.1820, 0.2354, 0.2108]) -Greedy action tensor([ 1.0283, -0.4198, -0.1418, -0.2958]) tensor([0.5520, 0.1297, 0.1713, 0.1469]) -Greedy action tensor([ 0.8195, -0.7408, 0.1466, -0.8400]) tensor([0.5234, 0.1099, 0.2671, 0.0996]) -Greedy action tensor([0.0617, 0.5507, 0.0109, 0.0845]) tensor([0.2172, 0.3542, 0.2064, 0.2222]) -Greedy action tensor([ 0.7626, -0.4538, -0.0284, -0.2355]) tensor([0.4721, 0.1399, 0.2140, 0.1740]) -Greedy action tensor([ 0.7185, -0.5072, -0.1167, -0.1976]) tensor([0.4701, 0.1380, 0.2039, 0.1881]) -Greedy action tensor([ 1.1828, -0.8128, 0.0290, -0.6874]) tensor([0.6229, 0.0847, 0.1965, 0.0960]) -Greedy action tensor([ 0.9686, -0.5087, 0.0439, -0.2040]) tensor([0.5169, 0.1180, 0.2050, 0.1600]) -Greedy action tensor([ 1.0175, -0.3667, -0.1941, -0.4802]) tensor([0.5644, 0.1414, 0.1680, 0.1262]) -Greedy action tensor([ 0.7036, -0.3608, 0.0613, -0.1623]) tensor([0.4364, 0.1505, 0.2296, 0.1836]) -Greedy action tensor([ 1.1919, -0.7193, -0.0173, -0.6001]) tensor([0.6200, 0.0917, 0.1850, 0.1033]) -Greedy action tensor([ 0.4377, -0.0005, 0.0239, -0.0627]) tensor([0.3433, 0.2215, 0.2270, 0.2081]) -Greedy action tensor([ 0.8665, -0.6899, -0.1157, -0.2885]) tensor([0.5262, 0.1110, 0.1971, 0.1658]) -Greedy action tensor([ 0.7727, -0.3157, -0.0578, -0.1186]) tensor([0.4581, 0.1543, 0.1997, 0.1879]) -Greedy action tensor([ 0.6871, -0.6193, -0.1340, -0.1591]) tensor([0.4673, 0.1266, 0.2056, 0.2005]) -Greedy action tensor([ 0.7802, -0.5450, -0.0894, -0.2818]) tensor([0.4925, 0.1309, 0.2064, 0.1703]) -Greedy action tensor([ 0.9943, -0.4673, 0.0596, -0.4861]) tensor([0.5399, 0.1252, 0.2120, 0.1229]) -Greedy action tensor([ 0.4801, -0.0294, -0.1523, 0.1051]) tensor([0.3547, 0.2131, 0.1884, 0.2438]) -Greedy action tensor([ 0.7911, -0.5135, -0.0825, -0.2623]) tensor([0.4908, 0.1331, 0.2049, 0.1712]) -Greedy action tensor([ 0.8393, -0.4705, -0.1691, -0.1737]) tensor([0.5005, 0.1351, 0.1826, 0.1818]) -Greedy action tensor([ 0.9215, -0.3892, -0.0932, -0.4970]) tensor([0.5335, 0.1439, 0.1934, 0.1292]) -Greedy action tensor([ 0.8627, -0.4855, -0.0565, -0.4030]) tensor([0.5153, 0.1338, 0.2055, 0.1453]) -Greedy action tensor([ 0.9186, -0.5537, -0.0765, -0.3759]) tensor([0.5339, 0.1225, 0.1974, 0.1463]) -Greedy action tensor([ 1.2294, -1.1658, 0.0471, -0.8096]) tensor([0.6545, 0.0597, 0.2007, 0.0852]) -Greedy action tensor([ 0.6966, -0.9958, -0.1450, -0.5302]) tensor([0.5240, 0.0965, 0.2259, 0.1537]) -Greedy action tensor([ 0.8827, -0.4429, -0.0931, -0.2111]) tensor([0.5057, 0.1343, 0.1906, 0.1694]) -Greedy action tensor([ 0.5679, -0.4869, -0.1512, -0.3243]) tensor([0.4454, 0.1551, 0.2170, 0.1825]) -Greedy action tensor([ 0.6050, -0.0319, -0.0026, -0.1979]) tensor([0.3966, 0.2098, 0.2160, 0.1777]) -Greedy action tensor([ 0.8624, -0.7272, -0.0329, -0.1282]) tensor([0.5041, 0.1028, 0.2059, 0.1872]) -Greedy action tensor([ 0.9747, -0.7048, -0.0969, -0.5899]) tensor([0.5754, 0.1073, 0.1970, 0.1203]) -Greedy action tensor([ 0.4321, -0.0244, -0.1526, -0.2166]) tensor([0.3685, 0.2335, 0.2054, 0.1926]) -Greedy action tensor([ 0.3108, -0.2592, -0.0949, -0.0057]) tensor([0.3378, 0.1910, 0.2251, 0.2461]) -Greedy action tensor([ 0.8504, -0.4071, -0.0024, -0.1429]) tensor([0.4806, 0.1366, 0.2048, 0.1780]) -Greedy action tensor([ 0.4301, -0.2763, -0.1051, -0.1555]) tensor([0.3794, 0.1872, 0.2222, 0.2112]) -Greedy action tensor([ 0.8996, -0.4255, -0.1060, -0.2055]) tensor([0.5095, 0.1354, 0.1864, 0.1687]) -Greedy action tensor([ 0.4017, 0.0140, 0.0752, -0.1312]) tensor([0.3348, 0.2272, 0.2415, 0.1965]) -Greedy action tensor([ 1.2511, -0.5746, -0.0281, -0.9090]) tensor([0.6432, 0.1036, 0.1790, 0.0742]) -Greedy action tensor([ 1.2938, -0.9408, -0.1236, -0.6467]) tensor([0.6698, 0.0717, 0.1623, 0.0962]) -Greedy action tensor([ 0.7317, -0.8351, 0.1007, -0.3971]) tensor([0.4845, 0.1011, 0.2577, 0.1567]) -Greedy action tensor([ 0.9515, -0.0153, 0.0075, 0.0162]) tensor([0.4626, 0.1759, 0.1800, 0.1815]) -Greedy action tensor([ 0.9709, -0.5430, -0.1478, -0.3718]) tensor([0.5531, 0.1217, 0.1807, 0.1444]) -Greedy action tensor([ 0.8450, -0.3508, -0.2597, -0.1894]) tensor([0.5027, 0.1521, 0.1666, 0.1787]) -Greedy action tensor([ 0.8247, 0.1141, -0.1114, -0.3268]) tensor([0.4546, 0.2234, 0.1783, 0.1437]) -Greedy action tensor([ 1.0495, -0.3671, 0.0428, -0.3538]) tensor([0.5394, 0.1308, 0.1971, 0.1326]) -Greedy action tensor([ 0.5989, -0.3525, 0.0962, -0.3382]) tensor([0.4197, 0.1621, 0.2539, 0.1644]) -Greedy action tensor([ 0.7031, -0.5330, -0.0811, -0.0877]) tensor([0.4544, 0.1320, 0.2074, 0.2061]) -Greedy action tensor([ 0.6407, -0.4044, -0.0214, -0.4379]) tensor([0.4530, 0.1593, 0.2336, 0.1540]) -Greedy action tensor([ 0.6442, -0.1370, 0.0070, -0.0645]) tensor([0.4034, 0.1847, 0.2133, 0.1986]) -Greedy action tensor([ 0.9875, -0.9371, 0.1801, -0.4805]) tensor([0.5488, 0.0801, 0.2447, 0.1264]) -Greedy action tensor([ 0.1389, 0.1855, -0.0347, -0.7361]) tensor([0.3026, 0.3170, 0.2543, 0.1261]) -Greedy action tensor([ 0.4907, -0.0315, -0.0071, -0.3625]) tensor([0.3806, 0.2258, 0.2314, 0.1622]) -Greedy action tensor([ 1.1425, -0.7659, -0.0814, -0.7398]) tensor([0.6271, 0.0930, 0.1844, 0.0955]) -Greedy action tensor([ 0.9431, -0.8429, 0.0165, -0.4714]) tensor([0.5535, 0.0928, 0.2192, 0.1345]) -Greedy action tensor([ 0.5758, -0.0646, -0.0999, -0.0056]) tensor([0.3854, 0.2031, 0.1961, 0.2155]) -Greedy action tensor([ 0.4464, 0.1380, -0.0414, 0.0566]) tensor([0.3305, 0.2428, 0.2029, 0.2238]) -Greedy action tensor([ 0.9863, -0.7509, -0.0444, -0.2575]) tensor([0.5491, 0.0966, 0.1959, 0.1583]) -Greedy action tensor([ 0.9883, -0.9763, 0.0035, -0.4348]) tensor([0.5699, 0.0799, 0.2129, 0.1373]) -Greedy action tensor([ 0.6921, -0.2180, 0.1504, -0.0633]) tensor([0.4075, 0.1640, 0.2371, 0.1914]) -Greedy action tensor([ 0.9266, -0.7889, 0.0253, -0.4484]) tensor([0.5438, 0.0978, 0.2208, 0.1375]) -Greedy action tensor([ 1.0594, -0.5652, -0.1664, -0.2016]) tensor([0.5637, 0.1110, 0.1655, 0.1597]) -Greedy action tensor([0.8487, 0.1242, 0.0642, 0.0514]) tensor([0.4182, 0.2026, 0.1908, 0.1884]) -Greedy action tensor([ 1.1128, -0.6828, -0.0896, -0.4140]) tensor([0.5939, 0.0986, 0.1785, 0.1290]) -Greedy action tensor([ 1.1113, -0.5401, -0.1494, -0.3769]) tensor([0.5879, 0.1127, 0.1666, 0.1327]) -Greedy action tensor([ 1.1938, -0.7339, -0.0238, -0.5524]) tensor([0.6189, 0.0900, 0.1831, 0.1080]) -Greedy action tensor([ 0.6532, -0.3535, -0.0526, -0.0768]) tensor([0.4272, 0.1561, 0.2109, 0.2059]) -Greedy action tensor([ 1.1584, -0.5532, -0.0030, -0.8247]) tensor([0.6130, 0.1107, 0.1919, 0.0844]) -Greedy action tensor([ 0.7278, -0.3917, -0.1364, -0.2307]) tensor([0.4692, 0.1532, 0.1977, 0.1799]) -Greedy action tensor([ 1.6750, -0.5795, -0.1162, 0.1181]) tensor([0.6745, 0.0708, 0.1125, 0.1422]) -Greedy action tensor([ 0.9243, -0.4509, -0.3357, 0.2731]) tensor([0.4859, 0.1228, 0.1378, 0.2534]) -Greedy action tensor([ 1.2435, -0.4556, -0.2511, 0.5356]) tensor([0.5264, 0.0962, 0.1181, 0.2593]) -Greedy action tensor([ 1.7655, -0.8613, -0.6164, 0.9000]) tensor([0.6307, 0.0456, 0.0583, 0.2654]) -Greedy action tensor([ 1.2190, 0.0439, -1.2928, 0.4560]) tensor([0.5387, 0.1664, 0.0437, 0.2512]) -Greedy action tensor([ 1.2110, -0.0895, -0.8116, 0.0567]) tensor([0.5814, 0.1584, 0.0769, 0.1833]) -Greedy action tensor([ 1.2973, -0.3318, -0.0747, 0.3305]) tensor([0.5465, 0.1072, 0.1386, 0.2078]) -Greedy action tensor([ 1.6903, -0.1371, -0.4789, -0.4355]) tensor([0.7171, 0.1153, 0.0820, 0.0856]) -Greedy action tensor([ 1.3311, -0.2062, -0.8600, 0.3058]) tensor([0.5933, 0.1275, 0.0663, 0.2128]) -Greedy action tensor([ 2.1635, -0.4002, -0.5996, 0.4493]) tensor([0.7575, 0.0583, 0.0478, 0.1364]) -Greedy action tensor([ 2.0246, -1.2051, -0.0665, 0.5622]) tensor([0.7169, 0.0284, 0.0886, 0.1661]) -Greedy action tensor([ 1.6588, -0.9784, -0.0290, 0.8256]) tensor([0.5913, 0.0423, 0.1093, 0.2570]) -Greedy action tensor([ 1.7846, -0.9459, -0.4389, 0.3597]) tensor([0.7072, 0.0461, 0.0765, 0.1701]) -Greedy action tensor([ 1.3099, -0.5272, -0.2790, 0.1203]) tensor([0.5996, 0.0955, 0.1224, 0.1825]) -Greedy action tensor([ 1.2673, -0.2632, -0.3919, 0.0992]) tensor([0.5822, 0.1260, 0.1108, 0.1810]) -Greedy action tensor([ 1.2989, -0.2072, -1.0714, 0.2107]) tensor([0.6053, 0.1342, 0.0566, 0.2039]) -Greedy action tensor([ 1.4636, -0.3377, -0.1227, 0.1946]) tensor([0.6057, 0.1000, 0.1240, 0.1703]) -Greedy action tensor([ 1.3046, -0.3521, -0.3290, 0.0953]) tensor([0.5937, 0.1133, 0.1159, 0.1772]) -Greedy action tensor([ 1.6677, -0.4501, -0.7180, 0.3659]) tensor([0.6737, 0.0810, 0.0620, 0.1833]) -Greedy action tensor([ 1.2540, -0.3244, -0.1223, 0.1424]) tensor([0.5593, 0.1154, 0.1412, 0.1840]) -Greedy action tensor([ 1.6791, -0.8695, -0.5711, -0.0081]) tensor([0.7307, 0.0571, 0.0770, 0.1352]) -Greedy action tensor([ 0.5384, -0.2892, 0.1709, 0.2223]) tensor([0.3498, 0.1529, 0.2422, 0.2550]) -Greedy action tensor([ 1.2244, -0.2111, -0.6290, 0.0599]) tensor([0.5859, 0.1394, 0.0918, 0.1828]) -Greedy action tensor([ 1.2552, -0.4308, -0.3973, 0.2298]) tensor([0.5762, 0.1067, 0.1104, 0.2067]) -Greedy action tensor([ 1.7272, -0.7390, -0.5931, 0.1832]) tensor([0.7160, 0.0608, 0.0703, 0.1529]) -Greedy action tensor([ 1.1934, -0.6189, -0.5281, 0.2923]) tensor([0.5720, 0.0934, 0.1023, 0.2323]) -Greedy action tensor([ 1.7525, -0.4834, -0.5821, 0.4780]) tensor([0.6742, 0.0721, 0.0653, 0.1885]) -Greedy action tensor([ 2.3216, -0.1034, -0.1582, 0.6501]) tensor([0.7352, 0.0650, 0.0616, 0.1382]) -Greedy action tensor([ 1.1961, -0.2974, -0.2397, 0.2098]) tensor([0.5448, 0.1224, 0.1296, 0.2032]) -Greedy action tensor([ 1.3821, 0.2093, -0.3311, -0.2005]) tensor([0.5899, 0.1826, 0.1064, 0.1212]) -Greedy action tensor([ 2.4229, -1.5314, -0.2654, 0.8969]) tensor([0.7665, 0.0147, 0.0521, 0.1666]) -Greedy action tensor([ 1.7660, -0.4426, -0.4062, 0.2718]) tensor([0.6905, 0.0759, 0.0787, 0.1550]) -Greedy action tensor([ 0.7743, -0.3382, 0.1516, 0.1134]) tensor([0.4199, 0.1380, 0.2253, 0.2168]) -Greedy action tensor([ 1.0360, -0.5633, -0.6678, 0.4337]) tensor([0.5177, 0.1046, 0.0942, 0.2835]) -Greedy action tensor([ 0.7293, -0.4242, 0.0844, 0.0291]) tensor([0.4280, 0.1350, 0.2245, 0.2125]) -Greedy action tensor([ 0.9247, -0.2027, -0.4104, 0.0951]) tensor([0.4942, 0.1601, 0.1301, 0.2156]) -Greedy action tensor([ 1.2449, -0.5464, -0.3431, 0.2136]) tensor([0.5788, 0.0965, 0.1183, 0.2064]) -Greedy action tensor([ 2.1795, -0.7721, -0.5984, 0.1686]) tensor([0.8011, 0.0419, 0.0498, 0.1072]) -Greedy action tensor([ 1.1297, -0.3853, -0.4017, 0.6532]) tensor([0.4861, 0.1069, 0.1051, 0.3019]) -Greedy action tensor([ 1.6179, -0.7583, -0.3823, 0.2114]) tensor([0.6788, 0.0631, 0.0919, 0.1663]) -Greedy action tensor([ 1.5615, -0.2558, -0.5519, 0.1549]) tensor([0.6543, 0.1063, 0.0791, 0.1603]) -Greedy action tensor([ 1.0741, -0.3313, -0.4056, 0.1798]) tensor([0.5314, 0.1303, 0.1210, 0.2173]) -Greedy action tensor([ 1.6588, -0.4373, -0.5737, 0.5559]) tensor([0.6402, 0.0787, 0.0687, 0.2125]) -Greedy action tensor([ 2.2069, -0.8255, -0.6256, 0.5695]) tensor([0.7683, 0.0370, 0.0452, 0.1494]) -Greedy action tensor([ 1.6314, 0.0690, -0.4829, 0.3839]) tensor([0.6182, 0.1296, 0.0746, 0.1776]) -Greedy action tensor([ 1.3351, -0.1208, -0.3838, -0.0314]) tensor([0.5997, 0.1399, 0.1075, 0.1529]) -Greedy action tensor([ 1.3894, 0.0092, -0.3293, -0.0776]) tensor([0.6019, 0.1514, 0.1079, 0.1388]) -Greedy action tensor([ 1.6401, -0.3294, -0.5099, 0.1151]) tensor([0.6786, 0.0947, 0.0790, 0.1477]) -Greedy action tensor([ 1.3952, -0.8060, -0.6677, 0.5741]) tensor([0.5960, 0.0660, 0.0758, 0.2622]) -Greedy action tensor([ 1.5101, -0.1687, -0.5577, 0.1904]) tensor([0.6328, 0.1181, 0.0800, 0.1691]) -Greedy action tensor([ 1.5556, -0.0814, -0.4767, -0.0888]) tensor([0.6584, 0.1281, 0.0863, 0.1272]) -Greedy action tensor([ 1.0301, -0.5443, -0.0290, 0.6432]) tensor([0.4478, 0.0928, 0.1553, 0.3041]) -Greedy action tensor([ 1.9412, -0.9271, -0.1115, 0.3444]) tensor([0.7206, 0.0409, 0.0925, 0.1460]) -Greedy action tensor([ 1.8222, -0.7169, 0.0650, 0.4925]) tensor([0.6596, 0.0521, 0.1138, 0.1745]) -Greedy action tensor([ 1.6925, -0.5839, -0.2520, 0.2218]) tensor([0.6777, 0.0696, 0.0970, 0.1557]) -Greedy action tensor([ 2.2438, -0.2959, -0.3502, 1.2334]) tensor([0.6589, 0.0520, 0.0492, 0.2399]) -Greedy action tensor([ 1.1831, -0.4794, 0.0811, 0.0546]) tensor([0.5419, 0.1028, 0.1800, 0.1753]) -Greedy action tensor([ 1.7455, -0.2581, -0.5059, 0.1072]) tensor([0.6971, 0.0940, 0.0734, 0.1355]) -Greedy action tensor([ 1.5798, -0.9611, -0.0646, 1.0619]) tensor([0.5354, 0.0422, 0.1034, 0.3190]) -Greedy action tensor([ 1.2183, -0.2999, -0.7488, 0.2789]) tensor([0.5715, 0.1252, 0.0799, 0.2234]) -Greedy action tensor([ 1.8771, -0.2824, -0.8321, 0.3542]) tensor([0.7143, 0.0824, 0.0476, 0.1558]) -Greedy action tensor([ 2.2079, -0.5933, -0.3099, 0.3436]) tensor([0.7714, 0.0468, 0.0622, 0.1196]) -Greedy action tensor([ 1.1322, -0.6033, 0.0538, 0.1370]) tensor([0.5302, 0.0935, 0.1803, 0.1960]) -Greedy action tensor([ 1.3594, -0.3261, -0.3859, -0.0825]) tensor([0.6264, 0.1161, 0.1094, 0.1481]) -Greedy action tensor([ 1.4693, -0.5656, -0.7128, 0.1001]) tensor([0.6676, 0.0873, 0.0753, 0.1698]) -Greedy action tensor([ 1.2670, -0.2941, -0.4802, 0.4583]) tensor([0.5466, 0.1147, 0.0952, 0.2435]) -Greedy action tensor([ 0.3251, -0.1400, 0.1099, -0.1403]) tensor([0.3265, 0.2051, 0.2633, 0.2050]) -Greedy action tensor([ 1.7009, -0.5693, -0.6871, 0.4182]) tensor([0.6792, 0.0701, 0.0624, 0.1883]) -Greedy action tensor([ 1.1549, -0.3191, -0.0967, 0.6537]) tensor([0.4715, 0.1080, 0.1349, 0.2856]) -Greedy action tensor([ 1.1242, -0.4502, -0.5934, 0.9209]) tensor([0.4540, 0.0940, 0.0815, 0.3705]) -Greedy action tensor([ 1.9527, -0.5361, -0.2607, 0.3057]) tensor([0.7220, 0.0599, 0.0789, 0.1391]) -Greedy action tensor([ 1.1143, -0.3921, -0.5576, 0.3654]) tensor([0.5312, 0.1178, 0.0998, 0.2512]) -Greedy action tensor([ 1.4427, -0.4611, -0.3037, 0.3754]) tensor([0.5998, 0.0894, 0.1046, 0.2063]) -Greedy action tensor([ 1.3652, -0.6279, -0.5762, 0.8174]) tensor([0.5382, 0.0733, 0.0772, 0.3112]) -Greedy action tensor([ 1.3297, -0.4540, -0.7109, 0.6014]) tensor([0.5616, 0.0944, 0.0730, 0.2711]) -Greedy action tensor([ 1.6326, -0.6225, -0.3121, 0.3569]) tensor([0.6548, 0.0687, 0.0937, 0.1828]) -Greedy action tensor([ 1.2522, 0.0442, -0.7324, 0.6479]) tensor([0.5044, 0.1507, 0.0693, 0.2756]) -Greedy action tensor([ 1.0507, -0.0293, -0.5979, 0.0560]) tensor([0.5258, 0.1786, 0.1011, 0.1945]) -Greedy action tensor([ 1.5123, 0.1978, 0.2171, -0.3085]) tensor([0.5867, 0.1576, 0.1607, 0.0950]) -Greedy action tensor([ 1.7502, -0.9154, -0.2158, 0.4510]) tensor([0.6746, 0.0469, 0.0945, 0.1840]) -Greedy action tensor([ 1.5740, -0.1990, -0.5503, 0.1251]) tensor([0.6561, 0.1114, 0.0784, 0.1541]) -Greedy action tensor([-1.3182, -0.4712, 0.5735, -0.5377]) tensor([0.0823, 0.1921, 0.5459, 0.1797]) -Greedy action tensor([ 0.2830, -1.1439, 0.1153, 0.0917]) tensor([0.3435, 0.0825, 0.2904, 0.2837]) -Greedy action tensor([-0.1696, -0.1426, 0.6446, 0.0793]) tensor([0.1796, 0.1845, 0.4055, 0.2304]) -Greedy action tensor([ 0.3520, 0.5823, -0.1673, -0.3669]) tensor([0.2993, 0.3768, 0.1781, 0.1458]) -Greedy action tensor([-0.7038, -0.7912, -0.7658, 0.1734]) tensor([0.1901, 0.1742, 0.1787, 0.4570]) -Greedy action tensor([-0.5762, -0.6592, 0.1891, -1.0008]) tensor([0.2117, 0.1948, 0.4550, 0.1385]) -Greedy action tensor([-0.6219, 0.0388, -1.3572, 0.8175]) tensor([0.1310, 0.2536, 0.0628, 0.5526]) -Greedy action tensor([-0.2332, -0.0486, -0.1198, -0.4787]) tensor([0.2436, 0.2930, 0.2728, 0.1906]) -Greedy action tensor([1.0356, 0.3642, 0.8993, 0.2216]) tensor([0.3538, 0.1808, 0.3087, 0.1568]) -Greedy action tensor([ 0.1356, -1.4463, -0.6932, -0.2138]) tensor([0.4260, 0.0876, 0.1860, 0.3004]) -Greedy action tensor([-0.1913, -0.2584, 1.5834, -0.7798]) tensor([0.1192, 0.1115, 0.7032, 0.0662]) -Greedy action tensor([-0.9661, -0.1995, 0.3786, -1.3518]) tensor([0.1304, 0.2807, 0.5003, 0.0887]) -Greedy action tensor([-0.2845, -0.9463, 0.0300, -0.1237]) tensor([0.2463, 0.1271, 0.3373, 0.2893]) -Greedy action tensor([ 0.3042, -1.8883, 0.2293, 0.3677]) tensor([0.3220, 0.0360, 0.2988, 0.3432]) -Greedy action tensor([ 0.9659, -0.0747, 0.2043, 0.2462]) tensor([0.4334, 0.1531, 0.2024, 0.2110]) -Greedy action tensor([-0.0917, -1.0374, 0.9356, -0.8872]) tensor([0.2158, 0.0838, 0.6029, 0.0974]) -Greedy action tensor([ 0.1667, -0.7404, 0.5272, -0.3098]) tensor([0.2891, 0.1167, 0.4146, 0.1795]) -Greedy action tensor([ 0.0267, -0.1427, -0.3635, 0.4791]) tensor([0.2443, 0.2062, 0.1654, 0.3841]) -Greedy action tensor([-0.4420, -1.1765, 0.7949, 0.0037]) tensor([0.1542, 0.0740, 0.5311, 0.2408]) -Greedy action tensor([-0.9701, -0.3178, -1.1273, -0.3557]) tensor([0.1778, 0.3414, 0.1520, 0.3287]) -Greedy action tensor([ 0.4763, 0.1903, -0.2818, 0.3331]) tensor([0.3240, 0.2434, 0.1518, 0.2808]) -Greedy action tensor([ 0.6563, -0.0755, 1.1610, 0.2269]) tensor([0.2640, 0.1270, 0.4372, 0.1718]) -Greedy action tensor([ 0.2837, -1.1440, -0.5610, -0.4520]) tensor([0.4654, 0.1116, 0.2000, 0.2230]) -Greedy action tensor([ 0.6021, -0.8626, -0.2767, -0.3310]) tensor([0.4902, 0.1133, 0.2036, 0.1928]) -Greedy action tensor([ 1.0851, -1.2080, -0.9544, 0.0498]) tensor([0.6304, 0.0636, 0.0820, 0.2239]) -Greedy action tensor([ 1.0251, -0.8233, -0.5970, 0.3059]) tensor([0.5429, 0.0855, 0.1072, 0.2644]) -Greedy action tensor([-0.2841, -1.8059, 1.3965, -1.8272]) tensor([0.1470, 0.0321, 0.7894, 0.0314]) -Greedy action tensor([ 0.8522, -1.3482, -0.5506, 1.0974]) tensor([0.3796, 0.0420, 0.0933, 0.4851]) -Greedy action tensor([-0.4951, -0.7765, 0.3214, -0.0832]) tensor([0.1809, 0.1366, 0.4094, 0.2731]) -Greedy action tensor([-0.3028, -0.7282, 0.4398, -0.3826]) tensor([0.2138, 0.1397, 0.4492, 0.1974]) -Greedy action tensor([ 0.0890, -0.5803, -0.9711, 0.6426]) tensor([0.2779, 0.1423, 0.0963, 0.4835]) -Greedy action tensor([ 0.1034, -0.5176, 0.0097, -0.2315]) tensor([0.3161, 0.1699, 0.2878, 0.2262]) -Greedy action tensor([ 0.0639, -0.4606, -0.0792, -0.1856]) tensor([0.3089, 0.1828, 0.2677, 0.2407]) -Greedy action tensor([-0.6314, 0.5962, -0.3464, -0.0249]) tensor([0.1320, 0.4505, 0.1755, 0.2421]) -Greedy action tensor([-1.2575, -1.4725, 0.6315, -0.3562]) tensor([0.0919, 0.0741, 0.6077, 0.2263]) -Greedy action tensor([ 1.3145, 0.3866, -0.6657, -0.3728]) tensor([0.5819, 0.2301, 0.0803, 0.1077]) -Greedy action tensor([-1.4482, -0.9830, 0.7616, -0.9557]) tensor([0.0750, 0.1193, 0.6831, 0.1226]) -Greedy action tensor([0.2301, 0.1454, 0.3517, 0.2958]) tensor([0.2430, 0.2232, 0.2744, 0.2595]) -Greedy action tensor([1.0447, 0.0211, 0.2649, 0.6737]) tensor([0.3987, 0.1433, 0.1828, 0.2752]) -Greedy action tensor([-0.2512, -0.6168, -0.4765, 0.2078]) tensor([0.2454, 0.1703, 0.1959, 0.3884]) -Greedy action tensor([ 0.3274, -1.6803, 0.2742, -0.7160]) tensor([0.4107, 0.0552, 0.3894, 0.1447]) -Greedy action tensor([ 1.3425, -0.9290, -0.4409, -0.1503]) tensor([0.6685, 0.0690, 0.1123, 0.1502]) -Greedy action tensor([ 0.3015, 0.6891, -0.5898, 0.5968]) tensor([0.2366, 0.3486, 0.0970, 0.3178]) -Greedy action tensor([ 0.0293, -1.0810, -0.1803, 0.1316]) tensor([0.3079, 0.1014, 0.2497, 0.3410]) -Greedy action tensor([-0.1853, -1.3999, 0.1449, 0.2942]) tensor([0.2324, 0.0690, 0.3233, 0.3753]) -Greedy action tensor([ 0.0492, -1.0980, 0.4562, 0.5862]) tensor([0.2207, 0.0701, 0.3316, 0.3776]) -Greedy action tensor([ 0.4376, -1.7061, 0.1101, -0.3647]) tensor([0.4374, 0.0513, 0.3152, 0.1961]) -Greedy action tensor([-0.2163, -0.5601, -0.4879, 0.6285]) tensor([0.2084, 0.1478, 0.1588, 0.4850]) -Greedy action tensor([-0.4617, -0.4468, 0.9369, -0.9267]) tensor([0.1494, 0.1517, 0.6051, 0.0939]) -Greedy action tensor([-0.0077, -1.0166, 1.0025, 0.2723]) tensor([0.1840, 0.0671, 0.5054, 0.2435]) -Greedy action tensor([-0.9582, -1.0106, -0.0268, -0.5072]) tensor([0.1651, 0.1567, 0.4190, 0.2592]) -Greedy action tensor([-1.0143, -1.0407, 0.1682, -0.9718]) tensor([0.1592, 0.1551, 0.5195, 0.1662]) -Greedy action tensor([ 0.4241, -0.2932, -0.9718, -0.4846]) tensor([0.4676, 0.2282, 0.1158, 0.1885]) -Greedy action tensor([ 0.0467, -0.0729, -0.1101, 0.0805]) tensor([0.2648, 0.2349, 0.2264, 0.2739]) -Greedy action tensor([ 0.2542, 0.1537, -0.2222, -0.0925]) tensor([0.3094, 0.2798, 0.1921, 0.2187]) -Greedy action tensor([0.4276, 0.4972, 0.0020, 0.0487]) tensor([0.2932, 0.3144, 0.1916, 0.2008]) -Greedy action tensor([ 0.9255, -1.6713, 1.7367, -0.0646]) tensor([0.2705, 0.0202, 0.6088, 0.1005]) -Greedy action tensor([ 0.3921, -1.0861, -0.1041, -0.6060]) tensor([0.4534, 0.1034, 0.2761, 0.1671]) -Greedy action tensor([-1.1703, -1.1297, 0.5575, -1.0850]) tensor([0.1142, 0.1189, 0.6426, 0.1243]) -Greedy action tensor([0.8882, 0.1137, 0.0521, 0.3163]) tensor([0.4067, 0.1875, 0.1763, 0.2296]) -Greedy action tensor([ 0.4553, -0.2481, -0.2166, 0.0701]) tensor([0.3723, 0.1843, 0.1901, 0.2533]) -Greedy action tensor([ 0.1728, -0.1009, 0.3706, -0.9685]) tensor([0.3031, 0.2306, 0.3695, 0.0968]) -Greedy action tensor([-0.0294, -0.8987, 0.8015, -0.8080]) tensor([0.2396, 0.1004, 0.5500, 0.1100]) -Greedy action tensor([-0.8577, -1.3968, 0.7001, -1.5814]) tensor([0.1467, 0.0856, 0.6966, 0.0711]) -Greedy action tensor([-0.1155, -1.3717, 0.1642, -0.1946]) tensor([0.2832, 0.0806, 0.3746, 0.2616]) -Greedy action tensor([ 0.2283, -0.4023, 0.2524, -0.1750]) tensor([0.3101, 0.1651, 0.3177, 0.2072]) -Greedy action tensor([-0.2922, 0.1229, -0.2930, -0.6317]) tensor([0.2366, 0.3584, 0.2364, 0.1685]) -Greedy action tensor([ 0.2392, -0.6793, -0.7018, 1.1085]) tensor([0.2395, 0.0956, 0.0935, 0.5714]) -Greedy action tensor([ 0.5975, 0.2301, -0.0159, -0.3936]) tensor([0.3838, 0.2658, 0.2079, 0.1425]) -Greedy action tensor([1.3112, 0.3195, 0.3727, 0.5382]) tensor([0.4497, 0.1668, 0.1759, 0.2076]) -Greedy action tensor([ 0.3022, 0.3849, -0.0059, -0.5861]) tensor([0.3094, 0.3360, 0.2273, 0.1273]) -Greedy action tensor([-0.0506, -1.0636, -0.7821, -0.0636]) tensor([0.3532, 0.1282, 0.1699, 0.3486]) -Greedy action tensor([-0.1053, -0.2870, -0.1763, 0.3869]) tensor([0.2272, 0.1895, 0.2116, 0.3717]) -Greedy action tensor([-0.6723, -0.2160, 0.2309, -1.2679]) tensor([0.1787, 0.2820, 0.4409, 0.0985]) -Greedy action tensor([ 0.0870, -0.8834, 0.4586, -0.5740]) tensor([0.2989, 0.1133, 0.4335, 0.1543]) -Greedy action tensor([ 0.7005, 0.1484, 0.3570, -0.3138]) tensor([0.3777, 0.2174, 0.2679, 0.1370]) -Greedy action tensor([-0.7719, -0.4407, -0.3749, 0.6018]) tensor([0.1277, 0.1779, 0.1900, 0.5045]) -Greedy action tensor([ 0.0900, -0.9852, 0.4265, 0.3124]) tensor([0.2506, 0.0855, 0.3509, 0.3130]) -Greedy action tensor([ 0.0274, -1.7074, 0.9673, -0.6294]) tensor([0.2350, 0.0415, 0.6016, 0.1219]) -Greedy action tensor([ 1.0332, -0.9838, -0.3953, -0.1092]) tensor([0.5911, 0.0787, 0.1417, 0.1886]) -Greedy action tensor([ 0.0176, -0.5449, -0.8370, 0.7088]) tensor([0.2506, 0.1428, 0.1066, 0.5001]) -Greedy action tensor([-0.2051, -0.6924, -0.9598, 0.1240]) tensor([0.2878, 0.1768, 0.1353, 0.4000]) -Greedy action tensor([-1.9009, -0.4241, 0.6414, -0.1647]) tensor([0.0421, 0.1843, 0.5348, 0.2388]) -Greedy action tensor([-1.8716, -0.4698, 0.6340, -0.1514]) tensor([0.0437, 0.1774, 0.5350, 0.2439]) -Greedy action tensor([-1.8954, -0.3452, 0.6210, -0.1429]) tensor([0.0419, 0.1975, 0.5189, 0.2417]) -Greedy action tensor([-1.3441, -0.0795, 0.3747, -0.0813]) tensor([0.0732, 0.2594, 0.4085, 0.2589]) -Greedy action tensor([-1.9374, -0.3902, 0.6519, -0.1738]) tensor([0.0402, 0.1890, 0.5360, 0.2347]) -Greedy action tensor([-1.9006, -0.3940, 0.6451, -0.1520]) tensor([0.0416, 0.1879, 0.5311, 0.2393]) -Greedy action tensor([-0.9846, 0.7362, 0.1964, -0.1110]) tensor([0.0817, 0.4565, 0.2661, 0.1957]) -Greedy action tensor([-1.7886, -0.1887, 0.5645, -0.1148]) tensor([0.0459, 0.2271, 0.4824, 0.2446]) -Greedy action tensor([-1.3058, 0.7478, 0.2178, 0.1631]) tensor([0.0564, 0.4397, 0.2588, 0.2450]) -Greedy action tensor([-1.7047, 0.1994, 0.4745, -0.0354]) tensor([0.0457, 0.3071, 0.4043, 0.2428]) -Greedy action tensor([-1.7907, -0.4172, 0.6253, -0.1074]) tensor([0.0464, 0.1834, 0.5202, 0.2500]) -Greedy action tensor([-1.9232, -0.4380, 0.6563, -0.1691]) tensor([0.0410, 0.1811, 0.5409, 0.2370]) -Greedy action tensor([-1.9189, -0.4294, 0.6569, -0.1673]) tensor([0.0411, 0.1822, 0.5399, 0.2368]) -Greedy action tensor([-1.9120, -0.4189, 0.6403, -0.1624]) tensor([0.0416, 0.1851, 0.5340, 0.2393]) -Greedy action tensor([-1.7343, -0.1506, 0.5303, -0.1047]) tensor([0.0485, 0.2365, 0.4673, 0.2476]) -Greedy action tensor([-1.6919, 0.1793, 0.4365, -0.0156]) tensor([0.0471, 0.3058, 0.3955, 0.2516]) -Greedy action tensor([-1.5630, -0.3415, 0.4838, 0.0757]) tensor([0.0579, 0.1963, 0.4480, 0.2979]) -Greedy action tensor([-1.9329, -0.4134, 0.6541, -0.1738]) tensor([0.0405, 0.1853, 0.5388, 0.2354]) -Greedy action tensor([-0.4220, 0.4049, 0.1451, 0.2558]) tensor([0.1425, 0.3257, 0.2512, 0.2806]) -Greedy action tensor([-1.5200, -0.2142, 0.6598, 0.2139]) tensor([0.0521, 0.1922, 0.4607, 0.2950]) -Greedy action tensor([-1.9123, -0.3467, 0.6302, -0.1546]) tensor([0.0412, 0.1970, 0.5232, 0.2387]) -Greedy action tensor([-1.9275, -0.4160, 0.6570, -0.1723]) tensor([0.0407, 0.1845, 0.5394, 0.2354]) -Greedy action tensor([-0.4974, -0.3232, 1.1673, 0.7780]) tensor([0.0905, 0.1077, 0.4780, 0.3239]) -Greedy action tensor([-1.9040, -0.4509, 0.6496, -0.1648]) tensor([0.0420, 0.1795, 0.5396, 0.2390]) -Greedy action tensor([-1.9111, -0.4448, 0.6521, -0.1574]) tensor([0.0415, 0.1799, 0.5388, 0.2398]) -Greedy action tensor([-1.9095, 0.0844, 0.5665, -0.1651]) tensor([0.0385, 0.2829, 0.4582, 0.2204]) -Greedy action tensor([-1.8692, -0.3204, 0.6088, -0.1451]) tensor([0.0430, 0.2026, 0.5130, 0.2414]) -Greedy action tensor([ 0.6285, 0.9316, -0.0959, 0.6406]) tensor([0.2597, 0.3516, 0.1258, 0.2628]) -Greedy action tensor([-1.8414, -0.2625, 0.5899, -0.1406]) tensor([0.0441, 0.2136, 0.5010, 0.2413]) -Greedy action tensor([-1.8366, -0.4639, 0.6095, -0.1311]) tensor([0.0455, 0.1794, 0.5248, 0.2503]) -Greedy action tensor([-1.9000, -0.2967, 0.6261, -0.1442]) tensor([0.0412, 0.2048, 0.5154, 0.2386]) -Greedy action tensor([-1.9044, -0.4562, 0.6513, -0.1628]) tensor([0.0419, 0.1785, 0.5402, 0.2393]) -Greedy action tensor([-1.2288, 0.1654, 0.2656, -0.0034]) tensor([0.0776, 0.3127, 0.3456, 0.2641]) -Greedy action tensor([-1.8838, -0.4509, 0.6391, -0.1549]) tensor([0.0429, 0.1799, 0.5352, 0.2419]) -Greedy action tensor([-1.4359, -0.4742, 0.4842, -0.1622]) tensor([0.0714, 0.1867, 0.4869, 0.2551]) -Greedy action tensor([-1.9122, -0.4495, 0.6540, -0.1651]) tensor([0.0415, 0.1794, 0.5407, 0.2384]) -Greedy action tensor([-1.8922, -0.3559, 0.6309, -0.1511]) tensor([0.0420, 0.1951, 0.5234, 0.2395]) -Greedy action tensor([-1.6325, -0.5630, 0.5077, -0.0766]) tensor([0.0583, 0.1699, 0.4956, 0.2763]) -Greedy action tensor([-1.9004, -0.4380, 0.6479, -0.1583]) tensor([0.0420, 0.1813, 0.5369, 0.2398]) -Greedy action tensor([-1.6893, 0.3149, 0.4678, -0.0041]) tensor([0.0445, 0.3304, 0.3850, 0.2401]) -Greedy action tensor([-0.5862, 0.9715, 0.0092, 0.5393]) tensor([0.0940, 0.4461, 0.1704, 0.2896]) -Greedy action tensor([-1.9061, -0.4650, 0.6547, -0.1572]) tensor([0.0418, 0.1766, 0.5412, 0.2403]) -Greedy action tensor([-1.8276, -0.3983, 0.6123, -0.1171]) tensor([0.0451, 0.1883, 0.5172, 0.2494]) -Greedy action tensor([-1.9221, -0.3609, 0.6455, -0.1610]) tensor([0.0406, 0.1935, 0.5295, 0.2364]) -Greedy action tensor([-1.8996, -0.3917, 0.6499, -0.1555]) tensor([0.0416, 0.1879, 0.5325, 0.2380]) -Greedy action tensor([-0.4604, 0.3937, 0.1367, 0.2034]) tensor([0.1407, 0.3305, 0.2556, 0.2732]) -Greedy action tensor([-1.4442, 0.7066, 0.2824, 0.0566]) tensor([0.0508, 0.4362, 0.2854, 0.2277]) -Greedy action tensor([-1.6494, -0.5692, 0.5122, -0.0344]) tensor([0.0566, 0.1668, 0.4918, 0.2847]) -Greedy action tensor([-1.9357, -0.4036, 0.6547, -0.1740]) tensor([0.0403, 0.1867, 0.5380, 0.2349]) -Greedy action tensor([-1.8592, -0.3885, 0.6208, -0.1506]) tensor([0.0438, 0.1908, 0.5234, 0.2420]) -Greedy action tensor([-1.4371, -0.2347, 0.5058, 0.0104]) tensor([0.0643, 0.2139, 0.4485, 0.2733]) -Greedy action tensor([-1.8259, -0.4128, 0.6043, -0.1394]) tensor([0.0457, 0.1879, 0.5195, 0.2469]) -Greedy action tensor([-1.6181, 0.4781, 0.5879, -0.6087]) tensor([0.0477, 0.3882, 0.4332, 0.1309]) -Greedy action tensor([-1.8863, -0.4341, 0.6378, -0.1547]) tensor([0.0427, 0.1826, 0.5333, 0.2414]) -Greedy action tensor([-1.6985, -0.3218, 0.5257, -0.0743]) tensor([0.0519, 0.2055, 0.4795, 0.2632]) -Greedy action tensor([-1.9337, -0.4194, 0.6498, -0.1790]) tensor([0.0407, 0.1850, 0.5390, 0.2353]) -Greedy action tensor([-1.9057, -0.3210, 0.6281, -0.1593]) tensor([0.0413, 0.2014, 0.5204, 0.2368]) -Greedy action tensor([-1.8174, -0.3457, 0.5882, -0.1264]) tensor([0.0457, 0.1992, 0.5069, 0.2481]) -Greedy action tensor([-1.7529, -0.4520, 0.5737, -0.0850]) tensor([0.0495, 0.1817, 0.5067, 0.2622]) -Greedy action tensor([-1.7148, -0.2936, 0.5412, -0.0478]) tensor([0.0500, 0.2073, 0.4776, 0.2650]) -Greedy action tensor([-1.7422, -0.1182, 0.6360, -0.3740]) tensor([0.0481, 0.2441, 0.5189, 0.1890]) -Greedy action tensor([-1.3591, -0.5631, 0.3681, 0.1367]) tensor([0.0752, 0.1666, 0.4228, 0.3354]) -Greedy action tensor([-1.8965, -0.4484, 0.6461, -0.1568]) tensor([0.0423, 0.1798, 0.5372, 0.2407]) -Greedy action tensor([-1.9023, -0.4345, 0.6417, -0.1515]) tensor([0.0420, 0.1821, 0.5342, 0.2417]) -Greedy action tensor([-1.6556, -0.4465, 0.5151, -0.0266]) tensor([0.0549, 0.1839, 0.4812, 0.2799]) -Greedy action tensor([-1.9226, -0.4415, 0.6626, -0.1717]) tensor([0.0409, 0.1801, 0.5431, 0.2358]) -Greedy action tensor([-1.9213, -0.4526, 0.6742, -0.1617]) tensor([0.0407, 0.1769, 0.5458, 0.2366]) -Greedy action tensor([-1.8724, -0.0278, 0.5689, -0.1360]) tensor([0.0408, 0.2583, 0.4691, 0.2318]) -Greedy action tensor([-1.2440, 0.2813, 0.3171, 0.1368]) tensor([0.0697, 0.3206, 0.3323, 0.2774]) -Greedy action tensor([-1.8788, -0.2647, 0.6119, -0.1487]) tensor([0.0421, 0.2117, 0.5085, 0.2377]) -Greedy action tensor([-1.9027, -0.4510, 0.6511, -0.1593]) tensor([0.0419, 0.1791, 0.5392, 0.2398]) -Greedy action tensor([-1.4297, -0.1356, 0.3648, -0.0022]) tensor([0.0674, 0.2459, 0.4056, 0.2810]) -Greedy action tensor([-1.8846, -0.4589, 0.6427, -0.1517]) tensor([0.0429, 0.1783, 0.5365, 0.2424]) -Greedy action tensor([-1.9136, -0.4320, 0.6507, -0.1653]) tensor([0.0414, 0.1823, 0.5383, 0.2380]) -Greedy action tensor([-1.8588, -0.4154, 0.6444, -0.1403]) tensor([0.0434, 0.1839, 0.5306, 0.2421]) -Greedy action tensor([-1.9164, -0.4114, 0.6533, -0.1651]) tensor([0.0411, 0.1851, 0.5369, 0.2368]) -Greedy action tensor([-1.8081, -0.2808, 0.6394, -0.0415]) tensor([0.0434, 0.2001, 0.5022, 0.2542]) -Greedy action tensor([-1.9006, -0.3899, 0.6485, -0.1588]) tensor([0.0416, 0.1885, 0.5324, 0.2375]) -Greedy action tensor([-1.9019, -0.3635, 0.6354, -0.1527]) tensor([0.0416, 0.1936, 0.5257, 0.2391]) -Greedy action tensor([-1.8704, -0.3836, 0.6371, -0.1342]) tensor([0.0428, 0.1892, 0.5251, 0.2428]) -Greedy action tensor([-1.5612, -0.4625, 0.5757, 0.2967]) tensor([0.0530, 0.1589, 0.4487, 0.3395]) -Greedy action tensor([ 1.1293, -0.8890, 0.2589, -0.5330]) tensor([0.5743, 0.0763, 0.2405, 0.1089]) -Greedy action tensor([ 1.0356, -0.4641, 0.1026, -0.5808]) tensor([0.5509, 0.1230, 0.2167, 0.1094]) -Greedy action tensor([ 0.2433, -0.0762, -0.0728, -0.2963]) tensor([0.3291, 0.2391, 0.2399, 0.1919]) -Greedy action tensor([ 0.7353, -0.7391, 0.0368, -0.5053]) tensor([0.4962, 0.1136, 0.2468, 0.1435]) -Greedy action tensor([ 0.7661, -0.1891, -0.1033, -0.3052]) tensor([0.4659, 0.1792, 0.1953, 0.1596]) -Greedy action tensor([ 1.1266, -0.7866, 0.0267, -0.3094]) tensor([0.5819, 0.0859, 0.1937, 0.1384]) -Greedy action tensor([ 0.6012, -0.2883, 0.0066, -0.4209]) tensor([0.4306, 0.1769, 0.2376, 0.1549]) -Greedy action tensor([ 0.8416, -0.2170, -0.0671, -0.0569]) tensor([0.4636, 0.1608, 0.1868, 0.1888]) -Greedy action tensor([ 0.3548, 0.1267, -0.1514, 0.1296]) tensor([0.3128, 0.2490, 0.1885, 0.2497]) -Greedy action tensor([ 0.8938, -0.2213, -0.0531, -0.1609]) tensor([0.4845, 0.1588, 0.1879, 0.1687]) -Greedy action tensor([ 0.4847, -0.4882, -0.1738, -0.0151]) tensor([0.3996, 0.1511, 0.2069, 0.2424]) -Greedy action tensor([ 1.0656, -0.6386, -0.0644, -0.4678]) tensor([0.5811, 0.1057, 0.1877, 0.1254]) -Greedy action tensor([ 0.9337, -0.3295, -0.1278, -0.0650]) tensor([0.5007, 0.1416, 0.1732, 0.1845]) -Greedy action tensor([ 1.1190, -0.6182, -0.1331, -0.7589]) tensor([0.6192, 0.1090, 0.1771, 0.0947]) -Greedy action tensor([ 0.9733, -0.3730, -0.1625, -0.1569]) tensor([0.5251, 0.1366, 0.1687, 0.1696]) -Greedy action tensor([ 0.6457, -0.3768, 0.0834, -0.3829]) tensor([0.4372, 0.1573, 0.2492, 0.1563]) -Greedy action tensor([0.4326, 0.0051, 0.0390, 0.0253]) tensor([0.3342, 0.2179, 0.2255, 0.2224]) -Greedy action tensor([ 0.6582, -0.3116, -0.0370, -0.0620]) tensor([0.4229, 0.1603, 0.2110, 0.2058]) -Greedy action tensor([ 0.9339, -0.4162, -0.0825, -0.4048]) tensor([0.5310, 0.1376, 0.1922, 0.1392]) -Greedy action tensor([ 1.0779, -0.7099, 0.0522, -0.5766]) tensor([0.5824, 0.0975, 0.2088, 0.1113]) -Greedy action tensor([ 0.6873, -0.5871, -0.1198, -0.1328]) tensor([0.4617, 0.1291, 0.2060, 0.2033]) -Greedy action tensor([ 0.6521, 0.3606, -0.2188, 0.2757]) tensor([0.3506, 0.2620, 0.1468, 0.2406]) -Greedy action tensor([ 1.0859, -0.6618, -0.1200, -0.5367]) tensor([0.5984, 0.1042, 0.1792, 0.1181]) -Greedy action tensor([ 1.2158, -0.5692, -0.0742, -0.5562]) tensor([0.6200, 0.1040, 0.1706, 0.1054]) -Greedy action tensor([ 0.6512, -0.4009, -0.2325, -0.3783]) tensor([0.4718, 0.1647, 0.1950, 0.1685]) -Greedy action tensor([ 1.0671, -0.4674, 0.0520, -0.2841]) tensor([0.5444, 0.1174, 0.1973, 0.1410]) -Greedy action tensor([ 0.2437, -0.2999, -0.0901, -0.1249]) tensor([0.3346, 0.1943, 0.2397, 0.2315]) -Greedy action tensor([ 0.5271, -0.2338, -0.1683, -0.0983]) tensor([0.3998, 0.1868, 0.1995, 0.2139]) -Greedy action tensor([ 0.6513, -0.2686, 0.2588, -0.7470]) tensor([0.4309, 0.1717, 0.2910, 0.1064]) -Greedy action tensor([ 0.7929, -0.3218, 0.1406, -0.1053]) tensor([0.4432, 0.1454, 0.2309, 0.1805]) -Greedy action tensor([ 0.5978, 0.0396, -0.0505, -0.2038]) tensor([0.3931, 0.2250, 0.2056, 0.1764]) -Greedy action tensor([ 0.5558, -0.2910, -0.1851, -0.1147]) tensor([0.4137, 0.1774, 0.1972, 0.2116]) -Greedy action tensor([ 0.6248, -0.3802, -0.0908, -0.1889]) tensor([0.4351, 0.1593, 0.2127, 0.1928]) -Greedy action tensor([ 0.9440, -0.5410, -0.0966, -0.3666]) tensor([0.5407, 0.1225, 0.1910, 0.1458]) -Greedy action tensor([ 1.1434, -0.0501, 0.0383, -0.2954]) tensor([0.5343, 0.1620, 0.1770, 0.1268]) -Greedy action tensor([ 1.0800, -0.4971, -0.1056, -0.3827]) tensor([0.5735, 0.1185, 0.1752, 0.1328]) -Greedy action tensor([ 1.1854, -0.4695, -0.0176, -0.4563]) tensor([0.5935, 0.1134, 0.1782, 0.1149]) -Greedy action tensor([ 0.7983, -0.1808, -0.0413, -0.1081]) tensor([0.4522, 0.1699, 0.1953, 0.1827]) -Greedy action tensor([ 0.9644, -0.5105, -0.0741, -0.5522]) tensor([0.5549, 0.1270, 0.1964, 0.1218]) -Greedy action tensor([ 0.6243, -0.3366, -0.0840, -0.2747]) tensor([0.4382, 0.1676, 0.2158, 0.1783]) -Greedy action tensor([ 0.7883, -0.4405, 0.1599, -0.5211]) tensor([0.4771, 0.1396, 0.2545, 0.1288]) -Greedy action tensor([ 0.8944, -0.5451, 0.0770, -0.3025]) tensor([0.5049, 0.1197, 0.2229, 0.1525]) -Greedy action tensor([ 0.8467, -0.5270, -0.1872, -0.4686]) tensor([0.5327, 0.1349, 0.1894, 0.1430]) -Greedy action tensor([ 0.6638, -0.1018, -0.1202, 0.0437]) tensor([0.4066, 0.1891, 0.1856, 0.2187]) -Greedy action tensor([ 1.1075, -0.9455, -0.1390, -0.5613]) tensor([0.6233, 0.0800, 0.1792, 0.1175]) -Greedy action tensor([ 1.1440, -0.6502, -0.0099, -0.6148]) tensor([0.6046, 0.1005, 0.1907, 0.1041]) -Greedy action tensor([ 0.5925, -0.1963, -0.0373, -0.0940]) tensor([0.4015, 0.1825, 0.2139, 0.2021]) -Greedy action tensor([ 0.6515, -0.4279, -0.0294, -0.1726]) tensor([0.4377, 0.1487, 0.2216, 0.1920]) -Greedy action tensor([ 0.5686, 0.0015, -0.0670, -0.0207]) tensor([0.3771, 0.2139, 0.1997, 0.2092]) -Greedy action tensor([ 0.5668, 0.3321, -0.0970, 0.0848]) tensor([0.3421, 0.2705, 0.1761, 0.2112]) -Greedy action tensor([ 0.6127, 0.4308, -0.0594, 0.0777]) tensor([0.3413, 0.2845, 0.1743, 0.1999]) -Greedy action tensor([ 0.4012, 0.0864, -0.0670, -0.1055]) tensor([0.3380, 0.2467, 0.2116, 0.2036]) -Greedy action tensor([ 0.8291, -0.6754, -0.0675, -0.2611]) tensor([0.5086, 0.1130, 0.2075, 0.1710]) -Greedy action tensor([ 1.4381, -0.6843, -0.1073, -0.4877]) tensor([0.6763, 0.0810, 0.1442, 0.0986]) -Greedy action tensor([ 0.8589, -0.4810, 0.0550, -0.2100]) tensor([0.4871, 0.1276, 0.2180, 0.1673]) -Greedy action tensor([ 0.9476, 0.1767, -0.0377, -0.0413]) tensor([0.4529, 0.2095, 0.1691, 0.1685]) -Greedy action tensor([ 0.6511, -0.3729, -0.1032, -0.1648]) tensor([0.4402, 0.1581, 0.2070, 0.1947]) -Greedy action tensor([ 0.8093, -0.4802, -0.0033, -0.1356]) tensor([0.4744, 0.1307, 0.2105, 0.1844]) -Greedy action tensor([ 0.7686, -0.4013, -0.0859, -0.2691]) tensor([0.4784, 0.1485, 0.2036, 0.1695]) -Greedy action tensor([ 1.3364, -0.6455, -0.0159, -0.6797]) tensor([0.6538, 0.0901, 0.1691, 0.0871]) -Greedy action tensor([ 0.9458, -0.8528, 0.0687, -0.3158]) tensor([0.5363, 0.0888, 0.2231, 0.1519]) -Greedy action tensor([ 0.4070, -0.2189, -0.0661, -0.3088]) tensor([0.3778, 0.2021, 0.2354, 0.1847]) -Greedy action tensor([ 1.4079, -1.0320, 0.0273, -0.7304]) tensor([0.6866, 0.0599, 0.1726, 0.0809]) -Greedy action tensor([ 0.7123, -0.3928, 0.0077, -0.4525]) tensor([0.4678, 0.1549, 0.2313, 0.1460]) -Greedy action tensor([ 0.5969, -0.5089, -0.0655, -0.4028]) tensor([0.4516, 0.1494, 0.2328, 0.1662]) -Greedy action tensor([ 1.3083, -0.6813, -0.0103, -0.7687]) tensor([0.6538, 0.0894, 0.1749, 0.0819]) -Greedy action tensor([ 0.6556, -0.2101, 0.0359, -0.1084]) tensor([0.4124, 0.1735, 0.2219, 0.1921]) -Greedy action tensor([ 1.1217, -0.6888, -0.0142, -0.5604]) tensor([0.5985, 0.0979, 0.1922, 0.1113]) -Greedy action tensor([ 5.6458e-01, -1.0399e-01, 1.8400e-04, -1.9886e-03]) tensor([0.3776, 0.1935, 0.2147, 0.2143]) -Greedy action tensor([ 0.6395, -0.4407, -0.1163, -0.2385]) tensor([0.4495, 0.1526, 0.2111, 0.1868]) -Greedy action tensor([ 0.4031, -0.2389, -0.0460, -0.0505]) tensor([0.3572, 0.1880, 0.2280, 0.2269]) -Greedy action tensor([ 0.5704, -0.0700, 0.0635, -0.0983]) tensor([0.3785, 0.1995, 0.2280, 0.1940]) -Greedy action tensor([ 0.8879, -0.7288, 0.0198, -0.4875]) tensor([0.5345, 0.1061, 0.2243, 0.1351]) -Greedy action tensor([ 1.2289, -0.4755, -0.1798, -0.3914]) tensor([0.6157, 0.1120, 0.1505, 0.1218]) -Greedy action tensor([ 0.5316, -0.2866, -0.0828, -0.2413]) tensor([0.4092, 0.1805, 0.2214, 0.1889]) -Greedy action tensor([ 0.8924, -0.7277, 0.0302, -0.3697]) tensor([0.5255, 0.1040, 0.2219, 0.1487]) -Greedy action tensor([ 0.9305, -0.6911, -0.0877, -0.3611]) tensor([0.5454, 0.1078, 0.1970, 0.1499]) -Greedy action tensor([ 0.5418, 0.0975, -0.0786, -0.3363]) tensor([0.3854, 0.2472, 0.2073, 0.1602]) -Greedy action tensor([ 0.7820, -0.4070, 0.0721, -0.1714]) tensor([0.4584, 0.1396, 0.2254, 0.1767]) -Greedy action tensor([ 0.8700, -0.7615, 0.0844, -0.3806]) tensor([0.5160, 0.1010, 0.2352, 0.1478]) -Greedy action tensor([ 0.3064, 0.4398, -0.0892, -0.3104]) tensor([0.2980, 0.3405, 0.2006, 0.1608]) -Greedy action tensor([ 1.5929, -0.4260, -0.5747, 0.1349]) tensor([0.6757, 0.0897, 0.0773, 0.1572]) -Greedy action tensor([ 1.7091, -0.2301, -0.2250, 0.1601]) tensor([0.6663, 0.0958, 0.0963, 0.1416]) -Greedy action tensor([ 2.0039, -0.3049, -0.6365, -0.3879]) tensor([0.7923, 0.0787, 0.0565, 0.0725]) -Greedy action tensor([ 1.5654, -0.5207, -0.9135, 0.3165]) tensor([0.6690, 0.0831, 0.0561, 0.1919]) -Greedy action tensor([ 1.6207, -0.7676, -0.1311, 0.5804]) tensor([0.6178, 0.0567, 0.1072, 0.2183]) -Greedy action tensor([ 1.0743, -0.5684, 0.1652, -0.0397]) tensor([0.5196, 0.1005, 0.2093, 0.1706]) -Greedy action tensor([ 1.4442, -0.3421, -0.4155, 0.3852]) tensor([0.5988, 0.1003, 0.0932, 0.2077]) -Greedy action tensor([ 2.0672, -1.4549, -0.2560, 0.7288]) tensor([0.7196, 0.0213, 0.0705, 0.1887]) -Greedy action tensor([ 1.2516, -0.0869, -0.8907, 0.1271]) tensor([0.5867, 0.1539, 0.0689, 0.1906]) -Greedy action tensor([ 1.0808, -0.2282, -0.4743, 0.3435]) tensor([0.5103, 0.1378, 0.1078, 0.2441]) -Greedy action tensor([ 2.1141, -0.9219, -0.5364, 0.3131]) tensor([0.7790, 0.0374, 0.0550, 0.1286]) -Greedy action tensor([ 0.9536, -0.7148, -0.1787, 0.3063]) tensor([0.4916, 0.0927, 0.1584, 0.2573]) -Greedy action tensor([ 1.3183, -0.5472, -0.3944, 0.2854]) tensor([0.5913, 0.0915, 0.1067, 0.2105]) -Greedy action tensor([ 1.1554, 0.0031, -0.6625, 0.2831]) tensor([0.5274, 0.1666, 0.0856, 0.2204]) -Greedy action tensor([ 1.1286, -0.7874, -0.0838, 0.2173]) tensor([0.5415, 0.0797, 0.1611, 0.2177]) -Greedy action tensor([ 1.2938, -0.3615, -0.2310, 0.0776]) tensor([0.5865, 0.1120, 0.1277, 0.1738]) -Greedy action tensor([ 1.1989, -0.5191, -0.6147, 0.1552]) tensor([0.5901, 0.1059, 0.0962, 0.2078]) -Greedy action tensor([ 1.3406, -0.4010, -0.4824, 0.6288]) tensor([0.5472, 0.0959, 0.0884, 0.2685]) -Greedy action tensor([ 1.2225, 0.2682, -0.6253, 0.0215]) tensor([0.5424, 0.2089, 0.0855, 0.1632]) -Greedy action tensor([ 1.5634, -0.0216, -0.2884, 0.2533]) tensor([0.6128, 0.1256, 0.0962, 0.1654]) -Greedy action tensor([ 1.1296, 0.3454, -1.1281, -0.0795]) tensor([0.5378, 0.2455, 0.0562, 0.1605]) -Greedy action tensor([ 1.2514, -0.5889, -0.2925, 0.6769]) tensor([0.5167, 0.0820, 0.1103, 0.2909]) -Greedy action tensor([ 1.4664, -0.1363, -1.2054, 0.2913]) tensor([0.6332, 0.1275, 0.0438, 0.1955]) -Greedy action tensor([ 2.0055, 0.5729, -0.1755, 0.1098]) tensor([0.6658, 0.1589, 0.0752, 0.1000]) -Greedy action tensor([ 0.9531, -0.2638, -0.9023, 0.4637]) tensor([0.4842, 0.1434, 0.0757, 0.2968]) -Greedy action tensor([ 1.0635, -0.2180, -0.8356, 0.2980]) tensor([0.5284, 0.1467, 0.0791, 0.2458]) -Greedy action tensor([ 2.1683, 0.5799, -0.2153, 0.2206]) tensor([0.6949, 0.1419, 0.0641, 0.0991]) -Greedy action tensor([ 2.2694, -0.9998, -0.4788, 0.0932]) tensor([0.8227, 0.0313, 0.0527, 0.0933]) -Greedy action tensor([ 0.9038, -0.3352, 0.0711, -0.2729]) tensor([0.4919, 0.1425, 0.2139, 0.1517]) -Greedy action tensor([ 1.5103, -0.3624, -0.9464, 0.3640]) tensor([0.6422, 0.0987, 0.0550, 0.2041]) -Greedy action tensor([ 1.9853, -0.9708, -0.5928, 1.1666]) tensor([0.6374, 0.0332, 0.0484, 0.2811]) -Greedy action tensor([ 1.6020, 0.0282, -0.7054, 0.4664]) tensor([0.6143, 0.1273, 0.0611, 0.1973]) -Greedy action tensor([ 1.6149, -0.1824, -0.1424, 0.2343]) tensor([0.6291, 0.1043, 0.1085, 0.1582]) -Greedy action tensor([ 1.7538, 0.2781, -0.7517, -0.1198]) tensor([0.6831, 0.1562, 0.0558, 0.1049]) -Greedy action tensor([ 1.2125, -0.0919, -1.3802, 0.2087]) tensor([0.5839, 0.1584, 0.0437, 0.2140]) -Greedy action tensor([ 0.5903, -0.0225, -0.2902, 0.0976]) tensor([0.3895, 0.2110, 0.1615, 0.2380]) -Greedy action tensor([ 1.4866, -0.4083, -0.5985, 0.2209]) tensor([0.6424, 0.0966, 0.0798, 0.1812]) -Greedy action tensor([ 1.6303, -0.5341, -0.3824, 0.4491]) tensor([0.6429, 0.0738, 0.0859, 0.1973]) -Greedy action tensor([ 1.7072, -0.4357, -0.4689, 0.2408]) tensor([0.6842, 0.0803, 0.0776, 0.1579]) -Greedy action tensor([ 1.6686, -0.4109, -0.5477, -0.0259]) tensor([0.7054, 0.0882, 0.0769, 0.1296]) -Greedy action tensor([ 1.6476, -0.3620, -0.3955, 0.3788]) tensor([0.6473, 0.0868, 0.0839, 0.1820]) -Greedy action tensor([ 1.5586, -0.9426, -0.2932, 0.4477]) tensor([0.6377, 0.0523, 0.1001, 0.2100]) -Greedy action tensor([ 1.6757, -0.7775, -0.3892, 0.3012]) tensor([0.6822, 0.0587, 0.0865, 0.1726]) -Greedy action tensor([ 1.4799, 0.2626, -1.0004, 0.5252]) tensor([0.5667, 0.1677, 0.0474, 0.2181]) -Greedy action tensor([ 1.3581, -0.8124, -0.1840, 0.4995]) tensor([0.5708, 0.0651, 0.1221, 0.2419]) -Greedy action tensor([ 1.5299, -0.1148, -0.3886, 0.3274]) tensor([0.6096, 0.1177, 0.0895, 0.1832]) -Greedy action tensor([ 1.6062, -0.6331, -0.1389, 0.4843]) tensor([0.6223, 0.0663, 0.1087, 0.2027]) -Greedy action tensor([ 1.2152, 0.2377, -1.1332, 0.3355]) tensor([0.5300, 0.1994, 0.0506, 0.2199]) -Greedy action tensor([ 1.7870, -0.5355, -0.3506, 0.1640]) tensor([0.7076, 0.0694, 0.0834, 0.1396]) -Greedy action tensor([ 1.4400, -0.2273, -0.5438, 0.1612]) tensor([0.6232, 0.1176, 0.0857, 0.1735]) -Greedy action tensor([ 2.3143, -1.2438, -0.0836, 0.7029]) tensor([0.7581, 0.0216, 0.0689, 0.1513]) -Greedy action tensor([ 1.3776, -0.3608, -0.3934, -0.0535]) tensor([0.6309, 0.1109, 0.1074, 0.1508]) -Greedy action tensor([ 1.3144, 0.1713, -0.8777, 0.5401]) tensor([0.5287, 0.1686, 0.0590, 0.2437]) -Greedy action tensor([ 1.3996, -0.0172, -0.1074, -0.6156]) tensor([0.6260, 0.1518, 0.1387, 0.0834]) -Greedy action tensor([ 1.3831, -0.3212, -0.7492, 0.4181]) tensor([0.5947, 0.1082, 0.0705, 0.2266]) -Greedy action tensor([ 1.2634, -0.1133, -1.0424, 0.6441]) tensor([0.5290, 0.1335, 0.0527, 0.2848]) -Greedy action tensor([ 1.4631, -0.5846, -0.5864, 0.4643]) tensor([0.6150, 0.0794, 0.0792, 0.2265]) -Greedy action tensor([ 1.2421, -0.5920, -0.4555, 0.1979]) tensor([0.5900, 0.0943, 0.1080, 0.2077]) -Greedy action tensor([ 1.6392, -0.5736, -0.3071, -0.0018]) tensor([0.6916, 0.0757, 0.0988, 0.1340]) -Greedy action tensor([ 1.4465, -0.0277, -0.4708, -0.0382]) tensor([0.6240, 0.1429, 0.0917, 0.1414]) -Greedy action tensor([ 1.3447, -0.6743, -0.1947, 0.6417]) tensor([0.5428, 0.0721, 0.1164, 0.2687]) -Greedy action tensor([ 1.2920, -0.2172, -0.8555, 0.3963]) tensor([0.5727, 0.1266, 0.0669, 0.2338]) -Greedy action tensor([ 1.5316, 0.1336, -0.5683, 0.0920]) tensor([0.6224, 0.1538, 0.0762, 0.1475]) -Greedy action tensor([ 1.6232, -0.6975, -0.7870, 0.3375]) tensor([0.6828, 0.0671, 0.0613, 0.1888]) -Greedy action tensor([ 1.6608, 0.5492, -0.3315, 0.5230]) tensor([0.5599, 0.1842, 0.0764, 0.1795]) -Greedy action tensor([ 1.5112, 0.0135, -0.2885, 0.2817]) tensor([0.5947, 0.1330, 0.0983, 0.1739]) -Greedy action tensor([ 1.3141, -0.2803, -0.7069, 0.1350]) tensor([0.6086, 0.1236, 0.0807, 0.1872]) -Greedy action tensor([ 1.7127, -0.5725, -0.5246, 0.1653]) tensor([0.7036, 0.0716, 0.0751, 0.1497]) -Greedy action tensor([ 0.9166, -0.2683, -0.0093, -0.1775]) tensor([0.4910, 0.1501, 0.1945, 0.1644]) -Greedy action tensor([ 2.6165, -1.3072, -0.4070, 0.9314]) tensor([0.7975, 0.0158, 0.0388, 0.1479]) -Greedy action tensor([ 0.3735, -0.5314, 0.3098, -0.3595]) tensor([0.3542, 0.1433, 0.3323, 0.1702]) -Greedy action tensor([ 1.4525, 0.1360, -0.3857, 0.3686]) tensor([0.5664, 0.1519, 0.0901, 0.1916]) -Greedy action tensor([ 1.2866, -0.4481, -0.3510, 0.3536]) tensor([0.5668, 0.1000, 0.1102, 0.2230]) -Greedy action tensor([ 1.7763, -0.7092, -0.8245, 0.4986]) tensor([0.6963, 0.0580, 0.0517, 0.1940]) -Greedy action tensor([ 1.4646, -0.4512, -0.6426, 0.3365]) tensor([0.6280, 0.0924, 0.0763, 0.2032]) -Greedy action tensor([ 2.4729, -0.9811, -0.4610, 0.6207]) tensor([0.8053, 0.0255, 0.0428, 0.1264]) -Greedy action tensor([ 1.8324, 0.1416, -0.7799, 0.7234]) tensor([0.6299, 0.1161, 0.0462, 0.2078]) -Greedy action tensor([ 1.5543, -0.3157, -1.2208, 0.2499]) tensor([0.6721, 0.1036, 0.0419, 0.1824]) -Greedy action tensor([ 1.2728, -0.0748, -0.3056, -0.0106]) tensor([0.5736, 0.1491, 0.1183, 0.1589]) -Greedy action tensor([ 1.4422, -0.6666, -0.7525, 0.4742]) tensor([0.6201, 0.0753, 0.0691, 0.2355]) -Greedy action tensor([ 1.0342, 0.4876, -0.5512, -0.1617]) tensor([0.4793, 0.2775, 0.0982, 0.1450]) -Greedy action tensor([-0.8178, 0.5924, 0.5733, -1.0571]) tensor([0.1010, 0.4137, 0.4058, 0.0795]) -Greedy action tensor([-0.3645, -0.5249, -0.4926, -0.8532]) tensor([0.2989, 0.2547, 0.2630, 0.1834]) -Greedy action tensor([ 0.1972, -1.1626, 1.3559, -0.6997]) tensor([0.2062, 0.0529, 0.6568, 0.0841]) -Greedy action tensor([-0.1107, 0.9462, -1.2448, -1.0431]) tensor([0.2177, 0.6265, 0.0700, 0.0857]) -Greedy action tensor([ 0.3924, -0.2362, -0.6236, -0.0650]) tensor([0.3955, 0.2109, 0.1432, 0.2503]) -Greedy action tensor([ 0.9190, -0.1747, -0.4352, 0.0351]) tensor([0.4984, 0.1670, 0.1287, 0.2059]) -Greedy action tensor([ 0.7185, -0.7108, -0.2097, -0.4491]) tensor([0.5139, 0.1231, 0.2031, 0.1599]) -Greedy action tensor([ 0.5731, -0.1884, -0.4605, -0.7583]) tensor([0.4792, 0.2238, 0.1705, 0.1266]) -Greedy action tensor([ 0.4835, -0.5721, 0.6916, 0.1359]) tensor([0.3043, 0.1059, 0.3748, 0.2150]) -Greedy action tensor([-0.2487, -0.3268, 1.1321, -1.4930]) tensor([0.1615, 0.1494, 0.6426, 0.0465]) -Greedy action tensor([-0.1853, -1.1626, -0.0683, 0.6628]) tensor([0.2068, 0.0778, 0.2325, 0.4829]) -Greedy action tensor([ 0.3271, -0.2057, -0.0432, -0.4350]) tensor([0.3644, 0.2139, 0.2516, 0.1701]) -Greedy action tensor([-0.2058, -1.1103, 0.4586, -0.2540]) tensor([0.2325, 0.0941, 0.4518, 0.2216]) -Greedy action tensor([ 0.8468, -0.9058, 0.2578, 0.3034]) tensor([0.4331, 0.0751, 0.2403, 0.2515]) -Greedy action tensor([-0.3885, -0.9951, 0.3244, -0.4877]) tensor([0.2227, 0.1214, 0.4543, 0.2016]) -Greedy action tensor([-0.2322, -0.5843, -1.0713, 0.4720]) tensor([0.2405, 0.1691, 0.1039, 0.4864]) -Greedy action tensor([-1.3388, 0.3490, -0.5895, -0.3886]) tensor([0.0900, 0.4868, 0.1904, 0.2328]) -Greedy action tensor([-0.7397, -0.3586, 0.0962, -0.6039]) tensor([0.1690, 0.2474, 0.3899, 0.1936]) -Greedy action tensor([-0.9022, -0.5360, 0.8160, -1.0969]) tensor([0.1131, 0.1632, 0.6306, 0.0931]) -Greedy action tensor([ 0.0178, -1.0535, 0.2490, -0.4424]) tensor([0.3092, 0.1059, 0.3897, 0.1952]) -Greedy action tensor([ 0.0876, -1.8266, -0.6163, -0.0029]) tensor([0.3913, 0.0577, 0.1935, 0.3574]) -Greedy action tensor([-0.1351, -0.1658, -0.2334, -0.6009]) tensor([0.2854, 0.2768, 0.2587, 0.1791]) -Greedy action tensor([-0.6824, -0.9689, 0.3107, -0.9849]) tensor([0.1927, 0.1447, 0.5202, 0.1424]) -Greedy action tensor([ 0.2865, -1.1761, 0.7947, 0.9321]) tensor([0.2083, 0.0482, 0.3462, 0.3972]) -Greedy action tensor([ 1.4700, -0.6061, -0.2441, -0.6725]) tensor([0.7028, 0.0881, 0.1266, 0.0825]) -Greedy action tensor([ 0.9120, -1.0469, -0.2464, 0.0978]) tensor([0.5269, 0.0743, 0.1654, 0.2334]) -Greedy action tensor([-0.4308, -0.3882, 0.3263, -0.3294]) tensor([0.1893, 0.1976, 0.4036, 0.2095]) -Greedy action tensor([ 0.6469, -1.1835, 0.1646, 0.1058]) tensor([0.4238, 0.0679, 0.2616, 0.2467]) -Greedy action tensor([-1.0806, -0.9541, 0.2955, -0.7136]) tensor([0.1327, 0.1506, 0.5253, 0.1915]) -Greedy action tensor([-0.3867, -1.5531, 0.8728, -1.3654]) tensor([0.1919, 0.0598, 0.6762, 0.0721]) -Greedy action tensor([ 0.7882, -0.8842, -0.3013, -0.7562]) tensor([0.5755, 0.1081, 0.1936, 0.1228]) -Greedy action tensor([-0.0017, -0.8374, -0.0666, -0.6962]) tensor([0.3484, 0.1511, 0.3265, 0.1740]) -Greedy action tensor([ 1.5437, -0.3047, 0.3605, 0.0972]) tensor([0.5885, 0.0927, 0.1803, 0.1385]) -Greedy action tensor([-0.2526, -0.2289, -0.9688, -0.0415]) tensor([0.2668, 0.2732, 0.1304, 0.3296]) -Greedy action tensor([0.0717, 0.1079, 0.8196, 0.1177]) tensor([0.1924, 0.1995, 0.4065, 0.2015]) -Greedy action tensor([-0.5207, -0.5830, 0.0952, -0.5287]) tensor([0.2091, 0.1964, 0.3871, 0.2074]) -Greedy action tensor([ 0.1428, 0.7000, -0.2876, -0.8437]) tensor([0.2653, 0.4632, 0.1725, 0.0989]) -Greedy action tensor([ 0.3964, 0.3802, -0.2270, -0.4143]) tensor([0.3373, 0.3319, 0.1808, 0.1499]) -Greedy action tensor([-0.3827, -0.2924, -0.4869, -0.9136]) tensor([0.2790, 0.3054, 0.2514, 0.1641]) -Greedy action tensor([-0.1765, -1.1441, 0.3943, -0.8450]) tensor([0.2731, 0.1038, 0.4832, 0.1399]) -Greedy action tensor([ 0.2503, -0.1004, 0.1125, -0.3722]) tensor([0.3213, 0.2263, 0.2800, 0.1724]) -Greedy action tensor([ 0.2173, -0.0030, 0.4884, -0.4077]) tensor([0.2741, 0.2199, 0.3594, 0.1467]) -Greedy action tensor([-0.4831, -0.3927, 0.3719, -0.5073]) tensor([0.1844, 0.2019, 0.4337, 0.1800]) -Greedy action tensor([ 0.3324, -0.3398, -0.1820, -0.2685]) tensor([0.3764, 0.1922, 0.2250, 0.2064]) -Greedy action tensor([1.0764, 0.0717, 0.4062, 0.3683]) tensor([0.4219, 0.1545, 0.2158, 0.2078]) -Greedy action tensor([ 0.0586, -0.6003, 0.1557, -0.4936]) tensor([0.3130, 0.1619, 0.3449, 0.1802]) -Greedy action tensor([-0.3233, 0.8979, 0.2733, -0.8298]) tensor([0.1469, 0.4980, 0.2667, 0.0885]) -Greedy action tensor([-0.0237, -1.1164, -0.4274, 0.1563]) tensor([0.3125, 0.1048, 0.2087, 0.3741]) -Greedy action tensor([-0.3680, 0.2470, 0.7699, -0.9546]) tensor([0.1532, 0.2834, 0.4781, 0.0852]) -Greedy action tensor([-0.3700, -0.6246, -0.0655, -0.5326]) tensor([0.2512, 0.1947, 0.3406, 0.2135]) -Greedy action tensor([-0.2076, -0.4645, 1.1433, -0.3396]) tensor([0.1536, 0.1188, 0.5930, 0.1346]) -Greedy action tensor([ 0.8910, -0.0725, 0.2666, -0.1675]) tensor([0.4417, 0.1685, 0.2366, 0.1532]) -Greedy action tensor([ 0.9813, -1.0248, 0.0199, -0.8160]) tensor([0.5943, 0.0799, 0.2272, 0.0985]) -Greedy action tensor([ 1.1005, -0.2956, -0.1846, 0.7635]) tensor([0.4468, 0.1106, 0.1236, 0.3190]) -Greedy action tensor([ 1.2737, -0.4771, -0.5680, 1.0272]) tensor([0.4731, 0.0821, 0.0750, 0.3697]) -Greedy action tensor([-0.3889, 0.2320, 0.6897, -0.8916]) tensor([0.1561, 0.2904, 0.4590, 0.0944]) -Greedy action tensor([ 0.7074, -1.7318, -0.1710, 0.4873]) tensor([0.4338, 0.0378, 0.1802, 0.3481]) -Greedy action tensor([ 0.5091, -1.5778, 1.2308, 0.1296]) tensor([0.2587, 0.0321, 0.5323, 0.1770]) -Greedy action tensor([-1.2093, 0.0784, 0.0042, -0.4323]) tensor([0.0984, 0.3566, 0.3311, 0.2140]) -Greedy action tensor([-0.2325, -0.8784, 0.5410, -0.5099]) tensor([0.2248, 0.1178, 0.4871, 0.1703]) -Greedy action tensor([ 0.7836, -0.5498, -0.5968, -0.4798]) tensor([0.5563, 0.1466, 0.1399, 0.1572]) -Greedy action tensor([ 0.2197, -1.0463, 0.0928, 0.1553]) tensor([0.3225, 0.0909, 0.2841, 0.3024]) -Greedy action tensor([-0.5601, -0.5452, 0.8921, -0.6546]) tensor([0.1389, 0.1410, 0.5936, 0.1264]) -Greedy action tensor([-0.2367, -1.0203, 0.7786, -1.5890]) tensor([0.2234, 0.1021, 0.6167, 0.0578]) -Greedy action tensor([-0.5510, -0.0222, -0.4475, 0.0286]) tensor([0.1789, 0.3035, 0.1983, 0.3193]) -Greedy action tensor([-0.3231, -1.2623, 0.2082, -0.5601]) tensor([0.2577, 0.1007, 0.4383, 0.2033]) -Greedy action tensor([-1.2704, 0.3672, -0.1691, -0.4698]) tensor([0.0879, 0.4520, 0.2644, 0.1957]) -Greedy action tensor([ 0.5473, -1.1969, 0.2852, 0.0187]) tensor([0.3947, 0.0690, 0.3037, 0.2326]) -Greedy action tensor([-0.2637, 0.1716, 0.1195, -0.3921]) tensor([0.2044, 0.3159, 0.2999, 0.1798]) -Greedy action tensor([0.3334, 0.7419, 0.6780, 0.2780]) tensor([0.2057, 0.3094, 0.2903, 0.1946]) -Greedy action tensor([-0.0965, -0.7182, 0.2340, 0.1578]) tensor([0.2371, 0.1273, 0.3299, 0.3057]) -Greedy action tensor([ 0.6975, -1.0501, -0.0423, -0.7914]) tensor([0.5328, 0.0928, 0.2542, 0.1202]) -Greedy action tensor([-0.5113, -0.7493, -0.8069, 0.3898]) tensor([0.2002, 0.1578, 0.1490, 0.4930]) -Greedy action tensor([ 1.2796, -0.3966, 0.9002, 0.8981]) tensor([0.3915, 0.0732, 0.2679, 0.2674]) -Greedy action tensor([-0.8525, -0.7738, 0.7282, -0.6367]) tensor([0.1222, 0.1322, 0.5938, 0.1517]) -Greedy action tensor([-0.7558, 0.3205, 0.0955, -1.1856]) tensor([0.1444, 0.4235, 0.3382, 0.0939]) -Greedy action tensor([-0.9372, -1.2534, -0.1962, -0.5135]) tensor([0.1867, 0.1361, 0.3918, 0.2853]) -Greedy action tensor([ 1.3260, -0.7576, 0.9545, 0.6116]) tensor([0.4341, 0.0540, 0.2994, 0.2125]) -Greedy action tensor([ 0.0618, -0.5498, -0.0433, 0.4577]) tensor([0.2546, 0.1381, 0.2292, 0.3782]) -Greedy action tensor([ 0.1290, -0.6276, 0.0454, 0.7405]) tensor([0.2363, 0.1109, 0.2173, 0.4355]) -Greedy action tensor([-0.5991, -0.2600, -0.3143, -0.5824]) tensor([0.2105, 0.2955, 0.2799, 0.2141]) -Greedy action tensor([ 0.3403, -1.2320, -0.4927, -0.5423]) tensor([0.4864, 0.1010, 0.2114, 0.2012]) -Greedy action tensor([ 1.1600, -0.6365, 0.0420, -0.7126]) tensor([0.6073, 0.1007, 0.1986, 0.0934]) -Greedy action tensor([ 0.7022, -0.4426, -0.1171, -0.1494]) tensor([0.4575, 0.1456, 0.2016, 0.1952]) -Greedy action tensor([ 0.7400, -0.5818, -0.2081, -0.3997]) tensor([0.5066, 0.1351, 0.1963, 0.1621]) -Greedy action tensor([ 0.9218, -0.6165, 0.0446, -0.3266]) tensor([0.5215, 0.1120, 0.2169, 0.1496]) -Greedy action tensor([ 0.9083, -0.6675, -0.0408, -0.3535]) tensor([0.5327, 0.1102, 0.2062, 0.1508]) -Greedy action tensor([ 0.3136, 0.1459, -0.0385, -0.1491]) tensor([0.3146, 0.2661, 0.2212, 0.1981]) -Greedy action tensor([ 0.8733, -0.7678, 0.0428, -0.3463]) tensor([0.5195, 0.1007, 0.2264, 0.1534]) -Greedy action tensor([ 0.9775, -0.6998, 0.0685, -0.5564]) tensor([0.5539, 0.1035, 0.2232, 0.1195]) -Greedy action tensor([ 1.0431, -0.8448, 0.0096, -0.5357]) tensor([0.5836, 0.0884, 0.2076, 0.1204]) -Greedy action tensor([ 0.9645, -0.7853, 0.1834, -0.4808]) tensor([0.5355, 0.0931, 0.2452, 0.1262]) -Greedy action tensor([ 0.5196, -0.2757, -0.0048, -0.3211]) tensor([0.4041, 0.1824, 0.2392, 0.1743]) -Greedy action tensor([ 1.1029, -0.8542, 0.0626, -0.6109]) tensor([0.5971, 0.0843, 0.2110, 0.1076]) -Greedy action tensor([ 0.9890, -0.6034, 0.1255, -0.2772]) tensor([0.5244, 0.1067, 0.2211, 0.1478]) -Greedy action tensor([ 0.3597, 0.4402, -0.2379, -0.2213]) tensor([0.3132, 0.3394, 0.1723, 0.1752]) -Greedy action tensor([ 0.8357, -0.5074, -0.0174, -0.6195]) tensor([0.5207, 0.1359, 0.2219, 0.1215]) -Greedy action tensor([ 0.5571, -0.4591, -0.0637, -0.1326]) tensor([0.4165, 0.1507, 0.2239, 0.2089]) -Greedy action tensor([ 0.6459, -0.4700, 0.0236, -0.5159]) tensor([0.4593, 0.1505, 0.2465, 0.1437]) -Greedy action tensor([ 0.9992, -1.0346, -0.0142, -0.3089]) tensor([0.5669, 0.0742, 0.2057, 0.1532]) -Greedy action tensor([ 0.7173, -0.4501, -0.1095, -0.2941]) tensor([0.4734, 0.1473, 0.2071, 0.1722]) -Greedy action tensor([ 0.7442, -0.5101, -0.1740, -0.3050]) tensor([0.4915, 0.1402, 0.1962, 0.1721]) -Greedy action tensor([ 0.7592, -0.3537, 0.0186, -0.3324]) tensor([0.4670, 0.1535, 0.2227, 0.1568]) -Greedy action tensor([ 0.7801, -0.2285, 0.0118, -0.2013]) tensor([0.4538, 0.1655, 0.2105, 0.1701]) -Greedy action tensor([ 0.8732, -0.6639, 0.0737, -0.3600]) tensor([0.5113, 0.1099, 0.2298, 0.1490]) -Greedy action tensor([ 0.8662, -0.4090, -0.1452, -0.1760]) tensor([0.5011, 0.1400, 0.1822, 0.1767]) -Greedy action tensor([ 1.0108, -0.9573, 0.0038, -0.5482]) tensor([0.5830, 0.0815, 0.2130, 0.1226]) -Greedy action tensor([ 0.8392, -0.7871, 0.0431, -0.4117]) tensor([0.5171, 0.1017, 0.2333, 0.1480]) -Greedy action tensor([ 0.9261, -0.6511, 0.0653, -0.5159]) tensor([0.5360, 0.1107, 0.2266, 0.1267]) -Greedy action tensor([ 0.7688, -0.4198, -0.0026, -0.1969]) tensor([0.4656, 0.1418, 0.2153, 0.1773]) -Greedy action tensor([ 0.6760, -0.2579, -0.1498, 0.0029]) tensor([0.4272, 0.1679, 0.1870, 0.2179]) -Greedy action tensor([ 0.9026, -0.7724, 0.0787, -0.4212]) tensor([0.5285, 0.0990, 0.2319, 0.1406]) -Greedy action tensor([ 0.5790, -0.4106, 0.0269, -0.2156]) tensor([0.4168, 0.1549, 0.2400, 0.1883]) -Greedy action tensor([0.4157, 0.3096, 0.0364, 0.1733]) tensor([0.2969, 0.2670, 0.2032, 0.2330]) -Greedy action tensor([ 0.8850, -0.4874, -0.0526, -0.1891]) tensor([0.5034, 0.1276, 0.1971, 0.1719]) -Greedy action tensor([ 1.0016, -0.9262, 0.1432, -0.5529]) tensor([0.5616, 0.0817, 0.2380, 0.1187]) -Greedy action tensor([ 1.0031, -0.7431, -0.0461, -0.6225]) tensor([0.5809, 0.1013, 0.2034, 0.1143]) -Greedy action tensor([ 0.7034, -0.3324, -0.2018, -0.2467]) tensor([0.4660, 0.1654, 0.1885, 0.1802]) -Greedy action tensor([ 0.8508, -0.8038, 0.0169, -0.3891]) tensor([0.5222, 0.0998, 0.2268, 0.1511]) -Greedy action tensor([ 0.6233, -0.4257, -0.1422, -0.2348]) tensor([0.4466, 0.1564, 0.2077, 0.1893]) -Greedy action tensor([ 0.5132, -0.1695, -0.0228, -0.0764]) tensor([0.3781, 0.1910, 0.2212, 0.2097]) -Greedy action tensor([ 1.0057, -0.8512, 0.1112, -0.4635]) tensor([0.5571, 0.0870, 0.2277, 0.1282]) -Greedy action tensor([ 0.6889, -0.3325, -0.0583, -0.1581]) tensor([0.4420, 0.1592, 0.2094, 0.1895]) -Greedy action tensor([ 0.4855, -0.2721, -0.0040, -0.1764]) tensor([0.3850, 0.1805, 0.2360, 0.1986]) -Greedy action tensor([ 0.7862, -0.7080, -0.1579, -0.2578]) tensor([0.5088, 0.1142, 0.1979, 0.1791]) -Greedy action tensor([ 0.5854, -0.2712, 0.1265, -0.3599]) tensor([0.4090, 0.1737, 0.2585, 0.1589]) -Greedy action tensor([ 0.5241, -0.0303, 0.0566, -0.0845]) tensor([0.3643, 0.2093, 0.2283, 0.1982]) -Greedy action tensor([ 1.2730, -0.9038, -0.1626, -0.5473]) tensor([0.6608, 0.0749, 0.1572, 0.1070]) -Greedy action tensor([ 0.6220, -0.1290, -0.0041, -0.1081]) tensor([0.4019, 0.1896, 0.2149, 0.1936]) -Greedy action tensor([ 0.7496, -0.0475, 0.0579, -0.1407]) tensor([0.4234, 0.1908, 0.2120, 0.1738]) -Greedy action tensor([ 0.6810, -0.4172, -0.0658, -0.3746]) tensor([0.4640, 0.1547, 0.2199, 0.1615]) -Greedy action tensor([ 1.0086, -0.5238, 0.0778, -0.2946]) tensor([0.5314, 0.1148, 0.2095, 0.1444]) -Greedy action tensor([ 0.9986, -0.7603, 0.2285, -0.6241]) tensor([0.5457, 0.0940, 0.2526, 0.1077]) -Greedy action tensor([ 0.7224, -0.1148, -0.0295, -0.0594]) tensor([0.4234, 0.1833, 0.1996, 0.1937]) -Greedy action tensor([ 1.1447, -0.8473, -0.1162, -0.6302]) tensor([0.6292, 0.0858, 0.1783, 0.1066]) -Greedy action tensor([ 0.3262, -0.3189, -0.2308, -0.0418]) tensor([0.3585, 0.1881, 0.2054, 0.2481]) -Greedy action tensor([ 1.1214, -0.5463, 0.0265, -0.4891]) tensor([0.5804, 0.1095, 0.1942, 0.1160]) -Greedy action tensor([ 1.0465, -0.7621, -0.0527, -0.6620]) tensor([0.5959, 0.0977, 0.1985, 0.1079]) -Greedy action tensor([ 0.8442, -0.5633, -0.0438, -0.1956]) tensor([0.4976, 0.1218, 0.2047, 0.1759]) -Greedy action tensor([ 1.1002, -0.3926, 0.1037, -0.2681]) tensor([0.5410, 0.1216, 0.1997, 0.1377]) -Greedy action tensor([ 0.3989, 0.2288, -0.2079, 0.0824]) tensor([0.3208, 0.2706, 0.1749, 0.2338]) -Greedy action tensor([ 0.8982, -0.3439, -0.2548, -0.1214]) tensor([0.5088, 0.1469, 0.1606, 0.1836]) -Greedy action tensor([ 1.0183, -0.7082, 0.0241, -0.7284]) tensor([0.5806, 0.1033, 0.2149, 0.1012]) -Greedy action tensor([ 1.0291, -0.7254, 0.1175, -0.4314]) tensor([0.5534, 0.0957, 0.2224, 0.1285]) -Greedy action tensor([ 1.3758, -0.8697, 0.1288, -0.6043]) tensor([0.6530, 0.0691, 0.1877, 0.0902]) -Greedy action tensor([ 1.0710, -0.6498, 0.0049, -0.4138]) tensor([0.5715, 0.1023, 0.1968, 0.1295]) -Greedy action tensor([ 0.5003, -0.1379, 0.0614, -0.2873]) tensor([0.3805, 0.2010, 0.2453, 0.1731]) -Greedy action tensor([ 0.7791, -0.3770, -0.0237, -0.1733]) tensor([0.4654, 0.1465, 0.2085, 0.1796]) -Greedy action tensor([ 0.7249, -0.1124, -0.0871, 0.0462]) tensor([0.4194, 0.1816, 0.1862, 0.2128]) -Greedy action tensor([ 0.5833, -0.3584, 0.0998, -0.6138]) tensor([0.4331, 0.1689, 0.2671, 0.1308]) -Greedy action tensor([ 0.6640, -0.3463, -0.0439, -0.1289]) tensor([0.4330, 0.1577, 0.2133, 0.1960]) -Greedy action tensor([ 1.1211, -0.3333, -0.1229, -0.5172]) tensor([0.5827, 0.1361, 0.1680, 0.1132]) -Greedy action tensor([ 0.8020, -0.5813, -0.0234, -0.5510]) tensor([0.5135, 0.1288, 0.2250, 0.1327]) -Greedy action tensor([ 0.4883, -0.4161, -0.2107, -0.1881]) tensor([0.4149, 0.1679, 0.2062, 0.2109]) -Greedy action tensor([ 0.3673, 0.0605, -0.0423, -0.1403]) tensor([0.3332, 0.2451, 0.2212, 0.2005]) -Greedy action tensor([ 0.5535, -0.3606, 0.0659, -0.3034]) tensor([0.4099, 0.1643, 0.2517, 0.1740]) -Greedy action tensor([ 0.7579, -0.3750, -0.0361, -0.1444]) tensor([0.4588, 0.1478, 0.2074, 0.1861]) -Greedy action tensor([ 0.8123, -0.4974, -0.1212, -0.5085]) tensor([0.5181, 0.1398, 0.2037, 0.1383]) -Greedy action tensor([ 1.2724, -0.5650, 0.0936, -0.6069]) tensor([0.6174, 0.0983, 0.1900, 0.0943]) -Greedy action tensor([ 0.7348, -0.0217, -0.0330, -0.0336]) tensor([0.4172, 0.1958, 0.1936, 0.1935]) -Greedy action tensor([ 1.0410, -0.8615, -0.0050, -0.5189]) tensor([0.5846, 0.0872, 0.2054, 0.1228]) -Greedy action tensor([ 0.6843, -0.5696, 0.0466, -0.2531]) tensor([0.4534, 0.1294, 0.2396, 0.1776]) -Greedy action tensor([ 0.7333, -0.1402, 0.0627, -0.2793]) tensor([0.4363, 0.1821, 0.2231, 0.1585]) -Greedy action tensor([-0.8367, 0.2228, 0.0139, 0.4934]) tensor([0.0999, 0.2883, 0.2339, 0.3779]) -Greedy action tensor([-1.8554, -0.3650, 0.2282, -0.4270]) tensor([0.0567, 0.2516, 0.4553, 0.2364]) -Greedy action tensor([-1.7984, -0.4446, 0.6491, -0.0584]) tensor([0.0452, 0.1750, 0.5224, 0.2575]) -Greedy action tensor([-1.5518, -0.2319, 0.4858, 0.0767]) tensor([0.0571, 0.2138, 0.4381, 0.2910]) -Greedy action tensor([-1.7752, -0.4077, 0.6230, -0.0999]) tensor([0.0470, 0.1846, 0.5173, 0.2511]) -Greedy action tensor([-1.8306, -0.2687, 0.5967, -0.1053]) tensor([0.0440, 0.2100, 0.4988, 0.2472]) -Greedy action tensor([-1.8621, -0.3002, 0.6177, -0.1375]) tensor([0.0429, 0.2045, 0.5120, 0.2406]) -Greedy action tensor([-0.9835, -0.6172, 0.3912, 0.5627]) tensor([0.0902, 0.1301, 0.3565, 0.4232]) -Greedy action tensor([-1.7056, -0.4418, 0.6188, -0.0828]) tensor([0.0504, 0.1785, 0.5155, 0.2556]) -Greedy action tensor([-1.9041, -0.4570, 0.6547, -0.1593]) tensor([0.0418, 0.1779, 0.5407, 0.2396]) -Greedy action tensor([-1.9012, -0.4330, 0.6487, -0.1589]) tensor([0.0419, 0.1820, 0.5368, 0.2393]) -Greedy action tensor([-1.6032, -0.3535, 0.6413, 0.0065]) tensor([0.0528, 0.1844, 0.4985, 0.2643]) -Greedy action tensor([-1.8886, -0.4105, 0.6357, -0.1477]) tensor([0.0424, 0.1860, 0.5296, 0.2419]) -Greedy action tensor([-1.8553, -0.4350, 0.6224, -0.1398]) tensor([0.0442, 0.1830, 0.5269, 0.2459]) -Greedy action tensor([-1.4847, 0.4287, 0.3301, -0.0157]) tensor([0.0548, 0.3711, 0.3362, 0.2379]) -Greedy action tensor([-1.9017, -0.3734, 0.6391, -0.1548]) tensor([0.0416, 0.1918, 0.5279, 0.2387]) -Greedy action tensor([-1.8313, -0.4238, 0.5953, -0.1104]) tensor([0.0455, 0.1857, 0.5147, 0.2541]) -Greedy action tensor([-1.2469, 0.7520, 0.2318, 0.0656]) tensor([0.0607, 0.4478, 0.2662, 0.2254]) -Greedy action tensor([-1.8774, -0.2873, 0.6195, -0.1326]) tensor([0.0421, 0.2063, 0.5108, 0.2408]) -Greedy action tensor([-1.9393, -0.4406, 0.6640, -0.1781]) tensor([0.0403, 0.1805, 0.5446, 0.2346]) -Greedy action tensor([-1.9106, -0.4104, 0.6483, -0.1638]) tensor([0.0414, 0.1857, 0.5353, 0.2376]) -Greedy action tensor([-1.8390, -0.4662, 0.6097, -0.1296]) tensor([0.0454, 0.1790, 0.5250, 0.2506]) -Greedy action tensor([-1.8150, -0.4832, 0.6083, -0.1209]) tensor([0.0465, 0.1761, 0.5245, 0.2530]) -Greedy action tensor([-1.9026, -0.4493, 0.6515, -0.1569]) tensor([0.0419, 0.1792, 0.5388, 0.2401]) -Greedy action tensor([-1.6818, -0.5178, 0.6695, 0.1508]) tensor([0.0477, 0.1529, 0.5011, 0.2983]) -Greedy action tensor([-1.9056, -0.4473, 0.6828, -0.1586]) tensor([0.0411, 0.1766, 0.5467, 0.2357]) -Greedy action tensor([-1.8992, -0.3008, 0.6197, -0.1492]) tensor([0.0415, 0.2051, 0.5148, 0.2386]) -Greedy action tensor([-1.0418, 0.0211, 0.4842, 0.3048]) tensor([0.0810, 0.2346, 0.3728, 0.3116]) -Greedy action tensor([-1.9075, -0.3989, 0.6503, -0.1604]) tensor([0.0414, 0.1871, 0.5341, 0.2374]) -Greedy action tensor([-1.9106, -0.4238, 0.6402, -0.1547]) tensor([0.0416, 0.1841, 0.5334, 0.2409]) -Greedy action tensor([-1.9223, -0.4123, 0.6562, -0.1683]) tensor([0.0408, 0.1849, 0.5383, 0.2360]) -Greedy action tensor([-1.8890, -0.2370, 0.6089, -0.1385]) tensor([0.0414, 0.2162, 0.5038, 0.2386]) -Greedy action tensor([-1.8838, -0.3731, 0.6295, -0.1851]) tensor([0.0428, 0.1941, 0.5289, 0.2342]) -Greedy action tensor([-1.8950, -0.4532, 0.6499, -0.1558]) tensor([0.0423, 0.1787, 0.5385, 0.2406]) -Greedy action tensor([-1.8708, -0.4129, 0.6265, -0.1486]) tensor([0.0434, 0.1865, 0.5272, 0.2429]) -Greedy action tensor([-1.2167, -0.3456, 0.2951, 0.2679]) tensor([0.0811, 0.1937, 0.3676, 0.3577]) -Greedy action tensor([-1.9257, -0.4209, 0.6580, -0.1721]) tensor([0.0408, 0.1836, 0.5401, 0.2355]) -Greedy action tensor([-1.8724, -0.4118, 0.6265, -0.1418]) tensor([0.0433, 0.1864, 0.5263, 0.2441]) -Greedy action tensor([-1.9161, -0.4528, 0.6590, -0.1640]) tensor([0.0413, 0.1784, 0.5422, 0.2381]) -Greedy action tensor([-1.7169, 0.0434, 0.4771, -0.0834]) tensor([0.0478, 0.2781, 0.4291, 0.2450]) -Greedy action tensor([-0.6628, 0.5369, -0.0615, 0.0067]) tensor([0.1235, 0.4099, 0.2253, 0.2412]) -Greedy action tensor([-1.0332, -0.4338, 0.3085, 0.0548]) tensor([0.1040, 0.1894, 0.3979, 0.3087]) -Greedy action tensor([-0.4325, -0.3104, 0.1485, 0.1167]) tensor([0.1770, 0.2000, 0.3165, 0.3065]) -Greedy action tensor([-1.8760, -0.4599, 0.6399, -0.1469]) tensor([0.0432, 0.1781, 0.5350, 0.2436]) -Greedy action tensor([-1.9058, -0.4442, 0.6497, -0.1600]) tensor([0.0418, 0.1803, 0.5383, 0.2396]) -Greedy action tensor([-1.8620, -0.2577, 0.5987, -0.1601]) tensor([0.0432, 0.2147, 0.5055, 0.2367]) -Greedy action tensor([-1.8225, -0.1544, 0.5664, -0.1139]) tensor([0.0440, 0.2333, 0.4797, 0.2430]) -Greedy action tensor([-1.8160, -0.3105, 0.6225, -0.0930]) tensor([0.0443, 0.1997, 0.5077, 0.2483]) -Greedy action tensor([-1.5178, -0.5882, 0.4720, 0.0702]) tensor([0.0635, 0.1609, 0.4646, 0.3109]) -Greedy action tensor([-1.1012, 0.2156, 0.4298, 0.3417]) tensor([0.0736, 0.2746, 0.3402, 0.3115]) -Greedy action tensor([-1.9039, -0.4485, 0.6464, -0.1608]) tensor([0.0420, 0.1800, 0.5380, 0.2400]) -Greedy action tensor([-1.0165, -0.5792, 0.2926, 0.0192]) tensor([0.1103, 0.1708, 0.4083, 0.3106]) -Greedy action tensor([-1.8963, -0.4476, 0.6470, -0.1556]) tensor([0.0422, 0.1798, 0.5372, 0.2408]) -Greedy action tensor([-1.6956, -0.3715, 0.5319, -0.0991]) tensor([0.0527, 0.1981, 0.4890, 0.2602]) -Greedy action tensor([-1.9031, -0.2974, 0.6213, -0.1458]) tensor([0.0412, 0.2053, 0.5145, 0.2389]) -Greedy action tensor([-1.9111, -0.3621, 0.6412, -0.1664]) tensor([0.0412, 0.1939, 0.5290, 0.2359]) -Greedy action tensor([-1.8057, -0.4429, 0.5995, -0.1094]) tensor([0.0466, 0.1822, 0.5168, 0.2544]) -Greedy action tensor([-1.8816, -0.4005, 0.6406, -0.1300]) tensor([0.0423, 0.1862, 0.5274, 0.2440]) -Greedy action tensor([-1.7862, 0.0328, 0.5101, -0.0456]) tensor([0.0439, 0.2704, 0.4358, 0.2500]) -Greedy action tensor([-1.9050, -0.4492, 0.6512, -0.1599]) tensor([0.0418, 0.1794, 0.5392, 0.2396]) -Greedy action tensor([-1.5790, -0.4749, 0.4839, 0.0347]) tensor([0.0591, 0.1784, 0.4654, 0.2970]) -Greedy action tensor([-1.8558, -0.3199, 0.6284, -0.1232]) tensor([0.0429, 0.1994, 0.5148, 0.2428]) -Greedy action tensor([-1.8743, -0.3863, 0.6257, -0.1444]) tensor([0.0430, 0.1905, 0.5240, 0.2426]) -Greedy action tensor([-1.8785, -0.2778, 0.6067, -0.1417]) tensor([0.0423, 0.2097, 0.5078, 0.2402]) -Greedy action tensor([-1.8824, -0.4698, 0.6353, -0.1516]) tensor([0.0432, 0.1774, 0.5356, 0.2438]) -Greedy action tensor([-1.7395, -0.4774, 0.5496, -0.0593]) tensor([0.0506, 0.1787, 0.4992, 0.2715]) -Greedy action tensor([-1.8131, -0.3749, 0.6088, -0.1044]) tensor([0.0454, 0.1915, 0.5121, 0.2510]) -Greedy action tensor([-1.5664, 0.1548, 0.4013, -0.0433]) tensor([0.0546, 0.3050, 0.3902, 0.2502]) -Greedy action tensor([-1.9207, -0.4186, 0.6523, -0.1678]) tensor([0.0410, 0.1843, 0.5378, 0.2368]) -Greedy action tensor([-1.8053, -0.1720, 0.5554, -0.1006]) tensor([0.0450, 0.2305, 0.4770, 0.2475]) -Greedy action tensor([-1.5202, -0.4897, 0.4543, 0.0181]) tensor([0.0639, 0.1789, 0.4599, 0.2973]) -Greedy action tensor([-1.8932, -0.4438, 0.6426, -0.1916]) tensor([0.0428, 0.1823, 0.5403, 0.2346]) -Greedy action tensor([-0.8892, 0.9112, 0.1077, 0.2064]) tensor([0.0784, 0.4746, 0.2125, 0.2345]) -Greedy action tensor([-1.7910, -0.1117, 0.5437, -0.0838]) tensor([0.0450, 0.2415, 0.4651, 0.2483]) -Greedy action tensor([-1.9168, -0.4211, 0.6556, -0.1644]) tensor([0.0411, 0.1834, 0.5384, 0.2371]) -Greedy action tensor([-1.9343, -0.4417, 0.6673, -0.1671]) tensor([0.0403, 0.1795, 0.5440, 0.2362]) -Greedy action tensor([-1.7734, -0.3896, 0.5680, -0.1327]) tensor([0.0487, 0.1942, 0.5060, 0.2511]) -Greedy action tensor([-1.9036, -0.4017, 0.6502, -0.1586]) tensor([0.0415, 0.1865, 0.5341, 0.2379]) -Greedy action tensor([-1.9350, -0.4411, 0.6626, -0.1746]) tensor([0.0405, 0.1803, 0.5438, 0.2354]) -Greedy action tensor([-1.7645, -0.3380, 0.6298, -0.0498]) tensor([0.0461, 0.1921, 0.5056, 0.2562]) -Greedy action tensor([-1.6733, -0.1396, 0.5315, -0.0370]) tensor([0.0504, 0.2336, 0.4571, 0.2589]) -Greedy action tensor([ 1.0616, -0.6775, -0.5708, -0.0505]) tensor([0.5882, 0.1033, 0.1150, 0.1934]) -Greedy action tensor([ 0.6264, 0.0194, -0.2221, 0.0668]) tensor([0.3930, 0.2142, 0.1682, 0.2246]) -Greedy action tensor([ 1.4211, -0.2585, -0.4998, 0.4422]) tensor([0.5853, 0.1091, 0.0857, 0.2199]) -Greedy action tensor([ 1.4229, -0.6891, -0.3637, 0.5171]) tensor([0.5908, 0.0715, 0.0990, 0.2388]) -Greedy action tensor([ 1.7556, -0.3362, -0.4088, -0.0486]) tensor([0.7128, 0.0880, 0.0818, 0.1173]) -Greedy action tensor([ 1.4074, -0.5873, -0.5608, 0.3786]) tensor([0.6123, 0.0833, 0.0855, 0.2189]) -Greedy action tensor([ 1.9255, -1.1177, -0.2017, 0.4945]) tensor([0.7113, 0.0339, 0.0848, 0.1700]) -Greedy action tensor([ 1.3838, -0.6814, -0.2650, 0.5557]) tensor([0.5695, 0.0722, 0.1095, 0.2488]) -Greedy action tensor([ 1.7079, -0.8651, -0.4846, 0.3114]) tensor([0.6967, 0.0532, 0.0778, 0.1724]) -Greedy action tensor([ 1.6233, -0.7887, -0.5602, -0.0185]) tensor([0.7164, 0.0642, 0.0807, 0.1387]) -Greedy action tensor([ 1.5751, -0.4494, -0.4726, 0.1345]) tensor([0.6676, 0.0882, 0.0861, 0.1581]) -Greedy action tensor([ 1.7559, -0.1169, -0.0852, 0.0760]) tensor([0.6672, 0.1025, 0.1059, 0.1244]) -Greedy action tensor([ 1.7912, -0.3199, -0.1938, 0.2412]) tensor([0.6799, 0.0823, 0.0934, 0.1443]) -Greedy action tensor([ 1.6279, -0.3030, -0.4997, 0.2496]) tensor([0.6596, 0.0956, 0.0786, 0.1662]) -Greedy action tensor([ 1.5096, 0.2114, -0.5548, -0.4279]) tensor([0.6477, 0.1768, 0.0822, 0.0933]) -Greedy action tensor([ 1.4682, -0.3365, -0.7025, 0.0426]) tensor([0.6583, 0.1083, 0.0751, 0.1582]) -Greedy action tensor([ 1.3437, 0.1927, -0.3562, 0.0836]) tensor([0.5610, 0.1774, 0.1025, 0.1591]) -Greedy action tensor([ 1.5772, -0.5564, -0.5190, 0.5482]) tensor([0.6255, 0.0741, 0.0769, 0.2235]) -Greedy action tensor([ 1.2210, -0.4776, -0.2889, 0.4035]) tensor([0.5419, 0.0991, 0.1197, 0.2393]) -Greedy action tensor([ 1.1046, -0.1373, -1.1265, 0.2610]) tensor([0.5475, 0.1581, 0.0588, 0.2355]) -Greedy action tensor([ 1.4189, -0.6911, -0.5557, -0.1849]) tensor([0.6844, 0.0830, 0.0950, 0.1376]) -Greedy action tensor([ 1.4590, -0.3839, -0.5749, 0.6945]) tensor([0.5699, 0.0902, 0.0746, 0.2653]) -Greedy action tensor([ 2.8705, 0.8235, -0.0445, -0.1161]) tensor([0.8105, 0.1047, 0.0439, 0.0409]) -Greedy action tensor([ 1.4340, 0.0742, -1.0978, 0.7321]) tensor([0.5459, 0.1401, 0.0434, 0.2706]) -Greedy action tensor([ 1.1184, -0.3460, -0.6265, 0.0199]) tensor([0.5749, 0.1329, 0.1004, 0.1917]) -Greedy action tensor([ 2.2497, -1.4147, -0.1577, 0.6825]) tensor([0.7551, 0.0193, 0.0680, 0.1575]) -Greedy action tensor([ 1.6225, -0.0393, -0.6909, -0.0800]) tensor([0.6798, 0.1290, 0.0672, 0.1239]) -Greedy action tensor([ 1.6856, -0.5858, -0.5434, 0.2784]) tensor([0.6870, 0.0709, 0.0739, 0.1682]) -Greedy action tensor([ 1.3341, -0.5693, -0.6498, 0.0628]) tensor([0.6381, 0.0951, 0.0878, 0.1790]) -Greedy action tensor([ 1.3262, -0.2211, 0.0014, 0.2587]) tensor([0.5487, 0.1168, 0.1459, 0.1887]) -Greedy action tensor([ 1.3184, -0.3454, -1.1074, 0.2040]) tensor([0.6227, 0.1179, 0.0550, 0.2043]) -Greedy action tensor([ 1.4986, -0.7763, -0.3523, 0.4445]) tensor([0.6217, 0.0639, 0.0977, 0.2167]) -Greedy action tensor([ 2.0589, -1.2565, 0.2363, 1.0608]) tensor([0.6384, 0.0232, 0.1032, 0.2353]) -Greedy action tensor([ 1.9587, -0.4455, -0.2891, 0.4421]) tensor([0.7065, 0.0638, 0.0746, 0.1551]) -Greedy action tensor([ 1.5557, -0.6310, -0.4428, 0.2453]) tensor([0.6590, 0.0740, 0.0893, 0.1777]) -Greedy action tensor([ 1.4366, -0.3452, -0.0403, 0.6850]) tensor([0.5352, 0.0901, 0.1222, 0.2524]) -Greedy action tensor([ 2.0149, 0.1735, -0.5492, 0.0732]) tensor([0.7251, 0.1150, 0.0558, 0.1040]) -Greedy action tensor([ 1.0412, -0.2238, -0.9565, 0.2895]) tensor([0.5292, 0.1494, 0.0718, 0.2496]) -Greedy action tensor([ 1.9898, -1.0919, -0.4916, 0.6794]) tensor([0.7147, 0.0328, 0.0598, 0.1928]) -Greedy action tensor([ 0.8948, -0.5147, 0.1974, 0.4461]) tensor([0.4201, 0.1026, 0.2091, 0.2682]) -Greedy action tensor([ 1.6599, 0.2546, -0.3400, -0.0670]) tensor([0.6416, 0.1574, 0.0869, 0.1141]) -Greedy action tensor([ 1.9483, -1.0596, -0.4365, 0.6544]) tensor([0.7064, 0.0349, 0.0651, 0.1937]) -Greedy action tensor([ 2.3066, 0.1618, -0.3009, 0.0122]) tensor([0.7742, 0.0907, 0.0571, 0.0781]) -Greedy action tensor([ 1.3868, -0.3201, -0.5096, 0.1849]) tensor([0.6127, 0.1112, 0.0920, 0.1842]) -Greedy action tensor([ 1.5233, -0.5705, -0.7352, 0.1025]) tensor([0.6806, 0.0839, 0.0711, 0.1644]) -Greedy action tensor([ 1.1043, -0.0413, -0.4847, 0.6415]) tensor([0.4648, 0.1478, 0.0949, 0.2926]) -Greedy action tensor([ 1.6729, -1.0312, -0.5685, 0.1244]) tensor([0.7216, 0.0483, 0.0767, 0.1534]) -Greedy action tensor([ 1.3530, -0.4998, -0.2913, -0.1622]) tensor([0.6371, 0.0999, 0.1231, 0.1400]) -Greedy action tensor([ 1.0780, -0.0486, -0.3905, 0.1867]) tensor([0.5090, 0.1650, 0.1172, 0.2088]) -Greedy action tensor([ 1.4480, -0.4260, -0.1888, 0.2383]) tensor([0.6074, 0.0932, 0.1182, 0.1812]) -Greedy action tensor([ 1.4596, -0.2639, -0.4907, 0.4723]) tensor([0.5906, 0.1054, 0.0840, 0.2200]) -Greedy action tensor([ 1.6695, -0.7543, -0.3534, 0.2497]) tensor([0.6837, 0.0606, 0.0904, 0.1653]) -Greedy action tensor([ 1.7702, -0.8456, -0.5077, 0.3800]) tensor([0.7019, 0.0513, 0.0719, 0.1748]) -Greedy action tensor([ 1.9164, -0.5529, -0.7430, 0.5162]) tensor([0.7137, 0.0604, 0.0500, 0.1760]) -Greedy action tensor([ 1.1753, -0.2587, -1.0293, 0.3249]) tensor([0.5631, 0.1342, 0.0621, 0.2406]) -Greedy action tensor([ 1.2942, -0.1173, -0.8855, 0.3222]) tensor([0.5763, 0.1405, 0.0652, 0.2180]) -Greedy action tensor([ 1.3520, -0.4573, -0.7050, 0.1834]) tensor([0.6241, 0.1022, 0.0798, 0.1940]) -Greedy action tensor([ 1.6779, -0.3129, -0.7224, 0.4692]) tensor([0.6554, 0.0895, 0.0594, 0.1957]) -Greedy action tensor([ 1.4726, -0.7182, -0.9782, 0.5089]) tensor([0.6331, 0.0708, 0.0546, 0.2415]) -Greedy action tensor([ 1.3515, -0.3711, -0.1320, 0.4040]) tensor([0.5577, 0.0996, 0.1265, 0.2162]) -Greedy action tensor([ 1.2690, -0.5235, -0.6660, 0.3243]) tensor([0.5883, 0.0980, 0.0850, 0.2287]) -Greedy action tensor([ 1.9553, -0.7792, -0.8364, 0.5357]) tensor([0.7310, 0.0475, 0.0448, 0.1768]) -Greedy action tensor([ 1.5364, -0.4090, -0.4790, 0.2349]) tensor([0.6459, 0.0923, 0.0861, 0.1758]) -Greedy action tensor([ 1.8883, -0.6393, -0.6438, 0.3004]) tensor([0.7333, 0.0586, 0.0583, 0.1498]) -Greedy action tensor([ 2.0335, -0.8731, -0.4157, 0.4704]) tensor([0.7405, 0.0405, 0.0639, 0.1551]) -Greedy action tensor([ 0.9157, 0.2068, -0.0516, -0.2912]) tensor([0.4605, 0.2267, 0.1751, 0.1378]) -Greedy action tensor([ 1.3975, -0.3533, -0.1524, 0.0349]) tensor([0.6090, 0.1058, 0.1293, 0.1559]) -Greedy action tensor([ 1.6156, -0.7056, -0.5290, 0.2988]) tensor([0.6742, 0.0662, 0.0790, 0.1807]) -Greedy action tensor([ 0.9725, 0.0597, -0.2875, 0.0812]) tensor([0.4773, 0.1916, 0.1354, 0.1958]) -Greedy action tensor([ 1.3334, -0.8682, -0.2922, 0.6239]) tensor([0.5558, 0.0615, 0.1094, 0.2734]) -Greedy action tensor([ 1.9458, 0.7564, -0.0980, 0.2832]) tensor([0.6159, 0.1875, 0.0798, 0.1168]) -Greedy action tensor([ 1.4056, -0.3444, -0.8513, 0.4625]) tensor([0.5996, 0.1042, 0.0628, 0.2335]) -Greedy action tensor([ 1.6027, -0.6073, -0.8850, -0.0134]) tensor([0.7187, 0.0788, 0.0597, 0.1428]) -Greedy action tensor([ 1.5345, -0.4882, -0.3784, 0.2217]) tensor([0.6456, 0.0854, 0.0953, 0.1737]) -Greedy action tensor([ 1.5305, -0.7188, -0.2309, 0.3164]) tensor([0.6352, 0.0670, 0.1091, 0.1887]) -Greedy action tensor([ 2.3311, 0.1701, -0.8784, 0.0907]) tensor([0.7924, 0.0913, 0.0320, 0.0843]) -Greedy action tensor([ 1.2959, -0.4869, -0.6638, -0.0499]) tensor([0.6372, 0.1072, 0.0898, 0.1659]) -Greedy action tensor([ 1.1448, -0.2352, -0.5819, 0.3840]) tensor([0.5272, 0.1326, 0.0938, 0.2464]) -Greedy action tensor([ 1.6743, -0.7640, -0.9545, 0.2793]) tensor([0.7106, 0.0620, 0.0513, 0.1761]) -Greedy action tensor([ 1.7047, -1.0150, -0.3768, 0.7189]) tensor([0.6395, 0.0421, 0.0798, 0.2386]) -Greedy action tensor([ 2.0602, -0.4238, -0.1208, 0.1760]) tensor([0.7417, 0.0619, 0.0838, 0.1127]) -Greedy action tensor([ 0.7768, -0.1225, -0.1856, -0.4802]) tensor([0.4823, 0.1962, 0.1842, 0.1372]) -Greedy action tensor([ 0.5346, -0.1370, -0.0052, -0.0744]) tensor([0.3791, 0.1937, 0.2210, 0.2062]) -Greedy action tensor([ 0.6892, -0.4721, 0.1081, -0.5965]) tensor([0.4654, 0.1457, 0.2603, 0.1287]) -Greedy action tensor([ 0.9524, -0.5492, -0.0116, -0.5011]) tensor([0.5441, 0.1212, 0.2075, 0.1272]) -Greedy action tensor([ 0.6934, 0.1994, -0.0130, 0.1491]) tensor([0.3726, 0.2274, 0.1838, 0.2162]) -Greedy action tensor([ 0.9566, -0.5811, -0.1395, -0.3292]) tensor([0.5478, 0.1177, 0.1831, 0.1514]) -Greedy action tensor([ 0.4319, -0.1441, -0.0038, -0.1364]) tensor([0.3603, 0.2025, 0.2330, 0.2041]) -Greedy action tensor([ 0.8273, 0.2661, -0.0599, 0.2557]) tensor([0.3926, 0.2240, 0.1617, 0.2217]) -Greedy action tensor([ 1.0152, -0.6266, -0.0128, -0.3609]) tensor([0.5543, 0.1073, 0.1983, 0.1400]) -Greedy action tensor([ 0.8840, -0.6414, 0.1966, -0.4476]) tensor([0.5039, 0.1096, 0.2534, 0.1331]) -Greedy action tensor([ 0.7815, -0.7717, 0.0394, -0.3783]) tensor([0.4997, 0.1057, 0.2379, 0.1567]) -Greedy action tensor([ 0.2927, -0.1694, -0.1196, -0.1644]) tensor([0.3419, 0.2154, 0.2264, 0.2164]) -Greedy action tensor([ 0.8586, -0.6968, 0.0583, -0.3128]) tensor([0.5075, 0.1071, 0.2280, 0.1573]) -Greedy action tensor([ 0.7539, -0.5213, 0.0481, -0.2638]) tensor([0.4685, 0.1309, 0.2313, 0.1693]) -Greedy action tensor([ 0.4923, -0.0636, -0.1893, -0.0998]) tensor([0.3799, 0.2179, 0.1921, 0.2101]) -Greedy action tensor([ 0.9344, -0.6940, 0.0992, -0.3906]) tensor([0.5275, 0.1035, 0.2288, 0.1402]) -Greedy action tensor([ 0.4366, -0.0710, -0.0961, -0.1100]) tensor([0.3613, 0.2175, 0.2121, 0.2092]) -Greedy action tensor([ 0.7402, -0.6689, -0.0812, -0.1988]) tensor([0.4819, 0.1178, 0.2119, 0.1884]) -Greedy action tensor([ 0.6841, -0.3732, -0.0204, -0.4460]) tensor([0.4619, 0.1605, 0.2284, 0.1492]) -Greedy action tensor([ 0.3536, 0.0225, 0.0821, -0.0365]) tensor([0.3167, 0.2275, 0.2414, 0.2144]) -Greedy action tensor([ 0.6966, -0.5550, -0.1730, 0.0965]) tensor([0.4437, 0.1269, 0.1859, 0.2435]) -Greedy action tensor([ 0.9441, -0.6208, 0.0541, -0.4021]) tensor([0.5319, 0.1112, 0.2184, 0.1384]) -Greedy action tensor([ 0.4562, -0.3515, -0.2168, -0.5513]) tensor([0.4308, 0.1921, 0.2198, 0.1573]) -Greedy action tensor([ 1.1293, -0.3132, -0.0889, -0.3881]) tensor([0.5710, 0.1349, 0.1689, 0.1252]) -Greedy action tensor([ 0.9129, -0.6667, 0.0590, -0.1874]) tensor([0.5090, 0.1049, 0.2167, 0.1694]) -Greedy action tensor([ 1.0790, -0.6631, -0.2363, -0.4771]) tensor([0.6044, 0.1059, 0.1622, 0.1275]) -Greedy action tensor([ 1.2533, -0.8548, 0.1580, -0.6792]) tensor([0.6247, 0.0759, 0.2089, 0.0905]) -Greedy action tensor([ 0.7751, -0.2042, -0.0091, -0.1910]) tensor([0.4519, 0.1698, 0.2063, 0.1720]) -Greedy action tensor([ 0.3678, -0.0919, -0.1951, -0.0049]) tensor([0.3460, 0.2185, 0.1971, 0.2384]) -Greedy action tensor([ 1.0317, -0.5429, -0.0681, -0.5598]) tensor([0.5735, 0.1188, 0.1909, 0.1168]) -Greedy action tensor([ 0.2901, -0.1550, -0.1149, -0.2380]) tensor([0.3451, 0.2211, 0.2302, 0.2035]) -Greedy action tensor([ 0.3966, -0.0929, -0.0987, -0.0269]) tensor([0.3476, 0.2130, 0.2118, 0.2276]) -Greedy action tensor([ 0.7663, -0.4000, 0.0680, -0.3754]) tensor([0.4699, 0.1464, 0.2337, 0.1500]) -Greedy action tensor([ 0.9402, -1.2329, 0.0113, -0.6593]) tensor([0.5845, 0.0665, 0.2309, 0.1181]) -Greedy action tensor([ 0.9721, -0.6425, 0.1365, -0.5036]) tensor([0.5373, 0.1069, 0.2330, 0.1228]) -Greedy action tensor([ 1.2268, -0.6013, -0.1141, -0.4559]) tensor([0.6218, 0.0999, 0.1627, 0.1156]) -Greedy action tensor([ 0.6978, -0.5699, -0.0512, -0.5676]) tensor([0.4910, 0.1382, 0.2322, 0.1385]) -Greedy action tensor([ 0.6582, -0.3332, -0.1191, -0.0752]) tensor([0.4327, 0.1606, 0.1989, 0.2078]) -Greedy action tensor([ 1.0086, -0.4745, -0.1743, -0.1472]) tensor([0.5411, 0.1228, 0.1658, 0.1703]) -Greedy action tensor([ 0.9656, -0.6748, -0.0852, -0.3441]) tensor([0.5514, 0.1069, 0.1928, 0.1488]) -Greedy action tensor([ 0.9304, -0.5323, -0.0037, -0.3673]) tensor([0.5269, 0.1220, 0.2071, 0.1439]) -Greedy action tensor([ 0.9871, -0.2242, -0.0323, -0.3392]) tensor([0.5197, 0.1548, 0.1875, 0.1380]) -Greedy action tensor([ 0.8618, -0.5922, -0.0707, 0.0418]) tensor([0.4837, 0.1130, 0.1903, 0.2130]) -Greedy action tensor([ 0.5237, 0.0069, 0.0734, -0.2104]) tensor([0.3685, 0.2198, 0.2349, 0.1769]) -Greedy action tensor([ 0.9211, -0.6066, 0.0922, -0.2558]) tensor([0.5097, 0.1106, 0.2225, 0.1571]) -Greedy action tensor([ 1.3679, -0.8505, 0.1144, -0.6775]) tensor([0.6563, 0.0714, 0.1874, 0.0849]) -Greedy action tensor([ 0.7233, -0.1434, 0.0154, -0.0959]) tensor([0.4249, 0.1786, 0.2093, 0.1873]) -Greedy action tensor([ 0.7489, -0.4509, -0.1102, -0.2913]) tensor([0.4812, 0.1450, 0.2038, 0.1700]) -Greedy action tensor([ 0.3663, 0.0451, -0.1266, -0.0104]) tensor([0.3309, 0.2400, 0.2021, 0.2270]) -Greedy action tensor([ 0.9531, -0.5145, 0.0549, -0.3694]) tensor([0.5252, 0.1210, 0.2139, 0.1399]) -Greedy action tensor([ 0.6940, -0.3333, -0.0160, -0.1526]) tensor([0.4389, 0.1571, 0.2158, 0.1882]) -Greedy action tensor([ 1.2767, -0.6217, -0.1824, -0.4576]) tensor([0.6415, 0.0961, 0.1491, 0.1132]) -Greedy action tensor([ 1.0115, -0.5875, -0.0812, -0.5552]) tensor([0.5727, 0.1157, 0.1920, 0.1195]) -Greedy action tensor([ 0.8543, -0.2581, 0.0254, -0.1369]) tensor([0.4681, 0.1539, 0.2043, 0.1737]) -Greedy action tensor([ 0.6540, -0.3449, -0.1160, -0.3784]) tensor([0.4572, 0.1684, 0.2117, 0.1628]) -Greedy action tensor([ 0.5933, -0.6339, 0.1303, -0.7623]) tensor([0.4587, 0.1344, 0.2887, 0.1182]) -Greedy action tensor([ 0.9939, -0.1073, 0.0513, -0.0232]) tensor([0.4799, 0.1595, 0.1870, 0.1736]) -Greedy action tensor([ 1.0404, -0.8497, 0.1346, -0.3701]) tensor([0.5558, 0.0840, 0.2247, 0.1356]) -Greedy action tensor([ 0.9536, -0.7123, 0.0203, -0.2445]) tensor([0.5308, 0.1003, 0.2087, 0.1602]) -Greedy action tensor([ 1.0782, -0.9872, 0.0493, -0.4536]) tensor([0.5881, 0.0746, 0.2102, 0.1271]) -Greedy action tensor([ 0.4197, -0.0121, 0.0084, -0.0055]) tensor([0.3372, 0.2189, 0.2235, 0.2204]) -Greedy action tensor([ 1.0127, -0.6737, -0.0681, -0.4102]) tensor([0.5664, 0.1049, 0.1922, 0.1365]) -Greedy action tensor([ 1.0756, -0.5303, -0.0727, -0.2814]) tensor([0.5633, 0.1131, 0.1787, 0.1450]) -Greedy action tensor([ 0.9787, -0.6542, 0.1214, -0.3798]) tensor([0.5328, 0.1041, 0.2261, 0.1370]) -Greedy action tensor([ 0.9943, -0.5855, -0.0405, -0.2895]) tensor([0.5440, 0.1121, 0.1933, 0.1507]) -Greedy action tensor([ 0.7413, -0.6205, 0.3631, -0.4902]) tensor([0.4478, 0.1147, 0.3068, 0.1307]) -Greedy action tensor([ 0.8855, -0.6174, -0.0668, -0.3529]) tensor([0.5268, 0.1172, 0.2033, 0.1527]) -Greedy action tensor([ 0.8191, -0.4272, -0.1333, -0.6514]) tensor([0.5254, 0.1511, 0.2027, 0.1208]) -Greedy action tensor([ 1.3483, -0.8037, 0.1010, -0.7111]) tensor([0.6531, 0.0759, 0.1876, 0.0833]) -Greedy action tensor([ 0.7692, -0.0688, -0.0335, 0.0046]) tensor([0.4262, 0.1844, 0.1910, 0.1984]) -Greedy action tensor([ 0.7847, -0.4705, -0.0034, -0.3064]) tensor([0.4818, 0.1373, 0.2191, 0.1618]) -Greedy action tensor([ 0.7348, -0.1681, -0.1133, -0.0200]) tensor([0.4341, 0.1760, 0.1859, 0.2041]) -Greedy action tensor([ 0.7641, -0.4900, -0.0078, -0.2288]) tensor([0.4722, 0.1347, 0.2182, 0.1749]) -Greedy action tensor([ 1.3345, -0.8214, 0.1111, -0.7830]) tensor([0.6534, 0.0757, 0.1923, 0.0786]) -Greedy action tensor([ 0.5534, -0.7468, -0.0204, -0.6241]) tensor([0.4664, 0.1271, 0.2628, 0.1437]) -Greedy action tensor([ 0.9774, -0.5519, -0.1520, -0.2276]) tensor([0.5436, 0.1178, 0.1757, 0.1629]) -Greedy action tensor([ 0.9433, -0.9505, 0.0041, -0.2207]) tensor([0.5395, 0.0812, 0.2109, 0.1684]) -Greedy action tensor([ 9.4034e-01, -7.9269e-01, 8.1328e-04, -6.6109e-01]) tensor([0.5652, 0.0999, 0.2209, 0.1140]) -Greedy action tensor([ 1.2782, -0.7972, -0.1682, -0.8163]) tensor([0.6738, 0.0846, 0.1586, 0.0830]) -Greedy action tensor([ 0.9019, -0.6360, 0.2026, -0.7602]) tensor([0.5259, 0.1130, 0.2613, 0.0998]) -Greedy action tensor([ 0.5114, 0.2533, -0.2408, -0.0958]) tensor([0.3586, 0.2770, 0.1690, 0.1954]) -Greedy action tensor([ 0.8368, 0.4042, -0.1971, 0.2052]) tensor([0.3943, 0.2558, 0.1402, 0.2097]) -Greedy action tensor([ 0.4539, -0.0845, -0.2159, 0.1527]) tensor([0.3527, 0.2059, 0.1805, 0.2610]) -Greedy action tensor([-0.5180, -0.1638, 0.0140, -1.1950]) tensor([0.2157, 0.3074, 0.3672, 0.1096]) -Greedy action tensor([-1.2704, -0.4725, -1.1743, -0.4498]) tensor([0.1517, 0.3368, 0.1670, 0.3446]) -Greedy action tensor([-0.4477, 0.4611, 1.2240, -0.8720]) tensor([0.1057, 0.2624, 0.5627, 0.0692]) -Greedy action tensor([ 1.1657, -0.4662, 0.3234, 0.0917]) tensor([0.5082, 0.0994, 0.2189, 0.1736]) -Greedy action tensor([-0.1876, -1.2576, -0.0462, -0.5695]) tensor([0.3147, 0.1080, 0.3625, 0.2148]) -Greedy action tensor([ 1.1400, -0.3631, 0.4696, 0.8086]) tensor([0.4078, 0.0907, 0.2086, 0.2928]) -Greedy action tensor([-1.0691, -1.3575, -0.3889, -0.2140]) tensor([0.1646, 0.1234, 0.3250, 0.3871]) -Greedy action tensor([ 0.0192, -1.0150, 0.8875, -0.4201]) tensor([0.2282, 0.0811, 0.5437, 0.1470]) -Greedy action tensor([-0.2021, -0.8784, -0.1188, -0.4239]) tensor([0.2944, 0.1497, 0.3200, 0.2359]) -Greedy action tensor([ 0.3620, 0.1004, -0.7848, -0.2199]) tensor([0.3779, 0.2909, 0.1200, 0.2112]) -Greedy action tensor([-0.5146, 0.8582, 0.0381, 0.0049]) tensor([0.1195, 0.4718, 0.2077, 0.2010]) -Greedy action tensor([-1.2278, 0.2605, 1.1070, -1.3781]) tensor([0.0602, 0.2666, 0.6215, 0.0518]) -Greedy action tensor([-1.1412, -0.3948, -0.6219, -0.4615]) tensor([0.1479, 0.3119, 0.2485, 0.2918]) -Greedy action tensor([-0.1066, 0.1095, -0.3405, -1.2855]) tensor([0.2994, 0.3716, 0.2369, 0.0921]) -Greedy action tensor([ 0.5474, -0.8595, 1.1849, -1.3020]) tensor([0.3036, 0.0743, 0.5743, 0.0478]) -Greedy action tensor([ 0.6745, 0.0665, -0.0634, -0.5900]) tensor([0.4338, 0.2362, 0.2074, 0.1225]) -Greedy action tensor([-0.7126, -0.2495, 0.9044, -0.9580]) tensor([0.1189, 0.1890, 0.5991, 0.0930]) -Greedy action tensor([-0.7059, -1.3011, 0.6256, -0.6469]) tensor([0.1563, 0.0862, 0.5918, 0.1658]) -Greedy action tensor([-0.4413, 0.1710, -0.1511, -1.0456]) tensor([0.2115, 0.3902, 0.2827, 0.1156]) -Greedy action tensor([-0.3407, 0.2740, -0.1215, -0.6615]) tensor([0.2075, 0.3836, 0.2583, 0.1505]) -Greedy action tensor([ 0.4101, -0.0770, -0.1790, 0.2809]) tensor([0.3281, 0.2016, 0.1820, 0.2883]) -Greedy action tensor([ 0.8756, -0.6036, 0.3118, -0.1261]) tensor([0.4621, 0.1053, 0.2629, 0.1697]) -Greedy action tensor([ 0.0026, -0.3566, 0.1510, 0.3629]) tensor([0.2330, 0.1627, 0.2703, 0.3341]) -Greedy action tensor([-0.6927, -0.6406, -0.7976, 0.7959]) tensor([0.1354, 0.1427, 0.1219, 0.6000]) -Greedy action tensor([ 0.0245, -1.2373, 0.3484, 0.0622]) tensor([0.2700, 0.0764, 0.3732, 0.2804]) -Greedy action tensor([ 0.5295, 0.1399, -0.7560, -0.1669]) tensor([0.4078, 0.2762, 0.1128, 0.2032]) -Greedy action tensor([ 0.0536, -1.0147, -0.2579, 0.7125]) tensor([0.2495, 0.0857, 0.1827, 0.4821]) -Greedy action tensor([-0.6477, -0.3375, 0.2565, -0.0298]) tensor([0.1495, 0.2039, 0.3693, 0.2773]) -Greedy action tensor([-0.7884, -0.2223, -1.1320, -0.0096]) tensor([0.1770, 0.3118, 0.1255, 0.3857]) -Greedy action tensor([ 0.3536, -0.0219, 0.6608, -0.4464]) tensor([0.2861, 0.1965, 0.3889, 0.1285]) -Greedy action tensor([-0.2618, 0.1921, 0.7626, -0.3191]) tensor([0.1586, 0.2497, 0.4418, 0.1498]) -Greedy action tensor([-0.0083, 0.2791, -0.2207, -0.8617]) tensor([0.2803, 0.3736, 0.2267, 0.1194]) -Greedy action tensor([ 0.2547, 0.0098, 0.5025, -0.5666]) tensor([0.2854, 0.2234, 0.3656, 0.1255]) -Greedy action tensor([-0.8757, -0.3386, -0.5755, -0.7761]) tensor([0.1936, 0.3312, 0.2613, 0.2139]) -Greedy action tensor([ 0.8558, -0.9492, 0.4966, 0.6225]) tensor([0.3767, 0.0620, 0.2630, 0.2983]) -Greedy action tensor([-0.0992, 0.0617, -0.1781, -0.6317]) tensor([0.2713, 0.3187, 0.2507, 0.1593]) -Greedy action tensor([ 0.1444, 0.5272, -0.5739, -0.3661]) tensor([0.2814, 0.4126, 0.1372, 0.1689]) -Greedy action tensor([-0.6971, -1.5925, -0.0947, -0.3170]) tensor([0.2129, 0.0869, 0.3888, 0.3113]) -Greedy action tensor([ 0.8401, -1.6396, 0.2653, 0.4792]) tensor([0.4267, 0.0357, 0.2401, 0.2974]) -Greedy action tensor([-0.3898, -0.9484, 0.2130, -0.8387]) tensor([0.2477, 0.1417, 0.4526, 0.1581]) -Greedy action tensor([ 0.3038, -0.0584, 0.8471, -0.4023]) tensor([0.2557, 0.1780, 0.4402, 0.1262]) -Greedy action tensor([-0.6105, -0.5109, -0.2762, -0.5440]) tensor([0.2188, 0.2417, 0.3056, 0.2338]) -Greedy action tensor([ 1.0569, -0.5556, -0.1232, 1.2483]) tensor([0.3680, 0.0734, 0.1131, 0.4456]) -Greedy action tensor([ 0.1089, -0.8131, -0.3322, -0.3186]) tensor([0.3713, 0.1477, 0.2389, 0.2422]) -Greedy action tensor([-0.0788, -0.4895, -0.6572, -0.7197]) tensor([0.3635, 0.2411, 0.2039, 0.1915]) -Greedy action tensor([-0.5888, 0.3237, 0.8583, -0.7941]) tensor([0.1169, 0.2911, 0.4968, 0.0952]) -Greedy action tensor([-0.2471, -0.3675, 0.5789, 0.4038]) tensor([0.1643, 0.1456, 0.3752, 0.3149]) -Greedy action tensor([-0.1801, -0.6528, -0.1077, -0.4215]) tensor([0.2870, 0.1789, 0.3086, 0.2255]) -Greedy action tensor([ 0.8457, -0.0880, 0.2756, 0.3459]) tensor([0.3898, 0.1532, 0.2204, 0.2365]) -Greedy action tensor([ 0.6640, -0.0812, 1.1317, 0.4896]) tensor([0.2557, 0.1214, 0.4082, 0.2148]) -Greedy action tensor([ 0.0443, -0.8224, -0.2939, 0.6004]) tensor([0.2579, 0.1084, 0.1839, 0.4498]) -Greedy action tensor([ 0.4452, -0.1244, -0.2340, -0.9847]) tensor([0.4325, 0.2447, 0.2193, 0.1035]) -Greedy action tensor([-1.5007, -0.4156, -0.8594, -0.0526]) tensor([0.0989, 0.2927, 0.1878, 0.4207]) -Greedy action tensor([-0.8449, -0.5692, -0.1858, -0.1185]) tensor([0.1583, 0.2085, 0.3060, 0.3272]) -Greedy action tensor([ 0.1391, -0.9883, 1.0648, -0.1426]) tensor([0.2173, 0.0704, 0.5484, 0.1639]) -Greedy action tensor([-0.3016, -1.3651, 0.3736, -0.3666]) tensor([0.2355, 0.0813, 0.4626, 0.2207]) -Greedy action tensor([ 1.0995, -0.3929, -0.3388, 0.7925]) tensor([0.4550, 0.1023, 0.1080, 0.3347]) -Greedy action tensor([-0.7878, -0.0272, -1.0965, 0.2695]) tensor([0.1481, 0.3169, 0.1088, 0.4263]) -Greedy action tensor([ 1.1739, -1.0873, 0.0195, 0.9764]) tensor([0.4464, 0.0465, 0.1407, 0.3664]) -Greedy action tensor([ 0.3209, -1.6463, -0.4685, 0.2371]) tensor([0.3978, 0.0556, 0.1807, 0.3659]) -Greedy action tensor([ 0.5910, -0.9234, 0.3381, 0.2665]) tensor([0.3677, 0.0809, 0.2856, 0.2658]) -Greedy action tensor([-0.2798, -0.0744, -0.4010, -0.7304]) tensor([0.2666, 0.3274, 0.2362, 0.1699]) -Greedy action tensor([-0.3194, -0.7469, -0.6946, -0.5684]) tensor([0.3206, 0.2091, 0.2203, 0.2500]) -Greedy action tensor([ 0.1496, -0.3987, -0.6454, 0.0544]) tensor([0.3403, 0.1967, 0.1537, 0.3094]) -Greedy action tensor([ 0.3923, -1.5015, -0.0477, 0.1489]) tensor([0.3878, 0.0584, 0.2498, 0.3040]) -Greedy action tensor([ 0.0218, -1.1264, 0.1332, 0.1382]) tensor([0.2810, 0.0891, 0.3141, 0.3157]) -Greedy action tensor([ 0.6069, -1.1884, -0.2614, -0.8154]) tensor([0.5474, 0.0909, 0.2297, 0.1320]) -Greedy action tensor([ 0.5583, -0.5631, 0.2040, -0.6191]) tensor([0.4282, 0.1395, 0.3004, 0.1319]) -Greedy action tensor([ 1.1660, -1.2235, -0.0950, -0.6123]) tensor([0.6477, 0.0594, 0.1835, 0.1094]) -Greedy action tensor([-0.5187, -0.8507, 0.0048, -0.6017]) tensor([0.2312, 0.1659, 0.3902, 0.2128]) -Greedy action tensor([ 0.2580, 0.2930, -0.6429, 0.8285]) tensor([0.2375, 0.2459, 0.0965, 0.4201]) -Greedy action tensor([ 1.5595, -0.2545, 1.2600, 0.6574]) tensor([0.4329, 0.0706, 0.3209, 0.1756]) -Greedy action tensor([-0.6849, -0.1227, 0.5662, -0.3498]) tensor([0.1308, 0.2294, 0.4569, 0.1828]) -Greedy action tensor([ 1.5509, 0.1009, 0.1476, -0.5078]) tensor([0.6219, 0.1459, 0.1529, 0.0794]) -Greedy action tensor([ 0.0570, -0.0295, -0.0602, -1.2513]) tensor([0.3250, 0.2981, 0.2891, 0.0878]) -Greedy action tensor([-0.9745, -2.3732, 2.2403, -1.2960]) tensor([0.0372, 0.0092, 0.9266, 0.0270]) -Greedy action tensor([ 0.0464, 0.3324, -0.0173, -1.5189]) tensor([0.2875, 0.3827, 0.2697, 0.0601]) -Greedy action tensor([ 0.5058, 0.3724, 0.5914, -0.3818]) tensor([0.2962, 0.2592, 0.3227, 0.1219]) -Greedy action tensor([ 0.3463, -0.8248, 0.3670, 0.5719]) tensor([0.2790, 0.0865, 0.2849, 0.3496]) -Greedy action tensor([-1.2889, 0.0360, 0.4900, -0.5190]) tensor([0.0778, 0.2929, 0.4612, 0.1681]) -Greedy action tensor([-1.9342, -0.4404, 0.6628, -0.1743]) tensor([0.0405, 0.1804, 0.5437, 0.2354]) -Greedy action tensor([-1.8646, -0.4228, 0.6360, -0.1344]) tensor([0.0434, 0.1834, 0.5286, 0.2447]) -Greedy action tensor([-1.4941, -0.2801, 0.4300, -0.0572]) tensor([0.0648, 0.2183, 0.4441, 0.2728]) -Greedy action tensor([-1.9313, -0.4334, 0.6618, -0.1731]) tensor([0.0406, 0.1815, 0.5426, 0.2354]) -Greedy action tensor([-1.7036, -0.3005, 0.6332, -0.0689]) tensor([0.0487, 0.1980, 0.5037, 0.2496]) -Greedy action tensor([-1.9439, -0.4289, 0.6622, -0.1800]) tensor([0.0401, 0.1825, 0.5433, 0.2341]) -Greedy action tensor([-1.5907, -0.2099, 0.4676, -0.0549]) tensor([0.0573, 0.2279, 0.4487, 0.2661]) -Greedy action tensor([-1.7712, -0.3634, 0.6179, -0.0461]) tensor([0.0463, 0.1892, 0.5047, 0.2598]) -Greedy action tensor([-1.1503, -0.3334, 0.6907, 0.3962]) tensor([0.0701, 0.1587, 0.4420, 0.3292]) -Greedy action tensor([-1.9342, -0.3844, 0.6501, -0.1733]) tensor([0.0404, 0.1901, 0.5348, 0.2348]) -Greedy action tensor([-1.9290, -0.4509, 0.6631, -0.1730]) tensor([0.0408, 0.1787, 0.5445, 0.2360]) -Greedy action tensor([-1.9010, -0.3976, 0.6410, -0.1778]) tensor([0.0420, 0.1889, 0.5337, 0.2353]) -Greedy action tensor([-1.9331, -0.4333, 0.6620, -0.1739]) tensor([0.0405, 0.1815, 0.5427, 0.2353]) -Greedy action tensor([-1.7802, 0.1509, 0.5033, -0.0881]) tensor([0.0432, 0.2981, 0.4240, 0.2347]) -Greedy action tensor([-1.8702, -0.4195, 0.6301, -0.1432]) tensor([0.0433, 0.1849, 0.5281, 0.2437]) -Greedy action tensor([-1.8125, 0.0682, 0.5197, -0.1082]) tensor([0.0428, 0.2808, 0.4410, 0.2354]) -Greedy action tensor([-1.7163, -0.0948, 0.5129, -0.0991]) tensor([0.0490, 0.2482, 0.4557, 0.2471]) -Greedy action tensor([-1.9043, -0.4350, 0.6494, -0.1573]) tensor([0.0418, 0.1816, 0.5370, 0.2397]) -Greedy action tensor([-1.9384, -0.4435, 0.6603, -0.1774]) tensor([0.0404, 0.1803, 0.5439, 0.2353]) -Greedy action tensor([-1.7499, -0.4393, 0.5796, -0.0720]) tensor([0.0492, 0.1824, 0.5052, 0.2633]) -Greedy action tensor([-1.7332, -0.4788, 0.6648, 0.1550]) tensor([0.0452, 0.1585, 0.4975, 0.2988]) -Greedy action tensor([-1.9048, -0.4011, 0.6456, -0.1568]) tensor([0.0416, 0.1870, 0.5327, 0.2388]) -Greedy action tensor([-1.6886, -0.1909, 0.5677, -0.0112]) tensor([0.0491, 0.2195, 0.4687, 0.2627]) -Greedy action tensor([-1.7755, -0.3468, 0.5680, -0.1112]) tensor([0.0479, 0.1999, 0.4991, 0.2530]) -Greedy action tensor([-1.0423, 0.8633, 0.1010, 0.3142]) tensor([0.0678, 0.4560, 0.2128, 0.2634]) -Greedy action tensor([-1.7734, -0.3530, 0.6192, -0.0488]) tensor([0.0461, 0.1908, 0.5044, 0.2586]) -Greedy action tensor([-1.9096, -0.4491, 0.6534, -0.1612]) tensor([0.0416, 0.1793, 0.5400, 0.2391]) -Greedy action tensor([-1.7433, -0.5046, 0.5677, -0.1114]) tensor([0.0509, 0.1756, 0.5132, 0.2602]) -Greedy action tensor([-1.8370, -0.4492, 0.6009, -0.1728]) tensor([0.0460, 0.1843, 0.5267, 0.2430]) -Greedy action tensor([-1.8994, -0.4484, 0.6476, -0.1617]) tensor([0.0422, 0.1799, 0.5383, 0.2396]) -Greedy action tensor([-1.8375, -0.3905, 0.5851, -0.1483]) tensor([0.0456, 0.1937, 0.5139, 0.2468]) -Greedy action tensor([-1.1952, 0.3806, 0.3424, -0.1861]) tensor([0.0756, 0.3654, 0.3517, 0.2073]) -Greedy action tensor([-1.8957, -0.4476, 0.6441, -0.1568]) tensor([0.0423, 0.1801, 0.5366, 0.2409]) -Greedy action tensor([-1.9277, -0.4603, 0.6584, -0.1737]) tensor([0.0410, 0.1778, 0.5443, 0.2369]) -Greedy action tensor([-1.8659, -0.4551, 0.6543, -0.0675]) tensor([0.0424, 0.1739, 0.5274, 0.2562]) -Greedy action tensor([-1.7826, -0.4823, 0.6027, -0.0677]) tensor([0.0474, 0.1741, 0.5151, 0.2635]) -Greedy action tensor([-1.8837, -0.3537, 0.6312, -0.1470]) tensor([0.0423, 0.1952, 0.5226, 0.2400]) -Greedy action tensor([-1.8345, -0.3882, 0.6024, -0.1231]) tensor([0.0450, 0.1911, 0.5147, 0.2492]) -Greedy action tensor([-1.7170, -0.3341, 0.6093, -0.0147]) tensor([0.0483, 0.1925, 0.4944, 0.2649]) -Greedy action tensor([-1.8907, -0.3972, 0.6326, -0.1600]) tensor([0.0424, 0.1889, 0.5291, 0.2395]) -Greedy action tensor([-1.9151, -0.4149, 0.6604, -0.1462]) tensor([0.0408, 0.1831, 0.5366, 0.2395]) -Greedy action tensor([-1.6955, -0.3984, 0.5422, -0.1130]) tensor([0.0529, 0.1936, 0.4959, 0.2576]) -Greedy action tensor([-1.8970, -0.4512, 0.6404, -0.1597]) tensor([0.0424, 0.1801, 0.5365, 0.2410]) -Greedy action tensor([-1.9002, -0.1712, 0.6130, -0.1813]) tensor([0.0407, 0.2295, 0.5027, 0.2272]) -Greedy action tensor([-1.0711, 0.8053, 0.1614, 0.0927]) tensor([0.0706, 0.4611, 0.2422, 0.2261]) -Greedy action tensor([-1.9120, -0.3787, 0.6471, -0.1617]) tensor([0.0411, 0.1906, 0.5316, 0.2367]) -Greedy action tensor([-1.9208, -0.4351, 0.6568, -0.1669]) tensor([0.0410, 0.1814, 0.5405, 0.2371]) -Greedy action tensor([-1.3415, -0.5643, 0.3648, 0.1462]) tensor([0.0763, 0.1659, 0.4202, 0.3377]) -Greedy action tensor([-1.9308, -0.4458, 0.6619, -0.1742]) tensor([0.0407, 0.1797, 0.5439, 0.2357]) -Greedy action tensor([-1.8683, -0.3109, 0.6171, -0.1270]) tensor([0.0426, 0.2023, 0.5118, 0.2432]) -Greedy action tensor([-1.8995, -0.3332, 0.6356, -0.1546]) tensor([0.0414, 0.1984, 0.5229, 0.2372]) -Greedy action tensor([-1.7282, -0.4160, 0.6608, 0.1272]) tensor([0.0454, 0.1687, 0.4953, 0.2905]) -Greedy action tensor([-1.8991, -0.4545, 0.6491, -0.1591]) tensor([0.0422, 0.1787, 0.5389, 0.2402]) -Greedy action tensor([-1.9308, -0.3180, 0.6417, -0.1801]) tensor([0.0402, 0.2017, 0.5266, 0.2315]) -Greedy action tensor([-1.4051, -0.4111, 0.3635, 0.1064]) tensor([0.0709, 0.1917, 0.4158, 0.3216]) -Greedy action tensor([-1.9033, -0.4408, 0.6468, -0.1612]) tensor([0.0420, 0.1811, 0.5374, 0.2395]) -Greedy action tensor([-1.7840, -0.3170, 0.6739, -0.3581]) tensor([0.0472, 0.2048, 0.5515, 0.1965]) -Greedy action tensor([-1.8125, -0.5436, 0.7446, -0.0679]) tensor([0.0431, 0.1535, 0.5565, 0.2469]) -Greedy action tensor([-1.8668, -0.4195, 0.6249, -0.1485]) tensor([0.0437, 0.1856, 0.5274, 0.2434]) -Greedy action tensor([-1.8755, -0.4006, 0.6499, -0.1403]) tensor([0.0425, 0.1857, 0.5309, 0.2409]) -Greedy action tensor([-1.5627, -0.0498, 0.4299, -0.0159]) tensor([0.0569, 0.2584, 0.4174, 0.2673]) -Greedy action tensor([-1.7812, -0.3626, 0.5695, -0.1096]) tensor([0.0477, 0.1973, 0.5010, 0.2540]) -Greedy action tensor([-1.8114, -0.4234, 0.6053, -0.1117]) tensor([0.0461, 0.1848, 0.5168, 0.2523]) -Greedy action tensor([-1.8705, -0.4419, 0.6266, -0.1496]) tensor([0.0437, 0.1821, 0.5302, 0.2440]) -Greedy action tensor([-1.9102, -0.4545, 0.6559, -0.1639]) tensor([0.0416, 0.1784, 0.5415, 0.2385]) -Greedy action tensor([-1.7278, 0.2733, 0.4484, -0.0290]) tensor([0.0441, 0.3262, 0.3886, 0.2411]) -Greedy action tensor([-1.8258, -0.4446, 0.6101, -0.1196]) tensor([0.0456, 0.1816, 0.5214, 0.2514]) -Greedy action tensor([-1.9153, -0.4092, 0.6535, -0.1633]) tensor([0.0411, 0.1854, 0.5365, 0.2370]) -Greedy action tensor([-1.8150, -0.3914, 0.6599, -0.0888]) tensor([0.0441, 0.1833, 0.5245, 0.2481]) -Greedy action tensor([-1.9046, -0.4039, 0.6487, -0.1609]) tensor([0.0416, 0.1865, 0.5342, 0.2378]) -Greedy action tensor([-1.6288, 0.1968, 0.4314, 0.0149]) tensor([0.0494, 0.3068, 0.3880, 0.2558]) -Greedy action tensor([-1.8132, -0.0529, 0.5419, -0.1044]) tensor([0.0437, 0.2542, 0.4607, 0.2414]) -Greedy action tensor([-1.2187, -0.2073, 0.3282, 0.2729]) tensor([0.0776, 0.2133, 0.3644, 0.3448]) -Greedy action tensor([-1.5793, -0.2611, 0.6366, 0.0836]) tensor([0.0521, 0.1948, 0.4781, 0.2750]) -Greedy action tensor([-1.8192, -0.3939, 0.5932, -0.1409]) tensor([0.0461, 0.1919, 0.5149, 0.2471]) -Greedy action tensor([-1.9350, -0.4312, 0.6614, -0.1760]) tensor([0.0405, 0.1820, 0.5427, 0.2349]) -Greedy action tensor([-1.6936, -0.3058, 0.5379, -0.1629]) tensor([0.0528, 0.2115, 0.4917, 0.2440]) -Greedy action tensor([-1.0273, 0.4958, 0.2686, -0.1421]) tensor([0.0857, 0.3932, 0.3133, 0.2078]) -Greedy action tensor([-1.8778, -0.3672, 0.4523, -0.1736]) tensor([0.0469, 0.2126, 0.4825, 0.2580]) -Greedy action tensor([-1.6876, -0.1754, 0.5755, -0.0378]) tensor([0.0491, 0.2229, 0.4722, 0.2558]) -Greedy action tensor([ 1.9184, 0.4163, -0.3777, -0.0413]) tensor([0.6830, 0.1521, 0.0687, 0.0962]) -Greedy action tensor([ 1.3770, -0.3659, -0.5139, 0.1696]) tensor([0.6154, 0.1077, 0.0929, 0.1840]) -Greedy action tensor([ 1.0878, -0.5350, -0.8186, 0.2596]) tensor([0.5609, 0.1107, 0.0834, 0.2450]) -Greedy action tensor([ 2.2999, -0.7821, -0.1059, 0.9664]) tensor([0.7145, 0.0328, 0.0644, 0.1883]) -Greedy action tensor([ 1.4526, -0.4587, -0.5585, 0.5479]) tensor([0.5930, 0.0877, 0.0794, 0.2400]) -Greedy action tensor([ 1.1290, -0.2774, -0.8625, 0.6171]) tensor([0.5048, 0.1237, 0.0689, 0.3026]) -Greedy action tensor([ 1.4607, -0.5303, -0.5875, 0.0880]) tensor([0.6584, 0.0899, 0.0849, 0.1668]) -Greedy action tensor([ 1.5692, -0.3623, -0.5269, 0.4012]) tensor([0.6334, 0.0918, 0.0779, 0.1970]) -Greedy action tensor([2.1237, 0.8023, 0.0103, 0.1490]) tensor([0.6551, 0.1748, 0.0792, 0.0909]) -Greedy action tensor([ 1.7091, -0.6005, -0.3966, 0.6564]) tensor([0.6369, 0.0632, 0.0776, 0.2223]) -Greedy action tensor([ 1.2283, -0.2376, -0.3396, 0.1191]) tensor([0.5652, 0.1305, 0.1178, 0.1864]) -Greedy action tensor([ 1.4961, -0.3420, -0.6405, 0.4872]) tensor([0.6091, 0.0969, 0.0719, 0.2221]) -Greedy action tensor([ 1.2903, -0.3076, -0.4323, 0.2549]) tensor([0.5761, 0.1165, 0.1029, 0.2045]) -Greedy action tensor([ 1.6114, -0.4735, -0.3478, 0.4042]) tensor([0.6393, 0.0795, 0.0901, 0.1911]) -Greedy action tensor([ 1.1595, -0.2846, -0.1741, 0.0244]) tensor([0.5492, 0.1296, 0.1447, 0.1765]) -Greedy action tensor([ 2.3259, -0.7577, -0.2775, 0.1997]) tensor([0.8070, 0.0370, 0.0597, 0.0963]) -Greedy action tensor([ 1.3560, -0.4605, -0.3345, 0.0191]) tensor([0.6212, 0.1010, 0.1146, 0.1632]) -Greedy action tensor([ 1.8011, -0.9936, -0.3744, 0.3590]) tensor([0.7087, 0.0433, 0.0805, 0.1676]) -Greedy action tensor([ 1.4991, -0.7451, -0.4919, 0.2963]) tensor([0.6481, 0.0687, 0.0885, 0.1947]) -Greedy action tensor([ 2.2896, -1.0577, 0.0167, 0.0452]) tensor([0.8037, 0.0283, 0.0828, 0.0852]) -Greedy action tensor([ 1.3258, -0.3627, -0.4558, 0.3382]) tensor([0.5795, 0.1071, 0.0976, 0.2159]) -Greedy action tensor([ 1.4388, -0.4182, -0.6100, 0.3012]) tensor([0.6228, 0.0973, 0.0803, 0.1997]) -Greedy action tensor([ 1.4253, -0.8644, -0.4478, -0.0836]) tensor([0.6775, 0.0686, 0.1041, 0.1498]) -Greedy action tensor([ 1.6150, -1.1337, -0.2100, 0.2208]) tensor([0.6788, 0.0434, 0.1094, 0.1683]) -Greedy action tensor([ 1.3015, -0.4750, -0.5686, 0.2856]) tensor([0.5933, 0.1004, 0.0914, 0.2148]) -Greedy action tensor([ 2.1013, -1.2828, 0.0517, 0.5582]) tensor([0.7265, 0.0246, 0.0936, 0.1553]) -Greedy action tensor([ 1.1770, -0.2092, -0.4945, 0.1452]) tensor([0.5573, 0.1393, 0.1047, 0.1986]) -Greedy action tensor([ 1.1715, -0.4922, 0.1447, 0.2257]) tensor([0.5165, 0.0979, 0.1850, 0.2006]) -Greedy action tensor([ 1.6276, -0.5467, -0.5008, 0.6384]) tensor([0.6232, 0.0709, 0.0742, 0.2318]) -Greedy action tensor([ 1.4125, -0.6794, -0.7010, 0.4409]) tensor([0.6162, 0.0761, 0.0745, 0.2332]) -Greedy action tensor([ 1.2423, -0.4026, -0.7613, 0.1762]) tensor([0.5980, 0.1154, 0.0806, 0.2059]) -Greedy action tensor([ 2.0754, -0.8386, -0.7202, 0.8292]) tensor([0.7128, 0.0387, 0.0435, 0.2050]) -Greedy action tensor([ 1.5847, -0.9097, -0.5378, 0.4589]) tensor([0.6550, 0.0541, 0.0784, 0.2125]) -Greedy action tensor([ 1.3941, -0.3998, -0.6734, 0.3736]) tensor([0.6049, 0.1006, 0.0765, 0.2180]) -Greedy action tensor([ 1.4001, -0.3176, -0.1588, 0.6995]) tensor([0.5302, 0.0952, 0.1115, 0.2631]) -Greedy action tensor([1.1696, 0.1764, 0.1530, 0.1401]) tensor([0.4786, 0.1773, 0.1732, 0.1709]) -Greedy action tensor([ 1.7423, -0.8532, -0.7681, 0.1699]) tensor([0.7335, 0.0547, 0.0596, 0.1522]) -Greedy action tensor([ 2.0047, -0.6269, -0.1811, 0.4909]) tensor([0.7120, 0.0512, 0.0800, 0.1567]) -Greedy action tensor([ 1.6281, -0.9464, -0.7163, 0.0219]) tensor([0.7285, 0.0555, 0.0699, 0.1462]) -Greedy action tensor([ 1.6345, -0.5875, -0.4797, 0.0798]) tensor([0.6943, 0.0752, 0.0838, 0.1467]) -Greedy action tensor([ 1.4877, -0.8491, -0.7135, 0.7993]) tensor([0.5849, 0.0565, 0.0647, 0.2939]) -Greedy action tensor([ 1.7578, -1.0086, -0.5934, 0.4763]) tensor([0.6965, 0.0438, 0.0663, 0.1934]) -Greedy action tensor([ 1.5545, -0.4180, -0.5278, 0.1679]) tensor([0.6606, 0.0919, 0.0823, 0.1651]) -Greedy action tensor([ 0.5349, 0.0117, 0.1221, -0.1563]) tensor([0.3629, 0.2151, 0.2402, 0.1818]) -Greedy action tensor([ 1.3810, -0.4144, -0.6133, 0.1577]) tensor([0.6264, 0.1040, 0.0853, 0.1843]) -Greedy action tensor([ 2.0021, -0.7197, 0.0253, 0.3986]) tensor([0.7115, 0.0468, 0.0986, 0.1432]) -Greedy action tensor([ 1.1699, -0.1449, -0.7614, 0.0869]) tensor([0.5708, 0.1533, 0.0827, 0.1932]) -Greedy action tensor([ 1.4549, -0.3310, -0.4692, 0.1151]) tensor([0.6347, 0.1064, 0.0927, 0.1662]) -Greedy action tensor([ 1.1617, -0.7291, -0.3449, 0.3978]) tensor([0.5439, 0.0821, 0.1206, 0.2534]) -Greedy action tensor([ 1.6379, -0.3365, -0.4463, 0.4239]) tensor([0.6409, 0.0890, 0.0797, 0.1903]) -Greedy action tensor([ 2.1017, -0.9271, -0.3677, 0.1377]) tensor([0.7854, 0.0380, 0.0665, 0.1102]) -Greedy action tensor([ 1.6835, -0.2227, -1.1660, 0.2188]) tensor([0.6956, 0.1034, 0.0403, 0.1608]) -Greedy action tensor([ 1.3366, 0.0933, -0.4754, 0.2446]) tensor([0.5595, 0.1614, 0.0914, 0.1877]) -Greedy action tensor([ 1.6698, -0.2125, -0.7215, 0.2771]) tensor([0.6702, 0.1020, 0.0613, 0.1665]) -Greedy action tensor([ 1.9371, -1.3053, -0.1706, 0.0369]) tensor([0.7633, 0.0298, 0.0928, 0.1141]) -Greedy action tensor([ 1.7008, 0.1696, -0.2891, 0.3680]) tensor([0.6185, 0.1338, 0.0846, 0.1631]) -Greedy action tensor([ 1.1907, -0.4658, -0.2524, -0.0844]) tensor([0.5860, 0.1118, 0.1384, 0.1637]) -Greedy action tensor([ 2.1266, -0.9066, -0.1795, 0.4018]) tensor([0.7541, 0.0363, 0.0752, 0.1344]) -Greedy action tensor([ 1.4660, -0.5093, -0.6449, -0.1958]) tensor([0.6898, 0.0957, 0.0836, 0.1309]) -Greedy action tensor([ 1.6664, -0.4671, -0.4619, 0.4717]) tensor([0.6492, 0.0769, 0.0773, 0.1966]) -Greedy action tensor([ 1.9900, -1.2113, -0.5784, 0.3802]) tensor([0.7591, 0.0309, 0.0582, 0.1518]) -Greedy action tensor([ 1.6346, -0.5485, -0.5156, 0.1337]) tensor([0.6887, 0.0776, 0.0802, 0.1535]) -Greedy action tensor([1.8860, 0.6705, 0.1548, 0.0097]) tensor([0.6147, 0.1823, 0.1088, 0.0941]) -Greedy action tensor([ 1.1068, -0.1787, -0.4897, 0.3535]) tensor([0.5128, 0.1418, 0.1039, 0.2414]) -Greedy action tensor([ 0.8105, -0.3619, -0.2418, -0.2740]) tensor([0.5008, 0.1551, 0.1748, 0.1693]) -Greedy action tensor([ 1.5914, -1.1613, -0.1688, 0.1532]) tensor([0.6788, 0.0433, 0.1168, 0.1611]) -Greedy action tensor([ 1.6358, -0.5974, -0.8928, 0.1094]) tensor([0.7121, 0.0763, 0.0568, 0.1548]) -Greedy action tensor([ 1.3047, -0.0214, -0.3506, -0.2699]) tensor([0.6011, 0.1596, 0.1148, 0.1245]) -Greedy action tensor([ 1.8663, 0.0155, -0.7317, 0.9116]) tensor([0.6186, 0.0972, 0.0460, 0.2381]) -Greedy action tensor([ 1.6610, -0.3688, -0.7798, 0.3263]) tensor([0.6749, 0.0887, 0.0588, 0.1777]) -Greedy action tensor([ 1.2149, -0.0107, -0.8610, 0.3851]) tensor([0.5390, 0.1582, 0.0676, 0.2351]) -Greedy action tensor([ 1.7844, -1.0490, -0.2560, -0.0084]) tensor([0.7379, 0.0434, 0.0959, 0.1228]) -Greedy action tensor([ 1.6345, -0.4225, -0.3007, 0.3889]) tensor([0.6410, 0.0819, 0.0926, 0.1845]) -Greedy action tensor([ 1.3461, -0.4691, -0.1685, 0.2815]) tensor([0.5789, 0.0942, 0.1273, 0.1996]) -Greedy action tensor([ 2.0788, -0.1930, -0.3554, 0.2323]) tensor([0.7415, 0.0765, 0.0650, 0.1170]) -Greedy action tensor([ 1.4697, -0.9381, 0.0658, 0.8678]) tensor([0.5309, 0.0478, 0.1304, 0.2908]) -Greedy action tensor([ 1.4050, -0.7266, -0.4813, -0.1380]) tensor([0.6738, 0.0800, 0.1022, 0.1440]) -Greedy action tensor([ 2.0410, 0.3005, -0.2197, 0.5100]) tensor([0.6684, 0.1173, 0.0697, 0.1446]) -Greedy action tensor([ 1.2331, -0.1548, -0.8939, 0.0986]) tensor([0.5916, 0.1477, 0.0705, 0.1903]) -Greedy action tensor([ 1.8995, -1.0888, -0.1272, 0.5173]) tensor([0.6978, 0.0351, 0.0919, 0.1752]) -Greedy action tensor([ 1.6305, -1.0998, -0.5813, -0.1297]) tensor([0.7426, 0.0484, 0.0813, 0.1277]) -Greedy action tensor([ 1.0466, -0.0118, -0.6446, 0.0134]) tensor([0.5299, 0.1839, 0.0977, 0.1886]) -Greedy action tensor([ 0.7046, -0.4596, -0.1292, -0.4839]) tensor([0.4875, 0.1522, 0.2118, 0.1485]) -Greedy action tensor([ 0.5940, -0.3929, -0.2176, -0.1150]) tensor([0.4331, 0.1614, 0.1924, 0.2131]) -Greedy action tensor([ 0.7058, -0.3816, -0.1029, -0.3116]) tensor([0.4664, 0.1572, 0.2078, 0.1686]) -Greedy action tensor([ 0.4553, 0.0662, -0.0490, -0.2787]) tensor([0.3621, 0.2454, 0.2187, 0.1738]) -Greedy action tensor([ 0.3613, -0.1449, -0.0264, -0.1231]) tensor([0.3451, 0.2080, 0.2342, 0.2126]) -Greedy action tensor([ 0.9594, -0.6556, -0.0364, -0.4568]) tensor([0.5522, 0.1098, 0.2040, 0.1340]) -Greedy action tensor([0.6416, 0.0442, 0.0207, 0.0578]) tensor([0.3780, 0.2080, 0.2032, 0.2108]) -Greedy action tensor([ 0.8225, -0.4591, -0.0859, -0.2631]) tensor([0.4954, 0.1375, 0.1997, 0.1673]) -Greedy action tensor([ 0.5582, -0.2621, -0.0095, -0.1162]) tensor([0.3974, 0.1750, 0.2252, 0.2024]) -Greedy action tensor([ 0.3886, -0.0607, -0.0377, -0.7123]) tensor([0.3812, 0.2432, 0.2489, 0.1268]) -Greedy action tensor([ 0.9262, -0.5375, -0.0278, -0.2368]) tensor([0.5184, 0.1199, 0.1997, 0.1620]) -Greedy action tensor([ 0.7646, -0.2287, -0.0258, -0.6277]) tensor([0.4825, 0.1787, 0.2189, 0.1199]) -Greedy action tensor([ 0.7207, -0.2758, -0.2098, -0.3950]) tensor([0.4782, 0.1765, 0.1886, 0.1567]) -Greedy action tensor([ 0.6895, -0.3479, 0.1544, -0.2149]) tensor([0.4265, 0.1511, 0.2497, 0.1726]) -Greedy action tensor([ 8.0399e-01, -2.5809e-01, 7.9833e-04, -4.1424e-01]) tensor([0.4786, 0.1655, 0.2144, 0.1416]) -Greedy action tensor([ 0.6438, -0.6251, -0.2196, -0.3110]) tensor([0.4790, 0.1347, 0.2020, 0.1844]) -Greedy action tensor([ 0.9868, -0.9083, -0.0652, -0.5151]) tensor([0.5806, 0.0873, 0.2028, 0.1293]) -Greedy action tensor([ 1.0354, -0.4998, -0.0219, -0.5170]) tensor([0.5635, 0.1214, 0.1958, 0.1193]) -Greedy action tensor([ 0.3307, 0.1090, -0.1054, 0.0609]) tensor([0.3114, 0.2495, 0.2013, 0.2378]) -Greedy action tensor([ 0.5981, -0.0803, 0.0316, -0.1099]) tensor([0.3895, 0.1976, 0.2210, 0.1919]) -Greedy action tensor([ 0.8252, -0.3806, -0.1610, -0.1471]) tensor([0.4877, 0.1460, 0.1819, 0.1844]) -Greedy action tensor([ 1.1479, -0.2573, -0.0139, -0.0336]) tensor([0.5362, 0.1315, 0.1678, 0.1645]) -Greedy action tensor([ 0.8543, -0.4989, -0.0785, -0.1125]) tensor([0.4921, 0.1272, 0.1936, 0.1871]) -Greedy action tensor([ 1.2743, -0.7025, 0.1043, -0.6615]) tensor([0.6277, 0.0869, 0.1948, 0.0906]) -Greedy action tensor([ 0.8582, -0.6257, -0.0915, -0.3271]) tensor([0.5210, 0.1181, 0.2016, 0.1593]) -Greedy action tensor([0.8759, 0.2423, 0.0638, 0.0139]) tensor([0.4172, 0.2214, 0.1852, 0.1762]) -Greedy action tensor([ 0.9145, -0.9948, 0.1046, -0.6166]) tensor([0.5527, 0.0819, 0.2459, 0.1195]) -Greedy action tensor([ 1.1440, -0.8935, 0.0124, -0.4230]) tensor([0.6018, 0.0785, 0.1941, 0.1256]) -Greedy action tensor([ 1.1668, -1.1440, 0.1270, -0.5445]) tensor([0.6122, 0.0607, 0.2164, 0.1106]) -Greedy action tensor([ 0.8594, -0.4278, -0.1020, -0.5526]) tensor([0.5258, 0.1451, 0.2010, 0.1281]) -Greedy action tensor([ 0.3326, -0.0525, -0.0005, -0.0146]) tensor([0.3222, 0.2192, 0.2309, 0.2277]) -Greedy action tensor([ 1.0208, -0.3701, -0.1338, -0.2666]) tensor([0.5435, 0.1352, 0.1713, 0.1500]) -Greedy action tensor([ 0.9527, -0.0804, 0.0307, 0.0902]) tensor([0.4596, 0.1636, 0.1828, 0.1940]) -Greedy action tensor([ 1.0087, -0.2202, 0.1394, -0.2139]) tensor([0.4984, 0.1458, 0.2090, 0.1468]) -Greedy action tensor([ 0.3544, -0.3002, -0.0715, -0.2300]) tensor([0.3663, 0.1903, 0.2392, 0.2042]) -Greedy action tensor([ 0.3600, -0.2469, -0.1243, -0.0764]) tensor([0.3562, 0.1941, 0.2195, 0.2302]) -Greedy action tensor([ 0.5339, -0.4685, -0.0107, -0.2854]) tensor([0.4188, 0.1537, 0.2429, 0.1846]) -Greedy action tensor([ 0.6721, -0.0157, -0.0716, -0.0365]) tensor([0.4048, 0.2035, 0.1924, 0.1993]) -Greedy action tensor([ 0.5488, 0.1983, -0.1905, 0.0675]) tensor([0.3572, 0.2516, 0.1705, 0.2207]) -Greedy action tensor([ 0.6195, -0.6834, 0.1116, -0.5783]) tensor([0.4597, 0.1249, 0.2766, 0.1388]) -Greedy action tensor([ 0.9598, -0.3473, -0.1985, -0.4940]) tensor([0.5500, 0.1488, 0.1727, 0.1285]) -Greedy action tensor([ 0.9787, -0.4242, -0.2238, -0.5183]) tensor([0.5649, 0.1389, 0.1697, 0.1264]) -Greedy action tensor([ 0.8833, -0.6677, -0.0896, -0.3787]) tensor([0.5339, 0.1132, 0.2018, 0.1511]) -Greedy action tensor([ 1.1100, -0.6305, -0.0161, -0.3429]) tensor([0.5768, 0.1012, 0.1871, 0.1349]) -Greedy action tensor([ 0.7780, -0.2918, -0.0579, -0.2074]) tensor([0.4651, 0.1596, 0.2016, 0.1736]) -Greedy action tensor([ 0.3269, 0.3725, -0.0960, -0.1257]) tensor([0.2996, 0.3136, 0.1963, 0.1905]) -Greedy action tensor([ 0.6599, -0.1783, 0.0222, -0.2611]) tensor([0.4239, 0.1833, 0.2240, 0.1688]) -Greedy action tensor([ 0.4521, -0.0183, -0.1180, 0.0077]) tensor([0.3532, 0.2206, 0.1997, 0.2265]) -Greedy action tensor([ 0.4651, -0.2048, 0.0547, -0.1730]) tensor([0.3699, 0.1893, 0.2454, 0.1954]) -Greedy action tensor([ 0.2731, 0.3538, -0.1332, -0.0394]) tensor([0.2872, 0.3114, 0.1913, 0.2101]) -Greedy action tensor([ 0.7713, -0.1209, 0.1593, -0.0485]) tensor([0.4180, 0.1713, 0.2266, 0.1841]) -Greedy action tensor([ 0.6826, 0.1187, -0.0515, 0.0508]) tensor([0.3875, 0.2205, 0.1860, 0.2060]) -Greedy action tensor([ 0.9176, -0.4189, -0.1808, -0.2254]) tensor([0.5222, 0.1372, 0.1741, 0.1665]) -Greedy action tensor([ 0.8225, -0.3188, 0.0049, -0.1495]) tensor([0.4675, 0.1493, 0.2064, 0.1769]) -Greedy action tensor([ 0.6510, -0.3533, -0.1769, -0.2049]) tensor([0.4488, 0.1644, 0.1961, 0.1907]) -Greedy action tensor([ 0.7303, -0.4612, 0.1685, -0.4833]) tensor([0.4606, 0.1399, 0.2626, 0.1369]) -Greedy action tensor([ 0.0753, 0.1835, -0.0007, -0.6293]) tensor([0.2829, 0.3152, 0.2621, 0.1398]) -Greedy action tensor([ 1.2302, -0.6944, -0.1005, -0.4617]) tensor([0.6272, 0.0915, 0.1658, 0.1155]) -Greedy action tensor([ 0.1400, 0.3426, -0.0835, -0.2690]) tensor([0.2711, 0.3320, 0.2168, 0.1801]) -Greedy action tensor([ 0.7134, -0.5047, 0.0704, -0.4535]) tensor([0.4689, 0.1387, 0.2465, 0.1460]) -Greedy action tensor([ 0.5646, -0.0274, -0.0205, 0.0715]) tensor([0.3675, 0.2033, 0.2047, 0.2245]) -Greedy action tensor([0.5164, 0.0239, 0.0146, 0.0249]) tensor([0.3536, 0.2161, 0.2141, 0.2163]) -Greedy action tensor([ 0.8429, -0.4771, -0.1289, -0.3449]) tensor([0.5127, 0.1370, 0.1940, 0.1563]) -Greedy action tensor([ 0.6919, -0.5385, -0.0312, -0.2238]) tensor([0.4592, 0.1342, 0.2228, 0.1838]) -Greedy action tensor([ 0.6839, 0.0984, -0.1719, -0.2133]) tensor([0.4185, 0.2330, 0.1778, 0.1706]) -Greedy action tensor([ 0.5506, -0.5293, -0.0584, -0.3503]) tensor([0.4367, 0.1483, 0.2375, 0.1774]) -Greedy action tensor([ 0.8968, -0.7563, -0.0014, -0.4911]) tensor([0.5410, 0.1036, 0.2204, 0.1350]) -Greedy action tensor([ 0.7817, -0.4911, 0.0350, -0.5818]) tensor([0.4976, 0.1393, 0.2358, 0.1273]) -Greedy action tensor([ 1.0213, -0.7010, -0.0789, -0.4611]) tensor([0.5752, 0.1028, 0.1914, 0.1306]) -Greedy action tensor([ 0.8435, -0.7144, -0.1557, -0.6665]) tensor([0.5557, 0.1170, 0.2046, 0.1227]) -Greedy action tensor([ 0.8845, -0.5961, -0.0598, -0.1910]) tensor([0.5108, 0.1162, 0.1987, 0.1743]) -Greedy action tensor([ 0.9895, -0.6123, 0.0183, -0.2462]) tensor([0.5345, 0.1077, 0.2024, 0.1554]) -Greedy action tensor([ 0.4908, 0.1140, -0.0042, 0.0080]) tensor([0.3433, 0.2355, 0.2093, 0.2119]) -Greedy action tensor([ 0.8146, -0.1613, -0.0162, -0.3636]) tensor([0.4716, 0.1777, 0.2055, 0.1452]) -Greedy action tensor([ 1.3483, -1.1398, 0.1210, -0.7566]) tensor([0.6676, 0.0555, 0.1956, 0.0813]) -Greedy action tensor([ 0.7970, -0.3955, -0.1338, -0.2008]) tensor([0.4839, 0.1469, 0.1908, 0.1784]) -Greedy action tensor([ 0.5141, -0.2267, 0.0047, -0.0771]) tensor([0.3800, 0.1812, 0.2284, 0.2104]) -Greedy action tensor([ 0.6539, 0.0247, -0.1187, 0.0423]) tensor([0.3941, 0.2101, 0.1820, 0.2138]) -Greedy action tensor([ 0.7719, -0.3733, -0.1680, -0.0780]) tensor([0.4681, 0.1489, 0.1829, 0.2001]) -Greedy action tensor([ 0.4597, -0.1266, -0.0731, -0.0945]) tensor([0.3679, 0.2047, 0.2160, 0.2114]) -Greedy action tensor([ 0.5834, -0.2645, -0.0038, -0.6247]) tensor([0.4380, 0.1876, 0.2435, 0.1309]) -Greedy action tensor([-0.0920, -0.9515, 0.1256, -0.5431]) tensor([0.3027, 0.1282, 0.3763, 0.1928]) -Greedy action tensor([-0.9923, -0.8007, 0.5664, -1.9776]) tensor([0.1363, 0.1651, 0.6477, 0.0509]) -Greedy action tensor([ 0.2443, -1.4657, 0.9954, 0.2419]) tensor([0.2327, 0.0421, 0.4931, 0.2321]) -Greedy action tensor([-0.4928, -0.7551, 0.5604, -0.3933]) tensor([0.1742, 0.1340, 0.4994, 0.1924]) -Greedy action tensor([ 0.4176, -0.6165, -0.7537, -0.8632]) tensor([0.5146, 0.1830, 0.1595, 0.1430]) -Greedy action tensor([ 0.0561, -0.0212, -0.1313, 0.0007]) tensor([0.2702, 0.2501, 0.2240, 0.2556]) -Greedy action tensor([ 0.0770, 0.2127, -0.1277, -0.2364]) tensor([0.2709, 0.3103, 0.2208, 0.1980]) -Greedy action tensor([ 0.0463, 0.3810, -0.3266, -0.6980]) tensor([0.2808, 0.3924, 0.1934, 0.1334]) -Greedy action tensor([ 0.0307, -1.3452, 0.4656, -1.4692]) tensor([0.3311, 0.0836, 0.5114, 0.0739]) -Greedy action tensor([ 0.0107, -0.9753, 0.9332, -0.5773]) tensor([0.2250, 0.0839, 0.5661, 0.1250]) -Greedy action tensor([-0.7782, -1.1161, 0.7029, -0.7858]) tensor([0.1408, 0.1004, 0.6191, 0.1397]) -Greedy action tensor([ 0.3620, -1.5722, 0.9668, -0.4911]) tensor([0.2940, 0.0425, 0.5383, 0.1253]) -Greedy action tensor([ 0.3095, -0.5496, 0.3443, -0.0790]) tensor([0.3188, 0.1350, 0.3301, 0.2162]) -Greedy action tensor([-1.4355, -0.0559, 0.3031, -0.3381]) tensor([0.0732, 0.2909, 0.4165, 0.2194]) -Greedy action tensor([-0.2854, -0.4705, -0.8595, -0.8859]) tensor([0.3398, 0.2824, 0.1914, 0.1864]) -Greedy action tensor([-0.0288, -1.4423, 0.0279, 0.2020]) tensor([0.2808, 0.0683, 0.2972, 0.3537]) -Greedy action tensor([ 0.5352, -0.5106, -1.1063, 0.9650]) tensor([0.3245, 0.1140, 0.0628, 0.4987]) -Greedy action tensor([ 0.8342, -0.5299, -0.7085, 0.8600]) tensor([0.4007, 0.1024, 0.0857, 0.4112]) -Greedy action tensor([ 1.1139, -0.0193, 0.6599, -0.0342]) tensor([0.4397, 0.1416, 0.2792, 0.1395]) -Greedy action tensor([-0.5466, -0.6149, -1.2500, -0.4505]) tensor([0.2833, 0.2646, 0.1402, 0.3119]) -Greedy action tensor([-0.7031, -0.0955, 0.8371, -1.1502]) tensor([0.1228, 0.2255, 0.5731, 0.0786]) -Greedy action tensor([-0.9759, -0.8316, -0.5524, 1.0745]) tensor([0.0873, 0.1009, 0.1333, 0.6785]) -Greedy action tensor([-0.3176, -0.1122, -0.3769, 1.0028]) tensor([0.1446, 0.1776, 0.1363, 0.5415]) -Greedy action tensor([-0.3077, -1.2845, -0.6099, -0.0746]) tensor([0.2960, 0.1115, 0.2188, 0.3737]) -Greedy action tensor([ 0.1583, -0.5049, 1.0021, -0.4444]) tensor([0.2279, 0.1174, 0.5299, 0.1247]) -Greedy action tensor([-0.9221, -0.4757, 0.3283, -0.3928]) tensor([0.1290, 0.2016, 0.4504, 0.2190]) -Greedy action tensor([ 0.0096, 0.0226, -0.2739, 0.3391]) tensor([0.2406, 0.2437, 0.1812, 0.3345]) -Greedy action tensor([-0.1794, 0.0112, 0.2126, -0.5592]) tensor([0.2286, 0.2766, 0.3383, 0.1564]) -Greedy action tensor([ 0.2351, -1.6102, -0.2033, 0.3909]) tensor([0.3365, 0.0532, 0.2171, 0.3932]) -Greedy action tensor([ 1.6359, 0.2996, -0.0029, 0.5421]) tensor([0.5581, 0.1467, 0.1084, 0.1869]) -Greedy action tensor([-0.5275, -1.3168, -0.3753, 0.1562]) tensor([0.2174, 0.0987, 0.2532, 0.4307]) -Greedy action tensor([ 0.9588, -0.7887, 1.1816, 1.0454]) tensor([0.2846, 0.0496, 0.3556, 0.3103]) -Greedy action tensor([ 0.5928, 0.7970, 0.6243, -0.5077]) tensor([0.2785, 0.3415, 0.2874, 0.0926]) -Greedy action tensor([-0.0741, -1.2307, 0.5080, -0.3008]) tensor([0.2563, 0.0806, 0.4587, 0.2043]) -Greedy action tensor([-0.9730, -1.1749, 0.1133, -0.0565]) tensor([0.1373, 0.1122, 0.4070, 0.3434]) -Greedy action tensor([-0.5507, -0.0967, 0.7666, -0.5818]) tensor([0.1374, 0.2164, 0.5130, 0.1332]) -Greedy action tensor([-0.7825, -0.5609, -0.3120, -0.7895]) tensor([0.2065, 0.2578, 0.3306, 0.2051]) -Greedy action tensor([ 1.0520, -0.5944, 0.6333, -0.6381]) tensor([0.4914, 0.0947, 0.3233, 0.0907]) -Greedy action tensor([ 0.5744, -0.0373, 0.7119, -0.2914]) tensor([0.3215, 0.1744, 0.3689, 0.1353]) -Greedy action tensor([-0.7468, -0.5420, 0.5742, -0.5749]) tensor([0.1396, 0.1714, 0.5232, 0.1658]) -Greedy action tensor([ 1.2858, -0.0143, -0.2335, 0.7738]) tensor([0.4783, 0.1303, 0.1047, 0.2867]) -Greedy action tensor([-0.8524, -0.0883, 0.4381, -0.7389]) tensor([0.1265, 0.2717, 0.4600, 0.1418]) -Greedy action tensor([ 0.2374, 0.2766, 0.5081, -0.4095]) tensor([0.2581, 0.2684, 0.3383, 0.1352]) -Greedy action tensor([-0.8748, 0.3846, -0.6872, -0.4758]) tensor([0.1385, 0.4880, 0.1671, 0.2064]) -Greedy action tensor([-0.3394, -0.3034, 0.3088, -1.0342]) tensor([0.2248, 0.2331, 0.4299, 0.1122]) -Greedy action tensor([-0.4770, -1.2609, 0.7265, -0.4133]) tensor([0.1708, 0.0780, 0.5691, 0.1821]) -Greedy action tensor([-0.0769, -0.9552, 0.3989, -1.6878]) tensor([0.3101, 0.1289, 0.4991, 0.0619]) -Greedy action tensor([ 0.4990, 0.2027, 0.7956, -0.4247]) tensor([0.2869, 0.2133, 0.3859, 0.1139]) -Greedy action tensor([ 0.1784, 0.2092, -0.4086, -0.8909]) tensor([0.3412, 0.3519, 0.1897, 0.1171]) -Greedy action tensor([-0.9377, -0.3853, -0.0254, -0.3253]) tensor([0.1414, 0.2457, 0.3521, 0.2608]) -Greedy action tensor([ 1.0736, -1.1315, -0.0906, -0.0986]) tensor([0.5773, 0.0636, 0.1802, 0.1788]) -Greedy action tensor([-0.5196, -0.1704, -0.1540, -0.7899]) tensor([0.2163, 0.3067, 0.3118, 0.1651]) -Greedy action tensor([-0.3650, -1.0136, 0.3934, -0.2629]) tensor([0.2099, 0.1097, 0.4480, 0.2324]) -Greedy action tensor([-0.5552, -0.0171, -0.2265, 0.1198]) tensor([0.1649, 0.2824, 0.2290, 0.3238]) -Greedy action tensor([-0.2413, -0.6576, 0.6103, -0.7508]) tensor([0.2172, 0.1433, 0.5090, 0.1305]) -Greedy action tensor([ 0.8450, -1.3662, 0.2352, 0.0119]) tensor([0.4790, 0.0525, 0.2603, 0.2082]) -Greedy action tensor([-0.0751, -0.0537, 0.2232, -0.6869]) tensor([0.2556, 0.2612, 0.3445, 0.1387]) -Greedy action tensor([ 1.2080, 0.3721, -0.1417, -0.0849]) tensor([0.5083, 0.2203, 0.1318, 0.1395]) -Greedy action tensor([-0.5107, -0.7503, 0.3046, -0.2701]) tensor([0.1880, 0.1480, 0.4249, 0.2392]) -Greedy action tensor([ 1.0089, -0.1687, 0.0820, 0.6779]) tensor([0.4129, 0.1272, 0.1634, 0.2965]) -Greedy action tensor([ 0.2376, -0.8197, 0.0326, 0.4422]) tensor([0.2951, 0.1025, 0.2404, 0.3621]) -Greedy action tensor([0.8915, 0.1340, 0.2393, 0.1736]) tensor([0.4036, 0.1892, 0.2102, 0.1969]) -Greedy action tensor([1.2353, 0.0731, 0.7276, 0.9034]) tensor([0.3799, 0.1188, 0.2286, 0.2726]) -Greedy action tensor([-0.2346, 0.2746, -0.3385, 0.8455]) tensor([0.1536, 0.2556, 0.1384, 0.4524]) -Greedy action tensor([ 1.0287, -0.1212, 0.0158, 0.1010]) tensor([0.4819, 0.1526, 0.1750, 0.1906]) -Greedy action tensor([-0.1151, -0.4362, -0.0827, -0.2537]) tensor([0.2756, 0.1999, 0.2846, 0.2399]) -Greedy action tensor([-0.0722, 0.4532, -0.6476, -0.9836]) tensor([0.2736, 0.4626, 0.1539, 0.1100]) -Greedy action tensor([-0.3419, -0.4474, 0.1708, -0.7473]) tensor([0.2361, 0.2124, 0.3941, 0.1574]) -Greedy action tensor([-0.6915, -0.3551, -0.5043, -0.7376]) tensor([0.2193, 0.3070, 0.2644, 0.2094]) -Greedy action tensor([-0.2114, -0.3623, 0.0344, -0.8025]) tensor([0.2708, 0.2329, 0.3463, 0.1500]) -Greedy action tensor([-0.1817, -1.2262, -0.3067, 0.4343]) tensor([0.2447, 0.0861, 0.2160, 0.4531]) -Greedy action tensor([ 0.0815, -1.0000, -0.0256, 0.0829]) tensor([0.3088, 0.1047, 0.2774, 0.3092]) -Greedy action tensor([ 0.6015, -0.1644, 0.5692, 0.3079]) tensor([0.3146, 0.1463, 0.3046, 0.2346]) -Greedy action tensor([ 0.1697, -0.8763, 0.4890, -0.6082]) tensor([0.3138, 0.1102, 0.4318, 0.1441]) -Greedy action tensor([ 0.7929, -0.4473, 0.1206, -0.6783]) tensor([0.4927, 0.1426, 0.2516, 0.1132]) -Greedy action tensor([ 0.2917, 0.1112, -0.6462, -0.7115]) tensor([0.3856, 0.3220, 0.1510, 0.1414]) -Greedy action tensor([-0.2951, 0.0113, -0.1013, -0.1041]) tensor([0.2091, 0.2840, 0.2538, 0.2531]) -Greedy action tensor([ 0.7546, -0.8228, 1.0199, 0.6257]) tensor([0.2950, 0.0609, 0.3847, 0.2594]) -Greedy action tensor([ 0.3845, -0.2338, -0.3083, 0.8514]) tensor([0.2752, 0.1483, 0.1376, 0.4389]) -Greedy action tensor([-0.6346, -0.1988, -0.2940, -1.2313]) tensor([0.2221, 0.3434, 0.3122, 0.1223]) -Greedy action tensor([-0.8542, -1.9386, 1.4775, -0.5831]) tensor([0.0772, 0.0261, 0.7953, 0.1013]) -Greedy action tensor([-0.2596, -0.8335, 0.0812, -0.9940]) tensor([0.2899, 0.1633, 0.4077, 0.1391]) -Greedy action tensor([ 2.3252, -1.1961, -0.1056, 0.3079]) tensor([0.7996, 0.0236, 0.0703, 0.1064]) -Greedy action tensor([ 1.0420, -0.5662, -0.4091, 0.5291]) tensor([0.4918, 0.0985, 0.1152, 0.2945]) -Greedy action tensor([ 1.3201, -0.6115, -0.1439, 0.1092]) tensor([0.5973, 0.0866, 0.1382, 0.1780]) -Greedy action tensor([ 1.4129, 0.1475, -0.1532, -0.2514]) tensor([0.5951, 0.1679, 0.1243, 0.1127]) -Greedy action tensor([ 1.3104, -0.2960, -0.4690, 0.3805]) tensor([0.5669, 0.1137, 0.0957, 0.2237]) -Greedy action tensor([ 1.1010, -0.6014, -0.4885, 0.4249]) tensor([0.5277, 0.0962, 0.1077, 0.2684]) -Greedy action tensor([ 1.1794, -0.0708, -1.6240, 0.2419]) tensor([0.5752, 0.1648, 0.0349, 0.2252]) -Greedy action tensor([ 1.3835, -0.7244, -0.1831, 0.3866]) tensor([0.5885, 0.0715, 0.1228, 0.2172]) -Greedy action tensor([ 1.0657, -0.2390, -0.5343, 0.1060]) tensor([0.5387, 0.1461, 0.1088, 0.2063]) -Greedy action tensor([ 0.9779, -0.2297, -1.0421, 0.3235]) tensor([0.5125, 0.1532, 0.0680, 0.2664]) -Greedy action tensor([ 1.1906, -0.5859, -0.2908, 0.6572]) tensor([0.5043, 0.0853, 0.1146, 0.2958]) -Greedy action tensor([ 1.9640, 0.4404, -0.1639, -0.0979]) tensor([0.6829, 0.1488, 0.0813, 0.0869]) -Greedy action tensor([ 1.6506, -0.3247, -0.5567, -0.0518]) tensor([0.6988, 0.0969, 0.0769, 0.1274]) -Greedy action tensor([ 1.3961, -0.0369, -0.7366, 0.0280]) tensor([0.6205, 0.1480, 0.0735, 0.1580]) -Greedy action tensor([ 1.6814, -0.3755, -0.9668, 0.0923]) tensor([0.7129, 0.0911, 0.0505, 0.1455]) -Greedy action tensor([ 1.4495, -0.8591, -0.3641, 0.5273]) tensor([0.6024, 0.0599, 0.0982, 0.2395]) -Greedy action tensor([ 1.4761, -0.2577, -0.1735, 0.3773]) tensor([0.5875, 0.1038, 0.1129, 0.1958]) -Greedy action tensor([ 1.2938, -0.4279, -0.7899, 0.4360]) tensor([0.5789, 0.1035, 0.0721, 0.2455]) -Greedy action tensor([ 1.0229, -0.3429, 0.0835, 0.2252]) tensor([0.4770, 0.1217, 0.1864, 0.2148]) -Greedy action tensor([ 1.6108, -1.0020, -0.0188, 0.4289]) tensor([0.6345, 0.0465, 0.1244, 0.1946]) -Greedy action tensor([ 2.1200, -1.0520, -0.4602, 0.4496]) tensor([0.7658, 0.0321, 0.0580, 0.1441]) -Greedy action tensor([ 1.4881, -0.4743, -0.5990, 0.4629]) tensor([0.6160, 0.0866, 0.0764, 0.2210]) -Greedy action tensor([ 1.5031, 0.0525, 0.1973, -0.3465]) tensor([0.6014, 0.1410, 0.1630, 0.0946]) -Greedy action tensor([ 1.0491, -0.6321, -0.4144, 0.0818]) tensor([0.5563, 0.1035, 0.1287, 0.2114]) -Greedy action tensor([ 1.8786, -0.8410, -0.3835, 0.3750]) tensor([0.7182, 0.0473, 0.0748, 0.1597]) -Greedy action tensor([ 2.3776, -0.9178, -0.5777, 0.5852]) tensor([0.7964, 0.0295, 0.0415, 0.1326]) -Greedy action tensor([ 1.4121, -0.0870, -1.0215, 0.7981]) tensor([0.5399, 0.1206, 0.0474, 0.2922]) -Greedy action tensor([ 0.7990, -0.3728, -0.4850, 0.4122]) tensor([0.4413, 0.1367, 0.1222, 0.2997]) -Greedy action tensor([ 2.0368, -0.0237, 0.3461, 0.1926]) tensor([0.6803, 0.0867, 0.1254, 0.1076]) -Greedy action tensor([ 1.6455, -0.6496, -0.5726, 0.3758]) tensor([0.6709, 0.0676, 0.0730, 0.1885]) -Greedy action tensor([ 1.5922, 0.4743, -0.4029, 0.2172]) tensor([0.5828, 0.1906, 0.0793, 0.1474]) -Greedy action tensor([ 1.7333, -0.5668, -0.7214, 0.0870]) tensor([0.7252, 0.0727, 0.0623, 0.1398]) -Greedy action tensor([ 1.2786, -0.4845, -0.6289, 0.2597]) tensor([0.5949, 0.1020, 0.0883, 0.2147]) -Greedy action tensor([ 0.7599, -0.3359, 0.0325, 0.3516]) tensor([0.4029, 0.1347, 0.1946, 0.2678]) -Greedy action tensor([ 1.4336, -0.3500, -0.5825, 0.0740]) tensor([0.6419, 0.1078, 0.0855, 0.1648]) -Greedy action tensor([ 1.4234, -0.4219, -0.5954, 0.3277]) tensor([0.6153, 0.0972, 0.0817, 0.2057]) -Greedy action tensor([ 1.6136, -0.4758, -0.6443, 0.3004]) tensor([0.6679, 0.0827, 0.0698, 0.1796]) -Greedy action tensor([ 1.4073, -0.5315, -0.3453, 0.1481]) tensor([0.6246, 0.0899, 0.1083, 0.1773]) -Greedy action tensor([ 2.0187, 0.2174, -0.1641, 0.2456]) tensor([0.6908, 0.1140, 0.0779, 0.1173]) -Greedy action tensor([ 1.4235, -0.3481, -0.5558, 0.2519]) tensor([0.6180, 0.1051, 0.0854, 0.1915]) -Greedy action tensor([ 1.5261, -0.3711, -0.2579, 0.6988]) tensor([0.5698, 0.0855, 0.0957, 0.2491]) -Greedy action tensor([ 2.0643, -0.7058, -0.5386, 0.1345]) tensor([0.7801, 0.0489, 0.0578, 0.1133]) -Greedy action tensor([ 1.0780, -0.3726, -0.5681, 0.3543]) tensor([0.5230, 0.1226, 0.1008, 0.2536]) -Greedy action tensor([ 1.6744, -0.6763, -0.4925, 0.0605]) tensor([0.7097, 0.0676, 0.0813, 0.1413]) -Greedy action tensor([ 1.4155, -0.6204, -0.6406, 0.6512]) tensor([0.5800, 0.0757, 0.0742, 0.2701]) -Greedy action tensor([ 1.3575, -0.1007, 0.0805, 0.1085]) tensor([0.5561, 0.1294, 0.1551, 0.1595]) -Greedy action tensor([ 1.3267, 0.3293, -1.2762, 0.1706]) tensor([0.5689, 0.2099, 0.0421, 0.1791]) -Greedy action tensor([ 1.6387, -0.7808, -0.7065, 0.1589]) tensor([0.7080, 0.0630, 0.0678, 0.1612]) -Greedy action tensor([ 1.6519, -0.4585, -0.1702, 0.1151]) tensor([0.6676, 0.0809, 0.1079, 0.1436]) -Greedy action tensor([ 1.4690, -0.6920, -0.8043, 0.2745]) tensor([0.6574, 0.0757, 0.0677, 0.1991]) -Greedy action tensor([ 1.4732, -0.3522, -0.1937, 0.2089]) tensor([0.6126, 0.0987, 0.1157, 0.1730]) -Greedy action tensor([ 1.5924, -0.7818, -0.1639, -0.0570]) tensor([0.6859, 0.0639, 0.1184, 0.1318]) -Greedy action tensor([ 2.0970, -0.6251, -0.2168, 0.9059]) tensor([0.6810, 0.0448, 0.0673, 0.2069]) -Greedy action tensor([ 2.4423, -1.7410, 0.0516, 0.6072]) tensor([0.7896, 0.0120, 0.0723, 0.1260]) -Greedy action tensor([ 1.6924, -0.4158, -0.7263, 0.2949]) tensor([0.6860, 0.0833, 0.0611, 0.1696]) -Greedy action tensor([ 1.3086, -0.2693, -0.4772, 0.2270]) tensor([0.5837, 0.1205, 0.0979, 0.1979]) -Greedy action tensor([ 1.7402, -0.0787, -0.3458, 0.0329]) tensor([0.6813, 0.1105, 0.0846, 0.1236]) -Greedy action tensor([ 1.1407, -0.4428, -0.5523, 0.2226]) tensor([0.5591, 0.1148, 0.1029, 0.2233]) -Greedy action tensor([ 1.6183, -0.5424, -0.5492, 0.4350]) tensor([0.6511, 0.0750, 0.0745, 0.1994]) -Greedy action tensor([ 1.1951, -0.6176, -0.8354, 0.4273]) tensor([0.5687, 0.0928, 0.0746, 0.2639]) -Greedy action tensor([ 1.2075, -0.5574, -0.3461, 0.4409]) tensor([0.5413, 0.0927, 0.1145, 0.2515]) -Greedy action tensor([ 1.4588, -0.6363, -0.4280, 0.1790]) tensor([0.6440, 0.0793, 0.0976, 0.1791]) -Greedy action tensor([ 1.1373, -0.4498, -0.4481, -0.2179]) tensor([0.5998, 0.1227, 0.1229, 0.1547]) -Greedy action tensor([ 3.0280, -1.5879, -0.1239, 1.2299]) tensor([0.8208, 0.0081, 0.0351, 0.1359]) -Greedy action tensor([ 0.6465, -0.3346, -0.1998, 0.0848]) tensor([0.4212, 0.1579, 0.1807, 0.2402]) -Greedy action tensor([ 1.2382, -0.5224, -0.6601, 0.2908]) tensor([0.5849, 0.1006, 0.0876, 0.2268]) -Greedy action tensor([ 1.4995, -0.5525, -0.4254, 0.2833]) tensor([0.6366, 0.0818, 0.0929, 0.1887]) -Greedy action tensor([ 1.4111, -0.4387, -0.7529, 0.2096]) tensor([0.6358, 0.1000, 0.0730, 0.1912]) -Greedy action tensor([ 1.3313, -0.8749, -0.1037, 0.5053]) tensor([0.5599, 0.0617, 0.1333, 0.2451]) -Greedy action tensor([ 1.3654, 0.1647, -1.1255, 0.2251]) tensor([0.5870, 0.1767, 0.0486, 0.1877]) -Greedy action tensor([ 1.9745, -1.1387, -0.1782, 0.7463]) tensor([0.6880, 0.0306, 0.0799, 0.2015]) -Greedy action tensor([ 1.6060, -0.8450, -0.2796, 0.1897]) tensor([0.6754, 0.0582, 0.1025, 0.1639]) -Greedy action tensor([ 1.2752, -0.5182, -0.3744, 0.5536]) tensor([0.5421, 0.0902, 0.1042, 0.2635]) -Greedy action tensor([ 1.0656, -0.3376, -0.2788, -0.1040]) tensor([0.5504, 0.1353, 0.1435, 0.1709]) -Greedy action tensor([ 0.9287, -0.3512, -0.4277, 0.5441]) tensor([0.4512, 0.1255, 0.1162, 0.3071]) -Greedy action tensor([ 1.4265, -0.6469, -0.6731, 0.3532]) tensor([0.6289, 0.0791, 0.0770, 0.2150]) -Greedy action tensor([ 1.2764, -0.4944, -0.7424, 0.0117]) tensor([0.6308, 0.1074, 0.0838, 0.1781]) -Greedy action tensor([ 1.3873, -0.2122, 0.0194, -0.0301]) tensor([0.5886, 0.1189, 0.1499, 0.1426]) -Greedy action tensor([ 1.3448, 0.0941, -0.6217, -0.0296]) tensor([0.5955, 0.1705, 0.0833, 0.1507]) -Greedy action tensor([ 1.8394, -0.7300, -0.8293, -0.2978]) tensor([0.7912, 0.0606, 0.0549, 0.0934]) -Greedy action tensor([ 0.9966, -0.4819, -0.0751, 0.1164]) tensor([0.5037, 0.1148, 0.1725, 0.2089]) -Greedy action tensor([-0.4294, 0.8288, -0.0185, 0.1046]) tensor([0.1293, 0.4551, 0.1950, 0.2206]) -Greedy action tensor([-1.8924, -0.3345, 0.6384, -0.1565]) tensor([0.0417, 0.1980, 0.5238, 0.2365]) -Greedy action tensor([-1.9142, -0.4559, 0.6578, -0.1657]) tensor([0.0414, 0.1781, 0.5424, 0.2380]) -Greedy action tensor([-1.8955, -0.4330, 0.6457, -0.1551]) tensor([0.0422, 0.1821, 0.5354, 0.2404]) -Greedy action tensor([-1.8175, -0.3421, 0.6445, -0.0961]) tensor([0.0441, 0.1927, 0.5168, 0.2464]) -Greedy action tensor([-1.8262, -0.3763, 0.6056, -0.1045]) tensor([0.0450, 0.1917, 0.5118, 0.2516]) -Greedy action tensor([-0.2983, 1.0155, 0.0387, -0.0090]) tensor([0.1341, 0.4989, 0.1879, 0.1791]) -Greedy action tensor([-1.4372, -0.6182, 0.4899, 0.1068]) tensor([0.0675, 0.1530, 0.4635, 0.3160]) -Greedy action tensor([-1.2032, 0.3697, 0.2508, -0.0813]) tensor([0.0759, 0.3660, 0.3250, 0.2331]) -Greedy action tensor([-1.8966, -0.4520, 0.6484, -0.1538]) tensor([0.0422, 0.1789, 0.5378, 0.2411]) -Greedy action tensor([-1.9084, -0.4518, 0.6520, -0.1588]) tensor([0.0417, 0.1789, 0.5396, 0.2398]) -Greedy action tensor([-1.6168, 0.3811, 0.4357, -0.1542]) tensor([0.0488, 0.3601, 0.3803, 0.2108]) -Greedy action tensor([-1.5970, -0.3697, 0.6102, 0.1598]) tensor([0.0518, 0.1768, 0.4711, 0.3003]) -Greedy action tensor([ 0.1789, 1.1730, -0.0207, 0.6296]) tensor([0.1642, 0.4437, 0.1345, 0.2577]) -Greedy action tensor([-1.7131, -0.3346, 0.5408, -0.0171]) tensor([0.0501, 0.1990, 0.4775, 0.2733]) -Greedy action tensor([-1.7358, -0.4838, 0.5639, -0.1003]) tensor([0.0510, 0.1784, 0.5087, 0.2618]) -Greedy action tensor([-1.9087, -0.4447, 0.6591, -0.1536]) tensor([0.0414, 0.1791, 0.5400, 0.2396]) -Greedy action tensor([-1.9199, -0.4414, 0.6551, -0.1676]) tensor([0.0412, 0.1806, 0.5407, 0.2375]) -Greedy action tensor([-1.8443, -0.4571, 0.6115, -0.1271]) tensor([0.0450, 0.1801, 0.5244, 0.2505]) -Greedy action tensor([-1.6779, -0.3676, 0.5991, 0.0344]) tensor([0.0500, 0.1854, 0.4875, 0.2771]) -Greedy action tensor([-1.2790, -0.2606, 0.1997, 0.2461]) tensor([0.0784, 0.2171, 0.3441, 0.3604]) -Greedy action tensor([-1.6599, -0.3509, 0.6991, 0.1490]) tensor([0.0468, 0.1731, 0.4947, 0.2854]) -Greedy action tensor([-1.8710, -0.2860, 0.6114, -0.1434]) tensor([0.0426, 0.2078, 0.5099, 0.2397]) -Greedy action tensor([-1.9170, -0.3698, 0.6372, -0.1603]) tensor([0.0411, 0.1929, 0.5281, 0.2379]) -Greedy action tensor([-1.9158, -0.4513, 0.6504, -0.1701]) tensor([0.0415, 0.1797, 0.5407, 0.2380]) -Greedy action tensor([-1.9090, -0.3173, 0.6331, -0.1639]) tensor([0.0411, 0.2018, 0.5219, 0.2352]) -Greedy action tensor([-1.9284, -0.4344, 0.6583, -0.1716]) tensor([0.0408, 0.1816, 0.5415, 0.2362]) -Greedy action tensor([-1.6715, -0.4063, 0.5733, 0.0281]) tensor([0.0514, 0.1822, 0.4852, 0.2813]) -Greedy action tensor([-0.5429, 1.0553, 0.0265, 0.3968]) tensor([0.0974, 0.4814, 0.1721, 0.2492]) -Greedy action tensor([-0.8850, -0.1359, 0.2263, -0.1157]) tensor([0.1203, 0.2545, 0.3655, 0.2597]) -Greedy action tensor([-1.6612, -0.5777, 0.6027, -0.0268]) tensor([0.0535, 0.1580, 0.5144, 0.2741]) -Greedy action tensor([-1.5713, 0.3944, 0.3666, -0.0148]) tensor([0.0504, 0.3601, 0.3503, 0.2392]) -Greedy action tensor([-1.9208, -0.4549, 0.6599, -0.1697]) tensor([0.0412, 0.1783, 0.5435, 0.2371]) -Greedy action tensor([-1.9674, -0.4799, -0.2865, -0.4193]) tensor([0.0645, 0.2856, 0.3465, 0.3034]) -Greedy action tensor([-1.9343, -0.4183, 0.6560, -0.1741]) tensor([0.0405, 0.1844, 0.5398, 0.2354]) -Greedy action tensor([-1.0984, -0.0456, 0.1386, -0.0862]) tensor([0.0994, 0.2848, 0.3424, 0.2734]) -Greedy action tensor([-1.8672, -0.4590, 0.6183, -0.1355]) tensor([0.0440, 0.1797, 0.5279, 0.2484]) -Greedy action tensor([-1.4498, -0.1105, 0.4188, -0.0746]) tensor([0.0656, 0.2502, 0.4248, 0.2594]) -Greedy action tensor([-1.9214, -0.3844, 0.6415, -0.1749]) tensor([0.0411, 0.1909, 0.5326, 0.2354]) -Greedy action tensor([-1.8422, -0.5011, 0.6346, -0.1186]) tensor([0.0448, 0.1712, 0.5330, 0.2510]) -Greedy action tensor([-1.7663, 0.2399, 0.5817, -0.3159]) tensor([0.0432, 0.3210, 0.4517, 0.1841]) -Greedy action tensor([-1.9278, -0.4467, 0.6618, -0.1711]) tensor([0.0408, 0.1794, 0.5435, 0.2363]) -Greedy action tensor([-1.9346, -0.4509, 0.6712, -0.1760]) tensor([0.0404, 0.1781, 0.5470, 0.2345]) -Greedy action tensor([-1.5916, 0.0329, 0.4275, -0.0791]) tensor([0.0551, 0.2797, 0.4151, 0.2501]) -Greedy action tensor([-1.6021, -0.4620, 0.5215, -0.0971]) tensor([0.0589, 0.1840, 0.4920, 0.2651]) -Greedy action tensor([-1.1754, 0.6833, 0.2224, -0.0419]) tensor([0.0686, 0.4404, 0.2777, 0.2132]) -Greedy action tensor([-1.9093, -0.3816, 0.6425, -0.1551]) tensor([0.0413, 0.1903, 0.5298, 0.2386]) -Greedy action tensor([-1.7397, -0.0152, 0.5319, -0.0474]) tensor([0.0460, 0.2581, 0.4460, 0.2499]) -Greedy action tensor([-1.8718, -0.2182, 0.3672, -0.2801]) tensor([0.0487, 0.2546, 0.4573, 0.2394]) -Greedy action tensor([-1.8969, -0.4521, 0.6491, -0.1539]) tensor([0.0422, 0.1789, 0.5380, 0.2410]) -Greedy action tensor([-1.8764, -0.3363, 0.6308, -0.1385]) tensor([0.0423, 0.1975, 0.5195, 0.2407]) -Greedy action tensor([-1.9130, -0.3948, 0.6426, -0.1595]) tensor([0.0413, 0.1885, 0.5318, 0.2384]) -Greedy action tensor([-1.8723, -0.4064, 0.6248, -0.1521]) tensor([0.0434, 0.1878, 0.5267, 0.2422]) -Greedy action tensor([-1.7457, 0.3480, 0.4763, -0.1393]) tensor([0.0429, 0.3479, 0.3955, 0.2137]) -Greedy action tensor([-0.5614, 0.7668, -0.1115, 0.2418]) tensor([0.1166, 0.4402, 0.1829, 0.2604]) -Greedy action tensor([-1.2090, 0.6016, 0.1117, 0.3727]) tensor([0.0636, 0.3888, 0.2382, 0.3093]) -Greedy action tensor([-1.9309, -0.3864, 0.6484, -0.1733]) tensor([0.0405, 0.1899, 0.5345, 0.2350]) -Greedy action tensor([-1.7922, -0.0301, 0.5379, -0.0964]) tensor([0.0443, 0.2582, 0.4557, 0.2417]) -Greedy action tensor([-0.5186, 0.7876, -0.0631, 0.1555]) tensor([0.1215, 0.4485, 0.1916, 0.2384]) -Greedy action tensor([-1.9158, -0.4478, 0.6511, -0.1697]) tensor([0.0415, 0.1801, 0.5405, 0.2379]) -Greedy action tensor([-1.8446, -0.4519, 0.6046, -0.1220]) tensor([0.0450, 0.1813, 0.5215, 0.2522]) -Greedy action tensor([-1.7621, -0.1940, 0.5721, -0.2252]) tensor([0.0482, 0.2310, 0.4969, 0.2239]) -Greedy action tensor([-1.2908, -0.0278, -0.7151, -0.5223]) tensor([0.1181, 0.4174, 0.2099, 0.2546]) -Greedy action tensor([-1.9201, -0.3728, 0.6419, -0.1655]) tensor([0.0409, 0.1922, 0.5303, 0.2365]) -Greedy action tensor([-1.7926, -0.3226, 0.5769, -0.1220]) tensor([0.0468, 0.2036, 0.5007, 0.2489]) -Greedy action tensor([-0.8120, 0.6648, -0.5177, -0.3466]) tensor([0.1203, 0.5267, 0.1614, 0.1916]) -Greedy action tensor([-1.3736, -0.3178, 0.3371, 0.0050]) tensor([0.0748, 0.2149, 0.4136, 0.2967]) -Greedy action tensor([-1.7044, -0.4570, 0.5515, -0.0626]) tensor([0.0521, 0.1814, 0.4973, 0.2691]) -Greedy action tensor([-1.8880, -0.3605, 0.6320, -0.1464]) tensor([0.0421, 0.1940, 0.5235, 0.2404]) -Greedy action tensor([-1.7780, -0.1109, 0.5303, -0.0955]) tensor([0.0460, 0.2437, 0.4628, 0.2475]) -Greedy action tensor([-1.5357, 0.5473, 0.3398, 0.1012]) tensor([0.0483, 0.3880, 0.3153, 0.2484]) -Greedy action tensor([-1.6104, 0.2544, 0.4512, 0.0517]) tensor([0.0486, 0.3136, 0.3818, 0.2560]) -Greedy action tensor([-1.7441, -0.4713, 0.5688, -0.0846]) tensor([0.0502, 0.1792, 0.5069, 0.2637]) -Greedy action tensor([-0.6058, 0.5362, 0.3585, 0.3284]) tensor([0.1075, 0.3368, 0.2820, 0.2737]) -Greedy action tensor([-1.9021, -0.4043, 0.6487, -0.1543]) tensor([0.0416, 0.1861, 0.5333, 0.2389]) -Greedy action tensor([-1.8950, -0.4587, 0.7117, -0.1167]) tensor([0.0405, 0.1704, 0.5492, 0.2399]) -Greedy action tensor([-1.8883, -0.4559, 0.6447, -0.1539]) tensor([0.0427, 0.1787, 0.5370, 0.2416]) -Greedy action tensor([-1.6298, -0.5163, 0.5166, -0.0494]) tensor([0.0573, 0.1744, 0.4900, 0.2782]) -Greedy action tensor([-1.6565, -0.5114, 0.5044, 0.0270]) tensor([0.0549, 0.1726, 0.4767, 0.2957]) -Greedy action tensor([-1.3904, -0.3588, 0.7407, 0.6083]) tensor([0.0510, 0.1431, 0.4296, 0.3763]) -Greedy action tensor([-1.9192, -0.4608, 0.6597, -0.1686]) tensor([0.0413, 0.1774, 0.5438, 0.2375]) -Greedy action tensor([ 0.6607, -0.1512, -0.0511, -0.0403]) tensor([0.4114, 0.1827, 0.2019, 0.2041]) -Greedy action tensor([ 0.6827, -0.7005, 0.0734, -0.2751]) tensor([0.4591, 0.1151, 0.2496, 0.1762]) -Greedy action tensor([ 0.7801, -0.4163, 0.0635, -0.3087]) tensor([0.4701, 0.1421, 0.2296, 0.1582]) -Greedy action tensor([ 0.6716, -0.2158, -0.0759, -0.1881]) tensor([0.4332, 0.1783, 0.2051, 0.1834]) -Greedy action tensor([ 0.9732, -0.7612, -0.0849, -0.4012]) tensor([0.5629, 0.0993, 0.1954, 0.1424]) -Greedy action tensor([ 0.9762, -0.6643, -0.0229, -0.4332]) tensor([0.5536, 0.1073, 0.2038, 0.1352]) -Greedy action tensor([ 1.1855, -0.6882, -0.0514, -0.5493]) tensor([0.6172, 0.0948, 0.1792, 0.1089]) -Greedy action tensor([ 0.6300, -0.2484, -0.1521, -0.0974]) tensor([0.4245, 0.1763, 0.1942, 0.2051]) -Greedy action tensor([ 1.0099, -0.6411, 0.0289, -0.3749]) tensor([0.5503, 0.1056, 0.2063, 0.1378]) -Greedy action tensor([ 0.7704, -0.5746, 0.1280, -0.7525]) tensor([0.4988, 0.1300, 0.2624, 0.1088]) -Greedy action tensor([ 0.5529, -0.0175, -0.1161, 0.0202]) tensor([0.3753, 0.2122, 0.1922, 0.2203]) -Greedy action tensor([ 1.1345, -0.8858, 0.0337, -0.5055]) tensor([0.6027, 0.0799, 0.2005, 0.1169]) -Greedy action tensor([ 0.8710, -0.8446, 0.0983, -0.3261]) tensor([0.5145, 0.0925, 0.2376, 0.1554]) -Greedy action tensor([ 0.5669, 0.2234, -0.1245, 0.1559]) tensor([0.3481, 0.2469, 0.1743, 0.2307]) -Greedy action tensor([ 0.7978, -0.1884, -0.1785, -0.1591]) tensor([0.4686, 0.1748, 0.1765, 0.1800]) -Greedy action tensor([ 0.4087, -0.2650, -0.0662, -0.1942]) tensor([0.3733, 0.1903, 0.2322, 0.2043]) -Greedy action tensor([ 1.1889, -0.4656, 0.1164, -0.3650]) tensor([0.5731, 0.1096, 0.1961, 0.1212]) -Greedy action tensor([ 0.8371, -0.4665, 0.0495, -0.3903]) tensor([0.4952, 0.1345, 0.2253, 0.1451]) -Greedy action tensor([ 1.0474, -0.7070, 0.0529, -0.6274]) tensor([0.5780, 0.1000, 0.2138, 0.1083]) -Greedy action tensor([ 0.9215, -0.8600, 0.1030, -0.2768]) tensor([0.5232, 0.0881, 0.2308, 0.1579]) -Greedy action tensor([ 0.6580, -0.3594, 0.0537, -0.2469]) tensor([0.4324, 0.1563, 0.2363, 0.1749]) -Greedy action tensor([ 0.7962, -0.3331, 0.0093, -0.1582]) tensor([0.4622, 0.1494, 0.2104, 0.1780]) -Greedy action tensor([ 0.9449, -0.5124, -0.1859, -0.5547]) tensor([0.5622, 0.1309, 0.1815, 0.1255]) -Greedy action tensor([ 0.8906, -0.6739, 0.0791, -0.5739]) tensor([0.5306, 0.1110, 0.2357, 0.1227]) -Greedy action tensor([ 0.4694, -0.2407, -0.0848, -0.4401]) tensor([0.4050, 0.1991, 0.2327, 0.1631]) -Greedy action tensor([ 0.6626, 0.2241, -0.1244, 0.1808]) tensor([0.3679, 0.2373, 0.1675, 0.2273]) -Greedy action tensor([ 1.0896, -0.9852, 0.1383, -0.3842]) tensor([0.5744, 0.0721, 0.2219, 0.1316]) -Greedy action tensor([ 0.5638, -0.6232, 0.0526, -0.2838]) tensor([0.4286, 0.1308, 0.2571, 0.1836]) -Greedy action tensor([ 1.0029, -0.7011, -0.0053, -0.5026]) tensor([0.5654, 0.1029, 0.2063, 0.1255]) -Greedy action tensor([ 1.0891, -0.6581, -0.0514, -0.5597]) tensor([0.5931, 0.1033, 0.1896, 0.1140]) -Greedy action tensor([ 0.7207, -0.5668, -0.0055, -0.3277]) tensor([0.4739, 0.1308, 0.2292, 0.1661]) -Greedy action tensor([ 0.7599, -0.5439, 0.0736, -0.2994]) tensor([0.4713, 0.1280, 0.2373, 0.1634]) -Greedy action tensor([ 1.0656, -0.6182, -0.0060, -0.5484]) tensor([0.5790, 0.1075, 0.1983, 0.1153]) -Greedy action tensor([ 0.6884, -0.6113, -0.0494, -0.2776]) tensor([0.4692, 0.1279, 0.2243, 0.1786]) -Greedy action tensor([ 0.9551, -0.3396, -0.0379, -0.1444]) tensor([0.5057, 0.1385, 0.1873, 0.1684]) -Greedy action tensor([ 1.0128, -0.6615, -0.1787, -0.3963]) tensor([0.5762, 0.1080, 0.1750, 0.1408]) -Greedy action tensor([ 0.5468, -0.0354, -0.1062, -0.0051]) tensor([0.3766, 0.2104, 0.1960, 0.2169]) -Greedy action tensor([ 1.1804, -0.6431, -0.0834, -0.3493]) tensor([0.6022, 0.0972, 0.1702, 0.1304]) -Greedy action tensor([ 1.1894, -0.4010, -0.1529, -0.2391]) tensor([0.5866, 0.1196, 0.1532, 0.1406]) -Greedy action tensor([ 0.8914, -0.5778, 0.2544, -0.3051]) tensor([0.4852, 0.1116, 0.2566, 0.1466]) -Greedy action tensor([ 0.5580, -0.3294, -0.0427, -0.0629]) tensor([0.4004, 0.1648, 0.2196, 0.2152]) -Greedy action tensor([ 0.8499, -0.5810, -0.0057, -0.2568]) tensor([0.5013, 0.1199, 0.2131, 0.1658]) -Greedy action tensor([ 0.6278, -0.3023, 0.0212, -0.2052]) tensor([0.4211, 0.1661, 0.2296, 0.1831]) -Greedy action tensor([ 0.5680, -0.1164, -0.1401, -0.2163]) tensor([0.4076, 0.2056, 0.2008, 0.1860]) -Greedy action tensor([ 0.7946, -0.6363, -0.0299, -0.4284]) tensor([0.5071, 0.1213, 0.2224, 0.1493]) -Greedy action tensor([ 1.1817, -0.7958, -0.0243, -0.5981]) tensor([0.6225, 0.0862, 0.1864, 0.1050]) -Greedy action tensor([ 0.6434, -0.5541, -0.2074, -0.1709]) tensor([0.4604, 0.1390, 0.1966, 0.2039]) -Greedy action tensor([ 0.8009, -0.7279, -0.0464, -0.2892]) tensor([0.5046, 0.1094, 0.2163, 0.1697]) -Greedy action tensor([ 0.8441, -0.6073, 0.1921, -0.2232]) tensor([0.4764, 0.1116, 0.2482, 0.1638]) -Greedy action tensor([ 1.2385, -0.8571, -0.0922, -0.5448]) tensor([0.6429, 0.0791, 0.1699, 0.1081]) -Greedy action tensor([ 0.3561, 0.1077, -0.0172, -0.0647]) tensor([0.3200, 0.2496, 0.2203, 0.2101]) -Greedy action tensor([ 0.5455, -0.1625, -0.0156, -0.0481]) tensor([0.3823, 0.1883, 0.2181, 0.2112]) -Greedy action tensor([ 0.6947, -0.5664, -0.0213, -0.3304]) tensor([0.4693, 0.1330, 0.2294, 0.1684]) -Greedy action tensor([ 0.8300, -0.5301, -0.1199, -0.3896]) tensor([0.5158, 0.1324, 0.1995, 0.1523]) -Greedy action tensor([ 0.8615, -0.6166, -0.1632, -0.1563]) tensor([0.5133, 0.1171, 0.1842, 0.1855]) -Greedy action tensor([ 0.6740, -0.3794, -0.0808, -0.3933]) tensor([0.4624, 0.1612, 0.2174, 0.1590]) -Greedy action tensor([ 1.0778, -0.3736, 0.0725, -0.1462]) tensor([0.5279, 0.1237, 0.1932, 0.1552]) -Greedy action tensor([ 0.8061, 0.2180, -0.0279, -0.0414]) tensor([0.4135, 0.2297, 0.1796, 0.1772]) -Greedy action tensor([ 0.6700, -0.4891, -0.1696, -0.1421]) tensor([0.4567, 0.1433, 0.1972, 0.2027]) -Greedy action tensor([ 1.0834, -0.3350, -0.0397, -0.2354]) tensor([0.5450, 0.1319, 0.1773, 0.1458]) -Greedy action tensor([ 0.6505, 0.4407, -0.3023, 0.2269]) tensor([0.3507, 0.2844, 0.1353, 0.2296]) -Greedy action tensor([ 0.2855, 0.0848, -0.0729, -0.0146]) tensor([0.3070, 0.2511, 0.2145, 0.2274]) -Greedy action tensor([ 0.7467, -0.0700, 0.0247, -0.5669]) tensor([0.4553, 0.2012, 0.2212, 0.1224]) -Greedy action tensor([ 0.8773, -0.2533, -0.1080, -0.3229]) tensor([0.5007, 0.1616, 0.1869, 0.1508]) -Greedy action tensor([ 0.6535, -0.2916, -0.0313, -0.1129]) tensor([0.4242, 0.1649, 0.2139, 0.1971]) -Greedy action tensor([ 0.6546, -0.2806, 0.0021, -0.0950]) tensor([0.4192, 0.1645, 0.2183, 0.1981]) -Greedy action tensor([ 0.9044, -0.3792, -0.0577, -0.3865]) tensor([0.5170, 0.1432, 0.1976, 0.1422]) -Greedy action tensor([ 0.8354, -0.2199, -0.1133, -0.2126]) tensor([0.4794, 0.1669, 0.1856, 0.1681]) -Greedy action tensor([ 0.6216, -0.4140, -0.0182, -0.2955]) tensor([0.4382, 0.1556, 0.2311, 0.1751]) -Greedy action tensor([ 0.7290, -0.2744, -0.0171, -0.1652]) tensor([0.4445, 0.1630, 0.2108, 0.1818]) -Greedy action tensor([ 0.9065, -0.4988, -0.1074, -0.3660]) tensor([0.5296, 0.1299, 0.1921, 0.1484]) -Greedy action tensor([ 1.1438, -0.8823, 0.1464, -0.6150]) tensor([0.5978, 0.0788, 0.2205, 0.1030]) -Greedy action tensor([ 1.2525, -0.8114, -0.0342, -0.4753]) tensor([0.6326, 0.0803, 0.1747, 0.1124]) -Greedy action tensor([ 0.7918, -0.2179, 0.0058, -0.5286]) tensor([0.4792, 0.1746, 0.2183, 0.1279]) -Greedy action tensor([ 1.0878, -0.7712, 0.1214, -0.7225]) tensor([0.5883, 0.0917, 0.2238, 0.0962]) -Greedy action tensor([ 0.5812, -0.3364, -0.1325, -0.3666]) tensor([0.4392, 0.1754, 0.2151, 0.1702]) -Greedy action tensor([ 1.1150, -0.7891, -0.0168, -0.5897]) tensor([0.6049, 0.0901, 0.1950, 0.1100]) -Greedy action tensor([ 1.0336, -0.5042, 0.0107, -0.6214]) tensor([0.5664, 0.1217, 0.2037, 0.1082]) -Greedy action tensor([ 0.6064, -0.2722, -0.0453, -0.1023]) tensor([0.4117, 0.1710, 0.2146, 0.2027]) -Greedy action tensor([ 0.9286, -0.9384, 0.1432, -0.4595]) tensor([0.5376, 0.0831, 0.2451, 0.1342]) -Greedy action tensor([ 0.7345, -0.3765, -0.0083, -0.2853]) tensor([0.4618, 0.1520, 0.2197, 0.1665]) -Greedy action tensor([-0.5438, -0.7262, -1.1836, -0.4765]) tensor([0.2915, 0.2429, 0.1537, 0.3118]) -Greedy action tensor([ 0.2911, -0.3093, -0.5943, -0.1207]) tensor([0.3812, 0.2091, 0.1573, 0.2525]) -Greedy action tensor([ 0.1293, -1.0698, -0.3698, -1.0719]) tensor([0.4526, 0.1365, 0.2748, 0.1362]) -Greedy action tensor([-0.0342, -0.9601, -0.1810, -0.4190]) tensor([0.3401, 0.1347, 0.2937, 0.2315]) -Greedy action tensor([ 0.7779, -0.2765, -0.1674, 0.6080]) tensor([0.3875, 0.1350, 0.1506, 0.3269]) -Greedy action tensor([ 0.0757, -0.6203, 0.1144, -0.9734]) tensor([0.3462, 0.1726, 0.3599, 0.1213]) -Greedy action tensor([ 0.8878, 0.6588, 0.4870, -0.1575]) tensor([0.3550, 0.2824, 0.2378, 0.1248]) -Greedy action tensor([-0.8215, 0.1121, -0.7949, -0.1113]) tensor([0.1514, 0.3851, 0.1555, 0.3080]) -Greedy action tensor([ 0.4847, -0.3241, 0.7027, -1.2870]) tensor([0.3498, 0.1558, 0.4350, 0.0595]) -Greedy action tensor([-0.7677, 0.3816, -0.3515, 0.3565]) tensor([0.1143, 0.3607, 0.1733, 0.3518]) -Greedy action tensor([-0.1307, -0.7589, -0.1173, -0.2765]) tensor([0.2931, 0.1564, 0.2971, 0.2534]) -Greedy action tensor([-0.2854, -1.0607, 1.2906, -0.9123]) tensor([0.1464, 0.0674, 0.7080, 0.0782]) -Greedy action tensor([-0.7045, -0.1334, 0.4290, -1.5679]) tensor([0.1588, 0.2810, 0.4932, 0.0670]) -Greedy action tensor([0.7059, 0.0316, 1.0773, 0.1035]) tensor([0.2852, 0.1453, 0.4134, 0.1561]) -Greedy action tensor([-0.9675, -1.3008, 0.6991, -0.8592]) tensor([0.1231, 0.0882, 0.6516, 0.1372]) -Greedy action tensor([ 0.4067, -1.4329, 0.8199, -0.9861]) tensor([0.3426, 0.0544, 0.5179, 0.0851]) -Greedy action tensor([-0.6988, 0.4919, -0.7752, 0.3868]) tensor([0.1223, 0.4023, 0.1133, 0.3621]) -Greedy action tensor([ 0.0034, -0.0081, 0.3807, -0.1462]) tensor([0.2321, 0.2295, 0.3385, 0.1999]) -Greedy action tensor([ 0.5454, -1.5870, -0.3864, -0.5621]) tensor([0.5426, 0.0643, 0.2137, 0.1793]) -Greedy action tensor([ 0.4914, -0.5557, 0.3243, -0.2597]) tensor([0.3747, 0.1315, 0.3170, 0.1768]) -Greedy action tensor([-0.1732, -0.9512, 0.0447, -0.0865]) tensor([0.2636, 0.1211, 0.3278, 0.2875]) -Greedy action tensor([ 0.1142, -0.4315, -0.3785, 0.1439]) tensor([0.3105, 0.1799, 0.1897, 0.3199]) -Greedy action tensor([ 0.6399, -0.1000, -0.2609, 0.7260]) tensor([0.3363, 0.1605, 0.1366, 0.3666]) -Greedy action tensor([ 0.1162, -0.6459, 0.3387, -0.8747]) tensor([0.3239, 0.1512, 0.4046, 0.1203]) -Greedy action tensor([-1.4118, -1.2547, 0.6788, -1.0726]) tensor([0.0857, 0.1003, 0.6936, 0.1204]) -Greedy action tensor([ 0.1182, -0.3701, 0.0755, -0.2109]) tensor([0.3038, 0.1864, 0.2911, 0.2186]) -Greedy action tensor([-0.9821, 0.3875, 0.5439, -1.2216]) tensor([0.0969, 0.3812, 0.4457, 0.0763]) -Greedy action tensor([ 1.3760, -0.8124, 1.3459, -0.3910]) tensor([0.4438, 0.0497, 0.4306, 0.0758]) -Greedy action tensor([ 0.4610, -0.8524, 0.3021, -0.3256]) tensor([0.3880, 0.1043, 0.3310, 0.1767]) -Greedy action tensor([ 0.0680, -0.8018, 0.7489, 0.0312]) tensor([0.2294, 0.0961, 0.4533, 0.2212]) -Greedy action tensor([ 0.6161, -0.0544, 0.8480, -0.5374]) tensor([0.3238, 0.1656, 0.4084, 0.1022]) -Greedy action tensor([ 1.3238, -0.9398, 0.4811, -0.5188]) tensor([0.5907, 0.0614, 0.2543, 0.0936]) -Greedy action tensor([ 0.0280, 0.1584, -0.0674, -0.7901]) tensor([0.2866, 0.3265, 0.2605, 0.1265]) -Greedy action tensor([-1.1781, -1.0449, 0.6270, -1.1300]) tensor([0.1079, 0.1232, 0.6558, 0.1132]) -Greedy action tensor([-0.4314, 0.1509, -0.0315, -0.3007]) tensor([0.1845, 0.3302, 0.2751, 0.2102]) -Greedy action tensor([ 0.5314, 0.0705, -0.7170, -0.2582]) tensor([0.4216, 0.2659, 0.1210, 0.1914]) -Greedy action tensor([ 0.3426, -1.0094, -0.1521, -0.2232]) tensor([0.4105, 0.1062, 0.2503, 0.2331]) -Greedy action tensor([-0.8923, -0.8644, 0.2976, -0.6948]) tensor([0.1531, 0.1574, 0.5031, 0.1865]) -Greedy action tensor([-0.0409, -0.0370, -0.0090, -0.2945]) tensor([0.2623, 0.2633, 0.2708, 0.2035]) -Greedy action tensor([-0.3323, 0.3175, 0.4509, -0.4517]) tensor([0.1669, 0.3197, 0.3653, 0.1481]) -Greedy action tensor([-1.1439, -0.6082, -0.0672, -0.1478]) tensor([0.1197, 0.2046, 0.3514, 0.3242]) -Greedy action tensor([ 0.2683, -0.2816, -0.1874, -0.4831]) tensor([0.3728, 0.2151, 0.2363, 0.1758]) -Greedy action tensor([-0.4952, -0.7900, 0.9178, -0.5440]) tensor([0.1469, 0.1094, 0.6037, 0.1399]) -Greedy action tensor([-0.6940, -1.4369, 0.8987, -0.5365]) tensor([0.1322, 0.0629, 0.6501, 0.1548]) -Greedy action tensor([ 0.1108, -0.1650, -0.3086, 0.7434]) tensor([0.2326, 0.1766, 0.1529, 0.4379]) -Greedy action tensor([-0.3731, -1.6835, 0.6904, -1.2217]) tensor([0.2177, 0.0587, 0.6305, 0.0932]) -Greedy action tensor([-0.5704, 0.5461, 0.6298, -0.6213]) tensor([0.1201, 0.3669, 0.3989, 0.1142]) -Greedy action tensor([-0.4170, -0.9312, -0.0614, -0.6269]) tensor([0.2607, 0.1559, 0.3720, 0.2113]) -Greedy action tensor([-0.7333, -1.1212, -0.5306, -0.8862]) tensor([0.2659, 0.1804, 0.3256, 0.2282]) -Greedy action tensor([ 0.4816, -1.0263, -1.2448, 0.4282]) tensor([0.4260, 0.0943, 0.0758, 0.4039]) -Greedy action tensor([ 0.4046, -0.3111, 0.1394, 0.8236]) tensor([0.2648, 0.1294, 0.2031, 0.4026]) -Greedy action tensor([-0.4570, -2.0779, 1.1273, -0.4999]) tensor([0.1422, 0.0281, 0.6934, 0.1362]) -Greedy action tensor([-0.4503, -0.6958, -0.5061, 0.6303]) tensor([0.1762, 0.1379, 0.1667, 0.5193]) -Greedy action tensor([ 0.7845, -0.6655, 0.3176, -0.4902]) tensor([0.4671, 0.1096, 0.2928, 0.1305]) -Greedy action tensor([-0.6501, -0.6327, -0.1196, -0.3657]) tensor([0.1982, 0.2016, 0.3368, 0.2634]) -Greedy action tensor([ 1.1075, -1.4539, -0.0136, 0.6043]) tensor([0.4981, 0.0385, 0.1623, 0.3011]) -Greedy action tensor([-0.6459, -0.6103, -0.1675, -0.4993]) tensor([0.2080, 0.2155, 0.3356, 0.2409]) -Greedy action tensor([ 0.4000, 0.4369, -0.3866, 0.3209]) tensor([0.2927, 0.3037, 0.1333, 0.2704]) -Greedy action tensor([-1.2351, -0.6502, -0.9355, -0.9241]) tensor([0.1815, 0.3258, 0.2449, 0.2477]) -Greedy action tensor([-0.0872, -0.8214, -0.2002, -0.3118]) tensor([0.3153, 0.1513, 0.2816, 0.2519]) -Greedy action tensor([-0.9212, -0.8842, 0.8191, -0.5893]) tensor([0.1095, 0.1137, 0.6242, 0.1526]) -Greedy action tensor([ 1.2095, -0.5730, 0.9268, 0.3230]) tensor([0.4284, 0.0721, 0.3229, 0.1766]) -Greedy action tensor([ 1.4381, 0.1790, -0.0541, -0.3627]) tensor([0.5974, 0.1696, 0.1343, 0.0987]) -Greedy action tensor([-0.2753, -0.3845, -0.5748, -0.1486]) tensor([0.2651, 0.2376, 0.1965, 0.3008]) -Greedy action tensor([ 0.3510, -0.7565, 0.0213, 0.5186]) tensor([0.3094, 0.1022, 0.2225, 0.3659]) -Greedy action tensor([ 0.4619, -0.9568, 0.0034, 0.3451]) tensor([0.3618, 0.0876, 0.2287, 0.3219]) -Greedy action tensor([0.5888, 0.2982, 0.1700, 0.5874]) tensor([0.2938, 0.2197, 0.1932, 0.2933]) -Greedy action tensor([-0.5752, 0.5715, -0.5506, -0.4693]) tensor([0.1591, 0.5009, 0.1631, 0.1769]) -Greedy action tensor([-0.8947, -0.4446, 0.1552, -1.2698]) tensor([0.1636, 0.2566, 0.4674, 0.1124]) -Greedy action tensor([-0.2337, 0.7281, -0.3854, -1.1615]) tensor([0.2053, 0.5371, 0.1764, 0.0812]) -Greedy action tensor([ 0.3184, -0.1686, -0.3467, -0.2041]) tensor([0.3674, 0.2258, 0.1889, 0.2179]) -Greedy action tensor([-0.2460, -0.1423, 0.2916, -0.8062]) tensor([0.2277, 0.2525, 0.3898, 0.1300]) -Greedy action tensor([-0.5206, -0.7463, 0.1132, -0.9241]) tensor([0.2298, 0.1834, 0.4332, 0.1535]) -Greedy action tensor([-0.6204, -0.5026, 0.0018, -0.6109]) tensor([0.2001, 0.2251, 0.3728, 0.2020]) -Greedy action tensor([0.0137, 0.1391, 0.3430, 0.0942]) tensor([0.2170, 0.2460, 0.3017, 0.2352]) -Greedy action tensor([-0.8508, -1.0344, 0.0108, 0.0651]) tensor([0.1493, 0.1243, 0.3534, 0.3731]) -Greedy action tensor([-0.2239, -0.0842, -0.0829, -0.5052]) tensor([0.2465, 0.2835, 0.2839, 0.1861]) -Greedy action tensor([-0.2971, -1.2274, 0.3497, -0.5710]) tensor([0.2460, 0.0970, 0.4698, 0.1871]) -Greedy action tensor([ 0.1540, -0.8637, 0.3759, 0.8683]) tensor([0.2149, 0.0777, 0.2683, 0.4390]) -Greedy action tensor([-0.7587, -0.7236, -0.5064, -0.4664]) tensor([0.2145, 0.2222, 0.2760, 0.2873]) -Greedy action tensor([-0.3710, -0.3449, 0.4898, -1.5911]) tensor([0.2134, 0.2190, 0.5046, 0.0630]) -Greedy action tensor([ 1.8933, -0.7725, 0.7902, 0.7774]) tensor([0.5784, 0.0402, 0.1919, 0.1895]) -Greedy action tensor([ 1.2329, -0.2219, -0.7766, 0.0371]) tensor([0.5988, 0.1398, 0.0803, 0.1811]) -Greedy action tensor([ 1.1005, -0.3681, -0.9081, 0.6833]) tensor([0.4942, 0.1138, 0.0663, 0.3257]) -Greedy action tensor([ 1.2161, -1.1750, -0.1244, -0.0012]) tensor([0.6063, 0.0555, 0.1587, 0.1795]) -Greedy action tensor([ 1.5450, 0.0540, -0.6547, 0.5940]) tensor([0.5806, 0.1307, 0.0643, 0.2243]) -Greedy action tensor([ 1.4346, -0.5837, -0.2136, -0.0470]) tensor([0.6441, 0.0856, 0.1239, 0.1464]) -Greedy action tensor([ 1.8904, -0.5128, -0.7016, 0.5663]) tensor([0.6986, 0.0632, 0.0523, 0.1859]) -Greedy action tensor([ 0.9074, -0.2900, -0.4122, 0.1071]) tensor([0.4954, 0.1496, 0.1324, 0.2225]) -Greedy action tensor([ 1.9430, -1.3622, 0.0291, 0.7974]) tensor([0.6657, 0.0244, 0.0982, 0.2117]) -Greedy action tensor([ 1.2853, -0.1704, -0.3622, 0.1591]) tensor([0.5714, 0.1333, 0.1100, 0.1853]) -Greedy action tensor([ 1.6829, -0.6901, -0.2679, 0.5835]) tensor([0.6376, 0.0594, 0.0906, 0.2124]) -Greedy action tensor([ 1.0098, -0.3565, -0.2348, 0.1203]) tensor([0.5118, 0.1305, 0.1474, 0.2103]) -Greedy action tensor([ 1.8551, 0.4734, 0.2324, -0.1499]) tensor([0.6316, 0.1586, 0.1247, 0.0851]) -Greedy action tensor([ 1.3134, -0.4816, -0.5434, 0.6335]) tensor([0.5468, 0.0908, 0.0854, 0.2770]) -Greedy action tensor([ 1.3647, -0.6433, -0.5943, 0.0229]) tensor([0.6508, 0.0874, 0.0918, 0.1701]) -Greedy action tensor([ 1.6596, -0.4187, -1.1302, 0.5999]) tensor([0.6523, 0.0816, 0.0401, 0.2260]) -Greedy action tensor([ 1.7376, -0.6970, -0.0830, 0.8157]) tensor([0.6071, 0.0532, 0.0983, 0.2415]) -Greedy action tensor([ 1.7134, -0.6446, -0.3309, 0.4030]) tensor([0.6694, 0.0633, 0.0867, 0.1806]) -Greedy action tensor([ 1.9983, -1.1607, -0.1775, 0.8286]) tensor([0.6819, 0.0290, 0.0774, 0.2117]) -Greedy action tensor([ 1.9259, -0.6666, 0.3077, 0.3081]) tensor([0.6796, 0.0509, 0.1347, 0.1348]) -Greedy action tensor([ 1.3979, -0.5104, -0.3333, -0.1783]) tensor([0.6527, 0.0968, 0.1156, 0.1349]) -Greedy action tensor([ 0.8432, -0.4187, -0.1045, -0.0557]) tensor([0.4813, 0.1363, 0.1866, 0.1959]) -Greedy action tensor([ 1.5494, -0.1977, -0.6207, 0.1843]) tensor([0.6478, 0.1129, 0.0740, 0.1654]) -Greedy action tensor([ 1.4722, -0.1627, -0.3514, 0.2380]) tensor([0.6070, 0.1183, 0.0980, 0.1767]) -Greedy action tensor([ 1.4404, -0.4797, -1.0106, 0.0817]) tensor([0.6712, 0.0984, 0.0579, 0.1725]) -Greedy action tensor([ 1.1034, -0.3886, -0.2474, 0.3810]) tensor([0.5077, 0.1142, 0.1315, 0.2465]) -Greedy action tensor([ 2.3920, -1.0983, -0.0738, 0.7234]) tensor([0.7669, 0.0234, 0.0651, 0.1446]) -Greedy action tensor([ 1.5312, 0.2190, -0.3339, 0.3037]) tensor([0.5824, 0.1568, 0.0902, 0.1706]) -Greedy action tensor([ 1.2419, -0.1524, -0.6905, 0.2137]) tensor([0.5713, 0.1417, 0.0827, 0.2043]) -Greedy action tensor([ 1.9857, 0.8189, -0.1705, 0.3151]) tensor([0.6191, 0.1928, 0.0717, 0.1165]) -Greedy action tensor([ 1.0941, -0.5657, 0.1854, 0.1385]) tensor([0.5056, 0.0962, 0.2038, 0.1944]) -Greedy action tensor([ 1.5476, -0.5105, -0.5856, 0.9987]) tensor([0.5483, 0.0700, 0.0650, 0.3167]) -Greedy action tensor([ 1.3131, -0.3015, -0.3997, 0.2775]) tensor([0.5766, 0.1147, 0.1040, 0.2047]) -Greedy action tensor([ 2.2231, -1.1337, -0.2664, 0.9220]) tensor([0.7194, 0.0251, 0.0597, 0.1958]) -Greedy action tensor([ 1.5104, -0.0430, -0.3195, 0.3899]) tensor([0.5889, 0.1246, 0.0945, 0.1920]) -Greedy action tensor([ 1.3318, -0.7961, 0.0359, 0.5755]) tensor([0.5370, 0.0640, 0.1470, 0.2521]) -Greedy action tensor([ 1.3513, -0.4063, -0.3608, 0.2645]) tensor([0.5916, 0.1020, 0.1068, 0.1995]) -Greedy action tensor([ 1.4097, -0.5928, 0.4752, -0.2474]) tensor([0.5819, 0.0786, 0.2286, 0.1110]) -Greedy action tensor([ 1.4040, -0.7568, -0.2535, 0.2569]) tensor([0.6160, 0.0710, 0.1174, 0.1956]) -Greedy action tensor([ 1.9216, -0.4666, -0.4643, 0.1857]) tensor([0.7353, 0.0675, 0.0676, 0.1296]) -Greedy action tensor([ 1.5601, -0.4811, -0.3064, 0.2412]) tensor([0.6444, 0.0837, 0.0997, 0.1723]) -Greedy action tensor([ 1.6781, -0.4417, -0.6189, 0.8529]) tensor([0.6029, 0.0724, 0.0606, 0.2641]) -Greedy action tensor([ 1.3946, -0.4637, -0.0359, 0.1983]) tensor([0.5891, 0.0919, 0.1409, 0.1781]) -Greedy action tensor([ 0.8524, -0.0125, 0.1105, 0.1468]) tensor([0.4182, 0.1761, 0.1992, 0.2065]) -Greedy action tensor([ 0.8730, -0.4126, 0.0450, 0.1535]) tensor([0.4545, 0.1256, 0.1986, 0.2213]) -Greedy action tensor([ 1.2519, -0.2961, -0.5476, 0.1563]) tensor([0.5840, 0.1242, 0.0966, 0.1953]) -Greedy action tensor([ 1.5616, -1.0678, -0.3884, 0.9520]) tensor([0.5688, 0.0410, 0.0809, 0.3092]) -Greedy action tensor([ 1.2610, -0.4744, -0.5344, 0.2368]) tensor([0.5877, 0.1036, 0.0976, 0.2110]) -Greedy action tensor([ 1.1598, -0.4511, 0.0872, 0.1715]) tensor([0.5225, 0.1043, 0.1787, 0.1945]) -Greedy action tensor([ 1.6609, -0.2672, -0.8715, 0.1454]) tensor([0.6922, 0.1007, 0.0550, 0.1521]) -Greedy action tensor([ 2.0530, 0.0531, -0.7356, -0.3104]) tensor([0.7746, 0.1048, 0.0476, 0.0729]) -Greedy action tensor([ 1.1188, 0.3329, -0.8739, -0.2492]) tensor([0.5415, 0.2468, 0.0738, 0.1379]) -Greedy action tensor([ 1.5070, -0.4645, -0.5114, -0.1050]) tensor([0.6795, 0.0946, 0.0903, 0.1356]) -Greedy action tensor([ 1.3787, -0.2468, -0.5762, 0.6142]) tensor([0.5543, 0.1091, 0.0785, 0.2581]) -Greedy action tensor([ 2.4827, 0.9006, -0.0902, 0.1329]) tensor([0.7261, 0.1492, 0.0554, 0.0693]) -Greedy action tensor([ 1.1948, -0.2506, -0.7455, 0.5513]) tensor([0.5250, 0.1237, 0.0754, 0.2759]) -Greedy action tensor([ 1.1364, -0.4372, -0.8752, 0.6003]) tensor([0.5192, 0.1076, 0.0695, 0.3037]) -Greedy action tensor([ 1.2749, -0.2153, -1.1893, 0.2936]) tensor([0.5934, 0.1337, 0.0505, 0.2224]) -Greedy action tensor([ 1.3090, -0.1612, -0.9135, 0.7300]) tensor([0.5267, 0.1211, 0.0571, 0.2952]) -Greedy action tensor([ 1.6066, -0.1726, -0.7488, 0.1477]) tensor([0.6684, 0.1128, 0.0634, 0.1554]) -Greedy action tensor([ 2.2359e+00, -9.5541e-01, 5.2065e-04, 1.2861e+00]) tensor([0.6515, 0.0268, 0.0697, 0.2520]) -Greedy action tensor([ 1.4661, -0.8786, 0.0837, 0.4493]) tensor([0.5853, 0.0561, 0.1469, 0.2117]) -Greedy action tensor([ 1.2429, -0.7038, -0.4134, 0.4350]) tensor([0.5620, 0.0802, 0.1073, 0.2505]) -Greedy action tensor([ 1.1801, -0.4558, -0.4336, 0.5990]) tensor([0.5120, 0.0997, 0.1020, 0.2863]) -Greedy action tensor([ 1.6790, -0.8064, -0.1373, 0.5666]) tensor([0.6350, 0.0529, 0.1033, 0.2088]) -Greedy action tensor([ 1.5331, -0.6899, -0.7345, 0.2627]) tensor([0.6700, 0.0725, 0.0694, 0.1881]) -Greedy action tensor([ 2.5400, 0.8398, -0.0877, -0.0161]) tensor([0.7505, 0.1371, 0.0542, 0.0582]) -Greedy action tensor([ 1.5016, -0.2516, -0.7780, 0.0222]) tensor([0.6652, 0.1152, 0.0681, 0.1515]) -Greedy action tensor([2.5130, 0.1621, 0.2775, 0.0996]) tensor([0.7741, 0.0738, 0.0828, 0.0693]) -Greedy action tensor([ 1.3311, -0.2965, -1.0160, 0.2116]) tensor([0.6178, 0.1214, 0.0591, 0.2017]) -Greedy action tensor([ 1.0078, -0.2080, -0.4232, 0.3542]) tensor([0.4865, 0.1442, 0.1163, 0.2530]) -Greedy action tensor([ 1.7914, 0.3409, -0.5440, 0.4496]) tensor([0.6279, 0.1472, 0.0608, 0.1641]) -Greedy action tensor([ 1.6296, -0.5681, -0.1534, -0.0060]) tensor([0.6784, 0.0753, 0.1141, 0.1322]) -Greedy action tensor([ 1.1615, -0.2470, -0.7450, 0.0529]) tensor([0.5803, 0.1419, 0.0862, 0.1915]) -Greedy action tensor([ 1.1806, -0.2617, -0.9610, 0.2918]) tensor([0.5666, 0.1339, 0.0666, 0.2329]) -Greedy action tensor([ 1.1412, -0.4457, -0.6851, 0.2205]) tensor([0.5669, 0.1160, 0.0913, 0.2258]) -Greedy action tensor([ 2.6589, -1.5735, -0.2783, 0.6955]) tensor([0.8279, 0.0120, 0.0439, 0.1162]) -Greedy action tensor([ 1.5523, -0.9891, -0.5947, 0.7050]) tensor([0.6157, 0.0485, 0.0719, 0.2639]) -Greedy action tensor([ 1.7391, -0.1394, -0.4811, 0.1181]) tensor([0.6854, 0.1047, 0.0744, 0.1355]) -Greedy action tensor([ 1.4624, -0.2066, -0.6232, 0.1085]) tensor([0.6366, 0.1199, 0.0791, 0.1644]) -Greedy action tensor([ 1.5360, -0.7392, -0.6551, -0.0408]) tensor([0.7036, 0.0723, 0.0787, 0.1454]) -Greedy action tensor([ 0.9092, -0.1802, -0.8471, 0.1082]) tensor([0.5107, 0.1718, 0.0882, 0.2293]) -Greedy action tensor([-0.9841, 0.8936, 0.1146, 0.2420]) tensor([0.0717, 0.4688, 0.2151, 0.2444]) -Greedy action tensor([-0.6511, 0.3697, 0.0820, -0.0094]) tensor([0.1289, 0.3578, 0.2684, 0.2449]) -Greedy action tensor([-1.8438, -0.2343, 0.5896, -0.1326]) tensor([0.0436, 0.2180, 0.4970, 0.2414]) -Greedy action tensor([-1.7672, -0.4352, 0.6964, 0.0195]) tensor([0.0444, 0.1683, 0.5220, 0.2653]) -Greedy action tensor([-1.8038, -0.1376, 0.5753, -0.0728]) tensor([0.0440, 0.2328, 0.4749, 0.2484]) -Greedy action tensor([-1.8878, -0.4027, 0.6510, -0.1464]) tensor([0.0420, 0.1856, 0.5325, 0.2399]) -Greedy action tensor([-1.8527, -0.4465, 0.6221, -0.1366]) tensor([0.0444, 0.1812, 0.5275, 0.2470]) -Greedy action tensor([-0.1676, 1.1490, 0.0451, 0.2953]) tensor([0.1323, 0.4937, 0.1637, 0.2102]) -Greedy action tensor([-1.8281, -0.3222, 0.6149, -0.1385]) tensor([0.0446, 0.2010, 0.5130, 0.2415]) -Greedy action tensor([-1.8038, -0.5209, 0.6240, -0.0864]) tensor([0.0465, 0.1677, 0.5269, 0.2589]) -Greedy action tensor([-1.8442, -0.4396, 0.6389, -0.1297]) tensor([0.0442, 0.1802, 0.5299, 0.2457]) -Greedy action tensor([-1.9001, -0.4554, 0.6417, -0.1663]) tensor([0.0424, 0.1797, 0.5381, 0.2399]) -Greedy action tensor([-1.9092, -0.4422, 0.6516, -0.1621]) tensor([0.0416, 0.1805, 0.5390, 0.2389]) -Greedy action tensor([-1.9375, -0.4582, 0.6802, -0.1708]) tensor([0.0401, 0.1760, 0.5494, 0.2346]) -Greedy action tensor([-1.9275, -0.4441, 0.6620, -0.1739]) tensor([0.0408, 0.1799, 0.5437, 0.2357]) -Greedy action tensor([-1.7307, -0.3124, 0.5791, -0.0354]) tensor([0.0484, 0.2000, 0.4878, 0.2638]) -Greedy action tensor([-1.8001, -0.2838, 0.5953, -0.0977]) tensor([0.0454, 0.2069, 0.4984, 0.2492]) -Greedy action tensor([-1.4741, 0.6029, 0.3162, 0.0029]) tensor([0.0517, 0.4124, 0.3096, 0.2263]) -Greedy action tensor([-1.6466, -0.1137, 0.4976, -0.0243]) tensor([0.0520, 0.2408, 0.4438, 0.2634]) -Greedy action tensor([-1.8616, -0.4552, 0.6246, -0.1466]) tensor([0.0441, 0.1802, 0.5304, 0.2453]) -Greedy action tensor([-1.7521, -0.2231, 0.5837, -0.0923]) tensor([0.0471, 0.2175, 0.4874, 0.2479]) -Greedy action tensor([-1.8171, -0.1098, 0.5724, -0.1112]) tensor([0.0436, 0.2405, 0.4757, 0.2402]) -Greedy action tensor([-1.9279, -0.4256, 0.5415, -0.2193]) tensor([0.0438, 0.1968, 0.5176, 0.2419]) -Greedy action tensor([ 0.1803, -0.5096, 0.2348, -0.5771]) tensor([0.3304, 0.1657, 0.3489, 0.1549]) -Greedy action tensor([-0.8183, 0.9694, 0.0539, 0.3821]) tensor([0.0788, 0.4709, 0.1885, 0.2618]) -Greedy action tensor([-1.3053, -0.5174, 0.2692, 0.2048]) tensor([0.0797, 0.1751, 0.3846, 0.3606]) -Greedy action tensor([-1.9234, -0.4486, 0.6584, -0.1704]) tensor([0.0410, 0.1794, 0.5427, 0.2369]) -Greedy action tensor([-1.9353, -0.4336, 0.6606, -0.1763]) tensor([0.0405, 0.1817, 0.5428, 0.2350]) -Greedy action tensor([-1.8962, -0.4523, 0.6487, -0.1547]) tensor([0.0422, 0.1789, 0.5380, 0.2409]) -Greedy action tensor([-1.1530, 0.8877, 0.1342, 0.3411]) tensor([0.0596, 0.4588, 0.2160, 0.2656]) -Greedy action tensor([-1.9171, -0.4156, 0.6541, -0.1640]) tensor([0.0411, 0.1844, 0.5374, 0.2371]) -Greedy action tensor([-1.9193, -0.4397, 0.6566, -0.1684]) tensor([0.0412, 0.1807, 0.5410, 0.2371]) -Greedy action tensor([-1.9027, -0.3441, 0.6380, -0.1561]) tensor([0.0414, 0.1966, 0.5248, 0.2372]) -Greedy action tensor([-0.8529, -0.1950, 0.3362, 0.7047]) tensor([0.0912, 0.1761, 0.2996, 0.4331]) -Greedy action tensor([-1.9376, -0.4560, 0.6800, -0.1743]) tensor([0.0401, 0.1765, 0.5495, 0.2339]) -Greedy action tensor([-0.7833, 0.6595, 0.1407, -0.2574]) tensor([0.1059, 0.4482, 0.2668, 0.1792]) -Greedy action tensor([-1.8095, -0.4418, 0.5840, -0.1167]) tensor([0.0469, 0.1842, 0.5139, 0.2550]) -Greedy action tensor([-1.2073, 0.8019, 0.1739, 0.1530]) tensor([0.0612, 0.4566, 0.2436, 0.2386]) -Greedy action tensor([-1.6924, -0.5280, 0.5747, 0.0092]) tensor([0.0517, 0.1657, 0.4991, 0.2835]) -Greedy action tensor([-1.8849, -0.4445, 0.6368, -0.1577]) tensor([0.0429, 0.1812, 0.5344, 0.2414]) -Greedy action tensor([-1.9250, -0.4281, 0.6576, -0.1690]) tensor([0.0408, 0.1824, 0.5403, 0.2364]) -Greedy action tensor([-1.8604, -0.3968, 0.6489, -0.2201]) tensor([0.0439, 0.1898, 0.5399, 0.2264]) -Greedy action tensor([-1.7720, -0.1852, 0.5490, -0.1278]) tensor([0.0471, 0.2300, 0.4793, 0.2436]) -Greedy action tensor([-1.8683, -0.4375, 0.6285, -0.1414]) tensor([0.0436, 0.1822, 0.5292, 0.2450]) -Greedy action tensor([-1.9400, -0.4438, 0.6652, -0.1780]) tensor([0.0403, 0.1799, 0.5452, 0.2346]) -Greedy action tensor([-0.3183, -0.6368, 1.0086, 1.2676]) tensor([0.0963, 0.0701, 0.3631, 0.4705]) -Greedy action tensor([-1.8465, -0.4337, 0.6160, -0.1468]) tensor([0.0448, 0.1841, 0.5259, 0.2452]) -Greedy action tensor([-1.7735, -0.1470, 0.5803, -0.2067]) tensor([0.0467, 0.2376, 0.4918, 0.2239]) -Greedy action tensor([-1.9114, -0.4200, 0.6529, -0.1515]) tensor([0.0412, 0.1833, 0.5358, 0.2397]) -Greedy action tensor([-1.6889, -0.4965, 0.5601, -0.1391]) tensor([0.0541, 0.1783, 0.5128, 0.2548]) -Greedy action tensor([-0.7561, 0.3768, 0.5428, 0.2347]) tensor([0.0956, 0.2967, 0.3503, 0.2574]) -Greedy action tensor([-1.5862, -0.5365, 0.5111, -0.1120]) tensor([0.0611, 0.1745, 0.4975, 0.2668]) -Greedy action tensor([-1.9071, -0.4518, 0.6818, -0.1387]) tensor([0.0409, 0.1752, 0.5443, 0.2396]) -Greedy action tensor([-1.9306, -0.4212, 0.6580, -0.1736]) tensor([0.0406, 0.1837, 0.5404, 0.2353]) -Greedy action tensor([-1.9161, -0.4159, 0.6540, -0.1660]) tensor([0.0411, 0.1844, 0.5376, 0.2368]) -Greedy action tensor([-1.9310, -0.4394, 0.6666, -0.1705]) tensor([0.0405, 0.1800, 0.5440, 0.2355]) -Greedy action tensor([-1.8104, -0.0353, 0.5371, -0.1274]) tensor([0.0440, 0.2595, 0.4599, 0.2366]) -Greedy action tensor([-1.8613, -0.4330, 0.6226, -0.1402]) tensor([0.0440, 0.1834, 0.5269, 0.2458]) -Greedy action tensor([-1.8423, -0.3735, 0.6508, -0.1316]) tensor([0.0435, 0.1891, 0.5266, 0.2408]) -Greedy action tensor([-1.7800, -0.4534, 0.5802, -0.1068]) tensor([0.0483, 0.1821, 0.5120, 0.2576]) -Greedy action tensor([-1.4424, -0.5908, 0.5402, 0.1606]) tensor([0.0642, 0.1505, 0.4663, 0.3190]) -Greedy action tensor([-1.8776, -0.4593, 0.6399, -0.1480]) tensor([0.0432, 0.1783, 0.5352, 0.2434]) -Greedy action tensor([-1.1138, -0.2614, 0.3655, -0.0930]) tensor([0.0951, 0.2231, 0.4177, 0.2641]) -Greedy action tensor([-1.6258, 0.1567, 0.3953, 0.0405]) tensor([0.0505, 0.3005, 0.3814, 0.2675]) -Greedy action tensor([-1.9030, -0.4370, 0.6505, -0.1612]) tensor([0.0419, 0.1813, 0.5379, 0.2389]) -Greedy action tensor([-1.8802, -0.3124, 0.6401, -0.1276]) tensor([0.0417, 0.1998, 0.5181, 0.2404]) -Greedy action tensor([-1.9122, -0.4403, 0.6526, -0.1631]) tensor([0.0415, 0.1808, 0.5392, 0.2385]) -Greedy action tensor([-1.0437, 0.6952, 0.1385, 0.0875]) tensor([0.0766, 0.4360, 0.2499, 0.2375]) -Greedy action tensor([-1.7752, -0.4864, 0.5914, -0.1007]) tensor([0.0485, 0.1759, 0.5169, 0.2587]) -Greedy action tensor([-1.8943, -0.4293, 0.6409, -0.1567]) tensor([0.0423, 0.1831, 0.5340, 0.2405]) -Greedy action tensor([-1.9251, -0.4200, 0.6579, -0.1696]) tensor([0.0408, 0.1837, 0.5397, 0.2359]) -Greedy action tensor([-1.6532, -0.5563, 0.5123, -0.0827]) tensor([0.0571, 0.1709, 0.4976, 0.2744]) -Greedy action tensor([-1.8737, -0.3157, 0.6167, -0.1443]) tensor([0.0426, 0.2025, 0.5145, 0.2404]) -Greedy action tensor([-1.9193, -0.4528, 0.6772, -0.1526]) tensor([0.0406, 0.1762, 0.5453, 0.2379]) -Greedy action tensor([-1.8900, -0.3984, 0.6329, -0.1472]) tensor([0.0423, 0.1881, 0.5277, 0.2419]) -Greedy action tensor([-1.6886, -0.5237, 0.5383, -0.0878]) tensor([0.0542, 0.1739, 0.5029, 0.2689]) -Greedy action tensor([-1.1493, -0.5134, 0.5335, -0.1949]) tensor([0.0920, 0.1738, 0.4952, 0.2390]) -Greedy action tensor([-0.4895, -0.4488, 0.1214, 0.3003]) tensor([0.1643, 0.1711, 0.3026, 0.3619]) -Greedy action tensor([-1.8125, -0.2485, 0.5824, -0.0461]) tensor([0.0443, 0.2114, 0.4854, 0.2589]) -Greedy action tensor([-1.8993, -0.4475, 0.6496, -0.1549]) tensor([0.0420, 0.1796, 0.5378, 0.2406]) -Greedy action tensor([-1.8332, -0.3018, 0.5957, -0.1205]) tensor([0.0444, 0.2054, 0.5040, 0.2462]) -Greedy action tensor([ 0.6173, -0.1410, -0.0087, -0.0971]) tensor([0.4012, 0.1879, 0.2145, 0.1964]) -Greedy action tensor([ 0.7102, -0.4965, -0.0446, -0.2686]) tensor([0.4662, 0.1395, 0.2192, 0.1752]) -Greedy action tensor([ 0.5795, 0.2781, -0.1532, 0.1797]) tensor([0.3459, 0.2559, 0.1663, 0.2319]) -Greedy action tensor([ 0.4498, -0.2581, -0.2293, -0.3297]) tensor([0.4068, 0.2004, 0.2063, 0.1866]) -Greedy action tensor([ 0.9638, -0.5991, -0.0047, -0.4221]) tensor([0.5437, 0.1139, 0.2064, 0.1360]) -Greedy action tensor([ 0.7729, -0.7702, -0.1529, -0.2968]) tensor([0.5120, 0.1094, 0.2029, 0.1757]) -Greedy action tensor([ 0.9073, -0.6250, 0.0533, -0.3957]) tensor([0.5226, 0.1129, 0.2225, 0.1420]) -Greedy action tensor([ 0.8882, -0.5428, -0.1010, -0.2405]) tensor([0.5170, 0.1236, 0.1922, 0.1672]) -Greedy action tensor([ 1.0413, -0.3738, -0.0201, -0.3185]) tensor([0.5418, 0.1316, 0.1875, 0.1391]) -Greedy action tensor([ 0.6356, -0.4978, -0.0696, -0.4090]) tensor([0.4613, 0.1485, 0.2279, 0.1623]) -Greedy action tensor([ 0.5167, -0.2981, 0.0189, -0.1813]) tensor([0.3924, 0.1737, 0.2386, 0.1953]) -Greedy action tensor([ 1.0651, -0.5686, -0.0764, -0.3498]) tensor([0.5690, 0.1111, 0.1817, 0.1382]) -Greedy action tensor([ 0.8715, -0.5274, -0.0957, -0.5311]) tensor([0.5339, 0.1318, 0.2030, 0.1313]) -Greedy action tensor([ 1.0982, -0.8814, 0.2096, -0.4578]) tensor([0.5681, 0.0785, 0.2336, 0.1199]) -Greedy action tensor([ 0.9297, -0.5908, -0.0177, -0.4220]) tensor([0.5361, 0.1172, 0.2079, 0.1388]) -Greedy action tensor([ 0.7930, -0.7190, 0.1880, -0.5929]) tensor([0.4959, 0.1093, 0.2708, 0.1240]) -Greedy action tensor([ 0.6835, -0.7159, -0.2085, -0.2808]) tensor([0.4907, 0.1211, 0.2011, 0.1871]) -Greedy action tensor([ 0.3755, 0.3870, -0.1610, 0.1937]) tensor([0.2915, 0.2949, 0.1705, 0.2431]) -Greedy action tensor([ 0.2785, 0.1668, -0.2000, -0.1880]) tensor([0.3183, 0.2847, 0.1973, 0.1997]) -Greedy action tensor([ 0.6484, -0.6496, -0.1010, -0.2578]) tensor([0.4652, 0.1270, 0.2199, 0.1880]) -Greedy action tensor([ 0.9807, -0.5473, -0.0042, -0.5262]) tensor([0.5519, 0.1197, 0.2061, 0.1223]) -Greedy action tensor([ 0.8330, -0.6202, 0.0856, -0.5674]) tensor([0.5118, 0.1197, 0.2424, 0.1262]) -Greedy action tensor([ 1.1394, -0.6647, 0.1412, -0.6390]) tensor([0.5875, 0.0967, 0.2165, 0.0992]) -Greedy action tensor([ 0.3816, -0.0572, -0.1210, -0.2528]) tensor([0.3597, 0.2319, 0.2176, 0.1907]) -Greedy action tensor([ 0.8644, -0.4040, -0.0698, -0.2066]) tensor([0.4958, 0.1395, 0.1948, 0.1699]) -Greedy action tensor([ 1.1355e+00, -4.9270e-01, 2.3574e-04, -4.2549e-01]) tensor([0.5789, 0.1136, 0.1860, 0.1215]) -Greedy action tensor([ 0.9716, -0.6278, -0.1133, -0.4167]) tensor([0.5588, 0.1129, 0.1889, 0.1394]) -Greedy action tensor([ 1.0011, -0.4681, -0.1869, -0.3223]) tensor([0.5552, 0.1278, 0.1692, 0.1478]) -Greedy action tensor([ 0.7248, -0.4995, -0.0818, -0.1463]) tensor([0.4632, 0.1362, 0.2068, 0.1939]) -Greedy action tensor([ 1.0668, -0.4680, 0.0117, -0.4933]) tensor([0.5638, 0.1215, 0.1963, 0.1185]) -Greedy action tensor([ 0.6224, -0.2666, -0.0249, -0.0772]) tensor([0.4113, 0.1691, 0.2153, 0.2043]) -Greedy action tensor([ 1.2147, -0.7177, -0.0863, -0.6530]) tensor([0.6363, 0.0921, 0.1732, 0.0983]) -Greedy action tensor([ 0.8198, -0.5129, -0.0995, -0.6675]) tensor([0.5295, 0.1397, 0.2112, 0.1197]) -Greedy action tensor([ 0.6351, -0.1653, -0.0435, -0.0384]) tensor([0.4054, 0.1821, 0.2057, 0.2068]) -Greedy action tensor([ 0.3739, -0.0404, -0.0693, -0.5248]) tensor([0.3690, 0.2439, 0.2369, 0.1502]) -Greedy action tensor([ 0.4778, -0.1216, 0.0810, -0.0741]) tensor([0.3575, 0.1963, 0.2404, 0.2058]) -Greedy action tensor([ 0.9853, -0.7561, 0.0620, -0.5249]) tensor([0.5576, 0.0977, 0.2215, 0.1232]) -Greedy action tensor([ 1.2834, -0.6397, -0.0050, -0.9359]) tensor([0.6534, 0.0955, 0.1801, 0.0710]) -Greedy action tensor([ 0.6850, -0.4592, 0.0045, -0.3610]) tensor([0.4595, 0.1463, 0.2327, 0.1614]) -Greedy action tensor([ 1.4216, -0.9085, 0.1114, -0.7447]) tensor([0.6749, 0.0657, 0.1821, 0.0773]) -Greedy action tensor([ 7.5088e-01, -6.2950e-01, 5.9754e-06, -2.7288e-01]) tensor([0.4801, 0.1207, 0.2266, 0.1725]) -Greedy action tensor([ 0.8715, -0.1448, -0.0473, -0.0317]) tensor([0.4617, 0.1671, 0.1842, 0.1871]) -Greedy action tensor([ 0.3524, -0.0098, -0.0434, -0.0347]) tensor([0.3280, 0.2284, 0.2208, 0.2227]) -Greedy action tensor([ 0.8484, -0.7124, -0.0695, -0.3345]) tensor([0.5220, 0.1096, 0.2085, 0.1599]) -Greedy action tensor([ 1.0684, -0.5225, 0.0578, -0.5368]) tensor([0.5654, 0.1152, 0.2058, 0.1136]) -Greedy action tensor([ 0.4069, -0.1620, -0.0964, -0.2820]) tensor([0.3741, 0.2118, 0.2262, 0.1879]) -Greedy action tensor([ 0.4157, -0.0952, -0.0326, -0.3797]) tensor([0.3717, 0.2230, 0.2374, 0.1678]) -Greedy action tensor([ 0.5479, -0.3259, -0.0699, -0.2669]) tensor([0.4168, 0.1740, 0.2247, 0.1845]) -Greedy action tensor([ 1.0670, -0.5936, 0.1509, -0.5173]) tensor([0.5570, 0.1059, 0.2229, 0.1142]) -Greedy action tensor([ 1.2224, -0.8711, 0.0404, -0.8315]) tensor([0.6418, 0.0791, 0.1968, 0.0823]) -Greedy action tensor([ 0.9035, -0.5503, -0.0447, -0.5102]) tensor([0.5364, 0.1253, 0.2078, 0.1305]) -Greedy action tensor([ 0.7463, -0.3188, -0.0562, -0.2158]) tensor([0.4598, 0.1585, 0.2061, 0.1757]) -Greedy action tensor([ 0.7469, -0.6052, -0.0865, -0.2707]) tensor([0.4867, 0.1259, 0.2115, 0.1759]) -Greedy action tensor([ 0.9913, -0.5641, -0.1247, -0.4186]) tensor([0.5609, 0.1184, 0.1837, 0.1370]) -Greedy action tensor([ 0.8441, -0.5323, -0.1031, -0.4213]) tensor([0.5202, 0.1313, 0.2017, 0.1468]) -Greedy action tensor([ 0.9472, -0.5224, -0.1895, -0.4422]) tensor([0.5555, 0.1278, 0.1782, 0.1385]) -Greedy action tensor([ 0.8781, -0.4893, -0.0450, -0.3309]) tensor([0.5127, 0.1306, 0.2037, 0.1530]) -Greedy action tensor([ 0.8421, -0.6146, -0.0781, -0.1800]) tensor([0.5022, 0.1170, 0.2001, 0.1807]) -Greedy action tensor([ 0.9865, -0.6477, -0.0286, -0.5012]) tensor([0.5607, 0.1094, 0.2032, 0.1267]) -Greedy action tensor([ 0.4785, -0.0774, 0.0145, -0.0215]) tensor([0.3560, 0.2042, 0.2239, 0.2159]) -Greedy action tensor([ 0.5755, -0.1876, -0.0819, -0.0144]) tensor([0.3939, 0.1836, 0.2041, 0.2184]) -Greedy action tensor([ 0.3492, -0.1048, -0.0331, -0.1688]) tensor([0.3433, 0.2180, 0.2342, 0.2045]) -Greedy action tensor([ 1.3355, -0.7033, -0.0537, -0.5837]) tensor([0.6552, 0.0853, 0.1633, 0.0961]) -Greedy action tensor([ 0.4259, 0.0624, -0.0150, 0.0193]) tensor([0.3328, 0.2314, 0.2142, 0.2216]) -Greedy action tensor([ 0.7431, -0.6773, -0.0248, -0.3163]) tensor([0.4873, 0.1177, 0.2261, 0.1689]) -Greedy action tensor([ 0.7459, -0.2849, -0.0508, -0.2309]) tensor([0.4579, 0.1633, 0.2064, 0.1724]) -Greedy action tensor([ 0.3014, 0.0881, -0.0254, 0.0278]) tensor([0.3040, 0.2456, 0.2192, 0.2312]) -Greedy action tensor([ 0.6582, -0.3316, 0.0125, -0.1056]) tensor([0.4234, 0.1574, 0.2220, 0.1973]) -Greedy action tensor([ 0.7756, -0.2476, -0.0286, -0.1213]) tensor([0.4515, 0.1623, 0.2020, 0.1841]) -Greedy action tensor([ 0.0900, 0.0227, 0.0994, -0.3470]) tensor([0.2785, 0.2604, 0.2811, 0.1799]) -Greedy action tensor([ 0.5150, -0.5127, -0.1424, -0.0613]) tensor([0.4102, 0.1468, 0.2126, 0.2305]) -Greedy action tensor([ 0.6417, -0.2249, 0.0276, -0.0929]) tensor([0.4096, 0.1722, 0.2217, 0.1965]) -Greedy action tensor([ 0.6174, -0.4374, -0.0619, -0.0765]) tensor([0.4246, 0.1479, 0.2153, 0.2122]) -Greedy action tensor([ 0.7549, -0.4973, -0.0875, -0.3520]) tensor([0.4885, 0.1396, 0.2104, 0.1615]) -Greedy action tensor([ 0.7785, -0.1948, 0.0773, -0.3584]) tensor([0.4557, 0.1722, 0.2260, 0.1462]) -Greedy action tensor([ 0.8183, -0.5106, 0.0010, -0.4849]) tensor([0.5055, 0.1339, 0.2233, 0.1373]) -Greedy action tensor([ 0.4717, -0.1409, -0.0102, -0.3300]) tensor([0.3834, 0.2078, 0.2368, 0.1720]) -Greedy action tensor([ 1.3028, -0.8825, -0.0982, -0.6011]) tensor([0.6632, 0.0746, 0.1634, 0.0988]) -Greedy action tensor([ 1.0084, -0.7959, 0.2024, -0.4422]) tensor([0.5418, 0.0892, 0.2420, 0.1270]) -Greedy action tensor([ 0.8934, -0.5299, -0.0462, -0.3749]) tensor([0.5227, 0.1259, 0.2043, 0.1471]) -Greedy action tensor([ 0.8307, -0.5070, -0.1665, -0.6492]) tensor([0.5379, 0.1412, 0.1984, 0.1225]) -Greedy action tensor([ 1.2661, -0.3324, -0.5628, -0.0380]) tensor([0.6119, 0.1237, 0.0983, 0.1661]) -Greedy action tensor([ 2.1076, -1.0213, -0.3694, 0.6900]) tensor([0.7299, 0.0319, 0.0613, 0.1768]) -Greedy action tensor([ 1.2697, 0.2736, -0.7737, 0.1196]) tensor([0.5508, 0.2034, 0.0714, 0.1744]) -Greedy action tensor([ 1.4521, -1.0934, -0.4267, 0.5266]) tensor([0.6144, 0.0482, 0.0939, 0.2435]) -Greedy action tensor([ 0.8297, -0.5321, 0.0325, 0.2768]) tensor([0.4382, 0.1123, 0.1974, 0.2521]) -Greedy action tensor([0.9905, 0.0313, 0.2262, 0.0425]) tensor([0.4472, 0.1714, 0.2082, 0.1733]) -Greedy action tensor([ 1.1506, -0.6190, -0.2088, 0.6351]) tensor([0.4940, 0.0842, 0.1269, 0.2950]) -Greedy action tensor([ 1.6497, -0.4451, -0.3945, 0.3015]) tensor([0.6613, 0.0814, 0.0856, 0.1717]) -Greedy action tensor([ 1.3288, -0.3245, -0.9572, 0.4138]) tensor([0.5905, 0.1130, 0.0600, 0.2365]) -Greedy action tensor([ 1.3868, -0.6695, -0.1811, 0.4671]) tensor([0.5764, 0.0737, 0.1202, 0.2298]) -Greedy action tensor([ 1.4195, -0.4292, -0.7780, 0.3792]) tensor([0.6166, 0.0971, 0.0685, 0.2179]) -Greedy action tensor([ 1.6503, 0.2237, -0.6038, 0.4018]) tensor([0.6127, 0.1471, 0.0643, 0.1758]) -Greedy action tensor([ 1.7565, 0.2755, -0.1333, 0.4523]) tensor([0.6061, 0.1378, 0.0916, 0.1645]) -Greedy action tensor([ 1.2978, -0.7633, -0.3566, 0.6125]) tensor([0.5487, 0.0699, 0.1049, 0.2765]) -Greedy action tensor([ 1.1731, -0.2606, -0.2432, 0.5166]) tensor([0.5001, 0.1192, 0.1213, 0.2594]) -Greedy action tensor([ 1.1281, 0.0609, -0.9837, 0.1185]) tensor([0.5466, 0.1880, 0.0662, 0.1992]) -Greedy action tensor([ 2.3905, -0.7918, -0.0876, 0.4264]) tensor([0.7901, 0.0328, 0.0663, 0.1108]) -Greedy action tensor([ 1.6179, 0.1389, -0.6350, 0.6099]) tensor([0.5890, 0.1342, 0.0619, 0.2149]) -Greedy action tensor([ 1.2681, -0.4345, -0.2501, 0.1529]) tensor([0.5783, 0.1054, 0.1267, 0.1896]) -Greedy action tensor([ 1.6574, -1.1409, -0.4968, 0.7602]) tensor([0.6311, 0.0384, 0.0732, 0.2573]) -Greedy action tensor([ 1.6222, -0.8353, -0.4554, 0.7467]) tensor([0.6144, 0.0526, 0.0769, 0.2560]) -Greedy action tensor([ 1.2812, -0.1565, -0.8474, 0.1082]) tensor([0.6003, 0.1425, 0.0714, 0.1857]) -Greedy action tensor([ 1.2935, -0.4688, -0.0568, 0.2377]) tensor([0.5622, 0.0965, 0.1457, 0.1956]) -Greedy action tensor([ 1.2624, -0.5967, -0.0486, -0.0645]) tensor([0.5915, 0.0922, 0.1594, 0.1569]) -Greedy action tensor([ 1.2341, 0.0306, -0.6706, 0.2007]) tensor([0.5541, 0.1663, 0.0825, 0.1971]) -Greedy action tensor([ 1.4249, -0.4404, -0.7520, 0.5027]) tensor([0.6003, 0.0930, 0.0681, 0.2387]) -Greedy action tensor([ 1.2579, -0.2225, -0.6539, 0.4571]) tensor([0.5481, 0.1247, 0.0810, 0.2461]) -Greedy action tensor([ 2.1901, -0.8515, -0.1998, 0.2300]) tensor([0.7811, 0.0373, 0.0716, 0.1100]) -Greedy action tensor([ 1.8633, -1.0376, -0.4643, 0.5057]) tensor([0.7093, 0.0390, 0.0692, 0.1825]) -Greedy action tensor([ 1.4820, -0.2324, -0.4365, 0.6009]) tensor([0.5743, 0.1034, 0.0843, 0.2379]) -Greedy action tensor([ 1.8731, -0.9068, -0.4524, 0.8339]) tensor([0.6607, 0.0410, 0.0646, 0.2337]) -Greedy action tensor([ 0.6369, -0.3549, 0.0273, -0.0305]) tensor([0.4119, 0.1528, 0.2239, 0.2113]) -Greedy action tensor([ 1.5472, -0.4704, -0.7033, 0.5672]) tensor([0.6197, 0.0824, 0.0653, 0.2326]) -Greedy action tensor([ 1.0662, -0.2564, -0.6877, 0.2712]) tensor([0.5288, 0.1409, 0.0915, 0.2388]) -Greedy action tensor([ 1.6092, -1.0234, -0.2100, 0.5825]) tensor([0.6280, 0.0451, 0.1018, 0.2250]) -Greedy action tensor([ 1.1485, -0.2358, -0.4203, 0.0972]) tensor([0.5530, 0.1385, 0.1152, 0.1933]) -Greedy action tensor([ 1.2975, -0.2135, -0.0672, 0.3137]) tensor([0.5405, 0.1193, 0.1381, 0.2021]) -Greedy action tensor([ 1.6179, -0.3700, -0.5798, 0.1742]) tensor([0.6738, 0.0923, 0.0748, 0.1590]) -Greedy action tensor([ 2.0997, -0.7969, -0.4389, 0.6225]) tensor([0.7340, 0.0405, 0.0580, 0.1675]) -Greedy action tensor([ 1.7152, -0.6027, -0.6036, 0.5279]) tensor([0.6658, 0.0656, 0.0655, 0.2031]) -Greedy action tensor([ 1.5117, -0.7069, -0.7948, 0.0375]) tensor([0.6957, 0.0757, 0.0693, 0.1593]) -Greedy action tensor([ 1.3616, -0.3310, -0.4558, 0.1825]) tensor([0.6046, 0.1113, 0.0982, 0.1859]) -Greedy action tensor([ 1.1948, -0.2272, -0.4552, 0.2071]) tensor([0.5538, 0.1336, 0.1064, 0.2063]) -Greedy action tensor([ 1.6664, -0.6606, -0.8602, 0.6479]) tensor([0.6499, 0.0634, 0.0519, 0.2347]) -Greedy action tensor([ 2.0955, -0.6078, -0.6470, 0.5272]) tensor([0.7464, 0.0500, 0.0481, 0.1555]) -Greedy action tensor([ 1.2628, -0.2286, -0.6011, 0.1395]) tensor([0.5864, 0.1320, 0.0909, 0.1907]) -Greedy action tensor([ 1.9656, 0.3264, 0.5316, -0.0370]) tensor([0.6380, 0.1238, 0.1521, 0.0861]) -Greedy action tensor([ 1.0365, -0.0770, -0.3569, 0.1661]) tensor([0.5011, 0.1646, 0.1244, 0.2099]) -Greedy action tensor([ 1.0981, -0.2187, -0.0218, 0.3646]) tensor([0.4820, 0.1292, 0.1573, 0.2315]) -Greedy action tensor([ 1.0885, -0.1510, -0.6099, 0.1760]) tensor([0.5336, 0.1545, 0.0976, 0.2143]) -Greedy action tensor([ 1.4389, -0.3414, -0.4983, 0.3343]) tensor([0.6083, 0.1026, 0.0877, 0.2015]) -Greedy action tensor([ 1.1420, -0.2228, -0.0806, 0.3812]) tensor([0.4957, 0.1266, 0.1460, 0.2317]) -Greedy action tensor([ 0.9562, 0.2554, -0.3067, 0.3985]) tensor([0.4252, 0.2110, 0.1203, 0.2435]) -Greedy action tensor([ 0.9508, -0.2996, -0.6748, 0.4963]) tensor([0.4722, 0.1352, 0.0929, 0.2997]) -Greedy action tensor([ 1.1975, -0.5990, -0.9944, 0.3243]) tensor([0.5899, 0.0979, 0.0659, 0.2463]) -Greedy action tensor([ 1.1874, -0.4653, -1.2231, 0.6069]) tensor([0.5432, 0.1040, 0.0488, 0.3040]) -Greedy action tensor([ 1.2879, -0.1005, -0.1298, -0.1690]) tensor([0.5798, 0.1446, 0.1405, 0.1351]) -Greedy action tensor([ 1.4857, -0.8340, -0.3311, 0.3283]) tensor([0.6349, 0.0624, 0.1032, 0.1995]) -Greedy action tensor([ 1.1933, 0.0609, -0.6489, 0.2156]) tensor([0.5385, 0.1735, 0.0853, 0.2026]) -Greedy action tensor([ 1.0220, -0.2565, -0.5954, 0.6543]) tensor([0.4610, 0.1284, 0.0915, 0.3192]) -Greedy action tensor([ 1.3452, -0.5462, -0.7906, 0.8543]) tensor([0.5316, 0.0802, 0.0628, 0.3254]) -Greedy action tensor([ 1.4531, -0.7735, -0.5217, 0.2036]) tensor([0.6522, 0.0704, 0.0905, 0.1869]) -Greedy action tensor([ 2.4477, -0.0728, -0.0886, 0.5506]) tensor([0.7636, 0.0614, 0.0604, 0.1145]) -Greedy action tensor([ 1.4607, -0.7285, -0.3545, 0.1621]) tensor([0.6461, 0.0724, 0.1052, 0.1763]) -Greedy action tensor([ 1.6217, -0.4197, -0.7703, -0.0702]) tensor([0.7115, 0.0924, 0.0651, 0.1310]) -Greedy action tensor([ 1.2524, -0.3104, -0.4931, 0.3543]) tensor([0.5582, 0.1170, 0.0974, 0.2274]) -Greedy action tensor([ 1.6323, -0.6568, -0.5442, 0.5216]) tensor([0.6476, 0.0656, 0.0735, 0.2133]) -Greedy action tensor([ 1.1306, -0.5314, -0.6030, -0.0786]) tensor([0.6007, 0.1140, 0.1061, 0.1793]) -Greedy action tensor([ 1.2431, -0.1389, -1.2757, 0.2596]) tensor([0.5863, 0.1472, 0.0472, 0.2193]) -Greedy action tensor([ 1.2648, -0.3746, -0.6957, 0.1439]) tensor([0.6021, 0.1169, 0.0848, 0.1963]) -Greedy action tensor([ 1.9533, -1.3066, -0.6248, 0.2557]) tensor([0.7708, 0.0296, 0.0585, 0.1411]) -Greedy action tensor([ 1.3283, -0.1497, -1.0829, 0.1960]) tensor([0.6097, 0.1391, 0.0547, 0.1965]) -Greedy action tensor([ 1.8039, -0.6496, -0.1081, 0.8736]) tensor([0.6142, 0.0528, 0.0908, 0.2423]) -Greedy action tensor([ 1.3489, -0.1549, -0.4981, -0.1442]) tensor([0.6232, 0.1385, 0.0983, 0.1400]) -Greedy action tensor([ 1.8498, -0.9005, -0.3989, 0.5437]) tensor([0.6943, 0.0444, 0.0733, 0.1881]) -Greedy action tensor([ 1.3190, -0.0695, -0.7155, 0.0249]) tensor([0.6045, 0.1508, 0.0790, 0.1657]) -Greedy action tensor([ 0.9155, -0.2588, -0.2953, 0.2824]) tensor([0.4677, 0.1445, 0.1394, 0.2483]) -Greedy action tensor([ 1.6538, -0.3055, -1.1891, 0.2646]) tensor([0.6904, 0.0973, 0.0402, 0.1721]) -Greedy action tensor([ 2.1326, -0.1402, -0.1387, -0.0118]) tensor([0.7557, 0.0779, 0.0780, 0.0885]) -Greedy action tensor([ 1.2673, 0.1267, -1.2429, 0.1287]) tensor([0.5810, 0.1857, 0.0472, 0.1861]) -Greedy action tensor([ 1.6187, -0.9349, 0.2720, -0.1942]) tensor([0.6662, 0.0518, 0.1733, 0.1087]) -Greedy action tensor([-0.4269, -0.3046, 0.0314, -0.9758]) tensor([0.2332, 0.2635, 0.3687, 0.1347]) -Greedy action tensor([-0.4314, -0.9888, 0.0918, 0.2800]) tensor([0.1888, 0.1081, 0.3186, 0.3845]) -Greedy action tensor([ 0.2950, -1.4973, -0.5938, 0.4567]) tensor([0.3632, 0.0605, 0.1493, 0.4269]) -Greedy action tensor([ 0.1518, -0.1233, -0.2105, 0.2778]) tensor([0.2786, 0.2116, 0.1939, 0.3160]) -Greedy action tensor([ 0.9576, -0.9461, 1.1435, -0.1853]) tensor([0.3742, 0.0558, 0.4507, 0.1193]) -Greedy action tensor([ 0.6885, -0.9274, -1.1956, -0.4282]) tensor([0.5959, 0.1184, 0.0906, 0.1951]) -Greedy action tensor([-0.3205, -0.8495, 0.2194, -0.4232]) tensor([0.2377, 0.1400, 0.4078, 0.2145]) -Greedy action tensor([ 0.4288, -1.7344, 0.7318, 0.2121]) tensor([0.3054, 0.0351, 0.4135, 0.2459]) -Greedy action tensor([ 0.3653, -0.9325, 1.3029, 0.3933]) tensor([0.2059, 0.0563, 0.5260, 0.2118]) -Greedy action tensor([ 1.1677, -1.2008, 1.2282, 0.4491]) tensor([0.3783, 0.0354, 0.4019, 0.1844]) -Greedy action tensor([-0.2322, -0.4423, 0.0670, -1.2849]) tensor([0.2850, 0.2310, 0.3844, 0.0995]) -Greedy action tensor([ 0.0113, -1.0444, 0.2111, -0.3750]) tensor([0.3078, 0.1071, 0.3759, 0.2092]) -Greedy action tensor([ 1.2035, -0.9515, 0.8745, 0.7985]) tensor([0.3996, 0.0463, 0.2876, 0.2665]) -Greedy action tensor([ 0.0827, -1.0032, 0.2009, 0.1959]) tensor([0.2791, 0.0942, 0.3141, 0.3126]) -Greedy action tensor([-0.3347, -0.2272, 0.2726, -0.4792]) tensor([0.2077, 0.2313, 0.3812, 0.1798]) -Greedy action tensor([ 0.0652, -0.2211, 0.9067, -0.0700]) tensor([0.2023, 0.1519, 0.4692, 0.1767]) -Greedy action tensor([-0.2213, -1.0091, -0.4937, -0.2194]) tensor([0.3107, 0.1413, 0.2366, 0.3113]) -Greedy action tensor([ 0.3336, 0.0363, -0.0216, -0.2557]) tensor([0.3335, 0.2477, 0.2338, 0.1850]) -Greedy action tensor([ 1.4172, -0.8453, 0.8296, 1.5715]) tensor([0.3538, 0.0368, 0.1966, 0.4128]) -Greedy action tensor([ 0.1597, -1.0395, 1.0573, -0.7278]) tensor([0.2400, 0.0723, 0.5889, 0.0988]) -Greedy action tensor([ 0.7681, -0.0799, -0.2071, -0.2851]) tensor([0.4642, 0.1988, 0.1751, 0.1619]) -Greedy action tensor([ 1.4361, -1.5720, -0.0383, -0.0146]) tensor([0.6611, 0.0326, 0.1513, 0.1549]) -Greedy action tensor([-1.8677, 0.3080, -0.0321, -0.8808]) tensor([0.0533, 0.4695, 0.3342, 0.1430]) -Greedy action tensor([ 0.6142, -1.5955, -0.3082, 0.7827]) tensor([0.3716, 0.0408, 0.1477, 0.4398]) -Greedy action tensor([-0.4156, -1.7821, 0.7438, -0.6703]) tensor([0.1916, 0.0489, 0.6110, 0.1485]) -Greedy action tensor([-1.5315, 0.4504, 0.2306, -0.5322]) tensor([0.0595, 0.4320, 0.3468, 0.1617]) -Greedy action tensor([-0.0873, -1.2129, 1.1036, -0.9934]) tensor([0.1993, 0.0647, 0.6556, 0.0805]) -Greedy action tensor([ 1.1321, -1.6157, -0.0746, -0.1280]) tensor([0.6072, 0.0389, 0.1817, 0.1722]) -Greedy action tensor([-0.6489, -0.4673, -0.0399, -0.7851]) tensor([0.2036, 0.2442, 0.3744, 0.1777]) -Greedy action tensor([0.6933, 0.4418, 0.0972, 0.0935]) tensor([0.3475, 0.2702, 0.1915, 0.1908]) -Greedy action tensor([-0.2518, -0.9119, -0.0326, -0.5926]) tensor([0.2879, 0.1488, 0.3585, 0.2048]) -Greedy action tensor([ 0.7354, 0.1553, 0.7583, -0.0434]) tensor([0.3287, 0.1840, 0.3364, 0.1509]) -Greedy action tensor([-0.4024, -0.1746, 0.0085, -0.3968]) tensor([0.2097, 0.2633, 0.3162, 0.2108]) -Greedy action tensor([-0.4804, -1.7642, 1.5197, -0.9245]) tensor([0.1074, 0.0298, 0.7939, 0.0689]) -Greedy action tensor([ 0.4171, 0.2690, 1.0189, -0.5867]) tensor([0.2467, 0.2127, 0.4502, 0.0904]) -Greedy action tensor([-0.7576, 0.0409, 0.7530, 0.6624]) tensor([0.0841, 0.1869, 0.3810, 0.3480]) -Greedy action tensor([1.0439, 0.2252, 0.8312, 0.4655]) tensor([0.3558, 0.1569, 0.2877, 0.1996]) -Greedy action tensor([-0.2045, -1.2126, -0.3433, 0.2028]) tensor([0.2675, 0.0976, 0.2328, 0.4020]) -Greedy action tensor([-0.2390, -1.2837, -0.4518, 0.1209]) tensor([0.2783, 0.0979, 0.2250, 0.3988]) -Greedy action tensor([ 0.4305, -0.0094, 0.0602, -0.4585]) tensor([0.3642, 0.2346, 0.2515, 0.1497]) -Greedy action tensor([ 0.3828, -0.3680, 0.5088, -0.3289]) tensor([0.3229, 0.1524, 0.3662, 0.1585]) -Greedy action tensor([-0.6037, 0.0194, -0.3055, -0.6351]) tensor([0.1930, 0.3599, 0.2601, 0.1870]) -Greedy action tensor([-1.1378, 0.3538, -0.3254, -0.6889]) tensor([0.1079, 0.4797, 0.2432, 0.1691]) -Greedy action tensor([-0.3710, 0.3993, -0.3426, -0.5453]) tensor([0.1988, 0.4296, 0.2046, 0.1670]) -Greedy action tensor([ 0.1563, 0.0995, -0.2386, -0.8611]) tensor([0.3356, 0.3170, 0.2261, 0.1213]) -Greedy action tensor([ 0.5551, -1.3837, -0.2352, -0.1247]) tensor([0.4752, 0.0684, 0.2156, 0.2408]) -Greedy action tensor([ 0.3291, -0.3791, -0.8872, 0.1773]) tensor([0.3776, 0.1860, 0.1119, 0.3245]) -Greedy action tensor([-0.8101, -1.0987, -0.3562, -0.5504]) tensor([0.2164, 0.1622, 0.3408, 0.2806]) -Greedy action tensor([-0.3137, -0.5772, -0.4144, -0.0316]) tensor([0.2501, 0.1922, 0.2261, 0.3316]) -Greedy action tensor([ 0.2904, 0.0145, 0.4919, -0.1715]) tensor([0.2768, 0.2101, 0.3386, 0.1744]) -Greedy action tensor([-0.1000, -1.4141, 0.1919, -0.4253]) tensor([0.3003, 0.0807, 0.4021, 0.2169]) -Greedy action tensor([ 0.7741, -1.9048, 0.6603, 0.2714]) tensor([0.3897, 0.0267, 0.3478, 0.2357]) -Greedy action tensor([ 0.2423, 0.1231, 0.2608, -0.7885]) tensor([0.3065, 0.2720, 0.3122, 0.1093]) -Greedy action tensor([-0.7758, -1.4546, 0.6050, -1.3826]) tensor([0.1658, 0.0841, 0.6597, 0.0904]) -Greedy action tensor([ 0.2541, -0.0274, -0.2078, -0.4192]) tensor([0.3455, 0.2607, 0.2177, 0.1762]) -Greedy action tensor([-0.4515, -1.1250, 0.9330, -0.4116]) tensor([0.1528, 0.0779, 0.6102, 0.1590]) -Greedy action tensor([ 0.0435, -1.1287, 0.7748, -0.5102]) tensor([0.2524, 0.0782, 0.5244, 0.1451]) -Greedy action tensor([-1.8233, -0.0526, -0.0333, -0.6825]) tensor([0.0625, 0.3673, 0.3745, 0.1957]) -Greedy action tensor([-0.2074, -0.7703, -0.4976, 0.1651]) tensor([0.2653, 0.1511, 0.1985, 0.3851]) -Greedy action tensor([-1.2321, -2.0706, -0.1050, -0.3990]) tensor([0.1466, 0.0634, 0.4526, 0.3373]) -Greedy action tensor([ 0.9522, -1.3144, -0.0054, 0.0190]) tensor([0.5317, 0.0551, 0.2041, 0.2091]) -Greedy action tensor([ 1.1611, -0.6817, 0.5371, 1.4342]) tensor([0.3324, 0.0526, 0.1781, 0.4368]) -Greedy action tensor([-0.4454, -0.6257, -0.3233, -0.5663]) tensor([0.2597, 0.2168, 0.2934, 0.2301]) -Greedy action tensor([ 0.1369, -1.2951, 0.3072, -1.0909]) tensor([0.3680, 0.0879, 0.4363, 0.1078]) -Greedy action tensor([-0.3358, 0.3872, 0.0217, -1.0676]) tensor([0.2011, 0.4145, 0.2876, 0.0968]) -Greedy action tensor([ 0.7322, -0.6944, 0.3319, 0.0153]) tensor([0.4169, 0.1001, 0.2794, 0.2036]) -Greedy action tensor([ 0.8465, 0.0183, -1.0134, 0.4596]) tensor([0.4402, 0.1923, 0.0685, 0.2990]) -Greedy action tensor([-0.1589, -0.9322, -0.1862, -0.6887]) tensor([0.3308, 0.1526, 0.3219, 0.1947]) -Greedy action tensor([-0.4552, 0.1068, 0.8573, -0.4208]) tensor([0.1332, 0.2337, 0.4951, 0.1379]) -Greedy action tensor([-0.8989, -1.4374, 1.1082, -1.2686]) tensor([0.1029, 0.0601, 0.7659, 0.0711]) -Greedy action tensor([ 0.1853, -0.0738, -0.0179, -0.4176]) tensor([0.3190, 0.2462, 0.2603, 0.1746]) -Greedy action tensor([ 0.3552, -0.8418, -0.6441, -0.2893]) tensor([0.4555, 0.1376, 0.1677, 0.2391]) -Greedy action tensor([-1.1237, -1.4620, 0.7595, -1.3239]) tensor([0.1098, 0.0783, 0.7220, 0.0899]) -Greedy action tensor([ 0.0513, -1.5310, -0.3753, -0.4551]) tensor([0.4063, 0.0835, 0.2652, 0.2449]) -Greedy action tensor([-0.2280, 0.9961, -0.5527, -0.8121]) tensor([0.1760, 0.5986, 0.1272, 0.0981]) -Greedy action tensor([ 0.3462, -0.3992, 0.9429, -0.3492]) tensor([0.2639, 0.1252, 0.4793, 0.1316]) -Greedy action tensor([-0.1500, -0.0169, 0.4413, -1.3560]) tensor([0.2354, 0.2689, 0.4252, 0.0705]) -Greedy action tensor([-0.5272, -1.1683, -0.1291, -0.7462]) tensor([0.2618, 0.1379, 0.3899, 0.2103]) -Greedy action tensor([ 0.5765, -0.6479, -0.5795, -0.1824]) tensor([0.4815, 0.1415, 0.1515, 0.2254]) -Greedy action tensor([ 0.1758, -0.4551, 0.1230, 0.8691]) tensor([0.2232, 0.1187, 0.2117, 0.4464]) -Greedy action tensor([-1.0800, -1.0688, -0.3269, -0.7937]) tensor([0.1829, 0.1850, 0.3885, 0.2436]) -Greedy action tensor([-0.1238, 0.0824, 0.8181, -0.7057]) tensor([0.1868, 0.2296, 0.4792, 0.1044]) -Greedy action tensor([ 0.8872, -0.6884, -0.0676, -0.3753]) tensor([0.5334, 0.1104, 0.2053, 0.1509]) -Greedy action tensor([ 0.6295, -0.5342, -0.2375, 0.0651]) tensor([0.4345, 0.1357, 0.1826, 0.2471]) -Greedy action tensor([ 0.6078, -0.3630, -0.1482, -0.2341]) tensor([0.4388, 0.1662, 0.2060, 0.1890]) -Greedy action tensor([ 0.4528, 0.0610, -0.0163, -0.0412]) tensor([0.3435, 0.2321, 0.2149, 0.2096]) -Greedy action tensor([ 1.3086, -0.9409, 0.0016, -0.6721]) tensor([0.6605, 0.0696, 0.1787, 0.0911]) -Greedy action tensor([ 0.7895, -0.2599, -0.1824, -0.3731]) tensor([0.4899, 0.1715, 0.1854, 0.1532]) -Greedy action tensor([ 0.7071, -0.3582, 0.1267, -0.3178]) tensor([0.4419, 0.1523, 0.2473, 0.1586]) -Greedy action tensor([ 1.2124, -0.6366, 0.0964, -0.3986]) tensor([0.5936, 0.0934, 0.1945, 0.1185]) -Greedy action tensor([ 0.7704, -0.3521, -0.1333, -0.2112]) tensor([0.4750, 0.1546, 0.1924, 0.1780]) -Greedy action tensor([ 0.7547, -0.2125, -0.0479, -0.2582]) tensor([0.4563, 0.1735, 0.2045, 0.1657]) -Greedy action tensor([ 1.0702, -0.6106, -0.0359, -0.6363]) tensor([0.5887, 0.1096, 0.1948, 0.1069]) -Greedy action tensor([ 0.8334, -0.5086, -0.0454, -0.4091]) tensor([0.5088, 0.1330, 0.2113, 0.1469]) -Greedy action tensor([ 0.6541, -0.2562, 0.0921, -0.0800]) tensor([0.4078, 0.1641, 0.2324, 0.1957]) -Greedy action tensor([ 1.0273, -0.4888, -0.0382, -0.1558]) tensor([0.5346, 0.1174, 0.1842, 0.1638]) -Greedy action tensor([ 8.2628e-01, -6.4330e-01, 5.8129e-04, -4.2355e-01]) tensor([0.5116, 0.1177, 0.2241, 0.1466]) -Greedy action tensor([ 0.9360, -0.8625, -0.0195, -0.5518]) tensor([0.5631, 0.0932, 0.2166, 0.1272]) -Greedy action tensor([ 0.8403, -0.2215, -0.0769, -0.2270]) tensor([0.4786, 0.1655, 0.1913, 0.1646]) -Greedy action tensor([ 0.6521, -0.8051, -0.1408, -0.3027]) tensor([0.4830, 0.1125, 0.2186, 0.1859]) -Greedy action tensor([ 0.3531, -0.0259, -0.1781, 0.0120]) tensor([0.3352, 0.2294, 0.1971, 0.2383]) -Greedy action tensor([ 0.6995, -0.1386, -0.0173, -0.0342]) tensor([0.4165, 0.1801, 0.2034, 0.2000]) -Greedy action tensor([ 1.3345, -0.9686, 0.1815, -0.6943]) tensor([0.6464, 0.0646, 0.2040, 0.0850]) -Greedy action tensor([ 0.5124, -0.2446, -0.0265, -0.0532]) tensor([0.3816, 0.1790, 0.2226, 0.2168]) -Greedy action tensor([ 1.2449, -0.8077, -0.1023, -0.6212]) tensor([0.6480, 0.0832, 0.1685, 0.1003]) -Greedy action tensor([ 0.5024, -0.0895, -0.0330, 0.0055]) tensor([0.3640, 0.2014, 0.2131, 0.2215]) -Greedy action tensor([ 0.5676, -0.6349, 0.0132, -0.2382]) tensor([0.4307, 0.1294, 0.2474, 0.1924]) -Greedy action tensor([ 0.7433, -0.7064, -0.1000, -0.2358]) tensor([0.4900, 0.1150, 0.2109, 0.1841]) -Greedy action tensor([ 1.1155, -0.9293, 0.1754, -0.7247]) tensor([0.5957, 0.0771, 0.2327, 0.0946]) -Greedy action tensor([ 0.8342, -0.3351, -0.2308, -0.6478]) tensor([0.5312, 0.1650, 0.1831, 0.1207]) -Greedy action tensor([ 1.1023, -0.4672, 0.3730, -0.6189]) tensor([0.5350, 0.1114, 0.2580, 0.0957]) -Greedy action tensor([ 1.3138, -0.8297, -0.0070, -0.4317]) tensor([0.6415, 0.0752, 0.1712, 0.1120]) -Greedy action tensor([ 0.7659, -0.3015, -0.0894, -0.1400]) tensor([0.4601, 0.1583, 0.1956, 0.1860]) -Greedy action tensor([ 1.2716, -0.8535, -0.0463, -0.7433]) tensor([0.6577, 0.0785, 0.1761, 0.0877]) -Greedy action tensor([ 0.8129, -0.5874, 0.1703, -0.2090]) tensor([0.4690, 0.1156, 0.2466, 0.1688]) -Greedy action tensor([ 1.0331, 0.2123, -0.0631, 0.0648]) tensor([0.4643, 0.2043, 0.1551, 0.1763]) -Greedy action tensor([ 0.5520, 0.0600, -0.1475, -0.3333]) tensor([0.3967, 0.2425, 0.1971, 0.1637]) -Greedy action tensor([ 0.2324, 0.1147, -0.1345, -0.3224]) tensor([0.3169, 0.2817, 0.2195, 0.1819]) -Greedy action tensor([ 1.1547, -0.5890, -0.1705, -0.1397]) tensor([0.5832, 0.1020, 0.1550, 0.1598]) -Greedy action tensor([ 0.3666, -0.3344, -0.1225, -0.0509]) tensor([0.3613, 0.1792, 0.2215, 0.2380]) -Greedy action tensor([ 0.8646, -0.5518, 0.0068, -0.4666]) tensor([0.5179, 0.1256, 0.2196, 0.1368]) -Greedy action tensor([ 0.6343, -0.2598, 0.0041, -0.0473]) tensor([0.4086, 0.1671, 0.2176, 0.2067]) -Greedy action tensor([ 0.8366, -0.5445, -0.0992, -0.2017]) tensor([0.5006, 0.1258, 0.1964, 0.1772]) -Greedy action tensor([ 0.7539, -0.0497, 0.2225, -0.0988]) tensor([0.4062, 0.1819, 0.2388, 0.1731]) -Greedy action tensor([ 1.6388, -1.1890, 0.2088, -0.7510]) tensor([0.7194, 0.0425, 0.1722, 0.0659]) -Greedy action tensor([ 0.9179, -0.7831, -0.1124, -0.3460]) tensor([0.5489, 0.1002, 0.1959, 0.1551]) -Greedy action tensor([ 0.7295, -0.4586, 0.0738, -0.2710]) tensor([0.4563, 0.1391, 0.2368, 0.1678]) -Greedy action tensor([ 0.4449, -0.0530, 0.0417, 0.0135]) tensor([0.3418, 0.2077, 0.2284, 0.2220]) -Greedy action tensor([ 0.9287, -0.4993, -0.1459, -0.4591]) tensor([0.5462, 0.1310, 0.1865, 0.1363]) -Greedy action tensor([ 0.4401, -0.2110, -0.0368, -0.3084]) tensor([0.3824, 0.1994, 0.2373, 0.1809]) -Greedy action tensor([ 0.7879, -0.4436, -0.0042, -0.3023]) tensor([0.4806, 0.1403, 0.2176, 0.1615]) -Greedy action tensor([ 0.6285, -0.4563, -0.1176, -0.4060]) tensor([0.4613, 0.1559, 0.2188, 0.1640]) -Greedy action tensor([ 1.0005, -0.6692, 0.1623, -0.5526]) tensor([0.5457, 0.1028, 0.2360, 0.1155]) -Greedy action tensor([ 0.6203, -0.5280, -0.0804, -0.1093]) tensor([0.4356, 0.1382, 0.2162, 0.2100]) -Greedy action tensor([ 0.7135, -0.3680, -0.0428, -0.5188]) tensor([0.4762, 0.1615, 0.2235, 0.1389]) -Greedy action tensor([ 0.7475, -0.3059, -0.0905, -0.1731]) tensor([0.4588, 0.1600, 0.1985, 0.1827]) -Greedy action tensor([ 1.0255, -0.7283, -0.1355, -0.2436]) tensor([0.5658, 0.0979, 0.1772, 0.1590]) -Greedy action tensor([ 0.8860, -0.5781, -0.1299, -0.6211]) tensor([0.5510, 0.1274, 0.1995, 0.1221]) -Greedy action tensor([ 0.6665, -0.0938, -0.1244, 0.0555]) tensor([0.4059, 0.1898, 0.1841, 0.2203]) -Greedy action tensor([ 0.8660, -0.1724, -0.1189, -0.6708]) tensor([0.5148, 0.1822, 0.1923, 0.1107]) -Greedy action tensor([0.1851, 0.0856, 0.0670, 0.1248]) tensor([0.2677, 0.2424, 0.2379, 0.2521]) -Greedy action tensor([ 1.0447, -0.5013, 0.1123, -0.3312]) tensor([0.5378, 0.1146, 0.2117, 0.1359]) -Greedy action tensor([ 0.9832, -0.5117, -0.1163, -0.5222]) tensor([0.5620, 0.1260, 0.1872, 0.1247]) -Greedy action tensor([ 0.7430, -0.2677, -0.0020, -0.3560]) tensor([0.4604, 0.1676, 0.2186, 0.1534]) -Greedy action tensor([ 0.5394, -0.1318, -0.1318, -0.0647]) tensor([0.3893, 0.1990, 0.1990, 0.2128]) -Greedy action tensor([ 0.7983, -0.5872, -0.0505, -0.4368]) tensor([0.5079, 0.1271, 0.2173, 0.1477]) -Greedy action tensor([ 0.7081, -0.6586, -0.1246, -0.4925]) tensor([0.5023, 0.1281, 0.2184, 0.1512]) -Greedy action tensor([ 0.1474, 0.5358, -0.1882, 0.1231]) tensor([0.2401, 0.3540, 0.1716, 0.2343]) -Greedy action tensor([ 0.7639, -0.3644, -0.1833, -0.2964]) tensor([0.4860, 0.1572, 0.1885, 0.1683]) -Greedy action tensor([ 0.9754, -0.7438, -0.0430, -0.5515]) tensor([0.5690, 0.1020, 0.2055, 0.1236]) -Greedy action tensor([ 0.6602, -0.6102, -0.0919, -0.2253]) tensor([0.4620, 0.1297, 0.2178, 0.1906]) -Greedy action tensor([ 0.9299, -0.5650, -0.0420, -0.4505]) tensor([0.5393, 0.1210, 0.2041, 0.1356]) -Greedy action tensor([ 0.7645, -0.3376, -0.1026, -0.0964]) tensor([0.4597, 0.1527, 0.1932, 0.1944]) -Greedy action tensor([ 0.4223, -0.0260, -0.0794, -0.0586]) tensor([0.3493, 0.2231, 0.2115, 0.2160]) -Greedy action tensor([ 0.7412, -0.6073, -0.1415, -0.2556]) tensor([0.4896, 0.1271, 0.2025, 0.1807]) -Greedy action tensor([ 1.1506, -0.6405, 0.0719, -0.4639]) tensor([0.5862, 0.0978, 0.1993, 0.1167]) -Greedy action tensor([ 0.6996, -0.6596, 0.2189, -0.6961]) tensor([0.4710, 0.1210, 0.2913, 0.1167]) -Greedy action tensor([ 0.7792, -0.5521, -0.0423, -0.3636]) tensor([0.4944, 0.1306, 0.2174, 0.1577]) -Greedy action tensor([ 1.0656, -0.8331, 0.0754, -0.6916]) tensor([0.5904, 0.0884, 0.2193, 0.1019]) -Greedy action tensor([ 1.1037, -0.2026, -0.1318, -0.4897]) tensor([0.5667, 0.1535, 0.1647, 0.1152]) -Greedy action tensor([ 0.4467, -0.0902, -0.1438, -0.0953]) tensor([0.3676, 0.2149, 0.2037, 0.2138]) -Greedy action tensor([ 0.7189, -0.5275, -0.0754, -0.2750]) tensor([0.4740, 0.1363, 0.2142, 0.1755]) -Greedy action tensor([ 0.7319, -0.2592, -0.0578, -0.0729]) tensor([0.4401, 0.1633, 0.1998, 0.1968]) -Greedy action tensor([-1.8548, -0.3875, 0.6132, -0.1448]) tensor([0.0441, 0.1914, 0.5206, 0.2439]) -Greedy action tensor([-0.8592, -0.3668, 0.3076, -0.0315]) tensor([0.1229, 0.2011, 0.3948, 0.2812]) -Greedy action tensor([-1.9008, -0.3339, 0.6352, -0.1558]) tensor([0.0414, 0.1984, 0.5230, 0.2371]) -Greedy action tensor([-1.9388, -0.4437, 0.6650, -0.1773]) tensor([0.0403, 0.1799, 0.5451, 0.2348]) -Greedy action tensor([-1.8984, -0.4491, 0.6468, -0.1565]) tensor([0.0422, 0.1797, 0.5375, 0.2407]) -Greedy action tensor([-1.9035, -0.4400, 0.6439, -0.1651]) tensor([0.0420, 0.1817, 0.5371, 0.2392]) -Greedy action tensor([-0.7113, 0.3400, 0.0460, 0.1101]) tensor([0.1210, 0.3461, 0.2579, 0.2750]) -Greedy action tensor([-1.8559, -0.4130, 0.6172, -0.1458]) tensor([0.0442, 0.1871, 0.5242, 0.2444]) -Greedy action tensor([-1.9001, -0.3566, 0.6293, -0.1459]) tensor([0.0417, 0.1950, 0.5226, 0.2407]) -Greedy action tensor([-1.7518, -0.0828, 0.5283, -0.1243]) tensor([0.0472, 0.2506, 0.4617, 0.2404]) -Greedy action tensor([-1.4058, -0.2528, 0.6006, 0.1385]) tensor([0.0614, 0.1945, 0.4566, 0.2876]) -Greedy action tensor([-1.9022, -0.4549, 0.6524, -0.1589]) tensor([0.0420, 0.1784, 0.5398, 0.2398]) -Greedy action tensor([-1.2862, -0.3964, 0.2131, 0.2457]) tensor([0.0797, 0.1942, 0.3571, 0.3690]) -Greedy action tensor([-1.3143, -0.4146, 0.9981, 0.8471]) tensor([0.0450, 0.1106, 0.4541, 0.3904]) -Greedy action tensor([-0.0917, 1.1765, 0.0328, 0.3896]) tensor([0.1369, 0.4866, 0.1550, 0.2215]) -Greedy action tensor([-1.8330, -0.1200, 0.5560, -0.1257]) tensor([0.0435, 0.2415, 0.4748, 0.2401]) -Greedy action tensor([-1.5297, -0.3404, 0.5207, -0.2029]) tensor([0.0632, 0.2076, 0.4911, 0.2382]) -Greedy action tensor([-1.8749, -0.2115, 0.5981, -0.1242]) tensor([0.0419, 0.2209, 0.4963, 0.2410]) -Greedy action tensor([-1.8014, -0.4129, 0.5914, -0.1237]) tensor([0.0469, 0.1881, 0.5137, 0.2513]) -Greedy action tensor([-1.9439, -0.4551, 0.6715, -0.1794]) tensor([0.0401, 0.1777, 0.5482, 0.2341]) -Greedy action tensor([-0.7044, 0.8852, 0.0756, -0.2582]) tensor([0.1037, 0.5082, 0.2262, 0.1620]) -Greedy action tensor([-1.2917, 0.6054, 0.1974, 0.1456]) tensor([0.0613, 0.4088, 0.2718, 0.2581]) -Greedy action tensor([-1.9120, -0.4552, 0.6554, -0.1649]) tensor([0.0416, 0.1784, 0.5416, 0.2385]) -Greedy action tensor([-1.8637, -0.4449, 0.6360, -0.1359]) tensor([0.0436, 0.1801, 0.5309, 0.2454]) -Greedy action tensor([-1.5170, -0.1890, 0.6286, -0.5317]) tensor([0.0625, 0.2358, 0.5342, 0.1674]) -Greedy action tensor([-1.9184, -0.4490, 0.6567, -0.1680]) tensor([0.0413, 0.1793, 0.5419, 0.2375]) -Greedy action tensor([-1.9409, -0.4503, 0.6673, -0.1771]) tensor([0.0402, 0.1787, 0.5463, 0.2348]) -Greedy action tensor([-1.6593, -0.5196, 0.4926, -0.0113]) tensor([0.0558, 0.1744, 0.4799, 0.2899]) -Greedy action tensor([-1.5913, -0.1090, 0.4684, -0.0759]) tensor([0.0562, 0.2474, 0.4407, 0.2557]) -Greedy action tensor([-1.6847, -0.2067, 0.5017, -0.0780]) tensor([0.0519, 0.2275, 0.4619, 0.2587]) -Greedy action tensor([-1.8060, -0.4362, 0.5990, -0.1213]) tensor([0.0467, 0.1838, 0.5176, 0.2519]) -Greedy action tensor([-1.6887, -0.4092, 0.5056, -0.0241]) tensor([0.0530, 0.1907, 0.4760, 0.2803]) -Greedy action tensor([-1.7580, -0.4056, 0.3172, -0.4193]) tensor([0.0601, 0.2323, 0.4785, 0.2291]) -Greedy action tensor([-1.1983, 0.6291, 0.1688, 0.2550]) tensor([0.0649, 0.4033, 0.2545, 0.2774]) -Greedy action tensor([ 0.7878, 0.6370, -0.0961, 0.6164]) tensor([0.3210, 0.2760, 0.1326, 0.2704]) -Greedy action tensor([-1.4531, -0.5333, 0.4253, 0.0947]) tensor([0.0678, 0.1700, 0.4435, 0.3187]) -Greedy action tensor([-1.8581, -0.4532, 0.6230, -0.1595]) tensor([0.0445, 0.1812, 0.5314, 0.2430]) -Greedy action tensor([-1.9239, -0.4310, 0.6591, -0.1710]) tensor([0.0409, 0.1819, 0.5412, 0.2360]) -Greedy action tensor([-0.1068, 1.0881, 0.0171, 0.1683]) tensor([0.1481, 0.4892, 0.1676, 0.1950]) -Greedy action tensor([-1.7525, -0.2259, 0.5535, 0.0050]) tensor([0.0466, 0.2147, 0.4681, 0.2705]) -Greedy action tensor([-1.8377, -0.2395, 0.6010, -0.1858]) tensor([0.0442, 0.2186, 0.5066, 0.2306]) -Greedy action tensor([-1.9008, -0.4517, 0.6483, -0.1608]) tensor([0.0421, 0.1793, 0.5387, 0.2399]) -Greedy action tensor([-1.7674, -0.0865, 0.6320, -0.3894]) tensor([0.0468, 0.2515, 0.5159, 0.1858]) -Greedy action tensor([-1.8607, -0.3147, 0.6171, -0.1337]) tensor([0.0430, 0.2020, 0.5129, 0.2421]) -Greedy action tensor([-1.8710, -0.4298, 0.6309, -0.1436]) tensor([0.0434, 0.1833, 0.5294, 0.2440]) -Greedy action tensor([-1.6935, -0.3068, 0.5869, 0.0026]) tensor([0.0494, 0.1978, 0.4833, 0.2695]) -Greedy action tensor([-1.5730, 0.4632, 0.3722, 0.0454]) tensor([0.0483, 0.3701, 0.3379, 0.2437]) -Greedy action tensor([-1.8897, -0.4571, 0.6452, -0.1545]) tensor([0.0426, 0.1785, 0.5374, 0.2415]) -Greedy action tensor([-1.9126, -0.4722, 0.6633, -0.1610]) tensor([0.0414, 0.1750, 0.5447, 0.2389]) -Greedy action tensor([-1.9426, -0.4445, 0.6662, -0.1779]) tensor([0.0402, 0.1797, 0.5456, 0.2346]) -Greedy action tensor([-1.9285, -0.4527, 0.6624, -0.1729]) tensor([0.0408, 0.1785, 0.5445, 0.2362]) -Greedy action tensor([-1.1902, -0.4206, 0.3080, -0.0074]) tensor([0.0918, 0.1981, 0.4106, 0.2995]) -Greedy action tensor([-1.9206, -0.4048, 0.6524, -0.1679]) tensor([0.0409, 0.1864, 0.5365, 0.2362]) -Greedy action tensor([-1.2103, -0.5345, 0.2090, 0.1972]) tensor([0.0894, 0.1757, 0.3696, 0.3653]) -Greedy action tensor([-1.8818, -0.2882, 0.6242, -0.1425]) tensor([0.0419, 0.2062, 0.5134, 0.2385]) -Greedy action tensor([-1.8625, -0.2329, 0.6066, -0.1332]) tensor([0.0425, 0.2166, 0.5016, 0.2393]) -Greedy action tensor([ 0.1503, 1.1491, 0.0603, -0.0051]) tensor([0.1823, 0.4950, 0.1666, 0.1561]) -Greedy action tensor([-0.6951, 0.5295, -0.0413, -0.0254]) tensor([0.1208, 0.4110, 0.2322, 0.2360]) -Greedy action tensor([-1.8503, -0.4222, 0.6172, -0.1347]) tensor([0.0444, 0.1852, 0.5236, 0.2468]) -Greedy action tensor([-1.9369, -0.3843, 0.6514, -0.1769]) tensor([0.0403, 0.1901, 0.5356, 0.2340]) -Greedy action tensor([-1.7112, 0.1392, 0.4758, -0.0544]) tensor([0.0465, 0.2958, 0.4141, 0.2437]) -Greedy action tensor([-1.8927, -0.3492, 0.6330, -0.1509]) tensor([0.0419, 0.1959, 0.5232, 0.2389]) -Greedy action tensor([-1.9336, -0.4347, 0.6611, -0.1743]) tensor([0.0405, 0.1814, 0.5427, 0.2354]) -Greedy action tensor([-1.6223, -0.1831, 0.5589, -0.0212]) tensor([0.0525, 0.2216, 0.4653, 0.2605]) -Greedy action tensor([-0.9087, 0.7151, 0.2118, -0.1455]) tensor([0.0886, 0.4495, 0.2717, 0.1901]) -Greedy action tensor([-0.7606, 0.6921, 0.1035, -0.0748]) tensor([0.1038, 0.4437, 0.2463, 0.2061]) -Greedy action tensor([-0.6910, 1.0041, 0.0209, 0.4488]) tensor([0.0861, 0.4691, 0.1755, 0.2692]) -Greedy action tensor([-1.2312, 0.8038, 0.1823, 0.2326]) tensor([0.0585, 0.4479, 0.2406, 0.2530]) -Greedy action tensor([-1.9009, -0.3570, 0.6401, -0.1605]) tensor([0.0415, 0.1945, 0.5272, 0.2367]) -Greedy action tensor([-1.3005, 0.6112, 0.3257, -0.1640]) tensor([0.0626, 0.4237, 0.3185, 0.1952]) -Greedy action tensor([-1.9241, -0.4384, 0.6588, -0.1683]) tensor([0.0409, 0.1808, 0.5415, 0.2368]) -Greedy action tensor([-1.9470, -0.4507, 0.6679, -0.1822]) tensor([0.0400, 0.1788, 0.5473, 0.2339]) -Greedy action tensor([-1.8341, -0.4582, 0.6190, -0.1162]) tensor([0.0451, 0.1787, 0.5247, 0.2515]) -Greedy action tensor([-0.9382, 0.5098, -0.2222, -0.4300]) tensor([0.1116, 0.4747, 0.2283, 0.1855]) -Greedy action tensor([-1.9458, -0.4550, 0.6700, -0.1805]) tensor([0.0401, 0.1779, 0.5480, 0.2341]) -Greedy action tensor([-1.9368, -0.4481, 0.6636, -0.1781]) tensor([0.0405, 0.1794, 0.5452, 0.2350]) -Greedy action tensor([-1.9327, -0.4445, 0.6629, -0.1731]) tensor([0.0406, 0.1797, 0.5439, 0.2358]) -Greedy action tensor([-0.8863, -0.3480, 0.3707, 0.6626]) tensor([0.0915, 0.1567, 0.3215, 0.4304]) -Greedy action tensor([-1.8802, -0.4251, 0.6327, -0.1514]) tensor([0.0430, 0.1842, 0.5306, 0.2422]) -Greedy action tensor([-1.8864, -0.3712, 0.6375, -0.1486]) tensor([0.0422, 0.1919, 0.5262, 0.2397]) -Greedy action tensor([-1.9133, -0.3940, 0.6491, -0.1593]) tensor([0.0411, 0.1879, 0.5333, 0.2376]) -Greedy action tensor([ 1.3567, -0.1954, -0.7722, -0.0470]) tensor([0.6343, 0.1344, 0.0755, 0.1559]) -Greedy action tensor([ 1.5747, -0.0961, -0.8009, 0.3056]) tensor([0.6401, 0.1204, 0.0595, 0.1799]) -Greedy action tensor([ 1.6281, -0.6694, -0.3467, 0.7061]) tensor([0.6109, 0.0614, 0.0848, 0.2430]) -Greedy action tensor([ 1.3331, -0.2471, -0.3633, 0.1753]) tensor([0.5870, 0.1209, 0.1076, 0.1844]) -Greedy action tensor([ 1.8981, -1.2718, -0.3582, 0.2855]) tensor([0.7429, 0.0312, 0.0778, 0.1481]) -Greedy action tensor([ 1.0695, -0.2857, -0.1314, -0.1015]) tensor([0.5351, 0.1380, 0.1610, 0.1659]) -Greedy action tensor([ 1.8513, -0.8387, -0.5486, 0.3273]) tensor([0.7265, 0.0493, 0.0659, 0.1583]) -Greedy action tensor([ 1.0863, -0.3068, -0.2152, 0.2489]) tensor([0.5120, 0.1271, 0.1393, 0.2216]) -Greedy action tensor([ 1.1880, -0.3745, -0.8579, 0.4322]) tensor([0.5529, 0.1159, 0.0715, 0.2597]) -Greedy action tensor([ 1.6817, -0.0767, -0.5871, 0.3361]) tensor([0.6510, 0.1122, 0.0673, 0.1695]) -Greedy action tensor([ 1.3226, -0.3682, -1.1682, 0.0735]) tensor([0.6435, 0.1186, 0.0533, 0.1845]) -Greedy action tensor([ 1.5573, -0.3691, -0.5051, 0.2849]) tensor([0.6439, 0.0938, 0.0819, 0.1804]) -Greedy action tensor([ 1.5647, -0.5989, -0.0858, 0.2869]) tensor([0.6307, 0.0725, 0.1211, 0.1757]) -Greedy action tensor([ 1.2720, -0.3742, -0.4176, 0.2775]) tensor([0.5723, 0.1103, 0.1056, 0.2117]) -Greedy action tensor([ 1.4768, -0.5214, -0.6509, 0.4088]) tensor([0.6256, 0.0848, 0.0745, 0.2150]) -Greedy action tensor([ 1.1290, -0.1801, -0.2704, 0.4584]) tensor([0.4931, 0.1332, 0.1217, 0.2521]) -Greedy action tensor([ 1.1013, -0.4063, -0.7346, 0.2208]) tensor([0.5570, 0.1233, 0.0888, 0.2309]) -Greedy action tensor([ 1.1674, -0.0737, -0.4816, -0.0573]) tensor([0.5633, 0.1628, 0.1083, 0.1655]) -Greedy action tensor([ 1.9402, -1.3870, -0.3821, 0.5627]) tensor([0.7214, 0.0259, 0.0707, 0.1819]) -Greedy action tensor([ 1.6710, -0.9592, -0.1990, 0.4752]) tensor([0.6542, 0.0471, 0.1008, 0.1979]) -Greedy action tensor([ 1.5000, 0.3134, -0.7596, 0.3165]) tensor([0.5828, 0.1779, 0.0608, 0.1785]) -Greedy action tensor([ 1.2853, 0.1673, -0.9368, 0.1600]) tensor([0.5682, 0.1858, 0.0616, 0.1844]) -Greedy action tensor([ 2.2934, -0.8053, -0.1006, 0.2604]) tensor([0.7891, 0.0356, 0.0720, 0.1033]) -Greedy action tensor([ 1.3136, -0.2445, -0.6928, 0.0807]) tensor([0.6111, 0.1287, 0.0822, 0.1781]) -Greedy action tensor([ 0.8426, -0.3858, -0.5115, 0.1375]) tensor([0.4890, 0.1432, 0.1262, 0.2416]) -Greedy action tensor([ 1.0548, -0.4944, -0.5576, 0.5121]) tensor([0.5018, 0.1066, 0.1001, 0.2916]) -Greedy action tensor([ 2.1280, -1.1277, -0.1136, 0.4977]) tensor([0.7459, 0.0288, 0.0793, 0.1461]) -Greedy action tensor([ 1.8933, -0.3964, -0.5420, 0.2488]) tensor([0.7236, 0.0733, 0.0634, 0.1397]) -Greedy action tensor([ 1.4723, -0.5377, -0.6313, 0.4816]) tensor([0.6145, 0.0823, 0.0750, 0.2282]) -Greedy action tensor([ 1.0663, -0.1763, -0.4527, 0.0654]) tensor([0.5333, 0.1539, 0.1168, 0.1960]) -Greedy action tensor([ 1.1056, -0.3706, -0.3017, 0.3820]) tensor([0.5106, 0.1167, 0.1250, 0.2477]) -Greedy action tensor([ 1.3018, 0.0605, -0.7677, 0.5320]) tensor([0.5324, 0.1539, 0.0672, 0.2465]) -Greedy action tensor([ 1.1575, -0.0363, -0.7269, 0.2392]) tensor([0.5393, 0.1635, 0.0819, 0.2153]) -Greedy action tensor([ 1.4076, -0.0534, -0.8093, -0.0220]) tensor([0.6328, 0.1468, 0.0689, 0.1515]) -Greedy action tensor([ 1.9352, -0.0783, -1.0722, 0.2987]) tensor([0.7259, 0.0969, 0.0359, 0.1413]) -Greedy action tensor([ 1.3317, -0.2169, -1.4175, 0.3348]) tensor([0.6077, 0.1292, 0.0389, 0.2243]) -Greedy action tensor([ 1.2479, -0.1111, -0.7159, -0.1595]) tensor([0.6090, 0.1565, 0.0855, 0.1491]) -Greedy action tensor([ 1.3027, -0.0495, -0.8832, 0.0406]) tensor([0.6046, 0.1564, 0.0679, 0.1711]) -Greedy action tensor([ 1.1218, 0.2448, -0.6755, 0.0547]) tensor([0.5193, 0.2160, 0.0861, 0.1786]) -Greedy action tensor([ 1.1084, -0.3476, -0.5684, 0.7230]) tensor([0.4761, 0.1110, 0.0890, 0.3238]) -Greedy action tensor([ 1.5922, -0.4241, -0.5749, 0.0483]) tensor([0.6844, 0.0911, 0.0784, 0.1461]) -Greedy action tensor([ 1.4720, -0.6101, -0.0937, 0.3476]) tensor([0.6030, 0.0752, 0.1260, 0.1959]) -Greedy action tensor([ 1.4122, -0.0645, -0.7205, 0.7797]) tensor([0.5324, 0.1216, 0.0631, 0.2829]) -Greedy action tensor([ 1.4959, 0.1081, -0.9495, -0.1655]) tensor([0.6552, 0.1636, 0.0568, 0.1244]) -Greedy action tensor([ 1.6964, -0.6032, -0.6104, 0.8666]) tensor([0.6113, 0.0613, 0.0609, 0.2666]) -Greedy action tensor([ 1.2689, -0.5938, -0.4666, 0.2912]) tensor([0.5856, 0.0909, 0.1032, 0.2203]) -Greedy action tensor([ 1.1190, -0.3918, -0.0081, 0.0640]) tensor([0.5283, 0.1166, 0.1712, 0.1839]) -Greedy action tensor([ 1.5231, -0.8864, -0.2312, 0.1792]) tensor([0.6563, 0.0590, 0.1136, 0.1712]) -Greedy action tensor([ 1.9409, -0.8959, -0.5062, 0.3493]) tensor([0.7414, 0.0435, 0.0642, 0.1510]) -Greedy action tensor([ 1.6671, -0.1900, -0.5971, -0.1241]) tensor([0.7009, 0.1094, 0.0728, 0.1169]) -Greedy action tensor([ 1.4461, -0.0291, -0.6369, 0.4230]) tensor([0.5839, 0.1336, 0.0727, 0.2099]) -Greedy action tensor([ 1.2311, -0.2435, -0.0611, 0.0984]) tensor([0.5477, 0.1254, 0.1504, 0.1765]) -Greedy action tensor([ 1.4192, -0.4071, -0.5863, 0.3567]) tensor([0.6093, 0.0981, 0.0820, 0.2106]) -Greedy action tensor([ 1.8061, -0.0833, 0.0935, 0.7157]) tensor([0.5996, 0.0906, 0.1082, 0.2015]) -Greedy action tensor([ 1.6146, -1.3633, 0.0138, 0.0287]) tensor([0.6861, 0.0349, 0.1384, 0.1405]) -Greedy action tensor([ 1.6944, -1.3976, -0.3938, -0.1287]) tensor([0.7514, 0.0341, 0.0931, 0.1214]) -Greedy action tensor([ 1.5979, 0.1048, -0.9651, 0.5702]) tensor([0.6026, 0.1354, 0.0464, 0.2156]) -Greedy action tensor([ 0.5470, -0.4649, 0.1918, -0.1443]) tensor([0.3898, 0.1417, 0.2733, 0.1953]) -Greedy action tensor([ 1.7285, -0.7040, -0.4865, 0.4424]) tensor([0.6787, 0.0596, 0.0741, 0.1876]) -Greedy action tensor([ 1.5368, -0.6597, 0.0299, -0.0755]) tensor([0.6526, 0.0726, 0.1446, 0.1302]) -Greedy action tensor([ 1.7627, -0.0255, -0.4428, -0.0601]) tensor([0.6949, 0.1162, 0.0766, 0.1123]) -Greedy action tensor([ 1.9574, 0.2895, -0.1480, 0.5923]) tensor([0.6387, 0.1205, 0.0778, 0.1631]) -Greedy action tensor([ 1.6216, 0.1349, -0.6996, 0.1965]) tensor([0.6391, 0.1445, 0.0627, 0.1537]) -Greedy action tensor([ 1.3519, -0.1794, -0.8397, 0.3505]) tensor([0.5898, 0.1276, 0.0659, 0.2167]) -Greedy action tensor([ 2.2509, -0.4461, 0.1803, 0.9323]) tensor([0.6844, 0.0461, 0.0863, 0.1831]) -Greedy action tensor([ 1.2930, -0.2857, -0.5443, 0.5293]) tensor([0.5460, 0.1126, 0.0870, 0.2544]) -Greedy action tensor([ 0.7631, -0.2973, -0.0976, 0.2329]) tensor([0.4242, 0.1469, 0.1794, 0.2496]) -Greedy action tensor([ 1.4853, -0.8375, -0.4322, 0.0964]) tensor([0.6692, 0.0656, 0.0984, 0.1669]) -Greedy action tensor([ 1.5258, -0.2741, -0.6838, 0.1154]) tensor([0.6583, 0.1088, 0.0722, 0.1606]) -Greedy action tensor([ 1.2531, -0.2933, -1.0087, 0.2631]) tensor([0.5922, 0.1261, 0.0617, 0.2200]) -Greedy action tensor([ 1.0212, -0.5035, 0.0818, 0.3241]) tensor([0.4747, 0.1033, 0.1855, 0.2364]) -Greedy action tensor([ 1.4271, -0.0679, -0.9117, 0.7369]) tensor([0.5488, 0.1231, 0.0529, 0.2752]) -Greedy action tensor([ 1.2494, -0.5758, -0.6069, 0.2594]) tensor([0.5920, 0.0954, 0.0925, 0.2200]) -Greedy action tensor([ 1.7518, -0.3818, -0.5666, 0.1441]) tensor([0.7056, 0.0836, 0.0695, 0.1414]) -Greedy action tensor([ 1.4080, 0.0385, -1.0276, 0.3722]) tensor([0.5894, 0.1498, 0.0516, 0.2092]) -Greedy action tensor([ 1.6862, -0.5689, -0.1108, 0.5913]) tensor([0.6230, 0.0653, 0.1033, 0.2084]) -Greedy action tensor([ 1.3271, -0.1538, -0.5802, 0.0687]) tensor([0.6024, 0.1370, 0.0894, 0.1711]) -Greedy action tensor([ 1.7195, -0.9021, -0.4817, 0.3797]) tensor([0.6919, 0.0503, 0.0766, 0.1812]) -Greedy action tensor([ 1.2296, -0.6087, -0.3789, 0.2734]) tensor([0.5735, 0.0912, 0.1148, 0.2204]) -Greedy action tensor([ 2.0818, -0.2887, -0.1224, 0.2800]) tensor([0.7306, 0.0683, 0.0806, 0.1205]) -Greedy action tensor([ 2.0590, -0.8838, -0.1917, 0.4481]) tensor([0.7365, 0.0388, 0.0776, 0.1471]) -Greedy action tensor([ 1.3101, -0.5627, 1.4741, 1.5589]) tensor([0.2767, 0.0425, 0.3260, 0.3548]) -Greedy action tensor([ 0.9636, -0.8410, 0.1160, 0.4856]) tensor([0.4519, 0.0744, 0.1936, 0.2802]) -Greedy action tensor([-0.2200, -0.6567, -0.5374, 0.0876]) tensor([0.2678, 0.1730, 0.1950, 0.3642]) -Greedy action tensor([ 0.1198, 0.0759, -1.3729, -0.3023]) tensor([0.3524, 0.3373, 0.0792, 0.2311]) -Greedy action tensor([-0.8411, 0.2184, -1.1394, -0.1580]) tensor([0.1514, 0.4366, 0.1123, 0.2997]) -Greedy action tensor([0.6428, 0.0381, 0.1374, 0.1823]) tensor([0.3597, 0.1965, 0.2170, 0.2269]) -Greedy action tensor([-0.2236, -0.8128, 0.6495, 0.0891]) tensor([0.1881, 0.1044, 0.4504, 0.2572]) -Greedy action tensor([-0.0652, -0.2052, 0.2956, -0.3259]) tensor([0.2454, 0.2134, 0.3521, 0.1891]) -Greedy action tensor([-0.0502, 0.3566, 0.3777, -0.2606]) tensor([0.2063, 0.3099, 0.3165, 0.1672]) -Greedy action tensor([ 0.6600, -1.2651, 0.5092, 0.0070]) tensor([0.3958, 0.0577, 0.3404, 0.2060]) -Greedy action tensor([ 0.0132, -0.3981, -0.5229, -0.0805]) tensor([0.3166, 0.2098, 0.1852, 0.2883]) -Greedy action tensor([ 0.4724, 0.0464, -0.1116, -0.0362]) tensor([0.3556, 0.2323, 0.1983, 0.2138]) -Greedy action tensor([-1.0441, -2.0655, 0.3145, -0.0753]) tensor([0.1268, 0.0457, 0.4934, 0.3341]) -Greedy action tensor([-0.3592, 0.0766, -0.1301, -0.2062]) tensor([0.2012, 0.3112, 0.2531, 0.2345]) -Greedy action tensor([ 0.1070, -0.2873, 0.1973, -0.5328]) tensor([0.3034, 0.2045, 0.3321, 0.1600]) -Greedy action tensor([-0.0906, -0.3367, 0.1904, 0.1007]) tensor([0.2316, 0.1811, 0.3068, 0.2805]) -Greedy action tensor([ 0.1687, 0.2773, -0.0254, -0.4479]) tensor([0.2875, 0.3205, 0.2368, 0.1552]) -Greedy action tensor([ 1.0134, 0.0585, -0.2940, 0.5138]) tensor([0.4421, 0.1701, 0.1196, 0.2682]) -Greedy action tensor([-1.1945, -1.1516, -0.8914, -0.7961]) tensor([0.2046, 0.2136, 0.2771, 0.3048]) -Greedy action tensor([ 0.2499, 0.0751, -0.4293, -0.4019]) tensor([0.3487, 0.2928, 0.1768, 0.1817]) -Greedy action tensor([ 0.2697, -0.8639, 0.2927, -0.0298]) tensor([0.3240, 0.1043, 0.3315, 0.2401]) -Greedy action tensor([-0.2832, -0.3820, 0.7266, -0.8610]) tensor([0.1919, 0.1738, 0.5267, 0.1077]) -Greedy action tensor([ 0.0309, -0.0589, -1.4953, 0.0772]) tensor([0.3146, 0.2876, 0.0684, 0.3295]) -Greedy action tensor([-0.6512, -0.0622, 0.1811, -0.4955]) tensor([0.1595, 0.2875, 0.3666, 0.1864]) -Greedy action tensor([ 0.9790, -0.5708, 0.1781, -0.3891]) tensor([0.5220, 0.1108, 0.2343, 0.1329]) -Greedy action tensor([-0.3912, -0.1883, 0.3548, -1.2405]) tensor([0.2100, 0.2573, 0.4429, 0.0898]) -Greedy action tensor([ 0.8062, -0.8684, -0.0474, 0.7780]) tensor([0.3868, 0.0725, 0.1647, 0.3760]) -Greedy action tensor([ 0.3902, 0.2276, 0.1461, -0.1397]) tensor([0.3104, 0.2638, 0.2432, 0.1827]) -Greedy action tensor([-0.3556, 0.1693, -0.2679, -0.8979]) tensor([0.2292, 0.3874, 0.2502, 0.1332]) -Greedy action tensor([ 1.0523, -0.4822, 0.2305, 1.3555]) tensor([0.3323, 0.0716, 0.1461, 0.4500]) -Greedy action tensor([ 0.4306, 0.1718, -0.5016, -0.4230]) tensor([0.3859, 0.2979, 0.1519, 0.1643]) -Greedy action tensor([ 0.3501, -0.1746, 0.7459, -0.5851]) tensor([0.2882, 0.1705, 0.4281, 0.1131]) -Greedy action tensor([ 0.6174, -0.9990, 0.6529, -0.1065]) tensor([0.3677, 0.0730, 0.3810, 0.1783]) -Greedy action tensor([ 0.1093, -0.6438, 0.0712, -0.1332]) tensor([0.3107, 0.1463, 0.2991, 0.2438]) -Greedy action tensor([ 0.0196, -0.9343, -0.5927, -0.6725]) tensor([0.4119, 0.1587, 0.2233, 0.2061]) -Greedy action tensor([-0.5493, 0.3349, -0.2499, -1.1797]) tensor([0.1886, 0.4566, 0.2544, 0.1004]) -Greedy action tensor([-0.2480, 0.1804, 0.3033, 0.0507]) tensor([0.1780, 0.2732, 0.3089, 0.2399]) -Greedy action tensor([ 1.2148, -0.9817, 0.0881, 0.2957]) tensor([0.5452, 0.0606, 0.1767, 0.2175]) -Greedy action tensor([-0.3876, 0.3424, -0.2906, -1.1879]) tensor([0.2162, 0.4486, 0.2382, 0.0971]) -Greedy action tensor([-0.4153, 0.4883, -0.5127, -0.6729]) tensor([0.1942, 0.4795, 0.1762, 0.1501]) -Greedy action tensor([-1.0392, -0.7409, 0.1231, -1.0928]) tensor([0.1540, 0.2075, 0.4925, 0.1460]) -Greedy action tensor([ 0.4094, 0.3419, -0.1919, -0.7224]) tensor([0.3565, 0.3332, 0.1954, 0.1149]) -Greedy action tensor([-0.5917, -0.1665, 0.1822, -0.5075]) tensor([0.1728, 0.2644, 0.3747, 0.1880]) -Greedy action tensor([-1.0095, -0.9912, 0.6126, -0.8542]) tensor([0.1212, 0.1235, 0.6138, 0.1416]) -Greedy action tensor([-0.2617, -0.8462, 0.1614, 0.2237]) tensor([0.2124, 0.1184, 0.3242, 0.3450]) -Greedy action tensor([ 0.5709, -1.0703, 0.9327, 0.5808]) tensor([0.2748, 0.0532, 0.3945, 0.2775]) -Greedy action tensor([-0.8298, -0.3502, 0.4823, -1.9991]) tensor([0.1506, 0.2433, 0.5593, 0.0468]) -Greedy action tensor([-0.8065, -0.4283, -0.6270, 0.2398]) tensor([0.1538, 0.2244, 0.1840, 0.4378]) -Greedy action tensor([-0.7976, -0.5222, -0.2195, -1.0178]) tensor([0.2040, 0.2687, 0.3637, 0.1637]) -Greedy action tensor([ 0.1249, -0.8963, 0.1671, -0.9999]) tensor([0.3666, 0.1320, 0.3824, 0.1190]) -Greedy action tensor([ 0.0770, -0.9227, 0.4000, 0.2165]) tensor([0.2565, 0.0944, 0.3543, 0.2949]) -Greedy action tensor([-0.3255, -0.6155, -0.5213, 0.7314]) tensor([0.1836, 0.1374, 0.1509, 0.5282]) -Greedy action tensor([ 0.4269, -0.2379, 0.7617, -0.8062]) tensor([0.3122, 0.1606, 0.4363, 0.0910]) -Greedy action tensor([-0.7782, -1.5200, -0.0312, -1.1109]) tensor([0.2324, 0.1107, 0.4904, 0.1666]) -Greedy action tensor([ 0.8155, -0.0310, -0.9708, 0.4846]) tensor([0.4320, 0.1853, 0.0724, 0.3103]) -Greedy action tensor([ 0.4521, 0.5391, -0.0662, -0.3197]) tensor([0.3176, 0.3465, 0.1891, 0.1468]) -Greedy action tensor([ 0.6125, 0.7045, -0.3446, 0.6776]) tensor([0.2819, 0.3090, 0.1082, 0.3009]) -Greedy action tensor([-1.4202, -0.0747, -0.8908, -0.3097]) tensor([0.1045, 0.4011, 0.1773, 0.3171]) -Greedy action tensor([-0.5766, 0.9669, 0.5431, -0.7933]) tensor([0.1047, 0.4901, 0.3208, 0.0843]) -Greedy action tensor([-0.9953, -0.9075, -1.0058, -0.6372]) tensor([0.2216, 0.2420, 0.2193, 0.3171]) -Greedy action tensor([-0.4882, 0.3716, 0.2640, -0.9923]) tensor([0.1642, 0.3881, 0.3485, 0.0992]) -Greedy action tensor([-0.2421, -1.0437, 0.5937, -1.0671]) tensor([0.2385, 0.1070, 0.5501, 0.1045]) -Greedy action tensor([ 0.2941, -0.3625, -0.7394, -0.3067]) tensor([0.4128, 0.2141, 0.1468, 0.2263]) -Greedy action tensor([-0.4484, -0.6575, -0.1853, -1.5498]) tensor([0.2903, 0.2355, 0.3777, 0.0965]) -Greedy action tensor([-0.6585, -0.7911, 0.6997, -0.8546]) tensor([0.1518, 0.1330, 0.5904, 0.1248]) -Greedy action tensor([-0.0074, 0.1376, -0.4755, 0.0119]) tensor([0.2630, 0.3041, 0.1647, 0.2682]) -Greedy action tensor([ 0.2577, 0.2458, -0.6920, -0.0410]) tensor([0.3208, 0.3170, 0.1241, 0.2380]) -Greedy action tensor([ 0.8014, -0.7427, 0.5833, 0.1682]) tensor([0.3924, 0.0838, 0.3155, 0.2083]) -Greedy action tensor([ 1.2206, -1.2278, 1.9370, 0.9566]) tensor([0.2563, 0.0222, 0.5247, 0.1968]) -Greedy action tensor([-0.3223, -0.5434, -0.3989, -0.2726]) tensor([0.2646, 0.2121, 0.2451, 0.2781]) -Greedy action tensor([ 1.1238, 0.6512, 0.1873, -0.5576]) tensor([0.4542, 0.2832, 0.1781, 0.0845]) -Greedy action tensor([-0.5518, -0.8427, -1.0709, -0.0709]) tensor([0.2525, 0.1888, 0.1503, 0.4085]) -Greedy action tensor([-0.8949, -0.7663, -0.6964, 0.7574]) tensor([0.1166, 0.1326, 0.1422, 0.6086]) -Greedy action tensor([-0.4929, -0.1279, 0.0434, -0.8361]) tensor([0.2058, 0.2964, 0.3518, 0.1460]) -Greedy action tensor([ 0.5130, -0.2881, 0.5904, -1.3046]) tensor([0.3715, 0.1667, 0.4014, 0.0603]) -Greedy action tensor([ 0.2316, -0.8851, 0.7083, -0.1074]) tensor([0.2739, 0.0897, 0.4412, 0.1952]) -Greedy action tensor([ 0.4374, -0.3192, -0.3510, -0.8650]) tensor([0.4554, 0.2137, 0.2070, 0.1238]) -Greedy action tensor([ 1.0766, -1.1701, -0.4600, -0.7999]) tensor([0.6784, 0.0717, 0.1459, 0.1039]) -Greedy action tensor([-0.2717, -0.4675, 0.1693, -1.3608]) tensor([0.2693, 0.2214, 0.4186, 0.0906]) -Greedy action tensor([-0.5290, 0.2353, -1.7543, -0.1183]) tensor([0.2021, 0.4339, 0.0593, 0.3047]) -Greedy action tensor([ 1.7945, 0.0138, 0.3520, -0.3735]) tensor([0.6582, 0.1109, 0.1556, 0.0753]) -Greedy action tensor([ 1.2203, -0.7640, -0.0538, -0.7241]) tensor([0.6409, 0.0881, 0.1793, 0.0917]) -Greedy action tensor([ 1.2065, -0.6473, -0.1427, -0.5017]) tensor([0.6261, 0.0981, 0.1624, 0.1134]) -Greedy action tensor([ 0.6700, -0.2041, -0.1908, -0.1588]) tensor([0.4392, 0.1833, 0.1857, 0.1918]) -Greedy action tensor([ 0.9479, -0.3943, -0.0544, -0.4008]) tensor([0.5297, 0.1384, 0.1944, 0.1375]) -Greedy action tensor([ 0.6133, -0.2230, 0.0709, -0.5751]) tensor([0.4312, 0.1868, 0.2506, 0.1314]) -Greedy action tensor([ 0.9513, -0.5427, 0.2079, -0.3687]) tensor([0.5084, 0.1141, 0.2417, 0.1358]) -Greedy action tensor([ 0.4989, -0.1943, -0.0090, -0.1501]) tensor([0.3811, 0.1905, 0.2293, 0.1991]) -Greedy action tensor([ 0.5978, -0.3032, -0.0176, -0.1503]) tensor([0.4132, 0.1679, 0.2233, 0.1956]) -Greedy action tensor([ 0.7458, -0.2467, 0.1387, -0.2153]) tensor([0.4352, 0.1613, 0.2371, 0.1664]) -Greedy action tensor([ 0.5811, -0.2896, 0.0805, -0.2004]) tensor([0.4028, 0.1686, 0.2442, 0.1844]) -Greedy action tensor([ 0.6498, -0.4421, -0.0992, -0.1814]) tensor([0.4456, 0.1496, 0.2107, 0.1941]) -Greedy action tensor([ 0.8395, -0.4404, -0.0853, -0.3007]) tensor([0.5014, 0.1394, 0.1989, 0.1603]) -Greedy action tensor([ 0.5396, -0.4217, -0.0891, -0.1283]) tensor([0.4118, 0.1575, 0.2196, 0.2112]) -Greedy action tensor([ 1.1217, -0.5403, -0.2488, -0.3090]) tensor([0.5942, 0.1128, 0.1509, 0.1421]) -Greedy action tensor([ 0.8698, -0.4495, 0.2662, -0.5494]) tensor([0.4863, 0.1300, 0.2660, 0.1177]) -Greedy action tensor([ 0.8129, -0.6309, 0.0937, -0.2431]) tensor([0.4829, 0.1140, 0.2352, 0.1680]) -Greedy action tensor([ 0.8135, -0.5212, -0.1230, -0.2724]) tensor([0.5018, 0.1321, 0.1967, 0.1694]) -Greedy action tensor([ 0.3509, 0.1359, -0.0366, 0.0147]) tensor([0.3125, 0.2521, 0.2121, 0.2233]) -Greedy action tensor([ 0.8887, -0.4780, -0.1438, -0.3866]) tensor([0.5290, 0.1349, 0.1884, 0.1478]) -Greedy action tensor([ 0.7159, -0.3912, -0.1174, -0.1100]) tensor([0.4539, 0.1500, 0.1973, 0.1988]) -Greedy action tensor([ 1.1288, -0.5534, 0.1093, -0.5245]) tensor([0.5753, 0.1070, 0.2076, 0.1101]) -Greedy action tensor([ 0.6729, -0.3736, 0.0014, -0.2652]) tensor([0.4438, 0.1558, 0.2267, 0.1737]) -Greedy action tensor([ 0.9036, -0.5497, 0.1209, -0.4546]) tensor([0.5133, 0.1200, 0.2347, 0.1320]) -Greedy action tensor([ 0.4591, -0.3810, -0.0564, -0.1289]) tensor([0.3870, 0.1670, 0.2311, 0.2149]) -Greedy action tensor([ 0.8115, -0.4116, -0.0953, -0.2342]) tensor([0.4879, 0.1436, 0.1970, 0.1715]) -Greedy action tensor([ 0.6645, -0.5378, -0.1424, -0.1807]) tensor([0.4595, 0.1381, 0.2051, 0.1973]) -Greedy action tensor([ 1.2053, -0.6688, -0.1399, -0.3986]) tensor([0.6192, 0.0950, 0.1613, 0.1245]) -Greedy action tensor([ 1.1577, -0.6541, -0.0223, -0.8018]) tensor([0.6205, 0.1014, 0.1907, 0.0874]) -Greedy action tensor([ 0.3883, -0.0200, -0.0345, -0.1910]) tensor([0.3472, 0.2308, 0.2275, 0.1945]) -Greedy action tensor([ 1.2486, -0.8807, 0.0700, -0.5747]) tensor([0.6297, 0.0749, 0.1938, 0.1017]) -Greedy action tensor([ 0.6292, -0.3896, -0.0952, -0.1777]) tensor([0.4363, 0.1575, 0.2115, 0.1947]) -Greedy action tensor([ 0.4671, -0.3532, -0.0405, -0.2182]) tensor([0.3927, 0.1729, 0.2364, 0.1979]) -Greedy action tensor([ 0.7134, -0.2067, -0.0708, -0.0842]) tensor([0.4338, 0.1728, 0.1980, 0.1954]) -Greedy action tensor([ 0.5390, -0.1021, -0.0245, -0.0770]) tensor([0.3794, 0.1998, 0.2159, 0.2049]) -Greedy action tensor([ 0.4536, -0.3394, 0.0596, -0.2248]) tensor([0.3796, 0.1718, 0.2560, 0.1926]) -Greedy action tensor([ 0.9906, -0.7271, -0.1305, -0.4149]) tensor([0.5712, 0.1025, 0.1862, 0.1401]) -Greedy action tensor([ 0.8659, -0.8113, -0.0267, -0.3767]) tensor([0.5305, 0.0991, 0.2173, 0.1531]) -Greedy action tensor([ 0.8622, -0.2817, 0.0349, -0.1064]) tensor([0.4683, 0.1492, 0.2048, 0.1778]) -Greedy action tensor([ 1.2431, -0.7720, 0.0265, -0.6638]) tensor([0.6337, 0.0845, 0.1877, 0.0941]) -Greedy action tensor([ 0.3240, -0.1815, -0.0698, -0.4165]) tensor([0.3630, 0.2190, 0.2449, 0.1731]) -Greedy action tensor([ 0.9609, -0.4209, -0.0050, -0.2032]) tensor([0.5144, 0.1292, 0.1958, 0.1606]) -Greedy action tensor([ 1.0830, -0.4293, -0.1489, -0.3693]) tensor([0.5727, 0.1262, 0.1671, 0.1340]) -Greedy action tensor([ 0.6466, -0.2536, -0.0560, -0.3229]) tensor([0.4384, 0.1782, 0.2171, 0.1663]) -Greedy action tensor([ 1.0236, -0.7791, 0.0661, -0.6085]) tensor([0.5733, 0.0945, 0.2201, 0.1121]) -Greedy action tensor([ 0.7597, -0.0525, 0.1240, -0.5032]) tensor([0.4432, 0.1967, 0.2347, 0.1254]) -Greedy action tensor([ 0.6496, -0.5960, 0.0762, -0.1298]) tensor([0.4329, 0.1246, 0.2440, 0.1986]) -Greedy action tensor([ 1.0400, -0.5378, -0.0313, -0.2713]) tensor([0.5499, 0.1135, 0.1884, 0.1482]) -Greedy action tensor([ 0.9709, -0.7408, -0.1009, -0.4951]) tensor([0.5702, 0.1030, 0.1952, 0.1316]) -Greedy action tensor([ 1.3082, -0.8047, -0.0017, -0.6857]) tensor([0.6549, 0.0792, 0.1767, 0.0892]) -Greedy action tensor([ 0.8439, -0.6474, 0.0659, -0.3308]) tensor([0.5017, 0.1129, 0.2304, 0.1550]) -Greedy action tensor([ 0.8433, -0.5566, -0.2963, -0.5903]) tensor([0.5540, 0.1366, 0.1773, 0.1321]) -Greedy action tensor([ 1.0758, -0.9135, -0.0101, -0.5566]) tensor([0.5988, 0.0819, 0.2022, 0.1171]) -Greedy action tensor([ 0.7007, -0.5136, -0.0675, -0.0474]) tensor([0.4476, 0.1329, 0.2076, 0.2118]) -Greedy action tensor([ 1.1024, -0.6967, 0.0810, -0.2416]) tensor([0.5598, 0.0926, 0.2016, 0.1460]) -Greedy action tensor([ 0.7370, -0.2792, 0.0633, -0.1250]) tensor([0.4359, 0.1578, 0.2222, 0.1841]) -Greedy action tensor([ 0.5772, 0.0546, -0.0123, 0.0561]) tensor([0.3648, 0.2163, 0.2023, 0.2166]) -Greedy action tensor([ 1.2501, -0.7980, 0.0607, -0.7617]) tensor([0.6381, 0.0823, 0.1942, 0.0853]) -Greedy action tensor([ 0.7185, -0.3644, -0.0157, -0.1647]) tensor([0.4480, 0.1517, 0.2150, 0.1852]) -Greedy action tensor([ 0.8112, -0.5298, -0.1608, -0.5949]) tensor([0.5305, 0.1388, 0.2007, 0.1300]) -Greedy action tensor([ 0.7874, -0.5967, 0.0235, -0.2235]) tensor([0.4807, 0.1204, 0.2239, 0.1749]) -Greedy action tensor([ 0.7448, -0.6049, -0.0283, -0.6227]) tensor([0.5062, 0.1313, 0.2336, 0.1289]) -Greedy action tensor([ 1.0599, -0.7344, 0.1167, -0.6315]) tensor([0.5748, 0.0955, 0.2238, 0.1059]) -Greedy action tensor([ 0.5448, 0.0128, 0.1617, -0.2411]) tensor([0.3670, 0.2156, 0.2502, 0.1672]) -Greedy action tensor([ 0.6669, -0.1602, -0.0104, -0.1382]) tensor([0.4180, 0.1828, 0.2123, 0.1869]) -Greedy action tensor([ 0.9171, -0.3529, 0.0739, -0.3497]) tensor([0.5018, 0.1409, 0.2159, 0.1414]) -Greedy action tensor([ 0.8493, -0.2717, 0.0089, -0.5405]) tensor([0.4984, 0.1624, 0.2151, 0.1241]) -Greedy action tensor([ 0.8074, -0.5732, -0.0032, -0.1691]) tensor([0.4825, 0.1213, 0.2145, 0.1817]) -Greedy action tensor([ 1.0587, -0.4489, 0.0393, -0.2454]) tensor([0.5395, 0.1195, 0.1946, 0.1464]) -Greedy action tensor([ 0.5639, -0.0641, 0.0026, -0.4088]) tensor([0.4029, 0.2150, 0.2298, 0.1523]) -Greedy action tensor([ 0.8182, -0.6294, -0.1282, -0.3814]) tensor([0.5196, 0.1222, 0.2017, 0.1566]) -Greedy action tensor([ 0.6870, -0.1312, -0.1056, -0.0675]) tensor([0.4230, 0.1866, 0.1915, 0.1989]) -Greedy action tensor([ 0.8243, -0.1784, -0.0086, -0.3992]) tensor([0.4771, 0.1751, 0.2074, 0.1404]) -Greedy action tensor([ 1.0697, -0.8077, 0.1710, -0.7905]) tensor([0.5828, 0.0892, 0.2373, 0.0907]) -Greedy action tensor([ 0.7378, -0.6340, -0.0288, -0.2462]) tensor([0.4780, 0.1212, 0.2221, 0.1787]) -Greedy action tensor([ 0.7011, -0.3787, 0.0589, -0.4286]) tensor([0.4569, 0.1552, 0.2404, 0.1476]) -Greedy action tensor([ 0.8224, -0.4954, -0.1212, -0.1999]) tensor([0.4959, 0.1328, 0.1930, 0.1784]) -Greedy action tensor([ 1.0694, -0.2758, -0.0237, -0.3391]) tensor([0.5434, 0.1416, 0.1821, 0.1329]) -Greedy action tensor([ 1.1267, -0.4664, -0.1221, -0.6055]) tensor([0.5999, 0.1219, 0.1721, 0.1061]) -Greedy action tensor([ 0.6661, -0.4504, -0.0547, -0.2540]) tensor([0.4520, 0.1480, 0.2199, 0.1801]) -Greedy action tensor([ 0.8568, -0.6973, -0.0309, -0.6027]) tensor([0.5390, 0.1139, 0.2218, 0.1252]) -Greedy action tensor([ 1.0044, -0.7639, 0.1335, -0.4779]) tensor([0.5506, 0.0939, 0.2304, 0.1250]) -Greedy action tensor([-0.2922, 1.0651, 0.0174, 0.2624]) tensor([0.1252, 0.4863, 0.1706, 0.2179]) -Greedy action tensor([-1.8234, -0.4471, 0.6035, -0.1233]) tensor([0.0460, 0.1820, 0.5204, 0.2516]) -Greedy action tensor([-1.3162, -0.7263, 0.4434, 0.2363]) tensor([0.0750, 0.1352, 0.4356, 0.3541]) -Greedy action tensor([-1.8756, -0.4736, 0.6298, -0.1489]) tensor([0.0436, 0.1772, 0.5341, 0.2451]) -Greedy action tensor([-1.5402, -0.0651, 0.4083, -0.0111]) tensor([0.0588, 0.2571, 0.4127, 0.2714]) -Greedy action tensor([-1.7546, 0.0862, 0.4987, -0.0437]) tensor([0.0447, 0.2819, 0.4258, 0.2476]) -Greedy action tensor([-1.9113, -0.4464, 0.6547, -0.1610]) tensor([0.0415, 0.1796, 0.5401, 0.2389]) -Greedy action tensor([-1.1426, 0.0760, 0.2312, 0.0078]) tensor([0.0870, 0.2943, 0.3437, 0.2749]) -Greedy action tensor([-1.8869, -0.4601, 0.6866, -0.0996]) tensor([0.0412, 0.1718, 0.5407, 0.2463]) -Greedy action tensor([-1.3298, -0.3966, 0.3337, 0.1373]) tensor([0.0760, 0.1933, 0.4011, 0.3296]) -Greedy action tensor([-1.7852, 0.0146, 0.5218, -0.1030]) tensor([0.0445, 0.2692, 0.4470, 0.2393]) -Greedy action tensor([-1.8085, -0.3075, 0.5792, -0.1012]) tensor([0.0457, 0.2050, 0.4974, 0.2519]) -Greedy action tensor([-0.1481, 1.0975, -0.0170, 0.3950]) tensor([0.1363, 0.4736, 0.1554, 0.2346]) -Greedy action tensor([-1.8667, -0.4803, 0.6252, -0.1381]) tensor([0.0440, 0.1761, 0.5319, 0.2479]) -Greedy action tensor([-1.4819, -0.3171, 0.4664, -0.1051]) tensor([0.0659, 0.2111, 0.4621, 0.2609]) -Greedy action tensor([-1.9205, -0.4089, 0.6532, -0.1680]) tensor([0.0410, 0.1857, 0.5371, 0.2363]) -Greedy action tensor([-1.8953, -0.4221, 0.6406, -0.1521]) tensor([0.0422, 0.1841, 0.5327, 0.2411]) -Greedy action tensor([-1.8135, -0.3408, 0.5869, -0.1121]) tensor([0.0457, 0.1994, 0.5042, 0.2507]) -Greedy action tensor([-1.6889, -0.3367, 0.8112, 0.4444]) tensor([0.0392, 0.1516, 0.4779, 0.3312]) -Greedy action tensor([-1.9377, -0.4378, 0.6641, -0.1768]) tensor([0.0403, 0.1808, 0.5441, 0.2347]) -Greedy action tensor([-1.8040, -0.4874, 0.6031, -0.0989]) tensor([0.0469, 0.1749, 0.5204, 0.2579]) -Greedy action tensor([-1.9055, -0.4531, 0.6505, -0.1626]) tensor([0.0419, 0.1790, 0.5397, 0.2394]) -Greedy action tensor([-1.9182, -0.4266, 0.6511, -0.1680]) tensor([0.0412, 0.1832, 0.5383, 0.2373]) -Greedy action tensor([-1.8636, -0.3392, 0.6299, -0.1346]) tensor([0.0429, 0.1968, 0.5188, 0.2415]) -Greedy action tensor([-1.6153, 0.2653, 0.4282, 0.0458]) tensor([0.0487, 0.3192, 0.3757, 0.2563]) -Greedy action tensor([-1.8002, -0.3348, 0.6479, -0.0443]) tensor([0.0441, 0.1909, 0.5099, 0.2552]) -Greedy action tensor([-1.9070, -0.4341, 0.6443, -0.1644]) tensor([0.0418, 0.1825, 0.5366, 0.2390]) -Greedy action tensor([-1.8788, -0.4639, 0.6487, -0.1443]) tensor([0.0429, 0.1766, 0.5373, 0.2431]) -Greedy action tensor([-1.8622, -0.4078, 0.6224, -0.1408]) tensor([0.0437, 0.1872, 0.5245, 0.2445]) -Greedy action tensor([-1.9283, -0.4467, 0.6591, -0.1758]) tensor([0.0409, 0.1799, 0.5435, 0.2358]) -Greedy action tensor([-1.6040, -0.1749, 0.4763, -0.0864]) tensor([0.0564, 0.2353, 0.4513, 0.2571]) -Greedy action tensor([-1.8702, -0.3350, 0.6171, -0.1403]) tensor([0.0429, 0.1991, 0.5160, 0.2419]) -Greedy action tensor([-1.4537, 0.5098, 0.2822, 0.2260]) tensor([0.0522, 0.3718, 0.2961, 0.2799]) -Greedy action tensor([-1.9203, -0.4427, 0.6582, -0.1659]) tensor([0.0411, 0.1800, 0.5414, 0.2375]) -Greedy action tensor([-1.9334, -0.4387, 0.6665, -0.1737]) tensor([0.0404, 0.1803, 0.5444, 0.2350]) -Greedy action tensor([-1.8032, -0.3234, 0.6157, -0.0865]) tensor([0.0451, 0.1979, 0.5062, 0.2508]) -Greedy action tensor([-1.9157, -0.4350, 0.6538, -0.1662]) tensor([0.0413, 0.1816, 0.5395, 0.2376]) -Greedy action tensor([-1.7909, -0.1841, 0.4285, -0.2755]) tensor([0.0507, 0.2526, 0.4661, 0.2306]) -Greedy action tensor([-1.8416, -0.3366, 0.6127, -0.0979]) tensor([0.0437, 0.1970, 0.5091, 0.2501]) -Greedy action tensor([-0.9269, 0.8899, 0.1056, 0.2198]) tensor([0.0763, 0.4693, 0.2142, 0.2401]) -Greedy action tensor([-1.6770, -0.2037, 0.5977, -0.0265]) tensor([0.0493, 0.2150, 0.4791, 0.2567]) -Greedy action tensor([-1.8241, -0.0430, 0.5611, -0.1120]) tensor([0.0428, 0.2544, 0.4654, 0.2374]) -Greedy action tensor([-1.7519, -0.3863, 0.5929, -0.0398]) tensor([0.0479, 0.1876, 0.4993, 0.2652]) -Greedy action tensor([-1.5686, -0.4914, 0.4943, 0.0383]) tensor([0.0596, 0.1749, 0.4686, 0.2970]) -Greedy action tensor([-1.8262, -0.3567, 0.6156, -0.0842]) tensor([0.0443, 0.1928, 0.5097, 0.2532]) -Greedy action tensor([-1.8892, -0.4562, 0.6425, -0.1561]) tensor([0.0427, 0.1789, 0.5368, 0.2416]) -Greedy action tensor([-1.8049, -0.4894, 0.5984, -0.1338]) tensor([0.0474, 0.1766, 0.5240, 0.2520]) -Greedy action tensor([-1.8388, -0.4466, 0.6166, -0.1216]) tensor([0.0450, 0.1809, 0.5238, 0.2504]) -Greedy action tensor([-0.5817, 0.0505, -0.4803, -0.0673]) tensor([0.1766, 0.3324, 0.1955, 0.2955]) -Greedy action tensor([-1.9264, -0.4428, 0.6655, -0.1679]) tensor([0.0407, 0.1795, 0.5436, 0.2362]) -Greedy action tensor([-1.5346, -0.5346, 0.4680, -0.0174]) tensor([0.0638, 0.1733, 0.4723, 0.2907]) -Greedy action tensor([-1.9322, -0.4502, 0.6606, -0.1764]) tensor([0.0407, 0.1793, 0.5443, 0.2357]) -Greedy action tensor([-1.8773, -0.3726, 0.6330, -0.1459]) tensor([0.0426, 0.1919, 0.5247, 0.2408]) -Greedy action tensor([-0.9692, -0.6980, 0.3189, 0.3206]) tensor([0.1045, 0.1371, 0.3789, 0.3795]) -Greedy action tensor([-1.8646, -0.3736, 0.6256, -0.1338]) tensor([0.0432, 0.1919, 0.5211, 0.2438]) -Greedy action tensor([-1.4376, -0.0854, 0.4418, -0.1569]) tensor([0.0666, 0.2575, 0.4362, 0.2397]) -Greedy action tensor([-1.9099, -0.4180, 0.6491, -0.1626]) tensor([0.0415, 0.1844, 0.5360, 0.2381]) -Greedy action tensor([-1.9296, -0.4419, 0.6613, -0.1721]) tensor([0.0407, 0.1802, 0.5431, 0.2360]) -Greedy action tensor([-1.8217, -0.1816, 0.5723, -0.1328]) tensor([0.0444, 0.2289, 0.4864, 0.2403]) -Greedy action tensor([-1.0921, 0.7854, 0.1039, 0.2036]) tensor([0.0690, 0.4509, 0.2281, 0.2520]) -Greedy action tensor([-1.2828, -0.0455, 0.2864, 0.0265]) tensor([0.0772, 0.2661, 0.3708, 0.2859]) -Greedy action tensor([-1.9326, -0.4308, 0.6583, -0.1733]) tensor([0.0406, 0.1822, 0.5415, 0.2357]) -Greedy action tensor([-1.8709, -0.4500, 0.6261, -0.1509]) tensor([0.0437, 0.1811, 0.5311, 0.2442]) -Greedy action tensor([-1.8104, -0.2906, 0.5656, -0.2988]) tensor([0.0479, 0.2191, 0.5157, 0.2173]) -Greedy action tensor([-1.9413, -0.4436, 0.6655, -0.1788]) tensor([0.0402, 0.1799, 0.5454, 0.2344]) -Greedy action tensor([-1.8912, -0.3901, 0.6381, -0.1577]) tensor([0.0422, 0.1894, 0.5295, 0.2389]) -Greedy action tensor([-1.8421, -0.4632, 0.6206, -0.0843]) tensor([0.0444, 0.1764, 0.5215, 0.2577]) -Greedy action tensor([-0.7429, 0.4586, -0.5359, -0.1187]) tensor([0.1347, 0.4480, 0.1657, 0.2515]) -Greedy action tensor([-1.7775, -0.1145, 0.5505, -0.0811]) tensor([0.0455, 0.2399, 0.4665, 0.2481]) -Greedy action tensor([-1.6403, 0.0934, 0.4495, -0.0710]) tensor([0.0512, 0.2896, 0.4135, 0.2457]) -Greedy action tensor([-1.9273, -0.4450, 0.6598, -0.1711]) tensor([0.0408, 0.1798, 0.5428, 0.2365]) -Greedy action tensor([-1.6331, -0.2851, 0.5457, -0.1852]) tensor([0.0557, 0.2146, 0.4925, 0.2371]) -Greedy action tensor([-1.7749, -0.4674, 0.5985, -0.0865]) tensor([0.0480, 0.1774, 0.5150, 0.2596]) -Greedy action tensor([-1.9264, -0.4511, 0.6634, -0.1711]) tensor([0.0408, 0.1786, 0.5443, 0.2363]) -Greedy action tensor([-1.9192, -0.4122, 0.6525, -0.1662]) tensor([0.0410, 0.1852, 0.5370, 0.2368]) -Greedy action tensor([-1.9251, -0.4388, 0.6579, -0.1693]) tensor([0.0409, 0.1808, 0.5415, 0.2368]) -Greedy action tensor([-1.4942, -0.3168, 0.5652, 0.0948]) tensor([0.0589, 0.1911, 0.4616, 0.2884]) -Greedy action tensor([-1.8436, -0.3374, 0.1382, -0.4406]) tensor([0.0594, 0.2679, 0.4311, 0.2416]) -Greedy action tensor([-1.9310, -0.4228, 0.6583, -0.1728]) tensor([0.0406, 0.1834, 0.5406, 0.2355]) -Greedy action tensor([-1.8766, -0.4477, 0.6378, -0.1454]) tensor([0.0431, 0.1801, 0.5331, 0.2436]) -Greedy action tensor([-1.9189, -0.4429, 0.6543, -0.1674]) tensor([0.0412, 0.1804, 0.5406, 0.2377]) -Greedy action tensor([ 1.2581, -0.2731, -0.6304, 0.4162]) tensor([0.5560, 0.1203, 0.0841, 0.2396]) -Greedy action tensor([ 0.8705, 0.0069, -0.5040, 0.4090]) tensor([0.4338, 0.1829, 0.1098, 0.2735]) -Greedy action tensor([ 1.4051, -0.4207, -0.9181, 0.0766]) tensor([0.6562, 0.1057, 0.0643, 0.1738]) -Greedy action tensor([ 1.7412, -0.1316, -0.5383, 0.0939]) tensor([0.6903, 0.1061, 0.0706, 0.1329]) -Greedy action tensor([ 1.3208, -0.1067, -0.4036, 0.0758]) tensor([0.5861, 0.1406, 0.1045, 0.1688]) -Greedy action tensor([ 1.4549, -0.5841, -0.3459, 0.4884]) tensor([0.5968, 0.0777, 0.0986, 0.2270]) -Greedy action tensor([ 1.1616, 0.1161, -0.3242, -0.3255]) tensor([0.5544, 0.1949, 0.1255, 0.1253]) -Greedy action tensor([ 1.1534, 0.1229, -0.3002, 0.2625]) tensor([0.4998, 0.1783, 0.1168, 0.2051]) -Greedy action tensor([ 1.2727, -0.5210, -0.1678, -0.1696]) tensor([0.6099, 0.1015, 0.1444, 0.1442]) -Greedy action tensor([ 1.2529, 0.3889, -0.5341, -0.3437]) tensor([0.5582, 0.2353, 0.0935, 0.1131]) -Greedy action tensor([ 1.7548, -1.4746, -0.0559, 0.5725]) tensor([0.6624, 0.0262, 0.1083, 0.2031]) -Greedy action tensor([ 1.4257, -0.3226, -0.1018, 0.2646]) tensor([0.5867, 0.1021, 0.1274, 0.1837]) -Greedy action tensor([ 1.9328, -0.5394, -1.0676, 0.4949]) tensor([0.7291, 0.0615, 0.0363, 0.1731]) -Greedy action tensor([ 1.2929, -0.4741, -0.3672, 0.0890]) tensor([0.6021, 0.1029, 0.1145, 0.1806]) -Greedy action tensor([ 1.3030, -0.1741, -0.6130, 0.0752]) tensor([0.5994, 0.1368, 0.0882, 0.1756]) -Greedy action tensor([ 1.9152, -0.5771, -0.4352, 0.1898]) tensor([0.7374, 0.0610, 0.0703, 0.1313]) -Greedy action tensor([ 1.4333, -0.0609, -0.5428, 0.6345]) tensor([0.5516, 0.1238, 0.0765, 0.2481]) -Greedy action tensor([ 2.7104, -0.3115, 0.3925, -0.0244]) tensor([0.8250, 0.0402, 0.0812, 0.0535]) -Greedy action tensor([ 1.8530, -0.6835, -0.0727, 0.5619]) tensor([0.6667, 0.0528, 0.0972, 0.1833]) -Greedy action tensor([ 1.3447, -0.5519, -0.5071, 0.6634]) tensor([0.5516, 0.0828, 0.0866, 0.2791]) -Greedy action tensor([ 1.4560, -0.3464, -0.8785, 0.2912]) tensor([0.6354, 0.1048, 0.0615, 0.1982]) -Greedy action tensor([ 1.8078, -0.3864, -0.3711, 0.1601]) tensor([0.7057, 0.0786, 0.0799, 0.1358]) -Greedy action tensor([ 2.0662, -1.1695, -0.0090, 0.7806]) tensor([0.6938, 0.0273, 0.0871, 0.1918]) -Greedy action tensor([ 1.1027, -0.0852, -0.5979, 0.0327]) tensor([0.5463, 0.1666, 0.0997, 0.1874]) -Greedy action tensor([ 0.9687, -0.5726, -0.1109, 0.7234]) tensor([0.4280, 0.0916, 0.1454, 0.3349]) -Greedy action tensor([ 1.5463, -0.3819, -0.1431, -0.0932]) tensor([0.6561, 0.0954, 0.1211, 0.1273]) -Greedy action tensor([ 1.0953, -0.0224, -0.5660, 0.1640]) tensor([0.5233, 0.1711, 0.0994, 0.2062]) -Greedy action tensor([ 1.9769, -1.3014, -0.2976, 0.3881]) tensor([0.7437, 0.0280, 0.0765, 0.1518]) -Greedy action tensor([ 1.5511, -0.4321, -0.2761, 0.2439]) tensor([0.6373, 0.0877, 0.1025, 0.1724]) -Greedy action tensor([ 1.0958, -0.4600, -0.5242, 0.0087]) tensor([0.5727, 0.1209, 0.1133, 0.1931]) -Greedy action tensor([1.8992, 0.4777, 0.0942, 0.3920]) tensor([0.6145, 0.1483, 0.1011, 0.1361]) -Greedy action tensor([ 1.7126, -0.0632, -0.2288, 0.2558]) tensor([0.6469, 0.1096, 0.0928, 0.1507]) -Greedy action tensor([ 1.4933, -0.2022, -0.4220, 0.1287]) tensor([0.6304, 0.1157, 0.0929, 0.1611]) -Greedy action tensor([ 1.3668, 0.2574, -1.1570, -0.1717]) tensor([0.6155, 0.2030, 0.0493, 0.1322]) -Greedy action tensor([ 1.2831, -0.2939, -0.4813, 0.0646]) tensor([0.5975, 0.1234, 0.1024, 0.1767]) -Greedy action tensor([ 2.0922, -0.2596, -0.5261, 0.1693]) tensor([0.7609, 0.0724, 0.0555, 0.1112]) -Greedy action tensor([ 1.7938, -0.0614, -0.1582, 0.3587]) tensor([0.6508, 0.1018, 0.0924, 0.1550]) -Greedy action tensor([ 1.3165, -0.4597, -0.5673, 0.0621]) tensor([0.6225, 0.1054, 0.0946, 0.1775]) -Greedy action tensor([ 1.6896, -0.7163, -0.5710, 0.2913]) tensor([0.6937, 0.0626, 0.0723, 0.1714]) -Greedy action tensor([ 1.4955, -0.4039, -0.4326, 0.2185]) tensor([0.6353, 0.0951, 0.0924, 0.1772]) -Greedy action tensor([ 0.3343, -0.3234, -0.0532, 0.3007]) tensor([0.3161, 0.1638, 0.2145, 0.3056]) -Greedy action tensor([ 1.5829, -0.9421, -0.3728, 0.5388]) tensor([0.6355, 0.0509, 0.0899, 0.2237]) -Greedy action tensor([ 1.8092, -1.0344, -0.4060, 0.4549]) tensor([0.7015, 0.0408, 0.0766, 0.1811]) -Greedy action tensor([ 1.3972, -0.4783, -0.6508, 0.3597]) tensor([0.6110, 0.0937, 0.0788, 0.2165]) -Greedy action tensor([ 1.8469, -0.5590, -0.3758, 0.5928]) tensor([0.6739, 0.0608, 0.0730, 0.1923]) -Greedy action tensor([ 1.2349, -0.3554, -0.1799, 0.1898]) tensor([0.5560, 0.1133, 0.1351, 0.1955]) -Greedy action tensor([ 1.5855, -0.1851, -0.1951, 0.2926]) tensor([0.6199, 0.1055, 0.1045, 0.1701]) -Greedy action tensor([ 1.2540, -0.1325, -0.8466, 0.2785]) tensor([0.5716, 0.1429, 0.0700, 0.2155]) -Greedy action tensor([ 1.1782, 0.0685, -0.9612, 0.4340]) tensor([0.5201, 0.1715, 0.0612, 0.2471]) -Greedy action tensor([ 1.1226, -0.1953, -0.1006, -0.0278]) tensor([0.5323, 0.1425, 0.1567, 0.1685]) -Greedy action tensor([ 2.1097, -1.2268, -0.1121, 0.5981]) tensor([0.7329, 0.0261, 0.0794, 0.1616]) -Greedy action tensor([ 1.3640, -0.5008, 0.0113, 0.0308]) tensor([0.5963, 0.0924, 0.1542, 0.1572]) -Greedy action tensor([ 1.1943, -0.2854, -0.0401, 0.0452]) tensor([0.5448, 0.1240, 0.1585, 0.1727]) -Greedy action tensor([ 1.5413, -0.2754, -0.8089, 0.5007]) tensor([0.6207, 0.1009, 0.0592, 0.2192]) -Greedy action tensor([ 2.1386, -0.7763, -0.4749, 0.5379]) tensor([0.7523, 0.0408, 0.0551, 0.1518]) -Greedy action tensor([ 1.3690, -0.3138, -0.6776, 0.5068]) tensor([0.5756, 0.1070, 0.0744, 0.2430]) -Greedy action tensor([ 1.5089, -0.6090, -0.3968, 0.0610]) tensor([0.6649, 0.0800, 0.0989, 0.1563]) -Greedy action tensor([ 1.5833, -0.4140, -1.0589, 0.2200]) tensor([0.6837, 0.0928, 0.0487, 0.1749]) -Greedy action tensor([ 1.0761, 0.3685, -0.9787, 0.2737]) tensor([0.4833, 0.2382, 0.0619, 0.2166]) -Greedy action tensor([ 1.6974, 0.1743, -0.5059, 0.4138]) tensor([0.6229, 0.1358, 0.0688, 0.1725]) -Greedy action tensor([ 1.3465, -0.4125, -0.2241, 0.4299]) tensor([0.5618, 0.0968, 0.1168, 0.2246]) -Greedy action tensor([ 1.6446, -0.7385, -0.5044, 0.6354]) tensor([0.6356, 0.0586, 0.0741, 0.2317]) -Greedy action tensor([ 1.3724, -0.0571, -0.1467, -0.0426]) tensor([0.5878, 0.1407, 0.1287, 0.1428]) -Greedy action tensor([ 1.9118, -1.0397, -0.4781, -0.1684]) tensor([0.7881, 0.0412, 0.0722, 0.0984]) -Greedy action tensor([ 1.6189, -0.1730, -1.0616, 0.3642]) tensor([0.6578, 0.1096, 0.0451, 0.1876]) -Greedy action tensor([ 1.4305, -0.3979, -0.3521, 0.0767]) tensor([0.6301, 0.1012, 0.1060, 0.1627]) -Greedy action tensor([ 2.1197, -0.8508, -0.5945, 0.1250]) tensor([0.7977, 0.0409, 0.0529, 0.1085]) -Greedy action tensor([ 1.7121, -0.1402, -0.5338, 0.2895]) tensor([0.6650, 0.1043, 0.0704, 0.1603]) -Greedy action tensor([ 1.2156, -0.5550, -0.1154, 0.2650]) tensor([0.5492, 0.0935, 0.1451, 0.2123]) -Greedy action tensor([ 1.8907, 0.2483, -0.6704, 0.3706]) tensor([0.6714, 0.1299, 0.0518, 0.1468]) -Greedy action tensor([ 1.2929, -0.0517, -0.8592, 0.3846]) tensor([0.5618, 0.1464, 0.0653, 0.2265]) -Greedy action tensor([ 1.9527, -1.0054, -0.0375, 0.2498]) tensor([0.7295, 0.0379, 0.0997, 0.1329]) -Greedy action tensor([ 0.7635, -0.1086, 0.1665, 0.0379]) tensor([0.4077, 0.1705, 0.2244, 0.1974]) -Greedy action tensor([ 1.9383, -0.5918, -0.5339, 0.6938]) tensor([0.6886, 0.0549, 0.0581, 0.1984]) -Greedy action tensor([ 2.0403, -1.0888, -0.2722, 0.1974]) tensor([0.7686, 0.0336, 0.0761, 0.1217]) -Greedy action tensor([ 1.1369, -0.2457, -0.6892, 0.4384]) tensor([0.5238, 0.1314, 0.0843, 0.2605]) -Greedy action tensor([ 1.6563, -0.1804, -0.3465, 0.1017]) tensor([0.6642, 0.1058, 0.0896, 0.1403]) -Greedy action tensor([ 1.0865, -0.2583, -0.9778, 0.3733]) tensor([0.5326, 0.1388, 0.0676, 0.2610]) -Greedy action tensor([ 1.5401, -0.6132, -0.6353, 0.4351]) tensor([0.6407, 0.0744, 0.0728, 0.2122]) -Greedy action tensor([ 2.8340, -1.0109, 0.1698, 1.0557]) tensor([0.7937, 0.0170, 0.0553, 0.1341]) -Greedy action tensor([ 1.0583, -0.5735, -0.5838, 0.9767]) tensor([0.4328, 0.0846, 0.0838, 0.3988]) -Greedy action tensor([-0.9472, 0.1439, 0.7723, -1.4816]) tensor([0.0986, 0.2935, 0.5502, 0.0578]) -Greedy action tensor([ 0.6291, -0.4477, -0.2988, 0.7220]) tensor([0.3529, 0.1202, 0.1395, 0.3873]) -Greedy action tensor([-0.2999, -1.4336, -0.6563, -0.1307]) tensor([0.3119, 0.1004, 0.2184, 0.3694]) -Greedy action tensor([-0.2628, -0.5841, -0.9481, 0.0583]) tensor([0.2772, 0.2010, 0.1397, 0.3821]) -Greedy action tensor([ 0.1372, -0.5780, -0.0914, -0.8647]) tensor([0.3771, 0.1844, 0.3000, 0.1385]) -Greedy action tensor([-0.1511, 0.7107, 0.5176, -0.2423]) tensor([0.1605, 0.3799, 0.3132, 0.1465]) -Greedy action tensor([ 1.0830, -0.1131, 0.8519, -0.1416]) tensor([0.4184, 0.1265, 0.3321, 0.1230]) -Greedy action tensor([-0.1505, -0.0994, -0.9164, -0.7012]) tensor([0.3232, 0.3402, 0.1503, 0.1863]) -Greedy action tensor([ 0.7125, -0.3819, 0.0494, -0.1662]) tensor([0.4414, 0.1478, 0.2275, 0.1833]) -Greedy action tensor([ 0.8118, -0.6141, 0.5261, 0.7370]) tensor([0.3425, 0.0823, 0.2574, 0.3178]) -Greedy action tensor([-0.4675, -0.1089, -1.2374, 0.0714]) tensor([0.2170, 0.3106, 0.1005, 0.3719]) -Greedy action tensor([ 1.2968, -0.4256, 0.0582, -0.1663]) tensor([0.5883, 0.1051, 0.1705, 0.1362]) -Greedy action tensor([ 1.1337, -0.0913, 0.3676, -0.8796]) tensor([0.5285, 0.1552, 0.2457, 0.0706]) -Greedy action tensor([ 0.3248, -0.7062, 0.7850, -0.6298]) tensor([0.3007, 0.1072, 0.4764, 0.1157]) -Greedy action tensor([-0.7609, -0.8724, -0.6927, 0.1899]) tensor([0.1801, 0.1611, 0.1928, 0.4660]) -Greedy action tensor([ 0.1616, -0.4017, 0.1697, -0.2234]) tensor([0.3069, 0.1748, 0.3094, 0.2089]) -Greedy action tensor([-0.2266, -1.4028, 0.7242, -0.8936]) tensor([0.2268, 0.0700, 0.5869, 0.1164]) -Greedy action tensor([ 0.2056, -0.5011, 0.4228, -0.2828]) tensor([0.2985, 0.1473, 0.3710, 0.1832]) -Greedy action tensor([ 0.6634, -0.6551, 0.7816, -0.3938]) tensor([0.3649, 0.0976, 0.4107, 0.1268]) -Greedy action tensor([-0.2147, 0.0574, 0.0473, -0.3140]) tensor([0.2213, 0.2906, 0.2876, 0.2004]) -Greedy action tensor([-0.2888, -0.2216, 0.4042, -0.3365]) tensor([0.1991, 0.2129, 0.3981, 0.1898]) -Greedy action tensor([ 0.2398, -1.0742, -0.2552, -0.4005]) tensor([0.4157, 0.1117, 0.2534, 0.2191]) -Greedy action tensor([ 0.6427, -0.3458, -0.2944, 0.0991]) tensor([0.4265, 0.1587, 0.1671, 0.2477]) -Greedy action tensor([ 0.2780, -0.0862, 0.3654, -0.6897]) tensor([0.3158, 0.2194, 0.3447, 0.1200]) -Greedy action tensor([ 1.3913, 0.3551, 0.0977, -0.4681]) tensor([0.5603, 0.1988, 0.1537, 0.0873]) -Greedy action tensor([ 0.2350, -0.1740, 0.0855, -0.3354]) tensor([0.3236, 0.2149, 0.2786, 0.1829]) -Greedy action tensor([ 1.5677, -1.6795, -0.2276, 0.4346]) tensor([0.6549, 0.0255, 0.1088, 0.2109]) -Greedy action tensor([-0.0307, 0.3630, -0.6089, -0.4807]) tensor([0.2717, 0.4027, 0.1524, 0.1732]) -Greedy action tensor([-0.7644, 0.2407, 0.6244, -0.8796]) tensor([0.1158, 0.3165, 0.4645, 0.1032]) -Greedy action tensor([ 0.3211, -0.1933, 0.5829, -0.0732]) tensor([0.2800, 0.1674, 0.3638, 0.1888]) -Greedy action tensor([-0.3080, -1.1063, -0.2992, -0.2931]) tensor([0.2879, 0.1296, 0.2904, 0.2922]) -Greedy action tensor([ 0.5656, 0.4348, -0.1149, -0.7415]) tensor([0.3767, 0.3305, 0.1908, 0.1019]) -Greedy action tensor([-0.4307, -0.6519, -0.3261, -0.8288]) tensor([0.2790, 0.2237, 0.3098, 0.1874]) -Greedy action tensor([0.6353, 0.2238, 0.7337, 0.1839]) tensor([0.2939, 0.1947, 0.3243, 0.1871]) -Greedy action tensor([-0.9742, -0.8838, -0.5877, 0.2616]) tensor([0.1427, 0.1562, 0.2100, 0.4910]) -Greedy action tensor([ 0.2717, -0.1237, -0.2684, 0.3761]) tensor([0.2971, 0.2001, 0.1731, 0.3298]) -Greedy action tensor([ 0.6783, -1.0309, 1.0631, -0.0417]) tensor([0.3188, 0.0577, 0.4684, 0.1552]) -Greedy action tensor([ 0.4926, -1.0558, -0.7545, 0.1112]) tensor([0.4581, 0.0974, 0.1316, 0.3129]) -Greedy action tensor([-0.6683, -0.6109, 1.2994, -0.5547]) tensor([0.0968, 0.1025, 0.6923, 0.1084]) -Greedy action tensor([-0.3466, -0.4136, -1.4032, -0.2124]) tensor([0.2919, 0.2729, 0.1015, 0.3338]) -Greedy action tensor([1.2851, 0.0081, 0.6204, 0.1419]) tensor([0.4735, 0.1320, 0.2436, 0.1509]) -Greedy action tensor([-0.8145, -0.5127, -0.5645, 0.0030]) tensor([0.1695, 0.2292, 0.2176, 0.3838]) -Greedy action tensor([-0.1789, -0.9682, 0.0137, 0.1297]) tensor([0.2483, 0.1127, 0.3010, 0.3380]) -Greedy action tensor([-0.8241, -0.6100, 0.1871, -0.6955]) tensor([0.1633, 0.2022, 0.4488, 0.1857]) -Greedy action tensor([ 0.6068, -0.6189, -0.4473, -0.7889]) tensor([0.5292, 0.1553, 0.1844, 0.1310]) -Greedy action tensor([ 0.1737, -0.5394, 1.0891, -0.2812]) tensor([0.2163, 0.1060, 0.5404, 0.1373]) -Greedy action tensor([ 0.5308, -0.2627, -0.0281, 0.2436]) tensor([0.3604, 0.1630, 0.2061, 0.2705]) -Greedy action tensor([ 0.0896, -0.9643, -0.7814, -0.1254]) tensor([0.3886, 0.1354, 0.1626, 0.3134]) -Greedy action tensor([-0.3549, -1.5800, 1.4566, -1.2621]) tensor([0.1279, 0.0376, 0.7829, 0.0516]) -Greedy action tensor([ 1.2510, -1.2039, -0.4799, 0.2395]) tensor([0.6148, 0.0528, 0.1089, 0.2236]) -Greedy action tensor([ 0.1321, 0.2888, -0.3729, -0.8344]) tensor([0.3171, 0.3709, 0.1914, 0.1206]) -Greedy action tensor([-0.4835, -0.3538, 0.3565, -0.5360]) tensor([0.1850, 0.2107, 0.4287, 0.1756]) -Greedy action tensor([ 0.1276, -0.9107, 0.3010, -0.7658]) tensor([0.3387, 0.1199, 0.4028, 0.1386]) -Greedy action tensor([-0.6117, -1.2424, 0.0788, -0.7013]) tensor([0.2252, 0.1198, 0.4491, 0.2059]) -Greedy action tensor([-1.2549, -0.2482, -0.1795, -1.5861]) tensor([0.1354, 0.3705, 0.3969, 0.0972]) -Greedy action tensor([ 0.7535, 0.1210, -0.1936, -0.2824]) tensor([0.4398, 0.2336, 0.1706, 0.1561]) -Greedy action tensor([-0.0647, -0.8186, 0.1402, 0.3302]) tensor([0.2391, 0.1125, 0.2935, 0.3549]) -Greedy action tensor([-0.7135, -0.9477, -0.0266, -0.2946]) tensor([0.1887, 0.1493, 0.3751, 0.2869]) -Greedy action tensor([ 0.2837, -0.6830, 0.4066, -0.6260]) tensor([0.3432, 0.1305, 0.3881, 0.1382]) -Greedy action tensor([ 0.5685, -0.6194, -0.6541, -0.1872]) tensor([0.4833, 0.1473, 0.1423, 0.2270]) -Greedy action tensor([-0.4296, -1.0222, -0.6467, -0.6225]) tensor([0.3142, 0.1737, 0.2529, 0.2591]) -Greedy action tensor([-0.9538, -0.5825, 0.9597, -1.5380]) tensor([0.1022, 0.1482, 0.6926, 0.0570]) -Greedy action tensor([ 0.7667, -0.3393, 0.1822, 0.1765]) tensor([0.4094, 0.1355, 0.2282, 0.2269]) -Greedy action tensor([-0.1434, -0.3605, -0.4019, -0.4685]) tensor([0.3031, 0.2439, 0.2340, 0.2190]) -Greedy action tensor([ 0.5974, -0.2498, -0.1162, -0.0269]) tensor([0.4075, 0.1746, 0.1996, 0.2183]) -Greedy action tensor([ 0.2974, -0.3600, -0.4969, -0.7359]) tensor([0.4299, 0.2228, 0.1943, 0.1530]) -Greedy action tensor([-1.4103, -0.0462, -1.5846, 0.2203]) tensor([0.0921, 0.3603, 0.0774, 0.4703]) -Greedy action tensor([-0.3017, 0.0920, 0.1879, -0.6340]) tensor([0.2070, 0.3068, 0.3377, 0.1485]) -Greedy action tensor([ 0.3752, -1.8155, 1.0669, 0.7856]) tensor([0.2166, 0.0242, 0.4326, 0.3265]) -Greedy action tensor([ 0.7874, -0.7325, 0.1204, 0.0503]) tensor([0.4524, 0.0990, 0.2322, 0.2165]) -Greedy action tensor([ 0.1819, -1.0981, 0.2058, -0.3965]) tensor([0.3493, 0.0971, 0.3577, 0.1959]) -Greedy action tensor([-1.0965, -1.4703, -0.3569, -0.7461]) tensor([0.1922, 0.1323, 0.4027, 0.2728]) -Greedy action tensor([ 0.1456, -0.2817, 0.6317, -0.6454]) tensor([0.2680, 0.1748, 0.4357, 0.1215]) -Greedy action tensor([ 0.2287, -0.1973, -0.0936, -0.5871]) tensor([0.3546, 0.2316, 0.2569, 0.1568]) -Greedy action tensor([-0.8628, -0.6554, -0.1177, -0.9103]) tensor([0.1890, 0.2326, 0.3982, 0.1802]) -Greedy action tensor([-0.0728, -0.5263, -0.6586, 0.2932]) tensor([0.2752, 0.1748, 0.1532, 0.3968]) -Greedy action tensor([ 0.2463, 0.0370, -0.1860, -0.4375]) tensor([0.3373, 0.2736, 0.2189, 0.1702]) -Greedy action tensor([-0.3383, -1.1371, -0.2769, -0.3797]) tensor([0.2880, 0.1295, 0.3062, 0.2763]) -Greedy action tensor([-0.9853, -0.8824, -0.0454, -0.9436]) tensor([0.1751, 0.1941, 0.4482, 0.1826]) -Greedy action tensor([ 1.1130, -0.6855, -0.2633, -0.3578]) tensor([0.6069, 0.1005, 0.1532, 0.1394]) -Greedy action tensor([-0.2717, -0.6569, -1.1885, 0.5551]) tensor([0.2290, 0.1558, 0.0916, 0.5236]) -Greedy action tensor([ 1.4580, -0.5485, -0.2706, 0.7773]) tensor([0.5500, 0.0739, 0.0976, 0.2784]) -Greedy action tensor([ 0.9303, -0.7324, 0.1345, -0.3750]) tensor([0.5230, 0.0992, 0.2360, 0.1418]) -Greedy action tensor([ 0.5421, -0.1456, -0.0700, -0.1785]) tensor([0.3951, 0.1986, 0.2142, 0.1922]) -Greedy action tensor([ 0.7946, -0.2920, -0.1025, -0.1670]) tensor([0.4701, 0.1586, 0.1917, 0.1797]) -Greedy action tensor([ 0.5374, -0.1485, -0.0806, -0.0504]) tensor([0.3849, 0.1938, 0.2075, 0.2138]) -Greedy action tensor([ 0.9144, -0.3796, -0.0904, -0.3349]) tensor([0.5189, 0.1423, 0.1900, 0.1488]) -Greedy action tensor([ 0.5921, -0.6052, -0.2881, -0.6488]) tensor([0.4985, 0.1506, 0.2067, 0.1441]) -Greedy action tensor([ 0.7316, -0.4937, -0.0479, -0.1973]) tensor([0.4657, 0.1368, 0.2136, 0.1840]) -Greedy action tensor([ 0.8613, -0.6463, 0.0097, -0.2644]) tensor([0.5069, 0.1123, 0.2163, 0.1645]) -Greedy action tensor([ 0.4851, -0.0117, -0.1401, 0.0217]) tensor([0.3606, 0.2195, 0.1930, 0.2269]) -Greedy action tensor([ 0.6032, 0.0671, -0.0667, -0.1570]) tensor([0.3900, 0.2281, 0.1996, 0.1823]) -Greedy action tensor([ 0.7835, -0.8621, 0.1260, -0.3851]) tensor([0.4946, 0.0954, 0.2563, 0.1537]) -Greedy action tensor([ 1.1111, -0.7323, 0.0775, -0.5626]) tensor([0.5877, 0.0930, 0.2091, 0.1102]) -Greedy action tensor([ 0.8394, -0.2760, 0.1668, -0.1515]) tensor([0.4526, 0.1484, 0.2310, 0.1680]) -Greedy action tensor([ 0.9197, -0.7437, 0.0732, -0.5273]) tensor([0.5395, 0.1022, 0.2314, 0.1269]) -Greedy action tensor([ 0.9634, -0.7129, -0.0094, -0.3450]) tensor([0.5449, 0.1019, 0.2060, 0.1472]) -Greedy action tensor([ 0.7087, -0.3262, -0.1662, -0.1395]) tensor([0.4545, 0.1615, 0.1895, 0.1946]) -Greedy action tensor([ 1.0320, -0.8816, -0.0728, -0.4803]) tensor([0.5885, 0.0868, 0.1949, 0.1297]) -Greedy action tensor([ 0.6996, -0.2937, -0.0185, -0.1290]) tensor([0.4358, 0.1614, 0.2125, 0.1903]) -Greedy action tensor([ 0.9701, -0.7072, -0.0406, -0.2959]) tensor([0.5456, 0.1020, 0.1986, 0.1538]) -Greedy action tensor([ 0.7788, -0.4804, 0.0225, -0.1642]) tensor([0.4667, 0.1325, 0.2191, 0.1818]) -Greedy action tensor([ 1.1073, 0.1141, 0.1600, -0.1404]) tensor([0.4889, 0.1811, 0.1896, 0.1404]) -Greedy action tensor([ 0.6883, -0.5860, -0.0798, -0.1887]) tensor([0.4631, 0.1295, 0.2148, 0.1927]) -Greedy action tensor([ 1.1472, -0.6706, 0.0860, -0.5113]) tensor([0.5886, 0.0956, 0.2037, 0.1121]) -Greedy action tensor([ 0.5165, -0.2938, 0.0626, -0.2736]) tensor([0.3947, 0.1755, 0.2507, 0.1791]) -Greedy action tensor([ 0.5717, -0.0435, -0.1232, -0.6712]) tensor([0.4295, 0.2322, 0.2144, 0.1239]) -Greedy action tensor([ 1.0816, -0.4309, -0.0776, -0.3262]) tensor([0.5622, 0.1239, 0.1764, 0.1376]) -Greedy action tensor([ 0.2154, -0.1659, -0.0754, -0.1979]) tensor([0.3234, 0.2209, 0.2418, 0.2139]) -Greedy action tensor([ 0.3111, -0.3884, -0.1679, -0.0937]) tensor([0.3593, 0.1785, 0.2225, 0.2397]) -Greedy action tensor([ 0.7137, -0.2642, -0.1187, -0.1586]) tensor([0.4486, 0.1687, 0.1951, 0.1875]) -Greedy action tensor([ 1.0327, -0.8927, 0.0159, -0.4169]) tensor([0.5740, 0.0837, 0.2076, 0.1347]) -Greedy action tensor([ 0.4971, 0.0352, 0.0737, -0.0384]) tensor([0.3484, 0.2195, 0.2281, 0.2039]) -Greedy action tensor([ 0.9289, -0.4013, -0.0827, -0.1936]) tensor([0.5119, 0.1354, 0.1861, 0.1666]) -Greedy action tensor([ 0.7984, -0.5406, -0.1384, -0.4686]) tensor([0.5166, 0.1354, 0.2024, 0.1455]) -Greedy action tensor([ 1.0832, -0.7912, 0.0765, -0.6086]) tensor([0.5872, 0.0901, 0.2146, 0.1082]) -Greedy action tensor([ 0.8589, -0.1831, 0.0667, -0.1073]) tensor([0.4574, 0.1614, 0.2072, 0.1741]) -Greedy action tensor([ 0.9478, -0.5299, -0.0642, -0.3637]) tensor([0.5373, 0.1226, 0.1953, 0.1448]) -Greedy action tensor([ 0.8323, -0.4113, 0.0622, -0.3060]) tensor([0.4827, 0.1392, 0.2235, 0.1546]) -Greedy action tensor([ 0.7548, -0.2225, -0.0513, -0.0417]) tensor([0.4398, 0.1655, 0.1964, 0.1983]) -Greedy action tensor([ 0.9264, -0.4473, 0.0990, -0.0825]) tensor([0.4866, 0.1232, 0.2128, 0.1774]) -Greedy action tensor([ 0.9778, -0.7371, -0.0181, -0.4073]) tensor([0.5557, 0.1000, 0.2053, 0.1391]) -Greedy action tensor([ 0.9007, -0.2796, -0.0486, -0.0882]) tensor([0.4840, 0.1487, 0.1873, 0.1800]) -Greedy action tensor([ 1.0422, -0.1770, -0.0264, -0.1525]) tensor([0.5150, 0.1522, 0.1769, 0.1559]) -Greedy action tensor([ 1.3523, -0.6774, 0.0402, -0.6132]) tensor([0.6490, 0.0853, 0.1748, 0.0909]) -Greedy action tensor([ 0.5820, -0.3712, 0.0782, -0.2171]) tensor([0.4099, 0.1580, 0.2477, 0.1844]) -Greedy action tensor([ 0.9993, -0.7061, 0.1335, -0.4250]) tensor([0.5426, 0.0986, 0.2283, 0.1306]) -Greedy action tensor([ 1.3593, -1.0769, 0.0693, -0.8431]) tensor([0.6787, 0.0594, 0.1868, 0.0750]) -Greedy action tensor([ 0.9258, -0.6988, -0.0357, -0.5591]) tensor([0.5538, 0.1091, 0.2117, 0.1254]) -Greedy action tensor([ 0.9574, -0.9315, 0.2778, -0.7350]) tensor([0.5429, 0.0821, 0.2751, 0.0999]) -Greedy action tensor([ 0.7256, -0.0063, -0.0468, 0.0703]) tensor([0.4061, 0.1953, 0.1876, 0.2109]) -Greedy action tensor([ 0.7221, -0.3736, -0.0903, -0.1881]) tensor([0.4586, 0.1533, 0.2035, 0.1846]) -Greedy action tensor([ 0.8362, -0.6215, 0.0081, -0.4126]) tensor([0.5111, 0.1190, 0.2233, 0.1466]) -Greedy action tensor([ 0.7069, -0.1684, -0.0249, 0.0126]) tensor([0.4171, 0.1738, 0.2007, 0.2083]) -Greedy action tensor([ 1.0910, -0.6326, 0.1637, -0.5445]) tensor([0.5653, 0.1009, 0.2236, 0.1102]) -Greedy action tensor([ 1.0121, -0.7011, 0.0812, -0.6517]) tensor([0.5669, 0.1022, 0.2235, 0.1074]) -Greedy action tensor([ 0.9128, -0.7108, 0.0635, -0.3885]) tensor([0.5271, 0.1039, 0.2255, 0.1435]) -Greedy action tensor([ 0.8097, -0.1169, 0.0682, -0.0209]) tensor([0.4332, 0.1715, 0.2064, 0.1888]) -Greedy action tensor([ 1.1112, -0.8196, -0.0291, -0.5589]) tensor([0.6050, 0.0877, 0.1934, 0.1139]) -Greedy action tensor([ 0.6743, -0.7239, -0.0592, -0.2712]) tensor([0.4726, 0.1168, 0.2270, 0.1836]) -Greedy action tensor([ 0.7140, -0.4054, -0.1283, -0.2286]) tensor([0.4658, 0.1521, 0.2006, 0.1815]) -Greedy action tensor([ 0.6306, 0.0319, -0.1339, 0.0164]) tensor([0.3912, 0.2150, 0.1821, 0.2117]) -Greedy action tensor([ 0.7992, -0.2450, 0.1302, -0.0734]) tensor([0.4382, 0.1542, 0.2245, 0.1831]) -Greedy action tensor([ 1.3478, -1.0454, -0.0194, -0.7097]) tensor([0.6785, 0.0620, 0.1729, 0.0867]) -Greedy action tensor([ 0.9643, -0.5100, -0.1137, -0.3350]) tensor([0.5429, 0.1243, 0.1847, 0.1481]) -Greedy action tensor([ 0.6308, -0.6658, 0.0599, -0.4096]) tensor([0.4563, 0.1248, 0.2578, 0.1612]) -Greedy action tensor([ 0.9315, -0.4457, -0.0039, -0.2465]) tensor([0.5121, 0.1292, 0.2010, 0.1577]) -Greedy action tensor([ 0.9262, 0.0359, -0.2866, -0.4293]) tensor([0.5087, 0.2088, 0.1513, 0.1312]) -Greedy action tensor([ 0.5653, -0.3050, -0.1124, -0.6700]) tensor([0.4510, 0.1889, 0.2290, 0.1311]) -Greedy action tensor([ 0.8385, -1.0252, -0.0207, -0.4857]) tensor([0.5421, 0.0841, 0.2296, 0.1442]) -Greedy action tensor([ 0.9123, -0.4635, -0.1439, -0.3147]) tensor([0.5281, 0.1334, 0.1837, 0.1548]) -Greedy action tensor([ 0.6936, -0.7135, -0.0737, -0.3225]) tensor([0.4828, 0.1182, 0.2242, 0.1748]) -Greedy action tensor([ 1.6661, -1.0198, -0.2488, -0.6004]) tensor([0.7581, 0.0517, 0.1117, 0.0786]) -Greedy action tensor([ 0.8579, -0.7388, 0.0979, -0.4256]) tensor([0.5135, 0.1040, 0.2402, 0.1423]) -Greedy action tensor([ 0.6518, -0.2160, -0.0841, -0.0923]) tensor([0.4212, 0.1769, 0.2018, 0.2001]) -Greedy action tensor([ 0.8259, -0.3496, -0.0811, -0.1650]) tensor([0.4799, 0.1481, 0.1938, 0.1782]) -Greedy action tensor([ 0.7712, -0.5939, 0.3030, -0.8893]) tensor([0.4827, 0.1233, 0.3023, 0.0917]) -Greedy action tensor([ 1.3252, -0.9332, -0.1506, -0.4850]) tensor([0.6681, 0.0698, 0.1527, 0.1093]) -Greedy action tensor([ 0.7369, -0.4929, -0.0386, -0.1117]) tensor([0.4586, 0.1341, 0.2111, 0.1963]) -Greedy action tensor([ 0.8697, -0.5805, -0.0176, -0.4648]) tensor([0.5237, 0.1228, 0.2156, 0.1379]) -Greedy action tensor([ 0.9196, -0.5341, -0.0616, -0.4747]) tensor([0.5386, 0.1259, 0.2019, 0.1336]) -Greedy action tensor([ 0.9159, -0.4965, -0.0731, -0.3334]) tensor([0.5257, 0.1280, 0.1955, 0.1507]) -Greedy action tensor([ 0.3446, 0.1163, -0.1598, 0.1522]) tensor([0.3101, 0.2468, 0.1873, 0.2558]) -Greedy action tensor([-1.9042, -0.4380, 0.6482, -0.1600]) tensor([0.0419, 0.1813, 0.5373, 0.2395]) -Greedy action tensor([-1.8891, -0.3287, 0.6267, -0.1503]) tensor([0.0420, 0.1998, 0.5194, 0.2388]) -Greedy action tensor([-1.9173, -0.4437, 0.6547, -0.1681]) tensor([0.0413, 0.1803, 0.5408, 0.2375]) -Greedy action tensor([-1.8947, -0.3952, 0.6507, -0.1566]) tensor([0.0418, 0.1873, 0.5331, 0.2378]) -Greedy action tensor([-1.8316, -0.0803, 0.5710, -0.1117]) tensor([0.0427, 0.2463, 0.4723, 0.2387]) -Greedy action tensor([-1.8416, -0.3763, 0.6344, -0.1264]) tensor([0.0439, 0.1900, 0.5221, 0.2440]) -Greedy action tensor([-1.8585, -0.4454, 0.6258, -0.1397]) tensor([0.0441, 0.1812, 0.5288, 0.2459]) -Greedy action tensor([-1.8604, -0.3096, 0.6271, -0.1399]) tensor([0.0429, 0.2021, 0.5156, 0.2394]) -Greedy action tensor([-1.5748, -0.4805, 0.5018, -0.0381]) tensor([0.0602, 0.1798, 0.4802, 0.2799]) -Greedy action tensor([-1.9078, -0.4536, 0.6507, -0.1648]) tensor([0.0418, 0.1790, 0.5402, 0.2390]) -Greedy action tensor([-1.6296, -0.1508, 0.4759, -0.0620]) tensor([0.0544, 0.2385, 0.4464, 0.2607]) -Greedy action tensor([-1.8895, -0.4114, 0.6530, -0.1502]) tensor([0.0420, 0.1843, 0.5344, 0.2393]) -Greedy action tensor([-1.9145, -0.4474, 0.6552, -0.1651]) tensor([0.0414, 0.1796, 0.5409, 0.2381]) -Greedy action tensor([-1.9400, -0.4168, 0.6582, -0.1780]) tensor([0.0402, 0.1846, 0.5408, 0.2344]) -Greedy action tensor([-1.7619, -0.1974, 0.5368, -0.1013]) tensor([0.0476, 0.2276, 0.4743, 0.2505]) -Greedy action tensor([-1.9166, -0.3907, 0.6504, -0.1655]) tensor([0.0410, 0.1886, 0.5342, 0.2362]) -Greedy action tensor([-1.9166, -0.4395, 0.6553, -0.1660]) tensor([0.0413, 0.1808, 0.5403, 0.2377]) -Greedy action tensor([-1.7134, 0.0030, 0.5076, -0.1286]) tensor([0.0484, 0.2693, 0.4461, 0.2361]) -Greedy action tensor([-1.8688, -0.3653, 0.6251, -0.1268]) tensor([0.0429, 0.1929, 0.5193, 0.2449]) -Greedy action tensor([-1.4116, 0.2803, 0.4824, 0.2225]) tensor([0.0549, 0.2983, 0.3652, 0.2816]) -Greedy action tensor([-1.8773, -0.1888, 0.6477, -0.2708]) tensor([0.0419, 0.2265, 0.5229, 0.2087]) -Greedy action tensor([-1.9248, -0.4117, 0.6485, -0.1713]) tensor([0.0409, 0.1859, 0.5367, 0.2364]) -Greedy action tensor([-1.9096, -0.4477, 0.6482, -0.1671]) tensor([0.0418, 0.1803, 0.5393, 0.2386]) -Greedy action tensor([-1.8032, -0.4905, 0.5938, -0.1150]) tensor([0.0474, 0.1760, 0.5205, 0.2562]) -Greedy action tensor([-1.1478, -0.0905, 0.3903, -0.2539]) tensor([0.0911, 0.2622, 0.4241, 0.2227]) -Greedy action tensor([-1.8634, -0.4532, 0.6274, -0.1424]) tensor([0.0439, 0.1800, 0.5304, 0.2456]) -Greedy action tensor([-1.8067, -0.4673, 0.5963, -0.1276]) tensor([0.0471, 0.1798, 0.5207, 0.2525]) -Greedy action tensor([-1.9305, -0.4506, 0.6620, -0.1751]) tensor([0.0408, 0.1790, 0.5445, 0.2358]) -Greedy action tensor([-1.7686, -0.4422, 0.5759, -0.1170]) tensor([0.0490, 0.1846, 0.5109, 0.2555]) -Greedy action tensor([-1.4829, -0.1064, 0.4245, -0.3604]) tensor([0.0677, 0.2682, 0.4560, 0.2080]) -Greedy action tensor([-1.5515, 0.1359, 0.4013, -0.0457]) tensor([0.0557, 0.3009, 0.3924, 0.2510]) -Greedy action tensor([-1.5784, -0.5474, 0.4923, -0.0745]) tensor([0.0616, 0.1727, 0.4885, 0.2772]) -Greedy action tensor([1.0360, 0.2255, 0.6892, 1.5061]) tensor([0.2665, 0.1185, 0.1884, 0.4265]) -Greedy action tensor([-1.3261, -0.5522, 0.3845, 0.0085]) tensor([0.0800, 0.1735, 0.4426, 0.3039]) -Greedy action tensor([-1.9404, -0.4477, 0.6696, -0.1737]) tensor([0.0402, 0.1787, 0.5461, 0.2350]) -Greedy action tensor([-1.8943, -0.4384, 0.6432, -0.1557]) tensor([0.0423, 0.1815, 0.5353, 0.2408]) -Greedy action tensor([-1.7595, -0.4648, 0.6000, 0.0307]) tensor([0.0471, 0.1719, 0.4987, 0.2822]) -Greedy action tensor([-1.6783, 0.0790, 0.5115, -0.2114]) tensor([0.0498, 0.2889, 0.4452, 0.2161]) -Greedy action tensor([-1.9126, -0.4541, 0.6559, -0.1648]) tensor([0.0415, 0.1785, 0.5416, 0.2384]) -Greedy action tensor([-1.5143, 0.3349, 0.3028, 0.0832]) tensor([0.0542, 0.3445, 0.3336, 0.2678]) -Greedy action tensor([-1.8917, -0.4579, 0.6449, -0.1569]) tensor([0.0426, 0.1785, 0.5377, 0.2412]) -Greedy action tensor([-1.9373, -0.4414, 0.6642, -0.1761]) tensor([0.0404, 0.1802, 0.5445, 0.2350]) -Greedy action tensor([-1.9130, -0.4193, 0.6527, -0.1649]) tensor([0.0413, 0.1840, 0.5374, 0.2373]) -Greedy action tensor([-1.2400, 0.4590, 0.3765, 0.1833]) tensor([0.0639, 0.3493, 0.3217, 0.2652]) -Greedy action tensor([-1.7167, -0.4538, 0.5518, -0.1066]) tensor([0.0521, 0.1841, 0.5033, 0.2605]) -Greedy action tensor([-1.6930, -1.0391, 0.5518, -0.3133]) tensor([0.0612, 0.1177, 0.5778, 0.2433]) -Greedy action tensor([-1.9130, -0.4393, 0.6531, -0.1627]) tensor([0.0414, 0.1809, 0.5392, 0.2385]) -Greedy action tensor([-1.7501, -0.3709, 0.5333, -0.0489]) tensor([0.0494, 0.1960, 0.4842, 0.2705]) -Greedy action tensor([-1.5057, -0.1240, 0.4574, 0.1593]) tensor([0.0575, 0.2290, 0.4095, 0.3040]) -Greedy action tensor([-1.5798, -0.4232, 0.6403, 0.0378]) tensor([0.0543, 0.1725, 0.4997, 0.2736]) -Greedy action tensor([-1.8831, -0.4049, 0.6336, -0.1502]) tensor([0.0427, 0.1872, 0.5287, 0.2415]) -Greedy action tensor([-1.7803, -0.4013, 0.6736, 0.0227]) tensor([0.0441, 0.1751, 0.5131, 0.2676]) -Greedy action tensor([-1.4563, -0.2976, 0.3852, 0.1119]) tensor([0.0654, 0.2084, 0.4124, 0.3138]) -Greedy action tensor([-0.5058, 0.8892, -0.0377, 0.0654]) tensor([0.1190, 0.4802, 0.1901, 0.2107]) -Greedy action tensor([0.3131, 0.6710, 0.4833, 0.9926]) tensor([0.1789, 0.2559, 0.2121, 0.3530]) -Greedy action tensor([-1.9333, -0.4396, 0.6619, -0.1739]) tensor([0.0405, 0.1806, 0.5433, 0.2355]) -Greedy action tensor([-1.1038, 0.4040, 0.4568, 0.4390]) tensor([0.0669, 0.3020, 0.3184, 0.3128]) -Greedy action tensor([-1.9296, -0.4417, 0.6619, -0.1730]) tensor([0.0407, 0.1802, 0.5433, 0.2358]) -Greedy action tensor([-1.9233, -0.4495, 0.6617, -0.1692]) tensor([0.0410, 0.1789, 0.5434, 0.2367]) -Greedy action tensor([-1.7729, 0.0133, 0.5236, -0.1159]) tensor([0.0451, 0.2694, 0.4487, 0.2367]) -Greedy action tensor([-1.9217, -0.4477, 0.6603, -0.1695]) tensor([0.0411, 0.1793, 0.5429, 0.2368]) -Greedy action tensor([-0.7347, 0.2476, 0.2097, 0.0513]) tensor([0.1185, 0.3165, 0.3048, 0.2601]) -Greedy action tensor([-1.8686, -0.4040, 0.6240, -0.1469]) tensor([0.0435, 0.1880, 0.5255, 0.2431]) -Greedy action tensor([-0.4824, 0.6632, 0.1093, 0.4663]) tensor([0.1172, 0.3684, 0.2117, 0.3026]) -Greedy action tensor([-1.9340, -0.4084, 0.6557, -0.1720]) tensor([0.0404, 0.1858, 0.5385, 0.2353]) -Greedy action tensor([-1.0868, 0.4408, 0.3462, -0.4146]) tensor([0.0851, 0.3919, 0.3565, 0.1666]) -Greedy action tensor([-1.7467, -0.4810, 0.5500, -0.0717]) tensor([0.0504, 0.1788, 0.5014, 0.2693]) -Greedy action tensor([-1.9023, -0.4472, 0.6517, -0.1572]) tensor([0.0419, 0.1795, 0.5387, 0.2399]) -Greedy action tensor([-0.8531, -0.5493, 0.0381, -0.0256]) tensor([0.1412, 0.1914, 0.3443, 0.3231]) -Greedy action tensor([-1.8995, -0.4050, 0.6447, -0.1600]) tensor([0.0419, 0.1866, 0.5331, 0.2384]) -Greedy action tensor([-1.9390, -0.4060, 0.6559, -0.1771]) tensor([0.0402, 0.1864, 0.5390, 0.2343]) -Greedy action tensor([-1.7303, -0.2797, 0.5301, -0.0454]) tensor([0.0494, 0.2107, 0.4736, 0.2663]) -Greedy action tensor([-1.4809, 0.0108, 0.4972, -0.6692]) tensor([0.0670, 0.2978, 0.4843, 0.1509]) -Greedy action tensor([-0.6141, 0.4093, 0.0726, -0.0777]) tensor([0.1337, 0.3720, 0.2657, 0.2286]) -Greedy action tensor([-1.9123, -0.4397, 0.6532, -0.1625]) tensor([0.0415, 0.1808, 0.5393, 0.2385]) -Greedy action tensor([-1.9284, -0.4405, 0.6604, -0.1709]) tensor([0.0408, 0.1804, 0.5426, 0.2363]) -Greedy action tensor([-1.7851, -0.2850, 0.5687, -0.1119]) tensor([0.0469, 0.2101, 0.4933, 0.2498]) -Greedy action tensor([-1.9054, -0.4009, 0.6432, -0.1601]) tensor([0.0416, 0.1874, 0.5325, 0.2385]) -Greedy action tensor([-1.8823, -0.3687, 0.5976, -0.2124]) tensor([0.0439, 0.1993, 0.5238, 0.2330]) -Greedy action tensor([-1.9283, -0.4235, 0.6595, -0.1714]) tensor([0.0407, 0.1831, 0.5407, 0.2356]) -Greedy action tensor([-1.8714, -0.3275, 0.6291, -0.1375]) tensor([0.0425, 0.1990, 0.5179, 0.2406]) -Greedy action tensor([-1.7129, -0.2800, 0.5747, -0.0494]) tensor([0.0492, 0.2062, 0.4848, 0.2597]) -Greedy action tensor([ 1.3443, -0.4902, -0.5095, 0.1197]) tensor([0.6210, 0.0992, 0.0973, 0.1825]) -Greedy action tensor([ 1.3686, -0.1566, -0.7551, 0.2123]) tensor([0.6054, 0.1317, 0.0724, 0.1905]) -Greedy action tensor([ 0.8038, -0.2938, -0.1632, 0.3094]) tensor([0.4303, 0.1436, 0.1636, 0.2625]) -Greedy action tensor([ 1.1610, -0.1282, -0.6967, 0.5235]) tensor([0.5102, 0.1405, 0.0796, 0.2697]) -Greedy action tensor([ 1.9615, 0.1126, -0.4123, 0.0664]) tensor([0.7139, 0.1124, 0.0665, 0.1073]) -Greedy action tensor([ 1.8346, -0.8974, -0.6297, 0.7658]) tensor([0.6695, 0.0436, 0.0570, 0.2299]) -Greedy action tensor([ 1.9823, -0.1770, -0.1164, 0.3792]) tensor([0.6948, 0.0802, 0.0852, 0.1398]) -Greedy action tensor([ 2.1334, -0.9865, -0.3282, 0.7257]) tensor([0.7277, 0.0321, 0.0621, 0.1781]) -Greedy action tensor([ 1.1973, -0.5978, -0.6351, 0.4017]) tensor([0.5626, 0.0935, 0.0900, 0.2539]) -Greedy action tensor([ 1.7354, 0.6220, -0.5286, 0.2439]) tensor([0.6033, 0.1982, 0.0627, 0.1358]) -Greedy action tensor([ 1.3193, -0.0105, -1.1956, 0.6573]) tensor([0.5373, 0.1421, 0.0434, 0.2771]) -Greedy action tensor([ 1.7829, -0.3597, -0.1002, 0.1993]) tensor([0.6781, 0.0796, 0.1031, 0.1392]) -Greedy action tensor([ 1.4558, -0.5684, -0.5867, 0.4095]) tensor([0.6199, 0.0819, 0.0804, 0.2178]) -Greedy action tensor([ 1.1924, -0.1097, -0.6045, 0.4569]) tensor([0.5217, 0.1419, 0.0865, 0.2500]) -Greedy action tensor([ 1.2482, -0.2089, -0.7234, -0.0762]) tensor([0.6105, 0.1422, 0.0850, 0.1624]) -Greedy action tensor([ 1.9216, -0.1368, -0.2158, 0.2098]) tensor([0.7012, 0.0895, 0.0827, 0.1266]) -Greedy action tensor([ 1.1532, -0.4551, -0.3479, 0.1215]) tensor([0.5619, 0.1125, 0.1253, 0.2003]) -Greedy action tensor([ 1.5688, -0.7015, -0.4708, 0.2667]) tensor([0.6643, 0.0686, 0.0864, 0.1807]) -Greedy action tensor([ 1.4540, -0.2964, -0.9792, 0.1274]) tensor([0.6549, 0.1138, 0.0575, 0.1738]) -Greedy action tensor([ 1.1701, -0.7061, -0.4344, 0.6895]) tensor([0.5069, 0.0776, 0.1019, 0.3135]) -Greedy action tensor([ 1.9630, -1.2880, -0.4606, 0.5499]) tensor([0.7295, 0.0283, 0.0646, 0.1776]) -Greedy action tensor([ 1.2080, -0.4193, -0.3863, 0.5207]) tensor([0.5256, 0.1033, 0.1067, 0.2644]) -Greedy action tensor([ 1.4282, -0.6826, -0.2635, 0.6891]) tensor([0.5609, 0.0679, 0.1033, 0.2678]) -Greedy action tensor([ 1.0589, -0.0825, -0.4623, 0.0707]) tensor([0.5235, 0.1672, 0.1144, 0.1949]) -Greedy action tensor([ 1.5963, -0.7540, -0.2492, 0.2271]) tensor([0.6633, 0.0632, 0.1048, 0.1687]) -Greedy action tensor([ 1.0495, -0.1028, -0.9767, 0.4294]) tensor([0.5036, 0.1591, 0.0664, 0.2709]) -Greedy action tensor([ 1.2179, -0.2163, -0.3419, -0.2170]) tensor([0.5929, 0.1413, 0.1246, 0.1412]) -Greedy action tensor([ 1.6652, -0.2705, -0.5822, -0.0560]) tensor([0.6999, 0.1010, 0.0740, 0.1252]) -Greedy action tensor([ 1.1760, -0.8668, -0.8402, 0.9031]) tensor([0.4941, 0.0641, 0.0658, 0.3761]) -Greedy action tensor([ 1.2396, -0.7253, 0.0204, 0.1941]) tensor([0.5596, 0.0784, 0.1653, 0.1967]) -Greedy action tensor([ 1.5452, -0.2890, -0.6153, 0.4988]) tensor([0.6149, 0.0982, 0.0709, 0.2160]) -Greedy action tensor([ 1.6680, -0.4272, -0.7206, 0.2222]) tensor([0.6895, 0.0848, 0.0633, 0.1624]) -Greedy action tensor([ 1.2315, -0.3958, -0.4696, 0.0578]) tensor([0.5924, 0.1164, 0.1081, 0.1832]) -Greedy action tensor([ 1.3801, -0.6592, -0.0932, 0.2578]) tensor([0.5935, 0.0772, 0.1360, 0.1932]) -Greedy action tensor([ 1.6674, -0.4829, -0.4740, 0.3790]) tensor([0.6624, 0.0771, 0.0778, 0.1826]) -Greedy action tensor([ 1.5399, -0.6663, -0.1680, 0.1446]) tensor([0.6497, 0.0715, 0.1178, 0.1610]) -Greedy action tensor([ 1.2808, -0.2911, -0.7735, 0.4097]) tensor([0.5700, 0.1184, 0.0731, 0.2385]) -Greedy action tensor([ 1.2600, -0.3346, -0.3703, 0.0447]) tensor([0.5898, 0.1197, 0.1155, 0.1750]) -Greedy action tensor([ 1.3168, -0.3707, -0.7491, 0.3639]) tensor([0.5892, 0.1090, 0.0747, 0.2272]) -Greedy action tensor([ 1.9090, -0.6986, -0.1781, 0.4399]) tensor([0.7003, 0.0516, 0.0869, 0.1612]) -Greedy action tensor([ 2.8840, 0.3995, -0.1588, 0.1371]) tensor([0.8367, 0.0698, 0.0399, 0.0537]) -Greedy action tensor([ 1.7991, -0.2707, -0.3244, 0.4293]) tensor([0.6667, 0.0841, 0.0797, 0.1694]) -Greedy action tensor([ 1.1923, -0.2843, -0.7554, 0.0403]) tensor([0.5928, 0.1354, 0.0845, 0.1873]) -Greedy action tensor([ 1.4923, -0.6209, -0.4564, -0.1041]) tensor([0.6822, 0.0824, 0.0972, 0.1382]) -Greedy action tensor([ 1.4746, -1.1965, -0.1575, 0.1200]) tensor([0.6567, 0.0454, 0.1284, 0.1695]) -Greedy action tensor([ 1.0134, 0.0187, -0.2767, -0.1601]) tensor([0.5117, 0.1892, 0.1408, 0.1583]) -Greedy action tensor([ 1.2598, -0.2262, -0.9583, 0.0812]) tensor([0.6087, 0.1377, 0.0662, 0.1873]) -Greedy action tensor([ 1.3718, -0.1536, -0.2291, 0.1193]) tensor([0.5865, 0.1276, 0.1183, 0.1676]) -Greedy action tensor([ 1.5071, -0.0226, -0.7375, 0.1676]) tensor([0.6311, 0.1367, 0.0669, 0.1653]) -Greedy action tensor([ 1.4283, -0.5409, -0.3957, 0.3275]) tensor([0.6122, 0.0854, 0.0988, 0.2036]) -Greedy action tensor([ 1.5116, 0.4583, -0.7271, -0.3581]) tensor([0.6213, 0.2167, 0.0662, 0.0958]) -Greedy action tensor([ 1.1605, -0.5665, -0.4308, 0.3362]) tensor([0.5494, 0.0977, 0.1119, 0.2410]) -Greedy action tensor([ 1.4596, -0.2086, -0.8261, 0.8243]) tensor([0.5494, 0.1036, 0.0559, 0.2911]) -Greedy action tensor([ 1.0635, -0.3455, -0.8243, 0.1336]) tensor([0.5585, 0.1365, 0.0846, 0.2204]) -Greedy action tensor([ 1.8278, -0.5803, -0.6329, 0.6060]) tensor([0.6803, 0.0612, 0.0581, 0.2005]) -Greedy action tensor([ 0.9957, -0.9831, 0.1586, -0.6217]) tensor([0.5651, 0.0781, 0.2447, 0.1121]) -Greedy action tensor([ 1.7631, -0.8066, -0.2779, 0.1442]) tensor([0.7120, 0.0545, 0.0925, 0.1410]) -Greedy action tensor([ 1.7463, -0.7446, -0.1117, 0.0826]) tensor([0.7002, 0.0580, 0.1092, 0.1326]) -Greedy action tensor([ 1.2313, -0.3068, -0.6534, 0.6289]) tensor([0.5224, 0.1122, 0.0793, 0.2860]) -Greedy action tensor([ 1.5322, -0.1665, -0.5943, 0.4475]) tensor([0.6097, 0.1115, 0.0727, 0.2061]) -Greedy action tensor([ 1.7683, -0.4344, -0.3869, -0.2017]) tensor([0.7321, 0.0809, 0.0848, 0.1021]) -Greedy action tensor([ 1.1958, -0.5467, -0.2538, 0.2647]) tensor([0.5544, 0.0971, 0.1301, 0.2185]) -Greedy action tensor([ 2.0490, -0.9024, 0.1796, 1.2220]) tensor([0.6083, 0.0318, 0.0938, 0.2661]) -Greedy action tensor([ 1.3799, -0.2056, -0.5276, 0.1575]) tensor([0.6069, 0.1243, 0.0901, 0.1787]) -Greedy action tensor([ 1.1272, -0.5475, -0.5748, 0.3741]) tensor([0.5433, 0.1018, 0.0990, 0.2558]) -Greedy action tensor([ 1.2713, -0.4280, -0.4350, 0.0462]) tensor([0.6031, 0.1103, 0.1095, 0.1771]) -Greedy action tensor([ 2.2216, -0.5280, -0.5415, 0.6328]) tensor([0.7512, 0.0480, 0.0474, 0.1534]) -Greedy action tensor([ 1.3249, -0.4473, -0.3319, 0.3436]) tensor([0.5762, 0.0979, 0.1099, 0.2160]) -Greedy action tensor([ 1.2395, -0.9873, -0.2960, 0.1585]) tensor([0.6015, 0.0649, 0.1295, 0.2041]) -Greedy action tensor([ 2.1450, -0.7429, -0.3982, 0.5359]) tensor([0.7494, 0.0417, 0.0589, 0.1499]) -Greedy action tensor([ 1.4113, -0.2650, -0.4625, 0.1152]) tensor([0.6195, 0.1159, 0.0951, 0.1695]) -Greedy action tensor([ 1.2556, -0.6295, -0.1534, 0.1759]) tensor([0.5761, 0.0875, 0.1408, 0.1957]) -Greedy action tensor([ 1.9745, -0.4254, -1.0423, 0.3288]) tensor([0.7504, 0.0681, 0.0367, 0.1448]) -Greedy action tensor([ 1.1654, -0.4822, -0.4997, 0.3795]) tensor([0.5442, 0.1048, 0.1030, 0.2480]) -Greedy action tensor([ 1.1788, -0.2791, -0.1928, 0.3134]) tensor([0.5243, 0.1220, 0.1330, 0.2207]) -Greedy action tensor([ 1.4222, -0.6201, -0.4812, 0.2216]) tensor([0.6330, 0.0821, 0.0944, 0.1905]) -Greedy action tensor([ 1.1996, -0.2080, -0.7071, 0.2833]) tensor([0.5576, 0.1365, 0.0828, 0.2231]) -Greedy action tensor([ 2.0187, -0.9616, -0.7328, 0.4051]) tensor([0.7612, 0.0386, 0.0486, 0.1516]) -Greedy action tensor([ 1.4446, -0.4949, -0.3051, 0.0498]) tensor([0.6388, 0.0918, 0.1110, 0.1583]) -Greedy action tensor([ 1.4005, -0.1340, -0.5677, 0.2972]) tensor([0.5928, 0.1278, 0.0828, 0.1967]) -Greedy action tensor([ 1.3349, -0.0774, -0.7093, 0.0946]) tensor([0.6016, 0.1465, 0.0779, 0.1740]) -Greedy action tensor([-0.8800, -1.6949, 0.8068, -1.1019]) tensor([0.1308, 0.0579, 0.7066, 0.1048]) -Greedy action tensor([-0.9531, -0.7299, -0.6694, 0.3936]) tensor([0.1347, 0.1684, 0.1789, 0.5180]) -Greedy action tensor([ 0.6221, -0.5515, 0.0566, -0.5086]) tensor([0.4545, 0.1406, 0.2582, 0.1467]) -Greedy action tensor([-0.0337, -1.2544, 0.6509, -0.1477]) tensor([0.2398, 0.0707, 0.4755, 0.2140]) -Greedy action tensor([-0.6733, -1.3607, 0.5237, -0.8316]) tensor([0.1765, 0.0887, 0.5842, 0.1506]) -Greedy action tensor([ 0.4445, -1.0167, 1.0948, -1.1206]) tensor([0.2979, 0.0691, 0.5708, 0.0623]) -Greedy action tensor([ 1.4137, -0.7303, -0.1460, 0.4480]) tensor([0.5855, 0.0686, 0.1231, 0.2229]) -Greedy action tensor([ 1.1940, -0.4140, -0.6551, -0.1678]) tensor([0.6196, 0.1241, 0.0975, 0.1587]) -Greedy action tensor([ 0.0549, -0.4456, -0.3781, 0.1273]) tensor([0.3003, 0.1821, 0.1948, 0.3229]) -Greedy action tensor([ 1.0316, -0.1817, -0.1722, 0.2154]) tensor([0.4903, 0.1457, 0.1471, 0.2168]) -Greedy action tensor([ 0.8132, -1.7465, -0.9350, 0.0409]) tensor([0.5837, 0.0451, 0.1016, 0.2696]) -Greedy action tensor([-1.0108, -1.3915, 0.7390, -0.6781]) tensor([0.1132, 0.0774, 0.6515, 0.1579]) -Greedy action tensor([-0.2544, -0.7847, 0.1853, -1.2223]) tensor([0.2840, 0.1671, 0.4409, 0.1079]) -Greedy action tensor([-0.5227, -0.6136, 0.5311, -1.0181]) tensor([0.1855, 0.1694, 0.5321, 0.1130]) -Greedy action tensor([ 0.1679, -1.7585, -0.4672, 0.2054]) tensor([0.3685, 0.0537, 0.1953, 0.3826]) -Greedy action tensor([0.7324, 0.0594, 1.1185, 0.3495]) tensor([0.2730, 0.1393, 0.4016, 0.1861]) -Greedy action tensor([-1.7817, -0.1235, 0.3099, -0.8851]) tensor([0.0595, 0.3125, 0.4820, 0.1459]) -Greedy action tensor([-0.8305, -0.2030, -0.8246, -0.0462]) tensor([0.1647, 0.3086, 0.1657, 0.3610]) -Greedy action tensor([ 0.9248, -0.1976, -0.6901, -0.0445]) tensor([0.5253, 0.1710, 0.1045, 0.1993]) -Greedy action tensor([ 0.2053, -0.8269, -0.4926, -0.4236]) tensor([0.4189, 0.1492, 0.2085, 0.2234]) -Greedy action tensor([-0.0474, -0.8197, 0.9691, -0.5824]) tensor([0.2079, 0.0960, 0.5744, 0.1217]) -Greedy action tensor([-0.3331, -0.6078, -0.2430, -0.9857]) tensor([0.2963, 0.2252, 0.3242, 0.1543]) -Greedy action tensor([ 0.0494, 0.2164, 1.2882, -0.8321]) tensor([0.1654, 0.1954, 0.5707, 0.0685]) -Greedy action tensor([ 0.5667, -1.2541, 0.6931, -0.6853]) tensor([0.3872, 0.0627, 0.4394, 0.1107]) -Greedy action tensor([ 0.7822, -0.4575, 1.2535, -0.4482]) tensor([0.3141, 0.0909, 0.5032, 0.0918]) -Greedy action tensor([ 0.6707, -0.2510, -0.4162, -0.5769]) tensor([0.4945, 0.1967, 0.1668, 0.1420]) -Greedy action tensor([ 1.4783, 0.3746, -0.5800, 0.2898]) tensor([0.5669, 0.1880, 0.0724, 0.1727]) -Greedy action tensor([-0.8151, -1.3393, -0.0919, -0.1503]) tensor([0.1787, 0.1058, 0.3682, 0.3473]) -Greedy action tensor([-0.9941, 0.1967, -0.0357, -0.4080]) tensor([0.1150, 0.3784, 0.2999, 0.2067]) -Greedy action tensor([ 0.0325, 0.1853, 0.3324, -0.8068]) tensor([0.2534, 0.2952, 0.3420, 0.1095]) -Greedy action tensor([ 0.2057, -1.6567, 0.9608, -0.6037]) tensor([0.2682, 0.0417, 0.5707, 0.1194]) -Greedy action tensor([-0.2488, -0.2586, 0.9409, -0.8123]) tensor([0.1711, 0.1694, 0.5622, 0.0974]) -Greedy action tensor([ 1.6867, -1.3678, 0.7330, 0.5205]) tensor([0.5734, 0.0270, 0.2209, 0.1786]) -Greedy action tensor([ 1.2790, -0.4468, 0.8488, 0.3544]) tensor([0.4494, 0.0800, 0.2923, 0.1783]) -Greedy action tensor([ 0.2296, 0.5098, 0.3918, -0.2214]) tensor([0.2418, 0.3199, 0.2843, 0.1540]) -Greedy action tensor([ 0.9662, -0.6553, 0.0491, 0.0881]) tensor([0.4968, 0.0982, 0.1986, 0.2065]) -Greedy action tensor([ 0.1930, -0.2165, -0.3001, -0.4974]) tensor([0.3602, 0.2392, 0.2200, 0.1806]) -Greedy action tensor([ 0.9008, -0.1685, -0.7308, -0.2167]) tensor([0.5359, 0.1839, 0.1048, 0.1753]) -Greedy action tensor([-0.8412, 0.6121, 0.3829, -0.8633]) tensor([0.1036, 0.4429, 0.3522, 0.1013]) -Greedy action tensor([ 0.7902, -0.2059, 0.0785, -0.1818]) tensor([0.4468, 0.1650, 0.2193, 0.1690]) -Greedy action tensor([-0.6267, -1.1472, 0.0810, -0.6886]) tensor([0.2191, 0.1302, 0.4447, 0.2060]) -Greedy action tensor([ 0.1488, -0.4756, -0.5296, -0.6960]) tensor([0.4044, 0.2166, 0.2052, 0.1738]) -Greedy action tensor([-0.0219, -1.3472, 0.8031, -1.1946]) tensor([0.2593, 0.0689, 0.5916, 0.0803]) -Greedy action tensor([ 0.3498, -0.5658, 0.3407, 0.7034]) tensor([0.2621, 0.1049, 0.2597, 0.3733]) -Greedy action tensor([ 0.7713, -0.6032, -0.2346, -0.7775]) tensor([0.5461, 0.1381, 0.1997, 0.1160]) -Greedy action tensor([-0.0568, -0.6007, -1.2614, -0.1960]) tensor([0.3636, 0.2111, 0.1090, 0.3163]) -Greedy action tensor([-1.3617, -0.8447, -0.7212, 1.0252]) tensor([0.0647, 0.1085, 0.1228, 0.7040]) -Greedy action tensor([ 0.0150, -1.1914, 0.5156, -0.1125]) tensor([0.2611, 0.0782, 0.4308, 0.2299]) -Greedy action tensor([-0.5886, 0.2473, 0.8172, -0.7591]) tensor([0.1215, 0.2803, 0.4957, 0.1025]) -Greedy action tensor([ 0.2992, 0.4521, -0.4539, -0.0340]) tensor([0.2983, 0.3475, 0.1405, 0.2137]) -Greedy action tensor([-0.2378, -0.1464, -0.0961, -0.5450]) tensor([0.2510, 0.2751, 0.2893, 0.1846]) -Greedy action tensor([-0.9391, -1.4439, 0.1941, -0.5364]) tensor([0.1612, 0.0973, 0.5005, 0.2411]) -Greedy action tensor([ 0.5932, -1.4502, 0.1396, -0.3633]) tensor([0.4653, 0.0603, 0.2956, 0.1788]) -Greedy action tensor([ 0.5193, 0.5593, -0.6114, 0.2386]) tensor([0.3206, 0.3337, 0.1035, 0.2422]) -Greedy action tensor([-0.1861, -0.9297, 0.1351, -0.6552]) tensor([0.2874, 0.1366, 0.3962, 0.1798]) -Greedy action tensor([-0.6697, -0.9445, -0.3293, -0.9066]) tensor([0.2529, 0.1921, 0.3554, 0.1995]) -Greedy action tensor([ 0.3776, -0.4205, -0.9650, 0.1637]) tensor([0.3970, 0.1787, 0.1037, 0.3206]) -Greedy action tensor([-0.2764, -0.8930, 0.1832, 0.2670]) tensor([0.2064, 0.1114, 0.3268, 0.3554]) -Greedy action tensor([ 1.5383, -0.7731, 1.5123, -0.0342]) tensor([0.4384, 0.0435, 0.4271, 0.0910]) -Greedy action tensor([-0.0683, -0.4486, 0.6317, -0.1178]) tensor([0.2151, 0.1471, 0.4331, 0.2047]) -Greedy action tensor([ 0.2185, -0.3531, -0.5622, -1.0764]) tensor([0.4354, 0.2458, 0.1995, 0.1193]) -Greedy action tensor([ 0.3672, 0.3030, 0.0382, -0.3768]) tensor([0.3192, 0.2994, 0.2297, 0.1517]) -Greedy action tensor([ 0.8010, -1.1885, -0.1692, 0.0854]) tensor([0.4988, 0.0682, 0.1891, 0.2439]) -Greedy action tensor([-0.9208, 0.0931, -0.3706, -0.5846]) tensor([0.1452, 0.4001, 0.2516, 0.2032]) -Greedy action tensor([ 2.0522, -1.0819, 0.8680, 0.9989]) tensor([0.5888, 0.0256, 0.1802, 0.2054]) -Greedy action tensor([-0.0075, -0.5469, -0.6097, -0.2081]) tensor([0.3391, 0.1977, 0.1857, 0.2775]) -Greedy action tensor([-0.3970, -1.3050, -0.5162, -0.9115]) tensor([0.3462, 0.1396, 0.3073, 0.2069]) -Greedy action tensor([-0.3656, 0.2855, 1.3062, -0.8645]) tensor([0.1130, 0.2168, 0.6016, 0.0686]) -Greedy action tensor([ 0.1271, -0.5676, 0.3487, -0.3113]) tensor([0.2948, 0.1472, 0.3679, 0.1901]) -Greedy action tensor([ 0.5882, -1.2997, 1.9547, -0.0958]) tensor([0.1793, 0.0271, 0.7031, 0.0905]) -Greedy action tensor([ 1.1888, 0.0701, 0.2219, -0.0523]) tensor([0.5010, 0.1637, 0.1905, 0.1448]) -Greedy action tensor([ 1.6577, -0.0205, 0.6014, 0.2884]) tensor([0.5590, 0.1044, 0.1944, 0.1422]) -Greedy action tensor([-0.3146, -0.7611, -0.3265, 0.0030]) tensor([0.2499, 0.1599, 0.2469, 0.3433]) -Greedy action tensor([-1.3717, -0.4416, -0.9942, -0.3752]) tensor([0.1298, 0.3291, 0.1894, 0.3517]) -Greedy action tensor([-0.4339, -0.6198, -0.5365, -0.9681]) tensor([0.3013, 0.2502, 0.2719, 0.1766]) -Greedy action tensor([-0.9991, -0.5675, 0.1279, -1.6566]) tensor([0.1628, 0.2506, 0.5023, 0.0843]) -Greedy action tensor([ 0.3352, -1.2542, -0.4066, -0.0353]) tensor([0.4218, 0.0861, 0.2009, 0.2912]) -Greedy action tensor([-0.5042, -0.8897, -0.3600, -0.1025]) tensor([0.2310, 0.1571, 0.2668, 0.3452]) -Greedy action tensor([-0.2898, -0.7963, 0.3698, 0.4890]) tensor([0.1750, 0.1054, 0.3384, 0.3812]) -Greedy action tensor([-0.3231, -0.6325, -0.4476, 0.4213]) tensor([0.2118, 0.1554, 0.1870, 0.4458]) -Greedy action tensor([ 1.4963, -0.8178, 0.3998, -0.5878]) tensor([0.6421, 0.0635, 0.2145, 0.0799]) -Greedy action tensor([ 0.8825, -0.7017, 0.0832, -0.4473]) tensor([0.5210, 0.1069, 0.2343, 0.1378]) -Greedy action tensor([ 0.9552, -0.6645, 0.0378, -0.4722]) tensor([0.5442, 0.1077, 0.2175, 0.1306]) -Greedy action tensor([ 1.1035, -0.6341, 0.1485, -0.4003]) tensor([0.5608, 0.0987, 0.2158, 0.1247]) -Greedy action tensor([ 0.7726, -0.0578, -0.0392, 0.0611]) tensor([0.4218, 0.1839, 0.1873, 0.2071]) -Greedy action tensor([ 1.0175, -0.4641, -0.1159, -0.2145]) tensor([0.5432, 0.1235, 0.1749, 0.1585]) -Greedy action tensor([ 0.8775, -0.6351, 0.1078, -0.5924]) tensor([0.5226, 0.1151, 0.2421, 0.1202]) -Greedy action tensor([ 0.5075, -0.2359, -0.1254, -0.3492]) tensor([0.4113, 0.1956, 0.2184, 0.1746]) -Greedy action tensor([ 0.3597, -0.1089, -0.0908, 0.0123]) tensor([0.3367, 0.2108, 0.2146, 0.2379]) -Greedy action tensor([ 0.9162, -0.3897, -0.0752, -0.1386]) tensor([0.5024, 0.1361, 0.1864, 0.1750]) -Greedy action tensor([ 1.1274, -0.7445, -0.0077, -0.5801]) tensor([0.6037, 0.0929, 0.1940, 0.1095]) -Greedy action tensor([ 1.2650, -1.0586, 0.0869, -0.6663]) tensor([0.6448, 0.0631, 0.1985, 0.0935]) -Greedy action tensor([ 0.7958, -0.3799, 0.0424, -0.2081]) tensor([0.4660, 0.1438, 0.2194, 0.1708]) -Greedy action tensor([ 0.6040, -0.1122, -0.0174, -0.0224]) tensor([0.3906, 0.1908, 0.2098, 0.2088]) -Greedy action tensor([ 1.1621, -0.6837, -0.1660, -0.3591]) tensor([0.6093, 0.0962, 0.1614, 0.1331]) -Greedy action tensor([ 1.0134, -0.6575, -0.0324, -0.4220]) tensor([0.5626, 0.1058, 0.1977, 0.1339]) -Greedy action tensor([ 0.4976, -0.1011, 0.0567, -0.2004]) tensor([0.3716, 0.2043, 0.2392, 0.1849]) -Greedy action tensor([ 0.7879, 0.0140, -0.2445, -0.9265]) tensor([0.5006, 0.2309, 0.1783, 0.0902]) -Greedy action tensor([ 0.7640, -0.6331, 0.0647, -0.5920]) tensor([0.4995, 0.1235, 0.2482, 0.1287]) -Greedy action tensor([ 0.7909, -0.0844, 0.0483, -0.4330]) tensor([0.4573, 0.1906, 0.2176, 0.1345]) -Greedy action tensor([ 1.0145, -0.7027, 0.0142, -0.6266]) tensor([0.5743, 0.1031, 0.2112, 0.1113]) -Greedy action tensor([ 1.2501, -0.7713, 0.0089, -0.6238]) tensor([0.6349, 0.0841, 0.1835, 0.0975]) -Greedy action tensor([ 0.5891, -0.2810, -0.0816, -0.5513]) tensor([0.4444, 0.1862, 0.2273, 0.1421]) -Greedy action tensor([ 0.7490, -0.0787, -0.0688, -0.0017]) tensor([0.4255, 0.1859, 0.1878, 0.2008]) -Greedy action tensor([ 0.9257, -0.8293, -0.0553, -0.3951]) tensor([0.5510, 0.0953, 0.2066, 0.1471]) -Greedy action tensor([ 0.6108, -0.5081, 0.1135, -0.3292]) tensor([0.4300, 0.1405, 0.2615, 0.1680]) -Greedy action tensor([ 0.8684, -0.7053, 0.0075, -0.3285]) tensor([0.5175, 0.1073, 0.2188, 0.1564]) -Greedy action tensor([ 0.7918, -0.4949, -0.0704, -0.1973]) tensor([0.4830, 0.1334, 0.2039, 0.1796]) -Greedy action tensor([ 0.9650, -0.6115, -0.0530, -0.3244]) tensor([0.5425, 0.1121, 0.1960, 0.1494]) -Greedy action tensor([ 0.7996, -0.5891, 0.1911, -0.2424]) tensor([0.4659, 0.1162, 0.2535, 0.1643]) -Greedy action tensor([ 0.5925, -0.4448, -0.0015, -0.2459]) tensor([0.4275, 0.1515, 0.2360, 0.1849]) -Greedy action tensor([ 1.0941, -0.5404, -0.2847, -0.7739]) tensor([0.6245, 0.1218, 0.1573, 0.0964]) -Greedy action tensor([ 1.3238, -0.7546, 0.0080, -0.9247]) tensor([0.6671, 0.0835, 0.1790, 0.0704]) -Greedy action tensor([ 0.4265, -0.1615, -0.0684, -0.0337]) tensor([0.3576, 0.1986, 0.2180, 0.2257]) -Greedy action tensor([ 1.0772, -0.7235, -0.0456, -0.4966]) tensor([0.5890, 0.0973, 0.1916, 0.1221]) -Greedy action tensor([ 1.1054, -0.8594, 0.0830, -0.6995]) tensor([0.6008, 0.0842, 0.2161, 0.0988]) -Greedy action tensor([ 0.6530, 0.0626, -0.2034, -0.1380]) tensor([0.4112, 0.2278, 0.1746, 0.1864]) -Greedy action tensor([ 0.4460, 0.0116, 0.0675, -0.3491]) tensor([0.3592, 0.2326, 0.2460, 0.1622]) -Greedy action tensor([ 0.6625, -0.1068, -0.0656, -0.2861]) tensor([0.4286, 0.1986, 0.2069, 0.1660]) -Greedy action tensor([ 0.4093, -0.0776, -0.0054, -0.0886]) tensor([0.3469, 0.2132, 0.2291, 0.2108]) -Greedy action tensor([ 0.2926, -0.0094, -0.1511, -0.0392]) tensor([0.3227, 0.2386, 0.2071, 0.2316]) -Greedy action tensor([ 0.1340, -0.0409, -0.0626, -0.4211]) tensor([0.3091, 0.2595, 0.2539, 0.1774]) -Greedy action tensor([ 0.8384, -0.4649, 0.0630, -0.1483]) tensor([0.4751, 0.1290, 0.2188, 0.1771]) -Greedy action tensor([ 0.8368, -0.6250, 0.1675, -0.2713]) tensor([0.4821, 0.1118, 0.2469, 0.1592]) -Greedy action tensor([ 0.9770, -0.6177, -0.0369, -0.3351]) tensor([0.5449, 0.1106, 0.1977, 0.1467]) -Greedy action tensor([ 0.7254, -0.3865, 0.1296, -0.1794]) tensor([0.4377, 0.1440, 0.2412, 0.1771]) -Greedy action tensor([ 0.7659, -0.5606, -0.1011, -0.4407]) tensor([0.5038, 0.1337, 0.2117, 0.1508]) -Greedy action tensor([ 0.4656, -0.2579, -0.0854, -0.0414]) tensor([0.3754, 0.1821, 0.2164, 0.2261]) -Greedy action tensor([ 0.6777, -0.4803, 0.0542, -0.2692]) tensor([0.4468, 0.1403, 0.2395, 0.1733]) -Greedy action tensor([ 0.4388, -0.2243, 0.0068, -0.0713]) tensor([0.3617, 0.1864, 0.2348, 0.2172]) -Greedy action tensor([ 0.8084, -0.3872, -0.1903, -0.3476]) tensor([0.5036, 0.1524, 0.1855, 0.1585]) -Greedy action tensor([ 0.8503, -0.7591, -0.0902, -0.3526]) tensor([0.5289, 0.1058, 0.2065, 0.1588]) -Greedy action tensor([ 0.9996, -0.6691, -0.0371, -0.4442]) tensor([0.5621, 0.1060, 0.1993, 0.1327]) -Greedy action tensor([ 0.7173, -0.1270, 0.0743, -0.3315]) tensor([0.4337, 0.1864, 0.2280, 0.1519]) -Greedy action tensor([ 1.0482, -0.6023, -0.1062, -0.4076]) tensor([0.5746, 0.1103, 0.1811, 0.1340]) -Greedy action tensor([ 0.4491, -0.1252, -0.0092, -0.0371]) tensor([0.3558, 0.2004, 0.2250, 0.2188]) -Greedy action tensor([ 0.7225, -0.3248, -0.0063, -0.6090]) tensor([0.4768, 0.1673, 0.2300, 0.1259]) -Greedy action tensor([ 0.7006, -0.4268, 0.0181, -0.2582]) tensor([0.4520, 0.1464, 0.2284, 0.1733]) -Greedy action tensor([ 0.9944, -0.7985, 0.0708, -0.3711]) tensor([0.5498, 0.0915, 0.2183, 0.1403]) -Greedy action tensor([ 0.5257, 0.0246, -0.0816, -0.1765]) tensor([0.3779, 0.2290, 0.2059, 0.1873]) -Greedy action tensor([ 0.9562, -0.7820, 0.1109, -0.6481]) tensor([0.5536, 0.0973, 0.2378, 0.1113]) -Greedy action tensor([ 0.8361, -0.2899, 0.0639, -0.4545]) tensor([0.4851, 0.1573, 0.2241, 0.1335]) -Greedy action tensor([ 0.4545, 0.0028, -0.0306, -0.0196]) tensor([0.3479, 0.2214, 0.2142, 0.2165]) -Greedy action tensor([ 0.9335, -0.7491, 0.1454, -0.3602]) tensor([0.5222, 0.0971, 0.2375, 0.1432]) -Greedy action tensor([ 0.9440, -0.7250, 0.0053, -0.2112]) tensor([0.5278, 0.0995, 0.2065, 0.1663]) -Greedy action tensor([ 0.7241, -0.2446, 0.0982, -0.0928]) tensor([0.4244, 0.1611, 0.2270, 0.1875]) -Greedy action tensor([ 0.8676, -0.5453, 0.0207, -0.5809]) tensor([0.5244, 0.1276, 0.2248, 0.1232]) -Greedy action tensor([ 0.5862, -0.4276, -0.0022, -0.0719]) tensor([0.4105, 0.1489, 0.2279, 0.2126]) -Greedy action tensor([ 1.0455, -0.7711, -0.1009, -0.4085]) tensor([0.5834, 0.0949, 0.1854, 0.1363]) -Greedy action tensor([ 1.3772, -0.8074, -0.2387, -0.7073]) tensor([0.6966, 0.0784, 0.1384, 0.0866]) -Greedy action tensor([ 1.0203, -0.6249, -0.0359, -0.3114]) tensor([0.5541, 0.1069, 0.1927, 0.1463]) -Greedy action tensor([ 7.8397e-01, 6.3655e-04, -3.5144e-03, -6.4977e-01]) tensor([0.4651, 0.2125, 0.2116, 0.1109]) -Greedy action tensor([ 0.8554, -0.5915, -0.0722, -0.4313]) tensor([0.5244, 0.1234, 0.2074, 0.1448]) -Greedy action tensor([ 0.9792, -0.7483, 0.1149, -0.4671]) tensor([0.5451, 0.0969, 0.2297, 0.1283]) -Greedy action tensor([ 0.8960, -0.8077, -0.0727, -0.3648]) tensor([0.5420, 0.0987, 0.2057, 0.1536]) -Greedy action tensor([ 0.4779, 0.1451, -0.0319, -0.0690]) tensor([0.3453, 0.2475, 0.2074, 0.1998]) -Greedy action tensor([ 1.0587, -0.5412, -0.0600, -0.5515]) tensor([0.5785, 0.1168, 0.1890, 0.1156]) -Greedy action tensor([ 0.8708, -0.2293, 0.0828, -0.1753]) tensor([0.4675, 0.1556, 0.2126, 0.1643]) -Greedy action tensor([ 0.8004, -0.2125, 0.0072, -0.0831]) tensor([0.4487, 0.1629, 0.2030, 0.1854]) -Greedy action tensor([ 0.6383, -0.3538, -0.1397, -0.3222]) tensor([0.4519, 0.1676, 0.2076, 0.1729]) -Greedy action tensor([ 0.7289, -0.5492, -0.1679, -0.1370]) tensor([0.4746, 0.1322, 0.1936, 0.1996]) -Greedy action tensor([ 0.8273, -0.7134, 0.0055, -0.4257]) tensor([0.5156, 0.1104, 0.2267, 0.1473]) -Greedy action tensor([-1.7332, -0.4513, 0.5582, -0.0753]) tensor([0.0507, 0.1825, 0.5009, 0.2659]) -Greedy action tensor([-1.9387, -0.4385, 0.6629, -0.1768]) tensor([0.0403, 0.1808, 0.5439, 0.2349]) -Greedy action tensor([-1.9305, -0.4427, 0.6619, -0.1738]) tensor([0.0407, 0.1801, 0.5435, 0.2357]) -Greedy action tensor([-1.7942, -0.4535, 0.5968, -0.1021]) tensor([0.0472, 0.1805, 0.5159, 0.2564]) -Greedy action tensor([-1.9274, -0.4490, 0.6700, -0.1616]) tensor([0.0406, 0.1778, 0.5445, 0.2371]) -Greedy action tensor([-0.3547, 0.1339, 0.3486, 0.6745]) tensor([0.1342, 0.2188, 0.2712, 0.3757]) -Greedy action tensor([-1.8863, -0.3757, 0.6388, -0.1491]) tensor([0.0422, 0.1911, 0.5270, 0.2397]) -Greedy action tensor([-1.7090, 0.2792, 0.4438, -0.0289]) tensor([0.0449, 0.3278, 0.3864, 0.2409]) -Greedy action tensor([-1.7966, -0.2906, 0.6364, -0.3070]) tensor([0.0469, 0.2113, 0.5339, 0.2079]) -Greedy action tensor([-1.8788, -0.4377, 0.6232, -0.1419]) tensor([0.0433, 0.1828, 0.5282, 0.2457]) -Greedy action tensor([-1.3990, -0.0600, 0.4916, 0.1360]) tensor([0.0622, 0.2373, 0.4119, 0.2886]) -Greedy action tensor([ 0.8529, 1.2014, -0.0793, 0.3895]) tensor([0.2907, 0.4119, 0.1144, 0.1829]) -Greedy action tensor([-0.5197, -0.2868, 0.1560, 0.1183]) tensor([0.1634, 0.2062, 0.3211, 0.3092]) -Greedy action tensor([-1.8928, -0.4572, 0.6491, -0.1515]) tensor([0.0424, 0.1780, 0.5380, 0.2416]) -Greedy action tensor([-1.8278, -0.4548, 0.6286, -0.1148]) tensor([0.0451, 0.1781, 0.5264, 0.2503]) -Greedy action tensor([-1.8814, -0.3520, 0.6311, -0.1443]) tensor([0.0423, 0.1953, 0.5220, 0.2404]) -Greedy action tensor([-0.8194, 0.7803, 0.1208, -0.0271]) tensor([0.0933, 0.4619, 0.2388, 0.2060]) -Greedy action tensor([-1.2232, 0.7615, 0.2134, 0.0699]) tensor([0.0620, 0.4512, 0.2608, 0.2260]) -Greedy action tensor([-1.6996, -0.2479, 0.5194, -0.0777]) tensor([0.0512, 0.2186, 0.4709, 0.2592]) -Greedy action tensor([-1.8376, -0.1002, 0.5729, -0.1294]) tensor([0.0428, 0.2435, 0.4772, 0.2365]) -Greedy action tensor([-1.5516, -0.3098, 0.7092, 0.2508]) tensor([0.0497, 0.1721, 0.4768, 0.3014]) -Greedy action tensor([-1.8834, -0.4641, 0.6247, -0.1640]) tensor([0.0435, 0.1798, 0.5341, 0.2427]) -Greedy action tensor([-1.6365, -0.4192, 0.5679, 0.0444]) tensor([0.0532, 0.1796, 0.4818, 0.2855]) -Greedy action tensor([-1.8887, -0.3837, 0.6402, -0.1438]) tensor([0.0421, 0.1895, 0.5276, 0.2409]) -Greedy action tensor([-0.9944, 0.8845, 0.0682, 0.3703]) tensor([0.0697, 0.4560, 0.2016, 0.2727]) -Greedy action tensor([-1.5385, -0.5208, 0.9271, 0.2507]) tensor([0.0465, 0.1286, 0.5469, 0.2781]) -Greedy action tensor([-1.6737, 0.0050, 0.4721, -0.0818]) tensor([0.0505, 0.2704, 0.4313, 0.2479]) -Greedy action tensor([-1.8402, -0.4456, 0.6177, -0.1307]) tensor([0.0450, 0.1814, 0.5252, 0.2485]) -Greedy action tensor([-1.8414, -0.2603, 0.6008, -0.1392]) tensor([0.0438, 0.2128, 0.5033, 0.2401]) -Greedy action tensor([-1.9160, -0.4214, 0.6546, -0.1643]) tensor([0.0412, 0.1835, 0.5381, 0.2373]) -Greedy action tensor([-1.8506, -0.3518, 0.6031, -0.1304]) tensor([0.0441, 0.1973, 0.5125, 0.2461]) -Greedy action tensor([-1.9081, -0.3743, 0.6385, -0.1548]) tensor([0.0414, 0.1918, 0.5280, 0.2388]) -Greedy action tensor([-1.8683, -0.1532, 0.5961, -0.1398]) tensor([0.0418, 0.2321, 0.4909, 0.2352]) -Greedy action tensor([-1.2526, 0.5210, 0.1714, 0.1349]) tensor([0.0664, 0.3915, 0.2760, 0.2661]) -Greedy action tensor([-1.6346, 0.2540, 0.4851, -0.5720]) tensor([0.0531, 0.3510, 0.4423, 0.1537]) -Greedy action tensor([-1.8538, -0.3242, 0.6156, -0.1744]) tensor([0.0439, 0.2025, 0.5183, 0.2353]) -Greedy action tensor([-1.0275, -0.4158, 0.4591, 0.1572]) tensor([0.0949, 0.1750, 0.4197, 0.3104]) -Greedy action tensor([-1.4767, -0.4628, 0.6619, 0.5219]) tensor([0.0510, 0.1405, 0.4325, 0.3760]) -Greedy action tensor([-1.8694, -0.4547, 0.6367, -0.1375]) tensor([0.0434, 0.1787, 0.5324, 0.2455]) -Greedy action tensor([-1.6084, -0.3751, 0.4762, -0.0104]) tensor([0.0574, 0.1971, 0.4617, 0.2838]) -Greedy action tensor([-1.2031, 0.8377, 0.1864, 0.2306]) tensor([0.0592, 0.4553, 0.2374, 0.2481]) -Greedy action tensor([-1.9143, -0.3816, 0.6478, -0.1645]) tensor([0.0411, 0.1902, 0.5324, 0.2363]) -Greedy action tensor([-1.9288, -0.4437, 0.6613, -0.1718]) tensor([0.0407, 0.1799, 0.5432, 0.2361]) -Greedy action tensor([-1.9042, -0.4416, 0.6480, -0.1501]) tensor([0.0418, 0.1804, 0.5363, 0.2415]) -Greedy action tensor([-1.9059, -0.4388, 0.6515, -0.1624]) tensor([0.0417, 0.1810, 0.5386, 0.2387]) -Greedy action tensor([-1.8626, -0.4551, 0.6290, -0.1475]) tensor([0.0440, 0.1798, 0.5316, 0.2446]) -Greedy action tensor([-0.8935, 0.8840, 0.0729, 0.3525]) tensor([0.0768, 0.4543, 0.2019, 0.2670]) -Greedy action tensor([-1.4877, -0.2538, 0.6504, -0.5842]) tensor([0.0650, 0.2232, 0.5514, 0.1604]) -Greedy action tensor([-1.6497, -0.3976, 0.5482, -0.1330]) tensor([0.0554, 0.1937, 0.4987, 0.2523]) -Greedy action tensor([-1.9214, -0.4071, 0.6535, -0.1674]) tensor([0.0409, 0.1859, 0.5369, 0.2363]) -Greedy action tensor([-1.5010, -0.4252, 0.4382, 0.0720]) tensor([0.0637, 0.1867, 0.4427, 0.3069]) -Greedy action tensor([-1.2456, 0.5307, 0.1969, 0.0830]) tensor([0.0670, 0.3961, 0.2837, 0.2531]) -Greedy action tensor([-1.8748, -0.4403, 0.6398, -0.1424]) tensor([0.0431, 0.1808, 0.5325, 0.2436]) -Greedy action tensor([-1.5523, -0.5952, 0.5387, 0.1040]) tensor([0.0590, 0.1538, 0.4778, 0.3094]) -Greedy action tensor([-1.6992, -0.1366, 0.5535, -0.0330]) tensor([0.0486, 0.2319, 0.4623, 0.2572]) -Greedy action tensor([-1.9196, -0.3960, 0.6514, -0.1665]) tensor([0.0409, 0.1877, 0.5351, 0.2362]) -Greedy action tensor([-1.5426, 0.2025, 0.3816, -0.0554]) tensor([0.0555, 0.3181, 0.3805, 0.2458]) -Greedy action tensor([-1.1435, 0.8506, 0.1302, -0.6103]) tensor([0.0734, 0.5392, 0.2623, 0.1251]) -Greedy action tensor([-1.6791, -0.4626, 0.5467, -0.1186]) tensor([0.0544, 0.1835, 0.5034, 0.2588]) -Greedy action tensor([-1.8172, -0.4414, 0.6091, -0.1131]) tensor([0.0459, 0.1818, 0.5198, 0.2524]) -Greedy action tensor([-1.5170, -0.5576, 0.4741, -0.0554]) tensor([0.0656, 0.1712, 0.4803, 0.2829]) -Greedy action tensor([-0.8171, -0.6049, 0.2685, 0.4710]) tensor([0.1133, 0.1401, 0.3356, 0.4109]) -Greedy action tensor([-1.9470, -0.4505, 0.6675, -0.1818]) tensor([0.0400, 0.1789, 0.5471, 0.2340]) -Greedy action tensor([-1.6262, -0.3733, 0.4943, -0.0723]) tensor([0.0569, 0.1993, 0.4745, 0.2693]) -Greedy action tensor([-1.5349, -0.4342, 0.4666, 0.0310]) tensor([0.0618, 0.1857, 0.4570, 0.2956]) -Greedy action tensor([-1.9379, -0.4462, 0.6645, -0.1765]) tensor([0.0404, 0.1795, 0.5450, 0.2351]) -Greedy action tensor([-1.6550, -0.5590, 0.5133, -0.0341]) tensor([0.0562, 0.1682, 0.4914, 0.2842]) -Greedy action tensor([-0.6072, 0.3219, 0.1796, -0.0810]) tensor([0.1347, 0.3412, 0.2960, 0.2281]) -Greedy action tensor([-1.2902, 0.2220, 0.3137, -0.0595]) tensor([0.0718, 0.3256, 0.3569, 0.2457]) -Greedy action tensor([-0.9489, 0.3269, 0.1810, 0.0565]) tensor([0.0961, 0.3441, 0.2973, 0.2625]) -Greedy action tensor([-1.9199, -0.4373, 0.6540, -0.1685]) tensor([0.0412, 0.1814, 0.5401, 0.2373]) -Greedy action tensor([-1.5452, -0.4571, 0.4762, 0.0367]) tensor([0.0610, 0.1812, 0.4608, 0.2969]) -Greedy action tensor([-1.8845, -0.3957, 0.6413, -0.1298]) tensor([0.0422, 0.1869, 0.5271, 0.2438]) -Greedy action tensor([-1.6742, 0.1294, 0.4847, -0.1775]) tensor([0.0495, 0.3006, 0.4288, 0.2211]) -Greedy action tensor([-1.7449, -0.3023, 0.5488, -0.1141]) tensor([0.0494, 0.2090, 0.4894, 0.2522]) -Greedy action tensor([-0.9858, 0.4389, 0.5151, 0.5727]) tensor([0.0695, 0.2888, 0.3116, 0.3301]) -Greedy action tensor([-1.8226, -0.2893, 0.5849, -0.1325]) tensor([0.0451, 0.2091, 0.5012, 0.2446]) -Greedy action tensor([-1.2608, 0.7166, 0.1742, 0.1226]) tensor([0.0609, 0.4402, 0.2559, 0.2430]) -Greedy action tensor([-1.8769, -0.3836, 0.6393, -0.1427]) tensor([0.0426, 0.1895, 0.5269, 0.2411]) -Greedy action tensor([-1.9030, -0.4387, 0.6477, -0.1583]) tensor([0.0419, 0.1812, 0.5370, 0.2399]) -Greedy action tensor([-1.8758, -0.1124, 0.5818, -0.1392]) tensor([0.0413, 0.2411, 0.4828, 0.2348]) -Greedy action tensor([ 1.6434, -0.9119, -0.5152, 0.5803]) tensor([0.6500, 0.0505, 0.0751, 0.2245]) -Greedy action tensor([ 1.3921, -0.5125, -0.1911, 0.5193]) tensor([0.5643, 0.0840, 0.1159, 0.2358]) -Greedy action tensor([ 2.3558, -0.6731, -0.3290, 0.3808]) tensor([0.7966, 0.0385, 0.0544, 0.1105]) -Greedy action tensor([ 1.8975, -0.7839, -0.5017, 0.3954]) tensor([0.7236, 0.0495, 0.0657, 0.1611]) -Greedy action tensor([ 1.2033, -0.5364, -0.2284, -0.2714]) tensor([0.6085, 0.1068, 0.1454, 0.1393]) -Greedy action tensor([ 1.5632, -0.5042, -0.1820, 0.2055]) tensor([0.6417, 0.0812, 0.1120, 0.1651]) -Greedy action tensor([ 0.3141, -0.2491, 0.0246, 0.0738]) tensor([0.3221, 0.1834, 0.2411, 0.2533]) -Greedy action tensor([ 1.5955, -0.8931, -0.5350, 0.2926]) tensor([0.6786, 0.0563, 0.0806, 0.1844]) -Greedy action tensor([ 1.6922, 0.5733, -0.3534, 0.3897]) tensor([0.5788, 0.1890, 0.0748, 0.1573]) -Greedy action tensor([ 1.2842, -0.0895, -0.8007, 0.5017]) tensor([0.5450, 0.1380, 0.0678, 0.2492]) -Greedy action tensor([ 1.9212, -0.9396, -0.0406, 0.8120]) tensor([0.6546, 0.0375, 0.0920, 0.2159]) -Greedy action tensor([ 1.2651, -0.0609, -0.9799, 0.4093]) tensor([0.5567, 0.1478, 0.0590, 0.2366]) -Greedy action tensor([ 1.7291, -0.5435, -0.6673, 0.2988]) tensor([0.6977, 0.0719, 0.0635, 0.1669]) -Greedy action tensor([ 1.1683, -0.4197, -0.4445, 0.6200]) tensor([0.5047, 0.1031, 0.1006, 0.2916]) -Greedy action tensor([ 1.2012, -0.2291, -0.9540, 0.2247]) tensor([0.5775, 0.1381, 0.0669, 0.2175]) -Greedy action tensor([ 1.1975, -0.2331, -0.6671, 0.0030]) tensor([0.5893, 0.1409, 0.0913, 0.1785]) -Greedy action tensor([ 1.7498, -0.7627, -0.1826, 0.0724]) tensor([0.7078, 0.0574, 0.1025, 0.1323]) -Greedy action tensor([ 1.3597, -0.6757, -0.3144, 0.4168]) tensor([0.5856, 0.0765, 0.1098, 0.2281]) -Greedy action tensor([ 1.9073, -0.7543, -0.5461, 0.2095]) tensor([0.7469, 0.0522, 0.0642, 0.1367]) -Greedy action tensor([ 1.2146, -0.1105, -0.1871, -0.2361]) tensor([0.5726, 0.1522, 0.1410, 0.1342]) -Greedy action tensor([ 1.0149, -0.2850, -0.0074, 0.1631]) tensor([0.4857, 0.1324, 0.1747, 0.2072]) -Greedy action tensor([2.1051, 0.4563, 0.0189, 0.3133]) tensor([0.6743, 0.1296, 0.0837, 0.1124]) -Greedy action tensor([ 1.1610, -0.4479, -0.1249, 0.3713]) tensor([0.5180, 0.1037, 0.1432, 0.2352]) -Greedy action tensor([ 1.6367, -0.3994, -0.6294, 0.2492]) tensor([0.6739, 0.0880, 0.0699, 0.1683]) -Greedy action tensor([ 1.4325, -0.0510, -0.6288, 0.7098]) tensor([0.5436, 0.1233, 0.0692, 0.2639]) -Greedy action tensor([ 1.5502, -0.3023, -0.4271, 0.4702]) tensor([0.6117, 0.0959, 0.0847, 0.2077]) -Greedy action tensor([ 1.7995, -1.2197, -0.2390, -0.0714]) tensor([0.7502, 0.0366, 0.0977, 0.1155]) -Greedy action tensor([ 1.4195, -0.3035, -0.9918, 0.0985]) tensor([0.6514, 0.1163, 0.0584, 0.1738]) -Greedy action tensor([ 1.1309, -0.6354, -0.1477, -0.1731]) tensor([0.5811, 0.0993, 0.1618, 0.1577]) -Greedy action tensor([ 1.2098, -0.2223, -0.2246, 0.1710]) tensor([0.5462, 0.1304, 0.1301, 0.1933]) -Greedy action tensor([ 1.4061, -0.3474, -0.6572, 0.1255]) tensor([0.6337, 0.1097, 0.0805, 0.1761]) -Greedy action tensor([ 2.0205, -1.1223, -0.1681, 0.6569]) tensor([0.7087, 0.0306, 0.0794, 0.1813]) -Greedy action tensor([ 1.6305, -0.3184, -0.4668, -0.0404]) tensor([0.6881, 0.0980, 0.0845, 0.1294]) -Greedy action tensor([ 1.4323, -0.5586, -0.0197, 0.4409]) tensor([0.5741, 0.0784, 0.1344, 0.2130]) -Greedy action tensor([ 1.1165, -0.5989, -0.6367, 0.5188]) tensor([0.5255, 0.0945, 0.0910, 0.2890]) -Greedy action tensor([ 1.2385, -0.3548, 0.1582, 0.3627]) tensor([0.5104, 0.1037, 0.1733, 0.2126]) -Greedy action tensor([ 1.3911, -0.5600, -0.5104, -0.5103]) tensor([0.6940, 0.0986, 0.1037, 0.1037]) -Greedy action tensor([ 1.2761, -0.4365, -0.2479, 0.0573]) tensor([0.5904, 0.1065, 0.1286, 0.1745]) -Greedy action tensor([ 1.1664, -0.3970, -0.3271, 0.5643]) tensor([0.5046, 0.1057, 0.1133, 0.2764]) -Greedy action tensor([ 1.6650, -0.2745, -0.6106, 0.4689]) tensor([0.6456, 0.0928, 0.0663, 0.1952]) -Greedy action tensor([ 1.3814, 0.3509, -1.1629, 0.5650]) tensor([0.5327, 0.1901, 0.0418, 0.2354]) -Greedy action tensor([ 1.4244, -0.3848, -0.5044, 0.5810]) tensor([0.5749, 0.0942, 0.0835, 0.2474]) -Greedy action tensor([ 1.6202, -0.9175, 0.0275, 0.4000]) tensor([0.6339, 0.0501, 0.1289, 0.1871]) -Greedy action tensor([ 1.4235, 0.0498, -0.7626, 0.1864]) tensor([0.6040, 0.1529, 0.0679, 0.1753]) -Greedy action tensor([ 1.2133, -0.4005, -0.5257, 0.6225]) tensor([0.5185, 0.1032, 0.0911, 0.2872]) -Greedy action tensor([ 1.2269, -0.4251, -1.0463, 0.3300]) tensor([0.5874, 0.1126, 0.0605, 0.2396]) -Greedy action tensor([ 1.2809, 0.0799, -0.5075, 0.0419]) tensor([0.5689, 0.1712, 0.0951, 0.1648]) -Greedy action tensor([ 1.2120, -0.5417, -0.3363, 0.6485]) tensor([0.5115, 0.0886, 0.1088, 0.2912]) -Greedy action tensor([ 1.5127, -0.6127, -1.0004, 0.3772]) tensor([0.6572, 0.0785, 0.0532, 0.2111]) -Greedy action tensor([ 1.1038, -0.3419, -0.8485, 0.4174]) tensor([0.5316, 0.1253, 0.0755, 0.2676]) -Greedy action tensor([ 1.1700, -0.2992, -0.5010, 0.3949]) tensor([0.5322, 0.1225, 0.1001, 0.2452]) -Greedy action tensor([ 1.2900, -0.0875, -0.8632, 0.5850]) tensor([0.5369, 0.1354, 0.0623, 0.2653]) -Greedy action tensor([ 1.6259, -0.6636, -0.6915, 0.3299]) tensor([0.6787, 0.0688, 0.0669, 0.1857]) -Greedy action tensor([ 1.2481, -0.5181, -0.5807, 0.0221]) tensor([0.6154, 0.1052, 0.0988, 0.1806]) -Greedy action tensor([ 1.8389, 0.5510, -0.2150, 0.2417]) tensor([0.6224, 0.1717, 0.0798, 0.1260]) -Greedy action tensor([ 1.1322, -0.3117, -0.7449, 0.0781]) tensor([0.5755, 0.1358, 0.0881, 0.2006]) -Greedy action tensor([ 1.1120, -0.1571, -0.8038, 0.5867]) tensor([0.4951, 0.1392, 0.0729, 0.2928]) -Greedy action tensor([ 1.6980, -0.7438, -0.6281, 0.5691]) tensor([0.6631, 0.0577, 0.0648, 0.2144]) -Greedy action tensor([ 1.7239, -0.6761, 0.9919, -0.1712]) tensor([0.5807, 0.0527, 0.2793, 0.0873]) -Greedy action tensor([ 1.1200, -0.4180, -0.7535, 0.3332]) tensor([0.5483, 0.1178, 0.0842, 0.2497]) -Greedy action tensor([ 1.3536, -0.0848, -0.4123, 0.1197]) tensor([0.5884, 0.1396, 0.1006, 0.1713]) -Greedy action tensor([ 1.1959, -0.1068, -0.4574, 0.2366]) tensor([0.5416, 0.1472, 0.1037, 0.2075]) -Greedy action tensor([ 1.6792, -0.8878, -0.3701, 0.2773]) tensor([0.6888, 0.0529, 0.0887, 0.1695]) -Greedy action tensor([ 0.9733, -0.2452, -0.4256, 0.3425]) tensor([0.4820, 0.1425, 0.1190, 0.2565]) -Greedy action tensor([ 1.1588, -0.5672, -1.0123, 0.6485]) tensor([0.5284, 0.0941, 0.0603, 0.3172]) -Greedy action tensor([ 1.5894, 0.3220, -0.9723, 0.7276]) tensor([0.5614, 0.1581, 0.0433, 0.2372]) -Greedy action tensor([ 1.4233, 0.2912, -0.4162, 0.0348]) tensor([0.5778, 0.1863, 0.0918, 0.1441]) -Greedy action tensor([ 1.4956, -0.6924, -0.8064, 0.4413]) tensor([0.6408, 0.0719, 0.0641, 0.2233]) -Greedy action tensor([ 1.8535, -0.1904, -0.2338, 0.3666]) tensor([0.6758, 0.0875, 0.0838, 0.1528]) -Greedy action tensor([ 1.4993, -0.6220, -0.7108, 0.2682]) tensor([0.6572, 0.0788, 0.0721, 0.1919]) -Greedy action tensor([ 1.8774, -0.8039, -0.3761, 0.3980]) tensor([0.7136, 0.0489, 0.0750, 0.1625]) -Greedy action tensor([ 1.3814, -0.5045, -0.2778, 0.3943]) tensor([0.5832, 0.0885, 0.1110, 0.2173]) -Greedy action tensor([ 2.4285, -0.7367, -0.5892, 0.5424]) tensor([0.8047, 0.0340, 0.0394, 0.1220]) -Greedy action tensor([ 1.2252, -0.2372, -0.6046, 0.1621]) tensor([0.5755, 0.1333, 0.0923, 0.1988]) -Greedy action tensor([ 1.2141, -0.4607, -0.2164, 0.1523]) tensor([0.5642, 0.1057, 0.1350, 0.1951]) -Greedy action tensor([ 1.8874, -0.2802, -0.4794, 0.2819]) tensor([0.7097, 0.0812, 0.0666, 0.1425]) -Greedy action tensor([ 1.0591, -0.4050, -0.2899, 0.5226]) tensor([0.4818, 0.1114, 0.1250, 0.2817]) -Greedy action tensor([ 1.5925, -0.1612, -0.3592, 0.2437]) tensor([0.6350, 0.1099, 0.0902, 0.1648]) -Greedy action tensor([ 1.7221, -0.8329, -0.1024, -0.1960]) tensor([0.7216, 0.0561, 0.1164, 0.1060]) -Greedy action tensor([ 1.5530, -0.4195, -0.7182, 0.3961]) tensor([0.6424, 0.0894, 0.0663, 0.2020]) -Greedy action tensor([ 1.6655, -0.4371, -0.6353, 0.1993]) tensor([0.6882, 0.0841, 0.0689, 0.1588]) -Greedy action tensor([ 0.7421, -0.3986, -0.0935, -0.2482]) tensor([0.4707, 0.1504, 0.2041, 0.1748]) -Greedy action tensor([ 0.2810, -0.0254, -0.0637, -0.3126]) tensor([0.3337, 0.2456, 0.2364, 0.1843]) -Greedy action tensor([ 0.4882, -0.4271, -0.1010, -0.1582]) tensor([0.4034, 0.1615, 0.2238, 0.2113]) -Greedy action tensor([ 0.6932, -0.4037, -0.0787, -0.1551]) tensor([0.4496, 0.1501, 0.2078, 0.1925]) -Greedy action tensor([ 0.9441, -0.9515, 0.0947, -0.4446]) tensor([0.5473, 0.0822, 0.2340, 0.1365]) -Greedy action tensor([ 0.7293, -0.6206, -0.0526, -0.3294]) tensor([0.4846, 0.1256, 0.2217, 0.1681]) -Greedy action tensor([ 0.8971, -0.5756, -0.0769, -0.5688]) tensor([0.5441, 0.1248, 0.2055, 0.1256]) -Greedy action tensor([ 0.7582, -0.4394, 0.0240, -0.2562]) tensor([0.4663, 0.1408, 0.2238, 0.1691]) -Greedy action tensor([ 0.7796, -0.6825, 0.0723, -0.3288]) tensor([0.4867, 0.1128, 0.2399, 0.1606]) -Greedy action tensor([ 0.8968, -0.8193, -0.0486, -0.5157]) tensor([0.5519, 0.0992, 0.2144, 0.1344]) -Greedy action tensor([ 1.1081, -0.3195, 0.2067, -0.3867]) tensor([0.5347, 0.1283, 0.2171, 0.1199]) -Greedy action tensor([ 0.7476, -0.1492, -0.0460, -0.1768]) tensor([0.4431, 0.1807, 0.2004, 0.1758]) -Greedy action tensor([ 0.7705, -0.7266, -0.1340, -0.3009]) tensor([0.5073, 0.1135, 0.2053, 0.1738]) -Greedy action tensor([ 0.8382, -0.3150, -0.1075, -0.2987]) tensor([0.4939, 0.1559, 0.1918, 0.1584]) -Greedy action tensor([ 0.6688, -0.7562, 0.0844, -0.5727]) tensor([0.4792, 0.1153, 0.2671, 0.1385]) -Greedy action tensor([ 0.6390, -0.3701, -0.1511, -0.2945]) tensor([0.4522, 0.1648, 0.2052, 0.1778]) -Greedy action tensor([ 0.5380, -0.1545, 0.0720, 0.0087]) tensor([0.3681, 0.1842, 0.2310, 0.2168]) -Greedy action tensor([ 0.5162, -0.3901, 0.1146, -0.2843]) tensor([0.3965, 0.1602, 0.2653, 0.1780]) -Greedy action tensor([ 0.4818, 0.0908, -0.1231, -0.2273]) tensor([0.3684, 0.2492, 0.2012, 0.1813]) -Greedy action tensor([ 0.8290, -0.3215, -0.0303, -0.0551]) tensor([0.4645, 0.1470, 0.1967, 0.1919]) -Greedy action tensor([ 0.9618, -0.4999, -0.0641, -0.5040]) tensor([0.5491, 0.1273, 0.1968, 0.1268]) -Greedy action tensor([ 0.9668, -0.8759, 0.0031, -0.3789]) tensor([0.5555, 0.0880, 0.2119, 0.1446]) -Greedy action tensor([ 0.6783, 0.2201, -0.1190, 0.1087]) tensor([0.3775, 0.2388, 0.1701, 0.2136]) -Greedy action tensor([ 0.4959, -0.1185, -0.0353, -0.0951]) tensor([0.3728, 0.2017, 0.2192, 0.2064]) -Greedy action tensor([ 0.9481, -0.4947, -0.1690, -0.4176]) tensor([0.5498, 0.1299, 0.1799, 0.1403]) -Greedy action tensor([ 0.4939, -0.3087, -0.1175, -0.1396]) tensor([0.3966, 0.1777, 0.2152, 0.2105]) -Greedy action tensor([ 0.4512, -0.1825, 0.1387, -0.0884]) tensor([0.3515, 0.1865, 0.2571, 0.2049]) -Greedy action tensor([ 0.7582, -0.5845, 0.0177, -0.3596]) tensor([0.4843, 0.1265, 0.2309, 0.1583]) -Greedy action tensor([ 0.8066, 0.2938, -0.1389, -0.2298]) tensor([0.4270, 0.2557, 0.1659, 0.1515]) -Greedy action tensor([ 0.5979, -0.3936, 0.0453, -0.7507]) tensor([0.4533, 0.1682, 0.2609, 0.1177]) -Greedy action tensor([ 0.8444, -0.4253, 0.0067, -0.6224]) tensor([0.5143, 0.1445, 0.2226, 0.1186]) -Greedy action tensor([ 0.7354, -0.2975, 0.0356, -0.1480]) tensor([0.4413, 0.1571, 0.2192, 0.1824]) -Greedy action tensor([ 0.8998, -0.4062, -0.1002, -0.3179]) tensor([0.5169, 0.1400, 0.1901, 0.1529]) -Greedy action tensor([ 0.4333, -0.1925, -0.0114, -0.0997]) tensor([0.3620, 0.1936, 0.2320, 0.2124]) -Greedy action tensor([ 0.7121, -0.5850, -0.1402, -0.1589]) tensor([0.4721, 0.1290, 0.2013, 0.1976]) -Greedy action tensor([ 0.6016, -0.4537, -0.1067, -0.1288]) tensor([0.4306, 0.1499, 0.2121, 0.2074]) -Greedy action tensor([ 0.8289, -0.5947, -0.0240, -0.4771]) tensor([0.5160, 0.1243, 0.2199, 0.1398]) -Greedy action tensor([ 0.6959, -0.5986, -0.0398, -0.3503]) tensor([0.4752, 0.1302, 0.2277, 0.1669]) -Greedy action tensor([ 0.8707, -0.7354, -0.0262, -0.3353]) tensor([0.5241, 0.1052, 0.2138, 0.1569]) -Greedy action tensor([ 0.6079, -0.2962, -0.0674, -0.2061]) tensor([0.4243, 0.1718, 0.2160, 0.1880]) -Greedy action tensor([ 1.4152, -1.0397, 0.0829, -0.9187]) tensor([0.6912, 0.0594, 0.1824, 0.0670]) -Greedy action tensor([ 0.9352, -0.8328, 0.1328, -0.3584]) tensor([0.5282, 0.0902, 0.2368, 0.1449]) -Greedy action tensor([ 0.7010, -0.3564, 0.0803, -0.1382]) tensor([0.4316, 0.1499, 0.2320, 0.1865]) -Greedy action tensor([ 0.8004, -0.1883, 0.0179, -0.2753]) tensor([0.4607, 0.1714, 0.2107, 0.1572]) -Greedy action tensor([ 0.8221, -0.8085, 0.1466, -0.3203]) tensor([0.4941, 0.0967, 0.2515, 0.1577]) -Greedy action tensor([ 0.6083, -0.5020, -0.1328, -0.3013]) tensor([0.4527, 0.1492, 0.2158, 0.1823]) -Greedy action tensor([ 1.0029, -0.4824, -0.1130, -0.2504]) tensor([0.5436, 0.1231, 0.1781, 0.1552]) -Greedy action tensor([ 0.4764, -0.0414, 0.0123, -0.2114]) tensor([0.3667, 0.2185, 0.2305, 0.1843]) -Greedy action tensor([ 0.8357, -0.4154, -0.0913, -0.1633]) tensor([0.4878, 0.1396, 0.1930, 0.1796]) -Greedy action tensor([ 0.6310, -0.3001, -0.0379, -0.0892]) tensor([0.4179, 0.1647, 0.2141, 0.2034]) -Greedy action tensor([ 0.6275, -0.3127, 0.0212, -0.1424]) tensor([0.4168, 0.1628, 0.2273, 0.1930]) -Greedy action tensor([ 0.6536, -0.2150, -0.0777, -0.0978]) tensor([0.4215, 0.1768, 0.2029, 0.1988]) -Greedy action tensor([ 0.8607, -0.9303, 0.1234, -0.4496]) tensor([0.5222, 0.0871, 0.2498, 0.1409]) -Greedy action tensor([ 0.6645, -0.3239, -0.0931, -0.2000]) tensor([0.4421, 0.1645, 0.2072, 0.1862]) -Greedy action tensor([ 0.4121, 0.2096, -0.0794, 0.0969]) tensor([0.3167, 0.2586, 0.1937, 0.2310]) -Greedy action tensor([ 0.9837, -0.6597, 0.1041, -0.5323]) tensor([0.5471, 0.1058, 0.2270, 0.1201]) -Greedy action tensor([ 1.1528, -0.5035, -0.0986, -0.2097]) tensor([0.5771, 0.1101, 0.1651, 0.1477]) -Greedy action tensor([ 0.4814, -0.0759, 0.0137, -0.0222]) tensor([0.3567, 0.2043, 0.2234, 0.2156]) -Greedy action tensor([ 0.4315, -0.2616, -0.0176, -0.1838]) tensor([0.3733, 0.1867, 0.2382, 0.2018]) -Greedy action tensor([ 0.7180, -0.4598, -0.0289, -0.1828]) tensor([0.4570, 0.1407, 0.2166, 0.1857]) -Greedy action tensor([ 0.7749, -0.0641, 0.0944, -0.2251]) tensor([0.4336, 0.1874, 0.2195, 0.1595]) -Greedy action tensor([ 1.1055, -0.8661, -0.0416, -0.7041]) tensor([0.6171, 0.0859, 0.1960, 0.1010]) -Greedy action tensor([ 1.2097, -0.8261, 0.0104, -0.3514]) tensor([0.6091, 0.0795, 0.1836, 0.1278]) -Greedy action tensor([ 0.2867, 0.0495, -0.1410, -0.2027]) tensor([0.3275, 0.2583, 0.2135, 0.2007]) -Greedy action tensor([ 0.5657, -0.0126, -0.1848, -0.4306]) tensor([0.4163, 0.2335, 0.1965, 0.1537]) -Greedy action tensor([ 0.6788, -0.0125, -0.0404, 0.0769]) tensor([0.3943, 0.1975, 0.1921, 0.2160]) -Greedy action tensor([ 0.8290, -0.4832, -0.0166, -0.2234]) tensor([0.4884, 0.1315, 0.2097, 0.1705]) -Greedy action tensor([ 0.4187, -0.1460, -0.0012, 0.0110]) tensor([0.3459, 0.1967, 0.2273, 0.2301]) -Greedy action tensor([ 0.4608, -0.0333, -0.0021, -0.1891]) tensor([0.3621, 0.2209, 0.2279, 0.1890]) -Greedy action tensor([ 0.7792, -0.4091, -0.0236, -0.0816]) tensor([0.4596, 0.1401, 0.2060, 0.1943]) -Greedy action tensor([ 1.1722, -0.7268, -0.0099, -0.4384]) tensor([0.6038, 0.0904, 0.1851, 0.1206]) -Greedy action tensor([ 0.8189, 0.0439, -0.0312, 0.0249]) tensor([0.4273, 0.1969, 0.1826, 0.1932]) -Greedy action tensor([ 0.7746, -0.5454, 0.1701, -0.2237]) tensor([0.4583, 0.1224, 0.2504, 0.1689]) -Greedy action tensor([ 0.7371, -0.5450, -0.1158, -0.1878]) tensor([0.4762, 0.1321, 0.2029, 0.1888]) -Greedy action tensor([ 0.7899, -0.5016, -0.1378, -0.3435]) tensor([0.5019, 0.1380, 0.1985, 0.1616]) -Greedy action tensor([ 0.9152, -0.4709, -0.1667, -0.4498]) tensor([0.5422, 0.1356, 0.1838, 0.1385]) -Greedy action tensor([ 1.1479, -0.5022, -0.0747, -0.6832]) tensor([0.6073, 0.1166, 0.1788, 0.0973]) -Greedy action tensor([ 0.4439, 0.2693, -0.2003, 0.0747]) tensor([0.3272, 0.2748, 0.1718, 0.2262]) -Greedy action tensor([ 0.5543, -0.1237, 0.2187, -0.4843]) tensor([0.3881, 0.1970, 0.2775, 0.1374]) -Greedy action tensor([ 1.0426, -0.6588, -0.1031, -0.3369]) tensor([0.5707, 0.1041, 0.1815, 0.1437]) -Greedy action tensor([ 0.9441, 0.0312, 0.0394, -0.2461]) tensor([0.4739, 0.1902, 0.1918, 0.1441]) -Greedy action tensor([-0.1523, -0.4306, 0.3698, 0.1106]) tensor([0.2108, 0.1596, 0.3554, 0.2742]) -Greedy action tensor([ 1.2368, -0.2823, -0.0299, 0.3930]) tensor([0.5179, 0.1134, 0.1459, 0.2228]) -Greedy action tensor([-0.7427, -0.1747, 0.7081, 0.9413]) tensor([0.0805, 0.1421, 0.3436, 0.4338]) -Greedy action tensor([ 0.1518, -1.2596, -0.1709, -0.6699]) tensor([0.4153, 0.1013, 0.3008, 0.1826]) -Greedy action tensor([ 0.3763, -0.3700, -0.6769, 0.1510]) tensor([0.3815, 0.1809, 0.1331, 0.3045]) -Greedy action tensor([ 0.3726, 0.2543, -0.4925, -0.0484]) tensor([0.3372, 0.2996, 0.1420, 0.2213]) -Greedy action tensor([-0.6501, -0.4334, -0.1592, -0.9301]) tensor([0.2159, 0.2682, 0.3527, 0.1632]) -Greedy action tensor([-0.1246, -1.1942, 0.6216, -1.1184]) tensor([0.2616, 0.0898, 0.5518, 0.0968]) -Greedy action tensor([ 0.5607, -1.0876, 0.0481, -0.9167]) tensor([0.4952, 0.0953, 0.2966, 0.1130]) -Greedy action tensor([-0.7944, -0.2736, 0.6022, -1.1611]) tensor([0.1348, 0.2269, 0.5448, 0.0934]) -Greedy action tensor([-0.2441, -0.7263, -0.1097, -0.2031]) tensor([0.2629, 0.1623, 0.3008, 0.2739]) -Greedy action tensor([-0.8795, -0.5301, 0.0963, -0.8590]) tensor([0.1641, 0.2328, 0.4355, 0.1675]) -Greedy action tensor([-0.1917, -1.1314, -0.4872, -0.1302]) tensor([0.3127, 0.1222, 0.2327, 0.3325]) -Greedy action tensor([ 1.6508, -0.4737, 1.1750, 0.0785]) tensor([0.5132, 0.0613, 0.3189, 0.1065]) -Greedy action tensor([ 0.3215, -0.3463, 1.1270, -0.8078]) tensor([0.2455, 0.1259, 0.5493, 0.0794]) -Greedy action tensor([ 0.9030, -0.1514, -0.6630, 0.3029]) tensor([0.4748, 0.1654, 0.0992, 0.2606]) -Greedy action tensor([ 1.2244, 0.8451, 0.0928, -0.9798]) tensor([0.4723, 0.3232, 0.1523, 0.0521]) -Greedy action tensor([ 0.2415, 0.4606, -0.3475, -1.0384]) tensor([0.3249, 0.4045, 0.1803, 0.0903]) -Greedy action tensor([-0.8538, -1.3446, 0.3126, -0.8413]) tensor([0.1714, 0.1049, 0.5502, 0.1735]) -Greedy action tensor([-1.1263, -1.2491, -0.6816, -0.4608]) tensor([0.1855, 0.1641, 0.2894, 0.3609]) -Greedy action tensor([ 0.8645, -0.7771, 0.5152, -0.9964]) tensor([0.4868, 0.0943, 0.3433, 0.0757]) -Greedy action tensor([-0.6684, -0.6154, -0.2427, -1.2121]) tensor([0.2401, 0.2531, 0.3675, 0.1394]) -Greedy action tensor([-0.5973, -0.3049, 1.0669, -1.0177]) tensor([0.1208, 0.1618, 0.6380, 0.0793]) -Greedy action tensor([ 0.8335, -0.1551, 0.2807, 0.0471]) tensor([0.4162, 0.1549, 0.2394, 0.1896]) -Greedy action tensor([ 0.7103, 0.0742, -0.0078, 0.1977]) tensor([0.3823, 0.2024, 0.1864, 0.2290]) -Greedy action tensor([ 0.7685, 0.2825, 1.0626, -0.9414]) tensor([0.3187, 0.1960, 0.4277, 0.0576]) -Greedy action tensor([ 0.9250, -1.2415, -0.1584, -0.4003]) tensor([0.5818, 0.0667, 0.1969, 0.1546]) -Greedy action tensor([ 0.5053, -0.6172, 0.4186, 0.9064]) tensor([0.2677, 0.0871, 0.2455, 0.3998]) -Greedy action tensor([ 0.4712, -1.1721, 0.3687, 0.1465]) tensor([0.3548, 0.0686, 0.3202, 0.2564]) -Greedy action tensor([-1.0903, -1.0845, 0.9262, -1.1888]) tensor([0.0959, 0.0965, 0.7207, 0.0869]) -Greedy action tensor([-1.1307, -0.2330, -0.9457, -0.2241]) tensor([0.1402, 0.3440, 0.1687, 0.3471]) -Greedy action tensor([-0.1426, -0.4200, 0.6195, -1.1166]) tensor([0.2337, 0.1771, 0.5009, 0.0883]) -Greedy action tensor([-1.4290, -1.0920, -0.4353, 0.3774]) tensor([0.0894, 0.1252, 0.2414, 0.5441]) -Greedy action tensor([-0.2488, 0.1418, -0.6589, 0.2583]) tensor([0.2083, 0.3078, 0.1382, 0.3458]) -Greedy action tensor([ 0.7782, -0.0648, -0.3143, 0.2938]) tensor([0.4198, 0.1807, 0.1408, 0.2586]) -Greedy action tensor([-0.3729, -1.2931, 0.7471, -0.2503]) tensor([0.1788, 0.0712, 0.5479, 0.2021]) -Greedy action tensor([ 0.0972, -0.0147, 0.2740, -0.4169]) tensor([0.2713, 0.2426, 0.3238, 0.1623]) -Greedy action tensor([-0.0831, -0.8135, 0.0298, -0.7670]) tensor([0.3220, 0.1551, 0.3605, 0.1625]) -Greedy action tensor([-0.2866, -0.0489, 0.2455, -1.1529]) tensor([0.2277, 0.2888, 0.3877, 0.0958]) -Greedy action tensor([-0.1387, -0.9063, 0.6470, -0.9234]) tensor([0.2431, 0.1128, 0.5332, 0.1109]) -Greedy action tensor([-0.1883, -1.5108, -0.8296, 0.2564]) tensor([0.2982, 0.0795, 0.1571, 0.4653]) -Greedy action tensor([-0.7284, -1.2611, -0.7166, -0.0377]) tensor([0.2177, 0.1278, 0.2203, 0.4343]) -Greedy action tensor([ 0.8743, -0.0875, -0.2755, -0.4475]) tensor([0.5088, 0.1944, 0.1611, 0.1357]) -Greedy action tensor([-0.1635, 0.0688, 0.1158, -0.3475]) tensor([0.2265, 0.2857, 0.2994, 0.1884]) -Greedy action tensor([ 0.4166, -0.5995, 0.5455, -0.2840]) tensor([0.3338, 0.1208, 0.3797, 0.1657]) -Greedy action tensor([ 0.5162, -1.0615, -0.4126, -0.4599]) tensor([0.5055, 0.1044, 0.1997, 0.1905]) -Greedy action tensor([ 0.5938, -1.3622, 0.7628, -0.7230]) tensor([0.3856, 0.0545, 0.4566, 0.1033]) -Greedy action tensor([ 0.6140, -1.1174, -0.1226, 0.9889]) tensor([0.3215, 0.0569, 0.1539, 0.4677]) -Greedy action tensor([ 0.4986, -1.5633, -0.2270, 0.5491]) tensor([0.3755, 0.0478, 0.1817, 0.3950]) -Greedy action tensor([-0.3365, -1.4648, -0.1127, -0.2095]) tensor([0.2695, 0.0872, 0.3372, 0.3061]) -Greedy action tensor([-0.1703, 0.4788, -0.3827, -0.3373]) tensor([0.2189, 0.4189, 0.1770, 0.1852]) -Greedy action tensor([-0.2962, -0.5576, 0.1583, -1.5426]) tensor([0.2753, 0.2119, 0.4336, 0.0791]) -Greedy action tensor([ 0.3829, -0.1729, 0.9865, -0.0881]) tensor([0.2483, 0.1425, 0.4541, 0.1551]) -Greedy action tensor([-0.0361, -1.0376, -0.3857, -0.1399]) tensor([0.3363, 0.1235, 0.2371, 0.3031]) -Greedy action tensor([-0.0165, -1.1527, -0.1175, 0.2798]) tensor([0.2801, 0.0899, 0.2532, 0.3767]) -Greedy action tensor([ 0.2248, -1.1623, 0.9295, 0.3911]) tensor([0.2245, 0.0561, 0.4542, 0.2651]) -Greedy action tensor([-0.9592, 0.4928, -0.5438, -0.7087]) tensor([0.1239, 0.5292, 0.1877, 0.1592]) -Greedy action tensor([ 1.1831, -0.5230, 1.5955, -0.1477]) tensor([0.3383, 0.0614, 0.5109, 0.0894]) -Greedy action tensor([-0.0772, -0.8737, -0.2907, -0.1127]) tensor([0.3102, 0.1399, 0.2506, 0.2994]) -Greedy action tensor([ 0.2717, -0.2413, -0.0234, -0.0430]) tensor([0.3254, 0.1948, 0.2422, 0.2376]) -Greedy action tensor([-0.4650, -1.4171, 0.8247, -0.8654]) tensor([0.1758, 0.0679, 0.6385, 0.1178]) -Greedy action tensor([ 1.2414, -0.7692, 0.1011, 0.2834]) tensor([0.5443, 0.0729, 0.1740, 0.2088]) -Greedy action tensor([-1.7220, 0.1726, 0.0894, -1.3419]) tensor([0.0657, 0.4366, 0.4017, 0.0960]) -Greedy action tensor([-1.0035, -1.1180, 0.8789, -0.8094]) tensor([0.1034, 0.0922, 0.6790, 0.1255]) -Greedy action tensor([ 0.1932, -0.8473, 0.1554, 0.0285]) tensor([0.3160, 0.1116, 0.3043, 0.2680]) -Greedy action tensor([-0.2852, -1.1772, -1.0348, -1.1757]) tensor([0.4361, 0.1788, 0.2061, 0.1790]) -Greedy action tensor([-0.4728, -0.7509, -0.0395, 0.2429]) tensor([0.1871, 0.1417, 0.2885, 0.3827]) -Greedy action tensor([-1.4104, -0.0736, 1.6778, -1.1365]) tensor([0.0356, 0.1357, 0.7818, 0.0469]) -Greedy action tensor([ 1.6371, -0.0925, -0.2721, 0.2497]) tensor([0.6348, 0.1126, 0.0941, 0.1585]) -Greedy action tensor([-1.4036, -1.5456, 0.6678, -0.3673]) tensor([0.0792, 0.0687, 0.6287, 0.2233]) -Greedy action tensor([-0.8095, -0.0158, -0.5735, 0.2182]) tensor([0.1375, 0.3041, 0.1741, 0.3843]) -Greedy action tensor([ 0.2236, -0.6952, -0.0961, -0.0377]) tensor([0.3454, 0.1378, 0.2509, 0.2660]) -Greedy action tensor([-0.6108, -0.2869, 0.2663, -0.6311]) tensor([0.1734, 0.2397, 0.4169, 0.1699]) -Greedy action tensor([-0.1928, -0.5839, -0.3814, -0.3199]) tensor([0.2954, 0.1998, 0.2446, 0.2601]) -Greedy action tensor([-0.5896, -0.7344, -0.8565, -0.9598]) tensor([0.3011, 0.2605, 0.2305, 0.2079]) -Greedy action tensor([-1.0766, -0.6166, -1.3910, 0.2610]) tensor([0.1404, 0.2224, 0.1025, 0.5348]) -Greedy action tensor([ 0.1916, -0.5881, 0.9891, -1.4001]) tensor([0.2576, 0.1181, 0.5718, 0.0524]) -Greedy action tensor([-1.3500, 0.9591, -0.1097, -0.6601]) tensor([0.0605, 0.6094, 0.2093, 0.1207]) -Greedy action tensor([ 0.3845, -1.3893, 1.0246, 0.7391]) tensor([0.2226, 0.0378, 0.4222, 0.3174]) -Greedy action tensor([-0.3421, -0.5998, 0.2948, -0.5842]) tensor([0.2248, 0.1737, 0.4250, 0.1765]) -Greedy action tensor([-0.1463, -1.1460, 0.0036, -0.6615]) tensor([0.3198, 0.1177, 0.3715, 0.1910]) -Greedy action tensor([-1.4596, -0.5729, 0.5413, -0.0743]) tensor([0.0675, 0.1638, 0.4991, 0.2697]) -Greedy action tensor([-1.9294, -0.4468, 0.6585, -0.1736]) tensor([0.0408, 0.1798, 0.5430, 0.2363]) -Greedy action tensor([-1.6346, -0.3283, 0.5090, -0.0931]) tensor([0.0559, 0.2063, 0.4767, 0.2611]) -Greedy action tensor([-1.7705, -0.3948, 0.5743, -0.0853]) tensor([0.0481, 0.1904, 0.5019, 0.2595]) -Greedy action tensor([-1.6635, -0.0320, 0.4536, -0.0536]) tensor([0.0515, 0.2632, 0.4278, 0.2576]) -Greedy action tensor([-1.3676, 0.5508, 0.2546, 0.0090]) tensor([0.0594, 0.4045, 0.3008, 0.2353]) -Greedy action tensor([-0.8315, 0.9498, 0.0980, 0.2333]) tensor([0.0808, 0.4799, 0.2048, 0.2344]) -Greedy action tensor([-1.8950, -0.4686, 0.6476, -0.1544]) tensor([0.0424, 0.1766, 0.5392, 0.2418]) -Greedy action tensor([-1.9264, -0.2688, 0.6313, -0.1717]) tensor([0.0401, 0.2104, 0.5176, 0.2319]) -Greedy action tensor([-1.4436, -0.1949, 0.4437, -0.1115]) tensor([0.0672, 0.2343, 0.4437, 0.2547]) -Greedy action tensor([-0.9616, 0.4348, 0.2748, -0.0919]) tensor([0.0920, 0.3717, 0.3168, 0.2195]) -Greedy action tensor([-1.0832, 0.7774, 0.1753, 0.3804]) tensor([0.0655, 0.4209, 0.2305, 0.2830]) -Greedy action tensor([-1.8115, -0.4355, 0.6053, -0.1073]) tensor([0.0462, 0.1827, 0.5174, 0.2537]) -Greedy action tensor([-1.9432, -0.4464, 0.6660, -0.1794]) tensor([0.0402, 0.1795, 0.5459, 0.2344]) -Greedy action tensor([-1.8024, -0.4776, 0.5753, -0.1082]) tensor([0.0477, 0.1793, 0.5137, 0.2594]) -Greedy action tensor([-1.9414, -0.4447, 0.6648, -0.1785]) tensor([0.0403, 0.1798, 0.5453, 0.2346]) -Greedy action tensor([-1.8783, -0.3340, 0.6303, -0.1418]) tensor([0.0423, 0.1981, 0.5196, 0.2401]) -Greedy action tensor([-1.8622, -0.4161, 0.6263, -0.1248]) tensor([0.0435, 0.1849, 0.5243, 0.2474]) -Greedy action tensor([-1.8488, -0.2589, 0.5936, -0.1193]) tensor([0.0434, 0.2128, 0.4991, 0.2447]) -Greedy action tensor([-1.9255, -0.4547, 0.6611, -0.1721]) tensor([0.0410, 0.1783, 0.5442, 0.2365]) -Greedy action tensor([-1.3428, 0.2104, -0.2321, -0.2625]) tensor([0.0854, 0.4037, 0.2593, 0.2516]) -Greedy action tensor([-1.7326, -0.4298, 0.5408, -0.0666]) tensor([0.0508, 0.1869, 0.4934, 0.2688]) -Greedy action tensor([-1.5508, 0.0164, 0.6343, -0.5804]) tensor([0.0577, 0.2767, 0.5133, 0.1523]) -Greedy action tensor([-1.7939, -0.4655, 0.5218, -0.1372]) tensor([0.0496, 0.1873, 0.5029, 0.2602]) -Greedy action tensor([-1.6633, 0.2402, 0.4227, 0.0216]) tensor([0.0473, 0.3172, 0.3807, 0.2549]) -Greedy action tensor([-1.8361, -0.3417, 0.5959, -0.1493]) tensor([0.0450, 0.2004, 0.5117, 0.2429]) -Greedy action tensor([-1.9081, -0.4370, 0.6461, -0.1646]) tensor([0.0418, 0.1819, 0.5374, 0.2389]) -Greedy action tensor([-1.9197, -0.4313, 0.6527, -0.1690]) tensor([0.0412, 0.1824, 0.5393, 0.2371]) -Greedy action tensor([-1.7515, -0.2630, 0.5732, -0.1921]) tensor([0.0490, 0.2171, 0.5009, 0.2330]) -Greedy action tensor([-1.8225, -0.2848, 0.5891, -0.1236]) tensor([0.0449, 0.2090, 0.5007, 0.2455]) -Greedy action tensor([-1.9074, -0.3560, 0.6399, -0.1613]) tensor([0.0413, 0.1948, 0.5273, 0.2366]) -Greedy action tensor([-1.5903, -0.3806, 0.6220, -0.0324]) tensor([0.0548, 0.1838, 0.5010, 0.2604]) -Greedy action tensor([-1.8921, -0.2459, 0.6096, -0.1358]) tensor([0.0414, 0.2145, 0.5046, 0.2395]) -Greedy action tensor([-1.8997, -0.4281, 0.6469, -0.1593]) tensor([0.0420, 0.1829, 0.5358, 0.2393]) -Greedy action tensor([-1.8458, -0.2593, 0.5936, -0.1231]) tensor([0.0436, 0.2129, 0.4996, 0.2440]) -Greedy action tensor([-0.5687, -0.2936, 0.1919, -0.0516]) tensor([0.1630, 0.2147, 0.3488, 0.2735]) -Greedy action tensor([-1.8943, -0.4514, 0.6444, -0.1583]) tensor([0.0424, 0.1796, 0.5372, 0.2407]) -Greedy action tensor([-1.9205, -0.4168, 0.6549, -0.1644]) tensor([0.0409, 0.1842, 0.5378, 0.2370]) -Greedy action tensor([-1.5343, -0.5303, 0.4948, -0.1191]) tensor([0.0647, 0.1766, 0.4922, 0.2664]) -Greedy action tensor([-1.2042, -0.5845, 0.3441, 0.3599]) tensor([0.0810, 0.1506, 0.3811, 0.3872]) -Greedy action tensor([-1.2020, 0.3747, 0.4730, 0.2719]) tensor([0.0643, 0.3113, 0.3434, 0.2809]) -Greedy action tensor([-1.6337, 0.3537, 0.2845, -0.2886]) tensor([0.0528, 0.3852, 0.3594, 0.2026]) -Greedy action tensor([-1.8717, -0.4391, 0.6463, -0.0941]) tensor([0.0425, 0.1782, 0.5276, 0.2516]) -Greedy action tensor([-1.8207, -0.4893, 0.6038, -0.1302]) tensor([0.0465, 0.1761, 0.5253, 0.2521]) -Greedy action tensor([-1.5553, 0.4101, 0.3399, 0.0768]) tensor([0.0502, 0.3586, 0.3343, 0.2569]) -Greedy action tensor([-1.8947, -0.3285, 0.6323, -0.1522]) tensor([0.0416, 0.1994, 0.5211, 0.2378]) -Greedy action tensor([-1.8307, -0.4668, 0.6162, -0.1253]) tensor([0.0455, 0.1780, 0.5259, 0.2505]) -Greedy action tensor([-1.8407, -0.3962, 0.6071, -0.1419]) tensor([0.0449, 0.1904, 0.5192, 0.2455]) -Greedy action tensor([-1.8193, -0.4886, 0.5951, -0.1134]) tensor([0.0466, 0.1762, 0.5208, 0.2564]) -Greedy action tensor([-1.2533, -0.6153, 0.3132, 0.2025]) tensor([0.0835, 0.1581, 0.4001, 0.3582]) -Greedy action tensor([-1.7755, -0.3871, 0.6231, -0.2644]) tensor([0.0487, 0.1951, 0.5357, 0.2205]) -Greedy action tensor([-1.8249, -0.3608, 0.6140, -0.1262]) tensor([0.0449, 0.1943, 0.5150, 0.2457]) -Greedy action tensor([-1.8886, -0.4300, 0.6359, -0.1514]) tensor([0.0426, 0.1832, 0.5320, 0.2421]) -Greedy action tensor([-1.3820, 0.7851, 0.4895, -0.5451]) tensor([0.0539, 0.4710, 0.3505, 0.1245]) -Greedy action tensor([-1.6356, -0.2807, 0.4897, -0.0591]) tensor([0.0553, 0.2143, 0.4630, 0.2675]) -Greedy action tensor([-1.7811, -0.4637, 0.5989, -0.0612]) tensor([0.0473, 0.1768, 0.5115, 0.2643]) -Greedy action tensor([-1.9168, -0.4537, 0.6564, -0.1660]) tensor([0.0413, 0.1786, 0.5419, 0.2381]) -Greedy action tensor([-0.6596, 0.7585, 0.1028, -0.0708]) tensor([0.1102, 0.4550, 0.2362, 0.1986]) -Greedy action tensor([-0.7629, 0.8307, 0.0043, 0.1958]) tensor([0.0936, 0.4607, 0.2016, 0.2441]) -Greedy action tensor([-1.8487, -0.4059, 0.6137, -0.1304]) tensor([0.0444, 0.1878, 0.5205, 0.2473]) -Greedy action tensor([-1.4339, 0.2706, 0.2967, -0.0306]) tensor([0.0617, 0.3392, 0.3482, 0.2510]) -Greedy action tensor([-1.8327, -0.3599, 0.6051, -0.1274]) tensor([0.0448, 0.1955, 0.5131, 0.2466]) -Greedy action tensor([-1.9104, -0.4399, 0.6519, -0.1613]) tensor([0.0416, 0.1808, 0.5387, 0.2389]) -Greedy action tensor([-1.8586, -0.4184, 0.6492, -0.1469]) tensor([0.0434, 0.1833, 0.5329, 0.2404]) -Greedy action tensor([-1.1836, 0.3672, 0.2337, -0.0134]) tensor([0.0765, 0.3609, 0.3158, 0.2467]) -Greedy action tensor([-1.9103, -0.4412, 0.6515, -0.1634]) tensor([0.0416, 0.1807, 0.5390, 0.2386]) -Greedy action tensor([-1.8288, -0.2102, 0.5878, -0.1120]) tensor([0.0438, 0.2211, 0.4911, 0.2439]) -Greedy action tensor([-1.7979, -0.2942, 0.5642, -0.1067]) tensor([0.0464, 0.2089, 0.4928, 0.2519]) -Greedy action tensor([-1.8881, -0.3746, 0.6373, -0.1554]) tensor([0.0422, 0.1917, 0.5274, 0.2387]) -Greedy action tensor([-1.8963, -0.4551, 0.6503, -0.1567]) tensor([0.0422, 0.1784, 0.5389, 0.2405]) -Greedy action tensor([-1.9056, -0.4561, 0.6460, -0.1582]) tensor([0.0420, 0.1788, 0.5383, 0.2409]) -Greedy action tensor([-1.8878, -0.4499, 0.6407, -0.1561]) tensor([0.0427, 0.1800, 0.5357, 0.2415]) -Greedy action tensor([-1.9029, -0.2471, 0.6145, -0.1460]) tensor([0.0409, 0.2144, 0.5075, 0.2372]) -Greedy action tensor([-1.7567, -0.2453, 0.4114, -0.1907]) tensor([0.0525, 0.2378, 0.4586, 0.2511]) -Greedy action tensor([-1.8390, -0.4016, 0.6223, -0.1215]) tensor([0.0444, 0.1871, 0.5209, 0.2476]) -Greedy action tensor([-0.3058, -0.3085, 0.2043, 0.2455]) tensor([0.1852, 0.1848, 0.3085, 0.3215]) -Greedy action tensor([-1.6384, -0.3530, 0.5875, 0.0650]) tensor([0.0516, 0.1867, 0.4781, 0.2836]) -Greedy action tensor([-1.5336, 0.5467, 0.3251, 0.0050]) tensor([0.0498, 0.3987, 0.3195, 0.2320]) -Greedy action tensor([-1.3580, -0.5022, 0.4683, -0.1198]) tensor([0.0768, 0.1808, 0.4773, 0.2651]) -Greedy action tensor([-1.7619, -0.4644, 0.5809, -0.1142]) tensor([0.0493, 0.1806, 0.5137, 0.2564]) -Greedy action tensor([-1.9147, -0.2965, 0.6382, -0.1897]) tensor([0.0408, 0.2059, 0.5242, 0.2291]) -Greedy action tensor([ 1.6081, -0.7172, -0.1997, -0.0839]) tensor([0.6916, 0.0676, 0.1134, 0.1274]) -Greedy action tensor([ 1.2922, -0.4529, -1.0176, 0.3805]) tensor([0.5968, 0.1042, 0.0592, 0.2398]) -Greedy action tensor([ 1.8778, -0.7512, -0.2322, 0.4086]) tensor([0.7025, 0.0507, 0.0852, 0.1617]) -Greedy action tensor([ 0.8863, -0.1972, -0.4474, -0.2033]) tensor([0.5159, 0.1746, 0.1359, 0.1735]) -Greedy action tensor([ 1.6749, -0.3767, -0.9917, 1.0138]) tensor([0.5833, 0.0750, 0.0405, 0.3012]) -Greedy action tensor([ 1.6115, 0.5602, -0.7129, -0.1055]) tensor([0.6147, 0.2148, 0.0601, 0.1104]) -Greedy action tensor([ 1.4415, -0.7901, -0.4768, 0.1842]) tensor([0.6499, 0.0698, 0.0954, 0.1849]) -Greedy action tensor([ 1.2316, 0.0836, -1.0595, 0.3245]) tensor([0.5488, 0.1741, 0.0555, 0.2216]) -Greedy action tensor([ 1.2853, -0.0716, -0.6930, 0.2861]) tensor([0.5669, 0.1460, 0.0784, 0.2087]) -Greedy action tensor([ 1.9161, -0.4525, -0.9061, 0.2447]) tensor([0.7457, 0.0698, 0.0443, 0.1402]) -Greedy action tensor([ 1.4588, -0.1882, -0.3433, 0.2690]) tensor([0.6017, 0.1159, 0.0993, 0.1831]) -Greedy action tensor([ 1.1026, -0.3910, -0.5048, 0.2217]) tensor([0.5437, 0.1221, 0.1090, 0.2253]) -Greedy action tensor([ 1.2819, -0.2378, -0.4621, -0.0652]) tensor([0.6047, 0.1323, 0.1057, 0.1572]) -Greedy action tensor([ 1.1685, -0.4079, -1.1941, 0.3503]) tensor([0.5740, 0.1187, 0.0541, 0.2533]) -Greedy action tensor([ 1.1723, -0.0985, -0.6424, 0.2204]) tensor([0.5466, 0.1534, 0.0890, 0.2110]) -Greedy action tensor([ 1.2804, -0.3534, -0.5978, 0.2729]) tensor([0.5837, 0.1139, 0.0892, 0.2131]) -Greedy action tensor([ 1.4452, -0.2753, -0.8307, 0.6957]) tensor([0.5700, 0.1020, 0.0585, 0.2694]) -Greedy action tensor([ 1.3942, -0.6176, -0.2904, 0.6046]) tensor([0.5639, 0.0754, 0.1046, 0.2560]) -Greedy action tensor([ 1.5348, -0.6161, 0.1373, 0.3349]) tensor([0.6007, 0.0699, 0.1485, 0.1809]) -Greedy action tensor([ 1.9125, -0.7624, -0.6640, 0.2735]) tensor([0.7467, 0.0515, 0.0568, 0.1450]) -Greedy action tensor([ 1.5357, -0.5019, -0.6920, 0.2710]) tensor([0.6577, 0.0857, 0.0709, 0.1857]) -Greedy action tensor([ 1.3823, -0.3271, -0.3912, 0.3565]) tensor([0.5851, 0.1059, 0.0993, 0.2097]) -Greedy action tensor([ 0.8045, -0.2327, -0.3201, -0.0452]) tensor([0.4747, 0.1682, 0.1542, 0.2029]) -Greedy action tensor([ 1.5798, -0.4551, -0.6914, 0.3109]) tensor([0.6601, 0.0863, 0.0681, 0.1856]) -Greedy action tensor([ 2.2501, -0.9853, -0.4981, 0.2352]) tensor([0.8086, 0.0318, 0.0518, 0.1078]) -Greedy action tensor([ 1.7463, -0.6735, 0.0155, 0.5947]) tensor([0.6320, 0.0562, 0.1120, 0.1998]) -Greedy action tensor([ 1.5579, -0.3150, -0.8515, 1.0646]) tensor([0.5393, 0.0829, 0.0485, 0.3293]) -Greedy action tensor([ 2.1111, -0.7814, -0.8982, 0.4130]) tensor([0.7765, 0.0430, 0.0383, 0.1421]) -Greedy action tensor([ 1.6869, -0.1146, -0.1747, 0.4567]) tensor([0.6201, 0.1023, 0.0964, 0.1812]) -Greedy action tensor([ 1.6861, -0.3684, -0.6491, 1.0705]) tensor([0.5665, 0.0726, 0.0548, 0.3061]) -Greedy action tensor([ 1.1112, -0.4336, -0.4164, 0.1559]) tensor([0.5509, 0.1175, 0.1196, 0.2119]) -Greedy action tensor([ 1.6249, 0.0041, -0.8944, 0.7347]) tensor([0.5921, 0.1171, 0.0477, 0.2431]) -Greedy action tensor([ 1.4711, -0.7630, -0.4929, 0.0982]) tensor([0.6663, 0.0714, 0.0935, 0.1688]) -Greedy action tensor([ 1.1496, -0.8801, -0.1937, 0.2953]) tensor([0.5501, 0.0723, 0.1436, 0.2341]) -Greedy action tensor([ 2.2582, -0.7782, -0.5965, 0.3498]) tensor([0.7975, 0.0383, 0.0459, 0.1183]) -Greedy action tensor([ 1.1899, -0.4005, -0.6507, 0.0343]) tensor([0.5962, 0.1215, 0.0946, 0.1877]) -Greedy action tensor([ 1.2481, -0.2410, -0.7137, 0.2152]) tensor([0.5807, 0.1310, 0.0816, 0.2067]) -Greedy action tensor([ 1.3549, 0.0157, -0.1893, -0.0671]) tensor([0.5825, 0.1526, 0.1244, 0.1405]) -Greedy action tensor([ 1.2940, -0.5891, -0.9897, 0.1420]) tensor([0.6369, 0.0969, 0.0649, 0.2013]) -Greedy action tensor([ 1.4324, -0.6578, -0.1544, 0.3431]) tensor([0.6007, 0.0743, 0.1229, 0.2021]) -Greedy action tensor([ 1.4178, -0.0133, -0.7200, 0.5226]) tensor([0.5664, 0.1354, 0.0668, 0.2314]) -Greedy action tensor([ 1.1200, -0.5198, -0.3268, 0.1893]) tensor([0.5484, 0.1064, 0.1290, 0.2162]) -Greedy action tensor([ 0.9456, -0.2810, -0.6053, 0.2760]) tensor([0.4957, 0.1454, 0.1051, 0.2538]) -Greedy action tensor([ 1.6790, -0.2741, -0.3837, 0.6497]) tensor([0.6149, 0.0872, 0.0782, 0.2197]) -Greedy action tensor([ 1.2906, -0.4205, -0.2559, 0.2337]) tensor([0.5743, 0.1038, 0.1223, 0.1996]) -Greedy action tensor([ 1.6576, -0.1748, -1.2060, -0.0582]) tensor([0.7159, 0.1146, 0.0409, 0.1287]) -Greedy action tensor([ 1.1661, -0.0697, -0.6655, 0.2643]) tensor([0.5386, 0.1565, 0.0863, 0.2186]) -Greedy action tensor([ 1.8104, -0.7442, -0.2599, 0.7110]) tensor([0.6506, 0.0506, 0.0821, 0.2167]) -Greedy action tensor([ 1.7561, -0.4896, -0.5303, 0.7748]) tensor([0.6320, 0.0669, 0.0642, 0.2369]) -Greedy action tensor([ 1.0849, -0.4235, -0.2744, 0.1412]) tensor([0.5355, 0.1185, 0.1376, 0.2084]) -Greedy action tensor([ 1.6189, -1.2238, -0.1506, -0.1235]) tensor([0.7124, 0.0415, 0.1214, 0.1247]) -Greedy action tensor([ 1.8347, -0.4205, -0.8585, 0.6717]) tensor([0.6734, 0.0706, 0.0456, 0.2105]) -Greedy action tensor([ 1.7257, -0.2412, -0.7459, 0.0442]) tensor([0.7090, 0.0992, 0.0599, 0.1319]) -Greedy action tensor([ 1.3376, -0.3426, -0.4333, 0.4846]) tensor([0.5610, 0.1045, 0.0955, 0.2390]) -Greedy action tensor([ 1.5110, -0.6088, -0.1307, 0.6391]) tensor([0.5774, 0.0693, 0.1118, 0.2414]) -Greedy action tensor([ 1.3666, -0.0484, -0.3448, 0.0653]) tensor([0.5897, 0.1433, 0.1065, 0.1605]) -Greedy action tensor([ 1.6157, -0.1225, -0.2540, -0.1059]) tensor([0.6628, 0.1165, 0.1022, 0.1185]) -Greedy action tensor([ 2.2201, -0.5562, -0.6946, 0.2880]) tensor([0.7928, 0.0494, 0.0430, 0.1148]) -Greedy action tensor([ 1.4392, -0.6448, -0.5620, 0.2723]) tensor([0.6366, 0.0792, 0.0860, 0.1982]) -Greedy action tensor([ 8.3815e-01, -3.4427e-01, 1.6488e-03, -5.7878e-04]) tensor([0.4604, 0.1411, 0.1995, 0.1990]) -Greedy action tensor([ 0.9632, -0.5950, -0.5226, 0.7731]) tensor([0.4417, 0.0930, 0.1000, 0.3653]) -Greedy action tensor([ 1.2549, -0.1951, -0.5763, 0.4333]) tensor([0.5451, 0.1279, 0.0873, 0.2397]) -Greedy action tensor([ 1.3658, 0.0921, -0.5291, -0.0199]) tensor([0.5951, 0.1665, 0.0895, 0.1489]) -Greedy action tensor([ 1.3186, -0.4478, -0.6044, 0.3397]) tensor([0.5907, 0.1010, 0.0863, 0.2219]) -Greedy action tensor([1.9522, 0.2629, 0.1657, 0.1123]) tensor([0.6618, 0.1222, 0.1109, 0.1051]) -Greedy action tensor([ 0.7157, -0.2036, -0.1838, 0.1925]) tensor([0.4170, 0.1663, 0.1696, 0.2471]) -Greedy action tensor([ 1.6808, -0.9508, -0.5783, 0.6997]) tensor([0.6446, 0.0464, 0.0673, 0.2417]) -Greedy action tensor([ 1.6619, -0.0096, -0.4581, 0.2331]) tensor([0.6462, 0.1215, 0.0776, 0.1548]) -Greedy action tensor([ 1.2805, -0.8086, -0.0592, 0.6533]) tensor([0.5209, 0.0645, 0.1364, 0.2782]) -Greedy action tensor([ 1.6892, -0.7522, -0.2011, 0.3242]) tensor([0.6696, 0.0583, 0.1011, 0.1710]) -Greedy action tensor([ 1.8172, -0.8468, -0.4005, 0.4769]) tensor([0.6943, 0.0484, 0.0756, 0.1817]) -Greedy action tensor([ 1.1369, -0.2752, -0.7252, 0.6209]) tensor([0.5010, 0.1221, 0.0778, 0.2991]) -Greedy action tensor([ 2.2614, -0.8195, -0.4880, 0.8436]) tensor([0.7396, 0.0340, 0.0473, 0.1792]) -Greedy action tensor([ 1.7878, -0.5756, -0.4555, 0.4591]) tensor([0.6826, 0.0642, 0.0724, 0.1808]) -Greedy action tensor([ 1.8315, -0.8677, -0.8819, 0.4503]) tensor([0.7221, 0.0486, 0.0479, 0.1815]) -Greedy action tensor([ 2.1953, -0.5620, -0.4035, 0.4336]) tensor([0.7636, 0.0485, 0.0568, 0.1311]) -Greedy action tensor([ 1.2419, -0.0078, -0.4033, 0.0799]) tensor([0.5579, 0.1599, 0.1077, 0.1745]) -Greedy action tensor([ 1.8154, -0.9879, -0.1450, 0.0192]) tensor([0.7313, 0.0443, 0.1030, 0.1214]) -Greedy action tensor([ 1.4369e+00, -2.7682e-01, -8.4084e-01, -4.7591e-04]) tensor([0.6578, 0.1185, 0.0674, 0.1563]) -Greedy action tensor([ 1.2104, -0.6077, -0.4533, 0.3210]) tensor([0.5673, 0.0921, 0.1075, 0.2331]) -Greedy action tensor([ 1.1491, -0.0142, -1.0900, 0.3601]) tensor([0.5338, 0.1668, 0.0569, 0.2425]) -Greedy action tensor([ 0.6734, -0.3567, 0.0092, -0.2232]) tensor([0.4387, 0.1566, 0.2258, 0.1790]) -Greedy action tensor([ 1.0034, -0.5369, 0.0763, -0.5339]) tensor([0.5480, 0.1174, 0.2168, 0.1178]) -Greedy action tensor([ 0.5824, -0.6456, 0.0531, -0.2668]) tensor([0.4330, 0.1268, 0.2550, 0.1852]) -Greedy action tensor([ 0.7328, -0.2703, 0.1008, -0.4036]) tensor([0.4506, 0.1652, 0.2395, 0.1446]) -Greedy action tensor([ 1.2507, -0.6587, -0.1685, -0.4736]) tensor([0.6376, 0.0945, 0.1542, 0.1137]) -Greedy action tensor([ 0.6857, -0.7345, -0.0584, -0.3326]) tensor([0.4812, 0.1163, 0.2286, 0.1738]) -Greedy action tensor([ 0.8655, -0.5391, -0.1616, -0.5733]) tensor([0.5433, 0.1334, 0.1945, 0.1289]) -Greedy action tensor([ 0.2128, 0.1690, 0.0047, -0.3486]) tensor([0.2994, 0.2866, 0.2432, 0.1708]) -Greedy action tensor([ 0.7512, -0.4548, -0.1324, -0.3903]) tensor([0.4921, 0.1473, 0.2034, 0.1571]) -Greedy action tensor([ 0.6300, -0.3094, -0.0257, -0.3744]) tensor([0.4393, 0.1717, 0.2280, 0.1609]) -Greedy action tensor([ 0.9873, -0.5553, 0.0422, -0.2076]) tensor([0.5249, 0.1122, 0.2040, 0.1589]) -Greedy action tensor([ 0.7330, -0.4280, -0.0459, -0.2688]) tensor([0.4674, 0.1464, 0.2145, 0.1716]) -Greedy action tensor([ 0.5371, -0.4803, -0.0734, -0.1276]) tensor([0.4134, 0.1495, 0.2245, 0.2127]) -Greedy action tensor([ 1.1574, -0.7933, 0.1163, -0.5616]) tensor([0.5972, 0.0849, 0.2109, 0.1070]) -Greedy action tensor([ 1.0192, -0.5179, -0.1032, -0.5877]) tensor([0.5744, 0.1235, 0.1870, 0.1152]) -Greedy action tensor([ 0.7278, -0.4880, -0.0218, -0.3603]) tensor([0.4749, 0.1408, 0.2244, 0.1600]) -Greedy action tensor([ 0.5300, -0.1423, 0.0429, -0.0081]) tensor([0.3692, 0.1885, 0.2268, 0.2155]) -Greedy action tensor([ 0.9001, -0.6372, -0.0278, -0.5548]) tensor([0.5424, 0.1166, 0.2144, 0.1266]) -Greedy action tensor([ 4.5455e-01, -3.5255e-01, 2.7849e-04, -9.5843e-02]) tensor([0.3763, 0.1679, 0.2389, 0.2170]) -Greedy action tensor([ 0.7902, 0.2985, -0.0080, -0.1555]) tensor([0.4081, 0.2496, 0.1837, 0.1585]) -Greedy action tensor([ 0.9057, -0.4497, 0.1409, -0.3702]) tensor([0.4994, 0.1288, 0.2324, 0.1394]) -Greedy action tensor([ 0.7829, -0.4803, 0.0752, -0.6404]) tensor([0.4959, 0.1402, 0.2444, 0.1195]) -Greedy action tensor([ 0.7589, -0.3639, 0.0227, -0.2506]) tensor([0.4611, 0.1500, 0.2208, 0.1680]) -Greedy action tensor([ 0.7891, -0.4220, 0.0739, -0.4539]) tensor([0.4818, 0.1435, 0.2357, 0.1390]) -Greedy action tensor([ 0.8310, -0.6026, 0.0884, -0.4257]) tensor([0.5003, 0.1193, 0.2381, 0.1424]) -Greedy action tensor([ 0.6544, -0.4473, -0.2162, -0.1318]) tensor([0.4532, 0.1506, 0.1898, 0.2065]) -Greedy action tensor([ 0.4965, -0.3735, -0.1111, -0.4579]) tensor([0.4258, 0.1784, 0.2319, 0.1639]) -Greedy action tensor([ 0.7040, 0.3464, -0.1264, 0.1429]) tensor([0.3696, 0.2585, 0.1611, 0.2109]) -Greedy action tensor([ 0.7883, -0.3327, -0.0301, -0.1782]) tensor([0.4656, 0.1518, 0.2054, 0.1771]) -Greedy action tensor([ 0.8509, -0.7302, -0.0055, -0.4514]) tensor([0.5257, 0.1082, 0.2232, 0.1429]) -Greedy action tensor([ 0.3087, -0.1516, -0.0446, -0.1888]) tensor([0.3400, 0.2145, 0.2388, 0.2067]) -Greedy action tensor([ 0.5333, -0.1881, 0.0106, -0.1751]) tensor([0.3889, 0.1890, 0.2306, 0.1915]) -Greedy action tensor([ 1.0338, -0.5154, 0.2822, -0.4199]) tensor([0.5215, 0.1108, 0.2459, 0.1219]) -Greedy action tensor([ 1.1075, -0.4322, 0.3011, -0.5233]) tensor([0.5386, 0.1155, 0.2405, 0.1054]) -Greedy action tensor([ 1.0495, -0.7250, 0.1168, -0.6032]) tensor([0.5699, 0.0966, 0.2243, 0.1092]) -Greedy action tensor([ 0.4177, 0.0986, -0.1630, 0.0457]) tensor([0.3361, 0.2442, 0.1880, 0.2317]) -Greedy action tensor([ 0.8731, -0.6566, 0.0328, -0.5345]) tensor([0.5283, 0.1144, 0.2280, 0.1293]) -Greedy action tensor([ 0.6609, -0.1185, 0.0469, -0.0371]) tensor([0.4004, 0.1837, 0.2167, 0.1992]) -Greedy action tensor([ 0.5826, -0.4298, -0.0225, -0.2431]) tensor([0.4260, 0.1548, 0.2326, 0.1866]) -Greedy action tensor([ 0.6486, -0.2195, 0.1729, -0.4268]) tensor([0.4198, 0.1762, 0.2609, 0.1432]) -Greedy action tensor([ 0.4906, -0.0454, 0.0306, -0.2494]) tensor([0.3713, 0.2172, 0.2344, 0.1771]) -Greedy action tensor([ 1.3012, -0.7467, -0.0357, -0.8065]) tensor([0.6609, 0.0853, 0.1736, 0.0803]) -Greedy action tensor([ 1.0556, -0.8734, 0.0191, -0.4420]) tensor([0.5802, 0.0843, 0.2058, 0.1298]) -Greedy action tensor([ 0.1525, -0.0167, -0.0435, -0.1989]) tensor([0.2967, 0.2505, 0.2439, 0.2088]) -Greedy action tensor([ 0.9234, -0.6250, -0.2036, -0.3448]) tensor([0.5501, 0.1169, 0.1782, 0.1547]) -Greedy action tensor([ 1.0675, -0.7399, -0.0301, -0.6238]) tensor([0.5945, 0.0975, 0.1984, 0.1096]) -Greedy action tensor([ 0.6301, -0.0141, -0.1210, -0.1669]) tensor([0.4086, 0.2145, 0.1928, 0.1841]) -Greedy action tensor([ 0.8963, -0.8883, -0.0814, -0.2975]) tensor([0.5414, 0.0909, 0.2037, 0.1641]) -Greedy action tensor([ 0.8771, -0.1917, -0.0162, 0.0219]) tensor([0.4592, 0.1577, 0.1879, 0.1952]) -Greedy action tensor([ 0.3265, 0.6623, -0.2813, 0.2014]) tensor([0.2614, 0.3657, 0.1423, 0.2306]) -Greedy action tensor([ 0.6144, -0.3998, -0.2733, -0.2896]) tensor([0.4589, 0.1664, 0.1889, 0.1858]) -Greedy action tensor([ 0.5664, -0.3634, 0.0228, -0.2651]) tensor([0.4148, 0.1637, 0.2409, 0.1806]) -Greedy action tensor([ 0.6474, -0.4223, -0.1470, -0.2187]) tensor([0.4514, 0.1549, 0.2039, 0.1898]) -Greedy action tensor([ 0.5717, -0.0413, -0.1436, -0.0740]) tensor([0.3914, 0.2120, 0.1914, 0.2052]) -Greedy action tensor([ 0.6266, -0.2394, 0.0236, -0.0883]) tensor([0.4070, 0.1712, 0.2227, 0.1991]) -Greedy action tensor([ 1.3684, -0.9632, -0.0884, -0.6263]) tensor([0.6820, 0.0663, 0.1589, 0.0928]) -Greedy action tensor([ 1.1270, -0.8411, 0.0434, -0.6753]) tensor([0.6086, 0.0850, 0.2059, 0.1004]) -Greedy action tensor([ 0.4510, 0.1134, -0.0021, 0.1472]) tensor([0.3239, 0.2311, 0.2059, 0.2391]) -Greedy action tensor([ 0.7197, -0.2039, 0.0077, -0.0978]) tensor([0.4293, 0.1705, 0.2107, 0.1896]) -Greedy action tensor([ 0.4375, 0.0040, 0.0137, -0.2725]) tensor([0.3578, 0.2320, 0.2342, 0.1759]) -Greedy action tensor([ 0.8536, -0.6470, -0.1430, -0.5454]) tensor([0.5438, 0.1213, 0.2007, 0.1342]) -Greedy action tensor([ 0.7795, -0.2740, 0.0342, -0.1541]) tensor([0.4512, 0.1573, 0.2141, 0.1774]) -Greedy action tensor([ 1.0985, -0.7548, 0.0330, -0.4052]) tensor([0.5802, 0.0909, 0.1999, 0.1290]) -Greedy action tensor([ 0.7293, -0.7751, -0.1566, -0.3385]) tensor([0.5055, 0.1123, 0.2084, 0.1738]) -Greedy action tensor([ 0.4516, -0.0852, 0.0712, -0.2177]) tensor([0.3597, 0.2103, 0.2459, 0.1842]) -Greedy action tensor([ 0.7272, -0.3891, -0.1051, -0.3879]) tensor([0.4784, 0.1567, 0.2081, 0.1569]) -Greedy action tensor([ 1.1641, -0.8819, -0.0481, -0.6949]) tensor([0.6319, 0.0817, 0.1880, 0.0985]) -Greedy action tensor([ 1.0061, -0.5423, -0.2011, -0.1373]) tensor([0.5463, 0.1161, 0.1634, 0.1741]) -Greedy action tensor([ 0.9751, -0.5220, 0.0091, -0.2043]) tensor([0.5231, 0.1170, 0.1991, 0.1608]) -Greedy action tensor([ 0.9276, -0.4038, -0.1627, -0.3147]) tensor([0.5294, 0.1398, 0.1779, 0.1529]) -Greedy action tensor([ 0.6857, -0.3315, -0.0743, -0.1383]) tensor([0.4409, 0.1594, 0.2062, 0.1934]) -Greedy action tensor([ 0.9087, -0.3658, -0.0472, -0.1016]) tensor([0.4931, 0.1378, 0.1896, 0.1795]) -Greedy action tensor([ 0.8436, 0.0511, -0.0945, 0.1294]) tensor([0.4285, 0.1940, 0.1677, 0.2098]) -Greedy action tensor([ 1.1358, -0.7553, 0.0569, -0.5603]) tensor([0.5973, 0.0901, 0.2031, 0.1095]) -Greedy action tensor([ 0.8201, -0.6208, -0.1364, -0.3208]) tensor([0.5153, 0.1220, 0.1980, 0.1647]) -Greedy action tensor([ 0.4269, -0.0872, -0.0969, -0.0159]) tensor([0.3530, 0.2111, 0.2091, 0.2267]) -Greedy action tensor([ 0.7504, -0.1307, 0.0231, -0.2130]) tensor([0.4388, 0.1818, 0.2120, 0.1674]) -Greedy action tensor([ 0.4862, -0.0560, 0.0569, -0.2967]) tensor([0.3718, 0.2162, 0.2420, 0.1699]) -Greedy action tensor([ 0.7119, -0.2063, -0.0157, -0.1465]) tensor([0.4336, 0.1731, 0.2095, 0.1838]) -Greedy action tensor([ 0.9101, -0.6604, -0.0951, -0.0045]) tensor([0.5064, 0.1053, 0.1853, 0.2029]) -Greedy action tensor([ 0.9307, -0.7463, 0.0723, -0.5503]) tensor([0.5440, 0.1017, 0.2306, 0.1237]) -Greedy action tensor([-0.0479, -0.3002, 0.6638, -0.0953]) tensor([0.2097, 0.1630, 0.4273, 0.2000]) -Greedy action tensor([ 2.2885, -1.9055, 0.6620, 1.0729]) tensor([0.6630, 0.0100, 0.1304, 0.1966]) -Greedy action tensor([-0.1934, 0.3936, 0.5291, 0.3189]) tensor([0.1532, 0.2756, 0.3155, 0.2557]) -Greedy action tensor([-0.1453, -0.4850, -0.0630, -0.4046]) tensor([0.2802, 0.1995, 0.3042, 0.2162]) -Greedy action tensor([-1.1514, -0.6901, 0.5402, -0.6376]) tensor([0.1032, 0.1637, 0.5604, 0.1726]) -Greedy action tensor([ 0.2222, -0.3090, -0.3835, -0.1542]) tensor([0.3546, 0.2085, 0.1935, 0.2434]) -Greedy action tensor([-1.2588, 0.3017, 1.1189, -1.1299]) tensor([0.0566, 0.2693, 0.6098, 0.0643]) -Greedy action tensor([-0.1055, -1.3852, 0.9828, -0.4029]) tensor([0.2004, 0.0557, 0.5950, 0.1488]) -Greedy action tensor([ 0.1747, -0.5019, -0.2531, -1.0944]) tensor([0.4096, 0.2082, 0.2670, 0.1151]) -Greedy action tensor([ 0.7335, -0.0825, -1.3865, 0.5254]) tensor([0.4212, 0.1862, 0.0506, 0.3420]) -Greedy action tensor([-0.5289, -0.3946, -0.1353, -0.6718]) tensor([0.2226, 0.2546, 0.3299, 0.1929]) -Greedy action tensor([-0.6531, -0.1067, -0.5278, -0.3945]) tensor([0.1940, 0.3350, 0.2199, 0.2512]) -Greedy action tensor([ 0.3616, -0.2369, -0.1781, -0.3169]) tensor([0.3788, 0.2082, 0.2208, 0.1922]) -Greedy action tensor([0.2420, 0.0424, 0.3021, 0.3019]) tensor([0.2536, 0.2077, 0.2693, 0.2693]) -Greedy action tensor([ 0.2394, 0.9954, -0.0502, -0.4563]) tensor([0.2285, 0.4866, 0.1710, 0.1139]) -Greedy action tensor([-0.3612, -0.0618, -0.8187, -0.8910]) tensor([0.2801, 0.3778, 0.1772, 0.1649]) -Greedy action tensor([ 0.2960, -0.7389, -1.0214, 0.2640]) tensor([0.3859, 0.1371, 0.1033, 0.3737]) -Greedy action tensor([ 0.5427, -0.9513, -0.1185, -0.0944]) tensor([0.4406, 0.0989, 0.2275, 0.2330]) -Greedy action tensor([-0.2305, 0.5299, 0.2654, -0.3846]) tensor([0.1774, 0.3794, 0.2912, 0.1520]) -Greedy action tensor([ 0.3427, -2.2039, 0.1426, -0.5683]) tensor([0.4350, 0.0341, 0.3561, 0.1749]) -Greedy action tensor([ 1.1710, -0.6632, 0.3643, 0.8504]) tensor([0.4289, 0.0685, 0.1914, 0.3112]) -Greedy action tensor([ 1.7436, -0.1723, 0.4084, 1.3045]) tensor([0.4866, 0.0716, 0.1280, 0.3137]) -Greedy action tensor([-1.1131, -0.0366, -0.1168, -1.2631]) tensor([0.1333, 0.3911, 0.3610, 0.1147]) -Greedy action tensor([-0.1456, -1.1238, -0.8373, 0.1660]) tensor([0.3084, 0.1160, 0.1544, 0.4212]) -Greedy action tensor([-0.1635, 0.4689, -0.4482, -0.2136]) tensor([0.2181, 0.4105, 0.1640, 0.2074]) -Greedy action tensor([ 0.1875, -0.1069, 0.2848, -0.5190]) tensor([0.2993, 0.2230, 0.3300, 0.1477]) -Greedy action tensor([ 0.4186, 0.3039, -0.7789, -0.7238]) tensor([0.3980, 0.3549, 0.1202, 0.1270]) -Greedy action tensor([0.9605, 0.1375, 0.4335, 1.1506]) tensor([0.3088, 0.1356, 0.1823, 0.3734]) -Greedy action tensor([-0.6563, -1.4830, 0.9378, -0.9586]) tensor([0.1408, 0.0616, 0.6935, 0.1041]) -Greedy action tensor([-0.5408, -0.7803, 0.3505, -0.8847]) tensor([0.2027, 0.1595, 0.4942, 0.1437]) -Greedy action tensor([-0.6299, -0.7660, 0.5355, -0.2452]) tensor([0.1527, 0.1333, 0.4897, 0.2243]) -Greedy action tensor([ 0.0766, 0.4141, -0.1036, -0.0651]) tensor([0.2436, 0.3414, 0.2035, 0.2114]) -Greedy action tensor([-0.6619, -0.7293, -0.2400, -0.6875]) tensor([0.2255, 0.2108, 0.3439, 0.2198]) -Greedy action tensor([ 0.4683, 0.6309, -0.2814, -0.7999]) tensor([0.3412, 0.4015, 0.1612, 0.0960]) -Greedy action tensor([ 0.0130, 0.9317, -0.3154, -0.4341]) tensor([0.2055, 0.5150, 0.1480, 0.1314]) -Greedy action tensor([ 0.5488, 1.1972, -0.2803, 0.0697]) tensor([0.2520, 0.4820, 0.1100, 0.1561]) -Greedy action tensor([-0.4593, 0.2241, -0.1489, -0.8072]) tensor([0.1980, 0.3922, 0.2700, 0.1398]) -Greedy action tensor([-1.0376, -0.9363, 0.1408, 0.3089]) tensor([0.1087, 0.1203, 0.3532, 0.4178]) -Greedy action tensor([ 0.6763, 0.1948, -0.4342, 0.2781]) tensor([0.3819, 0.2359, 0.1258, 0.2564]) -Greedy action tensor([ 0.3454, -1.3492, -0.1575, 0.2254]) tensor([0.3738, 0.0687, 0.2261, 0.3315]) -Greedy action tensor([-0.3586, -0.6911, -0.6586, -0.8380]) tensor([0.3250, 0.2330, 0.2408, 0.2012]) -Greedy action tensor([-0.9022, -0.5005, -0.4846, 0.6327]) tensor([0.1156, 0.1727, 0.1755, 0.5363]) -Greedy action tensor([-0.6713, -1.5277, 0.8425, -1.0927]) tensor([0.1509, 0.0641, 0.6859, 0.0990]) -Greedy action tensor([-0.9158, -0.4086, -0.3936, 0.5444]) tensor([0.1156, 0.1919, 0.1948, 0.4977]) -Greedy action tensor([ 0.6435, -0.7169, -0.3865, -0.3512]) tensor([0.5042, 0.1294, 0.1800, 0.1865]) -Greedy action tensor([ 0.4147, -1.4851, 0.1223, 0.1788]) tensor([0.3723, 0.0557, 0.2779, 0.2941]) -Greedy action tensor([ 1.6112, -0.0581, -0.0074, -0.1247]) tensor([0.6399, 0.1205, 0.1268, 0.1128]) -Greedy action tensor([ 0.1682, -0.4163, 0.1984, 0.0891]) tensor([0.2847, 0.1587, 0.2935, 0.2631]) -Greedy action tensor([ 0.8224, -1.0269, -0.4930, 1.0687]) tensor([0.3697, 0.0582, 0.0992, 0.4729]) -Greedy action tensor([-0.2421, -0.7449, 0.4267, -0.8481]) tensor([0.2438, 0.1474, 0.4758, 0.1330]) -Greedy action tensor([-0.4433, 0.2809, -0.6510, -0.2808]) tensor([0.1979, 0.4084, 0.1608, 0.2329]) -Greedy action tensor([ 0.0066, 0.9216, 0.5095, -0.6721]) tensor([0.1768, 0.4413, 0.2923, 0.0897]) -Greedy action tensor([-0.7686, -0.6946, 0.5676, -0.5183]) tensor([0.1395, 0.1503, 0.5309, 0.1792]) -Greedy action tensor([-0.0531, -0.4325, 0.5501, -0.7546]) tensor([0.2495, 0.1707, 0.4561, 0.1237]) -Greedy action tensor([-0.0936, 0.0334, 0.2540, -0.0483]) tensor([0.2175, 0.2470, 0.3079, 0.2276]) -Greedy action tensor([ 0.0968, -0.1832, -0.0023, 0.1399]) tensor([0.2699, 0.2040, 0.2444, 0.2818]) -Greedy action tensor([ 1.1790, -0.6336, 0.5725, 0.9669]) tensor([0.3972, 0.0648, 0.2166, 0.3213]) -Greedy action tensor([0.8295, 0.1977, 0.4803, 0.7961]) tensor([0.3121, 0.1659, 0.2201, 0.3019]) -Greedy action tensor([ 0.2729, -0.4156, -0.6885, 0.6334]) tensor([0.3013, 0.1514, 0.1152, 0.4321]) -Greedy action tensor([-0.9919, -0.5639, -0.0039, -0.1826]) tensor([0.1339, 0.2055, 0.3597, 0.3009]) -Greedy action tensor([-0.5524, -0.3652, -0.1982, -0.6145]) tensor([0.2188, 0.2638, 0.3118, 0.2056]) -Greedy action tensor([ 1.2047, -0.2555, 0.3313, 0.4111]) tensor([0.4758, 0.1105, 0.1986, 0.2151]) -Greedy action tensor([-0.8296, -0.4300, -1.5696, 0.0448]) tensor([0.1864, 0.2779, 0.0889, 0.4468]) -Greedy action tensor([ 1.3889, -0.5738, 0.9014, 0.9193]) tensor([0.4202, 0.0590, 0.2581, 0.2627]) -Greedy action tensor([ 1.6059, -1.0898, -0.3591, 1.1270]) tensor([0.5473, 0.0369, 0.0767, 0.3390]) -Greedy action tensor([-0.7145, -1.1695, 0.6174, -0.2356]) tensor([0.1421, 0.0902, 0.5383, 0.2294]) -Greedy action tensor([ 0.8495, 0.4743, -0.3715, 0.2382]) tensor([0.3961, 0.2722, 0.1168, 0.2149]) -Greedy action tensor([-0.0966, -0.6294, 0.4617, -0.2758]) tensor([0.2398, 0.1407, 0.4190, 0.2004]) -Greedy action tensor([ 0.5965, -1.5463, 0.0518, 0.2073]) tensor([0.4211, 0.0494, 0.2442, 0.2853]) -Greedy action tensor([ 0.2024, -1.2412, 0.8912, -1.2004]) tensor([0.2879, 0.0680, 0.5733, 0.0708]) -Greedy action tensor([-0.9088, -0.9460, -0.9586, -0.5080]) tensor([0.2269, 0.2186, 0.2158, 0.3387]) -Greedy action tensor([-0.5644, -0.6071, 1.0992, -0.7442]) tensor([0.1239, 0.1187, 0.6539, 0.1035]) -Greedy action tensor([ 0.5808, -0.9971, -0.7032, 1.6815]) tensor([0.2227, 0.0460, 0.0617, 0.6696]) -Greedy action tensor([-0.1661, -0.6309, -0.0683, 0.2350]) tensor([0.2367, 0.1487, 0.2610, 0.3535]) -Greedy action tensor([-0.8040, -0.4239, -1.1043, -0.1610]) tensor([0.1959, 0.2865, 0.1451, 0.3726]) -Greedy action tensor([ 0.1653, -1.2676, 0.4856, -0.7493]) tensor([0.3315, 0.0791, 0.4566, 0.1328]) -Greedy action tensor([-0.7618, -0.3598, -0.3966, -1.1019]) tensor([0.2152, 0.3216, 0.3100, 0.1531]) -Greedy action tensor([ 1.6327, -1.2399, 0.3083, 0.0055]) tensor([0.6583, 0.0372, 0.1751, 0.1293]) -Greedy action tensor([ 0.6758, -1.0560, 0.9762, 0.8676]) tensor([0.2675, 0.0473, 0.3612, 0.3240]) -Greedy action tensor([ 0.1554, 0.6348, -0.1233, 0.2053]) tensor([0.2261, 0.3652, 0.1711, 0.2377]) -Greedy action tensor([-0.3956, 0.3842, 0.6904, -0.1701]) tensor([0.1352, 0.2949, 0.4005, 0.1694]) -Greedy action tensor([ 1.2312, 0.3236, -0.1530, 0.1774]) tensor([0.4993, 0.2015, 0.1251, 0.1741]) -Greedy action tensor([ 1.2742, -0.0193, -0.4718, 0.3945]) tensor([0.5366, 0.1472, 0.0936, 0.2226]) -Greedy action tensor([ 1.2488, -0.4098, -0.2365, 0.4095]) tensor([0.5409, 0.1030, 0.1225, 0.2337]) -Greedy action tensor([ 1.4947, -0.2906, -0.5147, 0.5197]) tensor([0.5956, 0.0999, 0.0799, 0.2247]) -Greedy action tensor([ 1.4527, -0.7924, -0.3768, 0.2379]) tensor([0.6397, 0.0678, 0.1027, 0.1899]) -Greedy action tensor([ 1.5380, 0.2728, -0.1320, 0.0441]) tensor([0.5900, 0.1665, 0.1111, 0.1325]) -Greedy action tensor([ 1.3686, -0.7492, -0.2913, 0.7458]) tensor([0.5414, 0.0651, 0.1030, 0.2905]) -Greedy action tensor([ 1.1244, -0.1945, -0.3803, -0.3464]) tensor([0.5817, 0.1555, 0.1292, 0.1336]) -Greedy action tensor([ 1.6041, 0.4993, -0.0799, -0.1967]) tensor([0.5945, 0.1969, 0.1104, 0.0982]) -Greedy action tensor([ 1.3417, 0.5308, -0.3098, 0.2908]) tensor([0.5036, 0.2238, 0.0966, 0.1761]) -Greedy action tensor([ 1.7616, 0.1028, -0.3234, 0.6139]) tensor([0.6127, 0.1166, 0.0762, 0.1944]) -Greedy action tensor([ 1.7174e+00, -6.7688e-01, -1.6659e-05, 4.2064e-02]) tensor([0.6859, 0.0626, 0.1231, 0.1284]) -Greedy action tensor([ 1.7243, 0.2129, -0.6873, 0.0792]) tensor([0.6652, 0.1467, 0.0596, 0.1284]) -Greedy action tensor([ 1.5922, -0.5433, 0.0850, 0.0940]) tensor([0.6397, 0.0756, 0.1417, 0.1430]) -Greedy action tensor([ 1.1942, -0.1939, -0.7261, -0.0049]) tensor([0.5891, 0.1470, 0.0863, 0.1776]) -Greedy action tensor([ 1.3303, -0.5125, -0.4411, 0.4656]) tensor([0.5715, 0.0905, 0.0972, 0.2407]) -Greedy action tensor([ 1.2774, 0.1888, -0.6302, -0.1311]) tensor([0.5782, 0.1946, 0.0858, 0.1414]) -Greedy action tensor([ 1.8568, -0.7681, -0.4778, 0.5805]) tensor([0.6904, 0.0500, 0.0669, 0.1927]) -Greedy action tensor([ 1.0323, -0.4241, -0.0576, 0.1236]) tensor([0.5070, 0.1182, 0.1705, 0.2043]) -Greedy action tensor([ 1.4669, -0.8844, -0.3982, 0.3566]) tensor([0.6331, 0.0603, 0.0980, 0.2086]) -Greedy action tensor([ 1.3224, -0.4724, 0.1029, 0.2427]) tensor([0.5552, 0.0922, 0.1640, 0.1886]) -Greedy action tensor([ 6.2971e-01, -5.9993e-01, -4.3395e-04, 2.7884e-02]) tensor([0.4215, 0.1232, 0.2244, 0.2309]) -Greedy action tensor([ 1.5745, -0.5314, -0.7601, 1.5136]) tensor([0.4631, 0.0564, 0.0448, 0.4357]) -Greedy action tensor([ 0.9723, 0.2136, -1.1969, 0.3671]) tensor([0.4698, 0.2200, 0.0537, 0.2565]) -Greedy action tensor([ 2.3238, -0.9168, -0.4241, 0.4602]) tensor([0.7947, 0.0311, 0.0509, 0.1233]) -Greedy action tensor([ 1.1409, -0.1215, -0.4357, 0.6340]) tensor([0.4780, 0.1353, 0.0988, 0.2879]) -Greedy action tensor([ 1.0724, -0.4124, -0.4358, 0.5496]) tensor([0.4900, 0.1110, 0.1084, 0.2905]) -Greedy action tensor([ 1.5446, -0.3888, -0.3144, 0.1357]) tensor([0.6473, 0.0936, 0.1009, 0.1582]) -Greedy action tensor([ 2.0707, -0.9425, -0.8191, 0.2701]) tensor([0.7874, 0.0387, 0.0438, 0.1301]) -Greedy action tensor([ 2.2050, -0.6829, -0.9596, 0.6231]) tensor([0.7672, 0.0427, 0.0324, 0.1577]) -Greedy action tensor([ 1.9342, 0.5032, -0.2771, 0.0246]) tensor([0.6681, 0.1597, 0.0732, 0.0990]) -Greedy action tensor([ 1.4914, -0.1846, -0.3425, 0.7332]) tensor([0.5508, 0.1031, 0.0880, 0.2581]) -Greedy action tensor([ 0.5224, 0.2187, 0.0365, -0.0111]) tensor([0.3402, 0.2511, 0.2092, 0.1995]) -Greedy action tensor([ 1.5614, -0.3899, -0.4474, 0.3006]) tensor([0.6412, 0.0911, 0.0860, 0.1817]) -Greedy action tensor([ 1.5136, -0.7862, -0.4161, 0.4015]) tensor([0.6352, 0.0637, 0.0922, 0.2089]) -Greedy action tensor([ 1.5379, -0.4619, -0.7960, 0.4353]) tensor([0.6393, 0.0865, 0.0620, 0.2122]) -Greedy action tensor([ 1.8860, 0.2910, -0.3018, 0.3896]) tensor([0.6498, 0.1318, 0.0729, 0.1455]) -Greedy action tensor([ 1.5170, 0.0397, -0.3355, -0.2041]) tensor([0.6394, 0.1459, 0.1003, 0.1144]) -Greedy action tensor([ 1.7087, 0.4175, -0.4388, 0.0234]) tensor([0.6341, 0.1743, 0.0740, 0.1175]) -Greedy action tensor([ 1.6568, -0.7725, -0.2640, 0.3234]) tensor([0.6675, 0.0588, 0.0978, 0.1759]) -Greedy action tensor([ 1.2600, -0.4617, -0.6329, 0.2784]) tensor([0.5868, 0.1049, 0.0884, 0.2199]) -Greedy action tensor([ 1.0582, -0.2754, -0.0573, 0.2399]) tensor([0.4920, 0.1297, 0.1613, 0.2171]) -Greedy action tensor([ 1.2667, -0.4311, -0.3234, 0.0562]) tensor([0.5935, 0.1087, 0.1210, 0.1769]) -Greedy action tensor([ 1.1210, -0.6782, -0.1490, 0.3257]) tensor([0.5270, 0.0872, 0.1480, 0.2379]) -Greedy action tensor([ 1.2499, -0.2347, -0.7475, 0.1580]) tensor([0.5890, 0.1335, 0.0799, 0.1976]) -Greedy action tensor([ 1.9457, -0.6448, 0.2873, 0.8050]) tensor([0.6309, 0.0473, 0.1202, 0.2016]) -Greedy action tensor([ 0.8658, -0.2719, -0.3313, -0.1456]) tensor([0.5034, 0.1614, 0.1521, 0.1831]) -Greedy action tensor([ 1.6522, -0.7242, -0.5408, 0.4542]) tensor([0.6639, 0.0617, 0.0741, 0.2004]) -Greedy action tensor([ 1.6781, -0.9987, -0.5616, 0.2127]) tensor([0.7111, 0.0489, 0.0757, 0.1643]) -Greedy action tensor([ 2.0465, -0.1659, -1.0598, 0.5643]) tensor([0.7239, 0.0792, 0.0324, 0.1644]) -Greedy action tensor([ 1.1733, -0.4041, -1.1999, 0.5151]) tensor([0.5502, 0.1136, 0.0513, 0.2849]) -Greedy action tensor([ 1.4173, -0.4987, -0.3619, 0.0685]) tensor([0.6347, 0.0934, 0.1071, 0.1647]) -Greedy action tensor([ 2.1650, -0.9384, -1.0627, 0.0190]) tensor([0.8323, 0.0374, 0.0330, 0.0973]) -Greedy action tensor([ 1.1483, -0.4497, -0.5526, 0.2868]) tensor([0.5533, 0.1119, 0.1010, 0.2338]) -Greedy action tensor([ 1.8578, -0.4068, -1.0236, 0.0136]) tensor([0.7587, 0.0788, 0.0425, 0.1200]) -Greedy action tensor([ 1.6572, -0.3835, -0.6318, 0.6235]) tensor([0.6301, 0.0819, 0.0639, 0.2241]) -Greedy action tensor([ 1.5922, -0.7537, -0.2667, 0.1740]) tensor([0.6695, 0.0641, 0.1043, 0.1621]) -Greedy action tensor([ 1.6838, -0.5959, -0.5376, 0.5468]) tensor([0.6529, 0.0668, 0.0708, 0.2095]) -Greedy action tensor([ 1.3373, -0.0676, -0.8814, 0.0398]) tensor([0.6145, 0.1508, 0.0668, 0.1679]) -Greedy action tensor([ 1.5576, -0.6746, -0.1100, 0.0559]) tensor([0.6584, 0.0706, 0.1243, 0.1467]) -Greedy action tensor([ 1.0571, -0.2025, -0.4338, 0.3241]) tensor([0.5027, 0.1426, 0.1132, 0.2415]) -Greedy action tensor([ 1.4727, 0.2345, -0.4921, 0.5469]) tensor([0.5476, 0.1587, 0.0768, 0.2170]) -Greedy action tensor([ 1.6551, -0.8971, -0.1978, 0.4756]) tensor([0.6485, 0.0505, 0.1017, 0.1993]) -Greedy action tensor([ 1.5198, -0.3466, -0.2613, 0.3021]) tensor([0.6177, 0.0955, 0.1040, 0.1828]) -Greedy action tensor([ 1.2098, -0.4756, -0.8722, 0.2401]) tensor([0.5920, 0.1097, 0.0738, 0.2245]) -Greedy action tensor([ 0.5218, -0.2770, -0.0563, -0.0913]) tensor([0.3918, 0.1762, 0.2198, 0.2122]) -Greedy action tensor([ 1.3618, -0.5500, -0.2125, 0.4552]) tensor([0.5685, 0.0840, 0.1178, 0.2296]) -Greedy action tensor([ 2.2288, -0.7955, -0.2489, 0.6252]) tensor([0.7498, 0.0364, 0.0629, 0.1508]) -Greedy action tensor([ 1.9134, -0.8972, -0.5644, 0.5661]) tensor([0.7122, 0.0429, 0.0598, 0.1851]) -Greedy action tensor([ 1.5035, -1.2886, -0.0566, 0.0679]) tensor([0.6625, 0.0406, 0.1392, 0.1577]) -Greedy action tensor([ 1.8078, -0.7880, -0.2243, 0.4227]) tensor([0.6868, 0.0512, 0.0900, 0.1719]) -Greedy action tensor([ 2.7895, -0.0607, -0.1714, 0.5223]) tensor([0.8243, 0.0477, 0.0427, 0.0854]) -Greedy action tensor([ 1.9121, -0.8438, -0.2220, 0.9001]) tensor([0.6471, 0.0411, 0.0766, 0.2352]) -Greedy action tensor([ 1.8965, -1.0208, -0.5882, 0.1947]) tensor([0.7577, 0.0410, 0.0632, 0.1382]) -Greedy action tensor([ 1.7747, -0.6015, -0.4755, 0.1743]) tensor([0.7142, 0.0664, 0.0753, 0.1441]) -Greedy action tensor([ 1.5012, -0.0988, -0.2922, 0.3420]) tensor([0.5945, 0.1200, 0.0989, 0.1865]) -Greedy action tensor([ 1.7349, -0.5656, -0.3922, 0.2903]) tensor([0.6872, 0.0689, 0.0819, 0.1621]) -Greedy action tensor([ 1.3128, -0.0464, -0.7495, 0.0676]) tensor([0.5981, 0.1536, 0.0761, 0.1722]) -Greedy action tensor([ 1.3176, -0.1165, -0.5554, 0.1747]) tensor([0.5845, 0.1393, 0.0898, 0.1864]) -Greedy action tensor([ 1.3547, -0.4207, -0.6099, 0.2635]) tensor([0.6077, 0.1030, 0.0852, 0.2041]) -Greedy action tensor([ 1.5940, 0.1753, -0.3143, 0.4245]) tensor([0.5879, 0.1423, 0.0872, 0.1826]) -Greedy action tensor([2.4145, 0.5697, 0.1803, 0.4310]) tensor([0.7129, 0.1127, 0.0763, 0.0981]) -Greedy action tensor([ 0.9752, -0.5202, -0.1890, -0.4411]) tensor([0.5621, 0.1260, 0.1755, 0.1364]) -Greedy action tensor([ 0.9127, -0.5438, -0.1336, -0.3982]) tensor([0.5394, 0.1257, 0.1895, 0.1454]) -Greedy action tensor([ 0.6761, -0.4548, -0.1371, -0.2182]) tensor([0.4598, 0.1484, 0.2039, 0.1880]) -Greedy action tensor([ 0.4478, -0.1006, 0.1185, -0.1129]) tensor([0.3487, 0.2015, 0.2508, 0.1990]) -Greedy action tensor([ 1.3163, -0.6330, -0.1848, -0.4140]) tensor([0.6483, 0.0923, 0.1445, 0.1149]) -Greedy action tensor([ 0.7819, -0.2290, -0.0730, -0.5170]) tensor([0.4850, 0.1765, 0.2063, 0.1323]) -Greedy action tensor([ 1.0335, -0.6778, 0.1432, -0.6260]) tensor([0.5614, 0.1014, 0.2304, 0.1068]) -Greedy action tensor([ 1.3031, -0.6888, -0.0839, -0.9854]) tensor([0.6722, 0.0917, 0.1679, 0.0682]) -Greedy action tensor([ 0.3264, -0.1861, -0.1252, -0.0265]) tensor([0.3403, 0.2039, 0.2167, 0.2391]) -Greedy action tensor([ 0.6714, -0.0063, -0.0148, 0.0626]) tensor([0.3913, 0.1987, 0.1970, 0.2129]) -Greedy action tensor([ 1.1142, -0.6200, 0.0794, -0.6415]) tensor([0.5866, 0.1036, 0.2084, 0.1014]) -Greedy action tensor([ 0.7003, -0.4401, -0.3923, -0.2120]) tensor([0.4862, 0.1555, 0.1631, 0.1953]) -Greedy action tensor([ 0.8931, -0.5523, -0.1087, -0.2469]) tensor([0.5201, 0.1226, 0.1910, 0.1663]) -Greedy action tensor([ 1.1617, -0.4550, -0.0633, -0.2842]) tensor([0.5788, 0.1149, 0.1700, 0.1363]) -Greedy action tensor([ 0.8699, -0.5269, -0.1482, -0.2861]) tensor([0.5199, 0.1286, 0.1878, 0.1636]) -Greedy action tensor([ 5.9160e-01, -2.5074e-03, -7.3370e-02, 1.8683e-04]) tensor([0.3817, 0.2107, 0.1963, 0.2113]) -Greedy action tensor([ 0.8644, -0.4306, -0.1627, -0.2911]) tensor([0.5136, 0.1407, 0.1839, 0.1618]) -Greedy action tensor([ 0.6177, -0.1495, 0.0104, -0.0550]) tensor([0.3969, 0.1843, 0.2163, 0.2026]) -Greedy action tensor([ 0.6081, -0.3184, -0.0285, -0.3052]) tensor([0.4299, 0.1702, 0.2274, 0.1725]) -Greedy action tensor([ 0.4983, -0.0782, -0.0036, -0.3172]) tensor([0.3832, 0.2153, 0.2320, 0.1695]) -Greedy action tensor([ 0.8998, -0.7106, 0.0958, -0.3695]) tensor([0.5186, 0.1036, 0.2321, 0.1457]) -Greedy action tensor([ 1.0252, -0.6135, -0.0902, -0.2890]) tensor([0.5584, 0.1085, 0.1830, 0.1500]) -Greedy action tensor([ 1.1600, -0.8166, 0.0594, -0.4992]) tensor([0.6019, 0.0834, 0.2002, 0.1145]) -Greedy action tensor([ 0.3089, 0.0705, -0.2707, -0.3379]) tensor([0.3482, 0.2744, 0.1950, 0.1824]) -Greedy action tensor([ 0.5511, -0.0738, 0.1291, -0.4457]) tensor([0.3906, 0.2091, 0.2561, 0.1442]) -Greedy action tensor([ 0.8867, -0.4618, -0.0775, -0.2643]) tensor([0.5109, 0.1327, 0.1948, 0.1616]) -Greedy action tensor([ 0.7688, -0.5737, -0.0845, -0.1173]) tensor([0.4763, 0.1244, 0.2029, 0.1964]) -Greedy action tensor([ 1.2288, -0.8188, -0.0672, -0.8670]) tensor([0.6555, 0.0846, 0.1794, 0.0806]) -Greedy action tensor([ 0.8920, -0.7081, -0.0680, -0.3444]) tensor([0.5333, 0.1077, 0.2042, 0.1549]) -Greedy action tensor([ 1.0079, -0.8035, -0.0727, -0.3757]) tensor([0.5703, 0.0932, 0.1935, 0.1430]) -Greedy action tensor([ 0.5736, -0.1948, -0.0372, -0.1714]) tensor([0.4030, 0.1869, 0.2188, 0.1913]) -Greedy action tensor([ 0.8983, -0.4827, -0.0522, -0.2655]) tensor([0.5128, 0.1289, 0.1982, 0.1601]) -Greedy action tensor([ 0.5167, -0.4538, -0.0100, -0.1952]) tensor([0.4065, 0.1540, 0.2400, 0.1995]) -Greedy action tensor([ 0.8323, -0.7512, 0.1298, -0.3536]) tensor([0.4985, 0.1023, 0.2469, 0.1523]) -Greedy action tensor([ 0.7586, -0.3333, 0.0367, -0.3923]) tensor([0.4678, 0.1570, 0.2273, 0.1480]) -Greedy action tensor([ 1.1208, -1.1275, 0.1944, -0.7475]) tensor([0.6039, 0.0638, 0.2391, 0.0932]) -Greedy action tensor([ 0.5108, -0.3954, -0.0613, -0.1737]) tensor([0.4044, 0.1634, 0.2282, 0.2040]) -Greedy action tensor([ 0.9369, -0.4745, 0.3042, -0.3325]) tensor([0.4864, 0.1186, 0.2583, 0.1367]) -Greedy action tensor([ 1.0732, -1.0393, -0.0054, -0.5742]) tensor([0.6048, 0.0731, 0.2057, 0.1164]) -Greedy action tensor([ 0.7065, -0.0607, -0.0459, -0.0495]) tensor([0.4158, 0.1931, 0.1959, 0.1952]) -Greedy action tensor([ 0.6447, -0.1865, 0.0429, -0.3062]) tensor([0.4220, 0.1838, 0.2312, 0.1631]) -Greedy action tensor([ 0.3801, 0.1320, 0.0821, -0.2510]) tensor([0.3274, 0.2554, 0.2430, 0.1742]) -Greedy action tensor([ 0.7663, -0.1963, -0.0612, -0.2735]) tensor([0.4603, 0.1758, 0.2012, 0.1627]) -Greedy action tensor([ 0.5446, -0.2488, 0.0403, -0.0720]) tensor([0.3852, 0.1742, 0.2326, 0.2079]) -Greedy action tensor([ 0.6518, -0.3621, 0.2739, -0.3838]) tensor([0.4161, 0.1510, 0.2852, 0.1477]) -Greedy action tensor([ 0.8782, -0.5208, -0.1083, -0.2282]) tensor([0.5127, 0.1266, 0.1912, 0.1696]) -Greedy action tensor([ 1.0980, -0.7179, -0.1278, -0.5474]) tensor([0.6064, 0.0987, 0.1780, 0.1170]) -Greedy action tensor([ 0.5383, -0.3708, -0.0513, -0.0687]) tensor([0.3996, 0.1610, 0.2216, 0.2178]) -Greedy action tensor([ 1.3147, -0.8708, 0.1080, -0.6675]) tensor([0.6454, 0.0726, 0.1931, 0.0889]) -Greedy action tensor([ 0.3603, -0.3882, -0.1765, -0.0460]) tensor([0.3671, 0.1737, 0.2146, 0.2446]) -Greedy action tensor([ 0.8973, -0.4037, -0.3077, -0.3763]) tensor([0.5400, 0.1470, 0.1618, 0.1511]) -Greedy action tensor([ 0.9001, -0.6062, 0.0731, -0.6284]) tensor([0.5331, 0.1182, 0.2331, 0.1156]) -Greedy action tensor([ 0.7684, -0.6504, -0.2486, -0.4884]) tensor([0.5296, 0.1282, 0.1915, 0.1507]) -Greedy action tensor([ 1.1364, -0.5333, -0.0408, -0.2979]) tensor([0.5765, 0.1086, 0.1776, 0.1374]) -Greedy action tensor([ 0.6436, -0.7747, -0.1602, -0.1906]) tensor([0.4708, 0.1140, 0.2107, 0.2044]) -Greedy action tensor([ 0.8591, -0.5383, -0.0685, -0.2917]) tensor([0.5104, 0.1262, 0.2019, 0.1615]) -Greedy action tensor([ 0.5861, -0.2349, -0.1309, -0.0184]) tensor([0.4041, 0.1778, 0.1973, 0.2208]) -Greedy action tensor([ 1.1418, -0.7613, -0.1146, -0.7766]) tensor([0.6327, 0.0943, 0.1801, 0.0929]) -Greedy action tensor([ 1.0094, -0.5175, 0.1275, -0.3580]) tensor([0.5302, 0.1152, 0.2195, 0.1351]) -Greedy action tensor([ 1.3264, -0.6713, -0.2830, -0.8808]) tensor([0.6917, 0.0938, 0.1384, 0.0761]) -Greedy action tensor([ 0.9902, -0.7825, 0.0895, -0.5779]) tensor([0.5603, 0.0952, 0.2277, 0.1168]) -Greedy action tensor([ 0.8788, -0.3566, 0.0848, -0.2504]) tensor([0.4840, 0.1407, 0.2188, 0.1565]) -Greedy action tensor([ 0.4654, 0.0070, -0.0252, -0.0278]) tensor([0.3502, 0.2214, 0.2144, 0.2139]) -Greedy action tensor([ 0.7237, -0.8067, -0.1954, -0.1843]) tensor([0.4954, 0.1072, 0.1976, 0.1998]) -Greedy action tensor([ 0.7571, -0.2023, 0.0457, -0.1728]) tensor([0.4408, 0.1689, 0.2164, 0.1739]) -Greedy action tensor([ 0.3166, 0.0128, -0.0878, -0.2149]) tensor([0.3341, 0.2466, 0.2230, 0.1964]) -Greedy action tensor([ 1.0962, -0.8305, 0.0400, -0.7078]) tensor([0.6031, 0.0878, 0.2098, 0.0993]) -Greedy action tensor([ 0.7282, -0.3119, -0.1494, -0.2739]) tensor([0.4681, 0.1654, 0.1946, 0.1718]) -Greedy action tensor([ 0.6570, -0.2408, -0.0028, -0.1326]) tensor([0.4205, 0.1713, 0.2174, 0.1909]) -Greedy action tensor([ 0.6677, -0.6132, 0.2639, -0.5390]) tensor([0.4455, 0.1237, 0.2975, 0.1333]) -Greedy action tensor([ 0.7927, -0.6255, -0.1882, -0.2782]) tensor([0.5103, 0.1236, 0.1913, 0.1749]) -Greedy action tensor([0.7985, 0.0816, 0.0743, 0.0443]) tensor([0.4093, 0.1998, 0.1984, 0.1925]) -Greedy action tensor([ 0.9743, -0.8242, 0.0950, -0.5133]) tensor([0.5535, 0.0916, 0.2298, 0.1251]) -Greedy action tensor([ 0.8955, -0.7859, -0.0452, -0.3515]) tensor([0.5365, 0.0999, 0.2094, 0.1542]) -Greedy action tensor([ 0.6758, 0.0018, -0.1408, 0.0348]) tensor([0.4035, 0.2056, 0.1783, 0.2125]) -Greedy action tensor([ 0.3644, 0.0300, 0.0076, -0.2292]) tensor([0.3369, 0.2411, 0.2358, 0.1861]) -Greedy action tensor([ 1.1077, -0.6808, -0.0511, -0.5181]) tensor([0.5960, 0.0997, 0.1871, 0.1173]) -Greedy action tensor([ 0.6265, -0.0158, -0.0802, 0.0062]) tensor([0.3911, 0.2057, 0.1929, 0.2103]) -Greedy action tensor([ 0.7066, -0.6027, 0.0289, -0.6791]) tensor([0.4931, 0.1331, 0.2504, 0.1234]) -Greedy action tensor([ 0.3014, -0.2711, -0.0283, -0.2413]) tensor([0.3491, 0.1969, 0.2511, 0.2029]) -Greedy action tensor([ 0.4872, -0.3058, -0.0752, -0.2327]) tensor([0.3985, 0.1803, 0.2271, 0.1940]) -Greedy action tensor([-1.7681, -0.1246, 0.5976, -0.2524]) tensor([0.0468, 0.2420, 0.4983, 0.2130]) -Greedy action tensor([-1.2340, -0.0898, 0.1023, -0.0664]) tensor([0.0896, 0.2814, 0.3410, 0.2880]) -Greedy action tensor([-0.3220, 0.8802, 0.2380, 0.4961]) tensor([0.1198, 0.3988, 0.2098, 0.2716]) -Greedy action tensor([-1.9284, -0.4468, 0.6594, -0.1737]) tensor([0.0408, 0.1797, 0.5433, 0.2362]) -Greedy action tensor([-1.6697, -0.1867, 0.5007, -0.0820]) tensor([0.0525, 0.2312, 0.4597, 0.2567]) -Greedy action tensor([-1.5769, -0.1178, 0.4193, -0.0225]) tensor([0.0575, 0.2473, 0.4232, 0.2720]) -Greedy action tensor([-0.6339, 0.4975, -0.0514, -0.2217]) tensor([0.1351, 0.4189, 0.2419, 0.2041]) -Greedy action tensor([-1.0874, 0.4576, 0.2451, -0.0617]) tensor([0.0815, 0.3821, 0.3090, 0.2274]) -Greedy action tensor([-1.8967, -0.4504, 0.6624, -0.1278]) tensor([0.0416, 0.1767, 0.5377, 0.2440]) -Greedy action tensor([ 0.1603, -0.6378, 0.7186, 0.7711]) tensor([0.1984, 0.0893, 0.3468, 0.3655]) -Greedy action tensor([-0.5699, 0.9617, 0.0194, 0.3332]) tensor([0.1011, 0.4674, 0.1822, 0.2493]) -Greedy action tensor([-1.5383, -0.5163, 0.4534, 0.0825]) tensor([0.0619, 0.1719, 0.4533, 0.3129]) -Greedy action tensor([-1.8847, -0.4449, 0.6393, -0.1579]) tensor([0.0429, 0.1809, 0.5351, 0.2411]) -Greedy action tensor([-1.8632, -0.3591, 0.6119, -0.1407]) tensor([0.0435, 0.1958, 0.5171, 0.2436]) -Greedy action tensor([-1.5156, -0.3507, 0.5454, 0.1242]) tensor([0.0581, 0.1862, 0.4563, 0.2994]) -Greedy action tensor([-1.8538, -0.2029, 0.6014, -0.1235]) tensor([0.0425, 0.2218, 0.4956, 0.2401]) -Greedy action tensor([-1.9293, -0.4115, 0.6514, -0.1787]) tensor([0.0408, 0.1860, 0.5384, 0.2348]) -Greedy action tensor([-0.8853, 0.0181, 0.3179, -0.2992]) tensor([0.1163, 0.2871, 0.3875, 0.2091]) -Greedy action tensor([-1.7623, -0.4731, 0.5817, -0.0798]) tensor([0.0489, 0.1777, 0.5101, 0.2633]) -Greedy action tensor([-1.8192, -0.4877, 0.6047, -0.1358]) tensor([0.0466, 0.1764, 0.5261, 0.2509]) -Greedy action tensor([-1.9340, -0.4391, 0.6630, -0.1739]) tensor([0.0405, 0.1806, 0.5436, 0.2354]) -Greedy action tensor([-1.8862, -0.4438, 0.6428, -0.1535]) tensor([0.0427, 0.1806, 0.5353, 0.2414]) -Greedy action tensor([-1.2244, -0.5315, 0.3467, -0.0109]) tensor([0.0895, 0.1789, 0.4305, 0.3011]) -Greedy action tensor([-1.4735, 0.1211, 0.4665, 0.0558]) tensor([0.0571, 0.2815, 0.3976, 0.2637]) -Greedy action tensor([-1.8254, -0.2986, 0.6035, -0.1403]) tensor([0.0448, 0.2060, 0.5078, 0.2414]) -Greedy action tensor([-1.7167, -0.2644, 0.5928, -0.0282]) tensor([0.0482, 0.2059, 0.4852, 0.2608]) -Greedy action tensor([-0.8819, 0.0534, 0.0890, 0.5682]) tensor([0.0957, 0.2438, 0.2526, 0.4079]) -Greedy action tensor([-1.9236, -0.4466, 0.6575, -0.1729]) tensor([0.0411, 0.1799, 0.5426, 0.2365]) -Greedy action tensor([-1.8389, -0.4772, 0.6067, -0.1235]) tensor([0.0455, 0.1774, 0.5244, 0.2527]) -Greedy action tensor([-1.8091, -0.3270, 0.6448, -0.0600]) tensor([0.0439, 0.1932, 0.5106, 0.2523]) -Greedy action tensor([-1.8633, -0.4206, 0.6250, -0.1439]) tensor([0.0438, 0.1852, 0.5269, 0.2442]) -Greedy action tensor([-1.8270, -0.4498, 0.6098, -0.1519]) tensor([0.0460, 0.1823, 0.5261, 0.2456]) -Greedy action tensor([-1.8791, -0.3533, 0.6170, -0.1473]) tensor([0.0428, 0.1967, 0.5189, 0.2416]) -Greedy action tensor([-1.9343, -0.4144, 0.6529, -0.1773]) tensor([0.0406, 0.1854, 0.5391, 0.2350]) -Greedy action tensor([-1.7935, -0.4324, 0.6564, -0.0171]) tensor([0.0447, 0.1742, 0.5174, 0.2638]) -Greedy action tensor([-1.5535, -0.4061, 0.4562, 0.0268]) tensor([0.0607, 0.1913, 0.4531, 0.2949]) -Greedy action tensor([-1.9184, -0.4394, 0.6543, -0.1669]) tensor([0.0412, 0.1810, 0.5402, 0.2376]) -Greedy action tensor([-0.6892, -1.1848, 0.5960, -0.5302]) tensor([0.1563, 0.0952, 0.5652, 0.1833]) -Greedy action tensor([-1.7249, -0.3260, 0.5736, -0.0238]) tensor([0.0488, 0.1977, 0.4861, 0.2674]) -Greedy action tensor([-1.9462, -0.4429, 0.6651, -0.1814]) tensor([0.0401, 0.1802, 0.5457, 0.2340]) -Greedy action tensor([-1.5888, -0.5128, 0.4834, 0.0087]) tensor([0.0595, 0.1744, 0.4723, 0.2938]) -Greedy action tensor([-1.8593, -0.4502, 0.6267, -0.1377]) tensor([0.0441, 0.1803, 0.5292, 0.2464]) -Greedy action tensor([-1.9197, -0.4146, 0.6563, -0.1679]) tensor([0.0410, 0.1845, 0.5384, 0.2361]) -Greedy action tensor([-1.7804, -0.4062, 0.5801, -0.1072]) tensor([0.0479, 0.1893, 0.5076, 0.2553]) -Greedy action tensor([-1.4739, -0.0789, 0.4077, -0.0217]) tensor([0.0630, 0.2542, 0.4136, 0.2692]) -Greedy action tensor([-1.1665, -0.0240, 0.2246, 0.0819]) tensor([0.0859, 0.2693, 0.3453, 0.2994]) -Greedy action tensor([-1.1858, -0.6328, 0.2623, -0.6963]) tensor([0.1159, 0.2016, 0.4933, 0.1892]) -Greedy action tensor([-1.8196, -0.0342, -0.2813, -0.4657]) tensor([0.0646, 0.3849, 0.3006, 0.2500]) -Greedy action tensor([-1.8218, -0.1255, 0.5633, -0.0810]) tensor([0.0434, 0.2369, 0.4719, 0.2477]) -Greedy action tensor([-1.0290, 0.1235, 0.2440, 0.0387]) tensor([0.0939, 0.2974, 0.3355, 0.2732]) -Greedy action tensor([-1.2530, -0.3220, 0.3841, -0.0962]) tensor([0.0843, 0.2140, 0.4335, 0.2682]) -Greedy action tensor([-1.0171, -0.1502, 0.3055, -0.1411]) tensor([0.1049, 0.2496, 0.3936, 0.2519]) -Greedy action tensor([-1.7938, -0.4347, 0.5905, -0.1172]) tensor([0.0474, 0.1846, 0.5145, 0.2535]) -Greedy action tensor([-1.8467, -0.4563, 0.6503, -0.0918]) tensor([0.0436, 0.1750, 0.5293, 0.2520]) -Greedy action tensor([-1.9369, -0.4453, 0.6641, -0.1760]) tensor([0.0404, 0.1796, 0.5448, 0.2352]) -Greedy action tensor([-1.8629, -0.1664, 0.6155, -0.2129]) tensor([0.0424, 0.2313, 0.5055, 0.2208]) -Greedy action tensor([-1.5983, 0.2958, 0.3939, -0.0260]) tensor([0.0505, 0.3357, 0.3704, 0.2434]) -Greedy action tensor([-1.8973, -0.3710, 0.6338, -0.1541]) tensor([0.0419, 0.1926, 0.5262, 0.2393]) -Greedy action tensor([ 0.4091, 1.2372, -0.0028, 0.3984]) tensor([0.2024, 0.4633, 0.1341, 0.2002]) -Greedy action tensor([-1.9226, -0.3487, 0.6398, -0.1666]) tensor([0.0407, 0.1963, 0.5275, 0.2355]) -Greedy action tensor([0.5532, 0.8128, 0.0175, 0.2021]) tensor([0.2789, 0.3616, 0.1632, 0.1963]) -Greedy action tensor([-1.9148, -0.4544, 0.6572, -0.1661]) tensor([0.0414, 0.1784, 0.5422, 0.2380]) -Greedy action tensor([-1.9210, -0.4544, 0.6602, -0.1692]) tensor([0.0411, 0.1783, 0.5435, 0.2371]) -Greedy action tensor([-1.8740, -0.4292, 0.6471, -0.1514]) tensor([0.0430, 0.1822, 0.5344, 0.2405]) -Greedy action tensor([-1.9347, -0.4265, 0.6589, -0.1731]) tensor([0.0405, 0.1828, 0.5412, 0.2355]) -Greedy action tensor([-1.5199, -0.1584, 0.3915, 0.0394]) tensor([0.0609, 0.2376, 0.4119, 0.2896]) -Greedy action tensor([-1.7964, -0.0842, 0.5608, -0.1875]) tensor([0.0452, 0.2507, 0.4779, 0.2261]) -Greedy action tensor([-1.9143, -0.4348, 0.6522, -0.1643]) tensor([0.0414, 0.1817, 0.5388, 0.2381]) -Greedy action tensor([-1.8847, -0.4196, 0.6336, -0.1479]) tensor([0.0427, 0.1848, 0.5299, 0.2425]) -Greedy action tensor([-1.8795, -0.3506, 0.6174, -0.1451]) tensor([0.0427, 0.1970, 0.5185, 0.2419]) -Greedy action tensor([-1.8819, -0.2834, 0.6177, -0.1471]) tensor([0.0420, 0.2079, 0.5119, 0.2382]) -Greedy action tensor([-0.8396, -0.4175, 0.7780, 1.0094]) tensor([0.0718, 0.1096, 0.3621, 0.4565]) -Greedy action tensor([-0.5834, 0.4237, 0.0939, -0.0294]) tensor([0.1343, 0.3676, 0.2644, 0.2337]) -Greedy action tensor([-1.8973, -0.4318, 0.6433, -0.1557]) tensor([0.0422, 0.1825, 0.5348, 0.2405]) -Greedy action tensor([-1.8163, -0.4790, 0.6487, -0.0240]) tensor([0.0443, 0.1687, 0.5211, 0.2659]) -Greedy action tensor([-1.7805, -0.3242, 0.5815, -0.1237]) tensor([0.0473, 0.2029, 0.5019, 0.2479]) -Greedy action tensor([-1.6888, 0.4281, 0.4809, -0.1947]) tensor([0.0444, 0.3689, 0.3889, 0.1979]) -Greedy action tensor([-1.6395, -0.3948, 0.5972, 0.0698]) tensor([0.0517, 0.1793, 0.4836, 0.2854]) -Greedy action tensor([-1.9149, -0.4028, 0.6511, -0.1507]) tensor([0.0410, 0.1860, 0.5336, 0.2393]) -Greedy action tensor([ 0.4328, -0.2203, 0.1179, -0.1020]) tensor([0.3526, 0.1835, 0.2573, 0.2065]) -Greedy action tensor([-1.9212, -0.4155, 0.6561, -0.1672]) tensor([0.0409, 0.1844, 0.5384, 0.2363]) -Greedy action tensor([ 0.5759, -1.0907, 0.7413, 0.1132]) tensor([0.3335, 0.0630, 0.3935, 0.2100]) -Greedy action tensor([ 0.8990, -1.6883, -0.0592, -0.6967]) tensor([0.6018, 0.0453, 0.2308, 0.1220]) -Greedy action tensor([ 0.7608, -1.0011, 0.2698, 1.0949]) tensor([0.3144, 0.0540, 0.1924, 0.4391]) -Greedy action tensor([-0.3084, -0.9523, 0.4358, -1.0836]) tensor([0.2445, 0.1284, 0.5145, 0.1126]) -Greedy action tensor([-0.2517, -1.3520, -0.3056, 0.0878]) tensor([0.2714, 0.0903, 0.2572, 0.3811]) -Greedy action tensor([ 0.0890, -0.0205, -0.2545, -0.1453]) tensor([0.2944, 0.2639, 0.2088, 0.2329]) -Greedy action tensor([-0.1919, -0.6449, -0.0150, -0.4429]) tensor([0.2772, 0.1762, 0.3309, 0.2157]) -Greedy action tensor([ 0.1082, -0.7243, 1.4920, 0.4231]) tensor([0.1472, 0.0640, 0.5872, 0.2016]) -Greedy action tensor([ 0.7858, -1.2870, 0.8468, 0.1733]) tensor([0.3662, 0.0461, 0.3892, 0.1985]) -Greedy action tensor([ 1.4618, -0.3907, 0.7173, -0.1808]) tensor([0.5479, 0.0859, 0.2602, 0.1060]) -Greedy action tensor([ 0.7275, -2.1126, 0.9883, 0.0712]) tensor([0.3478, 0.0203, 0.4514, 0.1804]) -Greedy action tensor([ 0.9412, 0.8435, -0.7055, -0.0601]) tensor([0.4054, 0.3676, 0.0781, 0.1489]) -Greedy action tensor([ 0.5356, -1.0859, 0.2962, -0.4482]) tensor([0.4240, 0.0838, 0.3337, 0.1585]) -Greedy action tensor([ 0.1304, -0.0646, -0.7303, 0.0040]) tensor([0.3198, 0.2631, 0.1352, 0.2818]) -Greedy action tensor([-0.1895, -0.7273, -0.5840, -0.2276]) tensor([0.3105, 0.1813, 0.2093, 0.2989]) -Greedy action tensor([-0.2442, -0.2493, 0.6810, -0.7049]) tensor([0.1943, 0.1933, 0.4900, 0.1225]) -Greedy action tensor([ 0.1533, 0.1320, -0.2411, -0.8734]) tensor([0.3321, 0.3251, 0.2239, 0.1190]) -Greedy action tensor([ 0.7069, -0.9377, 0.0652, -0.3709]) tensor([0.4855, 0.0937, 0.2556, 0.1652]) -Greedy action tensor([-0.2020, -0.2728, -0.6735, -0.3210]) tensor([0.2904, 0.2706, 0.1812, 0.2578]) -Greedy action tensor([-0.4965, -0.5793, -0.2415, -0.9154]) tensor([0.2585, 0.2379, 0.3336, 0.1700]) -Greedy action tensor([-0.0773, -0.0231, -0.0804, -0.4447]) tensor([0.2670, 0.2819, 0.2662, 0.1849]) -Greedy action tensor([-0.5742, -0.0763, -0.1918, -0.5417]) tensor([0.1944, 0.3198, 0.2849, 0.2008]) -Greedy action tensor([-0.4965, 0.2273, 0.1536, -0.3903]) tensor([0.1642, 0.3386, 0.3146, 0.1826]) -Greedy action tensor([ 0.0251, 0.3611, -1.1056, -0.5515]) tensor([0.3045, 0.4261, 0.0983, 0.1711]) -Greedy action tensor([ 0.4514, -0.8524, 0.0249, 0.2082]) tensor([0.3692, 0.1002, 0.2410, 0.2895]) -Greedy action tensor([-0.6105, -1.3411, -0.4304, -0.8517]) tensor([0.2886, 0.1390, 0.3456, 0.2268]) -Greedy action tensor([ 0.2919, 0.0018, -0.1317, -0.0104]) tensor([0.3183, 0.2381, 0.2084, 0.2352]) -Greedy action tensor([ 0.3938, -0.2789, 0.4444, -0.2509]) tensor([0.3239, 0.1653, 0.3407, 0.1700]) -Greedy action tensor([ 0.2344, -0.3167, 0.0556, -0.9403]) tensor([0.3675, 0.2118, 0.3073, 0.1135]) -Greedy action tensor([ 0.4668, -0.1793, -0.0357, -0.7212]) tensor([0.4108, 0.2153, 0.2486, 0.1252]) -Greedy action tensor([-0.7131, 0.0996, 0.3552, 0.2129]) tensor([0.1151, 0.2594, 0.3350, 0.2905]) -Greedy action tensor([ 0.1849, -1.1454, 0.7232, -0.3272]) tensor([0.2796, 0.0739, 0.4790, 0.1675]) -Greedy action tensor([-0.1814, -0.6686, -0.4752, -0.5924]) tensor([0.3308, 0.2032, 0.2466, 0.2193]) -Greedy action tensor([-1.1054, -1.3198, 1.1044, -1.0782]) tensor([0.0837, 0.0675, 0.7628, 0.0860]) -Greedy action tensor([ 0.6679, -0.3894, 0.3340, 0.5137]) tensor([0.3424, 0.1190, 0.2452, 0.2935]) -Greedy action tensor([-0.1526, 0.5596, 0.5050, -0.4308]) tensor([0.1746, 0.3560, 0.3371, 0.1322]) -Greedy action tensor([-0.4145, -1.1538, -0.0120, 0.5511]) tensor([0.1786, 0.0853, 0.2671, 0.4690]) -Greedy action tensor([ 0.6293, -0.4758, -0.0623, -0.5692]) tensor([0.4687, 0.1552, 0.2347, 0.1414]) -Greedy action tensor([0.4099, 0.1367, 0.5822, 0.4550]) tensor([0.2503, 0.1905, 0.2974, 0.2619]) -Greedy action tensor([ 0.0121, -0.1323, -0.5303, -0.9135]) tensor([0.3517, 0.3044, 0.2045, 0.1394]) -Greedy action tensor([-0.6460, 0.7179, 0.1115, -0.6173]) tensor([0.1239, 0.4845, 0.2642, 0.1275]) -Greedy action tensor([-0.1162, -1.0290, -0.4324, -0.1860]) tensor([0.3265, 0.1310, 0.2380, 0.3045]) -Greedy action tensor([ 0.6308, -1.0394, -0.1644, -0.2565]) tensor([0.4875, 0.0917, 0.2201, 0.2007]) -Greedy action tensor([-0.0101, -0.1787, 0.3363, 0.0767]) tensor([0.2299, 0.1942, 0.3251, 0.2508]) -Greedy action tensor([-0.0225, -1.5005, -0.4468, -0.3280]) tensor([0.3818, 0.0871, 0.2498, 0.2813]) -Greedy action tensor([ 0.1647, -0.0599, 0.4564, -0.1336]) tensor([0.2578, 0.2059, 0.3451, 0.1913]) -Greedy action tensor([-0.4239, -0.5849, -0.6038, 0.0959]) tensor([0.2289, 0.1949, 0.1912, 0.3850]) -Greedy action tensor([ 0.1549, -0.3881, 0.1878, -0.6960]) tensor([0.3288, 0.1910, 0.3398, 0.1404]) -Greedy action tensor([-0.6001, -0.0256, 0.4817, -0.9093]) tensor([0.1548, 0.2749, 0.4566, 0.1136]) -Greedy action tensor([-1.1332, 0.2404, -0.0983, -0.4441]) tensor([0.1025, 0.4048, 0.2885, 0.2042]) -Greedy action tensor([-0.1830, -1.2674, 0.4427, -0.6840]) tensor([0.2622, 0.0887, 0.4902, 0.1589]) -Greedy action tensor([-0.1396, 0.4615, -0.2919, -0.7199]) tensor([0.2357, 0.4300, 0.2024, 0.1319]) -Greedy action tensor([-0.9364, 0.1999, 0.2392, -0.0625]) tensor([0.1025, 0.3194, 0.3323, 0.2457]) -Greedy action tensor([ 0.8036, 0.2820, -0.5632, -0.4769]) tensor([0.4703, 0.2791, 0.1199, 0.1307]) -Greedy action tensor([-0.3567, 0.2386, -0.2457, 0.1365]) tensor([0.1796, 0.3257, 0.2007, 0.2941]) -Greedy action tensor([-1.4274, -0.5946, 0.0945, -1.3379]) tensor([0.1114, 0.2563, 0.5104, 0.1219]) -Greedy action tensor([-0.0278, -0.7918, 0.1054, 0.5284]) tensor([0.2298, 0.1070, 0.2625, 0.4007]) -Greedy action tensor([-0.7568, 0.1095, 0.3741, -1.4392]) tensor([0.1432, 0.3406, 0.4438, 0.0724]) -Greedy action tensor([ 0.3732, 0.0158, -0.1905, -0.6529]) tensor([0.3807, 0.2663, 0.2166, 0.1364]) -Greedy action tensor([ 0.1122, -1.7717, 0.5463, -0.4256]) tensor([0.3049, 0.0463, 0.4707, 0.1781]) -Greedy action tensor([-1.0813, -0.6536, -0.5251, -1.6086]) tensor([0.2054, 0.3151, 0.3583, 0.1212]) -Greedy action tensor([ 0.5652, -0.6302, 0.0564, -0.1590]) tensor([0.4187, 0.1267, 0.2517, 0.2029]) -Greedy action tensor([ 0.2795, -1.0479, -0.6484, 0.3020]) tensor([0.3727, 0.0988, 0.1473, 0.3812]) -Greedy action tensor([-0.0451, -1.3379, 0.0831, -0.3751]) tensor([0.3195, 0.0877, 0.3632, 0.2297]) -Greedy action tensor([-0.4261, -0.4311, 0.1711, -0.8290]) tensor([0.2232, 0.2221, 0.4055, 0.1492]) -Greedy action tensor([1.5867, 0.2977, 0.3185, 0.2150]) tensor([0.5523, 0.1522, 0.1554, 0.1401]) -Greedy action tensor([-0.2787, -0.6664, -0.2490, -0.6575]) tensor([0.2947, 0.2000, 0.3036, 0.2018]) -Greedy action tensor([-0.0967, -0.5824, -0.5077, -0.0032]) tensor([0.2962, 0.1822, 0.1964, 0.3252]) -Greedy action tensor([-0.3705, -0.5046, -0.9316, -1.0344]) tensor([0.3378, 0.2954, 0.1928, 0.1739]) -Greedy action tensor([ 0.0285, 0.4009, -0.1311, -0.2266]) tensor([0.2452, 0.3558, 0.2090, 0.1900]) -Greedy action tensor([-1.5720, -0.5226, -0.0215, -1.4481]) tensor([0.1031, 0.2944, 0.4859, 0.1167]) -Greedy action tensor([ 0.6934, -0.0578, -0.1297, -0.0794]) tensor([0.4215, 0.1988, 0.1851, 0.1946]) -Greedy action tensor([-0.5495, -0.0554, -0.9580, 0.3993]) tensor([0.1699, 0.2785, 0.1129, 0.4387]) -Greedy action tensor([ 0.1179, 0.8626, -0.5670, -0.9073]) tensor([0.2520, 0.5306, 0.1270, 0.0904]) -Greedy action tensor([-0.3728, 0.6844, -0.8345, -0.5301]) tensor([0.1865, 0.5367, 0.1175, 0.1593]) -Greedy action tensor([ 0.1682, 0.7626, 0.1376, -0.0358]) tensor([0.2175, 0.3941, 0.2110, 0.1774]) -Greedy action tensor([ 0.0151, -0.5962, 0.4159, -0.0312]) tensor([0.2506, 0.1360, 0.3742, 0.2393]) -Greedy action tensor([ 0.3889, -1.4136, -0.3602, 0.3927]) tensor([0.3786, 0.0624, 0.1790, 0.3800]) -Greedy action tensor([-0.4862, -1.1495, -0.3525, -0.3510]) tensor([0.2629, 0.1355, 0.3006, 0.3010]) -Greedy action tensor([ 1.4126, -0.7541, 1.6245, 1.4626]) tensor([0.2940, 0.0337, 0.3633, 0.3090]) -Greedy action tensor([ 1.6659, -0.3858, 1.3216, -0.4122]) tensor([0.5096, 0.0655, 0.3611, 0.0638]) -Greedy action tensor([ 0.0316, 0.1619, 0.2353, -0.4375]) tensor([0.2506, 0.2855, 0.3072, 0.1568]) -Greedy action tensor([ 1.3778, -0.5424, -0.4108, 0.5916]) tensor([0.5652, 0.0828, 0.0945, 0.2575]) -Greedy action tensor([ 0.4529, -0.0333, 0.2207, 0.0566]) tensor([0.3246, 0.1996, 0.2574, 0.2184]) -Greedy action tensor([ 1.3241, -0.7545, 0.0894, 0.3749]) tensor([0.5546, 0.0694, 0.1613, 0.2147]) -Greedy action tensor([ 1.7605, -0.3354, -0.4607, 0.2875]) tensor([0.6846, 0.0842, 0.0743, 0.1569]) -Greedy action tensor([ 1.9485, -0.9644, -0.8586, 0.5516]) tensor([0.7342, 0.0399, 0.0443, 0.1816]) -Greedy action tensor([ 1.0413, -0.1059, -0.8658, 0.2509]) tensor([0.5209, 0.1654, 0.0774, 0.2363]) -Greedy action tensor([ 1.0531, -0.6280, -0.0915, 0.8091]) tensor([0.4371, 0.0814, 0.1391, 0.3424]) -Greedy action tensor([ 1.3555, -0.3137, -0.4264, 0.1603]) tensor([0.6027, 0.1135, 0.1014, 0.1824]) -Greedy action tensor([ 1.4008, -0.4836, -0.3587, 0.1450]) tensor([0.6215, 0.0944, 0.1070, 0.1770]) -Greedy action tensor([ 1.2500, -0.3467, -0.6017, 0.8380]) tensor([0.4946, 0.1002, 0.0776, 0.3276]) -Greedy action tensor([ 1.7721, -0.5896, -0.5188, 0.7243]) tensor([0.6468, 0.0610, 0.0654, 0.2268]) -Greedy action tensor([ 1.2595, -0.2982, -0.3463, 0.3664]) tensor([0.5492, 0.1157, 0.1102, 0.2248]) -Greedy action tensor([ 1.7018, -0.6729, -0.6808, 0.2806]) tensor([0.7009, 0.0652, 0.0647, 0.1692]) -Greedy action tensor([ 1.4784, -0.5730, -0.4501, 0.7585]) tensor([0.5680, 0.0730, 0.0826, 0.2765]) -Greedy action tensor([ 1.1365, 0.0166, -0.0826, 0.2874]) tensor([0.4879, 0.1592, 0.1442, 0.2087]) -Greedy action tensor([ 1.0805, -0.1738, -0.0351, -0.0584]) tensor([0.5173, 0.1476, 0.1695, 0.1656]) -Greedy action tensor([ 1.8435, -0.5396, -0.2871, -0.0280]) tensor([0.7326, 0.0676, 0.0870, 0.1128]) -Greedy action tensor([ 1.0236, -0.4657, -0.3458, 0.5083]) tensor([0.4815, 0.1086, 0.1224, 0.2876]) -Greedy action tensor([ 0.7518, 0.0017, 0.0754, -0.2158]) tensor([0.4236, 0.2001, 0.2154, 0.1610]) -Greedy action tensor([ 1.2967, -0.3196, -0.4004, -0.0149]) tensor([0.6056, 0.1203, 0.1110, 0.1632]) -Greedy action tensor([ 1.0616, -0.6138, -0.0160, 0.2847]) tensor([0.5032, 0.0942, 0.1713, 0.2314]) -Greedy action tensor([ 1.5016, -0.5698, -0.4037, 0.2167]) tensor([0.6446, 0.0812, 0.0959, 0.1783]) -Greedy action tensor([ 1.4807, -0.5384, -0.2374, 0.2317]) tensor([0.6254, 0.0830, 0.1122, 0.1794]) -Greedy action tensor([ 1.2127, -0.0930, -0.3770, -0.3211]) tensor([0.5915, 0.1603, 0.1206, 0.1276]) -Greedy action tensor([ 2.1855, -0.5345, -0.4936, 0.3712]) tensor([0.7707, 0.0508, 0.0529, 0.1256]) -Greedy action tensor([ 1.0222, -0.2857, -0.4670, 0.4284]) tensor([0.4882, 0.1320, 0.1101, 0.2696]) -Greedy action tensor([ 0.8170, -0.0766, -0.3532, 0.0483]) tensor([0.4581, 0.1874, 0.1421, 0.2124]) -Greedy action tensor([ 1.3787, -0.2196, 0.0325, 0.3334]) tensor([0.5513, 0.1115, 0.1434, 0.1938]) -Greedy action tensor([ 1.4392, -1.0171, -0.1622, -0.0926]) tensor([0.6651, 0.0570, 0.1341, 0.1438]) -Greedy action tensor([ 1.1337, -0.3538, -0.7781, 0.2360]) tensor([0.5614, 0.1268, 0.0830, 0.2288]) -Greedy action tensor([ 1.4952, -0.6341, -0.3068, 0.1228]) tensor([0.6504, 0.0774, 0.1073, 0.1649]) -Greedy action tensor([ 1.3525, -0.1835, -1.0096, 0.0808]) tensor([0.6290, 0.1354, 0.0593, 0.1763]) -Greedy action tensor([ 1.7868, -0.8385, -0.3880, 0.5384]) tensor([0.6789, 0.0492, 0.0771, 0.1948]) -Greedy action tensor([ 1.4916, -0.3481, -0.7926, 0.1312]) tensor([0.6591, 0.1047, 0.0671, 0.1691]) -Greedy action tensor([ 1.2396, -0.3324, -0.3706, 0.0470]) tensor([0.5845, 0.1214, 0.1168, 0.1774]) -Greedy action tensor([1.3048e+00, 1.1784e-02, 1.0276e-03, 1.1901e-02]) tensor([0.5493, 0.1508, 0.1491, 0.1508]) -Greedy action tensor([ 2.2686, -1.3242, 0.0078, 0.8459]) tensor([0.7284, 0.0200, 0.0759, 0.1756]) -Greedy action tensor([ 1.0675, -0.2270, -0.1415, 0.1614]) tensor([0.5059, 0.1386, 0.1510, 0.2044]) -Greedy action tensor([ 1.6791, -0.1593, -0.8450, 0.3925]) tensor([0.6599, 0.1050, 0.0529, 0.1823]) -Greedy action tensor([ 1.8433, -0.1185, -1.0771, 0.7586]) tensor([0.6525, 0.0917, 0.0352, 0.2206]) -Greedy action tensor([ 2.5296, -0.9042, -0.0378, 1.0190]) tensor([0.7520, 0.0243, 0.0577, 0.1660]) -Greedy action tensor([ 2.0142, -1.0011, 0.0191, 0.3248]) tensor([0.7301, 0.0358, 0.0993, 0.1348]) -Greedy action tensor([ 1.2400, -0.3217, -0.5326, 0.0723]) tensor([0.5915, 0.1241, 0.1005, 0.1840]) -Greedy action tensor([ 1.8301, -0.3490, -0.1791, 0.6773]) tensor([0.6398, 0.0724, 0.0858, 0.2020]) -Greedy action tensor([ 2.0714, -0.1960, -0.1259, 0.0935]) tensor([0.7391, 0.0766, 0.0821, 0.1023]) -Greedy action tensor([ 0.8714, -0.1637, -0.4326, 0.4604]) tensor([0.4368, 0.1551, 0.1185, 0.2896]) -Greedy action tensor([ 1.2602, -0.4089, -0.1595, 0.4517]) tensor([0.5331, 0.1005, 0.1289, 0.2375]) -Greedy action tensor([ 1.2047, -0.0273, 0.0640, 0.2549]) tensor([0.5005, 0.1460, 0.1600, 0.1936]) -Greedy action tensor([ 1.4103, -0.1413, -0.5229, 0.7603]) tensor([0.5323, 0.1128, 0.0770, 0.2779]) -Greedy action tensor([ 2.3126, -0.7504, -0.8206, 0.3961]) tensor([0.8081, 0.0378, 0.0352, 0.1189]) -Greedy action tensor([ 1.7629, -0.3164, -0.5991, 0.2938]) tensor([0.6899, 0.0863, 0.0650, 0.1588]) -Greedy action tensor([ 1.7766, -0.1171, -0.6306, 0.6243]) tensor([0.6425, 0.0967, 0.0579, 0.2030]) -Greedy action tensor([ 1.5425, -0.3351, -0.3122, 0.1772]) tensor([0.6391, 0.0978, 0.1000, 0.1632]) -Greedy action tensor([ 1.4673, -0.1258, -0.8879, 0.4540]) tensor([0.6020, 0.1224, 0.0571, 0.2185]) -Greedy action tensor([ 1.6960, -0.8349, -0.6405, 0.6619]) tensor([0.6528, 0.0520, 0.0631, 0.2321]) -Greedy action tensor([ 1.4588, -0.2347, -0.8611, 0.6411]) tensor([0.5802, 0.1067, 0.0570, 0.2561]) -Greedy action tensor([ 1.9878, -1.1265, 0.1312, 0.8075]) tensor([0.6632, 0.0295, 0.1036, 0.2037]) -Greedy action tensor([ 1.8842, -0.9614, -0.6132, 0.2580]) tensor([0.7479, 0.0435, 0.0615, 0.1471]) -Greedy action tensor([ 1.6600, -0.4490, -0.2807, 0.4997]) tensor([0.6336, 0.0769, 0.0910, 0.1986]) -Greedy action tensor([ 1.5480, -0.2943, -0.3056, 0.2979]) tensor([0.6244, 0.0989, 0.0978, 0.1789]) -Greedy action tensor([ 1.7299, -0.8950, -0.1331, 0.4829]) tensor([0.6601, 0.0478, 0.1024, 0.1897]) -Greedy action tensor([ 1.8352, -0.8353, -0.7865, 0.2899]) tensor([0.7379, 0.0511, 0.0536, 0.1574]) -Greedy action tensor([ 0.9775, -0.2722, -0.2310, 0.1663]) tensor([0.4927, 0.1412, 0.1471, 0.2189]) -Greedy action tensor([ 1.1000, -0.3044, -0.4392, 0.1610]) tensor([0.5402, 0.1326, 0.1159, 0.2112]) -Greedy action tensor([ 1.2509, -0.2093, -0.5892, 0.3147]) tensor([0.5608, 0.1302, 0.0891, 0.2199]) -Greedy action tensor([ 1.1900, -0.4003, -0.0477, 0.7713]) tensor([0.4647, 0.0947, 0.1348, 0.3057]) -Greedy action tensor([ 2.0168, -1.3224, -0.4460, 0.1625]) tensor([0.7830, 0.0278, 0.0667, 0.1226]) -Greedy action tensor([ 1.3292, -0.6368, -0.2104, 0.2361]) tensor([0.5918, 0.0829, 0.1269, 0.1984]) -Greedy action tensor([ 1.1355, 0.1093, -0.6447, -0.1368]) tensor([0.5534, 0.1983, 0.0933, 0.1550]) -Greedy action tensor([ 1.2457, -0.5179, -0.7392, 0.5509]) tensor([0.5531, 0.0948, 0.0760, 0.2761]) -Greedy action tensor([ 1.4220, -0.7473, -0.4733, 0.1211]) tensor([0.6507, 0.0743, 0.0978, 0.1772]) -Greedy action tensor([ 2.2559, -1.0477, 0.1531, 1.0916]) tensor([0.6798, 0.0250, 0.0830, 0.2122]) -Greedy action tensor([ 1.6121, -1.0469, -0.7623, -0.1219]) tensor([0.7465, 0.0523, 0.0695, 0.1318]) -Greedy action tensor([ 1.2655, -0.2159, -0.2154, 0.0982]) tensor([0.5663, 0.1287, 0.1288, 0.1762]) -Greedy action tensor([ 1.2831, -0.3268, -0.7784, 0.4541]) tensor([0.5670, 0.1134, 0.0722, 0.2475]) -Greedy action tensor([ 1.8962, -0.7680, -0.5633, 0.4612]) tensor([0.7177, 0.0500, 0.0613, 0.1709]) -Greedy action tensor([ 1.8744, -1.0987, -0.3885, 0.1136]) tensor([0.7535, 0.0385, 0.0784, 0.1295]) -Greedy action tensor([ 1.3682, -0.1755, -1.1251, 0.5042]) tensor([0.5822, 0.1243, 0.0481, 0.2454]) -Greedy action tensor([ 0.9741, -0.1010, -0.1795, 0.0656]) tensor([0.4855, 0.1657, 0.1532, 0.1957]) -Greedy action tensor([ 2.1049, -0.6256, -0.1001, 0.6148]) tensor([0.7139, 0.0465, 0.0787, 0.1609]) -Greedy action tensor([ 1.4238, -0.2741, -0.4516, 0.0232]) tensor([0.6318, 0.1157, 0.0969, 0.1557]) -Greedy action tensor([ 0.9032, -0.4718, -0.0431, -0.3741]) tensor([0.5209, 0.1317, 0.2022, 0.1452]) -Greedy action tensor([ 0.8759, -0.4974, -0.1600, -0.2543]) tensor([0.5178, 0.1312, 0.1838, 0.1672]) -Greedy action tensor([ 0.5951, 0.0838, -0.1036, 0.0959]) tensor([0.3698, 0.2218, 0.1839, 0.2245]) -Greedy action tensor([ 0.9191, -0.3411, 0.0676, -0.1315]) tensor([0.4854, 0.1377, 0.2072, 0.1698]) -Greedy action tensor([ 0.9117, -0.5442, -0.0850, -0.3106]) tensor([0.5272, 0.1229, 0.1946, 0.1553]) -Greedy action tensor([ 0.4431, -0.0286, -0.1105, -0.1309]) tensor([0.3620, 0.2259, 0.2081, 0.2039]) -Greedy action tensor([ 0.8565, -0.7767, 0.0483, -0.4162]) tensor([0.5206, 0.1017, 0.2320, 0.1458]) -Greedy action tensor([ 1.0510, -0.6528, -0.0147, -0.5143]) tensor([0.5762, 0.1049, 0.1985, 0.1204]) -Greedy action tensor([ 0.5552, -0.0175, -0.1492, -0.0986]) tensor([0.3878, 0.2187, 0.1917, 0.2017]) -Greedy action tensor([ 0.9909, -0.5885, 0.0524, -0.4913]) tensor([0.5481, 0.1130, 0.2144, 0.1245]) -Greedy action tensor([ 0.9695, -0.8274, 0.2208, -0.3947]) tensor([0.5279, 0.0875, 0.2497, 0.1349]) -Greedy action tensor([ 1.0784, -0.4770, 0.1048, -0.2751]) tensor([0.5414, 0.1143, 0.2045, 0.1399]) -Greedy action tensor([ 0.8052, -0.2169, -0.0143, -0.0194]) tensor([0.4467, 0.1607, 0.1968, 0.1958]) -Greedy action tensor([ 0.7467, -0.4208, -0.0262, -0.3920]) tensor([0.4778, 0.1487, 0.2206, 0.1530]) -Greedy action tensor([ 0.8206, -0.4531, 0.0898, -0.4331]) tensor([0.4886, 0.1367, 0.2353, 0.1395]) -Greedy action tensor([ 1.1175, -0.8050, -0.0679, -0.5517]) tensor([0.6097, 0.0892, 0.1863, 0.1149]) -Greedy action tensor([ 0.9387, -0.2704, -0.1125, -0.1267]) tensor([0.5019, 0.1498, 0.1754, 0.1729]) -Greedy action tensor([ 1.1564, -0.6857, 0.0195, -0.6488]) tensor([0.6084, 0.0964, 0.1952, 0.1000]) -Greedy action tensor([ 0.7944, -0.6106, 0.1491, -0.5767]) tensor([0.4941, 0.1213, 0.2592, 0.1254]) -Greedy action tensor([ 0.7757, 0.0347, -0.0757, -0.0137]) tensor([0.4242, 0.2022, 0.1810, 0.1926]) -Greedy action tensor([ 0.7539, -0.5950, 0.0763, -0.1896]) tensor([0.4637, 0.1203, 0.2355, 0.1805]) -Greedy action tensor([ 1.0237, -0.8744, 0.0856, -0.8192]) tensor([0.5884, 0.0882, 0.2303, 0.0932]) -Greedy action tensor([ 0.6854, -0.3515, -0.0035, -0.1736]) tensor([0.4385, 0.1555, 0.2202, 0.1858]) -Greedy action tensor([ 1.1492, -0.8271, 0.0569, -0.6070]) tensor([0.6073, 0.0842, 0.2037, 0.1049]) -Greedy action tensor([ 1.0444, -0.5399, 0.0047, -0.4497]) tensor([0.5608, 0.1150, 0.1983, 0.1259]) -Greedy action tensor([ 0.8907, -0.4681, -0.0046, -0.5493]) tensor([0.5257, 0.1351, 0.2147, 0.1245]) -Greedy action tensor([ 1.0667, -0.8736, 0.1070, -0.5493]) tensor([0.5796, 0.0833, 0.2220, 0.1152]) -Greedy action tensor([ 0.8076, -0.9746, 0.2100, -0.8030]) tensor([0.5213, 0.0877, 0.2868, 0.1042]) -Greedy action tensor([ 0.9437, -0.1575, -0.1576, -0.1287]) tensor([0.4982, 0.1656, 0.1656, 0.1705]) -Greedy action tensor([ 0.5344, -0.2101, -0.0405, -0.1366]) tensor([0.3923, 0.1863, 0.2208, 0.2006]) -Greedy action tensor([ 0.9077, -0.4991, -0.1391, -0.3600]) tensor([0.5326, 0.1305, 0.1870, 0.1499]) -Greedy action tensor([ 1.0444, -0.5851, 0.1252, -0.4463]) tensor([0.5494, 0.1077, 0.2191, 0.1237]) -Greedy action tensor([ 1.1710, -0.7782, -0.0336, -0.3540]) tensor([0.6025, 0.0858, 0.1806, 0.1311]) -Greedy action tensor([ 0.8280, -0.5104, -0.1706, -0.3929]) tensor([0.5193, 0.1362, 0.1913, 0.1532]) -Greedy action tensor([ 1.0234, -0.4647, -0.1586, -0.5527]) tensor([0.5750, 0.1298, 0.1763, 0.1189]) -Greedy action tensor([ 0.8089, -0.5054, -0.0072, -0.2796]) tensor([0.4884, 0.1312, 0.2159, 0.1644]) -Greedy action tensor([ 0.6555, 0.0767, -0.0187, -0.0050]) tensor([0.3866, 0.2167, 0.1970, 0.1997]) -Greedy action tensor([ 0.9331, -0.6441, 0.0284, -0.3625]) tensor([0.5305, 0.1096, 0.2147, 0.1452]) -Greedy action tensor([ 0.4353, -0.0273, -0.0644, 0.0654]) tensor([0.3416, 0.2151, 0.2073, 0.2360]) -Greedy action tensor([ 1.0617, -0.6968, 0.0286, -0.6280]) tensor([0.5838, 0.1006, 0.2078, 0.1078]) -Greedy action tensor([ 0.9649, -0.4562, 0.0220, -0.1881]) tensor([0.5137, 0.1240, 0.2001, 0.1622]) -Greedy action tensor([ 0.6932, -0.4431, -0.0976, -0.2850]) tensor([0.4650, 0.1493, 0.2109, 0.1748]) -Greedy action tensor([ 0.6330, -0.4328, -0.0479, -0.1499]) tensor([0.4333, 0.1493, 0.2193, 0.1981]) -Greedy action tensor([ 0.5924, -0.1767, -0.0405, -0.1122]) tensor([0.4018, 0.1862, 0.2134, 0.1986]) -Greedy action tensor([ 0.6326, -0.0212, -0.1484, -0.1262]) tensor([0.4088, 0.2126, 0.1872, 0.1914]) -Greedy action tensor([ 0.3734, -0.0814, -0.2256, 0.0053]) tensor([0.3477, 0.2206, 0.1910, 0.2406]) -Greedy action tensor([ 1.2345, -0.6886, -0.0149, -0.6463]) tensor([0.6308, 0.0922, 0.1808, 0.0962]) -Greedy action tensor([ 0.8091, -0.4805, 0.0599, -0.3245]) tensor([0.4831, 0.1330, 0.2284, 0.1555]) -Greedy action tensor([ 0.7871, -0.6291, -0.0020, -0.5664]) tensor([0.5115, 0.1241, 0.2323, 0.1321]) -Greedy action tensor([ 0.9036, -0.4730, 0.0289, -0.4976]) tensor([0.5220, 0.1318, 0.2177, 0.1286]) -Greedy action tensor([ 0.5737, -0.6226, -0.0449, -0.2389]) tensor([0.4377, 0.1323, 0.2358, 0.1942]) -Greedy action tensor([ 0.8942, -0.7454, -0.0934, -0.4544]) tensor([0.5476, 0.1063, 0.2040, 0.1422]) -Greedy action tensor([ 0.1368, 0.1567, -0.1170, -0.4507]) tensor([0.2983, 0.3044, 0.2315, 0.1658]) -Greedy action tensor([ 0.5828, 0.0262, 0.0586, -0.0031]) tensor([0.3674, 0.2106, 0.2175, 0.2045]) -Greedy action tensor([ 1.3209, -0.8373, -0.2292, -0.5903]) tensor([0.6777, 0.0783, 0.1438, 0.1002]) -Greedy action tensor([ 0.6424, 0.0693, 0.0454, -0.1400]) tensor([0.3889, 0.2192, 0.2141, 0.1778]) -Greedy action tensor([ 0.4090, -0.2134, -0.1330, -0.3178]) tensor([0.3844, 0.2063, 0.2235, 0.1858]) -Greedy action tensor([ 0.8467, -0.1827, 0.0543, -0.1392]) tensor([0.4581, 0.1636, 0.2074, 0.1709]) -Greedy action tensor([ 0.1469, 0.0598, -0.2391, 0.1538]) tensor([0.2775, 0.2544, 0.1886, 0.2794]) -Greedy action tensor([ 0.8322, -0.8500, 0.0526, -0.4561]) tensor([0.5207, 0.0968, 0.2388, 0.1436]) -Greedy action tensor([ 1.2211, -0.6975, -0.1246, -0.6991]) tensor([0.6436, 0.0945, 0.1676, 0.0943]) -Greedy action tensor([ 1.0079, -0.6229, -0.0709, -0.3128]) tensor([0.5547, 0.1086, 0.1886, 0.1481]) -Greedy action tensor([ 0.7478, -0.5864, -0.0315, -0.2535]) tensor([0.4786, 0.1260, 0.2195, 0.1758]) -Greedy action tensor([ 0.7546, -0.3391, -0.0543, -0.1067]) tensor([0.4540, 0.1521, 0.2022, 0.1918]) -Greedy action tensor([ 0.8155, -0.3900, -0.2079, -0.4768]) tensor([0.5172, 0.1549, 0.1859, 0.1420]) -Greedy action tensor([ 0.6999, -0.3228, -0.0039, -0.4278]) tensor([0.4591, 0.1651, 0.2271, 0.1486]) -Greedy action tensor([ 0.7792, -0.2515, 0.0963, -0.3021]) tensor([0.4543, 0.1621, 0.2295, 0.1541]) -Greedy action tensor([ 0.9748, -0.5331, -0.0550, -0.3472]) tensor([0.5420, 0.1200, 0.1935, 0.1445]) -Greedy action tensor([ 0.8689, -0.7764, 0.0096, -0.4440]) tensor([0.5304, 0.1023, 0.2246, 0.1427]) -Greedy action tensor([ 0.4094, -0.4727, -0.2481, -0.0251]) tensor([0.3877, 0.1605, 0.2009, 0.2510]) -Greedy action tensor([ 1.0543, -0.4669, -0.0712, -0.5524]) tensor([0.5736, 0.1253, 0.1861, 0.1150]) -Greedy action tensor([ 1.0011, -0.3706, -0.0332, -0.3970]) tensor([0.5387, 0.1367, 0.1915, 0.1331]) -Greedy action tensor([ 0.5915, -0.0086, -0.0845, -0.0418]) tensor([0.3864, 0.2120, 0.1965, 0.2051]) -Greedy action tensor([ 0.9580, -0.4426, -0.0970, -0.2629]) tensor([0.5292, 0.1304, 0.1843, 0.1561]) -Greedy action tensor([ 1.2608, -0.7569, 0.0439, -0.8350]) tensor([0.6443, 0.0857, 0.1908, 0.0792]) -Greedy action tensor([ 0.5761, -0.0986, -0.0524, -0.1431]) tensor([0.3953, 0.2013, 0.2108, 0.1926]) -Greedy action tensor([ 1.1981, -0.7461, 0.0931, -0.6146]) tensor([0.6107, 0.0874, 0.2023, 0.0997]) -Greedy action tensor([ 0.8029, -0.5373, -0.1150, -0.1703]) tensor([0.4904, 0.1284, 0.1959, 0.1853]) -Greedy action tensor([ 0.8191, -0.0459, 0.0626, 0.0848]) tensor([0.4219, 0.1776, 0.1980, 0.2025]) -Greedy action tensor([ 0.2573, -0.2746, -0.1072, -0.3121]) tensor([0.3511, 0.2063, 0.2439, 0.1987]) -Greedy action tensor([ 0.9593, -0.1002, -0.0701, -0.1086]) tensor([0.4884, 0.1693, 0.1745, 0.1679]) -Greedy action tensor([-1.8583, -0.4191, 0.6233, -0.1353]) tensor([0.0439, 0.1851, 0.5251, 0.2459]) -Greedy action tensor([-1.7336, -0.5030, 0.5628, -0.1031]) tensor([0.0514, 0.1758, 0.5105, 0.2623]) -Greedy action tensor([-1.5487, -0.4286, 0.5234, -0.2716]) tensor([0.0641, 0.1966, 0.5093, 0.2300]) -Greedy action tensor([-1.3124, -0.4109, 0.4456, -0.1842]) tensor([0.0809, 0.1994, 0.4696, 0.2501]) -Greedy action tensor([-1.8432, -0.4667, 0.6054, -0.1175]) tensor([0.0451, 0.1788, 0.5225, 0.2536]) -Greedy action tensor([-1.8818, -0.2795, 0.6095, -0.1471]) tensor([0.0422, 0.2094, 0.5094, 0.2390]) -Greedy action tensor([-1.3760, -0.4411, 0.3678, 0.1303]) tensor([0.0726, 0.1849, 0.4151, 0.3274]) -Greedy action tensor([-1.9292, -0.4295, 0.6639, -0.1693]) tensor([0.0405, 0.1817, 0.5421, 0.2356]) -Greedy action tensor([-1.7425, -0.3675, 0.5487, -0.0634]) tensor([0.0495, 0.1958, 0.4894, 0.2653]) -Greedy action tensor([-1.4944, -0.0361, 0.4165, -0.0689]) tensor([0.0617, 0.2651, 0.4168, 0.2565]) -Greedy action tensor([-1.0753, 0.6106, 0.2298, -0.1080]) tensor([0.0786, 0.4244, 0.2900, 0.2069]) -Greedy action tensor([-1.8956, -0.4119, 0.6544, -0.1331]) tensor([0.0416, 0.1834, 0.5327, 0.2424]) -Greedy action tensor([-1.8678, -0.4598, 0.6353, -0.1436]) tensor([0.0436, 0.1784, 0.5332, 0.2447]) -Greedy action tensor([-1.6128, -0.0136, 0.5153, -0.1912]) tensor([0.0541, 0.2676, 0.4542, 0.2241]) -Greedy action tensor([-0.7361, 1.0471, 0.1194, 0.2590]) tensor([0.0833, 0.4955, 0.1959, 0.2253]) -Greedy action tensor([-1.9110, -0.4008, 0.6502, -0.1611]) tensor([0.0413, 0.1868, 0.5344, 0.2375]) -Greedy action tensor([-1.4916, -0.6139, 0.4933, 0.1417]) tensor([0.0633, 0.1522, 0.4605, 0.3240]) -Greedy action tensor([-1.8761, -0.3532, 0.6310, -0.1414]) tensor([0.0425, 0.1949, 0.5216, 0.2409]) -Greedy action tensor([-1.8642, -0.2113, 0.6102, -0.1372]) tensor([0.0422, 0.2202, 0.5006, 0.2371]) -Greedy action tensor([-1.7443, -0.4701, 0.5428, -0.0705]) tensor([0.0506, 0.1810, 0.4984, 0.2699]) -Greedy action tensor([-1.7814, -0.0897, 0.5651, -0.0761]) tensor([0.0447, 0.2426, 0.4669, 0.2459]) -Greedy action tensor([-1.8467, -0.2497, 0.5913, -0.1398]) tensor([0.0437, 0.2156, 0.5000, 0.2407]) -Greedy action tensor([-1.9389, -0.4456, 0.6652, -0.1769]) tensor([0.0403, 0.1796, 0.5452, 0.2349]) -Greedy action tensor([-1.9030, -0.4517, 0.6453, -0.1638]) tensor([0.0421, 0.1798, 0.5384, 0.2397]) -Greedy action tensor([-1.9085, -0.4504, 0.6535, -0.1623]) tensor([0.0417, 0.1791, 0.5402, 0.2389]) -Greedy action tensor([-1.7329, -0.5228, 0.5518, -0.0696]) tensor([0.0514, 0.1724, 0.5049, 0.2713]) -Greedy action tensor([-1.9293, -0.4317, 0.6609, -0.1722]) tensor([0.0407, 0.1818, 0.5420, 0.2356]) -Greedy action tensor([-0.7509, -0.1884, -0.7765, -0.2105]) tensor([0.1836, 0.3222, 0.1790, 0.3152]) -Greedy action tensor([-1.9080, -0.4293, 0.6493, -0.1628]) tensor([0.0416, 0.1827, 0.5372, 0.2385]) -Greedy action tensor([-1.6834, -0.4502, 0.6418, -0.0028]) tensor([0.0499, 0.1713, 0.5107, 0.2680]) -Greedy action tensor([-0.5856, 0.5153, 0.0591, 0.0283]) tensor([0.1289, 0.3875, 0.2456, 0.2381]) -Greedy action tensor([-1.9172, -0.4784, 0.7465, -0.1303]) tensor([0.0392, 0.1651, 0.5619, 0.2338]) -Greedy action tensor([-0.7285, 0.7119, 0.1439, 0.3907]) tensor([0.0937, 0.3954, 0.2241, 0.2868]) -Greedy action tensor([-1.7786, -0.2722, 0.5917, -0.0456]) tensor([0.0457, 0.2062, 0.4893, 0.2587]) -Greedy action tensor([-0.2887, 0.5079, 0.0121, 0.5405]) tensor([0.1458, 0.3233, 0.1969, 0.3340]) -Greedy action tensor([-1.7248, -0.4606, 0.7264, 0.1374]) tensor([0.0443, 0.1568, 0.5138, 0.2851]) -Greedy action tensor([-1.8843, -0.2491, 0.6183, -0.1465]) tensor([0.0416, 0.2135, 0.5083, 0.2366]) -Greedy action tensor([-1.8337, -0.0585, 0.5500, -0.1148]) tensor([0.0429, 0.2530, 0.4649, 0.2392]) -Greedy action tensor([-1.9282, -0.4494, 0.6637, -0.1724]) tensor([0.0408, 0.1789, 0.5444, 0.2360]) -Greedy action tensor([-1.6047, 0.4680, 0.4070, 0.0313]) tensor([0.0464, 0.3686, 0.3468, 0.2382]) -Greedy action tensor([-1.8956, -0.3784, 0.6401, -0.1553]) tensor([0.0419, 0.1909, 0.5286, 0.2386]) -Greedy action tensor([-1.2053, -0.4117, 0.5829, -0.4456]) tensor([0.0883, 0.1952, 0.5278, 0.1887]) -Greedy action tensor([-1.2629, -0.2338, 0.2453, 0.2531]) tensor([0.0777, 0.2174, 0.3511, 0.3538]) -Greedy action tensor([-1.8048, -0.3008, 0.5763, -0.1071]) tensor([0.0459, 0.2066, 0.4967, 0.2508]) -Greedy action tensor([-1.7278, -0.2360, 0.5295, -0.0866]) tensor([0.0496, 0.2204, 0.4740, 0.2560]) -Greedy action tensor([-1.5530, -0.3415, 0.6626, 0.0203]) tensor([0.0545, 0.1831, 0.4996, 0.2628]) -Greedy action tensor([-0.5728, 0.8695, 0.0321, 0.1383]) tensor([0.1099, 0.4650, 0.2013, 0.2238]) -Greedy action tensor([-1.8676, -0.3132, 0.6236, -0.1471]) tensor([0.0427, 0.2023, 0.5162, 0.2388]) -Greedy action tensor([-1.5563, -0.5391, 0.7089, 0.2708]) tensor([0.0510, 0.1410, 0.4911, 0.3169]) -Greedy action tensor([-1.2953e+00, -2.5901e-01, 3.5002e-01, -1.1224e-03]) tensor([0.0791, 0.2228, 0.4097, 0.2884]) -Greedy action tensor([-1.1417, 0.2825, 0.2555, 0.1193]) tensor([0.0786, 0.3264, 0.3177, 0.2773]) -Greedy action tensor([-1.8959, -0.3216, 0.6224, -0.1438]) tensor([0.0417, 0.2011, 0.5169, 0.2403]) -Greedy action tensor([-1.7747, -0.3730, 0.5731, -0.1189]) tensor([0.0482, 0.1957, 0.5039, 0.2522]) -Greedy action tensor([-1.9030, -0.4431, 0.6464, -0.1636]) tensor([0.0420, 0.1809, 0.5378, 0.2392]) -Greedy action tensor([-1.2604, 0.0991, 0.3523, -0.1878]) tensor([0.0779, 0.3034, 0.3909, 0.2278]) -Greedy action tensor([-1.7210, -0.0816, 0.5285, -0.0713]) tensor([0.0480, 0.2472, 0.4550, 0.2498]) -Greedy action tensor([-1.6529, -0.5225, 0.5197, -0.0998]) tensor([0.0568, 0.1759, 0.4988, 0.2685]) -Greedy action tensor([-1.8050, -0.4440, 0.5661, -0.1247]) tensor([0.0477, 0.1859, 0.5105, 0.2559]) -Greedy action tensor([-1.8974, -0.4537, 0.7259, -0.0590]) tensor([0.0395, 0.1674, 0.5446, 0.2484]) -Greedy action tensor([-1.6886, -0.3362, 0.5188, -0.0643]) tensor([0.0525, 0.2031, 0.4777, 0.2666]) -Greedy action tensor([-1.9117, -0.4220, 0.6539, -0.1621]) tensor([0.0413, 0.1833, 0.5376, 0.2377]) -Greedy action tensor([-0.8072, 0.7790, 0.0902, 0.0406]) tensor([0.0937, 0.4577, 0.2299, 0.2187]) -Greedy action tensor([-1.7926, -0.4917, 0.5873, -0.0958]) tensor([0.0478, 0.1755, 0.5161, 0.2607]) -Greedy action tensor([-1.8556, -0.4217, 0.6164, -0.1221]) tensor([0.0441, 0.1848, 0.5218, 0.2493]) -Greedy action tensor([-1.8650, -0.3313, 0.6155, -0.1485]) tensor([0.0432, 0.2003, 0.5161, 0.2404]) -Greedy action tensor([-1.9268, -0.4457, 0.6591, -0.1650]) tensor([0.0408, 0.1795, 0.5420, 0.2377]) -Greedy action tensor([-1.9138, -0.3283, 0.6458, -0.1971]) tensor([0.0410, 0.2002, 0.5304, 0.2283]) -Greedy action tensor([-1.9154, -0.4299, 0.6486, -0.1697]) tensor([0.0414, 0.1830, 0.5381, 0.2374]) -Greedy action tensor([-1.8767, -0.4716, 0.6318, -0.1531]) tensor([0.0435, 0.1775, 0.5350, 0.2440]) -Greedy action tensor([-1.9060, -0.3469, 0.6395, -0.1588]) tensor([0.0412, 0.1961, 0.5259, 0.2367]) -Greedy action tensor([-1.5908, -0.3354, 0.6067, 0.0202]) tensor([0.0540, 0.1895, 0.4861, 0.2704]) -Greedy action tensor([-1.9004, -0.4051, 0.6465, -0.1546]) tensor([0.0417, 0.1862, 0.5329, 0.2392]) -Greedy action tensor([-1.8637, -0.4493, 0.6257, -0.1480]) tensor([0.0440, 0.1810, 0.5303, 0.2447]) -Greedy action tensor([-0.7322, -0.6307, 0.5360, 0.5963]) tensor([0.1060, 0.1173, 0.3767, 0.4001]) -Greedy action tensor([-1.9183, -0.4334, 0.6554, -0.1664]) tensor([0.0412, 0.1817, 0.5398, 0.2373]) -Greedy action tensor([-1.6559, -0.3353, 0.5074, -0.0872]) tensor([0.0548, 0.2053, 0.4768, 0.2631]) -Greedy action tensor([-1.9169, -0.4500, 0.6577, -0.1656]) tensor([0.0413, 0.1790, 0.5419, 0.2379]) -Greedy action tensor([-1.9027, -0.4219, 0.6568, -0.1576]) tensor([0.0416, 0.1828, 0.5375, 0.2381]) -Greedy action tensor([-1.8854, -0.2805, 0.6176, -0.1528]) tensor([0.0419, 0.2087, 0.5123, 0.2371]) -Greedy action tensor([-0.6949, 0.8553, 0.1132, 0.4686]) tensor([0.0896, 0.4224, 0.2011, 0.2869]) -Greedy action tensor([-1.7471, 0.0294, 0.5578, -0.2420]) tensor([0.0466, 0.2757, 0.4676, 0.2101]) -Greedy action tensor([ 0.9518, 0.8793, -0.3791, -0.0350]) tensor([0.3896, 0.3623, 0.1029, 0.1452]) -Greedy action tensor([-0.1251, -0.1719, 0.0905, -1.2207]) tensor([0.2833, 0.2704, 0.3515, 0.0947]) -Greedy action tensor([ 0.2920, -1.0853, -0.8439, 0.4293]) tensor([0.3676, 0.0927, 0.1180, 0.4217]) -Greedy action tensor([-0.1121, -0.5144, -0.1108, -0.2711]) tensor([0.2839, 0.1898, 0.2842, 0.2421]) -Greedy action tensor([-0.6131, -0.8172, -0.4569, 0.7280]) tensor([0.1469, 0.1198, 0.1717, 0.5616]) -Greedy action tensor([-0.6235, -0.4982, -0.9512, 0.5685]) tensor([0.1627, 0.1844, 0.1172, 0.5357]) -Greedy action tensor([ 0.1355, -1.0641, -0.5480, 0.0319]) tensor([0.3693, 0.1113, 0.1864, 0.3330]) -Greedy action tensor([-1.2246, -2.3604, 2.2484, -1.1841]) tensor([0.0289, 0.0093, 0.9317, 0.0301]) -Greedy action tensor([-0.9217, -0.4969, 0.4062, -0.5758]) tensor([0.1296, 0.1982, 0.4890, 0.1832]) -Greedy action tensor([ 0.8771, -1.6372, -0.0831, -0.5192]) tensor([0.5844, 0.0473, 0.2237, 0.1446]) -Greedy action tensor([ 0.6711, -0.9465, -0.1713, -0.0232]) tensor([0.4698, 0.0932, 0.2023, 0.2346]) -Greedy action tensor([ 0.4242, -1.8029, 0.4285, -0.0668]) tensor([0.3671, 0.0396, 0.3687, 0.2247]) -Greedy action tensor([0.2507, 0.0103, 0.3577, 0.0454]) tensor([0.2693, 0.2117, 0.2997, 0.2193]) -Greedy action tensor([-1.0629, -1.2889, 0.6019, -1.8014]) tensor([0.1323, 0.1055, 0.6990, 0.0632]) -Greedy action tensor([ 0.4278, -0.6807, -0.1986, 0.9771]) tensor([0.2780, 0.0918, 0.1486, 0.4816]) -Greedy action tensor([-0.5068, -1.0820, 0.4031, -0.5978]) tensor([0.2016, 0.1134, 0.5009, 0.1841]) -Greedy action tensor([-0.6258, -0.9655, -0.5680, -0.5935]) tensor([0.2629, 0.1872, 0.2785, 0.2715]) -Greedy action tensor([ 0.6701, 0.4568, 0.2788, -0.7133]) tensor([0.3657, 0.2954, 0.2473, 0.0917]) -Greedy action tensor([ 0.7787, -0.1104, 0.3563, 0.7020]) tensor([0.3342, 0.1373, 0.2190, 0.3095]) -Greedy action tensor([ 0.0455, -0.7410, -0.0319, -0.2772]) tensor([0.3220, 0.1467, 0.2981, 0.2332]) -Greedy action tensor([ 0.2723, -0.5150, -0.1149, 0.0912]) tensor([0.3369, 0.1533, 0.2287, 0.2811]) -Greedy action tensor([ 0.2636, 0.3249, -0.6902, -0.2707]) tensor([0.3295, 0.3504, 0.1270, 0.1931]) -Greedy action tensor([ 0.1249, -0.3107, -0.5872, 0.3483]) tensor([0.2952, 0.1909, 0.1448, 0.3691]) -Greedy action tensor([0.4366, 0.9054, 0.1717, 0.1115]) tensor([0.2446, 0.3909, 0.1877, 0.1767]) -Greedy action tensor([ 0.2222, -0.9817, 0.1334, -0.0868]) tensor([0.3391, 0.1017, 0.3103, 0.2489]) -Greedy action tensor([-0.7237, -1.0482, 0.2373, -1.1539]) tensor([0.2005, 0.1449, 0.5242, 0.1304]) -Greedy action tensor([-0.2383, -0.5406, -0.3225, -0.4010]) tensor([0.2850, 0.2107, 0.2620, 0.2422]) -Greedy action tensor([-0.3287, -1.3828, -0.0110, 0.1736]) tensor([0.2286, 0.0797, 0.3141, 0.3777]) -Greedy action tensor([-0.4262, -0.6242, 0.4427, -0.8388]) tensor([0.2055, 0.1686, 0.4899, 0.1360]) -Greedy action tensor([ 0.4583, 0.6952, -0.7573, -0.5115]) tensor([0.3398, 0.4306, 0.1008, 0.1288]) -Greedy action tensor([-0.4213, -0.6960, -0.4158, 0.2641]) tensor([0.2105, 0.1600, 0.2117, 0.4178]) -Greedy action tensor([ 1.1361, -0.9591, 0.2249, 0.2497]) tensor([0.5162, 0.0635, 0.2075, 0.2127]) -Greedy action tensor([ 0.7435, -1.5107, -0.8468, 1.2935]) tensor([0.3287, 0.0345, 0.0670, 0.5698]) -Greedy action tensor([ 1.1197, -0.8302, -0.5000, 0.1627]) tensor([0.5800, 0.0825, 0.1148, 0.2227]) -Greedy action tensor([-1.0045, -0.6648, -0.2238, -0.4979]) tensor([0.1601, 0.2248, 0.3494, 0.2657]) -Greedy action tensor([-0.1057, -0.9381, 0.6824, -0.0657]) tensor([0.2139, 0.0930, 0.4704, 0.2226]) -Greedy action tensor([ 0.7112, -1.0631, 0.1963, 0.9344]) tensor([0.3314, 0.0562, 0.1981, 0.4143]) -Greedy action tensor([-0.8193, -0.5872, 0.3831, -0.5998]) tensor([0.1463, 0.1845, 0.4869, 0.1822]) -Greedy action tensor([-0.2864, -0.7754, -0.2623, -0.9756]) tensor([0.3185, 0.1953, 0.3263, 0.1599]) -Greedy action tensor([-0.2446, 0.3845, 0.3564, -0.3360]) tensor([0.1782, 0.3342, 0.3250, 0.1626]) -Greedy action tensor([-0.0198, -0.0323, -0.1260, 0.2880]) tensor([0.2354, 0.2325, 0.2117, 0.3203]) -Greedy action tensor([-0.7404, -0.3319, -0.3200, -0.5389]) tensor([0.1905, 0.2866, 0.2900, 0.2330]) -Greedy action tensor([ 0.8322, -0.3036, -0.0614, 0.1908]) tensor([0.4431, 0.1423, 0.1813, 0.2333]) -Greedy action tensor([-0.6373, -0.4455, 0.8736, -1.0718]) tensor([0.1353, 0.1639, 0.6131, 0.0876]) -Greedy action tensor([-0.3665, 0.3860, -0.1964, -1.2276]) tensor([0.2114, 0.4486, 0.2506, 0.0894]) -Greedy action tensor([-1.1118, -0.5117, 0.4413, -1.2050]) tensor([0.1182, 0.2154, 0.5587, 0.1077]) -Greedy action tensor([ 0.7260, -0.9938, -0.4687, 0.4830]) tensor([0.4413, 0.0790, 0.1336, 0.3461]) -Greedy action tensor([-1.0166, 0.3524, -0.6537, -1.0996]) tensor([0.1372, 0.5394, 0.1972, 0.1263]) -Greedy action tensor([-0.5378, -0.2647, -0.1082, -0.4604]) tensor([0.2028, 0.2665, 0.3116, 0.2191]) -Greedy action tensor([ 0.9325, -0.3786, 0.4062, -0.6067]) tensor([0.4820, 0.1299, 0.2847, 0.1034]) -Greedy action tensor([-0.4222, -0.5456, 0.2977, -0.0472]) tensor([0.1854, 0.1639, 0.3809, 0.2698]) -Greedy action tensor([-0.2391, -0.2880, 0.1367, 0.3329]) tensor([0.1930, 0.1838, 0.2811, 0.3420]) -Greedy action tensor([ 0.3485, -0.5559, -0.9230, -0.1556]) tensor([0.4368, 0.1768, 0.1225, 0.2639]) -Greedy action tensor([-0.4917, -1.4379, -0.3249, -0.7664]) tensor([0.3003, 0.1166, 0.3549, 0.2282]) -Greedy action tensor([-0.1157, -0.9406, 1.3350, -0.3983]) tensor([0.1549, 0.0679, 0.6606, 0.1167]) -Greedy action tensor([ 0.0350, -0.2431, 0.3283, 0.2576]) tensor([0.2300, 0.1742, 0.3084, 0.2874]) -Greedy action tensor([ 0.1875, -0.5551, -0.3553, 0.1519]) tensor([0.3309, 0.1575, 0.1923, 0.3193]) -Greedy action tensor([ 0.4854, -0.2275, -0.2204, -0.3476]) tensor([0.4134, 0.2027, 0.2041, 0.1797]) -Greedy action tensor([ 1.1876, -0.4616, -0.4706, 1.0557]) tensor([0.4426, 0.0851, 0.0843, 0.3880]) -Greedy action tensor([0.9172, 0.2346, 0.8042, 0.4246]) tensor([0.3323, 0.1679, 0.2968, 0.2030]) -Greedy action tensor([ 0.1846, -0.5294, -0.6999, -0.7694]) tensor([0.4371, 0.2140, 0.1805, 0.1684]) -Greedy action tensor([ 0.2951, 0.3164, -0.4752, 0.4026]) tensor([0.2779, 0.2839, 0.1287, 0.3095]) -Greedy action tensor([-0.2751, -1.1493, 0.4246, -1.0084]) tensor([0.2557, 0.1067, 0.5148, 0.1228]) -Greedy action tensor([-0.1333, -0.2503, 0.2079, -0.3570]) tensor([0.2441, 0.2172, 0.3434, 0.1952]) -Greedy action tensor([ 0.0428, -0.4513, 0.1409, -0.4479]) tensor([0.3007, 0.1835, 0.3317, 0.1841]) -Greedy action tensor([ 0.1270, -0.1848, -0.0983, -0.3048]) tensor([0.3145, 0.2303, 0.2511, 0.2042]) -Greedy action tensor([ 0.6696, -0.4811, -0.2195, -0.8182]) tensor([0.5120, 0.1620, 0.2104, 0.1156]) -Greedy action tensor([-0.9832, 0.1089, -0.7649, -0.3121]) tensor([0.1393, 0.4151, 0.1732, 0.2724]) -Greedy action tensor([-0.4492, 0.5379, 0.1982, -0.8404]) tensor([0.1595, 0.4280, 0.3047, 0.1078]) -Greedy action tensor([ 1.0987, -0.4246, -0.1463, 0.4018]) tensor([0.4990, 0.1088, 0.1437, 0.2486]) -Greedy action tensor([-0.2835, -2.4469, 0.5835, 0.1797]) tensor([0.1967, 0.0226, 0.4681, 0.3126]) -Greedy action tensor([ 0.8633, 0.0385, -1.0995, 0.1510]) tensor([0.4833, 0.2118, 0.0679, 0.2370]) -Greedy action tensor([-0.7808, -0.1543, 0.2994, -0.9809]) tensor([0.1507, 0.2820, 0.4439, 0.1234]) -Greedy action tensor([ 0.6444, -0.4276, 0.3757, 0.0370]) tensor([0.3771, 0.1291, 0.2883, 0.2055]) -Greedy action tensor([-0.0508, -0.4006, -0.2434, 0.3259]) tensor([0.2508, 0.1768, 0.2069, 0.3655]) -Greedy action tensor([ 0.4723, -0.3196, -0.6770, 0.0477]) tensor([0.4126, 0.1869, 0.1307, 0.2698]) -Greedy action tensor([-0.4443, 0.7303, 0.4126, -0.9129]) tensor([0.1385, 0.4484, 0.3264, 0.0867]) -Greedy action tensor([-0.3496, -1.2019, 0.5394, -0.3304]) tensor([0.2050, 0.0874, 0.4986, 0.2090]) -Greedy action tensor([-1.1427, -0.1911, -0.9650, -0.0262]) tensor([0.1276, 0.3304, 0.1524, 0.3896]) -Greedy action tensor([-0.2926, 0.1900, 0.1756, -0.8316]) tensor([0.2083, 0.3375, 0.3327, 0.1215]) -Greedy action tensor([-0.1172, -1.4502, 0.4318, -1.2841]) tensor([0.3024, 0.0797, 0.5237, 0.0942]) -Greedy action tensor([-1.0280, 0.8032, -0.1628, -0.3869]) tensor([0.0868, 0.5420, 0.2063, 0.1649]) -Greedy action tensor([ 1.4021, -0.4554, -0.2964, 0.0757]) tensor([0.6233, 0.0973, 0.1140, 0.1654]) -Greedy action tensor([ 1.8230, -0.8923, -0.2958, 0.6900]) tensor([0.6629, 0.0439, 0.0797, 0.2135]) -Greedy action tensor([ 1.3035, -0.1413, -0.8826, 0.1326]) tensor([0.6031, 0.1422, 0.0678, 0.1870]) -Greedy action tensor([ 1.3442, -0.1860, -0.4147, 0.1697]) tensor([0.5890, 0.1275, 0.1015, 0.1820]) -Greedy action tensor([ 1.4183, 0.2543, -0.2320, -0.6707]) tensor([0.6142, 0.1918, 0.1179, 0.0760]) -Greedy action tensor([ 1.6460, -0.3736, -0.3796, 0.0889]) tensor([0.6778, 0.0899, 0.0894, 0.1428]) -Greedy action tensor([ 1.4395, -0.0857, -0.5007, -0.0415]) tensor([0.6295, 0.1370, 0.0904, 0.1432]) -Greedy action tensor([ 1.7706, 0.0357, -0.2852, 0.5437]) tensor([0.6259, 0.1104, 0.0801, 0.1835]) -Greedy action tensor([ 1.7034, -0.5431, -0.5976, 1.1872]) tensor([0.5547, 0.0587, 0.0556, 0.3311]) -Greedy action tensor([ 1.5026, 0.1491, -0.5589, -0.0530]) tensor([0.6263, 0.1618, 0.0797, 0.1322]) -Greedy action tensor([ 1.1854, -0.5013, -0.8664, 0.5683]) tensor([0.5396, 0.0999, 0.0693, 0.2911]) -Greedy action tensor([ 1.6882, -0.5724, -0.6933, 0.3275]) tensor([0.6881, 0.0718, 0.0636, 0.1765]) -Greedy action tensor([ 1.5027, -0.8113, -0.6246, 0.4576]) tensor([0.6371, 0.0630, 0.0759, 0.2240]) -Greedy action tensor([ 1.1870, -0.2086, -0.4059, 0.3492]) tensor([0.5309, 0.1315, 0.1079, 0.2297]) -Greedy action tensor([ 2.0909, -0.9909, -0.4155, 0.6081]) tensor([0.7383, 0.0339, 0.0602, 0.1676]) -Greedy action tensor([ 2.3780, -1.1337, -0.5700, 0.7328]) tensor([0.7842, 0.0234, 0.0411, 0.1513]) -Greedy action tensor([ 1.1298, -0.4765, -0.1155, -0.2648]) tensor([0.5759, 0.1155, 0.1658, 0.1428]) -Greedy action tensor([ 1.5237, -0.3076, -0.9325, 0.0590]) tensor([0.6770, 0.1085, 0.0581, 0.1565]) -Greedy action tensor([ 1.4695, -0.7422, -0.5770, 0.7129]) tensor([0.5855, 0.0641, 0.0756, 0.2747]) -Greedy action tensor([ 1.3974, 0.0561, -0.8158, 0.2262]) tensor([0.5949, 0.1556, 0.0651, 0.1844]) -Greedy action tensor([ 0.6157, -0.1889, -0.0295, 0.1018]) tensor([0.3891, 0.1740, 0.2041, 0.2327]) -Greedy action tensor([ 1.0964, -0.4634, -0.6414, 0.3397]) tensor([0.5390, 0.1133, 0.0948, 0.2529]) -Greedy action tensor([ 0.9541, -0.2963, -0.7326, 0.1863]) tensor([0.5167, 0.1480, 0.0956, 0.2397]) -Greedy action tensor([ 1.6231, -0.1140, -0.4298, 0.1121]) tensor([0.6557, 0.1154, 0.0842, 0.1447]) -Greedy action tensor([ 1.3799, -0.6163, 0.2168, 0.4474]) tensor([0.5429, 0.0738, 0.1697, 0.2137]) -Greedy action tensor([ 1.2417, -0.6133, 0.0560, 0.0631]) tensor([0.5651, 0.0884, 0.1727, 0.1739]) -Greedy action tensor([ 1.4715, -0.1908, -0.8510, 0.2909]) tensor([0.6270, 0.1189, 0.0615, 0.1926]) -Greedy action tensor([ 1.2364, -0.6938, -0.5562, 0.2499]) tensor([0.5936, 0.0862, 0.0989, 0.2214]) -Greedy action tensor([ 1.3494, -0.5339, -0.7495, 0.3264]) tensor([0.6119, 0.0931, 0.0750, 0.2200]) -Greedy action tensor([ 1.7305, -0.4682, -0.8373, 0.5267]) tensor([0.6722, 0.0746, 0.0516, 0.2017]) -Greedy action tensor([ 1.4596, 0.1672, -0.6416, -0.0298]) tensor([0.6163, 0.1693, 0.0754, 0.1390]) -Greedy action tensor([ 1.5558, -0.7210, -0.6346, 0.3826]) tensor([0.6562, 0.0673, 0.0734, 0.2030]) -Greedy action tensor([ 1.4430, -0.4214, -0.5389, 0.4055]) tensor([0.6071, 0.0941, 0.0837, 0.2151]) -Greedy action tensor([ 1.4861, -0.4074, 0.0845, -0.0340]) tensor([0.6190, 0.0932, 0.1524, 0.1354]) -Greedy action tensor([ 1.6980, -0.6658, -0.3003, 0.5609]) tensor([0.6450, 0.0607, 0.0874, 0.2069]) -Greedy action tensor([ 1.7204, -0.3272, -0.5180, 0.6205]) tensor([0.6375, 0.0823, 0.0680, 0.2122]) -Greedy action tensor([ 1.3419, -0.1340, -0.8043, 0.6838]) tensor([0.5367, 0.1227, 0.0628, 0.2779]) -Greedy action tensor([ 1.2618, -0.5299, -0.4594, 0.3649]) tensor([0.5703, 0.0951, 0.1020, 0.2326]) -Greedy action tensor([ 1.4678, -0.7472, -0.2998, 0.1871]) tensor([0.6420, 0.0701, 0.1096, 0.1784]) -Greedy action tensor([ 1.3816, 0.0610, -0.4324, -0.0605]) tensor([0.6001, 0.1602, 0.0978, 0.1419]) -Greedy action tensor([ 1.3405, -0.4097, -0.3212, -0.1258]) tensor([0.6272, 0.1090, 0.1191, 0.1448]) -Greedy action tensor([ 1.0317, -0.2489, -0.0593, 0.0422]) tensor([0.5036, 0.1400, 0.1692, 0.1872]) -Greedy action tensor([ 2.4114, -1.4173, 0.0548, 0.5224]) tensor([0.7888, 0.0171, 0.0747, 0.1193]) -Greedy action tensor([ 1.1621, -0.4524, -0.7486, 0.5295]) tensor([0.5324, 0.1059, 0.0788, 0.2828]) -Greedy action tensor([ 2.5658, -1.5663, 0.2022, 0.3213]) tensor([0.8223, 0.0132, 0.0774, 0.0871]) -Greedy action tensor([ 1.4588, -0.4485, -0.5080, 0.2947]) tensor([0.6248, 0.0928, 0.0874, 0.1951]) -Greedy action tensor([ 1.3406, -0.4062, -0.2624, 0.1015]) tensor([0.6005, 0.1047, 0.1209, 0.1739]) -Greedy action tensor([ 2.6933, 0.9266, -0.0421, 0.1245]) tensor([0.7620, 0.1302, 0.0494, 0.0584]) -Greedy action tensor([ 1.4063, -0.2369, -0.3121, 0.1530]) tensor([0.6030, 0.1166, 0.1082, 0.1722]) -Greedy action tensor([ 1.7817, -0.9017, -0.3545, 0.5583]) tensor([0.6754, 0.0461, 0.0798, 0.1987]) -Greedy action tensor([ 1.4166, -0.8569, -0.8467, -0.3324]) tensor([0.7242, 0.0745, 0.0753, 0.1260]) -Greedy action tensor([ 2.1690, -0.9599, -0.7173, 0.0316]) tensor([0.8213, 0.0359, 0.0458, 0.0969]) -Greedy action tensor([ 1.4795, -0.4539, -0.6355, 0.4967]) tensor([0.6099, 0.0882, 0.0736, 0.2283]) -Greedy action tensor([ 1.1378, -0.1937, -0.2638, 0.0773]) tensor([0.5386, 0.1422, 0.1326, 0.1865]) -Greedy action tensor([ 1.0715, -0.2994, -0.3957, 0.3171]) tensor([0.5116, 0.1299, 0.1180, 0.2406]) -Greedy action tensor([ 1.0242, -0.2876, -0.8945, 0.5413]) tensor([0.4918, 0.1325, 0.0722, 0.3035]) -Greedy action tensor([ 1.2910, -0.3243, -0.5221, 0.5822]) tensor([0.5393, 0.1072, 0.0880, 0.2655]) -Greedy action tensor([ 1.5188, 0.2921, -0.5357, 0.2403]) tensor([0.5883, 0.1725, 0.0754, 0.1638]) -Greedy action tensor([ 1.5850, -0.0035, -1.2653, 0.4075]) tensor([0.6369, 0.1301, 0.0368, 0.1962]) -Greedy action tensor([ 1.3346, -0.2388, -0.5698, 0.1764]) tensor([0.5987, 0.1241, 0.0891, 0.1880]) -Greedy action tensor([ 1.2956, -0.0380, -0.8276, 0.3458]) tensor([0.5650, 0.1489, 0.0676, 0.2185]) -Greedy action tensor([ 1.3669, -0.7484, -0.8142, -0.3347]) tensor([0.7063, 0.0852, 0.0797, 0.1288]) -Greedy action tensor([ 1.2076, -0.1961, -0.7807, 0.3082]) tensor([0.5588, 0.1373, 0.0765, 0.2273]) -Greedy action tensor([ 2.0742, -1.2609, -0.1741, 0.5880]) tensor([0.7313, 0.0260, 0.0772, 0.1654]) -Greedy action tensor([ 1.7149, -0.9041, -0.6566, 0.4444]) tensor([0.6911, 0.0504, 0.0645, 0.1940]) -Greedy action tensor([ 1.2543, -0.4097, -0.2539, 0.7870]) tensor([0.4908, 0.0930, 0.1086, 0.3076]) -Greedy action tensor([ 1.4134, -0.3345, -0.1175, 0.4818]) tensor([0.5604, 0.0976, 0.1212, 0.2208]) -Greedy action tensor([ 1.1541, -0.3838, -0.0899, 0.1433]) tensor([0.5356, 0.1151, 0.1544, 0.1949]) -Greedy action tensor([ 1.3681, -0.2864, -0.5100, 0.0544]) tensor([0.6200, 0.1185, 0.0948, 0.1667]) -Greedy action tensor([ 1.7505, -0.7518, -0.2561, -0.1242]) tensor([0.7301, 0.0598, 0.0982, 0.1120]) -Greedy action tensor([ 2.0708, -1.6667, -0.0546, 0.2726]) tensor([0.7641, 0.0182, 0.0912, 0.1265]) -Greedy action tensor([ 1.7433, -0.6292, -0.8631, 0.6083]) tensor([0.6718, 0.0626, 0.0496, 0.2159]) -Greedy action tensor([ 1.4292, -0.1798, -0.6641, 0.5736]) tensor([0.5720, 0.1144, 0.0705, 0.2431]) -Greedy action tensor([ 1.3308, -0.1455, -0.9825, 0.3651]) tensor([0.5854, 0.1338, 0.0579, 0.2229]) -Greedy action tensor([ 1.7797, -0.6654, 0.1422, -0.0267]) tensor([0.6918, 0.0600, 0.1345, 0.1136]) -Greedy action tensor([ 1.7681, -0.9300, -0.2175, 0.4017]) tensor([0.6851, 0.0461, 0.0941, 0.1747]) -Greedy action tensor([ 1.7202, -0.4720, -0.5474, 0.7289]) tensor([0.6304, 0.0704, 0.0653, 0.2339]) -Greedy action tensor([ 1.3238, -0.8684, 0.0334, 0.3599]) tensor([0.5655, 0.0632, 0.1556, 0.2157]) -Greedy action tensor([ 1.4312, -0.9019, 0.0377, 0.0573]) tensor([0.6257, 0.0607, 0.1553, 0.1584]) -Greedy action tensor([ 1.5224, -0.6157, -0.3066, 0.1233]) tensor([0.6556, 0.0773, 0.1053, 0.1618]) -Greedy action tensor([ 1.7636, -1.0996, -0.4131, 0.8234]) tensor([0.6406, 0.0366, 0.0727, 0.2502]) -Greedy action tensor([ 0.7230, -0.1930, -0.0983, -0.0654]) tensor([0.4358, 0.1744, 0.1917, 0.1981]) -Greedy action tensor([ 0.6915, -0.3225, -0.1660, -0.0927]) tensor([0.4458, 0.1617, 0.1891, 0.2035]) -Greedy action tensor([ 0.9800, -0.3951, -0.0481, -0.2084]) tensor([0.5221, 0.1320, 0.1868, 0.1591]) -Greedy action tensor([ 0.7464, -0.7025, -0.1040, -0.3041]) tensor([0.4971, 0.1167, 0.2124, 0.1739]) -Greedy action tensor([ 0.6827, -0.0787, -0.0443, -0.1771]) tensor([0.4213, 0.1968, 0.2036, 0.1783]) -Greedy action tensor([ 0.6656, -0.3341, -0.1373, -0.1792]) tensor([0.4453, 0.1639, 0.1995, 0.1913]) -Greedy action tensor([ 0.7095, -0.2366, 0.0154, -0.3909]) tensor([0.4504, 0.1748, 0.2250, 0.1498]) -Greedy action tensor([ 1.0376, -0.6810, -0.0727, -0.5713]) tensor([0.5852, 0.1049, 0.1928, 0.1171]) -Greedy action tensor([ 0.8736, -0.4130, 0.0259, -0.4425]) tensor([0.5069, 0.1400, 0.2172, 0.1359]) -Greedy action tensor([ 0.9167, -0.3297, -0.1770, -0.2152]) tensor([0.5141, 0.1478, 0.1722, 0.1658]) -Greedy action tensor([ 0.9155, -0.0374, -0.1592, -0.2121]) tensor([0.4876, 0.1880, 0.1665, 0.1579]) -Greedy action tensor([0.8587, 0.1043, 0.1766, 0.0043]) tensor([0.4164, 0.1958, 0.2105, 0.1772]) -Greedy action tensor([ 0.6332, -0.4187, 0.1399, -0.4349]) tensor([0.4341, 0.1516, 0.2651, 0.1492]) -Greedy action tensor([ 0.6996, -0.3648, 0.1194, -0.4286]) tensor([0.4488, 0.1548, 0.2512, 0.1452]) -Greedy action tensor([ 1.1015, -0.7514, 0.1650, -0.5636]) tensor([0.5754, 0.0902, 0.2256, 0.1088]) -Greedy action tensor([ 0.7729, -0.7061, -0.0569, -0.2425]) tensor([0.4935, 0.1125, 0.2152, 0.1788]) -Greedy action tensor([ 0.7690, -0.6212, 0.0163, -0.4181]) tensor([0.4938, 0.1230, 0.2326, 0.1507]) -Greedy action tensor([ 0.5218, -0.3323, 0.0448, -0.5409]) tensor([0.4181, 0.1780, 0.2595, 0.1445]) -Greedy action tensor([ 1.2975, -0.8379, -0.0834, -0.8322]) tensor([0.6719, 0.0794, 0.1689, 0.0799]) -Greedy action tensor([ 0.6752, -0.2256, -0.1155, -0.1760]) tensor([0.4373, 0.1777, 0.1983, 0.1867]) -Greedy action tensor([ 0.2945, 0.0898, -0.1212, 0.0823]) tensor([0.3045, 0.2482, 0.2010, 0.2463]) -Greedy action tensor([ 0.9501, -1.0104, 0.0937, -0.3748]) tensor([0.5461, 0.0769, 0.2319, 0.1452]) -Greedy action tensor([ 1.2476, -0.7727, 0.1516, -0.4444]) tensor([0.6057, 0.0803, 0.2024, 0.1115]) -Greedy action tensor([ 1.3209, -0.9610, 0.1934, -0.6585]) tensor([0.6393, 0.0653, 0.2071, 0.0883]) -Greedy action tensor([ 0.8895, -0.6556, -0.1074, -0.5567]) tensor([0.5501, 0.1173, 0.2030, 0.1295]) -Greedy action tensor([ 1.1290, -0.7609, 0.0480, -0.7789]) tensor([0.6102, 0.0922, 0.2070, 0.0906]) -Greedy action tensor([ 1.0498, -0.5107, 0.0678, -0.5484]) tensor([0.5596, 0.1175, 0.2096, 0.1132]) -Greedy action tensor([ 0.3140, -0.0347, -0.0308, -0.3402]) tensor([0.3408, 0.2405, 0.2414, 0.1772]) -Greedy action tensor([ 0.7040, -0.4832, -0.0431, -0.3841]) tensor([0.4727, 0.1442, 0.2239, 0.1592]) -Greedy action tensor([ 0.6132, -0.0389, -0.0382, 0.0248]) tensor([0.3850, 0.2006, 0.2007, 0.2138]) -Greedy action tensor([ 1.1801, -0.5862, -0.0918, -0.8209]) tensor([0.6303, 0.1078, 0.1767, 0.0852]) -Greedy action tensor([ 0.9340, -0.2938, 0.1374, -0.1615]) tensor([0.4812, 0.1410, 0.2169, 0.1609]) -Greedy action tensor([ 0.7609, -0.4464, -0.1003, -0.3440]) tensor([0.4871, 0.1457, 0.2059, 0.1614]) -Greedy action tensor([ 0.8791, -0.6193, -0.0095, -0.2606]) tensor([0.5116, 0.1143, 0.2104, 0.1637]) -Greedy action tensor([0.8403, 0.1891, 0.0463, 0.1522]) tensor([0.4039, 0.2106, 0.1826, 0.2030]) -Greedy action tensor([ 0.5966, -0.3412, -0.1653, -0.4207]) tensor([0.4505, 0.1764, 0.2103, 0.1629]) -Greedy action tensor([ 0.7808, -0.0476, -0.1187, -0.1966]) tensor([0.4505, 0.1967, 0.1832, 0.1695]) -Greedy action tensor([ 0.8930, -0.3340, -0.1594, -0.3341]) tensor([0.5167, 0.1515, 0.1804, 0.1515]) -Greedy action tensor([0.5342, 0.4009, 0.0720, 0.1767]) tensor([0.3121, 0.2731, 0.1966, 0.2183]) -Greedy action tensor([ 0.9937, -0.5745, 0.0874, -0.5936]) tensor([0.5504, 0.1147, 0.2224, 0.1125]) -Greedy action tensor([ 0.7523, -0.3673, -0.0472, -0.2636]) tensor([0.4677, 0.1527, 0.2103, 0.1694]) -Greedy action tensor([ 0.6538, -0.3967, -0.0261, -0.1160]) tensor([0.4311, 0.1508, 0.2184, 0.1997]) -Greedy action tensor([ 0.6775, -0.2465, 0.0417, -0.0904]) tensor([0.4183, 0.1660, 0.2215, 0.1941]) -Greedy action tensor([ 0.9994, -0.8373, 0.0895, -0.5268]) tensor([0.5620, 0.0896, 0.2262, 0.1222]) -Greedy action tensor([ 0.3616, -0.1102, -0.1344, -0.0330]) tensor([0.3440, 0.2146, 0.2095, 0.2319]) -Greedy action tensor([ 0.9828, -0.6474, -0.1074, -0.4886]) tensor([0.5677, 0.1112, 0.1908, 0.1303]) -Greedy action tensor([ 0.5371, -0.2095, -0.0369, -0.2196]) tensor([0.3990, 0.1891, 0.2247, 0.1872]) -Greedy action tensor([ 0.5993, -0.3526, -0.0413, -0.1535]) tensor([0.4195, 0.1619, 0.2210, 0.1976]) -Greedy action tensor([ 1.0848, -0.7725, 0.0929, -0.5238]) tensor([0.5790, 0.0904, 0.2147, 0.1159]) -Greedy action tensor([ 0.7974, -0.7780, 0.0459, -0.4256]) tensor([0.5069, 0.1049, 0.2391, 0.1492]) -Greedy action tensor([ 0.5716, -0.2417, -0.0347, -0.1526]) tensor([0.4043, 0.1793, 0.2205, 0.1960]) -Greedy action tensor([ 0.6876, -0.3845, -0.1076, -0.1535]) tensor([0.4494, 0.1538, 0.2029, 0.1938]) -Greedy action tensor([ 0.4802, -0.0960, 0.0191, -0.0312]) tensor([0.3581, 0.2013, 0.2258, 0.2148]) -Greedy action tensor([ 0.9735, -0.5935, 0.1150, -0.4069]) tensor([0.5308, 0.1108, 0.2250, 0.1335]) -Greedy action tensor([ 0.6273, -0.2727, -0.0307, -0.1113]) tensor([0.4163, 0.1692, 0.2156, 0.1989]) -Greedy action tensor([ 0.6652, -0.5607, 0.0146, -0.2186]) tensor([0.4487, 0.1317, 0.2341, 0.1854]) -Greedy action tensor([ 0.7452, -0.6244, -0.1686, -0.2403]) tensor([0.4930, 0.1253, 0.1977, 0.1840]) -Greedy action tensor([ 0.3999, -0.0858, -0.1113, -0.0407]) tensor([0.3498, 0.2152, 0.2098, 0.2252]) -Greedy action tensor([ 1.0700, -0.5262, -0.0752, -0.4130]) tensor([0.5722, 0.1160, 0.1820, 0.1299]) -Greedy action tensor([ 0.7400, -0.5009, -0.0862, -0.3188]) tensor([0.4822, 0.1394, 0.2111, 0.1673]) -Greedy action tensor([ 1.0872, -0.5765, -0.0492, -0.6501]) tensor([0.5930, 0.1123, 0.1903, 0.1044]) -Greedy action tensor([ 0.6311, 0.1952, -0.2297, -0.0722]) tensor([0.3899, 0.2522, 0.1649, 0.1930]) -Greedy action tensor([ 0.6623, -0.3174, -0.1052, -0.0787]) tensor([0.4317, 0.1621, 0.2004, 0.2058]) -Greedy action tensor([ 0.9649, 0.0389, -0.0845, -0.1452]) tensor([0.4817, 0.1908, 0.1687, 0.1587]) -Greedy action tensor([ 1.0022, -0.3407, -0.1239, -0.3844]) tensor([0.5449, 0.1423, 0.1767, 0.1362]) -Greedy action tensor([ 0.8945, -0.8282, 0.2043, -0.4649]) tensor([0.5163, 0.0922, 0.2589, 0.1326]) -Greedy action tensor([ 0.8930, -1.0438, 0.0271, -0.5523]) tensor([0.5554, 0.0801, 0.2336, 0.1309]) -Greedy action tensor([ 1.0758, -0.1230, -0.2788, -0.4830]) tensor([0.5650, 0.1704, 0.1458, 0.1189]) -Greedy action tensor([ 1.0219, -0.8509, 0.1212, -0.6261]) tensor([0.5706, 0.0877, 0.2318, 0.1098]) -Greedy action tensor([ 0.8648, -0.6779, -0.0099, -0.4886]) tensor([0.5293, 0.1132, 0.2207, 0.1368]) -Greedy action tensor([ 0.6293, -0.4411, -0.1537, -0.1287]) tensor([0.4408, 0.1512, 0.2015, 0.2066]) -Greedy action tensor([ 0.9509, -0.1814, -0.0473, -0.4266]) tensor([0.5147, 0.1659, 0.1897, 0.1298]) -Greedy action tensor([ 0.7574, -0.3348, -0.0571, -0.1950]) tensor([0.4621, 0.1550, 0.2046, 0.1783]) -Greedy action tensor([ 0.4743, 0.0360, -0.0478, -0.0167]) tensor([0.3508, 0.2263, 0.2081, 0.2147]) -Greedy action tensor([ 0.6022, -0.0888, 0.1195, -0.0442]) tensor([0.3785, 0.1897, 0.2336, 0.1983]) -Greedy action tensor([ 0.7586, -0.6953, -0.0709, -0.0784]) tensor([0.4755, 0.1111, 0.2075, 0.2059]) -Greedy action tensor([ 0.3087, -0.0348, -0.1192, -0.1549]) tensor([0.3344, 0.2372, 0.2180, 0.2104]) -Greedy action tensor([ 0.8452, 0.1928, 0.1066, -0.6440]) tensor([0.4496, 0.2342, 0.2148, 0.1014]) -Greedy action tensor([ 0.6960, -0.4422, -0.0565, -0.1360]) tensor([0.4491, 0.1439, 0.2116, 0.1954]) -Greedy action tensor([ 0.7822, -0.1812, -0.1747, -0.4900]) tensor([0.4888, 0.1865, 0.1877, 0.1370]) -Greedy action tensor([ 0.8512, -0.5679, -0.1095, -0.2376]) tensor([0.5099, 0.1234, 0.1951, 0.1716]) -Greedy action tensor([ 0.5104, 0.2566, -0.2104, -0.0321]) tensor([0.3517, 0.2728, 0.1710, 0.2044]) -Greedy action tensor([-1.8878, -0.3766, 0.6265, -0.1535]) tensor([0.0425, 0.1924, 0.5246, 0.2405]) -Greedy action tensor([-1.8970, -0.3430, 0.6346, -0.1532]) tensor([0.0416, 0.1969, 0.5234, 0.2381]) -Greedy action tensor([1.0687, 0.5589, 0.0264, 0.1088]) tensor([0.4281, 0.2571, 0.1509, 0.1639]) -Greedy action tensor([-1.8927, -0.4557, 0.6458, -0.1570]) tensor([0.0425, 0.1787, 0.5378, 0.2410]) -Greedy action tensor([-1.8465, -0.2576, 0.6071, -0.1211]) tensor([0.0432, 0.2116, 0.5025, 0.2426]) -Greedy action tensor([-1.6593, -0.3944, 0.5122, -0.0227]) tensor([0.0542, 0.1920, 0.4754, 0.2784]) -Greedy action tensor([-0.2324, 0.7497, -0.2250, 0.1873]) tensor([0.1613, 0.4307, 0.1625, 0.2454]) -Greedy action tensor([-1.6920, -0.4359, 0.5524, -0.1368]) tensor([0.0535, 0.1880, 0.5050, 0.2535]) -Greedy action tensor([-1.8040, -0.2522, 0.5683, -0.0980]) tensor([0.0456, 0.2150, 0.4885, 0.2509]) -Greedy action tensor([-1.6940, -0.3873, 0.6776, 0.0489]) tensor([0.0473, 0.1749, 0.5072, 0.2705]) -Greedy action tensor([-1.8315, -0.5951, 0.9371, -0.0659]) tensor([0.0381, 0.1313, 0.6077, 0.2229]) -Greedy action tensor([-1.5021, -0.1707, 0.4414, -0.0906]) tensor([0.0630, 0.2386, 0.4400, 0.2585]) -Greedy action tensor([-1.8131, -0.3570, 0.5840, -0.1259]) tensor([0.0461, 0.1978, 0.5069, 0.2492]) -Greedy action tensor([-1.4822, -0.0202, 0.3924, -0.0510]) tensor([0.0624, 0.2694, 0.4070, 0.2612]) -Greedy action tensor([-1.8767, -0.3918, 0.6265, -0.1571]) tensor([0.0431, 0.1901, 0.5264, 0.2404]) -Greedy action tensor([-1.9176, -0.4468, 0.6560, -0.1673]) tensor([0.0413, 0.1797, 0.5414, 0.2377]) -Greedy action tensor([-1.9021, -0.3427, 0.6346, -0.1577]) tensor([0.0415, 0.1972, 0.5240, 0.2373]) -Greedy action tensor([-1.6803, -0.3128, 0.5003, -0.0600]) tensor([0.0531, 0.2085, 0.4700, 0.2684]) -Greedy action tensor([-1.3894, -0.6503, 0.3993, 0.1621]) tensor([0.0725, 0.1518, 0.4336, 0.3421]) -Greedy action tensor([-1.7613, 0.0118, 0.5132, -0.0721]) tensor([0.0454, 0.2674, 0.4414, 0.2458]) -Greedy action tensor([-1.8375, -0.1383, 0.5658, -0.1180]) tensor([0.0433, 0.2367, 0.4786, 0.2415]) -Greedy action tensor([-1.8906, -0.4558, 0.6433, -0.1570]) tensor([0.0426, 0.1790, 0.5371, 0.2413]) -Greedy action tensor([-1.8946, -0.3347, 0.6348, -0.1493]) tensor([0.0416, 0.1980, 0.5221, 0.2383]) -Greedy action tensor([-1.4469, -0.5996, 0.4521, -0.0640]) tensor([0.0714, 0.1667, 0.4771, 0.2848]) -Greedy action tensor([-1.8340, -0.3977, 0.6030, -0.1317]) tensor([0.0452, 0.1900, 0.5169, 0.2479]) -Greedy action tensor([-1.6515, 0.1199, 0.4632, -0.0984]) tensor([0.0503, 0.2955, 0.4166, 0.2376]) -Greedy action tensor([-0.8539, -0.6088, 0.3831, 0.7188]) tensor([0.0949, 0.1212, 0.3268, 0.4571]) -Greedy action tensor([-1.9090, -0.4277, 0.6495, -0.1614]) tensor([0.0416, 0.1828, 0.5370, 0.2386]) -Greedy action tensor([-1.4959, -0.4946, 0.3884, 0.0956]) tensor([0.0657, 0.1789, 0.4326, 0.3228]) -Greedy action tensor([-1.6004, -0.3733, 0.5297, -0.3577]) tensor([0.0614, 0.2094, 0.5166, 0.2127]) -Greedy action tensor([-1.9029, -0.4213, 0.6424, -0.1532]) tensor([0.0418, 0.1841, 0.5334, 0.2407]) -Greedy action tensor([-1.4777, -0.2206, 0.4103, -0.0267]) tensor([0.0650, 0.2284, 0.4293, 0.2773]) -Greedy action tensor([-1.7759, -0.4246, 0.6289, -0.0358]) tensor([0.0462, 0.1785, 0.5119, 0.2633]) -Greedy action tensor([-1.8705, -0.4362, 0.6177, -0.1343]) tensor([0.0436, 0.1832, 0.5255, 0.2477]) -Greedy action tensor([-1.9217, -0.3648, 0.6432, -0.1601]) tensor([0.0407, 0.1931, 0.5292, 0.2370]) -Greedy action tensor([-1.9062, -1.0592, 0.4496, -0.2523]) tensor([0.0523, 0.1221, 0.5520, 0.2736]) -Greedy action tensor([-1.9200, -0.4183, 0.6542, -0.1661]) tensor([0.0410, 0.1841, 0.5380, 0.2369]) -Greedy action tensor([-1.8709, 0.0689, 0.5619, -0.1577]) tensor([0.0402, 0.2795, 0.4576, 0.2228]) -Greedy action tensor([-1.9242, -0.4499, 0.6593, -0.1717]) tensor([0.0410, 0.1792, 0.5432, 0.2366]) -Greedy action tensor([-1.9152, -0.3046, 0.6339, -0.1800]) tensor([0.0409, 0.2046, 0.5229, 0.2317]) -Greedy action tensor([-1.8088, 0.2320, 0.5248, -0.1579]) tensor([0.0413, 0.3178, 0.4258, 0.2151]) -Greedy action tensor([-1.8951, -0.3434, 0.6267, -0.1724]) tensor([0.0421, 0.1985, 0.5238, 0.2356]) -Greedy action tensor([-1.7736, 0.1678, 0.4947, -0.0666]) tensor([0.0432, 0.3011, 0.4175, 0.2382]) -Greedy action tensor([-1.8357, -0.2692, 0.5874, -0.2640]) tensor([0.0457, 0.2189, 0.5154, 0.2200]) -Greedy action tensor([-1.6428, 0.2921, 0.4224, -0.0296]) tensor([0.0480, 0.3324, 0.3786, 0.2409]) -Greedy action tensor([-1.9417, -0.4564, 0.6705, -0.1778]) tensor([0.0402, 0.1775, 0.5478, 0.2345]) -Greedy action tensor([-1.5914, -0.0549, 0.4595, 0.1855]) tensor([0.0517, 0.2404, 0.4021, 0.3057]) -Greedy action tensor([-1.8462, -0.4815, 0.6173, -0.1407]) tensor([0.0451, 0.1766, 0.5299, 0.2483]) -Greedy action tensor([-1.2299, 0.4083, 0.0875, 0.0235]) tensor([0.0747, 0.3845, 0.2790, 0.2617]) -Greedy action tensor([-1.8628, -0.3998, 0.6854, -0.0449]) tensor([0.0412, 0.1780, 0.5269, 0.2539]) -Greedy action tensor([-0.8241, -0.3138, 0.2423, -0.0616]) tensor([0.1296, 0.2159, 0.3766, 0.2779]) -Greedy action tensor([-1.9435, -0.4368, 0.6624, -0.1797]) tensor([0.0402, 0.1813, 0.5442, 0.2344]) -Greedy action tensor([-1.8528, -0.2660, 0.6075, -0.1255]) tensor([0.0431, 0.2105, 0.5042, 0.2422]) -Greedy action tensor([-1.8890, -0.4007, 0.6477, -0.1526]) tensor([0.0421, 0.1866, 0.5323, 0.2391]) -Greedy action tensor([-1.8727, -0.2093, 0.6049, -0.1553]) tensor([0.0421, 0.2221, 0.5014, 0.2344]) -Greedy action tensor([-1.7840, 0.0883, 0.5035, -0.0840]) tensor([0.0438, 0.2849, 0.4315, 0.2398]) -Greedy action tensor([-1.8393, -0.4436, 0.6122, -0.1461]) tensor([0.0453, 0.1829, 0.5256, 0.2462]) -Greedy action tensor([-1.9435, -0.4210, 0.6601, -0.1804]) tensor([0.0401, 0.1839, 0.5421, 0.2339]) -Greedy action tensor([-1.8014, -0.4543, 0.5795, -0.1348]) tensor([0.0477, 0.1835, 0.5161, 0.2526]) -Greedy action tensor([-1.0039, 0.4807, 0.4552, 0.4119]) tensor([0.0723, 0.3190, 0.3110, 0.2978]) -Greedy action tensor([-0.7432, 0.0511, 0.2975, 0.1131]) tensor([0.1191, 0.2635, 0.3371, 0.2803]) -Greedy action tensor([-1.5262, 0.5457, 0.3468, -0.0110]) tensor([0.0500, 0.3970, 0.3254, 0.2275]) -Greedy action tensor([-1.9073, -0.4417, 0.6489, -0.1621]) tensor([0.0418, 0.1808, 0.5382, 0.2392]) -Greedy action tensor([-1.2442, -0.5663, 0.3287, 0.2601]) tensor([0.0814, 0.1603, 0.3922, 0.3662]) -Greedy action tensor([-1.9189, -0.4442, 0.6571, -0.1665]) tensor([0.0412, 0.1799, 0.5413, 0.2376]) -Greedy action tensor([-1.8567, -0.4346, 0.6402, -0.1137]) tensor([0.0435, 0.1802, 0.5279, 0.2484]) -Greedy action tensor([-1.9011, -0.4514, 0.6472, -0.1613]) tensor([0.0421, 0.1795, 0.5385, 0.2399]) -Greedy action tensor([-1.9137, -0.4466, 0.6522, -0.1651]) tensor([0.0415, 0.1800, 0.5400, 0.2385]) -Greedy action tensor([-1.9264, -0.4381, 0.6596, -0.1700]) tensor([0.0408, 0.1808, 0.5419, 0.2364]) -Greedy action tensor([-1.8777, -0.4588, 0.6532, -0.1389]) tensor([0.0428, 0.1767, 0.5372, 0.2433]) -Greedy action tensor([-1.5056, 0.3895, 0.3345, 0.0616]) tensor([0.0534, 0.3550, 0.3360, 0.2557]) -Greedy action tensor([-1.8856, -0.4373, 0.6353, -0.1542]) tensor([0.0428, 0.1823, 0.5329, 0.2420]) -Greedy action tensor([-1.8581, -0.4106, 0.6216, -0.1365]) tensor([0.0439, 0.1866, 0.5240, 0.2455]) -Greedy action tensor([-1.9468, -0.4509, 0.6681, -0.1819]) tensor([0.0400, 0.1787, 0.5473, 0.2339]) -Greedy action tensor([-1.8413, -0.3896, 0.6177, -0.1235]) tensor([0.0444, 0.1895, 0.5189, 0.2473]) -Greedy action tensor([-1.9302, -0.4242, 0.6592, -0.1726]) tensor([0.0406, 0.1831, 0.5409, 0.2354]) -Greedy action tensor([-1.9046, -0.4370, 0.6609, -0.1479]) tensor([0.0414, 0.1797, 0.5388, 0.2400]) -Greedy action tensor([-1.9120, -0.4556, 0.6577, -0.1615]) tensor([0.0415, 0.1780, 0.5417, 0.2388]) -Greedy action tensor([-1.8237, -0.2805, 0.6320, -0.0824]) tensor([0.0434, 0.2031, 0.5059, 0.2476]) -Greedy action tensor([-1.8101, -0.0432, 0.5384, -0.1287]) tensor([0.0441, 0.2579, 0.4613, 0.2368]) -Greedy action tensor([-1.5447, 0.1995, 0.3919, -0.0293]) tensor([0.0549, 0.3142, 0.3809, 0.2500]) -Greedy action tensor([ 1.4013, -0.4287, -0.2842, 0.5753]) tensor([0.5607, 0.0899, 0.1039, 0.2455]) -Greedy action tensor([ 1.5045, -0.6693, -0.2258, 0.3384]) tensor([0.6240, 0.0710, 0.1106, 0.1944]) -Greedy action tensor([ 1.6637, -0.2283, -0.7563, 0.9712]) tensor([0.5747, 0.0866, 0.0511, 0.2875]) -Greedy action tensor([ 1.3821, -0.0393, -0.1149, -0.0785]) tensor([0.5892, 0.1422, 0.1319, 0.1367]) -Greedy action tensor([ 1.2587, -0.3015, -0.1260, 0.1239]) tensor([0.5612, 0.1179, 0.1405, 0.1804]) -Greedy action tensor([ 0.9848, -0.4952, -0.5362, 0.2288]) tensor([0.5220, 0.1188, 0.1141, 0.2451]) -Greedy action tensor([ 0.8872, -0.3140, -0.2918, 0.2480]) tensor([0.4681, 0.1408, 0.1440, 0.2470]) -Greedy action tensor([ 0.6780, -0.1125, -0.0779, 0.1127]) tensor([0.4014, 0.1821, 0.1885, 0.2281]) -Greedy action tensor([ 1.3540, -0.6295, -0.2880, 0.4568]) tensor([0.5751, 0.0791, 0.1113, 0.2345]) -Greedy action tensor([ 1.5610, -1.1099, -0.2417, 0.1219]) tensor([0.6797, 0.0470, 0.1121, 0.1612]) -Greedy action tensor([ 1.5543, -0.6766, -0.5362, 0.5130]) tensor([0.6313, 0.0678, 0.0780, 0.2228]) -Greedy action tensor([ 1.0632, -0.3680, -0.6594, 0.2263]) tensor([0.5403, 0.1292, 0.0965, 0.2340]) -Greedy action tensor([ 1.9736, -1.0411, -0.4993, 0.4515]) tensor([0.7398, 0.0363, 0.0624, 0.1615]) -Greedy action tensor([ 0.9843, -0.1928, -0.3815, 0.4465]) tensor([0.4657, 0.1435, 0.1188, 0.2720]) -Greedy action tensor([ 1.8377, -0.3418, -0.3022, 0.4266]) tensor([0.6781, 0.0767, 0.0798, 0.1654]) -Greedy action tensor([ 1.3311, -0.5048, -0.2177, 0.3896]) tensor([0.5675, 0.0905, 0.1206, 0.2214]) -Greedy action tensor([ 0.8194, -0.8281, -0.1579, 0.3081]) tensor([0.4611, 0.0888, 0.1735, 0.2765]) -Greedy action tensor([ 1.0480, -0.3287, -0.3700, 0.0568]) tensor([0.5360, 0.1353, 0.1298, 0.1989]) -Greedy action tensor([ 1.5440, -0.6418, -0.3647, 0.3440]) tensor([0.6403, 0.0720, 0.0949, 0.1928]) -Greedy action tensor([ 1.6265, -0.8092, -0.3523, 0.6011]) tensor([0.6311, 0.0553, 0.0872, 0.2264]) -Greedy action tensor([ 1.3812, -0.1259, -0.7227, 0.1989]) tensor([0.6060, 0.1343, 0.0739, 0.1858]) -Greedy action tensor([ 2.1209, -0.7561, -0.0308, 1.2308]) tensor([0.6316, 0.0356, 0.0734, 0.2594]) -Greedy action tensor([ 1.4257, -0.5728, -0.4954, 0.3497]) tensor([0.6162, 0.0835, 0.0902, 0.2101]) -Greedy action tensor([ 1.6262, -1.0230, 0.3728, -0.0436]) tensor([0.6475, 0.0458, 0.1849, 0.1219]) -Greedy action tensor([ 2.5034, -1.7100, -0.1380, 0.3967]) tensor([0.8280, 0.0123, 0.0590, 0.1007]) -Greedy action tensor([ 1.4186, -0.0310, -1.4274, 0.4132]) tensor([0.6029, 0.1415, 0.0350, 0.2206]) -Greedy action tensor([ 1.4235, -0.3947, -0.3352, 0.2485]) tensor([0.6085, 0.0988, 0.1048, 0.1879]) -Greedy action tensor([ 1.8111, -0.9243, -0.1639, 0.6689]) tensor([0.6567, 0.0426, 0.0911, 0.2096]) -Greedy action tensor([ 1.3794, 0.3253, -0.5126, -0.2234]) tensor([0.5880, 0.2049, 0.0887, 0.1184]) -Greedy action tensor([ 1.5639, -0.8507, -0.3455, 0.3806]) tensor([0.6477, 0.0579, 0.0960, 0.1984]) -Greedy action tensor([ 1.3294, -0.2116, -0.9540, 0.1551]) tensor([0.6153, 0.1318, 0.0627, 0.1902]) -Greedy action tensor([ 1.1555, -0.1682, -0.9296, 0.0042]) tensor([0.5859, 0.1559, 0.0728, 0.1853]) -Greedy action tensor([ 1.0163, -0.0026, -0.0290, 0.0948]) tensor([0.4738, 0.1710, 0.1666, 0.1885]) -Greedy action tensor([ 1.3481, -0.3937, -0.7945, 0.1995]) tensor([0.6213, 0.1088, 0.0729, 0.1970]) -Greedy action tensor([ 1.7223, -0.2865, -0.4545, 0.6564]) tensor([0.6281, 0.0843, 0.0712, 0.2163]) -Greedy action tensor([ 2.0092, -0.6952, -0.5113, 0.5209]) tensor([0.7283, 0.0487, 0.0586, 0.1644]) -Greedy action tensor([ 2.2809, -1.4084, 0.1756, 0.6100]) tensor([0.7491, 0.0187, 0.0913, 0.1409]) -Greedy action tensor([ 1.3922, 0.0019, -1.3790, 0.5142]) tensor([0.5790, 0.1442, 0.0362, 0.2406]) -Greedy action tensor([ 1.9211, -1.0477, -0.2585, 0.3462]) tensor([0.7291, 0.0375, 0.0825, 0.1510]) -Greedy action tensor([ 1.0021, -0.1376, -0.0603, 0.2027]) tensor([0.4728, 0.1513, 0.1634, 0.2126]) -Greedy action tensor([ 1.6138, -0.6290, -0.2006, 0.4244]) tensor([0.6355, 0.0675, 0.1036, 0.1935]) -Greedy action tensor([ 1.3401, 0.0888, -1.3766, -0.0443]) tensor([0.6239, 0.1785, 0.0412, 0.1563]) -Greedy action tensor([ 1.4695, -0.6895, -0.3240, 0.2644]) tensor([0.6323, 0.0730, 0.1052, 0.1895]) -Greedy action tensor([ 1.7811, -1.0579, -0.0135, 0.9691]) tensor([0.5993, 0.0350, 0.0996, 0.2661]) -Greedy action tensor([ 2.2849, -1.1169, -0.5208, 0.7404]) tensor([0.7650, 0.0255, 0.0463, 0.1633]) -Greedy action tensor([ 1.2792, -0.2102, -1.0750, 0.0813]) tensor([0.6164, 0.1390, 0.0585, 0.1860]) -Greedy action tensor([ 1.2235, -0.7738, -0.4767, 0.2565]) tensor([0.5887, 0.0799, 0.1075, 0.2238]) -Greedy action tensor([ 3.2355, -1.6127, -0.1364, 1.2052]) tensor([0.8522, 0.0067, 0.0293, 0.1119]) -Greedy action tensor([ 1.1950, -0.1760, -0.8542, 0.0155]) tensor([0.5917, 0.1502, 0.0762, 0.1819]) -Greedy action tensor([ 1.3544, -0.4909, -0.4466, 0.4298]) tensor([0.5815, 0.0919, 0.0960, 0.2307]) -Greedy action tensor([ 1.8391, 0.2966, -0.9538, 0.1474]) tensor([0.6853, 0.1465, 0.0420, 0.1262]) -Greedy action tensor([ 1.2707, -0.3517, -0.3509, 0.2830]) tensor([0.5658, 0.1117, 0.1118, 0.2107]) -Greedy action tensor([ 1.6418, -0.6174, -0.3198, 0.1985]) tensor([0.6751, 0.0705, 0.0949, 0.1594]) -Greedy action tensor([ 1.3064, -0.2595, -0.9707, 0.7706]) tensor([0.5272, 0.1101, 0.0541, 0.3085]) -Greedy action tensor([ 0.9971, -0.3195, -0.6322, 0.6336]) tensor([0.4631, 0.1241, 0.0908, 0.3220]) -Greedy action tensor([ 1.2858, -0.3722, -0.4855, 0.3567]) tensor([0.5696, 0.1085, 0.0969, 0.2250]) -Greedy action tensor([ 1.3975, 0.2041, -1.0460, 0.2398]) tensor([0.5868, 0.1779, 0.0510, 0.1844]) -Greedy action tensor([ 1.8704, 0.7750, -0.2678, 0.5573]) tensor([0.5810, 0.1943, 0.0685, 0.1563]) -Greedy action tensor([ 0.9227, -0.2694, -0.0698, 0.3514]) tensor([0.4466, 0.1356, 0.1655, 0.2522]) -Greedy action tensor([ 1.5840, -0.4959, -0.7356, 0.3808]) tensor([0.6564, 0.0820, 0.0645, 0.1971]) -Greedy action tensor([ 1.8044, -0.0979, -0.7422, 0.7210]) tensor([0.6386, 0.0953, 0.0500, 0.2161]) -Greedy action tensor([ 2.0218, -1.0623, -0.5620, 0.6246]) tensor([0.7307, 0.0334, 0.0552, 0.1807]) -Greedy action tensor([ 1.2486, -0.3338, -0.2223, 0.0597]) tensor([0.5748, 0.1181, 0.1320, 0.1751]) -Greedy action tensor([ 2.1695, -0.9830, -0.4849, 0.6602]) tensor([0.7495, 0.0320, 0.0527, 0.1657]) -Greedy action tensor([ 1.1106, -0.5822, -0.2997, 0.4518]) tensor([0.5140, 0.0946, 0.1254, 0.2660]) -Greedy action tensor([ 0.5827, 0.0311, -0.1400, 0.0911]) tensor([0.3741, 0.2155, 0.1816, 0.2288]) -Greedy action tensor([ 1.7454, -0.7974, -0.6039, 0.4012]) tensor([0.6969, 0.0548, 0.0665, 0.1817]) -Greedy action tensor([ 1.3376, -0.7150, -0.8381, 0.3757]) tensor([0.6157, 0.0791, 0.0699, 0.2353]) -Greedy action tensor([ 1.3301, -0.3188, -0.5484, 0.3231]) tensor([0.5847, 0.1124, 0.0893, 0.2136]) -Greedy action tensor([ 1.1482, -0.1409, -0.5488, -0.0453]) tensor([0.5676, 0.1564, 0.1040, 0.1721]) -Greedy action tensor([ 1.1633, 0.0605, -0.0608, -0.7255]) tensor([0.5627, 0.1868, 0.1654, 0.0851]) -Greedy action tensor([ 1.8223, -0.6816, -0.6630, 0.6103]) tensor([0.6837, 0.0559, 0.0570, 0.2035]) -Greedy action tensor([ 1.7402, -0.2831, -0.8767, 0.0754]) tensor([0.7171, 0.0948, 0.0524, 0.1357]) -Greedy action tensor([ 1.3867, -0.6494, -0.1742, 0.0103]) tensor([0.6277, 0.0820, 0.1318, 0.1585]) -Greedy action tensor([ 1.2765, -0.5352, -0.5824, 0.5295]) tensor([0.5577, 0.0911, 0.0869, 0.2642]) -Greedy action tensor([ 1.4516, 0.0631, -1.0035, 0.5034]) tensor([0.5805, 0.1448, 0.0498, 0.2249]) -Greedy action tensor([ 1.4043, -0.3133, -0.9298, 0.1494]) tensor([0.6404, 0.1150, 0.0621, 0.1826]) -Greedy action tensor([ 1.4647, 0.1009, -0.7707, 0.6216]) tensor([0.5577, 0.1426, 0.0596, 0.2400]) -Greedy action tensor([ 1.6062, -0.0492, -0.5084, 0.0584]) tensor([0.6560, 0.1253, 0.0792, 0.1395]) -Greedy action tensor([ 1.1376, -0.4962, -0.5040, 0.5033]) tensor([0.5211, 0.1017, 0.1009, 0.2763]) -Greedy action tensor([ 1.0617, -0.3404, -0.4585, 0.1281]) tensor([0.5383, 0.1324, 0.1177, 0.2116]) -Greedy action tensor([1.4550, 0.1674, 0.0355, 0.2785]) tensor([0.5476, 0.1511, 0.1324, 0.1689]) -Greedy action tensor([-0.3023, -0.0830, 0.7382, -0.6303]) tensor([0.1725, 0.2148, 0.4884, 0.1243]) -Greedy action tensor([ 0.8632, -0.0881, 0.7814, 0.3145]) tensor([0.3466, 0.1339, 0.3193, 0.2002]) -Greedy action tensor([ 1.6192, -1.0072, 0.4009, 0.6343]) tensor([0.5742, 0.0415, 0.1698, 0.2145]) -Greedy action tensor([ 1.4910, -0.8118, -0.2395, 1.0458]) tensor([0.5214, 0.0521, 0.0924, 0.3341]) -Greedy action tensor([-1.2508, -0.8520, 0.1011, -0.3173]) tensor([0.1124, 0.1675, 0.4343, 0.2858]) -Greedy action tensor([ 1.2125, -0.6376, 0.3081, 0.2289]) tensor([0.5166, 0.0812, 0.2091, 0.1932]) -Greedy action tensor([-0.0086, -0.1278, -0.0047, -0.5068]) tensor([0.2858, 0.2537, 0.2869, 0.1736]) -Greedy action tensor([ 0.1433, -1.1663, 0.5216, -0.2804]) tensor([0.2955, 0.0798, 0.4313, 0.1934]) -Greedy action tensor([ 0.1347, -0.0746, -0.2341, 0.3862]) tensor([0.2639, 0.2141, 0.1825, 0.3394]) -Greedy action tensor([-0.1373, -0.1741, 0.4439, -0.7940]) tensor([0.2342, 0.2257, 0.4187, 0.1214]) -Greedy action tensor([ 0.1082, 0.5594, 0.1914, -0.5389]) tensor([0.2392, 0.3756, 0.2600, 0.1252]) -Greedy action tensor([ 1.6244, -0.9274, 0.5457, 0.2673]) tensor([0.5969, 0.0465, 0.2030, 0.1536]) -Greedy action tensor([ 0.9126, 0.3389, -0.3500, -0.2905]) tensor([0.4658, 0.2625, 0.1318, 0.1399]) -Greedy action tensor([-0.2984, -1.2356, -0.0553, -0.0641]) tensor([0.2544, 0.0997, 0.3244, 0.3216]) -Greedy action tensor([ 1.2006, -0.8364, 0.2428, 1.4515]) tensor([0.3572, 0.0466, 0.1371, 0.4591]) -Greedy action tensor([ 1.4786, -0.9611, 0.5100, 1.0008]) tensor([0.4792, 0.0418, 0.1819, 0.2972]) -Greedy action tensor([ 0.9592, 0.1049, -1.1504, 0.1163]) tensor([0.5057, 0.2152, 0.0613, 0.2177]) -Greedy action tensor([-0.7211, 0.1715, 0.5224, -0.3652]) tensor([0.1199, 0.2929, 0.4160, 0.1712]) -Greedy action tensor([0.2773, 0.1300, 0.9368, 0.5478]) tensor([0.1958, 0.1690, 0.3786, 0.2566]) -Greedy action tensor([ 1.5650, -0.3415, 0.2612, 0.9676]) tensor([0.5075, 0.0754, 0.1378, 0.2793]) -Greedy action tensor([-0.2466, -0.5585, -0.2344, 0.3304]) tensor([0.2210, 0.1618, 0.2237, 0.3935]) -Greedy action tensor([-0.7878, -0.0518, 0.3619, -1.0047]) tensor([0.1418, 0.2961, 0.4478, 0.1142]) -Greedy action tensor([-0.5444, -0.0394, 0.1899, -0.8747]) tensor([0.1832, 0.3035, 0.3817, 0.1316]) -Greedy action tensor([ 0.6305, -1.3274, 0.6928, -0.4408]) tensor([0.3925, 0.0554, 0.4177, 0.1344]) -Greedy action tensor([ 0.5971, -0.9312, 0.1198, -0.2727]) tensor([0.4432, 0.0961, 0.2750, 0.1857]) -Greedy action tensor([ 0.4188, -0.9200, -0.5973, 0.7848]) tensor([0.3261, 0.0855, 0.1181, 0.4703]) -Greedy action tensor([ 1.5277, -0.2681, 0.9624, 0.5838]) tensor([0.4710, 0.0782, 0.2676, 0.1833]) -Greedy action tensor([ 0.0556, 0.5670, -0.3114, -0.7881]) tensor([0.2638, 0.4400, 0.1828, 0.1135]) -Greedy action tensor([-0.3972, 0.8757, -0.2285, -0.2586]) tensor([0.1449, 0.5173, 0.1715, 0.1664]) -Greedy action tensor([-0.2146, -0.0367, -0.7203, 0.1368]) tensor([0.2370, 0.2832, 0.1429, 0.3368]) -Greedy action tensor([-0.5026, -0.5401, 0.1632, -1.3325]) tensor([0.2301, 0.2217, 0.4478, 0.1004]) -Greedy action tensor([ 0.3301, -0.5267, 0.2631, -0.5109]) tensor([0.3583, 0.1521, 0.3351, 0.1545]) -Greedy action tensor([0.1997, 0.4066, 0.6628, 0.5800]) tensor([0.1893, 0.2329, 0.3009, 0.2769]) -Greedy action tensor([ 0.5579, 0.5702, -1.2449, -0.1886]) tensor([0.3772, 0.3819, 0.0622, 0.1788]) -Greedy action tensor([0.8273, 0.1956, 0.5514, 0.8511]) tensor([0.3017, 0.1604, 0.2290, 0.3090]) -Greedy action tensor([ 0.8450, -0.6282, -0.5220, 0.0417]) tensor([0.5176, 0.1186, 0.1319, 0.2318]) -Greedy action tensor([ 0.2365, -0.7564, -1.3479, -0.6122]) tensor([0.4991, 0.1849, 0.1024, 0.2136]) -Greedy action tensor([ 1.2611, -0.2030, 0.1292, -0.4111]) tensor([0.5742, 0.1328, 0.1851, 0.1079]) -Greedy action tensor([-0.0200, -0.0388, -0.4683, 0.1349]) tensor([0.2640, 0.2591, 0.1686, 0.3083]) -Greedy action tensor([-0.0761, -1.9430, -0.3122, -0.3718]) tensor([0.3720, 0.0575, 0.2938, 0.2767]) -Greedy action tensor([ 0.3509, -1.0324, -0.0746, 0.2512]) tensor([0.3560, 0.0893, 0.2326, 0.3222]) -Greedy action tensor([-0.1665, 1.3512, 0.0904, -0.7959]) tensor([0.1354, 0.6175, 0.1750, 0.0721]) -Greedy action tensor([-0.2150, -0.6324, 0.2892, -0.9915]) tensor([0.2650, 0.1745, 0.4386, 0.1219]) -Greedy action tensor([ 0.0817, -0.7364, 0.8892, -0.3405]) tensor([0.2305, 0.1017, 0.5167, 0.1511]) -Greedy action tensor([-1.0368, -0.6181, -0.6803, -1.1865]) tensor([0.2079, 0.3161, 0.2970, 0.1790]) -Greedy action tensor([-0.6963, -0.6722, 0.5518, -0.6075]) tensor([0.1515, 0.1552, 0.5278, 0.1656]) -Greedy action tensor([-0.1467, -1.0510, -0.7317, 0.1851]) tensor([0.2980, 0.1207, 0.1660, 0.4153]) -Greedy action tensor([-1.0333, -0.4310, 0.2234, -1.5422]) tensor([0.1441, 0.2631, 0.5062, 0.0866]) -Greedy action tensor([ 0.4298, -1.5245, 0.9617, -0.5679]) tensor([0.3113, 0.0441, 0.5299, 0.1148]) -Greedy action tensor([-0.5144, -0.0908, 0.0595, -0.3229]) tensor([0.1814, 0.2770, 0.3219, 0.2196]) -Greedy action tensor([-0.9334, -0.5165, 0.6298, -1.5363]) tensor([0.1276, 0.1936, 0.6090, 0.0698]) -Greedy action tensor([-0.2752, -0.5417, 0.3517, -0.0656]) tensor([0.2053, 0.1573, 0.3843, 0.2532]) -Greedy action tensor([-0.0847, -0.0086, -0.8560, -0.3569]) tensor([0.3028, 0.3267, 0.1400, 0.2306]) -Greedy action tensor([ 0.4505, 0.7774, 0.8561, -0.0308]) tensor([0.2220, 0.3078, 0.3330, 0.1372]) -Greedy action tensor([ 0.4211, -1.3144, 0.4720, -0.9208]) tensor([0.4016, 0.0708, 0.4226, 0.1050]) -Greedy action tensor([-0.5763, -0.5486, 0.4434, -0.4029]) tensor([0.1670, 0.1716, 0.4628, 0.1986]) -Greedy action tensor([ 0.3133, -0.9565, 0.5053, -0.4800]) tensor([0.3396, 0.0954, 0.4114, 0.1536]) -Greedy action tensor([-0.0422, 0.5972, -0.9858, -0.4004]) tensor([0.2510, 0.4758, 0.0977, 0.1755]) -Greedy action tensor([-1.1486, -0.3005, -0.1239, -0.7745]) tensor([0.1320, 0.3083, 0.3678, 0.1919]) -Greedy action tensor([-0.0161, -1.4430, 1.1139, -0.2351]) tensor([0.1946, 0.0467, 0.6024, 0.1563]) -Greedy action tensor([-0.0842, -0.1244, 0.4420, 0.1930]) tensor([0.2011, 0.1932, 0.3404, 0.2653]) -Greedy action tensor([-0.1857, -0.0680, 1.2794, -0.4233]) tensor([0.1381, 0.1553, 0.5977, 0.1089]) -Greedy action tensor([ 0.0858, -0.0342, 0.0306, -0.7708]) tensor([0.3070, 0.2722, 0.2905, 0.1303]) -Greedy action tensor([-1.5620, -1.2249, 0.3817, -0.7649]) tensor([0.0862, 0.1207, 0.6019, 0.1912]) -Greedy action tensor([ 0.3181, -0.7310, -0.3443, -0.7787]) tensor([0.4546, 0.1592, 0.2344, 0.1518]) -Greedy action tensor([ 0.5777, -1.0243, 0.0151, 1.0269]) tensor([0.2995, 0.0604, 0.1707, 0.4694]) -Greedy action tensor([-0.4555, -0.4435, -0.1168, -0.7295]) tensor([0.2395, 0.2424, 0.3360, 0.1821]) -Greedy action tensor([ 0.4837, -0.3398, 0.6273, -0.5125]) tensor([0.3375, 0.1481, 0.3897, 0.1246]) -Greedy action tensor([ 0.7714, -0.7085, -0.0302, 0.3014]) tensor([0.4346, 0.0989, 0.1949, 0.2716]) -Greedy action tensor([ 1.1845, -1.0504, -0.0587, -0.6388]) tensor([0.6423, 0.0687, 0.1853, 0.1037]) -Greedy action tensor([ 0.5244, -1.0798, -0.3746, -0.6142]) tensor([0.5186, 0.1043, 0.2111, 0.1661]) -Greedy action tensor([1.4768, 0.2217, 0.1125, 0.4266]) tensor([0.5290, 0.1508, 0.1352, 0.1851]) -Greedy action tensor([-0.2340, 0.0629, -0.3125, -0.9665]) tensor([0.2666, 0.3588, 0.2465, 0.1282]) -Greedy action tensor([-0.3282, -0.6420, -0.3470, -0.1236]) tensor([0.2539, 0.1855, 0.2491, 0.3115]) -Greedy action tensor([-0.2396, -0.3165, -0.1262, -1.1407]) tensor([0.2897, 0.2682, 0.3245, 0.1176]) -Greedy action tensor([-0.5320, -0.7429, -0.2945, 0.3021]) tensor([0.1859, 0.1505, 0.2357, 0.4280]) -Greedy action tensor([ 0.2265, -0.4785, -0.5841, -0.0666]) tensor([0.3725, 0.1841, 0.1656, 0.2779]) -Greedy action tensor([-0.1813, -0.0988, -1.0404, -0.1172]) tensor([0.2797, 0.3037, 0.1185, 0.2982]) -Greedy action tensor([ 0.8296, -0.0713, 0.3391, 0.3311]) tensor([0.3808, 0.1547, 0.2332, 0.2313]) -Greedy action tensor([-0.7879, -0.4347, -0.4208, -1.1098]) tensor([0.2178, 0.3100, 0.3144, 0.1578]) -Greedy action tensor([-0.3274, -0.5164, 0.2551, -0.6936]) tensor([0.2319, 0.1920, 0.4153, 0.1608]) -Greedy action tensor([ 0.4576, -0.0906, 0.0015, -0.7479]) tensor([0.3982, 0.2302, 0.2524, 0.1193]) -Greedy action tensor([ 0.9401, -0.7992, 0.1929, -0.3863]) tensor([0.5223, 0.0917, 0.2474, 0.1386]) -Greedy action tensor([ 0.9839, -0.7320, 0.0883, -0.4307]) tensor([0.5461, 0.0982, 0.2230, 0.1327]) -Greedy action tensor([ 0.4438, 0.0605, -0.0082, 0.0138]) tensor([0.3369, 0.2296, 0.2144, 0.2191]) -Greedy action tensor([ 0.8495, -0.7278, -0.0486, -0.5059]) tensor([0.5343, 0.1103, 0.2176, 0.1378]) -Greedy action tensor([ 0.3620, 0.0032, -0.0093, -0.1770]) tensor([0.3365, 0.2351, 0.2321, 0.1963]) -Greedy action tensor([ 1.1864, -0.6251, 0.0116, -0.7219]) tensor([0.6171, 0.1008, 0.1906, 0.0915]) -Greedy action tensor([ 0.4210, -0.1797, -0.0924, -0.0190]) tensor([0.3583, 0.1965, 0.2144, 0.2307]) -Greedy action tensor([ 0.7189, -0.2944, -0.1276, -0.1836]) tensor([0.4551, 0.1652, 0.1952, 0.1846]) -Greedy action tensor([ 0.4033, -0.1188, -0.0067, -0.0534]) tensor([0.3460, 0.2053, 0.2296, 0.2191]) -Greedy action tensor([ 0.4375, -0.3439, -0.1140, -0.1003]) tensor([0.3820, 0.1749, 0.2201, 0.2231]) -Greedy action tensor([ 0.5601, -0.1696, -0.0128, -0.0727]) tensor([0.3881, 0.1870, 0.2188, 0.2061]) -Greedy action tensor([ 0.7664, -0.3930, 0.2553, -0.2518]) tensor([0.4396, 0.1379, 0.2637, 0.1588]) -Greedy action tensor([ 0.5725, 0.3072, -0.1885, 0.1258]) tensor([0.3480, 0.2669, 0.1626, 0.2226]) -Greedy action tensor([ 0.6005, -0.3225, -0.0103, -0.2109]) tensor([0.4194, 0.1666, 0.2277, 0.1863]) -Greedy action tensor([ 0.8390, -0.6841, 0.0099, -0.4652]) tensor([0.5193, 0.1132, 0.2266, 0.1409]) -Greedy action tensor([ 0.8966, -0.5488, -0.1814, -0.4374]) tensor([0.5437, 0.1281, 0.1850, 0.1432]) -Greedy action tensor([ 0.9168, -0.2516, -0.0174, -0.2392]) tensor([0.4954, 0.1540, 0.1947, 0.1559]) -Greedy action tensor([ 1.0047, -0.5751, 0.0832, -0.4820]) tensor([0.5464, 0.1126, 0.2174, 0.1236]) -Greedy action tensor([ 0.2054, 0.0980, -0.0403, -0.1577]) tensor([0.2962, 0.2661, 0.2317, 0.2060]) -Greedy action tensor([ 0.5125, -0.1054, -0.1578, 0.0008]) tensor([0.3773, 0.2034, 0.1930, 0.2262]) -Greedy action tensor([ 0.9456, -0.3081, -0.0781, -0.1191]) tensor([0.5026, 0.1435, 0.1806, 0.1733]) -Greedy action tensor([ 0.8627, -0.8174, 0.0264, -0.2806]) tensor([0.5159, 0.0961, 0.2235, 0.1644]) -Greedy action tensor([ 0.4705, 0.1064, 0.0131, -0.0786]) tensor([0.3442, 0.2392, 0.2179, 0.1988]) -Greedy action tensor([ 0.4878, -0.1282, -0.0254, -0.3427]) tensor([0.3884, 0.2098, 0.2325, 0.1693]) -Greedy action tensor([ 0.8891, -0.7440, -0.0646, -0.2539]) tensor([0.5265, 0.1028, 0.2029, 0.1679]) -Greedy action tensor([ 0.8156, -0.8094, 0.0987, -0.2662]) tensor([0.4940, 0.0973, 0.2412, 0.1675]) -Greedy action tensor([ 0.7594, -0.5512, -0.0236, -0.3136]) tensor([0.4834, 0.1303, 0.2209, 0.1653]) -Greedy action tensor([ 0.6415, 0.2380, 0.1124, -0.0404]) tensor([0.3620, 0.2418, 0.2132, 0.1830]) -Greedy action tensor([ 0.7973, -0.4631, -0.1084, -0.1974]) tensor([0.4860, 0.1378, 0.1965, 0.1797]) -Greedy action tensor([ 0.9960, -0.8681, -0.0105, -0.4234]) tensor([0.5674, 0.0880, 0.2074, 0.1372]) -Greedy action tensor([ 0.7302, -0.3694, 0.0082, -0.5092]) tensor([0.4743, 0.1580, 0.2304, 0.1373]) -Greedy action tensor([ 0.4949, -0.4660, -0.0855, -0.3311]) tensor([0.4202, 0.1607, 0.2352, 0.1839]) -Greedy action tensor([ 0.4256, -0.2265, -0.0893, -0.4785]) tensor([0.3963, 0.2065, 0.2368, 0.1605]) -Greedy action tensor([ 1.1202, -0.7814, -0.0164, -0.5104]) tensor([0.6002, 0.0896, 0.1926, 0.1175]) -Greedy action tensor([ 0.7309, -0.5766, -0.1706, -0.5843]) tensor([0.5142, 0.1391, 0.2087, 0.1380]) -Greedy action tensor([ 0.4198, -0.3242, -0.0854, -0.1627]) tensor([0.3792, 0.1802, 0.2288, 0.2118]) -Greedy action tensor([ 0.6039, -0.2580, -0.1515, -0.0102]) tensor([0.4110, 0.1736, 0.1931, 0.2224]) -Greedy action tensor([ 0.8225, -0.5987, 0.0400, -0.3089]) tensor([0.4947, 0.1194, 0.2262, 0.1596]) -Greedy action tensor([ 0.7521, -0.4507, -0.0290, -0.2555]) tensor([0.4709, 0.1415, 0.2157, 0.1720]) -Greedy action tensor([ 0.8833, -0.0276, -0.1124, -0.1042]) tensor([0.4664, 0.1876, 0.1723, 0.1737]) -Greedy action tensor([ 0.5428, 0.0156, -0.1414, -0.2751]) tensor([0.3943, 0.2327, 0.1989, 0.1740]) -Greedy action tensor([ 0.8734, -0.5115, 0.1393, -0.3586]) tensor([0.4946, 0.1238, 0.2374, 0.1443]) -Greedy action tensor([ 0.9880, -0.4808, -0.0308, -0.7489]) tensor([0.5658, 0.1303, 0.2043, 0.0996]) -Greedy action tensor([ 1.0885, -0.5754, -0.0893, -0.4512]) tensor([0.5842, 0.1106, 0.1799, 0.1253]) -Greedy action tensor([ 0.5556, -0.2208, -0.1276, 0.0705]) tensor([0.3875, 0.1783, 0.1957, 0.2385]) -Greedy action tensor([ 0.6596, -0.6597, 0.1292, -0.2159]) tensor([0.4401, 0.1176, 0.2589, 0.1834]) -Greedy action tensor([ 1.0020, -0.9701, 0.0910, -0.7036]) tensor([0.5804, 0.0808, 0.2334, 0.1054]) -Greedy action tensor([ 0.7620, -0.4931, -0.0578, -0.4111]) tensor([0.4914, 0.1401, 0.2165, 0.1520]) -Greedy action tensor([ 0.8346, -0.5784, -0.0132, -0.5309]) tensor([0.5189, 0.1263, 0.2223, 0.1325]) -Greedy action tensor([ 0.7675, -0.4598, 0.0125, -0.3418]) tensor([0.4778, 0.1400, 0.2246, 0.1576]) -Greedy action tensor([ 0.8308, -0.6228, 0.0656, -0.4235]) tensor([0.5040, 0.1178, 0.2345, 0.1438]) -Greedy action tensor([ 0.4050, -0.4345, 0.0331, -0.0969]) tensor([0.3668, 0.1584, 0.2528, 0.2220]) -Greedy action tensor([ 1.0734, -0.5788, -0.3033, -0.6046]) tensor([0.6132, 0.1175, 0.1548, 0.1145]) -Greedy action tensor([ 0.9021, -0.6105, 0.0945, -0.2372]) tensor([0.5035, 0.1109, 0.2245, 0.1611]) -Greedy action tensor([ 0.5653, 0.1329, -0.0402, -0.0723]) tensor([0.3672, 0.2383, 0.2004, 0.1941]) -Greedy action tensor([ 0.4588, -0.4972, 0.0216, -0.6581]) tensor([0.4242, 0.1631, 0.2739, 0.1388]) -Greedy action tensor([ 0.8269, -0.3041, -0.0788, -0.1020]) tensor([0.4713, 0.1521, 0.1905, 0.1861]) -Greedy action tensor([ 1.1586, 0.0559, 0.0124, -0.0204]) tensor([0.5109, 0.1696, 0.1624, 0.1571]) -Greedy action tensor([ 0.8212, -0.4781, -0.0446, -0.2419]) tensor([0.4905, 0.1338, 0.2064, 0.1694]) -Greedy action tensor([ 0.6683, -0.3300, -0.0158, -0.0731]) tensor([0.4256, 0.1568, 0.2147, 0.2028]) -Greedy action tensor([ 1.1380, -0.8039, 0.1943, -0.3862]) tensor([0.5713, 0.0819, 0.2223, 0.1244]) -Greedy action tensor([ 0.5437, -0.4608, -0.0852, -0.0618]) tensor([0.4090, 0.1498, 0.2181, 0.2232]) -Greedy action tensor([ 0.6157, -0.2891, 0.0608, -0.1713]) tensor([0.4109, 0.1662, 0.2359, 0.1870]) -Greedy action tensor([ 0.8121, -0.0915, -0.1134, -0.1388]) tensor([0.4571, 0.1852, 0.1812, 0.1766]) -Greedy action tensor([ 0.8061, -0.5460, -0.0544, -0.2250]) tensor([0.4906, 0.1269, 0.2075, 0.1750]) -Greedy action tensor([ 1.3227, -0.8074, -0.0056, -0.7300]) tensor([0.6613, 0.0786, 0.1752, 0.0849]) -Greedy action tensor([ 0.7405, -0.6249, -0.0222, -0.3487]) tensor([0.4859, 0.1240, 0.2266, 0.1635]) -Greedy action tensor([ 0.4581, -0.3912, -0.0878, -0.1175]) tensor([0.3892, 0.1665, 0.2255, 0.2189]) -Greedy action tensor([ 0.9467, -0.6094, -0.0033, -0.4329]) tensor([0.5407, 0.1141, 0.2091, 0.1361]) -Greedy action tensor([ 0.2974, -0.0409, 0.1128, -0.1199]) tensor([0.3122, 0.2226, 0.2596, 0.2057]) -Greedy action tensor([ 0.9742, -0.5165, -0.1002, -0.3862]) tensor([0.5485, 0.1235, 0.1873, 0.1407]) -Greedy action tensor([ 0.7167, -0.2241, 0.1329, -0.2461]) tensor([0.4292, 0.1675, 0.2394, 0.1639]) -Greedy action tensor([ 1.0277, -0.5761, -0.2373, -0.1809]) tensor([0.5612, 0.1129, 0.1584, 0.1676]) -Greedy action tensor([ 0.8741, -0.8287, 0.1156, -0.3808]) tensor([0.5166, 0.0941, 0.2420, 0.1473]) -Greedy action tensor([ 0.7615, -0.6681, -0.0285, -0.1708]) tensor([0.4792, 0.1147, 0.2175, 0.1886]) -Greedy action tensor([ 0.9204, -0.6296, -0.1101, -0.4336]) tensor([0.5473, 0.1162, 0.1953, 0.1413]) -Greedy action tensor([ 0.0613, -0.0302, -0.1384, -0.0206]) tensor([0.2737, 0.2498, 0.2242, 0.2522]) -Greedy action tensor([ 0.7038, -0.4346, 0.0425, -0.3205]) tensor([0.4555, 0.1459, 0.2351, 0.1635]) -Greedy action tensor([ 1.1463, -0.8954, -0.0027, -0.5872]) tensor([0.6160, 0.0800, 0.1952, 0.1088]) -Greedy action tensor([ 0.5861, 0.0041, -0.0917, -0.3571]) tensor([0.4072, 0.2275, 0.2067, 0.1586]) -Greedy action tensor([-1.8661, -0.2219, 0.5969, -0.1244]) tensor([0.0423, 0.2191, 0.4969, 0.2416]) -Greedy action tensor([0.1084, 0.6241, 0.4760, 0.8920]) tensor([0.1585, 0.2655, 0.2289, 0.3471]) -Greedy action tensor([-1.7979, -0.1093, 0.5357, -0.0961]) tensor([0.0450, 0.2437, 0.4644, 0.2469]) -Greedy action tensor([-1.6517, -0.5399, 0.5273, -0.0838]) tensor([0.0566, 0.1720, 0.5000, 0.2714]) -Greedy action tensor([-1.6544, -0.2939, 0.6063, -0.0212]) tensor([0.0510, 0.1988, 0.4891, 0.2611]) -Greedy action tensor([-1.1062, -0.4094, 0.3593, -0.1153]) tensor([0.0997, 0.2001, 0.4316, 0.2685]) -Greedy action tensor([-1.9292, -0.4310, 0.6599, -0.1717]) tensor([0.0407, 0.1819, 0.5416, 0.2358]) -Greedy action tensor([-1.8148, -0.1444, 0.5732, -0.0919]) tensor([0.0438, 0.2330, 0.4776, 0.2456]) -Greedy action tensor([-1.5477, -0.2608, 0.6246, 0.0373]) tensor([0.0547, 0.1981, 0.4802, 0.2669]) -Greedy action tensor([-1.6663, -0.5440, 0.5167, -0.0435]) tensor([0.0555, 0.1705, 0.4926, 0.2813]) -Greedy action tensor([-0.9491, -0.2393, 0.1775, 0.4937]) tensor([0.0966, 0.1965, 0.2981, 0.4089]) -Greedy action tensor([-1.9141, -0.4360, 0.6534, -0.1623]) tensor([0.0414, 0.1813, 0.5390, 0.2384]) -Greedy action tensor([-1.8777, -0.3902, 0.6274, -0.1570]) tensor([0.0430, 0.1903, 0.5264, 0.2403]) -Greedy action tensor([-1.4616, -0.0926, 0.6269, 0.2406]) tensor([0.0541, 0.2126, 0.4366, 0.2967]) -Greedy action tensor([-1.8937, -0.4444, 0.6435, -0.1580]) tensor([0.0424, 0.1807, 0.5363, 0.2406]) -Greedy action tensor([-1.9276, -0.4232, 0.6532, -0.1687]) tensor([0.0408, 0.1836, 0.5387, 0.2368]) -Greedy action tensor([-1.8074, -0.4704, 0.5906, -0.1078]) tensor([0.0470, 0.1789, 0.5169, 0.2571]) -Greedy action tensor([-0.1291, -0.1895, 0.2113, 0.2617]) tensor([0.2073, 0.1951, 0.2913, 0.3063]) -Greedy action tensor([-1.8766, -0.3728, 0.6204, -0.1589]) tensor([0.0431, 0.1938, 0.5232, 0.2400]) -Greedy action tensor([-1.8734, -0.4382, 0.6378, -0.1405]) tensor([0.0431, 0.1812, 0.5315, 0.2441]) -Greedy action tensor([-1.9224, -0.4111, 0.6554, -0.1682]) tensor([0.0409, 0.1852, 0.5379, 0.2361]) -Greedy action tensor([-0.7485, 0.9938, 0.1397, 0.2889]) tensor([0.0836, 0.4773, 0.2032, 0.2359]) -Greedy action tensor([-1.6388, -0.5400, 0.5269, -0.0149]) tensor([0.0562, 0.1686, 0.4901, 0.2851]) -Greedy action tensor([-1.8609, -0.1916, 0.5701, -0.1264]) tensor([0.0428, 0.2274, 0.4871, 0.2427]) -Greedy action tensor([-1.3635, -0.4846, 0.3441, 0.1175]) tensor([0.0751, 0.1808, 0.4140, 0.3301]) -Greedy action tensor([-1.4792, 0.4508, 0.4241, 0.1111]) tensor([0.0513, 0.3533, 0.3440, 0.2515]) -Greedy action tensor([-1.8287, -0.4746, 0.6078, -0.1382]) tensor([0.0460, 0.1783, 0.5262, 0.2495]) -Greedy action tensor([-1.9274, -0.4478, 0.6701, -0.1711]) tensor([0.0406, 0.1784, 0.5457, 0.2353]) -Greedy action tensor([-1.8040, -0.4324, 0.5936, -0.1093]) tensor([0.0468, 0.1843, 0.5143, 0.2546]) -Greedy action tensor([-1.7728, -0.4467, 0.5803, -0.1213]) tensor([0.0488, 0.1837, 0.5131, 0.2544]) -Greedy action tensor([-1.6588, -0.4530, 0.5260, -0.0901]) tensor([0.0555, 0.1852, 0.4931, 0.2663]) -Greedy action tensor([-1.9024, -0.4782, 0.7573, -0.0939]) tensor([0.0391, 0.1626, 0.5594, 0.2388]) -Greedy action tensor([-0.6968, 0.7617, 0.0938, -0.0367]) tensor([0.1059, 0.4555, 0.2336, 0.2050]) -Greedy action tensor([-1.9411, -0.4474, 0.6666, -0.1783]) tensor([0.0402, 0.1792, 0.5460, 0.2346]) -Greedy action tensor([-1.9035, -0.4449, 0.6449, -0.1649]) tensor([0.0421, 0.1809, 0.5378, 0.2393]) -Greedy action tensor([-1.9034, -0.4359, 0.6462, -0.1570]) tensor([0.0419, 0.1817, 0.5362, 0.2402]) -Greedy action tensor([-1.8970, -0.4192, 0.6383, -0.1598]) tensor([0.0422, 0.1851, 0.5328, 0.2399]) -Greedy action tensor([-1.9015, -0.4350, 0.6442, -0.1584]) tensor([0.0420, 0.1821, 0.5358, 0.2401]) -Greedy action tensor([-1.7090, -0.0945, 0.5550, -0.0397]) tensor([0.0477, 0.2398, 0.4591, 0.2533]) -Greedy action tensor([-1.8802, -0.4508, 0.6370, -0.1468]) tensor([0.0431, 0.1798, 0.5335, 0.2437]) -Greedy action tensor([-1.5456, -0.0561, 0.4432, -0.1092]) tensor([0.0590, 0.2617, 0.4312, 0.2482]) -Greedy action tensor([ 0.5218, -0.2524, 0.1293, 0.5343]) tensor([0.3176, 0.1464, 0.2145, 0.3216]) -Greedy action tensor([-1.5252, 0.3600, 0.3172, 0.0893]) tensor([0.0528, 0.3481, 0.3335, 0.2655]) -Greedy action tensor([-1.0609, -0.1636, 0.0846, -0.6284]) tensor([0.1229, 0.3014, 0.3863, 0.1894]) -Greedy action tensor([-1.9251, -0.4456, 0.6589, -0.1697]) tensor([0.0409, 0.1798, 0.5424, 0.2369]) -Greedy action tensor([-1.9166, -0.4412, 0.6559, -0.1660]) tensor([0.0413, 0.1805, 0.5406, 0.2377]) -Greedy action tensor([-1.9136, -0.4063, 0.6460, -0.1629]) tensor([0.0413, 0.1865, 0.5342, 0.2379]) -Greedy action tensor([-1.8941, -0.2840, 0.6125, -0.1450]) tensor([0.0416, 0.2083, 0.5106, 0.2394]) -Greedy action tensor([-1.9225, -0.3910, 0.6520, -0.1716]) tensor([0.0408, 0.1887, 0.5355, 0.2350]) -Greedy action tensor([-1.7390, -0.3021, 0.5365, -0.0885]) tensor([0.0496, 0.2088, 0.4830, 0.2585]) -Greedy action tensor([-1.8991, -0.4514, 0.6480, -0.1585]) tensor([0.0422, 0.1793, 0.5383, 0.2403]) -Greedy action tensor([-1.9129, -0.4425, 0.6539, -0.1635]) tensor([0.0414, 0.1803, 0.5398, 0.2384]) -Greedy action tensor([-0.9816, 0.9319, 0.1110, 0.3034]) tensor([0.0696, 0.4715, 0.2075, 0.2515]) -Greedy action tensor([-1.3150, 0.2030, 0.2453, 0.0673]) tensor([0.0699, 0.3189, 0.3327, 0.2785]) -Greedy action tensor([-1.8735, -0.3232, 0.6139, -0.1384]) tensor([0.0427, 0.2013, 0.5138, 0.2422]) -Greedy action tensor([-1.7156, -0.2155, 0.6275, -0.4337]) tensor([0.0513, 0.2299, 0.5340, 0.1848]) -Greedy action tensor([-0.4304, 0.4431, 0.4831, 0.3536]) tensor([0.1238, 0.2965, 0.3086, 0.2711]) -Greedy action tensor([-1.9074, -0.3994, 0.6472, -0.1495]) tensor([0.0413, 0.1868, 0.5320, 0.2398]) -Greedy action tensor([-1.3222, -0.5606, 0.4000, -0.0313]) tensor([0.0808, 0.1731, 0.4523, 0.2938]) -Greedy action tensor([-1.8348, -0.4427, 0.6116, -0.1423]) tensor([0.0454, 0.1829, 0.5248, 0.2469]) -Greedy action tensor([-1.8762, -0.4381, 0.6385, -0.1479]) tensor([0.0431, 0.1815, 0.5327, 0.2427]) -Greedy action tensor([0.2348, 0.7337, 0.0215, 0.2544]) tensor([0.2235, 0.3681, 0.1806, 0.2279]) -Greedy action tensor([-1.7326, -0.4820, 0.5621, -0.1060]) tensor([0.0513, 0.1791, 0.5088, 0.2608]) -Greedy action tensor([-1.0337, -0.2709, 0.3508, -0.1837]) tensor([0.1055, 0.2263, 0.4213, 0.2469]) -Greedy action tensor([-1.1004, -0.4804, 0.3364, -0.0527]) tensor([0.1008, 0.1874, 0.4242, 0.2875]) -Greedy action tensor([-1.3971, -0.4428, 0.3954, 0.1273]) tensor([0.0705, 0.1829, 0.4230, 0.3236]) -Greedy action tensor([-1.8713, -0.4342, 0.6323, -0.1424]) tensor([0.0433, 0.1824, 0.5300, 0.2442]) -Greedy action tensor([-1.7247, 0.0316, 0.5049, -0.0953]) tensor([0.0472, 0.2733, 0.4387, 0.2407]) -Greedy action tensor([-1.4318, -0.5029, 0.9936, 0.7958]) tensor([0.0415, 0.1050, 0.4689, 0.3847]) -Greedy action tensor([-1.9238, -0.4539, 0.6611, -0.1707]) tensor([0.0410, 0.1783, 0.5439, 0.2367]) -Greedy action tensor([-1.7614, -0.4434, 0.5816, -0.0758]) tensor([0.0487, 0.1818, 0.5068, 0.2626]) -Greedy action tensor([-1.4967, 0.0491, 0.4255, 0.0614]) tensor([0.0579, 0.2716, 0.3957, 0.2749]) -Greedy action tensor([-1.6523, -0.5320, 0.5312, -0.0929]) tensor([0.0565, 0.1732, 0.5016, 0.2687]) -Greedy action tensor([-1.8545, -0.2358, 0.5981, -0.1325]) tensor([0.0430, 0.2170, 0.4995, 0.2406]) -Greedy action tensor([-1.9446, -0.4572, 0.6770, -0.1776]) tensor([0.0399, 0.1768, 0.5495, 0.2338]) -Greedy action tensor([-1.8437, -0.3872, 0.6076, -0.1197]) tensor([0.0444, 0.1907, 0.5157, 0.2492]) -Greedy action tensor([-1.9075, -0.4079, 0.6453, -0.1590]) tensor([0.0415, 0.1861, 0.5336, 0.2387]) -Greedy action tensor([-1.9118, -0.4367, 0.6521, -0.1631]) tensor([0.0415, 0.1813, 0.5388, 0.2384]) -Greedy action tensor([-1.8141, -0.2354, 0.5722, -0.1252]) tensor([0.0452, 0.2190, 0.4912, 0.2446]) -Greedy action tensor([-0.3520, 1.0530, 0.0226, 0.2662]) tensor([0.1193, 0.4860, 0.1734, 0.2213]) -Greedy action tensor([-1.6011, -0.5385, 0.4833, 0.0643]) tensor([0.0581, 0.1680, 0.4669, 0.3070]) -Greedy action tensor([-1.9064, -0.4577, 0.6524, -0.1628]) tensor([0.0418, 0.1782, 0.5407, 0.2393]) -Greedy action tensor([ 1.8748, -1.0387, -0.1251, 0.6335]) tensor([0.6763, 0.0367, 0.0915, 0.1955]) -Greedy action tensor([ 1.3600, -0.2017, -0.7798, 0.2642]) tensor([0.6018, 0.1262, 0.0708, 0.2012]) -Greedy action tensor([ 1.6667, -0.4357, -0.4023, 0.5004]) tensor([0.6410, 0.0783, 0.0810, 0.1997]) -Greedy action tensor([ 1.3823, -0.4434, -0.7780, 0.5475]) tensor([0.5847, 0.0942, 0.0674, 0.2537]) -Greedy action tensor([ 1.3709, -0.0350, -1.0225, 0.0092]) tensor([0.6279, 0.1539, 0.0573, 0.1609]) -Greedy action tensor([ 1.6068, -0.2467, -0.8106, 0.1778]) tensor([0.6732, 0.1055, 0.0600, 0.1613]) -Greedy action tensor([ 1.5016, -0.1067, -1.7632, 0.1716]) tensor([0.6654, 0.1332, 0.0254, 0.1760]) -Greedy action tensor([ 1.3661, -0.4572, -0.3535, -0.0162]) tensor([0.6283, 0.1015, 0.1125, 0.1577]) -Greedy action tensor([ 1.2581, -0.0581, -0.9964, 0.1480]) tensor([0.5873, 0.1575, 0.0616, 0.1936]) -Greedy action tensor([ 1.7650, 0.3877, -0.2521, 0.2628]) tensor([0.6219, 0.1569, 0.0827, 0.1385]) -Greedy action tensor([ 1.0937, 0.0227, -0.9347, 0.2275]) tensor([0.5278, 0.1808, 0.0694, 0.2220]) -Greedy action tensor([ 1.9290, -1.2228, 0.2115, 0.6621]) tensor([0.6649, 0.0284, 0.1194, 0.1873]) -Greedy action tensor([ 2.0697, -1.1082, -0.7011, 0.3536]) tensor([0.7788, 0.0325, 0.0488, 0.1400]) -Greedy action tensor([ 1.5140, -0.3139, -0.2307, 0.0235]) tensor([0.6407, 0.1030, 0.1119, 0.1443]) -Greedy action tensor([ 0.8363, -0.6403, -0.4236, -0.2866]) tensor([0.5442, 0.1243, 0.1544, 0.1771]) -Greedy action tensor([ 1.1986, -0.3369, -0.6449, 0.1052]) tensor([0.5852, 0.1260, 0.0926, 0.1961]) -Greedy action tensor([ 1.5044, -0.2735, -0.6464, 0.1803]) tensor([0.6446, 0.1089, 0.0750, 0.1715]) -Greedy action tensor([ 1.2735, -0.4960, -0.4419, 0.1876]) tensor([0.5925, 0.1010, 0.1066, 0.2000]) -Greedy action tensor([ 1.8031, -0.3634, -0.4917, 0.1043]) tensor([0.7152, 0.0819, 0.0721, 0.1308]) -Greedy action tensor([ 1.5003, 0.2406, -0.5416, -0.0159]) tensor([0.6123, 0.1738, 0.0795, 0.1344]) -Greedy action tensor([ 1.4133, 0.1909, -0.7853, 0.6160]) tensor([0.5388, 0.1587, 0.0598, 0.2427]) -Greedy action tensor([ 1.3715, -0.1942, -0.2838, 0.0486]) tensor([0.6001, 0.1254, 0.1146, 0.1598]) -Greedy action tensor([ 1.3509, -0.1773, -0.4024, 0.2353]) tensor([0.5821, 0.1263, 0.1008, 0.1908]) -Greedy action tensor([ 1.0118, -0.3334, -0.7577, 0.1330]) tensor([0.5416, 0.1411, 0.0923, 0.2250]) -Greedy action tensor([ 1.4334, -0.1003, -0.8476, 0.0248]) tensor([0.6400, 0.1381, 0.0654, 0.1565]) -Greedy action tensor([ 1.2937, -1.1496, -0.4872, -0.4436]) tensor([0.6986, 0.0607, 0.1177, 0.1230]) -Greedy action tensor([ 1.6642, -0.8852, -0.2103, 0.1166]) tensor([0.6924, 0.0541, 0.1062, 0.1473]) -Greedy action tensor([ 1.8819, -1.1080, 0.0057, 0.3878]) tensor([0.7003, 0.0352, 0.1073, 0.1572]) -Greedy action tensor([ 2.2602, -0.9558, -0.2640, 0.5670]) tensor([0.7668, 0.0308, 0.0614, 0.1410]) -Greedy action tensor([ 2.0121, -0.4732, -0.4525, 0.1982]) tensor([0.7511, 0.0626, 0.0639, 0.1225]) -Greedy action tensor([ 1.9365, 0.1571, -0.5134, -0.3455]) tensor([0.7369, 0.1243, 0.0636, 0.0752]) -Greedy action tensor([ 1.4408, -0.7838, -0.1260, -0.0063]) tensor([0.6443, 0.0697, 0.1345, 0.1516]) -Greedy action tensor([ 1.2359, -0.5702, -0.3040, 0.5592]) tensor([0.5300, 0.0871, 0.1136, 0.2694]) -Greedy action tensor([ 1.7352, -0.8104, -0.4676, 0.3008]) tensor([0.7007, 0.0550, 0.0774, 0.1669]) -Greedy action tensor([ 1.4678, -0.7430, -0.3928, 0.6722]) tensor([0.5826, 0.0639, 0.0906, 0.2629]) -Greedy action tensor([ 1.5608, -0.8485, -0.5613, 0.3118]) tensor([0.6683, 0.0601, 0.0800, 0.1916]) -Greedy action tensor([ 2.2449, -0.4553, 0.0293, 0.4892]) tensor([0.7412, 0.0498, 0.0809, 0.1281]) -Greedy action tensor([ 0.9875, -0.1774, -0.5702, -0.1066]) tensor([0.5384, 0.1680, 0.1134, 0.1803]) -Greedy action tensor([ 1.3689, -0.5455, -0.1830, 0.0196]) tensor([0.6178, 0.0911, 0.1309, 0.1603]) -Greedy action tensor([ 1.3429, -0.0197, -0.1146, 0.0092]) tensor([0.5707, 0.1461, 0.1329, 0.1504]) -Greedy action tensor([ 1.3680, -0.8303, -0.0262, 0.9694]) tensor([0.4926, 0.0547, 0.1222, 0.3306]) -Greedy action tensor([ 1.5688, -1.0072, -0.2850, 0.5616]) tensor([0.6258, 0.0476, 0.0980, 0.2286]) -Greedy action tensor([ 1.7156, -1.0074, -0.2192, 0.1328]) tensor([0.7065, 0.0464, 0.1020, 0.1451]) -Greedy action tensor([ 1.2198, -0.1881, -0.6798, 0.2952]) tensor([0.5583, 0.1366, 0.0835, 0.2215]) -Greedy action tensor([ 1.5415, -0.2290, -0.7142, 0.3662]) tensor([0.6314, 0.1075, 0.0662, 0.1949]) -Greedy action tensor([ 1.6347, -0.1223, -1.2732, 0.4829]) tensor([0.6480, 0.1118, 0.0354, 0.2048]) -Greedy action tensor([ 1.4010, -0.4910, 0.0279, 0.7207]) tensor([0.5234, 0.0789, 0.1326, 0.2651]) -Greedy action tensor([ 1.3415, -0.8162, -0.4162, 0.4298]) tensor([0.5918, 0.0684, 0.1020, 0.2378]) -Greedy action tensor([ 1.4061, -0.4562, -0.4177, 0.3419]) tensor([0.6018, 0.0935, 0.0971, 0.2076]) -Greedy action tensor([ 1.3261, -0.5431, -0.3420, 0.4908]) tensor([0.5629, 0.0868, 0.1062, 0.2441]) -Greedy action tensor([ 1.5137, -0.6928, -0.1136, 0.2065]) tensor([0.6341, 0.0698, 0.1246, 0.1716]) -Greedy action tensor([ 1.6880, -0.1999, -0.6698, 0.0066]) tensor([0.6983, 0.1057, 0.0661, 0.1300]) -Greedy action tensor([ 1.1247, -0.4513, -0.8882, 0.2675]) tensor([0.5667, 0.1172, 0.0757, 0.2405]) -Greedy action tensor([ 1.4733, -0.4629, -0.3471, 0.0949]) tensor([0.6418, 0.0926, 0.1039, 0.1617]) -Greedy action tensor([ 1.8365, 0.0079, -0.0902, 0.4175]) tensor([0.6459, 0.1038, 0.0941, 0.1563]) -Greedy action tensor([ 1.5773, -0.6934, 0.1026, 0.4248]) tensor([0.6068, 0.0626, 0.1389, 0.1917]) -Greedy action tensor([ 1.7098, -0.9431, -0.3077, -0.0952]) tensor([0.7310, 0.0515, 0.0972, 0.1202]) -Greedy action tensor([ 1.5583, -0.4957, -0.4283, 0.4174]) tensor([0.6310, 0.0809, 0.0865, 0.2016]) -Greedy action tensor([ 1.6844, -1.0562, -0.2309, 0.0667]) tensor([0.7091, 0.0458, 0.1044, 0.1407]) -Greedy action tensor([ 1.1262, -0.1426, -0.6835, 0.0097]) tensor([0.5642, 0.1587, 0.0924, 0.1847]) -Greedy action tensor([ 1.5500, -0.1469, -0.7460, 0.2364]) tensor([0.6440, 0.1180, 0.0648, 0.1731]) -Greedy action tensor([ 1.0235, -0.2472, -0.6751, 0.1139]) tensor([0.5358, 0.1504, 0.0980, 0.2158]) -Greedy action tensor([ 1.8025, -0.7501, -0.1242, 0.4429]) tensor([0.6756, 0.0526, 0.0984, 0.1735]) -Greedy action tensor([ 1.0807, -0.3610, -0.2858, -0.0432]) tensor([0.5505, 0.1302, 0.1404, 0.1789]) -Greedy action tensor([ 1.5478, -0.7758, -0.2918, 0.6492]) tensor([0.6010, 0.0588, 0.0955, 0.2447]) -Greedy action tensor([ 1.5570, -0.5982, -0.9798, 0.0898]) tensor([0.7015, 0.0813, 0.0555, 0.1617]) -Greedy action tensor([ 1.4022, -0.2696, -0.6223, 0.4461]) tensor([0.5867, 0.1103, 0.0775, 0.2255]) -Greedy action tensor([ 1.0839, -0.0361, -0.2103, -0.4154]) tensor([0.5484, 0.1789, 0.1503, 0.1224]) -Greedy action tensor([ 1.3473, -0.6302, -0.0862, 0.4612]) tensor([0.5589, 0.0774, 0.1333, 0.2304]) -Greedy action tensor([ 1.4016, -0.7475, -0.0600, 0.4878]) tensor([0.5716, 0.0666, 0.1325, 0.2292]) -Greedy action tensor([ 1.3163, -0.3721, -0.3564, 0.0376]) tensor([0.6057, 0.1119, 0.1137, 0.1686]) -Greedy action tensor([ 1.2585, -0.3177, -1.0777, 0.2133]) tensor([0.6042, 0.1249, 0.0584, 0.2124]) -Greedy action tensor([ 0.5431, -0.1817, 0.0924, -0.0439]) tensor([0.3735, 0.1809, 0.2380, 0.2076]) -Greedy action tensor([ 2.1361, -0.6138, -0.0180, 0.2232]) tensor([0.7532, 0.0482, 0.0874, 0.1112]) -Greedy action tensor([ 1.9163, -0.1461, -0.2771, 0.4073]) tensor([0.6850, 0.0871, 0.0764, 0.1515]) -Greedy action tensor([ 1.5261, -0.7918, -0.2343, 0.5866]) tensor([0.6019, 0.0593, 0.1035, 0.2353]) -Greedy action tensor([ 0.9664, -0.3034, -0.1714, 0.0547]) tensor([0.4992, 0.1402, 0.1600, 0.2006]) -Greedy action tensor([ 1.0451, 0.0141, 0.0079, -0.1563]) tensor([0.4971, 0.1773, 0.1762, 0.1495]) -Greedy action tensor([ 1.9998, 0.4155, -0.2632, 0.5630]) tensor([0.6465, 0.1326, 0.0673, 0.1537]) -Greedy action tensor([ 1.5582, -0.1682, -1.0456, 0.0893]) tensor([0.6747, 0.1201, 0.0499, 0.1553]) -Greedy action tensor([ 1.8003, -0.5980, -0.9731, 0.0289]) tensor([0.7556, 0.0687, 0.0472, 0.1285]) -Greedy action tensor([ 0.7811, -0.0875, -0.1604, -0.4437]) tensor([0.4754, 0.1995, 0.1854, 0.1397]) -Greedy action tensor([ 0.7517, -0.4298, -0.1138, -0.4076]) tensor([0.4899, 0.1503, 0.2062, 0.1537]) -Greedy action tensor([ 0.5894, 0.0248, 0.0797, -0.1890]) tensor([0.3804, 0.2163, 0.2285, 0.1747]) -Greedy action tensor([ 0.8650, -0.6338, -0.0072, -0.5486]) tensor([0.5306, 0.1185, 0.2218, 0.1291]) -Greedy action tensor([ 0.8565, -0.3956, -0.0843, -0.1922]) tensor([0.4934, 0.1411, 0.1926, 0.1729]) -Greedy action tensor([ 0.8824, 0.0061, 0.0055, -0.0444]) tensor([0.4488, 0.1868, 0.1867, 0.1776]) -Greedy action tensor([ 0.7564, -0.0111, -0.0704, -0.0223]) tensor([0.4236, 0.1966, 0.1853, 0.1944]) -Greedy action tensor([ 1.0817, -0.4859, -0.2434, -0.5989]) tensor([0.6022, 0.1256, 0.1600, 0.1122]) -Greedy action tensor([ 0.8822, -0.5879, 0.1956, -0.3219]) tensor([0.4918, 0.1131, 0.2475, 0.1475]) -Greedy action tensor([ 1.0118, -0.7560, 0.1809, -0.3242]) tensor([0.5350, 0.0913, 0.2331, 0.1406]) -Greedy action tensor([ 0.6966, -0.3202, 0.0361, -0.2649]) tensor([0.4424, 0.1600, 0.2285, 0.1691]) -Greedy action tensor([ 0.7351, -0.4672, -0.0341, -0.2189]) tensor([0.4653, 0.1398, 0.2156, 0.1792]) -Greedy action tensor([ 0.5295, -0.2746, -0.0983, -0.2273]) tensor([0.4081, 0.1826, 0.2178, 0.1915]) -Greedy action tensor([ 0.8934, -0.7978, 0.1764, -0.4085]) tensor([0.5143, 0.0948, 0.2511, 0.1399]) -Greedy action tensor([ 0.8402, -0.5621, 0.3183, -0.8338]) tensor([0.4933, 0.1214, 0.2928, 0.0925]) -Greedy action tensor([ 1.0126, -0.2708, -0.0164, -0.2398]) tensor([0.5208, 0.1443, 0.1861, 0.1488]) -Greedy action tensor([ 0.5610, -0.3846, -0.1003, -0.2513]) tensor([0.4258, 0.1654, 0.2198, 0.1890]) -Greedy action tensor([ 0.8499, -0.4898, 0.0248, -0.5023]) tensor([0.5105, 0.1337, 0.2237, 0.1321]) -Greedy action tensor([ 1.4168, -0.9921, 0.1665, -0.8215]) tensor([0.6743, 0.0606, 0.1931, 0.0719]) -Greedy action tensor([ 0.7482, -0.5760, -0.1073, -0.2546]) tensor([0.4859, 0.1293, 0.2065, 0.1783]) -Greedy action tensor([ 1.1851, -0.6842, -0.0774, -0.6375]) tensor([0.6255, 0.0965, 0.1770, 0.1011]) -Greedy action tensor([ 1.2006, -0.6829, -0.0580, -0.7472]) tensor([0.6334, 0.0963, 0.1799, 0.0903]) -Greedy action tensor([ 1.0618, -0.6811, 0.1313, -0.7802]) tensor([0.5787, 0.1013, 0.2282, 0.0917]) -Greedy action tensor([ 1.0454, -0.3907, -0.0663, -0.4208]) tensor([0.5563, 0.1323, 0.1830, 0.1284]) -Greedy action tensor([ 0.4881, -0.3752, -0.2264, -0.3467]) tensor([0.4264, 0.1798, 0.2087, 0.1850]) -Greedy action tensor([ 0.7703, -0.4205, -0.1526, -0.2201]) tensor([0.4824, 0.1467, 0.1917, 0.1792]) -Greedy action tensor([ 0.5332, 0.0371, 0.0539, -0.0770]) tensor([0.3608, 0.2197, 0.2234, 0.1960]) -Greedy action tensor([ 0.8737, -0.7339, -0.0996, -0.3816]) tensor([0.5367, 0.1075, 0.2028, 0.1530]) -Greedy action tensor([ 0.6394, -0.1461, 0.0367, -0.1907]) tensor([0.4100, 0.1869, 0.2244, 0.1787]) -Greedy action tensor([ 0.4881, -0.5658, -0.2683, -0.1282]) tensor([0.4241, 0.1478, 0.1991, 0.2290]) -Greedy action tensor([ 0.5230, 0.2835, -0.1513, 0.1582]) tensor([0.3343, 0.2631, 0.1704, 0.2322]) -Greedy action tensor([ 0.5032, -0.4212, -0.1906, 0.0888]) tensor([0.3911, 0.1552, 0.1954, 0.2584]) -Greedy action tensor([ 0.9200, -0.7179, 0.0251, -0.5251]) tensor([0.5439, 0.1057, 0.2222, 0.1282]) -Greedy action tensor([ 0.7095, -0.4423, -0.0668, -0.3909]) tensor([0.4742, 0.1499, 0.2182, 0.1578]) -Greedy action tensor([ 1.0764, -0.6957, -0.0889, -0.3870]) tensor([0.5837, 0.0992, 0.1820, 0.1351]) -Greedy action tensor([ 0.7749, -0.4325, -0.1281, -0.2801]) tensor([0.4872, 0.1457, 0.1975, 0.1696]) -Greedy action tensor([ 0.6980, -0.5935, 0.1101, -0.7691]) tensor([0.4852, 0.1334, 0.2695, 0.1119]) -Greedy action tensor([ 0.8607, -0.6018, -0.0879, -0.4521]) tensor([0.5297, 0.1227, 0.2051, 0.1425]) -Greedy action tensor([ 0.5977, -0.3093, 0.0095, -0.1386]) tensor([0.4102, 0.1656, 0.2278, 0.1964]) -Greedy action tensor([ 1.3017, -0.7218, 0.1008, -0.7186]) tensor([0.6387, 0.0844, 0.1922, 0.0847]) -Greedy action tensor([ 0.8358, -0.6515, 0.0498, -0.5815]) tensor([0.5197, 0.1175, 0.2368, 0.1260]) -Greedy action tensor([ 0.7737, -0.3587, -0.0455, -0.1654]) tensor([0.4642, 0.1496, 0.2046, 0.1815]) -Greedy action tensor([ 0.9042, -0.7670, 0.0836, -0.3942]) tensor([0.5260, 0.0989, 0.2315, 0.1436]) -Greedy action tensor([ 0.4790, -0.0686, -0.0975, -0.0247]) tensor([0.3644, 0.2107, 0.2047, 0.2202]) -Greedy action tensor([ 0.7940, -0.3920, -0.1138, -0.6115]) tensor([0.5117, 0.1563, 0.2065, 0.1255]) -Greedy action tensor([ 0.8025, -0.2969, -0.1230, -0.0210]) tensor([0.4612, 0.1536, 0.1828, 0.2024]) -Greedy action tensor([ 0.7799, -0.5313, -0.2031, -0.2818]) tensor([0.5026, 0.1354, 0.1881, 0.1738]) -Greedy action tensor([ 0.7677, -0.3423, -0.1775, -0.2794]) tensor([0.4833, 0.1593, 0.1878, 0.1696]) -Greedy action tensor([ 0.7044, -0.6605, 0.0421, -0.4985]) tensor([0.4828, 0.1233, 0.2490, 0.1450]) -Greedy action tensor([ 0.9357, -0.5308, 0.0116, -0.4445]) tensor([0.5322, 0.1228, 0.2112, 0.1338]) -Greedy action tensor([ 0.9166, -0.6519, 0.1287, -0.3978]) tensor([0.5177, 0.1079, 0.2354, 0.1391]) -Greedy action tensor([ 0.5592, -0.2960, -0.0512, -0.0638]) tensor([0.3992, 0.1698, 0.2169, 0.2141]) -Greedy action tensor([ 0.7967, -0.2971, -0.0825, -0.3743]) tensor([0.4854, 0.1626, 0.2015, 0.1505]) -Greedy action tensor([ 0.7634, -0.0695, 0.0638, -0.2819]) tensor([0.4380, 0.1904, 0.2176, 0.1540]) -Greedy action tensor([ 0.4832, 0.3633, 0.0605, -0.1550]) tensor([0.3257, 0.2889, 0.2134, 0.1720]) -Greedy action tensor([ 1.1251, -0.7828, -0.0289, -0.4524]) tensor([0.5987, 0.0888, 0.1888, 0.1236]) -Greedy action tensor([ 0.9751, -0.6119, 0.0110, -0.4287]) tensor([0.5460, 0.1117, 0.2082, 0.1341]) -Greedy action tensor([ 0.6626, -0.3004, -0.0322, -0.1799]) tensor([0.4326, 0.1651, 0.2160, 0.1863]) -Greedy action tensor([ 0.7019, -0.1084, -0.1547, -0.0973]) tensor([0.4312, 0.1918, 0.1831, 0.1939]) -Greedy action tensor([ 0.8445, -0.3897, -0.0855, -0.2359]) tensor([0.4938, 0.1437, 0.1948, 0.1676]) -Greedy action tensor([ 0.6865, 0.0826, 0.0305, -0.0054]) tensor([0.3897, 0.2130, 0.2022, 0.1951]) -Greedy action tensor([ 1.2780, -1.0011, -0.0243, -0.5241]) tensor([0.6497, 0.0665, 0.1767, 0.1072]) -Greedy action tensor([ 0.8024, -0.6153, 0.1930, -0.3064]) tensor([0.4726, 0.1145, 0.2569, 0.1559]) -Greedy action tensor([ 0.9966, -0.2811, 0.0475, -0.4662]) tensor([0.5270, 0.1469, 0.2040, 0.1221]) -Greedy action tensor([ 0.2584, -0.1970, -0.1527, -0.0817]) tensor([0.3324, 0.2108, 0.2203, 0.2365]) -Greedy action tensor([ 0.0715, 0.3264, -0.2722, -0.3808]) tensor([0.2751, 0.3549, 0.1951, 0.1750]) -Greedy action tensor([ 1.0522, -0.7370, 0.0643, -0.6197]) tensor([0.5789, 0.0967, 0.2156, 0.1088]) -Greedy action tensor([ 0.6629, -0.1760, -0.0828, -0.2744]) tensor([0.4351, 0.1881, 0.2064, 0.1704]) -Greedy action tensor([ 0.6658, -0.3969, -0.0769, -0.2289]) tensor([0.4484, 0.1549, 0.2134, 0.1833]) -Greedy action tensor([ 0.3713, -0.1100, 0.0440, -0.3057]) tensor([0.3513, 0.2171, 0.2532, 0.1785]) -Greedy action tensor([ 0.8849, -0.7933, 0.1244, -0.7371]) tensor([0.5401, 0.1008, 0.2524, 0.1067]) -Greedy action tensor([ 0.2523, 0.1445, 0.0667, -0.2291]) tensor([0.2988, 0.2683, 0.2482, 0.1847]) -Greedy action tensor([ 0.8037, -0.6817, 0.0023, -0.4141]) tensor([0.5074, 0.1149, 0.2277, 0.1501]) -Greedy action tensor([ 1.0226, -0.6132, -0.0914, -0.6197]) tensor([0.5826, 0.1135, 0.1912, 0.1127]) -Greedy action tensor([ 0.8468, -0.3991, 0.0037, -0.4913]) tensor([0.5049, 0.1453, 0.2173, 0.1325]) -Greedy action tensor([ 0.7047, -0.3308, -0.1246, -0.1570]) tensor([0.4517, 0.1604, 0.1971, 0.1908]) -Greedy action tensor([ 1.0408, -0.6277, 0.1365, -0.5351]) tensor([0.5555, 0.1047, 0.2249, 0.1149]) -Greedy action tensor([ 0.9330, -0.4904, 0.1662, -0.1533]) tensor([0.4895, 0.1179, 0.2274, 0.1652]) -Greedy action tensor([ 0.6903, 0.1141, -0.0452, 0.1721]) tensor([0.3792, 0.2131, 0.1818, 0.2259]) -Greedy action tensor([ 1.2086, -0.7805, -0.0163, -0.2897]) tensor([0.6046, 0.0827, 0.1776, 0.1351]) -Greedy action tensor([ 0.6996, -0.1870, -0.0026, -0.0334]) tensor([0.4188, 0.1726, 0.2075, 0.2012]) -Greedy action tensor([ 0.5523, -1.5291, -0.0107, -0.1645]) tensor([0.4582, 0.0572, 0.2609, 0.2237]) -Greedy action tensor([-0.2062, -1.1128, -0.0598, -0.3321]) tensor([0.2904, 0.1173, 0.3362, 0.2561]) -Greedy action tensor([-0.6520, -0.0461, 0.0304, -0.8347]) tensor([0.1772, 0.3247, 0.3505, 0.1476]) -Greedy action tensor([1.1405, 0.1390, 0.0836, 0.3946]) tensor([0.4568, 0.1678, 0.1587, 0.2167]) -Greedy action tensor([ 1.1280, -1.3804, 1.2517, -1.0195]) tensor([0.4292, 0.0349, 0.4857, 0.0501]) -Greedy action tensor([-0.0385, -1.8826, -0.1082, 0.1675]) tensor([0.3012, 0.0476, 0.2810, 0.3702]) -Greedy action tensor([ 0.3646, 0.4466, -0.0990, -0.4070]) tensor([0.3148, 0.3417, 0.1980, 0.1455]) -Greedy action tensor([-0.9664, -0.1673, -0.4516, -0.6342]) tensor([0.1590, 0.3535, 0.2660, 0.2216]) -Greedy action tensor([-0.5587, -0.3937, -0.0907, 0.5280]) tensor([0.1484, 0.1750, 0.2369, 0.4398]) -Greedy action tensor([-0.0469, -0.7662, -0.3673, -0.6866]) tensor([0.3649, 0.1778, 0.2649, 0.1925]) -Greedy action tensor([ 1.2264, -0.2410, -0.7105, -0.2470]) tensor([0.6235, 0.1437, 0.0899, 0.1429]) -Greedy action tensor([ 0.6843, -1.3363, -0.0363, -0.1422]) tensor([0.4862, 0.0645, 0.2365, 0.2128]) -Greedy action tensor([ 0.7161, 0.0600, -0.2924, 0.2084]) tensor([0.4023, 0.2088, 0.1468, 0.2421]) -Greedy action tensor([-0.9905, -1.1245, -0.2440, -0.6902]) tensor([0.1875, 0.1639, 0.3955, 0.2531]) -Greedy action tensor([-0.0357, -1.1598, -0.1457, -0.1009]) tensor([0.3167, 0.1029, 0.2837, 0.2967]) -Greedy action tensor([-0.4871, -0.4923, -1.2533, 0.5623]) tensor([0.1881, 0.1872, 0.0874, 0.5373]) -Greedy action tensor([ 0.1285, -0.5884, -0.1345, -0.3371]) tensor([0.3467, 0.1693, 0.2665, 0.2176]) -Greedy action tensor([-0.2696, 0.1831, 0.0605, -0.9150]) tensor([0.2228, 0.3504, 0.3100, 0.1168]) -Greedy action tensor([-0.1927, -0.9923, 0.5169, -0.2074]) tensor([0.2238, 0.1006, 0.4551, 0.2205]) -Greedy action tensor([-0.2580, -0.3466, 0.0928, -1.3776]) tensor([0.2731, 0.2499, 0.3878, 0.0891]) -Greedy action tensor([-1.3949, -0.5893, -0.0175, 0.3337]) tensor([0.0779, 0.1744, 0.3089, 0.4388]) -Greedy action tensor([-0.3402, -0.4123, -0.1174, -0.5096]) tensor([0.2485, 0.2312, 0.3105, 0.2098]) -Greedy action tensor([-0.5508, -0.4150, 0.6867, -1.1801]) tensor([0.1633, 0.1870, 0.5627, 0.0870]) -Greedy action tensor([ 0.9327, -0.1109, 0.2411, 0.1549]) tensor([0.4325, 0.1523, 0.2166, 0.1987]) -Greedy action tensor([-0.3369, 0.2596, -0.2891, -0.6488]) tensor([0.2176, 0.3950, 0.2282, 0.1593]) -Greedy action tensor([ 0.2956, -0.6991, 0.3648, -0.0440]) tensor([0.3171, 0.1173, 0.3398, 0.2258]) -Greedy action tensor([-0.2310, -1.1732, -0.5343, 0.2982]) tensor([0.2614, 0.1019, 0.1930, 0.4437]) -Greedy action tensor([-0.0920, -0.0315, 0.0669, 0.6801]) tensor([0.1852, 0.1968, 0.2171, 0.4009]) -Greedy action tensor([-0.1208, -1.4047, 0.3107, 0.4846]) tensor([0.2151, 0.0596, 0.3312, 0.3941]) -Greedy action tensor([-0.6077, -1.0772, -0.0440, -1.0652]) tensor([0.2490, 0.1557, 0.4376, 0.1576]) -Greedy action tensor([-0.1675, -0.0074, -0.1710, -0.3098]) tensor([0.2477, 0.2907, 0.2468, 0.2148]) -Greedy action tensor([ 0.5359, -0.5218, -0.0919, 0.0808]) tensor([0.3976, 0.1380, 0.2122, 0.2522]) -Greedy action tensor([ 0.5460, -1.3694, -0.7534, -0.4602]) tensor([0.5601, 0.0825, 0.1527, 0.2048]) -Greedy action tensor([ 0.8333, -0.7362, -0.0798, 0.7838]) tensor([0.3904, 0.0813, 0.1567, 0.3716]) -Greedy action tensor([ 1.0668, 0.1365, -0.2398, -0.2366]) tensor([0.5163, 0.2036, 0.1398, 0.1402]) -Greedy action tensor([ 0.2750, -1.5746, 1.2262, 0.0954]) tensor([0.2183, 0.0343, 0.5650, 0.1824]) -Greedy action tensor([ 0.5770, -0.8137, 0.8970, -1.0174]) tensor([0.3535, 0.0880, 0.4868, 0.0718]) -Greedy action tensor([ 1.1332, -1.1303, 0.0284, -0.5954]) tensor([0.6200, 0.0645, 0.2054, 0.1101]) -Greedy action tensor([ 1.3170, -0.5565, 0.1321, 0.5196]) tensor([0.5236, 0.0804, 0.1601, 0.2359]) -Greedy action tensor([-0.7284, 0.0180, -1.3401, -0.8430]) tensor([0.2201, 0.4643, 0.1194, 0.1963]) -Greedy action tensor([-0.5435, -0.9967, -0.2553, 0.1844]) tensor([0.1984, 0.1261, 0.2647, 0.4108]) -Greedy action tensor([ 0.2910, -1.2018, -0.1151, 0.3800]) tensor([0.3351, 0.0753, 0.2233, 0.3663]) -Greedy action tensor([-0.1682, -0.3531, 0.9266, -0.6083]) tensor([0.1830, 0.1521, 0.5470, 0.1179]) -Greedy action tensor([ 0.9263, -0.7484, 0.1241, 0.7952]) tensor([0.3979, 0.0746, 0.1784, 0.3491]) -Greedy action tensor([ 0.0783, -1.0848, -0.4721, -0.1622]) tensor([0.3738, 0.1168, 0.2156, 0.2939]) -Greedy action tensor([ 0.5818, -0.1580, 0.1490, -0.2538]) tensor([0.3907, 0.1864, 0.2535, 0.1694]) -Greedy action tensor([ 0.2673, -0.1953, -0.1045, -0.4099]) tensor([0.3537, 0.2227, 0.2439, 0.1797]) -Greedy action tensor([-1.0731, -0.5957, -1.9584, 0.1377]) tensor([0.1567, 0.2526, 0.0647, 0.5260]) -Greedy action tensor([-0.2675, 0.3309, -1.1409, 0.7902]) tensor([0.1635, 0.2974, 0.0683, 0.4708]) -Greedy action tensor([ 0.0041, 0.3410, 0.9310, -0.3823]) tensor([0.1784, 0.2498, 0.4506, 0.1212]) -Greedy action tensor([-0.2770, -0.3731, -0.2930, -0.5941]) tensor([0.2762, 0.2509, 0.2718, 0.2011]) -Greedy action tensor([ 5.9909e-01, -1.1321e+00, 1.0283e-03, -5.6549e-01]) tensor([0.4904, 0.0868, 0.2697, 0.1530]) -Greedy action tensor([-0.7097, -0.7424, -0.4103, -0.3988]) tensor([0.2136, 0.2067, 0.2882, 0.2915]) -Greedy action tensor([ 0.9442, -1.3173, 0.1082, -0.3269]) tensor([0.5500, 0.0573, 0.2384, 0.1543]) -Greedy action tensor([-0.4202, 0.7424, -0.6048, -0.7928]) tensor([0.1749, 0.5593, 0.1454, 0.1205]) -Greedy action tensor([0.5974, 0.2063, 0.9934, 0.2444]) tensor([0.2588, 0.1750, 0.3845, 0.1818]) -Greedy action tensor([-0.4916, -1.2250, 0.2841, -0.4584]) tensor([0.2134, 0.1025, 0.4635, 0.2206]) -Greedy action tensor([ 0.0346, -0.3966, 0.0949, -1.3623]) tensor([0.3379, 0.2196, 0.3589, 0.0836]) -Greedy action tensor([-0.0827, 0.5807, -0.3513, -0.3710]) tensor([0.2245, 0.4357, 0.1716, 0.1682]) -Greedy action tensor([ 0.2944, -1.0108, 0.0240, 0.7050]) tensor([0.2823, 0.0765, 0.2154, 0.4257]) -Greedy action tensor([ 0.3585, 0.3513, -0.4955, 0.1651]) tensor([0.3084, 0.3062, 0.1313, 0.2542]) -Greedy action tensor([ 0.1458, -0.5749, 0.0740, 0.6780]) tensor([0.2427, 0.1181, 0.2259, 0.4133]) -Greedy action tensor([-0.2872, -1.4263, -0.0841, 0.0103]) tensor([0.2570, 0.0823, 0.3148, 0.3460]) -Greedy action tensor([-0.3988, 0.3933, -0.1763, -0.8404]) tensor([0.1961, 0.4329, 0.2449, 0.1261]) -Greedy action tensor([ 0.4039, -0.4070, -0.1181, 0.0671]) tensor([0.3634, 0.1615, 0.2156, 0.2595]) -Greedy action tensor([ 0.2099, -0.5886, 0.0652, -0.1263]) tensor([0.3301, 0.1485, 0.2856, 0.2358]) -Greedy action tensor([-0.7967, -0.9128, -0.6419, -0.8395]) tensor([0.2490, 0.2217, 0.2907, 0.2386]) -Greedy action tensor([ 0.3803, -1.1481, 0.4629, -0.0350]) tensor([0.3375, 0.0732, 0.3665, 0.2228]) -Greedy action tensor([ 0.0168, -0.6285, -1.1322, -0.7454]) tensor([0.4333, 0.2273, 0.1373, 0.2022]) -Greedy action tensor([ 0.8772, -0.6264, -0.7126, -0.6496]) tensor([0.6084, 0.1353, 0.1241, 0.1322]) -Greedy action tensor([-0.0922, 0.0365, 0.1020, -1.2903]) tensor([0.2737, 0.3113, 0.3324, 0.0826]) -Greedy action tensor([ 0.0022, -0.2638, -0.4089, -1.3950]) tensor([0.3736, 0.2863, 0.2477, 0.0924]) -Greedy action tensor([ 0.0646, -1.1207, 1.0630, -0.9397]) tensor([0.2280, 0.0697, 0.6188, 0.0835]) -Greedy action tensor([-0.1723, -1.1410, -0.8558, 0.2945]) tensor([0.2874, 0.1091, 0.1451, 0.4584]) -Greedy action tensor([-0.7724, 0.2960, -1.0780, -0.3279]) tensor([0.1611, 0.4689, 0.1187, 0.2513]) -Greedy action tensor([ 0.3464, -0.5192, -0.0210, -0.4730]) tensor([0.3915, 0.1648, 0.2712, 0.1725]) -Greedy action tensor([ 1.3299, -0.2381, -0.2627, 0.5687]) tensor([0.5322, 0.1109, 0.1083, 0.2486]) -Greedy action tensor([-0.9987, -0.9953, -0.0950, -1.0411]) tensor([0.1841, 0.1848, 0.4546, 0.1765]) -Greedy action tensor([-0.1526, 0.2762, -1.1651, -0.5115]) tensor([0.2780, 0.4268, 0.1010, 0.1942]) -Greedy action tensor([ 0.0116, -0.9009, -0.2656, -0.4617]) tensor([0.3594, 0.1443, 0.2724, 0.2239]) -Greedy action tensor([ 1.1870, -0.3677, 1.1386, 0.8301]) tensor([0.3492, 0.0738, 0.3327, 0.2444]) -Greedy action tensor([-1.9195, -0.4574, 0.6694, -0.1558]) tensor([0.0409, 0.1764, 0.5443, 0.2385]) -Greedy action tensor([-1.9093, -0.4407, 0.6482, -0.1647]) tensor([0.0417, 0.1812, 0.5383, 0.2388]) -Greedy action tensor([-1.8924, -0.3588, 0.6359, -0.1567]) tensor([0.0419, 0.1944, 0.5257, 0.2380]) -Greedy action tensor([-1.9181, -0.4440, 0.6575, -0.1648]) tensor([0.0412, 0.1799, 0.5411, 0.2378]) -Greedy action tensor([-1.6740, -0.4176, 0.5510, -0.0803]) tensor([0.0535, 0.1880, 0.4952, 0.2634]) -Greedy action tensor([-1.7692, -0.4515, 0.6441, -0.0084]) tensor([0.0460, 0.1719, 0.5142, 0.2678]) -Greedy action tensor([-0.6806, 0.1581, 0.1074, 0.0226]) tensor([0.1328, 0.3071, 0.2919, 0.2682]) -Greedy action tensor([-1.4063, 0.0183, 0.3856, -0.0869]) tensor([0.0671, 0.2790, 0.4028, 0.2511]) -Greedy action tensor([-1.7690, -0.1963, 0.5596, -0.1471]) tensor([0.0473, 0.2279, 0.4854, 0.2394]) -Greedy action tensor([-1.8814, -0.4605, 0.6409, -0.1509]) tensor([0.0430, 0.1782, 0.5360, 0.2428]) -Greedy action tensor([-1.8544, -0.2576, 0.5910, -0.1530]) tensor([0.0436, 0.2151, 0.5025, 0.2388]) -Greedy action tensor([-1.9188, -0.4299, 0.6546, -0.1666]) tensor([0.0411, 0.1823, 0.5393, 0.2372]) -Greedy action tensor([-1.1148, 0.6243, -0.1442, -0.1390]) tensor([0.0834, 0.4749, 0.2202, 0.2214]) -Greedy action tensor([-1.5165, -0.2343, 0.5458, 0.0706]) tensor([0.0576, 0.2076, 0.4531, 0.2817]) -Greedy action tensor([-1.9030, -0.4550, 0.6518, -0.1603]) tensor([0.0420, 0.1785, 0.5399, 0.2397]) -Greedy action tensor([-1.8184, -0.3979, 0.5949, -0.1505]) tensor([0.0463, 0.1915, 0.5169, 0.2453]) -Greedy action tensor([-1.9167, -0.4409, 0.6550, -0.1662]) tensor([0.0413, 0.1806, 0.5404, 0.2377]) -Greedy action tensor([-1.8934, -0.3319, 0.6318, -0.1435]) tensor([0.0416, 0.1985, 0.5203, 0.2396]) -Greedy action tensor([-1.8359, -0.4295, 0.6059, -0.1221]) tensor([0.0452, 0.1845, 0.5195, 0.2508]) -Greedy action tensor([-1.7968, -0.0885, 0.5709, -0.0822]) tensor([0.0440, 0.2427, 0.4692, 0.2442]) -Greedy action tensor([-1.1717, -0.3272, 0.1684, 0.2894]) tensor([0.0873, 0.2031, 0.3334, 0.3762]) -Greedy action tensor([-1.8158, -0.1076, 0.5588, -0.0932]) tensor([0.0437, 0.2414, 0.4700, 0.2449]) -Greedy action tensor([-1.8196, -0.4240, 0.6799, 0.0356]) tensor([0.0424, 0.1710, 0.5158, 0.2708]) -Greedy action tensor([-1.9400, -0.4447, 0.6649, -0.1780]) tensor([0.0403, 0.1798, 0.5452, 0.2347]) -Greedy action tensor([-1.8827, -0.2897, 0.6243, -0.1425]) tensor([0.0419, 0.2059, 0.5136, 0.2386]) -Greedy action tensor([-1.8350, -0.4305, 0.6104, -0.1273]) tensor([0.0452, 0.1841, 0.5214, 0.2493]) -Greedy action tensor([-1.5649, -0.0322, 0.3267, -0.5287]) tensor([0.0663, 0.3071, 0.4397, 0.1869]) -Greedy action tensor([-1.7904, -0.4693, 0.5871, -0.1097]) tensor([0.0479, 0.1794, 0.5158, 0.2570]) -Greedy action tensor([-1.6816, -0.0461, 0.5089, -0.0419]) tensor([0.0494, 0.2537, 0.4420, 0.2548]) -Greedy action tensor([0.3206, 0.2471, 0.6408, 1.1129]) tensor([0.1813, 0.1685, 0.2498, 0.4004]) -Greedy action tensor([-1.8586, -0.3568, 0.6260, -0.1382]) tensor([0.0433, 0.1946, 0.5199, 0.2421]) -Greedy action tensor([-1.9048, -0.4001, 0.6289, -0.1722]) tensor([0.0421, 0.1895, 0.5304, 0.2380]) -Greedy action tensor([-1.7484, -0.3917, 0.5637, -0.1637]) tensor([0.0504, 0.1956, 0.5084, 0.2457]) -Greedy action tensor([-1.6681, -0.5405, 0.5056, -0.1282]) tensor([0.0570, 0.1760, 0.5011, 0.2659]) -Greedy action tensor([-1.5872, -0.5429, 0.4766, 0.0037]) tensor([0.0601, 0.1709, 0.4737, 0.2952]) -Greedy action tensor([-0.6795, 0.3921, 0.0967, -0.0376]) tensor([0.1251, 0.3653, 0.2719, 0.2377]) -Greedy action tensor([-1.8847, -0.4292, 0.6303, -0.1550]) tensor([0.0429, 0.1840, 0.5309, 0.2421]) -Greedy action tensor([-1.0081, 0.2526, 0.1937, 0.1045]) tensor([0.0918, 0.3238, 0.3052, 0.2792]) -Greedy action tensor([-0.7944, 0.8584, 0.1277, 0.0151]) tensor([0.0910, 0.4754, 0.2290, 0.2046]) -Greedy action tensor([-0.9107, -0.0848, 0.2162, 0.0934]) tensor([0.1099, 0.2510, 0.3392, 0.3000]) -Greedy action tensor([-1.2790, -0.5948, 0.2962, 0.2555]) tensor([0.0803, 0.1592, 0.3880, 0.3725]) -Greedy action tensor([-1.7791, -0.4412, 0.5863, -0.0968]) tensor([0.0480, 0.1829, 0.5110, 0.2581]) -Greedy action tensor([-1.7923, -0.4141, 0.5885, -0.1228]) tensor([0.0474, 0.1881, 0.5127, 0.2517]) -Greedy action tensor([-1.9364, -0.4588, 0.6636, -0.1747]) tensor([0.0405, 0.1777, 0.5458, 0.2360]) -Greedy action tensor([-1.9120, -0.3327, 0.6390, -0.1649]) tensor([0.0410, 0.1988, 0.5252, 0.2351]) -Greedy action tensor([-1.9369, -0.4388, 0.6624, -0.1748]) tensor([0.0404, 0.1807, 0.5436, 0.2353]) -Greedy action tensor([-1.7814, -0.4451, 0.5930, -0.0847]) tensor([0.0476, 0.1811, 0.5115, 0.2597]) -Greedy action tensor([-1.3136, 0.5783, 0.2421, 0.0762]) tensor([0.0610, 0.4048, 0.2892, 0.2450]) -Greedy action tensor([-1.7199, -0.2635, 0.5351, -0.0685]) tensor([0.0499, 0.2141, 0.4758, 0.2602]) -Greedy action tensor([-0.9099, -0.4719, 0.3007, 0.6335]) tensor([0.0945, 0.1464, 0.3170, 0.4422]) -Greedy action tensor([-1.9366, -0.4419, 0.6683, -0.1725]) tensor([0.0403, 0.1796, 0.5450, 0.2351]) -Greedy action tensor([-1.8731, -0.4617, 0.6368, -0.1474]) tensor([0.0434, 0.1782, 0.5344, 0.2440]) -Greedy action tensor([-1.8031, -0.4710, 0.6026, -0.1123]) tensor([0.0469, 0.1779, 0.5205, 0.2547]) -Greedy action tensor([-1.8748, -0.4452, 0.6388, -0.1401]) tensor([0.0431, 0.1801, 0.5324, 0.2444]) -Greedy action tensor([-1.8769, -0.3972, 0.6274, -0.1525]) tensor([0.0430, 0.1890, 0.5266, 0.2414]) -Greedy action tensor([-1.6724, -0.4821, 0.5389, -0.0429]) tensor([0.0540, 0.1776, 0.4929, 0.2755]) -Greedy action tensor([-1.8070, -0.4408, 0.6717, 0.0026]) tensor([0.0436, 0.1708, 0.5195, 0.2661]) -Greedy action tensor([-1.8187, -0.3468, 0.6039, -0.1158]) tensor([0.0452, 0.1970, 0.5097, 0.2481]) -Greedy action tensor([-1.7733, -0.4696, 0.5825, -0.0978]) tensor([0.0486, 0.1790, 0.5127, 0.2597]) -Greedy action tensor([-1.6207, 0.3048, 0.3824, 0.0218]) tensor([0.0489, 0.3356, 0.3626, 0.2529]) -Greedy action tensor([-1.4287, 0.6300, 0.3215, -0.0713]) tensor([0.0541, 0.4241, 0.3115, 0.2103]) -Greedy action tensor([-1.7532, -0.4603, 0.5748, -0.0827]) tensor([0.0495, 0.1802, 0.5074, 0.2629]) -Greedy action tensor([-1.1975e+00, 6.8951e-04, 2.7467e-01, 1.6963e-02]) tensor([0.0830, 0.2752, 0.3620, 0.2797]) -Greedy action tensor([-1.3775, -0.1732, 0.4537, 0.2262]) tensor([0.0643, 0.2145, 0.4014, 0.3198]) -Greedy action tensor([-1.3780, -0.3794, 0.5357, 0.1978]) tensor([0.0652, 0.1771, 0.4422, 0.3154]) -Greedy action tensor([-1.6668, -0.1399, 0.3936, -0.5397]) tensor([0.0605, 0.2784, 0.4746, 0.1866]) -Greedy action tensor([-1.8846, -0.3964, 0.6381, -0.1386]) tensor([0.0423, 0.1875, 0.5275, 0.2426]) -Greedy action tensor([-1.9303, -0.4397, 0.6609, -0.1724]) tensor([0.0407, 0.1806, 0.5428, 0.2359]) -Greedy action tensor([-1.8495, -0.3093, 0.6023, -0.1252]) tensor([0.0437, 0.2039, 0.5073, 0.2451]) -Greedy action tensor([-1.8372, -0.3701, 0.6019, -0.1361]) tensor([0.0449, 0.1946, 0.5145, 0.2460]) -Greedy action tensor([-1.8918, -0.4544, 0.6447, -0.1546]) tensor([0.0425, 0.1789, 0.5371, 0.2415]) -Greedy action tensor([-1.8830, -0.4257, 0.6318, -0.1514]) tensor([0.0429, 0.1842, 0.5305, 0.2424]) -Greedy action tensor([-1.8289, -0.3277, 0.5956, -0.1343]) tensor([0.0450, 0.2019, 0.5082, 0.2449]) -Greedy action tensor([-1.9104, -0.4402, 0.6499, -0.1646]) tensor([0.0416, 0.1811, 0.5387, 0.2386]) -Greedy action tensor([-0.3590, 0.2961, 0.1050, 0.6361]) tensor([0.1385, 0.2666, 0.2202, 0.3746]) -Greedy action tensor([-1.9449, -0.4504, 0.6679, -0.1800]) tensor([0.0401, 0.1788, 0.5469, 0.2342]) -Greedy action tensor([-1.1873, 0.2770, 0.1946, 0.0465]) tensor([0.0785, 0.3394, 0.3126, 0.2695]) -Greedy action tensor([-1.9030, 0.0898, -0.0664, -0.3227]) tensor([0.0514, 0.3768, 0.3223, 0.2495]) -Greedy action tensor([-1.8935, -0.4568, 0.6459, -0.1574]) tensor([0.0425, 0.1786, 0.5380, 0.2410]) -Greedy action tensor([-1.6784, -0.0465, 0.4997, -0.1006]) tensor([0.0505, 0.2584, 0.4462, 0.2448]) -Greedy action tensor([-1.9122, -0.4356, 0.6503, -0.1651]) tensor([0.0415, 0.1818, 0.5385, 0.2382]) -Greedy action tensor([ 1.3197, -0.1972, -0.5179, 0.3265]) tensor([0.5718, 0.1254, 0.0910, 0.2118]) -Greedy action tensor([ 1.2699, -0.4309, -0.8299, 0.0530]) tensor([0.6245, 0.1140, 0.0765, 0.1850]) -Greedy action tensor([ 1.1352, -0.3565, -0.7549, 0.2109]) tensor([0.5641, 0.1269, 0.0852, 0.2238]) -Greedy action tensor([ 2.1330, -1.2432, -0.1803, 0.8695]) tensor([0.7063, 0.0241, 0.0699, 0.1997]) -Greedy action tensor([ 1.9533, -0.7350, -0.5197, 0.3805]) tensor([0.7354, 0.0500, 0.0620, 0.1526]) -Greedy action tensor([ 2.2318, -0.0949, -0.5093, 0.6037]) tensor([0.7362, 0.0719, 0.0475, 0.1445]) -Greedy action tensor([ 1.0357, 0.1530, -0.8068, 0.2728]) tensor([0.4906, 0.2029, 0.0777, 0.2288]) -Greedy action tensor([ 0.9191, -0.4931, -0.4900, 0.1035]) tensor([0.5181, 0.1262, 0.1266, 0.2292]) -Greedy action tensor([ 1.2312, -0.4922, -0.4846, 0.0967]) tensor([0.5953, 0.1062, 0.1070, 0.1914]) -Greedy action tensor([ 1.5489, -0.5514, -0.3239, 0.3451]) tensor([0.6344, 0.0777, 0.0975, 0.1904]) -Greedy action tensor([ 0.9878, -0.4836, -0.0555, 0.0884]) tensor([0.5028, 0.1154, 0.1771, 0.2046]) -Greedy action tensor([ 1.5201, 0.5944, -0.4042, 0.1622]) tensor([0.5557, 0.2202, 0.0811, 0.1429]) -Greedy action tensor([ 1.6543, -0.4745, -0.2068, 0.4134]) tensor([0.6395, 0.0761, 0.0995, 0.1849]) -Greedy action tensor([ 1.2016, -0.4222, -0.3932, -0.0032]) tensor([0.5883, 0.1160, 0.1194, 0.1763]) -Greedy action tensor([ 1.8775, -0.4456, -0.6088, 0.4091]) tensor([0.7085, 0.0694, 0.0590, 0.1632]) -Greedy action tensor([ 1.3726, 0.2316, -0.8888, 0.1620]) tensor([0.5808, 0.1856, 0.0605, 0.1731]) -Greedy action tensor([ 1.4164, -0.5538, -0.3734, 0.1010]) tensor([0.6350, 0.0885, 0.1060, 0.1704]) -Greedy action tensor([ 1.1274, -0.3184, -0.3881, 0.0474]) tensor([0.5572, 0.1312, 0.1224, 0.1892]) -Greedy action tensor([ 0.9070, -0.2699, -0.1849, 0.2017]) tensor([0.4678, 0.1442, 0.1570, 0.2311]) -Greedy action tensor([ 1.8307, -0.2637, -0.5158, 0.2512]) tensor([0.7018, 0.0864, 0.0672, 0.1446]) -Greedy action tensor([ 1.7985, -0.8612, -0.4042, 0.5876]) tensor([0.6764, 0.0473, 0.0747, 0.2015]) -Greedy action tensor([ 1.5640, -0.5336, -0.5926, 0.6916]) tensor([0.6037, 0.0741, 0.0699, 0.2523]) -Greedy action tensor([ 1.5213, -0.6949, -0.4987, 0.4234]) tensor([0.6348, 0.0692, 0.0842, 0.2118]) -Greedy action tensor([ 1.4768, 0.0017, -1.0747, -0.1085]) tensor([0.6615, 0.1513, 0.0516, 0.1355]) -Greedy action tensor([ 1.2817, 0.1352, -0.4354, -0.1889]) tensor([0.5790, 0.1840, 0.1040, 0.1330]) -Greedy action tensor([ 1.4504, -0.3976, -0.0871, 0.0581]) tensor([0.6169, 0.0972, 0.1326, 0.1533]) -Greedy action tensor([ 1.5385, -1.0824, -0.6023, 0.2957]) tensor([0.6762, 0.0492, 0.0795, 0.1951]) -Greedy action tensor([ 1.2698, -0.2602, -0.3900, 0.6543]) tensor([0.5136, 0.1112, 0.0977, 0.2775]) -Greedy action tensor([ 1.1730, -0.3996, -0.3719, -0.2335]) tensor([0.6003, 0.1246, 0.1281, 0.1471]) -Greedy action tensor([ 1.8699, -0.5726, -0.9752, 0.7330]) tensor([0.6822, 0.0593, 0.0397, 0.2189]) -Greedy action tensor([ 1.3051, -0.1886, -0.8138, 0.5348]) tensor([0.5532, 0.1242, 0.0665, 0.2561]) -Greedy action tensor([ 1.3761, -0.0996, -1.5747, 0.2556]) tensor([0.6223, 0.1423, 0.0325, 0.2029]) -Greedy action tensor([ 1.3082, -0.3592, -0.7690, -0.0987]) tensor([0.6415, 0.1211, 0.0804, 0.1571]) -Greedy action tensor([ 1.6083, -0.6020, 0.0702, 0.1877]) tensor([0.6386, 0.0700, 0.1372, 0.1543]) -Greedy action tensor([ 1.3561, -0.5032, -0.2744, 0.0655]) tensor([0.6147, 0.0958, 0.1204, 0.1691]) -Greedy action tensor([ 1.4089, -0.9264, -0.4420, -0.0909]) tensor([0.6770, 0.0655, 0.1064, 0.1511]) -Greedy action tensor([ 2.0613, -0.8255, -0.0462, 0.6246]) tensor([0.7067, 0.0394, 0.0859, 0.1680]) -Greedy action tensor([ 1.6697, 0.3806, -0.9112, 0.5511]) tensor([0.5960, 0.1642, 0.0451, 0.1947]) -Greedy action tensor([ 1.7481, 0.3568, -0.0988, 0.4330]) tensor([0.5970, 0.1485, 0.0942, 0.1603]) -Greedy action tensor([ 1.2795, -0.7960, -0.3300, 0.5614]) tensor([0.5515, 0.0692, 0.1103, 0.2690]) -Greedy action tensor([ 2.0479, 0.8439, 0.3794, -0.1103]) tensor([0.6234, 0.1870, 0.1175, 0.0720]) -Greedy action tensor([ 0.9899, -0.4841, -0.5435, 0.3708]) tensor([0.5042, 0.1155, 0.1088, 0.2715]) -Greedy action tensor([1.8473, 0.4951, 0.0373, 0.0767]) tensor([0.6279, 0.1624, 0.1028, 0.1069]) -Greedy action tensor([ 1.4432, -0.2596, -1.2919, 0.2931]) tensor([0.6395, 0.1165, 0.0415, 0.2025]) -Greedy action tensor([ 1.0757, -0.4582, -0.9966, 0.3586]) tensor([0.5465, 0.1179, 0.0688, 0.2668]) -Greedy action tensor([ 1.4051, -0.4114, -0.6338, -0.0203]) tensor([0.6522, 0.1061, 0.0849, 0.1568]) -Greedy action tensor([ 1.3723, -0.6466, -0.3959, 0.2395]) tensor([0.6152, 0.0817, 0.1050, 0.1982]) -Greedy action tensor([ 2.6168, -1.0119, -0.0094, 0.4707]) tensor([0.8225, 0.0218, 0.0595, 0.0962]) -Greedy action tensor([ 1.8669, -0.4600, -0.5719, 0.2288]) tensor([0.7250, 0.0708, 0.0633, 0.1409]) -Greedy action tensor([ 1.1759, -0.3670, -0.0361, 0.2335]) tensor([0.5260, 0.1124, 0.1565, 0.2050]) -Greedy action tensor([ 1.3254, -0.4149, -0.6204, 0.0273]) tensor([0.6284, 0.1103, 0.0898, 0.1716]) -Greedy action tensor([ 0.9630, 0.2589, -0.0907, -0.5172]) tensor([0.4829, 0.2388, 0.1684, 0.1099]) -Greedy action tensor([ 1.5853, -0.3972, -0.4637, 0.4530]) tensor([0.6294, 0.0867, 0.0811, 0.2029]) -Greedy action tensor([ 1.0296, -0.4720, -0.2844, 0.6765]) tensor([0.4558, 0.1015, 0.1225, 0.3202]) -Greedy action tensor([ 1.5258, -0.0192, -0.7421, 0.5770]) tensor([0.5868, 0.1252, 0.0608, 0.2272]) -Greedy action tensor([ 1.1164, 0.0539, -0.8031, 0.0143]) tensor([0.5481, 0.1894, 0.0804, 0.1821]) -Greedy action tensor([ 1.6327, -0.1956, -0.3916, 0.3400]) tensor([0.6380, 0.1025, 0.0843, 0.1752]) -Greedy action tensor([ 1.3312, -0.5795, -0.9227, 0.1180]) tensor([0.6451, 0.0955, 0.0677, 0.1917]) -Greedy action tensor([ 1.5395, -0.5533, -0.4409, 0.4997]) tensor([0.6192, 0.0764, 0.0855, 0.2189]) -Greedy action tensor([ 2.1818, -0.7174, -0.7534, 0.3748]) tensor([0.7860, 0.0433, 0.0417, 0.1290]) -Greedy action tensor([ 1.4357, -0.0793, -0.1621, -0.7413]) tensor([0.6513, 0.1431, 0.1318, 0.0738]) -Greedy action tensor([ 1.6362, -0.6278, -0.3619, 0.2484]) tensor([0.6715, 0.0698, 0.0911, 0.1676]) -Greedy action tensor([ 1.3442, -0.2044, -0.9306, 0.2752]) tensor([0.6029, 0.1281, 0.0620, 0.2070]) -Greedy action tensor([ 1.1203, -0.3630, -0.2690, 0.4354]) tensor([0.5050, 0.1146, 0.1259, 0.2546]) -Greedy action tensor([ 1.6872, -0.8718, -0.7725, 0.5742]) tensor([0.6705, 0.0519, 0.0573, 0.2203]) -Greedy action tensor([ 1.4091, 0.0622, -0.5065, 0.5519]) tensor([0.5460, 0.1420, 0.0804, 0.2317]) -Greedy action tensor([ 1.1039, -0.2690, -0.2379, -0.0037]) tensor([0.5420, 0.1373, 0.1417, 0.1790]) -Greedy action tensor([ 1.8552, -0.2431, -0.7019, 0.1829]) tensor([0.7205, 0.0884, 0.0559, 0.1353]) -Greedy action tensor([ 2.5705, -1.5428, -0.2225, 0.6281]) tensor([0.8190, 0.0134, 0.0502, 0.1174]) -Greedy action tensor([ 1.8505, 0.1445, -0.1514, 0.3788]) tensor([0.6468, 0.1174, 0.0874, 0.1484]) -Greedy action tensor([ 1.8449, -0.9350, -0.0237, 0.1381]) tensor([0.7154, 0.0444, 0.1104, 0.1298]) -Greedy action tensor([ 1.2264, -0.8535, -0.2981, 0.6391]) tensor([0.5267, 0.0658, 0.1147, 0.2928]) -Greedy action tensor([ 1.8770, -0.5973, -0.4227, 0.0947]) tensor([0.7392, 0.0623, 0.0741, 0.1244]) -Greedy action tensor([ 1.7203, -0.1551, -0.9888, 0.0350]) tensor([0.7116, 0.1091, 0.0474, 0.1319]) -Greedy action tensor([ 1.8230, -1.2279, -0.0363, 0.8158]) tensor([0.6376, 0.0302, 0.0993, 0.2329]) -Greedy action tensor([ 1.1585, -0.1263, -0.6546, 0.1770]) tensor([0.5511, 0.1525, 0.0899, 0.2065]) -Greedy action tensor([ 1.5981, -0.3828, -1.2473, 0.4903]) tensor([0.6552, 0.0904, 0.0381, 0.2164]) -Greedy action tensor([ 1.2429, -0.4760, -0.3922, 0.7336]) tensor([0.5063, 0.0908, 0.0987, 0.3042]) -Greedy action tensor([ 1.2099, 0.0137, -0.5417, 0.5429]) tensor([0.5027, 0.1520, 0.0872, 0.2580]) -Greedy action tensor([ 1.5091, -0.6041, -0.1897, 0.4855]) tensor([0.6013, 0.0727, 0.1100, 0.2160]) -Greedy action tensor([ 1.8745, -0.9411, -0.3041, 0.0138]) tensor([0.7527, 0.0451, 0.0852, 0.1171]) -Greedy action tensor([ 0.8929, -0.6055, 0.0085, -0.4673]) tensor([0.5282, 0.1181, 0.2181, 0.1356]) -Greedy action tensor([ 0.4948, 0.1145, -0.1621, 0.1275]) tensor([0.3455, 0.2362, 0.1791, 0.2393]) -Greedy action tensor([ 0.6353, -0.2594, -0.0393, -0.1191]) tensor([0.4187, 0.1711, 0.2133, 0.1969]) -Greedy action tensor([ 1.0330, -0.9536, -0.0089, -0.4806]) tensor([0.5848, 0.0802, 0.2063, 0.1287]) -Greedy action tensor([ 1.2194, -0.7458, -0.0448, -0.4764]) tensor([0.6226, 0.0872, 0.1759, 0.1142]) -Greedy action tensor([ 1.1507, -0.3045, -0.2520, -0.3562]) tensor([0.5879, 0.1372, 0.1446, 0.1303]) -Greedy action tensor([ 0.9506, -0.6613, -0.0158, -0.5194]) tensor([0.5525, 0.1102, 0.2102, 0.1270]) -Greedy action tensor([ 0.6615, -0.4248, -0.0789, -0.4659]) tensor([0.4677, 0.1578, 0.2230, 0.1515]) -Greedy action tensor([ 0.8439, -0.2425, 0.1480, -0.3309]) tensor([0.4662, 0.1573, 0.2325, 0.1440]) -Greedy action tensor([ 1.0097, -0.4445, 0.1350, -0.2228]) tensor([0.5149, 0.1203, 0.2147, 0.1501]) -Greedy action tensor([ 0.5887, 0.0213, -0.2239, -0.0915]) tensor([0.3973, 0.2252, 0.1763, 0.2012]) -Greedy action tensor([ 1.1657, -0.5716, -0.0142, -0.7662]) tensor([0.6142, 0.1081, 0.1887, 0.0890]) -Greedy action tensor([ 0.5438, -0.4121, -0.1769, -0.1314]) tensor([0.4202, 0.1615, 0.2044, 0.2139]) -Greedy action tensor([ 1.3463, -0.6851, 0.0098, -1.0656]) tensor([0.6740, 0.0884, 0.1771, 0.0604]) -Greedy action tensor([ 0.6591, -0.4199, 0.0466, -0.4623]) tensor([0.4529, 0.1540, 0.2455, 0.1476]) -Greedy action tensor([ 0.9173, -0.5453, -0.1163, -0.3856]) tensor([0.5379, 0.1246, 0.1913, 0.1462]) -Greedy action tensor([ 1.1032, -0.7808, -0.0344, -0.4301]) tensor([0.5923, 0.0900, 0.1899, 0.1278]) -Greedy action tensor([ 1.1239, -0.8162, -0.0168, -0.5242]) tensor([0.6040, 0.0868, 0.1930, 0.1162]) -Greedy action tensor([ 0.6036, -0.2970, -0.0028, -0.3419]) tensor([0.4273, 0.1736, 0.2330, 0.1660]) -Greedy action tensor([ 0.5950, -0.2914, -0.0253, -0.2993]) tensor([0.4239, 0.1747, 0.2280, 0.1733]) -Greedy action tensor([ 0.8272, -0.6672, -0.0638, -0.4942]) tensor([0.5259, 0.1180, 0.2158, 0.1403]) -Greedy action tensor([ 0.4125, -0.3287, -0.1945, 0.0344]) tensor([0.3695, 0.1761, 0.2013, 0.2531]) -Greedy action tensor([ 0.4044, -0.1126, -0.0854, -0.1633]) tensor([0.3602, 0.2148, 0.2207, 0.2042]) -Greedy action tensor([ 0.9444, -0.6529, -0.0104, -0.5785]) tensor([0.5539, 0.1121, 0.2132, 0.1208]) -Greedy action tensor([ 0.8175, -0.4767, -0.1997, -0.2801]) tensor([0.5078, 0.1392, 0.1836, 0.1694]) -Greedy action tensor([ 0.5292, -0.2152, -0.0297, -0.0978]) tensor([0.3874, 0.1840, 0.2216, 0.2070]) -Greedy action tensor([ 0.9870, -0.5421, -0.1399, -0.6031]) tensor([0.5732, 0.1242, 0.1857, 0.1169]) -Greedy action tensor([ 0.8745, -0.4656, -0.0346, -0.1934]) tensor([0.4979, 0.1304, 0.2006, 0.1711]) -Greedy action tensor([ 0.6359, -0.2038, 0.0140, -0.0607]) tensor([0.4053, 0.1750, 0.2176, 0.2020]) -Greedy action tensor([ 0.4922, -0.2524, -0.1166, -0.1956]) tensor([0.3966, 0.1883, 0.2157, 0.1993]) -Greedy action tensor([ 0.8910, -0.8084, 0.1952, -0.3120]) tensor([0.5046, 0.0922, 0.2516, 0.1515]) -Greedy action tensor([ 0.7973, -0.5926, 0.0177, -0.3047]) tensor([0.4902, 0.1221, 0.2248, 0.1629]) -Greedy action tensor([ 1.0364, -0.6701, -0.0937, -0.1524]) tensor([0.5528, 0.1003, 0.1785, 0.1684]) -Greedy action tensor([ 0.6748, -0.3464, -0.0154, -0.1698]) tensor([0.4364, 0.1572, 0.2189, 0.1875]) -Greedy action tensor([ 1.0032, -0.7436, 0.2127, -0.7541]) tensor([0.5554, 0.0968, 0.2519, 0.0958]) -Greedy action tensor([ 0.4951, -0.5147, -0.1747, -0.2220]) tensor([0.4230, 0.1541, 0.2165, 0.2065]) -Greedy action tensor([ 0.6685, -0.3756, -0.0555, -0.1943]) tensor([0.4427, 0.1558, 0.2146, 0.1868]) -Greedy action tensor([ 0.8170, -0.9269, -0.1185, -0.4586]) tensor([0.5416, 0.0947, 0.2125, 0.1512]) -Greedy action tensor([ 0.3232, 0.1748, -0.2216, 0.0262]) tensor([0.3140, 0.2707, 0.1821, 0.2333]) -Greedy action tensor([ 0.3858, -0.1676, -0.0828, -0.1438]) tensor([0.3585, 0.2061, 0.2244, 0.2111]) -Greedy action tensor([ 0.9058, -0.7379, 0.1509, -0.3781]) tensor([0.5154, 0.0996, 0.2423, 0.1427]) -Greedy action tensor([ 0.5782, -0.1932, 0.0853, -0.2171]) tensor([0.3961, 0.1831, 0.2420, 0.1788]) -Greedy action tensor([ 0.5997, -0.3900, -0.0997, -0.1812]) tensor([0.4298, 0.1598, 0.2136, 0.1969]) -Greedy action tensor([ 0.6657, -0.0089, 0.0993, 0.0583]) tensor([0.3814, 0.1943, 0.2165, 0.2078]) -Greedy action tensor([ 1.0980, -0.6821, 0.1085, -0.6541]) tensor([0.5835, 0.0984, 0.2169, 0.1012]) -Greedy action tensor([ 1.0411, -0.4273, -0.2642, -0.3159]) tensor([0.5686, 0.1309, 0.1541, 0.1464]) -Greedy action tensor([ 0.9240, -0.7254, 0.0515, -0.3755]) tensor([0.5311, 0.1021, 0.2220, 0.1448]) -Greedy action tensor([ 1.0098, -0.6858, 0.1146, -0.5941]) tensor([0.5577, 0.1023, 0.2278, 0.1122]) -Greedy action tensor([ 0.5471, -0.2925, -0.0520, -0.1927]) tensor([0.4068, 0.1757, 0.2234, 0.1941]) -Greedy action tensor([ 0.7893, -0.6945, -0.1455, -0.2509]) tensor([0.5069, 0.1150, 0.1990, 0.1791]) -Greedy action tensor([ 1.1668, -1.1245, 0.1276, -0.6650]) tensor([0.6192, 0.0626, 0.2190, 0.0991]) -Greedy action tensor([ 0.3644, -0.0547, 0.0069, -0.0872]) tensor([0.3340, 0.2197, 0.2336, 0.2127]) -Greedy action tensor([ 0.5661, -0.1089, -0.1238, 0.0207]) tensor([0.3861, 0.1966, 0.1936, 0.2237]) -Greedy action tensor([ 0.8251, -0.4259, -0.1141, -0.2462]) tensor([0.4951, 0.1417, 0.1936, 0.1696]) -Greedy action tensor([ 0.4377, 0.0080, -0.0925, -0.1319]) tensor([0.3565, 0.2320, 0.2098, 0.2017]) -Greedy action tensor([ 0.8035, -0.5258, -0.0429, -0.3583]) tensor([0.4984, 0.1319, 0.2138, 0.1559]) -Greedy action tensor([ 0.5627, -0.0467, -0.0644, -0.0110]) tensor([0.3786, 0.2058, 0.2022, 0.2133]) -Greedy action tensor([ 0.9581, -0.1138, 0.0802, -0.1302]) tensor([0.4774, 0.1634, 0.1984, 0.1608]) -Greedy action tensor([ 1.0857, -0.5400, -0.0740, -0.6644]) tensor([0.5938, 0.1168, 0.1862, 0.1032]) -Greedy action tensor([ 0.7539, -0.5290, -0.2168, -0.4693]) tensor([0.5127, 0.1421, 0.1942, 0.1509]) -Greedy action tensor([ 1.3566, -0.7529, -0.0626, -0.2934]) tensor([0.6430, 0.0780, 0.1555, 0.1235]) -Greedy action tensor([ 0.5097, 0.1315, -0.0779, 0.0040]) tensor([0.3516, 0.2409, 0.1954, 0.2121]) -Greedy action tensor([ 0.4418, -0.2254, -0.0241, -0.0218]) tensor([0.3611, 0.1853, 0.2266, 0.2271]) -Greedy action tensor([ 0.9392, -0.5410, -0.0348, -0.4207]) tensor([0.5371, 0.1222, 0.2028, 0.1379]) -Greedy action tensor([ 1.0216, -0.6322, 0.0232, -0.3329]) tensor([0.5501, 0.1052, 0.2027, 0.1420]) -Greedy action tensor([ 0.5164, -0.0380, -0.1052, -0.0330]) tensor([0.3719, 0.2136, 0.1998, 0.2147]) -Greedy action tensor([ 0.5752, -0.1856, -0.1142, -0.1276]) tensor([0.4058, 0.1896, 0.2037, 0.2009]) -Greedy action tensor([ 0.7805, -0.3424, -0.0156, -0.2249]) tensor([0.4668, 0.1519, 0.2106, 0.1708]) -Greedy action tensor([ 0.9291, -0.6768, 0.1248, -0.2072]) tensor([0.5079, 0.1019, 0.2272, 0.1630]) -Greedy action tensor([ 0.8795, -0.4859, -0.1466, -0.2505]) tensor([0.5163, 0.1318, 0.1851, 0.1668]) -Greedy action tensor([ 0.5520, 0.1131, -0.0743, 0.0561]) tensor([0.3586, 0.2312, 0.1917, 0.2184]) -Greedy action tensor([ 0.7155, -0.5085, 0.0268, -0.2572]) tensor([0.4599, 0.1352, 0.2310, 0.1739]) -Greedy action tensor([ 0.6415, -0.4244, -0.1360, -0.3339]) tensor([0.4585, 0.1579, 0.2107, 0.1729]) -Greedy action tensor([ 0.4538, 0.2769, -0.1641, 0.0955]) tensor([0.3251, 0.2724, 0.1753, 0.2272]) -Greedy action tensor([ 1.0906, -0.6354, -0.1624, -0.3579]) tensor([0.5887, 0.1048, 0.1682, 0.1383]) -Greedy action tensor([ 0.8395, -0.5991, 0.0737, -0.4070]) tensor([0.5026, 0.1192, 0.2337, 0.1445]) -Greedy action tensor([ 1.0460, -0.6159, -0.0960, -0.4334]) tensor([0.5758, 0.1093, 0.1838, 0.1312]) -Greedy action tensor([ 0.5440, -0.1866, -0.1541, -0.2886]) tensor([0.4142, 0.1995, 0.2061, 0.1802]) -Greedy action tensor([ 0.3014, 0.0125, -0.0681, -0.1650]) tensor([0.3260, 0.2442, 0.2253, 0.2045]) -Greedy action tensor([ 0.9554, -0.7518, -0.0996, -0.4250]) tensor([0.5615, 0.1018, 0.1955, 0.1412]) -Greedy action tensor([ 0.9605, -0.6867, 0.1616, -0.3739]) tensor([0.5247, 0.1011, 0.2360, 0.1382]) -Greedy action tensor([-0.6886, -0.1883, 0.4025, -0.3065]) tensor([0.1410, 0.2325, 0.4198, 0.2066]) -Greedy action tensor([ 0.0091, -1.5738, -0.4439, 0.5573]) tensor([0.2800, 0.0575, 0.1780, 0.4845]) -Greedy action tensor([ 0.0369, -1.0432, -0.7148, -0.5496]) tensor([0.4224, 0.1434, 0.1992, 0.2350]) -Greedy action tensor([-0.2295, -0.5772, 0.0779, -1.2041]) tensor([0.2904, 0.2051, 0.3949, 0.1096]) -Greedy action tensor([-0.5020, -1.6470, 0.5655, -0.6849]) tensor([0.1977, 0.0629, 0.5748, 0.1646]) -Greedy action tensor([ 0.4307, -1.2380, -0.1898, -0.2428]) tensor([0.4472, 0.0843, 0.2405, 0.2280]) -Greedy action tensor([ 0.2117, -0.7071, 0.0534, -0.1878]) tensor([0.3421, 0.1365, 0.2920, 0.2294]) -Greedy action tensor([ 0.5073, -0.9106, 0.5650, 0.3731]) tensor([0.3149, 0.0763, 0.3336, 0.2753]) -Greedy action tensor([-0.4114, -0.9280, 0.5954, -0.2234]) tensor([0.1805, 0.1077, 0.4940, 0.2178]) -Greedy action tensor([ 0.3823, 0.5051, -0.0138, -0.2388]) tensor([0.2993, 0.3384, 0.2014, 0.1608]) -Greedy action tensor([ 0.3809, -0.2516, -0.3130, -0.1084]) tensor([0.3782, 0.2009, 0.1890, 0.2319]) -Greedy action tensor([-0.3366, -1.0386, 0.3403, -0.5579]) tensor([0.2345, 0.1162, 0.4614, 0.1879]) -Greedy action tensor([-0.0478, 0.0738, -0.2879, -0.5466]) tensor([0.2838, 0.3205, 0.2233, 0.1724]) -Greedy action tensor([-0.0707, -0.8676, -0.3145, -0.4189]) tensor([0.3401, 0.1533, 0.2665, 0.2401]) -Greedy action tensor([-0.3562, -0.1394, -0.3292, -1.0212]) tensor([0.2643, 0.3283, 0.2715, 0.1359]) -Greedy action tensor([ 0.5280, -1.4364, -0.6856, -0.2334]) tensor([0.5251, 0.0736, 0.1560, 0.2452]) -Greedy action tensor([-0.1409, -1.3727, 1.0773, -0.8708]) tensor([0.1940, 0.0566, 0.6559, 0.0935]) -Greedy action tensor([-0.4545, -1.4003, 0.7322, -0.2788]) tensor([0.1707, 0.0663, 0.5594, 0.2035]) -Greedy action tensor([ 0.3123, 0.2061, -0.0946, 0.1512]) tensor([0.2927, 0.2632, 0.1949, 0.2492]) -Greedy action tensor([-0.9639, -1.2274, 0.3286, -0.2493]) tensor([0.1342, 0.1031, 0.4886, 0.2741]) -Greedy action tensor([-0.6558, 0.4115, 1.1455, -1.0390]) tensor([0.0939, 0.2731, 0.5690, 0.0640]) -Greedy action tensor([-0.3432, -0.5249, -0.8995, -0.6491]) tensor([0.3181, 0.2653, 0.1824, 0.2343]) -Greedy action tensor([-0.0419, -0.3401, -0.8764, -0.4433]) tensor([0.3514, 0.2608, 0.1525, 0.2352]) -Greedy action tensor([-0.5990, -0.4852, -0.5034, -0.4000]) tensor([0.2252, 0.2523, 0.2478, 0.2747]) -Greedy action tensor([-0.3847, -0.8515, 0.1188, -0.6502]) tensor([0.2470, 0.1549, 0.4087, 0.1894]) -Greedy action tensor([ 1.5681, -0.9371, 0.7211, 1.3417]) tensor([0.4333, 0.0354, 0.1858, 0.3455]) -Greedy action tensor([ 0.0145, -1.2555, 0.6655, 0.6706]) tensor([0.1951, 0.0548, 0.3741, 0.3760]) -Greedy action tensor([ 0.4910, -1.1688, 0.1039, -0.3884]) tensor([0.4378, 0.0833, 0.2973, 0.1817]) -Greedy action tensor([ 0.5892, -0.0126, -0.4562, -0.5333]) tensor([0.4495, 0.2462, 0.1580, 0.1463]) -Greedy action tensor([-0.6268, -1.1126, -0.2819, -0.6167]) tensor([0.2477, 0.1524, 0.3497, 0.2502]) -Greedy action tensor([-1.3751, -0.8095, -1.2521, 0.1452]) tensor([0.1181, 0.2080, 0.1336, 0.5403]) -Greedy action tensor([ 1.4150, -1.3333, 0.0652, -0.1070]) tensor([0.6487, 0.0415, 0.1682, 0.1416]) -Greedy action tensor([ 1.5401, -0.1047, -0.0738, 0.6434]) tensor([0.5555, 0.1072, 0.1106, 0.2266]) -Greedy action tensor([-0.1301, -0.1116, -0.1206, -0.0344]) tensor([0.2422, 0.2467, 0.2445, 0.2665]) -Greedy action tensor([ 0.0277, -1.2433, -0.4834, -0.0626]) tensor([0.3579, 0.1004, 0.2147, 0.3270]) -Greedy action tensor([ 0.9037, -1.0258, -0.8070, -0.2139]) tensor([0.6049, 0.0879, 0.1093, 0.1979]) -Greedy action tensor([-1.2834, -1.3531, 0.1665, -0.4793]) tensor([0.1186, 0.1106, 0.5057, 0.2651]) -Greedy action tensor([ 1.3739, -0.4770, 0.1366, -0.6878]) tensor([0.6351, 0.0998, 0.1843, 0.0808]) -Greedy action tensor([-0.1864, -1.0133, -0.1559, 0.7408]) tensor([0.2002, 0.0876, 0.2064, 0.5059]) -Greedy action tensor([-0.3825, -0.3377, 0.8018, -0.5725]) tensor([0.1628, 0.1703, 0.5322, 0.1347]) -Greedy action tensor([ 0.1945, -1.3061, -0.7467, -0.3386]) tensor([0.4546, 0.1014, 0.1773, 0.2667]) -Greedy action tensor([-0.4351, 0.2022, 0.0012, -0.7937]) tensor([0.1947, 0.3682, 0.3011, 0.1360]) -Greedy action tensor([ 0.7416, -0.2286, 0.3402, -0.4909]) tensor([0.4274, 0.1620, 0.2861, 0.1246]) -Greedy action tensor([ 0.0748, -1.3803, 0.6004, 0.7600]) tensor([0.2037, 0.0475, 0.3446, 0.4042]) -Greedy action tensor([-0.2170, 0.1588, -0.5305, 0.0932]) tensor([0.2198, 0.3200, 0.1606, 0.2997]) -Greedy action tensor([ 1.2339, 0.8988, 0.6726, -0.7905]) tensor([0.4136, 0.2958, 0.2360, 0.0546]) -Greedy action tensor([-0.2961, -0.8681, -0.3912, -0.8987]) tensor([0.3310, 0.1868, 0.3010, 0.1812]) -Greedy action tensor([ 0.7595, -0.6808, -0.2758, 0.2517]) tensor([0.4558, 0.1080, 0.1619, 0.2743]) -Greedy action tensor([-0.6896, 0.0658, -0.2041, 0.4503]) tensor([0.1269, 0.2701, 0.2062, 0.3968]) -Greedy action tensor([-0.8299, -0.6046, -1.3329, -0.0346]) tensor([0.1971, 0.2470, 0.1192, 0.4367]) -Greedy action tensor([-0.2326, -1.1686, 0.3667, -0.0130]) tensor([0.2243, 0.0880, 0.4084, 0.2794]) -Greedy action tensor([-0.6380, -0.2750, 0.0342, -0.6359]) tensor([0.1852, 0.2663, 0.3628, 0.1856]) -Greedy action tensor([1.2802, 0.2922, 0.3587, 1.7902]) tensor([0.2911, 0.1084, 0.1158, 0.4847]) -Greedy action tensor([-0.7092, -0.4711, 0.1298, -0.9104]) tensor([0.1852, 0.2350, 0.4285, 0.1514]) -Greedy action tensor([-0.0398, -0.7445, -0.2437, 0.1491]) tensor([0.2843, 0.1405, 0.2318, 0.3434]) -Greedy action tensor([ 1.8150, -1.6491, 0.3383, 0.3705]) tensor([0.6686, 0.0209, 0.1527, 0.1577]) -Greedy action tensor([-0.7083, -1.5449, -0.3588, -0.2900]) tensor([0.2288, 0.0991, 0.3245, 0.3476]) -Greedy action tensor([-0.7131, -0.9386, 0.5121, -0.4326]) tensor([0.1532, 0.1223, 0.5217, 0.2028]) -Greedy action tensor([ 0.2044, -0.3453, 0.1330, -0.1699]) tensor([0.3129, 0.1806, 0.2913, 0.2152]) -Greedy action tensor([-0.4582, -0.5452, -1.0712, 0.2883]) tensor([0.2189, 0.2007, 0.1186, 0.4618]) -Greedy action tensor([-0.3374, -1.4421, 0.8362, -0.6408]) tensor([0.1886, 0.0625, 0.6098, 0.1392]) -Greedy action tensor([ 0.3363, 0.0277, -0.3337, -0.1216]) tensor([0.3474, 0.2551, 0.1778, 0.2198]) -Greedy action tensor([-0.4590, -0.5189, -0.1966, -0.2709]) tensor([0.2248, 0.2117, 0.2922, 0.2713]) -Greedy action tensor([ 0.1253, -0.0451, 1.0513, -0.9822]) tensor([0.2128, 0.1795, 0.5373, 0.0703]) -Greedy action tensor([-0.6166, -1.6244, 0.5655, -0.0402]) tensor([0.1561, 0.0570, 0.5091, 0.2778]) -Greedy action tensor([-0.4584, -0.4941, -0.6393, 0.0686]) tensor([0.2226, 0.2148, 0.1857, 0.3770]) -Greedy action tensor([ 0.0840, 0.1928, 0.8102, -0.3608]) tensor([0.2073, 0.2312, 0.4286, 0.1329]) -Greedy action tensor([ 0.1455, -0.1285, -0.9674, 0.3640]) tensor([0.3000, 0.2281, 0.0986, 0.3733]) -Greedy action tensor([-0.0895, -0.6087, -0.6258, 0.3972]) tensor([0.2627, 0.1563, 0.1536, 0.4274]) -Greedy action tensor([ 0.4505, -1.3828, -0.7137, -0.8448]) tensor([0.5728, 0.0916, 0.1788, 0.1568]) -Greedy action tensor([-0.7966, -1.1172, -0.0125, -0.5783]) tensor([0.1938, 0.1406, 0.4245, 0.2411]) -Greedy action tensor([ 0.1642, -1.2848, 0.6416, -1.0213]) tensor([0.3172, 0.0745, 0.5113, 0.0969]) -Greedy action tensor([ 1.1662, -0.6776, 0.7380, -0.3651]) tensor([0.4936, 0.0781, 0.3216, 0.1067]) -Greedy action tensor([ 0.5656, -0.9301, -0.5779, -0.7270]) tensor([0.5503, 0.1233, 0.1754, 0.1511]) -Greedy action tensor([-0.6853, -0.9611, -0.2230, -0.4771]) tensor([0.2184, 0.1658, 0.3468, 0.2690]) -Greedy action tensor([-0.0225, -1.9367, -0.4848, -0.5307]) tensor([0.4204, 0.0620, 0.2648, 0.2529]) -Greedy action tensor([ 0.8829, -1.3005, -0.2692, -0.4789]) tensor([0.5935, 0.0669, 0.1875, 0.1521]) -Greedy action tensor([-0.7227, -0.3345, 0.1265, -0.4534]) tensor([0.1634, 0.2408, 0.3819, 0.2139]) -Greedy action tensor([ 0.4752, -0.0728, 0.4551, -0.0440]) tensor([0.3171, 0.1833, 0.3108, 0.1887]) -Greedy action tensor([-0.0018, -0.2365, -0.2256, -0.2666]) tensor([0.2978, 0.2355, 0.2381, 0.2285]) -Greedy action tensor([ 1.3418, -0.3661, 0.7743, 0.1907]) tensor([0.4844, 0.0878, 0.2746, 0.1532]) -Greedy action tensor([ 1.4364, -0.5330, -0.3869, -0.1847]) tensor([0.6672, 0.0931, 0.1078, 0.1319]) -Greedy action tensor([ 1.7203, -0.8131, -0.3780, 0.3943]) tensor([0.6814, 0.0541, 0.0836, 0.1809]) -Greedy action tensor([ 1.8806, -0.5260, -0.3016, 0.3702]) tensor([0.7024, 0.0633, 0.0792, 0.1551]) -Greedy action tensor([ 1.3810, -0.2990, -0.6290, 0.5661]) tensor([0.5672, 0.1057, 0.0760, 0.2511]) -Greedy action tensor([ 1.7730, -0.5822, -0.8276, 0.5255]) tensor([0.6867, 0.0651, 0.0510, 0.1972]) -Greedy action tensor([ 1.9492, -0.1274, -0.1501, 0.3970]) tensor([0.6851, 0.0859, 0.0839, 0.1451]) -Greedy action tensor([ 1.8761, -1.0227, -0.1364, 0.2901]) tensor([0.7176, 0.0395, 0.0959, 0.1469]) -Greedy action tensor([ 1.5336, -0.5615, -1.1284, 0.3662]) tensor([0.6649, 0.0818, 0.0464, 0.2069]) -Greedy action tensor([ 1.2231, -0.6478, 0.0667, -0.1001]) tensor([0.5764, 0.0888, 0.1813, 0.1535]) -Greedy action tensor([ 2.1167, -0.3125, 0.0672, 0.2783]) tensor([0.7268, 0.0640, 0.0936, 0.1156]) -Greedy action tensor([ 1.5306, -0.0063, -0.5927, 0.2780]) tensor([0.6171, 0.1327, 0.0738, 0.1764]) -Greedy action tensor([ 1.5476, -0.1571, -0.4142, 0.1476]) tensor([0.6373, 0.1159, 0.0896, 0.1572]) -Greedy action tensor([ 1.6344, 0.0633, -0.2897, 0.0749]) tensor([0.6394, 0.1329, 0.0934, 0.1344]) -Greedy action tensor([ 1.6428, -0.6785, 0.2079, 0.1356]) tensor([0.6419, 0.0630, 0.1529, 0.1422]) -Greedy action tensor([ 1.1669, -0.0605, 0.0830, 0.2018]) tensor([0.4970, 0.1456, 0.1681, 0.1893]) -Greedy action tensor([ 1.1413, -0.2535, -0.3769, 0.1538]) tensor([0.5436, 0.1348, 0.1191, 0.2025]) -Greedy action tensor([ 2.0748, -0.8533, -0.3088, 0.1736]) tensor([0.7721, 0.0413, 0.0712, 0.1154]) -Greedy action tensor([ 1.1908, -0.6396, -0.9624, 0.6368]) tensor([0.5402, 0.0866, 0.0627, 0.3105]) -Greedy action tensor([ 1.3789, -0.2672, -0.1854, 0.1037]) tensor([0.5947, 0.1147, 0.1244, 0.1662]) -Greedy action tensor([ 1.6101, 0.3546, -0.5127, 0.6163]) tensor([0.5635, 0.1605, 0.0674, 0.2086]) -Greedy action tensor([2.3722, 0.0384, 0.0480, 0.3975]) tensor([0.7499, 0.0727, 0.0734, 0.1041]) -Greedy action tensor([ 1.4255, -0.2121, -0.5124, 0.2204]) tensor([0.6104, 0.1187, 0.0879, 0.1829]) -Greedy action tensor([ 1.2045, -0.6120, -0.3981, 0.4921]) tensor([0.5392, 0.0877, 0.1086, 0.2645]) -Greedy action tensor([ 1.4410, -0.2920, -0.3635, 0.2047]) tensor([0.6128, 0.1083, 0.1008, 0.1780]) -Greedy action tensor([ 1.5521, -0.6060, -0.3756, 0.2328]) tensor([0.6543, 0.0756, 0.0952, 0.1749]) -Greedy action tensor([ 1.4415, -0.6238, -0.7196, 0.1099]) tensor([0.6640, 0.0842, 0.0765, 0.1753]) -Greedy action tensor([ 2.0432, -0.6391, -0.4242, 0.0623]) tensor([0.7745, 0.0530, 0.0657, 0.1068]) -Greedy action tensor([ 1.5779, -0.2943, -0.6264, 0.1959]) tensor([0.6600, 0.1015, 0.0728, 0.1657]) -Greedy action tensor([ 1.7251, -0.9414, -0.7148, 0.1165]) tensor([0.7370, 0.0512, 0.0642, 0.1475]) -Greedy action tensor([ 1.5596, -0.5516, -0.4166, 0.6629]) tensor([0.5997, 0.0726, 0.0831, 0.2446]) -Greedy action tensor([ 2.2627, -0.0323, -0.2959, 0.6222]) tensor([0.7288, 0.0734, 0.0564, 0.1413]) -Greedy action tensor([ 1.1926, -0.2766, -0.8691, 0.6874]) tensor([0.5100, 0.1174, 0.0649, 0.3077]) -Greedy action tensor([ 1.4412, -0.4508, -0.4136, 0.1590]) tensor([0.6310, 0.0951, 0.0987, 0.1751]) -Greedy action tensor([ 0.8411, -0.0776, -0.1027, 0.2428]) tensor([0.4277, 0.1707, 0.1665, 0.2352]) -Greedy action tensor([ 1.8368, -0.4665, -0.2403, 0.1069]) tensor([0.7130, 0.0712, 0.0893, 0.1264]) -Greedy action tensor([ 1.6686, -0.6211, -0.3189, 0.4351]) tensor([0.6538, 0.0662, 0.0896, 0.1904]) -Greedy action tensor([ 1.1458, -0.7769, -0.2264, 0.4086]) tensor([0.5324, 0.0778, 0.1350, 0.2547]) -Greedy action tensor([ 2.1631, -0.8208, -0.4433, 0.1477]) tensor([0.7951, 0.0402, 0.0587, 0.1060]) -Greedy action tensor([ 1.0628, -0.1708, -0.1469, -0.0461]) tensor([0.5210, 0.1517, 0.1554, 0.1719]) -Greedy action tensor([ 1.7671, 0.7120, -0.1796, 0.1226]) tensor([0.5938, 0.2067, 0.0848, 0.1147]) -Greedy action tensor([ 1.2937, -0.3024, -0.4629, 0.1046]) tensor([0.5953, 0.1207, 0.1028, 0.1813]) -Greedy action tensor([ 1.8731, -0.5660, -0.3254, 0.2538]) tensor([0.7162, 0.0625, 0.0795, 0.1418]) -Greedy action tensor([ 1.4506, -0.4697, -1.1155, 0.4399]) tensor([0.6300, 0.0923, 0.0484, 0.2293]) -Greedy action tensor([ 1.8110, -0.3791, -0.3103, 0.4485]) tensor([0.6721, 0.0752, 0.0806, 0.1721]) -Greedy action tensor([ 1.4467, -0.2542, -0.0569, 0.1699]) tensor([0.5939, 0.1084, 0.1320, 0.1657]) -Greedy action tensor([ 1.1694, -0.0177, -0.2738, -0.1016]) tensor([0.5489, 0.1675, 0.1296, 0.1540]) -Greedy action tensor([2.7113, 1.0891, 0.1820, 0.1267]) tensor([0.7393, 0.1460, 0.0589, 0.0558]) -Greedy action tensor([ 1.6407, -0.2785, -0.9301, 0.1610]) tensor([0.6892, 0.1011, 0.0527, 0.1569]) -Greedy action tensor([ 1.3395, -0.1317, -0.5584, 0.3579]) tensor([0.5701, 0.1309, 0.0854, 0.2136]) -Greedy action tensor([ 2.0390, -0.7544, -0.5388, 0.0027]) tensor([0.7889, 0.0483, 0.0599, 0.1030]) -Greedy action tensor([ 1.7876, -0.9384, -0.1796, 0.3451]) tensor([0.6936, 0.0454, 0.0970, 0.1639]) -Greedy action tensor([ 1.7680, -0.7493, -0.6790, 0.1815]) tensor([0.7289, 0.0588, 0.0631, 0.1492]) -Greedy action tensor([ 1.5349, -0.2735, -0.6459, -0.1680]) tensor([0.6854, 0.1123, 0.0774, 0.1248]) -Greedy action tensor([ 1.7899, -0.9473, -0.4931, 0.7197]) tensor([0.6624, 0.0429, 0.0676, 0.2272]) -Greedy action tensor([ 1.6664, -1.4416, -0.1281, 0.1172]) tensor([0.7026, 0.0314, 0.1168, 0.1492]) -Greedy action tensor([ 1.2452, -0.4503, -0.1598, 0.2526]) tensor([0.5557, 0.1020, 0.1364, 0.2060]) -Greedy action tensor([ 1.1318, -0.4293, -0.0919, 0.1927]) tensor([0.5277, 0.1108, 0.1552, 0.2063]) -Greedy action tensor([ 1.3873, -0.0923, -0.7278, 0.2616]) tensor([0.5978, 0.1361, 0.0721, 0.1939]) -Greedy action tensor([ 1.0110, -0.4329, 0.0322, 0.1771]) tensor([0.4887, 0.1153, 0.1837, 0.2123]) -Greedy action tensor([ 1.3132, -0.5677, -0.4403, 0.4030]) tensor([0.5787, 0.0882, 0.1002, 0.2329]) -Greedy action tensor([ 1.8709, -0.3469, -0.5968, 0.0474]) tensor([0.7380, 0.0803, 0.0626, 0.1192]) -Greedy action tensor([ 1.5970, -0.6299, -0.1564, 0.2293]) tensor([0.6512, 0.0702, 0.1128, 0.1658]) -Greedy action tensor([ 1.4534, -0.6972, -0.6258, 0.4244]) tensor([0.6255, 0.0728, 0.0782, 0.2235]) -Greedy action tensor([ 1.2020, -0.3316, -0.4415, 0.3565]) tensor([0.5440, 0.1174, 0.1051, 0.2335]) -Greedy action tensor([ 1.1861, -0.6828, -0.2270, 0.0736]) tensor([0.5792, 0.0894, 0.1410, 0.1904]) -Greedy action tensor([ 1.5561, -0.8641, -0.1954, 0.6051]) tensor([0.6065, 0.0539, 0.1052, 0.2343]) -Greedy action tensor([ 1.3168, -0.0917, -0.8416, -0.0482]) tensor([0.6190, 0.1514, 0.0715, 0.1581]) -Greedy action tensor([ 2.1039, -0.6115, -0.3956, 0.8949]) tensor([0.6912, 0.0457, 0.0568, 0.2063]) -Greedy action tensor([ 1.2699, -0.1360, -0.4317, 0.4851]) tensor([0.5309, 0.1301, 0.0968, 0.2422]) -Greedy action tensor([ 1.5912, -0.4751, -0.3457, 0.2807]) tensor([0.6491, 0.0822, 0.0936, 0.1751]) -Greedy action tensor([ 1.1939, -0.3147, -0.0642, 0.0652]) tensor([0.5468, 0.1210, 0.1554, 0.1769]) -Greedy action tensor([ 1.0982, -0.3022, -0.6070, 0.4970]) tensor([0.5060, 0.1247, 0.0919, 0.2773]) -Greedy action tensor([ 1.4545, -0.3040, -0.5345, -0.0527]) tensor([0.6533, 0.1126, 0.0894, 0.1447]) -Greedy action tensor([ 1.0326, -0.5104, -0.3849, 0.3690]) tensor([0.5073, 0.1084, 0.1229, 0.2613]) -Greedy action tensor([ 1.1869, -0.0689, -0.9352, 0.4185]) tensor([0.5352, 0.1525, 0.0641, 0.2482]) -Greedy action tensor([ 1.3927, 0.0328, -0.9624, -0.0825]) tensor([0.6328, 0.1624, 0.0600, 0.1447]) -Greedy action tensor([ 2.0756, -0.6116, 0.0274, 0.6180]) tensor([0.6994, 0.0476, 0.0902, 0.1628]) -Greedy action tensor([ 1.1019, -0.2955, -1.0517, 0.2763]) tensor([0.5552, 0.1373, 0.0644, 0.2431]) -Greedy action tensor([ 1.1422, -0.2193, -0.5294, 0.2295]) tensor([0.5418, 0.1389, 0.1018, 0.2175]) -Greedy action tensor([ 1.9689, -0.9609, -0.0249, 0.0679]) tensor([0.7468, 0.0399, 0.1017, 0.1116]) -Greedy action tensor([ 1.5229, -0.5473, -0.5263, 0.5485]) tensor([0.6126, 0.0773, 0.0789, 0.2312]) -Greedy action tensor([ 1.2334, -0.2711, -0.1158, -0.0209]) tensor([0.5660, 0.1257, 0.1468, 0.1615]) -Greedy action tensor([ 0.7715, -0.6382, -0.0363, -0.6437]) tensor([0.5173, 0.1263, 0.2307, 0.1257]) -Greedy action tensor([ 0.9911, -0.7266, -0.0721, -0.4113]) tensor([0.5647, 0.1014, 0.1950, 0.1389]) -Greedy action tensor([ 0.8713, 0.0709, -0.0067, -0.0410]) tensor([0.4412, 0.1982, 0.1834, 0.1772]) -Greedy action tensor([ 0.9203, -0.5213, -0.0474, -0.2295]) tensor([0.5173, 0.1224, 0.1965, 0.1638]) -Greedy action tensor([ 0.5332, -0.0299, 0.0244, -0.2080]) tensor([0.3778, 0.2151, 0.2271, 0.1800]) -Greedy action tensor([ 1.0637, -0.4922, -0.1063, -0.2090]) tensor([0.5551, 0.1171, 0.1723, 0.1555]) -Greedy action tensor([ 1.0803, -0.6887, -0.0112, -0.5279]) tensor([0.5860, 0.0999, 0.1967, 0.1173]) -Greedy action tensor([ 0.6740, -0.1253, -0.0841, -0.0586]) tensor([0.4169, 0.1874, 0.1953, 0.2004]) -Greedy action tensor([ 0.1445, 0.0463, -0.0382, -0.4003]) tensor([0.3013, 0.2731, 0.2510, 0.1747]) -Greedy action tensor([ 1.6213, -1.1548, -0.0865, -0.9217]) tensor([0.7563, 0.0471, 0.1371, 0.0595]) -Greedy action tensor([ 1.0420, -0.7698, 0.0053, -0.4582]) tensor([0.5744, 0.0938, 0.2037, 0.1281]) -Greedy action tensor([ 0.8238, -0.2979, -0.1712, -0.1034]) tensor([0.4782, 0.1558, 0.1768, 0.1892]) -Greedy action tensor([ 1.1695, -0.6563, 0.0278, -0.4136]) tensor([0.5932, 0.0956, 0.1894, 0.1218]) -Greedy action tensor([ 1.0551, -0.6207, -0.1990, -0.4371]) tensor([0.5892, 0.1103, 0.1681, 0.1325]) -Greedy action tensor([ 1.3391, -0.8541, -0.0160, -0.8776]) tensor([0.6764, 0.0755, 0.1745, 0.0737]) -Greedy action tensor([ 1.0028, -0.7973, 0.0358, -0.5299]) tensor([0.5677, 0.0938, 0.2159, 0.1226]) -Greedy action tensor([ 0.4834, -0.3630, 0.0074, -0.1647]) tensor([0.3886, 0.1667, 0.2414, 0.2033]) -Greedy action tensor([ 0.8676, -0.4647, -0.0365, -0.8466]) tensor([0.5409, 0.1427, 0.2190, 0.0974]) -Greedy action tensor([ 1.0358, -0.6887, 0.0236, -0.6087]) tensor([0.5764, 0.1028, 0.2095, 0.1113]) -Greedy action tensor([ 0.7266, -0.2762, -0.0360, -0.0720]) tensor([0.4380, 0.1607, 0.2043, 0.1971]) -Greedy action tensor([ 0.5782, -0.2214, -0.0081, -0.1765]) tensor([0.4039, 0.1815, 0.2247, 0.1899]) -Greedy action tensor([ 0.1687, 0.0295, -0.0853, -0.1507]) tensor([0.2965, 0.2580, 0.2300, 0.2154]) -Greedy action tensor([ 0.7278, -0.4862, -0.0668, -0.2756]) tensor([0.4727, 0.1404, 0.2136, 0.1733]) -Greedy action tensor([ 0.7209, -0.4730, -0.0243, -0.2120]) tensor([0.4606, 0.1396, 0.2186, 0.1812]) -Greedy action tensor([ 1.1520, -0.6320, -0.1202, -0.2643]) tensor([0.5914, 0.0993, 0.1657, 0.1435]) -Greedy action tensor([ 0.8397, -0.4809, -0.0747, -0.4331]) tensor([0.5134, 0.1371, 0.2057, 0.1438]) -Greedy action tensor([ 0.8615, -0.5394, 0.1567, -0.3005]) tensor([0.4870, 0.1200, 0.2407, 0.1524]) -Greedy action tensor([ 0.7562, -0.3538, 0.0333, -0.1974]) tensor([0.4545, 0.1498, 0.2206, 0.1751]) -Greedy action tensor([ 1.1239, -0.7249, 0.1594, -0.5282]) tensor([0.5780, 0.0910, 0.2203, 0.1108]) -Greedy action tensor([ 0.5111, -0.1440, 0.0110, -0.1323]) tensor([0.3772, 0.1959, 0.2287, 0.1982]) -Greedy action tensor([ 0.8174, -0.3341, 0.0814, -0.3363]) tensor([0.4738, 0.1498, 0.2270, 0.1495]) -Greedy action tensor([ 0.7771, -0.6358, 0.0512, -0.4080]) tensor([0.4919, 0.1197, 0.2380, 0.1504]) -Greedy action tensor([ 0.3991, -0.1077, 0.2386, 0.1139]) tensor([0.3119, 0.1879, 0.2657, 0.2345]) -Greedy action tensor([ 0.7337, -0.3413, -0.0639, -0.1258]) tensor([0.4515, 0.1541, 0.2033, 0.1911]) -Greedy action tensor([ 0.4716, -0.1214, -0.1053, -0.5926]) tensor([0.4066, 0.2247, 0.2284, 0.1403]) -Greedy action tensor([ 0.9409, -0.3985, 0.0677, -0.3362]) tensor([0.5106, 0.1338, 0.2132, 0.1424]) -Greedy action tensor([ 0.4607, -0.3409, 0.0339, -0.2830]) tensor([0.3881, 0.1741, 0.2533, 0.1845]) -Greedy action tensor([ 1.0316, -0.3924, -0.0076, -0.2411]) tensor([0.5334, 0.1284, 0.1887, 0.1494]) -Greedy action tensor([ 0.9090, -0.4881, 0.0402, -0.2435]) tensor([0.5044, 0.1247, 0.2116, 0.1593]) -Greedy action tensor([ 0.9774, -0.4928, -0.1041, -0.3831]) tensor([0.5478, 0.1259, 0.1858, 0.1405]) -Greedy action tensor([ 0.8785, -0.8014, -0.1425, -0.4530]) tensor([0.5523, 0.1029, 0.1990, 0.1458]) -Greedy action tensor([ 0.9502, -0.7684, 0.2322, -0.5468]) tensor([0.5289, 0.0948, 0.2579, 0.1184]) -Greedy action tensor([ 0.6198, -0.4525, -0.0034, -0.4203]) tensor([0.4480, 0.1533, 0.2403, 0.1584]) -Greedy action tensor([ 0.9874, -0.6312, 0.0464, -0.6422]) tensor([0.5604, 0.1111, 0.2187, 0.1098]) -Greedy action tensor([ 0.5303, -0.2570, -0.0798, -0.3211]) tensor([0.4123, 0.1876, 0.2240, 0.1760]) -Greedy action tensor([ 0.7914, -0.1946, 0.0552, -0.4456]) tensor([0.4668, 0.1741, 0.2236, 0.1355]) -Greedy action tensor([ 1.0294, -0.9206, -0.0564, -0.4453]) tensor([0.5852, 0.0833, 0.1976, 0.1339]) -Greedy action tensor([ 1.0058, -0.5374, -0.1358, -0.5667]) tensor([0.5745, 0.1228, 0.1835, 0.1192]) -Greedy action tensor([ 0.6590, -0.4599, -0.0846, -0.2033]) tensor([0.4496, 0.1469, 0.2137, 0.1898]) -Greedy action tensor([ 1.0959, -0.5449, -0.0694, -0.8332]) tensor([0.6057, 0.1174, 0.1889, 0.0880]) -Greedy action tensor([ 0.6862, -0.5101, -0.1610, -0.2130]) tensor([0.4678, 0.1414, 0.2005, 0.1903]) -Greedy action tensor([ 0.4718, 0.0844, -0.0951, 0.0448]) tensor([0.3450, 0.2342, 0.1957, 0.2251]) -Greedy action tensor([ 0.7819, -0.6206, 0.0513, -0.3367]) tensor([0.4868, 0.1197, 0.2344, 0.1590]) -Greedy action tensor([ 1.1256, -0.1056, -0.0799, -0.4529]) tensor([0.5562, 0.1624, 0.1666, 0.1148]) -Greedy action tensor([ 0.7766, -0.5521, -0.0652, -0.4398]) tensor([0.5020, 0.1329, 0.2163, 0.1487]) -Greedy action tensor([ 0.4561, -0.5190, -0.2444, -0.2052]) tensor([0.4185, 0.1578, 0.2077, 0.2160]) -Greedy action tensor([ 0.9524, -0.3264, -0.0385, -0.1999]) tensor([0.5088, 0.1416, 0.1889, 0.1607]) -Greedy action tensor([ 0.7576, -0.1682, -0.0135, -0.1787]) tensor([0.4443, 0.1760, 0.2055, 0.1742]) -Greedy action tensor([ 0.9853, -0.4321, -0.0867, -0.2285]) tensor([0.5314, 0.1288, 0.1819, 0.1579]) -Greedy action tensor([ 0.6529, -0.1890, -0.0444, -0.0582]) tensor([0.4132, 0.1781, 0.2058, 0.2029]) -Greedy action tensor([ 0.7713, -0.6896, 0.1631, -0.2563]) tensor([0.4685, 0.1087, 0.2551, 0.1677]) -Greedy action tensor([ 0.5477, -0.5098, -0.0623, -0.2331]) tensor([0.4258, 0.1479, 0.2313, 0.1950]) -Greedy action tensor([ 0.5258, -0.1023, -0.1345, -0.0999]) tensor([0.3868, 0.2064, 0.1999, 0.2069]) -Greedy action tensor([ 0.7002, -0.3951, -0.0360, -0.1366]) tensor([0.4451, 0.1489, 0.2132, 0.1928]) -Greedy action tensor([ 0.9210, -0.3018, -0.1050, -0.2211]) tensor([0.5071, 0.1493, 0.1818, 0.1618]) -Greedy action tensor([ 0.7900, -0.6281, 0.0787, -0.1624]) tensor([0.4719, 0.1143, 0.2317, 0.1821]) -Greedy action tensor([ 0.9385, -0.4228, -0.0270, -0.1200]) tensor([0.5040, 0.1292, 0.1919, 0.1749]) -Greedy action tensor([ 1.2425, -0.7603, 0.0089, -0.5884]) tensor([0.6303, 0.0851, 0.1836, 0.1010]) -Greedy action tensor([ 0.6916, -0.4388, -0.1322, -0.2835]) tensor([0.4676, 0.1510, 0.2051, 0.1763]) -Greedy action tensor([ 0.6447, -0.4031, -0.1262, -0.3049]) tensor([0.4545, 0.1594, 0.2102, 0.1759]) -Greedy action tensor([ 0.9866, -0.3694, -0.1343, -0.4741]) tensor([0.5507, 0.1419, 0.1795, 0.1278]) -Greedy action tensor([ 0.9035, -0.6237, -0.0995, -0.3627]) tensor([0.5360, 0.1164, 0.1966, 0.1511]) -Greedy action tensor([ 0.7008, -0.3392, 0.0227, -0.1881]) tensor([0.4401, 0.1556, 0.2234, 0.1809]) -Greedy action tensor([ 0.5736, -0.1907, -0.0545, -0.2389]) tensor([0.4093, 0.1906, 0.2184, 0.1816]) -Greedy action tensor([ 0.4789, -0.0895, 0.0046, -0.1932]) tensor([0.3705, 0.2098, 0.2305, 0.1892]) -Greedy action tensor([ 0.3552, -0.5182, -0.3288, -0.0939]) tensor([0.3906, 0.1631, 0.1971, 0.2493]) -Greedy action tensor([ 0.5045, -0.2249, 0.1140, -0.4407]) tensor([0.3925, 0.1893, 0.2656, 0.1525]) -Greedy action tensor([ 0.6713, -0.1919, -0.2393, -0.2414]) tensor([0.4493, 0.1895, 0.1808, 0.1804]) -Greedy action tensor([ 1.0537, -0.7007, 0.1247, -0.3777]) tensor([0.5534, 0.0957, 0.2186, 0.1323]) -Greedy action tensor([ 0.4331, -0.2232, -0.0551, -0.4049]) tensor([0.3898, 0.2022, 0.2393, 0.1686]) -Greedy action tensor([ 1.1047, -0.6496, -0.0414, -0.8334]) tensor([0.6117, 0.1058, 0.1944, 0.0881]) -Greedy action tensor([-0.2853, 1.0609, 0.0334, 0.2028]) tensor([0.1274, 0.4897, 0.1753, 0.2076]) -Greedy action tensor([-1.1255, -0.5330, 0.7323, 0.9600]) tensor([0.0579, 0.1047, 0.3712, 0.4661]) -Greedy action tensor([-1.8927, -0.3286, 0.6271, -0.1528]) tensor([0.0418, 0.1999, 0.5199, 0.2383]) -Greedy action tensor([-1.6762, 0.3662, 0.5013, -0.2345]) tensor([0.0460, 0.3543, 0.4055, 0.1943]) -Greedy action tensor([-1.8961, -0.4502, 0.6415, -0.1605]) tensor([0.0424, 0.1802, 0.5367, 0.2407]) -Greedy action tensor([-1.6943, -0.3283, 0.5987, -0.0491]) tensor([0.0500, 0.1959, 0.4951, 0.2590]) -Greedy action tensor([-1.8398, -0.2492, 0.6015, -0.1158]) tensor([0.0435, 0.2133, 0.4994, 0.2438]) -Greedy action tensor([-1.8548, -0.4297, 0.6205, -0.1344]) tensor([0.0442, 0.1837, 0.5252, 0.2469]) -Greedy action tensor([-1.7046, -0.4723, 0.5473, -0.0604]) tensor([0.0523, 0.1794, 0.4974, 0.2709]) -Greedy action tensor([-1.7485, 0.3084, 0.4814, -0.1264]) tensor([0.0431, 0.3374, 0.4011, 0.2184]) -Greedy action tensor([-1.9045, -0.4357, 0.6483, -0.1564]) tensor([0.0418, 0.1815, 0.5367, 0.2400]) -Greedy action tensor([-1.8403, -0.1587, 0.5793, -0.1191]) tensor([0.0431, 0.2316, 0.4844, 0.2409]) -Greedy action tensor([-1.6678, -0.5329, 0.5304, -0.0997]) tensor([0.0558, 0.1736, 0.5028, 0.2678]) -Greedy action tensor([-0.7665, 0.6431, 0.0785, 0.0549]) tensor([0.1031, 0.4223, 0.2401, 0.2345]) -Greedy action tensor([-1.9176, -0.4448, 0.6566, -0.1658]) tensor([0.0412, 0.1799, 0.5411, 0.2378]) -Greedy action tensor([-1.9252, -0.4468, 0.6599, -0.1717]) tensor([0.0409, 0.1796, 0.5431, 0.2364]) -Greedy action tensor([-1.3446e+00, 1.1775e-03, 3.1681e-01, 4.2963e-02]) tensor([0.0709, 0.2722, 0.3732, 0.2838]) -Greedy action tensor([-1.8374, -0.3806, 0.6176, -0.1174]) tensor([0.0444, 0.1906, 0.5171, 0.2480]) -Greedy action tensor([-1.6875, -0.2561, 0.5151, -0.0914]) tensor([0.0522, 0.2183, 0.4721, 0.2574]) -Greedy action tensor([-1.8504, -0.4251, 0.6517, -0.1337]) tensor([0.0436, 0.1814, 0.5323, 0.2427]) -Greedy action tensor([-1.9084, -0.4370, 0.6511, -0.1638]) tensor([0.0417, 0.1814, 0.5385, 0.2384]) -Greedy action tensor([-1.8910, -0.4236, 0.6414, -0.1521]) tensor([0.0424, 0.1837, 0.5329, 0.2410]) -Greedy action tensor([-1.0831, -0.5509, 0.2894, 0.1769]) tensor([0.0983, 0.1674, 0.3878, 0.3465]) -Greedy action tensor([-0.7227, 0.8006, -0.0617, -0.0708]) tensor([0.1059, 0.4858, 0.2051, 0.2032]) -Greedy action tensor([-1.9215, -0.4163, 0.6545, -0.1682]) tensor([0.0409, 0.1845, 0.5382, 0.2364]) -Greedy action tensor([-1.6537, -0.2640, 0.6555, 0.1099]) tensor([0.0478, 0.1919, 0.4813, 0.2789]) -Greedy action tensor([-1.9282, -0.4178, 0.6582, -0.1714]) tensor([0.0406, 0.1841, 0.5398, 0.2355]) -Greedy action tensor([-1.8595, -0.3443, 0.6305, -0.1211]) tensor([0.0429, 0.1953, 0.5177, 0.2441]) -Greedy action tensor([-1.5465, -0.4320, 0.4525, -0.0042]) tensor([0.0621, 0.1893, 0.4583, 0.2903]) -Greedy action tensor([-1.4881, 0.3652, 0.3180, 0.1440]) tensor([0.0538, 0.3434, 0.3276, 0.2753]) -Greedy action tensor([-1.7971, -0.4348, 0.5943, -0.1280]) tensor([0.0473, 0.1847, 0.5169, 0.2510]) -Greedy action tensor([-1.6413, -0.3524, 0.5444, -0.5020]) tensor([0.0601, 0.2179, 0.5343, 0.1877]) -Greedy action tensor([-1.8904, -0.3373, 0.6265, -0.1417]) tensor([0.0419, 0.1981, 0.5192, 0.2408]) -Greedy action tensor([-1.8923, -0.4383, 0.6414, -0.1554]) tensor([0.0424, 0.1817, 0.5348, 0.2411]) -Greedy action tensor([-1.9160, -0.4218, 0.6538, -0.1666]) tensor([0.0412, 0.1836, 0.5382, 0.2370]) -Greedy action tensor([-1.7429, 0.1610, 0.4818, -0.0883]) tensor([0.0451, 0.3024, 0.4168, 0.2357]) -Greedy action tensor([-0.9598, 0.1981, 0.2334, -0.0449]) tensor([0.1002, 0.3191, 0.3305, 0.2502]) -Greedy action tensor([-1.4394, 0.0849, 0.3067, 0.1957]) tensor([0.0608, 0.2791, 0.3484, 0.3118]) -Greedy action tensor([-1.8483, -0.3785, 0.6231, -0.1152]) tensor([0.0438, 0.1903, 0.5182, 0.2477]) -Greedy action tensor([-1.8282, -0.2986, 0.6341, -0.1038]) tensor([0.0436, 0.2011, 0.5110, 0.2443]) -Greedy action tensor([-1.8325, -0.4527, 0.6061, -0.1244]) tensor([0.0456, 0.1811, 0.5220, 0.2514]) -Greedy action tensor([-1.9328, -0.4516, 0.6830, -0.1540]) tensor([0.0400, 0.1759, 0.5472, 0.2369]) -Greedy action tensor([-1.8565, -0.4076, 0.6635, -0.1145]) tensor([0.0427, 0.1820, 0.5312, 0.2440]) -Greedy action tensor([-1.8169, -0.4304, 0.5997, -0.1354]) tensor([0.0463, 0.1854, 0.5193, 0.2490]) -Greedy action tensor([-1.8958, -0.3750, 0.6414, -0.1526]) tensor([0.0418, 0.1912, 0.5283, 0.2388]) -Greedy action tensor([-0.5837, 0.4121, 0.1012, 0.0831]) tensor([0.1309, 0.3544, 0.2597, 0.2550]) -Greedy action tensor([-1.9211, -0.4488, 0.6597, -0.1660]) tensor([0.0411, 0.1790, 0.5424, 0.2375]) -Greedy action tensor([-0.1082, 1.0036, 0.0753, 0.0364]) tensor([0.1563, 0.4752, 0.1878, 0.1807]) -Greedy action tensor([-1.2118, 0.3288, 0.2501, 0.1866]) tensor([0.0713, 0.3327, 0.3075, 0.2886]) -Greedy action tensor([-1.4385, -0.5814, 0.4275, 0.0863]) tensor([0.0694, 0.1635, 0.4484, 0.3187]) -Greedy action tensor([-1.8862, -0.3180, 0.6199, -0.1579]) tensor([0.0422, 0.2026, 0.5175, 0.2377]) -Greedy action tensor([-1.6987, 0.3672, 0.4423, -0.0715]) tensor([0.0445, 0.3509, 0.3783, 0.2263]) -Greedy action tensor([0.2701, 0.8350, 0.2603, 1.2979]) tensor([0.1528, 0.2688, 0.1513, 0.4271]) -Greedy action tensor([-1.7295, -0.3697, 0.5591, -0.0935]) tensor([0.0503, 0.1958, 0.4957, 0.2581]) -Greedy action tensor([-1.9249, -0.4218, 0.6541, -0.1713]) tensor([0.0409, 0.1838, 0.5391, 0.2362]) -Greedy action tensor([-1.6548, 0.0597, 0.4654, -0.0725]) tensor([0.0506, 0.2812, 0.4218, 0.2464]) -Greedy action tensor([-0.8350, -0.3327, 0.5643, 0.7138]) tensor([0.0876, 0.1448, 0.3551, 0.4124]) -Greedy action tensor([-1.8123, -0.4405, 0.5996, -0.1182]) tensor([0.0464, 0.1830, 0.5179, 0.2526]) -Greedy action tensor([-0.9055, -0.1381, 0.1524, -0.3970]) tensor([0.1299, 0.2798, 0.3742, 0.2160]) -Greedy action tensor([-1.9257, -0.4662, 0.6750, -0.1650]) tensor([0.0407, 0.1750, 0.5478, 0.2365]) -Greedy action tensor([-1.7172, -0.4091, 0.7066, -0.0672]) tensor([0.0472, 0.1745, 0.5326, 0.2457]) -Greedy action tensor([-1.8895, -0.4142, 0.6338, -0.1548]) tensor([0.0425, 0.1860, 0.5304, 0.2411]) -Greedy action tensor([-1.3495, -0.5576, 0.6210, -0.1325]) tensor([0.0727, 0.1605, 0.5214, 0.2454]) -Greedy action tensor([-1.4243, 0.8018, 0.3173, -0.0133]) tensor([0.0498, 0.4616, 0.2843, 0.2043]) -Greedy action tensor([-0.9635, -0.3256, 0.2542, -0.0099]) tensor([0.1128, 0.2134, 0.3811, 0.2927]) -Greedy action tensor([-1.9127, -0.4192, 0.6472, -0.1664]) tensor([0.0415, 0.1846, 0.5362, 0.2377]) -Greedy action tensor([-1.8275, -0.2293, 0.5923, -0.1131]) tensor([0.0440, 0.2174, 0.4944, 0.2442]) -Greedy action tensor([-1.6130, -0.1373, 0.4608, -0.0573]) tensor([0.0553, 0.2421, 0.4403, 0.2623]) -Greedy action tensor([-1.4758, -0.2275, 0.4420, -0.0709]) tensor([0.0651, 0.2268, 0.4430, 0.2652]) -Greedy action tensor([-1.7414, 0.0660, 0.5021, -0.1298]) tensor([0.0464, 0.2831, 0.4378, 0.2327]) -Greedy action tensor([-1.5195, -0.0776, 0.4336, -0.0681]) tensor([0.0604, 0.2555, 0.4260, 0.2580]) -Greedy action tensor([-1.8185, -0.0203, 0.5396, -0.1064]) tensor([0.0432, 0.2609, 0.4566, 0.2393]) -Greedy action tensor([-1.9336, -0.4416, 0.6607, -0.1748]) tensor([0.0406, 0.1804, 0.5433, 0.2356]) -Greedy action tensor([-1.4263, -0.2742, 0.4169, 0.1611]) tensor([0.0650, 0.2059, 0.4109, 0.3182]) -Greedy action tensor([-1.9253, -0.4243, 0.6588, -0.1615]) tensor([0.0407, 0.1826, 0.5393, 0.2375]) -Greedy action tensor([-1.8981, -0.3682, 0.6408, -0.1555]) tensor([0.0417, 0.1924, 0.5278, 0.2380]) -Greedy action tensor([-1.6667e+00, 2.1545e-01, 4.2985e-01, 1.3566e-04]) tensor([0.0476, 0.3127, 0.3875, 0.2521]) -Greedy action tensor([-1.8662, -0.3689, 0.6264, -0.1409]) tensor([0.0431, 0.1929, 0.5218, 0.2422]) -Greedy action tensor([-1.5870, -0.0048, 0.4605, 0.1113]) tensor([0.0524, 0.2550, 0.4061, 0.2864]) -Greedy action tensor([-1.7176, -0.4354, 0.5589, -0.0636]) tensor([0.0511, 0.1841, 0.4977, 0.2671]) -Greedy action tensor([-1.7429, -0.1669, 0.5640, -0.0630]) tensor([0.0471, 0.2276, 0.4728, 0.2525]) -Greedy action tensor([ 0.0210, -0.1855, 0.1444, -0.8328]) tensor([0.2967, 0.2413, 0.3357, 0.1263]) -Greedy action tensor([-0.6743, -0.0123, 0.1493, -0.6013]) tensor([0.1589, 0.3081, 0.3621, 0.1709]) -Greedy action tensor([ 0.5250, -0.8020, 0.2041, 0.3032]) tensor([0.3582, 0.0950, 0.2599, 0.2869]) -Greedy action tensor([ 0.1674, -0.2798, -0.0784, -0.4884]) tensor([0.3401, 0.2174, 0.2660, 0.1765]) -Greedy action tensor([-0.3757, -1.0489, -0.1046, -0.6911]) tensor([0.2816, 0.1436, 0.3693, 0.2054]) -Greedy action tensor([ 0.2787, -0.5126, -0.6633, -0.9471]) tensor([0.4680, 0.2121, 0.1825, 0.1374]) -Greedy action tensor([-0.4684, -0.7129, -0.7605, -0.2303]) tensor([0.2632, 0.2062, 0.1966, 0.3340]) -Greedy action tensor([ 0.4036, -0.4224, -0.0232, 0.1279]) tensor([0.3509, 0.1536, 0.2290, 0.2664]) -Greedy action tensor([ 0.3071, -1.1287, -0.6445, 0.0532]) tensor([0.4167, 0.0991, 0.1609, 0.3233]) -Greedy action tensor([ 0.5802, -0.0063, -0.0106, 0.2827]) tensor([0.3505, 0.1950, 0.1942, 0.2603]) -Greedy action tensor([-0.6475, -1.0396, 0.8838, -1.1965]) tensor([0.1454, 0.0982, 0.6724, 0.0840]) -Greedy action tensor([ 0.2957, -0.8387, -0.0741, 0.0061]) tensor([0.3622, 0.1165, 0.2502, 0.2711]) -Greedy action tensor([-1.4982, -0.5624, 0.5212, -0.5172]) tensor([0.0727, 0.1854, 0.5479, 0.1940]) -Greedy action tensor([ 0.0490, -0.1575, -0.4808, 0.0367]) tensor([0.2950, 0.2400, 0.1737, 0.2914]) -Greedy action tensor([ 1.0511, -1.1262, -0.0916, 1.3515]) tensor([0.3594, 0.0407, 0.1146, 0.4853]) -Greedy action tensor([-0.4318, -0.0970, -1.0935, 0.3302]) tensor([0.1978, 0.2764, 0.1020, 0.4238]) -Greedy action tensor([-0.4102, -0.9853, -0.0616, -0.4550]) tensor([0.2541, 0.1429, 0.3600, 0.2429]) -Greedy action tensor([1.1621, 0.7925, 0.1383, 1.1088]) tensor([0.3335, 0.2305, 0.1198, 0.3162]) -Greedy action tensor([ 0.7532, -1.0922, 1.4418, -0.3769]) tensor([0.2880, 0.0455, 0.5734, 0.0930]) -Greedy action tensor([ 0.2241, -0.9001, 0.9294, -0.0059]) tensor([0.2413, 0.0784, 0.4885, 0.1917]) -Greedy action tensor([-0.0791, -1.6167, -0.2328, 0.7255]) tensor([0.2321, 0.0499, 0.1991, 0.5190]) -Greedy action tensor([ 0.6518, -1.0761, -0.7679, 1.0120]) tensor([0.3505, 0.0623, 0.0847, 0.5025]) -Greedy action tensor([ 1.7593, -0.0440, 0.7067, 1.1958]) tensor([0.4801, 0.0791, 0.1676, 0.2733]) -Greedy action tensor([-0.1385, -0.8204, -0.6063, -0.8645]) tensor([0.3823, 0.1933, 0.2394, 0.1850]) -Greedy action tensor([ 1.5354, -0.2609, -0.2586, 0.3013]) tensor([0.6160, 0.1022, 0.1024, 0.1793]) -Greedy action tensor([ 0.4445, -0.3722, -0.9255, 0.3943]) tensor([0.3778, 0.1669, 0.0960, 0.3593]) -Greedy action tensor([ 0.9403, -0.3818, -0.0535, -0.1422]) tensor([0.5062, 0.1349, 0.1874, 0.1715]) -Greedy action tensor([ 0.4689, -1.4016, -0.2569, 1.1890]) tensor([0.2708, 0.0417, 0.1311, 0.5564]) -Greedy action tensor([ 0.2694, -1.9774, 0.0972, -0.1144]) tensor([0.3804, 0.0402, 0.3202, 0.2592]) -Greedy action tensor([ 0.4896, -1.3047, 0.7754, -0.2226]) tensor([0.3347, 0.0556, 0.4455, 0.1642]) -Greedy action tensor([ 0.5732, 0.0488, 0.5810, -0.7140]) tensor([0.3477, 0.2058, 0.3505, 0.0960]) -Greedy action tensor([-0.5110, -0.8865, -0.6210, 0.2607]) tensor([0.2107, 0.1447, 0.1888, 0.4558]) -Greedy action tensor([ 0.6128, -0.5033, -1.0133, 0.0750]) tensor([0.4743, 0.1554, 0.0933, 0.2770]) -Greedy action tensor([ 0.1316, -0.6593, -0.6580, 0.1210]) tensor([0.3452, 0.1565, 0.1567, 0.3416]) -Greedy action tensor([-0.1908, -0.2064, -0.2231, -0.7662]) tensor([0.2845, 0.2801, 0.2754, 0.1600]) -Greedy action tensor([ 0.4006, 0.1768, -0.3780, 0.2376]) tensor([0.3217, 0.2572, 0.1477, 0.2734]) -Greedy action tensor([ 0.2042, -0.3031, -0.0938, 0.0850]) tensor([0.3094, 0.1863, 0.2297, 0.2746]) -Greedy action tensor([-1.0457, -0.0451, -0.7364, -0.2969]) tensor([0.1390, 0.3779, 0.1893, 0.2938]) -Greedy action tensor([-1.0720, -1.2926, 0.9261, -0.2605]) tensor([0.0875, 0.0702, 0.6453, 0.1970]) -Greedy action tensor([-0.6246, -0.5767, 0.4821, -0.4112]) tensor([0.1584, 0.1662, 0.4792, 0.1961]) -Greedy action tensor([-0.2646, -0.9115, -1.0193, 0.4668]) tensor([0.2456, 0.1286, 0.1155, 0.5103]) -Greedy action tensor([-0.1629, -1.1864, 0.4165, -1.1635]) tensor([0.2847, 0.1023, 0.5083, 0.1047]) -Greedy action tensor([-0.5706, -1.1565, 0.5303, -0.8421]) tensor([0.1878, 0.1045, 0.5646, 0.1431]) -Greedy action tensor([ 0.8593, -0.1821, 0.3906, -0.7399]) tensor([0.4586, 0.1618, 0.2869, 0.0926]) -Greedy action tensor([ 0.8357, 0.2802, -0.4871, 0.2284]) tensor([0.4193, 0.2406, 0.1117, 0.2284]) -Greedy action tensor([-0.0233, -0.6168, 1.2161, -0.0678]) tensor([0.1677, 0.0926, 0.5792, 0.1604]) -Greedy action tensor([-0.6315, -0.5498, 0.2004, -0.7544]) tensor([0.1898, 0.2060, 0.4362, 0.1679]) -Greedy action tensor([ 0.0075, -0.1675, 0.1234, -0.5402]) tensor([0.2824, 0.2371, 0.3171, 0.1633]) -Greedy action tensor([-0.0872, -0.0735, -0.0309, -0.8269]) tensor([0.2818, 0.2857, 0.2981, 0.1345]) -Greedy action tensor([ 0.3124, 0.1844, 1.3291, -0.2511]) tensor([0.1918, 0.1688, 0.5302, 0.1092]) -Greedy action tensor([-0.1304, 0.2658, -0.1420, -0.3968]) tensor([0.2358, 0.3504, 0.2331, 0.1807]) -Greedy action tensor([-1.5928, -1.7449, 1.6909, -0.7690]) tensor([0.0325, 0.0279, 0.8657, 0.0740]) -Greedy action tensor([-1.3391, 0.0221, -1.2122, 0.7579]) tensor([0.0705, 0.2751, 0.0801, 0.5743]) -Greedy action tensor([ 0.3585, -0.5392, 1.1894, -0.8833]) tensor([0.2505, 0.1021, 0.5750, 0.0724]) -Greedy action tensor([-0.5865, -0.3173, 0.3653, -1.1324]) tensor([0.1825, 0.2389, 0.4728, 0.1057]) -Greedy action tensor([ 1.1911, -1.1142, 0.5384, 0.9397]) tensor([0.4170, 0.0416, 0.2171, 0.3243]) -Greedy action tensor([-0.3525, -0.7115, -0.3892, -1.0725]) tensor([0.3175, 0.2218, 0.3061, 0.1546]) -Greedy action tensor([-0.9401, -0.9522, -0.6199, 0.4134]) tensor([0.1382, 0.1365, 0.1903, 0.5349]) -Greedy action tensor([-0.3004, -0.6539, -0.3058, -1.1910]) tensor([0.3218, 0.2260, 0.3201, 0.1321]) -Greedy action tensor([ 0.5042, 0.1011, 0.9706, -0.5429]) tensor([0.2768, 0.1849, 0.4412, 0.0971]) -Greedy action tensor([-0.5369, 0.1571, -0.7035, -0.3744]) tensor([0.1990, 0.3984, 0.1685, 0.2341]) -Greedy action tensor([ 0.3214, -0.9899, 0.3568, -1.0745]) tensor([0.3917, 0.1055, 0.4058, 0.0970]) -Greedy action tensor([ 0.5254, -0.8737, -0.0640, -0.0394]) tensor([0.4219, 0.1041, 0.2340, 0.2399]) -Greedy action tensor([ 0.6868, -1.6320, -0.9648, 0.7206]) tensor([0.4302, 0.0423, 0.0825, 0.4450]) -Greedy action tensor([ 1.2096, -0.5109, 0.4483, 0.2412]) tensor([0.4936, 0.0884, 0.2306, 0.1874]) -Greedy action tensor([ 0.5829, -0.2265, 0.0559, 0.1020]) tensor([0.3768, 0.1677, 0.2225, 0.2330]) -Greedy action tensor([-0.7323, -0.0979, -0.7208, 0.5012]) tensor([0.1364, 0.2573, 0.1380, 0.4683]) -Greedy action tensor([-0.3505, -1.0835, -0.2619, -1.1868]) tensor([0.3326, 0.1598, 0.3634, 0.1441]) -Greedy action tensor([-0.0182, 0.0479, -0.3261, 0.1366]) tensor([0.2518, 0.2690, 0.1851, 0.2940]) -Greedy action tensor([-0.4655, -0.4155, -1.1995, -0.2981]) tensor([0.2693, 0.2831, 0.1293, 0.3184]) -Greedy action tensor([ 0.4974, -0.5396, -0.7506, -0.2331]) tensor([0.4710, 0.1670, 0.1352, 0.2269]) -Greedy action tensor([-0.5286, -0.8815, 1.0633, -1.9152]) tensor([0.1457, 0.1023, 0.7156, 0.0364]) -Greedy action tensor([-0.9735, -0.3542, 0.9098, -1.6347]) tensor([0.1005, 0.1867, 0.6609, 0.0519]) -Greedy action tensor([ 0.4210, -0.4896, -0.2920, -0.0412]) tensor([0.3964, 0.1595, 0.1943, 0.2497]) -Greedy action tensor([ 0.1916, -0.2195, -0.1001, 0.1337]) tensor([0.2982, 0.1977, 0.2227, 0.2814]) -Greedy action tensor([ 0.9163, -2.1329, -0.1745, 0.7231]) tensor([0.4530, 0.0215, 0.1522, 0.3734]) -Greedy action tensor([ 1.0148, -0.2167, 1.2739, -0.5749]) tensor([0.3582, 0.1045, 0.4642, 0.0731]) -Greedy action tensor([ 0.1834, -0.1793, -0.8086, 0.1630]) tensor([0.3283, 0.2284, 0.1217, 0.3216]) -Greedy action tensor([1.7454, 0.3534, 0.4627, 1.0510]) tensor([0.4938, 0.1227, 0.1369, 0.2466]) -Greedy action tensor([ 0.6541, -0.1443, 0.7091, -0.8306]) tensor([0.3659, 0.1647, 0.3866, 0.0829]) -Greedy action tensor([-0.3662, 0.4666, -0.1233, -1.0823]) tensor([0.1975, 0.4542, 0.2518, 0.0965]) -Greedy action tensor([ 0.8246, 0.0137, -0.4544, -0.1745]) tensor([0.4783, 0.2126, 0.1331, 0.1761]) -Greedy action tensor([ 1.5734, -0.0130, -0.5573, 0.2300]) tensor([0.6312, 0.1292, 0.0750, 0.1647]) -Greedy action tensor([ 1.3769, -0.2520, -0.2615, 0.0693]) tensor([0.6021, 0.1181, 0.1170, 0.1628]) -Greedy action tensor([ 1.4769, -0.2330, -0.7397, -0.0920]) tensor([0.6675, 0.1207, 0.0727, 0.1390]) -Greedy action tensor([ 2.0379, -0.0946, -0.1676, -0.4246]) tensor([0.7611, 0.0902, 0.0839, 0.0649]) -Greedy action tensor([ 1.3043, -0.4174, -0.5789, 0.2088]) tensor([0.6005, 0.1074, 0.0913, 0.2008]) -Greedy action tensor([ 1.3390, -0.5399, -0.1504, 0.1232]) tensor([0.5971, 0.0912, 0.1347, 0.1770]) -Greedy action tensor([ 1.1453, 0.1202, -0.7319, 0.1978]) tensor([0.5265, 0.1889, 0.0806, 0.2041]) -Greedy action tensor([ 1.5502, -0.4205, -0.3546, 0.0302]) tensor([0.6636, 0.0925, 0.0988, 0.1451]) -Greedy action tensor([ 1.2525, -0.4025, -0.5208, 0.2123]) tensor([0.5834, 0.1115, 0.0990, 0.2061]) -Greedy action tensor([ 1.2985, -0.1532, -0.8886, 0.4824]) tensor([0.5591, 0.1309, 0.0628, 0.2472]) -Greedy action tensor([ 1.6851, -0.9997, -0.5347, 0.4471]) tensor([0.6817, 0.0465, 0.0741, 0.1977]) -Greedy action tensor([ 1.0349, 0.1703, -0.9648, 0.3237]) tensor([0.4884, 0.2057, 0.0661, 0.2398]) -Greedy action tensor([ 1.3963, -0.5641, -0.5258, 0.3790]) tensor([0.6066, 0.0854, 0.0887, 0.2193]) -Greedy action tensor([ 1.1008e+00, -7.1807e-01, -4.5797e-04, 9.6855e-03]) tensor([0.5463, 0.0886, 0.1816, 0.1835]) -Greedy action tensor([ 1.1011, -0.2246, -0.1359, -0.0106]) tensor([0.5305, 0.1409, 0.1540, 0.1745]) -Greedy action tensor([ 2.0227, -0.5360, -0.7405, 0.4539]) tensor([0.7414, 0.0574, 0.0468, 0.1544]) -Greedy action tensor([ 1.7283, -1.0955, -0.5761, 0.3236]) tensor([0.7119, 0.0423, 0.0711, 0.1747]) -Greedy action tensor([ 1.8561, -0.2520, 0.0239, 0.1519]) tensor([0.6833, 0.0830, 0.1094, 0.1243]) -Greedy action tensor([ 1.1799, 0.0084, -0.3709, 0.4094]) tensor([0.5038, 0.1561, 0.1069, 0.2332]) -Greedy action tensor([ 1.0906, -0.3979, -0.6410, 0.1436]) tensor([0.5585, 0.1261, 0.0988, 0.2166]) -Greedy action tensor([ 1.6899, -0.2966, -0.7256, 0.2975]) tensor([0.6780, 0.0930, 0.0606, 0.1685]) -Greedy action tensor([ 1.3852, -0.9167, -0.4036, 0.6361]) tensor([0.5747, 0.0575, 0.0961, 0.2717]) -Greedy action tensor([ 2.1174, -1.0861, -0.0792, 0.9348]) tensor([0.6857, 0.0279, 0.0762, 0.2102]) -Greedy action tensor([ 1.0225, -0.3887, -0.3544, 0.6312]) tensor([0.4603, 0.1122, 0.1162, 0.3113]) -Greedy action tensor([ 1.2705, 0.1475, -0.5677, 0.1359]) tensor([0.5537, 0.1801, 0.0881, 0.1780]) -Greedy action tensor([ 1.1319, -0.4386, -0.5834, -0.0309]) tensor([0.5881, 0.1223, 0.1058, 0.1838]) -Greedy action tensor([ 1.0922, -0.3574, -0.7758, 0.1910]) tensor([0.5570, 0.1307, 0.0860, 0.2262]) -Greedy action tensor([ 1.3844, -0.2278, -0.5571, 0.4238]) tensor([0.5795, 0.1156, 0.0832, 0.2218]) -Greedy action tensor([ 0.9178, -0.6255, -0.2381, 0.3879]) tensor([0.4723, 0.1009, 0.1487, 0.2780]) -Greedy action tensor([2.1887, 0.9248, 0.0173, 0.4242]) tensor([0.6378, 0.1802, 0.0727, 0.1092]) -Greedy action tensor([ 0.9392, -0.3022, -1.1939, 0.2849]) tensor([0.5189, 0.1499, 0.0615, 0.2697]) -Greedy action tensor([ 1.2763, -0.0083, -0.3023, -0.3611]) tensor([0.5961, 0.1650, 0.1230, 0.1159]) -Greedy action tensor([ 1.5091, -0.0788, -1.0252, 0.0829]) tensor([0.6562, 0.1341, 0.0520, 0.1576]) -Greedy action tensor([ 1.1811, -0.6542, -0.1661, 0.0340]) tensor([0.5757, 0.0919, 0.1497, 0.1828]) -Greedy action tensor([ 1.5966, -1.5485, -0.2291, 0.2492]) tensor([0.6830, 0.0294, 0.1100, 0.1775]) -Greedy action tensor([ 1.5299, -0.4620, -0.7052, 0.1968]) tensor([0.6635, 0.0905, 0.0710, 0.1749]) -Greedy action tensor([ 2.0491, -1.3160, -0.0878, 0.7535]) tensor([0.7011, 0.0242, 0.0827, 0.1919]) -Greedy action tensor([ 0.8538, -0.0674, 0.2096, 0.0048]) tensor([0.4254, 0.1693, 0.2233, 0.1820]) -Greedy action tensor([ 1.7887, -0.3868, -0.3551, 0.1769]) tensor([0.6992, 0.0794, 0.0819, 0.1395]) -Greedy action tensor([ 1.7609, -0.7713, -0.6125, 0.4661]) tensor([0.6913, 0.0549, 0.0644, 0.1894]) -Greedy action tensor([ 0.8666, -0.2383, -0.0910, 0.4293]) tensor([0.4236, 0.1403, 0.1626, 0.2735]) -Greedy action tensor([ 1.5281, -0.6164, -0.4366, 0.0206]) tensor([0.6762, 0.0792, 0.0948, 0.1498]) -Greedy action tensor([ 1.3479, -0.4461, -0.6648, -0.0161]) tensor([0.6429, 0.1069, 0.0859, 0.1643]) -Greedy action tensor([ 0.8539, -0.0409, -0.0143, 0.0871]) tensor([0.4361, 0.1782, 0.1830, 0.2026]) -Greedy action tensor([ 1.6935, -0.5191, -0.3001, 0.3895]) tensor([0.6592, 0.0721, 0.0898, 0.1789]) -Greedy action tensor([ 1.0473, -0.4737, -0.1064, 0.2854]) tensor([0.4998, 0.1092, 0.1577, 0.2333]) -Greedy action tensor([ 1.5810, 0.1030, -0.4965, 0.5508]) tensor([0.5847, 0.1334, 0.0732, 0.2087]) -Greedy action tensor([ 1.9122, -0.3376, -0.3000, 0.2675]) tensor([0.7102, 0.0749, 0.0777, 0.1371]) -Greedy action tensor([ 0.9516, -0.1306, -0.6803, 0.2594]) tensor([0.4914, 0.1665, 0.0961, 0.2460]) -Greedy action tensor([ 1.6506, -0.8135, -0.0382, 0.4645]) tensor([0.6348, 0.0540, 0.1173, 0.1939]) -Greedy action tensor([ 2.0196, -0.8633, -0.5461, 0.1002]) tensor([0.7815, 0.0437, 0.0601, 0.1146]) -Greedy action tensor([ 1.4480, 0.0380, -0.3333, 0.4560]) tensor([0.5607, 0.1369, 0.0944, 0.2079]) -Greedy action tensor([ 1.5707, -0.3986, -0.6529, 0.0858]) tensor([0.6783, 0.0947, 0.0734, 0.1536]) -Greedy action tensor([ 2.3196, -0.7143, -0.4974, 0.8689]) tensor([0.7450, 0.0359, 0.0445, 0.1746]) -Greedy action tensor([ 0.8026, 0.0945, -0.9649, 0.3368]) tensor([0.4365, 0.2150, 0.0745, 0.2740]) -Greedy action tensor([ 1.6521, -0.8124, -0.5246, 0.5638]) tensor([0.6514, 0.0554, 0.0739, 0.2194]) -Greedy action tensor([ 1.2247, 0.0028, -1.1545, 0.4911]) tensor([0.5355, 0.1578, 0.0496, 0.2571]) -Greedy action tensor([ 1.1046, -0.1986, -0.5619, 0.0461]) tensor([0.5532, 0.1503, 0.1045, 0.1920]) -Greedy action tensor([ 1.6754, -0.9247, -0.4732, 0.5296]) tensor([0.6627, 0.0492, 0.0773, 0.2107]) -Greedy action tensor([ 1.7238, -0.7578, -0.1109, 0.7224]) tensor([0.6209, 0.0519, 0.0991, 0.2281]) -Greedy action tensor([ 1.1094, -0.3790, -0.8991, 0.1978]) tensor([0.5676, 0.1281, 0.0762, 0.2281]) -Greedy action tensor([ 1.7452, -0.5866, -0.1638, 0.7611]) tensor([0.6176, 0.0600, 0.0915, 0.2308]) -Greedy action tensor([ 1.3586, -0.4834, -0.3029, 0.0477]) tensor([0.6181, 0.0980, 0.1173, 0.1666]) -Greedy action tensor([ 1.4054, -0.6425, -0.5290, 0.3837]) tensor([0.6122, 0.0790, 0.0885, 0.2204]) -Greedy action tensor([ 1.3171, -0.0608, -0.2419, 0.0285]) tensor([0.5753, 0.1450, 0.1210, 0.1586]) -Greedy action tensor([ 0.9802, 0.0468, -0.9097, 0.2726]) tensor([0.4909, 0.1930, 0.0742, 0.2419]) -Greedy action tensor([ 1.5112, -0.2872, -0.8735, 0.2761]) tensor([0.6458, 0.1069, 0.0595, 0.1878]) -Greedy action tensor([ 2.1246, -0.8068, -0.7861, 0.3629]) tensor([0.7815, 0.0417, 0.0425, 0.1342]) -Greedy action tensor([ 1.6761, -0.6511, -0.1911, 0.3807]) tensor([0.6553, 0.0639, 0.1013, 0.1794]) -Greedy action tensor([ 1.8236, -1.1995, 0.0721, 0.5056]) tensor([0.6712, 0.0327, 0.1165, 0.1797]) -Greedy action tensor([ 1.4907, -0.4358, -0.6826, 0.1110]) tensor([0.6618, 0.0964, 0.0753, 0.1665]) -Greedy action tensor([ 1.7068, -0.4416, -0.2207, 0.4428]) tensor([0.6474, 0.0755, 0.0942, 0.1829]) -Greedy action tensor([ 1.8589, -1.0248, -0.0257, 0.1394]) tensor([0.7210, 0.0403, 0.1095, 0.1292]) -Greedy action tensor([ 1.1396, -0.6041, -0.3445, 0.3566]) tensor([0.5380, 0.0941, 0.1220, 0.2459]) -Greedy action tensor([ 1.2330, -0.0662, -1.3256, 0.4063]) tensor([0.5594, 0.1526, 0.0433, 0.2447]) -Greedy action tensor([ 1.1173, -0.3030, -0.5067, 0.3614]) tensor([0.5240, 0.1266, 0.1033, 0.2461]) -Greedy action tensor([ 2.0011, 0.8696, -0.2545, 0.0616]) tensor([0.6365, 0.2053, 0.0667, 0.0915]) -Greedy action tensor([ 0.9455, -0.5344, -0.2616, 0.1719]) tensor([0.5030, 0.1145, 0.1504, 0.2321]) -Greedy action tensor([ 2.4287, -1.1740, -0.5561, 0.3024]) tensor([0.8354, 0.0228, 0.0422, 0.0996]) -Greedy action tensor([ 0.9798, -0.3421, -0.2866, 0.0747]) tensor([0.5120, 0.1365, 0.1443, 0.2071]) -Greedy action tensor([ 1.7095, -0.6389, -0.6998, 0.2071]) tensor([0.7102, 0.0678, 0.0638, 0.1581]) -Greedy action tensor([ 0.7337, -0.2670, -0.1192, -0.0099]) tensor([0.4407, 0.1620, 0.1878, 0.2095]) -Greedy action tensor([ 1.1905, -0.9706, 0.0997, -0.4998]) tensor([0.6114, 0.0704, 0.2054, 0.1128]) -Greedy action tensor([ 0.4641, 0.1263, -0.0427, 0.0789]) tensor([0.3338, 0.2381, 0.2011, 0.2271]) -Greedy action tensor([ 1.0538, -0.8939, -0.1095, -0.4721]) tensor([0.5979, 0.0853, 0.1868, 0.1300]) -Greedy action tensor([ 0.8433, -0.4405, -0.2363, -0.3167]) tensor([0.5181, 0.1435, 0.1760, 0.1624]) -Greedy action tensor([ 1.1639, -0.5672, 0.1435, -0.5521]) tensor([0.5823, 0.1031, 0.2099, 0.1047]) -Greedy action tensor([ 0.6382, 0.2159, -0.0997, -0.0023]) tensor([0.3758, 0.2464, 0.1797, 0.1981]) -Greedy action tensor([ 0.9482, -0.7668, 0.1120, -0.5287]) tensor([0.5430, 0.0977, 0.2353, 0.1240]) -Greedy action tensor([ 0.5577, -0.3932, -0.0310, -0.3281]) tensor([0.4248, 0.1642, 0.2358, 0.1752]) -Greedy action tensor([ 0.9636, -0.3878, 0.1037, -0.1460]) tensor([0.4971, 0.1287, 0.2104, 0.1639]) -Greedy action tensor([ 0.8020, -0.5202, -0.0381, -0.3080]) tensor([0.4932, 0.1315, 0.2129, 0.1625]) -Greedy action tensor([ 0.6716, -0.2465, 0.0481, -0.0964]) tensor([0.4168, 0.1664, 0.2234, 0.1934]) -Greedy action tensor([ 0.6801, -0.3542, -0.0798, -0.3421]) tensor([0.4581, 0.1628, 0.2142, 0.1648]) -Greedy action tensor([ 0.9171, -0.5719, 0.0755, -0.2981]) tensor([0.5120, 0.1155, 0.2207, 0.1519]) -Greedy action tensor([ 1.1324, -0.8007, -0.0800, -0.5087]) tensor([0.6113, 0.0884, 0.1818, 0.1184]) -Greedy action tensor([ 0.5099, -0.2837, -0.0944, -0.2449]) tensor([0.4051, 0.1832, 0.2213, 0.1904]) -Greedy action tensor([ 0.8104, -0.4818, -0.1446, -0.2948]) tensor([0.5024, 0.1380, 0.1933, 0.1664]) -Greedy action tensor([ 0.8138, -0.8442, 0.0024, -0.3078]) tensor([0.5101, 0.0972, 0.2266, 0.1662]) -Greedy action tensor([ 0.6714, -0.4303, -0.0278, -0.2642]) tensor([0.4501, 0.1496, 0.2237, 0.1766]) -Greedy action tensor([ 0.4722, 0.2287, -0.1251, 0.0674]) tensor([0.3332, 0.2612, 0.1833, 0.2223]) -Greedy action tensor([ 1.1182, -0.4384, -0.2508, -0.1794]) tensor([0.5752, 0.1213, 0.1463, 0.1571]) -Greedy action tensor([ 1.2972, -0.8683, -0.0700, -0.7195]) tensor([0.6655, 0.0763, 0.1696, 0.0886]) -Greedy action tensor([ 0.8999, -0.6409, -0.0143, -0.3539]) tensor([0.5262, 0.1127, 0.2109, 0.1502]) -Greedy action tensor([ 0.7799, -0.1059, 0.0083, -0.7060]) tensor([0.4760, 0.1963, 0.2200, 0.1077]) -Greedy action tensor([ 0.8629, -0.4944, -0.0168, -0.5888]) tensor([0.5245, 0.1350, 0.2176, 0.1228]) -Greedy action tensor([ 1.2918, -0.7377, -0.0373, -0.7221]) tensor([0.6538, 0.0859, 0.1731, 0.0873]) -Greedy action tensor([ 0.4912, 0.0258, -0.2167, -0.0679]) tensor([0.3714, 0.2332, 0.1830, 0.2124]) -Greedy action tensor([ 0.4133, -0.2426, -0.0561, -0.1592]) tensor([0.3692, 0.1916, 0.2309, 0.2083]) -Greedy action tensor([ 0.8183, -0.1543, -0.0310, -0.2283]) tensor([0.4636, 0.1753, 0.1983, 0.1628]) -Greedy action tensor([ 0.7822, -0.4118, -0.0071, -0.2154]) tensor([0.4704, 0.1425, 0.2136, 0.1735]) -Greedy action tensor([ 0.2777, 0.0226, -0.0152, -0.4903]) tensor([0.3350, 0.2596, 0.2500, 0.1554]) -Greedy action tensor([ 0.8158, -0.4088, -0.0712, -0.2410]) tensor([0.4870, 0.1431, 0.2006, 0.1693]) -Greedy action tensor([ 0.9798, -0.8071, 0.0271, -0.5225]) tensor([0.5631, 0.0943, 0.2172, 0.1254]) -Greedy action tensor([ 0.9489, -0.7973, 0.0290, -0.5896]) tensor([0.5594, 0.0976, 0.2229, 0.1201]) -Greedy action tensor([ 1.2363, -0.7253, -0.0895, -0.7718]) tensor([0.6492, 0.0913, 0.1724, 0.0871]) -Greedy action tensor([ 0.5721, -0.2376, -0.0548, -0.2595]) tensor([0.4141, 0.1843, 0.2213, 0.1803]) -Greedy action tensor([ 1.2321, -0.5333, -0.1077, -0.3673]) tensor([0.6116, 0.1047, 0.1602, 0.1236]) -Greedy action tensor([ 0.5581, -0.3645, -0.0116, -0.3028]) tensor([0.4191, 0.1666, 0.2371, 0.1772]) -Greedy action tensor([ 0.6871, -0.1351, 0.0061, -0.1573]) tensor([0.4210, 0.1850, 0.2131, 0.1809]) -Greedy action tensor([ 0.7087, -0.5624, -0.1328, -0.1987]) tensor([0.4728, 0.1326, 0.2038, 0.1908]) -Greedy action tensor([ 1.0566, -0.6099, -0.0546, -0.4964]) tensor([0.5781, 0.1092, 0.1903, 0.1223]) -Greedy action tensor([ 0.9373, -0.5845, -0.0950, -0.4064]) tensor([0.5448, 0.1190, 0.1941, 0.1421]) -Greedy action tensor([ 0.9581, -0.0327, -0.0446, 0.0998]) tensor([0.4625, 0.1717, 0.1697, 0.1961]) -Greedy action tensor([ 1.1457, -0.5199, -0.1659, -0.2872]) tensor([0.5892, 0.1114, 0.1587, 0.1406]) -Greedy action tensor([ 1.0296, -0.8809, 0.0306, -0.4959]) tensor([0.5768, 0.0854, 0.2124, 0.1254]) -Greedy action tensor([ 0.8709, -0.5099, 0.0216, -0.1702]) tensor([0.4921, 0.1237, 0.2105, 0.1737]) -Greedy action tensor([ 0.3715, -0.0924, -0.0106, -0.0438]) tensor([0.3365, 0.2116, 0.2297, 0.2222]) -Greedy action tensor([ 0.5888, -0.3632, -0.0813, -0.0050]) tensor([0.4082, 0.1576, 0.2089, 0.2254]) -Greedy action tensor([ 0.7956, -0.3512, -0.1309, -0.2366]) tensor([0.4831, 0.1535, 0.1913, 0.1721]) -Greedy action tensor([ 0.7435, -0.3917, 0.0643, -0.1964]) tensor([0.4506, 0.1448, 0.2285, 0.1760]) -Greedy action tensor([ 0.8107, -0.3891, 0.0792, -0.4747]) tensor([0.4857, 0.1463, 0.2337, 0.1343]) -Greedy action tensor([ 0.6495, -0.3247, 0.0121, -0.1128]) tensor([0.4214, 0.1591, 0.2228, 0.1967]) -Greedy action tensor([ 1.0626, -0.8661, -0.1347, -0.3724]) tensor([0.5933, 0.0862, 0.1792, 0.1413]) -Greedy action tensor([ 0.5780, -0.3404, -0.1123, -0.0152]) tensor([0.4076, 0.1627, 0.2044, 0.2252]) -Greedy action tensor([ 0.9418, -0.7071, 0.0479, -0.4971]) tensor([0.5439, 0.1046, 0.2225, 0.1290]) -Greedy action tensor([ 1.1466, -0.5372, -0.0144, -0.4879]) tensor([0.5904, 0.1096, 0.1849, 0.1152]) -Greedy action tensor([ 0.4676, 0.1132, -0.0759, 0.0535]) tensor([0.3398, 0.2384, 0.1973, 0.2246]) -Greedy action tensor([ 1.0328, -0.6978, 0.1166, -0.4972]) tensor([0.5575, 0.0988, 0.2230, 0.1207]) -Greedy action tensor([ 0.9498, -0.6511, -0.0174, -0.6802]) tensor([0.5625, 0.1135, 0.2138, 0.1102]) -Greedy action tensor([ 0.6005, -0.0781, -0.0925, -0.0230]) tensor([0.3932, 0.1995, 0.1966, 0.2108]) -Greedy action tensor([ 1.0270, -0.8570, 0.1101, -0.7793]) tensor([0.5827, 0.0886, 0.2330, 0.0957]) -Greedy action tensor([ 0.6255, -0.4879, 0.2473, -0.6271]) tensor([0.4349, 0.1428, 0.2980, 0.1243]) -Greedy action tensor([ 0.7495, -0.3510, -0.0861, -0.2947]) tensor([0.4721, 0.1571, 0.2047, 0.1661]) -Greedy action tensor([ 1.0833, -0.7844, 0.1034, -0.3802]) tensor([0.5678, 0.0877, 0.2131, 0.1314]) -Greedy action tensor([ 0.8446, -0.3642, 0.1151, -0.5290]) tensor([0.4917, 0.1468, 0.2371, 0.1245]) -Greedy action tensor([ 0.6822, -0.3800, -0.0858, -0.1705]) tensor([0.4472, 0.1546, 0.2075, 0.1907]) -Greedy action tensor([ 0.9427, -0.3960, -0.0569, -0.5707]) tensor([0.5404, 0.1417, 0.1989, 0.1190]) -Greedy action tensor([ 0.4631, 0.0431, 0.1662, -0.2077]) tensor([0.3435, 0.2257, 0.2552, 0.1756]) -Greedy action tensor([ 0.4467, -0.4110, 0.0028, -0.4720]) tensor([0.4057, 0.1721, 0.2603, 0.1619]) -Greedy action tensor([ 1.2279, -0.7308, 0.0050, -0.8597]) tensor([0.6413, 0.0904, 0.1888, 0.0795]) -Greedy action tensor([ 0.8539, -0.2349, 0.0295, -0.3875]) tensor([0.4845, 0.1631, 0.2124, 0.1400]) -Greedy action tensor([ 0.1726, 0.0803, -0.1170, -0.1889]) tensor([0.2979, 0.2716, 0.2230, 0.2075]) -Greedy action tensor([ 0.9708, -0.5799, -0.0698, -0.2446]) tensor([0.5371, 0.1139, 0.1897, 0.1593]) -Greedy action tensor([ 0.7859, -0.5524, -0.1558, -0.1555]) tensor([0.4896, 0.1284, 0.1909, 0.1910]) -Greedy action tensor([ 0.4818, -0.4447, -0.0488, -0.4066]) tensor([0.4174, 0.1653, 0.2456, 0.1717]) -Greedy action tensor([ 0.7742, -0.5766, 0.0497, -0.7424]) tensor([0.5094, 0.1320, 0.2468, 0.1118]) -Greedy action tensor([ 1.4014, -0.9382, 0.0087, -0.7106]) tensor([0.6822, 0.0657, 0.1695, 0.0825]) -Greedy action tensor([ 1.4863, -0.8303, -0.2127, -0.6808]) tensor([0.7163, 0.0706, 0.1310, 0.0820]) -Greedy action tensor([ 1.0334, -0.5236, -0.1987, -0.6117]) tensor([0.5898, 0.1243, 0.1720, 0.1138]) -Greedy action tensor([ 0.8067, 0.0814, -0.0682, -0.1253]) tensor([0.4358, 0.2110, 0.1817, 0.1716]) -Greedy action tensor([ 0.9461, -0.6136, 0.1483, -0.5108]) tensor([0.5281, 0.1110, 0.2378, 0.1230]) -Greedy action tensor([-1.9493, -0.4523, 0.6681, -0.1835]) tensor([0.0400, 0.1786, 0.5477, 0.2337]) -Greedy action tensor([-1.9238, -0.4267, 0.6612, -0.1684]) tensor([0.0408, 0.1823, 0.5410, 0.2360]) -Greedy action tensor([-0.0337, 1.0871, -0.0390, 0.2145]) tensor([0.1576, 0.4835, 0.1568, 0.2020]) -Greedy action tensor([-1.8653, -0.3330, 0.6241, -0.1477]) tensor([0.0430, 0.1990, 0.5184, 0.2396]) -Greedy action tensor([-1.9026, -0.4349, 0.6452, -0.1608]) tensor([0.0420, 0.1821, 0.5363, 0.2396]) -Greedy action tensor([-1.5393, -0.1959, 0.6674, -0.1070]) tensor([0.0552, 0.2116, 0.5018, 0.2313]) -Greedy action tensor([-1.4641, -0.2828, 0.6628, 0.0352]) tensor([0.0584, 0.1903, 0.4898, 0.2615]) -Greedy action tensor([-1.7120, -0.4612, 0.5433, -0.0590]) tensor([0.0519, 0.1814, 0.4954, 0.2712]) -Greedy action tensor([-1.2892, -0.6452, 0.3198, 0.2087]) tensor([0.0808, 0.1539, 0.4039, 0.3614]) -Greedy action tensor([-1.3486, -0.4508, 0.3612, 0.1592]) tensor([0.0741, 0.1818, 0.4095, 0.3346]) -Greedy action tensor([-0.4564, 1.0658, 0.0072, 0.4187]) tensor([0.1045, 0.4788, 0.1661, 0.2507]) -Greedy action tensor([-1.6213, -0.3429, 0.5012, -0.0765]) tensor([0.0567, 0.2037, 0.4738, 0.2659]) -Greedy action tensor([-1.9273, -0.4596, 0.6581, -0.1740]) tensor([0.0410, 0.1780, 0.5442, 0.2368]) -Greedy action tensor([-1.5087, -0.4161, 0.4626, -0.0664]) tensor([0.0650, 0.1937, 0.4665, 0.2748]) -Greedy action tensor([-1.8050, -0.3283, 0.5897, -0.1265]) tensor([0.0461, 0.2018, 0.5053, 0.2469]) -Greedy action tensor([-1.5243, 0.2754, 0.3696, -0.0531]) tensor([0.0554, 0.3351, 0.3682, 0.2413]) -Greedy action tensor([-1.8301, -0.2640, 0.5871, -0.1472]) tensor([0.0447, 0.2139, 0.5010, 0.2404]) -Greedy action tensor([-1.8191, -0.4493, 0.6038, -0.1257]) tensor([0.0462, 0.1817, 0.5209, 0.2512]) -Greedy action tensor([-1.8910, -0.4574, 0.6466, -0.1550]) tensor([0.0425, 0.1783, 0.5379, 0.2413]) -Greedy action tensor([-1.8861, -0.3661, 0.6274, -0.1543]) tensor([0.0424, 0.1940, 0.5239, 0.2397]) -Greedy action tensor([-0.8382, 0.2851, 0.1803, -0.1009]) tensor([0.1119, 0.3442, 0.3099, 0.2340]) -Greedy action tensor([-1.9370, -0.4362, 0.6628, -0.1760]) tensor([0.0404, 0.1811, 0.5436, 0.2350]) -Greedy action tensor([-1.9095, -0.4153, 0.6460, -0.1632]) tensor([0.0416, 0.1852, 0.5351, 0.2382]) -Greedy action tensor([-0.6174, 0.0578, 0.1671, 0.1675]) tensor([0.1361, 0.2674, 0.2982, 0.2983]) -Greedy action tensor([-0.9125, 0.4062, 0.1451, 0.2985]) tensor([0.0911, 0.3406, 0.2624, 0.3059]) -Greedy action tensor([-1.8675, -0.3885, 0.6230, -0.1407]) tensor([0.0433, 0.1901, 0.5229, 0.2436]) -Greedy action tensor([-1.5727, -0.2205, 0.4537, -0.0514]) tensor([0.0587, 0.2270, 0.4455, 0.2688]) -Greedy action tensor([-0.9867, -0.3408, 0.4002, 0.6616]) tensor([0.0826, 0.1576, 0.3305, 0.4293]) -Greedy action tensor([-1.8624, -0.4178, 0.6383, -0.1295]) tensor([0.0433, 0.1837, 0.5280, 0.2450]) -Greedy action tensor([-1.9053, -0.3919, 0.6455, -0.1599]) tensor([0.0415, 0.1886, 0.5321, 0.2378]) -Greedy action tensor([-1.9150, -0.3717, 0.6415, -0.1556]) tensor([0.0410, 0.1920, 0.5288, 0.2383]) -Greedy action tensor([-1.8513, -0.3119, 0.6383, -0.1108]) tensor([0.0427, 0.1991, 0.5148, 0.2434]) -Greedy action tensor([-1.8661, -0.1191, 0.5836, -0.1522]) tensor([0.0419, 0.2403, 0.4853, 0.2325]) -Greedy action tensor([-1.9006, -0.4438, 0.6492, -0.1560]) tensor([0.0420, 0.1802, 0.5376, 0.2403]) -Greedy action tensor([-0.8263, -0.7431, 0.2780, 0.3320]) tensor([0.1206, 0.1311, 0.3640, 0.3842]) -Greedy action tensor([-1.9011, -0.4520, 0.6796, -0.1504]) tensor([0.0413, 0.1758, 0.5452, 0.2377]) -Greedy action tensor([-1.1000, 0.5227, -0.8339, -1.2545]) tensor([0.1215, 0.6158, 0.1586, 0.1041]) -Greedy action tensor([-1.9412, -0.4444, 0.6661, -0.1788]) tensor([0.0402, 0.1797, 0.5456, 0.2344]) -Greedy action tensor([-1.5334, -0.2539, 0.4597, -0.0813]) tensor([0.0617, 0.2218, 0.4528, 0.2636]) -Greedy action tensor([-1.4146, -0.4120, 0.3879, 0.1192]) tensor([0.0693, 0.1889, 0.4204, 0.3213]) -Greedy action tensor([-1.8193, -0.2736, 0.6446, -0.3046]) tensor([0.0455, 0.2133, 0.5344, 0.2068]) -Greedy action tensor([-1.8875, -0.4514, 0.6404, -0.1499]) tensor([0.0427, 0.1796, 0.5350, 0.2427]) -Greedy action tensor([-1.8275, -0.4097, 0.6319, -0.1240]) tensor([0.0448, 0.1849, 0.5241, 0.2461]) -Greedy action tensor([-1.9309, -0.4288, 0.6601, -0.1728]) tensor([0.0406, 0.1823, 0.5416, 0.2355]) -Greedy action tensor([-1.9368, -0.4571, 0.6706, -0.1724]) tensor([0.0403, 0.1771, 0.5471, 0.2355]) -Greedy action tensor([-1.2339, -0.0526, 0.4254, 0.0787]) tensor([0.0756, 0.2463, 0.3972, 0.2809]) -Greedy action tensor([-1.9182, -0.4597, 0.6541, -0.1681]) tensor([0.0414, 0.1780, 0.5422, 0.2383]) -Greedy action tensor([-1.1546, 0.3545, -0.0878, -0.6973]) tensor([0.0999, 0.4519, 0.2904, 0.1578]) -Greedy action tensor([-1.6366, -0.3421, 0.4767, -0.0190]) tensor([0.0557, 0.2031, 0.4606, 0.2806]) -Greedy action tensor([-1.7244, 0.2043, 0.4814, -0.0614]) tensor([0.0450, 0.3095, 0.4083, 0.2373]) -Greedy action tensor([-1.9046, -0.4510, 0.6507, -0.1589]) tensor([0.0419, 0.1791, 0.5391, 0.2399]) -Greedy action tensor([-1.9239, -0.4418, 0.6627, -0.1721]) tensor([0.0409, 0.1800, 0.5433, 0.2358]) -Greedy action tensor([-1.8796, -0.4432, 0.6320, -0.1504]) tensor([0.0432, 0.1815, 0.5320, 0.2433]) -Greedy action tensor([-1.9204, -0.4274, 0.6563, -0.1661]) tensor([0.0410, 0.1825, 0.5395, 0.2370]) -Greedy action tensor([-1.9298, -0.4288, 0.6606, -0.1725]) tensor([0.0406, 0.1822, 0.5417, 0.2355]) -Greedy action tensor([-1.8977, -0.3176, 0.6355, -0.1571]) tensor([0.0414, 0.2010, 0.5215, 0.2361]) -Greedy action tensor([-1.4408, -0.1563, 0.4450, -0.1558]) tensor([0.0675, 0.2438, 0.4448, 0.2439]) -Greedy action tensor([-1.5346, -0.3608, 0.5323, 0.0398]) tensor([0.0590, 0.1907, 0.4658, 0.2846]) -Greedy action tensor([-1.9300, -0.3309, 0.6405, -0.1736]) tensor([0.0403, 0.1994, 0.5268, 0.2334]) -Greedy action tensor([-1.4503, -0.1874, 0.6688, 0.2637]) tensor([0.0543, 0.1920, 0.4521, 0.3015]) -Greedy action tensor([-1.1552, 0.5199, 0.3248, -0.1890]) tensor([0.0748, 0.3996, 0.3288, 0.1967]) -Greedy action tensor([-0.9005, -0.5499, 0.0450, 0.5667]) tensor([0.1072, 0.1522, 0.2759, 0.4648]) -Greedy action tensor([-1.9082, -0.4241, 0.6505, -0.1599]) tensor([0.0415, 0.1832, 0.5366, 0.2386]) -Greedy action tensor([-1.9274, -0.3689, 0.6454, -0.1693]) tensor([0.0406, 0.1927, 0.5314, 0.2353]) -Greedy action tensor([-1.4013, 0.2805, 0.4032, -0.1714]) tensor([0.0630, 0.3386, 0.3829, 0.2155]) -Greedy action tensor([-1.9337, -0.4376, 0.6593, -0.1753]) tensor([0.0406, 0.1812, 0.5427, 0.2355]) -Greedy action tensor([0.4566, 1.1676, 0.0738, 0.8344]) tensor([0.1932, 0.3933, 0.1317, 0.2818]) -Greedy action tensor([-1.4921, -0.5593, 0.3970, 0.0921]) tensor([0.0665, 0.1691, 0.4400, 0.3244]) -Greedy action tensor([-1.9225, -0.5187, 0.8277, -0.1209]) tensor([0.0373, 0.1520, 0.5843, 0.2263]) -Greedy action tensor([-1.9422, -0.4402, 0.6628, -0.1795]) tensor([0.0402, 0.1807, 0.5445, 0.2345]) -Greedy action tensor([-1.9289, -0.4401, 0.6578, -0.1732]) tensor([0.0408, 0.1808, 0.5422, 0.2362]) -Greedy action tensor([-1.8677, -0.1736, 0.6024, -0.1377]) tensor([0.0418, 0.2276, 0.4946, 0.2359]) -Greedy action tensor([-1.8554, -0.2994, 0.6003, -0.1257]) tensor([0.0434, 0.2058, 0.5060, 0.2448]) -Greedy action tensor([-1.9329, -0.4366, 0.6617, -0.1739]) tensor([0.0405, 0.1810, 0.5430, 0.2354]) -Greedy action tensor([-1.4239, 0.7358, 0.3206, -0.0315]) tensor([0.0515, 0.4465, 0.2947, 0.2073]) -Greedy action tensor([-1.6823, 0.3259, 0.4646, -0.0727]) tensor([0.0454, 0.3385, 0.3889, 0.2272]) -Greedy action tensor([-1.9090, -0.3471, 0.6390, -0.1615]) tensor([0.0412, 0.1963, 0.5262, 0.2363]) -Greedy action tensor([-1.7458, -0.1962, 0.5311, -0.1032]) tensor([0.0485, 0.2284, 0.4726, 0.2506]) -Greedy action tensor([-1.7769, 0.0126, 0.5627, -0.2119]) tensor([0.0452, 0.2703, 0.4686, 0.2160]) -Greedy action tensor([-1.3587, -0.5233, 0.5140, -0.2060]) tensor([0.0770, 0.1777, 0.5013, 0.2440]) -Greedy action tensor([-1.2065, 0.7914, 0.1571, 0.2644]) tensor([0.0601, 0.4432, 0.2350, 0.2617]) -Greedy action tensor([ 0.2838, -0.0530, -0.5042, 0.3801]) tensor([0.3058, 0.2184, 0.1391, 0.3367]) -Greedy action tensor([-0.1151, -0.8308, -0.1916, -0.0287]) tensor([0.2853, 0.1395, 0.2643, 0.3110]) -Greedy action tensor([-0.3471, -1.0603, -0.1285, -1.8953]) tensor([0.3393, 0.1663, 0.4222, 0.0721]) -Greedy action tensor([ 1.0954, -0.5771, 0.0716, 0.3053]) tensor([0.4998, 0.0938, 0.1795, 0.2268]) -Greedy action tensor([ 0.1752, -0.7305, -0.2495, -0.1542]) tensor([0.3600, 0.1455, 0.2354, 0.2590]) -Greedy action tensor([-0.9707, 0.4159, 0.6031, -0.9767]) tensor([0.0924, 0.3698, 0.4459, 0.0919]) -Greedy action tensor([-1.0328, -0.8480, 0.2893, -0.7601]) tensor([0.1376, 0.1655, 0.5161, 0.1807]) -Greedy action tensor([-0.3815, -0.1244, 0.5128, -1.0685]) tensor([0.1908, 0.2467, 0.4666, 0.0960]) -Greedy action tensor([0.2578, 0.5672, 0.7639, 0.0941]) tensor([0.2053, 0.2798, 0.3406, 0.1743]) -Greedy action tensor([ 1.3280, -0.1644, -0.5320, -0.3262]) tensor([0.6362, 0.1430, 0.0990, 0.1217]) -Greedy action tensor([-0.8750, -1.4069, 0.6172, -1.2177]) tensor([0.1483, 0.0871, 0.6594, 0.1053]) -Greedy action tensor([ 1.7023, -0.4440, 0.0787, 0.2787]) tensor([0.6431, 0.0752, 0.1268, 0.1549]) -Greedy action tensor([1.0254, 0.0257, 0.2778, 0.1381]) tensor([0.4438, 0.1633, 0.2101, 0.1827]) -Greedy action tensor([ 0.7824, -1.5753, 0.0300, -0.5620]) tensor([0.5475, 0.0518, 0.2580, 0.1427]) -Greedy action tensor([ 0.0868, -1.1383, 0.1853, -1.0319]) tensor([0.3671, 0.1078, 0.4051, 0.1199]) -Greedy action tensor([ 0.5637, -1.2511, -0.6285, 0.1828]) tensor([0.4652, 0.0758, 0.1412, 0.3178]) -Greedy action tensor([ 0.1830, -1.6961, 0.0689, -0.2600]) tensor([0.3722, 0.0568, 0.3320, 0.2390]) -Greedy action tensor([-0.4758, -1.2636, 0.2731, 0.4640]) tensor([0.1632, 0.0742, 0.3450, 0.4176]) -Greedy action tensor([-0.3274, -0.4866, 0.9195, -0.6338]) tensor([0.1648, 0.1405, 0.5734, 0.1213]) -Greedy action tensor([-0.3933, 0.0792, -0.1640, -0.5961]) tensor([0.2138, 0.3429, 0.2689, 0.1745]) -Greedy action tensor([-0.8239, -0.8244, -0.0013, -0.4116]) tensor([0.1728, 0.1727, 0.3934, 0.2610]) -Greedy action tensor([-1.8010, -0.2957, -0.4040, 0.1241]) tensor([0.0610, 0.2746, 0.2465, 0.4179]) -Greedy action tensor([ 0.4590, -0.8972, 0.0098, 0.1735]) tensor([0.3777, 0.0973, 0.2410, 0.2839]) -Greedy action tensor([-0.5522, -0.6762, -1.1041, 0.7192]) tensor([0.1660, 0.1466, 0.0956, 0.5918]) -Greedy action tensor([ 0.4387, -0.5730, -0.0024, 0.3395]) tensor([0.3433, 0.1248, 0.2209, 0.3109]) -Greedy action tensor([-0.4579, -0.5742, -0.3480, -0.6428]) tensor([0.2606, 0.2320, 0.2908, 0.2166]) -Greedy action tensor([-1.3020, -0.3590, 1.1765, -1.5313]) tensor([0.0614, 0.1577, 0.7321, 0.0488]) -Greedy action tensor([-1.0008, -0.2803, -0.2425, -0.7447]) tensor([0.1543, 0.3171, 0.3293, 0.1993]) -Greedy action tensor([-0.6114, -0.7355, -0.8950, -0.6646]) tensor([0.2790, 0.2464, 0.2101, 0.2645]) -Greedy action tensor([-0.3797, 0.0230, 0.0384, 0.0114]) tensor([0.1820, 0.2723, 0.2765, 0.2692]) -Greedy action tensor([-0.5304, -0.0074, 0.7102, -0.9379]) tensor([0.1469, 0.2477, 0.5077, 0.0977]) -Greedy action tensor([-1.4336, -0.7514, 0.7566, -0.5748]) tensor([0.0700, 0.1386, 0.6261, 0.1653]) -Greedy action tensor([ 0.1268, -1.2632, 0.3274, 0.0247]) tensor([0.2964, 0.0738, 0.3622, 0.2676]) -Greedy action tensor([-0.0155, -0.5886, -0.3544, -0.1819]) tensor([0.3202, 0.1805, 0.2282, 0.2711]) -Greedy action tensor([-0.0766, -1.1903, 0.7968, -0.9607]) tensor([0.2418, 0.0794, 0.5790, 0.0999]) -Greedy action tensor([ 1.6138, -0.8762, -0.1111, 0.9788]) tensor([0.5583, 0.0463, 0.0995, 0.2959]) -Greedy action tensor([-0.2681, -0.7513, 0.0219, -0.7051]) tensor([0.2778, 0.1714, 0.3713, 0.1795]) -Greedy action tensor([ 0.4745, 0.1024, 1.2262, -0.1883]) tensor([0.2312, 0.1594, 0.4903, 0.1192]) -Greedy action tensor([ 0.5455, 0.1036, -0.3401, -0.1064]) tensor([0.3881, 0.2495, 0.1601, 0.2023]) -Greedy action tensor([-0.1093, -1.0616, -0.1170, -0.5591]) tensor([0.3316, 0.1279, 0.3290, 0.2115]) -Greedy action tensor([-0.2100, -0.3133, -0.5962, 0.0131]) tensor([0.2610, 0.2354, 0.1774, 0.3262]) -Greedy action tensor([ 0.3808, -0.7083, -0.5214, -0.7988]) tensor([0.4879, 0.1642, 0.1979, 0.1500]) -Greedy action tensor([-0.3601, -0.2723, -0.0291, -0.9446]) tensor([0.2474, 0.2701, 0.3445, 0.1379]) -Greedy action tensor([-0.8603, -0.7033, 0.2203, -1.3060]) tensor([0.1737, 0.2032, 0.5118, 0.1112]) -Greedy action tensor([-0.2324, -1.0778, 0.1357, -0.2003]) tensor([0.2560, 0.1099, 0.3699, 0.2643]) -Greedy action tensor([ 0.2531, -1.1952, -0.9814, -0.8359]) tensor([0.5369, 0.1262, 0.1562, 0.1807]) -Greedy action tensor([-0.8779, -0.5364, 0.5809, -0.6542]) tensor([0.1256, 0.1768, 0.5404, 0.1572]) -Greedy action tensor([-0.3963, -1.2152, -0.0205, 0.3468]) tensor([0.2000, 0.0882, 0.2913, 0.4205]) -Greedy action tensor([ 0.3945, -1.3243, 0.1253, -0.0588]) tensor([0.3878, 0.0695, 0.2963, 0.2464]) -Greedy action tensor([ 0.1844, -0.2344, -0.1967, -0.4677]) tensor([0.3494, 0.2299, 0.2387, 0.1820]) -Greedy action tensor([-0.6106, 0.0532, 0.2412, -1.1559]) tensor([0.1705, 0.3311, 0.3996, 0.0988]) -Greedy action tensor([-0.3375, -0.2008, 0.5431, -0.6304]) tensor([0.1885, 0.2161, 0.4547, 0.1406]) -Greedy action tensor([-0.4183, -0.6074, -0.7451, -0.6250]) tensor([0.2974, 0.2462, 0.2145, 0.2419]) -Greedy action tensor([ 0.1137, -0.3298, 0.0709, -0.2446]) tensor([0.3032, 0.1946, 0.2904, 0.2119]) -Greedy action tensor([-0.2313, 0.6080, -0.3444, -0.5424]) tensor([0.2024, 0.4685, 0.1808, 0.1483]) -Greedy action tensor([ 0.1617, 0.3969, 0.6572, -0.0196]) tensor([0.2109, 0.2669, 0.3462, 0.1760]) -Greedy action tensor([ 0.6550, -1.1488, -0.3581, -0.6917]) tensor([0.5593, 0.0921, 0.2031, 0.1455]) -Greedy action tensor([ 0.2874, -1.2515, -0.5338, -0.0745]) tensor([0.4254, 0.0913, 0.1871, 0.2962]) -Greedy action tensor([ 0.2086, 0.0212, -0.7617, 0.5609]) tensor([0.2754, 0.2284, 0.1044, 0.3918]) -Greedy action tensor([ 1.9609, -0.7831, 0.7035, 1.4206]) tensor([0.5178, 0.0333, 0.1473, 0.3016]) -Greedy action tensor([ 0.3829, -1.0289, 0.4897, -0.2118]) tensor([0.3439, 0.0838, 0.3826, 0.1897]) -Greedy action tensor([ 0.4014, 0.5420, -0.1356, -0.6691]) tensor([0.3249, 0.3739, 0.1899, 0.1114]) -Greedy action tensor([ 0.3143, -0.5922, 0.5157, 0.2209]) tensor([0.2826, 0.1142, 0.3457, 0.2574]) -Greedy action tensor([ 0.0242, -0.7073, 0.0059, -0.1631]) tensor([0.3037, 0.1462, 0.2982, 0.2519]) -Greedy action tensor([-0.8142, -0.4621, 0.6469, -1.1363]) tensor([0.1341, 0.1907, 0.5780, 0.0972]) -Greedy action tensor([ 0.2773, -1.0330, -0.1401, 0.0642]) tensor([0.3654, 0.0986, 0.2407, 0.2953]) -Greedy action tensor([ 8.1614e-04, -3.3327e-02, 8.4129e-01, -1.6918e-01]) tensor([0.1950, 0.1885, 0.4520, 0.1645]) -Greedy action tensor([-0.8456, 0.0252, -0.2007, 0.1633]) tensor([0.1244, 0.2972, 0.2371, 0.3412]) -Greedy action tensor([-0.9872, -0.5571, -1.4773, 0.4988]) tensor([0.1321, 0.2031, 0.0809, 0.5839]) -Greedy action tensor([ 0.0964, -1.2945, -0.0202, -0.3331]) tensor([0.3585, 0.0892, 0.3190, 0.2333]) -Greedy action tensor([ 0.0361, -1.4448, -0.1925, 0.1617]) tensor([0.3168, 0.0720, 0.2520, 0.3592]) -Greedy action tensor([-0.4586, -0.8344, -0.3488, 0.1386]) tensor([0.2165, 0.1487, 0.2416, 0.3933]) -Greedy action tensor([ 0.3483, -0.0973, -0.5625, 0.2347]) tensor([0.3407, 0.2182, 0.1370, 0.3041]) -Greedy action tensor([ 0.0821, -0.5280, -0.8228, -0.8059]) tensor([0.4238, 0.2303, 0.1715, 0.1744]) -Greedy action tensor([ 1.1768, -0.6783, 1.3561, 0.3590]) tensor([0.3579, 0.0560, 0.4282, 0.1580]) -Greedy action tensor([-1.0817, 0.6608, -0.9906, -0.5622]) tensor([0.1054, 0.6020, 0.1154, 0.1772]) -Greedy action tensor([ 0.3986, 0.6639, -0.5682, -0.2241]) tensor([0.3105, 0.4048, 0.1181, 0.1666]) -Greedy action tensor([-1.1058, -0.5402, 0.6698, -1.8417]) tensor([0.1094, 0.1925, 0.6457, 0.0524]) -Greedy action tensor([ 0.8771, -0.5015, 0.3122, 0.3160]) tensor([0.4182, 0.1054, 0.2377, 0.2386]) -Greedy action tensor([ 0.1402, -0.0993, -0.6207, 0.6882]) tensor([0.2510, 0.1975, 0.1173, 0.4342]) -Greedy action tensor([-0.4401, 0.7258, 0.3655, -0.0796]) tensor([0.1269, 0.4072, 0.2840, 0.1820]) -Greedy action tensor([ 1.4938, -0.6374, -0.2591, 0.2919]) tensor([0.6279, 0.0745, 0.1088, 0.1888]) -Greedy action tensor([ 1.1985, -0.2633, -1.1982, 0.4767]) tensor([0.5529, 0.1282, 0.0503, 0.2686]) -Greedy action tensor([ 1.2409, -0.4740, -0.2962, 0.3212]) tensor([0.5575, 0.1003, 0.1199, 0.2223]) -Greedy action tensor([ 1.1003, -0.2000, -0.7122, 0.3071]) tensor([0.5296, 0.1443, 0.0865, 0.2396]) -Greedy action tensor([ 1.3789, -0.5003, -0.4356, 0.0806]) tensor([0.6295, 0.0961, 0.1026, 0.1718]) -Greedy action tensor([ 1.1791, -0.4431, -0.0535, -0.0228]) tensor([0.5588, 0.1103, 0.1629, 0.1680]) -Greedy action tensor([ 2.3036, -1.5639, -0.1447, 0.4757]) tensor([0.7886, 0.0165, 0.0682, 0.1268]) -Greedy action tensor([ 2.1059, -1.1433, -0.5528, 0.7984]) tensor([0.7250, 0.0281, 0.0508, 0.1961]) -Greedy action tensor([ 1.4614, -0.8717, -0.3556, -0.2805]) tensor([0.6970, 0.0676, 0.1133, 0.1221]) -Greedy action tensor([ 1.6050, -0.9055, -0.2076, 0.3974]) tensor([0.6479, 0.0526, 0.1058, 0.1937]) -Greedy action tensor([ 1.6555, -0.3534, -0.5533, 0.0794]) tensor([0.6893, 0.0925, 0.0757, 0.1425]) -Greedy action tensor([ 1.4654, -0.6168, -0.5476, 0.6884]) tensor([0.5821, 0.0726, 0.0778, 0.2676]) -Greedy action tensor([ 1.3578, -0.2542, -0.6041, 0.2039]) tensor([0.6041, 0.1205, 0.0849, 0.1905]) -Greedy action tensor([ 1.3763, -0.1184, -1.0977, 0.3616]) tensor([0.5984, 0.1342, 0.0504, 0.2169]) -Greedy action tensor([ 1.6951, -0.5819, -0.4305, 0.6913]) tensor([0.6295, 0.0646, 0.0751, 0.2307]) -Greedy action tensor([ 1.5599, -0.5247, -0.5734, 1.2330]) tensor([0.5092, 0.0633, 0.0603, 0.3672]) -Greedy action tensor([ 1.9981, -0.7647, -0.1381, 0.2513]) tensor([0.7377, 0.0466, 0.0871, 0.1286]) -Greedy action tensor([ 1.2323, -0.1965, -0.9172, 0.5023]) tensor([0.5440, 0.1304, 0.0634, 0.2622]) -Greedy action tensor([1.4073, 0.2405, 0.0788, 0.0390]) tensor([0.5462, 0.1701, 0.1447, 0.1390]) -Greedy action tensor([ 1.8183, -0.6885, -0.7076, 0.8142]) tensor([0.6545, 0.0534, 0.0524, 0.2398]) -Greedy action tensor([ 1.1728, 0.1773, -0.7232, 0.4638]) tensor([0.4971, 0.1837, 0.0746, 0.2446]) -Greedy action tensor([ 1.3024, -0.2201, -0.6486, 0.3891]) tensor([0.5677, 0.1238, 0.0807, 0.2278]) -Greedy action tensor([ 2.1328, -1.3385, -0.1319, 0.7241]) tensor([0.7250, 0.0225, 0.0753, 0.1772]) -Greedy action tensor([ 1.2183, -0.4460, -0.1965, 0.3566]) tensor([0.5392, 0.1021, 0.1310, 0.2278]) -Greedy action tensor([ 1.4994, -0.6449, -0.0036, 0.2975]) tensor([0.6097, 0.0714, 0.1356, 0.1833]) -Greedy action tensor([ 1.1980, -0.2698, -0.0075, 0.4410]) tensor([0.5002, 0.1153, 0.1498, 0.2346]) -Greedy action tensor([ 1.8138, 0.5416, -1.0441, -0.3606]) tensor([0.6890, 0.1931, 0.0395, 0.0783]) -Greedy action tensor([ 0.8459, -0.2111, -0.5651, 0.1282]) tensor([0.4809, 0.1671, 0.1173, 0.2346]) -Greedy action tensor([ 1.2471, -0.1408, -0.5804, -0.0272]) tensor([0.5917, 0.1477, 0.0952, 0.1654]) -Greedy action tensor([ 1.5039, 0.0355, -0.8002, 0.2053]) tensor([0.6238, 0.1437, 0.0623, 0.1703]) -Greedy action tensor([ 1.6759, -0.6310, -0.9400, -0.4050]) tensor([0.7707, 0.0767, 0.0563, 0.0962]) -Greedy action tensor([ 1.7558, -0.3957, -0.3656, 0.1711]) tensor([0.6939, 0.0807, 0.0832, 0.1423]) -Greedy action tensor([ 1.3045, -0.5748, -0.7176, 0.6045]) tensor([0.5613, 0.0857, 0.0743, 0.2787]) -Greedy action tensor([ 1.1608, -0.5453, -0.1992, 0.4113]) tensor([0.5233, 0.0950, 0.1343, 0.2473]) -Greedy action tensor([ 1.3632, -0.7504, -0.3628, -0.0446]) tensor([0.6479, 0.0783, 0.1153, 0.1585]) -Greedy action tensor([ 1.3780, -0.7748, -0.3843, 0.4706]) tensor([0.5912, 0.0687, 0.1015, 0.2386]) -Greedy action tensor([ 1.1992, 0.1863, -0.6289, 0.2558]) tensor([0.5227, 0.1898, 0.0840, 0.2035]) -Greedy action tensor([ 1.1994, -0.0450, -0.3463, 0.5523]) tensor([0.4939, 0.1423, 0.1053, 0.2586]) -Greedy action tensor([ 1.6818, 0.3892, -0.2231, 0.4302]) tensor([0.5850, 0.1606, 0.0871, 0.1673]) -Greedy action tensor([ 1.5626, -0.4373, -0.4695, 0.1766]) tensor([0.6594, 0.0892, 0.0864, 0.1649]) -Greedy action tensor([ 1.4801, 0.0251, -1.5910, -0.0550]) tensor([0.6688, 0.1561, 0.0310, 0.1441]) -Greedy action tensor([ 1.7771, -0.4623, -1.0920, 0.4005]) tensor([0.7064, 0.0752, 0.0401, 0.1783]) -Greedy action tensor([ 1.0916, -0.4738, -0.6243, 0.5722]) tensor([0.5041, 0.1054, 0.0906, 0.2999]) -Greedy action tensor([ 1.8447, -0.2825, -0.2566, -0.1077]) tensor([0.7229, 0.0861, 0.0884, 0.1026]) -Greedy action tensor([ 1.4451, -0.4961, -0.2547, 0.3583]) tensor([0.6011, 0.0863, 0.1098, 0.2028]) -Greedy action tensor([ 1.5436, -0.5971, -0.5123, 0.5317]) tensor([0.6215, 0.0731, 0.0795, 0.2259]) -Greedy action tensor([ 1.3470, -0.2663, -0.0738, 0.0832]) tensor([0.5803, 0.1156, 0.1401, 0.1640]) -Greedy action tensor([ 1.3398, -0.5575, -0.6304, 0.1950]) tensor([0.6220, 0.0933, 0.0867, 0.1980]) -Greedy action tensor([ 1.0881, -0.3631, -0.4848, 0.4407]) tensor([0.5088, 0.1192, 0.1056, 0.2664]) -Greedy action tensor([ 1.7572, -0.6960, -0.2711, 0.2301]) tensor([0.6970, 0.0600, 0.0917, 0.1514]) -Greedy action tensor([ 1.5276, -0.6982, -0.5214, 0.2954]) tensor([0.6542, 0.0706, 0.0843, 0.1908]) -Greedy action tensor([ 1.8623, -1.4273, -0.1729, 0.4009]) tensor([0.7144, 0.0266, 0.0933, 0.1657]) -Greedy action tensor([ 1.2712, -0.1788, -0.6119, 0.2605]) tensor([0.5712, 0.1340, 0.0869, 0.2079]) -Greedy action tensor([ 1.1897, -0.3455, -0.4002, 0.1113]) tensor([0.5683, 0.1224, 0.1159, 0.1933]) -Greedy action tensor([ 1.4726, -0.3143, -0.9161, 0.1832]) tensor([0.6516, 0.1091, 0.0598, 0.1795]) -Greedy action tensor([ 2.0995, -0.5920, -0.3939, 0.6681]) tensor([0.7197, 0.0488, 0.0595, 0.1720]) -Greedy action tensor([ 1.6118, 0.5624, -0.3313, 0.3981]) tensor([0.5585, 0.1956, 0.0800, 0.1659]) -Greedy action tensor([ 1.2542, -0.2541, -0.2109, 0.4875]) tensor([0.5217, 0.1154, 0.1205, 0.2423]) -Greedy action tensor([ 1.7959, -1.0168, -0.2297, 0.6149]) tensor([0.6671, 0.0401, 0.0880, 0.2048]) -Greedy action tensor([ 1.5198, -0.7121, -0.7046, 0.3374]) tensor([0.6570, 0.0705, 0.0710, 0.2014]) -Greedy action tensor([ 1.3550, -0.6692, -0.1249, 0.2295]) tensor([0.5937, 0.0784, 0.1352, 0.1927]) -Greedy action tensor([ 1.4678, -0.4196, -0.6971, 0.4430]) tensor([0.6154, 0.0932, 0.0706, 0.2208]) -Greedy action tensor([ 2.1155, -0.2343, -0.8126, 0.6334]) tensor([0.7267, 0.0693, 0.0389, 0.1651]) -Greedy action tensor([ 1.3158, -0.0804, -0.8568, 0.1079]) tensor([0.6023, 0.1491, 0.0686, 0.1800]) -Greedy action tensor([ 1.4721, -0.3952, -0.1719, 0.1409]) tensor([0.6204, 0.0959, 0.1199, 0.1639]) -Greedy action tensor([ 1.6977, -0.7268, -0.8326, 0.6280]) tensor([0.6617, 0.0586, 0.0527, 0.2270]) -Greedy action tensor([ 1.4388, -0.7781, -0.3397, -0.0578]) tensor([0.6659, 0.0725, 0.1125, 0.1491]) -Greedy action tensor([ 1.3177, -0.2968, -0.5421, 0.1335]) tensor([0.6022, 0.1198, 0.0938, 0.1843]) -Greedy action tensor([ 1.6302, -0.8705, -0.7427, 0.5188]) tensor([0.6647, 0.0545, 0.0620, 0.2188]) -Greedy action tensor([ 1.6355, -0.6029, -0.5233, 0.7090]) tensor([0.6180, 0.0659, 0.0714, 0.2447]) -Greedy action tensor([ 1.8343, -0.5392, -0.5506, 0.5605]) tensor([0.6826, 0.0636, 0.0629, 0.1910]) -Greedy action tensor([ 1.6478, -0.4730, -0.8426, 0.4095]) tensor([0.6699, 0.0803, 0.0555, 0.1942]) -Greedy action tensor([ 1.4274, -0.7380, -0.8083, 0.1490]) tensor([0.6666, 0.0765, 0.0713, 0.1856]) -Greedy action tensor([ 1.3029, -0.0652, -0.7339, -0.0317]) tensor([0.6067, 0.1545, 0.0791, 0.1597]) -Greedy action tensor([ 1.9781, 0.2864, -0.7210, -0.0645]) tensor([0.7240, 0.1334, 0.0487, 0.0939]) -Greedy action tensor([ 1.7506, 0.1953, -0.6659, 0.1505]) tensor([0.6657, 0.1405, 0.0594, 0.1344]) -Greedy action tensor([ 1.4804, -0.2247, -0.8614, 0.0724]) tensor([0.6568, 0.1194, 0.0631, 0.1607]) -Greedy action tensor([ 1.0600, -0.6474, -0.2730, 0.2860]) tensor([0.5246, 0.0951, 0.1383, 0.2419]) -Greedy action tensor([ 1.4366, -0.3024, -0.9858, 0.1307]) tensor([0.6513, 0.1144, 0.0578, 0.1765]) -Greedy action tensor([ 1.3791, -0.2057, -0.4157, 0.1027]) tensor([0.6060, 0.1242, 0.1007, 0.1691]) -Greedy action tensor([ 1.6579, -0.3039, -0.7297, 0.1587]) tensor([0.6869, 0.0966, 0.0631, 0.1534]) -Greedy action tensor([ 0.9744, -0.4758, -0.0400, -0.3485]) tensor([0.5366, 0.1258, 0.1946, 0.1429]) -Greedy action tensor([ 0.5764, -0.3190, -0.0775, -0.0823]) tensor([0.4088, 0.1670, 0.2126, 0.2116]) -Greedy action tensor([ 0.8732, -0.2443, -0.0017, -0.0788]) tensor([0.4695, 0.1536, 0.1957, 0.1812]) -Greedy action tensor([ 0.4425, -0.2326, -0.0850, -0.0332]) tensor([0.3676, 0.1871, 0.2169, 0.2284]) -Greedy action tensor([ 1.0019, -0.6019, 0.0293, -0.4073]) tensor([0.5484, 0.1103, 0.2073, 0.1340]) -Greedy action tensor([ 0.6594, -0.6563, -0.1361, -0.2295]) tensor([0.4693, 0.1259, 0.2118, 0.1929]) -Greedy action tensor([ 0.8348, -0.5167, -0.1291, -0.3037]) tensor([0.5101, 0.1320, 0.1945, 0.1634]) -Greedy action tensor([ 0.8229, 0.0237, 0.1553, -0.0696]) tensor([0.4215, 0.1896, 0.2162, 0.1727]) -Greedy action tensor([ 1.0354, -0.4729, -0.1388, -0.4320]) tensor([0.5679, 0.1257, 0.1755, 0.1309]) -Greedy action tensor([ 0.8669, -0.6716, -0.0117, -0.5490]) tensor([0.5340, 0.1146, 0.2218, 0.1296]) -Greedy action tensor([ 0.9197, -0.5972, -0.0657, -0.3467]) tensor([0.5335, 0.1170, 0.1991, 0.1504]) -Greedy action tensor([ 0.5694, 0.1895, -0.1961, 0.0825]) tensor([0.3618, 0.2475, 0.1683, 0.2224]) -Greedy action tensor([ 0.9740, -0.5185, -0.1255, -0.5586]) tensor([0.5638, 0.1267, 0.1878, 0.1217]) -Greedy action tensor([ 1.0058, -0.4669, -0.1060, -0.6294]) tensor([0.5704, 0.1308, 0.1876, 0.1112]) -Greedy action tensor([ 0.5980, -0.2664, 0.0276, -0.0963]) tensor([0.4022, 0.1695, 0.2274, 0.2009]) -Greedy action tensor([ 0.3313, -0.5634, -0.2976, -0.1086]) tensor([0.3867, 0.1580, 0.2062, 0.2491]) -Greedy action tensor([ 1.0652, -0.6298, -0.0359, -0.8151]) tensor([0.5993, 0.1100, 0.1993, 0.0914]) -Greedy action tensor([ 1.0358, -0.7543, 0.0358, -0.4683]) tensor([0.5691, 0.0950, 0.2094, 0.1265]) -Greedy action tensor([ 0.4173, -0.0322, -0.1293, -0.0377]) tensor([0.3507, 0.2237, 0.2030, 0.2225]) -Greedy action tensor([ 1.0181, -0.6218, 0.0536, -0.4743]) tensor([0.5555, 0.1078, 0.2118, 0.1249]) -Greedy action tensor([ 0.7942, -0.3269, 0.0230, -0.3254]) tensor([0.4729, 0.1541, 0.2187, 0.1543]) -Greedy action tensor([ 0.8292, -0.9340, 0.1995, -0.3500]) tensor([0.4971, 0.0852, 0.2648, 0.1529]) -Greedy action tensor([ 0.5467, 0.0909, -0.0325, -0.0043]) tensor([0.3609, 0.2288, 0.2022, 0.2080]) -Greedy action tensor([ 0.9949, -0.5145, 0.0920, -0.3340]) tensor([0.5288, 0.1169, 0.2144, 0.1400]) -Greedy action tensor([ 1.0102, -0.6535, -0.0798, -0.6042]) tensor([0.5798, 0.1098, 0.1949, 0.1154]) -Greedy action tensor([ 0.6564, -0.1115, 0.0360, -0.0661]) tensor([0.4020, 0.1865, 0.2162, 0.1952]) -Greedy action tensor([ 0.9600, -0.4285, 0.1708, -0.3195]) tensor([0.5046, 0.1259, 0.2292, 0.1404]) -Greedy action tensor([ 1.1989, -0.6564, -0.0170, -0.4257]) tensor([0.6061, 0.0948, 0.1797, 0.1194]) -Greedy action tensor([ 0.5626, -0.3105, 0.0386, -0.1355]) tensor([0.3988, 0.1666, 0.2362, 0.1984]) -Greedy action tensor([ 0.5960, -0.0050, 0.0454, -0.4166]) tensor([0.4019, 0.2204, 0.2317, 0.1460]) -Greedy action tensor([ 1.1164, -0.7150, -0.0858, -0.4546]) tensor([0.5993, 0.0960, 0.1801, 0.1246]) -Greedy action tensor([ 0.6514, 0.3017, -0.1914, 0.1461]) tensor([0.3651, 0.2574, 0.1572, 0.2203]) -Greedy action tensor([ 1.0857, -0.3044, 0.1100, -0.2478]) tensor([0.5292, 0.1318, 0.1995, 0.1395]) -Greedy action tensor([ 0.7112, -0.2268, 0.0949, -0.0366]) tensor([0.4158, 0.1628, 0.2245, 0.1969]) -Greedy action tensor([ 0.6153, -0.6071, -0.1312, -0.2941]) tensor([0.4606, 0.1356, 0.2183, 0.1855]) -Greedy action tensor([ 1.1385, -0.8398, -0.1519, -0.4946]) tensor([0.6216, 0.0860, 0.1710, 0.1214]) -Greedy action tensor([ 0.5351, -0.0510, -0.1357, -0.0209]) tensor([0.3786, 0.2107, 0.1936, 0.2171]) -Greedy action tensor([ 0.4826, -0.0089, 0.0306, -0.0629]) tensor([0.3537, 0.2163, 0.2251, 0.2050]) -Greedy action tensor([ 0.9525, -0.4740, 0.1589, -0.2111]) tensor([0.4988, 0.1198, 0.2256, 0.1558]) -Greedy action tensor([ 0.6035, -0.1947, 0.0975, -0.1487]) tensor([0.3961, 0.1783, 0.2388, 0.1867]) -Greedy action tensor([ 0.2734, 0.1533, -0.0065, -0.2062]) tensor([0.3066, 0.2719, 0.2317, 0.1898]) -Greedy action tensor([ 0.9726, -0.6494, -0.0584, -0.6264]) tensor([0.5694, 0.1125, 0.2031, 0.1151]) -Greedy action tensor([ 0.4443, -0.5819, 0.0131, -0.1384]) tensor([0.3896, 0.1396, 0.2532, 0.2176]) -Greedy action tensor([ 0.6005, -0.2390, -0.0721, -0.1806]) tensor([0.4166, 0.1799, 0.2126, 0.1908]) -Greedy action tensor([ 0.7483, -0.0472, -0.1478, -0.1629]) tensor([0.4422, 0.1996, 0.1805, 0.1778]) -Greedy action tensor([ 1.0282, -0.8695, 0.2242, -0.4825]) tensor([0.5500, 0.0825, 0.2461, 0.1214]) -Greedy action tensor([ 0.9732, -0.8252, 0.0085, -0.5152]) tensor([0.5642, 0.0934, 0.2150, 0.1274]) -Greedy action tensor([ 0.9927, -0.6192, -0.1071, -0.5735]) tensor([0.5743, 0.1146, 0.1912, 0.1199]) -Greedy action tensor([ 0.7168, -0.6073, 0.2517, -0.4790]) tensor([0.4553, 0.1211, 0.2859, 0.1377]) -Greedy action tensor([ 0.9349, -0.6031, -0.0935, -0.5260]) tensor([0.5542, 0.1190, 0.1982, 0.1286]) -Greedy action tensor([ 0.8013, 0.4494, -0.1568, -0.1498]) tensor([0.4043, 0.2844, 0.1551, 0.1562]) -Greedy action tensor([ 0.8235, -0.6639, -0.0504, -0.5635]) tensor([0.5282, 0.1194, 0.2205, 0.1320]) -Greedy action tensor([ 1.2287, -0.5771, -0.0088, -0.2407]) tensor([0.5936, 0.0976, 0.1722, 0.1366]) -Greedy action tensor([ 0.8073, -0.4367, 0.0091, -0.2378]) tensor([0.4785, 0.1379, 0.2154, 0.1682]) -Greedy action tensor([0.7040, 0.1521, 0.0807, 0.0021]) tensor([0.3835, 0.2208, 0.2056, 0.1901]) -Greedy action tensor([ 0.8680, -0.6613, 0.1380, -0.5361]) tensor([0.5144, 0.1115, 0.2479, 0.1263]) -Greedy action tensor([ 0.7772, -0.8006, -0.0513, -0.3725]) tensor([0.5102, 0.1053, 0.2228, 0.1616]) -Greedy action tensor([ 0.8521, -0.4821, -0.0807, -0.5173]) tensor([0.5233, 0.1378, 0.2059, 0.1330]) -Greedy action tensor([ 0.9127, -0.6013, -0.0070, -0.6285]) tensor([0.5456, 0.1201, 0.2175, 0.1168]) -Greedy action tensor([ 0.7822, -0.3981, -0.0762, -0.3224]) tensor([0.4849, 0.1489, 0.2055, 0.1607]) -Greedy action tensor([ 1.1005, -0.6090, 0.1555, -0.6881]) tensor([0.5758, 0.1042, 0.2238, 0.0963]) -Greedy action tensor([ 0.5682, -0.0844, 0.2191, -0.0189]) tensor([0.3595, 0.1872, 0.2535, 0.1998]) -Greedy action tensor([ 0.8309, 0.0658, -0.0571, 0.0229]) tensor([0.4306, 0.2003, 0.1772, 0.1919]) -Greedy action tensor([ 0.9573, -0.7020, 0.0493, -0.4853]) tensor([0.5465, 0.1040, 0.2204, 0.1291]) -Greedy action tensor([ 0.5728, -0.3997, -0.0451, -0.3033]) tensor([0.4285, 0.1620, 0.2310, 0.1784]) -Greedy action tensor([ 0.4135, -0.1286, -0.0158, -0.0661]) tensor([0.3507, 0.2039, 0.2283, 0.2171]) -Greedy action tensor([ 0.9973, -0.8313, -0.0030, -0.4352]) tensor([0.5659, 0.0909, 0.2081, 0.1351]) -Greedy action tensor([ 0.6978, -0.7463, -0.0206, -0.3790]) tensor([0.4844, 0.1143, 0.2362, 0.1650]) -Greedy action tensor([ 0.6430, -0.6184, -0.1498, -0.1429]) tensor([0.4563, 0.1293, 0.2065, 0.2079]) -Greedy action tensor([ 0.5398, -0.0167, 0.1166, -0.1607]) tensor([0.3670, 0.2104, 0.2404, 0.1822]) -Greedy action tensor([ 1.1583, -0.7178, 0.0646, -0.5545]) tensor([0.5993, 0.0918, 0.2008, 0.1081]) -Greedy action tensor([ 0.3772, -0.2477, -0.0197, -0.3845]) tensor([0.3739, 0.2002, 0.2514, 0.1746]) -Greedy action tensor([ 0.5916, -0.4562, -0.0579, -0.2229]) tensor([0.4318, 0.1514, 0.2255, 0.1912]) -Greedy action tensor([ 0.1752, 0.0311, -0.0932, -0.0835]) tensor([0.2939, 0.2545, 0.2247, 0.2269]) -Greedy action tensor([ 0.8988, -0.2740, -0.1220, -0.4135]) tensor([0.5157, 0.1596, 0.1858, 0.1388]) -Greedy action tensor([ 0.4787, -0.2438, 0.1058, -0.1233]) tensor([0.3674, 0.1784, 0.2530, 0.2012]) -Greedy action tensor([ 1.3771, -0.8361, 0.1464, -0.6919]) tensor([0.6545, 0.0716, 0.1912, 0.0827]) -Greedy action tensor([ 0.3884, -0.0107, -0.1139, -0.1487]) tensor([0.3496, 0.2346, 0.2116, 0.2043]) -Greedy action tensor([ 0.8423, -0.1940, 0.1216, -0.1405]) tensor([0.4514, 0.1601, 0.2195, 0.1689]) -Greedy action tensor([ 1.3857, -0.9614, 0.0768, -0.7078]) tensor([0.6716, 0.0642, 0.1814, 0.0828]) -Greedy action tensor([ 0.7367, -0.5735, -0.0233, -0.3627]) tensor([0.4830, 0.1303, 0.2259, 0.1609]) -Greedy action tensor([-1.9202, -0.4138, 0.6559, -0.1667]) tensor([0.0409, 0.1846, 0.5381, 0.2364]) -Greedy action tensor([-1.8081, -0.6446, 0.1758, -0.2169]) tensor([0.0610, 0.1954, 0.4438, 0.2997]) -Greedy action tensor([-1.9050, -0.4227, 0.6437, -0.1603]) tensor([0.0418, 0.1841, 0.5348, 0.2393]) -Greedy action tensor([-1.9009, -0.4472, 0.6482, -0.1580]) tensor([0.0420, 0.1799, 0.5379, 0.2402]) -Greedy action tensor([-1.8377, -0.4402, 0.6137, -0.1302]) tensor([0.0451, 0.1825, 0.5236, 0.2488]) -Greedy action tensor([-1.9113, -0.4098, 0.6467, -0.1632]) tensor([0.0414, 0.1859, 0.5347, 0.2379]) -Greedy action tensor([-1.5374, -0.3489, 0.4874, 0.0857]) tensor([0.0591, 0.1939, 0.4475, 0.2995]) -Greedy action tensor([-0.3362, 1.0780, 0.0546, 0.2152]) tensor([0.1201, 0.4940, 0.1775, 0.2084]) -Greedy action tensor([-1.9023, -0.4339, 0.6410, -0.1600]) tensor([0.0421, 0.1826, 0.5351, 0.2402]) -Greedy action tensor([-1.8525, -0.4491, 0.6239, -0.1333]) tensor([0.0444, 0.1805, 0.5277, 0.2475]) -Greedy action tensor([-1.6999, -0.4554, 0.5491, -0.0866]) tensor([0.0527, 0.1830, 0.4997, 0.2646]) -Greedy action tensor([-1.9447, -0.4400, 0.6643, -0.1803]) tensor([0.0401, 0.1806, 0.5450, 0.2342]) -Greedy action tensor([-1.8617, -0.4256, 0.6227, -0.1394]) tensor([0.0439, 0.1844, 0.5261, 0.2456]) -Greedy action tensor([-1.8687, -0.3914, 0.6491, -0.0755]) tensor([0.0420, 0.1841, 0.5213, 0.2526]) -Greedy action tensor([-1.9275, -0.4183, 0.6567, -0.1713]) tensor([0.0407, 0.1841, 0.5395, 0.2357]) -Greedy action tensor([-1.8027, -0.4074, 0.5914, -0.1370]) tensor([0.0470, 0.1896, 0.5149, 0.2485]) -Greedy action tensor([-1.9174, -0.4332, 0.6526, -0.1671]) tensor([0.0413, 0.1820, 0.5392, 0.2375]) -Greedy action tensor([-1.9180, -0.3583, 0.6392, -0.1799]) tensor([0.0411, 0.1954, 0.5299, 0.2336]) -Greedy action tensor([-1.8140, -0.4722, 0.6010, -0.1309]) tensor([0.0467, 0.1788, 0.5229, 0.2515]) -Greedy action tensor([-1.7901, -0.2834, 0.5760, -0.1270]) tensor([0.0466, 0.2104, 0.4969, 0.2460]) -Greedy action tensor([-1.7381, 0.0413, 0.4913, -0.0410]) tensor([0.0461, 0.2734, 0.4287, 0.2518]) -Greedy action tensor([-1.9165, -0.4389, 0.6517, -0.1659]) tensor([0.0414, 0.1812, 0.5393, 0.2381]) -Greedy action tensor([-1.5376, 0.2218, 0.4291, -0.1264]) tensor([0.0554, 0.3217, 0.3958, 0.2271]) -Greedy action tensor([-0.9359, 0.8424, 0.0380, 0.3967]) tensor([0.0749, 0.4431, 0.1982, 0.2838]) -Greedy action tensor([ 0.0970, -0.0034, 0.0863, 0.1844]) tensor([0.2509, 0.2270, 0.2483, 0.2738]) -Greedy action tensor([-0.3359, 1.1507, 0.0914, 0.2883]) tensor([0.1134, 0.5012, 0.1738, 0.2116]) -Greedy action tensor([-1.8875, -0.4228, 0.6392, -0.1406]) tensor([0.0424, 0.1835, 0.5307, 0.2433]) -Greedy action tensor([-1.7310, -0.3718, 0.5507, -0.1010]) tensor([0.0505, 0.1967, 0.4949, 0.2579]) -Greedy action tensor([-1.8522, -0.3003, 0.6125, -0.1364]) tensor([0.0434, 0.2049, 0.5104, 0.2413]) -Greedy action tensor([-1.8569, -0.4078, 0.6557, -0.1407]) tensor([0.0432, 0.1839, 0.5327, 0.2402]) -Greedy action tensor([-1.9392, -0.4018, 0.6557, -0.1767]) tensor([0.0402, 0.1870, 0.5385, 0.2342]) -Greedy action tensor([-1.9319, -0.4345, 0.6615, -0.1733]) tensor([0.0406, 0.1814, 0.5426, 0.2355]) -Greedy action tensor([-1.9173, -0.4260, 0.6598, -0.1608]) tensor([0.0410, 0.1821, 0.5394, 0.2374]) -Greedy action tensor([-1.8868, -0.4572, 0.6459, -0.1519]) tensor([0.0427, 0.1782, 0.5372, 0.2419]) -Greedy action tensor([-1.7241, -0.4965, 0.5627, -0.1124]) tensor([0.0519, 0.1771, 0.5109, 0.2601]) -Greedy action tensor([-1.0993, 0.8176, 0.1522, 0.1605]) tensor([0.0675, 0.4588, 0.2359, 0.2378]) -Greedy action tensor([-1.5869, -0.4117, 0.4934, 0.0256]) tensor([0.0579, 0.1876, 0.4639, 0.2906]) -Greedy action tensor([-1.9290, -0.4572, 0.6641, -0.1680]) tensor([0.0407, 0.1775, 0.5447, 0.2370]) -Greedy action tensor([-1.6884, -0.4927, 0.5421, -0.1016]) tensor([0.0541, 0.1787, 0.5030, 0.2642]) -Greedy action tensor([-1.3101, -0.0178, 0.3605, 0.1921]) tensor([0.0692, 0.2520, 0.3679, 0.3109]) -Greedy action tensor([-1.6716, -0.5280, 0.5541, -0.0512]) tensor([0.0542, 0.1700, 0.5018, 0.2739]) -Greedy action tensor([-1.5356, 0.0455, 0.5355, -0.5305]) tensor([0.0605, 0.2941, 0.4801, 0.1653]) -Greedy action tensor([-1.7079, -0.4529, 0.5538, -0.0557]) tensor([0.0517, 0.1815, 0.4967, 0.2700]) -Greedy action tensor([-1.8342, -0.3446, 0.6371, -0.1044]) tensor([0.0436, 0.1936, 0.5166, 0.2461]) -Greedy action tensor([-0.9906, -0.3467, 0.1682, 0.2878]) tensor([0.1033, 0.1967, 0.3291, 0.3709]) -Greedy action tensor([-1.8696, -0.3603, 0.6184, -0.1499]) tensor([0.0432, 0.1955, 0.5201, 0.2412]) -Greedy action tensor([-1.0888, 0.1256, 0.5282, 0.9997]) tensor([0.0572, 0.1927, 0.2882, 0.4618]) -Greedy action tensor([-1.0378, -0.1957, 0.2197, 0.4316]) tensor([0.0894, 0.2075, 0.3144, 0.3886]) -Greedy action tensor([-1.2568, 0.2410, 0.4765, 0.1599]) tensor([0.0656, 0.2931, 0.3710, 0.2703]) -Greedy action tensor([-1.8973, -0.3388, 0.6282, -0.1469]) tensor([0.0417, 0.1979, 0.5206, 0.2398]) -Greedy action tensor([-1.8750, -0.3644, 0.6283, -0.1122]) tensor([0.0424, 0.1921, 0.5183, 0.2472]) -Greedy action tensor([-1.8922, -0.4165, 0.6447, -0.1514]) tensor([0.0422, 0.1844, 0.5330, 0.2404]) -Greedy action tensor([-1.9254, -0.4361, 0.6508, -0.1696]) tensor([0.0410, 0.1820, 0.5395, 0.2375]) -Greedy action tensor([-1.8635, -0.4553, 0.6287, -0.1451]) tensor([0.0440, 0.1797, 0.5313, 0.2451]) -Greedy action tensor([-1.6458, 0.0953, 0.4262, -0.0298]) tensor([0.0508, 0.2898, 0.4035, 0.2558]) -Greedy action tensor([-1.7188, -0.2449, 0.5218, -0.0774]) tensor([0.0502, 0.2191, 0.4717, 0.2591]) -Greedy action tensor([-1.1339, 0.1231, 0.2828, -0.0199]) tensor([0.0856, 0.3008, 0.3529, 0.2607]) -Greedy action tensor([-1.8170, -0.3748, 0.6110, -0.0996]) tensor([0.0452, 0.1911, 0.5121, 0.2516]) -Greedy action tensor([-1.6473, -0.3997, 0.7041, -0.0035]) tensor([0.0496, 0.1727, 0.5209, 0.2567]) -Greedy action tensor([-0.9061, -0.0343, 0.1446, 0.0293]) tensor([0.1136, 0.2718, 0.3250, 0.2896]) -Greedy action tensor([-1.3335, 0.0386, 0.3241, 0.0272]) tensor([0.0710, 0.2799, 0.3724, 0.2767]) -Greedy action tensor([-1.9111, -0.4301, 0.6478, -0.1644]) tensor([0.0416, 0.1828, 0.5372, 0.2384]) -Greedy action tensor([-1.7757, -0.1747, 0.5889, -0.0795]) tensor([0.0454, 0.2248, 0.4825, 0.2473]) -Greedy action tensor([-1.3418, -0.1146, 0.4400, -0.5398]) tensor([0.0795, 0.2712, 0.4721, 0.1772]) -Greedy action tensor([-1.7407, 0.1068, 0.6089, -0.3437]) tensor([0.0457, 0.2901, 0.4793, 0.1849]) -Greedy action tensor([-1.7526, -0.2355, 0.5769, -0.0116]) tensor([0.0464, 0.2117, 0.4770, 0.2648]) -Greedy action tensor([-1.7308, -0.4810, 0.5368, -0.0464]) tensor([0.0512, 0.1786, 0.4943, 0.2759]) -Greedy action tensor([-1.9149, -0.3287, 0.6374, -0.1538]) tensor([0.0408, 0.1991, 0.5231, 0.2371]) -Greedy action tensor([-1.8809, -0.3552, 0.6248, -0.1468]) tensor([0.0425, 0.1956, 0.5211, 0.2409]) -Greedy action tensor([-1.9378, -0.4525, 0.6747, -0.1732]) tensor([0.0402, 0.1774, 0.5478, 0.2346]) -Greedy action tensor([-1.9072, -0.4026, 0.6487, -0.1565]) tensor([0.0414, 0.1865, 0.5336, 0.2385]) -Greedy action tensor([-0.2315, 0.1730, 0.6539, 0.5641]) tensor([0.1401, 0.2099, 0.3396, 0.3104]) -Greedy action tensor([-1.7276, -0.4218, 0.5690, -0.0649]) tensor([0.0502, 0.1854, 0.4994, 0.2650]) -Greedy action tensor([-1.9404, -0.4551, 0.6710, -0.1749]) tensor([0.0402, 0.1775, 0.5474, 0.2349]) -Greedy action tensor([-1.5709, 0.2358, 0.6020, -0.6480]) tensor([0.0544, 0.3312, 0.4776, 0.1368]) -Greedy action tensor([-1.7124, -0.5259, 0.5351, -0.0550]) tensor([0.0527, 0.1725, 0.4985, 0.2763]) -Greedy action tensor([-1.7248, -0.3421, 0.6499, 0.1013]) tensor([0.0456, 0.1816, 0.4898, 0.2830]) -Greedy action tensor([-1.6620, -0.2415, 0.1742, -0.5615]) tensor([0.0694, 0.2871, 0.4351, 0.2085]) -Greedy action tensor([-1.9248, -0.4325, 0.6576, -0.1686]) tensor([0.0409, 0.1818, 0.5407, 0.2367]) -Greedy action tensor([-0.3194, -0.2915, 0.0041, 0.4887]) tensor([0.1769, 0.1819, 0.2444, 0.3968]) -Greedy action tensor([-1.8649, -0.4209, 0.6250, -0.1451]) tensor([0.0437, 0.1852, 0.5271, 0.2440]) -Greedy action tensor([ 1.3801, -2.4349, 0.9294, 0.1516]) tensor([0.5123, 0.0113, 0.3264, 0.1500]) -Greedy action tensor([-0.1037, -0.8466, -0.6467, 0.6112]) tensor([0.2439, 0.1160, 0.1417, 0.4984]) -Greedy action tensor([-0.8396, -0.9380, 0.1848, -1.0626]) tensor([0.1821, 0.1650, 0.5072, 0.1457]) -Greedy action tensor([-0.2069, -1.2828, 0.1492, -0.2026]) tensor([0.2650, 0.0904, 0.3784, 0.2662]) -Greedy action tensor([-0.7523, -0.3565, 0.5571, -0.8880]) tensor([0.1416, 0.2103, 0.5244, 0.1236]) -Greedy action tensor([-0.8463, -0.6154, 0.7887, -0.9555]) tensor([0.1207, 0.1520, 0.6191, 0.1082]) -Greedy action tensor([ 0.3461, -1.4267, 0.0316, 0.4877]) tensor([0.3277, 0.0557, 0.2392, 0.3775]) -Greedy action tensor([-0.7400, -0.6997, -0.5834, -0.0305]) tensor([0.1907, 0.1985, 0.2230, 0.3877]) -Greedy action tensor([-0.1059, -0.8531, 0.2669, -0.7862]) tensor([0.2914, 0.1380, 0.4230, 0.1476]) -Greedy action tensor([ 0.9213, -0.7208, -0.4598, 0.0044]) tensor([0.5421, 0.1049, 0.1362, 0.2167]) -Greedy action tensor([-0.5730, -0.4820, 0.2393, -0.4470]) tensor([0.1824, 0.1998, 0.4109, 0.2069]) -Greedy action tensor([-0.8281, 0.1164, -1.0912, -0.2674]) tensor([0.1642, 0.4221, 0.1262, 0.2876]) -Greedy action tensor([-0.2340, -0.8008, -0.1451, 0.2011]) tensor([0.2378, 0.1349, 0.2599, 0.3674]) -Greedy action tensor([ 0.6798, 0.0433, 0.7305, -0.6454]) tensor([0.3513, 0.1859, 0.3695, 0.0933]) -Greedy action tensor([ 0.3676, -0.4931, -0.6019, 0.0141]) tensor([0.3993, 0.1689, 0.1514, 0.2804]) -Greedy action tensor([-0.2055, 0.3630, -0.7413, -0.3185]) tensor([0.2356, 0.4160, 0.1379, 0.2105]) -Greedy action tensor([-0.6293, -0.9433, -0.2277, -0.5241]) tensor([0.2306, 0.1685, 0.3446, 0.2562]) -Greedy action tensor([ 2.2349, -0.9128, 0.9007, 0.8810]) tensor([0.6392, 0.0275, 0.1683, 0.1651]) -Greedy action tensor([-0.4721, 0.1363, 0.3373, -0.8780]) tensor([0.1739, 0.3195, 0.3907, 0.1159]) -Greedy action tensor([-0.8656, -0.4664, -0.4599, 0.0705]) tensor([0.1529, 0.2279, 0.2294, 0.3898]) -Greedy action tensor([-0.2496, -0.9116, -0.3723, -0.9534]) tensor([0.3454, 0.1782, 0.3055, 0.1709]) -Greedy action tensor([-0.2109, -0.0499, -0.2800, -0.5307]) tensor([0.2608, 0.3064, 0.2434, 0.1894]) -Greedy action tensor([-0.9024, -0.6182, 0.4919, -0.9066]) tensor([0.1359, 0.1806, 0.5481, 0.1354]) -Greedy action tensor([ 1.6087, -1.1512, 0.6526, 0.3379]) tensor([0.5786, 0.0366, 0.2224, 0.1624]) -Greedy action tensor([-1.5678, -0.7727, 0.2465, -0.9957]) tensor([0.0899, 0.1991, 0.5517, 0.1593]) -Greedy action tensor([ 1.0428, -0.3569, 0.0672, 0.0498]) tensor([0.5015, 0.1237, 0.1890, 0.1858]) -Greedy action tensor([ 0.2796, 0.1076, -0.3748, 0.5129]) tensor([0.2759, 0.2323, 0.1434, 0.3484]) -Greedy action tensor([ 0.1048, -1.2456, 0.0029, -0.0534]) tensor([0.3316, 0.0859, 0.2994, 0.2831]) -Greedy action tensor([ 0.4190, -0.3481, 0.9737, 0.3464]) tensor([0.2418, 0.1123, 0.4211, 0.2249]) -Greedy action tensor([ 0.4678, 0.4661, -0.2219, -0.3648]) tensor([0.3407, 0.3401, 0.1709, 0.1482]) -Greedy action tensor([-0.6704, -1.4406, 0.1106, -0.6253]) tensor([0.2131, 0.0986, 0.4653, 0.2229]) -Greedy action tensor([ 1.1209, 0.6992, -0.0820, 0.5083]) tensor([0.4003, 0.2626, 0.1202, 0.2169]) -Greedy action tensor([-0.3820, 0.0181, -0.8879, -0.2223]) tensor([0.2343, 0.3496, 0.1413, 0.2749]) -Greedy action tensor([-0.2034, -1.3045, -0.4694, -0.6404]) tensor([0.3643, 0.1211, 0.2792, 0.2353]) -Greedy action tensor([-0.0329, -1.6935, -0.1200, -0.2212]) tensor([0.3407, 0.0647, 0.3123, 0.2822]) -Greedy action tensor([-0.3285, -1.7514, 1.1425, -1.1973]) tensor([0.1663, 0.0401, 0.7239, 0.0698]) -Greedy action tensor([-0.7417, -1.1806, 1.6820, -1.0405]) tensor([0.0731, 0.0472, 0.8255, 0.0542]) -Greedy action tensor([-0.0912, -1.0508, -0.1207, -0.6909]) tensor([0.3445, 0.1320, 0.3345, 0.1891]) -Greedy action tensor([-0.1999, -1.6155, 0.2166, -0.7365]) tensor([0.2990, 0.0726, 0.4535, 0.1749]) -Greedy action tensor([-0.7321, -0.7710, -0.2926, -1.1348]) tensor([0.2391, 0.2300, 0.3711, 0.1598]) -Greedy action tensor([-0.1643, 0.1226, -0.1892, -1.0523]) tensor([0.2689, 0.3582, 0.2623, 0.1106]) -Greedy action tensor([-1.1962, -0.5222, 0.5966, -1.4790]) tensor([0.1029, 0.2018, 0.6178, 0.0775]) -Greedy action tensor([ 0.0852, -0.4275, 0.2876, -0.5295]) tensor([0.2973, 0.1780, 0.3639, 0.1608]) -Greedy action tensor([ 0.2195, 0.2827, 0.9328, -0.3796]) tensor([0.2148, 0.2288, 0.4384, 0.1180]) -Greedy action tensor([ 0.7229, -0.0857, 0.2197, 0.2466]) tensor([0.3744, 0.1668, 0.2263, 0.2325]) -Greedy action tensor([-0.2521, -0.1359, -1.4492, 0.6564]) tensor([0.2038, 0.2290, 0.0616, 0.5056]) -Greedy action tensor([-0.6456, -0.9067, -0.8299, 0.6711]) tensor([0.1579, 0.1216, 0.1313, 0.5891]) -Greedy action tensor([ 0.5507, 0.0401, -1.0796, -0.4507]) tensor([0.4622, 0.2774, 0.0905, 0.1698]) -Greedy action tensor([ 0.7585, -0.6544, 0.5747, -0.0943]) tensor([0.3997, 0.0973, 0.3326, 0.1704]) -Greedy action tensor([ 1.3961, -0.3387, 0.9071, 1.0198]) tensor([0.4039, 0.0713, 0.2477, 0.2772]) -Greedy action tensor([ 0.3894, 0.7881, 0.7344, -0.6127]) tensor([0.2342, 0.3490, 0.3307, 0.0860]) -Greedy action tensor([ 0.1642, -0.5396, 1.0725, 0.2494]) tensor([0.1975, 0.0977, 0.4898, 0.2150]) -Greedy action tensor([ 0.6728, 0.4201, -0.4460, -0.4734]) tensor([0.4130, 0.3208, 0.1349, 0.1313]) -Greedy action tensor([ 0.2782, 0.9210, 0.4787, -0.2286]) tensor([0.2116, 0.4024, 0.2586, 0.1275]) -Greedy action tensor([-0.9487, -1.4937, -0.8235, 0.0683]) tensor([0.1825, 0.1058, 0.2069, 0.5047]) -Greedy action tensor([ 0.2105, -0.2055, -0.3872, -0.9919]) tensor([0.3984, 0.2628, 0.2191, 0.1197]) -Greedy action tensor([ 0.2883, -1.0980, -0.1744, -0.2857]) tensor([0.4094, 0.1023, 0.2577, 0.2306]) -Greedy action tensor([-0.3144, -0.7997, 0.2028, 1.1118]) tensor([0.1341, 0.0826, 0.2250, 0.5584]) -Greedy action tensor([ 0.6787, -1.1702, -0.6815, 0.1907]) tensor([0.4931, 0.0776, 0.1265, 0.3027]) -Greedy action tensor([ 0.1232, -0.1007, 0.0076, 0.0335]) tensor([0.2774, 0.2218, 0.2471, 0.2536]) -Greedy action tensor([-1.0356, -0.1219, -0.4507, -0.8927]) tensor([0.1552, 0.3871, 0.2786, 0.1791]) -Greedy action tensor([-0.7360, -0.2158, -0.0611, -0.8986]) tensor([0.1819, 0.3061, 0.3573, 0.1546]) -Greedy action tensor([ 1.1146e+00, -3.0611e-03, 5.7618e-01, -5.9462e-04]) tensor([0.4467, 0.1461, 0.2607, 0.1465]) -Greedy action tensor([-0.3121, -0.2275, -0.0096, -0.1207]) tensor([0.2149, 0.2339, 0.2909, 0.2603]) -Greedy action tensor([-0.0899, -0.2632, -1.2278, -0.4899]) tensor([0.3532, 0.2970, 0.1132, 0.2367]) -Greedy action tensor([-0.1712, -0.9243, 0.5598, -0.8411]) tensor([0.2463, 0.1160, 0.5116, 0.1261]) -Greedy action tensor([ 0.6327, -0.1163, 0.0710, -0.2299]) tensor([0.4057, 0.1918, 0.2313, 0.1712]) -Greedy action tensor([-0.1602, 0.3479, 0.0438, -0.2648]) tensor([0.2088, 0.3471, 0.2561, 0.1881]) -Greedy action tensor([-0.0229, 0.1670, -0.9784, -0.5027]) tensor([0.3113, 0.3763, 0.1197, 0.1926]) -Greedy action tensor([-0.0312, 0.0619, -0.3158, -0.0799]) tensor([0.2630, 0.2887, 0.1979, 0.2505]) -Greedy action tensor([-0.2171, -0.9347, -0.0496, -0.3599]) tensor([0.2827, 0.1379, 0.3343, 0.2451]) -Greedy action tensor([-0.2242, -1.3350, -0.1873, -0.3748]) tensor([0.3099, 0.1020, 0.3215, 0.2665]) -Greedy action tensor([-0.6766, -0.1896, -0.1350, -0.7313]) tensor([0.1889, 0.3075, 0.3247, 0.1789]) -Greedy action tensor([-0.1346, -1.1067, 0.6554, -0.1257]) tensor([0.2178, 0.0824, 0.4800, 0.2198]) -Greedy action tensor([-0.1613, -2.2068, -0.2656, 0.5862]) tensor([0.2414, 0.0312, 0.2175, 0.5098]) -Greedy action tensor([-1.5432, -1.6036, -0.1932, -0.8915]) tensor([0.1296, 0.1220, 0.4998, 0.2486]) -Greedy action tensor([-0.0416, 0.8354, 0.5129, -0.6266]) tensor([0.1754, 0.4216, 0.3053, 0.0977]) -Greedy action tensor([-0.8707, 0.0837, 0.0786, -0.0908]) tensor([0.1196, 0.3106, 0.3090, 0.2608]) -Greedy action tensor([ 1.3767, -0.8707, 0.3121, 0.0794]) tensor([0.5801, 0.0613, 0.2001, 0.1585]) -Greedy action tensor([ 0.3289, -1.3986, 0.5405, -0.1499]) tensor([0.3297, 0.0586, 0.4074, 0.2043]) -Greedy action tensor([-0.2210, -0.7281, 0.1447, -0.2518]) tensor([0.2492, 0.1501, 0.3592, 0.2416]) -Greedy action tensor([ 1.4971, -0.8273, -0.4304, 0.3987]) tensor([0.6342, 0.0621, 0.0923, 0.2115]) -Greedy action tensor([ 1.2899, 0.0045, -0.9016, 0.6124]) tensor([0.5274, 0.1458, 0.0589, 0.2678]) -Greedy action tensor([ 1.7048, -0.8267, -0.6223, 0.2516]) tensor([0.7088, 0.0564, 0.0692, 0.1657]) -Greedy action tensor([ 1.2560, -0.1099, -0.7114, -0.1261]) tensor([0.6075, 0.1550, 0.0849, 0.1525]) -Greedy action tensor([ 1.1948, -0.3705, -0.5935, 0.4809]) tensor([0.5359, 0.1120, 0.0896, 0.2625]) -Greedy action tensor([ 1.6889, -0.1958, -0.6816, 0.0784]) tensor([0.6920, 0.1051, 0.0647, 0.1383]) -Greedy action tensor([ 2.2555, 0.8223, -0.0259, 0.2643]) tensor([0.6769, 0.1615, 0.0691, 0.0924]) -Greedy action tensor([ 1.0388, -0.3495, -0.1278, 0.2622]) tensor([0.4948, 0.1235, 0.1541, 0.2276]) -Greedy action tensor([ 1.8138, 0.7416, -0.4469, -0.2351]) tensor([0.6348, 0.2173, 0.0662, 0.0818]) -Greedy action tensor([ 1.4770, -0.2775, -0.6243, 0.2357]) tensor([0.6312, 0.1092, 0.0772, 0.1824]) -Greedy action tensor([ 1.5757, 0.1307, -0.5755, 0.1772]) tensor([0.6254, 0.1474, 0.0728, 0.1544]) -Greedy action tensor([ 1.9306, -0.9066, -0.3520, 0.3304]) tensor([0.7340, 0.0430, 0.0749, 0.1482]) -Greedy action tensor([ 1.8920, -0.3717, -0.8234, 0.1634]) tensor([0.7420, 0.0771, 0.0491, 0.1317]) -Greedy action tensor([ 1.1402, -0.3643, -0.4569, 0.1432]) tensor([0.5575, 0.1238, 0.1129, 0.2057]) -Greedy action tensor([ 1.1795, -0.2894, -0.7769, 0.1562]) tensor([0.5777, 0.1330, 0.0817, 0.2076]) -Greedy action tensor([ 1.5982, -0.4685, -0.2113, 0.4387]) tensor([0.6235, 0.0789, 0.1021, 0.1955]) -Greedy action tensor([ 2.1741, -0.7723, -0.2697, 0.0964]) tensor([0.7908, 0.0415, 0.0687, 0.0990]) -Greedy action tensor([ 1.5478, -0.6343, -0.8573, 0.1293]) tensor([0.6920, 0.0781, 0.0625, 0.1675]) -Greedy action tensor([ 1.0744, -0.2474, -1.0281, 0.1834]) tensor([0.5558, 0.1482, 0.0679, 0.2280]) -Greedy action tensor([ 1.2355, -0.2544, -0.0860, 0.2487]) tensor([0.5362, 0.1209, 0.1430, 0.1999]) -Greedy action tensor([ 1.8095, -0.4444, -0.7685, 0.0637]) tensor([0.7378, 0.0775, 0.0560, 0.1288]) -Greedy action tensor([ 2.8313, 0.8890, -0.1586, -0.0587]) tensor([0.8005, 0.1148, 0.0403, 0.0445]) -Greedy action tensor([ 1.0075, -0.6381, -0.7628, 0.4217]) tensor([0.5209, 0.1005, 0.0887, 0.2900]) -Greedy action tensor([ 1.2034, 0.0487, -0.4240, 0.1209]) tensor([0.5404, 0.1703, 0.1062, 0.1831]) -Greedy action tensor([ 0.7563, -0.0201, 0.0192, -0.1310]) tensor([0.4255, 0.1957, 0.2036, 0.1752]) -Greedy action tensor([ 2.1932, -1.3890, 0.0149, 0.4046]) tensor([0.7644, 0.0213, 0.0866, 0.1278]) -Greedy action tensor([ 2.0099, -0.8452, 0.1289, 0.9578]) tensor([0.6414, 0.0369, 0.0978, 0.2240]) -Greedy action tensor([ 1.0584, -0.2394, -0.0755, 0.2028]) tensor([0.4951, 0.1352, 0.1593, 0.2104]) -Greedy action tensor([ 2.1715, -0.4789, 0.2251, 0.4402]) tensor([0.7192, 0.0508, 0.1027, 0.1273]) -Greedy action tensor([ 1.5627, -0.8622, -0.2880, 0.6842]) tensor([0.6020, 0.0533, 0.0946, 0.2501]) -Greedy action tensor([ 1.0819, -0.3748, -0.8958, 0.2281]) tensor([0.5564, 0.1296, 0.0770, 0.2369]) -Greedy action tensor([ 1.3014, -0.3466, -0.8918, 0.7913]) tensor([0.5251, 0.1010, 0.0586, 0.3153]) -Greedy action tensor([ 1.0198, -0.4170, -0.6372, 0.3077]) tensor([0.5211, 0.1239, 0.0994, 0.2557]) -Greedy action tensor([ 2.1940, -0.9617, -0.1977, 0.3617]) tensor([0.7727, 0.0329, 0.0707, 0.1237]) -Greedy action tensor([ 1.7598, -0.2190, -0.2527, -0.0070]) tensor([0.6931, 0.0958, 0.0926, 0.1184]) -Greedy action tensor([ 1.8501, -0.6032, -0.3635, 0.6145]) tensor([0.6730, 0.0579, 0.0736, 0.1956]) -Greedy action tensor([ 1.4447, -0.5634, -0.9910, 0.2823]) tensor([0.6517, 0.0875, 0.0570, 0.2038]) -Greedy action tensor([ 1.5681, -0.6379, -0.1644, 0.2779]) tensor([0.6401, 0.0705, 0.1132, 0.1762]) -Greedy action tensor([ 1.7913, -0.7204, -0.7176, 0.8940]) tensor([0.6369, 0.0517, 0.0518, 0.2596]) -Greedy action tensor([ 1.8069, 0.2470, 0.0321, -0.4518]) tensor([0.6738, 0.1416, 0.1142, 0.0704]) -Greedy action tensor([ 1.2759, -0.2939, -0.2158, 0.5342]) tensor([0.5237, 0.1090, 0.1178, 0.2495]) -Greedy action tensor([ 1.8880, -0.7691, -0.3369, 0.3075]) tensor([0.7225, 0.0507, 0.0781, 0.1487]) -Greedy action tensor([ 1.6365, -0.6628, -0.3962, 0.2505]) tensor([0.6750, 0.0677, 0.0884, 0.1688]) -Greedy action tensor([ 2.0205, -0.8236, 0.0262, 0.8169]) tensor([0.6692, 0.0389, 0.0911, 0.2008]) -Greedy action tensor([ 1.7205, -1.0306, 0.0458, 0.5543]) tensor([0.6399, 0.0409, 0.1199, 0.1994]) -Greedy action tensor([ 1.3231, -0.5348, -0.6568, 0.0041]) tensor([0.6404, 0.0999, 0.0884, 0.1712]) -Greedy action tensor([ 1.8053, -0.5313, -0.1808, -0.0601]) tensor([0.7201, 0.0696, 0.0988, 0.1115]) -Greedy action tensor([ 1.7277, 0.1704, -0.2578, 0.1245]) tensor([0.6455, 0.1360, 0.0886, 0.1299]) -Greedy action tensor([ 1.2373, -0.1731, -0.4808, -0.1123]) tensor([0.5942, 0.1450, 0.1066, 0.1541]) -Greedy action tensor([ 1.1853, -0.4071, -0.4763, 0.1737]) tensor([0.5692, 0.1158, 0.1081, 0.2070]) -Greedy action tensor([ 1.3820, -0.5544, -0.2563, 0.0687]) tensor([0.6221, 0.0897, 0.1209, 0.1673]) -Greedy action tensor([ 1.2575, -0.6733, -0.3017, 0.4820]) tensor([0.5507, 0.0799, 0.1158, 0.2536]) -Greedy action tensor([ 1.5437, 0.0693, -0.8266, 0.2316]) tensor([0.6283, 0.1438, 0.0587, 0.1692]) -Greedy action tensor([ 1.9060, -0.9786, -0.4951, 0.1720]) tensor([0.7558, 0.0422, 0.0685, 0.1335]) -Greedy action tensor([ 1.3799, 0.1467, -1.5562, -0.1205]) tensor([0.6380, 0.1859, 0.0339, 0.1423]) -Greedy action tensor([ 1.4599, 0.1890, -0.3399, -0.3213]) tensor([0.6194, 0.1738, 0.1024, 0.1043]) -Greedy action tensor([ 1.8337, -0.5281, -0.3435, 0.0576]) tensor([0.7263, 0.0684, 0.0823, 0.1230]) -Greedy action tensor([ 1.1609, -0.3501, -0.1341, 0.0019]) tensor([0.5530, 0.1220, 0.1515, 0.1735]) -Greedy action tensor([ 1.8297, 0.2962, -0.4415, -0.0375]) tensor([0.6786, 0.1464, 0.0700, 0.1049]) -Greedy action tensor([ 1.4769, -0.4132, -0.4663, 0.0607]) tensor([0.6507, 0.0983, 0.0932, 0.1579]) -Greedy action tensor([ 0.9205, -0.4910, 0.1615, 0.1582]) tensor([0.4590, 0.1119, 0.2149, 0.2142]) -Greedy action tensor([ 1.3579, -0.1983, -0.6416, 0.6450]) tensor([0.5445, 0.1149, 0.0737, 0.2669]) -Greedy action tensor([ 1.6867, -0.9898, -0.6147, 0.5494]) tensor([0.6713, 0.0462, 0.0672, 0.2153]) -Greedy action tensor([ 0.9890, -0.2594, -1.0692, 0.6547]) tensor([0.4694, 0.1347, 0.0599, 0.3360]) -Greedy action tensor([ 0.9526, -0.2653, -0.2222, 0.5632]) tensor([0.4382, 0.1296, 0.1353, 0.2969]) -Greedy action tensor([ 1.4753, -0.5630, -0.0366, 0.0748]) tensor([0.6261, 0.0815, 0.1381, 0.1543]) -Greedy action tensor([ 1.8689, -0.2909, -0.4954, 0.1886]) tensor([0.7165, 0.0826, 0.0674, 0.1335]) -Greedy action tensor([ 2.0682, -0.9463, -0.4152, 0.2612]) tensor([0.7712, 0.0378, 0.0644, 0.1266]) -Greedy action tensor([ 1.6549, -0.2134, -0.7834, 0.3396]) tensor([0.6622, 0.1022, 0.0578, 0.1777]) -Greedy action tensor([ 0.7502, -0.6614, -0.1529, -0.0629]) tensor([0.4779, 0.1165, 0.1937, 0.2119]) -Greedy action tensor([ 2.2174, -1.2244, 0.1691, 0.1237]) tensor([0.7787, 0.0249, 0.1004, 0.0960]) -Greedy action tensor([ 1.2872, -0.0970, -0.3758, 0.1341]) tensor([0.5696, 0.1427, 0.1080, 0.1798]) -Greedy action tensor([ 1.9848, -0.6960, -0.4024, 0.3621]) tensor([0.7365, 0.0505, 0.0677, 0.1454]) -Greedy action tensor([ 1.3625, -0.4579, -0.5544, 0.3642]) tensor([0.5961, 0.0965, 0.0877, 0.2197]) -Greedy action tensor([ 1.3422, -0.3938, -1.0819, 0.2597]) tensor([0.6236, 0.1099, 0.0552, 0.2113]) -Greedy action tensor([ 1.3648, -0.3492, -1.5088, 0.4184]) tensor([0.6155, 0.1109, 0.0348, 0.2389]) -Greedy action tensor([ 1.5604, -0.1681, -1.0445, 0.0373]) tensor([0.6805, 0.1208, 0.0503, 0.1484]) -Greedy action tensor([ 2.0420, -0.7972, -0.3366, 0.5948]) tensor([0.7213, 0.0422, 0.0669, 0.1697]) -Greedy action tensor([ 1.0452, -0.4442, -0.2212, 0.2874]) tensor([0.5061, 0.1141, 0.1426, 0.2372]) -Greedy action tensor([ 2.3218, -0.8135, -0.1805, -0.0629]) tensor([0.8213, 0.0357, 0.0673, 0.0757]) -Greedy action tensor([ 1.2713, -0.0675, -0.8959, 0.2141]) tensor([0.5800, 0.1521, 0.0664, 0.2015]) -Greedy action tensor([ 0.4671, -0.3202, 0.0671, -0.3985]) tensor([0.3927, 0.1787, 0.2633, 0.1653]) -Greedy action tensor([ 0.6734, -0.3554, -0.0120, -0.1957]) tensor([0.4385, 0.1567, 0.2209, 0.1839]) -Greedy action tensor([ 0.7145, -0.5204, -0.0571, -0.3128]) tensor([0.4737, 0.1378, 0.2190, 0.1696]) -Greedy action tensor([ 1.1767, -0.9314, -0.0164, -0.5933]) tensor([0.6269, 0.0761, 0.1901, 0.1068]) -Greedy action tensor([ 1.1937, -0.6601, -0.1239, -0.5754]) tensor([0.6270, 0.0982, 0.1679, 0.1069]) -Greedy action tensor([ 0.8531, -0.3433, 0.2290, -0.4784]) tensor([0.4757, 0.1438, 0.2549, 0.1256]) -Greedy action tensor([ 0.8471, -0.6396, 0.0281, -0.5130]) tensor([0.5199, 0.1175, 0.2292, 0.1334]) -Greedy action tensor([ 0.7492, -0.1612, -0.1371, -0.2239]) tensor([0.4561, 0.1835, 0.1880, 0.1724]) -Greedy action tensor([ 1.0567, -0.8731, 0.0323, -0.6249]) tensor([0.5916, 0.0859, 0.2124, 0.1101]) -Greedy action tensor([ 0.6643, -0.4858, -0.0058, -0.4272]) tensor([0.4621, 0.1463, 0.2364, 0.1551]) -Greedy action tensor([ 0.6691, -0.5771, -0.0440, -0.1695]) tensor([0.4525, 0.1301, 0.2218, 0.1956]) -Greedy action tensor([ 0.6707, -0.4418, -0.0990, -0.0814]) tensor([0.4418, 0.1453, 0.2046, 0.2083]) -Greedy action tensor([ 0.5262, -0.0741, -0.0029, -0.1675]) tensor([0.3791, 0.2080, 0.2234, 0.1895]) -Greedy action tensor([ 0.8415, -0.4148, 0.0375, -0.3517]) tensor([0.4913, 0.1399, 0.2199, 0.1490]) -Greedy action tensor([ 0.9906, -0.5701, -0.1667, -0.5481]) tensor([0.5751, 0.1208, 0.1808, 0.1234]) -Greedy action tensor([ 0.7499, -0.4447, -0.0542, -0.2173]) tensor([0.4694, 0.1421, 0.2100, 0.1784]) -Greedy action tensor([ 0.5008, -0.3743, 0.0583, -0.2560]) tensor([0.3955, 0.1649, 0.2541, 0.1856]) -Greedy action tensor([ 0.9787, -0.4295, -0.0189, -0.3959]) tensor([0.5358, 0.1311, 0.1976, 0.1355]) -Greedy action tensor([ 0.9709, -0.1995, -0.0712, -0.1834]) tensor([0.5055, 0.1568, 0.1783, 0.1594]) -Greedy action tensor([ 1.3347, -0.9726, 0.0537, -0.7027]) tensor([0.6633, 0.0660, 0.1842, 0.0865]) -Greedy action tensor([ 0.9815, -0.7273, 0.2382, -0.5357]) tensor([0.5331, 0.0965, 0.2535, 0.1169]) -Greedy action tensor([ 0.8194, -0.7260, -0.0838, -0.3243]) tensor([0.5162, 0.1101, 0.2092, 0.1645]) -Greedy action tensor([ 0.5563, -0.3842, -0.0615, -0.2400]) tensor([0.4201, 0.1640, 0.2265, 0.1894]) -Greedy action tensor([ 0.4295, -0.3607, -0.0584, -0.0186]) tensor([0.3695, 0.1676, 0.2268, 0.2360]) -Greedy action tensor([ 0.7687, -0.4214, -0.0556, -0.1829]) tensor([0.4697, 0.1429, 0.2060, 0.1814]) -Greedy action tensor([ 0.8487, -0.7998, 0.1820, -0.3395]) tensor([0.4974, 0.0957, 0.2554, 0.1516]) -Greedy action tensor([ 0.7077, -0.6587, 0.1001, -0.1791]) tensor([0.4522, 0.1153, 0.2463, 0.1863]) -Greedy action tensor([ 1.3202, -0.7999, -0.0893, -0.6217]) tensor([0.6633, 0.0796, 0.1620, 0.0951]) -Greedy action tensor([ 0.7806, -0.6876, 0.2143, -0.4052]) tensor([0.4754, 0.1095, 0.2698, 0.1452]) -Greedy action tensor([ 0.6219, -0.1366, 0.1040, -0.0525]) tensor([0.3886, 0.1820, 0.2315, 0.1979]) -Greedy action tensor([ 1.0977, -0.7047, -0.0845, -0.6066]) tensor([0.6048, 0.0997, 0.1854, 0.1100]) -Greedy action tensor([ 0.5699, -0.1097, -0.0175, -0.0008]) tensor([0.3806, 0.1929, 0.2115, 0.2151]) -Greedy action tensor([ 0.8943, -0.7250, 0.0318, -0.2645]) tensor([0.5171, 0.1024, 0.2183, 0.1623]) -Greedy action tensor([ 0.6216, 0.2887, 0.1097, -0.5281]) tensor([0.3798, 0.2723, 0.2276, 0.1203]) -Greedy action tensor([ 1.1462, -0.6796, 0.0358, -0.5689]) tensor([0.5987, 0.0964, 0.1972, 0.1077]) -Greedy action tensor([ 0.5630, -0.3419, -0.1496, -0.2226]) tensor([0.4254, 0.1721, 0.2086, 0.1939]) -Greedy action tensor([ 1.0613, -0.8505, -0.0071, -0.5234]) tensor([0.5895, 0.0871, 0.2025, 0.1209]) -Greedy action tensor([ 0.3408, -0.3301, -0.1345, -0.0352]) tensor([0.3547, 0.1813, 0.2205, 0.2435]) -Greedy action tensor([ 0.5233, 0.1575, -0.1486, 0.1201]) tensor([0.3481, 0.2415, 0.1778, 0.2326]) -Greedy action tensor([ 0.5264, -0.2003, -0.0289, -0.1650]) tensor([0.3909, 0.1890, 0.2243, 0.1958]) -Greedy action tensor([ 0.9968, -0.5317, 0.0382, -0.3392]) tensor([0.5367, 0.1164, 0.2058, 0.1411]) -Greedy action tensor([ 0.4846, -0.0255, 0.0184, -0.4120]) tensor([0.3794, 0.2278, 0.2380, 0.1548]) -Greedy action tensor([ 0.5426, -0.6236, -0.0593, -0.2617]) tensor([0.4335, 0.1351, 0.2375, 0.1939]) -Greedy action tensor([ 0.6560, -0.0227, -0.0951, 0.0054]) tensor([0.3999, 0.2028, 0.1887, 0.2086]) -Greedy action tensor([ 0.3659, -0.0828, -0.0433, -0.3421]) tensor([0.3577, 0.2284, 0.2376, 0.1762]) -Greedy action tensor([ 0.4803, -0.1107, 0.0366, -0.2004]) tensor([0.3701, 0.2050, 0.2375, 0.1874]) -Greedy action tensor([ 0.8475, -0.7899, 0.0557, -0.7142]) tensor([0.5384, 0.1047, 0.2439, 0.1129]) -Greedy action tensor([ 0.5720, -0.1881, -0.0305, -0.1471]) tensor([0.3996, 0.1869, 0.2188, 0.1947]) -Greedy action tensor([ 0.7953, -0.4693, 0.1119, -0.1293]) tensor([0.4579, 0.1293, 0.2312, 0.1816]) -Greedy action tensor([ 1.1589, -0.7181, -0.0987, -0.6704]) tensor([0.6258, 0.0958, 0.1779, 0.1005]) -Greedy action tensor([ 1.0736, -0.8348, 0.0869, -0.5271]) tensor([0.5804, 0.0861, 0.2164, 0.1171]) -Greedy action tensor([ 0.5500, -0.1296, -0.0758, -0.0637]) tensor([0.3872, 0.1962, 0.2071, 0.2096]) -Greedy action tensor([ 0.8679, -0.3956, -0.0455, -0.1533]) tensor([0.4893, 0.1383, 0.1963, 0.1762]) -Greedy action tensor([ 0.7046, -0.5209, -0.0179, -0.2029]) tensor([0.4582, 0.1345, 0.2224, 0.1849]) -Greedy action tensor([ 0.4731, 0.1944, -0.1587, 0.1064]) tensor([0.3354, 0.2538, 0.1783, 0.2324]) -Greedy action tensor([ 0.5395, -0.2391, 0.0133, -0.2283]) tensor([0.3978, 0.1826, 0.2350, 0.1846]) -Greedy action tensor([ 1.1244, -0.9384, 0.0283, -0.6204]) tensor([0.6113, 0.0777, 0.2043, 0.1068]) -Greedy action tensor([ 0.6842, -0.4659, -0.0786, 0.0294]) tensor([0.4343, 0.1375, 0.2025, 0.2257]) -Greedy action tensor([ 0.6737, -0.1168, 0.0366, -0.1088]) tensor([0.4099, 0.1859, 0.2168, 0.1874]) -Greedy action tensor([ 0.8436, -0.6282, -0.0377, -0.5334]) tensor([0.5274, 0.1210, 0.2185, 0.1331]) -Greedy action tensor([ 0.5146, -0.2724, -0.1476, -0.0853]) tensor([0.3969, 0.1807, 0.2047, 0.2178]) -Greedy action tensor([ 0.6528, -0.4782, 0.0558, -0.2810]) tensor([0.4413, 0.1424, 0.2429, 0.1734]) -Greedy action tensor([ 0.3458, 0.2509, -0.0402, -0.2608]) tensor([0.3190, 0.2902, 0.2169, 0.1739]) -Greedy action tensor([ 0.7202, -0.4714, -0.1790, -0.4735]) tensor([0.4966, 0.1508, 0.2021, 0.1505]) -Greedy action tensor([ 0.4614, -0.2397, -0.0452, -0.2026]) tensor([0.3826, 0.1898, 0.2306, 0.1970]) -Greedy action tensor([ 0.7943, 0.0133, 0.0371, -0.1208]) tensor([0.4297, 0.1968, 0.2015, 0.1721]) -Greedy action tensor([ 1.3184, -0.7255, -0.1068, -1.0543]) tensor([0.6834, 0.0885, 0.1643, 0.0637]) -Greedy action tensor([ 0.2460, 0.4171, -0.0811, 0.1431]) tensor([0.2625, 0.3115, 0.1893, 0.2368]) -Greedy action tensor([ 0.7669, -0.5450, -0.0240, -0.3729]) tensor([0.4896, 0.1318, 0.2220, 0.1566]) -Greedy action tensor([ 0.7857, 0.1571, -0.0885, 0.1848]) tensor([0.4002, 0.2134, 0.1670, 0.2194]) -Greedy action tensor([ 0.8780, -0.4520, -0.0792, -0.3184]) tensor([0.5126, 0.1356, 0.1968, 0.1550]) -Greedy action tensor([ 1.0383, 0.0643, -0.0135, -0.2583]) tensor([0.4999, 0.1887, 0.1746, 0.1367]) -Greedy action tensor([ 0.9359, -0.3127, 0.0961, -0.1308]) tensor([0.4848, 0.1391, 0.2093, 0.1668]) -Greedy action tensor([ 0.9486, -0.5672, -0.0671, -0.7426]) tensor([0.5662, 0.1244, 0.2051, 0.1044]) -Greedy action tensor([ 0.7966, -0.4774, 0.0133, -0.3445]) tensor([0.4864, 0.1360, 0.2222, 0.1554]) -Greedy action tensor([ 0.9005, -0.5828, 0.0702, -0.3461]) tensor([0.5128, 0.1163, 0.2235, 0.1474]) -Greedy action tensor([ 1.0829, -0.7009, 0.1454, -0.6728]) tensor([0.5772, 0.0970, 0.2260, 0.0997]) -Greedy action tensor([ 0.5358, -0.3831, 0.0593, -0.3912]) tensor([0.4140, 0.1652, 0.2571, 0.1638]) -Greedy action tensor([ 1.0301, -0.2619, 0.0043, -0.1169]) tensor([0.5126, 0.1408, 0.1838, 0.1628]) -Greedy action tensor([ 0.5157, -0.1102, -0.0249, -0.0515]) tensor([0.3725, 0.1992, 0.2170, 0.2113]) -Greedy action tensor([ 0.7476, -0.1401, 0.0708, -0.0297]) tensor([0.4203, 0.1730, 0.2136, 0.1932]) -Greedy action tensor([-1.6388, -0.2522, 0.4761, -0.0915]) tensor([0.0556, 0.2224, 0.4608, 0.2612]) -Greedy action tensor([-1.6296, -0.1112, 0.4800, -0.0896]) tensor([0.0541, 0.2471, 0.4463, 0.2525]) -Greedy action tensor([-1.7508, -0.2893, 0.6138, -0.0734]) tensor([0.0469, 0.2024, 0.4994, 0.2512]) -Greedy action tensor([-1.5645, -0.2341, 0.5031, -0.1415]) tensor([0.0594, 0.2246, 0.4695, 0.2465]) -Greedy action tensor([0.0195, 0.5842, 0.0337, 0.0018]) tensor([0.2103, 0.3699, 0.2133, 0.2066]) -Greedy action tensor([-1.2924, 0.4941, 0.2589, -0.0305]) tensor([0.0657, 0.3922, 0.3100, 0.2321]) -Greedy action tensor([-1.2195, 0.2743, 0.3591, -0.1659]) tensor([0.0759, 0.3382, 0.3681, 0.2178]) -Greedy action tensor([-1.8473, -0.1429, 0.6086, -0.2116]) tensor([0.0429, 0.2361, 0.5006, 0.2204]) -Greedy action tensor([-1.7033, -0.1589, 0.5779, -0.0200]) tensor([0.0479, 0.2246, 0.4693, 0.2581]) -Greedy action tensor([-0.6606, 0.5609, 0.1125, -0.0456]) tensor([0.1189, 0.4034, 0.2577, 0.2200]) -Greedy action tensor([-1.8711, -0.4592, 0.6318, -0.1596]) tensor([0.0437, 0.1795, 0.5345, 0.2422]) -Greedy action tensor([-1.1863, 0.7791, 0.1799, 0.1491]) tensor([0.0631, 0.4501, 0.2472, 0.2397]) -Greedy action tensor([-1.3297, 0.3919, 0.2287, 0.1876]) tensor([0.0629, 0.3517, 0.2987, 0.2867]) -Greedy action tensor([-1.9361, -0.4460, 0.6690, -0.1722]) tensor([0.0403, 0.1789, 0.5455, 0.2353]) -Greedy action tensor([-1.5337, -0.4724, 0.4528, 0.0342]) tensor([0.0626, 0.1809, 0.4563, 0.3002]) -Greedy action tensor([-1.9127, -0.4625, 0.6496, -0.1658]) tensor([0.0417, 0.1779, 0.5410, 0.2394]) -Greedy action tensor([-1.8731, -0.3896, 0.6334, -0.1353]) tensor([0.0428, 0.1888, 0.5250, 0.2434]) -Greedy action tensor([-1.9277, -0.4479, 0.6604, -0.1711]) tensor([0.0408, 0.1794, 0.5433, 0.2365]) -Greedy action tensor([-1.8912, -0.3525, 0.6333, -0.1501]) tensor([0.0419, 0.1954, 0.5235, 0.2392]) -Greedy action tensor([-1.8522, -0.3656, 0.6134, -0.1441]) tensor([0.0440, 0.1947, 0.5183, 0.2430]) -Greedy action tensor([-1.8534, -0.4033, 0.6161, -0.1432]) tensor([0.0442, 0.1886, 0.5226, 0.2446]) -Greedy action tensor([-1.9108, -0.4500, 0.6565, -0.1606]) tensor([0.0415, 0.1788, 0.5408, 0.2389]) -Greedy action tensor([-1.3034, 0.6272, 0.2825, -0.0867]) tensor([0.0619, 0.4268, 0.3023, 0.2090]) -Greedy action tensor([-1.4112, -0.4575, 0.4587, -0.1369]) tensor([0.0732, 0.1900, 0.4750, 0.2618]) -Greedy action tensor([-1.8224, -0.4787, 0.6515, -0.0240]) tensor([0.0440, 0.1686, 0.5219, 0.2656]) -Greedy action tensor([-1.9026, -0.4318, 0.6453, -0.1598]) tensor([0.0419, 0.1825, 0.5359, 0.2396]) -Greedy action tensor([ 1.4165, -0.1752, 0.8541, 1.0804]) tensor([0.4019, 0.0818, 0.2290, 0.2872]) -Greedy action tensor([-0.7865, 0.5632, -0.1530, -0.3216]) tensor([0.1200, 0.4628, 0.2261, 0.1910]) -Greedy action tensor([-1.8834, -0.3289, 0.6413, -0.1358]) tensor([0.0417, 0.1975, 0.5212, 0.2396]) -Greedy action tensor([-1.5054, 0.4758, 0.3381, 0.0463]) tensor([0.0518, 0.3759, 0.3276, 0.2447]) -Greedy action tensor([-1.8786, -0.4529, 0.6412, -0.1459]) tensor([0.0430, 0.1790, 0.5346, 0.2433]) -Greedy action tensor([-1.9264, -0.4522, 0.6812, -0.1571]) tensor([0.0403, 0.1761, 0.5470, 0.2366]) -Greedy action tensor([-1.6798, -0.0780, 0.5048, -0.1060]) tensor([0.0508, 0.2522, 0.4517, 0.2452]) -Greedy action tensor([-1.9022, -0.4387, 0.6438, -0.1619]) tensor([0.0421, 0.1817, 0.5365, 0.2397]) -Greedy action tensor([-1.8512, -0.3873, 0.6479, -0.1015]) tensor([0.0430, 0.1860, 0.5236, 0.2474]) -Greedy action tensor([-1.8714, -0.3694, 0.6236, -0.1354]) tensor([0.0429, 0.1928, 0.5206, 0.2437]) -Greedy action tensor([-0.8273, 0.7496, 0.0953, 0.0262]) tensor([0.0934, 0.4522, 0.2350, 0.2193]) -Greedy action tensor([-1.9443, -0.4484, 0.6678, -0.1795]) tensor([0.0401, 0.1790, 0.5466, 0.2343]) -Greedy action tensor([-1.9271, -0.4125, 0.6561, -0.1707]) tensor([0.0407, 0.1850, 0.5387, 0.2356]) -Greedy action tensor([0.5312, 0.4076, 0.5360, 1.3184]) tensor([0.1966, 0.1738, 0.1976, 0.4320]) -Greedy action tensor([-1.8542, -0.1683, 0.5922, -0.1298]) tensor([0.0425, 0.2292, 0.4902, 0.2381]) -Greedy action tensor([6.3519e-01, 1.3000e+00, 1.0329e-03, 4.8094e-01]) tensor([0.2309, 0.4488, 0.1224, 0.1979]) -Greedy action tensor([-1.9227, -0.4526, 0.6613, -0.1701]) tensor([0.0410, 0.1785, 0.5437, 0.2368]) -Greedy action tensor([-1.2357, -0.1792, 0.4986, 0.7058]) tensor([0.0606, 0.1742, 0.3431, 0.4221]) -Greedy action tensor([-1.6518, -0.0065, 0.5148, -0.0074]) tensor([0.0498, 0.2580, 0.4345, 0.2577]) -Greedy action tensor([-1.9236, -0.4085, 0.6553, -0.1683]) tensor([0.0408, 0.1856, 0.5377, 0.2360]) -Greedy action tensor([-1.6135, -0.3002, 0.4829, -0.0693]) tensor([0.0570, 0.2120, 0.4639, 0.2671]) -Greedy action tensor([-1.4725, -0.1211, 0.3885, -0.0200]) tensor([0.0642, 0.2481, 0.4131, 0.2745]) -Greedy action tensor([-1.8776, -0.4142, 0.6209, -0.1352]) tensor([0.0431, 0.1863, 0.5244, 0.2462]) -Greedy action tensor([-1.7260, -0.4647, 0.6884, 0.0918]) tensor([0.0457, 0.1614, 0.5113, 0.2816]) -Greedy action tensor([-1.0228, 0.6269, 0.1179, 0.2420]) tensor([0.0777, 0.4043, 0.2430, 0.2751]) -Greedy action tensor([-1.0284, 0.3845, -0.2085, -0.0896]) tensor([0.1007, 0.4135, 0.2285, 0.2574]) -Greedy action tensor([-1.8661, -0.4272, 0.6242, -0.1430]) tensor([0.0437, 0.1843, 0.5272, 0.2448]) -Greedy action tensor([-1.7865, -0.4329, 0.6007, -0.0837]) tensor([0.0471, 0.1822, 0.5123, 0.2584]) -Greedy action tensor([-1.8775, -0.4586, 0.6402, -0.1476]) tensor([0.0432, 0.1783, 0.5351, 0.2434]) -Greedy action tensor([-1.9254, -0.4183, 0.6540, -0.1668]) tensor([0.0408, 0.1842, 0.5382, 0.2368]) -Greedy action tensor([-1.7343, 0.1235, 0.4186, -0.1439]) tensor([0.0478, 0.3063, 0.4115, 0.2344]) -Greedy action tensor([-1.6807, -0.5316, 0.5378, -0.0065]) tensor([0.0535, 0.1689, 0.4921, 0.2855]) -Greedy action tensor([-1.8698, -0.3498, 0.6339, -0.1313]) tensor([0.0426, 0.1947, 0.5206, 0.2422]) -Greedy action tensor([-1.9374, -0.4454, 0.6659, -0.1743]) tensor([0.0403, 0.1794, 0.5450, 0.2352]) -Greedy action tensor([-1.9165, -0.4137, 0.6541, -0.1653]) tensor([0.0411, 0.1847, 0.5374, 0.2368]) -Greedy action tensor([-0.7762, 0.5469, 0.0683, -0.4152]) tensor([0.1174, 0.4409, 0.2732, 0.1685]) -Greedy action tensor([ 0.6018, -0.0417, 0.7037, 1.2773]) tensor([0.2175, 0.1143, 0.2408, 0.4274]) -Greedy action tensor([-1.8839, -0.4227, 0.6438, -0.1427]) tensor([0.0425, 0.1831, 0.5321, 0.2423]) -Greedy action tensor([-1.8490, -0.2220, 0.5983, -0.0968]) tensor([0.0427, 0.2173, 0.4936, 0.2463]) -Greedy action tensor([-1.8047, -0.4627, 0.6011, -0.1082]) tensor([0.0468, 0.1791, 0.5188, 0.2553]) -Greedy action tensor([-1.0474, -0.0768, 0.1198, 0.4177]) tensor([0.0894, 0.2361, 0.2874, 0.3871]) -Greedy action tensor([-1.2464, -0.2916, 0.5280, 0.4377]) tensor([0.0672, 0.1746, 0.3962, 0.3620]) -Greedy action tensor([-1.4378, 0.6782, 0.2633, 0.0830]) tensor([0.0517, 0.4287, 0.2831, 0.2364]) -Greedy action tensor([-1.8957, -0.4280, 0.6412, -0.1576]) tensor([0.0423, 0.1834, 0.5341, 0.2403]) -Greedy action tensor([-1.8255, -0.2430, 0.5799, -0.1232]) tensor([0.0446, 0.2169, 0.4940, 0.2445]) -Greedy action tensor([-1.9374, -0.4501, 0.6644, -0.1781]) tensor([0.0404, 0.1790, 0.5456, 0.2350]) -Greedy action tensor([-0.5906, -0.0389, 0.2921, 0.7710]) tensor([0.1104, 0.1917, 0.2669, 0.4309]) -Greedy action tensor([-1.7423e+00, -1.0780e-04, 5.2194e-01, -1.4002e-01]) tensor([0.0470, 0.2681, 0.4519, 0.2331]) -Greedy action tensor([-1.8989, -0.4667, 0.6531, -0.1527]) tensor([0.0421, 0.1763, 0.5403, 0.2413]) -Greedy action tensor([-1.5573, 0.2660, 0.3998, -0.0589]) tensor([0.0533, 0.3303, 0.3776, 0.2387]) -Greedy action tensor([-1.2904, 0.2790, 0.2707, -0.0171]) tensor([0.0707, 0.3397, 0.3369, 0.2526]) -Greedy action tensor([-1.9024, -0.4570, 0.6510, -0.1614]) tensor([0.0420, 0.1783, 0.5400, 0.2397]) -Greedy action tensor([-1.8922, -0.4187, 0.6424, -0.1430]) tensor([0.0421, 0.1840, 0.5316, 0.2423]) -Greedy action tensor([-1.9134, -0.4381, 0.6532, -0.1631]) tensor([0.0414, 0.1810, 0.5392, 0.2384]) -Greedy action tensor([-1.9106, -0.4363, 0.6497, -0.1639]) tensor([0.0416, 0.1817, 0.5382, 0.2386]) -Greedy action tensor([-0.2689, -0.8456, -1.3908, 0.7536]) tensor([0.2143, 0.1204, 0.0698, 0.5956]) -Greedy action tensor([-0.2821, -0.4152, 0.6613, -0.1989]) tensor([0.1808, 0.1583, 0.4644, 0.1965]) -Greedy action tensor([ 0.7398, -1.1695, 0.5508, 0.0182]) tensor([0.4062, 0.0602, 0.3362, 0.1974]) -Greedy action tensor([ 0.2386, -0.3301, 0.2152, -0.2153]) tensor([0.3146, 0.1782, 0.3074, 0.1998]) -Greedy action tensor([-0.0182, 0.0992, 0.0615, -0.0695]) tensor([0.2405, 0.2705, 0.2605, 0.2285]) -Greedy action tensor([-1.4310, -0.4602, -0.6067, 0.4504]) tensor([0.0801, 0.2115, 0.1827, 0.5257]) -Greedy action tensor([ 0.5973, -0.7503, 0.4100, -0.2900]) tensor([0.3999, 0.1039, 0.3316, 0.1647]) -Greedy action tensor([-0.1333, -0.7563, -0.4258, -0.3125]) tensor([0.3207, 0.1720, 0.2393, 0.2680]) -Greedy action tensor([ 1.5223, -1.4303, -0.1447, 0.8027]) tensor([0.5787, 0.0302, 0.1093, 0.2818]) -Greedy action tensor([-0.4646, -0.0179, -0.8876, -0.3085]) tensor([0.2279, 0.3563, 0.1493, 0.2665]) -Greedy action tensor([ 0.9047, -0.5104, 0.1758, -0.8346]) tensor([0.5260, 0.1278, 0.2538, 0.0924]) -Greedy action tensor([-0.4111, -0.1072, 1.0383, -0.6621]) tensor([0.1353, 0.1833, 0.5762, 0.1052]) -Greedy action tensor([ 0.3910, 0.4919, -0.5715, -0.8559]) tensor([0.3603, 0.3985, 0.1376, 0.1035]) -Greedy action tensor([-0.4578, -0.8739, 0.7145, -0.9332]) tensor([0.1815, 0.1197, 0.5860, 0.1128]) -Greedy action tensor([ 0.1787, -1.6350, -0.3388, 0.0179]) tensor([0.3831, 0.0625, 0.2283, 0.3262]) -Greedy action tensor([-0.2770, 0.1234, 0.1299, -0.3921]) tensor([0.2047, 0.3055, 0.3075, 0.1824]) -Greedy action tensor([-0.3036, -0.8261, -0.2109, 0.1704]) tensor([0.2327, 0.1380, 0.2554, 0.3739]) -Greedy action tensor([-1.2037, -0.0680, 0.5751, -1.3832]) tensor([0.0920, 0.2864, 0.5448, 0.0769]) -Greedy action tensor([ 0.2060, 0.6606, -0.4925, -0.3226]) tensor([0.2730, 0.4302, 0.1358, 0.1609]) -Greedy action tensor([ 0.1782, -1.3651, 0.7024, -0.8559]) tensor([0.3069, 0.0656, 0.5184, 0.1091]) -Greedy action tensor([-0.1398, 0.9416, 0.2784, 0.0866]) tensor([0.1488, 0.4387, 0.2260, 0.1866]) -Greedy action tensor([ 0.0740, -0.9645, -0.6810, 0.1060]) tensor([0.3501, 0.1239, 0.1645, 0.3615]) -Greedy action tensor([-0.4522, -0.5160, -0.5361, -0.9564]) tensor([0.2889, 0.2710, 0.2656, 0.1745]) -Greedy action tensor([-0.0303, -0.4148, -0.2510, -0.4536]) tensor([0.3187, 0.2170, 0.2556, 0.2087]) -Greedy action tensor([-0.1494, 0.5524, -0.4462, -0.0292]) tensor([0.2046, 0.4127, 0.1520, 0.2307]) -Greedy action tensor([ 0.9288, -0.4896, 0.2560, 0.6290]) tensor([0.4011, 0.0971, 0.2047, 0.2972]) -Greedy action tensor([-0.2529, -0.8586, 0.1196, -0.9125]) tensor([0.2846, 0.1553, 0.4130, 0.1471]) -Greedy action tensor([ 0.4890, -0.1365, -0.4102, 0.2502]) tensor([0.3664, 0.1960, 0.1491, 0.2885]) -Greedy action tensor([-0.4858, 0.4190, -0.5409, 0.0511]) tensor([0.1632, 0.4033, 0.1544, 0.2791]) -Greedy action tensor([ 0.9767, -1.3538, -0.3356, 0.4939]) tensor([0.5042, 0.0490, 0.1357, 0.3111]) -Greedy action tensor([ 0.6543, -0.4049, -0.0736, -0.2648]) tensor([0.4487, 0.1556, 0.2167, 0.1790]) -Greedy action tensor([-0.1626, -0.6670, -0.0812, -0.9596]) tensor([0.3185, 0.1924, 0.3456, 0.1436]) -Greedy action tensor([-0.2384, -0.6880, 0.4330, -0.7391]) tensor([0.2380, 0.1518, 0.4659, 0.1443]) -Greedy action tensor([-0.8730, -0.8778, -0.3205, -1.0686]) tensor([0.2195, 0.2185, 0.3815, 0.1805]) -Greedy action tensor([ 1.9073, -0.5438, 1.3102, 0.9401]) tensor([0.4958, 0.0427, 0.2729, 0.1885]) -Greedy action tensor([-0.1176, -0.6561, 0.4597, -0.2015]) tensor([0.2334, 0.1362, 0.4157, 0.2146]) -Greedy action tensor([-0.8541, -0.3436, 0.4434, -1.5063]) tensor([0.1460, 0.2433, 0.5345, 0.0761]) -Greedy action tensor([ 0.1568, -1.0060, 0.2911, -0.3831]) tensor([0.3290, 0.1029, 0.3763, 0.1918]) -Greedy action tensor([-0.8534, -0.2238, -0.9423, -0.4305]) tensor([0.1880, 0.3529, 0.1720, 0.2870]) -Greedy action tensor([ 0.6092, 0.2397, 0.0762, -0.1676]) tensor([0.3653, 0.2524, 0.2143, 0.1680]) -Greedy action tensor([ 1.0931, -0.5012, 0.4702, 0.6349]) tensor([0.4216, 0.0856, 0.2261, 0.2666]) -Greedy action tensor([-0.3569, -0.1398, -0.2499, -0.6218]) tensor([0.2426, 0.3014, 0.2700, 0.1861]) -Greedy action tensor([-0.1927, -1.6351, -0.7356, 0.5178]) tensor([0.2596, 0.0614, 0.1508, 0.5282]) -Greedy action tensor([ 0.0765, 0.2667, 0.1231, -0.2823]) tensor([0.2528, 0.3058, 0.2649, 0.1766]) -Greedy action tensor([-0.0233, -0.3481, -0.4037, -0.1721]) tensor([0.3060, 0.2211, 0.2092, 0.2637]) -Greedy action tensor([-0.7644, -0.9748, 0.2128, -1.0754]) tensor([0.1923, 0.1558, 0.5109, 0.1409]) -Greedy action tensor([-0.1192, -1.5299, 1.6547, -0.5773]) tensor([0.1287, 0.0314, 0.7585, 0.0814]) -Greedy action tensor([ 0.9604, -1.5604, -0.1145, 0.2231]) tensor([0.5263, 0.0423, 0.1796, 0.2518]) -Greedy action tensor([ 1.0915, 0.3152, -0.2928, 0.9094]) tensor([0.3931, 0.1808, 0.0985, 0.3276]) -Greedy action tensor([-0.4010, -0.4729, -0.4455, -0.1675]) tensor([0.2410, 0.2242, 0.2305, 0.3043]) -Greedy action tensor([-0.3475, -0.8588, 0.7340, -0.9361]) tensor([0.1959, 0.1175, 0.5778, 0.1088]) -Greedy action tensor([ 0.0726, -2.0946, -0.5780, -0.2910]) tensor([0.4289, 0.0491, 0.2238, 0.2982]) -Greedy action tensor([-0.3020, -0.9207, 0.6001, -0.9492]) tensor([0.2209, 0.1190, 0.5445, 0.1156]) -Greedy action tensor([-0.6446, -0.1202, -0.7305, -0.5314]) tensor([0.2115, 0.3574, 0.1941, 0.2369]) -Greedy action tensor([ 0.5371, -1.4554, 0.0162, 0.0207]) tensor([0.4297, 0.0586, 0.2553, 0.2564]) -Greedy action tensor([ 0.6458, -0.5506, 0.2619, 0.5610]) tensor([0.3446, 0.1042, 0.2347, 0.3166]) -Greedy action tensor([-0.1592, -0.3125, -0.4773, -0.1798]) tensor([0.2805, 0.2406, 0.2041, 0.2748]) -Greedy action tensor([ 0.2246, -0.4797, 0.7260, -1.0571]) tensor([0.2921, 0.1444, 0.4823, 0.0811]) -Greedy action tensor([ 0.2469, -0.3937, -0.1284, 0.4027]) tensor([0.2956, 0.1558, 0.2031, 0.3455]) -Greedy action tensor([ 0.7350, -1.8536, -0.1190, 0.1279]) tensor([0.4888, 0.0367, 0.2081, 0.2664]) -Greedy action tensor([ 0.2558, 0.8273, -1.0417, -0.4302]) tensor([0.2819, 0.4992, 0.0770, 0.1419]) -Greedy action tensor([ 0.8236, -0.4078, 0.2172, 0.5783]) tensor([0.3817, 0.1114, 0.2082, 0.2987]) -Greedy action tensor([-0.8060, -0.4700, -0.5946, 0.0082]) tensor([0.1697, 0.2375, 0.2097, 0.3831]) -Greedy action tensor([ 0.5625, -0.4802, 0.7216, 0.4349]) tensor([0.2937, 0.1035, 0.3443, 0.2585]) -Greedy action tensor([-0.1501, -0.1490, 0.6576, -0.3211]) tensor([0.1966, 0.1968, 0.4409, 0.1657]) -Greedy action tensor([ 0.0093, -0.8866, 0.0446, -0.8668]) tensor([0.3496, 0.1427, 0.3621, 0.1456]) -Greedy action tensor([-0.1892, -0.4103, 0.0112, -0.4085]) tensor([0.2613, 0.2095, 0.3193, 0.2099]) -Greedy action tensor([-0.7304, -1.3439, 1.1787, -0.3549]) tensor([0.1026, 0.0556, 0.6924, 0.1494]) -Greedy action tensor([ 0.4188, -0.7211, -0.2493, -0.5817]) tensor([0.4545, 0.1454, 0.2330, 0.1671]) -Greedy action tensor([-0.1735, -0.4524, 0.5479, -0.6548]) tensor([0.2256, 0.1707, 0.4642, 0.1394]) -Greedy action tensor([-0.8840, -0.8634, -0.2054, -0.9021]) tensor([0.2011, 0.2052, 0.3963, 0.1974]) -Greedy action tensor([-0.0414, -0.7283, -0.4327, 0.2442]) tensor([0.2849, 0.1434, 0.1927, 0.3791]) -Greedy action tensor([ 0.0554, -1.0992, 0.2926, -0.4577]) tensor([0.3143, 0.0991, 0.3985, 0.1882]) -Greedy action tensor([0.3267, 0.1916, 0.3672, 0.9042]) tensor([0.2129, 0.1860, 0.2217, 0.3793]) -Greedy action tensor([ 0.6150, 0.1087, 0.3143, -0.2462]) tensor([0.3616, 0.2179, 0.2677, 0.1528]) -Greedy action tensor([-0.2752, -1.5342, 0.4814, -1.1691]) tensor([0.2615, 0.0743, 0.5573, 0.1070]) -Greedy action tensor([ 0.7428, -1.2863, 1.0950, -0.7795]) tensor([0.3608, 0.0474, 0.5131, 0.0787]) -Greedy action tensor([ 0.8518, -1.2989, 0.1716, 1.0834]) tensor([0.3468, 0.0404, 0.1757, 0.4372]) -Greedy action tensor([ 0.6613, -1.3175, 0.4419, -0.7108]) tensor([0.4556, 0.0630, 0.3659, 0.1155]) -Greedy action tensor([-0.5794, -0.7852, -1.1064, -0.0233]) tensor([0.2411, 0.1962, 0.1423, 0.4204]) -Greedy action tensor([ 0.3378, 0.1058, -0.5398, -0.1307]) tensor([0.3528, 0.2797, 0.1467, 0.2208]) -Greedy action tensor([ 1.1426, -0.3008, -0.5472, 0.2141]) tensor([0.5507, 0.1300, 0.1016, 0.2176]) -Greedy action tensor([ 1.0283, -0.3044, -1.1934, 0.3409]) tensor([0.5333, 0.1407, 0.0578, 0.2682]) -Greedy action tensor([ 1.3802, -0.6224, -0.9438, 0.6554]) tensor([0.5823, 0.0786, 0.0570, 0.2821]) -Greedy action tensor([ 1.3855, -0.0225, -0.6468, 0.0636]) tensor([0.6089, 0.1490, 0.0798, 0.1623]) -Greedy action tensor([ 1.5708, -0.8937, -0.2761, 0.2096]) tensor([0.6671, 0.0567, 0.1052, 0.1710]) -Greedy action tensor([ 1.0364, -0.3301, -0.2801, 0.3834]) tensor([0.4893, 0.1248, 0.1312, 0.2547]) -Greedy action tensor([ 1.3801, -0.5587, -0.6130, 0.2586]) tensor([0.6227, 0.0896, 0.0848, 0.2029]) -Greedy action tensor([ 1.7791, -0.7533, -0.1830, 0.1660]) tensor([0.7046, 0.0560, 0.0990, 0.1404]) -Greedy action tensor([ 1.8337, -0.7325, -0.5273, 0.3538]) tensor([0.7149, 0.0549, 0.0674, 0.1627]) -Greedy action tensor([ 1.2495, -0.3372, -0.3622, -0.2061]) tensor([0.6107, 0.1250, 0.1219, 0.1425]) -Greedy action tensor([ 0.8654, -0.2358, -0.7521, 0.2006]) tensor([0.4890, 0.1626, 0.0970, 0.2515]) -Greedy action tensor([ 1.1509, -0.4422, -0.2853, 0.1344]) tensor([0.5547, 0.1128, 0.1319, 0.2007]) -Greedy action tensor([ 2.4963, -0.8402, -0.3364, 0.8034]) tensor([0.7822, 0.0278, 0.0460, 0.1439]) -Greedy action tensor([ 0.9841, -0.3897, -0.3031, 0.2866]) tensor([0.4933, 0.1249, 0.1362, 0.2456]) -Greedy action tensor([ 1.8355, -1.0032, -0.5180, 0.2248]) tensor([0.7389, 0.0432, 0.0702, 0.1476]) -Greedy action tensor([ 1.5462, 0.2736, -0.6809, 0.3126]) tensor([0.5955, 0.1668, 0.0642, 0.1734]) -Greedy action tensor([ 1.4706, 0.1972, -0.6257, -0.2453]) tensor([0.6319, 0.1768, 0.0777, 0.1136]) -Greedy action tensor([ 1.6691, -0.8609, 0.1241, 0.2663]) tensor([0.6498, 0.0518, 0.1386, 0.1598]) -Greedy action tensor([ 1.2944, 0.0739, -0.2237, -0.2560]) tensor([0.5792, 0.1709, 0.1269, 0.1229]) -Greedy action tensor([ 1.5908, -0.3453, -0.6999, 0.3484]) tensor([0.6518, 0.0940, 0.0660, 0.1882]) -Greedy action tensor([ 1.5124, -0.4089, -0.6673, -0.0337]) tensor([0.6791, 0.0994, 0.0768, 0.1447]) -Greedy action tensor([ 1.7166, 0.4455, -0.4904, 0.5062]) tensor([0.5922, 0.1661, 0.0652, 0.1765]) -Greedy action tensor([ 1.6018, -1.0918, -0.4699, 0.1059]) tensor([0.7054, 0.0477, 0.0889, 0.1580]) -Greedy action tensor([ 1.3962, -0.5892, -0.9648, 0.0488]) tensor([0.6704, 0.0921, 0.0632, 0.1743]) -Greedy action tensor([ 1.5201, -1.3933, -0.0455, -0.1794]) tensor([0.6916, 0.0375, 0.1445, 0.1264]) -Greedy action tensor([ 2.6920, -1.1374, 0.1361, 1.2382]) tensor([0.7502, 0.0163, 0.0582, 0.1753]) -Greedy action tensor([ 1.3914, -0.5946, -0.3924, 0.3486]) tensor([0.6032, 0.0828, 0.1013, 0.2126]) -Greedy action tensor([ 1.5691, -0.4101, -0.4377, 0.4117]) tensor([0.6302, 0.0871, 0.0847, 0.1981]) -Greedy action tensor([ 1.7168, 0.7106, -0.2479, 0.2139]) tensor([0.5786, 0.2115, 0.0811, 0.1287]) -Greedy action tensor([ 1.1904, -0.4464, -0.2250, -0.0593]) tensor([0.5800, 0.1129, 0.1409, 0.1662]) -Greedy action tensor([ 1.1706, -0.8089, -0.5585, -0.1476]) tensor([0.6316, 0.0873, 0.1121, 0.1690]) -Greedy action tensor([ 1.2661, -0.2422, -0.9291, 0.0385]) tensor([0.6152, 0.1361, 0.0685, 0.1802]) -Greedy action tensor([ 2.0142, -0.8037, -0.4468, 0.4300]) tensor([0.7406, 0.0442, 0.0632, 0.1519]) -Greedy action tensor([ 1.2939, -0.3444, -0.0835, 0.2949]) tensor([0.5510, 0.1071, 0.1390, 0.2029]) -Greedy action tensor([ 1.3162, -0.1032, -0.4336, -0.0645]) tensor([0.5999, 0.1451, 0.1043, 0.1508]) -Greedy action tensor([ 2.5000, -1.0664, -0.0154, 0.2020]) tensor([0.8268, 0.0234, 0.0668, 0.0831]) -Greedy action tensor([ 1.0875, 0.1895, -0.7921, 0.1813]) tensor([0.5091, 0.2074, 0.0777, 0.2057]) -Greedy action tensor([ 0.9858, -0.2682, -0.5141, -0.0340]) tensor([0.5350, 0.1527, 0.1194, 0.1930]) -Greedy action tensor([ 2.6780, -1.7007, 0.7579, 0.0681]) tensor([0.8112, 0.0102, 0.1189, 0.0597]) -Greedy action tensor([ 1.4381, -0.3441, -0.7524, 0.0533]) tensor([0.6534, 0.1099, 0.0731, 0.1636]) -Greedy action tensor([ 1.2098, -0.3000, -0.6609, -0.0031]) tensor([0.5980, 0.1321, 0.0921, 0.1778]) -Greedy action tensor([ 1.9272, -0.7199, -0.6360, 0.2718]) tensor([0.7469, 0.0529, 0.0576, 0.1427]) -Greedy action tensor([ 1.1180, -0.4113, -0.5186, 0.2359]) tensor([0.5479, 0.1187, 0.1066, 0.2268]) -Greedy action tensor([ 0.9002, -0.4139, -0.2751, 0.2458]) tensor([0.4768, 0.1281, 0.1472, 0.2478]) -Greedy action tensor([ 1.4430, -0.2919, -0.8087, 0.2601]) tensor([0.6297, 0.1111, 0.0663, 0.1929]) -Greedy action tensor([ 2.3626, 0.5289, -0.1097, 0.2600]) tensor([0.7319, 0.1170, 0.0618, 0.0894]) -Greedy action tensor([ 1.3309, -0.0702, -0.3738, 0.1179]) tensor([0.5796, 0.1428, 0.1054, 0.1723]) -Greedy action tensor([ 0.9736, -0.0612, -1.2741, 0.3357]) tensor([0.5027, 0.1786, 0.0531, 0.2656]) -Greedy action tensor([ 1.4209, -0.3512, -0.2377, 0.1257]) tensor([0.6119, 0.1040, 0.1165, 0.1676]) -Greedy action tensor([ 1.4354, -0.2601, -1.1212, 0.2498]) tensor([0.6383, 0.1171, 0.0495, 0.1951]) -Greedy action tensor([ 1.6172, 0.0429, -1.1448, 0.6706]) tensor([0.6030, 0.1249, 0.0381, 0.2340]) -Greedy action tensor([ 1.8281, -0.8756, -0.3405, 0.4996]) tensor([0.6915, 0.0463, 0.0791, 0.1832]) -Greedy action tensor([ 1.4914, -0.1683, 0.0388, -0.2008]) tensor([0.6218, 0.1183, 0.1455, 0.1145]) -Greedy action tensor([ 1.3869, -0.6088, -0.3197, 0.4038]) tensor([0.5912, 0.0804, 0.1073, 0.2212]) -Greedy action tensor([ 1.4226, -0.7729, -0.0135, 0.0666]) tensor([0.6223, 0.0693, 0.1480, 0.1604]) -Greedy action tensor([ 1.0913, -0.3243, -0.0548, -0.0453]) tensor([0.5315, 0.1290, 0.1689, 0.1706]) -Greedy action tensor([ 1.2547, -0.6813, -0.3765, 0.3280]) tensor([0.5761, 0.0831, 0.1127, 0.2280]) -Greedy action tensor([ 1.2282, 0.0967, -0.9463, -0.0207]) tensor([0.5804, 0.1872, 0.0660, 0.1665]) -Greedy action tensor([ 1.2216, 0.2529, -0.5889, -0.0901]) tensor([0.5517, 0.2094, 0.0902, 0.1486]) -Greedy action tensor([ 0.9878, -0.3996, -0.1911, 0.1401]) tensor([0.5036, 0.1258, 0.1549, 0.2157]) -Greedy action tensor([ 0.9660, -0.4674, -0.3326, 0.5155]) tensor([0.4654, 0.1110, 0.1270, 0.2966]) -Greedy action tensor([ 2.0446, -1.2343, -0.2448, 0.7961]) tensor([0.7013, 0.0264, 0.0711, 0.2012]) -Greedy action tensor([ 1.9748, -0.7385, -0.5559, 0.4228]) tensor([0.7365, 0.0488, 0.0586, 0.1560]) -Greedy action tensor([ 1.5449, -0.7151, -0.6041, 0.1644]) tensor([0.6792, 0.0709, 0.0792, 0.1708]) -Greedy action tensor([ 1.6496, -0.4917, -0.6389, 0.7049]) tensor([0.6220, 0.0731, 0.0631, 0.2418]) -Greedy action tensor([ 1.3503, -0.1663, -0.3892, -0.2052]) tensor([0.6226, 0.1366, 0.1093, 0.1314]) -Greedy action tensor([ 1.2534, -0.6258, -0.4162, 0.3127]) tensor([0.5776, 0.0882, 0.1088, 0.2255]) -Greedy action tensor([ 1.4390, -0.2137, -0.1565, -0.0650]) tensor([0.6186, 0.1185, 0.1255, 0.1375]) -Greedy action tensor([ 1.7582, -0.9748, -0.1986, 0.5064]) tensor([0.6701, 0.0436, 0.0947, 0.1916]) -Greedy action tensor([ 1.1269, -0.2836, -0.2469, -0.1135]) tensor([0.5598, 0.1366, 0.1417, 0.1619]) -Greedy action tensor([ 1.1149, -0.0189, -1.1003, 0.2390]) tensor([0.5413, 0.1742, 0.0591, 0.2254]) -Greedy action tensor([ 1.4440, -0.7664, -0.4076, 0.5696]) tensor([0.5939, 0.0651, 0.0932, 0.2477]) -Greedy action tensor([1.8102, 0.2227, 0.0669, 0.3781]) tensor([0.6180, 0.1263, 0.1081, 0.1476]) -Greedy action tensor([ 1.1550, -0.4462, -0.4187, 0.4507]) tensor([0.5254, 0.1059, 0.1089, 0.2598]) -Greedy action tensor([ 1.4706, -0.4899, -0.6535, 0.0619]) tensor([0.6645, 0.0936, 0.0794, 0.1625]) -Greedy action tensor([ 1.7923, -0.6510, -0.5709, -0.0641]) tensor([0.7478, 0.0650, 0.0704, 0.1168]) -Greedy action tensor([ 1.4289, -0.5311, -0.7501, 0.4232]) tensor([0.6174, 0.0870, 0.0699, 0.2258]) -Greedy action tensor([ 1.2985, -0.3415, -0.8498, 0.3019]) tensor([0.5953, 0.1155, 0.0695, 0.2197]) -Greedy action tensor([ 2.6387, -0.4972, -0.1999, 0.4895]) tensor([0.8206, 0.0357, 0.0480, 0.0957]) -Greedy action tensor([ 1.6182, -0.5459, -0.7496, 0.6182]) tensor([0.6343, 0.0729, 0.0594, 0.2334]) -Greedy action tensor([ 1.5924, -0.3731, -0.0025, 0.5610]) tensor([0.5884, 0.0824, 0.1194, 0.2098]) -Greedy action tensor([ 1.1231, -0.7691, 0.1450, -0.5018]) tensor([0.5802, 0.0875, 0.2181, 0.1142]) -Greedy action tensor([ 1.2000, -0.5778, -0.1718, -0.2785]) tensor([0.6058, 0.1024, 0.1537, 0.1381]) -Greedy action tensor([ 0.9202, -0.6450, -0.0405, -0.0268]) tensor([0.5052, 0.1056, 0.1933, 0.1960]) -Greedy action tensor([ 0.6686, -0.0873, -0.1993, -0.1013]) tensor([0.4251, 0.1996, 0.1785, 0.1968]) -Greedy action tensor([ 0.8254, -0.5700, -0.0196, -0.2298]) tensor([0.4937, 0.1223, 0.2121, 0.1719]) -Greedy action tensor([ 0.4026, -0.1001, -0.1687, -0.0005]) tensor([0.3524, 0.2131, 0.1990, 0.2355]) -Greedy action tensor([ 0.4110, -0.2637, -0.1133, -0.2329]) tensor([0.3807, 0.1939, 0.2254, 0.2000]) -Greedy action tensor([ 0.7962, -0.0849, -0.0629, -0.1887]) tensor([0.4522, 0.1874, 0.1915, 0.1689]) -Greedy action tensor([ 0.9290, -0.7206, -0.1124, -0.5477]) tensor([0.5639, 0.1083, 0.1990, 0.1288]) -Greedy action tensor([ 0.8996, -0.2878, -0.0283, -0.1457]) tensor([0.4873, 0.1486, 0.1927, 0.1713]) -Greedy action tensor([ 0.6284, -0.2820, -0.0238, -0.1168]) tensor([0.4170, 0.1678, 0.2172, 0.1979]) -Greedy action tensor([ 0.6366, -0.2963, -0.0570, -0.1173]) tensor([0.4231, 0.1664, 0.2114, 0.1991]) -Greedy action tensor([ 0.7887, -0.1373, -0.1184, -0.1919]) tensor([0.4598, 0.1821, 0.1856, 0.1725]) -Greedy action tensor([ 0.5230, -0.1813, 0.0629, -0.0709]) tensor([0.3734, 0.1846, 0.2357, 0.2062]) -Greedy action tensor([ 1.0068, -0.8534, -0.0193, -0.4539]) tensor([0.5727, 0.0891, 0.2053, 0.1329]) -Greedy action tensor([ 0.5492, -0.1964, -0.0610, -0.1221]) tensor([0.3955, 0.1876, 0.2148, 0.2021]) -Greedy action tensor([ 0.7555, -0.5508, -0.1055, -0.3969]) tensor([0.4977, 0.1348, 0.2104, 0.1572]) -Greedy action tensor([ 0.9232, -0.9036, 0.1625, -0.8258]) tensor([0.5549, 0.0893, 0.2593, 0.0965]) -Greedy action tensor([ 0.8642, -0.2918, -0.1048, -0.3578]) tensor([0.5028, 0.1583, 0.1908, 0.1482]) -Greedy action tensor([ 1.2579, -0.7150, -0.0042, -0.6452]) tensor([0.6364, 0.0885, 0.1802, 0.0949]) -Greedy action tensor([ 1.4650, -0.8003, 0.1916, -0.7400]) tensor([0.6694, 0.0695, 0.1873, 0.0738]) -Greedy action tensor([ 0.8652, -0.6679, -0.1725, -0.3424]) tensor([0.5350, 0.1155, 0.1895, 0.1599]) -Greedy action tensor([ 0.8245, -0.4317, -0.1071, -0.4849]) tensor([0.5132, 0.1461, 0.2022, 0.1385]) -Greedy action tensor([ 0.8464, -0.5408, 0.2271, -0.2858]) tensor([0.4738, 0.1184, 0.2551, 0.1527]) -Greedy action tensor([ 1.0102, -0.6108, -0.0305, -0.3701]) tensor([0.5548, 0.1097, 0.1960, 0.1395]) -Greedy action tensor([ 0.9561, -0.3458, -0.1245, -0.1926]) tensor([0.5185, 0.1411, 0.1760, 0.1644]) -Greedy action tensor([ 0.5737, -0.1523, 0.0652, -0.3403]) tensor([0.4022, 0.1946, 0.2419, 0.1613]) -Greedy action tensor([ 0.9089, -0.5241, -0.0125, -0.5397]) tensor([0.5344, 0.1275, 0.2126, 0.1255]) -Greedy action tensor([ 0.6434, -0.2165, 0.0920, -0.5372]) tensor([0.4336, 0.1835, 0.2498, 0.1331]) -Greedy action tensor([ 0.9094, -0.4244, 0.0100, -0.5882]) tensor([0.5280, 0.1391, 0.2148, 0.1181]) -Greedy action tensor([ 0.8266, -0.1618, -0.1258, -0.4885]) tensor([0.4935, 0.1837, 0.1904, 0.1325]) -Greedy action tensor([ 0.7997, -0.6101, -0.0872, -0.2402]) tensor([0.4976, 0.1215, 0.2050, 0.1759]) -Greedy action tensor([ 1.0175, -0.4772, -0.2068, -0.6921]) tensor([0.5885, 0.1320, 0.1730, 0.1065]) -Greedy action tensor([0.4681, 0.1413, 0.0253, 0.0035]) tensor([0.3342, 0.2411, 0.2147, 0.2100]) -Greedy action tensor([ 0.7041, -0.4778, 0.0764, -0.3006]) tensor([0.4532, 0.1390, 0.2419, 0.1659]) -Greedy action tensor([ 0.3642, 0.2116, -0.1940, 0.1672]) tensor([0.3075, 0.2640, 0.1760, 0.2525]) -Greedy action tensor([ 0.7256, -0.6382, 0.1016, -0.1476]) tensor([0.4527, 0.1157, 0.2426, 0.1890]) -Greedy action tensor([ 1.1002, -0.6546, -0.1849, -0.4504]) tensor([0.6018, 0.1041, 0.1665, 0.1277]) -Greedy action tensor([ 0.6438, -0.2544, 0.1104, -0.1369]) tensor([0.4078, 0.1661, 0.2392, 0.1868]) -Greedy action tensor([ 0.6908, -0.1843, -0.0433, -0.1074]) tensor([0.4261, 0.1776, 0.2045, 0.1918]) -Greedy action tensor([ 0.6301, -0.0997, -0.0941, -0.0823]) tensor([0.4070, 0.1962, 0.1973, 0.1996]) -Greedy action tensor([ 0.9882, -0.5806, 0.0558, -0.6336]) tensor([0.5557, 0.1158, 0.2187, 0.1098]) -Greedy action tensor([ 1.0925, -0.7882, -0.1068, -0.4588]) tensor([0.6003, 0.0915, 0.1809, 0.1272]) -Greedy action tensor([ 1.0856, -0.7104, 0.0415, -0.5946]) tensor([0.5868, 0.0974, 0.2065, 0.1093]) -Greedy action tensor([ 1.3090, -1.0523, 0.0791, -0.5124]) tensor([0.6458, 0.0609, 0.1888, 0.1045]) -Greedy action tensor([ 0.9091, -0.6237, 0.0673, -0.3932]) tensor([0.5212, 0.1125, 0.2246, 0.1417]) -Greedy action tensor([ 0.8197, -0.1834, 0.0110, -0.4848]) tensor([0.4800, 0.1760, 0.2138, 0.1302]) -Greedy action tensor([ 0.7751, -0.1446, 0.0835, -0.1089]) tensor([0.4324, 0.1724, 0.2166, 0.1786]) -Greedy action tensor([ 0.5156, -0.4141, -0.0396, -0.2433]) tensor([0.4104, 0.1620, 0.2355, 0.1921]) -Greedy action tensor([ 1.4342, -1.0023, -0.1747, -0.7082]) tensor([0.7118, 0.0623, 0.1424, 0.0835]) -Greedy action tensor([ 0.6018, 0.0510, -0.0829, -0.1901]) tensor([0.3947, 0.2275, 0.1990, 0.1788]) -Greedy action tensor([ 0.6764, -0.2526, -0.3864, -0.3354]) tensor([0.4753, 0.1877, 0.1642, 0.1728]) -Greedy action tensor([ 0.9571, -0.5619, -0.1982, -0.5135]) tensor([0.5670, 0.1241, 0.1786, 0.1303]) -Greedy action tensor([ 0.5777, -0.1118, -0.1526, -0.2978]) tensor([0.4166, 0.2091, 0.2007, 0.1736]) -Greedy action tensor([ 0.6791, -0.5387, -0.0449, -0.2476]) tensor([0.4594, 0.1359, 0.2227, 0.1819]) -Greedy action tensor([ 0.4808, -0.4482, -0.0546, -0.2182]) tensor([0.4036, 0.1594, 0.2363, 0.2006]) -Greedy action tensor([ 0.9407, -0.6254, -0.0045, -0.3988]) tensor([0.5378, 0.1123, 0.2090, 0.1409]) -Greedy action tensor([ 1.1453, -0.6922, -0.0345, -0.6865]) tensor([0.6148, 0.0979, 0.1889, 0.0984]) -Greedy action tensor([ 0.8916, -0.2824, -0.0575, -0.1086]) tensor([0.4845, 0.1498, 0.1875, 0.1782]) -Greedy action tensor([ 1.2561, -0.6757, -0.0902, -0.5644]) tensor([0.6381, 0.0925, 0.1661, 0.1033]) -Greedy action tensor([ 0.9088, -0.6036, -0.1619, -0.4848]) tensor([0.5521, 0.1217, 0.1892, 0.1370]) -Greedy action tensor([ 0.9090, -0.4718, -0.0096, -0.5070]) tensor([0.5282, 0.1328, 0.2108, 0.1282]) -Greedy action tensor([ 1.0378, -0.5058, 0.0951, -0.6291]) tensor([0.5580, 0.1192, 0.2174, 0.1054]) -Greedy action tensor([ 0.7194, -0.5259, -0.0302, -0.1330]) tensor([0.4573, 0.1316, 0.2161, 0.1950]) -Greedy action tensor([ 1.1953, -0.9378, 0.0407, -0.7404]) tensor([0.6337, 0.0751, 0.1997, 0.0915]) -Greedy action tensor([ 1.1117, -1.0822, 0.2065, -0.7031]) tensor([0.5957, 0.0664, 0.2409, 0.0970]) -Greedy action tensor([ 1.1147, -0.5264, -0.0714, -0.5072]) tensor([0.5894, 0.1142, 0.1800, 0.1164]) -Greedy action tensor([ 0.7004, -0.4333, -0.2295, -0.2026]) tensor([0.4713, 0.1517, 0.1860, 0.1911]) -Greedy action tensor([ 0.3774, 0.2285, -0.0372, 0.0915]) tensor([0.3055, 0.2632, 0.2018, 0.2295]) -Greedy action tensor([ 0.5416, -0.3265, -0.0575, -0.1572]) tensor([0.4055, 0.1702, 0.2227, 0.2016]) -Greedy action tensor([ 0.3850, 0.2320, -0.3409, -0.1490]) tensor([0.3415, 0.2930, 0.1653, 0.2002]) -Greedy action tensor([ 0.5960, -0.1281, -0.0804, -0.1054]) tensor([0.4018, 0.1948, 0.2043, 0.1992]) -Greedy action tensor([ 0.8184, -0.5220, -0.1590, -0.4098]) tensor([0.5179, 0.1356, 0.1949, 0.1516]) -Greedy action tensor([ 1.0383, -0.4667, 0.0250, -0.3479]) tensor([0.5449, 0.1210, 0.1978, 0.1362]) -Greedy action tensor([ 0.9462, -0.6267, 0.1120, -0.5996]) tensor([0.5391, 0.1118, 0.2341, 0.1149]) -Greedy action tensor([ 0.9137, -0.7594, 0.2295, -0.5170]) tensor([0.5178, 0.0972, 0.2612, 0.1238]) -Greedy action tensor([ 0.8293, -0.3385, 0.0663, -0.2706]) tensor([0.4739, 0.1474, 0.2210, 0.1577]) -Greedy action tensor([ 0.7573, -0.4059, -0.1420, -0.2501]) tensor([0.4797, 0.1499, 0.1952, 0.1752]) -Greedy action tensor([ 0.6376, -0.4928, -0.0350, -0.3247]) tensor([0.4514, 0.1458, 0.2304, 0.1724]) -Greedy action tensor([ 1.6379, -0.9327, -0.1608, -0.6008]) tensor([0.7415, 0.0567, 0.1227, 0.0790]) -Greedy action tensor([ 0.8997, -0.4468, 0.0262, -0.2123]) tensor([0.4984, 0.1296, 0.2081, 0.1639]) -Greedy action tensor([-1.7345, -0.4854, 0.5460, -0.2127]) tensor([0.0531, 0.1850, 0.5189, 0.2430]) -Greedy action tensor([-1.8194, -0.4252, 0.6803, -0.0214]) tensor([0.0430, 0.1734, 0.5239, 0.2597]) -Greedy action tensor([-1.9114, -0.4327, 0.6523, -0.1631]) tensor([0.0415, 0.1819, 0.5384, 0.2382]) -Greedy action tensor([-1.9415, -0.4546, 0.6756, -0.1784]) tensor([0.0401, 0.1773, 0.5489, 0.2337]) -Greedy action tensor([-1.8264, -0.3742, 0.5977, -0.1315]) tensor([0.0454, 0.1941, 0.5130, 0.2474]) -Greedy action tensor([-1.5799, -0.3913, 0.8170, 0.6301]) tensor([0.0410, 0.1346, 0.4506, 0.3738]) -Greedy action tensor([-1.6962, -0.1476, 0.4935, -0.0680]) tensor([0.0507, 0.2384, 0.4527, 0.2582]) -Greedy action tensor([-0.7356, 0.9742, 0.0612, 0.3018]) tensor([0.0864, 0.4779, 0.1918, 0.2439]) -Greedy action tensor([-1.8390, -0.3303, 0.5796, -0.1009]) tensor([0.0446, 0.2015, 0.5005, 0.2534]) -Greedy action tensor([-1.4019, -0.4032, 0.4276, 0.1822]) tensor([0.0675, 0.1832, 0.4204, 0.3289]) -Greedy action tensor([-1.6436, -0.2603, 0.4966, -0.0866]) tensor([0.0548, 0.2187, 0.4662, 0.2602]) -Greedy action tensor([-1.8693, -0.2396, 0.6112, -0.1407]) tensor([0.0422, 0.2155, 0.5045, 0.2379]) -Greedy action tensor([-1.8369, -0.1203, 0.5631, -0.1365]) tensor([0.0434, 0.2413, 0.4779, 0.2374]) -Greedy action tensor([-1.7671, -0.1328, 0.5365, -0.1225]) tensor([0.0469, 0.2405, 0.4696, 0.2430]) -Greedy action tensor([-1.9231, -0.4584, 0.6983, -0.1497]) tensor([0.0400, 0.1732, 0.5508, 0.2359]) -Greedy action tensor([-1.2189, -0.6099, 0.3371, 0.0640]) tensor([0.0894, 0.1644, 0.4237, 0.3225]) -Greedy action tensor([-1.8998, -0.4350, 0.6418, -0.1580]) tensor([0.0421, 0.1823, 0.5351, 0.2405]) -Greedy action tensor([-1.9140, -0.3828, 0.6486, -0.1620]) tensor([0.0411, 0.1898, 0.5324, 0.2367]) -Greedy action tensor([-1.8474, -0.4331, 0.6159, -0.1430]) tensor([0.0447, 0.1840, 0.5253, 0.2459]) -Greedy action tensor([-1.9322, -0.4551, 0.6828, -0.1687]) tensor([0.0402, 0.1761, 0.5493, 0.2344]) -Greedy action tensor([-1.9097, -0.4178, 0.6513, -0.1603]) tensor([0.0414, 0.1841, 0.5363, 0.2382]) -Greedy action tensor([-1.4117, -0.6352, 0.7794, 0.5171]) tensor([0.0526, 0.1144, 0.4708, 0.3622]) -Greedy action tensor([-1.6812, -0.1319, 0.4908, -0.0766]) tensor([0.0514, 0.2419, 0.4510, 0.2557]) -Greedy action tensor([-1.9263, -0.4235, 0.6607, -0.1611]) tensor([0.0406, 0.1825, 0.5396, 0.2372]) -Greedy action tensor([-1.9339, -0.4410, 0.6616, -0.1727]) tensor([0.0405, 0.1804, 0.5433, 0.2359]) -Greedy action tensor([-1.9390, -0.4384, 0.6618, -0.1767]) tensor([0.0403, 0.1809, 0.5437, 0.2350]) -Greedy action tensor([-1.6808, -0.5281, 0.5397, -0.0951]) tensor([0.0548, 0.1734, 0.5045, 0.2674]) -Greedy action tensor([ 0.1810, -0.5272, 1.4281, 0.8597]) tensor([0.1440, 0.0709, 0.5012, 0.2839]) -Greedy action tensor([-0.1160, 1.1251, -0.0045, 0.3974]) tensor([0.1380, 0.4773, 0.1542, 0.2305]) -Greedy action tensor([-0.5807, 1.0034, 0.0710, 0.2720]) tensor([0.0986, 0.4808, 0.1892, 0.2314]) -Greedy action tensor([-1.8514, -0.1475, 0.5736, -0.1457]) tensor([0.0429, 0.2358, 0.4850, 0.2362]) -Greedy action tensor([-0.8247, 0.7906, 0.1048, 0.0123]) tensor([0.0920, 0.4626, 0.2330, 0.2124]) -Greedy action tensor([-1.3460, 0.6083, 0.2480, 0.0785]) tensor([0.0583, 0.4119, 0.2873, 0.2425]) -Greedy action tensor([-1.9074, -0.4713, 0.6597, -0.1537]) tensor([0.0417, 0.1751, 0.5426, 0.2406]) -Greedy action tensor([-1.8970, -0.4028, 0.6423, -0.1501]) tensor([0.0419, 0.1867, 0.5310, 0.2404]) -Greedy action tensor([-1.8809, -0.4660, 0.6385, -0.1555]) tensor([0.0432, 0.1778, 0.5365, 0.2425]) -Greedy action tensor([-1.8887, -0.4034, 0.6519, -0.1472]) tensor([0.0420, 0.1855, 0.5329, 0.2396]) -Greedy action tensor([-1.0355, 0.2495, -0.0238, -0.2595]) tensor([0.1048, 0.3790, 0.2884, 0.2278]) -Greedy action tensor([-1.9426, -0.4523, 0.6684, -0.1798]) tensor([0.0402, 0.1784, 0.5471, 0.2343]) -Greedy action tensor([-1.7579, -0.3845, 0.5759, -0.0653]) tensor([0.0483, 0.1908, 0.4984, 0.2625]) -Greedy action tensor([-1.9066, -0.4385, 0.6434, -0.1556]) tensor([0.0418, 0.1816, 0.5357, 0.2409]) -Greedy action tensor([-1.9135, -0.3921, 0.6474, -0.1597]) tensor([0.0411, 0.1884, 0.5328, 0.2377]) -Greedy action tensor([-1.7168, -0.3100, 0.5950, -0.0632]) tensor([0.0490, 0.2001, 0.4947, 0.2561]) -Greedy action tensor([-1.9053, -0.3621, 0.6431, -0.1577]) tensor([0.0413, 0.1933, 0.5282, 0.2372]) -Greedy action tensor([-1.5250, -0.3181, 0.4752, -0.0987]) tensor([0.0629, 0.2103, 0.4649, 0.2619]) -Greedy action tensor([-1.8181, -0.0497, 0.5202, -0.3764]) tensor([0.0466, 0.2732, 0.4831, 0.1971]) -Greedy action tensor([-1.7933, -0.3551, 0.6601, -0.0317]) tensor([0.0441, 0.1859, 0.5131, 0.2569]) -Greedy action tensor([-1.8827, -0.3760, 0.6277, -0.1553]) tensor([0.0426, 0.1924, 0.5250, 0.2399]) -Greedy action tensor([-1.9453, -0.4562, 0.6748, -0.1780]) tensor([0.0400, 0.1771, 0.5489, 0.2340]) -Greedy action tensor([-1.9380, -0.4505, 0.6709, -0.1759]) tensor([0.0403, 0.1782, 0.5470, 0.2345]) -Greedy action tensor([-1.4160, -0.1942, 0.3741, -0.0054]) tensor([0.0691, 0.2343, 0.4136, 0.2830]) -Greedy action tensor([-1.7676, 0.0781, 0.5019, -0.0584]) tensor([0.0444, 0.2810, 0.4294, 0.2452]) -Greedy action tensor([-1.8892, -0.3154, 0.6316, -0.1513]) tensor([0.0418, 0.2015, 0.5194, 0.2374]) -Greedy action tensor([-1.2924, 0.5373, 0.2927, -0.0842]) tensor([0.0647, 0.4031, 0.3157, 0.2165]) -Greedy action tensor([-1.8097, -0.4156, 0.5930, -0.1235]) tensor([0.0465, 0.1877, 0.5145, 0.2513]) -Greedy action tensor([-0.6982, 0.1634, 0.1752, -0.1180]) tensor([0.1325, 0.3136, 0.3173, 0.2367]) -Greedy action tensor([-1.7470, -0.4517, 0.6258, 0.0226]) tensor([0.0471, 0.1719, 0.5049, 0.2762]) -Greedy action tensor([-1.8059, -0.4249, 0.5946, -0.1243]) tensor([0.0468, 0.1861, 0.5158, 0.2513]) -Greedy action tensor([-1.8835, -0.4468, 0.6405, -0.1473]) tensor([0.0428, 0.1801, 0.5342, 0.2430]) -Greedy action tensor([-0.6054, 0.7254, 0.0627, 0.1136]) tensor([0.1138, 0.4307, 0.2220, 0.2336]) -Greedy action tensor([-0.7901, 0.8517, 0.0711, 0.0844]) tensor([0.0915, 0.4726, 0.2165, 0.2194]) -Greedy action tensor([-1.8957, -0.3618, 0.6381, -0.1521]) tensor([0.0417, 0.1935, 0.5260, 0.2387]) -Greedy action tensor([-1.6252, -0.4345, 0.7865, -0.0711]) tensor([0.0496, 0.1631, 0.5529, 0.2345]) -Greedy action tensor([-1.8281, -0.2630, 0.5675, -0.0990]) tensor([0.0447, 0.2136, 0.4901, 0.2517]) -Greedy action tensor([-1.3962, 0.8249, 0.2360, -0.7050]) tensor([0.0577, 0.5319, 0.2952, 0.1152]) -Greedy action tensor([-1.8381, -0.3240, 0.5959, -0.1337]) tensor([0.0445, 0.2025, 0.5080, 0.2449]) -Greedy action tensor([-1.4378, -0.3441, 0.3495, 0.1244]) tensor([0.0679, 0.2027, 0.4056, 0.3238]) -Greedy action tensor([-1.9411, -0.4223, 0.6592, -0.1780]) tensor([0.0402, 0.1837, 0.5416, 0.2345]) -Greedy action tensor([-1.2859, -0.2056, 0.6644, 0.4231]) tensor([0.0606, 0.1785, 0.4261, 0.3348]) -Greedy action tensor([-1.8714, -0.2977, 0.6204, -0.1294]) tensor([0.0423, 0.2043, 0.5116, 0.2417]) -Greedy action tensor([-1.7002, -0.5307, 0.5752, -0.1250]) tensor([0.0532, 0.1714, 0.5181, 0.2572]) -Greedy action tensor([-1.8191, -0.3150, 0.6088, -0.1047]) tensor([0.0447, 0.2010, 0.5063, 0.2481]) -Greedy action tensor([-1.9255, -0.4062, 0.6548, -0.1724]) tensor([0.0407, 0.1862, 0.5379, 0.2352]) -Greedy action tensor([-0.9479, 0.7180, 0.1583, -0.0685]) tensor([0.0853, 0.4513, 0.2579, 0.2055]) -Greedy action tensor([-1.2596, 0.6945, 0.2256, 0.0405]) tensor([0.0619, 0.4372, 0.2735, 0.2273]) -Greedy action tensor([-1.8717, 0.2202, 0.5411, -0.1591]) tensor([0.0387, 0.3139, 0.4326, 0.2148]) -Greedy action tensor([-1.9022, -0.3791, 0.6402, -0.1608]) tensor([0.0417, 0.1911, 0.5296, 0.2377]) -Greedy action tensor([-1.6737, -0.3804, 0.5212, -0.0822]) tensor([0.0540, 0.1966, 0.4844, 0.2650]) -Greedy action tensor([-0.8543, 0.3660, 0.1532, 0.0022]) tensor([0.1055, 0.3573, 0.2888, 0.2484]) -Greedy action tensor([-1.8747, -0.3064, 0.6172, -0.1291]) tensor([0.0424, 0.2032, 0.5118, 0.2426]) -Greedy action tensor([-1.4329, -0.2275, 0.5959, -0.4164]) tensor([0.0680, 0.2270, 0.5171, 0.1879]) -Greedy action tensor([ 0.8053, -0.7498, -0.0652, -0.2330]) tensor([0.5040, 0.1064, 0.2111, 0.1785]) -Greedy action tensor([ 0.8817, -0.4982, -0.0513, -0.1876]) tensor([0.5030, 0.1266, 0.1979, 0.1726]) -Greedy action tensor([ 0.9610, -0.4008, -0.0192, -0.1455]) tensor([0.5096, 0.1306, 0.1912, 0.1685]) -Greedy action tensor([ 1.0579, -0.4212, -0.1773, -0.2924]) tensor([0.5625, 0.1282, 0.1636, 0.1458]) -Greedy action tensor([ 0.5577, -0.4005, -0.0412, -0.5005]) tensor([0.4386, 0.1682, 0.2410, 0.1522]) -Greedy action tensor([ 0.6325, -0.3458, 0.0106, -0.2350]) tensor([0.4286, 0.1612, 0.2302, 0.1800]) -Greedy action tensor([ 0.8604, -0.7904, 0.3397, -0.3846]) tensor([0.4822, 0.0925, 0.2865, 0.1388]) -Greedy action tensor([ 1.4241, -1.3640, 0.1426, -0.5424]) tensor([0.6761, 0.0416, 0.1877, 0.0946]) -Greedy action tensor([ 1.0023, -0.7947, 0.2365, -0.5702]) tensor([0.5440, 0.0902, 0.2529, 0.1129]) -Greedy action tensor([ 0.9085, -0.5293, 0.0284, -0.4490]) tensor([0.5237, 0.1244, 0.2172, 0.1347]) -Greedy action tensor([ 0.4445, -0.1014, -0.0096, -0.0741]) tensor([0.3559, 0.2062, 0.2260, 0.2119]) -Greedy action tensor([ 1.0404, -0.9425, 0.0218, -0.6054]) tensor([0.5912, 0.0814, 0.2135, 0.1140]) -Greedy action tensor([ 0.9161, -0.5008, 0.0226, -0.4024]) tensor([0.5211, 0.1263, 0.2132, 0.1394]) -Greedy action tensor([ 0.8960, -0.3134, 0.2549, -0.3268]) tensor([0.4718, 0.1408, 0.2485, 0.1389]) -Greedy action tensor([ 1.1335, -0.4501, 0.0286, -0.2753]) tensor([0.5615, 0.1152, 0.1860, 0.1373]) -Greedy action tensor([ 0.9492, -0.7597, 0.0608, -0.4628]) tensor([0.5446, 0.0986, 0.2240, 0.1327]) -Greedy action tensor([ 0.8538, -0.7254, -0.0640, -0.6661]) tensor([0.5482, 0.1130, 0.2189, 0.1199]) -Greedy action tensor([ 0.0053, -0.0411, -0.1104, -0.3077]) tensor([0.2796, 0.2669, 0.2490, 0.2045]) -Greedy action tensor([ 0.5914, -0.4954, -0.0823, -0.1543]) tensor([0.4307, 0.1453, 0.2196, 0.2044]) -Greedy action tensor([ 0.6986, -0.3266, 0.0835, -0.3534]) tensor([0.4447, 0.1595, 0.2404, 0.1553]) -Greedy action tensor([ 0.6739, -0.5441, -0.1576, -0.3425]) tensor([0.4778, 0.1413, 0.2080, 0.1729]) -Greedy action tensor([ 0.7218, -0.4394, -0.0702, -0.2169]) tensor([0.4636, 0.1451, 0.2100, 0.1813]) -Greedy action tensor([ 0.6109, 0.0363, -0.0322, -0.0724]) tensor([0.3856, 0.2171, 0.2027, 0.1947]) -Greedy action tensor([ 1.1148, -0.5563, -0.1593, -0.1592]) tensor([0.5723, 0.1076, 0.1601, 0.1601]) -Greedy action tensor([ 1.3092, -0.8867, 0.0354, -0.6925]) tensor([0.6553, 0.0729, 0.1833, 0.0885]) -Greedy action tensor([ 0.5847, -0.6162, -0.2045, -0.2132]) tensor([0.4534, 0.1364, 0.2060, 0.2042]) -Greedy action tensor([ 1.0702, -0.6375, 0.0158, -0.3206]) tensor([0.5622, 0.1019, 0.1959, 0.1399]) -Greedy action tensor([ 0.7735, -0.3894, 0.1622, -0.5264]) tensor([0.4700, 0.1469, 0.2550, 0.1281]) -Greedy action tensor([ 0.5608, -0.3116, -0.0287, -0.1155]) tensor([0.4031, 0.1685, 0.2235, 0.2049]) -Greedy action tensor([ 1.0738, -0.9037, 0.0122, -0.6009]) tensor([0.5982, 0.0828, 0.2069, 0.1121]) -Greedy action tensor([ 0.6968, -0.2635, -0.1118, -0.3396]) tensor([0.4581, 0.1753, 0.2041, 0.1625]) -Greedy action tensor([ 0.9353, -0.9534, 0.1322, -0.3606]) tensor([0.5340, 0.0808, 0.2392, 0.1461]) -Greedy action tensor([ 0.8982, -0.7028, 0.2100, -0.4736]) tensor([0.5108, 0.1030, 0.2566, 0.1296]) -Greedy action tensor([ 1.2282, -0.8350, -0.2379, -0.6354]) tensor([0.6609, 0.0840, 0.1526, 0.1025]) -Greedy action tensor([ 1.1504, -0.4744, 0.0119, -0.2567]) tensor([0.5675, 0.1118, 0.1818, 0.1390]) -Greedy action tensor([ 0.8626, -0.4626, 0.0359, -0.1106]) tensor([0.4805, 0.1277, 0.2102, 0.1816]) -Greedy action tensor([ 1.1839, -0.6769, -0.1975, -0.3676]) tensor([0.6178, 0.0961, 0.1552, 0.1309]) -Greedy action tensor([ 0.5522, -0.0230, -0.0062, -0.0439]) tensor([0.3724, 0.2095, 0.2130, 0.2052]) -Greedy action tensor([ 1.2161, -0.8187, -0.0319, -0.7204]) tensor([0.6402, 0.0837, 0.1838, 0.0923]) -Greedy action tensor([ 0.1488, 0.2385, 0.0398, -0.3001]) tensor([0.2756, 0.3014, 0.2471, 0.1759]) -Greedy action tensor([0.8568, 0.0189, 0.0161, 0.0292]) tensor([0.4346, 0.1880, 0.1875, 0.1899]) -Greedy action tensor([ 0.7053, -0.3420, -0.0523, -0.1624]) tensor([0.4465, 0.1567, 0.2093, 0.1875]) -Greedy action tensor([ 0.5254, -0.4612, -0.1414, -0.1800]) tensor([0.4201, 0.1566, 0.2157, 0.2075]) -Greedy action tensor([ 0.7255, -0.5722, 0.1230, -0.5444]) tensor([0.4759, 0.1300, 0.2605, 0.1337]) -Greedy action tensor([ 1.0001, -0.6350, -0.0735, -0.4659]) tensor([0.5657, 0.1103, 0.1934, 0.1306]) -Greedy action tensor([ 0.6668, -0.4280, -0.1410, -0.1871]) tensor([0.4533, 0.1517, 0.2021, 0.1930]) -Greedy action tensor([ 0.6761, -0.0165, -0.0048, 0.0418]) tensor([0.3942, 0.1972, 0.1995, 0.2090]) -Greedy action tensor([ 0.8321, -0.6405, -0.0604, -0.2953]) tensor([0.5095, 0.1168, 0.2087, 0.1650]) -Greedy action tensor([ 1.3323, -0.7048, 0.0428, -0.7472]) tensor([0.6533, 0.0852, 0.1799, 0.0816]) -Greedy action tensor([ 1.0968, -0.6308, 0.0706, -0.7068]) tensor([0.5880, 0.1045, 0.2107, 0.0968]) -Greedy action tensor([ 0.7028, -0.0481, 0.1361, -0.3204]) tensor([0.4169, 0.1967, 0.2365, 0.1498]) -Greedy action tensor([ 1.0621, -0.7340, -0.1834, -0.3590]) tensor([0.5899, 0.0979, 0.1698, 0.1424]) -Greedy action tensor([ 0.5019, -0.1526, 0.0024, -0.2306]) tensor([0.3835, 0.1993, 0.2327, 0.1844]) -Greedy action tensor([ 0.8326, -1.0219, 0.2452, -0.7991]) tensor([0.5241, 0.0820, 0.2913, 0.1025]) -Greedy action tensor([ 0.9968, -0.5741, -0.2089, -0.6021]) tensor([0.5850, 0.1216, 0.1752, 0.1182]) -Greedy action tensor([ 1.0922, -0.6755, 0.0259, -0.7596]) tensor([0.5981, 0.1021, 0.2059, 0.0939]) -Greedy action tensor([ 1.1547, -0.7609, -0.2224, -0.4258]) tensor([0.6229, 0.0917, 0.1572, 0.1282]) -Greedy action tensor([ 0.9694, -0.6766, -0.0084, -0.3069]) tensor([0.5411, 0.1043, 0.2035, 0.1510]) -Greedy action tensor([ 0.8033, -0.6440, 0.1849, -0.3669]) tensor([0.4798, 0.1128, 0.2585, 0.1489]) -Greedy action tensor([ 0.6138, -0.2229, 0.0743, -0.1914]) tensor([0.4060, 0.1759, 0.2367, 0.1815]) -Greedy action tensor([ 1.0848, -0.6445, -0.0400, -0.6278]) tensor([0.5943, 0.1054, 0.1930, 0.1072]) -Greedy action tensor([ 1.2319, -0.7551, 0.0144, -0.7966]) tensor([0.6391, 0.0876, 0.1892, 0.0841]) -Greedy action tensor([ 0.3764, -0.0722, -0.0184, -0.0864]) tensor([0.3399, 0.2170, 0.2291, 0.2140]) -Greedy action tensor([ 0.5389, -0.2532, -0.0489, -0.0516]) tensor([0.3903, 0.1767, 0.2168, 0.2162]) -Greedy action tensor([ 0.8419, -0.2933, 0.0306, -0.2463]) tensor([0.4756, 0.1528, 0.2113, 0.1602]) -Greedy action tensor([ 0.7083, -0.4575, -0.0489, -0.3514]) tensor([0.4701, 0.1465, 0.2205, 0.1629]) -Greedy action tensor([ 1.0626, -0.5869, -0.2493, -0.7953]) tensor([0.6182, 0.1188, 0.1665, 0.0964]) -Greedy action tensor([ 0.5593, -0.2781, -0.0256, -0.2279]) tensor([0.4090, 0.1770, 0.2279, 0.1861]) -Greedy action tensor([ 0.4019, -0.2548, 0.0389, -0.3359]) tensor([0.3714, 0.1926, 0.2584, 0.1776]) -Greedy action tensor([ 0.4659, -0.1793, -0.1097, 0.0225]) tensor([0.3665, 0.1922, 0.2061, 0.2352]) -Greedy action tensor([ 0.5502, -0.2281, -0.0215, -0.2187]) tensor([0.4020, 0.1846, 0.2270, 0.1864]) -Greedy action tensor([ 0.4989, 0.1236, 0.0762, -0.0114]) tensor([0.3398, 0.2335, 0.2227, 0.2040]) -Greedy action tensor([ 0.4218, -0.0344, -0.0073, -0.0425]) tensor([0.3432, 0.2175, 0.2235, 0.2157]) -Greedy action tensor([ 1.0381, -1.2599, 0.2545, -0.4610]) tensor([0.5616, 0.0564, 0.2565, 0.1254]) -Greedy action tensor([ 0.1362, -0.1597, 0.0388, 0.1774]) tensor([0.2708, 0.2014, 0.2456, 0.2822]) -Greedy action tensor([ 1.1822, -0.7606, -0.0371, -0.7264]) tensor([0.6301, 0.0903, 0.1862, 0.0934]) -Greedy action tensor([ 0.3481, 0.0551, -0.1292, -0.0605]) tensor([0.3299, 0.2461, 0.2047, 0.2193]) -Greedy action tensor([ 1.0975, -0.7376, -0.0422, -0.3545]) tensor([0.5836, 0.0931, 0.1867, 0.1366]) -Greedy action tensor([ 1.1941, -0.7733, -0.0934, -1.0094]) tensor([0.6552, 0.0916, 0.1808, 0.0723]) -Greedy action tensor([ 1.1891, -0.8181, -0.0541, -0.4807]) tensor([0.6207, 0.0834, 0.1790, 0.1169]) -Greedy action tensor([ 0.5548, -0.3395, -0.0734, -0.2027]) tensor([0.4147, 0.1696, 0.2213, 0.1944]) -Greedy action tensor([ 1.2558, -0.5595, -0.1393, 0.0877]) tensor([0.5809, 0.0946, 0.1439, 0.1806]) -Greedy action tensor([ 1.7205, -0.9820, -0.0333, 0.6840]) tensor([0.6270, 0.0420, 0.1085, 0.2224]) -Greedy action tensor([ 1.8035, -0.8719, -0.4644, 0.4701]) tensor([0.6964, 0.0480, 0.0721, 0.1835]) -Greedy action tensor([ 0.9026, -0.2420, 0.2843, 0.1340]) tensor([0.4309, 0.1372, 0.2322, 0.1998]) -Greedy action tensor([ 1.8794, -0.6034, -0.2902, 0.2526]) tensor([0.7172, 0.0599, 0.0819, 0.1410]) -Greedy action tensor([ 1.1540, -0.4487, -0.2085, 0.3991]) tensor([0.5188, 0.1045, 0.1328, 0.2439]) -Greedy action tensor([ 1.8178, -1.1473, -0.3957, 0.1468]) tensor([0.7413, 0.0382, 0.0810, 0.1394]) -Greedy action tensor([ 1.5546, -0.6810, -0.5948, 0.3117]) tensor([0.6613, 0.0707, 0.0771, 0.1908]) -Greedy action tensor([ 0.8787, -0.4652, -0.0092, 0.2344]) tensor([0.4551, 0.1187, 0.1873, 0.2389]) -Greedy action tensor([ 1.3909, -0.1980, -1.2167, 0.2446]) tensor([0.6267, 0.1279, 0.0462, 0.1992]) -Greedy action tensor([ 1.2107, -0.4407, -0.5466, 0.6026]) tensor([0.5239, 0.1005, 0.0904, 0.2852]) -Greedy action tensor([ 1.7885, -0.2277, -0.0062, 0.1031]) tensor([0.6735, 0.0897, 0.1119, 0.1249]) -Greedy action tensor([ 1.6711, -0.7967, -0.5867, 0.7371]) tensor([0.6320, 0.0536, 0.0661, 0.2484]) -Greedy action tensor([ 1.8622, -0.8324, -0.2561, 0.6493]) tensor([0.6733, 0.0455, 0.0810, 0.2002]) -Greedy action tensor([ 1.1938, -0.2359, -0.1304, -0.1918]) tensor([0.5696, 0.1364, 0.1515, 0.1425]) -Greedy action tensor([ 1.5346, 0.0042, -0.3000, 0.6594]) tensor([0.5578, 0.1207, 0.0891, 0.2325]) -Greedy action tensor([ 1.3275, 0.1159, -0.5000, 0.4413]) tensor([0.5346, 0.1591, 0.0860, 0.2204]) -Greedy action tensor([ 1.1252, -0.3466, -0.3939, 0.3561]) tensor([0.5231, 0.1200, 0.1145, 0.2424]) -Greedy action tensor([ 1.0251, -0.0428, 0.0503, 0.0826]) tensor([0.4738, 0.1629, 0.1787, 0.1846]) -Greedy action tensor([ 0.7729, -0.7933, -0.6743, -0.2463]) tensor([0.5540, 0.1157, 0.1303, 0.1999]) -Greedy action tensor([ 2.1864, -0.7703, -0.2747, -0.1065]) tensor([0.8076, 0.0420, 0.0689, 0.0815]) -Greedy action tensor([ 1.5567, -0.6217, -0.2063, 0.2777]) tensor([0.6398, 0.0724, 0.1097, 0.1781]) -Greedy action tensor([ 1.4849, -0.5851, -0.3503, 0.4496]) tensor([0.6094, 0.0769, 0.0973, 0.2164]) -Greedy action tensor([ 1.1185, -0.3863, -0.1711, 0.3587]) tensor([0.5089, 0.1130, 0.1401, 0.2380]) -Greedy action tensor([ 2.2887, -0.1075, -0.6418, 0.3982]) tensor([0.7720, 0.0703, 0.0412, 0.1166]) -Greedy action tensor([ 1.2748, -0.4945, -0.6173, 0.0991]) tensor([0.6136, 0.1046, 0.0925, 0.1893]) -Greedy action tensor([ 1.4153, -0.2708, -0.9646, 0.7204]) tensor([0.5628, 0.1042, 0.0521, 0.2809]) -Greedy action tensor([ 1.7583, -0.6184, -0.3445, 0.3248]) tensor([0.6880, 0.0639, 0.0840, 0.1641]) -Greedy action tensor([ 1.6756, -0.2933, -1.2816, 0.7257]) tensor([0.6336, 0.0885, 0.0329, 0.2450]) -Greedy action tensor([ 1.3459, 0.3338, -0.4680, 0.4214]) tensor([0.5200, 0.1890, 0.0848, 0.2063]) -Greedy action tensor([ 1.8093, -0.9485, -0.6034, 0.3284]) tensor([0.7244, 0.0460, 0.0649, 0.1648]) -Greedy action tensor([ 1.3682, -0.2786, -0.5388, 0.5083]) tensor([0.5668, 0.1092, 0.0842, 0.2399]) -Greedy action tensor([ 2.1746, 0.1488, -0.0033, 0.3216]) tensor([0.7133, 0.0941, 0.0808, 0.1118]) -Greedy action tensor([ 2.2041, -1.4687, 0.1131, 0.9525]) tensor([0.6969, 0.0177, 0.0861, 0.1993]) -Greedy action tensor([ 1.5392, -0.2659, -0.3939, 0.0823]) tensor([0.6485, 0.1066, 0.0938, 0.1511]) -Greedy action tensor([ 1.1212, -0.6356, -0.3132, -0.3421]) tensor([0.6089, 0.1051, 0.1451, 0.1409]) -Greedy action tensor([ 2.3183, -1.6019, -0.1523, 1.0426]) tensor([0.7227, 0.0143, 0.0611, 0.2018]) -Greedy action tensor([ 1.8411, -0.2699, -0.2229, 0.0564]) tensor([0.7063, 0.0855, 0.0897, 0.1185]) -Greedy action tensor([ 1.8045, -1.0944, -0.4526, 0.1645]) tensor([0.7387, 0.0407, 0.0773, 0.1433]) -Greedy action tensor([ 1.7599, -0.6188, -0.6980, 0.4212]) tensor([0.6942, 0.0643, 0.0594, 0.1820]) -Greedy action tensor([ 1.8658, -0.4320, -0.1984, -0.0989]) tensor([0.7312, 0.0735, 0.0928, 0.1025]) -Greedy action tensor([ 1.7604, -0.5862, -0.4786, 0.3001]) tensor([0.6971, 0.0667, 0.0743, 0.1619]) -Greedy action tensor([ 1.4229, -0.5791, -0.4179, 0.1610]) tensor([0.6342, 0.0857, 0.1006, 0.1795]) -Greedy action tensor([ 1.3187, -0.5877, -0.6540, 0.2646]) tensor([0.6112, 0.0908, 0.0850, 0.2130]) -Greedy action tensor([ 1.4358, -0.6475, -0.0394, 0.4168]) tensor([0.5834, 0.0726, 0.1334, 0.2106]) -Greedy action tensor([ 0.9942, -0.0096, -0.3994, 0.2723]) tensor([0.4761, 0.1745, 0.1182, 0.2313]) -Greedy action tensor([ 1.2798, -0.4180, -0.5391, 0.2338]) tensor([0.5894, 0.1079, 0.0956, 0.2071]) -Greedy action tensor([ 1.3253, -0.6104, -0.4045, 0.1920]) tensor([0.6084, 0.0878, 0.1079, 0.1959]) -Greedy action tensor([ 2.0572, -1.0779, 0.1750, 0.9105]) tensor([0.6607, 0.0287, 0.1006, 0.2099]) -Greedy action tensor([ 2.1626, -0.3673, -0.5020, 0.1136]) tensor([0.7824, 0.0623, 0.0545, 0.1008]) -Greedy action tensor([ 1.6454, -0.9049, -0.3081, 0.3619]) tensor([0.6681, 0.0521, 0.0947, 0.1851]) -Greedy action tensor([ 1.8058, -0.7969, -0.8154, 0.3362]) tensor([0.7263, 0.0538, 0.0528, 0.1671]) -Greedy action tensor([ 1.3582, -0.2214, -0.4488, 0.2780]) tensor([0.5849, 0.1205, 0.0960, 0.1986]) -Greedy action tensor([ 1.3303, -1.0264, -0.3595, 0.2771]) tensor([0.6142, 0.0582, 0.1134, 0.2143]) -Greedy action tensor([ 1.0441, -0.3490, -0.6323, 0.1125]) tensor([0.5467, 0.1357, 0.1023, 0.2153]) -Greedy action tensor([ 1.7468, -0.0913, -0.5934, -0.1217]) tensor([0.7093, 0.1129, 0.0683, 0.1095]) -Greedy action tensor([ 1.7137, -0.5597, -0.2205, 0.6097]) tensor([0.6333, 0.0652, 0.0915, 0.2100]) -Greedy action tensor([ 0.8803, -0.4879, 0.1700, -0.1370]) tensor([0.4745, 0.1208, 0.2332, 0.1715]) -Greedy action tensor([ 1.5460, -1.0480, -0.4749, 0.7216]) tensor([0.6076, 0.0454, 0.0805, 0.2664]) -Greedy action tensor([ 1.5570, -0.6458, -0.6714, 0.4724]) tensor([0.6426, 0.0710, 0.0692, 0.2172]) -Greedy action tensor([ 2.1367, -1.3431, -0.0422, 0.3893]) tensor([0.7586, 0.0234, 0.0858, 0.1322]) -Greedy action tensor([ 1.4671, 0.3413, -0.8080, 0.2945]) tensor([0.5758, 0.1868, 0.0592, 0.1782]) -Greedy action tensor([ 1.3108, -0.3314, -0.7808, -0.0400]) tensor([0.6345, 0.1228, 0.0783, 0.1644]) -Greedy action tensor([ 2.7365, -1.5220, -0.0654, 0.8344]) tensor([0.8169, 0.0116, 0.0496, 0.1219]) -Greedy action tensor([ 1.2855, -0.3356, -0.9276, 0.0415]) tensor([0.6269, 0.1239, 0.0686, 0.1807]) -Greedy action tensor([ 1.8707, -0.9287, -0.7593, 1.2804]) tensor([0.5927, 0.0361, 0.0427, 0.3285]) -Greedy action tensor([ 1.7212, -0.6528, -0.4266, 0.1923]) tensor([0.7009, 0.0653, 0.0818, 0.1520]) -Greedy action tensor([ 1.3291, -0.1543, -0.4870, -0.1806]) tensor([0.6209, 0.1409, 0.1010, 0.1372]) -Greedy action tensor([ 1.6185, -0.5819, -0.0435, 0.1519]) tensor([0.6531, 0.0723, 0.1239, 0.1507]) -Greedy action tensor([ 2.8488, -1.4490, -0.5041, 0.9621]) tensor([0.8332, 0.0113, 0.0291, 0.1263]) -Greedy action tensor([ 1.2064, -0.1716, -0.5366, 0.5957]) tensor([0.5076, 0.1280, 0.0888, 0.2756]) -Greedy action tensor([ 0.8697, -0.2069, -0.3619, 0.0429]) tensor([0.4831, 0.1646, 0.1410, 0.2113]) -Greedy action tensor([ 1.3245, -0.0255, -1.2504, 0.3080]) tensor([0.5892, 0.1527, 0.0449, 0.2132]) -Greedy action tensor([ 1.7794, -0.7641, -0.1403, 0.1699]) tensor([0.7016, 0.0551, 0.1029, 0.1403]) -Greedy action tensor([ 1.6687, -1.0252, -0.2517, 0.5858]) tensor([0.6440, 0.0435, 0.0944, 0.2181]) -Greedy action tensor([ 1.6321, -0.8115, -0.1639, 0.0836]) tensor([0.6824, 0.0593, 0.1133, 0.1451]) -Greedy action tensor([ 1.8714, 0.1560, -1.0272, -0.1119]) tensor([0.7285, 0.1311, 0.0401, 0.1003]) -Greedy action tensor([ 1.3342, -0.6332, -0.4930, 0.3462]) tensor([0.5977, 0.0836, 0.0962, 0.2225]) -Greedy action tensor([ 1.3711, -0.1463, 0.0301, 0.0295]) tensor([0.5740, 0.1258, 0.1501, 0.1500]) -Greedy action tensor([ 1.2488, -0.8591, -0.2209, 0.7653]) tensor([0.5081, 0.0617, 0.1169, 0.3133]) -Greedy action tensor([ 1.3963, -0.1104, -1.0376, 0.1702]) tensor([0.6239, 0.1383, 0.0547, 0.1831]) -Greedy action tensor([-0.3201, 0.5283, 0.0833, -0.4432]) tensor([0.1749, 0.4086, 0.2618, 0.1547]) -Greedy action tensor([-0.6070, 0.9344, -0.8860, 0.0128]) tensor([0.1207, 0.5637, 0.0913, 0.2243]) -Greedy action tensor([-0.4987, -0.7562, -0.4496, -0.8722]) tensor([0.2848, 0.2201, 0.2991, 0.1960]) -Greedy action tensor([-0.6303, -0.9697, -0.3618, -1.2544]) tensor([0.2812, 0.2003, 0.3678, 0.1507]) -Greedy action tensor([-0.5511, -0.8720, -0.8439, -0.3633]) tensor([0.2719, 0.1972, 0.2029, 0.3280]) -Greedy action tensor([ 0.0039, -0.5323, -0.1146, -0.3739]) tensor([0.3166, 0.1852, 0.2812, 0.2170]) -Greedy action tensor([ 1.6205, 0.2212, -0.5911, 1.4722]) tensor([0.4508, 0.1112, 0.0494, 0.3886]) -Greedy action tensor([-0.5498, -0.7059, 0.6555, -0.4196]) tensor([0.1579, 0.1351, 0.5271, 0.1799]) -Greedy action tensor([ 0.1555, -0.7817, -0.3130, 0.2801]) tensor([0.3174, 0.1243, 0.1987, 0.3595]) -Greedy action tensor([ 0.1604, -0.6386, 0.1203, -0.0561]) tensor([0.3110, 0.1399, 0.2987, 0.2504]) -Greedy action tensor([ 1.1655, -0.5511, -0.4459, 0.6308]) tensor([0.5089, 0.0914, 0.1016, 0.2981]) -Greedy action tensor([-0.2427, -0.1169, -1.0207, 0.5973]) tensor([0.2037, 0.2310, 0.0936, 0.4718]) -Greedy action tensor([-0.3095, -0.4330, 0.6101, -0.5489]) tensor([0.1931, 0.1706, 0.4843, 0.1520]) -Greedy action tensor([ 0.8098, -0.1638, -0.6329, 0.4982]) tensor([0.4262, 0.1610, 0.1007, 0.3121]) -Greedy action tensor([-0.3461, -0.2560, 0.3182, -0.1617]) tensor([0.1908, 0.2088, 0.3708, 0.2295]) -Greedy action tensor([ 0.1034, -0.8488, 0.4689, -0.4201]) tensor([0.2925, 0.1128, 0.4215, 0.1732]) -Greedy action tensor([-0.4471, 0.1246, -0.3153, -0.4386]) tensor([0.2032, 0.3600, 0.2319, 0.2050]) -Greedy action tensor([-0.6779, -0.8993, 0.2815, -0.5925]) tensor([0.1818, 0.1457, 0.4745, 0.1980]) -Greedy action tensor([ 0.1658, 0.3784, -0.2221, -0.5326]) tensor([0.2930, 0.3624, 0.1988, 0.1457]) -Greedy action tensor([-0.4548, -0.3531, 1.0709, -1.2648]) tensor([0.1399, 0.1548, 0.6431, 0.0622]) -Greedy action tensor([ 0.5394, 0.2445, 0.4632, -0.2836]) tensor([0.3215, 0.2394, 0.2979, 0.1412]) -Greedy action tensor([-0.3439, -0.0623, -0.6715, -0.1169]) tensor([0.2325, 0.3081, 0.1676, 0.2918]) -Greedy action tensor([-0.2597, -0.9464, 0.2403, 0.3148]) tensor([0.2029, 0.1021, 0.3345, 0.3604]) -Greedy action tensor([ 0.3991, 0.3407, 0.4769, -0.0648]) tensor([0.2737, 0.2582, 0.2959, 0.1721]) -Greedy action tensor([-1.0366, -0.1578, -0.1073, -0.1976]) tensor([0.1211, 0.2917, 0.3068, 0.2803]) -Greedy action tensor([-1.3809, -1.1378, 0.6561, -1.0121]) tensor([0.0878, 0.1120, 0.6733, 0.1270]) -Greedy action tensor([-0.5945, -1.1182, 1.0614, -0.8272]) tensor([0.1312, 0.0777, 0.6872, 0.1039]) -Greedy action tensor([ 0.4134, -0.5748, 0.9759, -0.6277]) tensor([0.2873, 0.1070, 0.5043, 0.1014]) -Greedy action tensor([-0.9498, -0.5772, -0.7562, -0.4515]) tensor([0.1883, 0.2733, 0.2285, 0.3099]) -Greedy action tensor([-0.9399, -1.2507, 0.1191, -1.3311]) tensor([0.1889, 0.1385, 0.5448, 0.1278]) -Greedy action tensor([-0.0860, 0.0489, -0.6770, -0.9160]) tensor([0.3191, 0.3651, 0.1767, 0.1391]) -Greedy action tensor([-0.0158, -0.2436, -0.1233, 0.4082]) tensor([0.2368, 0.1886, 0.2127, 0.3619]) -Greedy action tensor([ 0.1226, -0.6386, -0.4478, -0.4764]) tensor([0.3873, 0.1809, 0.2190, 0.2128]) -Greedy action tensor([-0.3022, -0.0617, -0.6607, -0.7001]) tensor([0.2746, 0.3492, 0.1918, 0.1844]) -Greedy action tensor([ 0.9178, -1.7777, -0.0547, 0.6591]) tensor([0.4509, 0.0304, 0.1705, 0.3481]) -Greedy action tensor([-1.0592, -0.6001, -0.6386, -0.5050]) tensor([0.1711, 0.2707, 0.2605, 0.2977]) -Greedy action tensor([ 1.1794, -0.2179, -0.3873, 1.1718]) tensor([0.4084, 0.1010, 0.0853, 0.4053]) -Greedy action tensor([ 0.2767, -0.8192, 0.1965, -1.2001]) tensor([0.4023, 0.1345, 0.3713, 0.0919]) -Greedy action tensor([-1.2440, -1.4443, 0.7282, -1.0529]) tensor([0.0979, 0.0801, 0.7035, 0.1185]) -Greedy action tensor([ 1.5996, -1.8480, 0.3994, 0.4761]) tensor([0.6031, 0.0192, 0.1816, 0.1961]) -Greedy action tensor([-0.4482, 1.1064, -0.1777, -1.4680]) tensor([0.1351, 0.6392, 0.1770, 0.0487]) -Greedy action tensor([ 0.7211, -0.0525, -0.4274, -0.8324]) tensor([0.5025, 0.2318, 0.1593, 0.1063]) -Greedy action tensor([ 0.7766, -0.7109, -0.7427, 0.3765]) tensor([0.4728, 0.1068, 0.1035, 0.3169]) -Greedy action tensor([-0.4140, -0.4571, 1.0717, -0.0167]) tensor([0.1272, 0.1218, 0.5618, 0.1892]) -Greedy action tensor([-0.4897, 0.3457, 0.6348, -0.2589]) tensor([0.1308, 0.3016, 0.4028, 0.1648]) -Greedy action tensor([-0.6333, -1.4874, 0.9574, -0.8504]) tensor([0.1401, 0.0596, 0.6875, 0.1128]) -Greedy action tensor([-0.3382, -0.5676, -0.2792, -0.2028]) tensor([0.2499, 0.1987, 0.2651, 0.2862]) -Greedy action tensor([ 0.3471, -0.0434, 0.5274, -0.3486]) tensor([0.2965, 0.2006, 0.3550, 0.1479]) -Greedy action tensor([-0.7713, -1.3342, -0.0592, 0.8083]) tensor([0.1182, 0.0673, 0.2409, 0.5736]) -Greedy action tensor([ 1.6605, -0.4539, 0.0242, 0.0362]) tensor([0.6612, 0.0798, 0.1287, 0.1303]) -Greedy action tensor([ 0.8128, 0.2754, 0.6424, -0.8121]) tensor([0.3810, 0.2226, 0.3213, 0.0750]) -Greedy action tensor([ 0.2992, 0.1897, 0.7008, -0.7854]) tensor([0.2682, 0.2404, 0.4007, 0.0907]) -Greedy action tensor([-0.4128, -2.0152, 0.7923, 0.1803]) tensor([0.1575, 0.0317, 0.5257, 0.2851]) -Greedy action tensor([ 0.1965, 0.0139, -0.6736, -0.5239]) tensor([0.3652, 0.3042, 0.1530, 0.1777]) -Greedy action tensor([-0.1140, -0.1641, 0.3431, -0.9620]) tensor([0.2526, 0.2403, 0.3990, 0.1082]) -Greedy action tensor([-0.5475, -0.6115, -1.5467, -0.1085]) tensor([0.2592, 0.2432, 0.0955, 0.4021]) -Greedy action tensor([-0.2565, -1.1272, 1.2376, -1.3993]) tensor([0.1615, 0.0676, 0.7194, 0.0515]) -Greedy action tensor([ 0.5400, -0.9951, 0.5116, 0.0164]) tensor([0.3597, 0.0775, 0.3496, 0.2131]) -Greedy action tensor([ 0.3462, 0.2543, -0.4881, 0.6717]) tensor([0.2680, 0.2445, 0.1164, 0.3711]) -Greedy action tensor([ 1.2514, -0.4900, -0.3270, 0.2512]) tensor([0.5716, 0.1002, 0.1179, 0.2102]) -Greedy action tensor([ 0.6372, -1.8080, -0.3712, 0.2364]) tensor([0.4714, 0.0409, 0.1720, 0.3157]) -Greedy action tensor([ 0.2614, -1.1238, 0.5558, 0.3096]) tensor([0.2746, 0.0687, 0.3686, 0.2881]) -Greedy action tensor([-0.3038, -0.5554, 0.3776, 0.3525]) tensor([0.1760, 0.1368, 0.3479, 0.3393]) -Greedy action tensor([ 0.1701, 0.2151, 0.0641, -0.7970]) tensor([0.3007, 0.3145, 0.2704, 0.1143]) -Greedy action tensor([ 0.2337, -0.7585, -0.3018, -0.7073]) tensor([0.4262, 0.1580, 0.2495, 0.1663]) -Greedy action tensor([ 0.9667, -0.9164, -0.5006, 0.1372]) tensor([0.5498, 0.0836, 0.1268, 0.2398]) -Greedy action tensor([ 0.3863, -0.9654, 0.0965, -0.2170]) tensor([0.3915, 0.1013, 0.2930, 0.2142]) -Greedy action tensor([-0.5945, -1.4017, -0.6386, -0.8332]) tensor([0.3134, 0.1398, 0.2999, 0.2469]) -Greedy action tensor([ 0.9720, -1.3021, 1.0317, -1.1360]) tensor([0.4375, 0.0450, 0.4644, 0.0531]) -Greedy action tensor([ 0.1171, -1.9792, 0.7982, -1.2963]) tensor([0.2992, 0.0368, 0.5912, 0.0728]) -Greedy action tensor([-0.4067, -1.4068, 0.4977, -0.8796]) tensor([0.2241, 0.0824, 0.5537, 0.1397]) -Greedy action tensor([-0.1446, -0.4335, -0.2133, -0.6008]) tensor([0.3015, 0.2259, 0.2815, 0.1911]) -Greedy action tensor([ 0.6030, 0.2243, -0.5762, -0.3406]) tensor([0.4199, 0.2875, 0.1291, 0.1634]) -Greedy action tensor([ 0.9791, -0.0859, -0.1444, 0.5676]) tensor([0.4287, 0.1478, 0.1394, 0.2841]) -Greedy action tensor([ 0.0819, -0.7898, 0.7691, -0.3703]) tensor([0.2474, 0.1035, 0.4918, 0.1574]) -Greedy action tensor([-0.1528, -0.3853, -0.1620, 0.2223]) tensor([0.2359, 0.1870, 0.2338, 0.3433]) -Greedy action tensor([-0.5111, -0.9332, 0.5887, -1.2590]) tensor([0.1948, 0.1277, 0.5852, 0.0922]) -Greedy action tensor([-0.1008, -0.0867, -0.0274, -0.3583]) tensor([0.2588, 0.2625, 0.2786, 0.2001]) -Greedy action tensor([-0.6879, -0.4189, 0.1232, -0.4243]) tensor([0.1706, 0.2233, 0.3840, 0.2221]) -Greedy action tensor([ 0.5447, -0.5436, -0.7909, -0.4474]) tensor([0.5075, 0.1709, 0.1335, 0.1882]) -Greedy action tensor([-0.2323, 0.0584, -0.7224, -0.8691]) tensor([0.2875, 0.3844, 0.1761, 0.1520]) -Greedy action tensor([-1.9207, -0.4175, 0.6576, -0.1685]) tensor([0.0409, 0.1840, 0.5391, 0.2360]) -Greedy action tensor([-1.1469, 0.0613, -0.4134, -0.1852]) tensor([0.1105, 0.3700, 0.2302, 0.2892]) -Greedy action tensor([-1.4243, 0.2223, 0.3298, -0.0040]) tensor([0.0621, 0.3222, 0.3588, 0.2569]) -Greedy action tensor([-1.6646, -0.5235, 0.5202, -0.0224]) tensor([0.0550, 0.1721, 0.4888, 0.2841]) -Greedy action tensor([0.8222, 0.6108, 0.5376, 1.3270]) tensor([0.2371, 0.1919, 0.1783, 0.3927]) -Greedy action tensor([-0.5265, -0.4378, 0.1792, 0.2320]) tensor([0.1599, 0.1747, 0.3239, 0.3415]) -Greedy action tensor([-1.9205, -0.4408, 0.6559, -0.1700]) tensor([0.0412, 0.1807, 0.5412, 0.2369]) -Greedy action tensor([-1.5992, -0.4616, 0.4880, 0.0033]) tensor([0.0583, 0.1819, 0.4702, 0.2896]) -Greedy action tensor([-1.5564, -0.5143, 0.5699, -0.0479]) tensor([0.0597, 0.1694, 0.5009, 0.2700]) -Greedy action tensor([-1.8536, -0.4536, 0.6168, -0.1386]) tensor([0.0446, 0.1807, 0.5271, 0.2476]) -Greedy action tensor([-1.9254, -0.4476, 0.6587, -0.1727]) tensor([0.0410, 0.1796, 0.5430, 0.2364]) -Greedy action tensor([-1.9181, -0.4397, 0.6550, -0.1663]) tensor([0.0412, 0.1808, 0.5403, 0.2377]) -Greedy action tensor([-1.6272, -0.3289, 0.4966, -0.0836]) tensor([0.0565, 0.2069, 0.4723, 0.2644]) -Greedy action tensor([-1.8252, -0.4560, 0.6096, -0.1217]) tensor([0.0458, 0.1800, 0.5226, 0.2515]) -Greedy action tensor([-1.8849, -0.4082, 0.6313, -0.1562]) tensor([0.0427, 0.1872, 0.5293, 0.2408]) -Greedy action tensor([-1.4852, 0.6573, 0.3529, -0.1028]) tensor([0.0505, 0.4305, 0.3176, 0.2013]) -Greedy action tensor([-1.4799, -0.5838, 0.6262, 0.0524]) tensor([0.0614, 0.1504, 0.5042, 0.2841]) -Greedy action tensor([-1.8970, -0.3909, 0.6490, -0.1560]) tensor([0.0417, 0.1881, 0.5322, 0.2379]) -Greedy action tensor([-1.8671, -0.4480, 0.6330, -0.1454]) tensor([0.0436, 0.1804, 0.5318, 0.2442]) -Greedy action tensor([-0.9899, 0.3745, 0.1873, 0.1008]) tensor([0.0898, 0.3514, 0.2914, 0.2673]) -Greedy action tensor([-1.9250, -0.3207, 0.6377, -0.1770]) tensor([0.0405, 0.2015, 0.5254, 0.2326]) -Greedy action tensor([-1.5977, -0.4318, 0.4832, -0.0155]) tensor([0.0585, 0.1878, 0.4689, 0.2848]) -Greedy action tensor([-0.7739, -0.3479, 0.1630, 0.4470]) tensor([0.1180, 0.1807, 0.3012, 0.4001]) -Greedy action tensor([-0.8753, -0.5507, 0.2272, 0.1235]) tensor([0.1233, 0.1706, 0.3713, 0.3348]) -Greedy action tensor([-1.0842, 0.4726, -0.8462, -0.5999]) tensor([0.1158, 0.5493, 0.1469, 0.1880]) -Greedy action tensor([-1.2653, 0.7570, 0.3860, -0.4408]) tensor([0.0623, 0.4707, 0.3248, 0.1421]) -Greedy action tensor([-1.8406, -0.3031, 0.5956, -0.1288]) tensor([0.0442, 0.2057, 0.5052, 0.2449]) -Greedy action tensor([-1.9141, -0.4057, 0.6498, -0.1635]) tensor([0.0412, 0.1863, 0.5352, 0.2373]) -Greedy action tensor([-1.8804, -0.4499, 0.6377, -0.1524]) tensor([0.0431, 0.1801, 0.5343, 0.2425]) -Greedy action tensor([-1.3176, 0.3603, 0.4706, -0.7295]) tensor([0.0708, 0.3788, 0.4230, 0.1274]) -Greedy action tensor([-1.8200, -0.4124, 0.6477, -0.0703]) tensor([0.0442, 0.1805, 0.5211, 0.2542]) -Greedy action tensor([-0.9311, 0.8058, 0.1756, -0.0580]) tensor([0.0827, 0.4694, 0.2500, 0.1979]) -Greedy action tensor([-1.4809, -0.2198, 0.4076, -0.0247]) tensor([0.0648, 0.2288, 0.4284, 0.2780]) -Greedy action tensor([-1.4566, -0.5228, 0.5040, 0.1180]) tensor([0.0646, 0.1644, 0.4590, 0.3120]) -Greedy action tensor([-1.8956, -0.4575, 0.6476, -0.1568]) tensor([0.0423, 0.1783, 0.5384, 0.2409]) -Greedy action tensor([-1.9036, -0.3851, 0.6501, -0.1540]) tensor([0.0414, 0.1889, 0.5318, 0.2380]) -Greedy action tensor([-1.4253, 0.3348, 0.3923, -0.1863]) tensor([0.0609, 0.3540, 0.3749, 0.2102]) -Greedy action tensor([-1.8773, -0.3639, 0.6224, -0.1679]) tensor([0.0430, 0.1954, 0.5239, 0.2377]) -Greedy action tensor([-1.5591, 0.1938, 0.3844, 0.0221]) tensor([0.0537, 0.3100, 0.3751, 0.2611]) -Greedy action tensor([-1.8602, -0.4342, 0.6237, -0.1413]) tensor([0.0440, 0.1831, 0.5274, 0.2454]) -Greedy action tensor([-1.7700, -0.3551, 0.5771, -0.1463]) tensor([0.0484, 0.1994, 0.5065, 0.2457]) -Greedy action tensor([-1.3799, -0.5593, 0.3997, 0.1422]) tensor([0.0726, 0.1648, 0.4301, 0.3325]) -Greedy action tensor([-1.8543, -0.3597, 0.5850, -0.1397]) tensor([0.0445, 0.1983, 0.5101, 0.2471]) -Greedy action tensor([-1.8561, -0.4343, 0.6272, -0.1265]) tensor([0.0439, 0.1821, 0.5263, 0.2477]) -Greedy action tensor([-0.7625, 0.6829, -0.3647, -0.1970]) tensor([0.1178, 0.4997, 0.1753, 0.2073]) -Greedy action tensor([-1.8913, -0.3223, 0.6289, -0.1354]) tensor([0.0416, 0.1999, 0.5175, 0.2410]) -Greedy action tensor([-1.8330, -0.1168, 0.5581, -0.1117]) tensor([0.0433, 0.2410, 0.4734, 0.2423]) -Greedy action tensor([-1.6231, -0.6306, 0.5798, 0.0578]) tensor([0.0552, 0.1489, 0.4995, 0.2964]) -Greedy action tensor([-1.4940, -0.0165, -0.2023, -0.5422]) tensor([0.0861, 0.3774, 0.3134, 0.2231]) -Greedy action tensor([-1.8869, -0.4049, 0.6333, -0.1539]) tensor([0.0426, 0.1874, 0.5292, 0.2408]) -Greedy action tensor([-1.8740, -0.4404, 0.6790, -0.1193]) tensor([0.0420, 0.1761, 0.5393, 0.2427]) -Greedy action tensor([-1.5369, -0.0190, 0.3760, -0.6509]) tensor([0.0677, 0.3091, 0.4588, 0.1643]) -Greedy action tensor([-1.8583, -0.4476, 0.6298, -0.1369]) tensor([0.0440, 0.1803, 0.5296, 0.2460]) -Greedy action tensor([-1.9022, -0.3476, 0.6310, -0.1526]) tensor([0.0415, 0.1966, 0.5230, 0.2389]) -Greedy action tensor([-1.9309, -0.4261, 0.6606, -0.1730]) tensor([0.0406, 0.1827, 0.5415, 0.2353]) -Greedy action tensor([-1.4180, -0.1473, 0.3802, -0.0407]) tensor([0.0686, 0.2446, 0.4146, 0.2721]) -Greedy action tensor([-1.8920, -0.4417, 0.6335, -0.1690]) tensor([0.0428, 0.1825, 0.5349, 0.2397]) -Greedy action tensor([-1.6305, -0.5583, 0.5080, 0.0380]) tensor([0.0565, 0.1650, 0.4791, 0.2995]) -Greedy action tensor([-1.6307, -0.0678, 0.4843, 0.0086]) tensor([0.0520, 0.2484, 0.4314, 0.2681]) -Greedy action tensor([-1.9458, -0.4455, 0.6652, -0.1810]) tensor([0.0401, 0.1798, 0.5459, 0.2342]) -Greedy action tensor([-0.9485, 0.9753, 0.1103, 0.4317]) tensor([0.0680, 0.4656, 0.1960, 0.2704]) -Greedy action tensor([-1.8173, -0.2748, 0.6011, -0.1029]) tensor([0.0445, 0.2082, 0.5000, 0.2473]) -Greedy action tensor([-1.8328, -0.3571, 0.6005, -0.1265]) tensor([0.0449, 0.1963, 0.5115, 0.2473]) -Greedy action tensor([-1.8343, -0.4683, 0.7566, 0.0032]) tensor([0.0407, 0.1597, 0.5436, 0.2559]) -Greedy action tensor([-1.8761, -0.3252, 0.6296, -0.1388]) tensor([0.0423, 0.1994, 0.5181, 0.2403]) -Greedy action tensor([-1.8260, -0.4312, 0.6052, -0.1398]) tensor([0.0459, 0.1850, 0.5215, 0.2476]) -Greedy action tensor([-1.9007, -0.3927, 0.6387, -0.1779]) tensor([0.0420, 0.1899, 0.5327, 0.2354]) -Greedy action tensor([-1.9171, -0.4370, 0.6526, -0.1668]) tensor([0.0413, 0.1815, 0.5395, 0.2377]) -Greedy action tensor([-1.8341, -0.4232, 0.6458, -0.0797]) tensor([0.0438, 0.1797, 0.5232, 0.2533]) -Greedy action tensor([-1.9244, -0.4424, 0.6622, -0.1705]) tensor([0.0409, 0.1799, 0.5430, 0.2362]) -Greedy action tensor([-1.7809, -0.4868, 0.5845, -0.1172]) tensor([0.0486, 0.1773, 0.5175, 0.2566]) -Greedy action tensor([-1.7953, -0.3908, 0.5910, -0.1338]) tensor([0.0471, 0.1920, 0.5126, 0.2483]) -Greedy action tensor([-1.9184, -0.4388, 0.6533, -0.1687]) tensor([0.0413, 0.1812, 0.5401, 0.2374]) -Greedy action tensor([-1.9343, -0.4257, 0.6602, -0.1753]) tensor([0.0405, 0.1829, 0.5417, 0.2349]) -Greedy action tensor([-1.8535, -0.2562, 0.6019, -0.1462]) tensor([0.0433, 0.2138, 0.5043, 0.2386]) -Greedy action tensor([-1.6490, -0.3571, 0.5244, -0.1158]) tensor([0.0554, 0.2015, 0.4866, 0.2565]) -Greedy action tensor([-0.1871, -0.2098, 0.2053, 0.2514]) tensor([0.1997, 0.1952, 0.2956, 0.3096]) -Greedy action tensor([-0.3490, 0.7885, 0.0779, 0.5191]) tensor([0.1245, 0.3882, 0.1907, 0.2966]) -Greedy action tensor([-0.8672, 0.0450, 0.2004, -0.0321]) tensor([0.1149, 0.2861, 0.3342, 0.2648]) -Greedy action tensor([-1.7300, -0.4837, 0.5592, -0.0728]) tensor([0.0510, 0.1775, 0.5037, 0.2677]) -Greedy action tensor([-1.8927, -0.4488, 0.6420, -0.1529]) tensor([0.0425, 0.1800, 0.5356, 0.2419]) -Greedy action tensor([ 1.1489, -0.8225, -0.1066, -0.6792]) tensor([0.6310, 0.0879, 0.1798, 0.1014]) -Greedy action tensor([ 0.9669, -0.8274, 0.0096, -0.5610]) tensor([0.5659, 0.0941, 0.2173, 0.1228]) -Greedy action tensor([ 1.7316, -1.0813, -0.0599, -0.5147]) tensor([0.7504, 0.0451, 0.1251, 0.0794]) -Greedy action tensor([ 0.7203, 0.0360, 0.1514, -0.1192]) tensor([0.3996, 0.2016, 0.2262, 0.1726]) -Greedy action tensor([0.6679, 0.0552, 0.0050, 0.0877]) tensor([0.3821, 0.2071, 0.1969, 0.2139]) -Greedy action tensor([ 0.9748, -0.6119, -0.0448, -0.4927]) tensor([0.5568, 0.1139, 0.2009, 0.1284]) -Greedy action tensor([ 0.2231, 0.1645, -0.1041, -0.0156]) tensor([0.2897, 0.2732, 0.2089, 0.2282]) -Greedy action tensor([ 0.6859, -0.4400, -0.0070, -0.3180]) tensor([0.4564, 0.1480, 0.2283, 0.1673]) -Greedy action tensor([ 0.8372, -0.5387, -0.0561, -0.2436]) tensor([0.4997, 0.1262, 0.2045, 0.1696]) -Greedy action tensor([ 1.0765, -0.9276, 0.0928, -0.5597]) tensor([0.5871, 0.0791, 0.2195, 0.1143]) -Greedy action tensor([ 0.8136, 0.1230, -0.1530, -0.1996]) tensor([0.4455, 0.2233, 0.1695, 0.1617]) -Greedy action tensor([ 0.7409, -0.4096, -0.0150, -0.1188]) tensor([0.4526, 0.1432, 0.2125, 0.1916]) -Greedy action tensor([ 0.9833, -0.9255, 0.0825, -0.4075]) tensor([0.5545, 0.0822, 0.2253, 0.1380]) -Greedy action tensor([ 1.1839, -0.6401, 0.0903, -0.3707]) tensor([0.5856, 0.0945, 0.1962, 0.1237]) -Greedy action tensor([ 0.6375, -0.5412, -0.0115, -0.1194]) tensor([0.4349, 0.1338, 0.2273, 0.2040]) -Greedy action tensor([ 0.5781, 0.0227, -0.1084, 0.0823]) tensor([0.3723, 0.2136, 0.1874, 0.2267]) -Greedy action tensor([ 0.6211, -0.1532, -0.0442, -0.2158]) tensor([0.4152, 0.1915, 0.2135, 0.1798]) -Greedy action tensor([ 0.6913, -0.3697, -0.0058, -0.3439]) tensor([0.4547, 0.1574, 0.2264, 0.1615]) -Greedy action tensor([ 1.0432, -0.5194, -0.2725, -0.3638]) tensor([0.5805, 0.1217, 0.1557, 0.1421]) -Greedy action tensor([ 1.0489, -0.7396, 0.1830, -0.5723]) tensor([0.5600, 0.0936, 0.2356, 0.1107]) -Greedy action tensor([ 0.3953, -0.2847, -0.1924, 0.0738]) tensor([0.3588, 0.1818, 0.1993, 0.2601]) -Greedy action tensor([ 0.9919, -0.7409, 0.0764, -0.2879]) tensor([0.5390, 0.0953, 0.2158, 0.1499]) -Greedy action tensor([ 1.3783, -0.7698, -0.1225, -0.6268]) tensor([0.6783, 0.0792, 0.1512, 0.0913]) -Greedy action tensor([ 0.7825, -0.2025, -0.0049, -0.1826]) tensor([0.4526, 0.1690, 0.2059, 0.1724]) -Greedy action tensor([ 0.6956, -0.3035, -0.0715, -0.3071]) tensor([0.4547, 0.1674, 0.2111, 0.1668]) -Greedy action tensor([ 0.2826, 0.0679, -0.0313, -0.0716]) tensor([0.3087, 0.2491, 0.2255, 0.2166]) -Greedy action tensor([ 1.0945, -0.9193, 0.0899, -0.6293]) tensor([0.5959, 0.0795, 0.2182, 0.1063]) -Greedy action tensor([ 0.6339, -0.4357, -0.0934, -0.2049]) tensor([0.4428, 0.1519, 0.2139, 0.1914]) -Greedy action tensor([ 0.5036, -0.2523, -0.0084, -0.4046]) tensor([0.4045, 0.1900, 0.2424, 0.1631]) -Greedy action tensor([ 0.5147, -0.1148, -0.1227, -0.0335]) tensor([0.3788, 0.2019, 0.2003, 0.2190]) -Greedy action tensor([ 0.6430, -0.2781, 0.1220, -0.0904]) tensor([0.4045, 0.1610, 0.2402, 0.1943]) -Greedy action tensor([ 0.4424, -0.0910, -0.1838, -0.0248]) tensor([0.3639, 0.2135, 0.1946, 0.2281]) -Greedy action tensor([ 0.7246, -0.4354, -0.1375, -0.2208]) tensor([0.4707, 0.1476, 0.1988, 0.1829]) -Greedy action tensor([ 0.7430, -0.4990, -0.0297, -0.3645]) tensor([0.4805, 0.1388, 0.2219, 0.1588]) -Greedy action tensor([ 0.7606, -0.1858, 0.0515, -0.3884]) tensor([0.4551, 0.1767, 0.2240, 0.1443]) -Greedy action tensor([ 1.1838, -0.6906, -0.1589, -0.5516]) tensor([0.6286, 0.0965, 0.1641, 0.1108]) -Greedy action tensor([ 0.7104, -0.4170, -0.0516, -0.1702]) tensor([0.4535, 0.1469, 0.2116, 0.1880]) -Greedy action tensor([ 0.6530, -0.3576, -0.0896, -0.1892]) tensor([0.4404, 0.1603, 0.2096, 0.1897]) -Greedy action tensor([ 0.8969, -0.5282, -0.0269, -0.5132]) tensor([0.5315, 0.1278, 0.2110, 0.1297]) -Greedy action tensor([ 0.5805, 0.1997, -0.1134, 0.0557]) tensor([0.3604, 0.2463, 0.1801, 0.2132]) -Greedy action tensor([ 0.1495, -0.1197, -0.1900, -0.1494]) tensor([0.3108, 0.2374, 0.2213, 0.2305]) -Greedy action tensor([ 0.2282, 0.0362, -0.0299, -0.2074]) tensor([0.3082, 0.2544, 0.2381, 0.1994]) -Greedy action tensor([ 1.0229, -0.8976, -0.1009, -0.3814]) tensor([0.5824, 0.0853, 0.1893, 0.1430]) -Greedy action tensor([ 0.4451, -0.4078, 0.0996, -0.3888]) tensor([0.3893, 0.1659, 0.2756, 0.1691]) -Greedy action tensor([ 0.7003, -0.6360, -0.1330, -0.1893]) tensor([0.4743, 0.1247, 0.2062, 0.1949]) -Greedy action tensor([ 0.8288, -0.2876, -0.1067, -0.2095]) tensor([0.4822, 0.1579, 0.1892, 0.1707]) -Greedy action tensor([ 0.9304, -0.5880, -0.1045, -0.2915]) tensor([0.5351, 0.1172, 0.1901, 0.1577]) -Greedy action tensor([ 0.9953, -0.1763, -0.0181, -0.3139]) tensor([0.5147, 0.1595, 0.1868, 0.1390]) -Greedy action tensor([ 0.8928, -0.4558, 0.1189, -0.3790]) tensor([0.4997, 0.1297, 0.2305, 0.1401]) -Greedy action tensor([ 0.3381, -0.0716, -0.0734, -0.1494]) tensor([0.3401, 0.2258, 0.2253, 0.2088]) -Greedy action tensor([ 0.8141, -0.4933, -0.1744, -0.6580]) tensor([0.5342, 0.1445, 0.1988, 0.1226]) -Greedy action tensor([ 0.6180, 0.0049, -0.0761, 0.0352]) tensor([0.3847, 0.2084, 0.1922, 0.2148]) -Greedy action tensor([ 0.6591, -0.2286, 0.0326, -0.1113]) tensor([0.4151, 0.1709, 0.2219, 0.1921]) -Greedy action tensor([ 0.5512, -0.0235, -0.0427, 0.0320]) tensor([0.3690, 0.2077, 0.2037, 0.2196]) -Greedy action tensor([ 1.2075, -0.5317, -0.0901, -0.4139]) tensor([0.6074, 0.1067, 0.1659, 0.1200]) -Greedy action tensor([ 0.7819, -0.6556, -0.0472, -0.5118]) tensor([0.5133, 0.1219, 0.2240, 0.1408]) -Greedy action tensor([ 0.6021, -0.5167, -0.0721, -0.4606]) tensor([0.4583, 0.1497, 0.2336, 0.1584]) -Greedy action tensor([ 0.9674, -0.3548, -0.0998, -0.3656]) tensor([0.5336, 0.1422, 0.1835, 0.1407]) -Greedy action tensor([ 0.9491, -0.4012, -0.1583, -0.1508]) tensor([0.5202, 0.1348, 0.1719, 0.1732]) -Greedy action tensor([ 1.0284, -0.8418, 0.2145, -0.6187]) tensor([0.5587, 0.0861, 0.2476, 0.1076]) -Greedy action tensor([ 0.3736, -0.0862, 0.0125, -0.1474]) tensor([0.3422, 0.2161, 0.2385, 0.2032]) -Greedy action tensor([ 0.9145, -0.3565, 0.0555, -0.1103]) tensor([0.4847, 0.1360, 0.2053, 0.1739]) -Greedy action tensor([ 0.7061, -0.4339, -0.1892, -0.4922]) tensor([0.4926, 0.1575, 0.2012, 0.1486]) -Greedy action tensor([ 0.8822, -0.5514, 0.0125, -0.3357]) tensor([0.5120, 0.1221, 0.2145, 0.1515]) -Greedy action tensor([ 1.0045, -0.4778, -0.0549, -0.3982]) tensor([0.5495, 0.1248, 0.1905, 0.1351]) -Greedy action tensor([ 0.8142, -0.5476, -0.0623, -0.4148]) tensor([0.5089, 0.1304, 0.2118, 0.1489]) -Greedy action tensor([ 0.5582, -0.2367, -0.0557, -0.0674]) tensor([0.3956, 0.1787, 0.2141, 0.2116]) -Greedy action tensor([ 0.7152, -0.3769, -0.0907, -0.1065]) tensor([0.4501, 0.1510, 0.2010, 0.1979]) -Greedy action tensor([ 0.6924, -0.3329, 0.0177, -0.2270]) tensor([0.4412, 0.1582, 0.2247, 0.1759]) -Greedy action tensor([ 0.9532, -0.7428, -0.0411, -0.4864]) tensor([0.5585, 0.1024, 0.2066, 0.1324]) -Greedy action tensor([ 0.9490, -0.7473, 0.1409, -0.5879]) tensor([0.5423, 0.0994, 0.2417, 0.1166]) -Greedy action tensor([ 0.5726, -0.7622, -0.2465, -0.1230]) tensor([0.4540, 0.1195, 0.2001, 0.2264]) -Greedy action tensor([ 0.6561, -0.5059, 0.0086, -0.4590]) tensor([0.4621, 0.1446, 0.2418, 0.1515]) -Greedy action tensor([ 0.2876, 0.0693, -0.0436, -0.0614]) tensor([0.3099, 0.2491, 0.2225, 0.2186]) -Greedy action tensor([ 0.8936, -0.5821, 0.0260, -0.5341]) tensor([0.5295, 0.1211, 0.2224, 0.1270]) -Greedy action tensor([ 1.1219, -0.4527, -0.2100, -0.7037]) tensor([0.6127, 0.1269, 0.1617, 0.0987]) -Greedy action tensor([ 1.0831, -0.5799, -0.0214, -0.6117]) tensor([0.5867, 0.1112, 0.1944, 0.1077]) -Greedy action tensor([ 0.5560, -0.2663, 0.0160, -0.3229]) tensor([0.4103, 0.1803, 0.2391, 0.1704]) -Greedy action tensor([ 1.1761, -0.8670, 0.0346, -0.6134]) tensor([0.6188, 0.0802, 0.1976, 0.1034]) -Greedy action tensor([ 0.5725, -0.4069, -0.1169, -0.1143]) tensor([0.4201, 0.1577, 0.2108, 0.2114]) -Greedy action tensor([ 0.7350, -0.3524, -0.0998, -0.1961]) tensor([0.4619, 0.1557, 0.2004, 0.1820]) -Greedy action tensor([ 1.4872, -0.0453, -0.3863, -0.2231]) tensor([0.6450, 0.1393, 0.0991, 0.1166]) -Greedy action tensor([ 1.3839, -0.6401, -0.4946, 0.2865]) tensor([0.6178, 0.0816, 0.0944, 0.2062]) -Greedy action tensor([ 1.7328, -0.4300, -0.5267, 0.5539]) tensor([0.6549, 0.0753, 0.0684, 0.2014]) -Greedy action tensor([ 1.3817, -0.3155, -0.6022, 0.1293]) tensor([0.6224, 0.1140, 0.0856, 0.1779]) -Greedy action tensor([ 1.5203, -0.4690, -0.4997, 0.1676]) tensor([0.6544, 0.0895, 0.0868, 0.1692]) -Greedy action tensor([ 2.1933, -1.4137, -0.1340, 0.7354]) tensor([0.7367, 0.0200, 0.0719, 0.1715]) -Greedy action tensor([ 1.2155, -0.3861, -0.9825, 0.1082]) tensor([0.6086, 0.1227, 0.0676, 0.2011]) -Greedy action tensor([ 1.4639, -0.1097, -0.4529, 0.0693]) tensor([0.6241, 0.1294, 0.0918, 0.1547]) -Greedy action tensor([ 1.4835e+00, -7.9986e-01, 9.9427e-02, 8.7814e-04]) tensor([0.6331, 0.0645, 0.1586, 0.1437]) -Greedy action tensor([ 0.9141, -0.5860, 0.0624, 0.1492]) tensor([0.4728, 0.1055, 0.2017, 0.2200]) -Greedy action tensor([ 1.7228, -0.5043, -0.3453, 0.4048]) tensor([0.6658, 0.0718, 0.0842, 0.1782]) -Greedy action tensor([ 2.0593, -1.2656, -0.1814, 0.6511]) tensor([0.7210, 0.0259, 0.0767, 0.1763]) -Greedy action tensor([ 1.1404, -0.2427, -0.5495, 0.1918]) tensor([0.5487, 0.1376, 0.1012, 0.2125]) -Greedy action tensor([ 1.4657, -0.0616, -0.0727, -0.1457]) tensor([0.6129, 0.1331, 0.1316, 0.1224]) -Greedy action tensor([ 1.8373, -0.1979, -0.7069, 0.2064]) tensor([0.7118, 0.0930, 0.0559, 0.1393]) -Greedy action tensor([ 1.3995, -0.6288, -0.2202, 0.2265]) tensor([0.6101, 0.0803, 0.1208, 0.1888]) -Greedy action tensor([ 1.5287, -0.6130, -0.0932, 0.1533]) tensor([0.6379, 0.0749, 0.1260, 0.1612]) -Greedy action tensor([ 1.1822, -0.4875, -0.3249, 0.4273]) tensor([0.5319, 0.1002, 0.1178, 0.2500]) -Greedy action tensor([ 1.8856, 0.8044, -0.2675, 0.2210]) tensor([0.6081, 0.2062, 0.0706, 0.1151]) -Greedy action tensor([ 1.2007, -0.5160, -0.5729, 0.4665]) tensor([0.5467, 0.0982, 0.0928, 0.2623]) -Greedy action tensor([ 1.7509, -0.9233, 0.0039, 0.5352]) tensor([0.6495, 0.0448, 0.1132, 0.1926]) -Greedy action tensor([ 1.5297, -0.4232, -0.7245, 0.2993]) tensor([0.6498, 0.0922, 0.0682, 0.1899]) -Greedy action tensor([ 1.1658, -0.3955, -0.6031, -0.0983]) tensor([0.6014, 0.1262, 0.1025, 0.1699]) -Greedy action tensor([ 1.1944, -0.3573, -0.9523, 0.1832]) tensor([0.5908, 0.1252, 0.0691, 0.2149]) -Greedy action tensor([ 1.3516, -0.5856, -0.3051, 0.4969]) tensor([0.5681, 0.0819, 0.1084, 0.2417]) -Greedy action tensor([ 1.6430, -0.8499, -0.0622, 0.1815]) tensor([0.6683, 0.0552, 0.1215, 0.1550]) -Greedy action tensor([ 1.6463, -0.0895, -1.6019, -0.1891]) tensor([0.7275, 0.1282, 0.0283, 0.1161]) -Greedy action tensor([ 1.1936, -0.4169, -0.3580, 0.5655]) tensor([0.5141, 0.1027, 0.1089, 0.2743]) -Greedy action tensor([ 1.4867, -0.4018, -0.3850, 0.0516]) tensor([0.6480, 0.0980, 0.0997, 0.1543]) -Greedy action tensor([ 1.3937, -0.3037, -0.5835, 0.0825]) tensor([0.6285, 0.1151, 0.0870, 0.1694]) -Greedy action tensor([ 2.2831, -0.4333, -0.5830, 0.7414]) tensor([0.7479, 0.0494, 0.0426, 0.1601]) -Greedy action tensor([ 1.7058, -0.6010, -0.2421, 0.1409]) tensor([0.6891, 0.0686, 0.0982, 0.1441]) -Greedy action tensor([ 1.2355, -0.0338, -0.9151, 0.4675]) tensor([0.5372, 0.1510, 0.0625, 0.2492]) -Greedy action tensor([ 2.0799, -0.6418, 0.1064, -0.1198]) tensor([0.7601, 0.0500, 0.1056, 0.0842]) -Greedy action tensor([ 1.5550, -0.6163, -0.9000, 0.1537]) tensor([0.6915, 0.0788, 0.0594, 0.1703]) -Greedy action tensor([ 1.2485, -0.1597, -0.2993, -0.1102]) tensor([0.5833, 0.1427, 0.1241, 0.1499]) -Greedy action tensor([ 1.0720, -0.3322, -0.3880, 0.2907]) tensor([0.5166, 0.1269, 0.1200, 0.2365]) -Greedy action tensor([ 1.5819, -0.9634, 0.0837, 0.4053]) tensor([0.6210, 0.0487, 0.1388, 0.1915]) -Greedy action tensor([ 1.6612, -0.0959, -0.0727, -0.0395]) tensor([0.6529, 0.1127, 0.1153, 0.1192]) -Greedy action tensor([ 1.4277, -0.3984, -0.3854, 0.4561]) tensor([0.5873, 0.0946, 0.0958, 0.2223]) -Greedy action tensor([ 1.5511, -0.4825, -0.3340, 0.6971]) tensor([0.5853, 0.0766, 0.0889, 0.2492]) -Greedy action tensor([ 1.2775, -0.1629, -0.7182, 0.2295]) tensor([0.5802, 0.1374, 0.0789, 0.2035]) -Greedy action tensor([ 2.4413, 0.7726, 0.2550, -0.0756]) tensor([0.7238, 0.1364, 0.0813, 0.0584]) -Greedy action tensor([2.4678, 0.0606, 0.1022, 0.4703]) tensor([0.7578, 0.0683, 0.0712, 0.1028]) -Greedy action tensor([ 1.2474, -0.1554, -0.5718, 0.3450]) tensor([0.5514, 0.1356, 0.0894, 0.2236]) -Greedy action tensor([ 1.0491, -0.0051, -0.9240, 0.3952]) tensor([0.4981, 0.1736, 0.0693, 0.2590]) -Greedy action tensor([ 1.5064, -0.4743, -0.8376, 0.0796]) tensor([0.6784, 0.0936, 0.0651, 0.1629]) -Greedy action tensor([ 1.3413, -1.2983, -0.1553, -0.0470]) tensor([0.6473, 0.0462, 0.1449, 0.1615]) -Greedy action tensor([ 1.7221, -0.3086, -0.2812, -0.4129]) tensor([0.7223, 0.0948, 0.0974, 0.0854]) -Greedy action tensor([ 0.3764, -0.5337, 0.1558, 0.0111]) tensor([0.3450, 0.1389, 0.2767, 0.2394]) -Greedy action tensor([ 1.4710e+00, -7.0371e-01, -6.3750e-04, 3.6003e-01]) tensor([0.5979, 0.0680, 0.1373, 0.1969]) -Greedy action tensor([ 1.6941, -0.1136, -0.6286, 0.0624]) tensor([0.6860, 0.1125, 0.0672, 0.1342]) -Greedy action tensor([ 1.4469, -0.4368, -0.2848, 0.1067]) tensor([0.6286, 0.0956, 0.1113, 0.1646]) -Greedy action tensor([ 0.9577, -0.3729, -0.1977, 0.1631]) tensor([0.4924, 0.1301, 0.1551, 0.2224]) -Greedy action tensor([ 1.6144, -1.1067, -0.2349, 0.0089]) tensor([0.7023, 0.0462, 0.1105, 0.1410]) -Greedy action tensor([ 2.2661, -0.9858, -0.3051, 0.9236]) tensor([0.7266, 0.0281, 0.0555, 0.1898]) -Greedy action tensor([ 1.7219, -1.0532, -0.1766, 0.5613]) tensor([0.6555, 0.0409, 0.0982, 0.2054]) -Greedy action tensor([ 1.4381, -0.4044, -0.8058, 0.5735]) tensor([0.5932, 0.0940, 0.0629, 0.2499]) -Greedy action tensor([ 1.4236, -0.3881, -0.5405, -0.0477]) tensor([0.6522, 0.1066, 0.0915, 0.1498]) -Greedy action tensor([ 1.3376, -0.4155, -0.6532, -0.1542]) tensor([0.6516, 0.1129, 0.0890, 0.1466]) -Greedy action tensor([ 1.4311, 0.0202, -0.7451, 0.5015]) tensor([0.5707, 0.1392, 0.0648, 0.2253]) -Greedy action tensor([ 1.4776, -0.5537, -0.4597, 0.3425]) tensor([0.6263, 0.0821, 0.0902, 0.2013]) -Greedy action tensor([ 1.2661, -0.2606, -0.4452, 0.4335]) tensor([0.5456, 0.1185, 0.0986, 0.2373]) -Greedy action tensor([ 1.1898, -0.2069, 0.0047, 0.0585]) tensor([0.5331, 0.1319, 0.1630, 0.1720]) -Greedy action tensor([ 1.2179, -0.6722, -0.7723, 0.5461]) tensor([0.5560, 0.0840, 0.0760, 0.2840]) -Greedy action tensor([ 1.3981, 0.3415, -1.4561, -0.1632]) tensor([0.6192, 0.2152, 0.0357, 0.1299]) -Greedy action tensor([ 1.4483, -0.5205, -0.0972, 0.0708]) tensor([0.6230, 0.0870, 0.1328, 0.1571]) -Greedy action tensor([ 1.1203, 0.1060, -0.1850, -0.0512]) tensor([0.5145, 0.1866, 0.1395, 0.1595]) -Greedy action tensor([ 1.6050, -0.2266, -0.6026, 0.5577]) tensor([0.6169, 0.0988, 0.0678, 0.2165]) -Greedy action tensor([ 1.2754, -0.1183, -0.7262, 0.1683]) tensor([0.5835, 0.1448, 0.0788, 0.1929]) -Greedy action tensor([ 1.0264, -0.1547, -0.3367, 0.1996]) tensor([0.4999, 0.1534, 0.1279, 0.2187]) -Greedy action tensor([ 1.7486, -0.5837, -0.6376, 0.5727]) tensor([0.6677, 0.0648, 0.0614, 0.2060]) -Greedy action tensor([ 1.1797, -0.3605, -0.6647, 0.1082]) tensor([0.5831, 0.1250, 0.0922, 0.1997]) -Greedy action tensor([ 1.4959, 0.0178, -1.0931, 0.5732]) tensor([0.5880, 0.1341, 0.0442, 0.2337]) -Greedy action tensor([ 1.5676, -0.7227, -0.7311, 0.4819]) tensor([0.6497, 0.0658, 0.0652, 0.2194]) -Greedy action tensor([ 1.8772, -0.2506, -0.2544, 0.3204]) tensor([0.6903, 0.0822, 0.0819, 0.1455]) -Greedy action tensor([ 1.4136, -0.3133, -0.8349, 0.1981]) tensor([0.6329, 0.1126, 0.0668, 0.1877]) -Greedy action tensor([ 1.1231, -0.0592, -0.3340, -0.0150]) tensor([0.5377, 0.1648, 0.1252, 0.1723]) -Greedy action tensor([ 1.6981, -0.4560, -0.7505, 0.5870]) tensor([0.6529, 0.0757, 0.0564, 0.2149]) -Greedy action tensor([ 2.0646, -1.0865, -0.5260, 0.5854]) tensor([0.7432, 0.0318, 0.0557, 0.1693]) -Greedy action tensor([ 1.4531, -0.7672, -0.5372, 0.1256]) tensor([0.6621, 0.0719, 0.0905, 0.1755]) -Greedy action tensor([ 0.4660, 0.0616, -1.0538, 0.1074]) tensor([0.3869, 0.2582, 0.0846, 0.2703]) -Greedy action tensor([ 0.3934, -0.7545, 0.6610, -0.6116]) tensor([0.3344, 0.1061, 0.4370, 0.1224]) -Greedy action tensor([ 1.2056, -0.7018, 1.1623, 0.5449]) tensor([0.3813, 0.0566, 0.3651, 0.1969]) -Greedy action tensor([-0.9295, -0.6674, -0.0991, -1.3880]) tensor([0.1914, 0.2487, 0.4390, 0.1210]) -Greedy action tensor([ 1.4651, -0.8587, -0.5386, 0.9890]) tensor([0.5394, 0.0528, 0.0727, 0.3351]) -Greedy action tensor([ 0.4989, -0.0873, -0.0309, -0.2836]) tensor([0.3843, 0.2138, 0.2262, 0.1757]) -Greedy action tensor([-0.3027, -0.9288, 0.4399, -1.1677]) tensor([0.2465, 0.1318, 0.5180, 0.1038]) -Greedy action tensor([-1.1423, 1.0365, 0.3066, -0.7614]) tensor([0.0643, 0.5679, 0.2737, 0.0941]) -Greedy action tensor([-0.9679, -0.5128, 0.6587, -0.9168]) tensor([0.1147, 0.1809, 0.5836, 0.1208]) -Greedy action tensor([1.3427, 0.2617, 1.6144, 0.5365]) tensor([0.3228, 0.1095, 0.4236, 0.1441]) -Greedy action tensor([ 0.5607, -1.1748, 0.7363, 0.1159]) tensor([0.3323, 0.0586, 0.3961, 0.2130]) -Greedy action tensor([-0.7009, 0.3415, 0.0827, -0.8367]) tensor([0.1450, 0.4111, 0.3174, 0.1266]) -Greedy action tensor([-0.0187, -0.4091, -0.1985, -0.4362]) tensor([0.3154, 0.2134, 0.2635, 0.2077]) -Greedy action tensor([-0.6952, -0.0548, -0.4264, 0.5639]) tensor([0.1294, 0.2455, 0.1693, 0.4558]) -Greedy action tensor([ 1.1536, -0.0701, 0.2638, -0.6572]) tensor([0.5352, 0.1574, 0.2198, 0.0875]) -Greedy action tensor([-1.1909, -0.5544, 0.5991, -0.7767]) tensor([0.0962, 0.1819, 0.5763, 0.1456]) -Greedy action tensor([ 0.7438, -1.3904, -0.0818, 0.3115]) tensor([0.4535, 0.0537, 0.1986, 0.2943]) -Greedy action tensor([ 0.3573, -0.6965, -0.3032, 0.7288]) tensor([0.3017, 0.1052, 0.1558, 0.4374]) -Greedy action tensor([ 0.8691, -1.0194, -0.2902, 0.5654]) tensor([0.4539, 0.0687, 0.1424, 0.3350]) -Greedy action tensor([1.2604, 0.5061, 0.6823, 0.3563]) tensor([0.4105, 0.1931, 0.2303, 0.1662]) -Greedy action tensor([-0.2681, -1.2255, 0.2697, -0.2185]) tensor([0.2412, 0.0926, 0.4129, 0.2534]) -Greedy action tensor([-0.8396, -1.0534, 0.6280, -1.0538]) tensor([0.1438, 0.1161, 0.6240, 0.1161]) -Greedy action tensor([ 0.2241, 0.2220, 0.7229, -0.4914]) tensor([0.2419, 0.2414, 0.3984, 0.1183]) -Greedy action tensor([-0.6910, -0.5429, 0.6316, -0.6902]) tensor([0.1446, 0.1677, 0.5429, 0.1448]) -Greedy action tensor([ 0.6741, -0.3692, 0.1536, 0.0194]) tensor([0.4055, 0.1429, 0.2410, 0.2107]) -Greedy action tensor([-0.0214, -0.2185, -0.0455, 0.5768]) tensor([0.2166, 0.1779, 0.2115, 0.3940]) -Greedy action tensor([ 0.8992, -0.6144, 0.3740, -0.7644]) tensor([0.4998, 0.1100, 0.2956, 0.0947]) -Greedy action tensor([-1.3058, -0.3133, 0.8809, -0.7334]) tensor([0.0696, 0.1877, 0.6195, 0.1233]) -Greedy action tensor([ 0.0647, -0.3459, -0.2684, -1.3615]) tensor([0.3817, 0.2531, 0.2735, 0.0917]) -Greedy action tensor([ 0.5053, -0.1369, -0.0035, -0.4236]) tensor([0.3965, 0.2086, 0.2384, 0.1566]) -Greedy action tensor([ 0.5430, -0.5023, 0.2787, -1.0074]) tensor([0.4289, 0.1508, 0.3293, 0.0910]) -Greedy action tensor([ 0.1742, -0.7676, -0.4487, 0.1572]) tensor([0.3437, 0.1340, 0.1844, 0.3379]) -Greedy action tensor([-1.4795, -0.0909, 0.3323, -1.9083]) tensor([0.0849, 0.3403, 0.5195, 0.0553]) -Greedy action tensor([ 0.1946, -1.5946, 0.6595, -0.5148]) tensor([0.3076, 0.0514, 0.4897, 0.1513]) -Greedy action tensor([-2.4498, -0.9839, -0.1955, 0.5917]) tensor([0.0279, 0.1210, 0.2662, 0.5849]) -Greedy action tensor([ 0.7192, -1.7150, -0.5192, 0.7179]) tensor([0.4208, 0.0369, 0.1220, 0.4203]) -Greedy action tensor([-0.5577, -1.6291, -1.2534, -0.1572]) tensor([0.2999, 0.1027, 0.1496, 0.4477]) -Greedy action tensor([ 0.2164, -1.9342, 0.0662, 0.4145]) tensor([0.3129, 0.0364, 0.2693, 0.3814]) -Greedy action tensor([ 0.2463, -0.1640, -0.0364, -0.4372]) tensor([0.3422, 0.2271, 0.2579, 0.1728]) -Greedy action tensor([ 0.7364, -1.2702, 0.1058, -0.5052]) tensor([0.5113, 0.0687, 0.2722, 0.1477]) -Greedy action tensor([ 0.1014, 0.2593, -0.0271, -0.2248]) tensor([0.2651, 0.3104, 0.2331, 0.1913]) -Greedy action tensor([-0.3453, -0.7081, -0.6029, 0.2531]) tensor([0.2332, 0.1623, 0.1803, 0.4243]) -Greedy action tensor([-0.3487, 0.7136, 0.6650, -0.6637]) tensor([0.1355, 0.3921, 0.3735, 0.0989]) -Greedy action tensor([ 0.9550, -1.3650, 1.1334, 0.2753]) tensor([0.3571, 0.0351, 0.4268, 0.1810]) -Greedy action tensor([-0.3040, 0.7553, -0.3355, -0.4279]) tensor([0.1743, 0.5028, 0.1689, 0.1540]) -Greedy action tensor([-0.0561, 0.4270, -0.5395, 0.3794]) tensor([0.2091, 0.3389, 0.1289, 0.3231]) -Greedy action tensor([-0.2678, 0.7511, 0.0131, -0.0200]) tensor([0.1568, 0.4345, 0.2077, 0.2010]) -Greedy action tensor([ 0.2706, -1.5081, 0.1816, -0.8374]) tensor([0.4143, 0.0700, 0.3790, 0.1368]) -Greedy action tensor([ 1.1075, -1.0906, -0.2518, -0.5583]) tensor([0.6423, 0.0713, 0.1650, 0.1214]) -Greedy action tensor([-1.0139, -1.0959, -0.1446, -0.8280]) tensor([0.1815, 0.1672, 0.4328, 0.2185]) -Greedy action tensor([-0.2392, 0.1629, 0.7795, 0.0923]) tensor([0.1502, 0.2246, 0.4160, 0.2092]) -Greedy action tensor([-2.3212e-01, 1.9746e-04, -1.1785e+00, -1.5528e-01]) tensor([0.2681, 0.3383, 0.1041, 0.2895]) -Greedy action tensor([-0.2964, -1.4072, -0.4678, -0.2889]) tensor([0.3145, 0.1036, 0.2650, 0.3169]) -Greedy action tensor([-0.1323, -0.9060, 0.4569, -0.0145]) tensor([0.2279, 0.1051, 0.4107, 0.2563]) -Greedy action tensor([-0.0836, -0.9506, -0.5695, -0.1911]) tensor([0.3409, 0.1432, 0.2097, 0.3062]) -Greedy action tensor([ 0.4603, 0.4630, -1.2428, 0.9867]) tensor([0.2579, 0.2586, 0.0470, 0.4366]) -Greedy action tensor([-0.4513, -1.4982, 0.8250, -0.9420]) tensor([0.1803, 0.0633, 0.6460, 0.1104]) -Greedy action tensor([-0.6480, 0.3129, -0.3248, -0.6375]) tensor([0.1665, 0.4352, 0.2300, 0.1683]) -Greedy action tensor([0.1035, 0.1842, 0.1024, 0.0577]) tensor([0.2476, 0.2684, 0.2474, 0.2365]) -Greedy action tensor([ 1.5663, -0.4285, -0.3489, 1.3163]) tensor([0.4849, 0.0660, 0.0714, 0.3777]) -Greedy action tensor([ 0.3649, 0.2683, -0.7147, -0.2927]) tensor([0.3616, 0.3283, 0.1228, 0.1873]) -Greedy action tensor([-0.0277, -0.3013, -0.0669, -1.2537]) tensor([0.3316, 0.2522, 0.3189, 0.0973]) -Greedy action tensor([ 0.6775, -0.9152, 0.5898, 0.1886]) tensor([0.3659, 0.0744, 0.3352, 0.2244]) -Greedy action tensor([-0.8284, -2.0935, 0.1889, 0.0099]) tensor([0.1572, 0.0444, 0.4348, 0.3636]) -Greedy action tensor([ 0.3408, -0.6723, 0.9615, -0.8932]) tensor([0.2845, 0.1033, 0.5293, 0.0828]) -Greedy action tensor([-0.9696, -0.5274, 0.1559, -0.8599]) tensor([0.1481, 0.2304, 0.4563, 0.1652]) -Greedy action tensor([ 0.1003, 0.3221, -0.2955, -0.5392]) tensor([0.2899, 0.3619, 0.1952, 0.1530]) -Greedy action tensor([ 0.2803, -0.8238, -0.6723, -0.3331]) tensor([0.4427, 0.1468, 0.1708, 0.2397]) -Greedy action tensor([-0.0723, -0.0094, 0.1072, -0.7895]) tensor([0.2667, 0.2840, 0.3191, 0.1302]) -Greedy action tensor([ 0.1654, 0.0416, -0.5107, -0.6694]) tensor([0.3539, 0.3126, 0.1800, 0.1536]) -Greedy action tensor([ 0.5434, -0.5773, -0.3595, 0.7508]) tensor([0.3376, 0.1101, 0.1369, 0.4154]) -Greedy action tensor([ 1.2100, -0.7962, -0.4793, -0.1412]) tensor([0.6337, 0.0852, 0.1170, 0.1641]) -Greedy action tensor([-0.1845, 0.2301, -0.6575, -1.0383]) tensor([0.2807, 0.4249, 0.1749, 0.1195]) -Greedy action tensor([-0.8089, -0.7406, -0.5826, -0.9089]) tensor([0.2364, 0.2532, 0.2965, 0.2139]) -Greedy action tensor([-0.8655, -1.3876, -0.4874, 0.8622]) tensor([0.1152, 0.0683, 0.1681, 0.6483]) -Greedy action tensor([-0.9260, -0.2951, -0.0941, 0.4100]) tensor([0.1113, 0.2093, 0.2558, 0.4236]) -Greedy action tensor([ 0.8321, -0.0723, -0.9479, -0.0356]) tensor([0.5017, 0.2031, 0.0846, 0.2107]) -Greedy action tensor([-0.0183, -1.0224, 0.0255, -0.3236]) tensor([0.3177, 0.1164, 0.3319, 0.2341]) -Greedy action tensor([ 0.2643, -1.3853, -0.1049, -0.0596]) tensor([0.3836, 0.0737, 0.2652, 0.2775]) -Greedy action tensor([-0.7242, -1.8652, 0.1118, -0.3220]) tensor([0.1953, 0.0624, 0.4505, 0.2919]) -Greedy action tensor([-0.2548, -1.0328, 0.2370, -0.5121]) tensor([0.2585, 0.1188, 0.4228, 0.1999]) -Greedy action tensor([-1.8822, -0.3552, 0.6249, -0.1360]) tensor([0.0424, 0.1951, 0.5197, 0.2429]) -Greedy action tensor([-1.8522, -0.4653, 0.6241, -0.1394]) tensor([0.0446, 0.1783, 0.5301, 0.2470]) -Greedy action tensor([-1.6247, -0.4512, 0.5102, -0.0864]) tensor([0.0577, 0.1864, 0.4875, 0.2685]) -Greedy action tensor([-1.8988, -0.3866, 0.6452, -0.1543]) tensor([0.0417, 0.1891, 0.5306, 0.2386]) -Greedy action tensor([-1.9048, -0.4562, 0.6513, -0.1621]) tensor([0.0419, 0.1785, 0.5401, 0.2395]) -Greedy action tensor([-0.4786, 0.9102, 0.0148, 0.0293]) tensor([0.1203, 0.4826, 0.1971, 0.2000]) -Greedy action tensor([-1.7013, -0.2447, 0.5251, -0.0666]) tensor([0.0508, 0.2180, 0.4707, 0.2605]) -Greedy action tensor([-0.6551, 0.3465, 0.1366, 0.0375]) tensor([0.1261, 0.3434, 0.2784, 0.2521]) -Greedy action tensor([-1.8686, -0.4302, 0.6249, -0.1507]) tensor([0.0437, 0.1841, 0.5288, 0.2435]) -Greedy action tensor([-1.4153, -0.8524, -0.2463, -0.6793]) tensor([0.1240, 0.2178, 0.3992, 0.2589]) -Greedy action tensor([-1.1693, 0.1468, 0.2625, 0.0059]) tensor([0.0823, 0.3068, 0.3444, 0.2665]) -Greedy action tensor([-1.8132, -0.4062, 0.5948, -0.1290]) tensor([0.0463, 0.1892, 0.5148, 0.2496]) -Greedy action tensor([-1.8339, -0.4379, 0.6087, -0.1217]) tensor([0.0453, 0.1829, 0.5209, 0.2509]) -Greedy action tensor([-1.4829, -0.3733, 0.4121, 0.0972]) tensor([0.0643, 0.1952, 0.4280, 0.3124]) -Greedy action tensor([-1.8852, -0.3204, 0.6137, -0.1374]) tensor([0.0422, 0.2018, 0.5136, 0.2424]) -Greedy action tensor([-1.9274, -0.4023, 0.6508, -0.1694]) tensor([0.0407, 0.1870, 0.5362, 0.2361]) -Greedy action tensor([ 0.0736, -0.0567, 0.1119, -0.0109]) tensor([0.2607, 0.2288, 0.2709, 0.2396]) -Greedy action tensor([-0.7667, 0.1856, 0.1373, -0.0299]) tensor([0.1227, 0.3180, 0.3030, 0.2563]) -Greedy action tensor([-1.8852, -0.3890, 0.6288, -0.1452]) tensor([0.0425, 0.1898, 0.5254, 0.2423]) -Greedy action tensor([-0.3375, 0.9957, 0.0072, 0.0262]) tensor([0.1308, 0.4963, 0.1847, 0.1882]) -Greedy action tensor([-0.8716, 0.5964, 0.0032, -0.4370]) tensor([0.1077, 0.4676, 0.2583, 0.1664]) -Greedy action tensor([-1.7496, -0.3357, 0.5381, -0.0531]) tensor([0.0490, 0.2014, 0.4825, 0.2671]) -Greedy action tensor([-1.8661, -0.4156, 0.6496, -0.1250]) tensor([0.0428, 0.1827, 0.5301, 0.2443]) -Greedy action tensor([-1.7536, 0.1514, 0.5164, -0.1056]) tensor([0.0443, 0.2974, 0.4284, 0.2300]) -Greedy action tensor([-1.3444, 0.0622, 0.2835, 0.1441]) tensor([0.0685, 0.2795, 0.3487, 0.3033]) -Greedy action tensor([-1.5674, -0.4931, 0.5173, 0.0039]) tensor([0.0596, 0.1745, 0.4792, 0.2868]) -Greedy action tensor([-1.0452, 0.6756, 0.1672, -0.0276]) tensor([0.0786, 0.4395, 0.2643, 0.2176]) -Greedy action tensor([-1.9167, -0.4062, 0.6525, -0.1680]) tensor([0.0411, 0.1861, 0.5366, 0.2362]) -Greedy action tensor([-1.4562, 0.5863, 0.3040, 0.0624]) tensor([0.0524, 0.4039, 0.3045, 0.2392]) -Greedy action tensor([0.0036, 0.9282, 0.0393, 0.6746]) tensor([0.1535, 0.3870, 0.1591, 0.3003]) -Greedy action tensor([-1.1226, 0.3996, 0.1886, -0.1523]) tensor([0.0838, 0.3840, 0.3110, 0.2212]) -Greedy action tensor([-1.8815, -0.3180, 0.0552, -0.3754]) tensor([0.0581, 0.2773, 0.4028, 0.2618]) -Greedy action tensor([-0.9906, 0.5802, 0.1792, -0.0479]) tensor([0.0862, 0.4147, 0.2777, 0.2213]) -Greedy action tensor([-1.5152, -0.1025, 0.6486, -0.4704]) tensor([0.0600, 0.2466, 0.5227, 0.1707]) -Greedy action tensor([-7.9221e-01, 7.4717e-01, 1.1572e-01, 5.7808e-04]) tensor([0.0966, 0.4504, 0.2395, 0.2135]) -Greedy action tensor([-1.0446, 0.9112, 0.1967, 0.1813]) tensor([0.0670, 0.4733, 0.2317, 0.2281]) -Greedy action tensor([-1.8578, -0.4436, 0.6236, -0.1362]) tensor([0.0441, 0.1815, 0.5276, 0.2468]) -Greedy action tensor([-1.8498, -0.4545, 0.6279, -0.1308]) tensor([0.0444, 0.1792, 0.5288, 0.2476]) -Greedy action tensor([-1.2631, -0.3871, 0.7583, 0.1003]) tensor([0.0673, 0.1616, 0.5080, 0.2631]) -Greedy action tensor([-1.8902, -0.4408, 0.6406, -0.1546]) tensor([0.0426, 0.1813, 0.5347, 0.2414]) -Greedy action tensor([-1.6020, 0.2032, 0.4142, -0.0485]) tensor([0.0518, 0.3148, 0.3887, 0.2447]) -Greedy action tensor([-1.8679, -0.4558, 0.6250, -0.1464]) tensor([0.0439, 0.1801, 0.5307, 0.2454]) -Greedy action tensor([-1.8493, -0.4577, 0.6198, -0.1383]) tensor([0.0447, 0.1798, 0.5281, 0.2474]) -Greedy action tensor([-1.9024, -0.4436, 0.6496, -0.1574]) tensor([0.0419, 0.1803, 0.5378, 0.2400]) -Greedy action tensor([-1.9353, -0.4385, 0.6632, -0.1730]) tensor([0.0404, 0.1806, 0.5435, 0.2355]) -Greedy action tensor([-1.9183, -0.4017, 0.6521, -0.1657]) tensor([0.0410, 0.1868, 0.5358, 0.2365]) -Greedy action tensor([-1.8556, -0.2162, 0.5857, -0.1127]) tensor([0.0428, 0.2206, 0.4919, 0.2447]) -Greedy action tensor([-1.8559, -0.2634, 0.6039, -0.1273]) tensor([0.0430, 0.2114, 0.5033, 0.2423]) -Greedy action tensor([-1.5762, -0.0710, 0.4560, -0.0534]) tensor([0.0564, 0.2542, 0.4306, 0.2588]) -Greedy action tensor([-1.7681, -0.4692, 0.5736, -0.1003]) tensor([0.0491, 0.1800, 0.5106, 0.2603]) -Greedy action tensor([-1.9074, -0.4404, 0.6531, -0.1599]) tensor([0.0416, 0.1805, 0.5388, 0.2390]) -Greedy action tensor([-1.7683, 0.0236, 0.4857, -0.0850]) tensor([0.0456, 0.2739, 0.4348, 0.2457]) -Greedy action tensor([-1.5717, 0.1967, 0.3679, -0.0117]) tensor([0.0538, 0.3155, 0.3745, 0.2562]) -Greedy action tensor([-1.9249, -0.2829, 0.6356, -0.1841]) tensor([0.0403, 0.2082, 0.5217, 0.2298]) -Greedy action tensor([-1.7053, -0.3108, 0.5021, -0.0330]) tensor([0.0514, 0.2074, 0.4675, 0.2738]) -Greedy action tensor([-0.6416, -0.3489, 0.6060, 0.8175]) tensor([0.0988, 0.1324, 0.3439, 0.4249]) -Greedy action tensor([-1.5716, 0.4612, 0.3998, 0.0372]) tensor([0.0480, 0.3669, 0.3450, 0.2401]) -Greedy action tensor([-1.8968, -0.3280, 0.6307, -0.1550]) tensor([0.0416, 0.1998, 0.5211, 0.2375]) -Greedy action tensor([-1.3628, -1.1399, 0.3480, -0.6647]) tensor([0.1021, 0.1276, 0.5651, 0.2052]) -Greedy action tensor([-1.9399, -0.4456, 0.6646, -0.1778]) tensor([0.0403, 0.1797, 0.5452, 0.2348]) -Greedy action tensor([-1.5234, -0.4651, 0.6128, 0.4569]) tensor([0.0510, 0.1471, 0.4321, 0.3698]) -Greedy action tensor([-1.9382, -0.4342, 0.6621, -0.1775]) tensor([0.0403, 0.1815, 0.5434, 0.2347]) -Greedy action tensor([-0.8560, 0.7854, 0.1512, -0.0822]) tensor([0.0903, 0.4664, 0.2474, 0.1959]) -Greedy action tensor([-1.5133, 0.1386, 0.4036, -0.0593]) tensor([0.0578, 0.3016, 0.3931, 0.2475]) -Greedy action tensor([-1.5456, -0.3437, 0.4171, 0.0422]) tensor([0.0612, 0.2036, 0.4357, 0.2995]) -Greedy action tensor([-1.9247, -0.4528, 0.6619, -0.1663]) tensor([0.0409, 0.1783, 0.5435, 0.2374]) -Greedy action tensor([-1.8083, -0.4584, 0.6198, -0.0852]) tensor([0.0459, 0.1770, 0.5202, 0.2570]) -Greedy action tensor([-1.9220, -0.4541, 0.6592, -0.1707]) tensor([0.0411, 0.1785, 0.5434, 0.2370]) -Greedy action tensor([-1.1433, 0.8714, 0.2064, 0.1649]) tensor([0.0623, 0.4671, 0.2402, 0.2304]) -Greedy action tensor([-1.7742, -0.2028, 0.5732, -0.0629]) tensor([0.0459, 0.2207, 0.4796, 0.2539]) -Greedy action tensor([-1.8982, -0.4358, 0.6480, -0.1559]) tensor([0.0420, 0.1815, 0.5364, 0.2401]) -Greedy action tensor([-0.9487, -0.2166, 0.2079, 0.0085]) tensor([0.1128, 0.2346, 0.3587, 0.2938]) -Greedy action tensor([-1.6257, -0.4753, 0.5779, -0.1641]) tensor([0.0570, 0.1802, 0.5167, 0.2460]) -Greedy action tensor([-1.2487, 0.1970, 0.3560, -0.1165]) tensor([0.0751, 0.3186, 0.3735, 0.2329]) -Greedy action tensor([-1.6110, -0.5383, 0.4968, -0.0177]) tensor([0.0586, 0.1712, 0.4820, 0.2882]) -Greedy action tensor([-1.8274, -0.4487, 0.6112, -0.1225]) tensor([0.0456, 0.1810, 0.5225, 0.2509]) -Greedy action tensor([-1.9261, -0.4514, 0.6599, -0.1717]) tensor([0.0409, 0.1789, 0.5435, 0.2366]) -Greedy action tensor([-1.9246, -0.4523, 0.6614, -0.1690]) tensor([0.0409, 0.1785, 0.5436, 0.2370]) -Greedy action tensor([-1.9041, -0.4547, 0.6519, -0.1604]) tensor([0.0419, 0.1785, 0.5399, 0.2396]) -Greedy action tensor([-1.3596, 0.0860, 0.4991, 0.0956]) tensor([0.0627, 0.2662, 0.4023, 0.2688]) -Greedy action tensor([-0.9367, -0.4169, 1.1079, 1.3649]) tensor([0.0490, 0.0824, 0.3788, 0.4898]) -Greedy action tensor([ 1.0924, -0.6079, -0.1896, -0.4738]) tensor([0.5992, 0.1094, 0.1663, 0.1251]) -Greedy action tensor([ 0.6364, -0.5798, -0.1381, -0.2601]) tensor([0.4618, 0.1369, 0.2129, 0.1884]) -Greedy action tensor([ 0.4053, -0.1673, 0.0095, 0.0584]) tensor([0.3397, 0.1916, 0.2286, 0.2401]) -Greedy action tensor([ 0.6189, -0.3164, -0.0019, -0.5946]) tensor([0.4490, 0.1762, 0.2413, 0.1334]) -Greedy action tensor([ 0.5766, -0.5121, -0.0738, -0.2199]) tensor([0.4330, 0.1458, 0.2260, 0.1953]) -Greedy action tensor([ 0.7380, -0.3395, 0.0305, -0.1047]) tensor([0.4417, 0.1504, 0.2177, 0.1902]) -Greedy action tensor([ 0.6186, 0.0269, -0.0522, 0.0165]) tensor([0.3828, 0.2118, 0.1957, 0.2096]) -Greedy action tensor([ 0.3834, 0.1715, -0.0835, 0.0535]) tensor([0.3169, 0.2564, 0.1987, 0.2279]) -Greedy action tensor([ 1.0303, -0.5486, -0.0503, -0.4118]) tensor([0.5612, 0.1157, 0.1905, 0.1327]) -Greedy action tensor([ 1.0764, -0.6596, 0.0486, -0.6518]) tensor([0.5843, 0.1030, 0.2090, 0.1038]) -Greedy action tensor([ 0.4489, 0.0875, -0.0059, 0.0640]) tensor([0.3320, 0.2313, 0.2107, 0.2259]) -Greedy action tensor([ 1.1428, -0.7556, 0.0693, -0.7360]) tensor([0.6081, 0.0911, 0.2079, 0.0929]) -Greedy action tensor([ 0.5720, 0.3031, -0.1143, -0.1950]) tensor([0.3660, 0.2797, 0.1843, 0.1700]) -Greedy action tensor([ 0.9046, -0.5292, -0.0738, -0.5548]) tensor([0.5415, 0.1291, 0.2036, 0.1258]) -Greedy action tensor([ 0.9135, -0.4491, -0.0676, -0.3007]) tensor([0.5187, 0.1328, 0.1945, 0.1540]) -Greedy action tensor([ 0.9962, -0.4630, -0.2210, -0.4489]) tensor([0.5668, 0.1317, 0.1678, 0.1336]) -Greedy action tensor([ 1.1631, -0.7571, 0.0204, -0.6782]) tensor([0.6157, 0.0902, 0.1964, 0.0977]) -Greedy action tensor([ 0.8077, -0.2122, -0.0435, -0.1118]) tensor([0.4574, 0.1650, 0.1953, 0.1824]) -Greedy action tensor([ 0.5943, 0.1518, -0.1233, 0.0821]) tensor([0.3664, 0.2354, 0.1788, 0.2195]) -Greedy action tensor([ 0.7161, -0.5585, 0.0458, -0.6550]) tensor([0.4890, 0.1367, 0.2502, 0.1241]) -Greedy action tensor([ 0.7147, -0.4817, -0.0939, -0.2021]) tensor([0.4656, 0.1408, 0.2074, 0.1862]) -Greedy action tensor([ 0.6525, -0.4113, 0.0234, -0.3473]) tensor([0.4452, 0.1537, 0.2373, 0.1638]) -Greedy action tensor([ 0.7867, -0.5365, -0.1011, -0.3981]) tensor([0.5041, 0.1342, 0.2075, 0.1542]) -Greedy action tensor([ 0.4702, 0.1242, 0.0178, -0.0103]) tensor([0.3376, 0.2389, 0.2148, 0.2088]) -Greedy action tensor([ 0.7232, -0.7258, 0.0009, -0.3256]) tensor([0.4829, 0.1134, 0.2345, 0.1692]) -Greedy action tensor([ 0.7218, -0.2136, 0.0031, -0.1120]) tensor([0.4321, 0.1696, 0.2106, 0.1877]) -Greedy action tensor([ 1.1479, -0.7165, -0.0925, -0.6890]) tensor([0.6236, 0.0967, 0.1804, 0.0994]) -Greedy action tensor([ 0.3197, -0.0724, 0.1659, -0.6182]) tensor([0.3419, 0.2310, 0.2932, 0.1338]) -Greedy action tensor([ 0.7624, -0.3767, -0.0131, -0.1409]) tensor([0.4575, 0.1464, 0.2107, 0.1854]) -Greedy action tensor([ 0.8760, -0.7459, 0.0468, -0.4786]) tensor([0.5285, 0.1044, 0.2307, 0.1364]) -Greedy action tensor([ 0.9193, -0.5110, -0.2991, -0.3759]) tensor([0.5529, 0.1323, 0.1635, 0.1514]) -Greedy action tensor([ 0.6148, -0.3374, -0.1120, -0.4737]) tensor([0.4533, 0.1749, 0.2191, 0.1526]) -Greedy action tensor([ 0.7855, -0.2940, -0.1361, -0.2257]) tensor([0.4759, 0.1617, 0.1893, 0.1731]) -Greedy action tensor([ 0.3959, -0.2453, -0.0866, -0.0371]) tensor([0.3581, 0.1886, 0.2210, 0.2323]) -Greedy action tensor([ 1.0390, -0.7892, 0.0623, -0.5914]) tensor([0.5770, 0.0927, 0.2173, 0.1130]) -Greedy action tensor([ 0.8544, -0.5713, -0.0186, -0.4818]) tensor([0.5206, 0.1251, 0.2174, 0.1368]) -Greedy action tensor([ 0.4229, -0.4801, -0.0661, -0.1694]) tensor([0.3889, 0.1576, 0.2385, 0.2151]) -Greedy action tensor([ 1.2595, -0.8224, 0.1170, -0.7069]) tensor([0.6315, 0.0787, 0.2014, 0.0884]) -Greedy action tensor([ 0.2848, -0.1577, -0.0799, -0.1727]) tensor([0.3367, 0.2163, 0.2338, 0.2131]) -Greedy action tensor([ 1.1200, -0.7912, -0.0457, -0.7580]) tensor([0.6202, 0.0917, 0.1933, 0.0948]) -Greedy action tensor([ 1.0783, -1.1361, 0.1407, -0.5900]) tensor([0.5920, 0.0647, 0.2318, 0.1116]) -Greedy action tensor([ 0.8847, -0.7308, 0.0377, -0.2891]) tensor([0.5163, 0.1026, 0.2214, 0.1596]) -Greedy action tensor([ 0.9939, -0.5007, -0.0549, -0.4253]) tensor([0.5505, 0.1235, 0.1929, 0.1332]) -Greedy action tensor([ 0.6383, -0.3080, -0.0853, -0.1397]) tensor([0.4287, 0.1664, 0.2079, 0.1969]) -Greedy action tensor([ 0.7000, -0.5962, -0.1272, -0.2800]) tensor([0.4793, 0.1311, 0.2096, 0.1799]) -Greedy action tensor([ 0.6585, -0.3204, 0.0046, -0.2626]) tensor([0.4360, 0.1638, 0.2267, 0.1735]) -Greedy action tensor([ 0.5950, -0.1955, -0.0274, -0.2287]) tensor([0.4117, 0.1867, 0.2209, 0.1806]) -Greedy action tensor([ 1.1351, -0.7427, -0.0893, -0.8413]) tensor([0.6307, 0.0965, 0.1854, 0.0874]) -Greedy action tensor([ 1.2694, -0.6458, -0.1778, -0.5357]) tensor([0.6464, 0.0952, 0.1521, 0.1063]) -Greedy action tensor([ 1.4138, -0.7507, -0.1372, -0.7631]) tensor([0.6943, 0.0797, 0.1472, 0.0787]) -Greedy action tensor([ 1.0052, -0.7097, -0.0769, -0.5038]) tensor([0.5747, 0.1034, 0.1948, 0.1271]) -Greedy action tensor([ 0.6467, -0.1299, -0.0545, -0.0853]) tensor([0.4104, 0.1888, 0.2035, 0.1974]) -Greedy action tensor([ 0.5213, -0.0415, 0.1019, -0.0050]) tensor([0.3549, 0.2021, 0.2333, 0.2097]) -Greedy action tensor([ 0.9947, -0.6963, 0.2191, -0.5664]) tensor([0.5392, 0.0994, 0.2483, 0.1132]) -Greedy action tensor([ 1.0842, -0.6593, -0.1437, -0.6098]) tensor([0.6055, 0.1059, 0.1773, 0.1113]) -Greedy action tensor([ 0.5876, -0.2528, -0.0804, -0.0665]) tensor([0.4058, 0.1751, 0.2081, 0.2110]) -Greedy action tensor([ 0.7979, -0.2236, 0.0616, -0.3169]) tensor([0.4615, 0.1662, 0.2210, 0.1514]) -Greedy action tensor([ 0.8352, -0.5546, 0.0164, -0.2780]) tensor([0.4954, 0.1234, 0.2185, 0.1627]) -Greedy action tensor([ 1.0017, -0.8934, 0.0085, -0.6478]) tensor([0.5838, 0.0877, 0.2162, 0.1122]) -Greedy action tensor([ 0.9125, -0.6227, -0.1056, -0.3350]) tensor([0.5365, 0.1156, 0.1938, 0.1541]) -Greedy action tensor([ 0.3084, -0.1120, -0.1248, -0.0920]) tensor([0.3361, 0.2207, 0.2180, 0.2252]) -Greedy action tensor([ 0.7552, -0.2261, -0.1898, 0.0280]) tensor([0.4451, 0.1668, 0.1730, 0.2151]) -Greedy action tensor([ 0.6722, -0.7083, 0.0100, -0.5276]) tensor([0.4835, 0.1216, 0.2493, 0.1456]) -Greedy action tensor([ 0.7495, -0.4475, 0.0362, -0.1843]) tensor([0.4576, 0.1382, 0.2243, 0.1799]) -Greedy action tensor([ 1.0333, -0.5898, -0.0985, -0.3136]) tensor([0.5619, 0.1108, 0.1812, 0.1461]) -Greedy action tensor([ 0.8587, 0.2370, -0.0256, -0.2009]) tensor([0.4354, 0.2338, 0.1798, 0.1509]) -Greedy action tensor([ 0.5967, -0.3890, -0.0956, -0.1356]) tensor([0.4247, 0.1585, 0.2126, 0.2042]) -Greedy action tensor([ 0.7455, -0.5331, -0.0031, -0.3004]) tensor([0.4755, 0.1324, 0.2249, 0.1671]) -Greedy action tensor([ 0.3167, -0.3622, -0.2145, -0.0259]) tensor([0.3565, 0.1808, 0.2096, 0.2531]) -Greedy action tensor([ 0.7856, -0.2474, 0.2204, -0.2005]) tensor([0.4353, 0.1549, 0.2474, 0.1624]) -Greedy action tensor([ 0.8676, 0.0270, -0.1071, -0.0455]) tensor([0.4525, 0.1952, 0.1707, 0.1816]) -Greedy action tensor([ 0.6936, -0.4971, -0.1201, -0.2723]) tensor([0.4700, 0.1429, 0.2083, 0.1789]) -Greedy action tensor([ 0.5583, -0.3464, -0.1176, -0.1055]) tensor([0.4118, 0.1667, 0.2095, 0.2120]) -Greedy action tensor([ 0.9333, -0.4829, 0.1102, -0.3212]) tensor([0.5084, 0.1234, 0.2232, 0.1450]) -Greedy action tensor([ 0.7555, -0.5769, -0.0358, -0.2817]) tensor([0.4827, 0.1274, 0.2188, 0.1711]) -Greedy action tensor([ 1.0902, -0.6649, 0.0757, -0.6548]) tensor([0.5847, 0.1011, 0.2120, 0.1021]) -Greedy action tensor([ 1.1809, -0.8627, -0.0275, -0.5994]) tensor([0.6262, 0.0811, 0.1870, 0.1056]) -Greedy action tensor([ 0.8687, -0.6621, -0.0071, -0.4036]) tensor([0.5227, 0.1131, 0.2177, 0.1465]) -Greedy action tensor([ 0.7000, -0.4727, 0.1362, -0.3123]) tensor([0.4460, 0.1381, 0.2538, 0.1621]) -Greedy action tensor([ 1.0324, -0.4185, 0.1035, -0.3423]) tensor([0.5313, 0.1245, 0.2099, 0.1344]) -Greedy action tensor([ 0.8484, 0.1703, -0.0818, 0.1165]) tensor([0.4196, 0.2130, 0.1655, 0.2018]) -Greedy action tensor([ 1.4569, -0.7704, -0.4055, 0.1406]) tensor([0.6531, 0.0704, 0.1014, 0.1751]) -Greedy action tensor([ 0.9927, 0.2440, -0.6735, -0.4638]) tensor([0.5277, 0.2496, 0.0997, 0.1230]) -Greedy action tensor([ 1.5295, 0.2824, -0.6005, -0.4301]) tensor([0.6464, 0.1857, 0.0768, 0.0911]) -Greedy action tensor([ 1.3355, -0.5267, -0.4574, 0.5234]) tensor([0.5663, 0.0880, 0.0943, 0.2514]) -Greedy action tensor([ 0.7974, -0.1957, -0.3072, 0.0349]) tensor([0.4612, 0.1708, 0.1528, 0.2151]) -Greedy action tensor([ 2.2022, -1.3881, 0.0181, 0.8104]) tensor([0.7200, 0.0199, 0.0811, 0.1790]) -Greedy action tensor([ 1.2628, -0.1846, -0.8909, 0.1046]) tensor([0.6005, 0.1412, 0.0697, 0.1886]) -Greedy action tensor([ 1.5557, -0.0838, -0.6813, 0.2556]) tensor([0.6356, 0.1233, 0.0679, 0.1732]) -Greedy action tensor([ 1.5981, -0.8101, -0.5543, 0.1370]) tensor([0.6953, 0.0626, 0.0808, 0.1613]) -Greedy action tensor([ 1.7578, 0.4828, -0.1660, 0.4825]) tensor([0.5866, 0.1639, 0.0857, 0.1639]) -Greedy action tensor([ 2.0291, -1.0965, -0.6120, 0.6572]) tensor([0.7306, 0.0321, 0.0521, 0.1853]) -Greedy action tensor([ 1.5314, -0.3684, -0.3030, 0.2443]) tensor([0.6308, 0.0944, 0.1007, 0.1741]) -Greedy action tensor([ 1.6667, 0.0096, -0.5926, 0.3369]) tensor([0.6412, 0.1223, 0.0670, 0.1696]) -Greedy action tensor([ 1.7886, -1.3721, -0.1488, 0.0511]) tensor([0.7340, 0.0311, 0.1058, 0.1292]) -Greedy action tensor([ 1.4537, -0.2154, -0.4958, 0.0370]) tensor([0.6356, 0.1198, 0.0905, 0.1541]) -Greedy action tensor([ 2.1454, -1.0432, -0.7918, 0.9233]) tensor([0.7200, 0.0297, 0.0382, 0.2121]) -Greedy action tensor([ 1.8063, -0.3323, -0.6987, 0.4534]) tensor([0.6859, 0.0808, 0.0560, 0.1773]) -Greedy action tensor([ 1.5745, 0.2253, -0.8315, 0.0838]) tensor([0.6350, 0.1647, 0.0573, 0.1430]) -Greedy action tensor([ 1.7630, -0.9199, -0.3321, 0.7416]) tensor([0.6445, 0.0441, 0.0793, 0.2321]) -Greedy action tensor([ 1.6067, -0.9544, -0.8502, 0.4109]) tensor([0.6824, 0.0527, 0.0585, 0.2064]) -Greedy action tensor([ 1.1234, -0.3953, -0.4028, 0.4213]) tensor([0.5176, 0.1134, 0.1125, 0.2565]) -Greedy action tensor([ 1.9164, -1.1294, -0.4723, 0.3996]) tensor([0.7360, 0.0350, 0.0675, 0.1615]) -Greedy action tensor([ 1.6539, 0.0476, -1.4563, -0.1625]) tensor([0.7103, 0.1425, 0.0317, 0.1155]) -Greedy action tensor([ 1.5862, -0.6569, -0.6666, 0.1507]) tensor([0.6900, 0.0732, 0.0725, 0.1642]) -Greedy action tensor([ 1.7767e+00, -8.2453e-01, -4.6265e-01, 1.2438e-03]) tensor([0.7407, 0.0549, 0.0789, 0.1255]) -Greedy action tensor([ 1.5397, 0.1307, -0.3386, 0.4253]) tensor([0.5796, 0.1416, 0.0886, 0.1902]) -Greedy action tensor([ 1.7567, -0.7370, -0.4078, 0.2153]) tensor([0.7085, 0.0585, 0.0813, 0.1517]) -Greedy action tensor([ 1.6202, 0.3806, -0.3028, 0.3098]) tensor([0.5864, 0.1698, 0.0857, 0.1582]) -Greedy action tensor([ 1.1951, -0.2825, -0.6324, 0.4113]) tensor([0.5418, 0.1236, 0.0871, 0.2474]) -Greedy action tensor([ 1.2680, -0.1270, -0.7361, 0.3540]) tensor([0.5607, 0.1390, 0.0756, 0.2248]) -Greedy action tensor([ 1.0116, -0.0966, -0.7089, -0.1413]) tensor([0.5480, 0.1809, 0.0981, 0.1730]) -Greedy action tensor([ 1.6022, -0.7302, -0.2098, 0.1186]) tensor([0.6724, 0.0653, 0.1098, 0.1525]) -Greedy action tensor([ 1.3656, -0.2338, -0.8211, 0.4324]) tensor([0.5856, 0.1183, 0.0658, 0.2303]) -Greedy action tensor([ 0.9937, -0.2145, -0.7032, 0.0361]) tensor([0.5360, 0.1601, 0.0982, 0.2057]) -Greedy action tensor([ 0.8560, 0.2201, -0.6601, -0.1397]) tensor([0.4720, 0.2499, 0.1036, 0.1744]) -Greedy action tensor([ 1.2209, -0.0582, -1.2159, 0.1341]) tensor([0.5872, 0.1634, 0.0513, 0.1981]) -Greedy action tensor([ 1.5141, -0.6797, -0.0913, 0.1504]) tensor([0.6377, 0.0711, 0.1281, 0.1631]) -Greedy action tensor([ 0.8761, -0.4292, -0.5003, 0.4354]) tensor([0.4614, 0.1251, 0.1165, 0.2970]) -Greedy action tensor([ 1.0338, -0.5192, -0.5729, 0.6197]) tensor([0.4824, 0.1021, 0.0967, 0.3188]) -Greedy action tensor([ 1.2649, -0.1862, -0.4385, 0.3646]) tensor([0.5486, 0.1286, 0.0999, 0.2230]) -Greedy action tensor([ 2.0700, -0.7694, -0.4118, 0.4239]) tensor([0.7492, 0.0438, 0.0626, 0.1444]) -Greedy action tensor([ 1.2681, -0.3352, -0.7287, -0.0703]) tensor([0.6253, 0.1258, 0.0849, 0.1640]) -Greedy action tensor([ 1.8693, -0.7994, -0.3604, 0.6772]) tensor([0.6755, 0.0468, 0.0726, 0.2051]) -Greedy action tensor([ 1.3469, -0.6761, -0.5406, 0.4757]) tensor([0.5875, 0.0777, 0.0890, 0.2458]) -Greedy action tensor([ 2.5185, -0.7875, -0.4093, 0.3301]) tensor([0.8318, 0.0305, 0.0445, 0.0932]) -Greedy action tensor([ 1.6981, -0.7555, -0.6359, 0.2074]) tensor([0.7102, 0.0611, 0.0688, 0.1599]) -Greedy action tensor([ 1.4522, -0.2969, -0.5449, 0.1321]) tensor([0.6342, 0.1103, 0.0861, 0.1694]) -Greedy action tensor([ 1.6512, -0.3011, -1.1195, 0.0234]) tensor([0.7138, 0.1013, 0.0447, 0.1402]) -Greedy action tensor([ 0.9613, -0.4015, -0.2158, 0.1277]) tensor([0.5004, 0.1281, 0.1542, 0.2174]) -Greedy action tensor([ 1.3581, -0.0470, -0.5255, 0.3943]) tensor([0.5622, 0.1379, 0.0855, 0.2144]) -Greedy action tensor([ 1.6074, -0.4015, -0.7121, 0.4624]) tensor([0.6449, 0.0865, 0.0634, 0.2052]) -Greedy action tensor([ 1.5145, -0.1390, -0.7886, 0.4344]) tensor([0.6132, 0.1173, 0.0613, 0.2082]) -Greedy action tensor([ 1.5402, -0.6650, -0.6867, 0.7323]) tensor([0.6010, 0.0662, 0.0648, 0.2679]) -Greedy action tensor([ 1.3190, 0.3000, -0.8426, 0.0393]) tensor([0.5700, 0.2058, 0.0656, 0.1585]) -Greedy action tensor([ 1.0288, -0.3724, -0.8340, 0.4862]) tensor([0.5043, 0.1242, 0.0783, 0.2931]) -Greedy action tensor([ 1.3896, -0.2030, -0.6929, -0.0068]) tensor([0.6347, 0.1291, 0.0791, 0.1571]) -Greedy action tensor([ 1.0294, -0.5062, -0.4009, 0.1752]) tensor([0.5319, 0.1145, 0.1272, 0.2264]) -Greedy action tensor([ 1.6627, -0.4165, -0.3727, 0.6056]) tensor([0.6238, 0.0780, 0.0815, 0.2168]) -Greedy action tensor([ 1.2652, -0.2232, -0.5178, -0.0893]) tensor([0.6053, 0.1366, 0.1018, 0.1562]) -Greedy action tensor([ 1.7805, -0.4014, -0.6437, 0.6100]) tensor([0.6615, 0.0746, 0.0586, 0.2052]) -Greedy action tensor([ 1.6489, -0.4865, -0.8709, 0.1800]) tensor([0.6999, 0.0827, 0.0563, 0.1611]) -Greedy action tensor([2.2806, 0.3732, 0.1221, 0.2189]) tensor([0.7188, 0.1067, 0.0830, 0.0915]) -Greedy action tensor([ 1.4025, -0.0403, -1.1508, 0.1259]) tensor([0.6277, 0.1483, 0.0489, 0.1751]) -Greedy action tensor([ 1.4463, -0.5717, -0.0703, 0.2544]) tensor([0.6039, 0.0803, 0.1325, 0.1834]) -Greedy action tensor([ 0.9587, -0.1531, -0.3002, 0.0382]) tensor([0.4972, 0.1636, 0.1412, 0.1981]) -Greedy action tensor([ 1.5286, -0.0961, -0.5326, 0.5419]) tensor([0.5893, 0.1161, 0.0750, 0.2197]) -Greedy action tensor([ 1.6116, -0.5444, -0.2641, 0.1680]) tensor([0.6644, 0.0769, 0.1018, 0.1568]) -Greedy action tensor([ 0.9447, -0.0580, -0.3078, -0.0400]) tensor([0.4935, 0.1811, 0.1410, 0.1844]) -Greedy action tensor([1.0001, 0.1494, 0.0283, 0.1561]) tensor([0.4473, 0.1911, 0.1693, 0.1923]) -Greedy action tensor([ 1.1796, -0.3777, -0.3431, 0.2317]) tensor([0.5505, 0.1160, 0.1201, 0.2134]) -Greedy action tensor([ 1.2731, -0.2460, -1.0331, 0.1207]) tensor([0.6118, 0.1339, 0.0610, 0.1933]) -Greedy action tensor([ 1.2352, -0.2316, -0.5419, 0.1292]) tensor([0.5778, 0.1333, 0.0977, 0.1912]) -Greedy action tensor([ 1.3410, -0.6811, -0.5505, 0.6100]) tensor([0.5667, 0.0750, 0.0855, 0.2728]) -Greedy action tensor([ 1.3086, -0.7067, -0.5207, 0.6222]) tensor([0.5564, 0.0742, 0.0893, 0.2801]) -Greedy action tensor([ 2.0895, -0.7160, -0.4854, 0.5105]) tensor([0.7447, 0.0450, 0.0567, 0.1535]) -Greedy action tensor([ 1.2978, -0.2185, -0.4724, 0.3084]) tensor([0.5676, 0.1246, 0.0967, 0.2111]) -Greedy action tensor([ 1.0098, -0.4624, -0.1153, 0.1895]) tensor([0.5014, 0.1150, 0.1628, 0.2208]) -Greedy action tensor([ 1.5787, -0.5016, -0.6108, 0.2092]) tensor([0.6707, 0.0838, 0.0751, 0.1705]) -Greedy action tensor([ 1.0923, -0.3081, -0.8739, 0.3302]) tensor([0.5396, 0.1330, 0.0755, 0.2518]) -Greedy action tensor([ 1.7997, -0.1817, 0.0264, 0.5053]) tensor([0.6322, 0.0872, 0.1073, 0.1733]) -Greedy action tensor([ 1.3219, -0.4654, -0.2640, 0.5603]) tensor([0.5438, 0.0910, 0.1113, 0.2539]) -Greedy action tensor([-0.1806, -0.4956, 0.1402, -0.5070]) tensor([0.2611, 0.1906, 0.3599, 0.1884]) -Greedy action tensor([ 0.0046, -1.9063, -0.0187, 0.6299]) tensor([0.2504, 0.0370, 0.2446, 0.4679]) -Greedy action tensor([-0.7029, 0.2857, 0.0724, -0.2193]) tensor([0.1337, 0.3592, 0.2903, 0.2168]) -Greedy action tensor([ 0.1990, -0.1483, -0.2974, -1.2201]) tensor([0.3911, 0.2763, 0.2380, 0.0946]) -Greedy action tensor([ 0.0662, -1.5926, 0.2037, -0.8556]) tensor([0.3656, 0.0696, 0.4194, 0.1454]) -Greedy action tensor([ 0.4284, -0.6803, -0.6588, -0.6227]) tensor([0.4959, 0.1636, 0.1672, 0.1733]) -Greedy action tensor([-0.0864, -0.4964, -1.0014, -0.0870]) tensor([0.3264, 0.2166, 0.1307, 0.3262]) -Greedy action tensor([ 0.0525, -1.2339, -0.3823, -1.1971]) tensor([0.4524, 0.1250, 0.2929, 0.1297]) -Greedy action tensor([ 1.5069, -1.1760, 0.1015, 1.0905]) tensor([0.5068, 0.0347, 0.1243, 0.3342]) -Greedy action tensor([-0.1903, -0.5198, 0.1412, 0.0070]) tensor([0.2309, 0.1661, 0.3217, 0.2813]) -Greedy action tensor([-0.4858, -0.7792, -0.5040, 0.2139]) tensor([0.2109, 0.1573, 0.2071, 0.4246]) -Greedy action tensor([-0.6595, 0.0117, 0.2092, -0.7432]) tensor([0.1597, 0.3125, 0.3808, 0.1469]) -Greedy action tensor([-0.9790, -0.6891, -0.3023, -0.8984]) tensor([0.1856, 0.2480, 0.3652, 0.2012]) -Greedy action tensor([-0.7172, -0.4536, 0.3967, 0.1415]) tensor([0.1297, 0.1689, 0.3952, 0.3062]) -Greedy action tensor([ 0.7216, -0.7848, -0.1782, 0.6543]) tensor([0.3901, 0.0865, 0.1586, 0.3647]) -Greedy action tensor([-0.6244, -0.1373, 0.3527, 0.2091]) tensor([0.1318, 0.2146, 0.3502, 0.3034]) -Greedy action tensor([ 0.2425, -0.4256, -0.4094, -0.3114]) tensor([0.3834, 0.1965, 0.1998, 0.2203]) -Greedy action tensor([-0.1883, 0.1196, -0.4136, 0.1890]) tensor([0.2166, 0.2947, 0.1729, 0.3159]) -Greedy action tensor([-0.0912, -0.5137, -0.7374, 0.4396]) tensor([0.2578, 0.1689, 0.1351, 0.4383]) -Greedy action tensor([-0.2045, -0.4287, -0.3331, -0.2565]) tensor([0.2757, 0.2203, 0.2424, 0.2617]) -Greedy action tensor([ 1.4043, -1.1130, 1.6708, 0.4983]) tensor([0.3584, 0.0289, 0.4679, 0.1448]) -Greedy action tensor([-0.0525, -1.5499, 0.7411, -0.8677]) tensor([0.2579, 0.0577, 0.5703, 0.1141]) -Greedy action tensor([-0.3087, -0.0405, 0.1163, -0.5839]) tensor([0.2175, 0.2845, 0.3328, 0.1652]) -Greedy action tensor([ 0.2358, -0.6805, -0.4240, -0.2699]) tensor([0.3968, 0.1587, 0.2051, 0.2393]) -Greedy action tensor([-0.0131, 0.0096, 0.1122, -0.7801]) tensor([0.2762, 0.2825, 0.3130, 0.1283]) -Greedy action tensor([-0.0812, -1.0406, -0.5399, -0.6908]) tensor([0.3908, 0.1497, 0.2470, 0.2124]) -Greedy action tensor([-0.9153, -0.3363, 0.1375, -0.7578]) tensor([0.1466, 0.2616, 0.4202, 0.1716]) -Greedy action tensor([-0.1507, 0.3610, -0.0860, -0.5858]) tensor([0.2282, 0.3807, 0.2434, 0.1477]) -Greedy action tensor([ 0.0513, 0.6140, -0.0479, -0.6798]) tensor([0.2414, 0.4238, 0.2186, 0.1162]) -Greedy action tensor([-0.5491, -1.0942, -0.0104, -1.0397]) tensor([0.2560, 0.1484, 0.4388, 0.1568]) -Greedy action tensor([-1.1359, -1.1378, 0.1700, -1.2439]) tensor([0.1518, 0.1515, 0.5604, 0.1363]) -Greedy action tensor([ 0.7579, -0.5013, 0.7690, 0.2354]) tensor([0.3463, 0.0983, 0.3501, 0.2053]) -Greedy action tensor([ 0.1220, 0.0451, 0.1079, -0.8973]) tensor([0.3056, 0.2829, 0.3013, 0.1103]) -Greedy action tensor([ 0.5978, -0.4512, 0.2972, -0.4986]) tensor([0.4124, 0.1445, 0.3053, 0.1378]) -Greedy action tensor([ 0.3169, -0.8350, -0.4150, -0.0230]) tensor([0.3986, 0.1260, 0.1917, 0.2837]) -Greedy action tensor([ 0.5788, -1.3565, 0.9001, -0.5842]) tensor([0.3526, 0.0509, 0.4862, 0.1102]) -Greedy action tensor([-0.8919, -0.6518, -1.2316, 0.0358]) tensor([0.1814, 0.2307, 0.1292, 0.4588]) -Greedy action tensor([ 0.2873, 0.5585, 0.1169, -0.6970]) tensor([0.2834, 0.3717, 0.2390, 0.1059]) -Greedy action tensor([ 0.4685, -0.6929, 1.5924, -0.9542]) tensor([0.2159, 0.0676, 0.6644, 0.0521]) -Greedy action tensor([-0.2873, 0.2137, 0.3007, -0.5191]) tensor([0.1907, 0.3147, 0.3433, 0.1512]) -Greedy action tensor([ 1.1255, 0.5179, -0.3626, 0.1601]) tensor([0.4648, 0.2532, 0.1050, 0.1770]) -Greedy action tensor([ 0.2232, -0.2970, 0.6242, -0.6084]) tensor([0.2838, 0.1687, 0.4239, 0.1236]) -Greedy action tensor([ 0.7898, -0.7902, 0.5641, 0.3979]) tensor([0.3732, 0.0769, 0.2978, 0.2522]) -Greedy action tensor([ 0.0620, 0.0764, -0.0229, -0.6507]) tensor([0.2921, 0.2963, 0.2683, 0.1432]) -Greedy action tensor([ 1.0251, -1.2518, 0.1014, 0.8794]) tensor([0.4230, 0.0434, 0.1680, 0.3657]) -Greedy action tensor([-0.3480, 0.4207, 0.0169, -0.4590]) tensor([0.1821, 0.3927, 0.2623, 0.1629]) -Greedy action tensor([-1.2039, -0.4644, -0.4013, -0.1482]) tensor([0.1219, 0.2555, 0.2721, 0.3505]) -Greedy action tensor([ 0.1159, -0.7402, 0.1430, -0.5121]) tensor([0.3349, 0.1423, 0.3441, 0.1787]) -Greedy action tensor([-1.1688, -0.7801, 0.1868, -0.1838]) tensor([0.1107, 0.1633, 0.4295, 0.2965]) -Greedy action tensor([-1.0297, -0.3988, -0.4646, 0.4070]) tensor([0.1130, 0.2125, 0.1989, 0.4756]) -Greedy action tensor([ 0.3642, -0.3112, -0.8171, 1.0927]) tensor([0.2572, 0.1309, 0.0789, 0.5329]) -Greedy action tensor([-0.0927, 0.2751, -0.2435, -0.0298]) tensor([0.2289, 0.3306, 0.1968, 0.2437]) -Greedy action tensor([ 1.2266, 0.4607, -0.0065, 0.1988]) tensor([0.4730, 0.2199, 0.1378, 0.1692]) -Greedy action tensor([ 0.3684, -1.2515, -0.8470, -0.2359]) tensor([0.4900, 0.0970, 0.1453, 0.2678]) -Greedy action tensor([ 0.5012, -1.2162, 0.0716, -0.3384]) tensor([0.4421, 0.0794, 0.2877, 0.1909]) -Greedy action tensor([ 0.4181, 0.0456, 0.6847, -0.0294]) tensor([0.2752, 0.1896, 0.3593, 0.1759]) -Greedy action tensor([ 0.5445, -0.3995, 0.0525, -0.7498]) tensor([0.4396, 0.1711, 0.2688, 0.1205]) -Greedy action tensor([-0.5262, -0.1445, -0.1805, -0.6178]) tensor([0.2088, 0.3058, 0.2950, 0.1905]) -Greedy action tensor([-0.7768, -0.5531, 0.0910, -1.5068]) tensor([0.1955, 0.2446, 0.4657, 0.0942]) -Greedy action tensor([ 0.3304, 0.2184, -0.9345, 0.0801]) tensor([0.3384, 0.3026, 0.0955, 0.2635]) -Greedy action tensor([-0.8440, -1.3420, 0.3371, -0.2838]) tensor([0.1511, 0.0918, 0.4924, 0.2646]) -Greedy action tensor([-0.0523, -0.4626, -0.9634, -0.3986]) tensor([0.3606, 0.2393, 0.1450, 0.2551]) -Greedy action tensor([-0.9719, -1.1349, 0.2757, -0.7551]) tensor([0.1521, 0.1292, 0.5297, 0.1890]) -Greedy action tensor([ 0.0186, -0.2339, -0.0074, -0.8222]) tensor([0.3142, 0.2441, 0.3062, 0.1355]) -Greedy action tensor([-0.8152, -0.2486, -0.0312, -0.5585]) tensor([0.1601, 0.2822, 0.3507, 0.2070]) -Greedy action tensor([-0.2066, 0.6299, 0.3104, -0.6503]) tensor([0.1777, 0.4102, 0.2980, 0.1140]) -Greedy action tensor([ 0.7990, -0.0709, -0.3343, -0.5234]) tensor([0.4981, 0.2087, 0.1604, 0.1328]) -Greedy action tensor([ 0.4708, 0.5115, 0.7137, -0.7630]) tensor([0.2772, 0.2887, 0.3534, 0.0807]) -Greedy action tensor([-0.3520, -0.3643, 0.6925, -0.2510]) tensor([0.1685, 0.1664, 0.4788, 0.1864]) -Greedy action tensor([-0.5147, -0.6476, -0.4833, -0.0345]) tensor([0.2210, 0.1935, 0.2281, 0.3573]) -Greedy action tensor([-0.3070, -1.0134, -0.1305, -0.0097]) tensor([0.2480, 0.1224, 0.2958, 0.3338]) -Greedy action tensor([-0.3450, 0.1286, 0.4423, -0.6402]) tensor([0.1803, 0.2895, 0.3961, 0.1342]) -Greedy action tensor([-0.6126, -0.0909, -0.1703, -0.5625]) tensor([0.1890, 0.3184, 0.2940, 0.1986]) -Greedy action tensor([ 0.0203, -0.3823, -0.0737, -0.2617]) tensor([0.3000, 0.2006, 0.2731, 0.2263]) -Greedy action tensor([ 0.2452, -0.0087, -0.2543, 0.0339]) tensor([0.3133, 0.2430, 0.1901, 0.2536]) -Greedy action tensor([ 1.5817, -0.0977, 0.6206, 0.7642]) tensor([0.4974, 0.0928, 0.1902, 0.2196]) -Greedy action tensor([-0.9683, -0.0369, 0.1891, -1.5142]) tensor([0.1370, 0.3477, 0.4359, 0.0794]) -Greedy action tensor([-0.1388, -0.4990, 0.2245, -0.8243]) tensor([0.2748, 0.1917, 0.3951, 0.1384]) -Greedy action tensor([-0.5299, -0.8013, -0.5676, -0.6076]) tensor([0.2739, 0.2088, 0.2638, 0.2535]) -Greedy action tensor([-0.4119, -1.2656, 0.0600, -0.4647]) tensor([0.2514, 0.1071, 0.4030, 0.2385]) -Greedy action tensor([ 0.3783, 0.1151, -0.2266, -0.4480]) tensor([0.3633, 0.2793, 0.1984, 0.1590]) -Greedy action tensor([-1.9445, -0.4553, 0.6698, -0.1799]) tensor([0.0401, 0.1778, 0.5478, 0.2342]) -Greedy action tensor([-0.8566, 0.1305, 0.5548, 0.2744]) tensor([0.0919, 0.2465, 0.3769, 0.2847]) -Greedy action tensor([-1.7540e+00, -3.5959e-01, 6.8201e-01, 9.2208e-05]) tensor([0.0450, 0.1813, 0.5139, 0.2598]) -Greedy action tensor([-1.2183, 0.7722, 0.1759, 0.1886]) tensor([0.0609, 0.4454, 0.2453, 0.2484]) -Greedy action tensor([-1.8651, -0.4388, 0.6280, -0.1479]) tensor([0.0438, 0.1823, 0.5299, 0.2439]) -Greedy action tensor([-1.9144, -0.3765, 0.6413, -0.1613]) tensor([0.0411, 0.1915, 0.5299, 0.2375]) -Greedy action tensor([-1.9437, -0.4494, 0.6673, -0.1790]) tensor([0.0401, 0.1789, 0.5465, 0.2344]) -Greedy action tensor([0.1359, 0.2680, 0.5981, 1.2309]) tensor([0.1489, 0.1699, 0.2363, 0.4450]) -Greedy action tensor([-1.9136, -0.4172, 0.6476, -0.1671]) tensor([0.0414, 0.1849, 0.5363, 0.2374]) -Greedy action tensor([-1.9060, -0.4549, 0.6510, -0.1639]) tensor([0.0419, 0.1788, 0.5402, 0.2391]) -Greedy action tensor([-1.7850, -0.1600, 0.5548, -0.1216]) tensor([0.0460, 0.2337, 0.4775, 0.2428]) -Greedy action tensor([-1.8333, -0.0921, 0.5616, -0.1122]) tensor([0.0430, 0.2452, 0.4715, 0.2403]) -Greedy action tensor([-1.7684, -0.4422, 0.5706, -0.0859]) tensor([0.0487, 0.1836, 0.5055, 0.2622]) -Greedy action tensor([-1.9315, -0.4205, 0.6580, -0.1739]) tensor([0.0406, 0.1838, 0.5404, 0.2352]) -Greedy action tensor([-1.9386, -0.4436, 0.6645, -0.1772]) tensor([0.0403, 0.1799, 0.5449, 0.2348]) -Greedy action tensor([-1.8953, -0.4546, 0.6489, -0.1544]) tensor([0.0423, 0.1785, 0.5382, 0.2410]) -Greedy action tensor([-1.8018, -0.3068, 0.5487, -0.1105]) tensor([0.0468, 0.2086, 0.4907, 0.2539]) -Greedy action tensor([-0.7284, 0.5483, -1.3062, -0.8159]) tensor([0.1650, 0.5913, 0.0926, 0.1511]) -Greedy action tensor([-1.5044, 0.6547, 0.4376, -0.6437]) tensor([0.0526, 0.4559, 0.3670, 0.1245]) -Greedy action tensor([-1.9146, -0.3984, 0.6376, -0.1584]) tensor([0.0414, 0.1884, 0.5308, 0.2395]) -Greedy action tensor([-1.2489, 0.1417, 0.2637, 0.0183]) tensor([0.0763, 0.3065, 0.3463, 0.2709]) -Greedy action tensor([-1.7330, -0.4774, 0.5650, -0.1101]) tensor([0.0512, 0.1797, 0.5096, 0.2595]) -Greedy action tensor([-1.8539, -0.3387, 0.6253, -0.1275]) tensor([0.0433, 0.1970, 0.5165, 0.2433]) -Greedy action tensor([-1.8071, -0.4602, 0.5934, -0.1042]) tensor([0.0468, 0.1800, 0.5162, 0.2570]) -Greedy action tensor([ 0.3100, 1.0901, -0.0339, 0.4912]) tensor([0.1965, 0.4287, 0.1393, 0.2355]) -Greedy action tensor([-1.8749, -0.2665, 0.6153, -0.1401]) tensor([0.0421, 0.2105, 0.5085, 0.2389]) -Greedy action tensor([-1.9259, -0.4299, 0.6557, -0.1701]) tensor([0.0409, 0.1824, 0.5402, 0.2365]) -Greedy action tensor([-1.9239, -0.4727, 0.6708, -0.1712]) tensor([0.0409, 0.1747, 0.5482, 0.2362]) -Greedy action tensor([-1.8822, -0.4317, 0.6331, -0.1484]) tensor([0.0429, 0.1831, 0.5310, 0.2430]) -Greedy action tensor([-1.6351, -0.4237, 0.6385, 0.1777]) tensor([0.0495, 0.1663, 0.4809, 0.3033]) -Greedy action tensor([-1.7334, -0.3396, 0.6464, 0.0951]) tensor([0.0453, 0.1827, 0.4897, 0.2822]) -Greedy action tensor([-1.7778, -0.5089, 0.6748, -0.0532]) tensor([0.0459, 0.1633, 0.5333, 0.2575]) -Greedy action tensor([-1.6295, -0.2503, 0.4808, -0.0603]) tensor([0.0555, 0.2203, 0.4577, 0.2664]) -Greedy action tensor([-1.0150, 0.3661, 0.1830, -0.0389]) tensor([0.0914, 0.3635, 0.3027, 0.2425]) -Greedy action tensor([-0.6671, -0.0570, -0.9832, -0.1621]) tensor([0.1913, 0.3522, 0.1395, 0.3170]) -Greedy action tensor([-1.8746, -0.3903, 0.6475, -0.1394]) tensor([0.0425, 0.1875, 0.5291, 0.2409]) -Greedy action tensor([-1.5887, 0.3041, 0.4108, 0.0269]) tensor([0.0499, 0.3310, 0.3683, 0.2509]) -Greedy action tensor([-1.5875, -0.4251, 0.5029, -0.1205]) tensor([0.0602, 0.1924, 0.4866, 0.2609]) -Greedy action tensor([ 0.4341, 1.1502, -0.0295, 0.5137]) tensor([0.2102, 0.4301, 0.1322, 0.2276]) -Greedy action tensor([-1.8962, -0.4112, 0.6471, -0.1581]) tensor([0.0420, 0.1853, 0.5340, 0.2387]) -Greedy action tensor([-1.6930, -0.1675, 0.5623, -0.0024]) tensor([0.0486, 0.2236, 0.4640, 0.2638]) -Greedy action tensor([-1.5229, -0.5806, 0.5315, -0.1049]) tensor([0.0645, 0.1656, 0.5035, 0.2664]) -Greedy action tensor([-0.8484, -0.2981, 0.3737, 0.5804]) tensor([0.0971, 0.1683, 0.3295, 0.4052]) -Greedy action tensor([-1.8072, -0.4776, 0.6001, -0.0817]) tensor([0.0465, 0.1758, 0.5165, 0.2612]) -Greedy action tensor([-1.9108, -0.4142, 0.6555, -0.1672]) tensor([0.0413, 0.1845, 0.5379, 0.2363]) -Greedy action tensor([-1.4631, -0.5671, 0.4436, 0.2070]) tensor([0.0645, 0.1581, 0.4344, 0.3429]) -Greedy action tensor([-1.2979, 0.7656, 0.2384, 0.1764]) tensor([0.0559, 0.4401, 0.2598, 0.2442]) -Greedy action tensor([-1.8407, -0.4510, 0.6239, -0.1260]) tensor([0.0448, 0.1798, 0.5267, 0.2488]) -Greedy action tensor([-0.9780, 0.8073, 0.2392, 0.2773]) tensor([0.0722, 0.4305, 0.2439, 0.2534]) -Greedy action tensor([-0.6763, 0.5922, 0.0603, 0.1338]) tensor([0.1125, 0.3998, 0.2349, 0.2528]) -Greedy action tensor([-1.3592, 0.7113, 0.2827, 0.0939]) tensor([0.0544, 0.4316, 0.2812, 0.2328]) -Greedy action tensor([-1.8488, -0.4339, 0.6436, -0.0928]) tensor([0.0435, 0.1790, 0.5258, 0.2518]) -Greedy action tensor([-1.8967, -0.4497, 0.6474, -0.1536]) tensor([0.0422, 0.1794, 0.5373, 0.2412]) -Greedy action tensor([-1.8789, -0.4286, 0.6347, -0.1483]) tensor([0.0430, 0.1834, 0.5310, 0.2427]) -Greedy action tensor([-1.6065, -0.4805, 0.5283, -0.1399]) tensor([0.0593, 0.1827, 0.5011, 0.2569]) -Greedy action tensor([-1.1807, 0.5097, 0.2159, 0.0575]) tensor([0.0719, 0.3897, 0.2905, 0.2479]) -Greedy action tensor([-1.8343, -0.3785, 0.6178, -0.1137]) tensor([0.0445, 0.1907, 0.5164, 0.2485]) -Greedy action tensor([-1.6113, 0.4678, 0.4117, 0.0123]) tensor([0.0462, 0.3697, 0.3496, 0.2345]) -Greedy action tensor([-1.8394, -0.3416, 0.5999, -0.1374]) tensor([0.0446, 0.1994, 0.5113, 0.2446]) -Greedy action tensor([-1.9134, -0.4329, 0.6513, -0.1680]) tensor([0.0415, 0.1822, 0.5388, 0.2375]) -Greedy action tensor([-1.8035, -0.0260, 0.5504, -0.0815]) tensor([0.0434, 0.2568, 0.4569, 0.2429]) -Greedy action tensor([-0.7021, 0.9865, 0.0627, 0.2807]) tensor([0.0890, 0.4818, 0.1913, 0.2379]) -Greedy action tensor([-1.8818, -0.4007, 0.1405, -0.3803]) tensor([0.0573, 0.2521, 0.4332, 0.2573]) -Greedy action tensor([-1.3393, 0.0025, 0.4498, 0.6267]) tensor([0.0557, 0.2131, 0.3333, 0.3979]) -Greedy action tensor([-1.9150, -0.4465, 0.6538, -0.1648]) tensor([0.0414, 0.1798, 0.5404, 0.2383]) -Greedy action tensor([-1.2449, -0.5854, 0.4169, 0.1011]) tensor([0.0830, 0.1606, 0.4374, 0.3190]) -Greedy action tensor([-1.9273, -0.3918, 0.6532, -0.1671]) tensor([0.0406, 0.1883, 0.5354, 0.2357]) -Greedy action tensor([-1.6167, 0.3343, 0.4023, -0.0648]) tensor([0.0493, 0.3468, 0.3712, 0.2327]) -Greedy action tensor([-1.2590, -0.5174, 0.4025, -0.0988]) tensor([0.0865, 0.1817, 0.4558, 0.2761]) -Greedy action tensor([-0.5877, 0.8108, -0.4342, -0.2358]) tensor([0.1309, 0.5302, 0.1527, 0.1862]) -Greedy action tensor([-1.5317, 0.4783, 0.3510, 0.0150]) tensor([0.0507, 0.3783, 0.3330, 0.2380]) -Greedy action tensor([-1.8792, -0.4566, 0.6386, -0.1515]) tensor([0.0431, 0.1790, 0.5351, 0.2428]) -Greedy action tensor([-1.7236, 0.3339, 0.5082, -0.2324]) tensor([0.0443, 0.3465, 0.4125, 0.1967]) -Greedy action tensor([-1.8870, -0.4089, 0.6549, -0.1373]) tensor([0.0419, 0.1839, 0.5329, 0.2413]) -Greedy action tensor([-1.9438, -0.4443, 0.6645, -0.1795]) tensor([0.0402, 0.1800, 0.5454, 0.2345]) -Greedy action tensor([-1.9434, -0.5377, -0.1766, -0.4094]) tensor([0.0642, 0.2620, 0.3759, 0.2979]) -Greedy action tensor([-1.7141, -0.4139, 0.6402, 0.0942]) tensor([0.0469, 0.1723, 0.4944, 0.2864]) -Greedy action tensor([-1.9113, -0.3761, 0.6366, -0.1651]) tensor([0.0414, 0.1922, 0.5291, 0.2373]) -Greedy action tensor([-1.8629, -0.4435, 0.6304, -0.1335]) tensor([0.0437, 0.1808, 0.5291, 0.2465]) -Greedy action tensor([-1.2945, -0.2477, 0.3665, -0.0452]) tensor([0.0794, 0.2261, 0.4178, 0.2768]) -Greedy action tensor([-1.6709, -0.4479, 0.5015, -0.0061]) tensor([0.0542, 0.1840, 0.4756, 0.2863]) -Greedy action tensor([ 1.0797, -0.1596, -0.6271, -0.0083]) tensor([0.5531, 0.1602, 0.1004, 0.1863]) -Greedy action tensor([ 1.5849, -0.8634, -0.6744, 0.2765]) tensor([0.6844, 0.0592, 0.0715, 0.1850]) -Greedy action tensor([ 1.2473, -0.3852, -0.8096, 0.2783]) tensor([0.5873, 0.1148, 0.0751, 0.2228]) -Greedy action tensor([ 1.5953, -0.5924, -0.7602, 0.6205]) tensor([0.6312, 0.0708, 0.0599, 0.2381]) -Greedy action tensor([ 2.1904, -0.6214, -0.3033, 0.7020]) tensor([0.7308, 0.0439, 0.0604, 0.1650]) -Greedy action tensor([ 1.0769, -0.3955, -0.2143, 0.2745]) tensor([0.5121, 0.1175, 0.1408, 0.2296]) -Greedy action tensor([ 1.4504, -0.2418, -0.5813, 0.4012]) tensor([0.6004, 0.1105, 0.0787, 0.2103]) -Greedy action tensor([ 2.0881, -1.3277, -0.2495, 0.8798]) tensor([0.7002, 0.0230, 0.0676, 0.2092]) -Greedy action tensor([ 1.0752, -0.4713, 0.1219, 0.1141]) tensor([0.5048, 0.1075, 0.1946, 0.1931]) -Greedy action tensor([ 1.1253, -0.2023, -0.3519, 0.3877]) tensor([0.5072, 0.1345, 0.1158, 0.2426]) -Greedy action tensor([ 1.2732, -0.1716, 0.0215, 0.2718]) tensor([0.5293, 0.1248, 0.1514, 0.1945]) -Greedy action tensor([ 1.5872, -0.0223, -0.7084, -0.1441]) tensor([0.6767, 0.1353, 0.0681, 0.1198]) -Greedy action tensor([ 1.4245, -0.4567, -0.4263, 0.7772]) tensor([0.5456, 0.0831, 0.0857, 0.2856]) -Greedy action tensor([ 1.1763, -0.6811, -0.5665, 0.3655]) tensor([0.5632, 0.0879, 0.0986, 0.2503]) -Greedy action tensor([ 1.2425, -0.3190, -0.3263, 0.1280]) tensor([0.5727, 0.1202, 0.1193, 0.1879]) -Greedy action tensor([ 1.0909, -0.3745, -0.1305, 0.3229]) tensor([0.5026, 0.1161, 0.1482, 0.2332]) -Greedy action tensor([ 1.3050, -0.1993, -0.9871, 0.1315]) tensor([0.6126, 0.1361, 0.0619, 0.1895]) -Greedy action tensor([ 1.0273, -0.3179, -0.4091, 0.2080]) tensor([0.5157, 0.1343, 0.1226, 0.2273]) -Greedy action tensor([ 1.3354, -0.2857, -0.9024, 0.3689]) tensor([0.5935, 0.1173, 0.0633, 0.2258]) -Greedy action tensor([ 1.5316, -0.5513, -0.1532, 0.1009]) tensor([0.6455, 0.0804, 0.1197, 0.1544]) -Greedy action tensor([ 1.6810, -0.6059, -0.2705, 0.1295]) tensor([0.6870, 0.0698, 0.0976, 0.1456]) -Greedy action tensor([ 1.7260, -0.9146, -0.4434, 0.3190]) tensor([0.6991, 0.0499, 0.0799, 0.1712]) -Greedy action tensor([ 1.3041, -0.2162, -0.8398, 0.5479]) tensor([0.5539, 0.1211, 0.0649, 0.2600]) -Greedy action tensor([ 1.5434, -0.6452, -0.2615, 0.3627]) tensor([0.6315, 0.0708, 0.1039, 0.1939]) -Greedy action tensor([ 1.6828, -1.1127, -0.3905, 0.4861]) tensor([0.6716, 0.0410, 0.0845, 0.2029]) -Greedy action tensor([ 0.8787, -0.1636, -0.5504, 0.1674]) tensor([0.4800, 0.1693, 0.1150, 0.2357]) -Greedy action tensor([ 1.9905, -0.6501, -0.4517, 0.5136]) tensor([0.7212, 0.0514, 0.0627, 0.1647]) -Greedy action tensor([ 1.2614, -0.2225, -0.5447, 0.1923]) tensor([0.5766, 0.1307, 0.0947, 0.1980]) -Greedy action tensor([ 1.6021, -0.9483, -0.5479, 0.2048]) tensor([0.6936, 0.0541, 0.0808, 0.1715]) -Greedy action tensor([ 1.7838, -0.2018, -0.7127, 0.0449]) tensor([0.7167, 0.0984, 0.0590, 0.1259]) -Greedy action tensor([ 1.9405, -1.4611, -0.2376, 1.1079]) tensor([0.6323, 0.0211, 0.0716, 0.2750]) -Greedy action tensor([ 1.3616, -0.7966, -0.1243, 0.3779]) tensor([0.5828, 0.0673, 0.1319, 0.2179]) -Greedy action tensor([ 1.3254, -0.0626, -0.9538, 0.1929]) tensor([0.5973, 0.1491, 0.0611, 0.1925]) -Greedy action tensor([ 1.8958, -0.3524, -0.4706, 0.2469]) tensor([0.7186, 0.0759, 0.0674, 0.1382]) -Greedy action tensor([ 1.4480, -0.1166, -0.4440, -0.2118]) tensor([0.6451, 0.1349, 0.0973, 0.1227]) -Greedy action tensor([ 1.1959, -0.3712, -0.3393, 0.4498]) tensor([0.5268, 0.1099, 0.1135, 0.2498]) -Greedy action tensor([ 1.1301, -0.5950, -0.1566, 0.3991]) tensor([0.5166, 0.0920, 0.1427, 0.2487]) -Greedy action tensor([ 1.6759, -0.8172, -0.3996, 0.8430]) tensor([0.6087, 0.0503, 0.0764, 0.2646]) -Greedy action tensor([ 1.3815, -0.3846, -0.3379, 0.6587]) tensor([0.5448, 0.0932, 0.0976, 0.2644]) -Greedy action tensor([ 1.2890, -0.6263, -0.3396, -0.0174]) tensor([0.6195, 0.0912, 0.1215, 0.1677]) -Greedy action tensor([ 1.9152, -0.5978, -0.3459, 0.2771]) tensor([0.7248, 0.0587, 0.0756, 0.1409]) -Greedy action tensor([ 1.5991, -0.3354, -0.8870, 0.5184]) tensor([0.6381, 0.0922, 0.0531, 0.2166]) -Greedy action tensor([ 1.3896, -0.5311, -0.2799, 0.1173]) tensor([0.6192, 0.0907, 0.1166, 0.1735]) -Greedy action tensor([ 2.0138, -0.8813, -0.5227, 0.3702]) tensor([0.7532, 0.0416, 0.0596, 0.1456]) -Greedy action tensor([ 1.7032, -0.4429, -0.4851, 0.4071]) tensor([0.6655, 0.0778, 0.0746, 0.1821]) -Greedy action tensor([ 1.3749, -0.7370, -0.0850, 0.5639]) tensor([0.5563, 0.0673, 0.1292, 0.2472]) -Greedy action tensor([ 1.4953e+00, -3.0230e-01, -7.3236e-01, -1.5977e-04]) tensor([0.6677, 0.1106, 0.0720, 0.1497]) -Greedy action tensor([ 1.9120, 0.0665, -0.7843, 0.7513]) tensor([0.6499, 0.1026, 0.0438, 0.2036]) -Greedy action tensor([ 1.4381, 0.4377, -0.5149, 0.2623]) tensor([0.5500, 0.2023, 0.0780, 0.1697]) -Greedy action tensor([ 1.3986, -0.5255, -0.4928, 0.6236]) tensor([0.5690, 0.0831, 0.0858, 0.2621]) -Greedy action tensor([ 1.5371, -0.1403, -0.8054, 0.1809]) tensor([0.6491, 0.1213, 0.0624, 0.1672]) -Greedy action tensor([ 1.5956, -0.5903, -0.6431, 0.8260]) tensor([0.5945, 0.0668, 0.0634, 0.2754]) -Greedy action tensor([ 1.5308, -0.3802, -0.5588, -0.1042]) tensor([0.6818, 0.1009, 0.0844, 0.1329]) -Greedy action tensor([ 1.5470, -0.1190, -1.1721, 0.6495]) tensor([0.6015, 0.1137, 0.0397, 0.2452]) -Greedy action tensor([ 1.6182, -0.4093, -1.2608, 0.3096]) tensor([0.6858, 0.0903, 0.0385, 0.1853]) -Greedy action tensor([ 2.0380, -1.1705, 0.0562, 0.4831]) tensor([0.7197, 0.0291, 0.0992, 0.1520]) -Greedy action tensor([ 1.6202, -0.2780, -0.4367, 0.1679]) tensor([0.6615, 0.0991, 0.0846, 0.1548]) -Greedy action tensor([ 1.3027, -0.5231, -0.4412, 0.1351]) tensor([0.6072, 0.0978, 0.1061, 0.1889]) -Greedy action tensor([ 1.4985, -0.6649, -0.6151, 0.2562]) tensor([0.6560, 0.0754, 0.0792, 0.1894]) -Greedy action tensor([ 1.4927, -0.5975, -0.1322, 0.1761]) tensor([0.6295, 0.0778, 0.1240, 0.1687]) -Greedy action tensor([ 1.0989, -0.5635, -0.2064, 0.4148]) tensor([0.5088, 0.0965, 0.1379, 0.2567]) -Greedy action tensor([ 1.4994, -0.2492, -0.2530, 0.7097]) tensor([0.5551, 0.0966, 0.0962, 0.2520]) -Greedy action tensor([ 1.8190, -0.6311, -0.1562, 0.1181]) tensor([0.7105, 0.0613, 0.0986, 0.1297]) -Greedy action tensor([ 2.5401, -1.1450, -0.1359, 0.7117]) tensor([0.7971, 0.0200, 0.0549, 0.1281]) -Greedy action tensor([ 1.3946, -0.0823, -0.0717, 0.2445]) tensor([0.5632, 0.1286, 0.1300, 0.1783]) -Greedy action tensor([ 1.5779, 0.0046, -0.9904, 0.2283]) tensor([0.6479, 0.1344, 0.0497, 0.1680]) -Greedy action tensor([ 1.8857, -0.8931, 0.0162, 0.6959]) tensor([0.6576, 0.0408, 0.1014, 0.2001]) -Greedy action tensor([ 2.5044, -0.9812, -0.0775, 0.9302]) tensor([0.7614, 0.0233, 0.0576, 0.1577]) -Greedy action tensor([ 0.9682, -0.5918, -1.0358, 0.5855]) tensor([0.4934, 0.1037, 0.0665, 0.3365]) -Greedy action tensor([ 1.1640, -0.3140, -0.3890, 0.4279]) tensor([0.5212, 0.1189, 0.1103, 0.2496]) -Greedy action tensor([ 1.3615, -0.5230, -0.3142, 0.1930]) tensor([0.6061, 0.0921, 0.1134, 0.1884]) -Greedy action tensor([ 1.1564, -0.1818, -0.5613, 0.3378]) tensor([0.5311, 0.1393, 0.0953, 0.2342]) -Greedy action tensor([ 1.8903, -0.2080, -1.3072, 0.3490]) tensor([0.7259, 0.0890, 0.0297, 0.1554]) -Greedy action tensor([ 1.7395, -0.9516, -0.1850, 0.4109]) tensor([0.6763, 0.0459, 0.0987, 0.1791]) -Greedy action tensor([ 2.0751, -1.1704, 0.3417, 1.1259]) tensor([0.6239, 0.0243, 0.1102, 0.2415]) -Greedy action tensor([ 2.0715, -0.6679, -0.3970, 0.3488]) tensor([0.7531, 0.0487, 0.0638, 0.1345]) -Greedy action tensor([ 1.4660, -0.3898, -0.9651, -0.3531]) tensor([0.7110, 0.1112, 0.0625, 0.1153]) -Greedy action tensor([ 1.6665, -0.9932, -0.4320, 0.3554]) tensor([0.6839, 0.0479, 0.0839, 0.1843]) -Greedy action tensor([ 2.0174, -0.5579, -0.3681, 0.6178]) tensor([0.7068, 0.0538, 0.0651, 0.1744]) -Greedy action tensor([ 2.1012, -1.1608, -0.5501, 0.4392]) tensor([0.7700, 0.0295, 0.0543, 0.1461]) -Greedy action tensor([ 1.8771, -0.1325, -0.7164, 0.6960]) tensor([0.6597, 0.0884, 0.0493, 0.2025]) -Greedy action tensor([ 0.9221, -0.5708, -0.1137, -0.4277]) tensor([0.5438, 0.1222, 0.1930, 0.1410]) -Greedy action tensor([ 0.6776, -0.5629, -0.1800, -0.3482]) tensor([0.4826, 0.1396, 0.2047, 0.1730]) -Greedy action tensor([ 1.1390, -0.7854, 0.2389, -0.5471]) tensor([0.5755, 0.0840, 0.2339, 0.1066]) -Greedy action tensor([ 0.7026, -0.5609, -0.0815, -0.1512]) tensor([0.4619, 0.1306, 0.2109, 0.1967]) -Greedy action tensor([ 0.8109, -0.8538, 0.1130, -0.3108]) tensor([0.4969, 0.0940, 0.2473, 0.1619]) -Greedy action tensor([ 0.4121, -0.1104, 0.0079, -0.2801]) tensor([0.3622, 0.2148, 0.2418, 0.1813]) -Greedy action tensor([ 1.1309, -0.6756, -0.0989, -0.5022]) tensor([0.6054, 0.0994, 0.1770, 0.1182]) -Greedy action tensor([ 1.1538, -0.9740, 0.0815, -0.5502]) tensor([0.6085, 0.0725, 0.2083, 0.1107]) -Greedy action tensor([ 1.1531, -0.6632, -0.0886, -0.3078]) tensor([0.5940, 0.0966, 0.1716, 0.1378]) -Greedy action tensor([ 0.9387, -0.4189, -0.2353, -0.4174]) tensor([0.5482, 0.1410, 0.1695, 0.1413]) -Greedy action tensor([ 0.3565, 0.1828, -0.2036, 0.1191]) tensor([0.3125, 0.2626, 0.1785, 0.2464]) -Greedy action tensor([ 0.8000, -0.4030, 0.1465, -0.3768]) tensor([0.4698, 0.1411, 0.2444, 0.1448]) -Greedy action tensor([ 1.1426, -0.3559, -0.0314, -0.4282]) tensor([0.5746, 0.1284, 0.1776, 0.1194]) -Greedy action tensor([ 0.6230, 0.1003, -0.0651, 0.0653]) tensor([0.3748, 0.2222, 0.1884, 0.2146]) -Greedy action tensor([ 0.7782, -0.8632, -0.2041, -0.1163]) tensor([0.5058, 0.0980, 0.1894, 0.2068]) -Greedy action tensor([ 0.6055, -0.3057, -0.0407, -0.2036]) tensor([0.4217, 0.1695, 0.2210, 0.1878]) -Greedy action tensor([ 0.7684, -0.0118, 0.0551, 0.0255]) tensor([0.4125, 0.1891, 0.2021, 0.1962]) -Greedy action tensor([ 0.9027, -0.5308, -0.1457, -0.3767]) tensor([0.5356, 0.1277, 0.1877, 0.1490]) -Greedy action tensor([ 1.0691, -0.6881, -0.0753, -0.4584]) tensor([0.5855, 0.1010, 0.1864, 0.1271]) -Greedy action tensor([ 0.7113, -0.4704, -0.0340, -0.4108]) tensor([0.4746, 0.1456, 0.2253, 0.1545]) -Greedy action tensor([ 1.0739, -0.4529, 0.0329, -0.6118]) tensor([0.5696, 0.1237, 0.2011, 0.1056]) -Greedy action tensor([ 0.6769, -0.2484, -0.0886, -0.1711]) tensor([0.4367, 0.1731, 0.2031, 0.1870]) -Greedy action tensor([ 0.9511, -0.4589, -0.0769, -0.3313]) tensor([0.5321, 0.1299, 0.1904, 0.1476]) -Greedy action tensor([ 0.5874, -0.2000, -0.0173, -0.0242]) tensor([0.3931, 0.1789, 0.2147, 0.2133]) -Greedy action tensor([ 0.6141, -0.1237, 0.1260, -0.0573]) tensor([0.3842, 0.1837, 0.2358, 0.1963]) -Greedy action tensor([ 0.6391, 0.1754, -0.0815, 0.1267]) tensor([0.3684, 0.2317, 0.1792, 0.2207]) -Greedy action tensor([ 0.7694, -0.1270, 0.0385, -0.4447]) tensor([0.4574, 0.1866, 0.2202, 0.1358]) -Greedy action tensor([ 0.8259, -0.5005, -0.0096, -0.4464]) tensor([0.5052, 0.1341, 0.2191, 0.1416]) -Greedy action tensor([ 0.7363, -0.3855, 0.1738, -0.3143]) tensor([0.4454, 0.1451, 0.2538, 0.1558]) -Greedy action tensor([ 0.7810, -0.5869, 0.4076, -0.5798]) tensor([0.4546, 0.1158, 0.3130, 0.1166]) -Greedy action tensor([ 1.1117, -0.7346, 0.0566, -0.4722]) tensor([0.5844, 0.0922, 0.2035, 0.1199]) -Greedy action tensor([ 1.2210, -0.9986, -0.2810, -0.4739]) tensor([0.6601, 0.0717, 0.1470, 0.1212]) -Greedy action tensor([ 1.1928, -0.5827, -0.0232, -0.2943]) tensor([0.5911, 0.1001, 0.1752, 0.1336]) -Greedy action tensor([ 0.5710, -0.0546, -0.1607, -0.0097]) tensor([0.3883, 0.2077, 0.1868, 0.2172]) -Greedy action tensor([ 0.9661, -0.2560, -0.0707, -0.3881]) tensor([0.5243, 0.1545, 0.1859, 0.1353]) -Greedy action tensor([ 0.6773, -0.4152, 0.0297, -0.3674]) tensor([0.4524, 0.1517, 0.2367, 0.1592]) -Greedy action tensor([ 0.4763, -0.4457, -0.0260, -0.1677]) tensor([0.3956, 0.1573, 0.2394, 0.2078]) -Greedy action tensor([ 0.9161, -0.7456, 0.1537, -0.4444]) tensor([0.5228, 0.0992, 0.2439, 0.1341]) -Greedy action tensor([ 0.3601, -0.2259, -0.0955, -0.1880]) tensor([0.3612, 0.2010, 0.2290, 0.2088]) -Greedy action tensor([ 0.0921, 0.1133, -0.1617, -0.0150]) tensor([0.2706, 0.2764, 0.2099, 0.2431]) -Greedy action tensor([ 0.9644, -0.5751, -0.1623, -0.2541]) tensor([0.5452, 0.1169, 0.1767, 0.1612]) -Greedy action tensor([ 0.6811, -0.2977, -0.0145, -0.3159]) tensor([0.4457, 0.1675, 0.2223, 0.1645]) -Greedy action tensor([ 0.7445, -0.4693, -0.1543, -0.3511]) tensor([0.4905, 0.1457, 0.1997, 0.1640]) -Greedy action tensor([ 0.8593, -0.6568, -0.0586, -0.3567]) tensor([0.5221, 0.1146, 0.2085, 0.1548]) -Greedy action tensor([ 1.3090, -0.8178, 0.0059, -0.5747]) tensor([0.6481, 0.0773, 0.1761, 0.0985]) -Greedy action tensor([ 0.5649, 0.0919, -0.1568, -0.2632]) tensor([0.3928, 0.2448, 0.1909, 0.1716]) -Greedy action tensor([ 1.1425, -0.4161, 0.0304, -0.6501]) tensor([0.5862, 0.1234, 0.1928, 0.0976]) -Greedy action tensor([ 0.4826, -0.2499, 0.0084, -0.3326]) tensor([0.3928, 0.1888, 0.2445, 0.1738]) -Greedy action tensor([ 0.7382, -0.4687, 0.0655, -0.4891]) tensor([0.4756, 0.1423, 0.2427, 0.1394]) -Greedy action tensor([ 0.8510, -0.5948, -0.0047, -0.3777]) tensor([0.5120, 0.1206, 0.2176, 0.1498]) -Greedy action tensor([ 1.2338, -0.9063, -0.0181, -0.8348]) tensor([0.6536, 0.0769, 0.1869, 0.0826]) -Greedy action tensor([ 0.6339, -0.4499, -0.1234, -0.3323]) tensor([0.4571, 0.1546, 0.2143, 0.1739]) -Greedy action tensor([ 0.6524, -0.2910, 0.0815, -0.1791]) tensor([0.4185, 0.1629, 0.2364, 0.1822]) -Greedy action tensor([ 0.9215, -0.5936, -0.0776, -0.4175]) tensor([0.5405, 0.1188, 0.1990, 0.1417]) -Greedy action tensor([ 0.7947, -0.1327, -0.0381, -0.0957]) tensor([0.4462, 0.1765, 0.1940, 0.1832]) -Greedy action tensor([ 0.4411, -0.2583, 0.0243, -0.1508]) tensor([0.3691, 0.1834, 0.2433, 0.2042]) -Greedy action tensor([ 0.6938, -0.3763, 0.0132, -0.3079]) tensor([0.4511, 0.1547, 0.2284, 0.1657]) -Greedy action tensor([ 1.3293, -0.6780, -0.1367, -0.5446]) tensor([0.6584, 0.0885, 0.1520, 0.1011]) -Greedy action tensor([ 0.8523, -0.4358, -0.0887, -0.3159]) tensor([0.5058, 0.1395, 0.1974, 0.1573]) -Greedy action tensor([ 0.9252, -0.7424, 0.0513, -0.5498]) tensor([0.5450, 0.1028, 0.2275, 0.1247]) -Greedy action tensor([ 0.5748, -0.2290, -0.0706, -0.3399]) tensor([0.4214, 0.1887, 0.2210, 0.1689]) -Greedy action tensor([ 1.0004, -0.7770, 0.0279, -0.6008]) tensor([0.5718, 0.0967, 0.2162, 0.1153]) -Greedy action tensor([ 0.7537, -0.5832, -0.1057, -0.2089]) tensor([0.4836, 0.1270, 0.2047, 0.1847]) -Greedy action tensor([ 1.1166, -0.5794, -0.0558, -0.7463]) tensor([0.6067, 0.1113, 0.1878, 0.0942]) -Greedy action tensor([ 0.8948, -0.7206, -0.1521, -0.3515]) tensor([0.5443, 0.1082, 0.1910, 0.1565]) -Greedy action tensor([ 0.9060, -0.3485, -0.1604, -0.2835]) tensor([0.5171, 0.1475, 0.1780, 0.1574]) -Greedy action tensor([ 0.8547, -0.7378, -0.0324, -0.3269]) tensor([0.5203, 0.1058, 0.2143, 0.1596]) -Greedy action tensor([ 0.7111, -0.5141, 0.0564, -0.1899]) tensor([0.4506, 0.1323, 0.2341, 0.1830]) -Greedy action tensor([ 0.6003, 0.3925, -0.2115, 0.1917]) tensor([0.3423, 0.2781, 0.1520, 0.2275]) -Greedy action tensor([ 0.7829, -0.4980, -0.0517, -0.6017]) tensor([0.5096, 0.1416, 0.2212, 0.1276]) -Greedy action tensor([ 0.6646, -0.2623, -0.1037, -0.1951]) tensor([0.4380, 0.1734, 0.2032, 0.1854]) -Greedy action tensor([ 0.6480, -0.1971, 0.1068, -0.1743]) tensor([0.4080, 0.1752, 0.2375, 0.1793]) -Greedy action tensor([ 0.7501, -0.4427, -0.0073, -0.1302]) tensor([0.4573, 0.1387, 0.2144, 0.1896]) -Greedy action tensor([ 0.8082, -0.5848, 0.1607, -0.4200]) tensor([0.4844, 0.1203, 0.2535, 0.1418]) -Greedy action tensor([ 1.0928, -0.6042, -0.1462, -0.4604]) tensor([0.5937, 0.1088, 0.1720, 0.1256]) -Greedy action tensor([ 0.7378, -0.3780, 0.0803, -0.2170]) tensor([0.4483, 0.1469, 0.2323, 0.1725]) -Greedy action tensor([ 0.8679, -0.6517, -0.0973, -0.2332]) tensor([0.5175, 0.1132, 0.1971, 0.1721]) -Greedy action tensor([ 0.8312, -0.4292, -0.0777, -0.3886]) tensor([0.5046, 0.1431, 0.2033, 0.1490]) -Greedy action tensor([ 0.7549, -0.3692, -0.0616, -0.4671]) tensor([0.4851, 0.1576, 0.2144, 0.1429]) -Greedy action tensor([ 0.4908, -0.0348, 0.0060, 0.0137]) tensor([0.3537, 0.2091, 0.2178, 0.2195]) -Greedy action tensor([ 0.4755, -0.1895, 0.0414, -0.1173]) tensor([0.3683, 0.1894, 0.2386, 0.2036]) -Greedy action tensor([ 0.0157, -2.0117, 0.3838, -0.1321]) tensor([0.2908, 0.0383, 0.4202, 0.2508]) -Greedy action tensor([-0.2353, -1.0364, -0.1378, -0.8482]) tensor([0.3233, 0.1451, 0.3564, 0.1752]) -Greedy action tensor([ 0.0250, -0.4801, 0.2941, -1.0290]) tensor([0.3067, 0.1851, 0.4014, 0.1069]) -Greedy action tensor([-0.3049, -1.8430, 0.1574, -0.0609]) tensor([0.2452, 0.0527, 0.3893, 0.3129]) -Greedy action tensor([-1.0433, -0.2427, -0.4327, -0.6583]) tensor([0.1530, 0.3406, 0.2817, 0.2248]) -Greedy action tensor([ 0.4002, -0.4712, -0.7211, 0.2756]) tensor([0.3807, 0.1592, 0.1240, 0.3361]) -Greedy action tensor([-1.1451, 0.6936, 0.2988, -1.0862]) tensor([0.0794, 0.4996, 0.3367, 0.0843]) -Greedy action tensor([-1.0273, -0.8130, -0.9077, 0.6274]) tensor([0.1163, 0.1441, 0.1311, 0.6085]) -Greedy action tensor([-1.0387, 0.0374, -0.3542, -0.0512]) tensor([0.1163, 0.3411, 0.2305, 0.3121]) -Greedy action tensor([ 0.2721, -1.5639, -0.4430, 0.9969]) tensor([0.2693, 0.0429, 0.1317, 0.5560]) -Greedy action tensor([ 0.0061, -0.5385, -0.2701, -0.8611]) tensor([0.3625, 0.2103, 0.2750, 0.1523]) -Greedy action tensor([0.8270, 0.0127, 0.7343, 0.6233]) tensor([0.3154, 0.1397, 0.2875, 0.2573]) -Greedy action tensor([ 0.3386, -1.2187, 1.1043, -0.7662]) tensor([0.2708, 0.0571, 0.5824, 0.0897]) -Greedy action tensor([-0.6195, 0.3932, 0.3511, -1.3795]) tensor([0.1458, 0.4013, 0.3848, 0.0682]) -Greedy action tensor([ 1.7068, -0.1391, 0.9841, 1.3151]) tensor([0.4312, 0.0681, 0.2093, 0.2914]) -Greedy action tensor([ 1.1751, -0.4211, -0.9537, 1.9351]) tensor([0.2890, 0.0586, 0.0344, 0.6180]) -Greedy action tensor([-0.3920, -0.9995, -0.1269, -0.4850]) tensor([0.2660, 0.1449, 0.3467, 0.2424]) -Greedy action tensor([ 0.8812, -0.0465, -0.5045, 0.0996]) tensor([0.4754, 0.1880, 0.1189, 0.2176]) -Greedy action tensor([-0.1096, -1.1722, 0.4993, -0.4526]) tensor([0.2568, 0.0887, 0.4722, 0.1823]) -Greedy action tensor([-0.6153, -0.6200, -1.6484, 0.0933]) tensor([0.2282, 0.2271, 0.0812, 0.4635]) -Greedy action tensor([ 0.4508, -0.7031, 0.3835, 0.9569]) tensor([0.2558, 0.0807, 0.2392, 0.4243]) -Greedy action tensor([-0.1262, 0.2426, 0.8733, -0.3846]) tensor([0.1685, 0.2436, 0.4578, 0.1301]) -Greedy action tensor([-0.5882, -0.9701, -0.6003, -0.7257]) tensor([0.2823, 0.1927, 0.2789, 0.2461]) -Greedy action tensor([ 0.3519, -1.1957, -0.6363, -0.0860]) tensor([0.4484, 0.0954, 0.1669, 0.2894]) -Greedy action tensor([ 0.3648, 0.0659, -0.7321, -0.5393]) tensor([0.4032, 0.2990, 0.1346, 0.1632]) -Greedy action tensor([ 1.4837, -0.5775, 1.7541, 0.2876]) tensor([0.3649, 0.0465, 0.4783, 0.1103]) -Greedy action tensor([ 0.1334, 0.0957, -0.0229, -1.0097]) tensor([0.3188, 0.3070, 0.2726, 0.1016]) -Greedy action tensor([ 0.0559, 0.9627, 0.1070, -0.8012]) tensor([0.2019, 0.5000, 0.2125, 0.0857]) -Greedy action tensor([ 0.2704, -1.3951, -0.8045, 0.4743]) tensor([0.3628, 0.0686, 0.1238, 0.4448]) -Greedy action tensor([ 0.0063, -0.2745, 0.3915, -0.1806]) tensor([0.2466, 0.1863, 0.3625, 0.2046]) -Greedy action tensor([-0.4108, 0.0036, 0.0603, -0.5083]) tensor([0.1991, 0.3014, 0.3189, 0.1806]) -Greedy action tensor([0.7375, 0.4908, 0.1435, 0.0926]) tensor([0.3499, 0.2734, 0.1932, 0.1836]) -Greedy action tensor([ 0.4317, -0.9614, 0.2330, 1.2815]) tensor([0.2269, 0.0563, 0.1860, 0.5308]) -Greedy action tensor([ 0.3196, -1.5967, -0.0385, 0.1471]) tensor([0.3721, 0.0548, 0.2601, 0.3131]) -Greedy action tensor([ 1.1734, -0.7209, -0.0564, -0.2313]) tensor([0.5924, 0.0891, 0.1732, 0.1454]) -Greedy action tensor([-0.2783, -1.0880, 0.0425, -1.2042]) tensor([0.3106, 0.1382, 0.4281, 0.1231]) -Greedy action tensor([ 0.1986, -2.0041, -0.0852, 0.2056]) tensor([0.3484, 0.0385, 0.2623, 0.3508]) -Greedy action tensor([ 1.6823, -1.0428, -0.2045, 0.6237]) tensor([0.6394, 0.0419, 0.0969, 0.2218]) -Greedy action tensor([ 0.0064, -0.6822, -0.2966, 0.1829]) tensor([0.2912, 0.1463, 0.2151, 0.3474]) -Greedy action tensor([ 0.0271, -1.3604, 0.2856, -0.8621]) tensor([0.3383, 0.0845, 0.4381, 0.1391]) -Greedy action tensor([ 0.6728, -0.8948, 0.5854, -0.0104]) tensor([0.3803, 0.0793, 0.3484, 0.1920]) -Greedy action tensor([ 0.1269, -0.3165, 0.1479, -0.3755]) tensor([0.3060, 0.1964, 0.3125, 0.1851]) -Greedy action tensor([-0.8536, -0.4136, -0.6724, 0.1682]) tensor([0.1532, 0.2378, 0.1836, 0.4255]) -Greedy action tensor([-0.0730, -0.5768, -0.1341, -0.2502]) tensor([0.2956, 0.1786, 0.2781, 0.2476]) -Greedy action tensor([-0.6852, -0.2853, -0.5032, -0.4466]) tensor([0.2016, 0.3007, 0.2418, 0.2559]) -Greedy action tensor([-0.5324, -0.0122, -0.8482, 0.1109]) tensor([0.1882, 0.3166, 0.1372, 0.3580]) -Greedy action tensor([-0.1922, -0.6968, 0.5791, -0.3383]) tensor([0.2160, 0.1304, 0.4670, 0.1866]) -Greedy action tensor([-0.0934, -0.1057, 0.1763, -1.0879]) tensor([0.2727, 0.2694, 0.3571, 0.1009]) -Greedy action tensor([-0.6343, -0.9276, 0.9233, -1.4265]) tensor([0.1440, 0.1074, 0.6835, 0.0652]) -Greedy action tensor([-1.0503, -0.4859, -0.7772, 0.3998]) tensor([0.1200, 0.2109, 0.1576, 0.5115]) -Greedy action tensor([-0.7188, -1.4040, 0.3877, 0.4978]) tensor([0.1265, 0.0638, 0.3826, 0.4271]) -Greedy action tensor([-0.0528, -1.0198, 0.2974, 0.4414]) tensor([0.2253, 0.0857, 0.3198, 0.3693]) -Greedy action tensor([-0.7071, -1.5741, 0.6207, -0.7063]) tensor([0.1614, 0.0678, 0.6091, 0.1616]) -Greedy action tensor([-0.3082, -1.4591, 0.6521, -0.2552]) tensor([0.2007, 0.0635, 0.5242, 0.2116]) -Greedy action tensor([ 0.3874, -0.1132, -0.4578, -0.3164]) tensor([0.3952, 0.2396, 0.1697, 0.1955]) -Greedy action tensor([-0.7308, -0.5797, 0.2106, -1.0302]) tensor([0.1829, 0.2127, 0.4688, 0.1356]) -Greedy action tensor([-0.3703, -0.0108, -0.2312, -0.7090]) tensor([0.2329, 0.3336, 0.2676, 0.1659]) -Greedy action tensor([ 0.0181, 0.5470, -0.3929, -0.6861]) tensor([0.2594, 0.4403, 0.1720, 0.1283]) -Greedy action tensor([ 0.4230, -1.7474, -0.1420, 0.3624]) tensor([0.3811, 0.0435, 0.2166, 0.3587]) -Greedy action tensor([-0.5083, -0.0400, -1.4634, -0.1702]) tensor([0.2281, 0.3643, 0.0878, 0.3198]) -Greedy action tensor([ 0.4104, -0.1777, -0.6270, -0.3400]) tensor([0.4198, 0.2332, 0.1488, 0.1982]) -Greedy action tensor([ 0.2567, -0.6105, 0.1200, -0.8116]) tensor([0.3794, 0.1594, 0.3309, 0.1303]) -Greedy action tensor([-0.7935, -0.5603, 0.3083, -0.6833]) tensor([0.1565, 0.1976, 0.4711, 0.1748]) -Greedy action tensor([-1.0267, -2.0955, 0.0433, 0.1943]) tensor([0.1307, 0.0449, 0.3811, 0.4433]) -Greedy action tensor([-0.8500, 0.2145, 0.0075, -1.1025]) tensor([0.1422, 0.4122, 0.3351, 0.1105]) -Greedy action tensor([-0.4169, -0.3449, -0.1458, -0.9669]) tensor([0.2523, 0.2712, 0.3309, 0.1456]) -Greedy action tensor([-0.1310, 0.2642, 0.2247, 0.3219]) tensor([0.1823, 0.2707, 0.2602, 0.2868]) -Greedy action tensor([ 0.9079, -0.6882, -0.0997, 0.2902]) tensor([0.4746, 0.0962, 0.1733, 0.2559]) -Greedy action tensor([-0.5790, -1.0277, 0.2558, -0.9707]) tensor([0.2165, 0.1382, 0.4989, 0.1463]) -Greedy action tensor([ 0.5371, -0.2190, 0.1866, -0.0705]) tensor([0.3679, 0.1727, 0.2591, 0.2003]) -Greedy action tensor([-0.4813, 0.0254, 0.4670, -0.0967]) tensor([0.1490, 0.2473, 0.3847, 0.2189]) -Greedy action tensor([ 0.9396, -0.8835, 0.0861, 0.4473]) tensor([0.4548, 0.0735, 0.1937, 0.2780]) -Greedy action tensor([-0.6188, 0.1782, -0.1231, -0.0333]) tensor([0.1502, 0.3334, 0.2466, 0.2698]) -Greedy action tensor([ 0.2774, 0.5546, 0.4310, -0.4551]) tensor([0.2521, 0.3327, 0.2940, 0.1212]) -Greedy action tensor([-0.8626, -1.2853, -0.1945, -1.4683]) tensor([0.2409, 0.1579, 0.4698, 0.1314]) -Greedy action tensor([ 0.0029, -0.4400, 0.0241, -0.6289]) tensor([0.3130, 0.2010, 0.3197, 0.1664]) -Greedy action tensor([-0.4351, -0.8826, -0.4888, -0.5381]) tensor([0.2866, 0.1832, 0.2716, 0.2586]) -Greedy action tensor([ 0.3165, 0.3268, -0.8604, -0.3381]) tensor([0.3523, 0.3560, 0.1086, 0.1831]) -Greedy action tensor([ 0.0884, -1.4561, 0.6602, -0.3591]) tensor([0.2759, 0.0589, 0.4888, 0.1764]) -Greedy action tensor([-0.1742, -0.9039, -0.1600, -0.4219]) tensor([0.3052, 0.1471, 0.3095, 0.2382]) -Greedy action tensor([ 0.2401, -0.7103, -0.3433, 0.5308]) tensor([0.3047, 0.1178, 0.1700, 0.4075]) -Greedy action tensor([ 0.0545, -0.5148, -0.0681, -0.6090]) tensor([0.3372, 0.1908, 0.2983, 0.1737]) -Greedy action tensor([ 1.1919, -0.7696, -0.0404, -0.6110]) tensor([0.6261, 0.0881, 0.1826, 0.1032]) -Greedy action tensor([ 0.3942, 0.1470, -0.0401, 0.0605]) tensor([0.3180, 0.2483, 0.2059, 0.2278]) -Greedy action tensor([ 0.7855, -0.0874, -0.0583, -0.0485]) tensor([0.4382, 0.1830, 0.1885, 0.1903]) -Greedy action tensor([ 0.9393, -0.8856, 0.1998, -0.5224]) tensor([0.5347, 0.0862, 0.2552, 0.1239]) -Greedy action tensor([ 1.0957, -0.8393, 0.1518, -0.6708]) tensor([0.5867, 0.0847, 0.2283, 0.1003]) -Greedy action tensor([ 0.8465, -0.3958, -0.1213, -0.2342]) tensor([0.4980, 0.1438, 0.1892, 0.1690]) -Greedy action tensor([ 0.9997, -0.7062, -0.1393, -0.3916]) tensor([0.5713, 0.1037, 0.1829, 0.1421]) -Greedy action tensor([ 0.8431, -0.8576, 0.1018, -0.5816]) tensor([0.5264, 0.0961, 0.2508, 0.1266]) -Greedy action tensor([ 0.6651, -0.0495, 0.2329, -0.0654]) tensor([0.3817, 0.1868, 0.2477, 0.1838]) -Greedy action tensor([ 0.9255, -0.5722, -0.1143, -0.2025]) tensor([0.5261, 0.1177, 0.1860, 0.1703]) -Greedy action tensor([ 1.1177, -0.7386, 0.0988, -0.3376]) tensor([0.5712, 0.0893, 0.2062, 0.1333]) -Greedy action tensor([ 0.6888, -0.1766, -0.1076, -0.0132]) tensor([0.4224, 0.1778, 0.1905, 0.2093]) -Greedy action tensor([ 1.3287, -0.7079, -0.1986, -0.5169]) tensor([0.6642, 0.0867, 0.1442, 0.1049]) -Greedy action tensor([ 0.5186, -0.1592, 0.0195, -0.1291]) tensor([0.3791, 0.1925, 0.2301, 0.1983]) -Greedy action tensor([ 1.0673, -0.5120, 0.0514, -0.3629]) tensor([0.5533, 0.1140, 0.2003, 0.1324]) -Greedy action tensor([ 0.2329, 0.1487, 0.0215, -0.3350]) tensor([0.3035, 0.2789, 0.2456, 0.1720]) -Greedy action tensor([ 0.9986, -0.2530, -0.0689, -0.3062]) tensor([0.5260, 0.1505, 0.1809, 0.1427]) -Greedy action tensor([ 0.7315, 0.0431, -0.0910, -0.0549]) tensor([0.4172, 0.2096, 0.1833, 0.1900]) -Greedy action tensor([ 1.0723, -0.6103, -0.1439, -0.4137]) tensor([0.5853, 0.1088, 0.1735, 0.1324]) -Greedy action tensor([ 0.9796, -0.6826, -0.0192, -0.4464]) tensor([0.5561, 0.1055, 0.2048, 0.1336]) -Greedy action tensor([ 0.3274, -0.0241, -0.0260, -0.2661]) tensor([0.3380, 0.2378, 0.2374, 0.1867]) -Greedy action tensor([ 0.8971, -0.6874, 0.0779, -0.6236]) tensor([0.5364, 0.1100, 0.2364, 0.1172]) -Greedy action tensor([ 0.6953, -0.2965, -0.0226, -0.2812]) tensor([0.4474, 0.1659, 0.2182, 0.1685]) -Greedy action tensor([ 0.6890, -0.5483, -0.0372, -0.1900]) tensor([0.4568, 0.1325, 0.2210, 0.1897]) -Greedy action tensor([ 1.1764, -0.6464, 0.0812, -0.4478]) tensor([0.5906, 0.0954, 0.1975, 0.1164]) -Greedy action tensor([ 1.0832, -0.7304, -0.0388, -0.5096]) tensor([0.5910, 0.0964, 0.1924, 0.1202]) -Greedy action tensor([ 0.6580, -0.2146, -0.1823, -0.1392]) tensor([0.4348, 0.1817, 0.1876, 0.1959]) -Greedy action tensor([ 0.9192, -0.4100, 0.0873, -0.4113]) tensor([0.5091, 0.1347, 0.2216, 0.1346]) -Greedy action tensor([ 0.5424, 0.2091, -0.1190, 0.1451]) tensor([0.3443, 0.2467, 0.1777, 0.2314]) -Greedy action tensor([ 0.9527, -0.2796, -0.0441, -0.0970]) tensor([0.4973, 0.1450, 0.1835, 0.1741]) -Greedy action tensor([ 0.9570, -0.3807, -0.0156, -0.2632]) tensor([0.5166, 0.1356, 0.1953, 0.1525]) -Greedy action tensor([ 0.8122, -0.4666, -0.0982, -0.2219]) tensor([0.4911, 0.1367, 0.1976, 0.1746]) -Greedy action tensor([ 0.6865, -0.3921, -0.1110, -0.1341]) tensor([0.4483, 0.1525, 0.2019, 0.1973]) -Greedy action tensor([ 0.6104, -0.2223, -0.0348, -0.1004]) tensor([0.4080, 0.1775, 0.2141, 0.2004]) -Greedy action tensor([ 0.9426, -0.7477, -0.0423, -0.4543]) tensor([0.5539, 0.1022, 0.2069, 0.1370]) -Greedy action tensor([ 0.9447, -0.6137, 0.0110, -0.3387]) tensor([0.5317, 0.1119, 0.2090, 0.1473]) -Greedy action tensor([ 0.9095, -0.9252, 0.0172, -0.5813]) tensor([0.5572, 0.0890, 0.2283, 0.1255]) -Greedy action tensor([ 0.7276, -0.2132, 0.0748, -0.1122]) tensor([0.4269, 0.1666, 0.2222, 0.1843]) -Greedy action tensor([ 0.5084, -0.4052, -0.2043, -0.0824]) tensor([0.4089, 0.1640, 0.2005, 0.2265]) -Greedy action tensor([ 0.7500, -0.1432, -0.1388, -0.5405]) tensor([0.4772, 0.1953, 0.1962, 0.1313]) -Greedy action tensor([ 0.8175, -0.6417, -0.1562, -0.3161]) tensor([0.5176, 0.1203, 0.1955, 0.1666]) -Greedy action tensor([ 0.8623, -0.2856, -0.3599, -0.4459]) tensor([0.5313, 0.1686, 0.1565, 0.1436]) -Greedy action tensor([ 0.6065, -0.2366, 0.0289, -0.1386]) tensor([0.4055, 0.1745, 0.2276, 0.1925]) -Greedy action tensor([ 0.4181, -0.2164, -0.0461, -0.1230]) tensor([0.3648, 0.1934, 0.2294, 0.2124]) -Greedy action tensor([ 1.1978, -0.8796, 0.1009, -0.5793]) tensor([0.6141, 0.0769, 0.2051, 0.1039]) -Greedy action tensor([ 0.8558, -0.5676, 0.0098, -0.4866]) tensor([0.5178, 0.1247, 0.2222, 0.1353]) -Greedy action tensor([ 0.4916, -0.3611, -0.1625, -0.1519]) tensor([0.4046, 0.1725, 0.2104, 0.2126]) -Greedy action tensor([ 1.0841, -0.5693, -0.2366, -0.8823]) tensor([0.6257, 0.1197, 0.1670, 0.0876]) -Greedy action tensor([ 0.8478, -0.4220, -0.0897, -0.4308]) tensor([0.5126, 0.1440, 0.2007, 0.1427]) -Greedy action tensor([ 0.3261, -0.1394, 0.1261, -0.1485]) tensor([0.3259, 0.2046, 0.2668, 0.2027]) -Greedy action tensor([ 0.8692, -0.7067, 0.0306, -0.6257]) tensor([0.5367, 0.1110, 0.2320, 0.1204]) -Greedy action tensor([ 0.5255, -0.1225, -0.1203, -0.4499]) tensor([0.4125, 0.2158, 0.2162, 0.1555]) -Greedy action tensor([ 0.9553, -0.6289, 0.1656, -0.4036]) tensor([0.5219, 0.1071, 0.2369, 0.1341]) -Greedy action tensor([ 1.0276, -0.6078, -0.0589, -0.4088]) tensor([0.5650, 0.1101, 0.1906, 0.1343]) -Greedy action tensor([ 0.5065, -0.2496, -0.1896, -0.1950]) tensor([0.4059, 0.1906, 0.2023, 0.2012]) -Greedy action tensor([ 0.8937, -0.6804, -0.0207, -0.3797]) tensor([0.5297, 0.1098, 0.2123, 0.1483]) -Greedy action tensor([ 0.5305, -0.4802, -0.0063, -0.1142]) tensor([0.4043, 0.1471, 0.2364, 0.2122]) -Greedy action tensor([ 0.6696, -0.0648, 0.0670, -0.1709]) tensor([0.4067, 0.1952, 0.2226, 0.1755]) -Greedy action tensor([ 0.7393, -0.7329, 0.1652, -0.5250]) tensor([0.4819, 0.1106, 0.2714, 0.1361]) -Greedy action tensor([ 1.0896, -0.8066, 0.0317, -0.6889]) tensor([0.6002, 0.0901, 0.2084, 0.1014]) -Greedy action tensor([ 1.0225, -0.4776, -0.0251, -0.2540]) tensor([0.5397, 0.1204, 0.1893, 0.1506]) -Greedy action tensor([ 1.0308, -0.7450, -0.0782, -0.8624]) tensor([0.6061, 0.1026, 0.2000, 0.0913]) -Greedy action tensor([ 0.9168, -0.5161, -0.0309, -0.3488]) tensor([0.5240, 0.1250, 0.2031, 0.1478]) -Greedy action tensor([ 0.5048, -0.0703, -0.1186, -0.2825]) tensor([0.3916, 0.2203, 0.2099, 0.1782]) -Greedy action tensor([ 1.0583, -0.5488, -0.1373, -0.5260]) tensor([0.5854, 0.1174, 0.1771, 0.1201]) -Greedy action tensor([ 0.9623, -0.6915, 0.1990, -0.2838]) tensor([0.5141, 0.0984, 0.2397, 0.1479]) -Greedy action tensor([ 0.9907, -0.4347, -0.0273, -0.4498]) tensor([0.5439, 0.1308, 0.1965, 0.1288]) -Greedy action tensor([ 0.8690, -0.4585, -0.0549, -0.2163]) tensor([0.5000, 0.1326, 0.1985, 0.1689]) -Greedy action tensor([ 0.7206, -0.5074, 0.1811, -0.3579]) tensor([0.4513, 0.1322, 0.2631, 0.1535]) -Greedy action tensor([ 0.6335, -0.4032, 0.0580, -0.1751]) tensor([0.4233, 0.1501, 0.2381, 0.1886]) -Greedy action tensor([ 0.8951, -0.5147, -0.0446, -0.4077]) tensor([0.5245, 0.1281, 0.2049, 0.1425]) -Greedy action tensor([ 1.0405, -0.7057, 0.0730, -0.5442]) tensor([0.5684, 0.0991, 0.2160, 0.1165]) -Greedy action tensor([ 0.9595, -0.7071, -0.0413, -0.4039]) tensor([0.5518, 0.1042, 0.2028, 0.1411]) -Greedy action tensor([ 0.9359, -0.5171, -0.0650, -0.3535]) tensor([0.5328, 0.1246, 0.1958, 0.1467]) -Greedy action tensor([ 0.4642, -0.1932, -0.1074, -0.3776]) tensor([0.3978, 0.2061, 0.2246, 0.1714]) -Greedy action tensor([ 0.8255, -0.7023, -0.1517, -0.3432]) tensor([0.5252, 0.1140, 0.1977, 0.1632]) -Greedy action tensor([ 0.6757, -0.7846, 0.0139, -0.3615]) tensor([0.4756, 0.1104, 0.2454, 0.1686]) -Greedy action tensor([ 0.7662, -0.1607, -0.0500, -0.0786]) tensor([0.4410, 0.1745, 0.1950, 0.1895]) -Greedy action tensor([ 0.8796, -0.6013, -0.1298, -0.4044]) tensor([0.5351, 0.1217, 0.1950, 0.1482]) -Greedy action tensor([ 0.9073, -0.4707, -0.1446, -0.3395]) tensor([0.5295, 0.1335, 0.1849, 0.1522]) -Greedy action tensor([ 0.2483, -0.2648, -0.1522, -0.1161]) tensor([0.3375, 0.2020, 0.2261, 0.2344]) -Greedy action tensor([ 0.3935, -0.2550, 0.1854, -0.1193]) tensor([0.3409, 0.1782, 0.2768, 0.2041]) -Greedy action tensor([ 0.9016, -0.2708, -0.6348, 0.1685]) tensor([0.4987, 0.1544, 0.1073, 0.2396]) -Greedy action tensor([ 1.3759, -0.4821, -0.7966, 0.0657]) tensor([0.6495, 0.1013, 0.0740, 0.1752]) -Greedy action tensor([ 1.9097, -1.0609, -0.3616, 0.5925]) tensor([0.7031, 0.0360, 0.0725, 0.1883]) -Greedy action tensor([ 1.1542, -0.4767, -0.4085, 0.2264]) tensor([0.5553, 0.1087, 0.1164, 0.2196]) -Greedy action tensor([ 1.4848, -0.2712, -0.4440, 0.4232]) tensor([0.6010, 0.1038, 0.0873, 0.2079]) -Greedy action tensor([ 1.3891, -0.5880, -0.3311, 0.3977]) tensor([0.5922, 0.0820, 0.1060, 0.2198]) -Greedy action tensor([ 2.0562, -0.7460, -0.4728, 0.7455]) tensor([0.7092, 0.0430, 0.0566, 0.1912]) -Greedy action tensor([ 1.6127, -0.1520, -0.2405, 0.1430]) tensor([0.6419, 0.1099, 0.1006, 0.1476]) -Greedy action tensor([ 1.4743, -0.1218, -1.0121, 0.0100]) tensor([0.6591, 0.1336, 0.0548, 0.1524]) -Greedy action tensor([ 1.9934, -0.7620, -0.4117, 0.5460]) tensor([0.7199, 0.0458, 0.0650, 0.1693]) -Greedy action tensor([ 1.1155, -0.1650, -1.0428, 0.4402]) tensor([0.5256, 0.1461, 0.0607, 0.2676]) -Greedy action tensor([ 2.0456, -1.2532, -0.6457, 0.4048]) tensor([0.7701, 0.0284, 0.0522, 0.1493]) -Greedy action tensor([ 1.6728, -0.6959, -0.6823, 0.2227]) tensor([0.7027, 0.0658, 0.0667, 0.1648]) -Greedy action tensor([ 1.2078, -0.2509, -0.8369, 0.0540]) tensor([0.5962, 0.1386, 0.0772, 0.1881]) -Greedy action tensor([ 1.9329, -0.3335, -0.6912, 0.0627]) tensor([0.7517, 0.0779, 0.0545, 0.1158]) -Greedy action tensor([ 1.2236, -0.4100, -0.2170, 0.1793]) tensor([0.5606, 0.1094, 0.1327, 0.1973]) -Greedy action tensor([ 1.2764, -0.6123, -0.3629, -0.0560]) tensor([0.6214, 0.0940, 0.1206, 0.1640]) -Greedy action tensor([ 1.6318, -0.7102, -0.0358, 0.7311]) tensor([0.5913, 0.0568, 0.1116, 0.2402]) -Greedy action tensor([ 1.0572, -0.3108, -0.3029, 0.0119]) tensor([0.5368, 0.1367, 0.1378, 0.1887]) -Greedy action tensor([ 1.5139, -0.6092, -0.1310, 0.4021]) tensor([0.6092, 0.0729, 0.1176, 0.2004]) -Greedy action tensor([ 1.5044, -0.1135, -0.3742, -0.1451]) tensor([0.6480, 0.1285, 0.0990, 0.1245]) -Greedy action tensor([ 1.7937, -0.6466, -0.1484, 0.4005]) tensor([0.6762, 0.0589, 0.0970, 0.1679]) -Greedy action tensor([ 1.5552, -0.9779, -0.7683, 0.4901]) tensor([0.6570, 0.0522, 0.0643, 0.2265]) -Greedy action tensor([ 1.7852, -0.9882, -0.6099, 0.7207]) tensor([0.6673, 0.0417, 0.0608, 0.2302]) -Greedy action tensor([ 1.1874, -0.0550, -0.2365, -0.0185]) tensor([0.5468, 0.1579, 0.1317, 0.1637]) -Greedy action tensor([ 1.8966, -1.0398, -0.3759, 0.5831]) tensor([0.7018, 0.0372, 0.0723, 0.1887]) -Greedy action tensor([ 1.4717, -0.8598, -0.0631, 0.4921]) tensor([0.5924, 0.0575, 0.1277, 0.2224]) -Greedy action tensor([ 1.2489, -0.2003, -1.2818, 0.4386]) tensor([0.5685, 0.1334, 0.0453, 0.2528]) -Greedy action tensor([ 1.5400, -0.4707, -0.5938, 0.0879]) tensor([0.6728, 0.0901, 0.0796, 0.1575]) -Greedy action tensor([ 1.2921, -0.1359, -0.2872, 0.0603]) tensor([0.5755, 0.1380, 0.1186, 0.1679]) -Greedy action tensor([ 1.7236, -0.0557, -0.5321, -0.1384]) tensor([0.6998, 0.1181, 0.0733, 0.1087]) -Greedy action tensor([ 1.5244, -0.5556, -0.3777, 0.2558]) tensor([0.6429, 0.0803, 0.0960, 0.1808]) -Greedy action tensor([ 1.6888, -0.8056, 0.0327, 0.3600]) tensor([0.6501, 0.0537, 0.1241, 0.1722]) -Greedy action tensor([ 1.8435, -0.5701, -0.1680, 0.4346]) tensor([0.6813, 0.0610, 0.0912, 0.1665]) -Greedy action tensor([ 1.8444, -0.5856, -0.3016, 0.6092]) tensor([0.6685, 0.0589, 0.0782, 0.1944]) -Greedy action tensor([ 1.2492, -0.2573, -0.5978, 0.0150]) tensor([0.5986, 0.1327, 0.0944, 0.1742]) -Greedy action tensor([ 1.3497, 0.0906, -1.0903, -0.0397]) tensor([0.6172, 0.1752, 0.0538, 0.1538]) -Greedy action tensor([ 1.2809, -0.2059, -0.5639, -0.0031]) tensor([0.6020, 0.1361, 0.0952, 0.1667]) -Greedy action tensor([ 1.8672, -0.7148, -0.7169, 0.3778]) tensor([0.7264, 0.0549, 0.0548, 0.1638]) -Greedy action tensor([ 1.4815, -0.3118, -0.2898, 0.5756]) tensor([0.5745, 0.0956, 0.0977, 0.2322]) -Greedy action tensor([ 1.1547, -0.0316, -0.1089, 0.2958]) tensor([0.4971, 0.1518, 0.1405, 0.2106]) -Greedy action tensor([ 1.4476, -0.3112, -0.9722, 0.0591]) tensor([0.6620, 0.1140, 0.0589, 0.1651]) -Greedy action tensor([ 1.5739, -0.4179, -0.4973, 0.2939]) tensor([0.6491, 0.0886, 0.0818, 0.1805]) -Greedy action tensor([ 0.4250, -0.2299, 0.0910, -0.1754]) tensor([0.3592, 0.1866, 0.2572, 0.1970]) -Greedy action tensor([ 1.4531, 0.4509, -0.6887, -0.1580]) tensor([0.5938, 0.2180, 0.0697, 0.1186]) -Greedy action tensor([ 1.2728, -0.3662, -0.5633, 0.3948]) tensor([0.5652, 0.1097, 0.0901, 0.2349]) -Greedy action tensor([ 1.7845, -0.4465, 0.3082, 0.3636]) tensor([0.6340, 0.0681, 0.1448, 0.1531]) -Greedy action tensor([ 1.3603, -0.3717, -0.2449, 0.2297]) tensor([0.5880, 0.1040, 0.1181, 0.1898]) -Greedy action tensor([ 1.3085, -0.2426, -0.2471, 0.0703]) tensor([0.5838, 0.1238, 0.1232, 0.1692]) -Greedy action tensor([ 1.5211, 0.4151, -0.4252, 0.2434]) tensor([0.5707, 0.1888, 0.0815, 0.1590]) -Greedy action tensor([ 1.6768, 0.3367, -0.8278, 0.4584]) tensor([0.6100, 0.1597, 0.0498, 0.1804]) -Greedy action tensor([ 2.1490, -0.9350, -0.3081, 0.4781]) tensor([0.7578, 0.0347, 0.0649, 0.1425]) -Greedy action tensor([2.2818, 0.7466, 0.1118, 0.1479]) tensor([0.6906, 0.1488, 0.0789, 0.0818]) -Greedy action tensor([ 1.0380, -0.1222, -0.7405, 0.3368]) tensor([0.5055, 0.1584, 0.0854, 0.2507]) -Greedy action tensor([ 1.1605, -0.4408, -0.2675, 0.3386]) tensor([0.5316, 0.1072, 0.1275, 0.2337]) -Greedy action tensor([ 1.7554, -0.7992, -0.3310, 0.5339]) tensor([0.6682, 0.0519, 0.0829, 0.1970]) -Greedy action tensor([ 2.2243, -1.3315, -0.4171, 0.3509]) tensor([0.7978, 0.0228, 0.0569, 0.1225]) -Greedy action tensor([ 1.4724, -0.8131, -0.1325, 0.3599]) tensor([0.6130, 0.0624, 0.1232, 0.2015]) -Greedy action tensor([ 1.3180, -0.3803, -0.6150, 0.5999]) tensor([0.5509, 0.1008, 0.0797, 0.2686]) -Greedy action tensor([ 1.8145, -0.9274, -0.3512, 0.1898]) tensor([0.7267, 0.0468, 0.0833, 0.1431]) -Greedy action tensor([ 1.7351, 0.2726, -1.0528, 0.0805]) tensor([0.6737, 0.1561, 0.0415, 0.1288]) -Greedy action tensor([ 1.3908, -0.6948, -0.4777, 0.0340]) tensor([0.6510, 0.0809, 0.1005, 0.1676]) -Greedy action tensor([ 1.5843, -0.0496, -0.8767, 0.0917]) tensor([0.6643, 0.1296, 0.0567, 0.1493]) -Greedy action tensor([ 1.1512, -0.2709, -0.6751, 0.1076]) tensor([0.5700, 0.1375, 0.0918, 0.2007]) -Greedy action tensor([ 1.8053, -0.5682, -0.2524, 0.1508]) tensor([0.7082, 0.0660, 0.0905, 0.1354]) -Greedy action tensor([ 0.9690, -0.5435, -0.5911, 0.6517]) tensor([0.4633, 0.1021, 0.0973, 0.3373]) -Greedy action tensor([ 1.7859, -0.7481, -0.7247, 0.6365]) tensor([0.6769, 0.0537, 0.0550, 0.2144]) -Greedy action tensor([ 1.1462, -0.0773, -0.4852, 0.0526]) tensor([0.5480, 0.1612, 0.1072, 0.1836]) -Greedy action tensor([ 1.2364, -0.9024, -0.1311, 0.0530]) tensor([0.5957, 0.0702, 0.1517, 0.1824]) -Greedy action tensor([ 1.0428, -0.2811, -0.7502, 0.0357]) tensor([0.5562, 0.1480, 0.0926, 0.2032]) -Greedy action tensor([ 1.4332, -0.5032, -0.6144, 0.2858]) tensor([0.6286, 0.0907, 0.0811, 0.1996]) -Greedy action tensor([ 1.8756, -0.3317, -0.4203, 0.4411]) tensor([0.6902, 0.0759, 0.0695, 0.1644]) -Greedy action tensor([ 2.3234, -1.4610, -0.2589, 0.9972]) tensor([0.7332, 0.0167, 0.0554, 0.1947]) -Greedy action tensor([ 1.8560, -0.8905, 0.1968, 0.6687]) tensor([0.6413, 0.0411, 0.1220, 0.1956]) -Greedy action tensor([ 1.4068, -0.5654, -0.5637, 0.3045]) tensor([0.6209, 0.0864, 0.0865, 0.2062]) -Greedy action tensor([ 1.3509, -0.6405, -0.4832, -0.0728]) tensor([0.6506, 0.0888, 0.1039, 0.1567]) -Greedy action tensor([ 1.6187, -0.6674, -0.8833, 0.3371]) tensor([0.6844, 0.0696, 0.0561, 0.1900]) -Greedy action tensor([ 1.3935, -0.4622, -0.3389, 0.2974]) tensor([0.5997, 0.0938, 0.1061, 0.2004]) -Greedy action tensor([ 1.3562, -0.1381, -0.6569, 0.3515]) tensor([0.5800, 0.1302, 0.0775, 0.2124]) -Greedy action tensor([ 0.7732, -0.5734, -0.2196, 0.0414]) tensor([0.4736, 0.1232, 0.1755, 0.2278]) -Greedy action tensor([-1.8279, -0.3887, 0.5994, -0.1309]) tensor([0.0455, 0.1917, 0.5148, 0.2480]) -Greedy action tensor([-1.7486, -0.4655, 0.5702, -0.0807]) tensor([0.0498, 0.1797, 0.5063, 0.2641]) -Greedy action tensor([-1.9231, -0.4470, 0.6576, -0.1686]) tensor([0.0410, 0.1796, 0.5421, 0.2373]) -Greedy action tensor([-1.9049, -0.4220, 0.6598, -0.1610]) tensor([0.0415, 0.1826, 0.5388, 0.2371]) -Greedy action tensor([-1.8326, -0.4855, 0.6062, -0.1232]) tensor([0.0458, 0.1762, 0.5249, 0.2531]) -Greedy action tensor([-1.7431, 0.2411, 0.4968, -0.1057]) tensor([0.0438, 0.3189, 0.4118, 0.2254]) -Greedy action tensor([-1.8475, -0.3180, 0.6113, -0.1277]) tensor([0.0437, 0.2016, 0.5107, 0.2439]) -Greedy action tensor([-1.6533, -0.0663, 0.3051, -0.1590]) tensor([0.0574, 0.2804, 0.4066, 0.2556]) -Greedy action tensor([-1.9344, -0.4449, 0.6626, -0.1749]) tensor([0.0405, 0.1798, 0.5442, 0.2355]) -Greedy action tensor([-1.9236, -0.4413, 0.6586, -0.1680]) tensor([0.0410, 0.1803, 0.5417, 0.2370]) -Greedy action tensor([-1.5928, -0.5448, 0.4975, -0.0625]) tensor([0.0604, 0.1722, 0.4884, 0.2790]) -Greedy action tensor([-1.7120, -0.1116, 0.5438, -0.0318]) tensor([0.0479, 0.2375, 0.4574, 0.2572]) -Greedy action tensor([-0.6276, 0.6296, 0.0355, 0.0464]) tensor([0.1188, 0.4176, 0.2305, 0.2331]) -Greedy action tensor([-1.8952, -0.3918, 0.6426, -0.1490]) tensor([0.0419, 0.1883, 0.5298, 0.2401]) -Greedy action tensor([-1.8711, -0.4325, 0.6299, -0.1465]) tensor([0.0434, 0.1831, 0.5297, 0.2437]) -Greedy action tensor([-1.5516, -0.1043, 0.4116, -0.0252]) tensor([0.0589, 0.2505, 0.4195, 0.2711]) -Greedy action tensor([-1.8938, -0.3064, 0.6272, -0.1512]) tensor([0.0416, 0.2034, 0.5174, 0.2376]) -Greedy action tensor([-1.9338, -0.4164, 0.6565, -0.1723]) tensor([0.0405, 0.1845, 0.5395, 0.2355]) -Greedy action tensor([-1.8733, -0.4283, 0.6292, -0.1450]) tensor([0.0433, 0.1837, 0.5290, 0.2439]) -Greedy action tensor([-1.3571, -0.0789, 0.3626, -0.0350]) tensor([0.0718, 0.2578, 0.4010, 0.2694]) -Greedy action tensor([-1.7408, -0.4022, 0.5580, -0.1095]) tensor([0.0503, 0.1918, 0.5010, 0.2570]) -Greedy action tensor([-1.7312, -0.9007, 0.8485, 0.4136]) tensor([0.0400, 0.0917, 0.5271, 0.3412]) -Greedy action tensor([-1.8644, -0.4427, 0.6301, -0.1421]) tensor([0.0438, 0.1813, 0.5300, 0.2449]) -Greedy action tensor([-1.8404, -0.1313, 0.5691, -0.1217]) tensor([0.0430, 0.2378, 0.4791, 0.2401]) -Greedy action tensor([-1.8565, -0.4686, 0.6207, -0.1169]) tensor([0.0442, 0.1772, 0.5267, 0.2519]) -Greedy action tensor([-1.6938, -0.3610, 0.5293, -0.0901]) tensor([0.0526, 0.1996, 0.4861, 0.2617]) -Greedy action tensor([-1.5371, 0.5071, 0.3487, 0.0091]) tensor([0.0500, 0.3860, 0.3294, 0.2346]) -Greedy action tensor([-1.4350, 0.6485, 0.3241, 0.0716]) tensor([0.0517, 0.4151, 0.3001, 0.2331]) -Greedy action tensor([-1.5799, -0.3505, 0.4355, -0.4641]) tensor([0.0668, 0.2283, 0.5011, 0.2038]) -Greedy action tensor([-1.9259, -0.4700, 0.6844, -0.1673]) tensor([0.0405, 0.1736, 0.5508, 0.2350]) -Greedy action tensor([-1.9360, -0.4526, 0.6714, -0.1762]) tensor([0.0404, 0.1779, 0.5473, 0.2345]) -Greedy action tensor([-1.2717, 0.0555, 0.3963, -0.6043]) tensor([0.0832, 0.3136, 0.4410, 0.1621]) -Greedy action tensor([-1.8930, -0.3924, 0.6363, -0.1511]) tensor([0.0421, 0.1889, 0.5285, 0.2405]) -Greedy action tensor([-1.8638, -0.3985, 0.6506, -0.1381]) tensor([0.0429, 0.1857, 0.5303, 0.2410]) -Greedy action tensor([-1.5650, -0.2511, 0.6112, -0.0142]) tensor([0.0548, 0.2039, 0.4829, 0.2584]) -Greedy action tensor([-1.8039, -0.4016, 0.6421, -0.2465]) tensor([0.0468, 0.1904, 0.5405, 0.2223]) -Greedy action tensor([-1.8471, -0.2530, 0.5781, -0.1226]) tensor([0.0438, 0.2156, 0.4950, 0.2456]) -Greedy action tensor([-1.5123, 0.1022, 0.3935, -0.0579]) tensor([0.0587, 0.2951, 0.3948, 0.2514]) -Greedy action tensor([-1.8974, -0.4429, 0.6425, -0.1579]) tensor([0.0423, 0.1810, 0.5360, 0.2407]) -Greedy action tensor([-1.9033, -0.4460, 0.6461, -0.1628]) tensor([0.0420, 0.1805, 0.5379, 0.2396]) -Greedy action tensor([-1.8885, -0.3636, 0.6316, -0.1495]) tensor([0.0422, 0.1937, 0.5241, 0.2400]) -Greedy action tensor([-0.7304, 0.7688, -0.0686, -0.2533]) tensor([0.1108, 0.4960, 0.2147, 0.1785]) -Greedy action tensor([-1.9397, -0.4413, 0.6634, -0.1785]) tensor([0.0403, 0.1804, 0.5446, 0.2347]) -Greedy action tensor([-1.2486, -0.6482, 0.2461, 0.2535]) tensor([0.0849, 0.1548, 0.3787, 0.3815]) -Greedy action tensor([-1.8697, -0.4417, 0.6330, -0.1281]) tensor([0.0433, 0.1806, 0.5290, 0.2471]) -Greedy action tensor([-1.9198, -0.4360, 0.6559, -0.1668]) tensor([0.0411, 0.1813, 0.5403, 0.2373]) -Greedy action tensor([-1.2479, 0.1987, 0.2518, 0.0675]) tensor([0.0743, 0.3158, 0.3330, 0.2769]) -Greedy action tensor([-1.4649, -0.4254, 0.4068, 0.0994]) tensor([0.0662, 0.1872, 0.4302, 0.3164]) -Greedy action tensor([-1.6966, -0.2194, 0.6105, 0.0218]) tensor([0.0476, 0.2086, 0.4783, 0.2655]) -Greedy action tensor([-1.6468, -0.1024, 0.5784, 0.0140]) tensor([0.0495, 0.2319, 0.4581, 0.2605]) -Greedy action tensor([-1.8331, -0.4642, 0.6223, -0.1208]) tensor([0.0452, 0.1777, 0.5266, 0.2505]) -Greedy action tensor([-1.9121, -0.4403, 0.6515, -0.1635]) tensor([0.0415, 0.1809, 0.5390, 0.2386]) -Greedy action tensor([-1.6318, 0.3976, 0.4124, -0.0883]) tensor([0.0476, 0.3621, 0.3675, 0.2228]) -Greedy action tensor([-1.6846, -0.2859, 0.5602, -0.0351]) tensor([0.0508, 0.2057, 0.4793, 0.2643]) -Greedy action tensor([-1.7607, -0.2804, 0.5838, -0.0560]) tensor([0.0469, 0.2061, 0.4891, 0.2579]) -Greedy action tensor([-1.9216, -0.4434, 0.6578, -0.1674]) tensor([0.0411, 0.1801, 0.5416, 0.2373]) -Greedy action tensor([-1.2593, -0.1896, 0.7158, 0.5753]) tensor([0.0575, 0.1676, 0.4146, 0.3602]) -Greedy action tensor([-1.8237, -0.4882, 0.6084, -0.1216]) tensor([0.0461, 0.1754, 0.5253, 0.2531]) -Greedy action tensor([-1.8143, -0.4210, 0.6029, -0.1160]) tensor([0.0461, 0.1856, 0.5166, 0.2518]) -Greedy action tensor([-1.8461, -0.3894, 0.6092, -0.1455]) tensor([0.0446, 0.1914, 0.5196, 0.2443]) -Greedy action tensor([-1.9191, -0.4075, 0.6499, -0.1635]) tensor([0.0410, 0.1860, 0.5355, 0.2374]) -Greedy action tensor([-1.8022, 0.2752, 0.5378, -0.2124]) tensor([0.0412, 0.3290, 0.4278, 0.2020]) -Greedy action tensor([-1.1602, 0.2403, 0.2419, 0.0047]) tensor([0.0811, 0.3291, 0.3297, 0.2600]) -Greedy action tensor([-1.9123, -0.4087, 0.6382, -0.1632]) tensor([0.0416, 0.1869, 0.5325, 0.2390]) -Greedy action tensor([-1.9289, -0.4025, 0.6553, -0.1690]) tensor([0.0405, 0.1866, 0.5373, 0.2356]) -Greedy action tensor([-1.9363, -0.4412, 0.6641, -0.1757]) tensor([0.0404, 0.1802, 0.5443, 0.2350]) -Greedy action tensor([-1.8803, -0.3945, 0.6287, -0.1532]) tensor([0.0429, 0.1893, 0.5268, 0.2410]) -Greedy action tensor([-1.8329, -0.4480, 0.6108, -0.1235]) tensor([0.0454, 0.1813, 0.5226, 0.2508]) -Greedy action tensor([-1.6257, -0.4497, 0.5021, -0.0086]) tensor([0.0566, 0.1834, 0.4750, 0.2850]) -Greedy action tensor([-1.5469, -0.4084, 0.6978, 0.0056]) tensor([0.0547, 0.1708, 0.5162, 0.2583]) -Greedy action tensor([-1.0437, -0.2548, 0.3349, 0.3020]) tensor([0.0908, 0.1999, 0.3605, 0.3488]) -Greedy action tensor([-1.4885, -0.5646, 0.4048, 0.1145]) tensor([0.0661, 0.1665, 0.4390, 0.3284]) -Greedy action tensor([-1.8785, -0.4752, 0.6370, -0.1472]) tensor([0.0433, 0.1762, 0.5359, 0.2446]) -Greedy action tensor([-1.8604, -0.2878, 0.5996, -0.1268]) tensor([0.0431, 0.2079, 0.5048, 0.2442]) -Greedy action tensor([-1.7527, 0.3389, 0.4567, -0.0183]) tensor([0.0419, 0.3392, 0.3816, 0.2373]) -Greedy action tensor([-1.7649, -0.4113, 0.5716, -0.1207]) tensor([0.0490, 0.1898, 0.5073, 0.2539]) -Greedy action tensor([-1.9318, -0.4507, 0.6638, -0.1728]) tensor([0.0406, 0.1787, 0.5447, 0.2359]) -Greedy action tensor([-1.9246, -0.4264, 0.6589, -0.1698]) tensor([0.0408, 0.1826, 0.5406, 0.2360]) -Greedy action tensor([-1.3920, -0.0075, 0.3286, 0.0229]) tensor([0.0680, 0.2717, 0.3802, 0.2801]) -Greedy action tensor([-1.9086, -0.4578, 0.6497, -0.1647]) tensor([0.0418, 0.1785, 0.5403, 0.2393]) -Greedy action tensor([-0.6371, 0.7751, 0.0883, -0.0619]) tensor([0.1118, 0.4588, 0.2308, 0.1986]) -Greedy action tensor([ 0.7639, -0.5447, 0.0559, 0.5835]) tensor([0.3849, 0.1040, 0.1896, 0.3214]) -Greedy action tensor([ 0.9630, -0.2537, 0.2358, 0.4378]) tensor([0.4218, 0.1249, 0.2038, 0.2495]) -Greedy action tensor([-0.0315, -1.1253, -0.2409, -0.5308]) tensor([0.3633, 0.1217, 0.2946, 0.2205]) -Greedy action tensor([ 1.2434, 1.1705, 0.3768, -0.1214]) tensor([0.3838, 0.3568, 0.1613, 0.0980]) -Greedy action tensor([-0.3874, -0.0396, 0.6120, -0.8802]) tensor([0.1741, 0.2465, 0.4730, 0.1064]) -Greedy action tensor([-0.2826, -1.4450, 0.4466, -0.1102]) tensor([0.2186, 0.0684, 0.4533, 0.2597]) -Greedy action tensor([ 1.3825, -0.4000, 1.1942, -0.0467]) tensor([0.4472, 0.0752, 0.3705, 0.1071]) -Greedy action tensor([-0.1606, 0.7216, 0.0528, -1.0157]) tensor([0.1969, 0.4757, 0.2437, 0.0837]) -Greedy action tensor([ 0.4984, -1.7182, 0.1262, -0.4310]) tensor([0.4560, 0.0497, 0.3143, 0.1800]) -Greedy action tensor([ 0.3323, -0.2867, 0.0199, -0.8925]) tensor([0.3900, 0.2100, 0.2854, 0.1146]) -Greedy action tensor([-0.1915, -0.9508, -0.4223, -0.8803]) tensor([0.3618, 0.1693, 0.2872, 0.1817]) -Greedy action tensor([-0.5782, -0.5068, 0.0748, -0.9450]) tensor([0.2133, 0.2291, 0.4098, 0.1478]) -Greedy action tensor([ 0.5646, -1.4001, -0.0181, -0.9893]) tensor([0.5236, 0.0734, 0.2923, 0.1107]) -Greedy action tensor([-0.2798, -1.0415, 0.0161, -0.4150]) tensor([0.2714, 0.1267, 0.3648, 0.2371]) -Greedy action tensor([ 0.3916, 0.5716, -0.7473, 0.1527]) tensor([0.3026, 0.3623, 0.0969, 0.2383]) -Greedy action tensor([-0.4588, -0.8009, 0.0100, -0.1992]) tensor([0.2172, 0.1543, 0.3470, 0.2815]) -Greedy action tensor([-0.5539, -0.4349, -0.4516, -1.1793]) tensor([0.2653, 0.2988, 0.2939, 0.1420]) -Greedy action tensor([ 0.1826, -0.5979, 0.8070, -0.3226]) tensor([0.2545, 0.1166, 0.4753, 0.1536]) -Greedy action tensor([-0.1054, 0.0827, 0.0835, -0.9577]) tensor([0.2603, 0.3142, 0.3145, 0.1110]) -Greedy action tensor([ 0.9407, -1.4487, 1.3369, 0.8767]) tensor([0.2844, 0.0261, 0.4227, 0.2668]) -Greedy action tensor([-0.0889, -0.2178, -0.6363, 0.1042]) tensor([0.2724, 0.2395, 0.1576, 0.3305]) -Greedy action tensor([-0.1052, -0.5650, -0.2224, -0.1643]) tensor([0.2887, 0.1823, 0.2568, 0.2722]) -Greedy action tensor([ 0.3132, -0.5610, -0.3231, -0.4507]) tensor([0.4145, 0.1729, 0.2194, 0.1931]) -Greedy action tensor([ 0.8361, -1.2353, 0.5499, -0.2477]) tensor([0.4514, 0.0569, 0.3390, 0.1527]) -Greedy action tensor([-1.0137, -0.2415, 0.3370, -0.3860]) tensor([0.1124, 0.2433, 0.4338, 0.2105]) -Greedy action tensor([ 1.2451, -0.8787, 0.3318, 1.2368]) tensor([0.3980, 0.0476, 0.1597, 0.3947]) -Greedy action tensor([ 1.2854, -0.4588, 0.2419, 0.7591]) tensor([0.4722, 0.0825, 0.1663, 0.2790]) -Greedy action tensor([ 0.0594, -0.1987, -0.1383, -0.7991]) tensor([0.3314, 0.2561, 0.2720, 0.1405]) -Greedy action tensor([ 0.6963, -1.1222, 1.3841, -0.3879]) tensor([0.2865, 0.0465, 0.5701, 0.0969]) -Greedy action tensor([-0.2301, -1.6978, -0.2032, 0.3950]) tensor([0.2423, 0.0559, 0.2490, 0.4528]) -Greedy action tensor([-0.1390, -0.2159, 0.5380, -0.2013]) tensor([0.2069, 0.1916, 0.4071, 0.1944]) -Greedy action tensor([-0.3864, -1.4605, 0.1977, -0.4287]) tensor([0.2443, 0.0835, 0.4381, 0.2342]) -Greedy action tensor([-0.1236, -0.4250, 0.0469, -0.6889]) tensor([0.2862, 0.2117, 0.3394, 0.1626]) -Greedy action tensor([-0.7378, -0.8814, -0.4805, 0.5866]) tensor([0.1445, 0.1252, 0.1869, 0.5434]) -Greedy action tensor([-0.6598, -1.3696, 0.0719, 0.8702]) tensor([0.1221, 0.0601, 0.2538, 0.5640]) -Greedy action tensor([ 0.4210, -1.7410, -0.0657, 0.7967]) tensor([0.3139, 0.0361, 0.1929, 0.4570]) -Greedy action tensor([-0.0359, 0.4278, 0.5220, 0.0200]) tensor([0.1854, 0.2947, 0.3239, 0.1960]) -Greedy action tensor([-0.7157, -0.7770, 0.2649, 0.0683]) tensor([0.1471, 0.1384, 0.3922, 0.3222]) -Greedy action tensor([1.6849, 0.3512, 0.2271, 0.2960]) tensor([0.5729, 0.1509, 0.1333, 0.1428]) -Greedy action tensor([-0.3016, -1.1496, 0.7114, 0.0689]) tensor([0.1776, 0.0761, 0.4891, 0.2573]) -Greedy action tensor([-0.5858, -0.8513, 0.2926, -0.6935]) tensor([0.1972, 0.1512, 0.4746, 0.1770]) -Greedy action tensor([-0.3088, -1.2906, -0.1475, 1.0025]) tensor([0.1597, 0.0598, 0.1877, 0.5928]) -Greedy action tensor([ 0.2544, -1.9780, 0.2689, -0.1543]) tensor([0.3589, 0.0385, 0.3641, 0.2385]) -Greedy action tensor([-0.5436, -1.0666, -0.1995, -0.3710]) tensor([0.2386, 0.1414, 0.3365, 0.2835]) -Greedy action tensor([-0.0613, 1.2128, -0.5943, -1.0423]) tensor([0.1806, 0.6457, 0.1060, 0.0677]) -Greedy action tensor([ 0.2837, -0.8823, 0.4465, 0.2950]) tensor([0.2857, 0.0890, 0.3362, 0.2890]) -Greedy action tensor([ 0.9295, 0.3067, 0.9563, -0.2095]) tensor([0.3468, 0.1860, 0.3562, 0.1110]) -Greedy action tensor([-0.2415, -0.7154, -1.0892, 0.2584]) tensor([0.2703, 0.1683, 0.1158, 0.4456]) -Greedy action tensor([ 1.4930, -1.2834, 0.1799, -0.1603]) tensor([0.6568, 0.0409, 0.1766, 0.1257]) -Greedy action tensor([-0.0822, -0.7417, 0.1554, -0.3942]) tensor([0.2843, 0.1470, 0.3606, 0.2081]) -Greedy action tensor([ 1.4716, -1.6231, 0.6379, 0.5865]) tensor([0.5284, 0.0239, 0.2296, 0.2181]) -Greedy action tensor([ 0.0047, -1.7469, 0.2440, -1.0412]) tensor([0.3578, 0.0621, 0.4545, 0.1257]) -Greedy action tensor([-0.4651, -0.4240, -0.3338, 0.3197]) tensor([0.1861, 0.1939, 0.2122, 0.4079]) -Greedy action tensor([ 0.1848, -0.2156, -0.0861, -1.1847]) tensor([0.3722, 0.2494, 0.2838, 0.0946]) -Greedy action tensor([-0.5013, -1.0027, 0.1638, -0.5378]) tensor([0.2215, 0.1342, 0.4308, 0.2136]) -Greedy action tensor([-0.3672, -1.0147, -0.6700, -0.1952]) tensor([0.2899, 0.1517, 0.2141, 0.3443]) -Greedy action tensor([ 0.0089, -1.0922, -0.1233, -0.7974]) tensor([0.3766, 0.1252, 0.3300, 0.1682]) -Greedy action tensor([-0.0735, -0.4387, 1.0148, -0.8497]) tensor([0.1952, 0.1355, 0.5795, 0.0898]) -Greedy action tensor([-0.6967, -1.5050, -0.2477, -0.9110]) tensor([0.2618, 0.1167, 0.4102, 0.2113]) -Greedy action tensor([-0.2171, -0.3960, -0.0161, 0.0908]) tensor([0.2263, 0.1892, 0.2767, 0.3079]) -Greedy action tensor([-0.4305, -1.5741, -0.1217, 1.2198]) tensor([0.1268, 0.0404, 0.1726, 0.6602]) -Greedy action tensor([ 0.1159, -1.2340, 0.2372, 0.2219]) tensor([0.2857, 0.0741, 0.3226, 0.3176]) -Greedy action tensor([-1.6930, -0.7663, 0.7887, -0.7907]) tensor([0.0557, 0.1407, 0.6663, 0.1373]) -Greedy action tensor([-0.0047, -1.1737, 1.0148, -0.8109]) tensor([0.2208, 0.0686, 0.6120, 0.0986]) -Greedy action tensor([-0.4179, 0.1388, 0.3088, 0.0016]) tensor([0.1579, 0.2755, 0.3265, 0.2401]) -Greedy action tensor([ 0.9371, -0.1328, -0.8052, 0.3659]) tensor([0.4801, 0.1647, 0.0841, 0.2712]) -Greedy action tensor([-0.4172, -0.7154, 0.0048, -0.3720]) tensor([0.2318, 0.1721, 0.3535, 0.2426]) -Greedy action tensor([ 0.7614, -0.4877, 0.1020, -0.4857]) tensor([0.4782, 0.1371, 0.2473, 0.1374]) -Greedy action tensor([1.5920, 0.1174, 0.3393, 0.0673]) tensor([0.5773, 0.1321, 0.1649, 0.1257]) -Greedy action tensor([-0.5349, -0.2253, -0.3065, -0.4109]) tensor([0.2105, 0.2868, 0.2645, 0.2382]) -Greedy action tensor([ 0.5149, -1.1218, -0.0712, -1.0702]) tensor([0.5112, 0.0995, 0.2845, 0.1048]) -Greedy action tensor([ 0.5713, -0.7012, 0.8433, -0.0198]) tensor([0.3178, 0.0890, 0.4172, 0.1760]) -Greedy action tensor([ 0.3013, 0.0209, -0.9069, -0.3102]) tensor([0.3851, 0.2909, 0.1150, 0.2089]) -Greedy action tensor([-0.4410, -1.5080, 0.7659, -0.7261]) tensor([0.1839, 0.0633, 0.6146, 0.1382]) -Greedy action tensor([ 0.8445, 0.1703, 0.8861, -0.0439]) tensor([0.3375, 0.1720, 0.3518, 0.1388]) -Greedy action tensor([ 0.1230, -0.2950, 0.4199, 0.2106]) tensor([0.2442, 0.1608, 0.3286, 0.2665]) -Greedy action tensor([ 0.4814, -0.4780, -0.1564, 0.6642]) tensor([0.3213, 0.1231, 0.1698, 0.3858]) -Greedy action tensor([-0.9629, -0.4716, 0.4233, -1.0456]) tensor([0.1324, 0.2163, 0.5294, 0.1219]) -Greedy action tensor([-0.8557, -0.7631, -1.1259, 0.0947]) tensor([0.1836, 0.2014, 0.1401, 0.4749]) -Greedy action tensor([-0.0080, -0.5829, 0.1853, -0.6040]) tensor([0.3006, 0.1692, 0.3647, 0.1656]) -Greedy action tensor([ 0.0533, -1.3838, 0.8195, -0.1491]) tensor([0.2378, 0.0565, 0.5116, 0.1942]) -Greedy action tensor([ 0.4688, 0.2287, -0.8765, 0.1965]) tensor([0.3560, 0.2801, 0.0927, 0.2712]) -Greedy action tensor([ 0.9595, -0.6767, -0.0285, -0.5451]) tensor([0.5589, 0.1088, 0.2081, 0.1241]) -Greedy action tensor([ 0.2792, 0.4234, -0.3674, -0.2727]) tensor([0.3072, 0.3549, 0.1609, 0.1769]) -Greedy action tensor([ 1.0157, -0.8197, 0.0735, -0.5773]) tensor([0.5706, 0.0910, 0.2224, 0.1160]) -Greedy action tensor([ 0.7506, -0.2179, 0.0041, -0.3209]) tensor([0.4553, 0.1729, 0.2158, 0.1560]) -Greedy action tensor([ 0.9827, -0.7503, 0.0103, -0.3539]) tensor([0.5501, 0.0972, 0.2081, 0.1446]) -Greedy action tensor([ 0.7627, -0.5900, -0.0626, -0.5555]) tensor([0.5091, 0.1316, 0.2230, 0.1362]) -Greedy action tensor([ 0.6152, -0.4100, 0.0710, -0.2244]) tensor([0.4218, 0.1513, 0.2448, 0.1822]) -Greedy action tensor([ 0.7900, -0.1707, 0.0134, -0.1473]) tensor([0.4476, 0.1712, 0.2059, 0.1753]) -Greedy action tensor([ 1.1931, -0.5541, -0.1174, -0.2171]) tensor([0.5924, 0.1032, 0.1598, 0.1446]) -Greedy action tensor([ 0.9826, -0.6826, 0.1154, -0.5303]) tensor([0.5466, 0.1034, 0.2296, 0.1204]) -Greedy action tensor([ 1.0003, -0.7600, 0.0776, -0.3467]) tensor([0.5466, 0.0940, 0.2172, 0.1421]) -Greedy action tensor([ 0.7519, -0.4995, 0.1515, -0.2023]) tensor([0.4505, 0.1289, 0.2471, 0.1735]) -Greedy action tensor([ 0.0547, 0.6448, -0.1066, -0.1501]) tensor([0.2237, 0.4036, 0.1904, 0.1823]) -Greedy action tensor([ 0.3824, 0.0945, 0.0117, -0.0777]) tensor([0.3256, 0.2442, 0.2247, 0.2055]) -Greedy action tensor([ 1.0277, -0.6262, 0.0473, -0.4359]) tensor([0.5562, 0.1064, 0.2087, 0.1287]) -Greedy action tensor([ 0.7328, -0.1316, -0.0597, -0.1667]) tensor([0.4385, 0.1847, 0.1985, 0.1783]) -Greedy action tensor([ 1.3391, -0.7307, -0.1826, -0.4276]) tensor([0.6599, 0.0833, 0.1441, 0.1128]) -Greedy action tensor([ 0.4995, -0.0358, -0.1001, -0.0823]) tensor([0.3713, 0.2174, 0.2038, 0.2075]) -Greedy action tensor([ 6.2445e-01, -3.2603e-01, 6.0678e-04, -2.5629e-01]) tensor([0.4279, 0.1654, 0.2293, 0.1774]) -Greedy action tensor([ 1.1700, -0.6858, 0.0557, -0.7427]) tensor([0.6127, 0.0958, 0.2010, 0.0905]) -Greedy action tensor([ 1.0820, -0.5728, -0.0175, -0.2688]) tensor([0.5608, 0.1072, 0.1868, 0.1453]) -Greedy action tensor([ 0.9043, -0.4837, -0.1204, -0.2466]) tensor([0.5195, 0.1297, 0.1865, 0.1644]) -Greedy action tensor([ 0.8525, -0.6517, 0.0241, -0.5395]) tensor([0.5242, 0.1165, 0.2290, 0.1303]) -Greedy action tensor([ 0.5756, -0.6246, -0.1046, -0.2652]) tensor([0.4466, 0.1345, 0.2262, 0.1927]) -Greedy action tensor([ 0.4596, 0.0948, -0.0217, -0.0059]) tensor([0.3401, 0.2362, 0.2102, 0.2135]) -Greedy action tensor([ 0.8658, -0.7493, 0.1517, -0.5119]) tensor([0.5153, 0.1025, 0.2523, 0.1299]) -Greedy action tensor([ 0.7356, -0.4310, 0.1046, -0.2186]) tensor([0.4487, 0.1397, 0.2387, 0.1728]) -Greedy action tensor([ 0.8820, -0.4353, -0.0066, -0.3977]) tensor([0.5109, 0.1369, 0.2101, 0.1421]) -Greedy action tensor([ 0.8199, -0.2299, -0.0442, -0.0574]) tensor([0.4572, 0.1600, 0.1927, 0.1901]) -Greedy action tensor([ 0.8710, -0.5865, 0.1618, -0.2312]) tensor([0.4861, 0.1132, 0.2392, 0.1615]) -Greedy action tensor([ 1.3578, -0.9678, -0.0955, -0.6741]) tensor([0.6837, 0.0668, 0.1598, 0.0896]) -Greedy action tensor([ 1.0003, -0.1366, -0.0507, -0.4219]) tensor([0.5231, 0.1678, 0.1829, 0.1262]) -Greedy action tensor([ 0.8682, -0.7456, 0.1577, -0.3622]) tensor([0.5044, 0.1004, 0.2478, 0.1474]) -Greedy action tensor([ 1.2204, -0.8211, 0.1234, -0.5527]) tensor([0.6122, 0.0795, 0.2044, 0.1040]) -Greedy action tensor([ 0.3603, -0.0968, -0.1954, 0.0120]) tensor([0.3433, 0.2174, 0.1970, 0.2423]) -Greedy action tensor([ 0.7559, -0.2468, -0.0021, -0.0896]) tensor([0.4415, 0.1620, 0.2069, 0.1896]) -Greedy action tensor([ 0.8458, -0.7930, -0.0307, -0.3208]) tensor([0.5203, 0.1011, 0.2166, 0.1620]) -Greedy action tensor([ 0.4299, -0.1824, -0.0355, -0.0981]) tensor([0.3624, 0.1964, 0.2275, 0.2137]) -Greedy action tensor([ 1.3600, -0.7323, 0.0373, -0.6220]) tensor([0.6546, 0.0808, 0.1744, 0.0902]) -Greedy action tensor([ 1.2084, -0.6499, 0.0781, -0.4581]) tensor([0.5996, 0.0935, 0.1936, 0.1133]) -Greedy action tensor([ 0.6002, -0.3791, 0.0380, -0.2607]) tensor([0.4222, 0.1586, 0.2407, 0.1785]) -Greedy action tensor([ 0.2309, 0.2391, -0.0622, -0.1848]) tensor([0.2929, 0.2953, 0.2185, 0.1933]) -Greedy action tensor([ 0.6113, -0.4282, 0.0820, -0.2224]) tensor([0.4207, 0.1488, 0.2478, 0.1828]) -Greedy action tensor([ 0.4565, -0.5110, -0.1004, -0.1349]) tensor([0.3990, 0.1516, 0.2286, 0.2208]) -Greedy action tensor([ 0.8690, -0.7280, -0.0470, -0.3515]) tensor([0.5270, 0.1067, 0.2108, 0.1555]) -Greedy action tensor([ 0.4904, 0.0642, -0.1769, -0.0387]) tensor([0.3629, 0.2370, 0.1862, 0.2138]) -Greedy action tensor([ 0.8106, -0.3935, -0.0151, -0.4803]) tensor([0.4968, 0.1490, 0.2176, 0.1366]) -Greedy action tensor([ 0.9919, -0.4890, -0.0807, -0.3393]) tensor([0.5454, 0.1240, 0.1866, 0.1441]) -Greedy action tensor([ 0.8625, -0.9801, 0.1187, -0.4307]) tensor([0.5241, 0.0830, 0.2491, 0.1438]) -Greedy action tensor([ 1.3224, -0.8471, 0.1610, -0.6600]) tensor([0.6390, 0.0730, 0.2000, 0.0880]) -Greedy action tensor([ 0.4531, 0.0411, -0.1587, -0.0240]) tensor([0.3540, 0.2344, 0.1920, 0.2196]) -Greedy action tensor([ 1.5244, -0.9552, 0.0559, -0.7676]) tensor([0.7067, 0.0592, 0.1627, 0.0714]) -Greedy action tensor([ 0.4989, -0.2902, -0.1643, -0.3542]) tensor([0.4174, 0.1896, 0.2151, 0.1779]) -Greedy action tensor([ 0.8304, -0.6452, -0.1562, -0.2741]) tensor([0.5174, 0.1183, 0.1929, 0.1714]) -Greedy action tensor([ 1.1134, -0.4821, 0.0481, -0.1374]) tensor([0.5453, 0.1106, 0.1879, 0.1561]) -Greedy action tensor([ 0.6928, -0.2171, 0.0490, -0.1749]) tensor([0.4259, 0.1715, 0.2237, 0.1789]) -Greedy action tensor([ 0.9527, -0.2486, -0.1086, -0.3910]) tensor([0.5242, 0.1577, 0.1814, 0.1368]) -Greedy action tensor([ 0.0947, 0.1908, -0.1015, 0.0848]) tensor([0.2556, 0.2813, 0.2100, 0.2531]) -Greedy action tensor([ 0.5719, -0.1398, -0.0039, -0.1213]) tensor([0.3917, 0.1923, 0.2202, 0.1958]) -Greedy action tensor([ 0.7838, -0.5844, -0.0227, -0.3389]) tensor([0.4935, 0.1256, 0.2203, 0.1606]) -Greedy action tensor([ 0.5578, -0.5733, -0.1300, -0.2630]) tensor([0.4414, 0.1424, 0.2219, 0.1943]) -Greedy action tensor([ 0.6692, -0.2963, 0.0364, -0.4732]) tensor([0.4482, 0.1707, 0.2381, 0.1430]) -Greedy action tensor([ 0.3848, 0.0351, 0.0212, -0.0312]) tensor([0.3268, 0.2304, 0.2272, 0.2156]) -Greedy action tensor([ 0.8691, -0.3332, -0.1775, -0.2074]) tensor([0.5019, 0.1508, 0.1762, 0.1710]) -Greedy action tensor([ 0.3198, 0.3283, 0.1375, -0.2334]) tensor([0.2927, 0.2951, 0.2439, 0.1683]) -Greedy action tensor([ 0.6295, -0.2482, -0.0160, -0.1155]) tensor([0.4141, 0.1722, 0.2172, 0.1966]) -Greedy action tensor([ 0.5309, -0.2948, -0.0211, -0.3737]) tensor([0.4135, 0.1811, 0.2381, 0.1673]) -Greedy action tensor([ 0.5830, -0.1393, 0.0228, -0.1882]) tensor([0.3970, 0.1928, 0.2267, 0.1836]) -Greedy action tensor([ 0.9288, -0.7135, -0.1622, -0.3880]) tensor([0.5564, 0.1077, 0.1869, 0.1491]) -Greedy action tensor([ 9.9507e-01, -5.5005e-01, 5.8937e-04, -4.3515e-01]) tensor([0.5487, 0.1170, 0.2030, 0.1313]) -Greedy action tensor([ 0.7351, 0.0128, -0.1165, 0.0466]) tensor([0.4141, 0.2011, 0.1767, 0.2080]) -Greedy action tensor([ 0.3013, 0.0921, -0.0303, -0.0687]) tensor([0.3106, 0.2520, 0.2229, 0.2145]) -Greedy action tensor([ 5.7511e-01, -3.1209e-01, -4.1631e-04, -7.8569e-02]) tensor([0.4009, 0.1651, 0.2255, 0.2085]) -Greedy action tensor([ 0.9036, -0.4425, -0.1558, -0.2412]) tensor([0.5194, 0.1352, 0.1801, 0.1653]) -Greedy action tensor([ 0.6474, -0.5174, 0.0072, -0.3262]) tensor([0.4511, 0.1407, 0.2378, 0.1704]) -Greedy action tensor([ 1.0023, -0.2343, -0.1429, -0.2183]) tensor([0.5253, 0.1525, 0.1671, 0.1550]) -Greedy action tensor([ 0.6760, -0.4262, -0.1361, -0.4447]) tensor([0.4757, 0.1580, 0.2112, 0.1551]) -Greedy action tensor([ 0.2669, -0.1167, -0.1952, 0.0693]) tensor([0.3193, 0.2176, 0.2011, 0.2620]) -Greedy action tensor([ 0.8338, -0.2384, 0.0651, -0.0782]) tensor([0.4530, 0.1550, 0.2100, 0.1820]) -Greedy action tensor([ 0.6302, 0.2652, -0.1458, 0.1842]) tensor([0.3578, 0.2484, 0.1647, 0.2291]) -Greedy action tensor([ 1.2024, -0.7297, 0.0924, -0.4451]) tensor([0.5999, 0.0869, 0.1977, 0.1155]) -Greedy action tensor([ 1.4541, -0.4063, -0.7077, 0.3208]) tensor([0.6279, 0.0977, 0.0723, 0.2022]) -Greedy action tensor([ 1.1685, -0.6415, -0.3170, 0.4446]) tensor([0.5334, 0.0873, 0.1207, 0.2586]) -Greedy action tensor([ 1.9255, -1.0938, -0.6915, 0.3419]) tensor([0.7535, 0.0368, 0.0550, 0.1547]) -Greedy action tensor([ 1.1655, -0.0499, -0.5210, 0.0670]) tensor([0.5509, 0.1634, 0.1020, 0.1837]) -Greedy action tensor([ 1.6301, -0.6015, -0.1878, 0.4120]) tensor([0.6388, 0.0686, 0.1037, 0.1889]) -Greedy action tensor([ 1.3746, -0.0057, -0.3358, 0.0785]) tensor([0.5862, 0.1474, 0.1060, 0.1604]) -Greedy action tensor([ 1.7860, -1.1578, -0.4887, 0.1487]) tensor([0.7407, 0.0390, 0.0762, 0.1441]) -Greedy action tensor([ 1.4795, -0.5309, -0.7479, 0.1679]) tensor([0.6618, 0.0886, 0.0713, 0.1783]) -Greedy action tensor([ 1.5882, -1.0797, -0.1791, 0.9168]) tensor([0.5711, 0.0396, 0.0975, 0.2918]) -Greedy action tensor([ 1.7912, -0.6158, -0.4057, 0.1226]) tensor([0.7196, 0.0648, 0.0800, 0.1356]) -Greedy action tensor([ 1.6725, -0.6289, -1.0015, 0.6851]) tensor([0.6487, 0.0649, 0.0447, 0.2416]) -Greedy action tensor([ 2.1539, -1.4996, -0.2425, 1.0743]) tensor([0.6865, 0.0178, 0.0625, 0.2332]) -Greedy action tensor([ 2.0006, -0.7114, -0.0287, 0.0030]) tensor([0.7499, 0.0498, 0.0986, 0.1017]) -Greedy action tensor([ 1.5183, -0.6971, -0.0690, 0.6783]) tensor([0.5730, 0.0625, 0.1172, 0.2474]) -Greedy action tensor([ 1.5473, -0.7857, -0.5493, -0.2911]) tensor([0.7252, 0.0703, 0.0891, 0.1154]) -Greedy action tensor([ 1.4694, -0.6130, -0.7816, 0.2342]) tensor([0.6576, 0.0820, 0.0692, 0.1912]) -Greedy action tensor([ 1.2416, -0.2346, -0.3688, 0.1569]) tensor([0.5662, 0.1294, 0.1131, 0.1914]) -Greedy action tensor([ 1.6882, -0.0861, -0.4413, 0.5444]) tensor([0.6222, 0.1055, 0.0740, 0.1982]) -Greedy action tensor([ 1.4845, -0.2140, -0.7932, 0.2312]) tensor([0.6365, 0.1165, 0.0653, 0.1818]) -Greedy action tensor([ 2.9361, -2.0479, 0.0128, 0.7319]) tensor([0.8540, 0.0058, 0.0459, 0.0942]) -Greedy action tensor([ 1.9878, -0.3467, -0.8233, 0.3605]) tensor([0.7389, 0.0716, 0.0444, 0.1451]) -Greedy action tensor([ 1.3060, -0.2378, -0.1388, 0.1583]) tensor([0.5660, 0.1209, 0.1335, 0.1796]) -Greedy action tensor([ 1.8054, -1.0402, -0.8476, -0.2540]) tensor([0.7961, 0.0463, 0.0561, 0.1015]) -Greedy action tensor([ 1.1788, -0.4936, -0.4721, 0.1630]) tensor([0.5741, 0.1078, 0.1102, 0.2079]) -Greedy action tensor([ 1.2965, -0.3868, -0.0831, 0.1945]) tensor([0.5651, 0.1050, 0.1422, 0.1877]) -Greedy action tensor([ 1.4240, -0.5331, -0.5107, 0.9256]) tensor([0.5282, 0.0746, 0.0763, 0.3209]) -Greedy action tensor([ 1.2511, -0.3976, -0.8233, 0.8241]) tensor([0.5075, 0.0976, 0.0638, 0.3311]) -Greedy action tensor([ 1.4375, 0.4104, -0.5852, 0.2878]) tensor([0.5534, 0.1981, 0.0732, 0.1753]) -Greedy action tensor([ 2.0085, -0.3266, -0.7944, 0.6651]) tensor([0.7050, 0.0683, 0.0428, 0.1840]) -Greedy action tensor([ 0.7086, 0.0197, -0.1315, -0.0854]) tensor([0.4192, 0.2105, 0.1809, 0.1895]) -Greedy action tensor([ 1.4205, -0.5137, -0.0654, 0.7455]) tensor([0.5319, 0.0769, 0.1204, 0.2708]) -Greedy action tensor([ 1.3397, -0.4232, -0.2690, 0.2333]) tensor([0.5874, 0.1008, 0.1176, 0.1943]) -Greedy action tensor([ 1.2421, -0.2263, -0.5501, 0.1466]) tensor([0.5776, 0.1330, 0.0962, 0.1931]) -Greedy action tensor([ 1.2981, -0.5137, -1.0492, 0.1183]) tensor([0.6384, 0.1043, 0.0610, 0.1962]) -Greedy action tensor([ 1.2246, -0.4043, -0.5913, 0.3751]) tensor([0.5598, 0.1098, 0.0911, 0.2394]) -Greedy action tensor([ 1.4337, -0.7905, -0.3548, 0.6549]) tensor([0.5766, 0.0624, 0.0964, 0.2646]) -Greedy action tensor([ 2.1212, -0.4681, -1.2499, 0.5095]) tensor([0.7640, 0.0574, 0.0262, 0.1524]) -Greedy action tensor([ 1.9039, -0.6793, -0.0939, 0.4901]) tensor([0.6876, 0.0519, 0.0933, 0.1672]) -Greedy action tensor([ 1.2917, -0.2931, -0.5210, 0.1632]) tensor([0.5911, 0.1212, 0.0965, 0.1912]) -Greedy action tensor([ 1.1928, -0.6242, -0.5194, 0.0975]) tensor([0.5961, 0.0969, 0.1076, 0.1994]) -Greedy action tensor([ 0.8784, -0.1641, -0.7256, 0.1291]) tensor([0.4935, 0.1740, 0.0992, 0.2333]) -Greedy action tensor([ 1.1411, -0.0125, -0.4425, 0.5432]) tensor([0.4829, 0.1524, 0.0991, 0.2656]) -Greedy action tensor([ 1.4960, -0.6434, -0.5363, 0.0601]) tensor([0.6726, 0.0792, 0.0881, 0.1600]) -Greedy action tensor([ 1.4948, -0.7855, -0.1575, 0.5778]) tensor([0.5905, 0.0604, 0.1131, 0.2360]) -Greedy action tensor([ 1.5672, 0.2683, -0.2115, -0.1588]) tensor([0.6174, 0.1684, 0.1043, 0.1099]) -Greedy action tensor([ 1.3952, -0.3608, -0.7308, 0.5595]) tensor([0.5795, 0.1001, 0.0691, 0.2512]) -Greedy action tensor([ 2.4754, -1.7311, 0.0243, 0.6364]) tensor([0.7936, 0.0118, 0.0684, 0.1262]) -Greedy action tensor([ 1.0295, -0.5118, -0.4808, 0.5832]) tensor([0.4819, 0.1032, 0.1064, 0.3085]) -Greedy action tensor([ 1.4515, 0.2464, -1.1134, 0.3179]) tensor([0.5888, 0.1764, 0.0453, 0.1895]) -Greedy action tensor([ 1.6460, -0.5815, -0.3670, 0.5811]) tensor([0.6305, 0.0680, 0.0842, 0.2173]) -Greedy action tensor([ 1.6079, -0.7815, -0.1231, 0.1171]) tensor([0.6693, 0.0614, 0.1186, 0.1507]) -Greedy action tensor([ 1.4421, 0.0821, -1.3747, 0.4454]) tensor([0.5933, 0.1523, 0.0355, 0.2190]) -Greedy action tensor([ 0.7636, -0.3027, 0.4037, 0.1342]) tensor([0.3884, 0.1337, 0.2710, 0.2070]) -Greedy action tensor([ 1.6118, -0.4496, -0.7572, 0.1426]) tensor([0.6892, 0.0877, 0.0645, 0.1586]) -Greedy action tensor([ 1.0531, -0.3097, -0.1944, -0.0694]) tensor([0.5352, 0.1370, 0.1537, 0.1742]) -Greedy action tensor([ 1.9598, -0.8935, -0.0951, 0.1530]) tensor([0.7408, 0.0427, 0.0949, 0.1216]) -Greedy action tensor([ 1.2736, -0.5593, 0.3519, 0.1937]) tensor([0.5270, 0.0843, 0.2097, 0.1790]) -Greedy action tensor([ 1.3494, -0.5601, -0.2281, 0.2488]) tensor([0.5926, 0.0878, 0.1224, 0.1972]) -Greedy action tensor([ 1.6143, -0.5297, -0.2142, 0.5867]) tensor([0.6114, 0.0716, 0.0982, 0.2188]) -Greedy action tensor([ 1.4863, -0.5597, -1.0635, 0.2357]) tensor([0.6695, 0.0865, 0.0523, 0.1917]) -Greedy action tensor([ 1.8974, -0.5375, -0.5586, 0.4625]) tensor([0.7085, 0.0621, 0.0608, 0.1687]) -Greedy action tensor([ 1.8528, -0.9930, -0.2464, 0.1639]) tensor([0.7324, 0.0425, 0.0898, 0.1353]) -Greedy action tensor([ 1.3825, -0.5691, -0.3762, 0.5629]) tensor([0.5698, 0.0809, 0.0982, 0.2511]) -Greedy action tensor([ 1.6207, -0.6339, 0.0420, -0.0709]) tensor([0.6687, 0.0702, 0.1379, 0.1232]) -Greedy action tensor([ 1.6864, -1.1541, -0.4421, 0.5887]) tensor([0.6618, 0.0386, 0.0788, 0.2208]) -Greedy action tensor([ 1.0374, -0.1323, -0.0276, 0.0704]) tensor([0.4913, 0.1525, 0.1694, 0.1868]) -Greedy action tensor([ 1.5928, -0.6856, -0.2656, -0.0597]) tensor([0.6897, 0.0707, 0.1075, 0.1321]) -Greedy action tensor([ 1.3194, -0.4002, -0.1274, 0.2508]) tensor([0.5688, 0.1019, 0.1339, 0.1954]) -Greedy action tensor([ 1.1687, -0.4010, -0.3047, 0.8150]) tensor([0.4674, 0.0973, 0.1071, 0.3282]) -Greedy action tensor([ 0.9437, -0.1662, -0.5099, 0.2009]) tensor([0.4904, 0.1616, 0.1146, 0.2333]) -Greedy action tensor([ 1.4311, -0.1483, -0.5510, -0.1998]) tensor([0.6495, 0.1339, 0.0895, 0.1271]) -Greedy action tensor([ 1.3155, -0.1869, -0.5830, 0.0104]) tensor([0.6085, 0.1354, 0.0911, 0.1650]) -Greedy action tensor([ 1.4911, 0.1531, -0.8468, 0.5570]) tensor([0.5708, 0.1498, 0.0551, 0.2243]) -Greedy action tensor([ 1.6772, -0.7943, -0.2063, 0.3495]) tensor([0.6660, 0.0562, 0.1013, 0.1765]) -Greedy action tensor([ 1.5869, -0.8913, -0.1788, 0.5226]) tensor([0.6250, 0.0524, 0.1069, 0.2156]) -Greedy action tensor([ 1.6355, 0.0021, -0.4633, -0.1625]) tensor([0.6741, 0.1316, 0.0826, 0.1116]) -Greedy action tensor([ 1.4018, -0.6862, -0.2471, 0.2580]) tensor([0.6117, 0.0758, 0.1176, 0.1949]) -Greedy action tensor([ 1.4774, -0.3170, -0.2026, 0.0078]) tensor([0.6319, 0.1050, 0.1178, 0.1454]) -Greedy action tensor([ 1.6348, -0.3621, -0.4023, 0.2717]) tensor([0.6570, 0.0892, 0.0857, 0.1681]) -Greedy action tensor([ 1.3053, -0.2191, -0.6306, 0.2240]) tensor([0.5878, 0.1280, 0.0848, 0.1994]) -Greedy action tensor([ 1.1362, -0.8608, -0.2445, 0.8162]) tensor([0.4732, 0.0642, 0.1190, 0.3436]) -Greedy action tensor([-0.8973, -0.4026, 0.3832, 0.5494]) tensor([0.0954, 0.1564, 0.3431, 0.4051]) -Greedy action tensor([-1.8489, -0.3558, 0.6168, -0.1705]) tensor([0.0443, 0.1971, 0.5213, 0.2373]) -Greedy action tensor([-1.8753, -0.4557, 0.6412, -0.1424]) tensor([0.0431, 0.1784, 0.5343, 0.2441]) -Greedy action tensor([-1.9346, -0.4430, 0.6638, -0.1747]) tensor([0.0405, 0.1799, 0.5443, 0.2353]) -Greedy action tensor([-1.6467, -0.3311, 0.6862, 0.1547]) tensor([0.0474, 0.1767, 0.4887, 0.2872]) -Greedy action tensor([-1.5441, -0.5746, 0.4683, -0.0221]) tensor([0.0637, 0.1679, 0.4766, 0.2918]) -Greedy action tensor([-1.2137, 0.0049, 0.1712, 0.1231]) tensor([0.0821, 0.2776, 0.3278, 0.3125]) -Greedy action tensor([-1.6738, -0.2463, 0.5079, -0.0876]) tensor([0.0529, 0.2204, 0.4685, 0.2583]) -Greedy action tensor([-1.8336, -0.2710, 0.5986, -0.1164]) tensor([0.0440, 0.2100, 0.5010, 0.2451]) -Greedy action tensor([-1.9099, -0.4412, 0.6485, -0.1641]) tensor([0.0417, 0.1811, 0.5384, 0.2389]) -Greedy action tensor([-1.9474, -0.4521, 0.6681, -0.1821]) tensor([0.0400, 0.1786, 0.5475, 0.2339]) -Greedy action tensor([-1.8755, -0.4125, 0.6293, -0.1468]) tensor([0.0431, 0.1862, 0.5278, 0.2429]) -Greedy action tensor([-1.9376, -0.4546, 0.6619, -0.1789]) tensor([0.0405, 0.1786, 0.5455, 0.2353]) -Greedy action tensor([-1.8000, -0.5005, 0.5954, -0.1253]) tensor([0.0477, 0.1748, 0.5231, 0.2544]) -Greedy action tensor([-1.8871, -0.4162, 0.6249, -0.1410]) tensor([0.0427, 0.1859, 0.5266, 0.2448]) -Greedy action tensor([0.5369, 0.8615, 0.4224, 1.0938]) tensor([0.1992, 0.2756, 0.1776, 0.3476]) -Greedy action tensor([-1.8829, -0.4498, 0.6394, -0.1530]) tensor([0.0429, 0.1800, 0.5349, 0.2422]) -Greedy action tensor([-1.9052, -0.4334, 0.6463, -0.1616]) tensor([0.0418, 0.1823, 0.5366, 0.2392]) -Greedy action tensor([-1.7619, -0.4384, 0.5772, -0.0882]) tensor([0.0489, 0.1836, 0.5069, 0.2606]) -Greedy action tensor([-1.8502, -0.4135, 0.6158, -0.1385]) tensor([0.0444, 0.1868, 0.5229, 0.2459]) -Greedy action tensor([-1.8246, -0.4668, 0.6115, -0.1312]) tensor([0.0460, 0.1787, 0.5253, 0.2500]) -Greedy action tensor([-1.8149, -0.4695, 0.6086, -0.1197]) tensor([0.0464, 0.1780, 0.5231, 0.2525]) -Greedy action tensor([-1.6088, -0.3557, 0.7126, 0.2990]) tensor([0.0467, 0.1634, 0.4755, 0.3144]) -Greedy action tensor([-1.9051, -0.3928, 0.6469, -0.1574]) tensor([0.0415, 0.1882, 0.5322, 0.2381]) -Greedy action tensor([ 0.0824, -0.5801, 0.1224, 0.4631]) tensor([0.2488, 0.1283, 0.2589, 0.3640]) -Greedy action tensor([-1.1609, -0.5297, 0.7613, 0.5937]) tensor([0.0645, 0.1213, 0.4411, 0.3731]) -Greedy action tensor([-1.8810, -0.4561, 0.6371, -0.1520]) tensor([0.0431, 0.1792, 0.5348, 0.2429]) -Greedy action tensor([-1.8844, -0.4097, 0.6244, -0.1416]) tensor([0.0428, 0.1870, 0.5258, 0.2444]) -Greedy action tensor([-1.7779, -0.4880, 0.5743, -0.0993]) tensor([0.0488, 0.1772, 0.5126, 0.2614]) -Greedy action tensor([-1.6473, -0.4789, 0.5354, -0.0866]) tensor([0.0560, 0.1802, 0.4969, 0.2668]) -Greedy action tensor([-1.9045, -0.4479, 0.6467, -0.1503]) tensor([0.0419, 0.1796, 0.5367, 0.2419]) -Greedy action tensor([-1.9079, -0.4432, 0.6489, -0.1634]) tensor([0.0418, 0.1807, 0.5385, 0.2390]) -Greedy action tensor([-1.5890, 0.0079, 0.4456, -0.0805]) tensor([0.0552, 0.2727, 0.4225, 0.2496]) -Greedy action tensor([-1.7949, -0.5000, 0.5932, -0.1223]) tensor([0.0479, 0.1749, 0.5220, 0.2552]) -Greedy action tensor([-1.4669, -0.4750, 0.4730, -0.1146]) tensor([0.0689, 0.1857, 0.4792, 0.2663]) -Greedy action tensor([-1.8276, -0.3917, 0.6016, -0.1268]) tensor([0.0454, 0.1908, 0.5152, 0.2487]) -Greedy action tensor([-1.9343, -0.4433, 0.6638, -0.1744]) tensor([0.0405, 0.1799, 0.5442, 0.2354]) -Greedy action tensor([-1.2259, -0.0530, 0.2996, 0.0203]) tensor([0.0813, 0.2626, 0.3736, 0.2826]) -Greedy action tensor([-1.5688, -0.1030, 0.4731, -0.1124]) tensor([0.0577, 0.2500, 0.4447, 0.2476]) -Greedy action tensor([-1.9127, -0.4271, 0.6527, -0.1642]) tensor([0.0414, 0.1828, 0.5381, 0.2377]) -Greedy action tensor([-1.3691, -0.4273, 0.7126, -0.0172]) tensor([0.0647, 0.1660, 0.5191, 0.2502]) -Greedy action tensor([-1.8885, -0.3150, 0.6352, -0.1946]) tensor([0.0421, 0.2032, 0.5255, 0.2292]) -Greedy action tensor([-1.7899, -0.2023, 0.3292, -0.1623]) tensor([0.0518, 0.2534, 0.4311, 0.2637]) -Greedy action tensor([-1.8970, -0.4326, 0.6577, -0.1539]) tensor([0.0418, 0.1809, 0.5382, 0.2391]) -Greedy action tensor([-1.7458, 0.4039, 0.5229, -0.2425]) tensor([0.0421, 0.3614, 0.4071, 0.1894]) -Greedy action tensor([-0.7677, 0.8820, 0.1681, -0.1198]) tensor([0.0938, 0.4880, 0.2390, 0.1792]) -Greedy action tensor([-1.8426, -0.0880, 0.5606, -0.1301]) tensor([0.0428, 0.2472, 0.4730, 0.2370]) -Greedy action tensor([-1.5363, -0.2760, 0.5318, 0.0180]) tensor([0.0582, 0.2054, 0.4607, 0.2756]) -Greedy action tensor([-1.8283, -0.4407, 0.6080, -0.1257]) tensor([0.0456, 0.1827, 0.5214, 0.2503]) -Greedy action tensor([-1.8163, -0.4905, 0.6046, -0.1396]) tensor([0.0468, 0.1762, 0.5268, 0.2503]) -Greedy action tensor([-1.8587, -0.3402, 0.6046, -0.1432]) tensor([0.0437, 0.1996, 0.5135, 0.2431]) -Greedy action tensor([-1.7606, -0.4967, 0.5707, -0.0939]) tensor([0.0497, 0.1759, 0.5114, 0.2631]) -Greedy action tensor([-0.9505, -0.6343, 0.3077, -0.0764]) tensor([0.1207, 0.1655, 0.4246, 0.2892]) -Greedy action tensor([ 0.1243, 1.0501, -0.0489, 0.1733]) tensor([0.1847, 0.4661, 0.1553, 0.1939]) -Greedy action tensor([-1.8040, -0.3892, 0.6193, -0.0915]) tensor([0.0456, 0.1876, 0.5142, 0.2526]) -Greedy action tensor([-1.7203, -0.3852, 0.5552, -0.0751]) tensor([0.0507, 0.1928, 0.4937, 0.2628]) -Greedy action tensor([-1.8589, -0.4115, 0.6478, -0.1099]) tensor([0.0430, 0.1828, 0.5272, 0.2471]) -Greedy action tensor([-1.5634, 0.0058, 0.4138, -0.0704]) tensor([0.0572, 0.2748, 0.4133, 0.2547]) -Greedy action tensor([-1.6624, -0.1942, 0.6059, 0.0118]) tensor([0.0492, 0.2134, 0.4751, 0.2623]) -Greedy action tensor([-1.9427, -0.4469, 0.6679, -0.1779]) tensor([0.0401, 0.1792, 0.5462, 0.2345]) -Greedy action tensor([-1.9215, -0.4658, 0.6127, -0.2125]) tensor([0.0427, 0.1831, 0.5383, 0.2359]) -Greedy action tensor([-0.7309, 0.8317, 0.1174, -0.0218]) tensor([0.0986, 0.4706, 0.2304, 0.2004]) -Greedy action tensor([-1.7329, -0.2260, 0.5749, -0.0423]) tensor([0.0476, 0.2150, 0.4790, 0.2584]) -Greedy action tensor([-1.4716, -0.5709, 0.4428, -0.0219]) tensor([0.0689, 0.1697, 0.4676, 0.2938]) -Greedy action tensor([-1.9380, -0.4437, 0.6651, -0.1768]) tensor([0.0404, 0.1798, 0.5450, 0.2348]) -Greedy action tensor([-1.4816, -0.3225, 0.3843, 0.0343]) tensor([0.0658, 0.2096, 0.4251, 0.2995]) -Greedy action tensor([-1.8510, -0.4413, 0.6228, -0.1338]) tensor([0.0444, 0.1817, 0.5267, 0.2472]) -Greedy action tensor([-1.7687, -0.2568, 0.5949, -0.0708]) tensor([0.0462, 0.2097, 0.4915, 0.2526]) -Greedy action tensor([-1.8285, -0.1411, 0.5737, -0.1081]) tensor([0.0434, 0.2346, 0.4795, 0.2425]) -Greedy action tensor([-1.9025, -0.4213, 0.6490, -0.1573]) tensor([0.0418, 0.1836, 0.5355, 0.2391]) -Greedy action tensor([-1.7806, -0.4618, 0.5872, -0.0971]) tensor([0.0481, 0.1798, 0.5132, 0.2589]) -Greedy action tensor([-1.7759, -0.3282, 0.5483, -0.0780]) tensor([0.0478, 0.2032, 0.4881, 0.2609]) -Greedy action tensor([-1.9328, -0.4522, 0.6652, -0.1732]) tensor([0.0406, 0.1784, 0.5453, 0.2358]) -Greedy action tensor([-1.9221, -0.4296, 0.6557, -0.1691]) tensor([0.0410, 0.1824, 0.5399, 0.2367]) -Greedy action tensor([-1.8257, -0.3989, 0.6129, -0.2088]) tensor([0.0462, 0.1923, 0.5289, 0.2326]) -Greedy action tensor([-1.8854, -0.2928, 0.6212, -0.1469]) tensor([0.0419, 0.2060, 0.5138, 0.2383]) -Greedy action tensor([-1.8718, -0.4256, 0.6338, -0.1360]) tensor([0.0432, 0.1833, 0.5287, 0.2448]) -Greedy action tensor([-1.8609, -0.4553, 0.6314, -0.1333]) tensor([0.0439, 0.1789, 0.5303, 0.2469]) -Greedy action tensor([-1.9261, -0.4332, 0.6571, -0.1717]) tensor([0.0409, 0.1819, 0.5411, 0.2362]) -Greedy action tensor([-1.7292, -0.5113, 0.4809, -0.2987]) tensor([0.0566, 0.1912, 0.5157, 0.2365]) -Greedy action tensor([-1.9038, -0.4196, 0.6431, -0.1611]) tensor([0.0419, 0.1846, 0.5344, 0.2391]) -Greedy action tensor([-1.1540, -0.9545, -0.0164, 0.3666]) tensor([0.1009, 0.1231, 0.3146, 0.4614]) -Greedy action tensor([-0.4770, -0.0642, -0.7850, 0.5469]) tensor([0.1658, 0.2506, 0.1219, 0.4617]) -Greedy action tensor([-0.3562, -0.8216, 0.2092, -0.3188]) tensor([0.2259, 0.1419, 0.3977, 0.2345]) -Greedy action tensor([ 0.5778, -1.1848, 0.6238, 0.1095]) tensor([0.3515, 0.0603, 0.3681, 0.2201]) -Greedy action tensor([-0.7704, -1.1091, 0.0178, 0.0864]) tensor([0.1596, 0.1137, 0.3509, 0.3758]) -Greedy action tensor([-0.5397, -0.5546, -0.2595, -0.8737]) tensor([0.2485, 0.2448, 0.3288, 0.1779]) -Greedy action tensor([-0.2834, -0.5991, -0.1552, -0.6409]) tensor([0.2805, 0.2045, 0.3188, 0.1962]) -Greedy action tensor([-0.3142, -0.7746, -0.4707, 0.2963]) tensor([0.2311, 0.1458, 0.1976, 0.4255]) -Greedy action tensor([ 0.6529, -0.4706, -0.0188, -0.4530]) tensor([0.4615, 0.1500, 0.2358, 0.1527]) -Greedy action tensor([-0.4822, 0.0033, 0.2175, -0.5753]) tensor([0.1802, 0.2928, 0.3628, 0.1642]) -Greedy action tensor([-1.2862, -1.1642, 0.5435, -1.2839]) tensor([0.1068, 0.1206, 0.6655, 0.1070]) -Greedy action tensor([-0.7035, -1.2832, 1.3661, -1.0920]) tensor([0.0984, 0.0551, 0.7797, 0.0667]) -Greedy action tensor([ 0.3541, -0.1106, -0.9194, 1.3423]) tensor([0.2176, 0.1368, 0.0609, 0.5847]) -Greedy action tensor([ 0.1462, -0.1610, 0.8501, -0.9892]) tensor([0.2452, 0.1803, 0.4957, 0.0788]) -Greedy action tensor([ 0.5819, -0.7235, -0.5568, -0.1716]) tensor([0.4850, 0.1315, 0.1553, 0.2283]) -Greedy action tensor([ 0.7197, -0.8348, -0.8738, -0.1611]) tensor([0.5468, 0.1155, 0.1111, 0.2266]) -Greedy action tensor([-0.1428, -0.5433, 0.8192, -0.2392]) tensor([0.1925, 0.1290, 0.5037, 0.1748]) -Greedy action tensor([-0.0093, -0.5355, -0.9146, -0.1009]) tensor([0.3439, 0.2032, 0.1391, 0.3138]) -Greedy action tensor([ 0.3736, -0.3704, -0.2306, 0.4586]) tensor([0.3215, 0.1528, 0.1757, 0.3500]) -Greedy action tensor([ 0.4477, 0.3372, -0.6217, -0.3655]) tensor([0.3729, 0.3338, 0.1280, 0.1653]) -Greedy action tensor([-0.1311, -0.7805, -0.4819, -0.7455]) tensor([0.3613, 0.1888, 0.2544, 0.1955]) -Greedy action tensor([-0.1538, -0.8095, -0.0115, -1.1460]) tensor([0.3287, 0.1706, 0.3789, 0.1219]) -Greedy action tensor([ 0.7685, -1.2881, -0.4426, 0.2832]) tensor([0.4899, 0.0627, 0.1459, 0.3016]) -Greedy action tensor([ 0.9463, 0.5441, -0.4126, -0.5361]) tensor([0.4645, 0.3107, 0.1194, 0.1055]) -Greedy action tensor([ 0.5335, -0.8547, 0.2653, 0.8152]) tensor([0.2994, 0.0747, 0.2290, 0.3969]) -Greedy action tensor([-1.3001, 0.2678, -1.2179, -0.5506]) tensor([0.1111, 0.5331, 0.1207, 0.2352]) -Greedy action tensor([ 1.6749, -0.7283, 0.8869, 0.7691]) tensor([0.5130, 0.0464, 0.2333, 0.2074]) -Greedy action tensor([-1.7867, -0.2033, 0.2007, -1.6697]) tensor([0.0700, 0.3409, 0.5105, 0.0787]) -Greedy action tensor([-0.2487, 0.3630, 0.2194, -1.0721]) tensor([0.2049, 0.3778, 0.3273, 0.0900]) -Greedy action tensor([ 0.4872, -0.1710, -0.0424, -0.8990]) tensor([0.4243, 0.2197, 0.2499, 0.1061]) -Greedy action tensor([-1.0354, -0.4961, -0.6458, -0.9155]) tensor([0.1880, 0.3224, 0.2776, 0.2120]) -Greedy action tensor([-0.9652, -0.6222, -0.3440, -0.2268]) tensor([0.1572, 0.2215, 0.2925, 0.3289]) -Greedy action tensor([-0.0275, -0.0521, 0.2920, -0.8198]) tensor([0.2628, 0.2564, 0.3617, 0.1190]) -Greedy action tensor([-1.1039, -0.4150, 0.7966, -0.6910]) tensor([0.0893, 0.1779, 0.5977, 0.1350]) -Greedy action tensor([-0.3883, -0.2734, -0.2276, -0.9502]) tensor([0.2586, 0.2901, 0.3037, 0.1475]) -Greedy action tensor([ 0.6926, -0.5814, -1.8368, 0.4621]) tensor([0.4643, 0.1299, 0.0370, 0.3688]) -Greedy action tensor([ 1.0549, -1.5346, -0.2784, -0.6849]) tensor([0.6604, 0.0496, 0.1741, 0.1159]) -Greedy action tensor([ 0.8087, -0.0264, 0.7787, 0.6438]) tensor([0.3075, 0.1334, 0.2984, 0.2607]) -Greedy action tensor([ 0.4544, -0.7653, 0.6132, 0.0886]) tensor([0.3163, 0.0934, 0.3708, 0.2194]) -Greedy action tensor([ 0.7531, -0.4673, -0.4855, -0.1706]) tensor([0.5045, 0.1489, 0.1462, 0.2003]) -Greedy action tensor([-1.4071, -0.6381, 1.4125, -0.9760]) tensor([0.0466, 0.1005, 0.7812, 0.0717]) -Greedy action tensor([ 1.1601, -0.4060, 0.1515, 0.5580]) tensor([0.4714, 0.0985, 0.1719, 0.2582]) -Greedy action tensor([-0.2440, -1.0582, 0.8819, 0.0328]) tensor([0.1711, 0.0758, 0.5275, 0.2256]) -Greedy action tensor([ 0.7442, -0.3435, 0.6380, 0.7537]) tensor([0.3081, 0.1038, 0.2771, 0.3110]) -Greedy action tensor([-1.0309, -0.8711, 0.5789, 0.6702]) tensor([0.0790, 0.0927, 0.3953, 0.4330]) -Greedy action tensor([-0.5843, 0.0710, 0.3266, -0.6982]) tensor([0.1586, 0.3054, 0.3944, 0.1415]) -Greedy action tensor([ 0.2793, -0.8364, -0.0371, 0.2589]) tensor([0.3294, 0.1079, 0.2400, 0.3227]) -Greedy action tensor([ 0.1254, 0.0784, -0.5948, -0.3352]) tensor([0.3256, 0.3106, 0.1584, 0.2054]) -Greedy action tensor([ 0.0387, -1.0889, 0.0701, 0.5481]) tensor([0.2487, 0.0805, 0.2567, 0.4140]) -Greedy action tensor([ 0.6809, -1.0010, -0.3416, 0.3695]) tensor([0.4390, 0.0817, 0.1579, 0.3215]) -Greedy action tensor([ 0.2844, 0.4240, -0.3101, -0.0371]) tensor([0.2918, 0.3355, 0.1610, 0.2116]) -Greedy action tensor([ 0.0573, -0.9015, -0.1067, 0.0398]) tensor([0.3111, 0.1193, 0.2640, 0.3057]) -Greedy action tensor([ 0.5086, -0.6808, -0.7771, 0.2367]) tensor([0.4268, 0.1299, 0.1180, 0.3252]) -Greedy action tensor([-0.1955, -0.7955, -0.3982, -0.6962]) tensor([0.3365, 0.1847, 0.2748, 0.2040]) -Greedy action tensor([ 0.0868, -1.0325, -0.6752, 0.0674]) tensor([0.3605, 0.1177, 0.1683, 0.3536]) -Greedy action tensor([-0.7665, -1.5267, -0.3743, -0.4277]) tensor([0.2298, 0.1075, 0.3402, 0.3225]) -Greedy action tensor([ 0.2728, -0.2833, 0.2366, -1.2108]) tensor([0.3617, 0.2074, 0.3488, 0.0820]) -Greedy action tensor([-0.8295, -0.5332, 0.6237, -1.5764]) tensor([0.1409, 0.1895, 0.6027, 0.0668]) -Greedy action tensor([-0.3782, 0.0546, -0.1908, -1.2004]) tensor([0.2388, 0.3682, 0.2881, 0.1050]) -Greedy action tensor([ 0.8163, -1.2476, 0.1167, 0.3734]) tensor([0.4413, 0.0560, 0.2192, 0.2834]) -Greedy action tensor([ 0.1555, -1.8014, 0.7853, 0.1517]) tensor([0.2491, 0.0352, 0.4676, 0.2481]) -Greedy action tensor([-1.3830, 0.5681, -0.4974, 0.0083]) tensor([0.0691, 0.4859, 0.1674, 0.2776]) -Greedy action tensor([-0.0108, -1.0710, 1.3661, -0.8920]) tensor([0.1747, 0.0605, 0.6924, 0.0724]) -Greedy action tensor([-0.3265, -0.1532, -0.0701, -0.6061]) tensor([0.2360, 0.2806, 0.3050, 0.1784]) -Greedy action tensor([-0.5155, 0.1029, 0.1479, -0.7762]) tensor([0.1796, 0.3333, 0.3487, 0.1384]) -Greedy action tensor([ 0.0665, -0.6923, -0.8589, -0.0207]) tensor([0.3596, 0.1684, 0.1425, 0.3296]) -Greedy action tensor([ 0.7174, -1.2099, -0.4221, -0.3542]) tensor([0.5531, 0.0805, 0.1770, 0.1894]) -Greedy action tensor([-0.5661, -0.7046, -0.4719, 0.3360]) tensor([0.1840, 0.1602, 0.2022, 0.4536]) -Greedy action tensor([-0.5175, -0.5419, -0.3414, -0.2898]) tensor([0.2260, 0.2206, 0.2696, 0.2838]) -Greedy action tensor([ 0.2135, -0.6871, 0.1556, 0.3981]) tensor([0.2815, 0.1144, 0.2656, 0.3385]) -Greedy action tensor([-0.7574, -0.6229, 0.2136, -1.3322]) tensor([0.1870, 0.2139, 0.4938, 0.1053]) -Greedy action tensor([ 0.3733, -0.3895, -0.2857, -0.0209]) tensor([0.3762, 0.1755, 0.1947, 0.2537]) -Greedy action tensor([-0.2148, 0.1788, -0.2104, -0.8506]) tensor([0.2490, 0.3691, 0.2501, 0.1318]) -Greedy action tensor([-0.3264, -1.2688, 0.0583, -0.2293]) tensor([0.2525, 0.0984, 0.3709, 0.2782]) -Greedy action tensor([ 0.0558, -0.5246, -0.4160, 0.2098]) tensor([0.2985, 0.1671, 0.1862, 0.3482]) -Greedy action tensor([ 0.4069, -0.4686, 0.1370, -0.7904]) tensor([0.4029, 0.1679, 0.3076, 0.1217]) -Greedy action tensor([ 0.1908, -0.6913, 0.2328, 0.1943]) tensor([0.2890, 0.1196, 0.3014, 0.2900]) -Greedy action tensor([ 0.5197, -1.1435, -0.2383, -0.4675]) tensor([0.4924, 0.0933, 0.2308, 0.1835]) -Greedy action tensor([-0.3591, -0.9674, 0.0353, -0.6973]) tensor([0.2673, 0.1455, 0.3966, 0.1906]) -Greedy action tensor([ 0.5007, -1.5159, -0.0430, -0.0806]) tensor([0.4400, 0.0586, 0.2554, 0.2460]) -Greedy action tensor([-1.2381, -1.0592, 0.5224, -0.8164]) tensor([0.1049, 0.1254, 0.6098, 0.1599]) -Greedy action tensor([ 0.7552, -0.5010, 0.0577, -0.1730]) tensor([0.4592, 0.1307, 0.2286, 0.1815]) -Greedy action tensor([ 1.0940, -0.2832, -0.1475, -0.1910]) tensor([0.5501, 0.1388, 0.1589, 0.1522]) -Greedy action tensor([ 0.5470, 0.0485, 0.0715, -0.1958]) tensor([0.3697, 0.2246, 0.2298, 0.1759]) -Greedy action tensor([ 0.7361, -0.5283, 0.0011, -0.2758]) tensor([0.4705, 0.1329, 0.2256, 0.1710]) -Greedy action tensor([ 0.9270, -0.6754, -0.0943, -0.5523]) tensor([0.5589, 0.1126, 0.2013, 0.1273]) -Greedy action tensor([ 1.3224, -0.5734, -0.2031, -0.7156]) tensor([0.6676, 0.1003, 0.1452, 0.0870]) -Greedy action tensor([ 0.7819, -0.4311, -0.1363, -0.3518]) tensor([0.4954, 0.1473, 0.1978, 0.1595]) -Greedy action tensor([ 0.8226, -0.7332, 0.0051, -0.4828]) tensor([0.5198, 0.1097, 0.2295, 0.1409]) -Greedy action tensor([ 0.5586, 0.1193, -0.1254, 0.0802]) tensor([0.3612, 0.2328, 0.1822, 0.2238]) -Greedy action tensor([ 0.7834, -0.3284, -0.0181, -0.0855]) tensor([0.4552, 0.1497, 0.2042, 0.1909]) -Greedy action tensor([ 0.6221, 0.0343, -0.0613, -0.3980]) tensor([0.4131, 0.2295, 0.2085, 0.1489]) -Greedy action tensor([ 0.8209, -0.6066, -0.0703, -0.3958]) tensor([0.5138, 0.1233, 0.2107, 0.1522]) -Greedy action tensor([ 0.8265, -0.3791, 0.0948, -0.2126]) tensor([0.4685, 0.1403, 0.2254, 0.1658]) -Greedy action tensor([ 0.3394, 0.0606, -0.0491, -0.4166]) tensor([0.3443, 0.2605, 0.2335, 0.1617]) -Greedy action tensor([ 0.5098, -0.1714, -0.0040, -0.3211]) tensor([0.3937, 0.1992, 0.2355, 0.1715]) -Greedy action tensor([ 0.6463, -0.4002, -0.0134, -0.1965]) tensor([0.4350, 0.1528, 0.2249, 0.1873]) -Greedy action tensor([ 1.1797, -0.6045, -0.1085, -0.4647]) tensor([0.6109, 0.1026, 0.1685, 0.1180]) -Greedy action tensor([ 1.0637, -0.6791, -0.0685, -0.5323]) tensor([0.5882, 0.1030, 0.1896, 0.1192]) -Greedy action tensor([ 0.5970, -0.4074, -0.0776, -0.5408]) tensor([0.4553, 0.1668, 0.2319, 0.1460]) -Greedy action tensor([ 0.7473, -0.6365, -0.0385, -0.3028]) tensor([0.4863, 0.1219, 0.2216, 0.1702]) -Greedy action tensor([ 0.7378, -0.3627, -0.0837, -0.2636]) tensor([0.4673, 0.1555, 0.2055, 0.1717]) -Greedy action tensor([ 1.0712, -0.7691, 0.1012, -0.4472]) tensor([0.5692, 0.0904, 0.2158, 0.1247]) -Greedy action tensor([ 0.9023, -0.8800, -0.1074, -0.5030]) tensor([0.5625, 0.0946, 0.2049, 0.1380]) -Greedy action tensor([ 0.6378, -0.2589, 0.0099, -0.5143]) tensor([0.4429, 0.1807, 0.2364, 0.1400]) -Greedy action tensor([ 0.5514, 0.2044, -0.1421, 0.1406]) tensor([0.3485, 0.2463, 0.1742, 0.2311]) -Greedy action tensor([ 1.0381, -0.3986, 0.1114, -0.3802]) tensor([0.5331, 0.1267, 0.2110, 0.1291]) -Greedy action tensor([ 0.6570, -0.4481, 0.0010, -0.5471]) tensor([0.4651, 0.1540, 0.2414, 0.1395]) -Greedy action tensor([ 0.8266, -0.1015, -0.0185, -0.0814]) tensor([0.4488, 0.1774, 0.1928, 0.1810]) -Greedy action tensor([ 0.9910, -0.3765, -0.2885, -0.5623]) tensor([0.5732, 0.1460, 0.1595, 0.1213]) -Greedy action tensor([ 0.9552, -0.0126, 0.0958, -0.5621]) tensor([0.4944, 0.1878, 0.2093, 0.1084]) -Greedy action tensor([ 0.8211, -0.8212, -0.0729, -0.4456]) tensor([0.5307, 0.1027, 0.2171, 0.1495]) -Greedy action tensor([ 0.3147, 0.6864, -0.0601, 0.1529]) tensor([0.2507, 0.3636, 0.1724, 0.2133]) -Greedy action tensor([ 0.3996, -0.3108, -0.0708, -0.2960]) tensor([0.3824, 0.1879, 0.2389, 0.1907]) -Greedy action tensor([ 0.5402, -0.4314, 0.0150, -0.2281]) tensor([0.4109, 0.1555, 0.2430, 0.1906]) -Greedy action tensor([ 0.6393, -0.0342, -0.0416, 0.0348]) tensor([0.3903, 0.1990, 0.1975, 0.2132]) -Greedy action tensor([ 0.3429, -0.0668, -0.1482, -0.2789]) tensor([0.3555, 0.2360, 0.2176, 0.1909]) -Greedy action tensor([ 1.1053, -0.7606, 0.0364, -0.5756]) tensor([0.5937, 0.0919, 0.2039, 0.1106]) -Greedy action tensor([ 0.9950, -0.7119, -0.1043, -0.3965]) tensor([0.5671, 0.1029, 0.1889, 0.1410]) -Greedy action tensor([ 1.0306, -0.8481, -0.1525, -0.3701]) tensor([0.5863, 0.0896, 0.1796, 0.1445]) -Greedy action tensor([ 0.5821, -0.1925, -0.0386, -0.2649]) tensor([0.4120, 0.1899, 0.2215, 0.1766]) -Greedy action tensor([ 0.8160, -0.3005, -0.2425, -0.3382]) tensor([0.5026, 0.1646, 0.1744, 0.1585]) -Greedy action tensor([ 0.7237, -0.2048, -0.1401, -0.1224]) tensor([0.4453, 0.1759, 0.1877, 0.1911]) -Greedy action tensor([ 0.8284, -0.4594, -0.1165, -0.4384]) tensor([0.5138, 0.1417, 0.1997, 0.1447]) -Greedy action tensor([ 1.0036, -0.3113, -0.0733, -0.2365]) tensor([0.5267, 0.1414, 0.1794, 0.1524]) -Greedy action tensor([ 0.7303, -0.3664, -0.1584, -0.3132]) tensor([0.4768, 0.1592, 0.1960, 0.1679]) -Greedy action tensor([ 0.9462, -0.6448, 0.0736, -0.5656]) tensor([0.5429, 0.1106, 0.2268, 0.1197]) -Greedy action tensor([ 0.8128, -0.5950, -0.0975, -0.4331]) tensor([0.5169, 0.1265, 0.2080, 0.1487]) -Greedy action tensor([ 0.8984, -0.4431, -0.0882, -0.2999]) tensor([0.5165, 0.1350, 0.1926, 0.1558]) -Greedy action tensor([ 0.7790, -0.3638, -0.0062, -0.1273]) tensor([0.4589, 0.1464, 0.2093, 0.1854]) -Greedy action tensor([ 0.3514, 0.1642, -0.2267, 0.1147]) tensor([0.3145, 0.2608, 0.1764, 0.2482]) -Greedy action tensor([ 0.5778, -0.0207, 0.0026, 0.0011]) tensor([0.3740, 0.2056, 0.2104, 0.2101]) -Greedy action tensor([ 0.4652, -0.0490, 0.0873, -0.1037]) tensor([0.3510, 0.2099, 0.2405, 0.1987]) -Greedy action tensor([ 0.6812, -0.6289, -0.1882, -0.2086]) tensor([0.4763, 0.1285, 0.1996, 0.1956]) -Greedy action tensor([0.6314, 0.0255, 0.0194, 0.0419]) tensor([0.3784, 0.2065, 0.2052, 0.2099]) -Greedy action tensor([ 1.2211, -0.7849, -0.0805, -0.9299]) tensor([0.6566, 0.0883, 0.1787, 0.0764]) -Greedy action tensor([ 0.7929, -0.6676, 0.1801, -0.2591]) tensor([0.4710, 0.1093, 0.2552, 0.1645]) -Greedy action tensor([ 1.1253, -0.6295, 0.1086, -0.3684]) tensor([0.5684, 0.0983, 0.2056, 0.1276]) -Greedy action tensor([ 0.4850, -0.1570, -0.0573, -0.1003]) tensor([0.3753, 0.1975, 0.2182, 0.2090]) -Greedy action tensor([ 0.7226, -0.2313, -0.1984, -0.3493]) tensor([0.4704, 0.1812, 0.1873, 0.1611]) -Greedy action tensor([ 0.6064, -0.1889, -0.0160, -0.0572]) tensor([0.3995, 0.1804, 0.2144, 0.2057]) -Greedy action tensor([ 0.5445, -0.4854, -0.0487, -0.1682]) tensor([0.4167, 0.1488, 0.2302, 0.2043]) -Greedy action tensor([ 0.9568, -0.3184, -0.0580, -0.1918]) tensor([0.5105, 0.1426, 0.1850, 0.1619]) -Greedy action tensor([ 1.3267, -0.6584, -0.1160, -0.6641]) tensor([0.6621, 0.0910, 0.1565, 0.0904]) -Greedy action tensor([ 0.6996, -0.4636, -0.1099, -0.3265]) tensor([0.4726, 0.1477, 0.2104, 0.1694]) -Greedy action tensor([ 0.8840, -0.7765, 0.0186, -0.3324]) tensor([0.5243, 0.0996, 0.2207, 0.1553]) -Greedy action tensor([ 0.5922, -0.0590, -0.1282, -0.1616]) tensor([0.4034, 0.2104, 0.1963, 0.1899]) -Greedy action tensor([ 0.7664, -0.6231, 0.0949, -0.4246]) tensor([0.4845, 0.1207, 0.2475, 0.1472]) -Greedy action tensor([ 0.8367, -0.3595, -0.0206, -0.4294]) tensor([0.4979, 0.1505, 0.2112, 0.1404]) -Greedy action tensor([ 1.1184, -0.8166, 0.0979, -0.6726]) tensor([0.5982, 0.0864, 0.2156, 0.0998]) -Greedy action tensor([ 0.5612, -0.0551, 0.0866, -0.0135]) tensor([0.3670, 0.1982, 0.2283, 0.2066]) -Greedy action tensor([ 0.7240, -0.2887, -0.0455, -0.3681]) tensor([0.4625, 0.1680, 0.2143, 0.1552]) -Greedy action tensor([ 1.2295, -0.7212, -0.0110, -0.8166]) tensor([0.6407, 0.0911, 0.1853, 0.0828]) -Greedy action tensor([ 0.9238, -0.7853, 0.1576, -0.8071]) tensor([0.5486, 0.0993, 0.2550, 0.0972]) -Greedy action tensor([ 0.8596, -1.0769, 0.0273, -0.6028]) tensor([0.5522, 0.0796, 0.2402, 0.1279]) -Greedy action tensor([ 0.4949, -0.3699, -0.2408, -0.0697]) tensor([0.4050, 0.1706, 0.1941, 0.2303]) -Greedy action tensor([ 1.2364, -0.8777, 0.1345, -0.6290]) tensor([0.6220, 0.0751, 0.2066, 0.0963]) -Greedy action tensor([ 0.8430, -0.3073, 0.0864, -0.0727]) tensor([0.4575, 0.1448, 0.2147, 0.1831]) -Greedy action tensor([ 0.6544, -0.1925, 0.0956, -0.2058]) tensor([0.4126, 0.1769, 0.2360, 0.1746]) -Greedy action tensor([ 0.2846, 0.1201, -0.0730, -0.1166]) tensor([0.3108, 0.2637, 0.2174, 0.2081]) -Greedy action tensor([ 1.0145, -0.7136, -0.0082, -0.4592]) tensor([0.5661, 0.1006, 0.2036, 0.1297]) -Greedy action tensor([ 0.6836, -0.3685, -0.0645, -0.1108]) tensor([0.4397, 0.1535, 0.2081, 0.1987]) -Greedy action tensor([ 1.6386, -0.5332, -0.5288, 0.2984]) tensor([0.6710, 0.0765, 0.0768, 0.1757]) -Greedy action tensor([ 1.2509, -1.3318, -0.1095, -0.2042]) tensor([0.6388, 0.0483, 0.1639, 0.1491]) -Greedy action tensor([ 2.1269, -0.5658, -0.5025, 0.6773]) tensor([0.7275, 0.0493, 0.0525, 0.1707]) -Greedy action tensor([ 1.2658, -0.4819, -0.1591, 0.4205]) tensor([0.5422, 0.0945, 0.1304, 0.2329]) -Greedy action tensor([ 1.3554, -0.4749, -0.6685, 0.7730]) tensor([0.5402, 0.0866, 0.0714, 0.3017]) -Greedy action tensor([ 1.7048, -0.5534, -0.1878, 0.3974]) tensor([0.6554, 0.0685, 0.0988, 0.1773]) -Greedy action tensor([ 1.7260, -0.7596, -0.8863, 0.0197]) tensor([0.7473, 0.0622, 0.0548, 0.1357]) -Greedy action tensor([ 1.6173, -0.7214, -0.5212, 0.4626]) tensor([0.6539, 0.0631, 0.0770, 0.2060]) -Greedy action tensor([ 1.4288, -0.8646, -0.3444, 0.6849]) tensor([0.5728, 0.0578, 0.0972, 0.2722]) -Greedy action tensor([ 1.2313, -0.9247, -0.1019, 0.4959]) tensor([0.5380, 0.0623, 0.1418, 0.2579]) -Greedy action tensor([ 1.1435, -0.0418, -0.8855, 0.3316]) tensor([0.5316, 0.1625, 0.0699, 0.2360]) -Greedy action tensor([ 1.5737, -0.7790, -0.6572, 0.4679]) tensor([0.6521, 0.0620, 0.0701, 0.2158]) -Greedy action tensor([ 1.2915, 0.1639, -0.7648, -0.1829]) tensor([0.5950, 0.1927, 0.0761, 0.1362]) -Greedy action tensor([ 1.2774, -0.3790, -0.8141, 0.1888]) tensor([0.6057, 0.1156, 0.0748, 0.2039]) -Greedy action tensor([ 1.3713, -0.3339, -1.2923, 0.2382]) tensor([0.6355, 0.1155, 0.0443, 0.2047]) -Greedy action tensor([ 0.8707, -0.2190, -0.3097, 0.0905]) tensor([0.4758, 0.1600, 0.1461, 0.2181]) -Greedy action tensor([ 1.1673, -0.9847, -0.4063, 0.4257]) tensor([0.5556, 0.0646, 0.1152, 0.2646]) -Greedy action tensor([ 1.6853, -0.3172, -0.9046, 0.2982]) tensor([0.6850, 0.0925, 0.0514, 0.1711]) -Greedy action tensor([ 0.9166, -0.3124, -0.0880, -0.1834]) tensor([0.5021, 0.1469, 0.1839, 0.1671]) -Greedy action tensor([ 1.4751, -0.0193, -0.7033, 0.2932]) tensor([0.6082, 0.1365, 0.0689, 0.1865]) -Greedy action tensor([ 1.4513, -0.1376, -0.6714, 0.5742]) tensor([0.5748, 0.1173, 0.0688, 0.2391]) -Greedy action tensor([ 1.1818, 0.4178, -1.3210, 0.5147]) tensor([0.4852, 0.2260, 0.0397, 0.2490]) -Greedy action tensor([ 1.1016, 0.1465, -0.6594, 0.0352]) tensor([0.5261, 0.2024, 0.0904, 0.1811]) -Greedy action tensor([ 1.2677, -0.6137, -0.4940, 0.1895]) tensor([0.6008, 0.0916, 0.1032, 0.2044]) -Greedy action tensor([ 1.5169, -0.0132, -0.0185, -0.0557]) tensor([0.6100, 0.1321, 0.1314, 0.1266]) -Greedy action tensor([ 1.3396, -0.9834, -0.4071, 0.1435]) tensor([0.6350, 0.0622, 0.1107, 0.1920]) -Greedy action tensor([ 2.3186, -0.3749, -0.7547, 1.1412]) tensor([0.7032, 0.0476, 0.0325, 0.2167]) -Greedy action tensor([ 1.1279, -0.4081, -0.2560, 0.2943]) tensor([0.5262, 0.1133, 0.1319, 0.2286]) -Greedy action tensor([ 1.4439, -0.5564, -0.5011, 0.0556]) tensor([0.6546, 0.0886, 0.0936, 0.1633]) -Greedy action tensor([ 1.3601, -0.0412, -0.4494, 0.3879]) tensor([0.5592, 0.1377, 0.0916, 0.2115]) -Greedy action tensor([ 1.3671, -0.4307, -0.1564, 0.3298]) tensor([0.5754, 0.0953, 0.1254, 0.2039]) -Greedy action tensor([ 1.7227, -0.2271, -0.5883, 0.4276]) tensor([0.6599, 0.0939, 0.0654, 0.1807]) -Greedy action tensor([ 1.4834, -0.1031, -0.3714, -0.1314]) tensor([0.6410, 0.1312, 0.1003, 0.1275]) -Greedy action tensor([ 1.4317, -0.6617, -0.3211, -0.1302]) tensor([0.6639, 0.0818, 0.1150, 0.1392]) -Greedy action tensor([ 1.6990, -0.4476, -0.5342, 0.6720]) tensor([0.6321, 0.0739, 0.0677, 0.2263]) -Greedy action tensor([ 1.1191, -0.3478, -0.0230, 0.1730]) tensor([0.5160, 0.1190, 0.1647, 0.2003]) -Greedy action tensor([ 1.7452, 0.1942, -0.2779, 0.2224]) tensor([0.6400, 0.1357, 0.0846, 0.1396]) -Greedy action tensor([ 1.7438, -0.5789, -0.6204, 0.3653]) tensor([0.6925, 0.0679, 0.0651, 0.1745]) -Greedy action tensor([ 1.5677, 0.0697, -0.7288, 0.5131]) tensor([0.5979, 0.1337, 0.0602, 0.2083]) -Greedy action tensor([ 1.3541, -0.5605, -0.8719, 0.3338]) tensor([0.6189, 0.0912, 0.0668, 0.2231]) -Greedy action tensor([ 1.4150, -0.6648, -0.5581, -0.0839]) tensor([0.6723, 0.0840, 0.0935, 0.1502]) -Greedy action tensor([ 1.7625, -0.3552, -0.4529, 0.0126]) tensor([0.7127, 0.0857, 0.0778, 0.1238]) -Greedy action tensor([ 1.5495, -0.5859, -0.3473, 0.1965]) tensor([0.6550, 0.0774, 0.0983, 0.1693]) -Greedy action tensor([ 1.5068, -0.5928, -1.0748, 0.3031]) tensor([0.6675, 0.0818, 0.0505, 0.2003]) -Greedy action tensor([ 1.1938, -0.4213, -0.9451, 0.2983]) tensor([0.5797, 0.1153, 0.0683, 0.2368]) -Greedy action tensor([ 1.6982, -0.3747, -0.8681, 0.0410]) tensor([0.7177, 0.0903, 0.0551, 0.1368]) -Greedy action tensor([ 1.6771, -1.2196, -0.6773, -0.1404]) tensor([0.7619, 0.0421, 0.0723, 0.1237]) -Greedy action tensor([ 1.5652, -0.5714, 0.0960, 0.6238]) tensor([0.5753, 0.0679, 0.1324, 0.2244]) -Greedy action tensor([ 1.1903, -0.0659, -0.2672, 0.1225]) tensor([0.5373, 0.1530, 0.1251, 0.1847]) -Greedy action tensor([ 1.2143, -0.1313, -0.7662, 0.0425]) tensor([0.5854, 0.1524, 0.0808, 0.1814]) -Greedy action tensor([ 1.0878, -0.2226, -0.8223, 0.5995]) tensor([0.4923, 0.1328, 0.0729, 0.3021]) -Greedy action tensor([ 1.7233, -0.7666, -0.0572, -0.0243]) tensor([0.7014, 0.0582, 0.1182, 0.1222]) -Greedy action tensor([ 1.4435, -0.5443, -0.4353, 0.1471]) tensor([0.6397, 0.0876, 0.0977, 0.1750]) -Greedy action tensor([ 1.3286, 0.0247, -0.7842, 0.5329]) tensor([0.5424, 0.1473, 0.0656, 0.2448]) -Greedy action tensor([ 2.4357, -0.6244, 0.0132, 0.3970]) tensor([0.7900, 0.0370, 0.0701, 0.1029]) -Greedy action tensor([ 1.4418, -0.7504, -0.4267, 0.5109]) tensor([0.6023, 0.0673, 0.0930, 0.2374]) -Greedy action tensor([ 1.3587, -0.3191, -0.4995, 0.7039]) tensor([0.5370, 0.1003, 0.0837, 0.2790]) -Greedy action tensor([ 1.6560, -0.6190, -0.0733, 0.3363]) tensor([0.6462, 0.0664, 0.1146, 0.1727]) -Greedy action tensor([ 1.4097, 0.0029, -0.6412, 0.5450]) tensor([0.5572, 0.1365, 0.0717, 0.2347]) -Greedy action tensor([ 1.1630, -0.2510, -0.3504, 0.2691]) tensor([0.5341, 0.1299, 0.1176, 0.2185]) -Greedy action tensor([ 2.0461, -0.0378, -0.1027, 0.0188]) tensor([0.7285, 0.0907, 0.0850, 0.0959]) -Greedy action tensor([ 1.0415, -0.4767, -0.1558, 0.2003]) tensor([0.5122, 0.1122, 0.1547, 0.2209]) -Greedy action tensor([ 1.3014, -0.5863, -0.0888, 0.2690]) tensor([0.5693, 0.0862, 0.1418, 0.2027]) -Greedy action tensor([ 0.9115, -0.3554, -0.1435, 0.2731]) tensor([0.4634, 0.1305, 0.1613, 0.2447]) -Greedy action tensor([ 1.5926, -0.8151, -0.0182, 0.5698]) tensor([0.6063, 0.0546, 0.1211, 0.2180]) -Greedy action tensor([ 1.3348, -0.2044, -0.0601, -0.0137]) tensor([0.5807, 0.1246, 0.1439, 0.1508]) -Greedy action tensor([ 1.7992, -0.0711, -0.7067, 0.2818]) tensor([0.6873, 0.1059, 0.0561, 0.1507]) -Greedy action tensor([ 1.4394, -0.2095, -0.3956, -0.0161]) tensor([0.6308, 0.1213, 0.1007, 0.1472]) -Greedy action tensor([ 1.8622, -0.6566, -0.7098, 0.7350]) tensor([0.6753, 0.0544, 0.0516, 0.2187]) -Greedy action tensor([ 1.4748, -0.6854, -0.5772, 0.3966]) tensor([0.6313, 0.0728, 0.0811, 0.2148]) -Greedy action tensor([ 1.4870, -0.7285, -0.8742, 0.4868]) tensor([0.6364, 0.0694, 0.0600, 0.2341]) -Greedy action tensor([ 1.6267, -0.1572, -1.1088, 0.2544]) tensor([0.6728, 0.1130, 0.0436, 0.1706]) -Greedy action tensor([ 1.7280, -0.8501, 0.1903, 0.9940]) tensor([0.5647, 0.0429, 0.1213, 0.2711]) -Greedy action tensor([ 1.3146, -0.4861, -0.6795, -0.2469]) tensor([0.6617, 0.1093, 0.0901, 0.1389]) -Greedy action tensor([ 1.5732, -0.4428, -1.0425, -0.1048]) tensor([0.7179, 0.0956, 0.0525, 0.1341]) -Greedy action tensor([ 1.1618, -0.4070, -0.5814, 0.0610]) tensor([0.5828, 0.1214, 0.1020, 0.1938]) -Greedy action tensor([ 1.9300, -0.9668, -0.0594, 0.8907]) tensor([0.6470, 0.0357, 0.0885, 0.2288]) -Greedy action tensor([ 1.4818, 0.4211, -0.7360, 0.1023]) tensor([0.5859, 0.2028, 0.0638, 0.1475]) -Greedy action tensor([ 1.6325, -1.3871, -0.5041, 0.1900]) tensor([0.7126, 0.0348, 0.0841, 0.1684]) -Greedy action tensor([ 1.1726, -0.3115, -0.4029, -0.0022]) tensor([0.5739, 0.1301, 0.1187, 0.1773]) -Greedy action tensor([ 1.4926, -0.9444, -0.3476, 0.6355]) tensor([0.5986, 0.0523, 0.0950, 0.2540]) -Greedy action tensor([-1.8498, -0.4487, 0.6224, -0.1365]) tensor([0.0445, 0.1808, 0.5276, 0.2470]) -Greedy action tensor([-1.6311, -0.4843, 0.5252, -0.0872]) tensor([0.0572, 0.1802, 0.4945, 0.2680]) -Greedy action tensor([-1.3397, 0.6746, 0.2197, 0.0680]) tensor([0.0577, 0.4323, 0.2743, 0.2357]) -Greedy action tensor([-1.8558, -0.4325, 0.6189, -0.1482]) tensor([0.0444, 0.1841, 0.5269, 0.2447]) -Greedy action tensor([-1.6641, -0.3663, 0.5144, -0.0657]) tensor([0.0542, 0.1986, 0.4790, 0.2682]) -Greedy action tensor([-1.9004, -0.4407, 0.6461, -0.1595]) tensor([0.0421, 0.1811, 0.5369, 0.2399]) -Greedy action tensor([-1.8996, -0.3122, 0.6408, -0.2002]) tensor([0.0416, 0.2034, 0.5275, 0.2275]) -Greedy action tensor([-1.9406, -0.4213, 0.6585, -0.1777]) tensor([0.0402, 0.1839, 0.5413, 0.2346]) -Greedy action tensor([-0.6166, -0.2093, 0.0341, -0.5843]) tensor([0.1834, 0.2756, 0.3516, 0.1894]) -Greedy action tensor([-1.8664, -0.4540, 0.6332, -0.1492]) tensor([0.0438, 0.1797, 0.5329, 0.2437]) -Greedy action tensor([-1.9220, -0.4427, 0.6544, -0.1703]) tensor([0.0411, 0.1806, 0.5410, 0.2372]) -Greedy action tensor([-1.9171, -0.4377, 0.6536, -0.1684]) tensor([0.0413, 0.1813, 0.5400, 0.2374]) -Greedy action tensor([-1.9056, -0.4352, 0.6473, -0.1606]) tensor([0.0418, 0.1819, 0.5369, 0.2394]) -Greedy action tensor([-1.1941, 0.0555, 0.3733, -0.1080]) tensor([0.0817, 0.2849, 0.3915, 0.2419]) -Greedy action tensor([-1.9451, -0.4496, 0.6669, -0.1806]) tensor([0.0401, 0.1790, 0.5467, 0.2342]) -Greedy action tensor([-1.5470, 0.4190, 0.3589, -0.0401]) tensor([0.0516, 0.3685, 0.3470, 0.2328]) -Greedy action tensor([-1.9007, -0.4486, 0.6478, -0.1593]) tensor([0.0421, 0.1798, 0.5381, 0.2401]) -Greedy action tensor([-1.9122, -0.4281, 0.6512, -0.1622]) tensor([0.0414, 0.1827, 0.5376, 0.2383]) -Greedy action tensor([-0.6018, 0.6154, -0.0089, 0.0244]) tensor([0.1241, 0.4192, 0.2245, 0.2321]) -Greedy action tensor([-1.8993, -0.3498, 0.6393, -0.1523]) tensor([0.0415, 0.1953, 0.5252, 0.2380]) -Greedy action tensor([-1.8134, -0.4456, 0.6002, -0.1008]) tensor([0.0462, 0.1814, 0.5163, 0.2561]) -Greedy action tensor([-1.8377, -0.3846, 0.6149, -0.1262]) tensor([0.0446, 0.1906, 0.5179, 0.2469]) -Greedy action tensor([-1.8021, -0.4195, 0.6868, -0.0518]) tensor([0.0439, 0.1749, 0.5287, 0.2526]) -Greedy action tensor([-1.4079, -0.3451, 0.3705, 0.1001]) tensor([0.0698, 0.2019, 0.4131, 0.3152]) -Greedy action tensor([-1.9162, -0.1965, 0.6217, -0.1815]) tensor([0.0402, 0.2242, 0.5081, 0.2276]) -Greedy action tensor([-1.9173, -0.3902, 0.6507, -0.1570]) tensor([0.0409, 0.1883, 0.5331, 0.2377]) -Greedy action tensor([-1.8794, -0.2949, 0.6191, -0.1461]) tensor([0.0422, 0.2058, 0.5132, 0.2388]) -Greedy action tensor([-1.7984, -0.4433, 0.5972, -0.1073]) tensor([0.0470, 0.1822, 0.5158, 0.2550]) -Greedy action tensor([-1.2418, -0.3033, 0.6323, 0.1671]) tensor([0.0706, 0.1805, 0.4600, 0.2889]) -Greedy action tensor([-0.6326, 0.6526, -0.6376, -0.4425]) tensor([0.1466, 0.5301, 0.1459, 0.1773]) -Greedy action tensor([-1.5965, -0.5293, 0.5002, 0.0656]) tensor([0.0577, 0.1679, 0.4700, 0.3043]) -Greedy action tensor([-1.8430, -0.3835, 0.6237, -0.1197]) tensor([0.0441, 0.1897, 0.5193, 0.2469]) -Greedy action tensor([-1.9345, -0.4406, 0.6582, -0.1760]) tensor([0.0406, 0.1809, 0.5428, 0.2357]) -Greedy action tensor([-1.6747, -0.5196, 0.5001, -0.0274]) tensor([0.0550, 0.1747, 0.4844, 0.2858]) -Greedy action tensor([-1.6953, -0.5084, 0.5258, -0.0077]) tensor([0.0529, 0.1734, 0.4877, 0.2860]) -Greedy action tensor([-0.2744, 0.7726, 0.0976, 0.9277]) tensor([0.1159, 0.3303, 0.1681, 0.3857]) -Greedy action tensor([-1.9183, -0.4457, 0.6562, -0.1707]) tensor([0.0413, 0.1800, 0.5418, 0.2370]) -Greedy action tensor([-1.9254, -0.4278, 0.6580, -0.1726]) tensor([0.0408, 0.1826, 0.5408, 0.2357]) -Greedy action tensor([-1.8334, -0.4040, 0.6073, -0.1276]) tensor([0.0451, 0.1884, 0.5180, 0.2484]) -Greedy action tensor([-1.6729, -0.2277, 0.6016, 0.0883]) tensor([0.0481, 0.2041, 0.4678, 0.2800]) -Greedy action tensor([-1.8935, -0.3029, 0.6228, -0.1573]) tensor([0.0417, 0.2047, 0.5167, 0.2368]) -Greedy action tensor([-0.9005, -0.3467, 0.2597, -0.0727]) tensor([0.1217, 0.2117, 0.3882, 0.2784]) -Greedy action tensor([-1.9103, -0.4476, 0.6516, -0.1672]) tensor([0.0417, 0.1800, 0.5402, 0.2382]) -Greedy action tensor([-1.8771, -0.4139, 0.6293, -0.1567]) tensor([0.0432, 0.1865, 0.5292, 0.2412]) -Greedy action tensor([-1.8873, -0.4618, 0.6370, -0.1590]) tensor([0.0430, 0.1788, 0.5363, 0.2419]) -Greedy action tensor([-1.9328, -0.4451, 0.6609, -0.1747]) tensor([0.0406, 0.1799, 0.5437, 0.2358]) -Greedy action tensor([-1.8236, -0.3182, 0.6043, -0.1093]) tensor([0.0447, 0.2012, 0.5062, 0.2480]) -Greedy action tensor([-1.8498, -0.1220, 0.5797, -0.1594]) tensor([0.0427, 0.2405, 0.4851, 0.2317]) -Greedy action tensor([-1.2368, 0.5646, 0.1234, 0.2364]) tensor([0.0653, 0.3955, 0.2544, 0.2848]) -Greedy action tensor([-1.6656, -0.5246, 0.7675, 0.1837]) tensor([0.0457, 0.1431, 0.5208, 0.2905]) -Greedy action tensor([-1.7876, -0.3848, 0.6287, -0.1069]) tensor([0.0462, 0.1879, 0.5178, 0.2481]) -Greedy action tensor([-1.8556, -0.2189, 0.5869, -0.1467]) tensor([0.0432, 0.2218, 0.4966, 0.2384]) -Greedy action tensor([-1.2140, 0.7559, 0.1916, 0.2440]) tensor([0.0604, 0.4333, 0.2465, 0.2597]) -Greedy action tensor([-1.8434, -0.2368, 0.6270, -0.2284]) tensor([0.0438, 0.2183, 0.5178, 0.2201]) -Greedy action tensor([-1.8330, -0.4349, 0.6162, -0.1138]) tensor([0.0450, 0.1823, 0.5214, 0.2513]) -Greedy action tensor([-1.3152, 0.4234, 0.3474, 0.1769]) tensor([0.0609, 0.3467, 0.3213, 0.2710]) -Greedy action tensor([-1.9060, -0.4535, 0.6512, -0.1637]) tensor([0.0419, 0.1789, 0.5401, 0.2391]) -Greedy action tensor([-1.7996, -0.4019, 0.5815, -0.1211]) tensor([0.0471, 0.1907, 0.5098, 0.2525]) -Greedy action tensor([-1.8387, -0.4588, 0.6184, -0.1311]) tensor([0.0451, 0.1793, 0.5266, 0.2489]) -Greedy action tensor([-1.8504, -0.4142, 0.6154, -0.1454]) tensor([0.0445, 0.1871, 0.5237, 0.2447]) -Greedy action tensor([-1.9268, -0.4541, 0.6627, -0.1724]) tensor([0.0409, 0.1783, 0.5446, 0.2363]) -Greedy action tensor([-1.9138, -0.3974, 0.6510, -0.1629]) tensor([0.0411, 0.1874, 0.5346, 0.2369]) -Greedy action tensor([-1.8110, -0.3266, 0.5879, -0.0807]) tensor([0.0453, 0.2000, 0.4990, 0.2557]) -Greedy action tensor([-1.2714, 0.1575, 0.2664, 0.0282]) tensor([0.0741, 0.3093, 0.3449, 0.2718]) -Greedy action tensor([-0.7786, -0.6791, 0.1394, -0.3740]) tensor([0.1637, 0.1809, 0.4100, 0.2454]) -Greedy action tensor([-1.4313, -0.4404, 0.4098, -0.0539]) tensor([0.0716, 0.1929, 0.4515, 0.2840]) -Greedy action tensor([-1.7240, -0.3095, 0.5610, -0.1052]) tensor([0.0500, 0.2059, 0.4916, 0.2525]) -Greedy action tensor([-1.9229, -0.3678, 0.6512, -0.1913]) tensor([0.0408, 0.1933, 0.5354, 0.2305]) -Greedy action tensor([-1.8567, -0.3509, 0.6405, -0.1353]) tensor([0.0430, 0.1939, 0.5225, 0.2406]) -Greedy action tensor([-1.9051, -0.4560, 0.6453, -0.1692]) tensor([0.0421, 0.1794, 0.5396, 0.2390]) -Greedy action tensor([-1.8733, -0.4504, 0.6388, -0.1434]) tensor([0.0433, 0.1795, 0.5333, 0.2439]) -Greedy action tensor([-1.6815, 0.1210, 0.4239, -0.0046]) tensor([0.0485, 0.2941, 0.3981, 0.2594]) -Greedy action tensor([-0.8589, 0.4450, 0.1776, -0.0890]) tensor([0.1035, 0.3812, 0.2918, 0.2235]) -Greedy action tensor([-1.8594, -0.3476, 0.6167, -0.1417]) tensor([0.0435, 0.1972, 0.5171, 0.2422]) -Greedy action tensor([-0.6273, 0.6033, -0.3103, -0.4030]) tensor([0.1419, 0.4857, 0.1948, 0.1776]) -Greedy action tensor([-1.8911, -0.4109, 0.6368, -0.1573]) tensor([0.0424, 0.1863, 0.5312, 0.2401]) -Greedy action tensor([-1.5712, -0.5177, 0.4895, -0.0690]) tensor([0.0617, 0.1769, 0.4843, 0.2771]) -Greedy action tensor([-1.8293, -0.4168, 0.6099, -0.1337]) tensor([0.0454, 0.1865, 0.5206, 0.2475]) -Greedy action tensor([-1.7995, -0.4898, 0.5902, -0.0779]) tensor([0.0471, 0.1747, 0.5144, 0.2637]) -Greedy action tensor([-1.3589, 0.2708, 0.3037, -0.7315]) tensor([0.0755, 0.3851, 0.3980, 0.1414]) -Greedy action tensor([-1.5475, -0.1382, 0.5378, 0.1205]) tensor([0.0542, 0.2219, 0.4363, 0.2875]) -Greedy action tensor([ 0.2025, -0.2923, -0.5563, -0.8702]) tensor([0.4132, 0.2519, 0.1935, 0.1414]) -Greedy action tensor([ 1.1802, -0.6285, -0.2413, -0.3892]) tensor([0.6198, 0.1016, 0.1496, 0.1290]) -Greedy action tensor([ 0.8585, -1.1081, -0.5802, -0.0396]) tensor([0.5604, 0.0784, 0.1329, 0.2283]) -Greedy action tensor([ 0.3803, -0.1630, -0.9966, -0.4872]) tensor([0.4438, 0.2578, 0.1120, 0.1864]) -Greedy action tensor([ 1.3551, -0.4098, 0.6079, 1.0909]) tensor([0.4145, 0.0710, 0.1963, 0.3182]) -Greedy action tensor([-0.3851, -0.8584, -0.2603, -0.9258]) tensor([0.2996, 0.1866, 0.3394, 0.1744]) -Greedy action tensor([ 0.3413, -0.8131, 0.4855, 0.7869]) tensor([0.2480, 0.0782, 0.2865, 0.3873]) -Greedy action tensor([-1.1796e+00, -1.7665e-01, -8.6187e-01, 6.9144e-04]) tensor([0.1197, 0.3263, 0.1644, 0.3896]) -Greedy action tensor([-0.8233, -1.4499, 0.9784, -1.1113]) tensor([0.1198, 0.0640, 0.7263, 0.0899]) -Greedy action tensor([-0.2969, -0.0993, 0.5966, -0.9500]) tensor([0.1930, 0.2351, 0.4715, 0.1004]) -Greedy action tensor([ 0.2525, 0.6028, 0.1047, -0.3836]) tensor([0.2624, 0.3724, 0.2263, 0.1389]) -Greedy action tensor([ 0.7369, -0.8627, -0.0704, 1.0250]) tensor([0.3353, 0.0677, 0.1496, 0.4473]) -Greedy action tensor([-0.4126, -0.1724, 1.4299, -0.2621]) tensor([0.1026, 0.1305, 0.6477, 0.1193]) -Greedy action tensor([ 0.3251, -1.2744, 1.0572, -1.2228]) tensor([0.2862, 0.0578, 0.5951, 0.0609]) -Greedy action tensor([-1.0047, -0.3457, 0.8668, -0.7292]) tensor([0.0930, 0.1798, 0.6046, 0.1226]) -Greedy action tensor([ 0.3510, -1.1156, 0.2199, -0.6203]) tensor([0.4022, 0.0928, 0.3528, 0.1523]) -Greedy action tensor([-0.7578, -1.4901, 0.5062, -0.4786]) tensor([0.1577, 0.0758, 0.5581, 0.2084]) -Greedy action tensor([ 0.2487, -0.7351, -0.1661, -0.4575]) tensor([0.3956, 0.1479, 0.2613, 0.1952]) -Greedy action tensor([ 0.9723, -0.3273, 0.4456, 0.3298]) tensor([0.4186, 0.1141, 0.2472, 0.2201]) -Greedy action tensor([-0.2264, -0.8674, 0.0550, 0.0074]) tensor([0.2430, 0.1280, 0.3220, 0.3070]) -Greedy action tensor([ 1.7292, -0.2838, 1.0548, 0.4783]) tensor([0.5183, 0.0692, 0.2641, 0.1484]) -Greedy action tensor([-0.9150, -1.0494, 0.2335, -1.0995]) tensor([0.1707, 0.1492, 0.5382, 0.1419]) -Greedy action tensor([-0.2016, -1.2062, -0.2980, -0.4034]) tensor([0.3235, 0.1185, 0.2937, 0.2643]) -Greedy action tensor([-0.1466, -0.6449, 0.0340, -0.9468]) tensor([0.3073, 0.1867, 0.3681, 0.1380]) -Greedy action tensor([-0.4929, -0.7535, 0.1195, 0.1394]) tensor([0.1819, 0.1402, 0.3356, 0.3423]) -Greedy action tensor([ 0.0535, -0.6567, 0.1994, 0.3307]) tensor([0.2520, 0.1239, 0.2916, 0.3325]) -Greedy action tensor([ 0.9270, -0.6330, -0.4242, 0.3984]) tensor([0.4858, 0.1021, 0.1258, 0.2863]) -Greedy action tensor([ 0.6894, 0.9032, -0.8904, -0.9288]) tensor([0.3784, 0.4686, 0.0780, 0.0750]) -Greedy action tensor([ 0.7512, -1.0943, -0.4921, -0.5207]) tensor([0.5791, 0.0915, 0.1670, 0.1623]) -Greedy action tensor([ 0.5693, -1.0823, 0.1927, -0.1229]) tensor([0.4204, 0.0806, 0.2885, 0.2104]) -Greedy action tensor([ 0.2459, -0.9155, 0.3402, -0.7156]) tensor([0.3579, 0.1120, 0.3933, 0.1368]) -Greedy action tensor([-0.2341, -0.6140, -0.3320, -0.6411]) tensor([0.3071, 0.2100, 0.2785, 0.2044]) -Greedy action tensor([-0.5576, -1.2990, 1.0499, -0.7999]) tensor([0.1379, 0.0657, 0.6882, 0.1082]) -Greedy action tensor([ 0.1622, 0.6759, -0.5154, -0.3235]) tensor([0.2635, 0.4405, 0.1338, 0.1621]) -Greedy action tensor([-1.0777, -0.8426, 0.7451, -1.4562]) tensor([0.1094, 0.1384, 0.6772, 0.0749]) -Greedy action tensor([-0.1011, 0.3439, 0.6604, -0.1679]) tensor([0.1774, 0.2768, 0.3799, 0.1659]) -Greedy action tensor([ 1.2863, -0.3459, -0.1982, 0.2847]) tensor([0.5588, 0.1093, 0.1266, 0.2053]) -Greedy action tensor([ 0.1379, 0.2771, -0.5366, -0.6550]) tensor([0.3214, 0.3694, 0.1637, 0.1454]) -Greedy action tensor([-0.0732, 0.2588, 0.1013, -0.2379]) tensor([0.2256, 0.3144, 0.2686, 0.1913]) -Greedy action tensor([ 0.6749, -0.4211, 0.5405, -0.0583]) tensor([0.3719, 0.1243, 0.3251, 0.1787]) -Greedy action tensor([ 0.5337, -0.6463, -0.2601, 0.0552]) tensor([0.4203, 0.1292, 0.1900, 0.2605]) -Greedy action tensor([ 0.9551, 0.6432, 0.2511, -0.6497]) tensor([0.4119, 0.3016, 0.2037, 0.0828]) -Greedy action tensor([-0.2499, -0.6865, -0.3954, -0.4882]) tensor([0.3032, 0.1959, 0.2621, 0.2389]) -Greedy action tensor([ 0.3479, 0.3539, 0.0153, -0.0693]) tensor([0.2957, 0.2975, 0.2120, 0.1948]) -Greedy action tensor([-0.9031, -0.3668, 1.1763, -0.8111]) tensor([0.0847, 0.1448, 0.6776, 0.0929]) -Greedy action tensor([-0.0663, -0.2154, 0.6058, -0.6921]) tensor([0.2296, 0.1978, 0.4497, 0.1228]) -Greedy action tensor([ 0.3120, 0.1092, 0.1795, -0.6054]) tensor([0.3234, 0.2640, 0.2833, 0.1292]) -Greedy action tensor([ 0.3096, 0.2343, -0.2950, -0.4939]) tensor([0.3423, 0.3175, 0.1870, 0.1533]) -Greedy action tensor([-0.5051, -1.0302, 1.5049, -0.4290]) tensor([0.0987, 0.0584, 0.7365, 0.1065]) -Greedy action tensor([1.1893, 0.0967, 0.0965, 0.8799]) tensor([0.4159, 0.1395, 0.1394, 0.3052]) -Greedy action tensor([-0.7058, -0.2352, -0.8551, -0.4833]) tensor([0.2122, 0.3398, 0.1828, 0.2651]) -Greedy action tensor([ 0.9550, 0.1868, 0.1482, -0.2776]) tensor([0.4542, 0.2107, 0.2027, 0.1324]) -Greedy action tensor([-0.1251, -0.6120, -0.4017, 0.0842]) tensor([0.2773, 0.1704, 0.2103, 0.3419]) -Greedy action tensor([1.3462, 0.3850, 0.3339, 0.5090]) tensor([0.4590, 0.1755, 0.1668, 0.1987]) -Greedy action tensor([ 1.2170, -0.2559, -0.1406, -0.0086]) tensor([0.5617, 0.1288, 0.1445, 0.1649]) -Greedy action tensor([ 0.1929, 0.2529, -0.2120, 1.1692]) tensor([0.1858, 0.1972, 0.1239, 0.4931]) -Greedy action tensor([ 0.8638, 0.1410, -0.0787, 0.3314]) tensor([0.4061, 0.1971, 0.1583, 0.2385]) -Greedy action tensor([-0.0497, -1.3792, 0.5670, -1.3712]) tensor([0.2955, 0.0782, 0.5475, 0.0788]) -Greedy action tensor([-0.4380, -0.5353, -0.5194, -0.5180]) tensor([0.2665, 0.2418, 0.2457, 0.2460]) -Greedy action tensor([ 0.0651, -1.8701, 1.0074, 0.0507]) tensor([0.2129, 0.0307, 0.5464, 0.2099]) -Greedy action tensor([-0.4343, -0.3512, -0.2813, -0.7819]) tensor([0.2526, 0.2745, 0.2944, 0.1785]) -Greedy action tensor([-0.3355, 0.3139, -0.6435, -0.1976]) tensor([0.2085, 0.3991, 0.1532, 0.2393]) -Greedy action tensor([ 0.6945, -0.8997, -0.3365, 0.9971]) tensor([0.3433, 0.0697, 0.1224, 0.4646]) -Greedy action tensor([-0.3690, -1.5421, 0.7683, -1.3908]) tensor([0.2089, 0.0646, 0.6513, 0.0752]) -Greedy action tensor([ 0.1400, 0.6240, -0.5359, -0.5078]) tensor([0.2737, 0.4440, 0.1392, 0.1432]) -Greedy action tensor([ 0.1247, -0.5361, -0.4033, 0.8636]) tensor([0.2381, 0.1230, 0.1404, 0.4985]) -Greedy action tensor([ 0.0551, -0.4744, 0.2331, -1.0272]) tensor([0.3202, 0.1886, 0.3826, 0.1085]) -Greedy action tensor([ 0.4910, 0.4560, -0.5274, -0.7674]) tensor([0.3830, 0.3698, 0.1383, 0.1088]) -Greedy action tensor([-0.9502, -0.7927, -0.5122, -0.6522]) tensor([0.1973, 0.2310, 0.3058, 0.2659]) -Greedy action tensor([0.5932, 0.8102, 0.0173, 0.9818]) tensor([0.2337, 0.2903, 0.1314, 0.3447]) -Greedy action tensor([-0.0176, -0.1514, 0.2438, -1.8702]) tensor([0.3003, 0.2627, 0.3900, 0.0471]) -Greedy action tensor([-0.0124, -0.5877, 0.7298, -0.1643]) tensor([0.2211, 0.1244, 0.4645, 0.1900]) -Greedy action tensor([ 1.1030, 0.1131, -0.3531, -0.0667]) tensor([0.5221, 0.1940, 0.1217, 0.1621]) -Greedy action tensor([-0.3059, -0.6946, -1.2158, 0.2901]) tensor([0.2567, 0.1740, 0.1033, 0.4659]) -Greedy action tensor([-0.3470, -0.1365, -0.9295, -0.1393]) tensor([0.2485, 0.3068, 0.1388, 0.3059]) -Greedy action tensor([-0.0096, -0.9998, -0.4110, -0.3094]) tensor([0.3595, 0.1335, 0.2406, 0.2664]) -Greedy action tensor([-0.1204, -1.6365, 0.9266, -0.8325]) tensor([0.2193, 0.0482, 0.6249, 0.1076]) -Greedy action tensor([-0.1830, -1.3574, -0.3880, -1.7592]) tensor([0.4291, 0.1326, 0.3496, 0.0887]) -Greedy action tensor([-0.9579, 0.0151, -1.1194, -0.1114]) tensor([0.1464, 0.3875, 0.1246, 0.3414]) -Greedy action tensor([ 0.5453, -0.5433, -0.2494, 0.4918]) tensor([0.3655, 0.1230, 0.1651, 0.3464]) -Greedy action tensor([ 0.2024, -0.9957, -0.7935, 0.6230]) tensor([0.3131, 0.0945, 0.1156, 0.4768]) -Greedy action tensor([ 0.7759, -0.4072, -0.0773, -0.2214]) tensor([0.4759, 0.1458, 0.2028, 0.1756]) -Greedy action tensor([ 0.9752, -0.7189, -0.0915, -0.3956]) tensor([0.5612, 0.1031, 0.1931, 0.1425]) -Greedy action tensor([ 0.4656, -0.0970, -0.0157, -0.0397]) tensor([0.3583, 0.2041, 0.2214, 0.2162]) -Greedy action tensor([ 0.8680, -0.2149, -0.0067, -0.1347]) tensor([0.4711, 0.1595, 0.1965, 0.1729]) -Greedy action tensor([ 0.9416, -0.9540, -0.0505, -0.5190]) tensor([0.5704, 0.0857, 0.2115, 0.1324]) -Greedy action tensor([ 0.2466, -0.0202, -0.1312, -0.2453]) tensor([0.3265, 0.2500, 0.2238, 0.1997]) -Greedy action tensor([ 0.4312, 0.0620, -0.1039, 0.1236]) tensor([0.3320, 0.2295, 0.1944, 0.2441]) -Greedy action tensor([ 1.1082, -0.6896, -0.0809, -0.5824]) tensor([0.6044, 0.1001, 0.1840, 0.1115]) -Greedy action tensor([ 1.0670, -0.5346, -0.0504, -0.5320]) tensor([0.5778, 0.1165, 0.1890, 0.1168]) -Greedy action tensor([ 1.1334, -0.9478, 0.1938, -0.7271]) tensor([0.5984, 0.0747, 0.2339, 0.0931]) -Greedy action tensor([ 1.3760, -0.9906, 0.2327, -0.6020]) tensor([0.6448, 0.0605, 0.2055, 0.0892]) -Greedy action tensor([ 0.9087, -0.4169, -0.0577, -0.3729]) tensor([0.5198, 0.1381, 0.1978, 0.1443]) -Greedy action tensor([ 1.1536, -0.7500, -0.0821, -0.4240]) tensor([0.6075, 0.0905, 0.1766, 0.1254]) -Greedy action tensor([ 0.8005, -0.4731, 0.0206, -0.2049]) tensor([0.4752, 0.1330, 0.2179, 0.1739]) -Greedy action tensor([ 0.8385, -0.5151, -0.0205, -0.2039]) tensor([0.4915, 0.1270, 0.2082, 0.1733]) -Greedy action tensor([ 0.5354, -0.3008, -0.0172, -0.1589]) tensor([0.3987, 0.1728, 0.2294, 0.1991]) -Greedy action tensor([ 1.1782, -0.6574, 0.0936, -0.6169]) tensor([0.6011, 0.0959, 0.2032, 0.0998]) -Greedy action tensor([ 0.8688, -0.5915, 0.2092, -0.2720]) tensor([0.4834, 0.1122, 0.2499, 0.1545]) -Greedy action tensor([ 0.5797, -0.3138, -0.0854, -0.2242]) tensor([0.4218, 0.1726, 0.2169, 0.1888]) -Greedy action tensor([ 0.8704, -0.3639, -0.1548, -0.5884]) tensor([0.5313, 0.1546, 0.1906, 0.1235]) -Greedy action tensor([ 0.6157, -0.3003, 0.0408, -0.1466]) tensor([0.4116, 0.1647, 0.2316, 0.1920]) -Greedy action tensor([ 1.0690, -1.0204, 0.1135, -0.4309]) tensor([0.5775, 0.0715, 0.2221, 0.1289]) -Greedy action tensor([ 0.7948, -0.6964, 0.0688, -0.3626]) tensor([0.4943, 0.1113, 0.2392, 0.1553]) -Greedy action tensor([ 1.0374, -0.5406, -0.1114, -0.4733]) tensor([0.5733, 0.1183, 0.1818, 0.1266]) -Greedy action tensor([ 0.8093, -0.5457, 0.0064, -0.2585]) tensor([0.4879, 0.1258, 0.2186, 0.1677]) -Greedy action tensor([ 0.6912, -0.4898, -0.0623, -0.4030]) tensor([0.4734, 0.1453, 0.2228, 0.1585]) -Greedy action tensor([ 0.7209, -0.4995, 0.0719, -0.1595]) tensor([0.4480, 0.1322, 0.2341, 0.1857]) -Greedy action tensor([ 0.2771, 0.1957, -0.0158, -0.0145]) tensor([0.2928, 0.2699, 0.2185, 0.2188]) -Greedy action tensor([ 0.8929, -0.7634, 0.1143, -0.7517]) tensor([0.5426, 0.1036, 0.2491, 0.1048]) -Greedy action tensor([ 1.0943, -0.6328, -0.0293, -0.6172]) tensor([0.5940, 0.1056, 0.1931, 0.1073]) -Greedy action tensor([ 0.4660, -0.0776, 0.0603, -0.1477]) tensor([0.3586, 0.2082, 0.2390, 0.1941]) -Greedy action tensor([ 0.4759, -0.2904, 0.0559, -0.2285]) tensor([0.3822, 0.1776, 0.2512, 0.1890]) -Greedy action tensor([ 0.5521, 0.0343, -0.0665, 0.0489]) tensor([0.3651, 0.2175, 0.1967, 0.2207]) -Greedy action tensor([ 0.7893, 0.1268, -0.4514, -0.2316]) tensor([0.4619, 0.2381, 0.1336, 0.1664]) -Greedy action tensor([ 0.5091, 0.1761, -0.2646, 0.1671]) tensor([0.3462, 0.2481, 0.1597, 0.2459]) -Greedy action tensor([ 0.6920, -0.1332, 0.0868, -0.3863]) tensor([0.4302, 0.1885, 0.2349, 0.1464]) -Greedy action tensor([ 0.8588, -0.5120, -0.0410, -0.4191]) tensor([0.5157, 0.1309, 0.2097, 0.1437]) -Greedy action tensor([ 1.2500, -0.4383, -0.2181, -0.2796]) tensor([0.6128, 0.1133, 0.1412, 0.1328]) -Greedy action tensor([ 1.1553, -0.8395, 0.2203, -0.4372]) tensor([0.5773, 0.0785, 0.2267, 0.1175]) -Greedy action tensor([ 1.0809, -0.6056, -0.1667, -0.3488]) tensor([0.5842, 0.1082, 0.1678, 0.1398]) -Greedy action tensor([ 0.4639, -0.0039, -0.0753, 0.0148]) tensor([0.3511, 0.2200, 0.2048, 0.2241]) -Greedy action tensor([ 0.9576, -0.4144, -0.1153, -0.5031]) tensor([0.5472, 0.1388, 0.1871, 0.1270]) -Greedy action tensor([ 0.5201, -0.2820, -0.0656, -0.1959]) tensor([0.4010, 0.1798, 0.2232, 0.1960]) -Greedy action tensor([ 1.1402, -0.6813, -0.0307, -0.6285]) tensor([0.6089, 0.0985, 0.1888, 0.1038]) -Greedy action tensor([ 0.4831, -0.4155, -0.0656, -0.1737]) tensor([0.3995, 0.1626, 0.2308, 0.2071]) -Greedy action tensor([ 0.9530, -0.3440, -0.0181, -0.2520]) tensor([0.5124, 0.1401, 0.1940, 0.1536]) -Greedy action tensor([ 1.0510, -0.7372, -0.1403, -0.4250]) tensor([0.5884, 0.0984, 0.1788, 0.1345]) -Greedy action tensor([ 0.7305, -0.4393, -0.1079, -0.4938]) tensor([0.4910, 0.1524, 0.2123, 0.1443]) -Greedy action tensor([ 1.3741, -0.8220, 0.0607, -0.7403]) tensor([0.6663, 0.0741, 0.1792, 0.0804]) -Greedy action tensor([ 0.9858, -0.3789, -0.0913, -0.6037]) tensor([0.5555, 0.1419, 0.1892, 0.1133]) -Greedy action tensor([ 8.4102e-01, -8.4605e-02, 8.0202e-02, -5.1499e-04]) tensor([0.4358, 0.1727, 0.2036, 0.1879]) -Greedy action tensor([ 0.3311, -0.1296, -0.0809, -0.1761]) tensor([0.3454, 0.2179, 0.2288, 0.2080]) -Greedy action tensor([ 0.3687, 0.0083, -0.0286, -0.0200]) tensor([0.3281, 0.2289, 0.2206, 0.2225]) -Greedy action tensor([ 0.8891, -0.4293, 0.1845, -0.4253]) tensor([0.4925, 0.1318, 0.2434, 0.1323]) -Greedy action tensor([ 0.6801, -0.2635, -0.1746, -0.1535]) tensor([0.4446, 0.1731, 0.1891, 0.1932]) -Greedy action tensor([ 1.5516, -1.0813, 0.1184, -0.8203]) tensor([0.7124, 0.0512, 0.1699, 0.0665]) -Greedy action tensor([ 0.9738, -0.5832, -0.1422, -0.3098]) tensor([0.5508, 0.1161, 0.1805, 0.1526]) -Greedy action tensor([ 0.9956, -0.8271, 0.1080, -0.4374]) tensor([0.5519, 0.0892, 0.2272, 0.1317]) -Greedy action tensor([ 0.6405, -0.3046, -0.0244, -0.1040]) tensor([0.4205, 0.1634, 0.2163, 0.1997]) -Greedy action tensor([ 0.9709, -0.2425, -0.1871, -0.4748]) tensor([0.5415, 0.1609, 0.1701, 0.1276]) -Greedy action tensor([ 0.7026, -0.6590, 0.3946, -0.5032]) tensor([0.4366, 0.1119, 0.3208, 0.1307]) -Greedy action tensor([ 1.4821, -0.7673, -0.1681, -0.6715]) tensor([0.7075, 0.0746, 0.1358, 0.0821]) -Greedy action tensor([ 0.8422, -0.7945, 0.1358, -0.4927]) tensor([0.5125, 0.0997, 0.2529, 0.1349]) -Greedy action tensor([ 0.3491, 0.0295, -0.0108, -0.0703]) tensor([0.3245, 0.2357, 0.2264, 0.2133]) -Greedy action tensor([ 0.8123, -0.5359, -0.0161, -0.0701]) tensor([0.4739, 0.1231, 0.2070, 0.1961]) -Greedy action tensor([ 0.7106, -0.2513, -0.2237, -0.5346]) tensor([0.4848, 0.1853, 0.1904, 0.1396]) -Greedy action tensor([ 0.8397, -0.6092, 0.0126, -0.4066]) tensor([0.5103, 0.1198, 0.2232, 0.1467]) -Greedy action tensor([ 1.0479, -0.5824, 0.0795, -0.2758]) tensor([0.5430, 0.1063, 0.2062, 0.1445]) -Greedy action tensor([ 0.7449, -0.4584, 0.0287, -0.3697]) tensor([0.4724, 0.1418, 0.2308, 0.1550]) -Greedy action tensor([ 0.7012, -0.4316, -0.0158, -0.0852]) tensor([0.4413, 0.1422, 0.2155, 0.2010]) -Greedy action tensor([ 0.8706, -0.5443, -0.0907, -0.2667]) tensor([0.5139, 0.1248, 0.1965, 0.1648]) -Greedy action tensor([ 0.7418, -0.4811, -0.1625, -0.4728]) tensor([0.5010, 0.1475, 0.2028, 0.1487]) -Greedy action tensor([ 0.7745, -0.4504, -0.0368, -0.2984]) tensor([0.4808, 0.1412, 0.2136, 0.1644]) -Greedy action tensor([ 0.8613, -0.7428, -0.1040, -0.4074]) tensor([0.5367, 0.1079, 0.2044, 0.1509]) -Greedy action tensor([ 0.8238, 0.1829, -0.0592, -0.1650]) tensor([0.4325, 0.2278, 0.1788, 0.1609]) -Greedy action tensor([ 0.4792, -0.2737, -0.0250, -0.0027]) tensor([0.3714, 0.1749, 0.2243, 0.2294]) -Greedy action tensor([ 1.0453, -0.3755, -0.2247, -0.3491]) tensor([0.5649, 0.1364, 0.1586, 0.1401]) -Greedy action tensor([ 0.7199, -0.4051, -0.0820, -0.4391]) tensor([0.4792, 0.1556, 0.2149, 0.1504]) -Greedy action tensor([ 0.8340, -0.4309, -0.1218, -0.3215]) tensor([0.5046, 0.1424, 0.1940, 0.1589]) -Greedy action tensor([ 1.3511, -0.8450, -0.0762, -0.7797]) tensor([0.6803, 0.0757, 0.1632, 0.0808]) -Greedy action tensor([ 0.5271, -0.2848, 0.1004, -0.1289]) tensor([0.3823, 0.1697, 0.2495, 0.1984]) -Greedy action tensor([ 1.5998, -0.1477, -0.9496, 0.4157]) tensor([0.6417, 0.1118, 0.0501, 0.1964]) -Greedy action tensor([ 1.9953, 0.1521, -0.8305, -0.2938]) tensor([0.7582, 0.1200, 0.0449, 0.0769]) -Greedy action tensor([ 1.2940, -0.8407, -0.3703, 0.4244]) tensor([0.5791, 0.0685, 0.1096, 0.2427]) -Greedy action tensor([ 1.6154, 0.3062, -0.5317, -0.3631]) tensor([0.6557, 0.1771, 0.0766, 0.0907]) -Greedy action tensor([ 1.2807, -0.6502, -0.5892, 0.6665]) tensor([0.5434, 0.0788, 0.0838, 0.2940]) -Greedy action tensor([ 1.3993, -0.3233, -0.4229, 0.3528]) tensor([0.5912, 0.1056, 0.0956, 0.2076]) -Greedy action tensor([ 1.0828, -0.2765, -0.8914, 0.7427]) tensor([0.4745, 0.1219, 0.0659, 0.3377]) -Greedy action tensor([ 1.7381, -0.3724, -0.6905, 0.2239]) tensor([0.6996, 0.0848, 0.0617, 0.1539]) -Greedy action tensor([ 1.6899, -0.4542, -0.8325, 0.5996]) tensor([0.6521, 0.0764, 0.0523, 0.2192]) -Greedy action tensor([2.4239, 0.7615, 0.5538, 0.1097]) tensor([0.6932, 0.1315, 0.1068, 0.0685]) -Greedy action tensor([ 1.9144, -0.7600, -0.5947, 0.2009]) tensor([0.7516, 0.0518, 0.0611, 0.1355]) -Greedy action tensor([ 1.4710, -0.0850, -0.4254, -0.0190]) tensor([0.6303, 0.1330, 0.0946, 0.1421]) -Greedy action tensor([ 1.4215e+00, -4.5240e-04, -3.1875e-01, 5.9386e-01]) tensor([0.5394, 0.1301, 0.0947, 0.2358]) -Greedy action tensor([ 1.6997, -0.1100, -0.3478, -0.2184]) tensor([0.6946, 0.1137, 0.0896, 0.1020]) -Greedy action tensor([ 1.6557, -0.7907, -0.1958, 0.6611]) tensor([0.6198, 0.0537, 0.0973, 0.2292]) -Greedy action tensor([ 1.3116, -0.1773, -0.9253, 0.1458]) tensor([0.6082, 0.1372, 0.0650, 0.1896]) -Greedy action tensor([ 1.6791, -0.0505, -0.9000, 0.6485]) tensor([0.6211, 0.1102, 0.0471, 0.2216]) -Greedy action tensor([ 2.1832, -0.9406, -0.2022, 0.7177]) tensor([0.7315, 0.0322, 0.0673, 0.1690]) -Greedy action tensor([ 1.6994, -0.5665, -0.4861, 0.2140]) tensor([0.6932, 0.0719, 0.0779, 0.1569]) -Greedy action tensor([ 1.3072, 0.1781, -0.2982, 0.1347]) tensor([0.5453, 0.1763, 0.1095, 0.1688]) -Greedy action tensor([ 1.3933, -0.8947, -0.4379, 0.2135]) tensor([0.6373, 0.0647, 0.1021, 0.1959]) -Greedy action tensor([ 1.5538, -0.3693, -0.4401, 0.3077]) tensor([0.6370, 0.0931, 0.0867, 0.1832]) -Greedy action tensor([ 1.7414, 0.0921, 0.0494, -0.2212]) tensor([0.6593, 0.1267, 0.1214, 0.0926]) -Greedy action tensor([ 1.8542, -1.3047, -0.3584, -0.2697]) tensor([0.7865, 0.0334, 0.0861, 0.0940]) -Greedy action tensor([ 1.4185, 0.2637, -0.2827, -0.2209]) tensor([0.5911, 0.1863, 0.1079, 0.1147]) -Greedy action tensor([ 1.5674, -0.7080, -0.5044, 0.5661]) tensor([0.6265, 0.0644, 0.0789, 0.2302]) -Greedy action tensor([ 1.3773, -0.2772, -0.5170, 0.6488]) tensor([0.5482, 0.1048, 0.0825, 0.2646]) -Greedy action tensor([ 1.8226, -0.9742, -0.5756, -0.0148]) tensor([0.7627, 0.0465, 0.0693, 0.1214]) -Greedy action tensor([ 1.5627, 0.4440, -0.6195, 0.5333]) tensor([0.5566, 0.1818, 0.0628, 0.1988]) -Greedy action tensor([ 1.4445, -0.2643, -0.3933, 0.2276]) tensor([0.6111, 0.1107, 0.0973, 0.1810]) -Greedy action tensor([ 1.5290, -0.4355, -0.5733, 0.5036]) tensor([0.6169, 0.0865, 0.0754, 0.2213]) -Greedy action tensor([ 1.4063, -0.0284, -0.0756, -0.4812]) tensor([0.6185, 0.1473, 0.1405, 0.0937]) -Greedy action tensor([ 1.3916, -0.3224, -0.3724, 0.3940]) tensor([0.5813, 0.1047, 0.0996, 0.2144]) -Greedy action tensor([ 1.3481, -0.5200, -0.5120, 0.2448]) tensor([0.6091, 0.0940, 0.0948, 0.2021]) -Greedy action tensor([ 1.3343, -0.4265, -0.8406, 0.2817]) tensor([0.6118, 0.1052, 0.0695, 0.2135]) -Greedy action tensor([ 1.1226, -0.3656, -0.4866, -0.1302]) tensor([0.5843, 0.1319, 0.1169, 0.1669]) -Greedy action tensor([ 1.2373, -0.4894, -0.8067, 0.6466]) tensor([0.5372, 0.0956, 0.0696, 0.2976]) -Greedy action tensor([ 1.4565, -0.2152, -1.0100, 0.4054]) tensor([0.6164, 0.1158, 0.0523, 0.2155]) -Greedy action tensor([ 0.6323, -0.0681, 0.1306, -0.2192]) tensor([0.3955, 0.1963, 0.2394, 0.1688]) -Greedy action tensor([ 2.3300, -1.4325, -0.3073, 0.6101]) tensor([0.7850, 0.0182, 0.0562, 0.1406]) -Greedy action tensor([ 1.3357, -0.2685, -0.7203, -0.1167]) tensor([0.6398, 0.1286, 0.0819, 0.1497]) -Greedy action tensor([ 1.4291, -0.8583, -0.4181, 0.1153]) tensor([0.6545, 0.0664, 0.1032, 0.1759]) -Greedy action tensor([ 1.6518, -0.2367, -0.8024, 0.4650]) tensor([0.6483, 0.0981, 0.0557, 0.1979]) -Greedy action tensor([ 1.6209, -0.1471, -0.7184, 0.2875]) tensor([0.6533, 0.1115, 0.0630, 0.1722]) -Greedy action tensor([ 1.8082, -0.4973, -0.5267, 0.4246]) tensor([0.6910, 0.0689, 0.0669, 0.1732]) -Greedy action tensor([ 1.3118, 0.2798, -1.4376, 0.2649]) tensor([0.5646, 0.2011, 0.0361, 0.1982]) -Greedy action tensor([ 1.2009, -0.3705, -0.5006, 0.3520]) tensor([0.5500, 0.1143, 0.1003, 0.2353]) -Greedy action tensor([ 1.6323, -0.3343, -0.5127, 0.3492]) tensor([0.6518, 0.0912, 0.0763, 0.1807]) -Greedy action tensor([ 1.8084, 0.5131, -0.1317, 0.3474]) tensor([0.6062, 0.1660, 0.0871, 0.1406]) -Greedy action tensor([ 1.8509, -0.8366, -0.7398, 0.5940]) tensor([0.7005, 0.0477, 0.0525, 0.1993]) -Greedy action tensor([ 1.0139, -0.1375, -0.9332, 0.1460]) tensor([0.5323, 0.1683, 0.0760, 0.2235]) -Greedy action tensor([ 1.0000, -0.2465, -0.6600, 0.3390]) tensor([0.5015, 0.1442, 0.0954, 0.2589]) -Greedy action tensor([ 1.4464, -0.4610, -0.1620, 0.4712]) tensor([0.5794, 0.0860, 0.1160, 0.2185]) -Greedy action tensor([ 1.8123, -0.5732, -0.9324, 0.3490]) tensor([0.7206, 0.0663, 0.0463, 0.1668]) -Greedy action tensor([ 1.2391, -0.2370, -0.7225, 0.0919]) tensor([0.5929, 0.1355, 0.0834, 0.1883]) -Greedy action tensor([ 1.2577e+00, -8.0549e-02, -4.5281e-01, 1.3128e-04]) tensor([0.5789, 0.1519, 0.1047, 0.1646]) -Greedy action tensor([ 0.9539, -0.3648, -0.4018, 0.2667]) tensor([0.4931, 0.1319, 0.1271, 0.2480]) -Greedy action tensor([ 1.4992, -0.4373, -1.2021, 0.3718]) tensor([0.6514, 0.0939, 0.0437, 0.2110]) -Greedy action tensor([ 1.4128, -0.3661, -0.7467, 0.4657]) tensor([0.5981, 0.1010, 0.0690, 0.2320]) -Greedy action tensor([ 1.1411, -0.5103, -0.5618, 0.1881]) tensor([0.5683, 0.1090, 0.1035, 0.2191]) -Greedy action tensor([ 2.1274, -0.9992, -0.2995, 0.3185]) tensor([0.7716, 0.0338, 0.0681, 0.1264]) -Greedy action tensor([ 1.1901, -0.4186, -0.7310, 0.6257]) tensor([0.5221, 0.1045, 0.0765, 0.2969]) -Greedy action tensor([ 1.6471, -0.7575, -0.5991, 0.7950]) tensor([0.6163, 0.0556, 0.0652, 0.2629]) -Greedy action tensor([ 1.6726, -0.9433, -0.0729, 0.6969]) tensor([0.6155, 0.0450, 0.1074, 0.2320]) -Greedy action tensor([ 0.9366, -0.2742, -0.9945, 0.3434]) tensor([0.5011, 0.1493, 0.0727, 0.2769]) -Greedy action tensor([ 1.4007, -0.6026, -0.8714, 0.1490]) tensor([0.6562, 0.0885, 0.0676, 0.1877]) -Greedy action tensor([ 1.3152, -0.3639, -0.4435, 0.4310]) tensor([0.5644, 0.1053, 0.0972, 0.2331]) -Greedy action tensor([ 2.3390, -0.9273, -0.0087, 0.5286]) tensor([0.7708, 0.0294, 0.0737, 0.1261]) -Greedy action tensor([ 1.1860, -0.2471, -0.2935, -0.0095]) tensor([0.5653, 0.1349, 0.1288, 0.1710]) -Greedy action tensor([2.0083, 0.2158, 0.1298, 0.3025]) tensor([0.6662, 0.1110, 0.1018, 0.1210]) -Greedy action tensor([ 1.2899, -0.7375, -0.2196, 0.1483]) tensor([0.5981, 0.0788, 0.1322, 0.1910]) -Greedy action tensor([ 2.1617, -1.2039, 0.0935, 0.1672]) tensor([0.7710, 0.0266, 0.0975, 0.1049]) -Greedy action tensor([ 0.9805, -0.3031, -0.5755, -0.0113]) tensor([0.5379, 0.1490, 0.1135, 0.1995]) -Greedy action tensor([ 1.0371, -0.0766, -0.7045, 0.0658]) tensor([0.5313, 0.1744, 0.0931, 0.2011]) -Greedy action tensor([ 1.2335, -0.3785, -0.2826, 0.2024]) tensor([0.5632, 0.1123, 0.1236, 0.2008]) -Greedy action tensor([ 1.3816, -0.4927, -0.2433, 0.3374]) tensor([0.5874, 0.0901, 0.1157, 0.2067]) -Greedy action tensor([ 1.1730, 0.2225, -1.4204, -0.4068]) tensor([0.5998, 0.2318, 0.0448, 0.1236]) -Greedy action tensor([ 1.2992, -0.3271, -0.4135, 0.5808]) tensor([0.5363, 0.1055, 0.0967, 0.2615]) -Greedy action tensor([ 1.3053, -0.4252, -1.0283, 0.4011]) tensor([0.5956, 0.1055, 0.0577, 0.2411]) -Greedy action tensor([ 1.3391, -0.1876, -0.1748, -0.0276]) tensor([0.5909, 0.1284, 0.1300, 0.1507]) -Greedy action tensor([ 1.6495, -0.9476, -0.9582, 0.4570]) tensor([0.6889, 0.0513, 0.0508, 0.2090]) -Greedy action tensor([-1.8514, -0.3423, 0.6121, -0.1317]) tensor([0.0438, 0.1979, 0.5140, 0.2443]) -Greedy action tensor([-0.5963, -0.3529, 0.3680, 0.6578]) tensor([0.1190, 0.1518, 0.3121, 0.4171]) -Greedy action tensor([-1.9004, -0.4253, 0.6411, -0.1619]) tensor([0.0421, 0.1840, 0.5345, 0.2394]) -Greedy action tensor([-1.9218, -0.4300, 0.6540, -0.1698]) tensor([0.0411, 0.1825, 0.5396, 0.2368]) -Greedy action tensor([-1.4193, 0.5237, 0.2944, -0.0774]) tensor([0.0576, 0.4022, 0.3197, 0.2205]) -Greedy action tensor([-1.7013, -0.3177, 0.5969, -0.0046]) tensor([0.0490, 0.1955, 0.4880, 0.2674]) -Greedy action tensor([-1.8791, -0.3829, 0.6373, -0.1423]) tensor([0.0425, 0.1898, 0.5263, 0.2414]) -Greedy action tensor([-1.8915, -0.4501, 0.6458, -0.1538]) tensor([0.0425, 0.1794, 0.5368, 0.2413]) -Greedy action tensor([-1.8002, -0.2998, 0.5756, -0.1132]) tensor([0.0462, 0.2071, 0.4971, 0.2496]) -Greedy action tensor([-1.8829, -0.0189, 0.5770, -0.1550]) tensor([0.0404, 0.2602, 0.4723, 0.2271]) -Greedy action tensor([-1.0264, 0.7880, 0.1858, -0.0391]) tensor([0.0759, 0.4656, 0.2549, 0.2036]) -Greedy action tensor([-1.9092, -0.4516, 0.6528, -0.1636]) tensor([0.0417, 0.1791, 0.5404, 0.2389]) -Greedy action tensor([-1.8350, -0.3449, 0.6323, -0.1253]) tensor([0.0439, 0.1950, 0.5182, 0.2429]) -Greedy action tensor([-1.9073, -0.3724, 0.6449, -0.1573]) tensor([0.0413, 0.1915, 0.5297, 0.2375]) -Greedy action tensor([-1.9250, -0.4100, 0.6543, -0.1726]) tensor([0.0408, 0.1856, 0.5382, 0.2354]) -Greedy action tensor([-1.8962, -0.5245, 0.8928, -0.0084]) tensor([0.0360, 0.1417, 0.5848, 0.2375]) -Greedy action tensor([-1.8853, -0.4786, 0.6489, -0.1447]) tensor([0.0428, 0.1745, 0.5390, 0.2437]) -Greedy action tensor([-1.7396, 0.1648, 0.4907, -0.0466]) tensor([0.0445, 0.2991, 0.4143, 0.2421]) -Greedy action tensor([-1.9121, -0.4505, 0.6536, -0.1668]) tensor([0.0416, 0.1793, 0.5409, 0.2381]) -Greedy action tensor([-1.9365, -0.4389, 0.6633, -0.1760]) tensor([0.0404, 0.1807, 0.5439, 0.2350]) -Greedy action tensor([-1.3660, 0.2942, 0.2901, -0.0037]) tensor([0.0649, 0.3415, 0.3401, 0.2535]) -Greedy action tensor([-1.8916, -0.3274, 0.6287, -0.1462]) tensor([0.0418, 0.1996, 0.5193, 0.2393]) -Greedy action tensor([-1.8884, -0.4445, 0.6410, -0.1575]) tensor([0.0427, 0.1809, 0.5355, 0.2410]) -Greedy action tensor([-1.8456, 0.0256, 0.5638, -0.1682]) tensor([0.0417, 0.2709, 0.4641, 0.2232]) -Greedy action tensor([-1.9371, -0.4584, 0.6701, -0.1737]) tensor([0.0404, 0.1770, 0.5473, 0.2354]) -Greedy action tensor([-1.8871, -0.4291, 0.6361, -0.1520]) tensor([0.0427, 0.1834, 0.5320, 0.2419]) -Greedy action tensor([-0.6447, 0.9819, 0.0441, 0.2909]) tensor([0.0941, 0.4787, 0.1874, 0.2398]) -Greedy action tensor([-0.4345, 0.9377, -0.0141, 0.1438]) tensor([0.1212, 0.4781, 0.1846, 0.2161]) -Greedy action tensor([-1.8511, -0.3811, 0.6067, -0.1116]) tensor([0.0440, 0.1914, 0.5140, 0.2506]) -Greedy action tensor([-1.3996, -0.6110, 0.3732, 0.1019]) tensor([0.0737, 0.1621, 0.4337, 0.3306]) -Greedy action tensor([-1.9049, -0.4128, 0.6479, -0.1561]) tensor([0.0416, 0.1850, 0.5343, 0.2391]) -Greedy action tensor([-0.8162, 0.9253, 0.1385, 0.0775]) tensor([0.0851, 0.4857, 0.2211, 0.2080]) -Greedy action tensor([-1.8894, -0.3928, 0.6298, -0.1597]) tensor([0.0425, 0.1899, 0.5279, 0.2397]) -Greedy action tensor([-1.9008, -0.4191, 0.6423, -0.1605]) tensor([0.0420, 0.1848, 0.5340, 0.2393]) -Greedy action tensor([-1.0253, -0.6676, 0.2109, 0.3455]) tensor([0.1019, 0.1458, 0.3509, 0.4014]) -Greedy action tensor([-1.7790, -0.4137, 0.5855, -0.0816]) tensor([0.0476, 0.1864, 0.5062, 0.2598]) -Greedy action tensor([-1.7068, -0.4732, 0.5421, -0.0961]) tensor([0.0529, 0.1815, 0.5010, 0.2646]) -Greedy action tensor([-1.8695, -0.2505, 0.6050, -0.1404]) tensor([0.0424, 0.2143, 0.5041, 0.2392]) -Greedy action tensor([-1.9050, -0.4326, 0.6501, -0.1647]) tensor([0.0418, 0.1822, 0.5379, 0.2381]) -Greedy action tensor([-1.8726, -0.3469, 0.6145, -0.1492]) tensor([0.0431, 0.1980, 0.5177, 0.2412]) -Greedy action tensor([-1.4280, 0.5535, 0.3723, -0.1137]) tensor([0.0555, 0.4024, 0.3357, 0.2065]) -Greedy action tensor([-1.9392, -0.4411, 0.6598, -0.1779]) tensor([0.0404, 0.1808, 0.5436, 0.2352]) -Greedy action tensor([-1.6608, -0.1992, 0.4919, -0.0637]) tensor([0.0530, 0.2287, 0.4564, 0.2619]) -Greedy action tensor([-1.8374, -0.4822, 0.7018, 0.0135]) tensor([0.0418, 0.1622, 0.5298, 0.2662]) -Greedy action tensor([-0.9587, -0.4929, 0.7111, 0.8090]) tensor([0.0727, 0.1158, 0.3859, 0.4256]) -Greedy action tensor([-1.6803, -0.3876, 0.5529, -0.0680]) tensor([0.0527, 0.1918, 0.4914, 0.2641]) -Greedy action tensor([-1.8349, -0.1526, 0.5747, -0.0816]) tensor([0.0430, 0.2310, 0.4781, 0.2480]) -Greedy action tensor([-1.8948, -0.3146, 0.6305, -0.1819]) tensor([0.0419, 0.2032, 0.5229, 0.2320]) -Greedy action tensor([-1.6990, -0.1055, 0.5226, -0.1415]) tensor([0.0503, 0.2474, 0.4636, 0.2387]) -Greedy action tensor([-1.4794, -0.2741, 0.4604, 0.0979]) tensor([0.0620, 0.2068, 0.4311, 0.3000]) -Greedy action tensor([-1.6660, -0.4067, 0.5232, -0.0829]) tensor([0.0546, 0.1923, 0.4873, 0.2658]) -Greedy action tensor([-0.8309, -0.4981, 0.4774, 0.7699]) tensor([0.0905, 0.1262, 0.3348, 0.4485]) -Greedy action tensor([-1.9134, -0.4550, 0.6555, -0.1627]) tensor([0.0415, 0.1783, 0.5414, 0.2388]) -Greedy action tensor([-1.9274, -0.4458, 0.6584, -0.1729]) tensor([0.0409, 0.1799, 0.5428, 0.2364]) -Greedy action tensor([-1.2791, -0.0570, 0.5455, 0.1737]) tensor([0.0673, 0.2283, 0.4170, 0.2875]) -Greedy action tensor([-1.7784, -0.4282, 0.6143, 0.0160]) tensor([0.0458, 0.1768, 0.5016, 0.2757]) -Greedy action tensor([-1.9361, -0.4436, 0.6638, -0.1755]) tensor([0.0404, 0.1799, 0.5445, 0.2352]) -Greedy action tensor([-1.7056, 0.1731, 0.4406, 0.0018]) tensor([0.0463, 0.3028, 0.3957, 0.2552]) -Greedy action tensor([-1.8697, -0.4526, 0.6307, -0.1469]) tensor([0.0436, 0.1800, 0.5319, 0.2444]) -Greedy action tensor([-1.9061, -0.3685, 0.6457, -0.1579]) tensor([0.0413, 0.1921, 0.5296, 0.2371]) -Greedy action tensor([-1.0142, 0.8614, 0.1406, 0.1652]) tensor([0.0717, 0.4677, 0.2275, 0.2331]) -Greedy action tensor([-1.3152, 0.0310, 0.3459, 0.0357]) tensor([0.0716, 0.2751, 0.3769, 0.2764]) -Greedy action tensor([-1.8143, -0.5592, 0.6943, -0.0242]) tensor([0.0439, 0.1540, 0.5393, 0.2629]) -Greedy action tensor([-1.8908, -0.4565, 0.6469, -0.1518]) tensor([0.0425, 0.1783, 0.5374, 0.2418]) -Greedy action tensor([-1.2148, 0.7536, 0.1578, 0.2107]) tensor([0.0615, 0.4402, 0.2426, 0.2558]) -Greedy action tensor([-1.6973, -0.2238, 0.5138, -0.0934]) tensor([0.0514, 0.2243, 0.4689, 0.2555]) -Greedy action tensor([-1.5652, -0.3650, 0.6240, 0.0234]) tensor([0.0551, 0.1830, 0.4920, 0.2699]) -Greedy action tensor([-1.8440, 0.0089, 0.5447, -0.1116]) tensor([0.0418, 0.2665, 0.4554, 0.2363]) -Greedy action tensor([-1.9358, -0.4448, 0.6619, -0.1755]) tensor([0.0405, 0.1799, 0.5441, 0.2355]) -Greedy action tensor([-1.5197, 0.4961, 0.3819, -0.0199]) tensor([0.0508, 0.3813, 0.3402, 0.2276]) -Greedy action tensor([-1.9265, -0.4303, 0.6571, -0.1700]) tensor([0.0408, 0.1822, 0.5406, 0.2364]) -Greedy action tensor([-1.9139, -0.4428, 0.6515, -0.1668]) tensor([0.0415, 0.1807, 0.5397, 0.2381]) -Greedy action tensor([-1.9160, -0.4092, 0.6509, -0.1664]) tensor([0.0412, 0.1858, 0.5362, 0.2368]) -Greedy action tensor([-0.6962, 0.8600, 0.0676, 0.0830]) tensor([0.0993, 0.4709, 0.2132, 0.2165]) -Greedy action tensor([-1.5843, 0.2543, 0.4126, -0.1181]) tensor([0.0527, 0.3312, 0.3880, 0.2282]) -Greedy action tensor([-1.7371, -0.3774, 0.5600, -0.1283]) tensor([0.0504, 0.1963, 0.5013, 0.2519]) -Greedy action tensor([-1.8263, -0.3298, 0.6468, -0.1021]) tensor([0.0436, 0.1947, 0.5171, 0.2445]) -Greedy action tensor([-1.8891, -0.4463, 0.6432, -0.1528]) tensor([0.0426, 0.1802, 0.5356, 0.2416]) -Greedy action tensor([-1.8159, -0.4543, 0.6055, -0.1171]) tensor([0.0462, 0.1804, 0.5206, 0.2528]) -Greedy action tensor([-1.9283, -0.4362, 0.6604, -0.1707]) tensor([0.0407, 0.1811, 0.5421, 0.2361]) -Greedy action tensor([-1.8372, -0.3627, 0.6004, -0.1458]) tensor([0.0450, 0.1964, 0.5146, 0.2440]) -Greedy action tensor([ 0.7849, -0.2751, -0.0379, -0.1840]) tensor([0.4619, 0.1600, 0.2029, 0.1753]) -Greedy action tensor([ 0.7466, -0.7510, -0.0277, -0.3327]) tensor([0.4940, 0.1105, 0.2277, 0.1679]) -Greedy action tensor([ 0.9704, -0.6244, -0.0796, -0.6446]) tensor([0.5708, 0.1159, 0.1998, 0.1135]) -Greedy action tensor([ 0.8911, -0.7697, -0.0347, -0.4634]) tensor([0.5422, 0.1030, 0.2148, 0.1399]) -Greedy action tensor([ 0.6611, -0.1995, -0.0515, -0.0860]) tensor([0.4189, 0.1772, 0.2054, 0.1985]) -Greedy action tensor([ 1.1651, -0.7079, -0.0384, -0.5337]) tensor([0.6110, 0.0939, 0.1834, 0.1117]) -Greedy action tensor([ 0.8797, -0.2940, -0.0490, -0.1328]) tensor([0.4836, 0.1496, 0.1911, 0.1757]) -Greedy action tensor([ 0.6305, -0.2325, 0.0633, -0.1376]) tensor([0.4077, 0.1720, 0.2312, 0.1891]) -Greedy action tensor([ 0.3433, 0.1067, 0.0613, -0.3485]) tensor([0.3285, 0.2593, 0.2478, 0.1645]) -Greedy action tensor([ 0.4000, -0.0579, -0.1023, -0.1947]) tensor([0.3585, 0.2268, 0.2169, 0.1978]) -Greedy action tensor([ 0.8387, -0.3761, -0.1375, -0.1969]) tensor([0.4930, 0.1463, 0.1857, 0.1750]) -Greedy action tensor([ 1.1373, -0.7879, 0.0332, -0.7968]) tensor([0.6166, 0.0899, 0.2044, 0.0891]) -Greedy action tensor([ 0.6990, -0.0844, -0.0303, -0.1498]) tensor([0.4225, 0.1930, 0.2037, 0.1808]) -Greedy action tensor([ 0.9536, -0.2304, 0.0161, -0.0784]) tensor([0.4869, 0.1490, 0.1907, 0.1735]) -Greedy action tensor([ 0.6898, -0.0972, 0.0647, 0.0159]) tensor([0.4000, 0.1821, 0.2141, 0.2039]) -Greedy action tensor([ 0.8808, -0.6798, -0.0167, -0.4422]) tensor([0.5308, 0.1115, 0.2164, 0.1414]) -Greedy action tensor([ 0.7346, -0.2868, 0.0835, -0.2471]) tensor([0.4432, 0.1596, 0.2311, 0.1661]) -Greedy action tensor([ 0.8513, -0.3657, -0.0121, -0.1942]) tensor([0.4832, 0.1431, 0.2038, 0.1699]) -Greedy action tensor([ 0.6602, -0.5590, 0.0022, -0.3272]) tensor([0.4575, 0.1352, 0.2369, 0.1704]) -Greedy action tensor([ 1.1375, -1.1511, 0.0090, -0.6538]) tensor([0.6283, 0.0637, 0.2033, 0.1048]) -Greedy action tensor([ 0.5355, -0.1734, 0.0296, -0.0025]) tensor([0.3733, 0.1837, 0.2251, 0.2180]) -Greedy action tensor([ 0.9344, -0.5183, -0.0736, -0.5597]) tensor([0.5484, 0.1283, 0.2002, 0.1231]) -Greedy action tensor([ 1.1747, -0.6528, -0.0771, -0.7381]) tensor([0.6272, 0.1009, 0.1794, 0.0926]) -Greedy action tensor([ 0.5789, 0.0341, -0.0299, 0.0613]) tensor([0.3677, 0.2132, 0.2000, 0.2191]) -Greedy action tensor([ 0.9131, -0.6193, -0.1009, -0.4453]) tensor([0.5447, 0.1177, 0.1976, 0.1400]) -Greedy action tensor([ 0.9308, -0.9900, 0.1376, -0.3755]) tensor([0.5348, 0.0784, 0.2420, 0.1449]) -Greedy action tensor([ 1.2218, -0.7377, -0.0689, -0.9216]) tensor([0.6522, 0.0919, 0.1794, 0.0765]) -Greedy action tensor([ 0.7096, -0.6764, 0.0014, -0.4295]) tensor([0.4848, 0.1212, 0.2388, 0.1552]) -Greedy action tensor([ 1.0864, -0.6226, -0.1708, -0.5794]) tensor([0.6044, 0.1094, 0.1719, 0.1143]) -Greedy action tensor([ 0.7556, -0.5356, 0.0335, -0.3376]) tensor([0.4772, 0.1312, 0.2318, 0.1599]) -Greedy action tensor([ 0.6329, -0.4230, 0.0363, -0.2632]) tensor([0.4335, 0.1508, 0.2387, 0.1769]) -Greedy action tensor([ 0.2795, -0.0951, 0.0932, -0.3198]) tensor([0.3261, 0.2242, 0.2707, 0.1791]) -Greedy action tensor([ 0.5614, 0.0015, -0.0466, -0.1409]) tensor([0.3830, 0.2188, 0.2085, 0.1897]) -Greedy action tensor([ 0.9386, -0.5450, 0.0495, -0.4595]) tensor([0.5305, 0.1203, 0.2181, 0.1311]) -Greedy action tensor([ 0.8262, -0.6419, -0.0333, -0.5235]) tensor([0.5227, 0.1204, 0.2213, 0.1356]) -Greedy action tensor([ 0.9744, -0.8516, 0.0402, -0.5841]) tensor([0.5668, 0.0913, 0.2227, 0.1193]) -Greedy action tensor([ 0.3072, -0.1588, -0.0426, -0.1031]) tensor([0.3338, 0.2095, 0.2353, 0.2215]) -Greedy action tensor([ 0.8429, -0.2732, -0.1189, -0.2707]) tensor([0.4906, 0.1607, 0.1875, 0.1611]) -Greedy action tensor([ 0.1855, 0.2557, -0.1219, 0.0150]) tensor([0.2739, 0.2938, 0.2014, 0.2309]) -Greedy action tensor([ 0.6008, -0.4630, -0.0969, -0.4272]) tensor([0.4544, 0.1568, 0.2262, 0.1626]) -Greedy action tensor([ 0.8791, -0.6630, -0.0538, -0.4015]) tensor([0.5305, 0.1135, 0.2087, 0.1474]) -Greedy action tensor([ 0.9298, -0.5218, -0.1649, -0.3747]) tensor([0.5434, 0.1273, 0.1819, 0.1474]) -Greedy action tensor([ 0.8309, -0.4009, -0.0587, -0.2377]) tensor([0.4887, 0.1426, 0.2008, 0.1679]) -Greedy action tensor([ 0.7786, -0.4185, -0.0897, -0.2611]) tensor([0.4819, 0.1455, 0.2022, 0.1704]) -Greedy action tensor([ 1.0698, -0.6936, 0.0125, -0.4566]) tensor([0.5760, 0.0988, 0.2001, 0.1252]) -Greedy action tensor([ 1.0169, -0.5949, 0.0409, -0.3583]) tensor([0.5467, 0.1091, 0.2060, 0.1382]) -Greedy action tensor([ 0.8868, -0.2569, 0.1966, -0.0745]) tensor([0.4540, 0.1447, 0.2277, 0.1736]) -Greedy action tensor([ 1.6015, -1.0725, -0.1526, -0.7773]) tensor([0.7492, 0.0517, 0.1297, 0.0694]) -Greedy action tensor([ 0.9531, -0.4476, -0.0121, -0.4895]) tensor([0.5366, 0.1322, 0.2044, 0.1268]) -Greedy action tensor([ 0.4237, -0.0514, 0.0139, -0.1890]) tensor([0.3537, 0.2199, 0.2348, 0.1916]) -Greedy action tensor([ 0.9653, -0.6226, -0.0870, -0.4736]) tensor([0.5584, 0.1141, 0.1950, 0.1325]) -Greedy action tensor([ 0.8043, -0.6331, 0.0200, -0.4441]) tensor([0.5048, 0.1199, 0.2304, 0.1449]) -Greedy action tensor([ 0.9649, -0.8149, 0.1720, -0.2688]) tensor([0.5229, 0.0882, 0.2366, 0.1523]) -Greedy action tensor([ 0.6609, -0.6138, -0.0609, -0.2028]) tensor([0.4572, 0.1278, 0.2222, 0.1928]) -Greedy action tensor([ 0.8946, -0.3462, 0.0047, -0.3727]) tensor([0.5047, 0.1459, 0.2073, 0.1421]) -Greedy action tensor([ 1.4112, -0.8477, -0.1131, -0.6843]) tensor([0.6919, 0.0723, 0.1507, 0.0851]) -Greedy action tensor([ 0.8625, -0.5073, -0.0442, -0.3070]) tensor([0.5080, 0.1291, 0.2052, 0.1578]) -Greedy action tensor([ 0.5862, -0.2902, -0.0635, -0.3365]) tensor([0.4281, 0.1782, 0.2236, 0.1702]) -Greedy action tensor([ 0.2971, 0.0981, -0.0559, -0.3069]) tensor([0.3259, 0.2671, 0.2290, 0.1781]) -Greedy action tensor([ 0.2073, 0.1055, -0.0427, -0.1010]) tensor([0.2927, 0.2644, 0.2279, 0.2150]) -Greedy action tensor([ 0.7605, -0.3962, -0.0995, -0.4696]) tensor([0.4926, 0.1549, 0.2084, 0.1440]) -Greedy action tensor([ 0.7098, -0.4518, 0.0476, -0.4322]) tensor([0.4656, 0.1457, 0.2401, 0.1486]) -Greedy action tensor([ 0.8080, -0.2292, 0.1840, -0.3136]) tensor([0.4513, 0.1600, 0.2418, 0.1470]) -Greedy action tensor([ 0.7980, -0.8428, 0.1000, -0.2995]) tensor([0.4938, 0.0957, 0.2457, 0.1648]) -Greedy action tensor([ 0.9678, -0.5070, -0.0102, -0.2310]) tensor([0.5245, 0.1200, 0.1973, 0.1582]) -Greedy action tensor([ 0.6200, -0.0919, -0.0658, -0.0454]) tensor([0.3986, 0.1956, 0.2008, 0.2049]) -Greedy action tensor([ 0.9277, -0.4623, -0.2300, -0.8034]) tensor([0.5746, 0.1431, 0.1805, 0.1018]) -Greedy action tensor([ 0.8395, -0.2671, -0.0159, -0.2844]) tensor([0.4806, 0.1589, 0.2043, 0.1562]) -Greedy action tensor([ 0.9073, -0.3639, 0.0954, -0.4265]) tensor([0.5030, 0.1411, 0.2233, 0.1325]) -Greedy action tensor([ 1.0425, -0.3353, -0.1320, -0.2607]) tensor([0.5456, 0.1376, 0.1686, 0.1482]) -Greedy action tensor([ 0.9667, -0.5336, -0.1060, -0.6499]) tensor([0.5670, 0.1265, 0.1940, 0.1126]) -Greedy action tensor([ 1.0672, -0.5497, 0.0724, -0.4250]) tensor([0.5577, 0.1107, 0.2062, 0.1254]) -Greedy action tensor([ 1.0196, -0.1215, 0.1200, -0.3360]) tensor([0.5040, 0.1610, 0.2050, 0.1299]) -Greedy action tensor([ 0.9738, -0.5856, -0.0981, -0.3617]) tensor([0.5508, 0.1158, 0.1886, 0.1449]) -Greedy action tensor([ 0.9692, -0.8690, 0.0610, -0.4709]) tensor([0.5558, 0.0884, 0.2241, 0.1317]) -Greedy action tensor([ 0.8218, -0.3132, -0.0442, -0.1478]) tensor([0.4714, 0.1515, 0.1983, 0.1788]) -Greedy action tensor([ 0.8008, -0.4409, -0.0951, -0.2123]) tensor([0.4854, 0.1402, 0.1981, 0.1762]) -Greedy action tensor([ 0.9135, -0.7779, -0.2913, -0.6168]) tensor([0.5881, 0.1084, 0.1763, 0.1273]) -Greedy action tensor([ 0.4745, -0.3130, -0.0414, -0.2297]) tensor([0.3927, 0.1787, 0.2344, 0.1942]) -Greedy action tensor([ 1.3114, -0.6145, -0.0652, -0.5458]) tensor([0.6434, 0.0938, 0.1624, 0.1004]) -Greedy action tensor([ 0.8310, -0.6695, 0.0219, -0.3956]) tensor([0.5098, 0.1137, 0.2270, 0.1495]) -Greedy action tensor([-0.4039, -0.2334, 0.1124, -0.4812]) tensor([0.2089, 0.2477, 0.3501, 0.1933]) -Greedy action tensor([-1.0362, -0.9239, 0.6345, -0.9946]) tensor([0.1180, 0.1320, 0.6271, 0.1230]) -Greedy action tensor([-0.8719, -0.0561, 0.0218, -1.0809]) tensor([0.1535, 0.3470, 0.3751, 0.1245]) -Greedy action tensor([-0.1307, -0.0405, 0.2128, -0.7265]) tensor([0.2466, 0.2699, 0.3477, 0.1359]) -Greedy action tensor([1.1821, 0.2135, 0.5326, 0.0923]) tensor([0.4468, 0.1696, 0.2334, 0.1502]) -Greedy action tensor([-1.7704, -0.3750, -1.4890, 0.7253]) tensor([0.0541, 0.2183, 0.0717, 0.6560]) -Greedy action tensor([ 0.9382, -1.4155, 1.2662, 0.7122]) tensor([0.3048, 0.0290, 0.4231, 0.2431]) -Greedy action tensor([-0.8515, -0.7182, 0.6683, 0.0414]) tensor([0.1092, 0.1248, 0.4993, 0.2667]) -Greedy action tensor([ 0.1361, -0.5864, 0.4567, -0.7663]) tensor([0.3059, 0.1485, 0.4215, 0.1241]) -Greedy action tensor([ 0.4504, -0.7880, 1.2264, -0.3621]) tensor([0.2560, 0.0742, 0.5562, 0.1136]) -Greedy action tensor([ 1.6124, 0.4662, -0.3040, -0.3200]) tensor([0.6212, 0.1974, 0.0914, 0.0900]) -Greedy action tensor([-0.1822, -0.0589, 0.1268, -0.4040]) tensor([0.2329, 0.2634, 0.3172, 0.1865]) -Greedy action tensor([-0.7148, -1.3262, 0.2920, -0.2480]) tensor([0.1702, 0.0924, 0.4659, 0.2715]) -Greedy action tensor([ 0.6138, -0.6160, 0.1726, 0.1771]) tensor([0.3873, 0.1132, 0.2491, 0.2503]) -Greedy action tensor([-1.1196, 0.1900, -0.2629, -0.4876]) tensor([0.1118, 0.4143, 0.2634, 0.2104]) -Greedy action tensor([ 0.4641, -0.3846, 0.0266, 0.1727]) tensor([0.3545, 0.1517, 0.2289, 0.2649]) -Greedy action tensor([-0.1472, -0.2390, 0.3998, -0.8529]) tensor([0.2419, 0.2207, 0.4180, 0.1194]) -Greedy action tensor([ 0.6986, -0.4480, 0.5330, 0.6326]) tensor([0.3224, 0.1024, 0.2733, 0.3019]) -Greedy action tensor([ 0.0078, -1.0689, 0.5968, 0.2490]) tensor([0.2265, 0.0772, 0.4081, 0.2882]) -Greedy action tensor([-0.0910, -1.2483, 0.4454, -0.8265]) tensor([0.2854, 0.0897, 0.4880, 0.1368]) -Greedy action tensor([ 0.5965, -0.7195, -0.0473, 0.6123]) tensor([0.3560, 0.0955, 0.1870, 0.3616]) -Greedy action tensor([ 1.2515, 0.4645, 0.2284, -0.3468]) tensor([0.4958, 0.2257, 0.1782, 0.1003]) -Greedy action tensor([-0.8399, 0.3075, 0.6141, -0.4224]) tensor([0.1005, 0.3166, 0.4302, 0.1526]) -Greedy action tensor([-0.0368, -0.4611, -0.9559, 0.6801]) tensor([0.2438, 0.1595, 0.0973, 0.4994]) -Greedy action tensor([ 0.1705, -2.0280, 0.0322, 0.1056]) tensor([0.3426, 0.0380, 0.2983, 0.3211]) -Greedy action tensor([-1.1790, -0.8399, 0.8948, -1.4204]) tensor([0.0897, 0.1260, 0.7138, 0.0705]) -Greedy action tensor([-0.4894, 0.6212, -0.6079, -1.0674]) tensor([0.1823, 0.5535, 0.1619, 0.1023]) -Greedy action tensor([-1.1037, -1.0140, -0.9071, -0.6020]) tensor([0.2015, 0.2204, 0.2453, 0.3328]) -Greedy action tensor([ 0.1790, -1.0957, 0.0601, -0.1634]) tensor([0.3475, 0.0971, 0.3086, 0.2468]) -Greedy action tensor([-0.2205, -2.0734, 0.1794, 0.9589]) tensor([0.1695, 0.0266, 0.2528, 0.5512]) -Greedy action tensor([ 0.1164, -1.4700, 0.4018, -0.7303]) tensor([0.3374, 0.0691, 0.4488, 0.1447]) -Greedy action tensor([-0.1051, 0.3356, -0.1858, -0.1933]) tensor([0.2277, 0.3538, 0.2100, 0.2085]) -Greedy action tensor([ 1.0435, -0.2369, 0.6339, 0.9696]) tensor([0.3484, 0.0968, 0.2313, 0.3235]) -Greedy action tensor([ 1.2676, -0.7562, -0.1943, -0.6488]) tensor([0.6618, 0.0875, 0.1534, 0.0974]) -Greedy action tensor([-0.6246, -1.0018, -0.8535, -0.0562]) tensor([0.2355, 0.1615, 0.1873, 0.4157]) -Greedy action tensor([ 1.2562, -0.6469, 0.4814, 1.4391]) tensor([0.3558, 0.0530, 0.1640, 0.4272]) -Greedy action tensor([-0.7615, -0.2924, -0.6169, -0.7982]) tensor([0.2119, 0.3388, 0.2449, 0.2043]) -Greedy action tensor([ 0.1370, 0.1463, -0.3925, 0.5194]) tensor([0.2461, 0.2484, 0.1449, 0.3607]) -Greedy action tensor([ 0.8239, -1.2251, 0.4992, 0.4779]) tensor([0.3908, 0.0504, 0.2824, 0.2765]) -Greedy action tensor([ 0.6184, -0.5580, -0.2247, 0.9485]) tensor([0.3195, 0.0985, 0.1375, 0.4444]) -Greedy action tensor([ 0.1371, -1.1135, 0.3107, -0.2301]) tensor([0.3156, 0.0904, 0.3754, 0.2186]) -Greedy action tensor([ 0.5956, -0.3469, -0.0040, 0.8520]) tensor([0.3095, 0.1206, 0.1699, 0.4000]) -Greedy action tensor([-0.3141, 0.5251, 0.3056, -0.9462]) tensor([0.1753, 0.4057, 0.3258, 0.0932]) -Greedy action tensor([ 0.3418, 0.0880, -0.2927, 0.5948]) tensor([0.2782, 0.2159, 0.1475, 0.3584]) -Greedy action tensor([-0.3824, 0.5032, -1.2447, -0.8988]) tensor([0.2251, 0.5456, 0.0950, 0.1343]) -Greedy action tensor([-0.3644, 0.3641, -0.5642, 0.7910]) tensor([0.1415, 0.2932, 0.1159, 0.4494]) -Greedy action tensor([ 0.6516, 0.6523, 0.9299, -0.2354]) tensor([0.2678, 0.2680, 0.3538, 0.1103]) -Greedy action tensor([-0.0906, -0.5919, 0.3296, -0.3526]) tensor([0.2566, 0.1554, 0.3906, 0.1974]) -Greedy action tensor([1.0867, 0.3941, 0.1038, 0.0020]) tensor([0.4520, 0.2261, 0.1691, 0.1528]) -Greedy action tensor([-2.5621, -1.1607, 1.3076, -1.0299]) tensor([0.0174, 0.0705, 0.8318, 0.0803]) -Greedy action tensor([ 0.8708, 0.0509, -0.0217, 0.4856]) tensor([0.3952, 0.1741, 0.1619, 0.2689]) -Greedy action tensor([ 0.1542, 0.3188, 0.4275, -0.6020]) tensor([0.2524, 0.2975, 0.3317, 0.1185]) -Greedy action tensor([-0.2339, -1.6964, 0.4069, -0.5619]) tensor([0.2597, 0.0602, 0.4930, 0.1871]) -Greedy action tensor([0.4271, 0.5899, 0.2494, 0.1904]) tensor([0.2629, 0.3094, 0.2201, 0.2075]) -Greedy action tensor([-0.4565, -1.0639, 0.1453, 0.3922]) tensor([0.1752, 0.0955, 0.3199, 0.4095]) -Greedy action tensor([ 1.0600, 0.0836, -0.9177, -0.2846]) tensor([0.5631, 0.2121, 0.0779, 0.1468]) -Greedy action tensor([-0.3378, -0.9172, 0.2505, 0.0106]) tensor([0.2093, 0.1172, 0.3769, 0.2965]) -Greedy action tensor([ 0.3762, -0.8055, 0.5760, -0.6358]) tensor([0.3459, 0.1061, 0.4223, 0.1257]) -Greedy action tensor([ 1.6071, -1.3011, 0.6252, 0.8320]) tensor([0.5292, 0.0289, 0.1982, 0.2438]) -Greedy action tensor([-0.8602, -0.7297, -1.0819, 0.0732]) tensor([0.1824, 0.2078, 0.1461, 0.4638]) -Greedy action tensor([ 1.1329, -0.8598, -0.0672, 0.8766]) tensor([0.4522, 0.0616, 0.1362, 0.3500]) -Greedy action tensor([ 0.5641, -0.7247, -0.5885, -0.1365]) tensor([0.4790, 0.1320, 0.1513, 0.2377]) -Greedy action tensor([-0.9259, -0.1555, 0.0038, -0.9808]) tensor([0.1506, 0.3254, 0.3815, 0.1425]) -Greedy action tensor([ 0.6223, 0.3643, -0.5855, -1.1371]) tensor([0.4457, 0.3444, 0.1332, 0.0767]) -Greedy action tensor([-0.1240, 0.4354, -0.6695, -0.2553]) tensor([0.2378, 0.4160, 0.1378, 0.2085]) -Greedy action tensor([ 0.1624, -0.5277, -1.5720, -0.2749]) tensor([0.4303, 0.2158, 0.0760, 0.2779]) -Greedy action tensor([ 1.7787, -0.0234, -0.1625, -0.6021]) tensor([0.7138, 0.1177, 0.1025, 0.0660]) -Greedy action tensor([ 0.1738, -1.2388, -0.0464, 1.1509]) tensor([0.2126, 0.0518, 0.1706, 0.5650]) -Greedy action tensor([ 0.3800, 0.2696, -0.0031, -0.2693]) tensor([0.3226, 0.2889, 0.2199, 0.1685]) -Greedy action tensor([-0.4132, -0.0907, -0.0927, 0.2845]) tensor([0.1734, 0.2394, 0.2389, 0.3484]) -Greedy action tensor([ 0.5117, -1.1811, -0.4193, -0.0231]) tensor([0.4621, 0.0850, 0.1821, 0.2707]) -Greedy action tensor([-0.5979, -0.2683, -0.7762, 0.7000]) tensor([0.1452, 0.2018, 0.1215, 0.5316]) -Greedy action tensor([-0.1728, 0.2485, -0.1200, -0.0280]) tensor([0.2112, 0.3219, 0.2227, 0.2442]) -Greedy action tensor([-0.8893, -1.1658, 0.7292, -0.2391]) tensor([0.1147, 0.0870, 0.5786, 0.2197]) -Greedy action tensor([ 0.4040, 0.1560, 0.4687, -0.8187]) tensor([0.3183, 0.2484, 0.3396, 0.0937]) -Greedy action tensor([-0.2140, -0.4519, -1.0068, -0.6240]) tensor([0.3443, 0.2714, 0.1558, 0.2285]) -Greedy action tensor([-0.2861, -0.9152, 0.2245, -0.7321]) tensor([0.2605, 0.1388, 0.4340, 0.1667]) -Greedy action tensor([ 0.0293, -1.4117, -0.3074, 0.0464]) tensor([0.3369, 0.0797, 0.2406, 0.3427]) -Greedy action tensor([-0.3576, 0.8477, 0.5297, -0.8079]) tensor([0.1351, 0.4508, 0.3280, 0.0861]) -Greedy action tensor([-0.9881, 0.1483, -0.0276, -0.0438]) tensor([0.1075, 0.3350, 0.2810, 0.2765]) -Greedy action tensor([ 0.6732, 0.2833, -0.0900, -0.4267]) tensor([0.4038, 0.2735, 0.1883, 0.1344]) -Greedy action tensor([ 1.1790, -0.2093, 1.6761, 0.2349]) tensor([0.3046, 0.0760, 0.5008, 0.1185]) -Greedy action tensor([ 2.1573, -0.9947, -0.4708, 0.7125]) tensor([0.7403, 0.0317, 0.0535, 0.1746]) -Greedy action tensor([ 1.2571, 0.1322, -0.6940, 0.2778]) tensor([0.5428, 0.1762, 0.0771, 0.2039]) -Greedy action tensor([ 1.3463, -0.7529, 0.1820, 0.4475]) tensor([0.5430, 0.0665, 0.1695, 0.2210]) -Greedy action tensor([ 1.2690, -0.6138, -0.6548, 0.3971]) tensor([0.5826, 0.0887, 0.0851, 0.2436]) -Greedy action tensor([ 1.2328, -0.0384, -0.4282, 0.1998]) tensor([0.5475, 0.1536, 0.1040, 0.1949]) -Greedy action tensor([ 1.7365, -0.6075, 0.0681, 0.4789]) tensor([0.6374, 0.0612, 0.1202, 0.1812]) -Greedy action tensor([ 1.4002, -0.4590, -0.5277, 0.2661]) tensor([0.6161, 0.0960, 0.0896, 0.1982]) -Greedy action tensor([ 1.2089, -0.1444, -0.2708, 0.1410]) tensor([0.5465, 0.1412, 0.1244, 0.1879]) -Greedy action tensor([ 1.1195, -0.2582, -0.3106, 0.6111]) tensor([0.4778, 0.1205, 0.1143, 0.2874]) -Greedy action tensor([ 1.3792, -0.7038, -0.4328, 0.1296]) tensor([0.6351, 0.0791, 0.1037, 0.1820]) -Greedy action tensor([ 1.2024, -0.3276, -0.4512, 0.4643]) tensor([0.5302, 0.1148, 0.1015, 0.2535]) -Greedy action tensor([ 1.5228, -0.3544, -0.5450, 0.0088]) tensor([0.6669, 0.1020, 0.0843, 0.1467]) -Greedy action tensor([ 1.7595, -0.7956, -0.6454, 0.2020]) tensor([0.7254, 0.0563, 0.0655, 0.1528]) -Greedy action tensor([ 1.5665, -0.5975, -0.2827, 0.5479]) tensor([0.6123, 0.0703, 0.0963, 0.2211]) -Greedy action tensor([ 1.4852, -0.3485, -0.7824, 0.5596]) tensor([0.6025, 0.0963, 0.0624, 0.2388]) -Greedy action tensor([ 1.1942, -0.3036, -0.4380, 0.4776]) tensor([0.5242, 0.1172, 0.1025, 0.2561]) -Greedy action tensor([ 2.2659, -0.7570, -0.3437, 0.3787]) tensor([0.7851, 0.0382, 0.0578, 0.1189]) -Greedy action tensor([ 1.6471, -1.0188, -0.1531, 0.3930]) tensor([0.6579, 0.0457, 0.1087, 0.1877]) -Greedy action tensor([ 1.5339, -0.9001, -0.4454, 0.5879]) tensor([0.6195, 0.0543, 0.0856, 0.2406]) -Greedy action tensor([ 1.5196, 0.0980, -0.7003, 0.1481]) tensor([0.6236, 0.1505, 0.0677, 0.1582]) -Greedy action tensor([ 1.1032, -0.2917, -0.3761, 0.0681]) tensor([0.5462, 0.1354, 0.1244, 0.1940]) -Greedy action tensor([ 1.3297, -0.3396, -0.4082, 0.5313]) tensor([0.5512, 0.1038, 0.0969, 0.2481]) -Greedy action tensor([ 1.9366, -1.7366, -0.0029, -0.0694]) tensor([0.7670, 0.0195, 0.1103, 0.1032]) -Greedy action tensor([ 0.3962, -0.5186, -0.1172, -0.0702]) tensor([0.3808, 0.1525, 0.2279, 0.2388]) -Greedy action tensor([ 1.1260, -0.0330, -0.8634, 0.1336]) tensor([0.5491, 0.1723, 0.0751, 0.2035]) -Greedy action tensor([ 1.1515, -0.5339, -0.3272, 0.2601]) tensor([0.5484, 0.1017, 0.1250, 0.2249]) -Greedy action tensor([ 2.0854, -1.4688, -0.4039, 0.5931]) tensor([0.7483, 0.0214, 0.0621, 0.1683]) -Greedy action tensor([ 0.8864, -0.2989, -0.6939, 0.4148]) tensor([0.4683, 0.1431, 0.0964, 0.2922]) -Greedy action tensor([ 1.5297, 0.1694, -0.2024, 0.3610]) tensor([0.5733, 0.1471, 0.1014, 0.1782]) -Greedy action tensor([ 1.6402, -0.8771, -0.3290, 0.4299]) tensor([0.6586, 0.0531, 0.0919, 0.1963]) -Greedy action tensor([ 1.6565, -0.2569, -0.4519, 0.1818]) tensor([0.6676, 0.0985, 0.0811, 0.1528]) -Greedy action tensor([ 1.6892, -0.2178, -0.9184, 0.0814]) tensor([0.7030, 0.1044, 0.0518, 0.1408]) -Greedy action tensor([ 1.4200, -0.4957, -0.0874, 0.6660]) tensor([0.5437, 0.0801, 0.1204, 0.2558]) -Greedy action tensor([ 1.6746, -0.3071, -1.0119, 0.6607]) tensor([0.6375, 0.0879, 0.0434, 0.2313]) -Greedy action tensor([ 0.8453, -0.2825, 0.0338, 0.2235]) tensor([0.4339, 0.1405, 0.1927, 0.2330]) -Greedy action tensor([ 1.0850, -0.2298, -0.8838, 0.1544]) tensor([0.5548, 0.1490, 0.0775, 0.2188]) -Greedy action tensor([ 1.6488, -0.3438, -0.2150, -0.1956]) tensor([0.6899, 0.0941, 0.1070, 0.1091]) -Greedy action tensor([ 1.4978, -0.1643, -0.7354, 0.5851]) tensor([0.5888, 0.1117, 0.0631, 0.2364]) -Greedy action tensor([ 1.5838, -0.1440, -0.5386, 0.2759]) tensor([0.6378, 0.1133, 0.0764, 0.1725]) -Greedy action tensor([ 1.3448, -0.2991, -0.5141, 0.3999]) tensor([0.5754, 0.1112, 0.0897, 0.2237]) -Greedy action tensor([ 1.4087, -0.6264, -0.2468, 0.5139]) tensor([0.5779, 0.0755, 0.1104, 0.2362]) -Greedy action tensor([ 1.8830, -1.2758, -0.4517, 0.2523]) tensor([0.7490, 0.0318, 0.0725, 0.1466]) -Greedy action tensor([ 1.2441, -0.3954, -0.7128, 0.1345]) tensor([0.6006, 0.1166, 0.0849, 0.1980]) -Greedy action tensor([ 1.1856, -0.1478, -0.5096, -0.0666]) tensor([0.5770, 0.1521, 0.1059, 0.1650]) -Greedy action tensor([ 1.5974, -0.3247, -0.9336, 0.3393]) tensor([0.6622, 0.0969, 0.0527, 0.1882]) -Greedy action tensor([ 1.2138, -0.2511, -0.6462, 0.1741]) tensor([0.5746, 0.1328, 0.0894, 0.2032]) -Greedy action tensor([ 1.4691, -0.1438, -0.6394, 0.4948]) tensor([0.5889, 0.1174, 0.0715, 0.2223]) -Greedy action tensor([ 2.0470, -1.0738, -0.1658, 1.1861]) tensor([0.6344, 0.0280, 0.0694, 0.2682]) -Greedy action tensor([ 1.5049, -0.6333, -0.6647, 0.5142]) tensor([0.6237, 0.0735, 0.0712, 0.2316]) -Greedy action tensor([ 1.3170, 0.0795, -0.3778, 0.2483]) tensor([0.5503, 0.1596, 0.1011, 0.1890]) -Greedy action tensor([ 1.5581, -0.5380, -0.2228, 0.2777]) tensor([0.6372, 0.0783, 0.1074, 0.1771]) -Greedy action tensor([ 1.3661, -0.0488, -0.9671, 0.3197]) tensor([0.5913, 0.1437, 0.0573, 0.2077]) -Greedy action tensor([ 2.0182, -1.3190, 0.0240, 0.5370]) tensor([0.7148, 0.0254, 0.0973, 0.1625]) -Greedy action tensor([ 1.4741, -0.6224, -0.6146, 0.1948]) tensor([0.6558, 0.0806, 0.0812, 0.1824]) -Greedy action tensor([ 1.7476, -0.3397, -0.1833, 0.3443]) tensor([0.6601, 0.0819, 0.0957, 0.1623]) -Greedy action tensor([ 1.7967, 0.3764, -0.5440, 0.0694]) tensor([0.6598, 0.1594, 0.0635, 0.1173]) -Greedy action tensor([ 1.6278, -0.8723, -0.2253, 0.1437]) tensor([0.6824, 0.0560, 0.1070, 0.1547]) -Greedy action tensor([ 1.4931, -0.7843, -0.5321, 0.2592]) tensor([0.6554, 0.0672, 0.0865, 0.1908]) -Greedy action tensor([ 1.8682, -0.8895, -0.3187, 0.7008]) tensor([0.6726, 0.0427, 0.0755, 0.2093]) -Greedy action tensor([ 1.1901, -0.3810, -0.1504, 0.1754]) tensor([0.5458, 0.1134, 0.1428, 0.1979]) -Greedy action tensor([ 1.9671, -0.2093, -0.5104, 0.4429]) tensor([0.7066, 0.0802, 0.0593, 0.1539]) -Greedy action tensor([ 1.3262, -0.8893, 0.0322, -0.1243]) tensor([0.6181, 0.0674, 0.1695, 0.1449]) -Greedy action tensor([ 1.0644, -0.5907, -0.3320, -0.2738]) tensor([0.5879, 0.1123, 0.1455, 0.1542]) -Greedy action tensor([ 2.1514, -1.0260, 0.0746, 1.1207]) tensor([0.6563, 0.0274, 0.0822, 0.2341]) -Greedy action tensor([ 0.9943, -0.3682, -0.2261, 0.2630]) tensor([0.4920, 0.1260, 0.1452, 0.2368]) -Greedy action tensor([ 1.3398, -0.4738, -0.1962, 0.3648]) tensor([0.5696, 0.0929, 0.1226, 0.2149]) -Greedy action tensor([ 1.3545, 0.0416, -1.3300, 0.2286]) tensor([0.6018, 0.1619, 0.0411, 0.1952]) -Greedy action tensor([ 1.6880, -0.5329, -0.7267, 0.0612]) tensor([0.7171, 0.0778, 0.0641, 0.1410]) -Greedy action tensor([ 1.9171, -0.5233, -0.6882, 0.4275]) tensor([0.7212, 0.0628, 0.0533, 0.1626]) -Greedy action tensor([ 1.8446, -0.6791, -0.3598, 0.4242]) tensor([0.6983, 0.0560, 0.0770, 0.1687]) -Greedy action tensor([ 1.9885, -1.0305, 0.2468, 0.8566]) tensor([0.6466, 0.0316, 0.1133, 0.2085]) -Greedy action tensor([ 1.1989, -0.3484, -0.9325, 0.1414]) tensor([0.5957, 0.1268, 0.0707, 0.2069]) -Greedy action tensor([ 1.4555, -0.5234, -0.7432, 0.4433]) tensor([0.6201, 0.0857, 0.0688, 0.2254]) -Greedy action tensor([ 1.2475, -0.3710, -0.4755, 0.1459]) tensor([0.5851, 0.1160, 0.1045, 0.1945]) -Greedy action tensor([ 1.6022, 0.0384, -0.1698, -0.2077]) tensor([0.6481, 0.1357, 0.1102, 0.1061]) -Greedy action tensor([ 1.1355, -0.4507, -0.3549, 0.0708]) tensor([0.5634, 0.1153, 0.1269, 0.1943]) -Greedy action tensor([ 1.3234, -0.0385, -0.8101, 0.1095]) tensor([0.5982, 0.1532, 0.0708, 0.1777]) -Greedy action tensor([ 1.9759, -0.8426, -0.4600, -0.0450]) tensor([0.7814, 0.0466, 0.0684, 0.1036]) -Greedy action tensor([ 1.1265, -0.6224, -0.1825, -0.2961]) tensor([0.5934, 0.1032, 0.1603, 0.1431]) -Greedy action tensor([ 2.3644e+00, -2.2596e-01, 5.8055e-05, 1.5171e-01]) tensor([0.7822, 0.0587, 0.0735, 0.0856]) -Greedy action tensor([ 1.3580, -0.2612, -0.8830, 0.1359]) tensor([0.6254, 0.1239, 0.0665, 0.1843]) -Greedy action tensor([-1.0714, 0.3100, 0.2545, -0.1578]) tensor([0.0890, 0.3541, 0.3350, 0.2218]) -Greedy action tensor([-1.3862, -0.5990, 0.3647, 0.1485]) tensor([0.0735, 0.1616, 0.4236, 0.3413]) -Greedy action tensor([-1.7748, -0.3023, 0.5604, -0.1202]) tensor([0.0478, 0.2084, 0.4938, 0.2500]) -Greedy action tensor([-1.8871, -0.3658, 0.6376, -0.1454]) tensor([0.0421, 0.1926, 0.5253, 0.2401]) -Greedy action tensor([-1.8782, -0.3722, 0.6340, -0.1849]) tensor([0.0430, 0.1937, 0.5298, 0.2336]) -Greedy action tensor([-1.7029, -0.2448, 0.5230, -0.0972]) tensor([0.0512, 0.2199, 0.4740, 0.2549]) -Greedy action tensor([-1.3733, -0.1970, 0.3527, 0.0126]) tensor([0.0722, 0.2340, 0.4054, 0.2885]) -Greedy action tensor([-1.1862, 0.5123, 0.3092, -0.1485]) tensor([0.0727, 0.3975, 0.3245, 0.2053]) -Greedy action tensor([-1.4643, -0.6057, 0.3844, 0.0802]) tensor([0.0695, 0.1639, 0.4412, 0.3255]) -Greedy action tensor([-1.7977, -0.4064, 0.6019, -0.0767]) tensor([0.0462, 0.1859, 0.5094, 0.2585]) -Greedy action tensor([-1.8470, -0.1039, 0.5786, -0.1300]) tensor([0.0424, 0.2422, 0.4794, 0.2360]) -Greedy action tensor([-0.8433, 0.8383, 0.1097, 0.3864]) tensor([0.0807, 0.4338, 0.2094, 0.2761]) -Greedy action tensor([-1.8143, -0.5160, 0.7275, -0.1043]) tensor([0.0437, 0.1600, 0.5548, 0.2415]) -Greedy action tensor([-1.8888, -0.4586, 0.6464, -0.1510]) tensor([0.0426, 0.1780, 0.5374, 0.2421]) -Greedy action tensor([-1.9086, -0.3683, 0.6369, -0.1765]) tensor([0.0415, 0.1939, 0.5297, 0.2349]) -Greedy action tensor([-1.7633, -0.4884, 0.5829, -0.1313]) tensor([0.0497, 0.1777, 0.5187, 0.2539]) -Greedy action tensor([-1.8438, -0.2671, 0.5972, -0.1383]) tensor([0.0438, 0.2120, 0.5031, 0.2411]) -Greedy action tensor([-1.8589, -0.3805, 0.6210, -0.1180]) tensor([0.0434, 0.1905, 0.5185, 0.2476]) -Greedy action tensor([-1.7239, -0.4362, 0.6591, 0.0888]) tensor([0.0463, 0.1679, 0.5020, 0.2838]) -Greedy action tensor([-1.8672, -0.4101, 0.6460, -0.1474]) tensor([0.0431, 0.1849, 0.5316, 0.2405]) -Greedy action tensor([-1.4513, 0.0238, 0.4704, -0.5437]) tensor([0.0681, 0.2977, 0.4653, 0.1688]) -Greedy action tensor([-1.8277, -0.1596, 0.5634, -0.1028]) tensor([0.0438, 0.2321, 0.4784, 0.2457]) -Greedy action tensor([-1.8803, -0.4536, 0.6414, -0.1452]) tensor([0.0429, 0.1789, 0.5347, 0.2435]) -Greedy action tensor([-1.6453, -0.2693, 0.4924, -0.0735]) tensor([0.0548, 0.2169, 0.4646, 0.2638]) -Greedy action tensor([-1.7549, -0.4973, 0.5621, -0.0536]) tensor([0.0496, 0.1746, 0.5037, 0.2721]) -Greedy action tensor([-1.9036, -0.4333, 0.6399, -0.1542]) tensor([0.0420, 0.1826, 0.5340, 0.2414]) -Greedy action tensor([-0.3327, 1.0058, 0.0199, 0.1941]) tensor([0.1261, 0.4809, 0.1794, 0.2136]) -Greedy action tensor([-1.8410, -0.4702, 0.6263, -0.1230]) tensor([0.0448, 0.1766, 0.5287, 0.2499]) -Greedy action tensor([-1.9288, -0.3916, 0.6518, -0.1689]) tensor([0.0405, 0.1886, 0.5353, 0.2356]) -Greedy action tensor([-1.8758, -0.4391, 0.6306, -0.1620]) tensor([0.0434, 0.1828, 0.5327, 0.2411]) -Greedy action tensor([-1.4698, -0.5637, 0.4699, -0.1911]) tensor([0.0713, 0.1765, 0.4961, 0.2561]) -Greedy action tensor([-1.1506, -0.5849, 0.3327, 0.0131]) tensor([0.0964, 0.1698, 0.4250, 0.3087]) -Greedy action tensor([-1.7260, -0.4993, 0.5630, -0.0985]) tensor([0.0516, 0.1761, 0.5094, 0.2629]) -Greedy action tensor([-1.7969, -0.2436, 0.5662, -0.1147]) tensor([0.0460, 0.2176, 0.4889, 0.2475]) -Greedy action tensor([-1.7463, -0.0063, 0.5170, -0.1020]) tensor([0.0465, 0.2651, 0.4474, 0.2409]) -Greedy action tensor([-1.8809, -0.4566, 0.6395, -0.1505]) tensor([0.0430, 0.1788, 0.5352, 0.2429]) -Greedy action tensor([-1.8966, -0.4423, 0.6456, -0.1557]) tensor([0.0422, 0.1807, 0.5364, 0.2407]) -Greedy action tensor([-1.9154, -0.4561, 0.6518, -0.1623]) tensor([0.0415, 0.1785, 0.5405, 0.2395]) -Greedy action tensor([-1.9016, -0.3974, 0.6461, -0.1544]) tensor([0.0416, 0.1874, 0.5320, 0.2389]) -Greedy action tensor([-1.5465, 0.2823, 0.3824, -0.0684]) tensor([0.0541, 0.3367, 0.3721, 0.2371]) -Greedy action tensor([-1.1291, 0.7381, 0.1726, 0.0888]) tensor([0.0688, 0.4454, 0.2530, 0.2327]) -Greedy action tensor([-1.8412, -0.4151, 0.6386, -0.1287]) tensor([0.0442, 0.1838, 0.5272, 0.2448]) -Greedy action tensor([-1.8832, -0.4628, 0.6445, -0.1505]) tensor([0.0429, 0.1775, 0.5371, 0.2425]) -Greedy action tensor([-1.4065, 0.7825, 0.4700, -0.7083]) tensor([0.0542, 0.4834, 0.3536, 0.1088]) -Greedy action tensor([-1.8888, -0.2951, 0.6303, -0.1530]) tensor([0.0416, 0.2050, 0.5171, 0.2363]) -Greedy action tensor([-1.8732, -0.4301, 0.6305, -0.1415]) tensor([0.0433, 0.1832, 0.5291, 0.2445]) -Greedy action tensor([-0.5947, 1.0253, 0.0322, 0.3472]) tensor([0.0953, 0.4817, 0.1784, 0.2445]) -Greedy action tensor([-1.4428, 0.4565, 0.3546, 0.0881]) tensor([0.0545, 0.3644, 0.3290, 0.2521]) -Greedy action tensor([-1.3496, 0.6379, 0.2790, -0.0143]) tensor([0.0582, 0.4244, 0.2964, 0.2211]) -Greedy action tensor([-0.9618, 0.2018, 0.5551, 0.2409]) tensor([0.0827, 0.2648, 0.3770, 0.2754]) -Greedy action tensor([-0.7808, -0.0503, 0.5618, 1.2941]) tensor([0.0673, 0.1396, 0.2575, 0.5356]) -Greedy action tensor([-0.3546, 0.0318, 0.7604, 1.5196]) tensor([0.0831, 0.1223, 0.2533, 0.5413]) -Greedy action tensor([-1.8854, -0.4333, 0.6366, -0.1569]) tensor([0.0428, 0.1829, 0.5332, 0.2411]) -Greedy action tensor([-1.8028, -0.4158, 0.5989, -0.1021]) tensor([0.0465, 0.1860, 0.5130, 0.2545]) -Greedy action tensor([-1.9417, -0.4454, 0.6658, -0.1793]) tensor([0.0402, 0.1796, 0.5457, 0.2344]) -Greedy action tensor([-0.4683, 1.0374, 0.0259, 0.3024]) tensor([0.1074, 0.4842, 0.1761, 0.2322]) -Greedy action tensor([-1.8598, -0.4062, 0.6197, -0.1429]) tensor([0.0439, 0.1878, 0.5239, 0.2444]) -Greedy action tensor([-1.9374, -0.4600, 0.6668, -0.1750]) tensor([0.0404, 0.1772, 0.5468, 0.2356]) -Greedy action tensor([-1.8500, -0.4605, 0.6236, -0.1367]) tensor([0.0446, 0.1790, 0.5291, 0.2474]) -Greedy action tensor([-1.6338, -0.2361, 0.6072, -0.0344]) tensor([0.0515, 0.2086, 0.4847, 0.2552]) -Greedy action tensor([-1.7462, -0.5072, 0.5601, -0.0846]) tensor([0.0506, 0.1747, 0.5080, 0.2666]) -Greedy action tensor([-0.9163, 0.7890, 0.0447, 0.2920]) tensor([0.0802, 0.4415, 0.2097, 0.2686]) -Greedy action tensor([-1.9114, -0.3906, 0.6393, -0.1648]) tensor([0.0414, 0.1897, 0.5312, 0.2377]) -Greedy action tensor([-1.9056, -0.6170, 1.1312, 0.4516]) tensor([0.0278, 0.1007, 0.5784, 0.2931]) -Greedy action tensor([-1.4246, 0.3694, 0.3278, 0.0242]) tensor([0.0587, 0.3529, 0.3385, 0.2499]) -Greedy action tensor([-1.8773, -0.3547, 0.6317, -0.1417]) tensor([0.0425, 0.1947, 0.5220, 0.2409]) -Greedy action tensor([-0.0551, -0.6687, 0.2048, -0.6906]) tensor([0.2969, 0.1608, 0.3851, 0.1573]) -Greedy action tensor([-1.2127, -0.5284, 0.4066, 0.3146]) tensor([0.0791, 0.1569, 0.3996, 0.3644]) -Greedy action tensor([-1.7249, -0.2399, 0.5363, -0.0964]) tensor([0.0497, 0.2196, 0.4772, 0.2535]) -Greedy action tensor([-1.9259, -0.4466, 0.6622, -0.1713]) tensor([0.0409, 0.1794, 0.5436, 0.2362]) -Greedy action tensor([-1.5214, -0.3141, 0.4505, -0.0491]) tensor([0.0629, 0.2105, 0.4522, 0.2744]) -Greedy action tensor([-0.6159, -0.5544, 0.2294, -0.3333]) tensor([0.1749, 0.1860, 0.4072, 0.2320]) -Greedy action tensor([-1.8393, -0.1999, 0.5710, -0.1280]) tensor([0.0438, 0.2257, 0.4879, 0.2426]) -Greedy action tensor([-1.7895, -0.0438, 0.5262, -0.0710]) tensor([0.0446, 0.2554, 0.4516, 0.2485]) -Greedy action tensor([-1.3649, -0.2054, 0.6863, 0.4669]) tensor([0.0549, 0.1751, 0.4271, 0.3429]) -Greedy action tensor([-1.9454, -0.4510, 0.6647, -0.1820]) tensor([0.0402, 0.1791, 0.5464, 0.2343]) -Greedy action tensor([-1.8830, -0.4536, 0.6408, -0.1492]) tensor([0.0429, 0.1791, 0.5351, 0.2429]) -Greedy action tensor([-1.7983, -0.3042, 0.5779, -0.0525]) tensor([0.0456, 0.2030, 0.4904, 0.2611]) -Greedy action tensor([-1.9021, -0.4589, 0.6528, -0.1605]) tensor([0.0420, 0.1778, 0.5405, 0.2397]) -Greedy action tensor([-1.9290, -0.3421, 0.6450, -0.1847]) tensor([0.0404, 0.1977, 0.5305, 0.2314]) -Greedy action tensor([-1.5721, -0.2625, 0.5539, -0.0572]) tensor([0.0567, 0.2101, 0.4753, 0.2580]) -Greedy action tensor([ 0.8383, -0.4724, 0.1453, -0.2727]) tensor([0.4764, 0.1285, 0.2382, 0.1569]) -Greedy action tensor([ 0.7630, -0.5148, -0.0686, -0.1686]) tensor([0.4744, 0.1322, 0.2065, 0.1869]) -Greedy action tensor([ 0.9492, -0.5519, -0.0524, -0.2852]) tensor([0.5316, 0.1185, 0.1953, 0.1547]) -Greedy action tensor([ 0.9614, -0.6616, 0.0324, -0.3759]) tensor([0.5391, 0.1064, 0.2129, 0.1416]) -Greedy action tensor([ 0.5982, -0.1508, -0.1567, -0.0231]) tensor([0.4032, 0.1906, 0.1895, 0.2166]) -Greedy action tensor([0.3077, 0.2709, 0.0188, 0.1471]) tensor([0.2805, 0.2704, 0.2101, 0.2389]) -Greedy action tensor([ 1.0493, -0.9823, -0.1393, -0.4107]) tensor([0.5995, 0.0786, 0.1826, 0.1392]) -Greedy action tensor([ 0.7016, 0.3688, -0.2283, -0.0202]) tensor([0.3850, 0.2760, 0.1519, 0.1871]) -Greedy action tensor([ 1.2109, -0.6586, -0.1052, -0.8136]) tensor([0.6433, 0.0992, 0.1725, 0.0850]) -Greedy action tensor([ 0.5322, -0.3587, 0.0056, -0.0612]) tensor([0.3916, 0.1607, 0.2313, 0.2164]) -Greedy action tensor([ 1.1330, -0.6939, -0.0687, -0.5504]) tensor([0.6070, 0.0977, 0.1825, 0.1128]) -Greedy action tensor([ 0.9190, -0.6177, 0.1002, -0.5761]) tensor([0.5319, 0.1144, 0.2345, 0.1192]) -Greedy action tensor([ 1.1085, -0.5672, -0.0892, -0.5388]) tensor([0.5947, 0.1113, 0.1795, 0.1145]) -Greedy action tensor([ 1.0653, -0.6300, 0.0939, -0.6109]) tensor([0.5717, 0.1049, 0.2164, 0.1070]) -Greedy action tensor([ 1.1293, -0.5765, 0.1060, -0.7127]) tensor([0.5884, 0.1069, 0.2115, 0.0933]) -Greedy action tensor([ 0.6238, -0.4000, 0.0854, -0.4781]) tensor([0.4395, 0.1579, 0.2565, 0.1460]) -Greedy action tensor([ 0.8582, -0.6512, -0.0578, -0.4763]) tensor([0.5307, 0.1173, 0.2123, 0.1397]) -Greedy action tensor([ 0.5931, -0.1744, -0.1602, -0.4481]) tensor([0.4371, 0.2029, 0.2058, 0.1543]) -Greedy action tensor([ 0.9549, -0.4974, -0.0448, -0.2564]) tensor([0.5264, 0.1232, 0.1937, 0.1568]) -Greedy action tensor([ 1.0802, -0.8049, 0.2122, -0.3261]) tensor([0.5505, 0.0836, 0.2311, 0.1349]) -Greedy action tensor([ 0.8078, -0.5761, -0.0739, -0.4917]) tensor([0.5162, 0.1294, 0.2137, 0.1407]) -Greedy action tensor([ 1.3545, -0.8143, -0.0723, -0.7382]) tensor([0.6767, 0.0774, 0.1625, 0.0835]) -Greedy action tensor([ 1.4459, -0.6947, -0.0759, -0.9714]) tensor([0.7017, 0.0825, 0.1532, 0.0626]) -Greedy action tensor([ 0.8631, -0.1979, -0.0337, -0.0828]) tensor([0.4668, 0.1616, 0.1904, 0.1813]) -Greedy action tensor([ 0.5008, -0.6189, -0.2161, -0.1364]) tensor([0.4267, 0.1393, 0.2084, 0.2256]) -Greedy action tensor([ 1.0527, -0.5554, -0.1094, -0.3312]) tensor([0.5670, 0.1136, 0.1774, 0.1421]) -Greedy action tensor([ 0.6861, -0.3366, 0.0029, -0.2362]) tensor([0.4420, 0.1590, 0.2232, 0.1758]) -Greedy action tensor([ 0.6294, -0.3242, 0.0596, -0.1002]) tensor([0.4110, 0.1584, 0.2325, 0.1981]) -Greedy action tensor([ 0.7491, -0.7423, -0.1215, -0.4085]) tensor([0.5107, 0.1149, 0.2138, 0.1605]) -Greedy action tensor([ 0.9433, -0.5490, -0.1641, -0.4383]) tensor([0.5536, 0.1245, 0.1829, 0.1391]) -Greedy action tensor([ 0.7886, -0.4883, -0.0061, -0.4357]) tensor([0.4939, 0.1378, 0.2231, 0.1452]) -Greedy action tensor([ 0.7269, -0.2790, -0.1570, -0.2362]) tensor([0.4628, 0.1693, 0.1912, 0.1767]) -Greedy action tensor([ 1.0276, -0.9587, 0.0949, -0.4321]) tensor([0.5672, 0.0778, 0.2232, 0.1318]) -Greedy action tensor([ 0.9226, -0.6315, 0.1025, -0.5672]) tensor([0.5327, 0.1126, 0.2346, 0.1201]) -Greedy action tensor([ 0.6000, -0.2559, -0.0876, -0.0454]) tensor([0.4078, 0.1733, 0.2050, 0.2139]) -Greedy action tensor([ 0.8805, -0.0759, -0.0180, 0.0426]) tensor([0.4496, 0.1728, 0.1831, 0.1945]) -Greedy action tensor([ 0.8066, -0.7900, -0.1294, -0.3521]) tensor([0.5239, 0.1061, 0.2055, 0.1645]) -Greedy action tensor([ 1.1058, -0.8208, 0.1098, -0.8994]) tensor([0.6062, 0.0883, 0.2239, 0.0816]) -Greedy action tensor([ 0.9189, -0.2362, -0.0888, -0.5350]) tensor([0.5225, 0.1646, 0.1908, 0.1221]) -Greedy action tensor([ 0.7373, -0.4789, -0.0630, -0.0962]) tensor([0.4587, 0.1359, 0.2060, 0.1993]) -Greedy action tensor([ 1.1469, -0.5987, -0.1093, -0.3773]) tensor([0.5963, 0.1041, 0.1698, 0.1299]) -Greedy action tensor([ 0.7616, -0.5362, 0.0257, -0.3000]) tensor([0.4766, 0.1302, 0.2283, 0.1649]) -Greedy action tensor([ 0.8619, -0.4402, 0.1278, -0.4657]) tensor([0.4958, 0.1348, 0.2379, 0.1314]) -Greedy action tensor([ 0.6286, -0.3903, 0.0274, -0.3627]) tensor([0.4385, 0.1583, 0.2404, 0.1627]) -Greedy action tensor([ 0.5164, -0.4800, 0.0701, -0.4668]) tensor([0.4196, 0.1549, 0.2685, 0.1570]) -Greedy action tensor([ 0.8124, -0.4490, 0.0132, -0.2226]) tensor([0.4789, 0.1356, 0.2154, 0.1701]) -Greedy action tensor([ 0.6292, -0.0031, 0.0367, -0.0407]) tensor([0.3852, 0.2047, 0.2130, 0.1971]) -Greedy action tensor([ 0.9108, -0.4845, -0.0173, -0.2108]) tensor([0.5079, 0.1258, 0.2008, 0.1655]) -Greedy action tensor([ 0.8923, 0.1079, 0.1537, -0.0265]) tensor([0.4286, 0.1956, 0.2048, 0.1710]) -Greedy action tensor([ 0.7260, -0.5097, 0.0071, -0.2901]) tensor([0.4673, 0.1358, 0.2277, 0.1692]) -Greedy action tensor([ 0.7316, -0.0956, 0.1317, -0.1369]) tensor([0.4157, 0.1818, 0.2282, 0.1744]) -Greedy action tensor([ 1.1411, -0.4072, -0.0542, -0.1297]) tensor([0.5569, 0.1184, 0.1685, 0.1563]) -Greedy action tensor([ 0.9584, -0.8553, 0.0400, -0.5815]) tensor([0.5629, 0.0918, 0.2247, 0.1207]) -Greedy action tensor([ 0.8086, -0.0286, -0.5163, -0.4160]) tensor([0.5018, 0.2173, 0.1334, 0.1475]) -Greedy action tensor([ 1.1107, -0.7550, -0.1095, -0.6959]) tensor([0.6195, 0.0959, 0.1829, 0.1017]) -Greedy action tensor([ 1.3285, -0.6484, 0.0114, -0.7799]) tensor([0.6545, 0.0906, 0.1754, 0.0795]) -Greedy action tensor([ 0.6620, -0.2897, -0.0191, -0.2011]) tensor([0.4321, 0.1669, 0.2187, 0.1823]) -Greedy action tensor([ 9.4112e-01, -1.9684e-01, 5.4852e-02, -7.6092e-04]) tensor([0.4711, 0.1510, 0.1942, 0.1837]) -Greedy action tensor([ 0.4387, -0.2409, -0.2272, -0.0250]) tensor([0.3774, 0.1913, 0.1939, 0.2374]) -Greedy action tensor([ 0.8943, -0.4812, -0.0577, -0.3653]) tensor([0.5202, 0.1315, 0.2008, 0.1476]) -Greedy action tensor([ 1.5896, -0.9668, 0.1764, -0.9625]) tensor([0.7149, 0.0555, 0.1740, 0.0557]) -Greedy action tensor([ 0.7485, -0.5123, -0.1181, -0.2424]) tensor([0.4819, 0.1366, 0.2026, 0.1789]) -Greedy action tensor([ 1.2560, -0.6398, 0.1367, -0.7896]) tensor([0.6227, 0.0935, 0.2033, 0.0805]) -Greedy action tensor([ 0.8423, -0.0712, -0.0667, -0.5929]) tensor([0.4897, 0.1964, 0.1973, 0.1166]) -Greedy action tensor([ 0.8950, -0.1101, -0.0951, -0.2335]) tensor([0.4852, 0.1776, 0.1803, 0.1570]) -Greedy action tensor([ 0.6301, -0.3525, 0.0702, -0.0735]) tensor([0.4098, 0.1534, 0.2341, 0.2028]) -Greedy action tensor([ 0.9066, -0.5575, 0.1132, -0.4568]) tensor([0.5156, 0.1193, 0.2332, 0.1319]) -Greedy action tensor([ 0.9111, -0.7915, 0.1359, -0.4690]) tensor([0.5279, 0.0962, 0.2432, 0.1328]) -Greedy action tensor([ 0.4074, 0.0231, 0.0327, -0.1250]) tensor([0.3384, 0.2304, 0.2326, 0.1987]) -Greedy action tensor([ 0.7644, -0.4604, -0.0061, -0.4267]) tensor([0.4853, 0.1426, 0.2246, 0.1475]) -Greedy action tensor([ 0.9887, -0.7411, 0.0356, -0.5926]) tensor([0.5654, 0.1003, 0.2180, 0.1163]) -Greedy action tensor([ 0.7135, -0.7416, -0.0767, -0.2699]) tensor([0.4852, 0.1132, 0.2201, 0.1815]) -Greedy action tensor([ 0.7364, -0.3397, -0.0223, -0.1375]) tensor([0.4491, 0.1531, 0.2103, 0.1874]) -Greedy action tensor([ 0.9796, -0.6909, -0.0176, -0.3306]) tensor([0.5474, 0.1030, 0.2019, 0.1477]) -Greedy action tensor([ 0.2588, -0.0972, -0.0722, -0.3686]) tensor([0.3387, 0.2372, 0.2432, 0.1808]) -Greedy action tensor([ 1.3795, -0.8323, 0.1181, -0.6226]) tensor([0.6545, 0.0717, 0.1854, 0.0884]) -Greedy action tensor([ 0.6090, -0.0107, 0.1477, -0.4820]) tensor([0.3993, 0.2149, 0.2517, 0.1341]) -Greedy action tensor([ 0.7380, -0.3726, -0.0795, -0.4065]) tensor([0.4786, 0.1576, 0.2113, 0.1524]) -Greedy action tensor([ 0.6430, 0.4088, -0.0669, -0.3037]) tensor([0.3744, 0.2962, 0.1841, 0.1453]) -Greedy action tensor([ 0.2449, -0.1872, -0.1063, -0.1166]) tensor([0.3279, 0.2129, 0.2308, 0.2284]) -Greedy action tensor([ 9.5710e-01, -7.3964e-01, 5.9807e-04, -3.0092e-01]) tensor([0.5400, 0.0990, 0.2075, 0.1535]) -Greedy action tensor([ 0.3859, -0.8666, 0.4066, 0.0405]) tensor([0.3317, 0.0948, 0.3387, 0.2348]) -Greedy action tensor([-0.5443, -1.2002, 0.6855, -0.9286]) tensor([0.1779, 0.0923, 0.6086, 0.1212]) -Greedy action tensor([ 0.9425, -0.2305, 1.1515, 0.7255]) tensor([0.2988, 0.0925, 0.3683, 0.2405]) -Greedy action tensor([-0.5702, 0.0857, 0.0801, -1.2580]) tensor([0.1871, 0.3604, 0.3584, 0.0940]) -Greedy action tensor([ 0.5559, -1.6042, -0.5085, -0.1120]) tensor([0.5068, 0.0584, 0.1748, 0.2599]) -Greedy action tensor([ 0.3721, -0.5365, 0.3196, -0.1909]) tensor([0.3423, 0.1380, 0.3248, 0.1949]) -Greedy action tensor([ 1.1845, -1.0317, 0.1932, -0.4888]) tensor([0.5996, 0.0654, 0.2225, 0.1125]) -Greedy action tensor([ 0.3395, -0.5962, -0.0412, 0.2627]) tensor([0.3331, 0.1307, 0.2277, 0.3085]) -Greedy action tensor([ 0.0098, -0.6921, -0.1836, -0.5882]) tensor([0.3485, 0.1727, 0.2872, 0.1916]) -Greedy action tensor([ 1.4369, 0.5416, 0.1196, -0.1510]) tensor([0.5317, 0.2172, 0.1424, 0.1087]) -Greedy action tensor([-0.1238, 0.1310, -1.1945, -0.1207]) tensor([0.2750, 0.3548, 0.0943, 0.2759]) -Greedy action tensor([ 0.0527, -0.5021, 0.2660, 0.3241]) tensor([0.2425, 0.1392, 0.3002, 0.3181]) -Greedy action tensor([-0.2588, 0.0596, 0.9515, 0.2147]) tensor([0.1363, 0.1874, 0.4573, 0.2189]) -Greedy action tensor([-0.9471, -0.8595, -0.4976, -0.4904]) tensor([0.1909, 0.2084, 0.2993, 0.3014]) -Greedy action tensor([-0.5002, 0.5386, -0.4856, -0.1466]) tensor([0.1596, 0.4511, 0.1620, 0.2273]) -Greedy action tensor([-0.3939, -0.9473, -0.0905, -1.0937]) tensor([0.2919, 0.1678, 0.3953, 0.1450]) -Greedy action tensor([-0.2988, -0.2359, -0.6307, 0.3007]) tensor([0.2172, 0.2313, 0.1559, 0.3956]) -Greedy action tensor([-0.1012, -0.1548, -0.3103, -0.9834]) tensor([0.3152, 0.2987, 0.2557, 0.1304]) -Greedy action tensor([-0.9894, -0.7844, 0.2106, -0.7871]) tensor([0.1477, 0.1813, 0.4903, 0.1808]) -Greedy action tensor([ 0.4083, -0.2160, -0.1801, 0.2822]) tensor([0.3364, 0.1802, 0.1868, 0.2966]) -Greedy action tensor([1.5001, 0.1001, 0.0609, 0.0078]) tensor([0.5853, 0.1443, 0.1388, 0.1316]) -Greedy action tensor([-0.1325, 0.1461, 0.0366, -0.4076]) tensor([0.2345, 0.3098, 0.2777, 0.1781]) -Greedy action tensor([ 0.4898, -1.5235, -0.8570, -0.5472]) tensor([0.5720, 0.0764, 0.1488, 0.2028]) -Greedy action tensor([-0.4746, -0.6492, -0.6095, 0.2806]) tensor([0.2065, 0.1735, 0.1805, 0.4395]) -Greedy action tensor([-1.0080, 0.5985, -0.3439, -0.5640]) tensor([0.1054, 0.5255, 0.2048, 0.1643]) -Greedy action tensor([-0.7107, -0.7591, -0.4497, -0.4674]) tensor([0.2209, 0.2105, 0.2868, 0.2818]) -Greedy action tensor([-0.4628, 0.4748, -0.0623, 0.2330]) tensor([0.1418, 0.3622, 0.2117, 0.2844]) -Greedy action tensor([ 0.9433, -1.6027, 0.7610, 0.3391]) tensor([0.4068, 0.0319, 0.3390, 0.2223]) -Greedy action tensor([ 0.9533, -1.3069, 1.2561, -0.7751]) tensor([0.3794, 0.0396, 0.5136, 0.0674]) -Greedy action tensor([ 0.8893, -0.9787, 0.9134, -0.5599]) tensor([0.4143, 0.0640, 0.4244, 0.0973]) -Greedy action tensor([-0.6356, -0.4515, -0.8562, -0.5106]) tensor([0.2417, 0.2906, 0.1939, 0.2739]) -Greedy action tensor([-0.2666, -0.5272, -0.0179, -0.4650]) tensor([0.2582, 0.1990, 0.3311, 0.2117]) -Greedy action tensor([ 0.0490, -0.2161, 0.1862, -0.6489]) tensor([0.2931, 0.2248, 0.3362, 0.1459]) -Greedy action tensor([ 0.4862, -0.2104, 0.3176, -0.0603]) tensor([0.3422, 0.1705, 0.2891, 0.1981]) -Greedy action tensor([ 0.3743, -0.9326, 0.4991, 0.0976]) tensor([0.3163, 0.0856, 0.3583, 0.2398]) -Greedy action tensor([ 0.0265, -0.6512, 0.0751, -0.9272]) tensor([0.3398, 0.1725, 0.3567, 0.1309]) -Greedy action tensor([-0.7150, -0.0866, -0.9825, -0.3722]) tensor([0.1981, 0.3713, 0.1516, 0.2791]) -Greedy action tensor([ 0.0726, 0.4065, -0.4996, -0.9356]) tensor([0.3007, 0.4199, 0.1697, 0.1097]) -Greedy action tensor([ 0.2451, -0.9995, -0.6313, 0.2963]) tensor([0.3627, 0.1045, 0.1510, 0.3818]) -Greedy action tensor([ 0.5843, -0.9927, 0.9626, -0.1780]) tensor([0.3192, 0.0659, 0.4659, 0.1489]) -Greedy action tensor([ 0.0964, -0.4947, -0.3730, 0.1833]) tensor([0.3058, 0.1693, 0.1913, 0.3336]) -Greedy action tensor([ 0.5623, 0.3539, -0.4703, -0.2708]) tensor([0.3842, 0.3119, 0.1368, 0.1670]) -Greedy action tensor([1.2946, 0.9906, 0.4221, 0.5304]) tensor([0.3815, 0.2815, 0.1594, 0.1777]) -Greedy action tensor([ 0.3253, -0.1589, 0.4070, 0.3183]) tensor([0.2707, 0.1668, 0.2937, 0.2688]) -Greedy action tensor([ 0.5527, -0.8467, 0.2101, 0.4264]) tensor([0.3524, 0.0869, 0.2502, 0.3106]) -Greedy action tensor([-1.3330, -1.1101, -0.0095, -0.0308]) tensor([0.1033, 0.1291, 0.3879, 0.3797]) -Greedy action tensor([-0.5469, -1.0214, 0.5705, -0.7285]) tensor([0.1814, 0.1129, 0.5545, 0.1513]) -Greedy action tensor([-0.1131, -0.9821, 0.3541, 0.3676]) tensor([0.2159, 0.0905, 0.3444, 0.3491]) -Greedy action tensor([ 1.5888, -0.1871, 0.0914, -0.8909]) tensor([0.6771, 0.1147, 0.1515, 0.0567]) -Greedy action tensor([-0.6307, -0.7282, -0.5611, -0.4082]) tensor([0.2365, 0.2145, 0.2535, 0.2954]) -Greedy action tensor([-0.6202, 0.0290, -0.3077, -0.2732]) tensor([0.1756, 0.3361, 0.2400, 0.2484]) -Greedy action tensor([ 0.1643, -1.1313, 0.2947, 0.8409]) tensor([0.2283, 0.0625, 0.2601, 0.4491]) -Greedy action tensor([ 0.2127, -1.6783, -0.5316, -0.6798]) tensor([0.4913, 0.0741, 0.2334, 0.2012]) -Greedy action tensor([-0.3099, -0.1830, 0.5097, -0.6816]) tensor([0.1963, 0.2229, 0.4455, 0.1354]) -Greedy action tensor([-0.8253, 0.3268, -1.3033, -0.4915]) tensor([0.1618, 0.5120, 0.1003, 0.2259]) -Greedy action tensor([-0.2499, 0.0957, 1.1071, -0.1925]) tensor([0.1359, 0.1921, 0.5280, 0.1440]) -Greedy action tensor([ 0.2983, -0.0743, -0.6186, -0.2761]) tensor([0.3771, 0.2598, 0.1508, 0.2123]) -Greedy action tensor([ 0.2435, -0.8523, 0.3750, -0.5090]) tensor([0.3395, 0.1135, 0.3871, 0.1599]) -Greedy action tensor([ 0.5528, -0.7446, -0.2960, 0.0576]) tensor([0.4328, 0.1183, 0.1852, 0.2638]) -Greedy action tensor([-0.8406, -0.8660, 0.1709, -0.6154]) tensor([0.1673, 0.1631, 0.4600, 0.2096]) -Greedy action tensor([ 1.1660, -1.4815, 0.7134, 0.8512]) tensor([0.4104, 0.0291, 0.2610, 0.2996]) -Greedy action tensor([-0.5814, -0.3036, 0.2739, -0.3421]) tensor([0.1683, 0.2222, 0.3958, 0.2138]) -Greedy action tensor([-0.1726, -1.1651, 0.5235, -0.5819]) tensor([0.2475, 0.0917, 0.4964, 0.1644]) -Greedy action tensor([ 0.9923, -0.4434, 0.5326, 0.7695]) tensor([0.3746, 0.0891, 0.2365, 0.2998]) -Greedy action tensor([-1.1712, -0.9795, -1.1281, 0.2207]) tensor([0.1374, 0.1664, 0.1435, 0.5527]) -Greedy action tensor([-0.0459, -1.8877, 0.9731, -0.9830]) tensor([0.2314, 0.0367, 0.6412, 0.0907]) -Greedy action tensor([-0.9776, -1.0939, -0.8118, -0.8456]) tensor([0.2374, 0.2114, 0.2803, 0.2709]) -Greedy action tensor([-0.3841, -0.7426, -1.5275, -0.9206]) tensor([0.3843, 0.2685, 0.1225, 0.2247]) -Greedy action tensor([-0.5847, -0.9440, 0.5392, -0.0619]) tensor([0.1548, 0.1081, 0.4762, 0.2610]) -Greedy action tensor([-1.7133, -0.2596, -0.1318, -0.0238]) tensor([0.0643, 0.2750, 0.3125, 0.3482]) -Greedy action tensor([ 1.1924, -0.1507, -0.1575, -0.2650]) tensor([0.5704, 0.1489, 0.1479, 0.1328]) -Greedy action tensor([ 0.0611, -1.1677, -0.2229, 0.3935]) tensor([0.2907, 0.0851, 0.2188, 0.4054]) -Greedy action tensor([0.1445, 0.2371, 0.0004, 0.0026]) tensor([0.2611, 0.2864, 0.2260, 0.2265]) -Greedy action tensor([-1.7167, 0.5229, -0.3816, -0.5039]) tensor([0.0570, 0.5349, 0.2165, 0.1916]) -Greedy action tensor([-0.8321, 0.0337, 0.0715, -1.1654]) tensor([0.1524, 0.3622, 0.3762, 0.1092]) -Greedy action tensor([-0.1491, -0.4507, 0.9144, -0.7968]) tensor([0.1938, 0.1434, 0.5614, 0.1014]) -Greedy action tensor([ 0.1244, -1.2264, 0.8548, 0.2781]) tensor([0.2222, 0.0576, 0.4612, 0.2591]) -Greedy action tensor([ 0.1595, -1.6657, 0.3875, -0.0735]) tensor([0.3116, 0.0502, 0.3914, 0.2468]) -Greedy action tensor([ 0.5012, -1.4041, 0.6112, 0.6820]) tensor([0.2888, 0.0430, 0.3223, 0.3460]) -Greedy action tensor([ 1.2136, -0.5863, -0.6908, 0.4678]) tensor([0.5591, 0.0924, 0.0833, 0.2652]) -Greedy action tensor([ 1.4201, -0.1537, 0.3480, 1.6073]) tensor([0.3629, 0.0752, 0.1242, 0.4376]) -Greedy action tensor([-1.0912, -1.1129, -0.0913, -0.9492]) tensor([0.1710, 0.1673, 0.4647, 0.1970]) -Greedy action tensor([ 1.3641, -0.0174, -0.8230, 0.2216]) tensor([0.5944, 0.1493, 0.0667, 0.1896]) -Greedy action tensor([ 1.5186, -0.6331, -0.1612, 0.8664]) tensor([0.5484, 0.0638, 0.1022, 0.2856]) -Greedy action tensor([ 1.1054, -0.5313, -0.0021, 0.6008]) tensor([0.4698, 0.0914, 0.1552, 0.2836]) -Greedy action tensor([ 0.9072, -0.2495, -0.9035, 0.0381]) tensor([0.5270, 0.1658, 0.0862, 0.2210]) -Greedy action tensor([ 1.5132, -1.3602, -0.1963, -0.2392]) tensor([0.7088, 0.0401, 0.1283, 0.1229]) -Greedy action tensor([ 1.7133, -0.7428, -0.4394, 0.8228]) tensor([0.6202, 0.0532, 0.0721, 0.2546]) -Greedy action tensor([ 1.4636, -0.5467, -0.3201, 0.3809]) tensor([0.6095, 0.0816, 0.1024, 0.2064]) -Greedy action tensor([ 1.6194, -0.5021, -0.4483, 0.2648]) tensor([0.6647, 0.0797, 0.0841, 0.1715]) -Greedy action tensor([ 1.0226, -0.0044, -0.6289, 0.0256]) tensor([0.5212, 0.1866, 0.0999, 0.1923]) -Greedy action tensor([ 1.1322, -0.4580, -0.3139, 0.1358]) tensor([0.5529, 0.1127, 0.1302, 0.2041]) -Greedy action tensor([ 1.3369, -0.5089, -0.2842, 0.1057]) tensor([0.6070, 0.0958, 0.1200, 0.1772]) -Greedy action tensor([ 0.9400, -0.5244, -0.2359, -0.0458]) tensor([0.5228, 0.1209, 0.1613, 0.1951]) -Greedy action tensor([ 1.9479, -0.2941, -0.7553, 0.2084]) tensor([0.7414, 0.0788, 0.0497, 0.1302]) -Greedy action tensor([ 1.7468, 0.4283, -0.5989, 0.4567]) tensor([0.6103, 0.1633, 0.0585, 0.1680]) -Greedy action tensor([ 1.2166, -0.5495, -0.7792, 0.6244]) tensor([0.5376, 0.0919, 0.0731, 0.2974]) -Greedy action tensor([ 1.4162, -0.4669, -0.4521, 0.3002]) tensor([0.6120, 0.0931, 0.0945, 0.2005]) -Greedy action tensor([ 1.5653, -0.2036, -0.6882, 0.1834]) tensor([0.6550, 0.1117, 0.0688, 0.1645]) -Greedy action tensor([ 1.9961, -0.8777, -0.3052, 0.2144]) tensor([0.7547, 0.0426, 0.0756, 0.1271]) -Greedy action tensor([ 1.6847, -0.4037, -0.4834, 0.3512]) tensor([0.6659, 0.0825, 0.0762, 0.1755]) -Greedy action tensor([ 1.6427, 0.6150, -0.2895, 0.0088]) tensor([0.5890, 0.2108, 0.0853, 0.1149]) -Greedy action tensor([ 1.3915, -0.1579, -0.4261, 0.0327]) tensor([0.6128, 0.1302, 0.0995, 0.1575]) -Greedy action tensor([ 2.0537, -0.6100, -0.6745, 0.3220]) tensor([0.7622, 0.0531, 0.0498, 0.1349]) -Greedy action tensor([ 0.9864, 0.0233, -0.5324, 0.3827]) tensor([0.4657, 0.1777, 0.1020, 0.2546]) -Greedy action tensor([ 1.6719, -0.3868, -0.2397, 0.1538]) tensor([0.6691, 0.0854, 0.0989, 0.1466]) -Greedy action tensor([ 1.6363, -0.4894, -1.1549, 0.1388]) tensor([0.7120, 0.0850, 0.0437, 0.1593]) -Greedy action tensor([ 1.4323, -0.1722, -0.6998, 0.3849]) tensor([0.5986, 0.1203, 0.0710, 0.2100]) -Greedy action tensor([ 1.7043, 0.1955, -0.3471, 0.0648]) tensor([0.6477, 0.1433, 0.0833, 0.1257]) -Greedy action tensor([ 2.6636, -0.8816, 0.1150, 1.2220]) tensor([0.7443, 0.0215, 0.0582, 0.1761]) -Greedy action tensor([ 1.4289, -0.9476, -0.0200, -0.0784]) tensor([0.6455, 0.0600, 0.1516, 0.1430]) -Greedy action tensor([ 1.1960, -0.0650, -0.1572, 0.2924]) tensor([0.5136, 0.1455, 0.1327, 0.2081]) -Greedy action tensor([ 1.2711, -0.3793, -0.5637, 0.0830]) tensor([0.6037, 0.1159, 0.0964, 0.1840]) -Greedy action tensor([ 1.8654, -1.0381, -0.0590, 0.1356]) tensor([0.7256, 0.0398, 0.1059, 0.1287]) -Greedy action tensor([ 1.8582, -1.2919, -0.0347, 0.3843]) tensor([0.7030, 0.0301, 0.1059, 0.1610]) -Greedy action tensor([ 2.1404, -0.6565, -0.1570, 0.5097]) tensor([0.7368, 0.0449, 0.0741, 0.1442]) -Greedy action tensor([ 1.6170, -0.3970, -0.5330, 0.6827]) tensor([0.6087, 0.0812, 0.0709, 0.2391]) -Greedy action tensor([ 1.2216, -0.6963, -0.6423, 0.1716]) tensor([0.6054, 0.0889, 0.0939, 0.2118]) -Greedy action tensor([ 2.3056, -0.8435, -0.2917, 0.5299]) tensor([0.7772, 0.0333, 0.0579, 0.1316]) -Greedy action tensor([ 1.4418, -0.6069, -0.2187, 0.0219]) tensor([0.6407, 0.0826, 0.1218, 0.1549]) -Greedy action tensor([ 1.5411, -0.5394, -0.5361, 0.4770]) tensor([0.6269, 0.0783, 0.0785, 0.2163]) -Greedy action tensor([ 1.4063, -0.5787, -0.8641, 0.8107]) tensor([0.5581, 0.0767, 0.0576, 0.3076]) -Greedy action tensor([ 1.7694, -0.4611, -1.0518, 0.6692]) tensor([0.6667, 0.0717, 0.0397, 0.2219]) -Greedy action tensor([ 1.8242, -0.8318, -0.3549, 0.6111]) tensor([0.6754, 0.0474, 0.0764, 0.2008]) -Greedy action tensor([ 1.6292, -0.7149, -0.5285, 0.3459]) tensor([0.6718, 0.0644, 0.0776, 0.1862]) -Greedy action tensor([ 1.3638, -0.1751, -0.2538, 0.2903]) tensor([0.5699, 0.1223, 0.1130, 0.1948]) -Greedy action tensor([ 2.0220, -1.2067, -0.1508, 0.5845]) tensor([0.7189, 0.0285, 0.0819, 0.1708]) -Greedy action tensor([ 1.0973, -0.0536, -0.1703, 0.1574]) tensor([0.5029, 0.1591, 0.1416, 0.1965]) -Greedy action tensor([ 1.6184, -0.8450, 0.0230, 0.4104]) tensor([0.6302, 0.0537, 0.1278, 0.1883]) -Greedy action tensor([ 1.6919, -0.6365, -0.0601, 0.3800]) tensor([0.6493, 0.0633, 0.1126, 0.1749]) -Greedy action tensor([ 1.4807, -1.0857, -0.3396, 0.8374]) tensor([0.5668, 0.0435, 0.0918, 0.2979]) -Greedy action tensor([ 1.2566, -0.4948, -0.4269, 0.0074]) tensor([0.6075, 0.1054, 0.1128, 0.1742]) -Greedy action tensor([ 1.4174, -0.1549, -0.4532, 0.0122]) tensor([0.6223, 0.1292, 0.0959, 0.1527]) -Greedy action tensor([ 1.3209, -0.5547, -0.3607, 0.5582]) tensor([0.5538, 0.0849, 0.1030, 0.2583]) -Greedy action tensor([ 1.5991, -0.5574, -0.3211, 0.0514]) tensor([0.6779, 0.0785, 0.0994, 0.1442]) -Greedy action tensor([ 1.6220, -1.0026, 0.0042, 0.4718]) tensor([0.6300, 0.0457, 0.1249, 0.1994]) -Greedy action tensor([ 1.6236, -0.8099, -0.3358, 0.6444]) tensor([0.6233, 0.0547, 0.0879, 0.2341]) -Greedy action tensor([ 1.2575, -0.5778, -1.2201, 0.5551]) tensor([0.5751, 0.0918, 0.0483, 0.2849]) -Greedy action tensor([ 2.1687, -0.6962, -0.1265, 0.5873]) tensor([0.7335, 0.0418, 0.0739, 0.1509]) -Greedy action tensor([ 1.3879, 0.1000, -0.0668, -0.2010]) tensor([0.5836, 0.1610, 0.1363, 0.1191]) -Greedy action tensor([ 1.8922, -0.7149, -1.0149, -0.3181]) tensor([0.8077, 0.0596, 0.0441, 0.0886]) -Greedy action tensor([ 1.5367, -0.6289, -0.2851, 0.3398]) tensor([0.6335, 0.0726, 0.1025, 0.1914]) -Greedy action tensor([ 0.7967, -0.7181, -0.2329, -0.1653]) tensor([0.5104, 0.1122, 0.1823, 0.1951]) -Greedy action tensor([ 1.3314, -0.5139, -0.6497, 0.5787]) tensor([0.5659, 0.0894, 0.0781, 0.2666]) -Greedy action tensor([ 1.3645, 0.0571, -0.3300, -0.0440]) tensor([0.5887, 0.1592, 0.1081, 0.1439]) -Greedy action tensor([ 1.3017, -0.3203, -0.8596, 0.2778]) tensor([0.5981, 0.1181, 0.0689, 0.2149]) -Greedy action tensor([ 1.4934, -0.6293, -0.4933, 0.4686]) tensor([0.6189, 0.0741, 0.0849, 0.2221]) -Greedy action tensor([ 1.4728, -0.0634, -0.8694, 0.6196]) tensor([0.5756, 0.1239, 0.0553, 0.2452]) -Greedy action tensor([ 1.5480, -0.3847, -0.9393, 0.0952]) tensor([0.6841, 0.0990, 0.0569, 0.1600]) -Greedy action tensor([ 1.3526, -0.7518, -0.4727, 0.6356]) tensor([0.5646, 0.0688, 0.0910, 0.2756]) -Greedy action tensor([ 1.9062, 0.5665, -0.0685, -0.1276]) tensor([0.6529, 0.1710, 0.0906, 0.0854]) -Greedy action tensor([ 1.4937, -0.2967, -0.7806, 0.2139]) tensor([0.6461, 0.1078, 0.0665, 0.1797]) -Greedy action tensor([ 1.5540, -0.8011, -0.5087, -0.3376]) tensor([0.7284, 0.0691, 0.0926, 0.1099]) -Greedy action tensor([ 1.5054, -0.6268, -0.4309, 0.1656]) tensor([0.6559, 0.0778, 0.0946, 0.1718]) -Greedy action tensor([ 1.3697, -0.6139, -0.6711, 0.2652]) tensor([0.6254, 0.0860, 0.0813, 0.2072]) -Greedy action tensor([ 1.1704, -0.2740, -0.0457, 0.1663]) tensor([0.5267, 0.1242, 0.1561, 0.1930]) -Greedy action tensor([ 1.3496, -0.5654, -0.8962, 0.1848]) tensor([0.6389, 0.0941, 0.0676, 0.1993]) -Greedy action tensor([ 1.8132, -0.7131, -0.7948, 1.1276]) tensor([0.6034, 0.0482, 0.0445, 0.3039]) -Greedy action tensor([ 1.3880, -0.4776, -0.2989, 0.6247]) tensor([0.5537, 0.0857, 0.1025, 0.2581]) -Greedy action tensor([ 1.3918, -0.3348, -0.3480, 0.0662]) tensor([0.6176, 0.1099, 0.1084, 0.1641]) -Greedy action tensor([ 1.7208, 0.1549, -0.1691, 0.2260]) tensor([0.6312, 0.1319, 0.0954, 0.1416]) -Greedy action tensor([ 1.8465, -1.0090, -0.7409, 0.2481]) tensor([0.7491, 0.0431, 0.0563, 0.1515]) -Greedy action tensor([ 1.9485, -0.6932, -0.4888, 0.2695]) tensor([0.7434, 0.0530, 0.0650, 0.1387]) -Greedy action tensor([-1.9225, -0.4246, 0.6582, -0.1703]) tensor([0.0409, 0.1830, 0.5402, 0.2359]) -Greedy action tensor([-1.5701, -0.5479, 0.4676, 0.0297]) tensor([0.0610, 0.1694, 0.4677, 0.3019]) -Greedy action tensor([-1.1905, 0.3346, 0.2613, -0.0382]) tensor([0.0767, 0.3526, 0.3277, 0.2429]) -Greedy action tensor([-1.8478, -0.4784, 0.6183, -0.1451]) tensor([0.0451, 0.1772, 0.5305, 0.2473]) -Greedy action tensor([-1.8650, -0.2400, 0.6127, -0.1380]) tensor([0.0423, 0.2150, 0.5045, 0.2381]) -Greedy action tensor([-1.3139, 0.6783, 0.2868, 0.0389]) tensor([0.0583, 0.4274, 0.2889, 0.2255]) -Greedy action tensor([-1.8672, -0.4459, 0.6313, -0.1405]) tensor([0.0436, 0.1807, 0.5305, 0.2452]) -Greedy action tensor([-1.2693, -0.5288, -0.1705, -0.6202]) tensor([0.1248, 0.2618, 0.3745, 0.2389]) -Greedy action tensor([-1.8551, -0.3390, 0.6087, -0.1342]) tensor([0.0437, 0.1989, 0.5132, 0.2442]) -Greedy action tensor([-1.8339, -0.3251, 0.5961, -0.1400]) tensor([0.0448, 0.2026, 0.5089, 0.2437]) -Greedy action tensor([-1.9346, -0.4271, 0.6523, -0.1837]) tensor([0.0407, 0.1838, 0.5410, 0.2345]) -Greedy action tensor([-1.7789, -0.2305, 0.5430, -0.1109]) tensor([0.0472, 0.2219, 0.4809, 0.2501]) -Greedy action tensor([-1.2312, -0.0939, 0.6402, 0.4891]) tensor([0.0617, 0.1925, 0.4010, 0.3448]) -Greedy action tensor([-1.8552, -0.4472, 0.6219, -0.1443]) tensor([0.0444, 0.1815, 0.5285, 0.2457]) -Greedy action tensor([-1.4020, -0.2171, 0.3801, -0.0109]) tensor([0.0703, 0.2298, 0.4175, 0.2824]) -Greedy action tensor([-1.9288, -0.2984, 0.6362, -0.1737]) tensor([0.0402, 0.2051, 0.5223, 0.2324]) -Greedy action tensor([-0.6062, -0.3663, 0.1979, -0.0114]) tensor([0.1583, 0.2012, 0.3537, 0.2869]) -Greedy action tensor([-1.8652, -0.3388, 0.6002, -0.1310]) tensor([0.0434, 0.1998, 0.5109, 0.2459]) -Greedy action tensor([-1.6802, -0.3874, 0.5733, -0.0078]) tensor([0.0513, 0.1869, 0.4885, 0.2732]) -Greedy action tensor([-0.9955, 0.0565, -0.0261, -0.1473]) tensor([0.1132, 0.3241, 0.2984, 0.2643]) -Greedy action tensor([-1.5111, -0.3182, 0.5657, 0.4375]) tensor([0.0518, 0.1709, 0.4135, 0.3638]) -Greedy action tensor([-1.9353, -0.4212, 0.6543, -0.1741]) tensor([0.0405, 0.1841, 0.5397, 0.2357]) -Greedy action tensor([-1.9109, -0.4501, 0.6532, -0.1653]) tensor([0.0416, 0.1793, 0.5406, 0.2385]) -Greedy action tensor([-1.8163, -0.1049, 0.5394, -0.1238]) tensor([0.0444, 0.2459, 0.4684, 0.2413]) -Greedy action tensor([-1.8942, -0.3665, 0.6420, -0.1624]) tensor([0.0419, 0.1929, 0.5287, 0.2365]) -Greedy action tensor([-1.5541, -0.2089, 0.5063, 0.0958]) tensor([0.0559, 0.2145, 0.4386, 0.2910]) -Greedy action tensor([-1.8889, -0.4393, 0.6416, -0.1521]) tensor([0.0426, 0.1813, 0.5345, 0.2417]) -Greedy action tensor([-1.7015, -0.2870, 0.5229, -0.0916]) tensor([0.0516, 0.2125, 0.4776, 0.2583]) -Greedy action tensor([-1.2962, 0.5190, 0.3440, 0.2903]) tensor([0.0582, 0.3574, 0.3000, 0.2843]) -Greedy action tensor([-1.8948, -0.3993, 0.6531, -0.1534]) tensor([0.0418, 0.1863, 0.5337, 0.2383]) -Greedy action tensor([-1.5123, 0.2660, 0.3705, -0.0707]) tensor([0.0564, 0.3341, 0.3709, 0.2386]) -Greedy action tensor([-0.9124, -0.3505, 0.1182, -0.6316]) tensor([0.1453, 0.2549, 0.4073, 0.1924]) -Greedy action tensor([-1.6964, -0.4765, 0.5307, 0.0558]) tensor([0.0515, 0.1743, 0.4773, 0.2969]) -Greedy action tensor([-1.9172, -0.4343, 0.6566, -0.1649]) tensor([0.0412, 0.1814, 0.5400, 0.2375]) -Greedy action tensor([-1.5100, -0.4967, 0.4994, 0.1945]) tensor([0.0598, 0.1648, 0.4463, 0.3290]) -Greedy action tensor([-1.0856, -0.1618, 0.3000, 0.0480]) tensor([0.0941, 0.2371, 0.3763, 0.2925]) -Greedy action tensor([-1.7737, -0.4093, 0.5944, -0.0746]) tensor([0.0475, 0.1858, 0.5070, 0.2597]) -Greedy action tensor([-1.8961, -0.3518, 0.6335, -0.1468]) tensor([0.0417, 0.1953, 0.5232, 0.2398]) -Greedy action tensor([-1.8845, -0.3344, 0.6186, -0.1387]) tensor([0.0423, 0.1991, 0.5164, 0.2422]) -Greedy action tensor([-1.7831, -0.3192, 0.6458, -0.0124]) tensor([0.0444, 0.1917, 0.5033, 0.2606]) -Greedy action tensor([-1.5304, -0.2975, 0.4617, -0.0737]) tensor([0.0623, 0.2137, 0.4566, 0.2673]) -Greedy action tensor([-1.4586, -0.0393, 0.3808, -0.0252]) tensor([0.0640, 0.2647, 0.4029, 0.2685]) -Greedy action tensor([-1.3870, 0.1828, 0.3587, -0.0690]) tensor([0.0655, 0.3147, 0.3752, 0.2446]) -Greedy action tensor([-1.9317, -0.4293, 0.6534, -0.1732]) tensor([0.0407, 0.1829, 0.5401, 0.2363]) -Greedy action tensor([-1.7200, 0.2161, 0.4364, -0.0076]) tensor([0.0452, 0.3134, 0.3907, 0.2506]) -Greedy action tensor([-1.8091, -0.4906, 0.6006, -0.1355]) tensor([0.0472, 0.1763, 0.5250, 0.2515]) -Greedy action tensor([-1.9196, -0.4424, 0.6534, -0.1690]) tensor([0.0412, 0.1807, 0.5406, 0.2375]) -Greedy action tensor([-1.9054, -0.4479, 0.6474, -0.1652]) tensor([0.0420, 0.1802, 0.5388, 0.2391]) -Greedy action tensor([-1.0561, 0.8698, 0.1064, 0.3190]) tensor([0.0666, 0.4570, 0.2130, 0.2634]) -Greedy action tensor([-1.6552, -0.3131, 0.6045, -0.0056]) tensor([0.0510, 0.1951, 0.4885, 0.2654]) -Greedy action tensor([-1.8966, -0.4375, 0.6417, -0.1604]) tensor([0.0423, 0.1820, 0.5356, 0.2401]) -Greedy action tensor([-1.8601, -0.4554, 0.6285, -0.1452]) tensor([0.0441, 0.1797, 0.5312, 0.2450]) -Greedy action tensor([-1.9188, -0.3281, 0.6339, -0.1673]) tensor([0.0408, 0.2002, 0.5239, 0.2351]) -Greedy action tensor([-1.8594, -0.4872, 0.6450, -0.1392]) tensor([0.0439, 0.1732, 0.5375, 0.2454]) -Greedy action tensor([-0.8202, -0.5977, 0.2248, 0.2877]) tensor([0.1231, 0.1538, 0.3501, 0.3729]) -Greedy action tensor([-1.8103, -0.4291, 0.5998, -0.1187]) tensor([0.0464, 0.1847, 0.5169, 0.2520]) -Greedy action tensor([-1.3798, -0.0098, -0.0015, -0.2266]) tensor([0.0828, 0.3260, 0.3287, 0.2625]) -Greedy action tensor([-1.9031, -0.4414, 0.6503, -0.1560]) tensor([0.0418, 0.1805, 0.5376, 0.2401]) -Greedy action tensor([-1.6292, -0.4376, 0.5001, -0.0086]) tensor([0.0563, 0.1854, 0.4735, 0.2847]) -Greedy action tensor([-1.8444, -0.3769, 0.6512, -0.1138]) tensor([0.0433, 0.1877, 0.5248, 0.2442]) -Greedy action tensor([-1.1487, 0.7066, 0.2245, -0.0586]) tensor([0.0699, 0.4466, 0.2758, 0.2078]) -Greedy action tensor([-1.6525, -0.3455, 0.5595, -0.0277]) tensor([0.0529, 0.1954, 0.4831, 0.2686]) -Greedy action tensor([-0.7429, 0.4055, 0.0200, -0.0028]) tensor([0.1191, 0.3756, 0.2555, 0.2497]) -Greedy action tensor([-1.8153, -0.4376, 0.5994, -0.1351]) tensor([0.0465, 0.1843, 0.5198, 0.2494]) -Greedy action tensor([-1.7506, -0.3460, 0.5530, -0.1018]) tensor([0.0493, 0.2008, 0.4935, 0.2564]) -Greedy action tensor([-0.8610, 0.8759, 0.1240, 0.0694]) tensor([0.0841, 0.4776, 0.2252, 0.2132]) -Greedy action tensor([-1.8216, -0.4796, 0.5958, -0.1158]) tensor([0.0464, 0.1776, 0.5205, 0.2555]) -Greedy action tensor([-1.8975, -0.4361, 0.6435, -0.1598]) tensor([0.0422, 0.1820, 0.5358, 0.2400]) -Greedy action tensor([-1.8028, 0.0418, 0.5224, -0.0388]) tensor([0.0428, 0.2704, 0.4373, 0.2495]) -Greedy action tensor([0.1025, 0.8419, 0.0127, 0.4929]) tensor([0.1823, 0.3818, 0.1666, 0.2693]) -Greedy action tensor([-1.7769, -0.3905, 0.6230, -0.0450]) tensor([0.0461, 0.1846, 0.5085, 0.2607]) -Greedy action tensor([-1.3345, -0.4279, 0.3571, 0.1718]) tensor([0.0746, 0.1846, 0.4047, 0.3362]) -Greedy action tensor([-1.7498, -0.4668, 0.5045, -0.2852]) tensor([0.0542, 0.1954, 0.5161, 0.2343]) -Greedy action tensor([-1.7604, -0.3103, 0.5973, -0.1014]) tensor([0.0474, 0.2022, 0.5012, 0.2492]) -Greedy action tensor([-1.8154, -0.4501, 0.7387, 0.1340]) tensor([0.0403, 0.1579, 0.5185, 0.2832]) -Greedy action tensor([-0.5984, 0.7103, 0.0706, 0.2380]) tensor([0.1116, 0.4130, 0.2178, 0.2575]) -Greedy action tensor([-1.9229, -0.3526, 0.6380, -0.1833]) tensor([0.0409, 0.1966, 0.5295, 0.2329]) -Greedy action tensor([-1.9194, -0.4379, 0.6596, -0.1678]) tensor([0.0411, 0.1807, 0.5415, 0.2367]) -Greedy action tensor([-1.8972, -0.4521, 0.6443, -0.1607]) tensor([0.0423, 0.1796, 0.5377, 0.2404]) -Greedy action tensor([-0.8217, 0.1512, 0.2337, -0.2230]) tensor([0.1199, 0.3173, 0.3445, 0.2182]) -Greedy action tensor([-1.8795, -0.2538, 0.6121, -0.1441]) tensor([0.0420, 0.2132, 0.5069, 0.2380]) -Greedy action tensor([ 0.8831, -0.1694, -0.0341, -0.0660]) tensor([0.4682, 0.1634, 0.1871, 0.1812]) -Greedy action tensor([ 0.6684, -0.1148, 0.0400, -0.0305]) tensor([0.4020, 0.1837, 0.2145, 0.1998]) -Greedy action tensor([ 0.8807, -0.1156, -0.2677, -0.5570]) tensor([0.5198, 0.1919, 0.1649, 0.1234]) -Greedy action tensor([ 0.8904, -0.5305, -0.2198, -0.4031]) tensor([0.5419, 0.1309, 0.1786, 0.1486]) -Greedy action tensor([ 0.8950, -0.5513, -0.0794, -0.2764]) tensor([0.5201, 0.1225, 0.1963, 0.1612]) -Greedy action tensor([ 0.8023, -0.2011, -0.0125, 0.0308]) tensor([0.4402, 0.1614, 0.1949, 0.2035]) -Greedy action tensor([ 0.3644, -0.0647, -0.0904, -0.1079]) tensor([0.3437, 0.2238, 0.2181, 0.2143]) -Greedy action tensor([0.4580, 0.0882, 0.0206, 0.0612]) tensor([0.3323, 0.2296, 0.2146, 0.2235]) -Greedy action tensor([ 0.6865, -0.1798, 0.0356, -0.2621]) tensor([0.4293, 0.1805, 0.2239, 0.1663]) -Greedy action tensor([ 0.7922, -0.2071, -0.0672, -0.0947]) tensor([0.4538, 0.1671, 0.1922, 0.1869]) -Greedy action tensor([ 0.5892, -0.3303, -0.0966, -0.3038]) tensor([0.4326, 0.1725, 0.2179, 0.1771]) -Greedy action tensor([ 0.9047, -0.4643, 0.2879, -0.5235]) tensor([0.4917, 0.1251, 0.2654, 0.1179]) -Greedy action tensor([ 0.7799, -0.8045, 0.0071, -0.3149]) tensor([0.4997, 0.1025, 0.2307, 0.1672]) -Greedy action tensor([ 1.0167, -0.2529, -0.1061, -0.3549]) tensor([0.5376, 0.1510, 0.1749, 0.1364]) -Greedy action tensor([ 0.5528, -0.2228, -0.0210, -0.1043]) tensor([0.3934, 0.1811, 0.2216, 0.2039]) -Greedy action tensor([ 0.5667, -0.0789, -0.0011, 0.0104]) tensor([0.3753, 0.1968, 0.2127, 0.2152]) -Greedy action tensor([ 0.8734, -0.6421, 0.2026, -0.8427]) tensor([0.5233, 0.1150, 0.2676, 0.0941]) -Greedy action tensor([ 0.7351, -0.4179, 0.0620, -0.1760]) tensor([0.4489, 0.1417, 0.2290, 0.1805]) -Greedy action tensor([ 0.5820, -0.2321, 0.0191, -0.1209]) tensor([0.3988, 0.1767, 0.2271, 0.1974]) -Greedy action tensor([ 0.6730, -0.2250, -0.0031, -0.0902]) tensor([0.4198, 0.1710, 0.2135, 0.1957]) -Greedy action tensor([ 1.3069, -0.8219, 0.0585, -0.4991]) tensor([0.6369, 0.0758, 0.1827, 0.1046]) -Greedy action tensor([ 0.7296, -0.4096, -0.1370, -0.3259]) tensor([0.4788, 0.1533, 0.2013, 0.1666]) -Greedy action tensor([ 0.5151, -0.4241, -0.3202, -0.1403]) tensor([0.4266, 0.1668, 0.1850, 0.2215]) -Greedy action tensor([ 1.0364, -0.6748, -0.1235, -0.7935]) tensor([0.6044, 0.1092, 0.1895, 0.0970]) -Greedy action tensor([ 0.8604, -0.3389, -0.0159, -0.1246]) tensor([0.4782, 0.1441, 0.1991, 0.1786]) -Greedy action tensor([ 0.9312, -0.8999, 0.2871, -0.7644]) tensor([0.5351, 0.0857, 0.2810, 0.0982]) -Greedy action tensor([ 0.7770, -0.0251, -0.0376, -0.4215]) tensor([0.4560, 0.2045, 0.2019, 0.1376]) -Greedy action tensor([ 0.8489, -0.7517, 0.1862, -0.3000]) tensor([0.4916, 0.0992, 0.2534, 0.1558]) -Greedy action tensor([0.5281, 0.1552, 0.0023, 0.0226]) tensor([0.3469, 0.2389, 0.2050, 0.2092]) -Greedy action tensor([ 1.0661, -0.5511, -0.0916, -0.4717]) tensor([0.5789, 0.1149, 0.1819, 0.1244]) -Greedy action tensor([ 0.4847, -0.1768, -0.0008, -0.0888]) tensor([0.3710, 0.1915, 0.2283, 0.2091]) -Greedy action tensor([ 0.6958, 0.2243, -0.0630, -0.1280]) tensor([0.3951, 0.2466, 0.1850, 0.1734]) -Greedy action tensor([ 0.7081, -0.0750, -0.0618, -0.5463]) tensor([0.4535, 0.2072, 0.2100, 0.1294]) -Greedy action tensor([ 0.7959, -0.3625, -0.1695, -0.1698]) tensor([0.4818, 0.1513, 0.1835, 0.1834]) -Greedy action tensor([ 0.4976, 0.5361, -0.2025, 0.2161]) tensor([0.3039, 0.3158, 0.1509, 0.2293]) -Greedy action tensor([ 0.3319, -0.0097, -0.1841, 0.0196]) tensor([0.3290, 0.2338, 0.1964, 0.2408]) -Greedy action tensor([0.5669, 0.0222, 0.0633, 0.0410]) tensor([0.3603, 0.2090, 0.2178, 0.2130]) -Greedy action tensor([ 1.2082, -0.6286, -0.2847, -0.5305]) tensor([0.6411, 0.1021, 0.1441, 0.1127]) -Greedy action tensor([ 0.8288, -0.5369, 0.1011, -0.5302]) tensor([0.5012, 0.1279, 0.2421, 0.1288]) -Greedy action tensor([ 0.4600, 0.0736, -0.1663, -0.1362]) tensor([0.3617, 0.2458, 0.1933, 0.1992]) -Greedy action tensor([0.6260, 0.1017, 0.0698, 0.1031]) tensor([0.3626, 0.2146, 0.2079, 0.2149]) -Greedy action tensor([ 0.6150, -0.0387, -0.0512, -0.1650]) tensor([0.4013, 0.2087, 0.2061, 0.1839]) -Greedy action tensor([ 0.5723, -0.0651, 0.1072, 0.0427]) tensor([0.3642, 0.1925, 0.2288, 0.2145]) -Greedy action tensor([ 0.8193, -0.4413, -0.1993, -0.2664]) tensor([0.5045, 0.1430, 0.1822, 0.1704]) -Greedy action tensor([ 0.9137, -0.4696, 0.1833, -0.5762]) tensor([0.5108, 0.1281, 0.2460, 0.1151]) -Greedy action tensor([ 0.4196, 0.1104, 0.1373, -0.2011]) tensor([0.3305, 0.2426, 0.2492, 0.1777]) -Greedy action tensor([ 0.6165, -0.3014, -0.0740, -0.2411]) tensor([0.4301, 0.1718, 0.2156, 0.1824]) -Greedy action tensor([ 1.0612, -0.7933, 0.0831, -0.5035]) tensor([0.5742, 0.0899, 0.2159, 0.1201]) -Greedy action tensor([ 0.5368, -0.4154, -0.1237, -0.2215]) tensor([0.4218, 0.1628, 0.2179, 0.1976]) -Greedy action tensor([ 0.8013, -0.3691, -0.1169, -0.5692]) tensor([0.5093, 0.1580, 0.2033, 0.1294]) -Greedy action tensor([ 0.9718, -0.7363, 0.0471, -0.6663]) tensor([0.5643, 0.1023, 0.2238, 0.1097]) -Greedy action tensor([ 0.8310, -0.8355, -0.0843, -0.3530]) tensor([0.5276, 0.0997, 0.2113, 0.1615]) -Greedy action tensor([ 0.6535, -0.1309, -0.1549, -0.3269]) tensor([0.4392, 0.2004, 0.1957, 0.1648]) -Greedy action tensor([ 0.3131, 0.0402, -0.2403, 0.0734]) tensor([0.3202, 0.2437, 0.1841, 0.2520]) -Greedy action tensor([ 0.4530, -0.4803, -0.0888, -0.2872]) tensor([0.4078, 0.1604, 0.2372, 0.1945]) -Greedy action tensor([ 0.8300, -0.4880, -0.0469, -0.1700]) tensor([0.4874, 0.1305, 0.2028, 0.1793]) -Greedy action tensor([ 1.0844, -0.5862, -0.1809, -0.5223]) tensor([0.5985, 0.1126, 0.1689, 0.1200]) -Greedy action tensor([ 0.6868, -0.4414, -0.1083, -0.1526]) tensor([0.4531, 0.1466, 0.2046, 0.1957]) -Greedy action tensor([ 0.9225, -0.4751, 0.0327, -0.2517]) tensor([0.5084, 0.1257, 0.2088, 0.1571]) -Greedy action tensor([ 0.7202, -0.6567, -0.0611, -0.3746]) tensor([0.4891, 0.1234, 0.2239, 0.1636]) -Greedy action tensor([ 0.5317, -0.2950, -0.0235, -0.1525]) tensor([0.3975, 0.1739, 0.2281, 0.2005]) -Greedy action tensor([ 0.8941, -0.4586, 0.0676, -0.4049]) tensor([0.5079, 0.1313, 0.2222, 0.1386]) -Greedy action tensor([ 0.7980, -0.5249, 0.1188, -0.5266]) tensor([0.4904, 0.1306, 0.2486, 0.1304]) -Greedy action tensor([ 0.3249, -0.3474, -0.2253, -0.0337]) tensor([0.3589, 0.1833, 0.2070, 0.2508]) -Greedy action tensor([ 0.4398, -0.2320, -0.1924, -0.3137]) tensor([0.3979, 0.2033, 0.2115, 0.1873]) -Greedy action tensor([ 0.9331, -0.7479, -0.0354, -0.4835]) tensor([0.5530, 0.1030, 0.2099, 0.1341]) -Greedy action tensor([ 0.6529, -0.2449, 0.0835, -0.2604]) tensor([0.4211, 0.1716, 0.2383, 0.1690]) -Greedy action tensor([ 0.2352, 0.1127, -0.1139, -0.1106]) tensor([0.3032, 0.2683, 0.2139, 0.2146]) -Greedy action tensor([ 0.7403, -0.3507, -0.1161, -0.2263]) tensor([0.4671, 0.1569, 0.1984, 0.1777]) -Greedy action tensor([ 0.7021, -0.5686, -0.1806, -0.5464]) tensor([0.5047, 0.1417, 0.2088, 0.1448]) -Greedy action tensor([ 0.7196, -0.3018, -0.0757, -0.1442]) tensor([0.4478, 0.1613, 0.2022, 0.1888]) -Greedy action tensor([ 0.7219, -0.5796, -0.1576, -0.4380]) tensor([0.4998, 0.1360, 0.2074, 0.1567]) -Greedy action tensor([ 0.4901, -0.2418, -0.1887, -0.1272]) tensor([0.3956, 0.1903, 0.2007, 0.2134]) -Greedy action tensor([ 1.0556, -1.0770, 0.2489, -0.5640]) tensor([0.5673, 0.0672, 0.2532, 0.1123]) -Greedy action tensor([ 0.7462, -0.2513, 0.0103, -0.0771]) tensor([0.4373, 0.1613, 0.2095, 0.1920]) -Greedy action tensor([ 0.3555, -0.0140, 0.1308, 0.0288]) tensor([0.3114, 0.2152, 0.2487, 0.2246]) -Greedy action tensor([ 0.4976, -0.2855, -0.0676, -0.1661]) tensor([0.3937, 0.1799, 0.2237, 0.2027]) -Greedy action tensor([ 0.8793, -0.7254, 0.0461, -0.3717]) tensor([0.5203, 0.1046, 0.2262, 0.1489]) -Greedy action tensor([ 1.1936, -0.6852, 0.1030, -0.5795]) tensor([0.6029, 0.0921, 0.2026, 0.1024]) -Greedy action tensor([ 1.0645, -0.6825, 0.1367, -0.5831]) tensor([0.5675, 0.0989, 0.2244, 0.1092]) -Greedy action tensor([ 0.8294, -0.8682, 0.1998, -0.4323]) tensor([0.5002, 0.0916, 0.2665, 0.1417]) -Greedy action tensor([ 0.5308, 0.1303, 0.0131, -0.1685]) tensor([0.3620, 0.2425, 0.2157, 0.1799]) -Greedy action tensor([ 0.4210, -0.8064, 0.2991, 0.3988]) tensor([0.3168, 0.0928, 0.2805, 0.3099]) -Greedy action tensor([-0.8633, -1.6893, 0.0686, 1.1794]) tensor([0.0856, 0.0375, 0.2172, 0.6597]) -Greedy action tensor([ 0.2142, -0.6300, 0.6203, -0.6152]) tensor([0.2970, 0.1277, 0.4458, 0.1296]) -Greedy action tensor([ 1.6308, -1.1636, 1.5025, -0.4731]) tensor([0.4848, 0.0296, 0.4264, 0.0591]) -Greedy action tensor([ 0.3252, -0.3799, 1.7082, -0.8547]) tensor([0.1728, 0.0854, 0.6888, 0.0531]) -Greedy action tensor([-0.1765, -0.9958, 0.0837, -0.5453]) tensor([0.2916, 0.1285, 0.3782, 0.2017]) -Greedy action tensor([-0.4460, -1.4782, -0.1853, -0.8094]) tensor([0.2986, 0.1063, 0.3875, 0.2076]) -Greedy action tensor([ 0.2666, -1.1495, 0.0103, -0.0399]) tensor([0.3633, 0.0882, 0.2811, 0.2674]) -Greedy action tensor([-0.1244, -2.3035, -0.0509, -0.2698]) tensor([0.3274, 0.0370, 0.3524, 0.2831]) -Greedy action tensor([ 0.0122, 0.1939, -0.3225, 0.3618]) tensor([0.2308, 0.2768, 0.1651, 0.3273]) -Greedy action tensor([-1.3000, -1.2308, 0.2611, -0.8322]) tensor([0.1186, 0.1271, 0.5650, 0.1893]) -Greedy action tensor([-0.7326, -1.4376, 0.3028, -1.0843]) tensor([0.1994, 0.0985, 0.5617, 0.1403]) -Greedy action tensor([ 0.8540, -0.5241, 0.5876, 1.0993]) tensor([0.3034, 0.0765, 0.2324, 0.3877]) -Greedy action tensor([ 0.1311, -0.3511, -0.5880, -1.0443]) tensor([0.4144, 0.2558, 0.2019, 0.1279]) -Greedy action tensor([ 1.7105, -0.4298, -0.8204, 1.0867]) tensor([0.5770, 0.0679, 0.0459, 0.3092]) -Greedy action tensor([-0.2770, -1.6345, 0.8630, 0.0121]) tensor([0.1748, 0.0450, 0.5467, 0.2335]) -Greedy action tensor([-0.4358, -0.4414, 0.6847, -0.6425]) tensor([0.1702, 0.1693, 0.5220, 0.1384]) -Greedy action tensor([-0.0952, -0.4773, -0.0287, -0.3199]) tensor([0.2817, 0.1922, 0.3011, 0.2250]) -Greedy action tensor([-0.7267, -0.2351, -0.0087, -0.7490]) tensor([0.1766, 0.2887, 0.3620, 0.1727]) -Greedy action tensor([-0.1956, -0.3830, -0.0425, -0.3003]) tensor([0.2567, 0.2129, 0.2992, 0.2312]) -Greedy action tensor([ 0.1215, -0.1245, -0.5030, 1.0267]) tensor([0.2088, 0.1632, 0.1118, 0.5162]) -Greedy action tensor([-0.4734, 0.6522, -0.2967, -0.8326]) tensor([0.1674, 0.5160, 0.1998, 0.1169]) -Greedy action tensor([-0.2449, -0.0836, -0.5455, -0.0835]) tensor([0.2445, 0.2872, 0.1810, 0.2873]) -Greedy action tensor([ 1.2653, -0.9112, -0.1886, 1.0517]) tensor([0.4641, 0.0526, 0.1084, 0.3748]) -Greedy action tensor([-0.6009, -1.1431, 0.7708, -0.0539]) tensor([0.1379, 0.0802, 0.5436, 0.2383]) -Greedy action tensor([ 0.6274, -0.7443, -0.2901, 0.2140]) tensor([0.4321, 0.1096, 0.1726, 0.2857]) -Greedy action tensor([1.3010, 0.1258, 0.1056, 0.9924]) tensor([0.4263, 0.1316, 0.1290, 0.3131]) -Greedy action tensor([-0.8927, -1.1219, 0.4653, -1.2107]) tensor([0.1560, 0.1240, 0.6065, 0.1135]) -Greedy action tensor([-0.3122, -0.8418, 0.6295, -0.4229]) tensor([0.1981, 0.1166, 0.5080, 0.1773]) -Greedy action tensor([-0.7250, -0.4913, 0.0311, -0.7976]) tensor([0.1878, 0.2373, 0.4001, 0.1747]) -Greedy action tensor([-0.3309, -1.6191, 0.6693, -0.9698]) tensor([0.2211, 0.0610, 0.6012, 0.1167]) -Greedy action tensor([-0.1416, -0.1443, -0.4299, -0.5789]) tensor([0.2948, 0.2940, 0.2209, 0.1903]) -Greedy action tensor([ 1.0635, -0.5701, -0.1683, 0.1548]) tensor([0.5291, 0.1033, 0.1544, 0.2132]) -Greedy action tensor([ 0.2413, -1.1300, -0.0045, -0.5777]) tensor([0.4037, 0.1025, 0.3158, 0.1780]) -Greedy action tensor([-0.2915, -0.8807, 0.6157, -0.8814]) tensor([0.2180, 0.1210, 0.5401, 0.1209]) -Greedy action tensor([ 0.0444, -0.0714, -0.2911, -0.5785]) tensor([0.3183, 0.2835, 0.2276, 0.1707]) -Greedy action tensor([-0.2693, -0.9729, -0.5377, -0.2307]) tensor([0.3032, 0.1500, 0.2318, 0.3151]) -Greedy action tensor([ 1.1966, -1.0082, -0.5651, 1.0101]) tensor([0.4735, 0.0522, 0.0813, 0.3929]) -Greedy action tensor([-0.2700, -1.9155, 0.8248, -0.3984]) tensor([0.1976, 0.0381, 0.5905, 0.1738]) -Greedy action tensor([-0.0160, -1.1107, 0.4648, 0.0438]) tensor([0.2491, 0.0834, 0.4030, 0.2645]) -Greedy action tensor([ 0.1477, -0.3572, 0.0107, -0.4298]) tensor([0.3293, 0.1988, 0.2871, 0.1848]) -Greedy action tensor([ 0.2166, -0.9513, 1.2049, 0.1795]) tensor([0.2016, 0.0627, 0.5415, 0.1942]) -Greedy action tensor([ 0.1520, 0.5485, -0.2306, -0.1890]) tensor([0.2577, 0.3832, 0.1758, 0.1833]) -Greedy action tensor([-0.9286, -1.4739, -0.0319, -1.5210]) tensor([0.2181, 0.1265, 0.5348, 0.1206]) -Greedy action tensor([ 0.1412, 0.4350, -0.0923, -0.4887]) tensor([0.2728, 0.3659, 0.2160, 0.1453]) -Greedy action tensor([ 1.0251, -0.9549, -0.3376, 0.4754]) tensor([0.5073, 0.0700, 0.1299, 0.2928]) -Greedy action tensor([ 0.1330, -1.7188, 0.1202, 0.5322]) tensor([0.2751, 0.0432, 0.2716, 0.4101]) -Greedy action tensor([ 1.3946, -1.7597, 0.0544, 0.7890]) tensor([0.5405, 0.0231, 0.1415, 0.2950]) -Greedy action tensor([ 1.6067, -1.2074, -0.4970, 0.2197]) tensor([0.6984, 0.0419, 0.0852, 0.1745]) -Greedy action tensor([ 0.1869, -0.5091, -0.6067, -1.2328]) tensor([0.4561, 0.2274, 0.2063, 0.1103]) -Greedy action tensor([-0.3230, -0.7363, -0.1718, -1.0103]) tensor([0.3005, 0.1988, 0.3496, 0.1511]) -Greedy action tensor([ 0.4401, -1.1900, -0.1222, 0.4431]) tensor([0.3612, 0.0708, 0.2058, 0.3622]) -Greedy action tensor([-0.2492, -1.6465, 0.8884, -1.2283]) tensor([0.2109, 0.0521, 0.6578, 0.0792]) -Greedy action tensor([ 0.0428, 0.2123, -0.8813, -0.3457]) tensor([0.3068, 0.3634, 0.1218, 0.2080]) -Greedy action tensor([ 1.7662, -1.0512, -0.1001, 0.9855]) tensor([0.5979, 0.0357, 0.0925, 0.2739]) -Greedy action tensor([-0.4071, -0.3469, -1.0502, -0.7539]) tensor([0.3035, 0.3224, 0.1595, 0.2146]) -Greedy action tensor([-0.2999, -0.9991, -0.5422, -0.9175]) tensor([0.3545, 0.1762, 0.2782, 0.1912]) -Greedy action tensor([ 0.0584, -0.3884, 0.2181, -0.5103]) tensor([0.2959, 0.1893, 0.3472, 0.1676]) -Greedy action tensor([-0.4736, -0.0051, 0.1563, -0.1138]) tensor([0.1693, 0.2704, 0.3178, 0.2426]) -Greedy action tensor([ 0.0824, -1.5125, 0.0537, -0.2501]) tensor([0.3458, 0.0702, 0.3360, 0.2480]) -Greedy action tensor([ 0.7586, -0.9662, 1.3765, 0.4350]) tensor([0.2662, 0.0474, 0.4938, 0.1926]) -Greedy action tensor([ 0.4015, -0.3790, -0.0660, 0.1567]) tensor([0.3487, 0.1598, 0.2185, 0.2730]) -Greedy action tensor([-0.2207, -0.0189, 1.1629, -0.8163]) tensor([0.1478, 0.1809, 0.5898, 0.0815]) -Greedy action tensor([ 0.5794, 0.7865, -0.0595, -0.6949]) tensor([0.3292, 0.4050, 0.1738, 0.0921]) -Greedy action tensor([-0.0190, -0.7017, -0.5631, -0.2092]) tensor([0.3434, 0.1735, 0.1993, 0.2839]) -Greedy action tensor([ 0.2472, -1.4846, -0.1330, -0.1860]) tensor([0.3986, 0.0705, 0.2725, 0.2584]) -Greedy action tensor([-0.4549, -0.8253, 0.2557, -1.0149]) tensor([0.2327, 0.1607, 0.4737, 0.1329]) -Greedy action tensor([ 0.2222, -0.9471, -0.3104, -0.1054]) tensor([0.3819, 0.1186, 0.2242, 0.2752]) -Greedy action tensor([ 1.1367e-02, -4.4324e-04, 4.4518e-02, -8.1853e-01]) tensor([0.2892, 0.2858, 0.2989, 0.1261]) -Greedy action tensor([-1.3298, -1.3149, -0.1794, 0.1537]) tensor([0.1043, 0.1059, 0.3297, 0.4600]) -Greedy action tensor([-0.1799, -0.7661, 0.4398, -0.9344]) tensor([0.2574, 0.1432, 0.4783, 0.1210]) -Greedy action tensor([-1.1624, -1.3707, -0.2216, -1.0103]) tensor([0.1806, 0.1466, 0.4626, 0.2102]) -Greedy action tensor([-0.0048, -1.1089, 0.2969, -0.7239]) tensor([0.3154, 0.1045, 0.4264, 0.1536]) -Greedy action tensor([ 0.1771, -0.4836, 0.5685, -0.1449]) tensor([0.2688, 0.1388, 0.3976, 0.1948]) -Greedy action tensor([ 0.4813, 0.0405, 1.0624, -0.8866]) tensor([0.2713, 0.1746, 0.4851, 0.0691]) -Greedy action tensor([-0.7040, -0.3003, -0.0643, -0.2757]) tensor([0.1687, 0.2526, 0.3198, 0.2589]) -Greedy action tensor([-0.4239, -0.6820, -0.8955, 0.7538]) tensor([0.1772, 0.1369, 0.1106, 0.5753]) -Greedy action tensor([-0.4116, -1.0341, -0.1609, -0.8074]) tensor([0.2861, 0.1536, 0.3677, 0.1926]) -Greedy action tensor([-0.3169, -0.2703, -0.2866, -1.1155]) tensor([0.2834, 0.2969, 0.2921, 0.1275]) -Greedy action tensor([ 1.6923, -1.0492, -0.1387, -0.1716]) tensor([0.7247, 0.0467, 0.1161, 0.1124]) -Greedy action tensor([-0.3723, 0.7317, -0.2636, -1.1091]) tensor([0.1783, 0.5377, 0.1987, 0.0853]) -Greedy action tensor([ 1.0809, -0.2482, -0.4208, 0.2547]) tensor([0.5194, 0.1375, 0.1157, 0.2274]) -Greedy action tensor([ 2.0223, -0.2707, -0.6130, 0.4508]) tensor([0.7244, 0.0731, 0.0519, 0.1505]) -Greedy action tensor([ 1.3115, -0.6145, -0.3342, 0.3875]) tensor([0.5762, 0.0840, 0.1111, 0.2287]) -Greedy action tensor([ 1.4510, -0.2284, -0.9898, 0.0688]) tensor([0.6559, 0.1223, 0.0571, 0.1646]) -Greedy action tensor([ 1.5046, -0.4840, -0.4763, 0.0754]) tensor([0.6604, 0.0904, 0.0911, 0.1582]) -Greedy action tensor([ 1.3630, -0.3129, -0.4975, 0.2139]) tensor([0.6025, 0.1128, 0.0938, 0.1910]) -Greedy action tensor([ 0.4365, -0.3127, -0.0196, 0.0181]) tensor([0.3617, 0.1710, 0.2292, 0.2380]) -Greedy action tensor([ 1.4488, -0.1526, -0.8183, 0.1808]) tensor([0.6303, 0.1271, 0.0653, 0.1774]) -Greedy action tensor([ 2.2306, -0.9090, 0.1695, 1.3464]) tensor([0.6314, 0.0273, 0.0804, 0.2608]) -Greedy action tensor([ 1.4053, -0.1599, -0.8846, 0.1202]) tensor([0.6302, 0.1317, 0.0638, 0.1743]) -Greedy action tensor([ 1.3668, -0.2440, -0.8782, 0.2159]) tensor([0.6165, 0.1231, 0.0653, 0.1950]) -Greedy action tensor([ 1.4237, -0.7645, -0.4728, 0.2673]) tensor([0.6342, 0.0711, 0.0952, 0.1995]) -Greedy action tensor([ 1.3837, -0.6879, -0.7959, 0.5003]) tensor([0.6052, 0.0762, 0.0684, 0.2502]) -Greedy action tensor([ 0.7580, -0.2391, -0.3790, -0.0383]) tensor([0.4671, 0.1723, 0.1498, 0.2107]) -Greedy action tensor([ 1.1003, -0.4395, -0.3576, 0.1222]) tensor([0.5485, 0.1176, 0.1276, 0.2062]) -Greedy action tensor([ 1.1778, -0.3832, -0.1818, 0.6119]) tensor([0.4915, 0.1032, 0.1262, 0.2791]) -Greedy action tensor([ 1.3743, 0.0330, -0.3123, 0.0517]) tensor([0.5837, 0.1526, 0.1081, 0.1555]) -Greedy action tensor([ 1.0550, -0.3515, -0.3083, -0.0202]) tensor([0.5429, 0.1330, 0.1389, 0.1852]) -Greedy action tensor([ 1.5475, -1.0231, -0.0092, -0.0166]) tensor([0.6682, 0.0511, 0.1409, 0.1398]) -Greedy action tensor([ 1.9427, -1.0423, -0.7099, -0.0599]) tensor([0.7962, 0.0402, 0.0561, 0.1075]) -Greedy action tensor([ 2.3349, -0.8739, -0.0760, 0.3144]) tensor([0.7919, 0.0320, 0.0711, 0.1050]) -Greedy action tensor([ 1.3506, -0.2450, 0.2694, -0.4030]) tensor([0.5830, 0.1182, 0.1978, 0.1010]) -Greedy action tensor([ 0.9633, -0.1673, -0.3764, 0.0280]) tensor([0.5058, 0.1633, 0.1325, 0.1985]) -Greedy action tensor([ 1.6866, -0.8254, -0.0251, 0.6497]) tensor([0.6187, 0.0502, 0.1117, 0.2194]) -Greedy action tensor([ 1.8948, -1.2811, -0.0108, 0.3876]) tensor([0.7082, 0.0296, 0.1053, 0.1569]) -Greedy action tensor([ 1.3792, -0.2644, -0.7914, 0.2147]) tensor([0.6175, 0.1193, 0.0705, 0.1927]) -Greedy action tensor([ 1.2360, -0.2753, -0.2576, 0.3108]) tensor([0.5430, 0.1198, 0.1219, 0.2153]) -Greedy action tensor([ 1.4581, -0.5917, -0.9124, 0.6691]) tensor([0.5965, 0.0768, 0.0557, 0.2710]) -Greedy action tensor([ 1.1508, -0.6035, -0.8816, 0.5682]) tensor([0.5369, 0.0929, 0.0703, 0.2998]) -Greedy action tensor([2.6857, 0.5473, 0.0868, 0.1269]) tensor([0.7876, 0.0928, 0.0586, 0.0610]) -Greedy action tensor([ 1.9398, -1.1809, -0.4866, 0.2850]) tensor([0.7555, 0.0333, 0.0668, 0.1444]) -Greedy action tensor([ 1.6163, -0.2184, -0.1961, -0.1823]) tensor([0.6718, 0.1073, 0.1097, 0.1112]) -Greedy action tensor([ 1.5100, -0.4629, -0.0144, 0.1567]) tensor([0.6191, 0.0861, 0.1348, 0.1600]) -Greedy action tensor([ 1.7451, -0.6936, -0.7696, 0.2480]) tensor([0.7184, 0.0627, 0.0581, 0.1608]) -Greedy action tensor([ 0.7108, -0.3786, 0.2405, -0.2332]) tensor([0.4255, 0.1431, 0.2658, 0.1655]) -Greedy action tensor([ 1.4553, -0.0516, -0.9563, 0.0268]) tensor([0.6448, 0.1429, 0.0578, 0.1545]) -Greedy action tensor([ 1.7456, -0.5075, -0.5462, 0.3104]) tensor([0.6924, 0.0727, 0.0700, 0.1648]) -Greedy action tensor([ 1.5328, 0.0498, -0.4706, -0.2860]) tensor([0.6562, 0.1489, 0.0885, 0.1064]) -Greedy action tensor([ 1.4982, -0.4426, -0.4904, 0.0496]) tensor([0.6599, 0.0948, 0.0903, 0.1550]) -Greedy action tensor([ 1.5713, -0.5850, -0.2528, 0.2036]) tensor([0.6528, 0.0756, 0.1053, 0.1663]) -Greedy action tensor([ 1.8629, -0.5895, -0.1842, 0.5779]) tensor([0.6703, 0.0577, 0.0865, 0.1854]) -Greedy action tensor([ 1.7978, -0.3111, -1.2317, -0.0671]) tensor([0.7549, 0.0916, 0.0365, 0.1169]) -Greedy action tensor([ 1.2817, -0.1526, -0.7054, 0.0723]) tensor([0.5975, 0.1424, 0.0819, 0.1783]) -Greedy action tensor([ 1.1852, -0.1840, -0.2491, -0.0097]) tensor([0.5570, 0.1417, 0.1327, 0.1686]) -Greedy action tensor([ 1.1923, -0.0394, -0.7753, 0.1666]) tensor([0.5586, 0.1630, 0.0781, 0.2003]) -Greedy action tensor([ 1.7154, -0.2784, -0.9650, 0.1885]) tensor([0.7033, 0.0958, 0.0482, 0.1528]) -Greedy action tensor([ 1.1428, 0.1059, -1.3923, 0.1121]) tensor([0.5585, 0.1980, 0.0443, 0.1993]) -Greedy action tensor([ 1.3531, -0.5650, -0.5326, 0.9309]) tensor([0.5117, 0.0752, 0.0776, 0.3355]) -Greedy action tensor([ 1.9802, 0.0380, -0.2787, 0.6236]) tensor([0.6643, 0.0953, 0.0694, 0.1711]) -Greedy action tensor([ 1.8396, -0.6241, -0.0039, 0.0435]) tensor([0.7096, 0.0604, 0.1123, 0.1177]) -Greedy action tensor([ 1.3426, -0.2274, -0.6827, 0.3670]) tensor([0.5824, 0.1212, 0.0768, 0.2195]) -Greedy action tensor([ 1.5323, -0.2187, -1.1382, 0.1875]) tensor([0.6652, 0.1155, 0.0460, 0.1733]) -Greedy action tensor([ 1.7492, -0.8996, -0.6732, 0.3207]) tensor([0.7147, 0.0506, 0.0634, 0.1713]) -Greedy action tensor([ 1.4590, -0.6025, -0.5367, 0.5318]) tensor([0.6028, 0.0767, 0.0819, 0.2385]) -Greedy action tensor([ 1.4336, -0.2967, -0.4429, 0.4956]) tensor([0.5808, 0.1029, 0.0889, 0.2273]) -Greedy action tensor([ 1.2599, -0.1140, -0.5422, 0.1064]) tensor([0.5768, 0.1460, 0.0952, 0.1820]) -Greedy action tensor([ 2.1844, -0.8571, -0.7771, 0.6889]) tensor([0.7555, 0.0361, 0.0391, 0.1693]) -Greedy action tensor([ 1.3192, -0.7877, -0.4705, 0.7432]) tensor([0.5403, 0.0657, 0.0902, 0.3037]) -Greedy action tensor([ 1.7984, 0.0768, -0.1738, 0.0965]) tensor([0.6666, 0.1192, 0.0927, 0.1215]) -Greedy action tensor([ 1.6426, 0.1878, -0.5375, -0.4129]) tensor([0.6782, 0.1583, 0.0767, 0.0868]) -Greedy action tensor([ 1.1121, -0.4602, -0.2979, 0.6209]) tensor([0.4846, 0.1006, 0.1183, 0.2965]) -Greedy action tensor([ 1.3351, -0.1040, -0.6944, 0.1263]) tensor([0.5998, 0.1423, 0.0788, 0.1791]) -Greedy action tensor([ 1.3174, -0.7057, -0.5690, 0.2909]) tensor([0.6090, 0.0805, 0.0923, 0.2182]) -Greedy action tensor([ 1.6291, -0.7420, -0.5881, 0.0283]) tensor([0.7122, 0.0665, 0.0776, 0.1437]) -Greedy action tensor([ 1.0954, -0.1525, -1.0192, 0.0783]) tensor([0.5652, 0.1623, 0.0682, 0.2044]) -Greedy action tensor([ 1.4408, -0.3413, -0.7185, 0.3568]) tensor([0.6165, 0.1038, 0.0712, 0.2086]) -Greedy action tensor([ 1.2416, -0.4509, -0.8000, 0.5474]) tensor([0.5515, 0.1015, 0.0716, 0.2754]) -Greedy action tensor([ 1.6697, -0.0102, -0.3218, 0.0139]) tensor([0.6606, 0.1231, 0.0902, 0.1261]) -Greedy action tensor([ 1.8138, -1.0980, -0.3162, 0.5815]) tensor([0.6827, 0.0371, 0.0811, 0.1991]) -Greedy action tensor([ 1.0701, -0.0130, -1.0206, 0.3428]) tensor([0.5141, 0.1740, 0.0635, 0.2484]) -Greedy action tensor([ 1.1329, -0.2430, -1.1382, 0.3005]) tensor([0.5584, 0.1411, 0.0576, 0.2429]) -Greedy action tensor([ 1.3778, -0.3833, -0.7941, 0.0995]) tensor([0.6393, 0.1099, 0.0729, 0.1780]) -Greedy action tensor([ 1.8421, -0.9653, 0.0851, 0.2733]) tensor([0.6938, 0.0419, 0.1197, 0.1445]) -Greedy action tensor([ 1.2809, -0.4220, -0.4906, 0.3062]) tensor([0.5782, 0.1053, 0.0983, 0.2182]) -Greedy action tensor([ 1.4311, -0.7744, -0.4202, 0.7154]) tensor([0.5695, 0.0627, 0.0894, 0.2784]) -Greedy action tensor([ 2.1068, -1.0466, -0.6501, 0.6949]) tensor([0.7408, 0.0316, 0.0470, 0.1805]) -Greedy action tensor([ 1.1260, -0.5054, -0.4133, 0.5934]) tensor([0.5007, 0.0980, 0.1074, 0.2939]) -Greedy action tensor([ 1.8478, -0.9704, -0.0110, 0.7399]) tensor([0.6469, 0.0386, 0.1008, 0.2136]) -Greedy action tensor([ 1.4614, -0.9272, 0.0430, 0.3585]) tensor([0.6003, 0.0551, 0.1453, 0.1992]) -Greedy action tensor([ 1.5531, -0.1572, -0.6787, 0.2049]) tensor([0.6461, 0.1168, 0.0693, 0.1678]) -Greedy action tensor([ 1.1459, -0.0893, 0.0888, 0.3304]) tensor([0.4806, 0.1398, 0.1670, 0.2126]) -Greedy action tensor([ 0.5174, -0.5242, -0.1947, -0.1663]) tensor([0.4258, 0.1503, 0.2089, 0.2149]) -Greedy action tensor([ 0.5790, 0.0013, -0.0867, -0.0213]) tensor([0.3811, 0.2139, 0.1959, 0.2091]) -Greedy action tensor([ 1.3046, -0.9724, 0.1720, -0.9215]) tensor([0.6524, 0.0669, 0.2102, 0.0704]) -Greedy action tensor([ 0.7237, -0.1211, 0.1098, -0.2137]) tensor([0.4233, 0.1819, 0.2291, 0.1658]) -Greedy action tensor([ 1.0261, -0.9030, 0.1843, -0.4946]) tensor([0.5572, 0.0809, 0.2401, 0.1218]) -Greedy action tensor([ 0.5630, 0.0563, -0.0442, -0.1147]) tensor([0.3766, 0.2269, 0.2052, 0.1912]) -Greedy action tensor([ 0.8433, -0.2186, -0.1047, -0.0860]) tensor([0.4699, 0.1625, 0.1821, 0.1855]) -Greedy action tensor([ 1.0775, -0.8657, 0.1408, -0.4112]) tensor([0.5679, 0.0814, 0.2226, 0.1282]) -Greedy action tensor([ 0.9144, -0.7284, -0.0126, -0.3995]) tensor([0.5382, 0.1041, 0.2130, 0.1447]) -Greedy action tensor([ 0.5069, -0.2163, -0.0877, -0.2922]) tensor([0.4021, 0.1951, 0.2219, 0.1809]) -Greedy action tensor([ 1.3326, -0.8250, -0.0095, -0.4968]) tensor([0.6505, 0.0752, 0.1700, 0.1044]) -Greedy action tensor([ 1.1702, -0.5968, -0.0583, -0.4924]) tensor([0.6049, 0.1033, 0.1771, 0.1147]) -Greedy action tensor([ 1.0176, -0.6165, -0.0401, -0.2637]) tensor([0.5494, 0.1072, 0.1908, 0.1526]) -Greedy action tensor([ 1.1343, -1.1726, 0.1752, -0.6566]) tensor([0.6062, 0.0604, 0.2323, 0.1011]) -Greedy action tensor([ 1.1380, -0.8052, 0.2990, -0.6668]) tensor([0.5747, 0.0823, 0.2484, 0.0946]) -Greedy action tensor([ 0.5463, -0.4041, -0.2686, -0.9536]) tensor([0.4872, 0.1884, 0.2157, 0.1087]) -Greedy action tensor([ 1.0279, -0.7432, 0.2015, -0.4425]) tensor([0.5442, 0.0926, 0.2381, 0.1251]) -Greedy action tensor([ 0.5527, -0.2770, 0.0217, -0.1354]) tensor([0.3958, 0.1726, 0.2327, 0.1989]) -Greedy action tensor([ 0.9073, -0.5802, 0.0110, -0.1614]) tensor([0.5057, 0.1143, 0.2064, 0.1737]) -Greedy action tensor([ 0.5121, 0.0843, -0.2460, 0.0960]) tensor([0.3597, 0.2345, 0.1685, 0.2373]) -Greedy action tensor([ 0.7852, -0.6059, -0.0828, -0.3779]) tensor([0.5048, 0.1256, 0.2119, 0.1578]) -Greedy action tensor([ 0.7028, -0.0595, 0.0163, -0.0514]) tensor([0.4098, 0.1912, 0.2063, 0.1927]) -Greedy action tensor([ 0.6104, 0.1933, -0.0624, 0.1740]) tensor([0.3552, 0.2340, 0.1812, 0.2296]) -Greedy action tensor([ 0.9963, -0.7411, 0.0237, -0.3072]) tensor([0.5478, 0.0964, 0.2071, 0.1487]) -Greedy action tensor([ 0.3126, 0.1614, -0.0752, 0.1078]) tensor([0.2982, 0.2564, 0.2024, 0.2430]) -Greedy action tensor([ 1.1363, -0.9314, 0.1466, -0.6738]) tensor([0.6018, 0.0761, 0.2237, 0.0985]) -Greedy action tensor([ 1.1388, -0.8832, 0.2308, -0.5987]) tensor([0.5842, 0.0773, 0.2356, 0.1028]) -Greedy action tensor([ 0.8706, -0.4739, -0.0412, -0.3168]) tensor([0.5083, 0.1325, 0.2042, 0.1550]) -Greedy action tensor([ 0.9542, -0.5915, -0.0025, -0.5611]) tensor([0.5503, 0.1173, 0.2114, 0.1209]) -Greedy action tensor([ 0.4517, -0.1842, -0.2133, -0.4387]) tensor([0.4075, 0.2157, 0.2096, 0.1673]) -Greedy action tensor([ 0.8489, -0.7347, 0.0712, -0.4351]) tensor([0.5150, 0.1057, 0.2366, 0.1426]) -Greedy action tensor([ 0.7159, -0.6841, -0.0355, -0.3357]) tensor([0.4836, 0.1193, 0.2281, 0.1690]) -Greedy action tensor([ 0.6499, -0.3270, -0.0156, -0.0899]) tensor([0.4223, 0.1590, 0.2171, 0.2015]) -Greedy action tensor([ 0.7282, -0.3873, -0.2244, -0.3208]) tensor([0.4846, 0.1588, 0.1869, 0.1697]) -Greedy action tensor([ 1.0489, -0.5553, -0.0984, -0.5620]) tensor([0.5820, 0.1170, 0.1848, 0.1162]) -Greedy action tensor([ 0.9248, -0.4728, -0.0103, -0.1398]) tensor([0.5039, 0.1245, 0.1978, 0.1738]) -Greedy action tensor([ 0.6012, -0.1703, -0.1219, -0.4354]) tensor([0.4344, 0.2008, 0.2108, 0.1541]) -Greedy action tensor([ 0.9847, -0.4751, 0.0014, -0.1969]) tensor([0.5227, 0.1214, 0.1955, 0.1604]) -Greedy action tensor([ 1.0381, -0.6293, -0.0748, -0.6851]) tensor([0.5897, 0.1113, 0.1938, 0.1053]) -Greedy action tensor([ 0.3562, -0.0745, 0.0244, -0.1876]) tensor([0.3392, 0.2205, 0.2434, 0.1969]) -Greedy action tensor([ 0.8871, 0.0441, 0.0106, -0.4261]) tensor([0.4727, 0.2035, 0.1967, 0.1271]) -Greedy action tensor([ 0.9415, -0.3969, -0.0238, -0.2701]) tensor([0.5152, 0.1351, 0.1962, 0.1534]) -Greedy action tensor([ 1.1568, -0.3272, -0.1194, -0.1417]) tensor([0.5622, 0.1275, 0.1569, 0.1534]) -Greedy action tensor([ 0.6637, -0.0323, 0.0973, 0.0171]) tensor([0.3861, 0.1925, 0.2191, 0.2022]) -Greedy action tensor([ 1.2165, -0.4744, -0.1535, -0.3025]) tensor([0.6034, 0.1112, 0.1533, 0.1321]) -Greedy action tensor([ 0.9396, -0.7405, -0.0186, -0.6567]) tensor([0.5641, 0.1051, 0.2164, 0.1143]) -Greedy action tensor([ 1.1232, -0.0478, -0.0794, -0.3900]) tensor([0.5462, 0.1694, 0.1641, 0.1203]) -Greedy action tensor([ 0.8834, -0.4270, 0.0217, -0.1932]) tensor([0.4919, 0.1327, 0.2078, 0.1676]) -Greedy action tensor([ 0.7169, -0.1869, 0.0238, 0.0215]) tensor([0.4160, 0.1685, 0.2080, 0.2075]) -Greedy action tensor([ 1.0859, -0.8431, 0.0559, -0.6279]) tensor([0.5944, 0.0864, 0.2122, 0.1071]) -Greedy action tensor([ 0.8889, -0.6826, -0.0447, -0.5993]) tensor([0.5474, 0.1137, 0.2152, 0.1236]) -Greedy action tensor([ 0.9303, -0.1091, -0.0137, -0.6070]) tensor([0.5108, 0.1807, 0.1987, 0.1098]) -Greedy action tensor([ 0.5527, -0.4275, -0.1508, -0.2298]) tensor([0.4297, 0.1612, 0.2126, 0.1965]) -Greedy action tensor([ 0.6628, -0.3947, -0.0721, -0.1485]) tensor([0.4403, 0.1529, 0.2112, 0.1956]) -Greedy action tensor([ 0.8137, -0.3387, -0.0950, -0.2274]) tensor([0.4826, 0.1525, 0.1945, 0.1704]) -Greedy action tensor([ 0.7831, -0.4003, 0.1222, -0.1498]) tensor([0.4513, 0.1382, 0.2330, 0.1775]) -Greedy action tensor([ 1.2076, -1.1335, 0.0980, -0.7693]) tensor([0.6392, 0.0615, 0.2108, 0.0885]) -Greedy action tensor([ 1.2353, -1.1801, 0.2568, -0.6689]) tensor([0.6195, 0.0553, 0.2329, 0.0923]) -Greedy action tensor([ 0.6529, 0.2602, -0.1654, 0.1808]) tensor([0.3650, 0.2464, 0.1610, 0.2276]) -Greedy action tensor([ 0.7805, -0.0404, 0.0061, -0.2574]) tensor([0.4434, 0.1951, 0.2044, 0.1571]) -Greedy action tensor([ 0.6395, -0.1618, 0.1520, -0.1875]) tensor([0.3999, 0.1795, 0.2456, 0.1749]) -Greedy action tensor([ 0.6500, -0.2464, -0.0515, -0.1042]) tensor([0.4212, 0.1719, 0.2088, 0.1981]) -Greedy action tensor([ 0.7733, -0.7943, -0.1571, -0.2247]) tensor([0.5072, 0.1058, 0.2000, 0.1870]) -Greedy action tensor([ 0.6194, -0.6830, 0.2261, -0.5750]) tensor([0.4445, 0.1209, 0.3000, 0.1346]) -Greedy action tensor([ 1.0809, -0.7782, -0.1108, -0.6147]) tensor([0.6086, 0.0948, 0.1848, 0.1117]) -Greedy action tensor([ 1.0389, -0.6142, 0.0124, -0.5540]) tensor([0.5704, 0.1092, 0.2044, 0.1160]) -Greedy action tensor([ 0.5482, -0.2828, -0.0037, -0.1665]) tensor([0.3999, 0.1742, 0.2303, 0.1957]) -Greedy action tensor([ 1.1449, -0.7065, 0.1446, -0.8164]) tensor([0.6004, 0.0943, 0.2208, 0.0845]) -Greedy action tensor([ 0.7927, -0.7458, -0.0864, -0.3098]) tensor([0.5097, 0.1094, 0.2116, 0.1692]) -Greedy action tensor([ 0.5445, -0.5824, -0.0524, -0.2283]) tensor([0.4280, 0.1387, 0.2356, 0.1976]) -Greedy action tensor([ 0.9645, -0.7913, 0.0627, -0.4698]) tensor([0.5504, 0.0951, 0.2234, 0.1311]) -Greedy action tensor([ 0.6148, -0.0989, 0.0522, -0.4417]) tensor([0.4154, 0.2035, 0.2367, 0.1444]) -Greedy action tensor([ 0.5547, -0.4167, 0.0735, -0.4074]) tensor([0.4204, 0.1591, 0.2598, 0.1606]) -Greedy action tensor([ 0.7527, -0.5437, -0.0202, -0.2451]) tensor([0.4753, 0.1300, 0.2194, 0.1752]) -Greedy action tensor([ 1.2262, -0.6558, 0.1361, -1.0056]) tensor([0.6266, 0.0954, 0.2107, 0.0673]) -Greedy action tensor([-0.0551, -0.0762, -0.1541, -0.2364]) tensor([0.2689, 0.2633, 0.2435, 0.2243]) -Greedy action tensor([ 0.4615, -0.3534, 0.0053, -0.3898]) tensor([0.3995, 0.1768, 0.2531, 0.1705]) -Greedy action tensor([ 0.7315, -0.2471, 0.0767, -0.1846]) tensor([0.4356, 0.1637, 0.2263, 0.1743]) -Greedy action tensor([ 0.7547, -0.3849, -0.0679, -0.2488]) tensor([0.4704, 0.1505, 0.2066, 0.1725]) -Greedy action tensor([ 0.9724, -0.2520, -0.0889, -0.1063]) tensor([0.5051, 0.1485, 0.1747, 0.1717]) -Greedy action tensor([ 0.4163, -0.2995, -0.0884, -0.2344]) tensor([0.3825, 0.1870, 0.2309, 0.1996]) -Greedy action tensor([-1.8842, -0.4397, 0.6246, -0.1424]) tensor([0.0430, 0.1824, 0.5289, 0.2456]) -Greedy action tensor([-1.8790, -0.0206, 0.5755, -0.1732]) tensor([0.0407, 0.2611, 0.4740, 0.2242]) -Greedy action tensor([-1.8971, -0.4722, 0.6450, -0.1602]) tensor([0.0425, 0.1766, 0.5397, 0.2413]) -Greedy action tensor([-1.9076, -0.4128, 0.6595, -0.1552]) tensor([0.0412, 0.1838, 0.5371, 0.2378]) -Greedy action tensor([-1.9043, -0.4450, 0.6477, -0.1604]) tensor([0.0419, 0.1804, 0.5379, 0.2398]) -Greedy action tensor([-1.6246e+00, -4.1951e-01, 4.9544e-01, 2.0856e-04]) tensor([0.0563, 0.1880, 0.4695, 0.2861]) -Greedy action tensor([-1.2369, -0.2981, 0.3560, -0.0447]) tensor([0.0850, 0.2172, 0.4179, 0.2799]) -Greedy action tensor([-1.4135, -0.6642, 0.4321, 0.1150]) tensor([0.0711, 0.1505, 0.4504, 0.3280]) -Greedy action tensor([-1.8969, -0.2953, 0.6301, -0.1575]) tensor([0.0414, 0.2053, 0.5178, 0.2356]) -Greedy action tensor([-1.8152, -0.3579, 0.6182, -0.1192]) tensor([0.0452, 0.1939, 0.5147, 0.2462]) -Greedy action tensor([-1.8667, -0.2810, 0.6063, -0.1528]) tensor([0.0429, 0.2096, 0.5091, 0.2383]) -Greedy action tensor([-1.5470, -0.5288, 0.6020, -0.1650]) tensor([0.0612, 0.1696, 0.5253, 0.2439]) -Greedy action tensor([-1.7197, -0.4151, 0.6062, 0.0140]) tensor([0.0486, 0.1791, 0.4973, 0.2751]) -Greedy action tensor([-0.8743, -0.3958, 0.4248, 0.4539]) tensor([0.0995, 0.1605, 0.3646, 0.3754]) -Greedy action tensor([-1.5886, -0.1368, 0.5258, 0.0912]) tensor([0.0529, 0.2257, 0.4379, 0.2835]) -Greedy action tensor([-1.8992, -0.4574, 0.6486, -0.1592]) tensor([0.0422, 0.1784, 0.5391, 0.2403]) -Greedy action tensor([-1.8698, -0.4501, 0.6316, -0.1487]) tensor([0.0436, 0.1804, 0.5321, 0.2439]) -Greedy action tensor([-1.9197, -0.4476, 0.6566, -0.1706]) tensor([0.0412, 0.1797, 0.5421, 0.2370]) -Greedy action tensor([-1.8181, -0.1901, 0.5632, -0.1249]) tensor([0.0447, 0.2279, 0.4841, 0.2433]) -Greedy action tensor([-1.8908, -0.4546, 0.6431, -0.1566]) tensor([0.0426, 0.1791, 0.5369, 0.2413]) -Greedy action tensor([-1.7415, -0.5335, 0.5741, -0.0802]) tensor([0.0506, 0.1695, 0.5131, 0.2667]) -Greedy action tensor([-1.4960, 0.6513, 0.4530, -0.4040]) tensor([0.0511, 0.4376, 0.3589, 0.1523]) -Greedy action tensor([-0.9328, -0.3982, 0.3887, 0.5597]) tensor([0.0917, 0.1565, 0.3438, 0.4079]) -Greedy action tensor([-1.3307, -0.2413, 0.5780, -0.4231]) tensor([0.0758, 0.2253, 0.5111, 0.1878]) -Greedy action tensor([-1.7586, -0.3350, 0.5575, -0.1065]) tensor([0.0488, 0.2025, 0.4943, 0.2544]) -Greedy action tensor([-1.0847, -0.0185, 0.5194, 0.1105]) tensor([0.0821, 0.2384, 0.4083, 0.2712]) -Greedy action tensor([-1.8224, -0.3627, 0.6145, -0.1076]) tensor([0.0448, 0.1930, 0.5130, 0.2491]) -Greedy action tensor([-1.6219, 0.1974, -0.8752, -1.1308]) tensor([0.0916, 0.5652, 0.1934, 0.1498]) -Greedy action tensor([-1.5724, -0.0672, 0.5926, 0.0483]) tensor([0.0519, 0.2337, 0.4521, 0.2623]) -Greedy action tensor([-1.8758, -0.2746, 0.6239, -0.1417]) tensor([0.0420, 0.2083, 0.5117, 0.2380]) -Greedy action tensor([-1.9345, -0.4043, 0.6527, -0.1771]) tensor([0.0405, 0.1869, 0.5380, 0.2346]) -Greedy action tensor([-1.3738, -0.5526, 0.3975, 0.0677]) tensor([0.0747, 0.1699, 0.4394, 0.3159]) -Greedy action tensor([-1.8603, -0.4567, 0.6314, -0.1422]) tensor([0.0440, 0.1791, 0.5316, 0.2453]) -Greedy action tensor([-1.8939, -0.4534, 0.6474, -0.1534]) tensor([0.0423, 0.1788, 0.5375, 0.2413]) -Greedy action tensor([-1.9028, -0.4530, 0.6519, -0.1571]) tensor([0.0419, 0.1786, 0.5393, 0.2402]) -Greedy action tensor([-1.8895, -0.4486, 0.6426, -0.1415]) tensor([0.0425, 0.1794, 0.5342, 0.2439]) -Greedy action tensor([-1.9407, -0.4573, 0.6698, -0.1762]) tensor([0.0402, 0.1774, 0.5475, 0.2349]) -Greedy action tensor([-1.8046, -0.2975, 0.5942, -0.1080]) tensor([0.0455, 0.2054, 0.5009, 0.2482]) -Greedy action tensor([-0.4890, 0.7978, 0.0203, 0.1556]) tensor([0.1221, 0.4421, 0.2032, 0.2326]) -Greedy action tensor([-1.6714, 0.2340, 0.4244, -0.0182]) tensor([0.0474, 0.3189, 0.3858, 0.2478]) -Greedy action tensor([-1.8696, -0.3908, 0.6263, -0.1356]) tensor([0.0431, 0.1893, 0.5233, 0.2443]) -Greedy action tensor([-0.1830, 0.8097, -0.0224, 0.5531]) tensor([0.1437, 0.3877, 0.1687, 0.2999]) -Greedy action tensor([-1.8336, -0.4364, 0.6086, -0.1296]) tensor([0.0454, 0.1835, 0.5218, 0.2494]) -Greedy action tensor([-1.3761, 0.2313, 0.2959, 0.0020]) tensor([0.0654, 0.3266, 0.3484, 0.2596]) -Greedy action tensor([-1.8605, -0.4240, 0.6250, -0.1386]) tensor([0.0438, 0.1844, 0.5264, 0.2453]) -Greedy action tensor([-1.9051, -0.4597, 0.6433, -0.1568]) tensor([0.0421, 0.1785, 0.5378, 0.2416]) -Greedy action tensor([-1.8996, -0.3927, 0.6376, -0.1605]) tensor([0.0419, 0.1892, 0.5302, 0.2387]) -Greedy action tensor([-1.5435, 0.3272, 0.5720, -0.6158]) tensor([0.0546, 0.3545, 0.4528, 0.1381]) -Greedy action tensor([-1.8997, -0.3549, 0.6278, -0.1548]) tensor([0.0418, 0.1958, 0.5232, 0.2392]) -Greedy action tensor([-1.9041, -0.4241, 0.6513, -0.1603]) tensor([0.0417, 0.1831, 0.5368, 0.2384]) -Greedy action tensor([-1.8961, -0.3067, 0.6238, -0.1473]) tensor([0.0415, 0.2035, 0.5162, 0.2387]) -Greedy action tensor([-1.8680, -0.3960, 0.6243, -0.1425]) tensor([0.0434, 0.1890, 0.5242, 0.2435]) -Greedy action tensor([-0.4654, 0.8267, -0.0471, 0.0925]) tensor([0.1265, 0.4604, 0.1922, 0.2209]) -Greedy action tensor([-1.6679, -0.2606, 0.4992, -0.0704]) tensor([0.0533, 0.2178, 0.4656, 0.2634]) -Greedy action tensor([-1.4462, -0.4966, 0.4202, 0.0443]) tensor([0.0690, 0.1784, 0.4462, 0.3064]) -Greedy action tensor([-1.8909, -0.3283, 0.6233, -0.1558]) tensor([0.0420, 0.2005, 0.5192, 0.2382]) -Greedy action tensor([-0.8921, -0.3412, 0.2691, -0.0989]) tensor([0.1229, 0.2131, 0.3924, 0.2716]) -Greedy action tensor([-1.8561, -0.4496, 0.6317, -0.1337]) tensor([0.0440, 0.1797, 0.5298, 0.2465]) -Greedy action tensor([-1.8605, -0.4445, 0.6327, -0.1348]) tensor([0.0438, 0.1804, 0.5298, 0.2459]) -Greedy action tensor([-1.8970, -0.4279, 0.6438, -0.1545]) tensor([0.0421, 0.1830, 0.5344, 0.2405]) -Greedy action tensor([-1.9173, -0.4486, 0.6515, -0.1713]) tensor([0.0414, 0.1801, 0.5409, 0.2376]) -Greedy action tensor([-1.8828, -0.4527, 0.6381, -0.1489]) tensor([0.0430, 0.1795, 0.5343, 0.2432]) -Greedy action tensor([-1.5704, -0.5046, 0.5001, 0.1251]) tensor([0.0579, 0.1680, 0.4588, 0.3153]) -Greedy action tensor([-1.7832, -0.4899, 0.5679, -0.0894]) tensor([0.0486, 0.1771, 0.5100, 0.2643]) -Greedy action tensor([-1.8395, -0.2555, 0.5817, -0.1229]) tensor([0.0441, 0.2147, 0.4960, 0.2452]) -Greedy action tensor([-1.5985e+00, -4.3774e-01, 5.2415e-01, -8.9759e-04]) tensor([0.0572, 0.1826, 0.4777, 0.2826]) -Greedy action tensor([-1.8900, -0.4249, 0.6369, -0.1543]) tensor([0.0425, 0.1840, 0.5322, 0.2412]) -Greedy action tensor([-1.2478, 0.4489, 0.2340, 0.0881]) tensor([0.0682, 0.3722, 0.3002, 0.2594]) -Greedy action tensor([-1.4916, -0.0881, 0.5074, 0.0937]) tensor([0.0577, 0.2348, 0.4259, 0.2816]) -Greedy action tensor([-1.4851, 0.5889, 0.3304, 0.0356]) tensor([0.0508, 0.4044, 0.3123, 0.2325]) -Greedy action tensor([ 0.6279, 1.2835, -0.0301, 0.4007]) tensor([0.2358, 0.4542, 0.1221, 0.1879]) -Greedy action tensor([ 0.9851, 0.6763, -0.0268, 0.6562]) tensor([0.3549, 0.2606, 0.1290, 0.2554]) -Greedy action tensor([-1.2997, 0.7531, 0.2087, 0.1296]) tensor([0.0572, 0.4455, 0.2585, 0.2388]) -Greedy action tensor([-1.9345, -0.4499, 0.6643, -0.1742]) tensor([0.0405, 0.1789, 0.5450, 0.2356]) -Greedy action tensor([-1.6242, 0.3143, 0.1110, -0.1062]) tensor([0.0550, 0.3822, 0.3118, 0.2510]) -Greedy action tensor([-1.3017, -0.5872, 0.3885, -0.0184]) tensor([0.0828, 0.1693, 0.4490, 0.2989]) -Greedy action tensor([-1.7594, -0.2661, 0.5477, -0.0897]) tensor([0.0481, 0.2140, 0.4828, 0.2552]) -Greedy action tensor([-1.9292, -0.4302, 0.6611, -0.1719]) tensor([0.0406, 0.1819, 0.5418, 0.2356]) -Greedy action tensor([-1.8785, -0.6275, 0.9956, 0.0151]) tensor([0.0347, 0.1211, 0.6139, 0.2303]) -Greedy action tensor([-1.7827, -0.1942, 0.5542, -0.1252]) tensor([0.0465, 0.2278, 0.4815, 0.2441]) -Greedy action tensor([-1.5268, -0.5497, 0.5180, -0.0529]) tensor([0.0635, 0.1687, 0.4906, 0.2772]) -Greedy action tensor([ 1.2676, -0.7845, -0.3257, 0.3723]) tensor([0.5746, 0.0738, 0.1168, 0.2347]) -Greedy action tensor([ 1.9517, 0.6165, -0.3512, 0.3182]) tensor([0.6417, 0.1688, 0.0642, 0.1253]) -Greedy action tensor([ 1.5227, -0.6668, 0.0636, 0.1486]) tensor([0.6260, 0.0701, 0.1455, 0.1584]) -Greedy action tensor([ 1.0770, -0.5776, -0.2627, 0.6610]) tensor([0.4733, 0.0905, 0.1240, 0.3122]) -Greedy action tensor([ 1.9693, -1.0395, -0.3532, 0.4676]) tensor([0.7299, 0.0360, 0.0715, 0.1626]) -Greedy action tensor([ 1.2008, -0.2315, -0.5460, 0.4679]) tensor([0.5281, 0.1261, 0.0921, 0.2538]) -Greedy action tensor([ 1.3677, -0.5677, -0.1444, 0.5790]) tensor([0.5497, 0.0794, 0.1212, 0.2498]) -Greedy action tensor([ 0.9928, -0.3355, -0.7380, 0.2629]) tensor([0.5197, 0.1377, 0.0921, 0.2505]) -Greedy action tensor([ 1.2960, -0.3612, -0.6393, 0.0689]) tensor([0.6142, 0.1171, 0.0887, 0.1800]) -Greedy action tensor([ 2.0232, -0.5200, -0.7298, 0.2848]) tensor([0.7586, 0.0596, 0.0484, 0.1334]) -Greedy action tensor([ 1.7605, -0.5796, -0.7811, -0.0267]) tensor([0.7449, 0.0717, 0.0587, 0.1247]) -Greedy action tensor([ 1.4115, -0.4133, -0.2499, 0.1451]) tensor([0.6124, 0.0988, 0.1163, 0.1726]) -Greedy action tensor([ 1.1525, -0.4980, -0.0558, 0.5186]) tensor([0.4948, 0.0950, 0.1478, 0.2625]) -Greedy action tensor([ 1.8361, -0.4576, -0.8416, 0.5879]) tensor([0.6865, 0.0693, 0.0472, 0.1970]) -Greedy action tensor([ 1.2211, -0.4172, -0.3716, 0.7157]) tensor([0.4998, 0.0971, 0.1016, 0.3015]) -Greedy action tensor([ 1.4184, -0.0580, -1.6140, 0.0026]) tensor([0.6582, 0.1504, 0.0317, 0.1598]) -Greedy action tensor([ 1.8253, -0.5453, -0.7856, 0.5396]) tensor([0.6928, 0.0647, 0.0509, 0.1915]) -Greedy action tensor([ 1.4455, -0.2991, -0.4985, 0.0698]) tensor([0.6367, 0.1112, 0.0911, 0.1609]) -Greedy action tensor([ 1.8852, -0.6643, -0.6510, 0.2256]) tensor([0.7421, 0.0580, 0.0588, 0.1412]) -Greedy action tensor([ 1.6200, -0.7629, -0.1850, -0.0375]) tensor([0.6909, 0.0638, 0.1136, 0.1317]) -Greedy action tensor([ 1.2130, -0.2893, -0.4924, 0.0928]) tensor([0.5778, 0.1286, 0.1050, 0.1885]) -Greedy action tensor([ 1.2364, -0.5208, -0.5677, 0.1652]) tensor([0.5953, 0.1027, 0.0980, 0.2040]) -Greedy action tensor([ 1.8018, -0.5824, -0.2752, 0.6441]) tensor([0.6529, 0.0602, 0.0818, 0.2051]) -Greedy action tensor([ 1.3195, -0.1526, -0.9253, 0.2997]) tensor([0.5896, 0.1353, 0.0625, 0.2126]) -Greedy action tensor([ 2.6218, -1.1241, -0.3352, 0.4157]) tensor([0.8434, 0.0199, 0.0438, 0.0929]) -Greedy action tensor([ 1.3354, -0.1517, -0.5892, -0.2620]) tensor([0.6352, 0.1436, 0.0927, 0.1286]) -Greedy action tensor([ 1.3576, -0.4021, -0.2395, 0.1873]) tensor([0.5935, 0.1021, 0.1202, 0.1842]) -Greedy action tensor([ 1.8314, -0.3976, -0.7480, 0.6587]) tensor([0.6698, 0.0721, 0.0508, 0.2073]) -Greedy action tensor([ 1.7068, -0.3771, -1.2866, 0.2578]) tensor([0.7095, 0.0883, 0.0356, 0.1666]) -Greedy action tensor([ 1.1014, -0.1883, -0.6015, 0.1638]) tensor([0.5408, 0.1489, 0.0985, 0.2118]) -Greedy action tensor([ 2.0186, -0.6886, -0.4400, 0.6313]) tensor([0.7133, 0.0476, 0.0610, 0.1781]) -Greedy action tensor([ 1.5246, -0.3995, -1.1273, 0.1942]) tensor([0.6753, 0.0986, 0.0476, 0.1785]) -Greedy action tensor([ 1.1763, -0.0247, -0.3303, -0.1136]) tensor([0.5562, 0.1674, 0.1233, 0.1531]) -Greedy action tensor([ 1.3588, -0.3405, -0.8978, 0.1292]) tensor([0.6329, 0.1157, 0.0663, 0.1851]) -Greedy action tensor([ 1.2624, -0.6125, -0.3585, 0.2524]) tensor([0.5830, 0.0894, 0.1153, 0.2123]) -Greedy action tensor([ 1.6894, -0.6936, -0.2002, 0.1730]) tensor([0.6836, 0.0631, 0.1033, 0.1500]) -Greedy action tensor([ 1.2782, -0.3621, -0.3731, 0.3787]) tensor([0.5579, 0.1082, 0.1070, 0.2269]) -Greedy action tensor([ 1.8633, -1.0829, -0.5327, -0.0441]) tensor([0.7739, 0.0407, 0.0705, 0.1149]) -Greedy action tensor([ 1.0602, -0.0112, -1.0045, 0.3532]) tensor([0.5095, 0.1745, 0.0646, 0.2513]) -Greedy action tensor([ 1.9005, -1.1028, -0.1977, 0.5061]) tensor([0.7041, 0.0349, 0.0864, 0.1746]) -Greedy action tensor([ 2.3404, -0.6832, -0.6424, 0.3768]) tensor([0.8067, 0.0392, 0.0409, 0.1132]) -Greedy action tensor([ 1.9842, -1.0094, -0.2081, 0.5301]) tensor([0.7167, 0.0359, 0.0800, 0.1674]) -Greedy action tensor([ 1.3592, -0.6457, -0.7121, 0.2299]) tensor([0.6313, 0.0850, 0.0796, 0.2041]) -Greedy action tensor([ 1.7931, -0.7751, -0.4406, 0.0580]) tensor([0.7352, 0.0564, 0.0788, 0.1297]) -Greedy action tensor([ 0.9371, 0.0948, -0.2483, 0.0258]) tensor([0.4676, 0.2014, 0.1429, 0.1880]) -Greedy action tensor([ 1.7106, -0.2749, -0.5286, 0.1969]) tensor([0.6831, 0.0938, 0.0728, 0.1504]) -Greedy action tensor([ 1.3620, -1.1405, -0.4141, -0.0607]) tensor([0.6701, 0.0549, 0.1135, 0.1615]) -Greedy action tensor([ 1.9800, -0.6265, -0.5559, 0.2850]) tensor([0.7482, 0.0552, 0.0592, 0.1374]) -Greedy action tensor([ 1.1170, -0.4119, -0.5363, 0.3214]) tensor([0.5378, 0.1166, 0.1029, 0.2427]) -Greedy action tensor([ 0.9149, -0.2677, -0.1996, 0.2702]) tensor([0.4631, 0.1419, 0.1519, 0.2431]) -Greedy action tensor([ 1.3170, -0.4566, -0.3328, 0.3488]) tensor([0.5742, 0.0974, 0.1103, 0.2181]) -Greedy action tensor([ 1.4132, -0.2068, -0.3167, 0.2213]) tensor([0.5956, 0.1179, 0.1056, 0.1809]) -Greedy action tensor([ 1.1228, 0.0069, -0.7815, -0.0126]) tensor([0.5562, 0.1822, 0.0828, 0.1787]) -Greedy action tensor([ 1.3074e+00, -7.7266e-01, -1.1044e-01, 4.5663e-04]) tensor([0.6106, 0.0763, 0.1479, 0.1652]) -Greedy action tensor([ 1.7859, 0.5421, -0.1512, -0.0217]) tensor([0.6264, 0.1806, 0.0903, 0.1028]) -Greedy action tensor([ 1.1888, -0.6090, -0.3168, -0.3125]) tensor([0.6210, 0.1029, 0.1378, 0.1384]) -Greedy action tensor([ 1.2212, -0.2584, -0.8248, 0.1949]) tensor([0.5830, 0.1328, 0.0754, 0.2089]) -Greedy action tensor([ 1.2269, -0.5817, -0.3210, 0.3251]) tensor([0.5610, 0.0919, 0.1193, 0.2277]) -Greedy action tensor([ 1.8639, -0.1467, 0.0184, 0.4726]) tensor([0.6491, 0.0869, 0.1025, 0.1615]) -Greedy action tensor([ 0.9690, -0.2577, -0.0445, 0.5268]) tensor([0.4350, 0.1276, 0.1579, 0.2795]) -Greedy action tensor([ 1.2195, -0.7035, 0.0420, 0.1828]) tensor([0.5528, 0.0808, 0.1703, 0.1960]) -Greedy action tensor([ 1.4887, -0.2222, -0.1877, -0.3359]) tensor([0.6540, 0.1182, 0.1223, 0.1055]) -Greedy action tensor([ 1.7289, -0.7245, -0.3976, 0.3035]) tensor([0.6917, 0.0595, 0.0825, 0.1663]) -Greedy action tensor([ 1.7270, -0.7529, -0.4725, -0.0683]) tensor([0.7349, 0.0616, 0.0815, 0.1220]) -Greedy action tensor([ 1.2392, -0.8104, -0.3798, 0.4708]) tensor([0.5585, 0.0719, 0.1106, 0.2590]) -Greedy action tensor([ 0.6075, -0.1760, 0.0288, -0.0537]) tensor([0.3947, 0.1803, 0.2213, 0.2038]) -Greedy action tensor([ 1.6637, -0.4090, -0.7205, 0.6310]) tensor([0.6353, 0.0800, 0.0586, 0.2262]) -Greedy action tensor([ 2.0853, -0.9850, 0.0626, 0.9750]) tensor([0.6630, 0.0308, 0.0877, 0.2185]) -Greedy action tensor([ 1.7415, -0.5043, -0.3960, 0.4572]) tensor([0.6664, 0.0705, 0.0786, 0.1845]) -Greedy action tensor([ 0.9659, -0.2825, -0.7769, 0.0602]) tensor([0.5358, 0.1538, 0.0938, 0.2166]) -Greedy action tensor([ 1.2111, -0.3654, -0.3735, 0.2357]) tensor([0.5590, 0.1155, 0.1146, 0.2108]) -Greedy action tensor([ 2.2134, 0.3535, -0.0978, 0.0674]) tensor([0.7290, 0.1135, 0.0723, 0.0853]) -Greedy action tensor([ 1.5052, -0.3209, -0.3616, 0.0411]) tensor([0.6464, 0.1041, 0.0999, 0.1495]) -Greedy action tensor([ 1.5160, -0.4058, -0.3039, -0.1855]) tensor([0.6708, 0.0982, 0.1087, 0.1224]) -Greedy action tensor([ 1.1673, -0.3685, 0.1523, 0.1186]) tensor([0.5187, 0.1117, 0.1879, 0.1817]) -Greedy action tensor([ 1.2543, -0.4627, -0.3862, 0.1089]) tensor([0.5912, 0.1062, 0.1146, 0.1880]) -Greedy action tensor([ 1.3471, -0.6571, -0.6858, 0.8826]) tensor([0.5279, 0.0712, 0.0691, 0.3318]) -Greedy action tensor([ 0.7300, -0.4035, -0.2120, 0.0849]) tensor([0.4472, 0.1439, 0.1743, 0.2346]) -Greedy action tensor([ 1.1105, -0.3483, -0.5710, 0.2916]) tensor([0.5378, 0.1250, 0.1001, 0.2371]) -Greedy action tensor([ 1.2722, -0.0625, -1.1101, 0.3267]) tensor([0.5734, 0.1509, 0.0529, 0.2228]) -Greedy action tensor([ 1.3991, -0.0429, -0.6617, 0.0390]) tensor([0.6171, 0.1459, 0.0786, 0.1584]) -Greedy action tensor([-0.5277, -0.7941, -0.2445, 0.2676]) tensor([0.1884, 0.1443, 0.2500, 0.4173]) -Greedy action tensor([ 0.3232, 0.8859, -0.5155, -0.5450]) tensor([0.2772, 0.4866, 0.1198, 0.1163]) -Greedy action tensor([-0.5669, -0.7438, -0.4866, -1.0697]) tensor([0.2836, 0.2376, 0.3073, 0.1715]) -Greedy action tensor([ 0.3766, 0.6587, -0.0933, -0.5611]) tensor([0.2992, 0.3967, 0.1870, 0.1171]) -Greedy action tensor([ 0.9468, -0.4105, 0.7139, -0.6421]) tensor([0.4437, 0.1142, 0.3515, 0.0906]) -Greedy action tensor([-1.1546, -0.1963, 0.7927, -0.9441]) tensor([0.0844, 0.2200, 0.5915, 0.1041]) -Greedy action tensor([ 0.1231, -0.5046, -0.5261, 0.1706]) tensor([0.3221, 0.1719, 0.1683, 0.3377]) -Greedy action tensor([ 0.3653, -1.0194, 1.1649, -0.6552]) tensor([0.2607, 0.0653, 0.5800, 0.0940]) -Greedy action tensor([-0.6851, -0.4887, 1.8346, -0.8078]) tensor([0.0644, 0.0784, 0.8002, 0.0570]) -Greedy action tensor([-0.2036, -0.5522, 0.7423, 0.0663]) tensor([0.1789, 0.1262, 0.4606, 0.2343]) -Greedy action tensor([-0.0434, -0.6994, -0.8672, -0.3083]) tensor([0.3670, 0.1904, 0.1610, 0.2816]) -Greedy action tensor([-0.5651, 0.1100, -0.0804, -1.2514]) tensor([0.1964, 0.3858, 0.3189, 0.0989]) -Greedy action tensor([-0.6101, 0.3962, -0.4422, -0.7084]) tensor([0.1717, 0.4696, 0.2031, 0.1556]) -Greedy action tensor([-0.2125, -0.7662, 0.1744, -0.2484]) tensor([0.2493, 0.1433, 0.3670, 0.2405]) -Greedy action tensor([-0.2414, -0.1253, -1.0500, -0.4850]) tensor([0.2983, 0.3350, 0.1329, 0.2338]) -Greedy action tensor([ 0.2077, -0.2851, 1.6363, -0.8476]) tensor([0.1631, 0.0996, 0.6805, 0.0568]) -Greedy action tensor([-0.3703, -0.2981, -1.1364, -0.0992]) tensor([0.2597, 0.2791, 0.1207, 0.3405]) -Greedy action tensor([ 0.5496, -0.6138, 0.2689, 0.4180]) tensor([0.3396, 0.1061, 0.2565, 0.2977]) -Greedy action tensor([-0.0013, -0.8450, 0.2949, -0.5089]) tensor([0.2961, 0.1274, 0.3982, 0.1783]) -Greedy action tensor([-0.1171, 0.3274, 0.1191, -0.6537]) tensor([0.2267, 0.3536, 0.2871, 0.1326]) -Greedy action tensor([-0.0627, -1.1719, 0.4972, -0.7463]) tensor([0.2789, 0.0920, 0.4883, 0.1408]) -Greedy action tensor([-0.8954, -1.0703, 0.6345, -1.1806]) tensor([0.1387, 0.1165, 0.6405, 0.1043]) -Greedy action tensor([ 0.9263, -0.9511, 0.4008, -0.5104]) tensor([0.5045, 0.0772, 0.2983, 0.1199]) -Greedy action tensor([-0.5148, -0.1088, 0.4820, -0.9880]) tensor([0.1714, 0.2573, 0.4645, 0.1068]) -Greedy action tensor([ 0.8467, -0.1046, 0.4516, 0.9065]) tensor([0.3203, 0.1237, 0.2158, 0.3401]) -Greedy action tensor([-0.0823, -0.8907, -0.6251, -0.3080]) tensor([0.3540, 0.1577, 0.2057, 0.2825]) -Greedy action tensor([-0.3210, -1.3703, 0.4403, -1.2535]) tensor([0.2574, 0.0901, 0.5511, 0.1013]) -Greedy action tensor([ 0.2748, -0.7338, 0.5418, -1.1607]) tensor([0.3438, 0.1254, 0.4490, 0.0818]) -Greedy action tensor([-0.5197, -0.5488, -0.8529, 0.1813]) tensor([0.2126, 0.2065, 0.1523, 0.4286]) -Greedy action tensor([-0.1334, 0.1921, 0.4844, -0.4022]) tensor([0.1998, 0.2767, 0.3707, 0.1527]) -Greedy action tensor([ 0.3505, -1.4640, -0.0048, -0.8537]) tensor([0.4622, 0.0753, 0.3239, 0.1386]) -Greedy action tensor([ 0.4674, -1.0242, 0.1762, -0.3068]) tensor([0.4110, 0.0925, 0.3071, 0.1895]) -Greedy action tensor([-0.5200, -0.0549, -0.3456, -0.6150]) tensor([0.2131, 0.3393, 0.2537, 0.1938]) -Greedy action tensor([ 0.1755, -0.8557, 0.5291, -0.3110]) tensor([0.2945, 0.1050, 0.4194, 0.1811]) -Greedy action tensor([-0.1254, -0.1380, -0.1675, -0.2309]) tensor([0.2600, 0.2568, 0.2493, 0.2340]) -Greedy action tensor([0.9122, 0.5290, 0.8224, 0.1698]) tensor([0.3255, 0.2219, 0.2976, 0.1549]) -Greedy action tensor([ 0.9777, -1.1523, -0.0037, 0.3884]) tensor([0.4882, 0.0580, 0.1830, 0.2708]) -Greedy action tensor([ 0.3380, 0.2717, -0.1662, 0.2121]) tensor([0.2923, 0.2735, 0.1765, 0.2577]) -Greedy action tensor([-0.0115, -1.1780, 0.7203, 0.4488]) tensor([0.2010, 0.0626, 0.4179, 0.3185]) -Greedy action tensor([ 0.8000, -0.9406, 0.1202, -0.1739]) tensor([0.4855, 0.0852, 0.2460, 0.1833]) -Greedy action tensor([ 0.8294, -1.0115, 0.6867, -1.1173]) tensor([0.4612, 0.0732, 0.3998, 0.0658]) -Greedy action tensor([-1.1777, -0.3025, -0.8619, -0.4070]) tensor([0.1443, 0.3461, 0.1978, 0.3118]) -Greedy action tensor([ 1.8716, -1.1311, 1.2122, 0.0674]) tensor([0.5776, 0.0287, 0.2987, 0.0951]) -Greedy action tensor([-0.3634, -0.0092, -1.1539, -0.3868]) tensor([0.2594, 0.3696, 0.1177, 0.2534]) -Greedy action tensor([-0.9130, 0.5198, -0.8629, 0.0622]) tensor([0.1124, 0.4712, 0.1182, 0.2982]) -Greedy action tensor([ 0.1378, -0.2303, -0.5401, -0.1228]) tensor([0.3367, 0.2330, 0.1709, 0.2594]) -Greedy action tensor([ 0.9754, -1.4396, 1.1095, -1.0049]) tensor([0.4218, 0.0377, 0.4823, 0.0582]) -Greedy action tensor([ 0.1679, -1.2004, 0.5223, 0.1016]) tensor([0.2766, 0.0704, 0.3942, 0.2588]) -Greedy action tensor([ 0.8400, -0.9652, -0.5433, 0.9344]) tensor([0.3977, 0.0654, 0.0997, 0.4371]) -Greedy action tensor([-0.0022, -0.1487, -0.4235, -0.5423]) tensor([0.3223, 0.2784, 0.2115, 0.1878]) -Greedy action tensor([ 0.1271, -0.5807, -0.0184, 0.2039]) tensor([0.2909, 0.1434, 0.2516, 0.3142]) -Greedy action tensor([ 0.6647, -0.3609, -0.8091, 0.4948]) tensor([0.4113, 0.1475, 0.0942, 0.3470]) -Greedy action tensor([-1.2621, 0.7488, -0.0830, -0.6749]) tensor([0.0740, 0.5525, 0.2405, 0.1330]) -Greedy action tensor([-0.1251, -0.6535, -0.3478, -0.1455]) tensor([0.2968, 0.1750, 0.2375, 0.2908]) -Greedy action tensor([-0.2737, 0.5994, -0.5048, -0.5062]) tensor([0.2008, 0.4807, 0.1594, 0.1591]) -Greedy action tensor([ 0.3615, -0.3671, -0.7484, 1.3221]) tensor([0.2260, 0.1091, 0.0745, 0.5905]) -Greedy action tensor([ 0.4366, 1.0224, -0.3551, 0.2073]) tensor([0.2472, 0.4442, 0.1120, 0.1966]) -Greedy action tensor([-0.1911, -1.0977, 0.3248, -1.0279]) tensor([0.2847, 0.1150, 0.4770, 0.1233]) -Greedy action tensor([-0.3693, -0.5835, -0.5870, -0.7890]) tensor([0.3059, 0.2469, 0.2461, 0.2011]) -Greedy action tensor([-0.0529, -1.5669, 1.0132, -0.7682]) tensor([0.2168, 0.0477, 0.6295, 0.1060]) -Greedy action tensor([ 1.0201, -0.2261, -0.1412, -0.0705]) tensor([0.5163, 0.1485, 0.1617, 0.1735]) -Greedy action tensor([-0.6685, -0.1353, -1.0461, -0.2070]) tensor([0.2009, 0.3425, 0.1377, 0.3188]) -Greedy action tensor([ 0.0578, -0.4940, 0.0697, -0.5026]) tensor([0.3166, 0.1823, 0.3204, 0.1808]) -Greedy action tensor([-0.1410, -0.3351, 0.4094, -0.6625]) tensor([0.2409, 0.1984, 0.4177, 0.1430]) -Greedy action tensor([-1.2055, 0.3862, -1.0178, -0.6413]) tensor([0.1127, 0.5534, 0.1359, 0.1981]) -Greedy action tensor([ 0.6981, -1.3971, 1.1270, 0.1580]) tensor([0.3085, 0.0380, 0.4738, 0.1798]) -Greedy action tensor([-0.6350, 0.3229, 0.3475, -1.2037]) tensor([0.1461, 0.3808, 0.3903, 0.0827]) -Greedy action tensor([-0.7546, -0.2510, -0.0280, -1.0115]) tensor([0.1819, 0.3011, 0.3763, 0.1407]) -Greedy action tensor([-0.1601, -0.0942, 0.2394, -0.5575]) tensor([0.2363, 0.2524, 0.3524, 0.1588]) -Greedy action tensor([-0.3547, 0.8098, 0.3775, -0.9570]) tensor([0.1464, 0.4690, 0.3044, 0.0802]) -Greedy action tensor([-1.1611, -0.4978, 0.2545, -1.3779]) tensor([0.1271, 0.2468, 0.5237, 0.1024]) -Greedy action tensor([ 0.9893, -0.5380, -0.8326, 0.6429]) tensor([0.4794, 0.1041, 0.0775, 0.3390]) -Greedy action tensor([ 0.7038, -1.1139, 0.0063, -0.8739]) tensor([0.5357, 0.0870, 0.2667, 0.1106]) -Greedy action tensor([ 0.5131, -0.3816, 0.6716, 0.3178]) tensor([0.2938, 0.1201, 0.3443, 0.2417]) -Greedy action tensor([-1.2630, -0.0813, 0.1745, -1.1729]) tensor([0.1046, 0.3408, 0.4402, 0.1144]) -Greedy action tensor([ 0.1781, -0.7005, -0.2896, -0.1306]) tensor([0.3602, 0.1496, 0.2257, 0.2645]) -Greedy action tensor([ 0.3586, 0.3387, -0.1842, -0.2052]) tensor([0.3194, 0.3131, 0.1856, 0.1818]) -Greedy action tensor([ 0.3888, 0.0912, 0.5551, -1.1694]) tensor([0.3191, 0.2369, 0.3768, 0.0672]) -Greedy action tensor([-0.2321, -1.9712, 0.1032, 0.1625]) tensor([0.2464, 0.0433, 0.3446, 0.3657]) -Greedy action tensor([ 0.2254, 0.5459, -0.6978, -0.3928]) tensor([0.3018, 0.4158, 0.1199, 0.1626]) -Greedy action tensor([ 0.3889, -0.5638, -0.3590, 0.5448]) tensor([0.3303, 0.1274, 0.1563, 0.3860]) -Greedy action tensor([ 0.7630, -0.4583, 0.0653, -0.1795]) tensor([0.4582, 0.1351, 0.2281, 0.1786]) -Greedy action tensor([ 1.3998, -1.0462, 0.0499, -0.7173]) tensor([0.6820, 0.0591, 0.1768, 0.0821]) -Greedy action tensor([ 0.7400, -0.5269, -0.1064, -0.2748]) tensor([0.4824, 0.1359, 0.2069, 0.1748]) -Greedy action tensor([ 0.8282, -0.6108, -0.0918, -0.4754]) tensor([0.5243, 0.1243, 0.2089, 0.1424]) -Greedy action tensor([ 0.7475, -0.6291, -0.0606, -0.4951]) tensor([0.5033, 0.1271, 0.2243, 0.1453]) -Greedy action tensor([ 0.9147, -0.7940, 0.0921, -0.4800]) tensor([0.5352, 0.0969, 0.2351, 0.1327]) -Greedy action tensor([ 0.5293, -0.2820, 0.0518, -0.4230]) tensor([0.4081, 0.1813, 0.2531, 0.1575]) -Greedy action tensor([ 0.9696, -0.4826, -0.0916, -0.4567]) tensor([0.5494, 0.1286, 0.1901, 0.1320]) -Greedy action tensor([ 0.8109, 0.0261, -0.0176, -0.0185]) tensor([0.4293, 0.1959, 0.1875, 0.1873]) -Greedy action tensor([ 0.8814, -0.3813, -0.1764, -0.4636]) tensor([0.5289, 0.1496, 0.1837, 0.1378]) -Greedy action tensor([ 0.8588, -0.7429, 0.0334, -0.4467]) tensor([0.5234, 0.1055, 0.2293, 0.1419]) -Greedy action tensor([ 0.6279, -0.1771, -0.0977, -0.2725]) tensor([0.4278, 0.1913, 0.2071, 0.1739]) -Greedy action tensor([ 1.0840, -0.5725, 0.1517, -0.6185]) tensor([0.5660, 0.1080, 0.2228, 0.1031]) -Greedy action tensor([ 0.7923, -0.2626, 0.1754, -0.2545]) tensor([0.4466, 0.1555, 0.2410, 0.1568]) -Greedy action tensor([ 0.8929, -0.9667, 0.0491, -0.4431]) tensor([0.5409, 0.0842, 0.2326, 0.1422]) -Greedy action tensor([ 0.5958, -0.3979, -0.1704, -0.0991]) tensor([0.4284, 0.1586, 0.1991, 0.2138]) -Greedy action tensor([0.6487, 0.2240, 0.1496, 0.0291]) tensor([0.3572, 0.2336, 0.2169, 0.1923]) -Greedy action tensor([ 0.8577, -0.4122, -0.0408, -0.5245]) tensor([0.5157, 0.1448, 0.2100, 0.1295]) -Greedy action tensor([ 0.8414, -0.3409, 0.0150, -0.2067]) tensor([0.4774, 0.1463, 0.2089, 0.1674]) -Greedy action tensor([0.2514, 0.0781, 0.0066, 0.1172]) tensor([0.2859, 0.2404, 0.2238, 0.2500]) -Greedy action tensor([ 0.6429, -0.6676, -0.1764, -0.3094]) tensor([0.4770, 0.1287, 0.2102, 0.1841]) -Greedy action tensor([ 0.5765, -0.1840, -0.0622, -0.0921]) tensor([0.3988, 0.1864, 0.2105, 0.2043]) -Greedy action tensor([ 0.9439, -0.6642, -0.0034, -0.4859]) tensor([0.5472, 0.1096, 0.2122, 0.1310]) -Greedy action tensor([ 1.2620, -0.6956, 0.1070, -0.6242]) tensor([0.6219, 0.0878, 0.1960, 0.0943]) -Greedy action tensor([ 1.1172, -0.4683, -0.1308, -0.2839]) tensor([0.5753, 0.1179, 0.1652, 0.1417]) -Greedy action tensor([ 0.8572, -0.2724, 0.0131, -0.5551]) tensor([0.5008, 0.1618, 0.2153, 0.1220]) -Greedy action tensor([ 0.6122, -0.4348, -0.0357, -0.3450]) tensor([0.4429, 0.1554, 0.2317, 0.1700]) -Greedy action tensor([ 0.5111, -0.1366, -0.0101, -0.1596]) tensor([0.3805, 0.1991, 0.2259, 0.1945]) -Greedy action tensor([ 0.5980, -0.4384, -0.2812, -0.3319]) tensor([0.4620, 0.1639, 0.1918, 0.1823]) -Greedy action tensor([ 1.0309, -0.3944, 0.0011, -0.3809]) tensor([0.5431, 0.1306, 0.1939, 0.1324]) -Greedy action tensor([ 0.4361, -0.2666, 0.0721, -0.3084]) tensor([0.3752, 0.1858, 0.2607, 0.1782]) -Greedy action tensor([0.3489, 0.2117, 0.0205, 0.1307]) tensor([0.2945, 0.2567, 0.2121, 0.2367]) -Greedy action tensor([ 1.3036, -0.5552, 0.1958, -0.2676]) tensor([0.5903, 0.0920, 0.1950, 0.1227]) -Greedy action tensor([ 1.1254, -0.3848, 0.0035, -0.2787]) tensor([0.5580, 0.1232, 0.1817, 0.1370]) -Greedy action tensor([ 0.8728, -0.6522, 0.0699, -0.3353]) tensor([0.5090, 0.1108, 0.2281, 0.1521]) -Greedy action tensor([0.7567, 0.1241, 0.1592, 0.0317]) tensor([0.3897, 0.2070, 0.2144, 0.1888]) -Greedy action tensor([ 0.7367, -0.4147, -0.2059, -0.2896]) tensor([0.4844, 0.1532, 0.1888, 0.1736]) -Greedy action tensor([ 1.1108, -0.7527, 0.0065, -0.5254]) tensor([0.5948, 0.0923, 0.1971, 0.1158]) -Greedy action tensor([ 0.7696, -0.3167, 0.0068, -0.2501]) tensor([0.4620, 0.1559, 0.2155, 0.1666]) -Greedy action tensor([ 1.1743, -0.7346, 0.0894, -0.4082]) tensor([0.5911, 0.0876, 0.1998, 0.1215]) -Greedy action tensor([ 1.3844, -0.9186, -0.0814, -0.4243]) tensor([0.6690, 0.0669, 0.1545, 0.1096]) -Greedy action tensor([ 0.7888, -0.2940, 0.0035, -0.1518]) tensor([0.4577, 0.1550, 0.2087, 0.1787]) -Greedy action tensor([ 0.7537, -0.6759, -0.0151, -0.3533]) tensor([0.4918, 0.1177, 0.2280, 0.1625]) -Greedy action tensor([ 0.7788, -0.4084, 0.0412, -0.1641]) tensor([0.4602, 0.1404, 0.2201, 0.1792]) -Greedy action tensor([ 0.9478, -0.5513, -0.0992, -0.3074]) tensor([0.5378, 0.1201, 0.1888, 0.1533]) -Greedy action tensor([ 0.8376, -0.5499, -0.0351, -0.3842]) tensor([0.5096, 0.1273, 0.2129, 0.1502]) -Greedy action tensor([ 0.5183, -0.2473, -0.0647, -0.1349]) tensor([0.3931, 0.1828, 0.2194, 0.2046]) -Greedy action tensor([ 1.2128, -0.7432, 0.0838, -0.5450]) tensor([0.6108, 0.0864, 0.1975, 0.1053]) -Greedy action tensor([ 0.5835, -0.4540, -0.1921, -0.2507]) tensor([0.4446, 0.1575, 0.2047, 0.1931]) -Greedy action tensor([ 0.7129, -0.5387, 0.1474, -0.5628]) tensor([0.4687, 0.1341, 0.2663, 0.1309]) -Greedy action tensor([ 0.7187, -0.3509, -0.0498, -0.1839]) tensor([0.4520, 0.1551, 0.2096, 0.1833]) -Greedy action tensor([ 0.8733, -0.6914, 0.0593, -0.7103]) tensor([0.5384, 0.1126, 0.2385, 0.1105]) -Greedy action tensor([ 0.7811, -0.4620, -0.0035, -0.3707]) tensor([0.4852, 0.1400, 0.2214, 0.1534]) -Greedy action tensor([ 0.7626, 0.1158, -0.0991, 0.0200]) tensor([0.4129, 0.2162, 0.1744, 0.1965]) -Greedy action tensor([ 0.5437, -0.4318, -0.0582, -0.1159]) tensor([0.4095, 0.1544, 0.2243, 0.2117]) -Greedy action tensor([ 0.5875, -0.0899, -0.1273, -0.0093]) tensor([0.3925, 0.1994, 0.1920, 0.2161]) -Greedy action tensor([ 0.6828, -0.2622, -0.1018, -0.3596]) tensor([0.4550, 0.1769, 0.2076, 0.1604]) -Greedy action tensor([ 1.0126, -0.4371, 0.0326, -0.2252]) tensor([0.5263, 0.1235, 0.1975, 0.1526]) -Greedy action tensor([ 0.9742, -0.6343, 0.0193, -0.3601]) tensor([0.5410, 0.1083, 0.2082, 0.1425]) -Greedy action tensor([ 0.7863, -0.5101, -0.1712, -0.3678]) tensor([0.5069, 0.1387, 0.1946, 0.1598]) -Greedy action tensor([ 0.6103, -0.1861, -0.1096, -0.2470]) tensor([0.4234, 0.1909, 0.2061, 0.1796]) -Greedy action tensor([ 0.7992, 0.0755, 0.0156, -0.1460]) tensor([0.4291, 0.2081, 0.1960, 0.1668]) -Greedy action tensor([ 0.5962, -0.5265, 0.0906, -0.2406]) tensor([0.4234, 0.1378, 0.2554, 0.1834]) -Greedy action tensor([ 0.6141, -0.0209, -0.0550, 0.0052]) tensor([0.3867, 0.2049, 0.1981, 0.2103]) -Greedy action tensor([ 0.4459, -0.3352, -0.0323, -0.3222]) tensor([0.3934, 0.1802, 0.2439, 0.1825]) -Greedy action tensor([ 0.9312, -0.7936, -0.0529, -0.3662]) tensor([0.5479, 0.0976, 0.2048, 0.1497]) -Greedy action tensor([ 0.6765, -0.4664, -0.2208, -0.1639]) tensor([0.4634, 0.1478, 0.1889, 0.2000]) -Greedy action tensor([ 1.1489, -0.7511, -0.0209, -0.2663]) tensor([0.5872, 0.0878, 0.1823, 0.1426]) -Greedy action tensor([ 1.2780, -0.6412, -0.0950, -0.6686]) tensor([0.6482, 0.0951, 0.1642, 0.0925]) -Greedy action tensor([ 0.7496, -0.5367, -0.1147, -0.3223]) tensor([0.4902, 0.1354, 0.2065, 0.1678]) -Greedy action tensor([ 1.3228, -0.8404, 0.0479, -0.7166]) tensor([0.6559, 0.0754, 0.1833, 0.0853]) -Greedy action tensor([ 0.7285, -0.2928, -0.0808, -0.2082]) tensor([0.4551, 0.1639, 0.2026, 0.1784]) -Greedy action tensor([ 0.4420, -0.1130, -0.0386, -0.0739]) tensor([0.3585, 0.2058, 0.2217, 0.2140]) -Greedy action tensor([ 0.5485, -0.1813, 0.0701, -0.0533]) tensor([0.3774, 0.1819, 0.2339, 0.2068]) -Greedy action tensor([ 0.8737, -0.8547, -0.0438, -0.3152]) tensor([0.5315, 0.0944, 0.2123, 0.1619]) -Greedy action tensor([ 0.6007, -0.4242, 0.0491, 0.0240]) tensor([0.4005, 0.1437, 0.2307, 0.2250]) -Greedy action tensor([ 0.5901, -0.4492, -0.0495, -0.2938]) tensor([0.4359, 0.1542, 0.2299, 0.1801]) -Greedy action tensor([ 0.7890, -0.5102, 0.1130, -0.6902]) tensor([0.4977, 0.1357, 0.2532, 0.1134]) -Greedy action tensor([ 0.7395, -0.2569, -0.2980, -0.5695]) tensor([0.5016, 0.1852, 0.1777, 0.1355]) -Greedy action tensor([ 0.8429, -0.3700, -0.1049, -0.5525]) tensor([0.5174, 0.1538, 0.2005, 0.1282]) -Greedy action tensor([ 0.4764, -0.1214, 0.1751, 0.0557]) tensor([0.3394, 0.1867, 0.2511, 0.2228]) -Greedy action tensor([ 0.7147, -0.5347, 0.0994, -0.3929]) tensor([0.4635, 0.1329, 0.2505, 0.1531]) -Greedy action tensor([-1.8170, -0.4422, 0.6055, -0.1219]) tensor([0.0461, 0.1824, 0.5201, 0.2513]) -Greedy action tensor([-1.6873, 0.0942, 0.4415, -0.0560]) tensor([0.0489, 0.2903, 0.4109, 0.2499]) -Greedy action tensor([-1.5953, 0.5547, -0.1055, -0.7419]) tensor([0.0611, 0.5245, 0.2710, 0.1434]) -Greedy action tensor([-1.9059, -0.4022, 0.6507, -0.1563]) tensor([0.0414, 0.1863, 0.5340, 0.2383]) -Greedy action tensor([-1.8934, -0.4546, 0.6485, -0.1543]) tensor([0.0424, 0.1785, 0.5380, 0.2411]) -Greedy action tensor([-1.7595, -0.2517, 0.5486, -0.0999]) tensor([0.0480, 0.2168, 0.4827, 0.2524]) -Greedy action tensor([-1.8965, -0.4609, 0.6391, -0.1549]) tensor([0.0425, 0.1786, 0.5365, 0.2425]) -Greedy action tensor([-1.9113, -0.4104, 0.6506, -0.1650]) tensor([0.0414, 0.1855, 0.5360, 0.2371]) -Greedy action tensor([-1.9155, -0.4592, 0.6522, -0.1699]) tensor([0.0416, 0.1783, 0.5419, 0.2382]) -Greedy action tensor([-1.2894, 0.7759, 0.1524, 0.1714]) tensor([0.0574, 0.4526, 0.2427, 0.2473]) -Greedy action tensor([-1.7957, -0.4763, 0.5951, -0.1340]) tensor([0.0478, 0.1787, 0.5218, 0.2517]) -Greedy action tensor([-1.8865, -0.3603, 0.6348, -0.1477]) tensor([0.0421, 0.1938, 0.5243, 0.2397]) -Greedy action tensor([-1.9192, -0.4109, 0.6508, -0.1625]) tensor([0.0410, 0.1854, 0.5360, 0.2376]) -Greedy action tensor([-1.7629, -0.1448, 0.5260, -0.0967]) tensor([0.0472, 0.2379, 0.4653, 0.2496]) -Greedy action tensor([-0.7504, -0.2877, 0.1323, -0.3375]) tensor([0.1534, 0.2437, 0.3709, 0.2319]) -Greedy action tensor([-1.9322, -0.4234, 0.6592, -0.1739]) tensor([0.0405, 0.1833, 0.5410, 0.2352]) -Greedy action tensor([-1.7379, -0.2498, 0.5432, -0.1084]) tensor([0.0492, 0.2180, 0.4817, 0.2511]) -Greedy action tensor([-1.9305, -0.3461, 0.6456, -0.1849]) tensor([0.0404, 0.1970, 0.5311, 0.2315]) -Greedy action tensor([-1.8965, -0.4238, 0.6556, -0.1484]) tensor([0.0418, 0.1822, 0.5361, 0.2399]) -Greedy action tensor([-1.8936, -0.3459, 0.6353, -0.1509]) tensor([0.0417, 0.1962, 0.5235, 0.2385]) -Greedy action tensor([-1.7771, -0.3863, 0.5804, -0.1276]) tensor([0.0481, 0.1933, 0.5082, 0.2504]) -Greedy action tensor([-1.2465, -0.7247, 0.3669, 0.1500]) tensor([0.0851, 0.1435, 0.4274, 0.3440]) -Greedy action tensor([-1.8992, -0.4532, 0.6486, -0.1559]) tensor([0.0421, 0.1788, 0.5383, 0.2408]) -Greedy action tensor([-1.1117, 0.6960, 0.1533, -0.6324]) tensor([0.0816, 0.4975, 0.2891, 0.1318]) -Greedy action tensor([-1.9152, -0.4490, 0.6552, -0.1666]) tensor([0.0414, 0.1794, 0.5412, 0.2380]) -Greedy action tensor([-1.6465, -0.4353, 0.5084, -0.0236]) tensor([0.0554, 0.1860, 0.4779, 0.2807]) -Greedy action tensor([-0.8328, 0.6289, -0.3120, -0.4608]) tensor([0.1184, 0.5106, 0.1993, 0.1717]) -Greedy action tensor([-1.7913, -0.4131, 0.6327, -0.0519]) tensor([0.0456, 0.1807, 0.5143, 0.2594]) -Greedy action tensor([-1.8985, -0.4362, 0.6446, -0.1573]) tensor([0.0421, 0.1818, 0.5358, 0.2403]) -Greedy action tensor([-1.9080, -0.4218, 0.6486, -0.1647]) tensor([0.0416, 0.1840, 0.5365, 0.2379]) -Greedy action tensor([-1.4758, -0.2001, 0.6406, 0.4280]) tensor([0.0510, 0.1828, 0.4237, 0.3425]) -Greedy action tensor([-1.8256, -0.2684, 0.5844, -0.1230]) tensor([0.0447, 0.2122, 0.4978, 0.2454]) -Greedy action tensor([-1.9000, -0.4164, 0.6399, -0.1592]) tensor([0.0420, 0.1853, 0.5330, 0.2397]) -Greedy action tensor([-1.5802, -0.2043, 0.4473, -0.0418]) tensor([0.0581, 0.2300, 0.4413, 0.2706]) -Greedy action tensor([-1.9087, -0.4032, 0.6504, -0.1611]) tensor([0.0414, 0.1864, 0.5347, 0.2375]) -Greedy action tensor([-1.8703, -0.3088, 0.6214, -0.1466]) tensor([0.0426, 0.2032, 0.5151, 0.2390]) -Greedy action tensor([-1.8957, -0.3128, 0.6320, -0.1528]) tensor([0.0415, 0.2020, 0.5195, 0.2370]) -Greedy action tensor([-1.9294, -0.4216, 0.6599, -0.1730]) tensor([0.0406, 0.1834, 0.5409, 0.2352]) -Greedy action tensor([-1.6605, -0.1880, 0.4923, -0.0841]) tensor([0.0532, 0.2318, 0.4578, 0.2572]) -Greedy action tensor([-1.9348, -0.4513, 0.6666, -0.1736]) tensor([0.0405, 0.1784, 0.5456, 0.2355]) -Greedy action tensor([-1.7869, -0.4775, 0.6001, -0.1039]) tensor([0.0477, 0.1767, 0.5190, 0.2567]) -Greedy action tensor([-1.8828, -0.3723, 0.6263, -0.1577]) tensor([0.0427, 0.1933, 0.5246, 0.2395]) -Greedy action tensor([-1.9114, -0.3725, 0.6439, -0.1617]) tensor([0.0412, 0.1919, 0.5301, 0.2369]) -Greedy action tensor([-1.9389, -0.4407, 0.6643, -0.1761]) tensor([0.0403, 0.1803, 0.5444, 0.2349]) -Greedy action tensor([-1.8553, -0.4575, 0.6289, -0.1307]) tensor([0.0442, 0.1787, 0.5295, 0.2477]) -Greedy action tensor([-1.9253, -0.4411, 0.6543, -0.1718]) tensor([0.0410, 0.1809, 0.5411, 0.2369]) -Greedy action tensor([-1.4413, 0.2298, 0.4274, -0.1741]) tensor([0.0612, 0.3253, 0.3964, 0.2172]) -Greedy action tensor([-1.9047, -0.3715, 0.6377, -0.1575]) tensor([0.0415, 0.1924, 0.5278, 0.2383]) -Greedy action tensor([-1.8213, -0.1267, 0.5577, -0.0876]) tensor([0.0437, 0.2378, 0.4713, 0.2472]) -Greedy action tensor([-1.9280, -0.4640, 0.7179, -0.1428]) tensor([0.0394, 0.1703, 0.5554, 0.2349]) -Greedy action tensor([-1.9092, -0.4366, 0.6474, -0.1655]) tensor([0.0417, 0.1819, 0.5378, 0.2386]) -Greedy action tensor([-0.8802, 0.8704, 0.1581, 0.0224]) tensor([0.0830, 0.4779, 0.2344, 0.2047]) -Greedy action tensor([-1.2074, 0.4350, 0.2449, -0.0405]) tensor([0.0733, 0.3785, 0.3130, 0.2353]) -Greedy action tensor([-1.7624, -0.2240, 0.5667, -0.1627]) tensor([0.0479, 0.2231, 0.4918, 0.2372]) -Greedy action tensor([-1.8593, -0.3409, 0.6119, -0.1497]) tensor([0.0436, 0.1991, 0.5162, 0.2410]) -Greedy action tensor([-1.7532, -0.1135, 0.5222, -0.1105]) tensor([0.0475, 0.2448, 0.4622, 0.2455]) -Greedy action tensor([-0.4556, -0.4780, 0.2276, 0.1329]) tensor([0.1736, 0.1698, 0.3438, 0.3128]) -Greedy action tensor([-1.8608, -0.4367, 0.6268, -0.1407]) tensor([0.0439, 0.1824, 0.5284, 0.2453]) -Greedy action tensor([0.3201, 1.0569, 0.0183, 0.6943]) tensor([0.1893, 0.3955, 0.1400, 0.2752]) -Greedy action tensor([-1.7099, -0.1148, 0.5034, -0.1018]) tensor([0.0498, 0.2456, 0.4557, 0.2488]) -Greedy action tensor([-0.2012, -0.4523, -0.0050, 0.6497]) tensor([0.1874, 0.1458, 0.2280, 0.4388]) -Greedy action tensor([-1.4858, 0.2013, 0.3784, -0.0424]) tensor([0.0585, 0.3162, 0.3775, 0.2478]) -Greedy action tensor([-1.9211, -0.4360, 0.6568, -0.1685]) tensor([0.0411, 0.1813, 0.5407, 0.2369]) -Greedy action tensor([-1.0042, -0.6531, 0.2496, 0.1020]) tensor([0.1118, 0.1588, 0.3916, 0.3379]) -Greedy action tensor([-1.4078, -0.5136, 0.4923, -0.1570]) tensor([0.0734, 0.1795, 0.4907, 0.2564]) -Greedy action tensor([-0.5992, -0.5209, 0.1427, 0.3368]) tensor([0.1486, 0.1607, 0.3120, 0.3788]) -Greedy action tensor([-1.9344, -0.4423, 0.6632, -0.1746]) tensor([0.0405, 0.1801, 0.5440, 0.2354]) -Greedy action tensor([-1.9096, -0.3627, 0.6421, -0.1610]) tensor([0.0412, 0.1935, 0.5285, 0.2368]) -Greedy action tensor([-1.8982, -0.4191, 0.6484, -0.1491]) tensor([0.0418, 0.1836, 0.5340, 0.2405]) -Greedy action tensor([-0.9382, 0.8758, 0.1758, 0.0372]) tensor([0.0779, 0.4780, 0.2374, 0.2067]) -Greedy action tensor([-1.5468, -0.0237, 0.4168, 0.0554]) tensor([0.0566, 0.2595, 0.4031, 0.2808]) -Greedy action tensor([-0.8936, -0.0221, 0.2017, -0.1099]) tensor([0.1167, 0.2789, 0.3489, 0.2555]) -Greedy action tensor([-1.4139, 0.0223, 0.1598, -0.5888]) tensor([0.0812, 0.3415, 0.3919, 0.1854]) -Greedy action tensor([-1.4199, 0.1749, 0.3610, -0.0765]) tensor([0.0637, 0.3140, 0.3782, 0.2442]) -Greedy action tensor([-1.8911, -0.4021, 0.6474, -0.1465]) tensor([0.0420, 0.1861, 0.5316, 0.2403]) -Greedy action tensor([-1.8685, -0.2974, 0.6193, -0.1314]) tensor([0.0425, 0.2045, 0.5115, 0.2415]) -Greedy action tensor([-1.6036, -0.0556, 0.4227, -0.0341]) tensor([0.0553, 0.2599, 0.4193, 0.2655]) -Greedy action tensor([-1.0888, 0.0583, 0.3378, 0.1551]) tensor([0.0849, 0.2673, 0.3534, 0.2944]) -Greedy action tensor([-1.7734, 0.8007, 0.5346, 0.2538]) tensor([0.0315, 0.4130, 0.3165, 0.2390]) -Greedy action tensor([-0.8370, 0.9224, 0.0771, 0.2448]) tensor([0.0816, 0.4740, 0.2036, 0.2408]) -Greedy action tensor([-1.5419, 0.1758, 0.3895, -0.0316]) tensor([0.0556, 0.3096, 0.3833, 0.2516]) -Greedy action tensor([ 0.9082, -0.3140, -0.5669, 0.4243]) tensor([0.4674, 0.1377, 0.1069, 0.2881]) -Greedy action tensor([ 1.5887, -0.0182, -0.3931, 0.2110]) tensor([0.6287, 0.1261, 0.0867, 0.1585]) -Greedy action tensor([ 1.2778, -1.4092, 0.0320, -0.3119]) tensor([0.6411, 0.0436, 0.1845, 0.1308]) -Greedy action tensor([ 1.4374, -0.6558, -0.7323, 0.3042]) tensor([0.6412, 0.0791, 0.0732, 0.2065]) -Greedy action tensor([ 1.8849, -0.3833, -0.7165, 1.1942]) tensor([0.5956, 0.0616, 0.0442, 0.2985]) -Greedy action tensor([ 1.7727, -0.3466, -0.3505, 0.8997]) tensor([0.6033, 0.0725, 0.0722, 0.2520]) -Greedy action tensor([ 1.7049, -0.0631, -0.5150, 0.0932]) tensor([0.6762, 0.1154, 0.0735, 0.1349]) -Greedy action tensor([ 1.3432, -0.6223, -0.0629, 0.2994]) tensor([0.5756, 0.0806, 0.1411, 0.2027]) -Greedy action tensor([ 1.4385, -0.5651, -0.3487, 0.1674]) tensor([0.6318, 0.0852, 0.1058, 0.1772]) -Greedy action tensor([ 1.2679, -0.2468, -0.6819, 0.4224]) tensor([0.5582, 0.1227, 0.0794, 0.2396]) -Greedy action tensor([ 1.3081, -0.7732, -0.4209, 0.5201]) tensor([0.5692, 0.0710, 0.1010, 0.2588]) -Greedy action tensor([ 1.4804, -0.4042, -0.6690, 1.0074]) tensor([0.5287, 0.0803, 0.0616, 0.3294]) -Greedy action tensor([ 2.1292, -0.6753, -0.9267, 0.2939]) tensor([0.7892, 0.0478, 0.0372, 0.1259]) -Greedy action tensor([ 1.9703, -0.7263, -0.3543, -0.2379]) tensor([0.7842, 0.0529, 0.0767, 0.0862]) -Greedy action tensor([ 1.3273, -0.5629, -0.2310, 0.3330]) tensor([0.5775, 0.0872, 0.1216, 0.2137]) -Greedy action tensor([ 0.9137, -0.2755, -1.0297, 0.2952]) tensor([0.5034, 0.1533, 0.0721, 0.2712]) -Greedy action tensor([ 1.7648, -1.1019, -0.2655, -0.1365]) tensor([0.7476, 0.0425, 0.0982, 0.1117]) -Greedy action tensor([ 1.2793, -0.3387, -0.4221, -0.0570]) tensor([0.6085, 0.1206, 0.1110, 0.1599]) -Greedy action tensor([ 1.9173, -0.8992, -0.0868, 0.1875]) tensor([0.7289, 0.0436, 0.0982, 0.1293]) -Greedy action tensor([ 1.6366, -0.6731, -0.5458, 0.3958]) tensor([0.6661, 0.0661, 0.0751, 0.1926]) -Greedy action tensor([ 1.4830, -0.2445, -0.3188, 0.4701]) tensor([0.5862, 0.1042, 0.0967, 0.2129]) -Greedy action tensor([ 2.2446, 0.0977, 0.1740, -0.0630]) tensor([0.7449, 0.0870, 0.0939, 0.0741]) -Greedy action tensor([ 1.7185, -0.4692, -0.5380, 0.6578]) tensor([0.6397, 0.0718, 0.0670, 0.2215]) -Greedy action tensor([ 1.7457, -1.1251, -0.3967, 0.7431]) tensor([0.6490, 0.0368, 0.0762, 0.2381]) -Greedy action tensor([ 2.4881, -0.0413, -0.1463, 0.2800]) tensor([0.7928, 0.0632, 0.0569, 0.0871]) -Greedy action tensor([ 1.2651, 0.1562, -0.4730, 0.4957]) tensor([0.5078, 0.1676, 0.0893, 0.2353]) -Greedy action tensor([ 2.0554, -1.1976, 0.0252, 0.8131]) tensor([0.6856, 0.0265, 0.0900, 0.1979]) -Greedy action tensor([ 1.5680, -0.3469, -1.0078, 0.4944]) tensor([0.6389, 0.0941, 0.0486, 0.2184]) -Greedy action tensor([ 1.6546, -0.7993, -0.4435, 0.8488]) tensor([0.6041, 0.0519, 0.0741, 0.2699]) -Greedy action tensor([ 1.1007, 0.0490, -0.8905, 0.2023]) tensor([0.5282, 0.1845, 0.0721, 0.2151]) -Greedy action tensor([ 1.6733, -0.5228, -1.3050, 0.3887]) tensor([0.6950, 0.0773, 0.0354, 0.1924]) -Greedy action tensor([ 1.4120, -0.5863, -0.3317, 0.1617]) tensor([0.6262, 0.0849, 0.1095, 0.1794]) -Greedy action tensor([ 1.2540, -0.5911, -0.4565, 0.2903]) tensor([0.5813, 0.0919, 0.1051, 0.2218]) -Greedy action tensor([ 1.3766, -0.6139, -0.7694, -0.0205]) tensor([0.6663, 0.0910, 0.0779, 0.1648]) -Greedy action tensor([ 1.7386, -0.1747, -1.0302, 0.5424]) tensor([0.6611, 0.0976, 0.0415, 0.1999]) -Greedy action tensor([ 1.5650, -0.3328, -0.4853, 0.2394]) tensor([0.6476, 0.0971, 0.0833, 0.1720]) -Greedy action tensor([ 1.7074, -1.4054, -0.3113, -0.0299]) tensor([0.7389, 0.0329, 0.0982, 0.1300]) -Greedy action tensor([ 1.9695, -1.4523, -0.4390, -0.3279]) tensor([0.8176, 0.0267, 0.0735, 0.0822]) -Greedy action tensor([ 1.2698, -0.2209, -0.6633, 0.0155]) tensor([0.6042, 0.1361, 0.0874, 0.1724]) -Greedy action tensor([ 1.3426, -0.7253, -0.1018, 0.2946]) tensor([0.5838, 0.0738, 0.1377, 0.2047]) -Greedy action tensor([ 2.1394, -1.1693, 0.2141, 0.2388]) tensor([0.7508, 0.0275, 0.1095, 0.1122]) -Greedy action tensor([ 1.5868, -0.6000, -0.1916, 0.2922]) tensor([0.6430, 0.0722, 0.1086, 0.1762]) -Greedy action tensor([ 1.3360, -0.3059, -0.2475, 0.6624]) tensor([0.5239, 0.1014, 0.1075, 0.2671]) -Greedy action tensor([ 0.8472, -0.2767, -0.2257, -0.0266]) tensor([0.4798, 0.1559, 0.1641, 0.2002]) -Greedy action tensor([ 1.3235, -0.4353, -0.8871, 0.0917]) tensor([0.6355, 0.1095, 0.0697, 0.1854]) -Greedy action tensor([ 1.4028, 0.0856, -0.5320, 0.1962]) tensor([0.5843, 0.1565, 0.0844, 0.1748]) -Greedy action tensor([ 0.9753, -0.1759, -0.7328, 0.2064]) tensor([0.5100, 0.1613, 0.0924, 0.2364]) -Greedy action tensor([ 1.5840, -0.1250, -0.6820, 0.4620]) tensor([0.6209, 0.1124, 0.0644, 0.2022]) -Greedy action tensor([ 1.3421, -0.1418, -0.5582, 0.3239]) tensor([0.5755, 0.1305, 0.0861, 0.2079]) -Greedy action tensor([ 2.1766, -0.1172, -1.0779, 0.4144]) tensor([0.7627, 0.0769, 0.0294, 0.1309]) -Greedy action tensor([ 0.9827, -0.4764, -0.5146, 0.2370]) tensor([0.5180, 0.1204, 0.1159, 0.2457]) -Greedy action tensor([ 1.3896, -0.5277, -0.1088, 0.3408]) tensor([0.5811, 0.0854, 0.1299, 0.2036]) -Greedy action tensor([ 1.4309, -1.3162, -0.0730, 0.0606]) tensor([0.6492, 0.0416, 0.1443, 0.1649]) -Greedy action tensor([ 1.2383, -0.2669, -0.5540, 0.2720]) tensor([0.5653, 0.1255, 0.0942, 0.2151]) -Greedy action tensor([ 2.1546, -1.3284, -0.3383, 0.7888]) tensor([0.7307, 0.0224, 0.0604, 0.1865]) -Greedy action tensor([ 1.3110, -0.0602, -1.0167, -0.0464]) tensor([0.6216, 0.1578, 0.0606, 0.1600]) -Greedy action tensor([ 1.2287, -0.1924, -0.4516, -0.0518]) tensor([0.5863, 0.1416, 0.1092, 0.1629]) -Greedy action tensor([ 1.4307, -0.0493, -0.8233, 0.2794]) tensor([0.6065, 0.1381, 0.0637, 0.1918]) -Greedy action tensor([ 1.2649, -0.4814, -0.6285, 0.4643]) tensor([0.5637, 0.0983, 0.0849, 0.2531]) -Greedy action tensor([ 1.4848, -0.6218, -0.4423, -0.0681]) tensor([0.6762, 0.0823, 0.0984, 0.1431]) -Greedy action tensor([ 0.7648, -0.0139, -0.1975, 0.1048]) tensor([0.4241, 0.1947, 0.1620, 0.2192]) -Greedy action tensor([ 1.6558, -0.9390, -0.1332, 0.4377]) tensor([0.6504, 0.0486, 0.1087, 0.1924]) -Greedy action tensor([ 1.1174, -0.3518, -0.3160, 0.7394]) tensor([0.4643, 0.1068, 0.1107, 0.3181]) -Greedy action tensor([ 1.5045, -0.0092, -0.4144, -0.0881]) tensor([0.6368, 0.1402, 0.0935, 0.1295]) -Greedy action tensor([ 2.1228, -1.2615, -0.2247, 0.4071]) tensor([0.7637, 0.0259, 0.0730, 0.1373]) -Greedy action tensor([ 1.0339, -0.5245, -0.0111, 0.4688]) tensor([0.4694, 0.0988, 0.1651, 0.2667]) -Greedy action tensor([ 1.6111, -1.2242, -0.0771, -0.0174]) tensor([0.6946, 0.0408, 0.1284, 0.1363]) -Greedy action tensor([ 1.5860, -0.9849, -0.5701, 0.0298]) tensor([0.7127, 0.0545, 0.0825, 0.1503]) -Greedy action tensor([ 1.0751, -0.2874, -0.8929, 0.5797]) tensor([0.4987, 0.1277, 0.0697, 0.3039]) -Greedy action tensor([ 1.5770, -0.6060, -0.2553, 0.0366]) tensor([0.6725, 0.0758, 0.1076, 0.1441]) -Greedy action tensor([ 1.4954, -0.3671, -0.3335, -0.2765]) tensor([0.6730, 0.1045, 0.1081, 0.1144]) -Greedy action tensor([ 1.6547, -0.4674, -0.8468, 0.2878]) tensor([0.6865, 0.0822, 0.0563, 0.1750]) -Greedy action tensor([ 1.2005, -0.6493, -0.0165, 0.3707]) tensor([0.5292, 0.0832, 0.1567, 0.2308]) -Greedy action tensor([ 1.4343, 0.0504, -0.5628, 0.5105]) tensor([0.5607, 0.1405, 0.0761, 0.2226]) -Greedy action tensor([ 1.4177, -0.4765, -0.8706, 0.2192]) tensor([0.6437, 0.0968, 0.0653, 0.1942]) -Greedy action tensor([ 1.5139, -1.0468, -0.4796, 0.4569]) tensor([0.6406, 0.0495, 0.0873, 0.2226]) -Greedy action tensor([ 1.4701, -0.8806, -0.4013, 0.5937]) tensor([0.6004, 0.0572, 0.0924, 0.2499]) -Greedy action tensor([ 1.2805, -0.6361, -0.5080, 0.2750]) tensor([0.5952, 0.0876, 0.0995, 0.2178]) -Greedy action tensor([ 1.4785, -0.5887, -0.3378, 0.4381]) tensor([0.6088, 0.0770, 0.0990, 0.2151]) -Greedy action tensor([ 2.0167, 0.5173, -0.0902, 0.1519]) tensor([0.6668, 0.1489, 0.0811, 0.1033]) -Greedy action tensor([ 1.1598, -0.5993, 0.0344, 0.0384]) tensor([0.5487, 0.0945, 0.1781, 0.1788]) -Greedy action tensor([ 1.3037, -0.4937, -0.2632, 1.2169]) tensor([0.4365, 0.0723, 0.0911, 0.4001]) -Greedy action tensor([ 0.3505, -1.0791, 0.9105, -0.4585]) tensor([0.2911, 0.0697, 0.5096, 0.1296]) -Greedy action tensor([-1.8032, -0.7248, -0.8548, -0.0760]) tensor([0.0823, 0.2420, 0.2125, 0.4631]) -Greedy action tensor([ 0.0882, -0.3655, 0.3761, -0.5131]) tensor([0.2843, 0.1806, 0.3792, 0.1558]) -Greedy action tensor([ 0.4673, -0.6900, 0.2181, -0.5604]) tensor([0.4079, 0.1282, 0.3179, 0.1460]) -Greedy action tensor([ 0.3601, -1.5155, 0.0041, -0.2086]) tensor([0.4132, 0.0633, 0.2895, 0.2340]) -Greedy action tensor([ 0.7898, 0.5612, -0.2383, -0.0650]) tensor([0.3878, 0.3085, 0.1387, 0.1650]) -Greedy action tensor([ 0.7067, -0.5804, -0.2344, 0.0345]) tensor([0.4594, 0.1268, 0.1793, 0.2346]) -Greedy action tensor([ 0.1459, -0.4120, -0.1515, -0.1118]) tensor([0.3238, 0.1854, 0.2405, 0.2503]) -Greedy action tensor([-0.7509, -0.2222, 1.0734, -0.9394]) tensor([0.1028, 0.1745, 0.6375, 0.0852]) -Greedy action tensor([ 1.3831, -1.1178, 0.8827, 1.2873]) tensor([0.3851, 0.0316, 0.2335, 0.3499]) -Greedy action tensor([ 1.4060, -1.1775, 0.5783, 0.3083]) tensor([0.5416, 0.0409, 0.2367, 0.1807]) -Greedy action tensor([-0.4970, 1.0232, -0.4401, -0.5490]) tensor([0.1319, 0.6032, 0.1396, 0.1252]) -Greedy action tensor([ 0.6139, -0.9499, -0.4490, -0.1861]) tensor([0.4990, 0.1045, 0.1724, 0.2242]) -Greedy action tensor([ 0.6489, -0.9753, -0.1662, -0.8987]) tensor([0.5398, 0.1064, 0.2389, 0.1149]) -Greedy action tensor([-0.8528, 0.0866, -0.3747, -1.4467]) tensor([0.1747, 0.4470, 0.2818, 0.0965]) -Greedy action tensor([-0.0431, 0.3684, -0.6765, -0.7550]) tensor([0.2832, 0.4274, 0.1503, 0.1390]) -Greedy action tensor([-0.7044, -1.9921, 0.5564, -0.8185]) tensor([0.1755, 0.0484, 0.6194, 0.1566]) -Greedy action tensor([-1.0496, -0.5126, 0.0956, -1.9907]) tensor([0.1601, 0.2740, 0.5034, 0.0625]) -Greedy action tensor([-0.7524, -1.0138, 0.0857, -0.9770]) tensor([0.2049, 0.1578, 0.4737, 0.1637]) -Greedy action tensor([-0.2349, -0.4261, -1.2185, 0.0852]) tensor([0.2795, 0.2309, 0.1045, 0.3850]) -Greedy action tensor([-0.1745, -0.6910, -0.4884, 0.0693]) tensor([0.2775, 0.1656, 0.2028, 0.3541]) -Greedy action tensor([ 0.0299, -0.3920, -0.0372, -0.5920]) tensor([0.3197, 0.2097, 0.2990, 0.1717]) -Greedy action tensor([-0.8505, -0.9239, -0.7519, -0.6176]) tensor([0.2328, 0.2163, 0.2570, 0.2939]) -Greedy action tensor([-0.6895, -0.7018, 0.0491, -0.3170]) tensor([0.1808, 0.1786, 0.3783, 0.2623]) -Greedy action tensor([-0.7007, 0.2372, 0.1533, -1.1091]) tensor([0.1522, 0.3889, 0.3576, 0.1012]) -Greedy action tensor([ 0.4179, -0.4849, 0.2763, 0.0717]) tensor([0.3355, 0.1360, 0.2912, 0.2373]) -Greedy action tensor([-0.6734, -0.9742, 0.5580, -0.0606]) tensor([0.1426, 0.1056, 0.4886, 0.2632]) -Greedy action tensor([-0.0585, -0.2330, -1.0760, 0.3273]) tensor([0.2723, 0.2287, 0.0984, 0.4005]) -Greedy action tensor([ 0.4162, -0.4430, -0.8108, 0.1344]) tensor([0.4047, 0.1714, 0.1186, 0.3053]) -Greedy action tensor([-0.8678, -0.9418, 0.9946, -1.4180]) tensor([0.1118, 0.1038, 0.7199, 0.0645]) -Greedy action tensor([-0.4183, -1.1385, 0.5134, -1.1533]) tensor([0.2220, 0.1080, 0.5636, 0.1064]) -Greedy action tensor([ 0.3750, -2.2364, 0.2940, 0.6103]) tensor([0.3067, 0.0225, 0.2828, 0.3880]) -Greedy action tensor([-0.6924, -0.4998, -0.4704, -0.2536]) tensor([0.1995, 0.2419, 0.2491, 0.3095]) -Greedy action tensor([-1.1319, 0.1615, 0.3824, -0.4080]) tensor([0.0889, 0.3239, 0.4040, 0.1833]) -Greedy action tensor([ 0.8493, -1.0680, -0.0714, -0.1613]) tensor([0.5238, 0.0770, 0.2086, 0.1906]) -Greedy action tensor([-0.9237, -0.8411, 0.3532, 0.4041]) tensor([0.1059, 0.1150, 0.3796, 0.3995]) -Greedy action tensor([ 1.1122, -0.3943, -0.0493, 1.4830]) tensor([0.3352, 0.0743, 0.1049, 0.4856]) -Greedy action tensor([-0.2127, 0.8580, -0.7295, -0.1566]) tensor([0.1795, 0.5236, 0.1070, 0.1898]) -Greedy action tensor([-0.0786, -1.1302, -0.5131, 0.2192]) tensor([0.2991, 0.1045, 0.1937, 0.4028]) -Greedy action tensor([ 0.1187, -0.4278, 0.5527, -0.1029]) tensor([0.2549, 0.1476, 0.3934, 0.2042]) -Greedy action tensor([ 0.0209, 0.1621, 0.6736, -1.1296]) tensor([0.2279, 0.2624, 0.4376, 0.0721]) -Greedy action tensor([-0.1579, -0.8651, -0.4418, -0.2083]) tensor([0.3128, 0.1542, 0.2355, 0.2975]) -Greedy action tensor([-0.2068, -0.7997, 0.2666, 0.3011]) tensor([0.2075, 0.1147, 0.3331, 0.3448]) -Greedy action tensor([-0.0737, -0.1501, 0.0540, 0.4319]) tensor([0.2118, 0.1963, 0.2407, 0.3512]) -Greedy action tensor([ 0.1204, -0.0013, -0.1493, -0.5941]) tensor([0.3186, 0.2821, 0.2433, 0.1559]) -Greedy action tensor([ 0.1628, 0.0718, 0.0998, -0.6751]) tensor([0.3045, 0.2780, 0.2859, 0.1317]) -Greedy action tensor([ 0.5396, -1.0087, -0.0675, 0.5753]) tensor([0.3579, 0.0761, 0.1950, 0.3709]) -Greedy action tensor([-0.2153, -0.1733, -0.0356, -0.6071]) tensor([0.2554, 0.2664, 0.3057, 0.1726]) -Greedy action tensor([-0.2562, -0.5051, 0.4270, -0.6239]) tensor([0.2246, 0.1751, 0.4448, 0.1555]) -Greedy action tensor([ 0.1657, -0.4188, -0.4600, -1.0448]) tensor([0.4184, 0.2332, 0.2238, 0.1247]) -Greedy action tensor([-0.0812, -0.8485, -0.4198, -0.2630]) tensor([0.3321, 0.1542, 0.2367, 0.2769]) -Greedy action tensor([ 0.0671, -1.4745, -0.7987, -1.2495]) tensor([0.5255, 0.1125, 0.2211, 0.1409]) -Greedy action tensor([ 0.5420, 0.2664, -0.4066, -0.2093]) tensor([0.3819, 0.2900, 0.1479, 0.1802]) -Greedy action tensor([ 0.4612, 0.7867, 0.2225, -0.4306]) tensor([0.2792, 0.3865, 0.2199, 0.1144]) -Greedy action tensor([-0.0916, -0.2671, -1.3200, -0.0695]) tensor([0.3170, 0.2660, 0.0928, 0.3241]) -Greedy action tensor([-0.2717, -0.8396, -0.6383, 0.4984]) tensor([0.2262, 0.1282, 0.1568, 0.4887]) -Greedy action tensor([-0.1505, -1.5458, 0.1154, -1.1980]) tensor([0.3444, 0.0853, 0.4494, 0.1208]) -Greedy action tensor([ 0.2813, -0.7647, 0.7367, 0.5866]) tensor([0.2334, 0.0820, 0.3680, 0.3167]) -Greedy action tensor([-0.7953, -1.3959, 0.7396, -0.2845]) tensor([0.1273, 0.0698, 0.5907, 0.2121]) -Greedy action tensor([-1.6469, -1.1547, 0.8475, -1.6493]) tensor([0.0635, 0.1039, 0.7693, 0.0633]) -Greedy action tensor([ 0.0862, 0.6381, 0.1055, -0.6516]) tensor([0.2362, 0.4101, 0.2408, 0.1129]) -Greedy action tensor([ 0.2468, -0.0575, 0.1899, -0.1301]) tensor([0.2969, 0.2190, 0.2805, 0.2037]) -Greedy action tensor([ 1.1079, -1.4601, 0.0485, 0.5758]) tensor([0.4973, 0.0381, 0.1724, 0.2921]) -Greedy action tensor([ 0.5916, -0.7661, -0.4294, 0.8816]) tensor([0.3385, 0.0871, 0.1220, 0.4524]) -Greedy action tensor([-0.3052, -0.3542, -0.3940, -0.0321]) tensor([0.2392, 0.2277, 0.2188, 0.3143]) -Greedy action tensor([-1.3139, -0.4624, 0.7985, -1.3534]) tensor([0.0795, 0.1864, 0.6576, 0.0765]) -Greedy action tensor([ 0.3116, 0.5289, -0.1124, -0.2324]) tensor([0.2876, 0.3573, 0.1882, 0.1669]) -Greedy action tensor([-0.9120, -0.8700, 0.3432, -1.1374]) tensor([0.1575, 0.1642, 0.5526, 0.1257]) -Greedy action tensor([ 0.4268, -1.1979, 0.6718, 0.0688]) tensor([0.3151, 0.0621, 0.4026, 0.2203]) -Greedy action tensor([ 0.8373, -1.6197, -0.5294, -0.0893]) tensor([0.5758, 0.0493, 0.1468, 0.2280]) -Greedy action tensor([ 0.6590, 0.0582, -0.3529, 0.4201]) tensor([0.3705, 0.2031, 0.1347, 0.2917]) -Greedy action tensor([-1.7641, -0.9569, 0.5665, -1.1252]) tensor([0.0649, 0.1454, 0.6669, 0.1229]) -Greedy action tensor([-0.2797, -0.7886, 0.7083, -0.6139]) tensor([0.1999, 0.1202, 0.5369, 0.1431]) -Greedy action tensor([ 0.8000, -0.4495, 0.4677, 1.0781]) tensor([0.3008, 0.0862, 0.2158, 0.3972]) -Greedy action tensor([ 0.1134, -0.6921, -0.5386, 0.7720]) tensor([0.2564, 0.1146, 0.1336, 0.4954]) -Greedy action tensor([-0.7267, -0.4723, -1.3484, 0.0089]) tensor([0.2035, 0.2625, 0.1093, 0.4247]) -Greedy action tensor([ 0.3676, -0.5107, -0.6361, 0.0374]) tensor([0.3999, 0.1662, 0.1466, 0.2874]) -Greedy action tensor([ 0.6303, -1.1423, -0.3200, -0.2282]) tensor([0.5049, 0.0858, 0.1952, 0.2140]) -Greedy action tensor([-0.3647, -0.7050, -0.3757, -0.4323]) tensor([0.2751, 0.1957, 0.2721, 0.2571]) -Greedy action tensor([1.0049, 0.1879, 0.2682, 0.6577]) tensor([0.3806, 0.1682, 0.1822, 0.2690]) -Greedy action tensor([ 0.8261, -0.5832, -0.0790, -0.5179]) tensor([0.5237, 0.1279, 0.2118, 0.1366]) -Greedy action tensor([ 0.6985, 0.0631, -0.0092, 0.0565]) tensor([0.3924, 0.2078, 0.1933, 0.2065]) -Greedy action tensor([ 0.9110, -0.6853, 0.0055, -0.2732]) tensor([0.5227, 0.1059, 0.2114, 0.1600]) -Greedy action tensor([ 0.6803, -0.3891, -0.1596, -0.1524]) tensor([0.4525, 0.1553, 0.1954, 0.1968]) -Greedy action tensor([ 0.4908, -0.0509, -0.0988, -0.1327]) tensor([0.3742, 0.2177, 0.2075, 0.2006]) -Greedy action tensor([ 0.8202, -0.7649, -0.1222, -0.3509]) tensor([0.5250, 0.1076, 0.2046, 0.1628]) -Greedy action tensor([ 0.9923, -0.5807, 0.3344, -0.4048]) tensor([0.5069, 0.1051, 0.2626, 0.1254]) -Greedy action tensor([ 0.7457, -0.7098, 0.0525, -0.2873]) tensor([0.4787, 0.1117, 0.2393, 0.1704]) -Greedy action tensor([ 0.3945, 0.1485, 0.0157, -0.0147]) tensor([0.3194, 0.2498, 0.2187, 0.2121]) -Greedy action tensor([ 0.5198, 0.1516, -0.1918, 0.0068]) tensor([0.3595, 0.2488, 0.1765, 0.2152]) -Greedy action tensor([ 0.7275, -0.5449, -0.0211, -0.2860]) tensor([0.4726, 0.1324, 0.2235, 0.1715]) -Greedy action tensor([ 1.1734, -0.6610, -0.0156, -0.7913]) tensor([0.6233, 0.0995, 0.1898, 0.0874]) -Greedy action tensor([ 0.6620, -0.3200, -0.1853, -0.8026]) tensor([0.4916, 0.1841, 0.2107, 0.1136]) -Greedy action tensor([ 0.7494, -0.5937, 0.0240, -0.4222]) tensor([0.4866, 0.1270, 0.2356, 0.1508]) -Greedy action tensor([ 0.6784, -0.0362, 0.0789, 0.0419]) tensor([0.3895, 0.1906, 0.2139, 0.2061]) -Greedy action tensor([ 0.8642, 0.2246, -0.1134, -0.3968]) tensor([0.4572, 0.2412, 0.1720, 0.1296]) -Greedy action tensor([ 0.8426, -0.0863, 0.0847, -0.5338]) tensor([0.4726, 0.1867, 0.2215, 0.1193]) -Greedy action tensor([ 0.7046, -0.6261, -0.1335, -0.2654]) tensor([0.4817, 0.1273, 0.2084, 0.1826]) -Greedy action tensor([ 0.5790, -0.2059, -0.1370, -0.0152]) tensor([0.4005, 0.1827, 0.1957, 0.2211]) -Greedy action tensor([ 1.2445, -0.6506, -0.1249, -0.5351]) tensor([0.6356, 0.0955, 0.1616, 0.1072]) -Greedy action tensor([ 0.9289, -0.2317, -0.0545, -0.2634]) tensor([0.5023, 0.1574, 0.1879, 0.1525]) -Greedy action tensor([ 0.6706, -0.4381, 0.1160, -0.1332]) tensor([0.4252, 0.1403, 0.2442, 0.1903]) -Greedy action tensor([ 0.6640, 0.2549, -0.2750, 0.1487]) tensor([0.3770, 0.2504, 0.1474, 0.2252]) -Greedy action tensor([ 0.7132, -0.2526, -0.2473, -0.1327]) tensor([0.4561, 0.1736, 0.1745, 0.1957]) -Greedy action tensor([ 0.9944, -0.7888, 0.0452, -0.3979]) tensor([0.5544, 0.0932, 0.2146, 0.1378]) -Greedy action tensor([ 1.1522, -0.5244, -0.1279, -0.4133]) tensor([0.5974, 0.1117, 0.1661, 0.1248]) -Greedy action tensor([ 0.7449, -0.8640, -0.1845, -0.1195]) tensor([0.4960, 0.0992, 0.1958, 0.2090]) -Greedy action tensor([ 0.9013, -0.7978, 0.2275, -0.3700]) tensor([0.5068, 0.0927, 0.2584, 0.1422]) -Greedy action tensor([ 0.4950, -0.3073, 0.1272, -0.3488]) tensor([0.3890, 0.1744, 0.2693, 0.1673]) -Greedy action tensor([ 0.7287, -0.2673, -0.0159, -0.2622]) tensor([0.4514, 0.1667, 0.2144, 0.1676]) -Greedy action tensor([ 0.9716, -0.4315, -0.0437, -0.3491]) tensor([0.5333, 0.1311, 0.1932, 0.1424]) -Greedy action tensor([ 0.6727, -0.6567, 0.0252, -0.2273]) tensor([0.4557, 0.1206, 0.2385, 0.1853]) -Greedy action tensor([ 0.8496, -0.3807, -0.1690, -0.2885]) tensor([0.5067, 0.1480, 0.1830, 0.1623]) -Greedy action tensor([ 1.1198, -0.9090, 0.2332, -0.5188]) tensor([0.5754, 0.0757, 0.2371, 0.1118]) -Greedy action tensor([ 1.0000, -0.5755, 0.0891, -0.5620]) tensor([0.5498, 0.1138, 0.2211, 0.1153]) -Greedy action tensor([ 0.7739, -0.4355, -0.0193, -0.2759]) tensor([0.4760, 0.1420, 0.2154, 0.1666]) -Greedy action tensor([ 0.8059, -0.6319, -0.3034, -0.8445]) tensor([0.5684, 0.1350, 0.1875, 0.1091]) -Greedy action tensor([ 0.5728, -0.1002, 0.0675, -0.2990]) tensor([0.3950, 0.2015, 0.2383, 0.1652]) -Greedy action tensor([ 0.8138, -0.3922, -0.0048, -0.2595]) tensor([0.4802, 0.1438, 0.2118, 0.1642]) -Greedy action tensor([ 0.9980, -0.5407, -0.0705, -0.4074]) tensor([0.5545, 0.1190, 0.1905, 0.1360]) -Greedy action tensor([ 0.7727, -0.5411, -0.0618, -0.4825]) tensor([0.5030, 0.1352, 0.2184, 0.1434]) -Greedy action tensor([ 0.6485, 0.0271, -0.0007, -0.0073]) tensor([0.3878, 0.2083, 0.2026, 0.2013]) -Greedy action tensor([ 0.6275, -0.2623, -0.0320, -0.0343]) tensor([0.4092, 0.1681, 0.2116, 0.2111]) -Greedy action tensor([ 0.3333, 0.1004, -0.0255, 0.1332]) tensor([0.3022, 0.2394, 0.2111, 0.2474]) -Greedy action tensor([ 1.2796, -0.9576, 0.1282, -0.5662]) tensor([0.6326, 0.0675, 0.2000, 0.0999]) -Greedy action tensor([ 0.9188, 0.0235, -0.0280, 0.0232]) tensor([0.4536, 0.1853, 0.1760, 0.1852]) -Greedy action tensor([ 0.6784, -0.1217, -0.1200, -0.0475]) tensor([0.4196, 0.1885, 0.1888, 0.2030]) -Greedy action tensor([ 0.6216, -0.3222, 0.0079, -0.2696]) tensor([0.4272, 0.1662, 0.2313, 0.1752]) -Greedy action tensor([ 0.4626, -0.0546, -0.0630, -0.0458]) tensor([0.3586, 0.2138, 0.2120, 0.2157]) -Greedy action tensor([ 0.8188, -0.2807, -0.2105, -0.3511]) tensor([0.4998, 0.1665, 0.1786, 0.1551]) -Greedy action tensor([ 0.7547, -0.7754, 0.0504, -0.3208]) tensor([0.4873, 0.1055, 0.2410, 0.1662]) -Greedy action tensor([ 0.8273, -0.2362, 0.0077, -0.1468]) tensor([0.4622, 0.1596, 0.2037, 0.1745]) -Greedy action tensor([ 0.6996, 0.4260, -0.1824, -0.4863]) tensor([0.4032, 0.3067, 0.1669, 0.1232]) -Greedy action tensor([ 0.8892, -0.9182, -0.0801, -0.3300]) tensor([0.5438, 0.0892, 0.2063, 0.1607]) -Greedy action tensor([ 0.4960, 0.1504, -0.2220, -0.1480]) tensor([0.3675, 0.2601, 0.1793, 0.1930]) -Greedy action tensor([ 0.5756, -0.1761, -0.0337, -0.0314]) tensor([0.3906, 0.1842, 0.2124, 0.2129]) -Greedy action tensor([ 0.9987, -0.6738, 0.0109, -0.8502]) tensor([0.5822, 0.1093, 0.2168, 0.0916]) -Greedy action tensor([ 0.6479, -0.3438, -0.0216, -0.0917]) tensor([0.4237, 0.1572, 0.2169, 0.2022]) -Greedy action tensor([ 0.7884, -0.6703, 0.0854, -0.6443]) tensor([0.5086, 0.1183, 0.2518, 0.1214]) -Greedy action tensor([ 1.0608, -0.8271, 0.0734, -0.4508]) tensor([0.5732, 0.0868, 0.2136, 0.1264]) -Greedy action tensor([ 1.2127, -1.0989, 0.1223, -0.4727]) tensor([0.6171, 0.0612, 0.2074, 0.1144]) -Greedy action tensor([ 0.8255, -0.2782, 0.0067, -0.4575]) tensor([0.4878, 0.1618, 0.2151, 0.1352]) -Greedy action tensor([ 0.6621, -0.2698, -0.0327, -0.2380]) tensor([0.4349, 0.1712, 0.2171, 0.1768]) -Greedy action tensor([ 0.9252, -0.2055, -0.0383, -0.1984]) tensor([0.4928, 0.1591, 0.1880, 0.1602]) -Greedy action tensor([ 0.9451, -0.2944, 0.1417, -0.2829]) tensor([0.4926, 0.1426, 0.2206, 0.1443]) -Greedy action tensor([ 0.6184, -0.0721, -0.0962, 0.0349]) tensor([0.3924, 0.1967, 0.1920, 0.2189]) -Greedy action tensor([ 0.5310, -0.2206, -0.1148, -0.0676]) tensor([0.3929, 0.1853, 0.2060, 0.2159]) -Greedy action tensor([ 1.0997, -0.7792, -0.0719, -0.6671]) tensor([0.6122, 0.0935, 0.1897, 0.1046]) -Greedy action tensor([ 0.6209, 0.1716, -0.1349, 0.0738]) tensor([0.3722, 0.2375, 0.1748, 0.2154]) -Greedy action tensor([ 1.2071, -0.6452, 0.1629, -0.3742]) tensor([0.5832, 0.0915, 0.2053, 0.1200]) -Greedy action tensor([0.5886, 0.0445, 0.0019, 0.0176]) tensor([0.3702, 0.2148, 0.2059, 0.2091]) -Greedy action tensor([ 0.4596, 0.0883, -0.1592, -0.6128]) tensor([0.3890, 0.2684, 0.2095, 0.1331]) -Greedy action tensor([ 7.4589e-01, -5.2214e-01, -5.4917e-04, -3.0713e-01]) tensor([0.4752, 0.1337, 0.2253, 0.1658]) -Greedy action tensor([ 0.6171, -0.0787, -0.0417, -0.0079]) tensor([0.3919, 0.1955, 0.2028, 0.2098]) -Greedy action tensor([ 0.6284, -0.3767, -0.1176, -0.1576]) tensor([0.4356, 0.1594, 0.2066, 0.1985]) -Greedy action tensor([ 0.6730, 0.2031, -0.0185, 0.0992]) tensor([0.3719, 0.2324, 0.1862, 0.2095]) -Greedy action tensor([ 0.8791, -0.3455, -0.0424, -0.2385]) tensor([0.4953, 0.1456, 0.1971, 0.1620]) -Greedy action tensor([ 6.7446e-01, -7.3637e-01, -5.1376e-04, -2.0194e-01]) tensor([0.4610, 0.1124, 0.2347, 0.1919]) -Greedy action tensor([ 0.6452, -0.3846, -0.1630, -0.1654]) tensor([0.4450, 0.1589, 0.1983, 0.1978]) -Greedy action tensor([0.5736, 0.0064, 0.0125, 0.0170]) tensor([0.3689, 0.2092, 0.2105, 0.2114]) -Greedy action tensor([ 0.8233, -0.2133, 0.0334, -0.5369]) tensor([0.4842, 0.1717, 0.2198, 0.1243]) -Greedy action tensor([ 1.9264, -1.0759, -0.2676, 0.5521]) tensor([0.7071, 0.0351, 0.0788, 0.1789]) -Greedy action tensor([ 1.1029, -0.4597, -0.1587, -0.2642]) tensor([0.5722, 0.1199, 0.1620, 0.1458]) -Greedy action tensor([ 2.2001, 0.4923, -0.1416, 0.6524]) tensor([0.6711, 0.1216, 0.0645, 0.1428]) -Greedy action tensor([ 1.2327, -0.0777, -0.7635, 0.0778]) tensor([0.5812, 0.1568, 0.0790, 0.1831]) -Greedy action tensor([ 1.3137, -0.8073, -0.1179, 0.7390]) tensor([0.5204, 0.0624, 0.1243, 0.2929]) -Greedy action tensor([ 1.9457, -1.3997, -0.1032, 0.6498]) tensor([0.6955, 0.0245, 0.0896, 0.1903]) -Greedy action tensor([ 1.4041, -0.6624, 0.0881, 0.4148]) tensor([0.5661, 0.0717, 0.1518, 0.2105]) -Greedy action tensor([ 1.5515, -0.3086, -0.6872, 0.3130]) tensor([0.6443, 0.1003, 0.0687, 0.1867]) -Greedy action tensor([ 1.8501, -0.8253, -0.6239, 0.3888]) tensor([0.7220, 0.0497, 0.0608, 0.1675]) -Greedy action tensor([ 1.3416, -0.4105, -0.5596, 0.1831]) tensor([0.6110, 0.1059, 0.0913, 0.1918]) -Greedy action tensor([ 1.2850, -0.7214, -0.3769, -0.1377]) tensor([0.6389, 0.0859, 0.1212, 0.1540]) -Greedy action tensor([ 1.0720, -0.2650, -0.2117, 0.2651]) tensor([0.5035, 0.1323, 0.1395, 0.2247]) -Greedy action tensor([ 1.2931, -0.2001, -0.6132, 0.4206]) tensor([0.5583, 0.1254, 0.0830, 0.2333]) -Greedy action tensor([ 1.1359, -0.2614, -0.1299, 0.2043]) tensor([0.5200, 0.1286, 0.1466, 0.2048]) -Greedy action tensor([ 1.6195, -0.5877, -0.5845, 0.3643]) tensor([0.6643, 0.0731, 0.0733, 0.1893]) -Greedy action tensor([ 1.8975, -0.7464, -0.7335, 0.0954]) tensor([0.7645, 0.0543, 0.0551, 0.1261]) -Greedy action tensor([ 1.1411, -0.4649, -0.7727, 0.1124]) tensor([0.5863, 0.1177, 0.0865, 0.2096]) -Greedy action tensor([ 1.2656, -0.4855, -0.3405, 0.4115]) tensor([0.5556, 0.0964, 0.1115, 0.2365]) -Greedy action tensor([ 1.2736, 0.0069, -1.0767, -0.0233]) tensor([0.6059, 0.1707, 0.0578, 0.1656]) -Greedy action tensor([ 1.6425, -0.5892, -0.2794, 0.5941]) tensor([0.6234, 0.0669, 0.0912, 0.2185]) -Greedy action tensor([ 1.3246, -0.5424, -0.6589, 0.2614]) tensor([0.6107, 0.0944, 0.0840, 0.2109]) -Greedy action tensor([ 1.6001, -0.7858, -0.4817, 0.4702]) tensor([0.6495, 0.0598, 0.0810, 0.2098]) -Greedy action tensor([ 2.1922, -1.0000, -0.0143, 0.3133]) tensor([0.7669, 0.0315, 0.0844, 0.1172]) -Greedy action tensor([ 1.8996, -1.2059, -0.3886, 0.8058]) tensor([0.6751, 0.0302, 0.0685, 0.2261]) -Greedy action tensor([ 1.3232, -0.3704, -0.2453, 0.5270]) tensor([0.5425, 0.0997, 0.1130, 0.2447]) -Greedy action tensor([ 1.2261, -0.0350, -0.7103, 0.0611]) tensor([0.5749, 0.1629, 0.0829, 0.1793]) -Greedy action tensor([ 1.2581, -0.6335, -0.3928, 0.3280]) tensor([0.5756, 0.0868, 0.1104, 0.2271]) -Greedy action tensor([ 1.2528, -0.2549, -0.4659, 0.0998]) tensor([0.5826, 0.1290, 0.1045, 0.1839]) -Greedy action tensor([ 1.9565, -0.9007, -0.1850, 0.4417]) tensor([0.7170, 0.0412, 0.0842, 0.1576]) -Greedy action tensor([ 1.8364, -0.7388, -0.7160, 0.0117]) tensor([0.7603, 0.0579, 0.0592, 0.1226]) -Greedy action tensor([ 1.6605, -0.4464, -1.0835, 0.4680]) tensor([0.6714, 0.0817, 0.0432, 0.2038]) -Greedy action tensor([ 2.0631, -1.4557, -0.0088, 0.3962]) tensor([0.7438, 0.0220, 0.0937, 0.1405]) -Greedy action tensor([ 2.7527, 0.5986, -0.0250, 0.3194]) tensor([0.7899, 0.0916, 0.0491, 0.0693]) -Greedy action tensor([ 1.5808, -0.9807, -0.5255, 0.0993]) tensor([0.7012, 0.0541, 0.0853, 0.1594]) -Greedy action tensor([ 1.2040, -0.2164, -0.4759, -0.1268]) tensor([0.5909, 0.1428, 0.1101, 0.1562]) -Greedy action tensor([ 1.7044, -0.8212, -0.4042, 0.5331]) tensor([0.6616, 0.0529, 0.0803, 0.2051]) -Greedy action tensor([ 1.2070, -0.4108, -0.6068, 0.3052]) tensor([0.5659, 0.1122, 0.0923, 0.2297]) -Greedy action tensor([ 1.2580, -0.0621, -0.4473, 0.0365]) tensor([0.5735, 0.1532, 0.1042, 0.1691]) -Greedy action tensor([ 2.2518, -1.1887, -0.2041, 0.8895]) tensor([0.7279, 0.0233, 0.0624, 0.1864]) -Greedy action tensor([ 0.8569, -0.0211, 0.0029, 0.1384]) tensor([0.4294, 0.1785, 0.1828, 0.2093]) -Greedy action tensor([ 1.6011, -0.2282, -1.6659, 0.1993]) tensor([0.6921, 0.1111, 0.0264, 0.1704]) -Greedy action tensor([ 1.3420, -0.1082, -0.7149, 0.3294]) tensor([0.5795, 0.1359, 0.0741, 0.2105]) -Greedy action tensor([ 2.2324, 0.8962, -0.1886, 0.1198]) tensor([0.6791, 0.1785, 0.0603, 0.0821]) -Greedy action tensor([ 1.2887, -0.0504, -0.4641, 0.4920]) tensor([0.5302, 0.1389, 0.0919, 0.2390]) -Greedy action tensor([ 0.9427, -0.3393, -0.0449, 0.2919]) tensor([0.4605, 0.1278, 0.1715, 0.2402]) -Greedy action tensor([ 2.0206, -0.8491, -0.5089, 0.5734]) tensor([0.7291, 0.0413, 0.0581, 0.1715]) -Greedy action tensor([ 1.2024, -0.1820, -0.6249, -0.1261]) tensor([0.5966, 0.1494, 0.0960, 0.1580]) -Greedy action tensor([ 1.4105, -0.5055, -0.5552, 0.5162]) tensor([0.5896, 0.0868, 0.0826, 0.2411]) -Greedy action tensor([ 1.6689, -0.1794, -0.7218, 0.4838]) tensor([0.6432, 0.1013, 0.0589, 0.1966]) -Greedy action tensor([ 2.0767, -0.5922, -0.2176, 0.6730]) tensor([0.7063, 0.0490, 0.0712, 0.1735]) -Greedy action tensor([ 1.6722, -0.5934, -0.2657, 0.2379]) tensor([0.6729, 0.0698, 0.0969, 0.1603]) -Greedy action tensor([ 1.3161, -0.2497, -0.3826, 0.3515]) tensor([0.5640, 0.1178, 0.1032, 0.2150]) -Greedy action tensor([ 1.8615, -0.8468, -0.1095, 0.6778]) tensor([0.6613, 0.0441, 0.0921, 0.2025]) -Greedy action tensor([ 1.1923, 0.4154, -0.4653, 0.0926]) tensor([0.5042, 0.2318, 0.0961, 0.1679]) -Greedy action tensor([ 1.8784, -0.6229, -0.0758, 0.6476]) tensor([0.6598, 0.0541, 0.0935, 0.1927]) -Greedy action tensor([ 1.0575, 0.0699, -1.1755, 0.5140]) tensor([0.4853, 0.1808, 0.0520, 0.2819]) -Greedy action tensor([ 1.2316, -0.0841, -0.6413, 0.3384]) tensor([0.5461, 0.1465, 0.0839, 0.2235]) -Greedy action tensor([ 1.2666, -0.4590, -0.4509, 0.2713]) tensor([0.5790, 0.1031, 0.1039, 0.2140]) -Greedy action tensor([ 0.9744, 0.0287, -1.0171, 0.3121]) tensor([0.4901, 0.1903, 0.0669, 0.2527]) -Greedy action tensor([ 1.8443, -1.1365, 0.1699, 0.8035]) tensor([0.6284, 0.0319, 0.1178, 0.2219]) -Greedy action tensor([ 1.2459, -0.6219, -0.4046, 0.1985]) tensor([0.5892, 0.0910, 0.1131, 0.2067]) -Greedy action tensor([ 1.7087, -0.7656, -0.4922, 0.0429]) tensor([0.7226, 0.0609, 0.0800, 0.1366]) -Greedy action tensor([ 1.3757, -0.1674, -0.4979, 0.5639]) tensor([0.5521, 0.1180, 0.0848, 0.2452]) -Greedy action tensor([ 1.4987, -0.5360, -0.5216, 0.1691]) tensor([0.6545, 0.0856, 0.0868, 0.1732]) -Greedy action tensor([ 1.1041, 0.0445, -1.1276, 0.1450]) tensor([0.5443, 0.1887, 0.0584, 0.2086]) -Greedy action tensor([ 1.8069, -0.1773, -0.4570, 0.5001]) tensor([0.6613, 0.0909, 0.0687, 0.1790]) -Greedy action tensor([ 1.8346, -0.7537, -0.3838, 0.7962]) tensor([0.6502, 0.0489, 0.0707, 0.2302]) -Greedy action tensor([ 1.7630, -0.0043, -0.8392, 0.3422]) tensor([0.6728, 0.1149, 0.0499, 0.1625]) -Greedy action tensor([ 2.1742, -0.8782, 0.3251, 0.9447]) tensor([0.6680, 0.0316, 0.1051, 0.1953]) -Greedy action tensor([ 1.3263, -0.4258, -0.8559, 0.1529]) tensor([0.6267, 0.1087, 0.0707, 0.1939]) -Greedy action tensor([ 1.9758, -0.5074, -0.2871, 0.0824]) tensor([0.7473, 0.0624, 0.0778, 0.1125]) -Greedy action tensor([ 1.2190, -0.6369, -0.3817, 0.7877]) tensor([0.4981, 0.0779, 0.1005, 0.3236]) -Greedy action tensor([ 1.3065, 0.0529, -0.6626, -0.0942]) tensor([0.5983, 0.1708, 0.0835, 0.1474]) -Greedy action tensor([ 1.1960, -0.3376, -0.8677, -0.2446]) tensor([0.6331, 0.1366, 0.0804, 0.1499]) -Greedy action tensor([ 1.1980, -0.4848, -0.5452, 0.5107]) tensor([0.5366, 0.0997, 0.0939, 0.2698]) -Greedy action tensor([ 1.2062, -0.0732, -0.6626, 0.0260]) tensor([0.5748, 0.1599, 0.0887, 0.1766]) -Greedy action tensor([ 1.3406, -0.2964, -0.6330, 0.3622]) tensor([0.5850, 0.1138, 0.0813, 0.2199]) -Greedy action tensor([ 2.0925, -0.9580, -0.3541, 0.6729]) tensor([0.7269, 0.0344, 0.0629, 0.1758]) -Greedy action tensor([ 1.2093, -0.2554, -0.5843, 0.1494]) tensor([0.5734, 0.1325, 0.0954, 0.1987]) -Greedy action tensor([ 1.7698, -0.0185, -0.3349, 0.4389]) tensor([0.6438, 0.1077, 0.0785, 0.1701]) -Greedy action tensor([ 1.6520, -0.5516, -0.6992, -0.0736]) tensor([0.7227, 0.0798, 0.0688, 0.1287]) -Greedy action tensor([-1.8219, -0.3529, 0.6335, -0.2372]) tensor([0.0457, 0.1986, 0.5326, 0.2230]) -Greedy action tensor([-0.8460, 0.1739, 0.4528, 0.4833]) tensor([0.0892, 0.2472, 0.3268, 0.3369]) -Greedy action tensor([-1.7932, -0.3321, 0.5562, -0.0728]) tensor([0.0468, 0.2017, 0.4902, 0.2614]) -Greedy action tensor([-1.8669, -0.4253, 0.6251, -0.1383]) tensor([0.0436, 0.1842, 0.5267, 0.2455]) -Greedy action tensor([-0.8215, 0.9385, 0.1411, -0.2634]) tensor([0.0895, 0.5200, 0.2342, 0.1563]) -Greedy action tensor([-1.0096, -0.6076, 0.2435, 0.2268]) tensor([0.1059, 0.1584, 0.3709, 0.3648]) -Greedy action tensor([-0.9490, -0.4434, 0.2818, -0.0967]) tensor([0.1187, 0.1968, 0.4063, 0.2783]) -Greedy action tensor([-1.8300, -0.4048, 0.6059, -0.1356]) tensor([0.0454, 0.1888, 0.5187, 0.2471]) -Greedy action tensor([-1.8817, -0.4484, 0.6407, -0.1515]) tensor([0.0429, 0.1800, 0.5349, 0.2422]) -Greedy action tensor([-0.6249, -0.2592, 0.1736, 0.1772]) tensor([0.1451, 0.2091, 0.3223, 0.3235]) -Greedy action tensor([-1.8843, -0.5362, 0.8061, -0.1238]) tensor([0.0394, 0.1516, 0.5801, 0.2289]) -Greedy action tensor([-1.8802, -0.4564, 0.6426, -0.1456]) tensor([0.0429, 0.1784, 0.5353, 0.2434]) -Greedy action tensor([-1.8950, -0.3337, 0.6255, -0.1429]) tensor([0.0417, 0.1988, 0.5188, 0.2406]) -Greedy action tensor([-1.1357, -0.2370, 0.3948, -0.1641]) tensor([0.0933, 0.2292, 0.4311, 0.2465]) -Greedy action tensor([-1.7408, 0.1076, 0.4717, -0.0818]) tensor([0.0460, 0.2920, 0.4203, 0.2417]) -Greedy action tensor([-0.5372, 0.9154, 0.0521, -0.0490]) tensor([0.1149, 0.4909, 0.2071, 0.1872]) -Greedy action tensor([-1.9155, -0.4566, 0.6593, -0.1653]) tensor([0.0413, 0.1778, 0.5428, 0.2380]) -Greedy action tensor([-0.8114, 0.8074, 0.1166, 0.0250]) tensor([0.0919, 0.4637, 0.2324, 0.2120]) -Greedy action tensor([-1.8953, -0.4480, 0.6496, -0.1543]) tensor([0.0422, 0.1794, 0.5377, 0.2407]) -Greedy action tensor([ 0.4526, -0.0442, 0.6910, 1.5347]) tensor([0.1716, 0.1044, 0.2178, 0.5063]) -Greedy action tensor([-1.6035, -0.2245, 0.4660, -0.0562]) tensor([0.0568, 0.2257, 0.4503, 0.2671]) -Greedy action tensor([-0.8983, 0.6545, 0.0958, 0.0293]) tensor([0.0913, 0.4313, 0.2467, 0.2308]) -Greedy action tensor([-1.9186, -0.4209, 0.6568, -0.1680]) tensor([0.0410, 0.1835, 0.5391, 0.2363]) -Greedy action tensor([-1.4580, 0.5778, 0.3630, -0.1522]) tensor([0.0540, 0.4134, 0.3335, 0.1992]) -Greedy action tensor([-1.9112, -0.4363, 0.6525, -0.1617]) tensor([0.0415, 0.1813, 0.5386, 0.2386]) -Greedy action tensor([-1.7201, -0.3179, 0.5356, -0.0858]) tensor([0.0507, 0.2060, 0.4836, 0.2598]) -Greedy action tensor([-1.9247, -0.4176, 0.6540, -0.1700]) tensor([0.0409, 0.1844, 0.5385, 0.2362]) -Greedy action tensor([-1.8733, -0.4443, 0.6340, -0.1425]) tensor([0.0433, 0.1808, 0.5314, 0.2445]) -Greedy action tensor([-1.8053, -0.2945, 0.6042, -0.0577]) tensor([0.0446, 0.2023, 0.4968, 0.2563]) -Greedy action tensor([-1.8550, -0.4356, 0.6183, -0.1429]) tensor([0.0444, 0.1835, 0.5263, 0.2459]) -Greedy action tensor([-1.6899, -0.2890, 0.5227, -0.0960]) tensor([0.0523, 0.2123, 0.4780, 0.2575]) -Greedy action tensor([-1.8897, -0.3733, 0.6349, -0.1540]) tensor([0.0422, 0.1921, 0.5265, 0.2392]) -Greedy action tensor([-1.9260, -0.4282, 0.6582, -0.1711]) tensor([0.0408, 0.1825, 0.5408, 0.2360]) -Greedy action tensor([-1.8869, -0.4725, 0.6303, -0.1508]) tensor([0.0431, 0.1775, 0.5346, 0.2448]) -Greedy action tensor([-1.7824, -0.3511, 0.5728, -0.1147]) tensor([0.0476, 0.1990, 0.5013, 0.2521]) -Greedy action tensor([-1.7921, -0.4651, 0.7246, -0.0469]) tensor([0.0437, 0.1647, 0.5413, 0.2503]) -Greedy action tensor([-1.9094, -0.3982, 0.6488, -0.1631]) tensor([0.0414, 0.1875, 0.5341, 0.2371]) -Greedy action tensor([-1.9018, -0.2940, 0.6208, -0.1609]) tensor([0.0414, 0.2066, 0.5159, 0.2361]) -Greedy action tensor([-1.6540, -0.0534, 0.4597, -0.0316]) tensor([0.0518, 0.2568, 0.4290, 0.2624]) -Greedy action tensor([-1.9438, -0.4503, 0.6678, -0.1800]) tensor([0.0401, 0.1788, 0.5468, 0.2343]) -Greedy action tensor([-1.9177, -0.4382, 0.6561, -0.1650]) tensor([0.0412, 0.1809, 0.5403, 0.2377]) -Greedy action tensor([0.7227, 0.0661, 0.7405, 1.6232]) tensor([0.2001, 0.1038, 0.2037, 0.4924]) -Greedy action tensor([-1.8790, -0.3769, 0.6342, -0.1558]) tensor([0.0427, 0.1916, 0.5267, 0.2390]) -Greedy action tensor([-1.9018, -0.4314, 0.6515, -0.1573]) tensor([0.0418, 0.1819, 0.5371, 0.2392]) -Greedy action tensor([-1.8648, -0.4288, 0.6268, -0.1394]) tensor([0.0437, 0.1836, 0.5275, 0.2452]) -Greedy action tensor([-1.6771, -0.1149, 0.5125, -0.0569]) tensor([0.0506, 0.2414, 0.4521, 0.2558]) -Greedy action tensor([-1.7889, -0.4364, 0.5937, -0.0943]) tensor([0.0473, 0.1829, 0.5123, 0.2575]) -Greedy action tensor([-0.5889, -0.3306, 0.6658, 0.9430]) tensor([0.0959, 0.1242, 0.3363, 0.4437]) -Greedy action tensor([-1.8375, -0.3848, 0.6094, -0.1441]) tensor([0.0449, 0.1920, 0.5188, 0.2442]) -Greedy action tensor([-1.0252, -0.4960, 0.3962, -0.0068]) tensor([0.1041, 0.1767, 0.4312, 0.2881]) -Greedy action tensor([-1.2605, 0.0361, -0.8323, -0.5965]) tensor([0.1229, 0.4496, 0.1887, 0.2388]) -Greedy action tensor([-1.8345, -0.2470, 0.5939, -0.1722]) tensor([0.0444, 0.2174, 0.5040, 0.2342]) -Greedy action tensor([-1.9171, -0.4314, 0.6514, -0.1664]) tensor([0.0413, 0.1824, 0.5386, 0.2377]) -Greedy action tensor([-1.8011, -0.2778, 0.2049, -0.3666]) tensor([0.0581, 0.2664, 0.4317, 0.2438]) -Greedy action tensor([-1.9262, -0.4447, 0.6565, -0.1718]) tensor([0.0410, 0.1802, 0.5421, 0.2368]) -Greedy action tensor([-1.8887, -0.3108, 0.6242, -0.1345]) tensor([0.0417, 0.2022, 0.5150, 0.2411]) -Greedy action tensor([-1.7623, 0.1490, 0.0800, -0.2702]) tensor([0.0540, 0.3651, 0.3408, 0.2401]) -Greedy action tensor([-1.7928, -0.4654, 0.5929, -0.1258]) tensor([0.0478, 0.1801, 0.5191, 0.2530]) -Greedy action tensor([-1.5513, -0.1227, 0.4630, -0.1078]) tensor([0.0592, 0.2469, 0.4434, 0.2506]) -Greedy action tensor([-1.3230, 0.2682, 0.2828, 0.0047]) tensor([0.0682, 0.3348, 0.3397, 0.2573]) -Greedy action tensor([-1.9079, -0.4645, 0.6521, -0.1636]) tensor([0.0419, 0.1772, 0.5414, 0.2395]) -Greedy action tensor([-1.5461, -0.4938, 0.4564, 0.0259]) tensor([0.0622, 0.1780, 0.4604, 0.2994]) -Greedy action tensor([-1.8352, -0.1124, 0.5736, -0.1317]) tensor([0.0431, 0.2412, 0.4791, 0.2366]) -Greedy action tensor([-1.9411, -0.4487, 0.6679, -0.1772]) tensor([0.0402, 0.1788, 0.5463, 0.2346]) -Greedy action tensor([-1.8970, -0.3283, 0.6342, -0.1530]) tensor([0.0415, 0.1993, 0.5217, 0.2375]) -Greedy action tensor([-1.9219, -0.4408, 0.6553, -0.1683]) tensor([0.0411, 0.1807, 0.5408, 0.2373]) -Greedy action tensor([-1.6701, -0.2964, 0.5241, -0.0969]) tensor([0.0533, 0.2107, 0.4787, 0.2572]) -Greedy action tensor([-1.8893, -0.4501, 0.6425, -0.1563]) tensor([0.0426, 0.1798, 0.5363, 0.2413]) -Greedy action tensor([ 0.4163, 1.1556, -0.0971, 0.3830]) tensor([0.2146, 0.4494, 0.1284, 0.2076]) -Greedy action tensor([-0.7347, -0.5525, 0.3326, 0.3908]) tensor([0.1221, 0.1465, 0.3550, 0.3763]) -Greedy action tensor([-0.6505, 0.7526, 0.2277, 0.4525]) tensor([0.0954, 0.3879, 0.2295, 0.2873]) -Greedy action tensor([-0.2351, 1.1265, 0.0045, 0.4045]) tensor([0.1239, 0.4836, 0.1575, 0.2349]) -Greedy action tensor([-1.9060, -0.4542, 0.6766, -0.1572]) tensor([0.0412, 0.1761, 0.5456, 0.2370]) -Greedy action tensor([-1.8730, -0.3520, 0.6238, -0.1475]) tensor([0.0429, 0.1961, 0.5204, 0.2406]) -Greedy action tensor([-1.8357, -0.4255, 0.6083, -0.1305]) tensor([0.0452, 0.1852, 0.5208, 0.2488]) -Greedy action tensor([-1.9309, -0.4252, 0.6538, -0.1754]) tensor([0.0407, 0.1836, 0.5400, 0.2357]) -Greedy action tensor([-1.9042, -0.4425, 0.6508, -0.1575]) tensor([0.0418, 0.1803, 0.5381, 0.2398]) -Greedy action tensor([-1.9329, -0.4325, 0.6576, -0.1749]) tensor([0.0406, 0.1821, 0.5417, 0.2356]) -Greedy action tensor([-0.3570, 1.0549, 0.0505, 0.2065]) tensor([0.1196, 0.4907, 0.1797, 0.2101]) -Greedy action tensor([-1.7523, -0.4329, 0.5908, -0.0233]) tensor([0.0481, 0.1800, 0.5009, 0.2711]) -Greedy action tensor([-1.1993, 0.0357, 0.2896, -0.0265]) tensor([0.0826, 0.2841, 0.3663, 0.2670]) -Greedy action tensor([ 0.8684, -0.4510, 0.0306, -0.3619]) tensor([0.5020, 0.1342, 0.2172, 0.1467]) -Greedy action tensor([ 1.1163, -0.7200, 0.0225, -0.7528]) tensor([0.6066, 0.0967, 0.2032, 0.0936]) -Greedy action tensor([ 0.7310, -0.1250, 0.1163, -0.1393]) tensor([0.4194, 0.1782, 0.2268, 0.1756]) -Greedy action tensor([ 0.8892, -0.5164, -0.1722, -0.4009]) tensor([0.5358, 0.1314, 0.1854, 0.1475]) -Greedy action tensor([ 0.6631, -0.4968, -0.1107, -0.2966]) tensor([0.4634, 0.1453, 0.2138, 0.1775]) -Greedy action tensor([ 0.5000, -0.4460, -0.1515, -0.1357]) tensor([0.4100, 0.1592, 0.2137, 0.2171]) -Greedy action tensor([ 0.6749, -0.4178, -0.0817, -0.3890]) tensor([0.4652, 0.1560, 0.2183, 0.1605]) -Greedy action tensor([ 1.2710, -0.4449, -0.0236, -0.3664]) tensor([0.6067, 0.1091, 0.1662, 0.1180]) -Greedy action tensor([ 1.0304, -0.6307, -0.0937, -0.1662]) tensor([0.5503, 0.1045, 0.1788, 0.1663]) -Greedy action tensor([ 0.6959, -0.4776, -0.0719, -0.2039]) tensor([0.4587, 0.1419, 0.2129, 0.1865]) -Greedy action tensor([ 0.6453, -0.4401, 0.0180, -0.2433]) tensor([0.4380, 0.1479, 0.2339, 0.1801]) -Greedy action tensor([ 0.5909, -0.2199, 0.0067, -0.2593]) tensor([0.4116, 0.1830, 0.2295, 0.1759]) -Greedy action tensor([ 1.1053, -0.0195, -0.0445, -0.1011]) tensor([0.5153, 0.1673, 0.1632, 0.1542]) -Greedy action tensor([ 0.7937, -0.3635, -0.0279, -0.3964]) tensor([0.4859, 0.1527, 0.2136, 0.1478]) -Greedy action tensor([ 0.6026, -0.5286, 0.0166, -0.2094]) tensor([0.4305, 0.1389, 0.2396, 0.1911]) -Greedy action tensor([ 0.4837, 0.2534, 0.0614, -0.0315]) tensor([0.3282, 0.2607, 0.2151, 0.1960]) -Greedy action tensor([ 0.9157, -0.5204, 0.0509, -0.2500]) tensor([0.5074, 0.1207, 0.2137, 0.1582]) -Greedy action tensor([ 0.5366, -0.1659, -0.1342, -0.3353]) tensor([0.4124, 0.2043, 0.2109, 0.1724]) -Greedy action tensor([ 1.1293, -0.5186, -0.1095, -0.4425]) tensor([0.5918, 0.1139, 0.1714, 0.1229]) -Greedy action tensor([ 1.0244, -0.4305, -0.0245, -0.3637]) tensor([0.5455, 0.1273, 0.1911, 0.1361]) -Greedy action tensor([ 0.6899, -0.6787, 0.0468, -0.2942]) tensor([0.4643, 0.1181, 0.2440, 0.1735]) -Greedy action tensor([ 0.9129, -0.5945, 0.0365, -0.2544]) tensor([0.5131, 0.1136, 0.2136, 0.1597]) -Greedy action tensor([ 0.9743, -0.4449, -0.0987, -0.5298]) tensor([0.5537, 0.1339, 0.1893, 0.1230]) -Greedy action tensor([ 0.9339, -0.4804, -0.0451, -0.2174]) tensor([0.5168, 0.1256, 0.1942, 0.1634]) -Greedy action tensor([ 0.1696, 0.1302, -0.2200, -0.2513]) tensor([0.3035, 0.2918, 0.2056, 0.1992]) -Greedy action tensor([ 0.4934, 0.2061, -0.0210, 0.0459]) tensor([0.3347, 0.2512, 0.2001, 0.2140]) -Greedy action tensor([ 0.9670, -0.3891, -0.1222, -0.2990]) tensor([0.5330, 0.1373, 0.1794, 0.1503]) -Greedy action tensor([ 0.6361, -0.2596, -0.0872, -0.1408]) tensor([0.4249, 0.1735, 0.2061, 0.1954]) -Greedy action tensor([ 0.4956, -0.4365, -0.0605, -0.0888]) tensor([0.3961, 0.1560, 0.2271, 0.2208]) -Greedy action tensor([ 0.9526, -0.6691, 0.1048, -0.5030]) tensor([0.5379, 0.1063, 0.2304, 0.1255]) -Greedy action tensor([ 0.6491, -0.2840, -0.0307, -0.0385]) tensor([0.4162, 0.1637, 0.2109, 0.2093]) -Greedy action tensor([ 0.4359, 0.0323, -0.0996, -0.0161]) tensor([0.3461, 0.2311, 0.2026, 0.2202]) -Greedy action tensor([ 0.7219, 0.0121, -0.0139, 0.0547]) tensor([0.4026, 0.1980, 0.1929, 0.2066]) -Greedy action tensor([ 0.6671, -0.4747, -0.0113, -0.4122]) tensor([0.4616, 0.1474, 0.2342, 0.1569]) -Greedy action tensor([ 0.5199, 0.1607, -0.2126, -0.0881]) tensor([0.3672, 0.2564, 0.1765, 0.1999]) -Greedy action tensor([ 1.1612, -0.6726, -0.0262, -0.4551]) tensor([0.6012, 0.0961, 0.1834, 0.1194]) -Greedy action tensor([ 0.7243, -0.2754, 0.0723, -0.1798]) tensor([0.4359, 0.1604, 0.2271, 0.1765]) -Greedy action tensor([ 0.7567, -0.1982, -0.0991, -0.0971]) tensor([0.4473, 0.1722, 0.1901, 0.1905]) -Greedy action tensor([ 0.7534, -0.9333, 0.1342, -0.7893]) tensor([0.5162, 0.0956, 0.2779, 0.1104]) -Greedy action tensor([ 0.7217, -0.7051, 0.0358, -0.5948]) tensor([0.4971, 0.1193, 0.2503, 0.1333]) -Greedy action tensor([ 0.2340, -0.1272, -0.0743, -0.1838]) tensor([0.3236, 0.2255, 0.2378, 0.2131]) -Greedy action tensor([ 0.6576, -0.3416, 0.0883, -0.3839]) tensor([0.4372, 0.1610, 0.2475, 0.1543]) -Greedy action tensor([ 1.1087, -0.6164, -0.0338, -0.4622]) tensor([0.5865, 0.1045, 0.1871, 0.1219]) -Greedy action tensor([ 0.5139, -0.1248, -0.0819, -0.1764]) tensor([0.3875, 0.2046, 0.2136, 0.1943]) -Greedy action tensor([ 0.9401, -0.5833, 0.0436, -0.2484]) tensor([0.5180, 0.1129, 0.2113, 0.1578]) -Greedy action tensor([ 0.8472, -0.6195, -0.0322, -0.4339]) tensor([0.5199, 0.1199, 0.2158, 0.1444]) -Greedy action tensor([ 0.8722, -0.3909, -0.0909, -0.1213]) tensor([0.4915, 0.1390, 0.1876, 0.1820]) -Greedy action tensor([ 1.1480, -0.7005, -0.1968, -0.6047]) tensor([0.6284, 0.0990, 0.1638, 0.1089]) -Greedy action tensor([ 0.7487, -0.2045, 0.1083, -0.1868]) tensor([0.4338, 0.1672, 0.2287, 0.1702]) -Greedy action tensor([ 0.5630, -0.1480, -0.0729, -0.0869]) tensor([0.3933, 0.1932, 0.2082, 0.2053]) -Greedy action tensor([ 0.4705, -0.1504, -0.1176, -0.1032]) tensor([0.3765, 0.2023, 0.2091, 0.2121]) -Greedy action tensor([ 0.7806, -0.4785, 0.0186, -0.3030]) tensor([0.4787, 0.1359, 0.2234, 0.1620]) -Greedy action tensor([ 1.0731, -0.5974, -0.2297, -0.4540]) tensor([0.5963, 0.1122, 0.1620, 0.1295]) -Greedy action tensor([ 0.6485, -0.3702, -0.0551, -0.3337]) tensor([0.4483, 0.1619, 0.2219, 0.1679]) -Greedy action tensor([ 1.2348, -0.8512, 0.1213, -0.5766]) tensor([0.6188, 0.0768, 0.2032, 0.1011]) -Greedy action tensor([ 1.0871, -0.8281, -0.1817, -0.5808]) tensor([0.6184, 0.0911, 0.1739, 0.1167]) -Greedy action tensor([ 0.4814, -0.2360, -0.0206, -0.1653]) tensor([0.3821, 0.1865, 0.2313, 0.2001]) -Greedy action tensor([ 1.3112, -0.6468, -0.0311, -0.5676]) tensor([0.6430, 0.0908, 0.1680, 0.0982]) -Greedy action tensor([ 1.3796, -0.8124, -0.0623, -0.7072]) tensor([0.6792, 0.0759, 0.1606, 0.0843]) -Greedy action tensor([ 1.1784, -0.7769, -0.0065, -0.6712]) tensor([0.6232, 0.0882, 0.1906, 0.0980]) -Greedy action tensor([ 0.8451, -0.4544, -0.1335, -0.3729]) tensor([0.5143, 0.1402, 0.1933, 0.1521]) -Greedy action tensor([ 0.6392, -0.5155, -0.0649, -0.2431]) tensor([0.4497, 0.1417, 0.2224, 0.1861]) -Greedy action tensor([ 0.7976, -0.5613, 0.0183, -0.4207]) tensor([0.4972, 0.1277, 0.2281, 0.1470]) -Greedy action tensor([ 1.0578, -0.8960, 0.1145, -0.5099]) tensor([0.5748, 0.0815, 0.2238, 0.1199]) -Greedy action tensor([ 1.2510, -0.5279, -0.2031, -0.3222]) tensor([0.6212, 0.1049, 0.1451, 0.1288]) -Greedy action tensor([ 0.8447, -0.6544, 0.1177, -0.5261]) tensor([0.5101, 0.1139, 0.2465, 0.1295]) -Greedy action tensor([ 0.8361, -0.1902, -0.0141, -0.3117]) tensor([0.4755, 0.1704, 0.2032, 0.1509]) -Greedy action tensor([ 1.0153, -0.7156, 0.0387, -0.6677]) tensor([0.5749, 0.1018, 0.2165, 0.1068]) -Greedy action tensor([ 0.9745, -0.6695, 0.1089, -0.2006]) tensor([0.5201, 0.1005, 0.2188, 0.1606]) -Greedy action tensor([ 0.9477, -0.5273, 0.0535, -0.2230]) tensor([0.5134, 0.1175, 0.2099, 0.1592]) -Greedy action tensor([ 0.9492, -0.3865, 0.0344, -0.2201]) tensor([0.5065, 0.1332, 0.2029, 0.1573]) -Greedy action tensor([ 0.9522, -0.6699, 0.0011, -0.7080]) tensor([0.5637, 0.1113, 0.2178, 0.1072]) -Greedy action tensor([ 0.1730, 0.1278, -0.0821, -0.1439]) tensor([0.2891, 0.2763, 0.2240, 0.2106]) -Greedy action tensor([ 0.7986, -0.6307, -0.0922, -0.2978]) tensor([0.5041, 0.1207, 0.2068, 0.1684]) -Greedy action tensor([ 0.7309, -0.2425, -0.0738, -0.4028]) tensor([0.4658, 0.1760, 0.2083, 0.1499]) -Greedy action tensor([ 1.0106, -0.3932, -0.0937, -0.1434]) tensor([0.5284, 0.1298, 0.1751, 0.1666]) -Greedy action tensor([ 0.9063, -0.6414, -0.0374, -0.4560]) tensor([0.5382, 0.1145, 0.2095, 0.1378]) -Greedy action tensor([ 1.2313, -0.9759, 0.0131, -0.4342]) tensor([0.6270, 0.0690, 0.1855, 0.1186]) -Greedy action tensor([ 0.9530, -0.4939, 0.0131, -0.5475]) tensor([0.5408, 0.1273, 0.2113, 0.1206]) -Greedy action tensor([ 0.8157, -0.5654, -0.0895, -0.3906]) tensor([0.5115, 0.1285, 0.2069, 0.1531]) -Greedy action tensor([ 0.9515, -0.5402, -0.0993, -0.3002]) tensor([0.5375, 0.1209, 0.1879, 0.1537]) -Greedy action tensor([-0.4823, -1.0456, 0.9109, -0.6154]) tensor([0.1545, 0.0880, 0.6223, 0.1352]) -Greedy action tensor([-0.2016, -1.7180, -0.7854, -0.0110]) tensor([0.3347, 0.0735, 0.1867, 0.4051]) -Greedy action tensor([-0.3763, -0.1358, -0.1197, -0.5272]) tensor([0.2260, 0.2875, 0.2921, 0.1944]) -Greedy action tensor([ 0.2029, -1.4561, -0.2063, -0.1090]) tensor([0.3866, 0.0736, 0.2568, 0.2830]) -Greedy action tensor([ 0.9036, -0.5939, -0.9266, -0.2601]) tensor([0.5895, 0.1319, 0.0945, 0.1841]) -Greedy action tensor([-0.2578, -0.4496, 0.3602, -0.5774]) tensor([0.2269, 0.1873, 0.4210, 0.1648]) -Greedy action tensor([ 0.3092, 0.1755, -0.7893, 0.2506]) tensor([0.3173, 0.2776, 0.1058, 0.2993]) -Greedy action tensor([-0.9344, 0.1048, 0.8007, 0.2972]) tensor([0.0774, 0.2188, 0.4387, 0.2652]) -Greedy action tensor([-0.3002, 0.4277, 0.8546, 0.2068]) tensor([0.1265, 0.2620, 0.4015, 0.2100]) -Greedy action tensor([-0.0501, 0.6587, 0.0964, -0.5851]) tensor([0.2094, 0.4255, 0.2425, 0.1227]) -Greedy action tensor([-0.6063, -0.4678, -0.6284, -1.6991]) tensor([0.2889, 0.3318, 0.2825, 0.0968]) -Greedy action tensor([ 0.9802, 0.1430, -0.3561, 0.6019]) tensor([0.4200, 0.1818, 0.1104, 0.2877]) -Greedy action tensor([ 0.4505, -1.2911, 0.8140, 0.8162]) tensor([0.2466, 0.0432, 0.3547, 0.3555]) -Greedy action tensor([ 1.1089, 0.1442, -1.4762, 0.4735]) tensor([0.5035, 0.1919, 0.0380, 0.2667]) -Greedy action tensor([ 0.2781, -0.4614, -0.6259, 1.2086]) tensor([0.2263, 0.1080, 0.0917, 0.5740]) -Greedy action tensor([ 0.0401, -0.8095, 0.5524, -0.7443]) tensor([0.2814, 0.1203, 0.4698, 0.1285]) -Greedy action tensor([ 0.4787, -1.2754, -0.5702, 0.0701]) tensor([0.4571, 0.0791, 0.1601, 0.3037]) -Greedy action tensor([-0.7345, -1.2495, 1.1528, -0.9018]) tensor([0.1106, 0.0661, 0.7299, 0.0935]) -Greedy action tensor([-0.4343, 0.1222, 0.2068, -1.0917]) tensor([0.1938, 0.3380, 0.3678, 0.1004]) -Greedy action tensor([ 0.0198, 0.1717, -0.1291, -0.7964]) tensor([0.2884, 0.3357, 0.2485, 0.1275]) -Greedy action tensor([-0.2207, -1.1457, 0.7037, -0.3826]) tensor([0.2098, 0.0832, 0.5287, 0.1784]) -Greedy action tensor([-0.9977, -1.0694, -0.5038, -0.7857]) tensor([0.2081, 0.1937, 0.3410, 0.2572]) -Greedy action tensor([-0.3073, -1.6022, 0.7010, -0.9874]) tensor([0.2212, 0.0606, 0.6062, 0.1120]) -Greedy action tensor([ 1.7270, -0.8373, 0.1702, 0.8755]) tensor([0.5832, 0.0449, 0.1230, 0.2489]) -Greedy action tensor([-0.8539, -1.0767, -1.2503, -1.0459]) tensor([0.3032, 0.2426, 0.2040, 0.2502]) -Greedy action tensor([-0.0162, -0.8315, 0.3284, -0.5701]) tensor([0.2917, 0.1291, 0.4117, 0.1676]) -Greedy action tensor([ 0.5902, -0.0355, -0.4565, -0.3699]) tensor([0.4408, 0.2358, 0.1547, 0.1687]) -Greedy action tensor([-0.2449, 0.1636, 0.5396, -0.9952]) tensor([0.1935, 0.2911, 0.4240, 0.0914]) -Greedy action tensor([ 0.8329, -1.4508, -0.9426, 0.8279]) tensor([0.4413, 0.0450, 0.0747, 0.4390]) -Greedy action tensor([-0.7491, -0.6234, 1.0275, -1.6423]) tensor([0.1183, 0.1341, 0.6991, 0.0484]) -Greedy action tensor([ 0.8412, -1.2293, -0.6394, 0.5643]) tensor([0.4735, 0.0597, 0.1077, 0.3590]) -Greedy action tensor([-0.5342, -0.3823, -0.0026, -0.2952]) tensor([0.1947, 0.2266, 0.3313, 0.2473]) -Greedy action tensor([ 0.5127, 0.2660, -0.0444, -0.3709]) tensor([0.3613, 0.2823, 0.2070, 0.1493]) -Greedy action tensor([ 0.3217, -0.2933, 0.2887, -0.7077]) tensor([0.3490, 0.1887, 0.3376, 0.1247]) -Greedy action tensor([ 0.3141, -0.6686, 0.7288, -0.6950]) tensor([0.3074, 0.1151, 0.4654, 0.1121]) -Greedy action tensor([-0.5139, -2.3362, 1.8398, -1.3597]) tensor([0.0825, 0.0133, 0.8687, 0.0354]) -Greedy action tensor([ 0.2907, -0.6823, -0.8874, 0.6847]) tensor([0.3156, 0.1193, 0.0972, 0.4680]) -Greedy action tensor([-0.4029, -0.5648, -0.2804, -0.4938]) tensor([0.2568, 0.2184, 0.2903, 0.2345]) -Greedy action tensor([-0.5941, 0.2399, 0.1805, -0.8283]) tensor([0.1596, 0.3676, 0.3464, 0.1263]) -Greedy action tensor([-0.0062, -0.5093, -0.4814, 0.3381]) tensor([0.2749, 0.1662, 0.1709, 0.3879]) -Greedy action tensor([-0.9587, -1.1267, 0.2522, -0.9115]) tensor([0.1600, 0.1353, 0.5370, 0.1677]) -Greedy action tensor([-0.5386, 0.1017, 1.1155, -0.3732]) tensor([0.1075, 0.2039, 0.5619, 0.1268]) -Greedy action tensor([ 1.5016, -0.7231, 1.6718, 0.3929]) tensor([0.3812, 0.0412, 0.4519, 0.1258]) -Greedy action tensor([-0.2249, -0.7202, -0.6493, -1.2895]) tensor([0.3834, 0.2336, 0.2508, 0.1322]) -Greedy action tensor([ 0.8900, -1.1449, 0.5019, 0.4895]) tensor([0.4034, 0.0527, 0.2736, 0.2703]) -Greedy action tensor([-0.1724, -0.0308, 0.9035, -0.5182]) tensor([0.1726, 0.1989, 0.5063, 0.1222]) -Greedy action tensor([ 0.6449, -1.4272, -0.0855, -0.2026]) tensor([0.4911, 0.0618, 0.2366, 0.2104]) -Greedy action tensor([-0.6636, -0.8489, -0.2752, 0.6664]) tensor([0.1411, 0.1172, 0.2081, 0.5336]) -Greedy action tensor([-0.9192, -0.9328, -0.7943, -0.2072]) tensor([0.1939, 0.1913, 0.2197, 0.3952]) -Greedy action tensor([-0.2812, 0.1486, 0.0995, 0.1472]) tensor([0.1807, 0.2777, 0.2644, 0.2773]) -Greedy action tensor([ 0.3367, -0.9719, 0.2154, -0.3279]) tensor([0.3745, 0.1012, 0.3317, 0.1927]) -Greedy action tensor([-0.1322, -0.7663, 0.4443, -0.8259]) tensor([0.2625, 0.1392, 0.4672, 0.1312]) -Greedy action tensor([-0.4195, -1.7446, 0.1092, -0.7642]) tensor([0.2724, 0.0724, 0.4622, 0.1930]) -Greedy action tensor([ 0.7299, -1.9972, -0.3437, -0.5278]) tensor([0.5912, 0.0387, 0.2021, 0.1681]) -Greedy action tensor([ 0.2284, -0.2899, -1.1087, -0.2257]) tensor([0.4011, 0.2389, 0.1053, 0.2547]) -Greedy action tensor([-0.7349, -0.3672, 0.7345, -0.8286]) tensor([0.1299, 0.1875, 0.5644, 0.1182]) -Greedy action tensor([ 0.9195, -0.0549, -0.4845, 0.7089]) tensor([0.4110, 0.1551, 0.1009, 0.3330]) -Greedy action tensor([ 0.2186, -0.6848, 0.2867, 0.9102]) tensor([0.2236, 0.0906, 0.2394, 0.4465]) -Greedy action tensor([-0.9272, -0.8443, -1.1042, 0.5529]) tensor([0.1367, 0.1485, 0.1145, 0.6004]) -Greedy action tensor([ 1.9514, -0.7922, 0.8466, -0.4935]) tensor([0.6746, 0.0434, 0.2235, 0.0585]) -Greedy action tensor([-0.9228, -0.0170, -0.0661, -0.5764]) tensor([0.1381, 0.3415, 0.3252, 0.1952]) -Greedy action tensor([ 0.0368, -0.3949, 0.4208, -0.3017]) tensor([0.2611, 0.1695, 0.3833, 0.1861]) -Greedy action tensor([ 1.2160, -1.1272, -0.1250, 0.2926]) tensor([0.5699, 0.0547, 0.1491, 0.2263]) -Greedy action tensor([ 0.7421, 0.4000, -0.3931, -0.1414]) tensor([0.4090, 0.2905, 0.1314, 0.1691]) -Greedy action tensor([ 0.4504, -1.1944, 0.9801, -1.4785]) tensor([0.3293, 0.0636, 0.5593, 0.0478]) -Greedy action tensor([-0.8982, -0.8636, 0.4935, -0.6341]) tensor([0.1359, 0.1407, 0.5465, 0.1769]) -Greedy action tensor([ 0.5409, -0.2307, -1.1226, -0.1013]) tensor([0.4591, 0.2123, 0.0870, 0.2416]) -Greedy action tensor([-0.0164, -0.9247, -0.1452, 0.0427]) tensor([0.2991, 0.1206, 0.2630, 0.3173]) -Greedy action tensor([-0.6449, -0.5828, 0.2959, -1.1799]) tensor([0.1919, 0.2042, 0.4916, 0.1124]) -Greedy action tensor([-0.5416, -1.4312, -0.1944, -0.1486]) tensor([0.2322, 0.0954, 0.3285, 0.3439]) -Greedy action tensor([ 0.0545, -0.4763, 0.3624, -0.3809]) tensor([0.2781, 0.1636, 0.3784, 0.1799]) -Greedy action tensor([-0.1105, -0.8472, -0.4606, 0.3116]) tensor([0.2697, 0.1291, 0.1900, 0.4113]) -Greedy action tensor([-0.6340, 0.2880, 0.3546, -0.6525]) tensor([0.1392, 0.3500, 0.3741, 0.1367]) -Greedy action tensor([-0.3060, -0.0868, -0.5787, -0.4706]) tensor([0.2594, 0.3230, 0.1975, 0.2200]) -Greedy action tensor([-0.5472, -0.2349, -0.9510, -0.1662]) tensor([0.2223, 0.3038, 0.1485, 0.3254]) -Greedy action tensor([ 1.4659, -0.2555, -0.2101, -0.6454]) tensor([0.6725, 0.1203, 0.1258, 0.0814]) -Greedy action tensor([ 0.5346, -1.1729, 0.6109, -0.7694]) tensor([0.3949, 0.0716, 0.4262, 0.1072]) -Greedy action tensor([-1.4165, -0.6741, -1.0371, -0.4195]) tensor([0.1375, 0.2889, 0.2009, 0.3727]) -Greedy action tensor([-0.6592, -0.7791, 0.1989, -0.2229]) tensor([0.1726, 0.1531, 0.4072, 0.2670]) -Greedy action tensor([ 0.4407, 0.2312, -0.0137, -0.0199]) tensor([0.3250, 0.2636, 0.2063, 0.2051]) -Greedy action tensor([ 0.4931, -1.2322, 0.7313, -0.3542]) tensor([0.3477, 0.0619, 0.4413, 0.1490]) -Greedy action tensor([ 1.2014, -0.3542, -0.9656, 0.1380]) tensor([0.5985, 0.1263, 0.0685, 0.2067]) -Greedy action tensor([ 1.7225, -0.5003, -0.9025, 0.2650]) tensor([0.7074, 0.0766, 0.0512, 0.1647]) -Greedy action tensor([ 1.4091, -0.4532, -0.2851, 0.5617]) tensor([0.5658, 0.0879, 0.1040, 0.2424]) -Greedy action tensor([ 1.2989, -0.4661, -0.5249, 0.0665]) tensor([0.6157, 0.1054, 0.0994, 0.1795]) -Greedy action tensor([ 1.0564, -0.2000, -0.6608, -0.2035]) tensor([0.5721, 0.1629, 0.1027, 0.1623]) -Greedy action tensor([ 1.0030, -0.1110, -0.1539, 0.6681]) tensor([0.4241, 0.1392, 0.1333, 0.3034]) -Greedy action tensor([ 1.2694, -0.7543, -0.5549, 0.4626]) tensor([0.5748, 0.0760, 0.0927, 0.2565]) -Greedy action tensor([ 1.9750, -0.5955, -0.4360, 0.3666]) tensor([0.7318, 0.0560, 0.0657, 0.1465]) -Greedy action tensor([ 1.8399, -1.0866, -0.2979, 0.7188]) tensor([0.6678, 0.0358, 0.0787, 0.2176]) -Greedy action tensor([ 1.3684, -0.6224, -0.5039, 0.9259]) tensor([0.5174, 0.0707, 0.0796, 0.3324]) -Greedy action tensor([ 1.7906, -0.3493, -0.6176, 0.3303]) tensor([0.6945, 0.0817, 0.0625, 0.1613]) -Greedy action tensor([ 1.5500, -1.1237, -0.3021, 0.6554]) tensor([0.6117, 0.0422, 0.0960, 0.2501]) -Greedy action tensor([ 1.7318, -0.3869, -0.4718, 0.3270]) tensor([0.6775, 0.0814, 0.0748, 0.1663]) -Greedy action tensor([ 1.9337, -1.2188, 0.1929, 0.5821]) tensor([0.6771, 0.0289, 0.1187, 0.1752]) -Greedy action tensor([ 1.0692, -0.4351, -0.3349, 0.2912]) tensor([0.5189, 0.1153, 0.1274, 0.2384]) -Greedy action tensor([ 1.8426, -0.4856, -0.6956, 0.5094]) tensor([0.6944, 0.0677, 0.0549, 0.1831]) -Greedy action tensor([ 1.5999, -0.3697, -0.1274, 0.2579]) tensor([0.6335, 0.0884, 0.1126, 0.1655]) -Greedy action tensor([ 1.3611, -0.4846, -0.1728, 0.4072]) tensor([0.5685, 0.0898, 0.1226, 0.2190]) -Greedy action tensor([ 1.4742, 0.1529, -0.6165, 0.3483]) tensor([0.5832, 0.1556, 0.0721, 0.1892]) -Greedy action tensor([ 1.1829, 0.4833, -1.1499, 0.1732]) tensor([0.5107, 0.2537, 0.0495, 0.1861]) -Greedy action tensor([ 1.4031, -0.2969, -0.2065, 0.1347]) tensor([0.6010, 0.1098, 0.1202, 0.1691]) -Greedy action tensor([ 0.8767, -0.2295, 0.2297, -0.0759]) tensor([0.4464, 0.1477, 0.2337, 0.1722]) -Greedy action tensor([ 1.4671, -0.3426, -0.2407, 0.0391]) tensor([0.6310, 0.1033, 0.1144, 0.1513]) -Greedy action tensor([ 1.2679, -0.0879, -1.1906, 0.3591]) tensor([0.5726, 0.1476, 0.0490, 0.2308]) -Greedy action tensor([ 1.6300, 0.0534, -0.5976, -0.0748]) tensor([0.6683, 0.1381, 0.0720, 0.1215]) -Greedy action tensor([ 1.9435, -0.8335, -0.7974, -0.2012]) tensor([0.8040, 0.0500, 0.0519, 0.0941]) -Greedy action tensor([ 1.2961, -0.5064, -0.3898, 0.2713]) tensor([0.5851, 0.0965, 0.1084, 0.2100]) -Greedy action tensor([ 1.6163, -1.0676, -0.1697, 0.1770]) tensor([0.6789, 0.0464, 0.1138, 0.1610]) -Greedy action tensor([ 1.3036, -0.3673, -0.9772, 0.0048]) tensor([0.6397, 0.1203, 0.0654, 0.1746]) -Greedy action tensor([ 1.3047, -0.2633, 0.1160, 0.0789]) tensor([0.5535, 0.1154, 0.1686, 0.1625]) -Greedy action tensor([ 1.2228, -0.0400, -0.1650, 0.2804]) tensor([0.5202, 0.1472, 0.1299, 0.2027]) -Greedy action tensor([ 1.0843, -0.1092, -0.1671, -0.1373]) tensor([0.5308, 0.1609, 0.1519, 0.1565]) -Greedy action tensor([ 1.0810, -0.2163, -0.8744, 0.1004]) tensor([0.5587, 0.1527, 0.0791, 0.2096]) -Greedy action tensor([ 2.2595, -1.2658, -0.1570, 0.8256]) tensor([0.7369, 0.0217, 0.0658, 0.1757]) -Greedy action tensor([ 0.8047, -0.2172, -0.0323, 0.1780]) tensor([0.4297, 0.1546, 0.1861, 0.2296]) -Greedy action tensor([ 1.1681, -0.5107, -0.3514, 0.5307]) tensor([0.5170, 0.0965, 0.1131, 0.2734]) -Greedy action tensor([ 1.0976, -0.4285, -1.0466, 0.2819]) tensor([0.5628, 0.1223, 0.0659, 0.2489]) -Greedy action tensor([ 1.8455, -0.6063, -0.8479, -0.0116]) tensor([0.7634, 0.0658, 0.0516, 0.1192]) -Greedy action tensor([ 1.6244, -1.0904, -0.3547, 0.1928]) tensor([0.6928, 0.0459, 0.0957, 0.1655]) -Greedy action tensor([ 1.2046, -0.3244, -1.0540, 0.5139]) tensor([0.5487, 0.1189, 0.0573, 0.2750]) -Greedy action tensor([ 1.0954, 0.0123, -0.8154, -0.0024]) tensor([0.5494, 0.1860, 0.0813, 0.1833]) -Greedy action tensor([ 1.2142, -0.5792, -0.2187, 0.2145]) tensor([0.5640, 0.0939, 0.1346, 0.2075]) -Greedy action tensor([ 1.2190, -0.2552, -1.1963, 0.2987]) tensor([0.5825, 0.1334, 0.0520, 0.2321]) -Greedy action tensor([ 1.5892, -0.3560, -0.4095, 0.1779]) tensor([0.6569, 0.0939, 0.0890, 0.1602]) -Greedy action tensor([ 1.4571, -0.6611, -0.2273, 0.5443]) tensor([0.5858, 0.0704, 0.1087, 0.2351]) -Greedy action tensor([ 1.6725, -0.9453, -0.3684, 0.3685]) tensor([0.6783, 0.0495, 0.0881, 0.1841]) -Greedy action tensor([ 1.8081, -1.0626, -0.3593, 0.3825]) tensor([0.7085, 0.0401, 0.0811, 0.1703]) -Greedy action tensor([ 1.6466, -0.2514, -0.8066, -0.0266]) tensor([0.7025, 0.1053, 0.0604, 0.1318]) -Greedy action tensor([ 1.3714, 0.0866, -0.1487, -0.1329]) tensor([0.5822, 0.1611, 0.1273, 0.1293]) -Greedy action tensor([ 1.2229, -0.6512, -0.1489, 0.4654]) tensor([0.5331, 0.0818, 0.1352, 0.2499]) -Greedy action tensor([ 1.2788, -0.1885, -0.7186, 0.4220]) tensor([0.5584, 0.1287, 0.0758, 0.2371]) -Greedy action tensor([ 1.6465, -1.1263, -0.3786, -0.0521]) tensor([0.7260, 0.0454, 0.0958, 0.1328]) -Greedy action tensor([ 1.6440, -0.0018, -0.5748, 0.2717]) tensor([0.6430, 0.1240, 0.0699, 0.1630]) -Greedy action tensor([ 1.5475, -0.2219, -0.5958, 0.3079]) tensor([0.6340, 0.1081, 0.0743, 0.1836]) -Greedy action tensor([ 1.3685, -0.2194, -0.3940, 0.0287]) tensor([0.6106, 0.1248, 0.1048, 0.1599]) -Greedy action tensor([ 1.2373, -0.0709, -0.7386, -0.0735]) tensor([0.5958, 0.1610, 0.0826, 0.1606]) -Greedy action tensor([ 1.4633, -0.4882, -0.5931, 0.2755]) tensor([0.6350, 0.0902, 0.0812, 0.1936]) -Greedy action tensor([ 1.3296, -0.3075, -0.7405, 0.5291]) tensor([0.5650, 0.1099, 0.0713, 0.2538]) -Greedy action tensor([ 1.6892, -0.7261, -0.5581, 0.3116]) tensor([0.6910, 0.0617, 0.0730, 0.1743]) -Greedy action tensor([ 1.3834, -0.6868, -0.5326, 0.5776]) tensor([0.5814, 0.0733, 0.0856, 0.2597]) -Greedy action tensor([ 1.3701, -0.4729, -0.3507, 0.2987]) tensor([0.5953, 0.0943, 0.1065, 0.2039]) -Greedy action tensor([ 1.2893, -0.4213, -0.4981, 0.0499]) tensor([0.6106, 0.1104, 0.1022, 0.1768]) -Greedy action tensor([ 1.0660, -0.2059, -0.1132, 0.1237]) tensor([0.5057, 0.1417, 0.1555, 0.1971]) -Greedy action tensor([ 2.0450, -0.6611, -0.4280, 0.4994]) tensor([0.7330, 0.0490, 0.0618, 0.1563]) -Greedy action tensor([ 2.0290, -1.2163, -0.5408, 0.4618]) tensor([0.7552, 0.0294, 0.0578, 0.1576]) -Greedy action tensor([ 1.4209, -0.5306, -0.3741, 0.2736]) tensor([0.6151, 0.0874, 0.1022, 0.1953]) -Greedy action tensor([ 1.1935, -0.5609, 0.3575, 0.0885]) tensor([0.5161, 0.0893, 0.2237, 0.1709]) -Greedy action tensor([ 1.6558, -0.7563, -0.5705, 0.7943]) tensor([0.6173, 0.0553, 0.0666, 0.2608]) -Greedy action tensor([ 0.8308, -0.2997, -0.1641, -0.0018]) tensor([0.4700, 0.1518, 0.1738, 0.2044]) -Greedy action tensor([ 1.9496, -0.5258, -0.8210, 0.7111]) tensor([0.6961, 0.0586, 0.0436, 0.2018]) -Greedy action tensor([ 2.1079, -0.9145, -0.3622, 0.6655]) tensor([0.7301, 0.0355, 0.0618, 0.1726]) -Greedy action tensor([ 1.5158, -0.4851, -0.4982, 0.2970]) tensor([0.6393, 0.0864, 0.0853, 0.1890]) -Greedy action tensor([ 0.7446, -0.0121, -0.0828, -0.0517]) tensor([0.4242, 0.1990, 0.1854, 0.1913]) -Greedy action tensor([ 1.3028, -0.5186, -0.9106, 0.2442]) tensor([0.6180, 0.1000, 0.0676, 0.2144]) -Greedy action tensor([ 1.7977, -0.2830, -0.5558, 0.9137]) tensor([0.6124, 0.0764, 0.0582, 0.2530]) -Greedy action tensor([ 1.4808, -0.1727, -0.4585, 0.1622]) tensor([0.6239, 0.1194, 0.0897, 0.1669]) -Greedy action tensor([ 0.8841, -0.1801, -0.1366, 0.1959]) tensor([0.4529, 0.1563, 0.1632, 0.2276]) -Greedy action tensor([ 1.3090, -0.6059, -0.7240, 0.4195]) tensor([0.5920, 0.0872, 0.0775, 0.2432]) -Greedy action tensor([ 2.0739, -1.2790, 0.5308, 0.1112]) tensor([0.7199, 0.0252, 0.1538, 0.1011]) -Greedy action tensor([ 1.2362, -0.3147, -0.1682, -0.0527]) tensor([0.5770, 0.1224, 0.1417, 0.1590]) -Greedy action tensor([ 1.3353, -0.5751, -0.7467, 0.0269]) tensor([0.6481, 0.0959, 0.0808, 0.1751]) -Greedy action tensor([ 0.4323, -0.1673, -0.0104, -0.0782]) tensor([0.3582, 0.1967, 0.2301, 0.2150]) -Greedy action tensor([ 9.7664e-01, -6.1243e-01, 5.3793e-05, -3.8690e-01]) tensor([0.5445, 0.1111, 0.2051, 0.1393]) -Greedy action tensor([ 0.7851, -0.2646, -0.1564, -0.1865]) tensor([0.4720, 0.1652, 0.1841, 0.1786]) -Greedy action tensor([ 0.6824, -0.4977, 0.0091, -0.2449]) tensor([0.4519, 0.1389, 0.2305, 0.1788]) -Greedy action tensor([ 0.6342, -0.1463, 0.0706, -0.1381]) tensor([0.4017, 0.1841, 0.2286, 0.1856]) -Greedy action tensor([ 0.8573, -0.5263, -0.2361, -0.4527]) tensor([0.5389, 0.1351, 0.1806, 0.1454]) -Greedy action tensor([ 0.4947, -0.0744, -0.0764, -0.0885]) tensor([0.3719, 0.2105, 0.2101, 0.2076]) -Greedy action tensor([ 0.9326, -0.8681, 0.2169, -0.3808]) tensor([0.5200, 0.0859, 0.2542, 0.1398]) -Greedy action tensor([ 5.3026e-01, -4.1726e-01, -4.8390e-04, -2.8677e-01]) tensor([0.4136, 0.1604, 0.2433, 0.1827]) -Greedy action tensor([ 1.1880, -0.6392, -0.1125, -0.8835]) tensor([0.6413, 0.1032, 0.1747, 0.0808]) -Greedy action tensor([ 1.1589, -1.2555, 0.2486, -0.5917]) tensor([0.6004, 0.0537, 0.2416, 0.1043]) -Greedy action tensor([ 0.6685, -0.2547, 0.0550, -0.2940]) tensor([0.4309, 0.1712, 0.2333, 0.1646]) -Greedy action tensor([ 0.2718, -0.2097, -0.1480, -0.3377]) tensor([0.3548, 0.2192, 0.2332, 0.1929]) -Greedy action tensor([ 0.8242, -0.4337, -0.0775, -0.1509]) tensor([0.4837, 0.1375, 0.1963, 0.1824]) -Greedy action tensor([ 0.8092, -0.6162, 0.0457, -0.4433]) tensor([0.5020, 0.1207, 0.2339, 0.1435]) -Greedy action tensor([ 0.8275, -0.6926, 0.0922, -0.5513]) tensor([0.5128, 0.1122, 0.2458, 0.1292]) -Greedy action tensor([ 0.7520, -0.3186, 0.1602, -0.1495]) tensor([0.4344, 0.1489, 0.2404, 0.1763]) -Greedy action tensor([ 0.6337, -0.2443, -0.0125, -0.1091]) tensor([0.4140, 0.1721, 0.2169, 0.1970]) -Greedy action tensor([ 0.7279, -0.0742, -0.1063, 0.0956]) tensor([0.4142, 0.1857, 0.1799, 0.2201]) -Greedy action tensor([ 0.7998, 0.1594, 0.2280, -0.0689]) tensor([0.3982, 0.2099, 0.2248, 0.1671]) -Greedy action tensor([ 0.3616, -0.5249, -0.1896, -0.0876]) tensor([0.3807, 0.1569, 0.2194, 0.2430]) -Greedy action tensor([ 0.6041, -0.1061, 0.1427, -0.0205]) tensor([0.3763, 0.1850, 0.2372, 0.2015]) -Greedy action tensor([ 0.5674, -0.3799, -0.0328, -0.4553]) tensor([0.4355, 0.1689, 0.2390, 0.1566]) -Greedy action tensor([ 0.9602, -0.4664, -0.0744, -0.2654]) tensor([0.5294, 0.1271, 0.1881, 0.1554]) -Greedy action tensor([ 0.7950, -0.2613, -0.1332, -0.1985]) tensor([0.4732, 0.1645, 0.1870, 0.1752]) -Greedy action tensor([ 0.5796, -0.0353, 0.0876, -0.0926]) tensor([0.3756, 0.2031, 0.2296, 0.1918]) -Greedy action tensor([ 1.1343, -0.3687, 0.1907, -0.3428]) tensor([0.5435, 0.1209, 0.2115, 0.1241]) -Greedy action tensor([ 1.2224, -0.9269, 0.0681, -0.5236]) tensor([0.6225, 0.0726, 0.1963, 0.1086]) -Greedy action tensor([ 0.7931, -0.4948, 0.0012, -0.1891]) tensor([0.4754, 0.1312, 0.2154, 0.1781]) -Greedy action tensor([ 0.9640, -0.5724, -0.0314, -0.3681]) tensor([0.5409, 0.1164, 0.1999, 0.1428]) -Greedy action tensor([ 1.3408, -0.7945, -0.0465, -0.9779]) tensor([0.6820, 0.0806, 0.1703, 0.0671]) -Greedy action tensor([ 0.9454, -0.8912, -0.0508, -0.4838]) tensor([0.5656, 0.0901, 0.2088, 0.1354]) -Greedy action tensor([ 0.8734, -0.5870, -0.0156, -0.5858]) tensor([0.5332, 0.1238, 0.2192, 0.1239]) -Greedy action tensor([ 1.1675, -0.9435, 0.1101, -0.6463]) tensor([0.6129, 0.0742, 0.2129, 0.0999]) -Greedy action tensor([ 1.0310, -0.5325, -0.0191, -0.4130]) tensor([0.5570, 0.1166, 0.1949, 0.1314]) -Greedy action tensor([ 0.9658, -0.5693, -0.0089, -0.4438]) tensor([0.5444, 0.1173, 0.2054, 0.1330]) -Greedy action tensor([ 0.9649, -0.7760, 0.1544, -0.4006]) tensor([0.5332, 0.0935, 0.2371, 0.1361]) -Greedy action tensor([ 1.0970, -0.9417, 0.1052, -0.6750]) tensor([0.5984, 0.0779, 0.2220, 0.1017]) -Greedy action tensor([ 0.9687, -0.7557, 0.1906, -0.3428]) tensor([0.5244, 0.0935, 0.2409, 0.1413]) -Greedy action tensor([ 9.3152e-01, -9.6122e-02, 6.7601e-02, -7.6355e-04]) tensor([0.4602, 0.1647, 0.1940, 0.1812]) -Greedy action tensor([ 0.8135, -0.4494, -0.1087, -0.1738]) tensor([0.4871, 0.1378, 0.1937, 0.1815]) -Greedy action tensor([ 0.9227, -0.6502, -0.0942, -0.4509]) tensor([0.5487, 0.1138, 0.1985, 0.1389]) -Greedy action tensor([ 0.7934, -0.4309, 0.0218, -0.3705]) tensor([0.4834, 0.1421, 0.2235, 0.1510]) -Greedy action tensor([ 0.9055, -0.4669, 0.1331, -0.3782]) tensor([0.5019, 0.1272, 0.2318, 0.1390]) -Greedy action tensor([ 0.9553, -0.4800, -0.0603, -0.4711]) tensor([0.5434, 0.1293, 0.1968, 0.1305]) -Greedy action tensor([ 1.2446, -0.6129, -0.1262, -0.8217]) tensor([0.6508, 0.1016, 0.1652, 0.0824]) -Greedy action tensor([ 0.9391, -0.8003, 0.1149, -0.4791]) tensor([0.5387, 0.0946, 0.2363, 0.1304]) -Greedy action tensor([ 0.8950, -0.7563, 0.0983, -0.6186]) tensor([0.5369, 0.1030, 0.2420, 0.1182]) -Greedy action tensor([ 0.9062, -0.7950, -0.0977, -0.7088]) tensor([0.5722, 0.1044, 0.2097, 0.1138]) -Greedy action tensor([ 0.7410, -0.0973, -0.0602, -0.0334]) tensor([0.4269, 0.1846, 0.1916, 0.1968]) -Greedy action tensor([ 0.4663, 0.0696, 0.0587, -0.2114]) tensor([0.3514, 0.2363, 0.2338, 0.1785]) -Greedy action tensor([ 0.8247, -0.4949, -0.0263, -0.2895]) tensor([0.4945, 0.1321, 0.2111, 0.1623]) -Greedy action tensor([ 1.1361, -0.6427, 0.1971, -0.7437]) tensor([0.5839, 0.0986, 0.2283, 0.0891]) -Greedy action tensor([ 0.6829, -0.3203, -0.1866, -0.2260]) tensor([0.4569, 0.1675, 0.1915, 0.1841]) -Greedy action tensor([ 0.4907, -0.0760, 0.0914, -0.1570]) tensor([0.3621, 0.2055, 0.2429, 0.1895]) -Greedy action tensor([ 0.7049, -0.3692, 0.0399, -0.3492]) tensor([0.4536, 0.1550, 0.2333, 0.1581]) -Greedy action tensor([ 0.8584, 0.2325, 0.0977, -0.1092]) tensor([0.4198, 0.2245, 0.1962, 0.1595]) -Greedy action tensor([ 0.5397, -0.3170, -0.0864, -0.2035]) tensor([0.4107, 0.1744, 0.2196, 0.1953]) -Greedy action tensor([ 0.8992, -0.7859, -0.1091, -0.2282]) tensor([0.5336, 0.0989, 0.1947, 0.1728]) -Greedy action tensor([ 0.8620, -0.7021, 0.0563, -0.3706]) tensor([0.5135, 0.1075, 0.2294, 0.1497]) -Greedy action tensor([ 0.8231, -0.3526, -0.0549, -0.1663]) tensor([0.4771, 0.1472, 0.1983, 0.1774]) -Greedy action tensor([ 1.0640, -0.7194, 0.0965, -0.6700]) tensor([0.5798, 0.0974, 0.2203, 0.1024]) -Greedy action tensor([ 0.8241, -0.8747, 0.1177, -0.1790]) tensor([0.4895, 0.0895, 0.2415, 0.1795]) -Greedy action tensor([ 0.7223, -0.7057, 0.0092, -0.2886]) tensor([0.4776, 0.1145, 0.2341, 0.1738]) -Greedy action tensor([ 1.1738, -0.7787, 0.1306, -0.7339]) tensor([0.6088, 0.0864, 0.2145, 0.0904]) -Greedy action tensor([ 0.9013, -0.6323, -0.0517, -0.3002]) tensor([0.5257, 0.1134, 0.2027, 0.1581]) -Greedy action tensor([ 1.2189, -0.8476, 0.0374, -0.6292]) tensor([0.6285, 0.0796, 0.1929, 0.0990]) -Greedy action tensor([ 0.6678, -0.4408, -0.1429, -0.3319]) tensor([0.4667, 0.1540, 0.2075, 0.1717]) -Greedy action tensor([ 0.8024, -0.5147, 0.0241, -0.4530]) tensor([0.4970, 0.1332, 0.2282, 0.1416]) -Greedy action tensor([ 0.8373, -0.6233, 0.1253, -0.8913]) tensor([0.5262, 0.1221, 0.2582, 0.0934]) -Greedy action tensor([ 1.1648, -0.5933, -0.0795, -0.4145]) tensor([0.6000, 0.1034, 0.1729, 0.1237]) -Greedy action tensor([ 0.8187, -0.1635, -0.0034, -0.2082]) tensor([0.4604, 0.1724, 0.2023, 0.1649]) -Greedy action tensor([ 1.1281, -0.6213, -0.0486, -0.7846]) tensor([0.6136, 0.1067, 0.1892, 0.0906]) -Greedy action tensor([ 0.6660, 0.0284, 0.0278, -0.0185]) tensor([0.3904, 0.2064, 0.2062, 0.1969]) -Greedy action tensor([ 0.4335, 0.0810, 0.0724, -0.0923]) tensor([0.3343, 0.2350, 0.2330, 0.1976]) -Greedy action tensor([ 0.6093, -0.2543, 0.0670, -0.0831]) tensor([0.3995, 0.1684, 0.2322, 0.1999]) -Greedy action tensor([ 0.5436, 0.0220, 0.0119, -0.0022]) tensor([0.3623, 0.2150, 0.2129, 0.2099]) -Greedy action tensor([ 0.4588, 0.0644, -0.1523, -0.0147]) tensor([0.3521, 0.2374, 0.1911, 0.2193]) -Greedy action tensor([ 1.0982, -0.8872, 0.0652, -0.5504]) tensor([0.5933, 0.0815, 0.2112, 0.1141]) -Greedy action tensor([ 1.3449, -0.9551, -0.0515, -0.7484]) tensor([0.6798, 0.0682, 0.1682, 0.0838]) -Greedy action tensor([ 0.7651, -0.1588, -0.1861, -0.0520]) tensor([0.4494, 0.1784, 0.1736, 0.1985]) -Greedy action tensor([-1.8157, -0.4542, 0.5986, -0.1235]) tensor([0.0465, 0.1814, 0.5197, 0.2525]) -Greedy action tensor([-1.6646, -0.5561, 0.5301, -0.0254]) tensor([0.0551, 0.1669, 0.4944, 0.2837]) -Greedy action tensor([-1.8670, -0.3538, 0.6384, -0.1354]) tensor([0.0427, 0.1938, 0.5226, 0.2410]) -Greedy action tensor([-1.7962, -0.4049, 0.5862, -0.1146]) tensor([0.0471, 0.1894, 0.5103, 0.2532]) -Greedy action tensor([-1.8959, -0.4546, 0.6477, -0.1542]) tensor([0.0423, 0.1786, 0.5379, 0.2412]) -Greedy action tensor([-1.8146, -0.2696, 0.5793, -0.1230]) tensor([0.0453, 0.2124, 0.4964, 0.2459]) -Greedy action tensor([-1.0739, -0.1536, 0.4011, 0.4877]) tensor([0.0791, 0.1985, 0.3456, 0.3769]) -Greedy action tensor([-0.9081, 0.4721, 0.1759, -0.0556]) tensor([0.0973, 0.3868, 0.2877, 0.2282]) -Greedy action tensor([-1.8651, -0.4699, 0.6435, -0.1347]) tensor([0.0435, 0.1757, 0.5350, 0.2457]) -Greedy action tensor([-1.8700, -0.3154, 0.6075, -0.2175]) tensor([0.0437, 0.2070, 0.5210, 0.2283]) -Greedy action tensor([-1.8306, -0.4434, 0.6559, -0.0821]) tensor([0.0439, 0.1758, 0.5279, 0.2524]) -Greedy action tensor([-1.5467, -0.3069, 0.6439, -0.0095]) tensor([0.0554, 0.1915, 0.4954, 0.2577]) -Greedy action tensor([-1.8493, -0.4491, 0.6206, -0.1305]) tensor([0.0445, 0.1806, 0.5264, 0.2484]) -Greedy action tensor([-1.2778, 0.6156, 0.1705, 0.2531]) tensor([0.0605, 0.4021, 0.2576, 0.2798]) -Greedy action tensor([-1.5439, -0.4751, 0.5063, 0.2136]) tensor([0.0572, 0.1666, 0.4445, 0.3317]) -Greedy action tensor([-1.8974, -0.4319, 0.6466, -0.1559]) tensor([0.0421, 0.1822, 0.5357, 0.2401]) -Greedy action tensor([-1.8975, -0.4323, 0.6412, -0.1651]) tensor([0.0423, 0.1830, 0.5356, 0.2391]) -Greedy action tensor([-1.9044, -0.3622, 0.6443, -0.1459]) tensor([0.0412, 0.1926, 0.5270, 0.2391]) -Greedy action tensor([-1.9023, -0.2573, 0.6203, -0.1585]) tensor([0.0410, 0.2127, 0.5115, 0.2348]) -Greedy action tensor([-1.8890, -0.4232, 0.6461, -0.1488]) tensor([0.0423, 0.1832, 0.5336, 0.2410]) -Greedy action tensor([-1.9158, -0.4006, 0.6521, -0.1656]) tensor([0.0411, 0.1869, 0.5356, 0.2364]) -Greedy action tensor([-1.9054, -0.3971, 0.6416, -0.1524]) tensor([0.0416, 0.1878, 0.5307, 0.2399]) -Greedy action tensor([-1.6938, -0.5087, 0.7767, -0.4353]) tensor([0.0510, 0.1667, 0.6029, 0.1794]) -Greedy action tensor([-1.8849, -0.3181, 0.6231, -0.1385]) tensor([0.0420, 0.2013, 0.5159, 0.2409]) -Greedy action tensor([-1.9117, -0.3700, 0.6457, -0.1629]) tensor([0.0411, 0.1921, 0.5305, 0.2363]) -Greedy action tensor([-1.8527, -0.4321, 0.6398, -0.0872]) tensor([0.0433, 0.1794, 0.5240, 0.2533]) -Greedy action tensor([-1.6281, -0.3928, 0.4731, -0.0305]) tensor([0.0570, 0.1959, 0.4657, 0.2814]) -Greedy action tensor([-1.5089, 0.2078, 0.4594, -0.4358]) tensor([0.0601, 0.3343, 0.4300, 0.1756]) -Greedy action tensor([-1.8785, -0.2214, 0.6081, -0.1487]) tensor([0.0418, 0.2194, 0.5029, 0.2359]) -Greedy action tensor([-1.8943, -0.2915, 0.6164, -0.1436]) tensor([0.0416, 0.2066, 0.5122, 0.2396]) -Greedy action tensor([-1.7412, -0.3234, 0.5234, -0.0547]) tensor([0.0496, 0.2048, 0.4777, 0.2679]) -Greedy action tensor([-1.8863, -0.1353, 0.5910, -0.1511]) tensor([0.0411, 0.2367, 0.4893, 0.2329]) -Greedy action tensor([-1.8905, -0.4527, 0.6470, -0.1520]) tensor([0.0425, 0.1788, 0.5371, 0.2416]) -Greedy action tensor([-1.7503, -0.0432, 0.5184, -0.0169]) tensor([0.0458, 0.2524, 0.4426, 0.2592]) -Greedy action tensor([-1.8179, -0.2089, 0.5658, -0.1101]) tensor([0.0447, 0.2235, 0.4850, 0.2467]) -Greedy action tensor([-0.8306, 0.8262, 0.1445, -0.0415]) tensor([0.0901, 0.4725, 0.2390, 0.1984]) -Greedy action tensor([-1.5994, -0.4474, 0.5093, -0.1306]) tensor([0.0597, 0.1890, 0.4919, 0.2594]) -Greedy action tensor([-1.7795, 0.0081, 0.4969, -0.0627]) tensor([0.0449, 0.2681, 0.4372, 0.2498]) -Greedy action tensor([-0.5384, 0.6463, -0.1239, -0.1715]) tensor([0.1384, 0.4525, 0.2095, 0.1997]) -Greedy action tensor([-1.9241, -0.4450, 0.6612, -0.1681]) tensor([0.0409, 0.1795, 0.5427, 0.2368]) -Greedy action tensor([-1.8446, -0.3677, 0.6636, -0.0176]) tensor([0.0419, 0.1834, 0.5144, 0.2603]) -Greedy action tensor([-1.6688, -0.3069, 0.5231, -0.1178]) tensor([0.0538, 0.2102, 0.4820, 0.2539]) -Greedy action tensor([-1.7702, -0.1654, -0.1203, -0.6419]) tensor([0.0701, 0.3487, 0.3648, 0.2165]) -Greedy action tensor([-1.5005, -0.3432, 0.4293, -0.0361]) tensor([0.0650, 0.2067, 0.4474, 0.2809]) -Greedy action tensor([-1.6909, -0.4699, 0.5417, -0.0469]) tensor([0.0529, 0.1795, 0.4936, 0.2740]) -Greedy action tensor([-1.7721, -0.3826, 0.5713, -0.1046]) tensor([0.0482, 0.1936, 0.5025, 0.2556]) -Greedy action tensor([-1.8162, -0.4807, 0.6708, -0.0070]) tensor([0.0436, 0.1658, 0.5244, 0.2662]) -Greedy action tensor([-1.8434, -0.3695, 0.6038, -0.1372]) tensor([0.0446, 0.1947, 0.5152, 0.2456]) -Greedy action tensor([-1.1513, -0.2139, 0.3253, -0.0764]) tensor([0.0921, 0.2351, 0.4031, 0.2697]) -Greedy action tensor([-1.5731, -0.1151, 0.6863, 0.1573]) tensor([0.0487, 0.2095, 0.4668, 0.2750]) -Greedy action tensor([-1.8656, -0.3714, 0.6419, -0.1264]) tensor([0.0427, 0.1902, 0.5240, 0.2430]) -Greedy action tensor([-1.5620, -0.1850, 0.4588, -0.0788]) tensor([0.0591, 0.2343, 0.4460, 0.2606]) -Greedy action tensor([-1.8883, -0.2989, 0.6289, -0.1500]) tensor([0.0417, 0.2043, 0.5168, 0.2372]) -Greedy action tensor([-1.9198, -0.3287, 0.6344, -0.1678]) tensor([0.0408, 0.2001, 0.5242, 0.2350]) -Greedy action tensor([-1.6555, -0.4257, 0.5615, -0.1564]) tensor([0.0553, 0.1892, 0.5078, 0.2477]) -Greedy action tensor([-0.7966, 0.7790, 0.1222, -0.0306]) tensor([0.0953, 0.4607, 0.2389, 0.2051]) -Greedy action tensor([-1.0128, 0.2233, 0.1116, 0.2325]) tensor([0.0910, 0.3131, 0.2800, 0.3160]) -Greedy action tensor([-1.7404, -0.3842, 0.5558, -0.0979]) tensor([0.0500, 0.1942, 0.4972, 0.2586]) -Greedy action tensor([-1.9054, -0.4315, 0.6518, -0.1590]) tensor([0.0417, 0.1819, 0.5375, 0.2389]) -Greedy action tensor([-1.8406, -0.0920, 0.5746, -0.1261]) tensor([0.0426, 0.2446, 0.4764, 0.2364]) -Greedy action tensor([-1.0325, -0.3943, 0.4745, 0.0435]) tensor([0.0967, 0.1831, 0.4365, 0.2837]) -Greedy action tensor([-1.8684, -0.3951, 0.6248, -0.1538]) tensor([0.0434, 0.1896, 0.5257, 0.2413]) -Greedy action tensor([-1.9374, -0.4143, 0.6570, -0.1758]) tensor([0.0403, 0.1850, 0.5399, 0.2348]) -Greedy action tensor([-1.9466, -0.4480, 0.6671, -0.1816]) tensor([0.0401, 0.1793, 0.5467, 0.2340]) -Greedy action tensor([-1.8404, -0.3397, 0.5998, -0.1419]) tensor([0.0446, 0.2000, 0.5117, 0.2437]) -Greedy action tensor([-1.6146, -0.4639, 0.5020, -0.0645]) tensor([0.0582, 0.1840, 0.4834, 0.2743]) -Greedy action tensor([-1.4846, -0.5732, 0.4039, 0.1193]) tensor([0.0664, 0.1651, 0.4386, 0.3300]) -Greedy action tensor([-1.8159, -0.2443, 0.5845, -0.1930]) tensor([0.0456, 0.2197, 0.5033, 0.2313]) -Greedy action tensor([-1.8344, -0.4624, 0.6149, -0.0894]) tensor([0.0449, 0.1772, 0.5205, 0.2574]) -Greedy action tensor([-1.6891, -0.1565, 0.5559, 0.0918]) tensor([0.0476, 0.2204, 0.4494, 0.2825]) -Greedy action tensor([-1.9255, -0.4193, 0.6566, -0.1698]) tensor([0.0408, 0.1839, 0.5393, 0.2360]) -Greedy action tensor([-1.4845, -0.2601, 0.3648, -0.0323]) tensor([0.0665, 0.2264, 0.4228, 0.2843]) -Greedy action tensor([-1.7093, -0.3469, 0.6401, -0.0368]) tensor([0.0483, 0.1886, 0.5060, 0.2571]) -Greedy action tensor([-1.9345, -0.4105, 0.6547, -0.1727]) tensor([0.0404, 0.1856, 0.5385, 0.2354]) -Greedy action tensor([-1.7090, -0.4773, 0.6242, -0.0368]) tensor([0.0498, 0.1708, 0.5140, 0.2654]) -Greedy action tensor([-1.7191, -0.4511, 0.7559, 0.1721]) tensor([0.0434, 0.1541, 0.5152, 0.2874]) -Greedy action tensor([-1.4998, -0.3977, 0.0839, -0.3513]) tensor([0.0831, 0.2501, 0.4048, 0.2620]) -Greedy action tensor([-1.8759, -0.4432, 0.6349, -0.1448]) tensor([0.0432, 0.1810, 0.5319, 0.2439]) -Greedy action tensor([-1.4741, -0.4457, 0.4334, 0.0895]) tensor([0.0653, 0.1827, 0.4400, 0.3120]) -Greedy action tensor([-1.3153, -0.3990, 0.4583, -0.1993]) tensor([0.0804, 0.2009, 0.4734, 0.2453]) -Greedy action tensor([-1.8443, -0.4304, 0.6203, -0.1278]) tensor([0.0446, 0.1833, 0.5241, 0.2480]) -Greedy action tensor([ 0.5696, -1.1204, 0.3586, 0.5642]) tensor([0.3346, 0.0617, 0.2709, 0.3328]) -Greedy action tensor([ 0.3033, -1.5787, 0.9312, -0.4457]) tensor([0.2858, 0.0435, 0.5355, 0.1351]) -Greedy action tensor([-1.1631, -0.8746, 0.4551, -0.7247]) tensor([0.1120, 0.1495, 0.5649, 0.1736]) -Greedy action tensor([-0.1122, -0.7704, 0.6316, -0.2271]) tensor([0.2216, 0.1147, 0.4662, 0.1975]) -Greedy action tensor([ 0.2113, -1.5695, 0.3291, -0.8363]) tensor([0.3782, 0.0637, 0.4254, 0.1326]) -Greedy action tensor([-0.9516, -0.2589, -0.2591, 0.0612]) tensor([0.1290, 0.2579, 0.2579, 0.3552]) -Greedy action tensor([ 0.3154, -0.7895, 0.8927, -0.9475]) tensor([0.2945, 0.0976, 0.5246, 0.0833]) -Greedy action tensor([-0.1011, 0.1615, -0.0426, -1.0123]) tensor([0.2658, 0.3456, 0.2818, 0.1068]) -Greedy action tensor([ 0.7177, -0.6559, 0.1506, -0.5771]) tensor([0.4775, 0.1209, 0.2708, 0.1308]) -Greedy action tensor([ 1.0248, -0.9218, -0.9421, 0.5924]) tensor([0.5177, 0.0739, 0.0724, 0.3360]) -Greedy action tensor([-0.8066, -0.0211, -1.4776, 0.4027]) tensor([0.1417, 0.3109, 0.0725, 0.4749]) -Greedy action tensor([-0.0685, 0.5225, 0.2201, -0.1868]) tensor([0.1989, 0.3591, 0.2654, 0.1767]) -Greedy action tensor([ 0.1190, 0.1000, 0.8521, -0.0532]) tensor([0.2039, 0.2001, 0.4244, 0.1716]) -Greedy action tensor([-0.3476, -1.0197, -0.4254, -0.0525]) tensor([0.2646, 0.1351, 0.2448, 0.3555]) -Greedy action tensor([ 0.1421, -0.0560, -0.4478, -0.5013]) tensor([0.3448, 0.2828, 0.1912, 0.1812]) -Greedy action tensor([-0.0244, -0.0279, -0.1811, -0.7060]) tensor([0.2978, 0.2968, 0.2547, 0.1507]) -Greedy action tensor([-0.0219, -1.3080, -0.4397, -0.5805]) tensor([0.3989, 0.1102, 0.2627, 0.2282]) -Greedy action tensor([-0.7696, 0.0789, -0.5465, -0.6456]) tensor([0.1749, 0.4086, 0.2186, 0.1980]) -Greedy action tensor([ 0.5209, 0.3280, -0.4146, -0.4317]) tensor([0.3842, 0.3168, 0.1508, 0.1482]) -Greedy action tensor([-0.4871, -1.0674, 0.5355, -1.1089]) tensor([0.2050, 0.1148, 0.5701, 0.1101]) -Greedy action tensor([ 1.1101, -0.8442, 0.3167, 0.3172]) tensor([0.4886, 0.0692, 0.2210, 0.2211]) -Greedy action tensor([1.2667, 0.0341, 1.0760, 0.5560]) tensor([0.3833, 0.1117, 0.3167, 0.1883]) -Greedy action tensor([ 0.2052, -0.4987, -0.2359, 0.5445]) tensor([0.2823, 0.1397, 0.1816, 0.3964]) -Greedy action tensor([-0.8809, -0.3872, 0.7123, -0.4692]) tensor([0.1103, 0.1807, 0.5426, 0.1665]) -Greedy action tensor([-0.4920, 0.4397, -0.0012, -0.9611]) tensor([0.1725, 0.4379, 0.2818, 0.1079]) -Greedy action tensor([ 0.4474, -0.4073, -0.6050, 0.0319]) tensor([0.4108, 0.1747, 0.1434, 0.2711]) -Greedy action tensor([ 0.1286, -1.3461, 0.6354, -0.0067]) tensor([0.2658, 0.0608, 0.4412, 0.2322]) -Greedy action tensor([ 0.7442, -1.6687, 1.2541, 0.5790]) tensor([0.2776, 0.0249, 0.4622, 0.2353]) -Greedy action tensor([ 0.4875, -1.1312, 0.4873, -0.8819]) tensor([0.4078, 0.0808, 0.4077, 0.1037]) -Greedy action tensor([-0.8280, -0.3005, -0.2291, 0.2432]) tensor([0.1345, 0.2280, 0.2448, 0.3927]) -Greedy action tensor([ 0.4564, -1.4870, 0.1231, 0.6743]) tensor([0.3222, 0.0462, 0.2309, 0.4007]) -Greedy action tensor([ 1.0105, -0.5066, 0.2792, 0.5329]) tensor([0.4309, 0.0945, 0.2074, 0.2673]) -Greedy action tensor([-1.1542, -0.2614, 0.9619, -0.6275]) tensor([0.0744, 0.1818, 0.6177, 0.1261]) -Greedy action tensor([ 0.4062, 0.1822, 0.0717, -0.1055]) tensor([0.3211, 0.2566, 0.2298, 0.1925]) -Greedy action tensor([ 0.2833, -1.0285, -0.1739, -0.7387]) tensor([0.4420, 0.1191, 0.2798, 0.1591]) -Greedy action tensor([ 0.2989, -0.7508, 0.6258, 0.6988]) tensor([0.2365, 0.0828, 0.3279, 0.3528]) -Greedy action tensor([-0.2111, 0.4621, 0.4869, -0.9294]) tensor([0.1832, 0.3592, 0.3682, 0.0893]) -Greedy action tensor([ 0.1415, -1.2893, 0.0113, -0.0541]) tensor([0.3402, 0.0813, 0.2987, 0.2798]) -Greedy action tensor([ 0.4607, -1.0029, 1.1552, 0.2700]) tensor([0.2463, 0.0570, 0.4932, 0.2035]) -Greedy action tensor([-0.9815, 0.2348, -1.1413, -0.4806]) tensor([0.1454, 0.4907, 0.1239, 0.2400]) -Greedy action tensor([ 0.0986, 0.1042, -0.6863, -0.1119]) tensor([0.3056, 0.3073, 0.1394, 0.2476]) -Greedy action tensor([-1.2108, -0.8066, 0.9094, -1.5314]) tensor([0.0865, 0.1296, 0.7210, 0.0628]) -Greedy action tensor([-0.7675, -0.0158, -0.5195, -0.1038]) tensor([0.1576, 0.3343, 0.2020, 0.3061]) -Greedy action tensor([ 1.3683, -0.5603, 1.0501, -0.1434]) tensor([0.4777, 0.0694, 0.3475, 0.1054]) -Greedy action tensor([ 1.0345, -0.0072, 0.0313, 0.1219]) tensor([0.4715, 0.1664, 0.1729, 0.1893]) -Greedy action tensor([ 0.2690, 0.0520, 0.4061, -0.2902]) tensor([0.2838, 0.2285, 0.3255, 0.1622]) -Greedy action tensor([-0.6107, -0.6557, 0.0545, -1.2013]) tensor([0.2245, 0.2146, 0.4366, 0.1244]) -Greedy action tensor([-0.8733, -1.1037, 0.9356, -1.1274]) tensor([0.1153, 0.0916, 0.7037, 0.0894]) -Greedy action tensor([ 0.0585, -0.2359, -0.4254, 0.0032]) tensor([0.3023, 0.2252, 0.1864, 0.2861]) -Greedy action tensor([-1.0445, -1.3295, -0.0216, -0.8006]) tensor([0.1721, 0.1294, 0.4787, 0.2197]) -Greedy action tensor([-0.9583, -0.6589, 0.1256, -0.7819]) tensor([0.1539, 0.2076, 0.4549, 0.1836]) -Greedy action tensor([-0.5342, 0.2992, 0.1832, 0.4498]) tensor([0.1246, 0.2867, 0.2553, 0.3333]) -Greedy action tensor([ 0.5289, -1.4365, 0.2397, -0.0219]) tensor([0.4056, 0.0568, 0.3037, 0.2338]) -Greedy action tensor([ 0.0434, 0.0402, -0.0960, 0.1693]) tensor([0.2500, 0.2491, 0.2174, 0.2835]) -Greedy action tensor([ 0.5622, -1.8122, 0.8084, -0.4530]) tensor([0.3657, 0.0340, 0.4678, 0.1325]) -Greedy action tensor([-0.1510, 0.6032, 0.0678, -1.2421]) tensor([0.2125, 0.4517, 0.2644, 0.0714]) -Greedy action tensor([-0.6504, 0.2096, -1.0245, -0.5162]) tensor([0.1925, 0.4549, 0.1324, 0.2202]) -Greedy action tensor([-0.1341, -0.6783, 0.0393, -0.2521]) tensor([0.2734, 0.1586, 0.3251, 0.2429]) -Greedy action tensor([-0.7119, -1.1431, 0.9169, -0.2561]) tensor([0.1201, 0.0780, 0.6123, 0.1895]) -Greedy action tensor([ 0.6185, -0.5953, 0.2079, 0.6828]) tensor([0.3304, 0.0981, 0.2191, 0.3523]) -Greedy action tensor([ 0.2793, -0.1789, -0.1100, -0.9423]) tensor([0.3839, 0.2428, 0.2601, 0.1132]) -Greedy action tensor([ 0.5406, -0.0181, 0.1509, 0.0171]) tensor([0.3519, 0.2013, 0.2383, 0.2085]) -Greedy action tensor([-0.1154, 0.1243, 0.4503, 0.1128]) tensor([0.1891, 0.2403, 0.3330, 0.2376]) -Greedy action tensor([ 0.9713, 0.4595, 0.2159, -0.9151]) tensor([0.4503, 0.2699, 0.2116, 0.0683]) -Greedy action tensor([ 1.0680, 0.2563, -0.9052, 0.6988]) tensor([0.4397, 0.1953, 0.0611, 0.3039]) -Greedy action tensor([ 0.5705, -0.5080, -0.3604, -0.0124]) tensor([0.4362, 0.1484, 0.1719, 0.2435]) -Greedy action tensor([ 8.6640e-02, -3.7427e-04, -8.7166e-01, -3.1975e-01]) tensor([0.3371, 0.3090, 0.1293, 0.2245]) -Greedy action tensor([ 0.6369, -1.2863, 0.8643, 0.2694]) tensor([0.3232, 0.0472, 0.4057, 0.2238]) -Greedy action tensor([-0.3232, -0.8205, 0.2204, -0.8678]) tensor([0.2557, 0.1555, 0.4404, 0.1483]) -Greedy action tensor([1.3722, 0.0038, 0.1196, 0.0602]) tensor([0.5526, 0.1407, 0.1579, 0.1488]) -Greedy action tensor([-0.1638, -0.1964, -0.3798, -1.2158]) tensor([0.3202, 0.3099, 0.2580, 0.1118]) -Greedy action tensor([ 1.7322, -0.4175, 1.1153, 0.0842]) tensor([0.5410, 0.0630, 0.2919, 0.1041]) -Greedy action tensor([ 0.4640, -0.1629, 0.5853, -0.4979]) tensor([0.3284, 0.1754, 0.3707, 0.1255]) -Greedy action tensor([-1.0162, -0.4042, 0.4624, -1.7491]) tensor([0.1297, 0.2391, 0.5689, 0.0623]) -Greedy action tensor([-0.0458, 1.0015, -0.2040, -0.8705]) tensor([0.1945, 0.5543, 0.1660, 0.0853]) -Greedy action tensor([-1.4320, -0.7238, -0.6600, -1.0927]) tensor([0.1516, 0.3077, 0.3280, 0.2128]) -Greedy action tensor([ 0.2957, -0.8110, 0.1499, -0.1858]) tensor([0.3555, 0.1176, 0.3073, 0.2197]) -Greedy action tensor([ 0.8995, -1.6251, 1.0175, -0.4035]) tensor([0.4037, 0.0323, 0.4543, 0.1097]) -Greedy action tensor([-0.2538, -0.9013, 0.0498, -0.2327]) tensor([0.2565, 0.1342, 0.3474, 0.2619]) -Greedy action tensor([ 0.5848, 0.0628, 0.3010, -0.2139]) tensor([0.3576, 0.2122, 0.2693, 0.1609]) -Greedy action tensor([-0.7387, -0.2973, 0.0647, -1.1088]) tensor([0.1825, 0.2838, 0.4076, 0.1261]) -Greedy action tensor([ 1.6319, -0.7925, -0.6472, 0.5162]) tensor([0.6585, 0.0583, 0.0674, 0.2158]) -Greedy action tensor([ 1.5387, -0.7321, -0.6903, 0.5668]) tensor([0.6292, 0.0650, 0.0677, 0.2381]) -Greedy action tensor([ 1.2418, 0.0078, -0.7225, 0.4360]) tensor([0.5325, 0.1550, 0.0747, 0.2379]) -Greedy action tensor([ 1.2577, -0.6820, -0.7572, -0.3314]) tensor([0.6751, 0.0970, 0.0900, 0.1378]) -Greedy action tensor([ 0.8600, -0.2437, -0.4637, 0.2748]) tensor([0.4641, 0.1539, 0.1235, 0.2585]) -Greedy action tensor([ 1.6391, -1.0724, -0.1796, 0.3539]) tensor([0.6643, 0.0441, 0.1078, 0.1838]) -Greedy action tensor([ 1.6098, -0.7353, -0.2312, 0.1379]) tensor([0.6738, 0.0646, 0.1069, 0.1547]) -Greedy action tensor([ 1.5606, -0.0804, -0.8804, 0.1554]) tensor([0.6552, 0.1270, 0.0571, 0.1607]) -Greedy action tensor([ 1.1994, -0.3622, -0.1188, -0.0962]) tensor([0.5710, 0.1198, 0.1528, 0.1563]) -Greedy action tensor([ 1.7930, -0.4287, -0.5970, 0.3352]) tensor([0.6979, 0.0757, 0.0640, 0.1624]) -Greedy action tensor([ 1.6707, -0.8315, -0.4444, -0.0038]) tensor([0.7195, 0.0589, 0.0868, 0.1348]) -Greedy action tensor([ 0.8889, -0.3257, -0.3518, 0.3410]) tensor([0.4621, 0.1372, 0.1336, 0.2672]) -Greedy action tensor([ 2.5381, -1.1513, -0.1539, 1.1552]) tensor([0.7443, 0.0186, 0.0504, 0.1867]) -Greedy action tensor([ 1.2273, -0.2347, -0.9182, 0.4328]) tensor([0.5554, 0.1287, 0.0650, 0.2509]) -Greedy action tensor([ 1.2102, -0.5377, -0.4926, 0.1906]) tensor([0.5824, 0.1014, 0.1061, 0.2101]) -Greedy action tensor([ 1.3893, -0.2636, -0.1497, 0.2926]) tensor([0.5747, 0.1101, 0.1233, 0.1919]) -Greedy action tensor([ 1.2267, -0.1400, -1.1701, 0.7229]) tensor([0.5128, 0.1307, 0.0467, 0.3098]) -Greedy action tensor([ 1.4036, -0.3898, -0.2482, 0.0339]) tensor([0.6203, 0.1032, 0.1189, 0.1576]) -Greedy action tensor([ 1.3572, -0.2231, -0.6982, 0.0914]) tensor([0.6188, 0.1274, 0.0792, 0.1745]) -Greedy action tensor([ 1.4991, -0.0831, -0.9049, 0.1100]) tensor([0.6472, 0.1330, 0.0585, 0.1613]) -Greedy action tensor([ 1.2791, -0.2078, -0.0730, 0.1898]) tensor([0.5491, 0.1241, 0.1420, 0.1847]) -Greedy action tensor([ 1.4330, -0.3941, -0.3141, 0.1225]) tensor([0.6231, 0.1002, 0.1086, 0.1680]) -Greedy action tensor([ 1.6958, -0.4217, -0.9837, 0.1833]) tensor([0.7096, 0.0854, 0.0487, 0.1564]) -Greedy action tensor([ 1.2521, -0.1509, -0.3760, 0.2878]) tensor([0.5484, 0.1348, 0.1077, 0.2091]) -Greedy action tensor([ 1.6983, -0.3316, -0.2789, 0.3448]) tensor([0.6544, 0.0860, 0.0906, 0.1691]) -Greedy action tensor([ 0.6406, -0.5202, -0.2082, 0.2682]) tensor([0.4115, 0.1289, 0.1761, 0.2835]) -Greedy action tensor([ 1.3253, -0.3487, -0.7157, 0.3536]) tensor([0.5897, 0.1106, 0.0766, 0.2232]) -Greedy action tensor([ 1.4978, -0.3278, -0.5206, 0.1241]) tensor([0.6463, 0.1041, 0.0859, 0.1636]) -Greedy action tensor([ 1.6110, -0.5918, -0.1988, 0.4787]) tensor([0.6264, 0.0692, 0.1025, 0.2019]) -Greedy action tensor([ 1.4719, -0.3771, -0.8944, 0.6549]) tensor([0.5907, 0.0930, 0.0554, 0.2609]) -Greedy action tensor([ 1.4560, 0.0489, -0.6560, 0.4117]) tensor([0.5821, 0.1425, 0.0704, 0.2049]) -Greedy action tensor([ 1.4993, 0.1263, -1.1995, -0.0122]) tensor([0.6488, 0.1644, 0.0437, 0.1431]) -Greedy action tensor([ 1.7843, -0.6868, -0.6331, 0.2458]) tensor([0.7203, 0.0609, 0.0642, 0.1547]) -Greedy action tensor([ 1.4785, -0.5860, -0.5089, 0.2044]) tensor([0.6478, 0.0822, 0.0888, 0.1812]) -Greedy action tensor([ 1.8489, -0.5781, 0.0636, 0.2843]) tensor([0.6825, 0.0603, 0.1145, 0.1428]) -Greedy action tensor([ 0.8943, -0.7420, -0.1134, -0.0535]) tensor([0.5135, 0.1000, 0.1875, 0.1990]) -Greedy action tensor([ 1.7361, -0.9707, -0.7276, 0.1632]) tensor([0.7357, 0.0491, 0.0626, 0.1526]) -Greedy action tensor([2.1490, 0.5954, 0.0053, 0.1597]) tensor([0.6824, 0.1443, 0.0800, 0.0933]) -Greedy action tensor([ 1.3061, -0.2988, -0.2958, 0.3962]) tensor([0.5540, 0.1113, 0.1116, 0.2230]) -Greedy action tensor([ 0.8086, 0.1527, -0.0942, -0.0033]) tensor([0.4222, 0.2191, 0.1712, 0.1875]) -Greedy action tensor([ 1.2910, -0.0327, -0.9233, 0.3938]) tensor([0.5608, 0.1493, 0.0613, 0.2286]) -Greedy action tensor([ 1.6786, -0.6031, -0.6376, -0.0378]) tensor([0.7244, 0.0740, 0.0715, 0.1302]) -Greedy action tensor([ 1.7410, -0.2300, -0.4242, 0.3213]) tensor([0.6685, 0.0931, 0.0767, 0.1616]) -Greedy action tensor([ 1.1616, -0.4910, -0.4267, 0.3743]) tensor([0.5403, 0.1035, 0.1104, 0.2459]) -Greedy action tensor([ 1.8365, -0.5697, -0.4240, 0.1521]) tensor([0.7246, 0.0653, 0.0756, 0.1345]) -Greedy action tensor([ 1.5995, 0.0129, 0.0097, -0.2750]) tensor([0.6402, 0.1310, 0.1306, 0.0982]) -Greedy action tensor([ 1.5059, -0.7252, -0.5365, 0.6341]) tensor([0.6041, 0.0649, 0.0784, 0.2526]) -Greedy action tensor([ 1.2553, -0.3846, -0.5727, 0.1033]) tensor([0.5985, 0.1161, 0.0962, 0.1891]) -Greedy action tensor([ 1.5112, 0.0420, -0.7615, 0.2899]) tensor([0.6142, 0.1413, 0.0633, 0.1811]) -Greedy action tensor([ 1.9908, -0.4538, -0.0103, 0.6749]) tensor([0.6711, 0.0582, 0.0907, 0.1800]) -Greedy action tensor([ 1.3249, -0.6169, -0.6297, 0.4945]) tensor([0.5811, 0.0833, 0.0823, 0.2533]) -Greedy action tensor([ 1.1943, -0.4672, -0.7436, 0.5383]) tensor([0.5397, 0.1025, 0.0777, 0.2801]) -Greedy action tensor([ 2.2578, 0.7036, 0.7467, -0.3457]) tensor([0.6640, 0.1403, 0.1465, 0.0491]) -Greedy action tensor([ 0.4706, -0.2451, 0.1613, -0.2185]) tensor([0.3670, 0.1794, 0.2693, 0.1843]) -Greedy action tensor([ 1.9250, -0.6731, -0.1841, 0.7075]) tensor([0.6704, 0.0499, 0.0813, 0.1984]) -Greedy action tensor([ 1.7038, -0.2669, -0.1805, 0.3181]) tensor([0.6487, 0.0904, 0.0986, 0.1623]) -Greedy action tensor([ 1.5509, -0.9556, -0.4009, 0.6158]) tensor([0.6188, 0.0505, 0.0879, 0.2429]) -Greedy action tensor([ 1.0300, -0.2873, -0.5234, 0.5024]) tensor([0.4832, 0.1294, 0.1022, 0.2851]) -Greedy action tensor([ 1.6100, -0.6718, -0.4207, 0.3545]) tensor([0.6586, 0.0673, 0.0864, 0.1877]) -Greedy action tensor([ 1.5007, -0.5185, -0.6211, 0.6487]) tensor([0.5955, 0.0791, 0.0714, 0.2540]) -Greedy action tensor([ 1.3191, 0.0317, -0.4704, 0.0400]) tensor([0.5810, 0.1603, 0.0970, 0.1617]) -Greedy action tensor([ 2.5481, -1.3605, -0.0319, 0.9756]) tensor([0.7672, 0.0154, 0.0581, 0.1592]) -Greedy action tensor([ 1.2631, -0.5373, -0.4881, 0.1609]) tensor([0.5985, 0.0989, 0.1039, 0.1988]) -Greedy action tensor([ 1.7902, -0.2297, -0.5577, -0.0273]) tensor([0.7191, 0.0954, 0.0687, 0.1168]) -Greedy action tensor([ 2.0900, -0.7236, -0.4586, 0.2971]) tensor([0.7665, 0.0460, 0.0599, 0.1276]) -Greedy action tensor([ 1.6940, -0.6739, -0.5742, 0.1035]) tensor([0.7138, 0.0669, 0.0739, 0.1455]) -Greedy action tensor([ 1.2202, -0.4529, -1.0447, 0.0772]) tensor([0.6210, 0.1165, 0.0645, 0.1980]) -Greedy action tensor([ 1.2599, -0.4068, -0.5436, 0.1387]) tensor([0.5954, 0.1125, 0.0981, 0.1940]) -Greedy action tensor([ 1.5437, -0.0021, -0.8447, 0.4520]) tensor([0.6095, 0.1299, 0.0559, 0.2046]) -Greedy action tensor([ 1.8405, -1.0212, -0.5534, 0.1394]) tensor([0.7513, 0.0430, 0.0686, 0.1371]) -Greedy action tensor([ 1.6994, -0.3542, -0.6066, -0.0264]) tensor([0.7113, 0.0912, 0.0709, 0.1266]) -Greedy action tensor([ 1.4438, -0.4795, -0.1937, -0.0147]) tensor([0.6357, 0.0929, 0.1236, 0.1478]) -Greedy action tensor([ 1.1923, -0.0724, -0.3403, 0.1169]) tensor([0.5436, 0.1535, 0.1174, 0.1855]) -Greedy action tensor([ 1.3056, -0.4666, -0.0422, 0.2446]) tensor([0.5631, 0.0957, 0.1463, 0.1949]) -Greedy action tensor([ 1.9250, 0.0996, -0.2236, -0.2913]) tensor([0.7211, 0.1162, 0.0841, 0.0786]) -Greedy action tensor([ 1.4281, -0.3439, -0.9847, 0.5976]) tensor([0.5898, 0.1003, 0.0528, 0.2571]) -Greedy action tensor([ 1.8303, -1.1796, -0.2942, -0.1324]) tensor([0.7638, 0.0377, 0.0913, 0.1073]) -Greedy action tensor([ 2.1584, -1.8382, -0.0090, 0.3261]) tensor([0.7735, 0.0142, 0.0885, 0.1238]) -Greedy action tensor([ 2.1007, -0.5243, -0.7456, 0.3353]) tensor([0.7683, 0.0557, 0.0446, 0.1315]) -Greedy action tensor([ 1.9765, -0.3833, -0.0176, 0.0886]) tensor([0.7236, 0.0683, 0.0985, 0.1095]) -Greedy action tensor([ 1.0642, -0.1925, -0.2341, 0.2809]) tensor([0.4964, 0.1413, 0.1355, 0.2268]) -Greedy action tensor([0.4890, 0.1195, 0.0587, 0.0751]) tensor([0.3331, 0.2302, 0.2166, 0.2202]) -Greedy action tensor([ 0.8877, -0.5194, -0.1501, -0.4893]) tensor([0.5401, 0.1322, 0.1913, 0.1363]) -Greedy action tensor([ 1.1719, -0.8280, 0.0358, -0.5735]) tensor([0.6131, 0.0830, 0.1969, 0.1070]) -Greedy action tensor([ 0.8754, -0.4570, -0.1563, -0.2494]) tensor([0.5142, 0.1357, 0.1832, 0.1669]) -Greedy action tensor([ 0.7134, -0.2472, -0.0988, -0.2125]) tensor([0.4499, 0.1722, 0.1997, 0.1782]) -Greedy action tensor([ 1.1620, -0.6803, -0.0180, -0.5914]) tensor([0.6102, 0.0967, 0.1875, 0.1057]) -Greedy action tensor([ 1.0081, -0.6575, -0.0653, -0.5691]) tensor([0.5755, 0.1088, 0.1968, 0.1189]) -Greedy action tensor([ 0.8925, -0.4151, 0.1270, -0.2193]) tensor([0.4844, 0.1310, 0.2253, 0.1593]) -Greedy action tensor([ 0.9256, -0.6998, -0.0390, -0.4515]) tensor([0.5464, 0.1075, 0.2082, 0.1379]) -Greedy action tensor([ 1.0142, -0.8509, -0.1544, -0.4185]) tensor([0.5867, 0.0909, 0.1824, 0.1400]) -Greedy action tensor([ 0.7024, -0.5084, -0.0342, -0.2383]) tensor([0.4615, 0.1375, 0.2209, 0.1801]) -Greedy action tensor([ 1.3813, -0.7001, -0.0999, -0.4249]) tensor([0.6595, 0.0823, 0.1499, 0.1083]) -Greedy action tensor([ 0.4575, -0.2446, -0.0192, -0.2605]) tensor([0.3840, 0.1903, 0.2384, 0.1873]) -Greedy action tensor([ 0.6711, -0.0487, -0.0964, -0.2042]) tensor([0.4224, 0.2056, 0.1960, 0.1760]) -Greedy action tensor([ 0.6488, 0.1927, -0.1646, 0.1058]) tensor([0.3762, 0.2384, 0.1668, 0.2186]) -Greedy action tensor([ 0.5798, -0.4867, -0.0887, -0.1410]) tensor([0.4268, 0.1469, 0.2187, 0.2076]) -Greedy action tensor([ 0.9827, -0.4936, -0.2718, -0.3437]) tensor([0.5621, 0.1284, 0.1603, 0.1492]) -Greedy action tensor([ 0.8319, -0.4135, -0.1002, -0.1146]) tensor([0.4832, 0.1391, 0.1902, 0.1875]) -Greedy action tensor([ 0.4403, -0.0729, -0.0334, -0.0909]) tensor([0.3560, 0.2131, 0.2217, 0.2093]) -Greedy action tensor([ 1.0440, -0.7215, -0.0835, -0.2927]) tensor([0.5689, 0.0973, 0.1843, 0.1495]) -Greedy action tensor([ 0.7648, -0.4349, -0.0413, -0.5722]) tensor([0.4974, 0.1498, 0.2221, 0.1306]) -Greedy action tensor([ 0.4872, -0.1818, -0.0617, -0.0706]) tensor([0.3756, 0.1924, 0.2169, 0.2150]) -Greedy action tensor([ 1.3511, -0.6828, -0.0789, -0.7690]) tensor([0.6711, 0.0878, 0.1606, 0.0805]) -Greedy action tensor([ 0.6058, -0.5917, -0.0899, -0.2035]) tensor([0.4453, 0.1344, 0.2221, 0.1982]) -Greedy action tensor([ 1.0491, -0.7830, -0.0030, -0.6535]) tensor([0.5912, 0.0946, 0.2065, 0.1077]) -Greedy action tensor([ 0.5526, -0.0099, -0.0923, 0.0120]) tensor([0.3736, 0.2129, 0.1960, 0.2176]) -Greedy action tensor([ 0.3704, 0.1986, -0.1012, 0.1007]) tensor([0.3096, 0.2607, 0.1932, 0.2364]) -Greedy action tensor([ 0.8533, -0.3473, -0.0655, -0.2758]) tensor([0.4942, 0.1488, 0.1972, 0.1598]) -Greedy action tensor([ 0.4550, -0.1030, -0.0052, 0.0151]) tensor([0.3512, 0.2010, 0.2217, 0.2262]) -Greedy action tensor([ 0.8349, -0.6962, -0.0069, -0.4593]) tensor([0.5205, 0.1126, 0.2243, 0.1427]) -Greedy action tensor([ 0.7741, -0.4914, 0.1624, -0.0976]) tensor([0.4459, 0.1258, 0.2418, 0.1865]) -Greedy action tensor([ 0.9407, -0.4867, -0.1847, -0.3108]) tensor([0.5404, 0.1297, 0.1754, 0.1546]) -Greedy action tensor([ 0.7275, 0.1347, -0.0683, 0.1096]) tensor([0.3932, 0.2174, 0.1774, 0.2120]) -Greedy action tensor([ 0.9457, -0.5430, -0.0563, -0.4081]) tensor([0.5402, 0.1219, 0.1983, 0.1395]) -Greedy action tensor([ 0.6466, -0.4791, -0.1806, -0.7592]) tensor([0.4983, 0.1617, 0.2179, 0.1222]) -Greedy action tensor([ 0.6177, -0.2159, -0.0490, -0.1360]) tensor([0.4135, 0.1797, 0.2123, 0.1946]) -Greedy action tensor([ 0.3026, 0.2420, -0.1023, 0.0490]) tensor([0.2955, 0.2781, 0.1971, 0.2293]) -Greedy action tensor([ 1.1861, -0.8038, 0.0146, -0.6822]) tensor([0.6246, 0.0854, 0.1936, 0.0964]) -Greedy action tensor([ 0.2339, 0.1062, -0.0605, -0.3733]) tensor([0.3155, 0.2776, 0.2350, 0.1719]) -Greedy action tensor([ 0.8105, -0.7260, 0.1043, -0.2324]) tensor([0.4852, 0.1044, 0.2394, 0.1710]) -Greedy action tensor([ 0.5621, -0.1959, 0.0127, -0.4838]) tensor([0.4171, 0.1955, 0.2408, 0.1466]) -Greedy action tensor([ 0.5264, -0.3300, -0.1837, -0.0302]) tensor([0.4017, 0.1706, 0.1975, 0.2302]) -Greedy action tensor([ 1.1929, -0.7259, 0.0580, -0.5287]) tensor([0.6072, 0.0891, 0.1952, 0.1085]) -Greedy action tensor([ 0.7612, -0.5351, 0.0784, -0.2322]) tensor([0.4653, 0.1273, 0.2351, 0.1723]) -Greedy action tensor([ 1.3081, -0.3983, 0.0250, -0.4678]) tensor([0.6142, 0.1115, 0.1703, 0.1040]) -Greedy action tensor([ 0.8365, -0.1708, -0.0353, -0.0218]) tensor([0.4530, 0.1655, 0.1895, 0.1920]) -Greedy action tensor([ 1.2966, -1.0507, 0.1481, -0.5542]) tensor([0.6370, 0.0609, 0.2020, 0.1001]) -Greedy action tensor([ 0.8232, 0.0216, 0.0399, -0.2074]) tensor([0.4420, 0.1983, 0.2020, 0.1577]) -Greedy action tensor([ 0.6540, -0.5243, -0.1501, -0.4328]) tensor([0.4779, 0.1471, 0.2138, 0.1612]) -Greedy action tensor([ 0.6878, -0.2472, 0.0060, -0.1569]) tensor([0.4296, 0.1686, 0.2172, 0.1846]) -Greedy action tensor([ 0.7612, -0.3635, -0.0596, -0.2613]) tensor([0.4707, 0.1529, 0.2071, 0.1693]) -Greedy action tensor([ 0.5731, -0.3647, -0.1144, -0.2155]) tensor([0.4257, 0.1667, 0.2141, 0.1935]) -Greedy action tensor([ 0.9472, -0.6013, 0.0400, -0.4990]) tensor([0.5400, 0.1148, 0.2180, 0.1272]) -Greedy action tensor([ 1.2992, -0.8556, 0.0625, -0.8716]) tensor([0.6577, 0.0762, 0.1910, 0.0750]) -Greedy action tensor([ 0.9124, -0.4835, -0.0213, -0.4253]) tensor([0.5254, 0.1301, 0.2066, 0.1379]) -Greedy action tensor([ 0.7127, -0.5101, -0.0845, -0.2722]) tensor([0.4720, 0.1390, 0.2127, 0.1763]) -Greedy action tensor([ 0.7855, -0.7585, 0.1602, -0.3898]) tensor([0.4861, 0.1038, 0.2601, 0.1501]) -Greedy action tensor([ 1.1498, -0.6563, -0.1551, -0.4957]) tensor([0.6141, 0.1009, 0.1665, 0.1185]) -Greedy action tensor([ 0.6413, -0.3486, -0.1232, -0.4598]) tensor([0.4609, 0.1713, 0.2146, 0.1532]) -Greedy action tensor([ 0.8392, -0.2959, -0.0872, -0.3009]) tensor([0.4909, 0.1578, 0.1944, 0.1570]) -Greedy action tensor([ 0.3744, 0.1549, 0.1882, -0.1788]) tensor([0.3117, 0.2503, 0.2587, 0.1793]) -Greedy action tensor([ 0.0083, 0.3031, -0.1669, -0.6249]) tensor([0.2693, 0.3617, 0.2260, 0.1430]) -Greedy action tensor([ 0.9859, -0.3334, -0.0367, -0.2061]) tensor([0.5180, 0.1385, 0.1863, 0.1573]) -Greedy action tensor([ 0.7235, -0.3699, 0.0347, -0.6220]) tensor([0.4767, 0.1597, 0.2394, 0.1241]) -Greedy action tensor([ 0.5080, 0.0503, -0.1028, -0.1274]) tensor([0.3696, 0.2339, 0.2007, 0.1958]) -Greedy action tensor([ 0.9916, -0.9218, -0.0732, -0.3742]) tensor([0.5722, 0.0844, 0.1973, 0.1460]) -Greedy action tensor([ 0.7830, -0.5065, 0.1838, -0.2237]) tensor([0.4566, 0.1258, 0.2508, 0.1668]) -Greedy action tensor([ 0.7863, -0.6883, 0.1855, -0.3285]) tensor([0.4750, 0.1087, 0.2605, 0.1558]) -Greedy action tensor([ 0.9151, -0.5991, -0.1608, -0.3004]) tensor([0.5383, 0.1184, 0.1836, 0.1597]) -Greedy action tensor([ 0.3629, -0.0733, -0.0728, -0.1957]) tensor([0.3490, 0.2256, 0.2257, 0.1996]) -Greedy action tensor([ 0.8239, -0.1385, 0.1889, -0.2259]) tensor([0.4421, 0.1689, 0.2343, 0.1547]) -Greedy action tensor([ 0.6308, -0.4151, -0.0591, -0.1380]) tensor([0.4317, 0.1517, 0.2165, 0.2001]) -Greedy action tensor([ 0.5609, -0.4355, -0.1679, -0.0442]) tensor([0.4171, 0.1540, 0.2012, 0.2277]) -Greedy action tensor([ 0.8551, -0.3779, -0.1674, -0.1910]) tensor([0.4994, 0.1455, 0.1796, 0.1754]) -Greedy action tensor([ 0.7574, 0.0896, -0.0430, 0.1360]) tensor([0.4001, 0.2052, 0.1797, 0.2149]) -Greedy action tensor([ 0.5108, 0.0505, -0.0140, -0.0353]) tensor([0.3569, 0.2252, 0.2112, 0.2067]) -Greedy action tensor([ 1.1891, -0.9023, 0.0930, -0.6343]) tensor([0.6176, 0.0763, 0.2064, 0.0997]) -Greedy action tensor([ 1.2901, -0.6772, 0.0360, -0.9871]) tensor([0.6546, 0.0915, 0.1868, 0.0671]) -Greedy action tensor([ 0.1834, -0.1220, -0.0565, -0.5227]) tensor([0.3314, 0.2442, 0.2608, 0.1636]) -Greedy action tensor([ 1.2560, -0.6492, 0.0854, -0.7902]) tensor([0.6296, 0.0937, 0.1953, 0.0814]) -Greedy action tensor([ 0.9591, -0.3709, -0.0367, -0.2314]) tensor([0.5160, 0.1365, 0.1906, 0.1569]) -Greedy action tensor([-1.6009, 0.7223, 0.4993, -0.4172]) tensor([0.0442, 0.4508, 0.3607, 0.1443]) -Greedy action tensor([-1.5492, -0.2348, 0.5248, 0.0919]) tensor([0.0561, 0.2087, 0.4460, 0.2893]) -Greedy action tensor([-1.4760, -0.4857, 0.4504, -0.0787]) tensor([0.0685, 0.1844, 0.4702, 0.2770]) -Greedy action tensor([-1.5283, 0.4647, 0.3254, 0.0444]) tensor([0.0512, 0.3755, 0.3267, 0.2466]) -Greedy action tensor([-1.6962, -0.0045, 0.5046, -0.1168]) tensor([0.0492, 0.2673, 0.4446, 0.2389]) -Greedy action tensor([-1.8026, -0.4917, 0.5981, -0.1151]) tensor([0.0473, 0.1754, 0.5216, 0.2557]) -Greedy action tensor([-1.5554, -0.1341, 0.5596, 0.0881]) tensor([0.0538, 0.2227, 0.4455, 0.2781]) -Greedy action tensor([-1.6066, -0.3556, 0.5760, 0.2655]) tensor([0.0503, 0.1759, 0.4465, 0.3273]) -Greedy action tensor([-1.8089, -0.3837, 0.5901, -0.1229]) tensor([0.0464, 0.1928, 0.5106, 0.2503]) -Greedy action tensor([-1.8707, -0.4512, 0.6354, -0.1420]) tensor([0.0434, 0.1796, 0.5323, 0.2447]) -Greedy action tensor([-1.8996, -0.4209, 0.6528, -0.1609]) tensor([0.0418, 0.1835, 0.5368, 0.2379]) -Greedy action tensor([-0.9022, 0.0175, -0.2498, 0.1139]) tensor([0.1221, 0.3063, 0.2344, 0.3372]) -Greedy action tensor([-1.8916, -0.3511, 0.6365, -0.1494]) tensor([0.0418, 0.1952, 0.5241, 0.2388]) -Greedy action tensor([-1.8767, -0.4626, 0.6829, -0.1220]) tensor([0.0420, 0.1726, 0.5427, 0.2427]) -Greedy action tensor([-1.8172, -0.4027, 0.6012, -0.1580]) tensor([0.0463, 0.1905, 0.5199, 0.2433]) -Greedy action tensor([-0.3857, -0.3182, 0.2526, 0.3318]) tensor([0.1663, 0.1779, 0.3149, 0.3409]) -Greedy action tensor([-1.7212, -0.4955, 0.6093, -0.0047]) tensor([0.0494, 0.1682, 0.5077, 0.2748]) -Greedy action tensor([-1.9112, -0.4037, 0.6483, -0.1614]) tensor([0.0413, 0.1866, 0.5343, 0.2378]) -Greedy action tensor([-1.8959, -0.4072, 0.6459, -0.1577]) tensor([0.0420, 0.1860, 0.5333, 0.2387]) -Greedy action tensor([-1.8623, -0.4352, 0.6282, -0.1404]) tensor([0.0438, 0.1825, 0.5286, 0.2451]) -Greedy action tensor([-1.9254, -0.4430, 0.6694, -0.1667]) tensor([0.0406, 0.1790, 0.5444, 0.2360]) -Greedy action tensor([-1.8839, -0.4358, 0.6330, -0.1558]) tensor([0.0430, 0.1828, 0.5323, 0.2419]) -Greedy action tensor([-0.2267, 1.0814, 0.0717, 0.0668]) tensor([0.1354, 0.5007, 0.1824, 0.1815]) -Greedy action tensor([-1.7925, -0.2640, 0.5607, -0.1144]) tensor([0.0465, 0.2146, 0.4896, 0.2493]) -Greedy action tensor([-1.9273, -0.4538, 0.6596, -0.1703]) tensor([0.0409, 0.1785, 0.5436, 0.2370]) -Greedy action tensor([-1.6892, -0.3401, 0.6668, 0.0258]) tensor([0.0477, 0.1839, 0.5033, 0.2651]) -Greedy action tensor([-1.8550, -0.2841, 0.5981, -0.1203]) tensor([0.0433, 0.2082, 0.5032, 0.2453]) -Greedy action tensor([-1.9285, -0.4630, 0.6661, -0.1716]) tensor([0.0408, 0.1766, 0.5462, 0.2364]) -Greedy action tensor([-1.7847, -0.4495, 0.5943, -0.0987]) tensor([0.0476, 0.1811, 0.5142, 0.2571]) -Greedy action tensor([-1.7792, -0.4189, 0.5707, -0.0901]) tensor([0.0481, 0.1874, 0.5041, 0.2604]) -Greedy action tensor([-1.9056, -0.4060, 0.6450, -0.1597]) tensor([0.0416, 0.1865, 0.5334, 0.2385]) -Greedy action tensor([-1.7500, -0.3806, 0.5599, -0.1066]) tensor([0.0496, 0.1949, 0.4992, 0.2563]) -Greedy action tensor([-1.9229, -0.4352, 0.6570, -0.1677]) tensor([0.0410, 0.1814, 0.5406, 0.2370]) -Greedy action tensor([-1.8770, -0.4383, 0.6330, -0.1474]) tensor([0.0432, 0.1820, 0.5313, 0.2435]) -Greedy action tensor([-1.8261, -0.0459, 0.5502, -0.0595]) tensor([0.0425, 0.2519, 0.4572, 0.2485]) -Greedy action tensor([-1.8984, -0.4581, 0.6490, -0.1593]) tensor([0.0422, 0.1782, 0.5393, 0.2403]) -Greedy action tensor([-1.7117, 0.1045, 0.5155, -0.0690]) tensor([0.0463, 0.2848, 0.4295, 0.2394]) -Greedy action tensor([-1.5606, -0.5417, 0.5213, -0.1906]) tensor([0.0636, 0.1762, 0.5100, 0.2503]) -Greedy action tensor([-1.6239, -0.4490, 0.5324, -0.0729]) tensor([0.0568, 0.1840, 0.4910, 0.2681]) -Greedy action tensor([-1.9281, -0.4250, 0.6596, -0.1710]) tensor([0.0407, 0.1828, 0.5408, 0.2357]) -Greedy action tensor([-1.9362, -0.4481, 0.6690, -0.1698]) tensor([0.0403, 0.1785, 0.5454, 0.2358]) -Greedy action tensor([-1.9098, -0.4355, 0.6526, -0.1600]) tensor([0.0415, 0.1813, 0.5383, 0.2388]) -Greedy action tensor([-1.8972, -0.4313, 0.6422, -0.1592]) tensor([0.0422, 0.1828, 0.5349, 0.2400]) -Greedy action tensor([-1.7979, -0.4156, 0.5997, -0.0929]) tensor([0.0466, 0.1855, 0.5119, 0.2561]) -Greedy action tensor([-1.9060, -0.4470, 0.6499, -0.1536]) tensor([0.0417, 0.1796, 0.5378, 0.2408]) -Greedy action tensor([-1.8771, -0.4429, 0.6337, -0.1504]) tensor([0.0432, 0.1814, 0.5324, 0.2430]) -Greedy action tensor([-1.7497, -0.4406, 0.5809, -0.0680]) tensor([0.0491, 0.1819, 0.5051, 0.2640]) -Greedy action tensor([-1.8587, -0.2553, 0.5992, -0.1425]) tensor([0.0431, 0.2141, 0.5032, 0.2397]) -Greedy action tensor([-1.9047, -0.4506, 0.6523, -0.1581]) tensor([0.0418, 0.1790, 0.5393, 0.2398]) -Greedy action tensor([-0.4437, 0.9448, 0.0362, 0.0615]) tensor([0.1207, 0.4840, 0.1951, 0.2001]) -Greedy action tensor([-1.4796, -0.0945, 0.3993, -0.0344]) tensor([0.0634, 0.2531, 0.4147, 0.2688]) -Greedy action tensor([-1.7095, -0.3361, 0.6572, 0.0920]) tensor([0.0461, 0.1822, 0.4921, 0.2796]) -Greedy action tensor([-1.4019, -0.5540, 0.4100, -0.0681]) tensor([0.0755, 0.1762, 0.4620, 0.2864]) -Greedy action tensor([-1.7587, -0.4408, 0.5804, -0.0771]) tensor([0.0488, 0.1824, 0.5064, 0.2624]) -Greedy action tensor([-1.9099, -0.4513, 0.6477, -0.1682]) tensor([0.0418, 0.1798, 0.5397, 0.2387]) -Greedy action tensor([-1.9130, -0.4139, 0.6407, -0.1581]) tensor([0.0415, 0.1857, 0.5331, 0.2398]) -Greedy action tensor([-1.9195, -0.4001, 0.6514, -0.1676]) tensor([0.0410, 0.1872, 0.5357, 0.2362]) -Greedy action tensor([-1.4464, -0.5220, 0.4391, 0.0936]) tensor([0.0677, 0.1706, 0.4460, 0.3157]) -Greedy action tensor([-1.8767, -0.4577, 0.6171, -0.1674]) tensor([0.0439, 0.1816, 0.5318, 0.2427]) -Greedy action tensor([-1.8232, -0.0985, 0.5641, -0.0997]) tensor([0.0433, 0.2429, 0.4712, 0.2426]) -Greedy action tensor([-1.2615, -0.4140, 0.3714, 0.2970]) tensor([0.0757, 0.1768, 0.3876, 0.3599]) -Greedy action tensor([-1.9248, -0.4095, 0.6560, -0.1699]) tensor([0.0407, 0.1854, 0.5382, 0.2356]) -Greedy action tensor([-1.8885, -0.2783, 0.6070, -0.1417]) tensor([0.0419, 0.2097, 0.5081, 0.2403]) -Greedy action tensor([-1.8947, -0.3206, 0.6293, -0.1578]) tensor([0.0417, 0.2012, 0.5203, 0.2368]) -Greedy action tensor([-1.8697, -0.4825, 0.6330, -0.1471]) tensor([0.0438, 0.1755, 0.5353, 0.2454]) -Greedy action tensor([-1.7752, 0.0861, 0.4909, -0.0698]) tensor([0.0443, 0.2849, 0.4270, 0.2438]) -Greedy action tensor([-1.8523, -0.4228, 0.6165, -0.1344]) tensor([0.0443, 0.1851, 0.5235, 0.2470]) -Greedy action tensor([-1.0768e+00, 6.0802e-01, 1.9476e-01, 1.0994e-04]) tensor([0.0776, 0.4182, 0.2766, 0.2277]) -Greedy action tensor([-1.0057, -0.3229, 0.3606, 0.4725]) tensor([0.0886, 0.1754, 0.3474, 0.3886]) -Greedy action tensor([-1.8830, -0.4534, 0.6410, -0.1469]) tensor([0.0429, 0.1790, 0.5349, 0.2432]) -Greedy action tensor([-1.8546, -0.4351, 0.6215, -0.1383]) tensor([0.0443, 0.1830, 0.5265, 0.2463]) -Greedy action tensor([-1.9381, -0.4520, 0.6664, -0.1741]) tensor([0.0404, 0.1784, 0.5458, 0.2355]) -Greedy action tensor([-1.3469, -0.4217, 0.4493, -0.1335]) tensor([0.0774, 0.1953, 0.4667, 0.2606]) -Greedy action tensor([-1.4499, 0.5787, 0.2736, 0.1370]) tensor([0.0524, 0.3982, 0.2935, 0.2560]) -Greedy action tensor([-1.6913, -0.4866, 0.5483, -0.0603]) tensor([0.0531, 0.1771, 0.4985, 0.2713]) -Greedy action tensor([-1.9470, -0.4423, 0.6647, -0.1822]) tensor([0.0401, 0.1804, 0.5456, 0.2339]) -Greedy action tensor([-1.8057, -0.0737, 0.5393, -0.0737]) tensor([0.0440, 0.2486, 0.4589, 0.2486]) -Greedy action tensor([-1.8766, -0.4751, 0.6295, -0.1470]) tensor([0.0436, 0.1769, 0.5339, 0.2456]) -Greedy action tensor([-1.9232, -0.4133, 0.6572, -0.1693]) tensor([0.0408, 0.1847, 0.5387, 0.2357]) -Greedy action tensor([-1.3593, -0.0469, 0.4200, -0.1410]) tensor([0.0713, 0.2649, 0.4226, 0.2411]) -Greedy action tensor([-1.9338, -0.4586, 0.6725, -0.1655]) tensor([0.0404, 0.1764, 0.5467, 0.2365]) -Greedy action tensor([ 0.3670, 0.7146, 0.8676, -0.1470]) tensor([0.2144, 0.3036, 0.3537, 0.1283]) -Greedy action tensor([-0.8543, -0.5539, -0.2733, -0.9633]) tensor([0.1986, 0.2682, 0.3551, 0.1781]) -Greedy action tensor([-0.7030, -1.2684, 0.2388, -0.6357]) tensor([0.1922, 0.1092, 0.4930, 0.2056]) -Greedy action tensor([ 0.9385, -0.7350, 0.6517, -0.2553]) tensor([0.4462, 0.0837, 0.3349, 0.1352]) -Greedy action tensor([ 0.7459, -1.3052, 0.3582, -0.6722]) tensor([0.4879, 0.0627, 0.3311, 0.1182]) -Greedy action tensor([ 0.0313, 0.0863, -0.4087, -0.0873]) tensor([0.2786, 0.2944, 0.1795, 0.2475]) -Greedy action tensor([-0.2575, -0.5584, -0.9469, -0.2219]) tensor([0.3050, 0.2258, 0.1531, 0.3161]) -Greedy action tensor([ 1.2364e+00, -4.2585e-02, 4.4706e-04, 8.1159e-01]) tensor([0.4499, 0.1252, 0.1307, 0.2942]) -Greedy action tensor([-0.5560, -1.0595, -0.1557, -1.2220]) tensor([0.2770, 0.1674, 0.4133, 0.1423]) -Greedy action tensor([ 0.3169, -0.5940, -0.0032, -0.6898]) tensor([0.4010, 0.1613, 0.2912, 0.1465]) -Greedy action tensor([-0.2743, -1.3643, 0.0549, -0.4160]) tensor([0.2782, 0.0936, 0.3867, 0.2415]) -Greedy action tensor([-0.4343, 0.0766, 0.1831, -0.6466]) tensor([0.1876, 0.3127, 0.3479, 0.1517]) -Greedy action tensor([-0.0116, -1.8574, 0.7030, -0.7140]) tensor([0.2705, 0.0427, 0.5528, 0.1340]) -Greedy action tensor([ 0.1375, -0.1095, 0.1747, -0.5864]) tensor([0.3027, 0.2364, 0.3142, 0.1467]) -Greedy action tensor([ 1.3286, -1.5644, 0.4277, 0.3954]) tensor([0.5391, 0.0299, 0.2190, 0.2120]) -Greedy action tensor([ 0.9060, -0.4119, -0.2754, 0.2761]) tensor([0.4746, 0.1270, 0.1456, 0.2528]) -Greedy action tensor([-0.2313, 0.0409, -0.2240, -0.3423]) tensor([0.2372, 0.3115, 0.2390, 0.2123]) -Greedy action tensor([-0.1180, -0.0620, -0.4719, -0.7976]) tensor([0.3062, 0.3238, 0.2149, 0.1552]) -Greedy action tensor([-0.3682, -0.2330, -0.5983, -1.0594]) tensor([0.2907, 0.3328, 0.2309, 0.1456]) -Greedy action tensor([ 0.1079, -0.8918, -0.9285, -0.3173]) tensor([0.4208, 0.1549, 0.1493, 0.2751]) -Greedy action tensor([-0.0290, -0.5489, 0.2696, -0.1876]) tensor([0.2634, 0.1566, 0.3551, 0.2248]) -Greedy action tensor([ 1.0741, 0.0913, -0.0474, -0.1768]) tensor([0.5034, 0.1884, 0.1640, 0.1441]) -Greedy action tensor([-0.1743, -0.7942, 0.2419, 0.1965]) tensor([0.2221, 0.1195, 0.3367, 0.3217]) -Greedy action tensor([-0.9022, -0.9973, 0.7623, -1.2918]) tensor([0.1271, 0.1155, 0.6713, 0.0861]) -Greedy action tensor([-0.1256, -0.7634, 1.2191, -1.3161]) tensor([0.1764, 0.0932, 0.6768, 0.0536]) -Greedy action tensor([-1.0690, -0.3332, -0.0184, -0.1506]) tensor([0.1183, 0.2469, 0.3383, 0.2964]) -Greedy action tensor([ 0.8350, -0.5024, 0.0053, 0.5961]) tensor([0.4022, 0.1056, 0.1754, 0.3167]) -Greedy action tensor([ 0.2583, 0.5440, 0.6197, -0.4474]) tensor([0.2348, 0.3124, 0.3369, 0.1159]) -Greedy action tensor([-0.2556, -0.9422, -0.6297, -1.0882]) tensor([0.3808, 0.1916, 0.2620, 0.1656]) -Greedy action tensor([-0.7698, -0.2663, -0.7499, 0.4868]) tensor([0.1391, 0.2302, 0.1419, 0.4888]) -Greedy action tensor([-1.7195, -0.7670, -0.5965, -0.0775]) tensor([0.0845, 0.2191, 0.2598, 0.4366]) -Greedy action tensor([ 0.3815, -0.0397, 0.6079, -0.4798]) tensor([0.3000, 0.1969, 0.3763, 0.1268]) -Greedy action tensor([ 0.3913, 0.2793, -1.2045, -0.4957]) tensor([0.3986, 0.3564, 0.0808, 0.1642]) -Greedy action tensor([-0.1839, -0.9437, -0.2192, -0.1357]) tensor([0.2872, 0.1343, 0.2772, 0.3013]) -Greedy action tensor([-0.6223, 0.5123, -0.3614, -0.1891]) tensor([0.1439, 0.4475, 0.1868, 0.2219]) -Greedy action tensor([ 0.4094, -0.6646, -0.2727, 0.1777]) tensor([0.3787, 0.1294, 0.1915, 0.3004]) -Greedy action tensor([-0.4129, 0.4011, -0.2933, -0.8923]) tensor([0.1999, 0.4511, 0.2253, 0.1238]) -Greedy action tensor([-0.1972, -0.1521, -1.0453, -0.1175]) tensor([0.2811, 0.2941, 0.1204, 0.3044]) -Greedy action tensor([ 1.3627, -0.1551, -0.6144, 0.3411]) tensor([0.5822, 0.1276, 0.0806, 0.2096]) -Greedy action tensor([ 0.7730, -1.9217, 0.0619, 0.6050]) tensor([0.4160, 0.0281, 0.2043, 0.3516]) -Greedy action tensor([ 0.5785, -0.5760, -0.8269, -0.5154]) tensor([0.5276, 0.1663, 0.1294, 0.1767]) -Greedy action tensor([-0.2653, 0.2857, 0.1167, -0.1055]) tensor([0.1861, 0.3229, 0.2727, 0.2183]) -Greedy action tensor([ 0.3303, -0.1321, 0.3363, 0.2421]) tensor([0.2816, 0.1773, 0.2833, 0.2578]) -Greedy action tensor([ 0.9889, -0.3858, 0.2475, 0.6594]) tensor([0.4084, 0.1033, 0.1946, 0.2937]) -Greedy action tensor([ 0.7504, -1.1697, -0.1894, -0.5051]) tensor([0.5488, 0.0804, 0.2144, 0.1564]) -Greedy action tensor([-0.0097, 0.1140, 0.1768, -0.7174]) tensor([0.2611, 0.2955, 0.3147, 0.1287]) -Greedy action tensor([ 0.6167, 0.7649, -0.5995, -0.5283]) tensor([0.3604, 0.4180, 0.1068, 0.1147]) -Greedy action tensor([-1.3131, -0.9933, 0.8086, -1.2276]) tensor([0.0847, 0.1166, 0.7066, 0.0922]) -Greedy action tensor([-0.3429, -0.1455, -0.1371, -0.5215]) tensor([0.2335, 0.2844, 0.2868, 0.1953]) -Greedy action tensor([-0.0784, -1.0820, -0.3102, 0.2937]) tensor([0.2770, 0.1015, 0.2197, 0.4018]) -Greedy action tensor([ 0.4892, 0.0566, 0.7153, -0.0442]) tensor([0.2866, 0.1860, 0.3593, 0.1681]) -Greedy action tensor([-0.3742, -0.3561, -0.3115, -0.7257]) tensor([0.2641, 0.2689, 0.2812, 0.1858]) -Greedy action tensor([ 0.0866, -1.6774, 1.1900, -0.1246]) tensor([0.2002, 0.0343, 0.6034, 0.1621]) -Greedy action tensor([ 0.0620, -0.7871, -0.5942, -0.4395]) tensor([0.3918, 0.1676, 0.2033, 0.2373]) -Greedy action tensor([ 0.7190, -2.1198, -0.8743, -0.2867]) tensor([0.6144, 0.0359, 0.1249, 0.2247]) -Greedy action tensor([ 2.0105, -1.0723, -0.2314, 0.6281]) tensor([0.7127, 0.0327, 0.0757, 0.1789]) -Greedy action tensor([-0.1748, -1.4637, -0.4203, -0.0975]) tensor([0.3186, 0.0878, 0.2493, 0.3443]) -Greedy action tensor([ 0.8894, 0.2778, 0.0374, -0.6775]) tensor([0.4592, 0.2491, 0.1959, 0.0958]) -Greedy action tensor([-0.1870, -0.7669, -0.5876, -0.7725]) tensor([0.3589, 0.2009, 0.2404, 0.1998]) -Greedy action tensor([ 0.4658, -0.6005, 0.0242, 0.0228]) tensor([0.3803, 0.1309, 0.2445, 0.2442]) -Greedy action tensor([ 0.5711, -0.4839, 0.0867, 0.2617]) tensor([0.3706, 0.1291, 0.2283, 0.2720]) -Greedy action tensor([-1.1122, 0.1036, -0.9693, 0.8310]) tensor([0.0799, 0.2697, 0.0922, 0.5581]) -Greedy action tensor([-0.6294, 0.0965, -0.6326, 0.1211]) tensor([0.1618, 0.3343, 0.1612, 0.3426]) -Greedy action tensor([ 1.0452, -0.8161, -0.8916, 0.1303]) tensor([0.5882, 0.0914, 0.0848, 0.2356]) -Greedy action tensor([ 0.3696, -0.1891, -0.7654, 1.3008]) tensor([0.2257, 0.1291, 0.0725, 0.5727]) -Greedy action tensor([ 0.4069, -1.4932, 0.7592, -0.7334]) tensor([0.3458, 0.0517, 0.4919, 0.1106]) -Greedy action tensor([1.4022, 0.1837, 0.3559, 0.2597]) tensor([0.5087, 0.1504, 0.1787, 0.1623]) -Greedy action tensor([-0.4423, 0.4400, -0.6100, -0.4833]) tensor([0.1915, 0.4628, 0.1619, 0.1838]) -Greedy action tensor([-0.7995, -0.4069, -0.1218, -1.1544]) tensor([0.1941, 0.2875, 0.3823, 0.1361]) -Greedy action tensor([ 1.4622, 0.1273, -0.4569, 1.1258]) tensor([0.4708, 0.1239, 0.0691, 0.3363]) -Greedy action tensor([ 0.3740, -0.4356, -0.2689, -0.0123]) tensor([0.3773, 0.1679, 0.1984, 0.2564]) -Greedy action tensor([-0.9785, 0.1709, -0.8912, -0.3590]) tensor([0.1407, 0.4442, 0.1536, 0.2615]) -Greedy action tensor([-0.5261, -0.3897, 0.4789, -0.9766]) tensor([0.1813, 0.2078, 0.4953, 0.1155]) -Greedy action tensor([-0.3306, -0.5471, 0.1589, -0.4880]) tensor([0.2330, 0.1877, 0.3802, 0.1991]) -Greedy action tensor([ 1.3948, -0.2842, 0.7970, 0.4361]) tensor([0.4717, 0.0880, 0.2594, 0.1808]) -Greedy action tensor([ 0.9570, -0.9526, 0.4350, 0.8577]) tensor([0.3778, 0.0560, 0.2241, 0.3421]) -Greedy action tensor([-0.0504, 0.2324, 0.1468, -0.5450]) tensor([0.2407, 0.3194, 0.2932, 0.1468]) -Greedy action tensor([ 0.1977, -0.6751, -0.3610, 0.2478]) tensor([0.3288, 0.1374, 0.1881, 0.3457]) -Greedy action tensor([ 0.4839, 0.2963, -0.1342, 1.4184]) tensor([0.2035, 0.1687, 0.1097, 0.5181]) -Greedy action tensor([-0.3871, -1.2007, -0.2933, 0.0697]) tensor([0.2427, 0.1076, 0.2666, 0.3832]) -Greedy action tensor([-0.2279, 0.4446, 0.4856, -0.2462]) tensor([0.1672, 0.3275, 0.3412, 0.1641]) -Greedy action tensor([ 1.9122, -1.3450, -0.0496, 0.4373]) tensor([0.7103, 0.0273, 0.0999, 0.1625]) -Greedy action tensor([ 1.7000, -0.2962, -0.3263, 0.6714]) tensor([0.6153, 0.0836, 0.0811, 0.2200]) -Greedy action tensor([ 1.4545, 0.1015, -0.5501, 0.2799]) tensor([0.5875, 0.1519, 0.0791, 0.1815]) -Greedy action tensor([ 1.4756, -0.6725, -0.3074, -0.1530]) tensor([0.6752, 0.0788, 0.1135, 0.1325]) -Greedy action tensor([ 1.3849, -0.5636, -0.5927, 0.5234]) tensor([0.5870, 0.0836, 0.0813, 0.2481]) -Greedy action tensor([ 1.2630, -0.7190, -0.7994, 0.6880]) tensor([0.5472, 0.0754, 0.0696, 0.3079]) -Greedy action tensor([ 1.5277, -0.7440, -0.5492, 0.2702]) tensor([0.6610, 0.0682, 0.0828, 0.1880]) -Greedy action tensor([ 1.3637, -0.1753, -0.5602, 0.3737]) tensor([0.5773, 0.1239, 0.0843, 0.2145]) -Greedy action tensor([ 1.1592, 0.0019, -0.7397, 0.0763]) tensor([0.5547, 0.1744, 0.0831, 0.1878]) -Greedy action tensor([ 1.4868, -0.4250, -0.4425, 0.2460]) tensor([0.6320, 0.0934, 0.0918, 0.1827]) -Greedy action tensor([ 1.5637, -0.6909, -0.5457, 0.2424]) tensor([0.6698, 0.0703, 0.0813, 0.1787]) -Greedy action tensor([ 1.9505, -1.1157, 0.4119, 0.8432]) tensor([0.6282, 0.0293, 0.1349, 0.2076]) -Greedy action tensor([ 1.6460, -0.3419, -1.0126, 0.1885]) tensor([0.6945, 0.0951, 0.0486, 0.1617]) -Greedy action tensor([ 1.8053, -0.6105, -0.6378, 0.4812]) tensor([0.6934, 0.0619, 0.0602, 0.1845]) -Greedy action tensor([ 1.6194, -0.1468, -0.1945, 0.0053]) tensor([0.6523, 0.1115, 0.1063, 0.1298]) -Greedy action tensor([ 0.9889, -0.1845, -0.9624, 0.1048]) tensor([0.5363, 0.1659, 0.0762, 0.2216]) -Greedy action tensor([ 1.2181e+00, 1.1817e-03, -4.2035e-01, 9.4694e-02]) tensor([0.5508, 0.1631, 0.1070, 0.1791]) -Greedy action tensor([ 1.7941, -0.1756, -0.6922, 0.1787]) tensor([0.7035, 0.0981, 0.0585, 0.1399]) -Greedy action tensor([ 1.2464, -0.3023, -0.4713, 0.0103]) tensor([0.5943, 0.1263, 0.1067, 0.1727]) -Greedy action tensor([ 1.2818, -0.2511, -1.1129, 0.3117]) tensor([0.5931, 0.1280, 0.0541, 0.2248]) -Greedy action tensor([ 1.0690, -0.0294, -0.1689, 0.2291]) tensor([0.4866, 0.1622, 0.1411, 0.2101]) -Greedy action tensor([ 0.9560, -0.0815, -0.2630, -0.2044]) tensor([0.5094, 0.1805, 0.1505, 0.1596]) -Greedy action tensor([ 1.4540, -0.6558, -0.6420, 0.2067]) tensor([0.6530, 0.0792, 0.0803, 0.1876]) -Greedy action tensor([ 1.6694, -0.0878, -0.4023, 0.0930]) tensor([0.6643, 0.1146, 0.0837, 0.1373]) -Greedy action tensor([ 1.7550, -0.2263, -0.3163, -0.0862]) tensor([0.7030, 0.0969, 0.0886, 0.1115]) -Greedy action tensor([ 1.1906, -0.4611, -1.1041, 0.2888]) tensor([0.5888, 0.1129, 0.0593, 0.2390]) -Greedy action tensor([ 1.4280, -0.0418, -0.6763, 0.1940]) tensor([0.6086, 0.1400, 0.0742, 0.1772]) -Greedy action tensor([ 1.1971, -0.0123, -1.0008, 0.1935]) tensor([0.5631, 0.1680, 0.0625, 0.2064]) -Greedy action tensor([ 1.5809, -0.1878, -0.4843, -0.1591]) tensor([0.6789, 0.1158, 0.0861, 0.1192]) -Greedy action tensor([ 1.4761, -0.4923, -0.2546, 0.4995]) tensor([0.5905, 0.0825, 0.1046, 0.2224]) -Greedy action tensor([ 2.0121, -1.1184, -0.1797, 0.2028]) tensor([0.7581, 0.0331, 0.0847, 0.1241]) -Greedy action tensor([ 1.3243, -1.0734, -0.0292, 0.7953]) tensor([0.5159, 0.0469, 0.1333, 0.3040]) -Greedy action tensor([ 2.4769, 0.6108, -0.0409, 0.2389]) tensor([0.7451, 0.1153, 0.0601, 0.0795]) -Greedy action tensor([ 1.2363, -0.4143, -0.3669, 0.1493]) tensor([0.5779, 0.1109, 0.1163, 0.1949]) -Greedy action tensor([ 1.6130, -0.8853, -0.1595, 0.4214]) tensor([0.6427, 0.0528, 0.1092, 0.1952]) -Greedy action tensor([ 1.3536, -0.0592, -0.8967, 0.4115]) tensor([0.5752, 0.1400, 0.0606, 0.2242]) -Greedy action tensor([ 1.9232, -1.1649, -0.1077, 0.4819]) tensor([0.7075, 0.0323, 0.0928, 0.1674]) -Greedy action tensor([ 1.2335, -0.6402, -0.4408, 0.0188]) tensor([0.6106, 0.0938, 0.1144, 0.1812]) -Greedy action tensor([ 1.5220, 0.0464, -1.0546, 0.6359]) tensor([0.5824, 0.1332, 0.0443, 0.2401]) -Greedy action tensor([ 1.6107, -1.0118, -0.3717, 0.5600]) tensor([0.6410, 0.0466, 0.0883, 0.2242]) -Greedy action tensor([ 1.3247, -0.3701, -0.0038, -0.2007]) tensor([0.6002, 0.1102, 0.1590, 0.1306]) -Greedy action tensor([ 0.8977, -0.3146, -0.1365, 0.3985]) tensor([0.4425, 0.1316, 0.1573, 0.2686]) -Greedy action tensor([ 1.6019, -0.8135, -0.2948, 0.5903]) tensor([0.6238, 0.0557, 0.0936, 0.2268]) -Greedy action tensor([ 0.8720, -0.4679, -0.5485, 0.4764]) tensor([0.4594, 0.1203, 0.1110, 0.3093]) -Greedy action tensor([ 1.4109, -0.4352, -0.5174, 0.4002]) tensor([0.5998, 0.0947, 0.0872, 0.2183]) -Greedy action tensor([ 1.5382, -0.7684, -0.3993, -0.0061]) tensor([0.6863, 0.0684, 0.0989, 0.1465]) -Greedy action tensor([ 1.0809, -0.1994, -0.8323, -0.1973]) tensor([0.5868, 0.1631, 0.0866, 0.1635]) -Greedy action tensor([ 1.2574e+00, -7.7313e-04, -4.8369e-01, -1.5525e-02]) tensor([0.5749, 0.1634, 0.1008, 0.1610]) -Greedy action tensor([ 0.9841, -0.6056, -0.5584, 0.4238]) tensor([0.5028, 0.1026, 0.1075, 0.2871]) -Greedy action tensor([ 2.5298, -1.5504, -0.1149, 1.0937]) tensor([0.7543, 0.0127, 0.0536, 0.1794]) -Greedy action tensor([ 1.7720, -0.4413, -0.3124, -0.0960]) tensor([0.7204, 0.0788, 0.0896, 0.1113]) -Greedy action tensor([ 1.4789, -0.2610, -0.8451, 0.0045]) tensor([0.6656, 0.1168, 0.0651, 0.1524]) -Greedy action tensor([ 1.3934, -0.4164, -0.2250, 0.3042]) tensor([0.5888, 0.0964, 0.1167, 0.1981]) -Greedy action tensor([ 1.5599, -0.3718, -0.4100, -0.0230]) tensor([0.6712, 0.0973, 0.0936, 0.1379]) -Greedy action tensor([ 1.7434, -0.6409, -1.2051, 0.4518]) tensor([0.7045, 0.0649, 0.0369, 0.1936]) -Greedy action tensor([ 1.1117, -0.1337, -0.5985, -0.0764]) tensor([0.5639, 0.1623, 0.1020, 0.1719]) -Greedy action tensor([ 1.5228, -0.5862, -0.6157, 0.1777]) tensor([0.6668, 0.0809, 0.0786, 0.1737]) -Greedy action tensor([ 1.3405, -0.1443, -0.6083, 0.2493]) tensor([0.5866, 0.1329, 0.0836, 0.1970]) -Greedy action tensor([ 1.0026, -0.0382, -0.6928, 0.2439]) tensor([0.4988, 0.1761, 0.0915, 0.2336]) -Greedy action tensor([ 1.2262, -0.5198, -0.2921, 0.4198]) tensor([0.5435, 0.0948, 0.1191, 0.2426]) -Greedy action tensor([ 0.9232, 0.0752, -0.3797, -0.0738]) tensor([0.4833, 0.2070, 0.1313, 0.1783]) -Greedy action tensor([ 1.2106, -0.3718, -0.5236, 0.0496]) tensor([0.5899, 0.1212, 0.1041, 0.1847]) -Greedy action tensor([ 1.1337, -0.2605, -0.6743, 0.1966]) tensor([0.5544, 0.1375, 0.0909, 0.2172]) -Greedy action tensor([ 0.9929, -0.5185, -0.2706, 0.3612]) tensor([0.4914, 0.1084, 0.1389, 0.2613]) -Greedy action tensor([ 1.2979, -0.2264, -0.2166, 0.0118]) tensor([0.5834, 0.1271, 0.1283, 0.1612]) -Greedy action tensor([ 2.3908, -0.7742, -0.2504, 0.7747]) tensor([0.7621, 0.0322, 0.0543, 0.1514]) -Greedy action tensor([1.4589, 0.1211, 0.0061, 0.2627]) tensor([0.5560, 0.1459, 0.1300, 0.1681]) -Greedy action tensor([ 1.3829, -0.3896, -0.7656, 0.2156]) tensor([0.6259, 0.1063, 0.0730, 0.1948]) -Greedy action tensor([ 1.6216, -0.5461, -0.3346, 0.1510]) tensor([0.6731, 0.0770, 0.0952, 0.1547]) -Greedy action tensor([ 1.8574, -1.1529, -0.2610, 0.2101]) tensor([0.7342, 0.0362, 0.0883, 0.1414]) -Greedy action tensor([ 1.4522, -0.9340, -0.3718, 0.4869]) tensor([0.6119, 0.0563, 0.0987, 0.2331]) -Greedy action tensor([ 2.7779, -1.4208, -0.2477, 0.8763]) tensor([0.8245, 0.0124, 0.0400, 0.1231]) -Greedy action tensor([ 1.2578, -0.7309, -0.3271, 0.6735]) tensor([0.5265, 0.0721, 0.1079, 0.2935]) -Greedy action tensor([ 1.1514, -0.3872, -0.1510, 0.2388]) tensor([0.5297, 0.1137, 0.1440, 0.2126]) -Greedy action tensor([ 1.4929, -0.5744, -0.3506, 0.2287]) tensor([0.6380, 0.0807, 0.1010, 0.1802]) -Greedy action tensor([ 1.5257, -0.5608, -0.7121, 0.2474]) tensor([0.6625, 0.0822, 0.0707, 0.1845]) -Greedy action tensor([ 2.5129, 0.4419, -0.2239, 0.5411]) tensor([0.7519, 0.0948, 0.0487, 0.1047]) -Greedy action tensor([ 1.5811, -0.3404, -0.8147, 0.1257]) tensor([0.6799, 0.0995, 0.0619, 0.1586]) -Greedy action tensor([ 1.8715, 0.3225, -0.4539, 0.1163]) tensor([0.6743, 0.1433, 0.0659, 0.1166]) -Greedy action tensor([ 1.0502, -0.3602, -0.1868, -0.0568]) tensor([0.5362, 0.1309, 0.1556, 0.1773]) -Greedy action tensor([ 1.5322, -0.1557, -0.4878, -0.0871]) tensor([0.6598, 0.1220, 0.0875, 0.1307]) -Greedy action tensor([ 1.2403, -0.8236, 0.1230, -0.6849]) tensor([0.6250, 0.0793, 0.2045, 0.0912]) -Greedy action tensor([ 0.9682, -0.5413, 0.0022, -0.3223]) tensor([0.5328, 0.1178, 0.2028, 0.1466]) -Greedy action tensor([ 0.7537, -0.2864, -0.0399, -0.1035]) tensor([0.4484, 0.1585, 0.2028, 0.1903]) -Greedy action tensor([ 1.0754, -0.8600, -0.0587, -0.4925]) tensor([0.5972, 0.0862, 0.1921, 0.1245]) -Greedy action tensor([ 0.4048, -0.0088, -0.2175, 0.0747]) tensor([0.3428, 0.2267, 0.1840, 0.2465]) -Greedy action tensor([ 0.5889, -0.3762, -0.0742, -0.3010]) tensor([0.4335, 0.1651, 0.2234, 0.1780]) -Greedy action tensor([ 0.5600, 0.0255, -0.0434, -0.2364]) tensor([0.3870, 0.2268, 0.2117, 0.1745]) -Greedy action tensor([ 0.9317, -0.7440, -0.0175, -0.4179]) tensor([0.5454, 0.1021, 0.2111, 0.1414]) -Greedy action tensor([ 0.6393, -0.1191, -0.0019, -0.0048]) tensor([0.3968, 0.1859, 0.2090, 0.2084]) -Greedy action tensor([ 0.5668, 0.1251, -0.1058, -0.1381]) tensor([0.3777, 0.2428, 0.1928, 0.1867]) -Greedy action tensor([ 1.2240, -1.0014, 0.0928, -0.7187]) tensor([0.6353, 0.0686, 0.2050, 0.0910]) -Greedy action tensor([ 1.0659, -0.8111, 0.1161, -0.3947]) tensor([0.5643, 0.0864, 0.2183, 0.1310]) -Greedy action tensor([ 0.8532, -0.4200, 0.0190, -0.2386]) tensor([0.4879, 0.1366, 0.2118, 0.1637]) -Greedy action tensor([ 0.8834, -0.5775, -0.0838, -0.4443]) tensor([0.5327, 0.1236, 0.2025, 0.1412]) -Greedy action tensor([ 0.8564, -0.2848, 0.0533, -0.0873]) tensor([0.4637, 0.1481, 0.2077, 0.1805]) -Greedy action tensor([ 1.2222, -0.7541, -0.1160, -0.7655]) tensor([0.6502, 0.0901, 0.1706, 0.0891]) -Greedy action tensor([ 0.4645, -0.1768, -0.1216, -0.0443]) tensor([0.3725, 0.1962, 0.2073, 0.2240]) -Greedy action tensor([ 0.9519, -0.5972, 0.1079, -0.6114]) tensor([0.5400, 0.1147, 0.2322, 0.1131]) -Greedy action tensor([ 0.9774, -0.6651, 0.1702, -0.5921]) tensor([0.5412, 0.1047, 0.2414, 0.1127]) -Greedy action tensor([ 0.3898, -0.0901, 0.0212, -0.0405]) tensor([0.3377, 0.2090, 0.2336, 0.2196]) -Greedy action tensor([ 0.8871, -0.6891, -0.1092, -0.3789]) tensor([0.5382, 0.1113, 0.1987, 0.1518]) -Greedy action tensor([ 4.7937e-01, -4.3476e-02, 1.6621e-04, -1.7400e-02]) tensor([0.3545, 0.2102, 0.2196, 0.2157]) -Greedy action tensor([ 0.6786, -0.1951, -0.0807, -0.2046]) tensor([0.4350, 0.1816, 0.2036, 0.1799]) -Greedy action tensor([ 0.8723, -0.5803, -0.0181, -0.3840]) tensor([0.5184, 0.1213, 0.2128, 0.1476]) -Greedy action tensor([ 0.9246, -0.5086, -0.0180, -0.2860]) tensor([0.5191, 0.1238, 0.2023, 0.1547]) -Greedy action tensor([ 0.5984, 0.1221, -0.0779, 0.0690]) tensor([0.3679, 0.2285, 0.1870, 0.2166]) -Greedy action tensor([ 1.2811, -0.5726, -0.0776, -0.3522]) tensor([0.6215, 0.0974, 0.1597, 0.1214]) -Greedy action tensor([ 0.6927, -0.3057, 0.0218, -0.3599]) tensor([0.4487, 0.1653, 0.2294, 0.1566]) -Greedy action tensor([ 0.0885, 0.0393, -0.3264, -0.2777]) tensor([0.3025, 0.2880, 0.1998, 0.2097]) -Greedy action tensor([ 0.3237, -0.1211, -0.0762, -0.1915]) tensor([0.3438, 0.2204, 0.2305, 0.2054]) -Greedy action tensor([ 0.6615, -0.4938, -0.0115, -0.4012]) tensor([0.4607, 0.1451, 0.2350, 0.1592]) -Greedy action tensor([ 0.8527, -0.3788, -0.1478, -0.3600]) tensor([0.5110, 0.1491, 0.1879, 0.1520]) -Greedy action tensor([ 0.5523, -0.2764, -0.0481, -0.1956]) tensor([0.4067, 0.1776, 0.2231, 0.1925]) -Greedy action tensor([ 0.9504, -0.0976, -0.1396, -0.2236]) tensor([0.5010, 0.1757, 0.1684, 0.1549]) -Greedy action tensor([ 0.7015, -0.5339, -0.1334, -0.0628]) tensor([0.4566, 0.1327, 0.1981, 0.2126]) -Greedy action tensor([ 0.7254, -0.1467, -0.0588, -0.0906]) tensor([0.4316, 0.1805, 0.1970, 0.1909]) -Greedy action tensor([ 1.1092, -0.7265, -0.0151, -0.6693]) tensor([0.6049, 0.0965, 0.1965, 0.1022]) -Greedy action tensor([ 0.5628, -0.3558, -0.1892, -0.1877]) tensor([0.4269, 0.1704, 0.2012, 0.2015]) -Greedy action tensor([ 0.8904, -0.6210, 0.0562, -0.4370]) tensor([0.5208, 0.1149, 0.2262, 0.1381]) -Greedy action tensor([ 0.8826, -0.8941, -0.0010, -0.3649]) tensor([0.5348, 0.0905, 0.2210, 0.1536]) -Greedy action tensor([ 0.9365, -0.7315, 0.0457, -0.4613]) tensor([0.5417, 0.1022, 0.2223, 0.1339]) -Greedy action tensor([ 0.9591, -0.6464, -0.0210, -0.3633]) tensor([0.5427, 0.1090, 0.2037, 0.1446]) -Greedy action tensor([ 1.1915, -0.6414, 0.1358, -0.6436]) tensor([0.5997, 0.0959, 0.2087, 0.0957]) -Greedy action tensor([ 1.0031, -0.5648, -0.0618, -0.4609]) tensor([0.5604, 0.1168, 0.1932, 0.1296]) -Greedy action tensor([0.5522, 0.1381, 0.0481, 0.1505]) tensor([0.3408, 0.2253, 0.2059, 0.2281]) -Greedy action tensor([ 0.6527, 0.2153, -0.1071, 0.0556]) tensor([0.3754, 0.2424, 0.1756, 0.2066]) -Greedy action tensor([ 1.0331, -0.7108, 0.0082, -0.5967]) tensor([0.5782, 0.1011, 0.2075, 0.1133]) -Greedy action tensor([ 0.7482, -0.3528, -0.0563, -0.2714]) tensor([0.4672, 0.1554, 0.2090, 0.1685]) -Greedy action tensor([ 0.8356, -0.5944, -0.0057, -0.4007]) tensor([0.5100, 0.1220, 0.2199, 0.1481]) -Greedy action tensor([ 0.9682, -0.5820, 0.0309, -0.4025]) tensor([0.5383, 0.1142, 0.2108, 0.1367]) -Greedy action tensor([ 1.1230, -0.5208, -0.0946, -0.5195]) tensor([0.5943, 0.1148, 0.1759, 0.1150]) -Greedy action tensor([ 0.5396, -0.1155, 0.0774, -0.0181]) tensor([0.3674, 0.1908, 0.2314, 0.2103]) -Greedy action tensor([ 0.8730, -0.5731, -0.0279, -0.4031]) tensor([0.5206, 0.1226, 0.2115, 0.1453]) -Greedy action tensor([ 0.3326, 0.2773, -0.0782, 0.0376]) tensor([0.2982, 0.2821, 0.1977, 0.2220]) -Greedy action tensor([ 0.5163, -0.1487, -0.0734, -0.3052]) tensor([0.3986, 0.2050, 0.2210, 0.1753]) -Greedy action tensor([ 0.7651, -0.3019, -0.2010, -0.2561]) tensor([0.4797, 0.1650, 0.1825, 0.1728]) -Greedy action tensor([ 0.9439, -0.7903, -0.0208, -0.5105]) tensor([0.5583, 0.0986, 0.2128, 0.1304]) -Greedy action tensor([ 0.4634, -0.2254, -0.0302, -0.3390]) tensor([0.3905, 0.1961, 0.2384, 0.1750]) -Greedy action tensor([ 0.5261, -0.6050, -0.1621, -0.2080]) tensor([0.4338, 0.1400, 0.2180, 0.2082]) -Greedy action tensor([ 0.5768, -0.1525, 0.0242, -0.2618]) tensor([0.4016, 0.1937, 0.2311, 0.1736]) -Greedy action tensor([ 0.7878, -0.1814, 0.2318, -0.1484]) tensor([0.4264, 0.1618, 0.2446, 0.1672]) -Greedy action tensor([ 0.5457, -0.3987, -0.1068, -0.1973]) tensor([0.4192, 0.1631, 0.2183, 0.1994]) -Greedy action tensor([ 1.2098, -0.7002, 0.0181, -0.7431]) tensor([0.6275, 0.0929, 0.1906, 0.0890]) -Greedy action tensor([ 0.5451, -0.1126, 0.0307, -0.0620]) tensor([0.3758, 0.1947, 0.2247, 0.2048]) -Greedy action tensor([ 0.9065, -1.0144, -0.1162, -0.3886]) tensor([0.5618, 0.0823, 0.2020, 0.1539]) -Greedy action tensor([ 0.7726, -0.6180, -0.0105, -0.2295]) tensor([0.4824, 0.1201, 0.2204, 0.1771]) -Greedy action tensor([ 0.8233, -0.1191, -0.0912, -0.4042]) tensor([0.4800, 0.1870, 0.1923, 0.1406]) -Greedy action tensor([ 0.7901, -0.0782, -0.1205, -0.0073]) tensor([0.4401, 0.1847, 0.1770, 0.1982]) -Greedy action tensor([ 1.3493, -0.6595, -0.1643, -0.5001]) tensor([0.6616, 0.0887, 0.1456, 0.1041]) -Greedy action tensor([ 0.5258, -0.1972, -0.1562, -0.3648]) tensor([0.4164, 0.2021, 0.2106, 0.1709]) -Greedy action tensor([ 0.6164, -0.2012, -0.2086, 0.0163]) tensor([0.4118, 0.1818, 0.1805, 0.2260]) -Greedy action tensor([ 0.8285, -0.1565, 0.0879, -0.0212]) tensor([0.4390, 0.1640, 0.2093, 0.1877]) -Greedy action tensor([ 0.7532, -0.3077, -0.0656, -0.0899]) tensor([0.4510, 0.1561, 0.1989, 0.1941]) -Greedy action tensor([ 1.0229, -0.6402, 0.0655, -0.5336]) tensor([0.5605, 0.1062, 0.2151, 0.1182]) -Greedy action tensor([ 0.8446, -0.2487, 0.1308, -0.2489]) tensor([0.4630, 0.1552, 0.2268, 0.1551]) -Greedy action tensor([ 0.5691, -0.5980, -0.1788, -0.1868]) tensor([0.4436, 0.1381, 0.2100, 0.2083]) -Greedy action tensor([ 0.5367, -0.1481, 0.0089, -0.3562]) tensor([0.3994, 0.2014, 0.2356, 0.1636]) -Greedy action tensor([ 0.8519, -0.8717, -0.0058, -0.4166]) tensor([0.5308, 0.0947, 0.2251, 0.1493]) -Greedy action tensor([ 0.6441, -0.1260, -0.1152, -0.1258]) tensor([0.4177, 0.1934, 0.1955, 0.1934]) -Greedy action tensor([ 1.2054, -0.7429, 0.0078, -0.5050]) tensor([0.6153, 0.0877, 0.1858, 0.1112]) -Greedy action tensor([ 0.9180, -0.3675, -0.0254, -0.2668]) tensor([0.5072, 0.1402, 0.1975, 0.1551]) -Greedy action tensor([-1.0320, 0.7699, 0.2236, -0.1203]) tensor([0.0766, 0.4641, 0.2688, 0.1906]) -Greedy action tensor([-1.8943, -0.4020, 0.6425, -0.1497]) tensor([0.0420, 0.1868, 0.5308, 0.2404]) -Greedy action tensor([-0.8303, -0.2918, 0.2289, -0.0576]) tensor([0.1288, 0.2207, 0.3715, 0.2790]) -Greedy action tensor([-1.8698, -0.2864, 0.6045, -0.1309]) tensor([0.0427, 0.2079, 0.5066, 0.2428]) -Greedy action tensor([-1.8349, -0.4830, 0.6126, -0.1445]) tensor([0.0458, 0.1769, 0.5291, 0.2482]) -Greedy action tensor([-1.5283, -0.3962, 0.4493, -0.0343]) tensor([0.0634, 0.1966, 0.4578, 0.2823]) -Greedy action tensor([-1.6048, -0.5020, 0.4696, 0.0165]) tensor([0.0587, 0.1769, 0.4673, 0.2971]) -Greedy action tensor([-1.9107, -0.3657, 0.6454, -0.1616]) tensor([0.0411, 0.1927, 0.5298, 0.2364]) -Greedy action tensor([-1.5589, -0.4850, 0.4777, -0.0541]) tensor([0.0621, 0.1818, 0.4762, 0.2798]) -Greedy action tensor([-1.8071, -0.3051, 0.5815, -0.1289]) tensor([0.0460, 0.2065, 0.5012, 0.2463]) -Greedy action tensor([-1.6068, -0.3231, 0.4812, -0.0539]) tensor([0.0575, 0.2074, 0.4636, 0.2715]) -Greedy action tensor([-0.7096, 0.5613, 0.1342, -0.1515]) tensor([0.1158, 0.4127, 0.2692, 0.2023]) -Greedy action tensor([-1.8764, -0.4371, 0.6337, -0.1487]) tensor([0.0432, 0.1822, 0.5315, 0.2431]) -Greedy action tensor([ 0.1733, -0.2607, 0.1337, 0.1110]) tensor([0.2818, 0.1826, 0.2708, 0.2648]) -Greedy action tensor([-1.8521, -0.1988, 0.5923, -0.1438]) tensor([0.0430, 0.2245, 0.4953, 0.2372]) -Greedy action tensor([-1.9084, -0.4429, 0.6497, -0.1617]) tensor([0.0417, 0.1806, 0.5385, 0.2392]) -Greedy action tensor([-0.8523, 0.7625, 0.0961, -0.0798]) tensor([0.0928, 0.4666, 0.2396, 0.2010]) -Greedy action tensor([-1.5762, -0.4181, 0.7713, -0.1010]) tensor([0.0526, 0.1674, 0.5501, 0.2299]) -Greedy action tensor([-1.7420, -0.5082, 0.5726, -0.0983]) tensor([0.0507, 0.1741, 0.5130, 0.2623]) -Greedy action tensor([-1.7285, -0.4082, 0.5797, -0.0423]) tensor([0.0495, 0.1854, 0.4978, 0.2673]) -Greedy action tensor([-1.6322, 0.0809, 0.4747, -0.1422]) tensor([0.0521, 0.2888, 0.4281, 0.2310]) -Greedy action tensor([-1.9397, -0.4468, 0.6650, -0.1778]) tensor([0.0403, 0.1794, 0.5454, 0.2348]) -Greedy action tensor([-1.7145, -0.5064, 0.5486, -0.0781]) tensor([0.0524, 0.1753, 0.5034, 0.2690]) -Greedy action tensor([-1.9433, -0.4464, 0.6659, -0.1794]) tensor([0.0402, 0.1795, 0.5459, 0.2344]) -Greedy action tensor([-1.3373, -0.6012, 0.3377, 0.2299]) tensor([0.0756, 0.1579, 0.4038, 0.3626]) -Greedy action tensor([-1.8569, -0.3804, 0.6313, -0.1251]) tensor([0.0433, 0.1898, 0.5219, 0.2450]) -Greedy action tensor([-1.8996, -0.4340, 0.6439, -0.1568]) tensor([0.0421, 0.1822, 0.5354, 0.2404]) -Greedy action tensor([-1.3705, 0.5109, 0.2908, -0.0074]) tensor([0.0598, 0.3921, 0.3146, 0.2335]) -Greedy action tensor([-0.7288, 0.3192, 0.0468, -0.0836]) tensor([0.1261, 0.3596, 0.2739, 0.2404]) -Greedy action tensor([-1.6775, -0.4011, 0.6631, 0.1016]) tensor([0.0479, 0.1715, 0.4971, 0.2835]) -Greedy action tensor([-1.1848, -0.6904, 1.1799, 0.8764]) tensor([0.0473, 0.0776, 0.5035, 0.3717]) -Greedy action tensor([-1.8720, -0.4456, 0.6268, -0.1539]) tensor([0.0437, 0.1818, 0.5312, 0.2433]) -Greedy action tensor([-1.9117, -0.4256, 0.6463, -0.1681]) tensor([0.0416, 0.1838, 0.5369, 0.2378]) -Greedy action tensor([-1.9172, -0.4511, 0.6462, -0.1663]) tensor([0.0415, 0.1800, 0.5392, 0.2393]) -Greedy action tensor([-1.6035, -0.4062, 0.4931, -0.0979]) tensor([0.0590, 0.1953, 0.4799, 0.2658]) -Greedy action tensor([-1.9401, -0.4436, 0.6661, -0.1782]) tensor([0.0403, 0.1798, 0.5454, 0.2345]) -Greedy action tensor([-1.6835, 0.1153, 0.4566, -0.0635]) tensor([0.0486, 0.2934, 0.4127, 0.2453]) -Greedy action tensor([-1.9094, -0.3756, 0.6461, -0.1609]) tensor([0.0412, 0.1911, 0.5308, 0.2369]) -Greedy action tensor([-0.5693, 0.7116, 0.0352, 0.0857]) tensor([0.1197, 0.4309, 0.2191, 0.2304]) -Greedy action tensor([-1.8741, -0.4081, 0.6284, -0.1475]) tensor([0.0432, 0.1870, 0.5272, 0.2427]) -Greedy action tensor([-1.5807, -0.1385, 0.4730, -0.0571]) tensor([0.0568, 0.2401, 0.4426, 0.2605]) -Greedy action tensor([-1.7130, -0.4612, 0.5448, -0.0534]) tensor([0.0518, 0.1810, 0.4950, 0.2722]) -Greedy action tensor([-1.7226, -0.2298, 0.5944, -0.0664]) tensor([0.0480, 0.2136, 0.4870, 0.2515]) -Greedy action tensor([-1.8701, -0.3389, 0.6168, -0.1446]) tensor([0.0430, 0.1988, 0.5169, 0.2414]) -Greedy action tensor([-1.8739, -0.2308, 0.5960, -0.1292]) tensor([0.0422, 0.2180, 0.4984, 0.2414]) -Greedy action tensor([-0.7373, 0.5866, 0.5034, 0.8308]) tensor([0.0768, 0.2888, 0.2657, 0.3686]) -Greedy action tensor([-1.8673, -0.4455, 0.6287, -0.1403]) tensor([0.0437, 0.1810, 0.5298, 0.2456]) -Greedy action tensor([-1.9314, -0.4462, 0.6600, -0.1747]) tensor([0.0407, 0.1798, 0.5436, 0.2359]) -Greedy action tensor([-1.9332, -0.3882, 0.6508, -0.1739]) tensor([0.0404, 0.1894, 0.5354, 0.2347]) -Greedy action tensor([-1.8684, -0.4596, 0.6353, -0.1426]) tensor([0.0436, 0.1784, 0.5331, 0.2449]) -Greedy action tensor([-1.8346, -0.4944, 0.7723, -0.0855]) tensor([0.0414, 0.1583, 0.5619, 0.2383]) -Greedy action tensor([-1.6281, -0.4754, 0.5107, -0.0947]) tensor([0.0578, 0.1832, 0.4910, 0.2680]) -Greedy action tensor([-1.7950, -0.2703, 0.5760, -0.1083]) tensor([0.0461, 0.2117, 0.4934, 0.2489]) -Greedy action tensor([-1.9251, -0.4429, 0.6600, -0.1689]) tensor([0.0409, 0.1800, 0.5423, 0.2368]) -Greedy action tensor([-0.9045, -0.0410, 0.2540, -0.0696]) tensor([0.1129, 0.2676, 0.3595, 0.2601]) -Greedy action tensor([-1.8955, -0.3308, 0.6364, -0.1513]) tensor([0.0415, 0.1986, 0.5223, 0.2376]) -Greedy action tensor([-1.8372, -0.4154, 0.6537, -0.0918]) tensor([0.0436, 0.1806, 0.5261, 0.2496]) -Greedy action tensor([-1.7267, -0.3256, 0.5876, -0.0370]) tensor([0.0486, 0.1971, 0.4913, 0.2631]) -Greedy action tensor([-1.2843, -0.1788, 0.2856, 0.1775]) tensor([0.0761, 0.2299, 0.3658, 0.3283]) -Greedy action tensor([-1.9205, -0.4582, 0.6522, -0.1743]) tensor([0.0414, 0.1787, 0.5425, 0.2374]) -Greedy action tensor([-1.8727, -0.4212, 0.6649, -0.1051]) tensor([0.0421, 0.1796, 0.5320, 0.2463]) -Greedy action tensor([-1.9324, -0.4335, 0.6613, -0.1734]) tensor([0.0405, 0.1815, 0.5425, 0.2354]) -Greedy action tensor([-1.7833, -0.4270, 0.5901, -0.0900]) tensor([0.0475, 0.1844, 0.5098, 0.2583]) -Greedy action tensor([-1.9074, -0.4146, 0.6514, -0.1596]) tensor([0.0415, 0.1845, 0.5359, 0.2381]) -Greedy action tensor([-1.7869, -0.1483, 0.6019, -0.1101]) tensor([0.0446, 0.2299, 0.4867, 0.2388]) -Greedy action tensor([-1.8761, -0.1851, 0.5989, -0.1214]) tensor([0.0415, 0.2252, 0.4932, 0.2400]) -Greedy action tensor([-1.9327, -0.3567, 0.6458, -0.1767]) tensor([0.0403, 0.1950, 0.5313, 0.2334]) -Greedy action tensor([-0.8387, -0.4809, 0.5302, 0.6392]) tensor([0.0931, 0.1331, 0.3658, 0.4080]) -Greedy action tensor([-1.8684, -0.4560, 0.6309, -0.1473]) tensor([0.0437, 0.1795, 0.5323, 0.2444]) -Greedy action tensor([-0.7871, 0.4079, 0.0905, 0.0358]) tensor([0.1113, 0.3676, 0.2677, 0.2534]) -Greedy action tensor([-1.8019, -0.4095, 0.5907, -0.1183]) tensor([0.0468, 0.1885, 0.5125, 0.2522]) -Greedy action tensor([-1.8905, -0.3581, 0.6349, -0.1486]) tensor([0.0420, 0.1942, 0.5243, 0.2395]) -Greedy action tensor([-1.0613, 0.3994, 0.2988, -0.0826]) tensor([0.0843, 0.3631, 0.3284, 0.2243]) -Greedy action tensor([-1.8289, 0.1664, 0.5251, -0.1336]) tensor([0.0411, 0.3023, 0.4327, 0.2239]) -Greedy action tensor([-1.1109, 0.0653, 0.2036, 0.0379]) tensor([0.0899, 0.2916, 0.3348, 0.2837]) -Greedy action tensor([-1.8200, 0.0852, 0.4355, -0.3936]) tensor([0.0467, 0.3137, 0.4453, 0.1943]) -Greedy action tensor([-0.9183, -0.6171, 0.1650, 0.3114]) tensor([0.1146, 0.1549, 0.3386, 0.3920]) -Greedy action tensor([-1.9184, -0.4375, 0.6561, -0.1662]) tensor([0.0412, 0.1810, 0.5404, 0.2374]) -Greedy action tensor([-1.8285, -0.2650, 0.5916, -0.1394]) tensor([0.0446, 0.2128, 0.5013, 0.2413]) -Greedy action tensor([-1.9293, -0.4350, 0.6612, -0.1735]) tensor([0.0407, 0.1813, 0.5426, 0.2355]) -Greedy action tensor([-1.6795, 0.4070, 0.4528, -0.1680]) tensor([0.0454, 0.3658, 0.3830, 0.2058]) -Greedy action tensor([ 0.0596, -0.9187, 0.5547, -0.7447]) tensor([0.2887, 0.1085, 0.4736, 0.1292]) -Greedy action tensor([ 0.2640, -0.7713, 0.3858, -0.5605]) tensor([0.3421, 0.1215, 0.3864, 0.1500]) -Greedy action tensor([-0.4765, -0.3504, 0.2561, -0.9935]) tensor([0.2079, 0.2358, 0.4324, 0.1239]) -Greedy action tensor([-0.3083, -0.3442, -0.2023, -0.3797]) tensor([0.2495, 0.2407, 0.2774, 0.2323]) -Greedy action tensor([-0.1341, 0.4670, -0.0373, 1.0547]) tensor([0.1387, 0.2530, 0.1528, 0.4554]) -Greedy action tensor([ 5.1078e-04, -1.3825e+00, 6.2180e-02, -1.1103e-01]) tensor([0.3116, 0.0782, 0.3315, 0.2787]) -Greedy action tensor([-0.5779, -1.1798, 0.1402, -0.8011]) tensor([0.2274, 0.1245, 0.4662, 0.1819]) -Greedy action tensor([ 0.0423, -0.4411, 0.4999, -0.8637]) tensor([0.2777, 0.1713, 0.4388, 0.1122]) -Greedy action tensor([-0.8285, -1.0411, 0.2416, -1.2645]) tensor([0.1862, 0.1505, 0.5429, 0.1204]) -Greedy action tensor([-0.9022, -1.5437, -0.1462, 0.0370]) tensor([0.1609, 0.0847, 0.3427, 0.4116]) -Greedy action tensor([-0.5229, 0.4026, -0.0575, -1.0989]) tensor([0.1761, 0.4444, 0.2805, 0.0990]) -Greedy action tensor([-0.3330, -0.6080, 1.8326, -0.7915]) tensor([0.0900, 0.0684, 0.7847, 0.0569]) -Greedy action tensor([ 0.1370, -0.5845, 0.0829, -0.4022]) tensor([0.3315, 0.1611, 0.3140, 0.1933]) -Greedy action tensor([ 0.4719, 0.4161, -0.8258, -0.5427]) tensor([0.3874, 0.3664, 0.1058, 0.1404]) -Greedy action tensor([ 0.1208, -1.3622, 0.8128, -0.0735]) tensor([0.2470, 0.0561, 0.4935, 0.2034]) -Greedy action tensor([ 1.5982, -0.3708, 1.4186, -0.4922]) tensor([0.4764, 0.0665, 0.3981, 0.0589]) -Greedy action tensor([ 0.0274, -0.6848, -0.7178, 0.2398]) tensor([0.3123, 0.1532, 0.1482, 0.3862]) -Greedy action tensor([-0.0894, -1.1210, 0.4729, -0.8269]) tensor([0.2786, 0.0993, 0.4889, 0.1333]) -Greedy action tensor([-0.6012, -1.4066, -0.4094, 0.3684]) tensor([0.1889, 0.0844, 0.2288, 0.4980]) -Greedy action tensor([-0.6611, -0.0656, -0.2011, -0.2720]) tensor([0.1703, 0.3088, 0.2697, 0.2512]) -Greedy action tensor([ 0.1634, 0.3348, 0.8350, -0.2703]) tensor([0.2087, 0.2477, 0.4084, 0.1352]) -Greedy action tensor([ 0.3100, -1.4630, -0.2025, 0.1884]) tensor([0.3767, 0.0640, 0.2257, 0.3336]) -Greedy action tensor([ 1.0775, -0.6098, 0.8787, 0.9518]) tensor([0.3464, 0.0641, 0.2840, 0.3055]) -Greedy action tensor([ 0.5099, -0.3353, -0.5224, 0.1062]) tensor([0.4076, 0.1750, 0.1452, 0.2722]) -Greedy action tensor([-0.2809, 0.8290, 0.3124, 0.0981]) tensor([0.1369, 0.4154, 0.2478, 0.2000]) -Greedy action tensor([-0.4268, 0.1431, 0.1533, -1.1449]) tensor([0.1983, 0.3507, 0.3543, 0.0967]) -Greedy action tensor([ 0.2061, -0.1576, -0.8358, 0.7490]) tensor([0.2653, 0.1844, 0.0936, 0.4566]) -Greedy action tensor([-1.6362, 0.8196, -0.4949, -0.9415]) tensor([0.0562, 0.6552, 0.1760, 0.1126]) -Greedy action tensor([ 0.0380, 0.0046, -0.3134, 0.0754]) tensor([0.2696, 0.2608, 0.1897, 0.2799]) -Greedy action tensor([-0.6211, -1.1062, 0.1274, -0.1239]) tensor([0.1861, 0.1146, 0.3934, 0.3060]) -Greedy action tensor([-0.8024, -0.8128, 0.1635, -1.0469]) tensor([0.1852, 0.1833, 0.4865, 0.1450]) -Greedy action tensor([ 0.0369, -0.2392, 0.6429, -0.1626]) tensor([0.2267, 0.1720, 0.4156, 0.1857]) -Greedy action tensor([-0.0614, -0.6855, 0.5908, -0.6700]) tensor([0.2500, 0.1340, 0.4800, 0.1360]) -Greedy action tensor([-0.3909, -1.3921, 0.3052, -0.7093]) tensor([0.2439, 0.0896, 0.4892, 0.1774]) -Greedy action tensor([-0.7871, -0.8494, -0.5790, -0.8054]) tensor([0.2408, 0.2263, 0.2965, 0.2364]) -Greedy action tensor([-0.9232, -0.3673, 0.0763, -1.4319]) tensor([0.1650, 0.2876, 0.4482, 0.0992]) -Greedy action tensor([-0.1461, -0.9723, 0.1799, -0.5954]) tensor([0.2889, 0.1265, 0.4003, 0.1843]) -Greedy action tensor([-0.3579, -0.9206, -1.0461, 0.2189]) tensor([0.2596, 0.1479, 0.1304, 0.4621]) -Greedy action tensor([-0.0194, -1.1933, -0.2268, -0.8024]) tensor([0.3878, 0.1199, 0.3151, 0.1772]) -Greedy action tensor([-0.4387, -0.7740, 0.2479, -0.5276]) tensor([0.2166, 0.1549, 0.4304, 0.1982]) -Greedy action tensor([ 0.7908, -0.4541, -1.0462, -0.0595]) tensor([0.5335, 0.1536, 0.0850, 0.2279]) -Greedy action tensor([ 0.2134, -0.7833, 0.3003, -0.6865]) tensor([0.3489, 0.1288, 0.3805, 0.1419]) -Greedy action tensor([-0.6037, 0.4706, 0.0677, -0.9850]) tensor([0.1523, 0.4458, 0.2980, 0.1040]) -Greedy action tensor([ 0.7254, -0.1009, 0.2794, 0.1533]) tensor([0.3785, 0.1656, 0.2423, 0.2136]) -Greedy action tensor([ 0.6618, 0.0082, 0.6188, -0.4504]) tensor([0.3563, 0.1853, 0.3413, 0.1171]) -Greedy action tensor([ 0.3468, -0.5372, 0.5239, 0.3107]) tensor([0.2800, 0.1157, 0.3343, 0.2701]) -Greedy action tensor([ 1.1266, -0.7958, -0.4855, 0.3188]) tensor([0.5582, 0.0816, 0.1113, 0.2489]) -Greedy action tensor([ 0.2740, -0.6704, -0.1512, 1.2084]) tensor([0.2180, 0.0848, 0.1425, 0.5548]) -Greedy action tensor([-0.4591, 0.0292, -0.0235, -0.6125]) tensor([0.1987, 0.3238, 0.3071, 0.1704]) -Greedy action tensor([-1.3229, -0.3620, 0.5839, -0.8222]) tensor([0.0834, 0.2179, 0.5612, 0.1375]) -Greedy action tensor([-0.5913, 0.7226, 0.4158, 0.2372]) tensor([0.1026, 0.3817, 0.2808, 0.2349]) -Greedy action tensor([-0.8186, -0.3992, -0.9187, -0.1289]) tensor([0.1845, 0.2807, 0.1670, 0.3678]) -Greedy action tensor([-0.4101, -0.6702, 0.4168, -0.7626]) tensor([0.2101, 0.1620, 0.4803, 0.1477]) -Greedy action tensor([ 0.6306, -0.4618, 0.1882, -0.0374]) tensor([0.4015, 0.1347, 0.2580, 0.2059]) -Greedy action tensor([-1.7070, -0.6492, -0.6198, -0.3613]) tensor([0.0936, 0.2695, 0.2775, 0.3594]) -Greedy action tensor([-0.2145, -0.9445, 0.5802, -0.4605]) tensor([0.2233, 0.1076, 0.4944, 0.1746]) -Greedy action tensor([ 0.1337, -0.5727, 0.0338, -0.5753]) tensor([0.3460, 0.1707, 0.3131, 0.1703]) -Greedy action tensor([-0.0355, -0.9875, -0.0346, -0.4549]) tensor([0.3285, 0.1268, 0.3288, 0.2160]) -Greedy action tensor([-0.9479, -1.1533, 1.0211, -0.4922]) tensor([0.0947, 0.0771, 0.6787, 0.1494]) -Greedy action tensor([ 0.2313, 0.2137, 0.1539, -0.5914]) tensor([0.2987, 0.2935, 0.2765, 0.1312]) -Greedy action tensor([-0.3015, 0.2936, 0.0169, -0.8782]) tensor([0.2105, 0.3817, 0.2895, 0.1183]) -Greedy action tensor([-0.0115, -0.5228, 0.2479, -1.0771]) tensor([0.3086, 0.1851, 0.4000, 0.1063]) -Greedy action tensor([-0.6319, 0.7960, -0.6082, -0.4941]) tensor([0.1362, 0.5680, 0.1395, 0.1563]) -Greedy action tensor([ 0.1394, -0.7417, -0.4686, -0.7370]) tensor([0.4211, 0.1744, 0.2292, 0.1753]) -Greedy action tensor([ 0.4784, -0.2164, 0.0785, 0.2613]) tensor([0.3362, 0.1678, 0.2254, 0.2706]) -Greedy action tensor([-0.0613, -1.4513, 0.0603, 0.2890]) tensor([0.2633, 0.0656, 0.2974, 0.3738]) -Greedy action tensor([-1.3051, -1.0510, 0.0714, -0.2486]) tensor([0.1096, 0.1413, 0.4340, 0.3152]) -Greedy action tensor([ 1.1120, -0.1809, 0.4789, -0.5224]) tensor([0.4999, 0.1372, 0.2654, 0.0975]) -Greedy action tensor([-0.1976, -1.0716, 0.2963, -0.5261]) tensor([0.2648, 0.1105, 0.4340, 0.1907]) -Greedy action tensor([-0.0520, -0.2798, -0.4950, -0.8458]) tensor([0.3460, 0.2755, 0.2221, 0.1564]) -Greedy action tensor([-0.3733, -1.3739, 0.2824, -1.0116]) tensor([0.2616, 0.0962, 0.5040, 0.1382]) -Greedy action tensor([-0.2959, -0.0175, -1.2991, -0.1819]) tensor([0.2626, 0.3469, 0.0963, 0.2943]) -Greedy action tensor([ 0.5926, -1.2771, 1.0452, -0.1245]) tensor([0.3111, 0.0480, 0.4891, 0.1518]) -Greedy action tensor([ 0.7441, -0.9564, -0.1222, 1.0953]) tensor([0.3307, 0.0604, 0.1391, 0.4699]) -Greedy action tensor([ 0.9260, -0.7073, -0.0928, 0.7611]) tensor([0.4159, 0.0812, 0.1502, 0.3527]) -Greedy action tensor([-0.7040, -0.7436, -0.1819, -0.3019]) tensor([0.1945, 0.1869, 0.3278, 0.2908]) -Greedy action tensor([ 0.0401, -0.3031, -0.6333, 0.0156]) tensor([0.3130, 0.2221, 0.1596, 0.3054]) -Greedy action tensor([-0.3642, -0.3575, 0.3196, -0.0607]) tensor([0.1872, 0.1884, 0.3709, 0.2535]) -Greedy action tensor([-0.3286, -0.1779, -0.1052, -0.2569]) tensor([0.2229, 0.2591, 0.2786, 0.2394]) -Greedy action tensor([-0.5327, -0.1069, 0.4290, 0.2267]) tensor([0.1373, 0.2102, 0.3592, 0.2934]) -Greedy action tensor([-0.6232, -0.4909, -0.4578, 0.5391]) tensor([0.1534, 0.1751, 0.1810, 0.4905]) -Greedy action tensor([ 1.2910, -0.4939, -0.4287, 0.1466]) tensor([0.6005, 0.1008, 0.1076, 0.1912]) -Greedy action tensor([ 1.0891, -0.5088, -0.1790, 0.4319]) tensor([0.4995, 0.1011, 0.1405, 0.2589]) -Greedy action tensor([ 1.3456, -0.2757, -0.7176, 0.6335]) tensor([0.5509, 0.1089, 0.0700, 0.2703]) -Greedy action tensor([ 1.7717, -0.8326, -0.2134, 0.0851]) tensor([0.7161, 0.0530, 0.0984, 0.1326]) -Greedy action tensor([ 1.3079, -0.2874, -0.3084, 0.7105]) tensor([0.5124, 0.1039, 0.1018, 0.2819]) -Greedy action tensor([ 1.2733, -0.7406, -0.0713, 0.6445]) tensor([0.5188, 0.0693, 0.1352, 0.2767]) -Greedy action tensor([ 1.8840, -0.4436, -0.3877, 0.3778]) tensor([0.7030, 0.0686, 0.0725, 0.1559]) -Greedy action tensor([ 1.8076, -1.0216, -0.4996, 0.3351]) tensor([0.7205, 0.0426, 0.0717, 0.1652]) -Greedy action tensor([ 1.6408, -1.0221, 0.1102, 0.3014]) tensor([0.6459, 0.0451, 0.1398, 0.1692]) -Greedy action tensor([ 1.2721, -0.0722, -0.8726, 0.5080]) tensor([0.5424, 0.1414, 0.0635, 0.2526]) -Greedy action tensor([ 1.1948, -0.2899, -0.3784, 0.0494]) tensor([0.5708, 0.1293, 0.1184, 0.1816]) -Greedy action tensor([ 1.4303, -0.3652, -1.0240, 0.4103]) tensor([0.6201, 0.1030, 0.0533, 0.2236]) -Greedy action tensor([ 1.3090, -0.6902, -0.1647, 0.0535]) tensor([0.6063, 0.0821, 0.1389, 0.1727]) -Greedy action tensor([ 1.2887, -0.6174, -0.0396, 0.0976]) tensor([0.5823, 0.0866, 0.1543, 0.1769]) -Greedy action tensor([ 1.7707, -0.4901, 0.1338, 0.4005]) tensor([0.6440, 0.0671, 0.1253, 0.1636]) -Greedy action tensor([ 1.4833, -0.5225, -0.5493, 0.2508]) tensor([0.6422, 0.0864, 0.0841, 0.1873]) -Greedy action tensor([ 1.3468, -0.2781, -0.2940, 0.0192]) tensor([0.6039, 0.1189, 0.1171, 0.1601]) -Greedy action tensor([ 1.3266, -0.4840, -0.5012, 0.2128]) tensor([0.6051, 0.0990, 0.0973, 0.1987]) -Greedy action tensor([ 1.4586, -0.6221, 0.0877, 0.1156]) tensor([0.6098, 0.0761, 0.1548, 0.1592]) -Greedy action tensor([ 1.0941, -0.4034, -0.3421, 0.1256]) tensor([0.5431, 0.1215, 0.1292, 0.2062]) -Greedy action tensor([ 1.1058, -0.3192, -1.1667, 0.6512]) tensor([0.5055, 0.1216, 0.0521, 0.3208]) -Greedy action tensor([ 1.5173, 0.2762, -0.0062, -0.1411]) tensor([0.5891, 0.1703, 0.1284, 0.1122]) -Greedy action tensor([ 1.7862, -0.4441, -0.6552, 0.3902]) tensor([0.6934, 0.0745, 0.0604, 0.1717]) -Greedy action tensor([ 1.5431, -0.0871, -0.8327, 0.2515]) tensor([0.6395, 0.1253, 0.0594, 0.1758]) -Greedy action tensor([ 1.4798, -0.1713, -0.8243, 0.2941]) tensor([0.6261, 0.1201, 0.0625, 0.1913]) -Greedy action tensor([ 1.2550, -0.0944, -1.3808, 0.2123]) tensor([0.5940, 0.1541, 0.0426, 0.2094]) -Greedy action tensor([ 2.0709, -0.8539, 0.1852, -0.3419]) tensor([0.7722, 0.0415, 0.1172, 0.0692]) -Greedy action tensor([ 0.9196, 0.0609, -0.0812, 0.2633]) tensor([0.4329, 0.1834, 0.1591, 0.2246]) -Greedy action tensor([ 1.8574, -1.2890, -0.5257, 0.1216]) tensor([0.7625, 0.0328, 0.0703, 0.1344]) -Greedy action tensor([ 1.4613, -0.4940, -0.5839, 0.1592]) tensor([0.6482, 0.0917, 0.0838, 0.1763]) -Greedy action tensor([ 1.9787, -0.6839, -0.9950, 0.3790]) tensor([0.7559, 0.0527, 0.0386, 0.1527]) -Greedy action tensor([ 1.9431, 0.3084, -0.0969, -0.2075]) tensor([0.6937, 0.1353, 0.0902, 0.0808]) -Greedy action tensor([ 1.1234, 0.0113, -0.7081, -0.0350]) tensor([0.5546, 0.1824, 0.0888, 0.1741]) -Greedy action tensor([ 1.7881, -1.1611, -0.4743, 0.2066]) tensor([0.7341, 0.0385, 0.0764, 0.1510]) -Greedy action tensor([ 1.4609, 0.2447, -0.4880, 0.3662]) tensor([0.5639, 0.1671, 0.0803, 0.1887]) -Greedy action tensor([ 1.4520, -0.0748, -0.8888, 0.1939]) tensor([0.6259, 0.1360, 0.0602, 0.1779]) -Greedy action tensor([ 1.5542, -0.9767, -0.7403, 0.0148]) tensor([0.7169, 0.0571, 0.0723, 0.1538]) -Greedy action tensor([ 1.8998, -1.1374, -0.5656, 0.4804]) tensor([0.7274, 0.0349, 0.0618, 0.1759]) -Greedy action tensor([ 0.9722, 0.2604, -1.3209, 0.8570]) tensor([0.4028, 0.1976, 0.0407, 0.3589]) -Greedy action tensor([ 1.4265, 0.3265, -0.5573, -0.3128]) tensor([0.6075, 0.2022, 0.0836, 0.1067]) -Greedy action tensor([ 2.0948, -1.4293, -0.0233, 0.3584]) tensor([0.7542, 0.0222, 0.0907, 0.1329]) -Greedy action tensor([ 1.7509, -0.3818, -0.7904, 0.0884]) tensor([0.7210, 0.0854, 0.0568, 0.1367]) -Greedy action tensor([ 1.4021e+00, 4.6790e-05, -1.6351e+00, 2.6083e-01]) tensor([0.6198, 0.1525, 0.0297, 0.1980]) -Greedy action tensor([ 2.2483, -1.1153, -0.2242, 0.5600]) tensor([0.7670, 0.0265, 0.0647, 0.1418]) -Greedy action tensor([ 2.3964, -1.0160, -0.5864, 0.4706]) tensor([0.8134, 0.0268, 0.0412, 0.1186]) -Greedy action tensor([ 1.2349, -0.3293, -0.7448, 0.4322]) tensor([0.5569, 0.1166, 0.0769, 0.2496]) -Greedy action tensor([ 2.1400, 0.4698, 0.0428, -0.2210]) tensor([0.7116, 0.1339, 0.0874, 0.0671]) -Greedy action tensor([ 1.3670, -0.5723, -0.4857, 0.3408]) tensor([0.6028, 0.0867, 0.0945, 0.2160]) -Greedy action tensor([ 1.7628, -0.7455, -0.3728, 0.2169]) tensor([0.7079, 0.0576, 0.0836, 0.1509]) -Greedy action tensor([ 2.1126, -0.6548, -0.5998, 0.2347]) tensor([0.7800, 0.0490, 0.0518, 0.1193]) -Greedy action tensor([ 1.3268, -0.5985, -0.2046, 0.3505]) tensor([0.5751, 0.0839, 0.1244, 0.2167]) -Greedy action tensor([ 1.2719, -0.3302, -0.6575, 0.3098]) tensor([0.5784, 0.1165, 0.0840, 0.2210]) -Greedy action tensor([ 1.3153, -0.6344, -0.2590, 0.5358]) tensor([0.5531, 0.0787, 0.1146, 0.2537]) -Greedy action tensor([ 1.3797, -0.5749, -0.6282, -0.2565]) tensor([0.6800, 0.0963, 0.0913, 0.1324]) -Greedy action tensor([ 1.1605, 0.0042, -0.8718, 0.2217]) tensor([0.5444, 0.1713, 0.0713, 0.2129]) -Greedy action tensor([ 1.3576, -0.3953, -0.3202, 0.1244]) tensor([0.6056, 0.1049, 0.1131, 0.1764]) -Greedy action tensor([ 1.3920, -0.3911, -0.3865, 0.3140]) tensor([0.5962, 0.1002, 0.1007, 0.2029]) -Greedy action tensor([ 1.5683, -0.4578, -0.5353, 0.5474]) tensor([0.6195, 0.0817, 0.0756, 0.2232]) -Greedy action tensor([ 2.0270, -0.7139, -0.2972, 0.5406]) tensor([0.7202, 0.0465, 0.0705, 0.1629]) -Greedy action tensor([ 1.3128, -0.7208, 0.1114, 0.0466]) tensor([0.5836, 0.0764, 0.1755, 0.1645]) -Greedy action tensor([ 1.7179, -0.1470, -0.0548, 0.0728]) tensor([0.6588, 0.1021, 0.1119, 0.1272]) -Greedy action tensor([ 1.6471, 0.5139, -0.8277, 0.1210]) tensor([0.6159, 0.1983, 0.0518, 0.1339]) -Greedy action tensor([ 1.1948, 0.0826, 0.3144, -0.3598]) tensor([0.5116, 0.1682, 0.2121, 0.1081]) -Greedy action tensor([ 1.3053, -0.4460, -0.0566, 0.6868]) tensor([0.5080, 0.0882, 0.1301, 0.2737]) -Greedy action tensor([ 1.4506, -0.8802, -0.1829, 0.3208]) tensor([0.6190, 0.0602, 0.1208, 0.2000]) -Greedy action tensor([ 1.0349, -0.2133, -0.3469, -0.0136]) tensor([0.5295, 0.1520, 0.1330, 0.1856]) -Greedy action tensor([ 1.9404, -0.9757, -0.4163, 0.8304]) tensor([0.6764, 0.0366, 0.0641, 0.2229]) -Greedy action tensor([ 1.2729, -0.3843, -0.5450, 0.1816]) tensor([0.5921, 0.1129, 0.0961, 0.1988]) -Greedy action tensor([1.9788, 0.2536, 0.1243, 0.4432]) tensor([0.6452, 0.1149, 0.1010, 0.1389]) -Greedy action tensor([ 1.6825, -0.4000, -0.2250, 0.4393]) tensor([0.6404, 0.0798, 0.0951, 0.1847]) -Greedy action tensor([ 1.3282, -0.7056, -0.7350, 0.1869]) tensor([0.6340, 0.0830, 0.0806, 0.2025]) -Greedy action tensor([ 1.2262, 0.1424, -0.8084, -0.0158]) tensor([0.5689, 0.1924, 0.0744, 0.1643]) -Greedy action tensor([ 2.3349, 0.1669, -0.2071, 0.5882]) tensor([0.7313, 0.0837, 0.0576, 0.1275]) -Greedy action tensor([ 1.2486, -0.5227, -0.1281, 0.0022]) tensor([0.5848, 0.0995, 0.1476, 0.1681]) -Greedy action tensor([ 1.6472, -0.1704, -0.8029, 0.1458]) tensor([0.6796, 0.1104, 0.0586, 0.1514]) -Greedy action tensor([ 1.0684, -0.4139, -0.5820, 0.1747]) tensor([0.5470, 0.1242, 0.1050, 0.2238]) -Greedy action tensor([ 1.2657, -0.5145, -0.7799, 0.0952]) tensor([0.6219, 0.1048, 0.0804, 0.1929]) -Greedy action tensor([ 2.3232, -0.1413, -0.0136, -0.2747]) tensor([0.7961, 0.0677, 0.0769, 0.0593]) -Greedy action tensor([ 1.8289, -1.1816, -0.3146, 0.5721]) tensor([0.6891, 0.0340, 0.0808, 0.1961]) -Greedy action tensor([ 1.3570, -0.0235, -0.3588, -0.0698]) tensor([0.5983, 0.1504, 0.1076, 0.1436]) -Greedy action tensor([ 1.5894, -0.5953, -0.6833, 0.6627]) tensor([0.6206, 0.0698, 0.0639, 0.2457]) -Greedy action tensor([ 0.6917, -0.1883, -0.0594, -0.0446]) tensor([0.4227, 0.1754, 0.1995, 0.2024]) -Greedy action tensor([ 0.6824, -0.4558, 0.1057, -0.1618]) tensor([0.4325, 0.1386, 0.2430, 0.1859]) -Greedy action tensor([ 0.7525, -0.4246, 0.0210, -0.4808]) tensor([0.4806, 0.1481, 0.2313, 0.1400]) -Greedy action tensor([ 0.5457, -0.4606, -0.0444, -0.0949]) tensor([0.4087, 0.1494, 0.2265, 0.2154]) -Greedy action tensor([ 0.4939, -0.1370, -0.1189, -0.0840]) tensor([0.3795, 0.2019, 0.2056, 0.2129]) -Greedy action tensor([ 0.8228, -0.1221, -0.1939, -0.0447]) tensor([0.4607, 0.1791, 0.1667, 0.1935]) -Greedy action tensor([ 0.5636, -0.3800, -0.1020, -0.1864]) tensor([0.4210, 0.1638, 0.2163, 0.1988]) -Greedy action tensor([ 0.5106, -0.2484, -0.0807, -0.3717]) tensor([0.4106, 0.1922, 0.2273, 0.1699]) -Greedy action tensor([ 1.0811, -0.9926, -0.1163, -0.5705]) tensor([0.6175, 0.0776, 0.1865, 0.1184]) -Greedy action tensor([ 0.7207, -0.3741, 0.1102, -0.4899]) tensor([0.4596, 0.1538, 0.2496, 0.1370]) -Greedy action tensor([ 0.7621, -0.4755, 0.0732, -0.4110]) tensor([0.4758, 0.1380, 0.2389, 0.1472]) -Greedy action tensor([ 0.8977, -0.5880, -0.1932, -0.8844]) tensor([0.5778, 0.1308, 0.1941, 0.0972]) -Greedy action tensor([ 0.3525, 0.1573, -0.0676, -0.2267]) tensor([0.3290, 0.2706, 0.2161, 0.1843]) -Greedy action tensor([ 0.1490, 0.1639, -0.1745, -0.3591]) tensor([0.2994, 0.3039, 0.2166, 0.1801]) -Greedy action tensor([ 1.1543, -0.8045, -0.1126, -0.6203]) tensor([0.6280, 0.0886, 0.1769, 0.1065]) -Greedy action tensor([ 0.6851, -0.3228, -0.1407, -0.1671]) tensor([0.4486, 0.1637, 0.1964, 0.1913]) -Greedy action tensor([ 0.7102, -0.5464, -0.1851, -0.1461]) tensor([0.4722, 0.1344, 0.1929, 0.2005]) -Greedy action tensor([ 0.8869, -0.7886, 0.1634, -0.4283]) tensor([0.5153, 0.0965, 0.2499, 0.1383]) -Greedy action tensor([ 0.7523, -0.3042, -0.1137, -0.2702]) tensor([0.4699, 0.1634, 0.1977, 0.1690]) -Greedy action tensor([ 0.9283, -0.7712, 0.0054, -0.3238]) tensor([0.5359, 0.0979, 0.2129, 0.1532]) -Greedy action tensor([ 0.7755, -0.4770, -0.0269, -0.3960]) tensor([0.4893, 0.1398, 0.2193, 0.1516]) -Greedy action tensor([ 0.9767, -0.6722, 0.0715, -0.4946]) tensor([0.5476, 0.1053, 0.2215, 0.1257]) -Greedy action tensor([ 0.7749, -0.5742, -0.0208, -0.2068]) tensor([0.4795, 0.1244, 0.2164, 0.1797]) -Greedy action tensor([ 0.4184, -0.0112, -0.1967, 0.0979]) tensor([0.3428, 0.2231, 0.1853, 0.2488]) -Greedy action tensor([ 0.3537, -0.1878, 0.0394, -0.3483]) tensor([0.3562, 0.2072, 0.2601, 0.1765]) -Greedy action tensor([ 0.8208, -0.3146, -0.0283, -0.2292]) tensor([0.4764, 0.1531, 0.2038, 0.1667]) -Greedy action tensor([ 0.8376, -0.2451, -0.0921, -0.1217]) tensor([0.4725, 0.1600, 0.1865, 0.1810]) -Greedy action tensor([0.5709, 0.1415, 0.0124, 0.0072]) tensor([0.3582, 0.2331, 0.2049, 0.2038]) -Greedy action tensor([ 0.7057, -0.0950, 0.0219, 0.0887]) tensor([0.4011, 0.1801, 0.2024, 0.2164]) -Greedy action tensor([ 0.9189, -0.3155, -0.2779, -0.5315]) tensor([0.5471, 0.1592, 0.1653, 0.1283]) -Greedy action tensor([ 1.0839, -0.4773, 0.1627, -0.5165]) tensor([0.5526, 0.1160, 0.2199, 0.1115]) -Greedy action tensor([ 1.1617, -0.6087, -0.1569, -0.3821]) tensor([0.6056, 0.1031, 0.1620, 0.1293]) -Greedy action tensor([ 0.7634, -0.4033, -0.1998, -0.5094]) tensor([0.5068, 0.1578, 0.1934, 0.1419]) -Greedy action tensor([ 0.6391, -0.4407, -0.1484, -0.0919]) tensor([0.4394, 0.1492, 0.1999, 0.2115]) -Greedy action tensor([ 0.5826, -0.0751, 0.0332, 0.0420]) tensor([0.3735, 0.1935, 0.2156, 0.2175]) -Greedy action tensor([ 1.3503, -0.9395, 0.0273, -0.8454]) tensor([0.6762, 0.0685, 0.1801, 0.0752]) -Greedy action tensor([ 0.6116, -0.0922, -0.0200, -0.0938]) tensor([0.3968, 0.1963, 0.2110, 0.1960]) -Greedy action tensor([ 0.8490, -0.5478, 0.1543, -0.1358]) tensor([0.4717, 0.1167, 0.2355, 0.1762]) -Greedy action tensor([ 0.6640, -0.6291, 0.2360, -0.6570]) tensor([0.4560, 0.1251, 0.2972, 0.1217]) -Greedy action tensor([ 0.6866, -0.7756, -0.0653, -0.5431]) tensor([0.5011, 0.1161, 0.2363, 0.1465]) -Greedy action tensor([ 0.4162, -0.2533, -0.0774, 0.0754]) tensor([0.3529, 0.1807, 0.2154, 0.2510]) -Greedy action tensor([ 1.2277, -0.1858, -0.0859, -0.3005]) tensor([0.5783, 0.1407, 0.1555, 0.1255]) -Greedy action tensor([ 0.7093, -0.3982, -0.0087, -0.1846]) tensor([0.4490, 0.1483, 0.2190, 0.1837]) -Greedy action tensor([ 0.9318, -0.4629, -0.0681, -0.5371]) tensor([0.5417, 0.1343, 0.1993, 0.1247]) -Greedy action tensor([0.7203, 0.0771, 0.0427, 0.0596]) tensor([0.3922, 0.2061, 0.1992, 0.2025]) -Greedy action tensor([ 0.9741, -0.3163, 0.0334, -0.3112]) tensor([0.5149, 0.1417, 0.2010, 0.1424]) -Greedy action tensor([0.5915, 0.2302, 0.0406, 0.0181]) tensor([0.3525, 0.2456, 0.2032, 0.1987]) -Greedy action tensor([ 0.7490, -0.4867, -0.0688, -0.2947]) tensor([0.4798, 0.1394, 0.2118, 0.1690]) -Greedy action tensor([ 0.7548, -0.4485, -0.0602, -0.1779]) tensor([0.4681, 0.1405, 0.2072, 0.1842]) -Greedy action tensor([ 0.7760, -0.5299, -0.1321, -0.1263]) tensor([0.4808, 0.1303, 0.1939, 0.1950]) -Greedy action tensor([ 0.7644, -0.0938, 0.0383, 0.0542]) tensor([0.4168, 0.1767, 0.2016, 0.2049]) -Greedy action tensor([ 0.6551, -0.4670, -0.0097, -0.1838]) tensor([0.4401, 0.1433, 0.2264, 0.1902]) -Greedy action tensor([ 1.0514, -0.5740, -0.1654, -0.3629]) tensor([0.5760, 0.1134, 0.1706, 0.1400]) -Greedy action tensor([0.4049, 0.0200, 0.0018, 0.0777]) tensor([0.3258, 0.2217, 0.2177, 0.2349]) -Greedy action tensor([ 0.7290, -0.6940, 0.1714, -0.5332]) tensor([0.4770, 0.1149, 0.2731, 0.1350]) -Greedy action tensor([ 0.9471, -0.6587, 0.0192, -0.4901]) tensor([0.5453, 0.1095, 0.2156, 0.1296]) -Greedy action tensor([ 0.6762, -0.3870, -0.0663, -0.2379]) tensor([0.4500, 0.1554, 0.2142, 0.1804]) -Greedy action tensor([ 0.7131, -0.7153, 0.0774, -0.4355]) tensor([0.4793, 0.1149, 0.2538, 0.1520]) -Greedy action tensor([ 0.7680, -0.7399, 0.0530, -0.5900]) tensor([0.5082, 0.1125, 0.2486, 0.1307]) -Greedy action tensor([ 1.0881, -0.5131, -0.1082, -0.3842]) tensor([0.5769, 0.1163, 0.1744, 0.1323]) -Greedy action tensor([ 0.6550, -0.1788, 0.0038, -0.1422]) tensor([0.4156, 0.1805, 0.2167, 0.1872]) -Greedy action tensor([ 0.9546, -0.7796, -0.0287, -0.4834]) tensor([0.5593, 0.0987, 0.2092, 0.1328]) -Greedy action tensor([ 0.8217, -0.5882, -0.0069, -0.3430]) tensor([0.5018, 0.1225, 0.2191, 0.1566]) -Greedy action tensor([ 0.8222, -0.5898, -0.0644, -0.1932]) tensor([0.4955, 0.1207, 0.2042, 0.1795]) -Greedy action tensor([ 0.5150, -0.1891, 0.0510, -0.0712]) tensor([0.3732, 0.1846, 0.2346, 0.2076]) -Greedy action tensor([ 0.6372, -0.4506, 0.0039, -0.4444]) tensor([0.4531, 0.1527, 0.2405, 0.1536]) -Greedy action tensor([ 0.8183, -0.4839, 0.0138, -0.3483]) tensor([0.4925, 0.1339, 0.2203, 0.1534]) -Greedy action tensor([ 1.3468, -0.8331, 0.0681, -0.5369]) tensor([0.6479, 0.0732, 0.1804, 0.0985]) -Greedy action tensor([ 1.1839, -0.6742, -0.1246, -0.7155]) tensor([0.6346, 0.0990, 0.1715, 0.0950]) -Greedy action tensor([ 1.0737, -0.4931, 0.1199, -0.2899]) tensor([0.5406, 0.1128, 0.2083, 0.1383]) -Greedy action tensor([ 0.9015, -0.5125, 0.1178, -0.2042]) tensor([0.4924, 0.1197, 0.2249, 0.1630]) -Greedy action tensor([ 1.1356, -1.0002, 0.1143, -0.7171]) tensor([0.6116, 0.0723, 0.2202, 0.0959]) -Greedy action tensor([ 0.8803, -0.3859, -0.1671, -0.2922]) tensor([0.5148, 0.1451, 0.1806, 0.1594]) -Greedy action tensor([ 0.5247, -0.4830, 0.1396, -0.7955]) tensor([0.4324, 0.1579, 0.2942, 0.1155]) -Greedy action tensor([ 1.0005, -0.6059, -0.0125, -0.7701]) tensor([0.5767, 0.1157, 0.2094, 0.0982]) -Greedy action tensor([ 0.5655, -0.5171, -0.1214, -0.1440]) tensor([0.4285, 0.1451, 0.2156, 0.2108]) -Greedy action tensor([ 1.1912, -1.0039, 0.0922, -0.6944]) tensor([0.6265, 0.0697, 0.2087, 0.0951]) -Greedy action tensor([ 0.7490, -0.5717, -0.0476, -0.3169]) tensor([0.4849, 0.1294, 0.2186, 0.1670]) -Greedy action tensor([ 1.0647, -0.3792, -0.1543, -0.2444]) tensor([0.5551, 0.1310, 0.1640, 0.1499]) -Greedy action tensor([ 0.5906, -0.3212, -0.0898, -0.2640]) tensor([0.4285, 0.1722, 0.2170, 0.1823]) -Greedy action tensor([ 0.7288, -0.1470, 0.0314, -0.2977]) tensor([0.4400, 0.1833, 0.2191, 0.1576]) -Greedy action tensor([ 1.1408, -0.6946, -0.0722, -0.6353]) tensor([0.6149, 0.0981, 0.1828, 0.1041]) -Greedy action tensor([-1.4133, 0.6523, 0.4433, -0.3301]) tensor([0.0548, 0.4324, 0.3509, 0.1619]) -Greedy action tensor([-1.9267, -0.4210, 0.6550, -0.1686]) tensor([0.0408, 0.1838, 0.5389, 0.2365]) -Greedy action tensor([-1.8398, -0.4807, 0.6260, -0.1318]) tensor([0.0451, 0.1755, 0.5307, 0.2487]) -Greedy action tensor([-1.6977, -0.1506, 0.5343, -0.0438]) tensor([0.0494, 0.2321, 0.4603, 0.2582]) -Greedy action tensor([-1.6652, -0.3597, 0.5250, -0.0550]) tensor([0.0537, 0.1980, 0.4797, 0.2686]) -Greedy action tensor([-1.4478, 0.3117, 0.3375, -0.0842]) tensor([0.0599, 0.3483, 0.3574, 0.2344]) -Greedy action tensor([-1.5348, -0.2493, 0.4532, -0.0620]) tensor([0.0614, 0.2222, 0.4485, 0.2679]) -Greedy action tensor([-1.8493, -0.2604, 0.6149, -0.1155]) tensor([0.0429, 0.2101, 0.5042, 0.2429]) -Greedy action tensor([-1.8323, -0.4637, 0.6093, -0.1464]) tensor([0.0458, 0.1801, 0.5267, 0.2474]) -Greedy action tensor([-1.6480, -0.2041, 0.6264, -0.0665]) tensor([0.0505, 0.2138, 0.4905, 0.2453]) -Greedy action tensor([-1.7381, -0.3676, 0.5502, -0.0983]) tensor([0.0501, 0.1974, 0.4942, 0.2584]) -Greedy action tensor([-1.6256, -0.5169, 0.4885, -0.0050]) tensor([0.0576, 0.1745, 0.4769, 0.2911]) -Greedy action tensor([-1.7463, -0.4952, 0.7187, 0.1544]) tensor([0.0436, 0.1523, 0.5126, 0.2916]) -Greedy action tensor([-1.2663, 0.7698, 0.2148, 0.1629]) tensor([0.0580, 0.4445, 0.2552, 0.2423]) -Greedy action tensor([-1.8606, -0.4791, 0.6271, -0.1474]) tensor([0.0443, 0.1764, 0.5334, 0.2459]) -Greedy action tensor([-1.7540, -0.4391, 0.6296, -0.0236]) tensor([0.0471, 0.1756, 0.5112, 0.2661]) -Greedy action tensor([-1.9388, -0.4340, 0.6635, -0.1779]) tensor([0.0403, 0.1815, 0.5438, 0.2344]) -Greedy action tensor([-0.8950, -0.1479, 0.6274, 0.2904]) tensor([0.0912, 0.1925, 0.4180, 0.2984]) -Greedy action tensor([-1.9286, -0.4602, 0.6926, -0.1582]) tensor([0.0401, 0.1739, 0.5508, 0.2352]) -Greedy action tensor([-1.4257, 0.2225, 0.3948, -0.1385]) tensor([0.0625, 0.3249, 0.3860, 0.2265]) -Greedy action tensor([-1.8779, -0.4547, 0.6377, -0.1473]) tensor([0.0432, 0.1791, 0.5341, 0.2436]) -Greedy action tensor([-1.8784, -0.4175, 0.6292, -0.1537]) tensor([0.0431, 0.1858, 0.5292, 0.2419]) -Greedy action tensor([-1.7774, -0.2219, 0.5521, -0.1022]) tensor([0.0468, 0.2219, 0.4812, 0.2501]) -Greedy action tensor([-1.8992, -0.4258, 0.6626, -0.1413]) tensor([0.0415, 0.1809, 0.5372, 0.2404]) -Greedy action tensor([-0.5743, 0.1891, 0.1657, -0.1138]) tensor([0.1465, 0.3143, 0.3070, 0.2322]) -Greedy action tensor([-0.0135, 0.9938, -0.0316, 0.5191]) tensor([0.1557, 0.4263, 0.1529, 0.2652]) -Greedy action tensor([-1.7627, -0.4086, 0.5736, -0.1204]) tensor([0.0491, 0.1900, 0.5074, 0.2535]) -Greedy action tensor([-1.6237, -0.5091, 0.5197, -0.0615]) tensor([0.0577, 0.1757, 0.4917, 0.2750]) -Greedy action tensor([-0.6806, 0.8120, 0.0805, 0.0119]) tensor([0.1043, 0.4640, 0.2233, 0.2085]) -Greedy action tensor([-1.9208, -0.4506, 0.6592, -0.1687]) tensor([0.0411, 0.1789, 0.5428, 0.2372]) -Greedy action tensor([-1.9144, -0.4140, 0.6507, -0.1618]) tensor([0.0412, 0.1848, 0.5361, 0.2379]) -Greedy action tensor([-1.8424, -0.4237, 0.6157, -0.1319]) tensor([0.0448, 0.1849, 0.5228, 0.2475]) -Greedy action tensor([-0.1262, 0.8744, 0.1383, 0.6757]) tensor([0.1379, 0.3750, 0.1796, 0.3075]) -Greedy action tensor([-1.8236, -0.3407, 0.5876, -0.1244]) tensor([0.0454, 0.2001, 0.5062, 0.2484]) -Greedy action tensor([-1.8405, -0.3578, 0.6304, -0.1031]) tensor([0.0436, 0.1922, 0.5163, 0.2479]) -Greedy action tensor([-0.9241, 0.2156, 0.4593, 0.3614]) tensor([0.0852, 0.2665, 0.3400, 0.3083]) -Greedy action tensor([-1.8771, -0.4223, 0.6308, -0.1506]) tensor([0.0431, 0.1848, 0.5296, 0.2424]) -Greedy action tensor([-1.4542, -0.3339, 0.4093, -0.0165]) tensor([0.0679, 0.2082, 0.4379, 0.2860]) -Greedy action tensor([-1.9003, -0.4439, 0.6432, -0.1571]) tensor([0.0421, 0.1808, 0.5362, 0.2409]) -Greedy action tensor([-1.8972, -0.4376, 0.6454, -0.1556]) tensor([0.0422, 0.1814, 0.5359, 0.2406]) -Greedy action tensor([-1.8816, -0.1947, 0.5882, -0.1266]) tensor([0.0417, 0.2251, 0.4924, 0.2409]) -Greedy action tensor([-1.9284, -0.4339, 0.6592, -0.1722]) tensor([0.0407, 0.1816, 0.5418, 0.2359]) -Greedy action tensor([-1.5124, -0.4714, 0.4466, -0.0593]) tensor([0.0658, 0.1863, 0.4666, 0.2813]) -Greedy action tensor([-1.8834, -0.4498, 0.6412, -0.1496]) tensor([0.0428, 0.1797, 0.5349, 0.2426]) -Greedy action tensor([-1.8547, -0.4173, 0.6170, -0.1370]) tensor([0.0442, 0.1861, 0.5234, 0.2463]) -Greedy action tensor([-1.8537, -0.4315, 0.6218, -0.1397]) tensor([0.0443, 0.1836, 0.5264, 0.2458]) -Greedy action tensor([-1.8037, 0.0895, 0.5402, -0.1171]) tensor([0.0426, 0.2830, 0.4442, 0.2302]) -Greedy action tensor([-1.8870, -0.4443, 0.6396, -0.1560]) tensor([0.0428, 0.1809, 0.5349, 0.2414]) -Greedy action tensor([-0.8546, 0.1654, 0.2554, 0.0624]) tensor([0.1074, 0.2979, 0.3260, 0.2687]) -Greedy action tensor([-1.9048, -0.4377, 0.6435, -0.1632]) tensor([0.0420, 0.1820, 0.5366, 0.2395]) -Greedy action tensor([-1.9113, -0.4360, 0.6686, -0.1528]) tensor([0.0410, 0.1794, 0.5414, 0.2381]) -Greedy action tensor([-1.7830, -0.2383, -0.0366, -0.5672]) tensor([0.0676, 0.3168, 0.3876, 0.2280]) -Greedy action tensor([-1.9086, -0.4073, 0.6450, -0.1619]) tensor([0.0415, 0.1864, 0.5339, 0.2382]) -Greedy action tensor([-1.9401, -0.4477, 0.6678, -0.1760]) tensor([0.0402, 0.1790, 0.5460, 0.2348]) -Greedy action tensor([-1.8499, -0.2445, 0.5916, -0.1271]) tensor([0.0433, 0.2159, 0.4980, 0.2427]) -Greedy action tensor([0.4301, 0.5512, 0.5188, 1.1701]) tensor([0.1881, 0.2123, 0.2055, 0.3942]) -Greedy action tensor([-0.8662, 0.8739, 0.0883, 0.3671]) tensor([0.0786, 0.4477, 0.2041, 0.2697]) -Greedy action tensor([-1.6656, -0.4107, 0.5601, 0.0226]) tensor([0.0521, 0.1829, 0.4829, 0.2821]) -Greedy action tensor([-1.7019, -0.3256, 0.5310, -0.0978]) tensor([0.0519, 0.2056, 0.4842, 0.2582]) -Greedy action tensor([-1.8478, -0.3758, 0.6330, -0.1255]) tensor([0.0437, 0.1903, 0.5217, 0.2444]) -Greedy action tensor([-1.7825, -0.4353, 0.6199, -0.1064]) tensor([0.0471, 0.1811, 0.5202, 0.2516]) -Greedy action tensor([-1.9159, -0.4371, 0.6520, -0.1665]) tensor([0.0414, 0.1815, 0.5393, 0.2379]) -Greedy action tensor([-1.8738, -0.4598, 0.6381, -0.1367]) tensor([0.0432, 0.1779, 0.5332, 0.2457]) -Greedy action tensor([-1.7014, -0.2805, 0.5236, -0.0997]) tensor([0.0517, 0.2139, 0.4781, 0.2563]) -Greedy action tensor([-1.8891, -0.3348, 0.6313, -0.1484]) tensor([0.0419, 0.1983, 0.5210, 0.2389]) -Greedy action tensor([-1.8675, -0.3676, 0.6234, -0.1486]) tensor([0.0432, 0.1937, 0.5219, 0.2412]) -Greedy action tensor([-1.8471, -0.4067, 0.6122, -0.1413]) tensor([0.0446, 0.1883, 0.5216, 0.2455]) -Greedy action tensor([-1.8987, -0.4497, 0.6449, -0.1425]) tensor([0.0421, 0.1791, 0.5352, 0.2436]) -Greedy action tensor([-0.9570, -0.2907, 0.2457, -0.0241]) tensor([0.1134, 0.2208, 0.3775, 0.2883]) -Greedy action tensor([-1.9221, -0.4150, 0.6572, -0.1682]) tensor([0.0409, 0.1844, 0.5387, 0.2360]) -Greedy action tensor([-1.6277, -0.4517, 0.5090, -0.0215]) tensor([0.0565, 0.1832, 0.4787, 0.2816]) -Greedy action tensor([-1.3576, -0.3668, 0.7567, 0.5399]) tensor([0.0536, 0.1444, 0.4443, 0.3577]) -Greedy action tensor([-1.8983, -0.3385, 0.6333, -0.1544]) tensor([0.0416, 0.1978, 0.5228, 0.2378]) -Greedy action tensor([-1.5520, -0.4808, 0.5270, -0.0815]) tensor([0.0615, 0.1794, 0.4916, 0.2675]) -Greedy action tensor([0.1482, 0.8189, 0.4068, 0.9864]) tensor([0.1524, 0.2980, 0.1973, 0.3523]) -Greedy action tensor([-0.9062, 0.3013, 0.2948, -0.2076]) tensor([0.1033, 0.3456, 0.3433, 0.2077]) -Greedy action tensor([-1.7120, -0.4852, 0.5576, -0.0710]) tensor([0.0520, 0.1772, 0.5027, 0.2681]) -Greedy action tensor([-0.7845, 0.8993, 0.0394, 0.3154]) tensor([0.0857, 0.4616, 0.1953, 0.2574]) -Greedy action tensor([-1.7178, -0.2442, 0.5312, -0.1029]) tensor([0.0503, 0.2197, 0.4770, 0.2530]) -Greedy action tensor([-1.8727, -0.4615, 0.6364, -0.1471]) tensor([0.0435, 0.1782, 0.5343, 0.2441]) -Greedy action tensor([-1.7712, -0.2071, 0.5306, -0.0869]) tensor([0.0473, 0.2258, 0.4722, 0.2547]) -Greedy action tensor([-1.9470, -0.4529, 0.6680, -0.1815]) tensor([0.0401, 0.1784, 0.5474, 0.2341]) -Greedy action tensor([ 0.8816, -0.3352, -0.3977, 0.0811]) tensor([0.4942, 0.1464, 0.1375, 0.2219]) -Greedy action tensor([-0.3749, -0.8242, -0.9520, 0.2040]) tensor([0.2510, 0.1602, 0.1410, 0.4478]) -Greedy action tensor([-0.8820, -0.0705, -0.9301, 0.1811]) tensor([0.1409, 0.3171, 0.1342, 0.4078]) -Greedy action tensor([-1.0996, -0.6155, 0.9018, -1.6070]) tensor([0.0941, 0.1527, 0.6965, 0.0567]) -Greedy action tensor([-0.4069, -0.9257, -0.8646, -0.9905]) tensor([0.3590, 0.2137, 0.2271, 0.2003]) -Greedy action tensor([-1.1364, -0.9088, 0.5626, -0.5380]) tensor([0.1048, 0.1316, 0.5730, 0.1906]) -Greedy action tensor([-0.1590, -1.6357, -0.4028, 0.1166]) tensor([0.3004, 0.0686, 0.2354, 0.3957]) -Greedy action tensor([-1.7895, -1.1093, 1.1221, -1.4922]) tensor([0.0440, 0.0869, 0.8097, 0.0593]) -Greedy action tensor([ 0.8891, -0.5081, -0.5497, 0.5495]) tensor([0.4552, 0.1126, 0.1080, 0.3242]) -Greedy action tensor([-0.6097, 0.1928, -0.5637, -1.1530]) tensor([0.2058, 0.4592, 0.2155, 0.1195]) -Greedy action tensor([-0.6763, -1.0919, 0.0231, -0.7154]) tensor([0.2158, 0.1424, 0.4343, 0.2075]) -Greedy action tensor([-0.8072, -0.8011, 0.2165, -1.0321]) tensor([0.1789, 0.1800, 0.4981, 0.1429]) -Greedy action tensor([0.9019, 0.0063, 0.1580, 0.0082]) tensor([0.4362, 0.1781, 0.2073, 0.1785]) -Greedy action tensor([ 0.3943, -0.4077, -0.4187, -0.5575]) tensor([0.4390, 0.1969, 0.1947, 0.1695]) -Greedy action tensor([ 1.1960, -0.7304, -0.1444, -0.5772]) tensor([0.6340, 0.0924, 0.1660, 0.1077]) -Greedy action tensor([-0.0160, -0.5599, -0.1050, -1.0195]) tensor([0.3494, 0.2028, 0.3197, 0.1281]) -Greedy action tensor([ 0.2479, -0.7805, 0.3450, -1.0018]) tensor([0.3642, 0.1302, 0.4013, 0.1044]) -Greedy action tensor([-1.3061, -1.1175, 0.1282, 0.2642]) tensor([0.0892, 0.1077, 0.3743, 0.4288]) -Greedy action tensor([-0.2983, -1.3233, 0.8942, -0.2736]) tensor([0.1761, 0.0632, 0.5802, 0.1805]) -Greedy action tensor([-0.1369, -0.0470, 0.1255, -0.2204]) tensor([0.2318, 0.2536, 0.3013, 0.2132]) -Greedy action tensor([-1.0427, -1.6142, 0.5355, -1.0148]) tensor([0.1344, 0.0759, 0.6514, 0.1382]) -Greedy action tensor([-0.1670, 0.0556, 0.6978, -0.7922]) tensor([0.1938, 0.2422, 0.4603, 0.1037]) -Greedy action tensor([-0.1494, -0.3040, 0.0216, -0.5156]) tensor([0.2676, 0.2293, 0.3175, 0.1856]) -Greedy action tensor([-0.2573, 0.0322, -0.9384, -1.0971]) tensor([0.3055, 0.4080, 0.1546, 0.1319]) -Greedy action tensor([-0.7195, -0.2860, 0.1931, -0.8490]) tensor([0.1692, 0.2609, 0.4213, 0.1486]) -Greedy action tensor([ 0.3335, -0.6329, -1.1807, 0.2783]) tensor([0.3927, 0.1494, 0.0864, 0.3716]) -Greedy action tensor([ 0.3576, -0.6633, 0.2409, -0.2349]) tensor([0.3568, 0.1285, 0.3174, 0.1973]) -Greedy action tensor([ 1.0252, -1.3475, 0.1127, 0.2091]) tensor([0.5163, 0.0481, 0.2073, 0.2283]) -Greedy action tensor([ 0.5386, -2.3073, -0.3096, 0.1825]) tensor([0.4573, 0.0266, 0.1958, 0.3203]) -Greedy action tensor([ 1.2254, -0.4635, -0.4083, 0.6733]) tensor([0.5113, 0.0945, 0.0998, 0.2944]) -Greedy action tensor([ 1.0442, -0.6860, -0.5909, 0.3293]) tensor([0.5372, 0.0952, 0.1047, 0.2628]) -Greedy action tensor([ 0.6236, -0.8051, 0.0970, -0.6238]) tensor([0.4723, 0.1132, 0.2789, 0.1357]) -Greedy action tensor([ 0.9757, -0.3667, -0.5403, 0.4706]) tensor([0.4798, 0.1253, 0.1054, 0.2895]) -Greedy action tensor([ 0.1985, -0.8430, 1.0593, -1.0205]) tensor([0.2492, 0.0879, 0.5893, 0.0736]) -Greedy action tensor([-0.6385, -0.3688, -1.6247, -0.2141]) tensor([0.2375, 0.3110, 0.0886, 0.3630]) -Greedy action tensor([ 0.5768, -0.1584, 0.1754, -1.1038]) tensor([0.4283, 0.2053, 0.2867, 0.0798]) -Greedy action tensor([-0.1503, 0.1753, -0.1202, -0.1571]) tensor([0.2268, 0.3141, 0.2338, 0.2253]) -Greedy action tensor([ 0.3141, -1.3506, 1.0367, -0.3237]) tensor([0.2647, 0.0501, 0.5453, 0.1399]) -Greedy action tensor([ 0.0980, -0.0022, 1.1052, 0.2806]) tensor([0.1712, 0.1548, 0.4686, 0.2054]) -Greedy action tensor([ 0.0701, -1.9179, 0.2716, 0.3352]) tensor([0.2729, 0.0374, 0.3339, 0.3558]) -Greedy action tensor([ 0.4958, -0.5224, 1.0591, -0.4874]) tensor([0.2864, 0.1035, 0.5030, 0.1071]) -Greedy action tensor([ 1.4717, -0.0303, -0.2009, 0.1786]) tensor([0.5935, 0.1322, 0.1114, 0.1629]) -Greedy action tensor([-0.2347, -0.1101, 0.2339, -0.4837]) tensor([0.2217, 0.2512, 0.3543, 0.1729]) -Greedy action tensor([ 0.2147, -1.3858, 0.1329, -0.8366]) tensor([0.4044, 0.0816, 0.3726, 0.1413]) -Greedy action tensor([ 0.7696, -1.5515, -0.4853, -0.2285]) tensor([0.5708, 0.0560, 0.1628, 0.2104]) -Greedy action tensor([-0.6599, 0.3217, -0.8817, 0.2136]) tensor([0.1457, 0.3887, 0.1167, 0.3489]) -Greedy action tensor([ 1.1450, 0.1600, 0.4168, -0.3824]) tensor([0.4823, 0.1801, 0.2329, 0.1047]) -Greedy action tensor([-0.8175, -1.1175, 0.5019, -0.9474]) tensor([0.1572, 0.1165, 0.5882, 0.1381]) -Greedy action tensor([-0.1245, -0.9016, 0.5217, -0.1695]) tensor([0.2313, 0.1063, 0.4413, 0.2211]) -Greedy action tensor([-0.5673, -0.9985, -0.2086, -0.4692]) tensor([0.2390, 0.1553, 0.3421, 0.2636]) -Greedy action tensor([-1.3810, -0.6450, 0.4214, -1.0976]) tensor([0.0954, 0.1992, 0.5787, 0.1267]) -Greedy action tensor([-0.0364, -0.9025, 0.1155, 0.3731]) tensor([0.2445, 0.1028, 0.2846, 0.3682]) -Greedy action tensor([ 0.0430, -0.6129, -0.6456, -0.7315]) tensor([0.4029, 0.2091, 0.2024, 0.1857]) -Greedy action tensor([-0.1586, -1.1836, -1.0576, 0.9624]) tensor([0.2069, 0.0742, 0.0842, 0.6347]) -Greedy action tensor([ 0.0029, -0.7628, 0.6813, -0.2594]) tensor([0.2378, 0.1106, 0.4687, 0.1829]) -Greedy action tensor([ 0.5598, 0.0701, 0.4100, -0.2149]) tensor([0.3408, 0.2088, 0.2934, 0.1570]) -Greedy action tensor([ 2.0831, -0.3542, 0.1395, 0.6316]) tensor([0.6827, 0.0597, 0.0978, 0.1599]) -Greedy action tensor([ 0.7667, 0.1669, -0.5788, -0.0246]) tensor([0.4420, 0.2426, 0.1151, 0.2003]) -Greedy action tensor([ 0.4737, -0.0594, 0.3773, -0.1513]) tensor([0.3300, 0.1936, 0.2997, 0.1766]) -Greedy action tensor([ 0.3374, -0.3017, 0.5227, 0.3122]) tensor([0.2698, 0.1424, 0.3247, 0.2631]) -Greedy action tensor([-0.2497, -0.7877, 0.1929, -0.0039]) tensor([0.2263, 0.1321, 0.3523, 0.2893]) -Greedy action tensor([ 0.0137, -1.2441, 0.7202, -0.6336]) tensor([0.2608, 0.0741, 0.5286, 0.1365]) -Greedy action tensor([ 0.9892, -0.8927, 0.3036, -0.2001]) tensor([0.5101, 0.0777, 0.2570, 0.1553]) -Greedy action tensor([ 0.4957, 0.6729, -0.6555, -0.1546]) tensor([0.3298, 0.3938, 0.1043, 0.1721]) -Greedy action tensor([ 0.3124, 0.6631, -0.1348, -0.7082]) tensor([0.2924, 0.4152, 0.1870, 0.1054]) -Greedy action tensor([-1.1411, -1.4660, 0.6945, -0.4364]) tensor([0.0999, 0.0722, 0.6260, 0.2020]) -Greedy action tensor([ 0.8012, -0.8954, 0.8468, 0.4637]) tensor([0.3397, 0.0623, 0.3556, 0.2424]) -Greedy action tensor([-0.7906, -0.8053, 0.2875, -0.5305]) tensor([0.1607, 0.1584, 0.4724, 0.2085]) -Greedy action tensor([ 0.4935, -1.4296, 1.0002, -0.5346]) tensor([0.3161, 0.0462, 0.5246, 0.1131]) -Greedy action tensor([-0.5274, -0.6027, -0.0148, 0.6497]) tensor([0.1462, 0.1356, 0.2440, 0.4743]) -Greedy action tensor([-0.0789, -0.5783, -0.1450, -0.5445]) tensor([0.3154, 0.1914, 0.2952, 0.1980]) -Greedy action tensor([ 0.0775, -0.3702, 0.2566, 0.1420]) tensor([0.2563, 0.1638, 0.3065, 0.2734]) -Greedy action tensor([-0.7775, -1.5024, 0.4546, -0.1611]) tensor([0.1478, 0.0716, 0.5068, 0.2738]) -Greedy action tensor([ 0.2336, 0.2792, 0.2510, -0.2155]) tensor([0.2701, 0.2827, 0.2748, 0.1724]) -Greedy action tensor([ 0.4283, -0.5486, -0.0461, 0.0411]) tensor([0.3735, 0.1406, 0.2324, 0.2536]) -Greedy action tensor([ 0.3671, 0.1450, -0.3124, -0.4081]) tensor([0.3612, 0.2893, 0.1831, 0.1664]) -Greedy action tensor([ 1.1116, 0.1917, -0.7315, 0.7373]) tensor([0.4455, 0.1776, 0.0705, 0.3064]) -Greedy action tensor([ 0.1613, -0.2693, 0.1338, -1.4495]) tensor([0.3543, 0.2303, 0.3447, 0.0708]) -Greedy action tensor([-0.3370, 0.5804, 0.1939, -0.7189]) tensor([0.1699, 0.4252, 0.2889, 0.1160]) -Greedy action tensor([-1.8105, -0.4187, -0.7170, 0.0338]) tensor([0.0698, 0.2807, 0.2083, 0.4413]) -Greedy action tensor([ 1.0640, -1.5375, -0.2333, 0.1483]) tensor([0.5722, 0.0424, 0.1564, 0.2290]) -Greedy action tensor([ 1.4216, -0.5937, -0.4017, 0.4237]) tensor([0.6012, 0.0801, 0.0971, 0.2216]) -Greedy action tensor([ 1.1372, -0.1363, -0.6330, 0.1190]) tensor([0.5520, 0.1545, 0.0940, 0.1994]) -Greedy action tensor([ 1.1157, -0.3965, -0.6614, 0.3184]) tensor([0.5434, 0.1198, 0.0919, 0.2448]) -Greedy action tensor([ 1.2579, -0.7687, -0.2260, 0.5726]) tensor([0.5369, 0.0708, 0.1217, 0.2706]) -Greedy action tensor([ 0.7621, -0.3405, -0.1748, 0.4598]) tensor([0.4060, 0.1348, 0.1591, 0.3001]) -Greedy action tensor([ 1.6447, -0.3735, -0.6449, 0.3445]) tensor([0.6637, 0.0882, 0.0672, 0.1808]) -Greedy action tensor([ 1.9006, -0.2985, -0.5665, 0.4539]) tensor([0.6988, 0.0775, 0.0593, 0.1644]) -Greedy action tensor([ 1.6252, -0.6495, -0.3451, 0.1631]) tensor([0.6784, 0.0698, 0.0946, 0.1572]) -Greedy action tensor([ 2.6071, -0.8888, -0.2442, 0.7970]) tensor([0.7989, 0.0242, 0.0462, 0.1307]) -Greedy action tensor([ 1.5753, -0.4296, 0.1082, 0.2327]) tensor([0.6148, 0.0828, 0.1418, 0.1606]) -Greedy action tensor([ 1.2685, 0.0977, -0.4877, -0.0795]) tensor([0.5739, 0.1780, 0.0991, 0.1491]) -Greedy action tensor([ 1.4365, -0.8029, -0.2523, 0.1984]) tensor([0.6324, 0.0674, 0.1168, 0.1834]) -Greedy action tensor([ 2.2847, -1.0871, -0.0837, 0.7989]) tensor([0.7384, 0.0253, 0.0691, 0.1671]) -Greedy action tensor([ 1.4387, -0.5104, -0.3604, 0.6650]) tensor([0.5653, 0.0805, 0.0935, 0.2607]) -Greedy action tensor([ 0.8872, -0.3535, -0.2091, 0.3553]) tensor([0.4523, 0.1308, 0.1511, 0.2657]) -Greedy action tensor([ 1.4913, -0.6119, -1.1151, 0.1559]) tensor([0.6854, 0.0837, 0.0506, 0.1803]) -Greedy action tensor([ 1.4812, -0.6954, -0.3013, 0.1037]) tensor([0.6520, 0.0739, 0.1097, 0.1644]) -Greedy action tensor([ 1.9786, -1.1722, 0.0701, 0.6306]) tensor([0.6892, 0.0295, 0.1022, 0.1790]) -Greedy action tensor([ 1.0860, -0.1406, -0.5869, 0.0661]) tensor([0.5430, 0.1593, 0.1019, 0.1958]) -Greedy action tensor([ 1.2070, -0.3755, -0.9323, 0.4509]) tensor([0.5578, 0.1146, 0.0657, 0.2619]) -Greedy action tensor([ 1.5456, -0.5573, -1.2636, 0.4141]) tensor([0.6645, 0.0811, 0.0400, 0.2143]) -Greedy action tensor([ 1.5805, -1.1029, -0.5410, 0.3685]) tensor([0.6730, 0.0460, 0.0807, 0.2003]) -Greedy action tensor([ 1.5493, -0.0016, -0.9881, 0.4802]) tensor([0.6118, 0.1297, 0.0484, 0.2101]) -Greedy action tensor([ 1.6634, -0.6532, -0.6604, 0.7047]) tensor([0.6330, 0.0624, 0.0620, 0.2427]) -Greedy action tensor([ 1.6266, -0.7167, -0.3594, 0.6714]) tensor([0.6180, 0.0593, 0.0848, 0.2378]) -Greedy action tensor([ 1.4188, -0.5335, -0.7379, 0.3061]) tensor([0.6304, 0.0895, 0.0729, 0.2072]) -Greedy action tensor([ 2.1117, -0.8923, -0.0582, 0.6810]) tensor([0.7128, 0.0353, 0.0814, 0.1704]) -Greedy action tensor([ 1.5139, -0.9944, -0.2444, 0.4183]) tensor([0.6297, 0.0513, 0.1085, 0.2105]) -Greedy action tensor([ 1.7513, -0.1306, -0.7994, 0.0668]) tensor([0.7063, 0.1076, 0.0551, 0.1310]) -Greedy action tensor([ 1.2123, -0.5503, 0.0929, 0.7601]) tensor([0.4685, 0.0804, 0.1530, 0.2981]) -Greedy action tensor([ 1.1099, -0.2039, -0.3264, -0.1101]) tensor([0.5550, 0.1492, 0.1320, 0.1638]) -Greedy action tensor([ 1.3223e+00, 1.0880e-03, -3.6046e-01, -7.3925e-02]) tensor([0.5882, 0.1569, 0.1093, 0.1456]) -Greedy action tensor([ 1.1415, -0.3321, -0.5508, 0.2554]) tensor([0.5478, 0.1255, 0.1008, 0.2258]) -Greedy action tensor([ 1.3200, -0.3409, -0.1985, 0.1234]) tensor([0.5844, 0.1110, 0.1280, 0.1766]) -Greedy action tensor([ 1.6222, 0.6764, -0.4514, 0.2309]) tensor([0.5673, 0.2203, 0.0713, 0.1411]) -Greedy action tensor([ 1.5826, 0.3177, -0.2780, 0.2807]) tensor([0.5848, 0.1651, 0.0910, 0.1591]) -Greedy action tensor([ 1.0591, -0.3323, -0.6847, 0.0618]) tensor([0.5579, 0.1388, 0.0975, 0.2058]) -Greedy action tensor([ 1.4709, -0.7310, -0.5707, 0.5145]) tensor([0.6155, 0.0681, 0.0799, 0.2365]) -Greedy action tensor([ 1.3700, -0.5652, -0.0877, 0.5198]) tensor([0.5542, 0.0800, 0.1290, 0.2368]) -Greedy action tensor([ 0.8183, -0.3757, 0.0134, 0.1385]) tensor([0.4431, 0.1343, 0.1981, 0.2245]) -Greedy action tensor([ 1.5268, -0.5847, -0.3945, 0.2012]) tensor([0.6523, 0.0790, 0.0955, 0.1733]) -Greedy action tensor([ 1.3017, -0.6207, -0.0774, 0.3102]) tensor([0.5653, 0.0827, 0.1423, 0.2097]) -Greedy action tensor([ 1.2963, -0.1352, -0.1215, 0.0398]) tensor([0.5663, 0.1353, 0.1372, 0.1612]) -Greedy action tensor([ 1.1061, -0.5612, -0.1891, 0.2994]) tensor([0.5239, 0.0989, 0.1435, 0.2338]) -Greedy action tensor([ 1.8741, -0.3408, -0.6825, 0.2620]) tensor([0.7214, 0.0787, 0.0560, 0.1439]) -Greedy action tensor([ 1.5280, -0.4328, -0.2024, 0.1108]) tensor([0.6409, 0.0902, 0.1136, 0.1553]) -Greedy action tensor([ 2.0182, -1.0504, -0.2499, 0.7820]) tensor([0.6942, 0.0323, 0.0719, 0.2017]) -Greedy action tensor([ 1.5363, -0.8095, -0.1657, 0.3420]) tensor([0.6325, 0.0606, 0.1153, 0.1916]) -Greedy action tensor([ 1.5639, -0.2420, -0.1368, 0.4131]) tensor([0.6012, 0.0988, 0.1098, 0.1902]) -Greedy action tensor([ 1.1500, -0.3549, -1.0612, 0.3120]) tensor([0.5668, 0.1259, 0.0621, 0.2452]) -Greedy action tensor([ 1.2767, -0.0771, -0.3845, -0.0646]) tensor([0.5849, 0.1511, 0.1111, 0.1530]) -Greedy action tensor([ 2.2249, -1.1032, -0.2773, 0.7572]) tensor([0.7417, 0.0266, 0.0607, 0.1709]) -Greedy action tensor([ 1.2620, -0.2902, -0.4948, 0.3841]) tensor([0.5556, 0.1177, 0.0959, 0.2309]) -Greedy action tensor([ 1.3785, -0.6948, -0.2683, 0.2884]) tensor([0.6044, 0.0760, 0.1164, 0.2032]) -Greedy action tensor([ 1.2382, -0.2202, -0.6235, 0.2071]) tensor([0.5732, 0.1333, 0.0891, 0.2044]) -Greedy action tensor([ 2.1742, -0.4562, -0.9000, 0.8224]) tensor([0.7262, 0.0523, 0.0336, 0.1879]) -Greedy action tensor([ 0.9865, -0.4198, -0.4710, 0.3089]) tensor([0.5036, 0.1234, 0.1172, 0.2557]) -Greedy action tensor([ 1.4331, 0.0122, -0.8390, 0.2980]) tensor([0.6002, 0.1450, 0.0619, 0.1929]) -Greedy action tensor([ 1.7242, -0.6222, -0.1100, 0.2681]) tensor([0.6718, 0.0643, 0.1073, 0.1566]) -Greedy action tensor([ 1.6067, -0.3758, -0.9019, 0.1621]) tensor([0.6873, 0.0947, 0.0559, 0.1621]) -Greedy action tensor([ 1.7020, -0.9157, -0.3849, 0.6310]) tensor([0.6495, 0.0474, 0.0806, 0.2225]) -Greedy action tensor([ 1.5774, -0.9059, -0.4631, 0.9659]) tensor([0.5695, 0.0475, 0.0740, 0.3090]) -Greedy action tensor([ 1.3000, -0.4031, -0.7910, 0.5114]) tensor([0.5681, 0.1035, 0.0702, 0.2582]) -Greedy action tensor([ 1.8322, -0.5549, -0.1609, 0.3808]) tensor([0.6838, 0.0628, 0.0932, 0.1602]) -Greedy action tensor([ 2.0896, -1.1007, -0.6168, 0.3504]) tensor([0.7791, 0.0321, 0.0520, 0.1369]) -Greedy action tensor([ 1.2767, 0.0907, -0.7546, -0.1471]) tensor([0.5962, 0.1821, 0.0782, 0.1436]) -Greedy action tensor([ 1.6174, 0.4040, -0.5788, -0.0166]) tensor([0.6236, 0.1853, 0.0694, 0.1217]) -Greedy action tensor([ 1.4767, -0.0237, -1.2469, 0.4169]) tensor([0.6115, 0.1364, 0.0401, 0.2119]) -Greedy action tensor([ 1.5366, -0.9588, -0.5006, 0.2205]) tensor([0.6752, 0.0557, 0.0880, 0.1811]) -Greedy action tensor([ 1.3688, -0.6691, -0.3893, 0.2359]) tensor([0.6155, 0.0802, 0.1061, 0.1982]) -Greedy action tensor([ 1.4382, -0.5873, -0.4402, 0.1728]) tensor([0.6382, 0.0842, 0.0975, 0.1801]) -Greedy action tensor([ 1.7998, -0.3482, -0.6279, 0.3396]) tensor([0.6958, 0.0812, 0.0614, 0.1616]) -Greedy action tensor([ 1.6402, -0.6946, -0.4256, 0.4011]) tensor([0.6608, 0.0640, 0.0837, 0.1914]) -Greedy action tensor([ 1.1471, -0.2325, -0.6930, 0.2225]) tensor([0.5534, 0.1393, 0.0879, 0.2195]) -Greedy action tensor([ 2.4144, 0.1876, -0.1356, -0.1394]) tensor([0.7913, 0.0854, 0.0618, 0.0616]) -Greedy action tensor([ 1.6595, -0.5153, -0.6290, 0.3303]) tensor([0.6758, 0.0768, 0.0685, 0.1789]) -Greedy action tensor([ 1.9181, -0.3708, -0.5686, 0.3132]) tensor([0.7218, 0.0732, 0.0600, 0.1450]) -Greedy action tensor([ 1.7090, -0.9684, -0.7149, -0.0288]) tensor([0.7501, 0.0516, 0.0664, 0.1319]) -Greedy action tensor([ 1.5444, -0.1840, -0.8861, 0.4996]) tensor([0.6183, 0.1098, 0.0544, 0.2175]) -Greedy action tensor([ 1.3703, -0.2107, -0.2664, 0.0797]) tensor([0.5968, 0.1228, 0.1162, 0.1642]) -Greedy action tensor([ 1.2224, -0.0249, -0.3220, -0.7094]) tensor([0.6077, 0.1746, 0.1297, 0.0880]) -Greedy action tensor([ 0.4529, -0.1017, 0.0390, -0.3816]) tensor([0.3746, 0.2151, 0.2476, 0.1626]) -Greedy action tensor([ 1.3947, -0.7003, -0.0364, -0.6234]) tensor([0.6689, 0.0823, 0.1599, 0.0889]) -Greedy action tensor([ 0.7143, -0.4530, -0.0721, -0.3559]) tensor([0.4740, 0.1475, 0.2159, 0.1626]) -Greedy action tensor([ 0.7836, -0.5560, -0.0047, -0.4132]) tensor([0.4954, 0.1298, 0.2252, 0.1497]) -Greedy action tensor([ 0.6101, -0.2546, 0.0242, -0.0886]) tensor([0.4040, 0.1702, 0.2249, 0.2009]) -Greedy action tensor([ 0.8049, -0.4298, 0.0435, -0.2589]) tensor([0.4755, 0.1383, 0.2221, 0.1641]) -Greedy action tensor([ 0.9626, -0.3610, 0.0680, -0.1068]) tensor([0.4955, 0.1319, 0.2025, 0.1701]) -Greedy action tensor([ 1.0815, -0.7835, -0.0240, -0.2859]) tensor([0.5745, 0.0890, 0.1902, 0.1464]) -Greedy action tensor([ 0.3395, -0.1220, -0.0697, -0.4515]) tensor([0.3639, 0.2294, 0.2417, 0.1650]) -Greedy action tensor([ 1.0079, -0.5387, -0.2037, -0.4242]) tensor([0.5716, 0.1217, 0.1702, 0.1365]) -Greedy action tensor([ 1.0131, -0.8609, 0.1016, -0.4175]) tensor([0.5572, 0.0855, 0.2240, 0.1333]) -Greedy action tensor([ 0.8894, -0.4134, -0.0745, -0.3184]) tensor([0.5123, 0.1392, 0.1954, 0.1531]) -Greedy action tensor([ 0.5077, 0.2178, 0.0020, -0.0604]) tensor([0.3427, 0.2565, 0.2067, 0.1942]) -Greedy action tensor([ 0.9425, -0.4677, 0.2466, -0.5933]) tensor([0.5107, 0.1247, 0.2547, 0.1099]) -Greedy action tensor([ 0.9377, -0.5753, 0.0977, -0.5247]) tensor([0.5309, 0.1169, 0.2292, 0.1230]) -Greedy action tensor([ 0.5294, -0.2251, -0.0776, -0.1311]) tensor([0.3950, 0.1857, 0.2152, 0.2040]) -Greedy action tensor([ 0.4556, -0.3571, -0.0988, -0.3855]) tensor([0.4083, 0.1811, 0.2345, 0.1761]) -Greedy action tensor([ 0.4423, -0.2176, -0.0175, -0.1643]) tensor([0.3713, 0.1919, 0.2344, 0.2024]) -Greedy action tensor([ 0.7416, -0.4025, 0.0502, -0.3627]) tensor([0.4649, 0.1481, 0.2329, 0.1541]) -Greedy action tensor([ 1.0975, -0.8091, -0.0313, -0.5140]) tensor([0.5982, 0.0889, 0.1935, 0.1194]) -Greedy action tensor([ 0.6156, -0.5197, -0.0401, -0.3344]) tensor([0.4490, 0.1443, 0.2331, 0.1737]) -Greedy action tensor([ 0.3647, -0.0873, -0.0833, -0.4261]) tensor([0.3665, 0.2332, 0.2341, 0.1662]) -Greedy action tensor([ 0.1582, 0.1712, -0.1074, -0.5663]) tensor([0.3063, 0.3103, 0.2349, 0.1484]) -Greedy action tensor([ 0.9995, -0.6753, -0.1113, -0.3797]) tensor([0.5655, 0.1059, 0.1862, 0.1424]) -Greedy action tensor([ 1.0010, -0.8480, 0.1103, -0.6050]) tensor([0.5655, 0.0890, 0.2321, 0.1135]) -Greedy action tensor([ 1.2258, -0.6972, -0.1046, -0.4270]) tensor([0.6242, 0.0912, 0.1650, 0.1195]) -Greedy action tensor([ 0.7917, -0.1433, -0.0751, -0.0868]) tensor([0.4488, 0.1762, 0.1886, 0.1864]) -Greedy action tensor([ 1.0527, -0.7616, 0.0350, -0.4933]) tensor([0.5756, 0.0938, 0.2080, 0.1227]) -Greedy action tensor([ 0.6212, -0.1561, 0.0805, -0.0792]) tensor([0.3939, 0.1811, 0.2294, 0.1956]) -Greedy action tensor([ 1.1741, -1.0937, 0.1742, -0.8267]) tensor([0.6224, 0.0644, 0.2290, 0.0842]) -Greedy action tensor([ 0.7641, -0.4845, -0.1100, -0.5300]) tensor([0.5055, 0.1450, 0.2109, 0.1386]) -Greedy action tensor([ 0.6049, -0.2556, -0.1956, -0.1624]) tensor([0.4280, 0.1810, 0.1922, 0.1987]) -Greedy action tensor([ 0.6959, -0.7490, 0.0467, -0.5193]) tensor([0.4866, 0.1147, 0.2543, 0.1444]) -Greedy action tensor([ 0.7512, -0.3298, 0.1846, -0.1584]) tensor([0.4330, 0.1469, 0.2457, 0.1744]) -Greedy action tensor([ 0.7959, -0.2975, -0.2085, -0.3292]) tensor([0.4936, 0.1654, 0.1808, 0.1602]) -Greedy action tensor([ 0.8716, -0.5232, 0.0336, -0.3654]) tensor([0.5074, 0.1258, 0.2195, 0.1473]) -Greedy action tensor([ 0.8868, -0.5366, 0.1584, -0.4762]) tensor([0.5052, 0.1217, 0.2438, 0.1293]) -Greedy action tensor([ 0.9469, -0.6752, 0.0880, -0.5079]) tensor([0.5392, 0.1065, 0.2284, 0.1259]) -Greedy action tensor([ 0.7580, -0.1247, 0.0684, 0.0169]) tensor([0.4181, 0.1729, 0.2098, 0.1992]) -Greedy action tensor([ 0.5827, -0.2917, -0.0141, -0.2508]) tensor([0.4163, 0.1736, 0.2292, 0.1809]) -Greedy action tensor([ 1.0052, -0.7889, 0.2200, -0.5250]) tensor([0.5438, 0.0904, 0.2480, 0.1177]) -Greedy action tensor([ 0.4867, -0.3241, -0.1685, -0.1713]) tensor([0.4029, 0.1791, 0.2093, 0.2087]) -Greedy action tensor([ 1.1805, -1.0493, 0.1059, -0.7678]) tensor([0.6283, 0.0676, 0.2145, 0.0895]) -Greedy action tensor([ 0.9694, -0.4083, -0.1288, -0.2957]) tensor([0.5354, 0.1350, 0.1785, 0.1511]) -Greedy action tensor([ 1.1518, -0.7585, -0.0376, -0.8250]) tensor([0.6285, 0.0930, 0.1913, 0.0871]) -Greedy action tensor([ 0.9047, -0.4125, 0.1112, -0.3990]) tensor([0.5021, 0.1345, 0.2271, 0.1363]) -Greedy action tensor([ 0.4555, -0.2206, -0.0671, -0.4753]) tensor([0.4007, 0.2038, 0.2376, 0.1580]) -Greedy action tensor([ 0.8159, -0.5317, -0.1066, -0.5516]) tensor([0.5230, 0.1359, 0.2079, 0.1332]) -Greedy action tensor([ 0.9496, -0.3912, -0.1054, -0.1234]) tensor([0.5123, 0.1341, 0.1784, 0.1752]) -Greedy action tensor([ 1.0171, -0.6103, 0.1668, -0.5459]) tensor([0.5455, 0.1071, 0.2331, 0.1143]) -Greedy action tensor([ 0.4959, -0.0673, -0.0396, 0.0020]) tensor([0.3617, 0.2059, 0.2117, 0.2207]) -Greedy action tensor([ 0.6060, -0.5873, -0.1537, -0.2174]) tensor([0.4525, 0.1372, 0.2117, 0.1986]) -Greedy action tensor([ 0.8999, -0.4433, 0.1214, -0.1631]) tensor([0.4841, 0.1264, 0.2223, 0.1672]) -Greedy action tensor([ 0.3627, 0.1876, 0.1710, -0.1578]) tensor([0.3068, 0.2575, 0.2533, 0.1823]) -Greedy action tensor([ 0.7293, -0.4092, -0.0130, -0.2624]) tensor([0.4614, 0.1478, 0.2196, 0.1712]) -Greedy action tensor([ 0.6056, 0.0717, -0.0078, 0.0605]) tensor([0.3693, 0.2166, 0.2000, 0.2141]) -Greedy action tensor([ 0.6653, -0.7142, -0.0864, -0.3126]) tensor([0.4763, 0.1199, 0.2246, 0.1792]) -Greedy action tensor([ 0.8059, -1.0746, 0.2496, -0.4236]) tensor([0.4955, 0.0756, 0.2841, 0.1449]) -Greedy action tensor([ 1.0034, -0.6161, -0.1083, -0.4984]) tensor([0.5715, 0.1132, 0.1880, 0.1273]) -Greedy action tensor([ 0.9542, -0.5635, -0.1096, -0.3918]) tensor([0.5481, 0.1201, 0.1892, 0.1426]) -Greedy action tensor([ 0.5834, -0.4256, 0.0136, -0.1920]) tensor([0.4183, 0.1525, 0.2366, 0.1926]) -Greedy action tensor([ 0.8867, -0.5541, 0.1135, -0.3046]) tensor([0.4995, 0.1182, 0.2305, 0.1518]) -Greedy action tensor([ 0.8156, -0.4451, 0.1028, -0.5372]) tensor([0.4921, 0.1395, 0.2412, 0.1272]) -Greedy action tensor([ 0.9325, -0.5098, -0.0516, -0.4537]) tensor([0.5376, 0.1271, 0.2009, 0.1344]) -Greedy action tensor([ 0.7735, -0.3673, 0.0841, -0.3116]) tensor([0.4631, 0.1480, 0.2324, 0.1565]) -Greedy action tensor([ 0.9366, -0.8002, 0.1089, -0.4297]) tensor([0.5353, 0.0943, 0.2339, 0.1365]) -Greedy action tensor([ 0.6758, -0.3514, 0.0183, -0.1598]) tensor([0.4329, 0.1550, 0.2243, 0.1877]) -Greedy action tensor([ 1.0271, -0.7375, 0.2071, -0.5032]) tensor([0.5470, 0.0937, 0.2409, 0.1184]) -Greedy action tensor([ 0.9683, -0.3214, -0.2423, -0.6823]) tensor([0.5665, 0.1560, 0.1688, 0.1087]) -Greedy action tensor([ 0.6630, -0.5535, 0.0412, -0.1957]) tensor([0.4431, 0.1313, 0.2379, 0.1877]) -Greedy action tensor([ 0.3160, 0.3302, -0.2060, 0.1267]) tensor([0.2911, 0.2953, 0.1727, 0.2409]) -Greedy action tensor([ 0.6893, -0.6572, -0.0279, -0.2668]) tensor([0.4689, 0.1220, 0.2289, 0.1802]) -Greedy action tensor([ 0.7162, -0.2933, 0.1424, -0.2048]) tensor([0.4299, 0.1567, 0.2422, 0.1712]) -Greedy action tensor([ 1.1354, -0.9194, -0.1088, -0.5599]) tensor([0.6251, 0.0801, 0.1801, 0.1147]) -Greedy action tensor([ 1.2347, -0.7959, 0.0506, -0.6640]) tensor([0.6301, 0.0827, 0.1928, 0.0944]) -Greedy action tensor([ 0.8650, -0.5198, -0.0771, -0.3201]) tensor([0.5139, 0.1287, 0.2003, 0.1571]) -Greedy action tensor([ 0.8058, -0.8521, 0.0568, -0.4983]) tensor([0.5168, 0.0985, 0.2444, 0.1403]) -Greedy action tensor([ 0.3926, -0.2152, 0.0269, -0.1953]) tensor([0.3579, 0.1949, 0.2483, 0.1988]) -Greedy action tensor([ 0.6409, -0.0055, -0.0125, -0.2626]) tensor([0.4083, 0.2139, 0.2124, 0.1654]) -Greedy action tensor([ 1.4294, -0.8691, -0.0051, -0.4429]) tensor([0.6701, 0.0673, 0.1596, 0.1030]) -Greedy action tensor([ 0.7465, -0.5101, 0.1108, -0.2187]) tensor([0.4556, 0.1297, 0.2412, 0.1735]) -Greedy action tensor([ 1.0123, -1.0042, -0.5736, -0.3751]) tensor([0.6299, 0.0838, 0.1290, 0.1573]) -Greedy action tensor([-0.4182, -0.5131, -0.4485, -0.5035]) tensor([0.2633, 0.2395, 0.2555, 0.2418]) -Greedy action tensor([ 0.5471, -0.3864, -0.6177, -0.5465]) tensor([0.4901, 0.1927, 0.1529, 0.1642]) -Greedy action tensor([ 1.4841, -1.1355, -0.2135, 0.0432]) tensor([0.6699, 0.0488, 0.1227, 0.1586]) -Greedy action tensor([-1.4833, -0.9430, -0.8586, -0.3049]) tensor([0.1277, 0.2191, 0.2384, 0.4148]) -Greedy action tensor([ 0.0956, -0.6638, 0.0467, -0.7812]) tensor([0.3526, 0.1650, 0.3358, 0.1467]) -Greedy action tensor([ 0.8850, 0.0296, -0.4885, 0.4436]) tensor([0.4308, 0.1831, 0.1091, 0.2770]) -Greedy action tensor([-0.3247, -0.7813, 0.1157, 0.1665]) tensor([0.2074, 0.1314, 0.3222, 0.3390]) -Greedy action tensor([-0.3076, -1.0292, -0.1690, -0.6866]) tensor([0.3013, 0.1464, 0.3461, 0.2062]) -Greedy action tensor([ 1.1222, -0.2306, 0.3654, 0.2348]) tensor([0.4674, 0.1208, 0.2193, 0.1924]) -Greedy action tensor([-0.3822, -1.2997, 1.3304, -1.4879]) tensor([0.1375, 0.0549, 0.7621, 0.0455]) -Greedy action tensor([-0.4543, 0.2484, -0.0356, -1.0710]) tensor([0.1969, 0.3976, 0.2993, 0.1063]) -Greedy action tensor([-0.1143, -0.5360, -0.2229, -0.2561]) tensor([0.2923, 0.1917, 0.2622, 0.2537]) -Greedy action tensor([ 0.4890, -0.8042, -0.9058, -0.3452]) tensor([0.5111, 0.1403, 0.1267, 0.2219]) -Greedy action tensor([ 0.2809, -0.8056, -0.4189, -1.3832]) tensor([0.4942, 0.1667, 0.2455, 0.0936]) -Greedy action tensor([ 0.2411, -0.2146, -0.1170, -0.0298]) tensor([0.3230, 0.2048, 0.2258, 0.2464]) -Greedy action tensor([ 1.1827, -1.0116, 0.7421, -0.7128]) tensor([0.5248, 0.0585, 0.3378, 0.0789]) -Greedy action tensor([-1.4476, -1.1155, 0.0711, 0.8759]) tensor([0.0582, 0.0812, 0.2659, 0.5947]) -Greedy action tensor([ 1.1941, -0.5922, 0.7448, -0.2498]) tensor([0.4898, 0.0821, 0.3125, 0.1156]) -Greedy action tensor([-0.0495, -0.9671, -0.0111, -0.5273]) tensor([0.3269, 0.1306, 0.3397, 0.2027]) -Greedy action tensor([-0.3203, -0.1154, -0.1925, -0.5910]) tensor([0.2423, 0.2974, 0.2754, 0.1849]) -Greedy action tensor([-0.4788, -0.4236, 0.1362, -0.5128]) tensor([0.2052, 0.2169, 0.3796, 0.1984]) -Greedy action tensor([ 0.0087, -0.5911, -0.4673, -0.8227]) tensor([0.3838, 0.2107, 0.2384, 0.1671]) -Greedy action tensor([ 0.0542, -1.1803, 0.0795, 0.8315]) tensor([0.2226, 0.0648, 0.2283, 0.4843]) -Greedy action tensor([-0.5023, -0.9085, -0.4431, 0.3422]) tensor([0.1979, 0.1318, 0.2099, 0.4604]) -Greedy action tensor([ 0.8971, -0.6064, 0.1070, -0.5023]) tensor([0.5200, 0.1156, 0.2360, 0.1283]) -Greedy action tensor([ 0.2108, -0.2515, 0.3396, -0.3358]) tensor([0.2989, 0.1882, 0.3399, 0.1730]) -Greedy action tensor([-0.1570, -0.8315, 0.1631, -0.8043]) tensor([0.2933, 0.1494, 0.4039, 0.1535]) -Greedy action tensor([-0.2756, -0.9354, -0.8161, -0.0205]) tensor([0.2950, 0.1525, 0.1718, 0.3807]) -Greedy action tensor([-0.8393, -1.6509, 0.8796, -0.5141]) tensor([0.1190, 0.0528, 0.6635, 0.1647]) -Greedy action tensor([-0.5457, -0.6562, -0.6110, 0.0758]) tensor([0.2130, 0.1908, 0.1996, 0.3966]) -Greedy action tensor([ 0.7407, -0.1591, -0.0144, 0.2763]) tensor([0.3992, 0.1623, 0.1876, 0.2509]) -Greedy action tensor([ 0.7558, -0.9698, 0.5339, 0.0127]) tensor([0.4074, 0.0725, 0.3263, 0.1938]) -Greedy action tensor([ 0.4618, -0.6573, 0.1833, -0.7076]) tensor([0.4177, 0.1364, 0.3162, 0.1297]) -Greedy action tensor([ 0.9436, -0.2543, 0.6304, 0.2901]) tensor([0.3917, 0.1182, 0.2863, 0.2038]) -Greedy action tensor([-0.2141, 0.0690, -0.3048, -0.7232]) tensor([0.2603, 0.3455, 0.2377, 0.1565]) -Greedy action tensor([-0.6376, -1.4028, 0.3428, -0.4652]) tensor([0.1880, 0.0875, 0.5011, 0.2234]) -Greedy action tensor([ 1.1921, -0.0909, 0.3932, 0.7295]) tensor([0.4243, 0.1176, 0.1909, 0.2672]) -Greedy action tensor([ 0.0857, -0.5187, -0.5350, 0.0494]) tensor([0.3280, 0.1792, 0.1763, 0.3164]) -Greedy action tensor([-0.3790, -0.8279, -0.4151, -1.2155]) tensor([0.3294, 0.2102, 0.3177, 0.1427]) -Greedy action tensor([-0.6118, -0.3980, -1.0583, -0.5878]) tensor([0.2563, 0.3173, 0.1640, 0.2625]) -Greedy action tensor([ 1.5863, 0.0760, -0.7869, -0.0396]) tensor([0.6619, 0.1462, 0.0617, 0.1302]) -Greedy action tensor([ 0.7996, -1.4630, 0.7216, -0.8427]) tensor([0.4499, 0.0468, 0.4162, 0.0871]) -Greedy action tensor([ 0.0734, -0.8660, -0.5721, 0.3963]) tensor([0.3034, 0.1186, 0.1591, 0.4190]) -Greedy action tensor([ 0.1553, -0.0911, -0.4368, -0.6859]) tensor([0.3615, 0.2826, 0.2000, 0.1559]) -Greedy action tensor([ 1.2793, -0.2509, 1.1309, 0.7552]) tensor([0.3745, 0.0811, 0.3228, 0.2217]) -Greedy action tensor([ 0.4248, -0.5205, -0.7479, 0.1350]) tensor([0.4087, 0.1588, 0.1265, 0.3059]) -Greedy action tensor([-1.3479, -0.8130, -0.3963, 0.8924]) tensor([0.0681, 0.1162, 0.1763, 0.6395]) -Greedy action tensor([ 1.6002, -0.6856, 0.8148, 1.3253]) tensor([0.4315, 0.0439, 0.1968, 0.3278]) -Greedy action tensor([-0.2356, -0.6578, -0.1811, -0.5472]) tensor([0.2904, 0.1904, 0.3066, 0.2126]) -Greedy action tensor([-0.6010, -0.0482, -0.2191, -0.5828]) tensor([0.1915, 0.3329, 0.2806, 0.1950]) -Greedy action tensor([ 0.0361, -1.6709, 1.0278, -0.8757]) tensor([0.2337, 0.0424, 0.6300, 0.0939]) -Greedy action tensor([ 0.5643, -1.1480, 0.8648, 0.3797]) tensor([0.2974, 0.0537, 0.4017, 0.2473]) -Greedy action tensor([-0.2798, 0.4381, -0.3066, -1.0922]) tensor([0.2238, 0.4589, 0.2179, 0.0993]) -Greedy action tensor([-0.1807, -0.3547, -0.4444, -0.1696]) tensor([0.2763, 0.2321, 0.2122, 0.2794]) -Greedy action tensor([ 0.3609, -1.9397, 0.2666, -0.4352]) tensor([0.4063, 0.0407, 0.3697, 0.1833]) -Greedy action tensor([ 0.1154, -0.4414, 0.2672, -0.9238]) tensor([0.3236, 0.1854, 0.3766, 0.1145]) -Greedy action tensor([ 0.2842, -0.7392, 0.3810, -0.0433]) tensor([0.3143, 0.1129, 0.3462, 0.2265]) -Greedy action tensor([ 1.1932, -0.0226, -0.6663, 0.1986]) tensor([0.5488, 0.1627, 0.0855, 0.2030]) -Greedy action tensor([ 0.7828, -1.1393, -0.2975, 1.0932]) tensor([0.3509, 0.0513, 0.1191, 0.4786]) -Greedy action tensor([-0.6498, -0.9052, 0.3479, -0.9985]) tensor([0.1926, 0.1492, 0.5223, 0.1359]) -Greedy action tensor([ 0.1719, -0.9381, -0.2414, -0.9123]) tensor([0.4293, 0.1415, 0.2840, 0.1452]) -Greedy action tensor([-0.7321, -0.3382, 0.1062, -1.4197]) tensor([0.1888, 0.2799, 0.4365, 0.0949]) -Greedy action tensor([-0.0972, -0.5923, 0.1716, 0.1039]) tensor([0.2415, 0.1472, 0.3160, 0.2953]) -Greedy action tensor([-0.1628, 0.0582, -0.5398, 0.2979]) tensor([0.2213, 0.2760, 0.1518, 0.3508]) -Greedy action tensor([ 0.4372, 0.1116, -0.0894, -0.3682]) tensor([0.3624, 0.2617, 0.2140, 0.1619]) -Greedy action tensor([ 0.1663, -0.4706, 0.4970, -0.8883]) tensor([0.3059, 0.1618, 0.4258, 0.1065]) -Greedy action tensor([ 1.0489, -0.5335, -0.4569, -0.6147]) tensor([0.6185, 0.1271, 0.1372, 0.1172]) -Greedy action tensor([-0.1340, 0.0942, -0.3939, -0.9818]) tensor([0.2894, 0.3635, 0.2231, 0.1240]) -Greedy action tensor([-0.4121, -1.2901, 0.3051, -1.1138]) tensor([0.2525, 0.1050, 0.5173, 0.1252]) -Greedy action tensor([-0.2337, -0.6722, -0.6100, -0.2726]) tensor([0.3036, 0.1959, 0.2084, 0.2921]) -Greedy action tensor([ 1.0380, -1.1967, -0.2398, 0.0222]) tensor([0.5722, 0.0612, 0.1594, 0.2072]) -Greedy action tensor([ 0.3645, -0.1445, 0.0065, -0.6425]) tensor([0.3752, 0.2255, 0.2623, 0.1370]) -Greedy action tensor([ 0.1428, -0.3538, 0.3453, -0.8971]) tensor([0.3138, 0.1910, 0.3843, 0.1109]) -Greedy action tensor([ 0.0806, 0.0813, -0.5575, -0.3176]) tensor([0.3125, 0.3127, 0.1651, 0.2098]) -Greedy action tensor([-0.9462, -0.6375, 0.1653, -0.5512]) tensor([0.1452, 0.1978, 0.4414, 0.2156]) -Greedy action tensor([-0.9859, -0.1577, -0.8625, -0.3271]) tensor([0.1574, 0.3603, 0.1781, 0.3042]) -Greedy action tensor([-0.0411, -0.8391, 0.1756, 0.2204]) tensor([0.2506, 0.1128, 0.3112, 0.3254]) -Greedy action tensor([ 0.4329, -0.4370, -0.1100, 0.5106]) tensor([0.3246, 0.1360, 0.1886, 0.3508]) -Greedy action tensor([-0.8922, -0.3677, 0.5056, -1.2050]) tensor([0.1339, 0.2263, 0.5419, 0.0979]) -Greedy action tensor([-0.9094, -0.9600, -0.1246, -0.6479]) tensor([0.1838, 0.1747, 0.4028, 0.2387]) -Greedy action tensor([-1.9341, -0.4049, 0.6530, -0.1789]) tensor([0.0405, 0.1869, 0.5383, 0.2343]) -Greedy action tensor([-1.1408, -0.6526, 0.2920, 0.1954]) tensor([0.0941, 0.1534, 0.3944, 0.3581]) -Greedy action tensor([-1.7889, -0.4065, 0.6642, 0.0347]) tensor([0.0439, 0.1747, 0.5098, 0.2716]) -Greedy action tensor([-1.6635, -0.3873, 0.5318, -0.1132]) tensor([0.0547, 0.1960, 0.4914, 0.2578]) -Greedy action tensor([-1.5875, 0.3530, 0.1583, -0.6781]) tensor([0.0618, 0.4304, 0.3543, 0.1535]) -Greedy action tensor([-1.2932, 0.1566, 0.4118, 0.0874]) tensor([0.0678, 0.2891, 0.3732, 0.2698]) -Greedy action tensor([-1.9270, -0.4442, 0.6573, -0.1721]) tensor([0.0409, 0.1802, 0.5423, 0.2366]) -Greedy action tensor([-1.0924, 0.1435, 0.1925, -0.0155]) tensor([0.0910, 0.3131, 0.3288, 0.2671]) -Greedy action tensor([-1.7409, -0.2559, 0.5450, -0.1156]) tensor([0.0492, 0.2172, 0.4837, 0.2499]) -Greedy action tensor([0.4290, 1.2109, 0.2533, 0.8858]) tensor([0.1785, 0.3901, 0.1497, 0.2818]) -Greedy action tensor([-1.9042, -0.4020, 0.6516, -0.1615]) tensor([0.0415, 0.1865, 0.5348, 0.2372]) -Greedy action tensor([-1.9079, -0.4363, 0.6489, -0.1635]) tensor([0.0417, 0.1817, 0.5379, 0.2387]) -Greedy action tensor([-1.7706, -0.1588, 0.5428, -0.1113]) tensor([0.0468, 0.2345, 0.4729, 0.2459]) -Greedy action tensor([-0.4720, 1.0668, 0.0629, 0.2716]) tensor([0.1056, 0.4920, 0.1803, 0.2221]) -Greedy action tensor([-0.6888, 0.0478, 0.1477, -0.0744]) tensor([0.1380, 0.2883, 0.3186, 0.2551]) -Greedy action tensor([-1.6340, -0.4684, 0.5147, -0.0086]) tensor([0.0560, 0.1796, 0.4800, 0.2844]) -Greedy action tensor([-1.9436, -0.4460, 0.6667, -0.1800]) tensor([0.0401, 0.1795, 0.5462, 0.2342]) -Greedy action tensor([-1.8506, -0.4688, 0.7403, -0.0655]) tensor([0.0412, 0.1640, 0.5494, 0.2454]) -Greedy action tensor([-1.8382, 0.4748, 0.3818, -0.2979]) tensor([0.0400, 0.4045, 0.3686, 0.1868]) -Greedy action tensor([-1.8746, -0.4442, 0.6377, -0.1452]) tensor([0.0432, 0.1806, 0.5327, 0.2435]) -Greedy action tensor([-1.1964, -0.1893, -0.4740, -0.7699]) tensor([0.1364, 0.3735, 0.2810, 0.2090]) -Greedy action tensor([-1.8786, -0.2871, 0.6220, -0.1402]) tensor([0.0420, 0.2064, 0.5124, 0.2391]) -Greedy action tensor([-1.7071, -0.3621, 0.5430, -0.1183]) tensor([0.0520, 0.1997, 0.4936, 0.2548]) -Greedy action tensor([-1.1746, 0.8234, 0.1564, 0.3306]) tensor([0.0600, 0.4425, 0.2271, 0.2703]) -Greedy action tensor([-1.8609, -0.1517, 0.5724, -0.1153]) tensor([0.0423, 0.2336, 0.4819, 0.2423]) -Greedy action tensor([-1.6570, -0.2337, 0.5301, -0.0124]) tensor([0.0520, 0.2157, 0.4631, 0.2692]) -Greedy action tensor([-1.8120, -0.4463, 0.6041, -0.0948]) tensor([0.0461, 0.1807, 0.5165, 0.2568]) -Greedy action tensor([-1.7917, 0.1213, 0.5133, -0.0803]) tensor([0.0429, 0.2903, 0.4296, 0.2373]) -Greedy action tensor([-1.9042, -0.4473, 0.6517, -0.1570]) tensor([0.0418, 0.1795, 0.5387, 0.2400]) -Greedy action tensor([-1.9343, -0.4458, 0.6639, -0.1770]) tensor([0.0405, 0.1796, 0.5448, 0.2350]) -Greedy action tensor([-1.9238, -0.3950, 0.6532, -0.1693]) tensor([0.0407, 0.1879, 0.5359, 0.2355]) -Greedy action tensor([-1.7482, 0.2368, 0.6117, -0.4160]) tensor([0.0441, 0.3213, 0.4674, 0.1672]) -Greedy action tensor([-0.9851, 0.6474, 0.1596, -0.1116]) tensor([0.0858, 0.4391, 0.2696, 0.2056]) -Greedy action tensor([-1.8379, -0.4466, 0.6242, -0.1108]) tensor([0.0447, 0.1797, 0.5242, 0.2514]) -Greedy action tensor([-1.7281, -0.5017, 0.5670, -0.0868]) tensor([0.0513, 0.1749, 0.5091, 0.2648]) -Greedy action tensor([-1.4010, -0.3756, 0.6019, -0.2459]) tensor([0.0696, 0.1940, 0.5156, 0.2209]) -Greedy action tensor([-1.8276, -0.4260, 0.6022, -0.1305]) tensor([0.0457, 0.1857, 0.5191, 0.2495]) -Greedy action tensor([-1.8391, -0.3958, 0.6066, -0.1395]) tensor([0.0450, 0.1904, 0.5187, 0.2460]) -Greedy action tensor([-1.9219, -0.4241, 0.6541, -0.1680]) tensor([0.0410, 0.1833, 0.5388, 0.2368]) -Greedy action tensor([-1.2033, -0.1264, 0.3325, -0.0854]) tensor([0.0859, 0.2522, 0.3991, 0.2628]) -Greedy action tensor([-1.4818, -0.6276, 0.4420, 0.0851]) tensor([0.0667, 0.1567, 0.4568, 0.3197]) -Greedy action tensor([-1.7802, 0.0027, 0.5265, -0.1044]) tensor([0.0448, 0.2663, 0.4496, 0.2393]) -Greedy action tensor([-1.7254, -0.2133, 0.5231, -0.0956]) tensor([0.0497, 0.2255, 0.4710, 0.2537]) -Greedy action tensor([-1.9365, -0.4449, 0.6675, -0.1733]) tensor([0.0403, 0.1792, 0.5452, 0.2352]) -Greedy action tensor([-1.8942, -0.2694, 0.6070, -0.1765]) tensor([0.0419, 0.2129, 0.5115, 0.2337]) -Greedy action tensor([-1.8887, -0.4568, 0.6455, -0.1512]) tensor([0.0426, 0.1783, 0.5370, 0.2421]) -Greedy action tensor([-1.2145, 0.1514, 0.3561, -0.6623]) tensor([0.0872, 0.3418, 0.4195, 0.1515]) -Greedy action tensor([-1.7241, 0.1014, 0.4749, -0.0119]) tensor([0.0460, 0.2852, 0.4143, 0.2546]) -Greedy action tensor([-1.9017, -0.3232, 0.6305, -0.1731]) tensor([0.0416, 0.2015, 0.5229, 0.2341]) -Greedy action tensor([-1.8118, -0.4468, 0.6017, -0.1147]) tensor([0.0464, 0.1817, 0.5185, 0.2533]) -Greedy action tensor([-1.9314, -0.4329, 0.6602, -0.1733]) tensor([0.0406, 0.1817, 0.5421, 0.2356]) -Greedy action tensor([-1.9117, -0.4358, 0.6416, -0.1589]) tensor([0.0417, 0.1823, 0.5355, 0.2405]) -Greedy action tensor([-1.9169, -0.4371, 0.6563, -0.1650]) tensor([0.0412, 0.1810, 0.5402, 0.2376]) -Greedy action tensor([-1.8515, -0.3968, 0.6233, -0.1170]) tensor([0.0438, 0.1876, 0.5204, 0.2482]) -Greedy action tensor([-0.8519, 0.4280, 0.1768, -0.0805]) tensor([0.1046, 0.3763, 0.2927, 0.2263]) -Greedy action tensor([-1.5864, -0.5139, 0.4944, -0.0567]) tensor([0.0604, 0.1766, 0.4840, 0.2789]) -Greedy action tensor([-0.9522, -0.2459, 0.1404, -0.3354]) tensor([0.1272, 0.2578, 0.3793, 0.2357]) -Greedy action tensor([-1.5306, -0.0421, 0.4619, -0.1355]) tensor([0.0595, 0.2637, 0.4366, 0.2402]) -Greedy action tensor([-1.0920, 0.6239, 0.1962, 0.0048]) tensor([0.0759, 0.4219, 0.2751, 0.2272]) -Greedy action tensor([-1.8139, -0.5862, 1.1925, 0.4225]) tensor([0.0294, 0.1004, 0.5948, 0.2754]) -Greedy action tensor([-1.9184, -0.4026, 0.6488, -0.1660]) tensor([0.0411, 0.1870, 0.5351, 0.2369]) -Greedy action tensor([-1.7921, -0.3926, 0.5857, -0.1845]) tensor([0.0480, 0.1946, 0.5177, 0.2396]) -Greedy action tensor([-0.5803, -1.0838, 0.7736, -0.3862]) tensor([0.1495, 0.0903, 0.5788, 0.1815]) -Greedy action tensor([-1.9005, -0.4437, 0.6465, -0.1592]) tensor([0.0421, 0.1806, 0.5373, 0.2400]) -Greedy action tensor([-1.2143, -0.2118, 0.3385, -0.0096]) tensor([0.0849, 0.2312, 0.4009, 0.2831]) -Greedy action tensor([-1.8857, -0.3046, 0.6266, -0.1462]) tensor([0.0419, 0.2035, 0.5163, 0.2384]) -Greedy action tensor([-1.8297, 0.0450, 0.5381, -0.1207]) tensor([0.0422, 0.2749, 0.4501, 0.2329]) -Greedy action tensor([-1.6871, -0.5020, 0.6106, -0.1672]) tensor([0.0532, 0.1740, 0.5295, 0.2433]) -Greedy action tensor([-1.9025, -0.2886, 0.6247, -0.1575]) tensor([0.0412, 0.2070, 0.5159, 0.2360]) -Greedy action tensor([-1.2697, -0.6978, 0.3484, 0.2009]) tensor([0.0822, 0.1456, 0.4145, 0.3577]) -Greedy action tensor([-1.8326, -0.3645, 0.5973, -0.1315]) tensor([0.0451, 0.1957, 0.5121, 0.2471]) -Greedy action tensor([-1.9121, -0.4641, 0.6495, -0.1687]) tensor([0.0418, 0.1778, 0.5415, 0.2389]) -Greedy action tensor([-1.1241, -0.2284, 0.2957, 0.0423]) tensor([0.0926, 0.2269, 0.3831, 0.2974]) -Greedy action tensor([-1.7864, -0.3368, 0.6236, -0.0943]) tensor([0.0458, 0.1952, 0.5101, 0.2488]) -Greedy action tensor([-1.3566, -0.3326, 0.3877, -0.0632]) tensor([0.0760, 0.2117, 0.4351, 0.2772]) -Greedy action tensor([-1.8768, -0.4038, 0.6432, -0.1425]) tensor([0.0426, 0.1860, 0.5299, 0.2415]) -Greedy action tensor([-1.8828, -0.3494, 0.6372, -0.1455]) tensor([0.0421, 0.1951, 0.5234, 0.2393]) -Greedy action tensor([-1.2741e+00, 2.6391e-01, 2.7458e-01, 6.0824e-04]) tensor([0.0717, 0.3340, 0.3376, 0.2567]) -Greedy action tensor([-0.5728, 0.9259, 0.0786, 0.0196]) tensor([0.1087, 0.4864, 0.2084, 0.1965]) -Greedy action tensor([-1.7019, -0.0888, 0.4914, -0.0508]) tensor([0.0495, 0.2485, 0.4439, 0.2581]) -Greedy action tensor([-1.3797, -0.5740, 0.4643, -0.1946]) tensor([0.0779, 0.1744, 0.4927, 0.2549]) -Greedy action tensor([ 1.6876, -0.3426, -0.6212, -0.0347]) tensor([0.7095, 0.0932, 0.0705, 0.1268]) -Greedy action tensor([ 1.3802, -0.0018, -0.5919, 0.6157]) tensor([0.5389, 0.1353, 0.0750, 0.2509]) -Greedy action tensor([ 1.3392, -0.4691, -0.1080, 0.0927]) tensor([0.5929, 0.0972, 0.1395, 0.1705]) -Greedy action tensor([ 0.9273, -0.2625, -0.1868, -0.3406]) tensor([0.5225, 0.1590, 0.1715, 0.1470]) -Greedy action tensor([ 1.5154, 0.0603, -1.1229, 0.6418]) tensor([0.5806, 0.1355, 0.0415, 0.2424]) -Greedy action tensor([ 0.8645, -0.5172, 0.1910, 0.1812]) tensor([0.4413, 0.1108, 0.2250, 0.2228]) -Greedy action tensor([ 1.3578, -0.2799, -0.3006, -0.2228]) tensor([0.6286, 0.1222, 0.1197, 0.1294]) -Greedy action tensor([ 1.7042, -0.5354, -0.3174, 0.3905]) tensor([0.6632, 0.0706, 0.0878, 0.1783]) -Greedy action tensor([ 1.5548, -0.2753, -0.5066, 0.0654]) tensor([0.6609, 0.1060, 0.0841, 0.1490]) -Greedy action tensor([ 1.3218, -0.3275, -0.4302, 0.4167]) tensor([0.5649, 0.1086, 0.0980, 0.2285]) -Greedy action tensor([ 1.8772, 0.0548, -1.0164, -0.0173]) tensor([0.7313, 0.1182, 0.0405, 0.1100]) -Greedy action tensor([ 1.4130, -0.4162, -0.5521, -0.1135]) tensor([0.6588, 0.1058, 0.0923, 0.1432]) -Greedy action tensor([ 1.4440, -0.8420, -0.1937, 0.4952]) tensor([0.5941, 0.0604, 0.1155, 0.2300]) -Greedy action tensor([ 1.8162, 0.3063, -0.1404, 0.4607]) tensor([0.6173, 0.1364, 0.0872, 0.1591]) -Greedy action tensor([ 1.0389, -0.4951, 0.1001, 0.2712]) tensor([0.4829, 0.1041, 0.1889, 0.2241]) -Greedy action tensor([ 1.5856, -0.4590, -0.2047, 0.3494]) tensor([0.6302, 0.0816, 0.1052, 0.1831]) -Greedy action tensor([ 1.3427, -0.1962, -0.3611, -0.1201]) tensor([0.6142, 0.1318, 0.1118, 0.1422]) -Greedy action tensor([ 0.9379, -0.2008, -0.1195, 0.2615]) tensor([0.4595, 0.1472, 0.1596, 0.2337]) -Greedy action tensor([ 2.3230, -1.1514, -0.3597, 0.6319]) tensor([0.7790, 0.0241, 0.0533, 0.1436]) -Greedy action tensor([ 1.3428, -0.4745, -0.5782, 0.2344]) tensor([0.6101, 0.0991, 0.0894, 0.2014]) -Greedy action tensor([ 1.4732, -0.7597, -0.5390, 0.0936]) tensor([0.6700, 0.0718, 0.0896, 0.1686]) -Greedy action tensor([ 1.4332, -0.6675, -0.1112, -0.0363]) tensor([0.6386, 0.0781, 0.1363, 0.1469]) -Greedy action tensor([ 1.3216, -0.3812, -0.4583, 0.1618]) tensor([0.6008, 0.1095, 0.1013, 0.1884]) -Greedy action tensor([ 1.6725, -0.9002, -0.4076, 0.6923]) tensor([0.6343, 0.0484, 0.0792, 0.2380]) -Greedy action tensor([ 1.3477, -0.5977, -0.4012, 0.1972]) tensor([0.6122, 0.0875, 0.1065, 0.1938]) -Greedy action tensor([ 0.9756, 0.2592, -0.7556, -0.2091]) tensor([0.5072, 0.2478, 0.0898, 0.1551]) -Greedy action tensor([ 1.7050, -1.1481, -0.7369, 0.1790]) tensor([0.7342, 0.0423, 0.0639, 0.1596]) -Greedy action tensor([ 1.1881, -0.0859, -0.5822, 0.3335]) tensor([0.5332, 0.1491, 0.0908, 0.2269]) -Greedy action tensor([ 1.0944, -0.5332, -0.0578, 0.1698]) tensor([0.5238, 0.1029, 0.1655, 0.2078]) -Greedy action tensor([ 1.5486, -0.2711, -0.6010, 0.0215]) tensor([0.6686, 0.1084, 0.0779, 0.1452]) -Greedy action tensor([ 1.9210, -0.4113, -0.1492, 0.0189]) tensor([0.7286, 0.0707, 0.0919, 0.1087]) -Greedy action tensor([ 1.6351, -0.5897, -0.1991, 0.3786]) tensor([0.6441, 0.0696, 0.1029, 0.1834]) -Greedy action tensor([ 1.4592, -0.4595, -0.6303, 0.4416]) tensor([0.6127, 0.0899, 0.0758, 0.2215]) -Greedy action tensor([ 0.9983, -0.1342, -0.7975, 0.3502]) tensor([0.4972, 0.1602, 0.0825, 0.2601]) -Greedy action tensor([ 1.7548, -0.8708, -0.0262, 0.3565]) tensor([0.6721, 0.0487, 0.1132, 0.1660]) -Greedy action tensor([ 1.0628, -0.6884, -0.3586, 0.4961]) tensor([0.5045, 0.0876, 0.1218, 0.2862]) -Greedy action tensor([ 1.9784, -0.8085, -0.3177, 0.3902]) tensor([0.7318, 0.0451, 0.0737, 0.1495]) -Greedy action tensor([ 1.1374, -0.1606, -1.1377, 0.1604]) tensor([0.5707, 0.1558, 0.0587, 0.2148]) -Greedy action tensor([ 1.9550, -0.7387, -0.8214, 0.0538]) tensor([0.7817, 0.0529, 0.0487, 0.1168]) -Greedy action tensor([ 0.8710, -0.1177, -0.1655, 0.1389]) tensor([0.4530, 0.1685, 0.1607, 0.2178]) -Greedy action tensor([ 1.5376, 0.0979, -0.1699, 0.3279]) tensor([0.5825, 0.1381, 0.1056, 0.1738]) -Greedy action tensor([ 1.5149, -0.5435, -0.4396, 0.4517]) tensor([0.6193, 0.0791, 0.0877, 0.2139]) -Greedy action tensor([ 1.2655, -0.4347, -0.2056, -0.0140]) tensor([0.5916, 0.1080, 0.1359, 0.1646]) -Greedy action tensor([ 0.8481, -0.3684, 0.0352, 0.0336]) tensor([0.4581, 0.1357, 0.2032, 0.2029]) -Greedy action tensor([ 1.6656, -0.6265, -0.4304, 0.1861]) tensor([0.6888, 0.0696, 0.0847, 0.1569]) -Greedy action tensor([ 1.5602, -0.9504, -0.3562, 0.9131]) tensor([0.5708, 0.0464, 0.0840, 0.2989]) -Greedy action tensor([ 1.4174, 0.0798, -0.7391, -0.1029]) tensor([0.6262, 0.1644, 0.0725, 0.1369]) -Greedy action tensor([ 2.0889, -0.6428, -0.4754, 0.2826]) tensor([0.7655, 0.0498, 0.0589, 0.1257]) -Greedy action tensor([ 1.2418, -0.0846, -0.1473, -0.0692]) tensor([0.5605, 0.1488, 0.1397, 0.1511]) -Greedy action tensor([ 1.8498, -1.0995, -0.4830, 0.4168]) tensor([0.7205, 0.0377, 0.0699, 0.1719]) -Greedy action tensor([ 1.3554, -0.7391, -0.4823, 0.1189]) tensor([0.6359, 0.0783, 0.1012, 0.1846]) -Greedy action tensor([ 1.3980, -0.2592, -0.7429, 0.0143]) tensor([0.6415, 0.1223, 0.0754, 0.1608]) -Greedy action tensor([ 1.4218, -0.7969, -0.2036, 0.3204]) tensor([0.6105, 0.0664, 0.1202, 0.2029]) -Greedy action tensor([ 2.1968, -1.1636, -0.5286, 0.2949]) tensor([0.8003, 0.0278, 0.0524, 0.1195]) -Greedy action tensor([ 0.8045, -0.2385, -0.4493, -0.2830]) tensor([0.5064, 0.1784, 0.1445, 0.1707]) -Greedy action tensor([ 2.2325, -0.7794, -0.5778, 0.8629]) tensor([0.7334, 0.0361, 0.0441, 0.1864]) -Greedy action tensor([ 1.9276, -1.0120, -0.1144, 0.4129]) tensor([0.7130, 0.0377, 0.0925, 0.1568]) -Greedy action tensor([ 1.3999, -0.5882, -0.9445, 0.0526]) tensor([0.6699, 0.0917, 0.0642, 0.1741]) -Greedy action tensor([ 1.9255, 0.6843, -0.1539, 0.3470]) tensor([0.6172, 0.1784, 0.0771, 0.1273]) -Greedy action tensor([ 1.9162, -1.1500, -0.0577, 0.9624]) tensor([0.6366, 0.0297, 0.0884, 0.2453]) -Greedy action tensor([ 2.0900, -1.3352, -0.0855, 0.7808]) tensor([0.7062, 0.0230, 0.0802, 0.1907]) -Greedy action tensor([ 1.2371, -0.2563, -0.1940, 0.0595]) tensor([0.5644, 0.1268, 0.1349, 0.1739]) -Greedy action tensor([ 1.8379, -1.0515, 0.1563, 0.0499]) tensor([0.7097, 0.0395, 0.1321, 0.1187]) -Greedy action tensor([ 1.4503, -0.3138, -0.4450, 0.4138]) tensor([0.5965, 0.1022, 0.0896, 0.2116]) -Greedy action tensor([ 2.2219, -1.1457, -0.6831, 0.6360]) tensor([0.7728, 0.0266, 0.0423, 0.1582]) -Greedy action tensor([ 1.0854, -0.2824, -0.1589, 0.1164]) tensor([0.5202, 0.1325, 0.1499, 0.1974]) -Greedy action tensor([ 1.5230, -0.7527, -0.5147, 0.3933]) tensor([0.6426, 0.0660, 0.0837, 0.2076]) -Greedy action tensor([ 0.2299, -0.1208, -0.2010, 0.2428]) tensor([0.2970, 0.2091, 0.1930, 0.3009]) -Greedy action tensor([ 1.1421, -0.3607, -0.6799, 0.1833]) tensor([0.5658, 0.1259, 0.0915, 0.2169]) -Greedy action tensor([ 1.4869, -0.6246, -0.0086, 0.1576]) tensor([0.6212, 0.0752, 0.1392, 0.1644]) -Greedy action tensor([ 1.3482, -0.9020, -0.2493, 0.5530]) tensor([0.5684, 0.0599, 0.1150, 0.2566]) -Greedy action tensor([ 1.2613, -0.2814, -0.3825, -0.2497]) tensor([0.6143, 0.1313, 0.1187, 0.1356]) -Greedy action tensor([ 2.2608, -1.1616, -0.4239, 0.2593]) tensor([0.8091, 0.0264, 0.0552, 0.1093]) -Greedy action tensor([ 1.1420, -0.4062, -0.2596, 0.3142]) tensor([0.5275, 0.1122, 0.1299, 0.2305]) -Greedy action tensor([ 1.2933, -0.3675, -0.4789, 0.3283]) tensor([0.5744, 0.1091, 0.0976, 0.2188]) -Greedy action tensor([ 1.7273, -0.6336, -0.2121, 0.3351]) tensor([0.6726, 0.0635, 0.0967, 0.1672]) -Greedy action tensor([ 1.7031, -0.7646, -0.7958, 0.6073]) tensor([0.6661, 0.0565, 0.0547, 0.2227]) -Greedy action tensor([ 1.6700, -0.1442, -0.4165, 0.4004]) tensor([0.6378, 0.1039, 0.0792, 0.1792]) -Greedy action tensor([ 1.2451, -0.1731, 0.0704, 0.2233]) tensor([0.5233, 0.1267, 0.1617, 0.1884]) -Greedy action tensor([ 1.6021, -0.5468, -0.9378, 0.2386]) tensor([0.6891, 0.0804, 0.0544, 0.1762]) -Greedy action tensor([ 1.8002, -0.2350, -0.4788, 0.6169]) tensor([0.6497, 0.0849, 0.0665, 0.1990]) -Greedy action tensor([ 0.7525, -0.4357, -0.0808, -0.3249]) tensor([0.4808, 0.1465, 0.2090, 0.1637]) -Greedy action tensor([ 0.9600, -0.1635, 0.0565, -0.1251]) tensor([0.4835, 0.1572, 0.1959, 0.1634]) -Greedy action tensor([ 0.9719, -0.6933, -0.0266, -0.5963]) tensor([0.5662, 0.1071, 0.2086, 0.1180]) -Greedy action tensor([ 0.4756, -0.0552, 0.0229, -0.0569]) tensor([0.3557, 0.2092, 0.2262, 0.2088]) -Greedy action tensor([ 0.3799, -0.3878, 0.0066, -0.2839]) tensor([0.3749, 0.1740, 0.2581, 0.1930]) -Greedy action tensor([ 0.5940, -0.4375, -0.0548, -0.1334]) tensor([0.4233, 0.1509, 0.2213, 0.2045]) -Greedy action tensor([ 1.0852, -0.6484, -0.0930, -0.3739]) tensor([0.5824, 0.1029, 0.1793, 0.1354]) -Greedy action tensor([ 0.8447, -0.4392, -0.0786, -0.1118]) tensor([0.4858, 0.1345, 0.1930, 0.1867]) -Greedy action tensor([ 1.2050, -0.7661, 0.0287, -0.6655]) tensor([0.6243, 0.0870, 0.1925, 0.0962]) -Greedy action tensor([ 0.3968, 0.0046, -0.0875, -0.0272]) tensor([0.3394, 0.2293, 0.2091, 0.2221]) -Greedy action tensor([ 0.8003, -0.4609, 0.0886, -0.4161]) tensor([0.4830, 0.1368, 0.2371, 0.1431]) -Greedy action tensor([ 0.8187, -0.8970, -0.1247, -0.2889]) tensor([0.5264, 0.0947, 0.2050, 0.1739]) -Greedy action tensor([ 0.6915, -0.7542, 0.0845, -0.4470]) tensor([0.4760, 0.1121, 0.2594, 0.1525]) -Greedy action tensor([ 0.7451, -0.7315, -0.1000, -0.3622]) tensor([0.5029, 0.1149, 0.2160, 0.1662]) -Greedy action tensor([ 0.5794, -0.1016, 0.0008, -0.0272]) tensor([0.3829, 0.1938, 0.2147, 0.2087]) -Greedy action tensor([ 0.2091, 0.0998, -0.0825, 0.1860]) tensor([0.2762, 0.2476, 0.2063, 0.2699]) -Greedy action tensor([ 0.3850, -0.1879, -0.0598, -0.0527]) tensor([0.3508, 0.1978, 0.2249, 0.2265]) -Greedy action tensor([ 0.6042, -0.2079, -0.0720, -0.4887]) tensor([0.4371, 0.1940, 0.2223, 0.1465]) -Greedy action tensor([ 1.1671, -0.3795, -0.0884, -0.2097]) tensor([0.5713, 0.1217, 0.1628, 0.1442]) -Greedy action tensor([ 0.9208, -0.5372, 0.0195, -0.3045]) tensor([0.5175, 0.1204, 0.2101, 0.1520]) -Greedy action tensor([ 1.2037, -0.3986, -0.1216, -0.5413]) tensor([0.6091, 0.1227, 0.1619, 0.1064]) -Greedy action tensor([ 0.5414, -0.1968, -0.0450, -0.0820]) tensor([0.3890, 0.1860, 0.2164, 0.2086]) -Greedy action tensor([ 1.1601, -0.6419, -0.1922, -0.3099]) tensor([0.6048, 0.0998, 0.1564, 0.1390]) -Greedy action tensor([ 0.8114, -0.4128, -0.1752, -0.3387]) tensor([0.5042, 0.1482, 0.1880, 0.1596]) -Greedy action tensor([ 1.1626, -0.8172, -0.2078, -0.5451]) tensor([0.6356, 0.0878, 0.1614, 0.1152]) -Greedy action tensor([ 0.9179, -0.7834, 0.0330, -0.4800]) tensor([0.5428, 0.0990, 0.2240, 0.1341]) -Greedy action tensor([ 0.9316, -0.7535, -0.0213, -0.7249]) tensor([0.5676, 0.1052, 0.2189, 0.1083]) -Greedy action tensor([ 1.3391, -0.6257, -0.1496, -0.4789]) tensor([0.6544, 0.0917, 0.1477, 0.1062]) -Greedy action tensor([ 0.9189, -0.6067, -0.0361, -0.2896]) tensor([0.5261, 0.1144, 0.2024, 0.1571]) -Greedy action tensor([ 0.7488, -0.5181, 0.0326, -0.1735]) tensor([0.4613, 0.1299, 0.2254, 0.1834]) -Greedy action tensor([ 0.6597, -0.3610, 0.0236, -0.4536]) tensor([0.4508, 0.1624, 0.2386, 0.1481]) -Greedy action tensor([ 0.9841, -0.5342, -0.0139, -0.3421]) tensor([0.5396, 0.1182, 0.1989, 0.1433]) -Greedy action tensor([ 0.7914, -0.6217, -0.0624, -0.3241]) tensor([0.5008, 0.1219, 0.2132, 0.1641]) -Greedy action tensor([ 1.0402, -0.7302, 0.1289, -0.4445]) tensor([0.5559, 0.0947, 0.2235, 0.1260]) -Greedy action tensor([ 1.1180, -0.6904, 0.0461, -0.2653]) tensor([0.5691, 0.0933, 0.1949, 0.1427]) -Greedy action tensor([ 0.6514, -0.5685, -0.1725, -0.2352]) tensor([0.4660, 0.1376, 0.2044, 0.1920]) -Greedy action tensor([ 0.5089, -0.2515, -0.1684, -0.1720]) tensor([0.4030, 0.1884, 0.2047, 0.2040]) -Greedy action tensor([ 0.7746, -0.4002, -0.1716, -0.1270]) tensor([0.4755, 0.1469, 0.1846, 0.1930]) -Greedy action tensor([ 1.0701, -0.7691, 0.0283, -0.4293]) tensor([0.5764, 0.0916, 0.2034, 0.1287]) -Greedy action tensor([ 0.8736, -0.4592, -0.1648, -0.3692]) tensor([0.5246, 0.1383, 0.1857, 0.1514]) -Greedy action tensor([ 0.8231, -0.0983, 0.1302, -0.0940]) tensor([0.4352, 0.1732, 0.2177, 0.1739]) -Greedy action tensor([ 0.9409, -0.6660, 0.0434, -0.6206]) tensor([0.5501, 0.1103, 0.2242, 0.1154]) -Greedy action tensor([ 0.7348, -0.5996, -0.0500, -0.2019]) tensor([0.4736, 0.1247, 0.2161, 0.1856]) -Greedy action tensor([ 0.7781, -0.3203, -0.0463, -0.2148]) tensor([0.4668, 0.1556, 0.2047, 0.1729]) -Greedy action tensor([ 1.2949, -0.9522, -0.0363, -0.4633]) tensor([0.6484, 0.0685, 0.1713, 0.1118]) -Greedy action tensor([ 1.0711, -1.0127, 0.1226, -0.6508]) tensor([0.5915, 0.0736, 0.2291, 0.1057]) -Greedy action tensor([ 1.0123, -0.6374, -0.0457, -0.4476]) tensor([0.5645, 0.1084, 0.1960, 0.1311]) -Greedy action tensor([ 0.8198, -0.7407, 0.1433, -0.4239]) tensor([0.4983, 0.1047, 0.2534, 0.1437]) -Greedy action tensor([ 0.6837, -0.3508, -0.2051, -0.1613]) tensor([0.4554, 0.1618, 0.1872, 0.1956]) -Greedy action tensor([ 0.9117, -0.4930, 0.0076, -0.4646]) tensor([0.5255, 0.1290, 0.2128, 0.1327]) -Greedy action tensor([ 0.9814, -0.9989, 0.0285, -0.4674]) tensor([0.5687, 0.0785, 0.2193, 0.1336]) -Greedy action tensor([ 0.7552, -0.1170, 0.2786, -0.2213]) tensor([0.4140, 0.1731, 0.2570, 0.1559]) -Greedy action tensor([ 0.3771, 0.1844, -0.0243, 0.0544]) tensor([0.3107, 0.2563, 0.2080, 0.2250]) -Greedy action tensor([ 0.5178, 0.2141, -0.1571, 0.1851]) tensor([0.3374, 0.2490, 0.1718, 0.2419]) -Greedy action tensor([ 0.3760, 0.0632, -0.1811, 0.1198]) tensor([0.3249, 0.2376, 0.1861, 0.2514]) -Greedy action tensor([ 0.8857, -0.5801, -0.1420, -0.3029]) tensor([0.5281, 0.1220, 0.1890, 0.1609]) -Greedy action tensor([ 0.5626, -0.1305, -0.0723, -0.2924]) tensor([0.4073, 0.2036, 0.2159, 0.1732]) -Greedy action tensor([ 0.3893, -0.0536, 0.0163, -0.2968]) tensor([0.3528, 0.2266, 0.2430, 0.1777]) -Greedy action tensor([ 0.4764, 0.0352, -0.1060, 0.0247]) tensor([0.3523, 0.2266, 0.1968, 0.2243]) -Greedy action tensor([ 0.5282, -0.0357, -0.1072, -0.0302]) tensor([0.3744, 0.2130, 0.1983, 0.2142]) -Greedy action tensor([ 0.7613, -0.2874, -0.0614, -0.4057]) tensor([0.4760, 0.1668, 0.2091, 0.1482]) -Greedy action tensor([ 0.9199, -0.6952, 0.0414, -0.4285]) tensor([0.5336, 0.1061, 0.2217, 0.1386]) -Greedy action tensor([ 1.2252, -0.6983, -0.0867, -0.5086]) tensor([0.6281, 0.0918, 0.1692, 0.1109]) -Greedy action tensor([ 0.9080, -0.4449, 0.1744, -0.6505]) tensor([0.5131, 0.1326, 0.2463, 0.1080]) -Greedy action tensor([ 0.6748, 0.0943, -0.1223, -0.1119]) tensor([0.4056, 0.2270, 0.1828, 0.1847]) -Greedy action tensor([ 0.7944, -0.5638, -0.0857, -0.4613]) tensor([0.5110, 0.1314, 0.2120, 0.1456]) -Greedy action tensor([ 0.5068, 0.0673, -0.0427, -0.2117]) tensor([0.3691, 0.2379, 0.2131, 0.1799]) -Greedy action tensor([ 0.7738, -0.6979, -0.0140, -0.2719]) tensor([0.4912, 0.1127, 0.2234, 0.1726]) -Greedy action tensor([ 0.5999, -0.2007, 0.0705, -0.0369]) tensor([0.3896, 0.1749, 0.2294, 0.2061]) -Greedy action tensor([ 1.0353, -0.5915, -0.0677, -0.6980]) tensor([0.5865, 0.1153, 0.1946, 0.1036]) -Greedy action tensor([ 0.9461, -0.4481, -0.1178, -0.1840]) tensor([0.5219, 0.1294, 0.1801, 0.1686]) -Greedy action tensor([ 0.7348, -0.1539, -0.0630, -0.0141]) tensor([0.4284, 0.1761, 0.1929, 0.2026]) -Greedy action tensor([ 0.3936, 0.1775, 0.0027, -0.1730]) tensor([0.3279, 0.2642, 0.2218, 0.1861]) -Greedy action tensor([ 0.7968, -0.4843, 0.0240, -0.2873]) tensor([0.4813, 0.1337, 0.2222, 0.1628]) -Greedy action tensor([ 0.6350, -0.2204, 0.0420, -0.0928]) tensor([0.4064, 0.1728, 0.2246, 0.1963]) -Greedy action tensor([ 0.7346, -0.2824, -0.1359, -0.1470]) tensor([0.4557, 0.1648, 0.1908, 0.1887]) -Greedy action tensor([ 0.7676, -0.1651, -0.0714, 0.0093]) tensor([0.4359, 0.1715, 0.1884, 0.2042]) -Greedy action tensor([ 1.1375, -0.8481, 0.0159, -0.5187]) tensor([0.6046, 0.0830, 0.1970, 0.1154]) -Greedy action tensor([ 0.7140, -0.4933, -0.1588, -0.3642]) tensor([0.4861, 0.1454, 0.2031, 0.1654]) -Greedy action tensor([ 0.4420, -0.1293, -0.0055, -0.0961]) tensor([0.3587, 0.2026, 0.2293, 0.2094]) -Greedy action tensor([ 0.4781, -0.3001, -0.1057, -0.1650]) tensor([0.3933, 0.1806, 0.2194, 0.2067])